Tesseract
3.02
|
00001 /********************************************************************** 00002 * File: char_samp_enum.cpp 00003 * Description: Implementation of a Character Sample Set Class 00004 * Author: Ahmad Abdulkader 00005 * Created: 2007 00006 * 00007 * (C) Copyright 2008, Google Inc. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 * 00018 **********************************************************************/ 00019 00020 #include <stdlib.h> 00021 #include <string> 00022 #include "char_samp_set.h" 00023 #include "cached_file.h" 00024 00025 namespace tesseract { 00026 00027 CharSampSet::CharSampSet() { 00028 cnt_ = 0; 00029 samp_buff_ = NULL; 00030 own_samples_ = false; 00031 } 00032 00033 CharSampSet::~CharSampSet() { 00034 Cleanup(); 00035 } 00036 00037 // free buffers and init vars 00038 void CharSampSet::Cleanup() { 00039 if (samp_buff_ != NULL) { 00040 // only free samples if owned by class 00041 if (own_samples_ == true) { 00042 for (int samp_idx = 0; samp_idx < cnt_; samp_idx++) { 00043 if (samp_buff_[samp_idx] != NULL) { 00044 delete samp_buff_[samp_idx]; 00045 } 00046 } 00047 } 00048 delete []samp_buff_; 00049 } 00050 cnt_ = 0; 00051 samp_buff_ = NULL; 00052 } 00053 00054 // add a new sample 00055 bool CharSampSet::Add(CharSamp *char_samp) { 00056 if ((cnt_ % SAMP_ALLOC_BLOCK) == 0) { 00057 // create an extended buffer 00058 CharSamp **new_samp_buff = 00059 reinterpret_cast<CharSamp **>(new CharSamp *[cnt_ + SAMP_ALLOC_BLOCK]); 00060 if (new_samp_buff == NULL) { 00061 return false; 00062 } 00063 // copy old contents 00064 if (cnt_ > 0) { 00065 memcpy(new_samp_buff, samp_buff_, cnt_ * sizeof(*samp_buff_)); 00066 delete []samp_buff_; 00067 } 00068 samp_buff_ = new_samp_buff; 00069 } 00070 samp_buff_[cnt_++] = char_samp; 00071 return true; 00072 } 00073 00074 // load char samples from file 00075 bool CharSampSet::LoadCharSamples(FILE *fp) { 00076 // free existing 00077 Cleanup(); 00078 // samples are created here and owned by the class 00079 own_samples_ = true; 00080 // start loading char samples 00081 while (feof(fp) == 0) { 00082 CharSamp *new_samp = CharSamp::FromCharDumpFile(fp); 00083 if (new_samp != NULL) { 00084 if (Add(new_samp) == false) { 00085 return false; 00086 } 00087 } 00088 } 00089 return true; 00090 } 00091 00092 // creates a CharSampSet object from file 00093 CharSampSet * CharSampSet::FromCharDumpFile(string file_name) { 00094 FILE *fp; 00095 unsigned int val32; 00096 // open the file 00097 fp = fopen(file_name.c_str(), "rb"); 00098 if (fp == NULL) { 00099 return NULL; 00100 } 00101 // read and verify marker 00102 if (fread(&val32, 1, sizeof(val32), fp) != sizeof(val32)) { 00103 return NULL; 00104 } 00105 if (val32 != 0xfefeabd0) { 00106 return NULL; 00107 } 00108 // create an object 00109 CharSampSet *samp_set = new CharSampSet(); 00110 if (samp_set == NULL) { 00111 return NULL; 00112 } 00113 if (samp_set->LoadCharSamples(fp) == false) { 00114 delete samp_set; 00115 samp_set = NULL; 00116 } 00117 fclose(fp); 00118 return samp_set; 00119 } 00120 00121 // Create a new Char Dump file 00122 FILE *CharSampSet::CreateCharDumpFile(string file_name) { 00123 FILE *fp; 00124 unsigned int val32; 00125 // create the file 00126 fp = fopen(file_name.c_str(), "wb"); 00127 if (!fp) { 00128 return NULL; 00129 } 00130 // read and verify marker 00131 val32 = 0xfefeabd0; 00132 if (fwrite(&val32, 1, sizeof(val32), fp) != sizeof(val32)) { 00133 return NULL; 00134 } 00135 return fp; 00136 } 00137 00138 // Enumerate the Samples in the set one-by-one calling the enumertor's 00139 // EnumCharSamp method for each sample 00140 bool CharSampSet::EnumSamples(string file_name, CharSampEnum *enum_obj) { 00141 CachedFile *fp_in; 00142 unsigned int val32; 00143 long i64_size, 00144 i64_pos; 00145 // open the file 00146 fp_in = new CachedFile(file_name); 00147 if (fp_in == NULL) { 00148 return false; 00149 } 00150 i64_size = fp_in->Size(); 00151 if (i64_size < 1) { 00152 return false; 00153 } 00154 // read and verify marker 00155 if (fp_in->Read(&val32, sizeof(val32)) != sizeof(val32)) { 00156 return false; 00157 } 00158 if (val32 != 0xfefeabd0) { 00159 return false; 00160 } 00161 // start loading char samples 00162 while (fp_in->eof() == false) { 00163 CharSamp *new_samp = CharSamp::FromCharDumpFile(fp_in); 00164 i64_pos = fp_in->Tell(); 00165 if (new_samp != NULL) { 00166 bool ret_flag = (enum_obj)->EnumCharSamp(new_samp, 00167 (100.0f * i64_pos / i64_size)); 00168 delete new_samp; 00169 if (ret_flag == false) { 00170 break; 00171 } 00172 } 00173 } 00174 delete fp_in; 00175 return true; 00176 } 00177 00178 } // namespace ocrlib