// Tesseract 3.02 (Cube module)
00001 /********************************************************************** 00002 * File: word_size_model.cpp 00003 * Description: Implementation of the Word Size Model Class 00004 * Author: Ahmad Abdulkader 00005 * Created: 2008 00006 * 00007 * (C) Copyright 2008, Google Inc. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 * 00018 **********************************************************************/ 00019 00020 #include <math.h> 00021 #include <string> 00022 #include <vector> 00023 #include "word_size_model.h" 00024 #include "cube_utils.h" 00025 00026 namespace tesseract { 00027 00028 WordSizeModel::WordSizeModel(CharSet * char_set, bool contextual) { 00029 char_set_ = char_set; 00030 contextual_ = contextual; 00031 } 00032 00033 WordSizeModel::~WordSizeModel() { 00034 for (int fnt = 0; fnt < font_pair_size_models_.size(); fnt++) { 00035 FontPairSizeInfo fnt_info = font_pair_size_models_[fnt]; 00036 delete []fnt_info.pair_size_info[0]; 00037 delete []fnt_info.pair_size_info; 00038 } 00039 } 00040 00041 WordSizeModel *WordSizeModel::Create(const string &data_file_path, 00042 const string &lang, 00043 CharSet *char_set, 00044 bool contextual) { 00045 WordSizeModel *obj = new WordSizeModel(char_set, contextual); 00046 if (!obj) { 00047 fprintf(stderr, "Cube ERROR (WordSizeModel::Create): unable to allocate " 00048 "new word size model object\n"); 00049 return NULL; 00050 } 00051 00052 if (!obj->Init(data_file_path, 
lang)) { 00053 delete obj; 00054 return NULL; 00055 } 00056 return obj; 00057 } 00058 00059 bool WordSizeModel::Init(const string &data_file_path, const string &lang) { 00060 string stats_file_name; 00061 stats_file_name = data_file_path + lang; 00062 stats_file_name += ".cube.size"; 00063 00064 // read file to memory 00065 string str_data; 00066 00067 if (!CubeUtils::ReadFileToString(stats_file_name, &str_data)) { 00068 return false; 00069 } 00070 00071 // split to words 00072 vector<string> tokens; 00073 CubeUtils::SplitStringUsing(str_data, "\t\r\n", &tokens); 00074 if (tokens.size() < 1) { 00075 fprintf(stderr, "Cube ERROR (WordSizeModel::Init): invalid " 00076 "file contents: %s\n", stats_file_name.c_str()); 00077 return false; 00078 } 00079 00080 font_pair_size_models_.clear(); 00081 00082 // token count per line depends on whether the language is contextual or not 00083 int token_cnt = contextual_ ? 00084 (kExpectedTokenCount + 4) : kExpectedTokenCount; 00085 // the count of size classes depends on whether the language is contextual 00086 // or not. For non contextual languages (Ex: Eng), it is equal to the class 00087 // count. For contextual languages (Ex: Ara), it is equal to the class count 00088 // multiplied by the position count (4: start, middle, final, isolated) 00089 int size_class_cnt = contextual_ ? 
00090 (char_set_->ClassCount() * 4) : char_set_->ClassCount(); 00091 string fnt_name = ""; 00092 00093 for (int tok = 0; tok < tokens.size(); tok += token_cnt) { 00094 // a new font, write the old font data and re-init 00095 if (tok == 0 || fnt_name != tokens[tok]) { 00096 FontPairSizeInfo fnt_info; 00097 00098 fnt_info.pair_size_info = new PairSizeInfo *[size_class_cnt]; 00099 if (!fnt_info.pair_size_info) { 00100 fprintf(stderr, "Cube ERROR (WordSizeModel::Init): error allcoating " 00101 "memory for font pair size info\n"); 00102 return false; 00103 } 00104 00105 fnt_info.pair_size_info[0] = 00106 new PairSizeInfo[size_class_cnt * size_class_cnt]; 00107 if (!fnt_info.pair_size_info[0]) { 00108 fprintf(stderr, "Cube ERROR (WordSizeModel::Init): error allocating " 00109 "memory for font pair size info\n"); 00110 return false; 00111 } 00112 00113 memset(fnt_info.pair_size_info[0], 0, size_class_cnt * size_class_cnt * 00114 sizeof(PairSizeInfo)); 00115 00116 for (int cls = 1; cls < size_class_cnt; cls++) { 00117 fnt_info.pair_size_info[cls] = 00118 fnt_info.pair_size_info[cls - 1] + size_class_cnt; 00119 } 00120 00121 // strip out path and extension 00122 string stripped_font_name = tokens[tok].substr(0, tokens[tok].find('.')); 00123 string::size_type strt_pos = stripped_font_name.find_last_of("/\\"); 00124 if (strt_pos != string::npos) { 00125 fnt_info.font_name = stripped_font_name.substr(strt_pos); 00126 } else { 00127 fnt_info.font_name = stripped_font_name; 00128 } 00129 font_pair_size_models_.push_back(fnt_info); 00130 } 00131 00132 // parse the data 00133 int cls_0; 00134 int cls_1; 00135 double delta_top; 00136 double wid_0; 00137 double hgt_0; 00138 double wid_1; 00139 double hgt_1; 00140 int size_code_0; 00141 int size_code_1; 00142 00143 // read and parse the tokens 00144 if (contextual_) { 00145 int start_0; 00146 int end_0; 00147 int start_1; 00148 int end_1; 00149 // The expected format for a character size bigram is as follows: 00150 // 
ClassId0<delim>Start-flag0<delim>End-flag0<delim>String0(ignored) 00151 // Width0<delim>Height0<delim> 00152 // ClassId1<delim>Start-flag1<delim>End-flag1<delim>String1(ignored) 00153 // HeightDelta<delim>Width1<delim>Height0<delim> 00154 // In case of non-contextual languages, the Start and End flags are 00155 // omitted 00156 if (sscanf(tokens[tok + 1].c_str(), "%d", &cls_0) != 1 || 00157 sscanf(tokens[tok + 2].c_str(), "%d", &start_0) != 1 || 00158 sscanf(tokens[tok + 3].c_str(), "%d", &end_0) != 1 || 00159 sscanf(tokens[tok + 5].c_str(), "%lf", &wid_0) != 1 || 00160 sscanf(tokens[tok + 6].c_str(), "%lf", &hgt_0) != 1 || 00161 sscanf(tokens[tok + 7].c_str(), "%d", &cls_1) != 1 || 00162 sscanf(tokens[tok + 8].c_str(), "%d", &start_1) != 1 || 00163 sscanf(tokens[tok + 9].c_str(), "%d", &end_1) != 1 || 00164 sscanf(tokens[tok + 11].c_str(), "%lf", &delta_top) != 1 || 00165 sscanf(tokens[tok + 12].c_str(), "%lf", &wid_1) != 1 || 00166 sscanf(tokens[tok + 13].c_str(), "%lf", &hgt_1) != 1 || 00167 (start_0 != 0 && start_0 != 1) || (end_0 != 0 && end_0 != 1) || 00168 (start_1 != 0 && start_1 != 1) || (end_1 != 0 && end_1 != 1)) { 00169 fprintf(stderr, "Cube ERROR (WordSizeModel::Init): bad format at " 00170 "line %d\n", 1 + (tok / token_cnt)); 00171 return false; 00172 } 00173 size_code_0 = SizeCode(cls_0, start_0, end_0); 00174 size_code_1 = SizeCode(cls_1, start_1, end_1); 00175 } else { 00176 if (sscanf(tokens[tok + 1].c_str(), "%d", &cls_0) != 1 || 00177 sscanf(tokens[tok + 3].c_str(), "%lf", &wid_0) != 1 || 00178 sscanf(tokens[tok + 4].c_str(), "%lf", &hgt_0) != 1 || 00179 sscanf(tokens[tok + 5].c_str(), "%d", &cls_1) != 1 || 00180 sscanf(tokens[tok + 7].c_str(), "%lf", &delta_top) != 1 || 00181 sscanf(tokens[tok + 8].c_str(), "%lf", &wid_1) != 1 || 00182 sscanf(tokens[tok + 9].c_str(), "%lf", &hgt_1) != 1) { 00183 fprintf(stderr, "Cube ERROR (WordSizeModel::Init): bad format at " 00184 "line %d\n", 1 + (tok / token_cnt)); 00185 return false; 00186 } 00187 
size_code_0 = cls_0; 00188 size_code_1 = cls_1; 00189 } 00190 00191 // copy the data to the size tables 00192 FontPairSizeInfo fnt_info = font_pair_size_models_.back(); 00193 fnt_info.pair_size_info[size_code_0][size_code_1].delta_top = 00194 static_cast<int>(delta_top * kShapeModelScale); 00195 fnt_info.pair_size_info[size_code_0][size_code_1].wid_0 = 00196 static_cast<int>(wid_0 * kShapeModelScale); 00197 fnt_info.pair_size_info[size_code_0][size_code_1].hgt_0 = 00198 static_cast<int>(hgt_0 * kShapeModelScale); 00199 fnt_info.pair_size_info[size_code_0][size_code_1].wid_1 = 00200 static_cast<int>(wid_1 * kShapeModelScale); 00201 fnt_info.pair_size_info[size_code_0][size_code_1].hgt_1 = 00202 static_cast<int>(hgt_1 * kShapeModelScale); 00203 00204 fnt_name = tokens[tok]; 00205 } 00206 00207 return true; 00208 } 00209 00210 int WordSizeModel::Cost(CharSamp **samp_array, int samp_cnt) const { 00211 if (samp_cnt < 2) { 00212 return 0; 00213 } 00214 double best_dist = static_cast<double>(WORST_COST); 00215 int best_fnt = -1; 00216 for (int fnt = 0; fnt < font_pair_size_models_.size(); fnt++) { 00217 const FontPairSizeInfo *fnt_info = &font_pair_size_models_[fnt]; 00218 double mean_dist = 0; 00219 int pair_cnt = 0; 00220 00221 for (int smp_0 = 0; smp_0 < samp_cnt; smp_0++) { 00222 int cls_0 = char_set_->ClassID(samp_array[smp_0]->StrLabel()); 00223 if (cls_0 < 1) { 00224 continue; 00225 } 00226 // compute size code for samp 0 based on class id and position 00227 int size_code_0; 00228 if (contextual_) { 00229 size_code_0 = SizeCode(cls_0, 00230 samp_array[smp_0]->FirstChar() == 0 ? 0 : 1, 00231 samp_array[smp_0]->LastChar() == 0 ? 
0 : 1); 00232 } else { 00233 size_code_0 = cls_0; 00234 } 00235 00236 int char0_height = samp_array[smp_0]->Height(); 00237 int char0_width = samp_array[smp_0]->Width(); 00238 int char0_top = samp_array[smp_0]->Top(); 00239 00240 for (int smp_1 = smp_0 + 1; smp_1 < samp_cnt; smp_1++) { 00241 int cls_1 = char_set_->ClassID(samp_array[smp_1]->StrLabel()); 00242 if (cls_1 < 1) { 00243 continue; 00244 } 00245 // compute size code for samp 0 based on class id and position 00246 int size_code_1; 00247 if (contextual_) { 00248 size_code_1 = SizeCode(cls_1, 00249 samp_array[smp_1]->FirstChar() == 0 ? 0 : 1, 00250 samp_array[smp_1]->LastChar() == 0 ? 0 : 1); 00251 } else { 00252 size_code_1 = cls_1; 00253 } 00254 double dist = PairCost( 00255 char0_width, char0_height, char0_top, samp_array[smp_1]->Width(), 00256 samp_array[smp_1]->Height(), samp_array[smp_1]->Top(), 00257 fnt_info->pair_size_info[size_code_0][size_code_1]); 00258 if (dist > 0) { 00259 mean_dist += dist; 00260 pair_cnt++; 00261 } 00262 } // smp_1 00263 } // smp_0 00264 if (pair_cnt == 0) { 00265 continue; 00266 } 00267 mean_dist /= pair_cnt; 00268 if (best_fnt == -1 || mean_dist < best_dist) { 00269 best_dist = mean_dist; 00270 best_fnt = fnt; 00271 } 00272 } 00273 if (best_fnt == -1) { 00274 return static_cast<int>(WORST_COST); 00275 } else { 00276 return static_cast<int>(best_dist); 00277 } 00278 } 00279 00280 double WordSizeModel::PairCost(int width_0, int height_0, int top_0, 00281 int width_1, int height_1, int top_1, 00282 const PairSizeInfo& pair_info) { 00283 double scale_factor = static_cast<double>(pair_info.hgt_0) / 00284 static_cast<double>(height_0); 00285 double dist = 0.0; 00286 if (scale_factor > 0) { 00287 double norm_width_0 = width_0 * scale_factor; 00288 double norm_width_1 = width_1 * scale_factor; 00289 double norm_height_1 = height_1 * scale_factor; 00290 double norm_delta_top = (top_1 - top_0) * scale_factor; 00291 00292 // accumulate the distance between the model character and the 
00293 // predicted one on all dimensions of the pair 00294 dist += fabs(pair_info.wid_0 - norm_width_0); 00295 dist += fabs(pair_info.wid_1 - norm_width_1); 00296 dist += fabs(pair_info.hgt_1 - norm_height_1); 00297 dist += fabs(pair_info.delta_top - norm_delta_top); 00298 } 00299 return dist; 00300 } 00301 } // namespace tesseract