Tesseract  3.02
tesseract-ocr/cube/word_size_model.cpp
Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        word_size_model.cpp
00003  * Description: Implementation of the Word Size Model Class
00004  * Author:    Ahmad Abdulkader
00005  * Created:   2008
00006  *
00007  * (C) Copyright 2008, Google Inc.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
#include <math.h>
#include <stdio.h>
#include <string.h>
#include <string>
#include <vector>
#include "word_size_model.h"
#include "cube_utils.h"
00025 
00026 namespace tesseract {
00027 
00028 WordSizeModel::WordSizeModel(CharSet * char_set, bool contextual) {
00029   char_set_ = char_set;
00030   contextual_ = contextual;
00031 }
00032 
00033 WordSizeModel::~WordSizeModel() {
00034   for (int fnt = 0; fnt < font_pair_size_models_.size(); fnt++) {
00035     FontPairSizeInfo fnt_info = font_pair_size_models_[fnt];
00036     delete []fnt_info.pair_size_info[0];
00037     delete []fnt_info.pair_size_info;
00038   }
00039 }
00040 
00041 WordSizeModel *WordSizeModel::Create(const string &data_file_path,
00042                                      const string &lang,
00043                                      CharSet *char_set,
00044                                      bool contextual) {
00045   WordSizeModel *obj = new WordSizeModel(char_set, contextual);
00046   if (!obj) {
00047     fprintf(stderr, "Cube ERROR (WordSizeModel::Create): unable to allocate "
00048             "new word size model object\n");
00049     return NULL;
00050   }
00051 
00052   if (!obj->Init(data_file_path, lang)) {
00053     delete obj;
00054     return NULL;
00055   }
00056   return obj;
00057 }
00058 
00059 bool WordSizeModel::Init(const string &data_file_path, const string &lang) {
00060   string stats_file_name;
00061   stats_file_name = data_file_path + lang;
00062   stats_file_name += ".cube.size";
00063 
00064   // read file to memory
00065   string str_data;
00066 
00067   if (!CubeUtils::ReadFileToString(stats_file_name, &str_data)) {
00068     return false;
00069   }
00070 
00071   // split to words
00072   vector<string> tokens;
00073   CubeUtils::SplitStringUsing(str_data, "\t\r\n", &tokens);
00074   if (tokens.size() < 1) {
00075     fprintf(stderr, "Cube ERROR (WordSizeModel::Init): invalid "
00076             "file contents: %s\n", stats_file_name.c_str());
00077     return false;
00078   }
00079 
00080   font_pair_size_models_.clear();
00081 
00082   // token count per line depends on whether the language is contextual or not
00083   int token_cnt = contextual_ ?
00084       (kExpectedTokenCount + 4) : kExpectedTokenCount;
00085   // the count of size classes depends on whether the language is contextual
00086   // or not. For non contextual languages (Ex: Eng), it is equal to the class
00087   // count. For contextual languages (Ex: Ara), it is equal to the class count
00088   // multiplied by the position count (4: start, middle, final, isolated)
00089   int size_class_cnt = contextual_ ?
00090       (char_set_->ClassCount() * 4) : char_set_->ClassCount();
00091   string fnt_name = "";
00092 
00093   for (int tok = 0; tok < tokens.size(); tok += token_cnt) {
00094     // a new font, write the old font data and re-init
00095     if (tok == 0 || fnt_name != tokens[tok]) {
00096       FontPairSizeInfo fnt_info;
00097 
00098       fnt_info.pair_size_info = new PairSizeInfo *[size_class_cnt];
00099       if (!fnt_info.pair_size_info) {
00100         fprintf(stderr, "Cube ERROR (WordSizeModel::Init): error allcoating "
00101                 "memory for font pair size info\n");
00102         return false;
00103       }
00104 
00105       fnt_info.pair_size_info[0] =
00106           new PairSizeInfo[size_class_cnt * size_class_cnt];
00107       if (!fnt_info.pair_size_info[0]) {
00108         fprintf(stderr, "Cube ERROR (WordSizeModel::Init): error allocating "
00109                 "memory for font pair size info\n");
00110         return false;
00111       }
00112 
00113       memset(fnt_info.pair_size_info[0], 0, size_class_cnt * size_class_cnt *
00114              sizeof(PairSizeInfo));
00115 
00116       for (int cls = 1; cls < size_class_cnt; cls++) {
00117         fnt_info.pair_size_info[cls] =
00118             fnt_info.pair_size_info[cls - 1] + size_class_cnt;
00119       }
00120 
00121       // strip out path and extension
00122       string stripped_font_name = tokens[tok].substr(0, tokens[tok].find('.'));
00123       string::size_type strt_pos = stripped_font_name.find_last_of("/\\");
00124       if (strt_pos != string::npos) {
00125         fnt_info.font_name = stripped_font_name.substr(strt_pos);
00126       } else {
00127         fnt_info.font_name = stripped_font_name;
00128       }
00129       font_pair_size_models_.push_back(fnt_info);
00130     }
00131 
00132     // parse the data
00133     int cls_0;
00134     int cls_1;
00135     double delta_top;
00136     double wid_0;
00137     double hgt_0;
00138     double wid_1;
00139     double hgt_1;
00140     int size_code_0;
00141     int size_code_1;
00142 
00143     // read and parse the tokens
00144     if (contextual_) {
00145       int start_0;
00146       int end_0;
00147       int start_1;
00148       int end_1;
00149       // The expected format for a character size bigram is as follows:
00150       // ClassId0<delim>Start-flag0<delim>End-flag0<delim>String0(ignored)
00151       // Width0<delim>Height0<delim>
00152       // ClassId1<delim>Start-flag1<delim>End-flag1<delim>String1(ignored)
00153       // HeightDelta<delim>Width1<delim>Height0<delim>
00154       // In case of non-contextual languages, the Start and End flags are
00155       // omitted
00156       if (sscanf(tokens[tok + 1].c_str(), "%d", &cls_0) != 1 ||
00157           sscanf(tokens[tok + 2].c_str(), "%d", &start_0) != 1 ||
00158           sscanf(tokens[tok + 3].c_str(), "%d", &end_0) != 1 ||
00159           sscanf(tokens[tok + 5].c_str(), "%lf", &wid_0) != 1 ||
00160           sscanf(tokens[tok + 6].c_str(), "%lf", &hgt_0) != 1 ||
00161           sscanf(tokens[tok + 7].c_str(), "%d", &cls_1) != 1 ||
00162           sscanf(tokens[tok + 8].c_str(), "%d", &start_1) != 1 ||
00163           sscanf(tokens[tok + 9].c_str(), "%d", &end_1) != 1 ||
00164           sscanf(tokens[tok + 11].c_str(), "%lf", &delta_top) != 1 ||
00165           sscanf(tokens[tok + 12].c_str(), "%lf", &wid_1) != 1 ||
00166           sscanf(tokens[tok + 13].c_str(), "%lf", &hgt_1) != 1 ||
00167           (start_0 != 0 && start_0 != 1) || (end_0 != 0 && end_0 != 1) ||
00168           (start_1 != 0 && start_1 != 1) || (end_1 != 0 && end_1 != 1)) {
00169         fprintf(stderr, "Cube ERROR (WordSizeModel::Init): bad format at "
00170                 "line %d\n", 1 + (tok / token_cnt));
00171         return false;
00172       }
00173       size_code_0 = SizeCode(cls_0, start_0, end_0);
00174       size_code_1 = SizeCode(cls_1, start_1, end_1);
00175     } else {
00176       if (sscanf(tokens[tok + 1].c_str(), "%d", &cls_0) != 1 ||
00177           sscanf(tokens[tok + 3].c_str(), "%lf", &wid_0) != 1 ||
00178           sscanf(tokens[tok + 4].c_str(), "%lf", &hgt_0) != 1 ||
00179           sscanf(tokens[tok + 5].c_str(), "%d", &cls_1) != 1 ||
00180           sscanf(tokens[tok + 7].c_str(), "%lf", &delta_top) != 1 ||
00181           sscanf(tokens[tok + 8].c_str(), "%lf", &wid_1) != 1 ||
00182           sscanf(tokens[tok + 9].c_str(), "%lf", &hgt_1) != 1) {
00183         fprintf(stderr, "Cube ERROR (WordSizeModel::Init): bad format at "
00184                 "line %d\n", 1 + (tok / token_cnt));
00185         return false;
00186       }
00187       size_code_0 = cls_0;
00188       size_code_1 = cls_1;
00189     }
00190 
00191     // copy the data to the size tables
00192     FontPairSizeInfo fnt_info = font_pair_size_models_.back();
00193     fnt_info.pair_size_info[size_code_0][size_code_1].delta_top =
00194         static_cast<int>(delta_top * kShapeModelScale);
00195     fnt_info.pair_size_info[size_code_0][size_code_1].wid_0 =
00196         static_cast<int>(wid_0 * kShapeModelScale);
00197     fnt_info.pair_size_info[size_code_0][size_code_1].hgt_0 =
00198         static_cast<int>(hgt_0 * kShapeModelScale);
00199     fnt_info.pair_size_info[size_code_0][size_code_1].wid_1 =
00200         static_cast<int>(wid_1 * kShapeModelScale);
00201     fnt_info.pair_size_info[size_code_0][size_code_1].hgt_1 =
00202         static_cast<int>(hgt_1 * kShapeModelScale);
00203 
00204     fnt_name = tokens[tok];
00205   }
00206 
00207   return true;
00208 }
00209 
00210 int WordSizeModel::Cost(CharSamp **samp_array, int samp_cnt) const {
00211   if (samp_cnt < 2) {
00212     return 0;
00213   }
00214   double best_dist = static_cast<double>(WORST_COST);
00215   int best_fnt = -1;
00216   for (int fnt = 0; fnt < font_pair_size_models_.size(); fnt++) {
00217     const FontPairSizeInfo *fnt_info = &font_pair_size_models_[fnt];
00218     double mean_dist = 0;
00219     int pair_cnt = 0;
00220 
00221     for (int smp_0 = 0; smp_0 < samp_cnt; smp_0++) {
00222       int cls_0 = char_set_->ClassID(samp_array[smp_0]->StrLabel());
00223       if (cls_0 < 1) {
00224         continue;
00225       }
00226       // compute size code for samp 0 based on class id and position
00227       int size_code_0;
00228       if (contextual_) {
00229         size_code_0 = SizeCode(cls_0,
00230                                samp_array[smp_0]->FirstChar() == 0 ? 0 : 1,
00231                                samp_array[smp_0]->LastChar() == 0 ? 0 : 1);
00232       } else {
00233         size_code_0 = cls_0;
00234       }
00235 
00236       int char0_height = samp_array[smp_0]->Height();
00237       int char0_width = samp_array[smp_0]->Width();
00238       int char0_top = samp_array[smp_0]->Top();
00239 
00240       for (int smp_1 = smp_0 + 1; smp_1 < samp_cnt; smp_1++) {
00241         int cls_1 = char_set_->ClassID(samp_array[smp_1]->StrLabel());
00242         if (cls_1 < 1) {
00243           continue;
00244         }
00245         // compute size code for samp 0 based on class id and position
00246         int size_code_1;
00247         if (contextual_) {
00248           size_code_1 = SizeCode(cls_1,
00249                                  samp_array[smp_1]->FirstChar() == 0 ? 0 : 1,
00250                                  samp_array[smp_1]->LastChar() == 0 ? 0 : 1);
00251         } else {
00252           size_code_1 = cls_1;
00253         }
00254         double dist = PairCost(
00255             char0_width, char0_height, char0_top, samp_array[smp_1]->Width(),
00256             samp_array[smp_1]->Height(), samp_array[smp_1]->Top(),
00257             fnt_info->pair_size_info[size_code_0][size_code_1]);
00258         if (dist > 0) {
00259           mean_dist += dist;
00260           pair_cnt++;
00261         }
00262       }  // smp_1
00263     }  // smp_0
00264     if (pair_cnt == 0) {
00265       continue;
00266     }
00267     mean_dist /= pair_cnt;
00268     if (best_fnt == -1 || mean_dist < best_dist) {
00269       best_dist = mean_dist;
00270       best_fnt = fnt;
00271     }
00272   }
00273   if (best_fnt == -1) {
00274     return static_cast<int>(WORST_COST);
00275   } else {
00276     return static_cast<int>(best_dist);
00277   }
00278 }
00279 
00280 double WordSizeModel::PairCost(int width_0, int height_0, int top_0,
00281                                int width_1, int height_1, int top_1,
00282                                const PairSizeInfo& pair_info) {
00283   double scale_factor = static_cast<double>(pair_info.hgt_0) /
00284       static_cast<double>(height_0);
00285   double dist = 0.0;
00286   if (scale_factor > 0) {
00287     double norm_width_0 = width_0 * scale_factor;
00288     double norm_width_1 = width_1 * scale_factor;
00289     double norm_height_1 = height_1 * scale_factor;
00290     double norm_delta_top = (top_1 - top_0) * scale_factor;
00291 
00292     // accumulate the distance between the model character and the
00293     // predicted one on all dimensions of the pair
00294     dist += fabs(pair_info.wid_0 - norm_width_0);
00295     dist += fabs(pair_info.wid_1 - norm_width_1);
00296     dist += fabs(pair_info.hgt_1 - norm_height_1);
00297     dist += fabs(pair_info.delta_top - norm_delta_top);
00298   }
00299   return dist;
00300 }
00301 }  // namespace tesseract