Tesseract  3.02
tesseract-ocr/cube/cube_object.cpp
Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        cube_object.cpp
00003  * Description: Implementation of the Cube Object Class
00004  * Author:    Ahmad Abdulkader
00005  * Created:   2007
00006  *
00007  * (C) Copyright 2008, Google Inc.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
00020 #include <math.h>
00021 #include "cube_object.h"
00022 #include "cube_utils.h"
00023 #include "word_list_lang_model.h"
00024 
00025 namespace tesseract {
00026 CubeObject::CubeObject(CubeRecoContext *cntxt, CharSamp *char_samp) {
00027   Init();
00028   char_samp_ = char_samp;
00029   cntxt_ = cntxt;
00030 }
00031 
00032 CubeObject::CubeObject(CubeRecoContext *cntxt, IMAGE *img,
00033                        int left, int top, int wid, int hgt) {
00034   Init();
00035   char_samp_ = CubeUtils::CharSampleFromImg(img, left, top, wid, hgt);
00036   own_char_samp_ = true;
00037   cntxt_ = cntxt;
00038 }
00039 
00040 CubeObject::CubeObject(CubeRecoContext *cntxt, Pix *pix,
00041                        int left, int top, int wid, int hgt) {
00042   Init();
00043   char_samp_ = CubeUtils::CharSampleFromPix(pix, left, top, wid, hgt);
00044   own_char_samp_ = true;
00045   cntxt_ = cntxt;
00046 }
00047 
00048 // Data member initialization function
00049 void CubeObject::Init() {
00050   char_samp_ = NULL;
00051   own_char_samp_ = false;
00052   alt_list_ = NULL;
00053   srch_obj_ = NULL;
00054   deslanted_alt_list_ = NULL;
00055   deslanted_srch_obj_ = NULL;
00056   deslanted_ = false;
00057   deslanted_char_samp_ = NULL;
00058   beam_obj_ = NULL;
00059   deslanted_beam_obj_ = NULL;
00060   cntxt_ = NULL;
00061 }
00062 
00063 // Cleanup function
00064 void CubeObject::Cleanup() {
00065   if (alt_list_ != NULL) {
00066     delete alt_list_;
00067     alt_list_ = NULL;
00068   }
00069 
00070   if (deslanted_alt_list_ != NULL) {
00071     delete deslanted_alt_list_;
00072     deslanted_alt_list_ = NULL;
00073   }
00074 }
00075 
00076 CubeObject::~CubeObject() {
00077   if (char_samp_ != NULL && own_char_samp_ == true) {
00078     delete char_samp_;
00079     char_samp_ = NULL;
00080   }
00081 
00082   if (srch_obj_ != NULL) {
00083     delete srch_obj_;
00084     srch_obj_ = NULL;
00085   }
00086 
00087   if (deslanted_srch_obj_ != NULL) {
00088     delete deslanted_srch_obj_;
00089     deslanted_srch_obj_ = NULL;
00090   }
00091 
00092   if (beam_obj_ != NULL) {
00093     delete beam_obj_;
00094     beam_obj_ = NULL;
00095   }
00096 
00097   if (deslanted_beam_obj_ != NULL) {
00098     delete deslanted_beam_obj_;
00099     deslanted_beam_obj_ = NULL;
00100   }
00101 
00102   if (deslanted_char_samp_ != NULL) {
00103     delete deslanted_char_samp_;
00104     deslanted_char_samp_ = NULL;
00105   }
00106 
00107   Cleanup();
00108 }
00109 
00110 // Actually do the recognition using the specified language mode. If none
00111 // is specified, the default language model in the CubeRecoContext is used.
00112 // Returns the sorted list of alternate answers
00113 // The Word mode determines whether recognition is done as a word or a phrase
00114 WordAltList *CubeObject::Recognize(LangModel *lang_mod, bool word_mode) {
00115   if (char_samp_ == NULL) {
00116     return NULL;
00117   }
00118 
00119   // clear alt lists
00120   Cleanup();
00121 
00122   // no specified language model, use the one in the reco context
00123   if (lang_mod == NULL) {
00124     lang_mod = cntxt_->LangMod();
00125   }
00126 
00127   // normalize if necessary
00128   if (cntxt_->SizeNormalization()) {
00129     Normalize();
00130   }
00131 
00132   // assume not de-slanted by default
00133   deslanted_ = false;
00134 
00135   // create a beam search object
00136   if (beam_obj_ == NULL) {
00137     beam_obj_ = new BeamSearch(cntxt_, word_mode);
00138     if (beam_obj_ == NULL) {
00139       fprintf(stderr, "Cube ERROR (CubeObject::Recognize): could not construct "
00140               "BeamSearch\n");
00141       return NULL;
00142     }
00143   }
00144 
00145   // create a cube search object
00146   if (srch_obj_ == NULL) {
00147     srch_obj_ = new CubeSearchObject(cntxt_, char_samp_);
00148     if (srch_obj_ == NULL) {
00149       fprintf(stderr, "Cube ERROR (CubeObject::Recognize): could not construct "
00150               "CubeSearchObject\n");
00151       return NULL;
00152     }
00153   }
00154 
00155   // run a beam search against the tesslang model
00156   alt_list_ = beam_obj_->Search(srch_obj_, lang_mod);
00157 
00158   // deslant (if supported by language) and re-reco if probability is low enough
00159   if (cntxt_->HasItalics() == true &&
00160       (alt_list_ == NULL || alt_list_->AltCount() < 1 ||
00161        alt_list_->AltCost(0) > CubeUtils::Prob2Cost(kMinProbSkipDeslanted))) {
00162 
00163     if (deslanted_beam_obj_ == NULL) {
00164       deslanted_beam_obj_ = new BeamSearch(cntxt_);
00165       if (deslanted_beam_obj_ == NULL) {
00166         fprintf(stderr, "Cube ERROR (CubeObject::Recognize): could not "
00167                 "construct deslanted BeamSearch\n");
00168         return false;
00169       }
00170     }
00171 
00172     if (deslanted_srch_obj_ == NULL) {
00173       deslanted_char_samp_ = char_samp_->Clone();
00174       if (deslanted_char_samp_ == NULL) {
00175         fprintf(stderr, "Cube ERROR (CubeObject::Recognize): could not "
00176                 "construct deslanted CharSamp\n");
00177         return NULL;
00178       }
00179 
00180       if (deslanted_char_samp_->Deslant() == false) {
00181         return NULL;
00182       }
00183 
00184       deslanted_srch_obj_ = new CubeSearchObject(cntxt_, deslanted_char_samp_);
00185       if (deslanted_srch_obj_ == NULL) {
00186         fprintf(stderr, "Cube ERROR (CubeObject::Recognize): could not "
00187                 "construct deslanted CubeSearchObject\n");
00188         return NULL;
00189       }
00190     }
00191 
00192     // run a beam search against the tesslang model
00193     deslanted_alt_list_ = deslanted_beam_obj_->Search(deslanted_srch_obj_,
00194                                                       lang_mod);
00195     // should we use de-slanted altlist?
00196     if (deslanted_alt_list_ != NULL &&  deslanted_alt_list_->AltCount() > 0) {
00197       if (alt_list_ == NULL || alt_list_->AltCount() < 1 ||
00198           deslanted_alt_list_->AltCost(0) < alt_list_->AltCost(0)) {
00199         deslanted_ = true;
00200         return deslanted_alt_list_;
00201       }
00202     }
00203   }
00204 
00205   return alt_list_;
00206 }
00207 
00208 // Recognize the member char sample as a word
00209 WordAltList *CubeObject::RecognizeWord(LangModel *lang_mod) {
00210   return Recognize(lang_mod, true);
00211 }
00212 
00213 // Recognize the member char sample as a word
00214 WordAltList *CubeObject::RecognizePhrase(LangModel *lang_mod) {
00215   return Recognize(lang_mod, false);
00216 }
00217 
00218 // Computes the cost of a specific string. This is done by performing
00219 // recognition of a language model that allows only the specified word
00220 int CubeObject::WordCost(const char *str) {
00221   WordListLangModel *lang_mod = new WordListLangModel(cntxt_);
00222   if (lang_mod == NULL) {
00223     return WORST_COST;
00224   }
00225 
00226   if (lang_mod->AddString(str) == false) {
00227     delete lang_mod;
00228     return WORST_COST;
00229   }
00230 
00231   // run a beam search against the single string wordlist model
00232   WordAltList *alt_list = RecognizeWord(lang_mod);
00233   delete lang_mod;
00234 
00235   int cost = WORST_COST;
00236   if (alt_list != NULL) {
00237     if (alt_list->AltCount() > 0) {
00238       cost = alt_list->AltCost(0);
00239     }
00240   }
00241 
00242   return cost;
00243 }
00244 
00245 // Recognizes a single character and returns the list of results.
00246 CharAltList *CubeObject::RecognizeChar() {
00247   if (char_samp_ == NULL) return NULL;
00248   CharAltList* alt_list = NULL;
00249   CharClassifier *char_classifier = cntxt_->Classifier();
00250   ASSERT_HOST(char_classifier != NULL);
00251   alt_list = char_classifier->Classify(char_samp_);
00252   return alt_list;
00253 }
00254 
00255 // Normalize the input word bitmap to have a minimum aspect ratio
00256 bool CubeObject::Normalize() {
00257   // create a cube search object
00258   CubeSearchObject *srch_obj = new CubeSearchObject(cntxt_, char_samp_);
00259   if (srch_obj == NULL) {
00260     return false;
00261   }
00262   // Perform over-segmentation
00263   int seg_cnt = srch_obj->SegPtCnt();
00264   // Only perform normalization if segment count is large enough
00265   if (seg_cnt < kMinNormalizationSegmentCnt) {
00266     delete srch_obj;
00267     return true;
00268   }
00269   // compute the mean AR of the segments
00270   double ar_mean = 0.0;
00271   for (int seg_idx = 0; seg_idx <= seg_cnt; seg_idx++) {
00272     CharSamp *seg_samp = srch_obj->CharSample(seg_idx - 1, seg_idx);
00273     if (seg_samp != NULL && seg_samp->Width() > 0) {
00274       ar_mean += (1.0 * seg_samp->Height() / seg_samp->Width());
00275     }
00276   }
00277   ar_mean /= (seg_cnt + 1);
00278   // perform normalization if segment AR is too high
00279   if (ar_mean > kMinNormalizationAspectRatio) {
00280     // scale down the image in the y-direction to attain AR
00281     CharSamp *new_samp = char_samp_->Scale(char_samp_->Width(),
00282                                            2.0 * char_samp_->Height() / ar_mean,
00283                                            false);
00284     if (new_samp != NULL) {
00285       // free existing char samp if owned
00286       if (own_char_samp_) {
00287         delete char_samp_;
00288       }
00289       // update with new scaled charsamp and set ownership flag
00290       char_samp_ = new_samp;
00291       own_char_samp_ = true;
00292     }
00293   }
00294   delete srch_obj;
00295   return true;
00296 }
00297 }