Tesseract
3.02
|
00001 /********************************************************************** 00002 * File: cube_object.cpp 00003 * Description: Implementation of the Cube Object Class 00004 * Author: Ahmad Abdulkader 00005 * Created: 2007 00006 * 00007 * (C) Copyright 2008, Google Inc. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 * 00018 **********************************************************************/ 00019 00020 #include <math.h> 00021 #include "cube_object.h" 00022 #include "cube_utils.h" 00023 #include "word_list_lang_model.h" 00024 00025 namespace tesseract { 00026 CubeObject::CubeObject(CubeRecoContext *cntxt, CharSamp *char_samp) { 00027 Init(); 00028 char_samp_ = char_samp; 00029 cntxt_ = cntxt; 00030 } 00031 00032 CubeObject::CubeObject(CubeRecoContext *cntxt, IMAGE *img, 00033 int left, int top, int wid, int hgt) { 00034 Init(); 00035 char_samp_ = CubeUtils::CharSampleFromImg(img, left, top, wid, hgt); 00036 own_char_samp_ = true; 00037 cntxt_ = cntxt; 00038 } 00039 00040 CubeObject::CubeObject(CubeRecoContext *cntxt, Pix *pix, 00041 int left, int top, int wid, int hgt) { 00042 Init(); 00043 char_samp_ = CubeUtils::CharSampleFromPix(pix, left, top, wid, hgt); 00044 own_char_samp_ = true; 00045 cntxt_ = cntxt; 00046 } 00047 00048 // Data member initialization function 00049 void CubeObject::Init() { 00050 char_samp_ = NULL; 00051 own_char_samp_ = false; 00052 alt_list_ = NULL; 00053 srch_obj_ = NULL; 00054 deslanted_alt_list_ = NULL; 00055 deslanted_srch_obj_ = NULL; 00056 deslanted_ = false; 00057 deslanted_char_samp_ = NULL; 00058 beam_obj_ = NULL; 00059 deslanted_beam_obj_ = NULL; 00060 cntxt_ = NULL; 00061 } 00062 00063 // Cleanup function 00064 void CubeObject::Cleanup() { 00065 if (alt_list_ != NULL) { 00066 delete alt_list_; 00067 alt_list_ = NULL; 00068 } 00069 00070 if (deslanted_alt_list_ != NULL) { 00071 delete deslanted_alt_list_; 00072 deslanted_alt_list_ = NULL; 00073 } 00074 } 00075 00076 CubeObject::~CubeObject() { 00077 if (char_samp_ != NULL && own_char_samp_ == true) { 00078 delete char_samp_; 00079 char_samp_ = NULL; 00080 } 00081 00082 if (srch_obj_ != NULL) { 00083 delete srch_obj_; 00084 srch_obj_ = NULL; 00085 } 00086 00087 if (deslanted_srch_obj_ != NULL) { 00088 delete deslanted_srch_obj_; 00089 deslanted_srch_obj_ = NULL; 00090 } 00091 00092 if (beam_obj_ != NULL) { 00093 delete beam_obj_; 00094 beam_obj_ = NULL; 00095 } 00096 00097 if (deslanted_beam_obj_ != NULL) { 00098 delete deslanted_beam_obj_; 00099 deslanted_beam_obj_ = NULL; 00100 } 00101 00102 if (deslanted_char_samp_ != NULL) { 00103 delete deslanted_char_samp_; 00104 deslanted_char_samp_ = NULL; 00105 } 00106 00107 Cleanup(); 00108 } 00109 00110 // Actually do the recognition using the specified language mode. If none 00111 // is specified, the default language model in the CubeRecoContext is used. 00112 // Returns the sorted list of alternate answers 00113 // The Word mode determines whether recognition is done as a word or a phrase 00114 WordAltList *CubeObject::Recognize(LangModel *lang_mod, bool word_mode) { 00115 if (char_samp_ == NULL) { 00116 return NULL; 00117 } 00118 00119 // clear alt lists 00120 Cleanup(); 00121 00122 // no specified language model, use the one in the reco context 00123 if (lang_mod == NULL) { 00124 lang_mod = cntxt_->LangMod(); 00125 } 00126 00127 // normalize if necessary 00128 if (cntxt_->SizeNormalization()) { 00129 Normalize(); 00130 } 00131 00132 // assume not de-slanted by default 00133 deslanted_ = false; 00134 00135 // create a beam search object 00136 if (beam_obj_ == NULL) { 00137 beam_obj_ = new BeamSearch(cntxt_, word_mode); 00138 if (beam_obj_ == NULL) { 00139 fprintf(stderr, "Cube ERROR (CubeObject::Recognize): could not construct " 00140 "BeamSearch\n"); 00141 return NULL; 00142 } 00143 } 00144 00145 // create a cube search object 00146 if (srch_obj_ == NULL) { 00147 srch_obj_ = new CubeSearchObject(cntxt_, char_samp_); 00148 if (srch_obj_ == NULL) { 00149 fprintf(stderr, "Cube ERROR (CubeObject::Recognize): could not construct " 00150 "CubeSearchObject\n"); 00151 return NULL; 00152 } 00153 } 00154 00155 // run a beam search against the tesslang model 00156 alt_list_ = beam_obj_->Search(srch_obj_, lang_mod); 00157 00158 // deslant (if supported by language) and re-reco if probability is low enough 00159 if (cntxt_->HasItalics() == true && 00160 (alt_list_ == NULL || alt_list_->AltCount() < 1 || 00161 alt_list_->AltCost(0) > CubeUtils::Prob2Cost(kMinProbSkipDeslanted))) { 00162 00163 if (deslanted_beam_obj_ == NULL) { 00164 deslanted_beam_obj_ = new BeamSearch(cntxt_); 00165 if (deslanted_beam_obj_ == NULL) { 00166 fprintf(stderr, "Cube ERROR (CubeObject::Recognize): could not " 00167 "construct deslanted BeamSearch\n"); 00168 return false; 00169 } 00170 } 00171 00172 if (deslanted_srch_obj_ == NULL) { 00173 deslanted_char_samp_ = char_samp_->Clone(); 00174 if (deslanted_char_samp_ == NULL) { 00175 fprintf(stderr, "Cube ERROR (CubeObject::Recognize): could not " 00176 "construct deslanted CharSamp\n"); 00177 return NULL; 00178 } 00179 00180 if (deslanted_char_samp_->Deslant() == false) { 00181 return NULL; 00182 } 00183 00184 deslanted_srch_obj_ = new CubeSearchObject(cntxt_, deslanted_char_samp_); 00185 if (deslanted_srch_obj_ == NULL) { 00186 fprintf(stderr, "Cube ERROR (CubeObject::Recognize): could not " 00187 "construct deslanted CubeSearchObject\n"); 00188 return NULL; 00189 } 00190 } 00191 00192 // run a beam search against the tesslang model 00193 deslanted_alt_list_ = deslanted_beam_obj_->Search(deslanted_srch_obj_, 00194 lang_mod); 00195 // should we use de-slanted altlist? 00196 if (deslanted_alt_list_ != NULL && deslanted_alt_list_->AltCount() > 0) { 00197 if (alt_list_ == NULL || alt_list_->AltCount() < 1 || 00198 deslanted_alt_list_->AltCost(0) < alt_list_->AltCost(0)) { 00199 deslanted_ = true; 00200 return deslanted_alt_list_; 00201 } 00202 } 00203 } 00204 00205 return alt_list_; 00206 } 00207 00208 // Recognize the member char sample as a word 00209 WordAltList *CubeObject::RecognizeWord(LangModel *lang_mod) { 00210 return Recognize(lang_mod, true); 00211 } 00212 00213 // Recognize the member char sample as a word 00214 WordAltList *CubeObject::RecognizePhrase(LangModel *lang_mod) { 00215 return Recognize(lang_mod, false); 00216 } 00217 00218 // Computes the cost of a specific string. This is done by performing 00219 // recognition of a language model that allows only the specified word 00220 int CubeObject::WordCost(const char *str) { 00221 WordListLangModel *lang_mod = new WordListLangModel(cntxt_); 00222 if (lang_mod == NULL) { 00223 return WORST_COST; 00224 } 00225 00226 if (lang_mod->AddString(str) == false) { 00227 delete lang_mod; 00228 return WORST_COST; 00229 } 00230 00231 // run a beam search against the single string wordlist model 00232 WordAltList *alt_list = RecognizeWord(lang_mod); 00233 delete lang_mod; 00234 00235 int cost = WORST_COST; 00236 if (alt_list != NULL) { 00237 if (alt_list->AltCount() > 0) { 00238 cost = alt_list->AltCost(0); 00239 } 00240 } 00241 00242 return cost; 00243 } 00244 00245 // Recognizes a single character and returns the list of results. 00246 CharAltList *CubeObject::RecognizeChar() { 00247 if (char_samp_ == NULL) return NULL; 00248 CharAltList* alt_list = NULL; 00249 CharClassifier *char_classifier = cntxt_->Classifier(); 00250 ASSERT_HOST(char_classifier != NULL); 00251 alt_list = char_classifier->Classify(char_samp_); 00252 return alt_list; 00253 } 00254 00255 // Normalize the input word bitmap to have a minimum aspect ratio 00256 bool CubeObject::Normalize() { 00257 // create a cube search object 00258 CubeSearchObject *srch_obj = new CubeSearchObject(cntxt_, char_samp_); 00259 if (srch_obj == NULL) { 00260 return false; 00261 } 00262 // Perform over-segmentation 00263 int seg_cnt = srch_obj->SegPtCnt(); 00264 // Only perform normalization if segment count is large enough 00265 if (seg_cnt < kMinNormalizationSegmentCnt) { 00266 delete srch_obj; 00267 return true; 00268 } 00269 // compute the mean AR of the segments 00270 double ar_mean = 0.0; 00271 for (int seg_idx = 0; seg_idx <= seg_cnt; seg_idx++) { 00272 CharSamp *seg_samp = srch_obj->CharSample(seg_idx - 1, seg_idx); 00273 if (seg_samp != NULL && seg_samp->Width() > 0) { 00274 ar_mean += (1.0 * seg_samp->Height() / seg_samp->Width()); 00275 } 00276 } 00277 ar_mean /= (seg_cnt + 1); 00278 // perform normalization if segment AR is too high 00279 if (ar_mean > kMinNormalizationAspectRatio) { 00280 // scale down the image in the y-direction to attain AR 00281 CharSamp *new_samp = char_samp_->Scale(char_samp_->Width(), 00282 2.0 * char_samp_->Height() / ar_mean, 00283 false); 00284 if (new_samp != NULL) { 00285 // free existing char samp if owned 00286 if (own_char_samp_) { 00287 delete char_samp_; 00288 } 00289 // update with new scaled charsamp and set ownership flag 00290 char_samp_ = new_samp; 00291 own_char_samp_ = true; 00292 } 00293 } 00294 delete srch_obj; 00295 return true; 00296 } 00297 }