Tesseract
3.02
|
00001 /********************************************************************** 00002 * File: cube_search_object.cpp 00003 * Description: Implementation of the Cube Search Object Class 00004 * Author: Ahmad Abdulkader 00005 * Created: 2007 00006 * 00007 * (C) Copyright 2008, Google Inc. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 * 00018 **********************************************************************/ 00019 00020 #include "cube_search_object.h" 00021 #include "cube_utils.h" 00022 #include "ndminx.h" 00023 00024 namespace tesseract { 00025 00026 const bool CubeSearchObject::kUseCroppedChars = true; 00027 00028 CubeSearchObject::CubeSearchObject(CubeRecoContext *cntxt, CharSamp *samp) 00029 : SearchObject(cntxt) { 00030 init_ = false; 00031 reco_cache_ = NULL; 00032 samp_cache_ = NULL; 00033 segments_ = NULL; 00034 segment_cnt_ = 0; 00035 samp_ = samp; 00036 left_ = 0; 00037 itop_ = 0; 00038 space_cost_ = NULL; 00039 no_space_cost_ = NULL; 00040 wid_ = samp_->Width(); 00041 hgt_ = samp_->Height(); 00042 max_seg_per_char_ = cntxt_->Params()->MaxSegPerChar(); 00043 rtl_ = (cntxt_->ReadingOrder() == CubeRecoContext::R2L); 00044 min_spc_gap_ = 00045 static_cast<int>(hgt_ * cntxt_->Params()->MinSpaceHeightRatio()); 00046 max_spc_gap_ = 00047 static_cast<int>(hgt_ * cntxt_->Params()->MaxSpaceHeightRatio()); 00048 } 00049 00050 CubeSearchObject::~CubeSearchObject() { 00051 Cleanup(); 00052 } 00053 00054 // Cleanup 00055 void CubeSearchObject::Cleanup() { 00056 // delete Recognition Cache 00057 if (reco_cache_) { 00058 for (int strt_seg = 0; strt_seg < segment_cnt_; strt_seg++) { 00059 if (reco_cache_[strt_seg]) { 00060 for (int end_seg = 0; end_seg < segment_cnt_; end_seg++) { 00061 if (reco_cache_[strt_seg][end_seg]) { 00062 delete reco_cache_[strt_seg][end_seg]; 00063 } 00064 } 00065 delete []reco_cache_[strt_seg]; 00066 } 00067 } 00068 delete []reco_cache_; 00069 reco_cache_ = NULL; 00070 } 00071 00072 // delete CharSamp Cache 00073 if (samp_cache_) { 00074 for (int strt_seg = 0; strt_seg < segment_cnt_; strt_seg++) { 00075 if (samp_cache_[strt_seg]) { 00076 for (int end_seg = 0; end_seg < segment_cnt_; end_seg++) { 00077 if (samp_cache_[strt_seg][end_seg]) { 00078 delete samp_cache_[strt_seg][end_seg]; 00079 } 00080 } 00081 delete []samp_cache_[strt_seg]; 00082 } 00083 } 00084 delete []samp_cache_; 00085 samp_cache_ = NULL; 00086 } 00087 00088 // delete segment list 00089 if (segments_) { 00090 for (int seg = 0; seg < segment_cnt_; seg++) { 00091 if (segments_[seg]) { 00092 delete segments_[seg]; 00093 } 00094 } 00095 delete []segments_; 00096 segments_ = NULL; 00097 } 00098 00099 if (space_cost_) { 00100 delete []space_cost_; 00101 space_cost_ = NULL; 00102 } 00103 00104 if (no_space_cost_) { 00105 delete []no_space_cost_; 00106 no_space_cost_ = NULL; 00107 } 00108 00109 segment_cnt_ = 0; 00110 init_ = false; 00111 } 00112 00113 // # of segmentation points. One less than the count of segments 00114 int CubeSearchObject::SegPtCnt() { 00115 if (!init_ && !Init()) 00116 return -1; 00117 return segment_cnt_ - 1; 00118 } 00119 00120 // init and allocate variables, perform segmentation 00121 bool CubeSearchObject::Init() { 00122 if (init_) 00123 return true; 00124 if (!Segment()) { 00125 return false; 00126 } 00127 00128 // init cache 00129 reco_cache_ = new CharAltList **[segment_cnt_]; 00130 if (reco_cache_ == NULL) { 00131 fprintf(stderr, "Cube ERROR (CubeSearchObject::Init): could not " 00132 "allocate CharAltList array\n"); 00133 return false; 00134 } 00135 00136 samp_cache_ = new CharSamp **[segment_cnt_]; 00137 if (samp_cache_ == NULL) { 00138 fprintf(stderr, "Cube ERROR (CubeSearchObject::Init): could not " 00139 "allocate CharSamp array\n"); 00140 return false; 00141 } 00142 00143 for (int seg = 0; seg < segment_cnt_; seg++) { 00144 reco_cache_[seg] = new CharAltList *[segment_cnt_]; 00145 if (reco_cache_[seg] == NULL) { 00146 fprintf(stderr, "Cube ERROR (CubeSearchObject::Init): could not " 00147 "allocate a single segment's CharAltList array\n"); 00148 return false; 00149 } 00150 00151 memset(reco_cache_[seg], 0, segment_cnt_ * sizeof(*reco_cache_[seg])); 00152 00153 samp_cache_[seg] = new CharSamp *[segment_cnt_]; 00154 if (samp_cache_[seg] == NULL) { 00155 fprintf(stderr, "Cube ERROR (CubeSearchObject::Init): could not " 00156 "allocate a single segment's CharSamp array\n"); 00157 return false; 00158 } 00159 00160 memset(samp_cache_[seg], 0, segment_cnt_ * sizeof(*samp_cache_[seg])); 00161 } 00162 00163 init_ = true; 00164 return true; 00165 } 00166 00167 // returns a char sample corresponding to the bitmap between 2 seg pts 00168 CharSamp *CubeSearchObject::CharSample(int start_pt, int end_pt) { 00169 // init if necessary 00170 if (!init_ && !Init()) 00171 return NULL; 00172 // validate segment range 00173 if (!IsValidSegmentRange(start_pt, end_pt)) 00174 return NULL; 00175 00176 // look for the samp in the cache 00177 if (samp_cache_ && samp_cache_[start_pt + 1] && 00178 samp_cache_[start_pt + 1][end_pt]) { 00179 return samp_cache_[start_pt + 1][end_pt]; 00180 } 00181 // create a char samp object from the specified range of segments 00182 bool left_most; 00183 bool right_most; 00184 CharSamp *samp = CharSamp::FromConComps(segments_, start_pt + 1, 00185 end_pt - start_pt, NULL, 00186 &left_most, &right_most, hgt_); 00187 if (!samp) 00188 return NULL; 00189 00190 if (kUseCroppedChars) { 00191 CharSamp *cropped_samp = samp->Crop(); 00192 // we no longer need the orig sample 00193 delete samp; 00194 if (!cropped_samp) 00195 return NULL; 00196 samp = cropped_samp; 00197 } 00198 00199 // get the dimensions of the new cropped sample 00200 int char_top = samp->Top(); 00201 int char_wid = samp->Width(); 00202 int char_hgt = samp->Height(); 00203 00204 // for cursive languages, these features correspond to whether 00205 // the charsamp is at the beginning or end of conncomp 00206 if (cntxt_->Cursive() == true) { 00207 // first and last char flags depend on reading order 00208 bool first_char = rtl_ ? right_most : left_most; 00209 bool last_char = rtl_ ? left_most : right_most; 00210 00211 samp->SetFirstChar(first_char ? 255 : 0); 00212 samp->SetLastChar(last_char ? 255 : 0); 00213 } else { 00214 // for non cursive languages, these features correspond 00215 // to whether the charsamp is at the begining or end of the word 00216 samp->SetFirstChar((start_pt == -1) ? 255 : 0); 00217 samp->SetLastChar((end_pt == (segment_cnt_ - 1)) ? 255 : 0); 00218 } 00219 samp->SetNormTop(255 * char_top / hgt_); 00220 samp->SetNormBottom(255 * (char_top + char_hgt) / hgt_); 00221 samp->SetNormAspectRatio(255 * char_wid / (char_wid + char_hgt)); 00222 00223 // add to cache & return 00224 samp_cache_[start_pt + 1][end_pt] = samp; 00225 return samp; 00226 } 00227 00228 Box *CubeSearchObject::CharBox(int start_pt, int end_pt) { 00229 if (!init_ && !Init()) 00230 return NULL; 00231 if (!IsValidSegmentRange(start_pt, end_pt)) { 00232 fprintf(stderr, "Cube ERROR (CubeSearchObject::CharBox): invalid " 00233 "segment range (%d, %d)\n", start_pt, end_pt); 00234 return NULL; 00235 } 00236 00237 // create a char samp object from the specified range of segments, 00238 // extract its dimensions into a leptonica box, and delete it 00239 bool left_most; 00240 bool right_most; 00241 CharSamp *samp = CharSamp::FromConComps(segments_, start_pt + 1, 00242 end_pt - start_pt, NULL, 00243 &left_most, &right_most, hgt_); 00244 if (!samp) 00245 return NULL; 00246 if (kUseCroppedChars) { 00247 CharSamp *cropped_samp = samp->Crop(); 00248 delete samp; 00249 if (!cropped_samp) { 00250 return NULL; 00251 } 00252 samp = cropped_samp; 00253 } 00254 Box *box = boxCreate(samp->Left(), samp->Top(), 00255 samp->Width(), samp->Height()); 00256 delete samp; 00257 return box; 00258 } 00259 00260 // call from Beam Search to return the alt list corresponding to 00261 // recognizing the bitmap between two segmentation pts 00262 CharAltList * CubeSearchObject::RecognizeSegment(int start_pt, int end_pt) { 00263 // init if necessary 00264 if (!init_ && !Init()) { 00265 fprintf(stderr, "Cube ERROR (CubeSearchObject::RecognizeSegment): could " 00266 "not initialize CubeSearchObject\n"); 00267 return NULL; 00268 } 00269 00270 // validate segment range 00271 if (!IsValidSegmentRange(start_pt, end_pt)) { 00272 fprintf(stderr, "Cube ERROR (CubeSearchObject::RecognizeSegment): invalid " 00273 "segment range (%d, %d)\n", start_pt, end_pt); 00274 return NULL; 00275 } 00276 00277 // look for the recognition results in cache in the cache 00278 if (reco_cache_ && reco_cache_[start_pt + 1] && 00279 reco_cache_[start_pt + 1][end_pt]) { 00280 return reco_cache_[start_pt + 1][end_pt]; 00281 } 00282 00283 // create the char sample corresponding to the blob 00284 CharSamp *samp = CharSample(start_pt, end_pt); 00285 if (!samp) { 00286 fprintf(stderr, "Cube ERROR (CubeSearchObject::RecognizeSegment): could " 00287 "not construct CharSamp\n"); 00288 return NULL; 00289 } 00290 00291 // recognize the char sample 00292 CharClassifier *char_classifier = cntxt_->Classifier(); 00293 if (char_classifier) { 00294 reco_cache_[start_pt + 1][end_pt] = char_classifier->Classify(samp); 00295 } else { 00296 // no classifer: all characters are equally probable; add a penalty 00297 // that favors 2-segment characters and aspect ratios (w/h) > 1 00298 fprintf(stderr, "Cube WARNING (CubeSearchObject::RecognizeSegment): cube " 00299 "context has no character classifier!! Inventing a probability " 00300 "distribution.\n"); 00301 int class_cnt = cntxt_->CharacterSet()->ClassCount(); 00302 CharAltList *alt_list = new CharAltList(cntxt_->CharacterSet(), class_cnt); 00303 int seg_cnt = end_pt - start_pt; 00304 double prob_val = (1.0 / class_cnt) * 00305 exp(-abs(seg_cnt - 2.0)) * 00306 exp(-samp->Width() / static_cast<double>(samp->Height())); 00307 00308 if (alt_list) { 00309 for (int class_idx = 0; class_idx < class_cnt; class_idx++) { 00310 alt_list->Insert(class_idx, CubeUtils::Prob2Cost(prob_val)); 00311 } 00312 reco_cache_[start_pt + 1][end_pt] = alt_list; 00313 } 00314 } 00315 00316 return reco_cache_[start_pt + 1][end_pt]; 00317 } 00318 00319 // Perform segmentation of the bitmap by detecting connected components, 00320 // segmenting each connected component using windowed vertical pixel density 00321 // histogram and sorting the resulting segments in reading order 00322 bool CubeSearchObject::Segment() { 00323 if (!samp_) 00324 return false; 00325 segment_cnt_ = 0; 00326 segments_ = samp_->Segment(&segment_cnt_, rtl_, 00327 cntxt_->Params()->HistWindWid(), 00328 cntxt_->Params()->MinConCompSize()); 00329 if (!segments_ || segment_cnt_ <= 0) { 00330 return false; 00331 } 00332 if (segment_cnt_ >= kMaxSegmentCnt) { 00333 return false; 00334 } 00335 return true; 00336 } 00337 00338 // computes the space and no space costs at gaps between segments 00339 bool CubeSearchObject::ComputeSpaceCosts() { 00340 // init if necessary 00341 if (!init_ && !Init()) 00342 return false; 00343 00344 // Already computed 00345 if (space_cost_) 00346 return true; 00347 00348 // No segmentation points 00349 if (segment_cnt_ < 2) 00350 return false; 00351 00352 // Compute the maximum x to the left of and minimum x to the right of each 00353 // segmentation point 00354 int *max_left_x = new int[segment_cnt_ - 1]; 00355 int *min_right_x = new int[segment_cnt_ - 1]; 00356 if (!max_left_x || !min_right_x) { 00357 delete []min_right_x; 00358 delete []max_left_x; 00359 return false; 00360 } 00361 if (rtl_) { 00362 min_right_x[0] = segments_[0]->Left(); 00363 max_left_x[segment_cnt_ - 2] = segments_[segment_cnt_ - 1]->Right(); 00364 for (int pt_idx = 1; pt_idx < (segment_cnt_ - 1); pt_idx++) { 00365 min_right_x[pt_idx] = 00366 MIN(min_right_x[pt_idx - 1], segments_[pt_idx]->Left()); 00367 max_left_x[segment_cnt_ - pt_idx - 2] = 00368 MAX(max_left_x[segment_cnt_ - pt_idx - 1], 00369 segments_[segment_cnt_ - pt_idx - 1]->Right()); 00370 } 00371 } else { 00372 min_right_x[segment_cnt_ - 2] = segments_[segment_cnt_ - 1]->Left(); 00373 max_left_x[0] = segments_[0]->Right(); 00374 for (int pt_idx = 1; pt_idx < (segment_cnt_ - 1); pt_idx++) { 00375 min_right_x[segment_cnt_ - pt_idx - 2] = 00376 MIN(min_right_x[segment_cnt_ - pt_idx - 1], 00377 segments_[segment_cnt_ - pt_idx - 1]->Left()); 00378 max_left_x[pt_idx] = 00379 MAX(max_left_x[pt_idx - 1], segments_[pt_idx]->Right()); 00380 } 00381 } 00382 00383 // Allocate memory for space and no space costs 00384 // trivial cases 00385 space_cost_ = new int[segment_cnt_ - 1]; 00386 no_space_cost_ = new int[segment_cnt_ - 1]; 00387 if (!space_cost_ || !no_space_cost_) { 00388 delete []min_right_x; 00389 delete []max_left_x; 00390 return false; 00391 } 00392 00393 // go through all segmentation points determining the horizontal gap between 00394 // the images on both sides of each break points. Use the gap to estimate 00395 // the probability of a space. The probability is modeled a linear function 00396 // of the gap width 00397 for (int pt_idx = 0; pt_idx < (segment_cnt_ - 1); pt_idx++) { 00398 // determine the gap at the segmentation point 00399 int gap = min_right_x[pt_idx] - max_left_x[pt_idx]; 00400 float prob = 0.0; 00401 00402 // gap is too small => no space 00403 if (gap < min_spc_gap_) { 00404 prob = 0.0; 00405 } else if (gap > max_spc_gap_) { 00406 // gap is too big => definite space 00407 prob = 1.0; 00408 } else { 00409 // gap is somewhere in between, compute probability 00410 prob = (gap - min_spc_gap_) / 00411 static_cast<double>(max_spc_gap_ - min_spc_gap_); 00412 } 00413 00414 // compute cost of space and non-space 00415 space_cost_[pt_idx] = CubeUtils::Prob2Cost(prob) + 00416 CubeUtils::Prob2Cost(0.1); 00417 no_space_cost_[pt_idx] = CubeUtils::Prob2Cost(1.0 - prob); 00418 } 00419 00420 delete []min_right_x; 00421 delete []max_left_x; 00422 00423 return true; 00424 } 00425 00426 // Returns the cost of having a space before the specified segmentation point 00427 int CubeSearchObject::SpaceCost(int pt_idx) { 00428 if (!space_cost_ && !ComputeSpaceCosts()) { 00429 // Failed to compute costs return a zero prob 00430 return CubeUtils::Prob2Cost(0.0); 00431 } 00432 return space_cost_[pt_idx]; 00433 } 00434 00435 // Returns the cost of not having a space before the specified 00436 // segmentation point 00437 int CubeSearchObject::NoSpaceCost(int pt_idx) { 00438 // If failed to compute costs, return a 1.0 prob 00439 if (!space_cost_ && !ComputeSpaceCosts()) 00440 return CubeUtils::Prob2Cost(0.0); 00441 return no_space_cost_[pt_idx]; 00442 } 00443 00444 // Returns the cost of not having any spaces within the specified range 00445 // of segmentation points 00446 int CubeSearchObject::NoSpaceCost(int st_pt, int end_pt) { 00447 // If fail to compute costs, return a 1.0 prob 00448 if (!space_cost_ && !ComputeSpaceCosts()) 00449 return CubeUtils::Prob2Cost(1.0); 00450 int no_spc_cost = 0; 00451 for (int pt_idx = st_pt + 1; pt_idx < end_pt; pt_idx++) 00452 no_spc_cost += NoSpaceCost(pt_idx); 00453 return no_spc_cost; 00454 } 00455 }