Tesseract  3.02
tesseract-ocr/cube/cube_search_object.cpp
Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        cube_search_object.cpp
00003  * Description: Implementation of the Cube Search Object Class
00004  * Author:    Ahmad Abdulkader
00005  * Created:   2007
00006  *
00007  * (C) Copyright 2008, Google Inc.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
00020 #include "cube_search_object.h"
00021 #include "cube_utils.h"
00022 #include "ndminx.h"
00023 
00024 namespace tesseract {
00025 
00026 const bool CubeSearchObject::kUseCroppedChars = true;
00027 
00028 CubeSearchObject::CubeSearchObject(CubeRecoContext *cntxt, CharSamp *samp)
00029     : SearchObject(cntxt) {
00030   init_ = false;
00031   reco_cache_ = NULL;
00032   samp_cache_ = NULL;
00033   segments_ = NULL;
00034   segment_cnt_ = 0;
00035   samp_ = samp;
00036   left_ = 0;
00037   itop_ = 0;
00038   space_cost_ = NULL;
00039   no_space_cost_ = NULL;
00040   wid_ = samp_->Width();
00041   hgt_ = samp_->Height();
00042   max_seg_per_char_ = cntxt_->Params()->MaxSegPerChar();
00043   rtl_ = (cntxt_->ReadingOrder() == CubeRecoContext::R2L);
00044   min_spc_gap_ =
00045       static_cast<int>(hgt_ * cntxt_->Params()->MinSpaceHeightRatio());
00046   max_spc_gap_ =
00047       static_cast<int>(hgt_ * cntxt_->Params()->MaxSpaceHeightRatio());
00048 }
00049 
00050 CubeSearchObject::~CubeSearchObject() {
00051   Cleanup();
00052 }
00053 
00054 // Cleanup
00055 void CubeSearchObject::Cleanup() {
00056   // delete Recognition Cache
00057   if (reco_cache_) {
00058     for (int strt_seg = 0; strt_seg < segment_cnt_; strt_seg++)  {
00059       if (reco_cache_[strt_seg]) {
00060         for (int end_seg = 0; end_seg < segment_cnt_; end_seg++)  {
00061           if (reco_cache_[strt_seg][end_seg]) {
00062             delete reco_cache_[strt_seg][end_seg];
00063           }
00064         }
00065         delete []reco_cache_[strt_seg];
00066       }
00067     }
00068     delete []reco_cache_;
00069     reco_cache_ = NULL;
00070   }
00071 
00072   // delete CharSamp Cache
00073   if (samp_cache_) {
00074     for (int strt_seg = 0; strt_seg < segment_cnt_; strt_seg++)  {
00075       if (samp_cache_[strt_seg]) {
00076         for (int end_seg = 0; end_seg < segment_cnt_; end_seg++)  {
00077           if (samp_cache_[strt_seg][end_seg]) {
00078             delete samp_cache_[strt_seg][end_seg];
00079           }
00080         }
00081         delete []samp_cache_[strt_seg];
00082       }
00083     }
00084     delete []samp_cache_;
00085     samp_cache_ = NULL;
00086   }
00087 
00088   // delete segment list
00089   if (segments_) {
00090     for (int seg = 0; seg < segment_cnt_; seg++) {
00091       if (segments_[seg]) {
00092         delete segments_[seg];
00093       }
00094     }
00095     delete []segments_;
00096     segments_ = NULL;
00097   }
00098 
00099   if (space_cost_) {
00100     delete []space_cost_;
00101     space_cost_ = NULL;
00102   }
00103 
00104   if (no_space_cost_) {
00105     delete []no_space_cost_;
00106     no_space_cost_ = NULL;
00107   }
00108 
00109   segment_cnt_ = 0;
00110   init_ = false;
00111 }
00112 
00113 // # of segmentation points. One less than the count of segments
00114 int CubeSearchObject::SegPtCnt() {
00115   if (!init_ && !Init())
00116     return -1;
00117   return segment_cnt_ - 1;
00118 }
00119 
00120 // init and allocate variables, perform segmentation
00121 bool CubeSearchObject::Init() {
00122   if (init_)
00123     return true;
00124   if (!Segment()) {
00125     return false;
00126   }
00127 
00128   // init cache
00129   reco_cache_ = new CharAltList **[segment_cnt_];
00130   if (reco_cache_ == NULL) {
00131     fprintf(stderr, "Cube ERROR (CubeSearchObject::Init): could not "
00132             "allocate CharAltList array\n");
00133     return false;
00134   }
00135 
00136   samp_cache_ = new CharSamp **[segment_cnt_];
00137   if (samp_cache_ == NULL) {
00138     fprintf(stderr, "Cube ERROR (CubeSearchObject::Init): could not "
00139             "allocate CharSamp array\n");
00140     return false;
00141   }
00142 
00143   for (int seg = 0; seg < segment_cnt_; seg++) {
00144     reco_cache_[seg] = new CharAltList *[segment_cnt_];
00145     if (reco_cache_[seg] == NULL) {
00146       fprintf(stderr, "Cube ERROR (CubeSearchObject::Init): could not "
00147               "allocate a single segment's CharAltList array\n");
00148       return false;
00149     }
00150 
00151     memset(reco_cache_[seg], 0, segment_cnt_ * sizeof(*reco_cache_[seg]));
00152 
00153     samp_cache_[seg] = new CharSamp *[segment_cnt_];
00154     if (samp_cache_[seg] == NULL) {
00155       fprintf(stderr, "Cube ERROR (CubeSearchObject::Init): could not "
00156               "allocate a single segment's CharSamp array\n");
00157       return false;
00158     }
00159 
00160     memset(samp_cache_[seg], 0, segment_cnt_ * sizeof(*samp_cache_[seg]));
00161   }
00162 
00163   init_ = true;
00164   return true;
00165 }
00166 
00167 // returns a char sample corresponding to the bitmap between 2 seg pts
00168 CharSamp *CubeSearchObject::CharSample(int start_pt, int end_pt) {
00169   // init if necessary
00170   if (!init_ && !Init())
00171     return NULL;
00172   // validate segment range
00173   if (!IsValidSegmentRange(start_pt, end_pt))
00174     return NULL;
00175 
00176   // look for the samp in the cache
00177   if (samp_cache_ && samp_cache_[start_pt + 1] &&
00178       samp_cache_[start_pt + 1][end_pt]) {
00179     return samp_cache_[start_pt + 1][end_pt];
00180   }
00181   // create a char samp object from the specified range of segments
00182   bool left_most;
00183   bool right_most;
00184   CharSamp *samp = CharSamp::FromConComps(segments_, start_pt + 1,
00185                                           end_pt - start_pt, NULL,
00186                                           &left_most, &right_most, hgt_);
00187   if (!samp)
00188     return NULL;
00189 
00190   if (kUseCroppedChars) {
00191     CharSamp *cropped_samp = samp->Crop();
00192     // we no longer need the orig sample
00193     delete samp;
00194     if (!cropped_samp)
00195       return NULL;
00196     samp = cropped_samp;
00197   }
00198 
00199   // get the dimensions of the new cropped sample
00200   int char_top = samp->Top();
00201   int char_wid = samp->Width();
00202   int char_hgt = samp->Height();
00203 
00204   // for cursive languages, these features correspond to whether
00205   // the charsamp is at the beginning or end of conncomp
00206   if (cntxt_->Cursive() == true) {
00207     // first and last char flags depend on reading order
00208     bool first_char = rtl_ ? right_most : left_most;
00209     bool last_char = rtl_ ? left_most : right_most;
00210 
00211     samp->SetFirstChar(first_char ? 255 : 0);
00212     samp->SetLastChar(last_char ? 255 : 0);
00213   } else {
00214     // for non cursive languages, these features correspond
00215     // to whether the charsamp is at the begining or end of the word
00216     samp->SetFirstChar((start_pt == -1) ? 255 : 0);
00217     samp->SetLastChar((end_pt == (segment_cnt_ - 1)) ? 255 : 0);
00218   }
00219   samp->SetNormTop(255 * char_top / hgt_);
00220   samp->SetNormBottom(255 * (char_top + char_hgt) / hgt_);
00221   samp->SetNormAspectRatio(255 * char_wid / (char_wid + char_hgt));
00222 
00223   // add to cache & return
00224   samp_cache_[start_pt + 1][end_pt] = samp;
00225   return samp;
00226 }
00227 
00228 Box *CubeSearchObject::CharBox(int start_pt, int end_pt) {
00229   if (!init_ && !Init())
00230     return NULL;
00231   if (!IsValidSegmentRange(start_pt, end_pt)) {
00232     fprintf(stderr, "Cube ERROR (CubeSearchObject::CharBox): invalid "
00233             "segment range (%d, %d)\n", start_pt, end_pt);
00234     return NULL;
00235   }
00236 
00237   // create a char samp object from the specified range of segments,
00238   // extract its dimensions into a leptonica box, and delete it
00239   bool left_most;
00240   bool right_most;
00241   CharSamp *samp = CharSamp::FromConComps(segments_, start_pt + 1,
00242                                           end_pt - start_pt, NULL,
00243                                           &left_most, &right_most, hgt_);
00244   if (!samp)
00245     return NULL;
00246   if (kUseCroppedChars) {
00247     CharSamp *cropped_samp = samp->Crop();
00248     delete samp;
00249     if (!cropped_samp) {
00250       return NULL;
00251     }
00252     samp = cropped_samp;
00253   }
00254   Box *box = boxCreate(samp->Left(), samp->Top(),
00255                        samp->Width(), samp->Height());
00256   delete samp;
00257   return box;
00258 }
00259 
00260 // call from Beam Search to return the alt list corresponding to
00261 // recognizing the bitmap between two segmentation pts
00262 CharAltList * CubeSearchObject::RecognizeSegment(int start_pt, int end_pt) {
00263   // init if necessary
00264   if (!init_ && !Init()) {
00265     fprintf(stderr, "Cube ERROR (CubeSearchObject::RecognizeSegment): could "
00266             "not initialize CubeSearchObject\n");
00267     return NULL;
00268   }
00269 
00270   // validate segment range
00271   if (!IsValidSegmentRange(start_pt, end_pt)) {
00272     fprintf(stderr, "Cube ERROR (CubeSearchObject::RecognizeSegment): invalid "
00273             "segment range (%d, %d)\n", start_pt, end_pt);
00274     return NULL;
00275   }
00276 
00277   // look for the recognition results in cache in the cache
00278   if (reco_cache_ && reco_cache_[start_pt + 1] &&
00279       reco_cache_[start_pt + 1][end_pt]) {
00280     return reco_cache_[start_pt + 1][end_pt];
00281   }
00282 
00283   // create the char sample corresponding to the blob
00284   CharSamp *samp = CharSample(start_pt, end_pt);
00285   if (!samp) {
00286     fprintf(stderr, "Cube ERROR (CubeSearchObject::RecognizeSegment): could "
00287             "not construct CharSamp\n");
00288     return NULL;
00289   }
00290 
00291   // recognize the char sample
00292   CharClassifier *char_classifier = cntxt_->Classifier();
00293   if (char_classifier) {
00294     reco_cache_[start_pt + 1][end_pt] = char_classifier->Classify(samp);
00295   } else {
00296     // no classifer: all characters are equally probable; add a penalty
00297     // that favors 2-segment characters and aspect ratios (w/h) > 1
00298     fprintf(stderr, "Cube WARNING (CubeSearchObject::RecognizeSegment): cube "
00299             "context has no character classifier!! Inventing a probability "
00300             "distribution.\n");
00301     int class_cnt = cntxt_->CharacterSet()->ClassCount();
00302     CharAltList *alt_list = new CharAltList(cntxt_->CharacterSet(), class_cnt);
00303     int seg_cnt = end_pt - start_pt;
00304     double prob_val = (1.0 / class_cnt) *
00305         exp(-abs(seg_cnt - 2.0)) *
00306         exp(-samp->Width() / static_cast<double>(samp->Height()));
00307 
00308     if (alt_list) {
00309       for (int class_idx = 0; class_idx < class_cnt; class_idx++) {
00310         alt_list->Insert(class_idx, CubeUtils::Prob2Cost(prob_val));
00311       }
00312       reco_cache_[start_pt + 1][end_pt] = alt_list;
00313     }
00314   }
00315 
00316   return reco_cache_[start_pt + 1][end_pt];
00317 }
00318 
00319 // Perform segmentation of the bitmap by detecting connected components,
00320 // segmenting each connected component using windowed vertical pixel density
00321 // histogram and sorting the resulting segments in reading order
00322 bool CubeSearchObject::Segment() {
00323   if (!samp_)
00324     return false;
00325   segment_cnt_ = 0;
00326   segments_ = samp_->Segment(&segment_cnt_, rtl_,
00327                              cntxt_->Params()->HistWindWid(),
00328                              cntxt_->Params()->MinConCompSize());
00329   if (!segments_ || segment_cnt_ <= 0) {
00330     return false;
00331   }
00332   if (segment_cnt_ >= kMaxSegmentCnt) {
00333     return false;
00334   }
00335   return true;
00336 }
00337 
00338 // computes the space and no space costs at gaps between segments
00339 bool CubeSearchObject::ComputeSpaceCosts() {
00340   // init if necessary
00341   if (!init_ && !Init())
00342     return false;
00343 
00344   // Already computed
00345   if (space_cost_)
00346     return true;
00347 
00348   // No segmentation points
00349   if (segment_cnt_ < 2)
00350     return false;
00351 
00352   // Compute the maximum x to the left of and minimum x to the right of each
00353   // segmentation point
00354   int *max_left_x = new int[segment_cnt_ - 1];
00355   int *min_right_x = new int[segment_cnt_ - 1];
00356   if (!max_left_x || !min_right_x) {
00357     delete []min_right_x;
00358     delete []max_left_x;
00359     return false;
00360   }
00361   if (rtl_) {
00362     min_right_x[0] = segments_[0]->Left();
00363     max_left_x[segment_cnt_ - 2] = segments_[segment_cnt_ - 1]->Right();
00364     for (int pt_idx = 1; pt_idx < (segment_cnt_ - 1); pt_idx++) {
00365       min_right_x[pt_idx] =
00366           MIN(min_right_x[pt_idx - 1], segments_[pt_idx]->Left());
00367       max_left_x[segment_cnt_ - pt_idx - 2] =
00368           MAX(max_left_x[segment_cnt_ - pt_idx - 1],
00369               segments_[segment_cnt_ - pt_idx - 1]->Right());
00370     }
00371   } else {
00372     min_right_x[segment_cnt_ - 2] = segments_[segment_cnt_ - 1]->Left();
00373     max_left_x[0] = segments_[0]->Right();
00374     for (int pt_idx = 1; pt_idx < (segment_cnt_ - 1); pt_idx++) {
00375       min_right_x[segment_cnt_ - pt_idx - 2] =
00376           MIN(min_right_x[segment_cnt_ - pt_idx - 1],
00377               segments_[segment_cnt_ - pt_idx - 1]->Left());
00378       max_left_x[pt_idx] =
00379           MAX(max_left_x[pt_idx - 1], segments_[pt_idx]->Right());
00380     }
00381   }
00382 
00383   // Allocate memory for space and no space costs
00384   // trivial cases
00385   space_cost_ = new int[segment_cnt_ - 1];
00386   no_space_cost_ = new int[segment_cnt_ - 1];
00387   if (!space_cost_ || !no_space_cost_) {
00388     delete []min_right_x;
00389     delete []max_left_x;
00390     return false;
00391   }
00392 
00393   // go through all segmentation points determining the horizontal gap between
00394   // the images on both sides of each break points. Use the gap to estimate
00395   // the probability of a space. The probability is modeled a linear function
00396   // of the gap width
00397   for (int pt_idx = 0; pt_idx < (segment_cnt_ - 1); pt_idx++) {
00398     // determine the gap at the segmentation point
00399     int gap = min_right_x[pt_idx] - max_left_x[pt_idx];
00400     float prob = 0.0;
00401 
00402     // gap is too small => no space
00403     if (gap < min_spc_gap_) {
00404       prob = 0.0;
00405     } else if (gap > max_spc_gap_) {
00406       // gap is too big => definite space
00407       prob = 1.0;
00408     } else {
00409       // gap is somewhere in between, compute probability
00410       prob = (gap - min_spc_gap_) /
00411           static_cast<double>(max_spc_gap_ - min_spc_gap_);
00412     }
00413 
00414     // compute cost of space and non-space
00415     space_cost_[pt_idx] = CubeUtils::Prob2Cost(prob) +
00416                           CubeUtils::Prob2Cost(0.1);
00417     no_space_cost_[pt_idx] = CubeUtils::Prob2Cost(1.0 - prob);
00418   }
00419 
00420   delete []min_right_x;
00421   delete []max_left_x;
00422 
00423   return true;
00424 }
00425 
00426 // Returns the cost of having a space before the specified segmentation point
00427 int CubeSearchObject::SpaceCost(int pt_idx) {
00428   if (!space_cost_ && !ComputeSpaceCosts()) {
00429     // Failed to compute costs return a zero prob
00430     return CubeUtils::Prob2Cost(0.0);
00431   }
00432   return space_cost_[pt_idx];
00433 }
00434 
00435 // Returns the cost of not having a space before the specified
00436 // segmentation point
00437 int CubeSearchObject::NoSpaceCost(int pt_idx) {
00438   // If failed to compute costs, return a 1.0 prob
00439   if (!space_cost_ && !ComputeSpaceCosts())
00440     return CubeUtils::Prob2Cost(0.0);
00441   return no_space_cost_[pt_idx];
00442 }
00443 
00444 // Returns the cost of not having any spaces within the specified range
00445 // of segmentation points
00446 int CubeSearchObject::NoSpaceCost(int st_pt, int end_pt) {
00447   // If fail to compute costs, return a 1.0 prob
00448   if (!space_cost_ && !ComputeSpaceCosts())
00449     return CubeUtils::Prob2Cost(1.0);
00450   int no_spc_cost = 0;
00451   for (int pt_idx = st_pt + 1; pt_idx < end_pt; pt_idx++)
00452     no_spc_cost += NoSpaceCost(pt_idx);
00453   return no_spc_cost;
00454 }
00455 }