Tesseract
3.02
|
00001 00002 // File: equationdetect.cpp 00003 // Description: Helper classes to detect equations. 00004 // Author: Zongyi (Joe) Liu (joeliu@google.com) 00005 // Created: Fri Aug 31 11:13:01 PST 2011 00006 // 00007 // (C) Copyright 2011, Google Inc. 00008 // Licensed under the Apache License, Version 2.0 (the "License"); 00009 // you may not use this file except in compliance with the License. 00010 // You may obtain a copy of the License at 00011 // http://www.apache.org/licenses/LICENSE-2.0 00012 // Unless required by applicable law or agreed to in writing, software 00013 // distributed under the License is distributed on an "AS IS" BASIS, 00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 // See the License for the specific language governing permissions and 00016 // limitations under the License. 00017 // 00019 00020 #ifdef _MSC_VER 00021 #pragma warning(disable:4244) // Conversion warnings 00022 #include "mathfix.h" 00023 #endif 00024 00025 #ifdef __MINGW32__ 00026 #include <limits.h> 00027 #endif 00028 00029 #include <float.h> 00030 00031 // Include automatically generated configuration file if running autoconf. 00032 #ifdef HAVE_CONFIG_H 00033 #include "config_auto.h" 00034 #endif 00035 00036 #include "equationdetect.h" 00037 00038 #include "bbgrid.h" 00039 #include "classify.h" 00040 #include "colpartition.h" 00041 #include "colpartitiongrid.h" 00042 #include "colpartitionset.h" 00043 #include "helpers.h" 00044 #include "ratngs.h" 00045 #include "tesseractclass.h" 00046 00047 // Config variables. 00048 BOOL_VAR(equationdetect_save_bi_image, false, "Save input bi image"); 00049 BOOL_VAR(equationdetect_save_spt_image, false, "Save special character image"); 00050 BOOL_VAR(equationdetect_save_seed_image, false, "Save the seed image"); 00051 BOOL_VAR(equationdetect_save_merged_image, false, "Save the merged image"); 00052 00053 namespace tesseract { 00054 00056 // Utility ColParition sort functions. 00058 static int SortCPByTopReverse(const void* p1, const void* p2) { 00059 const ColPartition* cp1 = *reinterpret_cast<ColPartition* const*>(p1); 00060 const ColPartition* cp2 = *reinterpret_cast<ColPartition* const*>(p2); 00061 ASSERT_HOST(cp1 != NULL && cp2 != NULL); 00062 const TBOX &box1(cp1->bounding_box()), &box2(cp2->bounding_box()); 00063 return box2.top() - box1.top(); 00064 } 00065 00066 static int SortCPByBottom(const void* p1, const void* p2) { 00067 const ColPartition* cp1 = *reinterpret_cast<ColPartition* const*>(p1); 00068 const ColPartition* cp2 = *reinterpret_cast<ColPartition* const*>(p2); 00069 ASSERT_HOST(cp1 != NULL && cp2 != NULL); 00070 const TBOX &box1(cp1->bounding_box()), &box2(cp2->bounding_box()); 00071 return box1.bottom() - box2.bottom(); 00072 } 00073 00074 static int SortCPByHeight(const void* p1, const void* p2) { 00075 const ColPartition* cp1 = *reinterpret_cast<ColPartition* const*>(p1); 00076 const ColPartition* cp2 = *reinterpret_cast<ColPartition* const*>(p2); 00077 ASSERT_HOST(cp1 != NULL && cp2 != NULL); 00078 const TBOX &box1(cp1->bounding_box()), &box2(cp2->bounding_box()); 00079 return box1.height() - box2.height(); 00080 } 00081 00082 // TODO(joeliu): we may want to parameterize these constants. 00083 const float kMathDigitDensityTh1 = 0.25; 00084 const float kMathDigitDensityTh2 = 0.1; 00085 const float kMathItalicDensityTh = 0.5; 00086 const float kUnclearDensityTh = 0.25; 00087 const int kSeedBlobsCountTh = 10; 00088 const int kLeftIndentAlignmentCountTh = 1; 00089 00090 // Returns true if PolyBlockType is of text type or equation type. 00091 inline bool IsTextOrEquationType(PolyBlockType type) { 00092 return PTIsTextType(type) || type == PT_EQUATION; 00093 } 00094 00095 inline bool IsLeftIndented(const EquationDetect::IndentType type) { 00096 return type == EquationDetect::LEFT_INDENT || 00097 type == EquationDetect::BOTH_INDENT; 00098 } 00099 00100 inline bool IsRightIndented(const EquationDetect::IndentType type) { 00101 return type == EquationDetect::RIGHT_INDENT || 00102 type == EquationDetect::BOTH_INDENT; 00103 } 00104 00105 EquationDetect::EquationDetect(const char* equ_datapath, 00106 const char* equ_name) { 00107 const char* default_name = "equ"; 00108 if (equ_name == NULL) { 00109 equ_name = default_name; 00110 } 00111 equ_tesseract_ = lang_tesseract_ = NULL; 00112 resolution_ = 0; 00113 page_count_ = 0; 00114 00115 // Construct equ_tesseract_. 00116 equ_tesseract_ = new Tesseract(); 00117 if (equ_tesseract_->init_tesseract(equ_datapath, equ_name, 00118 OEM_TESSERACT_ONLY)) { 00119 tprintf("Warning: equation region detection requested," 00120 " but %s failed to load from %s\n", equ_name, equ_datapath); 00121 delete equ_tesseract_; 00122 equ_tesseract_ = NULL; 00123 } 00124 00125 cps_super_bbox_ = NULL; 00126 } 00127 00128 EquationDetect::~EquationDetect() { 00129 if (equ_tesseract_) { 00130 delete (equ_tesseract_); 00131 } 00132 if (cps_super_bbox_) { 00133 delete(cps_super_bbox_); 00134 } 00135 } 00136 00137 void EquationDetect::SetLangTesseract(Tesseract* lang_tesseract) { 00138 lang_tesseract_ = lang_tesseract; 00139 } 00140 00141 void EquationDetect::SetResolution(const int resolution) { 00142 resolution_ = resolution; 00143 } 00144 00145 int EquationDetect::LabelSpecialText(TO_BLOCK* to_block) { 00146 if (to_block == NULL) { 00147 tprintf("Warning: input to_block is NULL!\n"); 00148 return -1; 00149 } 00150 00151 GenericVector<BLOBNBOX_LIST*> blob_lists; 00152 blob_lists.push_back(&(to_block->blobs)); 00153 blob_lists.push_back(&(to_block->large_blobs)); 00154 for (int i = 0; i < blob_lists.size(); ++i) { 00155 BLOBNBOX_IT bbox_it(blob_lists[i]); 00156 for (bbox_it.mark_cycle_pt (); !bbox_it.cycled_list(); 00157 bbox_it.forward()) { 00158 bbox_it.data()->set_special_text_type(BSTT_NONE); 00159 } 00160 } 00161 00162 return 0; 00163 } 00164 00165 void EquationDetect::IdentifySpecialText( 00166 BLOBNBOX *blobnbox, const int height_th) { 00167 ASSERT_HOST(blobnbox != NULL); 00168 if (blobnbox->bounding_box().height() < height_th && height_th > 0) { 00169 // For small blob, we simply set to BSTT_NONE. 00170 blobnbox->set_special_text_type(BSTT_NONE); 00171 return; 00172 } 00173 00174 BLOB_CHOICE_LIST ratings_equ, ratings_lang; 00175 C_BLOB* blob = blobnbox->cblob(); 00176 TBLOB* tblob = TBLOB::PolygonalCopy(blob); 00177 const TBOX& box = tblob->bounding_box(); 00178 00179 // Normalize the blob. Set the origin to the place we want to be the 00180 // bottom-middle, and scaling is to make the height the x-height. 00181 float scaling = static_cast<float>(kBlnXHeight) / box.height(); 00182 DENORM denorm; 00183 float x_orig = (box.left() + box.right()) / 2.0f, y_orig = box.bottom(); 00184 denorm.SetupNormalization(NULL, NULL, NULL, NULL, NULL, 0, 00185 x_orig, y_orig, scaling, scaling, 00186 0.0f, static_cast<float>(kBlnBaselineOffset)); 00187 TBLOB* normed_blob = new TBLOB(*tblob); 00188 normed_blob->Normalize(denorm); 00189 equ_tesseract_->AdaptiveClassifier(normed_blob, denorm, &ratings_equ, NULL); 00190 lang_tesseract_->AdaptiveClassifier(normed_blob, denorm, &ratings_lang, NULL); 00191 delete normed_blob; 00192 delete tblob; 00193 00194 // Get the best choice from ratings_lang and rating_equ. As the choice in the 00195 // list has already been sorted by the certainty, we simply use the first 00196 // choice. 00197 BLOB_CHOICE *lang_choice = NULL, *equ_choice = NULL; 00198 if (ratings_lang.length() > 0) { 00199 BLOB_CHOICE_IT choice_it(&ratings_lang); 00200 lang_choice = choice_it.data(); 00201 } 00202 if (ratings_equ.length() > 0) { 00203 BLOB_CHOICE_IT choice_it(&ratings_equ); 00204 equ_choice = choice_it.data(); 00205 } 00206 00207 float lang_score = lang_choice ? lang_choice->certainty() : -FLT_MAX; 00208 float equ_score = equ_choice ? equ_choice->certainty() : -FLT_MAX; 00209 00210 const float kConfScoreTh = -5.0f, kConfDiffTh = 1.8; 00211 // The scores here are negative, so the max/min == fabs(min/max). 00212 // float ratio = fmax(lang_score, equ_score) / fmin(lang_score, equ_score); 00213 float diff = fabs(lang_score - equ_score); 00214 BlobSpecialTextType type = BSTT_NONE; 00215 00216 // Classification. 00217 if (fmax(lang_score, equ_score) < kConfScoreTh) { 00218 // If both score are very small, then mark it as unclear. 00219 type = BSTT_UNCLEAR; 00220 } else if (diff > kConfDiffTh && equ_score > lang_score) { 00221 // If equ_score is significantly higher, then we classify this character as 00222 // math symbol. 00223 type = BSTT_MATH; 00224 } else if (lang_choice) { 00225 // For other cases: lang_score is similar or significantly higher. 00226 type = EstimateTypeForUnichar( 00227 lang_tesseract_->unicharset, lang_choice->unichar_id()); 00228 } 00229 00230 if (type == BSTT_NONE && lang_tesseract_->get_fontinfo_table().get( 00231 lang_choice->fontinfo_id()).is_italic()) { 00232 // For text symbol, we still check if it is italic. 00233 blobnbox->set_special_text_type(BSTT_ITALIC); 00234 } else { 00235 blobnbox->set_special_text_type(type); 00236 } 00237 } 00238 00239 BlobSpecialTextType EquationDetect::EstimateTypeForUnichar( 00240 const UNICHARSET& unicharset, const UNICHAR_ID id) const { 00241 STRING s = unicharset.id_to_unichar(id); 00242 if (unicharset.get_isalpha(id)) { 00243 return BSTT_NONE; 00244 } 00245 00246 if (unicharset.get_ispunctuation(id)) { 00247 // Exclude some special texts that are likely to be confused as math symbol. 00248 static GenericVector<UNICHAR_ID> ids_to_exclude; 00249 if (ids_to_exclude.empty()) { 00250 static const STRING kCharsToEx[] = {"'", "`", "\"", "\\", ",", ".", 00251 "〈", "〉", "《", "》", "」", "「", ""}; 00252 int i = 0; 00253 while (kCharsToEx[i] != "") { 00254 ids_to_exclude.push_back( 00255 unicharset.unichar_to_id(kCharsToEx[i++].string())); 00256 } 00257 ids_to_exclude.sort(); 00258 } 00259 return ids_to_exclude.bool_binary_search(id) ? BSTT_NONE : BSTT_MATH; 00260 } 00261 00262 // Check if it is digit. In addition to the isdigit attribute, we also check 00263 // if this character belongs to those likely to be confused with a digit. 00264 static const STRING kDigitsChars = "|"; 00265 if (unicharset.get_isdigit(id) || 00266 (s.length() == 1 && kDigitsChars.contains(s[0]))) { 00267 return BSTT_DIGIT; 00268 } else { 00269 return BSTT_MATH; 00270 } 00271 } 00272 00273 void EquationDetect::IdentifySpecialText() { 00274 // Set configuration for Tesseract::AdaptiveClassifier. 00275 equ_tesseract_->tess_cn_matching.set_value(true); // turn it on 00276 equ_tesseract_->tess_bn_matching.set_value(false); 00277 00278 // Set the multiplier to zero for lang_tesseract_ to improve the accuracy. 00279 int classify_class_pruner = lang_tesseract_->classify_class_pruner_multiplier; 00280 int classify_integer_matcher = 00281 lang_tesseract_->classify_integer_matcher_multiplier; 00282 lang_tesseract_->classify_class_pruner_multiplier.set_value(0); 00283 lang_tesseract_->classify_integer_matcher_multiplier.set_value(0); 00284 00285 ColPartitionGridSearch gsearch(part_grid_); 00286 ColPartition *part = NULL; 00287 gsearch.StartFullSearch(); 00288 while ((part = gsearch.NextFullSearch()) != NULL) { 00289 if (!IsTextOrEquationType(part->type())) { 00290 continue; 00291 } 00292 IdentifyBlobsToSkip(part); 00293 BLOBNBOX_C_IT bbox_it(part->boxes()); 00294 // Compute the height threshold. 00295 GenericVector<int> blob_heights; 00296 for (bbox_it.mark_cycle_pt (); !bbox_it.cycled_list(); 00297 bbox_it.forward()) { 00298 if (bbox_it.data()->special_text_type() != BSTT_SKIP) { 00299 blob_heights.push_back(bbox_it.data()->bounding_box().height()); 00300 } 00301 } 00302 blob_heights.sort(); 00303 int height_th = blob_heights[blob_heights.size() / 2] / 3 * 2; 00304 for (bbox_it.mark_cycle_pt (); !bbox_it.cycled_list(); 00305 bbox_it.forward()) { 00306 if (bbox_it.data()->special_text_type() != BSTT_SKIP) { 00307 IdentifySpecialText(bbox_it.data(), height_th); 00308 } 00309 } 00310 } 00311 00312 // Set the multiplier values back. 00313 lang_tesseract_->classify_class_pruner_multiplier.set_value( 00314 classify_class_pruner); 00315 lang_tesseract_->classify_integer_matcher_multiplier.set_value( 00316 classify_integer_matcher); 00317 00318 if (equationdetect_save_spt_image) { // For debug. 00319 STRING outfile; 00320 GetOutputTiffName("_spt", &outfile); 00321 PaintSpecialTexts(outfile); 00322 } 00323 } 00324 00325 void EquationDetect::IdentifyBlobsToSkip(ColPartition* part) { 00326 ASSERT_HOST(part); 00327 BLOBNBOX_C_IT blob_it(part->boxes()); 00328 00329 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { 00330 // At this moment, no blob should have been joined. 00331 ASSERT_HOST(!blob_it.data()->joined_to_prev()); 00332 } 00333 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { 00334 BLOBNBOX* blob = blob_it.data(); 00335 if (blob->joined_to_prev() || blob->special_text_type() == BSTT_SKIP) { 00336 continue; 00337 } 00338 TBOX blob_box = blob->bounding_box(); 00339 00340 // Search if any blob can be merged into blob. If found, then we mark all 00341 // these blobs as BSTT_SKIP. 00342 BLOBNBOX_C_IT blob_it2 = blob_it; 00343 bool found = false; 00344 while (!blob_it2.at_last()) { 00345 BLOBNBOX* nextblob = blob_it2.forward(); 00346 const TBOX& nextblob_box = nextblob->bounding_box(); 00347 if (nextblob_box.left() >= blob_box.right()) { 00348 break; 00349 } 00350 const float kWidthR = 0.4, kHeightR = 0.3; 00351 bool xoverlap = blob_box.major_x_overlap(nextblob_box), 00352 yoverlap = blob_box.y_overlap(nextblob_box); 00353 float widthR = static_cast<float>( 00354 MIN(nextblob_box.width(), blob_box.width())) / 00355 MAX(nextblob_box.width(), blob_box.width()); 00356 float heightR = static_cast<float>( 00357 MIN(nextblob_box.height(), blob_box.height())) / 00358 MAX(nextblob_box.height(), blob_box.height()); 00359 00360 if (xoverlap && yoverlap && widthR > kWidthR && heightR > kHeightR) { 00361 // Found one, set nextblob type and recompute blob_box. 00362 found = true; 00363 nextblob->set_special_text_type(BSTT_SKIP); 00364 blob_box += nextblob_box; 00365 } 00366 } 00367 if (found) { 00368 blob->set_special_text_type(BSTT_SKIP); 00369 } 00370 } 00371 } 00372 00373 int EquationDetect::FindEquationParts( 00374 ColPartitionGrid* part_grid, ColPartitionSet** best_columns) { 00375 if (!equ_tesseract_ || !lang_tesseract_) { 00376 tprintf("Warning: equ_tesseract_/lang_tesseract_ is NULL!\n"); 00377 return -1; 00378 } 00379 if (!part_grid || !best_columns) { 00380 tprintf("part_grid/best_columns is NULL!!\n"); 00381 return -1; 00382 } 00383 cp_seeds_.clear(); 00384 part_grid_ = part_grid; 00385 best_columns_ = best_columns; 00386 resolution_ = lang_tesseract_->source_resolution(); 00387 STRING outfile; 00388 page_count_++; 00389 00390 if (equationdetect_save_bi_image) { 00391 GetOutputTiffName("_bi", &outfile); 00392 pixWrite(outfile.string(), lang_tesseract_->pix_binary(), IFF_TIFF_G4); 00393 } 00394 00395 // Pass 0: Compute special text type for blobs. 00396 IdentifySpecialText(); 00397 00398 // Pass 1: Merge parts by overlap. 00399 MergePartsByLocation(); 00400 00401 // Pass 2: compute the math blob density and find the seed partition. 00402 IdentifySeedParts(); 00403 // We still need separate seed into block seed and inline seed partition. 00404 IdentifyInlineParts(); 00405 00406 if (equationdetect_save_seed_image) { 00407 GetOutputTiffName("_seed", &outfile); 00408 PaintColParts(outfile); 00409 } 00410 00411 // Pass 3: expand block equation seeds. 00412 while (!cp_seeds_.empty()) { 00413 GenericVector<ColPartition*> seeds_expanded; 00414 for (int i = 0; i < cp_seeds_.size(); ++i) { 00415 if (ExpandSeed(cp_seeds_[i])) { 00416 // If this seed is expanded, then we add it into seeds_expanded. Note 00417 // this seed has been removed from part_grid_ if it is expanded. 00418 seeds_expanded.push_back(cp_seeds_[i]); 00419 } 00420 } 00421 // Add seeds_expanded back into part_grid_ and reset cp_seeds_. 00422 for (int i = 0; i < seeds_expanded.size(); ++i) { 00423 InsertPartAfterAbsorb(seeds_expanded[i]); 00424 } 00425 cp_seeds_ = seeds_expanded; 00426 } 00427 00428 // Pass 4: find math block satellite text partitions and merge them. 00429 ProcessMathBlockSatelliteParts(); 00430 00431 if (equationdetect_save_merged_image) { // For debug. 00432 GetOutputTiffName("_merged", &outfile); 00433 PaintColParts(outfile); 00434 } 00435 00436 return 0; 00437 } 00438 00439 void EquationDetect::MergePartsByLocation() { 00440 while (true) { 00441 ColPartition* part = NULL; 00442 // partitions that have been updated. 00443 GenericVector<ColPartition*> parts_updated; 00444 ColPartitionGridSearch gsearch(part_grid_); 00445 gsearch.StartFullSearch(); 00446 while ((part = gsearch.NextFullSearch()) != NULL) { 00447 if (!IsTextOrEquationType(part->type())) { 00448 continue; 00449 } 00450 GenericVector<ColPartition*> parts_to_merge; 00451 SearchByOverlap(part, &parts_to_merge); 00452 if (parts_to_merge.empty()) { 00453 continue; 00454 } 00455 00456 // Merge parts_to_merge with part, and remove them from part_grid_. 00457 part_grid_->RemoveBBox(part); 00458 for (int i = 0; i < parts_to_merge.size(); ++i) { 00459 ASSERT_HOST(parts_to_merge[i] != NULL && parts_to_merge[i] != part); 00460 part->Absorb(parts_to_merge[i], NULL); 00461 } 00462 gsearch.RepositionIterator(); 00463 00464 parts_updated.push_back(part); 00465 } 00466 00467 if (parts_updated.empty()) { // Exit the loop 00468 break; 00469 } 00470 00471 // Re-insert parts_updated into part_grid_. 00472 for (int i = 0; i < parts_updated.size(); ++i) { 00473 InsertPartAfterAbsorb(parts_updated[i]); 00474 } 00475 } 00476 } 00477 00478 void EquationDetect::SearchByOverlap( 00479 ColPartition* seed, 00480 GenericVector<ColPartition*>* parts_overlap) { 00481 ASSERT_HOST(seed != NULL && parts_overlap != NULL); 00482 if (!IsTextOrEquationType(seed->type())) { 00483 return; 00484 } 00485 ColPartitionGridSearch search(part_grid_); 00486 const TBOX& seed_box(seed->bounding_box()); 00487 const int kRadNeighborCells = 30; 00488 search.StartRadSearch((seed_box.left() + seed_box.right()) / 2, 00489 (seed_box.top() + seed_box.bottom()) / 2, 00490 kRadNeighborCells); 00491 search.SetUniqueMode(true); 00492 00493 // Search iteratively. 00494 ColPartition *part; 00495 GenericVector<ColPartition*> parts; 00496 const float kLargeOverlapTh = 0.95; 00497 const float kEquXOverlap = 0.4, kEquYOverlap = 0.5; 00498 while ((part = search.NextRadSearch()) != NULL) { 00499 if (part == seed || !IsTextOrEquationType(part->type())) { 00500 continue; 00501 } 00502 const TBOX& part_box(part->bounding_box()); 00503 bool merge = false; 00504 00505 float x_overlap_fraction = part_box.x_overlap_fraction(seed_box), 00506 y_overlap_fraction = part_box.y_overlap_fraction(seed_box); 00507 00508 // If part is large overlapped with seed, then set merge to true. 00509 if (x_overlap_fraction >= kLargeOverlapTh && 00510 y_overlap_fraction >= kLargeOverlapTh) { 00511 merge = true; 00512 } else if (seed->type() == PT_EQUATION && 00513 IsTextOrEquationType(part->type())) { 00514 if ((x_overlap_fraction > kEquXOverlap && y_overlap_fraction > 0.0) || 00515 (x_overlap_fraction > 0.0 && y_overlap_fraction > kEquYOverlap)) { 00516 merge = true; 00517 } 00518 } 00519 00520 if (merge) { // Remove the part from search and put it into parts. 00521 search.RemoveBBox(); 00522 parts_overlap->push_back(part); 00523 } 00524 } 00525 } 00526 00527 void EquationDetect::InsertPartAfterAbsorb(ColPartition* part) { 00528 ASSERT_HOST(part); 00529 00530 // Before insert part back into part_grid_, we will need re-compute some 00531 // of its attributes such as first_column_, last_column_. However, we still 00532 // want to preserve its type. 00533 BlobTextFlowType flow_type = part->flow(); 00534 PolyBlockType part_type = part->type(); 00535 BlobRegionType blob_type = part->blob_type(); 00536 00537 // Call SetPartitionType to re-compute the attributes of part. 00538 const TBOX& part_box(part->bounding_box()); 00539 int grid_x, grid_y; 00540 part_grid_->GridCoords( 00541 part_box.left(), part_box.bottom(), &grid_x, &grid_y); 00542 part->SetPartitionType(resolution_, best_columns_[grid_y]); 00543 00544 // Reset the types back. 00545 part->set_type(part_type); 00546 part->set_blob_type(blob_type); 00547 part->set_flow(flow_type); 00548 part->SetBlobTypes(); 00549 00550 // Insert into part_grid_. 00551 part_grid_->InsertBBox(true, true, part); 00552 } 00553 00554 void EquationDetect::IdentifySeedParts() { 00555 ColPartitionGridSearch gsearch(part_grid_); 00556 ColPartition *part = NULL; 00557 gsearch.StartFullSearch(); 00558 00559 GenericVector<ColPartition*> seeds1, seeds2; 00560 // The left coordinates of indented text partitions. 00561 GenericVector<int> indented_texts_left; 00562 // The foreground density of text partitions. 00563 GenericVector<float> texts_foreground_density; 00564 while ((part = gsearch.NextFullSearch()) != NULL) { 00565 if (!IsTextOrEquationType(part->type())) { 00566 continue; 00567 } 00568 part->ComputeSpecialBlobsDensity(); 00569 bool blobs_check = CheckSeedBlobsCount(part); 00570 const int kTextBlobsTh = 20; 00571 00572 if (CheckSeedDensity(kMathDigitDensityTh1, kMathDigitDensityTh2, part) && 00573 blobs_check) { 00574 // Passed high density threshold test, save into seeds1. 00575 seeds1.push_back(part); 00576 } else { 00577 IndentType indent = IsIndented(part); 00578 if (IsLeftIndented(indent) && blobs_check && 00579 CheckSeedDensity(kMathDigitDensityTh2, kMathDigitDensityTh2, part)) { 00580 // Passed low density threshold test and is indented, save into seeds2. 00581 seeds2.push_back(part); 00582 } else if (!IsRightIndented(indent) && 00583 part->boxes_count() > kTextBlobsTh) { 00584 // This is likely to be a text part, save the features. 00585 const TBOX&box = part->bounding_box(); 00586 if (IsLeftIndented(indent)) { 00587 indented_texts_left.push_back(box.left()); 00588 } 00589 texts_foreground_density.push_back(ComputeForegroundDensity(box)); 00590 } 00591 } 00592 } 00593 00594 // Sort the features collected from text regions. 00595 indented_texts_left.sort(); 00596 texts_foreground_density.sort(); 00597 float foreground_density_th = 0.15; // Default value. 00598 if (!texts_foreground_density.empty()) { 00599 // Use the median of the texts_foreground_density. 00600 foreground_density_th = 0.8 * texts_foreground_density[ 00601 texts_foreground_density.size() / 2]; 00602 } 00603 00604 for (int i = 0; i < seeds1.size(); ++i) { 00605 const TBOX& box = seeds1[i]->bounding_box(); 00606 if (CheckSeedFgDensity(foreground_density_th, seeds1[i]) && 00607 !(IsLeftIndented(IsIndented(seeds1[i])) && 00608 CountAlignment(indented_texts_left, box.left()) >= 00609 kLeftIndentAlignmentCountTh)) { 00610 // Mark as PT_EQUATION type. 00611 seeds1[i]->set_type(PT_EQUATION); 00612 cp_seeds_.push_back(seeds1[i]); 00613 } else { // Mark as PT_INLINE_EQUATION type. 00614 seeds1[i]->set_type(PT_INLINE_EQUATION); 00615 } 00616 } 00617 00618 for (int i = 0; i < seeds2.size(); ++i) { 00619 if (CheckForSeed2(indented_texts_left, foreground_density_th, seeds2[i])) { 00620 seeds2[i]->set_type(PT_EQUATION); 00621 cp_seeds_.push_back(seeds2[i]); 00622 } 00623 } 00624 } 00625 00626 float EquationDetect::ComputeForegroundDensity(const TBOX& tbox) { 00627 #if LIBLEPT_MINOR_VERSION < 69 && LIBLEPT_MAJOR_VERSION <= 1 00628 // This will disable the detector because no seed will be identified. 00629 return 1.0f; 00630 #else 00631 Pix *pix_bi = lang_tesseract_->pix_binary(); 00632 int pix_height = pixGetHeight(pix_bi); 00633 Box* box = boxCreate(tbox.left(), pix_height - tbox.top(), 00634 tbox.width(), tbox.height()); 00635 Pix *pix_sub = pixClipRectangle(pix_bi, box, NULL); 00636 l_float32 fract; 00637 pixForegroundFraction(pix_sub, &fract); 00638 pixDestroy(&pix_sub); 00639 boxDestroy(&box); 00640 00641 return fract; 00642 #endif 00643 } 00644 00645 bool EquationDetect::CheckSeedFgDensity(const float density_th, 00646 ColPartition* part) { 00647 ASSERT_HOST(part); 00648 00649 // Split part horizontall, and check for each sub part. 00650 GenericVector<TBOX> sub_boxes; 00651 SplitCPHorLite(part, &sub_boxes); 00652 float parts_passed = 0.0; 00653 for (int i = 0; i < sub_boxes.size(); ++i) { 00654 float density = ComputeForegroundDensity(sub_boxes[i]); 00655 if (density < density_th) { 00656 parts_passed++; 00657 } 00658 } 00659 00660 // If most sub parts passed, then we return true. 00661 const float kSeedPartRatioTh = 0.3; 00662 bool retval = (parts_passed / sub_boxes.size() >= kSeedPartRatioTh); 00663 00664 return retval; 00665 } 00666 00667 void EquationDetect::SplitCPHor(ColPartition* part, 00668 GenericVector<ColPartition*>* parts_splitted) { 00669 ASSERT_HOST(part && parts_splitted); 00670 if (part->median_width() == 0 || part->boxes_count() == 0) { 00671 return; 00672 } 00673 00674 // Make a copy of part, and reset parts_splitted. 00675 ColPartition* right_part = part->CopyButDontOwnBlobs(); 00676 parts_splitted->delete_data_pointers(); 00677 parts_splitted->clear(); 00678 00679 const double kThreshold = part->median_width() * 3.0; 00680 bool found_split = true; 00681 while (found_split) { 00682 found_split = false; 00683 BLOBNBOX_C_IT box_it(right_part->boxes()); 00684 // Blobs are sorted left side first. If blobs overlap, 00685 // the previous blob may have a "more right" right side. 00686 // Account for this by always keeping the largest "right" 00687 // so far. 00688 int previous_right = MIN_INT32; 00689 00690 // Look for the next split in the partition. 00691 for (box_it.mark_cycle_pt(); !box_it.cycled_list(); box_it.forward()) { 00692 const TBOX& box = box_it.data()->bounding_box(); 00693 if (previous_right != MIN_INT32 && 00694 box.left() - previous_right > kThreshold) { 00695 // We have a split position. Split the partition in two pieces. 00696 // Insert the left piece in the grid and keep processing the right. 00697 int mid_x = (box.left() + previous_right) / 2; 00698 ColPartition* left_part = right_part; 00699 right_part = left_part->SplitAt(mid_x); 00700 00701 parts_splitted->push_back(left_part); 00702 left_part->ComputeSpecialBlobsDensity(); 00703 found_split = true; 00704 break; 00705 } 00706 00707 // The right side of the previous blobs. 00708 previous_right = MAX(previous_right, box.right()); 00709 } 00710 } 00711 00712 // Add the last piece. 00713 right_part->ComputeSpecialBlobsDensity(); 00714 parts_splitted->push_back(right_part); 00715 } 00716 00717 void EquationDetect::SplitCPHorLite(ColPartition* part, 00718 GenericVector<TBOX>* splitted_boxes) { 00719 ASSERT_HOST(part && splitted_boxes); 00720 splitted_boxes->clear(); 00721 if (part->median_width() == 0) { 00722 return; 00723 } 00724 00725 const double kThreshold = part->median_width() * 3.0; 00726 00727 // Blobs are sorted left side first. If blobs overlap, 00728 // the previous blob may have a "more right" right side. 00729 // Account for this by always keeping the largest "right" 00730 // so far. 00731 TBOX union_box; 00732 int previous_right = MIN_INT32; 00733 BLOBNBOX_C_IT box_it(part->boxes()); 00734 for (box_it.mark_cycle_pt(); !box_it.cycled_list(); box_it.forward()) { 00735 const TBOX& box = box_it.data()->bounding_box(); 00736 if (previous_right != MIN_INT32 && 00737 box.left() - previous_right > kThreshold) { 00738 // We have a split position. 00739 splitted_boxes->push_back(union_box); 00740 previous_right = MIN_INT32; 00741 } 00742 if (previous_right == MIN_INT32) { 00743 union_box = box; 00744 } else { 00745 union_box += box; 00746 } 00747 // The right side of the previous blobs. 00748 previous_right = MAX(previous_right, box.right()); 00749 } 00750 00751 // Add the last piece. 00752 if (previous_right != MIN_INT32) { 00753 splitted_boxes->push_back(union_box); 00754 } 00755 } 00756 00757 bool EquationDetect::CheckForSeed2( 00758 const GenericVector<int>& indented_texts_left, 00759 const float foreground_density_th, 00760 ColPartition* part) { 00761 ASSERT_HOST(part); 00762 const TBOX& box = part->bounding_box(); 00763 00764 // Check if it is aligned with any indented_texts_left. 00765 if (!indented_texts_left.empty() && 00766 CountAlignment(indented_texts_left, box.left()) >= 00767 kLeftIndentAlignmentCountTh) { 00768 return false; 00769 } 00770 00771 // Check the foreground density. 00772 if (ComputeForegroundDensity(box) > foreground_density_th) { 00773 return false; 00774 } 00775 00776 return true; 00777 } 00778 00779 int EquationDetect::CountAlignment( 00780 const GenericVector<int>& sorted_vec, const int val) const { 00781 if (sorted_vec.empty()) { 00782 return 0; 00783 } 00784 const int kDistTh = static_cast<int>(roundf(0.03 * resolution_)); 00785 int pos = sorted_vec.binary_search(val), count = 0; 00786 00787 // Search left side. 00788 int index = pos; 00789 while (index >= 0 && abs(val - sorted_vec[index--]) < kDistTh) { 00790 count++; 00791 } 00792 00793 // Search right side. 00794 index = pos + 1; 00795 while (index < sorted_vec.size() && sorted_vec[index++] - val < kDistTh) { 00796 count++; 00797 } 00798 00799 return count; 00800 } 00801 00802 void EquationDetect::IdentifyInlineParts() { 00803 ComputeCPsSuperBBox(); 00804 IdentifyInlinePartsHorizontal(); 00805 int textparts_linespacing = EstimateTextPartLineSpacing(); 00806 IdentifyInlinePartsVertical(true, textparts_linespacing); 00807 IdentifyInlinePartsVertical(false, textparts_linespacing); 00808 } 00809 00810 void EquationDetect::ComputeCPsSuperBBox() { 00811 ColPartitionGridSearch gsearch(part_grid_); 00812 ColPartition *part = NULL; 00813 gsearch.StartFullSearch(); 00814 if (cps_super_bbox_) { 00815 delete cps_super_bbox_; 00816 } 00817 cps_super_bbox_ = new TBOX(); 00818 while ((part = gsearch.NextFullSearch()) != NULL) { 00819 (*cps_super_bbox_) += part->bounding_box(); 00820 } 00821 } 00822 00823 void EquationDetect::IdentifyInlinePartsHorizontal() { 00824 ASSERT_HOST(cps_super_bbox_); 00825 GenericVector<ColPartition*> new_seeds; 00826 const int kMarginDiffTh = IntCastRounded( 00827 0.5 * lang_tesseract_->source_resolution()); 00828 const int kGapTh = static_cast<int>(roundf( 00829 1.0 * lang_tesseract_->source_resolution())); 00830 ColPartitionGridSearch search(part_grid_); 00831 search.SetUniqueMode(true); 00832 // The center x coordinate of the cp_super_bbox_. 00833 int cps_cx = cps_super_bbox_->left() + cps_super_bbox_->width() / 2; 00834 for (int i = 0; i < cp_seeds_.size(); ++i) { 00835 ColPartition* part = cp_seeds_[i]; 00836 const TBOX& part_box(part->bounding_box()); 00837 int left_margin = part_box.left() - cps_super_bbox_->left(), 00838 right_margin = cps_super_bbox_->right() - part_box.right(); 00839 bool right_to_left; 00840 if (left_margin + kMarginDiffTh < right_margin && 00841 left_margin < kMarginDiffTh) { 00842 // part is left aligned, so we search if it has any right neighbor. 00843 search.StartSideSearch( 00844 part_box.right(), part_box.top(), part_box.bottom()); 00845 right_to_left = false; 00846 } else if (left_margin > cps_cx) { 00847 // part locates on the right half on image, so search if it has any left 00848 // neighbor. 00849 search.StartSideSearch( 00850 part_box.left(), part_box.top(), part_box.bottom()); 00851 right_to_left = true; 00852 } else { // part is not an inline equation. 00853 new_seeds.push_back(part); 00854 continue; 00855 } 00856 ColPartition* neighbor = NULL; 00857 bool side_neighbor_found = false; 00858 while ((neighbor = search.NextSideSearch(right_to_left)) != NULL) { 00859 const TBOX& neighbor_box(neighbor->bounding_box()); 00860 if (!IsTextOrEquationType(neighbor->type()) || 00861 part_box.x_gap(neighbor_box) > kGapTh || 00862 !part_box.major_y_overlap(neighbor_box) || 00863 part_box.major_x_overlap(neighbor_box)) { 00864 continue; 00865 } 00866 // We have found one. Set the side_neighbor_found flag. 00867 side_neighbor_found = true; 00868 break; 00869 } 00870 if (!side_neighbor_found) { // Mark part as PT_INLINE_EQUATION. 00871 part->set_type(PT_INLINE_EQUATION); 00872 } else { 00873 // Check the geometric feature of neighbor. 00874 const TBOX& neighbor_box(neighbor->bounding_box()); 00875 if (neighbor_box.width() > part_box.width() && 00876 neighbor->type() != PT_EQUATION) { // Mark as PT_INLINE_EQUATION. 00877 part->set_type(PT_INLINE_EQUATION); 00878 } else { // part is not an inline equation type. 00879 new_seeds.push_back(part); 00880 } 00881 } 00882 } 00883 00884 // Reset the cp_seeds_ using the new_seeds. 00885 cp_seeds_ = new_seeds; 00886 } 00887 00888 int EquationDetect::EstimateTextPartLineSpacing() { 00889 ColPartitionGridSearch gsearch(part_grid_); 00890 00891 // Get the y gap between text partitions; 00892 ColPartition *current = NULL, *prev = NULL; 00893 gsearch.StartFullSearch(); 00894 GenericVector<int> ygaps; 00895 while ((current = gsearch.NextFullSearch()) != NULL) { 00896 if (!PTIsTextType(current->type())) { 00897 continue; 00898 } 00899 if (prev != NULL) { 00900 const TBOX ¤t_box = current->bounding_box(); 00901 const TBOX &prev_box = prev->bounding_box(); 00902 // prev and current should be x major overlap and non y overlap. 00903 if (current_box.major_x_overlap(prev_box) && 00904 !current_box.y_overlap(prev_box)) { 00905 int gap = current_box.y_gap(prev_box); 00906 if (gap < MIN(current_box.height(), prev_box.height())) { 00907 // The gap should be smaller than the height of the bounding boxes. 00908 ygaps.push_back(gap); 00909 } 00910 } 00911 } 00912 prev = current; 00913 } 00914 00915 if (ygaps.size() < 8) { // We do not have enough data. 00916 return -1; 00917 } 00918 00919 // Compute the line spacing from ygaps: use the mean of the first half. 00920 ygaps.sort(); 00921 int spacing = 0, count; 00922 for (count = 0; count < ygaps.size() / 2; count++) { 00923 spacing += ygaps[count]; 00924 } 00925 return spacing / count; 00926 } 00927 00928 void EquationDetect::IdentifyInlinePartsVertical( 00929 const bool top_to_bottom, const int textparts_linespacing) { 00930 if (cp_seeds_.empty()) { 00931 return; 00932 } 00933 00934 // Sort cp_seeds_. 00935 if (top_to_bottom) { // From top to bottom. 00936 cp_seeds_.sort(&SortCPByTopReverse); 00937 } else { // From bottom to top. 00938 cp_seeds_.sort(&SortCPByBottom); 00939 } 00940 00941 GenericVector<ColPartition*> new_seeds; 00942 for (int i = 0; i < cp_seeds_.size(); ++i) { 00943 ColPartition* part = cp_seeds_[i]; 00944 // If we sort cp_seeds_ from top to bottom, then for each cp_seeds_, we look 00945 // for its top neighbors, so that if two/more inline regions are connected 00946 // to each other, then we will identify the top one, and then use it to 00947 // identify the bottom one. 00948 if (IsInline(!top_to_bottom, textparts_linespacing, part)) { 00949 part->set_type(PT_INLINE_EQUATION); 00950 } else { 00951 new_seeds.push_back(part); 00952 } 00953 } 00954 cp_seeds_ = new_seeds; 00955 } 00956 00957 bool EquationDetect::IsInline(const bool search_bottom, 00958 const int textparts_linespacing, 00959 ColPartition* part) { 00960 ASSERT_HOST(part != NULL); 00961 // Look for its nearest vertical neighbor that hardly overlaps in y but 00962 // largely overlaps in x. 00963 ColPartitionGridSearch search(part_grid_); 00964 ColPartition *neighbor = NULL; 00965 const TBOX& part_box(part->bounding_box()); 00966 const float kYGapRatioTh = 1.0; 00967 00968 if (search_bottom) { 00969 search.StartVerticalSearch(part_box.left(), part_box.right(), 00970 part_box.bottom()); 00971 } else { 00972 search.StartVerticalSearch(part_box.left(), part_box.right(), 00973 part_box.top()); 00974 } 00975 search.SetUniqueMode(true); 00976 while ((neighbor = search.NextVerticalSearch(search_bottom)) != NULL) { 00977 const TBOX& neighbor_box(neighbor->bounding_box()); 00978 if (part_box.y_gap(neighbor_box) > kYGapRatioTh * 00979 MIN(part_box.height(), neighbor_box.height())) { 00980 // Finished searching. 00981 break; 00982 } 00983 if (!PTIsTextType(neighbor->type())) { 00984 continue; 00985 } 00986 00987 // Check if neighbor and part is inline similar. 00988 const float kHeightRatioTh = 0.5; 00989 const int kYGapTh = textparts_linespacing > 0 ? 00990 textparts_linespacing + static_cast<int>(roundf(0.02 * resolution_)): 00991 static_cast<int>(roundf(0.05 * resolution_)); // Default value. 00992 if (part_box.x_overlap(neighbor_box) && // Location feature. 00993 part_box.y_gap(neighbor_box) <= kYGapTh && // Line spacing. 00994 // Geo feature. 00995 static_cast<float>(MIN(part_box.height(), neighbor_box.height())) / 00996 MAX(part_box.height(), neighbor_box.height()) > kHeightRatioTh) { 00997 return true; 00998 } 00999 } 01000 01001 return false; 01002 } 01003 01004 bool EquationDetect::CheckSeedBlobsCount(ColPartition* part) { 01005 if (!part) { 01006 return false; 01007 } 01008 const int kSeedMathBlobsCount = 2; 01009 const int kSeedMathDigitBlobsCount = 5; 01010 01011 int blobs = part->boxes_count(), 01012 math_blobs = part->SpecialBlobsCount(BSTT_MATH), 01013 digit_blobs = part->SpecialBlobsCount(BSTT_DIGIT); 01014 if (blobs < kSeedBlobsCountTh || math_blobs <= kSeedMathBlobsCount || 01015 math_blobs + digit_blobs <= kSeedMathDigitBlobsCount) { 01016 return false; 01017 } 01018 01019 return true; 01020 } 01021 01022 bool EquationDetect::CheckSeedDensity( 01023 const float math_density_high, 01024 const float math_density_low, 01025 const ColPartition* part) const { 01026 ASSERT_HOST(part); 01027 float math_digit_density = part->SpecialBlobsDensity(BSTT_MATH) 01028 + part->SpecialBlobsDensity(BSTT_DIGIT); 01029 float italic_density = part->SpecialBlobsDensity(BSTT_ITALIC); 01030 if (math_digit_density > math_density_high) { 01031 return true; 01032 } 01033 if (math_digit_density + italic_density > kMathItalicDensityTh && 01034 math_digit_density > math_density_low) { 01035 return true; 01036 } 01037 01038 return false; 01039 } 01040 01041 EquationDetect::IndentType EquationDetect::IsIndented(ColPartition* part) { 01042 ASSERT_HOST(part); 01043 01044 ColPartitionGridSearch search(part_grid_); 01045 ColPartition *neighbor = NULL; 01046 const TBOX& part_box(part->bounding_box()); 01047 const int kXGapTh = static_cast<int>(roundf(0.5 * resolution_)); 01048 const int kRadiusTh = static_cast<int>(roundf(3.0 * resolution_)); 01049 const int kYGapTh = static_cast<int>(roundf(0.5 * resolution_)); 01050 01051 // Here we use a simple approximation algorithm: from the center of part, We 01052 // perform the radius search, and check if we can find a neighboring parition 01053 // that locates on the top/bottom left of part. 01054 search.StartRadSearch((part_box.left() + part_box.right()) / 2, 01055 (part_box.top() + part_box.bottom()) / 2, kRadiusTh); 01056 search.SetUniqueMode(true); 01057 bool left_indented = false, right_indented = false; 01058 while ((neighbor = search.NextRadSearch()) != NULL && 01059 (!left_indented || !right_indented)) { 01060 if (neighbor == part) { 01061 continue; 01062 } 01063 const TBOX& neighbor_box(neighbor->bounding_box()); 01064 01065 if (part_box.major_y_overlap(neighbor_box) && 01066 part_box.x_gap(neighbor_box) < kXGapTh) { 01067 // When this happens, it is likely part is a fragment of an 01068 // over-segmented colpartition. So we return false. 01069 return NO_INDENT; 01070 } 01071 01072 if (!IsTextOrEquationType(neighbor->type())) { 01073 continue; 01074 } 01075 01076 // The neighbor should be above/below part, and overlap in x direction. 01077 if (!part_box.x_overlap(neighbor_box) || part_box.y_overlap(neighbor_box)) { 01078 continue; 01079 } 01080 01081 if (part_box.y_gap(neighbor_box) < kYGapTh) { 01082 int left_gap = part_box.left() - neighbor_box.left(); 01083 int right_gap = neighbor_box.right() - part_box.right(); 01084 if (left_gap > kXGapTh) { 01085 left_indented = true; 01086 } 01087 if (right_gap > kXGapTh) { 01088 right_indented = true; 01089 } 01090 } 01091 } 01092 01093 if (left_indented && right_indented) { 01094 return BOTH_INDENT; 01095 } 01096 if (left_indented) { 01097 return LEFT_INDENT; 01098 } 01099 if (right_indented) { 01100 return RIGHT_INDENT; 01101 } 01102 return NO_INDENT; 01103 } 01104 01105 bool EquationDetect::ExpandSeed(ColPartition* seed) { 01106 if (seed == NULL || // This seed has been absorbed by other seeds. 01107 seed->IsVerticalType()) { // We skip vertical type right now. 01108 return false; 01109 } 01110 01111 // Expand in four directions. 01112 GenericVector<ColPartition*> parts_to_merge; 01113 ExpandSeedHorizontal(true, seed, &parts_to_merge); 01114 ExpandSeedHorizontal(false, seed, &parts_to_merge); 01115 ExpandSeedVertical(true, seed, &parts_to_merge); 01116 ExpandSeedVertical(false, seed, &parts_to_merge); 01117 SearchByOverlap(seed, &parts_to_merge); 01118 01119 if (parts_to_merge.empty()) { // We don't find any partition to merge. 01120 return false; 01121 } 01122 01123 // Merge all partitions in parts_to_merge with seed. We first remove seed 01124 // from part_grid_ as its bounding box is going to expand. Then we add it 01125 // back after it aborbs all parts_to_merge parititions. 01126 part_grid_->RemoveBBox(seed); 01127 for (int i = 0; i < parts_to_merge.size(); ++i) { 01128 ColPartition* part = parts_to_merge[i]; 01129 if (part->type() == PT_EQUATION) { 01130 // If part is in cp_seeds_, then we mark it as NULL so that we won't 01131 // process it again. 01132 for (int j = 0; j < cp_seeds_.size(); ++j) { 01133 if (part == cp_seeds_[j]) { 01134 cp_seeds_[j] = NULL; 01135 break; 01136 } 01137 } 01138 } 01139 01140 // part has already been removed from part_grid_ in function 01141 // ExpandSeedHorizontal/ExpandSeedVertical. 01142 seed->Absorb(part, NULL); 01143 } 01144 01145 return true; 01146 } 01147 01148 void EquationDetect::ExpandSeedHorizontal( 01149 const bool search_left, 01150 ColPartition* seed, 01151 GenericVector<ColPartition*>* parts_to_merge) { 01152 ASSERT_HOST(seed != NULL && parts_to_merge != NULL); 01153 const float kYOverlapTh = 0.6; 01154 const int kXGapTh = static_cast<int>(roundf(0.2 * resolution_)); 01155 01156 ColPartitionGridSearch search(part_grid_); 01157 const TBOX& seed_box(seed->bounding_box()); 01158 int x = search_left ? seed_box.left() : seed_box.right(); 01159 search.StartSideSearch(x, seed_box.bottom(), seed_box.top()); 01160 search.SetUniqueMode(true); 01161 01162 // Search iteratively. 01163 ColPartition *part = NULL; 01164 while ((part = search.NextSideSearch(search_left)) != NULL) { 01165 if (part == seed) { 01166 continue; 01167 } 01168 const TBOX& part_box(part->bounding_box()); 01169 if (part_box.x_gap(seed_box) > kXGapTh) { // Out of scope. 01170 break; 01171 } 01172 01173 // Check part location. 01174 if ((part_box.left() >= seed_box.left() && search_left) || 01175 (part_box.right() <= seed_box.right() && !search_left)) { 01176 continue; 01177 } 01178 01179 if (part->type() != PT_EQUATION) { // Non-equation type. 01180 // Skip PT_LINLINE_EQUATION and non text type. 01181 if (part->type() == PT_INLINE_EQUATION || 01182 (!IsTextOrEquationType(part->type()) && 01183 part->blob_type() != BRT_HLINE)) { 01184 continue; 01185 } 01186 // For other types, it should be the near small neighbor of seed. 01187 if (!IsNearSmallNeighbor(seed_box, part_box) || 01188 !CheckSeedNeighborDensity(part)) { 01189 continue; 01190 } 01191 } else { // Equation type, check the y overlap. 01192 if (part_box.y_overlap_fraction(seed_box) < kYOverlapTh && 01193 seed_box.y_overlap_fraction(part_box) < kYOverlapTh) { 01194 continue; 01195 } 01196 } 01197 01198 // Passed the check, delete it from search and add into parts_to_merge. 01199 search.RemoveBBox(); 01200 parts_to_merge->push_back(part); 01201 } 01202 } 01203 01204 void EquationDetect::ExpandSeedVertical( 01205 const bool search_bottom, 01206 ColPartition* seed, 01207 GenericVector<ColPartition*>* parts_to_merge) { 01208 ASSERT_HOST(seed != NULL && parts_to_merge != NULL && 01209 cps_super_bbox_ != NULL); 01210 const float kXOverlapTh = 0.4; 01211 const int kYGapTh = static_cast<int>(roundf(0.2 * resolution_)); 01212 01213 ColPartitionGridSearch search(part_grid_); 01214 const TBOX& seed_box(seed->bounding_box()); 01215 int y = search_bottom ? seed_box.bottom() : seed_box.top(); 01216 search.StartVerticalSearch( 01217 cps_super_bbox_->left(), cps_super_bbox_->right(), y); 01218 search.SetUniqueMode(true); 01219 01220 // Search iteratively. 01221 ColPartition *part = NULL; 01222 GenericVector<ColPartition*> parts; 01223 int skipped_min_top = INT_MAX, skipped_max_bottom = -1; 01224 while ((part = search.NextVerticalSearch(search_bottom)) != NULL) { 01225 if (part == seed) { 01226 continue; 01227 } 01228 const TBOX& part_box(part->bounding_box()); 01229 01230 if (part_box.y_gap(seed_box) > kYGapTh) { // Out of scope. 01231 break; 01232 } 01233 01234 // Check part location. 01235 if ((part_box.bottom() >= seed_box.bottom() && search_bottom) || 01236 (part_box.top() <= seed_box.top() && !search_bottom)) { 01237 continue; 01238 } 01239 01240 bool skip_part = false; 01241 if (part->type() != PT_EQUATION) { // Non-equation type. 01242 // Skip PT_LINLINE_EQUATION and non text type. 01243 if (part->type() == PT_INLINE_EQUATION || 01244 (!IsTextOrEquationType(part->type()) && 01245 part->blob_type() != BRT_HLINE)) { 01246 skip_part = true; 01247 } else if (!IsNearSmallNeighbor(seed_box, part_box) || 01248 !CheckSeedNeighborDensity(part)) { 01249 // For other types, it should be the near small neighbor of seed. 01250 skip_part = true; 01251 } 01252 } else { // Equation type, check the x overlap. 01253 if (part_box.x_overlap_fraction(seed_box) < kXOverlapTh && 01254 seed_box.x_overlap_fraction(part_box) < kXOverlapTh) { 01255 skip_part = true; 01256 } 01257 } 01258 if (skip_part) { 01259 if (part->type() != PT_EQUATION) { 01260 if (skipped_min_top > part_box.top()) { 01261 skipped_min_top = part_box.top(); 01262 } 01263 if (skipped_max_bottom < part_box.bottom()) { 01264 skipped_max_bottom = part_box.bottom(); 01265 } 01266 } 01267 } else { 01268 parts.push_back(part); 01269 } 01270 } 01271 01272 // For every part in parts, we need verify it is not above skipped_min_top 01273 // when search top, or not below skipped_max_bottom when search bottom. I.e., 01274 // we will skip a part if it looks like: 01275 // search bottom | search top 01276 // seed: ****************** | part: ********** 01277 // skipped: xxx | skipped: xxx 01278 // part: ********** | seed: *********** 01279 for (int i = 0; i < parts.size(); i++) { 01280 const TBOX& part_box(parts[i]->bounding_box()); 01281 if ((search_bottom && part_box.top() <= skipped_max_bottom) || 01282 (!search_bottom && part_box.bottom() >= skipped_min_top)) { 01283 continue; 01284 } 01285 // Add parts[i] into parts_to_merge, and delete it from part_grid_. 01286 parts_to_merge->push_back(parts[i]); 01287 part_grid_->RemoveBBox(parts[i]); 01288 } 01289 } 01290 01291 bool EquationDetect::IsNearSmallNeighbor(const TBOX& seed_box, 01292 const TBOX& part_box) const { 01293 const int kXGapTh = static_cast<int>(roundf(0.25 * resolution_)); 01294 const int kYGapTh = static_cast<int>(roundf(0.05 * resolution_)); 01295 01296 // Check geometric feature. 01297 if (part_box.height() > seed_box.height() || 01298 part_box.width() > seed_box.width()) { 01299 return false; 01300 } 01301 01302 // Check overlap and distance. 01303 if ((!part_box.major_x_overlap(seed_box) || 01304 part_box.y_gap(seed_box) > kYGapTh) && 01305 (!part_box.major_y_overlap(seed_box) || 01306 part_box.x_gap(seed_box) > kXGapTh)) { 01307 return false; 01308 } 01309 01310 return true; 01311 } 01312 01313 bool EquationDetect::CheckSeedNeighborDensity(const ColPartition* part) const { 01314 ASSERT_HOST(part); 01315 if (part->boxes_count() < kSeedBlobsCountTh) { 01316 // Too few blobs, skip the check. 01317 return true; 01318 } 01319 01320 // We check the math blobs density and the unclear blobs density. 01321 if (part->SpecialBlobsDensity(BSTT_MATH) + 01322 part->SpecialBlobsDensity(BSTT_DIGIT) > kMathDigitDensityTh1 || 01323 part->SpecialBlobsDensity(BSTT_UNCLEAR) > kUnclearDensityTh) { 01324 return true; 01325 } 01326 01327 return false; 01328 } 01329 01330 void EquationDetect::ProcessMathBlockSatelliteParts() { 01331 // Iterate over part_grid_, and find all parts that are text type but not 01332 // equation type. 01333 ColPartition *part = NULL; 01334 GenericVector<ColPartition*> text_parts; 01335 ColPartitionGridSearch gsearch(part_grid_); 01336 gsearch.StartFullSearch(); 01337 while ((part = gsearch.NextFullSearch()) != NULL) { 01338 if (part->type() == PT_FLOWING_TEXT || part->type() == PT_HEADING_TEXT) { 01339 text_parts.push_back(part); 01340 } 01341 } 01342 if (text_parts.empty()) { 01343 return; 01344 } 01345 01346 // Compute the medium height of the text_parts. 01347 text_parts.sort(&SortCPByHeight); 01348 const TBOX& text_box = text_parts[text_parts.size() / 2]->bounding_box(); 01349 int med_height = text_box.height(); 01350 if (text_parts.size() % 2 == 0 && text_parts.size() > 1) { 01351 const TBOX& text_box = 01352 text_parts[text_parts.size() / 2 - 1]->bounding_box(); 01353 med_height = static_cast<int>(roundf( 01354 0.5 * (text_box.height() + med_height))); 01355 } 01356 01357 // Iterate every text_parts and check if it is a math block satellite. 01358 for (int i = 0; i < text_parts.size(); ++i) { 01359 const TBOX& text_box(text_parts[i]->bounding_box()); 01360 if (text_box.height() > med_height) { 01361 continue; 01362 } 01363 GenericVector<ColPartition*> math_blocks; 01364 if (!IsMathBlockSatellite(text_parts[i], &math_blocks)) { 01365 continue; 01366 } 01367 01368 // Found. merge text_parts[i] with math_blocks. 01369 part_grid_->RemoveBBox(text_parts[i]); 01370 text_parts[i]->set_type(PT_EQUATION); 01371 for (int j = 0; j < math_blocks.size(); ++j) { 01372 part_grid_->RemoveBBox(math_blocks[j]); 01373 text_parts[i]->Absorb(math_blocks[j], NULL); 01374 } 01375 InsertPartAfterAbsorb(text_parts[i]); 01376 } 01377 } 01378 01379 bool EquationDetect::IsMathBlockSatellite( 01380 ColPartition* part, GenericVector<ColPartition*>* math_blocks) { 01381 ASSERT_HOST(part != NULL && math_blocks != NULL); 01382 math_blocks->clear(); 01383 const TBOX& part_box(part->bounding_box()); 01384 // Find the top/bottom nearest neighbor of part. 01385 ColPartition *neighbors[2]; 01386 int y_gaps[2] = {INT_MAX, INT_MAX}; 01387 // The horizontal boundary of the neighbors. 01388 int neighbors_left = INT_MAX, neighbors_right = 0; 01389 for (int i = 0; i < 2; ++i) { 01390 neighbors[i] = SearchNNVertical(i != 0, part); 01391 if (neighbors[i]) { 01392 const TBOX& neighbor_box = neighbors[i]->bounding_box(); 01393 y_gaps[i] = neighbor_box.y_gap(part_box); 01394 if (neighbor_box.left() < neighbors_left) { 01395 neighbors_left = neighbor_box.left(); 01396 } 01397 if (neighbor_box.right() > neighbors_right) { 01398 neighbors_right = neighbor_box.right(); 01399 } 01400 } 01401 } 01402 if (neighbors[0] == neighbors[1]) { 01403 // This happens when part is inside neighbor. 01404 neighbors[1] = NULL; 01405 y_gaps[1] = INT_MAX; 01406 } 01407 01408 // Check if part is within [neighbors_left, neighbors_right]. 01409 if (part_box.left() < neighbors_left || part_box.right() > neighbors_right) { 01410 return false; 01411 } 01412 01413 // Get the index of the near one in neighbors. 01414 int index = y_gaps[0] < y_gaps[1] ? 0 : 1; 01415 01416 // Check the near one. 01417 if (IsNearMathNeighbor(y_gaps[index], neighbors[index])) { 01418 math_blocks->push_back(neighbors[index]); 01419 } else { 01420 // If the near one failed the check, then we skip checking the far one. 01421 return false; 01422 } 01423 01424 // Check the far one. 01425 index = 1 - index; 01426 if (IsNearMathNeighbor(y_gaps[index], neighbors[index])) { 01427 math_blocks->push_back(neighbors[index]); 01428 } 01429 01430 return true; 01431 } 01432 01433 ColPartition* EquationDetect::SearchNNVertical( 01434 const bool search_bottom, const ColPartition* part) { 01435 ASSERT_HOST(part); 01436 ColPartition *nearest_neighbor = NULL, *neighbor = NULL; 01437 const int kYGapTh = static_cast<int>(roundf(resolution_ * 0.5)); 01438 01439 ColPartitionGridSearch search(part_grid_); 01440 search.SetUniqueMode(true); 01441 const TBOX& part_box(part->bounding_box()); 01442 int y = search_bottom ? part_box.bottom() : part_box.top(); 01443 search.StartVerticalSearch(part_box.left(), part_box.right(), y); 01444 int min_y_gap = INT_MAX; 01445 while ((neighbor = search.NextVerticalSearch(search_bottom)) != NULL) { 01446 if (neighbor == part || !IsTextOrEquationType(neighbor->type())) { 01447 continue; 01448 } 01449 const TBOX& neighbor_box(neighbor->bounding_box()); 01450 int y_gap = neighbor_box.y_gap(part_box); 01451 if (y_gap > kYGapTh) { // Out of scope. 01452 break; 01453 } 01454 if (!neighbor_box.major_x_overlap(part_box) || 01455 (search_bottom && neighbor_box.bottom() > part_box.bottom()) || 01456 (!search_bottom && neighbor_box.top() < part_box.top())) { 01457 continue; 01458 } 01459 if (y_gap < min_y_gap) { 01460 min_y_gap = y_gap; 01461 nearest_neighbor = neighbor; 01462 } 01463 } 01464 01465 return nearest_neighbor; 01466 } 01467 01468 bool EquationDetect::IsNearMathNeighbor( 01469 const int y_gap, const ColPartition *neighbor) const { 01470 if (!neighbor) { 01471 return false; 01472 } 01473 const int kYGapTh = static_cast<int>(roundf(resolution_ * 0.1)); 01474 return neighbor->type() == PT_EQUATION && y_gap <= kYGapTh; 01475 } 01476 01477 void EquationDetect::GetOutputTiffName(const char* name, 01478 STRING* image_name) const { 01479 ASSERT_HOST(image_name && name); 01480 char page[50]; 01481 snprintf(page, sizeof(page), "%04d", page_count_); 01482 *image_name = STRING(lang_tesseract_->imagebasename) + page + name + ".tif"; 01483 } 01484 01485 void EquationDetect::PaintSpecialTexts(const STRING& outfile) const { 01486 Pix *pix = NULL, *pixBi = lang_tesseract_->pix_binary(); 01487 pix = pixConvertTo32(pixBi); 01488 ColPartitionGridSearch gsearch(part_grid_); 01489 ColPartition* part = NULL; 01490 gsearch.StartFullSearch(); 01491 while ((part = gsearch.NextFullSearch()) != NULL) { 01492 BLOBNBOX_C_IT blob_it(part->boxes()); 01493 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { 01494 RenderSpecialText(pix, blob_it.data()); 01495 } 01496 } 01497 01498 pixWrite(outfile.string(), pix, IFF_TIFF_LZW); 01499 pixDestroy(&pix); 01500 } 01501 01502 void EquationDetect::PaintColParts(const STRING& outfile) const { 01503 Pix *pix = pixConvertTo32(lang_tesseract_->BestPix()); 01504 ColPartitionGridSearch gsearch(part_grid_); 01505 gsearch.StartFullSearch(); 01506 ColPartition* part = NULL; 01507 while ((part = gsearch.NextFullSearch()) != NULL) { 01508 const TBOX& tbox = part->bounding_box(); 01509 Box *box = boxCreate(tbox.left(), pixGetHeight(pix) - tbox.top(), 01510 tbox.width(), tbox.height()); 01511 if (part->type() == PT_EQUATION) { 01512 pixRenderBoxArb(pix, box, 5, 255, 0, 0); 01513 } else if (part->type() == PT_INLINE_EQUATION) { 01514 pixRenderBoxArb(pix, box, 5, 0, 255, 0); 01515 } else { 01516 pixRenderBoxArb(pix, box, 5, 0, 0, 255); 01517 } 01518 boxDestroy(&box); 01519 } 01520 01521 pixWrite(outfile.string(), pix, IFF_TIFF_LZW); 01522 pixDestroy(&pix); 01523 } 01524 01525 void EquationDetect::PrintSpecialBlobsDensity(const ColPartition* part) const { 01526 ASSERT_HOST(part); 01527 TBOX box(part->bounding_box()); 01528 int h = pixGetHeight(lang_tesseract_->BestPix()); 01529 tprintf("Printing special blobs density values for ColParition (t=%d,b=%d) ", 01530 h - box.top(), h - box.bottom()); 01531 box.print(); 01532 tprintf("blobs count = %d, density = ", part->boxes_count()); 01533 for (int i = 0; i < BSTT_COUNT; ++i) { 01534 BlobSpecialTextType type = static_cast<BlobSpecialTextType>(i); 01535 tprintf("%d:%f ", i, part->SpecialBlobsDensity(type)); 01536 } 01537 tprintf("\n"); 01538 } 01539 01540 }; // namespace tesseract