Tesseract
3.02
|
00001 00002 // File: colfind.cpp 00003 // Description: Class to hold BLOBNBOXs in a grid for fast access 00004 // to neighbours. 00005 // Author: Ray Smith 00006 // Created: Wed Jun 06 17:22:01 PDT 2007 00007 // 00008 // (C) Copyright 2007, Google Inc. 00009 // Licensed under the Apache License, Version 2.0 (the "License"); 00010 // you may not use this file except in compliance with the License. 00011 // You may obtain a copy of the License at 00012 // http://www.apache.org/licenses/LICENSE-2.0 00013 // Unless required by applicable law or agreed to in writing, software 00014 // distributed under the License is distributed on an "AS IS" BASIS, 00015 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00016 // See the License for the specific language governing permissions and 00017 // limitations under the License. 00018 // 00020 00021 #ifdef _MSC_VER 00022 #pragma warning(disable:4244) // Conversion warnings 00023 #endif 00024 00025 #include "colfind.h" 00026 00027 #include "ccnontextdetect.h" 00028 #include "colpartition.h" 00029 #include "colpartitionset.h" 00030 #include "equationdetectbase.h" 00031 #include "linefind.h" 00032 #include "normalis.h" 00033 #include "strokewidth.h" 00034 #include "blobbox.h" 00035 #include "scrollview.h" 00036 #include "tablefind.h" 00037 #include "params.h" 00038 #include "workingpartset.h" 00039 00040 // Include automatically generated configuration file if running autoconf. 00041 #ifdef HAVE_CONFIG_H 00042 #include "config_auto.h" 00043 #endif 00044 00045 namespace tesseract { 00046 00047 // Minimum width (in pixels) to be considered when making columns. 00048 // TODO(rays) convert to inches, dependent on resolution. 00049 const int kMinColumnWidth = 100; 00050 // When assigning columns, the max number of misfit grid rows/ColPartitionSets 00051 // that can be ignored. 00052 const int kMaxIncompatibleColumnCount = 2; 00053 // Min fraction of ColPartition height to be overlapping for margin purposes. 00054 const double kMarginOverlapFraction = 0.25; 00055 // Max fraction of mean_column_gap_ for the gap between two partitions within a 00056 // column to allow them to merge. 00057 const double kHorizontalGapMergeFraction = 0.5; 00058 // Min fraction of grid size to not be considered likely noise. 00059 const double kMinNonNoiseFraction = 0.5; 00060 // Minimum gutter width as a fraction of gridsize 00061 const double kMinGutterWidthGrid = 0.5; 00062 // Max multiple of a partition's median size as a distance threshold for 00063 // adding noise blobs. 00064 const double kMaxDistToPartSizeRatio = 1.5; 00065 00066 BOOL_VAR(textord_tabfind_show_initial_partitions, 00067 false, "Show partition bounds"); 00068 BOOL_VAR(textord_tabfind_show_reject_blobs, 00069 false, "Show blobs rejected as noise"); 00070 INT_VAR(textord_tabfind_show_partitions, 0, 00071 "Show partition bounds, waiting if >1"); 00072 BOOL_VAR(textord_tabfind_show_columns, false, "Show column bounds"); 00073 BOOL_VAR(textord_tabfind_show_blocks, false, "Show final block bounds"); 00074 BOOL_VAR(textord_tabfind_find_tables, true, "run table detection"); 00075 00076 ScrollView* ColumnFinder::blocks_win_ = NULL; 00077 00078 // Gridsize is an estimate of the text size in the image. A suitable value 00079 // is in TO_BLOCK::line_size after find_components has been used to make 00080 // the blobs. 00081 // bleft and tright are the bounds of the image (or rectangle) being processed. 00082 // vlines is a (possibly empty) list of TabVector and vertical_x and y are 00083 // the sum logical vertical vector produced by LineFinder::FindVerticalLines. 00084 ColumnFinder::ColumnFinder(int gridsize, 00085 const ICOORD& bleft, const ICOORD& tright, 00086 int resolution, 00087 TabVector_LIST* vlines, TabVector_LIST* hlines, 00088 int vertical_x, int vertical_y) 00089 : TabFind(gridsize, bleft, tright, vlines, vertical_x, vertical_y, 00090 resolution), 00091 min_gutter_width_(static_cast<int>(kMinGutterWidthGrid * gridsize)), 00092 mean_column_gap_(tright.x() - bleft.x()), 00093 reskew_(1.0f, 0.0f), rotation_(1.0f, 0.0f), rerotate_(1.0f, 0.0f), 00094 best_columns_(NULL), stroke_width_(NULL), 00095 part_grid_(gridsize, bleft, tright), nontext_map_(NULL), 00096 projection_(resolution), 00097 denorm_(NULL), input_blobs_win_(NULL), equation_detect_(NULL) { 00098 TabVector_IT h_it(&horizontal_lines_); 00099 h_it.add_list_after(hlines); 00100 } 00101 00102 ColumnFinder::~ColumnFinder() { 00103 column_sets_.delete_data_pointers(); 00104 if (best_columns_ != NULL) { 00105 delete [] best_columns_; 00106 } 00107 if (stroke_width_ != NULL) 00108 delete stroke_width_; 00109 delete input_blobs_win_; 00110 pixDestroy(&nontext_map_); 00111 while (denorm_ != NULL) { 00112 DENORM* dead_denorm = denorm_; 00113 denorm_ = const_cast<DENORM*>(denorm_->predecessor()); 00114 delete dead_denorm; 00115 } 00116 00117 // The ColPartitions are destroyed automatically, but any boxes in 00118 // the noise_parts_ list are owned and need to be deleted explicitly. 00119 ColPartition_IT part_it(&noise_parts_); 00120 for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) { 00121 ColPartition* part = part_it.data(); 00122 part->DeleteBoxes(); 00123 } 00124 // Likewise any boxes in the good_parts_ list need to be deleted. 00125 // These are just the image parts. Text parts have already given their 00126 // boxes on to the TO_BLOCK, and have empty lists. 00127 part_it.set_to_list(&good_parts_); 00128 for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) { 00129 ColPartition* part = part_it.data(); 00130 part->DeleteBoxes(); 00131 } 00132 // Also, any blobs on the image_bblobs_ list need to have their cblobs 00133 // deleted. This only happens if there has been an early return from 00134 // FindColumns, as in a normal return, the blobs go into the grid and 00135 // end up in noise_parts_, good_parts_ or the output blocks. 00136 BLOBNBOX_IT bb_it(&image_bblobs_); 00137 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) { 00138 BLOBNBOX* bblob = bb_it.data(); 00139 delete bblob->cblob(); 00140 } 00141 } 00142 00143 // Performs initial processing on the blobs in the input_block: 00144 // Setup the part_grid, stroke_width_, nontext_map. 00145 // Obvious noise blobs are filtered out and used to mark the nontext_map_. 00146 // Initial stroke-width analysis is used to get local text alignment 00147 // direction, so the textline projection_ map can be setup. 00148 // On return, IsVerticallyAlignedText may be called (now optionally) to 00149 // determine the gross textline alignment of the page. 00150 void ColumnFinder::SetupAndFilterNoise(Pix* photo_mask_pix, 00151 TO_BLOCK* input_block) { 00152 part_grid_.Init(gridsize(), bleft(), tright()); 00153 if (stroke_width_ != NULL) 00154 delete stroke_width_; 00155 stroke_width_ = new StrokeWidth(gridsize(), bleft(), tright()); 00156 min_gutter_width_ = static_cast<int>(kMinGutterWidthGrid * gridsize()); 00157 input_block->ReSetAndReFilterBlobs(); 00158 #ifndef GRAPHICS_DISABLED 00159 if (textord_tabfind_show_blocks) { 00160 input_blobs_win_ = MakeWindow(0, 0, "Filtered Input Blobs"); 00161 input_block->plot_graded_blobs(input_blobs_win_); 00162 } 00163 #endif // GRAPHICS_DISABLED 00164 SetBlockRuleEdges(input_block); 00165 pixDestroy(&nontext_map_); 00166 // Run a preliminary strokewidth neighbour detection on the medium blobs. 00167 stroke_width_->SetNeighboursOnMediumBlobs(input_block); 00168 CCNonTextDetect nontext_detect(gridsize(), bleft(), tright()); 00169 // Remove obvious noise and make the initial non-text map. 00170 nontext_map_ = nontext_detect.ComputeNonTextMask(textord_debug_tabfind, 00171 photo_mask_pix, input_block); 00172 // TODO(rays) experiment with making broken CJK fixing dependent on the 00173 // language, and keeping the merged blobs on output instead of exploding at 00174 // ColPartition::MakeBlock. 00175 stroke_width_->FindTextlineDirectionAndFixBrokenCJK(true, input_block); 00176 // Clear the strokewidth grid ready for rotation or leader finding. 00177 stroke_width_->Clear(); 00178 } 00179 00180 // Tests for vertical alignment of text (returning true if so), and generates 00181 // a list of blobs of moderate aspect ratio, in the most frequent writing 00182 // direction (in osd_blobs) for orientation and script detection to test 00183 // the character orientation. 00184 // block is the single block for the whole page or rectangle to be OCRed. 00185 // Note that the vertical alignment may be due to text whose writing direction 00186 // is vertical, like say Japanese, or due to text whose writing direction is 00187 // horizontal but whose text appears vertically aligned because the image is 00188 // not the right way up. 00189 bool ColumnFinder::IsVerticallyAlignedText(TO_BLOCK* block, 00190 BLOBNBOX_CLIST* osd_blobs) { 00191 return stroke_width_->TestVerticalTextDirection(block, osd_blobs); 00192 } 00193 00194 // Rotates the blobs and the TabVectors so that the gross writing direction 00195 // (text lines) are horizontal and lines are read down the page. 00196 // Applied rotation stored in rotation_. 00197 // A second rotation is calculated for application during recognition to 00198 // make the rotated blobs upright for recognition. 00199 // Subsequent rotation stored in text_rotation_. 00200 // 00201 // Arguments: 00202 // vertical_text_lines true if the text lines are vertical. 00203 // recognition_rotation [0..3] is the number of anti-clockwise 90 degree 00204 // rotations from osd required for the text to be upright and readable. 00205 void ColumnFinder::CorrectOrientation(TO_BLOCK* block, 00206 bool vertical_text_lines, 00207 int recognition_rotation) { 00208 const FCOORD anticlockwise90(0.0f, 1.0f); 00209 const FCOORD clockwise90(0.0f, -1.0f); 00210 const FCOORD rotation180(-1.0f, 0.0f); 00211 const FCOORD norotation(1.0f, 0.0f); 00212 00213 text_rotation_ = norotation; 00214 // Rotate the page to make the text upright, as implied by 00215 // recognition_rotation. 00216 rotation_ = norotation; 00217 if (recognition_rotation == 1) { 00218 rotation_ = anticlockwise90; 00219 } else if (recognition_rotation == 2) { 00220 rotation_ = rotation180; 00221 } else if (recognition_rotation == 3) { 00222 rotation_ = clockwise90; 00223 } 00224 // We infer text writing direction to be vertical if there are several 00225 // vertical text lines detected, and horizontal if not. But if the page 00226 // orientation was determined to be 90 or 270 degrees, the true writing 00227 // direction is the opposite of what we inferred. 00228 if (recognition_rotation & 1) { 00229 vertical_text_lines = !vertical_text_lines; 00230 } 00231 // If we still believe the writing direction is vertical, we use the 00232 // convention of rotating the page ccw 90 degrees to make the text lines 00233 // horizontal, and mark the blobs for rotation cw 90 degrees for 00234 // classification so that the text order is correct after recognition. 00235 if (vertical_text_lines) { 00236 rotation_.rotate(anticlockwise90); 00237 text_rotation_.rotate(clockwise90); 00238 } 00239 // Set rerotate_ to the inverse of rotation_. 00240 rerotate_ = FCOORD(rotation_.x(), -rotation_.y()); 00241 if (rotation_.x() != 1.0f || rotation_.y() != 0.0f) { 00242 // Rotate all the blobs and tab vectors. 00243 RotateBlobList(rotation_, &block->large_blobs); 00244 RotateBlobList(rotation_, &block->blobs); 00245 RotateBlobList(rotation_, &block->small_blobs); 00246 RotateBlobList(rotation_, &block->noise_blobs); 00247 TabFind::ResetForVerticalText(rotation_, rerotate_, &horizontal_lines_, 00248 &min_gutter_width_); 00249 part_grid_.Init(gridsize(), bleft(), tright()); 00250 // Reset all blobs to initial state and filter by size. 00251 // Since they have rotated, the list they belong on could have changed. 00252 block->ReSetAndReFilterBlobs(); 00253 SetBlockRuleEdges(block); 00254 stroke_width_->CorrectForRotation(rerotate_, &part_grid_); 00255 } 00256 if (textord_debug_tabfind) { 00257 tprintf("Vertical=%d, orientation=%d, final rotation=(%f, %f)+(%f,%f)\n", 00258 vertical_text_lines, recognition_rotation, 00259 rotation_.x(), rotation_.y(), 00260 text_rotation_.x(), text_rotation_.y()); 00261 } 00262 // Setup the denormalization. 00263 ASSERT_HOST(denorm_ == NULL); 00264 denorm_ = new DENORM; 00265 denorm_->SetupNormalization(NULL, NULL, &rotation_, NULL, NULL, 0, 00266 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f); 00267 } 00268 00269 // Finds blocks of text, image, rule line, table etc, returning them in the 00270 // blocks and to_blocks 00271 // (Each TO_BLOCK points to the basic BLOCK and adds more information.) 00272 // Image blocks are generated by a combination of photo_mask_pix (which may 00273 // NOT be NULL) and the rejected text found during preliminary textline 00274 // finding. 00275 // The input_block is the result of a call to find_components, and contains 00276 // the blobs found in the image or rectangle to be OCRed. These blobs will be 00277 // removed and placed in the output blocks, while unused ones will be deleted. 00278 // If single_column is true, the input is treated as single column, but 00279 // it is still divided into blocks of equal line spacing/text size. 00280 // scaled_color is scaled down by scaled_factor from the input color image, 00281 // and may be NULL if the input was not color. 00282 // Returns -1 if the user hits the 'd' key in the blocks window while running 00283 // in debug mode, which requests a retry with more debug info. 00284 int ColumnFinder::FindBlocks(bool single_column, 00285 Pix* scaled_color, int scaled_factor, 00286 TO_BLOCK* input_block, Pix* photo_mask_pix, 00287 BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks) { 00288 pixOr(photo_mask_pix, photo_mask_pix, nontext_map_); 00289 stroke_width_->FindLeaderPartitions(input_block, &part_grid_); 00290 stroke_width_->RemoveLineResidue(&big_parts_); 00291 FindInitialTabVectors(NULL, min_gutter_width_, input_block); 00292 SetBlockRuleEdges(input_block); 00293 stroke_width_->GradeBlobsIntoPartitions(rerotate_, input_block, nontext_map_, 00294 denorm_, &projection_, 00295 &part_grid_, &big_parts_); 00296 ImageFind::FindImagePartitions(photo_mask_pix, rotation_, rerotate_, 00297 input_block, this, &part_grid_, &big_parts_); 00298 ImageFind::TransferImagePartsToImageMask(rerotate_, &part_grid_, 00299 photo_mask_pix); 00300 ImageFind::FindImagePartitions(photo_mask_pix, rotation_, rerotate_, 00301 input_block, this, &part_grid_, &big_parts_); 00302 part_grid_.ReTypeBlobs(&image_bblobs_); 00303 TidyBlobs(input_block); 00304 Reset(); 00305 // TODO(rays) need to properly handle big_parts_. 00306 ColPartition_IT p_it(&big_parts_); 00307 for (p_it.mark_cycle_pt(); !p_it.cycled_list(); p_it.forward()) 00308 p_it.data()->DisownBoxes(); 00309 big_parts_.clear(); 00310 delete stroke_width_; 00311 stroke_width_ = NULL; 00312 00313 // A note about handling right-to-left scripts (Hebrew/Arabic): 00314 // The columns must be reversed and come out in right-to-left instead of 00315 // the normal left-to-right order. Because the left-to-right ordering 00316 // is implicit in many data structures, it is simpler to fool the algorithms 00317 // into thinking they are dealing with left-to-right text. 00318 // To do this, we reflect the needed data in the y-axis and then reflect 00319 // the blocks back after they have been created. This is a temporary 00320 // arrangment that is confined to this function only, so the reflection 00321 // is completely invisible in the output blocks. 00322 // The only objects reflected are: 00323 // The vertical separator lines that have already been found; 00324 // The bounding boxes of all BLOBNBOXES on all lists on the input_block 00325 // plus the image_bblobs. The outlines are not touched, since they are 00326 // not looked at. 00327 bool input_is_rtl = input_block->block->right_to_left(); 00328 if (input_is_rtl) { 00329 // Reflect the vertical separator lines (member of TabFind). 00330 ReflectInYAxis(); 00331 // Reflect the blob boxes. 00332 ReflectForRtl(input_block, &image_bblobs_); 00333 part_grid_.ReflectInYAxis(); 00334 } 00335 00336 if (single_column) { 00337 // No tab stops needed. Just the grid that FindTabVectors makes. 00338 DontFindTabVectors(&image_bblobs_, input_block, &deskew_, &reskew_); 00339 } else { 00340 SetBlockRuleEdges(input_block); 00341 // Find the tab stops, estimate skew, and deskew the tabs, blobs and 00342 // part_grid_. 00343 FindTabVectors(&horizontal_lines_, &image_bblobs_, input_block, 00344 min_gutter_width_, &part_grid_, &deskew_, &reskew_); 00345 // Add the deskew to the denorm_. 00346 DENORM* new_denorm = new DENORM; 00347 new_denorm->SetupNormalization(NULL, NULL, &deskew_, denorm_, NULL, 0, 00348 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f); 00349 denorm_ = new_denorm; 00350 } 00351 SetBlockRuleEdges(input_block); 00352 part_grid_.SetTabStops(this); 00353 00354 // Make the column_sets_. 00355 if (!MakeColumns(single_column)) { 00356 tprintf("Empty page!!\n"); 00357 return 0; // This is an empty page. 00358 } 00359 00360 // Refill the grid using rectangular spreading, and get the benefit 00361 // of the completed tab vectors marking the rule edges of each blob. 00362 Clear(); 00363 #ifndef GRAPHICS_DISABLED 00364 if (textord_tabfind_show_reject_blobs) { 00365 ScrollView* rej_win = MakeWindow(500, 300, "Rejected blobs"); 00366 input_block->plot_graded_blobs(rej_win); 00367 } 00368 #endif // GRAPHICS_DISABLED 00369 InsertBlobsToGrid(false, false, &image_bblobs_, this); 00370 InsertBlobsToGrid(true, true, &input_block->blobs, this); 00371 00372 part_grid_.GridFindMargins(best_columns_); 00373 // Split and merge the partitions by looking at local neighbours. 00374 GridSplitPartitions(); 00375 // Resolve unknown partitions by adding to an existing partition, fixing 00376 // the type, or declaring them noise. 00377 part_grid_.GridFindMargins(best_columns_); 00378 GridMergePartitions(); 00379 // Insert any unused noise blobs that are close enough to an appropriate 00380 // partition. 00381 InsertRemainingNoise(input_block); 00382 // Add horizontal line separators as partitions. 00383 GridInsertHLinePartitions(); 00384 GridInsertVLinePartitions(); 00385 // Recompute margins based on a local neighbourhood search. 00386 part_grid_.GridFindMargins(best_columns_); 00387 SetPartitionTypes(); 00388 if (textord_tabfind_show_initial_partitions) { 00389 ScrollView* part_win = MakeWindow(100, 300, "InitialPartitions"); 00390 part_grid_.DisplayBoxes(part_win); 00391 DisplayTabVectors(part_win); 00392 } 00393 00394 if (equation_detect_) { 00395 equation_detect_->FindEquationParts(&part_grid_, best_columns_); 00396 } 00397 if (textord_tabfind_find_tables) { 00398 TableFinder table_finder; 00399 table_finder.Init(gridsize(), bleft(), tright()); 00400 table_finder.set_resolution(resolution_); 00401 table_finder.set_left_to_right_language( 00402 !input_block->block->right_to_left()); 00403 // Copy cleaned partitions from part_grid_ to clean_part_grid_ and 00404 // insert dot-like noise into period_grid_ 00405 table_finder.InsertCleanPartitions(&part_grid_, input_block); 00406 // Get Table Regions 00407 table_finder.LocateTables(&part_grid_, best_columns_, WidthCB(), reskew_); 00408 } 00409 GridRemoveUnderlinePartitions(); 00410 part_grid_.DeleteUnknownParts(input_block); 00411 00412 // Build the partitions into chains that belong in the same block and 00413 // refine into one-to-one links, then smooth the types within each chain. 00414 part_grid_.FindPartitionPartners(); 00415 part_grid_.FindFigureCaptions(); 00416 part_grid_.RefinePartitionPartners(true); 00417 SmoothPartnerRuns(); 00418 00419 #ifndef GRAPHICS_DISABLED 00420 if (textord_tabfind_show_partitions) { 00421 ScrollView* window = MakeWindow(400, 300, "Partitions"); 00422 if (textord_debug_images) 00423 window->Image(AlignedBlob::textord_debug_pix().string(), 00424 image_origin().x(), image_origin().y()); 00425 part_grid_.DisplayBoxes(window); 00426 if (!textord_debug_printable) 00427 DisplayTabVectors(window); 00428 if (window != NULL && textord_tabfind_show_partitions > 1) { 00429 delete window->AwaitEvent(SVET_DESTROY); 00430 } 00431 } 00432 #endif // GRAPHICS_DISABLED 00433 part_grid_.AssertNoDuplicates(); 00434 // Ownership of the ColPartitions moves from part_sets_ to part_grid_ here, 00435 // and ownership of the BLOBNBOXes moves to the ColPartitions. 00436 // (They were previously owned by the block or the image_bblobs list.) 00437 ReleaseBlobsAndCleanupUnused(input_block); 00438 // Ownership of the ColPartitions moves from part_grid_ to good_parts_ and 00439 // noise_parts_ here. In text blocks, ownership of the BLOBNBOXes moves 00440 // from the ColPartitions to the output TO_BLOCK. In non-text, the 00441 // BLOBNBOXes stay with the ColPartitions and get deleted in the destructor. 00442 TransformToBlocks(blocks, to_blocks); 00443 if (textord_debug_tabfind) { 00444 tprintf("Found %d blocks, %d to_blocks\n", 00445 blocks->length(), to_blocks->length()); 00446 } 00447 00448 DisplayBlocks(blocks); 00449 RotateAndReskewBlocks(input_is_rtl, to_blocks); 00450 int result = 0; 00451 #ifndef GRAPHICS_DISABLED 00452 if (blocks_win_ != NULL) { 00453 bool waiting = false; 00454 do { 00455 waiting = false; 00456 SVEvent* event = blocks_win_->AwaitEvent(SVET_ANY); 00457 if (event->type == SVET_INPUT && event->parameter != NULL) { 00458 if (*event->parameter == 'd') 00459 result = -1; 00460 else 00461 blocks->clear(); 00462 } else if (event->type == SVET_DESTROY) { 00463 blocks_win_ = NULL; 00464 } else { 00465 waiting = true; 00466 } 00467 delete event; 00468 } while (waiting); 00469 } 00470 #endif // GRAPHICS_DISABLED 00471 return result; 00472 } 00473 00474 // Get the rotation required to deskew, and its inverse rotation. 00475 void ColumnFinder::GetDeskewVectors(FCOORD* deskew, FCOORD* reskew) { 00476 *reskew = reskew_; 00477 *deskew = reskew_; 00478 deskew->set_y(-deskew->y()); 00479 } 00480 00481 void ColumnFinder::SetEquationDetect(EquationDetectBase* detect) { 00482 equation_detect_ = detect; 00483 } 00484 00486 00487 // Displays the blob and block bounding boxes in a window called Blocks. 00488 void ColumnFinder::DisplayBlocks(BLOCK_LIST* blocks) { 00489 #ifndef GRAPHICS_DISABLED 00490 if (textord_tabfind_show_blocks) { 00491 if (blocks_win_ == NULL) 00492 blocks_win_ = MakeWindow(700, 300, "Blocks"); 00493 else 00494 blocks_win_->Clear(); 00495 if (textord_debug_images) 00496 blocks_win_->Image(AlignedBlob::textord_debug_pix().string(), 00497 image_origin().x(), image_origin().y()); 00498 else 00499 DisplayBoxes(blocks_win_); 00500 BLOCK_IT block_it(blocks); 00501 int serial = 1; 00502 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); 00503 block_it.forward()) { 00504 BLOCK* block = block_it.data(); 00505 block->plot(blocks_win_, serial++, 00506 textord_debug_printable ? ScrollView::BLUE 00507 : ScrollView::GREEN); 00508 } 00509 blocks_win_->Update(); 00510 } 00511 #endif 00512 } 00513 00514 // Displays the column edges at each grid y coordinate defined by 00515 // best_columns_. 00516 void ColumnFinder::DisplayColumnBounds(PartSetVector* sets) { 00517 #ifndef GRAPHICS_DISABLED 00518 ScrollView* col_win = MakeWindow(50, 300, "Columns"); 00519 if (textord_debug_images) 00520 col_win->Image(AlignedBlob::textord_debug_pix().string(), 00521 image_origin().x(), image_origin().y()); 00522 else 00523 DisplayBoxes(col_win); 00524 col_win->Pen(textord_debug_printable ? ScrollView::BLUE : ScrollView::GREEN); 00525 for (int i = 0; i < gridheight_; ++i) { 00526 ColPartitionSet* columns = best_columns_[i]; 00527 if (columns != NULL) 00528 columns->DisplayColumnEdges(i * gridsize_, (i + 1) * gridsize_, col_win); 00529 } 00530 #endif 00531 } 00532 00533 // Sets up column_sets_ (the determined column layout at each horizontal 00534 // slice). Returns false if the page is empty. 00535 bool ColumnFinder::MakeColumns(bool single_column) { 00536 // The part_sets_ are a temporary structure used during column creation, 00537 // and is a vector of ColPartitionSets, representing ColPartitions found 00538 // at horizontal slices through the page. 00539 PartSetVector part_sets; 00540 if (!single_column) { 00541 if (!part_grid_.MakeColPartSets(&part_sets)) 00542 return false; // Empty page. 00543 ASSERT_HOST(part_grid_.gridheight() == gridheight_); 00544 // Try using only the good parts first. 00545 bool good_only = true; 00546 do { 00547 for (int i = 0; i < gridheight_; ++i) { 00548 ColPartitionSet* line_set = part_sets.get(i); 00549 if (line_set != NULL && line_set->LegalColumnCandidate()) { 00550 ColPartitionSet* column_candidate = line_set->Copy(good_only); 00551 if (column_candidate != NULL) 00552 column_candidate->AddToColumnSetsIfUnique(&column_sets_, WidthCB()); 00553 } 00554 } 00555 good_only = !good_only; 00556 } while (column_sets_.empty() && !good_only); 00557 if (textord_debug_tabfind) 00558 PrintColumnCandidates("Column candidates"); 00559 // Improve the column candidates against themselves. 00560 ImproveColumnCandidates(&column_sets_, &column_sets_); 00561 if (textord_debug_tabfind) 00562 PrintColumnCandidates("Improved columns"); 00563 // Improve the column candidates using the part_sets_. 00564 ImproveColumnCandidates(&part_sets, &column_sets_); 00565 } 00566 ColPartitionSet* single_column_set = 00567 part_grid_.MakeSingleColumnSet(WidthCB()); 00568 if (single_column_set != NULL) { 00569 // Always add the single column set as a backup even if not in 00570 // single column mode. 00571 single_column_set->AddToColumnSetsIfUnique(&column_sets_, WidthCB()); 00572 } 00573 if (textord_debug_tabfind) 00574 PrintColumnCandidates("Final Columns"); 00575 if (!column_sets_.empty()) { 00576 // Divide the page into sections of uniform column layout. 00577 AssignColumns(part_sets); 00578 if (textord_tabfind_show_columns) { 00579 DisplayColumnBounds(&part_sets); 00580 } 00581 ComputeMeanColumnGap(); 00582 ColPartition_LIST parts; 00583 for (int i = 0; i < part_sets.size(); ++i) { 00584 ColPartitionSet* line_set = part_sets.get(i); 00585 if (line_set != NULL) { 00586 line_set->RelinquishParts(); 00587 delete line_set; 00588 } 00589 } 00590 return true; 00591 } 00592 return false; 00593 } 00594 00595 // Attempt to improve the column_candidates by expanding the columns 00596 // and adding new partitions from the partition sets in src_sets. 00597 // Src_sets may be equal to column_candidates, in which case it will 00598 // use them as a source to improve themselves. 00599 void ColumnFinder::ImproveColumnCandidates(PartSetVector* src_sets, 00600 PartSetVector* column_sets) { 00601 PartSetVector temp_cols; 00602 temp_cols.move(column_sets); 00603 if (src_sets == column_sets) 00604 src_sets = &temp_cols; 00605 int set_size = temp_cols.size(); 00606 // Try using only the good parts first. 00607 bool good_only = true; 00608 do { 00609 for (int i = 0; i < set_size; ++i) { 00610 ColPartitionSet* column_candidate = temp_cols.get(i); 00611 ASSERT_HOST(column_candidate != NULL); 00612 ColPartitionSet* improved = column_candidate->Copy(good_only); 00613 if (improved != NULL) { 00614 improved->ImproveColumnCandidate(WidthCB(), src_sets); 00615 improved->AddToColumnSetsIfUnique(column_sets, WidthCB()); 00616 } 00617 } 00618 good_only = !good_only; 00619 } while (column_sets->empty() && !good_only); 00620 if (column_sets->empty()) 00621 column_sets->move(&temp_cols); 00622 else 00623 temp_cols.delete_data_pointers(); 00624 } 00625 00626 // Prints debug information on the column candidates. 00627 void ColumnFinder::PrintColumnCandidates(const char* title) { 00628 int set_size = column_sets_.size(); 00629 tprintf("Found %d %s:\n", set_size, title); 00630 if (textord_debug_tabfind >= 3) { 00631 for (int i = 0; i < set_size; ++i) { 00632 ColPartitionSet* column_set = column_sets_.get(i); 00633 column_set->Print(); 00634 } 00635 } 00636 } 00637 00638 // Finds the optimal set of columns that cover the entire image with as 00639 // few changes in column partition as possible. 00640 // NOTE: this could be thought of as an optimization problem, but a simple 00641 // greedy algorithm is used instead. The algorithm repeatedly finds the modal 00642 // compatible column in an unassigned region and uses that with the extra 00643 // tweak of extending the modal region over small breaks in compatibility. 00644 // Where modal regions overlap, the boundary is chosen so as to minimize 00645 // the cost in terms of ColPartitions not fitting an approved column. 00646 void ColumnFinder::AssignColumns(const PartSetVector& part_sets) { 00647 int set_count = part_sets.size(); 00648 ASSERT_HOST(set_count == gridheight()); 00649 // Allocate and init the best_columns_. 00650 best_columns_ = new ColPartitionSet*[set_count]; 00651 for (int y = 0; y < set_count; ++y) 00652 best_columns_[y] = NULL; 00653 int column_count = column_sets_.size(); 00654 // column_set_costs[part_sets_ index][column_sets_ index] is 00655 // < MAX_INT32 if the partition set is compatible with the column set, 00656 // in which case its value is the cost for that set used in deciding 00657 // which competing set to assign. 00658 // any_columns_possible[part_sets_ index] is true if any of 00659 // possible_column_sets[part_sets_ index][*] is < MAX_INT32. 00660 // assigned_costs[part_sets_ index] is set to the column_set_costs 00661 // of the assigned column_sets_ index or MAX_INT32 if none is set. 00662 // On return the best_columns_ member is set. 00663 bool* any_columns_possible = new bool[set_count]; 00664 int* assigned_costs = new int[set_count]; 00665 int** column_set_costs = new int*[set_count]; 00666 // Set possible column_sets to indicate whether each set is compatible 00667 // with each column. 00668 for (int part_i = 0; part_i < set_count; ++part_i) { 00669 ColPartitionSet* line_set = part_sets.get(part_i); 00670 bool debug = line_set != NULL && 00671 WithinTestRegion(2, line_set->bounding_box().left(), 00672 line_set->bounding_box().bottom()); 00673 column_set_costs[part_i] = new int[column_count]; 00674 any_columns_possible[part_i] = false; 00675 assigned_costs[part_i] = MAX_INT32; 00676 for (int col_i = 0; col_i < column_count; ++col_i) { 00677 if (line_set != NULL && 00678 column_sets_.get(col_i)->CompatibleColumns(debug, line_set, 00679 WidthCB())) { 00680 column_set_costs[part_i][col_i] = 00681 column_sets_.get(col_i)->UnmatchedWidth(line_set); 00682 any_columns_possible[part_i] = true; 00683 } else { 00684 column_set_costs[part_i][col_i] = MAX_INT32; 00685 if (debug) 00686 tprintf("Set id %d did not match at y=%d, lineset =%p\n", 00687 col_i, part_i, line_set); 00688 } 00689 } 00690 } 00691 // Assign a column set to each vertical grid position. 00692 // While there is an unassigned range, find its mode. 00693 int start, end; 00694 while (BiggestUnassignedRange(set_count, any_columns_possible, 00695 &start, &end)) { 00696 if (textord_debug_tabfind >= 2) 00697 tprintf("Biggest unassigned range = %d- %d\n", start, end); 00698 // Find the modal column_set_id in the range. 00699 int column_set_id = RangeModalColumnSet(column_set_costs, 00700 assigned_costs, start, end); 00701 if (textord_debug_tabfind >= 2) { 00702 tprintf("Range modal column id = %d\n", column_set_id); 00703 column_sets_.get(column_set_id)->Print(); 00704 } 00705 // Now find the longest run of the column_set_id in the range. 00706 ShrinkRangeToLongestRun(column_set_costs, assigned_costs, 00707 any_columns_possible, 00708 column_set_id, &start, &end); 00709 if (textord_debug_tabfind >= 2) 00710 tprintf("Shrunk range = %d- %d\n", start, end); 00711 // Extend the start and end past the longest run, while there are 00712 // only small gaps in compatibility that can be overcome by larger 00713 // regions of compatibility beyond. 00714 ExtendRangePastSmallGaps(column_set_costs, assigned_costs, 00715 any_columns_possible, 00716 column_set_id, -1, -1, &start); 00717 --end; 00718 ExtendRangePastSmallGaps(column_set_costs, assigned_costs, 00719 any_columns_possible, 00720 column_set_id, 1, set_count, &end); 00721 ++end; 00722 if (textord_debug_tabfind) 00723 tprintf("Column id %d applies to range = %d - %d\n", 00724 column_set_id, start, end); 00725 // Assign the column to the range, which now may overlap with other ranges. 00726 AssignColumnToRange(column_set_id, start, end, column_set_costs, 00727 assigned_costs); 00728 } 00729 // If anything remains unassigned, the whole lot is unassigned, so 00730 // arbitrarily assign id 0. 00731 if (best_columns_[0] == NULL) { 00732 AssignColumnToRange(0, 0, gridheight_, column_set_costs, assigned_costs); 00733 } 00734 // Free memory. 00735 for (int i = 0; i < set_count; ++i) { 00736 delete [] column_set_costs[i]; 00737 } 00738 delete [] assigned_costs; 00739 delete [] any_columns_possible; 00740 delete [] column_set_costs; 00741 } 00742 00743 // Finds the biggest range in part_sets_ that has no assigned column, but 00744 // column assignment is possible. 00745 bool ColumnFinder::BiggestUnassignedRange(int set_count, 00746 const bool* any_columns_possible, 00747 int* best_start, int* best_end) { 00748 int best_range_size = 0; 00749 *best_start = set_count; 00750 *best_end = set_count; 00751 int end = set_count; 00752 for (int start = 0; start < gridheight_; start = end) { 00753 // Find the first unassigned index in start. 00754 while (start < set_count) { 00755 if (best_columns_[start] == NULL && any_columns_possible[start]) 00756 break; 00757 ++start; 00758 } 00759 // Find the first past the end and count the good ones in between. 00760 int range_size = 1; // Number of non-null, but unassigned line sets. 00761 end = start + 1; 00762 while (end < set_count) { 00763 if (best_columns_[end] != NULL) 00764 break; 00765 if (any_columns_possible[end]) 00766 ++range_size; 00767 ++end; 00768 } 00769 if (start < set_count && range_size > best_range_size) { 00770 best_range_size = range_size; 00771 *best_start = start; 00772 *best_end = end; 00773 } 00774 } 00775 return *best_start < *best_end; 00776 } 00777 00778 // Finds the modal compatible column_set_ index within the given range. 00779 int ColumnFinder::RangeModalColumnSet(int** column_set_costs, 00780 const int* assigned_costs, 00781 int start, int end) { 00782 int column_count = column_sets_.size(); 00783 STATS column_stats(0, column_count); 00784 for (int part_i = start; part_i < end; ++part_i) { 00785 for (int col_j = 0; col_j < column_count; ++col_j) { 00786 if (column_set_costs[part_i][col_j] < assigned_costs[part_i]) 00787 column_stats.add(col_j, 1); 00788 } 00789 } 00790 ASSERT_HOST(column_stats.get_total() > 0); 00791 return column_stats.mode(); 00792 } 00793 00794 // Given that there are many column_set_id compatible columns in the range, 00795 // shrinks the range to the longest contiguous run of compatibility, allowing 00796 // gaps where no columns are possible, but not where competing columns are 00797 // possible. 00798 void ColumnFinder::ShrinkRangeToLongestRun(int** column_set_costs, 00799 const int* assigned_costs, 00800 const bool* any_columns_possible, 00801 int column_set_id, 00802 int* best_start, int* best_end) { 00803 // orig_start and orig_end are the maximum range we will look at. 00804 int orig_start = *best_start; 00805 int orig_end = *best_end; 00806 int best_range_size = 0; 00807 *best_start = orig_end; 00808 *best_end = orig_end; 00809 int end = orig_end; 00810 for (int start = orig_start; start < orig_end; start = end) { 00811 // Find the first possible 00812 while (start < orig_end) { 00813 if (column_set_costs[start][column_set_id] < assigned_costs[start] || 00814 !any_columns_possible[start]) 00815 break; 00816 ++start; 00817 } 00818 // Find the first past the end. 00819 end = start + 1; 00820 while (end < orig_end) { 00821 if (column_set_costs[end][column_set_id] >= assigned_costs[start] && 00822 any_columns_possible[end]) 00823 break; 00824 ++end; 00825 } 00826 if (start < orig_end && end - start > best_range_size) { 00827 best_range_size = end - start; 00828 *best_start = start; 00829 *best_end = end; 00830 } 00831 } 00832 } 00833 00834 // Moves start in the direction of step, upto, but not including end while 00835 // the only incompatible regions are no more than kMaxIncompatibleColumnCount 00836 // in size, and the compatible regions beyond are bigger. 00837 void ColumnFinder::ExtendRangePastSmallGaps(int** column_set_costs, 00838 const int* assigned_costs, 00839 const bool* any_columns_possible, 00840 int column_set_id, 00841 int step, int end, int* start) { 00842 if (textord_debug_tabfind > 2) 00843 tprintf("Starting expansion at %d, step=%d, limit=%d\n", 00844 *start, step, end); 00845 if (*start == end) 00846 return; // Cannot be expanded. 00847 00848 int barrier_size = 0; 00849 int good_size = 0; 00850 do { 00851 // Find the size of the incompatible barrier. 00852 barrier_size = 0; 00853 int i; 00854 for (i = *start + step; i != end; i += step) { 00855 if (column_set_costs[i][column_set_id] < assigned_costs[i]) 00856 break; // We are back on. 00857 // Locations where none are possible don't count. 00858 if (any_columns_possible[i]) 00859 ++barrier_size; 00860 } 00861 if (textord_debug_tabfind > 2) 00862 tprintf("At %d, Barrier size=%d\n", i, barrier_size); 00863 if (barrier_size > kMaxIncompatibleColumnCount) 00864 return; // Barrier too big. 00865 if (i == end) { 00866 // We can't go any further, but the barrier was small, so go to the end. 00867 *start = i - step; 00868 return; 00869 } 00870 // Now find the size of the good region on the other side. 00871 good_size = 1; 00872 for (i += step; i != end; i += step) { 00873 if (column_set_costs[i][column_set_id] < assigned_costs[i]) 00874 ++good_size; 00875 else if (any_columns_possible[i]) 00876 break; 00877 } 00878 if (textord_debug_tabfind > 2) 00879 tprintf("At %d, good size = %d\n", i, good_size); 00880 // If we had enough good ones we can extend the start and keep looking. 00881 if (good_size >= barrier_size) 00882 *start = i - step; 00883 } while (good_size >= barrier_size); 00884 } 00885 00886 // Assigns the given column_set_id to the given range. 00887 void ColumnFinder::AssignColumnToRange(int column_set_id, int start, int end, 00888 int** column_set_costs, 00889 int* assigned_costs) { 00890 ColPartitionSet* column_set = column_sets_.get(column_set_id); 00891 for (int i = start; i < end; ++i) { 00892 assigned_costs[i] = column_set_costs[i][column_set_id]; 00893 best_columns_[i] = column_set; 00894 } 00895 } 00896 00897 // Computes the mean_column_gap_. 00898 void ColumnFinder::ComputeMeanColumnGap() { 00899 int total_gap = 0; 00900 int total_width = 0; 00901 int gap_samples = 0; 00902 int width_samples = 0; 00903 for (int i = 0; i < gridheight_; ++i) { 00904 ASSERT_HOST(best_columns_[i] != NULL); 00905 best_columns_[i]->AccumulateColumnWidthsAndGaps(&total_width, 00906 &width_samples, 00907 &total_gap, 00908 &gap_samples); 00909 } 00910 mean_column_gap_ = gap_samples > 0 ? total_gap / gap_samples 00911 : total_width / width_samples; 00912 } 00913 00916 00917 // Helper to delete all the deletable blobs on the list. Owned blobs are 00918 // extracted from the list, but not deleted, leaving them owned by the owner(). 00919 static void ReleaseAllBlobsAndDeleteUnused(BLOBNBOX_LIST* blobs) { 00920 for (BLOBNBOX_IT blob_it(blobs); !blob_it.empty(); blob_it.forward()) { 00921 BLOBNBOX* blob = blob_it.extract(); 00922 if (blob->owner() == NULL) { 00923 delete blob->cblob(); 00924 delete blob; 00925 } 00926 } 00927 } 00928 00929 // Hoovers up all un-owned blobs and deletes them. 00930 // The rest get released from the block so the ColPartitions can pass 00931 // ownership to the output blocks. 00932 void ColumnFinder::ReleaseBlobsAndCleanupUnused(TO_BLOCK* block) { 00933 ReleaseAllBlobsAndDeleteUnused(&block->blobs); 00934 ReleaseAllBlobsAndDeleteUnused(&block->small_blobs); 00935 ReleaseAllBlobsAndDeleteUnused(&block->noise_blobs); 00936 ReleaseAllBlobsAndDeleteUnused(&block->large_blobs); 00937 ReleaseAllBlobsAndDeleteUnused(&image_bblobs_); 00938 } 00939 00940 // Splits partitions that cross columns where they have nothing in the gap. 00941 void ColumnFinder::GridSplitPartitions() { 00942 // Iterate the ColPartitions in the grid. 00943 GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> 00944 gsearch(&part_grid_); 00945 gsearch.StartFullSearch(); 00946 ColPartition* dont_repeat = NULL; 00947 ColPartition* part; 00948 while ((part = gsearch.NextFullSearch()) != NULL) { 00949 if (part->blob_type() < BRT_UNKNOWN || part == dont_repeat) 00950 continue; // Only applies to text partitions. 00951 ColPartitionSet* column_set = best_columns_[gsearch.GridY()]; 00952 int first_col = -1; 00953 int last_col = -1; 00954 // Find which columns the partition spans. 00955 part->ColumnRange(resolution_, column_set, &first_col, &last_col); 00956 if (first_col > 0) 00957 --first_col; 00958 // Convert output column indices to physical column indices. 00959 first_col /= 2; 00960 last_col /= 2; 00961 // We will only consider cases where a partition spans two columns, 00962 // since a heading that spans more columns than that is most likely 00963 // genuine. 00964 if (last_col != first_col + 1) 00965 continue; 00966 // Set up a rectangle search x-bounded by the column gap and y by the part. 00967 int y = part->MidY(); 00968 TBOX margin_box = part->bounding_box(); 00969 bool debug = AlignedBlob::WithinTestRegion(2, margin_box.left(), 00970 margin_box.bottom()); 00971 if (debug) { 00972 tprintf("Considering partition for GridSplit:"); 00973 part->Print(); 00974 } 00975 ColPartition* column = column_set->GetColumnByIndex(first_col); 00976 if (column == NULL) 00977 continue; 00978 margin_box.set_left(column->RightAtY(y) + 2); 00979 column = column_set->GetColumnByIndex(last_col); 00980 if (column == NULL) 00981 continue; 00982 margin_box.set_right(column->LeftAtY(y) - 2); 00983 // TODO(rays) Decide whether to keep rectangular filling or not in the 00984 // main grid and therefore whether we need a fancier search here. 00985 // Now run the rect search on the main blob grid. 00986 GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> rectsearch(this); 00987 if (debug) { 00988 tprintf("Searching box (%d,%d)->(%d,%d)\n", 00989 margin_box.left(), margin_box.bottom(), 00990 margin_box.right(), margin_box.top()); 00991 part->Print(); 00992 } 00993 rectsearch.StartRectSearch(margin_box); 00994 BLOBNBOX* bbox; 00995 while ((bbox = rectsearch.NextRectSearch()) != NULL) { 00996 if (bbox->bounding_box().overlap(margin_box)) 00997 break; 00998 } 00999 if (bbox == NULL) { 01000 // There seems to be nothing in the hole, so split the partition. 01001 gsearch.RemoveBBox(); 01002 int x_middle = (margin_box.left() + margin_box.right()) / 2; 01003 if (debug) { 01004 tprintf("Splitting part at %d:", x_middle); 01005 part->Print(); 01006 } 01007 ColPartition* split_part = part->SplitAt(x_middle); 01008 if (split_part != NULL) { 01009 if (debug) { 01010 tprintf("Split result:"); 01011 part->Print(); 01012 split_part->Print(); 01013 } 01014 part_grid_.InsertBBox(true, true, split_part); 01015 } else { 01016 // Split had no effect 01017 if (debug) 01018 tprintf("Split had no effect\n"); 01019 dont_repeat = part; 01020 } 01021 part_grid_.InsertBBox(true, true, part); 01022 gsearch.RepositionIterator(); 01023 } else if (debug) { 01024 tprintf("Part cannot be split: blob (%d,%d)->(%d,%d) in column gap\n", 01025 bbox->bounding_box().left(), bbox->bounding_box().bottom(), 01026 bbox->bounding_box().right(), bbox->bounding_box().top()); 01027 } 01028 } 01029 } 01030 01031 // Merges partitions where there is vertical overlap, within a single column, 01032 // and the horizontal gap is small enough. 01033 void ColumnFinder::GridMergePartitions() { 01034 // Iterate the ColPartitions in the grid. 01035 GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> 01036 gsearch(&part_grid_); 01037 gsearch.StartFullSearch(); 01038 ColPartition* part; 01039 while ((part = gsearch.NextFullSearch()) != NULL) { 01040 if (part->IsUnMergeableType()) 01041 continue; 01042 // Set up a rectangle search x-bounded by the column and y by the part. 01043 ColPartitionSet* columns = best_columns_[gsearch.GridY()]; 01044 TBOX box = part->bounding_box(); 01045 bool debug = AlignedBlob::WithinTestRegion(1, box.left(), box.bottom()); 01046 if (debug) { 01047 tprintf("Considering part for merge at:"); 01048 part->Print(); 01049 } 01050 int y = part->MidY(); 01051 ColPartition* left_column = columns->ColumnContaining(box.left(), y); 01052 ColPartition* right_column = columns->ColumnContaining(box.right(), y); 01053 if (left_column == NULL || right_column != left_column) { 01054 if (debug) 01055 tprintf("In different columns\n"); 01056 continue; 01057 } 01058 box.set_left(left_column->LeftAtY(y)); 01059 box.set_right(right_column->RightAtY(y)); 01060 // Now run the rect search. 01061 bool modified_box = false; 01062 GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> 01063 rsearch(&part_grid_); 01064 rsearch.SetUniqueMode(true); 01065 rsearch.StartRectSearch(box); 01066 ColPartition* neighbour; 01067 01068 while ((neighbour = rsearch.NextRectSearch()) != NULL) { 01069 if (neighbour == part || neighbour->IsUnMergeableType()) 01070 continue; 01071 const TBOX& neighbour_box = neighbour->bounding_box(); 01072 if (debug) { 01073 tprintf("Considering merge with neighbour at:"); 01074 neighbour->Print(); 01075 } 01076 if (neighbour_box.right() < box.left() || 01077 neighbour_box.left() > box.right()) 01078 continue; // Not within the same column. 01079 if (part->VSignificantCoreOverlap(*neighbour) && 01080 part->TypesMatch(*neighbour)) { 01081 // There is vertical overlap and the gross types match, but only 01082 // merge if the horizontal gap is small enough, as one of the 01083 // partitions may be a figure caption within a column. 01084 // If there is only one column, then the mean_column_gap_ is large 01085 // enough to allow almost any merge, by being the mean column width. 01086 const TBOX& part_box = part->bounding_box(); 01087 // Don't merge if there is something else in the way. Use the margin 01088 // to decide, and check both to allow a bit of overlap. 01089 if (neighbour_box.left() > part->right_margin() && 01090 part_box.right() < neighbour->left_margin()) 01091 continue; // Neighbour is too far to the right. 01092 if (neighbour_box.right() < part->left_margin() && 01093 part_box.left() > neighbour->right_margin()) 01094 continue; // Neighbour is too far to the left. 01095 int h_gap = MAX(part_box.left(), neighbour_box.left()) - 01096 MIN(part_box.right(), neighbour_box.right()); 01097 if (h_gap < mean_column_gap_ * kHorizontalGapMergeFraction || 01098 part_box.width() < mean_column_gap_ || 01099 neighbour_box.width() < mean_column_gap_) { 01100 if (debug) { 01101 tprintf("Running grid-based merge between:\n"); 01102 part->Print(); 01103 neighbour->Print(); 01104 } 01105 rsearch.RemoveBBox(); 01106 gsearch.RepositionIterator(); 01107 part->Absorb(neighbour, WidthCB()); 01108 modified_box = true; 01109 } else if (debug) { 01110 tprintf("Neighbour failed hgap test\n"); 01111 } 01112 } else if (debug) { 01113 tprintf("Neighbour failed overlap or typesmatch test\n"); 01114 } 01115 } 01116 if (modified_box) { 01117 // We modified the box of part, so re-insert it into the grid. 01118 // This does no harm in the current cell, as it already exists there, 01119 // but it needs to exist in all the cells covered by its bounding box, 01120 // or it will never be found by a full search. 01121 // Because the box has changed, it has to be removed first, otherwise 01122 // add_sorted may fail to keep a single copy of the pointer. 01123 gsearch.RemoveBBox(); 01124 part_grid_.InsertBBox(true, true, part); 01125 gsearch.RepositionIterator(); 01126 } 01127 } 01128 } 01129 01130 // Inserts remaining noise blobs into the most applicable partition if any. 01131 // If there is no applicable partition, then the blobs are deleted. 01132 void ColumnFinder::InsertRemainingNoise(TO_BLOCK* block) { 01133 BLOBNBOX_IT blob_it(&block->noise_blobs); 01134 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { 01135 BLOBNBOX* blob = blob_it.data(); 01136 if (blob->owner() != NULL) continue; 01137 TBOX search_box(blob->bounding_box()); 01138 bool debug = WithinTestRegion(2, search_box.left(), search_box.bottom()); 01139 search_box.pad(gridsize(), gridsize()); 01140 // Setup a rectangle search to find the best partition to merge with. 01141 ColPartitionGridSearch rsearch(&part_grid_); 01142 rsearch.SetUniqueMode(true); 01143 rsearch.StartRectSearch(search_box); 01144 ColPartition* part; 01145 ColPartition* best_part = NULL; 01146 int best_distance = 0; 01147 while ((part = rsearch.NextRectSearch()) != NULL) { 01148 if (part->IsUnMergeableType()) 01149 continue; 01150 int distance = projection_.DistanceOfBoxFromPartition( 01151 blob->bounding_box(), *part, denorm_, debug); 01152 if (best_part == NULL || distance < best_distance) { 01153 best_part = part; 01154 best_distance = distance; 01155 } 01156 } 01157 if (best_part != NULL && 01158 best_distance < kMaxDistToPartSizeRatio * best_part->median_size()) { 01159 // Close enough to merge. 01160 if (debug) { 01161 tprintf("Adding noise blob with distance %d, thr=%g:box:", 01162 best_distance, 01163 kMaxDistToPartSizeRatio * best_part->median_size()); 01164 blob->bounding_box().print(); 01165 tprintf("To partition:"); 01166 best_part->Print(); 01167 } 01168 part_grid_.RemoveBBox(best_part); 01169 best_part->AddBox(blob); 01170 part_grid_.InsertBBox(true, true, best_part); 01171 blob->set_owner(best_part); 01172 blob->set_flow(best_part->flow()); 01173 blob->set_region_type(best_part->blob_type()); 01174 } else { 01175 // Mark the blob for deletion. 01176 blob->set_region_type(BRT_NOISE); 01177 } 01178 } 01179 // Delete the marked blobs, clearing neighbour references. 01180 block->DeleteUnownedNoise(); 01181 } 01182 01183 // Helper makes a box from a horizontal line. 01184 static TBOX BoxFromHLine(const TabVector* hline) { 01185 int top = MAX(hline->startpt().y(), hline->endpt().y()); 01186 int bottom = MIN(hline->startpt().y(), hline->endpt().y()); 01187 top += hline->mean_width(); 01188 if (top == bottom) { 01189 if (bottom > 0) 01190 --bottom; 01191 else 01192 ++top; 01193 } 01194 return TBOX(hline->startpt().x(), bottom, hline->endpt().x(), top); 01195 } 01196 01197 // Remove partitions that come from horizontal lines that look like 01198 // underlines, but are not part of a table. 01199 void ColumnFinder::GridRemoveUnderlinePartitions() { 01200 TabVector_IT hline_it(&horizontal_lines_); 01201 for (hline_it.mark_cycle_pt(); !hline_it.cycled_list(); hline_it.forward()) { 01202 TabVector* hline = hline_it.data(); 01203 if (hline->intersects_other_lines()) 01204 continue; 01205 TBOX line_box = BoxFromHLine(hline); 01206 TBOX search_box = line_box; 01207 search_box.pad(0, line_box.height()); 01208 ColPartitionGridSearch part_search(&part_grid_); 01209 part_search.SetUniqueMode(true); 01210 part_search.StartRectSearch(search_box); 01211 ColPartition* covered; 01212 bool touched_table = false; 01213 bool touched_text = false; 01214 ColPartition* line_part = NULL; 01215 while ((covered = part_search.NextRectSearch()) != NULL) { 01216 if (covered->type() == PT_TABLE) { 01217 touched_table = true; 01218 break; 01219 } else if (covered->IsTextType()) { 01220 // TODO(rays) Add a list of underline sections to ColPartition. 01221 int text_bottom = covered->median_bottom(); 01222 if (line_box.bottom() <= text_bottom && text_bottom <= search_box.top()) 01223 touched_text = true; 01224 } else if (covered->blob_type() == BRT_HLINE && 01225 line_box.contains(covered->bounding_box())) { 01226 line_part = covered; 01227 } 01228 } 01229 if (line_part != NULL && !touched_table && touched_text) { 01230 part_grid_.RemoveBBox(line_part); 01231 delete line_part; 01232 } 01233 } 01234 } 01235 01236 // Add horizontal line separators as partitions. 01237 void ColumnFinder::GridInsertHLinePartitions() { 01238 TabVector_IT hline_it(&horizontal_lines_); 01239 for (hline_it.mark_cycle_pt(); !hline_it.cycled_list(); hline_it.forward()) { 01240 TabVector* hline = hline_it.data(); 01241 TBOX line_box = BoxFromHLine(hline); 01242 ColPartition* part = ColPartition::MakeLinePartition( 01243 BRT_HLINE, vertical_skew_, 01244 line_box.left(), line_box.bottom(), line_box.right(), line_box.top()); 01245 part->set_type(PT_HORZ_LINE); 01246 bool any_image = false; 01247 ColPartitionGridSearch part_search(&part_grid_); 01248 part_search.SetUniqueMode(true); 01249 part_search.StartRectSearch(line_box); 01250 ColPartition* covered; 01251 while ((covered = part_search.NextRectSearch()) != NULL) { 01252 if (covered->IsImageType()) { 01253 any_image = true; 01254 break; 01255 } 01256 } 01257 if (!any_image) 01258 part_grid_.InsertBBox(true, true, part); 01259 else 01260 delete part; 01261 } 01262 } 01263 01264 // Add horizontal line separators as partitions. 01265 void ColumnFinder::GridInsertVLinePartitions() { 01266 TabVector_IT vline_it(dead_vectors()); 01267 for (vline_it.mark_cycle_pt(); !vline_it.cycled_list(); vline_it.forward()) { 01268 TabVector* vline = vline_it.data(); 01269 if (!vline->IsSeparator()) 01270 continue; 01271 int left = MIN(vline->startpt().x(), vline->endpt().x()); 01272 int right = MAX(vline->startpt().x(), vline->endpt().x()); 01273 right += vline->mean_width(); 01274 if (left == right) { 01275 if (left > 0) 01276 --left; 01277 else 01278 ++right; 01279 } 01280 ColPartition* part = ColPartition::MakeLinePartition( 01281 BRT_VLINE, vertical_skew_, 01282 left, vline->startpt().y(), right, vline->endpt().y()); 01283 part->set_type(PT_VERT_LINE); 01284 bool any_image = false; 01285 ColPartitionGridSearch part_search(&part_grid_); 01286 part_search.SetUniqueMode(true); 01287 part_search.StartRectSearch(part->bounding_box()); 01288 ColPartition* covered; 01289 while ((covered = part_search.NextRectSearch()) != NULL) { 01290 if (covered->IsImageType()) { 01291 any_image = true; 01292 break; 01293 } 01294 } 01295 if (!any_image) 01296 part_grid_.InsertBBox(true, true, part); 01297 else 01298 delete part; 01299 } 01300 } 01301 01302 // For every ColPartition in the grid, sets its type based on position 01303 // in the columns. 01304 void ColumnFinder::SetPartitionTypes() { 01305 GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> 01306 gsearch(&part_grid_); 01307 gsearch.StartFullSearch(); 01308 ColPartition* part; 01309 while ((part = gsearch.NextFullSearch()) != NULL) { 01310 part->SetPartitionType(resolution_, best_columns_[gsearch.GridY()]); 01311 } 01312 } 01313 01314 // Only images remain with multiple types in a run of partners. 01315 // Sets the type of all in the group to the maximum of the group. 01316 void ColumnFinder::SmoothPartnerRuns() { 01317 // Iterate the ColPartitions in the grid. 01318 GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> 01319 gsearch(&part_grid_); 01320 gsearch.StartFullSearch(); 01321 ColPartition* part; 01322 while ((part = gsearch.NextFullSearch()) != NULL) { 01323 ColPartition* partner = part->SingletonPartner(true); 01324 if (partner != NULL) { 01325 if (partner->SingletonPartner(false) != part) { 01326 tprintf("Ooops! Partition:(%d partners)", 01327 part->upper_partners()->length()); 01328 part->Print(); 01329 tprintf("has singleton partner:(%d partners", 01330 partner->lower_partners()->length()); 01331 partner->Print(); 01332 tprintf("but its singleton partner is:"); 01333 if (partner->SingletonPartner(false) == NULL) 01334 tprintf("NULL\n"); 01335 else 01336 partner->SingletonPartner(false)->Print(); 01337 } 01338 ASSERT_HOST(partner->SingletonPartner(false) == part); 01339 } else if (part->SingletonPartner(false) != NULL) { 01340 ColPartitionSet* column_set = best_columns_[gsearch.GridY()]; 01341 int column_count = column_set->ColumnCount(); 01342 part->SmoothPartnerRun(column_count * 2 + 1); 01343 } 01344 } 01345 } 01346 01347 // Helper functions for TransformToBlocks. 01348 // Add the part to the temp list in the correct order. 01349 void ColumnFinder::AddToTempPartList(ColPartition* part, 01350 ColPartition_CLIST* temp_list) { 01351 int mid_y = part->MidY(); 01352 ColPartition_C_IT it(temp_list); 01353 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 01354 ColPartition* test_part = it.data(); 01355 if (part->type() == PT_NOISE || test_part->type() == PT_NOISE) 01356 continue; // Noise stays in sequence. 01357 if (test_part == part->SingletonPartner(false)) 01358 break; // Insert before its lower partner. 01359 int neighbour_bottom = test_part->median_bottom(); 01360 int neighbour_top = test_part->median_top(); 01361 int neighbour_y = (neighbour_bottom + neighbour_top) / 2; 01362 if (neighbour_y < mid_y) 01363 break; // part is above test_part so insert it. 01364 if (!part->HOverlaps(*test_part) && !part->WithinSameMargins(*test_part)) 01365 continue; // Incompatibles stay in order 01366 } 01367 if (it.cycled_list()) { 01368 it.add_to_end(part); 01369 } else { 01370 it.add_before_stay_put(part); 01371 } 01372 } 01373 01374 // Add everything from the temp list to the work_set assuming correct order. 01375 void ColumnFinder::EmptyTempPartList(ColPartition_CLIST* temp_list, 01376 WorkingPartSet_LIST* work_set) { 01377 ColPartition_C_IT it(temp_list); 01378 while (!it.empty()) { 01379 it.extract()->AddToWorkingSet(bleft_, tright_, resolution_, 01380 &good_parts_, work_set); 01381 it.forward(); 01382 } 01383 } 01384 01385 // Transform the grid of partitions to the output blocks. 01386 void ColumnFinder::TransformToBlocks(BLOCK_LIST* blocks, 01387 TO_BLOCK_LIST* to_blocks) { 01388 WorkingPartSet_LIST work_set; 01389 ColPartitionSet* column_set = NULL; 01390 ColPartition_IT noise_it(&noise_parts_); 01391 // The temp_part_list holds a list of parts at the same grid y coord 01392 // so they can be added in the correct order. This prevents thin objects 01393 // like horizontal lines going before the text lines above them. 01394 ColPartition_CLIST temp_part_list; 01395 // Iterate the ColPartitions in the grid. It starts at the top 01396 GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> 01397 gsearch(&part_grid_); 01398 gsearch.StartFullSearch(); 01399 int prev_grid_y = -1; 01400 ColPartition* part; 01401 while ((part = gsearch.NextFullSearch()) != NULL) { 01402 int grid_y = gsearch.GridY(); 01403 if (grid_y != prev_grid_y) { 01404 EmptyTempPartList(&temp_part_list, &work_set); 01405 prev_grid_y = grid_y; 01406 } 01407 if (best_columns_[grid_y] != column_set) { 01408 column_set = best_columns_[grid_y]; 01409 // Every line should have a non-null best column. 01410 ASSERT_HOST(column_set != NULL); 01411 column_set->ChangeWorkColumns(bleft_, tright_, resolution_, 01412 &good_parts_, &work_set); 01413 if (textord_debug_tabfind) 01414 tprintf("Changed column groups at grid index %d, y=%d\n", 01415 gsearch.GridY(), gsearch.GridY() * gridsize()); 01416 } 01417 if (part->type() == PT_NOISE) { 01418 noise_it.add_to_end(part); 01419 } else { 01420 AddToTempPartList(part, &temp_part_list); 01421 } 01422 } 01423 EmptyTempPartList(&temp_part_list, &work_set); 01424 // Now finish all working sets and transfer ColPartitionSets to block_sets. 01425 WorkingPartSet_IT work_it(&work_set); 01426 while (!work_it.empty()) { 01427 WorkingPartSet* working_set = work_it.extract(); 01428 working_set->ExtractCompletedBlocks(bleft_, tright_, resolution_, 01429 &good_parts_, blocks, to_blocks); 01430 delete working_set; 01431 work_it.forward(); 01432 } 01433 } 01434 01435 // Helper reflects a list of blobs in the y-axis. 01436 // Only reflects the BLOBNBOX bounding box. Not the blobs or outlines below. 01437 static void ReflectBlobList(BLOBNBOX_LIST* bblobs) { 01438 BLOBNBOX_IT it(bblobs); 01439 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 01440 it.data()->reflect_box_in_y_axis(); 01441 } 01442 } 01443 01444 // Reflect the blob boxes (but not the outlines) in the y-axis so that 01445 // the blocks get created in the correct RTL order. Reflects the blobs 01446 // in the input_block and the bblobs list. 01447 // The reflection is undone in RotateAndReskewBlocks by 01448 // reflecting the blocks themselves, and then recomputing the blob bounding 01449 // boxes. 01450 void ColumnFinder::ReflectForRtl(TO_BLOCK* input_block, BLOBNBOX_LIST* bblobs) { 01451 ReflectBlobList(bblobs); 01452 ReflectBlobList(&input_block->blobs); 01453 ReflectBlobList(&input_block->small_blobs); 01454 ReflectBlobList(&input_block->noise_blobs); 01455 ReflectBlobList(&input_block->large_blobs); 01456 // Update the denorm with the reflection. 01457 DENORM* new_denorm = new DENORM; 01458 new_denorm->SetupNormalization(NULL, NULL, NULL, denorm_, NULL, 0, 01459 0.0f, 0.0f, -1.0f, 1.0f, 0.0f, 0.0f); 01460 denorm_ = new_denorm; 01461 } 01462 01463 // Undo the deskew that was done in FindTabVectors, as recognition is done 01464 // without correcting blobs or blob outlines for skew. 01465 // Reskew the completed blocks to put them back to the original rotated coords 01466 // that were created by CorrectOrientation. 01467 // If the input_is_rtl, then reflect the blocks in the y-axis to undo the 01468 // reflection that was done before FindTabVectors. 01469 // Blocks that were identified as vertical text (relative to the rotated 01470 // coordinates) are further rotated so the text lines are horizontal. 01471 // blob polygonal outlines are rotated to match the position of the blocks 01472 // that they are in, and their bounding boxes are recalculated to be accurate. 01473 // Record appropriate inverse transformations and required 01474 // classifier transformation in the blocks. 01475 void ColumnFinder::RotateAndReskewBlocks(bool input_is_rtl, 01476 TO_BLOCK_LIST* blocks) { 01477 if (input_is_rtl) { 01478 // The skew is backwards because of the reflection. 01479 FCOORD tmp = deskew_; 01480 deskew_ = reskew_; 01481 reskew_ = tmp; 01482 } 01483 TO_BLOCK_IT it(blocks); 01484 int block_index = 1; 01485 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 01486 TO_BLOCK* to_block = it.data(); 01487 BLOCK* block = to_block->block; 01488 // Blocks are created on the deskewed blob outlines in TransformToBlocks() 01489 // so we need to reskew them back to page coordinates. 01490 if (input_is_rtl) { 01491 block->reflect_polygon_in_y_axis(); 01492 } 01493 block->rotate(reskew_); 01494 // Copy the right_to_left flag to the created block. 01495 block->set_right_to_left(input_is_rtl); 01496 // Save the skew angle in the block for baseline computations. 01497 block->set_skew(reskew_); 01498 block->set_index(block_index++); 01499 FCOORD blob_rotation = ComputeBlockAndClassifyRotation(block); 01500 // Rotate all the blobs if needed and recompute the bounding boxes. 01501 // Compute the block median blob width and height as we go. 01502 STATS widths(0, block->bounding_box().width()); 01503 STATS heights(0, block->bounding_box().height()); 01504 BLOBNBOX_IT blob_it(&to_block->blobs); 01505 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { 01506 BLOBNBOX* blob = blob_it.data(); 01507 if (blob_rotation.x() != 1.0f || blob_rotation.y() != 0.0f) { 01508 blob->cblob()->rotate(blob_rotation); 01509 } 01510 blob->compute_bounding_box(); 01511 widths.add(blob->bounding_box().width(), 1); 01512 heights.add(blob->bounding_box().height(), 1); 01513 } 01514 block->set_median_size(static_cast<int>(widths.median() + 0.5), 01515 static_cast<int>(heights.median() + 0.5)); 01516 if (textord_debug_tabfind >= 2) 01517 tprintf("Block median size = (%d, %d)\n", 01518 block->median_size().x(), block->median_size().y()); 01519 } 01520 } 01521 01522 // Computes the rotations for the block (to make textlines horizontal) and 01523 // for the blobs (for classification) and sets the appropriate members 01524 // of the given block. 01525 // Returns the rotation that needs to be applied to the blobs to make 01526 // them sit in the rotated block. 01527 FCOORD ColumnFinder::ComputeBlockAndClassifyRotation(BLOCK* block) { 01528 // The text_rotation_ tells us the gross page text rotation that needs 01529 // to be applied for classification 01530 // TODO(rays) find block-level classify rotation by orientation detection. 01531 // In the mean time, assume that "up" for text printed in the minority 01532 // direction (PT_VERTICAL_TEXT) is perpendicular to the line of reading. 01533 // Accomplish this by zero-ing out the text rotation. This covers the 01534 // common cases of image credits in documents written in Latin scripts 01535 // and page headings for predominantly vertically written CJK books. 01536 FCOORD classify_rotation(text_rotation_); 01537 FCOORD block_rotation(1.0f, 0.0f); 01538 if (block->poly_block()->isA() == PT_VERTICAL_TEXT) { 01539 // Vertical text needs to be 90 degrees rotated relative to the rest. 01540 // If the rest has a 90 degree rotation already, use the inverse, making 01541 // the vertical text the original way up. Otherwise use 90 degrees 01542 // clockwise. 01543 if (rerotate_.x() == 0.0f) 01544 block_rotation = rerotate_; 01545 else 01546 block_rotation = FCOORD(0.0f, -1.0f); 01547 block->rotate(block_rotation); 01548 classify_rotation = FCOORD(1.0f, 0.0f); 01549 } 01550 block_rotation.rotate(rotation_); 01551 // block_rotation is now what we have done to the blocks. Now do the same 01552 // thing to the blobs, but save the inverse rotation in the block, as that 01553 // is what we need to DENORM back to the image coordinates. 01554 FCOORD blob_rotation(block_rotation); 01555 block_rotation.set_y(-block_rotation.y()); 01556 block->set_re_rotation(block_rotation); 01557 block->set_classify_rotation(classify_rotation); 01558 if (textord_debug_tabfind) { 01559 tprintf("Blk %d, type %d rerotation(%.2f, %.2f), char(%.2f,%.2f), box:", 01560 block->index(), block->poly_block()->isA(), 01561 block->re_rotation().x(), block->re_rotation().y(), 01562 classify_rotation.x(), classify_rotation.y()); 01563 } 01564 return blob_rotation; 01565 } 01566 01567 } // namespace tesseract.