Tesseract  3.02
tesseract-ocr/textord/colfind.cpp
Go to the documentation of this file.
00001 
00002 // File:        colfind.cpp
00003 // Description: Class to hold BLOBNBOXs in a grid for fast access
00004 //              to neighbours.
00005 // Author:      Ray Smith
00006 // Created:     Wed Jun 06 17:22:01 PDT 2007
00007 //
00008 // (C) Copyright 2007, Google Inc.
00009 // Licensed under the Apache License, Version 2.0 (the "License");
00010 // you may not use this file except in compliance with the License.
00011 // You may obtain a copy of the License at
00012 // http://www.apache.org/licenses/LICENSE-2.0
00013 // Unless required by applicable law or agreed to in writing, software
00014 // distributed under the License is distributed on an "AS IS" BASIS,
00015 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00016 // See the License for the specific language governing permissions and
00017 // limitations under the License.
00018 //
00020 
00021 #ifdef _MSC_VER
00022 #pragma warning(disable:4244)  // Conversion warnings
00023 #endif
00024 
00025 #include "colfind.h"
00026 
00027 #include "ccnontextdetect.h"
00028 #include "colpartition.h"
00029 #include "colpartitionset.h"
00030 #include "equationdetectbase.h"
00031 #include "linefind.h"
00032 #include "normalis.h"
00033 #include "strokewidth.h"
00034 #include "blobbox.h"
00035 #include "scrollview.h"
00036 #include "tablefind.h"
00037 #include "params.h"
00038 #include "workingpartset.h"
00039 
00040 // Include automatically generated configuration file if running autoconf.
00041 #ifdef HAVE_CONFIG_H
00042 #include "config_auto.h"
00043 #endif
00044 
00045 namespace tesseract {
00046 
00047 // Minimum width (in pixels) to be considered when making columns.
00048 // TODO(rays) convert to inches, dependent on resolution.
00049 const int kMinColumnWidth = 100;
00050 // When assigning columns, the max number of misfit grid rows/ColPartitionSets
00051 // that can be ignored.
00052 const int kMaxIncompatibleColumnCount = 2;
00053 // Min fraction of ColPartition height to be overlapping for margin purposes.
00054 const double kMarginOverlapFraction = 0.25;
00055 // Max fraction of mean_column_gap_ for the gap between two partitions within a
00056 // column to allow them to merge.
00057 const double kHorizontalGapMergeFraction = 0.5;
00058 // Min fraction of grid size to not be considered likely noise.
00059 const double kMinNonNoiseFraction = 0.5;
00060 // Minimum gutter width as a fraction of gridsize
00061 const double kMinGutterWidthGrid = 0.5;
00062 // Max multiple of a partition's median size as a distance threshold for
00063 // adding noise blobs.
00064 const double kMaxDistToPartSizeRatio = 1.5;
00065 
00066 BOOL_VAR(textord_tabfind_show_initial_partitions,
00067          false, "Show partition bounds");
00068 BOOL_VAR(textord_tabfind_show_reject_blobs,
00069          false, "Show blobs rejected as noise");
00070 INT_VAR(textord_tabfind_show_partitions, 0,
00071         "Show partition bounds, waiting if >1");
00072 BOOL_VAR(textord_tabfind_show_columns, false, "Show column bounds");
00073 BOOL_VAR(textord_tabfind_show_blocks, false, "Show final block bounds");
00074 BOOL_VAR(textord_tabfind_find_tables, true, "run table detection");
00075 
00076 ScrollView* ColumnFinder::blocks_win_ = NULL;
00077 
00078 // Gridsize is an estimate of the text size in the image. A suitable value
00079 // is in TO_BLOCK::line_size after find_components has been used to make
00080 // the blobs.
00081 // bleft and tright are the bounds of the image (or rectangle) being processed.
00082 // vlines is a (possibly empty) list of TabVector and vertical_x and y are
00083 // the sum logical vertical vector produced by LineFinder::FindVerticalLines.
00084 ColumnFinder::ColumnFinder(int gridsize,
00085                            const ICOORD& bleft, const ICOORD& tright,
00086                            int resolution,
00087                            TabVector_LIST* vlines, TabVector_LIST* hlines,
00088                            int vertical_x, int vertical_y)
00089   : TabFind(gridsize, bleft, tright, vlines, vertical_x, vertical_y,
00090             resolution),
00091     min_gutter_width_(static_cast<int>(kMinGutterWidthGrid * gridsize)),
00092     mean_column_gap_(tright.x() - bleft.x()),
00093     reskew_(1.0f, 0.0f), rotation_(1.0f, 0.0f), rerotate_(1.0f, 0.0f),
00094     best_columns_(NULL), stroke_width_(NULL),
00095     part_grid_(gridsize, bleft, tright), nontext_map_(NULL),
00096     projection_(resolution),
00097     denorm_(NULL), input_blobs_win_(NULL), equation_detect_(NULL) {
00098   TabVector_IT h_it(&horizontal_lines_);
00099   h_it.add_list_after(hlines);
00100 }
00101 
00102 ColumnFinder::~ColumnFinder() {
00103   column_sets_.delete_data_pointers();
00104   if (best_columns_ != NULL) {
00105     delete [] best_columns_;
00106   }
00107   if (stroke_width_ != NULL)
00108     delete stroke_width_;
00109   delete input_blobs_win_;
00110   pixDestroy(&nontext_map_);
00111   while (denorm_ != NULL) {
00112     DENORM* dead_denorm = denorm_;
00113     denorm_ = const_cast<DENORM*>(denorm_->predecessor());
00114     delete dead_denorm;
00115   }
00116 
00117   // The ColPartitions are destroyed automatically, but any boxes in
00118   // the noise_parts_ list are owned and need to be deleted explicitly.
00119   ColPartition_IT part_it(&noise_parts_);
00120   for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) {
00121     ColPartition* part = part_it.data();
00122     part->DeleteBoxes();
00123   }
00124   // Likewise any boxes in the good_parts_ list need to be deleted.
00125   // These are just the image parts. Text parts have already given their
00126   // boxes on to the TO_BLOCK, and have empty lists.
00127   part_it.set_to_list(&good_parts_);
00128   for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) {
00129     ColPartition* part = part_it.data();
00130     part->DeleteBoxes();
00131   }
00132   // Also, any blobs on the image_bblobs_ list need to have their cblobs
00133   // deleted. This only happens if there has been an early return from
00134   // FindColumns, as in a normal return, the blobs go into the grid and
00135   // end up in noise_parts_, good_parts_ or the output blocks.
00136   BLOBNBOX_IT bb_it(&image_bblobs_);
00137   for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
00138     BLOBNBOX* bblob = bb_it.data();
00139     delete bblob->cblob();
00140   }
00141 }
00142 
00143 // Performs initial processing on the blobs in the input_block:
00144 // Setup the part_grid, stroke_width_, nontext_map.
00145 // Obvious noise blobs are filtered out and used to mark the nontext_map_.
00146 // Initial stroke-width analysis is used to get local text alignment
00147 // direction, so the textline projection_ map can be setup.
00148 // On return, IsVerticallyAlignedText may be called (now optionally) to
00149 // determine the gross textline alignment of the page.
00150 void ColumnFinder::SetupAndFilterNoise(Pix* photo_mask_pix,
00151                                        TO_BLOCK* input_block) {
00152   part_grid_.Init(gridsize(), bleft(), tright());
00153   if (stroke_width_ != NULL)
00154     delete stroke_width_;
00155   stroke_width_ = new StrokeWidth(gridsize(), bleft(), tright());
00156   min_gutter_width_ = static_cast<int>(kMinGutterWidthGrid * gridsize());
00157   input_block->ReSetAndReFilterBlobs();
00158   #ifndef GRAPHICS_DISABLED
00159   if (textord_tabfind_show_blocks) {
00160     input_blobs_win_ = MakeWindow(0, 0, "Filtered Input Blobs");
00161     input_block->plot_graded_blobs(input_blobs_win_);
00162   }
00163   #endif  // GRAPHICS_DISABLED
00164   SetBlockRuleEdges(input_block);
00165   pixDestroy(&nontext_map_);
00166   // Run a preliminary strokewidth neighbour detection on the medium blobs.
00167   stroke_width_->SetNeighboursOnMediumBlobs(input_block);
00168   CCNonTextDetect nontext_detect(gridsize(), bleft(), tright());
00169   // Remove obvious noise and make the initial non-text map.
00170   nontext_map_ = nontext_detect.ComputeNonTextMask(textord_debug_tabfind,
00171                                                    photo_mask_pix, input_block);
00172   // TODO(rays) experiment with making broken CJK fixing dependent on the
00173   // language, and keeping the merged blobs on output instead of exploding at
00174   // ColPartition::MakeBlock.
00175   stroke_width_->FindTextlineDirectionAndFixBrokenCJK(true, input_block);
00176   // Clear the strokewidth grid ready for rotation or leader finding.
00177   stroke_width_->Clear();
00178 }
00179 
00180 // Tests for vertical alignment of text (returning true if so), and generates
00181 // a list of blobs of moderate aspect ratio, in the most frequent writing
00182 // direction (in osd_blobs) for orientation and script detection to test
00183 // the character orientation.
00184 // block is the single block for the whole page or rectangle to be OCRed.
00185 // Note that the vertical alignment may be due to text whose writing direction
00186 // is vertical, like say Japanese, or due to text whose writing direction is
00187 // horizontal but whose text appears vertically aligned because the image is
00188 // not the right way up.
00189 bool ColumnFinder::IsVerticallyAlignedText(TO_BLOCK* block,
00190                                            BLOBNBOX_CLIST* osd_blobs) {
00191   return stroke_width_->TestVerticalTextDirection(block, osd_blobs);
00192 }
00193 
00194 // Rotates the blobs and the TabVectors so that the gross writing direction
00195 // (text lines) are horizontal and lines are read down the page.
00196 // Applied rotation stored in rotation_.
00197 // A second rotation is calculated for application during recognition to
00198 // make the rotated blobs upright for recognition.
00199 // Subsequent rotation stored in text_rotation_.
00200 //
00201 // Arguments:
00202 //   vertical_text_lines true if the text lines are vertical.
00203 //   recognition_rotation [0..3] is the number of anti-clockwise 90 degree
00204 //   rotations from osd required for the text to be upright and readable.
00205 void ColumnFinder::CorrectOrientation(TO_BLOCK* block,
00206                                       bool vertical_text_lines,
00207                                       int recognition_rotation) {
00208   const FCOORD anticlockwise90(0.0f, 1.0f);
00209   const FCOORD clockwise90(0.0f, -1.0f);
00210   const FCOORD rotation180(-1.0f, 0.0f);
00211   const FCOORD norotation(1.0f, 0.0f);
00212 
00213   text_rotation_ = norotation;
00214   // Rotate the page to make the text upright, as implied by
00215   // recognition_rotation.
00216   rotation_ = norotation;
00217   if (recognition_rotation == 1) {
00218     rotation_ = anticlockwise90;
00219   } else if (recognition_rotation == 2) {
00220     rotation_ = rotation180;
00221   } else if (recognition_rotation == 3) {
00222     rotation_ = clockwise90;
00223   }
00224   // We infer text writing direction to be vertical if there are several
00225   // vertical text lines detected, and horizontal if not. But if the page
00226   // orientation was determined to be 90 or 270 degrees, the true writing
00227   // direction is the opposite of what we inferred.
00228   if (recognition_rotation & 1) {
00229     vertical_text_lines = !vertical_text_lines;
00230   }
00231   // If we still believe the writing direction is vertical, we use the
00232   // convention of rotating the page ccw 90 degrees to make the text lines
00233   // horizontal, and mark the blobs for rotation cw 90 degrees for
00234   // classification so that the text order is correct after recognition.
00235   if (vertical_text_lines) {
00236     rotation_.rotate(anticlockwise90);
00237     text_rotation_.rotate(clockwise90);
00238   }
00239   // Set rerotate_ to the inverse of rotation_.
00240   rerotate_ = FCOORD(rotation_.x(), -rotation_.y());
00241   if (rotation_.x() != 1.0f || rotation_.y() != 0.0f) {
00242     // Rotate all the blobs and tab vectors.
00243     RotateBlobList(rotation_, &block->large_blobs);
00244     RotateBlobList(rotation_, &block->blobs);
00245     RotateBlobList(rotation_, &block->small_blobs);
00246     RotateBlobList(rotation_, &block->noise_blobs);
00247     TabFind::ResetForVerticalText(rotation_, rerotate_, &horizontal_lines_,
00248                                   &min_gutter_width_);
00249     part_grid_.Init(gridsize(), bleft(), tright());
00250     // Reset all blobs to initial state and filter by size.
00251     // Since they have rotated, the list they belong on could have changed.
00252     block->ReSetAndReFilterBlobs();
00253     SetBlockRuleEdges(block);
00254     stroke_width_->CorrectForRotation(rerotate_, &part_grid_);
00255   }
00256   if (textord_debug_tabfind) {
00257     tprintf("Vertical=%d, orientation=%d, final rotation=(%f, %f)+(%f,%f)\n",
00258             vertical_text_lines, recognition_rotation,
00259             rotation_.x(), rotation_.y(),
00260             text_rotation_.x(), text_rotation_.y());
00261   }
00262   // Setup the denormalization.
00263   ASSERT_HOST(denorm_ == NULL);
00264   denorm_ = new DENORM;
00265   denorm_->SetupNormalization(NULL, NULL, &rotation_, NULL, NULL, 0,
00266                               0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f);
00267 }
00268 
00269 // Finds blocks of text, image, rule line, table etc, returning them in the
00270 // blocks and to_blocks
00271 // (Each TO_BLOCK points to the basic BLOCK and adds more information.)
00272 // Image blocks are generated by a combination of photo_mask_pix (which may
00273 // NOT be NULL) and the rejected text found during preliminary textline
00274 // finding.
00275 // The input_block is the result of a call to find_components, and contains
00276 // the blobs found in the image or rectangle to be OCRed. These blobs will be
00277 // removed and placed in the output blocks, while unused ones will be deleted.
00278 // If single_column is true, the input is treated as single column, but
00279 // it is still divided into blocks of equal line spacing/text size.
00280 // scaled_color is scaled down by scaled_factor from the input color image,
00281 // and may be NULL if the input was not color.
00282 // Returns -1 if the user hits the 'd' key in the blocks window while running
00283 // in debug mode, which requests a retry with more debug info.
00284 int ColumnFinder::FindBlocks(bool single_column,
00285                              Pix* scaled_color, int scaled_factor,
00286                              TO_BLOCK* input_block, Pix* photo_mask_pix,
00287                              BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks) {
00288   pixOr(photo_mask_pix, photo_mask_pix, nontext_map_);
00289   stroke_width_->FindLeaderPartitions(input_block, &part_grid_);
00290   stroke_width_->RemoveLineResidue(&big_parts_);
00291   FindInitialTabVectors(NULL, min_gutter_width_, input_block);
00292   SetBlockRuleEdges(input_block);
00293   stroke_width_->GradeBlobsIntoPartitions(rerotate_, input_block, nontext_map_,
00294                                           denorm_, &projection_,
00295                                           &part_grid_, &big_parts_);
00296   ImageFind::FindImagePartitions(photo_mask_pix, rotation_, rerotate_,
00297                                  input_block, this, &part_grid_, &big_parts_);
00298   ImageFind::TransferImagePartsToImageMask(rerotate_, &part_grid_,
00299                                            photo_mask_pix);
00300   ImageFind::FindImagePartitions(photo_mask_pix, rotation_, rerotate_,
00301                                  input_block, this, &part_grid_, &big_parts_);
00302   part_grid_.ReTypeBlobs(&image_bblobs_);
00303   TidyBlobs(input_block);
00304   Reset();
00305   // TODO(rays) need to properly handle big_parts_.
00306   ColPartition_IT p_it(&big_parts_);
00307   for (p_it.mark_cycle_pt(); !p_it.cycled_list(); p_it.forward())
00308     p_it.data()->DisownBoxes();
00309   big_parts_.clear();
00310   delete stroke_width_;
00311   stroke_width_ = NULL;
00312 
00313   // A note about handling right-to-left scripts (Hebrew/Arabic):
00314   // The columns must be reversed and come out in right-to-left instead of
00315   // the normal left-to-right order. Because the left-to-right ordering
00316   // is implicit in many data structures, it is simpler to fool the algorithms
00317   // into thinking they are dealing with left-to-right text.
00318   // To do this, we reflect the needed data in the y-axis and then reflect
00319   // the blocks back after they have been created. This is a temporary
00320   // arrangment that is confined to this function only, so the reflection
00321   // is completely invisible in the output blocks.
00322   // The only objects reflected are:
00323   // The vertical separator lines that have already been found;
00324   // The bounding boxes of all BLOBNBOXES on all lists on the input_block
00325   // plus the image_bblobs. The outlines are not touched, since they are
00326   // not looked at.
00327   bool input_is_rtl = input_block->block->right_to_left();
00328   if (input_is_rtl) {
00329     // Reflect the vertical separator lines (member of TabFind).
00330     ReflectInYAxis();
00331     // Reflect the blob boxes.
00332     ReflectForRtl(input_block, &image_bblobs_);
00333     part_grid_.ReflectInYAxis();
00334   }
00335 
00336   if (single_column) {
00337     // No tab stops needed. Just the grid that FindTabVectors makes.
00338     DontFindTabVectors(&image_bblobs_, input_block, &deskew_, &reskew_);
00339   } else {
00340     SetBlockRuleEdges(input_block);
00341     // Find the tab stops, estimate skew, and deskew the tabs, blobs and
00342     // part_grid_.
00343     FindTabVectors(&horizontal_lines_, &image_bblobs_, input_block,
00344                    min_gutter_width_, &part_grid_, &deskew_, &reskew_);
00345     // Add the deskew to the denorm_.
00346     DENORM* new_denorm = new DENORM;
00347     new_denorm->SetupNormalization(NULL, NULL, &deskew_, denorm_, NULL, 0,
00348                                    0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f);
00349     denorm_ = new_denorm;
00350   }
00351   SetBlockRuleEdges(input_block);
00352   part_grid_.SetTabStops(this);
00353 
00354   // Make the column_sets_.
00355   if (!MakeColumns(single_column)) {
00356     tprintf("Empty page!!\n");
00357     return 0;  // This is an empty page.
00358   }
00359 
00360   // Refill the grid using rectangular spreading, and get the benefit
00361   // of the completed tab vectors marking the rule edges of each blob.
00362   Clear();
00363   #ifndef GRAPHICS_DISABLED
00364   if (textord_tabfind_show_reject_blobs) {
00365     ScrollView* rej_win = MakeWindow(500, 300, "Rejected blobs");
00366     input_block->plot_graded_blobs(rej_win);
00367   }
00368   #endif  // GRAPHICS_DISABLED
00369   InsertBlobsToGrid(false, false, &image_bblobs_, this);
00370   InsertBlobsToGrid(true, true, &input_block->blobs, this);
00371 
00372   part_grid_.GridFindMargins(best_columns_);
00373   // Split and merge the partitions by looking at local neighbours.
00374   GridSplitPartitions();
00375   // Resolve unknown partitions by adding to an existing partition, fixing
00376   // the type, or declaring them noise.
00377   part_grid_.GridFindMargins(best_columns_);
00378   GridMergePartitions();
00379   // Insert any unused noise blobs that are close enough to an appropriate
00380   // partition.
00381   InsertRemainingNoise(input_block);
00382   // Add horizontal line separators as partitions.
00383   GridInsertHLinePartitions();
00384   GridInsertVLinePartitions();
00385   // Recompute margins based on a local neighbourhood search.
00386   part_grid_.GridFindMargins(best_columns_);
00387   SetPartitionTypes();
00388   if (textord_tabfind_show_initial_partitions) {
00389     ScrollView* part_win = MakeWindow(100, 300, "InitialPartitions");
00390     part_grid_.DisplayBoxes(part_win);
00391     DisplayTabVectors(part_win);
00392   }
00393 
00394   if (equation_detect_) {
00395     equation_detect_->FindEquationParts(&part_grid_, best_columns_);
00396   }
00397   if (textord_tabfind_find_tables) {
00398     TableFinder table_finder;
00399     table_finder.Init(gridsize(), bleft(), tright());
00400     table_finder.set_resolution(resolution_);
00401     table_finder.set_left_to_right_language(
00402         !input_block->block->right_to_left());
00403     // Copy cleaned partitions from part_grid_ to clean_part_grid_ and
00404     // insert dot-like noise into period_grid_
00405     table_finder.InsertCleanPartitions(&part_grid_, input_block);
00406     // Get Table Regions
00407     table_finder.LocateTables(&part_grid_, best_columns_, WidthCB(), reskew_);
00408   }
00409   GridRemoveUnderlinePartitions();
00410   part_grid_.DeleteUnknownParts(input_block);
00411 
00412   // Build the partitions into chains that belong in the same block and
00413   // refine into one-to-one links, then smooth the types within each chain.
00414   part_grid_.FindPartitionPartners();
00415   part_grid_.FindFigureCaptions();
00416   part_grid_.RefinePartitionPartners(true);
00417   SmoothPartnerRuns();
00418 
00419   #ifndef GRAPHICS_DISABLED
00420   if (textord_tabfind_show_partitions) {
00421     ScrollView* window = MakeWindow(400, 300, "Partitions");
00422     if (textord_debug_images)
00423       window->Image(AlignedBlob::textord_debug_pix().string(),
00424                     image_origin().x(), image_origin().y());
00425     part_grid_.DisplayBoxes(window);
00426     if (!textord_debug_printable)
00427       DisplayTabVectors(window);
00428     if (window != NULL && textord_tabfind_show_partitions > 1) {
00429       delete window->AwaitEvent(SVET_DESTROY);
00430     }
00431   }
00432   #endif  // GRAPHICS_DISABLED
00433   part_grid_.AssertNoDuplicates();
00434   // Ownership of the ColPartitions moves from part_sets_ to part_grid_ here,
00435   // and ownership of the BLOBNBOXes moves to the ColPartitions.
00436   // (They were previously owned by the block or the image_bblobs list.)
00437   ReleaseBlobsAndCleanupUnused(input_block);
00438   // Ownership of the ColPartitions moves from part_grid_ to good_parts_ and
00439   // noise_parts_ here. In text blocks, ownership of the BLOBNBOXes moves
00440   // from the ColPartitions to the output TO_BLOCK. In non-text, the
00441   // BLOBNBOXes stay with the ColPartitions and get deleted in the destructor.
00442   TransformToBlocks(blocks, to_blocks);
00443   if (textord_debug_tabfind) {
00444     tprintf("Found %d blocks, %d to_blocks\n",
00445             blocks->length(), to_blocks->length());
00446   }
00447 
00448   DisplayBlocks(blocks);
00449   RotateAndReskewBlocks(input_is_rtl, to_blocks);
00450   int result = 0;
00451   #ifndef GRAPHICS_DISABLED
00452   if (blocks_win_ != NULL) {
00453     bool waiting = false;
00454     do {
00455       waiting = false;
00456       SVEvent* event = blocks_win_->AwaitEvent(SVET_ANY);
00457       if (event->type == SVET_INPUT && event->parameter != NULL) {
00458         if (*event->parameter == 'd')
00459           result = -1;
00460         else
00461           blocks->clear();
00462       } else if (event->type == SVET_DESTROY) {
00463         blocks_win_ = NULL;
00464       } else {
00465         waiting = true;
00466       }
00467       delete event;
00468     } while (waiting);
00469   }
00470   #endif  // GRAPHICS_DISABLED
00471   return result;
00472 }
00473 
00474 // Get the rotation required to deskew, and its inverse rotation.
00475 void ColumnFinder::GetDeskewVectors(FCOORD* deskew, FCOORD* reskew) {
00476   *reskew = reskew_;
00477   *deskew = reskew_;
00478   deskew->set_y(-deskew->y());
00479 }
00480 
00481 void ColumnFinder::SetEquationDetect(EquationDetectBase* detect) {
00482   equation_detect_ = detect;
00483 }
00484 
00486 
00487 // Displays the blob and block bounding boxes in a window called Blocks.
00488 void ColumnFinder::DisplayBlocks(BLOCK_LIST* blocks) {
00489 #ifndef GRAPHICS_DISABLED
00490   if (textord_tabfind_show_blocks) {
00491     if (blocks_win_ == NULL)
00492       blocks_win_ = MakeWindow(700, 300, "Blocks");
00493     else
00494       blocks_win_->Clear();
00495     if (textord_debug_images)
00496       blocks_win_->Image(AlignedBlob::textord_debug_pix().string(),
00497                          image_origin().x(), image_origin().y());
00498     else
00499       DisplayBoxes(blocks_win_);
00500     BLOCK_IT block_it(blocks);
00501     int serial = 1;
00502     for (block_it.mark_cycle_pt(); !block_it.cycled_list();
00503          block_it.forward()) {
00504       BLOCK* block = block_it.data();
00505       block->plot(blocks_win_, serial++,
00506                   textord_debug_printable ? ScrollView::BLUE
00507                                           : ScrollView::GREEN);
00508     }
00509     blocks_win_->Update();
00510   }
00511 #endif
00512 }
00513 
00514 // Displays the column edges at each grid y coordinate defined by
00515 // best_columns_.
00516 void ColumnFinder::DisplayColumnBounds(PartSetVector* sets) {
00517 #ifndef GRAPHICS_DISABLED
00518   ScrollView* col_win = MakeWindow(50, 300, "Columns");
00519   if (textord_debug_images)
00520     col_win->Image(AlignedBlob::textord_debug_pix().string(),
00521                    image_origin().x(), image_origin().y());
00522   else
00523     DisplayBoxes(col_win);
00524   col_win->Pen(textord_debug_printable ? ScrollView::BLUE : ScrollView::GREEN);
00525   for (int i = 0; i < gridheight_; ++i) {
00526     ColPartitionSet* columns = best_columns_[i];
00527     if (columns != NULL)
00528       columns->DisplayColumnEdges(i * gridsize_, (i + 1) * gridsize_, col_win);
00529   }
00530 #endif
00531 }
00532 
00533 // Sets up column_sets_ (the determined column layout at each horizontal
00534 // slice). Returns false if the page is empty.
00535 bool ColumnFinder::MakeColumns(bool single_column) {
00536   // The part_sets_ are a temporary structure used during column creation,
00537   // and is a vector of ColPartitionSets, representing ColPartitions found
00538   // at horizontal slices through the page.
00539   PartSetVector part_sets;
00540   if (!single_column) {
00541     if (!part_grid_.MakeColPartSets(&part_sets))
00542       return false;  // Empty page.
00543     ASSERT_HOST(part_grid_.gridheight() == gridheight_);
00544     // Try using only the good parts first.
00545     bool good_only = true;
00546     do {
00547       for (int i = 0; i < gridheight_; ++i) {
00548         ColPartitionSet* line_set = part_sets.get(i);
00549         if (line_set != NULL && line_set->LegalColumnCandidate()) {
00550           ColPartitionSet* column_candidate = line_set->Copy(good_only);
00551           if (column_candidate != NULL)
00552             column_candidate->AddToColumnSetsIfUnique(&column_sets_, WidthCB());
00553         }
00554       }
00555       good_only = !good_only;
00556     } while (column_sets_.empty() && !good_only);
00557     if (textord_debug_tabfind)
00558       PrintColumnCandidates("Column candidates");
00559     // Improve the column candidates against themselves.
00560     ImproveColumnCandidates(&column_sets_, &column_sets_);
00561     if (textord_debug_tabfind)
00562       PrintColumnCandidates("Improved columns");
00563     // Improve the column candidates using the part_sets_.
00564     ImproveColumnCandidates(&part_sets, &column_sets_);
00565   }
00566   ColPartitionSet* single_column_set =
00567       part_grid_.MakeSingleColumnSet(WidthCB());
00568   if (single_column_set != NULL) {
00569     // Always add the single column set as a backup even if not in
00570     // single column mode.
00571     single_column_set->AddToColumnSetsIfUnique(&column_sets_, WidthCB());
00572   }
00573   if (textord_debug_tabfind)
00574     PrintColumnCandidates("Final Columns");
00575   if (!column_sets_.empty()) {
00576     // Divide the page into sections of uniform column layout.
00577     AssignColumns(part_sets);
00578     if (textord_tabfind_show_columns) {
00579       DisplayColumnBounds(&part_sets);
00580     }
00581     ComputeMeanColumnGap();
00582     ColPartition_LIST parts;
00583     for (int i = 0; i < part_sets.size(); ++i) {
00584       ColPartitionSet* line_set = part_sets.get(i);
00585       if (line_set != NULL) {
00586         line_set->RelinquishParts();
00587         delete line_set;
00588       }
00589     }
00590     return true;
00591   }
00592   return false;
00593 }
00594 
00595 // Attempt to improve the column_candidates by expanding the columns
00596 // and adding new partitions from the partition sets in src_sets.
00597 // Src_sets may be equal to column_candidates, in which case it will
00598 // use them as a source to improve themselves.
00599 void ColumnFinder::ImproveColumnCandidates(PartSetVector* src_sets,
00600                                            PartSetVector* column_sets) {
00601   PartSetVector temp_cols;
00602   temp_cols.move(column_sets);
00603   if (src_sets == column_sets)
00604     src_sets = &temp_cols;
00605   int set_size = temp_cols.size();
00606   // Try using only the good parts first.
00607   bool good_only = true;
00608   do {
00609     for (int i = 0; i < set_size; ++i) {
00610       ColPartitionSet* column_candidate = temp_cols.get(i);
00611       ASSERT_HOST(column_candidate != NULL);
00612       ColPartitionSet* improved = column_candidate->Copy(good_only);
00613       if (improved != NULL) {
00614         improved->ImproveColumnCandidate(WidthCB(), src_sets);
00615         improved->AddToColumnSetsIfUnique(column_sets, WidthCB());
00616       }
00617     }
00618     good_only = !good_only;
00619   } while (column_sets->empty() && !good_only);
00620   if (column_sets->empty())
00621     column_sets->move(&temp_cols);
00622   else
00623     temp_cols.delete_data_pointers();
00624 }
00625 
00626 // Prints debug information on the column candidates.
00627 void ColumnFinder::PrintColumnCandidates(const char* title) {
00628   int set_size =  column_sets_.size();
00629   tprintf("Found %d %s:\n", set_size, title);
00630   if (textord_debug_tabfind >= 3) {
00631     for (int i = 0; i < set_size; ++i) {
00632       ColPartitionSet* column_set = column_sets_.get(i);
00633       column_set->Print();
00634     }
00635   }
00636 }
00637 
00638 // Finds the optimal set of columns that cover the entire image with as
00639 // few changes in column partition as possible.
00640 // NOTE: this could be thought of as an optimization problem, but a simple
00641 // greedy algorithm is used instead. The algorithm repeatedly finds the modal
00642 // compatible column in an unassigned region and uses that with the extra
00643 // tweak of extending the modal region over small breaks in compatibility.
00644 // Where modal regions overlap, the boundary is chosen so as to minimize
00645 // the cost in terms of ColPartitions not fitting an approved column.
00646 void ColumnFinder::AssignColumns(const PartSetVector& part_sets) {
00647   int set_count = part_sets.size();
00648   ASSERT_HOST(set_count == gridheight());
00649   // Allocate and init the best_columns_.
00650   best_columns_ = new ColPartitionSet*[set_count];
00651   for (int y = 0; y < set_count; ++y)
00652     best_columns_[y] = NULL;
00653   int column_count = column_sets_.size();
00654   // column_set_costs[part_sets_ index][column_sets_ index] is
00655   // < MAX_INT32 if the partition set is compatible with the column set,
00656   // in which case its value is the cost for that set used in deciding
00657   // which competing set to assign.
00658   // any_columns_possible[part_sets_ index] is true if any of
00659   // possible_column_sets[part_sets_ index][*] is < MAX_INT32.
00660   // assigned_costs[part_sets_ index] is set to the column_set_costs
00661   // of the assigned column_sets_ index or MAX_INT32 if none is set.
00662   // On return the best_columns_ member is set.
00663   bool* any_columns_possible = new bool[set_count];
00664   int* assigned_costs = new int[set_count];
00665   int** column_set_costs = new int*[set_count];
00666   // Set possible column_sets to indicate whether each set is compatible
00667   // with each column.
00668   for (int part_i = 0; part_i < set_count; ++part_i) {
00669     ColPartitionSet* line_set = part_sets.get(part_i);
00670     bool debug = line_set != NULL &&
00671                  WithinTestRegion(2, line_set->bounding_box().left(),
00672                                   line_set->bounding_box().bottom());
00673     column_set_costs[part_i] = new int[column_count];
00674     any_columns_possible[part_i] = false;
00675     assigned_costs[part_i] = MAX_INT32;
00676     for (int col_i = 0; col_i < column_count; ++col_i) {
00677       if (line_set != NULL &&
00678           column_sets_.get(col_i)->CompatibleColumns(debug, line_set,
00679                                                      WidthCB())) {
00680         column_set_costs[part_i][col_i] =
00681             column_sets_.get(col_i)->UnmatchedWidth(line_set);
00682         any_columns_possible[part_i] = true;
00683       } else {
00684         column_set_costs[part_i][col_i] = MAX_INT32;
00685         if (debug)
00686           tprintf("Set id %d did not match at y=%d, lineset =%p\n",
00687                   col_i, part_i, line_set);
00688       }
00689     }
00690   }
00691   // Assign a column set to each vertical grid position.
00692   // While there is an unassigned range, find its mode.
00693   int start, end;
00694   while (BiggestUnassignedRange(set_count, any_columns_possible,
00695                                 &start, &end)) {
00696     if (textord_debug_tabfind >= 2)
00697       tprintf("Biggest unassigned range = %d- %d\n", start, end);
00698     // Find the modal column_set_id in the range.
00699     int column_set_id = RangeModalColumnSet(column_set_costs,
00700                                             assigned_costs, start, end);
00701     if (textord_debug_tabfind >= 2) {
00702       tprintf("Range modal column id = %d\n", column_set_id);
00703       column_sets_.get(column_set_id)->Print();
00704     }
00705     // Now find the longest run of the column_set_id in the range.
00706     ShrinkRangeToLongestRun(column_set_costs, assigned_costs,
00707                             any_columns_possible,
00708                             column_set_id, &start, &end);
00709     if (textord_debug_tabfind >= 2)
00710       tprintf("Shrunk range = %d- %d\n", start, end);
00711     // Extend the start and end past the longest run, while there are
00712     // only small gaps in compatibility that can be overcome by larger
00713     // regions of compatibility beyond.
00714     ExtendRangePastSmallGaps(column_set_costs, assigned_costs,
00715                              any_columns_possible,
00716                              column_set_id, -1, -1, &start);
00717     --end;
00718     ExtendRangePastSmallGaps(column_set_costs, assigned_costs,
00719                              any_columns_possible,
00720                              column_set_id, 1, set_count, &end);
00721     ++end;
00722     if (textord_debug_tabfind)
00723       tprintf("Column id %d applies to range = %d - %d\n",
00724               column_set_id, start, end);
00725     // Assign the column to the range, which now may overlap with other ranges.
00726     AssignColumnToRange(column_set_id, start, end, column_set_costs,
00727                         assigned_costs);
00728   }
00729   // If anything remains unassigned, the whole lot is unassigned, so
00730   // arbitrarily assign id 0.
00731   if (best_columns_[0] == NULL) {
00732     AssignColumnToRange(0, 0, gridheight_, column_set_costs, assigned_costs);
00733   }
00734   // Free memory.
00735   for (int i = 0; i < set_count; ++i) {
00736     delete [] column_set_costs[i];
00737   }
00738   delete [] assigned_costs;
00739   delete [] any_columns_possible;
00740   delete [] column_set_costs;
00741 }
00742 
00743 // Finds the biggest range in part_sets_ that has no assigned column, but
00744 // column assignment is possible.
00745 bool ColumnFinder::BiggestUnassignedRange(int set_count,
00746                                           const bool* any_columns_possible,
00747                                           int* best_start, int* best_end) {
00748   int best_range_size = 0;
00749   *best_start = set_count;
00750   *best_end = set_count;
00751   int end = set_count;
00752   for (int start = 0; start < gridheight_; start = end) {
00753     // Find the first unassigned index in start.
00754     while (start < set_count) {
00755       if (best_columns_[start] == NULL && any_columns_possible[start])
00756         break;
00757       ++start;
00758     }
00759     // Find the first past the end and count the good ones in between.
00760     int range_size = 1;  // Number of non-null, but unassigned line sets.
00761     end = start + 1;
00762     while (end < set_count) {
00763       if (best_columns_[end] != NULL)
00764         break;
00765       if (any_columns_possible[end])
00766         ++range_size;
00767       ++end;
00768     }
00769     if (start < set_count && range_size > best_range_size) {
00770       best_range_size = range_size;
00771       *best_start = start;
00772       *best_end = end;
00773     }
00774   }
00775   return *best_start < *best_end;
00776 }
00777 
00778 // Finds the modal compatible column_set_ index within the given range.
00779 int ColumnFinder::RangeModalColumnSet(int** column_set_costs,
00780                                       const int* assigned_costs,
00781                                       int start, int end) {
00782   int column_count = column_sets_.size();
00783   STATS column_stats(0, column_count);
00784   for (int part_i = start; part_i < end; ++part_i) {
00785     for (int col_j = 0; col_j < column_count; ++col_j) {
00786       if (column_set_costs[part_i][col_j] < assigned_costs[part_i])
00787         column_stats.add(col_j, 1);
00788     }
00789   }
00790   ASSERT_HOST(column_stats.get_total() > 0);
00791   return column_stats.mode();
00792 }
00793 
00794 // Given that there are many column_set_id compatible columns in the range,
00795 // shrinks the range to the longest contiguous run of compatibility, allowing
00796 // gaps where no columns are possible, but not where competing columns are
00797 // possible.
00798 void ColumnFinder::ShrinkRangeToLongestRun(int** column_set_costs,
00799                                            const int* assigned_costs,
00800                                            const bool* any_columns_possible,
00801                                            int column_set_id,
00802                                            int* best_start, int* best_end) {
00803   // orig_start and orig_end are the maximum range we will look at.
00804   int orig_start = *best_start;
00805   int orig_end = *best_end;
00806   int best_range_size = 0;
00807   *best_start = orig_end;
00808   *best_end = orig_end;
00809   int end = orig_end;
00810   for (int start = orig_start; start < orig_end; start = end) {
00811     // Find the first possible
00812     while (start < orig_end) {
00813       if (column_set_costs[start][column_set_id] < assigned_costs[start] ||
00814           !any_columns_possible[start])
00815         break;
00816       ++start;
00817     }
00818     // Find the first past the end.
00819     end = start + 1;
00820     while (end < orig_end) {
00821       if (column_set_costs[end][column_set_id] >= assigned_costs[start] &&
00822           any_columns_possible[end])
00823           break;
00824       ++end;
00825     }
00826     if (start < orig_end && end - start > best_range_size) {
00827       best_range_size = end - start;
00828       *best_start = start;
00829       *best_end = end;
00830     }
00831   }
00832 }
00833 
00834 // Moves start in the direction of step, upto, but not including end while
00835 // the only incompatible regions are no more than kMaxIncompatibleColumnCount
00836 // in size, and the compatible regions beyond are bigger.
00837 void ColumnFinder::ExtendRangePastSmallGaps(int** column_set_costs,
00838                                             const int* assigned_costs,
00839                                             const bool* any_columns_possible,
00840                                             int column_set_id,
00841                                             int step, int end, int* start) {
00842   if (textord_debug_tabfind > 2)
00843     tprintf("Starting expansion at %d, step=%d, limit=%d\n",
00844             *start, step, end);
00845   if (*start == end)
00846     return;  // Cannot be expanded.
00847 
00848   int barrier_size = 0;
00849   int good_size = 0;
00850   do {
00851     // Find the size of the incompatible barrier.
00852     barrier_size = 0;
00853     int i;
00854     for (i = *start + step; i != end; i += step) {
00855       if (column_set_costs[i][column_set_id] < assigned_costs[i])
00856         break;  // We are back on.
00857       // Locations where none are possible don't count.
00858       if (any_columns_possible[i])
00859         ++barrier_size;
00860     }
00861     if (textord_debug_tabfind > 2)
00862       tprintf("At %d, Barrier size=%d\n", i, barrier_size);
00863     if (barrier_size > kMaxIncompatibleColumnCount)
00864       return;  // Barrier too big.
00865     if (i == end) {
00866       // We can't go any further, but the barrier was small, so go to the end.
00867       *start = i - step;
00868       return;
00869     }
00870     // Now find the size of the good region on the other side.
00871     good_size = 1;
00872     for (i += step; i != end; i += step) {
00873       if (column_set_costs[i][column_set_id] < assigned_costs[i])
00874         ++good_size;
00875       else if (any_columns_possible[i])
00876         break;
00877     }
00878     if (textord_debug_tabfind > 2)
00879       tprintf("At %d, good size = %d\n", i, good_size);
00880     // If we had enough good ones we can extend the start and keep looking.
00881     if (good_size >= barrier_size)
00882       *start = i - step;
00883   } while (good_size >= barrier_size);
00884 }
00885 
00886 // Assigns the given column_set_id to the given range.
00887 void ColumnFinder::AssignColumnToRange(int column_set_id, int start, int end,
00888                                        int** column_set_costs,
00889                                        int* assigned_costs) {
00890   ColPartitionSet* column_set = column_sets_.get(column_set_id);
00891   for (int i = start; i < end; ++i) {
00892     assigned_costs[i] = column_set_costs[i][column_set_id];
00893     best_columns_[i] = column_set;
00894   }
00895 }
00896 
00897 // Computes the mean_column_gap_.
00898 void ColumnFinder::ComputeMeanColumnGap() {
00899   int total_gap = 0;
00900   int total_width = 0;
00901   int gap_samples = 0;
00902   int width_samples = 0;
00903   for (int i = 0; i < gridheight_; ++i) {
00904     ASSERT_HOST(best_columns_[i] != NULL);
00905     best_columns_[i]->AccumulateColumnWidthsAndGaps(&total_width,
00906                                                     &width_samples,
00907                                                     &total_gap,
00908                                                     &gap_samples);
00909   }
00910   mean_column_gap_ = gap_samples > 0 ? total_gap / gap_samples
00911                                      : total_width / width_samples;
00912 }
00913 
00916 
00917 // Helper to delete all the deletable blobs on the list. Owned blobs are
00918 // extracted from the list, but not deleted, leaving them owned by the owner().
00919 static void ReleaseAllBlobsAndDeleteUnused(BLOBNBOX_LIST* blobs) {
00920   for (BLOBNBOX_IT blob_it(blobs); !blob_it.empty(); blob_it.forward()) {
00921     BLOBNBOX* blob = blob_it.extract();
00922     if (blob->owner() == NULL) {
00923       delete blob->cblob();
00924       delete blob;
00925     }
00926   }
00927 }
00928 
00929 // Hoovers up all un-owned blobs and deletes them.
00930 // The rest get released from the block so the ColPartitions can pass
00931 // ownership to the output blocks.
00932 void ColumnFinder::ReleaseBlobsAndCleanupUnused(TO_BLOCK* block) {
00933   ReleaseAllBlobsAndDeleteUnused(&block->blobs);
00934   ReleaseAllBlobsAndDeleteUnused(&block->small_blobs);
00935   ReleaseAllBlobsAndDeleteUnused(&block->noise_blobs);
00936   ReleaseAllBlobsAndDeleteUnused(&block->large_blobs);
00937   ReleaseAllBlobsAndDeleteUnused(&image_bblobs_);
00938 }
00939 
00940 // Splits partitions that cross columns where they have nothing in the gap.
00941 void ColumnFinder::GridSplitPartitions() {
00942   // Iterate the ColPartitions in the grid.
00943   GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
00944     gsearch(&part_grid_);
00945   gsearch.StartFullSearch();
00946   ColPartition* dont_repeat = NULL;
00947   ColPartition* part;
00948   while ((part = gsearch.NextFullSearch()) != NULL) {
00949     if (part->blob_type() < BRT_UNKNOWN || part == dont_repeat)
00950       continue;  // Only applies to text partitions.
00951     ColPartitionSet* column_set = best_columns_[gsearch.GridY()];
00952     int first_col = -1;
00953     int last_col = -1;
00954     // Find which columns the partition spans.
00955     part->ColumnRange(resolution_, column_set, &first_col, &last_col);
00956     if (first_col > 0)
00957       --first_col;
00958     // Convert output column indices to physical column indices.
00959     first_col /= 2;
00960     last_col /= 2;
00961     // We will only consider cases where a partition spans two columns,
00962     // since a heading that spans more columns than that is most likely
00963     // genuine.
00964     if (last_col != first_col + 1)
00965       continue;
00966     // Set up a rectangle search x-bounded by the column gap and y by the part.
00967     int y = part->MidY();
00968     TBOX margin_box = part->bounding_box();
00969     bool debug = AlignedBlob::WithinTestRegion(2, margin_box.left(),
00970                                                margin_box.bottom());
00971     if (debug) {
00972       tprintf("Considering partition for GridSplit:");
00973       part->Print();
00974     }
00975     ColPartition* column = column_set->GetColumnByIndex(first_col);
00976     if (column == NULL)
00977       continue;
00978     margin_box.set_left(column->RightAtY(y) + 2);
00979     column = column_set->GetColumnByIndex(last_col);
00980     if (column == NULL)
00981       continue;
00982     margin_box.set_right(column->LeftAtY(y) - 2);
00983     // TODO(rays) Decide whether to keep rectangular filling or not in the
00984     // main grid and therefore whether we need a fancier search here.
00985     // Now run the rect search on the main blob grid.
00986     GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> rectsearch(this);
00987     if (debug) {
00988       tprintf("Searching box (%d,%d)->(%d,%d)\n",
00989               margin_box.left(), margin_box.bottom(),
00990               margin_box.right(), margin_box.top());
00991       part->Print();
00992     }
00993     rectsearch.StartRectSearch(margin_box);
00994     BLOBNBOX* bbox;
00995     while ((bbox = rectsearch.NextRectSearch()) != NULL) {
00996       if (bbox->bounding_box().overlap(margin_box))
00997         break;
00998     }
00999     if (bbox == NULL) {
01000       // There seems to be nothing in the hole, so split the partition.
01001       gsearch.RemoveBBox();
01002       int x_middle = (margin_box.left() + margin_box.right()) / 2;
01003       if (debug) {
01004         tprintf("Splitting part at %d:", x_middle);
01005         part->Print();
01006       }
01007       ColPartition* split_part = part->SplitAt(x_middle);
01008       if (split_part != NULL) {
01009         if (debug) {
01010           tprintf("Split result:");
01011           part->Print();
01012           split_part->Print();
01013         }
01014         part_grid_.InsertBBox(true, true, split_part);
01015       } else {
01016         // Split had no effect
01017         if (debug)
01018           tprintf("Split had no effect\n");
01019         dont_repeat = part;
01020       }
01021       part_grid_.InsertBBox(true, true, part);
01022       gsearch.RepositionIterator();
01023     } else if (debug) {
01024       tprintf("Part cannot be split: blob (%d,%d)->(%d,%d) in column gap\n",
01025               bbox->bounding_box().left(), bbox->bounding_box().bottom(),
01026               bbox->bounding_box().right(), bbox->bounding_box().top());
01027     }
01028   }
01029 }
01030 
01031 // Merges partitions where there is vertical overlap, within a single column,
01032 // and the horizontal gap is small enough.
01033 void ColumnFinder::GridMergePartitions() {
01034   // Iterate the ColPartitions in the grid.
01035   GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
01036     gsearch(&part_grid_);
01037   gsearch.StartFullSearch();
01038   ColPartition* part;
01039   while ((part = gsearch.NextFullSearch()) != NULL) {
01040     if (part->IsUnMergeableType())
01041       continue;
01042     // Set up a rectangle search x-bounded by the column and y by the part.
01043     ColPartitionSet* columns = best_columns_[gsearch.GridY()];
01044     TBOX box = part->bounding_box();
01045     bool debug = AlignedBlob::WithinTestRegion(1, box.left(), box.bottom());
01046     if (debug) {
01047       tprintf("Considering part for merge at:");
01048       part->Print();
01049     }
01050     int y = part->MidY();
01051     ColPartition* left_column = columns->ColumnContaining(box.left(), y);
01052     ColPartition* right_column = columns->ColumnContaining(box.right(), y);
01053     if (left_column == NULL || right_column != left_column) {
01054       if (debug)
01055         tprintf("In different columns\n");
01056       continue;
01057     }
01058     box.set_left(left_column->LeftAtY(y));
01059     box.set_right(right_column->RightAtY(y));
01060     // Now run the rect search.
01061     bool modified_box = false;
01062     GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
01063       rsearch(&part_grid_);
01064     rsearch.SetUniqueMode(true);
01065     rsearch.StartRectSearch(box);
01066     ColPartition* neighbour;
01067 
01068     while ((neighbour = rsearch.NextRectSearch()) != NULL) {
01069       if (neighbour == part || neighbour->IsUnMergeableType())
01070         continue;
01071       const TBOX& neighbour_box = neighbour->bounding_box();
01072       if (debug) {
01073         tprintf("Considering merge with neighbour at:");
01074         neighbour->Print();
01075       }
01076       if (neighbour_box.right() < box.left() ||
01077           neighbour_box.left() > box.right())
01078         continue;  // Not within the same column.
01079       if (part->VSignificantCoreOverlap(*neighbour) &&
01080           part->TypesMatch(*neighbour)) {
01081         // There is vertical overlap and the gross types match, but only
01082         // merge if the horizontal gap is small enough, as one of the
01083         // partitions may be a figure caption within a column.
01084         // If there is only one column, then the mean_column_gap_ is large
01085         // enough to allow almost any merge, by being the mean column width.
01086         const TBOX& part_box = part->bounding_box();
01087         // Don't merge if there is something else in the way. Use the margin
01088         // to decide, and check both to allow a bit of overlap.
01089         if (neighbour_box.left() > part->right_margin() &&
01090             part_box.right() < neighbour->left_margin())
01091           continue;  // Neighbour is too far to the right.
01092         if (neighbour_box.right() < part->left_margin() &&
01093             part_box.left() > neighbour->right_margin())
01094           continue;  // Neighbour is too far to the left.
01095         int h_gap = MAX(part_box.left(), neighbour_box.left()) -
01096                     MIN(part_box.right(), neighbour_box.right());
01097         if (h_gap < mean_column_gap_ * kHorizontalGapMergeFraction ||
01098             part_box.width() < mean_column_gap_ ||
01099             neighbour_box.width() < mean_column_gap_) {
01100           if (debug) {
01101             tprintf("Running grid-based merge between:\n");
01102             part->Print();
01103             neighbour->Print();
01104           }
01105           rsearch.RemoveBBox();
01106           gsearch.RepositionIterator();
01107           part->Absorb(neighbour, WidthCB());
01108           modified_box = true;
01109         } else if (debug) {
01110           tprintf("Neighbour failed hgap test\n");
01111         }
01112       } else if (debug) {
01113         tprintf("Neighbour failed overlap or typesmatch test\n");
01114       }
01115     }
01116     if (modified_box) {
01117       // We modified the box of part, so re-insert it into the grid.
01118       // This does no harm in the current cell, as it already exists there,
01119       // but it needs to exist in all the cells covered by its bounding box,
01120       // or it will never be found by a full search.
01121       // Because the box has changed, it has to be removed first, otherwise
01122       // add_sorted may fail to keep a single copy of the pointer.
01123       gsearch.RemoveBBox();
01124       part_grid_.InsertBBox(true, true, part);
01125       gsearch.RepositionIterator();
01126     }
01127   }
01128 }
01129 
01130 // Inserts remaining noise blobs into the most applicable partition if any.
01131 // If there is no applicable partition, then the blobs are deleted.
01132 void ColumnFinder::InsertRemainingNoise(TO_BLOCK* block) {
01133   BLOBNBOX_IT blob_it(&block->noise_blobs);
01134   for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
01135     BLOBNBOX* blob = blob_it.data();
01136     if (blob->owner() != NULL) continue;
01137     TBOX search_box(blob->bounding_box());
01138     bool debug = WithinTestRegion(2, search_box.left(), search_box.bottom());
01139     search_box.pad(gridsize(), gridsize());
01140     // Setup a rectangle search to find the best partition to merge with.
01141     ColPartitionGridSearch rsearch(&part_grid_);
01142     rsearch.SetUniqueMode(true);
01143     rsearch.StartRectSearch(search_box);
01144     ColPartition* part;
01145     ColPartition* best_part = NULL;
01146     int best_distance = 0;
01147     while ((part = rsearch.NextRectSearch()) != NULL) {
01148       if (part->IsUnMergeableType())
01149         continue;
01150       int distance = projection_.DistanceOfBoxFromPartition(
01151           blob->bounding_box(), *part, denorm_, debug);
01152       if (best_part == NULL || distance < best_distance) {
01153         best_part = part;
01154         best_distance = distance;
01155       }
01156     }
01157     if (best_part != NULL &&
01158         best_distance < kMaxDistToPartSizeRatio * best_part->median_size()) {
01159       // Close enough to merge.
01160       if (debug) {
01161         tprintf("Adding noise blob with distance %d, thr=%g:box:",
01162                 best_distance,
01163                 kMaxDistToPartSizeRatio * best_part->median_size());
01164         blob->bounding_box().print();
01165         tprintf("To partition:");
01166         best_part->Print();
01167       }
01168       part_grid_.RemoveBBox(best_part);
01169       best_part->AddBox(blob);
01170       part_grid_.InsertBBox(true, true, best_part);
01171       blob->set_owner(best_part);
01172       blob->set_flow(best_part->flow());
01173       blob->set_region_type(best_part->blob_type());
01174     } else {
01175       // Mark the blob for deletion.
01176       blob->set_region_type(BRT_NOISE);
01177     }
01178   }
01179   // Delete the marked blobs, clearing neighbour references.
01180   block->DeleteUnownedNoise();
01181 }
01182 
01183 // Helper makes a box from a horizontal line.
01184 static TBOX BoxFromHLine(const TabVector* hline) {
01185   int top = MAX(hline->startpt().y(), hline->endpt().y());
01186   int bottom = MIN(hline->startpt().y(), hline->endpt().y());
01187   top += hline->mean_width();
01188   if (top == bottom) {
01189     if (bottom > 0)
01190       --bottom;
01191     else
01192       ++top;
01193   }
01194   return TBOX(hline->startpt().x(), bottom, hline->endpt().x(), top);
01195 }
01196 
01197 // Remove partitions that come from horizontal lines that look like
01198 // underlines, but are not part of a table.
01199 void ColumnFinder::GridRemoveUnderlinePartitions() {
01200   TabVector_IT hline_it(&horizontal_lines_);
01201   for (hline_it.mark_cycle_pt(); !hline_it.cycled_list(); hline_it.forward()) {
01202     TabVector* hline = hline_it.data();
01203     if (hline->intersects_other_lines())
01204       continue;
01205     TBOX line_box = BoxFromHLine(hline);
01206     TBOX search_box = line_box;
01207     search_box.pad(0, line_box.height());
01208     ColPartitionGridSearch part_search(&part_grid_);
01209     part_search.SetUniqueMode(true);
01210     part_search.StartRectSearch(search_box);
01211     ColPartition* covered;
01212     bool touched_table = false;
01213     bool touched_text = false;
01214     ColPartition* line_part = NULL;
01215     while ((covered = part_search.NextRectSearch()) != NULL) {
01216       if (covered->type() == PT_TABLE) {
01217         touched_table = true;
01218         break;
01219       } else if (covered->IsTextType()) {
01220         // TODO(rays) Add a list of underline sections to ColPartition.
01221         int text_bottom = covered->median_bottom();
01222         if (line_box.bottom() <= text_bottom && text_bottom <= search_box.top())
01223           touched_text = true;
01224       } else if (covered->blob_type() == BRT_HLINE &&
01225           line_box.contains(covered->bounding_box())) {
01226         line_part = covered;
01227       }
01228     }
01229     if (line_part != NULL && !touched_table && touched_text) {
01230       part_grid_.RemoveBBox(line_part);
01231       delete line_part;
01232     }
01233   }
01234 }
01235 
01236 // Add horizontal line separators as partitions.
01237 void ColumnFinder::GridInsertHLinePartitions() {
01238   TabVector_IT hline_it(&horizontal_lines_);
01239   for (hline_it.mark_cycle_pt(); !hline_it.cycled_list(); hline_it.forward()) {
01240     TabVector* hline = hline_it.data();
01241     TBOX line_box = BoxFromHLine(hline);
01242     ColPartition* part = ColPartition::MakeLinePartition(
01243         BRT_HLINE, vertical_skew_,
01244         line_box.left(), line_box.bottom(), line_box.right(), line_box.top());
01245     part->set_type(PT_HORZ_LINE);
01246     bool any_image = false;
01247     ColPartitionGridSearch part_search(&part_grid_);
01248     part_search.SetUniqueMode(true);
01249     part_search.StartRectSearch(line_box);
01250     ColPartition* covered;
01251     while ((covered = part_search.NextRectSearch()) != NULL) {
01252       if (covered->IsImageType()) {
01253         any_image = true;
01254         break;
01255       }
01256     }
01257     if (!any_image)
01258       part_grid_.InsertBBox(true, true, part);
01259     else
01260       delete part;
01261   }
01262 }
01263 
01264 // Add horizontal line separators as partitions.
01265 void ColumnFinder::GridInsertVLinePartitions() {
01266   TabVector_IT vline_it(dead_vectors());
01267   for (vline_it.mark_cycle_pt(); !vline_it.cycled_list(); vline_it.forward()) {
01268     TabVector* vline = vline_it.data();
01269     if (!vline->IsSeparator())
01270       continue;
01271     int left = MIN(vline->startpt().x(), vline->endpt().x());
01272     int right = MAX(vline->startpt().x(), vline->endpt().x());
01273     right += vline->mean_width();
01274     if (left == right) {
01275       if (left > 0)
01276         --left;
01277       else
01278         ++right;
01279     }
01280     ColPartition* part = ColPartition::MakeLinePartition(
01281         BRT_VLINE, vertical_skew_,
01282         left, vline->startpt().y(), right, vline->endpt().y());
01283     part->set_type(PT_VERT_LINE);
01284     bool any_image = false;
01285     ColPartitionGridSearch part_search(&part_grid_);
01286     part_search.SetUniqueMode(true);
01287     part_search.StartRectSearch(part->bounding_box());
01288     ColPartition* covered;
01289     while ((covered = part_search.NextRectSearch()) != NULL) {
01290       if (covered->IsImageType()) {
01291         any_image = true;
01292         break;
01293       }
01294     }
01295     if (!any_image)
01296       part_grid_.InsertBBox(true, true, part);
01297     else
01298       delete part;
01299   }
01300 }
01301 
01302 // For every ColPartition in the grid, sets its type based on position
01303 // in the columns.
01304 void ColumnFinder::SetPartitionTypes() {
01305   GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
01306     gsearch(&part_grid_);
01307   gsearch.StartFullSearch();
01308   ColPartition* part;
01309   while ((part = gsearch.NextFullSearch()) != NULL) {
01310     part->SetPartitionType(resolution_, best_columns_[gsearch.GridY()]);
01311   }
01312 }
01313 
01314 // Only images remain with multiple types in a run of partners.
01315 // Sets the type of all in the group to the maximum of the group.
01316 void ColumnFinder::SmoothPartnerRuns() {
01317   // Iterate the ColPartitions in the grid.
01318   GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
01319     gsearch(&part_grid_);
01320   gsearch.StartFullSearch();
01321   ColPartition* part;
01322   while ((part = gsearch.NextFullSearch()) != NULL) {
01323     ColPartition* partner = part->SingletonPartner(true);
01324     if (partner != NULL) {
01325       if (partner->SingletonPartner(false) != part) {
01326         tprintf("Ooops! Partition:(%d partners)",
01327                 part->upper_partners()->length());
01328         part->Print();
01329         tprintf("has singleton partner:(%d partners",
01330                 partner->lower_partners()->length());
01331         partner->Print();
01332         tprintf("but its singleton partner is:");
01333         if (partner->SingletonPartner(false) == NULL)
01334           tprintf("NULL\n");
01335         else
01336           partner->SingletonPartner(false)->Print();
01337       }
01338       ASSERT_HOST(partner->SingletonPartner(false) == part);
01339     } else if (part->SingletonPartner(false) != NULL) {
01340       ColPartitionSet* column_set = best_columns_[gsearch.GridY()];
01341       int column_count = column_set->ColumnCount();
01342       part->SmoothPartnerRun(column_count * 2 + 1);
01343     }
01344   }
01345 }
01346 
01347 // Helper functions for TransformToBlocks.
01348 // Add the part to the temp list in the correct order.
01349 void ColumnFinder::AddToTempPartList(ColPartition* part,
01350                                      ColPartition_CLIST* temp_list) {
01351   int mid_y = part->MidY();
01352   ColPartition_C_IT it(temp_list);
01353   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
01354     ColPartition* test_part = it.data();
01355     if (part->type() == PT_NOISE || test_part->type() == PT_NOISE)
01356       continue;  // Noise stays in sequence.
01357     if (test_part == part->SingletonPartner(false))
01358       break;  // Insert before its lower partner.
01359     int neighbour_bottom = test_part->median_bottom();
01360     int neighbour_top = test_part->median_top();
01361     int neighbour_y = (neighbour_bottom + neighbour_top) / 2;
01362     if (neighbour_y < mid_y)
01363       break;  // part is above test_part so insert it.
01364     if (!part->HOverlaps(*test_part) && !part->WithinSameMargins(*test_part))
01365       continue;  // Incompatibles stay in order
01366   }
01367   if (it.cycled_list()) {
01368     it.add_to_end(part);
01369   } else {
01370     it.add_before_stay_put(part);
01371   }
01372 }
01373 
01374 // Add everything from the temp list to the work_set assuming correct order.
01375 void ColumnFinder::EmptyTempPartList(ColPartition_CLIST* temp_list,
01376                                      WorkingPartSet_LIST* work_set) {
01377   ColPartition_C_IT it(temp_list);
01378   while (!it.empty()) {
01379     it.extract()->AddToWorkingSet(bleft_, tright_, resolution_,
01380                           &good_parts_, work_set);
01381     it.forward();
01382   }
01383 }
01384 
01385 // Transform the grid of partitions to the output blocks.
01386 void ColumnFinder::TransformToBlocks(BLOCK_LIST* blocks,
01387                                      TO_BLOCK_LIST* to_blocks) {
01388   WorkingPartSet_LIST work_set;
01389   ColPartitionSet* column_set = NULL;
01390   ColPartition_IT noise_it(&noise_parts_);
01391   // The temp_part_list holds a list of parts at the same grid y coord
01392   // so they can be added in the correct order. This prevents thin objects
01393   // like horizontal lines going before the text lines above them.
01394   ColPartition_CLIST temp_part_list;
01395   // Iterate the ColPartitions in the grid. It starts at the top
01396   GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
01397     gsearch(&part_grid_);
01398   gsearch.StartFullSearch();
01399   int prev_grid_y = -1;
01400   ColPartition* part;
01401   while ((part = gsearch.NextFullSearch()) != NULL) {
01402     int grid_y = gsearch.GridY();
01403     if (grid_y != prev_grid_y) {
01404       EmptyTempPartList(&temp_part_list, &work_set);
01405       prev_grid_y = grid_y;
01406     }
01407     if (best_columns_[grid_y] != column_set) {
01408       column_set = best_columns_[grid_y];
01409       // Every line should have a non-null best column.
01410       ASSERT_HOST(column_set != NULL);
01411       column_set->ChangeWorkColumns(bleft_, tright_, resolution_,
01412                                     &good_parts_, &work_set);
01413       if (textord_debug_tabfind)
01414         tprintf("Changed column groups at grid index %d, y=%d\n",
01415                 gsearch.GridY(), gsearch.GridY() * gridsize());
01416     }
01417     if (part->type() == PT_NOISE) {
01418       noise_it.add_to_end(part);
01419     } else {
01420       AddToTempPartList(part, &temp_part_list);
01421     }
01422   }
01423   EmptyTempPartList(&temp_part_list, &work_set);
01424   // Now finish all working sets and transfer ColPartitionSets to block_sets.
01425   WorkingPartSet_IT work_it(&work_set);
01426   while (!work_it.empty()) {
01427     WorkingPartSet* working_set = work_it.extract();
01428     working_set->ExtractCompletedBlocks(bleft_, tright_, resolution_,
01429                                         &good_parts_, blocks, to_blocks);
01430     delete working_set;
01431     work_it.forward();
01432   }
01433 }
01434 
01435 // Helper reflects a list of blobs in the y-axis.
01436 // Only reflects the BLOBNBOX bounding box. Not the blobs or outlines below.
01437 static void ReflectBlobList(BLOBNBOX_LIST* bblobs) {
01438   BLOBNBOX_IT it(bblobs);
01439   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
01440     it.data()->reflect_box_in_y_axis();
01441   }
01442 }
01443 
01444 // Reflect the blob boxes (but not the outlines) in the y-axis so that
01445 // the blocks get created in the correct RTL order. Reflects the blobs
01446 // in the input_block and the bblobs list.
01447 // The reflection is undone in RotateAndReskewBlocks by
01448 // reflecting the blocks themselves, and then recomputing the blob bounding
01449 // boxes.
01450 void ColumnFinder::ReflectForRtl(TO_BLOCK* input_block, BLOBNBOX_LIST* bblobs) {
01451   ReflectBlobList(bblobs);
01452   ReflectBlobList(&input_block->blobs);
01453   ReflectBlobList(&input_block->small_blobs);
01454   ReflectBlobList(&input_block->noise_blobs);
01455   ReflectBlobList(&input_block->large_blobs);
01456   // Update the denorm with the reflection.
01457   DENORM* new_denorm = new DENORM;
01458   new_denorm->SetupNormalization(NULL, NULL, NULL, denorm_, NULL, 0,
01459                                  0.0f, 0.0f, -1.0f, 1.0f, 0.0f, 0.0f);
01460   denorm_ = new_denorm;
01461 }
01462 
01463 // Undo the deskew that was done in FindTabVectors, as recognition is done
01464 // without correcting blobs or blob outlines for skew.
01465 // Reskew the completed blocks to put them back to the original rotated coords
01466 // that were created by CorrectOrientation.
01467 // If the input_is_rtl, then reflect the blocks in the y-axis to undo the
01468 // reflection that was done before FindTabVectors.
01469 // Blocks that were identified as vertical text (relative to the rotated
01470 // coordinates) are further rotated so the text lines are horizontal.
01471 // blob polygonal outlines are rotated to match the position of the blocks
01472 // that they are in, and their bounding boxes are recalculated to be accurate.
01473 // Record appropriate inverse transformations and required
01474 // classifier transformation in the blocks.
01475 void ColumnFinder::RotateAndReskewBlocks(bool input_is_rtl,
01476                                          TO_BLOCK_LIST* blocks) {
01477   if (input_is_rtl) {
01478     // The skew is backwards because of the reflection.
01479     FCOORD tmp = deskew_;
01480     deskew_ = reskew_;
01481     reskew_ = tmp;
01482   }
01483   TO_BLOCK_IT it(blocks);
01484   int block_index = 1;
01485   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
01486     TO_BLOCK* to_block = it.data();
01487     BLOCK* block = to_block->block;
01488     // Blocks are created on the deskewed blob outlines in TransformToBlocks()
01489     // so we need to reskew them back to page coordinates.
01490     if (input_is_rtl) {
01491       block->reflect_polygon_in_y_axis();
01492     }
01493     block->rotate(reskew_);
01494     // Copy the right_to_left flag to the created block.
01495     block->set_right_to_left(input_is_rtl);
01496     // Save the skew angle in the block for baseline computations.
01497     block->set_skew(reskew_);
01498     block->set_index(block_index++);
01499     FCOORD blob_rotation = ComputeBlockAndClassifyRotation(block);
01500     // Rotate all the blobs if needed and recompute the bounding boxes.
01501     // Compute the block median blob width and height as we go.
01502     STATS widths(0, block->bounding_box().width());
01503     STATS heights(0, block->bounding_box().height());
01504     BLOBNBOX_IT blob_it(&to_block->blobs);
01505     for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
01506       BLOBNBOX* blob = blob_it.data();
01507       if (blob_rotation.x() != 1.0f || blob_rotation.y() != 0.0f) {
01508         blob->cblob()->rotate(blob_rotation);
01509       }
01510       blob->compute_bounding_box();
01511       widths.add(blob->bounding_box().width(), 1);
01512       heights.add(blob->bounding_box().height(), 1);
01513     }
01514     block->set_median_size(static_cast<int>(widths.median() + 0.5),
01515                            static_cast<int>(heights.median() + 0.5));
01516     if (textord_debug_tabfind >= 2)
01517       tprintf("Block median size = (%d, %d)\n",
01518               block->median_size().x(), block->median_size().y());
01519   }
01520 }
01521 
01522 // Computes the rotations for the block (to make textlines horizontal) and
01523 // for the blobs (for classification) and sets the appropriate members
01524 // of the given block.
01525 // Returns the rotation that needs to be applied to the blobs to make
01526 // them sit in the rotated block.
01527 FCOORD ColumnFinder::ComputeBlockAndClassifyRotation(BLOCK* block) {
01528   // The text_rotation_ tells us the gross page text rotation that needs
01529   // to be applied for classification
01530   // TODO(rays) find block-level classify rotation by orientation detection.
01531   // In the mean time, assume that "up" for text printed in the minority
01532   // direction (PT_VERTICAL_TEXT) is perpendicular to the line of reading.
01533   // Accomplish this by zero-ing out the text rotation.  This covers the
01534   // common cases of image credits in documents written in Latin scripts
01535   // and page headings for predominantly vertically written CJK books.
01536   FCOORD classify_rotation(text_rotation_);
01537   FCOORD block_rotation(1.0f, 0.0f);
01538   if (block->poly_block()->isA() == PT_VERTICAL_TEXT) {
01539     // Vertical text needs to be 90 degrees rotated relative to the rest.
01540     // If the rest has a 90 degree rotation already, use the inverse, making
01541     // the vertical text the original way up. Otherwise use 90 degrees
01542     // clockwise.
01543     if (rerotate_.x() == 0.0f)
01544       block_rotation = rerotate_;
01545     else
01546       block_rotation = FCOORD(0.0f, -1.0f);
01547     block->rotate(block_rotation);
01548     classify_rotation = FCOORD(1.0f, 0.0f);
01549   }
01550   block_rotation.rotate(rotation_);
01551   // block_rotation is now what we have done to the blocks. Now do the same
01552   // thing to the blobs, but save the inverse rotation in the block, as that
01553   // is what we need to DENORM back to the image coordinates.
01554   FCOORD blob_rotation(block_rotation);
01555   block_rotation.set_y(-block_rotation.y());
01556   block->set_re_rotation(block_rotation);
01557   block->set_classify_rotation(classify_rotation);
01558   if (textord_debug_tabfind) {
01559     tprintf("Blk %d, type %d rerotation(%.2f, %.2f), char(%.2f,%.2f), box:",
01560             block->index(), block->poly_block()->isA(),
01561             block->re_rotation().x(), block->re_rotation().y(),
01562             classify_rotation.x(), classify_rotation.y());
01563   }
01564   return blob_rotation;
01565 }
01566 
01567 }  // namespace tesseract.