Tesseract  3.02
tesseract-ocr/textord/tablefind.h
Go to the documentation of this file.
00001 
00002 // File:        tablefind.h
00003 // Description: Helper classes to find tables from ColPartitions.
00004 // Author:      Faisal Shafait (faisal.shafait@dfki.de)
00005 // Created:     Tue Jan 06 11:13:01 PST 2009
00006 //
00007 // (C) Copyright 2009, Google Inc.
00008 // Licensed under the Apache License, Version 2.0 (the "License");
00009 // you may not use this file except in compliance with the License.
00010 // You may obtain a copy of the License at
00011 // http://www.apache.org/licenses/LICENSE-2.0
00012 // Unless required by applicable law or agreed to in writing, software
00013 // distributed under the License is distributed on an "AS IS" BASIS,
00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015 // See the License for the specific language governing permissions and
00016 // limitations under the License.
00017 //
00019 
00020 #ifndef TESSERACT_TEXTORD_TABLEFIND_H__
00021 #define TESSERACT_TEXTORD_TABLEFIND_H__
00022 
00023 #include "colpartitiongrid.h"
00024 #include "elst.h"
00025 #include "rect.h"
00026 
00027 namespace tesseract {
00028 
00029 // Possible types for a column segment.
00030 enum ColSegType {
00031   COL_UNKNOWN,
00032   COL_TEXT,
00033   COL_TABLE,
00034   COL_MIXED,
00035   COL_COUNT
00036 };
00037 
00038 class ColPartitionSet;
00039 
00040 // ColSegment holds rectangular blocks that represent segmentation of a page
00041 // into regions containing single column text/table.
00042 class ColSegment;
00043 ELISTIZEH(ColSegment)
00044 CLISTIZEH(ColSegment)
00045 
00046 class ColSegment : public ELIST_LINK {
00047  public:
00048   ColSegment();
00049   ~ColSegment();
00050 
00051   // Simple accessors and mutators
00052   const TBOX& bounding_box() const {
00053     return bounding_box_;
00054   }
00055 
00056   void set_top(int y) {
00057     bounding_box_.set_top(y);
00058   }
00059 
00060   void set_bottom(int y) {
00061     bounding_box_.set_bottom(y);
00062   }
00063 
00064   void set_left(int x) {
00065     bounding_box_.set_left(x);
00066   }
00067 
00068   void set_right(int x) {
00069     bounding_box_.set_right(x);
00070   }
00071 
00072   void set_bounding_box(const TBOX& other) {
00073     bounding_box_ = other;
00074   }
00075 
00076   int get_num_table_cells() const {
00077     return num_table_cells_;
00078   }
00079 
00080   // set the number of table colpartitions covered by the bounding_box_
00081   void set_num_table_cells(int n) {
00082     num_table_cells_ = n;
00083   }
00084 
00085   int get_num_text_cells() const {
00086     return num_text_cells_;
00087   }
00088 
00089   // set the number of text colpartitions covered by the bounding_box_
00090   void set_num_text_cells(int n) {
00091     num_text_cells_ = n;
00092   }
00093 
00094   ColSegType type() const {
00095     return type_;
00096   }
00097 
00098   // set the type of the block based on the ratio of table to text
00099   // colpartitions covered by it.
00100   void set_type();
00101 
00102   // Provides a color for BBGrid to draw the rectangle.
00103   ScrollView::Color  BoxColor() const;
00104 
00105   // Insert a rectangle into bounding_box_
00106   void InsertBox(const TBOX& other);
00107 
00108  private:
00109   TBOX bounding_box_;                    // bounding box
00110   int num_table_cells_;
00111   int num_text_cells_;
00112   ColSegType type_;
00113 };
00114 
00115 // Typedef BBGrid of ColSegments
00116 typedef BBGrid<ColSegment,
00117                ColSegment_CLIST,
00118                ColSegment_C_IT> ColSegmentGrid;
00119 typedef GridSearch<ColSegment,
00120                    ColSegment_CLIST,
00121                    ColSegment_C_IT> ColSegmentGridSearch;
00122 
00123 // TableFinder is a utility class to find a set of tables given a set of
00124 // ColPartitions and Columns. The TableFinder will mark candidate ColPartitions
00125 // based on research in "Table Detection in Heterogeneous Documents".
00126 // Usage flow is as follows:
00127 //   TableFinder finder;
00128 //   finder.InsertCleanPartitions(/* grid info */)
00129 //   finder.LocateTables(/* ColPartitions and Columns */);
00130 //   finder.Update TODO(nbeato)
00131 class TableFinder {
00132  public:
00133   // Constructor is simple initializations
00134   TableFinder();
00135   ~TableFinder();
00136 
00137   // Set the resolution of the connected components in ppi.
00138   void set_resolution(int resolution) {
00139     resolution_ = resolution;
00140   }
00141   // Change the reading order. Initially it is left to right.
00142   void set_left_to_right_language(bool order);
00143 
00144   // Initialize
00145   void Init(int grid_size, const ICOORD& bottom_left, const ICOORD& top_right);
00146 
00147   // Copy cleaned partitions from ColumnFinder's part_grid_ to this
00148   // clean_part_grid_ and insert dot-like noise into period_grid_.
00149   // It resizes the grids in this object to the dimensions of grid.
00150   void InsertCleanPartitions(ColPartitionGrid* grid, TO_BLOCK* block);
00151 
00152   // High level function to perform table detection
00153   // Finds tables and updates the grid object with new partitions for the
00154   // tables. The columns and width callbacks are used to merge tables.
00155   // The reskew argument is only used to write the tables to the out.png
00156   // if that feature is enabled.
00157   void LocateTables(ColPartitionGrid* grid,
00158                     ColPartitionSet** columns,
00159                     WidthCallback* width_cb,
00160                     const FCOORD& reskew);
00161 
00162  protected:
00163   // Access for the grid dimensions.
00164   // The results will not be correct until InsertCleanPartitions
00165   // has been called. The values are taken from the grid passed as an argument
00166   // to that function.
00167   int gridsize() const;
00168   int gridwidth() const;
00169   int gridheight() const;
00170   const ICOORD& bleft() const;
00171   const ICOORD& tright() const;
00172 
00173   // Makes a window for debugging, see BBGrid
00174   ScrollView* MakeWindow(int x, int y, const char* window_name);
00175 
00178   // Inserts text into the table finder.
00179   void InsertTextPartition(ColPartition* part);
00180   void InsertFragmentedTextPartition(ColPartition* part);
00181   void InsertLeaderPartition(ColPartition* part);
00182   void InsertRulingPartition(ColPartition* part);
00183   void InsertImagePartition(ColPartition* part);
00184   void SplitAndInsertFragmentedTextPartition(ColPartition* part);
00185   bool AllowTextPartition(const ColPartition& part) const;
00186   bool AllowBlob(const BLOBNBOX& blob) const;
00187 
00191 
00192   // Utility function to move segments to col_seg_grid
00193   // Note: Move includes ownership,
00194   // so segments will be be owned by col_seg_grid
00195   void MoveColSegmentsToGrid(ColSegment_LIST* segments,
00196                              ColSegmentGrid* col_seg_grid);
00197 
00201 
00202   // Initialize the grid and partitions
00203   void InitializePartitions(ColPartitionSet** all_columns);
00204 
00205   // Set left, right and top, bottom spacings of each colpartition.
00206   // Left/right spacings are w.r.t the column boundaries
00207   // Top/bottom spacings are w.r.t. previous and next colpartitions
00208   static void SetPartitionSpacings(ColPartitionGrid* grid,
00209                                    ColPartitionSet** all_columns);
00210 
00211   // Set spacing and closest neighbors above and below a given colpartition.
00212   void SetVerticalSpacing(ColPartition* part);
00213 
00214   // Set global spacing estimates. This function is dependent on the
00215   // partition spacings. So make sure SetPartitionSpacings is called
00216   // on the same grid before this.
00217   void SetGlobalSpacings(ColPartitionGrid* grid);
00218   // Access to the global median xheight. The xheight is the height
00219   // of a lowercase 'x' character on the page. This can be viewed as the
00220   // average height of a lowercase letter in a textline. As a result
00221   // it is used to make assumptions about spacing between words and
00222   // table cells.
00223   void set_global_median_xheight(int xheight);
00224   // Access to the global median blob width. The width is useful
00225   // when deciding if a partition is noise.
00226   void set_global_median_blob_width(int width);
00227   // Access to the global median ledding. The ledding is the distance between
00228   // two adjacent text lines. This value can be used to get a rough estimate
00229   // for the amount of space between two lines of text. As a result, it
00230   // is used to calculate appropriate spacing between adjacent rows of text.
00231   void set_global_median_ledding(int ledding);
00232 
00233   // Updates the nearest neighbors for each ColPartition in clean_part_grid_.
00234   // The neighbors are most likely SingletonPartner calls after the neighbors
00235   // are assigned. This is hear until it is decided to remove the
00236   // nearest_neighbor code in ColPartition
00237   void FindNeighbors();
00238 
00243 
00244   // High level function to mark partitions as table rows/cells.
00245   // When this function is done, the column partitions in clean_part_grid_
00246   // should mostly be marked as tables.
00247   void MarkTablePartitions();
00248   // Marks partitions given a local view of a single partition
00249   void MarkPartitionsUsingLocalInformation();
00251   // Check if the partition has at least one large gap between words or no
00252   // significant gap at all
00253   // TODO(nbeato): Make const, prevented because blobnbox array access
00254   bool HasWideOrNoInterWordGap(ColPartition* part) const;
00255   // Checks if a partition is adjacent to leaders on the page
00256   bool HasLeaderAdjacent(const ColPartition& part);
00257   // Filter individual text partitions marked as table partitions
00258   // consisting of paragraph endings, small section headings, and
00259   // headers and footers.
00260   void FilterFalseAlarms();
00261   void FilterParagraphEndings();
00262   void FilterHeaderAndFooter();
00263   // Mark all ColPartitions as table cells that have a table cell above
00264   // and below them
00265   void SmoothTablePartitionRuns();
00266 
00279 
00280   // Get Column segments from best_columns_
00281   void GetColumnBlocks(ColPartitionSet** columns,
00282                        ColSegment_LIST *col_segments);
00283 
00284   // Group Column segments into consecutive single column regions.
00285   void GroupColumnBlocks(ColSegment_LIST *current_segments,
00286                         ColSegment_LIST *col_segments);
00287 
00288   // Check if two boxes are consecutive within the same column
00289   bool ConsecutiveBoxes(const TBOX &b1, const TBOX &b2);
00290 
00291   // Set the ratio of candidate table partitions in each column
00292   void SetColumnsType(ColSegment_LIST* col_segments);
00293 
00294   // Merge Column Blocks that were split due to the presence of a table
00295   void GridMergeColumnBlocks();
00296 
00302 
00303   // Merge partititons cells into table columns
00304   // Differs from paper by just looking at marked table partitions
00305   // instead of similarity metric.
00306   // Modified section 4.1 of paper.
00307   void GetTableColumns(ColSegment_LIST *table_columns);
00308 
00309   // Finds regions within a column that potentially contain a table.
00310   // Ie, the table columns from GetTableColumns are turned into boxes
00311   // that span the entire page column (using ColumnBlocks found in
00312   // earlier functions) in the x direction and the min/max extent of
00313   // overlapping table columns in the y direction.
00314   // Section 4.2 of paper.
00315   void GetTableRegions(ColSegment_LIST *table_columns,
00316                        ColSegment_LIST *table_regions);
00317 
00318 
00321 
00322   // Merge table regions corresponding to tables spanning multiple columns
00323   void GridMergeTableRegions();
00324   bool BelongToOneTable(const TBOX &box1, const TBOX &box2);
00325 
00326   // Adjust table boundaries by building a tight bounding box around all
00327   // ColPartitions contained in it.
00328   void AdjustTableBoundaries();
00329 
00330   // Grows a table to include partitions that are partially covered
00331   // by the table. This includes lines and text. It does not include
00332   // noise or images.
00333   // On entry, result_box is the minimum size of the result. The results of the
00334   // function will union the actual result with result_box.
00335   void GrowTableBox(const TBOX& table_box, TBOX* result_box);
00336   // Grow a table by increasing the size of the box to include
00337   // partitions with significant overlap with the table.
00338   void GrowTableToIncludePartials(const TBOX& table_box,
00339                                   const TBOX& search_range,
00340                                   TBOX* result_box);
00341   // Grow a table by expanding to the extents of significantly
00342   // overlapping lines.
00343   void GrowTableToIncludeLines(const TBOX& table_box, const TBOX& search_range,
00344                                TBOX* result_box);
00345   // Checks whether the horizontal line belong to the table by looking at the
00346   // side spacing of extra ColParitions that will be included in the table
00347   // due to expansion
00348   bool HLineBelongsToTable(const ColPartition& part, const TBOX& table_box);
00349 
00350   // Look for isolated column headers above the given table box and
00351   // include them in the table
00352   void IncludeLeftOutColumnHeaders(TBOX* table_box);
00353 
00354   // Remove false alarms consiting of a single column
00355   void DeleteSingleColumnTables();
00356 
00357   // Return true if at least one gap larger than the global x-height
00358   // exists in the horizontal projection
00359   bool GapInXProjection(int* xprojection, int length);
00360 
00363   // This function will run the table recognizer and try to find better
00364   // bounding boxes. The structures of the tables never leave this function
00365   // right now. It just tries to prune and merge tables based on info it
00366   // has available.
00367   void RecognizeTables();
00368 
00372 
00373   // Displays Colpartitions marked as table row. Overlays them on top of
00374   // part_grid_.
00375   void DisplayColSegments(ScrollView* win, ColSegment_LIST *cols,
00376                           ScrollView::Color color);
00377 
00378   // Displays the colpartitions using a new coloring on an existing window.
00379   // Note: This method is only for debug purpose during development and
00380   // would not be part of checked in code
00381   void DisplayColPartitions(ScrollView* win, ColPartitionGrid* grid,
00382                             ScrollView::Color text_color,
00383                             ScrollView::Color table_color);
00384   void DisplayColPartitions(ScrollView* win, ColPartitionGrid* grid,
00385                             ScrollView::Color default_color);
00386   void DisplayColPartitionConnections(ScrollView* win,
00387                                       ColPartitionGrid* grid,
00388                                       ScrollView::Color default_color);
00389   void DisplayColSegmentGrid(ScrollView* win, ColSegmentGrid* grid,
00390                              ScrollView::Color color);
00391 
00392   // Write ColParitions and Tables to a PIX image
00393   // Note: This method is only for debug purpose during development and
00394   // would not be part of checked in code
00395   void WriteToPix(const FCOORD& reskew);
00396 
00397   // Merge all colpartitions in table regions to make them a single
00398   // colpartition and revert types of isolated table cells not
00399   // assigned to any table to their original types.
00400   void MakeTableBlocks(ColPartitionGrid* grid,
00401                        ColPartitionSet** columns,
00402                        WidthCallback* width_cb);
00403 
00405   // Useful objects used during table find process.
00407   // Resolution of the connected components in ppi.
00408   int resolution_;
00409   // Estimate of median x-height over the page
00410   int global_median_xheight_;
00411   // Estimate of the median blob width on the page
00412   int global_median_blob_width_;
00413   // Estimate of median leading on the page
00414   int global_median_ledding_;
00415   // Grid to hold cleaned colpartitions after removing all
00416   // colpartitions that consist of only noise blobs, and removing
00417   // noise blobs from remaining colpartitions.
00418   ColPartitionGrid clean_part_grid_;
00419   // Grid contains the leaders and ruling lines.
00420   ColPartitionGrid leader_and_ruling_grid_;
00421   // Grid contains the broken down column partitions. It can be thought
00422   // of as a "word" grid. However, it usually doesn't break apart text lines.
00423   // It does break apart table data (most of the time).
00424   ColPartitionGrid fragmented_text_grid_;
00425   // Grid of page column blocks
00426   ColSegmentGrid col_seg_grid_;
00427   // Grid of detected tables
00428   ColSegmentGrid table_grid_;
00429   // The reading order of text. Defaults to true, for languages such as English.
00430   bool left_to_right_language_;
00431 };
00432 
00433 }  // namespace tesseract.
00434 
00435 #endif  // TESSERACT_TEXTORD_TABLEFIND_H__