Tesseract
3.02
|
00001 00002 // File: tablefind.h 00003 // Description: Helper classes to find tables from ColPartitions. 00004 // Author: Faisal Shafait (faisal.shafait@dfki.de) 00005 // Created: Tue Jan 06 11:13:01 PST 2009 00006 // 00007 // (C) Copyright 2009, Google Inc. 00008 // Licensed under the Apache License, Version 2.0 (the "License"); 00009 // you may not use this file except in compliance with the License. 00010 // You may obtain a copy of the License at 00011 // http://www.apache.org/licenses/LICENSE-2.0 00012 // Unless required by applicable law or agreed to in writing, software 00013 // distributed under the License is distributed on an "AS IS" BASIS, 00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 // See the License for the specific language governing permissions and 00016 // limitations under the License. 00017 // 00019 00020 #ifndef TESSERACT_TEXTORD_TABLEFIND_H__ 00021 #define TESSERACT_TEXTORD_TABLEFIND_H__ 00022 00023 #include "colpartitiongrid.h" 00024 #include "elst.h" 00025 #include "rect.h" 00026 00027 namespace tesseract { 00028 00029 // Possible types for a column segment. 00030 enum ColSegType { 00031 COL_UNKNOWN, 00032 COL_TEXT, 00033 COL_TABLE, 00034 COL_MIXED, 00035 COL_COUNT 00036 }; 00037 00038 class ColPartitionSet; 00039 00040 // ColSegment holds rectangular blocks that represent segmentation of a page 00041 // into regions containing single column text/table. 00042 class ColSegment; 00043 ELISTIZEH(ColSegment) 00044 CLISTIZEH(ColSegment) 00045 00046 class ColSegment : public ELIST_LINK { 00047 public: 00048 ColSegment(); 00049 ~ColSegment(); 00050 00051 // Simple accessors and mutators 00052 const TBOX& bounding_box() const { 00053 return bounding_box_; 00054 } 00055 00056 void set_top(int y) { 00057 bounding_box_.set_top(y); 00058 } 00059 00060 void set_bottom(int y) { 00061 bounding_box_.set_bottom(y); 00062 } 00063 00064 void set_left(int x) { 00065 bounding_box_.set_left(x); 00066 } 00067 00068 void set_right(int x) { 00069 bounding_box_.set_right(x); 00070 } 00071 00072 void set_bounding_box(const TBOX& other) { 00073 bounding_box_ = other; 00074 } 00075 00076 int get_num_table_cells() const { 00077 return num_table_cells_; 00078 } 00079 00080 // set the number of table colpartitions covered by the bounding_box_ 00081 void set_num_table_cells(int n) { 00082 num_table_cells_ = n; 00083 } 00084 00085 int get_num_text_cells() const { 00086 return num_text_cells_; 00087 } 00088 00089 // set the number of text colpartitions covered by the bounding_box_ 00090 void set_num_text_cells(int n) { 00091 num_text_cells_ = n; 00092 } 00093 00094 ColSegType type() const { 00095 return type_; 00096 } 00097 00098 // set the type of the block based on the ratio of table to text 00099 // colpartitions covered by it. 00100 void set_type(); 00101 00102 // Provides a color for BBGrid to draw the rectangle. 00103 ScrollView::Color BoxColor() const; 00104 00105 // Insert a rectangle into bounding_box_ 00106 void InsertBox(const TBOX& other); 00107 00108 private: 00109 TBOX bounding_box_; // bounding box 00110 int num_table_cells_; 00111 int num_text_cells_; 00112 ColSegType type_; 00113 }; 00114 00115 // Typedef BBGrid of ColSegments 00116 typedef BBGrid<ColSegment, 00117 ColSegment_CLIST, 00118 ColSegment_C_IT> ColSegmentGrid; 00119 typedef GridSearch<ColSegment, 00120 ColSegment_CLIST, 00121 ColSegment_C_IT> ColSegmentGridSearch; 00122 00123 // TableFinder is a utility class to find a set of tables given a set of 00124 // ColPartitions and Columns. The TableFinder will mark candidate ColPartitions 00125 // based on research in "Table Detection in Heterogeneous Documents". 00126 // Usage flow is as follows: 00127 // TableFinder finder; 00128 // finder.InsertCleanPartitions(/* grid info */) 00129 // finder.LocateTables(/* ColPartitions and Columns */); 00130 // finder.Update TODO(nbeato) 00131 class TableFinder { 00132 public: 00133 // Constructor is simple initializations 00134 TableFinder(); 00135 ~TableFinder(); 00136 00137 // Set the resolution of the connected components in ppi. 00138 void set_resolution(int resolution) { 00139 resolution_ = resolution; 00140 } 00141 // Change the reading order. Initially it is left to right. 00142 void set_left_to_right_language(bool order); 00143 00144 // Initialize 00145 void Init(int grid_size, const ICOORD& bottom_left, const ICOORD& top_right); 00146 00147 // Copy cleaned partitions from ColumnFinder's part_grid_ to this 00148 // clean_part_grid_ and insert dot-like noise into period_grid_. 00149 // It resizes the grids in this object to the dimensions of grid. 00150 void InsertCleanPartitions(ColPartitionGrid* grid, TO_BLOCK* block); 00151 00152 // High level function to perform table detection 00153 // Finds tables and updates the grid object with new partitions for the 00154 // tables. The columns and width callbacks are used to merge tables. 00155 // The reskew argument is only used to write the tables to the out.png 00156 // if that feature is enabled. 00157 void LocateTables(ColPartitionGrid* grid, 00158 ColPartitionSet** columns, 00159 WidthCallback* width_cb, 00160 const FCOORD& reskew); 00161 00162 protected: 00163 // Access for the grid dimensions. 00164 // The results will not be correct until InsertCleanPartitions 00165 // has been called. The values are taken from the grid passed as an argument 00166 // to that function. 00167 int gridsize() const; 00168 int gridwidth() const; 00169 int gridheight() const; 00170 const ICOORD& bleft() const; 00171 const ICOORD& tright() const; 00172 00173 // Makes a window for debugging, see BBGrid 00174 ScrollView* MakeWindow(int x, int y, const char* window_name); 00175 00178 // Inserts text into the table finder. 00179 void InsertTextPartition(ColPartition* part); 00180 void InsertFragmentedTextPartition(ColPartition* part); 00181 void InsertLeaderPartition(ColPartition* part); 00182 void InsertRulingPartition(ColPartition* part); 00183 void InsertImagePartition(ColPartition* part); 00184 void SplitAndInsertFragmentedTextPartition(ColPartition* part); 00185 bool AllowTextPartition(const ColPartition& part) const; 00186 bool AllowBlob(const BLOBNBOX& blob) const; 00187 00191 00192 // Utility function to move segments to col_seg_grid 00193 // Note: Move includes ownership, 00194 // so segments will be be owned by col_seg_grid 00195 void MoveColSegmentsToGrid(ColSegment_LIST* segments, 00196 ColSegmentGrid* col_seg_grid); 00197 00201 00202 // Initialize the grid and partitions 00203 void InitializePartitions(ColPartitionSet** all_columns); 00204 00205 // Set left, right and top, bottom spacings of each colpartition. 00206 // Left/right spacings are w.r.t the column boundaries 00207 // Top/bottom spacings are w.r.t. previous and next colpartitions 00208 static void SetPartitionSpacings(ColPartitionGrid* grid, 00209 ColPartitionSet** all_columns); 00210 00211 // Set spacing and closest neighbors above and below a given colpartition. 00212 void SetVerticalSpacing(ColPartition* part); 00213 00214 // Set global spacing estimates. This function is dependent on the 00215 // partition spacings. So make sure SetPartitionSpacings is called 00216 // on the same grid before this. 00217 void SetGlobalSpacings(ColPartitionGrid* grid); 00218 // Access to the global median xheight. The xheight is the height 00219 // of a lowercase 'x' character on the page. This can be viewed as the 00220 // average height of a lowercase letter in a textline. As a result 00221 // it is used to make assumptions about spacing between words and 00222 // table cells. 00223 void set_global_median_xheight(int xheight); 00224 // Access to the global median blob width. The width is useful 00225 // when deciding if a partition is noise. 00226 void set_global_median_blob_width(int width); 00227 // Access to the global median ledding. The ledding is the distance between 00228 // two adjacent text lines. This value can be used to get a rough estimate 00229 // for the amount of space between two lines of text. As a result, it 00230 // is used to calculate appropriate spacing between adjacent rows of text. 00231 void set_global_median_ledding(int ledding); 00232 00233 // Updates the nearest neighbors for each ColPartition in clean_part_grid_. 00234 // The neighbors are most likely SingletonPartner calls after the neighbors 00235 // are assigned. This is hear until it is decided to remove the 00236 // nearest_neighbor code in ColPartition 00237 void FindNeighbors(); 00238 00243 00244 // High level function to mark partitions as table rows/cells. 00245 // When this function is done, the column partitions in clean_part_grid_ 00246 // should mostly be marked as tables. 00247 void MarkTablePartitions(); 00248 // Marks partitions given a local view of a single partition 00249 void MarkPartitionsUsingLocalInformation(); 00251 // Check if the partition has at least one large gap between words or no 00252 // significant gap at all 00253 // TODO(nbeato): Make const, prevented because blobnbox array access 00254 bool HasWideOrNoInterWordGap(ColPartition* part) const; 00255 // Checks if a partition is adjacent to leaders on the page 00256 bool HasLeaderAdjacent(const ColPartition& part); 00257 // Filter individual text partitions marked as table partitions 00258 // consisting of paragraph endings, small section headings, and 00259 // headers and footers. 00260 void FilterFalseAlarms(); 00261 void FilterParagraphEndings(); 00262 void FilterHeaderAndFooter(); 00263 // Mark all ColPartitions as table cells that have a table cell above 00264 // and below them 00265 void SmoothTablePartitionRuns(); 00266 00279 00280 // Get Column segments from best_columns_ 00281 void GetColumnBlocks(ColPartitionSet** columns, 00282 ColSegment_LIST *col_segments); 00283 00284 // Group Column segments into consecutive single column regions. 00285 void GroupColumnBlocks(ColSegment_LIST *current_segments, 00286 ColSegment_LIST *col_segments); 00287 00288 // Check if two boxes are consecutive within the same column 00289 bool ConsecutiveBoxes(const TBOX &b1, const TBOX &b2); 00290 00291 // Set the ratio of candidate table partitions in each column 00292 void SetColumnsType(ColSegment_LIST* col_segments); 00293 00294 // Merge Column Blocks that were split due to the presence of a table 00295 void GridMergeColumnBlocks(); 00296 00302 00303 // Merge partititons cells into table columns 00304 // Differs from paper by just looking at marked table partitions 00305 // instead of similarity metric. 00306 // Modified section 4.1 of paper. 00307 void GetTableColumns(ColSegment_LIST *table_columns); 00308 00309 // Finds regions within a column that potentially contain a table. 00310 // Ie, the table columns from GetTableColumns are turned into boxes 00311 // that span the entire page column (using ColumnBlocks found in 00312 // earlier functions) in the x direction and the min/max extent of 00313 // overlapping table columns in the y direction. 00314 // Section 4.2 of paper. 00315 void GetTableRegions(ColSegment_LIST *table_columns, 00316 ColSegment_LIST *table_regions); 00317 00318 00321 00322 // Merge table regions corresponding to tables spanning multiple columns 00323 void GridMergeTableRegions(); 00324 bool BelongToOneTable(const TBOX &box1, const TBOX &box2); 00325 00326 // Adjust table boundaries by building a tight bounding box around all 00327 // ColPartitions contained in it. 00328 void AdjustTableBoundaries(); 00329 00330 // Grows a table to include partitions that are partially covered 00331 // by the table. This includes lines and text. It does not include 00332 // noise or images. 00333 // On entry, result_box is the minimum size of the result. The results of the 00334 // function will union the actual result with result_box. 00335 void GrowTableBox(const TBOX& table_box, TBOX* result_box); 00336 // Grow a table by increasing the size of the box to include 00337 // partitions with significant overlap with the table. 00338 void GrowTableToIncludePartials(const TBOX& table_box, 00339 const TBOX& search_range, 00340 TBOX* result_box); 00341 // Grow a table by expanding to the extents of significantly 00342 // overlapping lines. 00343 void GrowTableToIncludeLines(const TBOX& table_box, const TBOX& search_range, 00344 TBOX* result_box); 00345 // Checks whether the horizontal line belong to the table by looking at the 00346 // side spacing of extra ColParitions that will be included in the table 00347 // due to expansion 00348 bool HLineBelongsToTable(const ColPartition& part, const TBOX& table_box); 00349 00350 // Look for isolated column headers above the given table box and 00351 // include them in the table 00352 void IncludeLeftOutColumnHeaders(TBOX* table_box); 00353 00354 // Remove false alarms consiting of a single column 00355 void DeleteSingleColumnTables(); 00356 00357 // Return true if at least one gap larger than the global x-height 00358 // exists in the horizontal projection 00359 bool GapInXProjection(int* xprojection, int length); 00360 00363 // This function will run the table recognizer and try to find better 00364 // bounding boxes. The structures of the tables never leave this function 00365 // right now. It just tries to prune and merge tables based on info it 00366 // has available. 00367 void RecognizeTables(); 00368 00372 00373 // Displays Colpartitions marked as table row. Overlays them on top of 00374 // part_grid_. 00375 void DisplayColSegments(ScrollView* win, ColSegment_LIST *cols, 00376 ScrollView::Color color); 00377 00378 // Displays the colpartitions using a new coloring on an existing window. 00379 // Note: This method is only for debug purpose during development and 00380 // would not be part of checked in code 00381 void DisplayColPartitions(ScrollView* win, ColPartitionGrid* grid, 00382 ScrollView::Color text_color, 00383 ScrollView::Color table_color); 00384 void DisplayColPartitions(ScrollView* win, ColPartitionGrid* grid, 00385 ScrollView::Color default_color); 00386 void DisplayColPartitionConnections(ScrollView* win, 00387 ColPartitionGrid* grid, 00388 ScrollView::Color default_color); 00389 void DisplayColSegmentGrid(ScrollView* win, ColSegmentGrid* grid, 00390 ScrollView::Color color); 00391 00392 // Write ColParitions and Tables to a PIX image 00393 // Note: This method is only for debug purpose during development and 00394 // would not be part of checked in code 00395 void WriteToPix(const FCOORD& reskew); 00396 00397 // Merge all colpartitions in table regions to make them a single 00398 // colpartition and revert types of isolated table cells not 00399 // assigned to any table to their original types. 00400 void MakeTableBlocks(ColPartitionGrid* grid, 00401 ColPartitionSet** columns, 00402 WidthCallback* width_cb); 00403 00405 // Useful objects used during table find process. 00407 // Resolution of the connected components in ppi. 00408 int resolution_; 00409 // Estimate of median x-height over the page 00410 int global_median_xheight_; 00411 // Estimate of the median blob width on the page 00412 int global_median_blob_width_; 00413 // Estimate of median leading on the page 00414 int global_median_ledding_; 00415 // Grid to hold cleaned colpartitions after removing all 00416 // colpartitions that consist of only noise blobs, and removing 00417 // noise blobs from remaining colpartitions. 00418 ColPartitionGrid clean_part_grid_; 00419 // Grid contains the leaders and ruling lines. 00420 ColPartitionGrid leader_and_ruling_grid_; 00421 // Grid contains the broken down column partitions. It can be thought 00422 // of as a "word" grid. However, it usually doesn't break apart text lines. 00423 // It does break apart table data (most of the time). 00424 ColPartitionGrid fragmented_text_grid_; 00425 // Grid of page column blocks 00426 ColSegmentGrid col_seg_grid_; 00427 // Grid of detected tables 00428 ColSegmentGrid table_grid_; 00429 // The reading order of text. Defaults to true, for languages such as English. 00430 bool left_to_right_language_; 00431 }; 00432 00433 } // namespace tesseract. 00434 00435 #endif // TESSERACT_TEXTORD_TABLEFIND_H__