Tesseract
3.02
|
00001 00002 // File: colpartitionrid.h 00003 // Description: Class collecting code that acts on a BBGrid of ColPartitions. 00004 // Author: Ray Smith 00005 // Created: Mon Oct 05 08:42:01 PDT 2009 00006 // 00007 // (C) Copyright 2009, Google Inc. 00008 // Licensed under the Apache License, Version 2.0 (the "License"); 00009 // you may not use this file except in compliance with the License. 00010 // You may obtain a copy of the License at 00011 // http://www.apache.org/licenses/LICENSE-2.0 00012 // Unless required by applicable law or agreed to in writing, software 00013 // distributed under the License is distributed on an "AS IS" BASIS, 00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 // See the License for the specific language governing permissions and 00016 // limitations under the License. 00017 // 00019 00020 #ifndef TESSERACT_TEXTORD_COLPARTITIONGRID_H__ 00021 #define TESSERACT_TEXTORD_COLPARTITIONGRID_H__ 00022 00023 #include "bbgrid.h" 00024 #include "colpartition.h" 00025 #include "colpartitionset.h" 00026 00027 namespace tesseract { 00028 00029 class TabFind; 00030 00031 // ColPartitionGrid is a BBGrid of ColPartition. 00032 // It collects functions that work on the grid. 00033 class ColPartitionGrid : public BBGrid<ColPartition, 00034 ColPartition_CLIST, 00035 ColPartition_C_IT> { 00036 public: 00037 ColPartitionGrid(); 00038 ColPartitionGrid(int gridsize, const ICOORD& bleft, const ICOORD& tright); 00039 00040 virtual ~ColPartitionGrid(); 00041 00042 // Handles a click event in a display window. 00043 void HandleClick(int x, int y); 00044 00045 // Merges ColPartitions in the grid that look like they belong in the same 00046 // textline. 00047 // For all partitions in the grid, calls the box_cb permanent callback 00048 // to compute the search box, seaches the box, and if a candidate is found, 00049 // calls the confirm_cb to check any more rules. If the confirm_cb returns 00050 // true, then the partitions are merged. 00051 // Both callbacks are deleted before returning. 00052 void Merges(TessResultCallback2<bool, ColPartition*, TBOX*>* box_cb, 00053 TessResultCallback2<bool, const ColPartition*, 00054 const ColPartition*>* confirm_cb); 00055 00056 // For the given partition, calls the box_cb permanent callback 00057 // to compute the search box, searches the box, and if a candidate is found, 00058 // calls the confirm_cb to check any more rules. If the confirm_cb returns 00059 // true, then the partitions are merged. 00060 // Returns true if the partition is consumed by one or more merges. 00061 bool MergePart(TessResultCallback2<bool, ColPartition*, TBOX*>* box_cb, 00062 TessResultCallback2<bool, const ColPartition*, 00063 const ColPartition*>* confirm_cb, 00064 ColPartition* part); 00065 00066 // Finds all the ColPartitions in the grid that overlap with the given 00067 // box and returns them SortByBoxLeft(ed) and uniqued in the given list. 00068 // Any partition equal to not_this (may be NULL) is excluded. 00069 void FindOverlappingPartitions(const TBOX& box, const ColPartition* not_this, 00070 ColPartition_CLIST* parts); 00071 00072 // Finds and returns the best candidate ColPartition to merge with part, 00073 // selected from the candidates list, based on the minimum increase in 00074 // pairwise overlap among all the partitions overlapped by the combined box. 00075 // If overlap_increase is not NULL then it returns the increase in overlap 00076 // that would result from the merge. 00077 // See colpartitiongrid.cpp for a diagram. 00078 ColPartition* BestMergeCandidate( 00079 const ColPartition* part, ColPartition_CLIST* candidates, bool debug, 00080 TessResultCallback2<bool, const ColPartition*, 00081 const ColPartition*>* confirm_cb, 00082 int* overlap_increase); 00083 00084 // Split partitions where it reduces overlap between their bounding boxes. 00085 // ColPartitions are after all supposed to be a partitioning of the blobs 00086 // AND of the space on the page! 00087 // Blobs that cause overlaps get removed, put in individual partitions 00088 // and added to the big_parts list. They are most likely characters on 00089 // 2 textlines that touch, or something big like a dropcap. 00090 void SplitOverlappingPartitions(ColPartition_LIST* big_parts); 00091 00092 // Filters partitions of source_type by looking at local neighbours. 00093 // Where a majority of neighbours have a text type, the partitions are 00094 // changed to text, where the neighbours have image type, they are changed 00095 // to image, and partitions that have no definite neighbourhood type are 00096 // left unchanged. 00097 // im_box and rerotation are used to map blob coordinates onto the 00098 // nontext_map, which is used to prevent the spread of text neighbourhoods 00099 // into images. 00100 // Returns true if anything was changed. 00101 bool GridSmoothNeighbours(BlobTextFlowType source_type, Pix* nontext_map, 00102 const TBOX& im_box, const FCOORD& rerotation); 00103 00104 // Compute the mean RGB of the light and dark pixels in each ColPartition 00105 // and also the rms error in the linearity of color. 00106 void ComputePartitionColors(Pix* scaled_color, int scaled_factor, 00107 const FCOORD& rerotation); 00108 00109 // Reflects the grid and its colpartitions in the y-axis, assuming that 00110 // all blob boxes have already been done. 00111 void ReflectInYAxis(); 00112 00113 // Rotates the grid and its colpartitions by the given angle, assuming that 00114 // all blob boxes have already been done. 00115 void Deskew(const FCOORD& deskew); 00116 00117 // Sets the left and right tabs of the partitions in the grid. 00118 void SetTabStops(TabFind* tabgrid); 00119 00120 // Makes the ColPartSets and puts them in the PartSetVector ready 00121 // for finding column bounds. Returns false if no partitions were found. 00122 // Each ColPartition in the grid is placed in a single ColPartSet based 00123 // on the bottom-left of its bounding box. 00124 bool MakeColPartSets(PartSetVector* part_sets); 00125 00126 // Makes a single ColPartitionSet consisting of a single ColPartition that 00127 // represents the total horizontal extent of the significant content on the 00128 // page. Used for the single column setting in place of automatic detection. 00129 // Returns NULL if the page is empty of significant content. 00130 ColPartitionSet* MakeSingleColumnSet(WidthCallback* cb); 00131 00132 // Mark the BLOBNBOXes in each partition as being owned by that partition. 00133 void ClaimBoxes(); 00134 00135 // Retypes all the blobs referenced by the partitions in the grid. 00136 // Image blobs are sliced on the grid boundaries to give the tab finder 00137 // a better handle on the edges of the images, and the actual blobs are 00138 // returned in the im_blobs list, as they are not owned by the block. 00139 void ReTypeBlobs(BLOBNBOX_LIST* im_blobs); 00140 00141 // The boxes within the partitions have changed (by deskew) so recompute 00142 // the bounds of all the partitions and reinsert them into the grid. 00143 void RecomputeBounds(int gridsize, const ICOORD& bleft, 00144 const ICOORD& tright, const ICOORD& vertical); 00145 00146 // Improves the margins of the ColPartitions in the grid by calling 00147 // FindPartitionMargins on each. 00148 void GridFindMargins(ColPartitionSet** best_columns); 00149 00150 // Improves the margins of the ColPartitions in the list by calling 00151 // FindPartitionMargins on each. 00152 void ListFindMargins(ColPartitionSet** best_columns, 00153 ColPartition_LIST* parts); 00154 00155 // Deletes all the partitions in the grid after disowning all the blobs. 00156 void DeleteParts(); 00157 00158 // Deletes all the partitions in the grid that are of type BRT_UNKNOWN and 00159 // all the blobs in them. 00160 void DeleteUnknownParts(TO_BLOCK* block); 00161 00162 // Finds and marks text partitions that represent figure captions. 00163 void FindFigureCaptions(); 00164 00167 // For every ColPartition in the grid, finds its upper and lower neighbours. 00168 void FindPartitionPartners(); 00169 // Finds the best partner in the given direction for the given partition. 00170 // Stores the result with AddPartner. 00171 void FindPartitionPartners(bool upper, ColPartition* part); 00172 // Finds the best partner in the given direction for the given partition. 00173 // Stores the result with AddPartner. 00174 void FindVPartitionPartners(bool to_the_left, ColPartition* part); 00175 // For every ColPartition with multiple partners in the grid, reduces the 00176 // number of partners to 0 or 1. If get_desperate is true, goes to more 00177 // desperate merge methods to merge flowing text before breaking partnerships. 00178 void RefinePartitionPartners(bool get_desperate); 00179 00180 private: 00181 // Finds and returns a list of candidate ColPartitions to merge with part. 00182 // The candidates must overlap search_box, and when merged must not 00183 // overlap any other partitions that are not overlapped by each individually. 00184 void FindMergeCandidates(const ColPartition* part, const TBOX& search_box, 00185 bool debug, ColPartition_CLIST* candidates); 00186 00187 // Smoothes the region type/flow type of the given part by looking at local 00188 // neigbours and the given image mask. Searches a padded rectangle with the 00189 // padding truncated on one size of the part's box in turn for each side, 00190 // using the result (if any) that has the least distance to all neighbours 00191 // that contribute to the decision. This biases in favor of rectangular 00192 // regions without completely enforcing them. 00193 // If a good decision cannot be reached, the part is left unchanged. 00194 // im_box and rerotation are used to map blob coordinates onto the 00195 // nontext_map, which is used to prevent the spread of text neighbourhoods 00196 // into images. 00197 // Returns true if the partition was changed. 00198 bool SmoothRegionType(Pix* nontext_map, 00199 const TBOX& im_box, 00200 const FCOORD& rerotation, 00201 bool debug, 00202 ColPartition* part); 00203 // Executes the search for SmoothRegionType in a single direction. 00204 // Creates a bounding box that is padded in all directions except direction, 00205 // and searches it for other partitions. Finds the nearest collection of 00206 // partitions that makes a decisive result (if any) and returns the type 00207 // and the distance of the collection. If there are any pixels in the 00208 // nontext_map, then the decision is biased towards image. 00209 BlobRegionType SmoothInOneDirection(BlobNeighbourDir direction, 00210 Pix* nontext_map, 00211 const TBOX& im_box, 00212 const FCOORD& rerotation, 00213 bool debug, 00214 const ColPartition& part, 00215 int* best_distance); 00216 // Counts the partitions in the given search_box by appending the gap 00217 // distance (scaled by dist_scaling) of the part from the base_part to the 00218 // vector of the appropriate type for the partition. Prior to return, the 00219 // vectors in the dists array are sorted in increasing order. 00220 // dists must be an array of GenericVectors of size NPT_COUNT. 00221 void AccumulatePartDistances(const ColPartition& base_part, 00222 const ICOORD& dist_scaling, 00223 const TBOX& search_box, 00224 Pix* nontext_map, 00225 const TBOX& im_box, 00226 const FCOORD& rerotation, 00227 bool debug, 00228 GenericVector<int>* dists); 00229 00230 // Improves the margins of the ColPartition by searching for 00231 // neighbours that vertically overlap significantly. 00232 void FindPartitionMargins(ColPartitionSet* columns, ColPartition* part); 00233 00234 // Starting at x, and going in the specified direction, upto x_limit, finds 00235 // the margin for the given y range by searching sideways, 00236 // and ignoring not_this. 00237 int FindMargin(int x, bool right_to_left, int x_limit, 00238 int y_bottom, int y_top, const ColPartition* not_this); 00239 }; 00240 00241 } // namespace tesseract. 00242 00243 #endif // TESSERACT_TEXTORD_COLPARTITIONGRID_H__