Tesseract
3.02
|
00001 00002 // File: equationdetect.h 00003 // Description: The equation detection class that inherits equationdetectbase. 00004 // Author: Zongyi (Joe) Liu (joeliu@google.com) 00005 // Created: Fri Aug 31 11:13:01 PST 2011 00006 // 00007 // (C) Copyright 2011, Google Inc. 00008 // Licensed under the Apache License, Version 2.0 (the "License"); 00009 // you may not use this file except in compliance with the License. 00010 // You may obtain a copy of the License at 00011 // http://www.apache.org/licenses/LICENSE-2.0 00012 // Unless required by applicable law or agreed to in writing, software 00013 // distributed under the License is distributed on an "AS IS" BASIS, 00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 // See the License for the specific language governing permissions and 00016 // limitations under the License. 00017 // 00019 00020 #ifndef TESSERACT_CCMAIN_EQUATIONDETECT_H__ 00021 #define TESSERACT_CCMAIN_EQUATIONDETECT_H__ 00022 00023 #include "blobbox.h" 00024 #include "equationdetectbase.h" 00025 #include "genericvector.h" 00026 #include "unichar.h" 00027 00028 class BLOBNBOX; 00029 class BLOB_CHOICE; 00030 class BLOB_CHOICE_LIST; 00031 class TO_BLOCK_LIST; 00032 class TBOX; 00033 class UNICHARSET; 00034 00035 namespace tesseract { 00036 00037 class Tesseract; 00038 class ColPartition; 00039 class ColPartitionGrid; 00040 class ColPartitionSet; 00041 00042 class EquationDetect : public EquationDetectBase { 00043 public: 00044 EquationDetect(const char* equ_datapath, 00045 const char* equ_language); 00046 ~EquationDetect(); 00047 00048 enum IndentType { 00049 NO_INDENT, 00050 LEFT_INDENT, 00051 RIGHT_INDENT, 00052 BOTH_INDENT, 00053 INDENT_TYPE_COUNT 00054 }; 00055 00056 // Reset the lang_tesseract_ pointer. This function should be called before we 00057 // do any detector work. 00058 void SetLangTesseract(Tesseract* lang_tesseract); 00059 00060 // Iterate over the blobs inside to_block, and set the blobs that we want to 00061 // process to BSTT_NONE. (By default, they should be BSTT_SKIP). The function 00062 // returns 0 upon success. 00063 int LabelSpecialText(TO_BLOCK* to_block); 00064 00065 // Find possible equation partitions from part_grid. Should be called 00066 // after the special_text_type of blobs are set. 00067 // It returns 0 upon success. 00068 int FindEquationParts(ColPartitionGrid* part_grid, 00069 ColPartitionSet** best_columns); 00070 00071 // Reset the resolution of the processing image. TEST only function. 00072 void SetResolution(const int resolution); 00073 00074 protected: 00075 // Identify the special text type for one blob, and update its field. When 00076 // height_th is set (> 0), we will label the blob as BSTT_NONE if its height 00077 // is less than height_th. 00078 void IdentifySpecialText(BLOBNBOX *blob, const int height_th); 00079 00080 // Estimate the type for one unichar. 00081 BlobSpecialTextType EstimateTypeForUnichar( 00082 const UNICHARSET& unicharset, const UNICHAR_ID id) const; 00083 00084 // Compute special text type for each blobs in part_grid_. 00085 void IdentifySpecialText(); 00086 00087 // Identify blobs that we want to skip during special blob type 00088 // classification. 00089 void IdentifyBlobsToSkip(ColPartition* part); 00090 00091 // The ColPartitions in part_grid_ maybe over-segmented, particularly in the 00092 // block equation regions. So we like to identify these partitions and merge 00093 // them before we do the searching. 00094 void MergePartsByLocation(); 00095 00096 // Staring from the seed center, we do radius search. And for partitions that 00097 // have large overlaps with seed, we remove them from part_grid_ and add into 00098 // parts_overlap. Note: this function may update the part_grid_, so if the 00099 // caller is also running ColPartitionGridSearch, use the RepositionIterator 00100 // to continue. 00101 void SearchByOverlap(ColPartition* seed, 00102 GenericVector<ColPartition*>* parts_overlap); 00103 00104 // Insert part back into part_grid_, after it absorbs some other parts. 00105 void InsertPartAfterAbsorb(ColPartition* part); 00106 00107 // Identify the colparitions in part_grid_, label them as PT_EQUATION, and 00108 // save them into cp_seeds_. 00109 void IdentifySeedParts(); 00110 00111 // Check the blobs count for a seed region candidate. 00112 bool CheckSeedBlobsCount(ColPartition* part); 00113 00114 // Compute the foreground pixel density for a tbox area. 00115 float ComputeForegroundDensity(const TBOX& tbox); 00116 00117 // Check if part from seed2 label: with low math density and left indented. We 00118 // are using two checks: 00119 // 1. If its left is aligned with any coordinates in indented_texts_left, 00120 // which we assume have been sorted. 00121 // 2. If its foreground density is over foreground_density_th. 00122 bool CheckForSeed2( 00123 const GenericVector<int>& indented_texts_left, 00124 const float foreground_density_th, 00125 ColPartition* part); 00126 00127 // Count the number of values in sorted_vec that is close to val, used to 00128 // check if a partition is aligned with text partitions. 00129 int CountAlignment( 00130 const GenericVector<int>& sorted_vec, const int val) const; 00131 00132 // Check for a seed candidate using the foreground pixel density. And we 00133 // return true if the density is below a certain threshold, because characters 00134 // in equation regions usually are apart with more white spaces. 00135 bool CheckSeedFgDensity(const float density_th, ColPartition* part); 00136 00137 // A light version of SplitCPHor: instead of really doing the part split, we 00138 // simply compute the union bounding box of each splitted part. 00139 void SplitCPHorLite(ColPartition* part, GenericVector<TBOX>* splitted_boxes); 00140 00141 // Split the part (horizontally), and save the splitted result into 00142 // parts_splitted. Note that it is caller's responsibility to release the 00143 // memory owns by parts_splitted. On the other hand, the part is unchanged 00144 // during this process and still owns the blobs, so do NOT call DeleteBoxes 00145 // when freeing the colpartitions in parts_splitted. 00146 void SplitCPHor(ColPartition* part, 00147 GenericVector<ColPartition*>* parts_splitted); 00148 00149 // Check the density for a seed candidate (part) using its math density and 00150 // italic density, returns true if the check passed. 00151 bool CheckSeedDensity(const float math_density_high, 00152 const float math_density_low, 00153 const ColPartition* part) const; 00154 00155 // Check if part is indented. 00156 IndentType IsIndented(ColPartition* part); 00157 00158 // Identify inline partitions from cp_seeds_, and re-label them. 00159 void IdentifyInlineParts(); 00160 00161 // Comute the super bounding box for all colpartitions inside part_grid_. 00162 void ComputeCPsSuperBBox(); 00163 00164 // Identify inline partitions from cp_seeds_ using the horizontal search. 00165 void IdentifyInlinePartsHorizontal(); 00166 00167 // Estimate the line spacing between two text partitions. Returns -1 if not 00168 // enough data. 00169 int EstimateTextPartLineSpacing(); 00170 00171 // Identify inline partitions from cp_seeds_ using vertical search. 00172 void IdentifyInlinePartsVertical(const bool top_to_bottom, 00173 const int textPartsLineSpacing); 00174 00175 // Check if part is an inline equation zone. This should be called after we 00176 // identified the seed regions. 00177 bool IsInline(const bool search_bottom, 00178 const int textPartsLineSpacing, 00179 ColPartition* part); 00180 00181 // For a given seed partition, we search the part_grid_ and see if there is 00182 // any partition can be merged with it. It returns true if the seed has been 00183 // expanded. 00184 bool ExpandSeed(ColPartition* seed); 00185 00186 // Starting from the seed position, we search the part_grid_ 00187 // horizontally/vertically, find all parititions that can be 00188 // merged with seed, remove them from part_grid_, and put them into 00189 // parts_to_merge. 00190 void ExpandSeedHorizontal(const bool search_left, 00191 ColPartition* seed, 00192 GenericVector<ColPartition*>* parts_to_merge); 00193 void ExpandSeedVertical(const bool search_bottom, 00194 ColPartition* seed, 00195 GenericVector<ColPartition*>* parts_to_merge); 00196 00197 // Check if a part_box is the small neighbor of seed_box. 00198 bool IsNearSmallNeighbor(const TBOX& seed_box, 00199 const TBOX& part_box) const; 00200 00201 // Perform the density check for part, which we assume is nearing a seed 00202 // partition. It returns true if the check passed. 00203 bool CheckSeedNeighborDensity(const ColPartition* part) const; 00204 00205 // After identify the math blocks, we do one more scanning on all text 00206 // partitions, and check if any of them is the satellite of: 00207 // math blocks: here a p is the satellite of q if: 00208 // 1. q is the nearest vertical neighbor of p, and 00209 // 2. y_gap(p, q) is less than a threshold, and 00210 // 3. x_overlap(p, q) is over a threshold. 00211 // Note that p can be the satellites of two blocks: its top neighbor and 00212 // bottom neighbor. 00213 void ProcessMathBlockSatelliteParts(); 00214 00215 // Check if part is the satellite of one/two math blocks. If it is, we return 00216 // true, and save the blocks into math_blocks. 00217 bool IsMathBlockSatellite( 00218 ColPartition* part, GenericVector<ColPartition*>* math_blocks); 00219 00220 // Search the nearest neighbor of part in one vertical direction as defined in 00221 // search_bottom. It returns the neighbor found that major x overlap with it, 00222 // or NULL when not found. 00223 ColPartition* SearchNNVertical(const bool search_bottom, 00224 const ColPartition* part); 00225 00226 // Check if the neighbor with vertical distance of y_gap is a near and math 00227 // block partition. 00228 bool IsNearMathNeighbor(const int y_gap, const ColPartition *neighbor) const; 00229 00230 // Generate the tiff file name for output/debug file. 00231 void GetOutputTiffName(const char* name, STRING* image_name) const; 00232 00233 // Debugger function that renders ColPartitions on the input image, where: 00234 // parts labeled as PT_EQUATION will be painted in red, PT_INLINE_EQUATION 00235 // will be painted in green, and other parts will be painted in blue. 00236 void PaintColParts(const STRING& outfile) const; 00237 00238 // Debugger function that renders the blobs in part_grid_ over the input 00239 // image. 00240 void PaintSpecialTexts(const STRING& outfile) const; 00241 00242 // Debugger function that print the math blobs density values for a 00243 // ColPartition object. 00244 void PrintSpecialBlobsDensity(const ColPartition* part) const; 00245 00246 // The tesseract engine intialized from equation training data. 00247 Tesseract* equ_tesseract_; 00248 00249 // The tesseract engine used for OCR. This pointer is passed in by the caller, 00250 // so do NOT destroy it in this class. 00251 Tesseract* lang_tesseract_; 00252 00253 // The ColPartitionGrid that we are processing. This pointer is passed in from 00254 // the caller, so do NOT destroy it in the class. 00255 ColPartitionGrid* part_grid_; 00256 00257 // A simple array of pointers to the best assigned column division at 00258 // each grid y coordinate. This pointer is passed in from the caller, so do 00259 // NOT destroy it in the class. 00260 ColPartitionSet** best_columns_; 00261 00262 // The super bounding box of all cps in the part_grid_. 00263 TBOX* cps_super_bbox_; 00264 00265 // The seed ColPartition for equation region. 00266 GenericVector<ColPartition*> cp_seeds_; 00267 00268 // The resolution (dpi) of the processing image. 00269 int resolution_; 00270 00271 // The number of pages we have processed. 00272 int page_count_; 00273 }; 00274 00275 } // namespace tesseract 00276 00277 #endif // TESSERACT_CCMAIN_EQUATIONDETECT_H_