Tesseract
3.02
|
00001 // Copyright 2008 Google Inc. All Rights Reserved. 00002 // Author: shobhitsaxena@google.com (Shobhit Saxena) 00003 00004 #ifndef TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_ 00005 #define TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_ 00006 00007 #include "ocrblock.h" 00008 #include "params.h" 00009 00010 struct Pix; 00011 struct Box; 00012 struct Boxa; 00013 00014 extern 00015 INT_VAR_H(devanagari_split_debuglevel, 0, 00016 "Debug level for split shiro-rekha process."); 00017 00018 extern 00019 BOOL_VAR_H(devanagari_split_debugimage, 0, 00020 "Whether to create a debug image for split shiro-rekha process."); 00021 00022 class TBOX; 00023 class IMAGE; 00024 00025 namespace tesseract { 00026 00027 class PixelHistogram { 00028 public: 00029 PixelHistogram() { 00030 hist_ = NULL; 00031 length_ = 0; 00032 } 00033 00034 ~PixelHistogram() { 00035 Clear(); 00036 } 00037 00038 void Clear() { 00039 if (hist_) { 00040 delete[] hist_; 00041 } 00042 length_ = 0; 00043 } 00044 00045 int* const hist() const { 00046 return hist_; 00047 } 00048 00049 int length() const { 00050 return length_; 00051 } 00052 00053 // Methods to construct histograms from images. These clear any existing data. 00054 void ConstructVerticalCountHist(Pix* pix); 00055 void ConstructHorizontalCountHist(Pix* pix); 00056 00057 // This method returns the global-maxima for the histogram. The frequency of 00058 // the global maxima is returned in count, if specified. 00059 int GetHistogramMaximum(int* count) const; 00060 00061 private: 00062 int* hist_; 00063 int length_; 00064 }; 00065 00066 class ShiroRekhaSplitter { 00067 public: 00068 enum SplitStrategy { 00069 NO_SPLIT = 0, // No splitting is performed for the phase. 00070 MINIMAL_SPLIT, // Blobs are split minimally. 00071 MAXIMAL_SPLIT // Blobs are split maximally. 00072 }; 00073 00074 ShiroRekhaSplitter(); 00075 virtual ~ShiroRekhaSplitter(); 00076 00077 // Top-level method to perform splitting based on current settings. 00078 // Returns true if a split was actually performed. 00079 // If split_for_pageseg is true, the pageseg_split_strategy_ is used for 00080 // splitting. If false, the ocr_split_strategy_ is used. 00081 bool Split(bool split_for_pageseg); 00082 00083 // Clears the memory held by this object. 00084 void Clear(); 00085 00086 // Refreshes the words in the segmentation block list by using blobs in the 00087 // input blob list. 00088 // The segmentation block list must be set. 00089 void RefreshSegmentationWithNewBlobs(C_BLOB_LIST* new_blobs); 00090 00091 // Returns true if the split strategies for pageseg and ocr are different. 00092 bool HasDifferentSplitStrategies() const { 00093 return pageseg_split_strategy_ != ocr_split_strategy_; 00094 } 00095 00096 // This only keeps a copy of the block list pointer. At split call, the list 00097 // object should still be alive. This block list is used as a golden 00098 // segmentation when performing splitting. 00099 void set_segmentation_block_list(BLOCK_LIST* block_list) { 00100 segmentation_block_list_ = block_list; 00101 } 00102 00103 static const int kUnspecifiedXheight = -1; 00104 00105 void set_global_xheight(int xheight) { 00106 global_xheight_ = xheight; 00107 } 00108 00109 void set_perform_close(bool perform) { 00110 perform_close_ = perform; 00111 } 00112 00113 // Returns the image obtained from shiro-rekha splitting. The returned object 00114 // is owned by this class. Callers may want to clone the returned pix to keep 00115 // it alive beyond the life of ShiroRekhaSplitter object. 00116 Pix* splitted_image() { 00117 return splitted_image_; 00118 } 00119 00120 // On setting the input image, a clone of it is owned by this class. 00121 void set_orig_pix(Pix* pix); 00122 00123 // Returns the input image provided to the object. This object is owned by 00124 // this class. Callers may want to clone the returned pix to work with it. 00125 Pix* orig_pix() { 00126 return orig_pix_; 00127 } 00128 00129 SplitStrategy ocr_split_strategy() const { 00130 return ocr_split_strategy_; 00131 } 00132 00133 void set_ocr_split_strategy(SplitStrategy strategy) { 00134 ocr_split_strategy_ = strategy; 00135 } 00136 00137 SplitStrategy pageseg_split_strategy() const { 00138 return pageseg_split_strategy_; 00139 } 00140 00141 void set_pageseg_split_strategy(SplitStrategy strategy) { 00142 pageseg_split_strategy_ = strategy; 00143 } 00144 00145 BLOCK_LIST* segmentation_block_list() { 00146 return segmentation_block_list_; 00147 } 00148 00149 // This method dumps a debug image to the specified location. 00150 void DumpDebugImage(const char* filename) const; 00151 00152 // This method returns the computed mode-height of blobs in the pix. 00153 // It also prunes very small blobs from calculation. Could be used to provide 00154 // a global xheight estimate for images which have the same point-size text. 00155 static int GetModeHeight(Pix* pix); 00156 00157 private: 00158 // Method to perform a close operation on the input image. The xheight 00159 // estimate decides the size of sel used. 00160 static void PerformClose(Pix* pix, int xheight_estimate); 00161 00162 // This method resolves the cc bbox to a particular row and returns the row's 00163 // xheight. This uses block_list_ if available, else just returns the 00164 // global_xheight_ estimate currently set in the object. 00165 int GetXheightForCC(Box* cc_bbox); 00166 00167 // Returns a list of regions (boxes) which should be cleared in the original 00168 // image so as to perform shiro-rekha splitting. Pix is assumed to carry one 00169 // (or less) word only. Xheight measure could be the global estimate, the row 00170 // estimate, or unspecified. If unspecified, over splitting may occur, since a 00171 // conservative estimate of stroke width along with an associated multiplier 00172 // is used in its place. It is advisable to have a specified xheight when 00173 // splitting for classification/training. 00174 void SplitWordShiroRekha(SplitStrategy split_strategy, 00175 Pix* pix, 00176 int xheight, 00177 int word_left, 00178 int word_top, 00179 Boxa* regions_to_clear); 00180 00181 // Returns a new box object for the corresponding TBOX, based on the original 00182 // image's coordinate system. 00183 Box* GetBoxForTBOX(const TBOX& tbox) const; 00184 00185 // This method returns y-extents of the shiro-rekha computed from the input 00186 // word image. 00187 static void GetShiroRekhaYExtents(Pix* word_pix, 00188 int* shirorekha_top, 00189 int* shirorekha_bottom, 00190 int* shirorekha_ylevel); 00191 00192 Pix* orig_pix_; // Just a clone of the input image passed. 00193 Pix* splitted_image_; // Image produced after the last splitting round. The 00194 // object is owned by this class. 00195 SplitStrategy pageseg_split_strategy_; 00196 SplitStrategy ocr_split_strategy_; 00197 Pix* debug_image_; 00198 // This block list is used as a golden segmentation when performing splitting. 00199 BLOCK_LIST* segmentation_block_list_; 00200 int global_xheight_; 00201 bool perform_close_; // Whether a morphological close operation should be 00202 // performed before CCs are run through splitting. 00203 }; 00204 00205 } // namespace tesseract. 00206 00207 #endif // TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_