Tesseract
3.02
|
00001 /********************************************************************** 00002 * File: cube_page_segmenter.h 00003 * Description: Declaration of the Cube Page Segmenter Class 00004 * Author: Ahmad Abdulkader 00005 * Created: 2007 00006 * 00007 * (C) Copyright 2008, Google Inc. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 * 00018 **********************************************************************/ 00019 00020 // TODO(ahmadab) 00021 // This is really a makeshift line segmenter that works well for Arabic 00022 // This should eventually be replaced by Ray Smith's Page segmenter 00023 // There are lots of magic numbers below that were determined empirically 00024 // but not thoroughly tested 00025 00026 #ifndef CUBE_LINE_SEGMENTER_H 00027 #define CUBE_LINE_SEGMENTER_H 00028 00029 #include "cube_reco_context.h" 00030 #include "allheaders.h" 00031 00032 namespace tesseract { 00033 00034 class CubeLineSegmenter { 00035 public: 00036 CubeLineSegmenter(CubeRecoContext *cntxt, Pix *img); 00037 ~CubeLineSegmenter(); 00038 00039 // Accessor functions 00040 Pix *PostProcessedImage() { 00041 if (init_ == false && Init() == false) { 00042 return NULL; 00043 } 00044 return img_; 00045 } 00046 int ColumnCnt() { 00047 if (init_ == false && Init() == false) { 00048 return NULL; 00049 } 00050 return columns_->n; 00051 } 00052 Box *Column(int col) { 00053 if (init_ == false && Init() == false) { 00054 return NULL; 00055 } 00056 00057 return columns_->boxa->box[col]; 00058 } 00059 int LineCnt() { 00060 if (init_ == false && Init() == false) { 00061 return NULL; 00062 } 00063 00064 return line_cnt_; 00065 } 00066 Pixa *ConComps() { 00067 if (init_ == false && Init() == false) { 00068 return NULL; 00069 } 00070 00071 return con_comps_; 00072 } 00073 Pixaa *Columns() { 00074 if (init_ == false && Init() == false) { 00075 return NULL; 00076 } 00077 00078 return columns_; 00079 } 00080 inline double AlefHgtEst() { return est_alef_hgt_; } 00081 inline double DotHgtEst() { return est_dot_hgt_; } 00082 Pix *Line(int line, Box **line_box); 00083 00084 private: 00085 static const float kMinValidLineHgtRatio; 00086 static const int kLineSepMorphMinHgt; 00087 static const int kHgtBins; 00088 static const int kMaxConnCompHgt; 00089 static const int kMaxConnCompWid; 00090 static const int kMaxHorzAspectRatio; 00091 static const int kMaxVertAspectRatio; 00092 static const int kMinWid; 00093 static const int kMinHgt; 00094 static const double kMaxValidLineRatio; 00095 00096 // Cube Reco context 00097 CubeRecoContext *cntxt_; 00098 // Original image 00099 Pix *orig_img_; 00100 // Post processed image 00101 Pix *img_; 00102 // Init flag 00103 bool init_; 00104 // Output Line and column info 00105 int line_cnt_; 00106 Pixaa *columns_; 00107 Pixa *con_comps_; 00108 Pixa *lines_pixa_; 00109 // Estimates for sizes of ALEF and DOT needed for Arabic analysis 00110 double est_alef_hgt_; 00111 double est_dot_hgt_; 00112 00113 // Init the page analysis 00114 bool Init(); 00115 // Performs line segmentation 00116 bool LineSegment(); 00117 // Cleanup function 00118 Pix *CleanUp(Pix *pix); 00119 // compute validity ratio for a line 00120 double ValidityRatio(Pix *line_mask_pix, Box *line_box); 00121 // validate line 00122 bool ValidLine(Pix *line_mask_pix, Box *line_box); 00123 // split a line continuously until valid or fail 00124 Pixa *SplitLine(Pix *line_mask_pix, Box *line_box); 00125 // do a desperate attempt at cracking lines 00126 Pixa *CrackLine(Pix *line_mask_pix, Box *line_box); 00127 Pixa *CrackLine(Pix *line_mask_pix, Box *line_box, int line_cnt); 00128 // Checks of a line is too small 00129 bool SmallLine(Box *line_box); 00130 // Compute the connected components in a line 00131 Boxa * ComputeLineConComps(Pix *line_mask_pix, Box *line_box, 00132 Pixa **con_comps_pixa); 00133 // create a union of two arbitrary pix 00134 Pix *PixUnion(Pix *dest_pix, Box *dest_box, Pix *src_pix, Box *src_box); 00135 // create a union of a pixa subset 00136 Pix *Pixa2Pix(Pixa *pixa, Box **dest_box, int start_pix, int pix_cnt); 00137 // create a union of a pixa 00138 Pix *Pixa2Pix(Pixa *pixa, Box **dest_box); 00139 // merges a number of lines into one line given a bounding box and a mask 00140 bool MergeLine(Pix *line_mask_pix, Box *line_box, 00141 Pixa *lines, Boxaa *lines_con_comps); 00142 // Creates new set of lines from the computed columns 00143 bool AddLines(Pixa *lines); 00144 // Estimate the parameters of the font(s) used in the page 00145 bool EstimateFontParams(); 00146 // perform a vertical Closing with the specified threshold 00147 // returning the resulting conn comps as a pixa 00148 Pixa *VerticalClosing(Pix *pix, int thresold, Boxa **boxa); 00149 // Index the specific pixa using RTL reading order 00150 int *IndexRTL(Pixa *pixa); 00151 // Implements a rudimentary page & line segmenter 00152 bool FindLines(); 00153 }; 00154 } 00155 00156 #endif // CUBE_LINE_SEGMENTER_H