// Tesseract 3.02
00001 00002 // File: textord.h 00003 // Description: The Textord class definition gathers text line and word 00004 // finding functionality. 00005 // Author: Ray Smith 00006 // Created: Fri Mar 13 14:29:01 PDT 2009 00007 // 00008 // (C) Copyright 2009, Google Inc. 00009 // Licensed under the Apache License, Version 2.0 (the "License"); 00010 // you may not use this file except in compliance with the License. 00011 // You may obtain a copy of the License at 00012 // http://www.apache.org/licenses/LICENSE-2.0 00013 // Unless required by applicable law or agreed to in writing, software 00014 // distributed under the License is distributed on an "AS IS" BASIS, 00015 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00016 // See the License for the specific language governing permissions and 00017 // limitations under the License. 00018 // 00020 00021 #ifndef TESSERACT_TEXTORD_TEXTORD_H__ 00022 #define TESSERACT_TEXTORD_TEXTORD_H__ 00023 00024 #include "ccstruct.h" 00025 #include "blobbox.h" 00026 #include "gap_map.h" 00027 #include "notdll.h" 00028 #include "publictypes.h" // For PageSegMode. 00029 00030 class FCOORD; 00031 class BLOCK_LIST; 00032 class PAGE_RES; 00033 class TO_BLOCK; 00034 class TO_BLOCK_LIST; 00035 class ScrollView; 00036 00037 namespace tesseract { 00038 00039 class Textord { 00040 public: 00041 explicit Textord(CCStruct* ccstruct); 00042 ~Textord(); 00043 00044 // Make the textlines and words inside each block. 00045 void TextordPage(PageSegMode pageseg_mode, 00046 int width, int height, Pix* pix, 00047 BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks); 00048 00049 // If we were supposed to return only a single textline, and there is more 00050 // than one, clean up and leave only the best. 
00051 void CleanupSingleRowResult(PageSegMode pageseg_mode, PAGE_RES* page_res); 00052 00053 bool use_cjk_fp_model() const { 00054 return use_cjk_fp_model_; 00055 } 00056 void set_use_cjk_fp_model(bool flag) { 00057 use_cjk_fp_model_ = flag; 00058 } 00059 00060 // tospace.cpp /////////////////////////////////////////// 00061 void to_spacing( 00062 ICOORD page_tr, //topright of page 00063 TO_BLOCK_LIST *blocks //blocks on page 00064 ); 00065 ROW *make_prop_words(TO_ROW *row, // row to make 00066 FCOORD rotation // for drawing 00067 ); 00068 ROW *make_blob_words(TO_ROW *row, // row to make 00069 FCOORD rotation // for drawing 00070 ); 00071 // tordmain.cpp /////////////////////////////////////////// 00072 void find_components(Pix* pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks); 00073 void filter_blobs(ICOORD page_tr, TO_BLOCK_LIST *blocks, BOOL8 testing_on); 00074 00075 private: 00076 // For underlying memory management and other utilities. 00077 CCStruct* ccstruct_; 00078 00079 // The size of the input image. 00080 ICOORD page_tr_; 00081 00082 bool use_cjk_fp_model_; 00083 00084 // makerow.cpp /////////////////////////////////////////// 00085 // Make the textlines inside each block. 00086 void MakeRows(PageSegMode pageseg_mode, const FCOORD& skew, 00087 int width, int height, TO_BLOCK_LIST* to_blocks); 00088 // Make the textlines inside a single block. 
00089 void MakeBlockRows(int min_spacing, int max_spacing, 00090 const FCOORD& skew, TO_BLOCK* block, 00091 ScrollView* win); 00092 00093 void fit_rows(float gradient, ICOORD page_tr, TO_BLOCK_LIST *blocks); 00094 void cleanup_rows_fitting(ICOORD page_tr, // top right 00095 TO_BLOCK *block, // block to do 00096 float gradient, // gradient to fit 00097 FCOORD rotation, // for drawing 00098 inT32 block_edge, // edge of block 00099 BOOL8 testing_on); // correct orientation 00100 void compute_block_xheight(TO_BLOCK *block, float gradient); 00101 void compute_row_xheight(TO_ROW *row, // row to do 00102 const FCOORD& rotation, 00103 float gradient, // global skew 00104 int block_line_size); 00105 void make_spline_rows(TO_BLOCK *block, // block to do 00106 float gradient, // gradient to fit 00107 FCOORD rotation, // for drawing 00108 inT32 block_edge, // edge of block 00109 BOOL8 testing_on); 00110 00112 void make_old_baselines(TO_BLOCK *block, // block to do 00113 BOOL8 testing_on, // correct orientation 00114 float gradient); 00115 void correlate_lines(TO_BLOCK *block, float gradient); 00116 void correlate_neighbours(TO_BLOCK *block, // block rows are in. 00117 TO_ROW **rows, // rows of block. 00118 int rowcount); // no of rows to do. 00119 int correlate_with_stats(TO_ROW **rows, // rows of block. 00120 int rowcount, // no of rows to do. 
00121 TO_BLOCK* block); 00122 void find_textlines(TO_BLOCK *block, // block row is in 00123 TO_ROW *row, // row to do 00124 int degree, // required approximation 00125 QSPLINE *spline); // starting spline 00126 // tospace.cpp /////////////////////////////////////////// 00127 //DEBUG USE ONLY 00128 void block_spacing_stats(TO_BLOCK *block, 00129 GAPMAP *gapmap, 00130 BOOL8 &old_text_ord_proportional, 00131 //resulting estimate 00132 inT16 &block_space_gap_width, 00133 //resulting estimate 00134 inT16 &block_non_space_gap_width 00135 ); 00136 void row_spacing_stats(TO_ROW *row, 00137 GAPMAP *gapmap, 00138 inT16 block_idx, 00139 inT16 row_idx, 00140 //estimate for block 00141 inT16 block_space_gap_width, 00142 //estimate for block 00143 inT16 block_non_space_gap_width 00144 ); 00145 void old_to_method(TO_ROW *row, 00146 STATS *all_gap_stats, 00147 STATS *space_gap_stats, 00148 STATS *small_gap_stats, 00149 inT16 block_space_gap_width, 00150 //estimate for block 00151 inT16 block_non_space_gap_width 00152 ); 00153 BOOL8 isolated_row_stats(TO_ROW *row, 00154 GAPMAP *gapmap, 00155 STATS *all_gap_stats, 00156 BOOL8 suspected_table, 00157 inT16 block_idx, 00158 inT16 row_idx); 00159 inT16 stats_count_under(STATS *stats, inT16 threshold); 00160 void improve_row_threshold(TO_ROW *row, STATS *all_gap_stats); 00161 BOOL8 make_a_word_break(TO_ROW *row, // row being made 00162 TBOX blob_box, // for next_blob // how many blanks? 
00163 inT16 prev_gap, 00164 TBOX prev_blob_box, 00165 inT16 real_current_gap, 00166 inT16 within_xht_current_gap, 00167 TBOX next_blob_box, 00168 inT16 next_gap, 00169 uinT8 &blanks, 00170 BOOL8 &fuzzy_sp, 00171 BOOL8 &fuzzy_non, 00172 BOOL8& prev_gap_was_a_space, 00173 BOOL8& break_at_next_gap); 00174 BOOL8 narrow_blob(TO_ROW *row, TBOX blob_box); 00175 BOOL8 wide_blob(TO_ROW *row, TBOX blob_box); 00176 BOOL8 suspected_punct_blob(TO_ROW *row, TBOX box); 00177 void peek_at_next_gap(TO_ROW *row, 00178 BLOBNBOX_IT box_it, 00179 TBOX &next_blob_box, 00180 inT16 &next_gap, 00181 inT16 &next_within_xht_gap); 00182 void mark_gap(TBOX blob, //blob following gap 00183 inT16 rule, // heuristic id 00184 inT16 prev_gap, 00185 inT16 prev_blob_width, 00186 inT16 current_gap, 00187 inT16 next_blob_width, 00188 inT16 next_gap); 00189 float find_mean_blob_spacing(WERD *word); 00190 BOOL8 ignore_big_gap(TO_ROW *row, 00191 inT32 row_length, 00192 GAPMAP *gapmap, 00193 inT16 left, 00194 inT16 right); 00195 //get bounding box 00196 TBOX reduced_box_next(TO_ROW *row, //current row 00197 BLOBNBOX_IT *it //iterator to blobds 00198 ); 00199 TBOX reduced_box_for_blob(BLOBNBOX *blob, TO_ROW *row, inT16 *left_above_xht); 00200 // tordmain.cpp /////////////////////////////////////////// 00201 float filter_noise_blobs(BLOBNBOX_LIST *src_list, 00202 BLOBNBOX_LIST *noise_list, 00203 BLOBNBOX_LIST *small_list, 00204 BLOBNBOX_LIST *large_list); 00205 void cleanup_blocks(BLOCK_LIST *blocks); 00206 BOOL8 clean_noise_from_row(ROW *row); 00207 void clean_noise_from_words(ROW *row); 00208 // Remove outlines that are a tiny fraction in either width or height 00209 // of the word height. 
00210 void clean_small_noise_from_words(ROW *row); 00211 public: 00212 // makerow.cpp /////////////////////////////////////////// 00213 BOOL_VAR_H(textord_single_height_mode, false, 00214 "Script has no xheight, so use a single mode for horizontal text"); 00215 // tospace.cpp /////////////////////////////////////////// 00216 BOOL_VAR_H(tosp_old_to_method, false, "Space stats use prechopping?"); 00217 BOOL_VAR_H(tosp_old_to_constrain_sp_kn, false, 00218 "Constrain relative values of inter and intra-word gaps for " 00219 "old_to_method."); 00220 BOOL_VAR_H(tosp_only_use_prop_rows, true, 00221 "Block stats to use fixed pitch rows?"); 00222 BOOL_VAR_H(tosp_force_wordbreak_on_punct, false, 00223 "Force word breaks on punct to break long lines in non-space " 00224 "delimited langs"); 00225 BOOL_VAR_H(tosp_use_pre_chopping, false, 00226 "Space stats use prechopping?"); 00227 BOOL_VAR_H(tosp_old_to_bug_fix, false, 00228 "Fix suspected bug in old code"); 00229 BOOL_VAR_H(tosp_block_use_cert_spaces, true, 00230 "Only stat OBVIOUS spaces"); 00231 BOOL_VAR_H(tosp_row_use_cert_spaces, true, 00232 "Only stat OBVIOUS spaces"); 00233 BOOL_VAR_H(tosp_narrow_blobs_not_cert, true, 00234 "Only stat OBVIOUS spaces"); 00235 BOOL_VAR_H(tosp_row_use_cert_spaces1, true, 00236 "Only stat OBVIOUS spaces"); 00237 BOOL_VAR_H(tosp_recovery_isolated_row_stats, true, 00238 "Use row alone when inadequate cert spaces"); 00239 BOOL_VAR_H(tosp_only_small_gaps_for_kern, false, "Better guess"); 00240 BOOL_VAR_H(tosp_all_flips_fuzzy, false, "Pass ANY flip to context?"); 00241 BOOL_VAR_H(tosp_fuzzy_limit_all, true, 00242 "Dont restrict kn->sp fuzzy limit to tables"); 00243 BOOL_VAR_H(tosp_stats_use_xht_gaps, true, 00244 "Use within xht gap for wd breaks"); 00245 BOOL_VAR_H(tosp_use_xht_gaps, true, 00246 "Use within xht gap for wd breaks"); 00247 BOOL_VAR_H(tosp_only_use_xht_gaps, false, 00248 "Only use within xht gap for wd breaks"); 00249 BOOL_VAR_H(tosp_rule_9_test_punct, false, 00250 "Dont chng kn to 
space next to punct"); 00251 BOOL_VAR_H(tosp_flip_fuzz_kn_to_sp, true, "Default flip"); 00252 BOOL_VAR_H(tosp_flip_fuzz_sp_to_kn, true, "Default flip"); 00253 BOOL_VAR_H(tosp_improve_thresh, false, 00254 "Enable improvement heuristic"); 00255 INT_VAR_H(tosp_debug_level, 0, "Debug data"); 00256 INT_VAR_H(tosp_enough_space_samples_for_median, 3, 00257 "or should we use mean"); 00258 INT_VAR_H(tosp_redo_kern_limit, 10, 00259 "No.samples reqd to reestimate for row"); 00260 INT_VAR_H(tosp_few_samples, 40, 00261 "No.gaps reqd with 1 large gap to treat as a table"); 00262 INT_VAR_H(tosp_short_row, 20, 00263 "No.gaps reqd with few cert spaces to use certs"); 00264 INT_VAR_H(tosp_sanity_method, 1, "How to avoid being silly"); 00265 double_VAR_H(tosp_old_sp_kn_th_factor, 2.0, 00266 "Factor for defining space threshold in terms of space and " 00267 "kern sizes"); 00268 double_VAR_H(tosp_threshold_bias1, 0, 00269 "how far between kern and space?"); 00270 double_VAR_H(tosp_threshold_bias2, 0, 00271 "how far between kern and space?"); 00272 double_VAR_H(tosp_narrow_fraction, 0.3, 00273 "Fract of xheight for narrow"); 00274 double_VAR_H(tosp_narrow_aspect_ratio, 0.48, 00275 "narrow if w/h less than this"); 00276 double_VAR_H(tosp_wide_fraction, 0.52, "Fract of xheight for wide"); 00277 double_VAR_H(tosp_wide_aspect_ratio, 0.0, 00278 "wide if w/h less than this"); 00279 double_VAR_H(tosp_fuzzy_space_factor, 0.6, 00280 "Fract of xheight for fuzz sp"); 00281 double_VAR_H(tosp_fuzzy_space_factor1, 0.5, 00282 "Fract of xheight for fuzz sp"); 00283 double_VAR_H(tosp_fuzzy_space_factor2, 0.72, 00284 "Fract of xheight for fuzz sp"); 00285 double_VAR_H(tosp_gap_factor, 0.83, "gap ratio to flip sp->kern"); 00286 double_VAR_H(tosp_kern_gap_factor1, 2.0, 00287 "gap ratio to flip kern->sp"); 00288 double_VAR_H(tosp_kern_gap_factor2, 1.3, 00289 "gap ratio to flip kern->sp"); 00290 double_VAR_H(tosp_kern_gap_factor3, 2.5, 00291 "gap ratio to flip kern->sp"); 00292 
double_VAR_H(tosp_ignore_big_gaps, -1, "xht multiplier"); 00293 double_VAR_H(tosp_ignore_very_big_gaps, 3.5, "xht multiplier"); 00294 double_VAR_H(tosp_rep_space, 1.6, "rep gap multiplier for space"); 00295 double_VAR_H(tosp_enough_small_gaps, 0.65, 00296 "Fract of kerns reqd for isolated row stats"); 00297 double_VAR_H(tosp_table_kn_sp_ratio, 2.25, 00298 "Min difference of kn & sp in table"); 00299 double_VAR_H(tosp_table_xht_sp_ratio, 0.33, 00300 "Expect spaces bigger than this"); 00301 double_VAR_H(tosp_table_fuzzy_kn_sp_ratio, 3.0, 00302 "Fuzzy if less than this"); 00303 double_VAR_H(tosp_fuzzy_kn_fraction, 0.5, "New fuzzy kn alg"); 00304 double_VAR_H(tosp_fuzzy_sp_fraction, 0.5, "New fuzzy sp alg"); 00305 double_VAR_H(tosp_min_sane_kn_sp, 1.5, 00306 "Dont trust spaces less than this time kn"); 00307 double_VAR_H(tosp_init_guess_kn_mult, 2.2, 00308 "Thresh guess - mult kn by this"); 00309 double_VAR_H(tosp_init_guess_xht_mult, 0.28, 00310 "Thresh guess - mult xht by this"); 00311 double_VAR_H(tosp_max_sane_kn_thresh, 5.0, 00312 "Multiplier on kn to limit thresh"); 00313 double_VAR_H(tosp_flip_caution, 0.0, 00314 "Dont autoflip kn to sp when large separation"); 00315 double_VAR_H(tosp_large_kerning, 0.19, 00316 "Limit use of xht gap with large kns"); 00317 double_VAR_H(tosp_dont_fool_with_small_kerns, -1, 00318 "Limit use of xht gap with odd small kns"); 00319 double_VAR_H(tosp_near_lh_edge, 0, 00320 "Dont reduce box if the top left is non blank"); 00321 double_VAR_H(tosp_silly_kn_sp_gap, 0.2, 00322 "Dont let sp minus kn get too small"); 00323 double_VAR_H(tosp_pass_wide_fuzz_sp_to_context, 0.75, 00324 "How wide fuzzies need context"); 00325 // tordmain.cpp /////////////////////////////////////////// 00326 BOOL_VAR_H(textord_no_rejects, false, "Don't remove noise blobs"); 00327 BOOL_VAR_H(textord_show_blobs, false, "Display unsorted blobs"); 00328 BOOL_VAR_H(textord_show_boxes, false, "Display boxes"); 00329 INT_VAR_H(textord_max_noise_size, 7, "Pixel size of 
noise"); 00330 double_VAR_H(textord_blob_size_bigile, 95, "Percentile for large blobs"); 00331 double_VAR_H(textord_noise_area_ratio, 0.7, 00332 "Fraction of bounding box for noise"); 00333 double_VAR_H(textord_blob_size_smallile, 20, "Percentile for small blobs"); 00334 double_VAR_H(textord_initialx_ile, 0.75, "Ile of sizes for xheight guess"); 00335 double_VAR_H(textord_initialasc_ile, 0.90, "Ile of sizes for xheight guess"); 00336 INT_VAR_H(textord_noise_sizefraction, 10, "Fraction of size for maxima"); 00337 double_VAR_H(textord_noise_sizelimit, 0.5, "Fraction of x for big t count"); 00338 INT_VAR_H(textord_noise_translimit, 16, "Transitions for normal blob"); 00339 double_VAR_H(textord_noise_normratio, 2.0, "Dot to norm ratio for deletion"); 00340 BOOL_VAR_H(textord_noise_rejwords, true, "Reject noise-like words"); 00341 BOOL_VAR_H(textord_noise_rejrows, true, "Reject noise-like rows"); 00342 double_VAR_H(textord_noise_syfract, 0.2, "xh fract error for norm blobs"); 00343 double_VAR_H(textord_noise_sxfract, 0.4, 00344 "xh fract width error for norm blobs"); 00345 double_VAR_H(textord_noise_hfract, 1.0/64, 00346 "Height fraction to discard outlines as speckle noise"); 00347 INT_VAR_H(textord_noise_sncount, 1, "super norm blobs to save row"); 00348 double_VAR_H(textord_noise_rowratio, 6.0, "Dot to norm ratio for deletion"); 00349 BOOL_VAR_H(textord_noise_debug, FALSE, "Debug row garbage detector"); 00350 double_VAR_H(textord_blshift_maxshift, 0.00, "Max baseline shift"); 00351 double_VAR_H(textord_blshift_xfraction, 9.99, "Min size of baseline shift"); 00352 }; 00353 } // namespace tesseract. 00354 00355 #endif // TESSERACT_TEXTORD_TEXTORD_H__