// Tesseract 3.02
00001 00002 // File: textord.cpp 00003 // Description: The top-level text line and word finding functionality. 00004 // Author: Ray Smith 00005 // Created: Fri Mar 13 14:43:01 PDT 2009 00006 // 00007 // (C) Copyright 2009, Google Inc. 00008 // Licensed under the Apache License, Version 2.0 (the "License"); 00009 // you may not use this file except in compliance with the License. 00010 // You may obtain a copy of the License at 00011 // http://www.apache.org/licenses/LICENSE-2.0 00012 // Unless required by applicable law or agreed to in writing, software 00013 // distributed under the License is distributed on an "AS IS" BASIS, 00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 // See the License for the specific language governing permissions and 00016 // limitations under the License. 00017 // 00019 00020 #include "drawtord.h" 00021 #include "textord.h" 00022 #include "makerow.h" 00023 #include "pageres.h" 00024 #include "tordmain.h" 00025 #include "wordseg.h" 00026 00027 namespace tesseract { 00028 00029 Textord::Textord(CCStruct* ccstruct) 00030 : ccstruct_(ccstruct), use_cjk_fp_model_(false), 00031 // makerow.cpp /////////////////////////////////////////// 00032 BOOL_MEMBER(textord_single_height_mode, false, 00033 "Script has no xheight, so use a single mode", 00034 ccstruct_->params()), 00035 // tospace.cpp /////////////////////////////////////////// 00036 BOOL_MEMBER(tosp_old_to_method, false, "Space stats use prechopping?", 00037 ccstruct_->params()), 00038 BOOL_MEMBER(tosp_old_to_constrain_sp_kn, false, 00039 "Constrain relative values of inter and intra-word gaps for " 00040 "old_to_method.", 00041 ccstruct_->params()), 00042 BOOL_MEMBER(tosp_only_use_prop_rows, true, 00043 "Block stats to use fixed pitch rows?", 00044 ccstruct_->params()), 00045 BOOL_MEMBER(tosp_force_wordbreak_on_punct, false, 00046 "Force word breaks on punct to break long lines in non-space " 00047 "delimited langs", 00048 ccstruct_->params()), 
00049 BOOL_MEMBER(tosp_use_pre_chopping, false, 00050 "Space stats use prechopping?", 00051 ccstruct_->params()), 00052 BOOL_MEMBER(tosp_old_to_bug_fix, false, "Fix suspected bug in old code", 00053 ccstruct_->params()), 00054 BOOL_MEMBER(tosp_block_use_cert_spaces, true, 00055 "Only stat OBVIOUS spaces", 00056 ccstruct_->params()), 00057 BOOL_MEMBER(tosp_row_use_cert_spaces, true, "Only stat OBVIOUS spaces", 00058 ccstruct_->params()), 00059 BOOL_MEMBER(tosp_narrow_blobs_not_cert, true, 00060 "Only stat OBVIOUS spaces", 00061 ccstruct_->params()), 00062 BOOL_MEMBER(tosp_row_use_cert_spaces1, true, "Only stat OBVIOUS spaces", 00063 ccstruct_->params()), 00064 BOOL_MEMBER(tosp_recovery_isolated_row_stats, true, 00065 "Use row alone when inadequate cert spaces", 00066 ccstruct_->params()), 00067 BOOL_MEMBER(tosp_only_small_gaps_for_kern, false, "Better guess", 00068 ccstruct_->params()), 00069 BOOL_MEMBER(tosp_all_flips_fuzzy, false, "Pass ANY flip to context?", 00070 ccstruct_->params()), 00071 BOOL_MEMBER(tosp_fuzzy_limit_all, true, 00072 "Dont restrict kn->sp fuzzy limit to tables", 00073 ccstruct_->params()), 00074 BOOL_MEMBER(tosp_stats_use_xht_gaps, true, 00075 "Use within xht gap for wd breaks", 00076 ccstruct_->params()), 00077 BOOL_MEMBER(tosp_use_xht_gaps, true, "Use within xht gap for wd breaks", 00078 ccstruct_->params()), 00079 BOOL_MEMBER(tosp_only_use_xht_gaps, false, 00080 "Only use within xht gap for wd breaks", 00081 ccstruct_->params()), 00082 BOOL_MEMBER(tosp_rule_9_test_punct, false, 00083 "Dont chng kn to space next to punct", 00084 ccstruct_->params()), 00085 BOOL_MEMBER(tosp_flip_fuzz_kn_to_sp, true, "Default flip", 00086 ccstruct_->params()), 00087 BOOL_MEMBER(tosp_flip_fuzz_sp_to_kn, true, "Default flip", 00088 ccstruct_->params()), 00089 BOOL_MEMBER(tosp_improve_thresh, false, "Enable improvement heuristic", 00090 ccstruct_->params()), 00091 INT_MEMBER(tosp_debug_level, 0, "Debug data", 00092 ccstruct_->params()), 00093 
INT_MEMBER(tosp_enough_space_samples_for_median, 3, 00094 "or should we use mean", 00095 ccstruct_->params()), 00096 INT_MEMBER(tosp_redo_kern_limit, 10, 00097 "No.samples reqd to reestimate for row", 00098 ccstruct_->params()), 00099 INT_MEMBER(tosp_few_samples, 40, 00100 "No.gaps reqd with 1 large gap to treat as a table", 00101 ccstruct_->params()), 00102 INT_MEMBER(tosp_short_row, 20, 00103 "No.gaps reqd with few cert spaces to use certs", 00104 ccstruct_->params()), 00105 INT_MEMBER(tosp_sanity_method, 1, "How to avoid being silly", 00106 ccstruct_->params()), 00107 double_MEMBER(tosp_old_sp_kn_th_factor, 2.0, 00108 "Factor for defining space threshold in terms of space and " 00109 "kern sizes", 00110 ccstruct_->params()), 00111 double_MEMBER(tosp_threshold_bias1, 0, 00112 "how far between kern and space?", 00113 ccstruct_->params()), 00114 double_MEMBER(tosp_threshold_bias2, 0, 00115 "how far between kern and space?", 00116 ccstruct_->params()), 00117 double_MEMBER(tosp_narrow_fraction, 0.3, "Fract of xheight for narrow", 00118 ccstruct_->params()), 00119 double_MEMBER(tosp_narrow_aspect_ratio, 0.48, 00120 "narrow if w/h less than this", 00121 ccstruct_->params()), 00122 double_MEMBER(tosp_wide_fraction, 0.52, "Fract of xheight for wide", 00123 ccstruct_->params()), 00124 double_MEMBER(tosp_wide_aspect_ratio, 0.0, "wide if w/h less than this", 00125 ccstruct_->params()), 00126 double_MEMBER(tosp_fuzzy_space_factor, 0.6, 00127 "Fract of xheight for fuzz sp", 00128 ccstruct_->params()), 00129 double_MEMBER(tosp_fuzzy_space_factor1, 0.5, 00130 "Fract of xheight for fuzz sp", 00131 ccstruct_->params()), 00132 double_MEMBER(tosp_fuzzy_space_factor2, 0.72, 00133 "Fract of xheight for fuzz sp", 00134 ccstruct_->params()), 00135 double_MEMBER(tosp_gap_factor, 0.83, "gap ratio to flip sp->kern", 00136 ccstruct_->params()), 00137 double_MEMBER(tosp_kern_gap_factor1, 2.0, "gap ratio to flip kern->sp", 00138 ccstruct_->params()), 00139 
double_MEMBER(tosp_kern_gap_factor2, 1.3, "gap ratio to flip kern->sp", 00140 ccstruct_->params()), 00141 double_MEMBER(tosp_kern_gap_factor3, 2.5, "gap ratio to flip kern->sp", 00142 ccstruct_->params()), 00143 double_MEMBER(tosp_ignore_big_gaps, -1, "xht multiplier", 00144 ccstruct_->params()), 00145 double_MEMBER(tosp_ignore_very_big_gaps, 3.5, "xht multiplier", 00146 ccstruct_->params()), 00147 double_MEMBER(tosp_rep_space, 1.6, "rep gap multiplier for space", 00148 ccstruct_->params()), 00149 double_MEMBER(tosp_enough_small_gaps, 0.65, 00150 "Fract of kerns reqd for isolated row stats", 00151 ccstruct_->params()), 00152 double_MEMBER(tosp_table_kn_sp_ratio, 2.25, 00153 "Min difference of kn & sp in table", 00154 ccstruct_->params()), 00155 double_MEMBER(tosp_table_xht_sp_ratio, 0.33, 00156 "Expect spaces bigger than this", 00157 ccstruct_->params()), 00158 double_MEMBER(tosp_table_fuzzy_kn_sp_ratio, 3.0, 00159 "Fuzzy if less than this", 00160 ccstruct_->params()), 00161 double_MEMBER(tosp_fuzzy_kn_fraction, 0.5, "New fuzzy kn alg", 00162 ccstruct_->params()), 00163 double_MEMBER(tosp_fuzzy_sp_fraction, 0.5, "New fuzzy sp alg", 00164 ccstruct_->params()), 00165 double_MEMBER(tosp_min_sane_kn_sp, 1.5, 00166 "Dont trust spaces less than this time kn", 00167 ccstruct_->params()), 00168 double_MEMBER(tosp_init_guess_kn_mult, 2.2, 00169 "Thresh guess - mult kn by this", 00170 ccstruct_->params()), 00171 double_MEMBER(tosp_init_guess_xht_mult, 0.28, 00172 "Thresh guess - mult xht by this", 00173 ccstruct_->params()), 00174 double_MEMBER(tosp_max_sane_kn_thresh, 5.0, 00175 "Multiplier on kn to limit thresh", 00176 ccstruct_->params()), 00177 double_MEMBER(tosp_flip_caution, 0.0, 00178 "Dont autoflip kn to sp when large separation", 00179 ccstruct_->params()), 00180 double_MEMBER(tosp_large_kerning, 0.19, 00181 "Limit use of xht gap with large kns", 00182 ccstruct_->params()), 00183 double_MEMBER(tosp_dont_fool_with_small_kerns, -1, 00184 "Limit use of xht gap with odd 
small kns", 00185 ccstruct_->params()), 00186 double_MEMBER(tosp_near_lh_edge, 0, 00187 "Dont reduce box if the top left is non blank", 00188 ccstruct_->params()), 00189 double_MEMBER(tosp_silly_kn_sp_gap, 0.2, 00190 "Dont let sp minus kn get too small", 00191 ccstruct_->params()), 00192 double_MEMBER(tosp_pass_wide_fuzz_sp_to_context, 0.75, 00193 "How wide fuzzies need context", 00194 ccstruct_->params()), 00195 // tordmain.cpp /////////////////////////////////////////// 00196 BOOL_MEMBER(textord_no_rejects, false, "Don't remove noise blobs", 00197 ccstruct_->params()), 00198 BOOL_MEMBER(textord_show_blobs, false, "Display unsorted blobs", 00199 ccstruct_->params()), 00200 BOOL_MEMBER(textord_show_boxes, false, "Display unsorted blobs", 00201 ccstruct_->params()), 00202 INT_MEMBER(textord_max_noise_size, 7, "Pixel size of noise", 00203 ccstruct_->params()), 00204 double_MEMBER(textord_blob_size_bigile, 95, "Percentile for large blobs", 00205 ccstruct_->params()), 00206 double_MEMBER(textord_noise_area_ratio, 0.7, 00207 "Fraction of bounding box for noise", 00208 ccstruct_->params()), 00209 double_MEMBER(textord_blob_size_smallile, 20, 00210 "Percentile for small blobs", 00211 ccstruct_->params()), 00212 double_MEMBER(textord_initialx_ile, 0.75, 00213 "Ile of sizes for xheight guess", 00214 ccstruct_->params()), 00215 double_MEMBER(textord_initialasc_ile, 0.90, 00216 "Ile of sizes for xheight guess", 00217 ccstruct_->params()), 00218 INT_MEMBER(textord_noise_sizefraction, 10, 00219 "Fraction of size for maxima", 00220 ccstruct_->params()), 00221 double_MEMBER(textord_noise_sizelimit, 0.5, 00222 "Fraction of x for big t count", 00223 ccstruct_->params()), 00224 INT_MEMBER(textord_noise_translimit, 16, "Transitions for normal blob", 00225 ccstruct_->params()), 00226 double_MEMBER(textord_noise_normratio, 2.0, 00227 "Dot to norm ratio for deletion", 00228 ccstruct_->params()), 00229 BOOL_MEMBER(textord_noise_rejwords, true, "Reject noise-like words", 00230 
ccstruct_->params()), 00231 BOOL_MEMBER(textord_noise_rejrows, true, "Reject noise-like rows", 00232 ccstruct_->params()), 00233 double_MEMBER(textord_noise_syfract, 0.2, 00234 "xh fract height error for norm blobs", 00235 ccstruct_->params()), 00236 double_MEMBER(textord_noise_sxfract, 0.4, 00237 "xh fract width error for norm blobs", 00238 ccstruct_->params()), 00239 double_MEMBER(textord_noise_hfract, 1.0/64, 00240 "Height fraction to discard outlines as speckle noise", 00241 ccstruct_->params()), 00242 INT_MEMBER(textord_noise_sncount, 1, "super norm blobs to save row", 00243 ccstruct_->params()), 00244 double_MEMBER(textord_noise_rowratio, 6.0, 00245 "Dot to norm ratio for deletion", 00246 ccstruct_->params()), 00247 BOOL_MEMBER(textord_noise_debug, false, "Debug row garbage detector", 00248 ccstruct_->params()), 00249 double_MEMBER(textord_blshift_maxshift, 0.00, "Max baseline shift", 00250 ccstruct_->params()), 00251 double_MEMBER(textord_blshift_xfraction, 9.99, 00252 "Min size of baseline shift", 00253 ccstruct_->params()) { 00254 } 00255 00256 Textord::~Textord() { 00257 } 00258 00259 // Make the textlines and words inside each block. 00260 void Textord::TextordPage(PageSegMode pageseg_mode, 00261 int width, int height, Pix* pix, 00262 BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks) { 00263 page_tr_.set_x(width); 00264 page_tr_.set_y(height); 00265 if (to_blocks->empty()) { 00266 // AutoPageSeg was not used, so we need to find_components first. 00267 find_components(pix, blocks, to_blocks); 00268 } else { 00269 // AutoPageSeg does not need to find_components as it did that already. 00270 // Filter_blobs sets up the TO_BLOCKs the same as find_components does. 
00271 filter_blobs(page_tr_, to_blocks, true); 00272 } 00273 00274 ASSERT_HOST(!to_blocks->empty()); 00275 if (pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT) { 00276 const FCOORD anticlockwise90(0.0f, 1.0f); 00277 const FCOORD clockwise90(0.0f, -1.0f); 00278 TO_BLOCK_IT it(to_blocks); 00279 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 00280 TO_BLOCK* to_block = it.data(); 00281 BLOCK* block = to_block->block; 00282 // Create a fake poly_block in block from its bounding box. 00283 block->set_poly_block(new POLY_BLOCK(block->bounding_box(), 00284 PT_VERTICAL_TEXT)); 00285 // Rotate the to_block along with its contained block and blobnbox lists. 00286 to_block->rotate(anticlockwise90); 00287 // Set the block's rotation values to obey the convention followed in 00288 // layout analysis for vertical text. 00289 block->set_re_rotation(clockwise90); 00290 block->set_classify_rotation(clockwise90); 00291 } 00292 } 00293 00294 TO_BLOCK_IT to_block_it(to_blocks); 00295 TO_BLOCK* to_block = to_block_it.data(); 00296 // Make the rows in the block. 00297 float gradient; 00298 // Do it the old fashioned way. 00299 if (PSM_LINE_FIND_ENABLED(pageseg_mode)) { 00300 gradient = make_rows(page_tr_, to_blocks); 00301 } else { 00302 // SINGLE_LINE, SINGLE_WORD and SINGLE_CHAR all need a single row. 00303 gradient = make_single_row(page_tr_, to_block, to_blocks); 00304 } 00305 // Now fit baselines. For now only old mode is available. 00306 fit_rows(gradient, page_tr_, to_blocks); 00307 // Now make the words in the lines. 00308 if (PSM_WORD_FIND_ENABLED(pageseg_mode)) { 00309 // SINGLE_LINE uses the old word maker on the single line. 00310 make_words(this, page_tr_, gradient, blocks, to_blocks); 00311 } else { 00312 // SINGLE_WORD and SINGLE_CHAR cram all the blobs into a 00313 // single word, and in SINGLE_CHAR mode, all the outlines 00314 // go in a single blob. 
00315 TO_BLOCK* to_block = to_block_it.data(); 00316 make_single_word(pageseg_mode == PSM_SINGLE_CHAR, 00317 to_block->get_rows(), to_block->block->row_list()); 00318 } 00319 cleanup_blocks(blocks); // Remove empties. 00320 00321 // Compute the margins for each row in the block, to be used later for 00322 // paragraph detection. 00323 BLOCK_IT b_it(blocks); 00324 for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { 00325 b_it.data()->compute_row_margins(); 00326 } 00327 #ifndef GRAPHICS_DISABLED 00328 close_to_win(); 00329 #endif 00330 } 00331 00332 // If we were supposed to return only a single textline, and there is more 00333 // than one, clean up and leave only the best. 00334 void Textord::CleanupSingleRowResult(PageSegMode pageseg_mode, 00335 PAGE_RES* page_res) { 00336 if (PSM_LINE_FIND_ENABLED(pageseg_mode)) 00337 return; // No cleanup required. 00338 PAGE_RES_IT it(page_res); 00339 // Find the best row, being the greatest mean word conf. 00340 float row_total_conf = 0.0f; 00341 int row_word_count = 0; 00342 ROW_RES* best_row = NULL; 00343 float best_conf = 0.0f; 00344 for (it.restart_page(); it.word() != NULL; it.forward()) { 00345 WERD_RES* word = it.word(); 00346 row_total_conf += word->best_choice->certainty(); 00347 ++row_word_count; 00348 if (it.next_row() != it.row()) { 00349 row_total_conf /= row_word_count; 00350 if (best_row == NULL || best_conf < row_total_conf) { 00351 best_row = it.row(); 00352 best_conf = row_total_conf; 00353 } 00354 row_total_conf = 0.0f; 00355 row_word_count = 0; 00356 } 00357 } 00358 // Now eliminate any word not in the best row. 00359 for (it.restart_page(); it.word() != NULL; it.forward()) { 00360 if (it.row() != best_row) 00361 it.DeleteCurrentWord(); 00362 } 00363 } 00364 00365 } // namespace tesseract.