Tesseract  3.02
tesseract-ocr/textord/textord.cpp
Go to the documentation of this file.
00001 
00002 // File:        textord.cpp
00003 // Description: The top-level text line and word finding functionality.
00004 // Author:      Ray Smith
00005 // Created:     Fri Mar 13 14:43:01 PDT 2009
00006 //
00007 // (C) Copyright 2009, Google Inc.
00008 // Licensed under the Apache License, Version 2.0 (the "License");
00009 // you may not use this file except in compliance with the License.
00010 // You may obtain a copy of the License at
00011 // http://www.apache.org/licenses/LICENSE-2.0
00012 // Unless required by applicable law or agreed to in writing, software
00013 // distributed under the License is distributed on an "AS IS" BASIS,
00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015 // See the License for the specific language governing permissions and
00016 // limitations under the License.
00017 //
00019 
00020 #include "drawtord.h"
00021 #include "textord.h"
00022 #include "makerow.h"
00023 #include "pageres.h"
00024 #include "tordmain.h"
00025 #include "wordseg.h"
00026 
00027 namespace tesseract {
00028 
// Constructs a Textord bound to the given CCStruct.
// Stores ccstruct in ccstruct_, sets use_cjk_fp_model_ to false, and then
// initializes every tunable parameter member through the BOOL_MEMBER,
// INT_MEMBER and double_MEMBER macros. Each macro invocation supplies, in
// order: the member name, its default value, a description string, and
// ccstruct_->params(). The constructor body itself is empty.
// NOTE(review): the *_MEMBER macros are defined outside this file; presumably
// they register each parameter with the supplied params object - confirm
// against the params header.
// NOTE(review): every parameter initializer dereferences ccstruct_, so this
// relies on ccstruct being non-null and on ccstruct_ being declared before
// the parameter members in the class - confirm against textord.h.
// The "// xxx.cpp ////" separators below group the parameters; presumably
// they name the source file that consumes each group - TODO confirm.
00029 Textord::Textord(CCStruct* ccstruct)
00030     : ccstruct_(ccstruct), use_cjk_fp_model_(false),
00031       // makerow.cpp ///////////////////////////////////////////
00032       BOOL_MEMBER(textord_single_height_mode, false,
00033                   "Script has no xheight, so use a single mode",
00034                   ccstruct_->params()),
00035       // tospace.cpp ///////////////////////////////////////////
00036       BOOL_MEMBER(tosp_old_to_method, false, "Space stats use prechopping?",
00037                   ccstruct_->params()),
00038       BOOL_MEMBER(tosp_old_to_constrain_sp_kn, false,
00039                   "Constrain relative values of inter and intra-word gaps for "
00040                   "old_to_method.",
00041                   ccstruct_->params()),
00042       BOOL_MEMBER(tosp_only_use_prop_rows, true,
00043                   "Block stats to use fixed pitch rows?",
00044                   ccstruct_->params()),
00045       BOOL_MEMBER(tosp_force_wordbreak_on_punct, false,
00046                   "Force word breaks on punct to break long lines in non-space "
00047                   "delimited langs",
00048                   ccstruct_->params()),
            // NOTE(review): same description string as tosp_old_to_method
            // above - possibly copy-pasted; confirm the intended wording.
00049       BOOL_MEMBER(tosp_use_pre_chopping, false,
00050                   "Space stats use prechopping?",
00051                   ccstruct_->params()),
00052       BOOL_MEMBER(tosp_old_to_bug_fix, false, "Fix suspected bug in old code",
00053                   ccstruct_->params()),
            // The next four parameters all carry the identical description
            // "Only stat OBVIOUS spaces".
00054       BOOL_MEMBER(tosp_block_use_cert_spaces, true,
00055                   "Only stat OBVIOUS spaces",
00056                   ccstruct_->params()),
00057       BOOL_MEMBER(tosp_row_use_cert_spaces, true, "Only stat OBVIOUS spaces",
00058                   ccstruct_->params()),
00059       BOOL_MEMBER(tosp_narrow_blobs_not_cert, true,
00060             "Only stat OBVIOUS spaces",
00061                   ccstruct_->params()),
00062       BOOL_MEMBER(tosp_row_use_cert_spaces1, true, "Only stat OBVIOUS spaces",
00063                   ccstruct_->params()),
00064       BOOL_MEMBER(tosp_recovery_isolated_row_stats, true,
00065                   "Use row alone when inadequate cert spaces",
00066                   ccstruct_->params()),
00067       BOOL_MEMBER(tosp_only_small_gaps_for_kern, false, "Better guess",
00068                   ccstruct_->params()),
00069       BOOL_MEMBER(tosp_all_flips_fuzzy, false, "Pass ANY flip to context?",
00070                   ccstruct_->params()),
00071       BOOL_MEMBER(tosp_fuzzy_limit_all, true,
00072                   "Dont restrict kn->sp fuzzy limit to tables",
00073                   ccstruct_->params()),
00074       BOOL_MEMBER(tosp_stats_use_xht_gaps, true,
00075                   "Use within xht gap for wd breaks",
00076                   ccstruct_->params()),
00077       BOOL_MEMBER(tosp_use_xht_gaps, true, "Use within xht gap for wd breaks",
00078                   ccstruct_->params()),
00079       BOOL_MEMBER(tosp_only_use_xht_gaps, false,
00080                   "Only use within xht gap for wd breaks",
00081                   ccstruct_->params()),
00082       BOOL_MEMBER(tosp_rule_9_test_punct, false,
00083                   "Dont chng kn to space next to punct",
00084                   ccstruct_->params()),
00085       BOOL_MEMBER(tosp_flip_fuzz_kn_to_sp, true, "Default flip",
00086                   ccstruct_->params()),
00087       BOOL_MEMBER(tosp_flip_fuzz_sp_to_kn, true, "Default flip",
00088                   ccstruct_->params()),
00089       BOOL_MEMBER(tosp_improve_thresh, false, "Enable improvement heuristic",
00090                   ccstruct_->params()),
00091       INT_MEMBER(tosp_debug_level, 0, "Debug data",
00092                  ccstruct_->params()),
00093       INT_MEMBER(tosp_enough_space_samples_for_median, 3,
00094            "or should we use mean",
00095                  ccstruct_->params()),
00096       INT_MEMBER(tosp_redo_kern_limit, 10,
00097                  "No.samples reqd to reestimate for row",
00098                  ccstruct_->params()),
00099       INT_MEMBER(tosp_few_samples, 40,
00100                  "No.gaps reqd with 1 large gap to treat as a table",
00101                  ccstruct_->params()),
00102       INT_MEMBER(tosp_short_row, 20,
00103                  "No.gaps reqd with few cert spaces to use certs",
00104                  ccstruct_->params()),
00105       INT_MEMBER(tosp_sanity_method, 1, "How to avoid being silly",
00106                  ccstruct_->params()),
00107       double_MEMBER(tosp_old_sp_kn_th_factor, 2.0,
00108                     "Factor for defining space threshold in terms of space and "
00109                     "kern sizes",
00110                     ccstruct_->params()),
00111       double_MEMBER(tosp_threshold_bias1, 0,
00112                     "how far between kern and space?",
00113                     ccstruct_->params()),
00114       double_MEMBER(tosp_threshold_bias2, 0,
00115                     "how far between kern and space?",
00116                     ccstruct_->params()),
00117       double_MEMBER(tosp_narrow_fraction, 0.3, "Fract of xheight for narrow",
00118                     ccstruct_->params()),
00119       double_MEMBER(tosp_narrow_aspect_ratio, 0.48,
00120                     "narrow if w/h less than this",
00121                     ccstruct_->params()),
00122       double_MEMBER(tosp_wide_fraction, 0.52, "Fract of xheight for wide",
00123                     ccstruct_->params()),
            // NOTE(review): description says "less than", the same wording as
            // tosp_narrow_aspect_ratio - confirm whether "greater than" was
            // meant for the wide case.
00124       double_MEMBER(tosp_wide_aspect_ratio, 0.0, "wide if w/h less than this",
00125                     ccstruct_->params()),
00126       double_MEMBER(tosp_fuzzy_space_factor, 0.6,
00127                     "Fract of xheight for fuzz sp",
00128                     ccstruct_->params()),
00129       double_MEMBER(tosp_fuzzy_space_factor1, 0.5,
00130                     "Fract of xheight for fuzz sp",
00131                     ccstruct_->params()),
00132       double_MEMBER(tosp_fuzzy_space_factor2, 0.72,
00133                     "Fract of xheight for fuzz sp",
00134                     ccstruct_->params()),
00135       double_MEMBER(tosp_gap_factor, 0.83, "gap ratio to flip sp->kern",
00136                     ccstruct_->params()),
00137       double_MEMBER(tosp_kern_gap_factor1, 2.0, "gap ratio to flip kern->sp",
00138                     ccstruct_->params()),
00139       double_MEMBER(tosp_kern_gap_factor2, 1.3, "gap ratio to flip kern->sp",
00140                     ccstruct_->params()),
00141       double_MEMBER(tosp_kern_gap_factor3, 2.5, "gap ratio to flip kern->sp",
00142                     ccstruct_->params()),
00143       double_MEMBER(tosp_ignore_big_gaps, -1, "xht multiplier",
00144                     ccstruct_->params()),
00145       double_MEMBER(tosp_ignore_very_big_gaps, 3.5, "xht multiplier",
00146                     ccstruct_->params()),
00147       double_MEMBER(tosp_rep_space, 1.6, "rep gap multiplier for space",
00148                     ccstruct_->params()),
00149       double_MEMBER(tosp_enough_small_gaps, 0.65,
00150                     "Fract of kerns reqd for isolated row stats",
00151                     ccstruct_->params()),
00152       double_MEMBER(tosp_table_kn_sp_ratio, 2.25,
00153                     "Min difference of kn & sp in table",
00154                     ccstruct_->params()),
00155       double_MEMBER(tosp_table_xht_sp_ratio, 0.33,
00156                     "Expect spaces bigger than this",
00157                     ccstruct_->params()),
00158       double_MEMBER(tosp_table_fuzzy_kn_sp_ratio, 3.0,
00159                     "Fuzzy if less than this",
00160                     ccstruct_->params()),
00161       double_MEMBER(tosp_fuzzy_kn_fraction, 0.5, "New fuzzy kn alg",
00162                     ccstruct_->params()),
00163       double_MEMBER(tosp_fuzzy_sp_fraction, 0.5, "New fuzzy sp alg",
00164                     ccstruct_->params()),
00165       double_MEMBER(tosp_min_sane_kn_sp, 1.5,
00166                     "Dont trust spaces less than this time kn",
00167                     ccstruct_->params()),
00168       double_MEMBER(tosp_init_guess_kn_mult, 2.2,
00169                     "Thresh guess - mult kn by this",
00170                     ccstruct_->params()),
00171       double_MEMBER(tosp_init_guess_xht_mult, 0.28,
00172                     "Thresh guess - mult xht by this",
00173                     ccstruct_->params()),
00174       double_MEMBER(tosp_max_sane_kn_thresh, 5.0,
00175                     "Multiplier on kn to limit thresh",
00176                     ccstruct_->params()),
00177       double_MEMBER(tosp_flip_caution, 0.0,
00178                     "Dont autoflip kn to sp when large separation",
00179                     ccstruct_->params()),
00180       double_MEMBER(tosp_large_kerning, 0.19,
00181                     "Limit use of xht gap with large kns",
00182                     ccstruct_->params()),
00183       double_MEMBER(tosp_dont_fool_with_small_kerns, -1,
00184                     "Limit use of xht gap with odd small kns",
00185                     ccstruct_->params()),
00186       double_MEMBER(tosp_near_lh_edge, 0,
00187                     "Dont reduce box if the top left is non blank",
00188                     ccstruct_->params()),
00189       double_MEMBER(tosp_silly_kn_sp_gap, 0.2,
00190                     "Dont let sp minus kn get too small",
00191                     ccstruct_->params()),
00192       double_MEMBER(tosp_pass_wide_fuzz_sp_to_context, 0.75,
00193                     "How wide fuzzies need context",
00194                     ccstruct_->params()),
00195       // tordmain.cpp ///////////////////////////////////////////
00196       BOOL_MEMBER(textord_no_rejects, false, "Don't remove noise blobs",
00197                   ccstruct_->params()),
00198       BOOL_MEMBER(textord_show_blobs, false, "Display unsorted blobs",
00199                   ccstruct_->params()),
            // NOTE(review): same description string as textord_show_blobs
            // above - possibly copy-pasted; confirm the intended wording.
00200       BOOL_MEMBER(textord_show_boxes, false, "Display unsorted blobs",
00201                   ccstruct_->params()),
00202       INT_MEMBER(textord_max_noise_size, 7, "Pixel size of noise",
00203                   ccstruct_->params()),
00204       double_MEMBER(textord_blob_size_bigile, 95, "Percentile for large blobs",
00205                     ccstruct_->params()),
00206       double_MEMBER(textord_noise_area_ratio, 0.7,
00207                     "Fraction of bounding box for noise",
00208                     ccstruct_->params()),
00209       double_MEMBER(textord_blob_size_smallile, 20,
00210                     "Percentile for small blobs",
00211                     ccstruct_->params()),
00212       double_MEMBER(textord_initialx_ile, 0.75,
00213                     "Ile of sizes for xheight guess",
00214                     ccstruct_->params()),
00215       double_MEMBER(textord_initialasc_ile, 0.90,
00216                     "Ile of sizes for xheight guess",
00217                     ccstruct_->params()),
00218       INT_MEMBER(textord_noise_sizefraction, 10,
00219                  "Fraction of size for maxima",
00220                  ccstruct_->params()),
00221       double_MEMBER(textord_noise_sizelimit, 0.5,
00222                     "Fraction of x for big t count",
00223                     ccstruct_->params()),
00224       INT_MEMBER(textord_noise_translimit, 16, "Transitions for normal blob",
00225                  ccstruct_->params()),
00226       double_MEMBER(textord_noise_normratio, 2.0,
00227                     "Dot to norm ratio for deletion",
00228                     ccstruct_->params()),
00229       BOOL_MEMBER(textord_noise_rejwords, true, "Reject noise-like words",
00230                   ccstruct_->params()),
00231       BOOL_MEMBER(textord_noise_rejrows, true, "Reject noise-like rows",
00232                   ccstruct_->params()),
00233       double_MEMBER(textord_noise_syfract, 0.2,
00234                     "xh fract height error for norm blobs",
00235                     ccstruct_->params()),
00236       double_MEMBER(textord_noise_sxfract, 0.4,
00237                     "xh fract width error for norm blobs",
00238                     ccstruct_->params()),
            // The default below is written as the expression 1.0/64
            // (i.e. 0.015625).
00239       double_MEMBER(textord_noise_hfract, 1.0/64,
00240                     "Height fraction to discard outlines as speckle noise",
00241                     ccstruct_->params()),
00242       INT_MEMBER(textord_noise_sncount, 1, "super norm blobs to save row",
00243                  ccstruct_->params()),
00244       double_MEMBER(textord_noise_rowratio, 6.0,
00245                     "Dot to norm ratio for deletion",
00246                     ccstruct_->params()),
00247       BOOL_MEMBER(textord_noise_debug, false, "Debug row garbage detector",
00248                   ccstruct_->params()),
00249       double_MEMBER(textord_blshift_maxshift, 0.00, "Max baseline shift",
00250                     ccstruct_->params()),
00251       double_MEMBER(textord_blshift_xfraction, 9.99,
00252                     "Min size of baseline shift",
00253                     ccstruct_->params()) {
00254 }
00255 
00256 Textord::~Textord() {
00257 }
00258 
00259 // Make the textlines and words inside each block.
00260 void Textord::TextordPage(PageSegMode pageseg_mode,
00261                           int width, int height, Pix* pix,
00262                           BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks) {
00263   page_tr_.set_x(width);
00264   page_tr_.set_y(height);
00265   if (to_blocks->empty()) {
00266     // AutoPageSeg was not used, so we need to find_components first.
00267     find_components(pix, blocks, to_blocks);
00268   } else {
00269     // AutoPageSeg does not need to find_components as it did that already.
00270     // Filter_blobs sets up the TO_BLOCKs the same as find_components does.
00271     filter_blobs(page_tr_, to_blocks, true);
00272   }
00273 
00274   ASSERT_HOST(!to_blocks->empty());
00275   if (pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT) {
00276     const FCOORD anticlockwise90(0.0f, 1.0f);
00277     const FCOORD clockwise90(0.0f, -1.0f);
00278     TO_BLOCK_IT it(to_blocks);
00279     for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00280       TO_BLOCK* to_block = it.data();
00281       BLOCK* block = to_block->block;
00282       // Create a fake poly_block in block from its bounding box.
00283       block->set_poly_block(new POLY_BLOCK(block->bounding_box(),
00284                                            PT_VERTICAL_TEXT));
00285       // Rotate the to_block along with its contained block and blobnbox lists.
00286       to_block->rotate(anticlockwise90);
00287       // Set the block's rotation values to obey the convention followed in
00288       // layout analysis for vertical text.
00289       block->set_re_rotation(clockwise90);
00290       block->set_classify_rotation(clockwise90);
00291     }
00292   }
00293 
00294   TO_BLOCK_IT to_block_it(to_blocks);
00295   TO_BLOCK* to_block = to_block_it.data();
00296   // Make the rows in the block.
00297   float gradient;
00298   // Do it the old fashioned way.
00299   if (PSM_LINE_FIND_ENABLED(pageseg_mode)) {
00300     gradient = make_rows(page_tr_, to_blocks);
00301   } else {
00302     // SINGLE_LINE, SINGLE_WORD and SINGLE_CHAR all need a single row.
00303     gradient = make_single_row(page_tr_, to_block, to_blocks);
00304   }
00305   // Now fit baselines. For now only old mode is available.
00306   fit_rows(gradient, page_tr_, to_blocks);
00307   // Now make the words in the lines.
00308   if (PSM_WORD_FIND_ENABLED(pageseg_mode)) {
00309     // SINGLE_LINE uses the old word maker on the single line.
00310     make_words(this, page_tr_, gradient, blocks, to_blocks);
00311   } else {
00312     // SINGLE_WORD and SINGLE_CHAR cram all the blobs into a
00313     // single word, and in SINGLE_CHAR mode, all the outlines
00314     // go in a single blob.
00315     TO_BLOCK* to_block = to_block_it.data();
00316     make_single_word(pageseg_mode == PSM_SINGLE_CHAR,
00317                      to_block->get_rows(), to_block->block->row_list());
00318   }
00319   cleanup_blocks(blocks);  // Remove empties.
00320 
00321   // Compute the margins for each row in the block, to be used later for
00322   // paragraph detection.
00323   BLOCK_IT b_it(blocks);
00324   for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
00325     b_it.data()->compute_row_margins();
00326   }
00327 #ifndef GRAPHICS_DISABLED
00328   close_to_win();
00329 #endif
00330 }
00331 
00332 // If we were supposed to return only a single textline, and there is more
00333 // than one, clean up and leave only the best.
00334 void Textord::CleanupSingleRowResult(PageSegMode pageseg_mode,
00335                                      PAGE_RES* page_res) {
00336   if (PSM_LINE_FIND_ENABLED(pageseg_mode))
00337     return;  // No cleanup required.
00338   PAGE_RES_IT it(page_res);
00339   // Find the best row, being the greatest mean word conf.
00340   float row_total_conf = 0.0f;
00341   int row_word_count = 0;
00342   ROW_RES* best_row = NULL;
00343   float best_conf = 0.0f;
00344   for (it.restart_page(); it.word() != NULL; it.forward()) {
00345     WERD_RES* word = it.word();
00346     row_total_conf += word->best_choice->certainty();
00347     ++row_word_count;
00348     if (it.next_row() != it.row()) {
00349       row_total_conf /= row_word_count;
00350       if (best_row == NULL || best_conf < row_total_conf) {
00351         best_row = it.row();
00352         best_conf = row_total_conf;
00353       }
00354       row_total_conf = 0.0f;
00355       row_word_count = 0;
00356     }
00357   }
00358   // Now eliminate any word not in the best row.
00359   for (it.restart_page(); it.word() != NULL; it.forward()) {
00360     if (it.row() != best_row)
00361       it.DeleteCurrentWord();
00362   }
00363 }
00364 
00365 }  // namespace tesseract.