Tesseract  3.02
tesseract-ocr/textord/textord.h
Go to the documentation of this file.
00001 
00002 // File:        textord.h
00003 // Description: The Textord class definition gathers text line and word
00004 //              finding functionality.
00005 // Author:      Ray Smith
00006 // Created:     Fri Mar 13 14:29:01 PDT 2009
00007 //
00008 // (C) Copyright 2009, Google Inc.
00009 // Licensed under the Apache License, Version 2.0 (the "License");
00010 // you may not use this file except in compliance with the License.
00011 // You may obtain a copy of the License at
00012 // http://www.apache.org/licenses/LICENSE-2.0
00013 // Unless required by applicable law or agreed to in writing, software
00014 // distributed under the License is distributed on an "AS IS" BASIS,
00015 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00016 // See the License for the specific language governing permissions and
00017 // limitations under the License.
00018 //
00020 
00021 #ifndef TESSERACT_TEXTORD_TEXTORD_H__
00022 #define TESSERACT_TEXTORD_TEXTORD_H__
00023 
00024 #include "ccstruct.h"
00025 #include "blobbox.h"
00026 #include "gap_map.h"
00027 #include "notdll.h"
00028 #include "publictypes.h"  // For PageSegMode.
00029 
00030 class FCOORD;
00031 class BLOCK_LIST;
00032 class PAGE_RES;
00033 class TO_BLOCK;
00034 class TO_BLOCK_LIST;
00035 class ScrollView;
00036 
00037 namespace tesseract {
00038 
      // Gathers the text line and word finding functionality (per the file
      // header). Each section banner below names a .cpp file, presumably the
      // file in which the members that follow it are defined.
00039 class Textord {
00040  public:
        // Marked explicit, so a CCStruct* is never implicitly converted to a
        // Textord. NOTE(review): ccstruct is presumably kept in ccstruct_ --
        // confirm in the definition.
00041   explicit Textord(CCStruct* ccstruct);
00042   ~Textord();
00043 
00044   // Make the textlines and words inside each block.
        // NOTE(review): width/height look like the page image size and pix the
        // page image itself -- confirm against the definition.
00045   void TextordPage(PageSegMode pageseg_mode,
00046                    int width, int height, Pix* pix,
00047                    BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks);
00048 
00049   // If we were supposed to return only a single textline, and there is more
00050   // than one, clean up and leave only the best.
00051   void CleanupSingleRowResult(PageSegMode pageseg_mode, PAGE_RES* page_res);
00052 
        // Getter and setter for the use_cjk_fp_model_ flag declared in the
        // private section.
00053   bool use_cjk_fp_model() const {
00054     return use_cjk_fp_model_;
00055   }
00056   void set_use_cjk_fp_model(bool flag) {
00057     use_cjk_fp_model_ = flag;
00058   }
00059 
00060   // tospace.cpp ///////////////////////////////////////////
00061   void to_spacing(
00062       ICOORD page_tr,        // top right of page
00063       TO_BLOCK_LIST *blocks  // blocks on page
00064                                          );
        // Both word makers take a TO_ROW and return a ROW pointer.
        // NOTE(review): ownership of the returned ROW is not visible here.
00065   ROW *make_prop_words(TO_ROW *row,     // row to make
00066                        FCOORD rotation  // for drawing
00067                        );
00068   ROW *make_blob_words(TO_ROW *row,     // row to make
00069                        FCOORD rotation  // for drawing
00070                        );
00071   // tordmain.cpp ///////////////////////////////////////////
00072   void find_components(Pix* pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks);
00073   void filter_blobs(ICOORD page_tr, TO_BLOCK_LIST *blocks, BOOL8 testing_on);
00074 
00075  private:
00076   // For underlying memory management and other utilities.
00077   CCStruct* ccstruct_;
00078 
00079   // The size of the input image.
00080   ICOORD page_tr_;
00081 
        // Flag read and written through use_cjk_fp_model() and
        // set_use_cjk_fp_model().
00082   bool use_cjk_fp_model_;
00083 
00084   // makerow.cpp ///////////////////////////////////////////
00085   // Make the textlines inside each block.
00086   void MakeRows(PageSegMode pageseg_mode, const FCOORD& skew,
00087                 int width, int height, TO_BLOCK_LIST* to_blocks);
00088   // Make the textlines inside a single block.
        // NOTE(review): win is presumably an optional debug window -- confirm
        // whether NULL is accepted.
00089   void MakeBlockRows(int min_spacing, int max_spacing,
00090                      const FCOORD& skew, TO_BLOCK* block,
00091                      ScrollView* win);
00092 
00093   void fit_rows(float gradient, ICOORD page_tr, TO_BLOCK_LIST *blocks);
        // NOTE(review): the "correct orientation" remark on testing_on (here and
        // on make_old_baselines) does not obviously describe a BOOL8 debug flag;
        // verify against the definitions.
00094   void cleanup_rows_fitting(ICOORD page_tr,    // top right
00095                             TO_BLOCK *block,   // block to do
00096                             float gradient,    // gradient to fit
00097                             FCOORD rotation,   // for drawing
00098                             inT32 block_edge,  // edge of block
00099                             BOOL8 testing_on);  // correct orientation
00100   void compute_block_xheight(TO_BLOCK *block, float gradient);
00101   void compute_row_xheight(TO_ROW *row,          // row to do
00102                            const FCOORD& rotation,
00103                            float gradient,       // global skew
00104                            int block_line_size);
00105   void make_spline_rows(TO_BLOCK *block,   // block to do
00106                         float gradient,    // gradient to fit
00107                         FCOORD rotation,   // for drawing
00108                         inT32 block_edge,  // edge of block
00109                         BOOL8 testing_on);
00110 
00112   void make_old_baselines(TO_BLOCK *block,   // block to do
00113                           BOOL8 testing_on,  // correct orientation
00114                           float gradient);
00115   void correlate_lines(TO_BLOCK *block, float gradient);
00116   void correlate_neighbours(TO_BLOCK *block,  // block rows are in.
00117                             TO_ROW **rows,    // rows of block.
00118                             int rowcount);    // no of rows to do.
00119   int correlate_with_stats(TO_ROW **rows,  // rows of block.
00120                            int rowcount,   // no of rows to do.
00121                            TO_BLOCK* block);
00122   void find_textlines(TO_BLOCK *block,  // block row is in
00123                       TO_ROW *row,      // row to do
00124                       int degree,       // required approximation
00125                       QSPLINE *spline);  // starting spline
00126   // tospace.cpp ///////////////////////////////////////////
00127   // DEBUG USE ONLY
        // The three reference parameters are outputs ("resulting estimate").
00128   void block_spacing_stats(TO_BLOCK *block,
00129                            GAPMAP *gapmap,
00130                            BOOL8 &old_text_ord_proportional,
00131                            // resulting estimate
00132                            inT16 &block_space_gap_width,
00133                            // resulting estimate
00134                            inT16 &block_non_space_gap_width
00135                            );
        // Takes the block-level gap estimates by value as inputs.
00136   void row_spacing_stats(TO_ROW *row,
00137                          GAPMAP *gapmap,
00138                          inT16 block_idx,
00139                          inT16 row_idx,
00140                          // estimate for block
00141                          inT16 block_space_gap_width,
00142                          // estimate for block
00143                          inT16 block_non_space_gap_width
00144                          );
00145   void old_to_method(TO_ROW *row,
00146                      STATS *all_gap_stats,
00147                      STATS *space_gap_stats,
00148                      STATS *small_gap_stats,
00149                      inT16 block_space_gap_width,
00150                      // estimate for block
00151                      inT16 block_non_space_gap_width
00152                      );
00153   BOOL8 isolated_row_stats(TO_ROW *row,
00154                            GAPMAP *gapmap,
00155                            STATS *all_gap_stats,
00156                            BOOL8 suspected_table,
00157                            inT16 block_idx,
00158                            inT16 row_idx);
00159   inT16 stats_count_under(STATS *stats, inT16 threshold);
00160   void improve_row_threshold(TO_ROW *row, STATS *all_gap_stats);
        // The trailing reference parameters (blanks, fuzzy_sp, fuzzy_non,
        // prev_gap_was_a_space, break_at_next_gap) can be written by the callee.
        // NOTE(review): "how many blanks?" on the blob_box line presumably
        // belongs to the blanks parameter -- confirm.
00161   BOOL8 make_a_word_break(TO_ROW *row,   // row being made
00162                           TBOX blob_box, // for next_blob // how many blanks?
00163                           inT16 prev_gap,
00164                           TBOX prev_blob_box,
00165                           inT16 real_current_gap,
00166                           inT16 within_xht_current_gap,
00167                           TBOX next_blob_box,
00168                           inT16 next_gap,
00169                           uinT8 &blanks,
00170                           BOOL8 &fuzzy_sp,
00171                           BOOL8 &fuzzy_non,
00172                           BOOL8& prev_gap_was_a_space,
00173                           BOOL8& break_at_next_gap);
00174   BOOL8 narrow_blob(TO_ROW *row, TBOX blob_box);
00175   BOOL8 wide_blob(TO_ROW *row, TBOX blob_box);
00176   BOOL8 suspected_punct_blob(TO_ROW *row, TBOX box);
        // box_it is passed by value, so the caller's iterator object itself is
        // not modified; results come back through the three reference
        // parameters.
00177   void peek_at_next_gap(TO_ROW *row,
00178                         BLOBNBOX_IT box_it,
00179                         TBOX &next_blob_box,
00180                         inT16 &next_gap,
00181                         inT16 &next_within_xht_gap);
00182   void mark_gap(TBOX blob,    // blob following gap
00183                 inT16 rule,  // heuristic id
00184                 inT16 prev_gap,
00185                 inT16 prev_blob_width,
00186                 inT16 current_gap,
00187                 inT16 next_blob_width,
00188                 inT16 next_gap);
00189   float find_mean_blob_spacing(WERD *word);
00190   BOOL8 ignore_big_gap(TO_ROW *row,
00191                        inT32 row_length,
00192                        GAPMAP *gapmap,
00193                        inT16 left,
00194                        inT16 right);
00195   // get bounding box
        // Takes the iterator by pointer. NOTE(review): the name suggests the
        // iterator is advanced -- confirm in the definition.
00196   TBOX reduced_box_next(TO_ROW *row,     // current row
00197                         BLOBNBOX_IT *it  // iterator to blobs
00198                         );
        // NOTE(review): left_above_xht looks like an output written through the
        // pointer -- confirm.
00199   TBOX reduced_box_for_blob(BLOBNBOX *blob, TO_ROW *row, inT16 *left_above_xht);
00200   // tordmain.cpp ///////////////////////////////////////////
        // NOTE(review): the meaning of the returned float is not visible in
        // this header.
00201   float filter_noise_blobs(BLOBNBOX_LIST *src_list,
00202                            BLOBNBOX_LIST *noise_list,
00203                            BLOBNBOX_LIST *small_list,
00204                            BLOBNBOX_LIST *large_list);
00205   void cleanup_blocks(BLOCK_LIST *blocks);
00206   BOOL8 clean_noise_from_row(ROW *row);
00207   void clean_noise_from_words(ROW *row);
00208   // Remove outlines that are a tiny fraction in either width or height
00209   // of the word height.
00210   void clean_small_noise_from_words(ROW *row);
00211  public:
        // Tunable parameters. Each entry below is declared through the
        // BOOL_VAR_H / INT_VAR_H / double_VAR_H macros, which are defined
        // outside this file; the arguments appear to be (name, default value,
        // help text) -- confirm against the macro definitions.
00212   // makerow.cpp ///////////////////////////////////////////
00213   BOOL_VAR_H(textord_single_height_mode, false,
00214              "Script has no xheight, so use a single mode for horizontal text");
00215   // tospace.cpp ///////////////////////////////////////////
00216   BOOL_VAR_H(tosp_old_to_method, false, "Space stats use prechopping?");
00217   BOOL_VAR_H(tosp_old_to_constrain_sp_kn, false,
00218              "Constrain relative values of inter and intra-word gaps for "
00219              "old_to_method.");
00220   BOOL_VAR_H(tosp_only_use_prop_rows, true,
00221              "Block stats to use fixed pitch rows?");
00222   BOOL_VAR_H(tosp_force_wordbreak_on_punct, false,
00223              "Force word breaks on punct to break long lines in non-space "
00224              "delimited langs");
        // NOTE(review): same help text as tosp_old_to_method above -- possibly
        // a copy-paste; confirm the intended wording.
00225   BOOL_VAR_H(tosp_use_pre_chopping, false,
00226              "Space stats use prechopping?");
00227   BOOL_VAR_H(tosp_old_to_bug_fix, false,
00228              "Fix suspected bug in old code");
        // NOTE(review): the next four parameters all carry the help text
        // "Only stat OBVIOUS spaces"; the names are the only thing telling
        // them apart in this header.
00229   BOOL_VAR_H(tosp_block_use_cert_spaces, true,
00230              "Only stat OBVIOUS spaces");
00231   BOOL_VAR_H(tosp_row_use_cert_spaces, true,
00232              "Only stat OBVIOUS spaces");
00233   BOOL_VAR_H(tosp_narrow_blobs_not_cert, true,
00234              "Only stat OBVIOUS spaces");
00235   BOOL_VAR_H(tosp_row_use_cert_spaces1, true,
00236              "Only stat OBVIOUS spaces");
00237   BOOL_VAR_H(tosp_recovery_isolated_row_stats, true,
00238              "Use row alone when inadequate cert spaces");
00239   BOOL_VAR_H(tosp_only_small_gaps_for_kern, false, "Better guess");
00240   BOOL_VAR_H(tosp_all_flips_fuzzy, false, "Pass ANY flip to context?");
00241   BOOL_VAR_H(tosp_fuzzy_limit_all, true,
00242              "Dont restrict kn->sp fuzzy limit to tables");
        // NOTE(review): tosp_stats_use_xht_gaps and tosp_use_xht_gaps share one
        // help text.
00243   BOOL_VAR_H(tosp_stats_use_xht_gaps, true,
00244              "Use within xht gap for wd breaks");
00245   BOOL_VAR_H(tosp_use_xht_gaps, true,
00246              "Use within xht gap for wd breaks");
00247   BOOL_VAR_H(tosp_only_use_xht_gaps, false,
00248              "Only use within xht gap for wd breaks");
00249   BOOL_VAR_H(tosp_rule_9_test_punct, false,
00250              "Dont chng kn to space next to punct");
00251   BOOL_VAR_H(tosp_flip_fuzz_kn_to_sp, true, "Default flip");
00252   BOOL_VAR_H(tosp_flip_fuzz_sp_to_kn, true, "Default flip");
00253   BOOL_VAR_H(tosp_improve_thresh, false,
00254              "Enable improvement heuristic");
00255   INT_VAR_H(tosp_debug_level, 0, "Debug data");
00256   INT_VAR_H(tosp_enough_space_samples_for_median, 3,
00257             "or should we use mean");
00258   INT_VAR_H(tosp_redo_kern_limit, 10,
00259             "No.samples reqd to reestimate for row");
00260   INT_VAR_H(tosp_few_samples, 40,
00261             "No.gaps reqd with 1 large gap to treat as a table");
00262   INT_VAR_H(tosp_short_row, 20,
00263             "No.gaps reqd with few cert spaces to use certs");
00264   INT_VAR_H(tosp_sanity_method, 1, "How to avoid being silly");
00265   double_VAR_H(tosp_old_sp_kn_th_factor, 2.0,
00266                "Factor for defining space threshold in terms of space and "
00267                "kern sizes");
00268   double_VAR_H(tosp_threshold_bias1, 0,
00269                "how far between kern and space?");
00270   double_VAR_H(tosp_threshold_bias2, 0,
00271                "how far between kern and space?");
00272   double_VAR_H(tosp_narrow_fraction, 0.3,
00273                "Fract of xheight for narrow");
00274   double_VAR_H(tosp_narrow_aspect_ratio, 0.48,
00275                "narrow if w/h less than this");
00276   double_VAR_H(tosp_wide_fraction, 0.52, "Fract of xheight for wide");
        // NOTE(review): this help text mirrors tosp_narrow_aspect_ratio
        // ("less than this"); confirm the comparison direction for the wide
        // case.
00277   double_VAR_H(tosp_wide_aspect_ratio, 0.0,
00278                "wide if w/h less than this");
00279   double_VAR_H(tosp_fuzzy_space_factor, 0.6,
00280                "Fract of xheight for fuzz sp");
00281   double_VAR_H(tosp_fuzzy_space_factor1, 0.5,
00282                "Fract of xheight for fuzz sp");
00283   double_VAR_H(tosp_fuzzy_space_factor2, 0.72,
00284                "Fract of xheight for fuzz sp");
00285   double_VAR_H(tosp_gap_factor, 0.83, "gap ratio to flip sp->kern");
00286   double_VAR_H(tosp_kern_gap_factor1, 2.0,
00287                "gap ratio to flip kern->sp");
00288   double_VAR_H(tosp_kern_gap_factor2, 1.3,
00289                "gap ratio to flip kern->sp");
00290   double_VAR_H(tosp_kern_gap_factor3, 2.5,
00291                "gap ratio to flip kern->sp");
00292   double_VAR_H(tosp_ignore_big_gaps, -1, "xht multiplier");
00293   double_VAR_H(tosp_ignore_very_big_gaps, 3.5, "xht multiplier");
00294   double_VAR_H(tosp_rep_space, 1.6, "rep gap multiplier for space");
00295   double_VAR_H(tosp_enough_small_gaps, 0.65,
00296                "Fract of kerns reqd for isolated row stats");
00297   double_VAR_H(tosp_table_kn_sp_ratio, 2.25,
00298                "Min difference of kn & sp in table");
00299   double_VAR_H(tosp_table_xht_sp_ratio, 0.33,
00300                "Expect spaces bigger than this");
00301   double_VAR_H(tosp_table_fuzzy_kn_sp_ratio, 3.0,
00302                "Fuzzy if less than this");
00303   double_VAR_H(tosp_fuzzy_kn_fraction, 0.5, "New fuzzy kn alg");
00304   double_VAR_H(tosp_fuzzy_sp_fraction, 0.5, "New fuzzy sp alg");
00305   double_VAR_H(tosp_min_sane_kn_sp, 1.5,
00306                "Dont trust spaces less than this time kn");
00307   double_VAR_H(tosp_init_guess_kn_mult, 2.2,
00308                "Thresh guess - mult kn by this");
00309   double_VAR_H(tosp_init_guess_xht_mult, 0.28,
00310                "Thresh guess - mult xht by this");
00311   double_VAR_H(tosp_max_sane_kn_thresh, 5.0,
00312                "Multiplier on kn to limit thresh");
00313   double_VAR_H(tosp_flip_caution, 0.0,
00314                "Dont autoflip kn to sp when large separation");
00315   double_VAR_H(tosp_large_kerning, 0.19,
00316                "Limit use of xht gap with large kns");
00317   double_VAR_H(tosp_dont_fool_with_small_kerns, -1,
00318                "Limit use of xht gap with odd small kns");
00319   double_VAR_H(tosp_near_lh_edge, 0,
00320                "Dont reduce box if the top left is non blank");
00321   double_VAR_H(tosp_silly_kn_sp_gap, 0.2,
00322                "Dont let sp minus kn get too small");
00323   double_VAR_H(tosp_pass_wide_fuzz_sp_to_context, 0.75,
00324                "How wide fuzzies need context");
00325   // tordmain.cpp ///////////////////////////////////////////
00326   BOOL_VAR_H(textord_no_rejects, false, "Don't remove noise blobs");
00327   BOOL_VAR_H(textord_show_blobs, false, "Display unsorted blobs");
00328   BOOL_VAR_H(textord_show_boxes, false, "Display boxes");
00329   INT_VAR_H(textord_max_noise_size, 7, "Pixel size of noise");
00330   double_VAR_H(textord_blob_size_bigile, 95, "Percentile for large blobs");
00331   double_VAR_H(textord_noise_area_ratio, 0.7,
00332                "Fraction of bounding box for noise");
00333   double_VAR_H(textord_blob_size_smallile, 20, "Percentile for small blobs");
        // NOTE(review): textord_initialx_ile and textord_initialasc_ile share
        // one help text.
00334   double_VAR_H(textord_initialx_ile, 0.75, "Ile of sizes for xheight guess");
00335   double_VAR_H(textord_initialasc_ile, 0.90, "Ile of sizes for xheight guess");
00336   INT_VAR_H(textord_noise_sizefraction, 10, "Fraction of size for maxima");
00337   double_VAR_H(textord_noise_sizelimit, 0.5, "Fraction of x for big t count");
00338   INT_VAR_H(textord_noise_translimit, 16, "Transitions for normal blob");
00339   double_VAR_H(textord_noise_normratio, 2.0, "Dot to norm ratio for deletion");
00340   BOOL_VAR_H(textord_noise_rejwords, true, "Reject noise-like words");
00341   BOOL_VAR_H(textord_noise_rejrows, true, "Reject noise-like rows");
00342   double_VAR_H(textord_noise_syfract, 0.2, "xh fract error for norm blobs");
00343   double_VAR_H(textord_noise_sxfract, 0.4,
00344                "xh fract width error for norm blobs");
00345   double_VAR_H(textord_noise_hfract, 1.0/64,
00346                "Height fraction to discard outlines as speckle noise");
00347   INT_VAR_H(textord_noise_sncount, 1, "super norm blobs to save row");
        // NOTE(review): same help text as textord_noise_normratio above.
00348   double_VAR_H(textord_noise_rowratio, 6.0, "Dot to norm ratio for deletion");
        // NOTE(review): this default is spelled FALSE, whereas the other
        // BOOL_VAR_H entries in this class use false/true.
00349   BOOL_VAR_H(textord_noise_debug, FALSE, "Debug row garbage detector");
00350   double_VAR_H(textord_blshift_maxshift, 0.00, "Max baseline shift");
00351   double_VAR_H(textord_blshift_xfraction, 9.99, "Min size of baseline shift");
00352 };
00353 }  // namespace tesseract.
00354 
00355 #endif  // TESSERACT_TEXTORD_TEXTORD_H__