// Tesseract 3.02
00001 /********************************************************************** 00002 * File: topitch.h (Formerly to_pitch.h) 00003 * Description: Code to determine fixed pitchness and the pitch if fixed. 00004 * Author: Ray Smith 00005 * Created: Tue Aug 24 16:57:29 BST 1993 00006 * 00007 * (C) Copyright 1993, Hewlett-Packard Ltd. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 * 00018 **********************************************************************/ 00019 00020 #ifndef TOPITCH_H 00021 #define TOPITCH_H 00022 00023 #include "blobbox.h" 00024 #include "notdll.h" 00025 00026 namespace tesseract { 00027 class Tesseract; 00028 } 00029 extern BOOL_VAR_H (textord_debug_pitch_test, FALSE, 00030 "Debug on fixed pitch test"); 00031 extern BOOL_VAR_H (textord_debug_pitch_metric, FALSE, 00032 "Write full metric stuff"); 00033 extern BOOL_VAR_H (textord_show_row_cuts, FALSE, "Draw row-level cuts"); 00034 extern BOOL_VAR_H (textord_show_page_cuts, FALSE, "Draw page-level cuts"); 00035 extern BOOL_VAR_H (textord_pitch_cheat, FALSE, 00036 "Use correct answer for fixed/prop"); 00037 extern BOOL_VAR_H (textord_blockndoc_fixed, TRUE, 00038 "Attempt whole doc/block fixed pitch"); 00039 extern BOOL_VAR_H (textord_fast_pitch_test, FALSE, 00040 "Do even faster pitch algorithm"); 00041 extern double_VAR_H (textord_projection_scale, 0.125, 00042 "Ding rate for mid-cuts"); 00043 extern double_VAR_H (textord_balance_factor, 2.0, 00044 
"Ding rate for unbalanced char cells"); 00045 00046 void compute_fixed_pitch(ICOORD page_tr, // top right 00047 TO_BLOCK_LIST *port_blocks, // input list 00048 float gradient, // page skew 00049 FCOORD rotation, // for drawing 00050 BOOL8 testing_on); // correct orientation 00051 void fix_row_pitch( //get some value 00052 TO_ROW *bad_row, //row to fix 00053 TO_BLOCK *bad_block, //block of bad_row 00054 TO_BLOCK_LIST *blocks, //blocks to scan 00055 inT32 row_target, //number of row 00056 inT32 block_target //number of block 00057 ); 00058 void compute_block_pitch( TO_BLOCK *block, // input list 00059 FCOORD rotation, // for drawing 00060 inT32 block_index, // block number 00061 BOOL8 testing_on); // correct orientation 00062 BOOL8 compute_rows_pitch( //find line stats 00063 TO_BLOCK *block, //block to do 00064 inT32 block_index, //block number 00065 BOOL8 testing_on //correct orientation 00066 ); 00067 BOOL8 try_doc_fixed( //determine pitch 00068 ICOORD page_tr, //top right 00069 TO_BLOCK_LIST *port_blocks, //input list 00070 float gradient //page skew 00071 ); 00072 BOOL8 try_block_fixed( //find line stats 00073 TO_BLOCK *block, //block to do 00074 inT32 block_index //block number 00075 ); 00076 BOOL8 try_rows_fixed( //find line stats 00077 TO_BLOCK *block, //block to do 00078 inT32 block_index, //block number 00079 BOOL8 testing_on //correct orientation 00080 ); 00081 void print_block_counts( //find line stats 00082 TO_BLOCK *block, //block to do 00083 inT32 block_index //block number 00084 ); 00085 void count_block_votes( //find line stats 00086 TO_BLOCK *block, //block to do 00087 inT32 &def_fixed, //add to counts 00088 inT32 &def_prop, 00089 inT32 &maybe_fixed, 00090 inT32 &maybe_prop, 00091 inT32 &corr_fixed, 00092 inT32 &corr_prop, 00093 inT32 &dunno); 00094 BOOL8 row_pitch_stats( //find line stats 00095 TO_ROW *row, //current row 00096 inT32 maxwidth, //of spaces 00097 BOOL8 testing_on //correct orientation 00098 ); 00099 BOOL8 find_row_pitch( //find lines 
00100 TO_ROW *row, //row to do 00101 inT32 maxwidth, //max permitted space 00102 inT32 dm_gap, //ignorable gaps 00103 TO_BLOCK *block, //block of row 00104 inT32 block_index, //block_number 00105 inT32 row_index, //number of row 00106 BOOL8 testing_on //correct orientation 00107 ); 00108 BOOL8 fixed_pitch_row( //find lines 00109 TO_ROW *row, //row to do 00110 BLOCK* block, 00111 inT32 block_index //block_number 00112 ); 00113 BOOL8 count_pitch_stats( //find lines 00114 TO_ROW *row, //row to do 00115 STATS *gap_stats, //blob gaps 00116 STATS *pitch_stats, //centre-centre stats 00117 float initial_pitch, //guess at pitch 00118 float min_space, //estimate space size 00119 BOOL8 ignore_outsize, //discard big objects 00120 BOOL8 split_outsize, //split big objects 00121 inT32 dm_gap //ignorable gaps 00122 ); 00123 float tune_row_pitch( //find fp cells 00124 TO_ROW *row, //row to do 00125 STATS *projection, //vertical projection 00126 inT16 projection_left, //edge of projection 00127 inT16 projection_right, //edge of projection 00128 float space_size, //size of blank 00129 float &initial_pitch, //guess at pitch 00130 float &best_sp_sd, //space sd 00131 inT16 &best_mid_cuts, //no of cheap cuts 00132 ICOORDELT_LIST *best_cells, //row cells 00133 BOOL8 testing_on //inidividual words 00134 ); 00135 float tune_row_pitch2( //find fp cells 00136 TO_ROW *row, //row to do 00137 STATS *projection, //vertical projection 00138 inT16 projection_left, //edge of projection 00139 inT16 projection_right, //edge of projection 00140 float space_size, //size of blank 00141 float &initial_pitch, //guess at pitch 00142 float &best_sp_sd, //space sd 00143 inT16 &best_mid_cuts, //no of cheap cuts 00144 ICOORDELT_LIST *best_cells, //row cells 00145 BOOL8 testing_on //inidividual words 00146 ); 00147 float compute_pitch_sd ( //find fp cells 00148 TO_ROW * row, //row to do 00149 STATS * projection, //vertical projection 00150 inT16 projection_left, //edge 00151 inT16 projection_right, //edge 00152 
float space_size, //size of blank 00153 float initial_pitch, //guess at pitch 00154 float &sp_sd, //space sd 00155 inT16 & mid_cuts, //no of free cuts 00156 ICOORDELT_LIST * row_cells, //list of chop pts 00157 BOOL8 testing_on, //inidividual words 00158 inT16 start = 0, //start of good range 00159 inT16 end = 0 //end of good range 00160 ); 00161 float compute_pitch_sd2 ( //find fp cells 00162 TO_ROW * row, //row to do 00163 STATS * projection, //vertical projection 00164 inT16 projection_left, //edge 00165 inT16 projection_right, //edge 00166 float initial_pitch, //guess at pitch 00167 inT16 & occupation, //no of occupied cells 00168 inT16 & mid_cuts, //no of free cuts 00169 ICOORDELT_LIST * row_cells, //list of chop pts 00170 BOOL8 testing_on, //inidividual words 00171 inT16 start = 0, //start of good range 00172 inT16 end = 0 //end of good range 00173 ); 00174 void print_pitch_sd( //find fp cells 00175 TO_ROW *row, //row to do 00176 STATS *projection, //vertical projection 00177 inT16 projection_left, //edges //size of blank 00178 inT16 projection_right, 00179 float space_size, 00180 float initial_pitch //guess at pitch 00181 ); 00182 void find_repeated_chars(TO_BLOCK *block, // Block to search. 00183 BOOL8 testing_on); // Debug mode. 00184 void plot_fp_word( //draw block of words 00185 TO_BLOCK *block, //block to draw 00186 float pitch, //pitch to draw with 00187 float nonspace //for space threshold 00188 ); 00189 #endif