Tesseract  3.02
tesseract-ocr/textord/topitch.cpp
Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        topitch.cpp  (Formerly to_pitch.c)
00003  * Description: Code to determine fixed pitchness and the pitch if fixed.
00004  * Author:              Ray Smith
00005  * Created:             Tue Aug 24 16:57:29 BST 1993
00006  *
00007  * (C) Copyright 1993, Hewlett-Packard Ltd.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
00020 #include "mfcpch.h"
00021 #ifdef __UNIX__
00022 #include          <assert.h>
00023 #endif
00024 #include          "stderr.h"
00025 #include          "blobbox.h"
00026 #include          "statistc.h"
00027 #include          "drawtord.h"
00028 #include          "makerow.h"
00029 #include          "pitsync1.h"
00030 #include          "pithsync.h"
00031 #include          "tovars.h"
00032 #include          "wordseg.h"
00033 #include          "topitch.h"
00034 #include          "secname.h"
00035 #include          "helpers.h"
00036 
00037 // Include automatically generated configuration file if running autoconf.
00038 #ifdef HAVE_CONFIG_H
00039 #include "config_auto.h"
00040 #endif
00041 
// EXTERN is defined as nothing here, so the EXTERN-prefixed lines below expand
// to plain BOOL_VAR/double_VAR invocations in this file.
// NOTE(review): presumably each invocation takes (name, default value,
// description string) and defines the tunable parameter - confirm against the
// parameter macro header.
00042 #define EXTERN
00043 
00044 EXTERN BOOL_VAR (textord_all_prop, FALSE, "All doc is proportial text");
00045 EXTERN BOOL_VAR (textord_debug_pitch_test, FALSE,
00046 "Debug on fixed pitch test");
00047 EXTERN BOOL_VAR (textord_disable_pitch_test, FALSE,
00048 "Turn off dp fixed pitch algorithm");
00049 EXTERN BOOL_VAR (textord_fast_pitch_test, FALSE,
00050 "Do even faster pitch algorithm");
00051 EXTERN BOOL_VAR (textord_debug_pitch_metric, FALSE,
00052 "Write full metric stuff");
00053 EXTERN BOOL_VAR (textord_show_row_cuts, FALSE, "Draw row-level cuts");
00054 EXTERN BOOL_VAR (textord_show_page_cuts, FALSE, "Draw page-level cuts");
00055 EXTERN BOOL_VAR (textord_pitch_cheat, FALSE,
00056 "Use correct answer for fixed/prop");
// textord_blockndoc_fixed gates try_doc_fixed(): when it is FALSE that
// function returns immediately.
00057 EXTERN BOOL_VAR (textord_blockndoc_fixed, FALSE,
00058 "Attempt whole doc/block fixed pitch");
00059 EXTERN double_VAR (textord_projection_scale, 0.200, "Ding rate for mid-cuts");
00060 EXTERN double_VAR (textord_balance_factor, 1.0,
00061 "Ding rate for unbalanced char cells");
00062 
// FIXED_WIDTH_MULTIPLE is not referenced in the part of the file reviewed here.
00063 #define FIXED_WIDTH_MULTIPLE  5
// BLOCK_STATS_CLUSTERS sizes the gap/cluster arrays in row_pitch_stats() and
// is the cluster limit passed to STATS::cluster() there.
00064 #define BLOCK_STATS_CLUSTERS  10
// MAX_ALLOWED_PITCH is the upper end of the range of the pitch histogram
// built in try_doc_fixed().
00065 #define MAX_ALLOWED_PITCH 100    //max pixel pitch.
00066 
00067 /**********************************************************************
00068  * compute_fixed_pitch
00069  *
00070  * Decide whether each row is fixed pitch individually.
00071  * Correlate definite and uncertain results to obtain an individual
00072  * result for each row in the TO_ROW class.
00073  **********************************************************************/
00074 
00075 void compute_fixed_pitch(ICOORD page_tr,              // top right
00076                          TO_BLOCK_LIST *port_blocks,  // input list
00077                          float gradient,              // page skew
00078                          FCOORD rotation,             // for drawing
00079                          BOOL8 testing_on) {          // correct orientation
00080   TO_BLOCK_IT block_it;          //iterator
00081   TO_BLOCK *block;               //current block;
00082   TO_ROW_IT row_it;              //row iterator
00083   TO_ROW *row;                   //current row
00084   int block_index;               //block number
00085   int row_index;                 //row number
00086 
00087 #ifndef GRAPHICS_DISABLED
00088   if (textord_show_initial_words && testing_on) {
00089     if (to_win == NULL)
00090       create_to_win(page_tr);
00091   }
00092 #endif
00093 
00094   block_it.set_to_list (port_blocks);
00095   block_index = 1;
00096   for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
00097   block_it.forward ()) {
00098     block = block_it.data ();
00099     compute_block_pitch(block, rotation, block_index, testing_on);
00100     block_index++;
00101   }
00102 
00103   if (!try_doc_fixed (page_tr, port_blocks, gradient)) {
00104     block_index = 1;
00105     for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
00106     block_it.forward ()) {
00107       block = block_it.data ();
00108       if (!try_block_fixed (block, block_index))
00109         try_rows_fixed(block, block_index, testing_on);
00110       block_index++;
00111     }
00112   }
00113 
00114   block_index = 1;
00115   for (block_it.mark_cycle_pt(); !block_it.cycled_list();
00116        block_it.forward()) {
00117     block = block_it.data ();
00118     POLY_BLOCK* pb = block->block->poly_block();
00119     if (pb != NULL && !pb->IsText()) continue;  // Non-text doesn't exist!
00120     row_it.set_to_list (block->get_rows ());
00121     row_index = 1;
00122     for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
00123       row = row_it.data ();
00124       fix_row_pitch(row, block, port_blocks, row_index, block_index);
00125       row_index++;
00126     }
00127     block_index++;
00128   }
00129 #ifndef GRAPHICS_DISABLED
00130   if (textord_show_initial_words && testing_on) {
00131     ScrollView::Update();
00132   }
00133 #endif
00134 }
00135 
00136 
00137 /**********************************************************************
00138  * fix_row_pitch
00139  *
00140  * Get a pitch_decision for this row by voting among similar rows in the
00141  * block, then similar rows over all the page, or any other rows at all.
00142  **********************************************************************/
00143 
00144 void fix_row_pitch(TO_ROW *bad_row,        // row to fix
00145                    TO_BLOCK *bad_block,    // block of bad_row
00146                    TO_BLOCK_LIST *blocks,  // blocks to scan
00147                    inT32 row_target,       // number of row
00148                    inT32 block_target) {   // number of block
00149   inT16 mid_cuts;
00150   int block_votes;               //votes in block
00151   int like_votes;                //votes over page
00152   int other_votes;               //votes of unlike blocks
00153   int block_index;               //number of block
00154   int row_index;                 //number of row
00155   int maxwidth;                  //max pitch
00156   TO_BLOCK_IT block_it = blocks; //block iterator
00157   TO_ROW_IT row_it;
00158   TO_BLOCK *block;               //current block
00159   TO_ROW *row;                   //current row
00160   float sp_sd;                   //space deviation
00161   STATS block_stats;             //pitches in block
00162   STATS like_stats;              //pitches in page
00163 
00164   block_votes = like_votes = other_votes = 0;
00165   maxwidth = (inT32) ceil (bad_row->xheight * textord_words_maxspace);
00166   if (bad_row->pitch_decision != PITCH_DEF_FIXED
00167   && bad_row->pitch_decision != PITCH_DEF_PROP) {
00168     block_stats.set_range (0, maxwidth);
00169     like_stats.set_range (0, maxwidth);
00170     block_index = 1;
00171     for (block_it.mark_cycle_pt(); !block_it.cycled_list();
00172          block_it.forward()) {
00173       block = block_it.data();
00174       POLY_BLOCK* pb = block->block->poly_block();
00175       if (pb != NULL && !pb->IsText()) continue;  // Non text doesn't exist!
00176       row_index = 1;
00177       row_it.set_to_list (block->get_rows ());
00178       for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
00179       row_it.forward ()) {
00180         row = row_it.data ();
00181         if ((bad_row->all_caps
00182           && row->xheight + row->ascrise
00183           <
00184           (bad_row->xheight + bad_row->ascrise) * (1 +
00185           textord_pitch_rowsimilarity)
00186           && row->xheight + row->ascrise >
00187           (bad_row->xheight + bad_row->ascrise) * (1 -
00188           textord_pitch_rowsimilarity))
00189           || (!bad_row->all_caps
00190           && row->xheight <
00191           bad_row->xheight * (1 + textord_pitch_rowsimilarity)
00192           && row->xheight >
00193         bad_row->xheight * (1 - textord_pitch_rowsimilarity))) {
00194           if (block_index == block_target) {
00195             if (row->pitch_decision == PITCH_DEF_FIXED) {
00196               block_votes += textord_words_veto_power;
00197               block_stats.add ((inT32) row->fixed_pitch,
00198                 textord_words_veto_power);
00199             }
00200             else if (row->pitch_decision == PITCH_MAYBE_FIXED
00201             || row->pitch_decision == PITCH_CORR_FIXED) {
00202               block_votes++;
00203               block_stats.add ((inT32) row->fixed_pitch, 1);
00204             }
00205             else if (row->pitch_decision == PITCH_DEF_PROP)
00206               block_votes -= textord_words_veto_power;
00207             else if (row->pitch_decision == PITCH_MAYBE_PROP
00208               || row->pitch_decision == PITCH_CORR_PROP)
00209               block_votes--;
00210           }
00211           else {
00212             if (row->pitch_decision == PITCH_DEF_FIXED) {
00213               like_votes += textord_words_veto_power;
00214               like_stats.add ((inT32) row->fixed_pitch,
00215                 textord_words_veto_power);
00216             }
00217             else if (row->pitch_decision == PITCH_MAYBE_FIXED
00218             || row->pitch_decision == PITCH_CORR_FIXED) {
00219               like_votes++;
00220               like_stats.add ((inT32) row->fixed_pitch, 1);
00221             }
00222             else if (row->pitch_decision == PITCH_DEF_PROP)
00223               like_votes -= textord_words_veto_power;
00224             else if (row->pitch_decision == PITCH_MAYBE_PROP
00225               || row->pitch_decision == PITCH_CORR_PROP)
00226               like_votes--;
00227           }
00228         }
00229         else {
00230           if (row->pitch_decision == PITCH_DEF_FIXED)
00231             other_votes += textord_words_veto_power;
00232           else if (row->pitch_decision == PITCH_MAYBE_FIXED
00233             || row->pitch_decision == PITCH_CORR_FIXED)
00234             other_votes++;
00235           else if (row->pitch_decision == PITCH_DEF_PROP)
00236             other_votes -= textord_words_veto_power;
00237           else if (row->pitch_decision == PITCH_MAYBE_PROP
00238             || row->pitch_decision == PITCH_CORR_PROP)
00239             other_votes--;
00240         }
00241         row_index++;
00242       }
00243       block_index++;
00244     }
00245     if (block_votes > textord_words_veto_power) {
00246       bad_row->fixed_pitch = block_stats.ile (0.5);
00247       bad_row->pitch_decision = PITCH_CORR_FIXED;
00248     }
00249     else if (block_votes <= textord_words_veto_power && like_votes > 0) {
00250       bad_row->fixed_pitch = like_stats.ile (0.5);
00251       bad_row->pitch_decision = PITCH_CORR_FIXED;
00252     }
00253     else {
00254       bad_row->pitch_decision = PITCH_CORR_PROP;
00255       #ifndef SECURE_NAMES
00256       if (block_votes == 0 && like_votes == 0 && other_votes > 0
00257         && (textord_debug_pitch_test || textord_debug_pitch_metric))
00258         tprintf
00259           ("Warning:row %d of block %d set prop with no like rows against trend\n",
00260           row_target, block_target);
00261       #endif
00262     }
00263   }
00264   if (textord_debug_pitch_metric) {
00265     tprintf(":b_votes=%d:l_votes=%d:o_votes=%d",
00266             block_votes, like_votes, other_votes);
00267     tprintf("x=%g:asc=%g\n", bad_row->xheight, bad_row->ascrise);
00268   }
00269   if (bad_row->pitch_decision == PITCH_CORR_FIXED) {
00270     if (bad_row->fixed_pitch < textord_min_xheight) {
00271       if (block_votes > 0)
00272         bad_row->fixed_pitch = block_stats.ile (0.5);
00273       else if (block_votes == 0 && like_votes > 0)
00274         bad_row->fixed_pitch = like_stats.ile (0.5);
00275       else {
00276         tprintf
00277           ("Warning:guessing pitch as xheight on row %d, block %d\n",
00278           row_target, block_target);
00279         bad_row->fixed_pitch = bad_row->xheight;
00280       }
00281     }
00282     if (bad_row->fixed_pitch < textord_min_xheight)
00283       bad_row->fixed_pitch = (float) textord_min_xheight;
00284     bad_row->kern_size = bad_row->fixed_pitch / 4;
00285     bad_row->min_space = (inT32) (bad_row->fixed_pitch * 0.6);
00286     bad_row->max_nonspace = (inT32) (bad_row->fixed_pitch * 0.4);
00287     bad_row->space_threshold =
00288       (bad_row->min_space + bad_row->max_nonspace) / 2;
00289     bad_row->space_size = bad_row->fixed_pitch;
00290     if (bad_row->char_cells.empty ())
00291       tune_row_pitch (bad_row, &bad_row->projection,
00292         bad_row->projection_left, bad_row->projection_right,
00293         (bad_row->fixed_pitch +
00294         bad_row->max_nonspace * 3) / 4, bad_row->fixed_pitch,
00295         sp_sd, mid_cuts, &bad_row->char_cells, FALSE);
00296   }
00297   else if (bad_row->pitch_decision == PITCH_CORR_PROP
00298   || bad_row->pitch_decision == PITCH_DEF_PROP) {
00299     bad_row->fixed_pitch = 0.0f;
00300     bad_row->char_cells.clear ();
00301   }
00302 }
00303 
00304 
00305 /**********************************************************************
00306  * compute_block_pitch
00307  *
00308  * Decide whether each block is fixed pitch individually.
00309  **********************************************************************/
00310 
00311 void compute_block_pitch(TO_BLOCK *block,     // input list
00312                          FCOORD rotation,     // for drawing
00313                          inT32 block_index,   // block number
00314                          BOOL8 testing_on) {  // correct orientation
00315    TBOX block_box;                 //bounding box
00316 
00317   block_box = block->block->bounding_box ();
00318   if (testing_on && textord_debug_pitch_test) {
00319     tprintf ("Block %d at (%d,%d)->(%d,%d)\n",
00320       block_index,
00321       block_box.left (), block_box.bottom (),
00322       block_box.right (), block_box.top ());
00323   }
00324   block->min_space = (inT32) floor (block->xheight
00325     * textord_words_default_minspace);
00326   block->max_nonspace = (inT32) ceil (block->xheight
00327     * textord_words_default_nonspace);
00328   block->fixed_pitch = 0.0f;
00329   block->space_size = (float) block->min_space;
00330   block->kern_size = (float) block->max_nonspace;
00331   block->pr_nonsp = block->xheight * words_default_prop_nonspace;
00332   block->pr_space = block->pr_nonsp * textord_spacesize_ratioprop;
00333   if (!block->get_rows ()->empty ()) {
00334     ASSERT_HOST (block->xheight > 0);
00335     find_repeated_chars(block, textord_show_initial_words && testing_on);
00336 #ifndef GRAPHICS_DISABLED
00337     if (textord_show_initial_words && testing_on)
00338       //overlap_picture_ops(TRUE);
00339       ScrollView::Update();
00340 #endif
00341     compute_rows_pitch(block,
00342                        block_index,
00343                        textord_debug_pitch_test &&testing_on);
00344   }
00345 }
00346 
00347 
00348 /**********************************************************************
00349  * compute_rows_pitch
00350  *
00351  * Decide whether each row is fixed pitch individually.
00352  **********************************************************************/
00353 
00354 BOOL8 compute_rows_pitch(                    //find line stats
00355                          TO_BLOCK *block,    //block to do
00356                          inT32 block_index,  //block number
00357                          BOOL8 testing_on    //correct orientation
00358                         ) {
00359   inT32 maxwidth;                //of spaces
00360   TO_ROW *row;                   //current row
00361   inT32 row_index;               //row number.
00362   float lower, upper;            //cluster thresholds
00363   TO_ROW_IT row_it = block->get_rows ();
00364 
00365   row_index = 1;
00366   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
00367     row = row_it.data ();
00368     ASSERT_HOST (row->xheight > 0);
00369     row->compute_vertical_projection ();
00370     maxwidth = (inT32) ceil (row->xheight * textord_words_maxspace);
00371     if (row_pitch_stats (row, maxwidth, testing_on)
00372       && find_row_pitch (row, maxwidth,
00373       textord_dotmatrix_gap + 1, block, block_index,
00374     row_index, testing_on)) {
00375       if (row->fixed_pitch == 0) {
00376         lower = row->pr_nonsp;
00377         upper = row->pr_space;
00378         row->space_size = upper;
00379         row->kern_size = lower;
00380       }
00381     }
00382     else {
00383       row->fixed_pitch = 0.0f;   //insufficient data
00384       row->pitch_decision = PITCH_DUNNO;
00385     }
00386     row_index++;
00387   }
00388   return FALSE;
00389 }
00390 
00391 
00392 /**********************************************************************
00393  * try_doc_fixed
00394  *
00395  * Attempt to call the entire document fixed pitch.
00396  **********************************************************************/
00397 
00398 BOOL8 try_doc_fixed(                             //determine pitch
00399                     ICOORD page_tr,              //top right
00400                     TO_BLOCK_LIST *port_blocks,  //input list
00401                     float gradient               //page skew
00402                    ) {
00403   inT16 master_x;                //uniform shifts
00404   inT16 pitch;                   //median pitch.
00405   int x;                         //profile coord
00406   int prop_blocks;               //correct counts
00407   int fixed_blocks;
00408   int total_row_count;           //total in page
00409                                  //iterator
00410   TO_BLOCK_IT block_it = port_blocks;
00411   TO_BLOCK *block;               //current block;
00412   TO_ROW_IT row_it;              //row iterator
00413   TO_ROW *row;                   //current row
00414   inT16 projection_left;         //edges
00415   inT16 projection_right;
00416   inT16 row_left;                //edges of row
00417   inT16 row_right;
00418   ICOORDELT_LIST *master_cells;  //cells for page
00419   float master_y;                //uniform shifts
00420   float shift_factor;            //page skew correction
00421   float row_shift;               //shift for row
00422   float final_pitch;             //output pitch
00423   float row_y;                   //baseline
00424   STATS projection;              //entire page
00425   STATS pitches (0, MAX_ALLOWED_PITCH);
00426   //for median
00427   float sp_sd;                   //space sd
00428   inT16 mid_cuts;                //no of cheap cuts
00429   float pitch_sd;                //sync rating
00430 
00431   if (block_it.empty ()
00432     //      || block_it.data()==block_it.data_relative(1)
00433     || !textord_blockndoc_fixed)
00434     return FALSE;
00435   shift_factor = gradient / (gradient * gradient + 1);
00436   row_it.set_to_list (block_it.data ()->get_rows ());
00437   master_x = row_it.data ()->projection_left;
00438   master_y = row_it.data ()->baseline.y (master_x);
00439   projection_left = MAX_INT16;
00440   projection_right = -MAX_INT16;
00441   prop_blocks = 0;
00442   fixed_blocks = 0;
00443   total_row_count = 0;
00444 
00445   for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
00446   block_it.forward ()) {
00447     block = block_it.data ();
00448     row_it.set_to_list (block->get_rows ());
00449     for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
00450       row = row_it.data ();
00451       total_row_count++;
00452       if (row->fixed_pitch > 0)
00453         pitches.add ((inT32) (row->fixed_pitch), 1);
00454       //find median
00455       row_y = row->baseline.y (master_x);
00456       row_left =
00457         (inT16) (row->projection_left -
00458         shift_factor * (master_y - row_y));
00459       row_right =
00460         (inT16) (row->projection_right -
00461         shift_factor * (master_y - row_y));
00462       if (row_left < projection_left)
00463         projection_left = row_left;
00464       if (row_right > projection_right)
00465         projection_right = row_right;
00466     }
00467   }
00468   if (pitches.get_total () == 0)
00469     return FALSE;
00470   projection.set_range (projection_left, projection_right);
00471 
00472   for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
00473   block_it.forward ()) {
00474     block = block_it.data ();
00475     row_it.set_to_list (block->get_rows ());
00476     for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
00477       row = row_it.data ();
00478       row_y = row->baseline.y (master_x);
00479       row_left =
00480         (inT16) (row->projection_left -
00481         shift_factor * (master_y - row_y));
00482       for (x = row->projection_left; x < row->projection_right;
00483       x++, row_left++) {
00484         projection.add (row_left, row->projection.pile_count (x));
00485       }
00486     }
00487   }
00488 
00489   row_it.set_to_list (block_it.data ()->get_rows ());
00490   row = row_it.data ();
00491 #ifndef GRAPHICS_DISABLED
00492   if (textord_show_page_cuts && to_win != NULL)
00493     projection.plot (to_win, projection_left,
00494       row->intercept (), 1.0f, -1.0f, ScrollView::CORAL);
00495 #endif
00496   final_pitch = pitches.ile (0.5);
00497   pitch = (inT16) final_pitch;
00498   pitch_sd =
00499     tune_row_pitch (row, &projection, projection_left, projection_right,
00500     pitch * 0.75, final_pitch, sp_sd, mid_cuts,
00501     &row->char_cells, FALSE);
00502 
00503   if (textord_debug_pitch_metric)
00504     tprintf
00505       ("try_doc:props=%d:fixed=%d:pitch=%d:final_pitch=%g:pitch_sd=%g:sp_sd=%g:sd/trc=%g:sd/p=%g:sd/trc/p=%g\n",
00506       prop_blocks, fixed_blocks, pitch, final_pitch, pitch_sd, sp_sd,
00507       pitch_sd / total_row_count, pitch_sd / pitch,
00508       pitch_sd / total_row_count / pitch);
00509 
00510 #ifndef GRAPHICS_DISABLED
00511   if (textord_show_page_cuts && to_win != NULL) {
00512     master_cells = &row->char_cells;
00513     for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
00514     block_it.forward ()) {
00515       block = block_it.data ();
00516       row_it.set_to_list (block->get_rows ());
00517       for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
00518       row_it.forward ()) {
00519         row = row_it.data ();
00520         row_y = row->baseline.y (master_x);
00521         row_shift = shift_factor * (master_y - row_y);
00522         plot_row_cells(to_win, ScrollView::GOLDENROD, row, row_shift, master_cells);
00523       }
00524     }
00525   }
00526 #endif
00527   row->char_cells.clear ();
00528   return FALSE;
00529 }
00530 
00531 
00532 /**********************************************************************
00533  * try_block_fixed
00534  *
00535  * Try to call the entire block fixed.
00536  **********************************************************************/
00537 
00538 BOOL8 try_block_fixed(                   //find line stats
00539                       TO_BLOCK *block,   //block to do
00540                       inT32 block_index  //block number
00541                      ) {
00542   return FALSE;
00543 }
00544 
00545 
00546 /**********************************************************************
00547  * try_rows_fixed
00548  *
00549  * Decide whether each row is fixed pitch individually.
00550  **********************************************************************/
00551 
00552 BOOL8 try_rows_fixed(                    //find line stats
00553                      TO_BLOCK *block,    //block to do
00554                      inT32 block_index,  //block number
00555                      BOOL8 testing_on    //correct orientation
00556                     ) {
00557   inT32 maxwidth;                //of spaces
00558   TO_ROW *row;                   //current row
00559   inT32 row_index;               //row number.
00560   inT32 def_fixed = 0;           //counters
00561   inT32 def_prop = 0;
00562   inT32 maybe_fixed = 0;
00563   inT32 maybe_prop = 0;
00564   inT32 dunno = 0;
00565   inT32 corr_fixed = 0;
00566   inT32 corr_prop = 0;
00567   float lower, upper;            //cluster thresholds
00568   TO_ROW_IT row_it = block->get_rows ();
00569 
00570   row_index = 1;
00571   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
00572     row = row_it.data ();
00573     ASSERT_HOST (row->xheight > 0);
00574     maxwidth = (inT32) ceil (row->xheight * textord_words_maxspace);
00575     if (row->fixed_pitch > 0 &&
00576         fixed_pitch_row(row, block->block, block_index)) {
00577       if (row->fixed_pitch == 0) {
00578         lower = row->pr_nonsp;
00579         upper = row->pr_space;
00580         row->space_size = upper;
00581         row->kern_size = lower;
00582       }
00583     }
00584     row_index++;
00585   }
00586   count_block_votes(block,
00587                     def_fixed,
00588                     def_prop,
00589                     maybe_fixed,
00590                     maybe_prop,
00591                     corr_fixed,
00592                     corr_prop,
00593                     dunno);
00594   if (testing_on
00595     && (textord_debug_pitch_test
00596   || textord_blocksall_prop || textord_blocksall_fixed)) {
00597     tprintf ("Initially:");
00598     print_block_counts(block, block_index);
00599   }
00600   if (def_fixed > def_prop * textord_words_veto_power)
00601     block->pitch_decision = PITCH_DEF_FIXED;
00602   else if (def_prop > def_fixed * textord_words_veto_power)
00603     block->pitch_decision = PITCH_DEF_PROP;
00604   else if (def_fixed > 0 || def_prop > 0)
00605     block->pitch_decision = PITCH_DUNNO;
00606   else if (maybe_fixed > maybe_prop * textord_words_veto_power)
00607     block->pitch_decision = PITCH_MAYBE_FIXED;
00608   else if (maybe_prop > maybe_fixed * textord_words_veto_power)
00609     block->pitch_decision = PITCH_MAYBE_PROP;
00610   else
00611     block->pitch_decision = PITCH_DUNNO;
00612   return FALSE;
00613 }
00614 
00615 
00616 /**********************************************************************
00617  * print_block_counts
00618  *
00619  * Count up how many rows have what decision and print the results.
00620  **********************************************************************/
00621 
00622 void print_block_counts(                   //find line stats
00623                         TO_BLOCK *block,   //block to do
00624                         inT32 block_index  //block number
00625                        ) {
00626   inT32 def_fixed = 0;           //counters
00627   inT32 def_prop = 0;
00628   inT32 maybe_fixed = 0;
00629   inT32 maybe_prop = 0;
00630   inT32 dunno = 0;
00631   inT32 corr_fixed = 0;
00632   inT32 corr_prop = 0;
00633 
00634   count_block_votes(block,
00635                     def_fixed,
00636                     def_prop,
00637                     maybe_fixed,
00638                     maybe_prop,
00639                     corr_fixed,
00640                     corr_prop,
00641                     dunno);
00642   tprintf ("Block %d has (%d,%d,%d)",
00643     block_index, def_fixed, maybe_fixed, corr_fixed);
00644   if (textord_blocksall_prop && (def_fixed || maybe_fixed || corr_fixed))
00645     tprintf (" (Wrongly)");
00646   tprintf (" fixed, (%d,%d,%d)", def_prop, maybe_prop, corr_prop);
00647   if (textord_blocksall_fixed && (def_prop || maybe_prop || corr_prop))
00648     tprintf (" (Wrongly)");
00649   tprintf (" prop, %d dunno\n", dunno);
00650 }
00651 
00652 
00653 /**********************************************************************
00654  * count_block_votes
00655  *
00656  * Count the number of rows in the block with each kind of pitch_decision.
00657  **********************************************************************/
00658 
00659 void count_block_votes(                   //find line stats
00660                        TO_BLOCK *block,   //block to do
00661                        inT32 &def_fixed,  //add to counts
00662                        inT32 &def_prop,
00663                        inT32 &maybe_fixed,
00664                        inT32 &maybe_prop,
00665                        inT32 &corr_fixed,
00666                        inT32 &corr_prop,
00667                        inT32 &dunno) {
00668   TO_ROW *row;                   //current row
00669   TO_ROW_IT row_it = block->get_rows ();
00670 
00671   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
00672     row = row_it.data ();
00673     switch (row->pitch_decision) {
00674       case PITCH_DUNNO:
00675         dunno++;
00676         break;
00677       case PITCH_DEF_PROP:
00678         def_prop++;
00679         break;
00680       case PITCH_MAYBE_PROP:
00681         maybe_prop++;
00682         break;
00683       case PITCH_DEF_FIXED:
00684         def_fixed++;
00685         break;
00686       case PITCH_MAYBE_FIXED:
00687         maybe_fixed++;
00688         break;
00689       case PITCH_CORR_PROP:
00690         corr_prop++;
00691         break;
00692       case PITCH_CORR_FIXED:
00693         corr_fixed++;
00694         break;
00695     }
00696   }
00697 }
00698 
00699 
00700 /**********************************************************************
00701  * row_pitch_stats
00702  *
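00703  * Collect the gaps between the blobs of one row, cluster them, and use the cluster medians to estimate the row's proportional non-space and space sizes. Returns FALSE if there are no usable gaps or clusters.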
00704  **********************************************************************/
00705 
00706 BOOL8 row_pitch_stats(                  //find line stats
00707                       TO_ROW *row,      //current row
00708                       inT32 maxwidth,   //of spaces
00709                       BOOL8 testing_on  //correct orientation
00710                      ) {
00711   BLOBNBOX *blob;                //current blob
00712   int gap_index;                 //current gap
00713   inT32 prev_x;                  //end of prev blob
00714   inT32 cluster_count;           //no of clusters
00715   inT32 prev_count;              //of clusters
00716   inT32 smooth_factor;           //for smoothing stats
00717   TBOX blob_box;                  //bounding box
00718   float lower, upper;            //cluster thresholds
00719                                  //gap sizes
00720   float gaps[BLOCK_STATS_CLUSTERS];
00721                                  //blobs
00722   BLOBNBOX_IT blob_it = row->blob_list ();
00723   STATS gap_stats (0, maxwidth);
00724   STATS cluster_stats[BLOCK_STATS_CLUSTERS + 1];
00725   //clusters
00726 
00727   smooth_factor =
00728     (inT32) (row->xheight * textord_wordstats_smooth_factor + 1.5);
00729   if (!blob_it.empty ()) {
00730     prev_x = blob_it.data ()->bounding_box ().right ();
00731     blob_it.forward ();
00732     while (!blob_it.at_first ()) {
00733       blob = blob_it.data ();
00734       if (!blob->joined_to_prev ()) {
00735         blob_box = blob->bounding_box ();
00736         if (blob_box.left () - prev_x < maxwidth)
00737           gap_stats.add (blob_box.left () - prev_x, 1);
00738         prev_x = blob_box.right ();
00739       }
00740       blob_it.forward ();
00741     }
00742   }
00743   if (gap_stats.get_total () == 0) {
00744     return FALSE;
00745   }
00746   cluster_count = 0;
00747   lower = row->xheight * words_initial_lower;
00748   upper = row->xheight * words_initial_upper;
00749   gap_stats.smooth (smooth_factor);
00750   do {
00751     prev_count = cluster_count;
00752     cluster_count = gap_stats.cluster (lower, upper,
00753       textord_spacesize_ratioprop,
00754       BLOCK_STATS_CLUSTERS, cluster_stats);
00755   }
00756   while (cluster_count > prev_count && cluster_count < BLOCK_STATS_CLUSTERS);
00757   if (cluster_count < 1) {
00758     return FALSE;
00759   }
00760   for (gap_index = 0; gap_index < cluster_count; gap_index++)
00761     gaps[gap_index] = cluster_stats[gap_index + 1].ile (0.5);
00762   //get medians
00763   if (testing_on) {
00764     tprintf ("cluster_count=%d:", cluster_count);
00765     for (gap_index = 0; gap_index < cluster_count; gap_index++)
00766       tprintf (" %g(%d)", gaps[gap_index],
00767         cluster_stats[gap_index + 1].get_total ());
00768     tprintf ("\n");
00769   }
00770   qsort (gaps, cluster_count, sizeof (float), sort_floats);
00771 
00772   //Try to find proportional non-space and space for row.
00773   lower = row->xheight * words_default_prop_nonspace;
00774   upper = row->xheight * textord_words_min_minspace;
00775   for (gap_index = 0; gap_index < cluster_count
00776     && gaps[gap_index] < lower; gap_index++);
00777   if (gap_index == 0) {
00778     if (testing_on)
00779       tprintf ("No clusters below nonspace threshold!!\n");
00780     if (cluster_count > 1) {
00781       row->pr_nonsp = gaps[0];
00782       row->pr_space = gaps[1];
00783     }
00784     else {
00785       row->pr_nonsp = lower;
00786       row->pr_space = gaps[0];
00787     }
00788   }
00789   else {
00790     row->pr_nonsp = gaps[gap_index - 1];
00791     while (gap_index < cluster_count && gaps[gap_index] < upper)
00792       gap_index++;
00793     if (gap_index == cluster_count) {
00794       if (testing_on)
00795         tprintf ("No clusters above nonspace threshold!!\n");
00796       row->pr_space = lower * textord_spacesize_ratioprop;
00797     }
00798     else
00799       row->pr_space = gaps[gap_index];
00800   }
00801 
00802   //Now try to find the fixed pitch space and non-space.
00803   upper = row->xheight * words_default_fixed_space;
00804   for (gap_index = 0; gap_index < cluster_count
00805     && gaps[gap_index] < upper; gap_index++);
00806   if (gap_index == 0) {
00807     if (testing_on)
00808       tprintf ("No clusters below space threshold!!\n");
00809     row->fp_nonsp = upper;
00810     row->fp_space = gaps[0];
00811   }
00812   else {
00813     row->fp_nonsp = gaps[gap_index - 1];
00814     if (gap_index == cluster_count) {
00815       if (testing_on)
00816         tprintf ("No clusters above space threshold!!\n");
00817       row->fp_space = row->xheight;
00818     }
00819     else
00820       row->fp_space = gaps[gap_index];
00821   }
00822   if (testing_on) {
00823     tprintf
00824       ("Initial estimates:pr_nonsp=%g, pr_space=%g, fp_nonsp=%g, fp_space=%g\n",
00825       row->pr_nonsp, row->pr_space, row->fp_nonsp, row->fp_space);
00826   }
00827   return TRUE;                   //computed some stats
00828 }
00829 
00830 
00831 /**********************************************************************
00832  * find_row_pitch
00833  *
00834  * Check to see if this row could be fixed pitch using the given spacings.
00835  * Blobs with gaps smaller than the lower threshold are assumed to be one.
00836  * The larger threshold is the word gap threshold.
00837  **********************************************************************/
00838 
00839 BOOL8 find_row_pitch(                    //find lines
00840                      TO_ROW *row,        //row to do
00841                      inT32 maxwidth,     //max permitted space
00842                      inT32 dm_gap,       //ignorable gaps
00843                      TO_BLOCK *block,    //block of row
00844                      inT32 block_index,  //block_number
00845                      inT32 row_index,    //number of row
00846                      BOOL8 testing_on    //correct orientation
00847                     ) {
00848   BOOL8 used_dm_model;           //looks lik dot matrix
00849   float min_space;               //estimate threshold
00850   float non_space;               //gap size
00851   float gap_iqr;                 //interquartile range
00852   float pitch_iqr;
00853   float dm_gap_iqr;              //interquartile range
00854   float dm_pitch_iqr;
00855   float dm_pitch;                //pitch with dm on
00856   float pitch;                   //revised estimate
00857   float initial_pitch;           //guess at pitch
00858   STATS gap_stats (0, maxwidth);
00859                                  //centre-centre
00860   STATS pitch_stats (0, maxwidth);
00861 
00862   row->fixed_pitch = 0.0f;
00863   initial_pitch = row->fp_space;
00864   if (initial_pitch > row->xheight * (1 + words_default_fixed_limit))
00865     initial_pitch = row->xheight;//keep pitch decent
00866   non_space = row->fp_nonsp;
00867   if (non_space > initial_pitch)
00868     non_space = initial_pitch;
00869   min_space = (initial_pitch + non_space) / 2;
00870 
00871   if (!count_pitch_stats (row, &gap_stats, &pitch_stats,
00872   initial_pitch, min_space, TRUE, FALSE, dm_gap)) {
00873     dm_gap_iqr = 0.0001;
00874     dm_pitch_iqr = maxwidth * 2.0f;
00875     dm_pitch = initial_pitch;
00876   }
00877   else {
00878     dm_gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25);
00879     dm_pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25);
00880     dm_pitch = pitch_stats.ile (0.5);
00881   }
00882   gap_stats.clear ();
00883   pitch_stats.clear ();
00884   if (!count_pitch_stats (row, &gap_stats, &pitch_stats,
00885   initial_pitch, min_space, TRUE, FALSE, 0)) {
00886     gap_iqr = 0.0001;
00887     pitch_iqr = maxwidth * 3.0f;
00888   }
00889   else {
00890     gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25);
00891     pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25);
00892     if (testing_on)
00893       tprintf
00894         ("First fp iteration:initial_pitch=%g, gap_iqr=%g, pitch_iqr=%g, pitch=%g\n",
00895         initial_pitch, gap_iqr, pitch_iqr, pitch_stats.ile (0.5));
00896     initial_pitch = pitch_stats.ile (0.5);
00897     if (min_space > initial_pitch
00898       && count_pitch_stats (row, &gap_stats, &pitch_stats,
00899     initial_pitch, initial_pitch, TRUE, FALSE, 0)) {
00900       min_space = initial_pitch;
00901       gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25);
00902       pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25);
00903       if (testing_on)
00904         tprintf
00905           ("Revised fp iteration:initial_pitch=%g, gap_iqr=%g, pitch_iqr=%g, pitch=%g\n",
00906           initial_pitch, gap_iqr, pitch_iqr, pitch_stats.ile (0.5));
00907       initial_pitch = pitch_stats.ile (0.5);
00908     }
00909   }
00910   if (textord_debug_pitch_metric)
00911     tprintf("Blk=%d:Row=%d:%c:p_iqr=%g:g_iqr=%g:dm_p_iqr=%g:dm_g_iqr=%g:%c:",
00912             block_index, row_index, 'X',
00913             pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr,
00914             pitch_iqr > maxwidth && dm_pitch_iqr > maxwidth ? 'D' :
00915               (pitch_iqr * dm_gap_iqr <= dm_pitch_iqr * gap_iqr ? 'S' : 'M'));
00916   if (pitch_iqr > maxwidth && dm_pitch_iqr > maxwidth) {
00917     row->pitch_decision = PITCH_DUNNO;
00918     if (textord_debug_pitch_metric)
00919       tprintf ("\n");
00920     return FALSE;                //insufficient data
00921   }
00922   if (pitch_iqr * dm_gap_iqr <= dm_pitch_iqr * gap_iqr) {
00923     if (testing_on)
00924       tprintf
00925         ("Choosing non dm version:pitch_iqr=%g, gap_iqr=%g, dm_pitch_iqr=%g, dm_gap_iqr=%g\n",
00926         pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr);
00927     gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25);
00928     pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25);
00929     pitch = pitch_stats.ile (0.5);
00930     used_dm_model = FALSE;
00931   }
00932   else {
00933     if (testing_on)
00934       tprintf
00935         ("Choosing dm version:pitch_iqr=%g, gap_iqr=%g, dm_pitch_iqr=%g, dm_gap_iqr=%g\n",
00936         pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr);
00937     gap_iqr = dm_gap_iqr;
00938     pitch_iqr = dm_pitch_iqr;
00939     pitch = dm_pitch;
00940     used_dm_model = TRUE;
00941   }
00942   if (textord_debug_pitch_metric) {
00943     tprintf ("rev_p_iqr=%g:rev_g_iqr=%g:pitch=%g:",
00944       pitch_iqr, gap_iqr, pitch);
00945     tprintf ("p_iqr/g=%g:p_iqr/x=%g:iqr_res=%c:",
00946       pitch_iqr / gap_iqr, pitch_iqr / block->xheight,
00947       pitch_iqr < gap_iqr * textord_fpiqr_ratio
00948       && pitch_iqr < block->xheight * textord_max_pitch_iqr
00949       && pitch < block->xheight * textord_words_default_maxspace
00950       ? 'F' : 'P');
00951   }
00952   if (pitch_iqr < gap_iqr * textord_fpiqr_ratio
00953     && pitch_iqr < block->xheight * textord_max_pitch_iqr
00954     && pitch < block->xheight * textord_words_default_maxspace)
00955     row->pitch_decision = PITCH_MAYBE_FIXED;
00956   else
00957     row->pitch_decision = PITCH_MAYBE_PROP;
00958   row->fixed_pitch = pitch;
00959   row->kern_size = gap_stats.ile (0.5);
00960   row->min_space = (inT32) (row->fixed_pitch + non_space) / 2;
00961   if (row->min_space > row->fixed_pitch)
00962     row->min_space = (inT32) row->fixed_pitch;
00963   row->max_nonspace = row->min_space;
00964   row->space_size = row->fixed_pitch;
00965   row->space_threshold = (row->max_nonspace + row->min_space) / 2;
00966   row->used_dm_model = used_dm_model;
00967   return TRUE;
00968 }
00969 
00970 
00971 /**********************************************************************
00972  * fixed_pitch_row
00973  *
00974  * Check to see if this row could be fixed pitch using the given spacings.
00975  * Blobs with gaps smaller than the lower threshold are assumed to be one.
00976  * The larger threshold is the word gap threshold.
00977  **********************************************************************/
00978 
00979 BOOL8 fixed_pitch_row(TO_ROW *row,       // row to do
00980                       BLOCK* block,
00981                       inT32 block_index  // block_number
00982                      ) {
00983   const char *res_string;        //pitch result
00984   inT16 mid_cuts;                //no of cheap cuts
00985   float non_space;               //gap size
00986   float pitch_sd;                //error on pitch
00987   float sp_sd;                   //space sd
00988 
00989   non_space = row->fp_nonsp;
00990   if (non_space > row->fixed_pitch)
00991     non_space = row->fixed_pitch;
00992   POLY_BLOCK* pb = block != NULL ? block->poly_block() : NULL;
00993   if (textord_all_prop || (pb != NULL && !pb->IsText())) {
00994     // Set the decision to definitely proportional.
00995     pitch_sd = textord_words_def_prop * row->fixed_pitch;
00996     row->pitch_decision = PITCH_DEF_PROP;
00997   } else {
00998     pitch_sd = tune_row_pitch (row, &row->projection, row->projection_left,
00999                                row->projection_right,
01000                                (row->fixed_pitch + non_space * 3) / 4,
01001                                row->fixed_pitch, sp_sd, mid_cuts,
01002                                &row->char_cells,
01003                                block_index == textord_debug_block);
01004     if (pitch_sd < textord_words_pitchsd_threshold * row->fixed_pitch
01005       && ((pitsync_linear_version & 3) < 3
01006       || ((pitsync_linear_version & 3) >= 3 && (row->used_dm_model
01007       || sp_sd > 20
01008     || (pitch_sd == 0 && sp_sd > 10))))) {
01009       if (pitch_sd < textord_words_def_fixed * row->fixed_pitch
01010         && !row->all_caps
01011         && ((pitsync_linear_version & 3) < 3 || sp_sd > 20))
01012         row->pitch_decision = PITCH_DEF_FIXED;
01013       else
01014         row->pitch_decision = PITCH_MAYBE_FIXED;
01015     }
01016     else if ((pitsync_linear_version & 3) < 3
01017       || sp_sd > 20
01018       || mid_cuts > 0
01019       || pitch_sd >= textord_words_pitchsd_threshold * row->fixed_pitch) {
01020       if (pitch_sd < textord_words_def_prop * row->fixed_pitch)
01021         row->pitch_decision = PITCH_MAYBE_PROP;
01022       else
01023         row->pitch_decision = PITCH_DEF_PROP;
01024     }
01025     else
01026       row->pitch_decision = PITCH_DUNNO;
01027   }
01028 
01029   if (textord_debug_pitch_metric) {
01030     res_string = "??";
01031     switch (row->pitch_decision) {
01032       case PITCH_DEF_PROP:
01033         res_string = "DP";
01034         break;
01035       case PITCH_MAYBE_PROP:
01036         res_string = "MP";
01037         break;
01038       case PITCH_DEF_FIXED:
01039         res_string = "DF";
01040         break;
01041       case PITCH_MAYBE_FIXED:
01042         res_string = "MF";
01043       default:
01044         res_string = "??";
01045     }
01046     tprintf (":sd/p=%g:occ=%g:init_res=%s\n",
01047       pitch_sd / row->fixed_pitch, sp_sd, res_string);
01048   }
01049   return TRUE;
01050 }
01051 
01052 
01053 /**********************************************************************
01054  * count_pitch_stats
01055  *
01056  * Count up the gap and pitch stats on the block to see if it is fixed pitch.
01057  * Blobs with gaps smaller than the lower threshold are assumed to be one.
01058  * The larger threshold is the word gap threshold.
01059  * The return value indicates whether there were any decent values to use.
01060  **********************************************************************/
01061 
01062 BOOL8 count_pitch_stats(                       //find lines
01063                         TO_ROW *row,           //row to do
01064                         STATS *gap_stats,      //blob gaps
01065                         STATS *pitch_stats,    //centre-centre stats
01066                         float initial_pitch,   //guess at pitch
01067                         float min_space,       //estimate space size
01068                         BOOL8 ignore_outsize,  //discard big objects
01069                         BOOL8 split_outsize,   //split big objects
01070                         inT32 dm_gap           //ignorable gaps
01071                        ) {
01072   BOOL8 prev_valid;              //not word broken
01073   BLOBNBOX *blob;                //current blob
01074                                  //blobs
01075   BLOBNBOX_IT blob_it = row->blob_list ();
01076   inT32 prev_right;              //end of prev blob
01077   inT32 prev_centre;             //centre of previous blob
01078   inT32 x_centre;                //centre of this blob
01079   inT32 blob_width;              //width of blob
01080   inT32 width_units;             //no of widths in blob
01081   float width;                   //blob width
01082   TBOX blob_box;                  //bounding box
01083   TBOX joined_box;                //of super blob
01084 
01085   gap_stats->clear ();
01086   pitch_stats->clear ();
01087   if (blob_it.empty ())
01088     return FALSE;
01089   prev_valid = FALSE;
01090   prev_centre = 0;
01091   prev_right = 0;                //stop complier warning
01092   joined_box = blob_it.data ()->bounding_box ();
01093   do {
01094     blob_it.forward ();
01095     blob = blob_it.data ();
01096     if (!blob->joined_to_prev ()) {
01097       blob_box = blob->bounding_box ();
01098       if ((blob_box.left () - joined_box.right () < dm_gap
01099         && !blob_it.at_first ())
01100         || blob->cblob() == NULL)
01101         joined_box += blob_box;  //merge blobs
01102       else {
01103         blob_width = joined_box.width ();
01104         if (split_outsize) {
01105           width_units =
01106             (inT32) floor ((float) blob_width / initial_pitch + 0.5);
01107           if (width_units < 1)
01108             width_units = 1;
01109           width_units--;
01110         }
01111         else if (ignore_outsize) {
01112           width = (float) blob_width / initial_pitch;
01113           width_units = width < 1 + words_default_fixed_limit
01114             && width > 1 - words_default_fixed_limit ? 0 : -1;
01115         }
01116         else
01117           width_units = 0;       //everything in
01118         x_centre = (inT32) (joined_box.left ()
01119           + (blob_width -
01120           width_units * initial_pitch) / 2);
01121         if (prev_valid && width_units >= 0) {
01122           //                                              if (width_units>0)
01123           //                                              {
01124           //                                                      tprintf("wu=%d, width=%d, xc=%d, adding %d\n",
01125           //                                                              width_units,blob_width,x_centre,x_centre-prev_centre);
01126           //                                              }
01127           gap_stats->add (joined_box.left () - prev_right, 1);
01128           pitch_stats->add (x_centre - prev_centre, 1);
01129         }
01130         prev_centre = (inT32) (x_centre + width_units * initial_pitch);
01131         prev_right = joined_box.right ();
01132         prev_valid = blob_box.left () - joined_box.right () < min_space;
01133         prev_valid = prev_valid && width_units >= 0;
01134         joined_box = blob_box;
01135       }
01136     }
01137   }
01138   while (!blob_it.at_first ());
01139   return gap_stats->get_total () >= 3;
01140 }
01141 
01142 
01143 /**********************************************************************
01144  * tune_row_pitch
01145  *
01146  * Use a dp algorithm to fit the character cells and return the sd of
01147  * the cell size over the row.
01148  **********************************************************************/
01149 
01150 float tune_row_pitch(                             //find fp cells
01151                      TO_ROW *row,                 //row to do
01152                      STATS *projection,           //vertical projection
01153                      inT16 projection_left,       //edge of projection
01154                      inT16 projection_right,      //edge of projection
01155                      float space_size,            //size of blank
01156                      float &initial_pitch,        //guess at pitch
01157                      float &best_sp_sd,           //space sd
01158                      inT16 &best_mid_cuts,        //no of cheap cuts
01159                      ICOORDELT_LIST *best_cells,  //row cells
01160                      BOOL8 testing_on             //inidividual words
01161                     ) {
01162   int pitch_delta;               //offset pitch
01163   inT16 mid_cuts;                //cheap cuts
01164   float pitch_sd;                //current sd
01165   float best_sd;                 //best result
01166   float best_pitch;              //pitch for best result
01167   float initial_sd;              //starting error
01168   float sp_sd;                   //space sd
01169   ICOORDELT_LIST test_cells;     //row cells
01170   ICOORDELT_IT best_it;          //start of best list
01171 
01172   if (textord_fast_pitch_test)
01173     return tune_row_pitch2 (row, projection, projection_left,
01174       projection_right, space_size, initial_pitch,
01175       best_sp_sd,
01176     //space sd
01177       best_mid_cuts, best_cells, testing_on);
01178   if (textord_disable_pitch_test) {
01179     best_sp_sd = initial_pitch;
01180     return initial_pitch;
01181   }
01182   initial_sd =
01183     compute_pitch_sd(row,
01184                      projection,
01185                      projection_left,
01186                      projection_right,
01187                      space_size,
01188                      initial_pitch,
01189                      best_sp_sd,
01190                      best_mid_cuts,
01191                      best_cells,
01192                      testing_on);
01193   best_sd = initial_sd;
01194   best_pitch = initial_pitch;
01195   if (testing_on)
01196     tprintf ("tune_row_pitch:start pitch=%g, sd=%g\n", best_pitch, best_sd);
01197   for (pitch_delta = 1; pitch_delta <= textord_pitch_range; pitch_delta++) {
01198     pitch_sd =
01199       compute_pitch_sd (row, projection, projection_left, projection_right,
01200       space_size, initial_pitch + pitch_delta, sp_sd,
01201       mid_cuts, &test_cells, testing_on);
01202     if (testing_on)
01203       tprintf ("testing pitch at %g, sd=%g\n", initial_pitch + pitch_delta,
01204         pitch_sd);
01205     if (pitch_sd < best_sd) {
01206       best_sd = pitch_sd;
01207       best_mid_cuts = mid_cuts;
01208       best_sp_sd = sp_sd;
01209       best_pitch = initial_pitch + pitch_delta;
01210       best_cells->clear ();
01211       best_it.set_to_list (best_cells);
01212       best_it.add_list_after (&test_cells);
01213     }
01214     else
01215       test_cells.clear ();
01216     if (pitch_sd > initial_sd)
01217       break;                     //getting worse
01218   }
01219   for (pitch_delta = 1; pitch_delta <= textord_pitch_range; pitch_delta++) {
01220     pitch_sd =
01221       compute_pitch_sd (row, projection, projection_left, projection_right,
01222       space_size, initial_pitch - pitch_delta, sp_sd,
01223       mid_cuts, &test_cells, testing_on);
01224     if (testing_on)
01225       tprintf ("testing pitch at %g, sd=%g\n", initial_pitch - pitch_delta,
01226         pitch_sd);
01227     if (pitch_sd < best_sd) {
01228       best_sd = pitch_sd;
01229       best_mid_cuts = mid_cuts;
01230       best_sp_sd = sp_sd;
01231       best_pitch = initial_pitch - pitch_delta;
01232       best_cells->clear ();
01233       best_it.set_to_list (best_cells);
01234       best_it.add_list_after (&test_cells);
01235     }
01236     else
01237       test_cells.clear ();
01238     if (pitch_sd > initial_sd)
01239       break;
01240   }
01241   initial_pitch = best_pitch;
01242 
01243   if (textord_debug_pitch_metric)
01244     print_pitch_sd(row,
01245                    projection,
01246                    projection_left,
01247                    projection_right,
01248                    space_size,
01249                    best_pitch);
01250 
01251   return best_sd;
01252 }
01253 
01254 
01255 /**********************************************************************
01256  * tune_row_pitch
01257  *
01258  * Use a dp algorithm to fit the character cells and return the sd of
01259  * the cell size over the row.
01260  **********************************************************************/
01261 
01262 float tune_row_pitch2(                             //find fp cells
01263                       TO_ROW *row,                 //row to do
01264                       STATS *projection,           //vertical projection
01265                       inT16 projection_left,       //edge of projection
01266                       inT16 projection_right,      //edge of projection
01267                       float space_size,            //size of blank
01268                       float &initial_pitch,        //guess at pitch
01269                       float &best_sp_sd,           //space sd
01270                       inT16 &best_mid_cuts,        //no of cheap cuts
01271                       ICOORDELT_LIST *best_cells,  //row cells
01272                       BOOL8 testing_on             //inidividual words
01273                      ) {
01274   int pitch_delta;               //offset pitch
01275   inT16 pixel;                   //pixel coord
01276   inT16 best_pixel;              //pixel coord
01277   inT16 best_delta;              //best pitch
01278   inT16 best_pitch;              //best pitch
01279   inT16 start;                   //of good range
01280   inT16 end;                     //of good range
01281   inT32 best_count;              //lowest sum
01282   float best_sd;                 //best result
01283   STATS *sum_proj;               //summed projection
01284 
01285   best_sp_sd = initial_pitch;
01286 
01287   if (textord_disable_pitch_test) {
01288     return initial_pitch;
01289   }
01290   sum_proj = new STATS[textord_pitch_range * 2 + 1];
01291   if (sum_proj == NULL)
01292     return initial_pitch;
01293   best_pitch = (inT32) initial_pitch;
01294 
01295   for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range;
01296     pitch_delta++)
01297   sum_proj[textord_pitch_range + pitch_delta].set_range (0,
01298       best_pitch +
01299       pitch_delta + 1);
01300   for (pixel = projection_left; pixel <= projection_right; pixel++) {
01301     for (pitch_delta = -textord_pitch_range;
01302       pitch_delta <= textord_pitch_range; pitch_delta++)
01303     sum_proj[textord_pitch_range +
01304         pitch_delta].add ((pixel - projection_left) % (best_pitch +
01305         pitch_delta),
01306         projection->pile_count (pixel));
01307   }
01308   best_count = sum_proj[textord_pitch_range].pile_count (0);
01309   best_delta = 0;
01310   best_pixel = 0;
01311   for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range;
01312   pitch_delta++) {
01313     for (pixel = 0; pixel < best_pitch + pitch_delta; pixel++) {
01314       if (sum_proj[textord_pitch_range + pitch_delta].pile_count (pixel)
01315       < best_count) {
01316         best_count =
01317           sum_proj[textord_pitch_range +
01318           pitch_delta].pile_count (pixel);
01319         best_delta = pitch_delta;
01320         best_pixel = pixel;
01321       }
01322     }
01323   }
01324   if (testing_on)
01325     tprintf ("tune_row_pitch:start pitch=%g, best_delta=%d, count=%d\n",
01326       initial_pitch, best_delta, best_count);
01327   best_pitch += best_delta;
01328   initial_pitch = best_pitch;
01329   best_count++;
01330   best_count += best_count;
01331   for (start = best_pixel - 2; start > best_pixel - best_pitch
01332     && sum_proj[textord_pitch_range +
01333     best_delta].pile_count (start % best_pitch) <= best_count;
01334     start--);
01335   for (end = best_pixel + 2;
01336     end < best_pixel + best_pitch
01337     && sum_proj[textord_pitch_range +
01338     best_delta].pile_count (end % best_pitch) <= best_count;
01339     end++);
01340 
01341   best_sd =
01342     compute_pitch_sd(row,
01343                      projection,
01344                      projection_left,
01345                      projection_right,
01346                      space_size,
01347                      initial_pitch,
01348                      best_sp_sd,
01349                      best_mid_cuts,
01350                      best_cells,
01351                      testing_on,
01352                      start,
01353                      end);
01354   if (testing_on)
01355     tprintf ("tune_row_pitch:output pitch=%g, sd=%g\n", initial_pitch,
01356       best_sd);
01357 
01358   if (textord_debug_pitch_metric)
01359     print_pitch_sd(row,
01360                    projection,
01361                    projection_left,
01362                    projection_right,
01363                    space_size,
01364                    initial_pitch);
01365 
01366   delete[]sum_proj;
01367 
01368   return best_sd;
01369 }
01370 
01371 
01372 /**********************************************************************
01373  * compute_pitch_sd
01374  *
01375  * Use a dp algorithm to fit the character cells and return the sd of
01376  * the cell size over the row.
01377  **********************************************************************/
01378 
01379 float compute_pitch_sd(                            //find fp cells
01380                        TO_ROW *row,                //row to do
01381                        STATS *projection,          //vertical projection
01382                        inT16 projection_left,      //edge
01383                        inT16 projection_right,     //edge
01384                        float space_size,           //size of blank
01385                        float initial_pitch,        //guess at pitch
01386                        float &sp_sd,               //space sd
01387                        inT16 &mid_cuts,            //no of free cuts
01388                        ICOORDELT_LIST *row_cells,  //list of chop pts
01389                        BOOL8 testing_on,           //inidividual words
01390                        inT16 start,                //start of good range
01391                        inT16 end                   //end of good range
01392                       ) {
01393   inT16 occupation;              //no of cells in word.
01394                                  //blobs
01395   BLOBNBOX_IT blob_it = row->blob_list ();
01396   BLOBNBOX_IT start_it;          //start of word
01397   BLOBNBOX_IT plot_it;           //for plotting
01398   inT16 blob_count;              //no of blobs
01399   TBOX blob_box;                  //bounding box
01400   TBOX prev_box;                  //of super blob
01401   inT32 prev_right;              //of word sync
01402   int scale_factor;              //on scores for big words
01403   inT32 sp_count;                //spaces
01404   FPSEGPT_LIST seg_list;         //char cells
01405   FPSEGPT_IT seg_it;             //iterator
01406   inT16 segpos;                  //position of segment
01407   inT16 cellpos;                 //previous cell boundary
01408                                  //iterator
01409   ICOORDELT_IT cell_it = row_cells;
01410   ICOORDELT *cell;               //new cell
01411   double sqsum;                  //sum of squares
01412   double spsum;                  //of spaces
01413   double sp_var;                 //space error
01414   double word_sync;              //result for word
01415   inT32 total_count;             //total blobs
01416 
01417   if ((pitsync_linear_version & 3) > 1) {
01418     word_sync = compute_pitch_sd2 (row, projection, projection_left,
01419       projection_right, initial_pitch,
01420       occupation, mid_cuts, row_cells,
01421       testing_on, start, end);
01422     sp_sd = occupation;
01423     return word_sync;
01424   }
01425   mid_cuts = 0;
01426   cellpos = 0;
01427   total_count = 0;
01428   sqsum = 0;
01429   sp_count = 0;
01430   spsum = 0;
01431   prev_right = -1;
01432   if (blob_it.empty ())
01433     return space_size * 10;
01434 #ifndef GRAPHICS_DISABLED
01435   if (testing_on && to_win > 0) {
01436     blob_box = blob_it.data ()->bounding_box ();
01437     projection->plot (to_win, projection_left,
01438       row->intercept (), 1.0f, -1.0f, ScrollView::CORAL);
01439   }
01440 #endif
01441   start_it = blob_it;
01442   blob_count = 0;
01443   blob_box = box_next (&blob_it);//first blob
01444   blob_it.mark_cycle_pt ();
01445   do {
01446     for (; blob_count > 0; blob_count--)
01447       box_next(&start_it);
01448     do {
01449       prev_box = blob_box;
01450       blob_count++;
01451       blob_box = box_next (&blob_it);
01452     }
01453     while (!blob_it.cycled_list ()
01454       && blob_box.left () - prev_box.right () < space_size);
01455     plot_it = start_it;
01456     if (pitsync_linear_version & 3)
01457       word_sync =
01458         check_pitch_sync2 (&start_it, blob_count, (inT16) initial_pitch, 2,
01459         projection, projection_left, projection_right,
01460         row->xheight * textord_projection_scale,
01461         occupation, &seg_list, start, end);
01462     else
01463       word_sync =
01464         check_pitch_sync (&start_it, blob_count, (inT16) initial_pitch, 2,
01465         projection, &seg_list);
01466     if (testing_on) {
01467       tprintf ("Word ending at (%d,%d), len=%d, sync rating=%g, ",
01468         prev_box.right (), prev_box.top (),
01469         seg_list.length () - 1, word_sync);
01470       seg_it.set_to_list (&seg_list);
01471       for (seg_it.mark_cycle_pt (); !seg_it.cycled_list ();
01472       seg_it.forward ()) {
01473         if (seg_it.data ()->faked)
01474           tprintf ("(F)");
01475         tprintf ("%d, ", seg_it.data ()->position ());
01476         //                              tprintf("C=%g, s=%g, sq=%g\n",
01477         //                                      seg_it.data()->cost_function(),
01478         //                                      seg_it.data()->sum(),
01479         //                                      seg_it.data()->squares());
01480       }
01481       tprintf ("\n");
01482     }
01483 #ifndef GRAPHICS_DISABLED
01484     if (textord_show_fixed_cuts && blob_count > 0 && to_win > 0)
01485       plot_fp_cells2(to_win, ScrollView::GOLDENROD, row, &seg_list);
01486 #endif
01487     seg_it.set_to_list (&seg_list);
01488     if (prev_right >= 0) {
01489       sp_var = seg_it.data ()->position () - prev_right;
01490       sp_var -= floor (sp_var / initial_pitch + 0.5) * initial_pitch;
01491       sp_var *= sp_var;
01492       spsum += sp_var;
01493       sp_count++;
01494     }
01495     for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) {
01496       segpos = seg_it.data ()->position ();
01497       if (cell_it.empty () || segpos > cellpos + initial_pitch / 2) {
01498                                  //big gap
01499         while (!cell_it.empty () && segpos > cellpos + initial_pitch * 3 / 2) {
01500           cell = new ICOORDELT (cellpos + (inT16) initial_pitch, 0);
01501           cell_it.add_after_then_move (cell);
01502           cellpos += (inT16) initial_pitch;
01503         }
01504                                  //make new one
01505         cell = new ICOORDELT (segpos, 0);
01506         cell_it.add_after_then_move (cell);
01507         cellpos = segpos;
01508       }
01509       else if (segpos > cellpos - initial_pitch / 2) {
01510         cell = cell_it.data ();
01511                                  //average positions
01512         cell->set_x ((cellpos + segpos) / 2);
01513         cellpos = cell->x ();
01514       }
01515     }
01516     seg_it.move_to_last ();
01517     prev_right = seg_it.data ()->position ();
01518     if (textord_pitch_scalebigwords) {
01519       scale_factor = (seg_list.length () - 2) / 2;
01520       if (scale_factor < 1)
01521         scale_factor = 1;
01522     }
01523     else
01524       scale_factor = 1;
01525     sqsum += word_sync * scale_factor;
01526     total_count += (seg_list.length () - 1) * scale_factor;
01527     seg_list.clear ();
01528   }
01529   while (!blob_it.cycled_list ());
01530   sp_sd = sp_count > 0 ? sqrt (spsum / sp_count) : 0;
01531   return total_count > 0 ? sqrt (sqsum / total_count) : space_size * 10;
01532 }
01533 
01534 
01535 /**********************************************************************
01536  * compute_pitch_sd2
01537  *
01538  * Use a dp algorithm to fit the character cells and return the sd of
01539  * the cell size over the row.
01540  **********************************************************************/
01541 
01542 float compute_pitch_sd2(                            //find fp cells
01543                         TO_ROW *row,                //row to do
01544                         STATS *projection,          //vertical projection
01545                         inT16 projection_left,      //edge
01546                         inT16 projection_right,     //edge
01547                         float initial_pitch,        //guess at pitch
01548                         inT16 &occupation,          //no of occupied cells
01549                         inT16 &mid_cuts,            //no of free cuts
01550                         ICOORDELT_LIST *row_cells,  //list of chop pts
01551                         BOOL8 testing_on,           //inidividual words
01552                         inT16 start,                //start of good range
01553                         inT16 end                   //end of good range
01554                        ) {
01555                                  //blobs
01556   BLOBNBOX_IT blob_it = row->blob_list ();
01557   BLOBNBOX_IT plot_it;
01558   inT16 blob_count;              //no of blobs
01559   TBOX blob_box;                  //bounding box
01560   FPSEGPT_LIST seg_list;         //char cells
01561   FPSEGPT_IT seg_it;             //iterator
01562   inT16 segpos;                  //position of segment
01563                                  //iterator
01564   ICOORDELT_IT cell_it = row_cells;
01565   ICOORDELT *cell;               //new cell
01566   double word_sync;              //result for word
01567 
01568   mid_cuts = 0;
01569   if (blob_it.empty ()) {
01570     occupation = 0;
01571     return initial_pitch * 10;
01572   }
01573 #ifndef GRAPHICS_DISABLED
01574   if (testing_on && to_win > 0) {
01575     projection->plot (to_win, projection_left,
01576       row->intercept (), 1.0f, -1.0f, ScrollView::CORAL);
01577   }
01578 #endif
01579   blob_count = 0;
01580   blob_it.mark_cycle_pt ();
01581   do {
01582                                  //first blob
01583     blob_box = box_next (&blob_it);
01584     blob_count++;
01585   }
01586   while (!blob_it.cycled_list ());
01587   plot_it = blob_it;
01588   word_sync = check_pitch_sync2 (&blob_it, blob_count, (inT16) initial_pitch,
01589     2, projection, projection_left,
01590     projection_right,
01591     row->xheight * textord_projection_scale,
01592     occupation, &seg_list, start, end);
01593   if (testing_on) {
01594     tprintf ("Row ending at (%d,%d), len=%d, sync rating=%g, ",
01595       blob_box.right (), blob_box.top (),
01596       seg_list.length () - 1, word_sync);
01597     seg_it.set_to_list (&seg_list);
01598     for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) {
01599       if (seg_it.data ()->faked)
01600         tprintf ("(F)");
01601       tprintf ("%d, ", seg_it.data ()->position ());
01602       //                              tprintf("C=%g, s=%g, sq=%g\n",
01603       //                                      seg_it.data()->cost_function(),
01604       //                                      seg_it.data()->sum(),
01605       //                                      seg_it.data()->squares());
01606     }
01607     tprintf ("\n");
01608   }
01609 #ifndef GRAPHICS_DISABLED
01610   if (textord_show_fixed_cuts && blob_count > 0 && to_win > 0)
01611     plot_fp_cells2(to_win, ScrollView::GOLDENROD, row, &seg_list);
01612 #endif
01613   seg_it.set_to_list (&seg_list);
01614   for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) {
01615     segpos = seg_it.data ()->position ();
01616                                  //make new one
01617     cell = new ICOORDELT (segpos, 0);
01618     cell_it.add_after_then_move (cell);
01619     if (seg_it.at_last ())
01620       mid_cuts = seg_it.data ()->cheap_cuts ();
01621   }
01622   seg_list.clear ();
01623   return occupation > 0 ? sqrt (word_sync / occupation) : initial_pitch * 10;
01624 }
01625 
01626 
01627 /**********************************************************************
01628  * print_pitch_sd
01629  *
01630  * Use a dp algorithm to fit the character cells and return the sd of
01631  * the cell size over the row.
01632  **********************************************************************/
01633 
01634 void print_pitch_sd(                        //find fp cells
01635                     TO_ROW *row,            //row to do
01636                     STATS *projection,      //vertical projection
01637                     inT16 projection_left,  //edges //size of blank
01638                     inT16 projection_right,
01639                     float space_size,
01640                     float initial_pitch     //guess at pitch
01641                    ) {
01642   const char *res2;              //pitch result
01643   inT16 occupation;              //used cells
01644   float sp_sd;                   //space sd
01645                                  //blobs
01646   BLOBNBOX_IT blob_it = row->blob_list ();
01647   BLOBNBOX_IT start_it;          //start of word
01648   BLOBNBOX_IT row_start;         //start of row
01649   inT16 blob_count;              //no of blobs
01650   inT16 total_blob_count;        //total blobs in line
01651   TBOX blob_box;                  //bounding box
01652   TBOX prev_box;                  //of super blob
01653   inT32 prev_right;              //of word sync
01654   int scale_factor;              //on scores for big words
01655   inT32 sp_count;                //spaces
01656   FPSEGPT_LIST seg_list;         //char cells
01657   FPSEGPT_IT seg_it;             //iterator
01658   double sqsum;                  //sum of squares
01659   double spsum;                  //of spaces
01660   double sp_var;                 //space error
01661   double word_sync;              //result for word
01662   double total_count;            //total cuts
01663 
01664   if (blob_it.empty ())
01665     return;
01666   row_start = blob_it;
01667   total_blob_count = 0;
01668 
01669   total_count = 0;
01670   sqsum = 0;
01671   sp_count = 0;
01672   spsum = 0;
01673   prev_right = -1;
01674   blob_it = row_start;
01675   start_it = blob_it;
01676   blob_count = 0;
01677   blob_box = box_next (&blob_it);//first blob
01678   blob_it.mark_cycle_pt ();
01679   do {
01680     for (; blob_count > 0; blob_count--)
01681       box_next(&start_it);
01682     do {
01683       prev_box = blob_box;
01684       blob_count++;
01685       blob_box = box_next (&blob_it);
01686     }
01687     while (!blob_it.cycled_list ()
01688       && blob_box.left () - prev_box.right () < space_size);
01689     word_sync =
01690       check_pitch_sync2 (&start_it, blob_count, (inT16) initial_pitch, 2,
01691       projection, projection_left, projection_right,
01692       row->xheight * textord_projection_scale,
01693       occupation, &seg_list, 0, 0);
01694     total_blob_count += blob_count;
01695     seg_it.set_to_list (&seg_list);
01696     if (prev_right >= 0) {
01697       sp_var = seg_it.data ()->position () - prev_right;
01698       sp_var -= floor (sp_var / initial_pitch + 0.5) * initial_pitch;
01699       sp_var *= sp_var;
01700       spsum += sp_var;
01701       sp_count++;
01702     }
01703     seg_it.move_to_last ();
01704     prev_right = seg_it.data ()->position ();
01705     if (textord_pitch_scalebigwords) {
01706       scale_factor = (seg_list.length () - 2) / 2;
01707       if (scale_factor < 1)
01708         scale_factor = 1;
01709     }
01710     else
01711       scale_factor = 1;
01712     sqsum += word_sync * scale_factor;
01713     total_count += (seg_list.length () - 1) * scale_factor;
01714     seg_list.clear ();
01715   }
01716   while (!blob_it.cycled_list ());
01717   sp_sd = sp_count > 0 ? sqrt (spsum / sp_count) : 0;
01718   word_sync = total_count > 0 ? sqrt (sqsum / total_count) : space_size * 10;
01719   tprintf ("new_sd=%g:sd/p=%g:new_sp_sd=%g:res=%c:",
01720     word_sync, word_sync / initial_pitch, sp_sd,
01721     word_sync < textord_words_pitchsd_threshold * initial_pitch
01722     ? 'F' : 'P');
01723 
01724   start_it = row_start;
01725   blob_it = row_start;
01726   word_sync =
01727     check_pitch_sync2 (&blob_it, total_blob_count, (inT16) initial_pitch, 2,
01728     projection, projection_left, projection_right,
01729     row->xheight * textord_projection_scale, occupation,
01730     &seg_list, 0, 0);
01731   if (occupation > 1)
01732     word_sync /= occupation;
01733   word_sync = sqrt (word_sync);
01734 
01735 #ifndef GRAPHICS_DISABLED
01736   if (textord_show_row_cuts && to_win != NULL)
01737     plot_fp_cells2(to_win, ScrollView::CORAL, row, &seg_list);
01738 #endif
01739   seg_list.clear ();
01740   if (word_sync < textord_words_pitchsd_threshold * initial_pitch) {
01741     if (word_sync < textord_words_def_fixed * initial_pitch
01742       && !row->all_caps)
01743       res2 = "DF";
01744     else
01745       res2 = "MF";
01746   }
01747   else
01748     res2 = word_sync < textord_words_def_prop * initial_pitch ? "MP" : "DP";
01749   tprintf
01750     ("row_sd=%g:sd/p=%g:res=%c:N=%d:res2=%s,init pitch=%g, row_pitch=%g, all_caps=%d\n",
01751     word_sync, word_sync / initial_pitch,
01752     word_sync < textord_words_pitchsd_threshold * initial_pitch ? 'F' : 'P',
01753     occupation, res2, initial_pitch, row->fixed_pitch, row->all_caps);
01754 }
01755 
01756 /**********************************************************************
01757  * find_repeated_chars
01758  *
01759  * Extract marked leader blobs and put them
01760  * into words in advance of fixed pitch checking and word generation.
01761  **********************************************************************/
01762 void find_repeated_chars(TO_BLOCK *block,       // Block to search.
01763                          BOOL8 testing_on) {    // Debug mode.
01764   POLY_BLOCK* pb = block->block->poly_block();
01765   if (pb != NULL && !pb->IsText())
01766     return;  // Don't find repeated chars in non-text blocks.
01767 
01768   TO_ROW *row;
01769   BLOBNBOX_IT box_it;
01770   BLOBNBOX_IT search_it;         // forward search
01771   WERD_IT word_it;               // new words
01772   WERD *word;                    // new word
01773   TBOX word_box;                 // for plotting
01774   int blobcount, repeated_set;
01775 
01776   TO_ROW_IT row_it = block->get_rows();
01777   if (row_it.empty()) return;  // empty block
01778   for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
01779     row = row_it.data();
01780     box_it.set_to_list(row->blob_list());
01781     if (box_it.empty())  continue; // no blobs in this row
01782     if (!row->rep_chars_marked()) {
01783       mark_repeated_chars(row);
01784     }
01785     if (row->num_repeated_sets() == 0) continue;  // nothing to do for this row
01786     word_it.set_to_list(&row->rep_words);
01787     do {
01788       if (box_it.data()->repeated_set() != 0 &&
01789           !box_it.data()->joined_to_prev()) {
01790         blobcount = 1;
01791         repeated_set = box_it.data()->repeated_set();
01792         search_it = box_it;
01793         search_it.forward();
01794         while (!search_it.at_first() &&
01795                search_it.data()->repeated_set() == repeated_set) {
01796           blobcount++;
01797           search_it.forward();
01798         }
01799         // After the call to make_real_word() all the blobs from this
01800         // repeated set will be removed from the blob list. box_it will be
01801         // set to point to the blob after the end of the extracted sequence.
01802         word = make_real_word(&box_it, blobcount, box_it.at_first(), 1);
01803         if (!box_it.empty() && box_it.data()->joined_to_prev()) {
01804           tprintf("Bad box joined to prev at");
01805           box_it.data()->bounding_box().print();
01806           tprintf("After repeated word:");
01807           word->bounding_box().print();
01808         }
01809         ASSERT_HOST(box_it.empty() || !box_it.data()->joined_to_prev());
01810         word->set_flag(W_REP_CHAR, true);
01811         word->set_flag(W_DONT_CHOP, true);
01812         word_it.add_after_then_move(word);
01813       } else {
01814         box_it.forward();
01815       }
01816     } while (!box_it.at_first());
01817   }
01818 }
01819 
01820 
01821 /**********************************************************************
01822  * plot_fp_word
01823  *
01824  * Plot a block of words as if fixed pitch.
01825  **********************************************************************/
01826 
01827 #ifndef GRAPHICS_DISABLED
01828 void plot_fp_word(                  //draw block of words
01829                   TO_BLOCK *block,  //block to draw
01830                   float pitch,      //pitch to draw with
01831                   float nonspace    //for space threshold
01832                  ) {
01833   TO_ROW *row;                   //current row
01834   TO_ROW_IT row_it = block->get_rows ();
01835 
01836   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
01837     row = row_it.data ();
01838     row->min_space = (inT32) ((pitch + nonspace) / 2);
01839     row->max_nonspace = row->min_space;
01840     row->space_threshold = row->min_space;
01841     plot_word_decisions (to_win, (inT16) pitch, row);
01842   }
01843 }
01844 #endif