Tesseract  3.02
tesseract-ocr/textord/makerow.h File Reference
#include "params.h"
#include "ocrblock.h"
#include "blobs.h"
#include "blobbox.h"
#include "statistc.h"
#include "notdll.h"

Go to the source code of this file.

Enumerations

enum  OVERLAP_STATE { ASSIGN, REJECT, NEW_ROW }
enum  ROW_CATEGORY { ROW_ASCENDERS_FOUND, ROW_DESCENDERS_FOUND, ROW_UNKNOWN, ROW_INVALID }

Functions

void get_min_max_xheight (int block_linesize, int *min_height, int *max_height)
ROW_CATEGORY get_row_category (const TO_ROW *row)
bool within_error_margin (float test, float num, float margin)
void fill_heights (TO_ROW *row, float gradient, int min_height, int max_height, STATS *heights, STATS *floating_heights)
float make_single_row (ICOORD page_tr, TO_BLOCK *block, TO_BLOCK_LIST *blocks)
float make_rows (ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
void make_initial_textrows (ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, BOOL8 testing_on)
void fit_lms_line (TO_ROW *row)
void compute_page_skew (TO_BLOCK_LIST *blocks, float &page_m, float &page_err)
void cleanup_rows_making (ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on)
void delete_non_dropout_rows (TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on)
BOOL8 find_best_dropout_row (TO_ROW *row, inT32 distance, float dist_limit, inT32 line_index, TO_ROW_IT *row_it, BOOL8 testing_on)
TBOX deskew_block_coords (TO_BLOCK *block, float gradient)
void compute_line_occupation (TO_BLOCK *block, float gradient, inT32 min_y, inT32 max_y, inT32 *occupation, inT32 *deltas)
void compute_occupation_threshold (inT32 low_window, inT32 high_window, inT32 line_count, inT32 *occupation, inT32 *thresholds)
void compute_dropout_distances (inT32 *occupation, inT32 *thresholds, inT32 line_count)
void expand_rows (ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on)
void adjust_row_limits (TO_BLOCK *block)
void compute_row_stats (TO_BLOCK *block, BOOL8 testing_on)
float median_block_xheight (TO_BLOCK *block, float gradient)
int compute_xheight_from_modes (STATS *heights, STATS *floating_heights, bool cap_only, int min_height, int max_height, float *xheight, float *ascrise)
inT32 compute_row_descdrop (TO_ROW *row, float gradient, int xheight_blob_count, STATS *heights)
inT32 compute_height_modes (STATS *heights, inT32 min_height, inT32 max_height, inT32 *modes, inT32 maxmodes)
void correct_row_xheight (TO_ROW *row, float xheight, float ascrise, float descdrop)
void separate_underlines (TO_BLOCK *block, float gradient, FCOORD rotation, BOOL8 testing_on)
void pre_associate_blobs (ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, BOOL8 testing_on)
void fit_parallel_rows (TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on)
void fit_parallel_lms (float gradient, TO_ROW *row)
void make_baseline_spline (TO_ROW *row, TO_BLOCK *block)
BOOL8 segment_baseline (TO_ROW *row, TO_BLOCK *block, inT32 &segments, inT32 xstarts[])
double * linear_spline_baseline (TO_ROW *row, TO_BLOCK *block, inT32 &segments, inT32 xstarts[])
void assign_blobs_to_rows (TO_BLOCK *block, float *gradient, int pass, BOOL8 reject_misses, BOOL8 make_new_rows, BOOL8 drawing_skew)
OVERLAP_STATE most_overlapping_row (TO_ROW_IT *row_it, TO_ROW *&best_row, float top, float bottom, float rowsize, BOOL8 testing_blob)
int blob_x_order (const void *item1, const void *item2)
int row_y_order (const void *item1, const void *item2)
int row_spacing_order (const void *item1, const void *item2)
void mark_repeated_chars (TO_ROW *row)

Variables

bool textord_show_initial_rows = 0
bool textord_show_parallel_rows = 0
bool textord_show_expanded_rows = 0
bool textord_show_final_rows = 0
bool textord_show_final_blobs = 0
bool textord_test_landscape = 0
bool textord_parallel_baselines = 1
bool textord_straight_baselines = 0
bool textord_quadratic_baselines = 0
bool textord_old_baselines = 1
bool textord_old_xheight = 1
bool textord_fix_xheight_bug = 1
bool textord_fix_makerow_bug = 1
bool textord_cblob_blockocc = 1
bool textord_debug_xheights = 0
int textord_test_x = 0
int textord_test_y = 0
int textord_min_blobs_in_row = 4
int textord_spline_minblobs = 8
int textord_spline_medianwin = 6
int textord_min_xheight = 10
double textord_spline_shift_fraction = 0.02
double textord_spline_outlier_fraction = 0.1
double textord_skew_ile = 0.5
double textord_skew_lag = 0.75
double textord_linespace_iqrlimit = 0.2
double textord_width_limit = 8
double textord_chop_width = 1.5
double textord_minxh = 0.25
double textord_min_linesize = 1.25
double textord_excess_blobsize = 1.3
double textord_occupancy_threshold = 0.4
double textord_underline_width = 2.0
double textord_min_blob_height_fraction = 0.75
double textord_xheight_mode_fraction = 0.4
double textord_ascheight_mode_fraction = 0.15
double textord_ascx_ratio_min = 1.2
double textord_ascx_ratio_max = 1.7
double textord_descx_ratio_min = 0.15
double textord_descx_ratio_max = 0.6
double textord_xheight_error_margin = 0.1
int textord_lms_line_trials = 12
bool textord_new_initial_xheight = 1

Enumeration Type Documentation

Enumerator:
ASSIGN 
REJECT 
NEW_ROW 

Definition at line 30 of file makerow.h.

{
  ASSIGN,                        //assign it to row
  REJECT,                        //reject it - dual overlap
  NEW_ROW                        //start a new row for it
};
Enumerator:
ROW_ASCENDERS_FOUND 
ROW_DESCENDERS_FOUND 
ROW_UNKNOWN 
ROW_INVALID 

Definition at line 37 of file makerow.h.


Function Documentation

void adjust_row_limits ( TO_BLOCK *  block)

adjust_row_limits

Change the limits of rows to suit the default fractions.

Definition at line 1223 of file makerow.cpp.

                        {
  // Re-derive each row's vertical limits from its current height: the
  // height is split between x-height, ascender and descender according to
  // the CCStruct default fractions, and the limits are placed around the
  // row's intercept. Every row is also marked as not merged.
  TO_ROW_IT rows(block->get_rows());

  if (textord_show_expanded_rows) {
    tprintf("Adjusting row limits for block(%d,%d)\n",
            block->block->bounding_box().left(),
            block->block->bounding_box().top());
  }

  for (rows.mark_cycle_pt(); !rows.cycled_list(); rows.forward()) {
    TO_ROW *cur_row = rows.data();
    float height = cur_row->max_y() - cur_row->min_y();

    if (textord_show_expanded_rows) {
      tprintf("Row at %f has min %f, max %f, size %f\n",
              cur_row->intercept(), cur_row->min_y(), cur_row->max_y(), height);
    }

    // Normalise so that "height" is the size of one whole fraction unit.
    height /= tesseract::CCStruct::kXHeightFraction +
        tesseract::CCStruct::kAscenderFraction +
        tesseract::CCStruct::kDescenderFraction;

    // Offsets of the new limits relative to the intercept.
    const float above = height * (tesseract::CCStruct::kXHeightFraction +
                                  tesseract::CCStruct::kAscenderFraction);
    const float below = -height * tesseract::CCStruct::kDescenderFraction;

    cur_row->set_limits(cur_row->intercept() + below,
                        cur_row->intercept() + above);
    cur_row->merged = FALSE;
  }
}
void assign_blobs_to_rows ( TO_BLOCK *  block,
float *  gradient,
int  pass,
BOOL8  reject_misses,
BOOL8  make_new_rows,
BOOL8  drawing_skew 
)

Definition at line 2402 of file makerow.cpp.

                           {
  // Walks the blobs of block->blobs in left-to-right order and, for each
  // one, decides whether to add it to an existing row (ASSIGN), start a new
  // row for it (NEW_ROW) or leave it in block->blobs (REJECT).
  //   gradient      - if non-NULL, *gradient is used to deskew each blob;
  //                   if NULL, a running skew estimate (block_skew) is used.
  //   pass          - only used in the test-blob debug message.
  //   reject_misses - if FALSE, a NEW_ROW verdict from most_overlapping_row
  //                   is turned into ASSIGN.
  //   make_new_rows - if TRUE, blobs that fit no row may start a new row.
  //   drawing_skew  - if TRUE (and graphics are enabled), the skew estimate
  //                   is drawn in to_win.
  // Rows that end up with no blobs are deleted before returning.
  OVERLAP_STATE overlap_result;  //what to do with it
  float ycoord;                  //current y
  float top, bottom;             //of blob
  float g_length = 1.0f;         //from gradient
  inT16 row_count;               //no of rows
  inT16 left_x;                  //left edge
  inT16 last_x;                  //previous edge
  float block_skew;              //y delta
  float smooth_factor;           //for new coords
  float near_dist;               //dist to nearest row
  ICOORD testpt;                 //testing only
  BLOBNBOX *blob;                //current blob
  TO_ROW *row;                   //current row
  TO_ROW *dest_row = NULL;       //row to put blob in
                                 //iterators
  BLOBNBOX_IT blob_it = &block->blobs;
  TO_ROW_IT row_it = block->get_rows ();

  // Vertical midpoint of the block; only used as the origin for the
  // optional skew drawing.
  ycoord =
    (block->block->bounding_box ().bottom () +
    block->block->bounding_box ().top ()) / 2.0f;
  if (gradient != NULL)
    g_length = sqrt (1 + *gradient * *gradient);
#ifndef GRAPHICS_DISABLED
  if (drawing_skew)
    to_win->SetCursor(block->block->bounding_box ().left (), ycoord);
#endif
  testpt = ICOORD (textord_test_x, textord_test_y);
  // Process blobs in order of increasing left edge.
  blob_it.sort (blob_x_order);
  smooth_factor = 1.0;
  block_skew = 0.0f;
  row_count = row_it.length ();  //might have rows
  if (!blob_it.empty ()) {
    left_x = blob_it.data ()->bounding_box ().left ();
  }
  else {
    left_x = block->block->bounding_box ().left ();
  }
  last_x = left_x;
  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
    blob = blob_it.data ();
    // With a known gradient the skew at this blob is computed directly;
    // otherwise the running estimate is scaled up when there is a wide
    // horizontal gap since the previous blob (if interpolation is enabled).
    if (gradient != NULL) {
      block_skew = (1 - 1 / g_length) * blob->bounding_box ().bottom ()
        + *gradient / g_length * blob->bounding_box ().left ();
    }
    else if (blob->bounding_box ().left () - last_x > block->line_size / 2
      && last_x - left_x > block->line_size * 2
    && textord_interpolating_skew) {
      //                      tprintf("Interpolating skew from %g",block_skew);
      block_skew *= (float) (blob->bounding_box ().left () - left_x)
        / (last_x - left_x);
      //                      tprintf("to %g\n",block_skew);
    }
    last_x = blob->bounding_box ().left ();
    // Deskewed vertical extent of the blob.
    top = blob->bounding_box ().top () - block_skew;
    bottom = blob->bounding_box ().bottom () - block_skew;
#ifndef GRAPHICS_DISABLED
    if (drawing_skew)
      to_win->DrawTo(blob->bounding_box ().left (), ycoord + block_skew);
#endif
    if (!row_it.empty ()) {
      // Advance from the first row to the first one whose min_y is not
      // above the blob's top (or to the last row). The re-ordering loops
      // further down keep the list sorted by decreasing min_y.
      for (row_it.move_to_first ();
        !row_it.at_last () && row_it.data ()->min_y () > top;
        row_it.forward ());
      row = row_it.data ();
      if (row->min_y () <= top && row->max_y () >= bottom) {
      //any overlap
        dest_row = row;
        overlap_result = most_overlapping_row (&row_it, dest_row,
          top, bottom,
          block->line_size,
          blob->bounding_box ().
          contains (testpt));
        if (overlap_result == NEW_ROW && !reject_misses)
          overlap_result = ASSIGN;
      }
      else {
        // No overlap with the candidate row. Unless new rows are allowed,
        // try to attach the blob to a neighbouring row that is within a
        // tolerance derived from line_spacing - line_size.
        overlap_result = NEW_ROW;
        if (!make_new_rows) {
          // NOTE(review): when row_it is at the first row, data_relative(-1)
          // presumably wraps to the last row of the list - confirm.
          near_dist = row_it.data_relative (-1)->min_y () - top;
                                 //below bottom
          if (bottom < row->min_y ()) {
            if (row->min_y () - bottom <=
              (block->line_spacing -
            block->line_size) * tesseract::CCStruct::kDescenderFraction) {
                                 //done it
              overlap_result = ASSIGN;
              dest_row = row;
            }
          }
          else if (near_dist > 0
          && near_dist < bottom - row->max_y ()) {
            // The previous row in the list is nearer: step back to it.
            row_it.backward ();
            dest_row = row_it.data ();
            if (dest_row->min_y () - bottom <=
              (block->line_spacing -
            block->line_size) * tesseract::CCStruct::kDescenderFraction) {
                                 //done it
              overlap_result = ASSIGN;
            }
          }
          else {
            if (top - row->max_y () <=
              (block->line_spacing -
              block->line_size) * (textord_overlap_x +
            tesseract::CCStruct::kAscenderFraction)) {
                                 //done it
              overlap_result = ASSIGN;
              dest_row = row;
            }
          }
        }
      }
      // Act on the verdict: extract the blob from the block list and hand
      // it to the chosen row, or build a new row around it.
      if (overlap_result == ASSIGN)
        dest_row->add_blob (blob_it.extract (), top, bottom,
          block->line_size);
      if (overlap_result == NEW_ROW) {
        if (make_new_rows && top - bottom < block->max_blob_size) {
          dest_row =
            new TO_ROW (blob_it.extract (), top, bottom,
            block->line_size);
          row_count++;
          if (bottom > row_it.data ()->min_y ())
            row_it.add_before_then_move (dest_row);
          //insert in right place
          else
            row_it.add_after_then_move (dest_row);
          smooth_factor =
            1.0 / (row_count * textord_skew_lag +
            textord_skewsmooth_offset);
        }
        else
          overlap_result = REJECT;
      }
    }
    else if (make_new_rows && top - bottom < block->max_blob_size) {
      // No rows exist yet: this blob starts the first one.
      overlap_result = NEW_ROW;
      dest_row =
        new TO_ROW (blob_it.extract (), top, bottom, block->line_size);
      row_count++;
      row_it.add_after_then_move (dest_row);
      smooth_factor = 1.0 / (row_count * textord_skew_lag +
                             textord_skewsmooth_offset2);
    }
    else
      overlap_result = REJECT;
    // Debug trace for the blob containing (textord_test_x, textord_test_y).
    if (blob->bounding_box ().contains (testpt)) {
      if (overlap_result != REJECT) {
        tprintf ("Test blob assigned to row at (%g,%g) on pass %d\n",
          dest_row->min_y (), dest_row->max_y (), pass);
      }
      else {
        tprintf ("Test blob assigned to no row on pass %d\n", pass);
      }
    }
    if (overlap_result != REJECT) {
      // The row under the iterator may have changed extent: move it
      // towards the front while its min_y exceeds its predecessor's, then
      // towards the back while its min_y is below its successor's.
      while (!row_it.at_first ()
        && row_it.data ()->min_y () >
      row_it.data_relative (-1)->min_y ()) {
        row = row_it.extract ();
        row_it.backward ();
        row_it.add_before_then_move (row);
      }
      while (!row_it.at_last ()
        && row_it.data ()->min_y () <
      row_it.data_relative (1)->min_y ()) {
        row = row_it.extract ();
        row_it.forward ();
                                 //keep rows in order
        row_it.add_after_then_move (row);
      }
      // Blend the blob's offset from its row's initial_min_y into the
      // running skew estimate, weighted by smooth_factor.
      block_skew = (1 - smooth_factor) * block_skew
        + smooth_factor * (blob->bounding_box ().bottom () -
        dest_row->initial_min_y ());
    }
  }
  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
    if (row_it.data ()->blob_list ()->empty ())
      delete row_it.extract ();  //discard empty rows
  }
}
int blob_x_order ( const void *  item1,
const void *  item2 
)

Definition at line 2694 of file makerow.cpp.

                                    {
  // Sort comparator: each item points at a BLOBNBOX pointer. Blobs are
  // ordered by the left edge of their bounding boxes; the result is
  // -1, 0 or 1 for less-than, equal and greater-than respectively.
  BLOBNBOX *first = *(BLOBNBOX **) item1;
  BLOBNBOX *second = *(BLOBNBOX **) item2;

  const int first_left = first->bounding_box().left();
  const int second_left = second->bounding_box().left();

  if (first_left < second_left)
    return -1;
  return first_left > second_left ? 1 : 0;
}
void cleanup_rows_making ( ICOORD  page_tr,
TO_BLOCK *  block,
float  gradient,
FCOORD  rotation,
inT32  block_edge,
BOOL8  testing_on 
)

cleanup_rows_making

Remove overlapping rows and fit all the blobs to what's left.

Definition at line 534 of file makerow.cpp.

                   {
  // Debug display is wanted only when both the parameter and the caller
  // ask for it.
  const bool show_rows = textord_show_parallel_rows && testing_on;

#ifndef GRAPHICS_DISABLED
  if (show_rows && to_win == NULL)
    create_to_win(page_tr);
#endif

  // Fit the rows, drop the unwanted ones and grow the survivors.
  fit_parallel_rows(block, gradient, rotation, block_edge, show_rows);
  delete_non_dropout_rows(block, gradient, rotation, block_edge, show_rows);
  expand_rows(page_tr, block, gradient, rotation, block_edge, testing_on);

  // Hand every row's blobs back to the block's main blob list.
  BLOBNBOX_IT blob_it(&block->blobs);
  TO_ROW_IT row_it(block->get_rows());
  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
    blob_it.add_list_after(row_it.data()->blob_list());
  }

  // Pass 1: redistribute those blobs without creating rows.
  assign_blobs_to_rows(block, &gradient, 1, FALSE, FALSE, FALSE);

  // Pass 2: add the large blobs; misses are rejected and new rows may be
  // created.
  blob_it.set_to_list(&block->blobs);
  blob_it.add_list_after(&block->large_blobs);
  assign_blobs_to_rows(block, &gradient, 2, TRUE, TRUE, FALSE);

  // Pass 3: add the noise and small blobs, again without creating rows.
  blob_it.set_to_list(&block->blobs);
  blob_it.add_list_after(&block->noise_blobs);
  blob_it.add_list_after(&block->small_blobs);
  assign_blobs_to_rows(block, &gradient, 3, FALSE, FALSE, FALSE);
}
void compute_dropout_distances ( inT32 *  occupation,
inT32 *  thresholds,
inT32  line_count 
)

Definition at line 1018 of file makerow.cpp.

                                {
  // Overwrites thresholds[0..line_count) in place with signed distances to
  // "transition" lines. A transition is a line whose occupation is at or
  // above its threshold while the line before it was below its own
  // (original) threshold.
  //   - The transition line itself receives 0.
  //   - Lines after a transition receive -1, -2, ... (distance from it).
  //   - Lines before a transition are back-filled with 1, 2, ... for as long
  //     as that is smaller than their distance from the previous transition.
  //   - Before the first transition the count starts below -line_count, so
  //     the back-fill always reaches index 0.
  // occupation is only read.
  inT32 line_index;              //of thresholds line
  inT32 distance;                //from prev dropout
  inT32 next_dist;               //to next dropout
  inT32 back_index;              //for back filling
  inT32 prev_threshold;          //before overwrite

  // The do/while below always touches thresholds[0], so an empty array
  // must be rejected up front.
  if (line_count <= 0)
    return;

  distance = -line_count;
  line_index = 0;
  do {
    // Count down from the previous transition until the next one is found
    // or the array is exhausted.
    do {
      distance--;
      prev_threshold = thresholds[line_index];
                                 //distance from prev
      thresholds[line_index] = distance;
      line_index++;
    }
    while (line_index < line_count
      && (occupation[line_index] < thresholds[line_index]
      || occupation[line_index - 1] >= prev_threshold));
    if (line_index < line_count) {
      // Found a transition at line_index: walk backwards replacing the
      // negative counts with the (smaller) positive distance to it.
      back_index = line_index - 1;
      next_dist = 1;
      while (next_dist < -distance && back_index >= 0) {
        thresholds[back_index] = next_dist;
        back_index--;
        next_dist++;
        distance++;
      }
      // The next inner iteration decrements this to 0 for the transition
      // line itself.
      distance = 1;
    }
  }
  while (line_index < line_count);
}
inT32 compute_height_modes ( STATS *  heights,
inT32  min_height,
inT32  max_height,
inT32 *  modes,
inT32  maxmodes 
)

Definition at line 1743 of file makerow.cpp.

                                           {  // size of modes
  // Collects into modes[] up to maxmodes heights from the range
  // [min_height, max_height] that have a non-empty pile in *heights.
  // While modes[] has room, every non-empty height is appended. Once it is
  // full, a height whose pile is at least as large as the smallest stored
  // pile evicts that smallest entry and is appended at the end.
  // Returns the number of entries stored in modes[].
  const inT32 span = max_height + 1 - min_height;  // heights to examine
  inT32 stored = 0;                  // entries currently in modes
  inT32 weakest_count = MAX_INT32;   // smallest pile held in modes
  inT32 weakest_slot = -1;           // position of that smallest pile

  for (inT32 offset = 0; offset < span; ++offset) {
    const inT32 height = min_height + offset;
    const inT32 count = heights->pile_count(height);
    if (count <= 0)
      continue;                      // no samples of this height

    if (stored < maxmodes) {
      // Still filling: append and keep track of the smallest pile.
      if (count < weakest_count) {
        weakest_count = count;
        weakest_slot = stored;
      }
      modes[stored++] = height;
      continue;
    }

    if (count < weakest_count)
      continue;                      // not big enough to displace anything

    // Close the gap left by the weakest entry, then append the newcomer.
    for (; weakest_slot < maxmodes - 1; ++weakest_slot)
      modes[weakest_slot] = modes[weakest_slot + 1];
    modes[maxmodes - 1] = height;

    if (count == weakest_count) {
      // The newcomer ties the old minimum, so it is the new weakest.
      weakest_slot = maxmodes - 1;
    } else {
      // Rescan to find the smallest pile among the stored heights.
      weakest_count = heights->pile_count(modes[0]);
      weakest_slot = 0;
      for (inT32 slot = 1; slot < maxmodes; ++slot) {
        const inT32 slot_count = heights->pile_count(modes[slot]);
        if (slot_count < weakest_count) {
          weakest_count = slot_count;
          weakest_slot = slot;
        }
      }
    }
  }
  return stored;
}
void compute_line_occupation ( TO_BLOCK *  block,
float  gradient,
inT32  min_y,
inT32  max_y,
inT32 *  occupation,
inT32 *  deltas 
)

Definition at line 871 of file makerow.cpp.

                              {
  // Builds a per-scan-line occupation profile for the block.
  // Every blob in every row is rotated by the inverse of the skew given by
  // gradient. Its width is added to deltas[] at the line of its bottom and
  // subtracted at the line of its top. occupation[] is the running sum of
  // deltas[], so occupation[i] is the total width of the blobs covering
  // line min_y + i. Both arrays are indexed by (y - min_y) and are written
  // for max_y - min_y + 1 entries.
  //
  // A blob whose deskewed top or bottom falls outside [min_y, max_y] is
  // reported on stderr. The offending index is then clamped to the nearest
  // valid line, so the write can never land outside deltas[].
  const inT32 line_count = max_y - min_y + 1;  //maxy-miny+1
  if (line_count <= 0)
    return;                      //no scan lines, nothing to fill in

  inT32 line_index;              //of scan line
  int index;                     //array index for daft compilers
  TO_ROW_IT row_it = block->get_rows ();
  BLOBNBOX_IT blob_it;           //iterator
  const float length = sqrt (gradient * gradient + 1);  //of skew vector
                                 //inverse of skew
  const FCOORD rotation = FCOORD (1 / length, -gradient / length);

  for (line_index = 0; line_index < line_count; line_index++)
    deltas[line_index] = 0;
  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
    TO_ROW *row = row_it.data ();
    blob_it.set_to_list (row->blob_list ());
    for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
    blob_it.forward ()) {
      BLOBNBOX *blob = blob_it.data ();
      TBOX blob_box = blob->bounding_box ();
      blob_box.rotate (rotation);//de-skew it
      const float top = blob_box.top ();
      const float bottom = blob_box.bottom ();
      const inT32 width =
        (inT32) floor ((FLOAT32) (blob_box.right () - blob_box.left ()));

                                 //count transitions
      index = (inT32) floor (bottom) - min_y;
      if (index < 0 || index >= line_count) {
        fprintf (stderr,
          "Bad y coord of bottom, " INT32FORMAT "(" INT32FORMAT ","
          INT32FORMAT ")\n", (inT32) floor (bottom), min_y, max_y);
        // Keep the write inside the array.
        index = index < 0 ? 0 : line_count - 1;
      }
      deltas[index] += width;

      index = (inT32) floor (top) - min_y;
      if (index < 0 || index >= line_count) {
        fprintf (stderr,
          "Bad y coord of top, " INT32FORMAT "(" INT32FORMAT ","
          INT32FORMAT ")\n", (inT32) floor (top), min_y, max_y);
        // Keep the write inside the array.
        index = index < 0 ? 0 : line_count - 1;
      }
      deltas[index] -= width;
    }
  }
  // Integrate the deltas to get the occupation of each line.
  occupation[0] = deltas[0];
  for (line_index = 1; line_index < line_count; line_index++)
    occupation[line_index] = occupation[line_index - 1] + deltas[line_index];
}
void compute_occupation_threshold ( inT32  low_window,
inT32  high_window,
inT32  line_count,
inT32 *  occupation,
inT32 *  thresholds 
)

compute_occupation_threshold

Compute thresholds for textline or not for the occupation array.

Definition at line 937 of file makerow.cpp.

                                   {
  // Fills thresholds[0..line_count) from occupation[0..line_count).
  // Each threshold has the form (sum - min_occ) / divisor + min_occ, where
  // sum and min_occ are the sum and the minimum of occupation[] over a
  // window of low_window + high_window lines, and divisor is the window
  // size divided by textord_occupancy_threshold (rounded up).
  // If the window is not smaller than the array, one threshold computed
  // over the whole array is used for every line.
  inT32 line_index;              //of thresholds line
  inT32 low_index;               //in occupation
  inT32 high_index;              //in occupation
  inT32 sum;                     //current average
  inT32 divisor;                 //to get thresholds
  inT32 min_index;               //of min occ
  inT32 min_occ;                 //min in locality
  inT32 test_index;              //for finding min

  // NOTE(review): divisor is 0 when low_window + high_window == 0, which
  // would make the divisions below fault; callers presumably always pass
  // positive window sizes - confirm.
  divisor =
    (inT32) ceil ((low_window + high_window) / textord_occupancy_threshold);
  if (low_window + high_window < line_count) {
    // Prime the window with the first low_window + high_window lines;
    // high_index ends up one past the window.
    for (sum = 0, high_index = 0; high_index < low_window; high_index++)
      sum += occupation[high_index];
    for (low_index = 0; low_index < high_window; low_index++, high_index++)
      sum += occupation[high_index];
    min_occ = occupation[0];
    min_index = 0;
    for (test_index = 1; test_index < high_index; test_index++) {
      if (occupation[test_index] <= min_occ) {
        min_occ = occupation[test_index];
        min_index = test_index;  //find min in region
      }
    }
    // The first low_window lines all share the initial window's threshold.
    for (line_index = 0; line_index < low_window; line_index++)
      thresholds[line_index] = (sum - min_occ) / divisor + min_occ;
    //same out to end
    // Slide the window one line at a time: drop occupation[low_index],
    // take in occupation[high_index], and emit one threshold per step.
    for (low_index = 0; high_index < line_count; low_index++, high_index++) {
      sum -= occupation[low_index];
      sum += occupation[high_index];
      if (occupation[high_index] <= min_occ) {
                                 //find min in region
        min_occ = occupation[high_index];
        min_index = high_index;
      }
                                 //lost min from region
      if (min_index <= low_index) {
        // The old minimum has just left the window: rescan the lines
        // low_index + 1 .. high_index for the new one.
        min_occ = occupation[low_index + 1];
        min_index = low_index + 1;
        for (test_index = low_index + 2; test_index <= high_index;
        test_index++) {
          if (occupation[test_index] <= min_occ) {
            min_occ = occupation[test_index];
                                 //find min in region
            min_index = test_index;
          }
        }
      }
      thresholds[line_index++] = (sum - min_occ) / divisor + min_occ;
    }
  }
  else {
    // Window covers the whole array: use the global sum and minimum.
    min_occ = occupation[0];
    min_index = 0;
    for (sum = 0, low_index = 0; low_index < line_count; low_index++) {
      if (occupation[low_index] < min_occ) {
        min_occ = occupation[low_index];
        min_index = low_index;
      }
      sum += occupation[low_index];
    }
    line_index = 0;
  }
  // Any lines not yet written get the last computed threshold.
  for (; line_index < line_count; line_index++)
    thresholds[line_index] = (sum - min_occ) / divisor + min_occ;
  //same out to end
}
void compute_page_skew ( TO_BLOCK_LIST *  blocks,
float &  page_m,
float &  page_err 
)

Definition at line 296 of file makerow.cpp.

                        {
  // Picks a representative row gradient (page_m) and row fit error
  // (page_err) for the page from the rows of all text blocks.
  // Blocks whose poly_block exists and is not text are ignored throughout.
  // Both outputs are set to 0 when there are no rows at all. Otherwise each
  // is the item at position row_count * textord_skew_ile of the collected
  // values, chosen independently for gradients and errors.
  inT32 row_count;               //total rows
  inT32 blob_count;              //total_blobs
  inT32 row_err;                 //integer error
  float *gradients;              //of rows
  float *errors;                 //of rows
  inT32 row_index;               //of total
  TO_ROW *row;                   //current row
  TO_BLOCK_IT block_it = blocks; //iterator
  TO_ROW_IT row_it;

  // Pass 1: count the rows and blobs of all text blocks.
  row_count = 0;
  blob_count = 0;
  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
       block_it.forward ()) {
    POLY_BLOCK* pb = block_it.data()->block->poly_block();
    if (pb != NULL && !pb->IsText())
      continue;  // Pretend non-text blocks don't exist.
    row_count += block_it.data ()->get_rows ()->length ();
    //count up rows
    row_it.set_to_list (block_it.data ()->get_rows ());
    for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())
      blob_count += row_it.data ()->blob_list ()->length ();
  }
  if (row_count == 0) {
    page_m = 0.0f;
    page_err = 0.0f;
    return;
  }
  // Both arrays are sized by the total blob count.
  // NOTE(review): the fallback loop below writes one entry per row, so this
  // assumes there are at least as many blobs as rows (no empty rows) -
  // confirm.
  gradients = (float *) alloc_mem (blob_count * sizeof (float));
  //get mem
  errors = (float *) alloc_mem (blob_count * sizeof (float));
  if (gradients == NULL || errors == NULL)
    MEMORY_OUT.error ("compute_page_skew", ABORT, NULL);

  // Pass 2: collect (gradient, error) samples from the rows.
  row_index = 0;
  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
       block_it.forward ()) {
    POLY_BLOCK* pb = block_it.data()->block->poly_block();
    if (pb != NULL && !pb->IsText())
      continue;  // Pretend non-text blocks don't exist.
    row_it.set_to_list (block_it.data ()->get_rows ());
    for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
      row = row_it.data ();
      blob_count = row->blob_list ()->length ();
      // Round the row's fit error up to an integer of at least 1.
      row_err = (inT32) ceil (row->line_error ());
      if (row_err <= 0)
        row_err = 1;
      if (textord_biased_skewcalc) {
        // Biased mode: a row contributes several identical samples, the
        // number shrinking as its error grows.
        // NOTE(review): blob_count is divided by row_err twice (once here
        // and once in the for-initialiser), so the weight is
        // blob_count / row_err / row_err. This looks unintentional but
        // changing it alters results - confirm before touching.
        blob_count /= row_err;
        for (blob_count /= row_err; blob_count > 0; blob_count--) {
          gradients[row_index] = row->line_m ();
          errors[row_index] = row->line_error ();
          row_index++;
        }
      }
      else if (blob_count >= textord_min_blobs_in_row) {
                                 //get gradient
        gradients[row_index] = row->line_m ();
        errors[row_index] = row->line_error ();
        row_index++;
      }
    }
  }
  if (row_index == 0) {
                                 //desperate
    // Nothing qualified above: fall back to one sample per row.
    for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
         block_it.forward ()) {
      POLY_BLOCK* pb = block_it.data()->block->poly_block();
      if (pb != NULL && !pb->IsText())
        continue;  // Pretend non-text blocks don't exist.
      row_it.set_to_list (block_it.data ()->get_rows ());
      for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
           row_it.forward ()) {
        row = row_it.data ();
        gradients[row_index] = row->line_m ();
        errors[row_index] = row->line_error ();
        row_index++;
      }
    }
  }
  // From here on row_count is the number of collected samples.
  row_count = row_index;
  row_index = choose_nth_item ((inT32) (row_count * textord_skew_ile),
    gradients, row_count);
  page_m = gradients[row_index];
  row_index = choose_nth_item ((inT32) (row_count * textord_skew_ile),
    errors, row_count);
  page_err = errors[row_index];
  free_mem(gradients);
  free_mem(errors);
}
inT32 compute_row_descdrop ( TO_ROW *  row,
float  gradient,
int  xheight_blob_count,
STATS *  heights 
)

Definition at line 1683 of file makerow.cpp.

                                                                       {
  // Estimates how far descenders drop below the baseline of a row.
  // The drop of each blob is measured from the line
  // gradient * x + row->parallel_c() at the blob's horizontal centre down
  // to the blob's bottom. Drops within the textord_descx_ratio_min/max band
  // (relative to row->xheight) are histogrammed and the mode is taken.
  // The result is minus that mode, or 0 when the mode's count plus the
  // number of potential ascenders is too small a share of
  // xheight_blob_count.

  // Tally the potential ascenders: piles of asc_heights restricted to the
  // textord_ascx_ratio_min/max band around the x-height.
  int asc_low = asc_heights->min_bucket();
  if ((asc_low / row->xheight) < textord_ascx_ratio_min) {
    asc_low = static_cast<int>(
        floor(row->xheight * textord_ascx_ratio_min + 0.5));
  }
  int asc_high = asc_heights->max_bucket();
  if ((asc_high / row->xheight) > textord_ascx_ratio_max) {
    asc_high = static_cast<int>(floor(row->xheight * textord_ascx_ratio_max));
  }
  int num_potential_asc = 0;
  for (int bucket = asc_low; bucket <= asc_high; ++bucket)
    num_potential_asc += asc_heights->pile_count(bucket);

  // Range of drops that may count as descenders.
  const inT32 min_drop =
    static_cast<inT32>(floor(row->xheight * textord_descx_ratio_min + 0.5));
  const inT32 max_drop =
    static_cast<inT32>(floor(row->xheight * textord_descx_ratio_max));

  // Histogram the drops of all blobs that start a character.
  STATS drops(min_drop, max_drop + 1);
  BLOBNBOX_IT blob_it = row->blob_list();
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    BLOBNBOX *blob = blob_it.data();
    if (blob->joined_to_prev())
      continue;
    const float xcentre = (blob->bounding_box().left() +
                           blob->bounding_box().right()) / 2.0f;
    const float drop = (gradient * xcentre + row->parallel_c() -
                        blob->bounding_box().bottom());
    if (drop >= min_drop && drop <= max_drop)
      drops.add(static_cast<int>(floor(drop + 0.5)), 1);
  }

  const int mode_drop = drops.mode();
  int mode_count = drops.pile_count(mode_drop);

  // Require enough supporting evidence before trusting the mode.
  float total_fraction =
    (textord_descheight_mode_fraction + textord_ascheight_mode_fraction);
  if (static_cast<float>(mode_count + num_potential_asc) <
      xheight_blob_count * total_fraction) {
    mode_count = 0;
  }

  int descdrop = 0;
  if (mode_count > 0)
    descdrop = -mode_drop;

  if (textord_debug_xheights) {
    tprintf("Descdrop: %d (potential ascenders %d, descenders %d)\n",
            descdrop, num_potential_asc, mode_count);
    drops.print();
  }
  return descdrop;
}
void compute_row_stats ( TO_BLOCK *  block,
BOOL8  testing_on 
)

Definition at line 1259 of file makerow.cpp.

                        {
  // Measures the spacing between consecutive rows of the block and, when
  // the spacings are consistent enough, uses the median spacing to update
  // block->line_size, line_spacing and max_blob_size. key_row and
  // baseline_offset are always updated.
  TO_ROW_IT row_it(block->get_rows());
  const inT16 list_length = row_it.length();

  // One slot per row is enough: only rows with a measured spacing are
  // stored.
  TO_ROW **spaced_rows =
    (TO_ROW **) alloc_mem(list_length * sizeof(TO_ROW *));
  if (spaced_rows == NULL)
    MEMORY_OUT.error("compute_row_stats", ABORT, NULL);

  // Walk the list backwards from the last row, recording for each visited
  // row the intercept difference to the row visited after it.
  inT16 spaced_count = 0;
  TO_ROW *previous = NULL;
  row_it.move_to_last();
  do {
    TO_ROW *current = row_it.data();
    if (previous != NULL) {
      spaced_rows[spaced_count++] = previous;
      previous->spacing = current->intercept() - previous->intercept();
      if (testing_on) {
        tprintf("Row at %g yields spacing of %g\n",
                current->intercept(), previous->spacing);
      }
    }
    previous = current;
    row_it.backward();
  } while (!row_it.at_last());

  // Defaults, used as-is when there is only a single row.
  block->key_row = previous;
  block->baseline_offset =
    fmod(previous->parallel_c(), block->line_spacing);
  if (testing_on) {
    tprintf("Blob based spacing=(%g,%g), offset=%g",
            block->line_size, block->line_spacing, block->baseline_offset);
  }

  if (spaced_count > 0) {
    // Inter-quartile range of the spacings, then the median row.
    inT32 pick = choose_nth_item(spaced_count * 3 / 4, spaced_rows,
                                 spaced_count, sizeof(TO_ROW *),
                                 row_spacing_order);
    float iqr = spaced_rows[pick]->spacing;
    pick = choose_nth_item(spaced_count / 4, spaced_rows, spaced_count,
                           sizeof(TO_ROW *), row_spacing_order);
    iqr -= spaced_rows[pick]->spacing;
    pick = choose_nth_item(spaced_count / 2, spaced_rows, spaced_count,
                           sizeof(TO_ROW *), row_spacing_order);
    TO_ROW *median_row = spaced_rows[pick];
    block->key_row = median_row;
    if (testing_on)
      tprintf(" row based=%g(%g)", median_row->spacing, iqr);

    // Only trust the median when there are enough rows and the spread is
    // small relative to it.
    if (spaced_count > 2
        && iqr < median_row->spacing * textord_linespace_iqrlimit) {
      if (textord_new_initial_xheight) {
        // Line size is the median spacing, capped at the old line spacing.
        block->line_size = median_row->spacing < block->line_spacing
          ? median_row->spacing
          : block->line_spacing;
      } else if (median_row->spacing < block->line_spacing
                 && median_row->spacing > block->line_size) {
        // Median lies between the old size and spacing: adopt it.
        block->line_size = median_row->spacing;
      } else if (median_row->spacing > block->line_spacing) {
        // Median is too big, so use the old spacing as the maximum.
        block->line_size = block->line_spacing;
      }
      if (block->line_size < textord_min_xheight)
        block->line_size = (float) textord_min_xheight;
      block->line_spacing = median_row->spacing;
      block->max_blob_size =
        block->line_spacing * textord_excess_blobsize;
    }
    block->baseline_offset = fmod(median_row->intercept(),
                                  block->line_spacing);
  }

  if (testing_on) {
    tprintf("\nEstimate line size=%g, spacing=%g, offset=%g\n",
            block->line_size, block->line_spacing, block->baseline_offset);
  }
  free_mem(spaced_rows);
}
int compute_xheight_from_modes ( STATS heights,
STATS floating_heights,
bool  cap_only,
int  min_height,
int  max_height,
float *  xheight,
float *  ascrise 
)

Definition at line 1587 of file makerow.cpp.

                                                    {
  // Estimates an x-height and ascender rise from the histogram of blob
  // heights and writes them to *xheight and *ascrise.
  //   heights          - histogram of blob heights
  //   floating_heights - histogram of the "floating" blobs; their counts are
  //                      discounted when judging an x-height candidate
  //   cap_only         - when set, at most one mode is kept, so the
  //                      x-height/ascender pairing loop below never runs
  //   min_height, max_height - range handed to compute_height_modes() and
  //                      used when temporarily removing floating counts
  // Returns the pile count backing the chosen x-height, or 0 if the mode of
  // heights has an empty pile (in which case the outputs are not written).
  int blob_index = heights->mode();  // find mode
  int blob_count = heights->pile_count(blob_index);  // get count of mode
  if (textord_debug_xheights) {
    tprintf("min_height=%d, max_height=%d, mode=%d, count=%d, total=%d\n",
            min_height, max_height, blob_index, blob_count,
            heights->get_total());
    heights->print();
    floating_heights->print();
  }
  if (blob_count == 0) return 0;
  int modes[MAX_HEIGHT_MODES];  // biggest piles
  bool in_best_pile = FALSE;
  int prev_size = -MAX_INT32;
  int best_count = 0;
  int mode_count = compute_height_modes(heights, min_height, max_height,
                                        modes, MAX_HEIGHT_MODES);
  if (cap_only && mode_count > 1)
    mode_count = 1;
  int x;
  if (textord_debug_xheights) {
    tprintf("found %d modes: ", mode_count);
    for (x = 0; x < mode_count; x++) tprintf("%d ", modes[x]);
    tprintf("\n");
  }

  // Try each mode except the last as an x-height candidate; the last one is
  // excluded because it has no later mode to pair with as an ascender height.
  // NOTE(review): this presumably relies on compute_height_modes() returning
  // the modes in increasing height order -- confirm.
  for (x = 0; x < mode_count - 1; x++) {
    if (modes[x] != prev_size + 1)
      in_best_pile = FALSE;    // had empty height
    // Support for this candidate, ignoring floating blobs.
    int modes_x_count = heights->pile_count(modes[x]) -
      floating_heights->pile_count(modes[x]);
    if ((modes_x_count >= blob_count * textord_xheight_mode_fraction) &&
        (in_best_pile || modes_x_count > best_count)) {
      for (int asc = x + 1; asc < mode_count; asc++) {
        float ratio =
          static_cast<float>(modes[asc]) / static_cast<float>(modes[x]);
        // Accept the pair only if the height ratio lies strictly between the
        // configured limits and the ascender pile is populated enough.
        if (textord_ascx_ratio_min < ratio &&
            ratio < textord_ascx_ratio_max &&
            (heights->pile_count(modes[asc]) >=
             blob_count * textord_ascheight_mode_fraction)) {
          if (modes_x_count > best_count) {
            in_best_pile = true;
            best_count = modes_x_count;
          }
          if (textord_debug_xheights) {
            tprintf("X=%d, asc=%d, count=%d, ratio=%g\n",
                    modes[x], modes[asc]-modes[x], modes_x_count, ratio);
          }
          prev_size = modes[x];
          *xheight = static_cast<float>(modes[x]);
          *ascrise = static_cast<float>(modes[asc] - modes[x]);
        }
      }
    }
  }
  // NOTE(review): *xheight is only assigned inside the loop above, so this
  // test presumably relies on the caller passing *xheight in as 0 -- confirm.
  if (*xheight == 0) {  // single mode
    // Remove counts of the "floating" blobs (the ones whose height is too
    // small in relation to the top end of their bounding box) from heights
    // before computing the single-mode xheight.
    // Restore the counts in heights after the mode is found, since
    // floating blobs might be useful for determining potential ascenders
    // in compute_row_descdrop().
    if (floating_heights->get_total() > 0) {
      for (x = min_height; x < max_height; ++x) {
        heights->add(x, -(floating_heights->pile_count(x)));
      }
      blob_index = heights->mode();  // find the modified mode
      for (x = min_height; x < max_height; ++x) {
        heights->add(x, floating_heights->pile_count(x));
      }
    }
    *xheight = static_cast<float>(blob_index);
    *ascrise = 0.0f;
    // Counted after the floating piles were restored, so floating blobs at
    // this height are included in the returned count.
    best_count = heights->pile_count(blob_index);
    if (textord_debug_xheights)
      tprintf("Single mode xheight set to %g\n", *xheight);
  } else if (textord_debug_xheights) {
    tprintf("Multi-mode xheight set to %g, asc=%g\n", *xheight, *ascrise);
  }
  return best_count;
}
void correct_row_xheight ( TO_ROW row,
float  xheight,
float  ascrise,
float  descdrop 
)

Definition at line 1805 of file makerow.cpp.

                                                        {
  // Reconciles one row's xheight / ascrise / descdrop with the block-level
  // values passed in (xheight, ascrise, descdrop), depending on the category
  // that get_row_category() reports for the row.
  const ROW_CATEGORY category = get_row_category(row);
  if (textord_debug_xheights) {
    tprintf("correcting row xheight: row->xheight %.4f"
            ", row->acrise %.4f row->descdrop %.4f\n",
            row->xheight, row->ascrise, row->descdrop);
  }
  const bool near_xheight =
    within_error_margin(row->xheight, xheight, textord_xheight_error_margin);
  const bool near_capheight =
    within_error_margin(row->xheight, xheight + ascrise,
                        textord_xheight_error_margin);

  // The block averages replace the row values when:
  // -- the row xheight could not be determined at all (ROW_INVALID)
  // -- the row has descenders (e.g. "many groups", "ISBN 12345 p.3") and its
  //    xheight is close to either the average xheight or the cap height
  // -- the row has neither ascenders nor descenders, but its xheight is
  //    close to the average xheight (e.g. a row with "www.mmm.com")
  bool adopt_average = false;
  switch (category) {
    case ROW_INVALID:
      adopt_average = true;
      break;
    case ROW_DESCENDERS_FOUND:
      adopt_average = near_xheight || near_capheight;
      break;
    case ROW_UNKNOWN:
      adopt_average = near_xheight;
      break;
    default:
      break;
  }

  if (adopt_average) {
    if (textord_debug_xheights) tprintf("using average xheight\n");
    row->xheight = xheight;
    row->ascrise = ascrise;
    row->descdrop = descdrop;
  } else if (category == ROW_ASCENDERS_FOUND) {
    // Row measurements are trusted; only a non-negative descdrop is rescaled
    // from the block proportions.
    if (row->descdrop >= 0.0) {
      row->descdrop = row->xheight * (descdrop / xheight);
    }
  } else if (category == ROW_DESCENDERS_FOUND) {
    // Treat the row as mostly lowercase with a correctly measured xheight.
    // (This cannot be told apart from a row with descenders whose most
    // common height is actually the cap height.)
    if (textord_debug_xheights) tprintf("lowercase, corrected ascrise\n");
    row->ascrise = row->xheight * (ascrise / xheight);
  } else if (category == ROW_UNKNOWN) {
    // Remaining case: an all-caps or small-caps row.
    row->all_caps = true;
    if (near_capheight) {  // regular all caps
      if (textord_debug_xheights) tprintf("all caps\n");
      row->xheight = xheight;
      row->ascrise = ascrise;
      row->descdrop = descdrop;
    } else {  // small caps or caps with an odd xheight
      if (textord_debug_xheights) {
        const bool between_x_and_cap =
          row->xheight < xheight + ascrise && row->xheight > xheight;
        if (between_x_and_cap) {
          tprintf("small caps\n");
        } else {
          tprintf("all caps with irregular xheight\n");
        }
      }
      // Split the measured height into xheight and ascrise using the block
      // proportions, then scale descdrop from the new xheight.
      row->ascrise = row->xheight * (ascrise / (xheight + ascrise));
      row->xheight -= row->ascrise;
      row->descdrop = row->xheight * (descdrop / xheight);
    }
  }

  if (textord_debug_xheights) {
    tprintf("corrected row->xheight = %.4f, row->acrise = %.4f, row->descdrop"
            " = %.4f\n", row->xheight, row->ascrise, row->descdrop);
  }
}
void delete_non_dropout_rows ( TO_BLOCK block,
float  gradient,
FCOORD  rotation,
inT32  block_edge,
BOOL8  testing_on 
)

delete_non_dropout_rows

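Delete rows that lie too far from a dropout in the line-occupation profile, or that lose out to a neighbouring row, and hand the blobs of every row back to the block's blob list.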

Definition at line 667 of file makerow.cpp.

                              {
  // Filters the rows of a block against the dropouts of its projected
  // line-occupation profile. Every row for which find_best_dropout_row()
  // returns TRUE is deleted; the blobs of deleted rows, and afterwards of
  // all surviving rows, are moved back into block->blobs.
  //   block      - block whose rows are examined
  //   gradient   - skew gradient used for deskewing and for the projection
  //   rotation   - only used when plotting deleted rows
  //   block_edge - only used when plotting deleted rows
  //   testing_on - enables debug drawing and tracing
  // Returns early, doing nothing, if the block has no rows.
  TBOX block_box;                 //deskewed block
  inT32 *deltas;                 //change in occupation
  inT32 *occupation;             //of pixel coords
  inT32 max_y;                   //in block
  inT32 min_y;
  inT32 line_index;              //of scan line
  inT32 line_count;              //no of scan lines
  inT32 distance;                //to drop-out
  inT32 xleft;                   //of block
  inT32 ybottom;                 //of block
  TO_ROW *row;                   //current row
  TO_ROW_IT row_it = block->get_rows ();
  BLOBNBOX_IT blob_it = &block->blobs;

  if (row_it.length () == 0)
    return;                      //empty block
  block_box = deskew_block_coords (block, gradient);
  xleft = block->block->bounding_box ().left ();
  ybottom = block->block->bounding_box ().bottom ();
  min_y = block_box.bottom () - 1;
  max_y = block_box.top () + 1;
  // Widen [min_y, max_y] so that every row intercept indexes strictly inside
  // the occupation arrays.
  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
    line_index = (inT32) floor (row_it.data ()->intercept ());
    if (line_index <= min_y)
      min_y = line_index - 1;
    if (line_index >= max_y)
      max_y = line_index + 1;
  }
  line_count = max_y - min_y + 1;
  if (line_count <= 0)
    return;                      //empty block
  deltas = (inT32 *) alloc_mem (line_count * sizeof (inT32));
  occupation = (inT32 *) alloc_mem (line_count * sizeof (inT32));
  if (deltas == NULL || occupation == NULL)
    // Report this function's own name, not compute_line_spacing's.
    MEMORY_OUT.error ("delete_non_dropout_rows", ABORT, NULL);

  compute_line_occupation(block, gradient, min_y, max_y, occupation, deltas);
  // deltas is reused as the thresholds array here, and then as the dropout
  // distance array by compute_dropout_distances() below.
  compute_occupation_threshold ((inT32)
    ceil (block->line_spacing *
    (tesseract::CCStruct::kDescenderFraction +
    tesseract::CCStruct::kAscenderFraction)),
    (inT32) ceil (block->line_spacing *
    (tesseract::CCStruct::kXHeightFraction +
    tesseract::CCStruct::kAscenderFraction)),
    line_count, occupation, deltas);
#ifndef GRAPHICS_DISABLED
  if (testing_on) {
    draw_occupation(xleft, ybottom, min_y, max_y, occupation, deltas);
  }
#endif
  compute_dropout_distances(occupation, deltas, line_count);
  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
    row = row_it.data ();
    line_index = (inT32) floor (row->intercept ());
    distance = deltas[line_index - min_y];
    if (find_best_dropout_row (row, distance, block->line_spacing / 2,
    line_index, &row_it, testing_on)) {
#ifndef GRAPHICS_DISABLED
      if (testing_on)
        plot_parallel_row(row, gradient, block_edge,
                          ScrollView::WHITE, rotation);
#endif
      // Rescue the blobs before the row object is destroyed.
      blob_it.add_list_after (row_it.data ()->blob_list ());
      delete row_it.extract ();  //too far away
    }
  }
  // Surviving rows stay in the block, but their blobs go back to block->blobs.
  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
    blob_it.add_list_after (row_it.data ()->blob_list ());
  }

  free_mem(deltas);
  free_mem(occupation);
}
TBOX deskew_block_coords ( TO_BLOCK block,
float  gradient 
)

Definition at line 835 of file makerow.cpp.

                         {
  // Returns the union of the bounding boxes of all blobs in all rows of the
  // block, after rotating each box by the unit vector that undoes the skew
  // given by gradient.
  const float length = sqrt (gradient * gradient + 1);
  const FCOORD rotation (1 / length, -gradient / length);  //deskew vector

  TBOX result;                   //accumulated block bounds
  TO_ROW_IT row_it = block->get_rows ();
  BLOBNBOX_IT blob_it;
  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
    blob_it.set_to_list (row_it.data ()->blob_list ());
    for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
         blob_it.forward ()) {
      TBOX deskewed = blob_it.data ()->bounding_box ();
      deskewed.rotate (rotation);
      result += deskewed;
    }
  }
  return result;
}
void expand_rows ( ICOORD  page_tr,
TO_BLOCK block,
float  gradient,
FCOORD  rotation,
inT32  block_edge,
BOOL8  testing_on 
)

Definition at line 1065 of file makerow.cpp.

                  {
  // Re-assigns blobs to rows, refits the rows parallel to gradient, and then
  // grows each row's vertical limits towards an allowance derived from
  // block->line_size and textord_expansion_factor. A neighbouring row that
  // lies entirely inside the allowance is merged into the current row; a
  // neighbour that only partly intrudes truncates or blocks the expansion.
  // Returns early if the block has no rows.
  BOOL8 swallowed_row;           //eaten a neighbour
  float y_max, y_min;            //new row limits
  float y_bottom, y_top;         //allowed limits
  TO_ROW *test_row;              //next row
  TO_ROW *row;                   //current row
                                 //iterators
  BLOBNBOX_IT blob_it = &block->blobs;
  TO_ROW_IT row_it = block->get_rows ();

#ifndef GRAPHICS_DISABLED
  if (textord_show_expanded_rows && testing_on) {
    if (to_win == NULL)
      create_to_win(page_tr);
  }
#endif

  adjust_row_limits(block);  //shift min,max.
  // compute_row_stats() runs either here, before blob assignment, or after
  // fit_parallel_rows() below, depending on textord_new_initial_xheight.
  if (textord_new_initial_xheight) {
    if (block->get_rows ()->length () == 0)
      return;
    compute_row_stats(block, textord_show_expanded_rows &&testing_on);
  }
  assign_blobs_to_rows (block, &gradient, 4, TRUE, FALSE, FALSE);
  //get real membership
  if (block->get_rows ()->length () == 0)
    return;
  fit_parallel_rows(block,
                    gradient,
                    rotation,
                    block_edge,
                    textord_show_expanded_rows &&testing_on);
  if (!textord_new_initial_xheight)
    compute_row_stats(block, textord_show_expanded_rows &&testing_on);
  // Walk the rows from the last one backwards; the loop ends when stepping
  // backward wraps round to the last row again.
  row_it.move_to_last ();
  do {
    row = row_it.data ();
    y_max = row->max_y ();       //get current limits
    y_min = row->min_y ();
    // Furthest the row may extend below and above its intercept.
    y_bottom = row->intercept () - block->line_size * textord_expansion_factor *
      tesseract::CCStruct::kDescenderFraction;
    y_top = row->intercept () + block->line_size * textord_expansion_factor *
        (tesseract::CCStruct::kXHeightFraction +
         tesseract::CCStruct::kAscenderFraction);
    if (y_min > y_bottom) {      //expansion allowed
      if (textord_show_expanded_rows && testing_on)
        tprintf("Expanding bottom of row at %f from %f to %f\n",
                row->intercept(), y_min, y_bottom);
                                 //expandable
      swallowed_row = TRUE;
      // Keep testing the following row for as long as rows are being merged.
      while (swallowed_row && !row_it.at_last ()) {
        swallowed_row = FALSE;
                                 //get next one
        test_row = row_it.data_relative (1);
                                 //overlaps space
        if (test_row->max_y () > y_bottom) {
          if (test_row->min_y () > y_bottom) {
            // The neighbour lies wholly above the allowed bottom: absorb it.
            if (textord_show_expanded_rows && testing_on)
              tprintf("Eating row below at %f\n", test_row->intercept());
            row_it.forward ();
#ifndef GRAPHICS_DISABLED
            if (textord_show_expanded_rows && testing_on)
              plot_parallel_row(test_row,
                                gradient,
                                block_edge,
                                ScrollView::WHITE,
                                rotation);
#endif
            blob_it.set_to_list (row->blob_list ());
            blob_it.add_list_after (test_row->blob_list ());
                                 //swallow complete row
            delete row_it.extract ();
            // Step back onto the current row after removing the neighbour.
            row_it.backward ();
            swallowed_row = TRUE;
          }
          else if (test_row->max_y () < y_min) {
                                 //shorter limit
            y_bottom = test_row->max_y ();
            if (textord_show_expanded_rows && testing_on)
              tprintf("Truncating limit to %f due to touching row at %f\n",
                      y_bottom, test_row->intercept());
          }
          else {
            y_bottom = y_min;    //can't expand it
            if (textord_show_expanded_rows && testing_on)
              tprintf("Not expanding limit beyond %f due to touching row at %f\n",
                      y_bottom, test_row->intercept());
          }
        }
      }
      y_min = y_bottom;          //expand it
    }
    if (y_max < y_top) {         //expansion allowed
      if (textord_show_expanded_rows && testing_on)
        tprintf("Expanding top of row at %f from %f to %f\n",
                row->intercept(), y_max, y_top);
      swallowed_row = TRUE;
      // Mirror image of the bottom case, looking at the preceding row.
      while (swallowed_row && !row_it.at_first ()) {
        swallowed_row = FALSE;
                                 //get one above
        test_row = row_it.data_relative (-1);
        if (test_row->min_y () < y_top) {
          if (test_row->max_y () < y_top) {
            // The neighbour lies wholly below the allowed top: absorb it.
            if (textord_show_expanded_rows && testing_on)
              tprintf("Eating row above at %f\n", test_row->intercept());
            row_it.backward ();
            blob_it.set_to_list (row->blob_list ());
#ifndef GRAPHICS_DISABLED
            if (textord_show_expanded_rows && testing_on)
              plot_parallel_row(test_row,
                                gradient,
                                block_edge,
                                ScrollView::WHITE,
                                rotation);
#endif
            blob_it.add_list_after (test_row->blob_list ());
                                 //swallow complete row
            delete row_it.extract ();
            // Step forward onto the current row after removing the neighbour.
            row_it.forward ();
            swallowed_row = TRUE;
          }
          else if (test_row->min_y () < y_max) {
                                 //shorter limit
            y_top = test_row->min_y ();
            if (textord_show_expanded_rows && testing_on)
              tprintf("Truncating limit to %f due to touching row at %f\n",
                      y_top, test_row->intercept());
          }
          else {
            y_top = y_max;       //can't expand it
            if (textord_show_expanded_rows && testing_on)
              tprintf("Not expanding limit beyond %f due to touching row at %f\n",
                      y_top, test_row->intercept());
          }
        }
      }
      y_max = y_top;
    }
                                 //new limits
    row->set_limits (y_min, y_max);
    row_it.backward ();
  }
  while (!row_it.at_last ());
}
void fill_heights ( TO_ROW row,
float  gradient,
int  min_height,
int  max_height,
STATS heights,
STATS floating_heights 
)

Definition at line 1526 of file makerow.cpp.

                                                                           {
  // Adds the height above the baseline of each blob in the row to `heights`.
  // Blobs whose own box height is a small fraction of that height (below
  // textord_min_blob_height_fraction) are also added to `floating_heights`.
  // Only heights within [min_height, max_height] are recorded.
  BLOBNBOX_IT blob_it = row->blob_list();
  if (blob_it.empty()) return;  // no blobs in this row
  const bool skip_repeats =
    row->rep_chars_marked() && row->num_repeated_sets() > 0;
  do {
    BLOBNBOX *blob = blob_it.data();
    if (!blob->joined_to_prev()) {
      const float xcentre = (blob->bounding_box().left() +
                             blob->bounding_box().right()) / 2.0f;
      float top = blob->bounding_box().top();
      const float height = blob->bounding_box().height();
      // Turn the absolute top into a height above the row's baseline.
      if (textord_fix_xheight_bug) {
        top -= row->baseline.y(xcentre);
      } else {
        top -= gradient * xcentre + row->parallel_c();
      }
      if (top >= min_height && top <= max_height) {
        heights->add(static_cast<inT32>(floor(top + 0.5)), 1);
        if (height / top < textord_min_blob_height_fraction) {
          floating_heights->add(static_cast<inT32>(floor(top + 0.5)), 1);
        }
      }
    }
    // Always step past the current blob. If it belongs to a repeated set,
    // also step past the rest of that set, since repeated chars are likely
    // to skew the height stats.
    blob_it.forward();
    if (skip_repeats && blob->repeated_set() != 0) {
      const int current_set = blob->repeated_set();
      while (!blob_it.at_first() &&
             blob_it.data()->repeated_set() == current_set) {
        blob_it.forward();
        if (textord_debug_xheights)
          tprintf("Skipping repeated char when computing xheight\n");
      }
    }
  } while (!blob_it.at_first());
}
BOOL8 find_best_dropout_row ( TO_ROW row,
inT32  distance,
float  dist_limit,
inT32  line_index,
TO_ROW_IT *  row_it,
BOOL8  testing_on 
)

Definition at line 755 of file makerow.cpp.

                             {
  // Decides whether `row` should be deleted. Returns TRUE (delete) when
  //  - the absolute dropout distance exceeds dist_limit, or
  //  - a neighbouring row's line index lies strictly between this row and
  //    the mirror position line_index + 2 * distance, or
  //  - a neighbour sits at the same line index or at that mirror position
  //    and is at least as believable as this row.
  // Returns FALSE (keep) otherwise. The sign of distance selects which
  // direction of the row list is searched.
  inT32 next_index;              //of neighbouring row
  inT32 row_offset;              //from current row
  inT32 abs_dist;                //absolute distance
  inT8 row_inc;                  //increment to row_index
  TO_ROW *next_row;              //neighbouring row

  if (testing_on)
    tprintf ("Row at %g(%g), dropout dist=%d,",
      row->intercept (), row->parallel_c (), distance);
  // Negative distance: search forward in the list; otherwise search backward.
  if (distance < 0) {
    row_inc = 1;
    abs_dist = -distance;
  }
  else {
    row_inc = -1;
    abs_dist = distance;
  }
  if (abs_dist > dist_limit) {
    if (testing_on) {
      tprintf (" too far - deleting\n");
    }
    return TRUE;
  }
  // Only search if there is a neighbour in the chosen direction.
  if ((distance < 0 && !row_it->at_last ())
  || (distance >= 0 && !row_it->at_first ())) {
    row_offset = row_inc;
    do {
      next_row = row_it->data_relative (row_offset);
      next_index = (inT32) floor (next_row->intercept ());
      if ((distance < 0
        && next_index < line_index
        && next_index > line_index + distance + distance)
        || (distance >= 0
        && next_index > line_index
      && next_index < line_index + distance + distance)) {
        if (testing_on) {
          tprintf (" nearer neighbour (%d) at %g\n",
            line_index + distance - next_index,
            next_row->intercept ());
        }
        return TRUE;             //other is nearer
      }
      else if (next_index == line_index
      || next_index == line_index + distance + distance) {
        // Tie on position: the more believable row wins, and the neighbour
        // wins an exact tie.
        if (row->believability () <= next_row->believability ()) {
          if (testing_on) {
            tprintf (" equal but more believable at %g (%g/%g)\n",
              next_row->intercept (),
              row->believability (),
              next_row->believability ());
          }
          return TRUE;           //other is more believable
        }
      }
      row_offset += row_inc;
    }
    // Continue only while neighbours keep tying on position.
    // NOTE(review): when row_inc is -1, row_offset is negative and therefore
    // always below length(), so this bound does not limit the backward
    // search; what data_relative() does past the list start is not visible
    // here -- confirm.
    while ((next_index == line_index
      || next_index == line_index + distance + distance)
      && row_offset < row_it->length ());
    if (testing_on)
      tprintf (" keeping\n");
  }
  return FALSE;
}
void fit_lms_line ( TO_ROW row)

Definition at line 276 of file makerow.cpp.

                               {
  // Fits a line through the bottom-centre point of every blob in the row and
  // stores gradient, intercept and fit error on the row via set_line().
  tesseract::DetLineFit fitter;
  BLOBNBOX_IT blob_it = row->blob_list();
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    const TBOX& blob_box = blob_it.data()->bounding_box();
    const int mid_x = (blob_box.left() + blob_box.right()) / 2;
    fitter.Add(ICOORD(mid_x, blob_box.bottom()));
  }

  float gradient, intercept;     // fitted line
  double fit_error = fitter.Fit(&gradient, &intercept);
  row->set_line(gradient, intercept, fit_error);
}
void fit_parallel_lms ( float  gradient,
TO_ROW row 
)

Definition at line 2093 of file makerow.cpp.

                                                   {
  // Fits a line of the given gradient through the bottom-centre points of
  // the row's blobs (skipping blobs joined to their predecessor) and stores
  // it as the row's parallel line. The same fit is then stored with
  // set_line(), unless textord_straight_baselines is set and there are more
  // than textord_lms_line_trials blobs, in which case an unconstrained fit
  // is stored there instead.
  tesseract::DetLineFit fitter;
  int used_blobs = 0;            // blobs contributing a point
  BLOBNBOX_IT blob_it = row->blob_list();
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    if (blob_it.data()->joined_to_prev())
      continue;
    const TBOX& blob_box = blob_it.data()->bounding_box();
    fitter.Add(ICOORD((blob_box.left() + blob_box.right()) / 2,
                      blob_box.bottom()));
    ++used_blobs;
  }

  float intercept;               // fitted line
  double fit_error = fitter.ConstrainedFit(gradient, &intercept);
  row->set_parallel_line(gradient, intercept, fit_error);
  if (textord_straight_baselines && used_blobs > textord_lms_line_trials) {
    // Free fit: this overwrites the local gradient as well as the intercept.
    fit_error = fitter.Fit(&gradient, &intercept);
  }
  row->set_line(gradient, intercept, fit_error);
}
void fit_parallel_rows ( TO_BLOCK block,
float  gradient,
FCOORD  rotation,
inT32  block_edge,
BOOL8  testing_on 
)

Definition at line 2051 of file makerow.cpp.

                        {
  // Deletes rows that hold no blobs, fits a parallel line to every other row,
  // optionally plots the rows in cycling colours, and finally re-sorts the
  // row list with row_y_order.
  TO_ROW_IT row_it = block->get_rows ();

  row_it.move_to_first ();
  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
    if (!row_it.data ()->blob_list ()->empty ()) {
      fit_parallel_lms (gradient, row_it.data ());
    } else {
      delete row_it.extract ();  //nothing in it
    }
  }
#ifndef GRAPHICS_DISABLED
  if (testing_on) {
    ScrollView::Color plot_colour = ScrollView::RED;
    for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
      plot_parallel_row (row_it.data (), gradient,
        block_edge, plot_colour, rotation);
      // Advance to the next colour, wrapping back to RED after MAGENTA.
      plot_colour = (ScrollView::Color) (plot_colour + 1);
      if (plot_colour > ScrollView::MAGENTA)
        plot_colour = ScrollView::RED;
    }
  }
#endif
  row_it.sort (row_y_order);     //may have gone out of order
}
void get_min_max_xheight ( int  block_linesize,
int *  min_height,
int *  max_height 
) [inline]

Definition at line 115 of file makerow.h.

                                                                  {
  // Derives the x-height search range from the block line size: the lower
  // bound is block_linesize * textord_minxh rounded down, but never below
  // textord_min_xheight; the upper bound is three line sizes rounded up.
  inT32 lower = static_cast<inT32>(floor(block_linesize * textord_minxh));
  if (lower < textord_min_xheight) {
    lower = textord_min_xheight;
  }
  *min_height = lower;
  *max_height = static_cast<inT32>(ceil(block_linesize * 3.0));
}
ROW_CATEGORY get_row_category ( const TO_ROW row) [inline]

Definition at line 122 of file makerow.h.

                                                        {
  // Classifies a row by which of its metrics are set: a non-positive xheight
  // is invalid; otherwise a positive ascrise wins, then a non-zero descdrop.
  if (row->xheight <= 0) {
    return ROW_INVALID;
  }
  if (row->ascrise > 0) {
    return ROW_ASCENDERS_FOUND;
  }
  if (row->descdrop != 0) {
    return ROW_DESCENDERS_FOUND;
  }
  return ROW_UNKNOWN;
}
double* linear_spline_baseline ( TO_ROW row,
TO_BLOCK block,
inT32 segments,
inT32  xstarts[] 
)

Definition at line 2311 of file makerow.cpp.

  {
  // Builds a piecewise-linear baseline for the row. The blobs are counted,
  // split into blobcount / textord_spline_medianwin segments (at least one),
  // and a line is fitted to the bottom-centre points of each segment.
  // Returns an alloc_mem'd array of 3 doubles per segment holding
  // (0, gradient, intercept); segment start x-coordinates are written to
  // xstarts. The caller is responsible for releasing the returned array.
  // NOTE(review): the incoming value of segments is overwritten before use;
  // the rendered signature shows it by value, but it is presumably a
  // reference in the real source so the caller sees the count -- confirm.
  int blobcount;                 //no of blobs
  int blobindex;                 //current blob
  int index1, index2;            //blob numbers
  int blobs_per_segment;         //blobs in each
  TBOX box;                       //blob box
  TBOX new_box;                   //new_it box
                                 //blobs
  BLOBNBOX_IT blob_it = row->blob_list ();
  BLOBNBOX_IT new_it = blob_it;  //front end
  float b, c;                    //fitted curve
  tesseract::DetLineFit lms;
  double *coeffs;                //quadratic coeffs
  inT32 segment;                 //current segment

  // First pass: record the left edge of the first box and count the boxes
  // until the iterator is back at the start of the list.
  box = box_next_pre_chopped (&blob_it);
  xstarts[0] = box.left ();
  blobcount = 1;
  while (!blob_it.at_first ()) {
    blobcount++;
    box = box_next_pre_chopped (&blob_it);
  }
  segments = blobcount / textord_spline_medianwin;
  if (segments < 1)
    segments = 1;
  blobs_per_segment = blobcount / segments;
  coeffs = (double *) alloc_mem (segments * 3 * sizeof (double));
  if (textord_oldbl_debug)
    tprintf
      ("Linear splining baseline of %d blobs at (%d,%d), into %d segments of %d blobs\n",
      blobcount, box.left (), box.bottom (), segments, blobs_per_segment);
  segment = 1;
  // Advance the second iterator by half a segment so that the two iterators
  // fit alternating, staggered windows of blobs.
  for (index2 = 0; index2 < blobs_per_segment / 2; index2++)
    box_next_pre_chopped(&new_it);
  index1 = 0;
  blobindex = index2;
  do {
    // Odd-numbered segments are fitted from blob_it.
    blobindex += blobs_per_segment;
    lms.Clear();
    // The final segment also takes every blob that is left over.
    while (index1 < blobindex || (segment == segments && index1 < blobcount)) {
      box = box_next_pre_chopped (&blob_it);
      int middle = (box.left() + box.right()) / 2;
      lms.Add(ICOORD(middle, box.bottom()));
      index1++;
      if (index1 == blobindex - blobs_per_segment / 2
      || index1 == blobcount - 1) {
        xstarts[segment] = box.left ();
      }
    }
    lms.Fit(&b, &c);
    // Quadratic term is zero: each segment is a straight line.
    coeffs[segment * 3 - 3] = 0;
    coeffs[segment * 3 - 2] = b;
    coeffs[segment * 3 - 1] = c;
    segment++;
    if (segment > segments)
      break;

    // Even-numbered segments are fitted from new_it.
    blobindex += blobs_per_segment;
    lms.Clear();
    while (index2 < blobindex || (segment == segments && index2 < blobcount)) {
      new_box = box_next_pre_chopped (&new_it);
      int middle = (new_box.left() + new_box.right()) / 2;
      lms.Add(ICOORD (middle, new_box.bottom()));
      index2++;
      if (index2 == blobindex - blobs_per_segment / 2
      || index2 == blobcount - 1) {
        xstarts[segment] = new_box.left ();
      }
    }
    lms.Fit(&b, &c);
    coeffs[segment * 3 - 3] = 0;
    coeffs[segment * 3 - 2] = b;
    coeffs[segment * 3 - 1] = c;
    segment++;
  }
  while (segment <= segments);
  return coeffs;
}
void make_baseline_spline ( TO_ROW row,
TO_BLOCK block 
)

Definition at line 2178 of file makerow.cpp.

                                           {
  // Builds row->baseline as a QSPLINE.
  // If segment_baseline() succeeds and neither textord_straight_baselines
  // nor textord_parallel_baselines is set, the spline is the piecewise-linear
  // fit from linear_spline_baseline(). Otherwise it is a single segment using
  // the row's already fitted line (line_m, line_c).
  // Both temporary arrays are released before returning.
  inT32 *xstarts;                // spline boundaries
  double *coeffs;                // quadratic coeffs
  inT32 segments;                // no of segments, set by segment_baseline

  // One boundary more than there are blobs in the row.
  xstarts =
    (inT32 *) alloc_mem((row->blob_list()->length() + 1) * sizeof(inT32));
  if (segment_baseline(row, block, segments, xstarts)
  && !textord_straight_baselines && !textord_parallel_baselines) {
    coeffs = linear_spline_baseline(row, block, segments, xstarts);
  } else {
    // Collapse to one segment running from the first to the last boundary.
    xstarts[1] = xstarts[segments];
    segments = 1;
    coeffs = (double *) alloc_mem (3 * sizeof (double));
    coeffs[0] = 0;
    coeffs[1] = row->line_m ();
    coeffs[2] = row->line_c ();
  }
  row->baseline = QSPLINE (segments, xstarts, coeffs);
  free_mem(coeffs);
  free_mem(xstarts);
}
void make_initial_textrows ( ICOORD  page_tr,
TO_BLOCK block,
FCOORD  rotation,
BOOL8  testing_on 
)

Definition at line 236 of file makerow.cpp.

                            {
  // Makes the initial rows of a block: assigns blobs to rows without a known
  // skew, fits a line to every resulting row, and optionally plots the rows
  // in cycling colours.
  TO_ROW_IT row_it = block->get_rows ();
  const bool show_rows = textord_show_initial_rows && testing_on;

#ifndef GRAPHICS_DISABLED
  if (show_rows && to_win == NULL) {
    create_to_win(page_tr);
  }
#endif
                                 //guess skew
  assign_blobs_to_rows (block, NULL, 0, TRUE, TRUE, show_rows);
  row_it.move_to_first ();
  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
    fit_lms_line (row_it.data ());
  }
#ifndef GRAPHICS_DISABLED
  if (show_rows) {
    ScrollView::Color plot_colour = ScrollView::RED;
    for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
      plot_to_row (row_it.data (), plot_colour, rotation);
      // Advance to the next colour, wrapping back to RED after MAGENTA.
      plot_colour = (ScrollView::Color) (plot_colour + 1);
      if (plot_colour > ScrollView::MAGENTA)
        plot_colour = ScrollView::RED;
    }
  }
#endif
}
float make_rows ( ICOORD  page_tr,
TO_BLOCK_LIST *  port_blocks 
)

Definition at line 197 of file makerow.cpp.

                                                            {
  // Makes text rows for every block in port_blocks: builds initial rows per
  // block, computes one page-wide skew from all blocks, then cleans up the
  // rows of each block using that skew. Returns the page skew gradient.
  TO_BLOCK_IT block_it;          // iterator

  block_it.set_to_list(port_blocks);
  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
       block_it.forward()) {
    make_initial_textrows(page_tr, block_it.data(), FCOORD(1.0f, 0.0f),
        !(BOOL8) textord_test_landscape);
  }

  float page_gradient;           // global skew
  float page_error;              // global noise
  compute_page_skew(port_blocks, page_gradient, page_error);

  block_it.set_to_list(port_blocks);
  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
    TO_BLOCK *current = block_it.data();
    cleanup_rows_making(page_tr, current, page_gradient, FCOORD(1.0f, 0.0f),
                 current->block->bounding_box().left(),
                 !(BOOL8)textord_test_landscape);
  }
  return page_gradient;
}
float make_single_row ( ICOORD  page_tr,
TO_BLOCK block,
TO_BLOCK_LIST *  blocks 
)

Definition at line 167 of file makerow.cpp.

                                                                              {
  // Builds rows for a block using every blob it has (small, noise and large
  // blobs are appended to the main blob list first), fits a line to each
  // row, and returns the skew gradient computed over `blocks`.
  BLOBNBOX_IT all_blobs_it = &block->blobs;
  TO_ROW_IT rows_it = block->get_rows();

  // Pull the other blob lists into block->blobs.
  all_blobs_it.add_list_after(&block->small_blobs);
  all_blobs_it.add_list_after(&block->noise_blobs);
  all_blobs_it.add_list_after(&block->large_blobs);

  // A single blob is first broken into sub-blobs; the size that call returns
  // can only raise the block's line size.
  if (block->blobs.singleton()) {
    all_blobs_it.move_to_first();
    const float sub_size =
      MakeRowFromSubBlobs(block, all_blobs_it.data()->cblob(), &rows_it);
    if (sub_size > block->line_size) {
      block->line_size = sub_size;
    }
  }
  MakeRowFromBlobs(block->line_size, &all_blobs_it, &rows_it);

  // Fit an LMS line to each row.
  for (rows_it.mark_cycle_pt(); !rows_it.cycled_list(); rows_it.forward()) {
    fit_lms_line(rows_it.data());
  }

  // The skew comes from the fitted lines; the fit error is not used.
  float gradient;
  float fit_error;
  compute_page_skew(blocks, gradient, fit_error);
  return gradient;
}
void mark_repeated_chars ( TO_ROW row)

Definition at line 2760 of file makerow.cpp.

                                      {
  // Scans the row's blobs for runs of leader blobs (flow() == BTFT_LEADER)
  // and tags the blobs of each sufficiently long run with a 1-based set
  // number via set_repeated_set(). The number of sets found is stored on the
  // row with set_num_repeated_sets().
  BLOBNBOX_IT box_it(row->blob_list());            // Iterator.
  int num_repeated_sets = 0;
  if (!box_it.empty()) {
    do {
      BLOBNBOX* bblob = box_it.data();
      // Counts the leader blobs that FOLLOW the current one; the current
      // blob itself is not included in the count.
      int repeat_length = 0;
      if (bblob->flow() == BTFT_LEADER &&
          !bblob->joined_to_prev() && bblob->cblob() != NULL) {
        // Look ahead with a copy of the iterator so box_it stays in place.
        BLOBNBOX_IT test_it(box_it);
        for (test_it.forward(); !test_it.at_first(); test_it.forward()) {
          // bblob is reused here, so after this loop it may point at a later
          // blob than box_it.data().
          bblob = test_it.data();
          if (bblob->flow() != BTFT_LEADER)
            break;
          if (bblob->joined_to_prev() || bblob->cblob() == NULL) {
            tprintf("Cancelled repeat of length %d due to %s\n",
                    repeat_length,
                    bblob->joined_to_prev() ? "Joined" : "Null");
            repeat_length = 0;
            break;
          }
          ++repeat_length;
        }
      }
      if (repeat_length >= kMinLeaderCount) {
        num_repeated_sets++;
        // Tag repeat_length blobs starting at the current one, advancing
        // box_it past them.
        for (; repeat_length > 0; box_it.forward(), --repeat_length) {
          bblob = box_it.data();
          bblob->set_repeated_set(num_repeated_sets);
        }
        // NOTE(review): bblob is the last blob tagged just above, so this
        // clears its tag again unless the iterator wrapped -- confirm that
        // this is intended.
        if (!box_it.at_first())
          bblob->set_repeated_set(0);
     } else {
        box_it.forward();
        // NOTE(review): if the look-ahead loop ran, bblob is the blob it
        // stopped on rather than the one box_it was on, so the reset may hit
        // a different blob -- confirm.
        bblob->set_repeated_set(0);
      }
    } while (!box_it.at_first());  // until all done
  }
  row->set_num_repeated_sets(num_repeated_sets);
}
float median_block_xheight ( TO_BLOCK *  block,
float  gradient 
)
OVERLAP_STATE most_overlapping_row ( TO_ROW_IT *  row_it,
TO_ROW *&  best_row,
float  top,
float  bottom,
float  rowsize,
BOOL8  testing_blob 
)

Definition at line 2598 of file makerow.cpp.

                                    {
  // Starting at the row under row_it, searches the following rows for the
  // one whose [min_y, max_y] range overlaps the blob range [bottom, top]
  // the most. Neighbouring rows whose combined extent fits within rowsize
  // are merged on the way. On return row_it points at the chosen row, which
  // is also stored in best_row.
  // Returns ASSIGN by default, REJECT when two rows both overlap the blob by
  // at least rowsize - 1, or NEW_ROW when too little of the blob overlaps.
  OVERLAP_STATE result;          //result of tests
  float overlap;                 //of blob & row
  float bestover;                //nearest row
  float merge_top, merge_bottom; //size of merged row
  ICOORD testpt;                 //testing only
  TO_ROW *row;                   //current row
  TO_ROW *test_row;              //for multiple overlaps
  BLOBNBOX_IT blob_it;           //for merging rows

  result = ASSIGN;
  row = row_it->data ();
  // Overlap of the blob with the first row: the blob height minus whatever
  // sticks out above the row top or below the row bottom.
  bestover = top - bottom;
  if (top > row->max_y ())
    bestover -= top - row->max_y ();
  if (bottom < row->min_y ())
                                 //compute overlap
    bestover -= row->min_y () - bottom;
  if (testing_blob) {
    tprintf ("Test blob y=(%g,%g), row=(%f,%f), overlap=%f\n",
      bottom, top, row->min_y (), row->max_y (), bestover);
  }
  test_row = row;
  // Step through the following rows for as long as they still intersect
  // the blob's vertical range.
  do {
    if (!row_it->at_last ()) {
      row_it->forward ();
      test_row = row_it->data ();
      if (test_row->min_y () <= top && test_row->max_y () >= bottom) {
        // Extent the two rows would have if they were merged.
        merge_top =
          test_row->max_y () >
          row->max_y ()? test_row->max_y () : row->max_y ();
        merge_bottom =
          test_row->min_y () <
          row->min_y ()? test_row->min_y () : row->min_y ();
        if (merge_top - merge_bottom <= rowsize) {
          if (testing_blob) {
            tprintf ("Merging rows at (%g,%g), (%g,%g)\n",
              row->min_y (), row->max_y (),
              test_row->min_y (), test_row->max_y ());
          }
          // Fold row into test_row: widen test_row's limits, move row's
          // blobs into test_row's list and re-sort that list by x.
          test_row->set_limits (merge_bottom, merge_top);
          blob_it.set_to_list (test_row->blob_list ());
          blob_it.add_list_after (row->blob_list ());
          blob_it.sort (blob_x_order);
          // NOTE(review): this deletes the list element immediately before
          // test_row, which is row only if row is test_row's direct
          // predecessor - confirm that always holds here.
          row_it->backward ();
          delete row_it->extract ();
          row_it->forward ();
          bestover = -1.0f;      //force replacement
        }
        // Overlap of the blob with test_row, computed as for the first row.
        overlap = top - bottom;
        if (top > test_row->max_y ())
          overlap -= top - test_row->max_y ();
        if (bottom < test_row->min_y ())
          overlap -= test_row->min_y () - bottom;
        // The blob spans (almost) a full row height in two different rows.
        if (bestover >= rowsize - 1 && overlap >= rowsize - 1) {
          result = REJECT;
        }
        if (overlap > bestover) {
          bestover = overlap;    //find biggest overlap
          row = test_row;
        }
        if (testing_blob) {
          tprintf
            ("Test blob y=(%g,%g), row=(%f,%f), overlap=%f->%f\n",
            bottom, top, test_row->min_y (), test_row->max_y (),
            overlap, bestover);
        }
      }
    }
  }
  while (!row_it->at_last ()
    && test_row->min_y () <= top && test_row->max_y () >= bottom);
  while (row_it->data () != row)
    row_it->backward ();         //make it point to row
                                 //doesn't overlap much
  // Only an ASSIGN result can become NEW_ROW: the part of the blob outside
  // the best row must exceed rowsize * textord_overlap_x and, when
  // textord_fix_makerow_bug is set, the overlap itself must be below that
  // same threshold.
  if (top - bottom - bestover > rowsize * textord_overlap_x &&
      (!textord_fix_makerow_bug || bestover < rowsize * textord_overlap_x)
    && result == ASSIGN)
    result = NEW_ROW;            //doesn't overlap enough
  best_row = row;
  return result;
}
void pre_associate_blobs ( ICOORD  page_tr,
TO_BLOCK *  block,
FCOORD  rotation,
BOOL8  testing_on 
)

Definition at line 1965 of file makerow.cpp.

                          {
  // For every row of the block: merges each blob with the following blobs
  // whose bounding boxes have a major x-overlap with it, then calls chop()
  // on the merged blob. When graphics are enabled and both testing_on and
  // textord_show_final_blobs are set, the resulting boxes are drawn, using
  // a different pen colour for each row.
#ifndef GRAPHICS_DISABLED
  ScrollView::Color colour;                 //of boxes
#endif
  BLOBNBOX *blob;                //current blob
  BLOBNBOX *nextblob;            //next in list
  TBOX blob_box;
  FCOORD blob_rotation;          //inverse of rotation
  BLOBNBOX_IT blob_it;           //iterator
  BLOBNBOX_IT start_it;          //iterator
  TO_ROW_IT row_it = block->get_rows ();

#ifndef GRAPHICS_DISABLED
  colour = ScrollView::RED;
#endif

  // Same x component as rotation with the y component negated.
  blob_rotation = FCOORD (rotation.x (), -rotation.y ());
  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
                                 //get blobs
    blob_it.set_to_list (row_it.data ()->blob_list ());
    for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
    blob_it.forward ()) {
      blob = blob_it.data ();
      blob_box = blob->bounding_box ();
      start_it = blob_it;        //save start point
      //                      if (testing_on && textord_show_final_blobs)
      //                      {
      //                              tprintf("Blob at (%d,%d)->(%d,%d), addr=%x, count=%d\n",
      //                                      blob_box.left(),blob_box.bottom(),
      //                                      blob_box.right(),blob_box.top(),
      //                                      (void*)blob,blob_it.length());
      //                      }
      // Keep absorbing the next blob while it overlaps the (growing) box;
      // blob_it ends on the last blob merged, start_it stays on the first.
      bool overlap;
      do {
        overlap = false;
        if (!blob_it.at_last ()) {
          nextblob = blob_it.data_relative(1);
          overlap = blob_box.major_x_overlap(nextblob->bounding_box());
          if (overlap) {
            blob->merge(nextblob); // merge new blob
            blob_box = blob->bounding_box(); // get bigger box
            blob_it.forward();
          }
        }
      }
      while (overlap);
      // Chop over the range [start_it, blob_it]; the last argument is
      // line_size * kXHeightFraction * textord_chop_width.
      blob->chop (&start_it, &blob_it,
        blob_rotation,
        block->line_size * tesseract::CCStruct::kXHeightFraction *
        textord_chop_width);
      //attempt chop
    }
#ifndef GRAPHICS_DISABLED
    if (testing_on && textord_show_final_blobs) {
      // Create the debug window on first use.
      if (to_win == NULL)
        create_to_win(page_tr);
      to_win->Pen(colour);
      // Draw the rotated box of every blob in this row that is not joined
      // to its predecessor.
      for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
      blob_it.forward ()) {
        blob = blob_it.data ();
        blob_box = blob->bounding_box ();
        blob_box.rotate (rotation);
        if (!blob->joined_to_prev ()) {
          to_win->Rectangle (blob_box.left (), blob_box.bottom (),
            blob_box.right (), blob_box.top ());
        }
      }
      // Advance to the next colour for the next row, wrapping back to RED
      // once past MAGENTA.
      colour = (ScrollView::Color) (colour + 1);
      if (colour > ScrollView::MAGENTA)
        colour = ScrollView::RED;
    }
#endif
  }
}
int row_spacing_order ( const void *  item1,
const void *  item2 
)

Definition at line 2738 of file makerow.cpp.

                                         {
  // Comparator for sorting an array of TO_ROW pointers: each argument
  // points at a TO_ROW* element. Orders rows by ascending spacing.
  TO_ROW *lhs = *(TO_ROW **) item1;
  TO_ROW *rhs = *(TO_ROW **) item2;

  // -1 when lhs has the smaller spacing, 1 when it has the larger, else 0.
  return (lhs->spacing > rhs->spacing) - (lhs->spacing < rhs->spacing);
}
int row_y_order ( const void *  item1,
const void *  item2 
)

Definition at line 2716 of file makerow.cpp.

                                   {
  // Comparator for sorting an array of TO_ROW pointers: each argument
  // points at a TO_ROW* element. Orders rows by descending parallel_c().
  TO_ROW *lhs = *(TO_ROW **) item1;
  TO_ROW *rhs = *(TO_ROW **) item2;

  // -1 when lhs has the larger parallel_c(), 1 when it has the smaller,
  // else 0.
  return (lhs->parallel_c () < rhs->parallel_c ()) -
         (lhs->parallel_c () > rhs->parallel_c ());
}
BOOL8 segment_baseline ( TO_ROW *  row,
TO_BLOCK *  block,
inT32 &  segments,
inT32  xstarts[] 
)

Definition at line 2212 of file makerow.cpp.

  {
  // Splits the row's baseline into segments along x. For each blob the
  // vertical shift of its box bottom from the fitted line
  // (line_m() * x + line_c()) is computed, and a sliding window of
  // textord_spline_medianwin shifts is kept in yshifts. A new segment is
  // started where the window's middle value changes between above, on and
  // below the line. Segment boundaries are written to xstarts.
  // Returns TRUE if any window value fell outside the on-line band, FALSE
  // otherwise (including the early exits for rows with too few blobs).
  // NOTE(review): segments is assigned and updated here but never returned;
  // presumably it is an output (reference) parameter - confirm against the
  // declaration in makerow.cpp.
  BOOL8 needs_curve;             //needs curved line
  int blobcount;                 //no of blobs
  int blobindex;                 //current blob
  int last_state;                //above, on , below
  int state;                     //of current blob
  float yshift;                  //from baseline
  TBOX box;                       //blob box
  TBOX new_box;                   //new_it box
  float middle;                  //xcentre of blob
                                 //blobs
  BLOBNBOX_IT blob_it = row->blob_list ();
  BLOBNBOX_IT new_it = blob_it;  //front end
  SORTED_FLOATS yshifts;         //shifts from baseline

  needs_curve = FALSE;
  box = box_next_pre_chopped (&blob_it);
  // The first segment always starts at the left edge of the first box.
  xstarts[0] = box.left ();
  segments = 1;
  blobcount = row->blob_list ()->length ();
  if (textord_oldbl_debug)
    tprintf ("Segmenting baseline of %d blobs at (%d,%d)\n",
      blobcount, box.left (), box.bottom ());
  // Too few blobs to fill a window or to justify splitting: a single
  // segment ending at the right edge of the last blob.
  if (blobcount <= textord_spline_medianwin
  || blobcount < textord_spline_minblobs) {
    blob_it.move_to_last ();
    box = blob_it.data ()->bounding_box ();
    xstarts[1] = box.right ();
    return FALSE;
  }
  last_state = 0;
  new_it.mark_cycle_pt ();
  // Prime the window with the first textord_spline_medianwin shifts, keyed
  // by blob index.
  for (blobindex = 0; blobindex < textord_spline_medianwin; blobindex++) {
    new_box = box_next_pre_chopped (&new_it);
    middle = (new_box.left () + new_box.right ()) / 2.0;
    yshift = new_box.bottom () - row->line_m () * middle - row->line_c ();
                                 //record shift
    yshifts.add (yshift, blobindex);
    // Ran out of boxes while filling the window: single segment.
    if (new_it.cycled_list ()) {
      xstarts[1] = new_box.right ();
      return FALSE;
    }
  }
  // Advance blob_it by half a window so that box trails new_it. From here
  // on blobcount is reused as the number of blobs in the current segment.
  for (blobcount = 0; blobcount < textord_spline_medianwin / 2; blobcount++)
    box = box_next_pre_chopped (&blob_it);
  do {
    new_box = box_next_pre_chopped (&new_it);
                                 //get middle one
    yshift = yshifts[textord_spline_medianwin / 2];
    // Classify the window's middle shift: 1 above, -1 below, 0 within
    // textord_spline_shift_fraction * line_size of the line.
    if (yshift > textord_spline_shift_fraction * block->line_size)
      state = 1;
    else if (-yshift > textord_spline_shift_fraction * block->line_size)
      state = -1;
    else
      state = 0;
    if (state != 0)
      needs_curve = TRUE;
    //              tprintf("State=%d, prev=%d, shift=%g\n",
    //                      state,last_state,yshift);
    // Start a new segment at box when the state changes, but only if the
    // current segment already holds more than textord_spline_minblobs blobs.
    if (state != last_state && blobcount > textord_spline_minblobs) {
      xstarts[segments++] = box.left ();
      blobcount = 0;
    }
    last_state = state;
    // Slide the window: presumably drops the entry keyed by the oldest
    // blob index, then the newest shift is added below.
    yshifts.remove (blobindex - textord_spline_medianwin);
    box = box_next_pre_chopped (&blob_it);
    middle = (new_box.left () + new_box.right ()) / 2.0;
    yshift = new_box.bottom () - row->line_m () * middle - row->line_c ();
    yshifts.add (yshift, blobindex);
    blobindex++;
    blobcount++;
  }
  while (!new_it.cycled_list ());
  // Close the final segment at the right edge of the last box. If that
  // segment is too short (and is not the only one), drop its start
  // boundary so it is absorbed into the previous segment.
  if (blobcount > textord_spline_minblobs || segments == 1) {
    xstarts[segments] = new_box.right ();
  }
  else {
    xstarts[--segments] = new_box.right ();
  }
  if (textord_oldbl_debug)
    tprintf ("Made %d segments on row at (%d,%d)\n",
      segments, box.right (), box.bottom ());
  return needs_curve;
}
void separate_underlines ( TO_BLOCK *  block,
float  gradient,
FCOORD  rotation,
BOOL8  testing_on 
)

Definition at line 1892 of file makerow.cpp.

                                           {  // correct orientation
  // Scans every row of the block for blobs wider than
  // line_size * textord_underline_width. A wide blob that passes
  // test_underline() is moved from its row to block->underlines; otherwise,
  // if it overlaps more than textord_max_blob_overlaps blobs of its row
  // (as counted by CountOverlaps), it is moved to block->large_blobs.
  BLOBNBOX *blob;                // current blob
  C_BLOB *rotated_blob;          // rotated blob
  TO_ROW *row;                   // current row
  float length;                  // of g_vec
  TBOX blob_box;
  FCOORD blob_rotation;          // inverse of rotation
  FCOORD g_vec;                  // skew rotation
  BLOBNBOX_IT blob_it;           // iterator
                                 // iterator
  BLOBNBOX_IT under_it = &block->underlines;
  BLOBNBOX_IT large_it = &block->large_blobs;
  TO_ROW_IT row_it = block->get_rows();
  // Height threshold handed to CountOverlaps, rounded to the nearest int.
  int min_blob_height = static_cast<int>(textord_min_blob_height_fraction *
                                         block->line_size + 0.5);

                                 // length of vector
  length = sqrt(1 + gradient * gradient);
  // Unit vector built from the gradient: (1, -gradient) scaled by 1/length.
  g_vec = FCOORD(1 / length, -gradient / length);
  blob_rotation = FCOORD(rotation.x(), -rotation.y());
  blob_rotation.rotate(g_vec);  // undoing everything
  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
    row = row_it.data();
                                 // get blobs
    blob_it.set_to_list(row->blob_list());
    for (blob_it.mark_cycle_pt(); !blob_it.cycled_list();
         blob_it.forward()) {
      blob = blob_it.data();
      blob_box = blob->bounding_box();
      // Only blobs that are wide relative to the line size are examined.
      if (blob_box.width() > block->line_size * textord_underline_width) {
        ASSERT_HOST(blob->cblob() != NULL);
        // Presumably a newly allocated rotated copy, since it is deleted
        // at the end of this branch.
        rotated_blob = crotate_cblob (blob->cblob(),
          blob_rotation);
        // test_underline gets the row intercept and
        // line_size * (kXHeightFraction + kAscenderFraction / 2), both
        // truncated to inT16.
        if (test_underline(
            testing_on && textord_show_final_rows,
            rotated_blob, static_cast<inT16>(row->intercept()),
            static_cast<inT16>(
                block->line_size *
                (tesseract::CCStruct::kXHeightFraction +
                 tesseract::CCStruct::kAscenderFraction / 2.0f)))) {
          // Move the blob out of the row into the block's underline list.
          under_it.add_after_then_move(blob_it.extract());
          if (testing_on && textord_show_final_rows) {
            tprintf("Underlined blob at:");
              rotated_blob->bounding_box().print();
            tprintf("Was:");
              blob_box.print();
          }
        } else if (CountOverlaps(blob->bounding_box(), min_blob_height,
                                 row->blob_list()) >
                   textord_max_blob_overlaps) {
          // Move the blob out of the row into the block's large-blob list.
          large_it.add_after_then_move(blob_it.extract());
          if (testing_on && textord_show_final_rows) {
            // The count is recomputed for the debug message, after the
            // blob has been extracted from the row's list.
            tprintf("Large blob overlaps %d blobs at:",
                    CountOverlaps(blob_box, min_blob_height,
                                  row->blob_list()));
            blob_box.print();
          }
        }
        delete rotated_blob;
      }
    }
  }
}
bool within_error_margin ( float  test,
float  num,
float  margin 
) [inline]

Definition at line 128 of file makerow.h.

                                                                     {
  // True when test lies in the closed band
  // [num * (1 - margin), num * (1 + margin)].
  // The lower bound is checked as !(test >= ...) so that an unordered
  // (NaN) comparison fails it.
  if (!(test >= num * (1 - margin)))
    return false;
  return test <= num * (1 + margin);
}

Variable Documentation

"Min pile height to make ascheight"

Definition at line 94 of file makerow.cpp.

double textord_ascx_ratio_max = 1.7

"Max cap/xheight"

Definition at line 98 of file makerow.cpp.

double textord_ascx_ratio_min = 1.2

"Min cap/xheight"

Definition at line 97 of file makerow.cpp.

"Use new projection for underlines"

double textord_chop_width = 1.5

"Max width before chopping"

Definition at line 79 of file makerow.cpp.

"Test xheight algorithms"

Definition at line 58 of file makerow.cpp.

"Max desc/xheight"

Definition at line 100 of file makerow.cpp.

double textord_descx_ratio_min = 0.15

"Min desc/xheight"

Definition at line 99 of file makerow.cpp.

"New row made if blob makes row this big"

Definition at line 86 of file makerow.cpp.

"Prevent multiple baselines"

Definition at line 57 of file makerow.cpp.

"Use spline baseline"

Definition at line 56 of file makerow.cpp.

"Max iqr/median for linespace"

Definition at line 77 of file makerow.cpp.

"Number of linew fits to do"

Definition at line 102 of file makerow.cpp.

"Min blob height/top to include blob top into xheight stats"

Definition at line 90 of file makerow.cpp.

"Min blobs before gradient counted"

Definition at line 65 of file makerow.cpp.

double textord_min_linesize = 1.25

"* blob height for initial linesize"

Definition at line 84 of file makerow.cpp.

"Min credible pixel xheight"

Definition at line 70 of file makerow.cpp.

double textord_minxh = 0.25

"fraction of linesize for min xheight"

Definition at line 83 of file makerow.cpp.

"Use test xheight mechanism"

Definition at line 103 of file makerow.cpp.

"Fraction of neighbourhood"

Definition at line 87 of file makerow.cpp.

"Use old baseline algorithm"

Definition at line 54 of file makerow.cpp.

"Use old xheight algorithm"

Definition at line 55 of file makerow.cpp.

"Force parallel baselines"

Definition at line 52 of file makerow.cpp.

"Use quadratic splines"

"Display rows after expanding"

Definition at line 48 of file makerow.cpp.

"Display blob bounds after pre-ass"

Definition at line 50 of file makerow.cpp.

"Display rows after final fitting"

Definition at line 49 of file makerow.cpp.

"Display row accumulation"

Definition at line 46 of file makerow.cpp.

"Display page correlated rows"

Definition at line 47 of file makerow.cpp.

double textord_skew_ile = 0.5

"Ile of gradients for page skew"

Definition at line 75 of file makerow.cpp.

double textord_skew_lag = 0.75

"Lag for skew on row accumulation"

Definition at line 76 of file makerow.cpp.

"Size of window for spline segmentation"

Definition at line 67 of file makerow.cpp.

"Min blobs in each spline segment"

Definition at line 66 of file makerow.cpp.

"Fraction of line spacing for outlier"

Definition at line 74 of file makerow.cpp.

"Fraction of line spacing for quad"

Definition at line 72 of file makerow.cpp.

"Force straight baselines"

Definition at line 53 of file makerow.cpp.

"Tests refer to land/port"

Definition at line 51 of file makerow.cpp.

int textord_test_x = 0

"coord of test pt"

Definition at line 63 of file makerow.cpp.

int textord_test_y = 0

"coord of test pt"

Definition at line 64 of file makerow.cpp.

"Multiple of line_size for underline"

Definition at line 88 of file makerow.cpp.

double textord_width_limit = 8

"Max width of blobs to make rows"

Definition at line 78 of file makerow.cpp.

"Accepted variation"

Definition at line 101 of file makerow.cpp.

"Min pile height to make xheight"

Definition at line 92 of file makerow.cpp.