Tesseract  3.02
tesseract-ocr/textord/makerow.cpp File Reference
#include "mfcpch.h"
#include "stderr.h"
#include "blobbox.h"
#include "ccstruct.h"
#include "detlinefit.h"
#include "statistc.h"
#include "drawtord.h"
#include "blkocc.h"
#include "sortflts.h"
#include "oldbasel.h"
#include "textord.h"
#include "tordmain.h"
#include "underlin.h"
#include "makerow.h"
#include "tprintf.h"
#include "tovars.h"

Go to the source code of this file.

Namespaces

namespace  tesseract

Defines

#define MAX_HEIGHT_MODES   12

Functions

float MakeRowFromSubBlobs (TO_BLOCK *block, C_BLOB *blob, TO_ROW_IT *row_it)
make_single_row

Arrange the blobs into a single row... well actually, if there is only a single blob, it makes 2 rows, in case the top-level blob is a container of the real blobs to recognize.

float make_single_row (ICOORD page_tr, TO_BLOCK *block, TO_BLOCK_LIST *blocks)
make_rows

Arrange the blobs into rows.

float make_rows (ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
make_initial_textrows

Arrange the good blobs into rows of text.

void make_initial_textrows (ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, BOOL8 testing_on)
fit_lms_line

Fit an LMS line to a row.

void fit_lms_line (TO_ROW *row)
find_best_dropout_row

Delete this row if it has a neighbour with better dropout characteristics. TRUE is returned if the row should be deleted.

BOOL8 find_best_dropout_row (TO_ROW *row, inT32 distance, float dist_limit, inT32 line_index, TO_ROW_IT *row_it, BOOL8 testing_on)
deskew_block_coords

Compute the bounding box of all the blobs in the block if they were deskewed without actually doing it.

TBOX deskew_block_coords (TO_BLOCK *block, float gradient)
compute_line_occupation

Compute the pixel projection back on the y axis given the global skew. Also compute the 1st derivative.

void compute_line_occupation (TO_BLOCK *block, float gradient, inT32 min_y, inT32 max_y, inT32 *occupation, inT32 *deltas)
void compute_occupation_threshold (inT32 low_window, inT32 high_window, inT32 line_count, inT32 *occupation, inT32 *thresholds)
compute_dropout_distances

Compute the distance from each coordinate to the nearest dropout.

void compute_dropout_distances (inT32 *occupation, inT32 *thresholds, inT32 line_count)
expand_rows

Expand each row to the least of its allowed size and touching its neighbours. If the expansion would entirely swallow a neighbouring row then do so.

void expand_rows (ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on)
void adjust_row_limits (TO_BLOCK *block)
compute_row_stats

Compute the linespacing and offset.

void compute_row_stats (TO_BLOCK *block, BOOL8 testing_on)
fill_heights

Fill the given heights with heights of the blobs that are legal candidates for estimating xheight.

void fill_heights (TO_ROW *row, float gradient, int min_height, int max_height, STATS *heights, STATS *floating_heights)
compute_xheight_from_modes

Given a STATS object heights, looks for two most frequently occurring heights that look like xheight and xheight + ascrise. If found, sets the values of *xheight and *ascrise accordingly, otherwise sets xheight to any most frequently occurring height and sets *ascrise to 0. Returns the number of times xheight occurred in heights. For each mode that is considered for being an xheight the count of floating blobs (stored in floating_heights) is subtracted from the total count of the blobs of this height. This is done because blobs that sit far above the baseline could represent valid ascenders, but it is highly unlikely that such a character's height will be an xheight (e.g. -, ', =, ^, `, ", ', etc) If cap_only, then force finding of only the top mode.

int compute_xheight_from_modes (STATS *heights, STATS *floating_heights, bool cap_only, int min_height, int max_height, float *xheight, float *ascrise)
compute_row_descdrop

Estimates the descdrop of this row. This function looks for "significant" descenders of lowercase letters (those that could not just be the small descenders of upper case letters like Q,J). The function also takes into account how many potential ascenders this row might contain. If the number of potential ascenders along with descenders is close to the expected fraction of the total number of blobs in the row, the function returns the descender height, returns 0 otherwise.

inT32 compute_row_descdrop (TO_ROW *row, float gradient, int xheight_blob_count, STATS *asc_heights)
compute_height_modes

Find the top maxmodes values in the input array and put their indices in the output in the order in which they occurred.

inT32 compute_height_modes (STATS *heights, inT32 min_height, inT32 max_height, inT32 *modes, inT32 maxmodes)
correct_row_xheight

Adjust the xheight etc of this row if not within reasonable limits of the average for the block.

void correct_row_xheight (TO_ROW *row, float xheight, float ascrise, float descdrop)
separate_underlines

Test wide objects for being potential underlines. If they are then put them in a separate list in the block.

void separate_underlines (TO_BLOCK *block, float gradient, FCOORD rotation, BOOL8 testing_on)
pre_associate_blobs

Associate overlapping blobs and fake chop wide blobs.

void pre_associate_blobs (ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, BOOL8 testing_on)
fit_parallel_rows

Re-fit the rows in the block to the given gradient.

void fit_parallel_rows (TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on)
fit_parallel_lms

Fit an LMS line to a row. Make the fit parallel to the given gradient and set the row accordingly.

void fit_parallel_lms (float gradient, TO_ROW *row)
make_baseline_spline

Fit an LMS line to a row. Make the fit parallel to the given gradient and set the row accordingly.

void make_baseline_spline (TO_ROW *row, TO_BLOCK *block)
segment_baseline

Divide the baseline up into segments which require a different quadratic fitted to them. Return TRUE if enough blobs were far enough away to need a quadratic.

BOOL8 segment_baseline (TO_ROW *row, TO_BLOCK *block, inT32 &segments, inT32 xstarts[])
linear_spline_baseline

Divide the baseline up into segments which require a different quadratic fitted to them.

Returns:
TRUE if enough blobs were far enough away to need a quadratic.
double * linear_spline_baseline (TO_ROW *row, TO_BLOCK *block, inT32 &segments, inT32 xstarts[])
assign_blobs_to_rows

Make enough rows to allocate all the given blobs to one. If a block skew is given, use that, else attempt to track it.

void assign_blobs_to_rows (TO_BLOCK *block, float *gradient, int pass, BOOL8 reject_misses, BOOL8 make_new_rows, BOOL8 drawing_skew)
most_overlapping_row

Return the row which most overlaps the blob.

OVERLAP_STATE most_overlapping_row (TO_ROW_IT *row_it, TO_ROW *&best_row, float top, float bottom, float rowsize, BOOL8 testing_blob)
blob_x_order

Sort function to sort blobs in x from page left.

int blob_x_order (const void *item1, const void *item2)
row_y_order

Sort function to sort rows in y from page top.

int row_y_order (const void *item1, const void *item2)
row_spacing_order

Qsort style function to compare 2 TO_ROWS based on their spacing value.

int row_spacing_order (const void *item1, const void *item2)
mark_repeated_chars

Mark blobs marked with BTFT_LEADER in repeated sets using the repeated_set member of BLOBNBOX.

void mark_repeated_chars (TO_ROW *row)

Variables

bool textord_heavy_nr = 0
bool textord_show_initial_rows = 0
bool textord_show_parallel_rows = 0
bool textord_show_expanded_rows = 0
bool textord_show_final_rows = 0
bool textord_show_final_blobs = 0
bool textord_test_landscape = 0
bool textord_parallel_baselines = 1
bool textord_straight_baselines = 0
bool textord_old_baselines = 1
bool textord_old_xheight = 0
bool textord_fix_xheight_bug = 1
bool textord_fix_makerow_bug = 1
bool textord_debug_xheights = 0
bool textord_biased_skewcalc = 1
bool textord_interpolating_skew = 1
int textord_skewsmooth_offset = 2
int textord_skewsmooth_offset2 = 1
int textord_test_x = -1
int textord_test_y = -1
int textord_min_blobs_in_row = 4
int textord_spline_minblobs = 8
int textord_spline_medianwin = 6
int textord_max_blob_overlaps = 4
int textord_min_xheight = 10
double textord_spline_shift_fraction = 0.02
double textord_spline_outlier_fraction = 0.1
double textord_skew_ile = 0.5
double textord_skew_lag = 0.01
double textord_linespace_iqrlimit = 0.2
double textord_width_limit = 8
double textord_chop_width = 1.5
double textord_expansion_factor = 1.0
double textord_overlap_x = 0.5
double textord_minxh = 0.25
double textord_min_linesize = 1.25
double textord_excess_blobsize = 1.3
double textord_occupancy_threshold = 0.4
double textord_underline_width = 2.0
double textord_min_blob_height_fraction = 0.75
double textord_xheight_mode_fraction = 0.4
double textord_ascheight_mode_fraction = 0.08
double textord_descheight_mode_fraction = 0.08
double textord_ascx_ratio_min = 1.25
double textord_ascx_ratio_max = 1.8
double textord_descx_ratio_min = 0.25
double textord_descx_ratio_max = 0.6
double textord_xheight_error_margin = 0.1
int textord_lms_line_trials = 12
bool textord_new_initial_xheight = 1
const int kMinLeaderCount = 5

compute_page_skew

Compute the skew over a full page by averaging the gradients over all the lines. Get the error of the same row.

const double kNoiseSize = 0.5
const int kMinSize = 8
void compute_page_skew (TO_BLOCK_LIST *blocks, float &page_m, float &page_err)
void cleanup_rows_making (ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on)
void delete_non_dropout_rows (TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on)

Define Documentation

#define MAX_HEIGHT_MODES   12

Definition at line 105 of file makerow.cpp.


Function Documentation

void adjust_row_limits ( TO_BLOCK block)

adjust_row_limits

Change the limits of rows to suit the default fractions.

Definition at line 1223 of file makerow.cpp.

                        {
  TO_ROW *row;                   //current row
  float size;                    //size of row
  float ymax;                    //top of row
  float ymin;                    //bottom of row
  TO_ROW_IT row_it = block->get_rows ();

  if (textord_show_expanded_rows)
    tprintf("Adjusting row limits for block(%d,%d)\n",
            block->block->bounding_box().left(),
            block->block->bounding_box().top());
  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
    row = row_it.data ();
    size = row->max_y () - row->min_y ();
    if (textord_show_expanded_rows)
      tprintf("Row at %f has min %f, max %f, size %f\n",
              row->intercept(), row->min_y(), row->max_y(), size);
    size /= tesseract::CCStruct::kXHeightFraction +
        tesseract::CCStruct::kAscenderFraction +
        tesseract::CCStruct::kDescenderFraction;
    ymax = size * (tesseract::CCStruct::kXHeightFraction +
                   tesseract::CCStruct::kAscenderFraction);
    ymin = -size * tesseract::CCStruct::kDescenderFraction;
    row->set_limits (row->intercept () + ymin, row->intercept () + ymax);
    row->merged = FALSE;
  }
}
void assign_blobs_to_rows ( TO_BLOCK block,
float *  gradient,
int  pass,
BOOL8  reject_misses,
BOOL8  make_new_rows,
BOOL8  drawing_skew 
)

Definition at line 2402 of file makerow.cpp.

                           {
  OVERLAP_STATE overlap_result;  //what to do with it
  float ycoord;                  //current y
  float top, bottom;             //of blob
  float g_length = 1.0f;         //from gradient
  inT16 row_count;               //no of rows
  inT16 left_x;                  //left edge
  inT16 last_x;                  //previous edge
  float block_skew;              //y delta
  float smooth_factor;           //for new coords
  float near_dist;               //dist to nearest row
  ICOORD testpt;                 //testing only
  BLOBNBOX *blob;                //current blob
  TO_ROW *row;                   //current row
  TO_ROW *dest_row = NULL;       //row to put blob in
                                 //iterators
  BLOBNBOX_IT blob_it = &block->blobs;
  TO_ROW_IT row_it = block->get_rows ();

  ycoord =
    (block->block->bounding_box ().bottom () +
    block->block->bounding_box ().top ()) / 2.0f;
  if (gradient != NULL)
    g_length = sqrt (1 + *gradient * *gradient);
#ifndef GRAPHICS_DISABLED
  if (drawing_skew)
    to_win->SetCursor(block->block->bounding_box ().left (), ycoord);
#endif
  testpt = ICOORD (textord_test_x, textord_test_y);
  blob_it.sort (blob_x_order);
  smooth_factor = 1.0;
  block_skew = 0.0f;
  row_count = row_it.length ();  //might have rows
  if (!blob_it.empty ()) {
    left_x = blob_it.data ()->bounding_box ().left ();
  }
  else {
    left_x = block->block->bounding_box ().left ();
  }
  last_x = left_x;
  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
    blob = blob_it.data ();
    if (gradient != NULL) {
      block_skew = (1 - 1 / g_length) * blob->bounding_box ().bottom ()
        + *gradient / g_length * blob->bounding_box ().left ();
    }
    else if (blob->bounding_box ().left () - last_x > block->line_size / 2
      && last_x - left_x > block->line_size * 2
    && textord_interpolating_skew) {
      //                      tprintf("Interpolating skew from %g",block_skew);
      block_skew *= (float) (blob->bounding_box ().left () - left_x)
        / (last_x - left_x);
      //                      tprintf("to %g\n",block_skew);
    }
    last_x = blob->bounding_box ().left ();
    top = blob->bounding_box ().top () - block_skew;
    bottom = blob->bounding_box ().bottom () - block_skew;
#ifndef GRAPHICS_DISABLED
    if (drawing_skew)
      to_win->DrawTo(blob->bounding_box ().left (), ycoord + block_skew);
#endif
    if (!row_it.empty ()) {
      for (row_it.move_to_first ();
        !row_it.at_last () && row_it.data ()->min_y () > top;
        row_it.forward ());
      row = row_it.data ();
      if (row->min_y () <= top && row->max_y () >= bottom) {
      //any overlap
        dest_row = row;
        overlap_result = most_overlapping_row (&row_it, dest_row,
          top, bottom,
          block->line_size,
          blob->bounding_box ().
          contains (testpt));
        if (overlap_result == NEW_ROW && !reject_misses)
          overlap_result = ASSIGN;
      }
      else {
        overlap_result = NEW_ROW;
        if (!make_new_rows) {
          near_dist = row_it.data_relative (-1)->min_y () - top;
                                 //below bottom
          if (bottom < row->min_y ()) {
            if (row->min_y () - bottom <=
              (block->line_spacing -
            block->line_size) * tesseract::CCStruct::kDescenderFraction) {
                                 //done it
              overlap_result = ASSIGN;
              dest_row = row;
            }
          }
          else if (near_dist > 0
          && near_dist < bottom - row->max_y ()) {
            row_it.backward ();
            dest_row = row_it.data ();
            if (dest_row->min_y () - bottom <=
              (block->line_spacing -
            block->line_size) * tesseract::CCStruct::kDescenderFraction) {
                                 //done it
              overlap_result = ASSIGN;
            }
          }
          else {
            if (top - row->max_y () <=
              (block->line_spacing -
              block->line_size) * (textord_overlap_x +
            tesseract::CCStruct::kAscenderFraction)) {
                                 //done it
              overlap_result = ASSIGN;
              dest_row = row;
            }
          }
        }
      }
      if (overlap_result == ASSIGN)
        dest_row->add_blob (blob_it.extract (), top, bottom,
          block->line_size);
      if (overlap_result == NEW_ROW) {
        if (make_new_rows && top - bottom < block->max_blob_size) {
          dest_row =
            new TO_ROW (blob_it.extract (), top, bottom,
            block->line_size);
          row_count++;
          if (bottom > row_it.data ()->min_y ())
            row_it.add_before_then_move (dest_row);
          //insert in right place
          else
            row_it.add_after_then_move (dest_row);
          smooth_factor =
            1.0 / (row_count * textord_skew_lag +
            textord_skewsmooth_offset);
        }
        else
          overlap_result = REJECT;
      }
    }
    else if (make_new_rows && top - bottom < block->max_blob_size) {
      overlap_result = NEW_ROW;
      dest_row =
        new TO_ROW (blob_it.extract (), top, bottom, block->line_size);
      row_count++;
      row_it.add_after_then_move (dest_row);
      smooth_factor = 1.0 / (row_count * textord_skew_lag +
                             textord_skewsmooth_offset2);
    }
    else
      overlap_result = REJECT;
    if (blob->bounding_box ().contains (testpt)) {
      if (overlap_result != REJECT) {
        tprintf ("Test blob assigned to row at (%g,%g) on pass %d\n",
          dest_row->min_y (), dest_row->max_y (), pass);
      }
      else {
        tprintf ("Test blob assigned to no row on pass %d\n", pass);
      }
    }
    if (overlap_result != REJECT) {
      while (!row_it.at_first ()
        && row_it.data ()->min_y () >
      row_it.data_relative (-1)->min_y ()) {
        row = row_it.extract ();
        row_it.backward ();
        row_it.add_before_then_move (row);
      }
      while (!row_it.at_last ()
        && row_it.data ()->min_y () <
      row_it.data_relative (1)->min_y ()) {
        row = row_it.extract ();
        row_it.forward ();
                                 //keep rows in order
        row_it.add_after_then_move (row);
      }
      block_skew = (1 - smooth_factor) * block_skew
        + smooth_factor * (blob->bounding_box ().bottom () -
        dest_row->initial_min_y ());
    }
  }
  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
    if (row_it.data ()->blob_list ()->empty ())
      delete row_it.extract ();  //discard empty rows
  }
}
int blob_x_order ( const void *  item1,
const void *  item2 
)

Definition at line 2694 of file makerow.cpp.

                                    {
                                 //converted ptr
  BLOBNBOX *blob1 = *(BLOBNBOX **) item1;
                                 //converted ptr
  BLOBNBOX *blob2 = *(BLOBNBOX **) item2;

  if (blob1->bounding_box ().left () < blob2->bounding_box ().left ())
    return -1;
  else if (blob1->bounding_box ().left () > blob2->bounding_box ().left ())
    return 1;
  else
    return 0;
}
void cleanup_rows_making ( ICOORD  page_tr,
TO_BLOCK block,
float  gradient,
FCOORD  rotation,
inT32  block_edge,
BOOL8  testing_on 
)

cleanup_rows_making

Remove overlapping rows and fit all the blobs to what's left.

Definition at line 534 of file makerow.cpp.

                   {
                                 //iterators
  BLOBNBOX_IT blob_it = &block->blobs;
  TO_ROW_IT row_it = block->get_rows ();

#ifndef GRAPHICS_DISABLED
  if (textord_show_parallel_rows && testing_on) {
    if (to_win == NULL)
      create_to_win(page_tr);
  }
#endif
                                 //get row coords
  fit_parallel_rows(block,
                    gradient,
                    rotation,
                    block_edge,
                    textord_show_parallel_rows &&testing_on);
  delete_non_dropout_rows(block,
                          gradient,
                          rotation,
                          block_edge,
                          textord_show_parallel_rows &&testing_on);
  expand_rows(page_tr, block, gradient, rotation, block_edge, testing_on);
  blob_it.set_to_list (&block->blobs);
  row_it.set_to_list (block->get_rows ());
  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())
    blob_it.add_list_after (row_it.data ()->blob_list ());
  //give blobs back
  assign_blobs_to_rows (block, &gradient, 1, FALSE, FALSE, FALSE);
  //now new rows must be genuine
  blob_it.set_to_list (&block->blobs);
  blob_it.add_list_after (&block->large_blobs);
  assign_blobs_to_rows (block, &gradient, 2, TRUE, TRUE, FALSE);
  //safe to use big ones now
  blob_it.set_to_list (&block->blobs);
                                 //throw all blobs in
  blob_it.add_list_after (&block->noise_blobs);
  blob_it.add_list_after (&block->small_blobs);
  assign_blobs_to_rows (block, &gradient, 3, FALSE, FALSE, FALSE);
}
void compute_dropout_distances ( inT32 occupation,
inT32 thresholds,
inT32  line_count 
)

Definition at line 1018 of file makerow.cpp.

                                {
  inT32 line_index;              //of thresholds line
  inT32 distance;                //from prev dropout
  inT32 next_dist;               //to next dropout
  inT32 back_index;              //for back filling
  inT32 prev_threshold;          //before overwrite

  distance = -line_count;
  line_index = 0;
  do {
    do {
      distance--;
      prev_threshold = thresholds[line_index];
                                 //distance from prev
      thresholds[line_index] = distance;
      line_index++;
    }
    while (line_index < line_count
      && (occupation[line_index] < thresholds[line_index]
      || occupation[line_index - 1] >= prev_threshold));
    if (line_index < line_count) {
      back_index = line_index - 1;
      next_dist = 1;
      while (next_dist < -distance && back_index >= 0) {
        thresholds[back_index] = next_dist;
        back_index--;
        next_dist++;
        distance++;
      }
      distance = 1;
    }
  }
  while (line_index < line_count);
}
inT32 compute_height_modes ( STATS heights,
inT32  min_height,
inT32  max_height,
inT32 modes,
inT32  maxmodes 
)

Definition at line 1743 of file makerow.cpp.

                                           {  // size of modes
  inT32 pile_count;              // no in source pile
  inT32 src_count;               // no of source entries
  inT32 src_index;               // current entry
  inT32 least_count;             // height of smalllest
  inT32 least_index;             // index of least
  inT32 dest_count;              // index in modes

  src_count = max_height + 1 - min_height;
  dest_count = 0;
  least_count = MAX_INT32;
  least_index = -1;
  for (src_index = 0; src_index < src_count; src_index++) {
    pile_count = heights->pile_count(min_height + src_index);
    if (pile_count > 0) {
      if (dest_count < maxmodes) {
        if (pile_count < least_count) {
          // find smallest in array
          least_count = pile_count;
          least_index = dest_count;
        }
        modes[dest_count++] = min_height + src_index;
      } else if (pile_count >= least_count) {
        while (least_index < maxmodes - 1) {
          modes[least_index] = modes[least_index + 1];
          // shuffle up
          least_index++;
        }
        // new one on end
        modes[maxmodes - 1] = min_height + src_index;
        if (pile_count == least_count) {
          // new smallest
          least_index = maxmodes - 1;
        } else {
          least_count = heights->pile_count(modes[0]);
          least_index = 0;
          for (dest_count = 1; dest_count < maxmodes; dest_count++) {
            pile_count = heights->pile_count(modes[dest_count]);
            if (pile_count < least_count) {
              // find smallest
              least_count = pile_count;
              least_index = dest_count;
            }
          }
        }
      }
    }
  }
  return dest_count;
}
void compute_line_occupation ( TO_BLOCK block,
float  gradient,
inT32  min_y,
inT32  max_y,
inT32 occupation,
inT32 deltas 
)

Definition at line 871 of file makerow.cpp.

                              {
  inT32 line_count;              //maxy-miny+1
  inT32 line_index;              //of scan line
  int index;                     //array index for daft compilers
  float top, bottom;             //coords of blob
  inT32 width;                   //of blob
  TO_ROW *row;                   //current row
  TO_ROW_IT row_it = block->get_rows ();
  BLOBNBOX *blob;                //current blob
  BLOBNBOX_IT blob_it;           //iterator
  float length;                  //of skew vector
  TBOX blob_box;                  //bounding box
  FCOORD rotation;               //inverse of skew

  line_count = max_y - min_y + 1;
  length = sqrt (gradient * gradient + 1);
  rotation = FCOORD (1 / length, -gradient / length);
  for (line_index = 0; line_index < line_count; line_index++)
    deltas[line_index] = 0;
  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
    row = row_it.data ();
    blob_it.set_to_list (row->blob_list ());
    for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
    blob_it.forward ()) {
      blob = blob_it.data ();
      blob_box = blob->bounding_box ();
      blob_box.rotate (rotation);//de-skew it
      top = blob_box.top ();
      bottom = blob_box.bottom ();
      width =
        (inT32) floor ((FLOAT32) (blob_box.right () - blob_box.left ()));
      if ((inT32) floor (bottom) < min_y
        || (inT32) floor (bottom) - min_y >= line_count)
        fprintf (stderr,
          "Bad y coord of bottom, " INT32FORMAT "(" INT32FORMAT ","
          INT32FORMAT ")\n", (inT32) floor (bottom), min_y, max_y);
                                 //count transitions
      index = (inT32) floor (bottom) - min_y;
      deltas[index] += width;
      if ((inT32) floor (top) < min_y
        || (inT32) floor (top) - min_y >= line_count)
        fprintf (stderr,
          "Bad y coord of top, " INT32FORMAT "(" INT32FORMAT ","
          INT32FORMAT ")\n", (inT32) floor (top), min_y, max_y);
      index = (inT32) floor (top) - min_y;
      deltas[index] -= width;
    }
  }
  occupation[0] = deltas[0];
  for (line_index = 1; line_index < line_count; line_index++)
    occupation[line_index] = occupation[line_index - 1] + deltas[line_index];
}
void compute_occupation_threshold ( inT32  low_window,
inT32  high_window,
inT32  line_count,
inT32 occupation,
inT32 thresholds 
)

compute_occupation_threshold

Compute thresholds for textline or not for the occupation array.

Definition at line 937 of file makerow.cpp.

                                   {
  inT32 line_index;              //of thresholds line
  inT32 low_index;               //in occupation
  inT32 high_index;              //in occupation
  inT32 sum;                     //current average
  inT32 divisor;                 //to get thresholds
  inT32 min_index;               //of min occ
  inT32 min_occ;                 //min in locality
  inT32 test_index;              //for finding min

  divisor =
    (inT32) ceil ((low_window + high_window) / textord_occupancy_threshold);
  if (low_window + high_window < line_count) {
    for (sum = 0, high_index = 0; high_index < low_window; high_index++)
      sum += occupation[high_index];
    for (low_index = 0; low_index < high_window; low_index++, high_index++)
      sum += occupation[high_index];
    min_occ = occupation[0];
    min_index = 0;
    for (test_index = 1; test_index < high_index; test_index++) {
      if (occupation[test_index] <= min_occ) {
        min_occ = occupation[test_index];
        min_index = test_index;  //find min in region
      }
    }
    for (line_index = 0; line_index < low_window; line_index++)
      thresholds[line_index] = (sum - min_occ) / divisor + min_occ;
    //same out to end
    for (low_index = 0; high_index < line_count; low_index++, high_index++) {
      sum -= occupation[low_index];
      sum += occupation[high_index];
      if (occupation[high_index] <= min_occ) {
                                 //find min in region
        min_occ = occupation[high_index];
        min_index = high_index;
      }
                                 //lost min from region
      if (min_index <= low_index) {
        min_occ = occupation[low_index + 1];
        min_index = low_index + 1;
        for (test_index = low_index + 2; test_index <= high_index;
        test_index++) {
          if (occupation[test_index] <= min_occ) {
            min_occ = occupation[test_index];
                                 //find min in region
            min_index = test_index;
          }
        }
      }
      thresholds[line_index++] = (sum - min_occ) / divisor + min_occ;
    }
  }
  else {
    min_occ = occupation[0];
    min_index = 0;
    for (sum = 0, low_index = 0; low_index < line_count; low_index++) {
      if (occupation[low_index] < min_occ) {
        min_occ = occupation[low_index];
        min_index = low_index;
      }
      sum += occupation[low_index];
    }
    line_index = 0;
  }
  for (; line_index < line_count; line_index++)
    thresholds[line_index] = (sum - min_occ) / divisor + min_occ;
  //same out to end
}
void compute_page_skew ( TO_BLOCK_LIST *  blocks,
float &  page_m,
float &  page_err 
)

Definition at line 296 of file makerow.cpp.

                        {
  inT32 row_count;               //total rows
  inT32 blob_count;              //total_blobs
  inT32 row_err;                 //integer error
  float *gradients;              //of rows
  float *errors;                 //of rows
  inT32 row_index;               //of total
  TO_ROW *row;                   //current row
  TO_BLOCK_IT block_it = blocks; //iterator
  TO_ROW_IT row_it;

  row_count = 0;
  blob_count = 0;
  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
       block_it.forward ()) {
    POLY_BLOCK* pb = block_it.data()->block->poly_block();
    if (pb != NULL && !pb->IsText())
      continue;  // Pretend non-text blocks don't exist.
    row_count += block_it.data ()->get_rows ()->length ();
    //count up rows
    row_it.set_to_list (block_it.data ()->get_rows ());
    for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())
      blob_count += row_it.data ()->blob_list ()->length ();
  }
  if (row_count == 0) {
    page_m = 0.0f;
    page_err = 0.0f;
    return;
  }
  gradients = (float *) alloc_mem (blob_count * sizeof (float));
  //get mem
  errors = (float *) alloc_mem (blob_count * sizeof (float));
  if (gradients == NULL || errors == NULL)
    MEMORY_OUT.error ("compute_page_skew", ABORT, NULL);

  row_index = 0;
  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
       block_it.forward ()) {
    POLY_BLOCK* pb = block_it.data()->block->poly_block();
    if (pb != NULL && !pb->IsText())
      continue;  // Pretend non-text blocks don't exist.
    row_it.set_to_list (block_it.data ()->get_rows ());
    for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
      row = row_it.data ();
      blob_count = row->blob_list ()->length ();
      row_err = (inT32) ceil (row->line_error ());
      if (row_err <= 0)
        row_err = 1;
      if (textord_biased_skewcalc) {
        blob_count /= row_err;
        for (blob_count /= row_err; blob_count > 0; blob_count--) {
          gradients[row_index] = row->line_m ();
          errors[row_index] = row->line_error ();
          row_index++;
        }
      }
      else if (blob_count >= textord_min_blobs_in_row) {
                                 //get gradient
        gradients[row_index] = row->line_m ();
        errors[row_index] = row->line_error ();
        row_index++;
      }
    }
  }
  if (row_index == 0) {
                                 //desperate
    for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
         block_it.forward ()) {
      POLY_BLOCK* pb = block_it.data()->block->poly_block();
      if (pb != NULL && !pb->IsText())
        continue;  // Pretend non-text blocks don't exist.
      row_it.set_to_list (block_it.data ()->get_rows ());
      for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
           row_it.forward ()) {
        row = row_it.data ();
        gradients[row_index] = row->line_m ();
        errors[row_index] = row->line_error ();
        row_index++;
      }
    }
  }
  row_count = row_index;
  row_index = choose_nth_item ((inT32) (row_count * textord_skew_ile),
    gradients, row_count);
  page_m = gradients[row_index];
  row_index = choose_nth_item ((inT32) (row_count * textord_skew_ile),
    errors, row_count);
  page_err = errors[row_index];
  free_mem(gradients);
  free_mem(errors);
}
inT32 compute_row_descdrop ( TO_ROW row,
float  gradient,
int  xheight_blob_count,
STATS asc_heights 
)

Definition at line 1683 of file makerow.cpp.

                                                                       {
  // Count how many potential ascenders are in this row.
  int i_min = asc_heights->min_bucket();
  if ((i_min / row->xheight) < textord_ascx_ratio_min) {
    i_min = static_cast<int>(
        floor(row->xheight * textord_ascx_ratio_min + 0.5));
  }
  int i_max = asc_heights->max_bucket();
  if ((i_max / row->xheight) > textord_ascx_ratio_max) {
    i_max = static_cast<int>(floor(row->xheight * textord_ascx_ratio_max));
  }
  int num_potential_asc = 0;
  for (int i = i_min; i <= i_max; ++i) {
    num_potential_asc += asc_heights->pile_count(i);
  }
  inT32 min_height =
    static_cast<inT32>(floor(row->xheight * textord_descx_ratio_min + 0.5));
  inT32 max_height =
    static_cast<inT32>(floor(row->xheight * textord_descx_ratio_max));
  float xcentre;                 // centre of blob
  float height;                  // height of blob
  BLOBNBOX_IT blob_it = row->blob_list();
  BLOBNBOX *blob;                // current blob
  STATS heights (min_height, max_height + 1);
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    blob = blob_it.data();
    if (!blob->joined_to_prev()) {
      xcentre = (blob->bounding_box().left() +
                 blob->bounding_box().right()) / 2.0f;
      height = (gradient * xcentre + row->parallel_c() -
                blob->bounding_box().bottom());
      if (height >= min_height && height <= max_height)
        heights.add(static_cast<int>(floor(height + 0.5)), 1);
    }
  }
  int blob_index = heights.mode();  // find mode
  int blob_count = heights.pile_count(blob_index);  // get count of mode
  float total_fraction =
    (textord_descheight_mode_fraction + textord_ascheight_mode_fraction);
  if (static_cast<float>(blob_count + num_potential_asc) <
      xheight_blob_count * total_fraction) {
    blob_count = 0;
  }
  int descdrop = blob_count > 0 ? -blob_index : 0;
  if (textord_debug_xheights) {
    tprintf("Descdrop: %d (potential ascenders %d, descenders %d)\n",
            descdrop, num_potential_asc, blob_count);
    heights.print();
  }
  return descdrop;
}
void compute_row_stats ( TO_BLOCK block,
BOOL8  testing_on 
)

Definition at line 1259 of file makerow.cpp.

                        {
  inT32 row_index;               //of median
  TO_ROW *row;                   //current row
  TO_ROW *prev_row;              //previous row
  float iqr;                     //inter quartile range
  TO_ROW_IT row_it = block->get_rows ();
                                 //number of rows
  inT16 rowcount = row_it.length ();
  TO_ROW **rows;                 //for choose nth

  rows = (TO_ROW **) alloc_mem (rowcount * sizeof (TO_ROW *));
  if (rows == NULL)
    MEMORY_OUT.error ("compute_row_stats", ABORT, NULL);
  rowcount = 0;
  prev_row = NULL;
  row_it.move_to_last ();        //start at bottom
  do {
    row = row_it.data ();
    if (prev_row != NULL) {
      rows[rowcount++] = prev_row;
      prev_row->spacing = row->intercept () - prev_row->intercept ();
      if (testing_on)
        tprintf ("Row at %g yields spacing of %g\n",
          row->intercept (), prev_row->spacing);
    }
    prev_row = row;
    row_it.backward ();
  }
  while (!row_it.at_last ());
  block->key_row = prev_row;
  block->baseline_offset =
    fmod (prev_row->parallel_c (), block->line_spacing);
  if (testing_on)
    tprintf ("Blob based spacing=(%g,%g), offset=%g",
      block->line_size, block->line_spacing, block->baseline_offset);
  if (rowcount > 0) {
    row_index = choose_nth_item (rowcount * 3 / 4, rows, rowcount,
      sizeof (TO_ROW *), row_spacing_order);
    iqr = rows[row_index]->spacing;
    row_index = choose_nth_item (rowcount / 4, rows, rowcount,
      sizeof (TO_ROW *), row_spacing_order);
    iqr -= rows[row_index]->spacing;
    row_index = choose_nth_item (rowcount / 2, rows, rowcount,
      sizeof (TO_ROW *), row_spacing_order);
    block->key_row = rows[row_index];
    if (testing_on)
      tprintf (" row based=%g(%g)", rows[row_index]->spacing, iqr);
    if (rowcount > 2
    && iqr < rows[row_index]->spacing * textord_linespace_iqrlimit) {
      if (!textord_new_initial_xheight) {
        if (rows[row_index]->spacing < block->line_spacing
          && rows[row_index]->spacing > block->line_size)
          //within range
          block->line_size = rows[row_index]->spacing;
        //spacing=size
        else if (rows[row_index]->spacing > block->line_spacing)
          block->line_size = block->line_spacing;
        //too big so use max
      }
      else {
        if (rows[row_index]->spacing < block->line_spacing)
          block->line_size = rows[row_index]->spacing;
        else
          block->line_size = block->line_spacing;
        //too big so use max
      }
      if (block->line_size < textord_min_xheight)
        block->line_size = (float) textord_min_xheight;
      block->line_spacing = rows[row_index]->spacing;
      block->max_blob_size =
        block->line_spacing * textord_excess_blobsize;
    }
    block->baseline_offset = fmod (rows[row_index]->intercept (),
      block->line_spacing);
  }
  if (testing_on)
    tprintf ("\nEstimate line size=%g, spacing=%g, offset=%g\n",
      block->line_size, block->line_spacing, block->baseline_offset);
  free_mem(rows);
}
int compute_xheight_from_modes ( STATS heights,
STATS floating_heights,
bool  cap_only,
int  min_height,
int  max_height,
float *  xheight,
float *  ascrise 
)

Definition at line 1587 of file makerow.cpp.

                                                    {
  int blob_index = heights->mode();  // find mode
  int blob_count = heights->pile_count(blob_index);  // get count of mode
  if (textord_debug_xheights) {
    tprintf("min_height=%d, max_height=%d, mode=%d, count=%d, total=%d\n",
            min_height, max_height, blob_index, blob_count,
            heights->get_total());
    heights->print();
    floating_heights->print();
  }
  if (blob_count == 0) return 0;
  int modes[MAX_HEIGHT_MODES];  // biggest piles
  bool in_best_pile = FALSE;
  int prev_size = -MAX_INT32;
  int best_count = 0;
  int mode_count = compute_height_modes(heights, min_height, max_height,
                                        modes, MAX_HEIGHT_MODES);
  if (cap_only && mode_count > 1)
    mode_count = 1;
  int x;
  if (textord_debug_xheights) {
    tprintf("found %d modes: ", mode_count);
    for (x = 0; x < mode_count; x++) tprintf("%d ", modes[x]);
    tprintf("\n");
  }

  for (x = 0; x < mode_count - 1; x++) {
    if (modes[x] != prev_size + 1)
      in_best_pile = FALSE;    // had empty height
    int modes_x_count = heights->pile_count(modes[x]) -
      floating_heights->pile_count(modes[x]);
    if ((modes_x_count >= blob_count * textord_xheight_mode_fraction) &&
        (in_best_pile || modes_x_count > best_count)) {
      for (int asc = x + 1; asc < mode_count; asc++) {
        float ratio =
          static_cast<float>(modes[asc]) / static_cast<float>(modes[x]);
        if (textord_ascx_ratio_min < ratio &&
            ratio < textord_ascx_ratio_max &&
            (heights->pile_count(modes[asc]) >=
             blob_count * textord_ascheight_mode_fraction)) {
          if (modes_x_count > best_count) {
            in_best_pile = true;
            best_count = modes_x_count;
          }
          if (textord_debug_xheights) {
            tprintf("X=%d, asc=%d, count=%d, ratio=%g\n",
                    modes[x], modes[asc]-modes[x], modes_x_count, ratio);
          }
          prev_size = modes[x];
          *xheight = static_cast<float>(modes[x]);
          *ascrise = static_cast<float>(modes[asc] - modes[x]);
        }
      }
    }
  }
  if (*xheight == 0) {  // single mode
    // Remove counts of the "floating" blobs (the one whose height is too
    // small in relation to it's top end of the bounding box) from heights
    // before computing the single-mode xheight.
    // Restore the counts in heights after the mode is found, since
    // floating blobs might be useful for determining potential ascenders
    // in compute_row_descdrop().
    if (floating_heights->get_total() > 0) {
      for (x = min_height; x < max_height; ++x) {
        heights->add(x, -(floating_heights->pile_count(x)));
      }
      blob_index = heights->mode();  // find the modified mode
      for (x = min_height; x < max_height; ++x) {
        heights->add(x, floating_heights->pile_count(x));
      }
    }
    *xheight = static_cast<float>(blob_index);
    *ascrise = 0.0f;
    best_count = heights->pile_count(blob_index);
    if (textord_debug_xheights)
      tprintf("Single mode xheight set to %g\n", *xheight);
  } else if (textord_debug_xheights) {
    tprintf("Multi-mode xheight set to %g, asc=%g\n", *xheight, *ascrise);
  }
  return best_count;
}
void correct_row_xheight ( TO_ROW row,
float  xheight,
float  ascrise,
float  descdrop 
)

Definition at line 1805 of file makerow.cpp.

                                                        {
  ROW_CATEGORY row_category = get_row_category(row);
  if (textord_debug_xheights) {
    tprintf("correcting row xheight: row->xheight %.4f"
            ", row->acrise %.4f row->descdrop %.4f\n",
            row->xheight, row->ascrise, row->descdrop);
  }
  bool normal_xheight =
    within_error_margin(row->xheight, xheight, textord_xheight_error_margin);
  bool cap_xheight =
    within_error_margin(row->xheight, xheight + ascrise,
                        textord_xheight_error_margin);
  // Use the average xheight/ascrise for the following cases:
  // -- the xheight of the row could not be determined at all
  // -- the row has descenders (e.g. "many groups", "ISBN 12345 p.3")
  //    and its xheight is close to either cap height or average xheight
  // -- the row does not have ascenders or descenders, but its xheight
  //    is close to the average block xheight (e.g. row with "www.mmm.com")
  if (row_category == ROW_ASCENDERS_FOUND) {
    if (row->descdrop >= 0.0) {
      row->descdrop = row->xheight * (descdrop / xheight);
    }
  } else if (row_category == ROW_INVALID ||
             (row_category == ROW_DESCENDERS_FOUND &&
              (normal_xheight || cap_xheight)) ||
              (row_category == ROW_UNKNOWN && normal_xheight)) {
    if (textord_debug_xheights) tprintf("using average xheight\n");
    row->xheight = xheight;
    row->ascrise = ascrise;
    row->descdrop = descdrop;
  } else if (row_category == ROW_DESCENDERS_FOUND) {
    // Assume this is a row with mostly lowercase letters and it's xheight
    // is computed correctly (unfortunately there is no way to distinguish
    // this from the case when descenders are found, but the most common
    // height is capheight).
    if (textord_debug_xheights) tprintf("lowercase, corrected ascrise\n");
    row->ascrise = row->xheight * (ascrise / xheight);
  } else if (row_category == ROW_UNKNOWN) {
  // Otherwise assume this row is an all-caps or small-caps row
  // and adjust xheight and ascrise of the row.

    row->all_caps = true;
    if (cap_xheight) { // regular all caps
      if (textord_debug_xheights) tprintf("all caps\n");
      row->xheight = xheight;
      row->ascrise = ascrise;
      row->descdrop = descdrop;
    } else {  // small caps or caps with an odd xheight
      if (textord_debug_xheights) {
        if (row->xheight < xheight + ascrise && row->xheight > xheight) {
          tprintf("small caps\n");
        } else {
          tprintf("all caps with irregular xheight\n");
        }
      }
      row->ascrise = row->xheight * (ascrise / (xheight + ascrise));
      row->xheight -= row->ascrise;
      row->descdrop = row->xheight * (descdrop / xheight);
    }
  }
  if (textord_debug_xheights) {
    tprintf("corrected row->xheight = %.4f, row->acrise = %.4f, row->descdrop"
            " = %.4f\n", row->xheight, row->ascrise, row->descdrop);
  }
}
void delete_non_dropout_rows ( TO_BLOCK block,
float  gradient,
FCOORD  rotation,
inT32  block_edge,
BOOL8  testing_on 
)

delete_non_dropout_rows

Compute the linespacing and offset.

Definition at line 667 of file makerow.cpp.

                              {
  TBOX block_box;                 //deskewed block
  inT32 *deltas;                 //change in occupation
  inT32 *occupation;             //of pixel coords
  inT32 max_y;                   //in block
  inT32 min_y;
  inT32 line_index;              //of scan line
  inT32 line_count;              //no of scan lines
  inT32 distance;                //to drop-out
  inT32 xleft;                   //of block
  inT32 ybottom;                 //of block
  TO_ROW *row;                   //current row
  TO_ROW_IT row_it = block->get_rows ();
  BLOBNBOX_IT blob_it = &block->blobs;

  if (row_it.length () == 0)
    return;                      //empty block
  block_box = deskew_block_coords (block, gradient);
  xleft = block->block->bounding_box ().left ();
  ybottom = block->block->bounding_box ().bottom ();
  min_y = block_box.bottom () - 1;
  max_y = block_box.top () + 1;
  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
    line_index = (inT32) floor (row_it.data ()->intercept ());
    if (line_index <= min_y)
      min_y = line_index - 1;
    if (line_index >= max_y)
      max_y = line_index + 1;
  }
  line_count = max_y - min_y + 1;
  if (line_count <= 0)
    return;                      //empty block
  deltas = (inT32 *) alloc_mem (line_count * sizeof (inT32));
  occupation = (inT32 *) alloc_mem (line_count * sizeof (inT32));
  if (deltas == NULL || occupation == NULL)
    MEMORY_OUT.error ("compute_line_spacing", ABORT, NULL);

  compute_line_occupation(block, gradient, min_y, max_y, occupation, deltas);
  compute_occupation_threshold ((inT32)
    ceil (block->line_spacing *
    (tesseract::CCStruct::kDescenderFraction +
    tesseract::CCStruct::kAscenderFraction)),
    (inT32) ceil (block->line_spacing *
    (tesseract::CCStruct::kXHeightFraction +
    tesseract::CCStruct::kAscenderFraction)),
    max_y - min_y + 1, occupation, deltas);
#ifndef GRAPHICS_DISABLED
  if (testing_on) {
    draw_occupation(xleft, ybottom, min_y, max_y, occupation, deltas);
  }
#endif
  compute_dropout_distances(occupation, deltas, line_count);
  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
    row = row_it.data ();
    line_index = (inT32) floor (row->intercept ());
    distance = deltas[line_index - min_y];
    if (find_best_dropout_row (row, distance, block->line_spacing / 2,
    line_index, &row_it, testing_on)) {
#ifndef GRAPHICS_DISABLED
      if (testing_on)
        plot_parallel_row(row, gradient, block_edge,
                          ScrollView::WHITE, rotation);
#endif
      blob_it.add_list_after (row_it.data ()->blob_list ());
      delete row_it.extract ();  //too far away
    }
  }
  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
    blob_it.add_list_after (row_it.data ()->blob_list ());
  }

  free_mem(deltas);
  free_mem(occupation);
}
TBOX deskew_block_coords ( TO_BLOCK block,
float  gradient 
)

Definition at line 835 of file makerow.cpp.

                         {
  TBOX result;                    //block bounds
  TBOX blob_box;                  //of block
  FCOORD rotation;               //deskew vector
  float length;                  //of gradient vector
  TO_ROW_IT row_it = block->get_rows ();
  TO_ROW *row;                   //current row
  BLOBNBOX *blob;                //current blob
  BLOBNBOX_IT blob_it;           //iterator

  length = sqrt (gradient * gradient + 1);
  rotation = FCOORD (1 / length, -gradient / length);
  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
    row = row_it.data ();
    blob_it.set_to_list (row->blob_list ());
    for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
    blob_it.forward ()) {
      blob = blob_it.data ();
      blob_box = blob->bounding_box ();
      blob_box.rotate (rotation);//de-skew it
      result += blob_box;
    }
  }
  return result;
}
void expand_rows ( ICOORD  page_tr,
TO_BLOCK block,
float  gradient,
FCOORD  rotation,
inT32  block_edge,
BOOL8  testing_on 
)

Definition at line 1065 of file makerow.cpp.

                  {
  BOOL8 swallowed_row;           //eaten a neighbour
  float y_max, y_min;            //new row limits
  float y_bottom, y_top;         //allowed limits
  TO_ROW *test_row;              //next row
  TO_ROW *row;                   //current row
                                 //iterators
  BLOBNBOX_IT blob_it = &block->blobs;
  TO_ROW_IT row_it = block->get_rows ();

#ifndef GRAPHICS_DISABLED
  if (textord_show_expanded_rows && testing_on) {
    if (to_win == NULL)
      create_to_win(page_tr);
  }
#endif

  adjust_row_limits(block);  //shift min,max.
  if (textord_new_initial_xheight) {
    if (block->get_rows ()->length () == 0)
      return;
    compute_row_stats(block, textord_show_expanded_rows &&testing_on);
  }
  assign_blobs_to_rows (block, &gradient, 4, TRUE, FALSE, FALSE);
  //get real membership
  if (block->get_rows ()->length () == 0)
    return;
  fit_parallel_rows(block,
                    gradient,
                    rotation,
                    block_edge,
                    textord_show_expanded_rows &&testing_on);
  if (!textord_new_initial_xheight)
    compute_row_stats(block, textord_show_expanded_rows &&testing_on);
  row_it.move_to_last ();
  do {
    row = row_it.data ();
    y_max = row->max_y ();       //get current limits
    y_min = row->min_y ();
    y_bottom = row->intercept () - block->line_size * textord_expansion_factor *
      tesseract::CCStruct::kDescenderFraction;
    y_top = row->intercept () + block->line_size * textord_expansion_factor *
        (tesseract::CCStruct::kXHeightFraction +
         tesseract::CCStruct::kAscenderFraction);
    if (y_min > y_bottom) {      //expansion allowed
      if (textord_show_expanded_rows && testing_on)
        tprintf("Expanding bottom of row at %f from %f to %f\n",
                row->intercept(), y_min, y_bottom);
                                 //expandable
      swallowed_row = TRUE;
      while (swallowed_row && !row_it.at_last ()) {
        swallowed_row = FALSE;
                                 //get next one
        test_row = row_it.data_relative (1);
                                 //overlaps space
        if (test_row->max_y () > y_bottom) {
          if (test_row->min_y () > y_bottom) {
            if (textord_show_expanded_rows && testing_on)
              tprintf("Eating row below at %f\n", test_row->intercept());
            row_it.forward ();
#ifndef GRAPHICS_DISABLED
            if (textord_show_expanded_rows && testing_on)
              plot_parallel_row(test_row,
                                gradient,
                                block_edge,
                                ScrollView::WHITE,
                                rotation);
#endif
            blob_it.set_to_list (row->blob_list ());
            blob_it.add_list_after (test_row->blob_list ());
                                 //swallow complete row
            delete row_it.extract ();
            row_it.backward ();
            swallowed_row = TRUE;
          }
          else if (test_row->max_y () < y_min) {
                                 //shorter limit
            y_bottom = test_row->max_y ();
            if (textord_show_expanded_rows && testing_on)
              tprintf("Truncating limit to %f due to touching row at %f\n",
                      y_bottom, test_row->intercept());
          }
          else {
            y_bottom = y_min;    //can't expand it
            if (textord_show_expanded_rows && testing_on)
              tprintf("Not expanding limit beyond %f due to touching row at %f\n",
                      y_bottom, test_row->intercept());
          }
        }
      }
      y_min = y_bottom;          //expand it
    }
    if (y_max < y_top) {         //expansion allowed
      if (textord_show_expanded_rows && testing_on)
        tprintf("Expanding top of row at %f from %f to %f\n",
                row->intercept(), y_max, y_top);
      swallowed_row = TRUE;
      while (swallowed_row && !row_it.at_first ()) {
        swallowed_row = FALSE;
                                 //get one above
        test_row = row_it.data_relative (-1);
        if (test_row->min_y () < y_top) {
          if (test_row->max_y () < y_top) {
            if (textord_show_expanded_rows && testing_on)
              tprintf("Eating row above at %f\n", test_row->intercept());
            row_it.backward ();
            blob_it.set_to_list (row->blob_list ());
#ifndef GRAPHICS_DISABLED
            if (textord_show_expanded_rows && testing_on)
              plot_parallel_row(test_row,
                                gradient,
                                block_edge,
                                ScrollView::WHITE,
                                rotation);
#endif
            blob_it.add_list_after (test_row->blob_list ());
                                 //swallow complete row
            delete row_it.extract ();
            row_it.forward ();
            swallowed_row = TRUE;
          }
          else if (test_row->min_y () < y_max) {
                                 //shorter limit
            y_top = test_row->min_y ();
            if (textord_show_expanded_rows && testing_on)
              tprintf("Truncating limit to %f due to touching row at %f\n",
                      y_top, test_row->intercept());
          }
          else {
            y_top = y_max;       //can't expand it
            if (textord_show_expanded_rows && testing_on)
              tprintf("Not expanding limit beyond %f due to touching row at %f\n",
                      y_top, test_row->intercept());
          }
        }
      }
      y_max = y_top;
    }
                                 //new limits
    row->set_limits (y_min, y_max);
    row_it.backward ();
  }
  while (!row_it.at_last ());
}
void fill_heights ( TO_ROW row,
float  gradient,
int  min_height,
int  max_height,
STATS heights,
STATS floating_heights 
)

Definition at line 1526 of file makerow.cpp.

                                                                           {
  float xcentre;                 // centre of blob
  float top;                     // top y coord of blob
  float height;                  // height of blob
  BLOBNBOX *blob;                // current blob
  int repeated_set;
  BLOBNBOX_IT blob_it = row->blob_list();
  if (blob_it.empty()) return;  // no blobs in this row
  bool has_rep_chars =
    row->rep_chars_marked() && row->num_repeated_sets() > 0;
  do {
    blob = blob_it.data();
    if (!blob->joined_to_prev()) {
      xcentre = (blob->bounding_box().left() +
                 blob->bounding_box().right()) / 2.0f;
      top = blob->bounding_box().top();
      height = blob->bounding_box().height();
      if (textord_fix_xheight_bug)
        top -= row->baseline.y(xcentre);
      else
        top -= gradient * xcentre + row->parallel_c();
      if (top >= min_height && top <= max_height) {
        heights->add(static_cast<inT32>(floor(top + 0.5)), 1);
        if (height / top < textord_min_blob_height_fraction) {
          floating_heights->add(static_cast<inT32>(floor(top + 0.5)), 1);
        }
      }
    }
    // Skip repeated chars, since they are likely to skew the height stats.
    if (has_rep_chars && blob->repeated_set() != 0) {
      repeated_set = blob->repeated_set();
      blob_it.forward();
      while (!blob_it.at_first() &&
             blob_it.data()->repeated_set() == repeated_set) {
        blob_it.forward();
        if (textord_debug_xheights)
          tprintf("Skipping repeated char when computing xheight\n");
      }
    } else {
      blob_it.forward();
    }
  } while (!blob_it.at_first());
}
BOOL8 find_best_dropout_row ( TO_ROW row,
inT32  distance,
float  dist_limit,
inT32  line_index,
TO_ROW_IT *  row_it,
BOOL8  testing_on 
)

Definition at line 755 of file makerow.cpp.

                             {
  inT32 next_index;              //of neigbouring row
  inT32 row_offset;              //from current row
  inT32 abs_dist;                //absolute distance
  inT8 row_inc;                  //increment to row_index
  TO_ROW *next_row;              //nextious row

  if (testing_on)
    tprintf ("Row at %g(%g), dropout dist=%d,",
      row->intercept (), row->parallel_c (), distance);
  if (distance < 0) {
    row_inc = 1;
    abs_dist = -distance;
  }
  else {
    row_inc = -1;
    abs_dist = distance;
  }
  if (abs_dist > dist_limit) {
    if (testing_on) {
      tprintf (" too far - deleting\n");
    }
    return TRUE;
  }
  if ((distance < 0 && !row_it->at_last ())
  || (distance >= 0 && !row_it->at_first ())) {
    row_offset = row_inc;
    do {
      next_row = row_it->data_relative (row_offset);
      next_index = (inT32) floor (next_row->intercept ());
      if ((distance < 0
        && next_index < line_index
        && next_index > line_index + distance + distance)
        || (distance >= 0
        && next_index > line_index
      && next_index < line_index + distance + distance)) {
        if (testing_on) {
          tprintf (" nearer neighbour (%d) at %g\n",
            line_index + distance - next_index,
            next_row->intercept ());
        }
        return TRUE;             //other is nearer
      }
      else if (next_index == line_index
      || next_index == line_index + distance + distance) {
        if (row->believability () <= next_row->believability ()) {
          if (testing_on) {
            tprintf (" equal but more believable at %g (%g/%g)\n",
              next_row->intercept (),
              row->believability (),
              next_row->believability ());
          }
          return TRUE;           //other is more believable
        }
      }
      row_offset += row_inc;
    }
    while ((next_index == line_index
      || next_index == line_index + distance + distance)
      && row_offset < row_it->length ());
    if (testing_on)
      tprintf (" keeping\n");
  }
  return FALSE;
}
void fit_lms_line ( TO_ROW row)

Definition at line 276 of file makerow.cpp.

                               {
  float m, c;                    // fitted line
  tesseract::DetLineFit lms;
  BLOBNBOX_IT blob_it = row->blob_list();

  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    const TBOX& box = blob_it.data()->bounding_box();
    lms.Add(ICOORD((box.left() + box.right()) / 2, box.bottom()));
  }
  double error = lms.Fit(&m, &c);
  row->set_line(m, c, error);
}
void fit_parallel_lms ( float  gradient,
TO_ROW row 
)

Definition at line 2093 of file makerow.cpp.

                                                   {
  float c;                       // fitted line
  int blobcount;                 // no of blobs
   tesseract::DetLineFit lms;
  BLOBNBOX_IT blob_it = row->blob_list();

  blobcount = 0;
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    if (!blob_it.data()->joined_to_prev()) {
      const TBOX& box = blob_it.data()->bounding_box();
      lms.Add(ICOORD((box.left() + box.right()) / 2, box.bottom()));
      blobcount++;
    }
  }
  double error = lms.ConstrainedFit(gradient, &c);
  row->set_parallel_line(gradient, c, error);
  if (textord_straight_baselines && blobcount > textord_lms_line_trials) {
    error = lms.Fit(&gradient, &c);
  }
                                 //set the other too
  row->set_line(gradient, c, error);
}
void fit_parallel_rows ( TO_BLOCK block,
float  gradient,
FCOORD  rotation,
inT32  block_edge,
BOOL8  testing_on 
)

Definition at line 2051 of file makerow.cpp.

                        {
#ifndef GRAPHICS_DISABLED
  ScrollView::Color colour;                 //of row
#endif
  TO_ROW_IT row_it = block->get_rows ();

  row_it.move_to_first ();
  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
    if (row_it.data ()->blob_list ()->empty ())
      delete row_it.extract ();  //nothing in it
    else
      fit_parallel_lms (gradient, row_it.data ());
  }
#ifndef GRAPHICS_DISABLED
  if (testing_on) {
    colour = ScrollView::RED;
    for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
      plot_parallel_row (row_it.data (), gradient,
        block_edge, colour, rotation);
      colour = (ScrollView::Color) (colour + 1);
      if (colour > ScrollView::MAGENTA)
        colour = ScrollView::RED;
    }
  }
#endif
  row_it.sort (row_y_order);     //may have gone out of order
}
double* linear_spline_baseline ( TO_ROW row,
TO_BLOCK block,
inT32 segments,
inT32  xstarts[] 
)

Definition at line 2311 of file makerow.cpp.

  {
  int blobcount;                 //no of blobs
  int blobindex;                 //current blob
  int index1, index2;            //blob numbers
  int blobs_per_segment;         //blobs in each
  TBOX box;                       //blob box
  TBOX new_box;                   //new_it box
                                 //blobs
  BLOBNBOX_IT blob_it = row->blob_list ();
  BLOBNBOX_IT new_it = blob_it;  //front end
  float b, c;                    //fitted curve
  tesseract::DetLineFit lms;
  double *coeffs;                //quadratic coeffs
  inT32 segment;                 //current segment

  box = box_next_pre_chopped (&blob_it);
  xstarts[0] = box.left ();
  blobcount = 1;
  while (!blob_it.at_first ()) {
    blobcount++;
    box = box_next_pre_chopped (&blob_it);
  }
  segments = blobcount / textord_spline_medianwin;
  if (segments < 1)
    segments = 1;
  blobs_per_segment = blobcount / segments;
  coeffs = (double *) alloc_mem (segments * 3 * sizeof (double));
  if (textord_oldbl_debug)
    tprintf
      ("Linear splining baseline of %d blobs at (%d,%d), into %d segments of %d blobs\n",
      blobcount, box.left (), box.bottom (), segments, blobs_per_segment);
  segment = 1;
  for (index2 = 0; index2 < blobs_per_segment / 2; index2++)
    box_next_pre_chopped(&new_it);
  index1 = 0;
  blobindex = index2;
  do {
    blobindex += blobs_per_segment;
    lms.Clear();
    while (index1 < blobindex || (segment == segments && index1 < blobcount)) {
      box = box_next_pre_chopped (&blob_it);
      int middle = (box.left() + box.right()) / 2;
      lms.Add(ICOORD(middle, box.bottom()));
      index1++;
      if (index1 == blobindex - blobs_per_segment / 2
      || index1 == blobcount - 1) {
        xstarts[segment] = box.left ();
      }
    }
    lms.Fit(&b, &c);
    coeffs[segment * 3 - 3] = 0;
    coeffs[segment * 3 - 2] = b;
    coeffs[segment * 3 - 1] = c;
    segment++;
    if (segment > segments)
      break;

    blobindex += blobs_per_segment;
    lms.Clear();
    while (index2 < blobindex || (segment == segments && index2 < blobcount)) {
      new_box = box_next_pre_chopped (&new_it);
      int middle = (new_box.left() + new_box.right()) / 2;
      lms.Add(ICOORD (middle, new_box.bottom()));
      index2++;
      if (index2 == blobindex - blobs_per_segment / 2
      || index2 == blobcount - 1) {
        xstarts[segment] = new_box.left ();
      }
    }
    lms.Fit(&b, &c);
    coeffs[segment * 3 - 3] = 0;
    coeffs[segment * 3 - 2] = b;
    coeffs[segment * 3 - 1] = c;
    segment++;
  }
  while (segment <= segments);
  return coeffs;
}
void make_baseline_spline ( TO_ROW row,
TO_BLOCK block 
)

Definition at line 2178 of file makerow.cpp.

                                           {
  BLOBNBOX_IT blob_it = row->blob_list ();
  inT32 *xstarts;                // spline boundaries
  double *coeffs;                // quadratic coeffs
  inT32 segments;                // no of segments

  xstarts =
    (inT32 *) alloc_mem((row->blob_list()->length() + 1) * sizeof(inT32));
  if (segment_baseline(row, block, segments, xstarts)
  && !textord_straight_baselines && !textord_parallel_baselines) {
    coeffs = linear_spline_baseline(row, block, segments, xstarts);
  } else {
    xstarts[1] = xstarts[segments];
    segments = 1;
    coeffs = (double *) alloc_mem (3 * sizeof (double));
    coeffs[0] = 0;
    coeffs[1] = row->line_m ();
    coeffs[2] = row->line_c ();
  }
  row->baseline = QSPLINE (segments, xstarts, coeffs);
  free_mem(coeffs);
  free_mem(xstarts);
}
void make_initial_textrows ( ICOORD  page_tr,
TO_BLOCK block,
FCOORD  rotation,
BOOL8  testing_on 
)

Definition at line 236 of file makerow.cpp.

                            {
  TO_ROW_IT row_it = block->get_rows ();

#ifndef GRAPHICS_DISABLED
  ScrollView::Color colour;                 //of row

  if (textord_show_initial_rows && testing_on) {
    if (to_win == NULL)
      create_to_win(page_tr);
  }
#endif
                                 //guess skew
  assign_blobs_to_rows (block, NULL, 0, TRUE, TRUE, textord_show_initial_rows && testing_on);
  row_it.move_to_first ();
  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())
    fit_lms_line (row_it.data ());
#ifndef GRAPHICS_DISABLED
  if (textord_show_initial_rows && testing_on) {
    colour = ScrollView::RED;
    for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
      plot_to_row (row_it.data (), colour, rotation);
      colour = (ScrollView::Color) (colour + 1);
      if (colour > ScrollView::MAGENTA)
        colour = ScrollView::RED;
    }
  }
#endif
}
float make_rows ( ICOORD  page_tr,
TO_BLOCK_LIST *  port_blocks 
)

Definition at line 197 of file makerow.cpp.

                                                            {
  float port_m;                  // global skew
  float port_err;                // global noise
  TO_BLOCK_IT block_it;          // iterator

  block_it.set_to_list(port_blocks);
  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
       block_it.forward())
  make_initial_textrows(page_tr, block_it.data(), FCOORD(1.0f, 0.0f),
      !(BOOL8) textord_test_landscape);
                                 // compute globally
  compute_page_skew(port_blocks, port_m, port_err);
  block_it.set_to_list(port_blocks);
  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
    cleanup_rows_making(page_tr, block_it.data(), port_m, FCOORD(1.0f, 0.0f),
                 block_it.data()->block->bounding_box().left(),
                 !(BOOL8)textord_test_landscape);
  }
  return port_m;                 // global skew
}
float make_single_row ( ICOORD  page_tr,
TO_BLOCK block,
TO_BLOCK_LIST *  blocks 
)

Definition at line 167 of file makerow.cpp.

                                                                              {
  BLOBNBOX_IT blob_it = &block->blobs;
  TO_ROW_IT row_it = block->get_rows();

  // Include all the small blobs and large blobs.
  blob_it.add_list_after(&block->small_blobs);
  blob_it.add_list_after(&block->noise_blobs);
  blob_it.add_list_after(&block->large_blobs);
  if (block->blobs.singleton()) {
    blob_it.move_to_first();
    float size = MakeRowFromSubBlobs(block, blob_it.data()->cblob(), &row_it);
    if (size > block->line_size)
      block->line_size = size;
  }
  MakeRowFromBlobs(block->line_size, &blob_it, &row_it);
  // Fit an LMS line to the rows.
  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward())
    fit_lms_line(row_it.data());
  float gradient;
  float fit_error;
  // Compute the skew based on the fitted line.
  compute_page_skew(blocks, gradient, fit_error);
  return gradient;
}
float MakeRowFromSubBlobs ( TO_BLOCK block,
C_BLOB blob,
TO_ROW_IT *  row_it 
)

Definition at line 137 of file makerow.cpp.

                                                                            {
  // The blobs made from the children will go in the small_blobs list.
  BLOBNBOX_IT bb_it(&block->small_blobs);
  C_OUTLINE_IT ol_it(blob->out_list());
  // Get the children.
  ol_it.set_to_list(ol_it.data()->child());
  if (ol_it.empty())
    return 0.0f;
  for (ol_it.mark_cycle_pt(); !ol_it.cycled_list(); ol_it.forward()) {
    // Deep copy the child outline and use that to make a blob.
    C_OUTLINE* outline = C_OUTLINE::deep_copy(ol_it.data());
    // The constructor from a list of outlines corrects the direction.
    C_OUTLINE_LIST outlines;
    C_OUTLINE_IT ol_it(&outlines);
    ol_it.add_after_then_move(outline);
    C_BLOB* blob = new C_BLOB(&outlines);
    BLOBNBOX* bbox = new BLOBNBOX(blob);
    bb_it.add_after_then_move(bbox);
  }
  // Now we can make a row from the blobs.
  return MakeRowFromBlobs(block->line_size, &bb_it, row_it);
}
void mark_repeated_chars ( TO_ROW row)

Definition at line 2760 of file makerow.cpp.

                                      {
  BLOBNBOX_IT box_it(row->blob_list());            // Iterator.
  int num_repeated_sets = 0;
  if (!box_it.empty()) {
    do {
      BLOBNBOX* bblob = box_it.data();
      int repeat_length = 0;
      if (bblob->flow() == BTFT_LEADER &&
          !bblob->joined_to_prev() && bblob->cblob() != NULL) {
        BLOBNBOX_IT test_it(box_it);
        for (test_it.forward(); !test_it.at_first(); test_it.forward()) {
          bblob = test_it.data();
          if (bblob->flow() != BTFT_LEADER)
            break;
          if (bblob->joined_to_prev() || bblob->cblob() == NULL) {
            tprintf("Cancelled repeat of length %d due to %s\n",
                    repeat_length,
                    bblob->joined_to_prev() ? "Joined" : "Null");
            repeat_length = 0;
            break;
          }
          ++repeat_length;
        }
      }
      if (repeat_length >= kMinLeaderCount) {
        num_repeated_sets++;
        for (; repeat_length > 0; box_it.forward(), --repeat_length) {
          bblob = box_it.data();
          bblob->set_repeated_set(num_repeated_sets);
        }
        if (!box_it.at_first())
          bblob->set_repeated_set(0);
     } else {
        box_it.forward();
        bblob->set_repeated_set(0);
      }
    } while (!box_it.at_first());  // until all done
  }
  row->set_num_repeated_sets(num_repeated_sets);
}
OVERLAP_STATE most_overlapping_row ( TO_ROW_IT *  row_it,
TO_ROW *&  best_row,
float  top,
float  bottom,
float  rowsize,
BOOL8  testing_blob 
)

Definition at line 2598 of file makerow.cpp.

                                    {
  OVERLAP_STATE result;          //result of tests
  float overlap;                 //of blob & row
  float bestover;                //nearest row
  float merge_top, merge_bottom; //size of merged row
  ICOORD testpt;                 //testing only
  TO_ROW *row;                   //current row
  TO_ROW *test_row;              //for multiple overlaps
  BLOBNBOX_IT blob_it;           //for merging rows

  result = ASSIGN;
  row = row_it->data ();
  bestover = top - bottom;
  if (top > row->max_y ())
    bestover -= top - row->max_y ();
  if (bottom < row->min_y ())
                                 //compute overlap
    bestover -= row->min_y () - bottom;
  if (testing_blob) {
    tprintf ("Test blob y=(%g,%g), row=(%f,%f), overlap=%f\n",
      bottom, top, row->min_y (), row->max_y (), bestover);
  }
  test_row = row;
  do {
    if (!row_it->at_last ()) {
      row_it->forward ();
      test_row = row_it->data ();
      if (test_row->min_y () <= top && test_row->max_y () >= bottom) {
        merge_top =
          test_row->max_y () >
          row->max_y ()? test_row->max_y () : row->max_y ();
        merge_bottom =
          test_row->min_y () <
          row->min_y ()? test_row->min_y () : row->min_y ();
        if (merge_top - merge_bottom <= rowsize) {
          if (testing_blob) {
            tprintf ("Merging rows at (%g,%g), (%g,%g)\n",
              row->min_y (), row->max_y (),
              test_row->min_y (), test_row->max_y ());
          }
          test_row->set_limits (merge_bottom, merge_top);
          blob_it.set_to_list (test_row->blob_list ());
          blob_it.add_list_after (row->blob_list ());
          blob_it.sort (blob_x_order);
          row_it->backward ();
          delete row_it->extract ();
          row_it->forward ();
          bestover = -1.0f;      //force replacement
        }
        overlap = top - bottom;
        if (top > test_row->max_y ())
          overlap -= top - test_row->max_y ();
        if (bottom < test_row->min_y ())
          overlap -= test_row->min_y () - bottom;
        if (bestover >= rowsize - 1 && overlap >= rowsize - 1) {
          result = REJECT;
        }
        if (overlap > bestover) {
          bestover = overlap;    //find biggest overlap
          row = test_row;
        }
        if (testing_blob) {
          tprintf
            ("Test blob y=(%g,%g), row=(%f,%f), overlap=%f->%f\n",
            bottom, top, test_row->min_y (), test_row->max_y (),
            overlap, bestover);
        }
      }
    }
  }
  while (!row_it->at_last ()
    && test_row->min_y () <= top && test_row->max_y () >= bottom);
  while (row_it->data () != row)
    row_it->backward ();         //make it point to row
                                 //doesn't overlap much
  if (top - bottom - bestover > rowsize * textord_overlap_x &&
      (!textord_fix_makerow_bug || bestover < rowsize * textord_overlap_x)
    && result == ASSIGN)
    result = NEW_ROW;            //doesn't overlap enough
  best_row = row;
  return result;
}
void pre_associate_blobs ( ICOORD  page_tr,
TO_BLOCK block,
FCOORD  rotation,
BOOL8  testing_on 
)

Definition at line 1965 of file makerow.cpp.

                          {
#ifndef GRAPHICS_DISABLED
  ScrollView::Color colour;                 //of boxes
#endif
  BLOBNBOX *blob;                //current blob
  BLOBNBOX *nextblob;            //next in list
  TBOX blob_box;
  FCOORD blob_rotation;          //inverse of rotation
  BLOBNBOX_IT blob_it;           //iterator
  BLOBNBOX_IT start_it;          //iterator
  TO_ROW_IT row_it = block->get_rows ();

#ifndef GRAPHICS_DISABLED
  colour = ScrollView::RED;
#endif

  blob_rotation = FCOORD (rotation.x (), -rotation.y ());
  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
                                 //get blobs
    blob_it.set_to_list (row_it.data ()->blob_list ());
    for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
    blob_it.forward ()) {
      blob = blob_it.data ();
      blob_box = blob->bounding_box ();
      start_it = blob_it;        //save start point
      //                      if (testing_on && textord_show_final_blobs)
      //                      {
      //                              tprintf("Blob at (%d,%d)->(%d,%d), addr=%x, count=%d\n",
      //                                      blob_box.left(),blob_box.bottom(),
      //                                      blob_box.right(),blob_box.top(),
      //                                      (void*)blob,blob_it.length());
      //                      }
      bool overlap;
      do {
        overlap = false;
        if (!blob_it.at_last ()) {
          nextblob = blob_it.data_relative(1);
          overlap = blob_box.major_x_overlap(nextblob->bounding_box());
          if (overlap) {
            blob->merge(nextblob); // merge new blob
            blob_box = blob->bounding_box(); // get bigger box
            blob_it.forward();
          }
        }
      }
      while (overlap);
      blob->chop (&start_it, &blob_it,
        blob_rotation,
        block->line_size * tesseract::CCStruct::kXHeightFraction *
        textord_chop_width);
      //attempt chop
    }
#ifndef GRAPHICS_DISABLED
    if (testing_on && textord_show_final_blobs) {
      if (to_win == NULL)
        create_to_win(page_tr);
      to_win->Pen(colour);
      for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
      blob_it.forward ()) {
        blob = blob_it.data ();
        blob_box = blob->bounding_box ();
        blob_box.rotate (rotation);
        if (!blob->joined_to_prev ()) {
          to_win->Rectangle (blob_box.left (), blob_box.bottom (),
            blob_box.right (), blob_box.top ());
        }
      }
      colour = (ScrollView::Color) (colour + 1);
      if (colour > ScrollView::MAGENTA)
        colour = ScrollView::RED;
    }
#endif
  }
}
int row_spacing_order ( const void *  item1,
const void *  item2 
)

Definition at line 2738 of file makerow.cpp.

                                         {
                                 //converted ptr
  TO_ROW *row1 = *(TO_ROW **) item1;
                                 //converted ptr
  TO_ROW *row2 = *(TO_ROW **) item2;

  if (row1->spacing < row2->spacing)
    return -1;
  else if (row1->spacing > row2->spacing)
    return 1;
  else
    return 0;
}
int row_y_order ( const void *  item1,
const void *  item2 
)

Definition at line 2716 of file makerow.cpp.

                                   {
                                 //converted ptr
  TO_ROW *row1 = *(TO_ROW **) item1;
                                 //converted ptr
  TO_ROW *row2 = *(TO_ROW **) item2;

  if (row1->parallel_c () > row2->parallel_c ())
    return -1;
  else if (row1->parallel_c () < row2->parallel_c ())
    return 1;
  else
    return 0;
}
BOOL8 segment_baseline ( TO_ROW row,
TO_BLOCK block,
inT32 segments,
inT32  xstarts[] 
)

Definition at line 2212 of file makerow.cpp.

  {
  BOOL8 needs_curve;             //needs curved line
  int blobcount;                 //no of blobs
  int blobindex;                 //current blob
  int last_state;                //above, on , below
  int state;                     //of current blob
  float yshift;                  //from baseline
  TBOX box;                       //blob box
  TBOX new_box;                   //new_it box
  float middle;                  //xcentre of blob
                                 //blobs
  BLOBNBOX_IT blob_it = row->blob_list ();
  BLOBNBOX_IT new_it = blob_it;  //front end
  SORTED_FLOATS yshifts;         //shifts from baseline

  needs_curve = FALSE;
  box = box_next_pre_chopped (&blob_it);
  xstarts[0] = box.left ();
  segments = 1;
  blobcount = row->blob_list ()->length ();
  if (textord_oldbl_debug)
    tprintf ("Segmenting baseline of %d blobs at (%d,%d)\n",
      blobcount, box.left (), box.bottom ());
  if (blobcount <= textord_spline_medianwin
  || blobcount < textord_spline_minblobs) {
    blob_it.move_to_last ();
    box = blob_it.data ()->bounding_box ();
    xstarts[1] = box.right ();
    return FALSE;
  }
  last_state = 0;
  new_it.mark_cycle_pt ();
  for (blobindex = 0; blobindex < textord_spline_medianwin; blobindex++) {
    new_box = box_next_pre_chopped (&new_it);
    middle = (new_box.left () + new_box.right ()) / 2.0;
    yshift = new_box.bottom () - row->line_m () * middle - row->line_c ();
                                 //record shift
    yshifts.add (yshift, blobindex);
    if (new_it.cycled_list ()) {
      xstarts[1] = new_box.right ();
      return FALSE;
    }
  }
  for (blobcount = 0; blobcount < textord_spline_medianwin / 2; blobcount++)
    box = box_next_pre_chopped (&blob_it);
  do {
    new_box = box_next_pre_chopped (&new_it);
                                 //get middle one
    yshift = yshifts[textord_spline_medianwin / 2];
    if (yshift > textord_spline_shift_fraction * block->line_size)
      state = 1;
    else if (-yshift > textord_spline_shift_fraction * block->line_size)
      state = -1;
    else
      state = 0;
    if (state != 0)
      needs_curve = TRUE;
    //              tprintf("State=%d, prev=%d, shift=%g\n",
    //                      state,last_state,yshift);
    if (state != last_state && blobcount > textord_spline_minblobs) {
      xstarts[segments++] = box.left ();
      blobcount = 0;
    }
    last_state = state;
    yshifts.remove (blobindex - textord_spline_medianwin);
    box = box_next_pre_chopped (&blob_it);
    middle = (new_box.left () + new_box.right ()) / 2.0;
    yshift = new_box.bottom () - row->line_m () * middle - row->line_c ();
    yshifts.add (yshift, blobindex);
    blobindex++;
    blobcount++;
  }
  while (!new_it.cycled_list ());
  if (blobcount > textord_spline_minblobs || segments == 1) {
    xstarts[segments] = new_box.right ();
  }
  else {
    xstarts[--segments] = new_box.right ();
  }
  if (textord_oldbl_debug)
    tprintf ("Made %d segments on row at (%d,%d)\n",
      segments, box.right (), box.bottom ());
  return needs_curve;
}
void separate_underlines ( TO_BLOCK block,
float  gradient,
FCOORD  rotation,
BOOL8  testing_on 
)

Definition at line 1892 of file makerow.cpp.

                                           {  // correct orientation
  BLOBNBOX *blob;                // current blob
  C_BLOB *rotated_blob;          // rotated blob
  TO_ROW *row;                   // current row
  float length;                  // of g_vec
  TBOX blob_box;
  FCOORD blob_rotation;          // inverse of rotation
  FCOORD g_vec;                  // skew rotation
  BLOBNBOX_IT blob_it;           // iterator
                                 // iterator
  BLOBNBOX_IT under_it = &block->underlines;
  BLOBNBOX_IT large_it = &block->large_blobs;
  TO_ROW_IT row_it = block->get_rows();
  int min_blob_height = static_cast<int>(textord_min_blob_height_fraction *
                                         block->line_size + 0.5);

                                 // length of vector
  length = sqrt(1 + gradient * gradient);
  g_vec = FCOORD(1 / length, -gradient / length);
  blob_rotation = FCOORD(rotation.x(), -rotation.y());
  blob_rotation.rotate(g_vec);  // undoing everything
  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
    row = row_it.data();
                                 // get blobs
    blob_it.set_to_list(row->blob_list());
    for (blob_it.mark_cycle_pt(); !blob_it.cycled_list();
         blob_it.forward()) {
      blob = blob_it.data();
      blob_box = blob->bounding_box();
      if (blob_box.width() > block->line_size * textord_underline_width) {
        ASSERT_HOST(blob->cblob() != NULL);
        rotated_blob = crotate_cblob (blob->cblob(),
          blob_rotation);
        if (test_underline(
            testing_on && textord_show_final_rows,
            rotated_blob, static_cast<inT16>(row->intercept()),
            static_cast<inT16>(
                block->line_size *
                (tesseract::CCStruct::kXHeightFraction +
                 tesseract::CCStruct::kAscenderFraction / 2.0f)))) {
          under_it.add_after_then_move(blob_it.extract());
          if (testing_on && textord_show_final_rows) {
            tprintf("Underlined blob at:");
              rotated_blob->bounding_box().print();
            tprintf("Was:");
              blob_box.print();
          }
        } else if (CountOverlaps(blob->bounding_box(), min_blob_height,
                                 row->blob_list()) >
                   textord_max_blob_overlaps) {
          large_it.add_after_then_move(blob_it.extract());
          if (testing_on && textord_show_final_rows) {
            tprintf("Large blob overlaps %d blobs at:",
                    CountOverlaps(blob_box, min_blob_height,
                                  row->blob_list()));
            blob_box.print();
          }
        }
        delete rotated_blob;
      }
    }
  }
}

Variable Documentation

const int kMinLeaderCount = 5

Definition at line 107 of file makerow.cpp.

const int kMinSize = 8

Definition at line 393 of file makerow.cpp.

const double kNoiseSize = 0.5

Definition at line 392 of file makerow.cpp.

"Min pile height to make ascheight"

Definition at line 94 of file makerow.cpp.

double textord_ascx_ratio_max = 1.8

"Max cap/xheight"

Definition at line 98 of file makerow.cpp.

double textord_ascx_ratio_min = 1.25

"Min cap/xheight"

Definition at line 97 of file makerow.cpp.

"Bias skew estimates with line length"

Definition at line 59 of file makerow.cpp.

double textord_chop_width = 1.5

"Max width before chopping"

Definition at line 79 of file makerow.cpp.

"Test xheight algorithms"

Definition at line 58 of file makerow.cpp.

"Min pile height to make descheight"

Definition at line 96 of file makerow.cpp.

"Max desc/xheight"

Definition at line 100 of file makerow.cpp.

double textord_descx_ratio_min = 0.25

"Min desc/xheight"

Definition at line 99 of file makerow.cpp.

"New row made if blob makes row this big"

Definition at line 86 of file makerow.cpp.

"Factor to expand rows by in expand_rows"

Definition at line 81 of file makerow.cpp.

"Prevent multiple baselines"

Definition at line 57 of file makerow.cpp.

"Use spline baseline"

Definition at line 56 of file makerow.cpp.

bool textord_heavy_nr = 0

"Vigorously remove noise"

Definition at line 45 of file makerow.cpp.

"Interpolate across gaps"

Definition at line 60 of file makerow.cpp.

"Max iqr/median for linespace"

Definition at line 77 of file makerow.cpp.

"Number of linew fits to do"

Definition at line 102 of file makerow.cpp.

"Max number of blobs a big blob can overlap"

Definition at line 69 of file makerow.cpp.

"Min blob height/top to include blob top into xheight stats"

Definition at line 90 of file makerow.cpp.

"Min blobs before gradient counted"

Definition at line 65 of file makerow.cpp.

double textord_min_linesize = 1.25

"* blob height for initial linesize"

Definition at line 84 of file makerow.cpp.

"Min credible pixel xheight"

Definition at line 70 of file makerow.cpp.

double textord_minxh = 0.25

"fraction of linesize for min xheight"

Definition at line 83 of file makerow.cpp.

"Use test xheight mechanism"

Definition at line 103 of file makerow.cpp.

"Fraction of neighbourhood"

Definition at line 87 of file makerow.cpp.

"Use old baseline algorithm"

Definition at line 54 of file makerow.cpp.

"Use old xheight algorithm"

Definition at line 55 of file makerow.cpp.

double textord_overlap_x = 0.5

"Fraction of linespace for good overlap"

Definition at line 82 of file makerow.cpp.

"Force parallel baselines"

Definition at line 52 of file makerow.cpp.

"Display rows after expanding"

Definition at line 48 of file makerow.cpp.

"Display blob bounds after pre-ass"

Definition at line 50 of file makerow.cpp.

"Display rows after final fitting"

Definition at line 49 of file makerow.cpp.

"Display row accumulation"

Definition at line 46 of file makerow.cpp.

"Display page correlated rows"

Definition at line 47 of file makerow.cpp.

double textord_skew_ile = 0.5

"Ile of gradients for page skew"

Definition at line 75 of file makerow.cpp.

double textord_skew_lag = 0.01

"Lag for skew on row accumulation"

Definition at line 76 of file makerow.cpp.

"For smooth factor"

Definition at line 61 of file makerow.cpp.

"For smooth factor"

Definition at line 62 of file makerow.cpp.

"Size of window for spline segmentation"

Definition at line 67 of file makerow.cpp.

"Min blobs in each spline segment"

Definition at line 66 of file makerow.cpp.

"Fraction of line spacing for outlier"

Definition at line 74 of file makerow.cpp.

"Fraction of line spacing for quad"

Definition at line 72 of file makerow.cpp.

"Force straight baselines"

Definition at line 53 of file makerow.cpp.

"Tests refer to land/port"

Definition at line 51 of file makerow.cpp.

int textord_test_x = -1

"coord of test pt"

Definition at line 63 of file makerow.cpp.

int textord_test_y = -1

"coord of test pt"

Definition at line 64 of file makerow.cpp.

"Multiple of line_size for underline"

Definition at line 88 of file makerow.cpp.

double textord_width_limit = 8

"Max width of blobs to make rows"

Definition at line 78 of file makerow.cpp.

"Accepted variation"

Definition at line 101 of file makerow.cpp.

"Min pile height to make xheight"

Definition at line 92 of file makerow.cpp.