Tesseract  3.02
tesseract-ocr/textord/underlin.cpp File Reference
#include "mfcpch.h"
#include "underlin.h"

Go to the source code of this file.

Defines

#define PROJECTION_MARGIN   10
#define EXTERN

Functions

void restore_underlined_blobs (TO_BLOCK *block)
TO_ROW * most_overlapping_row (TO_ROW_LIST *rows, BLOBNBOX *blob)
void find_underlined_blobs (BLOBNBOX *u_line, QSPLINE *baseline, float xheight, float baseline_offset, ICOORDELT_LIST *chop_cells)
void vertical_cunderline_projection (C_OUTLINE *outline, QSPLINE *baseline, float xheight, float baseline_offset, STATS *lower_proj, STATS *middle_proj, STATS *upper_proj)

Variables

EXTERN double textord_underline_offset = 0.1
EXTERN bool textord_restore_underlines = 1

Define Documentation

#define EXTERN

Definition at line 27 of file underlin.cpp.

#define PROJECTION_MARGIN   10

Definition at line 26 of file underlin.cpp.


Function Documentation

void find_underlined_blobs ( BLOBNBOX *  u_line,
QSPLINE *  baseline,
float  xheight,
float  baseline_offset,
ICOORDELT_LIST *  chop_cells 
)

Definition at line 179 of file underlin.cpp.

                            {
  // Projects every outline of the underline blob onto the x axis relative
  // to the baseline, then adds one ICOORDELT to chop_cells for each maximal
  // run of columns whose middle projection count is positive. The x of a
  // cell is the first column of the run; its y is the first column after
  // the run (or the right edge of the blob's box). The upper and lower
  // projections are filled in but only the middle one is consulted here.
  TBOX underline_box = u_line->bounding_box ();
  const inT16 left_edge = underline_box.left ();
  const inT16 right_edge = underline_box.right ();
                                 //receives the chop cells
  ICOORDELT_IT chop_it = chop_cells;
  STATS upper_proj (left_edge, right_edge + 1);
  STATS middle_proj (left_edge, right_edge + 1);
  STATS lower_proj (left_edge, right_edge + 1);
  C_OUTLINE_IT outline_it;       //outlines of blob

  ASSERT_HOST (u_line->cblob () != NULL);

  // Accumulate the projections of all top-level outlines.
  outline_it.set_to_list (u_line->cblob ()->out_list ());
  for (outline_it.mark_cycle_pt (); !outline_it.cycled_list ();
       outline_it.forward ()) {
    vertical_cunderline_projection (outline_it.data (), baseline, xheight,
                                    baseline_offset, &lower_proj,
                                    &middle_proj, &upper_proj);
  }

  // Scan the columns left to right, emitting one cell per occupied run.
  inT16 run_start = left_edge;
  while (run_start < right_edge) {
    if (middle_proj.pile_count (run_start) <= 0) {
      run_start++;
      continue;
    }
    inT16 run_end = run_start + 1;
    while (run_end < right_edge && middle_proj.pile_count (run_end) > 0)
      run_end++;
    ICOORD run_cell (run_start, run_end);
    chop_it.add_after_then_move (new ICOORDELT (run_cell));
    // Column run_end is already known not to extend the run, so skip it.
    run_start = run_end + 1;
  }
}
TO_ROW* most_overlapping_row ( TO_ROW_LIST *  rows,
BLOBNBOX *  blob 
)

Definition at line 122 of file underlin.cpp.

                              {
  // Picks the row in rows with the largest vertical overlap with blob,
  // with every row baseline evaluated at the horizontal centre of the
  // blob's bounding box. Returns NULL when rows is empty. Both scans below
  // only step forward through the list, so the result depends on the order
  // of the rows.
  // NOTE(review): presumably the rows are sorted top-to-bottom - confirm.
  const TBOX &blob_box = blob->bounding_box ();
  const inT16 blob_top = blob_box.top ();
  const inT16 blob_bottom = blob_box.bottom ();
  const inT16 x = (blob_box.left () + blob_box.right ()) / 2;
  TO_ROW_IT row_it = rows;       //row iterator

  if (row_it.empty ())
    return NULL;

  TO_ROW *best_row = NULL;       //output row
  float bestover = (float) -MAX_INT32;  //best overlap
  TO_ROW *row = row_it.data ();  //current row
  row_it.mark_cycle_pt ();

  // Skip rows whose lower limit (baseline + descdrop) lies above the top of
  // the blob, remembering the last one as a fallback candidate.
  // NOTE(review): descdrop is added in the bestover expression although the
  // loop condition measures against baseline + descdrop; confirm the sign.
  while (row->baseline.y (x) + row->descdrop > blob_top
         && !row_it.cycled_list ()) {
    best_row = row;
    bestover = blob_top - row->baseline.y (x) + row->descdrop;
    row_it.forward ();
    row = row_it.data ();
  }

  // Score every row whose upper limit (baseline + xheight + ascrise)
  // reaches at least the bottom of the blob.
  while (row->baseline.y (x) + row->xheight + row->ascrise >= blob_bottom
         && !row_it.cycled_list ()) {
    // Upper end of the shared span: the lower of row top and blob top.
    float overlap = row->baseline.y (x) + row->xheight + row->ascrise;
    if (overlap > blob_top)
      overlap = blob_top;
    // Subtract the lower end: the higher of row bottom and blob bottom.
    if (row->baseline.y (x) + row->descdrop < blob_bottom)
      overlap -= blob_bottom;
    else
      overlap -= row->baseline.y (x) + row->descdrop;
    if (overlap > bestover) {
      bestover = overlap;
      best_row = row;
    }
    row_it.forward ();
    row = row_it.data ();
  }

  // Nothing truly overlapped: take the row the scan stopped on when the
  // signed distance from the blob bottom up to its top beats bestover.
  if (bestover < 0
      && row->baseline.y (x) + row->xheight + row->ascrise
         - blob_bottom > bestover) {
    best_row = row;
  }
  return best_row;
}
void restore_underlined_blobs ( TO_BLOCK *  block)

Definition at line 39 of file underlin.cpp.

                               {
  // Takes every blob out of block->underlines, finds the row it overlaps
  // most, and cuts it at the chop cells reported by find_underlined_blobs.
  // The left outlines produced by the split at a cell's x are collected in
  // residual_underlines; those produced by the split at the cell's y are
  // inserted into the row. At the end the residual underlines are put back
  // into block->underlines.
  // NOTE(review): split_to_blob is defined elsewhere; the comments on its
  // calls describe how its outputs are used here, not its full contract.
  inT16 chop_coord;              //chop boundary
  TBOX blob_box;                  //of underline
  BLOBNBOX *u_line;              //underline bit
  TO_ROW *row;                   //best row for blob
  ICOORDELT_LIST chop_cells;     //blobs to cut out
                                 //real underlines
  BLOBNBOX_LIST residual_underlines;
  C_OUTLINE_LIST left_coutlines;
  C_OUTLINE_LIST right_coutlines;
  ICOORDELT_IT cell_it = &chop_cells;
                                 //under lines
  BLOBNBOX_IT under_it = &block->underlines;
  BLOBNBOX_IT ru_it = &residual_underlines;

  if (block->get_rows()->empty())
    return;  // Don't crash if there are no rows.
  for (under_it.mark_cycle_pt (); !under_it.cycled_list ();
  under_it.forward ()) {
    // The blob is removed from the list here, so from this point it must be
    // either handed to split_to_blob or deleted below.
    u_line = under_it.extract ();
    blob_box = u_line->bounding_box ();
    row = most_overlapping_row (block->get_rows (), u_line);
    // The projection is taken relative to the row baseline, offset by the
    // fraction textord_underline_offset of the row's xheight.
    find_underlined_blobs (u_line, &row->baseline, row->xheight,
      row->xheight * textord_underline_offset,
      &chop_cells);
    cell_it.set_to_list (&chop_cells);
    for (cell_it.mark_cycle_pt (); !cell_it.cycled_list ();
    cell_it.forward ()) {
      chop_coord = cell_it.data ()->x ();
      // Cells no wider than textord_fp_chop_error + 1 are not chopped; they
      // are simply discarded by the delete at the end of this loop body.
      if (cell_it.data ()->y () - chop_coord > textord_fp_chop_error + 1) {
        // First cut, at the cell's x. u_line is the real blob only for the
        // first chopped cell; afterwards it is NULL (see below).
        split_to_blob (u_line, chop_coord,
          textord_fp_chop_error + 0.5,
          &left_coutlines,
          &right_coutlines);
        if (!left_coutlines.empty()) {
          ru_it.add_after_then_move(new BLOBNBOX(new C_BLOB(&left_coutlines)));
        }
        // Second cut, at the cell's y, always with a NULL blob.
        // NOTE(review): presumably a NULL blob makes split_to_blob continue
        // on what the previous call left in right_coutlines - confirm.
        chop_coord = cell_it.data ()->y ();
        split_to_blob(NULL, chop_coord, textord_fp_chop_error + 0.5,
                      &left_coutlines, &right_coutlines);
        if (!left_coutlines.empty()) {
          row->insert_blob(new BLOBNBOX(new C_BLOB(&left_coutlines)));
        } else {
          // A chopped cell that yields no outlines is treated as fatal.
          fprintf(stderr,
            "Error:no outlines after chopping from %d to %d from (%d,%d)->(%d,%d)\n",
            cell_it.data ()->x (), cell_it.data ()->y (),
            blob_box.left (), blob_box.bottom (),
            blob_box.right (), blob_box.top ());
          ASSERT_HOST(FALSE);
        }
        u_line = NULL;           //no more blobs to add
      }
      delete cell_it.extract();
    }
    // Whatever is still in right_coutlines after the last cell is split at
    // the right edge of the original box and kept as a residual underline.
    if (!right_coutlines.empty ()) {
      split_to_blob(NULL, blob_box.right(), textord_fp_chop_error + 0.5,
                    &left_coutlines, &right_coutlines);
      if (!left_coutlines.empty())
        ru_it.add_after_then_move(new BLOBNBOX(new C_BLOB(&left_coutlines)));
    }
    // u_line is still non-NULL only if no cell was chopped, i.e. the blob
    // was never passed to split_to_blob; it is deleted together with its
    // C_BLOB rather than being put back.
    if (u_line != NULL) {
      if (u_line->cblob() != NULL)
        delete u_line->cblob();
      delete u_line;
    }
  }
  // Move the collected residual underlines into block->underlines.
  if (!ru_it.empty()) {
    ru_it.move_to_first();
    for (ru_it.mark_cycle_pt(); !ru_it.cycled_list(); ru_it.forward()) {
      under_it.add_after_then_move(ru_it.extract());
    }
  }
}
void vertical_cunderline_projection ( C_OUTLINE *  outline,
QSPLINE *  baseline,
float  xheight,
float  baseline_offset,
STATS *  lower_proj,
STATS *  middle_proj,
STATS *  upper_proj 
)

Definition at line 224 of file underlin.cpp.

                                     {
  // Walks the edge steps of outline and, for each horizontal step, adds a
  // signed height for the column it crosses to the three projections:
  // lower_proj receives the part below baseline + baseline_offset,
  // middle_proj the part between that level and xheight above it, and
  // upper_proj the part above. Vertical steps contribute nothing. The
  // child outlines are then processed recursively into the same stats.
  ICOORD pos = outline->start_pos ();            //current point
  ICOORD step;                                   //edge step
  const inT32 length = outline->pathlength ();   //of outline

  for (inT16 stepindex = 0; stepindex < length; stepindex++) {
    step = outline->step (stepindex);
    if (step.x () != 0) {
      // A rightward step covers column pos.x () and adds its heights with
      // negative sign; a leftward step covers column pos.x () - 1 and adds
      // the same heights with positive sign.
      const bool rightward = step.x () > 0;
      const inT32 column = rightward ? pos.x () : pos.x () - 1;
      const inT32 sign = rightward ? -1 : 1;
      //region limits
      const inT16 lower_y =
        (inT16) floor (baseline->y (column) + baseline_offset + 0.5);
      const inT16 upper_y =
        (inT16) floor (baseline->y (column) + baseline_offset +
        xheight + 0.5);

      if (pos.y () < lower_y) {
        // The edge lies entirely in the lower region.
        lower_proj->add (column, sign * pos.y ());
      }
      else {
        lower_proj->add (column, sign * lower_y);
        if (pos.y () < upper_y) {
          middle_proj->add (column, sign * (pos.y () - lower_y));
        }
        else {
          middle_proj->add (column, sign * (upper_y - lower_y));
          upper_proj->add (column, sign * (pos.y () - upper_y));
        }
      }
    }
    pos += step;
  }

  // Fold the child outlines into the same projections.
  C_OUTLINE_IT child_it = outline->child ();
  for (child_it.mark_cycle_pt (); !child_it.cycled_list ();
       child_it.forward ()) {
    vertical_cunderline_projection (child_it.data (), baseline, xheight,
                                    baseline_offset, lower_proj,
                                    middle_proj, upper_proj);
  }
}

Variable Documentation

EXTERN bool textord_restore_underlines = 1

"Chop underlines & put back"

Definition at line 31 of file underlin.cpp.

EXTERN double textord_underline_offset = 0.1

"Fraction of x to ignore"

Definition at line 29 of file underlin.cpp.