Tesseract  3.02
tesseract-ocr/textord/tordmain.h File Reference
#include <time.h>
#include "params.h"
#include "ocrblock.h"
#include "blobs.h"
#include "blobbox.h"
#include "notdll.h"

Go to the source code of this file.

Namespaces

namespace  tesseract

Functions

void make_blocks_from_blobs (TBLOB *tessblobs, const char *filename, ICOORD page_tr, BOOL8 do_shift, BLOCK_LIST *blocks)
void SetBlobStrokeWidth (Pix *pix, BLOBNBOX *blob)
void assign_blobs_to_blocks2 (Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *port_blocks)
void textord_page (ICOORD page_tr, BLOCK_LIST *blocks, TO_BLOCK_LIST *land_blocks, TO_BLOCK_LIST *port_blocks, tesseract::Tesseract *)
void tweak_row_baseline (ROW *row, double blshift_maxshift, double blshift_xfraction)
inT32 blob_y_order (void *item1, void *item2)

Function Documentation

void assign_blobs_to_blocks2 ( Pix *  pix,
BLOCK_LIST *  blocks,
TO_BLOCK_LIST *  port_blocks 
)

Definition at line 156 of file tordmain.cpp.

                                                         {  // output list
  BLOCK *block;                  // current block
  BLOBNBOX *newblob;             // created blob
  C_BLOB *blob;                  // current blob
  BLOCK_IT block_it = blocks;
  C_BLOB_IT blob_it;             // iterator
  BLOBNBOX_IT port_box_it;       // iterator
                                 // destination iterator
  TO_BLOCK_IT port_block_it = port_blocks;
  TO_BLOCK *port_block;          // created block

  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
    block = block_it.data();
    port_block = new TO_BLOCK(block);

    // Convert the good outlines to block->blob_list
    port_box_it.set_to_list(&port_block->blobs);
    blob_it.set_to_list(block->blob_list());
    for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
      blob = blob_it.extract();
      newblob = new BLOBNBOX(blob);  // Convert blob to BLOBNBOX.
      SetBlobStrokeWidth(pix, newblob);
      port_box_it.add_after_then_move(newblob);
    }

    // Put the rejected outlines in block->noise_blobs, which allows them to
    // be reconsidered and sorted back into rows and recover outlines mistakenly
    // rejected.
    port_box_it.set_to_list(&port_block->noise_blobs);
    blob_it.set_to_list(block->reject_blobs());
    for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
      blob = blob_it.extract();
      newblob = new BLOBNBOX(blob);  // Convert blob to BLOBNBOX.
      SetBlobStrokeWidth(pix, newblob);
      port_box_it.add_after_then_move(newblob);
    }

    port_block_it.add_after_then_move(port_block);
  }
}
inT32 blob_y_order ( void *  item1,
void *  item2 
)

Definition at line 793 of file tordmain.cpp.

                                {
                                 //converted ptr
  BLOBNBOX *blob1 = *(BLOBNBOX **) item1;
                                 //converted ptr
  BLOBNBOX *blob2 = *(BLOBNBOX **) item2;

  if (blob1->bounding_box ().bottom () > blob2->bounding_box ().bottom ())
    return -1;
  else if (blob1->bounding_box ().bottom () <
    blob2->bounding_box ().bottom ())
    return 1;
  else {
    if (blob1->bounding_box ().left () < blob2->bounding_box ().left ())
      return -1;
    else if (blob1->bounding_box ().left () >
      blob2->bounding_box ().left ())
      return 1;
    else
      return 0;
  }
}
void make_blocks_from_blobs ( TBLOB tessblobs,
const char *  filename,
ICOORD  page_tr,
BOOL8  do_shift,
BLOCK_LIST *  blocks 
)
void SetBlobStrokeWidth ( Pix *  pix,
BLOBNBOX blob 
)

Definition at line 56 of file tordmain.cpp.

                                                  {
  // Cut the blob rectangle into a Pix.
  int pix_height = pixGetHeight(pix);
  const TBOX& box = blob->bounding_box();
  int width = box.width();
  int height = box.height();
  Box* blob_pix_box = boxCreate(box.left(), pix_height - box.top(),
                                width, height);
  Pix* pix_blob = pixClipRectangle(pix, blob_pix_box, NULL);
  boxDestroy(&blob_pix_box);
  Pix* dist_pix = pixDistanceFunction(pix_blob, 4, 8, L_BOUNDARY_BG);
  pixDestroy(&pix_blob);
  // Compute the stroke widths.
  uinT32* data = pixGetData(dist_pix);
  int wpl = pixGetWpl(dist_pix);
  // Horizontal width of stroke.
  STATS h_stats(0, width + 1);
  for (int y = 0; y < height; ++y) {
    uinT32* pixels = data + y*wpl;
    int prev_pixel = 0;
    int pixel = GET_DATA_BYTE(pixels, 0);
    for (int x = 1; x < width; ++x) {
      int next_pixel = GET_DATA_BYTE(pixels, x);
      // We are looking for a pixel that is equal to its vertical neighbours,
      // yet greater than its left neighbour.
      if (prev_pixel < pixel &&
          (y == 0 || pixel == GET_DATA_BYTE(pixels - wpl, x - 1)) &&
          (y == height - 1 || pixel == GET_DATA_BYTE(pixels + wpl, x - 1))) {
        if (pixel > next_pixel) {
          // Single local max, so an odd width.
          h_stats.add(pixel * 2 - 1, 1);
        } else if (pixel == next_pixel && x + 1 < width &&
                 pixel > GET_DATA_BYTE(pixels, x + 1)) {
          // Double local max, so an even width.
          h_stats.add(pixel * 2, 1);
        }
      }
      prev_pixel = pixel;
      pixel = next_pixel;
    }
  }
  // Vertical width of stroke.
  STATS v_stats(0, height + 1);
  for (int x = 0; x < width; ++x) {
    int prev_pixel = 0;
    int pixel = GET_DATA_BYTE(data, x);
    for (int y = 1; y < height; ++y) {
      uinT32* pixels = data + y*wpl;
      int next_pixel = GET_DATA_BYTE(pixels, x);
      // We are looking for a pixel that is equal to its horizontal neighbours,
      // yet greater than its upper neighbour.
      if (prev_pixel < pixel &&
          (x == 0 || pixel == GET_DATA_BYTE(pixels - wpl, x - 1)) &&
          (x == width - 1 || pixel == GET_DATA_BYTE(pixels - wpl, x + 1))) {
        if (pixel > next_pixel) {
          // Single local max, so an odd width.
          v_stats.add(pixel * 2 - 1, 1);
        } else if (pixel == next_pixel && y + 1 < height &&
                 pixel > GET_DATA_BYTE(pixels + wpl, x)) {
          // Double local max, so an even width.
          v_stats.add(pixel * 2, 1);
        }
      }
      prev_pixel = pixel;
      pixel = next_pixel;
    }
  }
  pixDestroy(&dist_pix);
  // Store the horizontal and vertical width in the blob, keeping both
  // widths if there is enough information, otherwse only the one with
  // the most samples.
  // If there are insufficent samples, store zero, rather than using
  // 2*area/perimeter, as the numbers that gives do not match the numbers
  // from the distance method.
  if (h_stats.get_total() >= (width + height) / 4) {
    blob->set_horz_stroke_width(h_stats.ile(0.5f));
    if (v_stats.get_total() >= (width + height) / 4)
      blob->set_vert_stroke_width(v_stats.ile(0.5f));
    else
      blob->set_vert_stroke_width(0.0f);
  } else {
    if (v_stats.get_total() >= (width + height) / 4 ||
        v_stats.get_total() > h_stats.get_total()) {
      blob->set_horz_stroke_width(0.0f);
      blob->set_vert_stroke_width(v_stats.ile(0.5f));
    } else {
      blob->set_horz_stroke_width(h_stats.get_total() > 2 ? h_stats.ile(0.5f)
                                                          : 0.0f);
      blob->set_vert_stroke_width(0.0f);
    }
  }
}
void textord_page ( ICOORD  page_tr,
BLOCK_LIST *  blocks,
TO_BLOCK_LIST *  land_blocks,
TO_BLOCK_LIST *  port_blocks,
tesseract::Tesseract  
)
void tweak_row_baseline ( ROW row,
double  blshift_maxshift,
double  blshift_xfraction 
)

Definition at line 680 of file tordmain.cpp.

                                                  {
  TBOX blob_box;                 //bounding box
  C_BLOB *blob;                  //current blob
  WERD *word;                    //current word
  inT32 blob_count;              //no of blobs
  inT32 src_index;               //source segment
  inT32 dest_index;              //destination segment
  inT32 *xstarts;                //spline segments
  double *coeffs;                //spline coeffs
  float ydiff;                   //baseline error
  float x_centre;                //centre of blob
                                 //words of row
  WERD_IT word_it = row->word_list ();
  C_BLOB_IT blob_it;             //blob iterator

  blob_count = 0;
  for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {
    word = word_it.data ();      //current word
                                 //get total blobs
    blob_count += word->cblob_list ()->length ();
  }
  if (blob_count == 0)
    return;
  xstarts =
    (inT32 *) alloc_mem ((blob_count + row->baseline.segments + 1) *
    sizeof (inT32));
  coeffs =
    (double *) alloc_mem ((blob_count + row->baseline.segments) * 3 *
    sizeof (double));

  src_index = 0;
  dest_index = 0;
  xstarts[0] = row->baseline.xcoords[0];
  for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {
    word = word_it.data ();      //current word
                                 //blobs in word
    blob_it.set_to_list (word->cblob_list ());
    for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
    blob_it.forward ()) {
      blob = blob_it.data ();
      blob_box = blob->bounding_box ();
      x_centre = (blob_box.left () + blob_box.right ()) / 2.0;
      ydiff = blob_box.bottom () - row->base_line (x_centre);
      if (ydiff < 0)
        ydiff = -ydiff / row->x_height ();
      else
        ydiff = ydiff / row->x_height ();
      if (ydiff < blshift_maxshift
        && blob_box.height () / row->x_height () > blshift_xfraction) {
        if (xstarts[dest_index] >= x_centre)
          xstarts[dest_index] = blob_box.left ();
        coeffs[dest_index * 3] = 0;
        coeffs[dest_index * 3 + 1] = 0;
        coeffs[dest_index * 3 + 2] = blob_box.bottom ();
        //shift it
        dest_index++;
        xstarts[dest_index] = blob_box.right () + 1;
      }
      else {
        if (xstarts[dest_index] <= x_centre) {
          while (row->baseline.xcoords[src_index + 1] <= x_centre
          && src_index < row->baseline.segments - 1) {
            if (row->baseline.xcoords[src_index + 1] >
            xstarts[dest_index]) {
              coeffs[dest_index * 3] =
                row->baseline.quadratics[src_index].a;
              coeffs[dest_index * 3 + 1] =
                row->baseline.quadratics[src_index].b;
              coeffs[dest_index * 3 + 2] =
                row->baseline.quadratics[src_index].c;
              dest_index++;
              xstarts[dest_index] =
                row->baseline.xcoords[src_index + 1];
            }
            src_index++;
          }
          coeffs[dest_index * 3] =
            row->baseline.quadratics[src_index].a;
          coeffs[dest_index * 3 + 1] =
            row->baseline.quadratics[src_index].b;
          coeffs[dest_index * 3 + 2] =
            row->baseline.quadratics[src_index].c;
          dest_index++;
          xstarts[dest_index] = row->baseline.xcoords[src_index + 1];
        }
      }
    }
  }
  while (src_index < row->baseline.segments
    && row->baseline.xcoords[src_index + 1] <= xstarts[dest_index])
    src_index++;
  while (src_index < row->baseline.segments) {
    coeffs[dest_index * 3] = row->baseline.quadratics[src_index].a;
    coeffs[dest_index * 3 + 1] = row->baseline.quadratics[src_index].b;
    coeffs[dest_index * 3 + 2] = row->baseline.quadratics[src_index].c;
    dest_index++;
    src_index++;
    xstarts[dest_index] = row->baseline.xcoords[src_index];
  }
                                 //turn to spline
  row->baseline = QSPLINE (dest_index, xstarts, coeffs);
  free_mem(xstarts);
  free_mem(coeffs);
}