Tesseract  3.02
tesseract-ocr/textord/edgblob.cpp File Reference
#include "mfcpch.h"
#include "scanedg.h"
#include "drawedg.h"
#include "edgloop.h"
#include "edgblob.h"

Go to the source code of this file.

Defines

#define EXTERN

Functions

extract_edges

Run the edge detector over the block and return a list of blobs.

void extract_edges (Pix *pix, BLOCK *block)
outlines_to_blobs

Gather together outlines into blobs using the usual bucket sort.

void outlines_to_blobs (BLOCK *block, ICOORD bleft, ICOORD tright, C_OUTLINE_LIST *outlines)
fill_buckets

Move each outline off the list and into the bucket selected by the left and bottom of its bounding box.

void fill_buckets (C_OUTLINE_LIST *outlines, OL_BUCKETS *buckets)
empty_buckets

Take the outlines back out of the buckets, group each outermost outline with its children into a blob, and add the blob to the block's blob list or reject list.

void empty_buckets (BLOCK *block, OL_BUCKETS *buckets)
capture_children

Find all neighbouring outlines that are children of this outline and either move them to the output list or declare this outline illegal and return FALSE.

BOOL8 capture_children (OL_BUCKETS *buckets, C_BLOB_IT *reject_it, C_OUTLINE_IT *blob_it)

Variables

EXTERN bool edges_use_new_outline_complexity = 0
EXTERN int edges_max_children_per_outline = 10
EXTERN int edges_max_children_layers = 5
EXTERN bool edges_debug = 0
EXTERN int edges_children_per_grandchild = 10
EXTERN int edges_children_count_limit = 45
EXTERN bool edges_children_fix = 0
EXTERN int edges_min_nonhole = 12
EXTERN int edges_patharea_ratio = 40
EXTERN double edges_childarea = 0.5
EXTERN double edges_boxarea = 0.875

Define Documentation

#define EXTERN

Definition at line 31 of file edgblob.cpp.


Function Documentation

BOOL8 capture_children ( OL_BUCKETS *  buckets,
C_BLOB_IT *  reject_it,
C_OUTLINE_IT *  blob_it 
)

Definition at line 449 of file edgblob.cpp.

                        {
  // Counts the children of the outline currently under blob_it and, when
  // the count is within edges_children_count_limit, has the buckets
  // extract them. Returns FALSE when the count exceeds the limit, TRUE
  // otherwise. reject_it is not referenced anywhere in this body.
  C_OUTLINE *outline;            // master outline
  inT32 child_count;             // no of children

  outline = blob_it->data();
  // edges_use_new_outline_complexity selects which counting routine is
  // used; both are handed edges_children_count_limit.
  // NOTE(review): the meaning of the trailing 0 passed to
  // outline_complexity is not visible here - confirm against OL_BUCKETS.
  if (edges_use_new_outline_complexity)
    child_count = buckets->outline_complexity(outline,
                                               edges_children_count_limit,
                                               0);
  else
    child_count = buckets->count_children(outline,
                                           edges_children_count_limit);
  // Over the limit: fail without extracting anything.
  if (child_count > edges_children_count_limit)
    return FALSE;

  // Skip the extraction call entirely when no children were counted.
  if (child_count > 0)
    buckets->extract_children(outline, blob_it);
  return TRUE;
}
void empty_buckets ( BLOCK *  block,
OL_BUCKETS *  buckets 
)

Definition at line 403 of file edgblob.cpp.

                    {
  // Works through the buckets one scan position at a time. On each pass it
  // picks one outline from the current bucket, moves it to a scratch list,
  // lets capture_children() deal with its children, wraps the scratch list
  // in a new C_BLOB, and files that blob in the block's blob list or
  // reject list depending on what capture_children() returned.
  BOOL8 good_blob;               // healthy blob
  C_OUTLINE_LIST outlines;       // outlines in block
                                 // iterator
  C_OUTLINE_IT out_it = &outlines;
  C_OUTLINE_IT bucket_it = buckets->start_scan();
  C_OUTLINE_IT parent_it;        // parent outline
  C_BLOB *blob;                  // new blob
  C_BLOB_IT good_blobs = block->blob_list();
  C_BLOB_IT junk_blobs = block->reject_blobs();

  while (!bucket_it.empty()) {
    // Re-point the output iterator at the scratch list for this blob.
    out_it.set_to_list(&outlines);
    // The inner loop advances until the iterator is back at the first
    // element or reaches an outline for which (*parent < *candidate) holds;
    // the outer loop then adopts that outline as the new parent and
    // repeats until a pass ends at the first element.
    // NOTE(review): C_OUTLINE::operator< presumably means "lies inside" -
    // confirm against its declaration.
    do {
      parent_it = bucket_it;     // find outermost
      do {
        bucket_it.forward();
      } while (!bucket_it.at_first() &&
               !(*parent_it.data() < *bucket_it.data()));
    } while (!bucket_it.at_first());

                                 // move to new list
    out_it.add_after_then_move(parent_it.extract());
    // FALSE here means the child count exceeded edges_children_count_limit.
    good_blob = capture_children(buckets, &junk_blobs, &out_it);
    // NOTE(review): the C_BLOB constructor presumably takes the outlines
    // out of the scratch list, leaving it empty for the next pass - confirm.
    blob = new C_BLOB(&outlines);
    if (good_blob)
      good_blobs.add_after_then_move(blob);
    else
      junk_blobs.add_after_then_move(blob);

    // Continue with whatever list the bucket scan hands back next.
    bucket_it.set_to_list(buckets->scan_next());
  }
}
void extract_edges ( Pix *  pix,
BLOCK *  block 
)

Definition at line 335 of file edgblob.cpp.

                                 {  // block to scan
  // Converts the Pix to an IMAGE, runs block_edges() on it for this block,
  // then passes the outline list and the block's bounding box to
  // outlines_to_blobs().
  C_OUTLINE_LIST outlines;       // outlines in block
  C_OUTLINE_IT out_it = &outlines;

  // TODO(rays) move the pix all the way down to the bottom.
  IMAGE image;
  image.FromPix(pix);

  // NOTE(review): block_edges is handed out_it, so it presumably adds the
  // outlines it finds to `outlines` - confirm against its definition.
  block_edges(&image, block, &out_it);
  ICOORD bleft;                  // block box
  ICOORD tright;
  // bleft and tright are filled in by the call below.
  block->bounding_box(bleft, tright);
                                 // make blobs
  outlines_to_blobs(block, bleft, tright, &outlines);
}
void fill_buckets ( C_OUTLINE_LIST *  outlines,
OL_BUCKETS *  buckets 
)

Definition at line 378 of file edgblob.cpp.

                   {
  // Walks the outline list once, removing every outline from it and
  // appending it to the bucket chosen from the outline's bounding box.
  TBOX ol_box;                     // outline box
  C_OUTLINE_IT out_it = outlines;  // iterator
  C_OUTLINE_IT bucket_it;          // iterator in bucket
  C_OUTLINE *outline;              // current outline

  for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) {
    outline = out_it.extract();  // take off list
                                 // get box
    ol_box = outline->bounding_box();
    // The bucket is looked up by the box's left and bottom coordinates.
    bucket_it.set_to_list((*buckets) (ol_box.left(), ol_box.bottom()));
    bucket_it.add_to_end(outline);
  }
}
void outlines_to_blobs ( BLOCK *  block,
ICOORD  bleft,
ICOORD  tright,
C_OUTLINE_LIST *  outlines 
)

Definition at line 359 of file edgblob.cpp.

                                                 {
  // Builds a local set of buckets from the two corner coordinates, moves
  // the outlines into it with fill_buckets(), then turns the bucket
  // contents into blobs on the block with empty_buckets().
                                 // make buckets
  OL_BUCKETS buckets(bleft, tright);

  fill_buckets(outlines, &buckets);
  empty_buckets(block, &buckets);
}

Variable Documentation

EXTERN double edges_boxarea = 0.875

"Min area fraction of grandchild for box"

Definition at line 61 of file edgblob.cpp.

EXTERN double edges_childarea = 0.5

"Min area fraction of child outline"

Definition at line 59 of file edgblob.cpp.

EXTERN int edges_children_count_limit = 45

"Max holes allowed in blob"

Definition at line 51 of file edgblob.cpp.

EXTERN bool edges_children_fix = 0

"Remove boxy parents of char-like children"

Definition at line 53 of file edgblob.cpp.

EXTERN int edges_children_per_grandchild = 10

"Importance ratio for chucking outlines"

Definition at line 49 of file edgblob.cpp.

EXTERN bool edges_debug = 0

"turn on debugging for this module"

Definition at line 45 of file edgblob.cpp.

EXTERN int edges_max_children_layers = 5

"Max layers of nested children inside a character outline"

Definition at line 43 of file edgblob.cpp.

EXTERN int edges_max_children_per_outline = 10

"Max number of children inside a character outline"

Definition at line 41 of file edgblob.cpp.

EXTERN int edges_min_nonhole = 12

"Min pixels for potential char in box"

Definition at line 55 of file edgblob.cpp.

EXTERN int edges_patharea_ratio = 40

"Max lensq/area for acceptable child outline"

Definition at line 57 of file edgblob.cpp.

EXTERN bool edges_use_new_outline_complexity = 0

"Use the new outline complexity module"

Definition at line 39 of file edgblob.cpp.