Tesseract  3.02
OL_BUCKETS Class Reference

#include <edgblob.h>

List of all members.

Public Member Functions

 ~OL_BUCKETS ()
C_OUTLINE_LIST * start_scan ()
C_OUTLINE_LIST * scan_next ()
OL_BUCKETS::OL_BUCKETS

Construct an array of buckets for associating outlines into blobs.

 OL_BUCKETS (ICOORD bleft, ICOORD tright)
OL_BUCKETS::operator()

Return a pointer to a list of C_OUTLINEs corresponding to the given pixel coordinates.

C_OUTLINE_LIST * operator() (inT16 x, inT16 y)
OL_BUCKETS::count_children

Find number of descendants of this outline.

inT32 count_children (C_OUTLINE *outline, inT32 max_count)
OL_BUCKETS::outline_complexity

This is the new version of count_children.

The goal of this function is to determine whether an outline and its interiors could be part of a character blob. This is done by computing a "complexity" index for the outline, which is the return value of this function, and checking it against a threshold. The max_count is used for short-circuiting the recursion and forcing a rejection that is guaranteed to fail the threshold test. The complexity F for outline X with N children X[i] is F(X) = N + edges_children_per_grandchild * sum_i F(X[i]), so each layer of nesting increases complexity exponentially. An outline can be rejected as a text blob candidate if its complexity is too high, if it has too many children (likely a container), or if it has too many layers of nested inner loops. This has the side-effect of flattening out boxed or reversed video text regions.

inT32 outline_complexity (C_OUTLINE *outline, inT32 max_count, inT16 depth)
OL_BUCKETS::extract_children

Extract the children of this outline from the buckets and add them to the list behind the given iterator.

void extract_children (C_OUTLINE *outline, C_OUTLINE_IT *it)

Detailed Description

Definition at line 33 of file edgblob.h.


Constructor & Destructor Documentation

OL_BUCKETS::OL_BUCKETS ( ICOORD  bleft,
ICOORD  tright 
)

Definition at line 69 of file edgblob.cpp.

              : bl(bleft), tr(tright) {
  // Scanning (start_scan / scan_next) begins at the first bucket.
  index = 0;
  // Grid size: one bucket per BUCKETSIZE units along each axis, plus one
  // so that the far edge of the region still lands inside the grid.
  bydim = (tright.y() - bleft.y()) / BUCKETSIZE + 1;
  bxdim = (tright.x() - bleft.x()) / BUCKETSIZE + 1;
  // One outline list per bucket, stored as a flat array of
  // bxdim * bydim lists.
  buckets = new C_OUTLINE_LIST[bxdim * bydim];
}
OL_BUCKETS::~OL_BUCKETS ( ) [inline]

Definition at line 40 of file edgblob.h.

                   {
      // Free the array of bucket lists owned by this object.
      delete [] buckets;
    }

Member Function Documentation

inT32 OL_BUCKETS::count_children ( C_OUTLINE *  outline,
inT32  max_count 
)

Definition at line 184 of file edgblob.cpp.

                                  {
  // Counts the outlines in the buckets that compare as children of
  // `outline` (child != outline && *child < *outline), plus a weighted,
  // recursively computed count of their own descendants.
  //
  // outline:   the candidate parent outline.
  // max_count: budget for the combined count; once child_count +
  //            grandchild_count exceeds it, the running total is returned
  //            immediately.
  // Returns child_count + grandchild_count, or max_count + 1 when one of the
  // area/shape tests below decides the parent must be discarded.
  BOOL8 parent_box;              // could it be boxy
  inT16 xmin, xmax;              // coord limits
  inT16 ymin, ymax;
  inT16 xindex, yindex;          // current bucket
  C_OUTLINE *child;              // current child
  inT32 child_count;             // no of children
  inT32 grandchild_count;        // no of grandchildren
  inT32 parent_area;             // potential box
  FLOAT32 max_parent_area;       // potential box
  inT32 child_area;              // current child
  inT32 child_length;            // current child
  TBOX olbox;
  C_OUTLINE_IT child_it;         // search iterator

  // Convert the outline's bounding box into inclusive bucket index ranges,
  // measured from the bottom-left corner bl in units of BUCKETSIZE.
  olbox = outline->bounding_box();
  xmin =(olbox.left() - bl.x()) / BUCKETSIZE;
  xmax =(olbox.right() - bl.x()) / BUCKETSIZE;
  ymin =(olbox.bottom() - bl.y()) / BUCKETSIZE;
  ymax =(olbox.top() - bl.y()) / BUCKETSIZE;
  child_count = 0;
  grandchild_count = 0;
  parent_area = 0;               // 0 doubles as "not yet computed"
  max_parent_area = 0;
  parent_box = TRUE;
  // Visit every bucket overlapped by the bounding box.
  for (yindex = ymin; yindex <= ymax; yindex++) {
    for (xindex = xmin; xindex <= xmax; xindex++) {
      child_it.set_to_list(&buckets[yindex * bxdim + xindex]);
      if (child_it.empty())
        continue;
      for (child_it.mark_cycle_pt(); !child_it.cycled_list();
           child_it.forward()) {
        child = child_it.data();
        // Only outlines other than the parent that compare as less than it
        // are treated as children.
        if (child != outline && *child < *outline) {
          child_count++;
          if (child_count <= max_count) {
            // Give the recursion the remaining budget, scaled down by the
            // per-grandchild weight that is applied to its result.
            int max_grand =(max_count - child_count) /
                            edges_children_per_grandchild;
            if (max_grand > 0)
              grandchild_count += count_children(child, max_grand) *
                                  edges_children_per_grandchild;
            else
              // No budget left after scaling: recurse with a budget of 1 and
              // add the result unweighted.
              grandchild_count += count_children(child, 1);
          }
          // Over budget: stop early and report the total so far.
          if (child_count + grandchild_count > max_count) {
            if (edges_debug)
              tprintf("Discarding parent with child count=%d, gc=%d\n",
                      child_count,grandchild_count);
            return child_count + grandchild_count;
          }
          // Compute the parent's area while parent_area is still 0 (i.e. on
          // the first child, and again if the area itself came out as 0).
          if (parent_area == 0) {
            parent_area = outline->outer_area();
            if (parent_area < 0)
              parent_area = -parent_area;
            max_parent_area = outline->bounding_box().area() * edges_boxarea;
            // The parent only stays a "box" candidate if its area is at
            // least edges_boxarea times its bounding-box area.
            if (parent_area < max_parent_area)
              parent_box = FALSE;
          }
          // Box tests: applied while the parent is still a box candidate and
          // either edges_children_fix is off or the child is taller than
          // edges_min_nonhole.
          if (parent_box &&
              (!edges_children_fix ||
               child->bounding_box().height() > edges_min_nonhole)) {
            child_area = child->outer_area();
            if (child_area < 0)
              child_area = -child_area;
            if (edges_children_fix) {
              // Child takes up too much of the parent: no longer a box
              // candidate; skip the remaining tests for this child.
              if (parent_area - child_area < max_parent_area) {
                parent_box = FALSE;
                continue;
              }
              // A box-like parent with any grandchildren is rejected.
              if (grandchild_count > 0) {
                if (edges_debug)
                  tprintf("Discarding parent of area %d, child area=%d, max%g "
                          "with gc=%d\n",
                          parent_area, child_area, max_parent_area,
                          grandchild_count);
                return max_count + 1;
              }
              // Reject when the child's squared path length is large
              // relative to its area (threshold edges_patharea_ratio).
              child_length = child->pathlength();
              if (child_length * child_length >
                  child_area * edges_patharea_ratio) {
                if (edges_debug)
                  tprintf("Discarding parent of area %d, child area=%d, max%g "
                          "with child length=%d\n",
                          parent_area, child_area, max_parent_area,
                          child_length);
                return max_count + 1;
              }
            }
            // Reject when the child fills less than edges_childarea of its
            // own bounding box.
            if (child_area < child->bounding_box().area() * edges_childarea) {
              if (edges_debug)
                tprintf("Discarding parent of area %d, child area=%d, max%g "
                        "with child rect=%d\n",
                        parent_area, child_area, max_parent_area,
                        child->bounding_box().area());
              return max_count + 1;
            }
          }
        }
      }
    }
  }
  return child_count + grandchild_count;
}
void OL_BUCKETS::extract_children ( C_OUTLINE *  outline,
C_OUTLINE_IT *  it 
)

Definition at line 300 of file edgblob.cpp.

                                   {
  // Moves every bucketed outline that compares as a child of `outline`
  // (*candidate < *outline) out of its bucket and into the list that `it`
  // iterates over.

  // Inclusive range of bucket columns/rows covered by the outline's
  // bounding box, measured from bl in units of BUCKETSIZE.
  TBOX box = outline->bounding_box();
  inT16 first_col = (box.left() - bl.x()) / BUCKETSIZE;
  inT16 last_col = (box.right() - bl.x()) / BUCKETSIZE;
  inT16 first_row = (box.bottom() - bl.y()) / BUCKETSIZE;
  inT16 last_row = (box.top() - bl.y()) / BUCKETSIZE;

  C_OUTLINE_IT bucket_it;        // walks the outlines of one bucket
  for (inT16 row = first_row; row <= last_row; row++) {
    for (inT16 col = first_col; col <= last_col; col++) {
      bucket_it.set_to_list(buckets + (row * bxdim + col));
      bucket_it.mark_cycle_pt();
      while (!bucket_it.cycled_list()) {
        if (*bucket_it.data() < *outline) {
          // Unlink the child from its bucket and append it after the
          // caller's current position, advancing the caller's iterator.
          it->add_after_then_move(bucket_it.extract());
        }
        bucket_it.forward();
      }
    }
  }
}
C_OUTLINE_LIST * OL_BUCKETS::operator() ( inT16  x,
inT16  y 
)

Definition at line 88 of file edgblob.cpp.

         {
  // Row offset (whole rows of bxdim buckets) plus column offset, both
  // measured from the bottom-left corner bl in units of BUCKETSIZE.
  return buckets +
         ((y - bl.y()) / BUCKETSIZE * bxdim + (x - bl.x()) / BUCKETSIZE);
}
inT32 OL_BUCKETS::outline_complexity ( C_OUTLINE *  outline,
inT32  max_count,
inT16  depth 
)

Definition at line 115 of file edgblob.cpp.

                                      {
  // Computes a "complexity" score for `outline`: the number of its children
  // plus edges_children_per_grandchild times the complexity of each child,
  // computed recursively.
  //
  // outline:   the outline being scored.
  // max_count: budget for the score; the scan stops as soon as the running
  //            total exceeds it.
  // depth:     nesting level of the caller; incremented on entry.
  // Returns the score. Each early exit returns a value built to exceed
  // max_count: max_count + depth when nesting is deeper than
  // edges_max_children_layers, max_count + child_count when there are more
  // than edges_max_children_per_outline children, or the running total once
  // it has passed max_count.
  inT16 xmin, xmax;              // coord limits
  inT16 ymin, ymax;
  inT16 xindex, yindex;          // current bucket
  C_OUTLINE *child;              // current child
  inT32 child_count;             // no of children
  inT32 grandchild_count;        // weighted complexity of the children
  C_OUTLINE_IT child_it;         // search iterator

  // Convert the outline's bounding box into inclusive bucket index ranges,
  // measured from the bottom-left corner bl in units of BUCKETSIZE.
  TBOX olbox = outline->bounding_box();
  xmin =(olbox.left() - bl.x()) / BUCKETSIZE;
  xmax =(olbox.right() - bl.x()) / BUCKETSIZE;
  ymin =(olbox.bottom() - bl.y()) / BUCKETSIZE;
  ymax =(olbox.top() - bl.y()) / BUCKETSIZE;
  child_count = 0;
  grandchild_count = 0;
  if (++depth > edges_max_children_layers)  // nested loops are too deep
    return max_count + depth;

  for (yindex = ymin; yindex <= ymax; yindex++) {
    for (xindex = xmin; xindex <= xmax; xindex++) {
      child_it.set_to_list(&buckets[yindex * bxdim + xindex]);
      if (child_it.empty())
        continue;
      for (child_it.mark_cycle_pt(); !child_it.cycled_list();
           child_it.forward()) {
        child = child_it.data();
        // Skip the outline itself and anything that does not compare as
        // one of its children.
        if (child == outline || !(*child < *outline))
          continue;
        child_count++;

        if (child_count > edges_max_children_per_outline) {   // too fragmented
          if (edges_debug)
            tprintf("Discard outline on child_count=%d > "
                    "max_children_per_outline=%d\n",
                    child_count,
                    static_cast<inT32>(edges_max_children_per_outline));
          return max_count + child_count;
        }

        // Compute the "complexity" of each child recursively, handing down
        // only the budget that is still unspent. With no budget left the
        // recursion is skipped entirely.
        inT32 remaining_count = max_count - child_count - grandchild_count;
        if (remaining_count > 0)
          grandchild_count += edges_children_per_grandchild *
                              outline_complexity(child, remaining_count, depth);
        if (child_count + grandchild_count > max_count) {  // too complex
          if (edges_debug)
            // Spelling matches the "Discard" message above so that one log
            // search finds both rejection paths.
            tprintf("Discard outline on child_count=%d + grandchild_count=%d "
                    "> max_count=%d\n",
                    child_count, grandchild_count, max_count);
          return child_count + grandchild_count;
        }
      }
    }
  }
  return child_count + grandchild_count;
}
C_OUTLINE_LIST* OL_BUCKETS::scan_next ( ) [inline]

Definition at line 53 of file edgblob.h.

                                {
      // Continue from the current scan position: step over empty buckets,
      // but never advance beyond the last bucket in the array.
      while (buckets[index].empty() && index < bxdim * bydim - 1) {
        index++;
      }
      // The returned bucket is empty only if the scan ran to the last one.
      return buckets + index;
    }
C_OUTLINE_LIST* OL_BUCKETS::start_scan ( ) [inline]

Definition at line 47 of file edgblob.h.

                                 {
      // Restart the scan at the first bucket, then step over empty buckets
      // without advancing beyond the last bucket in the array.
      index = 0;
      while (buckets[index].empty() && index < bxdim * bydim - 1) {
        index++;
      }
      // The returned bucket is empty only if the scan ran to the last one.
      return buckets + index;
    }

The documentation for this class was generated from the following files: