Tesseract  3.02
tesseract::ColPartition Class Reference

#include <colpartition.h>

Inheritance diagram for tesseract::ColPartition:
ELIST2_LINK

List of all members.

Public Member Functions

 ColPartition ()
 ColPartition (BlobRegionType blob_type, const ICOORD &vertical)
 ~ColPartition ()
const TBOX & bounding_box () const
int left_margin () const
void set_left_margin (int margin)
int right_margin () const
void set_right_margin (int margin)
int median_top () const
int median_bottom () const
int median_left () const
int median_right () const
int median_size () const
void set_median_size (int size)
int median_width () const
void set_median_width (int width)
BlobRegionType blob_type () const
void set_blob_type (BlobRegionType t)
BlobTextFlowType flow () const
void set_flow (BlobTextFlowType f)
int good_blob_score () const
bool good_width () const
bool good_column () const
bool left_key_tab () const
int left_key () const
bool right_key_tab () const
int right_key () const
PolyBlockType type () const
void set_type (PolyBlockType t)
BLOBNBOX_CLIST * boxes ()
int boxes_count () const
void set_vertical (const ICOORD &v)
ColPartition_CLIST * upper_partners ()
ColPartition_CLIST * lower_partners ()
void set_working_set (WorkingPartSet *working_set)
bool block_owned () const
void set_block_owned (bool owned)
bool desperately_merged () const
ColPartitionSet * column_set () const
void set_side_step (int step)
int bottom_spacing () const
void set_bottom_spacing (int spacing)
int top_spacing () const
void set_top_spacing (int spacing)
void set_table_type ()
void clear_table_type ()
bool inside_table_column ()
void set_inside_table_column (bool val)
ColPartition * nearest_neighbor_above () const
void set_nearest_neighbor_above (ColPartition *part)
ColPartition * nearest_neighbor_below () const
void set_nearest_neighbor_below (ColPartition *part)
int space_above () const
void set_space_above (int space)
int space_below () const
void set_space_below (int space)
int space_to_left () const
void set_space_to_left (int space)
int space_to_right () const
void set_space_to_right (int space)
uinT8 * color1 ()
uinT8 * color2 ()
bool owns_blobs () const
void set_owns_blobs (bool owns_blobs)
int MidY () const
int MedianY () const
int MidX () const
int SortKey (int x, int y) const
int XAtY (int sort_key, int y) const
int KeyWidth (int left_key, int right_key) const
int ColumnWidth () const
int BoxLeftKey () const
int BoxRightKey () const
int LeftAtY (int y) const
int RightAtY (int y) const
bool IsLeftOf (const ColPartition &other) const
bool ColumnContains (int x, int y) const
bool IsEmpty () const
bool IsSingleton () const
bool HOverlaps (const ColPartition &other) const
bool VOverlaps (const ColPartition &other) const
int VCoreOverlap (const ColPartition &other) const
int HCoreOverlap (const ColPartition &other) const
bool VSignificantCoreOverlap (const ColPartition &other) const
bool WithinSameMargins (const ColPartition &other) const
bool TypesMatch (const ColPartition &other) const
bool IsLineType () const
bool IsImageType () const
bool IsTextType () const
bool IsVerticalType () const
bool IsHorizontalType () const
bool IsUnMergeableType () const
bool IsVerticalLine () const
bool IsHorizontalLine () const
void AddBox (BLOBNBOX *box)
void RemoveBox (BLOBNBOX *box)
BLOBNBOX * BiggestBox ()
TBOX BoundsWithoutBox (BLOBNBOX *box)
void ClaimBoxes ()
void DisownBoxes ()
void DeleteBoxes ()
void ReflectInYAxis ()
bool IsLegal ()
bool MatchingColumns (const ColPartition &other) const
bool MatchingTextColor (const ColPartition &other) const
bool MatchingSizes (const ColPartition &other) const
bool ConfirmNoTabViolation (const ColPartition &other) const
bool MatchingStrokeWidth (const ColPartition &other, double fractional_tolerance, double constant_tolerance) const
bool OKDiacriticMerge (const ColPartition &candidate, bool debug) const
void SetLeftTab (const TabVector *tab_vector)
void SetRightTab (const TabVector *tab_vector)
void CopyLeftTab (const ColPartition &src, bool take_box)
void CopyRightTab (const ColPartition &src, bool take_box)
int LeftBlobRule () const
int RightBlobRule () const
float SpecialBlobsDensity (const BlobSpecialTextType type) const
int SpecialBlobsCount (const BlobSpecialTextType type)
void SetSpecialBlobsDensity (const BlobSpecialTextType type, const float density)
void ComputeSpecialBlobsDensity ()
void AddPartner (bool upper, ColPartition *partner)
void RemovePartner (bool upper, ColPartition *partner)
ColPartition * SingletonPartner (bool upper)
void Absorb (ColPartition *other, WidthCallback *cb)
bool OKMergeOverlap (const ColPartition &merge1, const ColPartition &merge2, int ok_box_overlap, bool debug)
BLOBNBOX * OverlapSplitBlob (const TBOX &box)
ColPartition * SplitAtBlob (BLOBNBOX *split_blob)
ColPartition * SplitAt (int split_x)
void ComputeLimits ()
int CountOverlappingBoxes (const TBOX &box)
void SetPartitionType (int resolution, ColPartitionSet *columns)
PolyBlockType PartitionType (ColumnSpanningType flow) const
void ColumnRange (int resolution, ColPartitionSet *columns, int *first_col, int *last_col)
void SetColumnGoodness (WidthCallback *cb)
bool MarkAsLeaderIfMonospaced ()
void SetRegionAndFlowTypesFromProjectionValue (int value)
void SetBlobTypes ()
bool HasGoodBaseline ()
void AddToWorkingSet (const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *used_parts, WorkingPartSet_LIST *working_set)
ColPartition * ShallowCopy () const
ColPartition * CopyButDontOwnBlobs ()
ScrollView::Color BoxColor () const
void Print () const
void PrintColors ()
void SmoothPartnerRun (int working_set_count)
void RefinePartners (PolyBlockType type, bool get_desparate, ColPartitionGrid *grid)
bool IsInSameColumnAs (const ColPartition &part) const
void set_first_column (int column)
void set_last_column (int column)

Static Public Member Functions

static ColPartition * MakeLinePartition (BlobRegionType blob_type, const ICOORD &vertical, int left, int bottom, int right, int top)
static ColPartition * FakePartition (const TBOX &box, PolyBlockType block_type, BlobRegionType blob_type, BlobTextFlowType flow)
static ColPartition * MakeBigPartition (BLOBNBOX *box, ColPartition_LIST *big_part_list)
static bool TypesMatch (BlobRegionType type1, BlobRegionType type2)
static bool TypesSimilar (PolyBlockType type1, PolyBlockType type2)
static void LineSpacingBlocks (const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts, BLOCK_LIST *completed_blocks, TO_BLOCK_LIST *to_blocks)
static TO_BLOCK * MakeBlock (const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)
static TO_BLOCK * MakeVerticalTextBlock (const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)

Detailed Description

ColPartition is a partition of a horizontal slice of the page. It starts out as a collection of blobs at a particular y-coord in the grid, but ends up (after merging and uniquing) as an approximate text line. ColPartitions are also used to hold a partitioning of the page into columns, each representing one column. Although a ColPartition applies to a given y-coordinate range, eventually, a ColPartitionSet of ColPartitions emerges, which represents the columns over a wide y-coordinate range.

Definition at line 67 of file colpartition.h.


Constructor & Destructor Documentation

tesseract::ColPartition::ColPartition ( ) [inline]

Definition at line 69 of file colpartition.h.

                 {
    // Deliberately does nothing: a default constructor has to exist so that
    // the class can be ELISTIZED.
    // TODO(rays) change deep_copy in elst.h line 955 to take a callback copier
    // and eliminate CLASSNAME##_copier.
  }
tesseract::ColPartition::ColPartition ( BlobRegionType  blob_type,
const ICOORD &  vertical 
)
Parameters:
blob_type  is the blob_region_type_ of the blobs in this partition.
vertical  is the direction of logical vertical on the possibly skewed image.

Definition at line 84 of file colpartition.cpp.

  : // Margins and medians start at the extreme values of their range.
    left_margin_(-MAX_INT32),
    right_margin_(MAX_INT32),
    median_bottom_(MAX_INT32),
    median_top_(-MAX_INT32),
    median_size_(0),
    median_left_(MAX_INT32),
    median_right_(-MAX_INT32),
    median_width_(0),
    // Type/flow information; only blob_type_ comes from the caller.
    blob_type_(blob_type),
    flow_(BTFT_NONE),
    good_blob_score_(0),
    good_width_(false),
    good_column_(false),
    // No tab-derived keys yet.
    left_key_tab_(false),
    right_key_tab_(false),
    left_key_(0),
    right_key_(0),
    type_(PT_UNKNOWN),
    vertical_(vertical),
    working_set_(NULL),
    last_add_was_vertical_(false),
    block_owned_(false),
    desperately_merged_(false),
    // Column assignment is unknown until computed.
    first_column_(-1),
    last_column_(-1),
    column_set_(NULL),
    side_step_(0),
    top_spacing_(0),
    bottom_spacing_(0),
    // Table-related state.
    type_before_table_(PT_UNKNOWN),
    inside_table_column_(false),
    nearest_neighbor_above_(NULL),
    nearest_neighbor_below_(NULL),
    space_above_(0),
    space_below_(0),
    space_to_left_(0),
    space_to_right_(0),
    owns_blobs_(true) {
  // All special-blob densities start at zero.
  memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_));
}
tesseract::ColPartition::~ColPartition ( )

Definition at line 146 of file colpartition.cpp.

                            {
  // Partners must not be left referring to a deleted object, so detach this
  // partition from every partner on both sides.
  // For each of our upper partners, we are one of its lower partners.
  ColPartition_C_IT upper_it(&upper_partners_);
  for (upper_it.mark_cycle_pt(); !upper_it.cycled_list(); upper_it.forward()) {
    ColPartition* above = upper_it.data();
    above->RemovePartner(false, this);
  }
  // For each of our lower partners, we are one of its upper partners.
  ColPartition_C_IT lower_it(&lower_partners_);
  for (lower_it.mark_cycle_pt(); !lower_it.cycled_list(); lower_it.forward()) {
    ColPartition* below = lower_it.data();
    below->RemovePartner(true, this);
  }
}

Member Function Documentation

void tesseract::ColPartition::Absorb ( ColPartition *  other,
WidthCallback *  cb 
)

Definition at line 617 of file colpartition.cpp.

                                                                {
  // Merges other into this partition: moves other's blobs onto boxes_,
  // widens the margins/keys, combines flow and blob type, recomputes the
  // limits, transfers other's partners to this, and finally deletes other.
  // If cb is non-NULL, SetColumnGoodness(cb) is called on the merged result.

  // The result has to either own all of the blobs or none of them.
  // Verify the flag is consistent.
  ASSERT_HOST(owns_blobs() == other->owns_blobs());
  // TODO(nbeato): check owns_blobs better. Right now owns_blobs
  // should always be true when this is called. So there is no issues.
  if (TabFind::WithinTestRegion(2, bounding_box_.left(),
                                bounding_box_.bottom()) ||
      TabFind::WithinTestRegion(2, other->bounding_box_.left(),
                                other->bounding_box_.bottom())) {
    tprintf("Merging:");
    Print();
    other->Print();
  }

  // Update the special_blobs_densities_ to the mean of the two partitions'
  // densities, weighted by their blob counts as they are before the merge.
  // The previous code zeroed this partition's densities before reading them
  // and only stored a result when one of the lists was empty, which lost the
  // densities in the common case and computed 0/0 when both were empty.
  const int w1 = boxes_.length();
  const int w2 = other->boxes_.length();
  if (w1 + w2 > 0) {
    for (int type = 0; type < BSTT_COUNT; ++type) {
      const float weighted_sum = special_blobs_densities_[type] * w1 +
          other->special_blobs_densities_[type] * w2;
      special_blobs_densities_[type] = weighted_sum / (w1 + w2);
    }
  } else {
    // No blobs on either side: there is nothing to have a density of.
    memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_));
  }

  // Merge the two sorted lists.
  BLOBNBOX_C_IT it(&boxes_);
  BLOBNBOX_C_IT it2(&other->boxes_);
  for (; !it2.empty(); it2.forward()) {
    BLOBNBOX* bbox2 = it2.extract();
    ColPartition* prev_owner = bbox2->owner();
    if (prev_owner != other && prev_owner != NULL) {
      // A blob on other's list is owned by someone else; let them have it.
      continue;
    }
    ASSERT_HOST(prev_owner == other || prev_owner == NULL);
    if (prev_owner == other)
      bbox2->set_owner(this);
    it.add_to_end(bbox2);
  }
  // Take the widest margins and the outermost keys of the two partitions.
  left_margin_ = MIN(left_margin_, other->left_margin_);
  right_margin_ = MAX(right_margin_, other->right_margin_);
  if (other->left_key_ < left_key_) {
    left_key_ = other->left_key_;
    left_key_tab_ = other->left_key_tab_;
  }
  if (other->right_key_ > right_key_) {
    right_key_ = other->right_key_;
    right_key_tab_ = other->right_key_tab_;
  }
  // Combine the flow and blob_type in a sensible way.
  // Dominant flows stay.
  if (!DominatesInMerge(flow_, other->flow_)) {
    flow_ = other->flow_;
    blob_type_ = other->blob_type_;
  }
  SetBlobTypes();
  // Blobs were appended at the end, so restore the sort order that matches
  // the (possibly changed) orientation.
  if (IsVerticalType()) {
    boxes_.sort(SortByBoxBottom<BLOBNBOX>);
    last_add_was_vertical_ = true;
  } else {
    boxes_.sort(SortByBoxLeft<BLOBNBOX>);
    last_add_was_vertical_ = false;
  }
  ComputeLimits();
  // Fix partner lists. other is going away, so remove it as a
  // partner of all its partners and add this in its place.
  for (int upper = 0; upper < 2; ++upper) {
    ColPartition_CLIST partners;
    ColPartition_C_IT part_it(&partners);
    part_it.add_list_after(upper ? &other->upper_partners_
                                 : &other->lower_partners_);
    for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
      ColPartition* partner = part_it.extract();
      partner->RemovePartner(!upper, other);
      // Remove this first so that it is not listed twice after the add.
      partner->RemovePartner(!upper, this);
      partner->AddPartner(!upper, this);
    }
  }
  delete other;
  if (cb != NULL) {
    SetColumnGoodness(cb);
  }
}
void tesseract::ColPartition::AddBox ( BLOBNBOX *  box)

Definition at line 180 of file colpartition.cpp.

                                        {
  // Adds bbox to boxes_, keeping the list sorted for the current
  // orientation, and refreshes the bounding box and non-tab keys.
  TBOX box = bbox->bounding_box();
  // Grow the partition limits; the first box simply defines them.
  if (boxes_.length() != 0) {
    bounding_box_ += box;
  } else {
    bounding_box_ = box;
  }

  const bool vertical = IsVerticalType();
  if (vertical) {
    // Vertical partitions are kept sorted bottom-up; re-sort if the previous
    // add used the other ordering.
    if (!last_add_was_vertical_) {
      boxes_.sort(SortByBoxBottom<BLOBNBOX>);
      last_add_was_vertical_ = true;
    }
    boxes_.add_sorted(SortByBoxBottom<BLOBNBOX>, true, bbox);
  } else {
    // Everything else is kept sorted left-to-right.
    if (last_add_was_vertical_) {
      boxes_.sort(SortByBoxLeft<BLOBNBOX>);
      last_add_was_vertical_ = false;
    }
    boxes_.add_sorted(SortByBoxLeft<BLOBNBOX>, true, bbox);
  }
  // Keys that did not come from a tab follow the bounding box.
  if (!left_key_tab_) {
    left_key_ = BoxLeftKey();
  }
  if (!right_key_tab_) {
    right_key_ = BoxRightKey();
  }
  if (TabFind::WithinTestRegion(2, box.left(), box.bottom())) {
    tprintf("Added box (%d,%d)->(%d,%d) left_blob_x_=%d, right_blob_x_ = %d\n",
            box.left(), box.bottom(), box.right(), box.top(),
            bounding_box_.left(), bounding_box_.right());
  }
}
void tesseract::ColPartition::AddPartner ( bool  upper,
ColPartition *  partner 
)

Definition at line 582 of file colpartition.cpp.

                                                               {
  // Links this and partner in both directions: partner goes on our upper
  // (or lower) list and this goes on partner's opposite list.
  ColPartition_CLIST* partner_side =
      upper ? &partner->lower_partners_ : &partner->upper_partners_;
  ColPartition_CLIST* own_side = upper ? &upper_partners_ : &lower_partners_;
  // Both lists are kept sorted by SortByBoxLeft; partner's list is updated
  // first, then our own.
  partner_side->add_sorted(SortByBoxLeft<ColPartition>, true, this);
  own_side->add_sorted(SortByBoxLeft<ColPartition>, true, partner);
}
void tesseract::ColPartition::AddToWorkingSet ( const ICOORD &  bleft,
const ICOORD &  tright,
int  resolution,
ColPartition_LIST *  used_parts,
WorkingPartSet_LIST *  working_set 
)

Definition at line 1313 of file colpartition.cpp.

                                                                      {
  // Adds this partition to the WorkingPartSet for its column, reusing the
  // working set of a singleton upper partner when there is one.
  if (block_owned_) {
    return;  // Done it already.
  }
  block_owned_ = true;
  WorkingPartSet_IT set_it(working_sets);
  // A single upper partner that already has a working set decides ours.
  ColPartition* upper_partner = SingletonPartner(true);
  if (upper_partner != NULL) {
    if (upper_partner->working_set_ != NULL) {
      working_set_ = upper_partner->working_set_;
      working_set_->AddPartition(this);
      return;
    }
    if (textord_debug_bugs) {
      tprintf("Partition with partner has no working set!:");
      Print();
      upper_partner->Print();
    }
  }
  // Walk the working sets until the index reaches first_column_.
  set_it.move_to_first();
  int col_index = 0;
  set_it.mark_cycle_pt();
  while (!set_it.cycled_list() && col_index != first_column_) {
    set_it.forward();
    ++col_index;
  }
  if (textord_debug_tabfind >= 2) {
    tprintf("Match is %s for:", (col_index & 1) ? "Real" : "Between");
    Print();
  }
  if (set_it.cycled_list() && textord_debug_bugs) {
    tprintf("Target column=%d, only had %d\n", first_column_, col_index);
  }
  ASSERT_HOST(!set_it.cycled_list());
  WorkingPartSet* target_set = set_it.data();
  // If last_column_ != first_column, then all the blocks between here and
  // last_column_ have to be scooped up and put back in target_set.
  if (!set_it.cycled_list() && last_column_ != first_column_) {
    BLOCK_LIST completed_blocks;
    TO_BLOCK_LIST to_blocks;
    // Collect completed blocks from every set up to and including the one at
    // last_column_.
    while (!set_it.cycled_list() && col_index <= last_column_) {
      WorkingPartSet* spanned_set = set_it.data();
      spanned_set->ExtractCompletedBlocks(bleft, tright, resolution,
                                          used_parts, &completed_blocks,
                                          &to_blocks);
      set_it.forward();
      ++col_index;
    }
    target_set->InsertCompletedBlocks(&completed_blocks, &to_blocks);
  }
  working_set_ = target_set;
  target_set->AddPartition(this);
}
BLOBNBOX * tesseract::ColPartition::BiggestBox ( )

Definition at line 226 of file colpartition.cpp.

                                   {
  // Returns the blob with the largest extent across the text direction:
  // width for vertical partitions, height otherwise. NULL if there are no
  // blobs. On ties the earliest blob in the list wins.
  const bool use_width = IsVerticalType();
  BLOBNBOX* winner = NULL;
  BLOBNBOX_C_IT blob_it(&boxes_);
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    BLOBNBOX* candidate = blob_it.data();
    if (winner == NULL) {
      winner = candidate;
      continue;
    }
    const bool is_bigger = use_width
        ? candidate->bounding_box().width() > winner->bounding_box().width()
        : candidate->bounding_box().height() > winner->bounding_box().height();
    if (is_bigger) {
      winner = candidate;
    }
  }
  return winner;
}
BlobRegionType tesseract::ColPartition::blob_type ( ) const [inline]

Definition at line 148 of file colpartition.h.

                                   {
    // Read-only accessor for the blob_type_ member.
    return blob_type_;
  }
bool tesseract::ColPartition::block_owned ( ) const [inline]

Definition at line 205 of file colpartition.h.

                           {
    // Read-only accessor for the block_owned_ flag.
    return block_owned_;
  }
int tesseract::ColPartition::bottom_spacing ( ) const [inline]

Definition at line 220 of file colpartition.h.

                             {
    // Read-only accessor for the bottom_spacing_ member.
    return bottom_spacing_;
  }
const TBOX& tesseract::ColPartition::bounding_box ( ) const [inline]

Definition at line 109 of file colpartition.h.

                                   {
    // Gives access to the stored bounding_box_ without copying it.
    return bounding_box_;
  }
TBOX tesseract::ColPartition::BoundsWithoutBox ( BLOBNBOX *  box)

Definition at line 245 of file colpartition.cpp.

                                                 {
  // Returns the union of the bounding boxes of every blob in boxes_ except
  // box (compared by pointer).
  TBOX bounds;
  BLOBNBOX_C_IT blob_it(&boxes_);
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    BLOBNBOX* current = blob_it.data();
    if (current == box) {
      continue;  // This is the one to leave out.
    }
    bounds += current->bounding_box();
  }
  return bounds;
}
ScrollView::Color tesseract::ColPartition::BoxColor ( ) const

Definition at line 1724 of file colpartition.cpp.

                                              {
  // A known partition type decides the color; otherwise fall back to the
  // text-line color for the blob type and flow.
  if (type_ != PT_UNKNOWN) {
    return POLY_BLOCK::ColorForPolyBlockType(type_);
  }
  return BLOBNBOX::TextlineColor(blob_type_, flow_);
}
BLOBNBOX_CLIST* tesseract::ColPartition::boxes ( ) [inline]

Definition at line 187 of file colpartition.h.

                          {
    // Exposes the address of the boxes_ list member.
    return &boxes_;
  }
int tesseract::ColPartition::boxes_count ( ) const [inline]

Definition at line 190 of file colpartition.h.

                          {
    // Number of entries currently in boxes_.
    return boxes_.length();
  }
int tesseract::ColPartition::BoxLeftKey ( ) const [inline]

Definition at line 332 of file colpartition.h.

                         {
    // Sort key of the bounding box's left edge, taken at y = MidY().
    return SortKey(bounding_box_.left(), MidY());
  }
int tesseract::ColPartition::BoxRightKey ( ) const [inline]

Definition at line 336 of file colpartition.h.

                          {
    // Sort key of the bounding box's right edge, taken at y = MidY().
    return SortKey(bounding_box_.right(), MidY());
  }
void tesseract::ColPartition::ClaimBoxes ( )

Definition at line 258 of file colpartition.cpp.

                              {
  // Makes this partition the owner of every blob in boxes_. A blob that
  // already has an owner must already belong to this partition.
  BLOBNBOX_C_IT blob_it(&boxes_);
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    BLOBNBOX* blob = blob_it.data();
    ColPartition* current_owner = blob->owner();
    if (current_owner != NULL) {
      ASSERT_HOST(current_owner == this);
      continue;
    }
    // Normal case: ownership is available.
    blob->set_owner(this);
  }
}
void tesseract::ColPartition::clear_table_type ( ) [inline]

Definition at line 239 of file colpartition.h.

                          {
    // Only a partition currently marked PT_TABLE is affected; it goes back
    // to the type saved in type_before_table_.
    if (type_ != PT_TABLE)
      return;
    type_ = type_before_table_;
  }
uinT8* tesseract::ColPartition::color1 ( ) [inline]

Definition at line 285 of file colpartition.h.

                  {
    // Hands out the color1_ member directly (not a copy).
    return color1_;
  }
uinT8* tesseract::ColPartition::color2 ( ) [inline]

Definition at line 288 of file colpartition.h.

                  {
    // Hands out the color2_ member directly (not a copy).
    return color2_;
  }
ColPartitionSet* tesseract::ColPartition::column_set ( ) const [inline]

Definition at line 214 of file colpartition.h.

                                      {
    // Read-only accessor for the column_set_ pointer.
    return column_set_;
  }
bool tesseract::ColPartition::ColumnContains ( int  x,
int  y 
) const [inline]

Definition at line 353 of file colpartition.h.

                                          {
    // True when x lies between LeftAtY(y) and RightAtY(y), allowing one unit
    // of slack on either side.
    if (x < LeftAtY(y) - 1)
      return false;
    return x <= RightAtY(y) + 1;
  }
void tesseract::ColPartition::ColumnRange ( int  resolution,
ColPartitionSet *  columns,
int *  first_col,
int *  last_col 
)

Definition at line 1028 of file colpartition.cpp.

                                                              {
  // Asks columns for the spanning type of this partition (which also fills
  // in *first_col and *last_col) and sets type_ from the result.
  // first_spanned_col is required by SpanningType but not used here.
  int first_spanned_col = -1;
  type_ = PartitionType(
      columns->SpanningType(resolution,
                            bounding_box_.left(), bounding_box_.right(),
                            MidY(), left_margin_, right_margin_,
                            first_col, last_col,
                            &first_spanned_col));
}
int tesseract::ColPartition::ColumnWidth ( ) const [inline]

Definition at line 328 of file colpartition.h.

                          {
    // Width spanned by the current left and right keys.
    return KeyWidth(left_key_, right_key_);
  }
void tesseract::ColPartition::ComputeLimits ( )

Definition at line 834 of file colpartition.cpp.

                                 {
  // Recomputes bounding_box_, the left/right keys (when they are not fixed
  // by a tab) and the median_* members from the blobs in boxes_, then
  // re-inserts this partition into each partner's partner list.
  bounding_box_ = TBOX();  // Clear it
  BLOBNBOX_C_IT it(&boxes_);
  BLOBNBOX* bbox = NULL;
  // Number of blobs whose flow is not BTFT_LEADER.
  int non_leader_count = 0;
  if (it.empty()) {
    // No blobs: the box spans the margins horizontally with zero height.
    bounding_box_.set_left(left_margin_);
    bounding_box_.set_right(right_margin_);
    bounding_box_.set_bottom(0);
    bounding_box_.set_top(0);
  } else {
    // The bounding box is the union of all the blob boxes.
    for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
      bbox = it.data();
      bounding_box_ += bbox->bounding_box();
      if (bbox->flow() != BTFT_LEADER)
        ++non_leader_count;
    }
  }
  // A key that did not come from a tab follows the bounding box. A tab key
  // is kept, and only reported (under textord_debug_bugs) if it lies inside
  // the box.
  if (!left_key_tab_)
    left_key_ = BoxLeftKey();
  if (left_key_ > BoxLeftKey() && textord_debug_bugs) {
    // TODO(rays) investigate the causes of these error messages, to find
    // out if they are genuinely harmful, or just indicative of junk input.
    tprintf("Computed left-illegal partition\n");
    Print();
  }
  if (!right_key_tab_)
    right_key_ = BoxRightKey();
  if (right_key_ < BoxRightKey() && textord_debug_bugs) {
    tprintf("Computed right-illegal partition\n");
    Print();
  }
  // With no blobs there are no medians to compute and the partner lists are
  // left untouched.
  if (it.empty())
    return;
  if (IsImageType() || blob_type() == BRT_RECTIMAGE ||
      blob_type() == BRT_POLYIMAGE) {
    // Image partitions take their "medians" straight from the bounding box.
    median_top_ = bounding_box_.top();
    median_bottom_ = bounding_box_.bottom();
    median_size_ = bounding_box_.height();
    median_left_ = bounding_box_.left();
    median_right_ = bounding_box_.right();
    median_width_ = bounding_box_.width();
  } else {
    // One STATS accumulator per quantity, each ranged to cover the values
    // that can occur inside the bounding box.
    STATS top_stats(bounding_box_.bottom(), bounding_box_.top() + 1);
    STATS bottom_stats(bounding_box_.bottom(), bounding_box_.top() + 1);
    STATS size_stats(0, bounding_box_.height() + 1);
    STATS left_stats(bounding_box_.left(), bounding_box_.right() + 1);
    STATS right_stats(bounding_box_.left(), bounding_box_.right() + 1);
    STATS width_stats(0, bounding_box_.width() + 1);
    for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
      bbox = it.data();
      // Leader blobs are left out unless every blob is a leader.
      if (non_leader_count == 0 || bbox->flow() != BTFT_LEADER) {
        TBOX box = bbox->bounding_box();
        // The box area is passed as the second argument of STATS::add
        // (presumably the weight of the sample - confirm against STATS).
        int area = box.area();
        top_stats.add(box.top(), area);
        bottom_stats.add(box.bottom(), area);
        size_stats.add(box.height(), area);
        left_stats.add(box.left(), area);
        right_stats.add(box.right(), area);
        width_stats.add(box.width(), area);
      }
    }
    // Adding 0.5 before the truncating cast rounds non-negative medians to
    // the nearest integer.
    median_top_ = static_cast<int>(top_stats.median() + 0.5);
    median_bottom_ = static_cast<int>(bottom_stats.median() + 0.5);
    median_size_ = static_cast<int>(size_stats.median() + 0.5);
    median_left_ = static_cast<int>(left_stats.median() + 0.5);
    median_right_ = static_cast<int>(right_stats.median() + 0.5);
    median_width_ = static_cast<int>(width_stats.median() + 0.5);
  }

  // Debug-only reports of a bounding box that sticks out past a margin.
  if (right_margin_ < bounding_box_.right() && textord_debug_bugs) {
    tprintf("Made partition with bad right coords");
    Print();
  }
  if (left_margin_ > bounding_box_.left() && textord_debug_bugs) {
    tprintf("Made partition with bad left coords");
    Print();
  }
  // Fix partner lists. The bounding box has changed and partners are stored
  // in bounding box order, so remove and reinsert this as a partner
  // of all its partners.
  for (int upper = 0; upper < 2; ++upper) {
    // Move the partner list into a temporary so it can be emptied while
    // AddPartner rebuilds the member list.
    ColPartition_CLIST partners;
    ColPartition_C_IT part_it(&partners);
    part_it.add_list_after(upper ? &upper_partners_ : &lower_partners_);
    for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
      ColPartition* partner = part_it.extract();
      partner->RemovePartner(!upper, this);
      partner->AddPartner(!upper, this);
    }
  }
  if (TabFind::WithinTestRegion(2, bounding_box_.left(),
                                bounding_box_.bottom())) {
    tprintf("Recomputed box for partition %p\n", this);
    Print();
  }
}
void tesseract::ColPartition::ComputeSpecialBlobsDensity ( )

Definition at line 561 of file colpartition.cpp.

                                              {
  // Sets special_blobs_densities_[t] to the fraction of blobs in boxes_
  // whose special_text_type() is t. All densities are zero when there are
  // no blobs.
  memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_));
  if (boxes_.empty())
    return;

  // First pass: count the blobs of each special text type.
  BLOBNBOX_C_IT it(&boxes_);
  it.mark_cycle_pt();
  while (!it.cycled_list()) {
    BlobSpecialTextType blob_type = it.data()->special_text_type();
    special_blobs_densities_[blob_type]++;
    it.forward();
  }

  // Second pass: turn the counts into fractions of the total.
  const int total_blobs = boxes_.length();
  for (int type = 0; type < BSTT_COUNT; ++type)
    special_blobs_densities_[type] /= total_blobs;
}
bool tesseract::ColPartition::ConfirmNoTabViolation ( const ColPartition &  other) const

Definition at line 392 of file colpartition.cpp.

                                                                        {
  // Returns false if the two partitions do not touch horizontally and a
  // blob rule of one of them lies in the gap between them; true otherwise.
  const TBOX& mine = bounding_box_;
  const TBOX& theirs = other.bounding_box_;
  // This is left of other, and also left of other's left rule.
  if (mine.right() < theirs.left() && mine.right() < other.LeftBlobRule()) {
    return false;
  }
  // Other is left of this, and also left of this partition's left rule.
  if (theirs.right() < mine.left() && theirs.right() < LeftBlobRule()) {
    return false;
  }
  // This is right of other, and also right of other's right rule.
  if (mine.left() > theirs.right() && mine.left() > other.RightBlobRule()) {
    return false;
  }
  // Other is right of this, and also right of this partition's right rule.
  if (theirs.left() > mine.right() && theirs.left() > RightBlobRule()) {
    return false;
  }
  return true;
}
ColPartition * tesseract::ColPartition::CopyButDontOwnBlobs ( )

Definition at line 1711 of file colpartition.cpp.

                                                {
  // Returns a ShallowCopy() of this partition that is marked as not owning
  // its blobs and whose box list refers to the same BLOBNBOXes, in the same
  // order, as this one.
  ColPartition* clone = ShallowCopy();
  clone->set_owns_blobs(false);
  BLOBNBOX_C_IT dest_it(clone->boxes());
  BLOBNBOX_C_IT src_it(boxes());
  src_it.mark_cycle_pt();
  while (!src_it.cycled_list()) {
    dest_it.add_after_then_move(src_it.data());
    src_it.forward();
  }
  return clone;
}
void tesseract::ColPartition::CopyLeftTab ( const ColPartition &  src,
bool  take_box 
)

Definition at line 498 of file colpartition.cpp.

                                                                     {
  // Copies the left edge information from src. With take_box set (or when
  // src has no left tab) the edge comes from src's bounding box instead of
  // from its tab key.
  left_key_tab_ = !take_box && src.left_key_tab_;
  if (!left_key_tab_) {
    // Move our box edge to where src's box key falls at our mid-height,
    // then derive the key from the box.
    bounding_box_.set_left(XAtY(src.BoxLeftKey(), MidY()));
    left_key_ = BoxLeftKey();
  } else {
    left_key_ = src.left_key_;
  }
  // A margin that ended up inside the box is replaced by src's margin.
  if (left_margin_ > bounding_box_.left()) {
    left_margin_ = src.left_margin_;
  }
}
void tesseract::ColPartition::CopyRightTab ( const ColPartition &  src,
bool  take_box 
)

Definition at line 511 of file colpartition.cpp.

                                                                      {
  // Copies the right edge information from src. With take_box set (or when
  // src has no right tab) the edge comes from src's bounding box instead of
  // from its tab key.
  right_key_tab_ = !take_box && src.right_key_tab_;
  if (!right_key_tab_) {
    // Move our box edge to where src's box key falls at our mid-height,
    // then derive the key from the box.
    bounding_box_.set_right(XAtY(src.BoxRightKey(), MidY()));
    right_key_ = BoxRightKey();
  } else {
    right_key_ = src.right_key_;
  }
  // A margin that ended up inside the box is replaced by src's margin.
  if (right_margin_ < bounding_box_.right()) {
    right_margin_ = src.right_margin_;
  }
}
int tesseract::ColPartition::CountOverlappingBoxes ( const TBOX &  box)

Definition at line 933 of file colpartition.cpp.

                                                       {
  // Counts the blobs in boxes_ whose bounding box overlaps box.
  int hits = 0;
  BLOBNBOX_C_IT blob_it(&boxes_);
  blob_it.mark_cycle_pt();
  while (!blob_it.cycled_list()) {
    const BLOBNBOX* blob = blob_it.data();
    if (box.overlap(blob->bounding_box())) {
      ++hits;
    }
    blob_it.forward();
  }
  return hits;
}
void tesseract::ColPartition::DeleteBoxes ( )

Definition at line 284 of file colpartition.cpp.

                               {
  // boxes_ is a C_LIST, but in some cases it owns the BLOBNBOXes because the
  // ColPartition takes ownership from the grid, and each BLOBNBOX in turn
  // owns its underlying C_BLOB. Empty the list, deleting both.
  BLOBNBOX_C_IT blob_it(&boxes_);
  while (!blob_it.empty()) {
    BLOBNBOX* doomed = blob_it.extract();
    delete doomed->cblob();
    delete doomed;
    blob_it.forward();
  }
}
bool tesseract::ColPartition::desperately_merged ( ) const [inline]

Definition at line 211 of file colpartition.h.

                                  {
    // Read-only accessor for the desperately_merged_ flag.
    return desperately_merged_;
  }
void tesseract::ColPartition::DisownBoxes ( )

Definition at line 274 of file colpartition.cpp.

                               {
  // Clears the owner of every blob in boxes_. Each blob must be owned by
  // this partition or by nobody.
  BLOBNBOX_C_IT blob_it(&boxes_);
  blob_it.mark_cycle_pt();
  while (!blob_it.cycled_list()) {
    BLOBNBOX* blob = blob_it.data();
    ASSERT_HOST(blob->owner() == this || blob->owner() == NULL);
    blob->set_owner(NULL);
    blob_it.forward();
  }
}
ColPartition * tesseract::ColPartition::FakePartition ( const TBOX &  box,
PolyBlockType  block_type,
BlobRegionType  blob_type,
BlobTextFlowType  flow 
) [static]

Definition at line 108 of file colpartition.cpp.

                                                                 {
  // Builds a new partition holding a single fake blob made from box, with
  // the given types and flow and with margins equal to the box edges. The
  // caller receives the newly allocated partition.
  ColPartition* fake = new ColPartition(blob_type, ICOORD(0, 1));
  fake->set_type(block_type);
  fake->set_flow(flow);
  BLOBNBOX* fake_blob = new BLOBNBOX(C_BLOB::FakeBlob(box));
  fake->AddBox(fake_blob);
  fake->set_left_margin(box.left());
  fake->set_right_margin(box.right());
  // Push the types down to the blob, recompute limits, then claim the blob.
  fake->SetBlobTypes();
  fake->ComputeLimits();
  fake->ClaimBoxes();
  return fake;
}
BlobTextFlowType tesseract::ColPartition::flow ( ) const [inline]

Definition at line 154 of file colpartition.h.

                                {
    // Read-only accessor for the flow_ member.
    return flow_;
  }
int tesseract::ColPartition::good_blob_score ( ) const [inline]

Definition at line 160 of file colpartition.h.

                              {
    // Read-only accessor for the good_blob_score_ member.
    return good_blob_score_;
  }
bool tesseract::ColPartition::good_column ( ) const [inline]

Definition at line 166 of file colpartition.h.

                           {
    // Read-only accessor for the good_column_ flag.
    return good_column_;
  }
bool tesseract::ColPartition::good_width ( ) const [inline]

Definition at line 163 of file colpartition.h.

                          {
    // Read-only accessor for the good_width_ flag.
    return good_width_;
  }
bool tesseract::ColPartition::HasGoodBaseline ( )

Definition at line 1248 of file colpartition.cpp.

                                   {
  // Returns true if a line fitted through the blobs of this partition has a
  // small enough error relative to the mean blob height, and the blobs cover
  // enough of the line's length. Returns false when there are too few blobs
  // to compute a mean height.
  // An empty partition has nothing to fit, and the iterator below must not
  // be dereferenced on an empty list.
  if (boxes_.empty())
    return false;
  // Approximation of the baseline.
  DetLineFit linepoints;
  // Calculation of the mean height on this line segment. Note that these
  // variable names apply to the context of a horizontal line, and work
  // analogously, rather than literally in the case of a vertical line.
  int total_height = 0;
  int coverage = 0;
  int height_count = 0;
  int width = 0;
  BLOBNBOX_C_IT it(&boxes_);
  TBOX box(it.data()->bounding_box());
  // Accumulate points representing the baseline at the middle of each blob,
  // but add an additional point for each end of the line. This makes it
  // harder to fit a severe skew angle, as it is most likely not right.
  if (IsVerticalType()) {
    // For a vertical line, use the right side as the baseline.
    ICOORD first_pt(box.right(), box.bottom());
    // Use the bottom-right of the first (bottom) box, the top-right of the
    // last, and the middle-right of all others.
    linepoints.Add(first_pt);
    for (it.forward(); !it.at_last(); it.forward()) {
      BLOBNBOX* blob = it.data();
      box = blob->bounding_box();
      ICOORD box_pt(box.right(), (box.top() + box.bottom()) / 2);
      linepoints.Add(box_pt);
      total_height += box.width();
      coverage += box.height();
      ++height_count;
    }
    box = it.data()->bounding_box();
    ICOORD last_pt(box.right(), box.top());
    linepoints.Add(last_pt);
    width = last_pt.y() - first_pt.y();

  } else {
    // Horizontal lines use the bottom as the baseline. box already holds the
    // bounding box of the first blob.
    // Use the bottom-left of the first box, the bottom-right of the last,
    // and the middle of all others.
    ICOORD first_pt(box.left(), box.bottom());
    linepoints.Add(first_pt);
    for (it.forward(); !it.at_last(); it.forward()) {
      BLOBNBOX* blob = it.data();
      box = blob->bounding_box();
      ICOORD box_pt((box.left() + box.right()) / 2, box.bottom());
      linepoints.Add(box_pt);
      total_height += box.height();
      coverage += box.width();
      ++height_count;
    }
    box = it.data()->bounding_box();
    ICOORD last_pt(box.right(), box.bottom());
    linepoints.Add(last_pt);
    width = last_pt.x() - first_pt.x();
  }
  // Only the blobs strictly between the first and last contribute a height,
  // so with fewer than three blobs there is no mean height to scale the
  // error limit by. Reject explicitly instead of dividing by zero.
  if (height_count == 0)
    return false;
  // Maximum median error allowed to be a good text line.
  double max_error = kMaxBaselineError * total_height / height_count;
  ICOORD start_pt, end_pt;
  double error = linepoints.Fit(&start_pt, &end_pt);
  return error < max_error && coverage >= kMinBaselineCoverage * width;
}
int tesseract::ColPartition::HCoreOverlap ( const ColPartition other) const [inline]

Definition at line 381 of file colpartition.h.

                                                    {
    // Horizontal overlap of the two median (core) x-ranges: the smaller of
    // the median right edges minus the larger of the median left edges.
    // The result is negative when the ranges do not overlap.
    const int overlap_right = MIN(median_right_, other.median_right_);
    const int overlap_left = MAX(median_left_, other.median_left_);
    return overlap_right - overlap_left;
  }
bool tesseract::ColPartition::HOverlaps ( const ColPartition other) const [inline]

Definition at line 365 of file colpartition.h.

                                                  {
    // Returns the result of x_overlap between this partition's bounding
    // box and other's bounding box.
    return bounding_box_.x_overlap(other.bounding_box_);
  }
bool tesseract::ColPartition::inside_table_column ( ) [inline]

Definition at line 243 of file colpartition.h.

                             {
    // Accessor for the inside_table_column_ member.
    return inside_table_column_;
  }
bool tesseract::ColPartition::IsEmpty ( ) const [inline]

Definition at line 357 of file colpartition.h.

                       {
    // True when the boxes_ list holds no blobs.
    return boxes_.empty();
  }
bool tesseract::ColPartition::IsHorizontalLine ( ) const [inline]

Definition at line 449 of file colpartition.h.

                                {
    // True only when both IsHorizontalType() and IsLineType() hold.
    return IsHorizontalType() && IsLineType();
  }
bool tesseract::ColPartition::IsHorizontalType ( ) const [inline]

Definition at line 435 of file colpartition.h.

                                {
    // True when blob_type_ is BRT_TEXT or BRT_HLINE.
    return blob_type_ == BRT_TEXT || blob_type_ == BRT_HLINE;
  }
bool tesseract::ColPartition::IsImageType ( ) const [inline]

Definition at line 423 of file colpartition.h.

                           {
    // Delegates to PTIsImageType on the partition's type_.
    return PTIsImageType(type_);
  }
bool tesseract::ColPartition::IsInSameColumnAs ( const ColPartition part) const

Definition at line 2128 of file colpartition.cpp.

                                                                  {
  // The column ranges [first_column_, last_column_] of the two partitions
  // are disjoint exactly when one range ends before the other begins.
  const bool this_entirely_before = last_column_ < part.first_column_;
  const bool this_entirely_after = first_column_ > part.last_column_;
  // Anything that is not disjoint shares at least one column.
  return !(this_entirely_before || this_entirely_after);
}
bool tesseract::ColPartition::IsLeftOf ( const ColPartition other) const [inline]

Definition at line 349 of file colpartition.h.

                                                 {
    // Compares the right edges of the two bounding boxes only: true when
    // this partition's right edge is strictly left of other's right edge.
    return bounding_box_.right() < other.bounding_box_.right();
  }
bool tesseract::ColPartition::IsLegal ( )

Definition at line 321 of file colpartition.cpp.

                           {
  // Runs three consistency checks on the partition's geometry and returns
  // false at the first failure, true if all pass. When textord_debug_bugs is
  // set, each failure also prints a message and the partition.
  // Check 1: the bounding box must not have its left edge beyond its right.
  if (bounding_box_.left() > bounding_box_.right()) {
    if (textord_debug_bugs) {
      tprintf("Bounding box invalid\n");
      Print();
    }
    return false;  // Bounding box invalid.
  }
  // Check 2: the margins must enclose the bounding box horizontally.
  if (left_margin_ > bounding_box_.left() ||
      right_margin_ < bounding_box_.right()) {
    if (textord_debug_bugs) {
      tprintf("Margins invalid\n");
      Print();
    }
    return false;  // Margins invalid.
  }
  // Check 3: the left/right keys must not lie inside the keys computed
  // from the box by BoxLeftKey()/BoxRightKey().
  if (left_key_ > BoxLeftKey() || right_key_ < BoxRightKey()) {
    if (textord_debug_bugs) {
      tprintf("Key inside box: %d v %d or %d v %d\n",
              left_key_, BoxLeftKey(), right_key_, BoxRightKey());
      Print();
    }
    return false;  // Keys inside the box.
  }
  return true;
}
bool tesseract::ColPartition::IsLineType ( ) const [inline]

Definition at line 419 of file colpartition.h.

                          {
    // Delegates to PTIsLineType on the partition's type_.
    return PTIsLineType(type_);
  }
bool tesseract::ColPartition::IsSingleton ( ) const [inline]

Definition at line 361 of file colpartition.h.

                           {
    // Returns the result of singleton() on the boxes_ list.
    return boxes_.singleton();
  }
bool tesseract::ColPartition::IsTextType ( ) const [inline]

Definition at line 427 of file colpartition.h.

                          {
    // Delegates to PTIsTextType on the partition's type_.
    return PTIsTextType(type_);
  }
bool tesseract::ColPartition::IsUnMergeableType ( ) const [inline]

Definition at line 439 of file colpartition.h.

                                 {
    // True when BLOBNBOX::UnMergeableType accepts blob_type_, or when the
    // partition type is PT_NOISE.
    return BLOBNBOX::UnMergeableType(blob_type_) || type_ == PT_NOISE;
  }
bool tesseract::ColPartition::IsVerticalLine ( ) const [inline]

Definition at line 444 of file colpartition.h.

                              {
    // True only when both IsVerticalType() and IsLineType() hold.
    return IsVerticalType() && IsLineType();
  }
bool tesseract::ColPartition::IsVerticalType ( ) const [inline]

Definition at line 431 of file colpartition.h.

                              {
    // True when blob_type_ is BRT_VERT_TEXT or BRT_VLINE.
    return blob_type_ == BRT_VERT_TEXT || blob_type_ == BRT_VLINE;
  }
int tesseract::ColPartition::KeyWidth ( int  left_key,
int  right_key 
) const [inline]

Definition at line 324 of file colpartition.h.

                                                  {
    // Difference of the two keys divided (integer division) by the y
    // component of vertical_.
    // NOTE(review): assumes vertical_.y() is never zero - confirm.
    return (right_key - left_key) / vertical_.y();
  }
int tesseract::ColPartition::left_key ( ) const [inline]

Definition at line 172 of file colpartition.h.

                       {
    // Read-only accessor for the left_key_ member.
    return left_key_;
  }
bool tesseract::ColPartition::left_key_tab ( ) const [inline]

Definition at line 169 of file colpartition.h.

                            {
    // Read-only accessor for the left_key_tab_ member.
    return left_key_tab_;
  }
int tesseract::ColPartition::left_margin ( ) const [inline]

Definition at line 112 of file colpartition.h.

                          {
    // Read-only accessor for the left_margin_ member.
    return left_margin_;
  }
int tesseract::ColPartition::LeftAtY ( int  y) const [inline]

Definition at line 340 of file colpartition.h.

                           {
    // Evaluates XAtY for the partition's left_key_ at the given y.
    return XAtY(left_key_, y);
  }
int tesseract::ColPartition::LeftBlobRule ( ) const

Definition at line 524 of file colpartition.cpp.

                                     {
  // Returns left_rule() of the blob a fresh iterator over boxes_ points at.
  // The const_cast is presumably needed because the iterator takes a
  // non-const list pointer while this method is const.
  // NOTE(review): dereferences it.data() without an emptiness check -
  // assumes boxes_ is non-empty; confirm at callers.
  BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
  return it.data()->left_rule();
}
void tesseract::ColPartition::LineSpacingBlocks ( const ICOORD bleft,
const ICOORD tright,
int  resolution,
ColPartition_LIST *  block_parts,
ColPartition_LIST *  used_parts,
BLOCK_LIST *  completed_blocks,
TO_BLOCK_LIST *  to_blocks 
) [static]

Definition at line 1373 of file colpartition.cpp.

                                                               {
  int page_height = tright.y() - bleft.y();
  // Compute the initial spacing stats.
  ColPartition_IT it(block_parts);
  int part_count = 0;
  int max_line_height = 0;

  // TODO(joeliu): We should add some special logic for PT_INLINE_EQUATION type
  // because their line spacing with their neighbors maybe smaller and their
  // height may be slightly larger.

  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    ColPartition* part = it.data();
    ASSERT_HOST(!part->boxes()->empty());
    STATS side_steps(0, part->bounding_box().height());
    if (part->bounding_box().height() > max_line_height)
      max_line_height = part->bounding_box().height();
    BLOBNBOX_C_IT blob_it(part->boxes());
    int prev_bottom = blob_it.data()->bounding_box().bottom();
    for (blob_it.forward(); !blob_it.at_first(); blob_it.forward()) {
      BLOBNBOX* blob = blob_it.data();
      int bottom = blob->bounding_box().bottom();
      int step = bottom - prev_bottom;
      if (step < 0)
        step = -step;
      side_steps.add(step, 1);
      prev_bottom = bottom;
    }
    part->set_side_step(static_cast<int>(side_steps.median() + 0.5));
    if (!it.at_last()) {
      ColPartition* next_part = it.data_relative(1);
      part->set_bottom_spacing(part->median_bottom() -
                               next_part->median_bottom());
      part->set_top_spacing(part->median_top() - next_part->median_top());
    } else {
      part->set_bottom_spacing(page_height);
      part->set_top_spacing(page_height);
    }
    if (textord_debug_tabfind) {
      part->Print();
      tprintf("side step = %.2f, top spacing = %d, bottom spacing=%d\n",
              side_steps.median(), part->top_spacing(), part->bottom_spacing());
    }
    ++part_count;
  }
  if (part_count == 0)
    return;

  SmoothSpacings(resolution, page_height, block_parts);

  // Move the partitions into individual block lists and make the blocks.
  BLOCK_IT block_it(completed_blocks);
  TO_BLOCK_IT to_block_it(to_blocks);
  ColPartition_LIST spacing_parts;
  ColPartition_IT sp_block_it(&spacing_parts);
  int same_block_threshold = max_line_height * kMaxSameBlockLineSpacing;
  for (it.mark_cycle_pt(); !it.empty();) {
    ColPartition* part = it.extract();
    sp_block_it.add_to_end(part);
    it.forward();
    if (it.empty() || part->bottom_spacing() > same_block_threshold ||
        !part->SpacingsEqual(*it.data(), resolution)) {
      // There is a spacing boundary. Check to see if it.data() belongs
      // better in the current block or the next one.
      if (!it.empty() && part->bottom_spacing() <= same_block_threshold) {
        ColPartition* next_part = it.data();
        // If there is a size match one-way, then the middle line goes with
        // its matched size, otherwise it goes with the smallest spacing.
        ColPartition* third_part = it.at_last() ? NULL : it.data_relative(1);
        if (textord_debug_tabfind) {
          tprintf("Spacings unequal: upper:%d/%d, lower:%d/%d,"
                  " sizes %d %d %d\n",
                  part->top_spacing(), part->bottom_spacing(),
                  next_part->top_spacing(), next_part->bottom_spacing(),
                  part->median_size(), next_part->median_size(),
                  third_part != NULL ? third_part->median_size() : 0);
        }
        // We can only consider adding the next line to the block if the sizes
        // match and the lines are close enough for their size.
        if (part->SizesSimilar(*next_part) &&
            next_part->median_size() * kMaxSameBlockLineSpacing >
                part->bottom_spacing() &&
            part->median_size() * kMaxSameBlockLineSpacing >
                part->top_spacing()) {
          // Even now, we can only add it as long as the third line doesn't
          // match in the same way and have a smaller bottom spacing.
          if (third_part == NULL ||
              !next_part->SizesSimilar(*third_part) ||
              third_part->median_size() * kMaxSameBlockLineSpacing <=
                  next_part->bottom_spacing() ||
              next_part->median_size() * kMaxSameBlockLineSpacing <=
                  next_part->top_spacing() ||
                  next_part->bottom_spacing() > part->bottom_spacing()) {
            // Add to the current block.
            sp_block_it.add_to_end(it.extract());
            it.forward();
            if (textord_debug_tabfind) {
              tprintf("Added line to current block.\n");
            }
          }
        }
      }
      TO_BLOCK* to_block = MakeBlock(bleft, tright, &spacing_parts, used_parts);
      if (to_block != NULL) {
        to_block_it.add_to_end(to_block);
        block_it.add_to_end(to_block->block);
      }
      sp_block_it.set_to_list(&spacing_parts);
    } else {
      if (textord_debug_tabfind && !it.empty()) {
        ColPartition* next_part = it.data();
        tprintf("Spacings equal: upper:%d/%d, lower:%d/%d\n",
                part->top_spacing(), part->bottom_spacing(),
                next_part->top_spacing(), next_part->bottom_spacing(),
                part->median_size(), next_part->median_size());
      }
    }
  }
}
ColPartition_CLIST* tesseract::ColPartition::lower_partners ( ) [inline]

Definition at line 199 of file colpartition.h.

                                       {
    // Returns a pointer to the lower_partners_ list member.
    return &lower_partners_;
  }
ColPartition * tesseract::ColPartition::MakeBigPartition ( BLOBNBOX *  box,
ColPartition_LIST *  big_part_list 
) [static]

Definition at line 129 of file colpartition.cpp.

                                                                               {
  // Wraps a single blob in a new heap-allocated partition of unknown blob
  // type with no flow, marked as block-owned.
  //   box: the blob to wrap; it is used through a pointer, and its owner is
  //     cleared before the new partition claims it.
  //   big_part_list: if non-NULL, the new partition is appended to it.
  // Returns the new partition.
  box->set_owner(NULL);
  ColPartition* single = new ColPartition(BRT_UNKNOWN, ICOORD(0, 1));
  single->set_flow(BTFT_NONE);
  single->AddBox(box);
  single->ComputeLimits();
  single->ClaimBoxes();
  single->SetBlobTypes();
  single->set_block_owned(true);
  if (big_part_list != NULL) {
    ColPartition_IT part_it(big_part_list);
    part_it.add_to_end(single);
  }
  return single;
}
TO_BLOCK * tesseract::ColPartition::MakeBlock ( const ICOORD bleft,
const ICOORD tright,
ColPartition_LIST *  block_parts,
ColPartition_LIST *  used_parts 
) [static]

Definition at line 1605 of file colpartition.cpp.

                                                                 {
  // Builds a polygonal BLOCK around the partitions in block_parts and hands
  // it, with the partitions, to MoveBlobsToBlock, whose result is returned.
  // Returns NULL for an empty list. A list whose first partition is
  // PT_VERTICAL_TEXT is delegated to MakeVerticalTextBlock instead.
  if (block_parts->empty())
    return NULL;  // Nothing to do.
  ColPartition_IT it(block_parts);
  ColPartition* part = it.data();
  PolyBlockType type = part->type();
  if (type == PT_VERTICAL_TEXT)
    return MakeVerticalTextBlock(bleft, tright, block_parts, used_parts);
  // LineSpacingBlocks has handed us a collection of evenly spaced lines and
  // put the average spacing in each partition, so we can just take the
  // linespacing from the first partition.
  int line_spacing = part->bottom_spacing();
  // A spacing below the median size is replaced by the first partition's
  // bounding-box height.
  if (line_spacing < part->median_size())
    line_spacing = part->bounding_box().height();
  ICOORDELT_LIST vertices;
  ICOORDELT_IT vert_it(&vertices);
  ICOORD start, end;
  // Running extremes of all polygon vertices, used for the block's box.
  int min_x = MAX_INT32;
  int max_x = -MAX_INT32;
  int min_y = MAX_INT32;
  int max_y = -MAX_INT32;
  // iteration 0 collects left-edge runs, iteration 1 right-edge runs.
  int iteration = 0;
  do {
    if (iteration == 0)
      ColPartition::LeftEdgeRun(&it, &start, &end);
    else
      ColPartition::RightEdgeRun(&it, &start, &end);
    // Keep the run's end points inside the page rectangle.
    ClipCoord(bleft, tright, &start);
    ClipCoord(bleft, tright, &end);
    vert_it.add_after_then_move(new ICOORDELT(start));
    vert_it.add_after_then_move(new ICOORDELT(end));
    UpdateRange(start.x(), &min_x, &max_x);
    UpdateRange(end.x(), &min_x, &max_x);
    UpdateRange(start.y(), &min_y, &max_y);
    UpdateRange(end.y(), &min_y, &max_y);
    // Presumably the edge-run helpers move the iterator; once the pass has
    // reached its terminal position, switch to the next pass and restart
    // from the last partition.
    if ((iteration == 0 && it.at_first()) ||
        (iteration == 1 && it.at_last())) {
      ++iteration;
      it.move_to_last();
    }
  } while (iteration < 2);
  if (textord_debug_tabfind)
    tprintf("Making block at (%d,%d)->(%d,%d)\n",
            min_x, min_y, max_x, max_y);
  BLOCK* block = new BLOCK("", true, 0, 0, min_x, min_y, max_x, max_y);
  block->set_poly_block(new POLY_BLOCK(&vertices, type));
  return MoveBlobsToBlock(false, line_spacing, block, block_parts, used_parts);
}
ColPartition * tesseract::ColPartition::MakeLinePartition ( BlobRegionType  blob_type,
const ICOORD vertical,
int  left,
int  bottom,
int  right,
int  top 
) [static]

Constructs a fake ColPartition with no BLOBNBOXes to represent a horizontal or vertical line, given a type and a bounding box.

Definition at line 161 of file colpartition.cpp.

                                                                  {
  // The partition holds no blobs, so every geometric field is filled in
  // directly from the supplied rectangle.
  ColPartition* line_part = new ColPartition(blob_type, vertical);
  line_part->bounding_box_ = TBOX(left, bottom, right, top);
  // Median edges and sizes are simply those of the rectangle.
  line_part->median_top_ = top;
  line_part->median_bottom_ = bottom;
  line_part->median_width_ = right - left;
  line_part->median_size_ = top - bottom;
  // The keys are taken from the box once it has been set.
  line_part->left_key_ = line_part->BoxLeftKey();
  line_part->right_key_ = line_part->BoxRightKey();
  return line_part;
}
TO_BLOCK * tesseract::ColPartition::MakeVerticalTextBlock ( const ICOORD bleft,
const ICOORD tright,
ColPartition_LIST *  block_parts,
ColPartition_LIST *  used_parts 
) [static]

Definition at line 1658 of file colpartition.cpp.

                                                                             {
  // Builds a rectangular block from the union of the bounding boxes of all
  // partitions in block_parts and passes it to MoveBlobsToBlock. Returns
  // NULL when the list is empty.
  if (block_parts->empty())
    return NULL;  // Nothing to do.
  ColPartition_IT part_it(block_parts);
  ColPartition* first_part = part_it.data();
  TBOX union_box = first_part->bounding_box();
  // The line spacing is the width of the first partition alone, taken
  // before the box is grown.
  const int spacing = union_box.width();
  const PolyBlockType block_type = first_part->type();
  part_it.mark_cycle_pt();
  while (!part_it.cycled_list()) {
    union_box += part_it.data()->bounding_box();
    part_it.forward();
  }
  if (textord_debug_tabfind) {
    tprintf("Making block at:");
    union_box.print();
  }
  BLOCK* new_block = new BLOCK("", true, 0, 0,
                               union_box.left(), union_box.bottom(),
                               union_box.right(), union_box.top());
  new_block->set_poly_block(new POLY_BLOCK(union_box, block_type));
  return MoveBlobsToBlock(true, spacing, new_block, block_parts, used_parts);
}
bool tesseract::ColPartition::MarkAsLeaderIfMonospaced ( )

Definition at line 1054 of file colpartition.cpp.

                                            {
  // Decides whether the blobs of this partition are spaced regularly enough
  // to be a leader. Every blob is first given flow BTFT_NEIGHBOURS. If the
  // gap statistics are stable and a cutting path through the projection is
  // cheap enough, the blobs and the partition are marked BRT_TEXT /
  // BTFT_LEADER (an over-spaced first or last blob is extracted instead) and
  // true is returned. Otherwise returns false.
  // NOTE(review): it.data() is dereferenced without an emptiness check -
  // assumes boxes_ is non-empty; confirm at callers.
  bool result = false;
  // Gather statistics on the gaps between blobs and the widths of the blobs.
  int part_width = bounding_box_.width();
  STATS gap_stats(0, part_width);
  STATS width_stats(0, part_width);
  BLOBNBOX_C_IT it(&boxes_);
  BLOBNBOX* prev_blob = it.data();
  prev_blob->set_flow(BTFT_NEIGHBOURS);
  width_stats.add(prev_blob->bounding_box().width(), 1);
  int blob_count = 1;
  for (it.forward(); !it.at_first(); it.forward()) {
    BLOBNBOX* blob = it.data();
    int left = blob->bounding_box().left();
    int right = blob->bounding_box().right();
    gap_stats.add(left - prev_blob->bounding_box().right(), 1);
    width_stats.add(right - left, 1);
    blob->set_flow(BTFT_NEIGHBOURS);
    prev_blob = blob;
    ++blob_count;
  }
  double median_gap = gap_stats.median();
  double median_width = width_stats.median();
  double max_width = MAX(median_gap, median_width);
  double min_width = MIN(median_gap, median_width);
  double gap_iqr = gap_stats.ile(0.75f) - gap_stats.ile(0.25f);
  if (textord_debug_tabfind >= 4) {
    tprintf("gap iqr = %g, blob_count=%d, limits=%g,%g\n",
            gap_iqr, blob_count, max_width * kMaxLeaderGapFractionOfMax,
            min_width * kMaxLeaderGapFractionOfMin);
  }
  if (gap_iqr < max_width * kMaxLeaderGapFractionOfMax &&
      gap_iqr < min_width * kMaxLeaderGapFractionOfMin &&
      blob_count >= kMinLeaderCount) {
    // This is stable enough to be called a leader, so check the widths.
    // Since leader dashes can join, run a dp cutting algorithm and go
    // on the cost.
    int offset = static_cast<int>(ceil(gap_iqr * 2));
    int min_step = static_cast<int>(median_gap + median_width + 0.5);
    int max_step = min_step + offset;
    min_step -= offset;
    // Pad the buffer with min_step/2 on each end.
    int part_left = bounding_box_.left() - min_step / 2;
    part_width += min_step;
    DPPoint* projection = new DPPoint[part_width];
    for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
      BLOBNBOX* blob = it.data();
      int left = blob->bounding_box().left();
      int right = blob->bounding_box().right();
      int height = blob->bounding_box().height();
      // NOTE(review): x is not used in the index, so every column of the
      // blob adds its cost to the single cell at the blob's left edge. If a
      // per-column projection was intended the index would be x - part_left.
      // Left unchanged because it alters the DP costs - confirm intent.
      for (int x = left; x < right; ++x) {
        projection[left - part_left].AddLocalCost(height);
      }
    }
    DPPoint* best_end = DPPoint::Solve(min_step, max_step, false,
                                       &DPPoint::CostWithVariance,
                                       part_width, projection);
    if (best_end != NULL && best_end->total_cost() < blob_count) {
      // Good enough. Call it a leader.
      result = true;
      for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
        BLOBNBOX* blob = it.data();
        // If the first or last blob is spaced too much, don't mark it.
        if (it.at_first()) {
          int gap = it.data_relative(1)->bounding_box().left() -
                     blob->bounding_box().right();
          if (blob->bounding_box().width() + gap > max_step) {
            it.extract();
            continue;
          }
        }
        if (it.at_last()) {
          int gap = blob->bounding_box().left() -
                     it.data_relative(-1)->bounding_box().right();
          if (blob->bounding_box().width() + gap > max_step) {
            it.extract();
            break;
          }
        }
        blob->set_region_type(BRT_TEXT);
        blob->set_flow(BTFT_LEADER);
      }
      blob_type_ = BRT_TEXT;
      flow_ = BTFT_LEADER;
    } else if (textord_debug_tabfind) {
      if (best_end == NULL) {
        tprintf("No path\n");
      } else {
        // Print the two values that were compared, one per %d; passing the
        // comparison result alone left the second conversion without an
        // argument.
        tprintf("Total cost = %d vs allowed %d\n",
                best_end->total_cost(), blob_count);
      }
    }
    delete [] projection;
  }
  return result;
}
bool tesseract::ColPartition::MatchingColumns ( const ColPartition other) const

Definition at line 349 of file colpartition.cpp.

                                                                  {
  // Both edges are sampled at the y midway between the two partitions'
  // mid-heights and compared after scaling down by kColumnWidthFactor.
  const int sample_y = (MidY() + other.MidY()) / 2;
  const bool left_edges_match =
      NearlyEqual(other.LeftAtY(sample_y) / kColumnWidthFactor,
                  LeftAtY(sample_y) / kColumnWidthFactor, 1);
  if (!left_edges_match)
    return false;
  // The left edges agree, so the answer rests on the right edges.
  return NearlyEqual(other.RightAtY(sample_y) / kColumnWidthFactor,
                     RightAtY(sample_y) / kColumnWidthFactor, 1);
}
bool tesseract::ColPartition::MatchingSizes ( const ColPartition other) const

Definition at line 384 of file colpartition.cpp.

                                                                {
  // If either partition is vertical text the median widths are compared,
  // otherwise the median sizes. The partitions match when
  // TabFind::DifferentSizes does not report a difference.
  const bool any_vertical_text =
      blob_type_ == BRT_VERT_TEXT || other.blob_type_ == BRT_VERT_TEXT;
  const bool sizes_differ = any_vertical_text
      ? TabFind::DifferentSizes(median_width_, other.median_width_)
      : TabFind::DifferentSizes(median_size_, other.median_size_);
  return !sizes_differ;
}
bool tesseract::ColPartition::MatchingStrokeWidth ( const ColPartition other,
double  fractional_tolerance,
double  constant_tolerance 
) const

Definition at line 409 of file colpartition.cpp.

                                                                        {
  // Pairs up the blobs of the two partitions in list order and counts how
  // many pairs pass BLOBNBOX::MatchingStrokeWidth with the given tolerances.
  // Pairing stops as soon as either list has been fully cycled. Returns true
  // when matching pairs strictly outnumber non-matching ones.
  int matches = 0;
  int mismatches = 0;
  BLOBNBOX_C_IT mine(const_cast<BLOBNBOX_CLIST*>(&boxes_));
  BLOBNBOX_C_IT theirs(const_cast<BLOBNBOX_CLIST*>(&other.boxes_));
  for (mine.mark_cycle_pt(), theirs.mark_cycle_pt();
       !mine.cycled_list() && !theirs.cycled_list();
       mine.forward(), theirs.forward()) {
    const bool widths_agree =
        mine.data()->MatchingStrokeWidth(*theirs.data(),
                                         fractional_tolerance,
                                         constant_tolerance);
    if (widths_agree) {
      ++matches;
    } else {
      ++mismatches;
    }
  }
  return matches > mismatches;
}
bool tesseract::ColPartition::MatchingTextColor ( const ColPartition other) const

Definition at line 361 of file colpartition.cpp.

                                                                    {
  // When the alpha-channel slot of color1 exceeds kMaxRMSColorNoise in both
  // partitions the colors are too noisy to compare.
  const bool both_too_noisy =
      color1_[L_ALPHA_CHANNEL] > kMaxRMSColorNoise &&
      other.color1_[L_ALPHA_CHANNEL] > kMaxRMSColorNoise;
  if (both_too_noisy)
    return false;  // Too noisy.

  // Distance of each of this partition's colors from the line through
  // other's two colors.
  double own1_to_other_line = ImageFind::ColorDistanceFromLine(other.color1_,
                                                               other.color2_,
                                                               color1_);
  double own2_to_other_line = ImageFind::ColorDistanceFromLine(other.color1_,
                                                               other.color2_,
                                                               color2_);
  // And the reverse: other's colors against this partition's color line.
  double other1_to_own_line = ImageFind::ColorDistanceFromLine(color1_,
                                                               color2_,
                                                               other.color1_);
  double other2_to_own_line = ImageFind::ColorDistanceFromLine(color1_,
                                                               color2_,
                                                               other.color2_);
  // The match holds only if all four distances are below the limit.
  const bool own_near_other = own1_to_other_line < kMaxColorDistance &&
                              own2_to_other_line < kMaxColorDistance;
  const bool other_near_own = other1_to_own_line < kMaxColorDistance &&
                              other2_to_own_line < kMaxColorDistance;
  return own_near_other && other_near_own;
}
int tesseract::ColPartition::median_bottom ( ) const [inline]

Definition at line 127 of file colpartition.h.

                            {
    // Read-only accessor for the median_bottom_ member.
    return median_bottom_;
  }
int tesseract::ColPartition::median_left ( ) const [inline]

Definition at line 130 of file colpartition.h.

                          {
    // Read-only accessor for the median_left_ member.
    return median_left_;
  }
int tesseract::ColPartition::median_right ( ) const [inline]

Definition at line 133 of file colpartition.h.

                           {
    // Read-only accessor for the median_right_ member.
    return median_right_;
  }
int tesseract::ColPartition::median_size ( ) const [inline]

Definition at line 136 of file colpartition.h.

                          {
    // Read-only accessor for the median_size_ member.
    return median_size_;
  }
int tesseract::ColPartition::median_top ( ) const [inline]

Definition at line 124 of file colpartition.h.

                         {
    // Read-only accessor for the median_top_ member.
    return median_top_;
  }
int tesseract::ColPartition::median_width ( ) const [inline]

Definition at line 142 of file colpartition.h.

                           {
    // Read-only accessor for the median_width_ member.
    return median_width_;
  }
int tesseract::ColPartition::MedianY ( ) const [inline]

Definition at line 308 of file colpartition.h.

                      {
    // Integer midpoint of median_top_ and median_bottom_.
    return (median_top_ + median_bottom_) / 2;
  }
int tesseract::ColPartition::MidX ( ) const [inline]

Definition at line 312 of file colpartition.h.

                   {
    // Integer midpoint of the bounding box's left and right edges.
    return (bounding_box_.left() + bounding_box_.right()) / 2;
  }
int tesseract::ColPartition::MidY ( ) const [inline]

Definition at line 304 of file colpartition.h.

                   {
    // Integer midpoint of the bounding box's top and bottom edges.
    return (bounding_box_.top() + bounding_box_.bottom()) / 2;
  }
ColPartition* tesseract::ColPartition::nearest_neighbor_above ( ) const [inline]

Definition at line 249 of file colpartition.h.

                                               {
    // Read-only accessor for the nearest_neighbor_above_ pointer.
    return nearest_neighbor_above_;
  }
ColPartition* tesseract::ColPartition::nearest_neighbor_below ( ) const [inline]

Definition at line 255 of file colpartition.h.

                                               {
    // Read-only accessor for the nearest_neighbor_below_ pointer.
    return nearest_neighbor_below_;
  }
bool tesseract::ColPartition::OKDiacriticMerge ( const ColPartition candidate,
bool  debug 
) const

Definition at line 437 of file colpartition.cpp.

                                                      {
  // Returns true if every blob of this partition is a diacritic and the
  // intersection of their base-character vertical ranges overlaps the
  // median vertical range of candidate.
  //   candidate: the partition whose median top/bottom are tested against.
  //   debug: when true, the reason for the decision is printed.
  BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
  // Intersection of the base-character ranges: lowest top, highest bottom.
  int min_top = MAX_INT32;
  int max_bottom = -MAX_INT32;
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    BLOBNBOX* blob = it.data();
    if (!blob->IsDiacritic()) {
      if (debug) {
        tprintf("Blob is not a diacritic:");
        blob->bounding_box().print();
      }
      return false;  // All blobs must have diacritic bases.
    }
    if (blob->base_char_top() < min_top)
      min_top = blob->base_char_top();
    if (blob->base_char_bottom() > max_bottom)
      max_bottom = blob->base_char_bottom();
  }
  // If the intersection of all vertical ranges of all base characters
  // overlaps the median range of candidate, then it is OK.
  bool result = min_top > candidate.median_bottom_ &&
                max_bottom < candidate.median_top_;
  if (debug) {
    if (result)
      tprintf("OKDiacritic!\n");
    else
      // Report the ranges that were actually compared: the base-character
      // intersection against candidate's median range.
      tprintf("y ranges don\'t overlap: %d-%d / %d-%d\n",
              max_bottom, min_top,
              candidate.median_bottom_, candidate.median_top_);
  }
  return result;
}
bool tesseract::ColPartition::OKMergeOverlap ( const ColPartition merge1,
const ColPartition merge2,
int  ok_box_overlap,
bool  debug 
)

Definition at line 713 of file colpartition.cpp.

                                                                  {
  // Decides whether merging merge1 with merge2 is acceptable with respect
  // to this partition. With debug set, the reason for a rejection is
  // printed.
  // Rule 1: no vertical partition may take part.
  const bool any_vertical = IsVerticalType() || merge1.IsVerticalType() ||
                            merge2.IsVerticalType();
  if (any_vertical) {
    if (debug)
      tprintf("Vertical partition\n");
    return false;
  }
  // Rule 2: the two merge candidates must significantly overlap each other
  // vertically in their core ranges.
  if (!merge1.VSignificantCoreOverlap(merge2)) {
    if (debug)
      tprintf("Voverlap %d (%d)\n",
              merge1.VCoreOverlap(merge2),
              merge1.VSignificantCoreOverlap(merge2));
    return false;
  }
  // Rule 3: the union of the two boxes must not intrude on this partition,
  // i.e. overlap its median range and also overlap its bounding box by more
  // than ok_box_overlap.
  TBOX union_box(merge1.bounding_box());
  union_box += merge2.bounding_box();
  const bool hits_median_range = union_box.bottom() < median_top_ &&
                                 union_box.top() > median_bottom_;
  const bool hits_box_beyond_margin =
      union_box.bottom() < bounding_box_.top() - ok_box_overlap &&
      union_box.top() > bounding_box_.bottom() + ok_box_overlap;
  if (hits_median_range && hits_box_beyond_margin) {
    if (debug)
      tprintf("Excessive box overlap\n");
    return false;
  }
  // Nothing objected to the merge.
  return true;
}
BLOBNBOX * tesseract::ColPartition::OverlapSplitBlob ( const TBOX box)

Definition at line 746 of file colpartition.cpp.

                                                        {
  // Grows a box blob by blob from the first blob onward and returns the
  // blob whose addition first makes the grown box overlap the given box.
  // Returns NULL if there are fewer than two blobs or no such blob exists.
  if (boxes_.empty() || boxes_.singleton())
    return NULL;
  BLOBNBOX_C_IT blob_it(&boxes_);
  TBOX grown_box(blob_it.data()->bounding_box());
  blob_it.forward();
  while (!blob_it.at_first()) {
    BLOBNBOX* current = blob_it.data();
    grown_box += current->bounding_box();
    if (grown_box.overlap(box))
      return current;
    blob_it.forward();
  }
  return NULL;
}
bool tesseract::ColPartition::owns_blobs ( ) const [inline]

Definition at line 291 of file colpartition.h.

                          {
    // Read-only accessor for the owns_blobs_ member.
    return owns_blobs_;
  }
PolyBlockType tesseract::ColPartition::PartitionType ( ColumnSpanningType  flow) const

Definition at line 978 of file colpartition.cpp.

                                                                       {
  // Maps the combination of this partition's blob_type_ and the given
  // column-spanning flow to a PolyBlockType.
  // A noise flow yields PT_NOISE unless the blob type is a line, a
  // rectangular image or vertical text; those are treated as flowing.
  if (flow == CST_NOISE) {
    if (blob_type_ != BRT_HLINE && blob_type_ != BRT_VLINE &&
        blob_type_ != BRT_RECTIMAGE && blob_type_ != BRT_VERT_TEXT)
      return PT_NOISE;
    flow = CST_FLOWING;
  }

  switch (blob_type_) {
    // These blob types map to a fixed result regardless of flow.
    case BRT_NOISE:
      return PT_NOISE;
    case BRT_HLINE:
      return PT_HORZ_LINE;
    case BRT_VLINE:
      return PT_VERT_LINE;
    // Images: the result depends on the flow.
    case BRT_RECTIMAGE:
    case BRT_POLYIMAGE:
      switch (flow) {
        case CST_FLOWING:
          return PT_FLOWING_IMAGE;
        case CST_HEADING:
          return PT_HEADING_IMAGE;
        case CST_PULLOUT:
          return PT_PULLOUT_IMAGE;
        default:
          ASSERT_HOST(!"Undefined flow type for image!");
      }
      break;
    case BRT_VERT_TEXT:
      return PT_VERTICAL_TEXT;
    // Text, unknown and any other blob type: the result depends on the flow.
    case BRT_TEXT:
    case BRT_UNKNOWN:
    default:
      switch (flow) {
        case CST_FLOWING:
          return PT_FLOWING_TEXT;
        case CST_HEADING:
          return PT_HEADING_TEXT;
        case CST_PULLOUT:
          return PT_PULLOUT_TEXT;
        default:
          ASSERT_HOST(!"Undefined flow type for text!");
      }
  }
  // Reached only when a flow value matched none of the cases above.
  ASSERT_HOST(!"Should never get here!");
  return PT_NOISE;
}
void tesseract::ColPartition::Print ( ) const

Definition at line 1735 of file colpartition.cpp.

                               {
  // Writes a one-line debug summary of the partition through tprintf.
  // Left and right edge positions are evaluated at y = MidY().
  int y = MidY();
  // Fields, in order: 'E' marker if boxes_ is empty; left margin, left key
  // tab flag (T/B), left edge, box/median left, box/median bottom; then the
  // mirrored right-hand values with box/median top; good_width_,
  // good_column_, type_, blob type character, flow_; first/last column,
  // number of boxes; and finally space_above_ (ts), space_below_ (bs),
  // space_to_left_ (ls) and space_to_right_ (rs).
  tprintf("ColPart:%c(M%d-%c%d-B%d/%d,%d/%d)->(%dB-%d%c-%dM/%d,%d/%d)"
          " w-ok=%d, v-ok=%d, type=%d%c%d, fc=%d, lc=%d, boxes=%d"
          " ts=%d bs=%d ls=%d rs=%d\n",
          boxes_.empty() ? 'E' : ' ',
          left_margin_, left_key_tab_ ? 'T' : 'B', LeftAtY(y),
          bounding_box_.left(), median_left_,
          bounding_box_.bottom(), median_bottom_,
          bounding_box_.right(), RightAtY(y), right_key_tab_ ? 'T' : 'B',
          right_margin_, median_right_, bounding_box_.top(), median_top_,
          good_width_, good_column_, type_,
          kBlobTypes[blob_type_], flow_,
          first_column_, last_column_, boxes_.length(),
          space_above_, space_below_, space_to_left_, space_to_right_);
}
void tesseract::ColPartition::PrintColors ( )

Definition at line 1753 of file colpartition.cpp.

                               {
  // Prints the red/green/blue components of color1_ and color2_ through
  // tprintf. Only color1_'s L_ALPHA_CHANNEL entry is printed; the
  // corresponding entry of color2_ is not part of the output.
  tprintf("Colors:(%d, %d, %d)%d -> (%d, %d, %d)\n",
          color1_[COLOR_RED], color1_[COLOR_GREEN], color1_[COLOR_BLUE],
          color1_[L_ALPHA_CHANNEL],
          color2_[COLOR_RED], color2_[COLOR_GREEN], color2_[COLOR_BLUE]);
}
void tesseract::ColPartition::RefinePartners ( PolyBlockType  type,
bool  get_desperate,
ColPartitionGrid * grid 
)

Definition at line 1830 of file colpartition.cpp.

                                                          {
  // Prunes the upper and lower partner lists. Partitions whose type_ is
  // similar to the requested type are refined in both directions;
  // type == PT_COUNT selects the final clean-up pass instead. Any other
  // type leaves the partner lists untouched.
  if (TypesSimilar(type_, type)) {
    RefinePartnersInternal(true, get_desperate, grid);
    RefinePartnersInternal(false, get_desperate, grid);
  } else if (type == PT_COUNT) {
    // This is the final pass. Make sure only the correctly typed
    // partners survive, however many there are.
    RefinePartnersByType(true, &upper_partners_);
    RefinePartnersByType(false, &lower_partners_);
    // It is possible for a merge to have given a partition multiple
    // partners again, so the last resort is to use overlap which is
    // guaranteed to leave at most one partner left.
    if (!upper_partners_.empty() && !upper_partners_.singleton())
      RefinePartnersByOverlap(true, &upper_partners_);
    if (!lower_partners_.empty() && !lower_partners_.singleton())
      RefinePartnersByOverlap(false, &lower_partners_);
  }
}
void tesseract::ColPartition::ReflectInYAxis ( )

Definition at line 299 of file colpartition.cpp.

                                  {
  // Mirrors the partition left-to-right: the order of the blobs in boxes_
  // is reversed, the margins are swapped and negated, and the limits are
  // recomputed.
  // The temporary list receives the BLOBNBOX pointers extracted from
  // boxes_ and is spliced back into boxes_, so it uses the BLOBNBOX list
  // and iterator types.
  BLOBNBOX_CLIST reversed_boxes;
  BLOBNBOX_C_IT reversed_it(&reversed_boxes);
  // Reverse the order of the boxes_.
  BLOBNBOX_C_IT bb_it(&boxes_);
  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
    reversed_it.add_before_then_move(bb_it.extract());
  }
  // Every blob has been extracted; put the reversed sequence back.
  bb_it.add_list_after(&reversed_boxes);
  // Only the margins are reflected below, so neither key tab may be set.
  ASSERT_HOST(!left_key_tab_ && !right_key_tab_);
  int tmp = left_margin_;
  left_margin_ = -right_margin_;
  right_margin_ = -tmp;
  ComputeLimits();
}
void tesseract::ColPartition::RemoveBox ( BLOBNBOX * box)

Definition at line 213 of file colpartition.cpp.

                                          {
  // Scan the blob list for the entry that is exactly this blob; the first
  // match is unlinked and the limits are recomputed. Nothing happens if
  // the blob is not in the list.
  BLOBNBOX_C_IT blob_it(&boxes_);
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    if (blob_it.data() != box) {
      continue;
    }
    blob_it.extract();
    ComputeLimits();
    return;
  }
}
void tesseract::ColPartition::RemovePartner ( bool  upper,
ColPartition * partner 
)

Definition at line 597 of file colpartition.cpp.

                                                                  {
  // Pick the partner list selected by the flag, then unlink the first
  // entry that is the given partner. Limits are not recomputed here.
  ColPartition_CLIST* partners = upper ? &upper_partners_ : &lower_partners_;
  ColPartition_C_IT partner_it(partners);
  for (partner_it.mark_cycle_pt(); !partner_it.cycled_list();
       partner_it.forward()) {
    if (partner_it.data() != partner) {
      continue;
    }
    partner_it.extract();
    return;
  }
}
int tesseract::ColPartition::right_key ( ) const [inline]

Definition at line 178 of file colpartition.h.

                        {
    // Accessor for right_key_, the sort key RightAtY() converts to an x.
    return right_key_;
  }
bool tesseract::ColPartition::right_key_tab ( ) const [inline]

Definition at line 175 of file colpartition.h.

                             {
    // Accessor for the right_key_tab_ flag.
    return right_key_tab_;
  }
int tesseract::ColPartition::right_margin ( ) const [inline]

Definition at line 118 of file colpartition.h.

                           {
    // Accessor for right_margin_.
    return right_margin_;
  }
int tesseract::ColPartition::RightAtY ( int  y) const [inline]

Definition at line 344 of file colpartition.h.

                            {
    // Converts the right sort key into an x coordinate at the given y.
    return XAtY(right_key_, y);
  }
int tesseract::ColPartition::RightBlobRule ( ) const

Definition at line 529 of file colpartition.cpp.

                                      {
  // Returns right_rule() of the last blob in the list.
  // The cast strips const so that an iterator can be built over the
  // member list from inside this const method.
  BLOBNBOX_CLIST* blobs = const_cast<BLOBNBOX_CLIST*>(&boxes_);
  BLOBNBOX_C_IT blob_it(blobs);
  blob_it.move_to_last();
  BLOBNBOX* last_blob = blob_it.data();
  return last_blob->right_rule();
}
void tesseract::ColPartition::set_blob_type ( BlobRegionType  t) [inline]

Definition at line 151 of file colpartition.h.

                                       {
    // Stores the new blob region type in blob_type_.
    blob_type_ = t;
  }
void tesseract::ColPartition::set_block_owned ( bool  owned) [inline]

Definition at line 208 of file colpartition.h.

                                   {
    // Stores the flag in block_owned_.
    block_owned_ = owned;
  }
void tesseract::ColPartition::set_bottom_spacing ( int  spacing) [inline]

Definition at line 223 of file colpartition.h.

                                       {
    // Stores the value in bottom_spacing_.
    bottom_spacing_ = spacing;
  }
void tesseract::ColPartition::set_first_column ( int  column) [inline]

Definition at line 688 of file colpartition.h.

                                    {
    // Stores the column index in first_column_.
    first_column_ = column;
  }
void tesseract::ColPartition::set_flow ( BlobTextFlowType  f) [inline]

Definition at line 157 of file colpartition.h.

                                    {
    // Stores the text flow type in flow_.
    flow_ = f;
  }
void tesseract::ColPartition::set_inside_table_column ( bool  val) [inline]

Definition at line 246 of file colpartition.h.

                                         {
    // Stores the flag in inside_table_column_.
    inside_table_column_ = val;
  }
void tesseract::ColPartition::set_last_column ( int  column) [inline]

Definition at line 691 of file colpartition.h.

                                   {
    // Stores the column index in last_column_.
    last_column_ = column;
  }
void tesseract::ColPartition::set_left_margin ( int  margin) [inline]

Definition at line 115 of file colpartition.h.

                                   {
    // Stores the value in left_margin_.
    left_margin_ = margin;
  }
void tesseract::ColPartition::set_median_size ( int  size) [inline]

Definition at line 139 of file colpartition.h.

                                 {
    // Stores the value in median_size_.
    median_size_ = size;
  }
void tesseract::ColPartition::set_median_width ( int  width) [inline]

Definition at line 145 of file colpartition.h.

                                   {
    // Stores the value in median_width_.
    median_width_ = width;
  }
void tesseract::ColPartition::set_nearest_neighbor_above ( ColPartition * part) [inline]

Definition at line 252 of file colpartition.h.

                                                      {
    // Stores the given partition in nearest_neighbor_above_.
    nearest_neighbor_above_ = part;
  }
void tesseract::ColPartition::set_nearest_neighbor_below ( ColPartition * part) [inline]

Definition at line 258 of file colpartition.h.

                                                      {
    // Stores the given partition in nearest_neighbor_below_.
    nearest_neighbor_below_ = part;
  }
void tesseract::ColPartition::set_owns_blobs ( bool  owns_blobs) [inline]

Definition at line 294 of file colpartition.h.

                                       {
    // Sets the owns_blobs_ flag. The blob list must be empty when this is
    // called; the assertion below enforces that.
    // Do NOT change ownership flag when there are blobs in the list.
    // Immediately set the ownership flag when creating copies.
    ASSERT_HOST(boxes_.empty());
    owns_blobs_ = owns_blobs;
  }
void tesseract::ColPartition::set_right_margin ( int  margin) [inline]

Definition at line 121 of file colpartition.h.

                                    {
    // Stores the value in right_margin_.
    right_margin_ = margin;
  }
void tesseract::ColPartition::set_side_step ( int  step) [inline]

Definition at line 217 of file colpartition.h.

                               {
    // Stores the value in side_step_.
    side_step_ = step;
  }
void tesseract::ColPartition::set_space_above ( int  space) [inline]

Definition at line 264 of file colpartition.h.

                                  {
    // Stores the value in space_above_.
    space_above_ = space;
  }
void tesseract::ColPartition::set_space_below ( int  space) [inline]

Definition at line 270 of file colpartition.h.

                                  {
    // Stores the value in space_below_.
    space_below_ = space;
  }
void tesseract::ColPartition::set_space_to_left ( int  space) [inline]

Definition at line 276 of file colpartition.h.

                                    {
    // Stores the value in space_to_left_.
    space_to_left_ = space;
  }
void tesseract::ColPartition::set_space_to_right ( int  space) [inline]

Definition at line 282 of file colpartition.h.

                                     {
    // Stores the value in space_to_right_.
    space_to_right_ = space;
  }
void tesseract::ColPartition::set_table_type ( ) [inline]

Definition at line 233 of file colpartition.h.

                        {
    // Already marked as a table: leave the saved pre-table type alone so
    // that it is not overwritten with PT_TABLE.
    if (type_ == PT_TABLE) {
      return;
    }
    // Remember the current type before switching to the table type.
    type_before_table_ = type_;
    type_ = PT_TABLE;
  }
void tesseract::ColPartition::set_top_spacing ( int  spacing) [inline]

Definition at line 229 of file colpartition.h.

                                    {
    // Stores the value in top_spacing_.
    top_spacing_ = spacing;
  }
void tesseract::ColPartition::set_type ( PolyBlockType  t) [inline]

Definition at line 184 of file colpartition.h.

                                 {
    // Stores the poly block type in type_.
    type_ = t;
  }
void tesseract::ColPartition::set_vertical ( const ICOORD & v) [inline]

Definition at line 193 of file colpartition.h.

                                     {
    // Copies the given vector into vertical_, which SortKey() and XAtY()
    // pass on to TabVector.
    vertical_ = v;
  }
void tesseract::ColPartition::set_working_set ( WorkingPartSet * working_set) [inline]

Definition at line 202 of file colpartition.h.

                                                    {
    // Stores the given working set in working_set_.
    working_set_ = working_set;
  }
void tesseract::ColPartition::SetBlobTypes ( )

Definition at line 1233 of file colpartition.cpp.

                                {
  // Pushes this partition's flow_ and blob_type_ down onto its blobs.
  // Nothing is done when the partition does not own its blobs.
  if (owns_blobs()) {
    BLOBNBOX_C_IT blob_it(&boxes_);
    for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
      BLOBNBOX* bbox = blob_it.data();
      // Blobs already flagged as leaders keep their own flow value.
      const bool is_leader = bbox->flow() == BTFT_LEADER;
      if (!is_leader) {
        bbox->set_flow(flow_);
      }
      bbox->set_region_type(blob_type_);
      // An owned blob must be unowned or owned by this partition.
      ASSERT_HOST(bbox->owner() == NULL || bbox->owner() == this);
    }
  }
}
void tesseract::ColPartition::SetColumnGoodness ( WidthCallback * cb)

Definition at line 1041 of file colpartition.cpp.

                                                      {
  // The width is measured between the left and right keys at the
  // partition's mid-height and judged by the supplied callback.
  const int mid_y = MidY();
  good_width_ = cb->Run(RightAtY(mid_y) - LeftAtY(mid_y));
  // A good column is a text partition with a key tab on both sides.
  const bool tabs_on_both_sides = left_key_tab_ && right_key_tab_;
  good_column_ = blob_type_ == BRT_TEXT && tabs_on_both_sides;
}
void tesseract::ColPartition::SetLeftTab ( const TabVector * tab_vector)

Definition at line 473 of file colpartition.cpp.

                                                         {
  // Takes the left key from the tab vector when one is given, but only
  // keeps it if it does not exceed the box's own left key; otherwise the
  // box's left key is used and the key-tab flag is cleared.
  const bool have_tab = tab_vector != NULL;
  if (have_tab) {
    left_key_ = tab_vector->sort_key();
  }
  left_key_tab_ = have_tab && left_key_ <= BoxLeftKey();
  if (!left_key_tab_) {
    left_key_ = BoxLeftKey();
  }
}
void tesseract::ColPartition::SetPartitionType ( int  resolution,
ColPartitionSet * columns 
)

Definition at line 946 of file colpartition.cpp.

                                                                            {
  // Asks the column set how this partition spans its columns, records the
  // resulting first/last column and the column set itself, and derives
  // type_ from the spanning type via PartitionType().
  int first_spanned_col = -1;
  // SpanningType fills first_column_, last_column_ and first_spanned_col
  // through the pointers passed to it.
  ColumnSpanningType span_type =
      columns->SpanningType(resolution,
                            bounding_box_.left(), bounding_box_.right(),
                            MidY(), left_margin_, right_margin_,
                            &first_column_, &last_column_,
                            &first_spanned_col);
  column_set_ = columns;
  if (first_column_ < last_column_ && span_type == CST_PULLOUT &&
      !IsLineType()) {
    // Unequal columns may indicate that the pullout spans one of the columns
    // it lies in, so force it to be allocated to just that column.
    if (first_spanned_col >= 0) {
      first_column_ = first_spanned_col;
      last_column_ = first_spanned_col;
    } else {
      // No spanned column was reported: collapse onto whichever end has an
      // even index, or onto the midpoint if both ends are odd.
      if ((first_column_ & 1) == 0)
        last_column_ = first_column_;
      else if ((last_column_ & 1) == 0)
        first_column_ = last_column_;
      else
        first_column_ = last_column_ = (first_column_ + last_column_) / 2;
    }
  }
  type_ = PartitionType(span_type);
}
void tesseract::ColPartition::SetRegionAndFlowTypesFromProjectionValue ( int  value)

Definition at line 1159 of file colpartition.cpp.

                                                                     {
  // Chooses blob_type_ and flow_ for the partition from per-blob counts and
  // the given projection value, then propagates them to the blobs with
  // SetBlobTypes().
  int blob_count = 0;        // Total # blobs.
  // NOTE(review): this is a function-local variable even though its name
  // carries the member-style trailing underscore. The class exposes a
  // good_blob_score() accessor; if that is backed by a member of the same
  // name, the member is shadowed here and never updated by this function.
  // Confirm whether that is intended.
  int good_blob_score_ = 0;  // Total # good strokewidth neighbours.
  int noisy_count = 0;       // Total # neighbours marked as noise.
  int hline_count = 0;
  int vline_count = 0;
  BLOBNBOX_C_IT it(&boxes_);
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    BLOBNBOX* blob = it.data();
    ++blob_count;
    noisy_count += blob->NoisyNeighbours();
    good_blob_score_ += blob->GoodTextBlob();
    if (blob->region_type() == BRT_HLINE) ++hline_count;
    if (blob->region_type() == BRT_VLINE) ++vline_count;
  }
  // Defaults, used when neither line type dominates and the projection
  // value is within [-1, 1].
  flow_ = BTFT_NEIGHBOURS;
  blob_type_ = BRT_UNKNOWN;
  if (hline_count > vline_count) {
    flow_ = BTFT_NONE;
    blob_type_ = BRT_HLINE;
  } else if (vline_count > hline_count) {
    flow_ = BTFT_NONE;
    blob_type_ = BRT_VLINE;
  } else if (value < -1 || 1 < value) {
    // Positive values select horizontal text, negative values vertical
    // text; the long/short sides of the bounding box are picked to match.
    int long_side;
    int short_side;
    if (value > 0) {
      long_side = bounding_box_.width();
      short_side = bounding_box_.height();
      blob_type_ = BRT_TEXT;
    } else {
      long_side = bounding_box_.height();
      short_side = bounding_box_.width();
      blob_type_ = BRT_VERT_TEXT;
    }
    // We will combine the old metrics using aspect ratio and blob counts
    // with the input value by allowing a strong indication to flip the
    // STRONG_CHAIN/CHAIN flow values.
    // strong_score ends up in 0..3: one point each for blob count, short
    // side size and aspect ratio.
    int strong_score = blob_count >= kHorzStrongTextlineCount ? 1 : 0;
    if (short_side > kHorzStrongTextlineHeight) ++strong_score;
    if (short_side * kHorzStrongTextlineAspect < long_side) ++strong_score;
    if (abs(value) >= kMinStrongTextValue)
      flow_ = BTFT_STRONG_CHAIN;
    else if (abs(value) >= kMinChainTextValue)
      flow_ = BTFT_CHAIN;
    else
      flow_ = BTFT_NEIGHBOURS;
    // Upgrade chain to strong chain if the other indicators are good
    if (flow_ == BTFT_CHAIN && strong_score == 3)
      flow_ = BTFT_STRONG_CHAIN;
    // Downgrade strong vertical text to chain if the indicators are bad.
    if (flow_ == BTFT_STRONG_CHAIN && value < 0 && strong_score < 2)
      flow_ = BTFT_CHAIN;
  }
  if (flow_ == BTFT_NEIGHBOURS) {
    // Check for noisy neighbours.
    if (noisy_count >= blob_count) {
      flow_ = BTFT_NONTEXT;
      blob_type_= BRT_NOISE;
    }
  }
  // Debug output only, gated on the bottom-left corner of the bounding box.
  if (TabFind::WithinTestRegion(2, bounding_box_.left(),
                                bounding_box_.bottom())) {
    tprintf("RegionFlowTypesFromProjectionValue count=%d, noisy=%d, score=%d,",
            blob_count, noisy_count, good_blob_score_);
    tprintf(" Projection value=%d, flow=%d, blob_type=%d\n",
            value, flow_, blob_type_);
    Print();
  }
  SetBlobTypes();
}
void tesseract::ColPartition::SetRightTab ( const TabVector * tab_vector)

Definition at line 485 of file colpartition.cpp.

                                                          {
  // Takes the right key from the tab vector when one is given, but only
  // keeps it if it is not below the box's own right key; otherwise the
  // box's right key is used and the key-tab flag is cleared.
  const bool have_tab = tab_vector != NULL;
  if (have_tab) {
    right_key_ = tab_vector->sort_key();
  }
  right_key_tab_ = have_tab && right_key_ >= BoxRightKey();
  if (!right_key_tab_) {
    right_key_ = BoxRightKey();
  }
}
void tesseract::ColPartition::SetSpecialBlobsDensity ( const BlobSpecialTextType  type,
const float  density 
)

Definition at line 555 of file colpartition.cpp.

                                                         {
  // Stores the density for the given special text type. The type is used
  // as an array index and must be below BSTT_COUNT.
  ASSERT_HOST(type < BSTT_COUNT);
  special_blobs_densities_[type] = density;
}
ColPartition * tesseract::ColPartition::ShallowCopy ( ) const

Definition at line 1684 of file colpartition.cpp.

                                              {
  // Allocates a new partition with the same blob type and vertical vector
  // and copies the scalar state listed below into it. No blobs are added
  // to the copy here, and it is marked as not owning blobs. The caller
  // receives the newly allocated object.
  ColPartition* part = new ColPartition(blob_type_, vertical_);
  part->left_margin_ = left_margin_;
  part->right_margin_ = right_margin_;
  part->bounding_box_ = bounding_box_;
  // The densities array is copied as a whole.
  memcpy(part->special_blobs_densities_, special_blobs_densities_,
         sizeof(special_blobs_densities_));
  part->median_bottom_ = median_bottom_;
  part->median_top_ = median_top_;
  part->median_size_ = median_size_;
  part->median_left_ = median_left_;
  part->median_right_ = median_right_;
  part->median_width_ = median_width_;
  part->good_width_ = good_width_;
  part->good_column_ = good_column_;
  part->left_key_tab_ = left_key_tab_;
  part->right_key_tab_ = right_key_tab_;
  part->type_ = type_;
  part->flow_ = flow_;
  part->left_key_ = left_key_;
  part->right_key_ = right_key_;
  part->first_column_ = first_column_;
  part->last_column_ = last_column_;
  part->owns_blobs_ = false;
  return part;
}
ColPartition * tesseract::ColPartition::SingletonPartner ( bool  upper)

Definition at line 608 of file colpartition.cpp.

                                                       {
  // Returns the only entry of the selected partner list, or NULL when the
  // list does not hold exactly one element.
  ColPartition_CLIST* partner_list =
      upper ? &upper_partners_ : &lower_partners_;
  if (partner_list->singleton()) {
    ColPartition_C_IT partner_it(partner_list);
    return partner_it.data();
  }
  return NULL;
}
void tesseract::ColPartition::SmoothPartnerRun ( int  working_set_count)

Definition at line 1761 of file colpartition.cpp.

                                                         {
  // Walks the chain of singleton lower partners starting at this
  // partition, finds the largest type_ value in the chain (including this
  // partition), and assigns it to this partition and to every partner in
  // the chain.
  // The column statistics are collected but only consumed by the code
  // that is currently disabled with #if 0 below.
  STATS left_stats(0, working_set_count);
  STATS right_stats(0, working_set_count);
  PolyBlockType max_type = type_;
  ColPartition* partner;
  // First pass: find the maximum type and gather column statistics from
  // partners that share this partition's column set.
  for (partner = SingletonPartner(false); partner != NULL;
       partner = partner->SingletonPartner(false)) {
    if (partner->type_ > max_type)
      max_type = partner->type_;
    if (column_set_ == partner->column_set_) {
      left_stats.add(partner->first_column_, 1);
      right_stats.add(partner->last_column_, 1);
    }
  }
  type_ = max_type;
  // TODO(rays) Either establish that it isn't necessary to set the columns,
  // or find a way to do it that does not cause an assert failure in
  // AddToWorkingSet.
#if 0
  first_column_ = left_stats.mode();
  last_column_ = right_stats.mode();
  if (last_column_ < first_column_)
    last_column_ = first_column_;
#endif

  // Second pass: apply the maximum type to every partner in the chain.
  for (partner = SingletonPartner(false); partner != NULL;
       partner = partner->SingletonPartner(false)) {
    partner->type_ = max_type;
#if 0  // See TODO above
    if (column_set_ == partner->column_set_) {
      partner->first_column_ = first_column_;
      partner->last_column_ = last_column_;
    }
#endif
  }
}
int tesseract::ColPartition::SortKey ( int  x,
int  y 
) const [inline]

Definition at line 316 of file colpartition.h.

                                  {
    // Forwards to TabVector::SortKey using this partition's vertical_.
    return TabVector::SortKey(vertical_, x, y);
  }
int tesseract::ColPartition::space_above ( ) const [inline]

Definition at line 261 of file colpartition.h.

                          {
    // Accessor for space_above_.
    return space_above_;
  }
int tesseract::ColPartition::space_below ( ) const [inline]

Definition at line 267 of file colpartition.h.

                          {
    // Accessor for space_below_.
    return space_below_;
  }
int tesseract::ColPartition::space_to_left ( ) const [inline]

Definition at line 273 of file colpartition.h.

                            {
    // Accessor for space_to_left_.
    return space_to_left_;
  }
int tesseract::ColPartition::space_to_right ( ) const [inline]

Definition at line 279 of file colpartition.h.

                             {
    // Accessor for space_to_right_.
    return space_to_right_;
  }
int tesseract::ColPartition::SpecialBlobsCount ( const BlobSpecialTextType  type)

Definition at line 540 of file colpartition.cpp.

                                                                  {
  // Counts the blobs in this partition whose special text type equals the
  // requested one. The type must be below BSTT_COUNT.
  ASSERT_HOST(type < BSTT_COUNT);
  int matches = 0;
  BLOBNBOX_C_IT it(&boxes_);
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    if (it.data()->special_text_type() == type)
      ++matches;
  }
  return matches;
}
float tesseract::ColPartition::SpecialBlobsDensity ( const BlobSpecialTextType  type) const

Definition at line 535 of file colpartition.cpp.

                                                                            {
  // Returns the stored density for the given special text type. The type
  // is used as an array index and must be below BSTT_COUNT.
  ASSERT_HOST(type < BSTT_COUNT);
  return special_blobs_densities_[type];
}
ColPartition * tesseract::ColPartition::SplitAt ( int  split_x)

Definition at line 800 of file colpartition.cpp.

                                               {
  // Splits the partition at x = split_x. Every blob whose left edge is at
  // or to the right of split_x is moved into a newly allocated partition,
  // which is returned. Returns NULL when split_x is not strictly inside
  // the bounding box or when no blob ends up in the new partition.
  if (split_x <= bounding_box_.left() || split_x >= bounding_box_.right())
    return NULL;  // There will be no change.
  ColPartition* split_part = ShallowCopy();
  // ShallowCopy marks the copy as not owning blobs; give it the same
  // ownership as this partition while its blob list is still empty.
  split_part->set_owns_blobs(owns_blobs());
  BLOBNBOX_C_IT it(&boxes_);
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    BLOBNBOX* bbox = it.data();
    ColPartition* prev_owner = bbox->owner();
    ASSERT_HOST(!owns_blobs() || prev_owner == this || prev_owner == NULL);
    const TBOX& box = bbox->bounding_box();
    if (box.left() >= split_x) {
      split_part->AddBox(it.extract());
      // Ownership is only transferred for blobs that already had an owner.
      if (owns_blobs() && prev_owner != NULL)
        bbox->set_owner(split_part);
    }
  }
  // At least one blob must remain in this partition.
  ASSERT_HOST(!it.empty());
  if (split_part->IsEmpty()) {
    // Split part ended up with nothing. Possible if split_x passes
    // through the last blob.
    delete split_part;
    return NULL;
  }
  // The edges created by the split are not tab stops; split_x becomes the
  // margin on both sides of the cut.
  right_key_tab_ = false;
  split_part->left_key_tab_ = false;
  right_margin_ = split_x;
  split_part->left_margin_ = split_x;
  ComputeLimits();
  split_part->ComputeLimits();
  return split_part;
}
ColPartition * tesseract::ColPartition::SplitAtBlob ( BLOBNBOX * split_blob)

Definition at line 764 of file colpartition.cpp.

                                                            {
  // Splits the partition at the given blob. split_blob and every blob
  // after it in list order are moved into a newly allocated partition,
  // which is returned. Returns NULL if nothing was moved.
  ColPartition* split_part = ShallowCopy();
  // ShallowCopy marks the copy as not owning blobs; give it the same
  // ownership as this partition while its blob list is still empty.
  split_part->set_owns_blobs(owns_blobs());
  BLOBNBOX_C_IT it(&boxes_);
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    BLOBNBOX* bbox = it.data();
    ColPartition* prev_owner = bbox->owner();
    ASSERT_HOST(!owns_blobs() || prev_owner == this || prev_owner == NULL);
    // Once the split blob has been moved, the new partition is non-empty
    // and every remaining blob follows it.
    if (bbox == split_blob || !split_part->boxes_.empty()) {
      split_part->AddBox(it.extract());
      // Ownership is only transferred for blobs that already had an owner.
      if (owns_blobs() && prev_owner != NULL)
        bbox->set_owner(split_part);
    }
  }
  // At least one blob must remain in this partition.
  ASSERT_HOST(!it.empty());
  if (split_part->IsEmpty()) {
    // Split part ended up with nothing. Possible if split_blob is not
    // in the list of blobs.
    delete split_part;
    return NULL;
  }
  // The edges created by the split are not tab stops.
  right_key_tab_ = false;
  split_part->left_key_tab_ = false;
  ComputeLimits();
  // TODO(nbeato) Merge Ray's CL like this:
  // if (owns_blobs())
  //  SetBlobTextlineGoodness();
  split_part->ComputeLimits();
  // TODO(nbeato) Merge Ray's CL like this:
  // if (split_part->owns_blobs())
  //   split_part->SetBlobTextlineGoodness();
  return split_part;
}
int tesseract::ColPartition::top_spacing ( ) const [inline]

Definition at line 226 of file colpartition.h.

                          {
    // Accessor for top_spacing_.
    return top_spacing_;
  }
PolyBlockType tesseract::ColPartition::type ( ) const [inline]

Definition at line 181 of file colpartition.h.

                             {
    // Accessor for type_.
    return type_;
  }
bool tesseract::ColPartition::TypesMatch ( const ColPartition & other) const [inline]

Definition at line 403 of file colpartition.h.

                                                   {
    // Compares the blob region types of the two partitions using the
    // static two-argument overload.
    return TypesMatch(blob_type_, other.blob_type_);
  }
static bool tesseract::ColPartition::TypesMatch ( BlobRegionType  type1,
BlobRegionType  type2 
) [inline, static]

Definition at line 406 of file colpartition.h.

                                                                     {
    // Two types are compatible when they are equal or either is unknown.
    const bool compatible =
        type1 == type2 || type1 == BRT_UNKNOWN || type2 == BRT_UNKNOWN;
    if (!compatible) {
      return false;
    }
    // Line types never match anything, not even themselves.
    return !(BLOBNBOX::IsLineType(type1) || BLOBNBOX::IsLineType(type2));
  }
static bool tesseract::ColPartition::TypesSimilar ( PolyBlockType  type1,
PolyBlockType  type2 
) [inline, static]

Definition at line 412 of file colpartition.h.

                                                                     {
    if (type1 == type2) {
      return true;
    }
    // Flowing text and inline equations count as similar, in either order.
    const bool text_with_equation =
        type1 == PT_FLOWING_TEXT && type2 == PT_INLINE_EQUATION;
    const bool equation_with_text =
        type2 == PT_FLOWING_TEXT && type1 == PT_INLINE_EQUATION;
    return text_with_equation || equation_with_text;
  }
ColPartition_CLIST* tesseract::ColPartition::upper_partners ( ) [inline]

Definition at line 196 of file colpartition.h.

                                       {
    // Returns a pointer to the member list itself, not a copy.
    return &upper_partners_;
  }
int tesseract::ColPartition::VCoreOverlap ( const ColPartition & other) const [inline]

Definition at line 375 of file colpartition.h.

                                                    {
    // Overlap of the two [median_bottom_, median_top_] ranges; the result
    // is not clamped, so it is non-positive when the ranges do not overlap.
    const int lower_top = MIN(median_top_, other.median_top_);
    const int higher_bottom = MAX(median_bottom_, other.median_bottom_);
    return lower_top - higher_bottom;
  }
bool tesseract::ColPartition::VOverlaps ( const ColPartition & other) const [inline]

Definition at line 370 of file colpartition.h.

                                                  {
    // True when the y gap between the two bounding boxes is negative.
    return bounding_box_.y_gap(other.bounding_box_) < 0;
  }
bool tesseract::ColPartition::VSignificantCoreOverlap ( const ColPartition & other) const [inline]

Definition at line 387 of file colpartition.h.

                                                                {
    // The overlap is significant when it exceeds one third of the smaller
    // of the two median heights.
    const int own_height = median_top_ - median_bottom_;
    const int other_height = other.median_top_ - other.median_bottom_;
    const int smaller_height = MIN(own_height, other_height);
    return VCoreOverlap(other) * 3 > smaller_height;
  }
bool tesseract::ColPartition::WithinSameMargins ( const ColPartition & other) const [inline]

Definition at line 395 of file colpartition.h.

                                                          {
    // Each partition's bounding box must lie within the other's margins,
    // on both the left and the right side.
    if (left_margin_ > other.bounding_box_.left()) {
      return false;
    }
    if (bounding_box_.left() < other.left_margin_) {
      return false;
    }
    if (bounding_box_.right() > other.right_margin_) {
      return false;
    }
    return right_margin_ >= other.bounding_box_.right();
  }
int tesseract::ColPartition::XAtY ( int  sort_key,
int  y 
) const [inline]

Definition at line 320 of file colpartition.h.

                                      {
    // Forwards to TabVector::XAtY using this partition's vertical_.
    return TabVector::XAtY(vertical_, sort_key, y);
  }

The documentation for this class was generated from the following files: