Tesseract  3.02
tesseract::ColPartitionSet Class Reference

#include <colpartitionset.h>

Inheritance diagram for tesseract::ColPartitionSet:
ELIST_LINK

List of all members.

Public Member Functions

 ColPartitionSet ()
 ColPartitionSet (ColPartition_LIST *partitions)
 ColPartitionSet (ColPartition *partition)
 ~ColPartitionSet ()
const TBOXbounding_box () const
bool Empty ()
int ColumnCount ()
ColPartitionGetColumnByIndex (int index)
ColPartitionColumnContaining (int x, int y)
void GetColumnBoxes (int y_bottom, int y_top, ColSegment_LIST *segments)
void RelinquishParts ()
void ImproveColumnCandidate (WidthCallback *cb, PartSetVector *src_sets)
void AddToColumnSetsIfUnique (PartSetVector *column_sets, WidthCallback *cb)
bool CompatibleColumns (bool debug, ColPartitionSet *other, WidthCallback *cb)
int UnmatchedWidth (ColPartitionSet *part_set)
bool LegalColumnCandidate ()
ColPartitionSetCopy (bool good_only)
void DisplayColumnEdges (int y_bottom, int y_top, ScrollView *win)
ColumnSpanningType SpanningType (int resolution, int left, int right, int y, int left_margin, int right_margin, int *first_col, int *last_col, int *first_spanned_col)
void ChangeWorkColumns (const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *used_parts, WorkingPartSet_LIST *working_set)
void AccumulateColumnWidthsAndGaps (int *total_width, int *width_samples, int *total_gap, int *gap_samples)
void Print ()

Detailed Description

Definition at line 40 of file colpartitionset.h.


Constructor & Destructor Documentation

tesseract::ColPartitionSet::ColPartitionSet ( ) [inline]

Definition at line 42 of file colpartitionset.h.

                    {
  }
tesseract::ColPartitionSet::ColPartitionSet ( ColPartition_LIST *  partitions) [explicit]

Definition at line 33 of file colpartitionset.cpp.

                                                              {
  ColPartition_IT it(&parts_);
  it.add_list_after(partitions);
  ComputeCoverage();
}
tesseract::ColPartitionSet::ColPartitionSet ( ColPartition partition) [explicit]

Definition at line 39 of file colpartitionset.cpp.

                                                   {
  ColPartition_IT it(&parts_);
  it.add_after_then_move(part);
  ComputeCoverage();
}
tesseract::ColPartitionSet::~ColPartitionSet ( )

Definition at line 45 of file colpartitionset.cpp.

                                  {
}

Member Function Documentation

void tesseract::ColPartitionSet::AccumulateColumnWidthsAndGaps ( int *  total_width,
int *  width_samples,
int *  total_gap,
int *  gap_samples 
)

Definition at line 557 of file colpartitionset.cpp.

                                                                      {
  ColPartition_IT it(&parts_);
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    ColPartition* part = it.data();
    *total_width += part->ColumnWidth();
    ++*width_samples;
    if (!it.at_last()) {
      ColPartition* next_part = it.data_relative(1);
      int gap = part->KeyWidth(part->right_key(), next_part->left_key());
      *total_gap += gap;
      ++*gap_samples;
    }
  }
}
void tesseract::ColPartitionSet::AddToColumnSetsIfUnique ( PartSetVector column_sets,
WidthCallback cb 
)

Definition at line 164 of file colpartitionset.cpp.

                                                                 {
  bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(),
                                         bounding_box_.bottom());
  if (debug) {
    tprintf("Considering new column candidate:\n");
    Print();
  }
  if (!LegalColumnCandidate()) {
    if (debug) {
      tprintf("Not a legal column candidate:\n");
      Print();
    }
    delete this;
    return;
  }
  for (int i = 0; i < column_sets->size(); ++i) {
    ColPartitionSet* columns = column_sets->get(i);
    // In ordering the column set candidates, good_coverage_ is king,
    // followed by good_column_count_ and then bad_coverage_.
    bool better = good_coverage_ > columns->good_coverage_;
    if (good_coverage_ == columns->good_coverage_) {
      better = good_column_count_ > columns->good_column_count_;
      if (good_column_count_ == columns->good_column_count_) {
          better = bad_coverage_ > columns->bad_coverage_;
      }
    }
    if (better) {
      // The new one is better so add it.
      if (debug)
        tprintf("Good one\n");
      column_sets->insert(this, i);
      return;
    }
    if (columns->CompatibleColumns(false, this, cb)) {
      if (debug)
        tprintf("Duplicate\n");
      delete this;
      return;  // It is not unique.
    }
  }
  if (debug)
    tprintf("Added to end\n");
  column_sets->push_back(this);
}
const TBOX& tesseract::ColPartitionSet::bounding_box ( ) const [inline]

Definition at line 50 of file colpartitionset.h.

                                   {
    return bounding_box_;
  }
void tesseract::ColPartitionSet::ChangeWorkColumns ( const ICOORD bleft,
const ICOORD tright,
int  resolution,
ColPartition_LIST *  used_parts,
WorkingPartSet_LIST *  working_set 
)

Definition at line 485 of file colpartitionset.cpp.

                                                                               {
  // Move the input list to a temporary location so we can delete its elements
  // as we add them to the output working_set.
  WorkingPartSet_LIST work_src;
  WorkingPartSet_IT src_it(&work_src);
  src_it.add_list_after(working_set_list);
  src_it.move_to_first();
  WorkingPartSet_IT dest_it(working_set_list);
  // Completed blocks and to_blocks are accumulated and given to the first new
  // one  whenever we keep a column, or at the end.
  BLOCK_LIST completed_blocks;
  TO_BLOCK_LIST to_blocks;
  WorkingPartSet* first_new_set = NULL;
  WorkingPartSet* working_set = NULL;
  ColPartition_IT col_it(&parts_);
  for (col_it.mark_cycle_pt(); !col_it.cycled_list(); col_it.forward()) {
    ColPartition* column = col_it.data();
    // Any existing column to the left of column is completed.
    while (!src_it.empty() &&
           ((working_set = src_it.data())->column() == NULL ||
            working_set->column()->right_key() <= column->left_key())) {
      src_it.extract();
      working_set->ExtractCompletedBlocks(bleft, tright, resolution,
                                          used_parts, &completed_blocks,
                                          &to_blocks);
      delete working_set;
      src_it.forward();
    }
    // Make a new between-column WorkingSet for before the current column.
    working_set = new WorkingPartSet(NULL);
    dest_it.add_after_then_move(working_set);
    if (first_new_set == NULL)
      first_new_set = working_set;
    // A matching column gets to stay, and first_new_set gets all the
    // completed_sets.
    working_set = src_it.empty() ? NULL : src_it.data();
    if (working_set != NULL &&
        working_set->column()->MatchingColumns(*column)) {
      working_set->set_column(column);
      dest_it.add_after_then_move(src_it.extract());
      src_it.forward();
      first_new_set->InsertCompletedBlocks(&completed_blocks, &to_blocks);
      first_new_set = NULL;
    } else {
      // Just make a new working set for the current column.
      working_set = new WorkingPartSet(column);
      dest_it.add_after_then_move(working_set);
    }
  }
  // Complete any remaining src working sets.
  while (!src_it.empty()) {
    working_set = src_it.extract();
    working_set->ExtractCompletedBlocks(bleft, tright, resolution,
                                        used_parts, &completed_blocks,
                                        &to_blocks);
    delete working_set;
    src_it.forward();
  }
  // Make a new between-column WorkingSet for after the last column.
  working_set = new WorkingPartSet(NULL);
  dest_it.add_after_then_move(working_set);
  if (first_new_set == NULL)
    first_new_set = working_set;
  // The first_new_set now gets any accumulated completed_parts/blocks.
  first_new_set->InsertCompletedBlocks(&completed_blocks, &to_blocks);
}
ColPartition * tesseract::ColPartitionSet::ColumnContaining ( int  x,
int  y 
)

Definition at line 59 of file colpartitionset.cpp.

                                                            {
  ColPartition_IT it(&parts_);
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    ColPartition* part = it.data();
    if (part->ColumnContains(x, y))
      return part;
  }
  return NULL;
}
int tesseract::ColPartitionSet::ColumnCount ( ) [inline]

Definition at line 56 of file colpartitionset.h.

                    {
    return parts_.length();
  }
bool tesseract::ColPartitionSet::CompatibleColumns ( bool  debug,
ColPartitionSet other,
WidthCallback cb 
)

Definition at line 212 of file colpartitionset.cpp.

                                                           {
  if (debug) {
    tprintf("CompatibleColumns testing compatibility\n");
    Print();
    other->Print();
  }
  if (other->parts_.empty()) {
    if (debug)
      tprintf("CompatibleColumns true due to empty other\n");
    return true;
  }
  ColPartition_IT it(&other->parts_);
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    ColPartition* part = it.data();
    if (part->blob_type() < BRT_UNKNOWN) {
      if (debug) {
        tprintf("CompatibleColumns ignoring image partition\n");
        part->Print();
      }
      continue;  // Image partitions are irrelevant to column compatibility.
    }
    int y = part->MidY();
    int left = part->bounding_box().left();
    int right = part->bounding_box().right();
    ColPartition* left_col = ColumnContaining(left, y);
    ColPartition* right_col = ColumnContaining(right, y);
    if (right_col == NULL || left_col == NULL) {
      if (debug) {
        tprintf("CompatibleColumns false due to partition edge outside\n");
        part->Print();
      }
      return false;  // A partition edge lies outside of all columns
    }
    if (right_col != left_col && cb->Run(right - left)) {
      if (debug) {
        tprintf("CompatibleColumns false due to good width in multiple cols\n");
        part->Print();
      }
      return false;  // Partition with a good width must be in a single column.
    }

    ColPartition_IT it2= it;
    while (!it2.at_last()) {
      it2.forward();
      ColPartition* next_part = it2.data();
      if (!BLOBNBOX::IsTextType(next_part->blob_type()))
        continue;  // Non-text partitions are irrelevant.
      int next_left = next_part->bounding_box().left();
      if (next_left == right) {
        break;  // They share the same edge, so one must be a pull-out.
      }
      // Search to see if right and next_left fall within a single column.
      ColPartition* next_left_col = ColumnContaining(next_left, y);
      if (right_col == next_left_col) {
        // There is a column break in this column.
        // This can be due to a figure caption within a column, a pull-out
        // block, or a simple broken textline that remains to be merged:
        // all allowed, or a change in column layout: not allowed.
        // If both partitions are of good width, then it is likely
        // a change in column layout, otherwise probably an allowed situation.
        if (part->good_width() && next_part->good_width()) {
          if (debug) {
            int next_right = next_part->bounding_box().right();
            tprintf("CompatibleColumns false due to 2 parts of good width\n");
            tprintf("part1 %d-%d, part2 %d-%d\n",
                    left, right, next_left, next_right);
            right_col->Print();
          }
          return false;
        }
      }
      break;
    }
  }
  if (debug)
    tprintf("CompatibleColumns true!\n");
  return true;
}
ColPartitionSet * tesseract::ColPartitionSet::Copy ( bool  good_only)

Definition at line 343 of file colpartitionset.cpp.

                                                     {
  ColPartition_LIST copy_parts;
  ColPartition_IT src_it(&parts_);
  ColPartition_IT dest_it(&copy_parts);
  for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {
    ColPartition* part = src_it.data();
    if (BLOBNBOX::IsTextType(part->blob_type()) &&
        (!good_only || part->good_width() || part->good_column()))
      dest_it.add_after_then_move(part->ShallowCopy());
  }
  if (dest_it.empty())
    return NULL;
  return new ColPartitionSet(&copy_parts);
}
void tesseract::ColPartitionSet::DisplayColumnEdges ( int  y_bottom,
int  y_top,
ScrollView win 
)

Definition at line 375 of file colpartitionset.cpp.

                                                          {
  #ifndef GRAPHICS_DISABLED
  ColPartition_IT it(&parts_);
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    ColPartition* part = it.data();
    win->Line(part->LeftAtY(y_top), y_top, part->LeftAtY(y_bottom), y_bottom);
    win->Line(part->RightAtY(y_top), y_top, part->RightAtY(y_bottom), y_bottom);
  }
  #endif  // GRAPHICS_DISABLED
}
bool tesseract::ColPartitionSet::Empty ( ) [inline]

Definition at line 53 of file colpartitionset.h.

               {
    return parts_.empty();
  }
void tesseract::ColPartitionSet::GetColumnBoxes ( int  y_bottom,
int  y_top,
ColSegment_LIST *  segments 
)

Definition at line 359 of file colpartitionset.cpp.

                                                                {
  ColPartition_IT it(&parts_);
  ColSegment_IT col_it(segments);
  col_it.move_to_last();
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    ColPartition* part = it.data();
    ICOORD bot_left(part->LeftAtY(y_top), y_bottom);
    ICOORD top_right(part->RightAtY(y_bottom), y_top);
    ColSegment *col_seg = new ColSegment();
    col_seg->InsertBox(TBOX(bot_left, top_right));
    col_it.add_after_then_move(col_seg);
  }
}
ColPartition * tesseract::ColPartitionSet::GetColumnByIndex ( int  index)

Definition at line 49 of file colpartitionset.cpp.

                                                         {
  ColPartition_IT it(&parts_);
  it.mark_cycle_pt();
  for (int i = 0; i < index && !it.cycled_list(); ++i, it.forward());
  if (it.cycled_list())
    return NULL;
  return it.data();
}
void tesseract::ColPartitionSet::ImproveColumnCandidate ( WidthCallback cb,
PartSetVector src_sets 
)

Definition at line 79 of file colpartitionset.cpp.

                                                                      {
  int set_size = src_sets->size();
  // Iterate over the provided column sets, as each one may have something
  // to improve this.
  for (int i = 0; i < set_size; ++i) {
    ColPartitionSet* column_set = src_sets->get(i);
    if (column_set == NULL)
      continue;
    // Iterate over the parts in this and column_set, adding bigger or
    // new parts in column_set to this.
    ColPartition_IT part_it(&parts_);
    ASSERT_HOST(!part_it.empty());
    int prev_right = MIN_INT32;
    part_it.mark_cycle_pt();
    ColPartition_IT col_it(&column_set->parts_);
    for (col_it.mark_cycle_pt(); !col_it.cycled_list(); col_it.forward()) {
      ColPartition* col_part = col_it.data();
      if (col_part->blob_type() < BRT_UNKNOWN)
        continue;  // Ignore image partitions.
      int col_left = col_part->left_key();
      int col_right = col_part->right_key();
      // Sync-up part_it (in this) so it matches the col_part in column_set.
      ColPartition* part = part_it.data();
      while (!part_it.at_last() && part->right_key() < col_left) {
        prev_right = part->right_key();
        part_it.forward();
        part = part_it.data();
      }
      int part_left = part->left_key();
      int part_right = part->right_key();
      if (part_right < col_left || col_right < part_left) {
        // There is no overlap so this is a new partition.
        AddPartition(col_part->ShallowCopy(), &part_it);
        continue;
      }
      // Check the edges of col_part to see if they can improve part.
      bool part_width_ok = cb->Run(part->KeyWidth(part_left, part_right));
      if (col_left < part_left && col_left > prev_right) {
        // The left edge of the column is better and it doesn't overlap,
        // so we can potentially expand it.
        int col_box_left = col_part->BoxLeftKey();
        bool tab_width_ok = cb->Run(part->KeyWidth(col_left, part_right));
        bool box_width_ok = cb->Run(part->KeyWidth(col_box_left, part_right));
        if (tab_width_ok || (!part_width_ok )) {
          // The tab is leaving the good column metric at least as good as
          // it was before, so use the tab.
          part->CopyLeftTab(*col_part, false);
          part->SetColumnGoodness(cb);
        } else if (col_box_left < part_left &&
                   (box_width_ok || !part_width_ok)) {
          // The box is leaving the good column metric at least as good as
          // it was before, so use the box.
          part->CopyLeftTab(*col_part, true);
          part->SetColumnGoodness(cb);
        }
        part_left = part->left_key();
      }
      if (col_right > part_right &&
          (part_it.at_last() ||
           part_it.data_relative(1)->left_key() > col_right)) {
        // The right edge is better, so we can possibly expand it.
        int col_box_right = col_part->BoxRightKey();
        bool tab_width_ok = cb->Run(part->KeyWidth(part_left, col_right));
        bool box_width_ok = cb->Run(part->KeyWidth(part_left, col_box_right));
        if (tab_width_ok || (!part_width_ok )) {
          // The tab is leaving the good column metric at least as good as
          // it was before, so use the tab.
          part->CopyRightTab(*col_part, false);
          part->SetColumnGoodness(cb);
        } else if (col_box_right > part_right &&
                   (box_width_ok || !part_width_ok)) {
          // The box is leaving the good column metric at least as good as
          // it was before, so use the box.
          part->CopyRightTab(*col_part, true);
          part->SetColumnGoodness(cb);
        }
      }
    }
  }
  ComputeCoverage();
}
bool tesseract::ColPartitionSet::LegalColumnCandidate ( )

Definition at line 320 of file colpartitionset.cpp.

                                           {
  ColPartition_IT it(&parts_);
  if (it.empty())
    return false;
  bool any_text_parts = false;
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    ColPartition* part = it.data();
    if (BLOBNBOX::IsTextType(part->blob_type())) {
      if (!part->IsLegal())
        return false;  // Individual partition is illegal.
      any_text_parts = true;
    }
    if (!it.at_last()) {
      ColPartition* next_part = it.data_relative(1);
      if (next_part->left_key() < part->right_key()) {
        return false;
      }
    }
  }
  return any_text_parts;
}
void tesseract::ColPartitionSet::Print ( )

Definition at line 576 of file colpartitionset.cpp.

                            {
  ColPartition_IT it(&parts_);
  tprintf("Partition set of %d parts, %d good, coverage=%d+%d"
          " (%d,%d)->(%d,%d)\n",
          it.length(), good_column_count_, good_coverage_, bad_coverage_,
          bounding_box_.left(), bounding_box_.bottom(),
          bounding_box_.right(), bounding_box_.top());
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    ColPartition* part = it.data();
    part->Print();
  }
}
void tesseract::ColPartitionSet::RelinquishParts ( )

Definition at line 70 of file colpartitionset.cpp.

                                      {
  ColPartition_IT it(&parts_);
  while (!it.empty()) {
    it.extract();
    it.forward();
  }
}
ColumnSpanningType tesseract::ColPartitionSet::SpanningType ( int  resolution,
int  left,
int  right,
int  y,
int  left_margin,
int  right_margin,
int *  first_col,
int *  last_col,
int *  first_spanned_col 
)

Definition at line 394 of file colpartitionset.cpp.

                                                                         {
  *first_col = -1;
  *last_col = -1;
  *first_spanned_col = -1;
  int margin_columns = 0;
  ColPartition_IT it(&parts_);
  int col_index = 1;
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), col_index += 2) {
    ColPartition* part = it.data();
    if (part->ColumnContains(left, y)) {
      // In the default case, first_col is set, but columns_spanned remains
      // zero, so first_col will get reset in the first column genuinely
      // spanned, but we can tell the difference from a noise partition
      // that touches no column.
      *first_col = col_index;
      if (part->ColumnContains(right, y)) {
        // Both within a single column.
        *last_col = col_index;
        return CST_FLOWING;
      }
      if (left_margin <= part->LeftAtY(y)) {
        // It completely spans this column.
        *first_spanned_col = col_index;
        margin_columns = 1;
      }
    } else if (part->ColumnContains(right, y)) {
      if (*first_col < 0) {
        // It started in-between.
        *first_col = col_index - 1;
      }
      if (right_margin >= part->RightAtY(y)) {
        // It completely spans this column.
        if (margin_columns == 0)
          *first_spanned_col = col_index;
        ++margin_columns;
      }
      *last_col = col_index;
      break;
    } else if (left < part->LeftAtY(y) && right > part->RightAtY(y)) {
      // Neither left nor right are contained within, so it spans this
      // column.
      if (*first_col < 0) {
        // It started in between the previous column and the current column.
        *first_col = col_index - 1;
      }
      if (margin_columns == 0)
        *first_spanned_col = col_index;
      *last_col = col_index;
    } else if (right < part->LeftAtY(y)) {
      // We have gone past the end.
      *last_col = col_index - 1;
      if (*first_col < 0) {
        // It must lie completely between columns =>noise.
        *first_col = col_index - 1;
      }
      break;
    }
  }
  if (*first_col < 0)
    *first_col = col_index - 1;  // The last in-between.
  if (*last_col < 0)
    *last_col = col_index - 1;  // The last in-between.
  ASSERT_HOST(*first_col >= 0 && *last_col >= 0);
  ASSERT_HOST(*first_col <= *last_col);
  if (*first_col == *last_col && right - left < kMinColumnWidth * resolution) {
    // Neither end was in a column, and it didn't span any, so it lies
    // entirely between columns, therefore noise.
    return CST_NOISE;
  } else if (margin_columns <= 1) {
    // An exception for headings that stick outside of single-column text.
    if (margin_columns == 1 && parts_.singleton()) {
      return CST_HEADING;
    }
    // It is a pullout, as left and right were not in the same column, but
    // it doesn't go to the edge of its start and end.
    return CST_PULLOUT;
  }
  // Its margins went to the edges of first and last columns => heading.
  return CST_HEADING;
}
int tesseract::ColPartitionSet::UnmatchedWidth ( ColPartitionSet part_set)

Definition at line 295 of file colpartitionset.cpp.

                                                             {
  int total_width = 0;
  ColPartition_IT it(&part_set->parts_);
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    ColPartition* part = it.data();
    if (!BLOBNBOX::IsTextType(part->blob_type())) {
      continue;  // Non-text partitions are irrelevant to column compatibility.
    }
    int y = part->MidY();
    BLOBNBOX_C_IT box_it(part->boxes());
    for (box_it.mark_cycle_pt(); !box_it.cycled_list(); box_it.forward()) {
      const TBOX& box = it.data()->bounding_box();
      // Assume that the whole blob is outside any column iff its x-middle
      // is outside.
      int x = (box.left() + box.right()) / 2;
      ColPartition* col = ColumnContaining(x, y);
      if (col == NULL)
        total_width += box.width();
    }
  }
  return total_width;
}

The documentation for this class was generated from the following files: