Tesseract
3.02
|
#include <colpartition.h>
Public Member Functions | |
ColPartition () | |
ColPartition (BlobRegionType blob_type, const ICOORD &vertical) | |
~ColPartition () | |
const TBOX & | bounding_box () const |
int | left_margin () const |
void | set_left_margin (int margin) |
int | right_margin () const |
void | set_right_margin (int margin) |
int | median_top () const |
int | median_bottom () const |
int | median_left () const |
int | median_right () const |
int | median_size () const |
void | set_median_size (int size) |
int | median_width () const |
void | set_median_width (int width) |
BlobRegionType | blob_type () const |
void | set_blob_type (BlobRegionType t) |
BlobTextFlowType | flow () const |
void | set_flow (BlobTextFlowType f) |
int | good_blob_score () const |
bool | good_width () const |
bool | good_column () const |
bool | left_key_tab () const |
int | left_key () const |
bool | right_key_tab () const |
int | right_key () const |
PolyBlockType | type () const |
void | set_type (PolyBlockType t) |
BLOBNBOX_CLIST * | boxes () |
int | boxes_count () const |
void | set_vertical (const ICOORD &v) |
ColPartition_CLIST * | upper_partners () |
ColPartition_CLIST * | lower_partners () |
void | set_working_set (WorkingPartSet *working_set) |
bool | block_owned () const |
void | set_block_owned (bool owned) |
bool | desperately_merged () const |
ColPartitionSet * | column_set () const |
void | set_side_step (int step) |
int | bottom_spacing () const |
void | set_bottom_spacing (int spacing) |
int | top_spacing () const |
void | set_top_spacing (int spacing) |
void | set_table_type () |
void | clear_table_type () |
bool | inside_table_column () |
void | set_inside_table_column (bool val) |
ColPartition * | nearest_neighbor_above () const |
void | set_nearest_neighbor_above (ColPartition *part) |
ColPartition * | nearest_neighbor_below () const |
void | set_nearest_neighbor_below (ColPartition *part) |
int | space_above () const |
void | set_space_above (int space) |
int | space_below () const |
void | set_space_below (int space) |
int | space_to_left () const |
void | set_space_to_left (int space) |
int | space_to_right () const |
void | set_space_to_right (int space) |
uinT8 * | color1 () |
uinT8 * | color2 () |
bool | owns_blobs () const |
void | set_owns_blobs (bool owns_blobs) |
int | MidY () const |
int | MedianY () const |
int | MidX () const |
int | SortKey (int x, int y) const |
int | XAtY (int sort_key, int y) const |
int | KeyWidth (int left_key, int right_key) const |
int | ColumnWidth () const |
int | BoxLeftKey () const |
int | BoxRightKey () const |
int | LeftAtY (int y) const |
int | RightAtY (int y) const |
bool | IsLeftOf (const ColPartition &other) const |
bool | ColumnContains (int x, int y) const |
bool | IsEmpty () const |
bool | IsSingleton () const |
bool | HOverlaps (const ColPartition &other) const |
bool | VOverlaps (const ColPartition &other) const |
int | VCoreOverlap (const ColPartition &other) const |
int | HCoreOverlap (const ColPartition &other) const |
bool | VSignificantCoreOverlap (const ColPartition &other) const |
bool | WithinSameMargins (const ColPartition &other) const |
bool | TypesMatch (const ColPartition &other) const |
bool | IsLineType () const |
bool | IsImageType () const |
bool | IsTextType () const |
bool | IsVerticalType () const |
bool | IsHorizontalType () const |
bool | IsUnMergeableType () const |
bool | IsVerticalLine () const |
bool | IsHorizontalLine () const |
void | AddBox (BLOBNBOX *box) |
void | RemoveBox (BLOBNBOX *box) |
BLOBNBOX * | BiggestBox () |
TBOX | BoundsWithoutBox (BLOBNBOX *box) |
void | ClaimBoxes () |
void | DisownBoxes () |
void | DeleteBoxes () |
void | ReflectInYAxis () |
bool | IsLegal () |
bool | MatchingColumns (const ColPartition &other) const |
bool | MatchingTextColor (const ColPartition &other) const |
bool | MatchingSizes (const ColPartition &other) const |
bool | ConfirmNoTabViolation (const ColPartition &other) const |
bool | MatchingStrokeWidth (const ColPartition &other, double fractional_tolerance, double constant_tolerance) const |
bool | OKDiacriticMerge (const ColPartition &candidate, bool debug) const |
void | SetLeftTab (const TabVector *tab_vector) |
void | SetRightTab (const TabVector *tab_vector) |
void | CopyLeftTab (const ColPartition &src, bool take_box) |
void | CopyRightTab (const ColPartition &src, bool take_box) |
int | LeftBlobRule () const |
int | RightBlobRule () const |
float | SpecialBlobsDensity (const BlobSpecialTextType type) const |
int | SpecialBlobsCount (const BlobSpecialTextType type) |
void | SetSpecialBlobsDensity (const BlobSpecialTextType type, const float density) |
void | ComputeSpecialBlobsDensity () |
void | AddPartner (bool upper, ColPartition *partner) |
void | RemovePartner (bool upper, ColPartition *partner) |
ColPartition * | SingletonPartner (bool upper) |
void | Absorb (ColPartition *other, WidthCallback *cb) |
bool | OKMergeOverlap (const ColPartition &merge1, const ColPartition &merge2, int ok_box_overlap, bool debug) |
BLOBNBOX * | OverlapSplitBlob (const TBOX &box) |
ColPartition * | SplitAtBlob (BLOBNBOX *split_blob) |
ColPartition * | SplitAt (int split_x) |
void | ComputeLimits () |
int | CountOverlappingBoxes (const TBOX &box) |
void | SetPartitionType (int resolution, ColPartitionSet *columns) |
PolyBlockType | PartitionType (ColumnSpanningType flow) const |
void | ColumnRange (int resolution, ColPartitionSet *columns, int *first_col, int *last_col) |
void | SetColumnGoodness (WidthCallback *cb) |
bool | MarkAsLeaderIfMonospaced () |
void | SetRegionAndFlowTypesFromProjectionValue (int value) |
void | SetBlobTypes () |
bool | HasGoodBaseline () |
void | AddToWorkingSet (const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *used_parts, WorkingPartSet_LIST *working_set) |
ColPartition * | ShallowCopy () const |
ColPartition * | CopyButDontOwnBlobs () |
ScrollView::Color | BoxColor () const |
void | Print () const |
void | PrintColors () |
void | SmoothPartnerRun (int working_set_count) |
void | RefinePartners (PolyBlockType type, bool get_desparate, ColPartitionGrid *grid) |
bool | IsInSameColumnAs (const ColPartition &part) const |
void | set_first_column (int column) |
void | set_last_column (int column) |
Static Public Member Functions | |
static ColPartition * | MakeLinePartition (BlobRegionType blob_type, const ICOORD &vertical, int left, int bottom, int right, int top) |
static ColPartition * | FakePartition (const TBOX &box, PolyBlockType block_type, BlobRegionType blob_type, BlobTextFlowType flow) |
static ColPartition * | MakeBigPartition (BLOBNBOX *box, ColPartition_LIST *big_part_list) |
static bool | TypesMatch (BlobRegionType type1, BlobRegionType type2) |
static bool | TypesSimilar (PolyBlockType type1, PolyBlockType type2) |
static void | LineSpacingBlocks (const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts, BLOCK_LIST *completed_blocks, TO_BLOCK_LIST *to_blocks) |
static TO_BLOCK * | MakeBlock (const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts) |
static TO_BLOCK * | MakeVerticalTextBlock (const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts) |
ColPartition is a partition of a horizontal slice of the page. It starts out as a collection of blobs at a particular y-coord in the grid, but ends up (after merging and uniquing) as an approximate text line. ColPartitions are also used to hold a partitioning of the page into columns, each representing one column. Although a ColPartition applies to a given y-coordinate range, eventually, a ColPartitionSet of ColPartitions emerges, which represents the columns over a wide y-coordinate range.
Definition at line 67 of file colpartition.h.
tesseract::ColPartition::ColPartition | ( | ) | [inline] |
Definition at line 69 of file colpartition.h.
{
  // This empty constructor is here only so that the class can be ELISTIZED.
  // TODO(rays) change deep_copy in elst.h line 955 to take a callback copier
  // and eliminate CLASSNAME##_copier.
}
tesseract::ColPartition::ColPartition | ( | BlobRegionType | blob_type, |
const ICOORD & | vertical | ||
) |
blob_type | is the blob_region_type_ of the blobs in this partition. |
vertical | is the direction of logical vertical on the possibly skewed image. |
Definition at line 84 of file colpartition.cpp.
// Member-initializer list and body of the (blob_type, vertical) constructor.
// Margins and median edge positions start at +/-MAX_INT32 sentinels, sizes,
// keys, spacings and scores start at 0, flags start false (except
// owns_blobs_, which starts true), pointers start NULL, and blob_type_ and
// vertical_ are copied from the arguments. The body zero-fills
// special_blobs_densities_.
: left_margin_(-MAX_INT32), right_margin_(MAX_INT32), median_bottom_(MAX_INT32), median_top_(-MAX_INT32), median_size_(0), median_left_(MAX_INT32), median_right_(-MAX_INT32), median_width_(0), blob_type_(blob_type), flow_(BTFT_NONE), good_blob_score_(0), good_width_(false), good_column_(false), left_key_tab_(false), right_key_tab_(false), left_key_(0), right_key_(0), type_(PT_UNKNOWN), vertical_(vertical), working_set_(NULL), last_add_was_vertical_(false), block_owned_(false), desperately_merged_(false), first_column_(-1), last_column_(-1), column_set_(NULL), side_step_(0), top_spacing_(0), bottom_spacing_(0), type_before_table_(PT_UNKNOWN), inside_table_column_(false), nearest_neighbor_above_(NULL), nearest_neighbor_below_(NULL), space_above_(0), space_below_(0), space_to_left_(0), space_to_right_(0), owns_blobs_(true) { memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_)); }
tesseract::ColPartition::~ColPartition | ( | ) |
Definition at line 146 of file colpartition.cpp.
{
  // Remove this as a partner of all partners, as we don't want them
  // referring to a deleted object.
  ColPartition_C_IT it(&upper_partners_);
  // Each upper partner holds this in its lower list, hence upper == false.
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    it.data()->RemovePartner(false, this);
  }
  it.set_to_list(&lower_partners_);
  // Each lower partner holds this in its upper list, hence upper == true.
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    it.data()->RemovePartner(true, this);
  }
}
void tesseract::ColPartition::Absorb | ( | ColPartition * | other, |
WidthCallback * | cb | ||
) |
Definition at line 617 of file colpartition.cpp.
{
  // Merges other into this partition: takes over its blobs, widens margins
  // and keys, reconciles flow/blob type, re-sorts the boxes, recomputes the
  // limits, rewires other's partners onto this, and finally deletes other.
  // If cb is non-NULL, the column goodness is recomputed with it at the end.

  // The result has to either own all of the blobs or none of them.
  // Verify the flag is consistent.
  ASSERT_HOST(owns_blobs() == other->owns_blobs());
  // TODO(nbeato): check owns_blobs better. Right now owns_blobs
  // should always be true when this is called. So there is no issues.
  if (TabFind::WithinTestRegion(2, bounding_box_.left(),
                                bounding_box_.bottom()) ||
      TabFind::WithinTestRegion(2, other->bounding_box_.left(),
                                other->bounding_box_.bottom())) {
    tprintf("Merging:");
    Print();
    other->Print();
  }

  // Update the special_blobs_densities_ as the blob-count-weighted average of
  // the two partitions. The counts must be read before the lists are merged.
  // (Previously the densities were zeroed before being read and the average
  // was only stored when one list was empty, dividing 0 by 0 when both were.)
  const int w1 = boxes_.length();
  const int w2 = other->boxes_.length();
  if (w1 + w2 > 0) {
    for (int type = 0; type < BSTT_COUNT; ++type) {
      float new_val = special_blobs_densities_[type] * w1 +
          other->special_blobs_densities_[type] * w2;
      special_blobs_densities_[type] = new_val / (w1 + w2);
    }
  } else {
    // No blobs on either side: keep the densities at zero, matching what
    // ComputeSpecialBlobsDensity() does for an empty partition.
    memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_));
  }

  // Merge the two sorted lists.
  BLOBNBOX_C_IT it(&boxes_);
  BLOBNBOX_C_IT it2(&other->boxes_);
  for (; !it2.empty(); it2.forward()) {
    BLOBNBOX* bbox2 = it2.extract();
    ColPartition* prev_owner = bbox2->owner();
    if (prev_owner != other && prev_owner != NULL) {
      // A blob on other's list is owned by someone else; let them have it.
      continue;
    }
    ASSERT_HOST(prev_owner == other || prev_owner == NULL);
    if (prev_owner == other)
      bbox2->set_owner(this);
    it.add_to_end(bbox2);
  }
  left_margin_ = MIN(left_margin_, other->left_margin_);
  right_margin_ = MAX(right_margin_, other->right_margin_);
  if (other->left_key_ < left_key_) {
    left_key_ = other->left_key_;
    left_key_tab_ = other->left_key_tab_;
  }
  if (other->right_key_ > right_key_) {
    right_key_ = other->right_key_;
    right_key_tab_ = other->right_key_tab_;
  }
  // Combine the flow and blob_type in a sensible way.
  // Dominant flows stay.
  if (!DominatesInMerge(flow_, other->flow_)) {
    flow_ = other->flow_;
    blob_type_ = other->blob_type_;
  }
  SetBlobTypes();
  // The appended blobs broke the ordering, so re-sort in the direction that
  // matches the (possibly changed) partition type.
  if (IsVerticalType()) {
    boxes_.sort(SortByBoxBottom<BLOBNBOX>);
    last_add_was_vertical_ = true;
  } else {
    boxes_.sort(SortByBoxLeft<BLOBNBOX>);
    last_add_was_vertical_ = false;
  }
  ComputeLimits();
  // Fix partner lists. other is going away, so remove it as a
  // partner of all its partners and add this in its place.
  for (int upper = 0; upper < 2; ++upper) {
    ColPartition_CLIST partners;
    ColPartition_C_IT part_it(&partners);
    part_it.add_list_after(upper ? &other->upper_partners_
                                 : &other->lower_partners_);
    for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
      ColPartition* partner = part_it.extract();
      partner->RemovePartner(!upper, other);
      // Remove this first so it is not listed twice after AddPartner.
      partner->RemovePartner(!upper, this);
      partner->AddPartner(!upper, this);
    }
  }
  delete other;
  if (cb != NULL) {
    SetColumnGoodness(cb);
  }
}
void tesseract::ColPartition::AddBox | ( | BLOBNBOX * | box | ) |
Definition at line 180 of file colpartition.cpp.
{
  // Adds bbox to this partition, growing bounding_box_ and keeping boxes_
  // sorted in the direction that matches the partition type.
  // NOTE(review): the body refers to the blob as bbox while the documented
  // signature names the parameter box; presumably the definition in
  // colpartition.cpp names it bbox -- confirm.
  TBOX box = bbox->bounding_box();
  // Update the partition limits.
  if (boxes_.length() == 0) {
    bounding_box_ = box;
  } else {
    bounding_box_ += box;
  }

  if (IsVerticalType()) {
    // Re-sort only when the sort direction changed since the last add.
    if (!last_add_was_vertical_) {
      boxes_.sort(SortByBoxBottom<BLOBNBOX>);
      last_add_was_vertical_ = true;
    }
    boxes_.add_sorted(SortByBoxBottom<BLOBNBOX>, true, bbox);
  } else {
    if (last_add_was_vertical_) {
      boxes_.sort(SortByBoxLeft<BLOBNBOX>);
      last_add_was_vertical_ = false;
    }
    boxes_.add_sorted(SortByBoxLeft<BLOBNBOX>, true, bbox);
  }
  // Keys that were not set from a tab follow the bounding box.
  if (!left_key_tab_)
    left_key_ = BoxLeftKey();
  if (!right_key_tab_)
    right_key_ = BoxRightKey();
  if (TabFind::WithinTestRegion(2, box.left(), box.bottom()))
    tprintf("Added box (%d,%d)->(%d,%d) left_blob_x_=%d, right_blob_x_ = %d\n",
            box.left(), box.bottom(), box.right(), box.top(),
            bounding_box_.left(), bounding_box_.right());
}
void tesseract::ColPartition::AddPartner | ( | bool | upper, |
ColPartition * | partner | ||
) |
Definition at line 582 of file colpartition.cpp.
{
  // Records a two-way partnership: this goes on the partner's opposite-side
  // list first, then partner goes on this partition's list chosen by upper.
  // Both insertions use SortByBoxLeft ordering.
  ColPartition_CLIST* partner_side =
      upper ? &partner->lower_partners_ : &partner->upper_partners_;
  ColPartition_CLIST* own_side = upper ? &upper_partners_ : &lower_partners_;
  partner_side->add_sorted(SortByBoxLeft<ColPartition>, true, this);
  own_side->add_sorted(SortByBoxLeft<ColPartition>, true, partner);
}
void tesseract::ColPartition::AddToWorkingSet | ( | const ICOORD & | bleft, |
const ICOORD & | tright, | ||
int | resolution, | ||
ColPartition_LIST * | used_parts, | ||
WorkingPartSet_LIST * | working_set | ||
) |
Definition at line 1313 of file colpartition.cpp.
{
  // Assigns this partition to a WorkingPartSet: the singleton upper partner's
  // set if it has one, otherwise the set at index first_column_.
  // NOTE(review): the body reads working_sets while the documented signature
  // names the parameter working_set; presumably the definition in
  // colpartition.cpp uses working_sets -- confirm.
  if (block_owned_)
    return;  // Done it already.
  block_owned_ = true;
  WorkingPartSet_IT it(working_sets);
  // If there is an upper partner use its working_set_ directly.
  ColPartition* partner = SingletonPartner(true);
  if (partner != NULL && partner->working_set_ != NULL) {
    working_set_ = partner->working_set_;
    working_set_->AddPartition(this);
    return;
  }
  if (partner != NULL && textord_debug_bugs) {
    tprintf("Partition with partner has no working set!:");
    Print();
    partner->Print();
  }
  // Search for the column that the left edge fits in.
  WorkingPartSet* work_set = NULL;
  it.move_to_first();
  int col_index = 0;
  for (it.mark_cycle_pt(); !it.cycled_list() && col_index != first_column_;
       it.forward(), ++col_index) {
    // Empty body: the loop header alone advances to first_column_.
  }
  if (textord_debug_tabfind >= 2) {
    tprintf("Match is %s for:", (col_index & 1) ? "Real" : "Between");
    Print();
  }
  if (it.cycled_list() && textord_debug_bugs) {
    tprintf("Target column=%d, only had %d\n", first_column_, col_index);
  }
  ASSERT_HOST(!it.cycled_list());
  work_set = it.data();
  // If last_column_ != first_column, then we need to scoop up all blocks
  // between here and the last_column_ and put back in work_set.
  if (!it.cycled_list() && last_column_ != first_column_) {
    // Find the column that the right edge falls in.
    BLOCK_LIST completed_blocks;
    TO_BLOCK_LIST to_blocks;
    for (; !it.cycled_list() && col_index <= last_column_;
         it.forward(), ++col_index) {
      WorkingPartSet* end_set = it.data();
      end_set->ExtractCompletedBlocks(bleft, tright, resolution, used_parts,
                                      &completed_blocks, &to_blocks);
    }
    work_set->InsertCompletedBlocks(&completed_blocks, &to_blocks);
  }
  working_set_ = work_set;
  work_set->AddPartition(this);
}
BLOBNBOX * tesseract::ColPartition::BiggestBox | ( | ) |
Definition at line 226 of file colpartition.cpp.
{
  // Returns the blob with the greatest extent across the text direction:
  // width for vertical partitions, height otherwise. The first blob wins
  // ties; the result is NULL when boxes_ holds no blobs.
  const bool compare_widths = IsVerticalType();
  BLOBNBOX* largest = NULL;
  BLOBNBOX_C_IT blob_it(&boxes_);
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    BLOBNBOX* candidate = blob_it.data();
    if (largest != NULL) {
      const int candidate_extent = compare_widths
          ? candidate->bounding_box().width()
          : candidate->bounding_box().height();
      const int largest_extent = compare_widths
          ? largest->bounding_box().width()
          : largest->bounding_box().height();
      if (candidate_extent <= largest_extent)
        continue;  // Not strictly bigger; keep the current best.
    }
    largest = candidate;
  }
  return largest;
}
BlobRegionType tesseract::ColPartition::blob_type | ( | ) | const [inline] |
Definition at line 148 of file colpartition.h.
{
// Read-only accessor: returns the current value of blob_type_.
return blob_type_;
}
bool tesseract::ColPartition::block_owned | ( | ) | const [inline] |
Definition at line 205 of file colpartition.h.
{
// Read-only accessor: returns the current value of the block_owned_ flag.
return block_owned_;
}
int tesseract::ColPartition::bottom_spacing | ( | ) | const [inline] |
Definition at line 220 of file colpartition.h.
{
// Read-only accessor: returns the current value of bottom_spacing_.
return bottom_spacing_;
}
const TBOX& tesseract::ColPartition::bounding_box | ( | ) | const [inline] |
Definition at line 109 of file colpartition.h.
{
// Returns the bounding_box_ member by const reference (no copy is made).
return bounding_box_;
}
TBOX tesseract::ColPartition::BoundsWithoutBox | ( | BLOBNBOX * | box | ) |
Definition at line 245 of file colpartition.cpp.
{
  // Accumulates the bounding boxes of every blob in boxes_ except the one
  // whose pointer equals box, and returns the union.
  TBOX bounds;
  BLOBNBOX_C_IT blob_it(&boxes_);
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    BLOBNBOX* current = blob_it.data();
    if (current == box)
      continue;  // Skip the excluded blob.
    bounds += current->bounding_box();
  }
  return bounds;
}
ScrollView::Color tesseract::ColPartition::BoxColor | ( | ) | const |
Definition at line 1724 of file colpartition.cpp.
{
  // A partition with a known type_ is colored by that type; one that is still
  // PT_UNKNOWN falls back to the text-line color for its blob type and flow.
  if (type_ != PT_UNKNOWN) {
    return POLY_BLOCK::ColorForPolyBlockType(type_);
  }
  return BLOBNBOX::TextlineColor(blob_type_, flow_);
}
BLOBNBOX_CLIST* tesseract::ColPartition::boxes | ( | ) | [inline] |
Definition at line 187 of file colpartition.h.
{
// Returns the address of the boxes_ member, giving callers a mutable pointer
// to this partition's blob list.
return &boxes_;
}
int tesseract::ColPartition::boxes_count | ( | ) | const [inline] |
Definition at line 190 of file colpartition.h.
{
// Returns the number of entries in boxes_, as reported by its length().
return boxes_.length();
}
int tesseract::ColPartition::BoxLeftKey | ( | ) | const [inline] |
Definition at line 332 of file colpartition.h.
int tesseract::ColPartition::BoxRightKey | ( | ) | const [inline] |
Definition at line 336 of file colpartition.h.
void tesseract::ColPartition::ClaimBoxes | ( | ) |
Definition at line 258 of file colpartition.cpp.
{
  // Makes this partition the owner of every blob in boxes_ that has no owner
  // yet; asserts if a blob is already owned by a different partition.
  BLOBNBOX_C_IT bb_it(&boxes_);
  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
    BLOBNBOX* bblob = bb_it.data();
    ColPartition* other = bblob->owner();
    if (other == NULL) {
      // Normal case: ownership is available.
      bblob->set_owner(this);
    } else {
      ASSERT_HOST(other == this);
    }
  }
}
void tesseract::ColPartition::clear_table_type | ( | ) | [inline] |
Definition at line 239 of file colpartition.h.
// If type_ is currently PT_TABLE, restores it from type_before_table_;
// otherwise leaves type_ untouched.
{ if (type_ == PT_TABLE) type_ = type_before_table_; }
uinT8* tesseract::ColPartition::color1 | ( | ) | [inline] |
Definition at line 285 of file colpartition.h.
{
// Returns the color1_ member as a uinT8 pointer (mutable access).
return color1_;
}
uinT8* tesseract::ColPartition::color2 | ( | ) | [inline] |
Definition at line 288 of file colpartition.h.
{
// Returns the color2_ member as a uinT8 pointer (mutable access).
return color2_;
}
ColPartitionSet* tesseract::ColPartition::column_set | ( | ) | const [inline] |
Definition at line 214 of file colpartition.h.
{
// Read-only accessor: returns the stored column_set_ pointer (initialized to
// NULL by the two-argument constructor).
return column_set_;
}
bool tesseract::ColPartition::ColumnContains | ( | int | x, |
int | y | ||
) | const [inline] |
Definition at line 353 of file colpartition.h.
void tesseract::ColPartition::ColumnRange | ( | int | resolution, |
ColPartitionSet * | columns, | ||
int * | first_col, | ||
int * | last_col | ||
) |
Definition at line 1028 of file colpartition.cpp.
{
  // Asks columns for the spanning type of this partition's horizontal extent
  // at MidY(), letting it fill in *first_col and *last_col, then stores the
  // partition type derived from that spanning type in type_.
  int spanned_start = -1;  // Filled in by SpanningType; not used afterwards.
  const ColumnSpanningType spanning = columns->SpanningType(
      resolution, bounding_box_.left(), bounding_box_.right(), MidY(),
      left_margin_, right_margin_, first_col, last_col, &spanned_start);
  type_ = PartitionType(spanning);
}
int tesseract::ColPartition::ColumnWidth | ( | ) | const [inline] |
Definition at line 328 of file colpartition.h.
// Returns KeyWidth() applied to this partition's own left_key_ and right_key_.
{ return KeyWidth(left_key_, right_key_); }
void tesseract::ColPartition::ComputeLimits | ( | ) |
Definition at line 834 of file colpartition.cpp.
{
  // Recomputes bounding_box_, the left/right keys and the median statistics
  // from the blobs in boxes_, then re-inserts this into its partners' lists
  // because those lists are kept in bounding-box order.
  bounding_box_ = TBOX();  // Clear it
  BLOBNBOX_C_IT it(&boxes_);
  BLOBNBOX* bbox = NULL;
  int non_leader_count = 0;
  if (it.empty()) {
    // No blobs: fall back to a zero-height box spanning the margins.
    bounding_box_.set_left(left_margin_);
    bounding_box_.set_right(right_margin_);
    bounding_box_.set_bottom(0);
    bounding_box_.set_top(0);
  } else {
    for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
      bbox = it.data();
      bounding_box_ += bbox->bounding_box();
      if (bbox->flow() != BTFT_LEADER)
        ++non_leader_count;
    }
  }
  if (!left_key_tab_)
    left_key_ = BoxLeftKey();
  if (left_key_ > BoxLeftKey() && textord_debug_bugs) {
    // TODO(rays) investigate the causes of these error messages, to find
    // out if they are genuinely harmful, or just indicative of junk input.
    tprintf("Computed left-illegal partition\n");
    Print();
  }
  if (!right_key_tab_)
    right_key_ = BoxRightKey();
  if (right_key_ < BoxRightKey() && textord_debug_bugs) {
    tprintf("Computed right-illegal partition\n");
    Print();
  }
  if (it.empty())
    return;

  if (IsImageType() || blob_type() == BRT_RECTIMAGE ||
      blob_type() == BRT_POLYIMAGE) {
    // Image partitions take their "medians" straight from the bounding box.
    median_top_ = bounding_box_.top();
    median_bottom_ = bounding_box_.bottom();
    median_size_ = bounding_box_.height();
    median_left_ = bounding_box_.left();
    median_right_ = bounding_box_.right();
    median_width_ = bounding_box_.width();
  } else {
    STATS top_stats(bounding_box_.bottom(), bounding_box_.top() + 1);
    STATS bottom_stats(bounding_box_.bottom(), bounding_box_.top() + 1);
    STATS size_stats(0, bounding_box_.height() + 1);
    STATS left_stats(bounding_box_.left(), bounding_box_.right() + 1);
    STATS right_stats(bounding_box_.left(), bounding_box_.right() + 1);
    STATS width_stats(0, bounding_box_.width() + 1);
    for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
      bbox = it.data();
      // Leader blobs are ignored unless the partition contains nothing else.
      if (non_leader_count == 0 || bbox->flow() != BTFT_LEADER) {
        TBOX box = bbox->bounding_box();
        // Each sample is weighted by the blob's area.
        int area = box.area();
        top_stats.add(box.top(), area);
        bottom_stats.add(box.bottom(), area);
        size_stats.add(box.height(), area);
        left_stats.add(box.left(), area);
        right_stats.add(box.right(), area);
        width_stats.add(box.width(), area);
      }
    }
    // Adding 0.5 before the cast rounds the medians to the nearest integer.
    median_top_ = static_cast<int>(top_stats.median() + 0.5);
    median_bottom_ = static_cast<int>(bottom_stats.median() + 0.5);
    median_size_ = static_cast<int>(size_stats.median() + 0.5);
    median_left_ = static_cast<int>(left_stats.median() + 0.5);
    median_right_ = static_cast<int>(right_stats.median() + 0.5);
    median_width_ = static_cast<int>(width_stats.median() + 0.5);
  }

  if (right_margin_ < bounding_box_.right() && textord_debug_bugs) {
    tprintf("Made partition with bad right coords");
    Print();
  }
  if (left_margin_ > bounding_box_.left() && textord_debug_bugs) {
    tprintf("Made partition with bad left coords");
    Print();
  }
  // Fix partner lists. The bounding box has changed and partners are stored
  // in bounding box order, so remove and reinsert this as a partner
  // of all its partners.
  for (int upper = 0; upper < 2; ++upper) {
    ColPartition_CLIST partners;
    ColPartition_C_IT part_it(&partners);
    part_it.add_list_after(upper ? &upper_partners_ : &lower_partners_);
    for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
      ColPartition* partner = part_it.extract();
      partner->RemovePartner(!upper, this);
      partner->AddPartner(!upper, this);
    }
  }
  if (TabFind::WithinTestRegion(2, bounding_box_.left(),
                                bounding_box_.bottom())) {
    tprintf("Recomputed box for partition %p\n", this);
    Print();
  }
}
void tesseract::ColPartition::ComputeSpecialBlobsDensity | ( | ) |
Definition at line 561 of file colpartition.cpp.
{
  // Rebuilds special_blobs_densities_ from scratch: counts the blobs of each
  // special text type, then divides every count by the number of blobs.
  // An empty partition is left with all densities at zero.
  memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_));
  if (boxes_.empty())
    return;

  BLOBNBOX_C_IT box_it(&boxes_);
  for (box_it.mark_cycle_pt(); !box_it.cycled_list(); box_it.forward()) {
    const BlobSpecialTextType kind = box_it.data()->special_text_type();
    special_blobs_densities_[kind]++;
  }

  for (int kind = 0; kind < BSTT_COUNT; ++kind)
    special_blobs_densities_[kind] /= boxes_.length();
}
bool tesseract::ColPartition::ConfirmNoTabViolation | ( | const ColPartition & | other | ) | const |
Definition at line 392 of file colpartition.cpp.
{
  // Returns false when the two boxes are horizontally disjoint and the nearer
  // edge of one also lies beyond the other's blob rule on that side.
  // The four tests are evaluated in order and short-circuit, so a rule
  // accessor is only called when the matching disjointness test has passed.
  const TBOX& mine = bounding_box_;
  const TBOX& theirs = other.bounding_box_;
  return !(mine.right() < theirs.left() &&
           mine.right() < other.LeftBlobRule()) &&
         !(theirs.right() < mine.left() &&
           theirs.right() < LeftBlobRule()) &&
         !(mine.left() > theirs.right() &&
           mine.left() > other.RightBlobRule()) &&
         !(theirs.left() > mine.right() &&
           theirs.left() > RightBlobRule());
}
ColPartition * tesseract::ColPartition::CopyButDontOwnBlobs | ( | ) |
Definition at line 1711 of file colpartition.cpp.
{
  // Builds a ShallowCopy() of this partition, marks it as not owning its
  // blobs, and appends the same blob pointers to its list in order.
  ColPartition* duplicate = ShallowCopy();
  duplicate->set_owns_blobs(false);
  BLOBNBOX_C_IT dest_it(duplicate->boxes());
  BLOBNBOX_C_IT src_it(boxes());
  for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {
    dest_it.add_after_then_move(src_it.data());
  }
  return duplicate;
}
void tesseract::ColPartition::CopyLeftTab | ( | const ColPartition & | src, |
bool | take_box | ||
) |
Definition at line 498 of file colpartition.cpp.
{
  // Takes the left side from src. With take_box set (or when src has no left
  // tab) the left edge of bounding_box_ is moved to src's box key evaluated
  // at MidY(); otherwise src's tab key is copied as-is.
  left_key_tab_ = !take_box && src.left_key_tab_;
  if (!left_key_tab_) {
    bounding_box_.set_left(XAtY(src.BoxLeftKey(), MidY()));
    left_key_ = BoxLeftKey();
  } else {
    left_key_ = src.left_key_;
  }
  // Adopt src's margin only if ours now falls inside the box.
  if (left_margin_ > bounding_box_.left()) {
    left_margin_ = src.left_margin_;
  }
}
void tesseract::ColPartition::CopyRightTab | ( | const ColPartition & | src, |
bool | take_box | ||
) |
Definition at line 511 of file colpartition.cpp.
{
  // Takes the right side from src. With take_box set (or when src has no
  // right tab) the right edge of bounding_box_ is moved to src's box key
  // evaluated at MidY(); otherwise src's tab key is copied as-is.
  right_key_tab_ = !take_box && src.right_key_tab_;
  if (!right_key_tab_) {
    bounding_box_.set_right(XAtY(src.BoxRightKey(), MidY()));
    right_key_ = BoxRightKey();
  } else {
    right_key_ = src.right_key_;
  }
  // Adopt src's margin only if ours now falls inside the box.
  if (right_margin_ < bounding_box_.right()) {
    right_margin_ = src.right_margin_;
  }
}
int tesseract::ColPartition::CountOverlappingBoxes | ( | const TBOX & | box | ) |
Definition at line 933 of file colpartition.cpp.
{
  // Counts the blobs in boxes_ whose bounding box overlaps the given box.
  int hits = 0;
  BLOBNBOX_C_IT blob_it(&boxes_);
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    if (box.overlap(blob_it.data()->bounding_box())) {
      ++hits;
    }
  }
  return hits;
}
void tesseract::ColPartition::DeleteBoxes | ( | ) |
Definition at line 284 of file colpartition.cpp.
{
  // Although the boxes_ list is a C_LIST, in some cases it owns the
  // BLOBNBOXes, as the ColPartition takes ownership from the grid,
  // and the BLOBNBOXes own the underlying C_BLOBs.
  // Each blob is extracted from the list before it and its C_BLOB are
  // deleted, so boxes_ is empty when the loop finishes.
  for (BLOBNBOX_C_IT bb_it(&boxes_); !bb_it.empty(); bb_it.forward()) {
    BLOBNBOX* bblob = bb_it.extract();
    delete bblob->cblob();
    delete bblob;
  }
}
bool tesseract::ColPartition::desperately_merged | ( | ) | const [inline] |
Definition at line 211 of file colpartition.h.
{
// Read-only accessor: returns the current value of desperately_merged_.
return desperately_merged_;
}
void tesseract::ColPartition::DisownBoxes | ( | ) |
Definition at line 274 of file colpartition.cpp.
ColPartition * tesseract::ColPartition::FakePartition | ( | const TBOX & | box, |
PolyBlockType | block_type, | ||
BlobRegionType | blob_type, | ||
BlobTextFlowType | flow | ||
) | [static] |
Definition at line 108 of file colpartition.cpp.
{
  // Allocates a partition with an upright vertical direction (0, 1) holding a
  // single fake blob that covers box, with margins at the box edges. The
  // partition claims the blob and is returned to the caller.
  ColPartition* fake_part = new ColPartition(blob_type, ICOORD(0, 1));
  fake_part->set_type(block_type);
  fake_part->set_flow(flow);

  BLOBNBOX* fake_blob = new BLOBNBOX(C_BLOB::FakeBlob(box));
  fake_part->AddBox(fake_blob);

  fake_part->set_left_margin(box.left());
  fake_part->set_right_margin(box.right());
  fake_part->SetBlobTypes();
  fake_part->ComputeLimits();
  fake_part->ClaimBoxes();
  return fake_part;
}
BlobTextFlowType tesseract::ColPartition::flow | ( | ) | const [inline] |
Definition at line 154 of file colpartition.h.
{
// Read-only accessor: returns the current value of flow_.
return flow_;
}
int tesseract::ColPartition::good_blob_score | ( | ) | const [inline] |
Definition at line 160 of file colpartition.h.
{
// Read-only accessor: returns the current value of good_blob_score_.
return good_blob_score_;
}
bool tesseract::ColPartition::good_column | ( | ) | const [inline] |
Definition at line 166 of file colpartition.h.
{
// Read-only accessor: returns the current value of the good_column_ flag.
return good_column_;
}
bool tesseract::ColPartition::good_width | ( | ) | const [inline] |
Definition at line 163 of file colpartition.h.
{
// Read-only accessor: returns the current value of the good_width_ flag.
return good_width_;
}
bool tesseract::ColPartition::HasGoodBaseline | ( | ) |
Definition at line 1248 of file colpartition.cpp.
{
  // Fits a line through baseline points taken from the blobs and returns true
  // when the fit error is below kMaxBaselineError times the mean blob height
  // and the blobs cover at least kMinBaselineCoverage of the line's length.
  // Returns false when there are no blobs, or no interior blobs from which
  // to measure a mean height.

  // A partition without blobs has no baseline; it.data() below would be
  // invalid on an empty list.
  if (boxes_.empty())
    return false;

  // Approximation of the baseline.
  DetLineFit linepoints;
  // Calculation of the mean height on this line segment. Note that these
  // variable names apply to the context of a horizontal line, and work
  // analogously, rather than literally in the case of a vertical line.
  int total_height = 0;
  int coverage = 0;
  int height_count = 0;
  int width = 0;
  BLOBNBOX_C_IT it(&boxes_);
  TBOX box(it.data()->bounding_box());
  // Accumulate points representing the baseline at the middle of each blob,
  // but add an additional point for each end of the line. This makes it
  // harder to fit a severe skew angle, as it is most likely not right.
  if (IsVerticalType()) {
    // For a vertical line, use the right side as the baseline.
    ICOORD first_pt(box.right(), box.bottom());
    // Use the bottom-right of the first (bottom) box, the top-right of the
    // last, and the middle-right of all others.
    linepoints.Add(first_pt);
    for (it.forward(); !it.at_last(); it.forward()) {
      BLOBNBOX* blob = it.data();
      box = blob->bounding_box();
      ICOORD box_pt(box.right(), (box.top() + box.bottom()) / 2);
      linepoints.Add(box_pt);
      total_height += box.width();
      coverage += box.height();
      ++height_count;
    }
    box = it.data()->bounding_box();
    ICOORD last_pt(box.right(), box.top());
    linepoints.Add(last_pt);
    width = last_pt.y() - first_pt.y();
  } else {
    // Horizontal lines use the bottom as the baseline.
    // Use the bottom-left of the first box, the bottom-right of the last,
    // and the middle of all others.
    ICOORD first_pt(box.left(), box.bottom());
    linepoints.Add(first_pt);
    for (it.forward(); !it.at_last(); it.forward()) {
      BLOBNBOX* blob = it.data();
      box = blob->bounding_box();
      ICOORD box_pt((box.left() + box.right()) / 2, box.bottom());
      linepoints.Add(box_pt);
      total_height += box.height();
      coverage += box.width();
      ++height_count;
    }
    box = it.data()->bounding_box();
    ICOORD last_pt(box.right(), box.bottom());
    linepoints.Add(last_pt);
    width = last_pt.x() - first_pt.x();
  }
  // Only interior blobs contribute to height_count, so it is zero for
  // partitions of one or two blobs. Return false explicitly instead of
  // dividing by zero.
  if (height_count == 0)
    return false;
  // Maximum median error allowed to be a good text line.
  double max_error = kMaxBaselineError * total_height / height_count;
  ICOORD start_pt, end_pt;
  double error = linepoints.Fit(&start_pt, &end_pt);
  return error < max_error && coverage >= kMinBaselineCoverage * width;
}
int tesseract::ColPartition::HCoreOverlap | ( | const ColPartition & | other | ) | const [inline] |
Definition at line 381 of file colpartition.h.
bool tesseract::ColPartition::HOverlaps | ( | const ColPartition & | other | ) | const [inline] |
Definition at line 365 of file colpartition.h.
// Returns the result of x_overlap() between this partition's bounding_box_
// and other's bounding_box_.
{ return bounding_box_.x_overlap(other.bounding_box_); }
bool tesseract::ColPartition::inside_table_column | ( | ) | [inline] |
Definition at line 243 of file colpartition.h.
{
// Read-only accessor: returns the current value of inside_table_column_.
return inside_table_column_;
}
bool tesseract::ColPartition::IsEmpty | ( | ) | const [inline] |
Definition at line 357 of file colpartition.h.
{
// True when the boxes_ list reports empty(), i.e. the partition has no blobs.
return boxes_.empty();
}
bool tesseract::ColPartition::IsHorizontalLine | ( | ) | const [inline] |
Definition at line 449 of file colpartition.h.
// True only when both IsHorizontalType() and IsLineType() hold.
{ return IsHorizontalType() && IsLineType(); }
bool tesseract::ColPartition::IsHorizontalType | ( | ) | const [inline] |
Definition at line 435 of file colpartition.h.
bool tesseract::ColPartition::IsImageType | ( | ) | const [inline] |
Definition at line 423 of file colpartition.h.
// Delegates to PTIsImageType() on this partition's type_.
{ return PTIsImageType(type_); }
bool tesseract::ColPartition::IsInSameColumnAs | ( | const ColPartition & | part | ) | const |
Definition at line 2128 of file colpartition.cpp.
{
  // Overlap does not occur when last < part.first or first > part.last.
  // In other words, one is completely to the side of the other.
  // This is just DeMorgan's law applied to that so the function returns true.
  return (last_column_ >= part.first_column_) &&
         (first_column_ <= part.last_column_);
}
bool tesseract::ColPartition::IsLeftOf | ( | const ColPartition & | other | ) | const [inline] |
Definition at line 349 of file colpartition.h.
// Compares right edges only: true when this bounding box's right() is
// strictly less than other's right().
{ return bounding_box_.right() < other.bounding_box_.right(); }
bool tesseract::ColPartition::IsLegal | ( | ) |
Definition at line 321 of file colpartition.cpp.
{
  // Three consistency checks, in order: box edges, margins, keys. Each failure
  // is reported (and the partition printed) when textord_debug_bugs is set.

  // 1. The box must not be inside-out horizontally.
  if (bounding_box_.left() > bounding_box_.right()) {
    if (textord_debug_bugs) {
      tprintf("Bounding box invalid\n");
      Print();
    }
    return false;
  }

  // 2. The margins must enclose the box.
  const bool margins_enclose_box =
      left_margin_ <= bounding_box_.left() &&
      right_margin_ >= bounding_box_.right();
  if (!margins_enclose_box) {
    if (textord_debug_bugs) {
      tprintf("Margins invalid\n");
      Print();
    }
    return false;
  }

  // 3. The keys must not lie inside the box keys.
  const bool keys_enclose_box =
      left_key_ <= BoxLeftKey() && right_key_ >= BoxRightKey();
  if (!keys_enclose_box) {
    if (textord_debug_bugs) {
      tprintf("Key inside box: %d v %d or %d v %d\n",
              left_key_, BoxLeftKey(), right_key_, BoxRightKey());
      Print();
    }
    return false;
  }

  return true;
}
bool tesseract::ColPartition::IsLineType | ( | ) | const [inline] |
Definition at line 419 of file colpartition.h.
// Forwards the partition's type_ to PTIsLineType().
{ return PTIsLineType(type_); }
bool tesseract::ColPartition::IsSingleton | ( | ) | const [inline] |
Definition at line 361 of file colpartition.h.
{
// True when the blob list boxes_ reports itself a singleton.
return boxes_.singleton();
}
bool tesseract::ColPartition::IsTextType | ( | ) | const [inline] |
Definition at line 427 of file colpartition.h.
// Forwards the partition's type_ to PTIsTextType().
{ return PTIsTextType(type_); }
bool tesseract::ColPartition::IsUnMergeableType | ( | ) | const [inline] |
Definition at line 439 of file colpartition.h.
// Unmergeable when BLOBNBOX::UnMergeableType() says so for blob_type_, or
// when the partition type is PT_NOISE.
{ return BLOBNBOX::UnMergeableType(blob_type_) || type_ == PT_NOISE; }
bool tesseract::ColPartition::IsVerticalLine | ( | ) | const [inline] |
Definition at line 444 of file colpartition.h.
// True only when both IsVerticalType() and IsLineType() hold.
{ return IsVerticalType() && IsLineType(); }
bool tesseract::ColPartition::IsVerticalType | ( | ) | const [inline] |
Definition at line 431 of file colpartition.h.
// Vertical means the blob type is either BRT_VERT_TEXT or BRT_VLINE.
{ return blob_type_ == BRT_VERT_TEXT || blob_type_ == BRT_VLINE; }
int tesseract::ColPartition::KeyWidth | ( | int | left_key, |
int | right_key | ||
) | const [inline] |
Definition at line 324 of file colpartition.h.
int tesseract::ColPartition::left_key | ( | ) | const [inline] |
Definition at line 172 of file colpartition.h.
{
// Plain accessor for left_key_.
return left_key_;
}
bool tesseract::ColPartition::left_key_tab | ( | ) | const [inline] |
Definition at line 169 of file colpartition.h.
{
// Plain accessor for the left_key_tab_ flag.
return left_key_tab_;
}
int tesseract::ColPartition::left_margin | ( | ) | const [inline] |
Definition at line 112 of file colpartition.h.
{
// Plain accessor for left_margin_.
return left_margin_;
}
int tesseract::ColPartition::LeftAtY | ( | int | y | ) | const [inline] |
Definition at line 340 of file colpartition.h.
// Evaluates XAtY() for the left key at the given y.
{ return XAtY(left_key_, y); }
int tesseract::ColPartition::LeftBlobRule | ( | ) | const |
Definition at line 524 of file colpartition.cpp.
{
// The iterator API needs a non-const list, hence the const_cast; the list is
// only read here. A fresh iterator sits on the first blob in boxes_.
BLOBNBOX_C_IT first_it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
BLOBNBOX* first_blob = first_it.data();
return first_blob->left_rule();
}
void tesseract::ColPartition::LineSpacingBlocks | ( | const ICOORD & | bleft, |
const ICOORD & | tright, | ||
int | resolution, | ||
ColPartition_LIST * | block_parts, | ||
ColPartition_LIST * | used_parts, | ||
BLOCK_LIST * | completed_blocks, | ||
TO_BLOCK_LIST * | to_blocks | ||
) | [static] |
Definition at line 1373 of file colpartition.cpp.
{
  // Splits block_parts into runs of evenly spaced lines and turns each run
  // into a block via MakeBlock(). New TO_BLOCKs are appended to to_blocks and
  // their BLOCKs to completed_blocks. block_parts is emptied in the process.
  int page_height = tright.y() - bleft.y();
  // Compute the initial spacing stats.
  ColPartition_IT it(block_parts);
  int part_count = 0;
  int max_line_height = 0;

  // TODO(joeliu): We should add some special logic for PT_INLINE_EQUATION type
  // because their line spacing with their neighbors maybe smaller and their
  // height may be slightly larger.

  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    ColPartition* part = it.data();
    ASSERT_HOST(!part->boxes()->empty());
    STATS side_steps(0, part->bounding_box().height());
    if (part->bounding_box().height() > max_line_height)
      max_line_height = part->bounding_box().height();
    // Collect the absolute bottom-to-bottom steps between consecutive blobs.
    BLOBNBOX_C_IT blob_it(part->boxes());
    int prev_bottom = blob_it.data()->bounding_box().bottom();
    for (blob_it.forward(); !blob_it.at_first(); blob_it.forward()) {
      BLOBNBOX* blob = blob_it.data();
      int bottom = blob->bounding_box().bottom();
      int step = bottom - prev_bottom;
      if (step < 0)
        step = -step;
      side_steps.add(step, 1);
      prev_bottom = bottom;
    }
    part->set_side_step(static_cast<int>(side_steps.median() + 0.5));
    if (!it.at_last()) {
      ColPartition* next_part = it.data_relative(1);
      part->set_bottom_spacing(part->median_bottom() -
                               next_part->median_bottom());
      part->set_top_spacing(part->median_top() - next_part->median_top());
    } else {
      // The last partition has no successor: use the page height.
      part->set_bottom_spacing(page_height);
      part->set_top_spacing(page_height);
    }
    if (textord_debug_tabfind) {
      part->Print();
      tprintf("side step = %.2f, top spacing = %d, bottom spacing=%d\n",
              side_steps.median(), part->top_spacing(), part->bottom_spacing());
    }
    ++part_count;
  }
  if (part_count == 0)
    return;

  SmoothSpacings(resolution, page_height, block_parts);

  // Move the partitions into individual block lists and make the blocks.
  BLOCK_IT block_it(completed_blocks);
  TO_BLOCK_IT to_block_it(to_blocks);
  ColPartition_LIST spacing_parts;
  ColPartition_IT sp_block_it(&spacing_parts);
  int same_block_threshold = max_line_height * kMaxSameBlockLineSpacing;
  for (it.mark_cycle_pt(); !it.empty();) {
    ColPartition* part = it.extract();
    sp_block_it.add_to_end(part);
    it.forward();
    if (it.empty() || part->bottom_spacing() > same_block_threshold ||
        !part->SpacingsEqual(*it.data(), resolution)) {
      // There is a spacing boundary. Check to see if it.data() belongs
      // better in the current block or the next one.
      if (!it.empty() && part->bottom_spacing() <= same_block_threshold) {
        ColPartition* next_part = it.data();
        // If there is a size match one-way, then the middle line goes with
        // its matched size, otherwise it goes with the smallest spacing.
        ColPartition* third_part = it.at_last() ? NULL : it.data_relative(1);
        if (textord_debug_tabfind) {
          tprintf("Spacings unequal: upper:%d/%d, lower:%d/%d,"
                  " sizes %d %d %d\n",
                  part->top_spacing(), part->bottom_spacing(),
                  next_part->top_spacing(), next_part->bottom_spacing(),
                  part->median_size(), next_part->median_size(),
                  third_part != NULL ? third_part->median_size() : 0);
        }
        // We can only consider adding the next line to the block if the sizes
        // match and the lines are close enough for their size.
        if (part->SizesSimilar(*next_part) &&
            next_part->median_size() * kMaxSameBlockLineSpacing >
                part->bottom_spacing() &&
            part->median_size() * kMaxSameBlockLineSpacing >
                part->top_spacing()) {
          // Even now, we can only add it as long as the third line doesn't
          // match in the same way and have a smaller bottom spacing.
          if (third_part == NULL ||
              !next_part->SizesSimilar(*third_part) ||
              third_part->median_size() * kMaxSameBlockLineSpacing <=
                  next_part->bottom_spacing() ||
              next_part->median_size() * kMaxSameBlockLineSpacing <=
                  next_part->top_spacing() ||
              next_part->bottom_spacing() > part->bottom_spacing()) {
            // Add to the current block.
            sp_block_it.add_to_end(it.extract());
            it.forward();
            if (textord_debug_tabfind) {
              tprintf("Added line to current block.\n");
            }
          }
        }
      }
      TO_BLOCK* to_block = MakeBlock(bleft, tright, &spacing_parts, used_parts);
      if (to_block != NULL) {
        to_block_it.add_to_end(to_block);
        block_it.add_to_end(to_block->block);
      }
      // Restart the accumulator iterator on the spacing_parts list.
      sp_block_it.set_to_list(&spacing_parts);
    } else {
      if (textord_debug_tabfind && !it.empty()) {
        ColPartition* next_part = it.data();
        // The format previously had four conversions for six arguments, so the
        // median sizes were silently dropped from the debug output.
        tprintf("Spacings equal: upper:%d/%d, lower:%d/%d, median:%d/%d\n",
                part->top_spacing(), part->bottom_spacing(),
                next_part->top_spacing(), next_part->bottom_spacing(),
                part->median_size(), next_part->median_size());
      }
    }
  }
}
ColPartition_CLIST* tesseract::ColPartition::lower_partners | ( | ) | [inline] |
Definition at line 199 of file colpartition.h.
{
// Exposes the address of the lower_partners_ list member.
return &lower_partners_;
}
ColPartition * tesseract::ColPartition::MakeBigPartition | ( | BLOBNBOX * | box, |
ColPartition_LIST * | big_part_list | ||
) | [static] |
Definition at line 129 of file colpartition.cpp.
{
  // Wraps a single blob in a freshly allocated partition with unknown blob
  // type, no flow, and an upright (0, 1) vertical direction.
  box->set_owner(NULL);  // Clear the owner before the new partition claims it.
  ColPartition* big_part = new ColPartition(BRT_UNKNOWN, ICOORD(0, 1));
  big_part->set_flow(BTFT_NONE);
  big_part->AddBox(box);
  big_part->ComputeLimits();
  big_part->ClaimBoxes();
  big_part->SetBlobTypes();
  big_part->set_block_owned(true);
  // The list is optional; without one the caller just gets the pointer back.
  if (big_part_list == NULL)
    return big_part;
  ColPartition_IT list_it(big_part_list);
  list_it.add_to_end(big_part);
  return big_part;
}
TO_BLOCK * tesseract::ColPartition::MakeBlock | ( | const ICOORD & | bleft, |
const ICOORD & | tright, | ||
ColPartition_LIST * | block_parts, | ||
ColPartition_LIST * | used_parts | ||
) | [static] |
Definition at line 1605 of file colpartition.cpp.
{
  if (block_parts->empty())
    return NULL;  // Nothing to do.
  ColPartition_IT part_it(block_parts);
  ColPartition* first_part = part_it.data();
  PolyBlockType block_type = first_part->type();
  // Vertical text has its own block builder.
  if (block_type == PT_VERTICAL_TEXT)
    return MakeVerticalTextBlock(bleft, tright, block_parts, used_parts);
  // LineSpacingBlocks has handed us a collection of evenly spaced lines and
  // put the average spacing in each partition, so we can just take the
  // linespacing from the first partition.
  int line_spacing = first_part->bottom_spacing();
  if (line_spacing < first_part->median_size())
    line_spacing = first_part->bounding_box().height();

  // Build the polygon outline: pass 0 walks left-edge runs, pass 1 walks
  // right-edge runs, tracking the overall x/y extent along the way.
  ICOORDELT_LIST vertices;
  ICOORDELT_IT vert_it(&vertices);
  ICOORD run_start, run_end;
  int min_x = MAX_INT32;
  int max_x = -MAX_INT32;
  int min_y = MAX_INT32;
  int max_y = -MAX_INT32;
  int pass = 0;
  while (pass < 2) {
    if (pass == 0)
      ColPartition::LeftEdgeRun(&part_it, &run_start, &run_end);
    else
      ColPartition::RightEdgeRun(&part_it, &run_start, &run_end);
    ClipCoord(bleft, tright, &run_start);
    ClipCoord(bleft, tright, &run_end);
    vert_it.add_after_then_move(new ICOORDELT(run_start));
    vert_it.add_after_then_move(new ICOORDELT(run_end));
    UpdateRange(run_start.x(), &min_x, &max_x);
    UpdateRange(run_end.x(), &min_x, &max_x);
    UpdateRange(run_start.y(), &min_y, &max_y);
    UpdateRange(run_end.y(), &min_y, &max_y);
    // Pass 0 finishes once the iterator is at the first element, pass 1 once
    // it is at the last; either way the next pass starts from the last.
    const bool pass_finished =
        (pass == 0) ? part_it.at_first() : part_it.at_last();
    if (pass_finished) {
      ++pass;
      part_it.move_to_last();
    }
  }
  if (textord_debug_tabfind)
    tprintf("Making block at (%d,%d)->(%d,%d)\n",
            min_x, min_y, max_x, max_y);
  BLOCK* block = new BLOCK("", true, 0, 0, min_x, min_y, max_x, max_y);
  block->set_poly_block(new POLY_BLOCK(&vertices, block_type));
  return MoveBlobsToBlock(false, line_spacing, block, block_parts, used_parts);
}
ColPartition * tesseract::ColPartition::MakeLinePartition | ( | BlobRegionType | blob_type, |
const ICOORD & | vertical, | ||
int | left, | ||
int | bottom, | ||
int | right, | ||
int | top | ||
) | [static] |
Constructs a fake ColPartition with no BLOBNBOXes to represent a horizontal or vertical line, given a type and a bounding box.
Definition at line 161 of file colpartition.cpp.
{
  // Allocate the blob-less partition, then fill in its geometry directly from
  // the supplied rectangle.
  ColPartition* line_part = new ColPartition(blob_type, vertical);
  line_part->bounding_box_ = TBOX(left, bottom, right, top);
  // With no blobs, the median values are taken from the rectangle itself.
  line_part->median_bottom_ = bottom;
  line_part->median_top_ = top;
  line_part->median_size_ = top - bottom;
  line_part->median_width_ = right - left;
  // The keys are set from the box keys, after the bounding box is in place.
  line_part->left_key_ = line_part->BoxLeftKey();
  line_part->right_key_ = line_part->BoxRightKey();
  return line_part;
}
TO_BLOCK * tesseract::ColPartition::MakeVerticalTextBlock | ( | const ICOORD & | bleft, |
const ICOORD & | tright, | ||
ColPartition_LIST * | block_parts, | ||
ColPartition_LIST * | used_parts | ||
) | [static] |
Definition at line 1658 of file colpartition.cpp.
{
  if (block_parts->empty())
    return NULL;  // Nothing to do.
  ColPartition_IT part_it(block_parts);
  ColPartition* first_part = part_it.data();
  // The first partition supplies the type and the line spacing (its width).
  TBOX block_box = first_part->bounding_box();
  int line_spacing = block_box.width();
  PolyBlockType block_type = first_part->type();
  // Grow the block box to the union of every partition's bounding box.
  part_it.mark_cycle_pt();
  while (!part_it.cycled_list()) {
    block_box += part_it.data()->bounding_box();
    part_it.forward();
  }
  if (textord_debug_tabfind) {
    tprintf("Making block at:");
    block_box.print();
  }
  BLOCK* block = new BLOCK("", true, 0, 0, block_box.left(), block_box.bottom(),
                           block_box.right(), block_box.top());
  block->set_poly_block(new POLY_BLOCK(block_box, block_type));
  return MoveBlobsToBlock(true, line_spacing, block, block_parts, used_parts);
}
bool tesseract::ColPartition::MarkAsLeaderIfMonospaced | ( | ) |
Definition at line 1054 of file colpartition.cpp.
{
  // Decides whether the blobs are spaced regularly enough to be a leader. On
  // success the blobs and the partition are marked BRT_TEXT / BTFT_LEADER and
  // true is returned. All blobs are set to BTFT_NEIGHBOURS first.
  bool result = false;
  // Gather statistics on the gaps between blobs and the widths of the blobs.
  int part_width = bounding_box_.width();
  STATS gap_stats(0, part_width);
  STATS width_stats(0, part_width);
  BLOBNBOX_C_IT it(&boxes_);
  BLOBNBOX* prev_blob = it.data();
  prev_blob->set_flow(BTFT_NEIGHBOURS);
  width_stats.add(prev_blob->bounding_box().width(), 1);
  int blob_count = 1;
  for (it.forward(); !it.at_first(); it.forward()) {
    BLOBNBOX* blob = it.data();
    int left = blob->bounding_box().left();
    int right = blob->bounding_box().right();
    gap_stats.add(left - prev_blob->bounding_box().right(), 1);
    width_stats.add(right - left, 1);
    blob->set_flow(BTFT_NEIGHBOURS);
    prev_blob = blob;
    ++blob_count;
  }
  double median_gap = gap_stats.median();
  double median_width = width_stats.median();
  double max_width = MAX(median_gap, median_width);
  double min_width = MIN(median_gap, median_width);
  double gap_iqr = gap_stats.ile(0.75f) - gap_stats.ile(0.25f);
  if (textord_debug_tabfind >= 4) {
    tprintf("gap iqr = %g, blob_count=%d, limits=%g,%g\n",
            gap_iqr, blob_count,
            max_width * kMaxLeaderGapFractionOfMax,
            min_width * kMaxLeaderGapFractionOfMin);
  }
  if (gap_iqr < max_width * kMaxLeaderGapFractionOfMax &&
      gap_iqr < min_width * kMaxLeaderGapFractionOfMin &&
      blob_count >= kMinLeaderCount) {
    // This is stable enough to be called a leader, so check the widths.
    // Since leader dashes can join, run a dp cutting algorithm and go
    // on the cost.
    int offset = static_cast<int>(ceil(gap_iqr * 2));
    int min_step = static_cast<int>(median_gap + median_width + 0.5);
    int max_step = min_step + offset;
    min_step -= offset;
    // Pad the buffer with min_step/2 on each end.
    int part_left = bounding_box_.left() - min_step / 2;
    part_width += min_step;
    DPPoint* projection = new DPPoint[part_width];
    for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
      BLOBNBOX* blob = it.data();
      int left = blob->bounding_box().left();
      int right = blob->bounding_box().right();
      int height = blob->bounding_box().height();
      // NOTE(review): the index uses `left`, not the loop variable `x`, so
      // the whole cost of a blob lands on its left-edge cell. This looks
      // unintended, but changing it alters the DP costs, so it is left as-is
      // pending confirmation.
      for (int x = left; x < right; ++x) {
        projection[left - part_left].AddLocalCost(height);
      }
    }
    DPPoint* best_end = DPPoint::Solve(min_step, max_step, false,
                                       &DPPoint::CostWithVariance,
                                       part_width, projection);
    if (best_end != NULL && best_end->total_cost() < blob_count) {
      // Good enough. Call it a leader.
      result = true;
      for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
        BLOBNBOX* blob = it.data();
        // If the first or last blob is spaced too much, don't mark it.
        if (it.at_first()) {
          int gap = it.data_relative(1)->bounding_box().left() -
                    blob->bounding_box().right();
          if (blob->bounding_box().width() + gap > max_step) {
            it.extract();
            continue;
          }
        }
        if (it.at_last()) {
          int gap = blob->bounding_box().left() -
                    it.data_relative(-1)->bounding_box().right();
          if (blob->bounding_box().width() + gap > max_step) {
            it.extract();
            break;
          }
        }
        blob->set_region_type(BRT_TEXT);
        blob->set_flow(BTFT_LEADER);
      }
      blob_type_ = BRT_TEXT;
      flow_ = BTFT_LEADER;
    } else if (textord_debug_tabfind) {
      if (best_end == NULL) {
        tprintf("No path\n");
      } else {
        // Pass both values; the old call passed the single boolean
        // `total_cost() < blob_count` for two %d conversions.
        // %d assumes total_cost() is an int, as the comparison with
        // blob_count suggests.
        tprintf("Total cost = %d vs allowed %d\n",
                best_end->total_cost(), blob_count);
      }
    }
    delete [] projection;
  }
  return result;
}
bool tesseract::ColPartition::MatchingColumns | ( | const ColPartition & | other | ) | const |
Definition at line 349 of file colpartition.cpp.
{
  // Compare the two partitions' edges at the y midway between their middles,
  // after scaling both down by kColumnWidthFactor, with a tolerance of 1.
  const int mid_y = (MidY() + other.MidY()) / 2;
  const bool left_edges_match =
      NearlyEqual(other.LeftAtY(mid_y) / kColumnWidthFactor,
                  LeftAtY(mid_y) / kColumnWidthFactor, 1);
  if (!left_edges_match)
    return false;
  const bool right_edges_match =
      NearlyEqual(other.RightAtY(mid_y) / kColumnWidthFactor,
                  RightAtY(mid_y) / kColumnWidthFactor, 1);
  if (!right_edges_match)
    return false;
  return true;
}
bool tesseract::ColPartition::MatchingSizes | ( | const ColPartition & | other | ) | const |
Definition at line 384 of file colpartition.cpp.
{
  // If either partition is vertical text, compare median widths; otherwise
  // compare median sizes. "Matching" is the negation of DifferentSizes().
  const bool either_is_vertical_text =
      blob_type_ == BRT_VERT_TEXT || other.blob_type_ == BRT_VERT_TEXT;
  if (!either_is_vertical_text)
    return !TabFind::DifferentSizes(median_size_, other.median_size_);
  return !TabFind::DifferentSizes(median_width_, other.median_width_);
}
bool tesseract::ColPartition::MatchingStrokeWidth | ( | const ColPartition & | other, |
double | fractional_tolerance, | ||
double | constant_tolerance | ||
) | const |
Definition at line 409 of file colpartition.cpp.
{
  // Walk both blob lists in lock-step, pairing blobs by position, and count
  // how many pairs agree on stroke width. Stops when either list has cycled.
  int agree = 0;
  int disagree = 0;
  BLOBNBOX_C_IT mine(const_cast<BLOBNBOX_CLIST*>(&boxes_));
  BLOBNBOX_C_IT theirs(const_cast<BLOBNBOX_CLIST*>(&other.boxes_));
  mine.mark_cycle_pt();
  theirs.mark_cycle_pt();
  for (; !mine.cycled_list() && !theirs.cycled_list();
       mine.forward(), theirs.forward()) {
    if (mine.data()->MatchingStrokeWidth(*theirs.data(),
                                         fractional_tolerance,
                                         constant_tolerance)) {
      ++agree;
    } else {
      ++disagree;
    }
  }
  // A strict majority of the compared pairs must match.
  return agree > disagree;
}
bool tesseract::ColPartition::MatchingTextColor | ( | const ColPartition & | other | ) | const |
Definition at line 361 of file colpartition.cpp.
{
  // When both partitions exceed the noise limit in the L_ALPHA_CHANNEL slot of
  // color1_, refuse to call it a match.
  const bool this_is_noisy = color1_[L_ALPHA_CHANNEL] > kMaxRMSColorNoise;
  const bool other_is_noisy =
      other.color1_[L_ALPHA_CHANNEL] > kMaxRMSColorNoise;
  if (this_is_noisy && other_is_noisy)
    return false;  // Too noisy.

  // Colors must match for other to count: measure each end colour of one
  // partition against the colour line of the other, in both directions.
  double this1_to_other_line = ImageFind::ColorDistanceFromLine(
      other.color1_, other.color2_, color1_);
  double this2_to_other_line = ImageFind::ColorDistanceFromLine(
      other.color1_, other.color2_, color2_);
  double other1_to_this_line = ImageFind::ColorDistanceFromLine(
      color1_, color2_, other.color1_);
  double other2_to_this_line = ImageFind::ColorDistanceFromLine(
      color1_, color2_, other.color2_);

  // All 4 distances must be small enough.
  const bool this_close_to_other =
      this1_to_other_line < kMaxColorDistance &&
      this2_to_other_line < kMaxColorDistance;
  const bool other_close_to_this =
      other1_to_this_line < kMaxColorDistance &&
      other2_to_this_line < kMaxColorDistance;
  return this_close_to_other && other_close_to_this;
}
int tesseract::ColPartition::median_bottom | ( | ) | const [inline] |
Definition at line 127 of file colpartition.h.
{
// Plain accessor for median_bottom_.
return median_bottom_;
}
int tesseract::ColPartition::median_left | ( | ) | const [inline] |
Definition at line 130 of file colpartition.h.
{
// Plain accessor for median_left_.
return median_left_;
}
int tesseract::ColPartition::median_right | ( | ) | const [inline] |
Definition at line 133 of file colpartition.h.
{
// Plain accessor for median_right_.
return median_right_;
}
int tesseract::ColPartition::median_size | ( | ) | const [inline] |
Definition at line 136 of file colpartition.h.
{
// Plain accessor for median_size_.
return median_size_;
}
int tesseract::ColPartition::median_top | ( | ) | const [inline] |
Definition at line 124 of file colpartition.h.
{
// Plain accessor for median_top_.
return median_top_;
}
int tesseract::ColPartition::median_width | ( | ) | const [inline] |
Definition at line 142 of file colpartition.h.
{
// Plain accessor for median_width_.
return median_width_;
}
int tesseract::ColPartition::MedianY | ( | ) | const [inline] |
Definition at line 308 of file colpartition.h.
{
// Midpoint of median_top_ and median_bottom_, using integer division.
return (median_top_ + median_bottom_) / 2;
}
int tesseract::ColPartition::MidX | ( | ) | const [inline] |
Definition at line 312 of file colpartition.h.
int tesseract::ColPartition::MidY | ( | ) | const [inline] |
Definition at line 304 of file colpartition.h.
ColPartition* tesseract::ColPartition::nearest_neighbor_above | ( | ) | const [inline] |
Definition at line 249 of file colpartition.h.
{
// Plain accessor for the nearest_neighbor_above_ pointer.
return nearest_neighbor_above_;
}
ColPartition* tesseract::ColPartition::nearest_neighbor_below | ( | ) | const [inline] |
Definition at line 255 of file colpartition.h.
{
// Plain accessor for the nearest_neighbor_below_ pointer.
return nearest_neighbor_below_;
}
bool tesseract::ColPartition::OKDiacriticMerge | ( | const ColPartition & | candidate, |
bool | debug | ||
) | const |
Definition at line 437 of file colpartition.cpp.
{
  // Returns true when every blob in this partition is a diacritic and the
  // common vertical range of their base characters overlaps the median range
  // of candidate. With debug set, the reason for a rejection is printed.
  BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
  int min_top = MAX_INT32;
  int max_bottom = -MAX_INT32;
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    BLOBNBOX* blob = it.data();
    if (!blob->IsDiacritic()) {
      if (debug) {
        tprintf("Blob is not a diacritic:");
        blob->bounding_box().print();
      }
      return false;  // All blobs must have diacritic bases.
    }
    // Track the intersection of the base-character ranges: lowest top and
    // highest bottom seen so far.
    if (blob->base_char_top() < min_top)
      min_top = blob->base_char_top();
    if (blob->base_char_bottom() > max_bottom)
      max_bottom = blob->base_char_bottom();
  }
  // If the intersection of all vertical ranges of all base characters
  // overlaps the median range of candidate, then it is OK.
  bool result = min_top > candidate.median_bottom_ &&
                max_bottom < candidate.median_top_;
  if (debug) {
    if (result) {
      tprintf("OKDiacritic!\n");
    } else {
      // Report the range that was actually tested (candidate's medians); the
      // message previously printed this partition's own medians instead.
      tprintf("y ranges don\'t overlap: %d-%d / %d-%d\n",
              max_bottom, min_top,
              candidate.median_bottom_, candidate.median_top_);
    }
  }
  return result;
}
bool tesseract::ColPartition::OKMergeOverlap | ( | const ColPartition & | merge1, |
const ColPartition & | merge2, | ||
int | ok_box_overlap, | ||
bool | debug | ||
) |
Definition at line 713 of file colpartition.cpp.
{
  // Vertical partitions are not allowed to be involved.
  const bool any_vertical =
      IsVerticalType() || merge1.IsVerticalType() || merge2.IsVerticalType();
  if (any_vertical) {
    if (debug)
      tprintf("Vertical partition\n");
    return false;
  }

  // The merging partitions must strongly overlap each other.
  if (!merge1.VSignificantCoreOverlap(merge2)) {
    if (debug) {
      tprintf("Voverlap %d (%d)\n",
              merge1.VCoreOverlap(merge2),
              merge1.VSignificantCoreOverlap(merge2));
    }
    return false;
  }

  // The merged box must not overlap the median bounds of this. A box-level
  // overlap of up to ok_box_overlap is tolerated.
  TBOX union_box(merge1.bounding_box());
  union_box += merge2.bounding_box();
  const bool crosses_median_band =
      union_box.bottom() < median_top_ && union_box.top() > median_bottom_;
  if (crosses_median_band &&
      union_box.bottom() < bounding_box_.top() - ok_box_overlap &&
      union_box.top() > bounding_box_.bottom() + ok_box_overlap) {
    if (debug)
      tprintf("Excessive box overlap\n");
    return false;
  }

  // Looks OK!
  return true;
}
Definition at line 746 of file colpartition.cpp.
{
  // With fewer than two blobs there is nothing to return.
  if (boxes_.empty() || boxes_.singleton())
    return NULL;
  BLOBNBOX_C_IT blob_it(&boxes_);
  // Running union of the boxes visited so far, seeded with the first blob.
  TBOX accumulated(blob_it.data()->bounding_box());
  blob_it.forward();
  while (!blob_it.at_first()) {
    BLOBNBOX* current = blob_it.data();
    accumulated += current->bounding_box();
    // The first blob whose inclusion makes the union overlap box is returned.
    if (accumulated.overlap(box))
      return current;
    blob_it.forward();
  }
  return NULL;
}
bool tesseract::ColPartition::owns_blobs | ( | ) | const [inline] |
Definition at line 291 of file colpartition.h.
{
// Plain accessor for the owns_blobs_ flag.
return owns_blobs_;
}
PolyBlockType tesseract::ColPartition::PartitionType | ( | ColumnSpanningType | flow | ) | const |
Definition at line 978 of file colpartition.cpp.
{
  // Maps the (blob_type_, flow) pair onto a PolyBlockType.
  // Noise flow: lines, rectangular images and vertical text are kept and
  // treated as flowing; every other blob type becomes PT_NOISE.
  if (flow == CST_NOISE) {
    if (blob_type_ != BRT_HLINE && blob_type_ != BRT_VLINE &&
        blob_type_ != BRT_RECTIMAGE && blob_type_ != BRT_VERT_TEXT)
      return PT_NOISE;
    flow = CST_FLOWING;
  }
  switch (blob_type_) {
    case BRT_NOISE:
      return PT_NOISE;
    case BRT_HLINE:
      return PT_HORZ_LINE;
    case BRT_VLINE:
      return PT_VERT_LINE;
    case BRT_RECTIMAGE:
    case BRT_POLYIMAGE:
      // Images: the result depends only on the flow.
      switch (flow) {
        case CST_FLOWING:
          return PT_FLOWING_IMAGE;
        case CST_HEADING:
          return PT_HEADING_IMAGE;
        case CST_PULLOUT:
          return PT_PULLOUT_IMAGE;
        default:
          ASSERT_HOST(!"Undefined flow type for image!");
      }
      break;
    case BRT_VERT_TEXT:
      return PT_VERTICAL_TEXT;
    case BRT_TEXT:
    case BRT_UNKNOWN:
    default:
      // Text, unknown and any remaining blob type: the flow picks the type.
      switch (flow) {
        case CST_FLOWING:
          return PT_FLOWING_TEXT;
        case CST_HEADING:
          return PT_HEADING_TEXT;
        case CST_PULLOUT:
          return PT_PULLOUT_TEXT;
        default:
          ASSERT_HOST(!"Undefined flow type for text!");
      }
  }
  // Only reachable after one of the asserts above has been passed over.
  ASSERT_HOST(!"Should never get here!");
  return PT_NOISE;
}
void tesseract::ColPartition::Print | ( | ) | const |
Definition at line 1735 of file colpartition.cpp.
{
  // Dumps the partition's state in a single tprintf call. The key positions
  // are evaluated at the partition's MidY().
  int y = MidY();
  // Argument order follows the format: left-hand values (margin, key, box,
  // medians), right-hand values, goodness flags, types, columns, blob count,
  // then the four space_* members.
  tprintf("ColPart:%c(M%d-%c%d-B%d/%d,%d/%d)->(%dB-%d%c-%dM/%d,%d/%d)"
          " w-ok=%d, v-ok=%d, type=%d%c%d, fc=%d, lc=%d, boxes=%d"
          " ts=%d bs=%d ls=%d rs=%d\n",
          boxes_.empty() ? 'E' : ' ',
          left_margin_, left_key_tab_ ? 'T' : 'B', LeftAtY(y),
          bounding_box_.left(), median_left_,
          bounding_box_.bottom(), median_bottom_,
          bounding_box_.right(), RightAtY(y), right_key_tab_ ? 'T' : 'B',
          right_margin_, median_right_, bounding_box_.top(), median_top_,
          good_width_, good_column_, type_,
          kBlobTypes[blob_type_], flow_,
          first_column_, last_column_, boxes_.length(),
          space_above_, space_below_, space_to_left_, space_to_right_);
}
void tesseract::ColPartition::PrintColors | ( | ) |
Definition at line 1753 of file colpartition.cpp.
{
  // Prints the RGB entries of color1_ followed by its L_ALPHA_CHANNEL entry,
  // then the RGB entries of color2_.
  tprintf("Colors:(%d, %d, %d)%d -> (%d, %d, %d)\n",
          color1_[COLOR_RED], color1_[COLOR_GREEN], color1_[COLOR_BLUE],
          color1_[L_ALPHA_CHANNEL],
          color2_[COLOR_RED], color2_[COLOR_GREEN], color2_[COLOR_BLUE]);
}
void tesseract::ColPartition::RefinePartners | ( | PolyBlockType | type, |
bool | get_desperate, |
ColPartitionGrid * | grid | ||
) |
Definition at line 1830 of file colpartition.cpp.
{
  // Matching type: refine the upper partners, then the lower partners.
  if (TypesSimilar(type_, type)) {
    RefinePartnersInternal(true, get_desperate, grid);
    RefinePartnersInternal(false, get_desperate, grid);
    return;
  }
  // Any other type except PT_COUNT leaves the partners untouched.
  if (type != PT_COUNT)
    return;

  // This is the final pass. Make sure only the correctly typed
  // partners survive, however many there are.
  RefinePartnersByType(true, &upper_partners_);
  RefinePartnersByType(false, &lower_partners_);
  // It is possible for a merge to have given a partition multiple
  // partners again, so the last resort is to use overlap which is
  // guaranteed to leave at most one partner left.
  const bool several_upper =
      !upper_partners_.empty() && !upper_partners_.singleton();
  if (several_upper)
    RefinePartnersByOverlap(true, &upper_partners_);
  const bool several_lower =
      !lower_partners_.empty() && !lower_partners_.singleton();
  if (several_lower)
    RefinePartnersByOverlap(false, &lower_partners_);
}
void tesseract::ColPartition::ReflectInYAxis | ( | ) |
Definition at line 299 of file colpartition.cpp.
{
  // Reverses the order of the blobs in boxes_, swaps and negates the left and
  // right margins, then recomputes the limits. Requires that neither key is a
  // tab (asserted below).
  //
  // The scratch list holds the BLOBNBOX pointers extracted from boxes_ and is
  // spliced back into boxes_, so it must be a BLOBNBOX list, not a
  // ColPartition list.
  BLOBNBOX_CLIST reversed_boxes;
  BLOBNBOX_C_IT reversed_it(&reversed_boxes);
  // Reverse the order of the boxes_.
  BLOBNBOX_C_IT bb_it(&boxes_);
  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
    // Inserting each extracted blob before the previous one reverses the order.
    reversed_it.add_before_then_move(bb_it.extract());
  }
  bb_it.add_list_after(&reversed_boxes);
  ASSERT_HOST(!left_key_tab_ && !right_key_tab_);
  // Mirror the margins: the old right margin becomes the new left one.
  int tmp = left_margin_;
  left_margin_ = -right_margin_;
  right_margin_ = -tmp;
  ComputeLimits();
}
void tesseract::ColPartition::RemoveBox | ( | BLOBNBOX * | box | ) |
Definition at line 213 of file colpartition.cpp.
{
  // Search boxes_ for the given blob pointer. The first match is extracted
  // from the list and the limits are recomputed. No match: nothing changes.
  BLOBNBOX_C_IT search_it(&boxes_);
  for (search_it.mark_cycle_pt(); !search_it.cycled_list();
       search_it.forward()) {
    if (search_it.data() != box)
      continue;
    search_it.extract();
    ComputeLimits();
    return;
  }
}
void tesseract::ColPartition::RemovePartner | ( | bool | upper, |
ColPartition * | partner | ||
) |
Definition at line 597 of file colpartition.cpp.
{
  // Pick the partner list selected by `upper`, then drop the first entry that
  // is the given partner. Nothing happens when the partner is not listed.
  ColPartition_CLIST* partner_list =
      upper ? &upper_partners_ : &lower_partners_;
  ColPartition_C_IT partner_it(partner_list);
  for (partner_it.mark_cycle_pt(); !partner_it.cycled_list();
       partner_it.forward()) {
    if (partner_it.data() != partner)
      continue;
    partner_it.extract();
    return;
  }
}
int tesseract::ColPartition::right_key | ( | ) | const [inline] |
Definition at line 178 of file colpartition.h.
{
// Plain accessor for right_key_.
return right_key_;
}
bool tesseract::ColPartition::right_key_tab | ( | ) | const [inline] |
Definition at line 175 of file colpartition.h.
{
// Plain accessor for the right_key_tab_ flag.
return right_key_tab_;
}
int tesseract::ColPartition::right_margin | ( | ) | const [inline] |
Definition at line 118 of file colpartition.h.
{
// Plain accessor for right_margin_.
return right_margin_;
}
int tesseract::ColPartition::RightAtY | ( | int | y | ) | const [inline] |
Definition at line 344 of file colpartition.h.
// Evaluates XAtY() for the right key at the given y.
{ return XAtY(right_key_, y); }
int tesseract::ColPartition::RightBlobRule | ( | ) | const |
Definition at line 529 of file colpartition.cpp.
{
// The iterator API needs a non-const list, hence the const_cast; the list is
// only read here. Move to the last blob and ask it for its right rule.
BLOBNBOX_C_IT last_it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
last_it.move_to_last();
BLOBNBOX* last_blob = last_it.data();
return last_blob->right_rule();
}
void tesseract::ColPartition::set_blob_type | ( | BlobRegionType | t | ) | [inline] |
Definition at line 151 of file colpartition.h.
// Overwrites blob_type_ with t.
{ blob_type_ = t; }
void tesseract::ColPartition::set_block_owned | ( | bool | owned | ) | [inline] |
Definition at line 208 of file colpartition.h.
// Overwrites the block_owned_ flag.
{ block_owned_ = owned; }
void tesseract::ColPartition::set_bottom_spacing | ( | int | spacing | ) | [inline] |
Definition at line 223 of file colpartition.h.
// Overwrites bottom_spacing_.
{ bottom_spacing_ = spacing; }
void tesseract::ColPartition::set_first_column | ( | int | column | ) | [inline] |
Definition at line 688 of file colpartition.h.
// Overwrites first_column_.
{ first_column_ = column; }
void tesseract::ColPartition::set_flow | ( | BlobTextFlowType | f | ) | [inline] |
Definition at line 157 of file colpartition.h.
// Overwrites flow_ with f.
{ flow_ = f; }
void tesseract::ColPartition::set_inside_table_column | ( | bool | val | ) | [inline] |
Definition at line 246 of file colpartition.h.
// Overwrites the inside_table_column_ flag.
{ inside_table_column_ = val; }
void tesseract::ColPartition::set_last_column | ( | int | column | ) | [inline] |
Definition at line 691 of file colpartition.h.
// Overwrites last_column_.
{ last_column_ = column; }
void tesseract::ColPartition::set_left_margin | ( | int | margin | ) | [inline] |
Definition at line 115 of file colpartition.h.
// Overwrites left_margin_.
{ left_margin_ = margin; }
void tesseract::ColPartition::set_median_size | ( | int | size | ) | [inline] |
Definition at line 139 of file colpartition.h.
// Overwrites median_size_.
{ median_size_ = size; }
void tesseract::ColPartition::set_median_width | ( | int | width | ) | [inline] |
Definition at line 145 of file colpartition.h.
// Overwrites median_width_.
{ median_width_ = width; }
void tesseract::ColPartition::set_nearest_neighbor_above | ( | ColPartition * | part | ) | [inline] |
Definition at line 252 of file colpartition.h.
// Stores the pointer as nearest_neighbor_above_.
{ nearest_neighbor_above_ = part; }
void tesseract::ColPartition::set_nearest_neighbor_below | ( | ColPartition * | part | ) | [inline] |
Definition at line 258 of file colpartition.h.
// Stores the pointer as nearest_neighbor_below_.
{ nearest_neighbor_below_ = part; }
void tesseract::ColPartition::set_owns_blobs | ( | bool | owns_blobs | ) | [inline] |
Definition at line 294 of file colpartition.h.
{
  // Do NOT change ownership flag when there are blobs in the list.
  // Immediately set the ownership flag when creating copies.
  // The assert enforces that rule: the blob list must still be empty.
  ASSERT_HOST(boxes_.empty());
  owns_blobs_ = owns_blobs;
}
void tesseract::ColPartition::set_right_margin | ( | int | margin | ) | [inline] |
Definition at line 121 of file colpartition.h.
// Overwrites right_margin_.
{ right_margin_ = margin; }
void tesseract::ColPartition::set_side_step | ( | int | step | ) | [inline] |
Definition at line 217 of file colpartition.h.
// Overwrites side_step_.
{ side_step_ = step; }
void tesseract::ColPartition::set_space_above | ( | int | space | ) | [inline] |
Definition at line 264 of file colpartition.h.
// Overwrites space_above_.
{ space_above_ = space; }
void tesseract::ColPartition::set_space_below | ( | int | space | ) | [inline] |
Definition at line 270 of file colpartition.h.
// Overwrites space_below_.
{ space_below_ = space; }
void tesseract::ColPartition::set_space_to_left | ( | int | space | ) | [inline] |
Definition at line 276 of file colpartition.h.
// Overwrites space_to_left_.
{ space_to_left_ = space; }
void tesseract::ColPartition::set_space_to_right | ( | int | space | ) | [inline] |
Definition at line 282 of file colpartition.h.
// Overwrites space_to_right_.
{ space_to_right_ = space; }
void tesseract::ColPartition::set_table_type | ( | ) | [inline] |
Definition at line 233 of file colpartition.h.
void tesseract::ColPartition::set_top_spacing | ( | int | spacing | ) | [inline] |
Definition at line 229 of file colpartition.h.
// Overwrites top_spacing_.
{ top_spacing_ = spacing; }
void tesseract::ColPartition::set_type | ( | PolyBlockType | t | ) | [inline] |
Definition at line 184 of file colpartition.h.
// Overwrites type_ with t.
{ type_ = t; }
void tesseract::ColPartition::set_vertical | ( | const ICOORD & | v | ) | [inline] |
Definition at line 193 of file colpartition.h.
// Copies v into vertical_.
{ vertical_ = v; }
void tesseract::ColPartition::set_working_set | ( | WorkingPartSet * | working_set | ) | [inline] |
Definition at line 202 of file colpartition.h.
// Stores the pointer as working_set_.
{ working_set_ = working_set; }
void tesseract::ColPartition::SetBlobTypes | ( | ) |
Definition at line 1233 of file colpartition.cpp.
{
  // Only a partition that owns its blobs may relabel them.
  if (!owns_blobs())
    return;
  BLOBNBOX_C_IT blob_it(&boxes_);
  blob_it.mark_cycle_pt();
  while (!blob_it.cycled_list()) {
    BLOBNBOX* blob = blob_it.data();
    // Blobs already flagged as leaders keep their flow; all others take the
    // partition's flow. The region type is always overwritten.
    const bool is_leader = blob->flow() == BTFT_LEADER;
    if (!is_leader)
      blob->set_flow(flow_);
    blob->set_region_type(blob_type_);
    // A blob must be unowned or owned by this partition.
    ASSERT_HOST(blob->owner() == NULL || blob->owner() == this);
    blob_it.forward();
  }
}
void tesseract::ColPartition::SetColumnGoodness | ( | WidthCallback * | cb | ) |
void tesseract::ColPartition::SetLeftTab | ( | const TabVector * | tab_vector | ) |
Definition at line 473 of file colpartition.cpp.
{
  // With a tab vector, adopt its sort key; it only counts as a tab when the
  // key is at or before the box's left key. Without one there is no tab.
  const bool have_vector = tab_vector != NULL;
  if (have_vector)
    left_key_ = tab_vector->sort_key();
  left_key_tab_ = have_vector && left_key_ <= BoxLeftKey();
  // No usable tab: fall back to the box's own left key.
  if (!left_key_tab_)
    left_key_ = BoxLeftKey();
}
void tesseract::ColPartition::SetPartitionType | ( | int | resolution, |
ColPartitionSet * | columns | ||
) |
Definition at line 946 of file colpartition.cpp.
{
  // Ask the column set how this partition spans its columns. The call also
  // fills in first_column_, last_column_ and the first spanned column.
  int first_spanned_col = -1;
  ColumnSpanningType span_type =
      columns->SpanningType(resolution,
                            bounding_box_.left(), bounding_box_.right(),
                            MidY(), left_margin_, right_margin_,
                            &first_column_, &last_column_,
                            &first_spanned_col);
  column_set_ = columns;

  const bool multi_column_pullout =
      first_column_ < last_column_ && span_type == CST_PULLOUT && !IsLineType();
  if (multi_column_pullout) {
    // Unequal columns may indicate that the pullout spans one of the columns
    // it lies in, so force it to be allocated to just that column.
    if (first_spanned_col >= 0) {
      first_column_ = first_spanned_col;
      last_column_ = first_spanned_col;
    } else if ((first_column_ & 1) == 0) {
      // Even first column: collapse the range onto it.
      last_column_ = first_column_;
    } else if ((last_column_ & 1) == 0) {
      // Even last column: collapse the range onto it.
      first_column_ = last_column_;
    } else {
      // Neither end is even: collapse onto the midpoint.
      const int middle_column = (first_column_ + last_column_) / 2;
      last_column_ = middle_column;
      first_column_ = middle_column;
    }
  }
  type_ = PartitionType(span_type);
}
void tesseract::ColPartition::SetRegionAndFlowTypesFromProjectionValue | ( | int | value | ) |
Definition at line 1159 of file colpartition.cpp.
{
  // Sets flow_ and blob_type_ from the blobs' region types and the given
  // projection value, then pushes them to the blobs via SetBlobTypes().
  int num_blobs = 0;    // Total # blobs.
  // NOTE(review): the original declared this local as `good_blob_score_`,
  // shadowing the member of the same name, so the member is NOT updated by
  // this function. Behavior is kept as-is; confirm whether the member was
  // meant to be written.
  int good_score = 0;   // Total # good strokewidth neighbours.
  int num_noisy = 0;    // Total # neighbours marked as noise.
  int num_hlines = 0;
  int num_vlines = 0;
  BLOBNBOX_C_IT blob_it(&boxes_);
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    BLOBNBOX* box = blob_it.data();
    ++num_blobs;
    num_noisy += box->NoisyNeighbours();
    good_score += box->GoodTextBlob();
    if (box->region_type() == BRT_HLINE) ++num_hlines;
    if (box->region_type() == BRT_VLINE) ++num_vlines;
  }

  // Defaults, overridden by the cases below.
  flow_ = BTFT_NEIGHBOURS;
  blob_type_ = BRT_UNKNOWN;
  if (num_hlines > num_vlines) {
    flow_ = BTFT_NONE;
    blob_type_ = BRT_HLINE;
  } else if (num_vlines > num_hlines) {
    flow_ = BTFT_NONE;
    blob_type_ = BRT_VLINE;
  } else if (value < -1 || 1 < value) {
    // Positive value => BRT_TEXT, negative value => BRT_VERT_TEXT; the long
    // and short sides of the bounding box swap accordingly.
    const bool horizontal = value > 0;
    int long_side;
    int short_side;
    if (horizontal) {
      long_side = bounding_box_.width();
      short_side = bounding_box_.height();
      blob_type_ = BRT_TEXT;
    } else {
      long_side = bounding_box_.height();
      short_side = bounding_box_.width();
      blob_type_ = BRT_VERT_TEXT;
    }
    // We will combine the old metrics using aspect ratio and blob counts
    // with the input value by allowing a strong indication to flip the
    // STRONG_CHAIN/CHAIN flow values.
    int strong_score = 0;
    if (num_blobs >= kHorzStrongTextlineCount) ++strong_score;
    if (short_side > kHorzStrongTextlineHeight) ++strong_score;
    if (short_side * kHorzStrongTextlineAspect < long_side) ++strong_score;

    if (abs(value) >= kMinStrongTextValue) {
      flow_ = BTFT_STRONG_CHAIN;
    } else if (abs(value) >= kMinChainTextValue) {
      flow_ = BTFT_CHAIN;
    } else {
      flow_ = BTFT_NEIGHBOURS;
    }
    // Upgrade chain to strong chain if the other indicators are good
    if (flow_ == BTFT_CHAIN && strong_score == 3) {
      flow_ = BTFT_STRONG_CHAIN;
    }
    // Downgrade strong vertical text to chain if the indicators are bad.
    if (flow_ == BTFT_STRONG_CHAIN && value < 0 && strong_score < 2) {
      flow_ = BTFT_CHAIN;
    }
  }

  // Check for noisy neighbours.
  if (flow_ == BTFT_NEIGHBOURS && num_noisy >= num_blobs) {
    flow_ = BTFT_NONTEXT;
    blob_type_= BRT_NOISE;
  }

  // Debug output for partitions inside the test region.
  if (TabFind::WithinTestRegion(2, bounding_box_.left(),
                                bounding_box_.bottom())) {
    tprintf("RegionFlowTypesFromProjectionValue count=%d, noisy=%d, score=%d,",
            num_blobs, num_noisy, good_score);
    tprintf(" Projection value=%d, flow=%d, blob_type=%d\n",
            value, flow_, blob_type_);
    Print();
  }
  SetBlobTypes();
}
void tesseract::ColPartition::SetRightTab | ( | const TabVector * | tab_vector | ) |
Definition at line 485 of file colpartition.cpp.
{
  // Takes the right key from tab_vector when one is given and its sort key
  // is not below BoxRightKey(); otherwise falls back to BoxRightKey().
  bool key_from_tab = false;
  if (tab_vector != NULL) {
    right_key_ = tab_vector->sort_key();
    key_from_tab = right_key_ >= BoxRightKey();
  }
  right_key_tab_ = key_from_tab;
  if (!key_from_tab) {
    // No usable tab vector: the key comes from the box itself.
    right_key_ = BoxRightKey();
  }
}
void tesseract::ColPartition::SetSpecialBlobsDensity | ( | const BlobSpecialTextType | type, |
const float | density | ||
) |
Definition at line 555 of file colpartition.cpp.
{
  // type is used as an index into special_blobs_densities_, so it is
  // asserted to be below BSTT_COUNT before the store.
  ASSERT_HOST(type < BSTT_COUNT);
  special_blobs_densities_[type] = density;
}
ColPartition * tesseract::ColPartition::ShallowCopy | ( | ) | const |
Definition at line 1684 of file colpartition.cpp.
{
  // Builds a new heap-allocated partition with the same blob_type_ and
  // vertical_, copies the scalar state listed below, and marks the copy as
  // not owning blobs. No blobs are added to the copy here. The caller
  // receives the raw pointer.
  ColPartition* copy = new ColPartition(blob_type_, vertical_);

  // Classification.
  copy->type_ = type_;
  copy->flow_ = flow_;

  // Box and margins.
  copy->bounding_box_ = bounding_box_;
  copy->left_margin_ = left_margin_;
  copy->right_margin_ = right_margin_;

  // Median statistics.
  copy->median_top_ = median_top_;
  copy->median_bottom_ = median_bottom_;
  copy->median_left_ = median_left_;
  copy->median_right_ = median_right_;
  copy->median_size_ = median_size_;
  copy->median_width_ = median_width_;

  // Tab keys and column range.
  copy->left_key_ = left_key_;
  copy->right_key_ = right_key_;
  copy->left_key_tab_ = left_key_tab_;
  copy->right_key_tab_ = right_key_tab_;
  copy->first_column_ = first_column_;
  copy->last_column_ = last_column_;

  // Goodness flags and special-blob densities.
  copy->good_width_ = good_width_;
  copy->good_column_ = good_column_;
  memcpy(copy->special_blobs_densities_, special_blobs_densities_,
         sizeof(special_blobs_densities_));

  copy->owns_blobs_ = false;
  return copy;
}
ColPartition * tesseract::ColPartition::SingletonPartner | ( | bool | upper | ) |
Definition at line 608 of file colpartition.cpp.
{
  // Picks the upper or lower partner list and returns its only element,
  // or NULL when that list is not a singleton.
  ColPartition_CLIST* list = &lower_partners_;
  if (upper) list = &upper_partners_;
  if (list->singleton()) {
    ColPartition_C_IT partner_it(list);
    return partner_it.data();
  }
  return NULL;
}
void tesseract::ColPartition::SmoothPartnerRun | ( | int | working_set_count | ) |
Definition at line 1761 of file colpartition.cpp.
{
  // Walks the chain of singleton lower partners, finds the largest type_
  // among this partition and the chain, and assigns it to all of them.
  // Column statistics are gathered but currently only consumed by the
  // disabled (#if 0) code.
  STATS left_stats(0, working_set_count);
  STATS right_stats(0, working_set_count);
  PolyBlockType best_type = type_;

  // Pass 1: collect the maximum type and the column statistics.
  ColPartition* partner = SingletonPartner(false);
  while (partner != NULL) {
    if (partner->type_ > best_type) best_type = partner->type_;
    if (column_set_ == partner->column_set_) {
      left_stats.add(partner->first_column_, 1);
      right_stats.add(partner->last_column_, 1);
    }
    partner = partner->SingletonPartner(false);
  }
  type_ = best_type;
  // TODO(rays) Either establish that it isn't necessary to set the columns,
  // or find a way to do it that does not cause an assert failure in
  // AddToWorkingSet.
#if 0
  first_column_ = left_stats.mode();
  last_column_ = right_stats.mode();
  if (last_column_ < first_column_)
    last_column_ = first_column_;
#endif

  // Pass 2: push the maximum type down the same chain.
  partner = SingletonPartner(false);
  while (partner != NULL) {
    partner->type_ = best_type;
#if 0  // See TODO above
    if (column_set_ == partner->column_set_) {
      partner->first_column_ = first_column_;
      partner->last_column_ = last_column_;
    }
#endif
    partner = partner->SingletonPartner(false);
  }
}
int tesseract::ColPartition::SortKey | ( | int | x, |
int | y | ||
) | const [inline] |
Definition at line 316 of file colpartition.h.
{
  // Delegates to TabVector::SortKey using this partition's vertical_.
  return TabVector::SortKey(vertical_, x, y);
}
int tesseract::ColPartition::space_above | ( | ) | const [inline] |
Definition at line 261 of file colpartition.h.
{
  // Accessor for space_above_.
  return space_above_;
}
int tesseract::ColPartition::space_below | ( | ) | const [inline] |
Definition at line 267 of file colpartition.h.
{
  // Accessor for space_below_.
  return space_below_;
}
int tesseract::ColPartition::space_to_left | ( | ) | const [inline] |
Definition at line 273 of file colpartition.h.
{
  // Accessor for space_to_left_.
  return space_to_left_;
}
int tesseract::ColPartition::space_to_right | ( | ) | const [inline] |
Definition at line 279 of file colpartition.h.
{
  // Accessor for space_to_right_.
  return space_to_right_;
}
int tesseract::ColPartition::SpecialBlobsCount | ( | const BlobSpecialTextType | type | ) |
Definition at line 540 of file colpartition.cpp.
{
  // Counts the blobs in boxes_ whose special_text_type() equals type.
  ASSERT_HOST(type < BSTT_COUNT);
  int matches = 0;
  BLOBNBOX_C_IT it(&boxes_);
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    if (it.data()->special_text_type() == type) ++matches;
  }
  return matches;
}
float tesseract::ColPartition::SpecialBlobsDensity | ( | const BlobSpecialTextType | type | ) | const |
Definition at line 535 of file colpartition.cpp.
{
  // Returns the stored density for the given type; type indexes
  // special_blobs_densities_, so it is asserted to be below BSTT_COUNT.
  ASSERT_HOST(type < BSTT_COUNT);
  return special_blobs_densities_[type];
}
ColPartition * tesseract::ColPartition::SplitAt | ( | int | split_x | ) |
Definition at line 800 of file colpartition.cpp.
{
  // Moves every blob whose box.left() >= split_x into a new partition made
  // by ShallowCopy(). Returns the new partition, or NULL when split_x is
  // not strictly inside the bounding box or no blob was moved.
  const bool outside_box =
      split_x <= bounding_box_.left() || split_x >= bounding_box_.right();
  if (outside_box) {
    return NULL;  // There will be no change.
  }
  ColPartition* right_part = ShallowCopy();
  right_part->set_owns_blobs(owns_blobs());
  BLOBNBOX_C_IT blob_it(&boxes_);
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    BLOBNBOX* blob = blob_it.data();
    ColPartition* old_owner = blob->owner();
    ASSERT_HOST(!owns_blobs() || old_owner == this || old_owner == NULL);
    const TBOX& blob_box = blob->bounding_box();
    if (blob_box.left() < split_x) continue;  // Stays in this partition.
    right_part->AddBox(blob_it.extract());
    // Transfer ownership only for blobs that already had an owner.
    if (owns_blobs() && old_owner != NULL) {
      blob->set_owner(right_part);
    }
  }
  // This partition must keep at least one blob.
  ASSERT_HOST(!blob_it.empty());
  if (right_part->IsEmpty()) {
    // Split part ended up with nothing. Possible if split_x passes
    // through the last blob.
    delete right_part;
    return NULL;
  }
  // The facing edges of the two halves are no longer tab-aligned and both
  // take split_x as their margin.
  right_key_tab_ = false;
  right_margin_ = split_x;
  right_part->left_key_tab_ = false;
  right_part->left_margin_ = split_x;
  ComputeLimits();
  right_part->ComputeLimits();
  return right_part;
}
ColPartition * tesseract::ColPartition::SplitAtBlob | ( | BLOBNBOX * | split_blob | ) |
Definition at line 764 of file colpartition.cpp.
{
  // Moves split_blob and every blob after it in boxes_ into a new partition
  // made by ShallowCopy(). Returns the new partition, or NULL when nothing
  // was moved.
  ColPartition* tail_part = ShallowCopy();
  tail_part->set_owns_blobs(owns_blobs());
  BLOBNBOX_C_IT blob_it(&boxes_);
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    BLOBNBOX* blob = blob_it.data();
    ColPartition* old_owner = blob->owner();
    ASSERT_HOST(!owns_blobs() || old_owner == this || old_owner == NULL);
    // Once the first blob has been moved, all remaining blobs follow it.
    const bool move_blob =
        blob == split_blob || !tail_part->boxes_.empty();
    if (!move_blob) continue;
    tail_part->AddBox(blob_it.extract());
    // Transfer ownership only for blobs that already had an owner.
    if (owns_blobs() && old_owner != NULL) {
      blob->set_owner(tail_part);
    }
  }
  // This partition must keep at least one blob.
  ASSERT_HOST(!blob_it.empty());
  if (tail_part->IsEmpty()) {
    // Split part ended up with nothing. Possible if split_blob is not
    // in the list of blobs.
    delete tail_part;
    return NULL;
  }
  right_key_tab_ = false;
  tail_part->left_key_tab_ = false;
  ComputeLimits();
  // TODO(nbeato) Merge Ray's CL like this:
  // if (owns_blobs())
  //  SetBlobTextlineGoodness();
  tail_part->ComputeLimits();
  // TODO(nbeato) Merge Ray's CL like this:
  // if (split_part->owns_blobs())
  //   split_part->SetBlobTextlineGoodness();
  return tail_part;
}
int tesseract::ColPartition::top_spacing | ( | ) | const [inline] |
Definition at line 226 of file colpartition.h.
{
  // Accessor for top_spacing_.
  return top_spacing_;
}
PolyBlockType tesseract::ColPartition::type | ( | ) | const [inline] |
Definition at line 181 of file colpartition.h.
{
  // Accessor for type_.
  return type_;
}
bool tesseract::ColPartition::TypesMatch | ( | const ColPartition & | other | ) | const [inline] |
Definition at line 403 of file colpartition.h.
{
  // Compares this partition's blob_type_ with other's via the static
  // two-argument TypesMatch overload.
  return TypesMatch(blob_type_, other.blob_type_);
}
static bool tesseract::ColPartition::TypesMatch | ( | BlobRegionType | type1, |
BlobRegionType | type2 | ||
) | [inline, static] |
Definition at line 406 of file colpartition.h.
{
  // Two types match when they are equal or either is BRT_UNKNOWN, and
  // neither is a line type according to BLOBNBOX::IsLineType.
  const bool compatible =
      type1 == type2 || type1 == BRT_UNKNOWN || type2 == BRT_UNKNOWN;
  if (!compatible) return false;
  if (BLOBNBOX::IsLineType(type1)) return false;
  return !BLOBNBOX::IsLineType(type2);
}
static bool tesseract::ColPartition::TypesSimilar | ( | PolyBlockType | type1, |
PolyBlockType | type2 | ||
) | [inline, static] |
Definition at line 412 of file colpartition.h.
{
  // Types are similar when they are equal, or when one is PT_FLOWING_TEXT
  // and the other is PT_INLINE_EQUATION (in either order).
  if (type1 == type2) return true;
  if (type1 == PT_FLOWING_TEXT) return type2 == PT_INLINE_EQUATION;
  if (type2 == PT_FLOWING_TEXT) return type1 == PT_INLINE_EQUATION;
  return false;
}
ColPartition_CLIST* tesseract::ColPartition::upper_partners | ( | ) | [inline] |
Definition at line 196 of file colpartition.h.
{
  // Returns a pointer to the upper_partners_ list member.
  return &upper_partners_;
}
int tesseract::ColPartition::VCoreOverlap | ( | const ColPartition & | other | ) | const [inline] |
Definition at line 375 of file colpartition.h.
bool tesseract::ColPartition::VOverlaps | ( | const ColPartition & | other | ) | const [inline] |
Definition at line 370 of file colpartition.h.
{
  // True when the y_gap between the two bounding boxes is negative.
  return bounding_box_.y_gap(other.bounding_box_) < 0;
}
bool tesseract::ColPartition::VSignificantCoreOverlap | ( | const ColPartition & | other | ) | const [inline] |
Definition at line 387 of file colpartition.h.
{
  // True when three times the VCoreOverlap with other exceeds the smaller
  // of the two partitions' (median_top_ - median_bottom_) heights.
  const int own_core = median_top_ - median_bottom_;
  const int other_core = other.median_top_ - other.median_bottom_;
  const int smaller_core = MIN(own_core, other_core);
  return VCoreOverlap(other) * 3 > smaller_core;
}
bool tesseract::ColPartition::WithinSameMargins | ( | const ColPartition & | other | ) | const [inline] |
Definition at line 395 of file colpartition.h.
int tesseract::ColPartition::XAtY | ( | int | sort_key, |
int | y | ||
) | const [inline] |
Definition at line 320 of file colpartition.h.
{
  // Delegates to TabVector::XAtY using this partition's vertical_.
  return TabVector::XAtY(vertical_, sort_key, y);
}