|
Tesseract
3.02
|
#include <colpartition.h>
Public Member Functions | |
| ColPartition () | |
| ColPartition (BlobRegionType blob_type, const ICOORD &vertical) | |
| ~ColPartition () | |
| const TBOX & | bounding_box () const |
| int | left_margin () const |
| void | set_left_margin (int margin) |
| int | right_margin () const |
| void | set_right_margin (int margin) |
| int | median_top () const |
| int | median_bottom () const |
| int | median_left () const |
| int | median_right () const |
| int | median_size () const |
| void | set_median_size (int size) |
| int | median_width () const |
| void | set_median_width (int width) |
| BlobRegionType | blob_type () const |
| void | set_blob_type (BlobRegionType t) |
| BlobTextFlowType | flow () const |
| void | set_flow (BlobTextFlowType f) |
| int | good_blob_score () const |
| bool | good_width () const |
| bool | good_column () const |
| bool | left_key_tab () const |
| int | left_key () const |
| bool | right_key_tab () const |
| int | right_key () const |
| PolyBlockType | type () const |
| void | set_type (PolyBlockType t) |
| BLOBNBOX_CLIST * | boxes () |
| int | boxes_count () const |
| void | set_vertical (const ICOORD &v) |
| ColPartition_CLIST * | upper_partners () |
| ColPartition_CLIST * | lower_partners () |
| void | set_working_set (WorkingPartSet *working_set) |
| bool | block_owned () const |
| void | set_block_owned (bool owned) |
| bool | desperately_merged () const |
| ColPartitionSet * | column_set () const |
| void | set_side_step (int step) |
| int | bottom_spacing () const |
| void | set_bottom_spacing (int spacing) |
| int | top_spacing () const |
| void | set_top_spacing (int spacing) |
| void | set_table_type () |
| void | clear_table_type () |
| bool | inside_table_column () |
| void | set_inside_table_column (bool val) |
| ColPartition * | nearest_neighbor_above () const |
| void | set_nearest_neighbor_above (ColPartition *part) |
| ColPartition * | nearest_neighbor_below () const |
| void | set_nearest_neighbor_below (ColPartition *part) |
| int | space_above () const |
| void | set_space_above (int space) |
| int | space_below () const |
| void | set_space_below (int space) |
| int | space_to_left () const |
| void | set_space_to_left (int space) |
| int | space_to_right () const |
| void | set_space_to_right (int space) |
| uinT8 * | color1 () |
| uinT8 * | color2 () |
| bool | owns_blobs () const |
| void | set_owns_blobs (bool owns_blobs) |
| int | MidY () const |
| int | MedianY () const |
| int | MidX () const |
| int | SortKey (int x, int y) const |
| int | XAtY (int sort_key, int y) const |
| int | KeyWidth (int left_key, int right_key) const |
| int | ColumnWidth () const |
| int | BoxLeftKey () const |
| int | BoxRightKey () const |
| int | LeftAtY (int y) const |
| int | RightAtY (int y) const |
| bool | IsLeftOf (const ColPartition &other) const |
| bool | ColumnContains (int x, int y) const |
| bool | IsEmpty () const |
| bool | IsSingleton () const |
| bool | HOverlaps (const ColPartition &other) const |
| bool | VOverlaps (const ColPartition &other) const |
| int | VCoreOverlap (const ColPartition &other) const |
| int | HCoreOverlap (const ColPartition &other) const |
| bool | VSignificantCoreOverlap (const ColPartition &other) const |
| bool | WithinSameMargins (const ColPartition &other) const |
| bool | TypesMatch (const ColPartition &other) const |
| bool | IsLineType () const |
| bool | IsImageType () const |
| bool | IsTextType () const |
| bool | IsVerticalType () const |
| bool | IsHorizontalType () const |
| bool | IsUnMergeableType () const |
| bool | IsVerticalLine () const |
| bool | IsHorizontalLine () const |
| void | AddBox (BLOBNBOX *box) |
| void | RemoveBox (BLOBNBOX *box) |
| BLOBNBOX * | BiggestBox () |
| TBOX | BoundsWithoutBox (BLOBNBOX *box) |
| void | ClaimBoxes () |
| void | DisownBoxes () |
| void | DeleteBoxes () |
| void | ReflectInYAxis () |
| bool | IsLegal () |
| bool | MatchingColumns (const ColPartition &other) const |
| bool | MatchingTextColor (const ColPartition &other) const |
| bool | MatchingSizes (const ColPartition &other) const |
| bool | ConfirmNoTabViolation (const ColPartition &other) const |
| bool | MatchingStrokeWidth (const ColPartition &other, double fractional_tolerance, double constant_tolerance) const |
| bool | OKDiacriticMerge (const ColPartition &candidate, bool debug) const |
| void | SetLeftTab (const TabVector *tab_vector) |
| void | SetRightTab (const TabVector *tab_vector) |
| void | CopyLeftTab (const ColPartition &src, bool take_box) |
| void | CopyRightTab (const ColPartition &src, bool take_box) |
| int | LeftBlobRule () const |
| int | RightBlobRule () const |
| float | SpecialBlobsDensity (const BlobSpecialTextType type) const |
| int | SpecialBlobsCount (const BlobSpecialTextType type) |
| void | SetSpecialBlobsDensity (const BlobSpecialTextType type, const float density) |
| void | ComputeSpecialBlobsDensity () |
| void | AddPartner (bool upper, ColPartition *partner) |
| void | RemovePartner (bool upper, ColPartition *partner) |
| ColPartition * | SingletonPartner (bool upper) |
| void | Absorb (ColPartition *other, WidthCallback *cb) |
| bool | OKMergeOverlap (const ColPartition &merge1, const ColPartition &merge2, int ok_box_overlap, bool debug) |
| BLOBNBOX * | OverlapSplitBlob (const TBOX &box) |
| ColPartition * | SplitAtBlob (BLOBNBOX *split_blob) |
| ColPartition * | SplitAt (int split_x) |
| void | ComputeLimits () |
| int | CountOverlappingBoxes (const TBOX &box) |
| void | SetPartitionType (int resolution, ColPartitionSet *columns) |
| PolyBlockType | PartitionType (ColumnSpanningType flow) const |
| void | ColumnRange (int resolution, ColPartitionSet *columns, int *first_col, int *last_col) |
| void | SetColumnGoodness (WidthCallback *cb) |
| bool | MarkAsLeaderIfMonospaced () |
| void | SetRegionAndFlowTypesFromProjectionValue (int value) |
| void | SetBlobTypes () |
| bool | HasGoodBaseline () |
| void | AddToWorkingSet (const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *used_parts, WorkingPartSet_LIST *working_set) |
| ColPartition * | ShallowCopy () const |
| ColPartition * | CopyButDontOwnBlobs () |
| ScrollView::Color | BoxColor () const |
| void | Print () const |
| void | PrintColors () |
| void | SmoothPartnerRun (int working_set_count) |
| void | RefinePartners (PolyBlockType type, bool get_desparate, ColPartitionGrid *grid) |
| bool | IsInSameColumnAs (const ColPartition &part) const |
| void | set_first_column (int column) |
| void | set_last_column (int column) |
Static Public Member Functions | |
| static ColPartition * | MakeLinePartition (BlobRegionType blob_type, const ICOORD &vertical, int left, int bottom, int right, int top) |
| static ColPartition * | FakePartition (const TBOX &box, PolyBlockType block_type, BlobRegionType blob_type, BlobTextFlowType flow) |
| static ColPartition * | MakeBigPartition (BLOBNBOX *box, ColPartition_LIST *big_part_list) |
| static bool | TypesMatch (BlobRegionType type1, BlobRegionType type2) |
| static bool | TypesSimilar (PolyBlockType type1, PolyBlockType type2) |
| static void | LineSpacingBlocks (const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts, BLOCK_LIST *completed_blocks, TO_BLOCK_LIST *to_blocks) |
| static TO_BLOCK * | MakeBlock (const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts) |
| static TO_BLOCK * | MakeVerticalTextBlock (const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts) |
ColPartition is a partition of a horizontal slice of the page. It starts out as a collection of blobs at a particular y-coord in the grid, but ends up (after merging and uniquing) as an approximate text line. ColPartitions are also used to hold a partitioning of the page into columns, each representing one column. Although a ColPartition applies to a given y-coordinate range, eventually, a ColPartitionSet of ColPartitions emerges, which represents the columns over a wide y-coordinate range.
Definition at line 67 of file colpartition.h.
| tesseract::ColPartition::ColPartition | ( | ) | [inline] |
Definition at line 69 of file colpartition.h.
{
// The body is intentionally empty: this constructor performs no work itself.
// This empty constructor is here only so that the class can be ELISTIZED.
// TODO(rays) change deep_copy in elst.h line 955 to take a callback copier
// and eliminate CLASSNAME##_copier.
}
| tesseract::ColPartition::ColPartition | ( | BlobRegionType | blob_type, |
| const ICOORD & | vertical | ||
| ) |
| blob_type | is the blob_region_type_ of the blobs in this partition. |
| vertical | is the direction of logical vertical on the possibly skewed image. |
Definition at line 84 of file colpartition.cpp.
// Member initialisers, one per line. Margins and median edges start at the
// extreme MAX_INT32 values, pointers start NULL, flags start false except
// owns_blobs_, and the column indices start at -1.
: left_margin_(-MAX_INT32),
  right_margin_(MAX_INT32),
  median_bottom_(MAX_INT32),
  median_top_(-MAX_INT32),
  median_size_(0),
  median_left_(MAX_INT32),
  median_right_(-MAX_INT32),
  median_width_(0),
  blob_type_(blob_type),
  flow_(BTFT_NONE),
  good_blob_score_(0),
  good_width_(false),
  good_column_(false),
  left_key_tab_(false),
  right_key_tab_(false),
  left_key_(0),
  right_key_(0),
  type_(PT_UNKNOWN),
  vertical_(vertical),
  working_set_(NULL),
  last_add_was_vertical_(false),
  block_owned_(false),
  desperately_merged_(false),
  first_column_(-1),
  last_column_(-1),
  column_set_(NULL),
  side_step_(0),
  top_spacing_(0),
  bottom_spacing_(0),
  type_before_table_(PT_UNKNOWN),
  inside_table_column_(false),
  nearest_neighbor_above_(NULL),
  nearest_neighbor_below_(NULL),
  space_above_(0),
  space_below_(0),
  space_to_left_(0),
  space_to_right_(0),
  owns_blobs_(true) {
  // The density array is not covered by the initialiser list; zero it here.
  memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_));
}
| tesseract::ColPartition::~ColPartition | ( | ) |
Definition at line 146 of file colpartition.cpp.
{
// Remove this as a partner of all partners, as we don't want them
// referring to a deleted object.
ColPartition_C_IT it(&upper_partners_);
// Each upper partner is asked to drop this partition (upper == false).
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
it.data()->RemovePartner(false, this);
}
// Re-use the same iterator for the lower partners, this time with
// upper == true.
it.set_to_list(&lower_partners_);
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
it.data()->RemovePartner(true, this);
}
}
| void tesseract::ColPartition::Absorb | ( | ColPartition * | other, |
| WidthCallback * | cb | ||
| ) |
Definition at line 617 of file colpartition.cpp.
{
  // Merges other into this partition and then deletes other.
  //   other: partition to absorb; it is deleted before this function returns,
  //          so the caller must not use it afterwards.
  //   cb:    optional width callback; when non-NULL it is passed to
  //          SetColumnGoodness() after the merge.
  // The blobs, margins, keys, flow/blob type and partner links of other are
  // all folded into this partition.

  // The result has to either own all of the blobs or none of them.
  // Verify the flag is consistent.
  ASSERT_HOST(owns_blobs() == other->owns_blobs());
  // TODO(nbeato): check owns_blobs better. Right now owns_blobs
  // should always be true when this is called. So there is no issues.
  if (TabFind::WithinTestRegion(2, bounding_box_.left(),
                                bounding_box_.bottom()) ||
      TabFind::WithinTestRegion(2, other->bounding_box_.left(),
                                other->bounding_box_.bottom())) {
    tprintf("Merging:");
    Print();
    other->Print();
  }
  // Update the special_blobs_densities_ to the mean of the two partitions'
  // densities, weighted by their box counts. The counts must be taken before
  // the box lists are merged below. The previous code zeroed this partition's
  // densities before reading them and only stored a result when one of the
  // lists was empty, so the merged densities were lost (or 0/0 when both
  // lists were empty). When both lists are empty the densities are left as is.
  const int w1 = boxes_.length();
  const int w2 = other->boxes_.length();
  if (w1 + w2 > 0) {
    for (int type = 0; type < BSTT_COUNT; ++type) {
      float new_val = special_blobs_densities_[type] * w1 +
                      other->special_blobs_densities_[type] * w2;
      special_blobs_densities_[type] = new_val / (w1 + w2);
    }
  }
  // Merge the two sorted lists.
  BLOBNBOX_C_IT it(&boxes_);
  BLOBNBOX_C_IT it2(&other->boxes_);
  for (; !it2.empty(); it2.forward()) {
    BLOBNBOX* bbox2 = it2.extract();
    ColPartition* prev_owner = bbox2->owner();
    if (prev_owner != other && prev_owner != NULL) {
      // A blob on other's list is owned by someone else; let them have it.
      continue;
    }
    ASSERT_HOST(prev_owner == other || prev_owner == NULL);
    if (prev_owner == other)
      bbox2->set_owner(this);
    it.add_to_end(bbox2);
  }
  // Margins and keys take the outermost value of the two partitions; the
  // key-tab flag travels with whichever key wins.
  left_margin_ = MIN(left_margin_, other->left_margin_);
  right_margin_ = MAX(right_margin_, other->right_margin_);
  if (other->left_key_ < left_key_) {
    left_key_ = other->left_key_;
    left_key_tab_ = other->left_key_tab_;
  }
  if (other->right_key_ > right_key_) {
    right_key_ = other->right_key_;
    right_key_tab_ = other->right_key_tab_;
  }
  // Combine the flow and blob_type in a sensible way.
  // Dominant flows stay.
  if (!DominatesInMerge(flow_, other->flow_)) {
    flow_ = other->flow_;
    blob_type_ = other->blob_type_;
  }
  SetBlobTypes();
  // Boxes were appended unsorted above, so restore the ordering that
  // matches the partition's orientation.
  if (IsVerticalType()) {
    boxes_.sort(SortByBoxBottom<BLOBNBOX>);
    last_add_was_vertical_ = true;
  } else {
    boxes_.sort(SortByBoxLeft<BLOBNBOX>);
    last_add_was_vertical_ = false;
  }
  ComputeLimits();
  // Fix partner lists. other is going away, so remove it as a
  // partner of all its partners and add this in its place.
  for (int upper = 0; upper < 2; ++upper) {
    ColPartition_CLIST partners;
    ColPartition_C_IT part_it(&partners);
    part_it.add_list_after(upper ? &other->upper_partners_
                                 : &other->lower_partners_);
    for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
      ColPartition* partner = part_it.extract();
      partner->RemovePartner(!upper, other);
      // Remove this first so the following AddPartner cannot leave this
      // partition linked twice to the same partner.
      partner->RemovePartner(!upper, this);
      partner->AddPartner(!upper, this);
    }
  }
  delete other;
  if (cb != NULL) {
    SetColumnGoodness(cb);
  }
}
| void tesseract::ColPartition::AddBox | ( | BLOBNBOX * | box | ) |
Definition at line 180 of file colpartition.cpp.
{
// Adds the blob to this partition, keeping boxes_ sorted and the bounding box
// and keys up to date.
// NOTE(review): this body calls the incoming blob `bbox`, while the signature
// table above names the parameter `box`; `box` here is the local TBOX copy.
TBOX box = bbox->bounding_box();
// Update the partition limits.
if (boxes_.length() == 0) {
bounding_box_ = box;
} else {
bounding_box_ += box;
}
// Insert in bottom order for vertical partitions and left order otherwise.
// If the previous insertion used the other ordering, re-sort the whole list
// first so add_sorted works on a consistently ordered list.
if (IsVerticalType()) {
if (!last_add_was_vertical_) {
boxes_.sort(SortByBoxBottom<BLOBNBOX>);
last_add_was_vertical_ = true;
}
boxes_.add_sorted(SortByBoxBottom<BLOBNBOX>, true, bbox);
} else {
if (last_add_was_vertical_) {
boxes_.sort(SortByBoxLeft<BLOBNBOX>);
last_add_was_vertical_ = false;
}
boxes_.add_sorted(SortByBoxLeft<BLOBNBOX>, true, bbox);
}
// Keys follow the box edges unless the corresponding key-tab flag is set.
if (!left_key_tab_)
left_key_ = BoxLeftKey();
if (!right_key_tab_)
right_key_ = BoxRightKey();
// Debug trace; the values printed are the partition's bounding-box edges.
if (TabFind::WithinTestRegion(2, box.left(), box.bottom()))
tprintf("Added box (%d,%d)->(%d,%d) left_blob_x_=%d, right_blob_x_ = %d\n",
box.left(), box.bottom(), box.right(), box.top(),
bounding_box_.left(), bounding_box_.right());
}
| void tesseract::ColPartition::AddPartner | ( | bool | upper, |
| ColPartition * | partner | ||
| ) |
Definition at line 582 of file colpartition.cpp.
{
  // Links this partition and partner in both directions. When upper is true
  // the partner goes on this partition's upper list and this goes on the
  // partner's lower list; otherwise the roles are swapped. The partner's
  // list is updated first, then this partition's own list.
  ColPartition_CLIST* partner_side =
      upper ? &partner->lower_partners_ : &partner->upper_partners_;
  ColPartition_CLIST* own_side = upper ? &upper_partners_ : &lower_partners_;
  partner_side->add_sorted(SortByBoxLeft<ColPartition>, true, this);
  own_side->add_sorted(SortByBoxLeft<ColPartition>, true, partner);
}
| void tesseract::ColPartition::AddToWorkingSet | ( | const ICOORD & | bleft, |
| const ICOORD & | tright, | ||
| int | resolution, | ||
| ColPartition_LIST * | used_parts, | ||
| WorkingPartSet_LIST * | working_set | ||
| ) |
Definition at line 1313 of file colpartition.cpp.
{
// Assigns this partition to a WorkingPartSet, at most once per partition.
// NOTE(review): this body reads the list through `working_sets`, while the
// signature table above names the last parameter `working_set`.
if (block_owned_)
return; // Done it already.
block_owned_ = true;
WorkingPartSet_IT it(working_sets);
// If there is an upper partner use its working_set_ directly.
ColPartition* partner = SingletonPartner(true);
if (partner != NULL && partner->working_set_ != NULL) {
working_set_ = partner->working_set_;
working_set_->AddPartition(this);
return;
}
// Reaching here with a partner means the partner had no working set.
if (partner != NULL && textord_debug_bugs) {
tprintf("Partition with partner has no working set!:");
Print();
partner->Print();
}
// Search for the column that the left edge fits in.
WorkingPartSet* work_set = NULL;
it.move_to_first();
int col_index = 0;
// Empty-bodied loop: advances it and col_index together until col_index
// reaches first_column_ or the list has been cycled.
for (it.mark_cycle_pt(); !it.cycled_list() &&
col_index != first_column_;
it.forward(), ++col_index);
if (textord_debug_tabfind >= 2) {
tprintf("Match is %s for:", (col_index & 1) ? "Real" : "Between");
Print();
}
if (it.cycled_list() && textord_debug_bugs) {
tprintf("Target column=%d, only had %d\n", first_column_, col_index);
}
// Running off the end of the list is treated as a fatal error.
ASSERT_HOST(!it.cycled_list());
work_set = it.data();
// If last_column_ != first_column, then we need to scoop up all blocks
// between here and the last_column_ and put back in work_set.
if (!it.cycled_list() && last_column_ != first_column_) {
// Find the column that the right edge falls in.
BLOCK_LIST completed_blocks;
TO_BLOCK_LIST to_blocks;
// The iterator still points at the first_column_ entry, so that set is
// included in the extraction along with those up to last_column_.
for (; !it.cycled_list() && col_index <= last_column_;
it.forward(), ++col_index) {
WorkingPartSet* end_set = it.data();
end_set->ExtractCompletedBlocks(bleft, tright, resolution, used_parts,
&completed_blocks, &to_blocks);
}
work_set->InsertCompletedBlocks(&completed_blocks, &to_blocks);
}
working_set_ = work_set;
work_set->AddPartition(this);
}
| BLOBNBOX * tesseract::ColPartition::BiggestBox | ( | ) |
Definition at line 226 of file colpartition.cpp.
{
  // Returns the blob with the largest extent across the partition: width for
  // vertical partitions, height otherwise. The first of several equal blobs
  // wins, and NULL is returned when there are no blobs.
  const bool compare_widths = IsVerticalType();
  BLOBNBOX* winner = NULL;
  BLOBNBOX_C_IT box_it(&boxes_);
  for (box_it.mark_cycle_pt(); !box_it.cycled_list(); box_it.forward()) {
    BLOBNBOX* candidate = box_it.data();
    if (winner == NULL) {
      winner = candidate;
      continue;
    }
    const bool larger =
        compare_widths
            ? candidate->bounding_box().width() >
                  winner->bounding_box().width()
            : candidate->bounding_box().height() >
                  winner->bounding_box().height();
    if (larger)
      winner = candidate;
  }
  return winner;
}
| BlobRegionType tesseract::ColPartition::blob_type | ( | ) | const [inline] |
Definition at line 148 of file colpartition.h.
{
// Accessor: returns the blob_type_ member unchanged.
return blob_type_;
}
| bool tesseract::ColPartition::block_owned | ( | ) | const [inline] |
Definition at line 205 of file colpartition.h.
{
// Accessor: returns the block_owned_ flag (set by AddToWorkingSet).
return block_owned_;
}
| int tesseract::ColPartition::bottom_spacing | ( | ) | const [inline] |
Definition at line 220 of file colpartition.h.
{
// Accessor: returns the bottom_spacing_ member unchanged.
return bottom_spacing_;
}
| const TBOX& tesseract::ColPartition::bounding_box | ( | ) | const [inline] |
Definition at line 109 of file colpartition.h.
{
// Accessor: returns a const reference to the bounding_box_ member, so the
// reference is only valid while this partition exists.
return bounding_box_;
}
| TBOX tesseract::ColPartition::BoundsWithoutBox | ( | BLOBNBOX * | box | ) |
Definition at line 245 of file colpartition.cpp.
{
  // Returns the union of the bounding boxes of every blob in this partition
  // except box. Blobs are matched by pointer identity, and the result is a
  // default-constructed TBOX when nothing else is present.
  TBOX bounds;
  BLOBNBOX_C_IT blob_it(&boxes_);
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    if (blob_it.data() == box)
      continue;
    bounds += blob_it.data()->bounding_box();
  }
  return bounds;
}
| ScrollView::Color tesseract::ColPartition::BoxColor | ( | ) | const |
Definition at line 1724 of file colpartition.cpp.
{
  // Picks the display colour. Once a partition type has been assigned the
  // colour comes from the type; while it is still PT_UNKNOWN the colour is
  // derived from the blob type and flow.
  if (type_ != PT_UNKNOWN) {
    return POLY_BLOCK::ColorForPolyBlockType(type_);
  }
  return BLOBNBOX::TextlineColor(blob_type_, flow_);
}
| BLOBNBOX_CLIST* tesseract::ColPartition::boxes | ( | ) | [inline] |
Definition at line 187 of file colpartition.h.
{
// Returns the address of the internal boxes_ list, so callers can modify the
// list directly.
return &boxes_;
}
| int tesseract::ColPartition::boxes_count | ( | ) | const [inline] |
Definition at line 190 of file colpartition.h.
{
// Returns the number of entries reported by boxes_.length().
return boxes_.length();
}
| int tesseract::ColPartition::BoxLeftKey | ( | ) | const [inline] |
Definition at line 332 of file colpartition.h.
| int tesseract::ColPartition::BoxRightKey | ( | ) | const [inline] |
Definition at line 336 of file colpartition.h.
| void tesseract::ColPartition::ClaimBoxes | ( | ) |
Definition at line 258 of file colpartition.cpp.
{
  // Makes this partition the owner of every blob on its list. A blob that
  // already has an owner must already belong to this partition; anything
  // else trips ASSERT_HOST.
  BLOBNBOX_C_IT blob_it(&boxes_);
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    BLOBNBOX* blob = blob_it.data();
    ColPartition* current_owner = blob->owner();
    if (current_owner != NULL) {
      ASSERT_HOST(current_owner == this);
      continue;
    }
    // Normal case: ownership is available.
    blob->set_owner(this);
  }
}
| void tesseract::ColPartition::clear_table_type | ( | ) | [inline] |
Definition at line 239 of file colpartition.h.
{
// Undoes a table marking: only when the current type is PT_TABLE is the type
// restored from type_before_table_; any other type is left untouched.
if (type_ == PT_TABLE)
type_ = type_before_table_;
}
| uinT8* tesseract::ColPartition::color1 | ( | ) | [inline] |
Definition at line 285 of file colpartition.h.
{
// Returns the color1_ member as a mutable uinT8 pointer.
return color1_;
}
| uinT8* tesseract::ColPartition::color2 | ( | ) | [inline] |
Definition at line 288 of file colpartition.h.
{
// Returns the color2_ member as a mutable uinT8 pointer.
return color2_;
}
| ColPartitionSet* tesseract::ColPartition::column_set | ( | ) | const [inline] |
Definition at line 214 of file colpartition.h.
{
// Accessor: returns the column_set_ pointer (NULL after construction).
return column_set_;
}
| bool tesseract::ColPartition::ColumnContains | ( | int | x, |
| int | y | ||
| ) | const [inline] |
Definition at line 353 of file colpartition.h.
| void tesseract::ColPartition::ColumnRange | ( | int | resolution, |
| ColPartitionSet * | columns, | ||
| int * | first_col, | ||
| int * | last_col | ||
| ) |
Definition at line 1028 of file colpartition.cpp.
{
  // Asks columns which column range this partition spans, forwarding
  // first_col and last_col to SpanningType as outputs, and stores the
  // resulting partition type in type_. The first spanned column is requested
  // from SpanningType but is not used here.
  const int box_left = bounding_box_.left();
  const int box_right = bounding_box_.right();
  const int mid_y = MidY();
  int first_spanned_col = -1;
  const ColumnSpanningType span_type = columns->SpanningType(
      resolution, box_left, box_right, mid_y, left_margin_, right_margin_,
      first_col, last_col, &first_spanned_col);
  type_ = PartitionType(span_type);
}
| int tesseract::ColPartition::ColumnWidth | ( | ) | const [inline] |
Definition at line 328 of file colpartition.h.
{
// Returns the width between this partition's own left and right keys, as
// computed by KeyWidth.
return KeyWidth(left_key_, right_key_);
}
| void tesseract::ColPartition::ComputeLimits | ( | ) |
Definition at line 834 of file colpartition.cpp.
{
// Recomputes the bounding box, the keys and the median statistics from the
// current blob list, then re-inserts this partition in its partners' lists.
bounding_box_ = TBOX(); // Clear it
BLOBNBOX_C_IT it(&boxes_);
BLOBNBOX* bbox = NULL;
int non_leader_count = 0;
if (it.empty()) {
// No blobs: the box spans the margins horizontally, with bottom and top
// both set to 0.
bounding_box_.set_left(left_margin_);
bounding_box_.set_right(right_margin_);
bounding_box_.set_bottom(0);
bounding_box_.set_top(0);
} else {
// Union all blob boxes and count the blobs whose flow is not BTFT_LEADER.
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
bbox = it.data();
bounding_box_ += bbox->bounding_box();
if (bbox->flow() != BTFT_LEADER)
++non_leader_count;
}
}
// Keys follow the box edges unless the corresponding key-tab flag is set.
if (!left_key_tab_)
left_key_ = BoxLeftKey();
if (left_key_ > BoxLeftKey() && textord_debug_bugs) {
// TODO(rays) investigate the causes of these error messages, to find
// out if they are genuinely harmful, or just indicative of junk input.
tprintf("Computed left-illegal partition\n");
Print();
}
if (!right_key_tab_)
right_key_ = BoxRightKey();
if (right_key_ < BoxRightKey() && textord_debug_bugs) {
tprintf("Computed right-illegal partition\n");
Print();
}
// With no blobs the medians and the partner lists are left untouched.
if (it.empty())
return;
if (IsImageType() || blob_type() == BRT_RECTIMAGE ||
blob_type() == BRT_POLYIMAGE) {
// Image partitions take their "medians" straight from the bounding box.
median_top_ = bounding_box_.top();
median_bottom_ = bounding_box_.bottom();
median_size_ = bounding_box_.height();
median_left_ = bounding_box_.left();
median_right_ = bounding_box_.right();
median_width_ = bounding_box_.width();
} else {
// One STATS accumulator per quantity, each ranged to the bounding box.
STATS top_stats(bounding_box_.bottom(), bounding_box_.top() + 1);
STATS bottom_stats(bounding_box_.bottom(), bounding_box_.top() + 1);
STATS size_stats(0, bounding_box_.height() + 1);
STATS left_stats(bounding_box_.left(), bounding_box_.right() + 1);
STATS right_stats(bounding_box_.left(), bounding_box_.right() + 1);
STATS width_stats(0, bounding_box_.width() + 1);
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
bbox = it.data();
// Leader blobs are skipped unless every blob is a leader.
if (non_leader_count == 0 || bbox->flow() != BTFT_LEADER) {
TBOX box = bbox->bounding_box();
// The box area is passed as the second argument of add(), presumably
// acting as a weight - TODO confirm against STATS::add.
int area = box.area();
top_stats.add(box.top(), area);
bottom_stats.add(box.bottom(), area);
size_stats.add(box.height(), area);
left_stats.add(box.left(), area);
right_stats.add(box.right(), area);
width_stats.add(box.width(), area);
}
}
// 0.5 is added before the truncating cast to int.
median_top_ = static_cast<int>(top_stats.median() + 0.5);
median_bottom_ = static_cast<int>(bottom_stats.median() + 0.5);
median_size_ = static_cast<int>(size_stats.median() + 0.5);
median_left_ = static_cast<int>(left_stats.median() + 0.5);
median_right_ = static_cast<int>(right_stats.median() + 0.5);
median_width_ = static_cast<int>(width_stats.median() + 0.5);
}
// Margins lying inside the box are only reported, not corrected.
if (right_margin_ < bounding_box_.right() && textord_debug_bugs) {
tprintf("Made partition with bad right coords");
Print();
}
if (left_margin_ > bounding_box_.left() && textord_debug_bugs) {
tprintf("Made partition with bad left coords");
Print();
}
// Fix partner lists. The bounding box has changed and partners are stored
// in bounding box order, so remove and reinsert this as a partner
// of all its partners.
for (int upper = 0; upper < 2; ++upper) {
// Move the partner list into a temporary list first, then drain the
// temporary list while each partner is re-linked.
ColPartition_CLIST partners;
ColPartition_C_IT part_it(&partners);
part_it.add_list_after(upper ? &upper_partners_ : &lower_partners_);
for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
ColPartition* partner = part_it.extract();
partner->RemovePartner(!upper, this);
partner->AddPartner(!upper, this);
}
}
if (TabFind::WithinTestRegion(2, bounding_box_.left(),
bounding_box_.bottom())) {
tprintf("Recomputed box for partition %p\n", this);
Print();
}
}
| void tesseract::ColPartition::ComputeSpecialBlobsDensity | ( | ) |
Definition at line 561 of file colpartition.cpp.
{
  // Recomputes special_blobs_densities_ from scratch: for each special text
  // type, the fraction of this partition's blobs reporting that type. All
  // densities stay at zero when the partition has no blobs.
  memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_));
  if (boxes_.empty())
    return;

  // Pass 1: count the blobs of each type.
  BLOBNBOX_C_IT box_it(&boxes_);
  for (box_it.mark_cycle_pt(); !box_it.cycled_list(); box_it.forward()) {
    const BlobSpecialTextType blob_kind = box_it.data()->special_text_type();
    special_blobs_densities_[blob_kind]++;
  }

  // Pass 2: turn the counts into fractions of the total. The list is not
  // modified in between, so its length is read once.
  const int total_boxes = boxes_.length();
  for (int kind = 0; kind < BSTT_COUNT; ++kind) {
    special_blobs_densities_[kind] /= total_boxes;
  }
}
| bool tesseract::ColPartition::ConfirmNoTabViolation | ( | const ColPartition & | other | ) | const |
Definition at line 392 of file colpartition.cpp.
{
  // Returns false when the two partitions are horizontally disjoint and the
  // facing edge of one also lies beyond the other's blob rule on that side.
  // The four tests are evaluated in order and stop at the first violation.
  const TBOX& mine = bounding_box_;
  const TBOX& theirs = other.bounding_box_;
  const bool violation =
      // This lies left of other and short of other's left rule.
      (mine.right() < theirs.left() &&
       mine.right() < other.LeftBlobRule()) ||
      // Other lies left of this and short of this partition's left rule.
      (theirs.right() < mine.left() &&
       theirs.right() < LeftBlobRule()) ||
      // This lies right of other and beyond other's right rule.
      (mine.left() > theirs.right() &&
       mine.left() > other.RightBlobRule()) ||
      // Other lies right of this and beyond this partition's right rule.
      (theirs.left() > mine.right() &&
       theirs.left() > RightBlobRule());
  return !violation;
}
| ColPartition * tesseract::ColPartition::CopyButDontOwnBlobs | ( | ) |
Definition at line 1711 of file colpartition.cpp.
{
  // Returns a ShallowCopy() of this partition with owns_blobs set to false,
  // whose box list refers to the same BLOBNBOX objects in the same order.
  // The blobs themselves are not duplicated.
  ColPartition* clone = ShallowCopy();
  clone->set_owns_blobs(false);
  BLOBNBOX_C_IT dest_it(clone->boxes());
  BLOBNBOX_C_IT src_it(boxes());
  for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {
    dest_it.add_after_then_move(src_it.data());
  }
  return clone;
}
| void tesseract::ColPartition::CopyLeftTab | ( | const ColPartition & | src, |
| bool | take_box | ||
| ) |
Definition at line 498 of file colpartition.cpp.
{
  // Copies the left edge information from src. With take_box set, or when
  // src has no left key tab, the left edge of the bounding box is moved to
  // where src's box-left key falls at this partition's MidY() and the key is
  // re-derived from the box. Otherwise src's key is copied as it stands.
  left_key_tab_ = !take_box && src.left_key_tab_;
  if (!left_key_tab_) {
    const int new_left = XAtY(src.BoxLeftKey(), MidY());
    bounding_box_.set_left(new_left);
    left_key_ = BoxLeftKey();
  } else {
    left_key_ = src.left_key_;
  }
  // Adopt src's margin only if the current margin falls inside the box.
  if (bounding_box_.left() < left_margin_) {
    left_margin_ = src.left_margin_;
  }
}
| void tesseract::ColPartition::CopyRightTab | ( | const ColPartition & | src, |
| bool | take_box | ||
| ) |
Definition at line 511 of file colpartition.cpp.
{
  // Copies the right edge information from src. With take_box set, or when
  // src has no right key tab, the right edge of the bounding box is moved to
  // where src's box-right key falls at this partition's MidY() and the key
  // is re-derived from the box. Otherwise src's key is copied as it stands.
  right_key_tab_ = !take_box && src.right_key_tab_;
  if (!right_key_tab_) {
    const int new_right = XAtY(src.BoxRightKey(), MidY());
    bounding_box_.set_right(new_right);
    right_key_ = BoxRightKey();
  } else {
    right_key_ = src.right_key_;
  }
  // Adopt src's margin only if the current margin falls inside the box.
  if (bounding_box_.right() > right_margin_) {
    right_margin_ = src.right_margin_;
  }
}
| int tesseract::ColPartition::CountOverlappingBoxes | ( | const TBOX & | box | ) |
Definition at line 933 of file colpartition.cpp.
{
  // Returns how many blobs in this partition have a bounding box for which
  // box.overlap() reports true.
  int hits = 0;
  BLOBNBOX_C_IT blob_it(&boxes_);
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    if (box.overlap(blob_it.data()->bounding_box())) {
      hits += 1;
    }
  }
  return hits;
}
| void tesseract::ColPartition::DeleteBoxes | ( | ) |
Definition at line 284 of file colpartition.cpp.
{
  // Empties boxes_, deleting each BLOBNBOX and the C_BLOB returned by its
  // cblob().
  // Although the boxes_ list is a C_LIST, in some cases it owns the
  // BLOBNBOXes, as the ColPartition takes ownership from the grid,
  // and the BLOBNBOXes own the underlying C_BLOBs.
  BLOBNBOX_C_IT blob_it(&boxes_);
  while (!blob_it.empty()) {
    BLOBNBOX* doomed = blob_it.extract();
    delete doomed->cblob();
    delete doomed;
    blob_it.forward();
  }
}
| bool tesseract::ColPartition::desperately_merged | ( | ) | const [inline] |
Definition at line 211 of file colpartition.h.
{
// Accessor: returns the desperately_merged_ flag (false after construction).
return desperately_merged_;
}
| void tesseract::ColPartition::DisownBoxes | ( | ) |
Definition at line 274 of file colpartition.cpp.
| ColPartition * tesseract::ColPartition::FakePartition | ( | const TBOX & | box, |
| PolyBlockType | block_type, | ||
| BlobRegionType | blob_type, | ||
| BlobTextFlowType | flow | ||
| ) | [static] |
Definition at line 108 of file colpartition.cpp.
{
// Builds a heap-allocated partition containing one fake blob that covers
// box; the caller receives the raw pointer.
// ICOORD(0, 1) is passed as the vertical direction.
ColPartition* part = new ColPartition(blob_type, ICOORD(0, 1));
part->set_type(block_type);
part->set_flow(flow);
part->AddBox(new BLOBNBOX(C_BLOB::FakeBlob(box)));
// Both margins are set to the box edges.
part->set_left_margin(box.left());
part->set_right_margin(box.right());
part->SetBlobTypes();
part->ComputeLimits();
// Finally mark the new blob as owned by the partition.
part->ClaimBoxes();
return part;
}
| BlobTextFlowType tesseract::ColPartition::flow | ( | ) | const [inline] |
Definition at line 154 of file colpartition.h.
{
// Accessor: returns the flow_ member unchanged.
return flow_;
}
| int tesseract::ColPartition::good_blob_score | ( | ) | const [inline] |
Definition at line 160 of file colpartition.h.
{
// Accessor: returns the good_blob_score_ member (0 after construction).
return good_blob_score_;
}
| bool tesseract::ColPartition::good_column | ( | ) | const [inline] |
Definition at line 166 of file colpartition.h.
{
// Accessor: returns the good_column_ flag (false after construction).
return good_column_;
}
| bool tesseract::ColPartition::good_width | ( | ) | const [inline] |
Definition at line 163 of file colpartition.h.
{
// Accessor: returns the good_width_ flag (false after construction).
return good_width_;
}
| bool tesseract::ColPartition::HasGoodBaseline | ( | ) |
Definition at line 1248 of file colpartition.cpp.
{
  // Returns true if the blobs of this partition fit a straight baseline well
  // and cover enough of its length. The fit error must be below
  // kMaxBaselineError times the mean blob height, and the summed blob extent
  // along the line must be at least kMinBaselineCoverage times the line
  // length. Only interior blobs (neither first nor last) contribute to the
  // height and coverage sums, so a partition with fewer than three blobs
  // returns false.
  BLOBNBOX_C_IT it(&boxes_);
  // An empty partition has no blob to read a first point from; bail out
  // before it.data() is dereferenced.
  if (it.empty())
    return false;
  // Approximation of the baseline.
  DetLineFit linepoints;
  // Calculation of the mean height on this line segment. Note that these
  // variable names apply to the context of a horizontal line, and work
  // analogously, rather than literally in the case of a vertical line.
  int total_height = 0;
  int coverage = 0;
  int height_count = 0;
  int width = 0;
  TBOX box(it.data()->bounding_box());
  // Accumulate points representing the baseline at the middle of each blob,
  // but add an additional point for each end of the line. This makes it
  // harder to fit a severe skew angle, as it is most likely not right.
  if (IsVerticalType()) {
    // For a vertical line, use the right side as the baseline.
    ICOORD first_pt(box.right(), box.bottom());
    // Use the bottom-right of the first (bottom) box, the top-right of the
    // last, and the middle-right of all others.
    linepoints.Add(first_pt);
    for (it.forward(); !it.at_last(); it.forward()) {
      BLOBNBOX* blob = it.data();
      box = blob->bounding_box();
      ICOORD box_pt(box.right(), (box.top() + box.bottom()) / 2);
      linepoints.Add(box_pt);
      total_height += box.width();
      coverage += box.height();
      ++height_count;
    }
    box = it.data()->bounding_box();
    ICOORD last_pt(box.right(), box.top());
    linepoints.Add(last_pt);
    width = last_pt.y() - first_pt.y();
  } else {
    // Horizontal lines use the bottom as the baseline.
    // Use the bottom-left of the first box, the bottom-right of the last,
    // and the middle of all others.
    ICOORD first_pt(box.left(), box.bottom());
    linepoints.Add(first_pt);
    for (it.forward(); !it.at_last(); it.forward()) {
      BLOBNBOX* blob = it.data();
      box = blob->bounding_box();
      ICOORD box_pt((box.left() + box.right()) / 2, box.bottom());
      linepoints.Add(box_pt);
      total_height += box.height();
      coverage += box.width();
      ++height_count;
    }
    box = it.data()->bounding_box();
    ICOORD last_pt(box.right(), box.bottom());
    linepoints.Add(last_pt);
    width = last_pt.x() - first_pt.x();
  }
  // No interior blobs means no mean height to compare the fit error against.
  // Dividing by height_count here would be 0/0, so reject explicitly rather
  // than rely on a NaN comparison.
  // NOTE(review): assumes kMaxBaselineError is floating point, in which case
  // the old code also ended up returning false here - confirm.
  if (height_count == 0)
    return false;
  // Maximum median error allowed to be a good text line.
  double max_error = kMaxBaselineError * total_height / height_count;
  ICOORD start_pt, end_pt;
  double error = linepoints.Fit(&start_pt, &end_pt);
  return error < max_error && coverage >= kMinBaselineCoverage * width;
}
| int tesseract::ColPartition::HCoreOverlap | ( | const ColPartition & | other | ) | const [inline] |
Definition at line 381 of file colpartition.h.
| bool tesseract::ColPartition::HOverlaps | ( | const ColPartition & | other | ) | const [inline] |
Definition at line 365 of file colpartition.h.
{
// Delegates to TBOX::x_overlap on the two partitions' bounding boxes.
return bounding_box_.x_overlap(other.bounding_box_);
}
| bool tesseract::ColPartition::inside_table_column | ( | ) | [inline] |
Definition at line 243 of file colpartition.h.
{
// Accessor: returns the inside_table_column_ flag (false after construction).
return inside_table_column_;
}
| bool tesseract::ColPartition::IsEmpty | ( | ) | const [inline] |
Definition at line 357 of file colpartition.h.
{
// True when the boxes_ list reports itself empty.
return boxes_.empty();
}
| bool tesseract::ColPartition::IsHorizontalLine | ( | ) | const [inline] |
Definition at line 449 of file colpartition.h.
{
// True only when the partition is both of horizontal type and of line type.
return IsHorizontalType() && IsLineType();
}
| bool tesseract::ColPartition::IsHorizontalType | ( | ) | const [inline] |
Definition at line 435 of file colpartition.h.
| bool tesseract::ColPartition::IsImageType | ( | ) | const [inline] |
Definition at line 423 of file colpartition.h.
{
// Delegates to PTIsImageType on the partition's type_.
return PTIsImageType(type_);
}
| bool tesseract::ColPartition::IsInSameColumnAs | ( | const ColPartition & | part | ) | const |
Definition at line 2128 of file colpartition.cpp.
{
  // Returns true when the column ranges [first_column_, last_column_] of the
  // two partitions overlap. The ranges are disjoint exactly when one ends
  // before the other begins, so the result is the negation of that test.
  const bool ends_before_part = last_column_ < part.first_column_;
  const bool starts_after_part = first_column_ > part.last_column_;
  return !(ends_before_part || starts_after_part);
}
| bool tesseract::ColPartition::IsLeftOf | ( | const ColPartition & | other | ) | const [inline] |
Definition at line 349 of file colpartition.h.
{
// Compares the right edges of the two bounding boxes: true when this
// partition's right edge is strictly less than other's right edge.
return bounding_box_.right() < other.bounding_box_.right();
}
| bool tesseract::ColPartition::IsLegal | ( | ) |
Definition at line 321 of file colpartition.cpp.
{
  // Runs three consistency checks in order. On the first failure, and only
  // if textord_debug_bugs is set, a message and the partition are printed;
  // the function then returns false. Returns true when all checks pass.

  // Check 1: the box must not have its left edge beyond its right edge.
  const bool box_ok = bounding_box_.left() <= bounding_box_.right();
  if (!box_ok) {
    if (textord_debug_bugs) {
      tprintf("Bounding box invalid\n");
      Print();
    }
    return false;
  }

  // Check 2: the margins must enclose the box horizontally.
  const bool margins_ok = left_margin_ <= bounding_box_.left() &&
                          right_margin_ >= bounding_box_.right();
  if (!margins_ok) {
    if (textord_debug_bugs) {
      tprintf("Margins invalid\n");
      Print();
    }
    return false;
  }

  // Check 3: the keys must not lie inside the box keys.
  const bool keys_ok = left_key_ <= BoxLeftKey() &&
                       right_key_ >= BoxRightKey();
  if (!keys_ok) {
    if (textord_debug_bugs) {
      tprintf("Key inside box: %d v %d or %d v %d\n",
              left_key_, BoxLeftKey(), right_key_, BoxRightKey());
      Print();
    }
    return false;
  }
  return true;
}
| bool tesseract::ColPartition::IsLineType | ( | ) | const [inline] |
Definition at line 419 of file colpartition.h.
{
// Forwards the partition type type_ to PTIsLineType.
return PTIsLineType(type_);
}
| bool tesseract::ColPartition::IsSingleton | ( | ) | const [inline] |
Definition at line 361 of file colpartition.h.
{
// True when the blob list boxes_ reports itself a singleton.
return boxes_.singleton();
}
| bool tesseract::ColPartition::IsTextType | ( | ) | const [inline] |
Definition at line 427 of file colpartition.h.
{
// Forwards the partition type type_ to PTIsTextType.
return PTIsTextType(type_);
}
| bool tesseract::ColPartition::IsUnMergeableType | ( | ) | const [inline] |
Definition at line 439 of file colpartition.h.
{
// True when BLOBNBOX::UnMergeableType accepts blob_type_, or when the
// partition type is PT_NOISE.
return BLOBNBOX::UnMergeableType(blob_type_) || type_ == PT_NOISE;
}
| bool tesseract::ColPartition::IsVerticalLine | ( | ) | const [inline] |
Definition at line 444 of file colpartition.h.
{
// True only when both IsVerticalType() and IsLineType() hold.
return IsVerticalType() && IsLineType();
}
| bool tesseract::ColPartition::IsVerticalType | ( | ) | const [inline] |
Definition at line 431 of file colpartition.h.
{
// True when blob_type_ is either BRT_VERT_TEXT or BRT_VLINE.
return blob_type_ == BRT_VERT_TEXT || blob_type_ == BRT_VLINE;
}
| int tesseract::ColPartition::KeyWidth | ( | int | left_key, |
| int | right_key | ||
| ) | const [inline] |
Definition at line 324 of file colpartition.h.
| int tesseract::ColPartition::left_key | ( | ) | const [inline] |
Definition at line 172 of file colpartition.h.
{
// Returns the stored left_key_ value.
return left_key_;
}
| bool tesseract::ColPartition::left_key_tab | ( | ) | const [inline] |
Definition at line 169 of file colpartition.h.
{
// Returns the stored left_key_tab_ flag.
return left_key_tab_;
}
| int tesseract::ColPartition::left_margin | ( | ) | const [inline] |
Definition at line 112 of file colpartition.h.
{
// Returns the stored left_margin_ value.
return left_margin_;
}
| int tesseract::ColPartition::LeftAtY | ( | int | y | ) | const [inline] |
Definition at line 340 of file colpartition.h.
{
// Converts left_key_ to an x coordinate at the given y via XAtY.
return XAtY(left_key_, y);
}
| int tesseract::ColPartition::LeftBlobRule | ( | ) | const |
Definition at line 524 of file colpartition.cpp.
{
  // Returns left_rule() of the blob a fresh iterator over boxes_ starts on.
  // The const is cast away only to construct the iterator; the list is not
  // modified here. No emptiness check is made before data() is used.
  BLOBNBOX_CLIST* blob_list = const_cast<BLOBNBOX_CLIST*>(&boxes_);
  BLOBNBOX_C_IT first_it(blob_list);
  BLOBNBOX* first_blob = first_it.data();
  return first_blob->left_rule();
}
| void tesseract::ColPartition::LineSpacingBlocks | ( | const ICOORD & | bleft, |
| const ICOORD & | tright, | ||
| int | resolution, | ||
| ColPartition_LIST * | block_parts, | ||
| ColPartition_LIST * | used_parts, | ||
| BLOCK_LIST * | completed_blocks, | ||
| TO_BLOCK_LIST * | to_blocks | ||
| ) | [static] |
Definition at line 1373 of file colpartition.cpp.
{
  // Splits block_parts into runs of partitions with matching line spacing
  // and turns each run into a block via MakeBlock.
  //   bleft, tright:    corners used to compute the page height; also
  //                     forwarded to MakeBlock.
  //   resolution:       forwarded to SmoothSpacings and SpacingsEqual.
  //   block_parts:      input list; every partition is extracted from it.
  //   used_parts:       forwarded to MakeBlock.
  //   completed_blocks: receives the BLOCK of each TO_BLOCK produced.
  //   to_blocks:        receives each TO_BLOCK produced.
  int page_height = tright.y() - bleft.y();
  // Compute the initial spacing stats.
  ColPartition_IT it(block_parts);
  int part_count = 0;
  int max_line_height = 0;
  // TODO(joeliu): We should add some special logic for PT_INLINE_EQUATION type
  // because their line spacing with their neighbors maybe smaller and their
  // height may be slightly larger.
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    ColPartition* part = it.data();
    ASSERT_HOST(!part->boxes()->empty());
    STATS side_steps(0, part->bounding_box().height());
    if (part->bounding_box().height() > max_line_height)
      max_line_height = part->bounding_box().height();
    // Collect the absolute bottom-to-bottom steps between successive blobs.
    BLOBNBOX_C_IT blob_it(part->boxes());
    int prev_bottom = blob_it.data()->bounding_box().bottom();
    for (blob_it.forward(); !blob_it.at_first(); blob_it.forward()) {
      BLOBNBOX* blob = blob_it.data();
      int bottom = blob->bounding_box().bottom();
      int step = bottom - prev_bottom;
      if (step < 0)
        step = -step;
      side_steps.add(step, 1);
      prev_bottom = bottom;
    }
    part->set_side_step(static_cast<int>(side_steps.median() + 0.5));
    // Spacing is measured against the next partition in the list; the last
    // partition has no successor and gets the page height instead.
    if (!it.at_last()) {
      ColPartition* next_part = it.data_relative(1);
      part->set_bottom_spacing(part->median_bottom() -
                               next_part->median_bottom());
      part->set_top_spacing(part->median_top() - next_part->median_top());
    } else {
      part->set_bottom_spacing(page_height);
      part->set_top_spacing(page_height);
    }
    if (textord_debug_tabfind) {
      part->Print();
      tprintf("side step = %.2f, top spacing = %d, bottom spacing=%d\n",
              side_steps.median(), part->top_spacing(), part->bottom_spacing());
    }
    ++part_count;
  }
  if (part_count == 0)
    return;
  SmoothSpacings(resolution, page_height, block_parts);
  // Move the partitions into individual block lists and make the blocks.
  BLOCK_IT block_it(completed_blocks);
  TO_BLOCK_IT to_block_it(to_blocks);
  ColPartition_LIST spacing_parts;
  ColPartition_IT sp_block_it(&spacing_parts);
  int same_block_threshold = max_line_height * kMaxSameBlockLineSpacing;
  for (it.mark_cycle_pt(); !it.empty();) {
    ColPartition* part = it.extract();
    sp_block_it.add_to_end(part);
    it.forward();
    if (it.empty() || part->bottom_spacing() > same_block_threshold ||
        !part->SpacingsEqual(*it.data(), resolution)) {
      // There is a spacing boundary. Check to see if it.data() belongs
      // better in the current block or the next one.
      if (!it.empty() && part->bottom_spacing() <= same_block_threshold) {
        ColPartition* next_part = it.data();
        // If there is a size match one-way, then the middle line goes with
        // its matched size, otherwise it goes with the smallest spacing.
        ColPartition* third_part = it.at_last() ? NULL : it.data_relative(1);
        if (textord_debug_tabfind) {
          tprintf("Spacings unequal: upper:%d/%d, lower:%d/%d,"
                  " sizes %d %d %d\n",
                  part->top_spacing(), part->bottom_spacing(),
                  next_part->top_spacing(), next_part->bottom_spacing(),
                  part->median_size(), next_part->median_size(),
                  third_part != NULL ? third_part->median_size() : 0);
        }
        // We can only consider adding the next line to the block if the sizes
        // match and the lines are close enough for their size.
        if (part->SizesSimilar(*next_part) &&
            next_part->median_size() * kMaxSameBlockLineSpacing >
                part->bottom_spacing() &&
            part->median_size() * kMaxSameBlockLineSpacing >
                part->top_spacing()) {
          // Even now, we can only add it as long as the third line doesn't
          // match in the same way and have a smaller bottom spacing.
          if (third_part == NULL ||
              !next_part->SizesSimilar(*third_part) ||
              third_part->median_size() * kMaxSameBlockLineSpacing <=
                  next_part->bottom_spacing() ||
              next_part->median_size() * kMaxSameBlockLineSpacing <=
                  next_part->top_spacing() ||
              next_part->bottom_spacing() > part->bottom_spacing()) {
            // Add to the current block.
            sp_block_it.add_to_end(it.extract());
            it.forward();
            if (textord_debug_tabfind) {
              tprintf("Added line to current block.\n");
            }
          }
        }
      }
      TO_BLOCK* to_block = MakeBlock(bleft, tright, &spacing_parts, used_parts);
      if (to_block != NULL) {
        to_block_it.add_to_end(to_block);
        block_it.add_to_end(to_block->block);
      }
      // Re-point the iterator at spacing_parts before collecting the next run.
      sp_block_it.set_to_list(&spacing_parts);
    } else {
      if (textord_debug_tabfind && !it.empty()) {
        ColPartition* next_part = it.data();
        // The format has exactly four %d fields, so pass exactly four values.
        tprintf("Spacings equal: upper:%d/%d, lower:%d/%d\n",
                part->top_spacing(), part->bottom_spacing(),
                next_part->top_spacing(), next_part->bottom_spacing());
      }
    }
  }
}
| ColPartition_CLIST* tesseract::ColPartition::lower_partners | ( | ) | [inline] |
Definition at line 199 of file colpartition.h.
{
// Returns the address of the lower_partners_ member list (not a copy).
return &lower_partners_;
}
| ColPartition * tesseract::ColPartition::MakeBigPartition | ( | BLOBNBOX * | box, |
| ColPartition_LIST * | big_part_list | ||
| ) | [static] |
Definition at line 129 of file colpartition.cpp.
{
  // Wraps a single blob in a newly allocated partition of type BRT_UNKNOWN
  // with flow BTFT_NONE, marks it block-owned, and returns it. When
  // big_part_list is non-NULL the new partition is also appended to it.
  box->set_owner(NULL);
  ColPartition* big_part = new ColPartition(BRT_UNKNOWN, ICOORD(0, 1));
  big_part->set_flow(BTFT_NONE);
  big_part->AddBox(box);
  big_part->ComputeLimits();
  big_part->ClaimBoxes();
  big_part->SetBlobTypes();
  big_part->set_block_owned(true);
  if (big_part_list == NULL)
    return big_part;  // Caller did not ask for the partition to be listed.
  ColPartition_IT list_it(big_part_list);
  list_it.add_to_end(big_part);
  return big_part;
}
| TO_BLOCK * tesseract::ColPartition::MakeBlock | ( | const ICOORD & | bleft, |
| const ICOORD & | tright, | ||
| ColPartition_LIST * | block_parts, | ||
| ColPartition_LIST * | used_parts | ||
| ) | [static] |
Definition at line 1605 of file colpartition.cpp.
{
  // Builds a polygonal block from block_parts and hands the partitions to
  // MoveBlobsToBlock. Returns NULL for an empty list. A list whose first
  // partition is PT_VERTICAL_TEXT is delegated to MakeVerticalTextBlock.
  if (block_parts->empty())
    return NULL;
  ColPartition_IT part_it(block_parts);
  ColPartition* first_part = part_it.data();
  PolyBlockType block_type = first_part->type();
  if (block_type == PT_VERTICAL_TEXT)
    return MakeVerticalTextBlock(bleft, tright, block_parts, used_parts);

  // The line spacing is read from the first partition's bottom spacing;
  // if that is below its median size, the box height is used instead.
  int line_spacing = first_part->bottom_spacing();
  if (line_spacing < first_part->median_size())
    line_spacing = first_part->bounding_box().height();

  ICOORDELT_LIST vertices;
  ICOORDELT_IT vertex_it(&vertices);
  ICOORD run_start, run_end;
  int x_lo = MAX_INT32;
  int x_hi = -MAX_INT32;
  int y_lo = MAX_INT32;
  int y_hi = -MAX_INT32;
  // Pass 0 collects left edge runs, pass 1 collects right edge runs.
  // Each run contributes two vertices, clipped to the bleft/tright range.
  int pass = 0;
  while (pass < 2) {
    if (pass == 0) {
      ColPartition::LeftEdgeRun(&part_it, &run_start, &run_end);
    } else {
      ColPartition::RightEdgeRun(&part_it, &run_start, &run_end);
    }
    ClipCoord(bleft, tright, &run_start);
    ClipCoord(bleft, tright, &run_end);
    vertex_it.add_after_then_move(new ICOORDELT(run_start));
    vertex_it.add_after_then_move(new ICOORDELT(run_end));
    UpdateRange(run_start.x(), &x_lo, &x_hi);
    UpdateRange(run_end.x(), &x_lo, &x_hi);
    UpdateRange(run_start.y(), &y_lo, &y_hi);
    UpdateRange(run_end.y(), &y_lo, &y_hi);
    // A pass ends when the iterator is at the first element (pass 0) or at
    // the last element (pass 1); the iterator is then moved to the last.
    const bool pass_done = (pass == 0) ? part_it.at_first()
                                       : part_it.at_last();
    if (pass_done) {
      ++pass;
      part_it.move_to_last();
    }
  }
  if (textord_debug_tabfind) {
    tprintf("Making block at (%d,%d)->(%d,%d)\n",
            x_lo, y_lo, x_hi, y_hi);
  }
  BLOCK* new_block = new BLOCK("", true, 0, 0, x_lo, y_lo, x_hi, y_hi);
  new_block->set_poly_block(new POLY_BLOCK(&vertices, block_type));
  return MoveBlobsToBlock(false, line_spacing, new_block, block_parts,
                          used_parts);
}
| ColPartition * tesseract::ColPartition::MakeLinePartition | ( | BlobRegionType | blob_type, |
| const ICOORD & | vertical, | ||
| int | left, | ||
| int | bottom, | ||
| int | right, | ||
| int | top | ||
| ) | [static] |
Constructs a fake ColPartition with no BLOBNBOXes to represent a horizontal or vertical line, given a type and a bounding box.
Definition at line 161 of file colpartition.cpp.
{
  // Allocates a partition with no blobs and fills in its geometry directly
  // from the supplied box coordinates.
  ColPartition* line_part = new ColPartition(blob_type, vertical);
  const TBOX line_box(left, bottom, right, top);
  line_part->bounding_box_ = line_box;
  // Medians are taken straight from the box.
  line_part->median_top_ = top;
  line_part->median_bottom_ = bottom;
  line_part->median_width_ = right - left;
  line_part->median_size_ = top - bottom;
  // Keys are set from the box keys once the box is in place.
  line_part->left_key_ = line_part->BoxLeftKey();
  line_part->right_key_ = line_part->BoxRightKey();
  return line_part;
}
| TO_BLOCK * tesseract::ColPartition::MakeVerticalTextBlock | ( | const ICOORD & | bleft, |
| const ICOORD & | tright, | ||
| ColPartition_LIST * | block_parts, | ||
| ColPartition_LIST * | used_parts | ||
| ) | [static] |
Definition at line 1658 of file colpartition.cpp.
{
  // Builds a rectangular block covering every partition in block_parts and
  // hands the partitions to MoveBlobsToBlock. Returns NULL for an empty list.
  if (block_parts->empty())
    return NULL;
  ColPartition_IT part_it(block_parts);
  ColPartition* first_part = part_it.data();
  // The width of the first partition's box is used as the line spacing.
  TBOX union_box = first_part->bounding_box();
  int line_spacing = union_box.width();
  PolyBlockType block_type = first_part->type();
  // Grow the box to include every partition in the list.
  part_it.mark_cycle_pt();
  while (!part_it.cycled_list()) {
    union_box += part_it.data()->bounding_box();
    part_it.forward();
  }
  if (textord_debug_tabfind) {
    tprintf("Making block at:");
    union_box.print();
  }
  BLOCK* new_block = new BLOCK("", true, 0, 0,
                               union_box.left(), union_box.bottom(),
                               union_box.right(), union_box.top());
  new_block->set_poly_block(new POLY_BLOCK(union_box, block_type));
  return MoveBlobsToBlock(true, line_spacing, new_block, block_parts,
                          used_parts);
}
| bool tesseract::ColPartition::MarkAsLeaderIfMonospaced | ( | ) |
Definition at line 1054 of file colpartition.cpp.
{
  // Decides whether the blobs of this partition are regular enough to be a
  // leader. Every blob's flow is first set to BTFT_NEIGHBOURS. If the gap
  // statistics are stable and the DP solver finds a path cheaper than the
  // blob count, the blobs and the partition are marked BRT_TEXT/BTFT_LEADER
  // and true is returned; otherwise false is returned.
  bool result = false;
  // Gather statistics on the gaps between blobs and the widths of the blobs.
  int part_width = bounding_box_.width();
  STATS gap_stats(0, part_width);
  STATS width_stats(0, part_width);
  BLOBNBOX_C_IT it(&boxes_);
  BLOBNBOX* prev_blob = it.data();
  prev_blob->set_flow(BTFT_NEIGHBOURS);
  width_stats.add(prev_blob->bounding_box().width(), 1);
  int blob_count = 1;
  for (it.forward(); !it.at_first(); it.forward()) {
    BLOBNBOX* blob = it.data();
    int left = blob->bounding_box().left();
    int right = blob->bounding_box().right();
    gap_stats.add(left - prev_blob->bounding_box().right(), 1);
    width_stats.add(right - left, 1);
    blob->set_flow(BTFT_NEIGHBOURS);
    prev_blob = blob;
    ++blob_count;
  }
  double median_gap = gap_stats.median();
  double median_width = width_stats.median();
  double max_width = MAX(median_gap, median_width);
  double min_width = MIN(median_gap, median_width);
  double gap_iqr = gap_stats.ile(0.75f) - gap_stats.ile(0.25f);
  if (textord_debug_tabfind >= 4) {
    tprintf("gap iqr = %g, blob_count=%d, limits=%g,%g\n",
            gap_iqr, blob_count, max_width * kMaxLeaderGapFractionOfMax,
            min_width * kMaxLeaderGapFractionOfMin);
  }
  if (gap_iqr < max_width * kMaxLeaderGapFractionOfMax &&
      gap_iqr < min_width * kMaxLeaderGapFractionOfMin &&
      blob_count >= kMinLeaderCount) {
    // This is stable enough to be called a leader, so check the widths.
    // Since leader dashes can join, run a dp cutting algorithm and go
    // on the cost.
    int offset = static_cast<int>(ceil(gap_iqr * 2));
    int min_step = static_cast<int>(median_gap + median_width + 0.5);
    int max_step = min_step + offset;
    min_step -= offset;
    // Pad the buffer with min_step/2 on each end.
    int part_left = bounding_box_.left() - min_step / 2;
    part_width += min_step;
    DPPoint* projection = new DPPoint[part_width];
    for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
      BLOBNBOX* blob = it.data();
      int left = blob->bounding_box().left();
      int right = blob->bounding_box().right();
      int height = blob->bounding_box().height();
      // NOTE(review): the index uses left rather than the loop variable x,
      // so the whole blob's cost lands on one projection cell. This looks
      // unintended, but it is preserved here because changing it would
      // alter leader detection results -- confirm before changing.
      for (int x = left; x < right; ++x) {
        projection[left - part_left].AddLocalCost(height);
      }
    }
    DPPoint* best_end = DPPoint::Solve(min_step, max_step, false,
                                       &DPPoint::CostWithVariance,
                                       part_width, projection);
    if (best_end != NULL && best_end->total_cost() < blob_count) {
      // Good enough. Call it a leader.
      result = true;
      for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
        BLOBNBOX* blob = it.data();
        // If the first or last blob is spaced too much, don't mark it.
        if (it.at_first()) {
          int gap = it.data_relative(1)->bounding_box().left() -
                    blob->bounding_box().right();
          if (blob->bounding_box().width() + gap > max_step) {
            it.extract();
            continue;
          }
        }
        if (it.at_last()) {
          int gap = blob->bounding_box().left() -
                    it.data_relative(-1)->bounding_box().right();
          if (blob->bounding_box().width() + gap > max_step) {
            it.extract();
            break;
          }
        }
        blob->set_region_type(BRT_TEXT);
        blob->set_flow(BTFT_LEADER);
      }
      blob_type_ = BRT_TEXT;
      flow_ = BTFT_LEADER;
    } else if (textord_debug_tabfind) {
      if (best_end == NULL) {
        tprintf("No path\n");
      } else {
        // Print both compared values; the cast keeps the argument matched
        // to %d whatever integer type total_cost() returns.
        tprintf("Total cost = %d vs allowed %d\n",
                static_cast<int>(best_end->total_cost()), blob_count);
      }
    }
    delete [] projection;
  }
  return result;
}
| bool tesseract::ColPartition::MatchingColumns | ( | const ColPartition & | other | ) | const |
Definition at line 349 of file colpartition.cpp.
{
  // Both the left and the right edge, sampled at the y midway between the
  // two partitions' MidY values and scaled down by kColumnWidthFactor, must
  // pass NearlyEqual with a tolerance of 1.
  const int mid_y = (MidY() + other.MidY()) / 2;
  return NearlyEqual(other.LeftAtY(mid_y) / kColumnWidthFactor,
                     LeftAtY(mid_y) / kColumnWidthFactor, 1) &&
         NearlyEqual(other.RightAtY(mid_y) / kColumnWidthFactor,
                     RightAtY(mid_y) / kColumnWidthFactor, 1);
}
| bool tesseract::ColPartition::MatchingSizes | ( | const ColPartition & | other | ) | const |
Definition at line 384 of file colpartition.cpp.
{
  // If either partition is vertical text, the median widths are compared;
  // otherwise the median sizes are. The result is the negation of
  // TabFind::DifferentSizes.
  const bool either_vertical = blob_type_ == BRT_VERT_TEXT ||
                               other.blob_type_ == BRT_VERT_TEXT;
  if (either_vertical) {
    return !TabFind::DifferentSizes(median_width_, other.median_width_);
  }
  return !TabFind::DifferentSizes(median_size_, other.median_size_);
}
| bool tesseract::ColPartition::MatchingStrokeWidth | ( | const ColPartition & | other, |
| double | fractional_tolerance, | ||
| double | constant_tolerance | ||
| ) | const |
Definition at line 409 of file colpartition.cpp.
{
  // Walks both blob lists in step, pairing blobs by position, and stops as
  // soon as either list has been cycled. Returns true when strictly more
  // pairs match in stroke width than fail to match.
  BLOBNBOX_C_IT this_it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
  BLOBNBOX_C_IT that_it(const_cast<BLOBNBOX_CLIST*>(&other.boxes_));
  // Net score: +1 for each matching pair, -1 for each non-matching pair.
  int balance = 0;
  for (this_it.mark_cycle_pt(), that_it.mark_cycle_pt();
       !this_it.cycled_list() && !that_it.cycled_list();
       this_it.forward(), that_it.forward()) {
    const bool pair_matches =
        this_it.data()->MatchingStrokeWidth(*that_it.data(),
                                            fractional_tolerance,
                                            constant_tolerance);
    balance += pair_matches ? 1 : -1;
  }
  return balance > 0;
}
| bool tesseract::ColPartition::MatchingTextColor | ( | const ColPartition & | other | ) | const |
Definition at line 361 of file colpartition.cpp.
{
  // Rejects the pair when both partitions' L_ALPHA_CHANNEL entries of
  // color1_ exceed kMaxRMSColorNoise. Otherwise each partition's two colors
  // are measured against the line through the other's two colors, and all
  // four distances must be below kMaxColorDistance.
  const bool both_noisy = color1_[L_ALPHA_CHANNEL] > kMaxRMSColorNoise &&
                          other.color1_[L_ALPHA_CHANNEL] > kMaxRMSColorNoise;
  if (both_noisy)
    return false;
  // Distances of this partition's colors from other's color line.
  const double own1_to_other =
      ImageFind::ColorDistanceFromLine(other.color1_, other.color2_, color1_);
  const double own2_to_other =
      ImageFind::ColorDistanceFromLine(other.color1_, other.color2_, color2_);
  // Distances of other's colors from this partition's color line.
  const double other1_to_own =
      ImageFind::ColorDistanceFromLine(color1_, color2_, other.color1_);
  const double other2_to_own =
      ImageFind::ColorDistanceFromLine(color1_, color2_, other.color2_);
  if (own1_to_other >= kMaxColorDistance)
    return false;
  if (own2_to_other >= kMaxColorDistance)
    return false;
  if (other1_to_own >= kMaxColorDistance)
    return false;
  return other2_to_own < kMaxColorDistance;
}
| int tesseract::ColPartition::median_bottom | ( | ) | const [inline] |
Definition at line 127 of file colpartition.h.
{
// Returns the stored median_bottom_ value.
return median_bottom_;
}
| int tesseract::ColPartition::median_left | ( | ) | const [inline] |
Definition at line 130 of file colpartition.h.
{
// Returns the stored median_left_ value.
return median_left_;
}
| int tesseract::ColPartition::median_right | ( | ) | const [inline] |
Definition at line 133 of file colpartition.h.
{
// Returns the stored median_right_ value.
return median_right_;
}
| int tesseract::ColPartition::median_size | ( | ) | const [inline] |
Definition at line 136 of file colpartition.h.
{
// Returns the stored median_size_ value.
return median_size_;
}
| int tesseract::ColPartition::median_top | ( | ) | const [inline] |
Definition at line 124 of file colpartition.h.
{
// Returns the stored median_top_ value.
return median_top_;
}
| int tesseract::ColPartition::median_width | ( | ) | const [inline] |
Definition at line 142 of file colpartition.h.
{
// Returns the stored median_width_ value.
return median_width_;
}
| int tesseract::ColPartition::MedianY | ( | ) | const [inline] |
Definition at line 308 of file colpartition.h.
{
// Midpoint of median_top_ and median_bottom_, using integer division.
return (median_top_ + median_bottom_) / 2;
}
| int tesseract::ColPartition::MidX | ( | ) | const [inline] |
Definition at line 312 of file colpartition.h.
| int tesseract::ColPartition::MidY | ( | ) | const [inline] |
Definition at line 304 of file colpartition.h.
| ColPartition* tesseract::ColPartition::nearest_neighbor_above | ( | ) | const [inline] |
Definition at line 249 of file colpartition.h.
{
// Returns the stored nearest_neighbor_above_ pointer.
return nearest_neighbor_above_;
}
| ColPartition* tesseract::ColPartition::nearest_neighbor_below | ( | ) | const [inline] |
Definition at line 255 of file colpartition.h.
{
// Returns the stored nearest_neighbor_below_ pointer.
return nearest_neighbor_below_;
}
| bool tesseract::ColPartition::OKDiacriticMerge | ( | const ColPartition & | candidate, |
| bool | debug | ||
| ) | const |
Definition at line 437 of file colpartition.cpp.
{
  // Returns true when every blob of this partition is a diacritic and the
  // range common to all their base characters (highest base bottom up to
  // lowest base top) overlaps candidate's median bottom..top range.
  // When debug is set, the reason for the outcome is printed.
  BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
  int min_top = MAX_INT32;
  int max_bottom = -MAX_INT32;
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    BLOBNBOX* blob = it.data();
    if (!blob->IsDiacritic()) {
      if (debug) {
        tprintf("Blob is not a diacritic:");
        blob->bounding_box().print();
      }
      return false;  // All blobs must have diacritic bases.
    }
    if (blob->base_char_top() < min_top)
      min_top = blob->base_char_top();
    if (blob->base_char_bottom() > max_bottom)
      max_bottom = blob->base_char_bottom();
  }
  // If the intersection of all vertical ranges of all base characters
  // overlaps the median range of the candidate, then it is OK.
  bool result = min_top > candidate.median_bottom_ &&
                max_bottom < candidate.median_top_;
  if (debug) {
    if (result) {
      tprintf("OKDiacritic!\n");
    } else {
      // Report the two ranges that were actually compared, i.e. the
      // candidate's medians rather than this partition's.
      tprintf("y ranges don\'t overlap: %d-%d / %d-%d\n",
              max_bottom, min_top,
              candidate.median_bottom_, candidate.median_top_);
    }
  }
  return result;
}
| bool tesseract::ColPartition::OKMergeOverlap | ( | const ColPartition & | merge1, |
| const ColPartition & | merge2, | ||
| int | ok_box_overlap, | ||
| bool | debug | ||
| ) |
Definition at line 713 of file colpartition.cpp.
{
  // Decides whether merging merge1 and merge2 is acceptable with respect to
  // this partition. Each rejection prints its reason when debug is set.

  // Rule 1: none of the three partitions may be of vertical type.
  const bool any_vertical = IsVerticalType() || merge1.IsVerticalType() ||
                            merge2.IsVerticalType();
  if (any_vertical) {
    if (debug) {
      tprintf("Vertical partition\n");
    }
    return false;
  }

  // Rule 2: merge1 and merge2 must have a significant vertical core overlap.
  if (!merge1.VSignificantCoreOverlap(merge2)) {
    if (debug) {
      tprintf("Voverlap %d (%d)\n",
              merge1.VCoreOverlap(merge2),
              merge1.VSignificantCoreOverlap(merge2));
    }
    return false;
  }

  // Rule 3: the union of the two boxes must not both cross this partition's
  // median range and reach more than ok_box_overlap into its bounding box.
  TBOX union_box(merge1.bounding_box());
  union_box += merge2.bounding_box();
  const bool crosses_median = union_box.bottom() < median_top_ &&
                              union_box.top() > median_bottom_;
  if (crosses_median &&
      union_box.bottom() < bounding_box_.top() - ok_box_overlap &&
      union_box.top() > bounding_box_.bottom() + ok_box_overlap) {
    if (debug) {
      tprintf("Excessive box overlap\n");
    }
    return false;
  }
  return true;  // All rules passed.
}
Definition at line 746 of file colpartition.cpp.
{
  // NOTE(review): the signature row for this body is missing from the page;
  // `box` is presumably a TBOX parameter -- confirm against the header.
  // Accumulates the bounding boxes of the blobs in list order and returns
  // the first blob (after the initial one) whose inclusion makes the
  // accumulated box overlap `box`. Returns NULL if the list has fewer than
  // two blobs or no such blob is found.
  if (boxes_.empty() || boxes_.singleton())
    return NULL;
  BLOBNBOX_C_IT blob_it(&boxes_);
  TBOX accumulated(blob_it.data()->bounding_box());
  blob_it.forward();
  while (!blob_it.at_first()) {
    BLOBNBOX* current = blob_it.data();
    accumulated += current->bounding_box();
    if (accumulated.overlap(box))
      return current;
    blob_it.forward();
  }
  return NULL;
}
| bool tesseract::ColPartition::owns_blobs | ( | ) | const [inline] |
Definition at line 291 of file colpartition.h.
{
// Returns the stored owns_blobs_ flag.
return owns_blobs_;
}
| PolyBlockType tesseract::ColPartition::PartitionType | ( | ColumnSpanningType | flow | ) | const |
Definition at line 978 of file colpartition.cpp.
{
  // Maps the blob type of this partition plus the given column-spanning
  // flow onto a PolyBlockType.
  if (flow == CST_NOISE) {
    // Only lines, rectangular images and vertical text survive a noise
    // flow; they are then treated as flowing. Everything else is noise.
    const bool survives_noise = blob_type_ == BRT_HLINE ||
                                blob_type_ == BRT_VLINE ||
                                blob_type_ == BRT_RECTIMAGE ||
                                blob_type_ == BRT_VERT_TEXT;
    if (!survives_noise)
      return PT_NOISE;
    flow = CST_FLOWING;
  }
  // Blob types whose result does not depend on the flow.
  if (blob_type_ == BRT_NOISE)
    return PT_NOISE;
  if (blob_type_ == BRT_HLINE)
    return PT_HORZ_LINE;
  if (blob_type_ == BRT_VLINE)
    return PT_VERT_LINE;
  if (blob_type_ == BRT_VERT_TEXT)
    return PT_VERTICAL_TEXT;
  if (blob_type_ == BRT_RECTIMAGE || blob_type_ == BRT_POLYIMAGE) {
    // Images: the flow selects the image variant.
    if (flow == CST_FLOWING)
      return PT_FLOWING_IMAGE;
    if (flow == CST_HEADING)
      return PT_HEADING_IMAGE;
    if (flow == CST_PULLOUT)
      return PT_PULLOUT_IMAGE;
    ASSERT_HOST(!"Undefined flow type for image!");
  } else {
    // BRT_TEXT, BRT_UNKNOWN and any other blob type are handled as text.
    if (flow == CST_FLOWING)
      return PT_FLOWING_TEXT;
    if (flow == CST_HEADING)
      return PT_HEADING_TEXT;
    if (flow == CST_PULLOUT)
      return PT_PULLOUT_TEXT;
    ASSERT_HOST(!"Undefined flow type for text!");
  }
  ASSERT_HOST(!"Should never get here!");
  return PT_NOISE;
}
| void tesseract::ColPartition::Print | ( | ) | const |
Definition at line 1735 of file colpartition.cpp.
{
  // Debug-prints the partition's margins, keys, box, medians, goodness
  // flags, types, column range, blob count and surrounding spaces on one
  // line via tprintf. Key positions are sampled at the partition's MidY.
  const int mid_y = MidY();
  const char empty_mark = boxes_.empty() ? 'E' : ' ';
  const char left_mark = left_key_tab_ ? 'T' : 'B';
  const char right_mark = right_key_tab_ ? 'T' : 'B';
  tprintf("ColPart:%c(M%d-%c%d-B%d/%d,%d/%d)->(%dB-%d%c-%dM/%d,%d/%d)"
          " w-ok=%d, v-ok=%d, type=%d%c%d, fc=%d, lc=%d, boxes=%d"
          " ts=%d bs=%d ls=%d rs=%d\n",
          empty_mark,
          left_margin_, left_mark, LeftAtY(mid_y),
          bounding_box_.left(), median_left_,
          bounding_box_.bottom(), median_bottom_,
          bounding_box_.right(), RightAtY(mid_y), right_mark,
          right_margin_, median_right_,
          bounding_box_.top(), median_top_,
          good_width_, good_column_,
          type_, kBlobTypes[blob_type_], flow_,
          first_column_, last_column_, boxes_.length(),
          space_above_, space_below_, space_to_left_, space_to_right_);
}
| void tesseract::ColPartition::PrintColors | ( | ) |
Definition at line 1753 of file colpartition.cpp.
{
// Debug-prints color1_ (red, green, blue, then its L_ALPHA_CHANNEL entry)
// followed by the red, green and blue entries of color2_ via tprintf.
tprintf("Colors:(%d, %d, %d)%d -> (%d, %d, %d)\n",
color1_[COLOR_RED], color1_[COLOR_GREEN], color1_[COLOR_BLUE],
color1_[L_ALPHA_CHANNEL],
color2_[COLOR_RED], color2_[COLOR_GREEN], color2_[COLOR_BLUE]);
}
| void tesseract::ColPartition::RefinePartners | ( | PolyBlockType | type, |
| bool | get_desperate | ||
| ColPartitionGrid * | grid | ||
| ) |
Definition at line 1830 of file colpartition.cpp.
{
  // When this partition's type is similar to the requested type, refine
  // both partner directions and finish.
  if (TypesSimilar(type_, type)) {
    RefinePartnersInternal(true, get_desperate, grid);
    RefinePartnersInternal(false, get_desperate, grid);
    return;
  }
  // Any other type except PT_COUNT needs no work here.
  if (type != PT_COUNT)
    return;
  // PT_COUNT marks the final pass: make sure only the correctly typed
  // partners survive, however many there are.
  RefinePartnersByType(true, &upper_partners_);
  RefinePartnersByType(false, &lower_partners_);
  // A merge may have given a partition multiple partners again, so as a
  // last resort use overlap, which is guaranteed to leave at most one
  // partner.
  const bool many_upper = !(upper_partners_.empty() ||
                            upper_partners_.singleton());
  if (many_upper)
    RefinePartnersByOverlap(true, &upper_partners_);
  const bool many_lower = !(lower_partners_.empty() ||
                            lower_partners_.singleton());
  if (many_lower)
    RefinePartnersByOverlap(false, &lower_partners_);
}
| void tesseract::ColPartition::ReflectInYAxis | ( | ) |
Definition at line 299 of file colpartition.cpp.
{
  // Mirrors the partition in the y axis: the order of the blobs in boxes_
  // is reversed, the margins are swapped and negated, and the limits are
  // recomputed. Asserts that neither key is a tab.
  // The temporary list holds the BLOBNBOX pointers extracted from boxes_,
  // so it is declared with the blob list types.
  BLOBNBOX_CLIST reversed_boxes;
  BLOBNBOX_C_IT reversed_it(&reversed_boxes);
  // Reverse the order of the boxes_.
  BLOBNBOX_C_IT bb_it(&boxes_);
  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
    // Each extracted blob is inserted before the previous one, which
    // builds the reversed sequence.
    reversed_it.add_before_then_move(bb_it.extract());
  }
  bb_it.add_list_after(&reversed_boxes);
  ASSERT_HOST(!left_key_tab_ && !right_key_tab_);
  // The old right margin, negated, becomes the new left margin and
  // vice versa.
  int tmp = left_margin_;
  left_margin_ = -right_margin_;
  right_margin_ = -tmp;
  ComputeLimits();
}
| void tesseract::ColPartition::RemoveBox | ( | BLOBNBOX * | box | ) |
Definition at line 213 of file colpartition.cpp.
{
  // Searches boxes_ for the given blob pointer. The first match is
  // extracted from the list and the limits are recomputed; if the blob is
  // not present nothing changes.
  BLOBNBOX_C_IT blob_it(&boxes_);
  blob_it.mark_cycle_pt();
  while (!blob_it.cycled_list()) {
    if (blob_it.data() == box) {
      blob_it.extract();
      ComputeLimits();
      return;
    }
    blob_it.forward();
  }
}
| void tesseract::ColPartition::RemovePartner | ( | bool | upper, |
| ColPartition * | partner | ||
| ) |
Definition at line 597 of file colpartition.cpp.
{
  // Picks the upper or lower partner list and extracts the first entry
  // that is the given partner. Does nothing if the partner is not listed.
  ColPartition_CLIST* partner_list = upper ? &upper_partners_
                                           : &lower_partners_;
  ColPartition_C_IT partner_it(partner_list);
  partner_it.mark_cycle_pt();
  while (!partner_it.cycled_list()) {
    if (partner_it.data() == partner) {
      partner_it.extract();
      return;
    }
    partner_it.forward();
  }
}
| int tesseract::ColPartition::right_key | ( | ) | const [inline] |
Definition at line 178 of file colpartition.h.
{
// Returns the stored right_key_ value.
return right_key_;
}
| bool tesseract::ColPartition::right_key_tab | ( | ) | const [inline] |
Definition at line 175 of file colpartition.h.
{
// Returns the stored right_key_tab_ flag.
return right_key_tab_;
}
| int tesseract::ColPartition::right_margin | ( | ) | const [inline] |
Definition at line 118 of file colpartition.h.
{
// Returns the stored right_margin_ value.
return right_margin_;
}
| int tesseract::ColPartition::RightAtY | ( | int | y | ) | const [inline] |
Definition at line 344 of file colpartition.h.
{
// Converts right_key_ to an x coordinate at the given y via XAtY.
return XAtY(right_key_, y);
}
| int tesseract::ColPartition::RightBlobRule | ( | ) | const |
Definition at line 529 of file colpartition.cpp.
{
  // Returns right_rule() of the last blob in boxes_. The const is cast away
  // only to construct the iterator; the list is not modified here. No
  // emptiness check is made before data() is used.
  BLOBNBOX_CLIST* blob_list = const_cast<BLOBNBOX_CLIST*>(&boxes_);
  BLOBNBOX_C_IT last_it(blob_list);
  last_it.move_to_last();
  BLOBNBOX* last_blob = last_it.data();
  return last_blob->right_rule();
}
| void tesseract::ColPartition::set_blob_type | ( | BlobRegionType | t | ) | [inline] |
Definition at line 151 of file colpartition.h.
{
// Stores t in blob_type_.
blob_type_ = t;
}
| void tesseract::ColPartition::set_block_owned | ( | bool | owned | ) | [inline] |
Definition at line 208 of file colpartition.h.
{
// Stores owned in block_owned_.
block_owned_ = owned;
}
| void tesseract::ColPartition::set_bottom_spacing | ( | int | spacing | ) | [inline] |
Definition at line 223 of file colpartition.h.
{
// Stores spacing in bottom_spacing_.
bottom_spacing_ = spacing;
}
| void tesseract::ColPartition::set_first_column | ( | int | column | ) | [inline] |
Definition at line 688 of file colpartition.h.
{
// Stores column in first_column_.
first_column_ = column;
}
| void tesseract::ColPartition::set_flow | ( | BlobTextFlowType | f | ) | [inline] |
Definition at line 157 of file colpartition.h.
{
// Stores f in flow_.
flow_ = f;
}
| void tesseract::ColPartition::set_inside_table_column | ( | bool | val | ) | [inline] |
Definition at line 246 of file colpartition.h.
{
// Stores val in inside_table_column_.
inside_table_column_ = val;
}
| void tesseract::ColPartition::set_last_column | ( | int | column | ) | [inline] |
Definition at line 691 of file colpartition.h.
{
// Stores column in last_column_.
last_column_ = column;
}
| void tesseract::ColPartition::set_left_margin | ( | int | margin | ) | [inline] |
Definition at line 115 of file colpartition.h.
{
// Stores margin in left_margin_.
left_margin_ = margin;
}
| void tesseract::ColPartition::set_median_size | ( | int | size | ) | [inline] |
Definition at line 139 of file colpartition.h.
{
// Stores size in median_size_.
median_size_ = size;
}
| void tesseract::ColPartition::set_median_width | ( | int | width | ) | [inline] |
Definition at line 145 of file colpartition.h.
{
// Stores width in median_width_.
median_width_ = width;
}
| void tesseract::ColPartition::set_nearest_neighbor_above | ( | ColPartition * | part | ) | [inline] |
Definition at line 252 of file colpartition.h.
{
// Stores the pointer part in nearest_neighbor_above_.
nearest_neighbor_above_ = part;
}
| void tesseract::ColPartition::set_nearest_neighbor_below | ( | ColPartition * | part | ) | [inline] |
Definition at line 258 of file colpartition.h.
{
// Stores the pointer part in nearest_neighbor_below_.
nearest_neighbor_below_ = part;
}
| void tesseract::ColPartition::set_owns_blobs | ( | bool | owns_blobs | ) | [inline] |
Definition at line 294 of file colpartition.h.
{
// Stores owns_blobs in owns_blobs_, after asserting that boxes_ is empty.
// Do NOT change ownership flag when there are blobs in the list.
// Immediately set the ownership flag when creating copies.
ASSERT_HOST(boxes_.empty());
owns_blobs_ = owns_blobs;
}
| void tesseract::ColPartition::set_right_margin | ( | int | margin | ) | [inline] |
Definition at line 121 of file colpartition.h.
{
// Stores margin in right_margin_.
right_margin_ = margin;
}
| void tesseract::ColPartition::set_side_step | ( | int | step | ) | [inline] |
Definition at line 217 of file colpartition.h.
{
// Stores step in side_step_.
side_step_ = step;
}
| void tesseract::ColPartition::set_space_above | ( | int | space | ) | [inline] |
Definition at line 264 of file colpartition.h.
{
// Stores space in space_above_.
space_above_ = space;
}
| void tesseract::ColPartition::set_space_below | ( | int | space | ) | [inline] |
Definition at line 270 of file colpartition.h.
{
// Stores space in space_below_.
space_below_ = space;
}
| void tesseract::ColPartition::set_space_to_left | ( | int | space | ) | [inline] |
Definition at line 276 of file colpartition.h.
{
// Stores space in space_to_left_.
space_to_left_ = space;
}
| void tesseract::ColPartition::set_space_to_right | ( | int | space | ) | [inline] |
Definition at line 282 of file colpartition.h.
{
// Stores space in space_to_right_.
space_to_right_ = space;
}
| void tesseract::ColPartition::set_table_type | ( | ) | [inline] |
Definition at line 233 of file colpartition.h.
| void tesseract::ColPartition::set_top_spacing | ( | int | spacing | ) | [inline] |
Definition at line 229 of file colpartition.h.
{
// Stores spacing in top_spacing_.
top_spacing_ = spacing;
}
| void tesseract::ColPartition::set_type | ( | PolyBlockType | t | ) | [inline] |
Definition at line 184 of file colpartition.h.
{
// Stores t in type_.
type_ = t;
}
| void tesseract::ColPartition::set_vertical | ( | const ICOORD & | v | ) | [inline] |
Definition at line 193 of file colpartition.h.
{
// Copies v into vertical_.
vertical_ = v;
}
| void tesseract::ColPartition::set_working_set | ( | WorkingPartSet * | working_set | ) | [inline] |
Definition at line 202 of file colpartition.h.
{
// Stores the pointer working_set in working_set_.
working_set_ = working_set;
}
| void tesseract::ColPartition::SetBlobTypes | ( | ) |
Definition at line 1233 of file colpartition.cpp.
{
  // Pushes this partition's flow and blob type down onto its blobs. Does
  // nothing when the partition does not own its blobs.
  if (owns_blobs()) {
    BLOBNBOX_C_IT blob_it(&boxes_);
    blob_it.mark_cycle_pt();
    while (!blob_it.cycled_list()) {
      BLOBNBOX* current = blob_it.data();
      // A blob already marked as a leader keeps its flow.
      const bool is_leader = current->flow() == BTFT_LEADER;
      if (!is_leader)
        current->set_flow(flow_);
      current->set_region_type(blob_type_);
      // Each blob must be unowned or owned by this partition.
      ASSERT_HOST(current->owner() == NULL || current->owner() == this);
      blob_it.forward();
    }
  }
}
| void tesseract::ColPartition::SetColumnGoodness | ( | WidthCallback * | cb | ) |
| void tesseract::ColPartition::SetLeftTab | ( | const TabVector * | tab_vector | ) |
Definition at line 473 of file colpartition.cpp.
{
  // Adopts the tab vector's sort key as the left key when a vector is given
  // and its key is not greater than BoxLeftKey(); left_key_tab_ records
  // whether that happened. Otherwise the left key falls back to
  // BoxLeftKey().
  bool key_from_tab = false;
  if (tab_vector != NULL) {
    left_key_ = tab_vector->sort_key();
    key_from_tab = left_key_ <= BoxLeftKey();
  }
  left_key_tab_ = key_from_tab;
  if (!key_from_tab)
    left_key_ = BoxLeftKey();
}
| void tesseract::ColPartition::SetPartitionType | ( | int | resolution, |
| ColPartitionSet * | columns | ||
| ) |
Definition at line 946 of file colpartition.cpp.
{
  // Asks the column set for this partition's spanning type (which also
  // fills first_column_ and last_column_), records the column set, and
  // derives type_ from the spanning type.
  int spanned_col = -1;
  ColumnSpanningType spanning =
      columns->SpanningType(resolution,
                            bounding_box_.left(), bounding_box_.right(),
                            MidY(), left_margin_, right_margin_,
                            &first_column_, &last_column_,
                            &spanned_col);
  column_set_ = columns;
  const bool multi_column_pullout = first_column_ < last_column_ &&
                                    spanning == CST_PULLOUT &&
                                    !IsLineType();
  if (multi_column_pullout) {
    // Unequal columns may indicate that the pullout spans one of the columns
    // it lies in, so force it to be allocated to just that column.
    if (spanned_col >= 0) {
      first_column_ = spanned_col;
      last_column_ = spanned_col;
    } else if ((first_column_ & 1) == 0) {
      // The first column index is even: collapse onto it.
      last_column_ = first_column_;
    } else if ((last_column_ & 1) == 0) {
      // The last column index is even: collapse onto it.
      first_column_ = last_column_;
    } else {
      // Both indices are odd: collapse onto their integer midpoint.
      first_column_ = last_column_ = (first_column_ + last_column_) / 2;
    }
  }
  type_ = PartitionType(spanning);
}
| void tesseract::ColPartition::SetRegionAndFlowTypesFromProjectionValue | ( | int | value | ) |
Definition at line 1159 of file colpartition.cpp.
{
  // Chooses blob_type_ and flow_ for this partition from its blobs and the
  // signed projection `value`, then applies them to the blobs through
  // SetBlobTypes().
  //   - More BRT_HLINE than BRT_VLINE blobs (or vice versa) makes the
  //     partition a line with BTFT_NONE flow.
  //   - Otherwise, if |value| > 1, a positive value selects BRT_TEXT and a
  //     negative value selects BRT_VERT_TEXT; the flow strength comes from
  //     kMinStrongTextValue / kMinChainTextValue and the strong_score below.
  //   - If the flow is still BTFT_NEIGHBOURS and noisy_count >= blob_count,
  //     the partition becomes BRT_NOISE / BTFT_NONTEXT.
  int blob_count = 0;       // Total # blobs.
  // This used to be a local named good_blob_score_, i.e. it carried the
  // data-member naming convention and presumably hid the member behind
  // good_blob_score(). It has only ever been a local used by the debug
  // print, so it now has a local-style name.
  // NOTE(review): the member itself is not updated here - confirm whether
  // that is intended.
  int good_blob_total = 0;  // Total # good strokewidth neighbours.
  int noisy_count = 0;      // Total # neighbours marked as noise.
  int hline_count = 0;
  int vline_count = 0;
  BLOBNBOX_C_IT it(&boxes_);
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    BLOBNBOX* blob = it.data();
    ++blob_count;
    noisy_count += blob->NoisyNeighbours();
    good_blob_total += blob->GoodTextBlob();
    if (blob->region_type() == BRT_HLINE) ++hline_count;
    if (blob->region_type() == BRT_VLINE) ++vline_count;
  }
  // Defaults, used when neither the line counts nor the value decide.
  flow_ = BTFT_NEIGHBOURS;
  blob_type_ = BRT_UNKNOWN;
  if (hline_count > vline_count) {
    flow_ = BTFT_NONE;
    blob_type_ = BRT_HLINE;
  } else if (vline_count > hline_count) {
    flow_ = BTFT_NONE;
    blob_type_ = BRT_VLINE;
  } else if (value < -1 || 1 < value) {
    int long_side;
    int short_side;
    if (value > 0) {
      long_side = bounding_box_.width();
      short_side = bounding_box_.height();
      blob_type_ = BRT_TEXT;
    } else {
      long_side = bounding_box_.height();
      short_side = bounding_box_.width();
      blob_type_ = BRT_VERT_TEXT;
    }
    // We will combine the old metrics using aspect ratio and blob counts
    // with the input value by allowing a strong indication to flip the
    // STRONG_CHAIN/CHAIN flow values.
    int strong_score = blob_count >= kHorzStrongTextlineCount ? 1 : 0;
    if (short_side > kHorzStrongTextlineHeight) ++strong_score;
    if (short_side * kHorzStrongTextlineAspect < long_side) ++strong_score;
    if (abs(value) >= kMinStrongTextValue)
      flow_ = BTFT_STRONG_CHAIN;
    else if (abs(value) >= kMinChainTextValue)
      flow_ = BTFT_CHAIN;
    else
      flow_ = BTFT_NEIGHBOURS;
    // Upgrade chain to strong chain if the other indicators are good
    if (flow_ == BTFT_CHAIN && strong_score == 3)
      flow_ = BTFT_STRONG_CHAIN;
    // Downgrade strong vertical text to chain if the indicators are bad.
    if (flow_ == BTFT_STRONG_CHAIN && value < 0 && strong_score < 2)
      flow_ = BTFT_CHAIN;
  }
  if (flow_ == BTFT_NEIGHBOURS) {
    // Check for noisy neighbours.
    if (noisy_count >= blob_count) {
      flow_ = BTFT_NONTEXT;
      blob_type_ = BRT_NOISE;
    }
  }
  // Debug output for partitions inside the configured test region.
  if (TabFind::WithinTestRegion(2, bounding_box_.left(),
                                bounding_box_.bottom())) {
    tprintf("RegionFlowTypesFromProjectionValue count=%d, noisy=%d, score=%d,",
            blob_count, noisy_count, good_blob_total);
    tprintf(" Projection value=%d, flow=%d, blob_type=%d\n",
            value, flow_, blob_type_);
    Print();
  }
  SetBlobTypes();
}
| void tesseract::ColPartition::SetRightTab | ( | const TabVector * | tab_vector | ) |
Definition at line 485 of file colpartition.cpp.
{
  // Sets right_key_ / right_key_tab_ from tab_vector. The tab's sort key is
  // kept only when it is >= BoxRightKey(); otherwise (or when tab_vector is
  // NULL) right_key_ falls back to BoxRightKey() and right_key_tab_ is false.
  bool key_is_tab = false;
  if (tab_vector != NULL) {
    right_key_ = tab_vector->sort_key();
    key_is_tab = right_key_ >= BoxRightKey();
  }
  right_key_tab_ = key_is_tab;
  if (!key_is_tab)
    right_key_ = BoxRightKey();
}
| void tesseract::ColPartition::SetSpecialBlobsDensity | ( | const BlobSpecialTextType | type, |
| const float | density | ||
| ) |
Definition at line 555 of file colpartition.cpp.
{
// Asserts that type is below BSTT_COUNT, then stores density in the
// special_blobs_densities_ slot indexed by type.
ASSERT_HOST(type < BSTT_COUNT);
special_blobs_densities_[type] = density;
}
| ColPartition * tesseract::ColPartition::ShallowCopy | ( | ) | const |
Definition at line 1684 of file colpartition.cpp.
{
  // Allocates a new ColPartition and copies this partition's scalar
  // description into it (margins, bounding box, medians, keys, types,
  // column range, special-blob densities). No blobs are added here, and the
  // copy is marked as not owning blobs. The caller receives the raw pointer.
  ColPartition* copy = new ColPartition(blob_type_, vertical_);

  // Geometry.
  copy->bounding_box_ = bounding_box_;
  copy->left_margin_ = left_margin_;
  copy->right_margin_ = right_margin_;

  // Median statistics.
  copy->median_top_ = median_top_;
  copy->median_bottom_ = median_bottom_;
  copy->median_left_ = median_left_;
  copy->median_right_ = median_right_;
  copy->median_size_ = median_size_;
  copy->median_width_ = median_width_;

  // Tab keys and column placement.
  copy->left_key_ = left_key_;
  copy->right_key_ = right_key_;
  copy->left_key_tab_ = left_key_tab_;
  copy->right_key_tab_ = right_key_tab_;
  copy->first_column_ = first_column_;
  copy->last_column_ = last_column_;
  copy->good_width_ = good_width_;
  copy->good_column_ = good_column_;

  // Classification.
  copy->type_ = type_;
  copy->flow_ = flow_;
  memcpy(copy->special_blobs_densities_, special_blobs_densities_,
         sizeof(special_blobs_densities_));

  copy->owns_blobs_ = false;
  return copy;
}
| ColPartition * tesseract::ColPartition::SingletonPartner | ( | bool | upper | ) |
Definition at line 608 of file colpartition.cpp.
{
  // Returns the only entry of the upper (upper == true) or lower partner
  // list, or NULL when that list is not a singleton.
  ColPartition_CLIST* chosen = &lower_partners_;
  if (upper)
    chosen = &upper_partners_;
  if (chosen->singleton()) {
    ColPartition_C_IT only_it(chosen);
    return only_it.data();
  }
  return NULL;
}
| void tesseract::ColPartition::SmoothPartnerRun | ( | int | working_set_count | ) |
Definition at line 1761 of file colpartition.cpp.
{
  // Walks the chain of singleton lower partners starting at this partition,
  // finds the largest type_ value in the chain (including this one), and
  // assigns it to this partition and to every partner in the chain.
  // Column statistics are gathered but currently unused (see TODO).
  STATS left_stats(0, working_set_count);
  STATS right_stats(0, working_set_count);
  PolyBlockType top_type = type_;
  ColPartition* partner = SingletonPartner(false);
  while (partner != NULL) {
    if (top_type < partner->type_)
      top_type = partner->type_;
    if (partner->column_set_ == column_set_) {
      left_stats.add(partner->first_column_, 1);
      right_stats.add(partner->last_column_, 1);
    }
    partner = partner->SingletonPartner(false);
  }
  type_ = top_type;
  // TODO(rays) Either establish that it isn't necessary to set the columns,
  // or find a way to do it that does not cause an assert failure in
  // AddToWorkingSet.
#if 0
  first_column_ = left_stats.mode();
  last_column_ = right_stats.mode();
  if (last_column_ < first_column_)
    last_column_ = first_column_;
#endif
  // Second pass: propagate the chosen type down the same chain.
  partner = SingletonPartner(false);
  while (partner != NULL) {
    partner->type_ = top_type;
#if 0  // See TODO above
    if (column_set_ == partner->column_set_) {
      partner->first_column_ = first_column_;
      partner->last_column_ = last_column_;
    }
#endif
    partner = partner->SingletonPartner(false);
  }
}
| int tesseract::ColPartition::SortKey | ( | int | x, |
| int | y | ||
| ) | const [inline] |
Definition at line 316 of file colpartition.h.
{
// Forwards to the static TabVector::SortKey, supplying this partition's
// vertical_ member along with the given x and y.
return TabVector::SortKey(vertical_, x, y);
}
| int tesseract::ColPartition::space_above | ( | ) | const [inline] |
Definition at line 261 of file colpartition.h.
{
// Read-only accessor for space_above_.
return space_above_;
}
| int tesseract::ColPartition::space_below | ( | ) | const [inline] |
Definition at line 267 of file colpartition.h.
{
// Read-only accessor for space_below_.
return space_below_;
}
| int tesseract::ColPartition::space_to_left | ( | ) | const [inline] |
Definition at line 273 of file colpartition.h.
{
// Read-only accessor for space_to_left_.
return space_to_left_;
}
| int tesseract::ColPartition::space_to_right | ( | ) | const [inline] |
Definition at line 279 of file colpartition.h.
{
// Read-only accessor for space_to_right_.
return space_to_right_;
}
| int tesseract::ColPartition::SpecialBlobsCount | ( | const BlobSpecialTextType | type | ) |
Definition at line 540 of file colpartition.cpp.
{
  // Counts the blobs in boxes_ whose special_text_type() equals `type`.
  // Asserts that type is below BSTT_COUNT.
  ASSERT_HOST(type < BSTT_COUNT);
  int matches = 0;
  BLOBNBOX_C_IT box_it(&boxes_);
  for (box_it.mark_cycle_pt(); !box_it.cycled_list(); box_it.forward()) {
    if (box_it.data()->special_text_type() == type)
      ++matches;
  }
  return matches;
}
| float tesseract::ColPartition::SpecialBlobsDensity | ( | const BlobSpecialTextType | type | ) | const |
Definition at line 535 of file colpartition.cpp.
{
// Asserts that type is below BSTT_COUNT, then returns the stored
// special_blobs_densities_ entry for that type.
ASSERT_HOST(type < BSTT_COUNT);
return special_blobs_densities_[type];
}
| ColPartition * tesseract::ColPartition::SplitAt | ( | int | split_x | ) |
Definition at line 800 of file colpartition.cpp.
{
  // Splits this partition at x = split_x. Every blob whose bounding-box left
  // edge is >= split_x is moved into a newly allocated partition, which is
  // returned. Returns NULL (and changes nothing) when split_x is not strictly
  // inside the bounding box, or when no blob ends up in the new part.
  const bool strictly_inside = split_x > bounding_box_.left() &&
                               split_x < bounding_box_.right();
  if (!strictly_inside)
    return NULL;  // There will be no change.
  ColPartition* right_part = ShallowCopy();
  right_part->set_owns_blobs(owns_blobs());
  BLOBNBOX_C_IT it(&boxes_);
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    BLOBNBOX* blob = it.data();
    ColPartition* prev_owner = blob->owner();
    ASSERT_HOST(!owns_blobs() || prev_owner == this || prev_owner == NULL);
    if (blob->bounding_box().left() < split_x)
      continue;  // Stays in this partition.
    right_part->AddBox(it.extract());
    // Only re-home blobs that actually had an owner before the move.
    if (owns_blobs() && prev_owner != NULL)
      blob->set_owner(right_part);
  }
  // This partition must keep at least one blob.
  ASSERT_HOST(!it.empty());
  if (right_part->IsEmpty()) {
    // Split part ended up with nothing. Possible if split_x passes
    // through the last blob.
    delete right_part;
    return NULL;
  }
  // The split edge is the new shared margin and is no longer a tab on
  // either side.
  right_margin_ = split_x;
  right_key_tab_ = false;
  right_part->left_margin_ = split_x;
  right_part->left_key_tab_ = false;
  ComputeLimits();
  right_part->ComputeLimits();
  return right_part;
}
| ColPartition * tesseract::ColPartition::SplitAtBlob | ( | BLOBNBOX * | split_blob | ) |
Definition at line 764 of file colpartition.cpp.
{
  // Splits this partition at split_blob: split_blob and every blob visited
  // after it while iterating boxes_ are moved into a newly allocated
  // partition, which is returned. Returns NULL when nothing was moved.
  ColPartition* tail_part = ShallowCopy();
  tail_part->set_owns_blobs(owns_blobs());
  BLOBNBOX_C_IT it(&boxes_);
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    BLOBNBOX* blob = it.data();
    ColPartition* prev_owner = blob->owner();
    ASSERT_HOST(!owns_blobs() || prev_owner == this || prev_owner == NULL);
    // Nothing moves until split_blob is reached; after that everything does.
    if (blob != split_blob && tail_part->boxes_.empty())
      continue;
    tail_part->AddBox(it.extract());
    // Only re-home blobs that actually had an owner before the move.
    if (owns_blobs() && prev_owner != NULL)
      blob->set_owner(tail_part);
  }
  // This partition must keep at least one blob.
  ASSERT_HOST(!it.empty());
  if (tail_part->IsEmpty()) {
    // Split part ended up with nothing. Possible if split_blob is not
    // in the list of blobs.
    delete tail_part;
    return NULL;
  }
  right_key_tab_ = false;
  tail_part->left_key_tab_ = false;
  ComputeLimits();
  // TODO(nbeato) Merge Ray's CL like this:
  // if (owns_blobs())
  //  SetBlobTextlineGoodness();
  tail_part->ComputeLimits();
  // TODO(nbeato) Merge Ray's CL like this:
  // if (split_part->owns_blobs())
  //   split_part->SetBlobTextlineGoodness();
  return tail_part;
}
| int tesseract::ColPartition::top_spacing | ( | ) | const [inline] |
Definition at line 226 of file colpartition.h.
{
// Read-only accessor for top_spacing_.
return top_spacing_;
}
| PolyBlockType tesseract::ColPartition::type | ( | ) | const [inline] |
Definition at line 181 of file colpartition.h.
{
// Read-only accessor for the partition's PolyBlockType (type_).
return type_;
}
| bool tesseract::ColPartition::TypesMatch | ( | const ColPartition & | other | ) | const [inline] |
Definition at line 403 of file colpartition.h.
{
// Compares this partition's blob_type_ with other's by delegating to the
// two-argument TypesMatch overload.
return TypesMatch(blob_type_, other.blob_type_);
}
| static bool tesseract::ColPartition::TypesMatch | ( | BlobRegionType | type1, |
| BlobRegionType | type2 | ||
| ) | [inline, static] |
Definition at line 406 of file colpartition.h.
{
  // Two blob region types match when they are equal or either one is
  // BRT_UNKNOWN, and neither is a line type per BLOBNBOX::IsLineType.
  const bool compatible =
      type1 == type2 || type1 == BRT_UNKNOWN || type2 == BRT_UNKNOWN;
  if (!compatible)
    return false;
  if (BLOBNBOX::IsLineType(type1))
    return false;
  return !BLOBNBOX::IsLineType(type2);
}
| static bool tesseract::ColPartition::TypesSimilar | ( | PolyBlockType | type1, |
| PolyBlockType | type2 | ||
| ) | [inline, static] |
Definition at line 412 of file colpartition.h.
{
  // Types are similar when identical, or when one is PT_FLOWING_TEXT and the
  // other is PT_INLINE_EQUATION (in either order).
  if (type1 == type2)
    return true;
  const bool text_with_equation =
      type1 == PT_FLOWING_TEXT && type2 == PT_INLINE_EQUATION;
  const bool equation_with_text =
      type2 == PT_FLOWING_TEXT && type1 == PT_INLINE_EQUATION;
  return text_with_equation || equation_with_text;
}
| ColPartition_CLIST* tesseract::ColPartition::upper_partners | ( | ) | [inline] |
Definition at line 196 of file colpartition.h.
{
// Returns a non-const pointer to the upper_partners_ list member.
return &upper_partners_;
}
| int tesseract::ColPartition::VCoreOverlap | ( | const ColPartition & | other | ) | const [inline] |
Definition at line 375 of file colpartition.h.
| bool tesseract::ColPartition::VOverlaps | ( | const ColPartition & | other | ) | const [inline] |
Definition at line 370 of file colpartition.h.
{
// True when the y_gap between this bounding box and other's is negative.
return bounding_box_.y_gap(other.bounding_box_) < 0;
}
| bool tesseract::ColPartition::VSignificantCoreOverlap | ( | const ColPartition & | other | ) | const [inline] |
Definition at line 387 of file colpartition.h.
{
  // True when three times the VCoreOverlap with `other` exceeds the smaller
  // of the two partitions' median heights (median_top_ - median_bottom_).
  const int core_overlap = VCoreOverlap(other);
  const int own_height = median_top_ - median_bottom_;
  const int other_height = other.median_top_ - other.median_bottom_;
  const int shorter = MIN(own_height, other_height);
  return shorter < core_overlap * 3;
}
| bool tesseract::ColPartition::WithinSameMargins | ( | const ColPartition & | other | ) | const [inline] |
Definition at line 395 of file colpartition.h.
| int tesseract::ColPartition::XAtY | ( | int | sort_key, |
| int | y | ||
| ) | const [inline] |
Definition at line 320 of file colpartition.h.
{
// Forwards to the static TabVector::XAtY, supplying this partition's
// vertical_ member along with the given sort_key and y.
return TabVector::XAtY(vertical_, sort_key, y);
}