Tesseract
3.02
|
#include <tabvector.h>
Public Member Functions | |
TabVector () | |
~TabVector () | |
TabVector (const TabVector &src, TabAlignment alignment, const ICOORD &vertical_skew, BLOBNBOX *blob) | |
TabVector * | ShallowCopy () const |
const ICOORD & | startpt () const |
const ICOORD & | endpt () const |
int | extended_ymax () const |
int | extended_ymin () const |
int | sort_key () const |
int | mean_width () const |
void | set_top_constraints (TabConstraint_LIST *constraints) |
void | set_bottom_constraints (TabConstraint_LIST *constraints) |
TabVector_CLIST * | partners () |
void | set_startpt (const ICOORD &start) |
void | set_endpt (const ICOORD &end) |
bool | intersects_other_lines () const |
void | set_intersects_other_lines (bool value) |
int | XAtY (int y) const |
int | VOverlap (const TabVector &other) const |
int | VOverlap (int top_y, int bottom_y) const |
int | ExtendedOverlap (int top_y, int bottom_y) const |
bool | IsLeftTab () const |
bool | IsRightTab () const |
bool | IsSeparator () const |
bool | IsCenterTab () const |
bool | IsRagged () const |
bool | IsLeftOf (const TabVector &other) const |
bool | Partnerless () |
int | BoxCount () |
void | Freeze () |
void | XYFlip () |
void | ReflectInYAxis () |
void | ExtendToBox (BLOBNBOX *blob) |
void | SetYStart (int start_y) |
void | SetYEnd (int end_y) |
void | Rotate (const FCOORD &rotation) |
void | SetupConstraints () |
void | SetupPartnerConstraints () |
void | SetupPartnerConstraints (TabVector *partner) |
void | ApplyConstraints () |
bool | SimilarTo (const ICOORD &vertical, const TabVector &other, BlobGrid *grid) const |
void | MergeWith (const ICOORD &vertical, TabVector *other) |
void | AddPartner (TabVector *partner) |
bool | IsAPartner (const TabVector *other) |
void | Print (const char *prefix) |
void | Debug (const char *prefix) |
void | Display (ScrollView *tab_win) |
void | FitAndEvaluateIfNeeded (const ICOORD &vertical, TabFind *finder) |
void | Evaluate (const ICOORD &vertical, TabFind *finder) |
bool | Fit (ICOORD vertical, bool force_parallel) |
TabVector * | VerticalTextlinePartner () |
TabVector * | GetSinglePartner () |
Static Public Member Functions | |
static TabVector * | FitVector (TabAlignment alignment, ICOORD vertical, int extended_start_y, int extended_end_y, BLOBNBOX_CLIST *good_points, int *vertical_x, int *vertical_y) |
static int | SortKey (const ICOORD &vertical, int x, int y) |
static int | XAtY (const ICOORD &vertical, int sort_key, int y) |
static int | SortVectorsByKey (const void *v1, const void *v2) |
static void | MergeSimilarTabVectors (const ICOORD &vertical, TabVector_LIST *vectors, BlobGrid *grid) |
Definition at line 111 of file tabvector.h.
tesseract::TabVector::TabVector | ( | ) | [inline] |
Definition at line 113 of file tabvector.h.
// Default-constructor body: empty apart from the TODO note below.
{ // TODO(rays) fix this in elst.h line 1076, where it should use the // copy constructor instead of operator=. }
tesseract::TabVector::~TabVector | ( | ) |
Definition at line 171 of file tabvector.cpp.
// Destructor body is empty: no explicit cleanup is written here.
{ }
tesseract::TabVector::TabVector | ( | const TabVector & | src, |
TabAlignment | alignment, | ||
const ICOORD & | vertical_skew, | ||
BLOBNBOX * | blob | ||
) |
Definition at line 205 of file tabvector.cpp.
: extended_ymin_(src.extended_ymin_), extended_ymax_(src.extended_ymax_), sort_key_(0), percent_score_(0), mean_width_(0), needs_refit_(true), needs_evaluation_(true), intersects_other_lines_(false), alignment_(alignment), top_constraints_(NULL), bottom_constraints_(NULL) { BLOBNBOX_C_IT it(&boxes_); it.add_to_end(blob); TBOX box = blob->bounding_box(); if (IsLeftTab()) { startpt_ = box.botleft(); endpt_ = box.topleft(); } else { startpt_ = box.botright(); endpt_ = box.topright(); } sort_key_ = SortKey(vertical_skew, (startpt_.x() + endpt_.x()) / 2, (startpt_.y() + endpt_.y()) / 2); if (textord_debug_tabfind > 3) Print("Constructed a new tab vector:"); }
void tesseract::TabVector::AddPartner | ( | TabVector * | partner | ) |
Definition at line 493 of file tabvector.cpp.
{
  // Separators never take part in a partnership, in either role.
  const bool separator_involved = IsSeparator() || partner->IsSeparator();
  if (separator_involved) {
    return;
  }

  TabVector_C_IT partner_it(&partners_);
  if (!partner_it.empty()) {
    // Only the last entry is checked for a duplicate before appending.
    partner_it.move_to_last();
    if (partner_it.data() == partner) {
      return;
    }
  }
  partner_it.add_after_then_move(partner);
}
void tesseract::TabVector::ApplyConstraints | ( | ) |
Definition at line 354 of file tabvector.cpp.
{
  // Each constraint list is optional; apply whichever ones are set,
  // top first and then bottom.
  if (top_constraints_ != NULL) {
    TabConstraint::ApplyConstraints(top_constraints_);
  }
  if (bottom_constraints_ != NULL) {
    TabConstraint::ApplyConstraints(bottom_constraints_);
  }
}
int tesseract::TabVector::BoxCount | ( | ) | [inline] |
Definition at line 245 of file tabvector.h.
{
// Returns the length of the boxes_ list.
return boxes_.length();
}
void tesseract::TabVector::Debug | ( | const char * | prefix | ) |
Definition at line 540 of file tabvector.cpp.
void tesseract::TabVector::Display | ( | ScrollView * | tab_win | ) |
Definition at line 552 of file tabvector.cpp.
{
#ifndef GRAPHICS_DISABLED
  // Pick the pen colour: a fixed colour in "printable" debug mode, otherwise
  // one colour per alignment, with white for anything else.
  if (textord_debug_printable) {
    tab_win->Pen(ScrollView::BLUE);
  } else {
    switch (alignment_) {
      case TA_LEFT_ALIGNED:
        tab_win->Pen(ScrollView::LIME_GREEN);
        break;
      case TA_LEFT_RAGGED:
        tab_win->Pen(ScrollView::DARK_GREEN);
        break;
      case TA_RIGHT_ALIGNED:
        tab_win->Pen(ScrollView::PINK);
        break;
      case TA_RIGHT_RAGGED:
        tab_win->Pen(ScrollView::CORAL);
        break;
      default:
        tab_win->Pen(ScrollView::WHITE);
        break;
    }
  }

  // The vector itself, from start point to end point.
  tab_win->Line(startpt_.x(), startpt_.y(), endpt_.x(), endpt_.y());

  // The extensions below the start and above the end, drawn in grey.
  tab_win->Pen(ScrollView::GREY);
  tab_win->Line(startpt_.x(), startpt_.y(), startpt_.x(), extended_ymin_);
  tab_win->Line(endpt_.x(), extended_ymax_, endpt_.x(), endpt_.y());

  // The percent score, written as text at the start point.
  char score_text[64];
  snprintf(score_text, sizeof(score_text), "%d", percent_score_);
  tab_win->TextAttributes("Times", 50, false, false, false);
  tab_win->Text(startpt_.x(), startpt_.y(), score_text);
#endif
}
const ICOORD& tesseract::TabVector::endpt | ( | ) | const [inline] |
Definition at line 149 of file tabvector.h.
{
// Returns a const reference to the stored end point.
return endpt_;
}
void tesseract::TabVector::Evaluate | ( | const ICOORD & | vertical, |
TabFind * | finder | ||
) |
Definition at line 592 of file tabvector.cpp.
{
  // Scores this vector against its boxes and prunes the boxes that do not
  // support it.
  //   vertical: passed through to FitAndEvaluateIfNeeded when boxes were
  //             removed and a refit is required.
  //   finder:   supplies the gutter/neighbour measurements for each box and
  //             for the vector as a whole.
  // Effects: clears needs_evaluation_, sets percent_score_, may extract
  // entries from boxes_, may move the start/end via SetYStart/SetYEnd, and
  // empties boxes_ (score 0) when the overall gutter is too narrow.
  bool debug = false;
  needs_evaluation_ = false;
  int length = endpt_.y() - startpt_.y();
  if (length == 0 || boxes_.empty()) {
    percent_score_ = 0;
    Print("Zero length in evaluate");
    return;
  }
  // Compute the mean box height.
  BLOBNBOX_C_IT it(&boxes_);
  int mean_height = 0;
  int height_count = 0;
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    BLOBNBOX* bbox = it.data();
    const TBOX& box = bbox->bounding_box();
    int height = box.height();
    mean_height += height;
    ++height_count;
  }
  // boxes_ is non-empty here (checked above), so height_count > 0.
  mean_height /= height_count;
  int max_gutter = kGutterMultiple * mean_height;
  if (IsRagged()) {
    // Ragged edges face a tougher test in that the gap must always be within
    // the height of the blob.
    max_gutter = kGutterToNeighbourRatio * mean_height;
  }

  STATS gutters(0, max_gutter + 1);
  // Evaluate the boxes for their goodness, calculating the coverage as we go.
  // Remove boxes that are not good and shorten the list to the first and
  // last good boxes.
  int num_deleted_boxes = 0;
  bool text_on_image = false;
  int good_length = 0;
  const TBOX* prev_good_box = NULL;
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    BLOBNBOX* bbox = it.data();
    const TBOX& box = bbox->bounding_box();
    int mid_y = (box.top() + box.bottom()) / 2;
    if (TabFind::WithinTestRegion(2, XAtY(box.bottom()), box.bottom())) {
      if (!debug) {
        tprintf("After already deleting %d boxes, ", num_deleted_boxes);
        Print("Starting evaluation");
      }
      debug = true;
    }
    // A good box is one where the nearest neighbour on the inside is closer
    // than half the distance to the nearest neighbour on the outside
    // (of the putative column).
    bool left = IsLeftTab();
    int tab_x = XAtY(mid_y);
    int gutter_width;
    int neighbour_gap;
    finder->GutterWidthAndNeighbourGap(tab_x, mean_height, max_gutter, left,
                                       bbox, &gutter_width, &neighbour_gap);
    if (debug) {
      tprintf("Box (%d,%d)->(%d,%d) has gutter %d, ndist %d\n",
              box.left(), box.bottom(), box.right(), box.top(),
              gutter_width, neighbour_gap);
    }
    // Now we can make the test.
    if (neighbour_gap * kGutterToNeighbourRatio <= gutter_width) {
      // A good box contributes its height to the good_length.
      good_length += box.top() - box.bottom();
      gutters.add(gutter_width, 1);
      // Two good boxes together contribute the gap between them
      // to the good_length as well, as long as the gap is not
      // too big.
      if (prev_good_box != NULL) {
        int vertical_gap = box.bottom() - prev_good_box->top();
        double size1 = sqrt(static_cast<double>(prev_good_box->area()));
        double size2 = sqrt(static_cast<double>(box.area()));
        if (vertical_gap < kMaxFillinMultiple * MIN(size1, size2))
          good_length += vertical_gap;
        if (debug) {
          tprintf("Box and prev good, gap=%d, target %g, goodlength=%d\n",
                  vertical_gap, kMaxFillinMultiple * MIN(size1, size2),
                  good_length);
        }
      } else {
        // Adjust the start to the first good box.
        SetYStart(box.bottom());
      }
      prev_good_box = &box;
      if (bbox->flow() == BTFT_TEXT_ON_IMAGE)
        text_on_image = true;
    } else {
      // Get rid of boxes that are not good.
      if (debug) {
        tprintf("Bad Box (%d,%d)->(%d,%d) with gutter %d, ndist %d\n",
                box.left(), box.bottom(), box.right(), box.top(),
                gutter_width, neighbour_gap);
      }
      it.extract();
      ++num_deleted_boxes;
    }
  }
  if (debug) {
    Print("Evaluating:");
  }
  // If there are any good boxes, do it again, except this time get rid of
  // boxes that have a gutter that is a small fraction of the mean gutter.
  // This filters out ends that run into a coincidental gap in the text.
  int search_top = endpt_.y();
  int search_bottom = startpt_.y();
  int median_gutter = IntCastRounded(gutters.median());
  if (gutters.get_total() > 0) {
    prev_good_box = NULL;
    for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
      BLOBNBOX* bbox = it.data();
      const TBOX& box = bbox->bounding_box();
      int mid_y = (box.top() + box.bottom()) / 2;
      // A good box is one where the gutter width is at least some constant
      // fraction of the mean gutter width.
      bool left = IsLeftTab();
      int tab_x = XAtY(mid_y);
      int max_gutter = kGutterMultiple * mean_height;
      if (IsRagged()) {
        // Ragged edges face a tougher test in that the gap must always be
        // within the height of the blob.
        max_gutter = kGutterToNeighbourRatio * mean_height;
      }
      int gutter_width;
      int neighbour_gap;
      finder->GutterWidthAndNeighbourGap(tab_x, mean_height, max_gutter, left,
                                         bbox, &gutter_width, &neighbour_gap);
      // Now we can make the test.
      if (gutter_width >= median_gutter * kMinGutterFraction) {
        if (prev_good_box == NULL) {
          // Adjust the start to the first good box.
          SetYStart(box.bottom());
          search_bottom = box.top();
        }
        prev_good_box = &box;
        search_top = box.bottom();
      } else {
        // Get rid of boxes that are not good.
        if (debug) {
          tprintf("Bad Box (%d,%d)->(%d,%d) with gutter %d, mean gutter %d\n",
                  box.left(), box.bottom(), box.right(), box.top(),
                  gutter_width, median_gutter);
        }
        it.extract();
        // Keep a running count of removed boxes, exactly as the first pass
        // does; the count must accumulate rather than be overwritten.
        ++num_deleted_boxes;
      }
    }
  }
  // If there has been a good box, adjust the end.
  if (prev_good_box != NULL) {
    SetYEnd(prev_good_box->top());
    // Compute the percentage of the vector that is occupied by good boxes.
    int length = endpt_.y() - startpt_.y();
    percent_score_ = 100 * good_length / length;
    if (num_deleted_boxes > 0) {
      needs_refit_ = true;
      FitAndEvaluateIfNeeded(vertical, finder);
      if (boxes_.empty())
        return;
    }
    // Test the gutter over the whole vector, instead of just at the boxes.
    int required_shift;
    if (search_bottom > search_top) {
      search_bottom = startpt_.y();
      search_top = endpt_.y();
    }
    double min_gutter_width = kLineCountReciprocal / boxes_.length();
    min_gutter_width += IsRagged() ? kMinRaggedGutter : kMinAlignedGutter;
    min_gutter_width *= mean_height;
    int max_gutter_width = IntCastRounded(min_gutter_width) + 1;
    if (median_gutter > max_gutter_width)
      max_gutter_width = median_gutter;
    int gutter_width = finder->GutterWidth(search_bottom, search_top, *this,
                                           text_on_image, max_gutter_width,
                                           &required_shift);
    if (gutter_width < min_gutter_width) {
      if (debug) {
        tprintf("Rejecting bad tab Vector with %d gutter vs %g min\n",
                gutter_width, min_gutter_width);
      }
      boxes_.shallow_clear();
      percent_score_ = 0;
    } else if (debug) {
      tprintf("Final gutter %d, vs limit of %g, required shift = %d\n",
              gutter_width, min_gutter_width, required_shift);
    }
  } else {
    // There are no good boxes left, so score is 0.
    percent_score_ = 0;
  }

  if (debug) {
    Print("Evaluation complete:");
  }
}
int tesseract::TabVector::extended_ymax | ( | ) | const [inline] |
Definition at line 152 of file tabvector.h.
{
// Returns the stored extended_ymax_ value.
return extended_ymax_;
}
int tesseract::TabVector::extended_ymin | ( | ) | const [inline] |
Definition at line 155 of file tabvector.h.
{
// Returns the stored extended_ymin_ value.
return extended_ymin_;
}
int tesseract::TabVector::ExtendedOverlap | ( | int | top_y, |
int | bottom_y | ||
) | const [inline] |
Definition at line 208 of file tabvector.h.
void tesseract::TabVector::ExtendToBox | ( | BLOBNBOX * | blob | ) |
Definition at line 247 of file tabvector.cpp.
// Inserts new_blob into boxes_. The list is walked from its current position
// while the current box's top is <= the new box's top; if new_blob itself is
// met during that walk the function returns without changing anything.
// Otherwise new_blob is added before the stopping position when that box's
// top is >= the new top, or after it (also the empty-list case) if not.
// Every insertion sets needs_refit_.
{ TBOX new_box = new_blob->bounding_box(); BLOBNBOX_C_IT it(&boxes_); if (!it.empty()) { BLOBNBOX* blob = it.data(); TBOX box = blob->bounding_box(); while (!it.at_last() && box.top() <= new_box.top()) { if (blob == new_blob) return; // We have it already. it.forward(); blob = it.data(); box = blob->bounding_box(); } if (box.top() >= new_box.top()) { it.add_before_stay_put(new_blob); needs_refit_ = true; return; } } needs_refit_ = true; it.add_after_stay_put(new_blob); }
bool tesseract::TabVector::Fit | ( | ICOORD | vertical, |
bool | force_parallel | ||
) |
Definition at line 793 of file tabvector.cpp.
// Refits the vector to the boxes in boxes_ and clears needs_refit_.
//   vertical:       direction used for the sort key; replaced by a fitted
//                   direction when a line fit is performed (see below).
//   force_parallel: when true, no line fit is done and vertical is used as is.
// With no boxes: returns false unless force_parallel, in which case only
// sort_key_ is recomputed from the midpoint of the current end points and the
// result is whether the start and end y values differ.
// With boxes: if not force_parallel and not ragged, a DetLineFit through the
// relevant box edges sets startpt_/endpt_ and, when they differ in y, the
// vertical direction. sort_key_ then becomes the most extreme key over the
// box corners (smallest for left tabs, largest otherwise), mean_width_ is the
// rounded-up mean box width, and needs_evaluation_ is set.
// Returns true, with the end points moved to the first box's bottom and the
// last box's top, only when those two y values differ.
{ needs_refit_ = false; if (boxes_.empty()) { // Don't refit something with no boxes, as that only happens // in Evaluate, and we don't want to end up with a zero vector. if (!force_parallel) return false; // If we are forcing parallel, then we just need to set the sort_key_. ICOORD midpt = startpt_; midpt += endpt_; midpt /= 2; sort_key_ = SortKey(vertical, midpt.x(), midpt.y()); return startpt_.y() != endpt_.y(); } if (!force_parallel && !IsRagged()) { // Use a fitted line as the vertical. DetLineFit linepoints; BLOBNBOX_C_IT it(&boxes_); // Fit a line to all the boxes in the list. for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { BLOBNBOX* bbox = it.data(); TBOX box = bbox->bounding_box(); int x1 = IsRightTab() ? box.right() : box.left(); ICOORD boxpt(x1, box.bottom()); linepoints.Add(boxpt); if (it.at_last()) { ICOORD top_pt(x1, box.top()); linepoints.Add(top_pt); } } linepoints.Fit(&startpt_, &endpt_); if (startpt_.y() != endpt_.y()) { vertical = endpt_; vertical -= startpt_; } } int start_y = startpt_.y(); int end_y = endpt_.y(); sort_key_ = IsLeftTab() ? MAX_INT32 : -MAX_INT32; BLOBNBOX_C_IT it(&boxes_); // Choose a line parallel to the vertical such that all boxes are on the // correct side of it. mean_width_ = 0; int width_count = 0; for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { BLOBNBOX* bbox = it.data(); TBOX box = bbox->bounding_box(); mean_width_ += box.width(); ++width_count; int x1 = IsRightTab() ? box.right() : box.left(); // Test both the bottom and the top, as one will be more extreme, depending // on the direction of skew. 
int bottom_y = box.bottom(); int top_y = box.top(); int key = SortKey(vertical, x1, bottom_y); if (IsLeftTab() == (key < sort_key_)) { sort_key_ = key; startpt_ = ICOORD(x1, bottom_y); } key = SortKey(vertical, x1, top_y); if (IsLeftTab() == (key < sort_key_)) { sort_key_ = key; startpt_ = ICOORD(x1, top_y); } if (it.at_first()) start_y = bottom_y; if (it.at_last()) end_y = top_y; } if (width_count > 0) { mean_width_ = (mean_width_ + width_count - 1) / width_count; } endpt_ = startpt_ + vertical; needs_evaluation_ = true; if (start_y != end_y) { // Set the ends of the vector to fully include the first and last blobs. startpt_.set_x(XAtY(vertical, sort_key_, start_y)); startpt_.set_y(start_y); endpt_.set_x(XAtY(vertical, sort_key_, end_y)); endpt_.set_y(end_y); return true; } return false; }
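void tesseract::TabVector::FitAndEvaluateIfNeeded | ( | const ICOORD & | vertical, |
TabFind * | finder | ||
) |
Definition at line 578 of file tabvector.cpp.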
TabVector * tesseract::TabVector::FitVector | ( | TabAlignment | alignment, |
ICOORD | vertical, | ||
int | extended_start_y, | ||
int | extended_end_y, | ||
BLOBNBOX_CLIST * | good_points, | ||
int * | vertical_x, | ||
int * | vertical_y | ||
) | [static] |
Definition at line 183 of file tabvector.cpp.
{
  // Build a candidate from the supplied points and try to fit it without
  // forcing it parallel to the given vertical.
  TabVector* fitted = new TabVector(extended_start_y, extended_end_y,
                                    alignment, good_points);
  if (!fitted->Fit(vertical, false)) {
    // The fit failed: discard the candidate and report nothing.
    delete fitted;
    return NULL;
  }

  if (!fitted->IsRagged()) {
    // Non-ragged vectors add their own direction to the caller's running
    // vertical totals, weighted by the number of boxes they hold.
    vertical = fitted->endpt_ - fitted->startpt_;
    const int box_weight = fitted->BoxCount();
    *vertical_x += vertical.x() * box_weight;
    *vertical_y += vertical.y() * box_weight;
  }
  return fitted;
}
void tesseract::TabVector::Freeze | ( | ) | [inline] |
Definition at line 250 of file tabvector.h.
// Empties boxes_ by calling shallow_clear() on it.
{ boxes_.shallow_clear(); }
TabVector * tesseract::TabVector::GetSinglePartner | ( | ) |
Definition at line 879 of file tabvector.cpp.
bool tesseract::TabVector::intersects_other_lines | ( | ) | const [inline] |
Definition at line 179 of file tabvector.h.
{
// Returns the stored intersects_other_lines_ flag.
return intersects_other_lines_;
}
bool tesseract::TabVector::IsAPartner | ( | const TabVector * | other | ) |
Definition at line 506 of file tabvector.cpp.
{
  // Scan partners_ once, comparing entries by pointer identity.
  TabVector_C_IT partner_it(&partners_);
  partner_it.mark_cycle_pt();
  while (!partner_it.cycled_list()) {
    if (partner_it.data() == other) {
      return true;
    }
    partner_it.forward();
  }
  return false;
}
bool tesseract::TabVector::IsCenterTab | ( | ) | const [inline] |
Definition at line 225 of file tabvector.h.
// True when alignment_ equals TA_CENTER_JUSTIFIED.
{ return alignment_ == TA_CENTER_JUSTIFIED; }
bool tesseract::TabVector::IsLeftOf | ( | const TabVector & | other | ) | const [inline] |
Definition at line 235 of file tabvector.h.
{
// Ordering is decided purely by comparing the two sort keys.
return sort_key_ < other.sort_key_;
}
bool tesseract::TabVector::IsLeftTab | ( | ) | const [inline] |
Definition at line 213 of file tabvector.h.
// True when alignment_ is TA_LEFT_ALIGNED or TA_LEFT_RAGGED.
{ return alignment_ == TA_LEFT_ALIGNED || alignment_ == TA_LEFT_RAGGED; }
bool tesseract::TabVector::IsRagged | ( | ) | const [inline] |
Definition at line 229 of file tabvector.h.
// True when alignment_ is TA_LEFT_RAGGED or TA_RIGHT_RAGGED.
{ return alignment_ == TA_LEFT_RAGGED || alignment_ == TA_RIGHT_RAGGED; }
bool tesseract::TabVector::IsRightTab | ( | ) | const [inline] |
Definition at line 217 of file tabvector.h.
// True when alignment_ is TA_RIGHT_ALIGNED or TA_RIGHT_RAGGED.
{ return alignment_ == TA_RIGHT_ALIGNED || alignment_ == TA_RIGHT_RAGGED; }
bool tesseract::TabVector::IsSeparator | ( | ) | const [inline] |
Definition at line 221 of file tabvector.h.
// True when alignment_ equals TA_SEPARATOR.
{ return alignment_ == TA_SEPARATOR; }
int tesseract::TabVector::mean_width | ( | ) | const [inline] |
Definition at line 161 of file tabvector.h.
{
// Returns the stored mean_width_ value.
return mean_width_;
}
void tesseract::TabVector::MergeSimilarTabVectors | ( | const ICOORD & | vertical, |
TabVector_LIST * | vectors, | ||
BlobGrid * | grid | ||
) | [static] |
Definition at line 362 of file tabvector.cpp.
// Merges vectors in the list that SimilarTo() reports as similar.
// For each vector v1, a second iterator starts just after it and advances
// until it reaches the first list element. On the first v2 for which
// v2->SimilarTo(vertical, *v1, grid) holds, v1 is extracted from the list and
// merged into v2 via MergeWith, and the inner scan stops. A merged vector
// whose x extent exceeds 100 in magnitude is printed as a possible bad merge.
{ TabVector_IT it1(vectors); for (it1.mark_cycle_pt(); !it1.cycled_list(); it1.forward()) { TabVector* v1 = it1.data(); TabVector_IT it2(it1); for (it2.forward(); !it2.at_first(); it2.forward()) { TabVector* v2 = it2.data(); if (v2->SimilarTo(vertical, *v1, grid)) { // Merge into the forward one, in case the combined vector now // overlaps one in between. if (textord_debug_tabfind) { v2->Print("Merging"); v1->Print("by deleting"); } v2->MergeWith(vertical, it1.extract()); if (textord_debug_tabfind) { v2->Print("Producing"); } ICOORD merged_vector = v2->endpt(); merged_vector -= v2->startpt(); if (abs(merged_vector.x()) > 100) { v2->Print("Garbage result of merge?"); } break; } } } }
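void tesseract::TabVector::MergeWith | ( | const ICOORD & | vertical, |
TabVector * | other | ||
) |
Definition at line 459 of file tabvector.cpp.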
// Absorbs `other` into this vector.
// The extended y range is widened to cover both vectors, and other's
// alignment is adopted when other is ragged. Each box is extracted from
// other's list and inserted into boxes_ at a position chosen by comparing box
// bottoms; a box that is the very same object as the one at the insertion
// point is not inserted again. Afterwards the vector is refitted with
// force_parallel = true and other->Delete(this) is called.
// NOTE(review): it1.data() is read without an emptiness check, so this looks
// like it expects boxes_ to be non-empty on entry -- confirm against callers.
{ extended_ymin_ = MIN(extended_ymin_, other->extended_ymin_); extended_ymax_ = MAX(extended_ymax_, other->extended_ymax_); if (other->IsRagged()) { alignment_ = other->alignment_; } // Merge sort the two lists of boxes. BLOBNBOX_C_IT it1(&boxes_); BLOBNBOX_C_IT it2(&other->boxes_); while (!it2.empty()) { BLOBNBOX* bbox2 = it2.extract(); it2.forward(); TBOX box2 = bbox2->bounding_box(); BLOBNBOX* bbox1 = it1.data(); TBOX box1 = bbox1->bounding_box(); while (box1.bottom() < box2.bottom() && !it1.at_last()) { it1.forward(); bbox1 = it1.data(); box1 = bbox1->bounding_box(); } if (box1.bottom() < box2.bottom()) { it1.add_to_end(bbox2); } else if (bbox1 != bbox2) { it1.add_before_stay_put(bbox2); } } Fit(vertical, true); other->Delete(this); }
bool tesseract::TabVector::Partnerless | ( | ) | [inline] |
Definition at line 240 of file tabvector.h.
{
// True when the partners_ list is empty.
return partners_.empty();
}
TabVector_CLIST* tesseract::TabVector::partners | ( | ) | [inline] |
Definition at line 170 of file tabvector.h.
{
// Returns the address of the internal partners_ list (not a copy).
return &partners_;
}
void tesseract::TabVector::Print | ( | const char * | prefix | ) |
Definition at line 526 of file tabvector.cpp.
// Prints a one-line summary of this vector through tprintf, preceded by
// `prefix`: alignment name, start and end points, mean width, percent score,
// sort key, and the lengths of boxes_ and partners_.
// NOTE(review): calling a member function through a null pointer is undefined
// behaviour in C++, so the `this == NULL` branch cannot be relied upon to
// run; callers should test the pointer themselves before calling Print.
{ if (this == NULL) { tprintf("%s <null>\n", prefix); } else { tprintf("%s %s (%d,%d)->(%d,%d) w=%d s=%d, sort key=%d, boxes=%d," " partners=%d\n", prefix, kAlignmentNames[alignment_], startpt_.x(), startpt_.y(), endpt_.x(), endpt_.y(), mean_width_, percent_score_, sort_key_, boxes_.length(), partners_.length()); } }
void tesseract::TabVector::ReflectInYAxis | ( | ) | [inline] |
Definition at line 265 of file tabvector.h.
{
  // Negate the x coordinate of both end points, and the sort key with them.
  startpt_.set_x(-startpt_.x());
  endpt_.set_x(-endpt_.x());
  sort_key_ = -sort_key_;

  // Swap each left alignment with its right counterpart and vice versa;
  // every other alignment value is left as it is.
  switch (alignment_) {
    case TA_LEFT_ALIGNED:
      alignment_ = TA_RIGHT_ALIGNED;
      break;
    case TA_RIGHT_ALIGNED:
      alignment_ = TA_LEFT_ALIGNED;
      break;
    case TA_LEFT_RAGGED:
      alignment_ = TA_RIGHT_RAGGED;
      break;
    case TA_RIGHT_RAGGED:
      alignment_ = TA_LEFT_RAGGED;
      break;
    default:
      break;
  }
}
void tesseract::TabVector::Rotate | ( | const FCOORD & | rotation | ) |
Definition at line 282 of file tabvector.cpp.
void tesseract::TabVector::set_bottom_constraints | ( | TabConstraint_LIST * | constraints | ) | [inline] |
Definition at line 167 of file tabvector.h.
// Stores the given pointer in bottom_constraints_ (pointer assignment only).
{ bottom_constraints_ = constraints; }
void tesseract::TabVector::set_endpt | ( | const ICOORD & | end | ) | [inline] |
Definition at line 176 of file tabvector.h.
// Overwrites the stored end point with `end`.
{ endpt_ = end; }
void tesseract::TabVector::set_intersects_other_lines | ( | bool | value | ) | [inline] |
Definition at line 182 of file tabvector.h.
// Overwrites the intersects_other_lines_ flag with `value`.
{ intersects_other_lines_ = value; }
void tesseract::TabVector::set_startpt | ( | const ICOORD & | start | ) | [inline] |
Definition at line 173 of file tabvector.h.
// Overwrites the stored start point with `start`.
{ startpt_ = start; }
void tesseract::TabVector::set_top_constraints | ( | TabConstraint_LIST * | constraints | ) | [inline] |
Definition at line 164 of file tabvector.h.
// Stores the given pointer in top_constraints_ (pointer assignment only).
{ top_constraints_ = constraints; }
void tesseract::TabVector::SetupConstraints | ( | ) |
Definition at line 297 of file tabvector.cpp.
// Calls TabConstraint::CreateConstraint for this vector twice, first with
// false and then with true as the second argument.
// NOTE(review): presumably the flag selects bottom vs. top -- confirm against
// TabConstraint::CreateConstraint.
{ TabConstraint::CreateConstraint(this, false); TabConstraint::CreateConstraint(this, true); }
void tesseract::TabVector::SetupPartnerConstraints | ( | ) |
Definition at line 303 of file tabvector.cpp.
// Links this vector's constraints with those of its partners, in list order:
// this vector's bottom with the first partner's bottom, each previous
// partner's top with the next partner's bottom, and this vector's top with
// the last partner's top. Each pair is merged only when
// TabConstraint::CompatibleConstraints accepts it. A partner missing either
// constraint list is reported via Print and skipped.
{ // With the first and last partner, we want a common bottom and top, // respectively, and for each change of partner, we want a common // top of first with bottom of next. TabVector_C_IT it(&partners_); TabVector* prev_partner = NULL; for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { TabVector* partner = it.data(); if (partner->top_constraints_ == NULL || partner->bottom_constraints_ == NULL) { partner->Print("Impossible: has no constraints"); Print("This vector has it as a partner"); continue; } if (prev_partner == NULL) { // This is the first partner, so common bottom. if (TabConstraint::CompatibleConstraints(bottom_constraints_, partner->bottom_constraints_)) TabConstraint::MergeConstraints(bottom_constraints_, partner->bottom_constraints_); } else { // We need prev top to be common with partner bottom. if (TabConstraint::CompatibleConstraints(prev_partner->top_constraints_, partner->bottom_constraints_)) TabConstraint::MergeConstraints(prev_partner->top_constraints_, partner->bottom_constraints_); } prev_partner = partner; if (it.at_last()) { // This is the last partner, so common top. if (TabConstraint::CompatibleConstraints(top_constraints_, partner->top_constraints_)) TabConstraint::MergeConstraints(top_constraints_, partner->top_constraints_); } } }
void tesseract::TabVector::SetupPartnerConstraints | ( | TabVector * | partner | ) |
Definition at line 342 of file tabvector.cpp.
{
  // Bottoms first: merge this vector's bottom constraints with the partner's
  // when the two lists are compatible.
  if (TabConstraint::CompatibleConstraints(bottom_constraints_,
                                           partner->bottom_constraints_)) {
    TabConstraint::MergeConstraints(bottom_constraints_,
                                    partner->bottom_constraints_);
  }

  // Then the same compatibility-gated merge for the top constraints.
  if (TabConstraint::CompatibleConstraints(top_constraints_,
                                           partner->top_constraints_)) {
    TabConstraint::MergeConstraints(top_constraints_,
                                    partner->top_constraints_);
  }
}
void tesseract::TabVector::SetYEnd | ( | int | end_y | ) |
Definition at line 276 of file tabvector.cpp.
void tesseract::TabVector::SetYStart | ( | int | start_y | ) |
Definition at line 271 of file tabvector.cpp.
TabVector * tesseract::TabVector::ShallowCopy | ( | ) | const |
Definition at line 234 of file tabvector.cpp.
bool tesseract::TabVector::SimilarTo | ( | const ICOORD & | vertical, |
const TabVector & | other, | ||
BlobGrid * | grid | ||
) | const |
Definition at line 395 of file tabvector.cpp.
// Decides whether this vector and `other` are close enough to be merged.
// Returns false unless both are right tabs or both are left tabs, and false
// when ExtendedOverlap with other's extended range is negative.
// Returns true when the sort keys differ by at most
// kSimilarVectorDist * v_scale, where v_scale is |vertical.y()| (1 if zero).
// Beyond that, only pairs where both are ragged and the keys are within
// kSimilarRaggedDist * v_scale remain candidates: with a NULL grid they are
// accepted outright, otherwise a vertical grid search between the two
// positions rejects the pair as soon as a blob's box overlaps horizontally
// with the strip the vector would move across.
{ if ((IsRightTab() && other.IsRightTab()) || (IsLeftTab() && other.IsLeftTab())) { // If they don't overlap, at least in extensions, then there is no chance. if (ExtendedOverlap(other.extended_ymax_, other.extended_ymin_) < 0) return false; // A fast approximation to the scale factor of the sort_key_. int v_scale = abs(vertical.y()); if (v_scale == 0) v_scale = 1; // If they are close enough, then OK. if (sort_key_ + kSimilarVectorDist * v_scale >= other.sort_key_ && sort_key_ - kSimilarVectorDist * v_scale <= other.sort_key_) return true; // Ragged tabs get a bigger threshold. if (!IsRagged() || !other.IsRagged() || sort_key_ + kSimilarRaggedDist * v_scale < other.sort_key_ || sort_key_ - kSimilarRaggedDist * v_scale > other.sort_key_) return false; if (grid == NULL) { // There is nothing else to test! return true; } // If there is nothing in the rectangle between the vector that is going to // move, and the place it is moving to, then they can be merged. // Setup a vertical search for any blob. const TabVector* mover = (IsRightTab() && sort_key_ < other.sort_key_) ? this : &other; int top_y = mover->endpt_.y(); int bottom_y = mover->startpt_.y(); int left = MIN(mover->XAtY(top_y), mover->XAtY(bottom_y)); int right = MAX(mover->XAtY(top_y), mover->XAtY(bottom_y)); int shift = abs(sort_key_ - other.sort_key_) / v_scale; if (IsRightTab()) { right += shift; } else { left -= shift; } GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> vsearch(grid); vsearch.StartVerticalSearch(left, right, top_y); BLOBNBOX* blob; while ((blob = vsearch.NextVerticalSearch(true)) != NULL) { TBOX box = blob->bounding_box(); if (box.top() > bottom_y) return true; // Nothing found. if (box.bottom() < top_y) continue; // Doesn't overlap. int left_at_box = XAtY(box.bottom()); int right_at_box = left_at_box; if (IsRightTab()) right_at_box += shift; else left_at_box -= shift; if (MIN(right_at_box, box.right()) > MAX(left_at_box, box.left())) return false; } return true; // Nothing found. 
} return false; }
int tesseract::TabVector::sort_key | ( | ) | const [inline] |
Definition at line 158 of file tabvector.h.
{
// Returns the stored sort_key_ value.
return sort_key_;
}
static int tesseract::TabVector::SortKey | ( | const ICOORD & | vertical, |
int | x, | ||
int | y | ||
) | [inline, static] |
Definition at line 280 of file tabvector.h.
// Builds the point (x, y) and returns the result of ICOORD's operator*
// applied to it and `vertical`.
// NOTE(review): presumably operator* is the ICOORD cross product -- confirm
// against the ICOORD declaration.
{ ICOORD pt(x, y); return pt * vertical; }
static int tesseract::TabVector::SortVectorsByKey | ( | const void * | v1, |
const void * | v2 | ||
) | [inline, static] |
const ICOORD& tesseract::TabVector::startpt | ( | ) | const [inline] |
Definition at line 146 of file tabvector.h.
{
// Returns a const reference to the stored start point.
return startpt_;
}
TabVector * tesseract::TabVector::VerticalTextlinePartner | ( | ) |
Definition at line 889 of file tabvector.cpp.
// Returns the single partner when this vector and that partner look like the
// two sides of a vertical text line, otherwise NULL.
// Requires partners_ to hold exactly one entry (NULL is returned if not).
// Walks boxes_ and the partner's boxes together, counting boxes present in
// both lists (matched, subject to the region_type() >= BRT_UNKNOWN tests) and
// the rest (unmatched), collecting the vertical gaps between consecutive
// boxes and the total box width. The pair is accepted when at least one gap
// was recorded, the matched count reaches
// textord_tabvector_vertical_box_ratio of all boxes, and the median gap is
// no more than textord_tabvector_vertical_gap_fraction times the mean width.
// NOTE(review): avg_width divides by the total box count, which is zero when
// boxes_ is empty -- confirm callers never reach here with no boxes.
{ if (!partners_.singleton()) return NULL; TabVector_C_IT partner_it(&partners_); TabVector* partner = partner_it.data(); BLOBNBOX_C_IT box_it1(&boxes_); BLOBNBOX_C_IT box_it2(&partner->boxes_); // Count how many boxes are also in the other list. // At the same time, gather the mean width and median vertical gap. if (textord_debug_tabfind > 1) { Print("Testing for vertical text"); partner->Print(" partner"); } int num_matched = 0; int num_unmatched = 0; int total_widths = 0; int width = startpt().x() - partner->startpt().x(); if (width < 0) width = -width; STATS gaps(0, width * 2); BLOBNBOX* prev_bbox = NULL; box_it2.mark_cycle_pt(); for (box_it1.mark_cycle_pt(); !box_it1.cycled_list(); box_it1.forward()) { BLOBNBOX* bbox = box_it1.data(); TBOX box = bbox->bounding_box(); if (prev_bbox != NULL) { gaps.add(box.bottom() - prev_bbox->bounding_box().top(), 1); } while (!box_it2.cycled_list() && box_it2.data() != bbox && box_it2.data()->bounding_box().bottom() < box.bottom()) { box_it2.forward(); } if (!box_it2.cycled_list() && box_it2.data() == bbox && bbox->region_type() >= BRT_UNKNOWN && (prev_bbox == NULL || prev_bbox->region_type() >= BRT_UNKNOWN)) ++num_matched; else ++num_unmatched; total_widths += box.width(); prev_bbox = bbox; } double avg_width = total_widths * 1.0 / (num_unmatched + num_matched); double max_gap = textord_tabvector_vertical_gap_fraction * avg_width; int min_box_match = static_cast<int>((num_matched + num_unmatched) * textord_tabvector_vertical_box_ratio); bool is_vertical = (gaps.get_total() > 0 && num_matched >= min_box_match && gaps.median() <= max_gap); if (textord_debug_tabfind > 1) { tprintf("gaps=%d, matched=%d, unmatched=%d, min_match=%d " "median gap=%.2f, width=%.2f max_gap=%.2f Vertical=%s\n", gaps.get_total(), num_matched, num_unmatched, min_box_match, gaps.median(), avg_width, max_gap, is_vertical?"Yes":"No"); } return (is_vertical) ? partner : NULL; }
int tesseract::TabVector::VOverlap | ( | const TabVector & | other | ) | const [inline] |
int tesseract::TabVector::VOverlap | ( | int | top_y, |
int | bottom_y | ||
) | const [inline] |
int tesseract::TabVector::XAtY | ( | int | y | ) | const [inline] |
static int tesseract::TabVector::XAtY | ( | const ICOORD & | vertical, |
int | sort_key, | ||
int | y | ||
) | [inline, static] |
void tesseract::TabVector::XYFlip | ( | ) | [inline] |