Tesseract
3.02
|
#include <paragraphs_internal.h>
Public Member Functions | |
ParagraphModelSmearer (GenericVector< RowScratchRegisters > *rows, int row_start, int row_end, ParagraphTheory *theory) | |
void | Smear () |
Definition at line 234 of file paragraphs_internal.h.
tesseract::ParagraphModelSmearer::ParagraphModelSmearer(GenericVector< RowScratchRegisters > *rows, int row_start, int row_end, ParagraphTheory *theory)
Definition at line 1258 of file paragraphs.cpp.
: theory_(theory),
  rows_(rows),
  row_start_(row_start),
  row_end_(row_end) {
  // Validate the requested row range first. On failure, collapse the range
  // to [0, 0) so that the row loop in Smear() has nothing to visit, and
  // leave open_models_ unpopulated.
  const bool args_ok =
      AcceptableRowArgs(0, 0, __func__, rows, row_start, row_end);
  if (!args_ok) {
    row_start_ = 0;
    row_end_ = 0;
    return;
  }

  // Append one (initially empty) set of models for every index in
  // [row_start - 1, row_end], i.e. one extra slot on each side of the
  // half-open row range [row_start, row_end).
  SetOfModels empty_set;
  int slot = row_start - 1;
  while (slot <= row_end) {
    open_models_.push_back(empty_set);
    ++slot;
  }
}
void tesseract::ParagraphModelSmearer::Smear()
Definition at line 1305 of file paragraphs.cpp.
{
  // Seed the open-model sets for the whole range before looking at any row.
  CalculateOpenModels(row_start_, row_end_);

  // Walk every row in [row_start_, row_end_) and try to attach paragraph
  // model hypotheses to it, preferring models that are currently "open"
  // (recently in use) over the full list held by the theory.
  for (int i = row_start_; i < row_end_; ++i) {
    RowScratchRegisters &row = (*rows_)[i];

    // Rows without any words carry no evidence; leave them untouched.
    if (row.ri_->num_words == 0) {
      continue;
    }

    // 1. Which justifications do the open models for this row use?
    //    Anything that is neither left- nor right-justified counts as both.
    //    This decides which alignment(s) to test when asking whether the
    //    first word of this row could have fit at the end of the row above.
    bool left_open = false;
    bool right_open = false;
    for (int m = 0; m < OpenModels(i).size(); ++m) {
      switch (OpenModels(i)[m]->justification()) {
        case JUSTIFICATION_LEFT:
          left_open = true;
          break;
        case JUSTIFICATION_RIGHT:
          right_open = true;
          break;
        default:
          left_open = true;
          right_open = true;
          break;
      }
    }

    // 2. Does this row look like the first line of a paragraph?
    //    Row 0 has no predecessor and is always treated as a start.
    bool starts_paragraph = true;
    if (i != 0) {
      RowScratchRegisters &above = (*rows_)[i - 1];
      if (left_open == right_open) {
        // Both alignments are open, or neither is: accept either test.
        starts_paragraph =
            LikelyParagraphStart(above, row, JUSTIFICATION_LEFT) ||
            LikelyParagraphStart(above, row, JUSTIFICATION_RIGHT);
      } else {
        // Exactly one alignment is open: test only that one.
        starts_paragraph = LikelyParagraphStart(
            above, row, left_open ? JUSTIFICATION_LEFT : JUSTIFICATION_RIGHT);
      }
    }

    // 3. Record the hypotheses that follow from that judgement.
    if (starts_paragraph) {
      // A start line: add a start hypothesis for each open model that
      // accepts this row as a valid first line.
      for (int m = 0; m < OpenModels(i).size(); ++m) {
        if (ValidFirstLine(rows_, i, OpenModels(i)[m])) {
          row.AddStartLine(OpenModels(i)[m]);
        }
      }
    } else {
      // A body line: candidate models are the strong hypotheses of the row
      // above. The non-centered-models fallback is kept as-is; note that
      // i == 0 always takes the start-line branch above.
      SetOfModels candidates;
      if (i > 0) {
        (*rows_)[i - 1].StrongHypotheses(&candidates);
      } else {
        theory_->NonCenteredModels(&candidates);
      }
      for (int m = 0; m < candidates.size(); ++m) {
        const ParagraphModel *candidate = candidates[m];
        if (ValidBodyLine(rows_, i, candidate)) {
          row.AddBodyLine(candidate);
        }
      }
    }

    // 4. If the row is still unclassified, or is a start line with more
    //    than one competing start hypothesis, try every non-centered model
    //    in the theory as a possible first line.
    bool still_unsure = (row.GetLineType() == LT_UNKNOWN);
    if (!still_unsure) {
      still_unsure =
          (row.GetLineType() == LT_START) && !row.UniqueStartHypothesis();
    }
    if (still_unsure) {
      SetOfModels every_model;
      theory_->NonCenteredModels(&every_model);
      for (int m = 0; m < every_model.size(); ++m) {
        if (ValidFirstLine(rows_, i, every_model[m])) {
          row.AddStartLine(every_model[m]);
        }
      }
    }

    // 5. This row's hypotheses may have changed, which affects which models
    //    are open for the following rows; recompute them for
    //    [i + 1, row_end_).
    if (row.GetLineType() != LT_UNKNOWN) {
      CalculateOpenModels(i + 1, row_end_);
    }
  }
}