Tesseract
3.02
|
#include <ltrresultiterator.h>
Public Member Functions | |
LTRResultIterator (PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres, int rect_left, int rect_top, int rect_width, int rect_height) | |
virtual | ~LTRResultIterator () |
char * | GetUTF8Text (PageIteratorLevel level) const |
void | SetLineSeparator (const char *new_line) |
void | SetParagraphSeparator (const char *new_para) |
float | Confidence (PageIteratorLevel level) const |
const char * | WordFontAttributes (bool *is_bold, bool *is_italic, bool *is_underlined, bool *is_monospace, bool *is_serif, bool *is_smallcaps, int *pointsize, int *font_id) const |
const char * | WordRecognitionLanguage () const |
StrongScriptDirection | WordDirection () const |
bool | WordIsFromDictionary () const |
bool | WordIsNumeric () const |
bool | HasBlamerInfo () const |
void * | GetParamsTrainingBundle () const |
const char * | GetBlamerDebug () const |
const char * | GetBlamerMisadaptionDebug () const |
char * | WordTruthUTF8Text () const |
const char * | WordLattice (int *lattice_size) const |
bool | SymbolIsSuperscript () const |
bool | SymbolIsSubscript () const |
bool | SymbolIsDropcap () const |
Protected Attributes | |
const char * | line_separator_ |
const char * | paragraph_separator_ |
Friends | |
class | ChoiceIterator |
Definition at line 46 of file ltrresultiterator.h.
tesseract::LTRResultIterator::LTRResultIterator (PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres, int rect_left, int rect_top, int rect_width, int rect_height)
Definition at line 30 of file ltrresultiterator.cpp.
// All page and rectangle arguments are handed through unchanged to the
// PageIterator base; both separators start out as a single newline.
    : PageIterator(page_res, tesseract, scale, scaled_yres,
                   rect_left, rect_top, rect_width, rect_height),
      line_separator_("\n"),
      paragraph_separator_("\n") {
}
tesseract::LTRResultIterator::~LTRResultIterator | ( | ) | [virtual] |
Definition at line 40 of file ltrresultiterator.cpp.
{ }  // Empty body: this destructor performs no work of its own.
float tesseract::LTRResultIterator::Confidence | ( | PageIteratorLevel | level | ) | const |
Definition at line 94 of file ltrresultiterator.cpp.
{
  // Averages best-choice certainty over the unit selected by `level`
  // (starting at the current word) and maps it to a value clamped to
  // [0, 100] via 100 + 5 * mean. Returns 0 when there is no current word
  // or when `level` matches none of the handled values.
  if (it_->word() == NULL) return 0.0f;  // Already at the end!

  // Walk a private copy so the iterator's own position is left untouched.
  PAGE_RES_IT cursor(*it_);
  WERD_CHOICE* word_choice = cursor.word()->best_choice;
  ASSERT_HOST(word_choice != NULL);

  float certainty_sum = 0.0f;
  int samples = 0;

  if (level == RIL_BLOCK || level == RIL_PARA || level == RIL_TEXTLINE) {
    // One word is always consumed; keep going while the next word still
    // belongs to the same block / paragraph / text line.
    bool same_unit = false;
    do {
      word_choice = cursor.word()->best_choice;
      ASSERT_HOST(word_choice != NULL);
      certainty_sum += word_choice->certainty();
      ++samples;
      cursor.forward();
      if (level == RIL_BLOCK) {
        same_unit = cursor.block() == cursor.prev_block();
      } else if (level == RIL_PARA) {
        // The block test comes first so the row pointers are only
        // dereferenced while still inside the same block.
        same_unit = cursor.block() == cursor.prev_block() &&
                    cursor.row()->row->para() ==
                        cursor.prev_row()->row->para();
      } else {
        same_unit = cursor.row() == cursor.prev_row();
      }
    } while (same_unit);
  } else if (level == RIL_WORD) {
    certainty_sum += word_choice->certainty();
    ++samples;
  } else if (level == RIL_SYMBOL) {
    BLOB_CHOICE_LIST_CLIST* blob_lists = word_choice->blob_choices();
    if (blob_lists == NULL) {
      // No per-blob choices available: use the word's certainty instead.
      certainty_sum += word_choice->certainty();
    } else {
      // Step to the choice list of the current blob.
      BLOB_CHOICE_LIST_C_IT list_it(blob_lists);
      for (int step = 0; step < blob_index_; ++step) list_it.forward();
      // Stop on the choice whose unichar id matches the best choice for
      // this blob, or once the list has been cycled through.
      BLOB_CHOICE_IT candidate_it(list_it.data());
      candidate_it.mark_cycle_pt();
      while (!candidate_it.cycled_list() &&
             candidate_it.data()->unichar_id() !=
                 word_choice->unichar_id(blob_index_)) {
        candidate_it.forward();
      }
      certainty_sum += candidate_it.data()->certainty();
    }
    ++samples;
  }

  if (samples <= 0) return 0.0f;

  float confidence = 100 + 5 * (certainty_sum / samples);
  if (confidence < 0.0f) confidence = 0.0f;
  if (confidence > 100.0f) confidence = 100.0f;
  return confidence;
}
const char * tesseract::LTRResultIterator::GetBlamerDebug | ( | ) | const |
Definition at line 254 of file ltrresultiterator.cpp.
{
  // Returns the C string held by the current word's blamer_bundle->debug.
  // Returns NULL when there is no current word or the word carries no
  // blamer bundle, using the same guards as WordLattice() and
  // GetParamsTrainingBundle() rather than dereferencing a NULL pointer.
  if (it_->word() == NULL || it_->word()->blamer_bundle == NULL) return NULL;
  return it_->word()->blamer_bundle->debug.string();
}
const char * tesseract::LTRResultIterator::GetBlamerMisadaptionDebug | ( | ) | const |
Definition at line 260 of file ltrresultiterator.cpp.
{
  // Returns the C string held by the current word's
  // blamer_bundle->misadaption_debug. Returns NULL when there is no current
  // word or the word carries no blamer bundle, using the same guards as
  // WordLattice() and GetParamsTrainingBundle() rather than dereferencing a
  // NULL pointer.
  if (it_->word() == NULL || it_->word()->blamer_bundle == NULL) return NULL;
  return it_->word()->blamer_bundle->misadaption_debug.string();
}
void * tesseract::LTRResultIterator::GetParamsTrainingBundle | ( | ) | const |
Definition at line 247 of file ltrresultiterator.cpp.
{
  // Yields the address of the current word's params_training_bundle, or
  // NULL when there is no current word or no blamer bundle attached to it.
  if (it_->word() == NULL || it_->word()->blamer_bundle == NULL) {
    return NULL;
  }
  return &(it_->word()->blamer_bundle->params_training_bundle);
}
char * tesseract::LTRResultIterator::GetUTF8Text | ( | PageIteratorLevel | level | ) | const |
Reimplemented in tesseract::ResultIterator.
Definition at line 45 of file ltrresultiterator.cpp.
{
  // Builds the text of the unit selected by `level`, starting at the
  // current position, and returns it in a freshly new[]-allocated,
  // NUL-terminated buffer (presumably released by the caller with
  // delete[] - confirm against the header). Returns NULL when there is no
  // current word.
  if (it_->word() == NULL) return NULL;  // Already at the end!

  STRING collected;
  PAGE_RES_IT cursor(*it_);  // private copy; it_ itself is not advanced
  WERD_CHOICE* word_choice = cursor.word()->best_choice;
  ASSERT_HOST(word_choice != NULL);

  switch (level) {
    case RIL_SYMBOL:
      collected = cursor.word()->BestUTF8(blob_index_, false);
      break;
    case RIL_WORD:
      collected = word_choice->unichar_string();
      break;
    default: {
      bool same_line = true;  // next word still on the same text line?
      bool same_para = true;  // next word still in the same paragraph?
      do {      // one pass per paragraph in a block
        do {    // one pass per text line in a paragraph
          do {  // one pass per word in a text line
            word_choice = cursor.word()->best_choice;
            ASSERT_HOST(word_choice != NULL);
            collected += word_choice->unichar_string();
            collected += " ";
            cursor.forward();
            same_line = cursor.row() == cursor.prev_row();
          } while (same_line);
          // Replace the trailing word-separating space with the line
          // separator.
          collected.truncate_at(collected.length() - 1);
          collected += line_separator_;
          // The block comparison is evaluated first, so the row pointers
          // are only dereferenced while still inside the same block.
          same_para = cursor.block() == cursor.prev_block() &&
                      cursor.row()->row->para() ==
                          cursor.prev_row()->row->para();
        } while (level != RIL_TEXTLINE && same_para);
        if (!same_para) collected += paragraph_separator_;
      } while (level == RIL_BLOCK && cursor.block() == cursor.prev_block());
      break;
    }
  }

  // Copy the text, including its terminating NUL, into the result buffer.
  const int buffer_len = collected.length() + 1;
  char* buffer = new char[buffer_len];
  strncpy(buffer, collected.string(), buffer_len);
  return buffer;
}
bool tesseract::LTRResultIterator::HasBlamerInfo | ( | ) | const |
Definition at line 239 of file ltrresultiterator.cpp.
{
  // True only when the current word exists, has a blamer bundle, and at
  // least one of the bundle's two debug strings is non-empty.
  if (it_->word() == NULL) return false;
  if (it_->word()->blamer_bundle == NULL) return false;
  return it_->word()->blamer_bundle->debug.length() > 0 ||
         it_->word()->blamer_bundle->misadaption_debug.length() > 0;
}
void tesseract::LTRResultIterator::SetLineSeparator | ( | const char * | new_line | ) |
Definition at line 83 of file ltrresultiterator.cpp.
{
  // Only the pointer is stored; the characters are not copied here.
  this->line_separator_ = new_line;
}
void tesseract::LTRResultIterator::SetParagraphSeparator | ( | const char * | new_para | ) |
Definition at line 88 of file ltrresultiterator.cpp.
{
  // Only the pointer is stored; the characters are not copied here.
  this->paragraph_separator_ = new_para;
}
bool tesseract::LTRResultIterator::SymbolIsDropcap | ( | ) | const |
Definition at line 312 of file ltrresultiterator.cpp.
{
  // Answers false unless cblob_it_ is NULL and there is a current word;
  // otherwise reports whether the current blob's position is SP_DROPCAP.
  if (cblob_it_ != NULL || it_->word() == NULL) return false;
  return it_->word()->box_word->BlobPosition(blob_index_) == SP_DROPCAP;
}
bool tesseract::LTRResultIterator::SymbolIsSubscript | ( | ) | const |
Definition at line 303 of file ltrresultiterator.cpp.
{
  // Answers false unless cblob_it_ is NULL and there is a current word;
  // otherwise reports whether the current blob's position is SP_SUBSCRIPT.
  if (cblob_it_ != NULL || it_->word() == NULL) return false;
  return it_->word()->box_word->BlobPosition(blob_index_) == SP_SUBSCRIPT;
}
bool tesseract::LTRResultIterator::SymbolIsSuperscript | ( | ) | const |
Definition at line 294 of file ltrresultiterator.cpp.
{
  // Answers false unless cblob_it_ is NULL and there is a current word;
  // otherwise reports whether the current blob's position is
  // SP_SUPERSCRIPT.
  if (cblob_it_ != NULL || it_->word() == NULL) return false;
  return it_->word()->box_word->BlobPosition(blob_index_) == SP_SUPERSCRIPT;
}
StrongScriptDirection tesseract::LTRResultIterator::WordDirection | ( | ) | const |
Definition at line 210 of file ltrresultiterator.cpp.
{
  // Classifies the current word by which kinds of directional characters
  // it contains: only RTL, only LTR, neither (neutral) or both (mixed).
  // With no current word the answer is DIR_NEUTRAL.
  if (it_->word() == NULL) return DIR_NEUTRAL;

  const bool rtl_seen = it_->word()->AnyRtlCharsInWord();
  const bool ltr_seen = it_->word()->AnyLtrCharsInWord();

  if (rtl_seen) {
    return ltr_seen ? DIR_MIX : DIR_RIGHT_TO_LEFT;
  }
  return ltr_seen ? DIR_LEFT_TO_RIGHT : DIR_NEUTRAL;
}
const char * tesseract::LTRResultIterator::WordFontAttributes (bool *is_bold, bool *is_italic, bool *is_underlined, bool *is_monospace, bool *is_serif, bool *is_smallcaps, int *pointsize, int *font_id) const
Definition at line 172 of file ltrresultiterator.cpp.
{
  // Fills the output parameters from the current word's font information
  // and returns the font name.
  //  - No current word: returns NULL and writes nothing.
  //  - Word without font info: sets *font_id to -1, returns NULL and leaves
  //    the remaining outputs unwritten.
  if (it_->word() == NULL) return NULL;  // Already at the end!
  if (it_->word()->fontinfo == NULL) {
    *font_id = -1;
    return NULL;  // No font information.
  }

  const FontInfo& info = *it_->word()->fontinfo;

  *is_bold = info.is_bold();
  *is_italic = info.is_italic();
  *is_underlined = false;  // TODO(rays) fix this!
  *is_monospace = info.is_fixed_pitch();
  *is_serif = info.is_serif();
  *is_smallcaps = it_->word()->small_caps;
  *font_id = info.universal_id;

  // Row height is x-height plus ascenders minus descenders, in pixels.
  float row_height = it_->row()->row->x_height() +
                     it_->row()->row->ascenders() -
                     it_->row()->row->descenders();

  // Convert from pixels to printers points, rounding to nearest; a
  // non-positive resolution yields a point size of 0.
  if (scaled_yres_ > 0) {
    *pointsize =
        static_cast<int>(row_height * kPointsPerInch / scaled_yres_ + 0.5);
  } else {
    *pointsize = 0;
  }
  return info.name;
}
bool tesseract::LTRResultIterator::WordIsFromDictionary | ( | ) | const |
Definition at line 224 of file ltrresultiterator.cpp.
bool tesseract::LTRResultIterator::WordIsNumeric | ( | ) | const |
Definition at line 232 of file ltrresultiterator.cpp.
const char * tesseract::LTRResultIterator::WordLattice | ( | int * | lattice_size | ) | const |
Definition at line 284 of file ltrresultiterator.cpp.
{
  // Hands back the lattice data stored in the current word's blamer bundle
  // and writes its size to *lattice_size. Returns NULL, leaving
  // *lattice_size untouched, when there is no current word or no bundle.
  if (it_->word() == NULL || it_->word()->blamer_bundle == NULL) {
    return NULL;
  }
  *lattice_size = it_->word()->blamer_bundle->lattice_size;
  return it_->word()->blamer_bundle->lattice_data;
}
const char * tesseract::LTRResultIterator::WordRecognitionLanguage | ( | ) | const |
char * tesseract::LTRResultIterator::WordTruthUTF8Text | ( | ) | const |
Definition at line 266 of file ltrresultiterator.cpp.
{
  // Concatenates the entries of the current word's blamer_bundle->truth_text
  // and returns them in a freshly new[]-allocated, NUL-terminated buffer
  // (presumably released by the caller with delete[] - confirm against the
  // header). Returns NULL when there is no current word, no blamer bundle,
  // or the bundle is marked IRR_NO_TRUTH.
  if (it_->word() == NULL) return NULL;  // Already at the end!
  if (it_->word()->blamer_bundle == NULL) return NULL;
  if (it_->word()->blamer_bundle->incorrect_result_reason == IRR_NO_TRUTH) {
    return NULL;  // no truth information for this word
  }

  const GenericVector<STRING>& pieces =
      it_->word()->blamer_bundle->truth_text;
  STRING joined;
  int idx = 0;
  while (idx < pieces.size()) {
    joined += pieces[idx];
    ++idx;
  }

  // Copy the text, including its terminating NUL, into the result buffer.
  const int buffer_len = joined.length() + 1;
  char* buffer = new char[buffer_len];
  strncpy(buffer, joined.string(), buffer_len);
  return buffer;
}
friend class ChoiceIterator [friend] |
Definition at line 47 of file ltrresultiterator.h.
const char* tesseract::LTRResultIterator::line_separator_ [protected] |
Definition at line 165 of file ltrresultiterator.h.
const char* tesseract::LTRResultIterator::paragraph_separator_ [protected] |
Definition at line 166 of file ltrresultiterator.h.