|
Tesseract
3.02
|
#include <ltrresultiterator.h>
Public Member Functions | |
| LTRResultIterator (PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres, int rect_left, int rect_top, int rect_width, int rect_height) | |
| virtual | ~LTRResultIterator () |
| char * | GetUTF8Text (PageIteratorLevel level) const |
| void | SetLineSeparator (const char *new_line) |
| void | SetParagraphSeparator (const char *new_para) |
| float | Confidence (PageIteratorLevel level) const |
| const char * | WordFontAttributes (bool *is_bold, bool *is_italic, bool *is_underlined, bool *is_monospace, bool *is_serif, bool *is_smallcaps, int *pointsize, int *font_id) const |
| const char * | WordRecognitionLanguage () const |
| StrongScriptDirection | WordDirection () const |
| bool | WordIsFromDictionary () const |
| bool | WordIsNumeric () const |
| bool | HasBlamerInfo () const |
| void * | GetParamsTrainingBundle () const |
| const char * | GetBlamerDebug () const |
| const char * | GetBlamerMisadaptionDebug () const |
| char * | WordTruthUTF8Text () const |
| const char * | WordLattice (int *lattice_size) const |
| bool | SymbolIsSuperscript () const |
| bool | SymbolIsSubscript () const |
| bool | SymbolIsDropcap () const |
Protected Attributes | |
| const char * | line_separator_ |
| const char * | paragraph_separator_ |
Friends | |
| class | ChoiceIterator |
Definition at line 46 of file ltrresultiterator.h.
| tesseract::LTRResultIterator::LTRResultIterator | ( | PAGE_RES * | page_res, |
| Tesseract * | tesseract, | ||
| int | scale, | ||
| int | scaled_yres, | ||
| int | rect_left, | ||
| int | rect_top, | ||
| int | rect_width, | ||
| int | rect_height | ||
| ) |
Definition at line 30 of file ltrresultiterator.cpp.
// Forwards every constructor argument unchanged to the PageIterator base and
// starts out with "\n" as both the line separator and the paragraph separator.
// The constructor body itself is empty.
: PageIterator(page_res, tesseract, scale, scaled_yres, rect_left, rect_top, rect_width, rect_height), line_separator_("\n"), paragraph_separator_("\n") { }
| tesseract::LTRResultIterator::~LTRResultIterator | ( | ) | [virtual] |
Definition at line 40 of file ltrresultiterator.cpp.
{
// Intentionally empty: this destructor body releases nothing itself. In
// particular the separator pointers are not freed here.
}
| float tesseract::LTRResultIterator::Confidence | ( | PageIteratorLevel | level | ) | const |
Definition at line 94 of file ltrresultiterator.cpp.
{
  // Returns a confidence in [0, 100] for the unit at `level` that starts at
  // the current position. The value is 100 + 5 * (mean certainty), clamped
  // to that range. Returns 0 when there is no current word or when `level`
  // is none of the five levels handled below.
  if (it_->word() == NULL) return 0.0f;  // Already at the end!

  // Step a private copy so the iterator's own position is left untouched.
  PAGE_RES_IT cursor(*it_);
  WERD_CHOICE* word_choice = cursor.word()->best_choice;
  ASSERT_HOST(word_choice != NULL);

  float certainty_sum = 0.0f;
  int samples = 0;

  if (level == RIL_BLOCK || level == RIL_PARA || level == RIL_TEXTLINE) {
    // Multi-word units: add the best-choice certainty of every word until
    // the cursor steps out of the unit it started in.
    bool same_unit = false;
    do {
      word_choice = cursor.word()->best_choice;
      ASSERT_HOST(word_choice != NULL);
      certainty_sum += word_choice->certainty();
      ++samples;
      cursor.forward();
      if (level == RIL_BLOCK) {
        same_unit = cursor.block() == cursor.prev_block();
      } else if (level == RIL_PARA) {
        // The block test comes first so the row pointers are only
        // dereferenced while the cursor is still inside the same block.
        same_unit = cursor.block() == cursor.prev_block() &&
                    cursor.row()->row->para() ==
                        cursor.prev_row()->row->para();
      } else {  // RIL_TEXTLINE
        same_unit = cursor.row() == cursor.prev_row();
      }
    } while (same_unit);
  } else if (level == RIL_WORD) {
    certainty_sum += word_choice->certainty();
    ++samples;
  } else if (level == RIL_SYMBOL) {
    BLOB_CHOICE_LIST_CLIST* blob_lists = word_choice->blob_choices();
    if (blob_lists == NULL) {
      // No per-blob choice lists: fall back to the whole word's certainty.
      certainty_sum += word_choice->certainty();
    } else {
      // Advance to the choice list of the blob at blob_index_.
      BLOB_CHOICE_LIST_C_IT list_it(blob_lists);
      for (int step = 0; step < blob_index_; ++step)
        list_it.forward();
      // Look for the entry whose unichar id matches the best choice. If the
      // search cycles the whole list without a match, the entry the
      // iterator then rests on is used.
      BLOB_CHOICE_IT candidate_it(list_it.data());
      candidate_it.mark_cycle_pt();
      while (!candidate_it.cycled_list() &&
             candidate_it.data()->unichar_id() !=
                 word_choice->unichar_id(blob_index_)) {
        candidate_it.forward();
      }
      certainty_sum += candidate_it.data()->certainty();
    }
    ++samples;
  }

  if (samples <= 0) return 0.0f;

  certainty_sum /= samples;
  float confidence = 100 + 5 * certainty_sum;
  if (confidence < 0.0f) confidence = 0.0f;
  if (confidence > 100.0f) confidence = 100.0f;
  return confidence;
}
| const char * tesseract::LTRResultIterator::GetBlamerDebug | ( | ) | const |
Definition at line 254 of file ltrresultiterator.cpp.
{
  // Returns the blamer debug string of the current word, or NULL when there
  // is no current word or the word carries no blamer bundle.
  //
  // The guard matches the checks made by HasBlamerInfo() and
  // GetParamsTrainingBundle(). Without it, calling this at the end of the
  // page or on a word without a bundle dereferenced a NULL pointer.
  if (it_->word() == NULL || it_->word()->blamer_bundle == NULL)
    return NULL;
  return it_->word()->blamer_bundle->debug.string();
}
| const char * tesseract::LTRResultIterator::GetBlamerMisadaptionDebug | ( | ) | const |
Definition at line 260 of file ltrresultiterator.cpp.
{
  // Returns the blamer misadaption debug string of the current word, or NULL
  // when there is no current word or the word carries no blamer bundle.
  //
  // The guard matches the checks made by HasBlamerInfo() and
  // GetParamsTrainingBundle(). Without it, calling this at the end of the
  // page or on a word without a bundle dereferenced a NULL pointer.
  if (it_->word() == NULL || it_->word()->blamer_bundle == NULL)
    return NULL;
  return it_->word()->blamer_bundle->misadaption_debug.string();
}
| void * tesseract::LTRResultIterator::GetParamsTrainingBundle | ( | ) | const |
Definition at line 247 of file ltrresultiterator.cpp.
{
  // Returns the address of the current word's params_training_bundle as an
  // untyped pointer, or NULL when there is no current word or the word has
  // no blamer bundle.
  if (it_->word() == NULL || it_->word()->blamer_bundle == NULL)
    return NULL;
  return &(it_->word()->blamer_bundle->params_training_bundle);
}
| char * tesseract::LTRResultIterator::GetUTF8Text | ( | PageIteratorLevel | level | ) | const |
Reimplemented in tesseract::ResultIterator.
Definition at line 45 of file ltrresultiterator.cpp.
{
// Builds the text of the unit at `level` that starts at the current position
// and returns it in a freshly new[]-allocated, NUL-terminated buffer.
// Returns NULL when the iterator has no current word.
// NOTE(review): nothing in this function retains the buffer, so the caller
// presumably has to delete[] it -- confirm against the header documentation.
if (it_->word() == NULL) return NULL; // Already at the end!
STRING text;
// Walk a copy of the page iterator so that it_ itself is not advanced.
PAGE_RES_IT res_it(*it_);
WERD_CHOICE* best_choice = res_it.word()->best_choice;
ASSERT_HOST(best_choice != NULL);
if (level == RIL_SYMBOL) {
// Single symbol: delegate to BestUTF8 for the blob at blob_index_.
text = res_it.word()->BestUTF8(blob_index_, false);
} else if (level == RIL_WORD) {
// Single word: the best choice's unichar string, with no separator added.
text = best_choice->unichar_string();
} else {
// Text line, paragraph or block: three nested loops over words, lines and
// paragraphs. `level` decides how many of the outer loops actually repeat.
bool eol = false; // end of line?
bool eop = false; // end of paragraph?
do { // for each paragraph in a block
do { // for each text line in a paragraph
do { // for each word in a text line
best_choice = res_it.word()->best_choice;
ASSERT_HOST(best_choice != NULL);
text += best_choice->unichar_string();
text += " ";
res_it.forward();
// The line ends as soon as the row after forward() differs from the
// row of the word just consumed.
eol = res_it.row() != res_it.prev_row();
} while (!eol);
// Drop the space appended after the last word, then terminate the line.
text.truncate_at(text.length() - 1);
text += line_separator_;
// The block comparison comes first, so the row pointers are only
// dereferenced when the iterator is still inside the same block.
eop = res_it.block() != res_it.prev_block() ||
res_it.row()->row->para() != res_it.prev_row()->row->para();
// For RIL_TEXTLINE this loop body runs exactly once.
} while (level != RIL_TEXTLINE && !eop);
// The paragraph separator is appended whenever the last line consumed
// ended its paragraph, including for a RIL_TEXTLINE request.
if (eop) text += paragraph_separator_;
// Only RIL_BLOCK continues with further paragraphs of the same block.
} while (level == RIL_BLOCK && res_it.block() == res_it.prev_block());
}
// length counts the terminating NUL, so strncpy copies it as well.
int length = text.length() + 1;
char* result = new char[length];
strncpy(result, text.string(), length);
return result;
}
| bool tesseract::LTRResultIterator::HasBlamerInfo | ( | ) | const |
Definition at line 239 of file ltrresultiterator.cpp.
{
  // True only when there is a current word, it carries a blamer bundle, and
  // at least one of the bundle's two debug strings is non-empty.
  if (it_->word() == NULL) return false;
  if (it_->word()->blamer_bundle == NULL) return false;
  return it_->word()->blamer_bundle->debug.length() > 0 ||
         it_->word()->blamer_bundle->misadaption_debug.length() > 0;
}
| void tesseract::LTRResultIterator::SetLineSeparator | ( | const char * | new_line | ) |
Definition at line 83 of file ltrresultiterator.cpp.
{
// Stores the pointer itself; no copy of the string is made. GetUTF8Text
// reads line_separator_ later, so new_line must stay valid for as long as
// this iterator may be asked for text.
line_separator_ = new_line;
}
| void tesseract::LTRResultIterator::SetParagraphSeparator | ( | const char * | new_para | ) |
Definition at line 88 of file ltrresultiterator.cpp.
{
// Stores the pointer itself; no copy of the string is made. GetUTF8Text
// reads paragraph_separator_ later, so new_para must stay valid for as long
// as this iterator may be asked for text.
paragraph_separator_ = new_para;
}
| bool tesseract::LTRResultIterator::SymbolIsDropcap | ( | ) | const |
Definition at line 312 of file ltrresultiterator.cpp.
{
  // Only answered when cblob_it_ is NULL and there is a current word.
  // Otherwise the result is false.
  if (cblob_it_ != NULL || it_->word() == NULL)
    return false;
  // Compare the recorded position of the blob at blob_index_.
  return it_->word()->box_word->BlobPosition(blob_index_) == SP_DROPCAP;
}
| bool tesseract::LTRResultIterator::SymbolIsSubscript | ( | ) | const |
Definition at line 303 of file ltrresultiterator.cpp.
{
  // Only answered when cblob_it_ is NULL and there is a current word.
  // Otherwise the result is false.
  if (cblob_it_ != NULL || it_->word() == NULL)
    return false;
  // Compare the recorded position of the blob at blob_index_.
  return it_->word()->box_word->BlobPosition(blob_index_) == SP_SUBSCRIPT;
}
| bool tesseract::LTRResultIterator::SymbolIsSuperscript | ( | ) | const |
Definition at line 294 of file ltrresultiterator.cpp.
{
  // Only answered when cblob_it_ is NULL and there is a current word.
  // Otherwise the result is false.
  if (cblob_it_ != NULL || it_->word() == NULL)
    return false;
  // Compare the recorded position of the blob at blob_index_.
  return it_->word()->box_word->BlobPosition(blob_index_) == SP_SUPERSCRIPT;
}
| StrongScriptDirection tesseract::LTRResultIterator::WordDirection | ( | ) | const |
Definition at line 210 of file ltrresultiterator.cpp.
{
  // Classifies the current word by which kinds of characters it contains:
  //   RTL only -> DIR_RIGHT_TO_LEFT     LTR only -> DIR_LEFT_TO_RIGHT
  //   both     -> DIR_MIX               neither  -> DIR_NEUTRAL
  // With no current word the answer is DIR_NEUTRAL.
  if (it_->word() == NULL) return DIR_NEUTRAL;

  bool contains_rtl = it_->word()->AnyRtlCharsInWord();
  bool contains_ltr = it_->word()->AnyLtrCharsInWord();

  if (contains_rtl)
    return contains_ltr ? DIR_MIX : DIR_RIGHT_TO_LEFT;
  return contains_ltr ? DIR_LEFT_TO_RIGHT : DIR_NEUTRAL;
}
| const char * tesseract::LTRResultIterator::WordFontAttributes | ( | bool * | is_bold, |
| bool * | is_italic, | ||
| bool * | is_underlined, | ||
| bool * | is_monospace, | ||
| bool * | is_serif, | ||
| bool * | is_smallcaps, | ||
| int * | pointsize, | ||
| int * | font_id | ||
| ) | const |
Definition at line 172 of file ltrresultiterator.cpp.
{
  // Fills the out-parameters with the font attributes of the current word
  // and returns the font name. Returns NULL when there is no current word
  // or the word has no font information.
  //
  // NOTE(review): on the "no current word" path no out-parameter is written,
  // and on the "no font information" path only *font_id is written (-1).
  // Callers should not read the other outputs after a NULL return.
  if (it_->word() == NULL) return NULL;  // Already at the end!

  if (it_->word()->fontinfo == NULL) {
    *font_id = -1;
    return NULL;  // No font information.
  }

  const FontInfo& info = *it_->word()->fontinfo;
  *font_id = info.universal_id;
  *is_bold = info.is_bold();
  *is_italic = info.is_italic();
  *is_underlined = false;  // TODO(rays) fix this!
  *is_monospace = info.is_fixed_pitch();
  *is_serif = info.is_serif();
  *is_smallcaps = it_->word()->small_caps;

  // Row height in pixels: x-height plus ascenders minus descenders.
  float height_px = it_->row()->row->x_height() +
                    it_->row()->row->ascenders() -
                    it_->row()->row->descenders();

  // Convert from pixels to printers points, rounding to nearest. A
  // non-positive resolution yields a point size of 0.
  if (scaled_yres_ > 0) {
    *pointsize =
        static_cast<int>(height_px * kPointsPerInch / scaled_yres_ + 0.5);
  } else {
    *pointsize = 0;
  }
  return info.name;
}
| bool tesseract::LTRResultIterator::WordIsFromDictionary | ( | ) | const |
Definition at line 224 of file ltrresultiterator.cpp.
| bool tesseract::LTRResultIterator::WordIsNumeric | ( | ) | const |
Definition at line 232 of file ltrresultiterator.cpp.
| const char * tesseract::LTRResultIterator::WordLattice | ( | int * | lattice_size | ) | const |
Definition at line 284 of file ltrresultiterator.cpp.
{
  // Returns the current word's lattice data and writes its size to
  // *lattice_size. Returns NULL, leaving *lattice_size unwritten, when there
  // is no current word or the word has no blamer bundle.
  if (it_->word() == NULL || it_->word()->blamer_bundle == NULL)
    return NULL;

  *lattice_size = it_->word()->blamer_bundle->lattice_size;
  return it_->word()->blamer_bundle->lattice_data;
}
| const char * tesseract::LTRResultIterator::WordRecognitionLanguage | ( | ) | const |
| char * tesseract::LTRResultIterator::WordTruthUTF8Text | ( | ) | const |
Definition at line 266 of file ltrresultiterator.cpp.
{
  // Concatenates the entries of the current word's truth_text vector and
  // returns the result in a freshly new[]-allocated, NUL-terminated buffer.
  // Returns NULL when there is no current word, no blamer bundle, or the
  // bundle reports IRR_NO_TRUTH.
  // NOTE(review): nothing here retains the buffer, so the caller presumably
  // has to delete[] it -- confirm against the header documentation.
  if (it_->word() == NULL ||
      it_->word()->blamer_bundle == NULL ||
      it_->word()->blamer_bundle->incorrect_result_reason == IRR_NO_TRUTH) {
    return NULL;
  }

  const GenericVector<STRING>& pieces =
      it_->word()->blamer_bundle->truth_text;
  STRING joined;
  int piece = 0;
  while (piece < pieces.size()) {
    joined += pieces[piece];
    ++piece;
  }

  // buffer_size counts the terminating NUL, so strncpy copies it as well.
  int buffer_size = joined.length() + 1;
  char* buffer = new char[buffer_size];
  strncpy(buffer, joined.string(), buffer_size);
  return buffer;
}
friend class ChoiceIterator [friend] |
Definition at line 47 of file ltrresultiterator.h.
const char* tesseract::LTRResultIterator::line_separator_ [protected] |
Definition at line 165 of file ltrresultiterator.h.
const char* tesseract::LTRResultIterator::paragraph_separator_ [protected] |
Definition at line 166 of file ltrresultiterator.h.