Tesseract
3.02
|
#include <pageres.h>
WERD_RES::WERD_RES | ( | ) | [inline] |
Definition at line 456 of file pageres.h.
// Default constructor: resets the plain-value fields, then nulls every
// pointer member, leaving an empty result.
{
  InitNonPointers();
  InitPointers();
}
WERD_RES::WERD_RES | ( | WERD * | the_word | ) | [inline] |
Definition at line 460 of file pageres.h.
// Constructs an empty result that refers to the_word.
// All fields are reset first; only the word pointer is then filled in.
{
  InitNonPointers();
  InitPointers();
  word = the_word;
}
WERD_RES::WERD_RES | ( | const WERD_RES & | source | ) | [inline] |
Definition at line 465 of file pageres.h.
// Copy constructor: the pointer members are nulled first so that the
// assignment operator can safely clear this object before copying.
{
  InitPointers();
  *this = source;  // see operator=
}
WERD_RES::~WERD_RES | ( | ) |
Definition at line 746 of file pageres.cpp.
// Destructor: all cleanup is delegated to Clear().
{
  Clear();
}
bool WERD_RES::AnyLtrCharsInWord | ( | ) | const [inline] |
Definition at line 523 of file pageres.h.
// Returns true if any unichar of best_choice has left-to-right direction.
// Returns false when there is no unicharset or no (non-empty) best choice.
{
  if (uch_set == NULL || best_choice == NULL) return false;
  if (best_choice->length() < 1) return false;
  for (int pos = 0; pos < best_choice->length(); ++pos) {
    const int uid = best_choice->unichar_id(pos);
    // Ids outside the unicharset are ignored rather than treated as errors.
    const bool legal = uid >= 0 && uid < uch_set->size();
    if (legal &&
        uch_set->get_direction(uid) == UNICHARSET::U_LEFT_TO_RIGHT) {
      return true;
    }
  }
  return false;
}
bool WERD_RES::AnyRtlCharsInWord | ( | ) | const [inline] |
Definition at line 506 of file pageres.h.
// Returns true if any unichar of best_choice has one of the right-to-left
// directions (plain RTL, Arabic RTL, or Arabic number).
// Returns false when there is no unicharset or no (non-empty) best choice.
{
  if (uch_set == NULL || best_choice == NULL) return false;
  if (best_choice->length() < 1) return false;
  for (int pos = 0; pos < best_choice->length(); ++pos) {
    const int uid = best_choice->unichar_id(pos);
    // Ids outside the unicharset are ignored rather than treated as errors.
    if (uid < 0 || uid >= uch_set->size()) continue;
    switch (uch_set->get_direction(uid)) {
      case UNICHARSET::U_RIGHT_TO_LEFT:
      case UNICHARSET::U_RIGHT_TO_LEFT_ARABIC:
      case UNICHARSET::U_ARABIC_NUMBER:
        return true;
      default:
        break;
    }
  }
  return false;
}
void WERD_RES::BestChoiceToCorrectText | ( | ) |
Definition at line 572 of file pageres.cpp.
// Rebuilds correct_text from best_choice: one STRING per unichar, holding
// the text that uch_set maps the unichar id to.
// best_choice must be non-NULL (enforced with ASSERT_HOST).
{
  correct_text.clear();
  ASSERT_HOST(best_choice != NULL);
  for (int pos = 0; pos < best_choice->length(); ++pos) {
    const char* utf8 = uch_set->id_to_unichar(best_choice->unichar_id(pos));
    correct_text.push_back(STRING(utf8));
  }
}
const char* const WERD_RES::BestUTF8 | ( | int | blob_index, |
bool | in_rtl_context | ||
) | const [inline] |
Definition at line 477 of file pageres.h.
// Returns the text for the unichar at blob_index of best_choice, or NULL if
// there is no best choice / unicharset, the index is out of range, or the
// unichar id is invalid.
// When in_rtl_context is true and the unicharset supplies a usable mirror
// for the unichar, the mirrored unichar's text is returned instead.
{
  // Guard against an unrecognized word, in line with the other accessors
  // (AnyLtrCharsInWord, SymbolDirection) that tolerate missing results.
  if (best_choice == NULL || uch_set == NULL) return NULL;
  if (blob_index < 0 || blob_index >= best_choice->length()) return NULL;
  UNICHAR_ID id = best_choice->unichar_id(blob_index);
  if (id < 0 || id >= uch_set->size() || id == INVALID_UNICHAR_ID)
    return NULL;
  UNICHAR_ID mirrored = uch_set->get_mirror(id);
  if (in_rtl_context && mirrored > 0 && mirrored != INVALID_UNICHAR_ID)
    id = mirrored;
  return uch_set->id_to_unichar_ext(id);
}
UNICHAR_ID WERD_RES::BothHyphens | ( | UNICHAR_ID | id1, |
UNICHAR_ID | id2 | ||
) |
Definition at line 683 of file pageres.cpp.
// Merge callback: if both unichars are single-character '-' or '~', returns
// the unichar id of "-"; otherwise returns INVALID_UNICHAR_ID.
{
  const char* first = uch_set->id_to_unichar(id1);
  const char* second = uch_set->id_to_unichar(id2);
  if (strlen(first) != 1 || strlen(second) != 1) return INVALID_UNICHAR_ID;
  const bool first_is_dash = (*first == '-' || *first == '~');
  if (!first_is_dash) return INVALID_UNICHAR_ID;
  const bool second_is_dash = (*second == '-' || *second == '~');
  if (!second_is_dash) return INVALID_UNICHAR_ID;
  return uch_set->unichar_to_id("-");
}
UNICHAR_ID WERD_RES::BothQuotes | ( | UNICHAR_ID | id1, |
UNICHAR_ID | id2 | ||
) |
Definition at line 660 of file pageres.cpp.
// Merge callback: if is_simple_quote() accepts both unichars, returns the
// unichar id of the double quote; otherwise returns INVALID_UNICHAR_ID.
{
  const char* first = uch_set->id_to_unichar(id1);
  const char* second = uch_set->id_to_unichar(id2);
  const bool both_quotes = is_simple_quote(first, strlen(first)) &&
                           is_simple_quote(second, strlen(second));
  if (!both_quotes) return INVALID_UNICHAR_ID;
  return uch_set->unichar_to_id("\"");
}
UNICHAR_ID WERD_RES::BothSpaces | ( | UNICHAR_ID | id1, |
UNICHAR_ID | id2 | ||
) |
Definition at line 713 of file pageres.cpp.
// Merge callback: returns id1 when both ids are the space unichar,
// otherwise INVALID_UNICHAR_ID.
{
  if (id1 != id2) return INVALID_UNICHAR_ID;
  if (id1 != uch_set->unichar_to_id(" ")) return INVALID_UNICHAR_ID;
  return id1;
}
void WERD_RES::Clear | ( | ) |
Definition at line 789 of file pageres.cpp.
// Releases everything this object holds and returns it to an empty state.
// The word is deleted only when `combination` is set; otherwise the pointer
// is simply dropped.
{
  if (combination && word != NULL) {
    delete word;
  }
  word = NULL;

  delete blamer_bundle;
  blamer_bundle = NULL;

  ClearResults();
}
void WERD_RES::ClearResults | ( | ) |
Definition at line 799 of file pageres.cpp.
// Discards all recognition results while keeping the word itself and the
// blamer bundle object (only the bundle's own results are cleared).
{
  done = false;

  // The fontinfo pointers are just reset, never deleted here.
  fontinfo = NULL;
  fontinfo2 = NULL;
  fontinfo_id_count = 0;
  fontinfo_id2_count = 0;

  // delete on a NULL pointer is a no-op, so no explicit checks are needed.
  delete bln_boxes;
  bln_boxes = NULL;
  delete chopped_word;
  chopped_word = NULL;
  delete rebuild_word;
  rebuild_word = NULL;
  delete box_word;
  box_word = NULL;

  best_state.clear();
  correct_text.clear();

  if (seam_array != NULL) {
    free_seam_list(seam_array);
    seam_array = NULL;
  }

  // Release the two choices independently: previously raw_choice was only
  // freed when best_choice was non-NULL, which would leak raw_choice (and
  // leave it dangling in the object) if only raw_choice was set.
  delete best_choice;
  best_choice = NULL;
  delete raw_choice;
  raw_choice = NULL;

  if (!alt_choices.empty()) {
    alt_choices.delete_data_pointers();
    alt_choices.clear();
  }
  alt_states.clear();

  delete ep_choice;
  ep_choice = NULL;

  if (blamer_bundle != NULL) blamer_bundle->ClearResults();
}
void WERD_RES::CloneChoppedToRebuild | ( | ) |
Definition at line 480 of file pageres.cpp.
// Replaces rebuild_word with a copy of chopped_word, rebuilds box_word from
// it, and appends one entry per box to best_state (value 1) and to
// correct_text (empty string).
{
  delete rebuild_word;  // deleting NULL is a no-op
  rebuild_word = new TWERD(*chopped_word);
  SetupBoxWord();

  const int num_boxes = box_word->length();
  best_state.reserve(num_boxes);
  correct_text.reserve(num_boxes);
  int added = 0;
  while (added < num_boxes) {
    best_state.push_back(1);
    correct_text.push_back(STRING(""));
    ++added;
  }
}
bool WERD_RES::ConditionalBlobMerge | ( | TessResultCallback2< UNICHAR_ID, UNICHAR_ID, UNICHAR_ID > * | class_cb, |
TessResultCallback2< bool, const TBOX &, const TBOX & > * | box_cb, | ||
BLOB_CHOICE_LIST_CLIST * | blob_choices | ||
) |
Definition at line 587 of file pageres.cpp.
// Walks adjacent unichar pairs of best_choice and merges a pair whenever
// class_cb returns a valid replacement id and box_cb (if given) accepts the
// two blob boxes. A merge updates best_choice, raw_choice, reject_map,
// rebuild_word, box_word, best_state and blob_choices together.
// Both callbacks are deleted before returning.
// Returns true if at least one merge took place.
{
  bool modified = false;
  for (int i = 0; i + 1 < best_choice->length(); ++i) {
    UNICHAR_ID new_id = class_cb->Run(best_choice->unichar_id(i),
                                      best_choice->unichar_id(i + 1));
    if (new_id == INVALID_UNICHAR_ID) continue;
    if (box_cb != NULL &&
        !box_cb->Run(box_word->BlobBox(i), box_word->BlobBox(i + 1))) {
      continue;
    }

    // Only shrink the reject map if it is currently in step with the word.
    if (reject_map.length() == best_choice->length())
      reject_map.remove_pos(i);
    best_choice->set_unichar_id(new_id, i);
    best_choice->remove_unichar_id(i + 1);
    raw_choice->set_unichar_id(new_id, i);
    raw_choice->remove_unichar_id(i + 1);
    modified = true;
    rebuild_word->MergeBlobs(i, i + 2);
    box_word->MergeBoxes(i, i + 2);
    if (i + 1 < best_state.length()) {
      best_state[i] += best_state[i + 1];
      best_state.remove(i + 1);
    }

    // Two choice lists now describe one blob, so one of them has to go.
    // Keep the list whose head entry has the strictly lower certainty and
    // extract the other; on a tie the first list is the one extracted.
    // (The unused rating/certainty/target_choices locals of the earlier
    // version were dead stores and have been removed.)
    BLOB_CHOICE_LIST_C_IT blob_choices_it(blob_choices);
    for (int j = 0; j < i; ++j)
      blob_choices_it.forward();
    BLOB_CHOICE_IT it1(blob_choices_it.data());              // first choices
    BLOB_CHOICE_IT it2(blob_choices_it.data_relative(1));    // second choices
    if (it1.data()->certainty() < it2.data()->certainty())
      blob_choices_it.forward();
    delete blob_choices_it.extract();  // get rid of spare
    // TODO(rays) Fix the choices so they contain the desired result.
    // Do we really need to ? Only needed for fix_quotes, which should be
    // going away.
  }
  delete class_cb;
  delete box_cb;
  return modified;
}
void WERD_RES::ConsumeWordResults | ( | WERD_RES * | word | ) |
Definition at line 411 of file pageres.cpp.
// Takes over the recognition results stored in `word`: pointer-held data,
// the seam array and the vectors are moved out of `word`, while denorm,
// reject_map and the simple fields are copied.
// If `word` has a blamer bundle, this object must already have one too
// (checked with assert); its results are copied across.
{
  denorm = word->denorm;

  // NOTE(review): MovePointerData presumably releases the destination before
  // stealing the source pointer - confirm against its definition.
  MovePointerData(&chopped_word, &word->chopped_word);
  MovePointerData(&rebuild_word, &word->rebuild_word);
  MovePointerData(&box_word, &word->box_word);

  // Free our own seam list (if any) before stealing the one from `word`.
  if (seam_array != NULL) {
    free_seam_list(seam_array);
  }
  seam_array = word->seam_array;
  word->seam_array = NULL;

  best_state.move(&word->best_state);
  correct_text.move(&word->correct_text);

  MovePointerData(&best_choice, &word->best_choice);
  MovePointerData(&raw_choice, &word->raw_choice);

  // Our old alternative choices are deleted before the incoming ones move in.
  alt_choices.delete_data_pointers();
  alt_choices.move(&word->alt_choices);
  alt_states.move(&word->alt_states);

  reject_map = word->reject_map;

  if (word->blamer_bundle != NULL) {
    assert(blamer_bundle != NULL);
    blamer_bundle->CopyResults(*(word->blamer_bundle));
  }

  CopySimpleFields(*word);
}
void WERD_RES::copy_on | ( | WERD_RES * | word_res | ) | [inline] |
void WERD_RES::CopySimpleFields | ( | const WERD_RES & | source | ) |
Definition at line 234 of file pageres.cpp.
// Copies the plain-value fields and the non-owned pointers (fontinfo,
// fontinfo2, uch_set, tesseract) from source by simple assignment.
{
  // Recognition status flags.
  tess_failed = source.tess_failed;
  tess_accepted = source.tess_accepted;
  tess_would_adapt = source.tess_would_adapt;
  done = source.done;
  unlv_crunch_mode = source.unlv_crunch_mode;
  reject_spaces = source.reject_spaces;

  // Font attributes.
  small_caps = source.small_caps;
  italic = source.italic;
  bold = source.bold;
  fontinfo = source.fontinfo;
  fontinfo2 = source.fontinfo2;
  fontinfo_id_count = source.fontinfo_id_count;
  fontinfo_id2_count = source.fontinfo_id2_count;

  // Height estimates.
  x_height = source.x_height;
  caps_height = source.caps_height;
  guessed_x_ht = source.guessed_x_ht;
  guessed_caps_ht = source.guessed_caps_ht;

  // Shared context pointers.
  uch_set = source.uch_set;
  tesseract = source.tesseract;
}
void WERD_RES::FakeClassifyWord | ( | int | blob_count, |
BLOB_CHOICE ** | choices | ||
) |
Definition at line 549 of file pageres.cpp.
// Fills in the word results as if the given choices had come from the
// classifier: each choices[c] is appended to best_choice and placed in its
// own single-entry BLOB_CHOICE_LIST. raw_choice becomes a copy of
// best_choice and reject_map is sized to blob_count.
// Requires box_word and best_choice to exist and blob_count to equal the
// number of boxes in box_word (all enforced with ASSERT_HOST).
{
  ASSERT_HOST(box_word != NULL);
  ASSERT_HOST(blob_count == box_word->length());
  ASSERT_HOST(best_choice != NULL);

  BLOB_CHOICE_LIST_CLIST* per_blob_lists = new BLOB_CHOICE_LIST_CLIST;
  BLOB_CHOICE_LIST_C_IT lists_it(per_blob_lists);
  for (int blob = 0; blob < blob_count; ++blob) {
    BLOB_CHOICE* fake = choices[blob];
    best_choice->append_unichar_id(fake->unichar_id(), 1,
                                   fake->rating(), fake->certainty());
    BLOB_CHOICE_LIST* single = new BLOB_CHOICE_LIST;
    BLOB_CHOICE_IT single_it(single);
    single_it.add_after_then_move(fake);
    lists_it.add_after_then_move(single);
  }
  best_choice->set_blob_choices(per_blob_lists);

  delete raw_choice;
  raw_choice = new WERD_CHOICE(*best_choice);
  reject_map.initialise(blob_count);
}
void WERD_RES::fix_hyphens | ( | BLOB_CHOICE_LIST_CLIST * | blob_choices | ) |
Definition at line 700 of file pageres.cpp.
// Merges adjacent hyphen-like blobs into a single "-" via
// ConditionalBlobMerge, using BothHyphens as the class test and
// HyphenBoxesOverlap as the box test.
// Does nothing if "-" is absent from, or disabled in, the unicharset.
{
  const bool hyphen_allowed =
      uch_set->contains_unichar("-") &&
      uch_set->get_enabled(uch_set->unichar_to_id("-"));
  if (!hyphen_allowed) {
    return;  // Don't create it if it is disallowed.
  }

  ConditionalBlobMerge(
      NewPermanentTessCallback(this, &WERD_RES::BothHyphens),
      NewPermanentTessCallback(this, &WERD_RES::HyphenBoxesOverlap),
      blob_choices);
}
void WERD_RES::fix_quotes | ( | BLOB_CHOICE_LIST_CLIST * | blob_choices | ) |
Definition at line 670 of file pageres.cpp.
// Merges adjacent simple-quote blobs into a single double quote via
// ConditionalBlobMerge, using BothQuotes as the class test and no box test.
// Does nothing if the double quote is absent from, or disabled in, the
// unicharset.
{
  const bool quote_allowed =
      uch_set->contains_unichar("\"") &&
      uch_set->get_enabled(uch_set->unichar_to_id("\""));
  if (!quote_allowed) {
    return;  // Don't create it if it is disallowed.
  }

  ConditionalBlobMerge(
      NewPermanentTessCallback(this, &WERD_RES::BothQuotes),
      NULL, blob_choices);
}
bool WERD_RES::HyphenBoxesOverlap | ( | const TBOX & | box1, |
const TBOX & | box2 | ||
) |
Definition at line 694 of file pageres.cpp.
void WERD_RES::InitForRetryRecognition | ( | const WERD_RES & | source | ) |
Definition at line 260 of file pageres.cpp.
// Prepares this object for another recognition attempt on source's word:
// shares the word pointer, copies the simple fields, and - if source has a
// blamer bundle - creates a fresh bundle carrying only source's truth data.
{
  word = source.word;
  CopySimpleFields(source);

  if (source.blamer_bundle == NULL) return;
  blamer_bundle = new BlamerBundle();
  blamer_bundle->CopyTruth(*source.blamer_bundle);
}
void WERD_RES::InitNonPointers | ( | ) |
Definition at line 750 of file pageres.cpp.
// Resets every field that does not own heap data to its default value.
{
  // Recognition status flags.
  tess_failed = FALSE;
  tess_accepted = FALSE;
  tess_would_adapt = FALSE;
  done = FALSE;
  unlv_crunch_mode = CR_NONE;
  reject_spaces = FALSE;

  // Font attributes.
  small_caps = false;
  italic = FALSE;
  bold = FALSE;

  // The fontinfos and tesseract count as non-pointers as they point to
  // data owned elsewhere.
  fontinfo = NULL;
  fontinfo2 = NULL;
  tesseract = NULL;
  fontinfo_id_count = 0;
  fontinfo_id2_count = 0;

  // Height estimates start out as guesses.
  x_height = 0.0;
  caps_height = 0.0;
  guessed_x_ht = TRUE;
  guessed_caps_ht = TRUE;

  // Combination bookkeeping.
  combination = FALSE;
  part_of_combo = FALSE;
}
void WERD_RES::InitPointers | ( | ) |
Definition at line 775 of file pageres.cpp.
// Sets all pointer members to NULL without freeing anything; intended for
// use on a freshly constructed object.
{
  // Source word and unicharset.
  word = NULL;
  uch_set = NULL;

  // Word and box representations.
  bln_boxes = NULL;
  chopped_word = NULL;
  rebuild_word = NULL;
  box_word = NULL;
  seam_array = NULL;

  // Choices.
  best_choice = NULL;
  raw_choice = NULL;
  ep_choice = NULL;

  blamer_bundle = NULL;
}
void WERD_RES::merge_tess_fails | ( | ) |
Definition at line 721 of file pageres.cpp.
// Merges adjacent space unichars in best_choice (BothSpaces callback, no
// box test). If anything was merged, verifies that reject_map and box_word
// still have the same length as best_choice.
{
  const bool merged = ConditionalBlobMerge(
      NewPermanentTessCallback(this, &WERD_RES::BothSpaces), NULL,
      best_choice->blob_choices());
  if (!merged) return;

  const int merged_len = best_choice->length();
  ASSERT_HOST(reject_map.length() == merged_len);
  ASSERT_HOST(box_word->length() == merged_len);
}
WERD_RES & WERD_RES::operator= | ( | const WERD_RES & | source | ) |
Definition at line 177 of file pageres.cpp.
// Copy assignment. Clears this object, then deep-copies every owned result
// from source. The word itself is deep-copied only for combinations;
// otherwise both objects point at the same word.
// The seam_array is not copied.
// Returns *this.
{
  // Self-assignment must be a no-op: Clear() below would otherwise destroy
  // the very data that is about to be copied.
  if (this == &source) return *this;

  this->ELIST_LINK::operator=(source);
  Clear();
  if (source.combination) {
    word = new WERD;
    *word = *(source.word);  // deep copy
  } else {
    word = source.word;      // pt to same word
  }
  if (source.bln_boxes != NULL)
    bln_boxes = new tesseract::BoxWord(*source.bln_boxes);
  if (source.chopped_word != NULL)
    chopped_word = new TWERD(*source.chopped_word);
  if (source.rebuild_word != NULL)
    rebuild_word = new TWERD(*source.rebuild_word);
  // TODO(rays) Do we ever need to copy the seam_array?
  denorm = source.denorm;
  if (source.box_word != NULL)
    box_word = new tesseract::BoxWord(*source.box_word);
  best_state = source.best_state;
  correct_text = source.correct_text;

  if (source.best_choice != NULL) {
    best_choice = new WERD_CHOICE(*source.best_choice);
    // Do not assume raw_choice is set just because best_choice is.
    raw_choice = source.raw_choice != NULL
                     ? new WERD_CHOICE(*source.raw_choice)
                     : NULL;
    best_choice_fontinfo_ids = source.best_choice_fontinfo_ids;
  } else {
    best_choice = NULL;
    raw_choice = NULL;
    if (!best_choice_fontinfo_ids.empty()) {
      best_choice_fontinfo_ids.clear();
    }
  }
  for (int i = 0; i < source.alt_choices.length(); ++i) {
    const WERD_CHOICE *choice = source.alt_choices[i];
    ASSERT_HOST(choice != NULL);
    alt_choices.push_back(new WERD_CHOICE(*choice));
  }
  alt_states = source.alt_states;
  if (source.ep_choice != NULL) {
    ep_choice = new WERD_CHOICE(*source.ep_choice);
  } else {
    ep_choice = NULL;
  }
  reject_map = source.reject_map;
  combination = source.combination;
  part_of_combo = source.part_of_combo;
  CopySimpleFields(source);
  if (source.blamer_bundle != NULL) {
    blamer_bundle = new BlamerBundle(*(source.blamer_bundle));
  }
  return *this;
}
bool WERD_RES::PiecesAllNatural | ( | int | start, |
int | count | ||
) | const |
Definition at line 733 of file pageres.cpp.
// Returns true if none of the seams between pieces start .. start+count-1
// has a first split (split1). Seam indices that fall outside seam_array
// are skipped.
{
  const int seam_end = start + count - 1;
  for (int seam_index = start; seam_index < seam_end; ++seam_index) {
    if (seam_index < 0 || seam_index >= array_count(seam_array)) continue;
    SEAM* seam =
        reinterpret_cast<SEAM *>(array_value(seam_array, seam_index));
    if (seam == NULL) continue;
    if (seam->split1 != NULL) return false;
  }
  return true;
}
const char* const WERD_RES::RawUTF8 | ( | int | blob_index | ) | const [inline] |
Definition at line 489 of file pageres.h.
// Returns the text for the unichar at blob_index of raw_choice, or NULL if
// there is no raw choice / unicharset, the index is out of range, or the
// unichar id is invalid.
{
  // Guard against an unrecognized word instead of dereferencing NULL.
  if (raw_choice == NULL || uch_set == NULL) return NULL;
  if (blob_index < 0 || blob_index >= raw_choice->length()) return NULL;
  UNICHAR_ID id = raw_choice->unichar_id(blob_index);
  if (id < 0 || id >= uch_set->size() || id == INVALID_UNICHAR_ID)
    return NULL;
  return uch_set->id_to_unichar(id);
}
void WERD_RES::RebuildBestState | ( | ) |
Definition at line 452 of file pageres.cpp.
// Rebuilds rebuild_word from chopped_word according to best_state.
// Each best_state entry is a count of consecutive chopped pieces; those
// pieces are temporarily joined, the joined blob is copied onto the end of
// rebuild_word's blob list, and the pieces are then broken apart again.
// A seam list is created from chopped_word if none exists yet.
{
  delete rebuild_word;  // deleting NULL is a no-op
  rebuild_word = new TWERD;
  if (seam_array == NULL) {
    seam_array = start_seam_list(chopped_word->blobs);
  }

  TBLOB* tail = NULL;   // last blob appended to rebuild_word so far
  int first_piece = 0;  // index of the first chopped piece of this group
  for (int group = 0; group < best_state.size(); ++group) {
    const int piece_count = best_state[group];
    const int last_piece = first_piece + piece_count - 1;
    join_pieces(chopped_word->blobs, seam_array, first_piece, last_piece);

    // Step along the chopped blob list to the start of the joined group.
    TBLOB* joined = chopped_word->blobs;
    for (int skip = 0; skip < first_piece; ++skip) {
      joined = joined->next;
    }

    TBLOB* copy = new TBLOB(*joined);
    if (tail == NULL) {
      rebuild_word->blobs = copy;
    } else {
      tail->next = copy;
    }
    tail = copy;

    break_pieces(joined, seam_array, first_piece, last_piece);
    first_piece += piece_count;
  }
}
void WERD_RES::ReplaceBestChoice | ( | const WERD_CHOICE & | choice, |
const GenericVector< int > & | segmentation_state | ||
) |
Definition at line 436 of file pageres.cpp.
// Installs a copy of `choice` as the new best_choice together with its
// segmentation state, then rebuilds rebuild_word, box_word and the script
// positions to match. The word is marked done, accepted and adaptable.
{
  delete best_choice;
  best_choice = new WERD_CHOICE(choice);
  best_state = segmentation_state;

  RebuildBestState();
  SetupBoxWord();

  // Make up a fake reject map of the right length to keep the
  // rejection pass happy.
  reject_map.initialise(segmentation_state.length());

  tess_would_adapt = true;
  tess_accepted = true;
  done = true;

  SetScriptPositions();
}
void WERD_RES::SetScriptPositions | ( | ) |
Definition at line 505 of file pageres.cpp.
void WERD_RES::SetupBlamerBundle | ( | ) |
Definition at line 379 of file pageres.cpp.
// If a blamer bundle is attached, scales its box tolerance by the denorm's
// x scale and inserts a normalized copy of every truth_word box into
// norm_truth_word. Does nothing without a bundle.
{
  if (blamer_bundle == NULL) return;

  blamer_bundle->norm_box_tolerance = kBlamerBoxTolerance * denorm.x_scale();

  TPOINT topleft;
  TPOINT botright;
  TPOINT norm_topleft;
  TPOINT norm_botright;
  for (int b = 0; b < blamer_bundle->truth_word.length(); ++b) {
    const TBOX &truth_box = blamer_bundle->truth_word.BlobBox(b);
    topleft.x = truth_box.left();
    topleft.y = truth_box.top();
    botright.x = truth_box.right();
    botright.y = truth_box.bottom();

    // Transform the two opposite corners and rebuild the box from them.
    denorm.NormTransform(topleft, &norm_topleft);
    denorm.NormTransform(botright, &norm_botright);
    TBOX norm_box(norm_topleft.x, norm_botright.y,
                  norm_botright.x, norm_topleft.y);
    blamer_bundle->norm_truth_word.InsertBox(b, norm_box);
  }
}
void WERD_RES::SetupBoxWord | ( | ) |
Definition at line 495 of file pageres.cpp.
// Recreates box_word from rebuild_word: recomputes rebuild_word's bounding
// boxes, builds a BoxWord from them via denorm, and clips the result to the
// original word.
{
  delete box_word;  // deleting NULL is a no-op
  rebuild_word->ComputeBoundingBoxes();
  box_word = tesseract::BoxWord::CopyFromNormalized(&denorm, rebuild_word);
  box_word->ClipToOriginalWord(denorm.block(), word);
}
void WERD_RES::SetupFake | ( | const UNICHARSET & | unicharset_in | ) |
Definition at line 340 of file pageres.cpp.
// Sets the word up with placeholder results instead of real recognition:
// empty chopped/rebuild words, empty best/raw choices, and - if the word has
// any blobs - one fake BLOB_CHOICE per blob with its bounding box copied
// into box_word. The word is always marked as tess_failed.
{
  ClearResults();
  SetupWordScript(unicharset_in);

  chopped_word = new TWERD;
  rebuild_word = new TWERD;
  bln_boxes = new tesseract::BoxWord;
  box_word = new tesseract::BoxWord;

  best_choice = new WERD_CHOICE("", NULL, 10.0f, -1.0f,
                                TOP_CHOICE_PERM, unicharset_in);
  raw_choice = new WERD_CHOICE("", NULL, 10.0f, -1.0f,
                               TOP_CHOICE_PERM, unicharset_in);

  const int num_blobs = word->cblob_list()->length();
  if (num_blobs > 0) {
    BLOB_CHOICE** placeholders = new BLOB_CHOICE*[num_blobs];
    // For non-text blocks, just pass any blobs through to the box_word
    // and call the word failed with a fake classification.
    C_BLOB_IT blob_it(word->cblob_list());
    int next_slot = 0;
    for (blob_it.mark_cycle_pt(); !blob_it.cycled_list();
         blob_it.forward()) {
      TBOX blob_box = blob_it.data()->bounding_box();
      box_word->InsertBox(box_word->length(), blob_box);
      placeholders[next_slot] =
          new BLOB_CHOICE(0, 10.0f, -1.0f, -1, -1, -1, 0, 0, false);
      ++next_slot;
    }
    FakeClassifyWord(num_blobs, placeholders);
    // Only the pointer array is freed here, not the BLOB_CHOICE objects.
    delete [] placeholders;
  }

  tess_failed = true;
}
bool WERD_RES::SetupForCubeRecognition | ( | const UNICHARSET & | unicharset_in, |
tesseract::Tesseract * | tess, | ||
const BLOCK * | block | ||
) |
Definition at line 317 of file pageres.cpp.
// Prepares the word for cube recognition.
// Words inside a non-text poly block get fake results and the function
// returns false; otherwise results are cleared, the script is set up, and
// denorm is initialised from the word's bounding box, returning true.
{
  tesseract = tess;

  POLY_BLOCK* poly = NULL;
  if (block != NULL) poly = block->poly_block();
  const bool in_graphics = poly != NULL && !poly->IsText();
  if (in_graphics) {
    // Ignore words in graphic regions.
    SetupFake(unicharset_in);
    word->set_flag(W_REP_CHAR, false);
    return false;
  }

  ClearResults();
  SetupWordScript(unicharset_in);
  TBOX word_box = word->bounding_box();
  denorm.SetupNormalization(block, NULL, NULL, NULL, NULL, 0,
                            word_box.left(), word_box.bottom(),
                            1.0f, 1.0f, 0.0f, 0.0f);
  SetupBlamerBundle();
  return true;
}
bool WERD_RES::SetupForTessRecognition | ( | const UNICHARSET & | unicharset_in, |
tesseract::Tesseract * | tess, | ||
Pix * | pix, | ||
bool | numeric_mode, | ||
bool | use_body_size, | ||
ROW * | row, | ||
BLOCK * | block | ||
) |
Definition at line 272 of file pageres.cpp.
// Prepares the word for recognition by Tesseract.
// Words with no blobs, or inside a non-text poly block, get fake results
// and the function returns false. Otherwise a polygonal copy of the word is
// normalized (by the row's body size when requested and positive, else by
// x_height), the seam list and empty "bad" best/raw choices are created,
// and true is returned.
{
  tesseract = tess;

  POLY_BLOCK* poly = NULL;
  if (block != NULL) poly = block->poly_block();
  const bool unusable = word->cblob_list()->empty() ||
                        (poly != NULL && !poly->IsText());
  if (unusable) {
    // Empty words occur when all the blobs have been moved to the rej_blobs
    // list, which seems to occur frequently in junk.
    SetupFake(unicharset_in);
    word->set_flag(W_REP_CHAR, false);
    return false;
  }

  ClearResults();
  SetupWordScript(unicharset_in);
  chopped_word = TWERD::PolygonalCopy(word);

  const float norm_height =
      (use_body_size && row->body_size() > 0.0f) ? row->body_size()
                                                 : x_height;
  chopped_word->SetupBLNormalize(block, row, norm_height,
                                 numeric_mode, &denorm);

  // The image will be 8-bit grey if the input was grey or color. Note that in
  // a grey image 0 is black and 255 is white. If the input was binary, then
  // the pix will be binary and 0 is white, with 1 being black.
  // To tell the difference pixGetDepth() will return 8 or 1.
  denorm.set_pix(pix);
  // The inverse flag will be true iff the word has been determined to be white
  // on black, and is independent of whether the pix is 8 bit or 1 bit.
  denorm.set_inverse(word->flag(W_INVERSE));
  chopped_word->Normalize(denorm);

  bln_boxes = tesseract::BoxWord::CopyFromNormalized(NULL, chopped_word);
  seam_array = start_seam_list(chopped_word->blobs);

  best_choice = new WERD_CHOICE(&unicharset_in);
  best_choice->make_bad();
  raw_choice = new WERD_CHOICE(&unicharset_in);
  raw_choice->make_bad();

  SetupBlamerBundle();
  return true;
}
void WERD_RES::SetupWordScript | ( | const UNICHARSET & | uch | ) |
Definition at line 370 of file pageres.cpp.
// Remembers the unicharset and stamps the word with its default script id,
// plus flags saying whether the script has an x-height and whether that
// default script is Latin.
{
  uch_set = &uch;

  const int default_script = uch.default_sid();
  const bool is_latin = (default_script == uch.latin_sid());

  word->set_script_id(default_script);
  word->set_flag(W_SCRIPT_HAS_XHEIGHT, uch.script_has_xheight());
  word->set_flag(W_SCRIPT_IS_LATIN, is_latin);
}
UNICHARSET::Direction WERD_RES::SymbolDirection | ( | int | blob_index | ) | const [inline] |
Definition at line 498 of file pageres.h.
// Returns the direction of the unichar at blob_index in best_choice, or
// U_OTHER_NEUTRAL when there is no best choice or the index is out of range.
{
  if (best_choice == NULL) return UNICHARSET::U_OTHER_NEUTRAL;
  const bool in_range =
      blob_index < best_choice->length() && blob_index >= 0;
  if (!in_range) return UNICHARSET::U_OTHER_NEUTRAL;
  return uch_set->get_direction(best_choice->unichar_id(blob_index));
}
bool WERD_RES::UnicharsInReadingOrder | ( | ) | const [inline] |
Definition at line 540 of file pageres.h.
// Forwards to best_choice->unichars_in_script_order().
// best_choice is dereferenced without a NULL check.
{
  const bool script_order = best_choice->unichars_in_script_order();
  return script_order;
}
void WERD_RES::WithoutFootnoteSpan | ( | int * | pstart, |
int * | pend | ||
) | const |
Definition at line 510 of file pageres.cpp.
// Computes the [*pstart, *pend) range of best_choice that remains after
// stripping superscript digits, first from the end and then from the start.
// A unichar is stripped only if it is a digit in uch_set and box_word
// places it at SP_SUPERSCRIPT.
{
  // Trim trailing superscript digits.
  int last = best_choice->length();
  for (; last > 0; --last) {
    const bool is_digit =
        uch_set->get_isdigit(best_choice->unichar_ids()[last - 1]);
    if (!is_digit) break;
    if (box_word->BlobPosition(last - 1) != tesseract::SP_SUPERSCRIPT) break;
  }

  // Trim leading superscript digits, never passing the trimmed end.
  int first = 0;
  for (; first < last; ++first) {
    const bool is_digit =
        uch_set->get_isdigit(best_choice->unichar_ids()[first]);
    if (!is_digit) break;
    if (box_word->BlobPosition(first) != tesseract::SP_SUPERSCRIPT) break;
  }

  *pstart = first;
  *pend = last;
}
void WERD_RES::WithoutFootnoteSpan | ( | const WERD_CHOICE & | word, |
const GenericVector< int > & | state, | ||
int * | pstart, | ||
int * | pend | ||
) | const |
Definition at line 527 of file pageres.cpp.
// Computes the footnote-free span for an arbitrary choice and segmentation
// state. The outputs default to the whole word [0, length). Only when the
// word has at least two unichars and starts or ends with a digit is the
// span computed on a temporary copy of this object that has the choice
// installed as its best choice.
{
  const int num_unichars = word.length();
  *pstart = 0;
  *pend = num_unichars;
  if (num_unichars < 2) return;

  const bool digit_at_edge =
      word.unicharset()->get_isdigit(word.unichar_ids()[num_unichars - 1]) ||
      word.unicharset()->get_isdigit(word.unichar_ids()[0]);
  if (!digit_at_edge) return;

  // A digit sits at one edge of the word, so do the expensive bit of
  // figuring out whether it is superscript.
  WERD_RES scratch(*this);
  scratch.ReplaceBestChoice(word, state);
  scratch.WithoutFootnoteSpan(pstart, pend);
}
float WERD_RES::caps_height |
const FontInfo* WERD_RES::fontinfo |
const FontInfo* WERD_RES::fontinfo2 |
bool WERD_RES::small_caps |
const UNICHARSET* WERD_RES::uch_set |
float WERD_RES::x_height |