|
Tesseract
3.02
|
#include <pageres.h>
| WERD_RES::WERD_RES | ( | ) | [inline] |
Definition at line 456 of file pageres.h.
{
  // Put every member into a known state: the pointer members are nulled and
  // the flags, counters and heights receive their defaults.
  InitPointers();
  InitNonPointers();
}
| WERD_RES::WERD_RES | ( | WERD * | the_word | ) | [inline] |
Definition at line 460 of file pageres.h.
{
  // Start from the default state, then attach the caller's word.
  InitPointers();
  InitNonPointers();
  word = the_word;
}
| WERD_RES::WERD_RES | ( | const WERD_RES & | source | ) | [inline] |
Definition at line 465 of file pageres.h.
{
  // Null the pointer members before delegating to the assignment operator,
  // so that it never sees uninitialised pointers.
  InitPointers();
  operator=(source);  // see operator=
}
| WERD_RES::~WERD_RES | ( | ) |
Definition at line 746 of file pageres.cpp.
{
  // All clean-up is delegated to Clear().
  this->Clear();
}
| bool WERD_RES::AnyLtrCharsInWord | ( | ) | const [inline] |
Definition at line 523 of file pageres.h.
{
  // Returns true if any symbol of best_choice has direction U_LEFT_TO_RIGHT
  // in uch_set. Returns false when there is no unicharset or no best_choice,
  // or when best_choice is empty.
  if (uch_set == NULL || best_choice == NULL)
    return false;
  const int num_symbols = best_choice->length();
  for (int pos = 0; pos < num_symbols; ++pos) {
    const int symbol_id = best_choice->unichar_id(pos);
    // Ignore illegal chars.
    const bool legal = symbol_id >= 0 && symbol_id < uch_set->size();
    if (legal &&
        uch_set->get_direction(symbol_id) == UNICHARSET::U_LEFT_TO_RIGHT) {
      return true;
    }
  }
  return false;
}
| bool WERD_RES::AnyRtlCharsInWord | ( | ) | const [inline] |
Definition at line 506 of file pageres.h.
{
  // Returns true if any symbol of best_choice has one of the right-to-left
  // directions (U_RIGHT_TO_LEFT, U_RIGHT_TO_LEFT_ARABIC, U_ARABIC_NUMBER).
  // Returns false when there is no unicharset or no best_choice, or when
  // best_choice is empty.
  if (uch_set == NULL || best_choice == NULL)
    return false;
  const int num_symbols = best_choice->length();
  for (int pos = 0; pos < num_symbols; ++pos) {
    const int symbol_id = best_choice->unichar_id(pos);
    if (symbol_id < 0 || symbol_id >= uch_set->size())
      continue;  // Ignore illegal chars.
    switch (uch_set->get_direction(symbol_id)) {
      case UNICHARSET::U_RIGHT_TO_LEFT:
      case UNICHARSET::U_RIGHT_TO_LEFT_ARABIC:
      case UNICHARSET::U_ARABIC_NUMBER:
        return true;
      default:
        break;
    }
  }
  return false;
}
| void WERD_RES::BestChoiceToCorrectText | ( | ) |
Definition at line 572 of file pageres.cpp.
{
  // Rebuilds correct_text so that it holds one string per symbol of
  // best_choice, in order. best_choice must be set.
  correct_text.clear();
  ASSERT_HOST(best_choice != NULL);
  const int num_symbols = best_choice->length();
  for (int pos = 0; pos < num_symbols; ++pos) {
    const UNICHAR_ID symbol_id = best_choice->unichar_id(pos);
    correct_text.push_back(STRING(uch_set->id_to_unichar(symbol_id)));
  }
}
| const char* const WERD_RES::BestUTF8 | ( | int | blob_index, |
| bool | in_rtl_context | ||
| ) | const [inline] |
Definition at line 477 of file pageres.h.
{
  // Returns the string for the symbol at blob_index of best_choice, or NULL
  // when there is no best_choice or unicharset, when blob_index is out of
  // range, or when the stored id is not a valid id of uch_set.
  // If in_rtl_context is true and the symbol has a mirrored form, the
  // mirrored form is returned instead.
  //
  // The NULL guards match the other accessors (AnyLtrCharsInWord,
  // SymbolDirection) so this can be called on a result that has not been
  // recognised yet.
  if (best_choice == NULL || uch_set == NULL)
    return NULL;
  if (blob_index < 0 || blob_index >= best_choice->length())
    return NULL;
  UNICHAR_ID id = best_choice->unichar_id(blob_index);
  if (id < 0 || id >= uch_set->size() || id == INVALID_UNICHAR_ID)
    return NULL;
  UNICHAR_ID mirrored = uch_set->get_mirror(id);
  if (in_rtl_context && mirrored > 0 && mirrored != INVALID_UNICHAR_ID)
    id = mirrored;
  return uch_set->id_to_unichar_ext(id);
}
| UNICHAR_ID WERD_RES::BothHyphens | ( | UNICHAR_ID | id1, |
| UNICHAR_ID | id2 | ||
| ) |
Definition at line 683 of file pageres.cpp.
{
  // Callback for ConditionalBlobMerge: returns the id of "-" when both ids
  // are the single character '-' or '~', otherwise INVALID_UNICHAR_ID.
  const char *first = uch_set->id_to_unichar(id1);
  const char *second = uch_set->id_to_unichar(id2);
  const bool both_single = strlen(first) == 1 && strlen(second) == 1;
  if (!both_single)
    return INVALID_UNICHAR_ID;
  const bool first_dashlike = *first == '-' || *first == '~';
  const bool second_dashlike = *second == '-' || *second == '~';
  if (!first_dashlike || !second_dashlike)
    return INVALID_UNICHAR_ID;
  return uch_set->unichar_to_id("-");
}
| UNICHAR_ID WERD_RES::BothQuotes | ( | UNICHAR_ID | id1, |
| UNICHAR_ID | id2 | ||
| ) |
Definition at line 660 of file pageres.cpp.
{
  // Callback for ConditionalBlobMerge: returns the id of the double quote
  // when is_simple_quote accepts both ids, otherwise INVALID_UNICHAR_ID.
  const char *first = uch_set->id_to_unichar(id1);
  const char *second = uch_set->id_to_unichar(id2);
  if (!is_simple_quote(first, strlen(first)))
    return INVALID_UNICHAR_ID;
  if (!is_simple_quote(second, strlen(second)))
    return INVALID_UNICHAR_ID;
  return uch_set->unichar_to_id("\"");
}
| UNICHAR_ID WERD_RES::BothSpaces | ( | UNICHAR_ID | id1, |
| UNICHAR_ID | id2 | ||
| ) |
Definition at line 713 of file pageres.cpp.
{
  // Callback for ConditionalBlobMerge: returns id1 when both ids are the
  // space character, otherwise INVALID_UNICHAR_ID.
  const bool both_space = id1 == id2 && id1 == uch_set->unichar_to_id(" ");
  if (both_space)
    return id1;
  return INVALID_UNICHAR_ID;
}
| void WERD_RES::Clear | ( | ) |
Definition at line 789 of file pageres.cpp.
{
  // Releases everything held by this result. The word is deleted only when
  // this result is a combination; otherwise the pointer is just dropped.
  const bool delete_word = word != NULL && combination;
  if (delete_word)
    delete word;
  word = NULL;
  // The blamer bundle is freed before ClearResults() runs, so ClearResults()
  // sees a NULL bundle and skips it.
  delete blamer_bundle;
  blamer_bundle = NULL;
  ClearResults();
}
| void WERD_RES::ClearResults | ( | ) |
Definition at line 799 of file pageres.cpp.
{
  // Discards every recognition result held by this WERD_RES and resets the
  // matching pointers to NULL. The word, the combination flag and the
  // blamer_bundle object itself are left alone; the bundle only has its own
  // ClearResults() called.
  done = false;
  fontinfo = NULL;
  fontinfo2 = NULL;
  fontinfo_id_count = 0;
  fontinfo_id2_count = 0;

  // delete on a NULL pointer is a no-op, so no guards are needed.
  delete bln_boxes;
  bln_boxes = NULL;
  delete chopped_word;
  chopped_word = NULL;
  delete rebuild_word;
  rebuild_word = NULL;
  delete box_word;
  box_word = NULL;

  best_state.clear();
  correct_text.clear();
  if (seam_array != NULL) {
    free_seam_list(seam_array);
    seam_array = NULL;
  }

  // Free the two choices independently: previously raw_choice was released
  // only when best_choice was non-NULL, leaking it otherwise.
  delete best_choice;
  best_choice = NULL;
  delete raw_choice;
  raw_choice = NULL;

  if (!alt_choices.empty()) {
    alt_choices.delete_data_pointers();
    alt_choices.clear();
  }
  alt_states.clear();
  delete ep_choice;
  ep_choice = NULL;
  if (blamer_bundle != NULL) blamer_bundle->ClearResults();
}
| void WERD_RES::CloneChoppedToRebuild | ( | ) |
Definition at line 480 of file pageres.cpp.
{
  // Makes rebuild_word a copy of chopped_word, rebuilds box_word from it,
  // and appends one entry per box to best_state (value 1) and to
  // correct_text (empty string).
  delete rebuild_word;  // deleting NULL is a no-op
  rebuild_word = new TWERD(*chopped_word);
  SetupBoxWord();
  const int num_boxes = box_word->length();
  best_state.reserve(num_boxes);
  correct_text.reserve(num_boxes);
  for (int box = 0; box < num_boxes; ++box) {
    best_state.push_back(1);
    correct_text.push_back(STRING(""));
  }
}
| bool WERD_RES::ConditionalBlobMerge | ( | TessResultCallback2< UNICHAR_ID, UNICHAR_ID, UNICHAR_ID > * | class_cb, |
| TessResultCallback2< bool, const TBOX &, const TBOX & > * | box_cb, | ||
| BLOB_CHOICE_LIST_CLIST * | blob_choices | ||
| ) |
Definition at line 587 of file pageres.cpp.
{
// Walks adjacent symbol pairs of best_choice and merges every pair for which
// class_cb returns a valid id and box_cb (when non-NULL) accepts the two
// boxes. The per-blob data touched here (reject_map, best_choice,
// raw_choice, rebuild_word, box_word, best_state, blob_choices) is updated
// to match each merge.
// Both callbacks are deleted before returning, so the caller hands over
// ownership of them. Returns true if at least one merge was made.
bool modified = false;
// The index still advances after a merge, so a merged symbol is not
// re-tested against its new right-hand neighbour.
for (int i = 0; i + 1 < best_choice->length(); ++i) {
UNICHAR_ID new_id = class_cb->Run(best_choice->unichar_id(i),
best_choice->unichar_id(i+1));
// box_cb is optional: a NULL box_cb means the boxes are not consulted.
if (new_id != INVALID_UNICHAR_ID &&
(box_cb == NULL || box_cb->Run(box_word->BlobBox(i),
box_word->BlobBox(i + 1)))) {
// Only shrink the reject map while it is still in step with best_choice.
if (reject_map.length() == best_choice->length())
reject_map.remove_pos(i);
// Position i takes the merged id; position i + 1 disappears.
best_choice->set_unichar_id(new_id, i);
best_choice->remove_unichar_id(i + 1);
raw_choice->set_unichar_id(new_id, i);
raw_choice->remove_unichar_id(i + 1);
modified = true;
rebuild_word->MergeBlobs(i, i + 2);
box_word->MergeBoxes(i, i + 2);
// The merged blob's state count is the sum of the two originals.
if (i + 1 < best_state.length()) {
best_state[i] += best_state[i + 1];
best_state.remove(i + 1);
}
// Step the list iterator to the choice list of blob i.
BLOB_CHOICE_LIST_C_IT blob_choices_it(blob_choices);
for (int j = 0; j < i; ++j)
blob_choices_it.forward();
BLOB_CHOICE_IT it1(blob_choices_it.data()); // first choices
BLOB_CHOICE_LIST* target_choices = blob_choices_it.data_relative(1);
BLOB_CHOICE_IT it2(target_choices); // second choices
float certainty = it2.data()->certainty();
float rating = it2.data()->rating();
// If the first list's current entry has the lower certainty, the iterator
// is moved on so that the extract() below removes the second list;
// otherwise the first list is removed. certainty, rating and
// target_choices are not read again after this point.
if (it1.data()->certainty() < certainty) {
certainty = it1.data()->certainty();
rating = it1.data()->rating();
target_choices = blob_choices_it.data();
blob_choices_it.forward();
}
delete blob_choices_it.extract(); // get rid of spare
// TODO(rays) Fix the choices so they contain the desired result.
// Do we really need to ? Only needed for fix_quotes, which should be
// going away.
}
}
// The callbacks are consumed by this call.
delete class_cb;
delete box_cb;
return modified;
}
| void WERD_RES::ConsumeWordResults | ( | WERD_RES * | word | ) |
Definition at line 411 of file pageres.cpp.
{
  // Takes over the recognition results held by the given WERD_RES.
  // Note that the parameter `word` hides the member of the same name inside
  // this function; `donor` is used below to make that explicit.
  WERD_RES* donor = word;

  // Plain value copies.
  denorm = donor->denorm;
  reject_map = donor->reject_map;

  // Transfer the word structures.
  MovePointerData(&chopped_word, &donor->chopped_word);
  MovePointerData(&rebuild_word, &donor->rebuild_word);
  MovePointerData(&box_word, &donor->box_word);

  // Replace our seam array with the donor's, which then gives it up.
  if (seam_array != NULL)
    free_seam_list(seam_array);
  seam_array = donor->seam_array;
  donor->seam_array = NULL;

  best_state.move(&donor->best_state);
  correct_text.move(&donor->correct_text);

  // Transfer the choices; our old alternates are deleted first.
  MovePointerData(&best_choice, &donor->best_choice);
  MovePointerData(&raw_choice, &donor->raw_choice);
  alt_choices.delete_data_pointers();
  alt_choices.move(&donor->alt_choices);
  alt_states.move(&donor->alt_states);

  // Blamer results are copied into our existing bundle, which must exist
  // whenever the donor has one.
  if (donor->blamer_bundle != NULL) {
    assert(blamer_bundle != NULL);
    blamer_bundle->CopyResults(*(donor->blamer_bundle));
  }
  CopySimpleFields(*donor);
}
| void WERD_RES::copy_on | ( | WERD_RES * | word_res | ) | [inline] |
| void WERD_RES::CopySimpleFields | ( | const WERD_RES & | source | ) |
Definition at line 234 of file pageres.cpp.
{
  // Copies the fields that can be assigned directly, with no deep copy:
  // status flags, font data, heights, and the pointers to the unicharset
  // and the Tesseract instance.

  // Recognition status.
  done = source.done;
  tess_failed = source.tess_failed;
  tess_accepted = source.tess_accepted;
  tess_would_adapt = source.tess_would_adapt;
  unlv_crunch_mode = source.unlv_crunch_mode;
  reject_spaces = source.reject_spaces;

  // Font attributes.
  bold = source.bold;
  italic = source.italic;
  small_caps = source.small_caps;
  fontinfo = source.fontinfo;
  fontinfo2 = source.fontinfo2;
  fontinfo_id_count = source.fontinfo_id_count;
  fontinfo_id2_count = source.fontinfo_id2_count;

  // Heights and whether they were guessed.
  x_height = source.x_height;
  caps_height = source.caps_height;
  guessed_x_ht = source.guessed_x_ht;
  guessed_caps_ht = source.guessed_caps_ht;

  // Pointers that are copied as-is.
  uch_set = source.uch_set;
  tesseract = source.tesseract;
}
| void WERD_RES::FakeClassifyWord | ( | int | blob_count, |
| BLOB_CHOICE ** | choices | ||
| ) |
Definition at line 549 of file pageres.cpp.
{
  // Fills best_choice, raw_choice and reject_map from the given array of
  // blob_count choices, one per box of box_word. Each choice is placed in a
  // freshly allocated single-element list, and the lists are attached to
  // best_choice.
  // Setup the WERD_RES.
  ASSERT_HOST(box_word != NULL);
  ASSERT_HOST(blob_count == box_word->length());
  ASSERT_HOST(best_choice != NULL);
  BLOB_CHOICE_LIST_CLIST* per_blob_lists = new BLOB_CHOICE_LIST_CLIST;
  BLOB_CHOICE_LIST_C_IT lists_it(per_blob_lists);
  for (int blob = 0; blob < blob_count; ++blob) {
    BLOB_CHOICE* choice = choices[blob];
    best_choice->append_unichar_id(choice->unichar_id(), 1,
                                   choice->rating(), choice->certainty());
    BLOB_CHOICE_LIST* single_list = new BLOB_CHOICE_LIST;
    BLOB_CHOICE_IT single_it(single_list);
    single_it.add_after_then_move(choice);
    lists_it.add_after_then_move(single_list);
  }
  best_choice->set_blob_choices(per_blob_lists);
  // raw_choice becomes a copy of the freshly built best_choice.
  delete raw_choice;
  raw_choice = new WERD_CHOICE(*best_choice);
  reject_map.initialise(blob_count);
}
| void WERD_RES::fix_hyphens | ( | BLOB_CHOICE_LIST_CLIST * | blob_choices | ) |
Definition at line 700 of file pageres.cpp.
{
  // Merges adjacent hyphen-like blobs (see BothHyphens) whose boxes satisfy
  // HyphenBoxesOverlap into a single "-".
  // Don't create it if it is disallowed.
  if (!uch_set->contains_unichar("-"))
    return;
  if (!uch_set->get_enabled(uch_set->unichar_to_id("-")))
    return;
  // ConditionalBlobMerge deletes both callbacks when it is done.
  ConditionalBlobMerge(
      NewPermanentTessCallback(this, &WERD_RES::BothHyphens),
      NewPermanentTessCallback(this, &WERD_RES::HyphenBoxesOverlap),
      blob_choices);
}
| void WERD_RES::fix_quotes | ( | BLOB_CHOICE_LIST_CLIST * | blob_choices | ) |
Definition at line 670 of file pageres.cpp.
{
  // Merges adjacent simple quotes (see BothQuotes) into a single double
  // quote. No box test is applied.
  // Don't create it if it is disallowed.
  if (!uch_set->contains_unichar("\""))
    return;
  if (!uch_set->get_enabled(uch_set->unichar_to_id("\"")))
    return;
  // ConditionalBlobMerge deletes the callback when it is done.
  ConditionalBlobMerge(
      NewPermanentTessCallback(this, &WERD_RES::BothQuotes),
      NULL,
      blob_choices);
}
Definition at line 694 of file pageres.cpp.
| void WERD_RES::InitForRetryRecognition | ( | const WERD_RES & | source | ) |
Definition at line 260 of file pageres.cpp.
{
  // Prepares this result for another recognition attempt on the same word
  // as source: shares the word pointer, copies the simple fields and, when
  // source has a blamer bundle, creates a new bundle holding its truth data.
  word = source.word;
  CopySimpleFields(source);
  if (source.blamer_bundle == NULL)
    return;
  blamer_bundle = new BlamerBundle();
  blamer_bundle->CopyTruth(*source.blamer_bundle);
}
| void WERD_RES::InitNonPointers | ( | ) |
Definition at line 750 of file pageres.cpp.
{
  // Gives every member that is not an owned pointer its default value.

  // Recognition status.
  tess_failed = FALSE;
  tess_accepted = FALSE;
  tess_would_adapt = FALSE;
  done = FALSE;
  unlv_crunch_mode = CR_NONE;
  reject_spaces = FALSE;

  // Word combination state.
  combination = FALSE;
  part_of_combo = FALSE;

  // Font attributes.
  small_caps = false;
  italic = FALSE;
  bold = FALSE;
  fontinfo_id_count = 0;
  fontinfo_id2_count = 0;

  // The fontinfos and tesseract count as non-pointers as they point to
  // data owned elsewhere.
  fontinfo = NULL;
  fontinfo2 = NULL;
  tesseract = NULL;

  // Heights are unknown until measured, so they are flagged as guessed.
  x_height = 0.0;
  caps_height = 0.0;
  guessed_x_ht = TRUE;
  guessed_caps_ht = TRUE;
}
| void WERD_RES::InitPointers | ( | ) |
Definition at line 775 of file pageres.cpp.
{
  // Nulls every pointer member handled here so that a later Clear() or
  // ClearResults() sees only NULL or valid pointers.

  // The word and its character set.
  word = NULL;
  uch_set = NULL;

  // Word structures.
  chopped_word = NULL;
  rebuild_word = NULL;
  seam_array = NULL;

  // Box words.
  bln_boxes = NULL;
  box_word = NULL;

  // Recognition choices.
  best_choice = NULL;
  raw_choice = NULL;
  ep_choice = NULL;

  blamer_bundle = NULL;
}
| void WERD_RES::merge_tess_fails | ( | ) |
Definition at line 721 of file pageres.cpp.
{
  // Merges adjacent blobs that were both classified as a space (see
  // BothSpaces). After a merge, reject_map and box_word must still be the
  // same length as best_choice.
  const bool merged = ConditionalBlobMerge(
      NewPermanentTessCallback(this, &WERD_RES::BothSpaces), NULL,
      best_choice->blob_choices());
  if (!merged)
    return;
  const int len = best_choice->length();
  ASSERT_HOST(reject_map.length() == len);
  ASSERT_HOST(box_word->length() == len);
}
Definition at line 177 of file pageres.cpp.
{
  // Assignment: releases whatever this object holds and then copies source.
  // Owned objects (word structures, box words, choices, blamer bundle) are
  // duplicated. The word itself is duplicated only when source is a
  // combination; otherwise both objects point to the same word.
  //
  // Self-assignment guard: Clear() below frees the very data the rest of
  // this function reads from source, so assigning an object to itself must
  // be a no-op.
  if (this == &source)
    return *this;

  this->ELIST_LINK::operator=(source);
  Clear();
  if (source.combination) {
    word = new WERD;
    *word = *(source.word);  // deep copy
  } else {
    word = source.word;  // pt to same word
  }
  if (source.bln_boxes != NULL)
    bln_boxes = new tesseract::BoxWord(*source.bln_boxes);
  if (source.chopped_word != NULL)
    chopped_word = new TWERD(*source.chopped_word);
  if (source.rebuild_word != NULL)
    rebuild_word = new TWERD(*source.rebuild_word);
  // TODO(rays) Do we ever need to copy the seam_array?
  denorm = source.denorm;
  if (source.box_word != NULL)
    box_word = new tesseract::BoxWord(*source.box_word);
  best_state = source.best_state;
  correct_text = source.correct_text;
  if (source.best_choice != NULL) {
    best_choice = new WERD_CHOICE(*source.best_choice);
    // Check raw_choice separately so that a source with a best_choice but
    // no raw_choice is not dereferenced through NULL.
    if (source.raw_choice != NULL) {
      raw_choice = new WERD_CHOICE(*source.raw_choice);
    } else {
      raw_choice = NULL;
    }
    best_choice_fontinfo_ids = source.best_choice_fontinfo_ids;
  } else {
    best_choice = NULL;
    raw_choice = NULL;
    if (!best_choice_fontinfo_ids.empty()) {
      best_choice_fontinfo_ids.clear();
    }
  }
  // Clear() emptied alt_choices, so these copies are the only entries.
  for (int i = 0; i < source.alt_choices.length(); ++i) {
    const WERD_CHOICE *choice = source.alt_choices[i];
    ASSERT_HOST(choice != NULL);
    alt_choices.push_back(new WERD_CHOICE(*choice));
  }
  alt_states = source.alt_states;
  if (source.ep_choice != NULL) {
    ep_choice = new WERD_CHOICE(*source.ep_choice);
  } else {
    ep_choice = NULL;
  }
  reject_map = source.reject_map;
  combination = source.combination;
  part_of_combo = source.part_of_combo;
  CopySimpleFields(source);
  if (source.blamer_bundle != NULL) {
    blamer_bundle = new BlamerBundle(*(source.blamer_bundle));
  }
  return *this;
}
| bool WERD_RES::PiecesAllNatural | ( | int | start, |
| int | count | ||
| ) | const |
Definition at line 733 of file pageres.cpp.
{
  // Returns true when none of the seams between the `count` pieces starting
  // at `start` has a split. Indices outside seam_array are skipped.
  // all seams must have no splits.
  const int seam_limit = start + count - 1;
  for (int seam_index = start; seam_index < seam_limit; ++seam_index) {
    if (seam_index < 0 || seam_index >= array_count(seam_array))
      continue;
    SEAM* seam = reinterpret_cast<SEAM *>(array_value(seam_array, seam_index));
    if (seam == NULL)
      continue;
    if (seam->split1 != NULL)
      return false;
  }
  return true;
}
| const char* const WERD_RES::RawUTF8 | ( | int | blob_index | ) | const [inline] |
Definition at line 489 of file pageres.h.
{
  // Returns the string for the symbol at blob_index of raw_choice, or NULL
  // when there is no raw_choice or unicharset, when blob_index is out of
  // range, or when the stored id is not a valid id of uch_set.
  //
  // The NULL guards match the other accessors (AnyLtrCharsInWord,
  // SymbolDirection) so this can be called on a result that has not been
  // recognised yet.
  if (raw_choice == NULL || uch_set == NULL)
    return NULL;
  if (blob_index < 0 || blob_index >= raw_choice->length())
    return NULL;
  UNICHAR_ID id = raw_choice->unichar_id(blob_index);
  if (id < 0 || id >= uch_set->size() || id == INVALID_UNICHAR_ID)
    return NULL;
  return uch_set->id_to_unichar(id);
}
| void WERD_RES::RebuildBestState | ( | ) |
Definition at line 452 of file pageres.cpp.
{
  // Rebuilds rebuild_word from chopped_word according to best_state: each
  // best_state entry is the number of consecutive chopped pieces that form
  // one output blob. The pieces are joined, the joined blob is copied onto
  // the end of rebuild_word, and the pieces are broken apart again so that
  // chopped_word is left as it was.
  delete rebuild_word;  // deleting NULL is a no-op
  rebuild_word = new TWERD;
  if (seam_array == NULL)
    seam_array = start_seam_list(chopped_word->blobs);

  TBLOB* tail = NULL;   // last blob appended to rebuild_word so far
  int first_piece = 0;  // index of the first chopped piece of this group
  for (int group = 0; group < best_state.size(); ++group) {
    const int num_pieces = best_state[group];
    const int last_piece = first_piece + num_pieces - 1;
    join_pieces(chopped_word->blobs, seam_array, first_piece, last_piece);

    // Walk to the first piece of the group, which now holds the joined blob.
    TBLOB* joined = chopped_word->blobs;
    for (int step = 0; step < first_piece; ++step)
      joined = joined->next;

    TBLOB* blob_copy = new TBLOB(*joined);
    if (tail == NULL) {
      rebuild_word->blobs = blob_copy;
    } else {
      tail->next = blob_copy;
    }
    tail = blob_copy;

    break_pieces(joined, seam_array, first_piece, last_piece);
    first_piece += num_pieces;
  }
}
| void WERD_RES::ReplaceBestChoice | ( | const WERD_CHOICE & | choice, |
| const GenericVector< int > & | segmentation_state | ||
| ) |
Definition at line 436 of file pageres.cpp.
{
  // Replaces best_choice with a copy of `choice` and best_state with
  // segmentation_state, then rebuilds rebuild_word, box_word, the reject
  // map and the script positions to match. The word is marked as done,
  // accepted and adaptable.
  //
  // The copy is made before the old best_choice is deleted, so `choice` may
  // safely refer to the current best_choice.
  WERD_CHOICE* replacement = new WERD_CHOICE(choice);
  delete best_choice;
  best_choice = replacement;
  best_state = segmentation_state;
  RebuildBestState();
  SetupBoxWord();
  // Make up a fake reject map of the right length to keep the
  // rejection pass happy.
  reject_map.initialise(segmentation_state.length());
  done = tess_accepted = tess_would_adapt = true;
  SetScriptPositions();
}
| void WERD_RES::SetScriptPositions | ( | ) |
Definition at line 505 of file pageres.cpp.
| void WERD_RES::SetupBlamerBundle | ( | ) |
Definition at line 379 of file pageres.cpp.
{
  // When a blamer bundle is present, sets its normalised box tolerance and
  // fills norm_truth_word with the truth boxes transformed through denorm.
  if (blamer_bundle == NULL)
    return;
  blamer_bundle->norm_box_tolerance = kBlamerBoxTolerance * denorm.x_scale();
  TPOINT raw_top_left;
  TPOINT raw_bottom_right;
  TPOINT norm_top_left;
  TPOINT norm_bottom_right;
  for (int b = 0; b < blamer_bundle->truth_word.length(); ++b) {
    const TBOX &truth_box = blamer_bundle->truth_word.BlobBox(b);
    raw_top_left.x = truth_box.left();
    raw_top_left.y = truth_box.top();
    raw_bottom_right.x = truth_box.right();
    raw_bottom_right.y = truth_box.bottom();
    denorm.NormTransform(raw_top_left, &norm_top_left);
    denorm.NormTransform(raw_bottom_right, &norm_bottom_right);
    // Box built from the transformed corners, in (left, bottom, right, top)
    // order as in the original points.
    TBOX norm_box(norm_top_left.x, norm_bottom_right.y,
                  norm_bottom_right.x, norm_top_left.y);
    blamer_bundle->norm_truth_word.InsertBox(b, norm_box);
  }
}
| void WERD_RES::SetupBoxWord | ( | ) |
Definition at line 495 of file pageres.cpp.
{
  // Rebuilds box_word from rebuild_word via denorm and clips it to the
  // original word.
  delete box_word;  // deleting NULL is a no-op
  rebuild_word->ComputeBoundingBoxes();
  box_word = tesseract::BoxWord::CopyFromNormalized(&denorm, rebuild_word);
  box_word->ClipToOriginalWord(denorm.block(), word);
}
| void WERD_RES::SetupFake | ( | const UNICHARSET & | unicharset_in | ) |
Definition at line 340 of file pageres.cpp.
{
  // Builds a placeholder result for a word that will not be recognised:
  // empty word structures, one box and one fake choice per blob of the
  // word, and tess_failed set.
  ClearResults();
  SetupWordScript(unicharset_in);
  chopped_word = new TWERD;
  rebuild_word = new TWERD;
  bln_boxes = new tesseract::BoxWord;
  box_word = new tesseract::BoxWord;
  const int num_blobs = word->cblob_list()->length();
  best_choice = new WERD_CHOICE("", NULL, 10.0f, -1.0f,
                                TOP_CHOICE_PERM, unicharset_in);
  raw_choice = new WERD_CHOICE("", NULL, 10.0f, -1.0f,
                               TOP_CHOICE_PERM, unicharset_in);
  if (num_blobs > 0) {
    BLOB_CHOICE** placeholders = new BLOB_CHOICE*[num_blobs];
    // For non-text blocks, just pass any blobs through to the box_word
    // and call the word failed with a fake classification.
    int filled = 0;
    C_BLOB_IT blob_it(word->cblob_list());
    for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
      TBOX blob_box = blob_it.data()->bounding_box();
      box_word->InsertBox(box_word->length(), blob_box);
      placeholders[filled] = new BLOB_CHOICE(0, 10.0f, -1.0f,
                                             -1, -1, -1, 0, 0, false);
      ++filled;
    }
    FakeClassifyWord(num_blobs, placeholders);
    // Only the array is freed here; the choices themselves were handed to
    // FakeClassifyWord.
    delete [] placeholders;
  }
  tess_failed = true;
}
| bool WERD_RES::SetupForCubeRecognition | ( | const UNICHARSET & | unicharset_in, |
| tesseract::Tesseract * | tess, | ||
| const BLOCK * | block | ||
| ) |
Definition at line 317 of file pageres.cpp.
{
  // Prepares this result for cube recognition. Returns false, after setting
  // up a fake result, when the word lies in a non-text block; otherwise
  // clears old results, sets up normalisation from the word's bounding box
  // and returns true.
  tesseract = tess;
  POLY_BLOCK* poly = NULL;
  if (block != NULL)
    poly = block->poly_block();
  const bool in_non_text_block = poly != NULL && !poly->IsText();
  if (in_non_text_block) {
    // Ignore words in graphic regions.
    SetupFake(unicharset_in);
    word->set_flag(W_REP_CHAR, false);
    return false;
  }
  ClearResults();
  SetupWordScript(unicharset_in);
  const TBOX bounds = word->bounding_box();
  denorm.SetupNormalization(block, NULL, NULL, NULL, NULL, 0,
                            bounds.left(), bounds.bottom(),
                            1.0f, 1.0f, 0.0f, 0.0f);
  SetupBlamerBundle();
  return true;
}
| bool WERD_RES::SetupForTessRecognition | ( | const UNICHARSET & | unicharset_in, |
| tesseract::Tesseract * | tess, | ||
| Pix * | pix, | ||
| bool | numeric_mode, | ||
| bool | use_body_size, | ||
| ROW * | row, | ||
| BLOCK * | block | ||
| ) |
Definition at line 272 of file pageres.cpp.
{
  // Prepares this result for Tesseract recognition. Returns false, after
  // setting up a fake result, when the word has no blobs or lies in a
  // non-text block. Otherwise builds the normalised chopped word, its
  // boxes and seam list, creates "bad" best/raw choices, and returns true.
  tesseract = tess;
  POLY_BLOCK* poly = NULL;
  if (block != NULL)
    poly = block->poly_block();
  const bool unusable =
      word->cblob_list()->empty() || (poly != NULL && !poly->IsText());
  if (unusable) {
    // Empty words occur when all the blobs have been moved to the rej_blobs
    // list, which seems to occur frequently in junk.
    SetupFake(unicharset_in);
    word->set_flag(W_REP_CHAR, false);
    return false;
  }
  ClearResults();
  SetupWordScript(unicharset_in);
  chopped_word = TWERD::PolygonalCopy(word);
  // Normalise by the row's body size when asked to and it is known,
  // otherwise by this word's x_height.
  const bool normalize_by_body = use_body_size && row->body_size() > 0.0f;
  if (normalize_by_body) {
    chopped_word->SetupBLNormalize(block, row, row->body_size(),
                                   numeric_mode, &denorm);
  } else {
    chopped_word->SetupBLNormalize(block, row, x_height, numeric_mode,
                                   &denorm);
  }
  // The image will be 8-bit grey if the input was grey or color. Note that in
  // a grey image 0 is black and 255 is white. If the input was binary, then
  // the pix will be binary and 0 is white, with 1 being black.
  // To tell the difference pixGetDepth() will return 8 or 1.
  denorm.set_pix(pix);
  // The inverse flag will be true iff the word has been determined to be white
  // on black, and is independent of whether the pix is 8 bit or 1 bit.
  denorm.set_inverse(word->flag(W_INVERSE));
  chopped_word->Normalize(denorm);
  bln_boxes = tesseract::BoxWord::CopyFromNormalized(NULL, chopped_word);
  seam_array = start_seam_list(chopped_word->blobs);
  // Both choices start out marked bad.
  best_choice = new WERD_CHOICE(&unicharset_in);
  best_choice->make_bad();
  raw_choice = new WERD_CHOICE(&unicharset_in);
  raw_choice->make_bad();
  SetupBlamerBundle();
  return true;
}
| void WERD_RES::SetupWordScript | ( | const UNICHARSET & | uch | ) |
Definition at line 370 of file pageres.cpp.
{
  // Records the unicharset and stamps the word with its default script id
  // and the matching x-height and Latin flags.
  uch_set = &uch;
  const int default_script = uch.default_sid();
  const bool is_latin = default_script == uch.latin_sid();
  word->set_script_id(default_script);
  word->set_flag(W_SCRIPT_HAS_XHEIGHT, uch.script_has_xheight());
  word->set_flag(W_SCRIPT_IS_LATIN, is_latin);
}
| UNICHARSET::Direction WERD_RES::SymbolDirection | ( | int | blob_index | ) | const [inline] |
Definition at line 498 of file pageres.h.
{
  // Returns the direction of the symbol at blob_index of best_choice, or
  // U_OTHER_NEUTRAL when there is no best_choice or the index is out of
  // range.
  const bool valid_index = best_choice != NULL &&
                           blob_index >= 0 &&
                           blob_index < best_choice->length();
  if (!valid_index)
    return UNICHARSET::U_OTHER_NEUTRAL;
  const UNICHAR_ID symbol_id = best_choice->unichar_id(blob_index);
  return uch_set->get_direction(symbol_id);
}
| bool WERD_RES::UnicharsInReadingOrder | ( | ) | const [inline] |
Definition at line 540 of file pageres.h.
{
  // Forwards to best_choice, which must be non-NULL.
  const bool in_script_order = best_choice->unichars_in_script_order();
  return in_script_order;
}
| void WERD_RES::WithoutFootnoteSpan | ( | int * | pstart, |
| int * | pend | ||
| ) | const |
Definition at line 510 of file pageres.cpp.
{
  // Computes the half-open span [*pstart, *pend) of best_choice that is left
  // after stripping superscript digits from the end and then from the start.
  int span_end = best_choice->length();
  for (; span_end > 0; --span_end) {
    const int last = span_end - 1;
    if (!uch_set->get_isdigit(best_choice->unichar_ids()[last]))
      break;
    if (box_word->BlobPosition(last) != tesseract::SP_SUPERSCRIPT)
      break;
  }
  int span_start = 0;
  for (; span_start < span_end; ++span_start) {
    if (!uch_set->get_isdigit(best_choice->unichar_ids()[span_start]))
      break;
    if (box_word->BlobPosition(span_start) != tesseract::SP_SUPERSCRIPT)
      break;
  }
  *pstart = span_start;
  *pend = span_end;
}
| void WERD_RES::WithoutFootnoteSpan | ( | const WERD_CHOICE & | word, |
| const GenericVector< int > & | state, | ||
| int * | pstart, | ||
| int * | pend | ||
| ) | const |
Definition at line 527 of file pageres.cpp.
{
  // Computes the footnote-free span for an arbitrary choice and
  // segmentation state. The default answer is the whole word. The expensive
  // path, a copy of this result with the choice swapped in, is taken only
  // when the word has at least two symbols and starts or ends with a digit.
  // Note that the parameter `word` hides the member of the same name here.
  const int num_symbols = word.length();
  *pstart = 0;
  *pend = num_symbols;
  if (num_symbols < 2) return;
  const bool digit_at_an_end =
      word.unicharset()->get_isdigit(word.unichar_ids()[num_symbols - 1]) ||
      word.unicharset()->get_isdigit(word.unichar_ids()[0]);
  if (!digit_at_an_end) return;
  // ok, now that we know the word starts or ends in digits, do the expensive
  // bit of figuring out if they're superscript.
  WERD_RES scratch(*this);
  scratch.ReplaceBestChoice(word, state);
  scratch.WithoutFootnoteSpan(pstart, pend);
}
| float WERD_RES::caps_height |
| const FontInfo* WERD_RES::fontinfo |
| const FontInfo* WERD_RES::fontinfo2 |
| bool WERD_RES::small_caps |
| const UNICHARSET* WERD_RES::uch_set |
| float WERD_RES::x_height |