Tesseract
3.02
|
#include <ratngs.h>
Public Member Functions | |
WERD_CHOICE (const UNICHARSET *unicharset) | |
WERD_CHOICE (const UNICHARSET *unicharset, int reserved) | |
WERD_CHOICE (const char *src_string, const char *src_lengths, float src_rating, float src_certainty, uinT8 src_permuter, const UNICHARSET &unicharset) | |
WERD_CHOICE (const char *src_string, const UNICHARSET &unicharset) | |
WERD_CHOICE (const WERD_CHOICE &word) | |
~WERD_CHOICE () | |
const UNICHARSET * | unicharset () const |
int | length () const |
const UNICHAR_ID * | unichar_ids () const |
const UNICHAR_ID | unichar_id (int index) const |
const char * | fragment_lengths () const |
const char | fragment_length (int index) const |
float | rating () const |
float | certainty () const |
uinT8 | permuter () const |
const char * | permuter_name () const |
bool | fragment_mark () const |
BLOB_CHOICE_LIST_CLIST * | blob_choices () |
void | set_unichar_id (UNICHAR_ID unichar_id, int index) |
void | set_fragment_length (char flen, int index) |
void | set_rating (float new_val) |
void | set_certainty (float new_val) |
void | set_permuter (uinT8 perm) |
void | set_fragment_mark (bool new_fragment_mark) |
void | set_length (int len) |
void | set_blob_choices (BLOB_CHOICE_LIST_CLIST *blob_choices) |
void | double_the_size () |
Make more space in the unichar_ids_ and fragment_lengths_ arrays. | |
void | init (int reserved) |
void | init (const char *src_string, const char *src_lengths, float src_rating, float src_certainty, uinT8 src_permuter) |
void | make_bad () |
Set the fields in this choice to be default (bad) values. | |
void | append_unichar_id_space_allocated (UNICHAR_ID unichar_id, char fragment_length, float rating, float certainty) |
void | append_unichar_id (UNICHAR_ID unichar_id, char fragment_length, float rating, float certainty) |
void | set_unichar_id (UNICHAR_ID unichar_id, char fragment_length, float rating, float certainty, int index) |
bool | contains_unichar_id (UNICHAR_ID unichar_id) const |
void | remove_unichar_ids (int index, int num) |
void | remove_last_unichar_id () |
void | remove_unichar_id (int index) |
bool | has_rtl_unichar_id () const |
void | reverse_and_mirror_unichar_ids () |
void | punct_stripped (int *start_core, int *end_core) const |
WERD_CHOICE | shallow_copy (int start, int end) const |
void | string_and_lengths (STRING *word_str, STRING *word_lengths_str) const |
const STRING | debug_string () const |
bool | set_unichars_in_script_order (bool in_script_order) |
bool | unichars_in_script_order () const |
const STRING & | unichar_string () const |
const STRING & | unichar_lengths () const |
const void | print () const |
const void | print (const char *msg) const |
WERD_CHOICE & | operator+= (const WERD_CHOICE &second) |
WERD_CHOICE & | operator= (const WERD_CHOICE &source) |
Static Public Attributes | |
static const float | kBadRating = 100000.0 |
WERD_CHOICE::WERD_CHOICE | ( | const UNICHARSET * | unicharset | ) | [inline] |
WERD_CHOICE::WERD_CHOICE | ( | const UNICHARSET * | unicharset, |
int | reserved | ||
) | [inline] |
WERD_CHOICE::WERD_CHOICE | ( | const char * | src_string, |
const char * | src_lengths, | ||
float | src_rating, | ||
float | src_certainty, | ||
uinT8 | src_permuter, | ||
const UNICHARSET & | unicharset | ||
) | [inline] |
WERD_CHOICE::WERD_CHOICE | ( | const char * | src_string, |
const UNICHARSET & | unicharset | ||
) |
Constructor to build a WERD_CHOICE from the given string. The function assumes that src_string is not NULL.
Definition at line 112 of file ratngs.cpp.
: unicharset_(&unicharset) {
  // Walk src_string one step at a time, appending every step size to
  // src_lengths. unicharset.step() presumably yields the byte length of the
  // unichar starting at the given position and 0 when none is recognised --
  // TODO confirm against UNICHARSET.
  STRING src_lengths;
  const char *cursor = src_string;
  const char *const terminator = src_string + strlen(src_string);
  int step = unicharset.step(cursor);
  while (cursor < terminator && step > 0) {
    step = unicharset.step(cursor);
    src_lengths += step;
    cursor += step;
  }

  // Success means the last step was non-zero and the walk stopped exactly at
  // the end of the string. An empty src_string fails this test whenever
  // step() reports 0 for it, because the loop body never runs.
  const bool consumed_whole_string = (step != 0 && cursor == terminator);
  if (consumed_whole_string) {
    this->init(src_string, src_lengths.string(), 0.0, 0.0, NO_PERM);
  } else {
    // there must have been an invalid unichar in the string
    this->init(8);
    this->make_bad();
  }
}
WERD_CHOICE::WERD_CHOICE | ( | const WERD_CHOICE & | word | ) | [inline] |
WERD_CHOICE::~WERD_CHOICE | ( | ) |
Definition at line 168 of file ratngs.cpp.
{
  // Free the two parallel arrays, then hand the blob choices to
  // delete_blob_choices() for disposal.
  delete[] this->unichar_ids_;
  delete[] this->fragment_lengths_;
  this->delete_blob_choices();
}
void WERD_CHOICE::append_unichar_id | ( | UNICHAR_ID | unichar_id, |
char | fragment_length, | ||
float | rating, | ||
float | certainty | ||
) |
append_unichar_id
Make sure there is enough space in the word for the new unichar id and call append_unichar_id_space_allocated().
Definition at line 313 of file ratngs.cpp.
{
  // Grow the arrays only when every reserved slot is already in use.
  const bool arrays_full = (length_ == reserved_);
  if (arrays_full) {
    this->double_the_size();
  }
  // A free slot now exists, so the non-growing variant can do the append.
  this->append_unichar_id_space_allocated(unichar_id, fragment_length,
                                          rating, certainty);
}
void WERD_CHOICE::append_unichar_id_space_allocated | ( | UNICHAR_ID | unichar_id, |
char | fragment_length, | ||
float | rating, | ||
float | certainty | ||
) | [inline] |
This function assumes that there is enough space reserved in the WERD_CHOICE for adding another unichar. This is an efficient alternative to append_unichar_id().
Definition at line 322 of file ratngs.h.
{
  // The caller must have left at least one reserved slot unused.
  assert(reserved_ > length_);
  // Extend the word first: set_unichar_id() asserts index < length_.
  const int new_slot = length_;
  ++length_;
  this->set_unichar_id(unichar_id, fragment_length, rating, certainty,
                       new_slot);
}
BLOB_CHOICE_LIST_CLIST* WERD_CHOICE::blob_choices | ( | ) | [inline] |
float WERD_CHOICE::certainty | ( | ) | const [inline] |
bool WERD_CHOICE::contains_unichar_id | ( | UNICHAR_ID | unichar_id | ) | const |
contains_unichar_id
Returns true if unichar_ids_ contain the given unichar_id, false otherwise.
Definition at line 197 of file ratngs.cpp.
{
  // Linear scan over the first length_ ids; stops at the first match.
  bool found = false;
  for (int pos = 0; pos < length_ && !found; ++pos) {
    found = (unichar_ids_[pos] == unichar_id);
  }
  return found;
}
const STRING WERD_CHOICE::debug_string | ( | ) | const [inline] |
void WERD_CHOICE::double_the_size | ( | ) | [inline] |
Make more space in the unichar_ids_ and fragment_lengths_ arrays.
Definition at line 268 of file ratngs.h.
{
  if (reserved_ <= 0) {
    // Nothing reserved yet: start with room for a single unichar.
    unichar_ids_ = new UNICHAR_ID[1];
    fragment_lengths_ = new char[1];
    reserved_ = 1;
    return;
  }
  // Both arrays are replaced through double_the_size_memcpy(), which is
  // given the current capacity; the recorded capacity is then doubled.
  unichar_ids_ = GenericVector<UNICHAR_ID>::double_the_size_memcpy(
      reserved_, unichar_ids_);
  fragment_lengths_ = GenericVector<char>::double_the_size_memcpy(
      reserved_, fragment_lengths_);
  reserved_ *= 2;
}
const char WERD_CHOICE::fragment_length | ( | int | index | ) | const [inline] |
const char* WERD_CHOICE::fragment_lengths | ( | ) | const [inline] |
bool WERD_CHOICE::fragment_mark | ( | ) | const [inline] |
bool WERD_CHOICE::has_rtl_unichar_id | ( | ) | const |
has_rtl_unichar_id
Returns true if unichar_ids contain at least one "strongly" RTL unichar.
Definition at line 276 of file ratngs.cpp.
{
  // Only U_RIGHT_TO_LEFT and U_RIGHT_TO_LEFT_ARABIC count as RTL here.
  for (int pos = 0; pos < length_; ++pos) {
    const UNICHARSET::Direction direction =
        unicharset_->get_direction(unichar_ids_[pos]);
    const bool is_rtl = (direction == UNICHARSET::U_RIGHT_TO_LEFT) ||
                        (direction == UNICHARSET::U_RIGHT_TO_LEFT_ARABIC);
    if (is_rtl) {
      return true;
    }
  }
  return false;
}
void WERD_CHOICE::init | ( | int | reserved | ) | [inline] |
Initializes WERD_CHOICE - reserves the requested number of slots (reserved) in the unichar_ids_ and fragment_lengths_ arrays. Sets other values to default (blank) values.
Definition at line 284 of file ratngs.h.
{
  // Storage: allocate both parallel arrays, or leave them NULL when no
  // slots were requested.
  reserved_ = reserved;
  if (reserved <= 0) {
    unichar_ids_ = NULL;
    fragment_lengths_ = NULL;
  } else {
    unichar_ids_ = new UNICHAR_ID[reserved];
    fragment_lengths_ = new char[reserved];
  }

  // Start as an empty word with blank defaults. certainty_ begins at
  // MAX_FLOAT32 so that any certainty folded in later is lower.
  length_ = 0;
  rating_ = 0.0;
  certainty_ = MAX_FLOAT32;
  permuter_ = NO_PERM;
  fragment_mark_ = false;
  blob_choices_ = NULL;
  unichars_in_script_order_ = false;  // Tesseract is strict left-to-right.
}
void WERD_CHOICE::init | ( | const char * | src_string, |
const char * | src_lengths, | ||
float | src_rating, | ||
float | src_certainty, | ||
uinT8 | src_permuter | ||
) |
Helper function to build a WERD_CHOICE from the given string, fragment lengths, rating, certainty and permuter. The function assumes that src_string is not NULL. src_lengths argument could be NULL, in which case the unichars in src_string are assumed to all be of length 1.
Helper function to build a WERD_CHOICE from the given string, fragment lengths, rating, certainty and permuter.
The function assumes that src_string is not NULL. src_lengths argument could be NULL, in which case the unichars in src_string are assumed to all be of length 1.
Definition at line 140 of file ratngs.cpp.
{
  const int src_string_len = strlen(src_string);
  if (src_string_len != 0) {
    // One slot per unichar: the entry count of src_lengths when it is
    // given, otherwise one per byte of src_string.
    int num_unichars = src_string_len;
    if (src_lengths) {
      num_unichars = strlen(src_lengths);
    }
    this->init(num_unichars);
    length_ = reserved_;

    // Convert each unichar in turn, advancing through src_string by the
    // length of the unichar just consumed.
    const char *cursor = src_string;
    for (int pos = 0; pos < length_; ++pos) {
      const int unichar_length = src_lengths ? src_lengths[pos] : 1;
      unichar_ids_[pos] = unicharset_->unichar_to_id(cursor, unichar_length);
      fragment_lengths_[pos] = 1;
      cursor += unichar_length;
    }
  } else {
    // Empty string: produce an empty word with 8 reserved slots.
    this->init(8);
  }

  // init(int) reset these to defaults; apply the caller's values last.
  rating_ = src_rating;
  certainty_ = src_certainty;
  permuter_ = src_permuter;
}
int WERD_CHOICE::length | ( | ) | const [inline] |
void WERD_CHOICE::make_bad | ( | ) | [inline] |
Set the fields in this choice to be default (bad) values.
Definition at line 312 of file ratngs.h.
{
  // Empty the word and give it kBadRating and the lowest possible
  // certainty. The arrays and reserved_ are left untouched.
  this->length_ = 0;
  this->rating_ = kBadRating;
  this->certainty_ = -MAX_FLOAT32;
  this->fragment_mark_ = false;
}
WERD_CHOICE & WERD_CHOICE::operator+= | ( | const WERD_CHOICE & | second | ) |
Concatenate a second word choice onto the end of this one. The ratings are added and the certainty is the minimum of the two. If the permuters are NOT the same, the permuter is set to COMPOUND_PERM.
Definition at line 330 of file ratngs.cpp.
{
  // TODO(daria): find out why the choice was cleared this way if any
  // of the pieces are empty. Add the description of this behavior
  // to the comments. (The old code emptied word_string/word_lengths and
  // called delete_blob_choices() when either word was empty.)
  ASSERT_HOST(unicharset_ == second.unicharset_);

  // Make room for all of second's unichars after the current ones.
  while (reserved_ < length_ + second.length()) {
    this->double_the_size();
  }

  // Copy second's ids and fragment lengths in behind ours.
  const UNICHAR_ID *src_ids = second.unichar_ids();
  const char *src_fragment_lengths = second.fragment_lengths();
  for (int k = 0; k < second.length(); ++k) {
    unichar_ids_[length_ + k] = src_ids[k];
    fragment_lengths_[length_ + k] = src_fragment_lengths[k];
  }
  length_ += second.length();

  // Ratings are summed; the certainty is the smaller of the two.
  rating_ += second.rating();
  if (second.certainty() < certainty_) {
    certainty_ = second.certainty();
  }

  // Permuter: adopt second's if we have none; if both are set and differ,
  // the result is COMPOUND_PERM.
  const uinT8 second_permuter = second.permuter();
  if (permuter_ == NO_PERM) {
    permuter_ = second_permuter;
  } else if (second_permuter != NO_PERM && second_permuter != permuter_) {
    permuter_ = COMPOUND_PERM;
  }

  // Nothing more to do when second carries no blob choices.
  if (second.blob_choices_ == NULL) {
    return *this;
  }

  // Append a deep copy of each of second's blob choice lists after the
  // last element of our own list, creating that list first if needed.
  if (this->blob_choices_ == NULL) {
    this->blob_choices_ = new BLOB_CHOICE_LIST_CLIST;
  }
  BLOB_CHOICE_LIST_C_IT dest_it;
  BLOB_CHOICE_LIST_C_IT src_it;
  dest_it.set_to_list(this->blob_choices_);
  dest_it.move_to_last();
  src_it.set_to_list(second.blob_choices_);
  for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {
    BLOB_CHOICE_LIST *list_copy = new BLOB_CHOICE_LIST();
    list_copy->deep_copy(src_it.data(), &BLOB_CHOICE::deep_copy);
    dest_it.add_after_then_move(list_copy);
  }
  return *this;
}
WERD_CHOICE & WERD_CHOICE::operator= | ( | const WERD_CHOICE & | source | ) |
Allocate enough memory to hold a copy of source and copy over all the information from source to this WERD_CHOICE.
Definition at line 394 of file ratngs.cpp.
{
  // Self-assignment guard. Without it, the code below would call
  // delete_blob_choices() on this object and then read
  // source.blob_choices_, which is the very same member, so `x = x` would
  // lose (or touch already-released) blob choices.
  if (this == &source) {
    return *this;
  }

  // Grow the arrays until every unichar of source fits.
  while (reserved_ < source.length()) {
    this->double_the_size();
  }

  // Copy the unicharset pointer and the per-unichar arrays.
  unicharset_ = source.unicharset_;
  const UNICHAR_ID *other_unichar_ids = source.unichar_ids();
  const char *other_fragment_lengths = source.fragment_lengths();
  for (int i = 0; i < source.length(); ++i) {
    unichar_ids_[i] = other_unichar_ids[i];
    fragment_lengths_[i] = other_fragment_lengths[i];
  }

  // Copy the scalar fields.
  // NOTE(review): unichars_in_script_order_ is not copied here -- confirm
  // whether that is intentional.
  length_ = source.length();
  rating_ = source.rating();
  certainty_ = source.certainty();
  permuter_ = source.permuter();
  fragment_mark_ = source.fragment_mark();

  // Delete existing blob_choices
  this->delete_blob_choices();

  // Deep copy blob_choices of source
  if (source.blob_choices_ != NULL) {
    BLOB_CHOICE_LIST_C_IT this_blob_choices_it;
    BLOB_CHOICE_LIST_C_IT source_blob_choices_it;

    this->blob_choices_ = new BLOB_CHOICE_LIST_CLIST();

    this_blob_choices_it.set_to_list(this->blob_choices_);
    source_blob_choices_it.set_to_list(source.blob_choices_);

    for (source_blob_choices_it.mark_cycle_pt();
         !source_blob_choices_it.cycled_list();
         source_blob_choices_it.forward()) {
      BLOB_CHOICE_LIST *blob_choices_copy = new BLOB_CHOICE_LIST();
      blob_choices_copy->deep_copy(source_blob_choices_it.data(),
                                   &BLOB_CHOICE::deep_copy);
      this_blob_choices_it.add_after_then_move(blob_choices_copy);
    }
  }
  return *this;
}
uinT8 WERD_CHOICE::permuter | ( | ) | const [inline] |
const char * WERD_CHOICE::permuter_name | ( | ) | const |
Definition at line 174 of file ratngs.cpp.
{
  // Look the name up in kPermuterTypeNames using permuter_ as the index.
  // No range check is made on permuter_.
  const char *const name = kPermuterTypeNames[permuter_];
  return name;
}
const void WERD_CHOICE::print | ( | ) | const [inline] |
const void WERD_CHOICE::print | ( | const char * | msg | ) | const |
Print WERD_CHOICE to stdout.
Definition at line 457 of file ratngs.cpp.
{
  // Header: caller's message followed by the scalar fields.
  tprintf("%s WERD_CHOICE:\n", msg);
  tprintf("length_ %d reserved_ %d permuter_ %d\n",
          length_, reserved_, permuter_);
  tprintf("rating_ %.4f certainty_ %.4f", rating_, certainty_);
  if (fragment_mark_) {
    tprintf(" fragment_mark_ true");
  }
  tprintf("\n");

  // The string members are printed only when unichar_string_ is non-empty.
  if (unichar_string_.length() > 0) {
    tprintf("unichar_string_ %s unichar_lengths_ %s\n",
            unichar_string_.string(), unichar_lengths_.string());
  }

  // Raw ids, then the matching fragment lengths, one line each.
  tprintf("unichar_ids: ");
  for (int pos = 0; pos < length_; ++pos) {
    tprintf("%d ", unichar_ids_[pos]);
  }
  tprintf("\nfragment_lengths_: ");
  for (int pos = 0; pos < length_; ++pos) {
    tprintf("%d ", fragment_lengths_[pos]);
  }
  tprintf("\n");
  fflush(stdout);
}
void WERD_CHOICE::punct_stripped | ( | int * | start, |
int * | end | ||
) | const |
punct_stripped
Returns the half-open interval of unichar_id indices [start, end) which enclose the core portion of this word -- the part after stripping punctuation from the left and right.
Definition at line 245 of file ratngs.cpp.
{
  // Advance *start past leading punctuation.
  *start = 0;
  for (; *start < length() &&
         unicharset()->get_ispunctuation(unichar_id(*start));
       ++(*start)) {
  }

  // Move *end back over trailing punctuation. It is tracked as an
  // inclusive index here and converted to an exclusive bound at the end.
  *end = length() - 1;
  for (; *end > -1 && unicharset()->get_ispunctuation(unichar_id(*end));
       --(*end)) {
  }
  ++(*end);
  // When every unichar is punctuation this leaves *start == length() and
  // *end == 0, i.e. *start > *end.
}
float WERD_CHOICE::rating | ( | ) | const [inline] |
void WERD_CHOICE::remove_last_unichar_id | ( | ) | [inline] |
void WERD_CHOICE::remove_unichar_id | ( | int | index | ) | [inline] |
Definition at line 348 of file ratngs.h.
{
  // Single-element case of remove_unichar_ids().
  const int num_to_remove = 1;
  this->remove_unichar_ids(index, num_to_remove);
}
void WERD_CHOICE::remove_unichar_ids | ( | int | start, |
int | num | ||
) |
remove_unichar_ids
Removes num unichar ids starting from index start from unichar_ids_ and updates length_ and fragment_lengths_ to reflect this change. Note: this function does not modify rating_ and certainty_.
Definition at line 213 of file ratngs.cpp.
{
  // A negative count would pass the range check below, make the loop read
  // from before `start`, and make `length_ -= num` grow the word, so it is
  // rejected explicitly.
  ASSERT_HOST(num >= 0);
  ASSERT_HOST(start >= 0 && start + num <= length_);
  // Shift every element after the removed range down by num slots, keeping
  // unichar_ids_ and fragment_lengths_ in step.
  for (int i = start; i + num < length_; ++i) {
    unichar_ids_[i] = unichar_ids_[i + num];
    fragment_lengths_[i] = fragment_lengths_[i + num];
  }
  // rating_ and certainty_ are deliberately left unchanged.
  length_ -= num;
}
void WERD_CHOICE::reverse_and_mirror_unichar_ids | ( | ) |
reverse_and_mirror_unichar_ids
Reverses and mirrors unichars in unichar_ids.
Definition at line 227 of file ratngs.cpp.
{
  // Swap the ids pairwise from both ends towards the middle, replacing
  // each with its get_mirror() counterpart as it is moved.
  const int half = length_ / 2;
  for (int front = 0, back = length_ - 1; front < half; ++front, --back) {
    const UNICHAR_ID front_id = unichar_ids_[front];
    unichar_ids_[front] = unicharset_->get_mirror(unichar_ids_[back]);
    unichar_ids_[back] = unicharset_->get_mirror(front_id);
  }
  // With an odd length the middle id stays in place but is still mirrored.
  if (length_ % 2 != 0) {
    unichar_ids_[half] = unicharset_->get_mirror(unichar_ids_[half]);
  }
}
void WERD_CHOICE::set_blob_choices | ( | BLOB_CHOICE_LIST_CLIST * | blob_choices | ) |
Delete current blob_choices. Set the blob_choices to the given new list.
Definition at line 184 of file ratngs.cpp.
{
  // Being handed the list we already hold is a no-op; in particular the
  // list must not be deleted in that case.
  if (blob_choices_ == blob_choices) {
    return;
  }
  this->delete_blob_choices();
  blob_choices_ = blob_choices;
}
void WERD_CHOICE::set_certainty | ( | float | new_val | ) | [inline] |
void WERD_CHOICE::set_fragment_length | ( | char | flen, |
int | index | ||
) | [inline] |
void WERD_CHOICE::set_fragment_mark | ( | bool | new_fragment_mark | ) | [inline] |
void WERD_CHOICE::set_length | ( | int | len | ) | [inline] |
Definition at line 261 of file ratngs.h.
{
  // The new length must fit in the slots already reserved; this function
  // never allocates.
  ASSERT_HOST(reserved_ >= len);
  this->length_ = len;
}
void WERD_CHOICE::set_permuter | ( | uinT8 | perm | ) | [inline] |
void WERD_CHOICE::set_rating | ( | float | new_val | ) | [inline] |
void WERD_CHOICE::set_unichar_id | ( | UNICHAR_ID | unichar_id, |
int | index | ||
) | [inline] |
Definition at line 238 of file ratngs.h.
{
  // index must name an existing slot. The lower bound is checked as well
  // as the upper one so that a negative index cannot write before the
  // start of unichar_ids_.
  assert(index >= 0);
  assert(index < length_);
  // Only the id is replaced; rating_ and certainty_ are not touched.
  unichar_ids_[index] = unichar_id;
}
void WERD_CHOICE::set_unichar_id | ( | UNICHAR_ID | unichar_id, |
char | fragment_length, | ||
float | rating, | ||
float | certainty, | ||
int | index | ||
) | [inline] |
Definition at line 334 of file ratngs.h.
{
  // index must name an existing slot. The lower bound is checked as well
  // as the upper one so that a negative index cannot write before the
  // start of the arrays.
  assert(index >= 0);
  assert(index < length_);
  unichar_ids_[index] = unichar_id;
  fragment_lengths_[index] = fragment_length;
  // The given rating is added to the word's total (nothing is subtracted
  // for whatever occupied the slot before); certainty_ keeps the lowest
  // value seen.
  rating_ += rating;
  if (certainty < certainty_) {
    certainty_ = certainty;
  }
}
bool WERD_CHOICE::set_unichars_in_script_order | ( | bool | in_script_order | ) | [inline] |
WERD_CHOICE WERD_CHOICE::shallow_copy | ( | int | start, |
int | end | ||
) | const |
Definition at line 259 of file ratngs.cpp.
{
  ASSERT_HOST(start >= 0 && start <= length_);
  ASSERT_HOST(end >= 0 && end <= length_);
  // An inverted range is treated as empty.
  const int count = (end < start) ? 0 : end - start;

  // Reserve exactly `count` slots so the non-growing append can be used.
  // Every unichar is appended with a rating and certainty of 0.0f; the
  // source's own rating_ and certainty_ are not carried over.
  WERD_CHOICE retval(unicharset_, count);
  for (int k = 0; k < count; ++k) {
    retval.append_unichar_id_space_allocated(
        unichar_ids_[start + k], fragment_lengths_[start + k], 0.0f, 0.0f);
  }
  return retval;
}
string_and_lengths
Populates the given word_str with unichars from unichar_ids and word_lengths_str with the corresponding unichar lengths.
Definition at line 294 of file ratngs.cpp.
{
  // word_lengths_str is optional; word_str is always written.
  const bool want_lengths = (word_lengths_str != NULL);

  // Clear the outputs before appending.
  *word_str = "";
  if (want_lengths) {
    *word_lengths_str = "";
  }

  // Append each unichar's text and, if requested, the length of that text.
  for (int pos = 0; pos < length_; ++pos) {
    const char *unichar_text =
        unicharset_->id_to_unichar_ext(unichar_ids_[pos]);
    *word_str += unichar_text;
    if (want_lengths) {
      *word_lengths_str += strlen(unichar_text);
    }
  }
}
const UNICHAR_ID WERD_CHOICE::unichar_id | ( | int | index | ) | const [inline] |
const UNICHAR_ID* WERD_CHOICE::unichar_ids | ( | ) | const [inline] |
const STRING& WERD_CHOICE::unichar_lengths | ( | ) | const [inline] |
Definition at line 393 of file ratngs.h.
{
  // Both cached strings are rebuilt from unichar_ids_ on every call, so
  // the returned reference reflects the current contents of the word.
  string_and_lengths(&unichar_string_, &unichar_lengths_);
  return unichar_lengths_;
}
const STRING& WERD_CHOICE::unichar_string | ( | ) | const [inline] |
Definition at line 386 of file ratngs.h.
{
  // Both cached strings are rebuilt from unichar_ids_ on every call, so
  // the returned reference reflects the current contents of the word.
  string_and_lengths(&unichar_string_, &unichar_lengths_);
  return unichar_string_;
}
bool WERD_CHOICE::unichars_in_script_order | ( | ) | const [inline] |
const UNICHARSET* WERD_CHOICE::unicharset | ( | ) | const [inline] |
const float WERD_CHOICE::kBadRating = 100000.0 [static] |