|
Tesseract
3.02
|
Go to the source code of this file.
Functions | |
| void | reject_blanks (WERD_RES *word) |
| void | reject_poor_matches (WERD_RES *word, BLOB_CHOICE_LIST_CLIST *blob_choices) |
| float | compute_reject_threshold (BLOB_CHOICE_LIST_CLIST *blob_choices) |
| BOOL8 | word_contains_non_1_digit (const char *word, const char *word_lengths) |
| void | dont_allow_1Il (WERD_RES *word) |
| void | flip_hyphens (WERD_RES *word) |
| void | flip_0O (WERD_RES *word) |
| BOOL8 | non_0_digit (const char *str, int length) |
| float compute_reject_threshold | ( | BLOB_CHOICE_LIST_CLIST * | blob_choices | ) |
Definition at line 370 of file reject.cpp.
{
  /*
   * Derive the certainty threshold used to flag poorly matched blobs.
   *
   * blob_choices: one choice list per blob of the word.
   *
   * The certainty of the current (first) choice of every blob that has a
   * non-empty choice list is collected and sorted with sort_floats
   * (presumably ascending - confirm against its definition).
   *  - With fewer than three usable blobs, or when no adjacent pair differs,
   *    the result is ratings[0] - 1.
   *  - Otherwise the result is the midpoint of the widest gap between
   *    adjacent sorted certainties.
   *  - With no usable blob at all there is no certainty to compare against,
   *    so 0 is returned as a defined placeholder.
   *
   * The scratch array is allocated with alloc_mem and released before return.
   */
  inT16 index;                   // position in the blob list
  inT16 blob_count;              // no of blobs in word
  inT16 ok_blob_count = 0;       // blobs with a non-empty choice list
  float *ratings;                // certainties of the usable blobs
  float threshold;               // rejection threshold
  float bestgap = 0;             // biggest gap found so far
  float gapstart;                // bottom of that gap
  // super iterator
  BLOB_CHOICE_LIST_C_IT list_it = blob_choices;
  BLOB_CHOICE_IT choice_it;      // real iterator

  blob_count = blob_choices->length ();
  ratings = (float *) alloc_mem (blob_count * sizeof (float));
  for (list_it.mark_cycle_pt (), index = 0;
       !list_it.cycled_list (); list_it.forward (), index++) {
    choice_it.set_to_list (list_it.data ());
    if (choice_it.length () > 0) {
      // Only blobs that actually have a choice contribute a certainty.
      ratings[ok_blob_count] = choice_it.data ()->certainty ();
      ok_blob_count++;
    }
  }
  ASSERT_HOST (index == blob_count);

  if (ok_blob_count == 0) {
    // Nothing was stored in ratings, so ratings[0] must not be read.
    free_mem(ratings);
    return 0.0f;
  }

  qsort (ratings, ok_blob_count, sizeof (float), sort_floats);

  // Fallback: one below the first sorted certainty.
  gapstart = ratings[0] - 1;
  if (ok_blob_count >= 3) {
    for (index = 0; index < ok_blob_count - 1; index++) {
      if (ratings[index + 1] - ratings[index] > bestgap) {
        // Remember the widest gap and where it starts.
        bestgap = ratings[index + 1] - ratings[index];
        gapstart = ratings[index];
      }
    }
  }
  threshold = gapstart + bestgap / 2;

  free_mem(ratings);
  return threshold;
}
| void dont_allow_1Il | ( | WERD_RES * | word | ) |
| void flip_0O | ( | WERD_RES * | word | ) |
| void flip_hyphens | ( | WERD_RES * | word | ) |
| BOOL8 non_0_digit | ( | const char * | str, |
| int | length | ||
| ) |
| void reject_blanks | ( | WERD_RES * | word | ) |
Definition at line 290 of file reject.cpp.
{
  /*
   * Mark every blob whose character in the best choice is a space as a
   * Tesseract failure in word->reject_map.
   *
   * The best-choice string is walked one character at a time: byte_pos is
   * the position inside unichar_string() and is advanced by the matching
   * entry of unichar_lengths() (presumably the byte length of that
   * character), while blob_index counts characters and indexes reject_map.
   */
  inT16 blob_index = 0;          // character / reject_map index
  inT16 byte_pos = 0;            // position inside unichar_string()

  while (word->best_choice->unichar_string()[byte_pos] != '\0') {
    if (word->best_choice->unichar_string()[byte_pos] == ' ') {
      // rej unrecognised blobs
      word->reject_map[blob_index].setrej_tess_failure ();
    }
    byte_pos += word->best_choice->unichar_lengths()[blob_index];
    blob_index += 1;
  }
}
| void reject_poor_matches | ( | WERD_RES * | word, |
| BLOB_CHOICE_LIST_CLIST * | blob_choices | ||
| ) |
Definition at line 319 of file reject.cpp.
{
  /*
   * Flag blobs of the word that were not recognised or matched poorly.
   *
   * word:         result whose best_choice is inspected and whose reject_map
   *               is updated in place.
   * blob_choices: one choice list per blob; its length must equal both the
   *               number of entries in best_choice->unichar_lengths() and
   *               word->box_word->length() (enforced with ASSERT_HOST).
   *
   * A blob is marked as a Tesseract failure when its character in the best
   * choice is a space or its choice list is empty. Otherwise it is marked as
   * a poor match when the certainty of the current choice in its list falls
   * below the value returned by compute_reject_threshold.
   */
  float threshold;
  inT16 i = 0;                   // blob / reject_map index
  inT16 offset = 0;              // position inside unichar_string()
  // super iterator
  BLOB_CHOICE_LIST_C_IT list_it = blob_choices;
  BLOB_CHOICE_IT choice_it;      // real iterator

#ifndef SECURE_NAMES
  if (strlen(word->best_choice->unichar_lengths().string()) !=
      list_it.length()) {
    // strlen returns size_t; cast so the argument matches the %d conversion.
    tprintf
      ("ASSERT FAIL string:\"%s\"; strlen=%d; choices len=%d; blob len=%d\n",
       word->best_choice->unichar_string().string(),
       (int) strlen (word->best_choice->unichar_lengths().string()),
       list_it.length(),
       word->box_word->length());
  }
#endif
  ASSERT_HOST (strlen (word->best_choice->unichar_lengths().string ()) ==
               list_it.length ());
  ASSERT_HOST(word->box_word->length() == list_it.length());

  threshold = compute_reject_threshold (blob_choices);

  // The offset must be advanced by the length entry of the character just
  // processed, i.e. before i is incremented (same order as reject_blanks).
  for (list_it.mark_cycle_pt ();
       !list_it.cycled_list ();
       list_it.forward (),
       offset += word->best_choice->unichar_lengths()[i], i++) {
    /* NB - only compares the threshold against the TOP choice char in the
       choices list for a blob !! - the selected one may be below the threshold
    */
    choice_it.set_to_list (list_it.data ());
    if ((word->best_choice->unichar_string()[offset] == ' ') ||
        (choice_it.length () == 0)) {
      // rej unrecognised blobs
      word->reject_map[i].setrej_tess_failure ();
    } else if (choice_it.data ()->certainty () < threshold) {
      // rej poor score blob
      word->reject_map[i].setrej_poor_match ();
    }
  }
}
| BOOL8 word_contains_non_1_digit | ( | const char * | word, |
| const char * | word_lengths | ||
| ) |