Tesseract
3.02
|
Go to the source code of this file.
Functions | |
void | reject_blanks (WERD_RES *word) |
void | reject_poor_matches (WERD_RES *word, BLOB_CHOICE_LIST_CLIST *blob_choices) |
float | compute_reject_threshold (BLOB_CHOICE_LIST_CLIST *blob_choices) |
BOOL8 | word_contains_non_1_digit (const char *word, const char *word_lengths) |
void | dont_allow_1Il (WERD_RES *word) |
void | flip_hyphens (WERD_RES *word) |
void | flip_0O (WERD_RES *word) |
BOOL8 | non_0_digit (const char *str, int length) |
float compute_reject_threshold | ( | BLOB_CHOICE_LIST_CLIST * | blob_choices | ) |
Definition at line 370 of file reject.cpp.
{
  // Derives a certainty threshold from the choices of every blob in
  // blob_choices.
  //
  // For each blob that has at least one choice, the certainty of the choice
  // the iterator is positioned on after set_to_list() is collected
  // (presumably the top choice - confirm against BLOB_CHOICE_IT). The
  // collected values are sorted with sort_floats; the gap search below assumes
  // that this yields ascending order. With three or more usable blobs the
  // threshold is the midpoint of the widest gap between neighbouring sorted
  // certainties; otherwise it is one below the first sorted certainty.
  //
  // Returns 0 when no blob has any choice. The original code read ratings[0]
  // in that case although nothing had been stored in it.
  inT16 index;                   //index into the blob list
  inT16 blob_count;              //no of blobs in word
  inT16 ok_blob_count = 0;       //blobs that have at least one choice
  float *ratings;                //array of certainties
  float threshold;               //rejection threshold
  float bestgap;                 //biggest gap
  float gapstart;                //bottom of gap
                                 //super iterator
  BLOB_CHOICE_LIST_C_IT list_it = blob_choices;
  BLOB_CHOICE_IT choice_it;      //real iterator

  blob_count = blob_choices->length ();
  if (blob_count <= 0) {
    // Nothing to measure: avoid a zero-sized allocation and the read of
    // ratings[0] further down.
    return 0.0f;
  }
  ratings = (float *) alloc_mem (blob_count * sizeof (float));
  for (list_it.mark_cycle_pt (), index = 0;
       !list_it.cycled_list (); list_it.forward (), index++) {
    choice_it.set_to_list (list_it.data ());
    if (choice_it.length () > 0) {
      ratings[ok_blob_count] = choice_it.data ()->certainty ();
      //get in an array
      //                      tprintf("Rating[%d]=%c %g %g\n",
      //                              index,choice_it.data()->char_class(),
      //                              choice_it.data()->rating(),choice_it.data()->certainty());
      ok_blob_count++;
    }
  }
  ASSERT_HOST (index == blob_count);

  if (ok_blob_count == 0) {
    // Every blob is without choices, so ratings[] holds no valid entry.
    // The caller visible alongside this function only compares the threshold
    // against blobs that do have choices, so the value is never consulted.
    free_mem(ratings);
    return 0.0f;
  }

  qsort (ratings, ok_blob_count, sizeof (float), sort_floats);
  //sort them
  bestgap = 0;
  // Default: one below the first sorted certainty, used unchanged when there
  // are fewer than three usable blobs or no positive gap is found.
  gapstart = ratings[0] - 1;
  if (ok_blob_count >= 3) {
    for (index = 0; index < ok_blob_count - 1; index++) {
      if (ratings[index + 1] - ratings[index] > bestgap) {
        bestgap = ratings[index + 1] - ratings[index];
        //find biggest
        gapstart = ratings[index];
      }
    }
  }
  threshold = gapstart + bestgap / 2;

  //      tprintf("First=%g, last=%g, gap=%g, threshold=%g\n",
  //              ratings[0],ratings[index],bestgap,threshold);

  free_mem(ratings);
  return threshold;
}
void dont_allow_1Il | ( | WERD_RES * | word | ) |
void flip_0O | ( | WERD_RES * | word | ) |
void flip_hyphens | ( | WERD_RES * | word | ) |
BOOL8 non_0_digit | ( | const char * | str, |
int | length | ||
) |
void reject_blanks | ( | WERD_RES * | word | ) |
Definition at line 290 of file reject.cpp.
{
  // Walks the best choice of word one character at a time and marks the
  // reject map entry of every character whose first byte is a space
  // (an unrecognised blob) as a tess failure.
  //
  // blob_index counts characters; byte_offset is the matching position in
  // unichar_string(), advanced by the per-character entry of
  // unichar_lengths().
  inT16 blob_index = 0;
  inT16 byte_offset = 0;

  while (word->best_choice->unichar_string()[byte_offset] != '\0') {
    if (word->best_choice->unichar_string()[byte_offset] == ' ') {
      //rej unrecognised blobs
      word->reject_map[blob_index].setrej_tess_failure ();
    }
    // Step to the start of the next character before moving the index on.
    byte_offset += word->best_choice->unichar_lengths()[blob_index];
    blob_index += 1;
  }
}
void reject_poor_matches | ( | WERD_RES * | word, |
BLOB_CHOICE_LIST_CLIST * | blob_choices | ||
) |
Definition at line 319 of file reject.cpp.
{
  // Marks entries of word->reject_map for blobs that were not recognised or
  // whose certainty falls below the threshold returned by
  // compute_reject_threshold(blob_choices).
  //
  // word         - word whose best_choice, box_word and reject_map are used.
  // blob_choices - one choice list per blob; its length must equal both the
  //                number of entries in unichar_lengths() and
  //                box_word->length() (enforced with ASSERT_HOST).
  //
  // i indexes blobs / reject map entries; offset is the byte position of
  // blob i's character inside unichar_string().
  float threshold;
  inT16 i = 0;
  inT16 offset = 0;
                                 //super iterator
  BLOB_CHOICE_LIST_C_IT list_it = blob_choices;
  BLOB_CHOICE_IT choice_it;      //real iterator

  // Number of characters in the best choice. Held as int so that it matches
  // the %d conversions below and compares cleanly with list_it.length().
  const int length_count =
    static_cast<int>(strlen (word->best_choice->unichar_lengths().string ()));

  #ifndef SECURE_NAMES
  if (length_count != list_it.length()) {
    tprintf
      ("ASSERT FAIL string:\"%s\"; strlen=%d; choices len=%d; blob len=%d\n",
      word->best_choice->unichar_string().string(),
      length_count,
      list_it.length(), word->box_word->length());
  }
  #endif
  ASSERT_HOST (length_count == list_it.length ());
  ASSERT_HOST(word->box_word->length() == list_it.length());
  threshold = compute_reject_threshold (blob_choices);

  // The offset must grow by the byte length of the character just handled
  // (entry i) BEFORE i moves on. The previous order (i++ first) added the
  // length of the following character, which mis-addressed unichar_string()
  // whenever neighbouring characters differ in byte length.
  for (list_it.mark_cycle_pt ();
       !list_it.cycled_list ();
       list_it.forward (),
       offset += word->best_choice->unichar_lengths()[i], i++) {
    /* NB - only compares the threshold against the TOP choice char in the
      choices list for a blob !! - the selected one may be below the threshold
    */
    choice_it.set_to_list (list_it.data ());
    if ((word->best_choice->unichar_string()[offset] == ' ')
        || (choice_it.length () == 0)) {
      //rej unrecognised blobs
      word->reject_map[i].setrej_tess_failure ();
    }
    else if (choice_it.data ()->certainty () < threshold) {
      //rej poor score blob
      word->reject_map[i].setrej_poor_match ();
    }
  }
}
BOOL8 word_contains_non_1_digit | ( | const char * | word, |
const char * | word_lengths | ||
) |