Tesseract  3.02
tesseract-ocr/ccmain/reject.h File Reference
#include "params.h"
#include "pageres.h"
#include "notdll.h"

Go to the source code of this file.

Functions

void reject_blanks (WERD_RES *word)
void reject_poor_matches (WERD_RES *word, BLOB_CHOICE_LIST_CLIST *blob_choices)
float compute_reject_threshold (BLOB_CHOICE_LIST_CLIST *blob_choices)
BOOL8 word_contains_non_1_digit (const char *word, const char *word_lengths)
void dont_allow_1Il (WERD_RES *word)
void flip_hyphens (WERD_RES *word)
void flip_0O (WERD_RES *word)
BOOL8 non_0_digit (const char *str, int length)

Function Documentation

float compute_reject_threshold ( BLOB_CHOICE_LIST_CLIST *  blob_choices)

Definition at line 370 of file reject.cpp.

                                                                     {
  // Derives a rejection threshold from the certainties of a word's blobs.
  //
  // For every blob choice list in blob_choices that holds at least one
  // choice, the certainty of the choice at the iterator's initial position
  // is collected. The collected values are sorted with sort_floats and, when
  // there are at least three of them, the largest gap between neighbouring
  // values is located; the result is the midpoint of that gap. With one or
  // two usable values no gap is searched and the result is the first sorted
  // value minus one. When no blob has any choice, 0 is returned.
  //
  // blob_choices: list of per-blob choice lists; must not be NULL.
  // Returns:      the threshold to compare blob certainties against.
  inT16 index;                   //to ratings
  inT16 blob_count;              //no of blobs in word
  inT16 ok_blob_count = 0;       //non TESS rej blobs in word
  float *ratings;                //array of confidences
  float threshold;               //rejection threshold
  float bestgap;                 //biggest gap
  float gapstart;                //bottom of gap
                                 //super iterator
  BLOB_CHOICE_LIST_C_IT list_it = blob_choices;
  BLOB_CHOICE_IT choice_it;      //real iterator

  blob_count = blob_choices->length ();
  ratings = (float *) alloc_mem (blob_count * sizeof (float));
  for (list_it.mark_cycle_pt (), index = 0;
       !list_it.cycled_list (); list_it.forward (), index++) {
    choice_it.set_to_list (list_it.data ());
    if (choice_it.length () > 0) {
                                 //get in an array
      ratings[ok_blob_count] = choice_it.data ()->certainty ();
      ok_blob_count++;
    }
  }
  ASSERT_HOST (index == blob_count);

  if (ok_blob_count == 0) {
    //No blob contributed a certainty, so ratings[] was never written and
    //ratings[0] must not be read. The value returned here is arbitrary:
    //there is no blob with a choice to compare it against.
    free_mem(ratings);
    return 0.0f;
  }

                                 //sort them
  qsort (ratings, ok_blob_count, sizeof (float), sort_floats);
  bestgap = 0;
  gapstart = ratings[0] - 1;     //fallback when no gap is searched/found
  if (ok_blob_count >= 3) {
    for (index = 0; index < ok_blob_count - 1; index++) {
      if (ratings[index + 1] - ratings[index] > bestgap) {
                                 //find biggest
        bestgap = ratings[index + 1] - ratings[index];
        gapstart = ratings[index];
      }
    }
  }
  threshold = gapstart + bestgap / 2;

  free_mem(ratings);
  return threshold;
}
void dont_allow_1Il ( WERD_RES *  word)
void flip_0O ( WERD_RES *  word)
void flip_hyphens ( WERD_RES *  word)
BOOL8 non_0_digit ( const char *  str,
int  length 
)
void reject_blanks ( WERD_RES *  word)

Definition at line 290 of file reject.cpp.

                                   {
  // Scans the best-choice string of word one unichar at a time and flags
  // every position whose first byte is a space as a tess failure in the
  // word's reject map. The byte position is advanced by the matching entry
  // of unichar_lengths(); scanning stops at the terminating '\0'.
  inT16 blob_index = 0;          //index into reject_map / unichar_lengths
  inT16 byte_offset = 0;         //index into unichar_string

  while (word->best_choice->unichar_string()[byte_offset] != '\0') {
    const bool is_blank =
      word->best_choice->unichar_string()[byte_offset] == ' ';
    if (is_blank) {
                                 //rej unrecognised blobs
      word->reject_map[blob_index].setrej_tess_failure ();
    }
    byte_offset += word->best_choice->unichar_lengths()[blob_index];
    blob_index += 1;
  }
}
void reject_poor_matches ( WERD_RES *  word,
BLOB_CHOICE_LIST_CLIST *  blob_choices 
)

Definition at line 319 of file reject.cpp.

                                                               {
  // Flags the blobs of word that were not recognised or that matched poorly.
  //
  // A threshold is obtained from compute_reject_threshold(blob_choices).
  // Each blob is then visited in order: if its character in the best-choice
  // string is a space, or its choice list is empty, it is marked as a tess
  // failure; otherwise it is marked as a poor match when the certainty of
  // the choice at the iterator's initial position is below the threshold.
  //
  // word:         word whose reject_map is updated; best_choice and box_word
  //               must be set.
  // blob_choices: per-blob choice lists; its length must equal both the
  //               number of entries in unichar_lengths() and the box_word
  //               length (enforced with ASSERT_HOST).
  float threshold;
  inT16 i = 0;                   //blob index
  inT16 offset = 0;              //byte offset into unichar_string
                                 //super iterator
  BLOB_CHOICE_LIST_C_IT list_it = blob_choices;
  BLOB_CHOICE_IT choice_it;      //real iterator
                                 //entries in unichar_lengths, as int so it
                                 //matches the %d conversion used below
  const int lengths_count =
    static_cast<int>(strlen(word->best_choice->unichar_lengths().string()));

  #ifndef SECURE_NAMES
  if (lengths_count != list_it.length()) {
    tprintf
      ("ASSERT FAIL string:\"%s\"; strlen=%d; choices len=%d; blob len=%d\n",
      word->best_choice->unichar_string().string(),
      lengths_count, list_it.length(),
      word->box_word->length());
  }
  #endif
  ASSERT_HOST (lengths_count == list_it.length ());
  ASSERT_HOST(word->box_word->length() == list_it.length());
  threshold = compute_reject_threshold (blob_choices);

  // The offset must advance by the length of the blob just processed, so it
  // is updated BEFORE i is incremented (same order as in reject_blanks).
  for (list_it.mark_cycle_pt ();
       !list_it.cycled_list ();
       list_it.forward (),
       offset += word->best_choice->unichar_lengths()[i], i++) {
    /* NB - only compares the threshold against the TOP choice char in the
      choices list for a blob !! - the selected one may be below the threshold
    */
    choice_it.set_to_list (list_it.data ());
    if ((word->best_choice->unichar_string()[offset] == ' ') ||
        (choice_it.length () == 0)) {
                                 //rej unrecognised blobs
      word->reject_map[i].setrej_tess_failure ();
    } else if (choice_it.data ()->certainty () < threshold) {
                                 //rej poor score blob
      word->reject_map[i].setrej_poor_match ();
    }
  }
}
BOOL8 word_contains_non_1_digit ( const char *  word,
const char *  word_lengths 
)