Tesseract  3.02
ocropus add-ons

Functions

TESS_LOCAL void tesseract::TessBaseAPI::AdaptToCharacter (const char *unichar_repr, int length, float baseline, float xheight, float descender, float ascender)
TESS_LOCAL PAGE_REStesseract::TessBaseAPI::RecognitionPass1 (BLOCK_LIST *block_list)
TESS_LOCAL PAGE_REStesseract::TessBaseAPI::RecognitionPass2 (BLOCK_LIST *block_list, PAGE_RES *pass1_result)
TESS_LOCAL void tesseract::TessBaseAPI::DetectParagraphs (int debug_level)
static TESS_LOCAL int tesseract::TessBaseAPI::TesseractExtractResult (char **text, int **lengths, float **costs, int **x0, int **y0, int **x1, int **y1, PAGE_RES *page_res)
TESS_LOCAL const PAGE_REStesseract::TessBaseAPI::GetPageRes () const

Function Documentation

void tesseract::TessBaseAPI::AdaptToCharacter ( const char *  unichar_repr,
int  length,
float  baseline,
float  xheight,
float  descender,
float  ascender 
) [protected]

Adapt to recognize the current image as the given character. The image must be preloaded and be just an image of a single character.

Adapt to recognize the current image as the given character. The image must be preloaded into pix_binary_ and be just an image of a single character.

Definition at line 1876 of file baseapi.cpp.

                                                   {
  UNICHAR_ID id = tesseract_->unicharset.unichar_to_id(unichar_repr, length);
  TBLOB *blob = make_tesseract_blob(baseline, xheight, descender, ascender,
                                    tesseract_->classify_bln_numeric_mode,
                                    tesseract_->pix_binary());
  float threshold;
  UNICHAR_ID best_class = 0;
  float best_rating = -100;


  // Classify to get a raw choice.
  BLOB_CHOICE_LIST choices;
  DENORM denorm;
  tesseract_->AdaptiveClassifier(blob, denorm, &choices, NULL);
  BLOB_CHOICE_IT choice_it;
  choice_it.set_to_list(&choices);
  for (choice_it.mark_cycle_pt(); !choice_it.cycled_list();
       choice_it.forward()) {
    if (choice_it.data()->rating() > best_rating) {
      best_rating = choice_it.data()->rating();
      best_class = choice_it.data()->unichar_id();
    }
  }

  threshold = tesseract_->matcher_good_threshold;

  if (blob->outlines)
    tesseract_->AdaptToChar(blob, denorm, id, kUnknownFontinfoId, threshold);
  delete blob;
}
void tesseract::TessBaseAPI::DetectParagraphs ( int  debug_level) [protected]

After text is recognized, break each paragraph into blocks.

Definition at line 1929 of file baseapi.cpp.

                                                  {
  if (paragraph_models_ == NULL)
    paragraph_models_ = new GenericVector<ParagraphModel*>;
  MutableIterator *result_it = GetMutableIterator();
  do {  // Detect paragraphs for this block
    GenericVector<ParagraphModel *> models;
    ::tesseract::DetectParagraphs(debug_level, result_it, &models);
    *paragraph_models_ += models;
  } while (result_it->Next(RIL_BLOCK));
  delete result_it;
}
TESS_LOCAL const PAGE_RES* tesseract::TessBaseAPI::GetPageRes ( ) const [inline, protected]

Definition at line 751 of file baseapi.h.

                                                {
    return page_res_;
  };
PAGE_RES * tesseract::TessBaseAPI::RecognitionPass1 ( BLOCK_LIST *  block_list) [protected]

Recognize text doing one pass only, using settings for a given pass.

Definition at line 1913 of file baseapi.cpp.

                                                              {
  PAGE_RES *page_res = new PAGE_RES(block_list,
                                    &(tesseract_->prev_word_best_choice_));
  tesseract_->recog_all_words(page_res, NULL, NULL, NULL, 1);
  return page_res;
}
PAGE_RES * tesseract::TessBaseAPI::RecognitionPass2 ( BLOCK_LIST *  block_list,
PAGE_RES pass1_result 
) [protected]

Definition at line 1920 of file baseapi.cpp.

                                                                {
  if (!pass1_result)
    pass1_result = new PAGE_RES(block_list,
                                &(tesseract_->prev_word_best_choice_));
  tesseract_->recog_all_words(pass1_result, NULL, NULL, NULL, 2);
  return pass1_result;
}
int tesseract::TessBaseAPI::TesseractExtractResult ( char **  text,
int **  lengths,
float **  costs,
int **  x0,
int **  y0,
int **  x1,
int **  y1,
PAGE_RES page_res 
) [static, protected]

Extract the OCR results, costs (penalty points for uncertainty), and the bounding boxes of the characters.

Definition at line 2012 of file baseapi.cpp.

                                                            {
  TESS_CHAR_LIST tess_chars;
  TESS_CHAR_IT tess_chars_it(&tess_chars);
  extract_result(&tess_chars_it, page_res);
  tess_chars_it.move_to_first();
  int n = tess_chars.length();
  int text_len = 0;
  *lengths = new int[n];
  *costs = new float[n];
  *x0 = new int[n];
  *y0 = new int[n];
  *x1 = new int[n];
  *y1 = new int[n];
  int i = 0;
  for (tess_chars_it.mark_cycle_pt();
       !tess_chars_it.cycled_list();
       tess_chars_it.forward(), i++) {
    TESS_CHAR *tc = tess_chars_it.data();
    text_len += (*lengths)[i] = tc->length;
    (*costs)[i] = tc->cost;
    (*x0)[i] = tc->box.left();
    (*y0)[i] = tc->box.bottom();
    (*x1)[i] = tc->box.right();
    (*y1)[i] = tc->box.top();
  }
  char *p = *text = new char[text_len];

  tess_chars_it.move_to_first();
  for (tess_chars_it.mark_cycle_pt();
        !tess_chars_it.cycled_list();
       tess_chars_it.forward()) {
    TESS_CHAR *tc = tess_chars_it.data();
    strncpy(p, tc->unicode_repr, tc->length);
    p += tc->length;
  }
  return n;
}