|
Tesseract
3.02
|
Functions | |
| TESS_LOCAL void | tesseract::TessBaseAPI::AdaptToCharacter (const char *unichar_repr, int length, float baseline, float xheight, float descender, float ascender) |
| TESS_LOCAL PAGE_RES * | tesseract::TessBaseAPI::RecognitionPass1 (BLOCK_LIST *block_list) |
| TESS_LOCAL PAGE_RES * | tesseract::TessBaseAPI::RecognitionPass2 (BLOCK_LIST *block_list, PAGE_RES *pass1_result) |
| TESS_LOCAL void | tesseract::TessBaseAPI::DetectParagraphs (int debug_level) |
| static TESS_LOCAL int | tesseract::TessBaseAPI::TesseractExtractResult (char **text, int **lengths, float **costs, int **x0, int **y0, int **x1, int **y1, PAGE_RES *page_res) |
| TESS_LOCAL const PAGE_RES * | tesseract::TessBaseAPI::GetPageRes () const |
| void tesseract::TessBaseAPI::AdaptToCharacter | ( | const char * | unichar_repr, |
| int | length, | ||
| float | baseline, | ||
| float | xheight, | ||
| float | descender, | ||
| float | ascender | ||
| ) | [protected] |
Adapt to recognize the current image as the given character. The image must be preloaded into pix_binary_ and must be an image of just a single character.
Definition at line 1876 of file baseapi.cpp.
{
  // Look up the id of the target character (unichar_repr, length) in the
  // unicharset; this id is what AdaptToChar is asked to adapt towards.
  UNICHAR_ID id = tesseract_->unicharset.unichar_to_id(unichar_repr, length);
  // Build a blob via make_tesseract_blob from the supplied metrics and the
  // image returned by tesseract_->pix_binary().
  TBLOB *blob = make_tesseract_blob(baseline, xheight, descender, ascender,
                                    tesseract_->classify_bln_numeric_mode,
                                    tesseract_->pix_binary());

  // Classify to get a raw choice. The resulting choices are not inspected
  // in this function.
  // NOTE(review): the call is retained on the assumption that the classifier
  // must have run on the blob before AdaptToChar - confirm.
  BLOB_CHOICE_LIST choices;
  DENORM denorm;
  tesseract_->AdaptiveClassifier(blob, denorm, &choices, NULL);

  // Only adapt when the blob actually has outlines.
  float threshold = tesseract_->matcher_good_threshold;
  if (blob->outlines)
    tesseract_->AdaptToChar(blob, denorm, id, kUnknownFontinfoId, threshold);
  delete blob;
}
| void tesseract::TessBaseAPI::DetectParagraphs | ( | int | debug_level | ) | [protected] |
After text is recognized, break each block into paragraphs.
Definition at line 1929 of file baseapi.cpp.
{
  // Create the accumulated model container on first use.
  if (!paragraph_models_)
    paragraph_models_ = new GenericVector<ParagraphModel*>;

  MutableIterator *block_it = GetMutableIterator();
  bool more_blocks = true;
  while (more_blocks) {
    // Detect paragraphs at the iterator's current block and append the
    // models found there to the accumulated container.
    GenericVector<ParagraphModel *> block_models;
    ::tesseract::DetectParagraphs(debug_level, block_it, &block_models);
    *paragraph_models_ += block_models;
    // Advance to the next block; stop once Next() reports there is none.
    more_blocks = block_it->Next(RIL_BLOCK);
  }
  delete block_it;
}
| TESS_LOCAL const PAGE_RES* tesseract::TessBaseAPI::GetPageRes | ( | ) | const [inline, protected] |
| PAGE_RES * tesseract::TessBaseAPI::RecognitionPass1 | ( | BLOCK_LIST * | block_list | ) | [protected] |
Recognize text doing one pass only, using settings for a given pass.
Definition at line 1913 of file baseapi.cpp.
{
  // Wrap the given blocks in a newly allocated PAGE_RES; the pointer is
  // handed back to the caller.
  PAGE_RES *result =
      new PAGE_RES(block_list, &(tesseract_->prev_word_best_choice_));
  // NOTE(review): the trailing 1 presumably selects pass 1 - confirm against
  // recog_all_words.
  tesseract_->recog_all_words(result, NULL, NULL, NULL, 1);
  return result;
}
| PAGE_RES * tesseract::TessBaseAPI::RecognitionPass2 | ( | BLOCK_LIST * | block_list, |
| PAGE_RES * | pass1_result | ||
| ) | [protected] |
Definition at line 1920 of file baseapi.cpp.
{
  // Reuse the caller-supplied pass-1 result when there is one; otherwise
  // build a fresh PAGE_RES from the block list.
  PAGE_RES *page_res = pass1_result;
  if (page_res == NULL) {
    page_res =
        new PAGE_RES(block_list, &(tesseract_->prev_word_best_choice_));
  }
  // NOTE(review): the trailing 2 presumably selects pass 2 - confirm against
  // recog_all_words.
  tesseract_->recog_all_words(page_res, NULL, NULL, NULL, 2);
  return page_res;
}
| int tesseract::TessBaseAPI::TesseractExtractResult | ( | char ** | text, |
| int ** | lengths, | ||
| float ** | costs, | ||
| int ** | x0, | ||
| int ** | y0, | ||
| int ** | x1, | ||
| int ** | y1, | ||
| PAGE_RES * | page_res | ||
| ) | [static, protected] |
Extract the OCR results, costs (penalty points for uncertainty), and the bounding boxes of the characters.
Definition at line 2012 of file baseapi.cpp.
{
  // Collect the per-character results from page_res into a temporary list.
  TESS_CHAR_LIST char_list;
  TESS_CHAR_IT char_it(&char_list);
  extract_result(&char_it, page_res);
  char_it.move_to_first();

  // Allocate one slot per extracted character in every output array.
  // NOTE(review): arrays come from new[]; presumably the caller releases
  // them with delete[] - confirm.
  int count = char_list.length();
  int *len_out = *lengths = new int[count];
  float *cost_out = *costs = new float[count];
  int *left_out = *x0 = new int[count];
  int *bottom_out = *y0 = new int[count];
  int *right_out = *x1 = new int[count];
  int *top_out = *y1 = new int[count];

  // First pass: fill the per-character arrays and total the text bytes.
  int total_len = 0;
  int idx = 0;
  for (char_it.mark_cycle_pt(); !char_it.cycled_list(); char_it.forward()) {
    TESS_CHAR *ch = char_it.data();
    len_out[idx] = ch->length;
    total_len += len_out[idx];
    cost_out[idx] = ch->cost;
    left_out[idx] = ch->box.left();
    bottom_out[idx] = ch->box.bottom();
    right_out[idx] = ch->box.right();
    top_out[idx] = ch->box.top();
    idx++;
  }

  // Second pass: pack each character's bytes back to back into *text.
  // The buffer holds exactly total_len bytes; no extra byte is reserved for
  // a terminating NUL, so readers must rely on the lengths array.
  char *dest = *text = new char[total_len];
  char_it.move_to_first();
  for (char_it.mark_cycle_pt(); !char_it.cycled_list(); char_it.forward()) {
    TESS_CHAR *ch = char_it.data();
    strncpy(dest, ch->unicode_repr, ch->length);
    dest += ch->length;
  }

  // The return value is the number of characters extracted.
  return count;
}