Tesseract
3.02
|
Functions | |
TESS_LOCAL void | tesseract::TessBaseAPI::AdaptToCharacter (const char *unichar_repr, int length, float baseline, float xheight, float descender, float ascender) |
TESS_LOCAL PAGE_RES * | tesseract::TessBaseAPI::RecognitionPass1 (BLOCK_LIST *block_list) |
TESS_LOCAL PAGE_RES * | tesseract::TessBaseAPI::RecognitionPass2 (BLOCK_LIST *block_list, PAGE_RES *pass1_result) |
TESS_LOCAL void | tesseract::TessBaseAPI::DetectParagraphs (int debug_level) |
static TESS_LOCAL int | tesseract::TessBaseAPI::TesseractExtractResult (char **text, int **lengths, float **costs, int **x0, int **y0, int **x1, int **y1, PAGE_RES *page_res) |
TESS_LOCAL const PAGE_RES * | tesseract::TessBaseAPI::GetPageRes () const |
void tesseract::TessBaseAPI::AdaptToCharacter(const char *unichar_repr, int length, float baseline, float xheight, float descender, float ascender) [protected]
Adapt to recognize the current image as the given character. The image must be preloaded and be just an image of a single character.
Adapt to recognize the current image as the given character. The image must be preloaded into pix_binary_ and be just an image of a single character.
Definition at line 1876 of file baseapi.cpp.
{
  // Adapts the classifier so that the current image is recognized as the
  // character given by unichar_repr (length bytes). The image is read from
  // tesseract_->pix_binary(); baseline, xheight, descender and ascender are
  // forwarded unchanged to make_tesseract_blob() when building the blob.
  UNICHAR_ID id = tesseract_->unicharset.unichar_to_id(unichar_repr, length);
  TBLOB *blob = make_tesseract_blob(baseline, xheight, descender, ascender,
                                    tesseract_->classify_bln_numeric_mode,
                                    tesseract_->pix_binary());

  // Classify to get a raw choice. The resulting choice list is not inspected
  // here; the call itself is kept exactly as before.
  BLOB_CHOICE_LIST choices;
  DENORM denorm;
  tesseract_->AdaptiveClassifier(blob, denorm, &choices, NULL);

  // Adaptation is only attempted when the blob has outlines.
  const float threshold = tesseract_->matcher_good_threshold;
  if (blob->outlines) {
    tesseract_->AdaptToChar(blob, denorm, id, kUnknownFontinfoId, threshold);
  }
  delete blob;
}
void tesseract::TessBaseAPI::DetectParagraphs(int debug_level) [protected]
After text is recognized, break each block into paragraphs.
Definition at line 1929 of file baseapi.cpp.
{
  // Create the accumulated model container on first use.
  if (!paragraph_models_) {
    paragraph_models_ = new GenericVector<ParagraphModel*>;
  }

  MutableIterator *block_it = GetMutableIterator();
  do {
    // Run paragraph detection at the iterator's current position and append
    // whatever models it produced to the accumulated list.
    GenericVector<ParagraphModel *> block_models;
    ::tesseract::DetectParagraphs(debug_level, block_it, &block_models);
    *paragraph_models_ += block_models;
  } while (block_it->Next(RIL_BLOCK));  // advance block by block

  delete block_it;
}
TESS_LOCAL const PAGE_RES* tesseract::TessBaseAPI::GetPageRes() const [inline, protected]
PAGE_RES * tesseract::TessBaseAPI::RecognitionPass1(BLOCK_LIST *block_list) [protected]
Recognize text doing one pass only, using settings for a given pass.
Definition at line 1913 of file baseapi.cpp.
{
  // Build a new PAGE_RES over block_list, run recog_all_words() on it with
  // pass number 1, and hand the newly allocated result back to the caller.
  const int kPassNumber = 1;
  PAGE_RES *result =
      new PAGE_RES(block_list, &(tesseract_->prev_word_best_choice_));
  tesseract_->recog_all_words(result, NULL, NULL, NULL, kPassNumber);
  return result;
}
PAGE_RES * tesseract::TessBaseAPI::RecognitionPass2(BLOCK_LIST *block_list, PAGE_RES *pass1_result) [protected]
Definition at line 1920 of file baseapi.cpp.
{
  // Reuse the pass-1 result when one is supplied; otherwise build a new
  // PAGE_RES over block_list. Either way, run recog_all_words() with pass
  // number 2 and return the (possibly newly allocated) result.
  PAGE_RES *page_res = pass1_result;
  if (page_res == NULL) {
    page_res = new PAGE_RES(block_list, &(tesseract_->prev_word_best_choice_));
  }
  tesseract_->recog_all_words(page_res, NULL, NULL, NULL, 2);
  return page_res;
}
int tesseract::TessBaseAPI::TesseractExtractResult(char **text, int **lengths, float **costs, int **x0, int **y0, int **x1, int **y1, PAGE_RES *page_res) [static, protected]
Extract the OCR results, costs (penalty points for uncertainty), and the bounding boxes of the characters.
Definition at line 2012 of file baseapi.cpp.
{
  // Extracts per-character results from page_res into parallel arrays.
  //
  // Outputs (each allocated here with new[]):
  //   *text    - the bytes of every character's unicode_repr, concatenated;
  //              entry i occupies (*lengths)[i] bytes. A trailing NUL byte is
  //              appended after the last character.
  //   *lengths - byte length of each character.
  //   *costs   - cost of each character.
  //   *x0,*y0,*x1,*y1 - box left, bottom, right and top of each character.
  // Returns the number of characters, i.e. the length of each output array.
  TESS_CHAR_LIST tess_chars;
  TESS_CHAR_IT tess_chars_it(&tess_chars);
  extract_result(&tess_chars_it, page_res);
  tess_chars_it.move_to_first();

  int n = tess_chars.length();
  *lengths = new int[n];
  *costs = new float[n];
  *x0 = new int[n];
  *y0 = new int[n];
  *x1 = new int[n];
  *y1 = new int[n];

  // First pass: fill the per-character arrays and total up the text size.
  int text_len = 0;
  int i = 0;
  for (tess_chars_it.mark_cycle_pt();
       !tess_chars_it.cycled_list();
       tess_chars_it.forward(), i++) {
    TESS_CHAR *tc = tess_chars_it.data();
    (*lengths)[i] = tc->length;
    text_len += tc->length;
    (*costs)[i] = tc->cost;
    (*x0)[i] = tc->box.left();
    (*y0)[i] = tc->box.bottom();
    (*x1)[i] = tc->box.right();
    (*y1)[i] = tc->box.top();
  }

  // Second pass: concatenate the character bytes. One extra byte is reserved
  // so the buffer is always NUL-terminated; the first text_len bytes are laid
  // out exactly as before, so callers indexing via *lengths are unaffected.
  char *p = *text = new char[text_len + 1];
  tess_chars_it.move_to_first();
  for (tess_chars_it.mark_cycle_pt();
       !tess_chars_it.cycled_list();
       tess_chars_it.forward()) {
    TESS_CHAR *tc = tess_chars_it.data();
    strncpy(p, tc->unicode_repr, tc->length);
    p += tc->length;
  }
  *p = '\0';
  return n;
}