Tesseract
3.02
|
#include "blobs.h"
#include "boxword.h"
#include "elst.h"
#include "genericvector.h"
#include "normalis.h"
#include "ocrblock.h"
#include "ocrrow.h"
#include "params_training_featdef.h"
#include "ratngs.h"
#include "rejctmap.h"
#include "seam.h"
#include "werd.h"
Go to the source code of this file.
Classes | |
struct | BlamerBundle |
class | PAGE_RES |
class | BLOCK_RES |
class | ROW_RES |
class | WERD_RES |
class | PAGE_RES_IT |
Namespaces | |
namespace | tesseract |
Enumerations | |
enum | IncorrectResultReason { IRR_CORRECT, IRR_CLASSIFIER, IRR_CHOPPER, IRR_CLASS_LM_TRADEOFF, IRR_PAGE_LAYOUT, IRR_SEGSEARCH_HEUR, IRR_SEGSEARCH_PP, IRR_CLASS_OLD_LM_TRADEOFF, IRR_ADAPTION, IRR_NO_TRUTH_SPLIT, IRR_NO_TRUTH, IRR_UNKNOWN, IRR_NUM_REASONS } |
enum | CRUNCH_MODE { CR_NONE, CR_KEEP_SPACE, CR_LOOSE_SPACE, CR_DELETE } |
Functions | |
ELISTIZEH (BLOCK_RES) CLISTIZEH(BLOCK_RES) class ROW_RES |
enum CRUNCH_MODE |
Definition at line 304 of file pageres.h.
{ CR_NONE, CR_KEEP_SPACE, CR_LOOSE_SPACE, CR_DELETE };
Definition at line 45 of file pageres.h.
{ // The text recorded in best choice == truth text IRR_CORRECT, // Either: Top choice is incorrect and is a dictionary word (language model // is unlikely to help correct such errors, so blame the classifier). // Or: the correct unichar was not included in shortlist produced by the // classifier at all. IRR_CLASSIFIER, // Chopper has not found one or more splits that correspond to the correct // character bounding boxes recorded in BlamerBundle::truth_word. IRR_CHOPPER, // Classifier did include correct unichars for each blob in the correct // segmentation, however its rating could have been too bad to allow the // language model to pull out the correct choice. On the other hand the // strength of the language model might have been too weak to favor the // correct answer, thus we call this case a classifier-language model // tradeoff error. IRR_CLASS_LM_TRADEOFF, // Page layout failed to produce the correct bounding box. Blame page layout // if the truth was not found for the word, which implies that the bounding // box of the word was incorrect (no truth word had a similar bounding box). IRR_PAGE_LAYOUT, // SegSearch heuristic prevented one or more blobs from the correct // segmentation state from being classified (e.g. the blob was too wide). IRR_SEGSEARCH_HEUR, // The correct segmentation state was not explored because of poor SegSearch // pain point prioritization. We blame SegSearch pain point prioritization // if the best rating of a choice constructed from correct segmentation is // better than that of the best choice (i.e. if we got to explore the correct // segmentation state, language model would have picked the correct choice). IRR_SEGSEARCH_PP, // Same as IRR_CLASS_LM_TRADEOFF, but used when we only run chopper on a word, // and thus use the old language model (permuters). 
// TODO(antonova): integrate the new language model with chopper IRR_CLASS_OLD_LM_TRADEOFF, // If there is an incorrect adaptive template match with a better score than // a correct one (either pre-trained or adapted), mark this as adaption error. IRR_ADAPTION, // split_and_recog_word() failed to find a suitable split in truth. IRR_NO_TRUTH_SPLIT, // Truth is not available for this word (e.g. when words in corrected content // file are turned into ~~~~ because an appropriate alignment was not found). IRR_NO_TRUTH, // The text recorded in best choice != truth text, but none of the above // reasons are set. IRR_UNKNOWN, IRR_NUM_REASONS };
ELISTIZEH | ( | BLOCK_RES | ) |