Tesseract
3.02
|
00001 00002 // File: baseapi.h 00003 // Description: Simple API for calling tesseract. 00004 // Author: Ray Smith 00005 // Created: Fri Oct 06 15:35:01 PDT 2006 00006 // 00007 // (C) Copyright 2006, Google Inc. 00008 // Licensed under the Apache License, Version 2.0 (the "License"); 00009 // you may not use this file except in compliance with the License. 00010 // You may obtain a copy of the License at 00011 // http://www.apache.org/licenses/LICENSE-2.0 00012 // Unless required by applicable law or agreed to in writing, software 00013 // distributed under the License is distributed on an "AS IS" BASIS, 00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 // See the License for the specific language governing permissions and 00016 // limitations under the License. 00017 // 00019 00020 #ifndef TESSERACT_API_BASEAPI_H__ 00021 #define TESSERACT_API_BASEAPI_H__ 00022 00023 #include <stdio.h> 00024 // To avoid collision with other typenames include the ABSOLUTE MINIMUM 00025 // complexity of includes here. Use forward declarations wherever possible 00026 // and hide includes of complex types in baseapi.cpp. 00027 #include "platform.h" 00028 #include "apitypes.h" 00029 #include "thresholder.h" 00030 #include "unichar.h" 00031 #include "tesscallback.h" 00032 #include "publictypes.h" 00033 #include "pageiterator.h" 00034 #include "resultiterator.h" 00035 00036 template <typename T> class GenericVector; 00037 class PAGE_RES; 00038 class PAGE_RES_IT; 00039 class ParagraphModel; 00040 class BlamerBundle; 00041 class BLOCK_LIST; 00042 class DENORM; 00043 class IMAGE; 00044 class MATRIX; 00045 class PBLOB; 00046 class ROW; 00047 class STRING; 00048 class WERD; 00049 struct Pix; 00050 struct Box; 00051 struct Pixa; 00052 struct Boxa; 00053 class ETEXT_DESC; 00054 struct OSResults; 00055 class TBOX; 00056 class UNICHARSET; 00057 00058 // From oldlist.h 00059 // TODO(antonova): remove when oldlist is deprecated. 00060 struct list_rec; 00061 typedef list_rec *LIST; 00062 00063 #define MAX_NUM_INT_FEATURES 512 00064 struct INT_FEATURE_STRUCT; 00065 typedef INT_FEATURE_STRUCT *INT_FEATURE; 00066 typedef INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES]; 00067 struct TBLOB; 00068 00069 namespace tesseract { 00070 00071 class CubeRecoContext; 00072 class Dawg; 00073 class Dict; 00074 class EquationDetect; 00075 class LTRResultIterator; 00076 class MutableIterator; 00077 class Tesseract; 00078 class Trie; 00079 class Wordrec; 00080 00081 typedef int (Dict::*DictFunc)(void* void_dawg_args, 00082 UNICHAR_ID unichar_id, bool word_end) const; 00083 typedef double (Dict::*ProbabilityInContextFunc)(const char* lang, 00084 const char* context, 00085 int context_bytes, 00086 const char* character, 00087 int character_bytes); 00088 typedef void (Wordrec::*FillLatticeFunc)(const MATRIX &ratings, 00089 const LIST &best_choices, 00090 const UNICHARSET &unicharset, 00091 BlamerBundle *blamer_bundle); 00092 typedef TessCallback3<const UNICHARSET &, int, PAGE_RES *> TruthCallback; 00093 00102 class TESS_API TessBaseAPI { 00103 public: 00104 TessBaseAPI(); 00105 virtual ~TessBaseAPI(); 00106 00110 static const char* Version(); 00111 00116 void SetInputName(const char* name); 00117 00119 void SetOutputName(const char* name); 00120 00136 bool SetVariable(const char* name, const char* value); 00137 bool SetDebugVariable(const char* name, const char* value); 00138 00143 bool GetIntVariable(const char *name, int *value) const; 00144 bool GetBoolVariable(const char *name, bool *value) const; 00145 bool GetDoubleVariable(const char *name, double *value) const; 00146 00151 const char *GetStringVariable(const char *name) const; 00152 00156 void PrintVariables(FILE *fp) const; 00157 00161 bool GetVariableAsString(const char *name, STRING *val); 00162 00201 int Init(const char* datapath, const char* language, OcrEngineMode mode, 00202 char **configs, int configs_size, 00203 const GenericVector<STRING> *vars_vec, 00204 const GenericVector<STRING> *vars_values, 00205 bool set_only_non_debug_params); 00206 int Init(const char* datapath, const char* language, OcrEngineMode oem) { 00207 return Init(datapath, language, oem, NULL, 0, NULL, NULL, false); 00208 } 00209 int Init(const char* datapath, const char* language) { 00210 return Init(datapath, language, OEM_DEFAULT, NULL, 0, NULL, NULL, false); 00211 } 00212 00221 const char* GetInitLanguagesAsString() const; 00222 00228 void GetLoadedLanguagesAsVector(GenericVector<STRING>* langs) const; 00229 00236 int InitLangMod(const char* datapath, const char* language); 00237 00242 void InitForAnalysePage(); 00243 00250 void ReadConfigFile(const char* filename); 00252 void ReadDebugConfigFile(const char* filename); 00253 00259 void SetPageSegMode(PageSegMode mode); 00260 00262 PageSegMode GetPageSegMode() const; 00263 00281 char* TesseractRect(const unsigned char* imagedata, 00282 int bytes_per_pixel, int bytes_per_line, 00283 int left, int top, int width, int height); 00284 00289 void ClearAdaptiveClassifier(); 00290 00297 /* @{ */ 00298 00308 void SetImage(const unsigned char* imagedata, int width, int height, 00309 int bytes_per_pixel, int bytes_per_line); 00310 00321 void SetImage(const Pix* pix); 00322 00327 void SetSourceResolution(int ppi); 00328 00334 void SetRectangle(int left, int top, int width, int height); 00335 00343 void SetThresholder(ImageThresholder* thresholder) { 00344 if (thresholder_ != NULL) 00345 delete thresholder_; 00346 thresholder_ = thresholder; 00347 ClearResults(); 00348 } 00349 00355 Pix* GetThresholdedImage(); 00356 00362 Boxa* GetRegions(Pixa** pixa); 00363 00371 Boxa* GetTextlines(Pixa** pixa, int** blockids); 00372 00381 Boxa* GetStrips(Pixa** pixa, int** blockids); 00382 00388 Boxa* GetWords(Pixa** pixa); 00389 00398 Boxa* GetConnectedComponents(Pixa** cc); 00399 00408 Boxa* GetComponentImages(PageIteratorLevel level, 00409 bool text_only, 00410 Pixa** pixa, int** blockids); 00411 00418 int GetThresholdedImageScaleFactor() const; 00419 00425 void DumpPGM(const char* filename); 00426 00438 PageIterator* AnalyseLayout(); 00439 00446 int Recognize(ETEXT_DESC* monitor); 00447 00454 int RecognizeForChopTest(ETEXT_DESC* monitor); 00455 00472 bool ProcessPages(const char* filename, 00473 const char* retry_config, int timeout_millisec, 00474 STRING* text_out); 00475 00487 bool ProcessPage(Pix* pix, int page_index, const char* filename, 00488 const char* retry_config, int timeout_millisec, 00489 STRING* text_out); 00490 00499 ResultIterator* GetIterator(); 00500 00509 MutableIterator* GetMutableIterator(); 00510 00515 char* GetUTF8Text(); 00516 00522 char* GetHOCRText(int page_number); 00530 char* GetBoxText(int page_number); 00536 char* GetUNLVText(); 00538 int MeanTextConf(); 00545 int* AllWordConfidences(); 00546 00557 bool AdaptToWordStr(PageSegMode mode, const char* wordstr); 00558 00565 void Clear(); 00566 00573 void End(); 00574 00581 int IsValidWord(const char *word); 00582 00583 bool GetTextDirection(int* out_offset, float* out_slope); 00584 00586 void SetDictFunc(DictFunc f); 00587 00591 void SetProbabilityInContextFunc(ProbabilityInContextFunc f); 00592 00594 void SetFillLatticeFunc(FillLatticeFunc f); 00595 00600 bool DetectOS(OSResults*); 00601 00603 void GetFeaturesForBlob(TBLOB* blob, const DENORM& denorm, 00604 INT_FEATURE_ARRAY int_features, 00605 int* num_features, int* FeatureOutlineIndex); 00606 00611 static ROW* FindRowForBox(BLOCK_LIST* blocks, int left, int top, 00612 int right, int bottom); 00613 00618 void RunAdaptiveClassifier(TBLOB* blob, const DENORM& denorm, 00619 int num_max_matches, 00620 int* unichar_ids, 00621 float* ratings, 00622 int* num_matches_returned); 00623 00625 const char* GetUnichar(int unichar_id); 00626 00628 const Dawg *GetDawg(int i) const; 00629 00631 int NumDawgs() const; 00632 00634 static ROW *MakeTessOCRRow(float baseline, float xheight, 00635 float descender, float ascender); 00636 00638 static TBLOB *MakeTBLOB(Pix *pix); 00639 00645 static void NormalizeTBLOB(TBLOB *tblob, ROW *row, 00646 bool numeric_mode, DENORM *denorm); 00647 00648 Tesseract* const tesseract() const { 00649 return tesseract_; 00650 } 00651 00652 OcrEngineMode const oem() const { 00653 return last_oem_requested_; 00654 } 00655 00656 void InitTruthCallback(TruthCallback *cb) { truth_cb_ = cb; } 00657 00659 CubeRecoContext *GetCubeRecoContext() const; 00660 00661 void set_min_orientation_margin(double margin); 00662 00667 void GetBlockTextOrientations(int** block_orientation, 00668 bool** vertical_writing); 00669 00671 BLOCK_LIST* FindLinesCreateBlockList(); 00672 00678 static void DeleteBlockList(BLOCK_LIST* block_list); 00679 /* @} */ 00680 00681 protected: 00682 00684 TESS_LOCAL bool InternalSetImage(); 00685 00690 TESS_LOCAL virtual void Threshold(Pix** pix); 00691 00696 TESS_LOCAL int FindLines(); 00697 00699 TESS_LOCAL void ClearResults(); 00700 00706 TESS_LOCAL LTRResultIterator* GetLTRIterator(); 00707 00714 TESS_LOCAL int TextLength(int* blob_count); 00715 00717 /* @{ */ 00718 00723 TESS_LOCAL void AdaptToCharacter(const char *unichar_repr, 00724 int length, 00725 float baseline, 00726 float xheight, 00727 float descender, 00728 float ascender); 00729 00731 TESS_LOCAL PAGE_RES* RecognitionPass1(BLOCK_LIST* block_list); 00732 TESS_LOCAL PAGE_RES* RecognitionPass2(BLOCK_LIST* block_list, PAGE_RES* pass1_result); 00733 00735 00736 TESS_LOCAL void DetectParagraphs(int debug_level); 00737 00742 TESS_LOCAL static int TesseractExtractResult(char** text, 00743 int** lengths, 00744 float** costs, 00745 int** x0, 00746 int** y0, 00747 int** x1, 00748 int** y1, 00749 PAGE_RES* page_res); 00750 00751 TESS_LOCAL const PAGE_RES* GetPageRes() const { 00752 return page_res_; 00753 }; 00754 /* @} */ 00755 00756 protected: 00757 Tesseract* tesseract_; 00758 Tesseract* osd_tesseract_; 00759 EquationDetect* equ_detect_; 00760 ImageThresholder* thresholder_; 00761 GenericVector<ParagraphModel *>* paragraph_models_; 00762 BLOCK_LIST* block_list_; 00763 PAGE_RES* page_res_; 00764 STRING* input_file_; 00765 STRING* output_file_; 00766 STRING* datapath_; 00767 STRING* language_; 00768 OcrEngineMode last_oem_requested_; 00769 bool recognition_done_; 00770 TruthCallback *truth_cb_; 00771 00776 /* @{ */ 00777 int rect_left_; 00778 int rect_top_; 00779 int rect_width_; 00780 int rect_height_; 00781 int image_width_; 00782 int image_height_; 00783 /* @} */ 00784 00785 }; 00786 00787 } // namespace tesseract. 00788 00789 #endif // TESSERACT_API_BASEAPI_H__