Tesseract  3.02
tesseract-ocr/api/baseapi.h
Go to the documentation of this file.
00001 
00002 // File:        baseapi.h
00003 // Description: Simple API for calling tesseract.
00004 // Author:      Ray Smith
00005 // Created:     Fri Oct 06 15:35:01 PDT 2006
00006 //
00007 // (C) Copyright 2006, Google Inc.
00008 // Licensed under the Apache License, Version 2.0 (the "License");
00009 // you may not use this file except in compliance with the License.
00010 // You may obtain a copy of the License at
00011 // http://www.apache.org/licenses/LICENSE-2.0
00012 // Unless required by applicable law or agreed to in writing, software
00013 // distributed under the License is distributed on an "AS IS" BASIS,
00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015 // See the License for the specific language governing permissions and
00016 // limitations under the License.
00017 //
00019 
00020 #ifndef TESSERACT_API_BASEAPI_H__
00021 #define TESSERACT_API_BASEAPI_H__
00022 
00023 #include <stdio.h>
00024 // To avoid collision with other typenames include the ABSOLUTE MINIMUM
00025 // complexity of includes here. Use forward declarations wherever possible
00026 // and hide includes of complex types in baseapi.cpp.
00027 #include "platform.h"
00028 #include "apitypes.h"
00029 #include "thresholder.h"
00030 #include "unichar.h"
00031 #include "tesscallback.h"
00032 #include "publictypes.h"
00033 #include "pageiterator.h"
00034 #include "resultiterator.h"
00035 
// Forward declarations only: this header deliberately avoids including the
// headers that define these types (see the note above the #include list).

// Container template used in the Init()/GetLoadedLanguagesAsVector()
// signatures.
template <typename T> class GenericVector;

// Types declared with the `struct` class-key.
// NOTE(review): Pix/Pixa/Box/Boxa are presumably the Leptonica image and box
// containers - confirm against the defining headers.
struct Box;
struct Boxa;
struct Pix;
struct Pixa;
struct OSResults;

// Types declared with the `class` class-key, in alphabetical order.
class BLOCK_LIST;
class BlamerBundle;
class DENORM;
class ETEXT_DESC;
class IMAGE;
class MATRIX;
class PAGE_RES;
class PAGE_RES_IT;
class PBLOB;
class ParagraphModel;
class ROW;
class STRING;
class TBOX;
class UNICHARSET;
class WERD;
00057 
// Opaque list handle, mirrored from oldlist.h so that header need not be
// included here. The elaborated type specifier also forward-declares
// struct list_rec at this scope.
// TODO(antonova): remove when oldlist is deprecated.
typedef struct list_rec *LIST;

// Blob type used by the classifier-facing entry points of TessBaseAPI.
struct TBLOB;

// Integer feature types: a pointer to a single feature, and a fixed-capacity
// array of MAX_NUM_INT_FEATURES features. INT_FEATURE_STRUCT stays incomplete
// in this header.
struct INT_FEATURE_STRUCT;
#define MAX_NUM_INT_FEATURES 512
typedef INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES];
typedef INT_FEATURE_STRUCT *INT_FEATURE;
00068 
00069 namespace tesseract {
00070 
00071 class CubeRecoContext;
00072 class Dawg;
00073 class Dict;
00074 class EquationDetect;
00075 class LTRResultIterator;
00076 class MutableIterator;
00077 class Tesseract;
00078 class Trie;
00079 class Wordrec;
00080 
00081 typedef int (Dict::*DictFunc)(void* void_dawg_args,
00082                               UNICHAR_ID unichar_id, bool word_end) const;
00083 typedef double (Dict::*ProbabilityInContextFunc)(const char* lang,
00084                                                  const char* context,
00085                                                  int context_bytes,
00086                                                  const char* character,
00087                                                  int character_bytes);
00088 typedef void (Wordrec::*FillLatticeFunc)(const MATRIX &ratings,
00089                                          const LIST &best_choices,
00090                                          const UNICHARSET &unicharset,
00091                                          BlamerBundle *blamer_bundle);
00092 typedef TessCallback3<const UNICHARSET &, int, PAGE_RES *> TruthCallback;
00093 
00102 class TESS_API TessBaseAPI {
00103  public:
00104   TessBaseAPI();
00105   virtual ~TessBaseAPI();
00106 
00110   static const char* Version();
00111 
00116   void SetInputName(const char* name);
00117 
00119   void SetOutputName(const char* name);
00120 
00136   bool SetVariable(const char* name, const char* value);
00137   bool SetDebugVariable(const char* name, const char* value);
00138 
00143   bool GetIntVariable(const char *name, int *value) const;
00144   bool GetBoolVariable(const char *name, bool *value) const;
00145   bool GetDoubleVariable(const char *name, double *value) const;
00146 
00151   const char *GetStringVariable(const char *name) const;
00152 
00156   void PrintVariables(FILE *fp) const;
00157 
00161   bool GetVariableAsString(const char *name, STRING *val);
00162 
00201   int Init(const char* datapath, const char* language, OcrEngineMode mode,
00202            char **configs, int configs_size,
00203            const GenericVector<STRING> *vars_vec,
00204            const GenericVector<STRING> *vars_values,
00205            bool set_only_non_debug_params);
00206   int Init(const char* datapath, const char* language, OcrEngineMode oem) {
00207     return Init(datapath, language, oem, NULL, 0, NULL, NULL, false);
00208   }
00209   int Init(const char* datapath, const char* language) {
00210     return Init(datapath, language, OEM_DEFAULT, NULL, 0, NULL, NULL, false);
00211   }
00212 
00221   const char* GetInitLanguagesAsString() const;
00222 
00228   void GetLoadedLanguagesAsVector(GenericVector<STRING>* langs) const;
00229 
00236   int InitLangMod(const char* datapath, const char* language);
00237 
00242   void InitForAnalysePage();
00243 
00250   void ReadConfigFile(const char* filename);
00252   void ReadDebugConfigFile(const char* filename);
00253 
00259   void SetPageSegMode(PageSegMode mode);
00260 
00262   PageSegMode GetPageSegMode() const;
00263 
00281   char* TesseractRect(const unsigned char* imagedata,
00282                       int bytes_per_pixel, int bytes_per_line,
00283                       int left, int top, int width, int height);
00284 
00289   void ClearAdaptiveClassifier();
00290 
00297    /* @{ */
00298 
00308   void SetImage(const unsigned char* imagedata, int width, int height,
00309                 int bytes_per_pixel, int bytes_per_line);
00310 
00321   void SetImage(const Pix* pix);
00322 
00327   void SetSourceResolution(int ppi);
00328 
00334   void SetRectangle(int left, int top, int width, int height);
00335 
00343   void SetThresholder(ImageThresholder* thresholder) {
00344     if (thresholder_ != NULL)
00345       delete thresholder_;
00346     thresholder_ = thresholder;
00347     ClearResults();
00348   }
00349 
00355   Pix* GetThresholdedImage();
00356 
00362   Boxa* GetRegions(Pixa** pixa);
00363 
00371   Boxa* GetTextlines(Pixa** pixa, int** blockids);
00372 
00381   Boxa* GetStrips(Pixa** pixa, int** blockids);
00382 
00388   Boxa* GetWords(Pixa** pixa);
00389 
00398   Boxa* GetConnectedComponents(Pixa** cc);
00399 
00408   Boxa* GetComponentImages(PageIteratorLevel level,
00409                            bool text_only,
00410                            Pixa** pixa, int** blockids);
00411 
00418   int GetThresholdedImageScaleFactor() const;
00419 
00425   void DumpPGM(const char* filename);
00426 
00438   PageIterator* AnalyseLayout();
00439 
00446   int Recognize(ETEXT_DESC* monitor);
00447 
00454   int RecognizeForChopTest(ETEXT_DESC* monitor);
00455 
00472   bool ProcessPages(const char* filename,
00473                     const char* retry_config, int timeout_millisec,
00474                     STRING* text_out);
00475 
00487   bool ProcessPage(Pix* pix, int page_index, const char* filename,
00488                    const char* retry_config, int timeout_millisec,
00489                    STRING* text_out);
00490 
00499   ResultIterator* GetIterator();
00500 
00509   MutableIterator* GetMutableIterator();
00510 
00515   char* GetUTF8Text();
00516 
00522   char* GetHOCRText(int page_number);
00530   char* GetBoxText(int page_number);
00536   char* GetUNLVText();
00538   int MeanTextConf();
00545   int* AllWordConfidences();
00546 
00557   bool AdaptToWordStr(PageSegMode mode, const char* wordstr);
00558 
00565   void Clear();
00566 
00573   void End();
00574 
00581   int IsValidWord(const char *word);
00582 
00583   bool GetTextDirection(int* out_offset, float* out_slope);
00584 
00586   void SetDictFunc(DictFunc f);
00587 
00591   void SetProbabilityInContextFunc(ProbabilityInContextFunc f);
00592 
00594   void SetFillLatticeFunc(FillLatticeFunc f);
00595 
00600   bool DetectOS(OSResults*);
00601 
00603   void GetFeaturesForBlob(TBLOB* blob, const DENORM& denorm,
00604                           INT_FEATURE_ARRAY int_features,
00605                           int* num_features, int* FeatureOutlineIndex);
00606 
00611   static ROW* FindRowForBox(BLOCK_LIST* blocks, int left, int top,
00612                             int right, int bottom);
00613 
00618   void RunAdaptiveClassifier(TBLOB* blob, const DENORM& denorm,
00619                              int num_max_matches,
00620                              int* unichar_ids,
00621                              float* ratings,
00622                              int* num_matches_returned);
00623 
00625   const char* GetUnichar(int unichar_id);
00626 
00628   const Dawg *GetDawg(int i) const;
00629 
00631   int NumDawgs() const;
00632 
00634   static ROW *MakeTessOCRRow(float baseline, float xheight,
00635                              float descender, float ascender);
00636 
00638   static TBLOB *MakeTBLOB(Pix *pix);
00639 
00645   static void NormalizeTBLOB(TBLOB *tblob, ROW *row,
00646                              bool numeric_mode, DENORM *denorm);
00647 
00648   Tesseract* const tesseract() const {
00649     return tesseract_;
00650   }
00651   
00652   OcrEngineMode const oem() const {
00653     return last_oem_requested_;
00654   }
00655 
00656   void InitTruthCallback(TruthCallback *cb) { truth_cb_ = cb; }
00657 
00659   CubeRecoContext *GetCubeRecoContext() const;
00660 
00661   void set_min_orientation_margin(double margin);
00662 
00667   void GetBlockTextOrientations(int** block_orientation,
00668                                 bool** vertical_writing);
00669 
00671   BLOCK_LIST* FindLinesCreateBlockList();
00672 
00678   static void DeleteBlockList(BLOCK_LIST* block_list);
00679  /* @} */
00680 
00681  protected:
00682 
00684   TESS_LOCAL bool InternalSetImage();
00685 
00690   TESS_LOCAL virtual void Threshold(Pix** pix);
00691 
00696   TESS_LOCAL int FindLines();
00697 
00699   TESS_LOCAL void ClearResults();
00700 
00706   TESS_LOCAL LTRResultIterator* GetLTRIterator();
00707 
00714   TESS_LOCAL int TextLength(int* blob_count);
00715 
00717   /* @{ */
00718 
00723   TESS_LOCAL void AdaptToCharacter(const char *unichar_repr,
00724                                    int length,
00725                                    float baseline,
00726                                    float xheight,
00727                                    float descender,
00728                                    float ascender);
00729 
00731   TESS_LOCAL PAGE_RES* RecognitionPass1(BLOCK_LIST* block_list);
00732   TESS_LOCAL PAGE_RES* RecognitionPass2(BLOCK_LIST* block_list, PAGE_RES* pass1_result);
00733 
00735 
00736   TESS_LOCAL void DetectParagraphs(int debug_level);
00737 
00742   TESS_LOCAL static int TesseractExtractResult(char** text,
00743                                     int** lengths,
00744                                     float** costs,
00745                                     int** x0,
00746                                     int** y0,
00747                                     int** x1,
00748                                     int** y1,
00749                                     PAGE_RES* page_res);
00750 
00751   TESS_LOCAL const PAGE_RES* GetPageRes() const {
00752     return page_res_;
00753   };
00754   /* @} */
00755 
00756  protected:
00757   Tesseract*        tesseract_;       
00758   Tesseract*        osd_tesseract_;   
00759   EquationDetect*   equ_detect_;      
00760   ImageThresholder* thresholder_;     
00761   GenericVector<ParagraphModel *>* paragraph_models_;
00762   BLOCK_LIST*       block_list_;      
00763   PAGE_RES*         page_res_;        
00764   STRING*           input_file_;      
00765   STRING*           output_file_;     
00766   STRING*           datapath_;        
00767   STRING*           language_;        
00768   OcrEngineMode last_oem_requested_;  
00769   bool          recognition_done_;   
00770   TruthCallback *truth_cb_;           
00771 
00776   /* @{ */
00777   int rect_left_;
00778   int rect_top_;
00779   int rect_width_;
00780   int rect_height_;
00781   int image_width_;
00782   int image_height_;
00783   /* @} */
00784 
00785 };
00786 
00787 }  // namespace tesseract.
00788 
00789 #endif  // TESSERACT_API_BASEAPI_H__