Tesseract  3.02
tesseract-ocr/ccmain/cube_reco_context.h
Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        cube_reco_context.h
00003  * Description: Declaration of the Cube Recognition Context Class
00004  * Author:    Ahmad Abdulkader
00005  * Created:   2007
00006  *
00007  * (C) Copyright 2008, Google Inc.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
00020 // The CubeRecoContext class abstracts the Cube OCR Engine. Typically a process
00021 // (or a thread) would create one CubeRecoContext object per language.
00022 // The CubeRecoContext object also provides methods to get and set the
00023 // different attributes of the Cube OCR Engine.
00024 
00025 #ifndef CUBE_RECO_CONTEXT_H
00026 #define CUBE_RECO_CONTEXT_H
00027 
00028 #include <string>
00029 #include "neural_net.h"
00030 #include "lang_model.h"
00031 #include "classifier_base.h"
00032 #include "feature_base.h"
00033 #include "char_set.h"
00034 #include "word_size_model.h"
00035 #include "char_bigrams.h"
00036 #include "word_unigrams.h"
00037 
00038 namespace tesseract {
00039 
00040 class Tesseract;  // forward declaration; only used through pointers in this header
00041 class TessdataManager;  // forward declaration; only used through pointers in this header
00042 
00043 class CubeRecoContext {
00044  public:
00045   // Reading order of a language, as reported by ReadingOrder()
00046   enum ReadOrder {
00047    L2R,  // left-to-right; returned for every language except "ara"
00048    R2L   // right-to-left; returned only when the language is "ara"
00049   };
00050 
00051   // Instantiate using a Tesseract object
00052   CubeRecoContext(Tesseract *tess_obj);
00053 
00054   ~CubeRecoContext();
00055 
00056   // Accessor functions: each one returns the matching private member as-is
00057   inline const string & Lang() const { return lang_; }
00058   inline CharSet *CharacterSet() const { return char_set_; }
00059   const UNICHARSET *TessUnicharset() const { return tess_unicharset_; }
00060   inline CharClassifier *Classifier() const { return char_classifier_; }
00061   inline WordSizeModel *SizeModel() const { return word_size_model_; }
00062   inline CharBigrams *Bigrams() const { return char_bigrams_; }
00063   inline WordUnigrams *WordUnigramsObj() const { return word_unigrams_; }
00064   inline TuningParams *Params() const { return params_; }
00065   inline LangModel *LangMod() const { return lang_mod_; }
00066 
00067   // Reading order of the language: R2L when lang_ is "ara", L2R otherwise
00068   inline ReadOrder ReadingOrder() const {
00069     return ((lang_ == "ara") ? R2L : L2R);
00070   }
00071 
00072   // Does the language support case: false for "ara" and "hin", true otherwise
00073   inline bool HasCase() const {
00074     return (lang_ != "ara" && lang_ != "hin");
00075   }
00076 
00077   inline bool Cursive() const {  // true only when lang_ is "ara"
00078     return (lang_ == "ara");
00079   }
00080 
00081   inline bool HasItalics() const {  // false for "ara", "hin" and "uk"
00082     return (lang_ != "ara" && lang_ != "hin" && lang_ != "uk");  // NOTE(review): "uk" is a two-letter code unlike "ara"/"hin" - confirm it matches the lang_ values actually used
00083   }
00084 
00085   inline bool Contextual() const {  // true only when lang_ is "ara"
00086     return (lang_ == "ara");
00087   }
00088 
00089   // RecoContext runtime flag getters; the first two read members of this object
00090   inline bool SizeNormalization() const { return size_normalization_; }
00091   inline bool NoisyInput() const { return noisy_input_; }
00092   inline bool OOD() const { return lang_mod_->OOD(); }  // read from lang_mod_, which is dereferenced without a null check here and below
00093   inline bool Numeric() const { return lang_mod_->Numeric(); }
00094   inline bool WordList() const { return lang_mod_->WordList(); }
00095   inline bool Punc() const { return lang_mod_->Punc(); }
00096   inline bool CaseSensitive() const {  // read from char_classifier_ (no null check)
00097     return char_classifier_->CaseSensitive();
00098   }
00099 
00100   inline void SetSizeNormalization(bool size_normalization) {  // stored in this object
00101     size_normalization_ = size_normalization;
00102   }
00103   inline void SetNoisyInput(bool noisy_input) {  // stored in this object
00104     noisy_input_ = noisy_input;
00105   }
00106   inline void SetOOD(bool ood_enabled) {  // forwarded to lang_mod_
00107     lang_mod_->SetOOD(ood_enabled);
00108   }
00109   inline void SetNumeric(bool numeric_enabled) {  // forwarded to lang_mod_
00110     lang_mod_->SetNumeric(numeric_enabled);
00111   }
00112   inline void SetWordList(bool word_list_enabled) {  // forwarded to lang_mod_
00113     lang_mod_->SetWordList(word_list_enabled);
00114   }
00115   inline void SetPunc(bool punc_enabled) {  // forwarded to lang_mod_
00116     lang_mod_->SetPunc(punc_enabled);
00117   }
00118   inline void SetCaseSensitive(bool case_sensitive) {  // forwarded to char_classifier_
00119     char_classifier_->SetCaseSensitive(case_sensitive);
00120   }
00121   inline tesseract::Tesseract *TesseractObject() const {  // returns tess_obj_, which this class does not own
00122     return tess_obj_;
00123   }
00124 
00125   // Returns the path of the data files; presumably written to *path with the bool result reporting success - confirm in the implementation
00126   bool GetDataFilePath(string *path) const;
00127   // Creates a CubeRecoContext object using a tesseract object. Data
00128   // files are loaded via the tessdata_manager, and the tesseract
00129   // unicharset is provided in order to map Cube's unicharset to
00130   // Tesseract's in the case where the two unicharsets differ.
00131   static CubeRecoContext *Create(Tesseract *tess_obj,
00132                                  TessdataManager *tessdata_manager,
00133                                  UNICHARSET *tess_unicharset);
00134 
00135  private:
00136   bool loaded_;  // presumably set once Load() has succeeded - confirm in the .cpp
00137   string lang_;  // language code; drives ReadingOrder(), HasCase(), Cursive(), HasItalics() and Contextual()
00138   CharSet *char_set_;
00139   UNICHARSET *tess_unicharset_;
00140   WordSizeModel *word_size_model_;
00141   CharClassifier *char_classifier_;
00142   CharBigrams *char_bigrams_;
00143   WordUnigrams *word_unigrams_;
00144   TuningParams *params_;
00145   LangModel *lang_mod_;
00146   Tesseract *tess_obj_;  // CubeRecoContext does not own this pointer
00147   bool size_normalization_;  // runtime flag, see SizeNormalization() / SetSizeNormalization()
00148   bool noisy_input_;  // runtime flag, see NoisyInput() / SetNoisyInput()
00149 
00150   // Loads and initializes all the necessary components of a
00151   // CubeRecoContext. See .cpp for more details.
00152   bool Load(TessdataManager *tessdata_manager,
00153             UNICHARSET *tess_unicharset);
00154 };
00155 }  // namespace tesseract
00156 
00157 #endif  // CUBE_RECO_CONTEXT_H