tesseract-doc/cube__reco__context_8h_source.html

00001 /**********************************************************************
00002  * File:        cube_reco_context.h
00003  * Description: Declaration of the Cube Recognition Context Class
00004  * Author:    Ahmad Abdulkader
00005  * Created:   2007
00006  *
00007  * (C) Copyright 2008, Google Inc.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019
00020 // The CubeRecoContext class abstracts the Cube OCR Engine. Typically a process
00021 // (or a thread) would create one CubeRecoContext object per language.
00022 // The CubeRecoContext object also provides methods to get and set the
00023 // different attributes of the Cube OCR Engine.
00024
00025 #ifndef CUBE_RECO_CONTEXT_H
00026 #define CUBE_RECO_CONTEXT_H
00027
00028 #include <string>
00029 #include "neural_net.h"
00030 #include "lang_model.h"
00031 #include "classifier_base.h"
00032 #include "feature_base.h"
00033 #include "char_set.h"
00034 #include "word_size_model.h"
00035 #include "char_bigrams.h"
00036 #include "word_unigrams.h"
00037
00038 namespace tesseract {
00039
00040 class Tesseract;
00041 class TessdataManager;
00042
00043 class CubeRecoContext {
00044  public:
00045   // Reading order of a language: L2R (left-to-right) or R2L (right-to-left)
00046   enum ReadOrder {
00047    L2R,
00048    R2L
00049   };
00050
00051   // Instantiate using a Tesseract object. NOTE(review): presumably kept in tess_obj_, which is not owned - confirm in the .cpp
00052   CubeRecoContext(Tesseract *tess_obj);
00053
00054   ~CubeRecoContext();
00055
00056   // Accessor functions: each one returns the matching member as-is. The pointer accessors hand out non-const objects from a const method.
00057   inline const string & Lang() const { return lang_; }
00058   inline CharSet *CharacterSet() const { return char_set_; }
00059   const UNICHARSET *TessUnicharset() const { return tess_unicharset_; }
00060   inline CharClassifier *Classifier() const { return char_classifier_; }
00061   inline WordSizeModel *SizeModel() const { return word_size_model_; }
00062   inline CharBigrams *Bigrams() const { return char_bigrams_; }
00063   inline WordUnigrams *WordUnigramsObj() const { return word_unigrams_; }
00064   inline TuningParams *Params() const { return params_; }
00065   inline LangModel *LangMod() const { return lang_mod_; }
00066
00067   // The reading order of the language: R2L only when lang_ is the code ara, L2R for every other language
00068   inline ReadOrder ReadingOrder() const {
00069     return ((lang_ == "ara") ? R2L : L2R);
00070   }
00071
00072   // Does the language support case: false for the codes ara and hin, true otherwise
00073   inline bool HasCase() const {
00074     return (lang_ != "ara" && lang_ != "hin");
00075   }
00076
00077   inline bool Cursive() const {  // true only when lang_ is the code ara
00078     return (lang_ == "ara");
00079   }
00080
00081   inline bool HasItalics() const {  // false for the codes ara, hin and uk, true otherwise
00082     return (lang_ != "ara" && lang_ != "hin" && lang_ != "uk");
00083   }
00084
00085   inline bool Contextual() const {  // true only when lang_ is the code ara
00086     return (lang_ == "ara");
00087   }
00088
00089   // RecoContext runtime flags: getters followed by the matching setters.
00090   inline bool SizeNormalization() const { return size_normalization_; }  // stored in this object
00091   inline bool NoisyInput() const { return noisy_input_; }  // stored in this object
00092   inline bool OOD() const { return lang_mod_->OOD(); }  // forwarded to lang_mod_ with no null check
00093   inline bool Numeric() const { return lang_mod_->Numeric(); }  // forwarded to lang_mod_ with no null check
00094   inline bool WordList() const { return lang_mod_->WordList(); }  // forwarded to lang_mod_ with no null check
00095   inline bool Punc() const { return lang_mod_->Punc(); }  // forwarded to lang_mod_ with no null check
00096   inline bool CaseSensitive() const {  // forwarded to char_classifier_ with no null check
00097     return char_classifier_->CaseSensitive();
00098   }
00099
00100   inline void SetSizeNormalization(bool size_normalization) {
00101     size_normalization_ = size_normalization;
00102   }
00103   inline void SetNoisyInput(bool noisy_input) {
00104     noisy_input_ = noisy_input;
00105   }
00106   inline void SetOOD(bool ood_enabled) {  // lang_mod_ must be non-null
00107     lang_mod_->SetOOD(ood_enabled);
00108   }
00109   inline void SetNumeric(bool numeric_enabled) {  // lang_mod_ must be non-null
00110     lang_mod_->SetNumeric(numeric_enabled);
00111   }
00112   inline void SetWordList(bool word_list_enabled) {  // lang_mod_ must be non-null
00113     lang_mod_->SetWordList(word_list_enabled);
00114   }
00115   inline void SetPunc(bool punc_enabled) {  // lang_mod_ must be non-null
00116     lang_mod_->SetPunc(punc_enabled);
00117   }
00118   inline void SetCaseSensitive(bool case_sensitive) {  // char_classifier_ must be non-null
00119     char_classifier_->SetCaseSensitive(case_sensitive);
00120   }
00121   inline tesseract::Tesseract *TesseractObject() const {  // returns the non-owned tess_obj_ pointer
00122     return tess_obj_;
00123   }
00124
00125   // Returns the path of the data files. NOTE(review): presumably written to *path, with the bool result signalling success - confirm in the .cpp
00126   bool GetDataFilePath(string *path) const;
00127   // Creates a CubeRecoContext object using a tesseract object. Data
00128   // files are loaded via the tessdata_manager, and the tesseract
00129   // unicharset is provided in order to map Cube's unicharset to
00130   // Tesseract's in the case where the two unicharsets differ.
00131   static CubeRecoContext *Create(Tesseract *tess_obj,
00132                                  TessdataManager *tessdata_manager,
00133                                  UNICHARSET *tess_unicharset);
00134
00135  private:
00136   bool loaded_;  // NOTE(review): presumably set once Load() succeeds - confirm in the .cpp
00137   string lang_;  // language code compared against ara, hin and uk by the predicates above
00138   CharSet *char_set_;
00139   UNICHARSET *tess_unicharset_;
00140   WordSizeModel *word_size_model_;
00141   CharClassifier *char_classifier_;  // target of CaseSensitive() and SetCaseSensitive()
00142   CharBigrams *char_bigrams_;
00143   WordUnigrams *word_unigrams_;
00144   TuningParams *params_;
00145   LangModel *lang_mod_;  // target of the OOD, Numeric, WordList and Punc getters and setters
00146   Tesseract *tess_obj_;  // CubeRecoContext does not own this pointer
00147   bool size_normalization_;
00148   bool noisy_input_;
00149
00150   // Loads and initializes all the necessary components of a
00151   // CubeRecoContext. See .cpp for more details.
00152   bool Load(TessdataManager *tessdata_manager,
00153             UNICHARSET *tess_unicharset);
00154 };
00155 }
00156
00157 #endif  // CUBE_RECO_CONTEXT_H