Tesseract  3.02
tesseract-ocr/ccmain/cube_reco_context.cpp
Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        cube_reco_context.cpp
00003  * Description: Implementation of the Cube Recognition Context Class
00004  * Author:    Ahmad Abdulkader
00005  * Created:   2007
00006  *
00007  * (C) Copyright 2008, Google Inc.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
#include <limits.h>

#include <new>
#include <string>

#include "cube_reco_context.h"

#include "classifier_factory.h"
#include "cube_tuning_params.h"
#include "dict.h"
#include "feature_bmp.h"
#include "tessdatamanager.h"
#include "tesseractclass.h"
#include "tess_lang_model.h"
00032 
00033 namespace tesseract {
00034 
00035 // Instantiate a CubeRecoContext object using a Tesseract object.
00036 // CubeRecoContext will not take ownership of tess_obj, but will
00037 // record the pointer to it and will make use of various Tesseract
00038 // components (language model, flags, etc). Thus the caller should
00039 // keep tess_obj alive so long as the instantiated CubeRecoContext is used.
00040 CubeRecoContext::CubeRecoContext(Tesseract *tess_obj) {
00041   tess_obj_ = tess_obj;
00042   lang_ = "";
00043   loaded_ = false;
00044   lang_mod_ = NULL;
00045   params_ = NULL;
00046   char_classifier_ = NULL;
00047   char_set_ = NULL;
00048   word_size_model_ = NULL;
00049   char_bigrams_ = NULL;
00050   word_unigrams_ = NULL;
00051   noisy_input_ = false;
00052   size_normalization_ = false;
00053 }
00054 
00055 CubeRecoContext::~CubeRecoContext() {
00056   if (char_classifier_ != NULL) {
00057     delete char_classifier_;
00058     char_classifier_ = NULL;
00059   }
00060 
00061   if (word_size_model_ != NULL) {
00062     delete word_size_model_;
00063     word_size_model_ = NULL;
00064   }
00065 
00066   if (char_set_ != NULL) {
00067     delete char_set_;
00068     char_set_ = NULL;
00069   }
00070 
00071   if (char_bigrams_ != NULL) {
00072     delete char_bigrams_;
00073     char_bigrams_ = NULL;
00074   }
00075 
00076   if (word_unigrams_ != NULL) {
00077     delete word_unigrams_;
00078     word_unigrams_ = NULL;
00079   }
00080 
00081   if (lang_mod_ != NULL) {
00082     delete lang_mod_;
00083     lang_mod_ = NULL;
00084   }
00085 
00086   if (params_ != NULL) {
00087     delete params_;
00088     params_ = NULL;
00089   }
00090 }
00091 
00092 // Returns the path of the data files by looking up the TESSDATA_PREFIX
00093 // environment variable and appending a "tessdata" directory to it
00094 bool CubeRecoContext::GetDataFilePath(string *path) const {
00095   *path = tess_obj_->datadir.string();
00096   return true;
00097 }
00098 
00099 // The object initialization function that loads all the necessary
00100 // components of a RecoContext.  TessdataManager is used to load the
00101 // data from [lang].traineddata file.  If TESSDATA_CUBE_UNICHARSET
00102 // component is present, Cube will be instantiated with the unicharset
00103 // specified in this component and the corresponding dictionary
00104 // (TESSDATA_CUBE_SYSTEM_DAWG), and will map Cube's unicharset to
00105 // Tesseract's. Otherwise, TessdataManager will assume that Cube will
00106 // be using Tesseract's unicharset and dawgs, and will load the
00107 // unicharset from the TESSDATA_UNICHARSET component and will load the
00108 // dawgs from TESSDATA_*_DAWG components.
00109 bool CubeRecoContext::Load(TessdataManager *tessdata_manager,
00110                            UNICHARSET *tess_unicharset) {
00111   ASSERT_HOST(tess_obj_ != NULL);
00112   tess_unicharset_ = tess_unicharset;
00113   string data_file_path;
00114 
00115   // Get the data file path.
00116   if (GetDataFilePath(&data_file_path) == false) {
00117     fprintf(stderr, "Unable to get data file path\n");
00118     return false;
00119   }
00120 
00121   // Get the language from the Tesseract object.
00122   lang_ = tess_obj_->lang.string();
00123 
00124   // Create the char set.
00125   if ((char_set_ =
00126        CharSet::Create(tessdata_manager, tess_unicharset)) == NULL) {
00127     fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to load "
00128             "CharSet\n");
00129     return false;
00130   }
00131   // Create the language model.
00132   string lm_file_name = data_file_path + lang_ + ".cube.lm";
00133   string lm_params;
00134   if (!CubeUtils::ReadFileToString(lm_file_name, &lm_params)) {
00135     fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to read cube "
00136             "language model params from %s\n", lm_file_name.c_str());
00137     return false;
00138   }
00139   lang_mod_ = new TessLangModel(lm_params, data_file_path,
00140                                 tess_obj_->getDict().load_system_dawg,
00141                                 tessdata_manager, this);
00142   if (lang_mod_ == NULL) {
00143     fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to create "
00144             "TessLangModel\n");
00145     return false;
00146   }
00147 
00148   // Create the optional char bigrams object.
00149   char_bigrams_ = CharBigrams::Create(data_file_path, lang_);
00150 
00151   // Create the optional word unigrams object.
00152   word_unigrams_ = WordUnigrams::Create(data_file_path, lang_);
00153 
00154   // Create the optional size model.
00155   word_size_model_ = WordSizeModel::Create(data_file_path, lang_,
00156     char_set_, Contextual());
00157 
00158   // Load tuning params.
00159   params_ = CubeTuningParams::Create(data_file_path, lang_);
00160   if (params_ == NULL) {
00161     fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to read "
00162             "CubeTuningParams from %s\n", data_file_path.c_str());
00163     return false;
00164   }
00165 
00166   // Create the char classifier.
00167   char_classifier_ = CharClassifierFactory::Create(data_file_path, lang_,
00168                                                    lang_mod_, char_set_,
00169                                                    params_);
00170   if (char_classifier_ == NULL) {
00171     fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to load "
00172             "CharClassifierFactory object from %s\n", data_file_path.c_str());
00173     return false;
00174   }
00175 
00176   loaded_ = true;
00177 
00178   return true;
00179 }
00180 
00181 // Creates a CubeRecoContext object using a tesseract object
00182 CubeRecoContext * CubeRecoContext::Create(Tesseract *tess_obj,
00183                                           TessdataManager *tessdata_manager,
00184                                           UNICHARSET *tess_unicharset) {
00185   // create the object
00186   CubeRecoContext *cntxt = new CubeRecoContext(tess_obj);
00187   if (cntxt == NULL) {
00188     fprintf(stderr, "Cube ERROR (CubeRecoContext::Create): unable to create "
00189             "CubeRecoContext object\n");
00190     return NULL;
00191   }
00192   // load the necessary components
00193   if (cntxt->Load(tessdata_manager, tess_unicharset) == false) {
00194     fprintf(stderr, "Cube ERROR (CubeRecoContext::Create): unable to init "
00195             "CubeRecoContext object\n");
00196     delete cntxt;
00197     return NULL;
00198   }
00199   // success
00200   return cntxt;
00201 }
}  // namespace tesseract