Tesseract
3.02
|
00001 /********************************************************************** 00002 * File: cube_object.h 00003 * Description: Declaration of the Cube Object Class 00004 * Author: Ahmad Abdulkader 00005 * Created: 2007 00006 * 00007 * (C) Copyright 2008, Google Inc. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 * 00018 **********************************************************************/ 00019 00020 // The CubeObject class is the main class used to perform recognition of 00021 // a specific char_samp as a single word. 00022 // To recognize a word, a CubeObject is constructed for this word. 00023 // A Call to RecognizeWord is then issued specifying the language model that 00024 // will be used during recognition. If none is specified, the default language 00025 // model in the CubeRecoContext is used. The CubeRecoContext is passed at 00026 // construction time 00027 // 00028 // The typical usage pattern for Cube is shown below: 00029 // 00030 // // Create and initialize Tesseract object and get its 00031 // // CubeRecoContext object (note that Tesseract object owns it, 00032 // // so it will be freed when the Tesseract object is freed). 
00033 // tesseract::Tesseract *tess_obj = new tesseract::Tesseract(); 00034 // tess_obj->init_tesseract(data_path, lang, tesseract::OEM_CUBE_ONLY); 00035 // CubeRecoContext *cntxt = tess_obj->GetCubeRecoContext(); 00036 // CHECK(cntxt != NULL) << "Unable to create a Cube reco context"; 00037 // . 00038 // . 00039 // . 00040 // // Do this to recognize a word in pix whose co-ordinates are 00041 // // (left,top,width,height) 00042 // tesseract::CubeObject *cube_obj; 00043 // cube_obj = new tesseract::CubeObject(cntxt, pix, 00044 // left, top, width, height); 00045 // 00046 // // Get back Cube's list of answers 00047 // tesseract::WordAltList *alt_list = cube_obj->RecognizeWord(); 00048 // CHECK(alt_list != NULL && alt_list->AltCount() > 0); 00049 // 00050 // // Get the string and cost of every alternate 00051 // for (int alt = 0; alt < alt_list->AltCount(); alt++) { 00052 // // Return the result as a UTF-32 string 00053 // string_32 res_str32 = alt_list->Alt(alt); 00054 // // Convert to UTF8 if need be 00055 // string res_str; 00056 // CubeUtils::UTF32ToUTF8(res_str32.c_str(), &res_str); 00057 // // Get the string cost. This should get bigger as you go deeper 00058 // // in the list 00059 // int cost = alt_list->AltCost(alt); 00060 // } 00061 // 00062 // // Call this once you are done recognizing this word 00063 // delete cube_obj; 00064 // 00065 // // Call this once you are done recognizing all words 00066 // // for the current language 00067 // delete tess_obj; 00068 // 00069 // Note that if the language supports "Italics" (see the CubeRecoContext), the 00070 // RecognizeWord function attempts to de-slant the word. 
00071 00072 #ifndef CUBE_OBJECT_H 00073 #define CUBE_OBJECT_H 00074 00075 #include "img.h" 00076 #include "char_samp.h" 00077 #include "word_altlist.h" 00078 #include "beam_search.h" 00079 #include "cube_search_object.h" 00080 #include "tess_lang_model.h" 00081 #include "cube_reco_context.h" 00082 00083 namespace tesseract { 00084 00085 // minimum aspect ratio needed to normalize a char_samp before recognition 00086 static const float kMinNormalizationAspectRatio = 3.5; 00087 // minimum probability a top alt choice must meet before having 00088 // deslanted processing applied to it 00089 static const float kMinProbSkipDeslanted = 0.25; 00090 00091 class CubeObject { 00092 public: 00093 // Different flavors of constructor. They just differ in the way the 00094 // word image is specified 00095 CubeObject(CubeRecoContext *cntxt, CharSamp *char_samp); 00096 CubeObject(CubeRecoContext *cntxt, IMAGE *img, 00097 int left, int top, int wid, int hgt); 00098 CubeObject(CubeRecoContext *cntxt, Pix *pix, 00099 int left, int top, int wid, int hgt); 00100 ~CubeObject(); 00101 00102 // Perform the word recognition using the specified language mode. If none 00103 // is specified, the default language model in the CubeRecoContext is used. 00104 // Returns the sorted list of alternate word answers 00105 WordAltList *RecognizeWord(LangModel *lang_mod = NULL); 00106 // Same as RecognizeWord but recognizes as a phrase 00107 WordAltList *RecognizePhrase(LangModel *lang_mod = NULL); 00108 // Computes the cost of a specific string. This is done by performing 00109 // recognition of a language model that allows only the specified word. 00110 // The alternate list(s) will be permanently modified. 00111 int WordCost(const char *str); 00112 // Recognizes a single character and returns the list of results. 
00113 CharAltList *RecognizeChar(); 00114 00115 // Returns the BeamSearch object that resulted from the last call to 00116 // RecognizeWord 00117 inline BeamSearch *BeamObj() const { 00118 return (deslanted_ == true ? deslanted_beam_obj_ : beam_obj_); 00119 } 00120 // Returns the WordAltList object that resulted from the last call to 00121 // RecognizeWord 00122 inline WordAltList *AlternateList() const { 00123 return (deslanted_ == true ? deslanted_alt_list_ : alt_list_); 00124 } 00125 // Returns the CubeSearchObject object that resulted from the last call to 00126 // RecognizeWord 00127 inline CubeSearchObject *SrchObj() const { 00128 return (deslanted_ == true ? deslanted_srch_obj_ : srch_obj_); 00129 } 00130 // Returns the CharSamp object that resulted from the last call to 00131 // RecognizeWord. Note that this object is not necessarily identical to the 00132 // one passed at construction time as normalization might have occurred 00133 inline CharSamp *CharSample() const { 00134 return (deslanted_ == true ? deslanted_char_samp_ : char_samp_); 00135 } 00136 00137 // Set the ownership of the CharSamp 00138 inline void SetCharSampOwnership(bool own_char_samp) { 00139 own_char_samp_ = own_char_samp; 00140 } 00141 00142 protected: 00143 // Normalize the CharSamp if its aspect ratio exceeds the below constant. 00144 bool Normalize(); 00145 00146 private: 00147 // minimum segment count needed to normalize a char_samp before recognition 00148 static const int kMinNormalizationSegmentCnt = 4; 00149 00150 // Data member initialization function 00151 void Init(); 00152 // Free alternate lists. 00153 void Cleanup(); 00154 // Perform the actual recognition using the specified language mode. If none 00155 // is specified, the default language model in the CubeRecoContext is used. 00156 // Returns the sorted list of alternate answers. 
Called by both 00157 // RecognizerWord (word_mode is true) or RecognizePhrase (word mode is false) 00158 WordAltList *Recognize(LangModel *lang_mod, bool word_mode); 00159 00160 CubeRecoContext *cntxt_; 00161 BeamSearch *beam_obj_; 00162 BeamSearch *deslanted_beam_obj_; 00163 bool offline_mode_; 00164 bool own_char_samp_; 00165 bool deslanted_; 00166 CharSamp *char_samp_; 00167 CharSamp *deslanted_char_samp_; 00168 CubeSearchObject *srch_obj_; 00169 CubeSearchObject *deslanted_srch_obj_; 00170 WordAltList *alt_list_; 00171 WordAltList *deslanted_alt_list_; 00172 }; 00173 } 00174 00175 #endif // CUBE_OBJECT_H