Tesseract  3.02
tesseract-ocr/cube/cube_object.h
Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        cube_object.h
00003  * Description: Declaration of the Cube Object Class
00004  * Author:    Ahmad Abdulkader
00005  * Created:   2007
00006  *
00007  * (C) Copyright 2008, Google Inc.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
00020 // The CubeObject class is the main class used to perform recognition of
00021 // a specific char_samp as a single word.
00022 // To recognize a word, a CubeObject is constructed for this word.
00023 // A call to RecognizeWord is then issued specifying the language model that
00024 // will be used during recognition. If none is specified, the default language
00025 // model in the CubeRecoContext is used. The CubeRecoContext is passed at
00026 // construction time.
00027 //
00028 // The typical usage pattern for Cube is shown below:
00029 //
00030 //         // Create and initialize Tesseract object and get its
00031 //         // CubeRecoContext object (note that Tesseract object owns it,
00032 //         // so it will be freed when the Tesseract object is freed).
00033 //         tesseract::Tesseract *tess_obj =  new tesseract::Tesseract();
00034 //         tess_obj->init_tesseract(data_path, lang, tesseract::OEM_CUBE_ONLY);
00035 //         CubeRecoContext *cntxt = tess_obj->GetCubeRecoContext();
00036 //         CHECK(cntxt != NULL) << "Unable to create a Cube reco context";
00037 //         .
00038 //         .
00039 //         .
00040 //         // Do this to recognize a word in pix whose co-ordinates are
00041 //         // (left,top,width,height)
00042 //         tesseract::CubeObject *cube_obj;
00043 //         cube_obj = new tesseract::CubeObject(cntxt, pix,
00044 //                                              left, top, width, height);
00045 //
00046 //         // Get back Cube's list of answers
00047 //         tesseract::WordAltList *alt_list = cube_obj->RecognizeWord();
00048 //         CHECK(alt_list != NULL && alt_list->AltCount() > 0);
00049 //
00050 //         // Get the string and cost of every alternate
00051 //         for (int alt = 0; alt < alt_list->AltCount(); alt++) {
00052 //           // Return the result as a UTF-32 string
00053 //           string_32 res_str32 = alt_list->Alt(alt);
00054 //           // Convert to UTF8 if need be
00055 //           string res_str;
00056 //           CubeUtils::UTF32ToUTF8(res_str32.c_str(), &res_str);
00057 //           // Get the string cost. This should get bigger as you go deeper
00058 //           // in the list
00059 //           int cost = alt_list->AltCost(alt);
00060 //         }
00061 //
00062 //         // Call this once you are done recognizing this word
00063 //         delete cube_obj;
00064 //
00065 //         // Call this once you are done recognizing all words
00066 //         // for the current language
00067 //         delete tess_obj;
00068 //
00069 // Note that if the language supports "Italics" (see the CubeRecoContext), the
00070 // RecognizeWord function attempts to de-slant the word.
00071 
00072 #ifndef CUBE_OBJECT_H
00073 #define CUBE_OBJECT_H
00074 
00075 #include "img.h"
00076 #include "char_samp.h"
00077 #include "word_altlist.h"
00078 #include "beam_search.h"
00079 #include "cube_search_object.h"
00080 #include "tess_lang_model.h"
00081 #include "cube_reco_context.h"
00082 
00083 namespace tesseract {
00084 
00085 // Minimum aspect ratio needed to normalize a char_samp before recognition.
00086 static const float kMinNormalizationAspectRatio = 3.5;  // NOTE(review): ratio orientation (w/h vs. h/w) is not shown in this header
00087 // Minimum probability a top alt choice must meet before having
00088 // deslanted processing applied to it.
00089 static const float kMinProbSkipDeslanted = 0.25;  // NOTE(review): the name suggests deslanting is *skipped* at/above this value — confirm which reading is right
00090 
00091 class CubeObject {
00092  public:
00093   // Constructors. The overloads differ only in how the word image is given:
00094   // as a CharSamp, or as a (left, top, wid, hgt) region of an IMAGE or a Pix.
00095   CubeObject(CubeRecoContext *cntxt, CharSamp *char_samp);
00096   CubeObject(CubeRecoContext *cntxt, IMAGE *img,
00097              int left, int top, int wid, int hgt);
00098   CubeObject(CubeRecoContext *cntxt, Pix *pix,
00099              int left, int top, int wid, int hgt);
00100   ~CubeObject();  // NOTE(review): no copy operations are declared despite raw pointer members — confirm instances are never copied
00101 
00102   // Performs word recognition using the specified language model. If none
00103   // is specified, the default language model in the CubeRecoContext is used.
00104   // Returns the sorted list of alternate word answers.
00105   WordAltList *RecognizeWord(LangModel *lang_mod = NULL);
00106   // Same as RecognizeWord, but recognizes the image as a phrase.
00107   WordAltList *RecognizePhrase(LangModel *lang_mod = NULL);
00108   // Computes the cost of a specific string. This is done by performing
00109   // recognition with a language model that allows only the specified word.
00110   // The alternate list(s) will be permanently modified.
00111   int WordCost(const char *str);
00112   // Recognizes a single character and returns the list of results.
00113   CharAltList *RecognizeChar();
00114 
00115   // Returns the BeamSearch object that resulted from the last call to
00116   // RecognizeWord (the deslanted one when deslanted_ is set).
00117   inline BeamSearch *BeamObj() const {
00118     return (deslanted_ == true ? deslanted_beam_obj_ : beam_obj_);
00119   }
00120   // Returns the WordAltList object that resulted from the last call to
00121   // RecognizeWord (the deslanted one when deslanted_ is set).
00122   inline WordAltList *AlternateList() const {
00123     return (deslanted_ == true ? deslanted_alt_list_ : alt_list_);
00124   }
00125   // Returns the CubeSearchObject object that resulted from the last call to
00126   // RecognizeWord (the deslanted one when deslanted_ is set).
00127   inline CubeSearchObject *SrchObj() const {
00128     return (deslanted_ == true ? deslanted_srch_obj_ : srch_obj_);
00129   }
00130   // Returns the CharSamp object that resulted from the last call to
00131   // RecognizeWord (the deslanted one when deslanted_ is set). It is not
00132   // necessarily the one passed at construction, as normalization may occur.
00133   inline CharSamp *CharSample() const {
00134     return (deslanted_ == true ? deslanted_char_samp_ : char_samp_);
00135   }
00136 
00137   // Sets the CharSamp ownership flag (presumably: whether this object frees it — confirm).
00138   inline void SetCharSampOwnership(bool own_char_samp) {
00139     own_char_samp_ = own_char_samp;
00140   }
00141 
00142  protected:
00143   // Normalizes the CharSamp if its aspect ratio exceeds a threshold constant.
00144   bool Normalize();  // NOTE(review): threshold is presumably kMinNormalizationAspectRatio — confirm in the implementation
00145 
00146  private:
00147   // Minimum segment count needed to normalize a char_samp before recognition.
00148   static const int kMinNormalizationSegmentCnt = 4;
00149 
00150   // Data member initialization function.
00151   void Init();
00152   // Free alternate lists.
00153   void Cleanup();
00154   // Performs the actual recognition using the specified language model. If none
00155   // is specified, the default language model in the CubeRecoContext is used.
00156   // Returns the sorted list of alternate answers. Called by both
00157   // RecognizeWord (word_mode is true) and RecognizePhrase (word_mode is false).
00158   WordAltList *Recognize(LangModel *lang_mod, bool word_mode);
00159 
00160   CubeRecoContext *cntxt_;  // recognition context (passed at construction time)
00161   BeamSearch *beam_obj_;  // returned by BeamObj() when deslanted_ is false
00162   BeamSearch *deslanted_beam_obj_;  // returned by BeamObj() when deslanted_ is true
00163   bool offline_mode_;  // not referenced elsewhere in this header
00164   bool own_char_samp_;  // written by SetCharSampOwnership()
00165   bool deslanted_;  // selects which member of each pair the accessors return
00166   CharSamp *char_samp_;  // returned by CharSample() when deslanted_ is false
00167   CharSamp *deslanted_char_samp_;  // returned by CharSample() when deslanted_ is true
00168   CubeSearchObject *srch_obj_;  // returned by SrchObj() when deslanted_ is false
00169   CubeSearchObject *deslanted_srch_obj_;  // returned by SrchObj() when deslanted_ is true
00170   WordAltList *alt_list_;  // returned by AlternateList() when deslanted_ is false
00171   WordAltList *deslanted_alt_list_;  // returned by AlternateList() when deslanted_ is true
00172 };
00173 }
00174 
00175 #endif  // CUBE_OBJECT_H