Tesseract
3.02
|
00001 00002 // File: ltrresultiterator.h 00003 // Description: Iterator for tesseract results in strict left-to-right 00004 // order that avoids using tesseract internal data structures. 00005 // Author: Ray Smith 00006 // Created: Fri Feb 26 11:01:06 PST 2010 00007 // 00008 // (C) Copyright 2010, Google Inc. 00009 // Licensed under the Apache License, Version 2.0 (the "License"); 00010 // you may not use this file except in compliance with the License. 00011 // You may obtain a copy of the License at 00012 // http://www.apache.org/licenses/LICENSE-2.0 00013 // Unless required by applicable law or agreed to in writing, software 00014 // distributed under the License is distributed on an "AS IS" BASIS, 00015 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00016 // See the License for the specific language governing permissions and 00017 // limitations under the License. 00018 // 00020 00021 #ifndef TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H__ 00022 #define TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H__ 00023 00024 #include "platform.h" 00025 #include "pageiterator.h" 00026 #include "unicharset.h" 00027 00028 class BLOB_CHOICE_IT; 00029 class WERD_RES; 00030 00031 namespace tesseract { 00032 00033 class Tesseract; 00034 00035 // Class to iterate over tesseract results, providing access to all levels 00036 // of the page hierarchy, without including any tesseract headers or having 00037 // to handle any tesseract structures. 00038 // WARNING! This class points to data held within the TessBaseAPI class, and 00039 // therefore can only be used while the TessBaseAPI class still exists and 00040 // has not been subjected to a call of Init, SetImage, Recognize, Clear, End 00041 // DetectOS, or anything else that changes the internal PAGE_RES. 00042 // See apitypes.h for the definition of PageIteratorLevel. 00043 // See also base class PageIterator, which contains the bulk of the interface. 00044 // LTRResultIterator adds text-specific methods for access to OCR output. 00045 00046 class TESS_API LTRResultIterator : public PageIterator { 00047 friend class ChoiceIterator; 00048 public: 00049 // page_res and tesseract come directly from the BaseAPI. 00050 // The rectangle parameters are copied indirectly from the Thresholder, 00051 // via the BaseAPI. They represent the coordinates of some rectangle in an 00052 // original image (in top-left-origin coordinates) and therefore the top-left 00053 // needs to be added to any output boxes in order to specify coordinates 00054 // in the original image. See TessBaseAPI::SetRectangle. 00055 // The scale and scaled_yres are in case the Thresholder scaled the image 00056 // rectangle prior to thresholding. Any coordinates in tesseract's image 00057 // must be divided by scale before adding (rect_left, rect_top). 00058 // The scaled_yres indicates the effective resolution of the binary image 00059 // that tesseract has been given by the Thresholder. 00060 // After the constructor, Begin has already been called. 00061 LTRResultIterator(PAGE_RES* page_res, Tesseract* tesseract, 00062 int scale, int scaled_yres, 00063 int rect_left, int rect_top, 00064 int rect_width, int rect_height); 00065 virtual ~LTRResultIterator(); 00066 00067 // LTRResultIterators may be copied! This makes it possible to iterate over 00068 // all the objects at a lower level, while maintaining an iterator to 00069 // objects at a higher level. These constructors DO NOT CALL Begin, so 00070 // iterations will continue from the location of src. 00071 // TODO: For now the copy constructor and operator= only need the base class 00072 // versions, but if new data members are added, don't forget to add them! 00073 00074 // ============= Moving around within the page ============. 00075 00076 // See PageIterator. 00077 00078 // ============= Accessing data ==============. 00079 00080 // Returns the null terminated UTF-8 encoded text string for the current 00081 // object at the given level. Use delete [] to free after use. 00082 char* GetUTF8Text(PageIteratorLevel level) const; 00083 00084 // Set the string inserted at the end of each text line. "\n" by default. 00085 void SetLineSeparator(const char *new_line); 00086 00087 // Set the string inserted at the end of each paragraph. "\n" by default. 00088 void SetParagraphSeparator(const char *new_para); 00089 00090 // Returns the mean confidence of the current object at the given level. 00091 // The number should be interpreted as a percent probability. (0.0f-100.0f) 00092 float Confidence(PageIteratorLevel level) const; 00093 00094 // ============= Functions that refer to words only ============. 00095 00096 // Returns the font attributes of the current word. If iterating at a higher 00097 // level object than words, eg textlines, then this will return the 00098 // attributes of the first word in that textline. 00099 // The actual return value is a string representing a font name. It points 00100 // to an internal table and SHOULD NOT BE DELETED. Lifespan is the same as 00101 // the iterator itself, ie rendered invalid by various members of 00102 // TessBaseAPI, including Init, SetImage, End or deleting the TessBaseAPI. 00103 // Pointsize is returned in printers points (1/72 inch.) 00104 const char* WordFontAttributes(bool* is_bold, 00105 bool* is_italic, 00106 bool* is_underlined, 00107 bool* is_monospace, 00108 bool* is_serif, 00109 bool* is_smallcaps, 00110 int* pointsize, 00111 int* font_id) const; 00112 00113 // Return the name of the language used to recognize this word. 00114 // On error, NULL. Do not delete this pointer. 00115 const char* WordRecognitionLanguage() const; 00116 00117 // Return the overall directionality of this word. 00118 StrongScriptDirection WordDirection() const; 00119 00120 // Returns true if the current word was found in a dictionary. 00121 bool WordIsFromDictionary() const; 00122 00123 // Returns true if the current word is numeric. 00124 bool WordIsNumeric() const; 00125 00126 // Returns true if the word contains blamer information. 00127 bool HasBlamerInfo() const; 00128 00129 // Returns the pointer to ParamsTrainingBundle stored in the BlamerBundle 00130 // of the current word. 00131 void *GetParamsTrainingBundle() const; 00132 00133 // Returns a pointer to the string with blamer information for this word. 00134 // Assumes that the word's blamer_bundle is not NULL. 00135 const char *GetBlamerDebug() const; 00136 00137 // Returns a pointer to the string with misadaption information for this word. 00138 // Assumes that the word's blamer_bundle is not NULL. 00139 const char *GetBlamerMisadaptionDebug() const; 00140 00141 // Returns a null terminated UTF-8 encoded truth string for the current word. 00142 // Use delete [] to free after use. 00143 char* WordTruthUTF8Text() const; 00144 00145 // Returns a pointer to serialized choice lattice. 00146 // Fills lattice_size with the number of bytes in lattice data. 00147 const char *WordLattice(int *lattice_size) const; 00148 00149 // ============= Functions that refer to symbols only ============. 00150 00151 // Returns true if the current symbol is a superscript. 00152 // If iterating at a higher level object than symbols, eg words, then 00153 // this will return the attributes of the first symbol in that word. 00154 bool SymbolIsSuperscript() const; 00155 // Returns true if the current symbol is a subscript. 00156 // If iterating at a higher level object than symbols, eg words, then 00157 // this will return the attributes of the first symbol in that word. 00158 bool SymbolIsSubscript() const; 00159 // Returns true if the current symbol is a dropcap. 00160 // If iterating at a higher level object than symbols, eg words, then 00161 // this will return the attributes of the first symbol in that word. 00162 bool SymbolIsDropcap() const; 00163 00164 protected: 00165 const char *line_separator_; 00166 const char *paragraph_separator_; 00167 }; 00168 00169 // Class to iterate over the classifier choices for a single RIL_SYMBOL. 00170 class ChoiceIterator { 00171 public: 00172 // Construction is from a LTRResultIterator that points to the symbol of 00173 // interest. The ChoiceIterator allows a one-shot iteration over the 00174 // choices for this symbol and after that is is useless. 00175 explicit ChoiceIterator(const LTRResultIterator& result_it); 00176 ~ChoiceIterator(); 00177 00178 // Moves to the next choice for the symbol and returns false if there 00179 // are none left. 00180 bool Next(); 00181 00182 // ============= Accessing data ==============. 00183 00184 // Returns the null terminated UTF-8 encoded text string for the current 00185 // choice. 00186 // NOTE: Unlike LTRResultIterator::GetUTF8Text, the return points to an 00187 // internal structure and should NOT be delete[]ed to free after use. 00188 const char* GetUTF8Text() const; 00189 00190 // Returns the confidence of the current choice. 00191 // The number should be interpreted as a percent probability. (0.0f-100.0f) 00192 float Confidence() const; 00193 00194 private: 00195 // Pointer to the WERD_RES object owned by the API. 00196 WERD_RES* word_res_; 00197 // Iterator over the blob choices. 00198 BLOB_CHOICE_IT* choice_it_; 00199 }; 00200 00201 } // namespace tesseract. 00202 00203 #endif // TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H__