Tesseract
3.02
|
00001 00002 // File: resultiterator.h 00003 // Description: Iterator for tesseract results that is capable of 00004 // iterating in proper reading order over Bi Directional 00005 // (e.g. mixed Hebrew and English) text. 00006 // Author: David Eger 00007 // Created: Fri May 27 13:58:06 PST 2011 00008 // 00009 // (C) Copyright 2011, Google Inc. 00010 // Licensed under the Apache License, Version 2.0 (the "License"); 00011 // you may not use this file except in compliance with the License. 00012 // You may obtain a copy of the License at 00013 // http://www.apache.org/licenses/LICENSE-2.0 00014 // Unless required by applicable law or agreed to in writing, software 00015 // distributed under the License is distributed on an "AS IS" BASIS, 00016 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00017 // See the License for the specific language governing permissions and 00018 // limitations under the License. 00019 // 00021 00022 #ifndef TESSERACT_CCMAIN_RESULT_ITERATOR_H__ 00023 #define TESSERACT_CCMAIN_RESULT_ITERATOR_H__ 00024 00025 #include "platform.h" 00026 #include "ltrresultiterator.h" 00027 #include "genericvector.h" 00028 00029 class BLOB_CHOICE_IT; 00030 class WERD_RES; 00031 class STRING; 00032 00033 namespace tesseract { 00034 00035 class Tesseract; 00036 00037 class TESS_API ResultIterator : public LTRResultIterator { 00038 public: 00039 static ResultIterator *StartOfParagraph(const LTRResultIterator &resit); 00040 00045 virtual ~ResultIterator() {} 00046 00047 // ============= Moving around within the page ============. 00052 virtual void Begin(); 00053 00066 virtual bool Next(PageIteratorLevel level); 00067 00074 virtual bool IsAtBeginningOf(PageIteratorLevel level) const; 00075 00081 virtual bool IsAtFinalElement(PageIteratorLevel level, 00082 PageIteratorLevel element) const; 00083 00084 // ============= Accessing data ==============. 00085 00090 virtual char* GetUTF8Text(PageIteratorLevel level) const; 00091 00096 bool ParagraphIsLtr() const; 00097 00098 // ============= Exposed only for testing =============. 00099 00122 static void CalculateTextlineOrder( 00123 bool paragraph_is_ltr, 00124 const GenericVector<StrongScriptDirection> &word_dirs, 00125 GenericVectorEqEq<int> *reading_order); 00126 00127 static const int kMinorRunStart; 00128 static const int kMinorRunEnd; 00129 static const int kComplexWord; 00130 00131 protected: 00138 TESS_LOCAL explicit ResultIterator(const LTRResultIterator &resit); 00139 00140 private: 00145 bool CurrentParagraphIsLtr() const; 00146 00158 void CalculateTextlineOrder(bool paragraph_is_ltr, 00159 const LTRResultIterator &resit, 00160 GenericVectorEqEq<int> *indices) const; 00162 void CalculateTextlineOrder(bool paragraph_is_ltr, 00163 const LTRResultIterator &resit, 00164 GenericVector<StrongScriptDirection> *ssd, 00165 GenericVectorEqEq<int> *indices) const; 00166 00171 int LTRWordIndex() const; 00172 00177 void CalculateBlobOrder(GenericVector<int> *blob_indices) const; 00178 00180 void MoveToLogicalStartOfTextline(); 00181 00186 void MoveToLogicalStartOfWord(); 00187 00189 bool IsAtFinalSymbolOfWord() const; 00190 00192 bool IsAtFirstSymbolOfWord() const; 00193 00198 void AppendSuffixMarks(STRING *text) const; 00199 00201 void AppendUTF8WordText(STRING *text) const; 00202 00210 void IterateAndAppendUTF8TextlineText(STRING *text); 00211 00218 void AppendUTF8ParagraphText(STRING *text) const; 00219 00221 bool BidiDebug(int min_level) const; 00222 00223 bool current_paragraph_is_ltr_; 00224 00229 bool at_beginning_of_minor_run_; 00230 00232 bool in_minor_direction_; 00233 }; 00234 00235 } // namespace tesseract. 00236 00237 #endif // TESSERACT_CCMAIN_RESULT_ITERATOR_H__