Tesseract
3.02
|
00001 00002 // File: ltrresultiterator.cpp 00003 // Description: Iterator for tesseract results in strict left-to-right 00004 // order that avoids using tesseract internal data structures. 00005 // Author: Ray Smith 00006 // Created: Fri Feb 26 14:32:09 PST 2010 00007 // 00008 // (C) Copyright 2010, Google Inc. 00009 // Licensed under the Apache License, Version 2.0 (the "License"); 00010 // you may not use this file except in compliance with the License. 00011 // You may obtain a copy of the License at 00012 // http://www.apache.org/licenses/LICENSE-2.0 00013 // Unless required by applicable law or agreed to in writing, software 00014 // distributed under the License is distributed on an "AS IS" BASIS, 00015 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00016 // See the License for the specific language governing permissions and 00017 // limitations under the License. 00018 // 00020 00021 #include "ltrresultiterator.h" 00022 00023 #include "allheaders.h" 00024 #include "pageres.h" 00025 #include "strngs.h" 00026 #include "tesseractclass.h" 00027 00028 namespace tesseract { 00029 00030 LTRResultIterator::LTRResultIterator(PAGE_RES* page_res, Tesseract* tesseract, 00031 int scale, int scaled_yres, 00032 int rect_left, int rect_top, 00033 int rect_width, int rect_height) 00034 : PageIterator(page_res, tesseract, scale, scaled_yres, 00035 rect_left, rect_top, rect_width, rect_height), 00036 line_separator_("\n"), 00037 paragraph_separator_("\n") { 00038 } 00039 00040 LTRResultIterator::~LTRResultIterator() { 00041 } 00042 00043 // Returns the null terminated UTF-8 encoded text string for the current 00044 // object at the given level. Use delete [] to free after use. 00045 char* LTRResultIterator::GetUTF8Text(PageIteratorLevel level) const { 00046 if (it_->word() == NULL) return NULL; // Already at the end! 00047 STRING text; 00048 PAGE_RES_IT res_it(*it_); 00049 WERD_CHOICE* best_choice = res_it.word()->best_choice; 00050 ASSERT_HOST(best_choice != NULL); 00051 if (level == RIL_SYMBOL) { 00052 text = res_it.word()->BestUTF8(blob_index_, false); 00053 } else if (level == RIL_WORD) { 00054 text = best_choice->unichar_string(); 00055 } else { 00056 bool eol = false; // end of line? 00057 bool eop = false; // end of paragraph? 00058 do { // for each paragraph in a block 00059 do { // for each text line in a paragraph 00060 do { // for each word in a text line 00061 best_choice = res_it.word()->best_choice; 00062 ASSERT_HOST(best_choice != NULL); 00063 text += best_choice->unichar_string(); 00064 text += " "; 00065 res_it.forward(); 00066 eol = res_it.row() != res_it.prev_row(); 00067 } while (!eol); 00068 text.truncate_at(text.length() - 1); 00069 text += line_separator_; 00070 eop = res_it.block() != res_it.prev_block() || 00071 res_it.row()->row->para() != res_it.prev_row()->row->para(); 00072 } while (level != RIL_TEXTLINE && !eop); 00073 if (eop) text += paragraph_separator_; 00074 } while (level == RIL_BLOCK && res_it.block() == res_it.prev_block()); 00075 } 00076 int length = text.length() + 1; 00077 char* result = new char[length]; 00078 strncpy(result, text.string(), length); 00079 return result; 00080 } 00081 00082 // Set the string inserted at the end of each text line. "\n" by default. 00083 void LTRResultIterator::SetLineSeparator(const char *new_line) { 00084 line_separator_ = new_line; 00085 } 00086 00087 // Set the string inserted at the end of each paragraph. "\n" by default. 00088 void LTRResultIterator::SetParagraphSeparator(const char *new_para) { 00089 paragraph_separator_ = new_para; 00090 } 00091 00092 // Returns the mean confidence of the current object at the given level. 00093 // The number should be interpreted as a percent probability. (0.0f-100.0f) 00094 float LTRResultIterator::Confidence(PageIteratorLevel level) const { 00095 if (it_->word() == NULL) return 0.0f; // Already at the end! 00096 float mean_certainty = 0.0f; 00097 int certainty_count = 0; 00098 PAGE_RES_IT res_it(*it_); 00099 WERD_CHOICE* best_choice = res_it.word()->best_choice; 00100 ASSERT_HOST(best_choice != NULL); 00101 switch (level) { 00102 case RIL_BLOCK: 00103 do { 00104 best_choice = res_it.word()->best_choice; 00105 ASSERT_HOST(best_choice != NULL); 00106 mean_certainty += best_choice->certainty(); 00107 ++certainty_count; 00108 res_it.forward(); 00109 } while (res_it.block() == res_it.prev_block()); 00110 break; 00111 case RIL_PARA: 00112 do { 00113 best_choice = res_it.word()->best_choice; 00114 ASSERT_HOST(best_choice != NULL); 00115 mean_certainty += best_choice->certainty(); 00116 ++certainty_count; 00117 res_it.forward(); 00118 } while (res_it.block() == res_it.prev_block() && 00119 res_it.row()->row->para() == res_it.prev_row()->row->para()); 00120 break; 00121 case RIL_TEXTLINE: 00122 do { 00123 best_choice = res_it.word()->best_choice; 00124 ASSERT_HOST(best_choice != NULL); 00125 mean_certainty += best_choice->certainty(); 00126 ++certainty_count; 00127 res_it.forward(); 00128 } while (res_it.row() == res_it.prev_row()); 00129 break; 00130 case RIL_WORD: 00131 mean_certainty += best_choice->certainty(); 00132 ++certainty_count; 00133 break; 00134 case RIL_SYMBOL: 00135 BLOB_CHOICE_LIST_CLIST* choices = best_choice->blob_choices(); 00136 if (choices != NULL) { 00137 BLOB_CHOICE_LIST_C_IT blob_choices_it(choices); 00138 for (int blob = 0; blob < blob_index_; ++blob) 00139 blob_choices_it.forward(); 00140 BLOB_CHOICE_IT choice_it(blob_choices_it.data()); 00141 for (choice_it.mark_cycle_pt(); 00142 !choice_it.cycled_list(); 00143 choice_it.forward()) { 00144 if (choice_it.data()->unichar_id() == 00145 best_choice->unichar_id(blob_index_)) 00146 break; 00147 } 00148 mean_certainty += choice_it.data()->certainty(); 00149 } else { 00150 mean_certainty += best_choice->certainty(); 00151 } 00152 ++certainty_count; 00153 } 00154 if (certainty_count > 0) { 00155 mean_certainty /= certainty_count; 00156 float confidence = 100 + 5 * mean_certainty; 00157 if (confidence < 0.0f) confidence = 0.0f; 00158 if (confidence > 100.0f) confidence = 100.0f; 00159 return confidence; 00160 } 00161 return 0.0f; 00162 } 00163 00164 // Returns the font attributes of the current word. If iterating at a higher 00165 // level object than words, eg textlines, then this will return the 00166 // attributes of the first word in that textline. 00167 // The actual return value is a string representing a font name. It points 00168 // to an internal table and SHOULD NOT BE DELETED. Lifespan is the same as 00169 // the iterator itself, ie rendered invalid by various members of 00170 // TessBaseAPI, including Init, SetImage, End or deleting the TessBaseAPI. 00171 // Pointsize is returned in printers points (1/72 inch.) 00172 const char* LTRResultIterator::WordFontAttributes(bool* is_bold, 00173 bool* is_italic, 00174 bool* is_underlined, 00175 bool* is_monospace, 00176 bool* is_serif, 00177 bool* is_smallcaps, 00178 int* pointsize, 00179 int* font_id) const { 00180 if (it_->word() == NULL) return NULL; // Already at the end! 00181 if (it_->word()->fontinfo == NULL) { 00182 *font_id = -1; 00183 return NULL; // No font information. 00184 } 00185 const FontInfo& font_info = *it_->word()->fontinfo; 00186 *font_id = font_info.universal_id; 00187 *is_bold = font_info.is_bold(); 00188 *is_italic = font_info.is_italic(); 00189 *is_underlined = false; // TODO(rays) fix this! 00190 *is_monospace = font_info.is_fixed_pitch(); 00191 *is_serif = font_info.is_serif(); 00192 *is_smallcaps = it_->word()->small_caps; 00193 float row_height = it_->row()->row->x_height() + 00194 it_->row()->row->ascenders() - it_->row()->row->descenders(); 00195 // Convert from pixels to printers points. 00196 *pointsize = scaled_yres_ > 0 00197 ? static_cast<int>(row_height * kPointsPerInch / scaled_yres_ + 0.5) 00198 : 0; 00199 00200 return font_info.name; 00201 } 00202 00203 // Returns the name of the language used to recognize this word. 00204 const char* LTRResultIterator::WordRecognitionLanguage() const { 00205 if (it_->word() == NULL || it_->word()->tesseract == NULL) return NULL; 00206 return it_->word()->tesseract->lang.string(); 00207 } 00208 00209 // Return the overall directionality of this word. 00210 StrongScriptDirection LTRResultIterator::WordDirection() const { 00211 if (it_->word() == NULL) return DIR_NEUTRAL; 00212 bool has_rtl = it_->word()->AnyRtlCharsInWord(); 00213 bool has_ltr = it_->word()->AnyLtrCharsInWord(); 00214 if (has_rtl && !has_ltr) 00215 return DIR_RIGHT_TO_LEFT; 00216 if (has_ltr && !has_rtl) 00217 return DIR_LEFT_TO_RIGHT; 00218 if (!has_ltr && !has_rtl) 00219 return DIR_NEUTRAL; 00220 return DIR_MIX; 00221 } 00222 00223 // Returns true if the current word was found in a dictionary. 00224 bool LTRResultIterator::WordIsFromDictionary() const { 00225 if (it_->word() == NULL) return false; // Already at the end! 00226 int permuter = it_->word()->best_choice->permuter(); 00227 return permuter == SYSTEM_DAWG_PERM || permuter == FREQ_DAWG_PERM || 00228 permuter == USER_DAWG_PERM; 00229 } 00230 00231 // Returns true if the current word is numeric. 00232 bool LTRResultIterator::WordIsNumeric() const { 00233 if (it_->word() == NULL) return false; // Already at the end! 00234 int permuter = it_->word()->best_choice->permuter(); 00235 return permuter == NUMBER_PERM; 00236 } 00237 00238 // Returns true if the word contains blamer information. 00239 bool LTRResultIterator::HasBlamerInfo() const { 00240 return (it_->word() != NULL && it_->word()->blamer_bundle != NULL && 00241 (it_->word()->blamer_bundle->debug.length() > 0 || 00242 it_->word()->blamer_bundle->misadaption_debug.length() > 0)); 00243 } 00244 00245 // Returns the pointer to ParamsTrainingBundle stored in the BlamerBundle 00246 // of the current word. 00247 void *LTRResultIterator::GetParamsTrainingBundle() const { 00248 return (it_->word() != NULL && it_->word()->blamer_bundle != NULL) ? 00249 &(it_->word()->blamer_bundle->params_training_bundle) : NULL; 00250 } 00251 00252 // Returns the pointer to the string with blamer information for this word. 00253 // Assumes that the word's blamer_bundle is not NULL. 00254 const char *LTRResultIterator::GetBlamerDebug() const { 00255 return it_->word()->blamer_bundle->debug.string(); 00256 } 00257 00258 // Returns the pointer to the string with misadaption information for this word. 00259 // Assumes that the word's blamer_bundle is not NULL. 00260 const char *LTRResultIterator::GetBlamerMisadaptionDebug() const { 00261 return it_->word()->blamer_bundle->misadaption_debug.string(); 00262 } 00263 00264 // Returns the null terminated UTF-8 encoded truth string for the current word. 00265 // Use delete [] to free after use. 00266 char* LTRResultIterator::WordTruthUTF8Text() const { 00267 if (it_->word() == NULL) return NULL; // Already at the end! 00268 if (it_->word()->blamer_bundle == NULL || 00269 it_->word()->blamer_bundle->incorrect_result_reason == IRR_NO_TRUTH) { 00270 return NULL; // no truth information for this word 00271 } 00272 const GenericVector<STRING> &truth_vec = 00273 it_->word()->blamer_bundle->truth_text; 00274 STRING truth_text; 00275 for (int i = 0; i < truth_vec.size(); ++i) truth_text += truth_vec[i]; 00276 int length = truth_text.length() + 1; 00277 char* result = new char[length]; 00278 strncpy(result, truth_text.string(), length); 00279 return result; 00280 } 00281 00282 // Returns a pointer to serialized choice lattice. 00283 // Fills lattice_size with the number of bytes in lattice data. 00284 const char *LTRResultIterator::WordLattice(int *lattice_size) const { 00285 if (it_->word() == NULL) return NULL; // Already at the end! 00286 if (it_->word()->blamer_bundle == NULL) return NULL; 00287 *lattice_size = it_->word()->blamer_bundle->lattice_size; 00288 return it_->word()->blamer_bundle->lattice_data; 00289 } 00290 00291 // Returns true if the current symbol is a superscript. 00292 // If iterating at a higher level object than symbols, eg words, then 00293 // this will return the attributes of the first symbol in that word. 00294 bool LTRResultIterator::SymbolIsSuperscript() const { 00295 if (cblob_it_ == NULL && it_->word() != NULL) 00296 return it_->word()->box_word->BlobPosition(blob_index_) == SP_SUPERSCRIPT; 00297 return false; 00298 } 00299 00300 // Returns true if the current symbol is a subscript. 00301 // If iterating at a higher level object than symbols, eg words, then 00302 // this will return the attributes of the first symbol in that word. 00303 bool LTRResultIterator::SymbolIsSubscript() const { 00304 if (cblob_it_ == NULL && it_->word() != NULL) 00305 return it_->word()->box_word->BlobPosition(blob_index_) == SP_SUBSCRIPT; 00306 return false; 00307 } 00308 00309 // Returns true if the current symbol is a dropcap. 00310 // If iterating at a higher level object than symbols, eg words, then 00311 // this will return the attributes of the first symbol in that word. 00312 bool LTRResultIterator::SymbolIsDropcap() const { 00313 if (cblob_it_ == NULL && it_->word() != NULL) 00314 return it_->word()->box_word->BlobPosition(blob_index_) == SP_DROPCAP; 00315 return false; 00316 } 00317 00318 ChoiceIterator::ChoiceIterator(const LTRResultIterator& result_it) { 00319 ASSERT_HOST(result_it.it_->word() != NULL); 00320 word_res_ = result_it.it_->word(); 00321 PAGE_RES_IT res_it(*result_it.it_); 00322 WERD_CHOICE* best_choice = word_res_->best_choice; 00323 BLOB_CHOICE_LIST_CLIST* choices = best_choice->blob_choices(); 00324 if (choices != NULL) { 00325 BLOB_CHOICE_LIST_C_IT blob_choices_it(choices); 00326 for (int blob = 0; blob < result_it.blob_index_; ++blob) 00327 blob_choices_it.forward(); 00328 choice_it_ = new BLOB_CHOICE_IT(blob_choices_it.data()); 00329 choice_it_->mark_cycle_pt(); 00330 } else { 00331 choice_it_ = NULL; 00332 } 00333 } 00334 00335 ChoiceIterator::~ChoiceIterator() { 00336 delete choice_it_; 00337 } 00338 00339 // Moves to the next choice for the symbol and returns false if there 00340 // are none left. 00341 bool ChoiceIterator::Next() { 00342 if (choice_it_ == NULL) 00343 return false; 00344 choice_it_->forward(); 00345 return !choice_it_->cycled_list(); 00346 } 00347 00348 // Returns the null terminated UTF-8 encoded text string for the current 00349 // choice. Use delete [] to free after use. 00350 const char* ChoiceIterator::GetUTF8Text() const { 00351 if (choice_it_ == NULL) 00352 return NULL; 00353 UNICHAR_ID id = choice_it_->data()->unichar_id(); 00354 return word_res_->BestUTF8(id, false); 00355 } 00356 00357 // Returns the confidence of the current choice. 00358 // The number should be interpreted as a percent probability. (0.0f-100.0f) 00359 float ChoiceIterator::Confidence() const { 00360 if (choice_it_ == NULL) 00361 return 0.0f; 00362 float confidence = 100 + 5 * choice_it_->data()->certainty(); 00363 if (confidence < 0.0f) confidence = 0.0f; 00364 if (confidence > 100.0f) confidence = 100.0f; 00365 return confidence; 00366 } 00367 00368 00369 } // namespace tesseract.