Tesseract  3.02
tesseract-ocr/ccmain/ltrresultiterator.cpp
Go to the documentation of this file.
00001 
00002 // File:        ltrresultiterator.cpp
00003 // Description: Iterator for tesseract results in strict left-to-right
00004 //              order that avoids using tesseract internal data structures.
00005 // Author:      Ray Smith
00006 // Created:     Fri Feb 26 14:32:09 PST 2010
00007 //
00008 // (C) Copyright 2010, Google Inc.
00009 // Licensed under the Apache License, Version 2.0 (the "License");
00010 // you may not use this file except in compliance with the License.
00011 // You may obtain a copy of the License at
00012 // http://www.apache.org/licenses/LICENSE-2.0
00013 // Unless required by applicable law or agreed to in writing, software
00014 // distributed under the License is distributed on an "AS IS" BASIS,
00015 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00016 // See the License for the specific language governing permissions and
00017 // limitations under the License.
00018 //
00020 
00021 #include "ltrresultiterator.h"
00022 
00023 #include "allheaders.h"
00024 #include "pageres.h"
00025 #include "strngs.h"
00026 #include "tesseractclass.h"
00027 
00028 namespace tesseract {
00029 
00030 LTRResultIterator::LTRResultIterator(PAGE_RES* page_res, Tesseract* tesseract,
00031                                      int scale, int scaled_yres,
00032                                      int rect_left, int rect_top,
00033                                      int rect_width, int rect_height)
00034   : PageIterator(page_res, tesseract, scale, scaled_yres,
00035                  rect_left, rect_top, rect_width, rect_height),
00036     line_separator_("\n"),
00037     paragraph_separator_("\n") {
00038 }
00039 
00040 LTRResultIterator::~LTRResultIterator() {
00041 }
00042 
00043 // Returns the null terminated UTF-8 encoded text string for the current
00044 // object at the given level. Use delete [] to free after use.
00045 char* LTRResultIterator::GetUTF8Text(PageIteratorLevel level) const {
00046   if (it_->word() == NULL) return NULL;  // Already at the end!
00047   STRING text;
00048   PAGE_RES_IT res_it(*it_);
00049   WERD_CHOICE* best_choice = res_it.word()->best_choice;
00050   ASSERT_HOST(best_choice != NULL);
00051   if (level == RIL_SYMBOL) {
00052     text = res_it.word()->BestUTF8(blob_index_, false);
00053   } else if (level == RIL_WORD) {
00054     text = best_choice->unichar_string();
00055   } else {
00056     bool eol = false;  // end of line?
00057     bool eop = false;  // end of paragraph?
00058     do {  // for each paragraph in a block
00059       do {  // for each text line in a paragraph
00060         do {  // for each word in a text line
00061           best_choice = res_it.word()->best_choice;
00062           ASSERT_HOST(best_choice != NULL);
00063           text += best_choice->unichar_string();
00064           text += " ";
00065           res_it.forward();
00066           eol = res_it.row() != res_it.prev_row();
00067         } while (!eol);
00068         text.truncate_at(text.length() - 1);
00069         text += line_separator_;
00070         eop = res_it.block() != res_it.prev_block() ||
00071             res_it.row()->row->para() != res_it.prev_row()->row->para();
00072       } while (level != RIL_TEXTLINE && !eop);
00073       if (eop) text += paragraph_separator_;
00074     } while (level == RIL_BLOCK && res_it.block() == res_it.prev_block());
00075   }
00076   int length = text.length() + 1;
00077   char* result = new char[length];
00078   strncpy(result, text.string(), length);
00079   return result;
00080 }
00081 
00082 // Set the string inserted at the end of each text line. "\n" by default.
00083 void LTRResultIterator::SetLineSeparator(const char *new_line) {
00084   line_separator_ = new_line;
00085 }
00086 
00087 // Set the string inserted at the end of each paragraph. "\n" by default.
00088 void LTRResultIterator::SetParagraphSeparator(const char *new_para) {
00089   paragraph_separator_ = new_para;
00090 }
00091 
00092 // Returns the mean confidence of the current object at the given level.
00093 // The number should be interpreted as a percent probability. (0.0f-100.0f)
00094 float LTRResultIterator::Confidence(PageIteratorLevel level) const {
00095   if (it_->word() == NULL) return 0.0f;  // Already at the end!
00096   float mean_certainty = 0.0f;
00097   int certainty_count = 0;
00098   PAGE_RES_IT res_it(*it_);
00099   WERD_CHOICE* best_choice = res_it.word()->best_choice;
00100   ASSERT_HOST(best_choice != NULL);
00101   switch (level) {
00102     case RIL_BLOCK:
00103       do {
00104         best_choice = res_it.word()->best_choice;
00105         ASSERT_HOST(best_choice != NULL);
00106         mean_certainty += best_choice->certainty();
00107         ++certainty_count;
00108         res_it.forward();
00109       } while (res_it.block() == res_it.prev_block());
00110       break;
00111     case RIL_PARA:
00112       do {
00113         best_choice = res_it.word()->best_choice;
00114         ASSERT_HOST(best_choice != NULL);
00115         mean_certainty += best_choice->certainty();
00116         ++certainty_count;
00117         res_it.forward();
00118       } while (res_it.block() == res_it.prev_block() &&
00119                res_it.row()->row->para() == res_it.prev_row()->row->para());
00120       break;
00121     case RIL_TEXTLINE:
00122       do {
00123         best_choice = res_it.word()->best_choice;
00124         ASSERT_HOST(best_choice != NULL);
00125         mean_certainty += best_choice->certainty();
00126         ++certainty_count;
00127         res_it.forward();
00128       } while (res_it.row() == res_it.prev_row());
00129       break;
00130     case RIL_WORD:
00131       mean_certainty += best_choice->certainty();
00132      ++certainty_count;
00133       break;
00134     case RIL_SYMBOL:
00135       BLOB_CHOICE_LIST_CLIST* choices = best_choice->blob_choices();
00136       if (choices != NULL) {
00137         BLOB_CHOICE_LIST_C_IT blob_choices_it(choices);
00138         for (int blob = 0; blob < blob_index_; ++blob)
00139           blob_choices_it.forward();
00140         BLOB_CHOICE_IT choice_it(blob_choices_it.data());
00141         for (choice_it.mark_cycle_pt();
00142              !choice_it.cycled_list();
00143              choice_it.forward()) {
00144           if (choice_it.data()->unichar_id() ==
00145               best_choice->unichar_id(blob_index_))
00146             break;
00147         }
00148         mean_certainty += choice_it.data()->certainty();
00149       } else {
00150         mean_certainty += best_choice->certainty();
00151       }
00152       ++certainty_count;
00153   }
00154   if (certainty_count > 0) {
00155     mean_certainty /= certainty_count;
00156     float confidence = 100 + 5 * mean_certainty;
00157     if (confidence < 0.0f) confidence = 0.0f;
00158     if (confidence > 100.0f) confidence = 100.0f;
00159     return confidence;
00160   }
00161   return 0.0f;
00162 }
00163 
00164 // Returns the font attributes of the current word. If iterating at a higher
00165 // level object than words, eg textlines, then this will return the
00166 // attributes of the first word in that textline.
00167 // The actual return value is a string representing a font name. It points
00168 // to an internal table and SHOULD NOT BE DELETED. Lifespan is the same as
00169 // the iterator itself, ie rendered invalid by various members of
00170 // TessBaseAPI, including Init, SetImage, End or deleting the TessBaseAPI.
00171 // Pointsize is returned in printers points (1/72 inch.)
00172 const char* LTRResultIterator::WordFontAttributes(bool* is_bold,
00173                                                   bool* is_italic,
00174                                                   bool* is_underlined,
00175                                                   bool* is_monospace,
00176                                                   bool* is_serif,
00177                                                   bool* is_smallcaps,
00178                                                   int* pointsize,
00179                                                   int* font_id) const {
00180   if (it_->word() == NULL) return NULL;  // Already at the end!
00181   if (it_->word()->fontinfo == NULL) {
00182     *font_id = -1;
00183     return NULL;  // No font information.
00184   }
00185   const FontInfo& font_info = *it_->word()->fontinfo;
00186   *font_id = font_info.universal_id;
00187   *is_bold = font_info.is_bold();
00188   *is_italic = font_info.is_italic();
00189   *is_underlined = false;  // TODO(rays) fix this!
00190   *is_monospace = font_info.is_fixed_pitch();
00191   *is_serif = font_info.is_serif();
00192   *is_smallcaps = it_->word()->small_caps;
00193   float row_height = it_->row()->row->x_height() +
00194       it_->row()->row->ascenders() - it_->row()->row->descenders();
00195   // Convert from pixels to printers points.
00196   *pointsize = scaled_yres_ > 0
00197       ? static_cast<int>(row_height * kPointsPerInch / scaled_yres_ + 0.5)
00198       : 0;
00199 
00200   return font_info.name;
00201 }
00202 
00203 // Returns the name of the language used to recognize this word.
00204 const char* LTRResultIterator::WordRecognitionLanguage() const {
00205   if (it_->word() == NULL || it_->word()->tesseract == NULL) return NULL;
00206   return it_->word()->tesseract->lang.string();
00207 }
00208 
00209 // Return the overall directionality of this word.
00210 StrongScriptDirection LTRResultIterator::WordDirection() const {
00211   if (it_->word() == NULL) return DIR_NEUTRAL;
00212   bool has_rtl = it_->word()->AnyRtlCharsInWord();
00213   bool has_ltr = it_->word()->AnyLtrCharsInWord();
00214   if (has_rtl && !has_ltr)
00215     return DIR_RIGHT_TO_LEFT;
00216   if (has_ltr && !has_rtl)
00217     return DIR_LEFT_TO_RIGHT;
00218   if (!has_ltr && !has_rtl)
00219     return DIR_NEUTRAL;
00220   return DIR_MIX;
00221 }
00222 
00223 // Returns true if the current word was found in a dictionary.
00224 bool LTRResultIterator::WordIsFromDictionary() const {
00225   if (it_->word() == NULL) return false;  // Already at the end!
00226   int permuter = it_->word()->best_choice->permuter();
00227   return permuter == SYSTEM_DAWG_PERM || permuter == FREQ_DAWG_PERM ||
00228          permuter == USER_DAWG_PERM;
00229 }
00230 
00231 // Returns true if the current word is numeric.
00232 bool LTRResultIterator::WordIsNumeric() const {
00233   if (it_->word() == NULL) return false;  // Already at the end!
00234   int permuter = it_->word()->best_choice->permuter();
00235   return permuter == NUMBER_PERM;
00236 }
00237 
00238 // Returns true if the word contains blamer information.
00239 bool LTRResultIterator::HasBlamerInfo() const {
00240   return (it_->word() != NULL && it_->word()->blamer_bundle != NULL &&
00241            (it_->word()->blamer_bundle->debug.length() > 0 ||
00242             it_->word()->blamer_bundle->misadaption_debug.length() > 0));
00243 }
00244 
00245 // Returns the pointer to ParamsTrainingBundle stored in the BlamerBundle
00246 // of the current word.
00247 void *LTRResultIterator::GetParamsTrainingBundle() const {
00248   return (it_->word() != NULL && it_->word()->blamer_bundle != NULL) ?
00249       &(it_->word()->blamer_bundle->params_training_bundle) : NULL;
00250 }
00251 
00252 // Returns the pointer to the string with blamer information for this word.
00253 // Assumes that the word's blamer_bundle is not NULL.
00254 const char *LTRResultIterator::GetBlamerDebug() const {
00255   return it_->word()->blamer_bundle->debug.string();
00256 }
00257 
00258 // Returns the pointer to the string with misadaption information for this word.
00259 // Assumes that the word's blamer_bundle is not NULL.
00260 const char *LTRResultIterator::GetBlamerMisadaptionDebug() const {
00261   return it_->word()->blamer_bundle->misadaption_debug.string();
00262 }
00263 
00264 // Returns the null terminated UTF-8 encoded truth string for the current word.
00265 // Use delete [] to free after use.
00266 char* LTRResultIterator::WordTruthUTF8Text() const {
00267   if (it_->word() == NULL) return NULL;  // Already at the end!
00268   if (it_->word()->blamer_bundle == NULL ||
00269       it_->word()->blamer_bundle->incorrect_result_reason == IRR_NO_TRUTH) {
00270     return NULL;  // no truth information for this word
00271   }
00272   const GenericVector<STRING> &truth_vec =
00273       it_->word()->blamer_bundle->truth_text;
00274   STRING truth_text;
00275   for (int i = 0; i < truth_vec.size(); ++i) truth_text += truth_vec[i];
00276   int length = truth_text.length() + 1;
00277   char* result = new char[length];
00278   strncpy(result, truth_text.string(), length);
00279   return result;
00280 }
00281 
00282 // Returns a pointer to serialized choice lattice.
00283 // Fills lattice_size with the number of bytes in lattice data.
00284 const char *LTRResultIterator::WordLattice(int *lattice_size) const {
00285   if (it_->word() == NULL) return NULL;  // Already at the end!
00286   if (it_->word()->blamer_bundle == NULL) return NULL;
00287   *lattice_size = it_->word()->blamer_bundle->lattice_size;
00288   return it_->word()->blamer_bundle->lattice_data;
00289 }
00290 
00291 // Returns true if the current symbol is a superscript.
00292 // If iterating at a higher level object than symbols, eg words, then
00293 // this will return the attributes of the first symbol in that word.
00294 bool LTRResultIterator::SymbolIsSuperscript() const {
00295   if (cblob_it_ == NULL && it_->word() != NULL)
00296     return it_->word()->box_word->BlobPosition(blob_index_) == SP_SUPERSCRIPT;
00297   return false;
00298 }
00299 
00300 // Returns true if the current symbol is a subscript.
00301 // If iterating at a higher level object than symbols, eg words, then
00302 // this will return the attributes of the first symbol in that word.
00303 bool LTRResultIterator::SymbolIsSubscript() const {
00304   if (cblob_it_ == NULL && it_->word() != NULL)
00305     return it_->word()->box_word->BlobPosition(blob_index_) == SP_SUBSCRIPT;
00306   return false;
00307 }
00308 
00309 // Returns true if the current symbol is a dropcap.
00310 // If iterating at a higher level object than symbols, eg words, then
00311 // this will return the attributes of the first symbol in that word.
00312 bool LTRResultIterator::SymbolIsDropcap() const {
00313   if (cblob_it_ == NULL && it_->word() != NULL)
00314     return it_->word()->box_word->BlobPosition(blob_index_) == SP_DROPCAP;
00315   return false;
00316 }
00317 
00318 ChoiceIterator::ChoiceIterator(const LTRResultIterator& result_it) {
00319   ASSERT_HOST(result_it.it_->word() != NULL);
00320   word_res_ = result_it.it_->word();
00321   PAGE_RES_IT res_it(*result_it.it_);
00322   WERD_CHOICE* best_choice = word_res_->best_choice;
00323   BLOB_CHOICE_LIST_CLIST* choices = best_choice->blob_choices();
00324   if (choices != NULL) {
00325     BLOB_CHOICE_LIST_C_IT blob_choices_it(choices);
00326     for (int blob = 0; blob < result_it.blob_index_; ++blob)
00327       blob_choices_it.forward();
00328     choice_it_ = new BLOB_CHOICE_IT(blob_choices_it.data());
00329     choice_it_->mark_cycle_pt();
00330   } else {
00331     choice_it_ = NULL;
00332   }
00333 }
00334 
00335 ChoiceIterator::~ChoiceIterator() {
00336   delete choice_it_;
00337 }
00338 
00339 // Moves to the next choice for the symbol and returns false if there
00340 // are none left.
00341 bool ChoiceIterator::Next() {
00342   if (choice_it_ == NULL)
00343     return false;
00344   choice_it_->forward();
00345   return !choice_it_->cycled_list();
00346 }
00347 
00348 // Returns the null terminated UTF-8 encoded text string for the current
00349 // choice. Use delete [] to free after use.
00350 const char* ChoiceIterator::GetUTF8Text() const {
00351   if (choice_it_ == NULL)
00352     return NULL;
00353   UNICHAR_ID id = choice_it_->data()->unichar_id();
00354   return word_res_->BestUTF8(id, false);
00355 }
00356 
00357 // Returns the confidence of the current choice.
00358 // The number should be interpreted as a percent probability. (0.0f-100.0f)
00359 float ChoiceIterator::Confidence() const {
00360   if (choice_it_ == NULL)
00361     return 0.0f;
00362   float confidence = 100 + 5 * choice_it_->data()->certainty();
00363   if (confidence < 0.0f) confidence = 0.0f;
00364   if (confidence > 100.0f) confidence = 100.0f;
00365   return confidence;
00366 }
00367 
00368 
00369 }  // namespace tesseract.