Tesseract  3.02
tesseract-ocr/ccmain/pageiterator.cpp
Go to the documentation of this file.
00001 
00002 // File:        pageiterator.cpp
00003 // Description: Iterator for tesseract page structure that avoids using
00004 //              tesseract internal data structures.
00005 // Author:      Ray Smith
00006 // Created:     Fri Feb 26 14:32:09 PST 2010
00007 //
00008 // (C) Copyright 2010, Google Inc.
00009 // Licensed under the Apache License, Version 2.0 (the "License");
00010 // you may not use this file except in compliance with the License.
00011 // You may obtain a copy of the License at
00012 // http://www.apache.org/licenses/LICENSE-2.0
00013 // Unless required by applicable law or agreed to in writing, software
00014 // distributed under the License is distributed on an "AS IS" BASIS,
00015 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00016 // See the License for the specific language governing permissions and
00017 // limitations under the License.
00018 //
00020 
00021 #include "pageiterator.h"
00022 #include "allheaders.h"
00023 #include "helpers.h"
00024 #include "pageres.h"
00025 #include "tesseractclass.h"
00026 
00027 namespace tesseract {
00028 
00029 PageIterator::PageIterator(PAGE_RES* page_res, Tesseract* tesseract,
00030                            int scale, int scaled_yres,
00031                            int rect_left, int rect_top,
00032                            int rect_width, int rect_height)
00033   : page_res_(page_res), tesseract_(tesseract),
00034     word_(NULL), word_length_(0), blob_index_(0), cblob_it_(NULL),
00035     scale_(scale), scaled_yres_(scaled_yres),
00036     rect_left_(rect_left), rect_top_(rect_top),
00037     rect_width_(rect_width), rect_height_(rect_height) {
00038   it_ = new PAGE_RES_IT(page_res);
00039   PageIterator::Begin();
00040 }
00041 
00042 PageIterator::~PageIterator() {
00043   delete it_;
00044   delete cblob_it_;
00045 }
00046 
00052 PageIterator::PageIterator(const PageIterator& src)
00053   : page_res_(src.page_res_), tesseract_(src.tesseract_),
00054     word_(NULL), word_length_(src.word_length_),
00055     blob_index_(src.blob_index_), cblob_it_(NULL),
00056     scale_(src.scale_), scaled_yres_(src.scaled_yres_),
00057     rect_left_(src.rect_left_), rect_top_(src.rect_top_),
00058     rect_width_(src.rect_width_), rect_height_(src.rect_height_) {
00059   it_ = new PAGE_RES_IT(*src.it_);
00060   BeginWord(src.blob_index_);
00061 }
00062 
00063 const PageIterator& PageIterator::operator=(const PageIterator& src) {
00064   page_res_ = src.page_res_;
00065   tesseract_ = src.tesseract_;
00066   scale_ = src.scale_;
00067   scaled_yres_ = src.scaled_yres_;
00068   rect_left_ = src.rect_left_;
00069   rect_top_ = src.rect_top_;
00070   rect_width_ = src.rect_width_;
00071   rect_height_ = src.rect_height_;
00072   if (it_ != NULL) delete it_;
00073   it_ = new PAGE_RES_IT(*src.it_);
00074   BeginWord(src.blob_index_);
00075   return *this;
00076 }
00077 
00078 bool PageIterator::PositionedAtSameWord(const PAGE_RES_IT* other) const {
00079   return (it_ == NULL && it_ == other) ||
00080      ((other != NULL) && (it_ != NULL) && (*it_ == *other));
00081 }
00082 
00083 // ============= Moving around within the page ============.
00084 
00086 void PageIterator::Begin() {
00087   it_->restart_page_with_empties();
00088   BeginWord(0);
00089 }
00090 
00091 void PageIterator::RestartParagraph() {
00092   if (it_->block() == NULL) return; // At end of the document.
00093   PAGE_RES_IT para(page_res_);
00094   PAGE_RES_IT next_para(para);
00095   next_para.forward_paragraph();
00096   while (next_para.cmp(*it_) <= 0) {
00097     para = next_para;
00098     next_para.forward_paragraph();
00099   }
00100   *it_ = para;
00101   BeginWord(0);
00102 }
00103 
00104 bool PageIterator::IsWithinFirstTextlineOfParagraph() const {
00105   PageIterator p_start(*this);
00106   p_start.RestartParagraph();
00107   return p_start.it_->row() == it_->row();
00108 }
00109 
00110 void PageIterator::RestartRow() {
00111   it_->restart_row();
00112   BeginWord(0);
00113 }
00114 
00128 bool PageIterator::Next(PageIteratorLevel level) {
00129   if (it_->block() == NULL) return false;  // Already at the end!
00130   if (it_->word() == NULL)
00131     level = RIL_BLOCK;
00132 
00133   switch (level) {
00134     case RIL_BLOCK:
00135       it_->forward_block();
00136       break;
00137     case RIL_PARA:
00138       it_->forward_paragraph();
00139       break;
00140     case RIL_TEXTLINE:
00141       for (it_->forward_with_empties(); it_->row() == it_->prev_row();
00142            it_->forward_with_empties());
00143       break;
00144     case RIL_WORD:
00145       it_->forward_with_empties();
00146       break;
00147     case RIL_SYMBOL:
00148       if (cblob_it_ != NULL)
00149         cblob_it_->forward();
00150       ++blob_index_;
00151       if (blob_index_ >= word_length_)
00152         it_->forward_with_empties();
00153       else
00154         return true;
00155       break;
00156   }
00157   BeginWord(0);
00158   return it_->block() != NULL;
00159 }
00160 
00166 bool PageIterator::IsAtBeginningOf(PageIteratorLevel level) const {
00167   if (it_->block() == NULL) return false;  // Already at the end!
00168   if (it_->word() == NULL) return true;  // In an image block.
00169   switch (level) {
00170     case RIL_BLOCK:
00171       return blob_index_ == 0 && it_->block() != it_->prev_block();
00172     case RIL_PARA:
00173       return blob_index_ == 0 &&
00174           (it_->block() != it_->prev_block() ||
00175            it_->row()->row->para() != it_->prev_row()->row->para());
00176     case RIL_TEXTLINE:
00177       return blob_index_ == 0 && it_->row() != it_->prev_row();
00178     case RIL_WORD:
00179       return blob_index_ == 0;
00180     case RIL_SYMBOL:
00181       return true;
00182   }
00183   return false;
00184 }
00185 
00190 bool PageIterator::IsAtFinalElement(PageIteratorLevel level,
00191                                     PageIteratorLevel element) const {
00192   if (Empty(element)) return true;  // Already at the end!
00193   // The result is true if we step forward by element and find we are
00194   // at the the end of the page or at beginning of *all* levels in:
00195   // [level, element).
00196   // When there is more than one level difference between element and level,
00197   // we could for instance move forward one symbol and still be at the first
00198   // word on a line, so we also have to be at the first symbol in a word.
00199   PageIterator next(*this);
00200   next.Next(element);
00201   if (next.Empty(element)) return true;  // Reached the end of the page.
00202   while (element > level) {
00203     element = static_cast<PageIteratorLevel>(element - 1);
00204     if (!next.IsAtBeginningOf(element))
00205       return false;
00206   }
00207   return true;
00208 }
00209 
00216 int PageIterator::Cmp(const PageIterator &other) const {
00217   int word_cmp = it_->cmp(*other.it_);
00218   if (word_cmp != 0)
00219     return word_cmp;
00220   if (blob_index_ < other.blob_index_)
00221     return -1;
00222   if (blob_index_ == other.blob_index_)
00223     return 0;
00224   return 1;
00225 }
00226 
00227 // ============= Accessing data ==============.
00228 // Coordinate system:
00229 // Integer coordinates are at the cracks between the pixels.
00230 // The top-left corner of the top-left pixel in the image is at (0,0).
00231 // The bottom-right corner of the bottom-right pixel in the image is at
00232 // (width, height).
00233 // Every bounding box goes from the top-left of the top-left contained
00234 // pixel to the bottom-right of the bottom-right contained pixel, so
00235 // the bounding box of the single top-left pixel in the image is:
00236 // (0,0)->(1,1).
00237 // If an image rectangle has been set in the API, then returned coordinates
00238 // relate to the original (full) image, rather than the rectangle.
00239 
00246 bool PageIterator::BoundingBoxInternal(PageIteratorLevel level,
00247                                        int* left, int* top,
00248                                        int* right, int* bottom) const {
00249   if (Empty(level))
00250     return false;
00251   TBOX box;
00252   PARA *para = NULL;
00253   switch (level) {
00254     case RIL_BLOCK:
00255       box = it_->block()->block->bounding_box();
00256       break;
00257     case RIL_PARA:
00258       para = it_->row()->row->para();
00259       // explicit fall-through.
00260     case RIL_TEXTLINE:
00261       box = it_->row()->row->bounding_box();
00262       break;
00263     case RIL_WORD:
00264       box = it_->word()->word->bounding_box();
00265       break;
00266     case RIL_SYMBOL:
00267       if (cblob_it_ == NULL)
00268         box = it_->word()->box_word->BlobBox(blob_index_);
00269       else
00270         box = cblob_it_->data()->bounding_box();
00271   }
00272   if (level == RIL_PARA) {
00273     PageIterator other = *this;
00274     other.Begin();
00275     do {
00276       if (other.it_->block() &&
00277           other.it_->block()->block == it_->block()->block &&
00278           other.it_->row() && other.it_->row()->row &&
00279           other.it_->row()->row->para() == para) {
00280         box = box.bounding_union(other.it_->row()->row->bounding_box());
00281       }
00282     } while (other.Next(RIL_TEXTLINE));
00283   }
00284   if (level != RIL_SYMBOL || cblob_it_ != NULL)
00285     box.rotate(it_->block()->block->re_rotation());
00286   // Now we have a box in tesseract coordinates relative to the image rectangle,
00287   // we have to convert the coords to a top-down system.
00288   const int pix_height = pixGetHeight(tesseract_->pix_binary());
00289   const int pix_width = pixGetWidth(tesseract_->pix_binary());
00290   *left = ClipToRange(static_cast<int>(box.left()), 0, pix_width);
00291   *top = ClipToRange(pix_height - box.top(), 0, pix_height);
00292   *right = ClipToRange(static_cast<int>(box.right()), *left, pix_width);
00293   *bottom = ClipToRange(pix_height - box.bottom(), *top, pix_height);
00294   return true;
00295 }
00296 
00303 bool PageIterator::BoundingBox(PageIteratorLevel level,
00304                                int* left, int* top,
00305                                int* right, int* bottom) const {
00306   if (!BoundingBoxInternal(level, left, top, right, bottom))
00307     return false;
00308   // Convert to the coordinate system of the original image.
00309   *left = ClipToRange(*left / scale_ + rect_left_,
00310                       rect_left_, rect_left_ + rect_width_);
00311   *top = ClipToRange(*top / scale_ + rect_top_,
00312                      rect_top_, rect_top_ + rect_height_);
00313   *right = ClipToRange((*right + scale_ - 1) / scale_ + rect_left_,
00314                        *left, rect_left_ + rect_width_);
00315   *bottom = ClipToRange((*bottom + scale_ - 1) / scale_ + rect_top_,
00316                         *top, rect_top_ + rect_height_);
00317   return true;
00318 }
00319 
00321 bool PageIterator::Empty(PageIteratorLevel level) const {
00322   if (it_->block() == NULL) return true;  // Already at the end!
00323   if (it_->word() == NULL && level != RIL_BLOCK) return true;  // image block
00324   if (level == RIL_SYMBOL && blob_index_ >= word_length_)
00325     return true;  // Zero length word, or already at the end of it.
00326   return false;
00327 }
00328 
00330 PolyBlockType PageIterator::BlockType() const {
00331   if (it_->block() == NULL || it_->block()->block == NULL)
00332     return PT_UNKNOWN;  // Already at the end!
00333   if (it_->block()->block->poly_block() == NULL)
00334     return PT_FLOWING_TEXT;  // No layout analysis used - assume text.
00335   return it_->block()->block->poly_block()->isA();
00336 }
00337 
00360 Pix* PageIterator::GetBinaryImage(PageIteratorLevel level) const {
00361   int left, top, right, bottom;
00362   if (!BoundingBoxInternal(level, &left, &top, &right, &bottom))
00363     return NULL;
00364   Pix* pix = NULL;
00365   switch (level) {
00366     case RIL_BLOCK:
00367     case RIL_PARA:
00368       int bleft, btop, bright, bbottom;
00369       BoundingBoxInternal(RIL_BLOCK, &bleft, &btop, &bright, &bbottom);
00370       pix = it_->block()->block->render_mask();
00371       // AND the mask and the image.
00372       pixRasterop(pix, 0, 0, pixGetWidth(pix), pixGetHeight(pix),
00373                   PIX_SRC & PIX_DST, tesseract_->pix_binary(),
00374                   bleft, btop);
00375       if (level == RIL_PARA) {
00376         // RIL_PARA needs further attention:
00377         //   clip the paragraph from the block mask.
00378         Box* box = boxCreate(left - bleft, top - btop,
00379                              right - left, bottom - top);
00380         Pix* pix2 = pixClipRectangle(pix, box, NULL);
00381         boxDestroy(&box);
00382         pixDestroy(&pix);
00383         pix = pix2;
00384       }
00385       break;
00386     case RIL_TEXTLINE:
00387     case RIL_WORD:
00388     case RIL_SYMBOL:
00389       if (level == RIL_SYMBOL && cblob_it_ != NULL &&
00390           cblob_it_->data()->area() != 0)
00391         return cblob_it_->data()->render();
00392       // Just clip from the bounding box.
00393       Box* box = boxCreate(left, top, right - left, bottom - top);
00394       pix = pixClipRectangle(tesseract_->pix_binary(), box, NULL);
00395       boxDestroy(&box);
00396       break;
00397   }
00398   return pix;
00399 }
00400 
00411 Pix* PageIterator::GetImage(PageIteratorLevel level, int padding,
00412                             int* left, int* top) const {
00413   int right, bottom;
00414   if (!BoundingBox(level, left, top, &right, &bottom))
00415     return NULL;
00416   Pix* pix = tesseract_->pix_grey();
00417   if (pix == NULL)
00418     return GetBinaryImage(level);
00419 
00420   // Expand the box.
00421   *left = MAX(*left - padding, 0);
00422   *top = MAX(*top - padding, 0);
00423   right = MIN(right + padding, rect_width_);
00424   bottom = MIN(bottom + padding, rect_height_);
00425   Box* box = boxCreate(*left, *top, right - *left, bottom - *top);
00426   Pix* grey_pix = pixClipRectangle(pix, box, NULL);
00427   boxDestroy(&box);
00428   if (level == RIL_BLOCK) {
00429     Pix* mask = it_->block()->block->render_mask();
00430     Pix* expanded_mask = pixCreate(right - *left, bottom - *top, 1);
00431     pixRasterop(expanded_mask, padding, padding,
00432                 pixGetWidth(mask), pixGetHeight(mask),
00433                 PIX_SRC, mask, 0, 0);
00434     pixDestroy(&mask);
00435     pixDilateBrick(expanded_mask, expanded_mask, 2*padding + 1, 2*padding + 1);
00436     pixInvert(expanded_mask, expanded_mask);
00437     pixSetMasked(grey_pix, expanded_mask, 255);
00438     pixDestroy(&expanded_mask);
00439   }
00440   return grey_pix;
00441 }
00442 
00448 bool PageIterator::Baseline(PageIteratorLevel level,
00449                             int* x1, int* y1, int* x2, int* y2) const {
00450   if (it_->word() == NULL) return false;  // Already at the end!
00451   ROW* row = it_->row()->row;
00452   WERD* word = it_->word()->word;
00453   TBOX box = (level == RIL_WORD || level == RIL_SYMBOL)
00454            ? word->bounding_box()
00455            : row->bounding_box();
00456   int left = box.left();
00457   ICOORD startpt(left, static_cast<inT16>(row->base_line(left) + 0.5));
00458   int right = box.right();
00459   ICOORD endpt(right, static_cast<inT16>(row->base_line(right) + 0.5));
00460   // Rotate to image coordinates and convert to global image coords.
00461   startpt.rotate(it_->block()->block->re_rotation());
00462   endpt.rotate(it_->block()->block->re_rotation());
00463   *x1 = startpt.x() / scale_ + rect_left_;
00464   *y1 = (rect_height_ - startpt.y()) / scale_ + rect_top_;
00465   *x2 = endpt.x() / scale_ + rect_left_;
00466   *y2 = (rect_height_ - endpt.y()) / scale_ + rect_top_;
00467   return true;
00468 }
00469 
00470 void PageIterator::Orientation(tesseract::Orientation *orientation,
00471                                tesseract::WritingDirection *writing_direction,
00472                                tesseract::TextlineOrder *textline_order,
00473                                float *deskew_angle) const {
00474   BLOCK* block = it_->block()->block;
00475 
00476   // Orientation
00477   FCOORD up_in_image(0.0, 1.0);
00478   up_in_image.unrotate(block->classify_rotation());
00479   up_in_image.rotate(block->re_rotation());
00480 
00481   if (up_in_image.x() == 0.0F) {
00482     if (up_in_image.y() > 0.0F) {
00483       *orientation = ORIENTATION_PAGE_UP;
00484     } else {
00485       *orientation = ORIENTATION_PAGE_DOWN;
00486     }
00487   } else if (up_in_image.x() > 0.0F) {
00488     *orientation = ORIENTATION_PAGE_RIGHT;
00489   } else {
00490     *orientation = ORIENTATION_PAGE_LEFT;
00491   }
00492 
00493   // Writing direction
00494   bool is_vertical_text = (block->classify_rotation().x() == 0.0);
00495   bool right_to_left = block->right_to_left();
00496   *writing_direction =
00497       is_vertical_text
00498           ? WRITING_DIRECTION_TOP_TO_BOTTOM
00499           : (right_to_left
00500                 ? WRITING_DIRECTION_RIGHT_TO_LEFT
00501                 : WRITING_DIRECTION_LEFT_TO_RIGHT);
00502 
00503   // Textline Order
00504   bool is_mongolian = false;  // TODO(eger): fix me
00505   *textline_order = is_vertical_text
00506       ? (is_mongolian
00507          ? TEXTLINE_ORDER_LEFT_TO_RIGHT
00508          : TEXTLINE_ORDER_RIGHT_TO_LEFT)
00509       : TEXTLINE_ORDER_TOP_TO_BOTTOM;
00510 
00511   // Deskew angle
00512   FCOORD skew = block->skew();  // true horizontal for textlines
00513   *deskew_angle = -skew.angle();
00514 }
00515 
00516 void PageIterator::ParagraphInfo(tesseract::ParagraphJustification *just,
00517                                  bool *is_list_item,
00518                                  bool *is_crown,
00519                                  int *first_line_indent) const {
00520   *just = tesseract::JUSTIFICATION_UNKNOWN;
00521   if (!it_->row() || !it_->row()->row || !it_->row()->row->para() ||
00522       !it_->row()->row->para()->model)
00523     return;
00524 
00525   PARA *para = it_->row()->row->para();
00526   *is_list_item = para->is_list_item;
00527   *is_crown = para->is_very_first_or_continuation;
00528   *first_line_indent = para->model->first_indent() -
00529       para->model->body_indent();
00530 }
00531 
00536 void PageIterator::BeginWord(int offset) {
00537   WERD_RES* word_res = it_->word();
00538   if (word_res == NULL) {
00539     // This is a non-text block, so there is no word.
00540     word_length_ = 0;
00541     blob_index_ = 0;
00542     word_ = NULL;
00543     return;
00544   }
00545   if (word_res->best_choice != NULL) {
00546     // Recognition has been done, so we are using the box_word, which
00547     // is already baseline denormalized.
00548     word_length_ = word_res->best_choice->length();
00549     ASSERT_HOST(word_res->box_word != NULL);
00550     if (word_res->box_word->length() != word_length_) {
00551       tprintf("Corrupted word! best_choice[len=%d] = %s, box_word[len=%d]: ",
00552               word_length_, word_res->best_choice->unichar_string().string(),
00553               word_res->box_word->length());
00554       word_res->box_word->bounding_box().print();
00555     }
00556     ASSERT_HOST(word_res->box_word->length() == word_length_);
00557     word_ = NULL;
00558     // We will be iterating the box_word.
00559     if (cblob_it_ != NULL) {
00560       delete cblob_it_;
00561       cblob_it_ = NULL;
00562     }
00563   } else {
00564     // No recognition yet, so a "symbol" is a cblob.
00565     word_ = word_res->word;
00566     ASSERT_HOST(word_->cblob_list() != NULL);
00567     word_length_ = word_->cblob_list()->length();
00568     if (cblob_it_ == NULL) cblob_it_ = new C_BLOB_IT;
00569     cblob_it_->set_to_list(word_->cblob_list());
00570   }
00571   for (blob_index_ = 0; blob_index_ < offset; ++blob_index_) {
00572     if (cblob_it_ != NULL)
00573       cblob_it_->forward();
00574   }
00575 }
00576 
00577 }  // namespace tesseract.