Tesseract
3.02
|
00001 00002 // File: pageiterator.cpp 00003 // Description: Iterator for tesseract page structure that avoids using 00004 // tesseract internal data structures. 00005 // Author: Ray Smith 00006 // Created: Fri Feb 26 14:32:09 PST 2010 00007 // 00008 // (C) Copyright 2010, Google Inc. 00009 // Licensed under the Apache License, Version 2.0 (the "License"); 00010 // you may not use this file except in compliance with the License. 00011 // You may obtain a copy of the License at 00012 // http://www.apache.org/licenses/LICENSE-2.0 00013 // Unless required by applicable law or agreed to in writing, software 00014 // distributed under the License is distributed on an "AS IS" BASIS, 00015 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00016 // See the License for the specific language governing permissions and 00017 // limitations under the License. 00018 // 00020 00021 #include "pageiterator.h" 00022 #include "allheaders.h" 00023 #include "helpers.h" 00024 #include "pageres.h" 00025 #include "tesseractclass.h" 00026 00027 namespace tesseract { 00028 00029 PageIterator::PageIterator(PAGE_RES* page_res, Tesseract* tesseract, 00030 int scale, int scaled_yres, 00031 int rect_left, int rect_top, 00032 int rect_width, int rect_height) 00033 : page_res_(page_res), tesseract_(tesseract), 00034 word_(NULL), word_length_(0), blob_index_(0), cblob_it_(NULL), 00035 scale_(scale), scaled_yres_(scaled_yres), 00036 rect_left_(rect_left), rect_top_(rect_top), 00037 rect_width_(rect_width), rect_height_(rect_height) { 00038 it_ = new PAGE_RES_IT(page_res); 00039 PageIterator::Begin(); 00040 } 00041 00042 PageIterator::~PageIterator() { 00043 delete it_; 00044 delete cblob_it_; 00045 } 00046 00052 PageIterator::PageIterator(const PageIterator& src) 00053 : page_res_(src.page_res_), tesseract_(src.tesseract_), 00054 word_(NULL), word_length_(src.word_length_), 00055 blob_index_(src.blob_index_), cblob_it_(NULL), 00056 scale_(src.scale_), scaled_yres_(src.scaled_yres_), 00057 rect_left_(src.rect_left_), rect_top_(src.rect_top_), 00058 rect_width_(src.rect_width_), rect_height_(src.rect_height_) { 00059 it_ = new PAGE_RES_IT(*src.it_); 00060 BeginWord(src.blob_index_); 00061 } 00062 00063 const PageIterator& PageIterator::operator=(const PageIterator& src) { 00064 page_res_ = src.page_res_; 00065 tesseract_ = src.tesseract_; 00066 scale_ = src.scale_; 00067 scaled_yres_ = src.scaled_yres_; 00068 rect_left_ = src.rect_left_; 00069 rect_top_ = src.rect_top_; 00070 rect_width_ = src.rect_width_; 00071 rect_height_ = src.rect_height_; 00072 if (it_ != NULL) delete it_; 00073 it_ = new PAGE_RES_IT(*src.it_); 00074 BeginWord(src.blob_index_); 00075 return *this; 00076 } 00077 00078 bool PageIterator::PositionedAtSameWord(const PAGE_RES_IT* other) const { 00079 return (it_ == NULL && it_ == other) || 00080 ((other != NULL) && (it_ != NULL) && (*it_ == *other)); 00081 } 00082 00083 // ============= Moving around within the page ============. 00084 00086 void PageIterator::Begin() { 00087 it_->restart_page_with_empties(); 00088 BeginWord(0); 00089 } 00090 00091 void PageIterator::RestartParagraph() { 00092 if (it_->block() == NULL) return; // At end of the document. 00093 PAGE_RES_IT para(page_res_); 00094 PAGE_RES_IT next_para(para); 00095 next_para.forward_paragraph(); 00096 while (next_para.cmp(*it_) <= 0) { 00097 para = next_para; 00098 next_para.forward_paragraph(); 00099 } 00100 *it_ = para; 00101 BeginWord(0); 00102 } 00103 00104 bool PageIterator::IsWithinFirstTextlineOfParagraph() const { 00105 PageIterator p_start(*this); 00106 p_start.RestartParagraph(); 00107 return p_start.it_->row() == it_->row(); 00108 } 00109 00110 void PageIterator::RestartRow() { 00111 it_->restart_row(); 00112 BeginWord(0); 00113 } 00114 00128 bool PageIterator::Next(PageIteratorLevel level) { 00129 if (it_->block() == NULL) return false; // Already at the end! 00130 if (it_->word() == NULL) 00131 level = RIL_BLOCK; 00132 00133 switch (level) { 00134 case RIL_BLOCK: 00135 it_->forward_block(); 00136 break; 00137 case RIL_PARA: 00138 it_->forward_paragraph(); 00139 break; 00140 case RIL_TEXTLINE: 00141 for (it_->forward_with_empties(); it_->row() == it_->prev_row(); 00142 it_->forward_with_empties()); 00143 break; 00144 case RIL_WORD: 00145 it_->forward_with_empties(); 00146 break; 00147 case RIL_SYMBOL: 00148 if (cblob_it_ != NULL) 00149 cblob_it_->forward(); 00150 ++blob_index_; 00151 if (blob_index_ >= word_length_) 00152 it_->forward_with_empties(); 00153 else 00154 return true; 00155 break; 00156 } 00157 BeginWord(0); 00158 return it_->block() != NULL; 00159 } 00160 00166 bool PageIterator::IsAtBeginningOf(PageIteratorLevel level) const { 00167 if (it_->block() == NULL) return false; // Already at the end! 00168 if (it_->word() == NULL) return true; // In an image block. 00169 switch (level) { 00170 case RIL_BLOCK: 00171 return blob_index_ == 0 && it_->block() != it_->prev_block(); 00172 case RIL_PARA: 00173 return blob_index_ == 0 && 00174 (it_->block() != it_->prev_block() || 00175 it_->row()->row->para() != it_->prev_row()->row->para()); 00176 case RIL_TEXTLINE: 00177 return blob_index_ == 0 && it_->row() != it_->prev_row(); 00178 case RIL_WORD: 00179 return blob_index_ == 0; 00180 case RIL_SYMBOL: 00181 return true; 00182 } 00183 return false; 00184 } 00185 00190 bool PageIterator::IsAtFinalElement(PageIteratorLevel level, 00191 PageIteratorLevel element) const { 00192 if (Empty(element)) return true; // Already at the end! 00193 // The result is true if we step forward by element and find we are 00194 // at the the end of the page or at beginning of *all* levels in: 00195 // [level, element). 00196 // When there is more than one level difference between element and level, 00197 // we could for instance move forward one symbol and still be at the first 00198 // word on a line, so we also have to be at the first symbol in a word. 00199 PageIterator next(*this); 00200 next.Next(element); 00201 if (next.Empty(element)) return true; // Reached the end of the page. 00202 while (element > level) { 00203 element = static_cast<PageIteratorLevel>(element - 1); 00204 if (!next.IsAtBeginningOf(element)) 00205 return false; 00206 } 00207 return true; 00208 } 00209 00216 int PageIterator::Cmp(const PageIterator &other) const { 00217 int word_cmp = it_->cmp(*other.it_); 00218 if (word_cmp != 0) 00219 return word_cmp; 00220 if (blob_index_ < other.blob_index_) 00221 return -1; 00222 if (blob_index_ == other.blob_index_) 00223 return 0; 00224 return 1; 00225 } 00226 00227 // ============= Accessing data ==============. 00228 // Coordinate system: 00229 // Integer coordinates are at the cracks between the pixels. 00230 // The top-left corner of the top-left pixel in the image is at (0,0). 00231 // The bottom-right corner of the bottom-right pixel in the image is at 00232 // (width, height). 00233 // Every bounding box goes from the top-left of the top-left contained 00234 // pixel to the bottom-right of the bottom-right contained pixel, so 00235 // the bounding box of the single top-left pixel in the image is: 00236 // (0,0)->(1,1). 00237 // If an image rectangle has been set in the API, then returned coordinates 00238 // relate to the original (full) image, rather than the rectangle. 00239 00246 bool PageIterator::BoundingBoxInternal(PageIteratorLevel level, 00247 int* left, int* top, 00248 int* right, int* bottom) const { 00249 if (Empty(level)) 00250 return false; 00251 TBOX box; 00252 PARA *para = NULL; 00253 switch (level) { 00254 case RIL_BLOCK: 00255 box = it_->block()->block->bounding_box(); 00256 break; 00257 case RIL_PARA: 00258 para = it_->row()->row->para(); 00259 // explicit fall-through. 00260 case RIL_TEXTLINE: 00261 box = it_->row()->row->bounding_box(); 00262 break; 00263 case RIL_WORD: 00264 box = it_->word()->word->bounding_box(); 00265 break; 00266 case RIL_SYMBOL: 00267 if (cblob_it_ == NULL) 00268 box = it_->word()->box_word->BlobBox(blob_index_); 00269 else 00270 box = cblob_it_->data()->bounding_box(); 00271 } 00272 if (level == RIL_PARA) { 00273 PageIterator other = *this; 00274 other.Begin(); 00275 do { 00276 if (other.it_->block() && 00277 other.it_->block()->block == it_->block()->block && 00278 other.it_->row() && other.it_->row()->row && 00279 other.it_->row()->row->para() == para) { 00280 box = box.bounding_union(other.it_->row()->row->bounding_box()); 00281 } 00282 } while (other.Next(RIL_TEXTLINE)); 00283 } 00284 if (level != RIL_SYMBOL || cblob_it_ != NULL) 00285 box.rotate(it_->block()->block->re_rotation()); 00286 // Now we have a box in tesseract coordinates relative to the image rectangle, 00287 // we have to convert the coords to a top-down system. 00288 const int pix_height = pixGetHeight(tesseract_->pix_binary()); 00289 const int pix_width = pixGetWidth(tesseract_->pix_binary()); 00290 *left = ClipToRange(static_cast<int>(box.left()), 0, pix_width); 00291 *top = ClipToRange(pix_height - box.top(), 0, pix_height); 00292 *right = ClipToRange(static_cast<int>(box.right()), *left, pix_width); 00293 *bottom = ClipToRange(pix_height - box.bottom(), *top, pix_height); 00294 return true; 00295 } 00296 00303 bool PageIterator::BoundingBox(PageIteratorLevel level, 00304 int* left, int* top, 00305 int* right, int* bottom) const { 00306 if (!BoundingBoxInternal(level, left, top, right, bottom)) 00307 return false; 00308 // Convert to the coordinate system of the original image. 00309 *left = ClipToRange(*left / scale_ + rect_left_, 00310 rect_left_, rect_left_ + rect_width_); 00311 *top = ClipToRange(*top / scale_ + rect_top_, 00312 rect_top_, rect_top_ + rect_height_); 00313 *right = ClipToRange((*right + scale_ - 1) / scale_ + rect_left_, 00314 *left, rect_left_ + rect_width_); 00315 *bottom = ClipToRange((*bottom + scale_ - 1) / scale_ + rect_top_, 00316 *top, rect_top_ + rect_height_); 00317 return true; 00318 } 00319 00321 bool PageIterator::Empty(PageIteratorLevel level) const { 00322 if (it_->block() == NULL) return true; // Already at the end! 00323 if (it_->word() == NULL && level != RIL_BLOCK) return true; // image block 00324 if (level == RIL_SYMBOL && blob_index_ >= word_length_) 00325 return true; // Zero length word, or already at the end of it. 00326 return false; 00327 } 00328 00330 PolyBlockType PageIterator::BlockType() const { 00331 if (it_->block() == NULL || it_->block()->block == NULL) 00332 return PT_UNKNOWN; // Already at the end! 00333 if (it_->block()->block->poly_block() == NULL) 00334 return PT_FLOWING_TEXT; // No layout analysis used - assume text. 00335 return it_->block()->block->poly_block()->isA(); 00336 } 00337 00360 Pix* PageIterator::GetBinaryImage(PageIteratorLevel level) const { 00361 int left, top, right, bottom; 00362 if (!BoundingBoxInternal(level, &left, &top, &right, &bottom)) 00363 return NULL; 00364 Pix* pix = NULL; 00365 switch (level) { 00366 case RIL_BLOCK: 00367 case RIL_PARA: 00368 int bleft, btop, bright, bbottom; 00369 BoundingBoxInternal(RIL_BLOCK, &bleft, &btop, &bright, &bbottom); 00370 pix = it_->block()->block->render_mask(); 00371 // AND the mask and the image. 00372 pixRasterop(pix, 0, 0, pixGetWidth(pix), pixGetHeight(pix), 00373 PIX_SRC & PIX_DST, tesseract_->pix_binary(), 00374 bleft, btop); 00375 if (level == RIL_PARA) { 00376 // RIL_PARA needs further attention: 00377 // clip the paragraph from the block mask. 00378 Box* box = boxCreate(left - bleft, top - btop, 00379 right - left, bottom - top); 00380 Pix* pix2 = pixClipRectangle(pix, box, NULL); 00381 boxDestroy(&box); 00382 pixDestroy(&pix); 00383 pix = pix2; 00384 } 00385 break; 00386 case RIL_TEXTLINE: 00387 case RIL_WORD: 00388 case RIL_SYMBOL: 00389 if (level == RIL_SYMBOL && cblob_it_ != NULL && 00390 cblob_it_->data()->area() != 0) 00391 return cblob_it_->data()->render(); 00392 // Just clip from the bounding box. 00393 Box* box = boxCreate(left, top, right - left, bottom - top); 00394 pix = pixClipRectangle(tesseract_->pix_binary(), box, NULL); 00395 boxDestroy(&box); 00396 break; 00397 } 00398 return pix; 00399 } 00400 00411 Pix* PageIterator::GetImage(PageIteratorLevel level, int padding, 00412 int* left, int* top) const { 00413 int right, bottom; 00414 if (!BoundingBox(level, left, top, &right, &bottom)) 00415 return NULL; 00416 Pix* pix = tesseract_->pix_grey(); 00417 if (pix == NULL) 00418 return GetBinaryImage(level); 00419 00420 // Expand the box. 00421 *left = MAX(*left - padding, 0); 00422 *top = MAX(*top - padding, 0); 00423 right = MIN(right + padding, rect_width_); 00424 bottom = MIN(bottom + padding, rect_height_); 00425 Box* box = boxCreate(*left, *top, right - *left, bottom - *top); 00426 Pix* grey_pix = pixClipRectangle(pix, box, NULL); 00427 boxDestroy(&box); 00428 if (level == RIL_BLOCK) { 00429 Pix* mask = it_->block()->block->render_mask(); 00430 Pix* expanded_mask = pixCreate(right - *left, bottom - *top, 1); 00431 pixRasterop(expanded_mask, padding, padding, 00432 pixGetWidth(mask), pixGetHeight(mask), 00433 PIX_SRC, mask, 0, 0); 00434 pixDestroy(&mask); 00435 pixDilateBrick(expanded_mask, expanded_mask, 2*padding + 1, 2*padding + 1); 00436 pixInvert(expanded_mask, expanded_mask); 00437 pixSetMasked(grey_pix, expanded_mask, 255); 00438 pixDestroy(&expanded_mask); 00439 } 00440 return grey_pix; 00441 } 00442 00448 bool PageIterator::Baseline(PageIteratorLevel level, 00449 int* x1, int* y1, int* x2, int* y2) const { 00450 if (it_->word() == NULL) return false; // Already at the end! 00451 ROW* row = it_->row()->row; 00452 WERD* word = it_->word()->word; 00453 TBOX box = (level == RIL_WORD || level == RIL_SYMBOL) 00454 ? word->bounding_box() 00455 : row->bounding_box(); 00456 int left = box.left(); 00457 ICOORD startpt(left, static_cast<inT16>(row->base_line(left) + 0.5)); 00458 int right = box.right(); 00459 ICOORD endpt(right, static_cast<inT16>(row->base_line(right) + 0.5)); 00460 // Rotate to image coordinates and convert to global image coords. 00461 startpt.rotate(it_->block()->block->re_rotation()); 00462 endpt.rotate(it_->block()->block->re_rotation()); 00463 *x1 = startpt.x() / scale_ + rect_left_; 00464 *y1 = (rect_height_ - startpt.y()) / scale_ + rect_top_; 00465 *x2 = endpt.x() / scale_ + rect_left_; 00466 *y2 = (rect_height_ - endpt.y()) / scale_ + rect_top_; 00467 return true; 00468 } 00469 00470 void PageIterator::Orientation(tesseract::Orientation *orientation, 00471 tesseract::WritingDirection *writing_direction, 00472 tesseract::TextlineOrder *textline_order, 00473 float *deskew_angle) const { 00474 BLOCK* block = it_->block()->block; 00475 00476 // Orientation 00477 FCOORD up_in_image(0.0, 1.0); 00478 up_in_image.unrotate(block->classify_rotation()); 00479 up_in_image.rotate(block->re_rotation()); 00480 00481 if (up_in_image.x() == 0.0F) { 00482 if (up_in_image.y() > 0.0F) { 00483 *orientation = ORIENTATION_PAGE_UP; 00484 } else { 00485 *orientation = ORIENTATION_PAGE_DOWN; 00486 } 00487 } else if (up_in_image.x() > 0.0F) { 00488 *orientation = ORIENTATION_PAGE_RIGHT; 00489 } else { 00490 *orientation = ORIENTATION_PAGE_LEFT; 00491 } 00492 00493 // Writing direction 00494 bool is_vertical_text = (block->classify_rotation().x() == 0.0); 00495 bool right_to_left = block->right_to_left(); 00496 *writing_direction = 00497 is_vertical_text 00498 ? WRITING_DIRECTION_TOP_TO_BOTTOM 00499 : (right_to_left 00500 ? WRITING_DIRECTION_RIGHT_TO_LEFT 00501 : WRITING_DIRECTION_LEFT_TO_RIGHT); 00502 00503 // Textline Order 00504 bool is_mongolian = false; // TODO(eger): fix me 00505 *textline_order = is_vertical_text 00506 ? (is_mongolian 00507 ? TEXTLINE_ORDER_LEFT_TO_RIGHT 00508 : TEXTLINE_ORDER_RIGHT_TO_LEFT) 00509 : TEXTLINE_ORDER_TOP_TO_BOTTOM; 00510 00511 // Deskew angle 00512 FCOORD skew = block->skew(); // true horizontal for textlines 00513 *deskew_angle = -skew.angle(); 00514 } 00515 00516 void PageIterator::ParagraphInfo(tesseract::ParagraphJustification *just, 00517 bool *is_list_item, 00518 bool *is_crown, 00519 int *first_line_indent) const { 00520 *just = tesseract::JUSTIFICATION_UNKNOWN; 00521 if (!it_->row() || !it_->row()->row || !it_->row()->row->para() || 00522 !it_->row()->row->para()->model) 00523 return; 00524 00525 PARA *para = it_->row()->row->para(); 00526 *is_list_item = para->is_list_item; 00527 *is_crown = para->is_very_first_or_continuation; 00528 *first_line_indent = para->model->first_indent() - 00529 para->model->body_indent(); 00530 } 00531 00536 void PageIterator::BeginWord(int offset) { 00537 WERD_RES* word_res = it_->word(); 00538 if (word_res == NULL) { 00539 // This is a non-text block, so there is no word. 00540 word_length_ = 0; 00541 blob_index_ = 0; 00542 word_ = NULL; 00543 return; 00544 } 00545 if (word_res->best_choice != NULL) { 00546 // Recognition has been done, so we are using the box_word, which 00547 // is already baseline denormalized. 00548 word_length_ = word_res->best_choice->length(); 00549 ASSERT_HOST(word_res->box_word != NULL); 00550 if (word_res->box_word->length() != word_length_) { 00551 tprintf("Corrupted word! best_choice[len=%d] = %s, box_word[len=%d]: ", 00552 word_length_, word_res->best_choice->unichar_string().string(), 00553 word_res->box_word->length()); 00554 word_res->box_word->bounding_box().print(); 00555 } 00556 ASSERT_HOST(word_res->box_word->length() == word_length_); 00557 word_ = NULL; 00558 // We will be iterating the box_word. 00559 if (cblob_it_ != NULL) { 00560 delete cblob_it_; 00561 cblob_it_ = NULL; 00562 } 00563 } else { 00564 // No recognition yet, so a "symbol" is a cblob. 00565 word_ = word_res->word; 00566 ASSERT_HOST(word_->cblob_list() != NULL); 00567 word_length_ = word_->cblob_list()->length(); 00568 if (cblob_it_ == NULL) cblob_it_ = new C_BLOB_IT; 00569 cblob_it_->set_to_list(word_->cblob_list()); 00570 } 00571 for (blob_index_ = 0; blob_index_ < offset; ++blob_index_) { 00572 if (cblob_it_ != NULL) 00573 cblob_it_->forward(); 00574 } 00575 } 00576 00577 } // namespace tesseract.