Tesseract 3.02
#include <colfind.h>
Public Member Functions
ColumnFinder (int gridsize, const ICOORD &bleft, const ICOORD &tright, int resolution, TabVector_LIST *vlines, TabVector_LIST *hlines, int vertical_x, int vertical_y)
virtual ~ColumnFinder ()
const DENORM * denorm () const
const TextlineProjection * projection () const
void SetupAndFilterNoise (Pix *photo_mask_pix, TO_BLOCK *input_block)
bool IsVerticallyAlignedText (TO_BLOCK *block, BLOBNBOX_CLIST *osd_blobs)
void CorrectOrientation (TO_BLOCK *block, bool vertical_text_lines, int recognition_rotation)
int FindBlocks (bool single_column, Pix *scaled_color, int scaled_factor, TO_BLOCK *block, Pix *photo_mask_pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
void GetDeskewVectors (FCOORD *deskew, FCOORD *reskew)
void SetEquationDetect (EquationDetectBase *detect)
tesseract::ColumnFinder::ColumnFinder (int gridsize, const ICOORD &bleft, const ICOORD &tright, int resolution, TabVector_LIST *vlines, TabVector_LIST *hlines, int vertical_x, int vertical_y)
Definition at line 84 of file colfind.cpp.
: TabFind(gridsize, bleft, tright, vlines, vertical_x, vertical_y,
          resolution),
  // Minimum gutter width is kMinGutterWidthGrid grid cells, truncated to int.
  min_gutter_width_(static_cast<int>(kMinGutterWidthGrid * gridsize)),
  // Initial column gap is the full horizontal extent of the bounding box.
  mean_column_gap_(tright.x() - bleft.x()),
  // All rotation vectors start out as (1, 0).
  reskew_(1.0f, 0.0f),
  rotation_(1.0f, 0.0f),
  rerotate_(1.0f, 0.0f),
  // Pointers created by later processing stages start out NULL.
  best_columns_(NULL),
  stroke_width_(NULL),
  part_grid_(gridsize, bleft, tright),
  nontext_map_(NULL),
  projection_(resolution),
  denorm_(NULL),
  input_blobs_win_(NULL),
  equation_detect_(NULL) {
  // Hand the caller's horizontal lines over to horizontal_lines_ through a
  // list iterator.
  TabVector_IT hline_it(&horizontal_lines_);
  hline_it.add_list_after(hlines);
}
tesseract::ColumnFinder::~ColumnFinder () [virtual]
Definition at line 102 of file colfind.cpp.
{
  // Releases everything this ColumnFinder still holds: the column sets, the
  // best_columns_ array, the stroke width grid, the debug window, the
  // non-text map, the DENORM chain and any boxes/cblobs left on the
  // partition and blob lists.
  column_sets_.delete_data_pointers();
  // delete and delete [] are no-ops on a null pointer, so no guards are
  // needed here.
  delete [] best_columns_;
  delete stroke_width_;
  delete input_blobs_win_;
  pixDestroy(&nontext_map_);

  // Unwind the DENORM chain from the newest entry back through each
  // predecessor, deleting every link.
  while (denorm_ != NULL) {
    DENORM* const doomed = denorm_;
    denorm_ = const_cast<DENORM*>(doomed->predecessor());
    delete doomed;
  }

  // The ColPartitions are destroyed automatically, but any boxes in
  // the noise_parts_ list are owned and need to be deleted explicitly.
  ColPartition_IT noise_it(&noise_parts_);
  for (noise_it.mark_cycle_pt(); !noise_it.cycled_list();
       noise_it.forward()) {
    noise_it.data()->DeleteBoxes();
  }

  // Likewise any boxes in the good_parts_ list need to be deleted.
  // These are just the image parts. Text parts have already given their
  // boxes on to the TO_BLOCK, and have empty lists.
  ColPartition_IT good_it(&good_parts_);
  for (good_it.mark_cycle_pt(); !good_it.cycled_list(); good_it.forward()) {
    good_it.data()->DeleteBoxes();
  }

  // Also, any blobs on the image_bblobs_ list need to have their cblobs
  // deleted. This only happens if there has been an early return from
  // FindColumns, as in a normal return, the blobs go into the grid and
  // end up in noise_parts_, good_parts_ or the output blocks.
  BLOBNBOX_IT blob_it(&image_bblobs_);
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    delete blob_it.data()->cblob();
  }
}
void tesseract::ColumnFinder::CorrectOrientation (TO_BLOCK *block, bool vertical_text_lines, int recognition_rotation)
Definition at line 205 of file colfind.cpp.
{
  // Chooses the page rotation (rotation_), its inverse (rerotate_) and the
  // per-blob text rotation (text_rotation_) from recognition_rotation and
  // vertical_text_lines, rotates the block's blob lists if the rotation is
  // not the identity, and installs the first DENORM in the chain.
  const FCOORD no_rotation(1.0f, 0.0f);
  const FCOORD quarter_turn_ccw(0.0f, 1.0f);
  const FCOORD half_turn(-1.0f, 0.0f);
  const FCOORD quarter_turn_cw(0.0f, -1.0f);

  text_rotation_ = no_rotation;
  // Rotate the page to make the text upright, as implied by
  // recognition_rotation. Any value other than 1, 2 or 3 means no rotation.
  switch (recognition_rotation) {
    case 1:
      rotation_ = quarter_turn_ccw;
      break;
    case 2:
      rotation_ = half_turn;
      break;
    case 3:
      rotation_ = quarter_turn_cw;
      break;
    default:
      rotation_ = no_rotation;
      break;
  }

  // We infer text writing direction to be vertical if there are several
  // vertical text lines detected, and horizontal if not. But if the page
  // orientation was determined to be 90 or 270 degrees, the true writing
  // direction is the opposite of what we inferred.
  const bool odd_quarter_turns = (recognition_rotation & 1) != 0;
  if (odd_quarter_turns) {
    vertical_text_lines = !vertical_text_lines;
  }

  // If we still believe the writing direction is vertical, we use the
  // convention of rotating the page ccw 90 degrees to make the text lines
  // horizontal, and mark the blobs for rotation cw 90 degrees for
  // classification so that the text order is correct after recognition.
  if (vertical_text_lines) {
    rotation_.rotate(quarter_turn_ccw);
    text_rotation_.rotate(quarter_turn_cw);
  }

  // Set rerotate_ to the inverse of rotation_ (same x, negated y).
  rerotate_ = FCOORD(rotation_.x(), -rotation_.y());

  const bool page_needs_rotating =
      rotation_.x() != 1.0f || rotation_.y() != 0.0f;
  if (page_needs_rotating) {
    // Rotate all the blobs and tab vectors.
    RotateBlobList(rotation_, &block->large_blobs);
    RotateBlobList(rotation_, &block->blobs);
    RotateBlobList(rotation_, &block->small_blobs);
    RotateBlobList(rotation_, &block->noise_blobs);
    TabFind::ResetForVerticalText(rotation_, rerotate_, &horizontal_lines_,
                                  &min_gutter_width_);
    part_grid_.Init(gridsize(), bleft(), tright());
    // Reset all blobs to initial state and filter by size.
    // Since they have rotated, the list they belong on could have changed.
    block->ReSetAndReFilterBlobs();
    SetBlockRuleEdges(block);
    stroke_width_->CorrectForRotation(rerotate_, &part_grid_);
  }

  if (textord_debug_tabfind) {
    tprintf("Vertical=%d, orientation=%d, final rotation=(%f, %f)+(%f,%f)\n",
            vertical_text_lines, recognition_rotation,
            rotation_.x(), rotation_.y(),
            text_rotation_.x(), text_rotation_.y());
  }

  // Setup the denormalization. This must be the first DENORM in the chain,
  // hence the assertion that none exists yet.
  ASSERT_HOST(denorm_ == NULL);
  denorm_ = new DENORM;
  denorm_->SetupNormalization(NULL, NULL, &rotation_, NULL, NULL, 0,
                              0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f);
}
const DENORM* tesseract::ColumnFinder::denorm () const [inline]
int tesseract::ColumnFinder::FindBlocks (bool single_column, Pix *scaled_color, int scaled_factor, TO_BLOCK *block, Pix *photo_mask_pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
Definition at line 284 of file colfind.cpp.
{
  // Runs the main layout analysis: builds partitions from the blobs of
  // input_block, finds tab vectors and columns, and emits the result into
  // blocks and to_blocks.
  // Returns 0 normally (including the "Empty page" early exit), or -1 if the
  // user typed 'd' into the blocks debug window.
  // NOTE(review): the member summary names the TO_BLOCK parameter `block`,
  // while this definition refers to it as `input_block`.
  pixOr(photo_mask_pix, photo_mask_pix, nontext_map_);
  stroke_width_->FindLeaderPartitions(input_block, &part_grid_);
  stroke_width_->RemoveLineResidue(&big_parts_);
  FindInitialTabVectors(NULL, min_gutter_width_, input_block);
  SetBlockRuleEdges(input_block);
  stroke_width_->GradeBlobsIntoPartitions(rerotate_, input_block, nontext_map_,
                                          denorm_, &projection_,
                                          &part_grid_, &big_parts_);
  // Image partitions are found, transferred into the photo mask, and then
  // found a second time using the updated mask.
  ImageFind::FindImagePartitions(photo_mask_pix, rotation_, rerotate_,
                                 input_block, this, &part_grid_, &big_parts_);
  ImageFind::TransferImagePartsToImageMask(rerotate_, &part_grid_,
                                           photo_mask_pix);
  ImageFind::FindImagePartitions(photo_mask_pix, rotation_, rerotate_,
                                 input_block, this, &part_grid_, &big_parts_);
  part_grid_.ReTypeBlobs(&image_bblobs_);
  TidyBlobs(input_block);
  Reset();
  // TODO(rays) need to properly handle big_parts_.
  ColPartition_IT p_it(&big_parts_);
  for (p_it.mark_cycle_pt(); !p_it.cycled_list(); p_it.forward())
    p_it.data()->DisownBoxes();
  big_parts_.clear();
  // The stroke width grid is not used again in this function.
  delete stroke_width_;
  stroke_width_ = NULL;

  // A note about handling right-to-left scripts (Hebrew/Arabic):
  // The columns must be reversed and come out in right-to-left instead of
  // the normal left-to-right order. Because the left-to-right ordering
  // is implicit in many data structures, it is simpler to fool the algorithms
  // into thinking they are dealing with left-to-right text.
  // To do this, we reflect the needed data in the y-axis and then reflect
  // the blocks back after they have been created. This is a temporary
  // arrangement that is confined to this function only, so the reflection
  // is completely invisible in the output blocks.
  // The only objects reflected are:
  // The vertical separator lines that have already been found;
  // The bounding boxes of all BLOBNBOXES on all lists on the input_block
  // plus the image_bblobs. The outlines are not touched, since they are
  // not looked at.
  bool input_is_rtl = input_block->block->right_to_left();
  if (input_is_rtl) {
    // Reflect the vertical separator lines (member of TabFind).
    ReflectInYAxis();
    // Reflect the blob boxes.
    ReflectForRtl(input_block, &image_bblobs_);
    part_grid_.ReflectInYAxis();
  }

  if (single_column) {
    // No tab stops needed. Just the grid that FindTabVectors makes.
    DontFindTabVectors(&image_bblobs_, input_block, &deskew_, &reskew_);
  } else {
    SetBlockRuleEdges(input_block);
    // Find the tab stops, estimate skew, and deskew the tabs, blobs and
    // part_grid_.
    FindTabVectors(&horizontal_lines_, &image_bblobs_, input_block,
                   min_gutter_width_, &part_grid_, &deskew_, &reskew_);
    // Add the deskew to the denorm_. The previous denorm_ is passed in as the
    // predecessor, so the whole chain stays reachable from denorm_.
    DENORM* new_denorm = new DENORM;
    new_denorm->SetupNormalization(NULL, NULL, &deskew_, denorm_, NULL,
                                   0, 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f);
    denorm_ = new_denorm;
  }
  SetBlockRuleEdges(input_block);
  part_grid_.SetTabStops(this);

  // Make the column_sets_.
  if (!MakeColumns(single_column)) {
    tprintf("Empty page!!\n");
    return 0;  // This is an empty page.
  }

  // Refill the grid using rectangular spreading, and get the benefit
  // of the completed tab vectors marking the rule edges of each blob.
  Clear();
#ifndef GRAPHICS_DISABLED
  if (textord_tabfind_show_reject_blobs) {
    ScrollView* rej_win = MakeWindow(500, 300, "Rejected blobs");
    input_block->plot_graded_blobs(rej_win);
  }
#endif  // GRAPHICS_DISABLED
  InsertBlobsToGrid(false, false, &image_bblobs_, this);
  InsertBlobsToGrid(true, true, &input_block->blobs, this);

  part_grid_.GridFindMargins(best_columns_);
  // Split and merge the partitions by looking at local neighbours.
  GridSplitPartitions();
  // Resolve unknown partitions by adding to an existing partition, fixing
  // the type, or declaring them noise.
  part_grid_.GridFindMargins(best_columns_);
  GridMergePartitions();
  // Insert any unused noise blobs that are close enough to an appropriate
  // partition.
  InsertRemainingNoise(input_block);
  // Add horizontal and vertical line separators as partitions.
  GridInsertHLinePartitions();
  GridInsertVLinePartitions();
  // Recompute margins based on a local neighbourhood search.
  part_grid_.GridFindMargins(best_columns_);
  SetPartitionTypes();
  // Guarded like the other window-creating debug blocks in this function so
  // that no ScrollView is created when graphics are compiled out.
#ifndef GRAPHICS_DISABLED
  if (textord_tabfind_show_initial_partitions) {
    ScrollView* part_win = MakeWindow(100, 300, "InitialPartitions");
    part_grid_.DisplayBoxes(part_win);
    DisplayTabVectors(part_win);
  }
#endif  // GRAPHICS_DISABLED

  if (equation_detect_) {
    equation_detect_->FindEquationParts(&part_grid_, best_columns_);
  }
  if (textord_tabfind_find_tables) {
    TableFinder table_finder;
    table_finder.Init(gridsize(), bleft(), tright());
    table_finder.set_resolution(resolution_);
    table_finder.set_left_to_right_language(
        !input_block->block->right_to_left());
    // Copy cleaned partitions from part_grid_ to clean_part_grid_ and
    // insert dot-like noise into period_grid_
    table_finder.InsertCleanPartitions(&part_grid_, input_block);
    // Get Table Regions
    table_finder.LocateTables(&part_grid_, best_columns_, WidthCB(), reskew_);
  }
  GridRemoveUnderlinePartitions();
  part_grid_.DeleteUnknownParts(input_block);

  // Build the partitions into chains that belong in the same block and
  // refine into one-to-one links, then smooth the types within each chain.
  part_grid_.FindPartitionPartners();
  part_grid_.FindFigureCaptions();
  part_grid_.RefinePartitionPartners(true);
  SmoothPartnerRuns();

#ifndef GRAPHICS_DISABLED
  if (textord_tabfind_show_partitions) {
    ScrollView* window = MakeWindow(400, 300, "Partitions");
    // window is tested against NULL further down, so it must not be
    // dereferenced unconditionally here.
    if (window != NULL && textord_debug_images)
      window->Image(AlignedBlob::textord_debug_pix().string(),
                    image_origin().x(), image_origin().y());
    part_grid_.DisplayBoxes(window);
    if (!textord_debug_printable)
      DisplayTabVectors(window);
    if (window != NULL && textord_tabfind_show_partitions > 1) {
      // Block until the window is closed; the returned event is ours to free.
      delete window->AwaitEvent(SVET_DESTROY);
    }
  }
#endif  // GRAPHICS_DISABLED
  part_grid_.AssertNoDuplicates();
  // Ownership of the ColPartitions moves from part_sets_ to part_grid_ here,
  // and ownership of the BLOBNBOXes moves to the ColPartitions.
  // (They were previously owned by the block or the image_bblobs list.)
  ReleaseBlobsAndCleanupUnused(input_block);
  // Ownership of the ColPartitions moves from part_grid_ to good_parts_ and
  // noise_parts_ here. In text blocks, ownership of the BLOBNBOXes moves
  // from the ColPartitions to the output TO_BLOCK. In non-text, the
  // BLOBNBOXes stay with the ColPartitions and get deleted in the destructor.
  TransformToBlocks(blocks, to_blocks);
  if (textord_debug_tabfind) {
    tprintf("Found %d blocks, %d to_blocks\n",
            blocks->length(), to_blocks->length());
  }

  DisplayBlocks(blocks);
  RotateAndReskewBlocks(input_is_rtl, to_blocks);
  int result = 0;
#ifndef GRAPHICS_DISABLED
  if (blocks_win_ != NULL) {
    // Wait for the user: input 'd' makes the function return -1, any other
    // input clears the output blocks, and destroying the window just
    // continues. All other events keep waiting.
    bool waiting = false;
    do {
      waiting = false;
      SVEvent* event = blocks_win_->AwaitEvent(SVET_ANY);
      if (event->type == SVET_INPUT && event->parameter != NULL) {
        if (*event->parameter == 'd')
          result = -1;
        else
          blocks->clear();
      } else if (event->type == SVET_DESTROY) {
        blocks_win_ = NULL;
      } else {
        waiting = true;
      }
      delete event;
    } while (waiting);
  }
#endif  // GRAPHICS_DISABLED
  return result;
}
void tesseract::ColumnFinder::GetDeskewVectors (FCOORD *deskew, FCOORD *reskew)
Definition at line 475 of file colfind.cpp.
bool tesseract::ColumnFinder::IsVerticallyAlignedText (TO_BLOCK *block, BLOBNBOX_CLIST *osd_blobs)
Definition at line 189 of file colfind.cpp.
{
  // Forwards the query to the stroke width object and returns its answer
  // unchanged. stroke_width_ is dereferenced without a check, so it must
  // already have been created when this is called.
  const bool text_is_vertical =
      stroke_width_->TestVerticalTextDirection(block, osd_blobs);
  return text_is_vertical;
}
const TextlineProjection* tesseract::ColumnFinder::projection () const [inline]
void tesseract::ColumnFinder::SetEquationDetect (EquationDetectBase *detect)
Definition at line 481 of file colfind.cpp.
{
  // Remembers the supplied equation detector; only the pointer is stored.
  this->equation_detect_ = detect;
}
void tesseract::ColumnFinder::SetupAndFilterNoise (Pix *photo_mask_pix, TO_BLOCK *input_block)
Definition at line 150 of file colfind.cpp.
{
  // Prepares for a new page: re-initialises the partition grid, builds a
  // fresh StrokeWidth, re-filters the input blobs, and computes the initial
  // non-text map (nontext_map_) from photo_mask_pix and input_block.
  part_grid_.Init(gridsize(), bleft(), tright());

  // Replace any previous stroke width object; delete on NULL is a no-op.
  delete stroke_width_;
  stroke_width_ = new StrokeWidth(gridsize(), bleft(), tright());

  min_gutter_width_ = static_cast<int>(kMinGutterWidthGrid * gridsize());
  input_block->ReSetAndReFilterBlobs();
#ifndef GRAPHICS_DISABLED
  if (textord_tabfind_show_blocks) {
    input_blobs_win_ = MakeWindow(0, 0, "Filtered Input Blobs");
    input_block->plot_graded_blobs(input_blobs_win_);
  }
#endif  // GRAPHICS_DISABLED
  SetBlockRuleEdges(input_block);

  // Drop any non-text map left over from an earlier call before a new one
  // is computed below.
  pixDestroy(&nontext_map_);

  // Run a preliminary strokewidth neighbour detection on the medium blobs.
  stroke_width_->SetNeighboursOnMediumBlobs(input_block);

  // Remove obvious noise and make the initial non-text map.
  CCNonTextDetect noise_detector(gridsize(), bleft(), tright());
  nontext_map_ = noise_detector.ComputeNonTextMask(textord_debug_tabfind,
                                                   photo_mask_pix,
                                                   input_block);

  // TODO(rays) experiment with making broken CJK fixing dependent on the
  // language, and keeping the merged blobs on output instead of exploding at
  // ColPartition::MakeBlock.
  stroke_width_->FindTextlineDirectionAndFixBrokenCJK(true, input_block);

  // Clear the strokewidth grid ready for rotation or leader finding.
  stroke_width_->Clear();
}