Tesseract
3.02
|
#include <colpartitiongrid.h>
Public Member Functions | |
ColPartitionGrid () | |
ColPartitionGrid (int gridsize, const ICOORD &bleft, const ICOORD &tright) | |
virtual | ~ColPartitionGrid () |
void | HandleClick (int x, int y) |
void | Merges (TessResultCallback2< bool, ColPartition *, TBOX * > *box_cb, TessResultCallback2< bool, const ColPartition *, const ColPartition * > *confirm_cb) |
bool | MergePart (TessResultCallback2< bool, ColPartition *, TBOX * > *box_cb, TessResultCallback2< bool, const ColPartition *, const ColPartition * > *confirm_cb, ColPartition *part) |
void | FindOverlappingPartitions (const TBOX &box, const ColPartition *not_this, ColPartition_CLIST *parts) |
ColPartition * | BestMergeCandidate (const ColPartition *part, ColPartition_CLIST *candidates, bool debug, TessResultCallback2< bool, const ColPartition *, const ColPartition * > *confirm_cb, int *overlap_increase) |
void | SplitOverlappingPartitions (ColPartition_LIST *big_parts) |
bool | GridSmoothNeighbours (BlobTextFlowType source_type, Pix *nontext_map, const TBOX &im_box, const FCOORD &rerotation) |
void | ComputePartitionColors (Pix *scaled_color, int scaled_factor, const FCOORD &rerotation) |
void | ReflectInYAxis () |
void | Deskew (const FCOORD &deskew) |
void | SetTabStops (TabFind *tabgrid) |
bool | MakeColPartSets (PartSetVector *part_sets) |
ColPartitionSet * | MakeSingleColumnSet (WidthCallback *cb) |
void | ClaimBoxes () |
void | ReTypeBlobs (BLOBNBOX_LIST *im_blobs) |
void | RecomputeBounds (int gridsize, const ICOORD &bleft, const ICOORD &tright, const ICOORD &vertical) |
void | GridFindMargins (ColPartitionSet **best_columns) |
void | ListFindMargins (ColPartitionSet **best_columns, ColPartition_LIST *parts) |
void | DeleteParts () |
void | DeleteUnknownParts (TO_BLOCK *block) |
void | FindFigureCaptions () |
void | FindPartitionPartners () |
void | FindPartitionPartners (bool upper, ColPartition *part) |
void | FindVPartitionPartners (bool to_the_left, ColPartition *part) |
void | RefinePartitionPartners (bool get_desperate) |
Definition at line 33 of file colpartitiongrid.h.
tesseract::ColPartitionGrid::ColPartitionGrid()
Definition at line 71 of file colpartitiongrid.cpp.
{
  // Empty body: this constructor executes no statements of its own.
}
tesseract::ColPartitionGrid::ColPartitionGrid(
    int gridsize,
    const ICOORD& bleft,
    const ICOORD& tright)
Definition at line 73 of file colpartitiongrid.cpp.
tesseract::ColPartitionGrid::~ColPartitionGrid() [virtual]
Definition at line 79 of file colpartitiongrid.cpp.
{
  // Empty body: this destructor executes no statements of its own.
}
ColPartition* tesseract::ColPartitionGrid::BestMergeCandidate(
    const ColPartition* part,
    ColPartition_CLIST* candidates,
    bool debug,
    TessResultCallback2<bool, const ColPartition*, const ColPartition*>* confirm_cb,
    int* overlap_increase)
Definition at line 379 of file colpartitiongrid.cpp.
{
  // Chooses which member of candidates is the best one to merge with part.
  // Returns the chosen candidate, or NULL when candidates is empty or no
  // candidate is accepted by confirm_cb (a NULL confirm_cb accepts all).
  // If overlap_increase is non-NULL it receives the overlap increase
  // associated with the result (0 when candidates is empty).
  if (overlap_increase != NULL)
    *overlap_increase = 0;
  if (candidates->empty())
    return NULL;
  int ok_overlap =
      static_cast<int>(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5);
  // The best neighbour to merge with is the one that causes least
  // total pairwise overlap among all the neighbours.
  // If more than one offers the same total overlap, choose the one
  // with the least total area.
  const TBOX& part_box = part->bounding_box();
  ColPartition_C_IT it(candidates);
  ColPartition* best_candidate = NULL;
  // Find the total combined box of all candidates and the original.
  TBOX full_box(part_box);
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    ColPartition* candidate = it.data();
    full_box += candidate->bounding_box();
  }
  // Keep valid neighbours in a list.
  ColPartition_CLIST neighbours;
  // Now run a rect search of the merged box for overlapping neighbours, as
  // we need anything that might be overlapped by the merged box.
  FindOverlappingPartitions(full_box, part, &neighbours);
  if (debug) {
    tprintf("Finding best merge candidate from %d, %d neighbours for box:",
            candidates->length(), neighbours.length());
    part_box.print();
  }
  // If the best increase in overlap is positive, then we also check the
  // worst non-candidate overlap. This catches the case of multiple good
  // candidates that overlap each other when merged. If the worst
  // non-candidate overlap is better than the best overlap, then return
  // the worst non-candidate overlap instead.
  ColPartition_CLIST non_candidate_neighbours;
  non_candidate_neighbours.set_subtract(SortByBoxLeft<ColPartition>, true,
                                        &neighbours, candidates);
  int worst_nc_increase = 0;
  int best_increase = MAX_INT32;
  int best_area = 0;
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    ColPartition* candidate = it.data();
    if (confirm_cb != NULL && !confirm_cb->Run(part, candidate)) {
      if (debug) {
        tprintf("Candidate not confirmed:");
        candidate->bounding_box().print();
      }
      continue;
    }
    int increase = IncreaseInOverlap(part, candidate, ok_overlap, &neighbours);
    const TBOX& cand_box = candidate->bounding_box();
    if (best_candidate == NULL || increase < best_increase) {
      best_candidate = candidate;
      best_increase = increase;
      // Area added to the candidate's box by taking the union with part.
      best_area = cand_box.bounding_union(part_box).area() - cand_box.area();
      if (debug) {
        tprintf("New best merge candidate has increase %d, area %d, over box:",
                increase, best_area);
        full_box.print();
        candidate->Print();
      }
    } else if (increase == best_increase) {
      // Tie on overlap increase: prefer the smaller added area.
      int area = cand_box.bounding_union(part_box).area() - cand_box.area();
      if (area < best_area) {
        best_area = area;
        best_candidate = candidate;
      }
    }
    // Track the largest increase measured against non-candidates only.
    increase = IncreaseInOverlap(part, candidate, ok_overlap,
                                 &non_candidate_neighbours);
    if (increase > worst_nc_increase)
      worst_nc_increase = increase;
  }
  if (best_increase > 0) {
    // If the worst non-candidate increase is less than the best increase
    // including the candidates, then all the candidates can merge together
    // and the increase in outside overlap would be less, so use that result,
    // but only if each candidate is either a good diacritic merge with part,
    // or an ok merge candidate with all the others.
    // See TestCompatibleCandidates for more explanation and a picture.
    if (worst_nc_increase < best_increase &&
        TestCompatibleCandidates(*part, debug, candidates)) {
      best_increase = worst_nc_increase;
    }
  }
  if (overlap_increase != NULL)
    *overlap_increase = best_increase;
  return best_candidate;
}
void tesseract::ColPartitionGrid::ClaimBoxes()
Definition at line 812 of file colpartitiongrid.cpp.
{
  // Calls ClaimBoxes() on every ColPartition returned by a full search of
  // this grid.
  // Iterate the ColPartitions in the grid.
  ColPartitionGridSearch gsearch(this);
  gsearch.StartFullSearch();
  ColPartition* part;
  while ((part = gsearch.NextFullSearch()) != NULL) {
    part->ClaimBoxes();
  }
}
void tesseract::ColPartitionGrid::ComputePartitionColors(
    Pix* scaled_color,
    int scaled_factor,
    const FCOORD& rerotation)
Definition at line 620 of file colpartitiongrid.cpp.
{
  // For every partition in the grid, rotates a copy of its bounding box by
  // rerotation and passes it, with scaled_color and scaled_factor, to
  // ImageFind::ComputeRectangleColors together with the partition's
  // color1()/color2(). Does nothing when scaled_color is NULL.
  // When textord_tabfind_show_color_fit is set, three debug images are also
  // created, handed to ComputeRectangleColors, written out as PNG files and
  // then destroyed.
  if (scaled_color == NULL)
    return;
  Pix* color_map1 = NULL;
  Pix* color_map2 = NULL;
  Pix* rms_map = NULL;
  if (textord_tabfind_show_color_fit) {
    int width = pixGetWidth(scaled_color);
    int height = pixGetHeight(scaled_color);
    color_map1 = pixCreate(width, height, 32);
    color_map2 = pixCreate(width, height, 32);
    rms_map = pixCreate(width, height, 8);
  }
  // Iterate the ColPartitions in the grid.
  ColPartitionGridSearch gsearch(this);
  gsearch.StartFullSearch();
  ColPartition* part;
  while ((part = gsearch.NextFullSearch()) != NULL) {
    // Work on a copy so the partition's own box is not modified here.
    TBOX part_box = part->bounding_box();
    part_box.rotate_large(rerotation);
    ImageFind::ComputeRectangleColors(part_box, scaled_color,
                                      scaled_factor,
                                      color_map1, color_map2, rms_map,
                                      part->color1(), part->color2());
  }
  // color_map1 is only non-NULL if the debug images were created above.
  if (color_map1 != NULL) {
    pixWrite("swcolorinput.png", scaled_color, IFF_PNG);
    pixWrite("swcolor1.png", color_map1, IFF_PNG);
    pixWrite("swcolor2.png", color_map2, IFF_PNG);
    pixWrite("swrms.png", rms_map, IFF_PNG);
    pixDestroy(&color_map1);
    pixDestroy(&color_map2);
    pixDestroy(&rms_map);
  }
}
void tesseract::ColPartitionGrid::DeleteParts()
Definition at line 950 of file colpartitiongrid.cpp.
{
  // Moves every partition in the grid onto a local list after calling
  // DisownBoxes() on it, then clears the grid.
  ColPartition_LIST dead_parts;
  ColPartition_IT dead_it(&dead_parts);
  ColPartitionGridSearch gsearch(this);
  gsearch.StartFullSearch();
  ColPartition* part;
  while ((part = gsearch.NextFullSearch()) != NULL) {
    part->DisownBoxes();
    dead_it.add_to_end(part);  // Parts will be deleted on return.
  }
  Clear();
}
void tesseract::ColPartitionGrid::DeleteUnknownParts(TO_BLOCK* block)
Definition at line 965 of file colpartitiongrid.cpp.
{
  // Removes from the grid and deletes every partition whose blob_type() is
  // BRT_UNKNOWN, after re-marking it (and, via SetBlobTypes, its blobs) as
  // non-text noise. Finishes by calling block->DeleteUnownedNoise().
  ColPartitionGridSearch gsearch(this);
  gsearch.StartFullSearch();
  ColPartition* part;
  while ((part = gsearch.NextFullSearch()) != NULL) {
    if (part->blob_type() == BRT_UNKNOWN) {
      gsearch.RemoveBBox();
      // Once marked, the blobs will be swept up by DeleteUnownedNoise.
      part->set_flow(BTFT_NONTEXT);
      part->set_blob_type(BRT_NOISE);
      part->SetBlobTypes();
      part->DisownBoxes();
      delete part;
    }
  }
  block->DeleteUnownedNoise();
}
void tesseract::ColPartitionGrid::Deskew(const FCOORD& deskew)
Definition at line 684 of file colpartitiongrid.cpp.
{
  // Rebuilds the grid over its current bounds rotated by deskew: all
  // partitions are collected on a temporary list, the grid is
  // re-initialised, and each partition is re-inserted after ComputeLimits().
  ColPartition_LIST parts;
  ColPartition_IT part_it(&parts);
  // Iterate the ColPartitions in the grid to extract them.
  ColPartitionGridSearch gsearch(this);
  gsearch.StartFullSearch();
  ColPartition* part;
  while ((part = gsearch.NextFullSearch()) != NULL) {
    part_it.add_after_then_move(part);
  }
  // Rebuild the grid to the new size.
  TBOX grid_box(bleft_, tright_);
  grid_box.rotate_large(deskew);
  Init(gridsize(), grid_box.botleft(), grid_box.topright());
  // Reinitializing the grid with rotated coords also clears all the
  // pointers, so parts will now own the ColPartitions. (Briefly).
  for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
    part = part_it.extract();
    part->ComputeLimits();
    InsertBBox(true, true, part);
  }
}
void tesseract::ColPartitionGrid::FindFigureCaptions()
Definition at line 984 of file colpartitiongrid.cpp.
{
  // For each image region find its best candidate text caption region,
  // if any and mark it as such.
  // A caption chain is accepted only if it has at most kMaxCaptionLines
  // lines; accepted partitions get type PT_CAPTION_TEXT.
  ColPartitionGridSearch gsearch(this);
  gsearch.StartFullSearch();
  ColPartition* part;
  while ((part = gsearch.NextFullSearch()) != NULL) {
    if (part->IsImageType()) {
      const TBOX& part_box = part->bounding_box();
      bool debug = AlignedBlob::WithinTestRegion(2, part_box.left(),
                                                 part_box.bottom());
      ColPartition* best_caption = NULL;
      int best_dist = 0;   // Distance to best_caption.
      int best_upper = 0;  // Direction of best_caption.
      // Handle both lower and upper directions.
      for (int upper = 0; upper < 2; ++upper) {
        ColPartition_C_IT partner_it(upper ? part->upper_partners()
                                           : part->lower_partners());
        // If there are no image partners, then this direction is ok.
        for (partner_it.mark_cycle_pt(); !partner_it.cycled_list();
             partner_it.forward()) {
          ColPartition* partner = partner_it.data();
          if (partner->IsImageType()) {
            break;
          }
        }
        // Leaving the loop early means an image partner was found: skip
        // this direction.
        if (!partner_it.cycled_list())
          continue;
        // Find the nearest totally overlapping text partner.
        for (partner_it.mark_cycle_pt(); !partner_it.cycled_list();
             partner_it.forward()) {
          ColPartition* partner = partner_it.data();
          if (!partner->IsTextType())
            continue;
          const TBOX& partner_box = partner->bounding_box();
          if (debug) {
            tprintf("Finding figure captions for image part:");
            part_box.print();
            tprintf("Considering partner:");
            partner_box.print();
          }
          // The partner must lie horizontally within the image's box.
          if (partner_box.left() >= part_box.left() &&
              partner_box.right() <= part_box.right()) {
            int dist = partner_box.y_gap(part_box);
            if (best_caption == NULL || dist < best_dist) {
              best_dist = dist;
              best_caption = partner;
              best_upper = upper;
            }
          }
        }
      }
      if (best_caption != NULL) {
        if (debug) {
          tprintf("Best caption candidate:");
          best_caption->bounding_box().print();
        }
        // We have a candidate caption. Qualify it as being separable from
        // any body text. We are looking for either a small number of lines
        // or a big gap that indicates a separation from the body text.
        int line_count = 0;
        int biggest_gap = 0;
        int smallest_gap = MAX_INT16;
        int total_height = 0;
        int mean_height = 0;
        ColPartition* end_partner = NULL;
        ColPartition* next_partner = NULL;
        for (ColPartition* partner = best_caption;
             partner != NULL && line_count <= kMaxCaptionLines;
             partner = next_partner) {
          if (!partner->IsTextType()) {
            end_partner = partner;
            break;
          }
          ++line_count;
          total_height += partner->bounding_box().height();
          next_partner = partner->SingletonPartner(best_upper);
          if (next_partner != NULL) {
            int gap = partner->bounding_box().y_gap(
                next_partner->bounding_box());
            if (gap > biggest_gap) {
              biggest_gap = gap;
              end_partner = next_partner;
              mean_height = total_height / line_count;
            } else if (gap < smallest_gap) {
              smallest_gap = gap;
            }
            // If the gap looks big compared to the text size and the smallest
            // gap seen so far, then we can stop.
            if (biggest_gap > mean_height * kMinCaptionGapHeightRatio &&
                biggest_gap > smallest_gap * kMinCaptionGapRatio)
              break;
          }
        }
        if (debug) {
          tprintf("Line count=%d, biggest gap %d, smallest%d, mean height %d\n",
                  line_count, biggest_gap, smallest_gap, mean_height);
          if (end_partner != NULL) {
            tprintf("End partner:");
            end_partner->bounding_box().print();
          }
        }
        if (next_partner == NULL && line_count <= kMaxCaptionLines)
          end_partner = NULL;  // No gap, but line count is small.
        if (line_count <= kMaxCaptionLines) {
          // This is a qualified caption. Mark the text as caption.
          for (ColPartition* partner = best_caption;
               partner != NULL && partner != end_partner;
               partner = next_partner) {
            partner->set_type(PT_CAPTION_TEXT);
            partner->SetBlobTypes();
            if (debug) {
              tprintf("Set caption type for partition:");
              partner->bounding_box().print();
            }
            next_partner = partner->SingletonPartner(best_upper);
          }
        }
      }
    }
  }
}
void tesseract::ColPartitionGrid::FindOverlappingPartitions(
    const TBOX& box,
    const ColPartition* not_this,
    ColPartition_CLIST* parts)
Definition at line 326 of file colpartitiongrid.cpp.
{
  // Runs a rectangle search of the grid over box and adds every partition
  // it returns, other than not_this, to parts via add_sorted using
  // SortByBoxLeft<ColPartition> (second add_sorted argument is true).
  ColPartitionGridSearch box_search(this);
  box_search.StartRectSearch(box);
  for (ColPartition* hit = box_search.NextRectSearch(); hit != NULL;
       hit = box_search.NextRectSearch()) {
    if (hit == not_this) {
      continue;  // The excluded partition is never reported.
    }
    parts->add_sorted(SortByBoxLeft<ColPartition>, true, hit);
  }
}
void tesseract::ColPartitionGrid::FindPartitionPartners()
Definition at line 1109 of file colpartitiongrid.cpp.
{
  // Walks every partition in the grid and runs the partner search twice on
  // it, first with the flag true and then with it false. Partitions for
  // which IsVerticalType() holds go through FindVPartitionPartners; all
  // others go through the two-argument FindPartitionPartners.
  ColPartitionGridSearch full_search(this);
  full_search.StartFullSearch();
  for (ColPartition* current = full_search.NextFullSearch(); current != NULL;
       current = full_search.NextFullSearch()) {
    if (!current->IsVerticalType()) {
      FindPartitionPartners(true, current);
      FindPartitionPartners(false, current);
      continue;
    }
    FindVPartitionPartners(true, current);
    FindVPartitionPartners(false, current);
  }
}
void tesseract::ColPartitionGrid::FindPartitionPartners(
    bool upper,
    ColPartition* part)
Definition at line 1126 of file colpartitiongrid.cpp.
{
  // Finds at most one partner for part on the side selected by upper and
  // registers it with part->AddPartner(upper, ...). Noise partitions
  // neither get nor become partners.
  if (part->type() == PT_NOISE)
    return;  // Noise is not allowed to partner anything.
  const TBOX& box = part->bounding_box();
  int top = part->median_top();
  int bottom = part->median_bottom();
  int height = top - bottom;
  int mid_y = (bottom + top) / 2;
  ColPartitionGridSearch vsearch(this);
  // Search vertically from part's MidY over its x-range; the direction
  // flag given to NextVerticalSearch below is !upper.
  vsearch.StartVerticalSearch(box.left(), box.right(), part->MidY());
  ColPartition* neighbour;
  ColPartition* best_neighbour = NULL;
  int best_dist = MAX_INT32;
  while ((neighbour = vsearch.NextVerticalSearch(!upper)) != NULL) {
    if (neighbour == part || neighbour->type() == PT_NOISE)
      continue;  // Noise is not allowed to partner anything.
    int neighbour_bottom = neighbour->median_bottom();
    int neighbour_top = neighbour->median_top();
    int neighbour_y = (neighbour_bottom + neighbour_top) / 2;
    // Skip neighbours whose mid-y is on the wrong side of part's mid-y.
    if (upper != (neighbour_y > mid_y))
      continue;
    if (!part->HOverlaps(*neighbour) && !part->WithinSameMargins(*neighbour))
      continue;
    if (!part->TypesMatch(*neighbour)) {
      // A type-mismatched neighbour is kept only as a fallback, and only
      // if nothing has been chosen yet.
      if (best_neighbour == NULL)
        best_neighbour = neighbour;
      continue;
    }
    int dist = upper ? neighbour_bottom - top : bottom - neighbour_top;
    if (dist <= kMaxPartitionSpacing * height) {
      if (dist < best_dist) {
        best_dist = dist;
        best_neighbour = neighbour;
      }
    } else {
      break;
    }
  }
  if (best_neighbour != NULL)
    part->AddPartner(upper, best_neighbour);
}
void tesseract::ColPartitionGrid::FindVPartitionPartners(
    bool to_the_left,
    ColPartition* part)
Definition at line 1171 of file colpartitiongrid.cpp.
{
  // Finds at most one sideways partner for part on the side selected by
  // to_the_left and registers it with part->AddPartner(to_the_left, ...).
  // Noise partitions neither get nor become partners.
  if (part->type() == PT_NOISE)
    return;  // Noise is not allowed to partner anything.
  const TBOX& box = part->bounding_box();
  int left = part->median_left();
  int right = part->median_right();
  int width = right - left;
  int mid_x = (left + right) / 2;
  ColPartitionGridSearch hsearch(this);
  // Search sideways from mid_x over part's y-range; the direction is
  // given by to_the_left.
  hsearch.StartSideSearch(mid_x, box.bottom(), box.top());
  ColPartition* neighbour;
  ColPartition* best_neighbour = NULL;
  int best_dist = MAX_INT32;
  while ((neighbour = hsearch.NextSideSearch(to_the_left)) != NULL) {
    if (neighbour == part || neighbour->type() == PT_NOISE)
      continue;  // Noise is not allowed to partner anything.
    int neighbour_left = neighbour->median_left();
    int neighbour_right = neighbour->median_right();
    int neighbour_x = (neighbour_left + neighbour_right) / 2;
    // Skip neighbours whose mid-x is on the wrong side of part's mid-x.
    if (to_the_left != (neighbour_x < mid_x))
      continue;
    if (!part->VOverlaps(*neighbour))
      continue;
    if (!part->TypesMatch(*neighbour))
      continue;  // Only match to other vertical text.
    int dist = to_the_left ? left - neighbour_right : neighbour_left - right;
    if (dist <= kMaxPartitionSpacing * width) {
      if (dist < best_dist || best_neighbour == NULL) {
        best_dist = dist;
        best_neighbour = neighbour;
      }
    } else {
      break;
    }
  }
  // For vertical partitions, the upper partner is to the left, and lower is
  // to the right.
  if (best_neighbour != NULL)
    part->AddPartner(to_the_left, best_neighbour);
}
void tesseract::ColPartitionGrid::GridFindMargins(ColPartitionSet** best_columns)
Definition at line 908 of file colpartitiongrid.cpp.
{
  // Calls FindPartitionMargins on every partition in the grid, passing the
  // entry of best_columns for the search's current grid row (or NULL when
  // best_columns is NULL).
  // Iterate the ColPartitions in the grid.
  ColPartitionGridSearch gsearch(this);
  gsearch.StartFullSearch();
  ColPartition* part;
  while ((part = gsearch.NextFullSearch()) != NULL) {
    // Pick the column set indexed by the search's current grid y.
    ColPartitionSet* columns = best_columns != NULL
                             ? best_columns[gsearch.GridY()]
                             : NULL;
    FindPartitionMargins(columns, part);
    const TBOX& box = part->bounding_box();
    // Debug output for partitions inside the test region.
    if (AlignedBlob::WithinTestRegion(2, box.left(), box.bottom())) {
      tprintf("Computed margins for part:");
      part->Print();
    }
  }
}
bool tesseract::ColPartitionGrid::GridSmoothNeighbours(
    BlobTextFlowType source_type,
    Pix* nontext_map,
    const TBOX& im_box,
    const FCOORD& rerotation)
Definition at line 598 of file colpartitiongrid.cpp.
{
  // Runs SmoothRegionType on every partition whose flow() equals
  // source_type and whose blob type is not a line type. Returns true if
  // any SmoothRegionType call returned true.
  // NOTE(review): this body names the rotation argument `rotation`, while
  // the signature shown on this page calls the fourth parameter
  // `rerotation` — confirm against the parameter name in the definition.
  // Iterate the ColPartitions in the grid.
  ColPartitionGridSearch gsearch(this);
  gsearch.StartFullSearch();
  ColPartition* part;
  bool any_changed = false;
  while ((part = gsearch.NextFullSearch()) != NULL) {
    if (part->flow() != source_type ||
        BLOBNBOX::IsLineType(part->blob_type()))
      continue;
    const TBOX& box = part->bounding_box();
    bool debug = AlignedBlob::WithinTestRegion(2, box.left(), box.bottom());
    if (SmoothRegionType(nontext_map, im_box, rotation, debug, part))
      any_changed = true;
  }
  return any_changed;
}
void tesseract::ColPartitionGrid::HandleClick(
    int x,
    int y) [virtual]
Reimplemented from tesseract::BBGrid< ColPartition, ColPartition_CLIST, ColPartition_C_IT >.
Definition at line 83 of file colpartitiongrid.cpp.
{
  // Forwards the click to the BBGrid base implementation, then prints the
  // bounding box and details of every partition returned by a radial
  // search at (x, y) whose bounding box contains the click point.
  BBGrid<ColPartition,
         ColPartition_CLIST, ColPartition_C_IT>::HandleClick(x, y);
  // Run a radial search for partitions that overlap.
  ColPartitionGridSearch radsearch(this);
  radsearch.SetUniqueMode(true);
  radsearch.StartRadSearch(x, y, 1);
  ColPartition* neighbour;
  FCOORD click(x, y);
  while ((neighbour = radsearch.NextRadSearch()) != NULL) {
    TBOX nbox = neighbour->bounding_box();
    if (nbox.contains(click)) {
      tprintf("Block box:");
      neighbour->bounding_box().print();
      neighbour->Print();
    }
  }
}
void tesseract::ColPartitionGrid::ListFindMargins(
    ColPartitionSet** best_columns,
    ColPartition_LIST* parts)
Definition at line 932 of file colpartitiongrid.cpp.
{
  // Calls FindPartitionMargins on every partition in the list parts. When
  // best_columns is non-NULL, the column set passed is the best_columns
  // entry for the grid row of the partition's bottom-left corner;
  // otherwise NULL is passed.
  ColPartition_IT part_it(parts);
  for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) {
    ColPartition* part = part_it.data();
    ColPartitionSet* columns = NULL;
    if (best_columns != NULL) {
      TBOX part_box = part->bounding_box();
      // Get the columns from the y grid coord.
      int grid_x, grid_y;
      GridCoords(part_box.left(), part_box.bottom(), &grid_x, &grid_y);
      columns = best_columns[grid_y];
    }
    FindPartitionMargins(columns, part);
  }
}
bool tesseract::ColPartitionGrid::MakeColPartSets(PartSetVector* part_sets)
Definition at line 732 of file colpartitiongrid.cpp.
{
  // Buckets the grid's partitions by grid row and, if any qualified,
  // appends one entry per row to part_sets: a new ColPartitionSet built
  // from that row's list, or NULL for an empty row.
  // Returns true if at least one partition qualified; when none did,
  // part_sets only has capacity reserved and nothing is appended.
  ColPartition_LIST* part_lists = new ColPartition_LIST[gridheight()];
  part_sets->reserve(gridheight());
  // Iterate the ColPartitions in the grid to get parts onto lists for the
  // y bottom of each.
  ColPartitionGridSearch gsearch(this);
  gsearch.StartFullSearch();
  ColPartition* part;
  bool any_parts_found = false;
  while ((part = gsearch.NextFullSearch()) != NULL) {
    BlobRegionType blob_type = part->blob_type();
    // Skip noise, and skip unknown-type partitions holding a single box.
    if (blob_type != BRT_NOISE &&
        (blob_type != BRT_UNKNOWN || !part->boxes()->singleton())) {
      int grid_x, grid_y;
      const TBOX& part_box = part->bounding_box();
      GridCoords(part_box.left(), part_box.bottom(), &grid_x, &grid_y);
      ColPartition_IT part_it(&part_lists[grid_y]);
      part_it.add_to_end(part);
      any_parts_found = true;
    }
  }
  if (any_parts_found) {
    for (int grid_y = 0; grid_y < gridheight(); ++grid_y) {
      ColPartitionSet* line_set = NULL;
      if (!part_lists[grid_y].empty()) {
        line_set = new ColPartitionSet(&part_lists[grid_y]);
      }
      part_sets->push_back(line_set);
    }
  }
  delete [] part_lists;
  return any_parts_found;
}
ColPartitionSet* tesseract::ColPartitionGrid::MakeSingleColumnSet(WidthCallback* cb)
Definition at line 770 of file colpartitiongrid.cpp.
{
  // Builds one synthetic partition from the qualifying partitions in the
  // grid: it starts as a ShallowCopy of the first qualifying partition
  // (retyped BRT_TEXT) and takes the left/right tab of any later
  // qualifying partition with a smaller left_key() / larger right_key().
  // Returns a new ColPartitionSet holding that partition, or NULL if no
  // partition qualified.
  ColPartition* single_column_part = NULL;
  // Iterate the ColPartitions in the grid, folding the qualifying ones
  // into single_column_part.
  ColPartitionGridSearch gsearch(this);
  gsearch.StartFullSearch();
  ColPartition* part;
  while ((part = gsearch.NextFullSearch()) != NULL) {
    BlobRegionType blob_type = part->blob_type();
    // Skip noise, and skip unknown-type partitions holding a single box.
    if (blob_type != BRT_NOISE &&
        (blob_type != BRT_UNKNOWN || !part->boxes()->singleton())) {
      // Consider for single column.
      BlobTextFlowType flow = part->flow();
      if ((blob_type == BRT_TEXT &&
          (flow == BTFT_STRONG_CHAIN || flow == BTFT_CHAIN ||
           flow == BTFT_LEADER || flow == BTFT_TEXT_ON_IMAGE)) ||
          blob_type == BRT_RECTIMAGE || blob_type == BRT_POLYIMAGE) {
        if (single_column_part == NULL) {
          single_column_part = part->ShallowCopy();
          single_column_part->set_blob_type(BRT_TEXT);
          // Copy the tabs from itself to properly setup the margins.
          single_column_part->CopyLeftTab(*single_column_part, false);
          single_column_part->CopyRightTab(*single_column_part, false);
        } else {
          if (part->left_key() < single_column_part->left_key())
            single_column_part->CopyLeftTab(*part, false);
          if (part->right_key() > single_column_part->right_key())
            single_column_part->CopyRightTab(*part, false);
        }
      }
    }
  }
  if (single_column_part != NULL) {
    // Make a ColPartitionSet out of the single_column_part as a candidate
    // for the single column case.
    single_column_part->SetColumnGoodness(cb);
    return new ColPartitionSet(single_column_part);
  }
  return NULL;
}
bool tesseract::ColPartitionGrid::MergePart(
    TessResultCallback2<bool, ColPartition*, TBOX*>* box_cb,
    TessResultCallback2<bool, const ColPartition*, const ColPartition*>* confirm_cb,
    ColPartition* part)
Definition at line 130 of file colpartitiongrid.cpp.
{
  // Repeatedly absorbs the best merge candidate into part for as long as
  // BestMergeCandidate returns one with an overlap increase <= 0.
  // Returns true if at least one merge happened; returns false immediately
  // when part->IsUnMergeableType().
  if (part->IsUnMergeableType())
    return false;
  bool any_done = false;
  // Repeatedly merge part while we find a best merge candidate that works.
  bool merge_done = false;
  do {
    merge_done = false;
    TBOX box = part->bounding_box();
    bool debug = AlignedBlob::WithinTestRegion(2, box.left(), box.bottom());
    if (debug) {
      tprintf("Merge candidate:");
      box.print();
    }
    // Set up a rectangle search bounded by the part.
    // In a do-while, continue jumps to the loop condition; merge_done is
    // false here, so a rejection by box_cb ends the loop.
    if (!box_cb->Run(part, &box))
      continue;
    // Create a list of merge candidates.
    ColPartition_CLIST merge_candidates;
    FindMergeCandidates(part, box, debug, &merge_candidates);
    // Find the best merge candidate based on minimal overlap increase.
    int overlap_increase;
    ColPartition* neighbour = BestMergeCandidate(part, &merge_candidates, debug,
                                                 confirm_cb,
                                                 &overlap_increase);
    if (neighbour != NULL && overlap_increase <= 0) {
      if (debug) {
        tprintf("Merging:hoverlap=%d, voverlap=%d, OLI=%d\n",
                part->HCoreOverlap(*neighbour), part->VCoreOverlap(*neighbour),
                overlap_increase);
      }
      // Looks like a good candidate so merge it.
      RemoveBBox(neighbour);
      // We will modify the box of part, so remove it from the grid, merge
      // it and then re-insert it into the grid.
      RemoveBBox(part);
      part->Absorb(neighbour, NULL);
      InsertBBox(true, true, part);
      merge_done = true;
      any_done = true;
    } else if (neighbour != NULL) {
      if (debug) {
        tprintf("Overlapped when merged with increase %d: ", overlap_increase);
        neighbour->bounding_box().print();
      }
    } else if (debug) {
      tprintf("No candidate neighbour returned\n");
    }
  } while (merge_done);
  return any_done;
}
void tesseract::ColPartitionGrid::Merges(
    TessResultCallback2<bool, ColPartition*, TBOX*>* box_cb,
    TessResultCallback2<bool, const ColPartition*, const ColPartition*>* confirm_cb)
Definition at line 109 of file colpartitiongrid.cpp.
{
  // Runs MergePart on every partition in the grid, repositioning the
  // search iterator after any call that reports a merge.
  // Both callbacks are deleted before returning, so the caller must not
  // use them afterwards.
  // Iterate the ColPartitions in the grid.
  ColPartitionGridSearch gsearch(this);
  gsearch.StartFullSearch();
  ColPartition* part;
  while ((part = gsearch.NextFullSearch()) != NULL) {
    if (MergePart(box_cb, confirm_cb, part))
      gsearch.RepositionIterator();
  }
  delete box_cb;
  delete confirm_cb;
}
void tesseract::ColPartitionGrid::RecomputeBounds(
    int gridsize,
    const ICOORD& bleft,
    const ICOORD& tright,
    const ICOORD& vertical)
Definition at line 879 of file colpartitiongrid.cpp.
{
  // Re-initialises the grid with the given gridsize/bleft/tright and puts
  // every existing partition back into it after setting its vertical to
  // the given value and calling ComputeLimits().
  ColPartition_LIST saved_parts;
  ColPartition_IT part_it(&saved_parts);
  // Iterate the ColPartitions in the grid to get parts onto a list.
  ColPartitionGridSearch gsearch(this);
  gsearch.StartFullSearch();
  ColPartition* part;
  while ((part = gsearch.NextFullSearch()) != NULL) {
    part_it.add_to_end(part);
  }
  // Reinitialize grid to the new size.
  Init(gridsize, bleft, tright);
  // Recompute the bounds of the parts and put them back in the new grid.
  for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
    part = part_it.extract();
    part->set_vertical(vertical);
    part->ComputeLimits();
    InsertBBox(true, true, part);
  }
}
void tesseract::ColPartitionGrid::RefinePartitionPartners(bool get_desperate)
Definition at line 1217 of file colpartitiongrid.cpp.
{
  // For each type value in turn, does a full pass over the grid calling
  // RefinePartners(type, get_desperate, this) on every partition.
  ColPartitionGridSearch gsearch(this);
  // Refine in type order so that chasing multiple partners can be done
  // before eliminating type mis-matching partners.
  // NOTE(review): the bound is inclusive, so PT_COUNT itself is passed to
  // RefinePartners as a type — confirm this is intended.
  for (int type = PT_UNKNOWN + 1; type <= PT_COUNT; type++) {
    // Iterate the ColPartitions in the grid.
    gsearch.StartFullSearch();
    ColPartition* part;
    while ((part = gsearch.NextFullSearch()) != NULL) {
      part->RefinePartners(static_cast<PolyBlockType>(type),
                           get_desperate, this);
      // Iterator may have been messed up by a merge.
      gsearch.RepositionIterator();
    }
  }
}
void tesseract::ColPartitionGrid::ReflectInYAxis()
Definition at line 660 of file colpartitiongrid.cpp.
{
  // Rebuilds the grid with its x-extent negated and swapped (y unchanged)
  // and re-inserts every partition after calling its own ReflectInYAxis().
  ColPartition_LIST parts;
  ColPartition_IT part_it(&parts);
  // Iterate the ColPartitions in the grid to extract them.
  ColPartitionGridSearch gsearch(this);
  gsearch.StartFullSearch();
  ColPartition* part;
  while ((part = gsearch.NextFullSearch()) != NULL) {
    part_it.add_after_then_move(part);
  }
  // New corners: the old right edge negated becomes the new left edge, and
  // the old left edge negated becomes the new right edge.
  ICOORD bot_left(-tright().x(), bleft().y());
  ICOORD top_right(-bleft().x(), tright().y());
  // Reinitializing the grid with reflected coords also clears all the
  // pointers, so parts will now own the ColPartitions. (Briefly).
  Init(gridsize(), bot_left, top_right);
  for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
    part = part_it.extract();
    part->ReflectInYAxis();
    InsertBBox(true, true, part);
  }
}
void tesseract::ColPartitionGrid::ReTypeBlobs(BLOBNBOX_LIST* im_blobs)
Definition at line 825 of file colpartitiongrid.cpp.
{
  // Pushes each partition's type and flow down onto its blobs:
  //  - blobs of image partitions (BRT_POLYIMAGE / BRT_RECTIMAGE) are added
  //    to im_blobs;
  //  - blobs of other non-noise partitions get the partition's region type
  //    and, unless their flow is BTFT_LEADER, its flow; blobs already
  //    marked BRT_NOISE are un-owned and extracted from the partition;
  //  - noise partitions and partitions left with no boxes are removed from
  //    the grid and placed on a local list.
  BLOBNBOX_IT im_blob_it(im_blobs);
  ColPartition_LIST dead_parts;
  ColPartition_IT dead_part_it(&dead_parts);
  // Iterate the ColPartitions in the grid.
  ColPartitionGridSearch gsearch(this);
  gsearch.StartFullSearch();
  ColPartition* part;
  while ((part = gsearch.NextFullSearch()) != NULL) {
    BlobRegionType blob_type = part->blob_type();
    BlobTextFlowType flow = part->flow();
    if (blob_type == BRT_POLYIMAGE || blob_type == BRT_RECTIMAGE) {
      BLOBNBOX_C_IT blob_it(part->boxes());
      for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
        BLOBNBOX* blob = blob_it.data();
        im_blob_it.add_after_then_move(blob);
      }
    } else if (blob_type != BRT_NOISE) {
      // Make sure the blobs are marked with the correct type and flow.
      BLOBNBOX_C_IT blob_it(part->boxes());
      for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
        BLOBNBOX* blob = blob_it.data();
        if (blob->region_type() == BRT_NOISE) {
          // TODO(rays) Deprecated. Change this section to an assert to verify
          // and then delete.
          ASSERT_HOST(blob->cblob()->area() != 0);
          blob->set_owner(NULL);
          blob_it.extract();
        } else {
          blob->set_region_type(blob_type);
          if (blob->flow() != BTFT_LEADER)
            blob->set_flow(flow);
        }
      }
    }
    if (blob_type == BRT_NOISE || part->boxes()->empty()) {
      BLOBNBOX_C_IT blob_it(part->boxes());
      part->DisownBoxes();
      dead_part_it.add_to_end(part);
      gsearch.RemoveBBox();
      for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
        BLOBNBOX* blob = blob_it.data();
        if (blob->cblob()->area() == 0) {
          // Any blob with zero area is a fake image blob and should be deleted.
          delete blob->cblob();
          delete blob;
        }
      }
    }
  }
}
void tesseract::ColPartitionGrid::SetTabStops(TabFind* tabgrid)
Definition at line 708 of file colpartitiongrid.cpp.
{
  // For every partition in the grid, looks up left and right tab vectors
  // for its bounding box in tabgrid, sets them on the partition when they
  // really are left/right tabs, and then calls SetColumnGoodness with
  // tabgrid->WidthCB().
  // Iterate the ColPartitions in the grid.
  ColPartitionGridSearch gsearch(this);
  gsearch.StartFullSearch();
  ColPartition* part;
  while ((part = gsearch.NextFullSearch()) != NULL) {
    const TBOX& part_box = part->bounding_box();
    TabVector* left_line = tabgrid->LeftTabForBox(part_box, true, false);
    // If the overlapping line is not a left tab, try for non-overlapping.
    if (left_line != NULL && !left_line->IsLeftTab())
      left_line = tabgrid->LeftTabForBox(part_box, false, false);
    if (left_line != NULL && left_line->IsLeftTab())
      part->SetLeftTab(left_line);
    TabVector* right_line = tabgrid->RightTabForBox(part_box, true, false);
    // Same retry as for the left side, with the second argument false.
    if (right_line != NULL && !right_line->IsRightTab())
      right_line = tabgrid->RightTabForBox(part_box, false, false);
    if (right_line != NULL && right_line->IsRightTab())
      part->SetRightTab(right_line);
    part->SetColumnGoodness(tabgrid->WidthCB());
  }
}
void tesseract::ColPartitionGrid::SplitOverlappingPartitions(ColPartition_LIST* big_parts)
Definition at line 488 of file colpartitiongrid.cpp.
{
  // For each partition in the grid, examines the neighbours found by a
  // rectangle search of its bounding box and tries to resolve overlaps
  // that OKMergeOverlap does not accept in both directions, either by
  // removing the biggest box of one partition (RemoveBadBox, with
  // big_parts) or by splitting one partition at a blob.
  // A singleton partition with more than 2 unresolved overlaps is removed
  // from the grid and appended to big_parts.
  int ok_overlap =
      static_cast<int>(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5);
  // Iterate the ColPartitions in the grid.
  ColPartitionGridSearch gsearch(this);
  gsearch.StartFullSearch();
  ColPartition* part;
  while ((part = gsearch.NextFullSearch()) != NULL) {
    // Set up a rectangle search bounded by the part.
    const TBOX& box = part->bounding_box();
    ColPartitionGridSearch rsearch(this);
    rsearch.SetUniqueMode(true);
    rsearch.StartRectSearch(box);
    int unresolved_overlaps = 0;

    ColPartition* neighbour;
    while ((neighbour = rsearch.NextRectSearch()) != NULL) {
      if (neighbour == part)
        continue;
      const TBOX& neighbour_box = neighbour->bounding_box();
      if (neighbour->OKMergeOverlap(*part, *part, ok_overlap, false) &&
          part->OKMergeOverlap(*neighbour, *neighbour, ok_overlap, false))
        continue;  // The overlap is OK both ways.

      // If removal of the biggest box from either partition eliminates the
      // overlap, and it is much bigger than the box left behind, then
      // it is either a drop-cap, an inter-line join, or some junk that
      // we don't want anyway, so put it in the big_parts list.
      if (!part->IsSingleton()) {
        BLOBNBOX* excluded = part->BiggestBox();
        TBOX shrunken = part->BoundsWithoutBox(excluded);
        if (!shrunken.overlap(neighbour_box) &&
            excluded->bounding_box().height() >
              kBigPartSizeRatio * shrunken.height()) {
          // Removing the biggest box fixes the overlap, so do it!
          gsearch.RemoveBBox();
          RemoveBadBox(excluded, part, big_parts);
          InsertBBox(true, true, part);
          gsearch.RepositionIterator();
          break;
        }
      } else if (box.contains(neighbour_box)) {
        ++unresolved_overlaps;
        continue;  // No amount of splitting will fix it.
      }
      if (!neighbour->IsSingleton()) {
        BLOBNBOX* excluded = neighbour->BiggestBox();
        TBOX shrunken = neighbour->BoundsWithoutBox(excluded);
        if (!shrunken.overlap(box) &&
            excluded->bounding_box().height() >
              kBigPartSizeRatio * shrunken.height()) {
          // Removing the biggest box fixes the overlap, so do it!
          rsearch.RemoveBBox();
          RemoveBadBox(excluded, neighbour, big_parts);
          InsertBBox(true, true, neighbour);
          gsearch.RepositionIterator();
          break;
        }
      }
      int part_overlap_count = part->CountOverlappingBoxes(neighbour_box);
      int neighbour_overlap_count = neighbour->CountOverlappingBoxes(box);
      ColPartition* right_part = NULL;
      if (neighbour_overlap_count <= part_overlap_count ||
          part->IsSingleton()) {
        // Try to split the neighbour to reduce overlap.
        BLOBNBOX* split_blob = neighbour->OverlapSplitBlob(box);
        if (split_blob != NULL) {
          rsearch.RemoveBBox();
          right_part = neighbour->SplitAtBlob(split_blob);
          InsertBBox(true, true, neighbour);
          ASSERT_HOST(right_part != NULL);
        }
      } else {
        // Try to split part to reduce overlap.
        BLOBNBOX* split_blob = part->OverlapSplitBlob(neighbour_box);
        if (split_blob != NULL) {
          gsearch.RemoveBBox();
          right_part = part->SplitAtBlob(split_blob);
          InsertBBox(true, true, part);
          ASSERT_HOST(right_part != NULL);
        }
      }
      if (right_part != NULL) {
        // A split happened: insert the new piece and resync both searches.
        InsertBBox(true, true, right_part);
        gsearch.RepositionIterator();
        rsearch.RepositionIterator();
        break;
      }
    }
    if (unresolved_overlaps > 2 && part->IsSingleton()) {
      // This part is no good so just add to big_parts.
      RemoveBBox(part);
      ColPartition_IT big_it(big_parts);
      part->set_block_owned(true);
      big_it.add_to_end(part);
      gsearch.RepositionIterator();
    }
  }
}