Tesseract  3.02
tesseract::ColPartitionGrid Class Reference

#include <colpartitiongrid.h>

Inheritance diagram for tesseract::ColPartitionGrid:
tesseract::BBGrid< ColPartition, ColPartition_CLIST, ColPartition_C_IT > tesseract::GridBase

List of all members.

Public Member Functions

 ColPartitionGrid ()
 ColPartitionGrid (int gridsize, const ICOORD &bleft, const ICOORD &tright)
virtual ~ColPartitionGrid ()
void HandleClick (int x, int y)
void Merges (TessResultCallback2< bool, ColPartition *, TBOX * > *box_cb, TessResultCallback2< bool, const ColPartition *, const ColPartition * > *confirm_cb)
bool MergePart (TessResultCallback2< bool, ColPartition *, TBOX * > *box_cb, TessResultCallback2< bool, const ColPartition *, const ColPartition * > *confirm_cb, ColPartition *part)
void FindOverlappingPartitions (const TBOX &box, const ColPartition *not_this, ColPartition_CLIST *parts)
ColPartitionBestMergeCandidate (const ColPartition *part, ColPartition_CLIST *candidates, bool debug, TessResultCallback2< bool, const ColPartition *, const ColPartition * > *confirm_cb, int *overlap_increase)
void SplitOverlappingPartitions (ColPartition_LIST *big_parts)
bool GridSmoothNeighbours (BlobTextFlowType source_type, Pix *nontext_map, const TBOX &im_box, const FCOORD &rerotation)
void ComputePartitionColors (Pix *scaled_color, int scaled_factor, const FCOORD &rerotation)
void ReflectInYAxis ()
void Deskew (const FCOORD &deskew)
void SetTabStops (TabFind *tabgrid)
bool MakeColPartSets (PartSetVector *part_sets)
ColPartitionSetMakeSingleColumnSet (WidthCallback *cb)
void ClaimBoxes ()
void ReTypeBlobs (BLOBNBOX_LIST *im_blobs)
void RecomputeBounds (int gridsize, const ICOORD &bleft, const ICOORD &tright, const ICOORD &vertical)
void GridFindMargins (ColPartitionSet **best_columns)
void ListFindMargins (ColPartitionSet **best_columns, ColPartition_LIST *parts)
void DeleteParts ()
void DeleteUnknownParts (TO_BLOCK *block)
void FindFigureCaptions ()
void FindPartitionPartners ()
void FindPartitionPartners (bool upper, ColPartition *part)
void FindVPartitionPartners (bool to_the_left, ColPartition *part)
void RefinePartitionPartners (bool get_desperate)

Detailed Description

Definition at line 33 of file colpartitiongrid.h.


Constructor & Destructor Documentation

tesseract::ColPartitionGrid::ColPartitionGrid ( )

Definition at line 71 of file colpartitiongrid.cpp.

                                   {
}
tesseract::ColPartitionGrid::ColPartitionGrid ( int  gridsize,
const ICOORD bleft,
const ICOORD tright 
)

Definition at line 73 of file colpartitiongrid.cpp.

  : BBGrid<ColPartition, ColPartition_CLIST, ColPartition_C_IT>(gridsize,
                                                                bleft, tright) {
}
tesseract::ColPartitionGrid::~ColPartitionGrid ( ) [virtual]

Definition at line 79 of file colpartitiongrid.cpp.

                                    {
}

Member Function Documentation

ColPartition * tesseract::ColPartitionGrid::BestMergeCandidate ( const ColPartition part,
ColPartition_CLIST *  candidates,
bool  debug,
TessResultCallback2< bool, const ColPartition *, const ColPartition * > *  confirm_cb,
int *  overlap_increase 
)

Definition at line 379 of file colpartitiongrid.cpp.

                           {
  if (overlap_increase != NULL)
    *overlap_increase = 0;
  if (candidates->empty())
    return NULL;
  int ok_overlap =
      static_cast<int>(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5);
  // The best neighbour to merge with is the one that causes least
  // total pairwise overlap among all the neighbours.
  // If more than one offers the same total overlap, choose the one
  // with the least total area.
  const TBOX& part_box = part->bounding_box();
  ColPartition_C_IT it(candidates);
  ColPartition* best_candidate = NULL;
  // Find the total combined box of all candidates and the original.
  TBOX full_box(part_box);
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    ColPartition* candidate = it.data();
    full_box += candidate->bounding_box();
  }
  // Keep valid neighbours in a list.
  ColPartition_CLIST neighbours;
  // Now run a rect search of the merged box for overlapping neighbours, as
  // we need anything that might be overlapped by the merged box.
  FindOverlappingPartitions(full_box, part, &neighbours);
  if (debug) {
    tprintf("Finding best merge candidate from %d, %d neighbours for box:",
            candidates->length(), neighbours.length());
    part_box.print();
  }
  // If the best increase in overlap is positive, then we also check the
  // worst non-candidate overlap. This catches the case of multiple good
  // candidates that overlap each other when merged. If the worst
  // non-candidate overlap is better than the best overlap, then return
  // the worst non-candidate overlap instead.
  ColPartition_CLIST non_candidate_neighbours;
  non_candidate_neighbours.set_subtract(SortByBoxLeft<ColPartition>, true,
                                        &neighbours, candidates);
  int worst_nc_increase = 0;
  int best_increase = MAX_INT32;
  int best_area = 0;
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    ColPartition* candidate = it.data();
    if (confirm_cb != NULL && !confirm_cb->Run(part, candidate)) {
      if (debug) {
        tprintf("Candidate not confirmed:");
        candidate->bounding_box().print();
      }
      continue;
    }
    int increase = IncreaseInOverlap(part, candidate, ok_overlap, &neighbours);
    const TBOX& cand_box = candidate->bounding_box();
    if (best_candidate == NULL || increase < best_increase) {
      best_candidate = candidate;
      best_increase = increase;
      best_area = cand_box.bounding_union(part_box).area() - cand_box.area();
      if (debug) {
        tprintf("New best merge candidate has increase %d, area %d, over box:",
                increase, best_area);
        full_box.print();
        candidate->Print();
      }
    } else if (increase == best_increase) {
      int area = cand_box.bounding_union(part_box).area() - cand_box.area();
      if (area < best_area) {
        best_area = area;
        best_candidate = candidate;
      }
    }
    increase = IncreaseInOverlap(part, candidate, ok_overlap,
                                 &non_candidate_neighbours);
    if (increase > worst_nc_increase)
      worst_nc_increase = increase;
  }
  if (best_increase > 0) {
    // If the worst non-candidate increase is less than the best increase
    // including the candidates, then all the candidates can merge together
    // and the increase in outside overlap would be less, so use that result,
    // but only if each candidate is either a good diacritic merge with part,
    // or an ok merge candidate with all the others.
    // See TestCompatibleCandidates for more explanation and a picture.
    if (worst_nc_increase < best_increase &&
        TestCompatibleCandidates(*part, debug, candidates)) {
      best_increase = worst_nc_increase;
    }
  }
  if (overlap_increase != NULL)
    *overlap_increase = best_increase;
  return best_candidate;
}
void tesseract::ColPartitionGrid::ClaimBoxes ( )

Definition at line 812 of file colpartitiongrid.cpp.

                                  {
  // Iterate the ColPartitions in the grid.
  ColPartitionGridSearch gsearch(this);
  gsearch.StartFullSearch();
  ColPartition* part;
  while ((part = gsearch.NextFullSearch()) != NULL) {
    part->ClaimBoxes();
  }
}
void tesseract::ColPartitionGrid::ComputePartitionColors ( Pix *  scaled_color,
int  scaled_factor,
const FCOORD rerotation 
)

Definition at line 620 of file colpartitiongrid.cpp.

                                                                        {
  if (scaled_color == NULL)
    return;
  Pix* color_map1 = NULL;
  Pix* color_map2 = NULL;
  Pix* rms_map = NULL;
  if (textord_tabfind_show_color_fit) {
    int width = pixGetWidth(scaled_color);
    int height = pixGetHeight(scaled_color);
    color_map1 = pixCreate(width, height, 32);
    color_map2 = pixCreate(width, height, 32);
    rms_map = pixCreate(width, height, 8);
  }
  // Iterate the ColPartitions in the grid.
  ColPartitionGridSearch gsearch(this);
  gsearch.StartFullSearch();
  ColPartition* part;
  while ((part = gsearch.NextFullSearch()) != NULL) {
    TBOX part_box = part->bounding_box();
    part_box.rotate_large(rerotation);
    ImageFind::ComputeRectangleColors(part_box, scaled_color,
                                      scaled_factor,
                                      color_map1, color_map2, rms_map,
                                      part->color1(), part->color2());
  }
  if (color_map1 != NULL) {
    pixWrite("swcolorinput.png", scaled_color, IFF_PNG);
    pixWrite("swcolor1.png", color_map1, IFF_PNG);
    pixWrite("swcolor2.png", color_map2, IFF_PNG);
    pixWrite("swrms.png", rms_map, IFF_PNG);
    pixDestroy(&color_map1);
    pixDestroy(&color_map2);
    pixDestroy(&rms_map);
  }
}
void tesseract::ColPartitionGrid::DeleteParts ( )

Definition at line 950 of file colpartitiongrid.cpp.

                                   {
  ColPartition_LIST dead_parts;
  ColPartition_IT dead_it(&dead_parts);
  ColPartitionGridSearch gsearch(this);
  gsearch.StartFullSearch();
  ColPartition* part;
  while ((part = gsearch.NextFullSearch()) != NULL) {
    part->DisownBoxes();
    dead_it.add_to_end(part);  // Parts will be deleted on return.
  }
  Clear();
}
void tesseract::ColPartitionGrid::DeleteUnknownParts ( TO_BLOCK block)

Definition at line 965 of file colpartitiongrid.cpp.

                                                         {
  ColPartitionGridSearch gsearch(this);
  gsearch.StartFullSearch();
  ColPartition* part;
  while ((part = gsearch.NextFullSearch()) != NULL) {
    if (part->blob_type() == BRT_UNKNOWN) {
      gsearch.RemoveBBox();
      // Once marked, the blobs will be swept up by DeleteUnownedNoise.
      part->set_flow(BTFT_NONTEXT);
      part->set_blob_type(BRT_NOISE);
      part->SetBlobTypes();
      part->DisownBoxes();
      delete part;
    }
  }
  block->DeleteUnownedNoise();
}
void tesseract::ColPartitionGrid::Deskew ( const FCOORD deskew)

Definition at line 684 of file colpartitiongrid.cpp.

                                                  {
  ColPartition_LIST parts;
  ColPartition_IT part_it(&parts);
  // Iterate the ColPartitions in the grid to extract them.
  ColPartitionGridSearch gsearch(this);
  gsearch.StartFullSearch();
  ColPartition* part;
  while ((part = gsearch.NextFullSearch()) != NULL) {
    part_it.add_after_then_move(part);
  }
  // Rebuild the grid to the new size.
  TBOX grid_box(bleft_, tright_);
  grid_box.rotate_large(deskew);
  Init(gridsize(), grid_box.botleft(), grid_box.topright());
  // Reinitializing the grid with rotated coords also clears all the
  // pointers, so parts will now own the ColPartitions. (Briefly).
  for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
    part = part_it.extract();
    part->ComputeLimits();
    InsertBBox(true, true, part);
  }
}
void tesseract::ColPartitionGrid::FindFigureCaptions ( )

Definition at line 984 of file colpartitiongrid.cpp.

                                          {
  // For each image region find its best candidate text caption region,
  // if any and mark it as such.
  ColPartitionGridSearch gsearch(this);
  gsearch.StartFullSearch();
  ColPartition* part;
  while ((part = gsearch.NextFullSearch()) != NULL) {
    if (part->IsImageType()) {
      const TBOX& part_box = part->bounding_box();
      bool debug = AlignedBlob::WithinTestRegion(2, part_box.left(),
                                                 part_box.bottom());
      ColPartition* best_caption = NULL;
      int best_dist = 0;   // Distance to best_caption.
      int best_upper = 0;  // Direction of best_caption.
      // Handle both lower and upper directions.
      for (int upper = 0; upper < 2; ++upper) {
        ColPartition_C_IT partner_it(upper ? part->upper_partners()
                                           : part->lower_partners());
        // If there are no image partners, then this direction is ok.
        for (partner_it.mark_cycle_pt(); !partner_it.cycled_list();
             partner_it.forward()) {
          ColPartition* partner = partner_it.data();
          if (partner->IsImageType()) {
            break;
          }
        }
        if (!partner_it.cycled_list()) continue;
        // Find the nearest totally overlapping text partner.
        for (partner_it.mark_cycle_pt(); !partner_it.cycled_list();
             partner_it.forward()) {
          ColPartition* partner = partner_it.data();
          if (!partner->IsTextType()) continue;
          const TBOX& partner_box = partner->bounding_box();
          if (debug) {
            tprintf("Finding figure captions for image part:");
            part_box.print();
            tprintf("Considering partner:");
            partner_box.print();
          }
          if (partner_box.left() >= part_box.left() &&
              partner_box.right() <= part_box.right()) {
            int dist = partner_box.y_gap(part_box);
            if (best_caption == NULL || dist < best_dist) {
              best_dist = dist;
              best_caption = partner;
              best_upper = upper;
            }
          }
        }
      }
      if (best_caption != NULL) {
        if (debug) {
          tprintf("Best caption candidate:");
          best_caption->bounding_box().print();
        }
        // We have a candidate caption. Qualify it as being separable from
        // any body text. We are looking for either a small number of lines
        // or a big gap that indicates a separation from the body text.
        int line_count = 0;
        int biggest_gap = 0;
        int smallest_gap = MAX_INT16;
        int total_height = 0;
        int mean_height = 0;
        ColPartition* end_partner = NULL;
        ColPartition* next_partner = NULL;
        for (ColPartition* partner = best_caption; partner != NULL &&
             line_count <= kMaxCaptionLines;
             partner = next_partner) {
          if (!partner->IsTextType()) {
            end_partner = partner;
            break;
          }
          ++line_count;
          total_height += partner->bounding_box().height();
          next_partner = partner->SingletonPartner(best_upper);
          if (next_partner != NULL) {
            int gap = partner->bounding_box().y_gap(
                next_partner->bounding_box());
            if (gap > biggest_gap) {
              biggest_gap = gap;
              end_partner = next_partner;
              mean_height = total_height / line_count;
            } else if (gap < smallest_gap) {
              smallest_gap = gap;
            }
            // If the gap looks big compared to the text size and the smallest
            // gap seen so far, then we can stop.
            if (biggest_gap > mean_height * kMinCaptionGapHeightRatio &&
                biggest_gap > smallest_gap * kMinCaptionGapRatio)
              break;
          }
        }
        if (debug) {
          tprintf("Line count=%d, biggest gap %d, smallest%d, mean height %d\n",
                  line_count, biggest_gap, smallest_gap, mean_height);
          if (end_partner != NULL) {
            tprintf("End partner:");
            end_partner->bounding_box().print();
          }
        }
        if (next_partner == NULL && line_count <= kMaxCaptionLines)
          end_partner = NULL;  // No gap, but line count is small.
        if (line_count <= kMaxCaptionLines) {
          // This is a qualified caption. Mark the text as caption.
          for (ColPartition* partner = best_caption; partner != NULL &&
               partner != end_partner;
               partner = next_partner) {
            partner->set_type(PT_CAPTION_TEXT);
            partner->SetBlobTypes();
            if (debug) {
              tprintf("Set caption type for partition:");
              partner->bounding_box().print();
            }
            next_partner = partner->SingletonPartner(best_upper);
          }
        }
      }
    }
  }
}
void tesseract::ColPartitionGrid::FindOverlappingPartitions ( const TBOX box,
const ColPartition not_this,
ColPartition_CLIST *  parts 
)

Definition at line 326 of file colpartitiongrid.cpp.

                                                                            {
  ColPartitionGridSearch rsearch(this);
  rsearch.StartRectSearch(box);
  ColPartition* part;
  while ((part = rsearch.NextRectSearch()) != NULL) {
    if (part != not_this)
      parts->add_sorted(SortByBoxLeft<ColPartition>, true, part);
  }
}
void tesseract::ColPartitionGrid::FindPartitionPartners ( )

Definition at line 1109 of file colpartitiongrid.cpp.

                                             {
  ColPartitionGridSearch gsearch(this);
  gsearch.StartFullSearch();
  ColPartition* part;
  while ((part = gsearch.NextFullSearch()) != NULL) {
    if (part->IsVerticalType()) {
      FindVPartitionPartners(true, part);
      FindVPartitionPartners(false, part);
    } else {
      FindPartitionPartners(true, part);
      FindPartitionPartners(false, part);
    }
  }
}
void tesseract::ColPartitionGrid::FindPartitionPartners ( bool  upper,
ColPartition part 
)

Definition at line 1126 of file colpartitiongrid.cpp.

                                                                           {
  if (part->type() == PT_NOISE)
    return;  // Noise is not allowed to partner anything.
  const TBOX& box = part->bounding_box();
  int top = part->median_top();
  int bottom = part->median_bottom();
  int height = top - bottom;
  int mid_y = (bottom + top) / 2;
  ColPartitionGridSearch vsearch(this);
  // Search down for neighbour below
  vsearch.StartVerticalSearch(box.left(), box.right(), part->MidY());
  ColPartition* neighbour;
  ColPartition* best_neighbour = NULL;
  int best_dist = MAX_INT32;
  while ((neighbour = vsearch.NextVerticalSearch(!upper)) != NULL) {
    if (neighbour == part || neighbour->type() == PT_NOISE)
      continue;  // Noise is not allowed to partner anything.
    int neighbour_bottom = neighbour->median_bottom();
    int neighbour_top = neighbour->median_top();
    int neighbour_y = (neighbour_bottom + neighbour_top) / 2;
    if (upper != (neighbour_y > mid_y))
      continue;
    if (!part->HOverlaps(*neighbour) && !part->WithinSameMargins(*neighbour))
      continue;
    if (!part->TypesMatch(*neighbour)) {
      if (best_neighbour == NULL)
        best_neighbour = neighbour;
      continue;
    }
    int dist = upper ? neighbour_bottom - top : bottom - neighbour_top;
    if (dist <= kMaxPartitionSpacing * height) {
      if (dist < best_dist) {
        best_dist = dist;
        best_neighbour = neighbour;
      }
    } else {
      break;
    }
  }
  if (best_neighbour != NULL)
    part->AddPartner(upper, best_neighbour);
}
void tesseract::ColPartitionGrid::FindVPartitionPartners ( bool  to_the_left,
ColPartition part 
)

Definition at line 1171 of file colpartitiongrid.cpp.

                                                                  {
  if (part->type() == PT_NOISE)
    return;  // Noise is not allowed to partner anything.
  const TBOX& box = part->bounding_box();
  int left = part->median_left();
  int right = part->median_right();
  int width = right - left;
  int mid_x = (left + right) / 2;
  ColPartitionGridSearch hsearch(this);
  // Search left for neighbour to_the_left
  hsearch.StartSideSearch(mid_x, box.bottom(), box.top());
  ColPartition* neighbour;
  ColPartition* best_neighbour = NULL;
  int best_dist = MAX_INT32;
  while ((neighbour = hsearch.NextSideSearch(to_the_left)) != NULL) {
    if (neighbour == part || neighbour->type() == PT_NOISE)
      continue;  // Noise is not allowed to partner anything.
    int neighbour_left = neighbour->median_left();
    int neighbour_right = neighbour->median_right();
    int neighbour_x = (neighbour_left + neighbour_right) / 2;
    if (to_the_left != (neighbour_x < mid_x))
      continue;
    if (!part->VOverlaps(*neighbour))
      continue;
    if (!part->TypesMatch(*neighbour))
      continue;  // Only match to other vertical text.
    int dist = to_the_left ? left - neighbour_right : neighbour_left - right;
    if (dist <= kMaxPartitionSpacing * width) {
      if (dist < best_dist || best_neighbour == NULL) {
        best_dist = dist;
        best_neighbour = neighbour;
      }
    } else {
      break;
    }
  }
  // For vertical partitions, the upper partner is to the left, and lower is
  // to the right.
  if (best_neighbour != NULL)
    part->AddPartner(to_the_left, best_neighbour);
}
void tesseract::ColPartitionGrid::GridFindMargins ( ColPartitionSet **  best_columns)

Definition at line 908 of file colpartitiongrid.cpp.

                                                                     {
  // Iterate the ColPartitions in the grid.
  ColPartitionGridSearch gsearch(this);
  gsearch.StartFullSearch();
  ColPartition* part;
  while ((part = gsearch.NextFullSearch()) != NULL) {
    // Set up a rectangle search x-bounded by the column and y by the part.
    ColPartitionSet* columns = best_columns != NULL
                             ? best_columns[gsearch.GridY()]
                             : NULL;
    FindPartitionMargins(columns, part);
    const TBOX& box = part->bounding_box();
    if (AlignedBlob::WithinTestRegion(2, box.left(), box.bottom())) {
      tprintf("Computed margins for part:");
      part->Print();
    }
  }
}
bool tesseract::ColPartitionGrid::GridSmoothNeighbours ( BlobTextFlowType  source_type,
Pix *  nontext_map,
const TBOX im_box,
const FCOORD rerotation 
)

Definition at line 598 of file colpartitiongrid.cpp.

                                                                    {
  // Iterate the ColPartitions in the grid.
  ColPartitionGridSearch gsearch(this);
  gsearch.StartFullSearch();
  ColPartition* part;
  bool any_changed = false;
  while ((part = gsearch.NextFullSearch()) != NULL) {
    if (part->flow() != source_type || BLOBNBOX::IsLineType(part->blob_type()))
      continue;
    const TBOX& box = part->bounding_box();
    bool debug = AlignedBlob::WithinTestRegion(2, box.left(), box.bottom());
    if (SmoothRegionType(nontext_map, im_box, rotation, debug, part))
      any_changed = true;
  }
  return any_changed;
}
void tesseract::ColPartitionGrid::HandleClick ( int  x,
int  y 
) [virtual]

Reimplemented from tesseract::BBGrid< ColPartition, ColPartition_CLIST, ColPartition_C_IT >.

Definition at line 83 of file colpartitiongrid.cpp.

                                               {
  BBGrid<ColPartition,
         ColPartition_CLIST, ColPartition_C_IT>::HandleClick(x, y);
  // Run a radial search for partitions that overlap.
  ColPartitionGridSearch radsearch(this);
  radsearch.SetUniqueMode(true);
  radsearch.StartRadSearch(x, y, 1);
  ColPartition* neighbour;
  FCOORD click(x, y);
  while ((neighbour = radsearch.NextRadSearch()) != NULL) {
    TBOX nbox = neighbour->bounding_box();
    if (nbox.contains(click)) {
      tprintf("Block box:");
      neighbour->bounding_box().print();
      neighbour->Print();
    }
  }
}
void tesseract::ColPartitionGrid::ListFindMargins ( ColPartitionSet **  best_columns,
ColPartition_LIST *  parts 
)

Definition at line 932 of file colpartitiongrid.cpp.

                                                                 {
  ColPartition_IT part_it(parts);
  for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) {
    ColPartition* part = part_it.data();
    ColPartitionSet* columns = NULL;
    if (best_columns != NULL) {
      TBOX part_box = part->bounding_box();
      // Get the columns from the y grid coord.
      int grid_x, grid_y;
      GridCoords(part_box.left(), part_box.bottom(), &grid_x, &grid_y);
      columns = best_columns[grid_y];
    }
    FindPartitionMargins(columns, part);
  }
}
bool tesseract::ColPartitionGrid::MakeColPartSets ( PartSetVector part_sets)

Definition at line 732 of file colpartitiongrid.cpp.

                                                               {
  ColPartition_LIST* part_lists = new ColPartition_LIST[gridheight()];
  part_sets->reserve(gridheight());
  // Iterate the ColPartitions in the grid to get parts onto lists for the
  // y bottom of each.
  ColPartitionGridSearch gsearch(this);
  gsearch.StartFullSearch();
  ColPartition* part;
  bool any_parts_found = false;
  while ((part = gsearch.NextFullSearch()) != NULL) {
    BlobRegionType blob_type = part->blob_type();
    if (blob_type != BRT_NOISE &&
        (blob_type != BRT_UNKNOWN || !part->boxes()->singleton())) {
      int grid_x, grid_y;
      const TBOX& part_box = part->bounding_box();
      GridCoords(part_box.left(), part_box.bottom(), &grid_x, &grid_y);
      ColPartition_IT part_it(&part_lists[grid_y]);
      part_it.add_to_end(part);
      any_parts_found = true;
    }
  }
  if (any_parts_found) {
    for (int grid_y = 0; grid_y < gridheight(); ++grid_y) {
      ColPartitionSet* line_set = NULL;
      if (!part_lists[grid_y].empty()) {
        line_set = new ColPartitionSet(&part_lists[grid_y]);
      }
      part_sets->push_back(line_set);
    }
  }
  delete [] part_lists;
  return any_parts_found;
}
ColPartitionSet * tesseract::ColPartitionGrid::MakeSingleColumnSet ( WidthCallback cb)

Definition at line 770 of file colpartitiongrid.cpp.

                                                                        {
  ColPartition* single_column_part = NULL;
  // Iterate the ColPartitions in the grid to get parts onto lists for the
  // y bottom of each.
  ColPartitionGridSearch gsearch(this);
  gsearch.StartFullSearch();
  ColPartition* part;
  while ((part = gsearch.NextFullSearch()) != NULL) {
    BlobRegionType blob_type = part->blob_type();
    if (blob_type != BRT_NOISE &&
        (blob_type != BRT_UNKNOWN || !part->boxes()->singleton())) {
      // Consider for single column.
      BlobTextFlowType flow = part->flow();
      if ((blob_type == BRT_TEXT &&
          (flow == BTFT_STRONG_CHAIN || flow == BTFT_CHAIN ||
           flow == BTFT_LEADER || flow == BTFT_TEXT_ON_IMAGE)) ||
          blob_type == BRT_RECTIMAGE || blob_type == BRT_POLYIMAGE) {
        if (single_column_part == NULL) {
          single_column_part = part->ShallowCopy();
          single_column_part->set_blob_type(BRT_TEXT);
          // Copy the tabs from itself to properly setup the margins.
          single_column_part->CopyLeftTab(*single_column_part, false);
          single_column_part->CopyRightTab(*single_column_part, false);
        } else {
          if (part->left_key() < single_column_part->left_key())
            single_column_part->CopyLeftTab(*part, false);
          if (part->right_key() > single_column_part->right_key())
            single_column_part->CopyRightTab(*part, false);
        }
      }
    }
  }
  if (single_column_part != NULL) {
    // Make a ColPartitionSet out of the single_column_part as a candidate
    // for the single column case.
    single_column_part->SetColumnGoodness(cb);
    return new ColPartitionSet(single_column_part);
  }
  return NULL;
}
bool tesseract::ColPartitionGrid::MergePart ( TessResultCallback2< bool, ColPartition *, TBOX * > *  box_cb,
TessResultCallback2< bool, const ColPartition *, const ColPartition * > *  confirm_cb,
ColPartition part 
)

Definition at line 130 of file colpartitiongrid.cpp.

                        {
  if (part->IsUnMergeableType())
    return false;
  bool any_done = false;
  // Repeatedly merge part while we find a best merge candidate that works.
  bool merge_done = false;
  do {
    merge_done = false;
    TBOX box = part->bounding_box();
    bool debug = AlignedBlob::WithinTestRegion(2, box.left(), box.bottom());
    if (debug) {
      tprintf("Merge candidate:");
      box.print();
    }
    // Set up a rectangle search bounded by the part.
    if (!box_cb->Run(part, &box))
      continue;
    // Create a list of merge candidates.
    ColPartition_CLIST merge_candidates;
    FindMergeCandidates(part, box, debug, &merge_candidates);
    // Find the best merge candidate based on minimal overlap increase.
    int overlap_increase;
    ColPartition* neighbour = BestMergeCandidate(part, &merge_candidates, debug,
                                                 confirm_cb,
                                                 &overlap_increase);
    if (neighbour != NULL && overlap_increase <= 0) {
      if (debug) {
        tprintf("Merging:hoverlap=%d, voverlap=%d, OLI=%d\n",
                part->HCoreOverlap(*neighbour), part->VCoreOverlap(*neighbour),
                overlap_increase);
      }
      // Looks like a good candidate so merge it.
      RemoveBBox(neighbour);
      // We will modify the box of part, so remove it from the grid, merge
      // it and then re-insert it into the grid.
      RemoveBBox(part);
      part->Absorb(neighbour, NULL);
      InsertBBox(true, true, part);
      merge_done = true;
      any_done = true;
    } else if (neighbour != NULL) {
      if (debug) {
        tprintf("Overlapped when merged with increase %d: ", overlap_increase);
        neighbour->bounding_box().print();
      }
    } else if (debug) {
      tprintf("No candidate neighbour returned\n");
    }
  } while (merge_done);
  return any_done;
}
void tesseract::ColPartitionGrid::Merges ( TessResultCallback2< bool, ColPartition *, TBOX * > *  box_cb,
TessResultCallback2< bool, const ColPartition *, const ColPartition * > *  confirm_cb 
)

Definition at line 109 of file colpartitiongrid.cpp.

                                                          {
  // Iterate the ColPartitions in the grid.
  ColPartitionGridSearch gsearch(this);
  gsearch.StartFullSearch();
  ColPartition* part;
  while ((part = gsearch.NextFullSearch()) != NULL) {
    if (MergePart(box_cb, confirm_cb, part))
      gsearch.RepositionIterator();
  }
  delete box_cb;
  delete confirm_cb;
}
void tesseract::ColPartitionGrid::RecomputeBounds ( int  gridsize,
const ICOORD bleft,
const ICOORD tright,
const ICOORD vertical 
)

Definition at line 879 of file colpartitiongrid.cpp.

                                                               {
  ColPartition_LIST saved_parts;
  ColPartition_IT part_it(&saved_parts);
  // Iterate the ColPartitions in the grid to get parts onto a list.
  ColPartitionGridSearch gsearch(this);
  gsearch.StartFullSearch();
  ColPartition* part;
  while ((part = gsearch.NextFullSearch()) != NULL) {
    part_it.add_to_end(part);
  }
  // Reinitialize grid to the new size.
  Init(gridsize, bleft, tright);
  // Recompute the bounds of the parts and put them back in the new grid.
  for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
    part = part_it.extract();
    part->set_vertical(vertical);
    part->ComputeLimits();
    InsertBBox(true, true, part);
  }
}
void tesseract::ColPartitionGrid::RefinePartitionPartners ( bool  get_desperate)

Definition at line 1217 of file colpartitiongrid.cpp.

                                                                 {
  ColPartitionGridSearch gsearch(this);
  // Refine in type order so that chasing multiple partners can be done
  // before eliminating type mis-matching partners.
  for (int type = PT_UNKNOWN + 1; type <= PT_COUNT; type++) {
    // Iterate the ColPartitions in the grid.
    gsearch.StartFullSearch();
    ColPartition* part;
    while ((part = gsearch.NextFullSearch()) != NULL) {
      part->RefinePartners(static_cast<PolyBlockType>(type),
                           get_desperate, this);
      // Iterator may have been messed up by a merge.
      gsearch.RepositionIterator();
    }
  }
}
void tesseract::ColPartitionGrid::ReflectInYAxis ( )

Definition at line 660 of file colpartitiongrid.cpp.

                                      {
  ColPartition_LIST parts;
  ColPartition_IT part_it(&parts);
  // Iterate the ColPartitions in the grid to extract them.
  ColPartitionGridSearch gsearch(this);
  gsearch.StartFullSearch();
  ColPartition* part;
  while ((part = gsearch.NextFullSearch()) != NULL) {
    part_it.add_after_then_move(part);
  }
  ICOORD bot_left(-tright().x(), bleft().y());
  ICOORD top_right(-bleft().x(), tright().y());
  // Reinitializing the grid with reflected coords also clears all the
  // pointers, so parts will now own the ColPartitions. (Briefly).
  Init(gridsize(), bot_left, top_right);
  for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
    part = part_it.extract();
    part->ReflectInYAxis();
    InsertBBox(true, true, part);
  }
}
void tesseract::ColPartitionGrid::ReTypeBlobs ( BLOBNBOX_LIST *  im_blobs)

Definition at line 825 of file colpartitiongrid.cpp.

                                                          {
  BLOBNBOX_IT im_blob_it(im_blobs);
  ColPartition_LIST dead_parts;
  ColPartition_IT dead_part_it(&dead_parts);
  // Iterate the ColPartitions in the grid.
  ColPartitionGridSearch gsearch(this);
  gsearch.StartFullSearch();
  ColPartition* part;
  while ((part = gsearch.NextFullSearch()) != NULL) {
    BlobRegionType blob_type = part->blob_type();
    BlobTextFlowType flow = part->flow();
    if (blob_type == BRT_POLYIMAGE || blob_type == BRT_RECTIMAGE) {
      BLOBNBOX_C_IT blob_it(part->boxes());
      for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
        BLOBNBOX* blob = blob_it.data();
        im_blob_it.add_after_then_move(blob);
      }
    } else if (blob_type != BRT_NOISE) {
      // Make sure the blobs are marked with the correct type and flow.
      BLOBNBOX_C_IT blob_it(part->boxes());
      for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
        BLOBNBOX* blob = blob_it.data();
        if (blob->region_type() == BRT_NOISE) {
          // TODO(rays) Deprecated. Change this section to an assert to verify
          // and then delete.
          ASSERT_HOST(blob->cblob()->area() != 0);
          blob->set_owner(NULL);
          blob_it.extract();
        } else {
          blob->set_region_type(blob_type);
          if (blob->flow() != BTFT_LEADER)
            blob->set_flow(flow);
        }
      }
    }
    if (blob_type == BRT_NOISE || part->boxes()->empty()) {
      BLOBNBOX_C_IT blob_it(part->boxes());
      part->DisownBoxes();
      dead_part_it.add_to_end(part);
      gsearch.RemoveBBox();
      for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
        BLOBNBOX* blob = blob_it.data();
        if (blob->cblob()->area() == 0) {
          // Any blob with zero area is a fake image blob and should be deleted.
          delete blob->cblob();
          delete blob;
        }
      }
    }
  }
}
void tesseract::ColPartitionGrid::SetTabStops ( TabFind tabgrid)

Definition at line 708 of file colpartitiongrid.cpp.

                                                   {
  // Iterate the ColPartitions in the grid.
  ColPartitionGridSearch gsearch(this);
  gsearch.StartFullSearch();
  ColPartition* part;
  while ((part = gsearch.NextFullSearch()) != NULL) {
    const TBOX& part_box = part->bounding_box();
    TabVector* left_line = tabgrid->LeftTabForBox(part_box, true, false);
    // If the overlapping line is not a left tab, try for non-overlapping.
    if (left_line != NULL && !left_line->IsLeftTab())
      left_line = tabgrid->LeftTabForBox(part_box, false, false);
    if (left_line != NULL && left_line->IsLeftTab())
      part->SetLeftTab(left_line);
    TabVector* right_line = tabgrid->RightTabForBox(part_box, true, false);
    if (right_line != NULL && !right_line->IsRightTab())
      right_line = tabgrid->RightTabForBox(part_box, false, false);
    if (right_line != NULL && right_line->IsRightTab())
      part->SetRightTab(right_line);
    part->SetColumnGoodness(tabgrid->WidthCB());
  }
}
void tesseract::ColPartitionGrid::SplitOverlappingPartitions ( ColPartition_LIST *  big_parts)

Definition at line 488 of file colpartitiongrid.cpp.

                                  {
  int ok_overlap =
      static_cast<int>(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5);
  // Iterate the ColPartitions in the grid.
  ColPartitionGridSearch gsearch(this);
  gsearch.StartFullSearch();
  ColPartition* part;
  while ((part = gsearch.NextFullSearch()) != NULL) {
    // Set up a rectangle search bounded by the part.
    const TBOX& box = part->bounding_box();
    ColPartitionGridSearch rsearch(this);
    rsearch.SetUniqueMode(true);
    rsearch.StartRectSearch(box);
    int unresolved_overlaps = 0;

    ColPartition* neighbour;
    while ((neighbour = rsearch.NextRectSearch()) != NULL) {
      if (neighbour == part)
        continue;
      const TBOX& neighbour_box = neighbour->bounding_box();
      if (neighbour->OKMergeOverlap(*part, *part, ok_overlap, false) &&
          part->OKMergeOverlap(*neighbour, *neighbour, ok_overlap, false))
        continue;  // The overlap is OK both ways.

      // If removal of the biggest box from either partition eliminates the
      // overlap, and it is much bigger than the box left behind, then
      // it is either a drop-cap, an inter-line join, or some junk that
      // we don't want anyway, so put it in the big_parts list.
      if (!part->IsSingleton()) {
        BLOBNBOX* excluded = part->BiggestBox();
        TBOX shrunken = part->BoundsWithoutBox(excluded);
        if (!shrunken.overlap(neighbour_box) &&
            excluded->bounding_box().height() >
              kBigPartSizeRatio * shrunken.height()) {
          // Removing the biggest box fixes the overlap, so do it!
          gsearch.RemoveBBox();
          RemoveBadBox(excluded, part, big_parts);
          InsertBBox(true, true, part);
          gsearch.RepositionIterator();
          break;
        }
      } else if (box.contains(neighbour_box)) {
        ++unresolved_overlaps;
        continue;  // No amount of splitting will fix it.
      }
      if (!neighbour->IsSingleton()) {
        BLOBNBOX* excluded = neighbour->BiggestBox();
        TBOX shrunken = neighbour->BoundsWithoutBox(excluded);
        if (!shrunken.overlap(box) &&
            excluded->bounding_box().height() >
              kBigPartSizeRatio * shrunken.height()) {
          // Removing the biggest box fixes the overlap, so do it!
          rsearch.RemoveBBox();
          RemoveBadBox(excluded, neighbour, big_parts);
          InsertBBox(true, true, neighbour);
          gsearch.RepositionIterator();
          break;
        }
      }
      int part_overlap_count = part->CountOverlappingBoxes(neighbour_box);
      int neighbour_overlap_count = neighbour->CountOverlappingBoxes(box);
      ColPartition* right_part = NULL;
      if (neighbour_overlap_count <= part_overlap_count ||
          part->IsSingleton()) {
        // Try to split the neighbour to reduce overlap.
        BLOBNBOX* split_blob = neighbour->OverlapSplitBlob(box);
        if (split_blob != NULL) {
          rsearch.RemoveBBox();
          right_part = neighbour->SplitAtBlob(split_blob);
          InsertBBox(true, true, neighbour);
          ASSERT_HOST(right_part != NULL);
        }
      } else {
        // Try to split part to reduce overlap.
        BLOBNBOX* split_blob = part->OverlapSplitBlob(neighbour_box);
        if (split_blob != NULL) {
          gsearch.RemoveBBox();
          right_part = part->SplitAtBlob(split_blob);
          InsertBBox(true, true, part);
          ASSERT_HOST(right_part != NULL);
        }
      }
      if (right_part != NULL) {
        InsertBBox(true, true, right_part);
        gsearch.RepositionIterator();
        rsearch.RepositionIterator();
        break;
      }
    }
    if (unresolved_overlaps > 2 && part->IsSingleton()) {
      // This part is no good so just add to big_parts.
      RemoveBBox(part);
      ColPartition_IT big_it(big_parts);
      part->set_block_owned(true);
      big_it.add_to_end(part);
      gsearch.RepositionIterator();
    }
  }
}

The documentation for this class was generated from the following files: