Tesseract  3.02
tesseract-ocr/textord/colpartitionset.cpp
Go to the documentation of this file.
00001 
00002 // File:        colpartitionset.cpp
00003 // Description: Class to hold a list of ColPartitions of the page that
00004 //              correspond roughly to columns.
00005 // Author:      Ray Smith
00006 // Created:     Thu Aug 14 10:54:01 PDT 2008
00007 //
00008 // (C) Copyright 2008, Google Inc.
00009 // Licensed under the Apache License, Version 2.0 (the "License");
00010 // you may not use this file except in compliance with the License.
00011 // You may obtain a copy of the License at
00012 // http://www.apache.org/licenses/LICENSE-2.0
00013 // Unless required by applicable law or agreed to in writing, software
00014 // distributed under the License is distributed on an "AS IS" BASIS,
00015 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00016 // See the License for the specific language governing permissions and
00017 // limitations under the License.
00018 //
00020 
00021 #include "colpartitionset.h"
00022 #include "ndminx.h"
00023 #include "workingpartset.h"
00024 #include "tablefind.h"
00025 
00026 namespace tesseract {
00027 
00028 // Minimum width of a column to be interesting as a multiple of resolution.
00029 const double kMinColumnWidth = 2.0 / 3;
00030 
00031 ELISTIZE(ColPartitionSet)
00032 
00033 ColPartitionSet::ColPartitionSet(ColPartition_LIST* partitions) {
00034   ColPartition_IT it(&parts_);
00035   it.add_list_after(partitions);
00036   ComputeCoverage();
00037 }
00038 
00039 ColPartitionSet::ColPartitionSet(ColPartition* part) {
00040   ColPartition_IT it(&parts_);
00041   it.add_after_then_move(part);
00042   ComputeCoverage();
00043 }
00044 
00045 ColPartitionSet::~ColPartitionSet() {
00046 }
00047 
00048 // Return an element of the parts_ list from its index.
00049 ColPartition* ColPartitionSet::GetColumnByIndex(int index) {
00050   ColPartition_IT it(&parts_);
00051   it.mark_cycle_pt();
00052   for (int i = 0; i < index && !it.cycled_list(); ++i, it.forward());
00053   if (it.cycled_list())
00054     return NULL;
00055   return it.data();
00056 }
00057 
00058 // Return the ColPartition that contains the given coords, if any, else NULL.
00059 ColPartition* ColPartitionSet::ColumnContaining(int x, int y) {
00060   ColPartition_IT it(&parts_);
00061   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00062     ColPartition* part = it.data();
00063     if (part->ColumnContains(x, y))
00064       return part;
00065   }
00066   return NULL;
00067 }
00068 
00069 // Extract all the parts from the list, relinquishing ownership.
00070 void ColPartitionSet::RelinquishParts() {
00071   ColPartition_IT it(&parts_);
00072   while (!it.empty()) {
00073     it.extract();
00074     it.forward();
00075   }
00076 }
00077 
00078 // Attempt to improve this by adding partitions or expanding partitions.
00079 void ColPartitionSet::ImproveColumnCandidate(WidthCallback* cb,
00080                                              PartSetVector* src_sets) {
00081   int set_size = src_sets->size();
00082   // Iterate over the provided column sets, as each one may have something
00083   // to improve this.
00084   for (int i = 0; i < set_size; ++i) {
00085     ColPartitionSet* column_set = src_sets->get(i);
00086     if (column_set == NULL)
00087       continue;
00088     // Iterate over the parts in this and column_set, adding bigger or
00089     // new parts in column_set to this.
00090     ColPartition_IT part_it(&parts_);
00091     ASSERT_HOST(!part_it.empty());
00092     int prev_right = MIN_INT32;
00093     part_it.mark_cycle_pt();
00094     ColPartition_IT col_it(&column_set->parts_);
00095     for (col_it.mark_cycle_pt(); !col_it.cycled_list(); col_it.forward()) {
00096       ColPartition* col_part = col_it.data();
00097       if (col_part->blob_type() < BRT_UNKNOWN)
00098         continue;  // Ignore image partitions.
00099       int col_left = col_part->left_key();
00100       int col_right = col_part->right_key();
00101       // Sync-up part_it (in this) so it matches the col_part in column_set.
00102       ColPartition* part = part_it.data();
00103       while (!part_it.at_last() && part->right_key() < col_left) {
00104         prev_right = part->right_key();
00105         part_it.forward();
00106         part = part_it.data();
00107       }
00108       int part_left = part->left_key();
00109       int part_right = part->right_key();
00110       if (part_right < col_left || col_right < part_left) {
00111         // There is no overlap so this is a new partition.
00112         AddPartition(col_part->ShallowCopy(), &part_it);
00113         continue;
00114       }
00115       // Check the edges of col_part to see if they can improve part.
00116       bool part_width_ok = cb->Run(part->KeyWidth(part_left, part_right));
00117       if (col_left < part_left && col_left > prev_right) {
00118         // The left edge of the column is better and it doesn't overlap,
00119         // so we can potentially expand it.
00120         int col_box_left = col_part->BoxLeftKey();
00121         bool tab_width_ok = cb->Run(part->KeyWidth(col_left, part_right));
00122         bool box_width_ok = cb->Run(part->KeyWidth(col_box_left, part_right));
00123         if (tab_width_ok || (!part_width_ok )) {
00124           // The tab is leaving the good column metric at least as good as
00125           // it was before, so use the tab.
00126           part->CopyLeftTab(*col_part, false);
00127           part->SetColumnGoodness(cb);
00128         } else if (col_box_left < part_left &&
00129                    (box_width_ok || !part_width_ok)) {
00130           // The box is leaving the good column metric at least as good as
00131           // it was before, so use the box.
00132           part->CopyLeftTab(*col_part, true);
00133           part->SetColumnGoodness(cb);
00134         }
00135         part_left = part->left_key();
00136       }
00137       if (col_right > part_right &&
00138           (part_it.at_last() ||
00139            part_it.data_relative(1)->left_key() > col_right)) {
00140         // The right edge is better, so we can possibly expand it.
00141         int col_box_right = col_part->BoxRightKey();
00142         bool tab_width_ok = cb->Run(part->KeyWidth(part_left, col_right));
00143         bool box_width_ok = cb->Run(part->KeyWidth(part_left, col_box_right));
00144         if (tab_width_ok || (!part_width_ok )) {
00145           // The tab is leaving the good column metric at least as good as
00146           // it was before, so use the tab.
00147           part->CopyRightTab(*col_part, false);
00148           part->SetColumnGoodness(cb);
00149         } else if (col_box_right > part_right &&
00150                    (box_width_ok || !part_width_ok)) {
00151           // The box is leaving the good column metric at least as good as
00152           // it was before, so use the box.
00153           part->CopyRightTab(*col_part, true);
00154           part->SetColumnGoodness(cb);
00155         }
00156       }
00157     }
00158   }
00159   ComputeCoverage();
00160 }
00161 
00162 // If this set is good enough to represent a new partitioning into columns,
00163 // add it to the vector of sets, otherwise delete it.
00164 void ColPartitionSet::AddToColumnSetsIfUnique(PartSetVector* column_sets,
00165                                               WidthCallback* cb) {
00166   bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(),
00167                                          bounding_box_.bottom());
00168   if (debug) {
00169     tprintf("Considering new column candidate:\n");
00170     Print();
00171   }
00172   if (!LegalColumnCandidate()) {
00173     if (debug) {
00174       tprintf("Not a legal column candidate:\n");
00175       Print();
00176     }
00177     delete this;
00178     return;
00179   }
00180   for (int i = 0; i < column_sets->size(); ++i) {
00181     ColPartitionSet* columns = column_sets->get(i);
00182     // In ordering the column set candidates, good_coverage_ is king,
00183     // followed by good_column_count_ and then bad_coverage_.
00184     bool better = good_coverage_ > columns->good_coverage_;
00185     if (good_coverage_ == columns->good_coverage_) {
00186       better = good_column_count_ > columns->good_column_count_;
00187       if (good_column_count_ == columns->good_column_count_) {
00188           better = bad_coverage_ > columns->bad_coverage_;
00189       }
00190     }
00191     if (better) {
00192       // The new one is better so add it.
00193       if (debug)
00194         tprintf("Good one\n");
00195       column_sets->insert(this, i);
00196       return;
00197     }
00198     if (columns->CompatibleColumns(false, this, cb)) {
00199       if (debug)
00200         tprintf("Duplicate\n");
00201       delete this;
00202       return;  // It is not unique.
00203     }
00204   }
00205   if (debug)
00206     tprintf("Added to end\n");
00207   column_sets->push_back(this);
00208 }
00209 
00210 // Return true if the partitions in other are all compatible with the columns
00211 // in this.
00212 bool ColPartitionSet::CompatibleColumns(bool debug, ColPartitionSet* other,
00213                                         WidthCallback* cb) {
00214   if (debug) {
00215     tprintf("CompatibleColumns testing compatibility\n");
00216     Print();
00217     other->Print();
00218   }
00219   if (other->parts_.empty()) {
00220     if (debug)
00221       tprintf("CompatibleColumns true due to empty other\n");
00222     return true;
00223   }
00224   ColPartition_IT it(&other->parts_);
00225   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00226     ColPartition* part = it.data();
00227     if (part->blob_type() < BRT_UNKNOWN) {
00228       if (debug) {
00229         tprintf("CompatibleColumns ignoring image partition\n");
00230         part->Print();
00231       }
00232       continue;  // Image partitions are irrelevant to column compatibility.
00233     }
00234     int y = part->MidY();
00235     int left = part->bounding_box().left();
00236     int right = part->bounding_box().right();
00237     ColPartition* left_col = ColumnContaining(left, y);
00238     ColPartition* right_col = ColumnContaining(right, y);
00239     if (right_col == NULL || left_col == NULL) {
00240       if (debug) {
00241         tprintf("CompatibleColumns false due to partition edge outside\n");
00242         part->Print();
00243       }
00244       return false;  // A partition edge lies outside of all columns
00245     }
00246     if (right_col != left_col && cb->Run(right - left)) {
00247       if (debug) {
00248         tprintf("CompatibleColumns false due to good width in multiple cols\n");
00249         part->Print();
00250       }
00251       return false;  // Partition with a good width must be in a single column.
00252     }
00253 
00254     ColPartition_IT it2= it;
00255     while (!it2.at_last()) {
00256       it2.forward();
00257       ColPartition* next_part = it2.data();
00258       if (!BLOBNBOX::IsTextType(next_part->blob_type()))
00259         continue;  // Non-text partitions are irrelevant.
00260       int next_left = next_part->bounding_box().left();
00261       if (next_left == right) {
00262         break;  // They share the same edge, so one must be a pull-out.
00263       }
00264       // Search to see if right and next_left fall within a single column.
00265       ColPartition* next_left_col = ColumnContaining(next_left, y);
00266       if (right_col == next_left_col) {
00267         // There is a column break in this column.
00268         // This can be due to a figure caption within a column, a pull-out
00269         // block, or a simple broken textline that remains to be merged:
00270         // all allowed, or a change in column layout: not allowed.
00271         // If both partitions are of good width, then it is likely
00272         // a change in column layout, otherwise probably an allowed situation.
00273         if (part->good_width() && next_part->good_width()) {
00274           if (debug) {
00275             int next_right = next_part->bounding_box().right();
00276             tprintf("CompatibleColumns false due to 2 parts of good width\n");
00277             tprintf("part1 %d-%d, part2 %d-%d\n",
00278                     left, right, next_left, next_right);
00279             right_col->Print();
00280           }
00281           return false;
00282         }
00283       }
00284       break;
00285     }
00286   }
00287   if (debug)
00288     tprintf("CompatibleColumns true!\n");
00289   return true;
00290 }
00291 
00292 // Returns the total width of all blobs in the part_set that do not lie
00293 // within an approved column. Used as a cost measure for using this
00294 // column set over another that might be compatible.
00295 int ColPartitionSet::UnmatchedWidth(ColPartitionSet* part_set) {
00296   int total_width = 0;
00297   ColPartition_IT it(&part_set->parts_);
00298   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00299     ColPartition* part = it.data();
00300     if (!BLOBNBOX::IsTextType(part->blob_type())) {
00301       continue;  // Non-text partitions are irrelevant to column compatibility.
00302     }
00303     int y = part->MidY();
00304     BLOBNBOX_C_IT box_it(part->boxes());
00305     for (box_it.mark_cycle_pt(); !box_it.cycled_list(); box_it.forward()) {
00306       const TBOX& box = it.data()->bounding_box();
00307       // Assume that the whole blob is outside any column iff its x-middle
00308       // is outside.
00309       int x = (box.left() + box.right()) / 2;
00310       ColPartition* col = ColumnContaining(x, y);
00311       if (col == NULL)
00312         total_width += box.width();
00313     }
00314   }
00315   return total_width;
00316 }
00317 
00318 // Return true if this ColPartitionSet makes a legal column candidate by
00319 // having legal individual partitions and non-overlapping adjacent pairs.
00320 bool ColPartitionSet::LegalColumnCandidate() {
00321   ColPartition_IT it(&parts_);
00322   if (it.empty())
00323     return false;
00324   bool any_text_parts = false;
00325   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00326     ColPartition* part = it.data();
00327     if (BLOBNBOX::IsTextType(part->blob_type())) {
00328       if (!part->IsLegal())
00329         return false;  // Individual partition is illegal.
00330       any_text_parts = true;
00331     }
00332     if (!it.at_last()) {
00333       ColPartition* next_part = it.data_relative(1);
00334       if (next_part->left_key() < part->right_key()) {
00335         return false;
00336       }
00337     }
00338   }
00339   return any_text_parts;
00340 }
00341 
00342 // Return a copy of this. If good_only will only copy the Good ColPartitions.
00343 ColPartitionSet* ColPartitionSet::Copy(bool good_only) {
00344   ColPartition_LIST copy_parts;
00345   ColPartition_IT src_it(&parts_);
00346   ColPartition_IT dest_it(&copy_parts);
00347   for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {
00348     ColPartition* part = src_it.data();
00349     if (BLOBNBOX::IsTextType(part->blob_type()) &&
00350         (!good_only || part->good_width() || part->good_column()))
00351       dest_it.add_after_then_move(part->ShallowCopy());
00352   }
00353   if (dest_it.empty())
00354     return NULL;
00355   return new ColPartitionSet(&copy_parts);
00356 }
00357 
00358 // Return the bounding boxes of columns at the given y-range
00359 void ColPartitionSet::GetColumnBoxes(int y_bottom, int y_top,
00360                                      ColSegment_LIST *segments) {
00361   ColPartition_IT it(&parts_);
00362   ColSegment_IT col_it(segments);
00363   col_it.move_to_last();
00364   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00365     ColPartition* part = it.data();
00366     ICOORD bot_left(part->LeftAtY(y_top), y_bottom);
00367     ICOORD top_right(part->RightAtY(y_bottom), y_top);
00368     ColSegment *col_seg = new ColSegment();
00369     col_seg->InsertBox(TBOX(bot_left, top_right));
00370     col_it.add_after_then_move(col_seg);
00371   }
00372 }
00373 
00374 // Display the edges of the columns at the given y coords.
00375 void ColPartitionSet::DisplayColumnEdges(int y_bottom, int y_top,
00376                                          ScrollView* win) {
00377   #ifndef GRAPHICS_DISABLED
00378   ColPartition_IT it(&parts_);
00379   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00380     ColPartition* part = it.data();
00381     win->Line(part->LeftAtY(y_top), y_top, part->LeftAtY(y_bottom), y_bottom);
00382     win->Line(part->RightAtY(y_top), y_top, part->RightAtY(y_bottom), y_bottom);
00383   }
00384   #endif  // GRAPHICS_DISABLED
00385 }
00386 
00387 // Return the ColumnSpanningType that best explains the columns overlapped
00388 // by the given coords(left,right,y), with the given margins.
00389 // Also return the first and last column index touched by the coords and
00390 // the leftmost spanned column.
00391 // Column indices are 2n + 1 for real columns (0 based) and even values
00392 // represent the gaps in between columns, with 0 being left of the leftmost.
00393 // resolution refers to the ppi resolution of the image.
00394 ColumnSpanningType ColPartitionSet::SpanningType(int resolution,
00395                                                  int left, int right, int y,
00396                                                  int left_margin,
00397                                                  int right_margin,
00398                                                  int* first_col,
00399                                                  int* last_col,
00400                                                  int* first_spanned_col) {
00401   *first_col = -1;
00402   *last_col = -1;
00403   *first_spanned_col = -1;
00404   int margin_columns = 0;
00405   ColPartition_IT it(&parts_);
00406   int col_index = 1;
00407   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), col_index += 2) {
00408     ColPartition* part = it.data();
00409     if (part->ColumnContains(left, y)) {
00410       // In the default case, first_col is set, but columns_spanned remains
00411       // zero, so first_col will get reset in the first column genuinely
00412       // spanned, but we can tell the difference from a noise partition
00413       // that touches no column.
00414       *first_col = col_index;
00415       if (part->ColumnContains(right, y)) {
00416         // Both within a single column.
00417         *last_col = col_index;
00418         return CST_FLOWING;
00419       }
00420       if (left_margin <= part->LeftAtY(y)) {
00421         // It completely spans this column.
00422         *first_spanned_col = col_index;
00423         margin_columns = 1;
00424       }
00425     } else if (part->ColumnContains(right, y)) {
00426       if (*first_col < 0) {
00427         // It started in-between.
00428         *first_col = col_index - 1;
00429       }
00430       if (right_margin >= part->RightAtY(y)) {
00431         // It completely spans this column.
00432         if (margin_columns == 0)
00433           *first_spanned_col = col_index;
00434         ++margin_columns;
00435       }
00436       *last_col = col_index;
00437       break;
00438     } else if (left < part->LeftAtY(y) && right > part->RightAtY(y)) {
00439       // Neither left nor right are contained within, so it spans this
00440       // column.
00441       if (*first_col < 0) {
00442         // It started in between the previous column and the current column.
00443         *first_col = col_index - 1;
00444       }
00445       if (margin_columns == 0)
00446         *first_spanned_col = col_index;
00447       *last_col = col_index;
00448     } else if (right < part->LeftAtY(y)) {
00449       // We have gone past the end.
00450       *last_col = col_index - 1;
00451       if (*first_col < 0) {
00452         // It must lie completely between columns =>noise.
00453         *first_col = col_index - 1;
00454       }
00455       break;
00456     }
00457   }
00458   if (*first_col < 0)
00459     *first_col = col_index - 1;  // The last in-between.
00460   if (*last_col < 0)
00461     *last_col = col_index - 1;  // The last in-between.
00462   ASSERT_HOST(*first_col >= 0 && *last_col >= 0);
00463   ASSERT_HOST(*first_col <= *last_col);
00464   if (*first_col == *last_col && right - left < kMinColumnWidth * resolution) {
00465     // Neither end was in a column, and it didn't span any, so it lies
00466     // entirely between columns, therefore noise.
00467     return CST_NOISE;
00468   } else if (margin_columns <= 1) {
00469     // An exception for headings that stick outside of single-column text.
00470     if (margin_columns == 1 && parts_.singleton()) {
00471       return CST_HEADING;
00472     }
00473     // It is a pullout, as left and right were not in the same column, but
00474     // it doesn't go to the edge of its start and end.
00475     return CST_PULLOUT;
00476   }
00477   // Its margins went to the edges of first and last columns => heading.
00478   return CST_HEADING;
00479 }
00480 
00481 // The column_set has changed. Close down all in-progress WorkingPartSets in
00482 // columns that do not match and start new ones for the new columns in this.
00483 // As ColPartitions are turned into BLOCKs, the used ones are put in
00484 // used_parts, as they still need to be referenced in the grid.
00485 void ColPartitionSet::ChangeWorkColumns(const ICOORD& bleft,
00486                                         const ICOORD& tright,
00487                                         int resolution,
00488                                         ColPartition_LIST* used_parts,
00489                                         WorkingPartSet_LIST* working_set_list) {
00490   // Move the input list to a temporary location so we can delete its elements
00491   // as we add them to the output working_set.
00492   WorkingPartSet_LIST work_src;
00493   WorkingPartSet_IT src_it(&work_src);
00494   src_it.add_list_after(working_set_list);
00495   src_it.move_to_first();
00496   WorkingPartSet_IT dest_it(working_set_list);
00497   // Completed blocks and to_blocks are accumulated and given to the first new
00498   // one  whenever we keep a column, or at the end.
00499   BLOCK_LIST completed_blocks;
00500   TO_BLOCK_LIST to_blocks;
00501   WorkingPartSet* first_new_set = NULL;
00502   WorkingPartSet* working_set = NULL;
00503   ColPartition_IT col_it(&parts_);
00504   for (col_it.mark_cycle_pt(); !col_it.cycled_list(); col_it.forward()) {
00505     ColPartition* column = col_it.data();
00506     // Any existing column to the left of column is completed.
00507     while (!src_it.empty() &&
00508            ((working_set = src_it.data())->column() == NULL ||
00509             working_set->column()->right_key() <= column->left_key())) {
00510       src_it.extract();
00511       working_set->ExtractCompletedBlocks(bleft, tright, resolution,
00512                                           used_parts, &completed_blocks,
00513                                           &to_blocks);
00514       delete working_set;
00515       src_it.forward();
00516     }
00517     // Make a new between-column WorkingSet for before the current column.
00518     working_set = new WorkingPartSet(NULL);
00519     dest_it.add_after_then_move(working_set);
00520     if (first_new_set == NULL)
00521       first_new_set = working_set;
00522     // A matching column gets to stay, and first_new_set gets all the
00523     // completed_sets.
00524     working_set = src_it.empty() ? NULL : src_it.data();
00525     if (working_set != NULL &&
00526         working_set->column()->MatchingColumns(*column)) {
00527       working_set->set_column(column);
00528       dest_it.add_after_then_move(src_it.extract());
00529       src_it.forward();
00530       first_new_set->InsertCompletedBlocks(&completed_blocks, &to_blocks);
00531       first_new_set = NULL;
00532     } else {
00533       // Just make a new working set for the current column.
00534       working_set = new WorkingPartSet(column);
00535       dest_it.add_after_then_move(working_set);
00536     }
00537   }
00538   // Complete any remaining src working sets.
00539   while (!src_it.empty()) {
00540     working_set = src_it.extract();
00541     working_set->ExtractCompletedBlocks(bleft, tright, resolution,
00542                                         used_parts, &completed_blocks,
00543                                         &to_blocks);
00544     delete working_set;
00545     src_it.forward();
00546   }
00547   // Make a new between-column WorkingSet for after the last column.
00548   working_set = new WorkingPartSet(NULL);
00549   dest_it.add_after_then_move(working_set);
00550   if (first_new_set == NULL)
00551     first_new_set = working_set;
00552   // The first_new_set now gets any accumulated completed_parts/blocks.
00553   first_new_set->InsertCompletedBlocks(&completed_blocks, &to_blocks);
00554 }
00555 
00556 // Accumulate the widths and gaps into the given variables.
00557 void ColPartitionSet::AccumulateColumnWidthsAndGaps(int* total_width,
00558                                                     int* width_samples,
00559                                                     int* total_gap,
00560                                                     int* gap_samples) {
00561   ColPartition_IT it(&parts_);
00562   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00563     ColPartition* part = it.data();
00564     *total_width += part->ColumnWidth();
00565     ++*width_samples;
00566     if (!it.at_last()) {
00567       ColPartition* next_part = it.data_relative(1);
00568       int gap = part->KeyWidth(part->right_key(), next_part->left_key());
00569       *total_gap += gap;
00570       ++*gap_samples;
00571     }
00572   }
00573 }
00574 
00575 // Provide debug output for this ColPartitionSet and all the ColPartitions.
00576 void ColPartitionSet::Print() {
00577   ColPartition_IT it(&parts_);
00578   tprintf("Partition set of %d parts, %d good, coverage=%d+%d"
00579           " (%d,%d)->(%d,%d)\n",
00580           it.length(), good_column_count_, good_coverage_, bad_coverage_,
00581           bounding_box_.left(), bounding_box_.bottom(),
00582           bounding_box_.right(), bounding_box_.top());
00583   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00584     ColPartition* part = it.data();
00585     part->Print();
00586   }
00587 }
00588 
00589 // PRIVATE CODE.
00590 
00591 // Add the given partition to the list in the appropriate place.
00592 void ColPartitionSet::AddPartition(ColPartition* new_part,
00593                                    ColPartition_IT* it) {
00594   AddPartitionCoverageAndBox(*new_part);
00595   int new_right = new_part->right_key();
00596   if (it->data()->left_key() >= new_right)
00597     it->add_before_stay_put(new_part);
00598   else
00599     it->add_after_stay_put(new_part);
00600 }
00601 
00602 // Compute the coverage and good column count. Coverage is the amount of the
00603 // width of the page (in pixels) that is covered by ColPartitions, which are
00604 // used to provide candidate column layouts.
00605 // Coverage is split into good and bad. Good coverage is provided by
00606 // ColPartitions of a frequent width (according to the callback function
00607 // provided by TabFinder::WidthCB, which accesses stored statistics on the
00608 // widths of ColParititions) and bad coverage is provided by all other
00609 // ColPartitions, even if they have tab vectors at both sides. Thus:
00610 // |-----------------------------------------------------------------|
00611 // |        Double     width    heading                              |
00612 // |-----------------------------------------------------------------|
00613 // |-------------------------------| |-------------------------------|
00614 // |   Common width ColParition    | |  Common width ColPartition    |
00615 // |-------------------------------| |-------------------------------|
00616 // the layout with two common-width columns has better coverage than the
00617 // double width heading, because the coverage is "good," even though less in
00618 // total coverage than the heading, because the heading coverage is "bad."
00619 void ColPartitionSet::ComputeCoverage() {
00620   // Count the number of good columns and sum their width.
00621   ColPartition_IT it(&parts_);
00622   good_column_count_ = 0;
00623   good_coverage_ = 0;
00624   bad_coverage_ = 0;
00625   bounding_box_ = TBOX();
00626   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00627     ColPartition* part = it.data();
00628     AddPartitionCoverageAndBox(*part);
00629   }
00630 }
00631 
00632 // Adds the coverage, column count and box for a single partition,
00633 // without adding it to the list. (Helper factored from ComputeCoverage.)
00634 void ColPartitionSet::AddPartitionCoverageAndBox(const ColPartition& part) {
00635   bounding_box_ += part.bounding_box();
00636   int coverage = part.ColumnWidth();
00637   if (part.good_width()) {
00638     good_coverage_ += coverage;
00639     good_column_count_ += 2;
00640   } else {
00641     if (part.blob_type() < BRT_UNKNOWN)
00642       coverage /= 2;
00643     if (part.good_column())
00644       ++good_column_count_;
00645     bad_coverage_ += coverage;
00646   }
00647 }
00648 
00649 }  // namespace tesseract.