Tesseract
3.02
|
00001 00002 // File: colpartitionset.cpp 00003 // Description: Class to hold a list of ColPartitions of the page that 00004 // correspond roughly to columns. 00005 // Author: Ray Smith 00006 // Created: Thu Aug 14 10:54:01 PDT 2008 00007 // 00008 // (C) Copyright 2008, Google Inc. 00009 // Licensed under the Apache License, Version 2.0 (the "License"); 00010 // you may not use this file except in compliance with the License. 00011 // You may obtain a copy of the License at 00012 // http://www.apache.org/licenses/LICENSE-2.0 00013 // Unless required by applicable law or agreed to in writing, software 00014 // distributed under the License is distributed on an "AS IS" BASIS, 00015 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00016 // See the License for the specific language governing permissions and 00017 // limitations under the License. 00018 // 00020 00021 #include "colpartitionset.h" 00022 #include "ndminx.h" 00023 #include "workingpartset.h" 00024 #include "tablefind.h" 00025 00026 namespace tesseract { 00027 00028 // Minimum width of a column to be interesting as a multiple of resolution. 00029 const double kMinColumnWidth = 2.0 / 3; 00030 00031 ELISTIZE(ColPartitionSet) 00032 00033 ColPartitionSet::ColPartitionSet(ColPartition_LIST* partitions) { 00034 ColPartition_IT it(&parts_); 00035 it.add_list_after(partitions); 00036 ComputeCoverage(); 00037 } 00038 00039 ColPartitionSet::ColPartitionSet(ColPartition* part) { 00040 ColPartition_IT it(&parts_); 00041 it.add_after_then_move(part); 00042 ComputeCoverage(); 00043 } 00044 00045 ColPartitionSet::~ColPartitionSet() { 00046 } 00047 00048 // Return an element of the parts_ list from its index. 00049 ColPartition* ColPartitionSet::GetColumnByIndex(int index) { 00050 ColPartition_IT it(&parts_); 00051 it.mark_cycle_pt(); 00052 for (int i = 0; i < index && !it.cycled_list(); ++i, it.forward()); 00053 if (it.cycled_list()) 00054 return NULL; 00055 return it.data(); 00056 } 00057 00058 // Return the ColPartition that contains the given coords, if any, else NULL. 00059 ColPartition* ColPartitionSet::ColumnContaining(int x, int y) { 00060 ColPartition_IT it(&parts_); 00061 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 00062 ColPartition* part = it.data(); 00063 if (part->ColumnContains(x, y)) 00064 return part; 00065 } 00066 return NULL; 00067 } 00068 00069 // Extract all the parts from the list, relinquishing ownership. 00070 void ColPartitionSet::RelinquishParts() { 00071 ColPartition_IT it(&parts_); 00072 while (!it.empty()) { 00073 it.extract(); 00074 it.forward(); 00075 } 00076 } 00077 00078 // Attempt to improve this by adding partitions or expanding partitions. 00079 void ColPartitionSet::ImproveColumnCandidate(WidthCallback* cb, 00080 PartSetVector* src_sets) { 00081 int set_size = src_sets->size(); 00082 // Iterate over the provided column sets, as each one may have something 00083 // to improve this. 00084 for (int i = 0; i < set_size; ++i) { 00085 ColPartitionSet* column_set = src_sets->get(i); 00086 if (column_set == NULL) 00087 continue; 00088 // Iterate over the parts in this and column_set, adding bigger or 00089 // new parts in column_set to this. 00090 ColPartition_IT part_it(&parts_); 00091 ASSERT_HOST(!part_it.empty()); 00092 int prev_right = MIN_INT32; 00093 part_it.mark_cycle_pt(); 00094 ColPartition_IT col_it(&column_set->parts_); 00095 for (col_it.mark_cycle_pt(); !col_it.cycled_list(); col_it.forward()) { 00096 ColPartition* col_part = col_it.data(); 00097 if (col_part->blob_type() < BRT_UNKNOWN) 00098 continue; // Ignore image partitions. 00099 int col_left = col_part->left_key(); 00100 int col_right = col_part->right_key(); 00101 // Sync-up part_it (in this) so it matches the col_part in column_set. 00102 ColPartition* part = part_it.data(); 00103 while (!part_it.at_last() && part->right_key() < col_left) { 00104 prev_right = part->right_key(); 00105 part_it.forward(); 00106 part = part_it.data(); 00107 } 00108 int part_left = part->left_key(); 00109 int part_right = part->right_key(); 00110 if (part_right < col_left || col_right < part_left) { 00111 // There is no overlap so this is a new partition. 00112 AddPartition(col_part->ShallowCopy(), &part_it); 00113 continue; 00114 } 00115 // Check the edges of col_part to see if they can improve part. 00116 bool part_width_ok = cb->Run(part->KeyWidth(part_left, part_right)); 00117 if (col_left < part_left && col_left > prev_right) { 00118 // The left edge of the column is better and it doesn't overlap, 00119 // so we can potentially expand it. 00120 int col_box_left = col_part->BoxLeftKey(); 00121 bool tab_width_ok = cb->Run(part->KeyWidth(col_left, part_right)); 00122 bool box_width_ok = cb->Run(part->KeyWidth(col_box_left, part_right)); 00123 if (tab_width_ok || (!part_width_ok )) { 00124 // The tab is leaving the good column metric at least as good as 00125 // it was before, so use the tab. 00126 part->CopyLeftTab(*col_part, false); 00127 part->SetColumnGoodness(cb); 00128 } else if (col_box_left < part_left && 00129 (box_width_ok || !part_width_ok)) { 00130 // The box is leaving the good column metric at least as good as 00131 // it was before, so use the box. 00132 part->CopyLeftTab(*col_part, true); 00133 part->SetColumnGoodness(cb); 00134 } 00135 part_left = part->left_key(); 00136 } 00137 if (col_right > part_right && 00138 (part_it.at_last() || 00139 part_it.data_relative(1)->left_key() > col_right)) { 00140 // The right edge is better, so we can possibly expand it. 00141 int col_box_right = col_part->BoxRightKey(); 00142 bool tab_width_ok = cb->Run(part->KeyWidth(part_left, col_right)); 00143 bool box_width_ok = cb->Run(part->KeyWidth(part_left, col_box_right)); 00144 if (tab_width_ok || (!part_width_ok )) { 00145 // The tab is leaving the good column metric at least as good as 00146 // it was before, so use the tab. 00147 part->CopyRightTab(*col_part, false); 00148 part->SetColumnGoodness(cb); 00149 } else if (col_box_right > part_right && 00150 (box_width_ok || !part_width_ok)) { 00151 // The box is leaving the good column metric at least as good as 00152 // it was before, so use the box. 00153 part->CopyRightTab(*col_part, true); 00154 part->SetColumnGoodness(cb); 00155 } 00156 } 00157 } 00158 } 00159 ComputeCoverage(); 00160 } 00161 00162 // If this set is good enough to represent a new partitioning into columns, 00163 // add it to the vector of sets, otherwise delete it. 00164 void ColPartitionSet::AddToColumnSetsIfUnique(PartSetVector* column_sets, 00165 WidthCallback* cb) { 00166 bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(), 00167 bounding_box_.bottom()); 00168 if (debug) { 00169 tprintf("Considering new column candidate:\n"); 00170 Print(); 00171 } 00172 if (!LegalColumnCandidate()) { 00173 if (debug) { 00174 tprintf("Not a legal column candidate:\n"); 00175 Print(); 00176 } 00177 delete this; 00178 return; 00179 } 00180 for (int i = 0; i < column_sets->size(); ++i) { 00181 ColPartitionSet* columns = column_sets->get(i); 00182 // In ordering the column set candidates, good_coverage_ is king, 00183 // followed by good_column_count_ and then bad_coverage_. 00184 bool better = good_coverage_ > columns->good_coverage_; 00185 if (good_coverage_ == columns->good_coverage_) { 00186 better = good_column_count_ > columns->good_column_count_; 00187 if (good_column_count_ == columns->good_column_count_) { 00188 better = bad_coverage_ > columns->bad_coverage_; 00189 } 00190 } 00191 if (better) { 00192 // The new one is better so add it. 00193 if (debug) 00194 tprintf("Good one\n"); 00195 column_sets->insert(this, i); 00196 return; 00197 } 00198 if (columns->CompatibleColumns(false, this, cb)) { 00199 if (debug) 00200 tprintf("Duplicate\n"); 00201 delete this; 00202 return; // It is not unique. 00203 } 00204 } 00205 if (debug) 00206 tprintf("Added to end\n"); 00207 column_sets->push_back(this); 00208 } 00209 00210 // Return true if the partitions in other are all compatible with the columns 00211 // in this. 00212 bool ColPartitionSet::CompatibleColumns(bool debug, ColPartitionSet* other, 00213 WidthCallback* cb) { 00214 if (debug) { 00215 tprintf("CompatibleColumns testing compatibility\n"); 00216 Print(); 00217 other->Print(); 00218 } 00219 if (other->parts_.empty()) { 00220 if (debug) 00221 tprintf("CompatibleColumns true due to empty other\n"); 00222 return true; 00223 } 00224 ColPartition_IT it(&other->parts_); 00225 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 00226 ColPartition* part = it.data(); 00227 if (part->blob_type() < BRT_UNKNOWN) { 00228 if (debug) { 00229 tprintf("CompatibleColumns ignoring image partition\n"); 00230 part->Print(); 00231 } 00232 continue; // Image partitions are irrelevant to column compatibility. 00233 } 00234 int y = part->MidY(); 00235 int left = part->bounding_box().left(); 00236 int right = part->bounding_box().right(); 00237 ColPartition* left_col = ColumnContaining(left, y); 00238 ColPartition* right_col = ColumnContaining(right, y); 00239 if (right_col == NULL || left_col == NULL) { 00240 if (debug) { 00241 tprintf("CompatibleColumns false due to partition edge outside\n"); 00242 part->Print(); 00243 } 00244 return false; // A partition edge lies outside of all columns 00245 } 00246 if (right_col != left_col && cb->Run(right - left)) { 00247 if (debug) { 00248 tprintf("CompatibleColumns false due to good width in multiple cols\n"); 00249 part->Print(); 00250 } 00251 return false; // Partition with a good width must be in a single column. 00252 } 00253 00254 ColPartition_IT it2= it; 00255 while (!it2.at_last()) { 00256 it2.forward(); 00257 ColPartition* next_part = it2.data(); 00258 if (!BLOBNBOX::IsTextType(next_part->blob_type())) 00259 continue; // Non-text partitions are irrelevant. 00260 int next_left = next_part->bounding_box().left(); 00261 if (next_left == right) { 00262 break; // They share the same edge, so one must be a pull-out. 00263 } 00264 // Search to see if right and next_left fall within a single column. 00265 ColPartition* next_left_col = ColumnContaining(next_left, y); 00266 if (right_col == next_left_col) { 00267 // There is a column break in this column. 00268 // This can be due to a figure caption within a column, a pull-out 00269 // block, or a simple broken textline that remains to be merged: 00270 // all allowed, or a change in column layout: not allowed. 00271 // If both partitions are of good width, then it is likely 00272 // a change in column layout, otherwise probably an allowed situation. 00273 if (part->good_width() && next_part->good_width()) { 00274 if (debug) { 00275 int next_right = next_part->bounding_box().right(); 00276 tprintf("CompatibleColumns false due to 2 parts of good width\n"); 00277 tprintf("part1 %d-%d, part2 %d-%d\n", 00278 left, right, next_left, next_right); 00279 right_col->Print(); 00280 } 00281 return false; 00282 } 00283 } 00284 break; 00285 } 00286 } 00287 if (debug) 00288 tprintf("CompatibleColumns true!\n"); 00289 return true; 00290 } 00291 00292 // Returns the total width of all blobs in the part_set that do not lie 00293 // within an approved column. Used as a cost measure for using this 00294 // column set over another that might be compatible. 00295 int ColPartitionSet::UnmatchedWidth(ColPartitionSet* part_set) { 00296 int total_width = 0; 00297 ColPartition_IT it(&part_set->parts_); 00298 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 00299 ColPartition* part = it.data(); 00300 if (!BLOBNBOX::IsTextType(part->blob_type())) { 00301 continue; // Non-text partitions are irrelevant to column compatibility. 00302 } 00303 int y = part->MidY(); 00304 BLOBNBOX_C_IT box_it(part->boxes()); 00305 for (box_it.mark_cycle_pt(); !box_it.cycled_list(); box_it.forward()) { 00306 const TBOX& box = it.data()->bounding_box(); 00307 // Assume that the whole blob is outside any column iff its x-middle 00308 // is outside. 00309 int x = (box.left() + box.right()) / 2; 00310 ColPartition* col = ColumnContaining(x, y); 00311 if (col == NULL) 00312 total_width += box.width(); 00313 } 00314 } 00315 return total_width; 00316 } 00317 00318 // Return true if this ColPartitionSet makes a legal column candidate by 00319 // having legal individual partitions and non-overlapping adjacent pairs. 00320 bool ColPartitionSet::LegalColumnCandidate() { 00321 ColPartition_IT it(&parts_); 00322 if (it.empty()) 00323 return false; 00324 bool any_text_parts = false; 00325 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 00326 ColPartition* part = it.data(); 00327 if (BLOBNBOX::IsTextType(part->blob_type())) { 00328 if (!part->IsLegal()) 00329 return false; // Individual partition is illegal. 00330 any_text_parts = true; 00331 } 00332 if (!it.at_last()) { 00333 ColPartition* next_part = it.data_relative(1); 00334 if (next_part->left_key() < part->right_key()) { 00335 return false; 00336 } 00337 } 00338 } 00339 return any_text_parts; 00340 } 00341 00342 // Return a copy of this. If good_only will only copy the Good ColPartitions. 00343 ColPartitionSet* ColPartitionSet::Copy(bool good_only) { 00344 ColPartition_LIST copy_parts; 00345 ColPartition_IT src_it(&parts_); 00346 ColPartition_IT dest_it(©_parts); 00347 for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) { 00348 ColPartition* part = src_it.data(); 00349 if (BLOBNBOX::IsTextType(part->blob_type()) && 00350 (!good_only || part->good_width() || part->good_column())) 00351 dest_it.add_after_then_move(part->ShallowCopy()); 00352 } 00353 if (dest_it.empty()) 00354 return NULL; 00355 return new ColPartitionSet(©_parts); 00356 } 00357 00358 // Return the bounding boxes of columns at the given y-range 00359 void ColPartitionSet::GetColumnBoxes(int y_bottom, int y_top, 00360 ColSegment_LIST *segments) { 00361 ColPartition_IT it(&parts_); 00362 ColSegment_IT col_it(segments); 00363 col_it.move_to_last(); 00364 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 00365 ColPartition* part = it.data(); 00366 ICOORD bot_left(part->LeftAtY(y_top), y_bottom); 00367 ICOORD top_right(part->RightAtY(y_bottom), y_top); 00368 ColSegment *col_seg = new ColSegment(); 00369 col_seg->InsertBox(TBOX(bot_left, top_right)); 00370 col_it.add_after_then_move(col_seg); 00371 } 00372 } 00373 00374 // Display the edges of the columns at the given y coords. 00375 void ColPartitionSet::DisplayColumnEdges(int y_bottom, int y_top, 00376 ScrollView* win) { 00377 #ifndef GRAPHICS_DISABLED 00378 ColPartition_IT it(&parts_); 00379 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 00380 ColPartition* part = it.data(); 00381 win->Line(part->LeftAtY(y_top), y_top, part->LeftAtY(y_bottom), y_bottom); 00382 win->Line(part->RightAtY(y_top), y_top, part->RightAtY(y_bottom), y_bottom); 00383 } 00384 #endif // GRAPHICS_DISABLED 00385 } 00386 00387 // Return the ColumnSpanningType that best explains the columns overlapped 00388 // by the given coords(left,right,y), with the given margins. 00389 // Also return the first and last column index touched by the coords and 00390 // the leftmost spanned column. 00391 // Column indices are 2n + 1 for real columns (0 based) and even values 00392 // represent the gaps in between columns, with 0 being left of the leftmost. 00393 // resolution refers to the ppi resolution of the image. 00394 ColumnSpanningType ColPartitionSet::SpanningType(int resolution, 00395 int left, int right, int y, 00396 int left_margin, 00397 int right_margin, 00398 int* first_col, 00399 int* last_col, 00400 int* first_spanned_col) { 00401 *first_col = -1; 00402 *last_col = -1; 00403 *first_spanned_col = -1; 00404 int margin_columns = 0; 00405 ColPartition_IT it(&parts_); 00406 int col_index = 1; 00407 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), col_index += 2) { 00408 ColPartition* part = it.data(); 00409 if (part->ColumnContains(left, y)) { 00410 // In the default case, first_col is set, but columns_spanned remains 00411 // zero, so first_col will get reset in the first column genuinely 00412 // spanned, but we can tell the difference from a noise partition 00413 // that touches no column. 00414 *first_col = col_index; 00415 if (part->ColumnContains(right, y)) { 00416 // Both within a single column. 00417 *last_col = col_index; 00418 return CST_FLOWING; 00419 } 00420 if (left_margin <= part->LeftAtY(y)) { 00421 // It completely spans this column. 00422 *first_spanned_col = col_index; 00423 margin_columns = 1; 00424 } 00425 } else if (part->ColumnContains(right, y)) { 00426 if (*first_col < 0) { 00427 // It started in-between. 00428 *first_col = col_index - 1; 00429 } 00430 if (right_margin >= part->RightAtY(y)) { 00431 // It completely spans this column. 00432 if (margin_columns == 0) 00433 *first_spanned_col = col_index; 00434 ++margin_columns; 00435 } 00436 *last_col = col_index; 00437 break; 00438 } else if (left < part->LeftAtY(y) && right > part->RightAtY(y)) { 00439 // Neither left nor right are contained within, so it spans this 00440 // column. 00441 if (*first_col < 0) { 00442 // It started in between the previous column and the current column. 00443 *first_col = col_index - 1; 00444 } 00445 if (margin_columns == 0) 00446 *first_spanned_col = col_index; 00447 *last_col = col_index; 00448 } else if (right < part->LeftAtY(y)) { 00449 // We have gone past the end. 00450 *last_col = col_index - 1; 00451 if (*first_col < 0) { 00452 // It must lie completely between columns =>noise. 00453 *first_col = col_index - 1; 00454 } 00455 break; 00456 } 00457 } 00458 if (*first_col < 0) 00459 *first_col = col_index - 1; // The last in-between. 00460 if (*last_col < 0) 00461 *last_col = col_index - 1; // The last in-between. 00462 ASSERT_HOST(*first_col >= 0 && *last_col >= 0); 00463 ASSERT_HOST(*first_col <= *last_col); 00464 if (*first_col == *last_col && right - left < kMinColumnWidth * resolution) { 00465 // Neither end was in a column, and it didn't span any, so it lies 00466 // entirely between columns, therefore noise. 00467 return CST_NOISE; 00468 } else if (margin_columns <= 1) { 00469 // An exception for headings that stick outside of single-column text. 00470 if (margin_columns == 1 && parts_.singleton()) { 00471 return CST_HEADING; 00472 } 00473 // It is a pullout, as left and right were not in the same column, but 00474 // it doesn't go to the edge of its start and end. 00475 return CST_PULLOUT; 00476 } 00477 // Its margins went to the edges of first and last columns => heading. 00478 return CST_HEADING; 00479 } 00480 00481 // The column_set has changed. Close down all in-progress WorkingPartSets in 00482 // columns that do not match and start new ones for the new columns in this. 00483 // As ColPartitions are turned into BLOCKs, the used ones are put in 00484 // used_parts, as they still need to be referenced in the grid. 00485 void ColPartitionSet::ChangeWorkColumns(const ICOORD& bleft, 00486 const ICOORD& tright, 00487 int resolution, 00488 ColPartition_LIST* used_parts, 00489 WorkingPartSet_LIST* working_set_list) { 00490 // Move the input list to a temporary location so we can delete its elements 00491 // as we add them to the output working_set. 00492 WorkingPartSet_LIST work_src; 00493 WorkingPartSet_IT src_it(&work_src); 00494 src_it.add_list_after(working_set_list); 00495 src_it.move_to_first(); 00496 WorkingPartSet_IT dest_it(working_set_list); 00497 // Completed blocks and to_blocks are accumulated and given to the first new 00498 // one whenever we keep a column, or at the end. 00499 BLOCK_LIST completed_blocks; 00500 TO_BLOCK_LIST to_blocks; 00501 WorkingPartSet* first_new_set = NULL; 00502 WorkingPartSet* working_set = NULL; 00503 ColPartition_IT col_it(&parts_); 00504 for (col_it.mark_cycle_pt(); !col_it.cycled_list(); col_it.forward()) { 00505 ColPartition* column = col_it.data(); 00506 // Any existing column to the left of column is completed. 00507 while (!src_it.empty() && 00508 ((working_set = src_it.data())->column() == NULL || 00509 working_set->column()->right_key() <= column->left_key())) { 00510 src_it.extract(); 00511 working_set->ExtractCompletedBlocks(bleft, tright, resolution, 00512 used_parts, &completed_blocks, 00513 &to_blocks); 00514 delete working_set; 00515 src_it.forward(); 00516 } 00517 // Make a new between-column WorkingSet for before the current column. 00518 working_set = new WorkingPartSet(NULL); 00519 dest_it.add_after_then_move(working_set); 00520 if (first_new_set == NULL) 00521 first_new_set = working_set; 00522 // A matching column gets to stay, and first_new_set gets all the 00523 // completed_sets. 00524 working_set = src_it.empty() ? NULL : src_it.data(); 00525 if (working_set != NULL && 00526 working_set->column()->MatchingColumns(*column)) { 00527 working_set->set_column(column); 00528 dest_it.add_after_then_move(src_it.extract()); 00529 src_it.forward(); 00530 first_new_set->InsertCompletedBlocks(&completed_blocks, &to_blocks); 00531 first_new_set = NULL; 00532 } else { 00533 // Just make a new working set for the current column. 00534 working_set = new WorkingPartSet(column); 00535 dest_it.add_after_then_move(working_set); 00536 } 00537 } 00538 // Complete any remaining src working sets. 00539 while (!src_it.empty()) { 00540 working_set = src_it.extract(); 00541 working_set->ExtractCompletedBlocks(bleft, tright, resolution, 00542 used_parts, &completed_blocks, 00543 &to_blocks); 00544 delete working_set; 00545 src_it.forward(); 00546 } 00547 // Make a new between-column WorkingSet for after the last column. 00548 working_set = new WorkingPartSet(NULL); 00549 dest_it.add_after_then_move(working_set); 00550 if (first_new_set == NULL) 00551 first_new_set = working_set; 00552 // The first_new_set now gets any accumulated completed_parts/blocks. 00553 first_new_set->InsertCompletedBlocks(&completed_blocks, &to_blocks); 00554 } 00555 00556 // Accumulate the widths and gaps into the given variables. 00557 void ColPartitionSet::AccumulateColumnWidthsAndGaps(int* total_width, 00558 int* width_samples, 00559 int* total_gap, 00560 int* gap_samples) { 00561 ColPartition_IT it(&parts_); 00562 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 00563 ColPartition* part = it.data(); 00564 *total_width += part->ColumnWidth(); 00565 ++*width_samples; 00566 if (!it.at_last()) { 00567 ColPartition* next_part = it.data_relative(1); 00568 int gap = part->KeyWidth(part->right_key(), next_part->left_key()); 00569 *total_gap += gap; 00570 ++*gap_samples; 00571 } 00572 } 00573 } 00574 00575 // Provide debug output for this ColPartitionSet and all the ColPartitions. 00576 void ColPartitionSet::Print() { 00577 ColPartition_IT it(&parts_); 00578 tprintf("Partition set of %d parts, %d good, coverage=%d+%d" 00579 " (%d,%d)->(%d,%d)\n", 00580 it.length(), good_column_count_, good_coverage_, bad_coverage_, 00581 bounding_box_.left(), bounding_box_.bottom(), 00582 bounding_box_.right(), bounding_box_.top()); 00583 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 00584 ColPartition* part = it.data(); 00585 part->Print(); 00586 } 00587 } 00588 00589 // PRIVATE CODE. 00590 00591 // Add the given partition to the list in the appropriate place. 00592 void ColPartitionSet::AddPartition(ColPartition* new_part, 00593 ColPartition_IT* it) { 00594 AddPartitionCoverageAndBox(*new_part); 00595 int new_right = new_part->right_key(); 00596 if (it->data()->left_key() >= new_right) 00597 it->add_before_stay_put(new_part); 00598 else 00599 it->add_after_stay_put(new_part); 00600 } 00601 00602 // Compute the coverage and good column count. Coverage is the amount of the 00603 // width of the page (in pixels) that is covered by ColPartitions, which are 00604 // used to provide candidate column layouts. 00605 // Coverage is split into good and bad. Good coverage is provided by 00606 // ColPartitions of a frequent width (according to the callback function 00607 // provided by TabFinder::WidthCB, which accesses stored statistics on the 00608 // widths of ColParititions) and bad coverage is provided by all other 00609 // ColPartitions, even if they have tab vectors at both sides. Thus: 00610 // |-----------------------------------------------------------------| 00611 // | Double width heading | 00612 // |-----------------------------------------------------------------| 00613 // |-------------------------------| |-------------------------------| 00614 // | Common width ColParition | | Common width ColPartition | 00615 // |-------------------------------| |-------------------------------| 00616 // the layout with two common-width columns has better coverage than the 00617 // double width heading, because the coverage is "good," even though less in 00618 // total coverage than the heading, because the heading coverage is "bad." 00619 void ColPartitionSet::ComputeCoverage() { 00620 // Count the number of good columns and sum their width. 00621 ColPartition_IT it(&parts_); 00622 good_column_count_ = 0; 00623 good_coverage_ = 0; 00624 bad_coverage_ = 0; 00625 bounding_box_ = TBOX(); 00626 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 00627 ColPartition* part = it.data(); 00628 AddPartitionCoverageAndBox(*part); 00629 } 00630 } 00631 00632 // Adds the coverage, column count and box for a single partition, 00633 // without adding it to the list. (Helper factored from ComputeCoverage.) 00634 void ColPartitionSet::AddPartitionCoverageAndBox(const ColPartition& part) { 00635 bounding_box_ += part.bounding_box(); 00636 int coverage = part.ColumnWidth(); 00637 if (part.good_width()) { 00638 good_coverage_ += coverage; 00639 good_column_count_ += 2; 00640 } else { 00641 if (part.blob_type() < BRT_UNKNOWN) 00642 coverage /= 2; 00643 if (part.good_column()) 00644 ++good_column_count_; 00645 bad_coverage_ += coverage; 00646 } 00647 } 00648 00649 } // namespace tesseract.