Tesseract
3.02
|
00001 00002 // File: alignedblob.cpp 00003 // Description: Subclass of BBGrid to find vertically aligned blobs. 00004 // Author: Ray Smith 00005 // Created: Fri Mar 21 15:03:01 PST 2008 00006 // 00007 // (C) Copyright 2008, Google Inc. 00008 // Licensed under the Apache License, Version 2.0 (the "License"); 00009 // you may not use this file except in compliance with the License. 00010 // You may obtain a copy of the License at 00011 // http://www.apache.org/licenses/LICENSE-2.0 00012 // Unless required by applicable law or agreed to in writing, software 00013 // distributed under the License is distributed on an "AS IS" BASIS, 00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 // See the License for the specific language governing permissions and 00016 // limitations under the License. 00017 // 00019 00020 #include "alignedblob.h" 00021 #include "ndminx.h" 00022 00023 // Include automatically generated configuration file if running autoconf. 00024 #ifdef HAVE_CONFIG_H 00025 #include "config_auto.h" 00026 #endif 00027 00028 INT_VAR(textord_debug_tabfind, 0, "Debug tab finding"); 00029 INT_VAR(textord_debug_bugs, 0, "Turn on output related to bugs in tab finding"); 00030 INT_VAR(textord_testregion_left, -1, "Left edge of debug reporting rectangle"); 00031 INT_VAR(textord_testregion_top, -1, "Top edge of debug reporting rectangle"); 00032 INT_VAR(textord_testregion_right, MAX_INT32, "Right edge of debug rectangle"); 00033 INT_VAR(textord_testregion_bottom, MAX_INT32, "Bottom edge of debug rectangle"); 00034 BOOL_VAR(textord_debug_images, false, "Use greyed image background for debug"); 00035 BOOL_VAR(textord_debug_printable, false, "Make debug windows printable"); 00036 00037 namespace tesseract { 00038 00039 // Fraction of resolution used as alignment tolerance for aligned tabs. 00040 const double kAlignedFraction = 0.03125; 00041 // Fraction of resolution used as alignment tolerance for ragged tabs. 00042 const double kRaggedFraction = 2.5; 00043 // Fraction of height used as a minimum gutter gap for aligned blobs. 00044 const double kAlignedGapFraction = 0.75; 00045 // Fraction of height used as a minimum gutter gap for ragged tabs. 00046 const double kRaggedGapFraction = 1.0; 00047 // Constant number of pixels used as alignment tolerance for line finding. 00048 const int kVLineAlignment = 3; 00049 // Constant number of pixels used as gutter gap tolerance for line finding. 00050 const int kVLineGutter = 1; 00051 // Constant number of pixels used as the search size for line finding. 00052 const int kVLineSearchSize = 150; 00053 // Min number of points to accept for a ragged tab stop. 00054 const int kMinRaggedTabs = 5; 00055 // Min number of points to accept for an aligned tab stop. 00056 const int kMinAlignedTabs = 4; 00057 // Constant number of pixels minimum height of a vertical line. 00058 const int kVLineMinLength = 500; 00059 // Minimum gradient for a vertical tab vector. Used to prune away junk 00060 // tab vectors with what would be a ridiculously large skew angle. 00061 // Value corresponds to tan(90 - max allowed skew angle) 00062 const double kMinTabGradient = 4.0; 00063 // Tolerance to skew on top of current estimate of skew. Divide x or y length 00064 // by kMaxSkewFactor to get the y or x skew distance. 00065 // If the angle is small, the angle in degrees is roughly 60/kMaxSkewFactor. 00066 const int kMaxSkewFactor = 15; 00067 00068 // Constant part of textord_debug_pix_. 00069 const char* kTextordDebugPix = "psdebug_pix"; 00070 00071 // Name of image file to use if textord_debug_images is true. 00072 STRING AlignedBlob::textord_debug_pix_ = kTextordDebugPix; 00073 // Index to image file to use if textord_debug_images is true. 00074 int AlignedBlob::debug_pix_index_ = 0; 00075 00076 // Increment the serial number counter and set the string to use 00077 // for a filename if textord_debug_images is true. 00078 void AlignedBlob::IncrementDebugPix() { 00079 ++debug_pix_index_; 00080 textord_debug_pix_ = kTextordDebugPix; 00081 char numbuf[32]; 00082 snprintf(numbuf, sizeof(numbuf), "%d", debug_pix_index_); 00083 textord_debug_pix_ += numbuf; 00084 textord_debug_pix_ += ".pix"; 00085 } 00086 00087 // Constructor to set the parameters for finding aligned and ragged tabs. 00088 // Vertical_x and vertical_y are the current estimates of the true vertical 00089 // direction (up) in the image. Height is the height of the starter blob. 00090 // v_gap_multiple is the multiple of height that will be used as a limit 00091 // on vertical gap before giving up and calling the line ended. 00092 // resolution is the original image resolution, and align0 indicates the 00093 // type of tab stop to be found. 00094 AlignedBlobParams::AlignedBlobParams(int vertical_x, int vertical_y, 00095 int height, int v_gap_multiple, 00096 int min_gutter_width, 00097 int resolution, TabAlignment align0) 00098 : right_tab(align0 == TA_RIGHT_RAGGED || align0 == TA_RIGHT_ALIGNED), 00099 ragged(align0 == TA_LEFT_RAGGED || align0 == TA_RIGHT_RAGGED), 00100 alignment(align0), 00101 confirmed_type(TT_CONFIRMED), 00102 min_length(0) { 00103 // Set the tolerances according to the type of line sought. 00104 // For tab search, these are based on the image resolution for most, or 00105 // the height of the starting blob for the maximum vertical gap. 00106 max_v_gap = height * v_gap_multiple; 00107 if (ragged) { 00108 // In the case of a ragged edge, we are much more generous with the 00109 // inside alignment fraction, but also require a much bigger gutter. 00110 gutter_fraction = kRaggedGapFraction; 00111 if (alignment == TA_RIGHT_RAGGED) { 00112 l_align_tolerance = static_cast<int>(resolution * kRaggedFraction + 0.5); 00113 r_align_tolerance = static_cast<int>(resolution * kAlignedFraction + 0.5); 00114 } else { 00115 l_align_tolerance = static_cast<int>(resolution * kAlignedFraction + 0.5); 00116 r_align_tolerance = static_cast<int>(resolution * kRaggedFraction + 0.5); 00117 } 00118 min_points = kMinRaggedTabs; 00119 } else { 00120 gutter_fraction = kAlignedGapFraction; 00121 l_align_tolerance = static_cast<int>(resolution * kAlignedFraction + 0.5); 00122 r_align_tolerance = static_cast<int>(resolution * kAlignedFraction + 0.5); 00123 min_points = kMinAlignedTabs; 00124 } 00125 min_gutter = static_cast<int>(height * gutter_fraction + 0.5); 00126 if (min_gutter < min_gutter_width) 00127 min_gutter = min_gutter_width; 00128 // Fit the vertical vector into an ICOORD, which is 16 bit. 00129 set_vertical(vertical_x, vertical_y); 00130 } 00131 00132 // Constructor to set the parameters for finding vertical lines. 00133 // Vertical_x and vertical_y are the current estimates of the true vertical 00134 // direction (up) in the image. Width is the width of the starter blob. 00135 AlignedBlobParams::AlignedBlobParams(int vertical_x, int vertical_y, 00136 int width) 00137 : gutter_fraction(0.0), 00138 right_tab(false), 00139 ragged(false), 00140 alignment(TA_SEPARATOR), 00141 confirmed_type(TT_VLINE), 00142 max_v_gap(kVLineSearchSize), 00143 min_gutter(kVLineGutter), 00144 min_points(1), 00145 min_length(kVLineMinLength) { 00146 // Compute threshold for left and right alignment. 00147 l_align_tolerance = MAX(kVLineAlignment, width); 00148 r_align_tolerance = MAX(kVLineAlignment, width); 00149 00150 // Fit the vertical vector into an ICOORD, which is 16 bit. 00151 set_vertical(vertical_x, vertical_y); 00152 } 00153 00154 // Fit the vertical vector into an ICOORD, which is 16 bit. 00155 void AlignedBlobParams::set_vertical(int vertical_x, int vertical_y) { 00156 int factor = 1; 00157 if (vertical_y > MAX_INT16) 00158 factor = vertical_y / MAX_INT16 + 1; 00159 vertical.set_x(vertical_x / factor); 00160 vertical.set_y(vertical_y / factor); 00161 } 00162 00163 00164 AlignedBlob::AlignedBlob(int gridsize, 00165 const ICOORD& bleft, const ICOORD& tright) 00166 : BlobGrid(gridsize, bleft, tright) { 00167 } 00168 00169 AlignedBlob::~AlignedBlob() { 00170 } 00171 00172 // Return true if the given coordinates are within the test rectangle 00173 // and the debug level is at least the given detail level. 00174 bool AlignedBlob::WithinTestRegion(int detail_level, int x, int y) { 00175 if (textord_debug_tabfind < detail_level) 00176 return false; 00177 return x >= textord_testregion_left && x <= textord_testregion_right && 00178 y <= textord_testregion_top && y >= textord_testregion_bottom; 00179 } 00180 00181 // Display the tab codes of the BLOBNBOXes in this grid. 00182 ScrollView* AlignedBlob::DisplayTabs(const char* window_name, 00183 ScrollView* tab_win) { 00184 #ifndef GRAPHICS_DISABLED 00185 if (tab_win == NULL) 00186 tab_win = MakeWindow(0, 50, window_name); 00187 // For every tab in the grid, display it. 00188 GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> gsearch(this); 00189 gsearch.StartFullSearch(); 00190 BLOBNBOX* bbox; 00191 while ((bbox = gsearch.NextFullSearch()) != NULL) { 00192 TBOX box = bbox->bounding_box(); 00193 int left_x = box.left(); 00194 int right_x = box.right(); 00195 int top_y = box.top(); 00196 int bottom_y = box.bottom(); 00197 TabType tabtype = bbox->left_tab_type(); 00198 if (tabtype != TT_NONE) { 00199 if (tabtype == TT_MAYBE_ALIGNED) 00200 tab_win->Pen(ScrollView::BLUE); 00201 else if (tabtype == TT_MAYBE_RAGGED) 00202 tab_win->Pen(ScrollView::YELLOW); 00203 else if (tabtype == TT_CONFIRMED) 00204 tab_win->Pen(ScrollView::GREEN); 00205 else 00206 tab_win->Pen(ScrollView::GREY); 00207 tab_win->Line(left_x, top_y, left_x, bottom_y); 00208 } 00209 tabtype = bbox->right_tab_type(); 00210 if (tabtype != TT_NONE) { 00211 if (tabtype == TT_MAYBE_ALIGNED) 00212 tab_win->Pen(ScrollView::MAGENTA); 00213 else if (tabtype == TT_MAYBE_RAGGED) 00214 tab_win->Pen(ScrollView::ORANGE); 00215 else if (tabtype == TT_CONFIRMED) 00216 tab_win->Pen(ScrollView::RED); 00217 else 00218 tab_win->Pen(ScrollView::GREY); 00219 tab_win->Line(right_x, top_y, right_x, bottom_y); 00220 } 00221 } 00222 tab_win->Update(); 00223 #endif 00224 return tab_win; 00225 } 00226 00227 // Helper returns true if the total number of line_crossings of all the blobs 00228 // in the list is at least 2. 00229 static bool AtLeast2LineCrossings(BLOBNBOX_CLIST* blobs) { 00230 BLOBNBOX_C_IT it(blobs); 00231 int total_crossings = 0; 00232 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 00233 total_crossings += it.data()->line_crossings(); 00234 } 00235 return total_crossings >= 2; 00236 } 00237 00238 // Finds a vector corresponding to a set of vertically aligned blob edges 00239 // running through the given box. The type of vector returned and the 00240 // search parameters are determined by the AlignedBlobParams. 00241 // vertical_x and y are updated with an estimate of the real 00242 // vertical direction. (skew finding.) 00243 // Returns NULL if no decent vector can be found. 00244 TabVector* AlignedBlob::FindVerticalAlignment(AlignedBlobParams align_params, 00245 BLOBNBOX* bbox, 00246 int* vertical_x, 00247 int* vertical_y) { 00248 int ext_start_y, ext_end_y; 00249 BLOBNBOX_CLIST good_points; 00250 // Search up and then down from the starting bbox. 00251 TBOX box = bbox->bounding_box(); 00252 bool debug = WithinTestRegion(2, box.left(), box.bottom()); 00253 int pt_count = AlignTabs(align_params, false, bbox, &good_points, &ext_end_y); 00254 pt_count += AlignTabs(align_params, true, bbox, &good_points, &ext_start_y); 00255 BLOBNBOX_C_IT it(&good_points); 00256 it.move_to_last(); 00257 box = it.data()->bounding_box(); 00258 int end_y = box.top(); 00259 int end_x = align_params.right_tab ? box.right() : box.left(); 00260 it.move_to_first(); 00261 box = it.data()->bounding_box(); 00262 int start_x = align_params.right_tab ? box.right() : box.left(); 00263 int start_y = box.bottom(); 00264 // Acceptable tab vectors must have a mininum number of points, 00265 // have a minimum acceptable length, and have a minimum gradient. 00266 // The gradient corresponds to the skew angle. 00267 // Ragged tabs don't need to satisfy the gradient condition, as they 00268 // will always end up parallel to the vertical direction. 00269 bool at_least_2_crossings = AtLeast2LineCrossings(&good_points); 00270 if ((pt_count >= align_params.min_points && 00271 end_y - start_y >= align_params.min_length && 00272 (align_params.ragged || 00273 end_y - start_y >= abs(end_x - start_x) * kMinTabGradient)) || 00274 at_least_2_crossings) { 00275 int confirmed_points = 0; 00276 // Count existing confirmed points to see if vector is acceptable. 00277 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 00278 bbox = it.data(); 00279 if (align_params.right_tab) { 00280 if (bbox->right_tab_type() == align_params.confirmed_type) 00281 ++confirmed_points; 00282 } else { 00283 if (bbox->left_tab_type() == align_params.confirmed_type) 00284 ++confirmed_points; 00285 } 00286 } 00287 // Ragged vectors are not allowed to use too many already used points. 00288 if (!align_params.ragged || 00289 confirmed_points + confirmed_points < pt_count) { 00290 const TBOX& box = bbox->bounding_box(); 00291 if (debug) { 00292 tprintf("Confirming tab vector of %d pts starting at %d,%d\n", 00293 pt_count, box.left(), box.bottom()); 00294 } 00295 // Flag all the aligned neighbours as confirmed . 00296 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 00297 bbox = it.data(); 00298 if (align_params.right_tab) { 00299 bbox->set_right_tab_type(align_params.confirmed_type); 00300 } else { 00301 bbox->set_left_tab_type(align_params.confirmed_type); 00302 } 00303 if (debug) { 00304 bbox->bounding_box().print(); 00305 } 00306 } 00307 // Now make the vector and return it. 00308 TabVector* result = TabVector::FitVector(align_params.alignment, 00309 align_params.vertical, 00310 ext_start_y, ext_end_y, 00311 &good_points, 00312 vertical_x, vertical_y); 00313 result->set_intersects_other_lines(at_least_2_crossings); 00314 if (debug) { 00315 tprintf("Box was %d, %d\n", box.left(), box.bottom()); 00316 result->Print("After fitting"); 00317 } 00318 return result; 00319 } else if (debug) { 00320 tprintf("Ragged tab used too many used points: %d out of %d\n", 00321 confirmed_points, pt_count); 00322 } 00323 } else if (debug) { 00324 tprintf("Tab vector failed basic tests: pt count %d vs min %d, " 00325 "length %d vs min %d, min grad %g\n", 00326 pt_count, align_params.min_points, end_y - start_y, 00327 align_params.min_length, abs(end_x - start_x) * kMinTabGradient); 00328 } 00329 return NULL; 00330 } 00331 00332 // Find a set of blobs that are aligned in the given vertical 00333 // direction with the given blob. Returns a list of aligned 00334 // blobs and the number in the list. 00335 // For other parameters see FindAlignedBlob below. 00336 int AlignedBlob::AlignTabs(const AlignedBlobParams& params, 00337 bool top_to_bottom, BLOBNBOX* bbox, 00338 BLOBNBOX_CLIST* good_points, int* end_y) { 00339 int ptcount = 0; 00340 BLOBNBOX_C_IT it(good_points); 00341 00342 TBOX box = bbox->bounding_box(); 00343 bool debug = WithinTestRegion(2, box.left(), box.bottom()); 00344 if (debug) { 00345 tprintf("Starting alignment run at blob:"); 00346 box.print(); 00347 } 00348 int x_start = params.right_tab ? box.right() : box.left(); 00349 while (bbox != NULL) { 00350 // Add the blob to the list if the appropriate side is a tab candidate, 00351 // or if we are working on a ragged tab. 00352 TabType type = params.right_tab ? bbox->right_tab_type() 00353 : bbox->left_tab_type(); 00354 if (((type != TT_NONE && type != TT_MAYBE_RAGGED) || params.ragged) && 00355 (it.empty() || it.data() != bbox)) { 00356 if (top_to_bottom) 00357 it.add_before_then_move(bbox); 00358 else 00359 it.add_after_then_move(bbox); 00360 ++ptcount; 00361 } 00362 // Find the next blob that is aligned with the current one. 00363 // FindAlignedBlob guarantees that forward progress will be made in the 00364 // top_to_bottom direction, and therefore eventually it will return NULL, 00365 // making this while (bbox != NULL) loop safe. 00366 bbox = FindAlignedBlob(params, top_to_bottom, bbox, x_start, end_y); 00367 if (bbox != NULL) { 00368 box = bbox->bounding_box(); 00369 if (!params.ragged) 00370 x_start = params.right_tab ? box.right() : box.left(); 00371 } 00372 } 00373 if (debug) { 00374 tprintf("Alignment run ended with %d pts at blob:", ptcount); 00375 box.print(); 00376 } 00377 return ptcount; 00378 } 00379 00380 // Search vertically for a blob that is aligned with the input bbox. 00381 // The search parameters are determined by AlignedBlobParams. 00382 // top_to_bottom tells whether to search down or up. 00383 // The return value is NULL if nothing was found in the search box 00384 // or if a blob was found in the gutter. On a NULL return, end_y 00385 // is set to the edge of the search box or the leading edge of the 00386 // gutter blob if one was found. 00387 BLOBNBOX* AlignedBlob::FindAlignedBlob(const AlignedBlobParams& p, 00388 bool top_to_bottom, BLOBNBOX* bbox, 00389 int x_start, int* end_y) { 00390 TBOX box = bbox->bounding_box(); 00391 // If there are separator lines, get the column edges. 00392 int left_column_edge = bbox->left_rule(); 00393 int right_column_edge = bbox->right_rule(); 00394 // start_y is used to guarantee that forward progress is made and the 00395 // search does not go into an infinite loop. New blobs must extend the 00396 // line beyond start_y. 00397 int start_y = top_to_bottom ? box.bottom() : box.top(); 00398 if (WithinTestRegion(2, x_start, start_y)) { 00399 tprintf("Column edges for blob at (%d,%d)->(%d,%d) are [%d, %d]\n", 00400 box.left(), box.top(), box.right(), box.bottom(), 00401 left_column_edge, right_column_edge); 00402 } 00403 // Compute skew tolerance. 00404 int skew_tolerance = p.max_v_gap / kMaxSkewFactor; 00405 // Calculate xmin and xmax of the search box so that it contains 00406 // all possibly relevant boxes upto p.max_v_gap above or below accoording 00407 // to top_to_bottom. 00408 // Start with a notion of vertical with the current estimate. 00409 int x2 = (p.max_v_gap * p.vertical.x() + p.vertical.y()/2) / p.vertical.y(); 00410 if (top_to_bottom) { 00411 x2 = x_start - x2; 00412 *end_y = start_y - p.max_v_gap; 00413 } else { 00414 x2 = x_start + x2; 00415 *end_y = start_y + p.max_v_gap; 00416 } 00417 // Expand the box by an additional skew tolerance 00418 int xmin = MIN(x_start, x2) - skew_tolerance; 00419 int xmax = MAX(x_start, x2) + skew_tolerance; 00420 // Now add direction-specific tolerances. 00421 if (p.right_tab) { 00422 xmax += p.min_gutter; 00423 xmin -= p.l_align_tolerance; 00424 } else { 00425 xmax += p.r_align_tolerance; 00426 xmin -= p.min_gutter; 00427 } 00428 // Setup a vertical search for an aligned blob. 00429 GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> vsearch(this); 00430 if (WithinTestRegion(2, x_start, start_y)) 00431 tprintf("Starting %s %s search at %d-%d,%d, search_size=%d, gutter=%d\n", 00432 p.ragged ? "Ragged" : "Aligned", p.right_tab ? "Right" : "Left", 00433 xmin, xmax, start_y, p.max_v_gap, p.min_gutter); 00434 vsearch.StartVerticalSearch(xmin, xmax, start_y); 00435 // result stores the best real return value. 00436 BLOBNBOX* result = NULL; 00437 // The backup_result is not a tab candidate and can be used if no 00438 // real tab candidate result is found. 00439 BLOBNBOX* backup_result = NULL; 00440 // neighbour is the blob that is currently being investigated. 00441 BLOBNBOX* neighbour = NULL; 00442 while ((neighbour = vsearch.NextVerticalSearch(top_to_bottom)) != NULL) { 00443 if (neighbour == bbox) 00444 continue; 00445 TBOX nbox = neighbour->bounding_box(); 00446 int n_y = (nbox.top() + nbox.bottom()) / 2; 00447 if ((!top_to_bottom && n_y > start_y + p.max_v_gap) || 00448 (top_to_bottom && n_y < start_y - p.max_v_gap)) { 00449 if (WithinTestRegion(2, x_start, start_y)) 00450 tprintf("Neighbour too far at (%d,%d)->(%d,%d)\n", 00451 nbox.left(), nbox.bottom(), nbox.right(), nbox.top()); 00452 break; // Gone far enough. 00453 } 00454 // It is CRITICAL to ensure that forward progress is made, (strictly 00455 // in/decreasing n_y) or the caller could loop infinitely, while 00456 // waiting for a sequence of blobs in a line to end. 00457 // NextVerticalSearch alone does not guarantee this, as there may be 00458 // more than one blob in a grid cell. See comment in AlignTabs. 00459 if ((n_y < start_y) != top_to_bottom || nbox.y_overlap(box)) 00460 continue; // Only look in the required direction. 00461 if (result != NULL && result->bounding_box().y_gap(nbox) > gridsize()) 00462 return result; // This result is clear. 00463 if (backup_result != NULL && p.ragged && result == NULL && 00464 backup_result->bounding_box().y_gap(nbox) > gridsize()) 00465 return backup_result; // This result is clear. 00466 00467 // If the neighbouring blob is the wrong side of a separator line, then it 00468 // "doesn't exist" as far as we are concerned. 00469 int x_at_n_y = x_start + (n_y - start_y) * p.vertical.x() / p.vertical.y(); 00470 if (x_at_n_y < neighbour->left_crossing_rule() || 00471 x_at_n_y > neighbour->right_crossing_rule()) 00472 continue; // Separator line in the way. 00473 int n_left = nbox.left(); 00474 int n_right = nbox.right(); 00475 int n_x = p.right_tab ? n_right : n_left; 00476 if (WithinTestRegion(2, x_start, start_y)) 00477 tprintf("neighbour at (%d,%d)->(%d,%d), n_x=%d, n_y=%d, xatn=%d\n", 00478 nbox.left(), nbox.bottom(), nbox.right(), nbox.top(), 00479 n_x, n_y, x_at_n_y); 00480 if (p.right_tab && 00481 n_left < x_at_n_y + p.min_gutter && 00482 n_right > x_at_n_y + p.r_align_tolerance && 00483 (p.ragged || n_left < x_at_n_y + p.gutter_fraction * nbox.height())) { 00484 // In the gutter so end of line. 00485 if (bbox->right_tab_type() >= TT_MAYBE_ALIGNED) 00486 bbox->set_right_tab_type(TT_DELETED); 00487 *end_y = top_to_bottom ? nbox.top() : nbox.bottom(); 00488 if (WithinTestRegion(2, x_start, start_y)) 00489 tprintf("gutter\n"); 00490 return NULL; 00491 } 00492 if (!p.right_tab && 00493 n_left < x_at_n_y - p.l_align_tolerance && 00494 n_right > x_at_n_y - p.min_gutter && 00495 (p.ragged || n_right > x_at_n_y - p.gutter_fraction * nbox.height())) { 00496 // In the gutter so end of line. 00497 if (bbox->left_tab_type() >= TT_MAYBE_ALIGNED) 00498 bbox->set_left_tab_type(TT_DELETED); 00499 *end_y = top_to_bottom ? nbox.top() : nbox.bottom(); 00500 if (WithinTestRegion(2, x_start, start_y)) 00501 tprintf("gutter\n"); 00502 return NULL; 00503 } 00504 if ((p.right_tab && neighbour->leader_on_right()) || 00505 (!p.right_tab && neighbour->leader_on_left())) 00506 continue; // Neigbours of leaders are not allowed to be used. 00507 if (n_x <= x_at_n_y + p.r_align_tolerance && 00508 n_x >= x_at_n_y - p.l_align_tolerance) { 00509 // Aligned so keep it. If it is a marked tab save it as result, 00510 // otherwise keep it as backup_result to return in case of later failure. 00511 if (WithinTestRegion(2, x_start, start_y)) 00512 tprintf("aligned, seeking%d, l=%d, r=%d\n", 00513 p.right_tab, neighbour->left_tab_type(), 00514 neighbour->right_tab_type()); 00515 TabType n_type = p.right_tab ? neighbour->right_tab_type() 00516 : neighbour->left_tab_type(); 00517 if (n_type != TT_NONE && (p.ragged || n_type != TT_MAYBE_RAGGED)) { 00518 if (result == NULL) { 00519 result = neighbour; 00520 } else { 00521 // Keep the closest neighbour by Euclidean distance. 00522 // This prevents it from picking a tab blob in another column. 00523 const TBOX& old_box = result->bounding_box(); 00524 int x_diff = p.right_tab ? old_box.right() : old_box.left(); 00525 x_diff -= x_at_n_y; 00526 int y_diff = (old_box.top() + old_box.bottom()) / 2 - start_y; 00527 int old_dist = x_diff * x_diff + y_diff * y_diff; 00528 x_diff = n_x - x_at_n_y; 00529 y_diff = n_y - start_y; 00530 int new_dist = x_diff * x_diff + y_diff * y_diff; 00531 if (new_dist < old_dist) 00532 result = neighbour; 00533 } 00534 } else if (backup_result == NULL) { 00535 if (WithinTestRegion(2, x_start, start_y)) 00536 tprintf("Backup\n"); 00537 backup_result = neighbour; 00538 } else { 00539 TBOX backup_box = backup_result->bounding_box(); 00540 if ((p.right_tab && backup_box.right() < nbox.right()) || 00541 (!p.right_tab && backup_box.left() > nbox.left())) { 00542 if (WithinTestRegion(2, x_start, start_y)) 00543 tprintf("Better backup\n"); 00544 backup_result = neighbour; 00545 } 00546 } 00547 } 00548 } 00549 return result != NULL ? result : backup_result; 00550 } 00551 00552 } // namespace tesseract. 00553