Tesseract  3.02
tesseract-ocr/textord/textlineprojection.cpp
Go to the documentation of this file.
00001 // Copyright 2011 Google Inc. All Rights Reserved.
00002 // Author: rays@google.com (Ray Smith)
00003 //
00004 // Licensed under the Apache License, Version 2.0 (the "License");
00005 // you may not use this file except in compliance with the License.
00006 // You may obtain a copy of the License at
00007 // http://www.apache.org/licenses/LICENSE-2.0
00008 // Unless required by applicable law or agreed to in writing, software
00009 // distributed under the License is distributed on an "AS IS" BASIS,
00010 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00011 // See the License for the specific language governing permissions and
00012 // limitations under the License.
00013 
00014 #include "textlineprojection.h"
00015 #include "allheaders.h"
00016 #include "bbgrid.h"         // Base class.
00017 #include "blobbox.h"        // BlobNeighourDir.
00018 #include "blobs.h"
00019 #include "colpartition.h"
00020 #include "normalis.h"
00021 
// Pad factor applied to blobs that are definitely oriented.
const int kOrientedPadFactor = 8;
// Pad factor applied to blobs that are not definitely oriented.
const int kDefaultPadFactor = 2;
// Penalty factor charged for moving away from the line center.
const int kWrongWayPenalty = 4;
// Ratio of parallel gap to perpendicular gap when measuring the total
// distance of a box from a target box in curved textline space.
// The parallel gap is treated more favorably by this factor, which allows
// quotes and ellipsis at the end of textlines to be caught.
const int kParaPerpDistRatio = 4;
// The inter-line gap must be at least this multiple of scale_factor_ before
// the increment box is padded perpendicular to the text line.
const int kMinLineSpacingFactor = 4;
// Largest tab-stop overrun allowed for horizontal padding, measured in
// projection pixels.
const int kMaxTabStopOverrun = 6;
00038 
00039 namespace tesseract {
00040 
00041 TextlineProjection::TextlineProjection(int resolution)
00042   : x_origin_(0), y_origin_(0), pix_(NULL) {
00043   // The projection map should be about 100 ppi, whatever the input.
00044   scale_factor_ = IntCastRounded(resolution / 100.0);
00045   if (scale_factor_ < 1) scale_factor_ = 1;
00046 }
00047 TextlineProjection::~TextlineProjection() {
00048   pixDestroy(&pix_);
00049 }
00050 
00051 // Build the projection profile given the input_block containing lists of
00052 // blobs, a rotation to convert to image coords,
00053 // and a full-resolution nontext_map, marking out areas to avoid.
00054 // During construction, we have the following assumptions:
00055 // The rotation is a multiple of 90 degrees, ie no deskew yet.
00056 // The blobs have had their left and right rules set to also limit
00057 // the range of projection.
00058 void TextlineProjection::ConstructProjection(TO_BLOCK* input_block,
00059                                              const FCOORD& rotation,
00060                                              Pix* nontext_map) {
00061   pixDestroy(&pix_);
00062   TBOX image_box(0, 0, pixGetWidth(nontext_map), pixGetHeight(nontext_map));
00063   x_origin_ = 0;
00064   y_origin_ = image_box.height();
00065   int width = (image_box.width() + scale_factor_ - 1) / scale_factor_;
00066   int height = (image_box.height() + scale_factor_ - 1) / scale_factor_;
00067 
00068   pix_ = pixCreate(width, height, 8);
00069   ProjectBlobs(&input_block->blobs, rotation, image_box, nontext_map);
00070   ProjectBlobs(&input_block->large_blobs, rotation, image_box, nontext_map);
00071   Pix* final_pix = pixBlockconv(pix_, 1, 1);
00072 //  Pix* final_pix = pixBlockconv(pix_, 2, 2);
00073   pixDestroy(&pix_);
00074   pix_ = final_pix;
00075 }
00076 
00077 // Display the blobs in the window colored according to textline quality.
00078 void TextlineProjection::PlotGradedBlobs(BLOBNBOX_LIST* blobs,
00079                                          ScrollView* win) {
00080   #ifndef GRAPHICS_DISABLED
00081   BLOBNBOX_IT it(blobs);
00082   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00083     BLOBNBOX* blob = it.data();
00084     const TBOX& box = blob->bounding_box();
00085     bool bad_box = BoxOutOfHTextline(box, NULL, false);
00086     if (blob->UniquelyVertical())
00087       win->Pen(ScrollView::YELLOW);
00088     else
00089       win->Pen(bad_box ? ScrollView::RED : ScrollView::BLUE);
00090     win->Rectangle(box.left(), box.bottom(), box.right(), box.top());
00091   }
00092   win->Update();
00093   #endif  // GRAPHICS_DISABLED
00094 }
00095 
00096 // Moves blobs that look like they don't sit well on a textline from the
00097 // input blobs list to the output small_blobs list.
00098 // This gets them away from initial textline finding to stop diacritics
00099 // from forming incorrect textlines. (Introduced mainly to fix Thai.)
00100 void TextlineProjection::MoveNonTextlineBlobs(
00101     BLOBNBOX_LIST* blobs, BLOBNBOX_LIST* small_blobs) const {
00102   BLOBNBOX_IT it(blobs);
00103   BLOBNBOX_IT small_it(small_blobs);
00104   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00105     BLOBNBOX* blob = it.data();
00106     const TBOX& box = blob->bounding_box();
00107     bool debug = AlignedBlob::WithinTestRegion(2, box.left(),
00108                                                box.bottom());
00109     if (BoxOutOfHTextline(box, NULL, debug) && !blob->UniquelyVertical()) {
00110       blob->ClearNeighbours();
00111       small_it.add_to_end(it.extract());
00112     }
00113   }
00114 }
00115 
00116 // Create a window and display the projection in it.
00117 void TextlineProjection::DisplayProjection() const {
00118   int width = pixGetWidth(pix_);
00119   int height = pixGetHeight(pix_);
00120   Pix* pixc = pixCreate(width, height, 32);
00121   int src_wpl = pixGetWpl(pix_);
00122   int col_wpl = pixGetWpl(pixc);
00123   uinT32* src_data = pixGetData(pix_);
00124   uinT32* col_data = pixGetData(pixc);
00125   for (int y = 0; y < height; ++y, src_data += src_wpl, col_data += col_wpl) {
00126     for (int x = 0; x < width; ++x) {
00127       int pixel = GET_DATA_BYTE(src_data, x);
00128       l_uint32 result;
00129       if (pixel <= 17)
00130         composeRGBPixel(0, 0, pixel * 15, &result);
00131       else if (pixel <= 145)
00132         composeRGBPixel(0, (pixel - 17) * 2, 255, &result);
00133       else
00134         composeRGBPixel((pixel - 145) * 2, 255, 255, &result);
00135       col_data[x] = result;
00136     }
00137   }
00138 #if 0
00139   // TODO(rays) uncomment when scrollview can display non-binary images.
00140   ScrollView* win = new ScrollView("Projection", 0, 0,
00141                                    width, height, width, height);
00142   win->Image(pixc, 0, 0);
00143   win->Update();
00144 #else
00145   pixWrite("projection.png", pixc, IFF_PNG);
00146 #endif
00147   pixDestroy(&pixc);
00148 }
00149 
00150 // Compute the distance of the box from the partition using curved projection
00151 // space. As DistanceOfBoxFromBox, except that the direction is taken from
00152 // the ColPartition and the median bounds of the ColPartition are used as
00153 // the to_box.
00154 int TextlineProjection::DistanceOfBoxFromPartition(const TBOX& box,
00155                                                    const ColPartition& part,
00156                                                    const DENORM* denorm,
00157                                                    bool debug) const {
00158   // Compute a partition box that uses the median top/bottom of the blobs
00159   // within and median left/right for vertical.
00160   TBOX part_box = part.bounding_box();
00161   if (part.IsHorizontalType()) {
00162     part_box.set_top(part.median_top());
00163     part_box.set_bottom(part.median_bottom());
00164   } else {
00165     part_box.set_left(part.median_left());
00166     part_box.set_right(part.median_right());
00167   }
00168   // Now use DistanceOfBoxFromBox to make the actual calculation.
00169   return DistanceOfBoxFromBox(box, part_box, part.IsHorizontalType(),
00170                               denorm, debug);
00171 }
00172 
00173 // Compute the distance from the from_box to the to_box using curved
00174 // projection space. Separation that involves a decrease in projection
00175 // density (moving from the from_box to the to_box) is weighted more heavily
00176 // than constant density, and an increase is weighted less.
00177 // If horizontal_textline is true, then curved space is used vertically,
00178 // as for a diacritic on the edge of a textline.
00179 // The projection uses original image coords, so denorm is used to get
00180 // back to the image coords from box/part space.
00181 // How the calculation works: Think of a diacritic near a textline.
00182 // Distance is measured from the far side of the from_box to the near side of
00183 // the to_box. Shown is the horizontal textline case.
00184 //          |------^-----|
00185 //          | from | box |
00186 //          |------|-----|
00187 //   perpendicular |
00188 //          <------v-------->|--------------------|
00189 //                  parallel |     to box         |
00190 //                           |--------------------|
00191 // Perpendicular distance uses "curved space" See VerticalDistance below.
00192 // Parallel distance is linear.
00193 // Result is perpendicular_gap + parallel_gap / kParaPerpDistRatio.
00194 int TextlineProjection::DistanceOfBoxFromBox(const TBOX& from_box,
00195                                              const TBOX& to_box,
00196                                              bool horizontal_textline,
00197                                              const DENORM* denorm,
00198                                              bool debug) const {
00199   // The parallel_gap is the horizontal gap between a horizontal textline and
00200   // the box. Analogous for vertical.
00201   int parallel_gap = 0;
00202   // start_pt is the box end of the line to be modified for curved space.
00203   TPOINT start_pt;
00204   // end_pt is the partition end of the line to be modified for curved space.
00205   TPOINT end_pt;
00206   if (horizontal_textline) {
00207     parallel_gap = from_box.x_gap(to_box) + from_box.width();
00208     start_pt.x = (from_box.left() + from_box.right()) / 2;
00209     end_pt.x = start_pt.x;
00210     if (from_box.top() - to_box.top() >= to_box.bottom() - from_box.bottom()) {
00211       start_pt.y = from_box.top();
00212       end_pt.y = MIN(to_box.top(), start_pt.y);
00213     } else {
00214       start_pt.y = from_box.bottom();
00215       end_pt.y = MAX(to_box.bottom(), start_pt.y);
00216     }
00217   } else {
00218     parallel_gap = from_box.y_gap(to_box) + from_box.height();
00219     if (from_box.right() - to_box.right() >= to_box.left() - from_box.left()) {
00220       start_pt.x = from_box.right();
00221       end_pt.x = MIN(to_box.right(), start_pt.x);
00222     } else {
00223       start_pt.x = from_box.left();
00224       end_pt.x = MAX(to_box.left(), start_pt.x);
00225     }
00226     start_pt.y = (from_box.bottom() + from_box.top()) / 2;
00227     end_pt.y = start_pt.y;
00228   }
00229   // The perpendicular gap is the max vertical distance gap out of:
00230   // top of from_box to to_box top and bottom of from_box to to_box bottom.
00231   // This value is then modified for curved projection space.
00232   // Analogous for vertical.
00233   int perpendicular_gap = 0;
00234   // If start_pt == end_pt, then the from_box lies entirely within the to_box
00235   // (in the perpendicular direction), so we don't need to calculate the
00236   // perpendicular_gap.
00237   if (start_pt.x != end_pt.x || start_pt.y != end_pt.y) {
00238     if (denorm != NULL) {
00239       // Denormalize the start and end.
00240       denorm->DenormTransform(start_pt, &start_pt);
00241       denorm->DenormTransform(end_pt, &end_pt);
00242     }
00243     if (abs(start_pt.y - end_pt.y) >= abs(start_pt.x - end_pt.x)) {
00244       perpendicular_gap = VerticalDistance(debug, start_pt.x, start_pt.y,
00245                                            end_pt.y);
00246     } else {
00247       perpendicular_gap = HorizontalDistance(debug, start_pt.x, end_pt.x,
00248                                              start_pt.y);
00249     }
00250   }
00251   // The parallel_gap weighs less than the perpendicular_gap.
00252   return perpendicular_gap + parallel_gap / kParaPerpDistRatio;
00253 }
00254 
00255 // Compute the distance between (x, y1) and (x, y2) using the rule that
00256 // a decrease in textline density is weighted more heavily than an increase.
00257 // The coordinates are in source image space, ie processed by any denorm
00258 // already, but not yet scaled by scale_factor_.
00259 // Going from the outside of a textline to the inside should measure much
00260 // less distance than going from the inside of a textline to the outside.
00261 // How it works:
00262 // An increase is cheap (getting closer to a textline).
00263 // Constant costs unity.
00264 // A decrease is expensive (getting further from a textline).
00265 // Pixels in projection map Counted distance
00266 //              2
00267 //              3              1/x
00268 //              3               1
00269 //              2               x
00270 //              5              1/x
00271 //              7              1/x
00272 // Total: 1 + x + 3/x where x = kWrongWayPenalty.
00273 int TextlineProjection::VerticalDistance(bool debug, int x,
00274                                          int y1, int y2) const {
00275   x = ImageXToProjectionX(x);
00276   y1 = ImageYToProjectionY(y1);
00277   y2 = ImageYToProjectionY(y2);
00278   if (y1 == y2) return 0;
00279   int wpl = pixGetWpl(pix_);
00280   int step = y1 < y2 ? 1 : -1;
00281   uinT32* data = pixGetData(pix_) + y1 * wpl;
00282   wpl *= step;
00283   int prev_pixel = GET_DATA_BYTE(data, x);
00284   int distance = 0;
00285   int right_way_steps = 0;
00286   for (int y = y1; y != y2; y += step) {
00287     data += wpl;
00288     int pixel = GET_DATA_BYTE(data, x);
00289     if (debug)
00290       tprintf("At (%d,%d), pix = %d, prev=%d\n",
00291               x, y + step, pixel, prev_pixel);
00292     if (pixel < prev_pixel)
00293       distance += kWrongWayPenalty;
00294     else if (pixel > prev_pixel)
00295       ++right_way_steps;
00296     else
00297       ++distance;
00298     prev_pixel = pixel;
00299   }
00300   return distance * scale_factor_ +
00301       right_way_steps * scale_factor_ / kWrongWayPenalty;
00302 }
00303 
00304 // Compute the distance between (x1, y) and (x2, y) using the rule that
00305 // a decrease in textline density is weighted more heavily than an increase.
00306 int TextlineProjection::HorizontalDistance(bool debug, int x1, int x2,
00307                                            int y) const {
00308   x1 = ImageXToProjectionX(x1);
00309   x2 = ImageXToProjectionX(x2);
00310   y = ImageYToProjectionY(y);
00311   if (x1 == x2) return 0;
00312   int wpl = pixGetWpl(pix_);
00313   int step = x1 < x2 ? 1 : -1;
00314   uinT32* data = pixGetData(pix_) + y * wpl;
00315   int prev_pixel = GET_DATA_BYTE(data, x1);
00316   int distance = 0;
00317   int right_way_steps = 0;
00318   for (int x = x1; x != x2; x += step) {
00319     int pixel = GET_DATA_BYTE(data, x + step);
00320     if (debug)
00321       tprintf("At (%d,%d), pix = %d, prev=%d\n",
00322               x + step, y, pixel, prev_pixel);
00323     if (pixel < prev_pixel)
00324       distance += kWrongWayPenalty;
00325     else if (pixel > prev_pixel)
00326       ++right_way_steps;
00327     else
00328       ++distance;
00329     prev_pixel = pixel;
00330   }
00331   return distance * scale_factor_ +
00332       right_way_steps * scale_factor_ / kWrongWayPenalty;
00333 }
00334 
00335 // Returns true if the blob appears to be outside of a textline.
00336 // Such blobs are potentially diacritics (even if large in Thai) and should
00337 // be kept away from initial textline finding.
00338 bool TextlineProjection::BoxOutOfHTextline(const TBOX& box,
00339                                           const DENORM* denorm,
00340                                           bool debug) const {
00341   int grad1 = 0;
00342   int grad2 = 0;
00343   EvaluateBoxInternal(box, denorm, debug, &grad1, &grad2, NULL, NULL);
00344   int worst_result = MIN(grad1, grad2);
00345   int total_result = grad1 + grad2;
00346   if (total_result >= 6) return false;  // Strongly in textline.
00347   // Medium strength: if either gradient is negative, it is likely outside
00348   // the body of the textline.
00349   if (worst_result < 0)
00350     return true;
00351   return false;
00352 }
00353 
00354 // Evaluates the textlineiness of a ColPartition. Uses EvaluateBox below,
00355 // but uses the median top/bottom for horizontal and median left/right for
00356 // vertical instead of the bounding box edges.
00357 // Evaluates for both horizontal and vertical and returns the best result,
00358 // with a positive value for horizontal and a negative value for vertical.
00359 int TextlineProjection::EvaluateColPartition(const ColPartition& part,
00360                                              const DENORM* denorm,
00361                                              bool debug) const {
00362   if (part.IsSingleton())
00363     return EvaluateBox(part.bounding_box(), denorm, debug);
00364   // Test vertical orientation.
00365   TBOX box = part.bounding_box();
00366   // Use the partition median for left/right.
00367   box.set_left(part.median_left());
00368   box.set_right(part.median_right());
00369   int vresult = EvaluateBox(box, denorm, debug);
00370 
00371   // Test horizontal orientation.
00372   box = part.bounding_box();
00373   // Use the partition median for top/bottom.
00374   box.set_top(part.median_top());
00375   box.set_bottom(part.median_bottom());
00376   int hresult = EvaluateBox(box, denorm, debug);
00377   if (debug) {
00378     tprintf("Partition hresult=%d, vresult=%d from:", hresult, vresult);
00379     part.bounding_box().print();
00380     part.Print();
00381   }
00382   return hresult >= -vresult ? hresult : vresult;
00383 }
00384 
00385 // Computes the mean projection gradients over the horizontal and vertical
00386 // edges of the box:
00387 //   -h-h-h-h-h-h
00388 //  |------------| mean=htop   -v|+v--------+v|-v
00389 //  |+h+h+h+h+h+h|             -v|+v        +v|-v
00390 //  |            |             -v|+v        +v|-v
00391 //  |    box     |             -v|+v  box   +v|-v
00392 //  |            |             -v|+v        +v|-v
00393 //  |+h+h+h+h+h+h|             -v|+v        +v|-v
00394 //  |------------| mean=hbot   -v|+v--------+v|-v
00395 //   -h-h-h-h-h-h
00396 //                           mean=vleft  mean=vright
00397 //
00398 // Returns MAX(htop,hbot) - MAX(vleft,vright), which is a positive number
00399 // for a horizontal textline, a negative number for a vertical textline,
00400 // and near zero for undecided. Undecided is most likely non-text.
00401 // All the gradients are truncated to remain non-negative, since negative
00402 // horizontal gradients don't give any indication of being vertical and
00403 // vice versa.
00404 // Additional complexity: The coordinates have to be transformed to original
00405 // image coordinates with denorm (if not null), scaled to match the projection
00406 // pix, and THEN step out 2 pixels each way from the edge to compute the
00407 // gradient, and tries 3 positions, each measuring the gradient over a
00408 // 4-pixel spread: (+3/-1), (+2/-2), (+1/-3).  This complexity is handled by
00409 // several layers of helpers below.
00410 int TextlineProjection::EvaluateBox(const TBOX& box, const DENORM* denorm,
00411                                     bool debug) const {
00412   return EvaluateBoxInternal(box, denorm, debug, NULL, NULL, NULL, NULL);
00413 }
00414 
00415 // Internal version of EvaluateBox returns the unclipped gradients as well
00416 // as the result of EvaluateBox.
00417 // hgrad1 and hgrad2 are the gradients for the horizontal textline.
00418 int TextlineProjection::EvaluateBoxInternal(const TBOX& box,
00419                                             const DENORM* denorm, bool debug,
00420                                             int* hgrad1, int* hgrad2,
00421                                             int* vgrad1, int* vgrad2) const {
00422   int top_gradient = BestMeanGradientInRow(denorm, box.left(), box.right(),
00423                                            box.top(), true);
00424   int bottom_gradient = -BestMeanGradientInRow(denorm, box.left(), box.right(),
00425                                                box.bottom(), false);
00426   int left_gradient = BestMeanGradientInColumn(denorm, box.left(), box.bottom(),
00427                                                box.top(), true);
00428   int right_gradient = -BestMeanGradientInColumn(denorm, box.right(),
00429                                                  box.bottom(), box.top(),
00430                                                  false);
00431   int top_clipped = MAX(top_gradient, 0);
00432   int bottom_clipped = MAX(bottom_gradient, 0);
00433   int left_clipped = MAX(left_gradient, 0);
00434   int right_clipped = MAX(right_gradient, 0);
00435   if (debug) {
00436     tprintf("Gradients: top = %d, bottom = %d, left= %d, right= %d for box:",
00437             top_gradient, bottom_gradient, left_gradient, right_gradient);
00438     box.print();
00439   }
00440   int result = MAX(top_clipped, bottom_clipped) -
00441       MAX(left_clipped, right_clipped);
00442   if (hgrad1 != NULL && hgrad2 != NULL) {
00443     *hgrad1 = top_gradient;
00444     *hgrad2 = bottom_gradient;
00445   }
00446   if (vgrad1 != NULL && vgrad2 != NULL) {
00447     *vgrad1 = left_gradient;
00448     *vgrad2 = right_gradient;
00449   }
00450   return result;
00451 }
00452 
00453 // Helper returns the mean gradient value for the horizontal row at the given
00454 // y, (in the external coordinates) by subtracting the mean of the transformed
00455 // row 2 pixels above from the mean of the transformed row 2 pixels below.
00456 // This gives a positive value for a good top edge and negative for bottom.
00457 // Returns the best result out of +2/-2, +3/-1, +1/-3 pixels from the edge.
00458 int TextlineProjection::BestMeanGradientInRow(const DENORM* denorm,
00459                                               inT16 min_x, inT16 max_x, inT16 y,
00460                                               bool best_is_max) const {
00461   TPOINT start_pt(min_x, y);
00462   TPOINT end_pt(max_x, y);
00463   int upper = MeanPixelsInLineSegment(denorm, -2, start_pt, end_pt);
00464   int lower = MeanPixelsInLineSegment(denorm, 2, start_pt, end_pt);
00465   int best_gradient = lower - upper;
00466   upper = MeanPixelsInLineSegment(denorm, -1, start_pt, end_pt);
00467   lower = MeanPixelsInLineSegment(denorm, 3, start_pt, end_pt);
00468   int gradient = lower - upper;
00469   if ((gradient > best_gradient) == best_is_max)
00470     best_gradient = gradient;
00471   upper = MeanPixelsInLineSegment(denorm, -3, start_pt, end_pt);
00472   lower = MeanPixelsInLineSegment(denorm, 1, start_pt, end_pt);
00473   gradient = lower - upper;
00474   if ((gradient > best_gradient) == best_is_max)
00475     best_gradient = gradient;
00476   return best_gradient;
00477 }
00478 
00479 // Helper returns the mean gradient value for the vertical column at the
00480 // given x, (in the external coordinates) by subtracting the mean of the
00481 // transformed column 2 pixels left from the mean of the transformed column
00482 // 2 pixels to the right.
00483 // This gives a positive value for a good left edge and negative for right.
00484 // Returns the best result out of +2/-2, +3/-1, +1/-3 pixels from the edge.
00485 int TextlineProjection::BestMeanGradientInColumn(const DENORM* denorm, inT16 x,
00486                                                  inT16 min_y, inT16 max_y,
00487                                                  bool best_is_max) const {
00488   TPOINT start_pt(x, min_y);
00489   TPOINT end_pt(x, max_y);
00490   int left = MeanPixelsInLineSegment(denorm, -2, start_pt, end_pt);
00491   int right = MeanPixelsInLineSegment(denorm, 2, start_pt, end_pt);
00492   int best_gradient = right - left;
00493   left = MeanPixelsInLineSegment(denorm, -1, start_pt, end_pt);
00494   right = MeanPixelsInLineSegment(denorm, 3, start_pt, end_pt);
00495   int gradient = right - left;
00496   if ((gradient > best_gradient) == best_is_max)
00497     best_gradient = gradient;
00498   left = MeanPixelsInLineSegment(denorm, -3, start_pt, end_pt);
00499   right = MeanPixelsInLineSegment(denorm, 1, start_pt, end_pt);
00500   gradient = right - left;
00501   if ((gradient > best_gradient) == best_is_max)
00502     best_gradient = gradient;
00503   return best_gradient;
00504 }
00505 
00506 // Helper returns the mean pixel value over the line between the start_pt and
00507 // end_pt (inclusive), but shifted perpendicular to the line in the projection
00508 // image by offset pixels. For simplicity, it is assumed that the vector is
00509 // either nearly horizontal or nearly vertical. It works on skewed textlines!
00510 // The end points are in external coordinates, and will be denormalized with
00511 // the denorm if not NULL before further conversion to pix coordinates.
00512 // After all the conversions, the offset is added to the direction
00513 // perpendicular to the line direction. The offset is thus in projection image
00514 // coordinates, which allows the caller to get a guaranteed displacement
00515 // between pixels used to calculate gradients.
00516 int TextlineProjection::MeanPixelsInLineSegment(const DENORM* denorm,
00517                                                 int offset,
00518                                                 TPOINT start_pt,
00519                                                 TPOINT end_pt) const {
00520   TransformToPixCoords(denorm, &start_pt);
00521   TransformToPixCoords(denorm, &end_pt);
00522   TruncateToImageBounds(&start_pt);
00523   TruncateToImageBounds(&end_pt);
00524   int wpl = pixGetWpl(pix_);
00525   uinT32* data = pixGetData(pix_);
00526   int total = 0;
00527   int count = 0;
00528   int x_delta = end_pt.x - start_pt.x;
00529   int y_delta = end_pt.y - start_pt.y;
00530   if (abs(x_delta) >= abs(y_delta)) {
00531     if (x_delta == 0)
00532       return 0;
00533     // Horizontal line. Add the offset vertically.
00534     int x_step = x_delta > 0 ? 1 : -1;
00535     // Correct offset for rotation, keeping it anti-clockwise of the delta.
00536     offset *= x_step;
00537     start_pt.y += offset;
00538     end_pt.y += offset;
00539     TruncateToImageBounds(&start_pt);
00540     TruncateToImageBounds(&end_pt);
00541     x_delta = end_pt.x - start_pt.x;
00542     y_delta = end_pt.y - start_pt.y;
00543     count = x_delta * x_step + 1;
00544     for (int x = start_pt.x; x != end_pt.x; x += x_step) {
00545       int y = start_pt.y + DivRounded(y_delta * (x - start_pt.x), x_delta);
00546       total += GET_DATA_BYTE(data + wpl * y, x);
00547     }
00548   } else {
00549     // Vertical line. Add the offset horizontally.
00550     int y_step = y_delta > 0 ? 1 : -1;
00551     // Correct offset for rotation, keeping it anti-clockwise of the delta.
00552     // Pix holds the image with y=0 at the top, so the offset is negated.
00553     offset *= -y_step;
00554     start_pt.x += offset;
00555     end_pt.x += offset;
00556     TruncateToImageBounds(&start_pt);
00557     TruncateToImageBounds(&end_pt);
00558     x_delta = end_pt.x - start_pt.x;
00559     y_delta = end_pt.y - start_pt.y;
00560     count = y_delta * y_step + 1;
00561     for (int y = start_pt.y; y != end_pt.y; y += y_step) {
00562       int x = start_pt.x + DivRounded(x_delta * (y - start_pt.y), y_delta);
00563       total += GET_DATA_BYTE(data + wpl * y, x);
00564     }
00565   }
00566   return DivRounded(total, count);
00567 }
00568 
00569 // Given an input pix, and a box, the sides of the box are shrunk inwards until
00570 // they bound any black pixels found within the original box.
00571 // The function converts between tesseract coords and the pix coords assuming
00572 // that this pix is full resolution equal in size to the original image.
00573 // Returns an empty box if there are no black pixels in the source box.
00574 static TBOX BoundsWithinBox(Pix* pix, const TBOX& box) {
00575   int im_height = pixGetHeight(pix);
00576   Box* input_box = boxCreate(box.left(), im_height - box.top(),
00577                              box.width(), box.height());
00578   Box* output_box = NULL;
00579   pixClipBoxToForeground(pix, input_box, NULL, &output_box);
00580   TBOX result_box;
00581   if (output_box != NULL) {
00582     l_int32 x, y, width, height;
00583     boxGetGeometry(output_box, &x, &y, &width, &height);
00584     result_box.set_left(x);
00585     result_box.set_right(x + width);
00586     result_box.set_top(im_height - y);
00587     result_box.set_bottom(result_box.top() - height);
00588     boxDestroy(&output_box);
00589   }
00590   boxDestroy(&input_box);
00591   return result_box;
00592 }
00593 
00594 // Splits the given box in half at x_middle or y_middle according to split_on_x
00595 // and checks for nontext_map pixels in each half. Reduces the bbox so that it
00596 // still includes the middle point, but does not touch any fg pixels in
00597 // nontext_map. An empty box may be returned if there is no such box.
00598 static void TruncateBoxToMissNonText(int x_middle, int y_middle,
00599                                      bool split_on_x, Pix* nontext_map,
00600                                      TBOX* bbox) {
00601   TBOX box1(*bbox);
00602   TBOX box2(*bbox);
00603   TBOX im_box;
00604   if (split_on_x) {
00605     box1.set_right(x_middle);
00606     im_box = BoundsWithinBox(nontext_map, box1);
00607     if (!im_box.null_box()) box1.set_left(im_box.right());
00608     box2.set_left(x_middle);
00609     im_box = BoundsWithinBox(nontext_map, box2);
00610     if (!im_box.null_box()) box2.set_right(im_box.left());
00611   } else {
00612     box1.set_bottom(y_middle);
00613     im_box = BoundsWithinBox(nontext_map, box1);
00614     if (!im_box.null_box()) box1.set_top(im_box.bottom());
00615     box2.set_top(y_middle);
00616     im_box = BoundsWithinBox(nontext_map, box2);
00617     if (!im_box.null_box()) box2.set_bottom(im_box.top());
00618   }
00619   box1 += box2;
00620   *bbox = box1;
00621 }
00622 
00623 
00624 // Helper function to add 1 to a rectangle in source image coords to the
00625 // internal projection pix_.
00626 void TextlineProjection::IncrementRectangle8Bit(const TBOX& box) {
00627   int scaled_left = ImageXToProjectionX(box.left());
00628   int scaled_top = ImageYToProjectionY(box.top());
00629   int scaled_right = ImageXToProjectionX(box.right());
00630   int scaled_bottom = ImageYToProjectionY(box.bottom());
00631   int wpl = pixGetWpl(pix_);
00632   uinT32* data = pixGetData(pix_) + scaled_top * wpl;
00633   for (int y = scaled_top; y <= scaled_bottom; ++y) {
00634     for (int x = scaled_left; x <= scaled_right; ++x) {
00635       int pixel = GET_DATA_BYTE(data, x);
00636       if (pixel < 255)
00637         SET_DATA_BYTE(data, x, pixel + 1);
00638     }
00639     data += wpl;
00640   }
00641 }
00642 
00643 // Inserts a list of blobs into the projection.
00644 // Rotation is a multiple of 90 degrees to get from blob coords to
00645 // nontext_map coords, nontext_map_box is the bounds of the nontext_map.
00646 // Blobs are spread horizontally or vertically according to their internal
00647 // flags, but the spreading is truncated by set pixels in the nontext_map
00648 // and also by the horizontal rule line limits on the blobs.
00649 void TextlineProjection::ProjectBlobs(BLOBNBOX_LIST* blobs,
00650                                       const FCOORD& rotation,
00651                                       const TBOX& nontext_map_box,
00652                                       Pix* nontext_map) {
00653   BLOBNBOX_IT blob_it(blobs);
00654   for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
00655     BLOBNBOX* blob = blob_it.data();
00656     TBOX bbox = blob->bounding_box();
00657     ICOORD middle((bbox.left() + bbox.right()) / 2,
00658                   (bbox.bottom() + bbox.top()) / 2);
00659     bool spreading_horizontally = PadBlobBox(blob, &bbox);
00660     // Rotate to match the nontext_map.
00661     bbox.rotate(rotation);
00662     middle.rotate(rotation);
00663     if (rotation.x() == 0.0f)
00664       spreading_horizontally = !spreading_horizontally;
00665     // Clip to the image before applying the increments.
00666     bbox &= nontext_map_box;  // This is in-place box intersection.
00667     // Check for image pixels before spreading.
00668     TruncateBoxToMissNonText(middle.x(), middle.y(), spreading_horizontally,
00669                              nontext_map, &bbox);
00670     if (bbox.area() > 0) {
00671       IncrementRectangle8Bit(bbox);
00672     }
00673   }
00674 }
00675 
00676 // Pads the bounding box of the given blob according to whether it is on
00677 // a horizontal or vertical text line, taking into account tab-stops near
00678 // the blob. Returns true if padding was in the horizontal direction.
00679 bool TextlineProjection::PadBlobBox(BLOBNBOX* blob, TBOX* bbox) {
00680   // Determine which direction to spread.
00681   // If text is well spaced out, it can be useful to pad perpendicular to
00682   // the textline direction, so as to ensure diacritics get absorbed
00683   // correctly, but if the text is tightly spaced, this will destroy the
00684   // blank space between textlines in the projection map, and that would
00685   // be very bad.
00686   int pad_limit = scale_factor_ * kMinLineSpacingFactor;
00687   int xpad = 0;
00688   int ypad = 0;
00689   bool padding_horizontally = false;
00690   if (blob->UniquelyHorizontal()) {
00691     xpad = bbox->height() * kOrientedPadFactor;
00692     padding_horizontally = true;
00693     // If the text appears to be very well spaced, pad the other direction by a
00694     // single pixel in the projection profile space to help join diacritics to
00695     // the textline.
00696     if ((blob->neighbour(BND_ABOVE) == NULL ||
00697         bbox->y_gap(blob->neighbour(BND_ABOVE)->bounding_box()) > pad_limit) &&
00698         (blob->neighbour(BND_BELOW) == NULL ||
00699         bbox->y_gap(blob->neighbour(BND_BELOW)->bounding_box()) > pad_limit)) {
00700       ypad = scale_factor_;
00701     }
00702   } else if (blob->UniquelyVertical()) {
00703     ypad = bbox->width() * kOrientedPadFactor;
00704     if ((blob->neighbour(BND_LEFT) == NULL ||
00705         bbox->x_gap(blob->neighbour(BND_LEFT)->bounding_box()) > pad_limit) &&
00706         (blob->neighbour(BND_RIGHT) == NULL ||
00707         bbox->x_gap(blob->neighbour(BND_RIGHT)->bounding_box()) > pad_limit)) {
00708       xpad = scale_factor_;
00709     }
00710   } else {
00711     if ((blob->neighbour(BND_ABOVE) != NULL &&
00712          blob->neighbour(BND_ABOVE)->neighbour(BND_BELOW) == blob) ||
00713         (blob->neighbour(BND_BELOW) != NULL &&
00714             blob->neighbour(BND_BELOW)->neighbour(BND_ABOVE) == blob)) {
00715       ypad = bbox->width() * kDefaultPadFactor;
00716     }
00717     if ((blob->neighbour(BND_RIGHT) != NULL &&
00718          blob->neighbour(BND_RIGHT)->neighbour(BND_LEFT) == blob) ||
00719         (blob->neighbour(BND_LEFT) != NULL &&
00720             blob->neighbour(BND_LEFT)->neighbour(BND_RIGHT) == blob)) {
00721       xpad = bbox->height() * kDefaultPadFactor;
00722       padding_horizontally = true;
00723     }
00724   }
00725   bbox->pad(xpad, ypad);
00726   pad_limit = scale_factor_ * kMaxTabStopOverrun;
00727   // Now shrink horizontally to avoid stepping more than pad_limit over a
00728   // tab-stop.
00729   if (bbox->left() < blob->left_rule() - pad_limit) {
00730     bbox->set_left(blob->left_rule() - pad_limit);
00731   }
00732   if (bbox->right() > blob->right_rule() + pad_limit) {
00733     bbox->set_right(blob->right_rule() + pad_limit);
00734   }
00735   return padding_horizontally;
00736 }
00737 
00738 // Helper denormalizes the TPOINT with the denorm if not NULL, then
00739 // converts to pix_ coordinates.
00740 void TextlineProjection::TransformToPixCoords(const DENORM* denorm,
00741                                               TPOINT* pt) const {
00742   if (denorm != NULL) {
00743     // Denormalize the point.
00744     denorm->DenormTransform(*pt, pt);
00745   }
00746   pt->x = ImageXToProjectionX(pt->x);
00747   pt->y = ImageYToProjectionY(pt->y);
00748 }
00749 
00750 // Helper truncates the TPOINT to be within the pix_.
00751 void TextlineProjection::TruncateToImageBounds(TPOINT* pt) const {
00752   pt->x = ClipToRange<int>(pt->x, 0, pixGetWidth(pix_) - 1);
00753   pt->y = ClipToRange<int>(pt->y, 0, pixGetHeight(pix_) - 1);
00754 }
00755 
00756 // Transform tesseract image coordinates to coordinates used in the projection.
00757 int TextlineProjection::ImageXToProjectionX(int x) const {
00758   x = ClipToRange((x - x_origin_) / scale_factor_, 0, pixGetWidth(pix_) - 1);
00759   return x;
00760 }
00761 int TextlineProjection::ImageYToProjectionY(int y) const {
00762   y = ClipToRange((y_origin_ - y) / scale_factor_, 0, pixGetHeight(pix_) - 1);
00763   return y;
00764 }
00765 
00766 }  // namespace tesseract.