Tesseract  3.02
tesseract-ocr/ccstruct/detlinefit.cpp
Go to the documentation of this file.
00001 
00002 // File:        detlinefit.cpp
00003 // Description: Deterministic least median squares line fitting.
00004 // Author:      Ray Smith
00005 // Created:     Thu Feb 28 14:45:01 PDT 2008
00006 //
00007 // (C) Copyright 2008, Google Inc.
00008 // Licensed under the Apache License, Version 2.0 (the "License");
00009 // you may not use this file except in compliance with the License.
00010 // You may obtain a copy of the License at
00011 // http://www.apache.org/licenses/LICENSE-2.0
00012 // Unless required by applicable law or agreed to in writing, software
00013 // distributed under the License is distributed on an "AS IS" BASIS,
00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015 // See the License for the specific language governing permissions and
00016 // limitations under the License.
00017 //
00019 
00020 #include "detlinefit.h"
00021 #include "statistc.h"
00022 #include "ndminx.h"
00023 
00024 namespace tesseract {
00025 
00026 // The number of points to consider at each end.
00027 const int kNumEndPoints = 3;
00028 
00029 DetLineFit::DetLineFit() {
00030 }
00031 
00032 DetLineFit::~DetLineFit() {
00033 }
00034 
00035 // Delete all Added points.
00036 void DetLineFit::Clear() {
00037   pt_list_.clear();
00038 }
00039 
00040 // Add a new point. Takes a copy - the pt doesn't need to stay in scope.
00041 void DetLineFit::Add(const ICOORD& pt) {
00042   ICOORDELT_IT it = &pt_list_;
00043   ICOORDELT* new_pt = new ICOORDELT(pt);
00044   it.add_to_end(new_pt);
00045 }
00046 
00047 // Fit a line to the points, returning the fitted line as a pair of
00048 // points, and the upper quartile error.
00049 double DetLineFit::Fit(ICOORD* pt1, ICOORD* pt2) {
00050   ICOORDELT_IT it(&pt_list_);
00051   // Do something sensible with no points.
00052   if (pt_list_.empty()) {
00053     pt1->set_x(0);
00054     pt1->set_y(0);
00055     *pt2 = *pt1;
00056     return 0.0;
00057   }
00058   // Count the points and find the first and last kNumEndPoints.
00059   ICOORD* starts[kNumEndPoints];
00060   ICOORD* ends[kNumEndPoints];
00061   int pt_count = 0;
00062   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00063     if (pt_count < kNumEndPoints) {
00064       starts[pt_count] = it.data();
00065       ends[pt_count] = starts[pt_count];
00066     } else {
00067       for (int i = 1; i < kNumEndPoints; ++i)
00068         ends[i - 1] = ends[i];
00069       ends[kNumEndPoints - 1] = it.data();
00070     }
00071     ++pt_count;
00072   }
00073   // 1 or 2 points need special treatment.
00074   if (pt_count <= 2) {
00075     *pt1 = *starts[0];
00076     if (pt_count > 1)
00077       *pt2 = *starts[1];
00078     else
00079       *pt2 = *pt1;
00080     return 0.0;
00081   }
00082   int end_count = MIN(pt_count, kNumEndPoints);
00083   int* distances = new int[pt_count];
00084   double best_uq = -1.0;
00085   // Iterate each pair of points and find the best fitting line.
00086   for (int i = 0; i < end_count; ++i) {
00087     ICOORD* start = starts[i];
00088     for (int j = 0; j < end_count; ++j) {
00089       ICOORD* end = ends[j];
00090       if (start != end) {
00091         // Compute the upper quartile error from the line.
00092         double dist = ComputeErrors(*start, *end, distances);
00093         if (dist < best_uq || best_uq < 0.0) {
00094           best_uq = dist;
00095           *pt1 = *start;
00096           *pt2 = *end;
00097         }
00098       }
00099     }
00100   }
00101   delete [] distances;
00102   // Finally compute the square root to return the true distance.
00103   return best_uq > 0.0 ? sqrt(best_uq) : best_uq;
00104 }
00105 
00106 // Backwards compatible fit returning a gradient and constant.
00107 // Deprecated. Prefer Fit(ICOORD*, ICOORD*) where possible, but use this
00108 // function in preference to the LMS class.
00109 double DetLineFit::Fit(float* m, float* c) {
00110   ICOORD start, end;
00111   double error = Fit(&start, &end);
00112   if (end.x() != start.x()) {
00113     *m = static_cast<float>(end.y() - start.y()) / (end.x() - start.x());
00114     *c = start.y() - *m * start.x();
00115   } else {
00116     *m = 0.0f;
00117     *c = 0.0f;
00118   }
00119   return error;
00120 }
00121 
00122 // Helper function to compute a fictitious end point that is on a line
00123 // of a given gradient through the given start.
00124 ICOORD ComputeEndFromGradient(const ICOORD& start, double m) {
00125   if (m > 1.0 || m < -1.0) {
00126     // dy dominates. Force it to have the opposite sign of start.y() and
00127     // compute dx based on dy being as large as possible
00128     int dx = static_cast<int>(floor(MAX_INT16 / m));
00129     if (dx < 0) ++dx;  // Truncate towards 0.
00130     if (start.y() > 0) dx = - dx;  // Force dy to be opposite to start.y().
00131     // Constrain dx so the result fits in an inT16.
00132     while (start.x() + dx > MAX_INT16 || start.x() + dx < -MAX_INT16)
00133       dx /= 2;
00134     if (-1 <= dx && dx <= 1) {
00135       return ICOORD(start.x(), start.y() + 1);  // Too steep for anything else.
00136     }
00137     int y = start.y() + static_cast<int>(floor(dx * m + 0.5));
00138     ASSERT_HOST(-MAX_INT16 <= y && y <= MAX_INT16);
00139     return ICOORD(start.x() + dx, y);
00140   } else {
00141     // dx dominates. Force it to have the opposite sign of start.x() and
00142     // compute dy based on dx being as large as possible.
00143     int dy = static_cast<int>(floor(MAX_INT16 * m));
00144     if (dy < 0) ++dy;  // Truncate towards 0.
00145     if (start.x() > 0) dy = - dy;  // Force dx to be opposite to start.x().
00146     // Constrain dy so the result fits in an inT16.
00147     while (start.y() + dy > MAX_INT16 || start.y() + dy < -MAX_INT16)
00148       dy /= 2;
00149     if (-1 <= dy && dy <= 1) {
00150       return ICOORD(start.x() + 1, start.y());  // Too flat for anything else.
00151     }
00152     int x = start.x() + static_cast<int>(floor(dy / m + 0.5));
00153     ASSERT_HOST(-MAX_INT16 <= x && x <= MAX_INT16);
00154     return ICOORD(x, start.y() + dy);
00155   }
00156 }
00157 
00158 // Backwards compatible constrained fit with a supplied gradient.
00159 double DetLineFit::ConstrainedFit(double m, float* c) {
00160   ICOORDELT_IT it(&pt_list_);
00161   // Do something sensible with no points.
00162   if (pt_list_.empty()) {
00163     *c = 0.0f;
00164     return 0.0;
00165   }
00166   // Count the points and find the first and last kNumEndPoints.
00167   // Put the ends in a single array to make their use easier later.
00168   ICOORD* pts[kNumEndPoints * 2];
00169   int pt_count = 0;
00170   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00171     if (pt_count < kNumEndPoints) {
00172       pts[pt_count] = it.data();
00173       pts[kNumEndPoints + pt_count] = pts[pt_count];
00174     } else {
00175       for (int i = 1; i < kNumEndPoints; ++i)
00176         pts[kNumEndPoints + i - 1] = pts[kNumEndPoints + i];
00177       pts[kNumEndPoints * 2 - 1] = it.data();
00178     }
00179     ++pt_count;
00180   }
00181   while (pt_count < kNumEndPoints) {
00182     pts[pt_count] = NULL;
00183     pts[kNumEndPoints + pt_count++] = NULL;
00184   }
00185   int* distances = new int[pt_count];
00186   double best_uq = -1.0;
00187   // Iterate each pair of points and find the best fitting line.
00188   for (int i = 0; i < kNumEndPoints * 2; ++i) {
00189     ICOORD* start = pts[i];
00190     if (start == NULL) continue;
00191     ICOORD end = ComputeEndFromGradient(*start, m);
00192     // Compute the upper quartile error from the line.
00193     double dist = ComputeErrors(*start, end, distances);
00194     if (dist < best_uq || best_uq < 0.0) {
00195       best_uq = dist;
00196       *c = start->y() - start->x() * m;
00197     }
00198   }
00199   delete [] distances;
00200   // Finally compute the square root to return the true distance.
00201   return best_uq > 0.0 ? sqrt(best_uq) : best_uq;
00202 }
00203 
00204 // Comparator function used by the nth_item funtion.
00205 static int CompareInts(const void *p1, const void *p2) {
00206   const int* i1 = reinterpret_cast<const int*>(p1);
00207   const int* i2 = reinterpret_cast<const int*>(p2);
00208 
00209   return *i1 - *i2;
00210 }
00211 
00212 // Compute all the cross product distances of the points from the line
00213 // and return the true squared upper quartile distance.
00214 double DetLineFit::ComputeErrors(const ICOORD start, const ICOORD end,
00215                                  int* distances) {
00216   ICOORDELT_IT it(&pt_list_);
00217   ICOORD line_vector = end;
00218   line_vector -= start;
00219   // Compute the distance of each point from the line.
00220   int pt_index = 0;
00221   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00222     ICOORD pt_vector = *it.data();
00223     pt_vector -= start;
00224     // Compute |line_vector||pt_vector|sin(angle between)
00225     int dist = line_vector * pt_vector;
00226     if (dist < 0)
00227       dist = -dist;
00228     distances[pt_index++] = dist;
00229   }
00230   // Now get the upper quartile distance.
00231   int index = choose_nth_item(3 * pt_index / 4, distances, pt_index,
00232                               sizeof(distances[0]), CompareInts);
00233   double dist = distances[index];
00234   // The true distance is the square root of the dist squared / the
00235   // squared length of line_vector (which is the dot product with itself)
00236   // Don't bother with the square root. Just return the square distance.
00237   return dist * dist / (line_vector % line_vector);
00238 }
00239 
00240 }  // namespace tesseract.
00241 
00242