Tesseract
3.02
|
00001 00002 // File: detlinefit.cpp 00003 // Description: Deterministic least median squares line fitting. 00004 // Author: Ray Smith 00005 // Created: Thu Feb 28 14:45:01 PDT 2008 00006 // 00007 // (C) Copyright 2008, Google Inc. 00008 // Licensed under the Apache License, Version 2.0 (the "License"); 00009 // you may not use this file except in compliance with the License. 00010 // You may obtain a copy of the License at 00011 // http://www.apache.org/licenses/LICENSE-2.0 00012 // Unless required by applicable law or agreed to in writing, software 00013 // distributed under the License is distributed on an "AS IS" BASIS, 00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 // See the License for the specific language governing permissions and 00016 // limitations under the License. 00017 // 00019 00020 #include "detlinefit.h" 00021 #include "statistc.h" 00022 #include "ndminx.h" 00023 00024 namespace tesseract { 00025 00026 // The number of points to consider at each end. 00027 const int kNumEndPoints = 3; 00028 00029 DetLineFit::DetLineFit() { 00030 } 00031 00032 DetLineFit::~DetLineFit() { 00033 } 00034 00035 // Delete all Added points. 00036 void DetLineFit::Clear() { 00037 pt_list_.clear(); 00038 } 00039 00040 // Add a new point. Takes a copy - the pt doesn't need to stay in scope. 00041 void DetLineFit::Add(const ICOORD& pt) { 00042 ICOORDELT_IT it = &pt_list_; 00043 ICOORDELT* new_pt = new ICOORDELT(pt); 00044 it.add_to_end(new_pt); 00045 } 00046 00047 // Fit a line to the points, returning the fitted line as a pair of 00048 // points, and the upper quartile error. 00049 double DetLineFit::Fit(ICOORD* pt1, ICOORD* pt2) { 00050 ICOORDELT_IT it(&pt_list_); 00051 // Do something sensible with no points. 00052 if (pt_list_.empty()) { 00053 pt1->set_x(0); 00054 pt1->set_y(0); 00055 *pt2 = *pt1; 00056 return 0.0; 00057 } 00058 // Count the points and find the first and last kNumEndPoints. 00059 ICOORD* starts[kNumEndPoints]; 00060 ICOORD* ends[kNumEndPoints]; 00061 int pt_count = 0; 00062 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 00063 if (pt_count < kNumEndPoints) { 00064 starts[pt_count] = it.data(); 00065 ends[pt_count] = starts[pt_count]; 00066 } else { 00067 for (int i = 1; i < kNumEndPoints; ++i) 00068 ends[i - 1] = ends[i]; 00069 ends[kNumEndPoints - 1] = it.data(); 00070 } 00071 ++pt_count; 00072 } 00073 // 1 or 2 points need special treatment. 00074 if (pt_count <= 2) { 00075 *pt1 = *starts[0]; 00076 if (pt_count > 1) 00077 *pt2 = *starts[1]; 00078 else 00079 *pt2 = *pt1; 00080 return 0.0; 00081 } 00082 int end_count = MIN(pt_count, kNumEndPoints); 00083 int* distances = new int[pt_count]; 00084 double best_uq = -1.0; 00085 // Iterate each pair of points and find the best fitting line. 00086 for (int i = 0; i < end_count; ++i) { 00087 ICOORD* start = starts[i]; 00088 for (int j = 0; j < end_count; ++j) { 00089 ICOORD* end = ends[j]; 00090 if (start != end) { 00091 // Compute the upper quartile error from the line. 00092 double dist = ComputeErrors(*start, *end, distances); 00093 if (dist < best_uq || best_uq < 0.0) { 00094 best_uq = dist; 00095 *pt1 = *start; 00096 *pt2 = *end; 00097 } 00098 } 00099 } 00100 } 00101 delete [] distances; 00102 // Finally compute the square root to return the true distance. 00103 return best_uq > 0.0 ? sqrt(best_uq) : best_uq; 00104 } 00105 00106 // Backwards compatible fit returning a gradient and constant. 00107 // Deprecated. Prefer Fit(ICOORD*, ICOORD*) where possible, but use this 00108 // function in preference to the LMS class. 00109 double DetLineFit::Fit(float* m, float* c) { 00110 ICOORD start, end; 00111 double error = Fit(&start, &end); 00112 if (end.x() != start.x()) { 00113 *m = static_cast<float>(end.y() - start.y()) / (end.x() - start.x()); 00114 *c = start.y() - *m * start.x(); 00115 } else { 00116 *m = 0.0f; 00117 *c = 0.0f; 00118 } 00119 return error; 00120 } 00121 00122 // Helper function to compute a fictitious end point that is on a line 00123 // of a given gradient through the given start. 00124 ICOORD ComputeEndFromGradient(const ICOORD& start, double m) { 00125 if (m > 1.0 || m < -1.0) { 00126 // dy dominates. Force it to have the opposite sign of start.y() and 00127 // compute dx based on dy being as large as possible 00128 int dx = static_cast<int>(floor(MAX_INT16 / m)); 00129 if (dx < 0) ++dx; // Truncate towards 0. 00130 if (start.y() > 0) dx = - dx; // Force dy to be opposite to start.y(). 00131 // Constrain dx so the result fits in an inT16. 00132 while (start.x() + dx > MAX_INT16 || start.x() + dx < -MAX_INT16) 00133 dx /= 2; 00134 if (-1 <= dx && dx <= 1) { 00135 return ICOORD(start.x(), start.y() + 1); // Too steep for anything else. 00136 } 00137 int y = start.y() + static_cast<int>(floor(dx * m + 0.5)); 00138 ASSERT_HOST(-MAX_INT16 <= y && y <= MAX_INT16); 00139 return ICOORD(start.x() + dx, y); 00140 } else { 00141 // dx dominates. Force it to have the opposite sign of start.x() and 00142 // compute dy based on dx being as large as possible. 00143 int dy = static_cast<int>(floor(MAX_INT16 * m)); 00144 if (dy < 0) ++dy; // Truncate towards 0. 00145 if (start.x() > 0) dy = - dy; // Force dx to be opposite to start.x(). 00146 // Constrain dy so the result fits in an inT16. 00147 while (start.y() + dy > MAX_INT16 || start.y() + dy < -MAX_INT16) 00148 dy /= 2; 00149 if (-1 <= dy && dy <= 1) { 00150 return ICOORD(start.x() + 1, start.y()); // Too flat for anything else. 00151 } 00152 int x = start.x() + static_cast<int>(floor(dy / m + 0.5)); 00153 ASSERT_HOST(-MAX_INT16 <= x && x <= MAX_INT16); 00154 return ICOORD(x, start.y() + dy); 00155 } 00156 } 00157 00158 // Backwards compatible constrained fit with a supplied gradient. 00159 double DetLineFit::ConstrainedFit(double m, float* c) { 00160 ICOORDELT_IT it(&pt_list_); 00161 // Do something sensible with no points. 00162 if (pt_list_.empty()) { 00163 *c = 0.0f; 00164 return 0.0; 00165 } 00166 // Count the points and find the first and last kNumEndPoints. 00167 // Put the ends in a single array to make their use easier later. 00168 ICOORD* pts[kNumEndPoints * 2]; 00169 int pt_count = 0; 00170 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 00171 if (pt_count < kNumEndPoints) { 00172 pts[pt_count] = it.data(); 00173 pts[kNumEndPoints + pt_count] = pts[pt_count]; 00174 } else { 00175 for (int i = 1; i < kNumEndPoints; ++i) 00176 pts[kNumEndPoints + i - 1] = pts[kNumEndPoints + i]; 00177 pts[kNumEndPoints * 2 - 1] = it.data(); 00178 } 00179 ++pt_count; 00180 } 00181 while (pt_count < kNumEndPoints) { 00182 pts[pt_count] = NULL; 00183 pts[kNumEndPoints + pt_count++] = NULL; 00184 } 00185 int* distances = new int[pt_count]; 00186 double best_uq = -1.0; 00187 // Iterate each pair of points and find the best fitting line. 00188 for (int i = 0; i < kNumEndPoints * 2; ++i) { 00189 ICOORD* start = pts[i]; 00190 if (start == NULL) continue; 00191 ICOORD end = ComputeEndFromGradient(*start, m); 00192 // Compute the upper quartile error from the line. 00193 double dist = ComputeErrors(*start, end, distances); 00194 if (dist < best_uq || best_uq < 0.0) { 00195 best_uq = dist; 00196 *c = start->y() - start->x() * m; 00197 } 00198 } 00199 delete [] distances; 00200 // Finally compute the square root to return the true distance. 00201 return best_uq > 0.0 ? sqrt(best_uq) : best_uq; 00202 } 00203 00204 // Comparator function used by the nth_item funtion. 00205 static int CompareInts(const void *p1, const void *p2) { 00206 const int* i1 = reinterpret_cast<const int*>(p1); 00207 const int* i2 = reinterpret_cast<const int*>(p2); 00208 00209 return *i1 - *i2; 00210 } 00211 00212 // Compute all the cross product distances of the points from the line 00213 // and return the true squared upper quartile distance. 00214 double DetLineFit::ComputeErrors(const ICOORD start, const ICOORD end, 00215 int* distances) { 00216 ICOORDELT_IT it(&pt_list_); 00217 ICOORD line_vector = end; 00218 line_vector -= start; 00219 // Compute the distance of each point from the line. 00220 int pt_index = 0; 00221 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 00222 ICOORD pt_vector = *it.data(); 00223 pt_vector -= start; 00224 // Compute |line_vector||pt_vector|sin(angle between) 00225 int dist = line_vector * pt_vector; 00226 if (dist < 0) 00227 dist = -dist; 00228 distances[pt_index++] = dist; 00229 } 00230 // Now get the upper quartile distance. 00231 int index = choose_nth_item(3 * pt_index / 4, distances, pt_index, 00232 sizeof(distances[0]), CompareInts); 00233 double dist = distances[index]; 00234 // The true distance is the square root of the dist squared / the 00235 // squared length of line_vector (which is the dot product with itself) 00236 // Don't bother with the square root. Just return the square distance. 00237 return dist * dist / (line_vector % line_vector); 00238 } 00239 00240 } // namespace tesseract. 00241 00242