Tesseract
3.02
|
00001 00002 // File: detlinefit.h 00003 // Description: Deterministic least upper-quartile squares line fitting. 00004 // Author: Ray Smith 00005 // Created: Thu Feb 28 14:35:01 PDT 2008 00006 // 00007 // (C) Copyright 2008, Google Inc. 00008 // Licensed under the Apache License, Version 2.0 (the "License"); 00009 // you may not use this file except in compliance with the License. 00010 // You may obtain a copy of the License at 00011 // http://www.apache.org/licenses/LICENSE-2.0 00012 // Unless required by applicable law or agreed to in writing, software 00013 // distributed under the License is distributed on an "AS IS" BASIS, 00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 // See the License for the specific language governing permissions and 00016 // limitations under the License. 00017 // 00019 00020 #ifndef TESSERACT_CCSTRUCT_DETLINEFIT_H_ 00021 #define TESSERACT_CCSTRUCT_DETLINEFIT_H_ 00022 00023 #include "points.h" 00024 00025 namespace tesseract { 00026 00027 // This class fits a line to a set of ICOORD points. 00028 // There is no restriction on the direction of the line, as it 00029 // uses a vector method, ie no concern over infinite gradients. 00030 // The fitted line has the least upper quartile of squares of perpendicular 00031 // distances of all source points from the line, subject to the constraint 00032 // that the line is made from one of the pairs of [{p1,p2,p3},{pn-2, pn-1, pn}] 00033 // i.e. the 9 combinations of one of the first 3 and last 3 points. 00034 // A fundamental assumption of this algorithm is that one of the first 3 and 00035 // one of the last 3 points are near the best line fit. 00036 // The points must be Added in line order for the algorithm to work properly. 00037 // No floating point calculations are needed* to make an accurate fit, 00038 // and no random numbers are needed** so the algorithm is deterministic, 00039 // architecture-stable, and compiler-stable as well as stable to minor 00040 // changes in the input. 00041 // *A single floating point division is used to compute each line's distance. 00042 // This is unlikely to result in choice of a different line, but if it does, 00043 // it would be easy to replace with a 64 bit integer calculation. 00044 // **Random numbers are used in the nth_item function, but the worst 00045 // non-determinism that can result is picking a different result among equals, 00046 // and that wouldn't make any difference to the end-result distance, so the 00047 // randomness does not affect the determinism of the algorithm. The random 00048 // numbers are only there to guarantee average linear time. 00049 // Fitting time is linear, but with a high constant, as it tries 9 different 00050 // lines and computes the distance of all points each time. 00051 // This class is aimed at replacing the LLSQ (linear least squares) and 00052 // LMS (least median of squares) classes that are currently used for most 00053 // of the line fitting in Tesseract. 00054 class DetLineFit { 00055 public: 00056 DetLineFit(); 00057 ~DetLineFit(); 00058 00059 // Delete all Added points. 00060 void Clear(); 00061 00062 // Add a new point. Takes a copy - the pt doesn't need to stay in scope. 00063 // Add must be called on points in sequence along the line. 00064 void Add(const ICOORD& pt); 00065 00066 // Fit a line to the points, returning the fitted line as a pair of 00067 // points, and the upper quartile error. 00068 double Fit(ICOORD* pt1, ICOORD* pt2); 00069 00070 // Backwards compatible fit returning a gradient and constant. 00071 // Deprecated. Prefer Fit(ICOORD*, ICOORD*) where possible, but use this 00072 // function in preference to the LMS class. 00073 double Fit(float* m, float* c); 00074 00075 // Backwards compatible constrained fit with a supplied gradient. 00076 double ConstrainedFit(double m, float* c); 00077 00078 private: 00079 double ComputeErrors(const ICOORD start, const ICOORD end, int* distances); 00080 00081 ICOORDELT_LIST pt_list_; // All the added points. 00082 }; 00083 00084 } // namespace tesseract. 00085 00086 #endif // TESSERACT_CCSTRUCT_DETLINEFIT_H_ 00087 00088