Tesseract  3.02
tesseract-ocr/ccstruct/detlinefit.h
Go to the documentation of this file.
00001 
00002 // File:        detlinefit.h
00003 // Description: Deterministic least upper-quartile squares line fitting.
00004 // Author:      Ray Smith
00005 // Created:     Thu Feb 28 14:35:01 PDT 2008
00006 //
00007 // (C) Copyright 2008, Google Inc.
00008 // Licensed under the Apache License, Version 2.0 (the "License");
00009 // you may not use this file except in compliance with the License.
00010 // You may obtain a copy of the License at
00011 // http://www.apache.org/licenses/LICENSE-2.0
00012 // Unless required by applicable law or agreed to in writing, software
00013 // distributed under the License is distributed on an "AS IS" BASIS,
00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015 // See the License for the specific language governing permissions and
00016 // limitations under the License.
00017 //
00019 
00020 #ifndef TESSERACT_CCSTRUCT_DETLINEFIT_H_
00021 #define TESSERACT_CCSTRUCT_DETLINEFIT_H_
00022 
00023 #include "points.h"
00024 
00025 namespace tesseract {
00026 
00027 // This class fits a line to a set of ICOORD points.
00028 // There is no restriction on the direction of the line, as it
00029 // uses a vector method, ie no concern over infinite gradients.
00030 // The fitted line has the least upper quartile of squares of perpendicular
00031 // distances of all source points from the line, subject to the constraint
00032 // that the line is made from one of the pairs of [{p1,p2,p3},{pn-2, pn-1, pn}]
00033 // i.e. the 9 combinations of one of the first 3 and last 3 points.
00034 // A fundamental assumption of this algorithm is that one of the first 3 and
00035 // one of the last 3 points are near the best line fit.
00036 // The points must be Added in line order for the algorithm to work properly.
00037 // No floating point calculations are needed* to make an accurate fit,
00038 // and no random numbers are needed** so the algorithm is deterministic,
00039 // architecture-stable, and compiler-stable as well as stable to minor
00040 // changes in the input.
00041 // *A single floating point division is used to compute each line's distance.
00042 // This is unlikely to result in choice of a different line, but if it does,
00043 // it would be easy to replace with a 64 bit integer calculation.
00044 // **Random numbers are used in the nth_item function, but the worst
00045 // non-determinism that can result is picking a different result among equals,
00046 // and that wouldn't make any difference to the end-result distance, so the
00047 // randomness does not affect the determinism of the algorithm. The random
00048 // numbers are only there to guarantee average linear time.
00049 // Fitting time is linear, but with a high constant, as it tries 9 different
00050 // lines and computes the distance of all points each time.
00051 // This class is aimed at replacing the LLSQ (linear least squares) and
00052 // LMS (least median of squares) classes that are currently used for most
00053 // of the line fitting in Tesseract.
00054 class DetLineFit {
00055  public:
00056   DetLineFit();
00057   ~DetLineFit();
00058 
00059   // Delete all Added points.
00060   void Clear();
00061 
00062   // Add a new point. Takes a copy - the pt doesn't need to stay in scope.
00063   // Add must be called on points in sequence along the line.
00064   void Add(const ICOORD& pt);
00065 
00066   // Fit a line to the points, returning the fitted line as a pair of
00067   // points, and the upper quartile error.
00068   double Fit(ICOORD* pt1, ICOORD* pt2);
00069 
00070   // Backwards compatible fit returning a gradient and constant.
00071   // Deprecated. Prefer Fit(ICOORD*, ICOORD*) where possible, but use this
00072   // function in preference to the LMS class.
00073   double Fit(float* m, float* c);
00074 
00075   // Backwards compatible constrained fit with a supplied gradient.
00076   double ConstrainedFit(double m, float* c);
00077 
00078  private:
00079   double ComputeErrors(const ICOORD start, const ICOORD end, int* distances);
00080 
00081   ICOORDELT_LIST pt_list_;  // All the added points.
00082 };
00083 
00084 }  // namespace tesseract.
00085 
00086 #endif  // TESSERACT_CCSTRUCT_DETLINEFIT_H_
00087 
00088