Tesseract  3.02
tesseract-ocr/ccstruct/dppoint.h
Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        dppoint.h
00003  * Description: Simple generic dynamic programming class.
00004  * Author:      Ray Smith
00005  * Created:     Wed Mar 25 18:57:01 PDT 2009
00006  *
00007  * (C) Copyright 2009, Google Inc.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
00020 #ifndef TESSERACT_CCSTRUCT_DPPOINT_H__
00021 #define TESSERACT_CCSTRUCT_DPPOINT_H__
00022 
00023 #include "host.h"
00024 
00025 namespace tesseract {
00026 
00027 // A simple class to provide a dynamic programming solution to a class of
00028 // 1st-order problems in which the cost is dependent only on the current
00029 // step and the best cost to that step, with a possible special case
00030 // of using the variance of the steps, and only the top choice is required.
00031 // Useful for problems such as finding the optimal cut points in a fixed-pitch
00032 // (vertical or horizontal) situation.
00033 // Skeletal Example:
00034 // DPPoint* array = new DPPoint[width];
00035 // for (int i = 0; i < width; i++) {
00036 //   array[i].AddLocalCost(cost_at_i)
00037 // }
00038 // DPPoint* best_end = DPPoint::Solve(..., array);
00039 // while (best_end != NULL) {
00040 //   int cut_index = best_end - array;
00041 //   best_end = best_end->best_prev();
00042 // }
00043 // delete [] array;
00044 class DPPoint {
00045  public:
00046   // The cost function evaluates the total cost at this (excluding this's
00047   // local_cost) and if it beats this's total_cost, then
00048   // replace the appropriate values in this.
00049   typedef inT64 (DPPoint::*CostFunc)(const DPPoint* prev);
00050 
00051   DPPoint()
00052     : local_cost_(0), total_cost_(MAX_INT32), total_steps_(1), best_prev_(NULL),
00053       n_(0), sig_x_(0), sig_xsq_(0) {
00054   }
00055 
00056   // Solve the dynamic programming problem for the given array of points, with
00057   // the given size and cost function.
00058   // Steps backwards are limited to being between min_step and max_step
00059   // inclusive.
00060   // The return value is the tail of the best path.
00061   static DPPoint* Solve(int min_step, int max_step, bool debug,
00062                         CostFunc cost_func, int size, DPPoint* points);
00063 
00064   // A CostFunc that takes the variance of step into account in the cost.
00065   inT64 CostWithVariance(const DPPoint* prev);
00066 
00067   // Accessors.
00068   int total_cost() const {
00069     return total_cost_;
00070   }
00071   int Pathlength() const {
00072     return total_steps_;
00073   }
00074   const DPPoint* best_prev() const {
00075     return best_prev_;
00076   }
00077   void AddLocalCost(int new_cost) {
00078     local_cost_ += new_cost;
00079   }
00080 
00081  private:
00082   // Code common to different cost functions.
00083 
00084   // Update the other members if the cost is lower.
00085   void UpdateIfBetter(inT64 cost, inT32 steps, const DPPoint* prev,
00086                       inT32 n, inT32 sig_x, inT64 sig_xsq);
00087 
00088   inT32 local_cost_;    // Cost of this point on its own.
00089   inT32 total_cost_;    // Sum of all costs in best path to here.
00090                         // During cost calculations local_cost is excluded.
00091   inT32 total_steps_;   // Number of steps in best path to here.
00092   const DPPoint* best_prev_;  // Pointer to prev point in best path from here.
00093   // Information for computing the variance part of the cost.
00094   inT32 n_;             // Number of steps in best path to here for variance.
00095   inT32 sig_x_;         // Sum of step sizes for computing variance.
00096   inT64 sig_xsq_;       // Sum of squares of steps for computing variance.
00097 };
00098 
00099 }  // namespace tesseract.
00100 
00101 #endif  // TESSERACT_CCSTRUCT_DPPOINT_H__
00102