Tesseract  3.02
tesseract-ocr/ccutil/sorthelper.h
Go to the documentation of this file.
00001 
00002 // File:        sorthelper.h
00003 // Description: Generic sort and maxfinding class.
00004 // Author:      Ray Smith
00005 // Created:     Thu May 20 17:48:21 PDT 2010
00006 //
00007 // (C) Copyright 2010, Google Inc.
00008 // Licensed under the Apache License, Version 2.0 (the "License");
00009 // you may not use this file except in compliance with the License.
00010 // You may obtain a copy of the License at
00011 // http://www.apache.org/licenses/LICENSE-2.0
00012 // Unless required by applicable law or agreed to in writing, software
00013 // distributed under the License is distributed on an "AS IS" BASIS,
00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015 // See the License for the specific language governing permissions and
00016 // limitations under the License.
00017 //
00019 
00020 #ifndef TESSERACT_CCUTIL_SORTHELPER_H_
00021 #define TESSERACT_CCUTIL_SORTHELPER_H_
00022 
00023 #include <stdlib.h>
00024 #include "genericvector.h"
00025 
00026 // Generic class to provide functions based on a <value,count> pair.
00027 // T is the value type.
00028 // The class keeps a count of each value and can return the most frequent
00029 // value or a sorted array of the values with counts.
00030 // Note that this class uses linear search for adding. It is better
00031 // to use the STATS class to get the mode of a large number of values
00032 // in a small space. SortHelper is better to get the mode of a small number
00033 // of values from a large space.
00034 // T must have a copy constructor.
00035 template <typename T>
00036 class SortHelper {
00037  public:
00038   // Simple pair class to hold the values and counts.
00039   template<typename PairT> struct SortPair {
00040     PairT value;
00041     int count;
00042   };
00043   // qsort function to sort by decreasing count.
00044   static int SortPairsByCount(const void* v1, const void* v2) {
00045     const SortPair<T>* p1 = reinterpret_cast<const SortPair<T>*>(v1);
00046     const SortPair<T>* p2 = reinterpret_cast<const SortPair<T>*>(v2);
00047     return p2->count - p1->count;
00048   }
00049   // qsort function to sort by decreasing value.
00050   static int SortPairsByValue(const void* v1, const void* v2) {
00051     const SortPair<T>* p1 = reinterpret_cast<const SortPair<T>*>(v1);
00052     const SortPair<T>* p2 = reinterpret_cast<const SortPair<T>*>(v2);
00053     if (p2->value - p1->value < 0) return -1;
00054     if (p2->value - p1->value > 0) return 1;
00055     return 0;
00056   }
00057 
00058   // Constructor takes a hint of the array size, but it need not be accurate.
00059   explicit SortHelper(int sizehint) : counts_(sizehint) {}
00060 
00061   // Add a value that may be a duplicate of an existing value.
00062   // Uses a linear search.
00063   void Add(T value, int count) {
00064     // Linear search for value.
00065     for (int i = 0; i < counts_.size(); ++i) {
00066       if (counts_[i].value == value) {
00067         counts_[i].count += count;
00068         return;
00069       }
00070     }
00071     SortPair<T> new_pair = {value, count};
00072     counts_.push_back(SortPair<T>(new_pair));
00073   }
00074 
00075   // Returns the frequency of the most frequent value.
00076   // If max_value is not NULL, returns the most frequent value.
00077   // If the array is empty, returns -MAX_INT32 and max_value is unchanged.
00078   int MaxCount(T* max_value) const {
00079     int best_count = -MAX_INT32;
00080     for (int i = 0; i < counts_.size(); ++i) {
00081       if (counts_[i].count > best_count) {
00082         best_count = counts_[i].count;
00083         if (max_value != NULL)
00084           *max_value = counts_[i].value;
00085       }
00086     }
00087     return best_count;
00088   }
00089 
00090   // Returns the data array sorted by decreasing frequency.
00091   const GenericVector<SortPair<T> >& SortByCount() {
00092     counts_.sort(&SortPairsByCount);
00093     return counts_;
00094   }
00095   // Returns the data array sorted by decreasing value.
00096   const GenericVector<SortPair<T> >& SortByValue() {
00097     counts_.sort(&SortPairsByValue);
00098     return counts_;
00099   }
00100 
00101  private:
00102   GenericVector<SortPair<T> > counts_;
00103 };
00104 
00105 
00106 #endif  // TESSERACT_CCUTIL_SORTHELPER_H_.