Tesseract
3.02
|
00001 00002 // File: sorthelper.h 00003 // Description: Generic sort and maxfinding class. 00004 // Author: Ray Smith 00005 // Created: Thu May 20 17:48:21 PDT 2010 00006 // 00007 // (C) Copyright 2010, Google Inc. 00008 // Licensed under the Apache License, Version 2.0 (the "License"); 00009 // you may not use this file except in compliance with the License. 00010 // You may obtain a copy of the License at 00011 // http://www.apache.org/licenses/LICENSE-2.0 00012 // Unless required by applicable law or agreed to in writing, software 00013 // distributed under the License is distributed on an "AS IS" BASIS, 00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 // See the License for the specific language governing permissions and 00016 // limitations under the License. 00017 // 00019 00020 #ifndef TESSERACT_CCUTIL_SORTHELPER_H_ 00021 #define TESSERACT_CCUTIL_SORTHELPER_H_ 00022 00023 #include <stdlib.h> 00024 #include "genericvector.h" 00025 00026 // Generic class to provide functions based on a <value,count> pair. 00027 // T is the value type. 00028 // The class keeps a count of each value and can return the most frequent 00029 // value or a sorted array of the values with counts. 00030 // Note that this class uses linear search for adding. It is better 00031 // to use the STATS class to get the mode of a large number of values 00032 // in a small space. SortHelper is better to get the mode of a small number 00033 // of values from a large space. 00034 // T must have a copy constructor. 00035 template <typename T> 00036 class SortHelper { 00037 public: 00038 // Simple pair class to hold the values and counts. 00039 template<typename PairT> struct SortPair { 00040 PairT value; 00041 int count; 00042 }; 00043 // qsort function to sort by decreasing count. 00044 static int SortPairsByCount(const void* v1, const void* v2) { 00045 const SortPair<T>* p1 = reinterpret_cast<const SortPair<T>*>(v1); 00046 const SortPair<T>* p2 = reinterpret_cast<const SortPair<T>*>(v2); 00047 return p2->count - p1->count; 00048 } 00049 // qsort function to sort by decreasing value. 00050 static int SortPairsByValue(const void* v1, const void* v2) { 00051 const SortPair<T>* p1 = reinterpret_cast<const SortPair<T>*>(v1); 00052 const SortPair<T>* p2 = reinterpret_cast<const SortPair<T>*>(v2); 00053 if (p2->value - p1->value < 0) return -1; 00054 if (p2->value - p1->value > 0) return 1; 00055 return 0; 00056 } 00057 00058 // Constructor takes a hint of the array size, but it need not be accurate. 00059 explicit SortHelper(int sizehint) : counts_(sizehint) {} 00060 00061 // Add a value that may be a duplicate of an existing value. 00062 // Uses a linear search. 00063 void Add(T value, int count) { 00064 // Linear search for value. 00065 for (int i = 0; i < counts_.size(); ++i) { 00066 if (counts_[i].value == value) { 00067 counts_[i].count += count; 00068 return; 00069 } 00070 } 00071 SortPair<T> new_pair = {value, count}; 00072 counts_.push_back(SortPair<T>(new_pair)); 00073 } 00074 00075 // Returns the frequency of the most frequent value. 00076 // If max_value is not NULL, returns the most frequent value. 00077 // If the array is empty, returns -MAX_INT32 and max_value is unchanged. 00078 int MaxCount(T* max_value) const { 00079 int best_count = -MAX_INT32; 00080 for (int i = 0; i < counts_.size(); ++i) { 00081 if (counts_[i].count > best_count) { 00082 best_count = counts_[i].count; 00083 if (max_value != NULL) 00084 *max_value = counts_[i].value; 00085 } 00086 } 00087 return best_count; 00088 } 00089 00090 // Returns the data array sorted by decreasing frequency. 00091 const GenericVector<SortPair<T> >& SortByCount() { 00092 counts_.sort(&SortPairsByCount); 00093 return counts_; 00094 } 00095 // Returns the data array sorted by decreasing value. 00096 const GenericVector<SortPair<T> >& SortByValue() { 00097 counts_.sort(&SortPairsByValue); 00098 return counts_; 00099 } 00100 00101 private: 00102 GenericVector<SortPair<T> > counts_; 00103 }; 00104 00105 00106 #endif // TESSERACT_CCUTIL_SORTHELPER_H_.