Tesseract
3.02
|
00001 // Copyright 2011 Google Inc. All Rights Reserved. 00002 // Author: rays@google.com (Ray Smith) 00004 // File: intfeaturedist.h 00005 // Description: Fast set-difference-based feature distance calculator. 00006 // Created: Thu Sep 01 12:14:30 PDT 2011 00007 // 00008 // Licensed under the Apache License, Version 2.0 (the "License"); 00009 // you may not use this file except in compliance with the License. 00010 // You may obtain a copy of the License at 00011 // http://www.apache.org/licenses/LICENSE-2.0 00012 // Unless required by applicable law or agreed to in writing, software 00013 // distributed under the License is distributed on an "AS IS" BASIS, 00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 // See the License for the specific language governing permissions and 00016 // limitations under the License. 00017 // 00019 00020 #ifndef TESSERACT_CLASSIFY_INTFEATUREDIST_H_ 00021 #define TESSERACT_CLASSIFY_INTFEATUREDIST_H_ 00022 00023 #include "genericvector.h" 00024 00025 namespace tesseract { 00026 00027 class IntFeatureMap; 00028 00029 // Feature distance calculator designed to provide a fast distance calculation 00030 // based on set difference between a given feature set and many other feature 00031 // sets in turn. 00032 // Representation of a feature set as an array of bools that are sparsely 00033 // true, and companion arrays that allow fast feature set distance 00034 // calculations with allowance of offsets in position. 00035 // Init is expensive, so for greatest efficiency, to re-initialize for a new 00036 // feature set, use Set(..., false) on the SAME feature set as was used to 00037 // setup with Set(..., true), to return to its initialized state before 00038 // reuse with Set(..., true) on a new feature set. 00039 class IntFeatureDist { 00040 public: 00041 IntFeatureDist(); 00042 ~IntFeatureDist(); 00043 00044 // Initialize the bool array to the given size of feature space. 00045 // The feature_map is just borrowed, and must exist for the entire 00046 // lifetime of the IntFeatureDist. 00047 void Init(const IntFeatureMap* feature_map); 00048 00049 // Setup the map for the given indexed_features that have been indexed by 00050 // feature_map. After use, use Set(..., false) to reset to the initial state 00051 // as this is faster than calling Init for sparse spaces. 00052 void Set(const GenericVector<int>& indexed_features, 00053 int canonical_count, bool value); 00054 00055 // Compute the distance between the given feature vector and the last 00056 // Set feature vector. 00057 double FeatureDistance(const GenericVector<int>& features) const; 00058 double DebugFeatureDistance(const GenericVector<int>& features) const; 00059 00060 private: 00061 // Clear all data. 00062 void Clear(); 00063 00064 // Size of the indexed feature space. 00065 int size_; 00066 // Total weight of features currently stored in the maps. 00067 double total_feature_weight_; 00068 // Pointer to IntFeatureMap given at Init to find offset features. 00069 const IntFeatureMap* feature_map_; 00070 // Array of bools indicating presence of a feature. 00071 bool* features_; 00072 // Array indicating the presence of a feature offset by one unit. 00073 bool* features_delta_one_; 00074 // Array indicating the presence of a feature offset by two units. 00075 bool* features_delta_two_; 00076 }; 00077 00078 } // namespace tesseract 00079 00080 #endif // TESSERACT_CLASSIFY_INTFEATUREDIST_H_