Tesseract
3.02
|
00001 // Copyright 2011 Google Inc. All Rights Reserved. 00002 // Author: rays@google.com (Ray Smith) 00004 // File: intfeaturedist.cpp 00005 // Description: Fast set-difference-based feature distance calculator. 00006 // Created: Thu Sep 01 13:07:30 PDT 2011 00007 // 00008 // Licensed under the Apache License, Version 2.0 (the "License"); 00009 // you may not use this file except in compliance with the License. 00010 // You may obtain a copy of the License at 00011 // http://www.apache.org/licenses/LICENSE-2.0 00012 // Unless required by applicable law or agreed to in writing, software 00013 // distributed under the License is distributed on an "AS IS" BASIS, 00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 // See the License for the specific language governing permissions and 00016 // limitations under the License. 00017 // 00019 00020 #include "intfeaturedist.h" 00021 #include "intfeaturemap.h" 00022 00023 namespace tesseract { 00024 00025 IntFeatureDist::IntFeatureDist() 00026 : size_(0), total_feature_weight_(0.0), 00027 feature_map_(NULL), features_(NULL), 00028 features_delta_one_(NULL), features_delta_two_(NULL) { 00029 } 00030 00031 IntFeatureDist::~IntFeatureDist() { 00032 Clear(); 00033 } 00034 00035 // Initialize the table to the given size of feature space. 00036 void IntFeatureDist::Init(const IntFeatureMap* feature_map) { 00037 size_ = feature_map->sparse_size(); 00038 Clear(); 00039 feature_map_ = feature_map; 00040 features_ = new bool[size_]; 00041 features_delta_one_ = new bool[size_]; 00042 features_delta_two_ = new bool[size_]; 00043 memset(features_, false, size_ * sizeof(features_[0])); 00044 memset(features_delta_one_, false, size_ * sizeof(features_delta_one_[0])); 00045 memset(features_delta_two_, false, size_ * sizeof(features_delta_two_[0])); 00046 total_feature_weight_ = 0.0; 00047 } 00048 00049 // Setup the map for the given indexed_features that have been indexed by 00050 // feature_map. 00051 void IntFeatureDist::Set(const GenericVector<int>& indexed_features, 00052 int canonical_count, bool value) { 00053 total_feature_weight_ = canonical_count; 00054 for (int i = 0; i < indexed_features.size(); ++i) { 00055 int f = indexed_features[i]; 00056 features_[f] = value; 00057 for (int dir = -kNumOffsetMaps; dir <= kNumOffsetMaps; ++dir) { 00058 if (dir == 0) continue; 00059 int mapped_f = feature_map_->OffsetFeature(f, dir); 00060 if (mapped_f >= 0) { 00061 features_delta_one_[mapped_f] = value; 00062 for (int dir2 = -kNumOffsetMaps; dir2 <= kNumOffsetMaps; ++dir2) { 00063 if (dir2 == 0) continue; 00064 int mapped_f2 = feature_map_->OffsetFeature(mapped_f, dir2); 00065 if (mapped_f2 >= 0) 00066 features_delta_two_[mapped_f2] = value; 00067 } 00068 } 00069 } 00070 } 00071 } 00072 00073 // Compute the distance between the given feature vector and the last 00074 // Set feature vector. 00075 double IntFeatureDist::FeatureDistance( 00076 const GenericVector<int>& features) const { 00077 int num_test_features = features.size(); 00078 double denominator = total_feature_weight_ + num_test_features; 00079 double misses = denominator; 00080 for (int i = 0; i < num_test_features; ++i) { 00081 int index = features[i]; 00082 double weight = 1.0; 00083 if (features_[index]) { 00084 // A perfect match. 00085 misses -= 2.0 * weight; 00086 } else if (features_delta_one_[index]) { 00087 misses -= 1.5 * weight; 00088 } else if (features_delta_two_[index]) { 00089 // A near miss. 00090 misses -= 1.0 * weight; 00091 } 00092 } 00093 return misses / denominator; 00094 } 00095 00096 // Compute the distance between the given feature vector and the last 00097 // Set feature vector. 00098 double IntFeatureDist::DebugFeatureDistance( 00099 const GenericVector<int>& features) const { 00100 int num_test_features = features.size(); 00101 double denominator = total_feature_weight_ + num_test_features; 00102 double misses = denominator; 00103 for (int i = 0; i < num_test_features; ++i) { 00104 int index = features[i]; 00105 double weight = 1.0; 00106 INT_FEATURE_STRUCT f = feature_map_->InverseMapFeature(features[i]); 00107 tprintf("Testing feature weight %g:", weight); 00108 f.print(); 00109 if (features_[index]) { 00110 // A perfect match. 00111 misses -= 2.0 * weight; 00112 tprintf("Perfect hit\n"); 00113 } else if (features_delta_one_[index]) { 00114 misses -= 1.5 * weight; 00115 tprintf("-1 hit\n"); 00116 } else if (features_delta_two_[index]) { 00117 // A near miss. 00118 misses -= 1.0 * weight; 00119 tprintf("-2 hit\n"); 00120 } else { 00121 tprintf("Total miss\n"); 00122 } 00123 } 00124 tprintf("Features present:"); 00125 for (int i = 0; i < size_; ++i) { 00126 if (features_[i]) { 00127 INT_FEATURE_STRUCT f = feature_map_->InverseMapFeature(i); 00128 f.print(); 00129 } 00130 } 00131 tprintf("\nMinus one features:"); 00132 for (int i = 0; i < size_; ++i) { 00133 if (features_delta_one_[i]) { 00134 INT_FEATURE_STRUCT f = feature_map_->InverseMapFeature(i); 00135 f.print(); 00136 } 00137 } 00138 tprintf("\nMinus two features:"); 00139 for (int i = 0; i < size_; ++i) { 00140 if (features_delta_two_[i]) { 00141 INT_FEATURE_STRUCT f = feature_map_->InverseMapFeature(i); 00142 f.print(); 00143 } 00144 } 00145 tprintf("\n"); 00146 return misses / denominator; 00147 } 00148 00149 // Clear all data. 00150 void IntFeatureDist::Clear() { 00151 delete [] features_; 00152 features_ = NULL; 00153 delete [] features_delta_one_; 00154 features_delta_one_ = NULL; 00155 delete [] features_delta_two_; 00156 features_delta_two_ = NULL; 00157 } 00158 00159 } // namespace tesseract