Tesseract
3.02
|
00001 // Copyright 2010 Google Inc. All Rights Reserved. 00002 // Author: rays@google.com (Ray Smith) 00004 // File: intfeaturemap.cpp 00005 // Description: Encapsulation of IntFeatureSpace with IndexMapBiDi 00006 // to provide a subspace mapping and fast feature lookup. 00007 // Created: Tue Oct 26 08:58:30 PDT 2010 00008 // 00009 // Licensed under the Apache License, Version 2.0 (the "License"); 00010 // you may not use this file except in compliance with the License. 00011 // You may obtain a copy of the License at 00012 // http://www.apache.org/licenses/LICENSE-2.0 00013 // Unless required by applicable law or agreed to in writing, software 00014 // distributed under the License is distributed on an "AS IS" BASIS, 00015 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00016 // See the License for the specific language governing permissions and 00017 // limitations under the License. 00018 // 00020 00021 #include "intfeaturemap.h" 00022 00023 #include "intfeaturespace.h" 00024 #include "intfx.h" 00025 // These includes do not exist yet, but will be coming soon. 00026 //#include "sampleiterator.h" 00027 //#include "trainingsample.h" 00028 //#include "trainingsampleset.h" 00029 00030 namespace tesseract { 00031 00032 const int kMaxOffsetDist = 32; 00033 const double kMinPCLengthIncrease = 1.0 / 1024; 00034 00035 IntFeatureMap::IntFeatureMap() 00036 : mapping_changed_(true), compact_size_(0) { 00037 for (int dir = 0; dir < kNumOffsetMaps; ++dir) { 00038 offset_plus_[dir] = NULL; 00039 offset_minus_[dir] = NULL; 00040 } 00041 } 00042 00043 IntFeatureMap::~IntFeatureMap() { 00044 Clear(); 00045 } 00046 00047 // Pseudo-accessors. 00048 int IntFeatureMap::IndexFeature(const INT_FEATURE_STRUCT& f) const { 00049 return feature_space_.Index(f); 00050 } 00051 int IntFeatureMap::MapFeature(const INT_FEATURE_STRUCT& f) const { 00052 return feature_map_.SparseToCompact(feature_space_.Index(f)); 00053 } 00054 int IntFeatureMap::MapIndexFeature(int index_feature) const { 00055 return feature_map_.SparseToCompact(index_feature); 00056 } 00057 INT_FEATURE_STRUCT IntFeatureMap::InverseIndexFeature(int index_feature) const { 00058 return feature_space_.PositionFromIndex(index_feature); 00059 } 00060 INT_FEATURE_STRUCT IntFeatureMap::InverseMapFeature(int map_feature) const { 00061 int index = feature_map_.CompactToSparse(map_feature); 00062 return feature_space_.PositionFromIndex(index); 00063 } 00064 void IntFeatureMap::DeleteMapFeature(int map_feature) { 00065 feature_map_.Merge(-1, map_feature); 00066 mapping_changed_ = true; 00067 } 00068 bool IntFeatureMap::IsMapFeatureDeleted(int map_feature) const { 00069 return feature_map_.IsCompactDeleted(map_feature); 00070 } 00071 00072 // Copies the given feature_space and uses it as the index feature map 00073 // from INT_FEATURE_STRUCT. 00074 void IntFeatureMap::Init(const IntFeatureSpace& feature_space) { 00075 feature_space_ = feature_space; 00076 mapping_changed_ = false; 00077 int sparse_size = feature_space_.Size(); 00078 feature_map_.Init(sparse_size, true); 00079 feature_map_.Setup(); 00080 compact_size_ = feature_map_.CompactSize(); 00081 // Initialize look-up tables if needed. 00082 FCOORD dir = FeatureDirection(0); 00083 if (dir.x() == 0.0f && dir.y() == 0.0f) 00084 InitIntegerFX(); 00085 // Compute look-up tables to generate offset features. 00086 for (int dir = 0; dir < kNumOffsetMaps; ++dir) { 00087 delete [] offset_plus_[dir]; 00088 delete [] offset_minus_[dir]; 00089 offset_plus_[dir] = new int[sparse_size]; 00090 offset_minus_[dir] = new int[sparse_size]; 00091 } 00092 for (int dir = 1; dir <= kNumOffsetMaps; ++dir) { 00093 for (int i = 0; i < sparse_size; ++i) { 00094 int offset_index = ComputeOffsetFeature(i, dir); 00095 offset_plus_[dir - 1][i] = offset_index; 00096 offset_index = ComputeOffsetFeature(i, -dir); 00097 offset_minus_[dir - 1][i] = offset_index; 00098 } 00099 } 00100 } 00101 00102 // Helper to return an offset index feature. In this context an offset 00103 // feature with a dir of +/-1 is a feature of a similar direction, 00104 // but shifted perpendicular to the direction of the feature. An offset 00105 // feature with a dir of +/-2 is feature at the same position, but rotated 00106 // by +/- one [compact] quantum. Returns the index of the generated offset 00107 // feature, or -1 if it doesn't exist. Dir should be in 00108 // [-kNumOffsetMaps, kNumOffsetMaps] to indicate the relative direction. 00109 // A dir of 0 is an identity transformation. 00110 // Both input and output are from the index(sparse) feature space, not 00111 // the mapped/compact feature space, but the offset feature is the minimum 00112 // distance moved from the input to guarantee that it maps to the next 00113 // available quantum in the mapped/compact space. 00114 int IntFeatureMap::OffsetFeature(int index_feature, int dir) const { 00115 if (dir > 0 && dir <= kNumOffsetMaps) 00116 return offset_plus_[dir - 1][index_feature]; 00117 else if (dir < 0 && -dir <= kNumOffsetMaps) 00118 return offset_minus_[-dir - 1][index_feature]; 00119 else if (dir == 0) 00120 return index_feature; 00121 else 00122 return -1; 00123 } 00124 00125 00126 //#define EXPERIMENT_ON 00127 #ifdef EXPERIMENT_ON // This code is commented out as SampleIterator and 00128 // TrainingSample are not reviewed/checked in yet, but these functions are a 00129 // useful indicator of how an IntFeatureMap is setup. 00130 00131 // Computes the features used by the subset of samples defined by 00132 // the iterator and sets up the feature mapping. 00133 // Returns the size of the compacted feature space. 00134 int IntFeatureMap::FindNZFeatureMapping(SampleIterator* it) { 00135 feature_map_.Init(feature_space_.Size(), false); 00136 int total_samples = 0; 00137 for (it->Begin(); !it->AtEnd(); it->Next()) { 00138 const TrainingSample& sample = it->GetSample(); 00139 GenericVector<int> features; 00140 feature_space_.IndexAndSortFeatures(sample.features(), 00141 sample.num_features(), 00142 &features); 00143 int num_features = features.size(); 00144 for (int f = 0; f < num_features; ++f) 00145 feature_map_.SetMap(features[f], true); 00146 ++total_samples; 00147 } 00148 feature_map_.Setup(); 00149 compact_size_ = feature_map_.CompactSize(); 00150 mapping_changed_ = true; 00151 FinalizeMapping(it); 00152 tprintf("%d non-zero features found in %d samples\n", 00153 compact_size_, total_samples); 00154 return compact_size_; 00155 } 00156 #endif 00157 00158 // After deleting some features, finish setting up the mapping, and map 00159 // all the samples. Returns the size of the compacted feature space. 00160 int IntFeatureMap::FinalizeMapping(SampleIterator* it) { 00161 if (mapping_changed_) { 00162 feature_map_.CompleteMerges(); 00163 compact_size_ = feature_map_.CompactSize(); 00164 #ifdef EXPERIMENT_ON 00165 it->MapSampleFeatures(*this); 00166 #endif 00167 mapping_changed_ = false; 00168 } 00169 return compact_size_; 00170 } 00171 00172 // Prints the map features from the set in human-readable form. 00173 void IntFeatureMap::DebugMapFeatures( 00174 const GenericVector<int>& map_features) const { 00175 for (int i = 0; i < map_features.size(); ++i) { 00176 INT_FEATURE_STRUCT f = InverseMapFeature(map_features[i]); 00177 f.print(); 00178 } 00179 } 00180 00181 void IntFeatureMap::Clear() { 00182 for (int dir = 0; dir < kNumOffsetMaps; ++dir) { 00183 delete [] offset_plus_[dir]; 00184 delete [] offset_minus_[dir]; 00185 offset_plus_[dir] = NULL; 00186 offset_minus_[dir] = NULL; 00187 } 00188 } 00189 00190 // Helper to compute an offset index feature. In this context an offset 00191 // feature with a dir of +/-1 is a feature of a similar direction, 00192 // but shifted perpendicular to the direction of the feature. An offset 00193 // feature with a dir of +/-2 is feature at the same position, but rotated 00194 // by +/- one [compact] quantum. Returns the index of the generated offset 00195 // feature, or -1 if it doesn't exist. Dir should be in 00196 // [-kNumOffsetMaps, kNumOffsetMaps] to indicate the relative direction. 00197 // A dir of 0 is an identity transformation. 00198 // Both input and output are from the index(sparse) feature space, not 00199 // the mapped/compact feature space, but the offset feature is the minimum 00200 // distance moved from the input to guarantee that it maps to the next 00201 // available quantum in the mapped/compact space. 00202 int IntFeatureMap::ComputeOffsetFeature(int index_feature, int dir) const { 00203 INT_FEATURE_STRUCT f = InverseIndexFeature(index_feature); 00204 ASSERT_HOST(IndexFeature(f) == index_feature); 00205 if (dir == 0) { 00206 return index_feature; 00207 } else if (dir == 1 || dir == -1) { 00208 FCOORD feature_dir = FeatureDirection(f.Theta); 00209 FCOORD rotation90(0.0f, 1.0f); 00210 feature_dir.rotate(rotation90); 00211 // Find the nearest existing feature. 00212 for (int m = 1; m < kMaxOffsetDist; ++m) { 00213 double x_pos = f.X + feature_dir.x() * (m * dir); 00214 double y_pos = f.Y + feature_dir.y() * (m * dir); 00215 int x = IntCastRounded(x_pos); 00216 int y = IntCastRounded(y_pos); 00217 if (x >= 0 && x <= MAX_UINT8 && y >= 0 && y <= MAX_UINT8) { 00218 INT_FEATURE_STRUCT offset_f; 00219 offset_f.X = x; 00220 offset_f.Y = y; 00221 offset_f.Theta = f.Theta; 00222 int offset_index = IndexFeature(offset_f); 00223 if (offset_index != index_feature && offset_index >= 0) 00224 return offset_index; // Found one. 00225 } else { 00226 return -1; // Hit the edge of feature space. 00227 } 00228 } 00229 } else if (dir == 2 || dir == -2) { 00230 // Find the nearest existing index_feature. 00231 for (int m = 1; m < kMaxOffsetDist; ++m) { 00232 int theta = f.Theta + m * dir / 2; 00233 INT_FEATURE_STRUCT offset_f; 00234 offset_f.X = f.X; 00235 offset_f.Y = f.Y; 00236 offset_f.Theta = Modulo(theta, 256); 00237 int offset_index = IndexFeature(offset_f); 00238 if (offset_index != index_feature && offset_index >= 0) 00239 return offset_index; // Found one. 00240 } 00241 } 00242 return -1; // Nothing within the max distance. 00243 } 00244 00245 } // namespace tesseract.