Tesseract  3.02
tesseract-ocr/classify/intfeaturemap.cpp
Go to the documentation of this file.
00001 // Copyright 2010 Google Inc. All Rights Reserved.
00002 // Author: rays@google.com (Ray Smith)
00004 // File:        intfeaturemap.cpp
00005 // Description: Encapsulation of IntFeatureSpace with IndexMapBiDi
00006 //              to provide a subspace mapping and fast feature lookup.
00007 // Created:     Tue Oct 26 08:58:30 PDT 2010
00008 //
00009 // Licensed under the Apache License, Version 2.0 (the "License");
00010 // you may not use this file except in compliance with the License.
00011 // You may obtain a copy of the License at
00012 // http://www.apache.org/licenses/LICENSE-2.0
00013 // Unless required by applicable law or agreed to in writing, software
00014 // distributed under the License is distributed on an "AS IS" BASIS,
00015 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00016 // See the License for the specific language governing permissions and
00017 // limitations under the License.
00018 //
00020 
00021 #include "intfeaturemap.h"
00022 
00023 #include "intfeaturespace.h"
00024 #include "intfx.h"
00025 // These includes do not exist yet, but will be coming soon.
00026 //#include "sampleiterator.h"
00027 //#include "trainingsample.h"
00028 //#include "trainingsampleset.h"
00029 
00030 namespace tesseract {
00031 
00032 const int kMaxOffsetDist = 32;
00033 const double kMinPCLengthIncrease = 1.0 / 1024;
00034 
00035 IntFeatureMap::IntFeatureMap()
00036   : mapping_changed_(true), compact_size_(0) {
00037   for (int dir = 0; dir < kNumOffsetMaps; ++dir) {
00038     offset_plus_[dir] = NULL;
00039     offset_minus_[dir] = NULL;
00040   }
00041 }
00042 
00043 IntFeatureMap::~IntFeatureMap() {
00044   Clear();
00045 }
00046 
00047 // Pseudo-accessors.
00048 int IntFeatureMap::IndexFeature(const INT_FEATURE_STRUCT& f) const {
00049   return feature_space_.Index(f);
00050 }
00051 int IntFeatureMap::MapFeature(const INT_FEATURE_STRUCT& f) const {
00052   return feature_map_.SparseToCompact(feature_space_.Index(f));
00053 }
00054 int IntFeatureMap::MapIndexFeature(int index_feature) const {
00055   return feature_map_.SparseToCompact(index_feature);
00056 }
00057 INT_FEATURE_STRUCT IntFeatureMap::InverseIndexFeature(int index_feature) const {
00058   return feature_space_.PositionFromIndex(index_feature);
00059 }
00060 INT_FEATURE_STRUCT IntFeatureMap::InverseMapFeature(int map_feature) const {
00061   int index = feature_map_.CompactToSparse(map_feature);
00062   return feature_space_.PositionFromIndex(index);
00063 }
00064 void IntFeatureMap::DeleteMapFeature(int map_feature) {
00065   feature_map_.Merge(-1, map_feature);
00066   mapping_changed_ = true;
00067 }
00068 bool IntFeatureMap::IsMapFeatureDeleted(int map_feature) const {
00069   return feature_map_.IsCompactDeleted(map_feature);
00070 }
00071 
00072 // Copies the given feature_space and uses it as the index feature map
00073 // from INT_FEATURE_STRUCT.
00074 void IntFeatureMap::Init(const IntFeatureSpace& feature_space) {
00075   feature_space_ = feature_space;
00076   mapping_changed_ = false;
00077   int sparse_size = feature_space_.Size();
00078   feature_map_.Init(sparse_size, true);
00079   feature_map_.Setup();
00080   compact_size_ = feature_map_.CompactSize();
00081   // Initialize look-up tables if needed.
00082   FCOORD dir = FeatureDirection(0);
00083   if (dir.x() == 0.0f && dir.y() == 0.0f)
00084     InitIntegerFX();
00085   // Compute look-up tables to generate offset features.
00086   for (int dir = 0; dir < kNumOffsetMaps; ++dir) {
00087     delete [] offset_plus_[dir];
00088     delete [] offset_minus_[dir];
00089     offset_plus_[dir] = new int[sparse_size];
00090     offset_minus_[dir] = new int[sparse_size];
00091   }
00092   for (int dir = 1; dir <= kNumOffsetMaps; ++dir) {
00093     for (int i = 0; i < sparse_size; ++i) {
00094       int offset_index = ComputeOffsetFeature(i, dir);
00095       offset_plus_[dir - 1][i] = offset_index;
00096       offset_index = ComputeOffsetFeature(i, -dir);
00097       offset_minus_[dir - 1][i] = offset_index;
00098     }
00099   }
00100 }
00101 
00102 // Helper to return an offset index feature. In this context an offset
00103 // feature with a dir of +/-1 is a feature of a similar direction,
00104 // but shifted perpendicular to the direction of the feature. An offset
00105 // feature with a dir of +/-2 is feature at the same position, but rotated
00106 // by +/- one [compact] quantum. Returns the index of the generated offset
00107 // feature, or -1 if it doesn't exist. Dir should be in
00108 // [-kNumOffsetMaps, kNumOffsetMaps] to indicate the relative direction.
00109 // A dir of 0 is an identity transformation.
00110 // Both input and output are from the index(sparse) feature space, not
00111 // the mapped/compact feature space, but the offset feature is the minimum
00112 // distance moved from the input to guarantee that it maps to the next
00113 // available quantum in the mapped/compact space.
00114 int IntFeatureMap::OffsetFeature(int index_feature, int dir) const {
00115   if (dir > 0 && dir <= kNumOffsetMaps)
00116     return offset_plus_[dir - 1][index_feature];
00117   else if (dir < 0 && -dir <= kNumOffsetMaps)
00118     return offset_minus_[-dir - 1][index_feature];
00119   else if (dir == 0)
00120     return index_feature;
00121   else
00122     return -1;
00123 }
00124 
00125 
00126 //#define EXPERIMENT_ON
00127 #ifdef EXPERIMENT_ON  // This code is commented out as SampleIterator and
00128 // TrainingSample are not reviewed/checked in yet, but these functions are a
00129 // useful indicator of how an IntFeatureMap is setup.
00130 
00131 // Computes the features used by the subset of samples defined by
00132 // the iterator and sets up the feature mapping.
00133 // Returns the size of the compacted feature space.
00134 int IntFeatureMap::FindNZFeatureMapping(SampleIterator* it) {
00135   feature_map_.Init(feature_space_.Size(), false);
00136   int total_samples = 0;
00137   for (it->Begin(); !it->AtEnd(); it->Next()) {
00138     const TrainingSample& sample = it->GetSample();
00139     GenericVector<int> features;
00140     feature_space_.IndexAndSortFeatures(sample.features(),
00141                                         sample.num_features(),
00142                                         &features);
00143     int num_features = features.size();
00144     for (int f = 0; f < num_features; ++f)
00145       feature_map_.SetMap(features[f], true);
00146     ++total_samples;
00147   }
00148   feature_map_.Setup();
00149   compact_size_ = feature_map_.CompactSize();
00150   mapping_changed_ = true;
00151   FinalizeMapping(it);
00152   tprintf("%d non-zero features found in %d samples\n",
00153           compact_size_, total_samples);
00154   return compact_size_;
00155 }
00156 #endif
00157 
00158 // After deleting some features, finish setting up the mapping, and map
00159 // all the samples. Returns the size of the compacted feature space.
00160 int IntFeatureMap::FinalizeMapping(SampleIterator* it) {
00161   if (mapping_changed_) {
00162     feature_map_.CompleteMerges();
00163     compact_size_ = feature_map_.CompactSize();
00164 #ifdef EXPERIMENT_ON
00165     it->MapSampleFeatures(*this);
00166 #endif
00167     mapping_changed_ = false;
00168   }
00169   return compact_size_;
00170 }
00171 
00172 // Prints the map features from the set in human-readable form.
00173 void IntFeatureMap::DebugMapFeatures(
00174     const GenericVector<int>& map_features) const {
00175   for (int i = 0; i < map_features.size(); ++i) {
00176     INT_FEATURE_STRUCT f = InverseMapFeature(map_features[i]);
00177     f.print();
00178   }
00179 }
00180 
00181 void IntFeatureMap::Clear() {
00182   for (int dir = 0; dir < kNumOffsetMaps; ++dir) {
00183     delete [] offset_plus_[dir];
00184     delete [] offset_minus_[dir];
00185     offset_plus_[dir] = NULL;
00186     offset_minus_[dir] = NULL;
00187   }
00188 }
00189 
00190 // Helper to compute an offset index feature. In this context an offset
00191 // feature with a dir of +/-1 is a feature of a similar direction,
00192 // but shifted perpendicular to the direction of the feature. An offset
00193 // feature with a dir of +/-2 is feature at the same position, but rotated
00194 // by +/- one [compact] quantum. Returns the index of the generated offset
00195 // feature, or -1 if it doesn't exist. Dir should be in
00196 // [-kNumOffsetMaps, kNumOffsetMaps] to indicate the relative direction.
00197 // A dir of 0 is an identity transformation.
00198 // Both input and output are from the index(sparse) feature space, not
00199 // the mapped/compact feature space, but the offset feature is the minimum
00200 // distance moved from the input to guarantee that it maps to the next
00201 // available quantum in the mapped/compact space.
00202 int IntFeatureMap::ComputeOffsetFeature(int index_feature, int dir) const {
00203   INT_FEATURE_STRUCT f = InverseIndexFeature(index_feature);
00204   ASSERT_HOST(IndexFeature(f) == index_feature);
00205   if (dir == 0) {
00206     return index_feature;
00207   } else if (dir == 1 || dir == -1) {
00208     FCOORD feature_dir = FeatureDirection(f.Theta);
00209     FCOORD rotation90(0.0f, 1.0f);
00210     feature_dir.rotate(rotation90);
00211     // Find the nearest existing feature.
00212     for (int m = 1; m < kMaxOffsetDist; ++m) {
00213       double x_pos = f.X + feature_dir.x() * (m * dir);
00214       double y_pos = f.Y + feature_dir.y() * (m * dir);
00215       int x = IntCastRounded(x_pos);
00216       int y = IntCastRounded(y_pos);
00217       if (x >= 0 && x <= MAX_UINT8 && y >= 0 && y <= MAX_UINT8) {
00218         INT_FEATURE_STRUCT offset_f;
00219         offset_f.X = x;
00220         offset_f.Y = y;
00221         offset_f.Theta = f.Theta;
00222         int offset_index = IndexFeature(offset_f);
00223         if (offset_index != index_feature && offset_index >= 0)
00224           return offset_index;  // Found one.
00225       } else {
00226         return -1;  // Hit the edge of feature space.
00227       }
00228     }
00229   } else if (dir == 2 || dir == -2) {
00230     // Find the nearest existing index_feature.
00231     for (int m = 1; m < kMaxOffsetDist; ++m) {
00232       int theta = f.Theta + m * dir / 2;
00233       INT_FEATURE_STRUCT offset_f;
00234       offset_f.X = f.X;
00235       offset_f.Y = f.Y;
00236       offset_f.Theta = Modulo(theta, 256);
00237       int offset_index = IndexFeature(offset_f);
00238       if (offset_index != index_feature && offset_index >= 0)
00239         return offset_index;  // Found one.
00240     }
00241   }
00242   return -1;  // Nothing within the max distance.
00243 }
00244 
00245 }  // namespace tesseract.