Tesseract  3.02
tesseract-ocr/ccutil/indexmapbidi.cpp
Go to the documentation of this file.
00001 
00002 // File:        indexmapbidi.cpp
00003 // Description: Bi-directional mapping between a sparse and compact space.
00004 // Author:      rays@google.com (Ray Smith)
00005 // Created:     Tue Apr 06 11:33:59 PDT 2010
00006 //
00007 // (C) Copyright 2010, Google Inc.
00008 // Licensed under the Apache License, Version 2.0 (the "License");
00009 // you may not use this file except in compliance with the License.
00010 // You may obtain a copy of the License at
00011 // http://www.apache.org/licenses/LICENSE-2.0
00012 // Unless required by applicable law or agreed to in writing, software
00013 // distributed under the License is distributed on an "AS IS" BASIS,
00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015 // See the License for the specific language governing permissions and
00016 // limitations under the License.
00017 //
00019 
00020 #include "indexmapbidi.h"
00021 
00022 namespace tesseract {
00023 
00024 // SparseToCompact takes a sparse index to an index in the compact space.
00025 // Uses a binary search to find the result. For faster speed use
00026 // IndexMapBiDi, but that takes more memory.
00027 int IndexMap::SparseToCompact(int sparse_index) const {
00028   int result = compact_map_.binary_search(sparse_index);
00029   return compact_map_[result] == sparse_index ? result : -1;
00030 }
00031 
00032 // Copy from the input.
00033 void IndexMap::CopyFrom(const IndexMap& src) {
00034   sparse_size_ = src.sparse_size_;
00035   compact_map_ = src.compact_map_;
00036 }
00037 void IndexMap::CopyFrom(const IndexMapBiDi& src) {
00038   sparse_size_ = src.SparseSize();
00039   compact_map_ = src.compact_map_;
00040 }
00041 
00042 // Writes to the given file. Returns false in case of error.
00043 bool IndexMap::Serialize(FILE* fp) const {
00044   inT32 sparse_size = sparse_size_;
00045   if (fwrite(&sparse_size, sizeof(sparse_size), 1, fp) != 1) return false;
00046   if (!compact_map_.Serialize(fp)) return false;
00047   return true;
00048 }
00049 
00050 // Reads from the given file. Returns false in case of error.
00051 // If swap is true, assumes a big/little-endian swap is needed.
00052 bool IndexMap::DeSerialize(bool swap, FILE* fp) {
00053   inT32 sparse_size;
00054   if (fread(&sparse_size, sizeof(sparse_size), 1, fp) != 1) return false;
00055   if (swap)
00056     ReverseN(&sparse_size, sizeof(sparse_size));
00057   sparse_size_ = sparse_size;
00058   if (!compact_map_.DeSerialize(swap, fp)) return false;
00059   return true;
00060 }
00061 
00062 
00063 // Top-level init function in a single call to initialize a map to select
00064 // a single contiguous subrange [start, end) of the sparse space to be mapped
00065 // 1 to 1 to the compact space, with all other elements of the sparse space
00066 // left unmapped.
00067 // No need to call Setup after this.
00068 void IndexMapBiDi::InitAndSetupRange(int sparse_size, int start, int end) {
00069   Init(sparse_size, false);
00070   for (int i = start; i < end; ++i)
00071     SetMap(i, true);
00072   Setup();
00073 }
00074 
00075 // Initializes just the sparse_map_ to the given size with either all
00076 // forward indices mapped (all_mapped = true) or none (all_mapped = false).
00077 // Call Setup immediately after, or make calls to SetMap first to adjust the
00078 // mapping and then call Setup before using the map.
00079 void IndexMapBiDi::Init(int size, bool all_mapped) {
00080   sparse_map_.init_to_size(size, -1);
00081   if (all_mapped) {
00082     for (int i = 0; i < size; ++i)
00083       sparse_map_[i] = i;
00084   }
00085 }
00086 
00087 // Sets a given index in the sparse_map_ to be mapped or not.
00088 void IndexMapBiDi::SetMap(int sparse_index, bool mapped) {
00089   sparse_map_[sparse_index] = mapped ? 0 : -1;
00090 }
00091 
00092 // Sets up the sparse_map_ and compact_map_ properly after Init and
00093 // some calls to SetMap. Assumes an ordered 1-1 map from set indices
00094 // in the forward map to the compact space.
00095 void IndexMapBiDi::Setup() {
00096   int compact_size = 0;
00097   for (int i = 0; i < sparse_map_.size(); ++i) {
00098     if (sparse_map_[i] >= 0) {
00099       sparse_map_[i] = compact_size++;
00100     }
00101   }
00102   compact_map_.init_to_size(compact_size, -1);
00103   for (int i = 0; i < sparse_map_.size(); ++i) {
00104     if (sparse_map_[i] >= 0) {
00105       compact_map_[sparse_map_[i]] = i;
00106     }
00107   }
00108   sparse_size_ = sparse_map_.size();
00109 }
00110 
00111 // Copy from the input.
00112 void IndexMapBiDi::CopyFrom(const IndexMapBiDi& src) {
00113   sparse_map_ = src.sparse_map_;
00114   compact_map_ = src.compact_map_;
00115   sparse_size_ = sparse_map_.size();
00116 }
00117 
00118 // Merges the two compact space indices. May be called many times, but
00119 // the merges must be concluded by a call to CompleteMerges.
00120 // Returns true if a merge was actually performed.
00121 bool IndexMapBiDi::Merge(int compact_index1, int compact_index2) {
00122   // Find the current master index for index1 and index2.
00123   compact_index1 = MasterCompactIndex(compact_index1);
00124   compact_index2 = MasterCompactIndex(compact_index2);
00125   // Be sure that index1 < index2.
00126   if (compact_index1 > compact_index2) {
00127     int tmp = compact_index1;
00128     compact_index1 = compact_index2;
00129     compact_index2 = tmp;
00130   } else if (compact_index1 == compact_index2) {
00131     return false;
00132   }
00133   // To save iterating over all sparse_map_ entries, simply make the master
00134   // entry for index2 point to index1.
00135   // This leaves behind a potential chain of parents that needs to be chased,
00136   // as above.
00137   sparse_map_[compact_map_[compact_index2]] = compact_index1;
00138   if (compact_index1 >= 0)
00139     compact_map_[compact_index2] = compact_map_[compact_index1];
00140   return true;
00141 }
00142 
00143 // Completes one or more Merge operations by further compacting the
00144 // compact space. Unused compact space indices are removed, and the used
00145 // ones above shuffled down to fill the gaps.
00146 // Example:
00147 // Input sparse_map_: (x indicates -1)
00148 // x x 0 x 2 x x 4 x 0 x 2 x
00149 // Output sparse_map_:
00150 // x x 0 x 1 x x 2 x 0 x 1 x
00151 // Output compact_map_:
00152 // 2 4 7.
00153 void IndexMapBiDi::CompleteMerges() {
00154   // Ensure each sparse_map_entry contains a master compact_map_ index.
00155   int compact_size = 0;
00156   for (int i = 0; i < sparse_map_.size(); ++i) {
00157     int compact_index = MasterCompactIndex(sparse_map_[i]);
00158     sparse_map_[i] = compact_index;
00159     if (compact_index >= compact_size)
00160       compact_size = compact_index + 1;
00161   }
00162   // Re-generate the compact_map leaving holes for unused indices.
00163   compact_map_.init_to_size(compact_size, -1);
00164   for (int i = 0; i < sparse_map_.size(); ++i) {
00165     if (sparse_map_[i] >= 0) {
00166       if (compact_map_[sparse_map_[i]] == -1)
00167         compact_map_[sparse_map_[i]] = i;
00168     }
00169   }
00170   // Compact the compact_map, leaving tmp_compact_map saying where each
00171   // index went to in the compacted map.
00172   GenericVector<inT32> tmp_compact_map;
00173   tmp_compact_map.init_to_size(compact_size, -1);
00174   compact_size = 0;
00175   for (int i = 0; i < compact_map_.size(); ++i) {
00176     if (compact_map_[i] >= 0) {
00177       tmp_compact_map[i] = compact_size;
00178       compact_map_[compact_size++] = compact_map_[i];
00179     }
00180   }
00181   compact_map_.truncate(compact_size);
00182   // Now modify the entries in the sparse map to point to the new locations.
00183   for (int i = 0; i < sparse_map_.size(); ++i) {
00184     if (sparse_map_[i] >= 0) {
00185       sparse_map_[i] = tmp_compact_map[sparse_map_[i]];
00186     }
00187   }
00188 }
00189 
00190 // Writes to the given file. Returns false in case of error.
00191 bool IndexMapBiDi::Serialize(FILE* fp) const {
00192   if (!IndexMap::Serialize(fp)) return false;
00193   // Make a vector containing the rest of the map. If the map is many-to-one
00194   // then each additional sparse entry needs to be stored.
00195   // Normally we store only the compact map to save space.
00196   GenericVector<inT32> remaining_pairs;
00197   for (int i = 0; i < sparse_map_.size(); ++i) {
00198     if (sparse_map_[i] >= 0 && compact_map_[sparse_map_[i]] != i) {
00199       remaining_pairs.push_back(i);
00200       remaining_pairs.push_back(sparse_map_[i]);
00201     }
00202   }
00203   if (!remaining_pairs.Serialize(fp)) return false;
00204   return true;
00205 }
00206 
00207 // Reads from the given file. Returns false in case of error.
00208 // If swap is true, assumes a big/little-endian swap is needed.
00209 bool IndexMapBiDi::DeSerialize(bool swap, FILE* fp) {
00210   if (!IndexMap::DeSerialize(swap, fp)) return false;
00211   GenericVector<inT32> remaining_pairs;
00212   if (!remaining_pairs.DeSerialize(swap, fp)) return false;
00213   sparse_map_.init_to_size(sparse_size_, -1);
00214   for (int i = 0; i < compact_map_.size(); ++i) {
00215     sparse_map_[compact_map_[i]] = i;
00216   }
00217   for (int i = 0; i < remaining_pairs.size(); ++i) {
00218     int sparse_index = remaining_pairs[i++];
00219     sparse_map_[sparse_index] = remaining_pairs[i];
00220   }
00221   return true;
00222 }
00223 
00224 // Bulk calls to SparseToCompact.
00225 // Maps the given array of sparse indices to an array of compact indices.
00226 // Assumes the input is sorted. The output indices are sorted and uniqued.
00227 // Return value is the number of "missed" features, being features that
00228 // don't map to the compact feature space.
00229 int IndexMapBiDi::MapFeatures(const GenericVector<int>& sparse,
00230                               GenericVector<int>* compact) const {
00231   compact->truncate(0);
00232   int num_features = sparse.size();
00233   int missed_features = 0;
00234   int prev_good_feature = -1;
00235   for (int f = 0; f < num_features; ++f) {
00236     int feature = sparse_map_[sparse[f]];
00237     if (feature >= 0) {
00238       if (feature != prev_good_feature) {
00239         compact->push_back(feature);
00240         prev_good_feature = feature;
00241       }
00242     } else {
00243       ++missed_features;
00244     }
00245   }
00246   return missed_features;
00247 }
00248 
00249 }  // namespace tesseract.
00250