Tesseract
3.02
|
00001 00002 // File: indexmapbidi.cpp 00003 // Description: Bi-directional mapping between a sparse and compact space. 00004 // Author: rays@google.com (Ray Smith) 00005 // Created: Tue Apr 06 11:33:59 PDT 2010 00006 // 00007 // (C) Copyright 2010, Google Inc. 00008 // Licensed under the Apache License, Version 2.0 (the "License"); 00009 // you may not use this file except in compliance with the License. 00010 // You may obtain a copy of the License at 00011 // http://www.apache.org/licenses/LICENSE-2.0 00012 // Unless required by applicable law or agreed to in writing, software 00013 // distributed under the License is distributed on an "AS IS" BASIS, 00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 // See the License for the specific language governing permissions and 00016 // limitations under the License. 00017 // 00019 00020 #include "indexmapbidi.h" 00021 00022 namespace tesseract { 00023 00024 // SparseToCompact takes a sparse index to an index in the compact space. 00025 // Uses a binary search to find the result. For faster speed use 00026 // IndexMapBiDi, but that takes more memory. 00027 int IndexMap::SparseToCompact(int sparse_index) const { 00028 int result = compact_map_.binary_search(sparse_index); 00029 return compact_map_[result] == sparse_index ? result : -1; 00030 } 00031 00032 // Copy from the input. 00033 void IndexMap::CopyFrom(const IndexMap& src) { 00034 sparse_size_ = src.sparse_size_; 00035 compact_map_ = src.compact_map_; 00036 } 00037 void IndexMap::CopyFrom(const IndexMapBiDi& src) { 00038 sparse_size_ = src.SparseSize(); 00039 compact_map_ = src.compact_map_; 00040 } 00041 00042 // Writes to the given file. Returns false in case of error. 00043 bool IndexMap::Serialize(FILE* fp) const { 00044 inT32 sparse_size = sparse_size_; 00045 if (fwrite(&sparse_size, sizeof(sparse_size), 1, fp) != 1) return false; 00046 if (!compact_map_.Serialize(fp)) return false; 00047 return true; 00048 } 00049 00050 // Reads from the given file. Returns false in case of error. 00051 // If swap is true, assumes a big/little-endian swap is needed. 00052 bool IndexMap::DeSerialize(bool swap, FILE* fp) { 00053 inT32 sparse_size; 00054 if (fread(&sparse_size, sizeof(sparse_size), 1, fp) != 1) return false; 00055 if (swap) 00056 ReverseN(&sparse_size, sizeof(sparse_size)); 00057 sparse_size_ = sparse_size; 00058 if (!compact_map_.DeSerialize(swap, fp)) return false; 00059 return true; 00060 } 00061 00062 00063 // Top-level init function in a single call to initialize a map to select 00064 // a single contiguous subrange [start, end) of the sparse space to be mapped 00065 // 1 to 1 to the compact space, with all other elements of the sparse space 00066 // left unmapped. 00067 // No need to call Setup after this. 00068 void IndexMapBiDi::InitAndSetupRange(int sparse_size, int start, int end) { 00069 Init(sparse_size, false); 00070 for (int i = start; i < end; ++i) 00071 SetMap(i, true); 00072 Setup(); 00073 } 00074 00075 // Initializes just the sparse_map_ to the given size with either all 00076 // forward indices mapped (all_mapped = true) or none (all_mapped = false). 00077 // Call Setup immediately after, or make calls to SetMap first to adjust the 00078 // mapping and then call Setup before using the map. 00079 void IndexMapBiDi::Init(int size, bool all_mapped) { 00080 sparse_map_.init_to_size(size, -1); 00081 if (all_mapped) { 00082 for (int i = 0; i < size; ++i) 00083 sparse_map_[i] = i; 00084 } 00085 } 00086 00087 // Sets a given index in the sparse_map_ to be mapped or not. 00088 void IndexMapBiDi::SetMap(int sparse_index, bool mapped) { 00089 sparse_map_[sparse_index] = mapped ? 0 : -1; 00090 } 00091 00092 // Sets up the sparse_map_ and compact_map_ properly after Init and 00093 // some calls to SetMap. Assumes an ordered 1-1 map from set indices 00094 // in the forward map to the compact space. 00095 void IndexMapBiDi::Setup() { 00096 int compact_size = 0; 00097 for (int i = 0; i < sparse_map_.size(); ++i) { 00098 if (sparse_map_[i] >= 0) { 00099 sparse_map_[i] = compact_size++; 00100 } 00101 } 00102 compact_map_.init_to_size(compact_size, -1); 00103 for (int i = 0; i < sparse_map_.size(); ++i) { 00104 if (sparse_map_[i] >= 0) { 00105 compact_map_[sparse_map_[i]] = i; 00106 } 00107 } 00108 sparse_size_ = sparse_map_.size(); 00109 } 00110 00111 // Copy from the input. 00112 void IndexMapBiDi::CopyFrom(const IndexMapBiDi& src) { 00113 sparse_map_ = src.sparse_map_; 00114 compact_map_ = src.compact_map_; 00115 sparse_size_ = sparse_map_.size(); 00116 } 00117 00118 // Merges the two compact space indices. May be called many times, but 00119 // the merges must be concluded by a call to CompleteMerges. 00120 // Returns true if a merge was actually performed. 00121 bool IndexMapBiDi::Merge(int compact_index1, int compact_index2) { 00122 // Find the current master index for index1 and index2. 00123 compact_index1 = MasterCompactIndex(compact_index1); 00124 compact_index2 = MasterCompactIndex(compact_index2); 00125 // Be sure that index1 < index2. 00126 if (compact_index1 > compact_index2) { 00127 int tmp = compact_index1; 00128 compact_index1 = compact_index2; 00129 compact_index2 = tmp; 00130 } else if (compact_index1 == compact_index2) { 00131 return false; 00132 } 00133 // To save iterating over all sparse_map_ entries, simply make the master 00134 // entry for index2 point to index1. 00135 // This leaves behind a potential chain of parents that needs to be chased, 00136 // as above. 00137 sparse_map_[compact_map_[compact_index2]] = compact_index1; 00138 if (compact_index1 >= 0) 00139 compact_map_[compact_index2] = compact_map_[compact_index1]; 00140 return true; 00141 } 00142 00143 // Completes one or more Merge operations by further compacting the 00144 // compact space. Unused compact space indices are removed, and the used 00145 // ones above shuffled down to fill the gaps. 00146 // Example: 00147 // Input sparse_map_: (x indicates -1) 00148 // x x 0 x 2 x x 4 x 0 x 2 x 00149 // Output sparse_map_: 00150 // x x 0 x 1 x x 2 x 0 x 1 x 00151 // Output compact_map_: 00152 // 2 4 7. 00153 void IndexMapBiDi::CompleteMerges() { 00154 // Ensure each sparse_map_entry contains a master compact_map_ index. 00155 int compact_size = 0; 00156 for (int i = 0; i < sparse_map_.size(); ++i) { 00157 int compact_index = MasterCompactIndex(sparse_map_[i]); 00158 sparse_map_[i] = compact_index; 00159 if (compact_index >= compact_size) 00160 compact_size = compact_index + 1; 00161 } 00162 // Re-generate the compact_map leaving holes for unused indices. 00163 compact_map_.init_to_size(compact_size, -1); 00164 for (int i = 0; i < sparse_map_.size(); ++i) { 00165 if (sparse_map_[i] >= 0) { 00166 if (compact_map_[sparse_map_[i]] == -1) 00167 compact_map_[sparse_map_[i]] = i; 00168 } 00169 } 00170 // Compact the compact_map, leaving tmp_compact_map saying where each 00171 // index went to in the compacted map. 00172 GenericVector<inT32> tmp_compact_map; 00173 tmp_compact_map.init_to_size(compact_size, -1); 00174 compact_size = 0; 00175 for (int i = 0; i < compact_map_.size(); ++i) { 00176 if (compact_map_[i] >= 0) { 00177 tmp_compact_map[i] = compact_size; 00178 compact_map_[compact_size++] = compact_map_[i]; 00179 } 00180 } 00181 compact_map_.truncate(compact_size); 00182 // Now modify the entries in the sparse map to point to the new locations. 00183 for (int i = 0; i < sparse_map_.size(); ++i) { 00184 if (sparse_map_[i] >= 0) { 00185 sparse_map_[i] = tmp_compact_map[sparse_map_[i]]; 00186 } 00187 } 00188 } 00189 00190 // Writes to the given file. Returns false in case of error. 00191 bool IndexMapBiDi::Serialize(FILE* fp) const { 00192 if (!IndexMap::Serialize(fp)) return false; 00193 // Make a vector containing the rest of the map. If the map is many-to-one 00194 // then each additional sparse entry needs to be stored. 00195 // Normally we store only the compact map to save space. 00196 GenericVector<inT32> remaining_pairs; 00197 for (int i = 0; i < sparse_map_.size(); ++i) { 00198 if (sparse_map_[i] >= 0 && compact_map_[sparse_map_[i]] != i) { 00199 remaining_pairs.push_back(i); 00200 remaining_pairs.push_back(sparse_map_[i]); 00201 } 00202 } 00203 if (!remaining_pairs.Serialize(fp)) return false; 00204 return true; 00205 } 00206 00207 // Reads from the given file. Returns false in case of error. 00208 // If swap is true, assumes a big/little-endian swap is needed. 00209 bool IndexMapBiDi::DeSerialize(bool swap, FILE* fp) { 00210 if (!IndexMap::DeSerialize(swap, fp)) return false; 00211 GenericVector<inT32> remaining_pairs; 00212 if (!remaining_pairs.DeSerialize(swap, fp)) return false; 00213 sparse_map_.init_to_size(sparse_size_, -1); 00214 for (int i = 0; i < compact_map_.size(); ++i) { 00215 sparse_map_[compact_map_[i]] = i; 00216 } 00217 for (int i = 0; i < remaining_pairs.size(); ++i) { 00218 int sparse_index = remaining_pairs[i++]; 00219 sparse_map_[sparse_index] = remaining_pairs[i]; 00220 } 00221 return true; 00222 } 00223 00224 // Bulk calls to SparseToCompact. 00225 // Maps the given array of sparse indices to an array of compact indices. 00226 // Assumes the input is sorted. The output indices are sorted and uniqued. 00227 // Return value is the number of "missed" features, being features that 00228 // don't map to the compact feature space. 00229 int IndexMapBiDi::MapFeatures(const GenericVector<int>& sparse, 00230 GenericVector<int>* compact) const { 00231 compact->truncate(0); 00232 int num_features = sparse.size(); 00233 int missed_features = 0; 00234 int prev_good_feature = -1; 00235 for (int f = 0; f < num_features; ++f) { 00236 int feature = sparse_map_[sparse[f]]; 00237 if (feature >= 0) { 00238 if (feature != prev_good_feature) { 00239 compact->push_back(feature); 00240 prev_good_feature = feature; 00241 } 00242 } else { 00243 ++missed_features; 00244 } 00245 } 00246 return missed_features; 00247 } 00248 00249 } // namespace tesseract. 00250