Tesseract
3.02
|
00001 00002 // File: indexmapbidi.h 00003 // Description: Bi-directional mapping between a sparse and compact space. 00004 // Author: rays@google.com (Ray Smith) 00005 // Created: Tue Apr 06 11:33:59 PDT 2010 00006 // 00007 // (C) Copyright 2010, Google Inc. 00008 // Licensed under the Apache License, Version 2.0 (the "License"); 00009 // you may not use this file except in compliance with the License. 00010 // You may obtain a copy of the License at 00011 // http://www.apache.org/licenses/LICENSE-2.0 00012 // Unless required by applicable law or agreed to in writing, software 00013 // distributed under the License is distributed on an "AS IS" BASIS, 00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 // See the License for the specific language governing permissions and 00016 // limitations under the License. 00017 // 00019 00020 #ifndef TESSERACT_CCUTIL_INDEXMAPBIDI_H_ 00021 #define TESSERACT_CCUTIL_INDEXMAPBIDI_H_ 00022 00023 #include <stdio.h> 00024 #include "genericvector.h" 00025 00026 namespace tesseract { 00027 00028 class IndexMapBiDi; 00029 00030 // Bidirectional one-to-one mapping between a sparse and a compact discrete 00031 // space. Many entries in the sparse space are unmapped, but those that are 00032 // mapped have a 1-1 mapping to (and from) the compact space, where all 00033 // values are used. This is useful for forming subsets of larger collections, 00034 // such as subsets of character sets, or subsets of binary feature spaces. 00035 // 00036 // This base class provides basic functionality with binary search for the 00037 // SparseToCompact mapping to save memory. 00038 // For a faster inverse mapping, or to allow a many-to-one mapping, use 00039 // IndexMapBiDi below. 00040 // NOTE: there are currently no methods to setup an IndexMap on its own! 00041 // It must be initialized by copying from an IndexMapBiDi or by DeSerialize. 00042 class IndexMap { 00043 public: 00044 virtual ~IndexMap() {} 00045 00046 // SparseToCompact takes a sparse index to an index in the compact space. 00047 // Uses a binary search to find the result. For faster speed use 00048 // IndexMapBiDi, but that takes more memory. 00049 virtual int SparseToCompact(int sparse_index) const; 00050 00051 // CompactToSparse takes a compact index to the corresponding index in the 00052 // sparse space. 00053 int CompactToSparse(int compact_index) const { 00054 return compact_map_[compact_index]; 00055 } 00056 // The size of the sparse space. 00057 virtual int SparseSize() const { 00058 return sparse_size_; 00059 } 00060 // The size of the compact space. 00061 int CompactSize() const { 00062 return compact_map_.size(); 00063 } 00064 00065 // Copy from the input. 00066 void CopyFrom(const IndexMap& src); 00067 void CopyFrom(const IndexMapBiDi& src); 00068 00069 // Writes to the given file. Returns false in case of error. 00070 bool Serialize(FILE* fp) const; 00071 // Reads from the given file. Returns false in case of error. 00072 // If swap is true, assumes a big/little-endian swap is needed. 00073 bool DeSerialize(bool swap, FILE* fp); 00074 00075 protected: 00076 // The sparse space covers integers in the range [0, sparse_size_-1]. 00077 int sparse_size_; 00078 // The compact space covers integers in the range [0, compact_map_.size()-1]. 00079 // Each element contains the corresponding sparse index. 00080 GenericVector<inT32> compact_map_; 00081 }; 00082 00083 // Bidirectional many-to-one mapping between a sparse and a compact discrete 00084 // space. As with IndexMap, many entries may be unmapped, but unlike IndexMap, 00085 // of those that are, many may be mapped to the same compact index. 00086 // If the map is many-to-one, it is not possible to directly obtain all the 00087 // sparse indices that map to a single compact index. 00088 // This map is time- rather than space-efficient. It stores the entire sparse 00089 // space. 00090 // IndexMapBiDi may be initialized in one of 3 ways: 00091 // 1. Init(size, true); 00092 // Setup(); 00093 // Sets a complete 1:1 mapping with no unmapped elements. 00094 // 2. Init(size, false); 00095 // for ... SetMap(index, true); 00096 // Setup(); 00097 // Specifies precisely which sparse indices are mapped. The mapping is 1:1. 00098 // 3. Either of the above, followed by: 00099 // for ... Merge(index1, index2); 00100 // CompleteMerges(); 00101 // Allows a many-to-one mapping by merging compact space indices. 00102 class IndexMapBiDi : public IndexMap { 00103 public: 00104 virtual ~IndexMapBiDi() {} 00105 00106 // Top-level init function in a single call to initialize a map to select 00107 // a single contiguous subrange [start, end) of the sparse space to be mapped 00108 // 1 to 1 to the compact space, with all other elements of the sparse space 00109 // left unmapped. 00110 // No need to call Setup after this. 00111 void InitAndSetupRange(int sparse_size, int start, int end); 00112 00113 // Initializes just the sparse_map_ to the given size with either all 00114 // forward indices mapped (all_mapped = true) or none (all_mapped = false). 00115 // Call Setup immediately after, or make calls to SetMap first to adjust the 00116 // mapping and then call Setup before using the map. 00117 void Init(int size, bool all_mapped); 00118 // Sets a given index in the sparse_map_ to be mapped or not. 00119 void SetMap(int sparse_index, bool mapped); 00120 // Sets up the sparse_map_ and compact_map_ properly after Init and 00121 // some calls to SetMap. Assumes an ordered 1-1 map from set indices 00122 // in the sparse space to the compact space. 00123 void Setup(); 00124 00125 // Merges the two compact space indices. May be called many times, but 00126 // the merges must be concluded by a call to CompleteMerges. 00127 // Returns true if a merge was actually performed. 00128 bool Merge(int compact_index1, int compact_index2); 00129 // Returns true if the given compact index has been deleted. 00130 bool IsCompactDeleted(int index) const { 00131 return MasterCompactIndex(index) < 0; 00132 } 00133 // Completes one or more Merge operations by further compacting the 00134 // compact space. 00135 void CompleteMerges(); 00136 00137 // SparseToCompact takes a sparse index to an index in the compact space. 00138 virtual int SparseToCompact(int sparse_index) const { 00139 return sparse_map_[sparse_index]; 00140 } 00141 // The size of the sparse space. 00142 virtual int SparseSize() const { 00143 return sparse_map_.size(); 00144 } 00145 00146 // Copy from the input. 00147 void CopyFrom(const IndexMapBiDi& src); 00148 00149 // Writes to the given file. Returns false in case of error. 00150 bool Serialize(FILE* fp) const; 00151 // Reads from the given file. Returns false in case of error. 00152 // If swap is true, assumes a big/little-endian swap is needed. 00153 bool DeSerialize(bool swap, FILE* fp); 00154 00155 // Bulk calls to SparseToCompact. 00156 // Maps the given array of sparse indices to an array of compact indices. 00157 // Assumes the input is sorted. The output indices are sorted and uniqued. 00158 // Return value is the number of "missed" features, being features that 00159 // don't map to the compact feature space. 00160 int MapFeatures(const GenericVector<int>& sparse, 00161 GenericVector<int>* compact) const; 00162 00163 private: 00164 // Returns the master compact index for a given compact index. 00165 // During a multiple merge operation, several compact indices may be 00166 // combined, so we need to be able to find the master of all. 00167 int MasterCompactIndex(int compact_index) const { 00168 while (compact_index >= 0 && 00169 sparse_map_[compact_map_[compact_index]] != compact_index) 00170 compact_index = sparse_map_[compact_map_[compact_index]]; 00171 return compact_index; 00172 } 00173 00174 // Direct look-up of the compact index for each element in sparse space. 00175 GenericVector<inT32> sparse_map_; 00176 }; 00177 00178 } // namespace tesseract. 00179 00180 #endif // TESSERACT_CCUTIL_INDEXMAPBIDI_H_