Tesseract
3.02
|
00001 /****************************************************************************** 00002 ** Filename: intmatcher.c 00003 ** Purpose: Generic high level classification routines. 00004 ** Author: Robert Moss 00005 ** History: Wed Feb 13 17:35:28 MST 1991, RWM, Created. 00006 ** Mon Mar 11 16:33:02 MST 1991, RWM, Modified to add 00007 ** support for adaptive matching. 00008 ** (c) Copyright Hewlett-Packard Company, 1988. 00009 ** Licensed under the Apache License, Version 2.0 (the "License"); 00010 ** you may not use this file except in compliance with the License. 00011 ** You may obtain a copy of the License at 00012 ** http://www.apache.org/licenses/LICENSE-2.0 00013 ** Unless required by applicable law or agreed to in writing, software 00014 ** distributed under the License is distributed on an "AS IS" BASIS, 00015 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00016 ** See the License for the specific language governing permissions and 00017 ** limitations under the License. 00018 ******************************************************************************/ 00019 /*---------------------------------------------------------------------------- 00020 Include Files and Type Defines 00021 ----------------------------------------------------------------------------*/ 00022 #include "intmatcher.h" 00023 #include "intproto.h" 00024 #include "callcpp.h" 00025 #include "scrollview.h" 00026 #include "float2int.h" 00027 #include "globals.h" 00028 #include "helpers.h" 00029 #include "classify.h" 00030 #include "shapetable.h" 00031 #include <math.h> 00032 00033 // Include automatically generated configuration file if running autoconf. 
#ifdef HAVE_CONFIG_H
#include "config_auto.h"
#endif

/*----------------------------------------------------------------------------
          Global Data Definitions and Declarations
----------------------------------------------------------------------------*/
// Parameters of the sigmoid used to convert similarity to evidence in the
// similarity_evidence_table_ that is used to convert distance metric to an
// 8 bit evidence value in the secondary matcher. (See IntMatcher::Init).
// NOTE: kSEExponentialMultiplier is 0.0, which disables the exponential
// scaling branch in IntegerMatcher::Init entirely.
const float IntegerMatcher::kSEExponentialMultiplier = 0.0;
const float IntegerMatcher::kSimilarityCenter = 0.0075;

// offset_table[n] is the bit index of the lowest set bit of n, with the
// sentinel value 255 for n == 0 (no bit set). Used to iterate over the set
// bits of pruner/config words without scanning bit by bit.
static const uinT8 offset_table[256] = {
  255, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
};

// next_table[n] is n with its lowest set bit cleared (e.g. next_table[3] ==
// 0x2, next_table[0x30] == 0x20). Together with offset_table this lets a
// loop visit each set bit of a byte: extract the lowest bit's index, then
// step to next_table[n] until it reaches 0.
static const uinT8 next_table[256] = {
  0, 0, 0, 0x2, 0, 0x4, 0x4, 0x6, 0, 0x8, 0x8, 0x0a, 0x08, 0x0c, 0x0c, 0x0e,
  0, 0x10, 0x10, 0x12, 0x10, 0x14, 0x14, 0x16, 0x10, 0x18, 0x18, 0x1a, 0x18,
  0x1c, 0x1c, 0x1e,
  0, 0x20, 0x20, 0x22, 0x20, 0x24, 0x24, 0x26, 0x20, 0x28, 0x28, 0x2a, 0x28,
  0x2c, 0x2c, 0x2e,
  0x20, 0x30, 0x30, 0x32, 0x30, 0x34, 0x34, 0x36, 0x30, 0x38, 0x38, 0x3a,
  0x38, 0x3c, 0x3c, 0x3e,
  0, 0x40, 0x40, 0x42, 0x40, 0x44, 0x44, 0x46, 0x40, 0x48, 0x48, 0x4a, 0x48,
  0x4c, 0x4c, 0x4e,
  0x40, 0x50, 0x50, 0x52, 0x50, 0x54, 0x54, 0x56, 0x50, 0x58, 0x58, 0x5a,
  0x58, 0x5c, 0x5c, 0x5e,
  0x40, 0x60, 0x60, 0x62, 0x60, 0x64, 0x64, 0x66, 0x60, 0x68, 0x68, 0x6a,
  0x68, 0x6c, 0x6c, 0x6e,
  0x60, 0x70, 0x70, 0x72, 0x70, 0x74, 0x74, 0x76, 0x70, 0x78, 0x78, 0x7a,
  0x78, 0x7c, 0x7c, 0x7e,
  0, 0x80, 0x80, 0x82, 0x80, 0x84, 0x84, 0x86, 0x80, 0x88, 0x88, 0x8a, 0x88,
  0x8c, 0x8c, 0x8e,
  0x80, 0x90, 0x90, 0x92, 0x90, 0x94, 0x94, 0x96, 0x90, 0x98, 0x98, 0x9a,
  0x98, 0x9c, 0x9c, 0x9e,
  0x80, 0xa0, 0xa0, 0xa2, 0xa0, 0xa4, 0xa4, 0xa6, 0xa0, 0xa8, 0xa8, 0xaa,
  0xa8, 0xac, 0xac, 0xae,
  0xa0, 0xb0, 0xb0, 0xb2, 0xb0, 0xb4, 0xb4, 0xb6, 0xb0, 0xb8, 0xb8, 0xba,
  0xb8, 0xbc, 0xbc, 0xbe,
  0x80, 0xc0, 0xc0, 0xc2, 0xc0, 0xc4, 0xc4, 0xc6, 0xc0, 0xc8, 0xc8, 0xca,
  0xc8, 0xcc, 0xcc, 0xce,
  0xc0, 0xd0, 0xd0, 0xd2, 0xd0, 0xd4, 0xd4, 0xd6, 0xd0, 0xd8, 0xd8, 0xda,
  0xd8, 0xdc, 0xdc, 0xde,
  0xc0, 0xe0, 0xe0, 0xe2, 0xe0, 0xe4, 0xe4, 0xe6, 0xe0, 0xe8, 0xe8, 0xea,
  0xe8, 0xec, 0xec, 0xee,
  0xe0, 0xf0, 0xf0, 0xf2, 0xf0, 0xf4, 0xf4, 0xf6, 0xf0, 0xf8, 0xf8, 0xfa,
  0xf8, 0xfc, 0xfc, 0xfe
};

namespace tesseract {

// Encapsulation of the intermediate data and computations made by the class
// pruner. The class pruner implements a simple linear classifier on binary
// features by heavily quantizing the feature space, and applying
// NUM_BITS_PER_CLASS (2)-bit weights to the features. Lack of resolution in
// weights is compensated by a non-constant bias that is dependent on the
// number of features present.
class ClassPruner {
 public:
  // Constructs a pruner able to hold scores for up to max_classes classes.
  ClassPruner(int max_classes) {
    // The unrolled loop in ComputeScores means that the array sizes need to
    // be rounded up so that the array is big enough to accommodate the extra
    // entries accessed by the unrolling. Each pruner word is of size
    // BITS_PER_WERD and each entry is NUM_BITS_PER_CLASS, so there are
    // BITS_PER_WERD / NUM_BITS_PER_CLASS entries.
    // See ComputeScores.
    max_classes_ = max_classes;
    rounded_classes_ = RoundUp(
        max_classes, WERDS_PER_CP_VECTOR * BITS_PER_WERD / NUM_BITS_PER_CLASS);
    class_count_ = new int[rounded_classes_];
    norm_count_ = new int[rounded_classes_];
    // sort_key_/sort_index_ are filled starting at index 1 by PruneAndSort,
    // hence the +1 on their sizes.
    sort_key_ = new int[rounded_classes_ + 1];
    sort_index_ = new int[rounded_classes_ + 1];
    for (int i = 0; i < rounded_classes_; i++) {
      class_count_[i] = 0;
    }
    pruning_threshold_ = 0;
    num_features_ = 0;
    num_classes_ = 0;
  }

  ~ClassPruner() {
    delete []class_count_;
    delete []norm_count_;
    delete []sort_key_;
    delete []sort_index_;
  }

  // Computes the scores for every class in the character set, by summing the
  // weights for each feature and stores the sums internally in class_count_.
  void ComputeScores(const INT_TEMPLATES_STRUCT* int_templates,
                     int num_features, const INT_FEATURE_STRUCT* features) {
    num_features_ = num_features;
    int num_pruners = int_templates->NumClassPruners;
    for (int f = 0; f < num_features; ++f) {
      const INT_FEATURE_STRUCT* feature = &features[f];
      // Quantize the feature to NUM_CP_BUCKETS*NUM_CP_BUCKETS*NUM_CP_BUCKETS.
      // X/Y/Theta are 8-bit coordinates, so (v * NUM_CP_BUCKETS) >> 8 maps
      // each into [0, NUM_CP_BUCKETS).
      int x = feature->X * NUM_CP_BUCKETS >> 8;
      int y = feature->Y * NUM_CP_BUCKETS >> 8;
      int theta = feature->Theta * NUM_CP_BUCKETS >> 8;
      int class_id = 0;
      // Each CLASS_PRUNER_STRUCT only covers CLASSES_PER_CP(32) classes, so
      // we need a collection of them, indexed by pruner_set.
      for (int pruner_set = 0; pruner_set < num_pruners; ++pruner_set) {
        // Look up quantized feature in a 3-D array, an array of weights for
        // each class.
        const uinT32* pruner_word_ptr =
            int_templates->ClassPruners[pruner_set]->p[x][y][theta];
        for (int word = 0; word < WERDS_PER_CP_VECTOR; ++word) {
          uinT32 pruner_word = *pruner_word_ptr++;
          // This inner loop is unrolled to speed up the ClassPruner.
          // Currently gcc would not unroll it unless it is set to O3
          // level of optimization or -funroll-loops is specified.
          // The unrolled equivalent of:
          /*
          uinT32 class_mask = (1 << NUM_BITS_PER_CLASS) - 1;
          for (int bit = 0; bit < BITS_PER_WERD/NUM_BITS_PER_CLASS; bit++) {
            class_count_[class_id++] += pruner_word & class_mask;
            pruner_word >>= NUM_BITS_PER_CLASS;
          }
          */
          // 16 entries of NUM_BITS_PER_CLASS bits each per 32-bit word.
          // Note the final entry needs no trailing shift.
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
        }
      }
    }
  }

  // Adjusts the scores according to the number of expected features. Used
  // in lieu of a constant bias, this penalizes classes that expect more
  // features than there are present. Thus an actual c will score higher for c
  // than e, even though almost all the features match e as well as c, because
  // e expects more features to be present.
  void AdjustForExpectedNumFeatures(const uinT16* expected_num_features,
                                    int cutoff_strength) {
    for (int class_id = 0; class_id < max_classes_; ++class_id) {
      if (num_features_ < expected_num_features[class_id]) {
        int deficit = expected_num_features[class_id] - num_features_;
        class_count_[class_id] -= class_count_[class_id] * deficit /
          (num_features_ * cutoff_strength + deficit);
      }
    }
  }

  // Zeros the scores for classes disabled in the unicharset.
  // Implements the black-list to recognize a subset of the character set.
00225 void DisableDisabledClasses(const UNICHARSET& unicharset) { 00226 for (int class_id = 0; class_id < max_classes_; ++class_id) { 00227 if (!unicharset.get_enabled(class_id)) 00228 class_count_[class_id] = 0; // This char is disabled! 00229 } 00230 } 00231 00232 // Zeros the scores of fragments. 00233 void DisableFragments(const UNICHARSET& unicharset) { 00234 for (int class_id = 0; class_id < max_classes_; ++class_id) { 00235 // Do not include character fragments in the class pruner 00236 // results if disable_character_fragments is true. 00237 if (unicharset.get_fragment(class_id)) { 00238 class_count_[class_id] = 0; 00239 } 00240 } 00241 } 00242 00243 // Normalizes the counts for xheight, putting the normalized result in 00244 // norm_count_. Applies a simple subtractive penalty for incorrect vertical 00245 // position provided by the normalization_factors array, indexed by 00246 // character class, and scaled by the norm_multiplier. 00247 void NormalizeForXheight(int norm_multiplier, 00248 const uinT8* normalization_factors) { 00249 for (int class_id = 0; class_id < max_classes_; class_id++) { 00250 norm_count_[class_id] = class_count_[class_id] - 00251 ((norm_multiplier * normalization_factors[class_id]) >> 8); 00252 } 00253 } 00254 00255 // The nop normalization copies the class_count_ array to norm_count_. 00256 void NoNormalization() { 00257 for (int class_id = 0; class_id < max_classes_; class_id++) { 00258 norm_count_[class_id] = class_count_[class_id]; 00259 } 00260 } 00261 00262 // Prunes the classes using <the maximum count> * pruning_factor/256 as a 00263 // threshold for keeping classes. If max_of_non_fragments, then ignore 00264 // fragments in computing the maximum count. 
  // Prunes to the classes whose normalized score reaches the threshold
  // (max score * pruning_factor / 256), then sorts the survivors by score.
  // Results are stored 1-based in sort_key_/sort_index_ for HeapSort.
  void PruneAndSort(int pruning_factor, bool max_of_non_fragments,
                    const UNICHARSET& unicharset) {
    int max_count = 0;
    for (int c = 0; c < max_classes_; ++c) {
      if (norm_count_[c] > max_count &&
          // This additional check is added in order to ensure that
          // the classifier will return at least one non-fragmented
          // character match.
          // TODO(daria): verify that this helps accuracy and does not
          // hurt performance.
          (!max_of_non_fragments || !unicharset.get_fragment(c))) {
        max_count = norm_count_[c];
      }
    }
    // Prune Classes.
    pruning_threshold_ = (max_count * pruning_factor) >> 8;
    // Select Classes.
    // A threshold below 1 would keep zero-scoring classes; clamp it.
    if (pruning_threshold_ < 1)
      pruning_threshold_ = 1;
    num_classes_ = 0;
    for (int class_id = 0; class_id < max_classes_; class_id++) {
      if (norm_count_[class_id] >= pruning_threshold_) {
        // Note: entries are written starting at index 1 (pre-increment),
        // matching the 1-based indexing that HeapSort expects.
        ++num_classes_;
        sort_index_[num_classes_] = class_id;
        sort_key_[num_classes_] = norm_count_[class_id];
      }
    }

    // Sort Classes using Heapsort Algorithm.
    if (num_classes_ > 1)
      HeapSort(num_classes_, sort_key_, sort_index_);
  }

  // Prints debug info on the class pruner matches for the pruned classes only.
  void DebugMatch(const Classify& classify,
                  const INT_TEMPLATES_STRUCT* int_templates,
                  const INT_FEATURE_STRUCT* features) const {
    int num_pruners = int_templates->NumClassPruners;
    int max_num_classes = int_templates->NumClasses;
    for (int f = 0; f < num_features_; ++f) {
      const INT_FEATURE_STRUCT* feature = &features[f];
      tprintf("F=%3d(%d,%d,%d),", f, feature->X, feature->Y, feature->Theta);
      // Quantize the feature to NUM_CP_BUCKETS*NUM_CP_BUCKETS*NUM_CP_BUCKETS.
      // Must mirror the quantization used by ComputeScores.
      int x = feature->X * NUM_CP_BUCKETS >> 8;
      int y = feature->Y * NUM_CP_BUCKETS >> 8;
      int theta = feature->Theta * NUM_CP_BUCKETS >> 8;
      int class_id = 0;
      for (int pruner_set = 0; pruner_set < num_pruners; ++pruner_set) {
        // Look up quantized feature in a 3-D array, an array of weights for
        // each class.
        const uinT32* pruner_word_ptr =
            int_templates->ClassPruners[pruner_set]->p[x][y][theta];
        for (int word = 0; word < WERDS_PER_CP_VECTOR; ++word) {
          uinT32 pruner_word = *pruner_word_ptr++;
          // 16 classes per 32-bit pruner word; print only survivors.
          for (int word_class = 0; word_class < 16 &&
               class_id < max_num_classes; ++word_class, ++class_id) {
            if (norm_count_[class_id] >= pruning_threshold_) {
              tprintf(" %s=%d,",
                      classify.ClassIDToDebugStr(int_templates,
                                                 class_id, 0).string(),
                      pruner_word & CLASS_PRUNER_CLASS_MASK);
            }
            pruner_word >>= NUM_BITS_PER_CLASS;
          }
        }
        tprintf("\n");
      }
    }
  }

  // Prints a summary of the pruner result.
  // Prints, for each surviving class (best first), the raw count, the
  // expected feature count, the xheight penalty, the normalized count and
  // the resulting percentage rating.
  void SummarizeResult(const Classify& classify,
                       const INT_TEMPLATES_STRUCT* int_templates,
                       const uinT16* expected_num_features,
                       int norm_multiplier,
                       const uinT8* normalization_factors) const {
    tprintf("CP:%d classes, %d features:\n", num_classes_, num_features_);
    for (int i = 0; i < num_classes_; ++i) {
      // sort_index_/sort_key_ are 1-based and sorted ascending, so the best
      // class is at index num_classes_ and the worst at index 1.
      int class_id = sort_index_[num_classes_ - i];
      STRING class_string = classify.ClassIDToDebugStr(int_templates,
                                                       class_id, 0);
      tprintf("%s:Initial=%d, E=%d, Xht-adj=%d, N=%d, Rat=%.2f\n",
              class_string.string(),
              class_count_[class_id],
              expected_num_features[class_id],
              (norm_multiplier * normalization_factors[class_id]) >> 8,
              sort_key_[num_classes_ - i],
              100.0 - 100.0 * sort_key_[num_classes_ - i] /
                (CLASS_PRUNER_CLASS_MASK * num_features_));
    }
  }

  // Copies the pruned, sorted classes into the output results and returns
  // the number of classes. Ratings are scaled to [0, 1] with 0 best.
  int SetupResults(CP_RESULT_STRUCT* results) const {
    for (int c = 0; c < num_classes_; ++c) {
      results[c].Class = sort_index_[num_classes_ - c];
      // CLASS_PRUNER_CLASS_MASK * num_features_ is the maximum possible
      // score, so Rating is the normalized "badness" of the match.
      results[c].Rating = 1.0 - sort_key_[num_classes_ - c] /
        (static_cast<float>(CLASS_PRUNER_CLASS_MASK) * num_features_);
    }
    return num_classes_;
  }

 private:
  // Array[rounded_classes_] of initial counts for each class.
  int *class_count_;
  // Array[rounded_classes_] of modified counts for each class after normalizing
  // for expected number of features, disabled classes, fragments, and xheights.
  int *norm_count_;
  // Array[rounded_classes_ +1] of pruned counts that gets sorted
  int *sort_key_;
  // Array[rounded_classes_ +1] of classes corresponding to sort_key_.
  int *sort_index_;
  // Number of classes in this class pruner.
  int max_classes_;
  // Rounded up number of classes used for array sizes.
  int rounded_classes_;
  // Threshold count applied to prune classes.
  int pruning_threshold_;
  // The number of features used to compute the scores.
  int num_features_;
  // Final number of pruned classes.
  int num_classes_;
};

/*----------------------------------------------------------------------------
              Public Code
----------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/
// Runs the class pruner from int_templates on the given features, returning
// the number of classes output in results.
//   int_templates          Class pruner tables
//   num_features           Number of features in blob
//   features               Array of features
//   normalization_factors  Array of fudge factors from blob
//                          normalization process (by CLASS_INDEX)
//   expected_num_features  Array of expected number of features
//                          for each class (by CLASS_INDEX)
//   results                Sorted Array of pruned classes. Must be an array
//                          of size at least int_templates->NumClasses.
int Classify::PruneClasses(const INT_TEMPLATES_STRUCT* int_templates,
                           int num_features,
                           const INT_FEATURE_STRUCT* features,
                           const uinT8* normalization_factors,
                           const uinT16* expected_num_features,
                           CP_RESULT_STRUCT* results) {
/*
 **  Operation:
 **    Prunes the classes using a modified fast match table.
 **    Returns a sorted list of classes along with the number
 **    of pruned classes in that list.
 **  Return: Number of pruned classes.
 **  Exceptions: none
 **  History: Tue Feb 19 10:24:24 MST 1991, RWM, Created.
 */
  ClassPruner pruner(int_templates->NumClasses);
  // Compute initial match scores for all classes.
  pruner.ComputeScores(int_templates, num_features, features);
  // Adjust match scores for number of expected features.
  pruner.AdjustForExpectedNumFeatures(expected_num_features,
                                      classify_cp_cutoff_strength);
  // Apply disabled classes in unicharset - only works without a shape_table.
  if (shape_table_ == NULL)
    pruner.DisableDisabledClasses(unicharset);
  // If fragments are disabled, remove them, also only without a shape table.
  if (disable_character_fragments && shape_table_ == NULL)
    pruner.DisableFragments(unicharset);

  // If we have good x-heights, apply the given normalization factors.
  if (normalization_factors != NULL) {
    pruner.NormalizeForXheight(classify_class_pruner_multiplier,
                               normalization_factors);
  } else {
    pruner.NoNormalization();
  }
  // Do the actual pruning and sort the short-list.
  pruner.PruneAndSort(classify_class_pruner_threshold,
                      shape_table_ == NULL, unicharset);

  if (classify_debug_level > 2) {
    pruner.DebugMatch(*this, int_templates, features);
  }
  if (classify_debug_level > 1) {
    pruner.SummarizeResult(*this, int_templates, expected_num_features,
                           classify_class_pruner_multiplier,
                           normalization_factors);
  }
  // Convert to the expected output format.
  return pruner.SetupResults(results);
}

}  // namespace tesseract

/*---------------------------------------------------------------------------*/
void IntegerMatcher::Match(INT_CLASS ClassTemplate,
                           BIT_VECTOR ProtoMask,
                           BIT_VECTOR ConfigMask,
                           inT16 NumFeatures,
                           const INT_FEATURE_STRUCT* Features,
                           INT_RESULT Result,
                           int AdaptFeatureThreshold,
                           int Debug,
                           bool SeparateDebugWindows) {
/*
 **  Parameters:
 **    ClassTemplate          Prototypes & tables for a class
 **    ProtoMask              AND Mask for proto word
 **    ConfigMask             AND Mask for config word
 **    NumFeatures            Number of features in blob
 **    Features               Array of features
 **    Result                 Class rating & configuration:
 **                           (0.0 -> 1.0), 0=good, 1=bad
 **    AdaptFeatureThreshold  Threshold for feature debug display
 **    Debug                  Debugger flag: 1=debugger on
 **  Globals:
 **    local_matcher_multiplier_  Normalization factor multiplier
 **  Operation:
 **    IntegerMatcher returns the best configuration and rating
 **    for a single class. The class matched against is determined
 **    by the uniqueness of the ClassTemplate parameter. The
 **    best rating and its associated configuration are returned.
 **  Return:
 **  Exceptions: none
 **  History: Tue Feb 19 16:36:23 MST 1991, RWM, Created.
 */
  ScratchEvidence *tables = new ScratchEvidence();
  int Feature;
  int BestMatch;

  if (MatchDebuggingOn (Debug))
    cprintf ("Integer Matcher -------------------------------------------\n");

  tables->Clear(ClassTemplate);
  Result->FeatureMisses = 0;

  for (Feature = 0; Feature < NumFeatures; Feature++) {
    int csum = UpdateTablesForFeature(ClassTemplate, ProtoMask, ConfigMask,
                                      Feature, &Features[Feature],
                                      tables, Debug);
    // Count features that were missed over all configs.
    // A zero evidence sum means no config matched this feature at all.
    if (csum == 0)
      Result->FeatureMisses++;
  }

#ifndef GRAPHICS_DISABLED
  if (PrintProtoMatchesOn(Debug) || PrintMatchSummaryOn(Debug)) {
    DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables,
                           NumFeatures, Debug);
  }

  if (DisplayProtoMatchesOn(Debug)) {
    DisplayProtoDebugInfo(ClassTemplate, ProtoMask, ConfigMask,
                          *tables, SeparateDebugWindows);
  }

  if (DisplayFeatureMatchesOn(Debug)) {
    DisplayFeatureDebugInfo(ClassTemplate, ProtoMask, ConfigMask, NumFeatures,
                            Features, AdaptFeatureThreshold, Debug,
                            SeparateDebugWindows);
  }
#endif

  // Fold the per-proto evidence into per-config sums, then normalize by the
  // feature count before picking the best config.
  tables->UpdateSumOfProtoEvidences(ClassTemplate, ConfigMask, NumFeatures);
  tables->NormalizeSums(ClassTemplate, NumFeatures, NumFeatures);

  BestMatch = FindBestMatch(ClassTemplate, *tables, Result);

#ifndef GRAPHICS_DISABLED
  if (PrintMatchSummaryOn(Debug))
    DebugBestMatch(BestMatch, Result);

  if (MatchDebuggingOn(Debug))
    cprintf("Match Complete --------------------------------------------\n");
#endif

  delete tables;
}


/*---------------------------------------------------------------------------*/
int IntegerMatcher::FindGoodProtos(
    INT_CLASS ClassTemplate,
    BIT_VECTOR ProtoMask,
    BIT_VECTOR ConfigMask,
    uinT16 BlobLength,
    inT16 NumFeatures,
    INT_FEATURE_ARRAY Features,
    PROTO_ID *ProtoArray,
    int AdaptProtoThreshold,
    int Debug) {
/*
 **  Parameters:
 **    ClassTemplate        Prototypes & tables for a class
 **    ProtoMask            AND Mask for proto word
 **    ConfigMask           AND Mask for config word
 **    BlobLength           Length of unormalized blob (unused here)
 **    NumFeatures          Number of features in blob
 **    Features             Array of features
 **    ProtoArray           Array of good protos
 **    AdaptProtoThreshold  Threshold for good protos
 **    Debug                Debugger flag: 1=debugger on
 **  Globals:
 **    local_matcher_multiplier_  Normalization factor multiplier
 **  Operation:
 **    FindGoodProtos finds all protos whose normalized proto-evidence
 **    exceed classify_adapt_proto_thresh.  The list is ordered by increasing
 **    proto id number.
 **  Return:
 **    Number of good protos in ProtoArray.
 **  Exceptions: none
 **  History: Tue Mar 12 17:09:26 MST 1991, RWM, Created
 */
  ScratchEvidence *tables = new ScratchEvidence();
  int NumGoodProtos = 0;

  /* DEBUG opening heading */
  if (MatchDebuggingOn (Debug))
    cprintf
      ("Find Good Protos -------------------------------------------\n");

  tables->Clear(ClassTemplate);

  for (int Feature = 0; Feature < NumFeatures; Feature++)
    UpdateTablesForFeature(
        ClassTemplate, ProtoMask, ConfigMask, Feature, &(Features[Feature]),
        tables, Debug);

#ifndef GRAPHICS_DISABLED
  if (PrintProtoMatchesOn (Debug) || PrintMatchSummaryOn (Debug))
    DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables,
                           NumFeatures, Debug);
#endif

  /* Average Proto Evidences & Find Good Protos */
  for (int proto = 0; proto < ClassTemplate->NumProtos; proto++) {
    /* Compute Average for Actual Proto */
    int Temp = 0;
    for (int i = 0; i < ClassTemplate->ProtoLengths[proto]; i++)
      Temp += tables->proto_evidence_[proto][i];

    Temp /= ClassTemplate->ProtoLengths[proto];

    /* Find Good Protos */
    if (Temp >= AdaptProtoThreshold) {
      *ProtoArray = proto;
      ProtoArray++;
      NumGoodProtos++;
    }
  }

  if (MatchDebuggingOn (Debug))
    cprintf ("Match Complete --------------------------------------------\n");
  delete tables;

  return NumGoodProtos;
}


/*---------------------------------------------------------------------------*/
int
IntegerMatcher::FindBadFeatures(
    INT_CLASS ClassTemplate,
    BIT_VECTOR ProtoMask,
    BIT_VECTOR ConfigMask,
    uinT16 BlobLength,
    inT16 NumFeatures,
    INT_FEATURE_ARRAY Features,
    FEATURE_ID *FeatureArray,
    int AdaptFeatureThreshold,
    int Debug) {
/*
 **  Parameters:
 **    ClassTemplate          Prototypes & tables for a class
 **    ProtoMask              AND Mask for proto word
 **    ConfigMask             AND Mask for config word
 **    BlobLength             Length of unormalized blob (unused here)
 **    NumFeatures            Number of features in blob
 **    Features               Array of features
 **    FeatureArray           Array of bad features
 **    AdaptFeatureThreshold  Threshold for bad features
 **    Debug                  Debugger flag: 1=debugger on
 **  Operation:
 **    FindBadFeatures finds all features with maximum feature-evidence <
 **    AdaptFeatureThresh. The list is ordered by increasing feature number.
 **  Return:
 **    Number of bad features in FeatureArray.
 **  History: Tue Mar 12 17:09:26 MST 1991, RWM, Created
 */
  ScratchEvidence *tables = new ScratchEvidence();
  int NumBadFeatures = 0;

  /* DEBUG opening heading */
  if (MatchDebuggingOn(Debug))
    cprintf("Find Bad Features -------------------------------------------\n");

  tables->Clear(ClassTemplate);

  for (int Feature = 0; Feature < NumFeatures; Feature++) {
    UpdateTablesForFeature(
        ClassTemplate, ProtoMask, ConfigMask, Feature, &Features[Feature],
        tables, Debug);

    /* Find Best Evidence for Current Feature */
    int best = 0;
    for (int i = 0; i < ClassTemplate->NumConfigs; i++)
      if (tables->feature_evidence_[i] > best)
        best = tables->feature_evidence_[i];

    /* Find Bad Features */
    if (best < AdaptFeatureThreshold) {
      *FeatureArray = Feature;
      FeatureArray++;
      NumBadFeatures++;
    }
  }

#ifndef GRAPHICS_DISABLED
  if (PrintProtoMatchesOn(Debug) || PrintMatchSummaryOn(Debug))
    DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables,
                           NumFeatures, Debug);
#endif

  if (MatchDebuggingOn(Debug))
    cprintf("Match Complete --------------------------------------------\n");

  delete tables;
  return NumBadFeatures;
}


/*---------------------------------------------------------------------------*/
// Sets up the debug level, the default (char-norm) matcher multiplier, and
// precomputes the similarity->evidence lookup table and the shift/mask
// constants used by the evidence computation.
void IntegerMatcher::Init(tesseract::IntParam *classify_debug_level,
                          int classify_integer_matcher_multiplier) {
  classify_debug_level_ = classify_debug_level;

  /* Set default mode of operation of IntegerMatcher */
  SetCharNormMatch(classify_integer_matcher_multiplier);

  /* Initialize table for evidence to similarity lookup */
  for (int i = 0; i < SE_TABLE_SIZE; i++) {
    uinT32 IntSimilarity = i << (27 - SE_TABLE_BITS);
    double Similarity = ((double) IntSimilarity) / 65536.0 / 65536.0;
    // Sigmoid centered on kSimilarityCenter, mapped to 8-bit evidence.
    double evidence = Similarity / kSimilarityCenter;
    evidence = 255.0 / (evidence * evidence + 1.0);

    // NOTE: kSEExponentialMultiplier is defined as 0.0 above, so this
    // optional exponential damping branch is currently dead code.
    if (kSEExponentialMultiplier > 0.0) {
      double scale = 1.0 - exp(-kSEExponentialMultiplier) *
        exp(kSEExponentialMultiplier * ((double) i / SE_TABLE_SIZE));
      evidence *= ClipToRange(scale, 0.0, 1.0);
    }

    similarity_evidence_table_[i] = (uinT8) (evidence + 0.5);
  }

  /* Initialize evidence computation variables */
  evidence_table_mask_ =
      ((1 << kEvidenceTableBits) - 1) << (9 - kEvidenceTableBits);
  mult_trunc_shift_bits_ = (14 - kIntEvidenceTruncBits);
  table_trunc_shift_bits_ = (27 - SE_TABLE_BITS - (mult_trunc_shift_bits_ << 1));
  evidence_mult_mask_ = ((1 << kIntEvidenceTruncBits) - 1);
}

/*--------------------------------------------------------------------------*/
// Switches to baseline-normalized matching: no char-norm multiplier.
void IntegerMatcher::SetBaseLineMatch() {
  local_matcher_multiplier_ = 0;
}

/*--------------------------------------------------------------------------*/
// Switches to character-normalized matching with the given multiplier.
void IntegerMatcher::SetCharNormMatch(int integer_matcher_multiplier) {
  local_matcher_multiplier_ = integer_matcher_multiplier;
}


// Clears the accumulated per-config and per-proto evidence for a fresh match
// against class_template.
void ScratchEvidence::Clear(const INT_CLASS class_template) {
  memset(sum_feature_evidence_, 0,
         class_template->NumConfigs * sizeof(sum_feature_evidence_[0]));
  memset(proto_evidence_, 0,
         class_template->NumProtos * sizeof(proto_evidence_[0]));
}

// Clears only the per-config evidence for the current feature.
void ScratchEvidence::ClearFeatureEvidence(const INT_CLASS class_template) {
  memset(feature_evidence_, 0,
         class_template->NumConfigs * sizeof(feature_evidence_[0]));
}



/*---------------------------------------------------------------------------*/
// Note: ConfigMask is currently unused by this routine.
void IMDebugConfiguration(int FeatureNum,
                          uinT16 ActualProtoNum,
                          uinT8 Evidence,
                          BIT_VECTOR ConfigMask,
                          uinT32 ConfigWord) {
/*
 **  Parameters:
 **  Globals:
 **  Operation:
 **    Print debugging information for Configurations: the feature/proto pair,
 **    the evidence value and the config word as a bit string (LSB first).
 **  Return:
 **  Exceptions: none
 **  History: Wed Feb 27 14:12:28 MST 1991, RWM, Created.
 */
  cprintf ("F = %3d, P = %3d, E = %3d, Configs = ",
    FeatureNum, (int) ActualProtoNum, (int) Evidence);
  while (ConfigWord) {
    if (ConfigWord & 1)
      cprintf ("1");
    else
      cprintf ("0");
    ConfigWord >>= 1;
  }
  cprintf ("\n");
}


/*---------------------------------------------------------------------------*/
// Prints the per-config feature evidence accumulated for one feature.
void IMDebugConfigurationSum(int FeatureNum,
                             uinT8 *FeatureEvidence,
                             inT32 ConfigCount) {
/*
 **  Parameters:
 **  Globals:
 **  Operation:
 **    Print debugging information for Configurations
 **  Return:
 **  Exceptions: none
 **  History: Wed Feb 27 14:12:28 MST 1991, RWM, Created.
00795 */ 00796 cprintf("F=%3d, C=", FeatureNum); 00797 for (int ConfigNum = 0; ConfigNum < ConfigCount; ConfigNum++) { 00798 cprintf("%4d", FeatureEvidence[ConfigNum]); 00799 } 00800 cprintf("\n"); 00801 } 00802 00803 00804 00805 /*---------------------------------------------------------------------------*/ 00806 int IntegerMatcher::UpdateTablesForFeature( 00807 INT_CLASS ClassTemplate, 00808 BIT_VECTOR ProtoMask, 00809 BIT_VECTOR ConfigMask, 00810 int FeatureNum, 00811 const INT_FEATURE_STRUCT* Feature, 00812 ScratchEvidence *tables, 00813 int Debug) { 00814 /* 00815 ** Parameters: 00816 ** ClassTemplate Prototypes & tables for a class 00817 ** FeatureNum Current feature number (for DEBUG only) 00818 ** Feature Pointer to a feature struct 00819 ** tables Evidence tables 00820 ** Debug Debugger flag: 1=debugger on 00821 ** Operation: 00822 ** For the given feature: prune protos, compute evidence, 00823 ** update Feature Evidence, Proto Evidence, and Sum of Feature 00824 ** Evidence tables. 
** Return:
 **   Sum over all configurations of the evidence recorded for this feature.
 */
  register uinT32 ConfigWord;
  register uinT32 ProtoWord;
  register uinT32 ProtoNum;
  register uinT32 ActualProtoNum;
  uinT8 proto_byte;
  inT32 proto_word_offset;
  inT32 proto_offset;
  uinT8 config_byte;
  inT32 config_offset;
  PROTO_SET ProtoSet;
  uinT32 *ProtoPrunerPtr;
  INT_PROTO Proto;
  int ProtoSetIndex;
  uinT8 Evidence;
  uinT32 XFeatureAddress;
  uinT32 YFeatureAddress;
  uinT32 ThetaFeatureAddress;
  register uinT8 *UINT8Pointer;
  register int ProtoIndex;
  uinT8 Temp;
  register int *IntPointer;
  int ConfigNum;
  register inT32 M3;
  register inT32 A3;
  register uinT32 A4;

  tables->ClearFeatureEvidence(ClassTemplate);

  /* Precompute Feature Address offset for Proto Pruning. The pruner is
     indexed by the quantized X, Y and Theta of the feature (>> 2), with two
     32-bit words per bucket. */
  XFeatureAddress = ((Feature->X >> 2) << 1);
  YFeatureAddress = (NUM_PP_BUCKETS << 1) + ((Feature->Y >> 2) << 1);
  ThetaFeatureAddress = (NUM_PP_BUCKETS << 2) + ((Feature->Theta >> 2) << 1);

  for (ProtoSetIndex = 0, ActualProtoNum = 0;
       ProtoSetIndex < ClassTemplate->NumProtoSets; ProtoSetIndex++) {
    ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
    ProtoPrunerPtr = (uinT32 *) ((*ProtoSet).ProtoPruner);
    // Each pruner word covers half a proto set.
    for (ProtoNum = 0; ProtoNum < PROTOS_PER_PROTO_SET;
         ProtoNum += (PROTOS_PER_PROTO_SET >> 1), ActualProtoNum +=
         (PROTOS_PER_PROTO_SET >> 1), ProtoMask++, ProtoPrunerPtr++) {
      /* Prune Protos of current Proto Set: a proto survives only if its bit
         is set in the X, Y and Theta pruner words AND in ProtoMask. */
      ProtoWord = *(ProtoPrunerPtr + XFeatureAddress);
      ProtoWord &= *(ProtoPrunerPtr + YFeatureAddress);
      ProtoWord &= *(ProtoPrunerPtr + ThetaFeatureAddress);
      ProtoWord &= *ProtoMask;

      if (ProtoWord != 0) {
        // Walk the set bits of ProtoWord one byte at a time:
        // offset_table[b] is the index of the lowest set bit of byte b, and
        // next_table[b] is b with that lowest bit cleared.
        proto_byte = ProtoWord & 0xff;
        ProtoWord >>= 8;
        proto_word_offset = 0;
        while (ProtoWord != 0 || proto_byte != 0) {
          while (proto_byte == 0) {
            proto_byte = ProtoWord & 0xff;
            ProtoWord >>= 8;
            proto_word_offset += 8;
          }
          proto_offset = offset_table[proto_byte] + proto_word_offset;
          proto_byte = next_table[proto_byte];
          Proto = &(ProtoSet->Protos[ProtoNum + proto_offset]);
          ConfigWord = Proto->Configs[0];
          // A3: distance measure of the feature from the proto's line
          // (A*x - B*y + C form); M3: angular difference between the feature
          // and the proto, scaled by kIntThetaFudge.
          A3 = (((Proto->A * (Feature->X - 128)) << 1)
                - (Proto->B * (Feature->Y - 128)) + (Proto->C << 9));
          M3 =
            (((inT8) (Feature->Theta - Proto->Angle)) * kIntThetaFudge) << 1;

          // ~x == -x - 1: a cheap one-instruction approximation of |x|.
          if (A3 < 0)
            A3 = ~A3;
          if (M3 < 0)
            M3 = ~M3;
          // Truncate both components and clip them to the multiplier range.
          A3 >>= mult_trunc_shift_bits_;
          M3 >>= mult_trunc_shift_bits_;
          if (A3 > evidence_mult_mask_)
            A3 = evidence_mult_mask_;
          if (M3 > evidence_mult_mask_)
            M3 = evidence_mult_mask_;

          // Squared distance -> 8-bit evidence via the precomputed table;
          // anything beyond the table range counts as zero evidence.
          A4 = (A3 * A3) + (M3 * M3);
          A4 >>= table_trunc_shift_bits_;
          if (A4 > evidence_table_mask_)
            Evidence = 0;
          else
            Evidence = similarity_evidence_table_[A4];

          if (PrintFeatureMatchesOn (Debug))
            IMDebugConfiguration (FeatureNum,
                                  ActualProtoNum + proto_offset,
                                  Evidence, ConfigMask, ConfigWord);

          ConfigWord &= *ConfigMask;

          // For every config this proto belongs to, keep the maximum
          // evidence seen for the current feature. Same byte-walk as above;
          // the pointer starts at -8 so the += 8 in the inner loop lands on
          // offset 0 for the first byte.
          UINT8Pointer = tables->feature_evidence_ - 8;
          config_byte = 0;
          while (ConfigWord != 0 || config_byte != 0) {
            while (config_byte == 0) {
              config_byte = ConfigWord & 0xff;
              ConfigWord >>= 8;
              UINT8Pointer += 8;
            }
            config_offset = offset_table[config_byte];
            config_byte = next_table[config_byte];
            if (Evidence > UINT8Pointer[config_offset])
              UINT8Pointer[config_offset] = Evidence;
          }

          // Insert Evidence into this proto's descending-sorted evidence
          // list, rotating smaller values down; stop early once a zero
          // would be pushed along (nothing smaller can follow).
          UINT8Pointer =
            &(tables->proto_evidence_[ActualProtoNum + proto_offset][0]);
          for (ProtoIndex =
               ClassTemplate->ProtoLengths[ActualProtoNum + proto_offset];
               ProtoIndex > 0; ProtoIndex--, UINT8Pointer++) {
            if (Evidence > *UINT8Pointer) {
              Temp = *UINT8Pointer;
              *UINT8Pointer = Evidence;
              Evidence = Temp;
            }
            else if (Evidence == 0)
              break;
          }
        }
      }
    }
  }

  if (PrintFeatureMatchesOn(Debug)) {
    IMDebugConfigurationSum(FeatureNum, tables->feature_evidence_,
                            ClassTemplate->NumConfigs);
  }

  // Accumulate this feature's per-config evidence into the running sums and
  // return the total over all configs.
  IntPointer = tables->sum_feature_evidence_;
  UINT8Pointer = tables->feature_evidence_;
  int SumOverConfigs = 0;
  for (ConfigNum = ClassTemplate->NumConfigs; ConfigNum > 0; ConfigNum--) {
    int evidence = *UINT8Pointer++;
    SumOverConfigs += evidence;
    *IntPointer++ += evidence;
  }
  return SumOverConfigs;
}


/*---------------------------------------------------------------------------*/
#ifndef GRAPHICS_DISABLED
void IntegerMatcher::DebugFeatureProtoError(
    INT_CLASS ClassTemplate,
    BIT_VECTOR ProtoMask,
    BIT_VECTOR ConfigMask,
    const ScratchEvidence& tables,
    inT16 NumFeatures,
    int Debug) {
/*
 ** Operation:
 **   Print debugging information for the match recorded in tables: the
 **   config and proto masks, per-config feature/proto error percentages,
 **   and (optionally) the raw per-proto evidence values.
*/
  FLOAT32 ProtoConfigs[MAX_NUM_CONFIGS];  // per-config sum of proto evidence
  int ConfigNum;
  uinT32 ConfigWord;
  int ProtoSetIndex;
  uinT16 ProtoNum;
  uinT8 ProtoWordNum;
  PROTO_SET ProtoSet;
  uinT16 ActualProtoNum;

  if (PrintMatchSummaryOn(Debug)) {
    // Config mask, one bit per configuration, LSB first.
    cprintf("Configuration Mask:\n");
    for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++)
      cprintf("%1d", (((*ConfigMask) >> ConfigNum) & 1));
    cprintf("\n");

    // Feature error: 1 - (normalized evidence sum), as a percentage.
    cprintf("Feature Error for Configurations:\n");
    for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++) {
      cprintf(
          " %5.1f",
          100.0 * (1.0 -
                   (FLOAT32) tables.sum_feature_evidence_[ConfigNum]
                   / NumFeatures / 256.0));
    }
    cprintf("\n\n\n");
  }

  if (PrintMatchSummaryOn (Debug)) {
    // Proto mask, printed one pruner word (half a proto set) per row.
    cprintf ("Proto Mask:\n");
    for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets;
         ProtoSetIndex++) {
      ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
      for (ProtoWordNum = 0; ProtoWordNum < 2;
           ProtoWordNum++, ProtoMask++) {
        ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
        for (ProtoNum = 0;
             ((ProtoNum < (PROTOS_PER_PROTO_SET >> 1))
              && (ActualProtoNum < ClassTemplate->NumProtos));
             ProtoNum++, ActualProtoNum++)
          cprintf ("%1d", (((*ProtoMask) >> ProtoNum) & 1));
        cprintf ("\n");
      }
    }
    cprintf ("\n");
  }

  for (int i = 0; i < ClassTemplate->NumConfigs; i++)
    ProtoConfigs[i] = 0;

  if (PrintProtoMatchesOn (Debug)) {
    // Dump each proto's raw evidence bytes, their average, and the share
    // credited to each config the proto belongs to.
    cprintf ("Proto Evidence:\n");
    for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets;
         ProtoSetIndex++) {
      ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
      ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
      for (ProtoNum = 0;
           ((ProtoNum < PROTOS_PER_PROTO_SET) &&
            (ActualProtoNum < ClassTemplate->NumProtos));
           ProtoNum++, ActualProtoNum++) {
        cprintf ("P %3d =", ActualProtoNum);
        int temp = 0;
        for (int j = 0; j < ClassTemplate->ProtoLengths[ActualProtoNum]; j++) {
          uinT8 data = tables.proto_evidence_[ActualProtoNum][j];
          cprintf(" %d", data);
          temp += data;
        }

        cprintf(" = %6.4f%%\n",
                temp / 256.0 / ClassTemplate->ProtoLengths[ActualProtoNum]);

        // Credit this proto's evidence sum to every config it is part of.
        ConfigWord = ProtoSet->Protos[ProtoNum].Configs[0];
        ConfigNum = 0;
        while (ConfigWord) {
          cprintf ("%5d", ConfigWord & 1 ? temp : 0);
          if (ConfigWord & 1)
            ProtoConfigs[ConfigNum] += temp;
          ConfigNum++;
          ConfigWord >>= 1;
        }
        cprintf("\n");
      }
    }
  }

  if (PrintMatchSummaryOn (Debug)) {
    // Proto error: 1 - (proto evidence / config length), as a percentage.
    cprintf ("Proto Error for Configurations:\n");
    for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++)
      cprintf (" %5.1f",
               100.0 * (1.0 -
                        ProtoConfigs[ConfigNum] /
                        ClassTemplate->ConfigLengths[ConfigNum] / 256.0));
    cprintf ("\n\n");
  }

  if (PrintProtoMatchesOn (Debug)) {
    cprintf ("Proto Sum for Configurations:\n");
    for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++)
      cprintf (" %4.1f", ProtoConfigs[ConfigNum] / 256.0);
    cprintf ("\n\n");

    cprintf ("Proto Length for Configurations:\n");
    for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++)
      cprintf (" %4.1f",
               (float) ClassTemplate->ConfigLengths[ConfigNum]);
    cprintf ("\n\n");
  }

}


/*---------------------------------------------------------------------------*/
// Displays (in the ScrollView debug windows) every proto whose configs
// intersect ConfigMask, colored by its average evidence.
void IntegerMatcher::DisplayProtoDebugInfo(
    INT_CLASS ClassTemplate,
    BIT_VECTOR ProtoMask,
    BIT_VECTOR ConfigMask,
    const ScratchEvidence& tables,
    bool SeparateDebugWindows) {
  uinT16 ProtoNum;
  uinT16 ActualProtoNum;
  PROTO_SET ProtoSet;
  int ProtoSetIndex;

  InitIntMatchWindowIfReqd();
  if (SeparateDebugWindows) {
    InitFeatureDisplayWindowIfReqd();
    InitProtoDisplayWindowIfReqd();
  }


  for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets;
       ProtoSetIndex++) {
    ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
    ActualProtoNum = ProtoSetIndex * PROTOS_PER_PROTO_SET;
    for (ProtoNum = 0;
         ((ProtoNum < PROTOS_PER_PROTO_SET) &&
          (ActualProtoNum < ClassTemplate->NumProtos));
         ProtoNum++, ActualProtoNum++) {
      /* Compute Average for Actual Proto */
      int temp = 0;
      for (int i = 0; i < ClassTemplate->ProtoLengths[ActualProtoNum]; i++)
        temp += tables.proto_evidence_[ActualProtoNum][i];

      temp /= ClassTemplate->ProtoLengths[ActualProtoNum];

      // Only draw protos that belong to at least one unmasked config.
      if ((ProtoSet->Protos[ProtoNum]).Configs[0] & (*ConfigMask)) {
        DisplayIntProto(ClassTemplate, ActualProtoNum, temp / 255.0);
      }
    }
  }
}


/*---------------------------------------------------------------------------*/
// Runs the matcher over Features and displays each feature colored by the
// best evidence it achieved over all configs (or thresholded at
// AdaptFeatureThreshold when clipping is enabled in Debug).
void IntegerMatcher::DisplayFeatureDebugInfo(
    INT_CLASS ClassTemplate,
    BIT_VECTOR ProtoMask,
    BIT_VECTOR ConfigMask,
    inT16 NumFeatures,
    const INT_FEATURE_STRUCT* Features,
    int AdaptFeatureThreshold,
    int Debug,
    bool SeparateDebugWindows) {
  // Scratch tables for the evidence computation; freed before returning.
  ScratchEvidence *tables = new ScratchEvidence();

  tables->Clear(ClassTemplate);

  InitIntMatchWindowIfReqd();
  if (SeparateDebugWindows) {
    InitFeatureDisplayWindowIfReqd();
    InitProtoDisplayWindowIfReqd();
  }

  for (int Feature = 0; Feature < NumFeatures; Feature++) {
    UpdateTablesForFeature(
        ClassTemplate, ProtoMask, ConfigMask, Feature, &Features[Feature],
        tables, 0);

    /* Find Best Evidence for Current Feature */
    int best = 0;
    for (int i = 0; i < ClassTemplate->NumConfigs; i++)
      if (tables->feature_evidence_[i] > best)
        best = tables->feature_evidence_[i];

    /* Update display for current feature */
    if (ClipMatchEvidenceOn(Debug)) {
      // Binary display: feature either passed the adaptation threshold
      // or it did not.
      if (best < AdaptFeatureThreshold)
        DisplayIntFeature(&Features[Feature], 0.0);
      else
        DisplayIntFeature(&Features[Feature], 1.0);
    } else {
      DisplayIntFeature(&Features[Feature], best / 255.0);
    }
  }

  delete tables;
}
#endif

/*---------------------------------------------------------------------------*/
// Add sum of Proto Evidences into Sum Of Feature Evidence Array
void ScratchEvidence::UpdateSumOfProtoEvidences(
    INT_CLASS ClassTemplate, BIT_VECTOR ConfigMask, inT16 NumFeatures) {

  int *IntPointer;
  uinT32 ConfigWord;
  int ProtoSetIndex;
  uinT16 ProtoNum;
  PROTO_SET ProtoSet;
  int NumProtos;
  uinT16 ActualProtoNum;

  NumProtos = ClassTemplate->NumProtos;

  for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets;
       ProtoSetIndex++) {
    ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
    ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
    for (ProtoNum = 0;
         ((ProtoNum < PROTOS_PER_PROTO_SET) && (ActualProtoNum < NumProtos));
         ProtoNum++, ActualProtoNum++) {
      // Total evidence recorded for this proto over all features.
      int temp = 0;
      for (int i = 0; i < ClassTemplate->ProtoLengths[ActualProtoNum]; i++)
        temp += proto_evidence_[ActualProtoNum][i];

      // Add that total into the sum of every unmasked config that contains
      // this proto (one config per bit of ConfigWord, LSB first).
      ConfigWord = ProtoSet->Protos[ProtoNum].Configs[0];
      ConfigWord &= *ConfigMask;
      IntPointer = sum_feature_evidence_;
      while (ConfigWord) {
        if (ConfigWord & 1)
          *IntPointer += temp;
        IntPointer++;
        ConfigWord >>= 1;
      }
    }
  }
}



/*---------------------------------------------------------------------------*/
// Normalize Sum of Proto and Feature Evidence by dividing by the sum of
// the Feature Lengths and the Proto Lengths for each configuration.
each configuration. 01225 void ScratchEvidence::NormalizeSums( 01226 INT_CLASS ClassTemplate, inT16 NumFeatures, inT32 used_features) { 01227 01228 for (int i = 0; i < ClassTemplate->NumConfigs; i++) { 01229 sum_feature_evidence_[i] = (sum_feature_evidence_[i] << 8) / 01230 (NumFeatures + ClassTemplate->ConfigLengths[i]); 01231 } 01232 } 01233 01234 01235 /*---------------------------------------------------------------------------*/ 01236 int IntegerMatcher::FindBestMatch( 01237 INT_CLASS ClassTemplate, 01238 const ScratchEvidence &tables, 01239 INT_RESULT Result) { 01240 /* 01241 ** Parameters: 01242 ** Globals: 01243 ** Operation: 01244 ** Find the best match for the current class and update the Result 01245 ** with the configuration and match rating. 01246 ** Return: 01247 ** The best normalized sum of evidences 01248 ** Exceptions: none 01249 ** History: Wed Feb 27 14:12:28 MST 1991, RWM, Created. 01250 */ 01251 int BestMatch = 0; 01252 int Best2Match = 0; 01253 Result->Config = 0; 01254 Result->Config2 = 0; 01255 01256 /* Find best match */ 01257 for (int ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++) { 01258 int rating = tables.sum_feature_evidence_[ConfigNum]; 01259 if (*classify_debug_level_ > 2) 01260 cprintf("Config %d, rating=%d\n", ConfigNum, rating); 01261 if (rating > BestMatch) { 01262 if (BestMatch > 0) { 01263 Result->Config2 = Result->Config; 01264 Best2Match = BestMatch; 01265 } else { 01266 Result->Config2 = ConfigNum; 01267 } 01268 Result->Config = ConfigNum; 01269 BestMatch = rating; 01270 } else if (rating > Best2Match) { 01271 Result->Config2 = ConfigNum; 01272 Best2Match = rating; 01273 } 01274 } 01275 01276 /* Compute Certainty Rating */ 01277 Result->Rating = (65536.0 - BestMatch) / 65536.0; 01278 01279 return BestMatch; 01280 } 01281 01282 // Applies the CN normalization factor to the given rating and returns 01283 // the modified rating. 
01284 float IntegerMatcher::ApplyCNCorrection(float rating, int blob_length, 01285 int normalization_factor) { 01286 return (rating * blob_length + 01287 local_matcher_multiplier_ * normalization_factor / 256.0) / 01288 (blob_length + local_matcher_multiplier_); 01289 } 01290 01291 /*---------------------------------------------------------------------------*/ 01292 #ifndef GRAPHICS_DISABLED 01293 // Print debug information about the best match for the current class. 01294 void IntegerMatcher::DebugBestMatch( 01295 int BestMatch, INT_RESULT Result) { 01296 tprintf("Rating = %5.1f%% Best Config = %3d, Distance = %5.1f\n", 01297 100.0 * Result->Rating, Result->Config, 01298 100.0 * (65536.0 - BestMatch) / 65536.0); 01299 } 01300 #endif 01301 01302 /*---------------------------------------------------------------------------*/ 01303 void 01304 HeapSort (int n, register int ra[], register int rb[]) { 01305 /* 01306 ** Parameters: 01307 ** n Number of elements to sort 01308 ** ra Key array [1..n] 01309 ** rb Index array [1..n] 01310 ** Globals: 01311 ** Operation: 01312 ** Sort Key array in ascending order using heap sort 01313 ** algorithm. Also sort Index array that is tied to 01314 ** the key array. 01315 ** Return: 01316 ** Exceptions: none 01317 ** History: Tue Feb 19 10:24:24 MST 1991, RWM, Created. 01318 */ 01319 register int i, rra, rrb; 01320 int l, j, ir; 01321 01322 l = (n >> 1) + 1; 01323 ir = n; 01324 for (;;) { 01325 if (l > 1) { 01326 rra = ra[--l]; 01327 rrb = rb[l]; 01328 } 01329 else { 01330 rra = ra[ir]; 01331 rrb = rb[ir]; 01332 ra[ir] = ra[1]; 01333 rb[ir] = rb[1]; 01334 if (--ir == 1) { 01335 ra[1] = rra; 01336 rb[1] = rrb; 01337 return; 01338 } 01339 } 01340 i = l; 01341 j = l << 1; 01342 while (j <= ir) { 01343 if (j < ir && ra[j] < ra[j + 1]) 01344 ++j; 01345 if (rra < ra[j]) { 01346 ra[i] = ra[j]; 01347 rb[i] = rb[j]; 01348 j += (i = j); 01349 } 01350 else 01351 j = ir + 1; 01352 } 01353 ra[i] = rra; 01354 rb[i] = rrb; 01355 } 01356 }