Tesseract  3.02
tesseract-ocr/classify/intmatcher.cpp
Go to the documentation of this file.
00001 /******************************************************************************
00002  **      Filename:    intmatcher.c
00003  **      Purpose:     Generic high level classification routines.
00004  **      Author:      Robert Moss
00005  **      History:     Wed Feb 13 17:35:28 MST 1991, RWM, Created.
00006  **                   Mon Mar 11 16:33:02 MST 1991, RWM, Modified to add
00007  **                        support for adaptive matching.
00008  **      (c) Copyright Hewlett-Packard Company, 1988.
00009  ** Licensed under the Apache License, Version 2.0 (the "License");
00010  ** you may not use this file except in compliance with the License.
00011  ** You may obtain a copy of the License at
00012  ** http://www.apache.org/licenses/LICENSE-2.0
00013  ** Unless required by applicable law or agreed to in writing, software
00014  ** distributed under the License is distributed on an "AS IS" BASIS,
00015  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00016  ** See the License for the specific language governing permissions and
00017  ** limitations under the License.
00018  ******************************************************************************/
00019 /*----------------------------------------------------------------------------
00020                           Include Files and Type Defines
00021 ----------------------------------------------------------------------------*/
00022 #include "intmatcher.h"
00023 #include "intproto.h"
00024 #include "callcpp.h"
00025 #include "scrollview.h"
00026 #include "float2int.h"
00027 #include "globals.h"
00028 #include "helpers.h"
00029 #include "classify.h"
00030 #include "shapetable.h"
00031 #include <math.h>
00032 
00033 // Include automatically generated configuration file if running autoconf.
00034 #ifdef HAVE_CONFIG_H
00035 #include "config_auto.h"
00036 #endif
00037 
00038 /*----------------------------------------------------------------------------
00039                     Global Data Definitions and Declarations
00040 ----------------------------------------------------------------------------*/
00041 // Parameters of the sigmoid used to convert similarity to evidence in the
00042 // similarity_evidence_table_ that is used to convert distance metric to an
00043 // 8 bit evidence value in the secondary matcher. (See IntMatcher::Init).
const float IntegerMatcher::kSEExponentialMultiplier = 0.0;
const float IntegerMatcher::kSimilarityCenter = 0.0075;

// offset_table[i] is the bit-index of the lowest set bit of i, or 255 when
// i == 0 (no bits set). Together with next_table below it allows fast
// iteration over the set bits of a byte.
static const uinT8 offset_table[256] = {
  255, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
};

// next_table[i] is i with its lowest set bit cleared, i.e. the value to feed
// back into offset_table to find the next set bit after processing the one
// located by offset_table[i].
static const uinT8 next_table[256] = {
  0, 0, 0, 0x2, 0, 0x4, 0x4, 0x6, 0, 0x8, 0x8, 0x0a, 0x08, 0x0c, 0x0c, 0x0e,
  0, 0x10, 0x10, 0x12, 0x10, 0x14, 0x14, 0x16, 0x10, 0x18, 0x18, 0x1a, 0x18,
  0x1c, 0x1c, 0x1e,
  0, 0x20, 0x20, 0x22, 0x20, 0x24, 0x24, 0x26, 0x20, 0x28, 0x28, 0x2a, 0x28,
  0x2c, 0x2c, 0x2e,
  0x20, 0x30, 0x30, 0x32, 0x30, 0x34, 0x34, 0x36, 0x30, 0x38, 0x38, 0x3a,
  0x38, 0x3c, 0x3c, 0x3e,
  0, 0x40, 0x40, 0x42, 0x40, 0x44, 0x44, 0x46, 0x40, 0x48, 0x48, 0x4a, 0x48,
  0x4c, 0x4c, 0x4e,
  0x40, 0x50, 0x50, 0x52, 0x50, 0x54, 0x54, 0x56, 0x50, 0x58, 0x58, 0x5a,
  0x58, 0x5c, 0x5c, 0x5e,
  0x40, 0x60, 0x60, 0x62, 0x60, 0x64, 0x64, 0x66, 0x60, 0x68, 0x68, 0x6a,
  0x68, 0x6c, 0x6c, 0x6e,
  0x60, 0x70, 0x70, 0x72, 0x70, 0x74, 0x74, 0x76, 0x70, 0x78, 0x78, 0x7a,
  0x78, 0x7c, 0x7c, 0x7e,
  0, 0x80, 0x80, 0x82, 0x80, 0x84, 0x84, 0x86, 0x80, 0x88, 0x88, 0x8a, 0x88,
  0x8c, 0x8c, 0x8e,
  0x80, 0x90, 0x90, 0x92, 0x90, 0x94, 0x94, 0x96, 0x90, 0x98, 0x98, 0x9a,
  0x98, 0x9c, 0x9c, 0x9e,
  0x80, 0xa0, 0xa0, 0xa2, 0xa0, 0xa4, 0xa4, 0xa6, 0xa0, 0xa8, 0xa8, 0xaa,
  0xa8, 0xac, 0xac, 0xae,
  0xa0, 0xb0, 0xb0, 0xb2, 0xb0, 0xb4, 0xb4, 0xb6, 0xb0, 0xb8, 0xb8, 0xba,
  0xb8, 0xbc, 0xbc, 0xbe,
  0x80, 0xc0, 0xc0, 0xc2, 0xc0, 0xc4, 0xc4, 0xc6, 0xc0, 0xc8, 0xc8, 0xca,
  0xc8, 0xcc, 0xcc, 0xce,
  0xc0, 0xd0, 0xd0, 0xd2, 0xd0, 0xd4, 0xd4, 0xd6, 0xd0, 0xd8, 0xd8, 0xda,
  0xd8, 0xdc, 0xdc, 0xde,
  0xc0, 0xe0, 0xe0, 0xe2, 0xe0, 0xe4, 0xe4, 0xe6, 0xe0, 0xe8, 0xe8, 0xea,
  0xe8, 0xec, 0xec, 0xee,
  0xe0, 0xf0, 0xf0, 0xf2, 0xf0, 0xf4, 0xf4, 0xf6, 0xf0, 0xf8, 0xf8, 0xfa,
  0xf8, 0xfc, 0xfc, 0xfe
};
00099 
00100 namespace tesseract {
00101 
00102 // Encapsulation of the intermediate data and computations made by the class
00103 // pruner. The class pruner implements a simple linear classifier on binary
00104 // features by heavily quantizing the feature space, and applying
00105 // NUM_BITS_PER_CLASS (2)-bit weights to the features. Lack of resolution in
00106 // weights is compensated by a non-constant bias that is dependent on the
00107 // number of features present.
class ClassPruner {
 public:
  // Allocates the per-class working arrays, sized for up to max_classes
  // classes, rounded up (rounded_classes_) for the unrolled loop below.
  ClassPruner(int max_classes) {
    // The unrolled loop in ComputeScores means that the array sizes need to
    // be rounded up so that the array is big enough to accommodate the extra
    // entries accessed by the unrolling. Each pruner word is of sized
    // BITS_PER_WERD and each entry is NUM_BITS_PER_CLASS, so there are
    // BITS_PER_WERD / NUM_BITS_PER_CLASS entries.
    // See ComputeScores.
    max_classes_ = max_classes;
    rounded_classes_ = RoundUp(
        max_classes, WERDS_PER_CP_VECTOR * BITS_PER_WERD / NUM_BITS_PER_CLASS);
    class_count_ = new int[rounded_classes_];
    norm_count_ = new int[rounded_classes_];
    sort_key_ = new int[rounded_classes_ + 1];
    sort_index_ = new int[rounded_classes_ + 1];
    // Only class_count_ needs zeroing up front; ComputeScores accumulates
    // into it. The other arrays are fully written before being read.
    for (int i = 0; i < rounded_classes_; i++) {
      class_count_[i] = 0;
    }
    pruning_threshold_ = 0;
    num_features_ = 0;
    num_classes_ = 0;
  }

  // Frees the working arrays.
  // NOTE(review): raw owning pointers with no copy ctor/assignment, so the
  // class is not copy-safe; it is used as a stack local in PruneClasses.
  ~ClassPruner() {
    delete []class_count_;
    delete []norm_count_;
    delete []sort_key_;
    delete []sort_index_;
  }

  // Computes the scores for every class in the character set, by summing the
  // weights for each feature and stores the sums internally in class_count_.
  void ComputeScores(const INT_TEMPLATES_STRUCT* int_templates,
                     int num_features, const INT_FEATURE_STRUCT* features) {
    num_features_ = num_features;
    int num_pruners = int_templates->NumClassPruners;
    for (int f = 0; f < num_features; ++f) {
      const INT_FEATURE_STRUCT* feature = &features[f];
      // Quantize the feature to NUM_CP_BUCKETS*NUM_CP_BUCKETS*NUM_CP_BUCKETS.
      int x = feature->X * NUM_CP_BUCKETS >> 8;
      int y = feature->Y * NUM_CP_BUCKETS >> 8;
      int theta = feature->Theta * NUM_CP_BUCKETS >> 8;
      int class_id = 0;
      // Each CLASS_PRUNER_STRUCT only covers CLASSES_PER_CP(32) classes, so
      // we need a collection of them, indexed by pruner_set.
      for (int pruner_set = 0; pruner_set < num_pruners; ++pruner_set) {
        // Look up quantized feature in a 3-D array, an array of weights for
        // each class.
        const uinT32* pruner_word_ptr =
            int_templates->ClassPruners[pruner_set]->p[x][y][theta];
        for (int word = 0; word < WERDS_PER_CP_VECTOR; ++word) {
          uinT32 pruner_word = *pruner_word_ptr++;
          // This inner loop is unrolled to speed up the ClassPruner.
          // Currently gcc would not unroll it unless it is set to O3
          // level of optimization or -funroll-loops is specified.
          /*
          uinT32 class_mask = (1 << NUM_BITS_PER_CLASS) - 1;
          for (int bit = 0; bit < BITS_PER_WERD/NUM_BITS_PER_CLASS; bit++) {
            class_count_[class_id++] += pruner_word & class_mask;
            pruner_word >>= NUM_BITS_PER_CLASS;
          }
          */
          // 16 unrolled iterations: one NUM_BITS_PER_CLASS(2)-bit weight per
          // class extracted from the 32-bit pruner word. This is why the
          // arrays are rounded up in the constructor: class_id may run past
          // max_classes_ into the padding.
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
        }
      }
    }
  }

  // Adjusts the scores according to the number of expected features. Used
  // in lieu of a constant bias, this penalizes classes that expect more
  // features than there are present. Thus an actual c will score higher for c
  // than e, even though almost all the features match e as well as c, because
  // e expects more features to be present.
  void AdjustForExpectedNumFeatures(const uinT16* expected_num_features,
                                    int cutoff_strength) {
    for (int class_id = 0; class_id < max_classes_; ++class_id) {
      if (num_features_ < expected_num_features[class_id]) {
        int deficit = expected_num_features[class_id] - num_features_;
        // Scale the count down by deficit / (num_features_*cutoff_strength
        // + deficit): a soft penalty that grows with the deficit.
        class_count_[class_id] -= class_count_[class_id] * deficit /
          (num_features_ * cutoff_strength + deficit);
      }
    }
  }

  // Zeros the scores for classes disabled in the unicharset.
  // Implements the black-list to recognize a subset of the character set.
  void DisableDisabledClasses(const UNICHARSET& unicharset) {
    for (int class_id = 0; class_id < max_classes_; ++class_id) {
      if (!unicharset.get_enabled(class_id))
        class_count_[class_id] = 0;  // This char is disabled!
    }
  }

  // Zeros the scores of fragments.
  void DisableFragments(const UNICHARSET& unicharset) {
    for (int class_id = 0; class_id < max_classes_; ++class_id) {
      // Do not include character fragments in the class pruner
      // results if disable_character_fragments is true.
      if (unicharset.get_fragment(class_id)) {
        class_count_[class_id] = 0;
      }
    }
  }

  // Normalizes the counts for xheight, putting the normalized result in
  // norm_count_. Applies a simple subtractive penalty for incorrect vertical
  // position provided by the normalization_factors array, indexed by
  // character class, and scaled by the norm_multiplier.
  void NormalizeForXheight(int norm_multiplier,
                           const uinT8* normalization_factors) {
    for (int class_id = 0; class_id < max_classes_; class_id++) {
      norm_count_[class_id] = class_count_[class_id] -
          ((norm_multiplier * normalization_factors[class_id]) >> 8);
    }
  }

  // The nop normalization copies the class_count_ array to norm_count_.
  void NoNormalization() {
    for (int class_id = 0; class_id < max_classes_; class_id++) {
      norm_count_[class_id] = class_count_[class_id];
    }
  }

  // Prunes the classes using <the maximum count> * pruning_factor/256 as a
  // threshold for keeping classes. If max_of_non_fragments, then ignore
  // fragments in computing the maximum count.
  void PruneAndSort(int pruning_factor, bool max_of_non_fragments,
                    const UNICHARSET& unicharset) {
    int max_count = 0;
    for (int c = 0; c < max_classes_; ++c) {
      if (norm_count_[c] > max_count &&
          // This additional check is added in order to ensure that
          // the classifier will return at least one non-fragmented
          // character match.
          // TODO(daria): verify that this helps accuracy and does not
          // hurt performance.
          (!max_of_non_fragments || !unicharset.get_fragment(c))) {
        max_count = norm_count_[c];
      }
    }
    // Prune Classes.
    pruning_threshold_ = (max_count * pruning_factor) >> 8;
    // Select Classes.
    if (pruning_threshold_ < 1)
      pruning_threshold_ = 1;
    num_classes_ = 0;
    // sort_key_/sort_index_ are filled starting at index 1 (HeapSort
    // convention); after sorting, the best-scoring class ends up at index
    // num_classes_ (see SetupResults/SummarizeResult, which read backwards).
    for (int class_id = 0; class_id < max_classes_; class_id++) {
      if (norm_count_[class_id] >= pruning_threshold_) {
          ++num_classes_;
        sort_index_[num_classes_] = class_id;
        sort_key_[num_classes_] = norm_count_[class_id];
      }
    }

    // Sort Classes using Heapsort Algorithm.
    if (num_classes_ > 1)
      HeapSort(num_classes_, sort_key_, sort_index_);
  }

  // Prints debug info on the class pruner matches for the pruned classes only.
  void DebugMatch(const Classify& classify,
                  const INT_TEMPLATES_STRUCT* int_templates,
                  const INT_FEATURE_STRUCT* features) const {
    int num_pruners = int_templates->NumClassPruners;
    int max_num_classes = int_templates->NumClasses;
    for (int f = 0; f < num_features_; ++f) {
      const INT_FEATURE_STRUCT* feature = &features[f];
      tprintf("F=%3d(%d,%d,%d),", f, feature->X, feature->Y, feature->Theta);
      // Quantize the feature to NUM_CP_BUCKETS*NUM_CP_BUCKETS*NUM_CP_BUCKETS.
      int x = feature->X * NUM_CP_BUCKETS >> 8;
      int y = feature->Y * NUM_CP_BUCKETS >> 8;
      int theta = feature->Theta * NUM_CP_BUCKETS >> 8;
      int class_id = 0;
      for (int pruner_set = 0; pruner_set < num_pruners; ++pruner_set) {
        // Look up quantized feature in a 3-D array, an array of weights for
        // each class.
        const uinT32* pruner_word_ptr =
            int_templates->ClassPruners[pruner_set]->p[x][y][theta];
        for (int word = 0; word < WERDS_PER_CP_VECTOR; ++word) {
          uinT32 pruner_word = *pruner_word_ptr++;
          // Print the per-class weight only for classes that survived the
          // prune (norm_count_ >= pruning_threshold_).
          for (int word_class = 0; word_class < 16 &&
               class_id < max_num_classes; ++word_class, ++class_id) {
            if (norm_count_[class_id] >= pruning_threshold_) {
              tprintf(" %s=%d,",
                      classify.ClassIDToDebugStr(int_templates,
                                                 class_id, 0).string(),
                      pruner_word & CLASS_PRUNER_CLASS_MASK);
            }
            pruner_word >>= NUM_BITS_PER_CLASS;
          }
        }
        tprintf("\n");
      }
    }
  }

  // Prints a summary of the pruner result.
  void SummarizeResult(const Classify& classify,
                       const INT_TEMPLATES_STRUCT* int_templates,
                       const uinT16* expected_num_features,
                       int norm_multiplier,
                       const uinT8* normalization_factors) const {
    tprintf("CP:%d classes, %d features:\n", num_classes_, num_features_);
    for (int i = 0; i < num_classes_; ++i) {
      // Walk sort_index_ from the top down: best match first.
      int class_id = sort_index_[num_classes_ - i];
      STRING class_string = classify.ClassIDToDebugStr(int_templates,
                                                       class_id, 0);
      tprintf("%s:Initial=%d, E=%d, Xht-adj=%d, N=%d, Rat=%.2f\n",
              class_string.string(),
              class_count_[class_id],
              expected_num_features[class_id],
              (norm_multiplier * normalization_factors[class_id]) >> 8,
              sort_key_[num_classes_ - i],
              100.0 - 100.0 * sort_key_[num_classes_ - i] /
                (CLASS_PRUNER_CLASS_MASK * num_features_));
    }
  }

  // Copies the pruned, sorted classes into the output results and returns
  // the number of classes.
  int SetupResults(CP_RESULT_STRUCT* results) const {
    for (int c = 0; c < num_classes_; ++c) {
      // Best match first; Rating converts the count into a 0=good..1=bad
      // scale by normalizing against the maximum possible score.
      results[c].Class = sort_index_[num_classes_ - c];
      results[c].Rating = 1.0 - sort_key_[num_classes_ - c] /
        (static_cast<float>(CLASS_PRUNER_CLASS_MASK) * num_features_);
    }
    return num_classes_;
  }

 private:
  // Array[rounded_classes_] of initial counts for each class.
  int *class_count_;
  // Array[rounded_classes_] of modified counts for each class after normalizing
  // for expected number of features, disabled classes, fragments, and xheights.
  int *norm_count_;
  // Array[rounded_classes_ +1] of pruned counts that gets sorted
  int *sort_key_;
  // Array[rounded_classes_ +1] of classes corresponding to sort_key_.
  int *sort_index_;
  // Number of classes in this class pruner.
  int max_classes_;
  // Rounded up number of classes used for array sizes.
  int rounded_classes_;
  // Threshold count applied to prune classes.
  int pruning_threshold_;
  // The number of features used to compute the scores.
  int num_features_;
  // Final number of pruned classes.
  int num_classes_;
};
00389 
00390 /*----------------------------------------------------------------------------
00391               Public Code
00392 ----------------------------------------------------------------------------*/
00393 /*---------------------------------------------------------------------------*/
00394 // Runs the class pruner from int_templates on the given features, returning
00395 // the number of classes output in results.
00396 //    int_templates          Class pruner tables
00397 //    num_features           Number of features in blob
00398 //    features               Array of features
00399 //    normalization_factors  Array of fudge factors from blob
00400 //                           normalization process (by CLASS_INDEX)
00401 //    expected_num_features  Array of expected number of features
00402 //                           for each class (by CLASS_INDEX)
00403 //    results                Sorted Array of pruned classes. Must be an array
00404 //                           of size at least int_templates->NumClasses.
00405 int Classify::PruneClasses(const INT_TEMPLATES_STRUCT* int_templates,
00406                            int num_features,
00407                            const INT_FEATURE_STRUCT* features,
00408                            const uinT8* normalization_factors,
00409                            const uinT16* expected_num_features,
00410                            CP_RESULT_STRUCT* results) {
00411 /*
00412  **  Operation:
00413  **    Prunes the classes using a modified fast match table.
00414  **    Returns a sorted list of classes along with the number
00415  **      of pruned classes in that list.
00416  **  Return: Number of pruned classes.
00417  **  Exceptions: none
00418  **  History: Tue Feb 19 10:24:24 MST 1991, RWM, Created.
00419  */
00420   ClassPruner pruner(int_templates->NumClasses);
00421   // Compute initial match scores for all classes.
00422   pruner.ComputeScores(int_templates, num_features, features);
00423   // Adjust match scores for number of expected features.
00424   pruner.AdjustForExpectedNumFeatures(expected_num_features,
00425                                       classify_cp_cutoff_strength);
00426   // Apply disabled classes in unicharset - only works without a shape_table.
00427   if (shape_table_ == NULL)
00428     pruner.DisableDisabledClasses(unicharset);
00429   // If fragments are disabled, remove them, also only without a shape table.
00430   if (disable_character_fragments && shape_table_ == NULL)
00431     pruner.DisableFragments(unicharset);
00432 
00433   // If we have good x-heights, apply the given normalization factors.
00434   if (normalization_factors != NULL) {
00435     pruner.NormalizeForXheight(classify_class_pruner_multiplier,
00436                                normalization_factors);
00437   } else {
00438     pruner.NoNormalization();
00439   }
00440   // Do the actual pruning and sort the short-list.
00441   pruner.PruneAndSort(classify_class_pruner_threshold,
00442                       shape_table_ == NULL, unicharset);
00443 
00444   if (classify_debug_level > 2) {
00445     pruner.DebugMatch(*this, int_templates, features);
00446   }
00447   if (classify_debug_level > 1) {
00448     pruner.SummarizeResult(*this, int_templates, expected_num_features,
00449                            classify_class_pruner_multiplier,
00450                            normalization_factors);
00451   }
00452   // Convert to the expected output format.
00453   return pruner.SetupResults(results);
00454 }
00455 
00456 }  // namespace tesseract
00457 
00458 /*---------------------------------------------------------------------------*/
void IntegerMatcher::Match(INT_CLASS ClassTemplate,
                           BIT_VECTOR ProtoMask,
                           BIT_VECTOR ConfigMask,
                           inT16 NumFeatures,
                           const INT_FEATURE_STRUCT* Features,
                           INT_RESULT Result,
                           int AdaptFeatureThreshold,
                           int Debug,
                           bool SeparateDebugWindows) {
/*
 **      Parameters:
 **              ClassTemplate             Prototypes & tables for a class
 **              ProtoMask                 AND Mask for proto word
 **              ConfigMask                AND Mask for config word
 **              NumFeatures               Number of features in blob
 **              Features                  Array of features
 **              Result                    Class rating & configuration:
 **                                        (0.0 -> 1.0), 0=good, 1=bad
 **              AdaptFeatureThreshold     Threshold passed through to the
 **                                        feature debug display only
 **              Debug                     Debugger flag: 1=debugger on
 **              SeparateDebugWindows      Use separate debug display windows
 **      Operation:
 **              IntegerMatcher returns the best configuration and rating
 **              for a single class.  The class matched against is determined
 **              by the uniqueness of the ClassTemplate parameter.  The
 **              best rating and its associated configuration are returned
 **              through Result.
 **      Return: none
 **      Exceptions: none
 **      History: Tue Feb 19 16:36:23 MST 1991, RWM, Created.
 */
  // Scratch evidence tables, freed at the bottom of this function.
  // NOTE(review): bare new/delete -- leaks if anything in between throws;
  // presumably nothing on this path does.
  ScratchEvidence *tables = new ScratchEvidence();
  int Feature;
  int BestMatch;

  if (MatchDebuggingOn (Debug))
    cprintf ("Integer Matcher -------------------------------------------\n");

  tables->Clear(ClassTemplate);
  Result->FeatureMisses = 0;

  // Accumulate evidence for every feature; a feature that produces zero
  // evidence in every config counts as a miss.
  for (Feature = 0; Feature < NumFeatures; Feature++) {
    int csum = UpdateTablesForFeature(ClassTemplate, ProtoMask, ConfigMask,
                                      Feature, &Features[Feature],
                                      tables, Debug);
    // Count features that were missed over all configs.
    if (csum == 0)
      Result->FeatureMisses++;
  }

#ifndef GRAPHICS_DISABLED
  if (PrintProtoMatchesOn(Debug) || PrintMatchSummaryOn(Debug)) {
    DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables,
                           NumFeatures, Debug);
  }

  if (DisplayProtoMatchesOn(Debug)) {
    DisplayProtoDebugInfo(ClassTemplate, ProtoMask, ConfigMask,
                          *tables, SeparateDebugWindows);
  }

  if (DisplayFeatureMatchesOn(Debug)) {
    DisplayFeatureDebugInfo(ClassTemplate, ProtoMask, ConfigMask, NumFeatures,
                            Features, AdaptFeatureThreshold, Debug,
                            SeparateDebugWindows);
  }
#endif

  // Fold proto evidence into the per-config sums, then normalize so the
  // result is comparable across classes with different feature counts.
  tables->UpdateSumOfProtoEvidences(ClassTemplate, ConfigMask, NumFeatures);
  tables->NormalizeSums(ClassTemplate, NumFeatures, NumFeatures);

  // Writes the best rating/config into Result; BestMatch is kept only for
  // the debug print below.
  BestMatch = FindBestMatch(ClassTemplate, *tables, Result);

#ifndef GRAPHICS_DISABLED
  if (PrintMatchSummaryOn(Debug))
    DebugBestMatch(BestMatch, Result);

  if (MatchDebuggingOn(Debug))
    cprintf("Match Complete --------------------------------------------\n");
#endif

  delete tables;
}
00542 
00543 
00544 /*---------------------------------------------------------------------------*/
00545 int IntegerMatcher::FindGoodProtos(
00546     INT_CLASS ClassTemplate,
00547     BIT_VECTOR ProtoMask,
00548     BIT_VECTOR ConfigMask,
00549     uinT16 BlobLength,
00550     inT16 NumFeatures,
00551     INT_FEATURE_ARRAY Features,
00552     PROTO_ID *ProtoArray,
00553     int AdaptProtoThreshold,
00554     int Debug) {
00555 /*
00556  **      Parameters:
00557  **              ClassTemplate             Prototypes & tables for a class
00558  **              ProtoMask                 AND Mask for proto word
00559  **              ConfigMask                AND Mask for config word
00560  **              BlobLength                Length of unormalized blob
00561  **              NumFeatures               Number of features in blob
00562  **              Features                  Array of features
00563  **              ProtoArray                Array of good protos
00564  **              AdaptProtoThreshold       Threshold for good protos
00565  **              Debug                     Debugger flag: 1=debugger on
00566  **      Globals:
00567  **              local_matcher_multiplier_    Normalization factor multiplier
00568  **      Operation:
00569  **              FindGoodProtos finds all protos whose normalized proto-evidence
00570  **              exceed classify_adapt_proto_thresh.  The list is ordered by increasing
00571  **              proto id number.
00572  **      Return:
00573  **              Number of good protos in ProtoArray.
00574  **      Exceptions: none
00575  **      History: Tue Mar 12 17:09:26 MST 1991, RWM, Created
00576  */
00577   ScratchEvidence *tables = new ScratchEvidence();
00578   int NumGoodProtos = 0;
00579 
00580   /* DEBUG opening heading */
00581   if (MatchDebuggingOn (Debug))
00582     cprintf
00583       ("Find Good Protos -------------------------------------------\n");
00584 
00585   tables->Clear(ClassTemplate);
00586 
00587   for (int Feature = 0; Feature < NumFeatures; Feature++)
00588     UpdateTablesForFeature(
00589         ClassTemplate, ProtoMask, ConfigMask, Feature, &(Features[Feature]),
00590         tables, Debug);
00591 
00592 #ifndef GRAPHICS_DISABLED
00593   if (PrintProtoMatchesOn (Debug) || PrintMatchSummaryOn (Debug))
00594     DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables,
00595                            NumFeatures, Debug);
00596 #endif
00597 
00598   /* Average Proto Evidences & Find Good Protos */
00599   for (int proto = 0; proto < ClassTemplate->NumProtos; proto++) {
00600     /* Compute Average for Actual Proto */
00601     int Temp = 0;
00602     for (int i = 0; i < ClassTemplate->ProtoLengths[proto]; i++)
00603       Temp += tables->proto_evidence_[proto][i];
00604 
00605     Temp /= ClassTemplate->ProtoLengths[proto];
00606 
00607     /* Find Good Protos */
00608     if (Temp >= AdaptProtoThreshold) {
00609       *ProtoArray = proto;
00610       ProtoArray++;
00611       NumGoodProtos++;
00612     }
00613   }
00614 
00615   if (MatchDebuggingOn (Debug))
00616     cprintf ("Match Complete --------------------------------------------\n");
00617   delete tables;
00618 
00619   return NumGoodProtos;
00620 }
00621 
00622 
00623 /*---------------------------------------------------------------------------*/
00624 int IntegerMatcher::FindBadFeatures(
00625     INT_CLASS ClassTemplate,
00626     BIT_VECTOR ProtoMask,
00627     BIT_VECTOR ConfigMask,
00628     uinT16 BlobLength,
00629     inT16 NumFeatures,
00630     INT_FEATURE_ARRAY Features,
00631     FEATURE_ID *FeatureArray,
00632     int AdaptFeatureThreshold,
00633     int Debug) {
00634 /*
00635  **  Parameters:
00636  **      ClassTemplate             Prototypes & tables for a class
00637  **      ProtoMask                 AND Mask for proto word
00638  **      ConfigMask                AND Mask for config word
00639  **      BlobLength                Length of unormalized blob
00640  **      NumFeatures               Number of features in blob
00641  **      Features                  Array of features
00642  **      FeatureArray              Array of bad features
00643  **      AdaptFeatureThreshold     Threshold for bad features
00644  **      Debug                     Debugger flag: 1=debugger on
00645  **  Operation:
00646  **      FindBadFeatures finds all features with maximum feature-evidence <
00647  **      AdaptFeatureThresh. The list is ordered by increasing feature number.
00648  **  Return:
00649  **      Number of bad features in FeatureArray.
00650  **  History: Tue Mar 12 17:09:26 MST 1991, RWM, Created
00651  */
00652   ScratchEvidence *tables = new ScratchEvidence();
00653   int NumBadFeatures = 0;
00654 
00655   /* DEBUG opening heading */
00656   if (MatchDebuggingOn(Debug))
00657     cprintf("Find Bad Features -------------------------------------------\n");
00658 
00659   tables->Clear(ClassTemplate);
00660 
00661   for (int Feature = 0; Feature < NumFeatures; Feature++) {
00662     UpdateTablesForFeature(
00663         ClassTemplate, ProtoMask, ConfigMask, Feature, &Features[Feature],
00664         tables, Debug);
00665 
00666     /* Find Best Evidence for Current Feature */
00667     int best = 0;
00668     for (int i = 0; i < ClassTemplate->NumConfigs; i++)
00669       if (tables->feature_evidence_[i] > best)
00670         best = tables->feature_evidence_[i];
00671 
00672     /* Find Bad Features */
00673     if (best < AdaptFeatureThreshold) {
00674       *FeatureArray = Feature;
00675       FeatureArray++;
00676       NumBadFeatures++;
00677     }
00678   }
00679 
00680 #ifndef GRAPHICS_DISABLED
00681   if (PrintProtoMatchesOn(Debug) || PrintMatchSummaryOn(Debug))
00682     DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables,
00683                            NumFeatures, Debug);
00684 #endif
00685 
00686   if (MatchDebuggingOn(Debug))
00687     cprintf("Match Complete --------------------------------------------\n");
00688 
00689   delete tables;
00690   return NumBadFeatures;
00691 }
00692 
00693 
00694 /*---------------------------------------------------------------------------*/
void IntegerMatcher::Init(tesseract::IntParam *classify_debug_level,
                          int classify_integer_matcher_multiplier) {
  // One-time setup: stores the debug-level parameter, selects char-norm
  // matching as the default mode, and precomputes the lookup table and
  // shift/mask constants used by UpdateTablesForFeature.
  classify_debug_level_ = classify_debug_level;

  /* Set default mode of operation of IntegerMatcher */
  SetCharNormMatch(classify_integer_matcher_multiplier);

  /* Initialize table for evidence to similarity lookup */
  for (int i = 0; i < SE_TABLE_SIZE; i++) {
    // Map the table index back into the fixed-point similarity domain.
    uinT32 IntSimilarity = i << (27 - SE_TABLE_BITS);
    double Similarity = ((double) IntSimilarity) / 65536.0 / 65536.0;
    // Bell-shaped falloff: evidence is 255 at zero similarity distance and
    // decays with the squared ratio to kSimilarityCenter.
    double evidence = Similarity / kSimilarityCenter;
    evidence = 255.0 / (evidence * evidence + 1.0);

    if (kSEExponentialMultiplier > 0.0) {
      // Optional exponential damping of the table's tail; scale is clipped
      // to [0, 1] so it can only reduce evidence, never amplify it.
      double scale = 1.0 - exp(-kSEExponentialMultiplier) *
        exp(kSEExponentialMultiplier * ((double) i / SE_TABLE_SIZE));
      evidence *= ClipToRange(scale, 0.0, 1.0);
    }

    // Round to nearest and store as a byte.
    similarity_evidence_table_[i] = (uinT8) (evidence + 0.5);
  }

  /* Initialize evidence computation variables */
  // Masks and shift amounts for the fixed-point distance computation in
  // UpdateTablesForFeature; derived from the compile-time bit budgets.
  evidence_table_mask_ =
    ((1 << kEvidenceTableBits) - 1) << (9 - kEvidenceTableBits);
  mult_trunc_shift_bits_ = (14 - kIntEvidenceTruncBits);
  table_trunc_shift_bits_ = (27 - SE_TABLE_BITS - (mult_trunc_shift_bits_ << 1));
  evidence_mult_mask_ = ((1 << kIntEvidenceTruncBits) - 1);
}
00725 
00726 /*--------------------------------------------------------------------------*/
// Switch to baseline-normalized matching: a zero multiplier makes
// ApplyCNCorrection return the rating unchanged.
void IntegerMatcher::SetBaseLineMatch() {
  local_matcher_multiplier_ = 0;
}
00730 
00731 
00732 /*--------------------------------------------------------------------------*/
// Switch to character-normalized matching: the multiplier weights the
// normalization factor against blob length in ApplyCNCorrection.
void IntegerMatcher::SetCharNormMatch(int integer_matcher_multiplier) {
  local_matcher_multiplier_ = integer_matcher_multiplier;
}
00736 
00737 
// Zeros the per-config feature-evidence sums and the per-proto evidence
// arrays for the given class template, ready for a fresh match.
void ScratchEvidence::Clear(const INT_CLASS class_template) {
  memset(sum_feature_evidence_, 0,
         class_template->NumConfigs * sizeof(sum_feature_evidence_[0]));
  memset(proto_evidence_, 0,
         class_template->NumProtos * sizeof(proto_evidence_[0]));
}
00747 
// Zeros only the per-config evidence for the current feature; called once
// per feature by UpdateTablesForFeature before accumulating.
void ScratchEvidence::ClearFeatureEvidence(const INT_CLASS class_template) {
  memset(feature_evidence_, 0,
         class_template->NumConfigs * sizeof(feature_evidence_[0]));
}
00752 
00753 
00754 
00755 /*---------------------------------------------------------------------------*/
00756 void IMDebugConfiguration(int FeatureNum,
00757                           uinT16 ActualProtoNum,
00758                           uinT8 Evidence,
00759                           BIT_VECTOR ConfigMask,
00760                           uinT32 ConfigWord) {
00761 /*
00762  **      Parameters:
00763  **      Globals:
00764  **      Operation:
00765  **              Print debugging information for Configuations
00766  **      Return:
00767  **      Exceptions: none
00768  **      History: Wed Feb 27 14:12:28 MST 1991, RWM, Created.
00769  */
00770   cprintf ("F = %3d, P = %3d, E = %3d, Configs = ",
00771     FeatureNum, (int) ActualProtoNum, (int) Evidence);
00772   while (ConfigWord) {
00773     if (ConfigWord & 1)
00774       cprintf ("1");
00775     else
00776       cprintf ("0");
00777     ConfigWord >>= 1;
00778   }
00779   cprintf ("\n");
00780 }
00781 
00782 
00783 /*---------------------------------------------------------------------------*/
00784 void IMDebugConfigurationSum(int FeatureNum,
00785                              uinT8 *FeatureEvidence,
00786                              inT32 ConfigCount) {
00787 /*
00788  **      Parameters:
00789  **      Globals:
00790  **      Operation:
00791  **              Print debugging information for Configuations
00792  **      Return:
00793  **      Exceptions: none
00794  **      History: Wed Feb 27 14:12:28 MST 1991, RWM, Created.
00795  */
00796   cprintf("F=%3d, C=", FeatureNum);
00797   for (int ConfigNum = 0; ConfigNum < ConfigCount; ConfigNum++) {
00798     cprintf("%4d", FeatureEvidence[ConfigNum]);
00799   }
00800   cprintf("\n");
00801 }
00802 
00803 
00804 
00805 /*---------------------------------------------------------------------------*/
int IntegerMatcher::UpdateTablesForFeature(
    INT_CLASS ClassTemplate,
    BIT_VECTOR ProtoMask,
    BIT_VECTOR ConfigMask,
    int FeatureNum,
    const INT_FEATURE_STRUCT* Feature,
    ScratchEvidence *tables,
    int Debug) {
/*
 **  Parameters:
 **      ClassTemplate         Prototypes & tables for a class
 **      ProtoMask             AND mask selecting which protos to consider
 **      ConfigMask            AND mask selecting which configs to consider
 **      FeatureNum            Current feature number (for DEBUG only)
 **      Feature               Pointer to a feature struct
 **      tables                Evidence tables
 **      Debug                 Debugger flag: 1=debugger on
 **  Operation:
 **       For the given feature: prune protos, compute evidence,
 **       update Feature Evidence, Proto Evidence, and Sum of Feature
 **       Evidence tables.
 **  Return:
 **       Sum of this feature's evidence over all configurations.
 */
  register uinT32 ConfigWord;
  register uinT32 ProtoWord;
  register uinT32 ProtoNum;
  register uinT32 ActualProtoNum;
  uinT8 proto_byte;
  inT32 proto_word_offset;
  inT32 proto_offset;
  uinT8 config_byte;
  inT32 config_offset;
  PROTO_SET ProtoSet;
  uinT32 *ProtoPrunerPtr;
  INT_PROTO Proto;
  int ProtoSetIndex;
  uinT8 Evidence;
  uinT32 XFeatureAddress;
  uinT32 YFeatureAddress;
  uinT32 ThetaFeatureAddress;
  register uinT8 *UINT8Pointer;
  register int ProtoIndex;
  uinT8 Temp;
  register int *IntPointer;
  int ConfigNum;
  register inT32 M3;
  register inT32 A3;
  register uinT32 A4;

  tables->ClearFeatureEvidence(ClassTemplate);

  /* Precompute Feature Address offset for Proto Pruning */
  // The pruner stores X, Y, and Theta sections back to back; >> 2 picks a
  // bucket from the coordinate, << 1 converts bucket to a word offset.
  XFeatureAddress = ((Feature->X >> 2) << 1);
  YFeatureAddress = (NUM_PP_BUCKETS << 1) + ((Feature->Y >> 2) << 1);
  ThetaFeatureAddress = (NUM_PP_BUCKETS << 2) + ((Feature->Theta >> 2) << 1);

  for (ProtoSetIndex = 0, ActualProtoNum = 0;
  ProtoSetIndex < ClassTemplate->NumProtoSets; ProtoSetIndex++) {
    ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
    ProtoPrunerPtr = (uinT32 *) ((*ProtoSet).ProtoPruner);
    for (ProtoNum = 0; ProtoNum < PROTOS_PER_PROTO_SET;
      ProtoNum += (PROTOS_PER_PROTO_SET >> 1), ActualProtoNum +=
    (PROTOS_PER_PROTO_SET >> 1), ProtoMask++, ProtoPrunerPtr++) {
      /* Prune Protos of current Proto Set */
      // AND the pruner words for the feature's X, Y and Theta buckets with
      // the caller's proto mask; surviving 1-bits are the protos close
      // enough to be worth the full evidence computation.
      ProtoWord = *(ProtoPrunerPtr + XFeatureAddress);
      ProtoWord &= *(ProtoPrunerPtr + YFeatureAddress);
      ProtoWord &= *(ProtoPrunerPtr + ThetaFeatureAddress);
      ProtoWord &= *ProtoMask;

      if (ProtoWord != 0) {
        // Walk the set bits a byte at a time: offset_table gives the index
        // of the lowest set bit in a byte, next_table clears it.
        proto_byte = ProtoWord & 0xff;
        ProtoWord >>= 8;
        proto_word_offset = 0;
        while (ProtoWord != 0 || proto_byte != 0) {
          while (proto_byte == 0) {
            proto_byte = ProtoWord & 0xff;
            ProtoWord >>= 8;
            proto_word_offset += 8;
          }
          proto_offset = offset_table[proto_byte] + proto_word_offset;
          proto_byte = next_table[proto_byte];
          Proto = &(ProtoSet->Protos[ProtoNum + proto_offset]);
          ConfigWord = Proto->Configs[0];
          // A3: proto's line equation evaluated at the feature point in
          // fixed point — a distance-like positional error.
          // M3: angular difference (signed 8-bit wraparound) scaled by
          // kIntThetaFudge.
          A3 = (((Proto->A * (Feature->X - 128)) << 1)
            - (Proto->B * (Feature->Y - 128)) + (Proto->C << 9));
          M3 =
            (((inT8) (Feature->Theta - Proto->Angle)) * kIntThetaFudge) << 1;

          // One's-complement "absolute value" (off by one; negligible at
          // this precision), then truncate and clip both errors to the
          // evidence multiplier range.
          if (A3 < 0)
            A3 = ~A3;
          if (M3 < 0)
            M3 = ~M3;
          A3 >>= mult_trunc_shift_bits_;
          M3 >>= mult_trunc_shift_bits_;
          if (A3 > evidence_mult_mask_)
            A3 = evidence_mult_mask_;
          if (M3 > evidence_mult_mask_)
            M3 = evidence_mult_mask_;

          // Squared combined error indexes the precomputed similarity-to-
          // evidence table; beyond the table range the evidence is zero.
          A4 = (A3 * A3) + (M3 * M3);
          A4 >>= table_trunc_shift_bits_;
          if (A4 > evidence_table_mask_)
            Evidence = 0;
          else
            Evidence = similarity_evidence_table_[A4];

          if (PrintFeatureMatchesOn (Debug))
            IMDebugConfiguration (FeatureNum,
              ActualProtoNum + proto_offset,
              Evidence, ConfigMask, ConfigWord);

          ConfigWord &= *ConfigMask;

          // For each config containing this proto, keep the maximum
          // evidence seen for the current feature (same byte-at-a-time
          // bit scan; the -8 offsets the first += 8 inside the loop).
          UINT8Pointer = tables->feature_evidence_ - 8;
          config_byte = 0;
          while (ConfigWord != 0 || config_byte != 0) {
            while (config_byte == 0) {
              config_byte = ConfigWord & 0xff;
              ConfigWord >>= 8;
              UINT8Pointer += 8;
            }
            config_offset = offset_table[config_byte];
            config_byte = next_table[config_byte];
            if (Evidence > UINT8Pointer[config_offset])
              UINT8Pointer[config_offset] = Evidence;
          }

          // Insert the evidence into the proto's descending-sorted
          // evidence list, bubbling displaced smaller values toward the
          // tail; once the carried value hits zero nothing further can
          // change, so stop early.
          UINT8Pointer =
            &(tables->proto_evidence_[ActualProtoNum + proto_offset][0]);
          for (ProtoIndex =
            ClassTemplate->ProtoLengths[ActualProtoNum + proto_offset];
          ProtoIndex > 0; ProtoIndex--, UINT8Pointer++) {
            if (Evidence > *UINT8Pointer) {
              Temp = *UINT8Pointer;
              *UINT8Pointer = Evidence;
              Evidence = Temp;
            }
            else if (Evidence == 0)
              break;
          }
        }
      }
    }
  }

  if (PrintFeatureMatchesOn(Debug)) {
    IMDebugConfigurationSum(FeatureNum, tables->feature_evidence_,
                            ClassTemplate->NumConfigs);
  }

  // Fold this feature's per-config evidence into the running sums and
  // return the total over all configurations.
  IntPointer = tables->sum_feature_evidence_;
  UINT8Pointer = tables->feature_evidence_;
  int SumOverConfigs = 0;
  for (ConfigNum = ClassTemplate->NumConfigs; ConfigNum > 0; ConfigNum--) {
    int evidence = *UINT8Pointer++;
    SumOverConfigs += evidence;
    *IntPointer++ += evidence;
  }
  return SumOverConfigs;
}
00964 
00965 
00966 /*---------------------------------------------------------------------------*/
00967 #ifndef GRAPHICS_DISABLED
void IntegerMatcher::DebugFeatureProtoError(
    INT_CLASS ClassTemplate,
    BIT_VECTOR ProtoMask,
    BIT_VECTOR ConfigMask,
    const ScratchEvidence& tables,
    inT16 NumFeatures,
    int Debug) {
/*
 **      Parameters:
 **              ClassTemplate   Prototypes & tables for the class
 **              ProtoMask       AND mask of protos that were considered
 **              ConfigMask      AND mask of configs that were considered
 **              tables          Evidence accumulated by the match
 **              NumFeatures     Number of features in the blob
 **              Debug           Flags selecting which sections to print
 **      Globals:
 **      Operation:
 **              Print debugging information for Configurations
 **      Return:
 **      Exceptions: none
 **      History: Wed Feb 27 14:12:28 MST 1991, RWM, Created.
 */
  FLOAT32 ProtoConfigs[MAX_NUM_CONFIGS];
  int ConfigNum;
  uinT32 ConfigWord;
  int ProtoSetIndex;
  uinT16 ProtoNum;
  uinT8 ProtoWordNum;
  PROTO_SET ProtoSet;
  uinT16 ActualProtoNum;

  if (PrintMatchSummaryOn(Debug)) {
    // Which configurations were enabled (one bit per config, LSB first).
    cprintf("Configuration Mask:\n");
    for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++)
      cprintf("%1d", (((*ConfigMask) >> ConfigNum) & 1));
    cprintf("\n");

    // Per-config feature error percentage: 0% means every feature
    // contributed the maximum evidence of 256.
    cprintf("Feature Error for Configurations:\n");
    for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++) {
      cprintf(
          " %5.1f",
          100.0 * (1.0 -
          (FLOAT32) tables.sum_feature_evidence_[ConfigNum]
          / NumFeatures / 256.0));
    }
    cprintf("\n\n\n");
  }

  if (PrintMatchSummaryOn (Debug)) {
    // Dump the proto mask, one line per 32-bit mask word. ProtoMask is
    // advanced here, but it is a by-value pointer copy, so the caller's
    // mask pointer is unaffected.
    cprintf ("Proto Mask:\n");
    for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets;
    ProtoSetIndex++) {
      ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
      for (ProtoWordNum = 0; ProtoWordNum < 2;
      ProtoWordNum++, ProtoMask++) {
        ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
        for (ProtoNum = 0;
          ((ProtoNum < (PROTOS_PER_PROTO_SET >> 1))
          && (ActualProtoNum < ClassTemplate->NumProtos));
          ProtoNum++, ActualProtoNum++)
        cprintf ("%1d", (((*ProtoMask) >> ProtoNum) & 1));
        cprintf ("\n");
      }
    }
    cprintf ("\n");
  }

  // Per-config accumulator for total proto evidence, filled below.
  for (int i = 0; i < ClassTemplate->NumConfigs; i++)
    ProtoConfigs[i] = 0;

  if (PrintProtoMatchesOn (Debug)) {
    // For every proto: dump its per-slot evidence, its average as a
    // percentage, and credit its total to each config it belongs to.
    cprintf ("Proto Evidence:\n");
    for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets;
    ProtoSetIndex++) {
      ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
      ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
      for (ProtoNum = 0;
           ((ProtoNum < PROTOS_PER_PROTO_SET) &&
            (ActualProtoNum < ClassTemplate->NumProtos));
           ProtoNum++, ActualProtoNum++) {
        cprintf ("P %3d =", ActualProtoNum);
        int temp = 0;
        for (int j = 0; j < ClassTemplate->ProtoLengths[ActualProtoNum]; j++) {
          uinT8 data = tables.proto_evidence_[ActualProtoNum][j];
          cprintf(" %d", data);
          temp += data;
        }

        cprintf(" = %6.4f%%\n",
                temp / 256.0 / ClassTemplate->ProtoLengths[ActualProtoNum]);

        // Walk the proto's config-membership bits, LSB first.
        ConfigWord = ProtoSet->Protos[ProtoNum].Configs[0];
        ConfigNum = 0;
        while (ConfigWord) {
          cprintf ("%5d", ConfigWord & 1 ? temp : 0);
          if (ConfigWord & 1)
            ProtoConfigs[ConfigNum] += temp;
          ConfigNum++;
          ConfigWord >>= 1;
        }
        cprintf("\n");
      }
    }
  }

  if (PrintMatchSummaryOn (Debug)) {
    // Per-config proto error percentage, normalized by config length.
    cprintf ("Proto Error for Configurations:\n");
    for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++)
      cprintf (" %5.1f",
        100.0 * (1.0 -
        ProtoConfigs[ConfigNum] /
        ClassTemplate->ConfigLengths[ConfigNum] / 256.0));
    cprintf ("\n\n");
  }

  if (PrintProtoMatchesOn (Debug)) {
    cprintf ("Proto Sum for Configurations:\n");
    for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++)
      cprintf (" %4.1f", ProtoConfigs[ConfigNum] / 256.0);
    cprintf ("\n\n");

    cprintf ("Proto Length for Configurations:\n");
    for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++)
      cprintf (" %4.1f",
        (float) ClassTemplate->ConfigLengths[ConfigNum]);
    cprintf ("\n\n");
  }

}
01091 
01092 
01093 /*---------------------------------------------------------------------------*/
01094 void IntegerMatcher::DisplayProtoDebugInfo(
01095     INT_CLASS ClassTemplate,
01096     BIT_VECTOR ProtoMask,
01097     BIT_VECTOR ConfigMask,
01098     const ScratchEvidence& tables,
01099     bool SeparateDebugWindows) {
01100   uinT16 ProtoNum;
01101   uinT16 ActualProtoNum;
01102   PROTO_SET ProtoSet;
01103   int ProtoSetIndex;
01104 
01105   InitIntMatchWindowIfReqd();
01106   if (SeparateDebugWindows) {
01107     InitFeatureDisplayWindowIfReqd();
01108     InitProtoDisplayWindowIfReqd();
01109   }
01110 
01111 
01112   for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets;
01113        ProtoSetIndex++) {
01114     ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
01115     ActualProtoNum = ProtoSetIndex * PROTOS_PER_PROTO_SET;
01116     for (ProtoNum = 0;
01117          ((ProtoNum < PROTOS_PER_PROTO_SET) &&
01118           (ActualProtoNum < ClassTemplate->NumProtos));
01119          ProtoNum++, ActualProtoNum++) {
01120       /* Compute Average for Actual Proto */
01121       int temp = 0;
01122       for (int i = 0; i < ClassTemplate->ProtoLengths[ActualProtoNum]; i++)
01123         temp += tables.proto_evidence_[ActualProtoNum][i];
01124 
01125       temp /= ClassTemplate->ProtoLengths[ActualProtoNum];
01126 
01127       if ((ProtoSet->Protos[ProtoNum]).Configs[0] & (*ConfigMask)) {
01128         DisplayIntProto(ClassTemplate, ActualProtoNum, temp / 255.0);
01129       }
01130     }
01131   }
01132 }
01133 
01134 
01135 /*---------------------------------------------------------------------------*/
01136 void IntegerMatcher::DisplayFeatureDebugInfo(
01137     INT_CLASS ClassTemplate,
01138     BIT_VECTOR ProtoMask,
01139     BIT_VECTOR ConfigMask,
01140     inT16 NumFeatures,
01141     const INT_FEATURE_STRUCT* Features,
01142     int AdaptFeatureThreshold,
01143     int Debug,
01144     bool SeparateDebugWindows) {
01145   ScratchEvidence *tables = new ScratchEvidence();
01146 
01147   tables->Clear(ClassTemplate);
01148 
01149   InitIntMatchWindowIfReqd();
01150   if (SeparateDebugWindows) {
01151     InitFeatureDisplayWindowIfReqd();
01152     InitProtoDisplayWindowIfReqd();
01153   }
01154 
01155   for (int Feature = 0; Feature < NumFeatures; Feature++) {
01156     UpdateTablesForFeature(
01157         ClassTemplate, ProtoMask, ConfigMask, Feature, &Features[Feature],
01158         tables, 0);
01159 
01160     /* Find Best Evidence for Current Feature */
01161     int best = 0;
01162     for (int i = 0; i < ClassTemplate->NumConfigs; i++)
01163       if (tables->feature_evidence_[i] > best)
01164         best = tables->feature_evidence_[i];
01165 
01166     /* Update display for current feature */
01167     if (ClipMatchEvidenceOn(Debug)) {
01168       if (best < AdaptFeatureThreshold)
01169         DisplayIntFeature(&Features[Feature], 0.0);
01170       else
01171         DisplayIntFeature(&Features[Feature], 1.0);
01172     } else {
01173       DisplayIntFeature(&Features[Feature], best / 255.0);
01174     }
01175   }
01176 
01177   delete tables;
01178 }
01179 #endif
01180 
01181 /*---------------------------------------------------------------------------*/
01182 // Add sum of Proto Evidences into Sum Of Feature Evidence Array
void ScratchEvidence::UpdateSumOfProtoEvidences(
    INT_CLASS ClassTemplate, BIT_VECTOR ConfigMask, inT16 NumFeatures) {
  // For every proto in the class, total its accumulated per-slot evidence
  // and add that total into sum_feature_evidence_ for each configuration
  // (selected by ConfigMask) the proto belongs to.
  // NOTE(review): the NumFeatures parameter is currently unused here.

  int *IntPointer;
  uinT32 ConfigWord;
  int ProtoSetIndex;
  uinT16 ProtoNum;
  PROTO_SET ProtoSet;
  int NumProtos;
  uinT16 ActualProtoNum;

  NumProtos = ClassTemplate->NumProtos;

  for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets;
       ProtoSetIndex++) {
    ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
    ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
    for (ProtoNum = 0;
         ((ProtoNum < PROTOS_PER_PROTO_SET) && (ActualProtoNum < NumProtos));
         ProtoNum++, ActualProtoNum++) {
      // Total evidence recorded for this proto across its length.
      int temp = 0;
      for (int i = 0; i < ClassTemplate->ProtoLengths[ActualProtoNum]; i++)
        temp += proto_evidence_[ActualProtoNum] [i];

      // Credit the total to every unmasked config containing this proto
      // (config-membership bits are walked LSB first).
      ConfigWord = ProtoSet->Protos[ProtoNum].Configs[0];
      ConfigWord &= *ConfigMask;
      IntPointer = sum_feature_evidence_;
      while (ConfigWord) {
        if (ConfigWord & 1)
          *IntPointer += temp;
        IntPointer++;
        ConfigWord >>= 1;
      }
    }
  }
}
01219 
01220 
01221 
01222 /*---------------------------------------------------------------------------*/
01223 // Normalize Sum of Proto and Feature Evidence by dividing by the sum of
01224 // the Feature Lengths and the Proto Lengths for each configuration.
void ScratchEvidence::NormalizeSums(
    INT_CLASS ClassTemplate, inT16 NumFeatures, inT32 used_features) {
  // Scale each config's accumulated evidence (<< 8) and divide by the
  // expected evidence count (features plus the config's proto length) so
  // configurations of different sizes are comparable.
  // NOTE(review): the used_features parameter is currently ignored.
  for (int i = 0; i < ClassTemplate->NumConfigs; i++) {
    sum_feature_evidence_[i] = (sum_feature_evidence_[i] << 8) /
        (NumFeatures + ClassTemplate->ConfigLengths[i]);
  }
}
01233 
01234 
01235 /*---------------------------------------------------------------------------*/
int IntegerMatcher::FindBestMatch(
    INT_CLASS ClassTemplate,
    const ScratchEvidence &tables,
    INT_RESULT Result) {
/*
 **      Parameters:
 **              ClassTemplate   Prototypes & tables for the class
 **              tables          Accumulated, normalized evidence sums
 **              Result          Output: best/runner-up config and rating
 **      Globals:
 **      Operation:
 **              Find the best match for the current class and update the Result
 **              with the configuration and match rating.
 **      Return:
 **              The best normalized sum of evidences
 **      Exceptions: none
 **      History: Wed Feb 27 14:12:28 MST 1991, RWM, Created.
 */
  int BestMatch = 0;
  int Best2Match = 0;
  Result->Config = 0;
  Result->Config2 = 0;

  /* Find best match */
  for (int ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++) {
    int rating = tables.sum_feature_evidence_[ConfigNum];
    if (*classify_debug_level_ > 2)
      cprintf("Config %d, rating=%d\n", ConfigNum, rating);
    if (rating > BestMatch) {
      if (BestMatch > 0) {
        // Demote the previous winner to runner-up.
        Result->Config2 = Result->Config;
        Best2Match = BestMatch;
      } else {
        // First nonzero rating: runner-up starts out as this same config.
        Result->Config2 = ConfigNum;
      }
      Result->Config = ConfigNum;
      BestMatch = rating;
    } else if (rating > Best2Match) {
      Result->Config2 = ConfigNum;
      Best2Match = rating;
    }
  }

  /* Compute Certainty Rating */
  // Rating is a normalized distance: 0.0 when BestMatch reaches the
  // perfect score of 65536 (max evidence 256 times the << 8 scaling).
  Result->Rating = (65536.0 - BestMatch) / 65536.0;

  return BestMatch;
}
01281 
01282 // Applies the CN normalization factor to the given rating and returns
01283 // the modified rating.
01284 float IntegerMatcher::ApplyCNCorrection(float rating, int blob_length,
01285                                         int normalization_factor) {
01286   return (rating * blob_length +
01287     local_matcher_multiplier_ * normalization_factor / 256.0) /
01288     (blob_length + local_matcher_multiplier_);
01289 }
01290 
01291 /*---------------------------------------------------------------------------*/
01292 #ifndef GRAPHICS_DISABLED
01293 // Print debug information about the best match for the current class.
void IntegerMatcher::DebugBestMatch(
    int BestMatch, INT_RESULT Result) {
  // BestMatch is the normalized evidence sum returned by FindBestMatch
  // (65536 would be a perfect score); Result carries the rating and
  // winning configuration that FindBestMatch filled in.
  tprintf("Rating = %5.1f%%  Best Config = %3d, Distance = %5.1f\n",
          100.0 * Result->Rating, Result->Config,
          100.0 * (65536.0 - BestMatch) / 65536.0);
}
01300 #endif
01301 
01302 /*---------------------------------------------------------------------------*/
void HeapSort (int n, int ra[], int rb[]) {
/*
 **      Parameters:
 **              n      Number of elements to sort
 **              ra     Key array [1..n]
 **              rb     Index array [1..n]
 **      Globals:
 **      Operation:
 **              Sort the key array ra (1-based: elements ra[1]..ra[n]) into
 **              ascending order using heap sort, applying the identical
 **              permutation to the companion array rb so the (key, index)
 **              pairing is preserved.  Element 0 of both arrays is never
 **              touched; callers must supply arrays of at least n + 1
 **              elements.
 **      Return: none (sorts in place)
 **      Exceptions: none
 **      History: Tue Feb 19 10:24:24 MST 1991, RWM, Created.
 */
  // Fix 1: the `register` storage class (present in the original) is
  // ill-formed in C++17 and has been removed.
  // Fix 2: guard n <= 1 — the Numerical-Recipes loop below decrements ir
  // past 1 in that case and reads ra[0]/ra[-1], looping forever; a 0- or
  // 1-element range is already sorted.
  if (n <= 1)
    return;

  int i, rra, rrb;
  int l, j, ir;

  l = (n >> 1) + 1;
  ir = n;
  for (;;) {
    if (l > 1) {
      /* Heap-building phase: take the next internal node to sift down. */
      rra = ra[--l];
      rrb = rb[l];
    }
    else {
      /* Extraction phase: move the current max to the end, shrink heap. */
      rra = ra[ir];
      rrb = rb[ir];
      ra[ir] = ra[1];
      rb[ir] = rb[1];
      if (--ir == 1) {
        ra[1] = rra;
        rb[1] = rrb;
        return;
      }
    }
    /* Sift (rra, rrb) down from node l until heap order is restored. */
    i = l;
    j = l << 1;
    while (j <= ir) {
      if (j < ir && ra[j] < ra[j + 1])
        ++j;                    /* pick the larger child */
      if (rra < ra[j]) {
        ra[i] = ra[j];
        rb[i] = rb[j];
        j += (i = j);
      }
      else
        j = ir + 1;             /* heap order holds; stop sifting */
    }
    ra[i] = rra;
    rb[i] = rrb;
  }
}