Tesseract  3.02
tesseract-ocr/classify/intmatcher.h
Go to the documentation of this file.
00001 /******************************************************************************
00002  **     Filename:    intmatcher.h
00003  **     Purpose:     Interface to high level generic classifier routines.
00004  **     Author:      Robert Moss
00005  **     History:     Wed Feb 13 15:24:15 MST 1991, RWM, Created.
00006  **
00007  **     (c) Copyright Hewlett-Packard Company, 1988.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  ******************************************************************************/
00018 #ifndef   INTMATCHER_H
00019 #define   INTMATCHER_H
00020 
00021 #include "params.h"
00022 
00023 // Character fragments could be present in the trained templates
00024 // but turned on/off on a language-by-language basis or depending
00025 // on particular properties of the corpus (e.g. when we expect the
00026 // images to have low exposure).
// Header-side declaration of the boolean parameter
// disable_character_fragments. BOOL_VAR_H is supplied by params.h (not
// visible here); presumably FALSE is the default value and the string is the
// help text -- confirm against params.h.
00027 extern BOOL_VAR_H(disable_character_fragments, FALSE,
00028                   "Do not include character fragments in the"
00029                   " results of the classifier");
00030 
// Header-side declaration of the integer parameter
// classify_integer_matcher_multiplier (listed value 14, help text states the
// range 0-255). INT_VAR_H is supplied by params.h (not visible here).
// IntegerMatcher::Init takes an int argument of the same name; presumably the
// value of this parameter is what gets passed there -- confirm at call sites.
00031 extern INT_VAR_H(classify_integer_matcher_multiplier, 14,
00032                  "Integer Matcher Multiplier  0-255:   ");
00033 
00034 
00038 #include "intproto.h"
00039 #include "cutoffs.h"
00040 
// Result record used by the integer matcher. IntegerMatcher::Match,
// FindBestMatch and DebugBestMatch all take a pointer to this struct
// (INT_RESULT) as their Result argument.
// NOTE(review): the meaning of the fields is not visible in this header.
// Presumably Rating is the match score, Config / Config2 identify the best
// and second-best configurations, and FeatureMisses counts features that
// found no evidence -- confirm in intmatcher.cpp.
00041 struct INT_RESULT_STRUCT {
00042   FLOAT32 Rating;
00043   uinT8 Config;
00044   uinT8 Config2;
00045   uinT16 FeatureMisses;
00046 };
00047 
// Pointer alias for INT_RESULT_STRUCT; this is the type of the Result
// parameters declared on IntegerMatcher in this header.
00048 typedef INT_RESULT_STRUCT *INT_RESULT;
00049 
00050 
// Record that bundles a class id (Class) with its own Rating and an embedded
// integer-matcher result (IMResult, stored by value).
// NOTE(review): presumably one entry per class-pruner candidate, given the
// CLASS_PRUNER_RESULTS array type built from it -- confirm with the users of
// that type.
00051 struct CP_RESULT_STRUCT {
00052   FLOAT32 Rating;
00053   INT_RESULT_STRUCT IMResult;
00054   CLASS_ID Class;
00055 };
00056 
// Fixed-size array type holding MAX_NUM_CLASSES CP_RESULT_STRUCT entries.
// MAX_NUM_CLASSES is not defined in this header (presumably it comes from
// one of the included headers -- confirm).
00057 typedef CP_RESULT_STRUCT CLASS_PRUNER_RESULTS[MAX_NUM_CLASSES];
00058 
00059 /*----------------------------------------------------------------------------
00060             Variables
00061 -----------------------------------------------------------------------------*/
00062 
// Header-side declaration of the integer parameter
// classify_adapt_proto_thresh (listed value 230, help text states the range
// 0-255). INT_VAR_H is supplied by params.h (not visible here).
// Presumably this feeds the AdaptProtoThreshold argument of
// IntegerMatcher::FindGoodProtos -- confirm at call sites.
00063 extern INT_VAR_H(classify_adapt_proto_thresh, 230,
00064                  "Threshold for good protos during adaptive 0-255:   ");
00065 
// Header-side declaration of the integer parameter
// classify_adapt_feature_thresh (listed value 230, help text states the
// range 0-255). INT_VAR_H is supplied by params.h (not visible here).
// Presumably this feeds the AdaptFeatureThreshold arguments of
// IntegerMatcher::Match and FindBadFeatures -- confirm at call sites.
00066 extern INT_VAR_H(classify_adapt_feature_thresh, 230,
00067                  "Threshold for good features during adaptive 0-255:   ");
00068 
// Size constants for the similarity-to-evidence lookup table.
// SE_TABLE_SIZE is the length of IntegerMatcher::similarity_evidence_table_,
// and 512 == 1 << SE_TABLE_BITS. The two values are written out
// independently, so they have to be kept in sync by hand.
00073 #define  SE_TABLE_BITS    9
00074 #define  SE_TABLE_SIZE  512
00075 
// Working evidence arrays used by IntegerMatcher's private helpers:
// UpdateTablesForFeature receives a pointer to one, while FindBestMatch and
// the debug routines receive a const reference. Only the declarations are in
// this header; the member functions are defined elsewhere.
00076 struct ScratchEvidence {
        // One uinT8 slot per configuration (MAX_NUM_CONFIGS entries).
00077   uinT8 feature_evidence_[MAX_NUM_CONFIGS];
        // One int accumulator per configuration (MAX_NUM_CONFIGS entries).
00078   int sum_feature_evidence_[MAX_NUM_CONFIGS];
        // MAX_NUM_PROTOS x MAX_PROTO_INDEX table of uinT8 values.
00079   uinT8 proto_evidence_[MAX_NUM_PROTOS][MAX_PROTO_INDEX];
00080 
        // NOTE(review): behaviour of the methods below is not visible here.
        // Presumably Clear resets all three arrays and ClearFeatureEvidence
        // resets only feature_evidence_ -- confirm in intmatcher.cpp.
00081   void Clear(const INT_CLASS class_template);
00082   void ClearFeatureEvidence(const INT_CLASS class_template);
        // Presumably rescales sum_feature_evidence_ using the feature counts
        // passed in -- confirm in intmatcher.cpp.
00083   void NormalizeSums(INT_CLASS ClassTemplate, inT16 NumFeatures,
00084                      inT32 used_features);
        // Presumably folds proto_evidence_ into the per-configuration sums
        // for the configurations selected by ConfigMask -- confirm.
00085   void UpdateSumOfProtoEvidences(
00086     INT_CLASS ClassTemplate, BIT_VECTOR ConfigMask, inT16 NumFeatures);
00087 };
00088 
00089 
// Integer feature matcher. The public interface matches a set of integer
// features against a class template (Match) and offers two adaptation
// helpers (FindGoodProtos, FindBadFeatures). Only declarations are present in
// this header; all non-inline members are defined elsewhere.
00090 class IntegerMatcher {
00091  public:
00092   // Integer Matcher Theta Fudge (0-255).
00093   static const int kIntThetaFudge = 128;
00094   // Bits in Similarity to Evidence Lookup (8-9).
        // NOTE(review): same value as the SE_TABLE_BITS macro (9); the two
        // are defined independently.
00095   static const int kEvidenceTableBits = 9;
00096   // Integer Evidence Truncation Bits (8-14).
00097   static const int kIntEvidenceTruncBits = 14;
00098   // Similarity to Evidence Table Exponential Multiplier.
        // Declared only; the value is provided by an out-of-class definition.
00099   static const float kSEExponentialMultiplier;
00100   // Center of Similarity Curve.
        // Declared only; the value is provided by an out-of-class definition.
00101   static const float kSimilarityCenter;
00102 
        // NOTE(review): the constructor initialises only
        // classify_debug_level_ (to 0); the lookup table, masks, shift counts
        // and multiplier are left uninitialised here. Presumably Init must be
        // called before any matching -- confirm.
00103   IntegerMatcher() : classify_debug_level_(0) {}
00104 
        // Takes the debug-level parameter pointer and the matcher multiplier.
        // Presumably stores the pointer in classify_debug_level_ and builds
        // the lookup table and masks -- confirm in intmatcher.cpp.
00105   void Init(tesseract::IntParam *classify_debug_level,
00106             int classify_integer_matcher_multiplier);
00107 
        // Presumably switch local_matcher_multiplier_ between baseline and
        // character-normalised matching -- confirm in intmatcher.cpp.
00108   void SetBaseLineMatch();
00109   void SetCharNormMatch(int integer_matcher_multiplier);
00110 
        // Matches NumFeatures entries of Features against ClassTemplate,
        // with ProtoMask / ConfigMask as bit vectors. Result is a non-const
        // pointer, so it is presumably the output -- confirm.
00111   void Match(INT_CLASS ClassTemplate,
00112              BIT_VECTOR ProtoMask,
00113              BIT_VECTOR ConfigMask,
00114              inT16 NumFeatures,
00115              const INT_FEATURE_STRUCT* Features,
00116              INT_RESULT Result,
00117              int AdaptFeatureThreshold,
00118              int Debug,
00119              bool SeparateDebugWindows);
00120 
00121   // Applies the CN normalization factor to the given rating and returns
00122   // the modified rating.
00123   float ApplyCNCorrection(float rating, int blob_length,
00124                           int normalization_factor);
00125 
        // Returns an int and takes a PROTO_ID array; presumably fills
        // ProtoArray with protos whose evidence meets AdaptProtoThreshold and
        // returns how many were written -- confirm in intmatcher.cpp.
00126   int FindGoodProtos(INT_CLASS ClassTemplate,
00127                      BIT_VECTOR ProtoMask,
00128                      BIT_VECTOR ConfigMask,
00129                      uinT16 BlobLength,
00130                      inT16 NumFeatures,
00131                      INT_FEATURE_ARRAY Features,
00132                      PROTO_ID *ProtoArray,
00133                      int AdaptProtoThreshold,
00134                      int Debug);
00135 
        // Returns an int and takes a FEATURE_ID array; presumably fills
        // FeatureArray with features whose evidence falls below
        // AdaptFeatureThreshold and returns how many were written -- confirm.
00136   int FindBadFeatures(INT_CLASS ClassTemplate,
00137                       BIT_VECTOR ProtoMask,
00138                       BIT_VECTOR ConfigMask,
00139                       uinT16 BlobLength,
00140                       inT16 NumFeatures,
00141                       INT_FEATURE_ARRAY Features,
00142                       FEATURE_ID *FeatureArray,
00143                       int AdaptFeatureThreshold,
00144                       int Debug);
00145 
00146  private:
        // Per-feature step: takes one feature plus a mutable ScratchEvidence.
        // The meaning of the int return value is not visible here.
00147   int UpdateTablesForFeature(
00148       INT_CLASS ClassTemplate,
00149       BIT_VECTOR ProtoMask,
00150       BIT_VECTOR ConfigMask,
00151       int FeatureNum,
00152       const INT_FEATURE_STRUCT* Feature,
00153       ScratchEvidence *evidence,
00154       int Debug);
00155 
        // Reads the evidence tables (const reference) and takes Result as a
        // non-const pointer; the meaning of the int return value is not
        // visible here.
00156   int FindBestMatch(INT_CLASS ClassTemplate,
00157                     const ScratchEvidence &tables,
00158                     INT_RESULT Result);
00159 
        // The four debug/display helpers below are declared only when
        // GRAPHICS_DISABLED is not defined.
00160 #ifndef GRAPHICS_DISABLED
00161   void DebugFeatureProtoError(
00162       INT_CLASS ClassTemplate,
00163       BIT_VECTOR ProtoMask,
00164       BIT_VECTOR ConfigMask,
00165       const ScratchEvidence &tables,
00166       inT16 NumFeatures,
00167       int Debug);
00168 
00169   void DisplayProtoDebugInfo(
00170       INT_CLASS ClassTemplate,
00171       BIT_VECTOR ProtoMask,
00172       BIT_VECTOR ConfigMask,
00173       const ScratchEvidence &tables,
00174       bool SeparateDebugWindows);
00175 
00176   void DisplayFeatureDebugInfo(
00177       INT_CLASS ClassTemplate,
00178       BIT_VECTOR ProtoMask,
00179       BIT_VECTOR ConfigMask,
00180       inT16 NumFeatures,
00181       const INT_FEATURE_STRUCT* Features,
00182       int AdaptFeatureThreshold,
00183       int Debug,
00184       bool SeparateDebugWindows);
00185 
00186   void DebugBestMatch(int BestMatch, INT_RESULT Result);
00187 #endif
00188 
00189 
        // NOTE(review): this second private: label is redundant (the section
        // above is already private); it only separates methods from data.
00190  private:
        // Lookup table with SE_TABLE_SIZE (512) uinT8 entries.
00191   uinT8 similarity_evidence_table_[SE_TABLE_SIZE];
00192   uinT32 evidence_table_mask_;
00193   uinT32 mult_trunc_shift_bits_;
00194   uinT32 table_trunc_shift_bits_;
00195   inT16 local_matcher_multiplier_;
        // Set to 0 by the constructor; presumably assigned from Init's
        // classify_debug_level argument -- confirm.
00196   tesseract::IntParam *classify_debug_level_;
00197   uinT32 evidence_mult_mask_;
00198 };
00199 
// Free debug helper, declared here and defined elsewhere. Presumably prints
// the evidence one proto contributes to each configuration -- confirm in
// intmatcher.cpp.
// NOTE(review): FeatureNum is typed INT_FEATURE rather than an integer index,
// despite its name -- confirm this is intended.
00203 void IMDebugConfiguration(INT_FEATURE FeatureNum,
00204                           uinT16 ActualProtoNum,
00205                           uinT8 Evidence,
00206                           BIT_VECTOR ConfigMask,
00207                           uinT32 ConfigWord);
00208 
// Free debug helper, declared here and defined elsewhere. Takes a pointer to
// uinT8 evidence values and a count; presumably prints ConfigCount entries of
// FeatureEvidence -- confirm in intmatcher.cpp.
// NOTE(review): FeatureNum is typed INT_FEATURE rather than an integer index,
// despite its name -- confirm this is intended.
00209 void IMDebugConfigurationSum(INT_FEATURE FeatureNum,
00210                              uinT8 *FeatureEvidence,
00211                              inT32 ConfigCount);
00212 
// Heap-sort helper, declared here and defined elsewhere. Takes a count n and
// two int arrays; presumably ra holds the sort keys and rb is permuted along
// with it -- confirm against the definition (including the index base).
// The register storage-class specifier has been dropped from ra and rb: it is
// deprecated in C++11 and ill-formed from C++17 on. It is not part of the
// function type, so the existing definition and all callers are unaffected.
00213 void HeapSort(int n, int ra[], int rb[]);
00214 
00218 #endif