Tesseract
3.02
|
00001 /****************************************************************************** 00002 ** Filename: intmatcher.h 00003 ** Purpose: Interface to high level generic classifier routines. 00004 ** Author: Robert Moss 00005 ** History: Wed Feb 13 15:24:15 MST 1991, RWM, Created. 00006 ** 00007 ** (c) Copyright Hewlett-Packard Company, 1988. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 ******************************************************************************/ 00018 #ifndef INTMATCHER_H 00019 #define INTMATCHER_H 00020 00021 #include "params.h" 00022 00023 // Character fragments could be present in the trained templaes 00024 // but turned on/off on the language-by-language basis or depending 00025 // on particular properties of the corpus (e.g. when we expect the 00026 // images to have low exposure). 00027 extern BOOL_VAR_H(disable_character_fragments, FALSE, 00028 "Do not include character fragments in the" 00029 " results of the classifier"); 00030 00031 extern INT_VAR_H(classify_integer_matcher_multiplier, 14, 00032 "Integer Matcher Multiplier 0-255: "); 00033 00034 00038 #include "intproto.h" 00039 #include "cutoffs.h" 00040 00041 struct INT_RESULT_STRUCT { 00042 FLOAT32 Rating; 00043 uinT8 Config; 00044 uinT8 Config2; 00045 uinT16 FeatureMisses; 00046 }; 00047 00048 typedef INT_RESULT_STRUCT *INT_RESULT; 00049 00050 00051 struct CP_RESULT_STRUCT { 00052 FLOAT32 Rating; 00053 INT_RESULT_STRUCT IMResult; 00054 CLASS_ID Class; 00055 }; 00056 00057 typedef CP_RESULT_STRUCT CLASS_PRUNER_RESULTS[MAX_NUM_CLASSES]; 00058 00059 /*---------------------------------------------------------------------------- 00060 Variables 00061 -----------------------------------------------------------------------------*/ 00062 00063 extern INT_VAR_H(classify_adapt_proto_thresh, 230, 00064 "Threshold for good protos during adaptive 0-255: "); 00065 00066 extern INT_VAR_H(classify_adapt_feature_thresh, 230, 00067 "Threshold for good features during adaptive 0-255: "); 00068 00073 #define SE_TABLE_BITS 9 00074 #define SE_TABLE_SIZE 512 00075 00076 struct ScratchEvidence { 00077 uinT8 feature_evidence_[MAX_NUM_CONFIGS]; 00078 int sum_feature_evidence_[MAX_NUM_CONFIGS]; 00079 uinT8 proto_evidence_[MAX_NUM_PROTOS][MAX_PROTO_INDEX]; 00080 00081 void Clear(const INT_CLASS class_template); 00082 void ClearFeatureEvidence(const INT_CLASS class_template); 00083 void NormalizeSums(INT_CLASS ClassTemplate, inT16 NumFeatures, 00084 inT32 used_features); 00085 void UpdateSumOfProtoEvidences( 00086 INT_CLASS ClassTemplate, BIT_VECTOR ConfigMask, inT16 NumFeatures); 00087 }; 00088 00089 00090 class IntegerMatcher { 00091 public: 00092 // Integer Matcher Theta Fudge (0-255). 00093 static const int kIntThetaFudge = 128; 00094 // Bits in Similarity to Evidence Lookup (8-9). 00095 static const int kEvidenceTableBits = 9; 00096 // Integer Evidence Truncation Bits (8-14). 00097 static const int kIntEvidenceTruncBits = 14; 00098 // Similarity to Evidence Table Exponential Multiplier. 00099 static const float kSEExponentialMultiplier; 00100 // Center of Similarity Curve. 00101 static const float kSimilarityCenter; 00102 00103 IntegerMatcher() : classify_debug_level_(0) {} 00104 00105 void Init(tesseract::IntParam *classify_debug_level, 00106 int classify_integer_matcher_multiplier); 00107 00108 void SetBaseLineMatch(); 00109 void SetCharNormMatch(int integer_matcher_multiplier); 00110 00111 void Match(INT_CLASS ClassTemplate, 00112 BIT_VECTOR ProtoMask, 00113 BIT_VECTOR ConfigMask, 00114 inT16 NumFeatures, 00115 const INT_FEATURE_STRUCT* Features, 00116 INT_RESULT Result, 00117 int AdaptFeatureThreshold, 00118 int Debug, 00119 bool SeparateDebugWindows); 00120 00121 // Applies the CN normalization factor to the given rating and returns 00122 // the modified rating. 00123 float ApplyCNCorrection(float rating, int blob_length, 00124 int normalization_factor); 00125 00126 int FindGoodProtos(INT_CLASS ClassTemplate, 00127 BIT_VECTOR ProtoMask, 00128 BIT_VECTOR ConfigMask, 00129 uinT16 BlobLength, 00130 inT16 NumFeatures, 00131 INT_FEATURE_ARRAY Features, 00132 PROTO_ID *ProtoArray, 00133 int AdaptProtoThreshold, 00134 int Debug); 00135 00136 int FindBadFeatures(INT_CLASS ClassTemplate, 00137 BIT_VECTOR ProtoMask, 00138 BIT_VECTOR ConfigMask, 00139 uinT16 BlobLength, 00140 inT16 NumFeatures, 00141 INT_FEATURE_ARRAY Features, 00142 FEATURE_ID *FeatureArray, 00143 int AdaptFeatureThreshold, 00144 int Debug); 00145 00146 private: 00147 int UpdateTablesForFeature( 00148 INT_CLASS ClassTemplate, 00149 BIT_VECTOR ProtoMask, 00150 BIT_VECTOR ConfigMask, 00151 int FeatureNum, 00152 const INT_FEATURE_STRUCT* Feature, 00153 ScratchEvidence *evidence, 00154 int Debug); 00155 00156 int FindBestMatch(INT_CLASS ClassTemplate, 00157 const ScratchEvidence &tables, 00158 INT_RESULT Result); 00159 00160 #ifndef GRAPHICS_DISABLED 00161 void DebugFeatureProtoError( 00162 INT_CLASS ClassTemplate, 00163 BIT_VECTOR ProtoMask, 00164 BIT_VECTOR ConfigMask, 00165 const ScratchEvidence &tables, 00166 inT16 NumFeatures, 00167 int Debug); 00168 00169 void DisplayProtoDebugInfo( 00170 INT_CLASS ClassTemplate, 00171 BIT_VECTOR ProtoMask, 00172 BIT_VECTOR ConfigMask, 00173 const ScratchEvidence &tables, 00174 bool SeparateDebugWindows); 00175 00176 void DisplayFeatureDebugInfo( 00177 INT_CLASS ClassTemplate, 00178 BIT_VECTOR ProtoMask, 00179 BIT_VECTOR ConfigMask, 00180 inT16 NumFeatures, 00181 const INT_FEATURE_STRUCT* Features, 00182 int AdaptFeatureThreshold, 00183 int Debug, 00184 bool SeparateDebugWindows); 00185 00186 void DebugBestMatch(int BestMatch, INT_RESULT Result); 00187 #endif 00188 00189 00190 private: 00191 uinT8 similarity_evidence_table_[SE_TABLE_SIZE]; 00192 uinT32 evidence_table_mask_; 00193 uinT32 mult_trunc_shift_bits_; 00194 uinT32 table_trunc_shift_bits_; 00195 inT16 local_matcher_multiplier_; 00196 tesseract::IntParam *classify_debug_level_; 00197 uinT32 evidence_mult_mask_; 00198 }; 00199 00203 void IMDebugConfiguration(INT_FEATURE FeatureNum, 00204 uinT16 ActualProtoNum, 00205 uinT8 Evidence, 00206 BIT_VECTOR ConfigMask, 00207 uinT32 ConfigWord); 00208 00209 void IMDebugConfigurationSum(INT_FEATURE FeatureNum, 00210 uinT8 *FeatureEvidence, 00211 inT32 ConfigCount); 00212 00213 void HeapSort (int n, register int ra[], register int rb[]); 00214 00218 #endif