Tesseract 3.01
|
00001 /****************************************************************************** 00002 ** Filename: intmatcher.h 00003 ** Purpose: Interface to high level generic classifier routines. 00004 ** Author: Robert Moss 00005 ** History: Wed Feb 13 15:24:15 MST 1991, RWM, Created. 00006 ** 00007 ** (c) Copyright Hewlett-Packard Company, 1988. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 ******************************************************************************/ 00018 #ifndef INTMATCHER_H 00019 #define INTMATCHER_H 00020 00021 #include "params.h" 00022 00023 // Character fragments could be present in the trained templaes 00024 // but turned on/off on the language-by-language basis or depending 00025 // on particular properties of the corpus (e.g. when we expect the 00026 // images to have low exposure). 00027 extern BOOL_VAR_H(disable_character_fragments, FALSE, 00028 "Do not include character fragments in the" 00029 " results of the classifier"); 00030 00031 extern INT_VAR_H(classify_integer_matcher_multiplier, 14, 00032 "Integer Matcher Multiplier 0-255: "); 00033 00034 00038 #include "intproto.h" 00039 #include "cutoffs.h" 00040 00041 struct INT_RESULT_STRUCT { 00042 FLOAT32 Rating; 00043 uinT8 Config; 00044 uinT8 Config2; 00045 uinT16 FeatureMisses; 00046 }; 00047 00048 typedef INT_RESULT_STRUCT *INT_RESULT; 00049 00050 00051 struct CP_RESULT_STRUCT { 00052 FLOAT32 Rating; 00053 INT_RESULT_STRUCT IMResult; 00054 CLASS_ID Class; 00055 }; 00056 00057 typedef CP_RESULT_STRUCT CLASS_PRUNER_RESULTS[MAX_NUM_CLASSES]; 00058 00059 typedef uinT8 CLASS_NORMALIZATION_ARRAY[MAX_NUM_CLASSES]; 00060 00061 /*---------------------------------------------------------------------------- 00062 Variables 00063 -----------------------------------------------------------------------------*/ 00064 00065 extern INT_VAR_H(classify_adapt_proto_thresh, 230, 00066 "Threshold for good protos during adaptive 0-255: "); 00067 00068 extern INT_VAR_H(classify_adapt_feature_thresh, 230, 00069 "Threshold for good features during adaptive 0-255: "); 00070 00075 #define SE_TABLE_BITS 9 00076 #define SE_TABLE_SIZE 512 00077 00078 struct ScratchEvidence { 00079 uinT8 feature_evidence_[MAX_NUM_CONFIGS]; 00080 int sum_feature_evidence_[MAX_NUM_CONFIGS]; 00081 uinT8 proto_evidence_[MAX_NUM_PROTOS][MAX_PROTO_INDEX]; 00082 00083 void Clear(const INT_CLASS class_template); 00084 void ClearFeatureEvidence(const INT_CLASS class_template); 00085 void NormalizeSums(INT_CLASS ClassTemplate, inT16 NumFeatures, 00086 inT32 used_features); 00087 void UpdateSumOfProtoEvidences( 00088 INT_CLASS ClassTemplate, BIT_VECTOR ConfigMask, inT16 NumFeatures); 00089 }; 00090 00091 00092 class IntegerMatcher { 00093 public: 00094 // Integer Matcher Theta Fudge (0-255). 00095 static const int kIntThetaFudge = 128; 00096 // Bits in Similarity to Evidence Lookup (8-9). 00097 static const int kEvidenceTableBits = 9; 00098 // Integer Evidence Truncation Bits (8-14). 00099 static const int kIntEvidenceTruncBits = 14; 00100 // Similarity to Evidence Table Exponential Multiplier. 00101 static const float kSEExponentialMultiplier; 00102 // Center of Similarity Curve. 00103 static const float kSimilarityCenter; 00104 00105 IntegerMatcher() : classify_debug_level_(0) {} 00106 00107 void Init(tesseract::IntParam *classify_debug_level, 00108 int classify_integer_matcher_multiplier); 00109 00110 void SetBaseLineMatch(); 00111 void SetCharNormMatch(int integer_matcher_multiplier); 00112 00113 void Match(INT_CLASS ClassTemplate, 00114 BIT_VECTOR ProtoMask, 00115 BIT_VECTOR ConfigMask, 00116 uinT16 BlobLength, 00117 inT16 NumFeatures, 00118 INT_FEATURE_ARRAY Features, 00119 uinT8 NormalizationFactor, 00120 INT_RESULT Result, 00121 int AdaptFeatureThreshold, 00122 int Debug, 00123 bool SeparateDebugWindows); 00124 00125 int FindGoodProtos(INT_CLASS ClassTemplate, 00126 BIT_VECTOR ProtoMask, 00127 BIT_VECTOR ConfigMask, 00128 uinT16 BlobLength, 00129 inT16 NumFeatures, 00130 INT_FEATURE_ARRAY Features, 00131 PROTO_ID *ProtoArray, 00132 int AdaptProtoThreshold, 00133 int Debug); 00134 00135 int FindBadFeatures(INT_CLASS ClassTemplate, 00136 BIT_VECTOR ProtoMask, 00137 BIT_VECTOR ConfigMask, 00138 uinT16 BlobLength, 00139 inT16 NumFeatures, 00140 INT_FEATURE_ARRAY Features, 00141 FEATURE_ID *FeatureArray, 00142 int AdaptFeatureThreshold, 00143 int Debug); 00144 00145 private: 00146 int UpdateTablesForFeature( 00147 INT_CLASS ClassTemplate, 00148 BIT_VECTOR ProtoMask, 00149 BIT_VECTOR ConfigMask, 00150 int FeatureNum, 00151 INT_FEATURE Feature, 00152 ScratchEvidence *evidence, 00153 int Debug); 00154 00155 int FindBestMatch(INT_CLASS ClassTemplate, 00156 const ScratchEvidence &tables, 00157 uinT16 BlobLength, 00158 uinT8 NormalizationFactor, 00159 INT_RESULT Result); 00160 00161 #ifndef GRAPHICS_DISABLED 00162 void DebugFeatureProtoError( 00163 INT_CLASS ClassTemplate, 00164 BIT_VECTOR ProtoMask, 00165 BIT_VECTOR ConfigMask, 00166 const ScratchEvidence &tables, 00167 inT16 NumFeatures, 00168 int Debug); 00169 00170 void DisplayProtoDebugInfo( 00171 INT_CLASS ClassTemplate, 00172 BIT_VECTOR ProtoMask, 00173 BIT_VECTOR ConfigMask, 00174 const ScratchEvidence &tables, 00175 bool SeparateDebugWindows); 00176 00177 void DisplayFeatureDebugInfo( 00178 INT_CLASS ClassTemplate, 00179 BIT_VECTOR ProtoMask, 00180 BIT_VECTOR ConfigMask, 00181 inT16 NumFeatures, 00182 INT_FEATURE_ARRAY Features, 00183 int AdaptFeatureThreshold, 00184 int Debug, 00185 bool SeparateDebugWindows); 00186 00187 void DebugBestMatch(int BestMatch, 00188 INT_RESULT Result, 00189 uinT16 BlobLength, 00190 uinT8 NormalizationFactor); 00191 #endif 00192 00193 00194 private: 00195 uinT8 similarity_evidence_table_[SE_TABLE_SIZE]; 00196 uinT32 evidence_table_mask_; 00197 uinT32 mult_trunc_shift_bits_; 00198 uinT32 table_trunc_shift_bits_; 00199 inT16 local_matcher_multiplier_; 00200 tesseract::IntParam *classify_debug_level_; 00201 uinT32 evidence_mult_mask_; 00202 }; 00203 00207 void IMDebugConfiguration(INT_FEATURE FeatureNum, 00208 uinT16 ActualProtoNum, 00209 uinT8 Evidence, 00210 BIT_VECTOR ConfigMask, 00211 uinT32 ConfigWord); 00212 00213 void IMDebugConfigurationSum(INT_FEATURE FeatureNum, 00214 uinT8 *FeatureEvidence, 00215 inT32 ConfigCount); 00216 00217 void HeapSort (int n, register int ra[], register int rb[]); 00218 00222 #endif