Tesseract 3.01
/data/source/tesseract-ocr/classify/intmatcher.h
Go to the documentation of this file.
00001 /******************************************************************************
00002  **     Filename:    intmatcher.h
00003  **     Purpose:     Interface to high level generic classifier routines.
00004  **     Author:      Robert Moss
00005  **     History:     Wed Feb 13 15:24:15 MST 1991, RWM, Created.
00006  **
00007  **     (c) Copyright Hewlett-Packard Company, 1988.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  ******************************************************************************/
00018 #ifndef   INTMATCHER_H
00019 #define   INTMATCHER_H
00020 
00021 #include "params.h"
00022 
00023 // Character fragments could be present in the trained templates
00024 // but turned on/off on a language-by-language basis or depending
00025 // on particular properties of the corpus (e.g. when we expect the
00026 // images to have low exposure).
00027 extern BOOL_VAR_H(disable_character_fragments, FALSE,
00028                   "Do not include character fragments in the"
00029                   " results of the classifier");
      // ^ The two adjacent string literals concatenate into a single help string.
      //   NOTE(review): FALSE is presumably the default value -- confirm against
      //   the BOOL_VAR_H macro definition, which is not shown in this file.
00030 
      // Integer-matcher multiplier parameter; its help string gives the range as
      // 0-255. NOTE(review): 14 is presumably the default value -- confirm against
      // the INT_VAR_H macro definition. IntegerMatcher::Init() takes an int
      // parameter with this same name.
00031 extern INT_VAR_H(classify_integer_matcher_multiplier, 14,
00032                  "Integer Matcher Multiplier  0-255:   ");
00033 
00034 
00038 #include "intproto.h"
00039 #include "cutoffs.h"
00040 
00041 struct INT_RESULT_STRUCT {
        // Result record for the integer matcher. Within this header it is passed
        // by pointer (as INT_RESULT) to IntegerMatcher::Match, FindBestMatch and
        // DebugBestMatch, and embedded by value in CP_RESULT_STRUCT::IMResult.
00042   FLOAT32 Rating;        // Match rating; its scale and polarity are not visible in this header.
00043   uinT8 Config;          // Presumably the index of the best-matching config -- TODO confirm.
00044   uinT8 Config2;         // Presumably the index of the runner-up config -- TODO confirm.
00045   uinT16 FeatureMisses;  // Presumably a count of features that failed to match -- TODO confirm.
00046 };
00047 
      // Pointer alias; this is the type of the Result parameters declared in this header.
00048 typedef INT_RESULT_STRUCT *INT_RESULT;
00049 
00050 
00051 struct CP_RESULT_STRUCT {
        // Element type of the CLASS_PRUNER_RESULTS array: one rating, one embedded
        // integer-matcher result, and the class id they belong to.
00052   FLOAT32 Rating;              // Rating for this entry; NOTE(review): presumably the class-pruner rating, distinct from IMResult.Rating -- confirm.
00053   INT_RESULT_STRUCT IMResult;  // Embedded by value (not a pointer).
00054   CLASS_ID Class;              // Identifier of the class this entry refers to.
00055 };
00056 
00057 typedef CP_RESULT_STRUCT CLASS_PRUNER_RESULTS[MAX_NUM_CLASSES];  // Fixed-size array of MAX_NUM_CLASSES CP_RESULT_STRUCT entries.
      // NOTE(review): as an array typedef, this decays to CP_RESULT_STRUCT* when
      // used as a function parameter type.
00058 
00059 typedef uinT8 CLASS_NORMALIZATION_ARRAY[MAX_NUM_CLASSES];  // Fixed-size array of MAX_NUM_CLASSES bytes; presumably one normalization value per class -- confirm.
00060 
00061 /*----------------------------------------------------------------------------
00062             Variables
00063 -----------------------------------------------------------------------------*/
00064 
00065 extern INT_VAR_H(classify_adapt_proto_thresh, 230,
00066                  "Threshold for good protos during adaptive 0-255:   ");
      // ^ NOTE(review): 230 is presumably the default value; FindGoodProtos() has
      //   an AdaptProtoThreshold argument that this parameter presumably feeds --
      //   confirm at the call sites.
00067 
      // NOTE(review): 230 is presumably the default value; Match() and
      // FindBadFeatures() have an AdaptFeatureThreshold argument that this
      // parameter presumably feeds -- confirm at the call sites.
00068 extern INT_VAR_H(classify_adapt_feature_thresh, 230,
00069                  "Threshold for good features during adaptive 0-255:   ");
00070 
00075 #define  SE_TABLE_BITS    9    // log2 of SE_TABLE_SIZE (1 << 9 == 512).
00076 #define  SE_TABLE_SIZE  512    // Length of IntegerMatcher::similarity_evidence_table_.
      // NOTE(review): the two macros are maintained by hand, and
      // IntegerMatcher::kEvidenceTableBits repeats the value 9; presumably all
      // three must be changed together -- confirm before editing any of them.
00077 
00078 struct ScratchEvidence {
        // Evidence tables shared between IntegerMatcher's private helpers:
        // UpdateTablesForFeature receives a mutable pointer to one of these, while
        // FindBestMatch and the debug helpers receive it by const reference.
        // All storage is inline fixed-size arrays, so sizeof(ScratchEvidence)
        // grows with MAX_NUM_CONFIGS and MAX_NUM_PROTOS * MAX_PROTO_INDEX.
00079   uinT8 feature_evidence_[MAX_NUM_CONFIGS];    // One byte per config; presumably evidence for the feature currently being processed -- confirm.
00080   int sum_feature_evidence_[MAX_NUM_CONFIGS];  // One int per config; presumably the running total over features -- confirm.
00081   uinT8 proto_evidence_[MAX_NUM_PROTOS][MAX_PROTO_INDEX];  // One byte per (proto, proto-index) pair.
00082 
        // The member functions below are only declared here. NOTE(review): the
        // descriptions are inferred from names and signatures -- confirm against
        // the definitions.
        // Clear: presumably resets the tables for class_template.
00083   void Clear(const INT_CLASS class_template);
        // ClearFeatureEvidence: presumably resets only feature_evidence_.
00084   void ClearFeatureEvidence(const INT_CLASS class_template);
        // NormalizeSums: presumably rescales sum_feature_evidence_ using
        // NumFeatures and used_features.
00085   void NormalizeSums(INT_CLASS ClassTemplate, inT16 NumFeatures,
00086                      inT32 used_features);
        // UpdateSumOfProtoEvidences: presumably folds proto_evidence_ into
        // sum_feature_evidence_ for the configs selected by ConfigMask.
00087   void UpdateSumOfProtoEvidences(
00088     INT_CLASS ClassTemplate, BIT_VECTOR ConfigMask, inT16 NumFeatures);
00089 };
00090 
00091 
00092 class IntegerMatcher {
        // Integer matcher interface. Public entry points: Init, SetBaseLineMatch,
        // SetCharNormMatch, Match, FindGoodProtos and FindBadFeatures. Only
        // declarations are visible in this header, so every note about behavior
        // beyond the signatures is marked as an assumption to be confirmed
        // against the definitions.
00093  public:
00094   // Integer Matcher Theta Fudge (0-255).
00095   static const int kIntThetaFudge = 128;
00096   // Bits in Similarity to Evidence Lookup (8-9).
        // NOTE(review): same value as the SE_TABLE_BITS macro (9); presumably the
        // two must be kept in sync -- confirm.
00097   static const int kEvidenceTableBits = 9;
00098   // Integer Evidence Truncation Bits (8-14).
00099   static const int kIntEvidenceTruncBits = 14;
00100   // Similarity to Evidence Table Exponential Multiplier.
        // (Declared without an in-class initializer; the value is not in this header.)
00101   static const float kSEExponentialMultiplier;
00102   // Center of Similarity Curve.
        // (Declared without an in-class initializer; the value is not in this header.)
00103   static const float kSimilarityCenter;
00104 
        // Initializes only classify_debug_level_ (to 0, i.e. a null pointer); no
        // other data member is given a value by this constructor.
        // NOTE(review): presumably Init() must run before the matcher is used --
        // confirm.
00105   IntegerMatcher() : classify_debug_level_(0) {}
00106 
        // Takes a pointer to the debug-level parameter and a multiplier value.
        // NOTE(review): presumably stores the pointer in classify_debug_level_ and
        // builds the lookup table and masks -- confirm. The second parameter has
        // the same name as the classify_integer_matcher_multiplier parameter
        // declared near the top of this header.
00107   void Init(tesseract::IntParam *classify_debug_level,
00108             int classify_integer_matcher_multiplier);
00109 
        // NOTE(review): these two presumably switch local_matcher_multiplier_
        // between a baseline mode and a character-normalized mode -- confirm.
00110   void SetBaseLineMatch();
00111   void SetCharNormMatch(int integer_matcher_multiplier);
00112 
        // Match: returns void; Result is a pointer to an INT_RESULT_STRUCT and is
        // presumably where the outcome is written -- confirm.
        //   ProtoMask / ConfigMask  bit vectors; presumably select the protos and
        //                           configs that take part -- confirm.
        //   NumFeatures             presumably the number of entries in Features.
        //   BlobLength, NormalizationFactor
        //                           the same pair is taken by FindBestMatch and
        //                           DebugBestMatch.
        //   AdaptFeatureThreshold, Debug, SeparateDebugWindows
        //                           the same three are taken by
        //                           DisplayFeatureDebugInfo.
00113   void Match(INT_CLASS ClassTemplate,
00114              BIT_VECTOR ProtoMask,
00115              BIT_VECTOR ConfigMask,
00116              uinT16 BlobLength,
00117              inT16 NumFeatures,
00118              INT_FEATURE_ARRAY Features,
00119              uinT8 NormalizationFactor,
00120              INT_RESULT Result,
00121              int AdaptFeatureThreshold,
00122              int Debug,
00123              bool SeparateDebugWindows);
00124 
        // FindGoodProtos: returns an int and takes a non-const PROTO_ID pointer.
        // NOTE(review): presumably ProtoArray is an output buffer and the return
        // value is the number of ids written; the required capacity is not
        // visible in this header -- confirm.
00125   int FindGoodProtos(INT_CLASS ClassTemplate,
00126                      BIT_VECTOR ProtoMask,
00127                      BIT_VECTOR ConfigMask,
00128                      uinT16 BlobLength,
00129                      inT16 NumFeatures,
00130                      INT_FEATURE_ARRAY Features,
00131                      PROTO_ID *ProtoArray,
00132                      int AdaptProtoThreshold,
00133                      int Debug);
00134 
        // FindBadFeatures: same parameter list as FindGoodProtos except that it
        // takes a FEATURE_ID pointer and AdaptFeatureThreshold.
        // NOTE(review): presumably FeatureArray is an output buffer and the
        // return value is the number of ids written -- confirm.
00135   int FindBadFeatures(INT_CLASS ClassTemplate,
00136                       BIT_VECTOR ProtoMask,
00137                       BIT_VECTOR ConfigMask,
00138                       uinT16 BlobLength,
00139                       inT16 NumFeatures,
00140                       INT_FEATURE_ARRAY Features,
00141                       FEATURE_ID *FeatureArray,
00142                       int AdaptFeatureThreshold,
00143                       int Debug);
00144 
00145  private:
        // Takes one feature (its number and the INT_FEATURE itself) and a mutable
        // ScratchEvidence pointer. NOTE(review): presumably accumulates that
        // feature's evidence into *evidence; the meaning of the int return value
        // is not visible here -- confirm.
00146   int UpdateTablesForFeature(
00147       INT_CLASS ClassTemplate,
00148       BIT_VECTOR ProtoMask,
00149       BIT_VECTOR ConfigMask,
00150       int FeatureNum,
00151       INT_FEATURE Feature,
00152       ScratchEvidence *evidence,
00153       int Debug);
00154 
        // Reads the evidence tables (const reference) and takes the Result
        // pointer. NOTE(review): presumably fills *Result and returns the best
        // rating or config -- confirm.
00155   int FindBestMatch(INT_CLASS ClassTemplate,
00156                     const ScratchEvidence &tables,
00157                     uinT16 BlobLength,
00158                     uinT8 NormalizationFactor,
00159                     INT_RESULT Result);
00160 
        // The four debug helpers below are declared only when GRAPHICS_DISABLED
        // is not defined, so code that calls them must be guarded the same way.
00161 #ifndef GRAPHICS_DISABLED
00162   void DebugFeatureProtoError(
00163       INT_CLASS ClassTemplate,
00164       BIT_VECTOR ProtoMask,
00165       BIT_VECTOR ConfigMask,
00166       const ScratchEvidence &tables,
00167       inT16 NumFeatures,
00168       int Debug);
00169 
00170   void DisplayProtoDebugInfo(
00171       INT_CLASS ClassTemplate,
00172       BIT_VECTOR ProtoMask,
00173       BIT_VECTOR ConfigMask,
00174       const ScratchEvidence &tables,
00175       bool SeparateDebugWindows);
00176 
00177   void DisplayFeatureDebugInfo(
00178       INT_CLASS ClassTemplate,
00179       BIT_VECTOR ProtoMask,
00180       BIT_VECTOR ConfigMask,
00181       inT16 NumFeatures,
00182       INT_FEATURE_ARRAY Features,
00183       int AdaptFeatureThreshold,
00184       int Debug,
00185       bool SeparateDebugWindows);
00186 
00187   void DebugBestMatch(int BestMatch,
00188                       INT_RESULT Result,
00189                       uinT16 BlobLength,
00190                       uinT8 NormalizationFactor);
00191 #endif
00192 
00193 
        // NOTE(review): this second `private:` is redundant (access is already
        // private at this point); it only serves to separate data members from
        // the helper functions above.
00194  private:
00195   uinT8 similarity_evidence_table_[SE_TABLE_SIZE];  // SE_TABLE_SIZE (512) byte entries.
00196   uinT32 evidence_table_mask_;
00197   uinT32 mult_trunc_shift_bits_;
00198   uinT32 table_trunc_shift_bits_;
00199   inT16 local_matcher_multiplier_;
00200   tesseract::IntParam *classify_debug_level_;  // The only member the constructor initializes (to 0). Ownership is not visible in this header.
00201   uinT32 evidence_mult_mask_;
00202 };
00203 
00207 void IMDebugConfiguration(INT_FEATURE FeatureNum,  // NOTE(review): typed INT_FEATURE here, whereas IntegerMatcher::UpdateTablesForFeature takes `int FeatureNum` -- confirm against the definition.
00208                           uinT16 ActualProtoNum,
00209                           uinT8 Evidence,
00210                           BIT_VECTOR ConfigMask,
00211                           uinT32 ConfigWord);
      // ^ Free function (not a member of IntegerMatcher), declared
      //   unconditionally, i.e. outside the GRAPHICS_DISABLED guard used for the
      //   class's debug helpers. NOTE(review): presumably a debug printout for
      //   one feature/proto pair -- confirm.
00212 
00213 void IMDebugConfigurationSum(INT_FEATURE FeatureNum,  // NOTE(review): typed INT_FEATURE although named like an index -- confirm against the definition.
00214                              uinT8 *FeatureEvidence,  // Non-const byte pointer; presumably an array with ConfigCount entries -- confirm.
00215                              inT32 ConfigCount);
      // ^ Free function (not a member of IntegerMatcher). NOTE(review): presumably
      //   a debug printout of the per-config evidence for one feature -- confirm.
00216 
00217 void HeapSort(int n, int ra[], int rb[]);
      // ^ Takes an element count n and two int arrays (which decay to int*).
      //   NOTE(review): presumably sorts ra and applies the same reordering to
      //   rb; the index base and the sort direction are not visible in this
      //   header -- confirm against the definition.
      //   The `register` storage-class specifier was dropped from both
      //   parameters: it is not part of the function's type (so this
      //   declaration still matches a definition that keeps it), it is
      //   deprecated in C++11, and it is ill-formed from C++17 onwards.
00218 
00222 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines