tesseract-doxygen/classify/adaptmatch.cpp File Reference

#include <ctype.h>
#include "adaptmatch.h"
#include "oldlist.h"
#include "tessclas.h"
#include "fxdefs.h"
#include "matchdefs.h"
#include "adaptive.h"
#include "ocrfeatures.h"
#include "ratngs.h"
#include "general.h"
#include <stdio.h>
#include <math.h>
#include "varable.h"
#include "intmatcher.h"
#include "fpoint.h"
#include "mfoutline.h"
#include "emalloc.h"
#include "intproto.h"
#include "baseline.h"
#include "efio.h"
#include "permute.h"
#include "context.h"
#include "ndminx.h"
#include "const.h"
#include "globals.h"
#include "werd.h"
#include "callcpp.h"
#include "tordvars.h"
#include "classify.h"
#include "unicharset.h"
#include <string.h>
#include <stdlib.h>

Classes

struct  ADAPT_RESULTS
struct  PROTO_KEY

Namespaces

namespace  tesseract

Defines

#define ADAPT_TEMPLATE_SUFFIX   ".a"
#define MAX_MATCHES   10
#define UNLIKELY_NUM_FEAT   200
#define NO_DEBUG   0
#define MAX_ADAPTABLE_WERD_SIZE   40
#define ADAPTABLE_WERD   (GOOD_WERD + 0.05)
#define Y_DIM_OFFSET   (Y_SHIFT - BASELINE_Y_SHIFT)
#define WORST_POSSIBLE_RATING   (1.0)
#define MarginalMatch(Rating)   ((Rating) > matcher_great_threshold)
#define TempConfigReliable(Config)   ((Config)->NumTimesSeen >= matcher_min_examples_for_prototyping)
#define InitIntFX()   (FeaturesHaveBeenExtracted = FALSE)

Functions

void AdaptToChar (TBLOB *Blob, LINE_STATS *LineStats, CLASS_ID ClassId, FLOAT32 Threshold)
void AdaptToPunc (TBLOB *Blob, LINE_STATS *LineStats, CLASS_ID ClassId, FLOAT32 Threshold)
void AmbigClassifier (TBLOB *Blob, LINE_STATS *LineStats, INT_TEMPLATES Templates, UNICHAR_ID *Ambiguities, ADAPT_RESULTS *Results)
UNICHAR_ID * BaselineClassifier (TBLOB *Blob, LINE_STATS *LineStats, ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results)
void make_config_pruner (INT_TEMPLATES templates, CONFIG_PRUNER *config_pruner)
void CharNormClassifier (TBLOB *Blob, LINE_STATS *LineStats, INT_TEMPLATES Templates, ADAPT_RESULTS *Results)
void ClassifyAsNoise (ADAPT_RESULTS *Results)
int CompareCurrentRatings (const void *arg1, const void *arg2)
void ConvertMatchesToChoices (ADAPT_RESULTS *Results, BLOB_CHOICE_LIST *Choices)
void DebugAdaptiveClassifier (TBLOB *Blob, LINE_STATS *LineStats, ADAPT_RESULTS *Results)
void DoAdaptiveMatch (TBLOB *Blob, LINE_STATS *LineStats, ADAPT_RESULTS *Results)
void GetAdaptThresholds (TWERD *Word, LINE_STATS *LineStats, const WERD_CHOICE &BestChoice, const WERD_CHOICE &BestRawChoice, FLOAT32 Thresholds[])
UNICHAR_ID * GetAmbiguities (TBLOB *Blob, LINE_STATS *LineStats, CLASS_ID CorrectClass)
int tesseract::GetBaselineFeatures (TBLOB *Blob, LINE_STATS *LineStats, INT_TEMPLATES Templates, INT_FEATURE_ARRAY IntFeatures, CLASS_NORMALIZATION_ARRAY CharNormArray, inT32 *BlobLength)
int tesseract::GetIntBaselineFeatures (TBLOB *Blob, LINE_STATS *LineStats, INT_TEMPLATES Templates, INT_FEATURE_ARRAY IntFeatures, CLASS_NORMALIZATION_ARRAY CharNormArray, inT32 *BlobLength)
void InitMatcherRatings (register FLOAT32 *Rating)
PROTO_ID MakeNewTempProtos (FEATURE_SET Features, int NumBadFeat, FEATURE_ID BadFeat[], INT_CLASS IClass, ADAPT_CLASS Class, BIT_VECTOR TempProtoMask)
void MakePermanent (ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int ConfigId, TBLOB *Blob, LINE_STATS *LineStats)
int MakeTempProtoPerm (void *item1, void *item2)
int NumBlobsIn (TWERD *Word)
int NumOutlinesInBlob (TBLOB *Blob)
void PrintAdaptiveMatchResults (FILE *File, ADAPT_RESULTS *Results)
void RemoveBadMatches (ADAPT_RESULTS *Results)
void RemoveExtraPuncs (ADAPT_RESULTS *Results)
void SetAdaptiveThreshold (FLOAT32 Threshold)
void ShowBestMatchFor (TBLOB *Blob, LINE_STATS *LineStats, CLASS_ID ClassId, BOOL8 AdaptiveOn, BOOL8 PreTrainedOn)
int GetAdaptiveFeatures (TBLOB *Blob, LINE_STATS *LineStats, INT_FEATURE_ARRAY IntFeatures, FEATURE_SET *FloatFeatures)

Variables

bool classify_enable_adaptive_matcher = 1
bool classify_use_pre_adapted_templates = 0
bool classify_save_adapted_templates = 0
bool classify_enable_adaptive_debugger = 0
int matcher_debug_level = 0
int matcher_debug_flags = 0
int classify_learning_debug_level = 0
double matcher_good_threshold = 0.125
double matcher_great_threshold = 0.0
double matcher_perfect_threshold = 0.02
double matcher_bad_match_pad = 0.15
double matcher_rating_margin = 0.1
double matcher_avg_noise_size = 12.0
int matcher_permanent_classes_min = 1
int matcher_min_examples_for_prototyping = 3
double matcher_clustering_max_angle_delta = 0.015
bool classify_enable_int_fx = 1
bool classify_enable_new_adapt_rules = 1
double rating_scale = 1.5
double certainty_scale = 20.0
int matcher_failed_adaptations_before_reset = 150
double tessedit_class_miss_scale = 0.00390625
bool tess_cn_matching = 0
bool tess_bn_matching = 0

Define Documentation

#define ADAPT_TEMPLATE_SUFFIX   ".a"

---------------------------------------------------------------------------- Include Files and Type Defines ----------------------------------------------------------------------------

#define ADAPTABLE_WERD   (GOOD_WERD + 0.05)
 
#define InitIntFX()   (FeaturesHaveBeenExtracted = FALSE)
#define MarginalMatch(Rating)   ((Rating) > matcher_great_threshold)

---------------------------------------------------------------------------- Private Macros ----------------------------------------------------------------------------

#define MAX_ADAPTABLE_WERD_SIZE   40
#define MAX_MATCHES   10
#define NO_DEBUG   0
#define TempConfigReliable(Config)   ((Config)->NumTimesSeen >= matcher_min_examples_for_prototyping)
#define UNLIKELY_NUM_FEAT   200
#define WORST_POSSIBLE_RATING   (1.0)
#define Y_DIM_OFFSET   (Y_SHIFT - BASELINE_Y_SHIFT)

Function Documentation

void AdaptToChar ( TBLOB *  Blob,
LINE_STATS *  LineStats,
CLASS_ID  ClassId,
FLOAT32  Threshold 
)

---------------------------------------------------------------------------- Private Function Prototypes ----------------------------------------------------------------------------

void AdaptToPunc ( TBLOB *  Blob,
LINE_STATS *  LineStats,
CLASS_ID  ClassId,
FLOAT32  Threshold 
)
void AmbigClassifier ( TBLOB *  Blob,
LINE_STATS *  LineStats,
INT_TEMPLATES  Templates,
UNICHAR_ID *  Ambiguities,
ADAPT_RESULTS *  Results 
)
UNICHAR_ID* BaselineClassifier ( TBLOB *  Blob,
LINE_STATS *  LineStats,
ADAPT_TEMPLATES  Templates,
ADAPT_RESULTS *  Results 
)
void CharNormClassifier ( TBLOB *  Blob,
LINE_STATS *  LineStats,
INT_TEMPLATES  Templates,
ADAPT_RESULTS *  Results 
)
void ClassifyAsNoise ( ADAPT_RESULTS *  Results  ) 
int CompareCurrentRatings ( const void *  arg1,
const void *  arg2 
)
void ConvertMatchesToChoices ( ADAPT_RESULTS *  Results,
BLOB_CHOICE_LIST *  Choices 
)
void DebugAdaptiveClassifier ( TBLOB *  Blob,
LINE_STATS *  LineStats,
ADAPT_RESULTS *  Results 
)
void DoAdaptiveMatch ( TBLOB *  Blob,
LINE_STATS *  LineStats,
ADAPT_RESULTS *  Results 
)
int GetAdaptiveFeatures ( TBLOB *  Blob,
LINE_STATS *  LineStats,
INT_FEATURE_ARRAY  IntFeatures,
FEATURE_SET *  FloatFeatures 
)

---------------------------------------------------------------------------- Public Function Prototypes ----------------------------------------------------------------------------

void GetAdaptThresholds ( TWERD *  Word,
LINE_STATS *  LineStats,
const WERD_CHOICE &  BestChoice,
const WERD_CHOICE &  BestRawChoice,
FLOAT32  Thresholds[] 
)
UNICHAR_ID* GetAmbiguities ( TBLOB *  Blob,
LINE_STATS *  LineStats,
CLASS_ID  CorrectClass 
)
void InitMatcherRatings ( register FLOAT32 *  Rating  ) 
void make_config_pruner ( INT_TEMPLATES  templates,
CONFIG_PRUNER *  config_pruner 
)
PROTO_ID MakeNewTempProtos ( FEATURE_SET  Features,
int  NumBadFeat,
FEATURE_ID  BadFeat[],
INT_CLASS  IClass,
ADAPT_CLASS  Class,
BIT_VECTOR  TempProtoMask 
)
void MakePermanent ( ADAPT_TEMPLATES  Templates,
CLASS_ID  ClassId,
int  ConfigId,
TBLOB *  Blob,
LINE_STATS *  LineStats 
)
int MakeTempProtoPerm ( void *  item1,
void *  item2 
)
int NumBlobsIn ( TWERD *  Word  ) 
int NumOutlinesInBlob ( TBLOB *  Blob  ) 
void PrintAdaptiveMatchResults ( FILE *  File,
ADAPT_RESULTS *  Results 
)
void RemoveBadMatches ( ADAPT_RESULTS *  Results  ) 
void RemoveExtraPuncs ( ADAPT_RESULTS *  Results  ) 
void SetAdaptiveThreshold ( FLOAT32  Threshold  ) 
void ShowBestMatchFor ( TBLOB *  Blob,
LINE_STATS *  LineStats,
CLASS_ID  ClassId,
BOOL8  AdaptiveOn,
BOOL8  PreTrainedOn 
)

Variable Documentation

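double certainty_scale = 20.0

"Certainty scaling factor"

---------------------------------------------------------------------- V a r i a b l e s ---------------------------------------------------------------------- "Certainty scaling factor"

bool classify_enable_adaptive_debugger = 0

"Enable match debugger"

bool classify_enable_adaptive_matcher = 1

"Enable adaptive classifier"

bool classify_enable_int_fx = 1

"Enable integer fx"

bool classify_enable_new_adapt_rules = 1

"Enable new adaptation rules"

int classify_learning_debug_level = 0

"Learning Debug Level: "

bool classify_save_adapted_templates = 0

"Save adapted templates to a file"

bool classify_use_pre_adapted_templates = 0

"Use pre-adapted classifier templates"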

double matcher_avg_noise_size = 12.0

"Avg. noise blob length: "

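double matcher_bad_match_pad = 0.15

"Bad Match Pad (0-1)"

double matcher_clustering_max_angle_delta = 0.015

"Maximum angle delta for prototype clustering"

int matcher_debug_flags = 0

"Matcher Debug Flags"

int matcher_debug_level = 0

"Matcher Debug Level"

int matcher_failed_adaptations_before_reset = 150

"Number of failed adaptions before adapted templates reset"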

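double matcher_good_threshold = 0.125

"Good Match (0-1)"

double matcher_great_threshold = 0.0

"Great Match (0-1)"

int matcher_min_examples_for_prototyping = 3

"Reliable Config Threshold"

double matcher_perfect_threshold = 0.02

"Perfect Match (0-1)"

int matcher_permanent_classes_min = 1

"Min # of permanent classes"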

double matcher_rating_margin = 0.1

"New template margin (0-1)"

double rating_scale = 1.5

"Rating scaling factor"

bool tess_bn_matching = 0

"Baseline Normalized Matching"

bool tess_cn_matching = 0

"Character Normalized Matching"

double tessedit_class_miss_scale = 0.00390625

"Scale factor for features not used"

Generated on Sun Jul 18 17:10:50 2010 for Tesseract by  doxygen 1.6.3