tesseract-doxygen/ccmain/adaptions.h File Reference

#include "charsample.h"
#include "charcut.h"
#include "notdll.h"

Go to the source code of this file.

Functions

void print_em_stats (CHAR_SAMPLES_LIST *char_clusters, CHAR_SAMPLE_LIST *chars_waiting)
CHAR_SAMPLE * clip_sample (PIXROW *pixrow, IMAGELINE *imlines, TBOX pix_box, BOOL8 white_on_black, char c)
void display_cluster_prototypes (CHAR_SAMPLES_LIST *char_clusters)
void reject_all_ems (WERD_RES *word)
void reject_all_fullstops (WERD_RES *word)
void reject_suspect_fullstops (WERD_RES *word)
BOOL8 suspect_em (WERD_RES *word, inT16 index)
BOOL8 suspect_fullstop (WERD_RES *word, inT16 i)

Variables

bool tessedit_reject_ems = FALSE
bool tessedit_reject_suspect_ems = FALSE
double tessedit_cluster_t1 = 0.20
double tessedit_cluster_t2 = 0.40
double tessedit_cluster_t3 = 0.12
double tessedit_cluster_accept_fraction = 0.80
int tessedit_cluster_min_size = 3
bool tessedit_cluster_debug = FALSE
bool tessedit_use_best_sample = FALSE
bool tessedit_test_cluster_input = FALSE
bool tessedit_matrix_match = TRUE
bool tessedit_old_matrix_match = FALSE
bool tessedit_mm_use_non_adaption_set = FALSE
char * tessedit_non_adaption_set = ",.;:'~@*"
bool tessedit_mm_adapt_using_prototypes = TRUE
bool tessedit_mm_use_prototypes = TRUE
bool tessedit_mm_use_rejmap = FALSE
bool tessedit_mm_all_rejects = FALSE
bool tessedit_mm_only_match_same_char = FALSE
bool tessedit_process_rns = FALSE
bool tessedit_demo_adaption = FALSE
int tessedit_demo_word1 = 62
int tessedit_demo_word2 = 64
char * tessedit_demo_file = "academe"
bool tessedit_adapt_to_char_fragments = TRUE

Function Documentation

CHAR_SAMPLE* clip_sample ( PIXROW *  pixrow,
IMAGELINE *  imlines,
TBOX  pix_box,
BOOL8  white_on_black,
char  c 
)
void display_cluster_prototypes ( CHAR_SAMPLES_LIST *  char_clusters  ) 
void print_em_stats ( CHAR_SAMPLES_LIST *  char_clusters,
CHAR_SAMPLE_LIST *  chars_waiting 
)
void reject_all_ems ( WERD_RES *  word  ) 
void reject_all_fullstops ( WERD_RES *  word  ) 
void reject_suspect_fullstops ( WERD_RES *  word  ) 
BOOL8 suspect_em ( WERD_RES *  word,
inT16  index 
)
BOOL8 suspect_fullstop ( WERD_RES *  word,
inT16  i 
)

Variable Documentation

"Adapt to words that contain a character composed from fragments"

"Largest fraction of characters in cluster for it to be used for adaption"

"Generate and print debug information for adaption by clustering"

"Smallest number of samples in a cluster for it to be used for adaption"

double tessedit_cluster_t1 = 0.20

"t1 threshold for clustering samples"

double tessedit_cluster_t2 = 0.40

"t2 threshold for clustering samples"

double tessedit_cluster_t3 = 0.12

"Extra threshold for clustering samples, only keep a new sample if best score greater than this value"

"Display cut images and matrix match for demo purposes"

char* tessedit_demo_file = "academe"

"Name of document containing demo words"

"Word number of first word to display"

"Word number of second word to display"

"Use matrix matcher"

"Use prototypes when adapting"

"Adapt to all characters using matrix matcher"

"Only match samples against clusters for the same character"

"Don't try to adapt to characters on this list"

"Use prototypes as clusters are built"

"Adapt to characters using reject map"

char* tessedit_non_adaption_set = ",.;:'~@*"

"Characters to be avoided when adapting"

"Use matrix matcher"

"Handle m - rn ambigs"

"Reject all m's"

"Reject suspect m's"

"Set reject map to enable cluster input to be measured"

"Use best sample from cluster when adapting"

Generated on Sun Jul 18 17:10:48 2010 for Tesseract by  doxygen 1.6.3