00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #ifndef ADAPTIONS_H
00022 #define ADAPTIONS_H
00023
00024 #include "charsample.h"
00025 #include "charcut.h"
00026 #include "notdll.h"
00027
/*
 * Extern declarations of the tunable variables used by the adaption code.
 *
 * Each *_VAR_H macro invocation below is given three arguments: the variable
 * name, a value, and a description string.  The macro definitions are not
 * visible in this header; presumably the value is the default and the string
 * is help text - TODO confirm against the macro definitions.
 */

/* Rejection of the letter "m" (see the description strings). */
00028 extern BOOL_VAR_H (tessedit_reject_ems, FALSE, "Reject all m's");
00029 extern BOOL_VAR_H (tessedit_reject_suspect_ems, FALSE, "Reject suspect m's");

/* Thresholds and size limits for clustering samples. */
00030 extern double_VAR_H (tessedit_cluster_t1, 0.20,
00031 "t1 threshold for clustering samples");
00032 extern double_VAR_H (tessedit_cluster_t2, 0.40,
00033 "t2 threshold for clustering samples");
00034 extern double_VAR_H (tessedit_cluster_t3, 0.12,
00035 "Extra threshold for clustering samples, only keep a new sample if best score greater than this value");
00036 extern double_VAR_H (tessedit_cluster_accept_fraction, 0.80,
00037 "Largest fraction of characters in cluster for it to be used for adaption");
00038 extern INT_VAR_H (tessedit_cluster_min_size, 3,
00039 "Smallest number of samples in a cluster for it to be used for adaption");
00040 extern BOOL_VAR_H (tessedit_cluster_debug, FALSE,
00041 "Generate and print debug information for adaption by clustering");
00042 extern BOOL_VAR_H (tessedit_use_best_sample, FALSE,
00043 "Use best sample from cluster when adapting");
00044 extern BOOL_VAR_H (tessedit_test_cluster_input, FALSE,
00045 "Set reject map to enable cluster input to be measured");

/* Matrix matcher switches. */
00046 extern BOOL_VAR_H (tessedit_matrix_match, TRUE, "Use matrix matcher");
/* NOTE(review): the description below is identical to the one given for
 * tessedit_matrix_match; it probably should mention the old matcher. */
00047 extern BOOL_VAR_H (tessedit_old_matrix_match, FALSE, "Use matrix matcher");
00048 extern BOOL_VAR_H (tessedit_mm_use_non_adaption_set, FALSE,
00049 "Don't try to adapt to characters on this list");
/* Characters excluded from adaption; presumably only consulted when
 * tessedit_mm_use_non_adaption_set is set - TODO confirm in the definition
 * file. */
00050 extern STRING_VAR_H (tessedit_non_adaption_set, ",.;:'~@*",
00051 "Characters to be avoided when adapting");
00052 extern BOOL_VAR_H (tessedit_mm_adapt_using_prototypes, TRUE,
00053 "Use prototypes when adapting");
00054 extern BOOL_VAR_H (tessedit_mm_use_prototypes, TRUE,
00055 "Use prototypes as clusters are built");
00056 extern BOOL_VAR_H (tessedit_mm_use_rejmap, FALSE,
00057 "Adapt to characters using reject map");
/* NOTE(review): the description below contains a stray comma after "using". */
00058 extern BOOL_VAR_H (tessedit_mm_all_rejects, FALSE,
00059 "Adapt to all characters using, matrix matcher");
00060 extern BOOL_VAR_H (tessedit_mm_only_match_same_char, FALSE,
00061 "Only match samples against clusters for the same character");
00062 extern BOOL_VAR_H (tessedit_process_rns, FALSE, "Handle m - rn ambigs");

/* Demo display settings. */
00063 extern BOOL_VAR_H (tessedit_demo_adaption, FALSE,
00064 "Display cut images and matrix match for demo purposes");
00065 extern INT_VAR_H (tessedit_demo_word1, 62,
00066 "Word number of first word to display");
00067 extern INT_VAR_H (tessedit_demo_word2, 64,
00068 "Word number of second word to display");
00069 extern STRING_VAR_H (tessedit_demo_file, "academe",
00070 "Name of document containing demo words");

/* NOTE(review): the two adjacent string literals below concatenate to
 * "...contain  a character composed form fragments" - there is a double
 * space at the join, and "form" is presumably a typo for "from".  Left
 * unchanged here because the string may need to match the definition. */
00071 extern BOOL_VAR_H(tessedit_adapt_to_char_fragments, TRUE,
00072 "Adapt to words that contain "
00073 " a character composed form fragments");
00074
/*
 * Prototypes for the adaption helpers.  Only the declarations are visible in
 * this header, so the remarks below describe the signatures; any statement
 * about behaviour is inferred from the names and must be confirmed against
 * the definitions.
 */

/* Takes the list of character clusters and the list of samples still
 * waiting; presumably prints statistics about "m" characters - TODO confirm. */
00075 void print_em_stats(CHAR_SAMPLES_LIST *char_clusters,
00076 CHAR_SAMPLE_LIST *chars_waiting);
00077
/* Returns a CHAR_SAMPLE pointer built from a pixel row, image lines, a
 * bounding box (passed by value), a polarity flag and a character code.
 * NOTE(review): ownership of the returned pointer is not stated here -
 * verify against the definition and its callers. */
00078 CHAR_SAMPLE *clip_sample(PIXROW *pixrow,
00079 IMAGELINE *imlines,
00080 TBOX pix_box,
00081 BOOL8 white_on_black,
00082 char c);
/* Takes the list of character clusters; presumably displays each cluster's
 * prototype - TODO confirm. */
00083 void display_cluster_prototypes(CHAR_SAMPLES_LIST *char_clusters);
/* The three reject_* functions each take a single word result and return
 * nothing; presumably they update the word's reject information in place -
 * TODO confirm. */
00084 void reject_all_ems(WERD_RES *word);
00085 void reject_all_fullstops(WERD_RES *word);
00086 void reject_suspect_fullstops(WERD_RES *word);
/* Both predicates take a word result and a 16-bit index and return BOOL8;
 * presumably the index selects a character position within the word - TODO
 * confirm.  NOTE(review): the index parameter is named "index" in one
 * prototype and "i" in the other. */
00087 BOOL8 suspect_em(WERD_RES *word, inT16 index);
00088 BOOL8 suspect_fullstop(WERD_RES *word, inT16 i);
00089 #endif