00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #ifndef REJECT_H
00021 #define REJECT_H
00022
00023 #include "varable.h"
00024 #include "pageres.h"
00025 #include "notdll.h"
00026
/*
 * General rejection / acceptance switches, followed by the two aspect-ratio
 * limits whose help text names the dot/hyphen test. The *_VAR_H macros are
 * defined outside this file.
 * NOTE(review): the second macro argument is presumably the default value and
 * the third a help string -- confirm against the macro definitions.
 */
00027 extern INT_VAR_H (tessedit_reject_mode, 5, "Rejection algorithm");
00028 extern INT_VAR_H (tessedit_ok_mode, 5, "Acceptance decision algorithm");
/* Declared with an empty help string. */
00029 extern BOOL_VAR_H (tessedit_use_nn, TRUE, "");
/* NOTE(review): the help text reads "Adaption debug" although the variable
 * name refers to rejection -- possibly a copy/paste leftover; confirm. */
00030 extern BOOL_VAR_H (tessedit_rejection_debug, FALSE, "Adaption debug");
00031 extern BOOL_VAR_H (tessedit_rejection_stats, FALSE, "Show NN stats");
00032 extern BOOL_VAR_H (tessedit_flip_0O, TRUE, "Contextual 0O O0 flips");
/* The lower and upper hyphen limits share one help string; only the names
 * and the listed values (1.5 and 1.8) tell them apart. */
00033 extern double_VAR_H (tessedit_lower_flip_hyphen, 1.5,
00034 "Aspect ratio dot/hyphen test");
00035 extern double_VAR_H (tessedit_upper_flip_hyphen, 1.8,
00036 "Aspect ratio dot/hyphen test");
/*
 * Boolean options whose names and help strings refer to the 1 / I / l
 * ("one-ell") confusion handling.
 * NOTE(review): "11l conf. detector" in the first help string presumably
 * means the 1Il conflict detector -- confirm against the definition.
 */
00037 extern BOOL_VAR_H (rej_trust_doc_dawg, FALSE,
00038 "Use DOC dawg in 11l conf. detector");
00039 extern BOOL_VAR_H (rej_1Il_use_dict_word, FALSE, "Use dictword test");
00040 extern BOOL_VAR_H (rej_1Il_trust_permuter_type, TRUE, "Dont double check");
00041 extern BOOL_VAR_H (one_ell_conflict_default, TRUE,
00042 "one_ell_conflict default");
/*
 * Options for the "NN" matcher: debug switches, dictionary double-check
 * switches, per-conflict-set switches and acceptance levels.
 * NOTE(review): "NN" presumably stands for neural network (compare the
 * net_image_* settings declared further down) -- confirm.
 */
00043 extern BOOL_VAR_H (show_char_clipping, FALSE, "Show clip image window?");
00044 extern BOOL_VAR_H (nn_debug, FALSE, "NN DEBUGGING?");
00045 extern BOOL_VAR_H (nn_reject_debug, FALSE, "NN DEBUG each char?");
00046 extern BOOL_VAR_H (nn_lax, FALSE, "Use 2nd rate matches");
00047 extern BOOL_VAR_H (nn_double_check_dict, FALSE, "Double check");
00048 extern BOOL_VAR_H (nn_conf_double_check_dict, TRUE,
00049 "Double check for confusions");
/* One switch per conflict family: 1Il, Ss and hyphen. */
00050 extern BOOL_VAR_H (nn_conf_1Il, TRUE, "NN use 1Il conflicts");
00051 extern BOOL_VAR_H (nn_conf_Ss, TRUE, "NN use Ss conflicts");
00052 extern BOOL_VAR_H (nn_conf_hyphen, TRUE, "NN hyphen conflicts");
/* The next three variables carry the identical help string; the variable
 * names (good_qual / dict / sensible) are the only distinguishing hint. */
00053 extern BOOL_VAR_H (nn_conf_test_good_qual, FALSE, "NN dodgy 1Il cross check");
00054 extern BOOL_VAR_H (nn_conf_test_dict, TRUE, "NN dodgy 1Il cross check");
00055 extern BOOL_VAR_H (nn_conf_test_sensible, TRUE, "NN dodgy 1Il cross check");
00056 extern BOOL_VAR_H (nn_conf_strict_on_dodgy_chs, TRUE,
00057 "Require stronger NN match");
00058 extern double_VAR_H (nn_dodgy_char_threshold, 0.99, "min accept score");
/* NOTE(review): the meaning of the integer levels (4 and 3) is not visible
 * in this header -- see the code that reads these variables. */
00059 extern INT_VAR_H (nn_conf_accept_level, 4, "NN accept dodgy 1Il matches? ");
00060 extern INT_VAR_H (nn_conf_initial_i_level, 3,
00061 "NN accept initial Ii match level ");
/*
 * Switches named no_unrej_*; per their help strings they concern stopping
 * the "unrej" step for dubious characters, non-alphanumeric words and 1Il
 * characters.
 * NOTE(review): "unrej" presumably means un-rejecting (re-accepting) a
 * previously rejected character -- confirm.
 */
00062 extern BOOL_VAR_H (no_unrej_dubious_chars, TRUE,
00063 "Dubious chars next to reject?");
00064 extern BOOL_VAR_H (no_unrej_no_alphanum_wds, TRUE,
00065 "Stop unrej of non A/N wds?");
00066 extern BOOL_VAR_H (no_unrej_1Il, FALSE, "Stop unrej of 1Ilchars?");
/*
 * Per-criterion rejection controls and whole-word rejection settings.
 * The three rej_use_tess_accepted / rej_use_tess_blanks / rej_use_good_perm
 * switches share the help string "Individual rejection control".
 */
00067 extern BOOL_VAR_H (rej_use_tess_accepted, TRUE,
00068 "Individual rejection control");
00069 extern BOOL_VAR_H (rej_use_tess_blanks, TRUE, "Individual rejection control");
00070 extern BOOL_VAR_H (rej_use_good_perm, TRUE, "Individual rejection control");
/* Both of the following share the help string "Extend permuter check". */
00071 extern BOOL_VAR_H (rej_use_sensible_wd, FALSE, "Extend permuter check");
00072 extern BOOL_VAR_H (rej_alphas_in_number_perm, FALSE, "Extend permuter check");
/* NOTE(review): by name, a fraction of rejected characters above which the
 * whole word is rejected -- confirm against reject_mostly_rejects(). */
00073 extern double_VAR_H (rej_whole_of_mostly_reject_word_fract, 0.85,
00074 "if >this fract");
/* The help string enumerates the modes: 0 never, 1 after NN, 2 after new
 * x-height. */
00075 extern INT_VAR_H (rej_mostly_reject_mode, 1,
00076 "0-never, 1-afterNN, 2-after new xht");
/* Note the "tessed_" prefix here, unlike the "tessedit_" prefix used by the
 * other variables; the name is part of the interface and is left as-is. */
00077 extern double_VAR_H (tessed_fullstop_aspect_ratio, 1.2,
00078 "if >this fract then reject");
/*
 * Geometry of the NN input image (width, height, x-height, baseline nodes),
 * the image-border limit, and the NN score thresholds.
 * NOTE(review): units are presumably pixels for the image sizes and border
 * -- the header does not say; confirm.
 */
00079 extern INT_VAR_H (net_image_width, 40, "NN input image width");
00080 extern INT_VAR_H (net_image_height, 36, "NN input image height");
00081 extern INT_VAR_H (net_image_x_height, 22, "NN input image x_height");
00082 extern INT_VAR_H (tessedit_image_border, 2, "Rej blbs near image edge limit");
00083 extern INT_VAR_H (net_bl_nodes, 20, "Number of baseline nodes");
00084 extern double_VAR_H (nn_reject_threshold, 0.5, "NN min accept score");
/* NOTE(review): "top scores sep factor" presumably relates the best score to
 * the runner-up (compare top_score / next_score in evaluate_net_match) --
 * confirm. */
00085 extern double_VAR_H (nn_reject_head_and_shoulders, 0.6,
00086 "top scores sep factor");
/*
 * String-valued settings; each string is used as a set of characters
 * according to its help text.
 * In the first two strings, \075 is the octal escape for '=' (octal 75 is
 * decimal 61), so the sets are "-?=" and "-?*=" respectively.
 */
00087 extern STRING_VAR_H (ok_single_ch_non_alphanum_wds, "-?\075",
00088 "Allow NN to unrej");
00089 extern STRING_VAR_H (ok_repeated_ch_non_alphanum_wds, "-?*\075",
00090 "Allow NN to unrej");
/* Conflict sets for the three confusion families (1Il, Ss, hyphen). */
00091 extern STRING_VAR_H (conflict_set_I_l_1, "Il1[]", "Il1 conflict set");
00092 extern STRING_VAR_H (conflict_set_S_s, "Ss$", "Ss conflict set");
00093 extern STRING_VAR_H (conflict_set_hyphen, "-_~", "hyphen conflict set");
/* The left-of-reject and right-of-reject sets are listed with identical
 * contents here; they are separate variables, so they can be set apart. */
00094 extern STRING_VAR_H (dubious_chars_left_of_reject, "!'+`()-./\\<>;:^_,~\"",
00095 "Unreliable chars");
00096 extern STRING_VAR_H (dubious_chars_right_of_reject, "!'+`()-./\\<>;:^_,~\"",
00097 "Unreliable chars");
/* Per the help string: any x-height less than or equal to this value is
 * rejected. The variable name gives the unit as pixels. */
00098 extern INT_VAR_H (min_sane_x_ht_pixels, 8,
00099 "Reject any x-ht lt or eq than this");
/*
 * reject_blanks: takes a non-const pointer to a WERD_RES, returns nothing.
 * NOTE(review): by name, presumably marks blank characters in the word as
 * rejected -- confirm against the definition.
 */
00100 void reject_blanks(WERD_RES *word);
/*
 * reject_I_1_L: takes a non-const pointer to a WERD_RES, returns nothing.
 * NOTE(review): by name, presumably rejects ambiguous I / 1 / l characters
 * (compare conflict_set_I_l_1) -- confirm against the definition.
 */
00101 void reject_I_1_L(WERD_RES *word);
00102
/*
 * reject_poor_matches: takes the word result and a list of per-blob choice
 * lists; returns nothing.
 * NOTE(review): presumably rejects characters whose match rating is worse
 * than the value from compute_reject_threshold() -- confirm.
 */
00103 void reject_poor_matches(WERD_RES *word, BLOB_CHOICE_LIST_CLIST *blob_choices);
/*
 * compute_reject_threshold: returns a float computed from the given list of
 * blob-choice lists.
 * NOTE(review): how the threshold is derived is not visible in this header
 * -- see the definition.
 */
00104 float compute_reject_threshold(
00105 BLOB_CHOICE_LIST_CLIST *blob_choices);
/*
 * sort_floats: comparison callback with the (const void *, const void *)
 * -> int shape that qsort() expects.
 * NOTE(review): presumably both arguments point at float values and the
 * result follows the usual negative / zero / positive convention -- confirm
 * against the definition.
 */
00106 int sort_floats(
00107 const void *arg1,
00108 const void *arg2);
/*
 * reject_edge_blobs: takes a non-const pointer to a WERD_RES, returns
 * nothing.
 * NOTE(review): presumably rejects blobs lying within tessedit_image_border
 * of the image edge (see that variable's help text) -- confirm.
 */
00109 void reject_edge_blobs(WERD_RES *word);
/*
 * word_contains_non_1_digit: returns a BOOL8 computed from two read-only
 * character strings.
 * NOTE(review): by name, presumably true when the word contains a digit
 * other than '1'; word_lengths presumably holds the byte length of each
 * character in word -- confirm both against the definition.
 */
00110 BOOL8 word_contains_non_1_digit(const char *word,
00111 const char *word_lengths);
00112
/*
 * nn_match_char: returns an inT16 for one character image.
 *
 * scaled_image is passed by non-const reference, so the callee is allowed
 * to modify it. The five BOOL8 flags appear in the same order as in
 * evaluate_net_match(), but tess_ch comes after them here and before them
 * there -- take care when forwarding arguments.
 * NOTE(review): the meaning of the returned value and of baseline_pos is
 * not visible in this header -- see the definition.
 */
00113 inT16 nn_match_char(IMAGE &scaled_image,
00114 float baseline_pos,
00115 BOOL8 dict_word,
00116 BOOL8 checked_dict_word,
00117 BOOL8 sensible_word,
00118 BOOL8 centre,
00119 BOOL8 good_quality_word,
00120 char tess_ch
00121 );
/*
 * evaluate_net_match: returns an inT16 from two (character, score) pairs
 * -- top / top_score and next / next_score -- plus tess_ch and five BOOL8
 * word-context flags.
 *
 * The flags are in the same order as in nn_match_char(), but tess_ch
 * precedes them here and follows them there.
 * NOTE(review): top and next are presumably the best and second-best net
 * outputs and tess_ch the character chosen by the main classifier; the
 * meaning of the result is not visible here -- confirm.
 */
00122 inT16 evaluate_net_match(char top,
00123 float top_score,
00124 char next,
00125 float next_score,
00126 char tess_ch,
00127 BOOL8 dict_word,
00128 BOOL8 checked_dict_word,
00129 BOOL8 sensible_word,
00130 BOOL8 centre,
00131 BOOL8 good_quality_word);
/*
 * dont_allow_dubious_chars: takes a non-const pointer to a WERD_RES,
 * returns nothing.
 * NOTE(review): presumably applies the dubious_chars_left_of_reject and
 * dubious_chars_right_of_reject sets around rejected characters -- confirm.
 */
00132 void dont_allow_dubious_chars(WERD_RES *word);
00133
/*
 * dont_allow_1Il: takes a non-const pointer to a WERD_RES, returns nothing.
 * NOTE(review): presumably the function controlled by no_unrej_1Il
 * ("Stop unrej of 1Ilchars?") -- confirm against the caller.
 */
00134 void dont_allow_1Il(WERD_RES *word);
00135
/*
 * reject_mostly_rejects: takes a non-const pointer to a WERD_RES, returns
 * nothing.
 * NOTE(review): presumably rejects the whole word once the rejected share
 * exceeds rej_whole_of_mostly_reject_word_fract -- confirm.
 */
00136 void reject_mostly_rejects(
00137 WERD_RES *word);
/*
 * flip_hyphens: takes a non-const pointer to a WERD_RES, returns nothing.
 * NOTE(review): presumably uses tessedit_lower_flip_hyphen and
 * tessedit_upper_flip_hyphen for the dot/hyphen aspect-ratio test -- confirm.
 */
00138 void flip_hyphens(WERD_RES *word);
/*
 * flip_0O: takes a non-const pointer to a WERD_RES, returns nothing.
 * NOTE(review): presumably the contextual 0 / O swap enabled by
 * tessedit_flip_0O ("Contextual 0O O0 flips") -- confirm.
 */
00139 void flip_0O(WERD_RES *word);
/*
 * non_0_digit: returns a BOOL8 computed from a read-only string and an
 * explicit length.
 * NOTE(review): by name, presumably true when the character described by
 * (str, length) is a digit other than '0'; whether length counts bytes of
 * one character or of the whole string is not visible here -- confirm.
 */
00140 BOOL8 non_0_digit(const char* str, int length);
00141 #endif