00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00025 #ifndef CONTROL_H
00026 #define CONTROL_H
00027
00028 #include "varable.h"
00029 #include "ocrblock.h"
00030
00031 #include "ratngs.h"
00032 #include "statistc.h"
00033
00034 #include "ocrshell.h"
00035 #include "pageres.h"
00036
00037 #include "charsample.h"
00038 #include "notdll.h"
00039
/*
 * ACCEPTABLE_WERD_TYPE - category assigned to a word.
 *
 * The enumerators carry no explicit values, so they number consecutively
 * from AC_UNACCEPTABLE == 0 up to AC_UC_ABBREV == 5.
 *
 * NOTE(review): the per-value meanings below are read off the names only;
 * confirm against the code that produces these values.
 *   AC_UNACCEPTABLE - presumably "word is not acceptable"
 *   AC_LOWER_CASE   - presumably an all lower-case word
 *   AC_UPPER_CASE   - presumably an all upper-case word
 *   AC_INITIAL_CAP  - presumably a word with a leading capital
 *   AC_LC_ABBREV    - presumably a lower-case abbreviation
 *   AC_UC_ABBREV    - presumably an upper-case abbreviation
 */
00040 enum ACCEPTABLE_WERD_TYPE
00041 {
00042 AC_UNACCEPTABLE,
00043 AC_LOWER_CASE,
00044 AC_UPPER_CASE,
00045 AC_INITIAL_CAP,
00046 AC_LC_ABBREV,
00047 AC_UC_ABBREV
00048 };
00049
/*
 * BLOB_REJECTOR - pointer to a function that takes a PBLOB *, a
 * BLOB_CHOICE_IT * and an untyped void * and returns a BOOL8.
 *
 * NOTE(review): judging by the name this is a callback deciding whether a
 * blob is rejected, with the void * carrying caller-supplied context;
 * the meaning of the return value is not visible here - confirm at the
 * call sites.
 */
00050 typedef BOOL8 (*BLOB_REJECTOR) (PBLOB *, BLOB_CHOICE_IT *, void *);
00051
/*
 * External declaration of the tessedit_single_match control variable
 * (description: "Top choice only from CP").
 *
 * NOTE(review): this is declared through INT_VAR_H but its second argument
 * is the boolean-style constant FALSE, unlike the other INT_VAR_H entries
 * in this header which use numeric literals - confirm the intended type.
 */
00052 extern INT_VAR_H (tessedit_single_match, FALSE, "Top choice only from CP");
00053
/*
 * External declarations of boolean control variables.
 *
 * Each entry has the form BOOL_VAR_H (name, value, description string).
 * NOTE(review): BOOL_VAR_H is not defined in this file; presumably it comes
 * from varable.h (included above), the value is the default, and the
 * description is informational only - confirm against the macro definition.
 */

/* Output and display switches. */
00054 extern BOOL_VAR_H (tessedit_print_text, FALSE, "Write text to stdout");
00055 extern BOOL_VAR_H (tessedit_draw_words, FALSE, "Draw source words");
00056 extern BOOL_VAR_H (tessedit_draw_outwords, FALSE, "Draw output words");
/*
 * Training / matcher selection. "WO" in the descriptions presumably
 * abbreviates "wiseowl", as in the variable names - confirm.
 */
00057 extern BOOL_VAR_H (tessedit_training_wiseowl, FALSE,
00058 "Call WO to learn blobs");
00059 extern BOOL_VAR_H (tessedit_training_tess, FALSE, "Call Tess to learn blobs");
00060 extern BOOL_VAR_H (tessedit_matcher_is_wiseowl, FALSE, "Call WO to classify");
00061 extern BOOL_VAR_H (tessedit_dump_choices, FALSE, "Dump char choices");
/* Post-recognition fix-ups and rejection switches. */
00062 extern BOOL_VAR_H (tessedit_fix_fuzzy_spaces, TRUE,
00063 "Try to improve fuzzy spaces");
00064 extern BOOL_VAR_H (tessedit_unrej_any_wd, FALSE,
00065 "Dont bother with word plausibility");
00066 extern BOOL_VAR_H (tessedit_fix_hyphens, TRUE, "Crunch double hyphens?");
00067 extern BOOL_VAR_H (tessedit_reject_fullstops, FALSE, "Reject all fullstops");
00068 extern BOOL_VAR_H (tessedit_reject_suspect_fullstops, FALSE,
00069 "Reject suspect fullstops");
/* x-height re-estimation, adaption and document dictionary switches. */
00070 extern BOOL_VAR_H (tessedit_redo_xheight, TRUE, "Check/Correct x-height");
00071 extern BOOL_VAR_H (tessedit_cluster_adaption_on, TRUE,
00072 "Do our own adaption - ems only");
00073 extern BOOL_VAR_H (tessedit_enable_doc_dict, TRUE,
00074 "Add words to the document dictionary");
/*
 * External declarations of the x-height ("xht" / "x_ht") checking,
 * rejection and debug control variables.
 *
 * Entries use BOOL_VAR_H or INT_VAR_H with (name, value, description).
 * NOTE(review): the macros are defined outside this file; the second
 * argument is presumably the default value - confirm.
 */

/* Ordering and scope of the x-height fix-up. */
00075 extern BOOL_VAR_H (word_occ_first, FALSE, "Do word occ before re-est xht");
00076 extern BOOL_VAR_H (tessedit_xht_fiddles_on_done_wds, TRUE,
00077 "Apply xht fix up even if done");
00078 extern BOOL_VAR_H (tessedit_xht_fiddles_on_no_rej_wds, TRUE,
00079 "Apply xht fix up even in no rejects");
/*
 * Integer tuning values for the x-height check; the valid ranges are not
 * visible in this header.
 */
00080 extern INT_VAR_H (x_ht_check_word_occ, 2, "Check Char Block occupancy");
00081 extern INT_VAR_H (x_ht_stringency, 1, "How many confirmed a/n to accept?");
00082 extern BOOL_VAR_H (x_ht_quality_check, TRUE, "Dont allow worse quality");
/* Debug output and rejection control. */
00083 extern BOOL_VAR_H (tessedit_debug_block_rejection, FALSE,
00084 "Block and Row stats");
00085 extern INT_VAR_H (debug_x_ht_level, 0, "Reestimate debug");
00086 extern BOOL_VAR_H (rej_use_xht, TRUE, "Individual rejection control");
00087 extern BOOL_VAR_H (debug_acceptable_wds, FALSE, "Dump word pass/fail chk");
/*
 * External declarations of the punctuation character-set variables,
 * declared through STRING_VAR_H (name, string value, description).
 *
 * The string values list the characters in each set:
 *   chs_leading_punct   - open paren, apostrophe, backquote, double quote
 *   chs_trailing_punct1 - close paren and . , ; : ? !
 *   chs_trailing_punct2 - close paren, apostrophe, backquote, double quote
 *
 * NOTE(review): how the "1st" and "2nd" trailing sets are combined is not
 * visible here - confirm at the point of use.
 */
00088 extern STRING_VAR_H (chs_leading_punct, "('`\"", "Leading punctuation");
00089 extern
00090 STRING_VAR_H (chs_trailing_punct1, ").,;:?!", "1st Trailing punctuation");
00091 extern STRING_VAR_H (chs_trailing_punct2, ")'`\"",
00092 "2nd Trailing punctuation");
/*
 * External declarations of the document-quality thresholds, declared
 * through double_VAR_H / INT_VAR_H (name, value, description).
 *
 * NOTE(review): "lte" / "gte" in the descriptions presumably mean
 * "less than or equal" / "greater than or equal", and the "_pc" values
 * look like fractions in the range 0..1 rather than percentages - confirm
 * against the code that compares them.
 */
00093 extern double_VAR_H (quality_rej_pc, 0.08,
00094 "good_quality_doc lte rejection limit");
00095 extern double_VAR_H (quality_blob_pc, 0.0,
00096 "good_quality_doc gte good blobs limit");
00097 extern double_VAR_H (quality_outline_pc, 1.0,
00098 "good_quality_doc lte outline error limit");
00099 extern double_VAR_H (quality_char_pc, 0.95,
00100 "good_quality_doc gte good char limit");
/* Count-based threshold, unlike the fractional limits above. */
00101 extern INT_VAR_H (quality_min_initial_alphas_reqd, 2,
00102 "alphas in a good word");
/*
 * External declarations of the adaption-mode control variables for the
 * Tesseract classifier and the "ems" matrix matcher.
 *
 * NOTE(review): the mode values (3 and 62) are opaque in this header;
 * they may be bit masks selecting adaption criteria - confirm against the
 * code that reads them.
 */
00103 extern BOOL_VAR_H (tessedit_tess_adapt_to_rejmap, FALSE,
00104 "Use reject map to control Tesseract adaption");
00105 extern INT_VAR_H (tessedit_tess_adaption_mode, 3,
00106 "Adaptation decision algorithm for tess");
00107 extern INT_VAR_H (tessedit_em_adaption_mode, 62,
00108 "Adaptation decision algorithm for ems matrix matcher");
/*
 * External declarations of the switches that enable clusterer adaption
 * after recognition pass 1, 2 or 3 respectively.
 *
 * Each description string names the pass given by its variable's suffix,
 * so the three entries can be told apart wherever the text is shown.
 *
 * NOTE(review): if the matching definitions elsewhere repeat these
 * description strings, keep them in step with the text used here.
 */
00109 extern BOOL_VAR_H (tessedit_cluster_adapt_after_pass1, FALSE,
00110 "Adapt using clusterer after pass 1");
00111 extern BOOL_VAR_H (tessedit_cluster_adapt_after_pass2, FALSE,
00112 "Adapt using clusterer after pass 2");
00113 extern BOOL_VAR_H (tessedit_cluster_adapt_after_pass3, FALSE,
00114 "Adapt using clusterer after pass 3");
/*
 * External declarations of the remaining adaption, rejection and logging
 * control variables, using BOOL_VAR_H / INT_VAR_H with
 * (name, value, description).
 *
 * NOTE(review): the macros are defined outside this file; the second
 * argument is presumably the default value - confirm.
 */

/* Clusterer adaption timing and mode. */
00115 extern BOOL_VAR_H (tessedit_cluster_adapt_before_pass1, FALSE,
00116 "Adapt using clusterer before Tess adaping during pass 1");
00117 extern INT_VAR_H (tessedit_cluster_adaption_mode, 0,
00118 "Adaptation decision algorithm for matrix matcher");
/* Debugging and testing of adaption. */
00119 extern BOOL_VAR_H (tessedit_adaption_debug, FALSE,
00120 "Generate and print debug information for adaption");
00121 extern BOOL_VAR_H (tessedit_minimal_rej_pass1, FALSE,
00122 "Do minimal rejection on pass 1 output");
00123 extern BOOL_VAR_H (tessedit_test_adaption, FALSE,
00124 "Test adaption criteria");
00125 extern BOOL_VAR_H (tessedit_global_adaption, FALSE,
00126 "Adapt to all docs over time");
00127 extern BOOL_VAR_H (tessedit_matcher_log, FALSE, "Log matcher activity");
/*
 * NOTE(review): this description is identical to the one on
 * tessedit_tess_adaption_mode; it presumably should describe the mode used
 * when tessedit_test_adaption is on - confirm before rewording.
 */
00128 extern INT_VAR_H (tessedit_test_adaption_mode, 3,
00129 "Adaptation decision algorithm for tess");
/*
 * External declarations of the debug test-point variables and the
 * save_best_choices switch.
 *
 * test_pt enables the point test; test_pt_x / test_pt_y hold its
 * coordinates.
 * NOTE(review): the 99999.99 values look like "far off page" placeholders
 * so that no word matches until a real point is set - confirm.
 */
00130 extern BOOL_VAR_H (test_pt, FALSE, "Test for point");
00131 extern double_VAR_H (test_pt_x, 99999.99, "xcoord");
00132 extern double_VAR_H (test_pt_y, 99999.99, "ycoord");
/*
 * The description is built from two adjacent string literals, which the
 * compiler concatenates into one string.
 */
00133 extern BOOL_VAR_H(save_best_choices, FALSE,
00134 "Save the results of the recognition step"
00135 " (blob_choices) within the corresponding WERD_CHOICE");
00136
00137
00138
00139
00140
00141
00142
00143
00144
00145
/*
 * classify_word_pass2 - prototype only; the definition is elsewhere.
 *
 * word: word result to operate on.
 * row:  row associated with the word.
 *
 * NOTE(review): by its name this performs the second-pass classification
 * of a single word; what it writes back into *word is not visible here -
 * confirm against the definition.
 */
00146 void classify_word_pass2(WERD_RES *word, ROW *row);
/*
 * match_word_pass2 - prototype only; the definition is elsewhere.
 *
 * word:     word result to operate on.
 * row:      row associated with the word.
 * x_height: x-height value passed by the caller (float).
 *
 * NOTE(review): presumably the matching step used during pass 2 with an
 * explicit x-height; units and effect on *word are not visible here -
 * confirm against the definition.
 */
00151 void match_word_pass2(
00152 WERD_RES *word,
00153 ROW *row,
00154 float x_height);
/*
 * fix_hyphens - prototype only; the definition is elsewhere.
 *
 * choice:       word choice to operate on.
 * word:         the corresponding word.
 * blob_choices: list of per-blob choice lists.
 *
 * NOTE(review): presumably the routine controlled by the
 * tessedit_fix_hyphens variable ("Crunch double hyphens?"); whether the
 * arguments are modified in place is not visible here - confirm.
 */
00161 void fix_hyphens(
00162 WERD_CHOICE *choice,
00163 WERD *word,
00164 BLOB_CHOICE_LIST_CLIST *blob_choices);
00165
/*
 * merge_blobs - prototype only; the definition is elsewhere.
 *
 * blob1, blob2: the two blobs involved in the merge.
 *
 * NOTE(review): which blob receives the merged result, and what state the
 * other is left in, cannot be told from this declaration - confirm against
 * the definition before relying on either argument afterwards.
 */
00171 void merge_blobs(
00172 PBLOB *blob1,
00173 PBLOB *blob2
00174 );
/*
 * choice_dump_tester - prototype only; the definition is elsewhere.
 *
 * The first two parameters (PBLOB *, DENORM *) are unnamed in this
 * declaration.
 * correct: boolean flag supplied by the caller.
 * text:    character string supplied by the caller (non-const char *).
 * count:   32-bit signed count.
 * ratings: list of blob choices.
 *
 * NOTE(review): presumably related to the tessedit_dump_choices variable
 * ("Dump char choices"); the exact meaning of correct/text/count is not
 * visible here - confirm against the definition.
 */
00176 void choice_dump_tester(
00177 PBLOB *,
00178 DENORM *,
00179 BOOL8 correct,
00180 char *text,
00181 inT32 count,
00182 BLOB_CHOICE_LIST *ratings
00183 );
/*
 * make_bln_copy - prototype only; the definition is elsewhere.
 *
 * src_word: word to copy from.
 * row:      row associated with the word.
 * block:    block associated with the word.
 * x_height: x-height value passed by the caller (float).
 * denorm:   DENORM object passed by pointer.
 *
 * Returns a WERD pointer.
 *
 * NOTE(review): "bln" presumably stands for baseline-normalised, denorm is
 * presumably filled in as an output, and the caller presumably owns the
 * returned WERD - none of this is visible here; confirm against the
 * definition.
 */
00184 WERD *make_bln_copy(WERD *src_word, ROW *row, BLOCK* block,
00185 float x_height, DENORM *denorm);
/*
 * check_debug_pt - prototype only; the definition is elsewhere.
 *
 * word:     word result to test.
 * location: integer code supplied by the caller.
 *
 * Returns a BOOL8.
 *
 * NOTE(review): presumably tests the word against the test_pt /
 * test_pt_x / test_pt_y variables declared in this header, with location
 * identifying the call site; the meaning of the return value is not
 * visible here - confirm against the definition.
 */
00186 BOOL8 check_debug_pt(WERD_RES *word, int location);
/*
 * add_in_one_row - prototype only; the definition is elsewhere.
 *
 * row:    row result to take data from.
 * fonts:  statistics object passed by pointer.
 * italic: pointer to an 8-bit signed value.
 * bold:   pointer to an 8-bit signed value.
 *
 * NOTE(review): presumably accumulates one row's font information into
 * fonts and updates the italic/bold tallies through the pointers; whether
 * italic and bold are in/out or pure outputs is not visible here - confirm
 * against the definition.
 */
00188 void add_in_one_row(
00189 ROW_RES *row,
00190 STATS *fonts,
00191 inT8 *italic,
00192 inT8 *bold
00193 );
/*
 * find_modal_font - prototype only; the definition is elsewhere.
 *
 * fonts:      statistics object passed by pointer (non-const).
 * font_out:   pointer to an 8-bit signed value; by its name, receives the
 *             selected font.
 * font_count: pointer to an 8-bit signed value; by its name, receives the
 *             count for that font.
 *
 * NOTE(review): presumably picks the most frequent (modal) font from
 * fonts; whether fonts is modified as a side effect is not visible here -
 * confirm against the definition.
 */
00195 void find_modal_font(
00196 STATS *fonts,
00197 inT8 *font_out,
00198 inT8 *font_count
00199 );
00200 #endif