#include "mfcpch.h"
#include <ctype.h>
#include <string.h>
#include "tessbox.h"
#include "ratngs.h"
#include "notdll.h"
#include "varable.h"
#include "wordrec.h"
#include "ocrclass.h"
#include "ocrblock.h"
#include "statistc.h"
#include "ocrshell.h"
#include "pageres.h"
#include "elst.h"
#include "memry.h"
#include "control.h"
#include "img.h"
#include "tordmain.h"
#include "mainblk.h"
#include "ocrrow.h"
#include "werd.h"
#include "rect.h"
#include "polyblob.h"
#include "stepblob.h"
#include "tesseractclass.h"
#include "imgs.h"
#include "charsample.h"
#include "charcut.h"
#include "stopper.h"
#include "hosthplb.h"
#include "secname.h"
#define EXTERN
#define WINDOWNAMESIZE 13
CHAR_SAMPLE* clip_sample(PIXROW *pixrow,
                         IMAGELINE *imlines,
                         TBOX pix_box,
                         BOOL8 white_on_black,
                         char c)
void display_cluster_prototypes(CHAR_SAMPLES_LIST *char_clusters)
void print_em_stats(CHAR_SAMPLES_LIST *char_clusters,
                    CHAR_SAMPLE_LIST *chars_waiting)
void reject_all_ems(WERD_RES *word)
void reject_all_fullstops(WERD_RES *word)
void reject_suspect_fullstops(WERD_RES *word)
EXTERN bool tessedit_adapt_to_char_fragments = TRUE
"Adapt to words that contain a character composed from fragments"
EXTERN double tessedit_cluster_accept_fraction = 0.80
"Largest fraction of characters in cluster for it to be used for adaption"
EXTERN bool tessedit_cluster_debug = FALSE
"Generate and print debug information for adaption by clustering"
EXTERN int tessedit_cluster_min_size = 3
"Smallest number of samples in a cluster for it to be used for adaption"
EXTERN double tessedit_cluster_t1 = 0.20
"t1 threshold for clustering samples"
EXTERN double tessedit_cluster_t2 = 0.40
"t2 threshold for clustering samples"
EXTERN double tessedit_cluster_t3 = 0.12
"Extra threshold for clustering samples, only keep a new sample if best score greater than this value"
EXTERN bool tessedit_demo_adaption = FALSE
"Display cut images and matrix match for demo purposes"
EXTERN char* tessedit_demo_file = "academe"
"Name of document containing demo words"
EXTERN int tessedit_demo_word1 = 62
"Word number of first word to display"
EXTERN int tessedit_demo_word2 = 64
"Word number of second word to display"
EXTERN bool tessedit_matrix_match = TRUE
"Use matrix matcher"
EXTERN bool tessedit_mm_adapt_using_prototypes = TRUE
"Use prototypes when adapting"
EXTERN bool tessedit_mm_all_rejects = FALSE
"Adapt to all characters using matrix matcher"
EXTERN bool tessedit_mm_only_match_same_char = FALSE
"Only match samples against clusters for the same character"
EXTERN bool tessedit_mm_use_non_adaption_set = FALSE
"Don't try to adapt to characters on this list"
EXTERN bool tessedit_mm_use_prototypes = TRUE
"Use prototypes as clusters are built"
EXTERN bool tessedit_mm_use_rejmap = FALSE
"Adapt to characters using reject map"
EXTERN char* tessedit_non_adaption_set = ",.;:'~@*"
"Characters to be avoided when adapting"
EXTERN bool tessedit_process_rns = FALSE
"Handle m - rn ambigs"
EXTERN bool tessedit_reject_ems = FALSE
"Reject all m's"
EXTERN bool tessedit_reject_suspect_ems = FALSE
"Reject suspect m's"
EXTERN bool tessedit_test_cluster_input = FALSE
"Set reject map to enable cluster input to be measured"
EXTERN bool tessedit_use_best_sample = FALSE
"Use best sample from cluster when adapting"