tesseract-doxygen/ccmain/control.h File Reference

#include "varable.h"
#include "ocrblock.h"
#include "ratngs.h"
#include "statistc.h"
#include "ocrshell.h"
#include "pageres.h"
#include "charsample.h"
#include "notdll.h"

Go to the source code of this file.

Typedefs

typedef BOOL8(* BLOB_REJECTOR )(PBLOB *, BLOB_CHOICE_IT *, void *)

Enumerations

enum  ACCEPTABLE_WERD_TYPE {
  AC_UNACCEPTABLE, AC_LOWER_CASE, AC_UPPER_CASE, AC_INITIAL_CAP,
  AC_LC_ABBREV, AC_UC_ABBREV
}

Functions

void classify_word_pass2 (WERD_RES *word, ROW *row)
void match_word_pass2 (WERD_RES *word, ROW *row, float x_height)
void fix_hyphens (WERD_CHOICE *choice, WERD *word, BLOB_CHOICE_LIST_CLIST *blob_choices)
void merge_blobs (PBLOB *blob1, PBLOB *blob2)
void choice_dump_tester (PBLOB *, DENORM *, BOOL8 correct, char *text, inT32 count, BLOB_CHOICE_LIST *ratings)
WERD * make_bln_copy (WERD *src_word, ROW *row, BLOCK *block, float x_height, DENORM *denorm)
BOOL8 check_debug_pt (WERD_RES *word, int location)
void add_in_one_row (ROW_RES *row, STATS *fonts, inT8 *italic, inT8 *bold)
void find_modal_font (STATS *fonts, inT8 *font_out, inT8 *font_count)

Variables

int tessedit_single_match = FALSE
bool tessedit_print_text = FALSE
bool tessedit_draw_words = FALSE
bool tessedit_draw_outwords = FALSE
bool tessedit_training_wiseowl = FALSE
bool tessedit_training_tess = FALSE
bool tessedit_matcher_is_wiseowl = FALSE
bool tessedit_dump_choices = FALSE
bool tessedit_fix_fuzzy_spaces = TRUE
bool tessedit_unrej_any_wd = FALSE
bool tessedit_fix_hyphens = TRUE
bool tessedit_reject_fullstops = FALSE
bool tessedit_reject_suspect_fullstops = FALSE
bool tessedit_redo_xheight = TRUE
bool tessedit_cluster_adaption_on = TRUE
bool tessedit_enable_doc_dict = TRUE
bool word_occ_first = FALSE
bool tessedit_xht_fiddles_on_done_wds = TRUE
bool tessedit_xht_fiddles_on_no_rej_wds = TRUE
int x_ht_check_word_occ = 2
int x_ht_stringency = 1
bool x_ht_quality_check = TRUE
bool tessedit_debug_block_rejection = FALSE
int debug_x_ht_level = 0
bool rej_use_xht = TRUE
bool debug_acceptable_wds = FALSE
char * chs_leading_punct = "('`\""
char * chs_trailing_punct1 = ").,;:?!"
char * chs_trailing_punct2 = ")'`\""
double quality_rej_pc = 0.08
double quality_blob_pc = 0.0
double quality_outline_pc = 1.0
double quality_char_pc = 0.95
int quality_min_initial_alphas_reqd = 2
bool tessedit_tess_adapt_to_rejmap = FALSE
int tessedit_tess_adaption_mode = 3
int tessedit_em_adaption_mode = 62
bool tessedit_cluster_adapt_after_pass1 = FALSE
bool tessedit_cluster_adapt_after_pass2 = FALSE
bool tessedit_cluster_adapt_after_pass3 = FALSE
bool tessedit_cluster_adapt_before_pass1 = FALSE
int tessedit_cluster_adaption_mode = 0
bool tessedit_adaption_debug = FALSE
bool tessedit_minimal_rej_pass1 = FALSE
bool tessedit_test_adaption = FALSE
bool tessedit_global_adaption = FALSE
bool tessedit_matcher_log = FALSE
int tessedit_test_adaption_mode = 3
bool test_pt = FALSE
double test_pt_x = 99999.99
double test_pt_y = 99999.99
bool save_best_choices = FALSE

Detailed Description

Module-independent matcher controller.


Typedef Documentation

typedef BOOL8(* BLOB_REJECTOR)(PBLOB *, BLOB_CHOICE_IT *, void *)

Enumeration Type Documentation

enum ACCEPTABLE_WERD_TYPE

Enumerator:
AC_UNACCEPTABLE 

Unacceptable word.

AC_LOWER_CASE 

ALL lower case.

AC_UPPER_CASE 

ALL upper case.

AC_INITIAL_CAP 

All but the initial letter lower case.

AC_LC_ABBREV 

a.b.c.

AC_UC_ABBREV 

A.B.C.


Function Documentation

void add_in_one_row ( ROW_RES *  row,
STATS *  fonts,
inT8 *  italic,
inT8 *  bold 
)

good chars in word

Parameters:
row current row
fonts font stats
italic output count
bold output count
BOOL8 check_debug_pt ( WERD_RES *  word,
int  location 
)
void choice_dump_tester ( PBLOB *  ,
DENORM *  ,
BOOL8  correct,
char *  text,
inT32  count,
BLOB_CHOICE_LIST *  ratings 
)

dump chars in word

Parameters:
correct correctly segmented
text correct text
count chars in text
ratings list of results
void classify_word_pass2 ( WERD_RES *  word,
ROW *  row 
)
void find_modal_font ( STATS *  fonts,
inT8 *  font_out,
inT8 *  font_count 
)

good chars in word

Parameters:
fonts font stats
font_out output font
font_count output count
void fix_hyphens ( WERD_CHOICE *  choice,
WERD *  word,
BLOB_CHOICE_LIST_CLIST *  blob_choices 
)

crunch double hyphens

Parameters:
choice string to fix
word word to do
blob_choices char choices
WERD* make_bln_copy ( WERD *  src_word,
ROW *  row,
BLOCK *  block,
float  x_height,
DENORM *  denorm 
)
void match_word_pass2 ( WERD_RES *  word,
ROW *  row,
float  x_height 
)

recognize one word

Parameters:
word word to do
void merge_blobs ( PBLOB *  blob1,
PBLOB *  blob2 
)

combine 2 blobs

Parameters:
blob1 dest blob
blob2 source blob

Variable Documentation

char* chs_leading_punct = "('`\""

"Leading punctuation"

char* chs_trailing_punct1 = ").,;:?!"

"1st Trailing punctuation"

char* chs_trailing_punct2 = ")'`\""

"2nd Trailing punctuation"

bool debug_acceptable_wds = FALSE

"Dump word pass/fail chk"

int debug_x_ht_level = 0

"Reestimate debug"

double quality_blob_pc = 0.0

"good_quality_doc gte good blobs limit"

double quality_char_pc = 0.95

"good_quality_doc gte good char limit"

int quality_min_initial_alphas_reqd = 2

"alphas in a good word"

double quality_outline_pc = 1.0

"good_quality_doc lte outline error limit"

double quality_rej_pc = 0.08

"good_quality_doc lte rejection limit"

bool rej_use_xht = TRUE

"Individual rejection control"

bool save_best_choices = FALSE

"Save the results of the recognition step (blob_choices) within the corresponding WERD_CHOICE"

bool tessedit_adaption_debug = FALSE

"Generate and print debug information for adaption"

bool tessedit_cluster_adapt_after_pass1 = FALSE

"Adapt using clusterer after pass 1"

bool tessedit_cluster_adapt_after_pass2 = FALSE

"Adapt using clusterer after pass 2"

bool tessedit_cluster_adapt_after_pass3 = FALSE

"Adapt using clusterer after pass 3"

bool tessedit_cluster_adapt_before_pass1 = FALSE

"Adapt using clusterer before Tess adapting during pass 1"

int tessedit_cluster_adaption_mode = 0

"Adaptation decision algorithm for matrix matcher"

bool tessedit_cluster_adaption_on = TRUE

"Do our own adaption - ems only"

bool tessedit_debug_block_rejection = FALSE

"Block and Row stats"

bool tessedit_draw_outwords = FALSE

"Draw output words"

bool tessedit_draw_words = FALSE

"Draw source words"

bool tessedit_dump_choices = FALSE

"Dump char choices"

int tessedit_em_adaption_mode = 62

"Adaptation decision algorithm for ems matrix matcher"

bool tessedit_enable_doc_dict = TRUE

"Add words to the document dictionary"

bool tessedit_fix_fuzzy_spaces = TRUE

"Try to improve fuzzy spaces"

bool tessedit_fix_hyphens = TRUE

"Crunch double hyphens?"

bool tessedit_global_adaption = FALSE

"Adapt to all docs over time"

bool tessedit_matcher_is_wiseowl = FALSE

"Call WO to classify"

bool tessedit_matcher_log = FALSE

"Log matcher activity"

bool tessedit_minimal_rej_pass1 = FALSE

"Do minimal rejection on pass 1 output"

bool tessedit_print_text = FALSE

"Write text to stdout"

bool tessedit_redo_xheight = TRUE

"Check/Correct x-height"

bool tessedit_reject_fullstops = FALSE

"Reject all fullstops"

bool tessedit_reject_suspect_fullstops = FALSE

"Reject suspect fullstops"

int tessedit_single_match = FALSE

"Top choice only from CP"

bool tessedit_tess_adapt_to_rejmap = FALSE

"Use reject map to control Tesseract adaption"

int tessedit_tess_adaption_mode = 3

"Adaptation decision algorithm for tess"

bool tessedit_test_adaption = FALSE

"Test adaption criteria"

int tessedit_test_adaption_mode = 3

"Adaptation decision algorithm for tess"

bool tessedit_training_tess = FALSE

"Call Tess to learn blobs"

bool tessedit_training_wiseowl = FALSE

"Call WO to learn blobs"

bool tessedit_unrej_any_wd = FALSE

"Don't bother with word plausibility"

bool tessedit_xht_fiddles_on_done_wds = TRUE

"Apply xht fix up even if done"

bool tessedit_xht_fiddles_on_no_rej_wds = TRUE

"Apply xht fix up even in no rejects"

bool test_pt = FALSE

"Test for point"

double test_pt_x = 99999.99

"xcoord"

double test_pt_y = 99999.99

"ycoord"

bool word_occ_first = FALSE

"Do word occ before re-est xht"

int x_ht_check_word_occ = 2

"Check Char Block occupancy"

bool x_ht_quality_check = TRUE

"Don't allow worse quality"

int x_ht_stringency = 1

"How many confirmed a/n to accept?"

Generated on Sun Jul 18 17:10:49 2010 for Tesseract by  doxygen 1.6.3