tesseract-doxygen/ccmain/docqual.h File Reference

#include "control.h"
#include "notdll.h"

Go to the source code of this file.

Enumerations

enum  GARBAGE_LEVEL { G_NEVER_CRUNCH, G_OK, G_DODGY, G_TERRIBLE }

Functions

inT16 word_blob_quality (WERD_RES *word, ROW *row)
BOOL8 crude_match_blobs (PBLOB *blob1, PBLOB *blob2)
inT16 word_outline_errs (WERD_RES *word)
void word_char_quality (WERD_RES *word, ROW *row, inT16 *match_count, inT16 *accepted_match_count)
void unrej_good_chs (WERD_RES *word, ROW *row)
void print_boxes (WERD *word)
inT16 count_outline_errs (char c, inT16 outline_count)
void reject_whole_page (PAGE_RES_IT &page_res_it)
BOOL8 terrible_word_crunch (WERD_RES *word, GARBAGE_LEVEL garbage_level)
CRUNCH_MODE word_deletable (WERD_RES *word, inT16 &delete_mode)
inT16 failure_count (WERD_RES *word)
BOOL8 noise_outlines (WERD *word)

Variables

char * outlines_odd = "%| "
char * outlines_2 = "ij!?%\":;"
bool docqual_excuse_outline_errs = FALSE
bool tessedit_good_quality_unrej = TRUE
bool tessedit_use_reject_spaces = TRUE
double tessedit_reject_doc_percent = 65.00
double tessedit_reject_block_percent = 45.00
double tessedit_reject_row_percent = 40.00
double tessedit_whole_wd_rej_row_percent = 70.00
bool tessedit_preserve_blk_rej_perfect_wds = TRUE
bool tessedit_preserve_row_rej_perfect_wds = TRUE
bool tessedit_dont_blkrej_good_wds = FALSE
bool tessedit_dont_rowrej_good_wds = FALSE
int tessedit_preserve_min_wd_len = 2
bool tessedit_row_rej_good_docs = TRUE
double tessedit_good_doc_still_rowrej_wd = 1.1
bool tessedit_reject_bad_qual_wds = TRUE
bool tessedit_debug_doc_rejection = FALSE
bool tessedit_debug_quality_metrics = FALSE
bool bland_unrej = FALSE
double quality_rowrej_pc = 1.1
bool unlv_tilde_crunching = TRUE
bool crunch_early_merge_tess_fails = TRUE
bool crunch_early_convert_bad_unlv_chs = FALSE
double crunch_terrible_rating = 80.0
bool crunch_terrible_garbage = TRUE
double crunch_poor_garbage_cert = -9.0
double crunch_poor_garbage_rate = 60
double crunch_pot_poor_rate = 40
double crunch_pot_poor_cert = -8.0
bool crunch_pot_garbage = TRUE
double crunch_del_rating = 60
double crunch_del_cert = -10.0
double crunch_del_min_ht = 0.7
double crunch_del_max_ht = 3.0
double crunch_del_min_width = 3.0
double crunch_del_high_word = 1.5
double crunch_del_low_word = 0.5
double crunch_small_outlines_size = 0.6
int crunch_rating_max = 10
int crunch_pot_indicators = 1
bool crunch_leave_ok_strings = TRUE
bool crunch_accept_ok = TRUE
bool crunch_leave_accept_strings = FALSE
bool crunch_include_numerals = FALSE
int crunch_leave_lc_strings = 4
int crunch_leave_uc_strings = 4
int crunch_long_repetitions = 3
int crunch_debug = 0

Enumeration Type Documentation

enum GARBAGE_LEVEL

Enumerator:
G_NEVER_CRUNCH 
G_OK 
G_DODGY 
G_TERRIBLE 

Function Documentation

inT16 count_outline_errs ( char  c,
inT16  outline_count 
)
BOOL8 crude_match_blobs ( PBLOB *blob1,
PBLOB *blob2 
)
inT16 failure_count ( WERD_RES *word ) 
BOOL8 noise_outlines ( WERD *word ) 
void print_boxes ( WERD *word ) 
void reject_whole_page ( PAGE_RES_IT &page_res_it ) 
BOOL8 terrible_word_crunch ( WERD_RES *word,
GARBAGE_LEVEL garbage_level 
)
void unrej_good_chs ( WERD_RES *word,
ROW *row 
)
inT16 word_blob_quality ( WERD_RES *word,
ROW *row 
)

Blob seg changes

void word_char_quality ( WERD_RES *word,
ROW *row,
inT16 *match_count,
inT16 *accepted_match_count 
)

Blob seg changes

CRUNCH_MODE word_deletable ( WERD_RES *word,
inT16 &delete_mode 
)
inT16 word_outline_errs ( WERD_RES *word ) 

Outline count errs


Variable Documentation

bool bland_unrej = FALSE

"unrej potential with no checks"

bool crunch_accept_ok = TRUE

"Use acceptability in okstring"

int crunch_debug = 0

"As it says"

double crunch_del_cert = -10.0

"POTENTIAL crunch cert lt this"

double crunch_del_high_word = 1.5

"Del if word gt xht x this above bl"

double crunch_del_low_word = 0.5

"Del if word gt xht x this below bl"

double crunch_del_max_ht = 3.0

"Del if word ht gt xht x this"

double crunch_del_min_ht = 0.7

"Del if word ht lt xht x this"

double crunch_del_min_width = 3.0

"Del if word width lt xht x this"

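double crunch_del_rating = 60

"POTENTIAL crunch rating lt this"

bool crunch_early_convert_bad_unlv_chs = FALSE

"Take out ~^ early?"

bool crunch_early_merge_tess_fails = TRUE

"Before word crunch?"

bool crunch_include_numerals = FALSE

"Fiddle alpha figures"

bool crunch_leave_accept_strings = FALSE

"Dont pot crunch sensible strings"

int crunch_leave_lc_strings = 4

"Dont crunch words with long lower case strings"

bool crunch_leave_ok_strings = TRUE

"Dont touch sensible strings"

int crunch_leave_uc_strings = 4

"Dont crunch words with long lower case strings"

int crunch_long_repetitions = 3

"Crunch words with long repetitions"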

double crunch_poor_garbage_cert = -9.0

"crunch garbage cert lt this"

double crunch_poor_garbage_rate = 60

"crunch garbage rating lt this"

bool crunch_pot_garbage = TRUE

"POTENTIAL crunch garbage"

int crunch_pot_indicators = 1

"How many potential indicators needed"

double crunch_pot_poor_cert = -8.0

"POTENTIAL crunch cert lt this"

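double crunch_pot_poor_rate = 40

"POTENTIAL crunch rating lt this"

int crunch_rating_max = 10

"For adj length in rating per ch"

double crunch_small_outlines_size = 0.6

"Small if lt xht x this"

bool crunch_terrible_garbage = TRUE

"As it says"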

double crunch_terrible_rating = 80.0

"crunch rating lt this"

bool docqual_excuse_outline_errs = FALSE

"Allow outline errs in unrejection?"

char* outlines_2 = "ij!?%\":;"

"Non standard number of outlines"

char* outlines_odd = "%| "

"Non standard number of outlines"

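double quality_rowrej_pc = 1.1

"good_quality_doc gte good char limit"

bool tessedit_debug_doc_rejection = FALSE

"Page stats"

bool tessedit_debug_quality_metrics = FALSE

"Output data to debug file"

bool tessedit_dont_blkrej_good_wds = FALSE

"Use word segmentation quality metric"

bool tessedit_dont_rowrej_good_wds = FALSE

"Use word segmentation quality metric"

double tessedit_good_doc_still_rowrej_wd = 1.1

"rej good doc wd if more than this fraction rejected"

bool tessedit_good_quality_unrej = TRUE

"Reduce rejection on good docs"

bool tessedit_preserve_blk_rej_perfect_wds = TRUE

"Only rej partially rejected words in block rejection"

int tessedit_preserve_min_wd_len = 2

"Only preserve wds longer than this"

bool tessedit_preserve_row_rej_perfect_wds = TRUE

"Only rej partially rejected words in row rejection"

bool tessedit_reject_bad_qual_wds = TRUE

"Reject all bad quality wds"

double tessedit_reject_block_percent = 45.00

"%rej allowed before rej whole block"

double tessedit_reject_doc_percent = 65.00

"%rej allowed before rej whole doc"

double tessedit_reject_row_percent = 40.00

"%rej allowed before rej whole row"

bool tessedit_row_rej_good_docs = TRUE

"Apply row rejection to good docs"

bool tessedit_use_reject_spaces = TRUE

"Reject spaces?"

double tessedit_whole_wd_rej_row_percent = 70.00

"%of row rejects in whole word rejects which prevents whole row rejection"

bool unlv_tilde_crunching = TRUE

"Mark v.bad words for tilde crunch"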

Generated on Sun Jul 18 17:10:49 2010 for Tesseract by  doxygen 1.6.3