tesseract-doxygen/ccmain/docqual.cpp File Reference

#include "mfcpch.h"
#include <ctype.h>
#include "docqual.h"
#include "tstruct.h"
#include "tfacep.h"
#include "reject.h"
#include "tessvars.h"
#include "genblob.h"
#include "secname.h"
#include "globals.h"
#include "tesseractclass.h"

Namespaces

namespace  tesseract

Defines

#define EXTERN

Functions

EXTERN BOOL_EVAR (crunch_early_convert_bad_unlv_chs, FALSE,"Take out ~^ early?")
inT16 word_blob_quality (WERD_RES *word, ROW *row)
BOOL8 crude_match_blobs (PBLOB *blob1, PBLOB *blob2)
inT16 word_outline_errs (WERD_RES *word)
void word_char_quality (WERD_RES *word, ROW *row, inT16 *match_count, inT16 *accepted_match_count)
void unrej_good_chs (WERD_RES *word, ROW *row)
void print_boxes (WERD *word)
inT16 count_outline_errs (char c, inT16 outline_count)
void reject_whole_page (PAGE_RES_IT &page_res_it)
BOOL8 terrible_word_crunch (WERD_RES *word, GARBAGE_LEVEL garbage_level)
CRUNCH_MODE word_deletable (WERD_RES *word, inT16 &delete_mode)
inT16 failure_count (WERD_RES *word)
BOOL8 noise_outlines (WERD *word)

Variables

EXTERN char * outlines_odd = "%| "
EXTERN char * outlines_2 = "ij!?%\":;"
EXTERN bool docqual_excuse_outline_errs = FALSE
EXTERN bool tessedit_good_quality_unrej = TRUE
EXTERN bool tessedit_use_reject_spaces = TRUE
EXTERN double tessedit_reject_doc_percent = 65.00
EXTERN double tessedit_reject_block_percent = 45.00
EXTERN double tessedit_reject_row_percent = 40.00
EXTERN double tessedit_whole_wd_rej_row_percent = 70.00
EXTERN bool tessedit_preserve_blk_rej_perfect_wds = TRUE
EXTERN bool tessedit_preserve_row_rej_perfect_wds = TRUE
EXTERN bool tessedit_dont_blkrej_good_wds = FALSE
EXTERN bool tessedit_dont_rowrej_good_wds = FALSE
EXTERN int tessedit_preserve_min_wd_len = 2
EXTERN bool tessedit_row_rej_good_docs = TRUE
EXTERN double tessedit_good_doc_still_rowrej_wd = 1.1
EXTERN bool tessedit_reject_bad_qual_wds = TRUE
EXTERN bool tessedit_debug_doc_rejection = FALSE
EXTERN bool tessedit_debug_quality_metrics = FALSE
EXTERN bool bland_unrej = FALSE
EXTERN double quality_rowrej_pc = 1.1
EXTERN bool unlv_tilde_crunching = TRUE
EXTERN bool crunch_early_merge_tess_fails = TRUE
EXTERN double crunch_terrible_rating = 80.0
EXTERN bool crunch_terrible_garbage = TRUE
EXTERN double crunch_poor_garbage_cert = -9.0
EXTERN double crunch_poor_garbage_rate = 60
EXTERN double crunch_pot_poor_rate = 40
EXTERN double crunch_pot_poor_cert = -8.0
EXTERN bool crunch_pot_garbage = TRUE
EXTERN double crunch_del_rating = 60
EXTERN double crunch_del_cert = -10.0
EXTERN double crunch_del_min_ht = 0.7
EXTERN double crunch_del_max_ht = 3.0
EXTERN double crunch_del_min_width = 3.0
EXTERN double crunch_del_high_word = 1.5
EXTERN double crunch_del_low_word = 0.5
EXTERN double crunch_small_outlines_size = 0.6
EXTERN int crunch_rating_max = 10
EXTERN int crunch_pot_indicators = 1
EXTERN bool crunch_leave_ok_strings = TRUE
EXTERN bool crunch_accept_ok = TRUE
EXTERN bool crunch_leave_accept_strings = FALSE
EXTERN bool crunch_include_numerals = FALSE
EXTERN int crunch_leave_lc_strings = 4
EXTERN int crunch_leave_uc_strings = 4
EXTERN int crunch_long_repetitions = 3
EXTERN int crunch_debug = 0

Define Documentation

#define EXTERN

Function Documentation

EXTERN BOOL_EVAR ( crunch_early_convert_bad_unlv_chs  ,
FALSE  ,
"Take out ~^ early?"   
)
inT16 count_outline_errs ( char  c,
inT16  outline_count 
)
BOOL8 crude_match_blobs ( PBLOB * blob1,
PBLOB * blob2 
)
inT16 failure_count ( WERD_RES * word ) 
BOOL8 noise_outlines ( WERD * word ) 
void print_boxes ( WERD * word ) 
void reject_whole_page ( PAGE_RES_IT & page_res_it ) 
BOOL8 terrible_word_crunch ( WERD_RES * word,
GARBAGE_LEVEL  garbage_level 
)
void unrej_good_chs ( WERD_RES * word,
ROW * row 
)
inT16 word_blob_quality ( WERD_RES * word,
ROW * row 
)

Blob seg changes

void word_char_quality ( WERD_RES * word,
ROW * row,
inT16 * match_count,
inT16 * accepted_match_count 
)

Blob seg changes

CRUNCH_MODE word_deletable ( WERD_RES * word,
inT16 & delete_mode 
)
inT16 word_outline_errs ( WERD_RES * word ) 

Outline count errs


Variable Documentation

EXTERN bool bland_unrej = FALSE

"unrej potential with no checks"

EXTERN bool crunch_accept_ok = TRUE

"Use acceptability in okstring"

EXTERN int crunch_debug = 0

"As it says"

EXTERN double crunch_del_cert = -10.0

"POTENTIAL crunch cert lt this"

EXTERN double crunch_del_high_word = 1.5

"Del if word gt xht x this above bl"

EXTERN double crunch_del_low_word = 0.5

"Del if word gt xht x this below bl"

EXTERN double crunch_del_max_ht = 3.0

"Del if word ht gt xht x this"

EXTERN double crunch_del_min_ht = 0.7

"Del if word ht lt xht x this"

EXTERN double crunch_del_min_width = 3.0

"Del if word width lt xht x this"

EXTERN double crunch_del_rating = 60

"POTENTIAL crunch rating lt this"

EXTERN bool crunch_early_merge_tess_fails = TRUE

"Before word crunch?"

EXTERN bool crunch_include_numerals = FALSE

"Fiddle alpha figures"

EXTERN bool crunch_leave_accept_strings = FALSE

"Dont pot crunch sensible strings"

EXTERN int crunch_leave_lc_strings = 4

"Dont crunch words with long lower case strings"

EXTERN bool crunch_leave_ok_strings = TRUE

"Dont touch sensible strings"

EXTERN int crunch_leave_uc_strings = 4

"Dont crunch words with long upper case strings"

EXTERN int crunch_long_repetitions = 3

"Crunch words with long repetitions"

EXTERN double crunch_poor_garbage_cert = -9.0

"crunch garbage cert lt this"

EXTERN double crunch_poor_garbage_rate = 60

"crunch garbage rating lt this"

EXTERN bool crunch_pot_garbage = TRUE

"POTENTIAL crunch garbage"

EXTERN int crunch_pot_indicators = 1

"How many potential indicators needed"

EXTERN double crunch_pot_poor_cert = -8.0

"POTENTIAL crunch cert lt this"

EXTERN double crunch_pot_poor_rate = 40

"POTENTIAL crunch rating lt this"

EXTERN int crunch_rating_max = 10

"For adj length in rating per ch"

EXTERN double crunch_small_outlines_size = 0.6

"Small if lt xht x this"

EXTERN bool crunch_terrible_garbage = TRUE

"As it says"

EXTERN double crunch_terrible_rating = 80.0

"crunch rating lt this"

EXTERN bool docqual_excuse_outline_errs = FALSE

"Allow outline errs in unrejection?"

EXTERN char* outlines_2 = "ij!?%\":;"

"Non standard number of outlines"

EXTERN char* outlines_odd = "%| "

"Non standard number of outlines"

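EXTERN double quality_rowrej_pc = 1.1

"good_quality_doc gte good char limit"

EXTERN bool tessedit_debug_doc_rejection = FALSE

"Page stats"

EXTERN bool tessedit_debug_quality_metrics = FALSE

"Output data to debug file"

EXTERN bool tessedit_dont_blkrej_good_wds = FALSE

"Use word segmentation quality metric"

EXTERN bool tessedit_dont_rowrej_good_wds = FALSE

"Use word segmentation quality metric"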

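EXTERN double tessedit_good_doc_still_rowrej_wd = 1.1

"rej good doc wd if more than this fraction rejected"

EXTERN bool tessedit_good_quality_unrej = TRUE

"Reduce rejection on good docs"

EXTERN bool tessedit_preserve_blk_rej_perfect_wds = TRUE

"Only rej partially rejected words in block rejection"

EXTERN int tessedit_preserve_min_wd_len = 2

"Only preserve wds longer than this"

EXTERN bool tessedit_preserve_row_rej_perfect_wds = TRUE

"Only rej partially rejected words in row rejection"

EXTERN bool tessedit_reject_bad_qual_wds = TRUE

"Reject all bad quality wds"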

EXTERN double tessedit_reject_block_percent = 45.00

"%rej allowed before rej whole block"

EXTERN double tessedit_reject_doc_percent = 65.00

"%rej allowed before rej whole doc"

EXTERN double tessedit_reject_row_percent = 40.00

"%rej allowed before rej whole row"

EXTERN bool tessedit_row_rej_good_docs = TRUE

"Apply row rejection to good docs"

EXTERN bool tessedit_use_reject_spaces = TRUE

"Reject spaces?"

EXTERN double tessedit_whole_wd_rej_row_percent = 70.00

"%of row rejects in whole word rejects which prevents whole row rejection"

EXTERN bool unlv_tilde_crunching = TRUE

"Mark v.bad words for tilde crunch"

Generated on Sun Jul 18 17:10:49 2010 for Tesseract by  doxygen 1.6.3