tesseract-doxygen/textord/tospace.cpp File Reference

#include "mfcpch.h"
#include "tovars.h"
#include "drawtord.h"
#include "tospace.h"
#include "blobbox.h"
#include "gap_map.h"
#include "statistc.h"
#include "notdll.h"
#include "ndminx.h"

Defines

#define MAXSPACING   128

Functions

void to_spacing (ICOORD page_tr, TO_BLOCK_LIST *blocks)
void block_spacing_stats (TO_BLOCK *block, GAPMAP *gapmap, BOOL8 &old_text_ord_proportional, inT16 &block_space_gap_width, inT16 &block_non_space_gap_width)
void row_spacing_stats (TO_ROW *row, GAPMAP *gapmap, inT16 block_idx, inT16 row_idx, inT16 block_space_gap_width, inT16 block_non_space_gap_width)
void old_to_method (TO_ROW *row, STATS *all_gap_stats, STATS *space_gap_stats, STATS *small_gap_stats, inT16 block_space_gap_width, inT16 block_non_space_gap_width)
BOOL8 isolated_row_stats (TO_ROW *row, GAPMAP *gapmap, STATS *all_gap_stats, BOOL8 suspected_table, inT16 block_idx, inT16 row_idx)
inT16 stats_count_under (STATS *stats, inT16 threshold)
void improve_row_threshold (TO_ROW *row, STATS *all_gap_stats)
ROW * make_prop_words (TO_ROW *row, FCOORD rotation)
ROW * make_blob_words (TO_ROW *row, FCOORD rotation)
BOOL8 make_a_word_break (TO_ROW *row, TBOX blob_box, inT16 prev_gap, TBOX prev_blob_box, inT16 real_current_gap, inT16 within_xht_current_gap, TBOX next_blob_box, inT16 next_gap, uinT8 &blanks, BOOL8 &fuzzy_sp, BOOL8 &fuzzy_non)
BOOL8 narrow_blob (TO_ROW *row, TBOX blob_box)
BOOL8 wide_blob (TO_ROW *row, TBOX blob_box)
BOOL8 suspected_punct_blob (TO_ROW *row, TBOX box)
void peek_at_next_gap (TO_ROW *row, BLOBNBOX_IT box_it, TBOX &next_blob_box, inT16 &next_gap, inT16 &next_within_xht_gap)
void mark_gap (TBOX blob, inT16 rule, inT16 prev_gap, inT16 prev_blob_width, inT16 current_gap, inT16 next_blob_width, inT16 next_gap)
float find_mean_blob_spacing (WERD *word)
BOOL8 ignore_big_gap (TO_ROW *row, inT32 row_length, GAPMAP *gapmap, inT16 left, inT16 right)
TBOX reduced_box_next (TO_ROW *row, BLOBNBOX_IT *it)
TBOX reduced_box_for_blob (BLOBNBOX *blob, TO_ROW *row, inT16 *left_above_xht)

Variables

bool tosp_old_to_method = FALSE
bool tosp_only_use_prop_rows = TRUE
bool tosp_force_wordbreak_on_punct = FALSE
bool tosp_use_pre_chopping = FALSE
bool tosp_old_to_bug_fix = FALSE
bool tosp_block_use_cert_spaces = TRUE
bool tosp_row_use_cert_spaces = TRUE
bool tosp_narrow_blobs_not_cert = TRUE
bool tosp_row_use_cert_spaces1 = TRUE
bool tosp_recovery_isolated_row_stats = TRUE
bool tosp_only_small_gaps_for_kern = FALSE
bool tosp_all_flips_fuzzy = FALSE
bool tosp_fuzzy_limit_all = TRUE
bool tosp_stats_use_xht_gaps = TRUE
bool tosp_use_xht_gaps = TRUE
bool tosp_only_use_xht_gaps = FALSE
bool tosp_rule_9_test_punct = FALSE
bool tosp_flip_fuzz_kn_to_sp = TRUE
bool tosp_flip_fuzz_sp_to_kn = TRUE
bool tosp_improve_thresh = FALSE
int tosp_debug_level = 0
int tosp_enough_space_samples_for_median = 3
int tosp_redo_kern_limit = 10
int tosp_few_samples = 40
int tosp_short_row = 20
int tosp_sanity_method = 1
double tosp_threshold_bias1 = 0
double tosp_threshold_bias2 = 0
double tosp_narrow_fraction = 0.3
double tosp_narrow_aspect_ratio = 0.48
double tosp_wide_fraction = 0.52
double tosp_wide_aspect_ratio = 0.0
double tosp_fuzzy_space_factor = 0.6
double tosp_fuzzy_space_factor1 = 0.5
double tosp_fuzzy_space_factor2 = 0.72
double tosp_gap_factor = 0.83
double tosp_kern_gap_factor1 = 2.0
double tosp_kern_gap_factor2 = 1.3
double tosp_kern_gap_factor3 = 2.5
double tosp_ignore_big_gaps = -1
double tosp_ignore_very_big_gaps = 3.5
double tosp_rep_space = 1.6
double tosp_enough_small_gaps = 0.65
double tosp_table_kn_sp_ratio = 2.25
double tosp_table_xht_sp_ratio = 0.33
double tosp_table_fuzzy_kn_sp_ratio = 3.0
double tosp_fuzzy_kn_fraction = 0.5
double tosp_fuzzy_sp_fraction = 0.5
double tosp_min_sane_kn_sp = 1.5
double tosp_init_guess_kn_mult = 2.2
double tosp_init_guess_xht_mult = 0.28
double tosp_max_sane_kn_thresh = 5.0
double tosp_flip_caution = 0.0
double tosp_large_kerning = 0.19
double tosp_dont_fool_with_small_kerns = -1
double tosp_near_lh_edge = 0
double tosp_silly_kn_sp_gap = 0.2
double tosp_pass_wide_fuzz_sp_to_context = 0.75

Define Documentation

#define MAXSPACING   128

Function Documentation

void block_spacing_stats ( TO_BLOCK *  block,
GAPMAP *  gapmap,
BOOL8 &  old_text_ord_proportional,
inT16 &  block_space_gap_width,
inT16 &  block_non_space_gap_width
)
float find_mean_blob_spacing ( WERD *  word  )
BOOL8 ignore_big_gap ( TO_ROW *  row,
inT32  row_length,
GAPMAP *  gapmap,
inT16  left,
inT16  right
)
void improve_row_threshold ( TO_ROW *  row,
STATS *  all_gap_stats
)
BOOL8 isolated_row_stats ( TO_ROW *  row,
GAPMAP *  gapmap,
STATS *  all_gap_stats,
BOOL8  suspected_table,
inT16  block_idx,
inT16  row_idx
)
BOOL8 make_a_word_break ( TO_ROW *  row,
TBOX  blob_box,
inT16  prev_gap,
TBOX  prev_blob_box,
inT16  real_current_gap,
inT16  within_xht_current_gap,
TBOX  next_blob_box,
inT16  next_gap,
uinT8 &  blanks,
BOOL8 &  fuzzy_sp,
BOOL8 &  fuzzy_non
)
ROW* make_blob_words ( TO_ROW *  row,
FCOORD  rotation
)
ROW* make_prop_words ( TO_ROW *  row,
FCOORD  rotation
)
void mark_gap ( TBOX  blob,
inT16  rule,
inT16  prev_gap,
inT16  prev_blob_width,
inT16  current_gap,
inT16  next_blob_width,
inT16  next_gap 
)
BOOL8 narrow_blob ( TO_ROW *  row,
TBOX  blob_box
)
void old_to_method ( TO_ROW *  row,
STATS *  all_gap_stats,
STATS *  space_gap_stats,
STATS *  small_gap_stats,
inT16  block_space_gap_width,
inT16  block_non_space_gap_width
)
void peek_at_next_gap ( TO_ROW *  row,
BLOBNBOX_IT  box_it,
TBOX &  next_blob_box,
inT16 &  next_gap,
inT16 &  next_within_xht_gap
)
TBOX reduced_box_for_blob ( BLOBNBOX *  blob,
TO_ROW *  row,
inT16 *  left_above_xht
)
TBOX reduced_box_next ( TO_ROW *  row,
BLOBNBOX_IT *  it
)
void row_spacing_stats ( TO_ROW *  row,
GAPMAP *  gapmap,
inT16  block_idx,
inT16  row_idx,
inT16  block_space_gap_width,
inT16  block_non_space_gap_width
)
inT16 stats_count_under ( STATS *  stats,
inT16  threshold
)
BOOL8 suspected_punct_blob ( TO_ROW *  row,
TBOX  box
)
void to_spacing ( ICOORD  page_tr,
TO_BLOCK_LIST *  blocks 
)
BOOL8 wide_blob ( TO_ROW *  row,
TBOX  blob_box
)

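Variable Documentation

bool tosp_all_flips_fuzzy = FALSE

"Pass ANY flip to context?"

bool tosp_block_use_cert_spaces = TRUE

"Only stat OBVIOUS spaces"

int tosp_debug_level = 0

"Debug data"

double tosp_dont_fool_with_small_kerns = -1

"Limit use of xht gap with odd small kns"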

double tosp_enough_small_gaps = 0.65

"Fract of kerns reqd for isolated row stats"

int tosp_enough_space_samples_for_median = 3

"or should we use mean"

int tosp_few_samples = 40

"No.gaps reqd with 1 large gap to treat as a table"

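double tosp_flip_caution = 0.0

"Dont autoflip kn to sp when large separation"

bool tosp_flip_fuzz_kn_to_sp = TRUE

"Default flip"

bool tosp_flip_fuzz_sp_to_kn = TRUE

"Default flip"

bool tosp_force_wordbreak_on_punct = FALSE

"Force word breaks on punct to break long lines in non-space delimited langs"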

double tosp_fuzzy_kn_fraction = 0.5

"New fuzzy kn alg"

bool tosp_fuzzy_limit_all = TRUE

"Dont restrict kn->sp fuzzy limit to tables"

double tosp_fuzzy_sp_fraction = 0.5

"New fuzzy sp alg"

double tosp_fuzzy_space_factor = 0.6

"Fract of xheight for fuzz sp"

double tosp_fuzzy_space_factor1 = 0.5

"Fract of xheight for fuzz sp"

double tosp_fuzzy_space_factor2 = 0.72

"Fract of xheight for fuzz sp"

double tosp_gap_factor = 0.83

"gap ratio to flip sp->kern"

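double tosp_ignore_big_gaps = -1

"xht multiplier"

double tosp_ignore_very_big_gaps = 3.5

"xht multiplier"

bool tosp_improve_thresh = FALSE

"Enable improvement heuristic"

double tosp_init_guess_kn_mult = 2.2

"Thresh guess - mult kn by this"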

double tosp_init_guess_xht_mult = 0.28

"Thresh guess - mult xht by this"

double tosp_kern_gap_factor1 = 2.0

"gap ratio to flip kern->sp"

double tosp_kern_gap_factor2 = 1.3

"gap ratio to flip kern->sp"

double tosp_kern_gap_factor3 = 2.5

"gap ratio to flip kern->sp"

double tosp_large_kerning = 0.19

"Limit use of xht gap with large kns"

double tosp_max_sane_kn_thresh = 5.0

"Multiplier on kn to limit thresh"

double tosp_min_sane_kn_sp = 1.5

"Dont trust spaces less than this time kn"

double tosp_narrow_aspect_ratio = 0.48

"narrow if w/h less than this"

bool tosp_narrow_blobs_not_cert = TRUE

"Only stat OBVIOUS spaces"

double tosp_narrow_fraction = 0.3

"Fract of xheight for narrow"

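double tosp_near_lh_edge = 0

"Dont reduce box if the top left is non blank"

bool tosp_old_to_bug_fix = FALSE

"Fix suspected bug in old code"

bool tosp_old_to_method = FALSE

"Space stats use prechopping?"

bool tosp_only_small_gaps_for_kern = FALSE

"Better guess"

bool tosp_only_use_prop_rows = TRUE

"Block stats to use fixed pitch rows?"

bool tosp_only_use_xht_gaps = FALSE

"Only use within xht gap for wd breaks"

double tosp_pass_wide_fuzz_sp_to_context = 0.75

"How wide fuzzies need context"

bool tosp_recovery_isolated_row_stats = TRUE

"Use row alone when inadequate cert spaces"

int tosp_redo_kern_limit = 10

"No.samples reqd to reestimate for row"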

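double tosp_rep_space = 1.6

"rep gap multiplier for space"

bool tosp_row_use_cert_spaces = TRUE

"Only stat OBVIOUS spaces"

bool tosp_row_use_cert_spaces1 = TRUE

"Only stat OBVIOUS spaces"

bool tosp_rule_9_test_punct = FALSE

"Dont chng kn to space next to punct"

int tosp_sanity_method = 1

"How to avoid being silly"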

int tosp_short_row = 20

"No.gaps reqd with few cert spaces to use certs"

double tosp_silly_kn_sp_gap = 0.2

"Dont let sp minus kn get too small"

bool tosp_stats_use_xht_gaps = TRUE

"Use within xht gap for wd breaks"

double tosp_table_fuzzy_kn_sp_ratio = 3.0

"Fuzzy if less than this"

double tosp_table_kn_sp_ratio = 2.25

"Min difference of kn & sp in table"

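double tosp_table_xht_sp_ratio = 0.33

"Expect spaces bigger than this"

double tosp_threshold_bias1 = 0

"how far between kern and space?"

double tosp_threshold_bias2 = 0

"how far between kern and space?"

bool tosp_use_pre_chopping = FALSE

"Space stats use prechopping?"

bool tosp_use_xht_gaps = TRUE

"Use within xht gap for wd breaks"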

double tosp_wide_aspect_ratio = 0.0

"wide if w/h less than this"

double tosp_wide_fraction = 0.52

"Fract of xheight for wide"

Generated on Sun Jul 18 17:10:51 2010 for Tesseract by  doxygen 1.6.3