#include "mfcpch.h"
#include "ocrshell.h"
#include <string.h>
#include <ctype.h>
#include "mainblk.h"
#include "tfacep.h"
#include "tessvars.h"
#include "control.h"
#include "secname.h"
#include "reject.h"
#include "docqual.h"
#include "output.h"
#include "bestfirst.h"
#include "globals.h"
#include "tesseractclass.h"
Namespaces | |
namespace | tesseract |
Defines | |
#define | EXTERN |
#define | EPAPER_EXT ".ep" |
#define | PAGE_YSIZE 3508 |
#define | CTRL_INSET '\024' |
#define | CTRL_FONT '\016' |
#define | CTRL_DEFAULT '\017' |
#define | CTRL_SHIFT '\022' |
#define | CTRL_TAB '\011' |
#define | CTRL_NEWLINE '\012' |
#define | CTRL_HARDLINE '\015' |
Functions | |
EXTERN | BOOL_EVAR (tessedit_write_block_separators, FALSE,"Write block separators in output") |
EXTERN | BOOL_EVAR (tessedit_write_output, FALSE,"Write text to name.txt") |
EXTERN | BOOL_EVAR (tessedit_write_ratings, FALSE,"Return ratings in IPEOCRAPI data") |
EXTERN | BOOL_EVAR (tessedit_write_txt_map, FALSE,"Write .txt to .etx map file") |
EXTERN | BOOL_EVAR (tessedit_write_rep_codes, FALSE,"Write repetition char code") |
EXTERN | BOOL_EVAR (tessedit_write_unlv, FALSE,"Write .unlv output file") |
EXTERN | STRING_EVAR (unrecognised_char,"|","Output char for unidentified blobs") |
EXTERN | INT_EVAR (suspect_level, 99,"Suspect marker level") |
EXTERN | BOOL_EVAR (tessedit_minimal_rejection, FALSE,"Only reject tess failures") |
inT32 | pixels_to_pts (inT32 pixels, inT32 pix_res) |
inT16 | make_reject (TBOX *inset_box, inT16 prevright, inT16 nextleft, DENORM *denorm, char word_string[]) |
char | determine_newline_type (WERD *word, BLOCK *block, WERD *next_word, BLOCK *next_block) |
void | write_shm_text (WERD_RES *word, BLOCK *block, ROW_RES *row, const STRING &text, const STRING &text_lengths) |
void | ensure_rep_chars_are_consistent (WERD_RES *word) |
Variables | |
EXTERN bool | tessedit_write_raw_output = FALSE |
EXTERN int | suspect_space_level = 100 |
EXTERN int | suspect_short_words = 2 |
EXTERN bool | suspect_constrain_1Il = FALSE |
EXTERN double | suspect_rating_per_ch = 999.9 |
EXTERN double | suspect_accept_rating = -999.9 |
EXTERN bool | tessedit_zero_rejection = FALSE |
EXTERN bool | tessedit_word_for_word = FALSE |
EXTERN bool | tessedit_zero_kelvin_rejection = FALSE |
EXTERN bool | tessedit_consistent_reps = TRUE |
FILE * | txt_mapfile = NULL |
FILE * | unlv_file = NULL |
#define CTRL_DEFAULT '\017' |
#define CTRL_FONT '\016' |
#define CTRL_HARDLINE '\015' |
#define CTRL_INSET '\024' |
#define CTRL_NEWLINE '\012' |
#define CTRL_SHIFT '\022' |
#define CTRL_TAB '\011' |
#define EPAPER_EXT ".ep" |
#define EXTERN |
#define PAGE_YSIZE 3508 |
EXTERN BOOL_EVAR | ( | tessedit_minimal_rejection | , | |
FALSE | , | |||
"Only reject tess failures" | ||||
) |
EXTERN BOOL_EVAR | ( | tessedit_write_unlv | , | |
FALSE | , | |||
"Write .unlv output file" | ||||
) |
EXTERN BOOL_EVAR | ( | tessedit_write_rep_codes | , | |
FALSE | , | |||
"Write repetition char code" | ||||
) |
EXTERN BOOL_EVAR | ( | tessedit_write_txt_map | , | |
FALSE | , | |||
"Write .txt to .etx map file" | ||||
) |
EXTERN BOOL_EVAR | ( | tessedit_write_ratings | , | |
FALSE | , | |||
"Return ratings in IPEOCRAPI data" | ||||
) |
EXTERN BOOL_EVAR | ( | tessedit_write_output | , | |
FALSE | , | |||
"Write text to name.txt" | ||||
) |
EXTERN BOOL_EVAR | ( | tessedit_write_block_separators | , | |
FALSE | , | |||
"Write block separators in output" | ||||
) |
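char determine_newline_type | ( | WERD * | word, | |
BLOCK * | block, | |||
WERD * | next_word, | |||
BLOCK * | next_block | |||
) |
test line ends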
word | word to do | |
block | current block | |
next_word | next word | |
next_block | block of next word |
void ensure_rep_chars_are_consistent | ( | WERD_RES * | word | ) |
EXTERN INT_EVAR | ( | suspect_level | , | |
99 | , | |||
"Suspect marker level" | ||||
) |
inT16 make_reject | ( | TBOX * | inset_box, | |
inT16 | prevright, | |||
inT16 | nextleft, | |||
DENORM * | denorm, | |||
char | word_string[] | |||
) |
make reject code
inset_box | bounding box | |
prevright | previous char | |
nextleft | next char | |
denorm | de-normalizer | |
word_string | output string |
EXTERN STRING_EVAR | ( | unrecognised_char | , | |
"|" | , | |||
"Output char for unidentified blobs" | ||||
) |
void write_shm_text | ( | WERD_RES * | word, | |
BLOCK * | block, | |||
ROW_RES * | row, | |||
const STRING & | text, | |||
const STRING & | text_lengths | |||
) |
write output
word | word to do | |
block | block it is from | |
row | row it is from | |
text | text to write |
EXTERN double suspect_accept_rating = -999.9 |
"Accept good rating limit"
EXTERN bool suspect_constrain_1Il = FALSE |
"UNLV keep 1Il chars rejected"
EXTERN double suspect_rating_per_ch = 999.9 |
"Dont touch bad rating limit"
EXTERN int suspect_short_words = 2 |
"Dont Suspect dict wds longer than this"
EXTERN int suspect_space_level = 100 |
"Min suspect level for rejecting spaces"
EXTERN bool tessedit_consistent_reps = TRUE |
"Force all rep chars the same"
EXTERN bool tessedit_word_for_word = FALSE |
"Make output have exactly one word per WERD"
EXTERN bool tessedit_write_raw_output = FALSE |
"Write raw stuff to name.raw"
EXTERN bool tessedit_zero_kelvin_rejection = FALSE |
"Dont reject ANYTHING AT ALL"
EXTERN bool tessedit_zero_rejection = FALSE |
"Dont reject ANYTHING"
FILE* txt_mapfile = NULL |
FILE* unlv_file = NULL |