#include "mfcpch.h"
#include "applybox.h"
#include <ctype.h>
#include <string.h>
#include "boxread.h"
#include "control.h"
#include "genblob.h"
#include "globals.h"
#include "fixxht.h"
#include "varable.h"
#include "statistc.h"
#include "pageres.h"
#include "notdll.h"
#include "mainblk.h"
#include "matchdefs.h"
#include "secname.h"
#include "tessbox.h"
#include "unichar.h"
#include "unicharset.h"
#include "tesseractclass.h"
Namespaces | |
namespace | tesseract |
Defines | |
#define | SECURE_NAMES |
#define | EXTERN |
Functions | |
void | clear_any_old_text (BLOCK_LIST *block_list) |
UNICHAR_ID | register_char (const char *uch) |
BOOL8 | read_next_box (int page, FILE *box_file, TBOX *box, UNICHAR_ID *uch_id) |
ROW * | find_row_of_box (BLOCK_LIST *block_list, const TBOX &box, inT16 &block_id, inT16 &row_id_to_process) |
inT16 | resegment_box (ROW *row, TBOX &box, UNICHAR_ID uch_id, inT16 block_id, inT16 row_id, inT16 boxfile_lineno, inT16 boxfile_charno, inT16 *tgt_char_counts, bool learn_char_fragments, bool learning) |
void | tidy_up (BLOCK_LIST *block_list, inT16 &ok_char_count, inT16 &ok_row_count, inT16 &unlabelled_words, inT16 *tgt_char_counts, inT16 &rebalance_count, UNICHAR_ID *min_uch_id, inT16 &min_samples, inT16 &final_labelled_blob_count, bool learn_character_fragments, bool learning) |
void | report_failed_box (inT16 boxfile_lineno, inT16 boxfile_charno, TBOX box, const char *box_ch, const char *err_msg) |
void | apply_box_training (const STRING &filename, BLOCK_LIST *block_list) |
Variables | |
EXTERN bool | applybox_rebalance = TRUE |
EXTERN int | applybox_debug = 5 |
EXTERN int | applybox_page = 0 |
EXTERN char * | applybox_test_exclusions = "" |
EXTERN double | applybox_error_band = 0.15 |
EXTERN char * | exposure_pattern = ".exp" |
EXTERN bool | learn_chars_and_char_frags_mode = FALSE |
IMAGE | page_image |
#define EXTERN |
#define SECURE_NAMES |
void apply_box_training(const STRING &filename, BLOCK_LIST *block_list)
void clear_any_old_text(BLOCK_LIST *block_list)
remove correct text
block_list | real blocks |
ROW* find_row_of_box(BLOCK_LIST *block_list, const TBOX &box, inT16 &block_id, inT16 &row_id_to_process)
block_list | real blocks | |
box | from boxfile |
BOOL8 read_next_box(int page, FILE *box_file, TBOX *box, UNICHAR_ID *uch_id)
UNICHAR_ID register_char(const char *uch)
Register uch with unicharset_boxes.
void report_failed_box(inT16 boxfile_lineno, inT16 boxfile_charno, TBOX box, const char *box_ch, const char *err_msg)
inT16 resegment_box(ROW *row, TBOX &box, UNICHAR_ID uch_id, inT16 block_id, inT16 row_id, inT16 boxfile_lineno, inT16 boxfile_charno, inT16 *tgt_char_counts, bool learn_char_fragments, bool learning)
void tidy_up(BLOCK_LIST *block_list, inT16 &ok_char_count, inT16 &ok_row_count, inT16 &unlabelled_words, inT16 *tgt_char_counts, inT16 &rebalance_count, UNICHAR_ID *min_uch_id, inT16 &min_samples, inT16 &final_labelled_blob_count, bool learn_character_fragments, bool learning)
block_list | real blocks |
EXTERN int applybox_debug = 5 |
"Debug level"
EXTERN double applybox_error_band = 0.15 |
"Error band as a fraction of the x-height"
EXTERN int applybox_page = 0 |
"Page number to apply boxes from"
EXTERN bool applybox_rebalance = TRUE |
"Drop dead"
EXTERN char* applybox_test_exclusions = "" |
"Chars ignored for testing"
EXTERN char* exposure_pattern = ".exp" |
"Exposure value follows this pattern in the image filename. The names of the image files are expected to be in the form [lang].[fontname].exp[num].tif"
EXTERN bool learn_chars_and_char_frags_mode = FALSE |
"Learn both character fragments (as is done in the special low exposure mode) as well as unfragmented characters."