tesseract-doxygen/ccmain/applybox.cpp File Reference

#include "mfcpch.h"
#include "applybox.h"
#include <ctype.h>
#include <string.h>
#include "boxread.h"
#include "control.h"
#include "genblob.h"
#include "globals.h"
#include "fixxht.h"
#include "varable.h"
#include "statistc.h"
#include "pageres.h"
#include "notdll.h"
#include "mainblk.h"
#include "matchdefs.h"
#include "secname.h"
#include "tessbox.h"
#include "unichar.h"
#include "unicharset.h"
#include "tesseractclass.h"

Namespaces

namespace  tesseract

Defines

#define SECURE_NAMES
#define EXTERN

Functions

void clear_any_old_text (BLOCK_LIST *block_list)
UNICHAR_ID register_char (const char *uch)
BOOL8 read_next_box (int page, FILE *box_file, TBOX *box, UNICHAR_ID *uch_id)
ROW * find_row_of_box (BLOCK_LIST *block_list, const TBOX &box, inT16 &block_id, inT16 &row_id_to_process)
inT16 resegment_box (ROW *row, TBOX &box, UNICHAR_ID uch_id, inT16 block_id, inT16 row_id, inT16 boxfile_lineno, inT16 boxfile_charno, inT16 *tgt_char_counts, bool learn_char_fragments, bool learning)
void tidy_up (BLOCK_LIST *block_list, inT16 &ok_char_count, inT16 &ok_row_count, inT16 &unlabelled_words, inT16 *tgt_char_counts, inT16 &rebalance_count, UNICHAR_ID *min_uch_id, inT16 &min_samples, inT16 &final_labelled_blob_count, bool learn_character_fragments, bool learning)
void report_failed_box (inT16 boxfile_lineno, inT16 boxfile_charno, TBOX box, const char *box_ch, const char *err_msg)
void apply_box_training (const STRING &filename, BLOCK_LIST *block_list)

Variables

EXTERN bool applybox_rebalance = TRUE
EXTERN int applybox_debug = 5
EXTERN int applybox_page = 0
EXTERN char * applybox_test_exclusions = ""
EXTERN double applybox_error_band = 0.15
EXTERN char * exposure_pattern = ".exp"
EXTERN bool learn_chars_and_char_frags_mode = FALSE
IMAGE page_image

Define Documentation

#define EXTERN
#define SECURE_NAMES

Function Documentation

void apply_box_training ( const STRING &  filename,
BLOCK_LIST *  block_list 
)
void clear_any_old_text ( BLOCK_LIST *  block_list  ) 

remove correct text

Parameters:
block_list real blocks
ROW* find_row_of_box ( BLOCK_LIST *  block_list,
const TBOX &  box,
inT16 &  block_id,
inT16 &  row_id_to_process 
)
Parameters:
block_list real blocks
box from boxfile
BOOL8 read_next_box ( int  page,
FILE *  box_file,
TBOX *  box,
UNICHAR_ID *  uch_id 
)
UNICHAR_ID register_char ( const char *  uch  ) 

Register uch with unicharset_boxes.

void report_failed_box ( inT16  boxfile_lineno,
inT16  boxfile_charno,
TBOX  box,
const char *  box_ch,
const char *  err_msg 
)
inT16 resegment_box ( ROW *  row,
TBOX &  box,
UNICHAR_ID  uch_id,
inT16  block_id,
inT16  row_id,
inT16  boxfile_lineno,
inT16  boxfile_charno,
inT16 *  tgt_char_counts,
bool  learn_char_fragments,
bool  learning 
)
void tidy_up ( BLOCK_LIST *  block_list,
inT16 &  ok_char_count,
inT16 &  ok_row_count,
inT16 &  unlabelled_words,
inT16 *  tgt_char_counts,
inT16 &  rebalance_count,
UNICHAR_ID *  min_uch_id,
inT16 &  min_samples,
inT16 &  final_labelled_blob_count,
bool  learn_character_fragments,
bool  learning 
)
Parameters:
block_list real blocks

Variable Documentation

EXTERN int applybox_debug = 5

"Debug level"

EXTERN double applybox_error_band = 0.15

"Err band as fract of xht"

EXTERN int applybox_page = 0

"Page number to apply boxes from"

EXTERN bool applybox_rebalance = TRUE

"Drop dead"

EXTERN char* applybox_test_exclusions = ""

"Chars ignored for testing"

EXTERN char* exposure_pattern = ".exp"

"Exposure value follows this pattern in the image filename. The names of the image files are expected to be in the form [lang].[fontname].exp[num].tif"

EXTERN bool learn_chars_and_char_frags_mode = FALSE

"Learn both character fragments (as is done in the special low exposure mode) as well as unfragmented characters."

Generated on Sun Jul 18 17:10:48 2010 for Tesseract by  doxygen 1.6.3