#include <tesseractclass.h>
Public Member Functions | |
Tesseract () | |
~Tesseract () | |
void | Clear () |
const FCOORD & | reskew () const |
Pix ** | mutable_pix_binary () |
Pix * | pix_binary () const |
void | SetBlackAndWhitelist () |
int | SegmentPage (const STRING *input_file, IMAGE *image, BLOCK_LIST *blocks) |
int | AutoPageSeg (int width, int height, int resolution, bool single_column, IMAGE *image, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks) |
void | recog_all_words (PAGE_RES *page_res, volatile ETEXT_DESC *monitor, TBOX *target_word_box=0L, inT16 dopasses=0) |
void | classify_word_pass1 (WERD_RES *word, ROW *row, BLOCK *block, BOOL8 cluster_adapt, CHAR_SAMPLES_LIST *char_clusters, CHAR_SAMPLE_LIST *chars_waiting) |
void | recog_pseudo_word (BLOCK_LIST *block_list, TBOX &selection_box) |
C_BLOB_LIST * | get_blobs_from_blocks (BLOCK_LIST *blocks) |
void | train_word_level_with_boxes (const STRING &box_file, const STRING &out_file, BLOCK_LIST *blocks) |
void | fix_rep_char (WERD_RES *word) |
void | fix_quotes (WERD_CHOICE *choice, WERD *word, BLOB_CHOICE_LIST_CLIST *blob_choices) |
ACCEPTABLE_WERD_TYPE | acceptable_word_string (const char *s, const char *lengths) |
void | match_word_pass2 (WERD_RES *word, ROW *row, BLOCK *block, float x_height) |
void | classify_word_pass2 (WERD_RES *word, BLOCK *block, ROW *row) |
BOOL8 | recog_interactive (BLOCK *block, ROW *row, WERD *word) |
void | fix_hyphens (WERD_CHOICE *choice, WERD *word, BLOB_CHOICE_LIST_CLIST *blob_choices) |
void | set_word_fonts (WERD_RES *word, BLOB_CHOICE_LIST_CLIST *blob_choices) |
void | font_recognition_pass (PAGE_RES_IT &page_res_it) |
void | output_pass (PAGE_RES_IT &page_res_it, BOOL8 write_to_shm, TBOX *target_word_box) |
FILE * | open_outfile (const char *extension) |
void | write_results (PAGE_RES_IT &page_res_it, char newline_type, BOOL8 force_eol, BOOL8 write_to_shm) |
void | set_unlv_suspects (WERD_RES *word) |
UNICHAR_ID | get_rep_char (WERD_RES *word) |
BOOL8 | acceptable_number_string (const char *s, const char *lengths) |
inT16 | count_alphanums (const WERD_CHOICE &word) |
inT16 | count_alphas (const WERD_CHOICE &word) |
void | read_config_file (const char *filename, bool global_only) |
int | init_tesseract (const char *arg0, const char *textbase, const char *language, char **configs, int configs_size, bool configs_global_only) |
int | init_tesseract_lm (const char *arg0, const char *textbase, const char *language) |
int | init_tesseract_classifier (const char *arg0, const char *textbase, const char *language, char **configs, int configs_size, bool configs_global_only) |
void | recognize_page (STRING &image_name) |
void | end_tesseract () |
bool | init_tesseract_lang_data (const char *arg0, const char *textbase, const char *language, char **configs, int configs_size, bool configs_global_only) |
SVMenuNode * | build_menu_new () |
void | pgeditor_main (BLOCK_LIST *blocks) |
void | process_image_event (const SVEvent &event) |
void | pgeditor_read_file (STRING &filename, BLOCK_LIST *blocks) |
void | do_new_source () |
BOOL8 | process_cmd_win_event (inT32 cmd_event, char *new_value) |
const char * | char_ambiguities (char c) |
void | make_reject_map (WERD_RES *word, BLOB_CHOICE_LIST_CLIST *blob_choices, ROW *row, inT16 pass) |
BOOL8 | one_ell_conflict (WERD_RES *word_res, BOOL8 update_map) |
inT16 | first_alphanum_index (const char *word, const char *word_lengths) |
inT16 | first_alphanum_offset (const char *word, const char *word_lengths) |
inT16 | alpha_count (const char *word, const char *word_lengths) |
BOOL8 | word_contains_non_1_digit (const char *word, const char *word_lengths) |
void | dont_allow_1Il (WERD_RES *word) |
inT16 | count_alphanums (WERD_RES *word) |
BOOL8 | repeated_ch_string (const char *rep_ch_str, const char *lengths) |
void | flip_0O (WERD_RES *word) |
BOOL8 | non_0_digit (UNICHAR_ID unichar_id) |
BOOL8 | non_O_upper (UNICHAR_ID unichar_id) |
BOOL8 | repeated_nonalphanum_wd (WERD_RES *word, ROW *row) |
void | nn_match_word (WERD_RES *word, ROW *row) |
void | nn_recover_rejects (WERD_RES *word, ROW *row) |
BOOL8 | test_ambig_word (WERD_RES *word) |
void | set_done (WERD_RES *word, inT16 pass) |
inT16 | safe_dict_word (const WERD_CHOICE &word) |
void | flip_hyphens (WERD_RES *word) |
void | adapt_to_good_ems (WERD_RES *word, CHAR_SAMPLES_LIST *char_clusters, CHAR_SAMPLE_LIST *chars_waiting) |
void | adapt_to_good_samples (WERD_RES *word, CHAR_SAMPLES_LIST *char_clusters, CHAR_SAMPLE_LIST *chars_waiting) |
BOOL8 | word_adaptable (WERD_RES *word, uinT16 mode) |
void | reject_suspect_ems (WERD_RES *word) |
void | collect_ems_for_adaption (WERD_RES *word, CHAR_SAMPLES_LIST *char_clusters, CHAR_SAMPLE_LIST *chars_waiting) |
void | collect_characters_for_adaption (WERD_RES *word, CHAR_SAMPLES_LIST *char_clusters, CHAR_SAMPLE_LIST *chars_waiting) |
void | check_wait_list (CHAR_SAMPLE_LIST *chars_waiting, CHAR_SAMPLE *sample, CHAR_SAMPLES *best_cluster) |
void | cluster_sample (CHAR_SAMPLE *sample, CHAR_SAMPLES_LIST *char_clusters, CHAR_SAMPLE_LIST *chars_waiting) |
void | complete_clustering (CHAR_SAMPLES_LIST *char_clusters, CHAR_SAMPLE_LIST *chars_waiting) |
WERD_CHOICE * | recog_word_recursive (WERD *word, DENORM *denorm, POLY_MATCHER matcher, POLY_TESTER tester, POLY_TESTER trainer, BOOL8 testing, WERD_CHOICE *&raw_choice, BLOB_CHOICE_LIST_CLIST *blob_choices, WERD *&outword) |
WERD_CHOICE * | recog_word (WERD *word, DENORM *denorm, POLY_MATCHER matcher, POLY_TESTER tester, POLY_TESTER trainer, BOOL8 testing, WERD_CHOICE *&raw_choice, BLOB_CHOICE_LIST_CLIST *blob_choices, WERD *&outword) |
WERD_CHOICE * | split_and_recog_word (WERD *word, DENORM *denorm, POLY_MATCHER matcher, POLY_TESTER tester, POLY_TESTER trainer, BOOL8 testing, WERD_CHOICE *&raw_choice, BLOB_CHOICE_LIST_CLIST *blob_choices, WERD *&outword) |
BOOL8 | digit_or_numeric_punct (WERD_RES *word, int char_position) |
inT16 | eval_word_spacing (WERD_RES_LIST &word_res_list) |
void | match_current_words (WERD_RES_LIST &words, ROW *row, BLOCK *block) |
inT16 | fp_eval_word_spacing (WERD_RES_LIST &word_res_list) |
void | fix_noisy_space_list (WERD_RES_LIST &best_perm, ROW *row, BLOCK *block) |
void | fix_fuzzy_space_list (WERD_RES_LIST &best_perm, ROW *row, BLOCK *block) |
void | fix_sp_fp_word (WERD_RES_IT &word_res_it, ROW *row, BLOCK *block) |
void | fix_fuzzy_spaces (volatile ETEXT_DESC *monitor, inT32 word_count, PAGE_RES *page_res) |
GARBAGE_LEVEL | garbage_word (WERD_RES *word, BOOL8 ok_dict_word) |
BOOL8 | potential_word_crunch (WERD_RES *word, GARBAGE_LEVEL garbage_level, BOOL8 ok_dict_word) |
void | tilde_crunch (PAGE_RES_IT &page_res_it) |
void | unrej_good_quality_words (PAGE_RES_IT &page_res_it) |
void | doc_and_block_rejection (PAGE_RES_IT &page_res_it, BOOL8 good_quality_doc) |
void | quality_based_rejection (PAGE_RES_IT &page_res_it, BOOL8 good_quality_doc) |
void | convert_bad_unlv_chs (WERD_RES *word_res) |
void | merge_tess_fails (WERD_RES *word_res) |
void | tilde_delete (PAGE_RES_IT &page_res_it) |
void | insert_rej_cblobs (WERD_RES *word) |
void | process_selected_words (BLOCK_LIST *block_list, TBOX &selection_box, BOOL8(tesseract::Tesseract::*word_processor)(BLOCK *, ROW *, WERD *)) |
void | tess_add_doc_word (WERD_CHOICE *word_choice) |
void | tess_adapter (WERD *word, DENORM *denorm, const WERD_CHOICE &choice, const WERD_CHOICE &raw_choice, const char *rejmap) |
WERD_CHOICE * | test_segment_pass2 (WERD *word, DENORM *denorm, POLY_MATCHER matcher, POLY_TESTER tester, WERD_CHOICE *&raw_choice, BLOB_CHOICE_LIST_CLIST *blob_choices, WERD *&outword) |
WERD_CHOICE * | tess_segment_pass1 (WERD *word, DENORM *denorm, POLY_MATCHER matcher, WERD_CHOICE *&raw_choice, BLOB_CHOICE_LIST_CLIST *blob_choices, WERD *&outword) |
WERD_CHOICE * | tess_segment_pass2 (WERD *word, DENORM *denorm, POLY_MATCHER matcher, WERD_CHOICE *&raw_choice, BLOB_CHOICE_LIST_CLIST *blob_choices, WERD *&outword) |
WERD_CHOICE * | correct_segment_pass2 (WERD *word, DENORM *denorm, POLY_MATCHER matcher, POLY_TESTER tester, WERD_CHOICE *&raw_choice, BLOB_CHOICE_LIST_CLIST *blob_choices, WERD *&outword) |
void | tess_default_matcher (PBLOB *pblob, PBLOB *blob, PBLOB *nblob, WERD *word, DENORM *denorm, BLOB_CHOICE_LIST *ratings, const char *script) |
void | tess_bn_matcher (PBLOB *pblob, PBLOB *blob, PBLOB *nblob, WERD *word, DENORM *denorm, BLOB_CHOICE_LIST *ratings) |
void | tess_cn_matcher (PBLOB *pblob, PBLOB *blob, PBLOB *nblob, WERD *word, DENORM *denorm, BLOB_CHOICE_LIST *ratings, CLASS_PRUNER_RESULTS cpresults) |
BOOL8 | tess_adaptable_word (WERD *word, WERD_CHOICE *word_choice, WERD_CHOICE *raw_choice) |
BOOL8 | tess_acceptable_word (WERD_CHOICE *word_choice, WERD_CHOICE *raw_choice) |
void | apply_box_testing (BLOCK_LIST *block_list) |
void | apply_boxes (const STRING &fname, BLOCK_LIST *block_list) |
int | Boxes2BlockList (int box_cnt, TBOX *boxes, BLOCK_LIST *block_list, bool right2left) |
float | compare_tess_blobs (TBLOB *blob1, TEXTROW *row1, TBLOB *blob2, TEXTROW *row2) |
float | compare_bln_blobs (PBLOB *blob1, DENORM *denorm1, PBLOB *blob2, DENORM *denorm2) |
float | compare_blobs (PBLOB *blob1, ROW *row1, PBLOB *blob2, ROW *row2) |
BOOL8 | compare_blob_pairs (BLOCK *, ROW *row, WERD *, PBLOB *blob) |
void | check_block_occ (WERD_RES *word_res) |
FILE * | init_ambigs_training (const STRING &fname) |
void | ambigs_training_segmented (const STRING &fname, PAGE_RES *page_res, volatile ETEXT_DESC *monitor, FILE *output_file) |
void | ambigs_classify_and_output (PAGE_RES_IT *page_res_it, const char *label, FILE *output_file) |
Public Attributes | |
bool | tessedit_resegment_from_boxes = false |
bool | tessedit_train_from_boxes = false |
bool | tessedit_dump_pageseg_images = false |
int | tessedit_pageseg_mode = 2 |
int | tessedit_accuracyvspeed = 0 |
bool | tessedit_train_from_boxes_word_level = false |
char * | tessedit_char_blacklist = "" |
char * | tessedit_char_whitelist = "" |
bool | global_tessedit_ambigs_training = false |
tesseract::Tesseract::Tesseract | ( | ) |
tesseract::Tesseract::~Tesseract | ( | ) |
BOOL8 tesseract::Tesseract::acceptable_number_string | ( | const char * | s, | |
const char * | lengths | |||
) |
ACCEPTABLE_WERD_TYPE tesseract::Tesseract::acceptable_word_string | ( | const char * | s, | |
const char * | lengths | |||
) |
void tesseract::Tesseract::adapt_to_good_ems | ( | WERD_RES * | word, | |
CHAR_SAMPLES_LIST * | char_clusters, | |||
CHAR_SAMPLE_LIST * | chars_waiting | |||
) |
void tesseract::Tesseract::adapt_to_good_samples | ( | WERD_RES * | word, | |
CHAR_SAMPLES_LIST * | char_clusters, | |||
CHAR_SAMPLE_LIST * | chars_waiting | |||
) |
inT16 tesseract::Tesseract::alpha_count | ( | const char * | word, | |
const char * | word_lengths | |||
) |
void tesseract::Tesseract::ambigs_classify_and_output | ( | PAGE_RES_IT * | page_res_it, | |
const char * | label, | |||
FILE * | output_file | |||
) |
void tesseract::Tesseract::ambigs_training_segmented | ( | const STRING & | fname, | |
PAGE_RES * | page_res, | |||
volatile ETEXT_DESC * | monitor, | |||
FILE * | output_file | |||
) |
void tesseract::Tesseract::apply_box_testing | ( | BLOCK_LIST * | block_list | ) |
void tesseract::Tesseract::apply_boxes | ( | const STRING & | fname, | |
BLOCK_LIST * | block_list | |||
) |
int tesseract::Tesseract::AutoPageSeg | ( | int | width, | |
int | height, | |||
int | resolution, | |||
bool | single_column, | |||
IMAGE * | image, | |||
BLOCK_LIST * | blocks, | |||
TO_BLOCK_LIST * | to_blocks | |||
) |
int tesseract::Tesseract::Boxes2BlockList | ( | int | box_cnt, | |
TBOX * | boxes, | |||
BLOCK_LIST * | block_list, | |||
bool | right2left | |||
) |
SVMenuNode * tesseract::Tesseract::build_menu_new | ( | ) |
const char * tesseract::Tesseract::char_ambiguities | ( | char | c | ) |
void tesseract::Tesseract::check_block_occ | ( | WERD_RES * | word_res | ) |
void tesseract::Tesseract::check_wait_list | ( | CHAR_SAMPLE_LIST * | chars_waiting, | |
CHAR_SAMPLE * | sample, | |||
CHAR_SAMPLES * | best_cluster | |||
) |
void tesseract::Tesseract::classify_word_pass1 | ( | WERD_RES * | word, | |
ROW * | row, | |||
BLOCK * | block, | |||
BOOL8 | cluster_adapt, | |||
CHAR_SAMPLES_LIST * | char_clusters, | |||
CHAR_SAMPLE_LIST * | chars_waiting | |||
) |
void tesseract::Tesseract::Clear | ( | ) |
void tesseract::Tesseract::cluster_sample | ( | CHAR_SAMPLE * | sample, | |
CHAR_SAMPLES_LIST * | char_clusters, | |||
CHAR_SAMPLE_LIST * | chars_waiting | |||
) |
void tesseract::Tesseract::collect_characters_for_adaption | ( | WERD_RES * | word, | |
CHAR_SAMPLES_LIST * | char_clusters, | |||
CHAR_SAMPLE_LIST * | chars_waiting | |||
) |
void tesseract::Tesseract::collect_ems_for_adaption | ( | WERD_RES * | word, | |
CHAR_SAMPLES_LIST * | char_clusters, | |||
CHAR_SAMPLE_LIST * | chars_waiting | |||
) |
float tesseract::Tesseract::compare_bln_blobs | ( | PBLOB * | blob1, | |
DENORM * | denorm1, | |||
PBLOB * | blob2, | |||
DENORM * | denorm2 | |||
) |
float tesseract::Tesseract::compare_tess_blobs | ( | TBLOB * | blob1, | |
TEXTROW * | row1, | |||
TBLOB * | blob2, | |||
TEXTROW * | row2 | |||
) |
void tesseract::Tesseract::complete_clustering | ( | CHAR_SAMPLES_LIST * | char_clusters, | |
CHAR_SAMPLE_LIST * | chars_waiting | |||
) |
void tesseract::Tesseract::convert_bad_unlv_chs | ( | WERD_RES * | word_res | ) |
WERD_CHOICE * tesseract::Tesseract::correct_segment_pass2 | ( | WERD * | word, | |
DENORM * | denorm, | |||
POLY_MATCHER | matcher, | |||
POLY_TESTER | tester, | |||
WERD_CHOICE *& | raw_choice, | |||
BLOB_CHOICE_LIST_CLIST * | blob_choices, | |||
WERD *& | outword | |||
) |
inT16 tesseract::Tesseract::count_alphanums | ( | const WERD_CHOICE & | word | ) |
inT16 tesseract::Tesseract::count_alphas | ( | const WERD_CHOICE & | word | ) |
void tesseract::Tesseract::do_new_source | ( | ) |
void tesseract::Tesseract::doc_and_block_rejection | ( | PAGE_RES_IT & | page_res_it, | |
BOOL8 | good_quality_doc | |||
) |
void tesseract::Tesseract::dont_allow_1Il | ( | WERD_RES * | word | ) |
void tesseract::Tesseract::end_tesseract | ( | ) |
inT16 tesseract::Tesseract::eval_word_spacing | ( | WERD_RES_LIST & | word_res_list | ) |
inT16 tesseract::Tesseract::first_alphanum_index | ( | const char * | word, | |
const char * | word_lengths | |||
) |
inT16 tesseract::Tesseract::first_alphanum_offset | ( | const char * | word, | |
const char * | word_lengths | |||
) |
void tesseract::Tesseract::fix_fuzzy_space_list | ( | WERD_RES_LIST & | best_perm, | |
ROW * | row, | |||
BLOCK * | block | |||
) |
void tesseract::Tesseract::fix_fuzzy_spaces | ( | volatile ETEXT_DESC * | monitor, | |
inT32 | word_count, | |||
PAGE_RES * | page_res | |||
) |
void tesseract::Tesseract::fix_hyphens | ( | WERD_CHOICE * | choice, | |
WERD * | word, | |||
BLOB_CHOICE_LIST_CLIST * | blob_choices | |||
) |
void tesseract::Tesseract::fix_noisy_space_list | ( | WERD_RES_LIST & | best_perm, | |
ROW * | row, | |||
BLOCK * | block | |||
) |
void tesseract::Tesseract::fix_quotes | ( | WERD_CHOICE * | choice, | |
WERD * | word, | |||
BLOB_CHOICE_LIST_CLIST * | blob_choices | |||
) |
void tesseract::Tesseract::fix_rep_char | ( | WERD_RES * | word | ) |
void tesseract::Tesseract::flip_0O | ( | WERD_RES * | word | ) |
void tesseract::Tesseract::flip_hyphens | ( | WERD_RES * | word | ) |
void tesseract::Tesseract::font_recognition_pass | ( | PAGE_RES_IT & | page_res_it | ) |
inT16 tesseract::Tesseract::fp_eval_word_spacing | ( | WERD_RES_LIST & | word_res_list | ) |
GARBAGE_LEVEL tesseract::Tesseract::garbage_word | ( | WERD_RES * | word, | |
BOOL8 | ok_dict_word | |||
) |
C_BLOB_LIST* tesseract::Tesseract::get_blobs_from_blocks | ( | BLOCK_LIST * | blocks | ) |
UNICHAR_ID tesseract::Tesseract::get_rep_char | ( | WERD_RES * | word | ) |
FILE * tesseract::Tesseract::init_ambigs_training | ( | const STRING & | fname | ) |
int tesseract::Tesseract::init_tesseract | ( | const char * | arg0, | |
const char * | textbase, | |||
const char * | language, | |||
char ** | configs, | |||
int | configs_size, | |||
bool | configs_global_only | |||
) |
int tesseract::Tesseract::init_tesseract_classifier | ( | const char * | arg0, | |
const char * | textbase, | |||
const char * | language, | |||
char ** | configs, | |||
int | configs_size, | |||
bool | configs_global_only | |||
) |
bool tesseract::Tesseract::init_tesseract_lang_data | ( | const char * | arg0, | |
const char * | textbase, | |||
const char * | language, | |||
char ** | configs, | |||
int | configs_size, | |||
bool | configs_global_only | |||
) |
int tesseract::Tesseract::init_tesseract_lm | ( | const char * | arg0, | |
const char * | textbase, | |||
const char * | language | |||
) |
void tesseract::Tesseract::insert_rej_cblobs | ( | WERD_RES * | word | ) |
void tesseract::Tesseract::make_reject_map | ( | WERD_RES * | word, | |
BLOB_CHOICE_LIST_CLIST * | blob_choices, | |||
ROW * | row, | |||
inT16 | pass | |||
) |
void tesseract::Tesseract::match_word_pass2 | ( | WERD_RES * | word, | |
ROW * | row, | |||
BLOCK * | block, | |||
float | x_height | |||
) |
void tesseract::Tesseract::merge_tess_fails | ( | WERD_RES * | word_res | ) |
Pix** tesseract::Tesseract::mutable_pix_binary | ( | ) | [inline] |
BOOL8 tesseract::Tesseract::non_0_digit | ( | UNICHAR_ID | unichar_id | ) |
BOOL8 tesseract::Tesseract::non_O_upper | ( | UNICHAR_ID | unichar_id | ) |
FILE * tesseract::Tesseract::open_outfile | ( | const char * | extension | ) |
void tesseract::Tesseract::output_pass | ( | PAGE_RES_IT & | page_res_it, | |
BOOL8 | write_to_shm, | |||
TBOX * | target_word_box | |||
) |
void tesseract::Tesseract::pgeditor_main | ( | BLOCK_LIST * | blocks | ) |
void tesseract::Tesseract::pgeditor_read_file | ( | STRING & | filename, | |
BLOCK_LIST * | blocks | |||
) |
Pix* tesseract::Tesseract::pix_binary | ( | ) | const [inline] |
BOOL8 tesseract::Tesseract::potential_word_crunch | ( | WERD_RES * | word, | |
GARBAGE_LEVEL | garbage_level, | |||
BOOL8 | ok_dict_word | |||
) |
void tesseract::Tesseract::process_image_event | ( | const SVEvent & | event | ) |
void tesseract::Tesseract::process_selected_words | ( | BLOCK_LIST * | block_list, | |
TBOX & | selection_box, | |||
BOOL8(tesseract::Tesseract::*)(BLOCK *, ROW *, WERD *) | word_processor | |||
) |
void tesseract::Tesseract::quality_based_rejection | ( | PAGE_RES_IT & | page_res_it, | |
BOOL8 | good_quality_doc | |||
) |
void tesseract::Tesseract::read_config_file | ( | const char * | filename, | |
bool | global_only | |||
) |
void tesseract::Tesseract::recog_all_words | ( | PAGE_RES * | page_res, | |
volatile ETEXT_DESC * | monitor, | |||
TBOX * | target_word_box = 0L, | |||
inT16 | dopasses = 0 | |||
) |
void tesseract::Tesseract::recog_pseudo_word | ( | BLOCK_LIST * | block_list, | |
TBOX & | selection_box | |||
) |
WERD_CHOICE * tesseract::Tesseract::recog_word | ( | WERD * | word, | |
DENORM * | denorm, | |||
POLY_MATCHER | matcher, | |||
POLY_TESTER | tester, | |||
POLY_TESTER | trainer, | |||
BOOL8 | testing, | |||
WERD_CHOICE *& | raw_choice, | |||
BLOB_CHOICE_LIST_CLIST * | blob_choices, | |||
WERD *& | outword | |||
) |
WERD_CHOICE * tesseract::Tesseract::recog_word_recursive | ( | WERD * | word, | |
DENORM * | denorm, | |||
POLY_MATCHER | matcher, | |||
POLY_TESTER | tester, | |||
POLY_TESTER | trainer, | |||
BOOL8 | testing, | |||
WERD_CHOICE *& | raw_choice, | |||
BLOB_CHOICE_LIST_CLIST * | blob_choices, | |||
WERD *& | outword | |||
) |
void tesseract::Tesseract::recognize_page | ( | STRING & | image_name | ) |
void tesseract::Tesseract::reject_suspect_ems | ( | WERD_RES * | word | ) |
BOOL8 tesseract::Tesseract::repeated_ch_string | ( | const char * | rep_ch_str, | |
const char * | lengths | |||
) |
const FCOORD& tesseract::Tesseract::reskew | ( | ) | const [inline] |
inT16 tesseract::Tesseract::safe_dict_word | ( | const WERD_CHOICE & | word | ) |
int tesseract::Tesseract::SegmentPage | ( | const STRING * | input_file, | |
IMAGE * | image, | |||
BLOCK_LIST * | blocks | |||
) |
void tesseract::Tesseract::set_unlv_suspects | ( | WERD_RES * | word | ) |
void tesseract::Tesseract::set_word_fonts | ( | WERD_RES * | word, | |
BLOB_CHOICE_LIST_CLIST * | blob_choices | |||
) |
void tesseract::Tesseract::SetBlackAndWhitelist | ( | ) |
WERD_CHOICE * tesseract::Tesseract::split_and_recog_word | ( | WERD * | word, | |
DENORM * | denorm, | |||
POLY_MATCHER | matcher, | |||
POLY_TESTER | tester, | |||
POLY_TESTER | trainer, | |||
BOOL8 | testing, | |||
WERD_CHOICE *& | raw_choice, | |||
BLOB_CHOICE_LIST_CLIST * | blob_choices, | |||
WERD *& | outword | |||
) |
BOOL8 tesseract::Tesseract::tess_acceptable_word | ( | WERD_CHOICE * | word_choice, | |
WERD_CHOICE * | raw_choice | |||
) |
BOOL8 tesseract::Tesseract::tess_adaptable_word | ( | WERD * | word, | |
WERD_CHOICE * | word_choice, | |||
WERD_CHOICE * | raw_choice | |||
) |
void tesseract::Tesseract::tess_adapter | ( | WERD * | word, | |
DENORM * | denorm, | |||
const WERD_CHOICE & | choice, | |||
const WERD_CHOICE & | raw_choice, | |||
const char * | rejmap | |||
) |
void tesseract::Tesseract::tess_add_doc_word | ( | WERD_CHOICE * | word_choice | ) |
void tesseract::Tesseract::tess_bn_matcher | ( | PBLOB * | pblob, | |
PBLOB * | blob, | |||
PBLOB * | nblob, | |||
WERD * | word, | |||
DENORM * | denorm, | |||
BLOB_CHOICE_LIST * | ratings | |||
) |
void tesseract::Tesseract::tess_cn_matcher | ( | PBLOB * | pblob, | |
PBLOB * | blob, | |||
PBLOB * | nblob, | |||
WERD * | word, | |||
DENORM * | denorm, | |||
BLOB_CHOICE_LIST * | ratings, | |||
CLASS_PRUNER_RESULTS | cpresults | |||
) |
void tesseract::Tesseract::tess_default_matcher | ( | PBLOB * | pblob, | |
PBLOB * | blob, | |||
PBLOB * | nblob, | |||
WERD * | word, | |||
DENORM * | denorm, | |||
BLOB_CHOICE_LIST * | ratings, | |||
const char * | script | |||
) |
WERD_CHOICE * tesseract::Tesseract::tess_segment_pass1 | ( | WERD * | word, | |
DENORM * | denorm, | |||
POLY_MATCHER | matcher, | |||
WERD_CHOICE *& | raw_choice, | |||
BLOB_CHOICE_LIST_CLIST * | blob_choices, | |||
WERD *& | outword | |||
) |
WERD_CHOICE * tesseract::Tesseract::tess_segment_pass2 | ( | WERD * | word, | |
DENORM * | denorm, | |||
POLY_MATCHER | matcher, | |||
WERD_CHOICE *& | raw_choice, | |||
BLOB_CHOICE_LIST_CLIST * | blob_choices, | |||
WERD *& | outword | |||
) |
WERD_CHOICE * tesseract::Tesseract::test_segment_pass2 | ( | WERD * | word, | |
DENORM * | denorm, | |||
POLY_MATCHER | matcher, | |||
POLY_TESTER | tester, | |||
WERD_CHOICE *& | raw_choice, | |||
BLOB_CHOICE_LIST_CLIST * | blob_choices, | |||
WERD *& | outword | |||
) |
void tesseract::Tesseract::tilde_crunch | ( | PAGE_RES_IT & | page_res_it | ) |
void tesseract::Tesseract::tilde_delete | ( | PAGE_RES_IT & | page_res_it | ) |
void tesseract::Tesseract::train_word_level_with_boxes | ( | const STRING & | box_file, | |
const STRING & | out_file, | |||
BLOCK_LIST * | blocks | |||
) |
void tesseract::Tesseract::unrej_good_quality_words | ( | PAGE_RES_IT & | page_res_it | ) |
BOOL8 tesseract::Tesseract::word_contains_non_1_digit | ( | const char * | word, | |
const char * | word_lengths | |||
) |
void tesseract::Tesseract::write_results | ( | PAGE_RES_IT & | page_res_it, | |
char | newline_type, | |||
BOOL8 | force_eol, | |||
BOOL8 | write_to_shm | |||
) |
"Perform training for ambiguities"
"Accuracy V Speed tradeoff: 0 fastest, 100 most accurate" " (Values from AccuracyVSpeed enum in baseapi.h)"
"Blacklist of chars not to recognize"
"Whitelist of chars to recognize"
"Dump intermediate images made during page segmentation"
"Page seg mode: 0=auto, 1=col, 2=block, 3=line, 4=word, 6=char" " (Values from PageSegMode enum in baseapi.h)"
"Take segmentation and labeling from box file"
"Generate training data from boxed chars"
"Generate training data from boxed chars at word level."