tesseract-doxygen/dict/permute.cpp File Reference

#include <assert.h>
#include <math.h>
#include "const.h"
#include "permute.h"
#include "callcpp.h"
#include "context.h"
#include "conversion.h"
#include "freelist.h"
#include "globals.h"
#include "ndminx.h"
#include "permdawg.h"
#include "permngram.h"
#include "ratngs.h"
#include "stopper.h"
#include "tordvars.h"
#include "tprintf.h"
#include "trie.h"
#include "varable.h"
#include "unicharset.h"
#include "dict.h"
#include "image.h"
#include "ccutil.h"

Namespaces

namespace  tesseract

Defines

#define MAX_NUM_EDGES   2000000
#define MAX_DOC_EDGES   250000
#define MAX_USER_EDGES   50000
#define NON_WERD   1.25
#define GARBAGE_STRING   1.5
#define MAX_PERM_LENGTH   128
#define SIM_CERTAINTY_SCALE   -10.0
#define SIM_CERTAINTY_OFFSET   -10.0
#define SIMILARITY_FLOOR   100.0

Functions

WERD_CHOICE * get_best_delete_other (WERD_CHOICE *choice1, WERD_CHOICE *choice2)
int good_choice (const WERD_CHOICE &choice)
UNICHAR_ID tesseract::get_top_choice_uid (BLOB_CHOICE_LIST *blob_list)
int tesseract::get_top_word_script (const BLOB_CHOICE_LIST_VECTOR &char_choices, const UNICHARSET &unicharset)
BLOB_CHOICE * tesseract::find_choice_by_type (BLOB_CHOICE_LIST *char_choices, char target_type, const UNICHARSET &unicharset)
BLOB_CHOICE * tesseract::find_choice_by_script (BLOB_CHOICE_LIST *char_choices, int target_sid, int backup_sid, int secondary_sid)

Variables

int permutation_count
int fragments_debug = 0
bool segment_debug = 0
bool permute_debug = 0
double bestrate_pruning_factor = 2.0
bool permute_script_word = 0
bool segment_segcost_rating = 0
double segment_reward_script = 0.95
double segment_penalty_dict_nonword = 1.25
double segment_penalty_garbage = 1.5
bool save_doc_words = 0
bool doc_dict_enable = 1
bool ngram_permuter_activated = FALSE
char * global_user_words_suffix = ""
float wordseg_rating_adjust_factor
int permute_only_top = 0

Define Documentation

#define GARBAGE_STRING   1.5
#define MAX_DOC_EDGES   250000
#define MAX_NUM_EDGES   2000000
#define MAX_PERM_LENGTH   128
#define MAX_USER_EDGES   50000
#define NON_WERD   1.25
#define SIM_CERTAINTY_OFFSET   -10.0
#define SIM_CERTAINTY_SCALE   -10.0
#define SIMILARITY_FLOOR   100.0

Function Documentation

WERD_CHOICE* get_best_delete_other ( WERD_CHOICE * choice1,
WERD_CHOICE * choice2 
)
int good_choice ( const WERD_CHOICE & choice  ) 

Variable Documentation

double bestrate_pruning_factor = 2.0

"Multiplying factor of current best rate to prune other hypotheses"

bool doc_dict_enable = 1

"Enable Document Dictionary "

int fragments_debug = 0

"Debug character fragments"

char * global_user_words_suffix = ""

"A list of user-provided words."

bool ngram_permuter_activated = FALSE

"Activate character-level n-gram-based permuter"

bool permute_debug = 0

"Debug char permutation process"

bool permute_script_word = 0

"Turn on word script consistency permuter"

bool save_doc_words = 0

"Save Document Words"

bool segment_debug = 0

"Debug the whole segmentation process"

double segment_penalty_dict_nonword = 1.25

"Score multiplier for glyph fragment segmentations which do not " "match a dictionary word (lower is better)."

double segment_penalty_garbage = 1.5

"Score multiplier for poorly cased strings that are not in the " "dictionary and generally look like garbage (lower is better)."

double segment_reward_script = 0.95

"Score multiplier for script consistency within a word. " "Being a 'reward' factor, it should be <= 1. " "Smaller value implies bigger reward."

bool segment_segcost_rating = 0

"incorporate segmentation cost in word rating?"

Generated on Sun Jul 18 17:10:51 2010 for Tesseract by  doxygen 1.6.3