#include <assert.h>
#include <math.h>
#include "const.h"
#include "permute.h"
#include "callcpp.h"
#include "context.h"
#include "conversion.h"
#include "freelist.h"
#include "globals.h"
#include "ndminx.h"
#include "permdawg.h"
#include "permngram.h"
#include "ratngs.h"
#include "stopper.h"
#include "tordvars.h"
#include "tprintf.h"
#include "trie.h"
#include "varable.h"
#include "unicharset.h"
#include "dict.h"
#include "image.h"
#include "ccutil.h"
#define GARBAGE_STRING 1.5
#define MAX_DOC_EDGES 250000
#define MAX_NUM_EDGES 2000000
#define MAX_PERM_LENGTH 128
#define MAX_USER_EDGES 50000
#define NON_WERD 1.25
#define SIM_CERTAINTY_OFFSET -10.0
#define SIM_CERTAINTY_SCALE -10.0
#define SIMILARITY_FLOOR 100.0
WERD_CHOICE* get_best_delete_other(WERD_CHOICE *choice1, WERD_CHOICE *choice2)
int good_choice(const WERD_CHOICE &choice)
double bestrate_pruning_factor = 2.0
"Multiplying factor of current best rate to prune other hypotheses"
bool doc_dict_enable = 1
"Enable Document Dictionary "
int fragments_debug = 0
"Debug character fragments"
char* global_user_words_suffix = ""
"A list of user-provided words."
bool ngram_permuter_activated = FALSE
"Activate character-level n-gram-based permuter"
bool permute_debug = 0
"Debug char permutation process"
int permute_only_top = 0
"Turn on word script consistency permuter"
bool save_doc_words = 0
"Save Document Words"
bool segment_debug = 0
"Debug the whole segmentation process"
double segment_penalty_dict_nonword = 1.25
"Score multiplier for glyph fragment segmentations which do not match a dictionary word (lower is better)."
double segment_penalty_garbage = 1.5
"Score multiplier for poorly cased strings that are not in the dictionary and generally look like garbage (lower is better)."
double segment_reward_script = 0.95
"Score multiplier for script consistency within a word. Being a 'reward' factor, it should be <= 1. Smaller value implies bigger reward."
"incorporate segmentation cost in word rating?"