#include <trie.h>
Public Member Functions | |
Trie (DawgType type, const STRING &lang, PermuterType perm, uinT64 max_num_edges, int unicharset_size) | |
~Trie () | |
EDGE_REF | edge_char_of (NODE_REF node_ref, UNICHAR_ID unichar_id, bool word_end) const |
void | unichar_ids_of (NODE_REF node, NodeChildVector *vec) const |
NODE_REF | next_node (EDGE_REF edge_ref) const |
bool | end_of_word (EDGE_REF edge_ref) const |
UNICHAR_ID | edge_letter (EDGE_REF edge_ref) const |
void | print_node (NODE_REF node, int max_num_edges) const |
SquishedDawg * | trie_to_dawg () |
bool | read_word_list (const char *filename, const UNICHARSET &unicharset) |
void | add_word_to_dawg (const WERD_CHOICE &word) |
Protected Member Functions | |
EDGE_RECORD * | deref_edge_ref (EDGE_REF edge_ref) const |
EDGE_REF | make_edge_ref (NODE_REF node_index, EDGE_INDEX edge_index) const |
void | link_edge (EDGE_RECORD *edge, NODE_REF nxt, int direction, bool word_end, UNICHAR_ID unichar_id) |
void | print_edge_rec (const EDGE_RECORD &edge_rec) const |
bool | can_be_eliminated (const EDGE_RECORD &edge_rec) |
void | print_all (const char *msg, int max_num_edges) |
bool | edge_char_of (NODE_REF node_ref, NODE_REF next_node, int direction, bool word_end, UNICHAR_ID unichar_id, EDGE_RECORD **edge_ptr, EDGE_INDEX *edge_index) const |
bool | add_edge_linkage (NODE_REF node1, NODE_REF node2, int direction, bool word_end, UNICHAR_ID unichar_id) |
bool | add_new_edge (NODE_REF node1, NODE_REF node2, bool word_end, UNICHAR_ID unichar_id) |
void | add_word_ending (EDGE_RECORD *edge, NODE_REF the_next_node, UNICHAR_ID unichar_id) |
NODE_REF | new_dawg_node () |
void | remove_edge_linkage (NODE_REF node1, NODE_REF node2, int direction, bool word_end, UNICHAR_ID unichar_id) |
void | remove_edge (NODE_REF node1, NODE_REF node2, bool word_end, UNICHAR_ID unichar_id) |
bool | eliminate_redundant_edges (NODE_REF node, const EDGE_RECORD &edge1, const EDGE_RECORD &edge2) |
bool | reduce_lettered_edges (EDGE_INDEX edge_index, UNICHAR_ID unichar_id, NODE_REF node, const EDGE_VECTOR &backward_edges, NODE_MARKER reduced_nodes) |
void | sort_edges (EDGE_VECTOR *edges) |
void | reduce_node_input (NODE_REF node, NODE_MARKER reduced_nodes) |
Protected Attributes | |
TRIE_NODES | nodes_ |
vector of nodes in the Trie | |
uinT64 | num_edges_ |
sum of all edges (forward and backward) | |
uinT64 | max_num_edges_ |
maximum number of edges allowed | |
uinT64 | deref_direction_mask_ |
mask for EDGE_REF to extract direction | |
uinT64 | deref_node_index_mask_ |
mask for EDGE_REF to extract node index |
Concrete class for the Trie data structure (extends the Dawg base class) that stores a list of words and also allows new words to be added dynamically. This class stores a vector of pointers to TRIE_NODE_RECORDs, each of which has a vector of forward and backward edges.
tesseract::Trie::Trie | ( | DawgType | type, | |
const STRING & | lang, | |||
PermuterType | perm, | |||
uinT64 | max_num_edges, | |||
int | unicharset_size | |||
) | [inline] |
tesseract::Trie::~Trie | ( | ) | [inline] |
bool tesseract::Trie::add_edge_linkage | ( | NODE_REF | node1, | |
NODE_REF | node2, | |||
int | direction, | |||
bool | word_end, | |||
UNICHAR_ID | unichar_id | |||
) | [protected] |
bool tesseract::Trie::add_new_edge | ( | NODE_REF | node1, | |
NODE_REF | node2, | |||
bool | word_end, | |||
UNICHAR_ID | unichar_id | |||
) | [inline, protected] |
void tesseract::Trie::add_word_ending | ( | EDGE_RECORD * | edge, | |
NODE_REF | the_next_node, | |||
UNICHAR_ID | unichar_id | |||
) | [protected] |
void tesseract::Trie::add_word_to_dawg | ( | const WERD_CHOICE & | word | ) |
bool tesseract::Trie::can_be_eliminated | ( | const EDGE_RECORD & | edge_rec | ) | [inline, protected] |
EDGE_RECORD* tesseract::Trie::deref_edge_ref | ( | EDGE_REF | edge_ref | ) | const [inline, protected] |
bool tesseract::Trie::edge_char_of | ( | NODE_REF | node_ref, | |
NODE_REF | next_node, | |||
int | direction, | |||
bool | word_end, | |||
UNICHAR_ID | unichar_id, | |||
EDGE_RECORD ** | edge_ptr, | |||
EDGE_INDEX * | edge_index | |||
) | const [protected] |
EDGE_REF tesseract::Trie::edge_char_of | ( | NODE_REF | node_ref, | |
UNICHAR_ID | unichar_id, | |||
bool | word_end | |||
) | const [inline, virtual] |
Returns the edge that corresponds to the letter out of this node.
Implements tesseract::Dawg.
UNICHAR_ID tesseract::Trie::edge_letter | ( | EDGE_REF | edge_ref | ) | const [inline, virtual] |
Returns UNICHAR_ID stored in the edge indicated by the given EDGE_REF.
Implements tesseract::Dawg.
bool tesseract::Trie::eliminate_redundant_edges | ( | NODE_REF | node, | |
const EDGE_RECORD & | edge1, | |||
const EDGE_RECORD & | edge2 | |||
) | [protected] |
bool tesseract::Trie::end_of_word | ( | EDGE_REF | edge_ref | ) | const [inline, virtual] |
Returns true if the edge indicated by the given EDGE_REF marks the end of a word.
Implements tesseract::Dawg.
void tesseract::Trie::link_edge | ( | EDGE_RECORD * | edge, | |
NODE_REF | nxt, | |||
int | direction, | |||
bool | word_end, | |||
UNICHAR_ID | unichar_id | |||
) | [inline, protected] |
Sets up this edge record to the requested values.
EDGE_REF tesseract::Trie::make_edge_ref | ( | NODE_REF | node_index, | |
EDGE_INDEX | edge_index | |||
) | const [inline, protected] |
Constructs EDGE_REF from the given node_index and edge_index.
NODE_REF tesseract::Trie::new_dawg_node | ( | ) | [protected] |
NODE_REF tesseract::Trie::next_node | ( | EDGE_REF | edge_ref | ) | const [inline, virtual] |
Returns the next node visited by following the edge indicated by the given EDGE_REF.
Implements tesseract::Dawg.
void tesseract::Trie::print_all | ( | const char * | msg, | |
int | max_num_edges | |||
) | [inline, protected] |
void tesseract::Trie::print_edge_rec | ( | const EDGE_RECORD & | edge_rec | ) | const [inline, protected] |
Prints the given EDGE_RECORD.
void tesseract::Trie::print_node | ( | NODE_REF | node, | |
int | max_num_edges | |||
) | const [virtual] |
Prints the contents of the node indicated by the given NODE_REF. At most max_num_edges will be printed.
Implements tesseract::Dawg.
bool tesseract::Trie::read_word_list | ( | const char * | filename, | |
const UNICHARSET & | unicharset | |||
) |
bool tesseract::Trie::reduce_lettered_edges | ( | EDGE_INDEX | edge_index, | |
UNICHAR_ID | unichar_id, | |||
NODE_REF | node, | |||
const EDGE_VECTOR & | backward_edges, | |||
NODE_MARKER | reduced_nodes | |||
) | [protected] |
void tesseract::Trie::reduce_node_input | ( | NODE_REF | node, | |
NODE_MARKER | reduced_nodes | |||
) | [protected] |
Eliminates any redundant edges from this node in the Trie.
void tesseract::Trie::remove_edge | ( | NODE_REF | node1, | |
NODE_REF | node2, | |||
bool | word_end, | |||
UNICHAR_ID | unichar_id | |||
) | [inline, protected] |
void tesseract::Trie::remove_edge_linkage | ( | NODE_REF | node1, | |
NODE_REF | node2, | |||
int | direction, | |||
bool | word_end, | |||
UNICHAR_ID | unichar_id | |||
) | [protected] |
void tesseract::Trie::sort_edges | ( | EDGE_VECTOR * | edges | ) | [protected] |
Orders num_edges of consecutive EDGE_RECORDS in the given EDGE_VECTOR in increasing order of unichar ids. This function is normally called for all edges in a single node, and since the number of edges in each node is usually quite small, selection sort is used.
SquishedDawg * tesseract::Trie::trie_to_dawg | ( | ) |
void tesseract::Trie::unichar_ids_of | ( | NODE_REF | node, | |
NodeChildVector * | vec | |||
) | const [inline, virtual] |
Fills the given NodeChildVector with all the unichar ids (and the corresponding EDGE_REFs) for which there is an edge out of this node.
Implements tesseract::Dawg.
uinT64 tesseract::Trie::deref_direction_mask_ [protected] |
mask for EDGE_REF to extract direction
uinT64 tesseract::Trie::deref_node_index_mask_ [protected] |
mask for EDGE_REF to extract node index
uinT64 tesseract::Trie::max_num_edges_ [protected] |
maximum number of edges allowed
TRIE_NODES tesseract::Trie::nodes_ [protected] |
vector of nodes in the Trie
uinT64 tesseract::Trie::num_edges_ [protected] |
sum of all edges (forward and backward)