tesseract::Trie Class Reference

#include <trie.h>

Inheritance diagram for tesseract::Trie:
tesseract::Dawg

List of all members.

Public Member Functions

 Trie (DawgType type, const STRING &lang, PermuterType perm, uinT64 max_num_edges, int unicharset_size)
 ~Trie ()
EDGE_REF edge_char_of (NODE_REF node_ref, UNICHAR_ID unichar_id, bool word_end) const
void unichar_ids_of (NODE_REF node, NodeChildVector *vec) const
NODE_REF next_node (EDGE_REF edge_ref) const
bool end_of_word (EDGE_REF edge_ref) const
UNICHAR_ID edge_letter (EDGE_REF edge_ref) const
void print_node (NODE_REF node, int max_num_edges) const
SquishedDawg * trie_to_dawg ()
bool read_word_list (const char *filename, const UNICHARSET &unicharset)
void add_word_to_dawg (const WERD_CHOICE &word)

Protected Member Functions

EDGE_RECORD * deref_edge_ref (EDGE_REF edge_ref) const
EDGE_REF make_edge_ref (NODE_REF node_index, EDGE_INDEX edge_index) const
void link_edge (EDGE_RECORD *edge, NODE_REF nxt, int direction, bool word_end, UNICHAR_ID unichar_id)
void print_edge_rec (const EDGE_RECORD &edge_rec) const
bool can_be_eliminated (const EDGE_RECORD &edge_rec)
void print_all (const char *msg, int max_num_edges)
bool edge_char_of (NODE_REF node_ref, NODE_REF next_node, int direction, bool word_end, UNICHAR_ID unichar_id, EDGE_RECORD **edge_ptr, EDGE_INDEX *edge_index) const
bool add_edge_linkage (NODE_REF node1, NODE_REF node2, int direction, bool word_end, UNICHAR_ID unichar_id)
bool add_new_edge (NODE_REF node1, NODE_REF node2, bool word_end, UNICHAR_ID unichar_id)
void add_word_ending (EDGE_RECORD *edge, NODE_REF the_next_node, UNICHAR_ID unichar_id)
NODE_REF new_dawg_node ()
void remove_edge_linkage (NODE_REF node1, NODE_REF node2, int direction, bool word_end, UNICHAR_ID unichar_id)
void remove_edge (NODE_REF node1, NODE_REF node2, bool word_end, UNICHAR_ID unichar_id)
bool eliminate_redundant_edges (NODE_REF node, const EDGE_RECORD &edge1, const EDGE_RECORD &edge2)
bool reduce_lettered_edges (EDGE_INDEX edge_index, UNICHAR_ID unichar_id, NODE_REF node, const EDGE_VECTOR &backward_edges, NODE_MARKER reduced_nodes)
void sort_edges (EDGE_VECTOR *edges)
void reduce_node_input (NODE_REF node, NODE_MARKER reduced_nodes)

Protected Attributes

TRIE_NODES nodes_
 vector of nodes in the Trie
uinT64 num_edges_
 sum of all edges (forward and backward)
uinT64 max_num_edges_
 maximum number of edges allowed
uinT64 deref_direction_mask_
 mask for EDGE_REF to extract direction
uinT64 deref_node_index_mask_
 mask for EDGE_REF to extract node index

Detailed Description

Concrete class for the Trie data structure (extends the Dawg base class) that stores a list of words and allows new words to be added dynamically. This class stores a vector of pointers to TRIE_NODE_RECORDs, each of which has a vector of forward and backward edges.


Constructor & Destructor Documentation

tesseract::Trie::Trie ( DawgType  type,
const STRING &  lang,
PermuterType  perm,
uinT64  max_num_edges,
int  unicharset_size 
) [inline]
tesseract::Trie::~Trie (  )  [inline]

Member Function Documentation

bool tesseract::Trie::add_edge_linkage ( NODE_REF  node1,
NODE_REF  node2,
int  direction,
bool  word_end,
UNICHAR_ID  unichar_id 
) [protected]
bool tesseract::Trie::add_new_edge ( NODE_REF  node1,
NODE_REF  node2,
bool  word_end,
UNICHAR_ID  unichar_id 
) [inline, protected]
void tesseract::Trie::add_word_ending ( EDGE_RECORD *  edge,
NODE_REF  the_next_node,
UNICHAR_ID  unichar_id 
) [protected]
void tesseract::Trie::add_word_to_dawg ( const WERD_CHOICE &  word  ) 
bool tesseract::Trie::can_be_eliminated ( const EDGE_RECORD &  edge_rec  )  [inline, protected]
EDGE_RECORD* tesseract::Trie::deref_edge_ref ( EDGE_REF  edge_ref  )  const [inline, protected]
bool tesseract::Trie::edge_char_of ( NODE_REF  node_ref,
NODE_REF  next_node,
int  direction,
bool  word_end,
UNICHAR_ID  unichar_id,
EDGE_RECORD **  edge_ptr,
EDGE_INDEX *  edge_index 
) const [protected]
EDGE_REF tesseract::Trie::edge_char_of ( NODE_REF  node_ref,
UNICHAR_ID  unichar_id,
bool  word_end 
) const [inline, virtual]

Returns the edge that corresponds to the letter out of this node.

Implements tesseract::Dawg.

UNICHAR_ID tesseract::Trie::edge_letter ( EDGE_REF  edge_ref  )  const [inline, virtual]

Returns UNICHAR_ID stored in the edge indicated by the given EDGE_REF.

Implements tesseract::Dawg.

bool tesseract::Trie::eliminate_redundant_edges ( NODE_REF  node,
const EDGE_RECORD &  edge1,
const EDGE_RECORD &  edge2 
) [protected]
bool tesseract::Trie::end_of_word ( EDGE_REF  edge_ref  )  const [inline, virtual]

Returns true if the edge indicated by the given EDGE_REF marks the end of a word.

Implements tesseract::Dawg.

void tesseract::Trie::link_edge ( EDGE_RECORD *  edge,
NODE_REF  nxt,
int  direction,
bool  word_end,
UNICHAR_ID  unichar_id 
) [inline, protected]

Sets up this edge record to the requested values.

EDGE_REF tesseract::Trie::make_edge_ref ( NODE_REF  node_index,
EDGE_INDEX  edge_index 
) const [inline, protected]

Constructs EDGE_REF from the given node_index and edge_index.

NODE_REF tesseract::Trie::new_dawg_node (  )  [protected]
NODE_REF tesseract::Trie::next_node ( EDGE_REF  edge_ref  )  const [inline, virtual]

Returns the next node visited by following the edge indicated by the given EDGE_REF.

Implements tesseract::Dawg.

void tesseract::Trie::print_all ( const char *  msg,
int  max_num_edges 
) [inline, protected]
void tesseract::Trie::print_edge_rec ( const EDGE_RECORD &  edge_rec  )  const [inline, protected]

Prints the given EDGE_RECORD.

void tesseract::Trie::print_node ( NODE_REF  node,
int  max_num_edges 
) const [virtual]

Prints the contents of the node indicated by the given NODE_REF. At most max_num_edges will be printed.

Implements tesseract::Dawg.

bool tesseract::Trie::read_word_list ( const char *  filename,
const UNICHARSET &  unicharset 
)
bool tesseract::Trie::reduce_lettered_edges ( EDGE_INDEX  edge_index,
UNICHAR_ID  unichar_id,
NODE_REF  node,
const EDGE_VECTOR &  backward_edges,
NODE_MARKER  reduced_nodes 
) [protected]
void tesseract::Trie::reduce_node_input ( NODE_REF  node,
NODE_MARKER  reduced_nodes 
) [protected]

Eliminates any redundant edges from this node in the Trie.

void tesseract::Trie::remove_edge ( NODE_REF  node1,
NODE_REF  node2,
bool  word_end,
UNICHAR_ID  unichar_id 
) [inline, protected]
void tesseract::Trie::remove_edge_linkage ( NODE_REF  node1,
NODE_REF  node2,
int  direction,
bool  word_end,
UNICHAR_ID  unichar_id 
) [protected]
void tesseract::Trie::sort_edges ( EDGE_VECTOR *  edges  )  [protected]

Order num_edges of consecutive EDGE_RECORDS in the given EDGE_VECTOR in increasing order of unichar ids. This function is normally called for all edges in a single node, and since the number of edges in each node is usually quite small, selection sort is used.

SquishedDawg * tesseract::Trie::trie_to_dawg (  ) 
void tesseract::Trie::unichar_ids_of ( NODE_REF  node,
NodeChildVector *  vec 
) const [inline, virtual]

Fills the given NodeChildVector with all the unichar ids (and the corresponding EDGE_REFs) for which there is an edge out of this node.

Implements tesseract::Dawg.


Member Data Documentation

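uinT64 tesseract::Trie::deref_direction_mask_ [protected]

mask for EDGE_REF to extract direction

uinT64 tesseract::Trie::deref_node_index_mask_ [protected]

mask for EDGE_REF to extract node index

uinT64 tesseract::Trie::max_num_edges_ [protected]

maximum number of edges allowed

TRIE_NODES tesseract::Trie::nodes_ [protected]

vector of nodes in the Trie

uinT64 tesseract::Trie::num_edges_ [protected]

sum of all edges (forward and backward)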


The documentation for this class was generated from the following files:
Generated on Sun Jul 18 17:11:25 2010 for Tesseract by  doxygen 1.6.3