Tesseract 3.01
UNICHARSET Class Reference

#include <unicharset.h>

List of all members.

Classes

struct  UNICHAR_PROPERTIES
struct  UNICHAR_SLOT

Public Member Functions

 UNICHARSET ()
 ~UNICHARSET ()
const UNICHAR_ID unichar_to_id (const char *const unichar_repr) const
const UNICHAR_ID unichar_to_id (const char *const unichar_repr, int length) const
int step (const char *str) const
const char *const id_to_unichar (UNICHAR_ID id) const
STRING debug_str (UNICHAR_ID id) const
STRING debug_str (const char *unichar_repr) const
void unichar_insert (const char *const unichar_repr)
bool contains_unichar_id (UNICHAR_ID unichar_id) const
bool contains_unichar (const char *const unichar_repr) const
bool contains_unichar (const char *const unichar_repr, int length) const
bool eq (UNICHAR_ID unichar_id, const char *const unichar_repr) const
void delete_pointers_in_unichars ()
void clear ()
int size () const
void reserve (int unichars_number)
bool save_to_file (const char *const filename) const
bool save_to_file (FILE *file) const
bool load_from_file (const char *const filename, bool skip_fragments)
bool load_from_file (const char *const filename)
bool load_from_file (FILE *file, bool skip_fragments)
bool load_from_file (FILE *file)
void post_load_setup ()
bool any_right_to_left () const
void set_black_and_whitelist (const char *blacklist, const char *whitelist)
void set_isalpha (UNICHAR_ID unichar_id, bool value)
void set_islower (UNICHAR_ID unichar_id, bool value)
void set_isupper (UNICHAR_ID unichar_id, bool value)
void set_isdigit (UNICHAR_ID unichar_id, bool value)
void set_ispunctuation (UNICHAR_ID unichar_id, bool value)
void set_isngram (UNICHAR_ID unichar_id, bool value)
void set_script (UNICHAR_ID unichar_id, const char *value)
void set_other_case (UNICHAR_ID unichar_id, UNICHAR_ID other_case)
bool get_isalpha (UNICHAR_ID unichar_id) const
bool get_islower (UNICHAR_ID unichar_id) const
bool get_isupper (UNICHAR_ID unichar_id) const
bool get_isdigit (UNICHAR_ID unichar_id) const
bool get_ispunctuation (UNICHAR_ID unichar_id) const
bool get_isngram (UNICHAR_ID unichar_id) const
bool top_bottom_useful () const
void get_top_bottom (UNICHAR_ID unichar_id, int *min_bottom, int *max_bottom, int *min_top, int *max_top) const
void set_top_bottom (UNICHAR_ID unichar_id, int min_bottom, int max_bottom, int min_top, int max_top)
int get_script (UNICHAR_ID unichar_id) const
unsigned int get_properties (UNICHAR_ID unichar_id) const
char get_chartype (UNICHAR_ID unichar_id) const
UNICHAR_ID get_other_case (UNICHAR_ID unichar_id) const
UNICHAR_ID to_lower (UNICHAR_ID unichar_id) const
UNICHAR_ID to_upper (UNICHAR_ID unichar_id) const
const CHAR_FRAGMENT * get_fragment (UNICHAR_ID unichar_id) const
bool get_isalpha (const char *const unichar_repr) const
bool get_islower (const char *const unichar_repr) const
bool get_isupper (const char *const unichar_repr) const
bool get_isdigit (const char *const unichar_repr) const
bool get_ispunctuation (const char *const unichar_repr) const
unsigned int get_properties (const char *const unichar_repr) const
char get_chartype (const char *const unichar_repr) const
int get_script (const char *const unichar_repr) const
const CHAR_FRAGMENT * get_fragment (const char *const unichar_repr) const
bool get_isalpha (const char *const unichar_repr, int length) const
bool get_islower (const char *const unichar_repr, int length) const
bool get_isupper (const char *const unichar_repr, int length) const
bool get_isdigit (const char *const unichar_repr, int length) const
bool get_ispunctuation (const char *const unichar_repr, int length) const
int get_script (const char *const unichar_repr, int length) const
int get_script_table_size () const
const char * get_script_from_script_id (int id) const
int get_script_id_from_name (const char *script_name) const
bool is_null_script (const char *script) const
int add_script (const char *script)
bool get_enabled (UNICHAR_ID unichar_id) const
int null_sid () const
int common_sid () const
int latin_sid () const
int cyrillic_sid () const
int greek_sid () const
int han_sid () const
int hiragana_sid () const
int katakana_sid () const
int default_sid () const
bool script_has_upper_lower () const
bool script_has_xheight () const

Static Public Member Functions

static STRING debug_utf8_str (const char *str)

Constructor & Destructor Documentation

UNICHARSET::UNICHARSET ( )
UNICHARSET::~UNICHARSET ( )

Member Function Documentation

int UNICHARSET::add_script ( const char *  script)
bool UNICHARSET::any_right_to_left ( ) const
void UNICHARSET::clear ( ) [inline]
int UNICHARSET::common_sid ( ) const [inline]
bool UNICHARSET::contains_unichar ( const char *const  unichar_repr,
int  length 
) const
bool UNICHARSET::contains_unichar ( const char *const  unichar_repr) const
bool UNICHARSET::contains_unichar_id ( UNICHAR_ID  unichar_id) const [inline]
int UNICHARSET::cyrillic_sid ( ) const [inline]
STRING UNICHARSET::debug_str ( UNICHAR_ID  id) const
STRING UNICHARSET::debug_str ( const char *  unichar_repr) const [inline]
STRING UNICHARSET::debug_utf8_str ( const char *  str) [static]
int UNICHARSET::default_sid ( ) const [inline]
void UNICHARSET::delete_pointers_in_unichars ( ) [inline]
bool UNICHARSET::eq ( UNICHAR_ID  unichar_id,
const char *const  unichar_repr 
) const
char UNICHARSET::get_chartype ( UNICHAR_ID  unichar_id) const
char UNICHARSET::get_chartype ( const char *const  unichar_repr) const [inline]
bool UNICHARSET::get_enabled ( UNICHAR_ID  unichar_id) const [inline]
const CHAR_FRAGMENT* UNICHARSET::get_fragment ( UNICHAR_ID  unichar_id) const [inline]
const CHAR_FRAGMENT* UNICHARSET::get_fragment ( const char *const  unichar_repr) const [inline]
bool UNICHARSET::get_isalpha ( UNICHAR_ID  unichar_id) const [inline]
bool UNICHARSET::get_isalpha ( const char *const  unichar_repr) const [inline]
bool UNICHARSET::get_isalpha ( const char *const  unichar_repr,
int  length 
) const [inline]
bool UNICHARSET::get_isdigit ( UNICHAR_ID  unichar_id) const [inline]
bool UNICHARSET::get_isdigit ( const char *const  unichar_repr) const [inline]
bool UNICHARSET::get_isdigit ( const char *const  unichar_repr,
int  length 
) const [inline]
bool UNICHARSET::get_islower ( UNICHAR_ID  unichar_id) const [inline]
bool UNICHARSET::get_islower ( const char *const  unichar_repr) const [inline]
bool UNICHARSET::get_islower ( const char *const  unichar_repr,
int  length 
) const [inline]
bool UNICHARSET::get_isngram ( UNICHAR_ID  unichar_id) const [inline]
bool UNICHARSET::get_ispunctuation ( const char *const  unichar_repr,
int  length 
) const [inline]
bool UNICHARSET::get_ispunctuation ( UNICHAR_ID  unichar_id) const [inline]
bool UNICHARSET::get_ispunctuation ( const char *const  unichar_repr) const [inline]
bool UNICHARSET::get_isupper ( UNICHAR_ID  unichar_id) const [inline]
bool UNICHARSET::get_isupper ( const char *const  unichar_repr) const [inline]
bool UNICHARSET::get_isupper ( const char *const  unichar_repr,
int  length 
) const [inline]
UNICHAR_ID UNICHARSET::get_other_case ( UNICHAR_ID  unichar_id) const [inline]
unsigned int UNICHARSET::get_properties ( UNICHAR_ID  unichar_id) const
unsigned int UNICHARSET::get_properties ( const char *const  unichar_repr) const [inline]
int UNICHARSET::get_script ( const char *const  unichar_repr,
int  length 
) const [inline]
int UNICHARSET::get_script ( UNICHAR_ID  unichar_id) const [inline]
int UNICHARSET::get_script ( const char *const  unichar_repr) const [inline]
const char* UNICHARSET::get_script_from_script_id ( int  id) const [inline]
int UNICHARSET::get_script_id_from_name ( const char *  script_name) const
int UNICHARSET::get_script_table_size ( ) const [inline]
void UNICHARSET::get_top_bottom ( UNICHAR_ID  unichar_id,
int *  min_bottom,
int *  max_bottom,
int *  min_top,
int *  max_top 
) const [inline]
int UNICHARSET::greek_sid ( ) const [inline]
int UNICHARSET::han_sid ( ) const [inline]
int UNICHARSET::hiragana_sid ( ) const [inline]
const char *const UNICHARSET::id_to_unichar ( UNICHAR_ID  id) const
bool UNICHARSET::is_null_script ( const char *  script) const [inline]
int UNICHARSET::katakana_sid ( ) const [inline]
int UNICHARSET::latin_sid ( ) const [inline]
bool UNICHARSET::load_from_file ( const char *const  filename,
bool  skip_fragments 
) [inline]
bool UNICHARSET::load_from_file ( const char *const  filename) [inline]
bool UNICHARSET::load_from_file ( FILE *  file,
bool  skip_fragments 
)
bool UNICHARSET::load_from_file ( FILE *  file) [inline]
int UNICHARSET::null_sid ( ) const [inline]
void UNICHARSET::post_load_setup ( )
void UNICHARSET::reserve ( int  unichars_number)
bool UNICHARSET::save_to_file ( const char *const  filename) const [inline]
bool UNICHARSET::save_to_file ( FILE *  file) const
bool UNICHARSET::script_has_upper_lower ( ) const [inline]
bool UNICHARSET::script_has_xheight ( ) const [inline]
void UNICHARSET::set_black_and_whitelist ( const char *  blacklist,
const char *  whitelist 
)
void UNICHARSET::set_isalpha ( UNICHAR_ID  unichar_id,
bool  value 
) [inline]
void UNICHARSET::set_isdigit ( UNICHAR_ID  unichar_id,
bool  value 
) [inline]
void UNICHARSET::set_islower ( UNICHAR_ID  unichar_id,
bool  value 
) [inline]
void UNICHARSET::set_isngram ( UNICHAR_ID  unichar_id,
bool  value 
) [inline]
void UNICHARSET::set_ispunctuation ( UNICHAR_ID  unichar_id,
bool  value 
) [inline]
void UNICHARSET::set_isupper ( UNICHAR_ID  unichar_id,
bool  value 
) [inline]
void UNICHARSET::set_other_case ( UNICHAR_ID  unichar_id,
UNICHAR_ID  other_case 
) [inline]
void UNICHARSET::set_script ( UNICHAR_ID  unichar_id,
const char *  value 
) [inline]
void UNICHARSET::set_top_bottom ( UNICHAR_ID  unichar_id,
int  min_bottom,
int  max_bottom,
int  min_top,
int  max_top 
) [inline]
int UNICHARSET::size ( ) const [inline]
int UNICHARSET::step ( const char *  str) const
UNICHAR_ID UNICHARSET::to_lower ( UNICHAR_ID  unichar_id) const [inline]
UNICHAR_ID UNICHARSET::to_upper ( UNICHAR_ID  unichar_id) const [inline]
bool UNICHARSET::top_bottom_useful ( ) const [inline]
void UNICHARSET::unichar_insert ( const char *const  unichar_repr)
const UNICHAR_ID UNICHARSET::unichar_to_id ( const char *const  unichar_repr) const
const UNICHAR_ID UNICHARSET::unichar_to_id ( const char *const  unichar_repr,
int  length 
) const

The documentation for this class was generated from the following files:
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines