tesseract Namespace Reference

Classes

struct  TESS_CHAR
class  TessBaseAPI
class  Tesseract
class  ImageThresholder
class  CCStruct
class  DetLineFit
class  UnicharIdArrayUtils
class  AmbigSpec
class  UnicharAmbigs
class  CCUtilMutex
class  CCUtil
class  TessdataManager
class  Classify
class  CUtil
class  Context
struct  NodeChild
class  Dawg
struct  DawgInfo
class  DawgInfoVector
class  SquishedDawg
struct  DawgArgs
class  Dict
class  Trie
class  Image
struct  AlignedBlobParams
class  AlignedBlob
class  BBGrid
class  GridSearch
class  TabEventHandler
class  ColumnFinder
class  ColPartition
class  ColPartitionSet
class  ImageFinder
class  LineFinder
class  StrokeWidth
class  TabFind
class  ColSegment
class  TabConstraint
class  TabVector
class  WorkingPartSet
class  Wordrec
class  FRAGMENT

Typedefs

typedef int(Dict::* DictFunc )(void *void_dawg_args, int char_index, const void *word, bool word_end)
typedef GenericVector< AmbigSpec_LIST * > UnicharAmbigsVector
typedef GenericVector< UNICHAR_ID > UnicharIdVector
typedef GenericVector< NodeChild > NodeChildVector
typedef GenericVector< int > SuccessorList
typedef GenericVector< SuccessorList * > SuccessorListsVector
typedef GenericVector< Dawg * > DawgVector
typedef BBGrid< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGrid
typedef GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
typedef GenericVector< ColPartitionSet * > PartSetVector
typedef ResultCallback1< bool, int > WidthCallback
typedef BBGrid< ColSegment, ColSegment_CLIST, ColSegment_C_IT > ColSegmentGrid
typedef BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > BlobGrid

Enumerations

enum  PageSegMode {
  PSM_AUTO, PSM_SINGLE_COLUMN, PSM_SINGLE_BLOCK, PSM_SINGLE_LINE,
  PSM_SINGLE_WORD, PSM_SINGLE_CHAR, PSM_COUNT
}
enum  AccuracyVSpeed { AVS_FASTEST = 0, AVS_MOST_ACCURATE = 100 }
enum  CMD_EVENTS { ACTION_1_CMD_EVENT, RECOG_WERDS, RECOG_PSEUDO, ACTION_2_CMD_EVENT }
enum  AmbigType {
  NOT_AMBIG, REPLACE_AMBIG, DEFINITE_AMBIG, SIMILAR_AMBIG,
  CASE_AMBIG, AMBIG_TYPE_COUNT
}
enum  TessdataType {
  TESSDATA_LANG_CONFIG, TESSDATA_UNICHARSET, TESSDATA_AMBIGS, TESSDATA_INTTEMP,
  TESSDATA_PFFMTABLE, TESSDATA_NORMPROTO, TESSDATA_PUNC_DAWG, TESSDATA_SYSTEM_DAWG,
  TESSDATA_NUMBER_DAWG, TESSDATA_FREQ_DAWG, TESSDATA_NUM_ENTRIES
}
enum  DawgType {
  DAWG_TYPE_PUNCTUATION, DAWG_TYPE_PREFIX, DAWG_TYPE_ROOT, DAWG_TYPE_WORD,
  DAWG_TYPE_SUFFIX, DAWG_TYPE_NUMBER, DAWG_TYPE_COUNT
}
enum  ColSegType {
  COL_UNKNOWN, COL_TEXT, COL_TABLE, COL_MIXED,
  COL_COUNT
}
enum  TabAlignment {
  TA_LEFT_ALIGNED, TA_LEFT_RAGGED, TA_CENTER, TA_RIGHT_ALIGNED,
  TA_RIGHT_RAGGED, TA_SEPARATOR, TA_COUNT
}

Functions

TBLOB * make_tesseract_blob (float baseline, float xheight, float descender, float ascender)
void OtsuThreshold (const unsigned char *imagedata, int bytes_per_pixel, int bytes_per_line, int left, int top, int width, int height, int **thresholds, int **hi_values)
void HistogramRect (const unsigned char *imagedata, int bytes_per_pixel, int bytes_per_line, int left, int top, int width, int height, int *histogram)
int OtsuStats (const int *histogram, int *H_out, int *omega0_out)
 ELISTIZE (AmbigSpec)
 ELISTIZEH (AmbigSpec)
template<typename T >
bool cmp_eq (T const &t1, T const &t2)
int GetBaselineFeatures (TBLOB *Blob, LINE_STATS *LineStats, INT_TEMPLATES Templates, INT_FEATURE_ARRAY IntFeatures, CLASS_NORMALIZATION_ARRAY CharNormArray, inT32 *BlobLength)
int GetIntBaselineFeatures (TBLOB *Blob, LINE_STATS *LineStats, INT_TEMPLATES Templates, INT_FEATURE_ARRAY IntFeatures, CLASS_NORMALIZATION_ARRAY CharNormArray, inT32 *BlobLength)
void ClearCharNormArray (INT_TEMPLATES Templates, CLASS_NORMALIZATION_ARRAY CharNormArray)
UNICHAR_ID get_top_choice_uid (BLOB_CHOICE_LIST *blob_list)
int get_top_word_script (const BLOB_CHOICE_LIST_VECTOR &char_choices, const UNICHARSET &unicharset)
BLOB_CHOICE * find_choice_by_type (BLOB_CHOICE_LIST *char_choices, char target_type, const UNICHARSET &unicharset)
BLOB_CHOICE * find_choice_by_script (BLOB_CHOICE_LIST *char_choices, int target_sid, int backup_sid, int secondary_sid)
template<class BBC >
int SortByBoxLeft (const void *void1, const void *void2)
template<typename T >
void DeleteObject (T *object)
WIDTH_RECORD * state_char_widths (WIDTH_RECORD *chunk_widths, STATE *state, int num_joints)
FLOAT32 get_width_variance (WIDTH_RECORD *wrec, float norm_height)
FLOAT32 get_gap_variance (WIDTH_RECORD *wrec, float norm_height)
FLOAT32 fp_width_cost (float norm_width, bool end_pos)
FLOAT32 fp_gap_cost (float norm_gap, bool end_pos)
make_ed_blob

Make an editor format blob from the tess style blob.

PBLOB * make_ed_blob (TBLOB *tessblob)
make_ed_outline

Make an editor format outline from the list of fragments.

OUTLINE * make_ed_outline (FRAGMENT_LIST *list)
register_outline

Add the fragments in the given outline to the list

void register_outline (TESSLINE *outline, FRAGMENT_LIST *list)

Variables

const int kMinRectSize = 10
const char kTesseractReject = '~'
const char kUNLVReject = '~'
const char kUNLVSuspect = '^'
const char * kInputFile = "noname.tif"
const int kCharsPerChar = 31
const int kMaxCharsPerChar = 106 + UNICHAR_LEN
const int kUniChs []
const int kLatinChs []
const int kNumEndPoints = 3
const int kHistogramSize = 256
CCUtilMutex tprintfMutex
const double kAlignedFraction = 0.03125
const double kRaggedFraction = 0.5
const double kAlignedGapFraction = 0.75
const double kRaggedGapFraction = 3.0
const int kVLineAlignment = 3
const int kVLineGutter = 1
const int kVLineSearchSize = 150
const int kMinRaggedTabs = 5
const int kMinAlignedTabs = 4
const int kVLineMinLength = 500
const int kMaxSkewFactor = 15
const char * kTextordDebugPix = "psdebug_pix"
const int kMinColumnWidth = 100
const int kMaxIncompatibleColumnCount = 2
const double kMaxPartitionSpacing = 1.75
const double kMarginOverlapFraction = 0.25
const double kHorizontalGapMergeFraction = 0.5
const double kMinNonNoiseFraction = 0.5
const int kSmallBlobSearchRadius = 2
bool textord_tabfind_show_strokewidths = false
bool textord_tabfind_show_initial_partitions = false
int textord_tabfind_show_partitions = 0
bool textord_tabfind_show_columns = false
bool textord_tabfind_show_blocks = false
const int kMaxPartnerDepth = 4
const double kMaxSpacingDrift = 1.0 / 72
const double kMaxTopSpacingFraction = 0.25
const double kMaxSizeRatio = 1.5
const double kMinRectangularFraction = 0.125
const double kMaxRectangularFraction = 0.75
const double kMaxRectangularGradient = 0.1
const int kMinImageFindSize = 100
const int kThinLineFraction = 30
const int kMinLineLengthFraction = 8
const int kCrackSpacing = 100
const int kLineFindGridSize = 50
const int kMinCredibleResolution = 70
const int kDefaultResolution = 300
const double kStrokeWidthFractionTolerance = 0.125
const double kStrokeWidthTolerance = 1.5
const double kMaxTextSize = 2.0
const int kTabRadiusFactor = 5
const int kMinVerticalSearch = 3
const int kMaxVerticalSearch = 12
const int kMaxRaggedSearch = 25
const int kMinLinesInColumn = 10
const double kMinFractionalLinesInColumn = 0.125
const double kSmoothFactor = 0.25
const double kMinBaselineCoverage = 0.5
const double kCharVerticalOverlapFraction = 0.375
const double kMaxHorizontalGap = 3.0
const double kMaxBaselineError = 0.4375
const int kMinEvaluatedTabs = 3
const int kMaxTextLineBlobRatio = 5
const int kMinTextLineBlobRatio = 3
const double kMinImageArea = 0.5
bool textord_tabfind_show_initialtabs = false
bool textord_tabfind_show_finaltabs = false
bool textord_tabfind_vertical_text = true
const int kColumnWidthFactor = 20
const int kMaxVerticalSpacing = 500
const int kMinBoxesInTextPartition = 10
const int kMaxBoxesInDataPartition = 20
const double kMaxGapInTextPartition = 4.0
const double kMinMaxGapInTextPartition = 0.5
const double kMaxTableCellXheight = 2.0
const int kMaxColumnHeaderDistance = 100
const double kTableColumnThreshold = 3.0
const int kRulingVerticalMargin = 3
const double kMinOverlapWithTable = 0.6
const int kSideSpaceMargin = 10
const double kProjectionThreshold = 0.35
const int kMinRowsInTable = 3
bool textord_dump_table_images = false
bool textord_show_tables = false
const int kGutterMultiple = 4
const int kGutterToNeighbourRatio = 3
const int kSimilarVectorDist = 10
const int kSimilarRaggedDist = 50
const int kMaxFillinMultiple = 11
const double kMinGutterFraction = 0.5
const double kVerticalTextGapFraction = 0.5
const char * kAlignmentNames []

Detailed Description

---------------------------------------------------------------------------- Public Code ----------------------------------------------------------------------------

---------------------------------------------------------------------------- Private Code ----------------------------------------------------------------------------

---------------------------------------------------------------------------- Public Function Prototypes ----------------------------------------------------------------------------


Typedef Documentation

typedef BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> tesseract::BlobGrid
typedef BBGrid<ColPartition, ColPartition_CLIST, ColPartition_C_IT> tesseract::ColPartitionGrid
typedef GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> tesseract::ColPartitionGridSearch
typedef BBGrid<ColSegment, ColSegment_CLIST, ColSegment_C_IT> tesseract::ColSegmentGrid
typedef int(Dict::* tesseract::DictFunc)(void *void_dawg_args, int char_index, const void *word, bool word_end)
typedef GenericVector<AmbigSpec_LIST *> tesseract::UnicharAmbigsVector

Enumeration Type Documentation

enum tesseract::AccuracyVSpeed

The values in the AccuracyVSpeed enum provide hints for how the engine should trade speed for accuracy. There is no guarantee of any effect.

Enumerator:
AVS_FASTEST 

Fastest speed, but lowest accuracy.

AVS_MOST_ACCURATE 

Greatest accuracy, but slowest speed.

enum tesseract::AmbigType
Enumerator:
NOT_AMBIG 
REPLACE_AMBIG 
DEFINITE_AMBIG 
SIMILAR_AMBIG 
CASE_AMBIG 
AMBIG_TYPE_COUNT 
enum tesseract::CMD_EVENTS
Enumerator:
ACTION_1_CMD_EVENT 
RECOG_WERDS 
RECOG_PSEUDO 
ACTION_2_CMD_EVENT 
enum tesseract::ColSegType
Enumerator:
COL_UNKNOWN 
COL_TEXT 
COL_TABLE 
COL_MIXED 
COL_COUNT 
enum tesseract::DawgType
Enumerator:
DAWG_TYPE_PUNCTUATION 
DAWG_TYPE_PREFIX 
DAWG_TYPE_ROOT 
DAWG_TYPE_WORD 
DAWG_TYPE_SUFFIX 
DAWG_TYPE_NUMBER 
DAWG_TYPE_COUNT 
enum tesseract::PageSegMode
Enumerator:
PSM_AUTO 

Fully automatic page segmentation.

PSM_SINGLE_COLUMN 

Assume a single column of text of variable sizes.

PSM_SINGLE_BLOCK 

Assume a single uniform block of text. (Default.)

PSM_SINGLE_LINE 

Treat the image as a single text line.

PSM_SINGLE_WORD 

Treat the image as a single word.

PSM_SINGLE_CHAR 

Treat the image as a single character.

PSM_COUNT 

Number of enum entries.

enum tesseract::TabAlignment
Enumerator:
TA_LEFT_ALIGNED 
TA_LEFT_RAGGED 
TA_CENTER 
TA_RIGHT_ALIGNED 
TA_RIGHT_RAGGED 
TA_SEPARATOR 
TA_COUNT 
enum tesseract::TessdataType
Enumerator:
TESSDATA_LANG_CONFIG 
TESSDATA_UNICHARSET 
TESSDATA_AMBIGS 
TESSDATA_INTTEMP 
TESSDATA_PFFMTABLE 
TESSDATA_NORMPROTO 
TESSDATA_PUNC_DAWG 
TESSDATA_SYSTEM_DAWG 
TESSDATA_NUMBER_DAWG 
TESSDATA_FREQ_DAWG 
TESSDATA_NUM_ENTRIES 

Function Documentation

void tesseract::ClearCharNormArray ( INT_TEMPLATES  Templates,
CLASS_NORMALIZATION_ARRAY  CharNormArray 
)
template<typename T >
bool tesseract::cmp_eq ( T const &  t1,
T const &  t2 
) [inline]
template<typename T >
void tesseract::DeleteObject ( T *  object  )  [inline]
tesseract::ELISTIZE ( AmbigSpec   ) 
tesseract::ELISTIZEH ( AmbigSpec   ) 
BLOB_CHOICE* tesseract::find_choice_by_script ( BLOB_CHOICE_LIST *  char_choices,
int  target_sid,
int  backup_sid,
int  secondary_sid 
)
BLOB_CHOICE* tesseract::find_choice_by_type ( BLOB_CHOICE_LIST *  char_choices,
char  target_type,
const UNICHARSET &  unicharset 
)
FLOAT32 tesseract::fp_gap_cost ( float  norm_gap,
bool  end_pos 
)
FLOAT32 tesseract::fp_width_cost ( float  norm_width,
bool  end_pos 
)
FLOAT32 tesseract::get_gap_variance ( WIDTH_RECORD *  wrec,
float  norm_height 
)
UNICHAR_ID tesseract::get_top_choice_uid ( BLOB_CHOICE_LIST *  blob_list  ) 
int tesseract::get_top_word_script ( const BLOB_CHOICE_LIST_VECTOR &  char_choices,
const UNICHARSET &  unicharset 
)
FLOAT32 tesseract::get_width_variance ( WIDTH_RECORD *  wrec,
float  norm_height 
)
int tesseract::GetBaselineFeatures ( TBLOB *  Blob,
LINE_STATS *  LineStats,
INT_TEMPLATES  Templates,
INT_FEATURE_ARRAY  IntFeatures,
CLASS_NORMALIZATION_ARRAY  CharNormArray,
inT32 *  BlobLength 
)
int tesseract::GetIntBaselineFeatures ( TBLOB *  Blob,
LINE_STATS *  LineStats,
INT_TEMPLATES  Templates,
INT_FEATURE_ARRAY  IntFeatures,
CLASS_NORMALIZATION_ARRAY  CharNormArray,
inT32 *  BlobLength 
)
void tesseract::HistogramRect ( const unsigned char *  imagedata,
int  bytes_per_pixel,
int  bytes_per_line,
int  left,
int  top,
int  width,
int  height,
int *  histogram 
)
PBLOB * tesseract::make_ed_blob ( TBLOB *  tessblob  ) 
OUTLINE * tesseract::make_ed_outline ( FRAGMENT_LIST *  list  ) 
TBLOB* tesseract::make_tesseract_blob ( float  baseline,
float  xheight,
float  descender,
float  ascender 
)
int tesseract::OtsuStats ( const int *  histogram,
int *  H_out,
int *  omega0_out 
)
void tesseract::OtsuThreshold ( const unsigned char *  imagedata,
int  bytes_per_pixel,
int  bytes_per_line,
int  left,
int  top,
int  width,
int  height,
int **  thresholds,
int **  hi_values 
)
void tesseract::register_outline ( TESSLINE *  outline,
FRAGMENT_LIST *  list 
)
template<class BBC >
int tesseract::SortByBoxLeft ( const void *  void1,
const void *  void2 
) [inline]
WIDTH_RECORD* tesseract::state_char_widths ( WIDTH_RECORD *  chunk_widths,
STATE *  state,
int  num_joints 
)

Variable Documentation

const double tesseract::kAlignedFraction = 0.03125
const double tesseract::kAlignedGapFraction = 0.75
const char* tesseract::kAlignmentNames[]
Initial value:
 {
  "Left Aligned",
  "Left Ragged",
  "Center",
  "Right Aligned",
  "Right Ragged",
  "Separator"
}
const int tesseract::kCharsPerChar = 31
const int tesseract::kCrackSpacing = 100
const int tesseract::kHistogramSize = 256
const char* tesseract::kInputFile = "noname.tif"
const int tesseract::kLatinChs[]
Initial value:
 {
  0x00a2, 0x0022, 0x0022, 0x0027, 0x0027, 0x00b7, 0x002d, 0
}
const double tesseract::kMarginOverlapFraction = 0.25
const double tesseract::kMaxBaselineError = 0.4375
const int tesseract::kMaxCharsPerChar = 106 + UNICHAR_LEN
const double tesseract::kMaxHorizontalGap = 3.0
const double tesseract::kMaxPartitionSpacing = 1.75
const double tesseract::kMaxSizeRatio = 1.5
const int tesseract::kMaxSkewFactor = 15
const double tesseract::kMaxSpacingDrift = 1.0 / 72
const double tesseract::kMaxTableCellXheight = 2.0
const double tesseract::kMaxTextSize = 2.0
const double tesseract::kMaxTopSpacingFraction = 0.25
const double tesseract::kMinBaselineCoverage = 0.5
const int tesseract::kMinColumnWidth = 100
const double tesseract::kMinGutterFraction = 0.5
const double tesseract::kMinImageArea = 0.5
const double tesseract::kMinNonNoiseFraction = 0.5
const double tesseract::kMinOverlapWithTable = 0.6
const double tesseract::kMinRectangularFraction = 0.125
const int tesseract::kMinRectSize = 10
const int tesseract::kNumEndPoints = 3
const double tesseract::kProjectionThreshold = 0.35
const double tesseract::kRaggedFraction = 0.5
const double tesseract::kRaggedGapFraction = 3.0
const double tesseract::kSmoothFactor = 0.25
const char tesseract::kTesseractReject = '~'
const char* tesseract::kTextordDebugPix = "psdebug_pix"
const int tesseract::kUniChs[]
Initial value:
 {
  0x20ac, 0x201c, 0x201d, 0x2018, 0x2019, 0x2022, 0x2014, 0
}
const char tesseract::kUNLVReject = '~'
const char tesseract::kUNLVSuspect = '^'
const int tesseract::kVLineGutter = 1
const int tesseract::kVLineMinLength = 500
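const int tesseract::kVLineSearchSize = 150

bool tesseract::textord_dump_table_images = false

"Paint table detection output"

bool tesseract::textord_show_tables = false

"Show table regions"

bool tesseract::textord_tabfind_show_blocks = false

"Show final block bounds"

bool tesseract::textord_tabfind_show_columns = false

"Show column bounds"

bool tesseract::textord_tabfind_show_finaltabs = false

"Show tab vectors"

bool tesseract::textord_tabfind_show_initial_partitions = false

"Show partition bounds"

bool tesseract::textord_tabfind_show_initialtabs = false

"Show tab candidates"

int tesseract::textord_tabfind_show_partitions = 0

"Show partition bounds, waiting if >1"

bool tesseract::textord_tabfind_show_strokewidths = false

"Show stroke widths"

bool tesseract::textord_tabfind_vertical_text = true

"Enable vertical detection"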

Generated on Sun Jul 18 17:11:20 2010 for Tesseract by  doxygen 1.6.3