Classes |
struct | TESS_CHAR |
class | TessBaseAPI |
class | PageIterator |
class | ResultIterator |
class | ChoiceIterator |
class | CubeRecoContext |
struct | DocQualCallbacks |
class | TesseractCubeCombiner |
struct | TesseractStats |
class | Tesseract |
class | ImageThresholder |
class | BoxWord |
class | CCStruct |
class | DetLineFit |
class | DPPoint |
class | UnicharIdArrayUtils |
class | AmbigSpec |
class | UnicharAmbigs |
class | CCUtilMutex |
class | CCUtil |
class | PointerVector |
struct | ParamsVectors |
class | ParamUtils |
class | Param |
class | IntParam |
class | BoolParam |
class | StringParam |
class | DoubleParam |
class | TessdataManager |
class | Classify |
class | AltList |
class | BeamSearch |
class | Bmp8 |
class | CachedFile |
class | CharAltList |
struct | Bigram |
struct | CharBigram |
struct | CharBigramTable |
class | CharBigrams |
class | CharSamp |
class | CharSampEnum |
class | CharSampSet |
class | CharSet |
class | CharClassifier |
class | CharClassifierFactory |
class | ConCompPt |
class | ConComp |
class | ConvNetCharClassifier |
class | CubeLineObject |
class | CubeLineSegmenter |
class | CubeObject |
class | CubeSearchObject |
class | CubeTuningParams |
class | CubeUtils |
class | FeatureBase |
class | FeatureBmp |
class | FeatureChebyshev |
class | FeatureHybrid |
class | HybridNeuralNetCharClassifier |
class | LangModEdge |
class | LangModel |
class | SearchColumn |
class | SearchNode |
class | SearchNodeHashTable |
class | SearchObject |
class | TessLangModEdge |
class | TessLangModel |
class | TuningParams |
class | WordAltList |
class | WordListLangModel |
struct | PairSizeInfo |
struct | FontPairSizeInfo |
class | WordSizeModel |
class | WordUnigrams |
class | CUtil |
struct | NodeChild |
class | Dawg |
struct | DawgInfo |
class | DawgInfoVector |
class | SquishedDawg |
struct | DawgArgs |
class | Dict |
class | PermuterState |
class | Trie |
class | Image |
class | InputFileBuffer |
class | NeuralNet |
class | Neuron |
struct | AlignedBlobParams |
class | AlignedBlob |
class | GridBase |
class | IntGrid |
class | BBGrid |
class | GridSearch |
class | TabEventHandler |
class | ColumnFinder |
class | ColPartition |
class | ColPartitionGrid |
class | ColPartitionSet |
class | PixelHistogram |
class | ShiroRekhaSplitter |
class | ImageFinder |
class | LineFinder |
class | StrokeWidth |
class | TabFind |
class | ColSegment |
class | TableFinder |
class | StructuredTable |
class | TableRecognizer |
class | TabConstraint |
class | TabVector |
class | Textord |
class | WorkingPartSet |
struct | AssociateStats |
class | AssociateUtils |
struct | LanguageModelConsistencyInfo |
struct | LanguageModelDawgInfo |
struct | LanguageModelNgramInfo |
struct | ViterbiStateEntry |
struct | LanguageModelState |
struct | BestChoiceBundle |
struct | BestPathByColumn |
class | LanguageModel |
struct | _MATCH_ |
class | BlobMatchTable |
class | FRAGMENT |
class | Wordrec |
Typedefs |
typedef int(Dict::* | DictFunc )(void *void_dawg_args, UNICHAR_ID unichar_id, bool word_end) |
typedef double(Dict::* | ProbabilityInContextFunc )(const char *lang, const char *context, int context_bytes, const char *character, int character_bytes) |
typedef TessCallback2< int,
PAGE_RES * > | TruthCallback |
typedef GenericVector< UNICHAR_ID > | UnicharIdVector |
typedef GenericVector
< AmbigSpec_LIST * > | UnicharAmbigsVector |
typedef signed int | char_32 |
typedef basic_string< char_32 > | string_32 |
typedef GenericVector< NodeChild > | NodeChildVector |
typedef GenericVector< int > | SuccessorList |
typedef GenericVector
< SuccessorList * > | SuccessorListsVector |
typedef GenericVector< Dawg * > | DawgVector |
typedef GridSearch
< ColPartition,
ColPartition_CLIST,
ColPartition_C_IT > | ColPartitionGridSearch |
typedef GenericVector
< ColPartitionSet * > | PartSetVector |
typedef TessResultCallback1
< bool, int > | WidthCallback |
typedef BBGrid< ColSegment,
ColSegment_CLIST,
ColSegment_C_IT > | ColSegmentGrid |
typedef GridSearch< ColSegment,
ColSegment_CLIST,
ColSegment_C_IT > | ColSegmentGridSearch |
typedef BBGrid< BLOBNBOX,
BLOBNBOX_CLIST, BLOBNBOX_C_IT > | BlobGrid |
typedef GridSearch< BLOBNBOX,
BLOBNBOX_CLIST, BLOBNBOX_C_IT > | BlobGridSearch |
typedef unsigned char | LanguageModelFlagsType |
typedef struct tesseract::_MATCH_ | MATCH |
Enumerations |
enum | CMD_EVENTS { ACTION_1_CMD_EVENT,
RECOG_WERDS,
RECOG_PSEUDO,
ACTION_2_CMD_EVENT
} |
enum | ScriptPos { SP_NORMAL,
SP_SUBSCRIPT,
SP_SUPERSCRIPT,
SP_DROPCAP
} |
enum | Orientation { ORIENTATION_PAGE_UP = 0,
ORIENTATION_PAGE_RIGHT = 1,
ORIENTATION_PAGE_DOWN = 2,
ORIENTATION_PAGE_LEFT = 3
} |
enum | WritingDirection { WRITING_DIRECTION_LEFT_TO_RIGHT = 0,
WRITING_DIRECTION_RIGHT_TO_LEFT = 1,
WRITING_DIRECTION_TOP_TO_BOTTOM = 2
} |
enum | TextlineOrder { TEXTLINE_ORDER_LEFT_TO_RIGHT = 0,
TEXTLINE_ORDER_RIGHT_TO_LEFT = 1,
TEXTLINE_ORDER_TOP_TO_BOTTOM = 2
} |
enum | PageSegMode {
PSM_OSD_ONLY,
PSM_AUTO_OSD,
PSM_AUTO_ONLY,
PSM_AUTO,
PSM_SINGLE_COLUMN,
PSM_SINGLE_BLOCK_VERT_TEXT,
PSM_SINGLE_BLOCK,
PSM_SINGLE_LINE,
PSM_SINGLE_WORD,
PSM_CIRCLE_WORD,
PSM_SINGLE_CHAR,
PSM_COUNT
} |
enum | PageIteratorLevel {
RIL_BLOCK,
RIL_PARA,
RIL_TEXTLINE,
RIL_WORD,
RIL_SYMBOL
} |
enum | OcrEngineMode { OEM_TESSERACT_ONLY,
OEM_CUBE_ONLY,
OEM_TESSERACT_CUBE_COMBINED,
OEM_DEFAULT
} |
enum | AmbigType {
NOT_AMBIG,
REPLACE_AMBIG,
DEFINITE_AMBIG,
SIMILAR_AMBIG,
CASE_AMBIG,
AMBIG_TYPE_COUNT
} |
enum | TessdataType {
TESSDATA_LANG_CONFIG,
TESSDATA_UNICHARSET,
TESSDATA_AMBIGS,
TESSDATA_INTTEMP,
TESSDATA_PFFMTABLE,
TESSDATA_NORMPROTO,
TESSDATA_PUNC_DAWG,
TESSDATA_SYSTEM_DAWG,
TESSDATA_NUMBER_DAWG,
TESSDATA_FREQ_DAWG,
TESSDATA_FIXED_LENGTH_DAWGS,
TESSDATA_CUBE_UNICHARSET,
TESSDATA_CUBE_SYSTEM_DAWG,
TESSDATA_NUM_ENTRIES
} |
enum | CharSegmentationType { CST_FRAGMENT,
CST_WHOLE,
CST_IMPROPER,
CST_NGRAM
} |
enum | DawgType {
DAWG_TYPE_PUNCTUATION,
DAWG_TYPE_WORD,
DAWG_TYPE_NUMBER,
DAWG_TYPE_PATTERN,
DAWG_TYPE_COUNT
} |
enum | ColumnSpanningType {
CST_NOISE,
CST_FLOWING,
CST_HEADING,
CST_PULLOUT,
CST_COUNT
} |
enum | ColSegType {
COL_UNKNOWN,
COL_TEXT,
COL_TABLE,
COL_MIXED,
COL_COUNT
} |
enum | TabAlignment {
TA_LEFT_ALIGNED,
TA_LEFT_RAGGED,
TA_CENTER_JUSTIFIED,
TA_RIGHT_ALIGNED,
TA_RIGHT_RAGGED,
TA_SEPARATOR,
TA_COUNT
} |
Functions |
int | CubeAPITest (Boxa *boxa_blocks, Pixa *pixa_blocks, Boxa *boxa_words, Pixa *pixa_words, const FCOORD &reskew, Pix *page_pix, PAGE_RES *page_res) |
TBLOB * | make_tesseract_blob (float baseline, float xheight, float descender, float ascender, bool numeric_mode, Pix *pix) |
TBOX | char_box_to_tbox (Box *char_box, TBOX word_box, int x_offset) |
bool | read_t (PAGE_RES_IT *page_res_it, TBOX *tbox) |
bool | read_b (int applybox_page, int *line_number, FILE *box_file, char *label, TBOX *bbox) |
ICOORD | ComputeEndFromGradient (const ICOORD &start, double m) |
void | OtsuThreshold (const unsigned char *imagedata, int bytes_per_pixel, int bytes_per_line, int left, int top, int width, int height, int **thresholds, int **hi_values) |
void | HistogramRect (const unsigned char *imagedata, int bytes_per_pixel, int bytes_per_line, int left, int top, int width, int height, int *histogram) |
int | OtsuStats (const int *histogram, int *H_out, int *omega0_out) |
| ELISTIZE (AmbigSpec) |
| ELISTIZEH (AmbigSpec) |
template<typename T > |
bool | cmp_eq (T const &t1, T const &t2) |
template<typename T > |
int | sort_cmp (const void *t1, const void *t2) |
template<typename T > |
int | sort_ptr_cmp (const void *t1, const void *t2) |
void | ClearCharNormArray (INT_TEMPLATES Templates, CLASS_NORMALIZATION_ARRAY CharNormArray) |
void | ClearFeatureSpaceWindow (NORM_METHOD norm_method, ScrollView *window) |
WERD_CHOICE * | get_best_delete_other (WERD_CHOICE *choice1, WERD_CHOICE *choice2) |
BLOB_CHOICE * | get_nth_choice (BLOB_CHOICE_LIST *blob_list, int n) |
UNICHAR_ID | get_top_choice_uid (BLOB_CHOICE_LIST *blob_list) |
int | find_choice_by_uid (BLOB_CHOICE_LIST *blob_list, UNICHAR_ID target_uid) |
WERD_CHOICE * | get_choice_from_posstr (const BLOB_CHOICE_LIST_VECTOR &char_choices, int start_pos, const char *pos_str, float *certainties) |
void | get_posstr_from_choice (const BLOB_CHOICE_LIST_VECTOR &char_choices, WERD_CHOICE *word_choice, int start_pos, char *pos_str) |
BLOB_CHOICE * | find_choice_by_type (BLOB_CHOICE_LIST *blob_choices, char target_type, const UNICHARSET &unicharset) |
BLOB_CHOICE * | find_choice_by_script (BLOB_CHOICE_LIST *blob_choices, int target_sid, int backup_sid, int secondary_sid) |
Pix * | GridReducedPix (const TBOX &box, int gridsize, ICOORD bleft, int *left, int *bottom) |
Pix * | TraceOutlineOnReducedPix (C_OUTLINE *outline, int gridsize, ICOORD bleft, int *left, int *bottom) |
Pix * | TraceBlockOnReducedPix (BLOCK *block, int gridsize, ICOORD bleft, int *left, int *bottom) |
template<class BBC > |
int | SortByBoxLeft (const void *void1, const void *void2) |
template<class BBC > |
int | SortByBoxBottom (const void *void1, const void *void2) |
template<typename T > |
void | DeleteObject (T *object) |
| ELISTIZE (ViterbiStateEntry) |
| ELISTIZEH (ViterbiStateEntry) |
Variables |
const int | kMinRectSize = 10 |
const char | kTesseractReject = '~' |
const char | kUNLVReject = '~' |
const char | kUNLVSuspect = '^' |
const char * | kInputFile = "noname.tif" |
const char * | kOldVarsFile = "failed_vars.txt" |
const int | kMaxIntSize = 22 |
const int | kNumbersPerBlob = 5 |
const int | kBytesPerNumber = 5 |
const int | kBytesPerBlob = kNumbersPerBlob * (kBytesPerNumber + 1) + 1 |
const int | kBytesPerBoxFileLine = (kBytesPerNumber + 1) * kNumbersPerBlob + 1 |
const int | kBytesPer64BitNumber = 20 |
const int | kMaxBytesPerLine |
const int | kUniChs [] |
const int | kLatinChs [] |
const int | kMaxCharTopRange = 48 |
const int | kMinCredibleResolution = 70 |
| Minimum believable resolution.
|
const int | kDefaultResolution = 300 |
| Default resolution used if input is not believable.
|
const int | kMaxCircleErosions = 8 |
const inT16 | kMaxBoxEdgeDiff = 2 |
const int | kBoxClipTolerance = 2 |
const int | kMinSubscriptOffset = 20 |
const int | kMinSuperscriptOffset = 20 |
const int | kMaxDropCapBottom = -128 |
const int | kNumEndPoints = 3 |
const int | kHistogramSize = 256 |
CCUtilMutex | tprintfMutex |
const int | kStateCnt = 4 |
const int | kNumLiteralCnt = 5 |
const int | case_state_table [6][4] |
const double | kAlignedFraction = 0.03125 |
const double | kRaggedFraction = 0.5 |
const double | kAlignedGapFraction = 0.75 |
const double | kRaggedGapFraction = 3.0 |
const int | kVLineAlignment = 3 |
const int | kVLineGutter = 1 |
const int | kVLineSearchSize = 150 |
const int | kMinRaggedTabs = 5 |
const int | kMinAlignedTabs = 4 |
const int | kVLineMinLength = 500 |
const double | kMinTabGradient = 4.0 |
const int | kMaxSkewFactor = 15 |
const char * | kTextordDebugPix = "psdebug_pix" |
const int | kMinColumnWidth = 100 |
const int | kMaxIncompatibleColumnCount = 2 |
const double | kMarginOverlapFraction = 0.25 |
const double | kHorizontalGapMergeFraction = 0.5 |
const double | kMinNonNoiseFraction = 0.5 |
const double | kMinGutterWidthGrid = 0.5 |
const int | kSmallBlobSearchRadius = 2 |
bool | textord_tabfind_show_initial_partitions = false |
int | textord_tabfind_show_partitions = 0 |
bool | textord_tabfind_show_columns = false |
bool | textord_tabfind_show_blocks = false |
bool | textord_tabfind_find_tables = false |
const int | kMaxPartnerDepth = 4 |
const double | kMaxSpacingDrift = 1.0 / 72 |
const double | kMaxTopSpacingFraction = 0.25 |
const double | kMaxSizeRatio = 1.5 |
const double | kMaxLeaderGapFractionOfMax = 0.25 |
const double | kMaxLeaderGapFractionOfMin = 0.5 |
const int | kMinLeaderCount = 5 |
const int | kLeaderCutCost = 8 |
const int | kRGBRMSColors = 4 |
bool | textord_tabfind_show_color_fit = false |
const int | kMaxCaptionLines = 7 |
const double | kMinCaptionGapRatio = 2.0 |
const double | kMinCaptionGapHeightRatio = 0.5 |
const double | kTinyEnoughTextlineOverlapFraction = 0.25 |
const double | kMaxPartitionSpacing = 1.75 |
const double | kMinRectangularFraction = 0.125 |
const double | kMaxRectangularFraction = 0.75 |
const double | kMaxRectangularGradient = 0.1 |
const int | kMinImageFindSize = 100 |
const int | kThinLineFraction = 30 |
| Denominator of the resolution that gives the maximum pixel width allowed for thin lines.
|
const int | kMinLineLengthFraction = 8 |
| Denominator of the resolution that gives the minimum length, in pixels, required of a line.
|
const int | kCrackSpacing = 100 |
| Spacing of cracks across the page to break up tall vertical lines.
|
const int | kLineFindGridSize = 50 |
| Grid size used by line finder. Not very critical.
|
int | textord_tabfind_show_strokewidths = 0 |
bool | textord_tabfind_only_strokewidths = false |
double | textord_strokewidth_minsize = 0.25 |
double | textord_strokewidth_maxsize = 4.0 |
bool | textord_tabfind_vertical_text = true |
bool | textord_tabfind_force_vertical_text = false |
bool | textord_tabfind_vertical_horizontal_mix = true |
double | textord_tabfind_vertical_text_ratio = 0.5 |
const double | kStrokeWidthFractionTolerance = 0.125 |
const double | kStrokeWidthTolerance = 1.5 |
const double | kStrokeWidthFractionCJK = 0.25 |
const double | kStrokeWidthCJK = 2.0 |
const int | kCJKRadius = 2 |
const double | kCJKBrokenDistanceFraction = 0.25 |
const int | kCJKMaxComponents = 8 |
const double | kCJKAspectRatio = 1.25 |
const double | kCJKAspectRatioIncrease = 1.0625 |
const int | kMaxCJKSizeRatio = 5 |
const int | kMinDiacriticSizeRatio = 2 |
const int | kSearchRadius = 2 |
const int | kLineTrapLongest = 4 |
const int | kLineTrapShortest = 2 |
const int | kMostlyOneDirRatio = 3 |
const double | kMaxSmallNeighboursPerPix = 3.0 / 128 |
const float | kSizeRatioToReject = 2.0 |
const double | kMaxTextSize = 2.0 |
const int | kTabRadiusFactor = 5 |
const int | kMinVerticalSearch = 3 |
const int | kMaxVerticalSearch = 12 |
const int | kMaxRaggedSearch = 25 |
const int | kMinLinesInColumn = 10 |
const double | kMinFractionalLinesInColumn = 0.125 |
const double | kMinGutterWidthAbsolute = 0.02 |
const double | kMaxGutterWidthAbsolute = 2.00 |
const double | kLineFragmentAspectRatio = 10.0 |
const double | kSmoothFactor = 0.25 |
const double | kMinBaselineCoverage = 0.5 |
const double | kCharVerticalOverlapFraction = 0.375 |
const double | kMaxHorizontalGap = 3.0 |
const double | kMaxBaselineError = 0.4375 |
const int | kMinEvaluatedTabs = 3 |
const int | kMaxTextLineBlobRatio = 5 |
const int | kMinTextLineBlobRatio = 3 |
const double | kMinImageArea = 0.5 |
const double | kCosMaxSkewAngle = 0.866025 |
bool | textord_tabfind_show_initialtabs = false |
bool | textord_tabfind_show_finaltabs = false |
double | textord_tabfind_aligned_gap_fraction = 0.75 |
const int | kColumnWidthFactor = 20 |
const int | kMaxVerticalSpacing = 500 |
const int | kMaxBlobWidth = 500 |
const double | kSplitPartitionSize = 2.0 |
const double | kAllowTextHeight = 0.5 |
const double | kAllowTextWidth = 0.6 |
const double | kAllowTextArea = 0.8 |
const double | kAllowBlobHeight = 0.3 |
const double | kAllowBlobWidth = 0.4 |
const double | kAllowBlobArea = 0.05 |
const int | kMinBoxesInTextPartition = 10 |
const int | kMaxBoxesInDataPartition = 20 |
const double | kMaxGapInTextPartition = 4.0 |
const double | kMinMaxGapInTextPartition = 0.5 |
const double | kMaxBlobOverlapFactor = 4.0 |
const double | kMaxTableCellXheight = 2.0 |
const int | kMaxColumnHeaderDistance = 4 |
const double | kTableColumnThreshold = 3.0 |
const int | kRulingVerticalMargin = 3 |
const double | kMinOverlapWithTable = 0.6 |
const int | kSideSpaceMargin = 10 |
const double | kSmallTableProjectionThreshold = 0.35 |
const double | kLargeTableProjectionThreshold = 0.45 |
const int | kLargeTableRowCount = 6 |
const int | kMinRowsInTable = 3 |
const double | kRequiredFullJustifiedSpacing = 4.0 |
const int | kAdjacentLeaderSearchPadding = 2 |
const double | kParagraphEndingPreviousLineRatio = 1.3 |
const double | kMaxParagraphEndingLeftSpaceMultiple = 3.0 |
const double | kMinParagraphEndingTextToWhitespaceRatio = 3.0 |
const double | kMaxXProjectionGapFactor = 2.0 |
const double | kStrokeWidthFractionalTolerance = 0.25 |
const double | kStrokeWidthConstantTolerance = 2.0 |
bool | textord_dump_table_images = false |
bool | textord_show_tables = false |
bool | textord_tablefind_show_mark = false |
bool | textord_tablefind_show_stats = false |
bool | textord_tablefind_recognize_tables = false |
const double | kHorizontalSpacing = 0.30 |
const double | kVerticalSpacing = -0.2 |
const int | kCellSplitRowThreshold = 0 |
const int | kCellSplitColumnThreshold = 0 |
const int | kLinedTableMinVerticalLines = 3 |
const int | kLinedTableMinHorizontalLines = 3 |
const double | kRequiredColumns = 0.7 |
const double | kMarginFactor = 1.1 |
const double | kMaxRowSize = 2.5 |
const double | kGoodRowNumberOfColumnsSmall [] = { 2, 2, 2, 2, 2, 3, 3 } |
const int | kGoodRowNumberOfColumnsSmallSize |
const double | kGoodRowNumberOfColumnsLarge = 0.7 |
const double | kMinFilledArea = 0.35 |
const int | kGutterMultiple = 4 |
const int | kGutterToNeighbourRatio = 3 |
const int | kSimilarVectorDist = 10 |
const int | kSimilarRaggedDist = 50 |
const int | kMaxFillinMultiple = 11 |
const double | kMinGutterFraction = 0.5 |
const double | kLineCountReciprocal = 4.0 |
const double | kMinAlignedGutter = 0.25 |
const double | kMinRaggedGutter = 2.0 |
double | textord_tabvector_vertical_gap_fraction = 0.5 |
double | textord_tabvector_vertical_box_ratio = 0.5 |
const char * | kAlignmentNames [] |
recog_pseudo_word
Make a word from the selected blobs and run Tess on them.
- Parameters:
-
page_res | recognise blobs |
selection_box | within this box |
fp_eval_word_spacing() Evaluation function for fixed pitch word lists.
Basically, count the number of "nice" characters - those which are in tess-acceptable words or in dict words and are not rejected. Penalise any potential noise chars.
process_selected_words()
Walk the current block list applying the specified word processor function to each word that overlaps the selection_box.
build_menu()
Construct the menu tree used by the command window
process_cmd_win_event()
Process a command returned from the command window (Just call the appropriate command handler)
word_blank_and_set_display() Word processor
Blank display of word then redisplay word according to current display mode settings
---------------------------------------------------------------------------- Public Code ----------------------------------------------------------------------------
---------------------------------------------------------------------------- Include Files and Type Defines ---------------------------------------------------------------------------- ---------------------------------------------------------------------------- Public Code ----------------------------------------------------------------------------