00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00019
00020 #ifndef TESSERACT_CCMAIN_BASEAPI_H__
00021 #define TESSERACT_CCMAIN_BASEAPI_H__
00022
00023 #include "thresholder.h"
00024
// Forward declarations. The declarations further down in this header use
// these types only through pointers (for example PAGE_RES*, Pix**, Boxa*),
// so no full definitions are needed here.
// IMAGE and Box are declared but not referenced anywhere in the part of the
// header that is visible in this listing.
00025 class PAGE_RES;
00026 class PAGE_RES_IT;
00027 class BLOCK_LIST;
00028 class IMAGE;
00029 class STRING;
00030 struct Pix;
00031 struct Box;
00032 struct Pixa;
00033 struct Boxa;
00034 struct ETEXT_STRUCT;
00035 struct OSResults;
00036 struct TBOX;
00037
// Element count of INT_FEATURE_ARRAY (see the array typedef below).
00038 #define MAX_NUM_INT_FEATURES 512
// INT_FEATURE_STRUCT is only forward-declared in this header.
// NOTE(review): presumably it is defined by the classifier code and these
// three lines mirror that definition -- confirm they stay in sync.
00039 struct INT_FEATURE_STRUCT;
// Pointer to a single feature.
00040 typedef INT_FEATURE_STRUCT *INT_FEATURE;
// Fixed-size array of MAX_NUM_INT_FEATURES features; used as the parameter
// type of TessBaseAPI::GetFeatures().
00041 typedef INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES];
00042
// TESSDLL_API decorates the TessBaseAPI class declaration below. It expands
// to __declspec(dllexport) when TESSDLL_EXPORTS is defined, to
// __declspec(dllimport) when TESSDLL_IMPORTS is defined, and to nothing when
// neither macro is defined.
00043 #ifdef TESSDLL_EXPORTS
00044 #define TESSDLL_API __declspec(dllexport)
00045 #elif defined(TESSDLL_IMPORTS)
00046 #define TESSDLL_API __declspec(dllimport)
00047 #else
00048 #define TESSDLL_API
00049 #endif
00050
00051
00052 namespace tesseract {
00053
// Forward declarations of classes in namespace tesseract. In this header
// Dict appears in the DictFunc member-pointer typedef, Tesseract as the type
// of the tesseract_ member, Dawg as the result of GetDawg(), and CubeObject /
// CubeLineObject in the Cube helper signatures.
// Trie, CubeRecoContext and TesseractCubeCombiner are declared but not
// referenced in the part of the header visible in this listing.
00054 class Dict;
00055 class Tesseract;
00056 class Trie;
00057 class CubeRecoContext;
00058 class TesseractCubeCombiner;
00059 class CubeObject;
00060 class CubeLineObject;
00061 class Dawg;
00062
// Pointer to a non-static member function of Dict that takes
// (void* void_dawg_args, int char_index, const void* word, bool word_end)
// and returns int. This is the argument type of TessBaseAPI::SetDictFunc().
// NOTE(review): the meaning of the arguments and of the int result is not
// visible in this header -- confirm against the Dict class.
00063 typedef int (Dict::*DictFunc)(void* void_dawg_args, int char_index,
00064 const void *word, bool word_end);
00065
// Page segmentation modes: the argument type of
// TessBaseAPI::SetPageSegMode() and the result type of GetPageSegMode().
// No enumerator has an explicit value, so they are numbered consecutively
// starting at PSM_AUTO = 0.
// NOTE(review): what each mode does is not visible in this header. The names
// suggest the layout the input is assumed to have (automatic detection,
// single column, block, line, word or character) -- confirm against the
// implementation before relying on that reading.
00066 enum PageSegMode {
00067 PSM_AUTO,
00068 PSM_SINGLE_COLUMN,
00069 PSM_SINGLE_BLOCK,
00070 PSM_SINGLE_LINE,
00071 PSM_SINGLE_WORD,
00072 PSM_SINGLE_CHAR,
00073
// Evaluates to 6, the number of enumerators listed before it.
// NOTE(review): presumably a count/sentinel rather than a selectable mode.
00074 PSM_COUNT
00075 };
00076
// Argument type of TessBaseAPI::SetAccuracyVSpeed(). Only two values are
// named: 0 (AVS_FASTEST) and 100 (AVS_MOST_ACCURATE).
// NOTE(review): the 0..100 spacing suggests a scale with unnamed intermediate
// settings; whether intermediate values are accepted is not visible here.
00081 enum AccuracyVSpeed {
00082 AVS_FASTEST = 0,
00083 AVS_MOST_ACCURATE = 100
00084 };
00085
/**
 * API class declared by this header and decorated with TESSDLL_API.
 *
 * Only two members are defined inline here: the two-argument Init() overload
 * and SetThresholder(). Every other member is declared only, so its exact
 * behaviour is not visible in this header; notes marked NOTE(review) below
 * are readings of the names/signatures that still need confirming against
 * the implementation file.
 *
 * The destructor and the protected Threshold() hook are virtual, and all
 * non-public members are protected rather than private, so derived classes
 * have access to the internal state.
 */
00094 class TESSDLL_API TessBaseAPI {
00095 public:
00096 TessBaseAPI();
00097 virtual ~TessBaseAPI();
00098
// Each takes a name as a C string.
// NOTE(review): presumably stored in input_file_ / output_file_ -- confirm.
00103 void SetInputName(const char* name);
00104
00106 void SetOutputName(const char* name);
00107
// Takes a variable name and a value, both as C strings, and returns a bool.
// NOTE(review): presumably the result reports whether the variable was
// recognised and set -- confirm.
00118 bool SetVariable(const char* variable, const char* value);
00119
// Full initialisation overload. configs is an array of C strings with
// configs_size entries.
// NOTE(review): the meaning of the int result (presumably 0 on success) and
// of configs_global_only is not visible here -- confirm.
00146 int Init(const char* datapath, const char* language,
00147 char **configs, int configs_size, bool configs_global_only);
// Convenience overload: forwards to the five-argument Init() with a null
// configs array, configs_size 0 and configs_global_only false, and returns
// its result unchanged.
00148 int Init(const char* datapath, const char* language) {
00149 return Init(datapath, language, 0, 0, false);
00150 }
00151
// Alternative initialisers with the same (datapath, language) arguments and
// int result as the two-argument Init().
// NOTE(review): how they differ from Init() is not visible in this header.
00158 int InitLangMod(const char* datapath, const char* language);
00159
00164 int InitWithoutLangModel(const char* datapath, const char* language);
00165
// NOTE(review): presumably reads variables from the named file; the effect of
// global_only is not visible here -- confirm.
00171 void ReadConfigFile(const char* filename, bool global_only);
00172
// Setter/getter pair for the PageSegMode; the getter is const.
00178 void SetPageSegMode(PageSegMode mode);
00179
00181 PageSegMode GetPageSegMode() const;
00182
00194 void SetAccuracyVSpeed(AccuracyVSpeed mode);
00195
// Takes a raw image buffer described by bytes_per_pixel / bytes_per_line plus
// a rectangle (left, top, width, height) and returns a char*.
// NOTE(review): presumably the recognised text for that rectangle; who frees
// the returned buffer is not visible here -- confirm.
00213 char* TesseractRect(const unsigned char* imagedata,
00214 int bytes_per_pixel, int bytes_per_line,
00215 int left, int top, int width, int height);
00216
00221 void ClearAdaptiveClassifier();
00222
00229
00230
// Two ways of supplying the image: a raw buffer with explicit geometry, or a
// const Pix*.
// NOTE(review): whether the image data is copied or merely referenced (and so
// must outlive recognition) is not visible here -- confirm.
00240 void SetImage(const unsigned char* imagedata, int width, int height,
00241 int bytes_per_pixel, int bytes_per_line);
00242
00253 void SetImage(const Pix* pix);
00254
// NOTE(review): presumably restricts processing to the given sub-rectangle of
// the current image (cf. the rect_*_ members) -- confirm.
00260 void SetRectangle(int left, int top, int width, int height);
00261
// Installs a new thresholder: deletes the one currently held (if non-null),
// stores the new pointer and calls ClearResults(). Because the stored pointer
// is deleted on the next call, this object takes over ownership of
// `thresholder`.
// NOTE(review): passing the pointer that is already installed deletes it and
// then stores the now-dangling pointer; callers must not do that unless a
// self-assignment guard is added. The null check before delete is redundant
// but harmless (deleting a null pointer is a no-op).
00269 void SetThresholder(ImageThresholder* thresholder) {
00270 if (thresholder_ != 0)
00271 delete thresholder_;
00272 thresholder_ = thresholder;
00273 ClearResults();
00274 }
00275
// The accessors below return raw pointers to Pix / Boxa objects and, where a
// Pixa** (or int**) out-parameter is present, can hand back additional data
// through it.
// NOTE(review): ownership of the returned objects and whether the
// out-parameters may be null is not visible here -- confirm.
00281 Pix* GetThresholdedImage();
00282
00288 Boxa* GetRegions(Pixa** pixa);
00289
00297 Boxa* GetTextlines(Pixa** pixa, int** blockids);
00298
00304 Boxa* GetWords(Pixa** pixa);
00305
// NOTE(review): presumably writes the current image to `filename` in PGM
// format -- confirm.
00311 void DumpPGM(const char* filename);
00312
// Recognition entry points taking an ETEXT_STRUCT* monitor and returning int.
// NOTE(review): the result convention and whether monitor may be null are not
// visible here -- confirm.
00319 int Recognize(ETEXT_STRUCT* monitor);
00320
00327 int RecognizeForChopTest(struct ETEXT_STRUCT* monitor);
00328
// Result accessors. The char* / int* results are raw pointers.
// NOTE(review): presumably heap buffers the caller must release, in the
// formats the names suggest (UTF-8, hOCR, box file, UNLV) -- confirm the
// ownership and the deallocation function.
00333 char* GetUTF8Text();
00340 char* GetHOCRText(int page_id);
00348 char* GetBoxText(int page_number);
00354 char* GetUNLVText();
00356 int MeanTextConf();
00363 int* AllWordConfidences();
00364
// NOTE(review): Clear() and End() both look like teardown calls; how much
// state each one releases is not visible in this header -- confirm.
00371 void Clear();
00372
00379 void End();
00380
// Returns int rather than bool.
// NOTE(review): the meaning of non-zero results is not visible here.
00387 int IsValidWord(const char *word);
00388
// Writes through out_offset and out_slope; the bool result presumably
// reports success -- confirm.
00389 bool GetTextDirection(int* out_offset, float* out_slope);
00390
// Takes a pointer to a Dict member function (see the DictFunc typedef).
00392 void SetDictFunc(DictFunc f);
00393
// The parameter is unnamed in this declaration.
00398 bool DetectOS(OSResults*);
00399
// int_features is declared with the array type INT_FEATURE_ARRAY; as a
// function parameter it is adjusted to a pointer to INT_FEATURE_STRUCT, so
// the callee writes into the caller's array. num_features is an
// out-parameter.
00401 void GetFeatures(INT_FEATURE_ARRAY int_features,
00402 int* num_features);
00403
// Const accessors.
// NOTE(review): presumably valid indices for GetDawg() are
// 0 .. NumDawgs() - 1 -- confirm.
00405 const Dawg *GetDawg(int i) const;
00406
00408 int NumDawgs() const;
00409
00411 const char* GetLastInitLanguage() const;
00412
00413
00414
// Internal helpers, accessible to derived classes.
00415 protected:
00416
00418 bool InternalSetImage();
00419
// Virtual hook taking a Pix** out-parameter; derived classes can override it.
00424 virtual void Threshold(Pix** pix);
00425
00430 int FindLines();
00431
// Called by SetThresholder() after the thresholder has been replaced.
00433 void ClearResults();
00434
// Returns int and writes through blob_count.
00441 int TextLength(int* blob_count);
00442
00445
// FindLinesCreateBlockList() returns a raw BLOCK_LIST*; DeleteBlockList() is
// the static function declared alongside it that takes a BLOCK_LIST*.
// NOTE(review): presumably the latter is the matching deallocator -- confirm.
00447 BLOCK_LIST* FindLinesCreateBlockList();
00448
00454 static void DeleteBlockList(BLOCK_LIST* block_list);
00455
// NOTE(review): units and coordinate conventions of baseline, xheight,
// descender and ascender are not visible in this header.
00460 void AdaptToCharacter(const char *unichar_repr,
00461 int length,
00462 float baseline,
00463 float xheight,
00464 float descender,
00465 float ascender);
00466
// Two recognition passes over a block list; the second also receives the
// PAGE_RES* named pass1_result.
00468 PAGE_RES* RecognitionPass1(BLOCK_LIST* block_list);
00469 PAGE_RES* RecognitionPass2(BLOCK_LIST* block_list,
00470 PAGE_RES* pass1_result);
00471
// Static helper with seven pointer-to-pointer out-parameters and a PAGE_RES*
// input.
// NOTE(review): presumably allocates parallel arrays and returns their
// length; ownership of the arrays is not visible here -- confirm.
00476 static int TesseractExtractResult(char** text,
00477 int** lengths,
00478 float** costs,
00479 int** x0,
00480 int** y0,
00481 int** x1,
00482 int** y1,
00483 PAGE_RES* page_res);
00484
// Helpers whose names and parameter types refer to the Cube classes
// (CubeObject, CubeLineObject).
// NOTE(review): result conventions and ownership of the returned
// CubeLineObject** / TBOX* arrays are not visible in this header.
00507 int Cube();
00509 int RunCubeOnLines();
00515 int CubePostProcessWords();
00517 CubeLineObject **CreateLineObjects(Pixa* pixa_lines);
00522 TBOX *CreatePhraseBoxes(Boxa* boxa_lines, CubeLineObject **line_objs,
00523 int *phrase_cnt);
00525 bool RecognizePhrases(int line_cnt, int phrase_cnt,
00526 CubeLineObject **line_objs, TBOX *phrase_boxes);
00528 bool RecognizePhrase(CubeObject *phrase, PAGE_RES_IT *result);
00530 bool CreateCubeObjects();
00531
00532
// Data members. This second `protected:` label repeats the access level that
// is already in effect.
00533 protected:
00534 Tesseract* tesseract_;
// Deleted and replaced by SetThresholder().
00535 ImageThresholder* thresholder_;
00536 bool threshold_done_;
00537 BLOCK_LIST* block_list_;
00538 PAGE_RES* page_res_;
00539 STRING* input_file_;
00540 STRING* output_file_;
00541 STRING* datapath_;
00542 STRING* language_;
00543
00548
// NOTE(review): presumably the rectangle given to SetRectangle() and the
// dimensions of the current image -- confirm.
00549 int rect_left_;
00550 int rect_top_;
00551 int rect_width_;
00552 int rect_height_;
00553 int image_width_;
00554 int image_height_;
00555
00556 };
00557
00558 } // namespace tesseract.
00559
00560 #endif // TESSERACT_CCMAIN_BASEAPI_H__