// Tesseract 3.01
00001 00002 // File: baseapi.h 00003 // Description: Simple API for calling tesseract. 00004 // Author: Ray Smith 00005 // Created: Fri Oct 06 15:35:01 PDT 2006 00006 // 00007 // (C) Copyright 2006, Google Inc. 00008 // Licensed under the Apache License, Version 2.0 (the "License"); 00009 // you may not use this file except in compliance with the License. 00010 // You may obtain a copy of the License at 00011 // http://www.apache.org/licenses/LICENSE-2.0 00012 // Unless required by applicable law or agreed to in writing, software 00013 // distributed under the License is distributed on an "AS IS" BASIS, 00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 // See the License for the specific language governing permissions and 00016 // limitations under the License. 00017 // 00019 00020 #ifndef TESSERACT_API_BASEAPI_H__ 00021 #define TESSERACT_API_BASEAPI_H__ 00022 00023 // To avoid collision with other typenames include the ABSOLUTE MINIMUM 00024 // complexity of includes here. Use forward declarations wherever possible 00025 // and hide includes of complex types in baseapi.cpp. 
00026 #include "apitypes.h" 00027 #include "genericvector.h" 00028 #include "thresholder.h" 00029 #include "unichar.h" 00030 #include "tesscallback.h" 00031 00032 class PAGE_RES; 00033 class PAGE_RES_IT; 00034 class BLOCK_LIST; 00035 class DENORM; 00036 class IMAGE; 00037 class PBLOB; 00038 class ROW; 00039 class STRING; 00040 class WERD; 00041 struct Pix; 00042 struct Box; 00043 struct Pixa; 00044 struct Boxa; 00045 class ETEXT_DESC; 00046 struct OSResults; 00047 class TBOX; 00048 00049 #define MAX_NUM_INT_FEATURES 512 00050 struct INT_FEATURE_STRUCT; 00051 typedef INT_FEATURE_STRUCT *INT_FEATURE; 00052 typedef INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES]; 00053 struct TBLOB; 00054 00055 #ifdef TESSDLL_EXPORTS 00056 #define TESSDLL_API __declspec(dllexport) 00057 #elif defined(TESSDLL_IMPORTS) 00058 #define TESSDLL_API __declspec(dllimport) 00059 #else 00060 #define TESSDLL_API 00061 #endif 00062 00063 00064 namespace tesseract { 00065 00066 class CubeRecoContext; 00067 class Dawg; 00068 class Dict; 00069 class PageIterator; 00070 class ResultIterator; 00071 class Tesseract; 00072 class Trie; 00073 00074 typedef int (Dict::*DictFunc)(void* void_dawg_args, 00075 UNICHAR_ID unichar_id, bool word_end); 00076 typedef double (Dict::*ProbabilityInContextFunc)(const char* lang, 00077 const char* context, 00078 int context_bytes, 00079 const char* character, 00080 int character_bytes); 00081 typedef TessCallback2<int, PAGE_RES *> TruthCallback; 00082 00091 class TESSDLL_API TessBaseAPI { 00092 public: 00093 TessBaseAPI(); 00094 virtual ~TessBaseAPI(); 00095 00099 static const char* Version(); 00100 00105 void SetInputName(const char* name); 00106 00108 void SetOutputName(const char* name); 00109 00125 bool SetVariable(const char* name, const char* value); 00126 00127 // Returns true if the parameter was found among Tesseract parameters. 00128 // Fills in value with the value of the parameter. 
00129 bool GetIntVariable(const char *name, int *value) const; 00130 bool GetBoolVariable(const char *name, bool *value) const; 00131 bool GetDoubleVariable(const char *name, double *value) const; 00132 // Returns the pointer to the string that represents the value of the 00133 // parameter if it was found among Tesseract parameters. 00134 const char *GetStringVariable(const char *name) const; 00135 00136 // Print Tesseract parameters to the given file. 00137 void PrintVariables(FILE *fp) const; 00138 // Get value of named variable as a string, if it exists. 00139 bool GetVariableAsString(const char *name, STRING *val); 00140 00166 int Init(const char* datapath, const char* language, OcrEngineMode mode, 00167 char **configs, int configs_size, 00168 const GenericVector<STRING> *vars_vec, 00169 const GenericVector<STRING> *vars_values, 00170 bool set_only_init_params); 00171 int Init(const char* datapath, const char* language, OcrEngineMode oem) { 00172 return Init(datapath, language, oem, NULL, 0, NULL, NULL, false); 00173 } 00174 int Init(const char* datapath, const char* language) { 00175 return Init(datapath, language, OEM_DEFAULT, NULL, 0, NULL, NULL, false); 00176 } 00177 00184 int InitLangMod(const char* datapath, const char* language); 00185 00186 // Init only for page layout analysis. Use only for calls to SetImage and 00187 // AnalysePage. Calls that attempt recognition will generate an error. 
00188 void InitForAnalysePage(); 00189 00199 void ReadConfigFile(const char* filename, bool init_only); 00200 00206 void SetPageSegMode(PageSegMode mode); 00207 00209 PageSegMode GetPageSegMode() const; 00210 00228 char* TesseractRect(const unsigned char* imagedata, 00229 int bytes_per_pixel, int bytes_per_line, 00230 int left, int top, int width, int height); 00231 00236 void ClearAdaptiveClassifier(); 00237 00244 /* @{ */ 00245 00255 void SetImage(const unsigned char* imagedata, int width, int height, 00256 int bytes_per_pixel, int bytes_per_line); 00257 00268 void SetImage(const Pix* pix); 00269 00275 void SetRectangle(int left, int top, int width, int height); 00276 00284 void SetThresholder(ImageThresholder* thresholder) { 00285 if (thresholder_ != NULL) 00286 delete thresholder_; 00287 thresholder_ = thresholder; 00288 ClearResults(); 00289 } 00290 00296 Pix* GetThresholdedImage(); 00297 00303 Boxa* GetRegions(Pixa** pixa); 00304 00312 Boxa* GetTextlines(Pixa** pixa, int** blockids); 00313 00319 Boxa* GetWords(Pixa** pixa); 00320 00321 // Gets the individual connected (text) components (created 00322 // after pages segmentation step, but before recognition) 00323 // as a leptonica-style Boxa, Pixa pair, in reading order. 00324 // Can be called before or after Recognize. 00325 // Note: the caller is responsible for calling boxaDestroy() 00326 // on the returned Boxa array and pixaDestroy() on cc array. 00327 Boxa* GetConnectedComponents(Pixa** cc); 00328 00329 // Get the given level kind of components (block, textline, word etc.) as a 00330 // leptonica-style Boxa, Pixa pair, in reading order. 00331 // Can be called before or after Recognize. 00332 // If blockids is not NULL, the block-id of each component is also returned 00333 // as an array of one element per component. delete [] after use. 
00334 Boxa* GetComponentImages(PageIteratorLevel level, 00335 Pixa** pixa, int** blockids); 00336 00342 void DumpPGM(const char* filename); 00343 00344 // Runs page layout analysis in the mode set by SetPageSegMode. 00345 // May optionally be called prior to Recognize to get access to just 00346 // the page layout results. Returns an iterator to the results. 00347 // Returns NULL on error. 00348 // The returned iterator must be deleted after use. 00349 // WARNING! This class points to data held within the TessBaseAPI class, and 00350 // therefore can only be used while the TessBaseAPI class still exists and 00351 // has not been subjected to a call of Init, SetImage, Recognize, Clear, End 00352 // DetectOS, or anything else that changes the internal PAGE_RES. 00353 PageIterator* AnalyseLayout(); 00354 00361 int Recognize(ETEXT_DESC* monitor); 00362 00369 int RecognizeForChopTest(ETEXT_DESC* monitor); 00370 00387 bool ProcessPages(const char* filename, 00388 const char* retry_config, int timeout_millisec, 00389 STRING* text_out); 00390 00402 bool ProcessPage(Pix* pix, int page_index, const char* filename, 00403 const char* retry_config, int timeout_millisec, 00404 STRING* text_out); 00405 00406 // Get an iterator to the results of LayoutAnalysis and/or Recognize. 00407 // The returned iterator must be deleted after use. 00408 // WARNING! This class points to data held within the TessBaseAPI class, and 00409 // therefore can only be used while the TessBaseAPI class still exists and 00410 // has not been subjected to a call of Init, SetImage, Recognize, Clear, End 00411 // DetectOS, or anything else that changes the internal PAGE_RES. 
00412 ResultIterator* GetIterator(); 00413 00418 char* GetUTF8Text(); 00424 char* GetHOCRText(int page_number); 00432 char* GetBoxText(int page_number); 00438 char* GetUNLVText(); 00440 int MeanTextConf(); 00447 int* AllWordConfidences(); 00448 00459 bool AdaptToWordStr(PageSegMode mode, const char* wordstr); 00460 00467 void Clear(); 00468 00475 void End(); 00476 00483 int IsValidWord(const char *word); 00484 00485 bool GetTextDirection(int* out_offset, float* out_slope); 00486 00488 void SetDictFunc(DictFunc f); 00489 00493 void SetProbabilityInContextFunc(ProbabilityInContextFunc f); 00494 00499 bool DetectOS(OSResults*); 00500 00502 void GetFeaturesForBlob(TBLOB* blob, const DENORM& denorm, 00503 INT_FEATURE_ARRAY int_features, 00504 int* num_features, int* FeatureOutlineIndex); 00505 00506 // This method returns the row to which a box of specified dimensions would 00507 // belong. If no good match is found, it returns NULL. 00508 static ROW* FindRowForBox(BLOCK_LIST* blocks, int left, int top, 00509 int right, int bottom); 00510 00511 // Method to run adaptive classifier on a blob. 00512 // It returns at max num_max_matches results. 00513 void RunAdaptiveClassifier(TBLOB* blob, const DENORM& denorm, 00514 int num_max_matches, 00515 int* unichar_ids, 00516 float* ratings, 00517 int* num_matches_returned); 00518 00519 // This method returns the string form of the specified unichar. 00520 const char* GetUnichar(int unichar_id); 00521 00523 const Dawg *GetDawg(int i) const; 00524 00526 int NumDawgs() const; 00527 00529 const char* GetLastInitLanguage() const; 00530 00531 // Returns a ROW object created from the input row specification. 00532 static ROW *MakeTessOCRRow(float baseline, float xheight, 00533 float descender, float ascender); 00534 00535 // Returns a TBLOB corresponding to the entire input image. 00536 static TBLOB *MakeTBLOB(Pix *pix); 00537 00538 // This method baseline normalizes a TBLOB in-place. The input row is used 00539 // for normalization. 
The denorm is an optional parameter in which the 00540 // normalization-antidote is returned. 00541 static void NormalizeTBLOB(TBLOB *tblob, ROW *row, 00542 bool numeric_mode, DENORM *denorm); 00543 00544 Tesseract* const tesseract() const { 00545 return tesseract_; 00546 } 00547 00548 void InitTruthCallback(TruthCallback *cb) { truth_cb_ = cb; } 00549 00550 // Return a pointer to underlying CubeRecoContext object if present. 00551 CubeRecoContext *GetCubeRecoContext() const; 00552 00553 void set_min_orientation_margin(double margin); 00554 00555 // Return text orientation of each block as determined by an earlier run 00556 // of layout analysis. 00557 void GetBlockTextOrientations(int** block_orientation, 00558 bool** vertical_writing); 00559 00561 BLOCK_LIST* FindLinesCreateBlockList(); 00562 00568 static void DeleteBlockList(BLOCK_LIST* block_list); 00569 /* @} */ 00570 00571 protected: 00572 00574 bool InternalSetImage(); 00575 00580 virtual void Threshold(Pix** pix); 00581 00586 int FindLines(); 00587 00589 void ClearResults(); 00590 00597 int TextLength(int* blob_count); 00598 00600 /* @{ */ 00601 00606 void AdaptToCharacter(const char *unichar_repr, 00607 int length, 00608 float baseline, 00609 float xheight, 00610 float descender, 00611 float ascender); 00612 00614 PAGE_RES* RecognitionPass1(BLOCK_LIST* block_list); 00615 PAGE_RES* RecognitionPass2(BLOCK_LIST* block_list, PAGE_RES* pass1_result); 00616 00621 static int TesseractExtractResult(char** text, 00622 int** lengths, 00623 float** costs, 00624 int** x0, 00625 int** y0, 00626 int** x1, 00627 int** y1, 00628 PAGE_RES* page_res); 00629 00630 const PAGE_RES* GetPageRes() const { 00631 return page_res_; 00632 }; 00633 00634 protected: 00635 Tesseract* tesseract_; 00636 Tesseract* osd_tesseract_; 00637 ImageThresholder* thresholder_; 00638 BLOCK_LIST* block_list_; 00639 PAGE_RES* page_res_; 00640 STRING* input_file_; 00641 STRING* output_file_; 00642 STRING* datapath_; 00643 STRING* language_; 00644 
OcrEngineMode last_oem_requested_; 00645 bool recognition_done_; 00646 TruthCallback *truth_cb_; 00647 00652 /* @{ */ 00653 int rect_left_; 00654 int rect_top_; 00655 int rect_width_; 00656 int rect_height_; 00657 int image_width_; 00658 int image_height_; 00659 /* @} */ 00660 }; 00661 00662 } // namespace tesseract. 00663 00664 #endif // TESSERACT_API_BASEAPI_H__