Tesseract 3.01
/data/source/tesseract-ocr/api/baseapi.h
Go to the documentation of this file.
00001 
00002 // File:        baseapi.h
00003 // Description: Simple API for calling tesseract.
00004 // Author:      Ray Smith
00005 // Created:     Fri Oct 06 15:35:01 PDT 2006
00006 //
00007 // (C) Copyright 2006, Google Inc.
00008 // Licensed under the Apache License, Version 2.0 (the "License");
00009 // you may not use this file except in compliance with the License.
00010 // You may obtain a copy of the License at
00011 // http://www.apache.org/licenses/LICENSE-2.0
00012 // Unless required by applicable law or agreed to in writing, software
00013 // distributed under the License is distributed on an "AS IS" BASIS,
00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015 // See the License for the specific language governing permissions and
00016 // limitations under the License.
00017 //
00019 
00020 #ifndef TESSERACT_API_BASEAPI_H__
00021 #define TESSERACT_API_BASEAPI_H__
00022 
00023 // To avoid collision with other typenames include the ABSOLUTE MINIMUM
00024 // complexity of includes here. Use forward declarations wherever possible
00025 // and hide includes of complex types in baseapi.cpp.
00026 #include "apitypes.h"
00027 #include "genericvector.h"
00028 #include "thresholder.h"
00029 #include "unichar.h"
00030 #include "tesscallback.h"
00031 
00032 class PAGE_RES;
00033 class PAGE_RES_IT;
00034 class BLOCK_LIST;
00035 class DENORM;
00036 class IMAGE;
00037 class PBLOB;
00038 class ROW;
00039 class STRING;
00040 class WERD;
00041 struct Pix;
00042 struct Box;
00043 struct Pixa;
00044 struct Boxa;
00045 class ETEXT_DESC;
00046 struct OSResults;
00047 class TBOX;
00048 
00049 #define MAX_NUM_INT_FEATURES 512
00050 struct INT_FEATURE_STRUCT;
00051 typedef INT_FEATURE_STRUCT *INT_FEATURE;
00052 typedef INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES];
00053 struct TBLOB;
00054 
00055 #if defined(TESSDLL_EXPORTS)    // TESSDLL_EXPORTS set: dllexport
00056 #define TESSDLL_API __declspec(dllexport)
00057 #elif defined(TESSDLL_IMPORTS)  // TESSDLL_IMPORTS set: dllimport
00058 #define TESSDLL_API __declspec(dllimport)
00059 #else                           // neither set: expands to nothing
00060 #define TESSDLL_API
00061 #endif  // TESSDLL_EXPORTS / TESSDLL_IMPORTS
00062 
00063 
00064 namespace tesseract {
00065 
00066 class CubeRecoContext;
00067 class Dawg;
00068 class Dict;
00069 class PageIterator;
00070 class ResultIterator;
00071 class Tesseract;
00072 class Trie;
00073 
00074 typedef int (Dict::*DictFunc)(void* void_dawg_args,
00075                               UNICHAR_ID unichar_id, bool word_end);
00076 typedef double (Dict::*ProbabilityInContextFunc)(const char* lang,
00077                                                  const char* context,
00078                                                  int context_bytes,
00079                                                  const char* character,
00080                                                  int character_bytes);
00081 typedef TessCallback2<int, PAGE_RES *> TruthCallback;
00082 
00091 class TESSDLL_API TessBaseAPI {
00092  public:
00093   TessBaseAPI();
00094   virtual ~TessBaseAPI();
00095 
00099   static const char* Version();
00100 
00105   void SetInputName(const char* name);
00106 
00108   void SetOutputName(const char* name);
00109 
00125   bool SetVariable(const char* name, const char* value);
00126 
00127   // Returns true if the parameter was found among Tesseract parameters.
00128   // Fills in value with the value of the parameter.
00129   bool GetIntVariable(const char *name, int *value) const;
00130   bool GetBoolVariable(const char *name, bool *value) const;
00131   bool GetDoubleVariable(const char *name, double *value) const;
00132   // Returns the pointer to the string that represents the value of the
00133   // parameter if it was found among Tesseract parameters.
00134   const char *GetStringVariable(const char *name) const;
00135 
00136   // Print Tesseract parameters to the given file.
00137   void PrintVariables(FILE *fp) const;
00138   // Get value of named variable as a string, if it exists.
00139   bool GetVariableAsString(const char *name, STRING *val);
00140 
00166   int Init(const char* datapath, const char* language, OcrEngineMode mode,
00167            char **configs, int configs_size,
00168            const GenericVector<STRING> *vars_vec,
00169            const GenericVector<STRING> *vars_values,
00170            bool set_only_init_params);
00171   int Init(const char* datapath, const char* language, OcrEngineMode oem) {
00172     return Init(datapath, language, oem, NULL, 0, NULL, NULL, false);
00173   }  // ^ full Init() with no configs and no vars_vec/vars_values
00174   int Init(const char* datapath, const char* language) {
00175     return Init(datapath, language, OEM_DEFAULT);  // 3-arg overload above
00176   }
00177 
00184   int InitLangMod(const char* datapath, const char* language);
00185 
00186   // Init only for page layout analysis. Use only for calls to SetImage and
00187   // AnalyseLayout. Calls that attempt recognition will generate an error.
00188   void InitForAnalysePage();
00189 
00199   void ReadConfigFile(const char* filename, bool init_only);
00200 
00206   void SetPageSegMode(PageSegMode mode);
00207 
00209   PageSegMode GetPageSegMode() const;
00210 
00228   char* TesseractRect(const unsigned char* imagedata,
00229                       int bytes_per_pixel, int bytes_per_line,
00230                       int left, int top, int width, int height);
00231 
00236   void ClearAdaptiveClassifier();
00237 
00244    /* @{ */
00245 
00255   void SetImage(const unsigned char* imagedata, int width, int height,
00256                 int bytes_per_pixel, int bytes_per_line);
00257 
00268   void SetImage(const Pix* pix);
00269 
00275   void SetRectangle(int left, int top, int width, int height);
00276 
00277   /**
00278    * Installs thresholder as the ImageThresholder used by this object.
00279    * The previously installed thresholder is deleted, so this object owns
00280    * the pointer it is handed. ClearResults() is called afterwards in
00281    * every case. Passing the thresholder that is already installed leaves
00282    * it alive instead of deleting it and keeping a dangling pointer.
00283    */
00284   void SetThresholder(ImageThresholder* thresholder) {
00285     if (thresholder != thresholder_)  // never free the object being kept
00286       delete thresholder_;            // delete on NULL is a no-op
00287     thresholder_ = thresholder;
00288     ClearResults();
00289   }
00290 
00296   Pix* GetThresholdedImage();
00297 
00303   Boxa* GetRegions(Pixa** pixa);
00304 
00312   Boxa* GetTextlines(Pixa** pixa, int** blockids);
00313 
00319   Boxa* GetWords(Pixa** pixa);
00320 
00321   // Gets the individual connected (text) components (created
00322   // after the page segmentation step, but before recognition)
00323   // as a leptonica-style Boxa, Pixa pair, in reading order.
00324   // Can be called before or after Recognize.
00325   // Note: the caller is responsible for calling boxaDestroy()
00326   // on the returned Boxa array and pixaDestroy() on cc array.
00327   Boxa* GetConnectedComponents(Pixa** cc);
00328 
00329   // Get the components of the given level (block, textline, word etc.) as a
00330   // leptonica-style Boxa, Pixa pair, in reading order.
00331   // Can be called before or after Recognize.
00332   // If blockids is not NULL, the block-id of each component is also returned
00333   // as an array of one element per component. delete [] after use.
00334   Boxa* GetComponentImages(PageIteratorLevel level,
00335                            Pixa** pixa, int** blockids);
00336 
00342   void DumpPGM(const char* filename);
00343 
00344   // Runs page layout analysis in the mode set by SetPageSegMode.
00345   // May optionally be called prior to Recognize to get access to just
00346   // the page layout results. Returns an iterator to the results.
00347   // Returns NULL on error.
00348   // The returned iterator must be deleted after use.
00349   // WARNING! This class points to data held within the TessBaseAPI class, and
00350   // therefore can only be used while the TessBaseAPI class still exists and
00351   // has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
00352   // DetectOS, or anything else that changes the internal PAGE_RES.
00353   PageIterator* AnalyseLayout();
00354 
00361   int Recognize(ETEXT_DESC* monitor);
00362 
00369   int RecognizeForChopTest(ETEXT_DESC* monitor);
00370 
00387   bool ProcessPages(const char* filename,
00388                     const char* retry_config, int timeout_millisec,
00389                     STRING* text_out);
00390 
00402   bool ProcessPage(Pix* pix, int page_index, const char* filename,
00403                    const char* retry_config, int timeout_millisec,
00404                    STRING* text_out);
00405 
00406   // Get an iterator to the results of AnalyseLayout and/or Recognize.
00407   // The returned iterator must be deleted after use.
00408   // WARNING! This class points to data held within the TessBaseAPI class, and
00409   // therefore can only be used while the TessBaseAPI class still exists and
00410   // has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
00411   // DetectOS, or anything else that changes the internal PAGE_RES.
00412   ResultIterator* GetIterator();
00413 
00418   char* GetUTF8Text();
00424   char* GetHOCRText(int page_number);
00432   char* GetBoxText(int page_number);
00438   char* GetUNLVText();
00440   int MeanTextConf();
00447   int* AllWordConfidences();
00448 
00459   bool AdaptToWordStr(PageSegMode mode, const char* wordstr);
00460 
00467   void Clear();
00468 
00475   void End();
00476 
00483   int IsValidWord(const char *word);
00484 
00485   bool GetTextDirection(int* out_offset, float* out_slope);
00486 
00488   void SetDictFunc(DictFunc f);
00489 
00493   void SetProbabilityInContextFunc(ProbabilityInContextFunc f);
00494 
00499   bool DetectOS(OSResults*);
00500 
00502   void GetFeaturesForBlob(TBLOB* blob, const DENORM& denorm,
00503                           INT_FEATURE_ARRAY int_features,
00504                           int* num_features, int* FeatureOutlineIndex);
00505 
00506   // This method returns the row to which a box of specified dimensions would
00507   // belong. If no good match is found, it returns NULL.
00508   static ROW* FindRowForBox(BLOCK_LIST* blocks, int left, int top,
00509                             int right, int bottom);
00510 
00511   // Method to run adaptive classifier on a blob.
00512   // It returns at most num_max_matches results.
00513   void RunAdaptiveClassifier(TBLOB* blob, const DENORM& denorm,
00514                              int num_max_matches,
00515                              int* unichar_ids,
00516                              float* ratings,
00517                              int* num_matches_returned);
00518 
00519   // This method returns the string form of the specified unichar.
00520   const char* GetUnichar(int unichar_id);
00521 
00523   const Dawg *GetDawg(int i) const;
00524 
00526   int NumDawgs() const;
00527 
00529   const char* GetLastInitLanguage() const;
00530 
00531   // Returns a ROW object created from the input row specification.
00532   static ROW *MakeTessOCRRow(float baseline, float xheight,
00533                              float descender, float ascender);
00534 
00535   // Returns a TBLOB corresponding to the entire input image.
00536   static TBLOB *MakeTBLOB(Pix *pix);
00537 
00538   // This method baseline normalizes a TBLOB in-place. The input row is used
00539   // for normalization. The denorm is an optional parameter in which the
00540   // normalization-antidote is returned.
00541   static void NormalizeTBLOB(TBLOB *tblob, ROW *row,
00542                              bool numeric_mode, DENORM *denorm);
00543 
00544   Tesseract* tesseract() const {  // exposes the tesseract_ member
00545     return tesseract_;            // (returned by value, so no top-level const)
00546   }
00547 
00548   void InitTruthCallback(TruthCallback *cb) { truth_cb_ = cb; }  // stores cb only
00549 
00550   // Return a pointer to underlying CubeRecoContext object if present.
00551   CubeRecoContext *GetCubeRecoContext() const;
00552 
00553   void set_min_orientation_margin(double margin);
00554 
00555   // Return text orientation of each block as determined by an earlier run
00556   // of layout analysis.
00557   void GetBlockTextOrientations(int** block_orientation,
00558                                 bool** vertical_writing);
00559 
00561   BLOCK_LIST* FindLinesCreateBlockList();
00562 
00568   static void DeleteBlockList(BLOCK_LIST* block_list);
00569  /* @} */
00570 
00571  protected:
00572 
00574   bool InternalSetImage();
00575 
00580   virtual void Threshold(Pix** pix);
00581 
00586   int FindLines();
00587 
00589   void ClearResults();
00590 
00597   int TextLength(int* blob_count);
00598 
00600   /* @{ */
00601 
00606   void AdaptToCharacter(const char *unichar_repr,
00607                         int length,
00608                         float baseline,
00609                         float xheight,
00610                         float descender,
00611                         float ascender);
00612 
00614   PAGE_RES* RecognitionPass1(BLOCK_LIST* block_list);
00615   PAGE_RES* RecognitionPass2(BLOCK_LIST* block_list, PAGE_RES* pass1_result);
00616 
00621   static int TesseractExtractResult(char** text,
00622                                     int** lengths,
00623                                     float** costs,
00624                                     int** x0,
00625                                     int** y0,
00626                                     int** x1,
00627                                     int** y1,
00628                                     PAGE_RES* page_res);
00629 
00630   const PAGE_RES* GetPageRes() const {  // read-only access to page_res_
00631     return page_res_;
00632   }
00633 
00634  protected:
00635   Tesseract*        tesseract_;       
00636   Tesseract*        osd_tesseract_;   
00637   ImageThresholder* thresholder_;     
00638   BLOCK_LIST*       block_list_;      
00639   PAGE_RES*         page_res_;        
00640   STRING*           input_file_;      
00641   STRING*           output_file_;     
00642   STRING*           datapath_;        
00643   STRING*           language_;        
00644   OcrEngineMode last_oem_requested_;  
00645   bool          recognition_done_;   
00646   TruthCallback *truth_cb_;           
00647 
00652   /* @{ */
00653   int rect_left_;
00654   int rect_top_;
00655   int rect_width_;
00656   int rect_height_;
00657   int image_width_;
00658   int image_height_;
00659   /* @} */
00660 };
00661 
00662 }  // namespace tesseract.
00663 
00664 #endif  // TESSERACT_API_BASEAPI_H__
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines