Tesseract 3.01
/data/source/tesseract-ocr/ccmain/cube_reco_context.h
Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        cube_reco_context.h
00003  * Description: Declaration of the Cube Recognition Context Class
00004  * Author:    Ahmad Abdulkader
00005  * Created:   2007
00006  *
00007  * (C) Copyright 2008, Google Inc.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
00020 // The CubeRecoContext class abstracts the Cube OCR Engine. Typically a process
00021 // (or a thread) would create one CubeRecoContext object per language.
00022 // The CubeRecoContext object also provides methods to get and set the
00023 // different attributes of the Cube OCR Engine.
00024 
00025 #ifndef CUBE_RECO_CONTEXT_H
00026 #define CUBE_RECO_CONTEXT_H
00027 
00028 #include <string>
00029 #include "neural_net.h"
00030 #include "lang_model.h"
00031 #include "classifier_base.h"
00032 #include "feature_base.h"
00033 #include "char_set.h"
00034 #include "word_size_model.h"
00035 #include "char_bigrams.h"
00036 #include "word_unigrams.h"
00037 
00038 namespace tesseract {
00039 
00040 class Tesseract;
00041 class TessdataManager;
00042 
00043 class CubeRecoContext {
00044  public:
00045   // Reading order enum type
00046   enum ReadOrder {
00047    L2R,
00048    R2L
00049   };
00050 
00051   // Instantiate using a Tesseract object
00052   CubeRecoContext(Tesseract *tess_obj);
00053 
00054   ~CubeRecoContext();
00055 
00056   // accessor functions
00057   inline const string & Lang() const { return lang_; }
00058   inline CharSet *CharacterSet() const { return char_set_; }
00059   inline CharClassifier *Classifier() const { return char_classifier_; }
00060   inline WordSizeModel *SizeModel() const { return word_size_model_; }
00061   inline CharBigrams *Bigrams() const { return char_bigrams_; }
00062   inline WordUnigrams *WordUnigramsObj() const { return word_unigrams_; }
00063   inline TuningParams *Params() const { return params_; }
00064   inline LangModel *LangMod() const { return lang_mod_; }
00065 
00066   // the reading order of the language
00067   inline ReadOrder ReadingOrder() const {
00068     return ((lang_ == "ara") ? R2L : L2R);
00069   }
00070 
00071   // does the language support case
00072   inline bool HasCase() const {
00073     return (lang_ != "ara" && lang_ != "hin");
00074   }
00075 
00076   inline bool Cursive() const {
00077     return (lang_ == "ara");
00078   }
00079 
00080   inline bool HasItalics() const {
00081     return (lang_ != "ara" && lang_ != "hin" && lang_ != "uk");
00082   }
00083 
00084   inline bool Contextual() const {
00085     return (lang_ == "ara");
00086   }
00087 
00088   // RecoContext runtime flags accessor functions
00089   inline bool SizeNormalization() const { return size_normalization_; }
00090   inline bool NoisyInput() const { return noisy_input_; }
00091   inline bool OOD() const { return lang_mod_->OOD(); }
00092   inline bool Numeric() const { return lang_mod_->Numeric(); }
00093   inline bool WordList() const { return lang_mod_->WordList(); }
00094   inline bool Punc() const { return lang_mod_->Punc(); }
00095   inline bool CaseSensitive() const {
00096     return char_classifier_->CaseSensitive();
00097   }
00098 
00099   inline void SetSizeNormalization(bool size_normalization) {
00100     size_normalization_ = size_normalization;
00101   }
00102   inline void SetNoisyInput(bool noisy_input) {
00103     noisy_input_ = noisy_input;
00104   }
00105   inline void SetOOD(bool ood_enabled) {
00106     lang_mod_->SetOOD(ood_enabled);
00107   }
00108   inline void SetNumeric(bool numeric_enabled) {
00109     lang_mod_->SetNumeric(numeric_enabled);
00110   }
00111   inline void SetWordList(bool word_list_enabled) {
00112     lang_mod_->SetWordList(word_list_enabled);
00113   }
00114   inline void SetPunc(bool punc_enabled) {
00115     lang_mod_->SetPunc(punc_enabled);
00116   }
00117   inline void SetCaseSensitive(bool case_sensitive) {
00118     char_classifier_->SetCaseSensitive(case_sensitive);
00119   }
00120   inline tesseract::Tesseract *TesseractObject() const {
00121     return tess_obj_;
00122   }
00123 
00124   // Returns the path of the data files
00125   bool GetDataFilePath(string *path) const;
00126   // Creates a CubeRecoContext object using a tesseract object. Data
00127   // files are loaded via the tessdata_manager, and the tesseract
00128   // unicharset is provided in order to map Cube's unicharset to
00129   // Tesseract's in the case where the two unicharsets differ.
00130   static CubeRecoContext *Create(Tesseract *tess_obj,
00131                                  TessdataManager *tessdata_manager,
00132                                  UNICHARSET *tess_unicharset);
00133 
00134  private:
00135   bool loaded_;
00136   string lang_;
00137   CharSet *char_set_;
00138   WordSizeModel *word_size_model_;
00139   CharClassifier *char_classifier_;
00140   CharBigrams *char_bigrams_;
00141   WordUnigrams *word_unigrams_;
00142   TuningParams *params_;
00143   LangModel *lang_mod_;
00144   Tesseract *tess_obj_;  // CubeRecoContext does not own this pointer
00145   bool size_normalization_;
00146   bool noisy_input_;
00147 
00148   // Loads and initialized all the necessary components of a
00149   // CubeRecoContext. See .cpp for more details.
00150   bool Load(TessdataManager *tessdata_manager,
00151             UNICHARSET *tess_unicharset);
00152 };
00153 }
00154 
00155 #endif  // CUBE_RECO_CONTEXT_H
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines