Tesseract 3.01
|
/**********************************************************************
 * File:        cube_object.h
 * Description: Declaration of the Cube Object Class
 * Author:      Ahmad Abdulkader
 * Created:     2007
 *
 * (C) Copyright 2008, Google Inc.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/

// The CubeObject class is the main class used to perform recognition of
// a specific char_samp as a single word.
// To recognize a word, a CubeObject is constructed for this word.
// A call to RecognizeWord is then issued specifying the language model that
// will be used during recognition. If none is specified, the default language
// model in the CubeRecoContext is used. The CubeRecoContext is passed at
// construction time.
//
// The typical usage pattern for Cube is shown below:
//
//   // Create and initialize Tesseract object and get its
//   // CubeRecoContext object (note that Tesseract object owns it,
//   // so it will be freed when the Tesseract object is freed).
//   tesseract::Tesseract *tess_obj = new tesseract::Tesseract();
//   tess_obj->init_tesseract(data_path, lang, tesseract::OEM_CUBE_ONLY);
//   CubeRecoContext *cntxt = tess_obj->GetCubeRecoContext();
//   CHECK(cntxt != NULL) << "Unable to create a Cube reco context";
//   .
//   .
//   .
//   // Do this to recognize a word in pix whose co-ordinates are
//   // (left,top,width,height)
//   tesseract::CubeObject *cube_obj;
//   cube_obj = new tesseract::CubeObject(cntxt, pix,
//                                        left, top, width, height);
//
//   // Get back Cube's list of answers
//   tesseract::WordAltList *alt_list = cube_obj->RecognizeWord();
//   CHECK(alt_list != NULL && alt_list->AltCount() > 0);
//
//   // Get the string and cost of every alternate
//   for (int alt = 0; alt < alt_list->AltCount(); alt++) {
//     // Return the result as a UTF-32 string
//     string_32 res_str32 = alt_list->Alt(alt);
//     // Convert to UTF8 if need-be
//     string res_str;
//     CubeUtils::UTF32ToUTF8(res_str32.c_str(), &res_str);
//     // Get the string cost. This should get bigger as you go deeper
//     // in the list
//     int cost = alt_list->AltCost(alt);
//   }
//
//   // Call this once you are done recognizing this word
//   delete cube_obj;
//
//   // Call this once you are done recognizing all words
//   // for the current language
//   tess_obj->end_tesseract();
//   delete tess_obj;
//
// Note that if the language supports "Italics" (see the CubeRecoContext), the
// RecognizeWord function attempts to de-slant the word.
00072 00073 #ifndef CUBE_OBJECT_H 00074 #define CUBE_OBJECT_H 00075 00076 #include "img.h" 00077 #include "char_samp.h" 00078 #include "word_altlist.h" 00079 #include "beam_search.h" 00080 #include "cube_search_object.h" 00081 #include "tess_lang_model.h" 00082 #include "cube_reco_context.h" 00083 00084 namespace tesseract { 00085 00086 // minimum aspect ratio needed to normalize a char_samp before recognition 00087 static const float kMinNormalizationAspectRatio = 3.5; 00088 // minimum probability a top alt choice must meet before having 00089 // deslanted processing applied to it 00090 static const float kMinProbSkipDeslanted = 0.25; 00091 00092 class CubeObject { 00093 public: 00094 // Different flavors of constructor. They just differ in the way the 00095 // word image is specified 00096 CubeObject(CubeRecoContext *cntxt, CharSamp *char_samp); 00097 CubeObject(CubeRecoContext *cntxt, IMAGE *img, 00098 int left, int top, int wid, int hgt); 00099 CubeObject(CubeRecoContext *cntxt, Pix *pix, 00100 int left, int top, int wid, int hgt); 00101 ~CubeObject(); 00102 00103 // Perform the word recognition using the specified language mode. If none 00104 // is specified, the default language model in the CubeRecoContext is used. 00105 // Returns the sorted list of alternate word answers 00106 WordAltList *RecognizeWord(LangModel *lang_mod = NULL); 00107 // Same as RecognizeWord but recognizes as a phrase 00108 WordAltList *RecognizePhrase(LangModel *lang_mod = NULL); 00109 // Computes the cost of a specific string. This is done by performing 00110 // recognition of a language model that allows only the specified word. 00111 // The alternate list(s) will be permanently modified. 00112 int WordCost(const char *str); 00113 00114 // Returns the BeamSearch object that resulted from the last call to 00115 // RecognizeWord 00116 inline BeamSearch *BeamObj() const { 00117 return (deslanted_ == true ? 
deslanted_beam_obj_ : beam_obj_); 00118 } 00119 // Returns the WordAltList object that resulted from the last call to 00120 // RecognizeWord 00121 inline WordAltList *AlternateList() const { 00122 return (deslanted_ == true ? deslanted_alt_list_ : alt_list_); 00123 } 00124 // Returns the CubeSearchObject object that resulted from the last call to 00125 // RecognizeWord 00126 inline CubeSearchObject *SrchObj() const { 00127 return (deslanted_ == true ? deslanted_srch_obj_ : srch_obj_); 00128 } 00129 // Returns the CharSamp object that resulted from the last call to 00130 // RecognizeWord. Note that this object is not necessarily identical to the 00131 // one passed at construction time as normalization might have occurred 00132 inline CharSamp *CharSample() const { 00133 return (deslanted_ == true ? deslanted_char_samp_ : char_samp_); 00134 } 00135 00136 // Set the ownership of the CharSamp 00137 inline void SetCharSampOwnership(bool own_char_samp) { 00138 own_char_samp_ = own_char_samp; 00139 } 00140 00141 protected: 00142 // Normalize the CharSamp if its aspect ratio exceeds the below constant. 00143 bool Normalize(); 00144 00145 private: 00146 // minimum segment count needed to normalize a char_samp before recognition 00147 static const int kMinNormalizationSegmentCnt = 4; 00148 00149 // Data member initialization function 00150 void Init(); 00151 // Free alternate lists. 00152 void Cleanup(); 00153 // Perform the actual recognition using the specified language mode. If none 00154 // is specified, the default language model in the CubeRecoContext is used. 00155 // Returns the sorted list of alternate answers. 
Called by both 00156 // RecognizerWord (word_mode is true) or RecognizePhrase (word mode is false) 00157 WordAltList *Recognize(LangModel *lang_mod, bool word_mode); 00158 00159 CubeRecoContext *cntxt_; 00160 BeamSearch *beam_obj_; 00161 BeamSearch *deslanted_beam_obj_; 00162 bool offline_mode_; 00163 bool own_char_samp_; 00164 bool deslanted_; 00165 CharSamp *char_samp_; 00166 CharSamp *deslanted_char_samp_; 00167 CubeSearchObject *srch_obj_; 00168 CubeSearchObject *deslanted_srch_obj_; 00169 WordAltList *alt_list_; 00170 WordAltList *deslanted_alt_list_; 00171 }; 00172 } 00173 00174 #endif // CUBE_OBJECT_H