Tesseract 3.01
/data/source/tesseract-ocr/cube/cube_object.h
Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        cube_object.h
00003  * Description: Declaration of the Cube Object Class
00004  * Author:    Ahmad Abdulkader
00005  * Created:   2007
00006  *
00007  * (C) Copyright 2008, Google Inc.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
00020 // The CubeObject class is the main class used to perform recognition of
00021 // a specific char_samp as a single word.
00022 // To recognize a word, a CubeObject is constructed for this word.
00023 // A call to RecognizeWord is then issued, specifying the language model that
00024 // will be used during recognition. If none is specified, the default language
00025 // model in the CubeRecoContext is used. The CubeRecoContext is passed at
00026 // construction time.
00027 //
00028 // The typical usage pattern for Cube is shown below:
00029 //
00030 //         // Create and initialize Tesseract object and get its
00031 //         // CubeRecoContext object (note that Tesseract object owns it,
00032 //         // so it will be freed when the Tesseract object is freed).
00033 //         tesseract::Tesseract *tess_obj =  new tesseract::Tesseract();
00034 //         tess_obj->init_tesseract(data_path, lang, tesseract::OEM_CUBE_ONLY);
00035 //         CubeRecoContext *cntxt = tess_obj->GetCubeRecoContext();
00036 //         CHECK(cntxt != NULL) << "Unable to create a Cube reco context";
00037 //         .
00038 //         .
00039 //         .
00040 //         // Do this to recognize a word in pix whose co-ordinates are
00041 //         // (left,top,width,height)
00042 //         tesseract::CubeObject *cube_obj;
00043 //         cube_obj = new tesseract::CubeObject(cntxt, pix,
00044 //                                              left, top, width, height);
00045 //
00046 //         // Get back Cube's list of answers
00047 //         tesseract::WordAltList *alt_list = cube_obj->RecognizeWord();
00048 //         CHECK(alt_list != NULL && alt_list->AltCount() > 0);
00049 //
00050 //         // Get the string and cost of every alternate
00051 //         for (int alt = 0; alt < alt_list->AltCount(); alt++) {
00052 //           // Return the result as a UTF-32 string
00053 //           string_32 res_str32 = alt_list->Alt(alt);
00054 //           // Convert to UTF8 if need-be
00055 //           string res_str;
00056 //           CubeUtils::UTF32ToUTF8(res_str32.c_str(), &res_str);
00057 //           // Get the string cost. This should get bigger as you go deeper
00058 //           // in the list
00059 //           int cost = alt_list->AltCost(alt);
00060 //         }
00061 //
00062 //         // Call this once you are done recognizing this word
00063 //         delete cube_obj;
00064 //
00065 //         // Call this once you are done recognizing all words
00066 //         // for the current language
00067 //         tess_obj->end_tesseract();
00068 //         delete tess_obj;
00069 //
00070 // Note that if the language supports "Italics" (see the CubeRecoContext), the
00071 // RecognizeWord function attempts to de-slant the word.
00072 
00073 #ifndef CUBE_OBJECT_H
00074 #define CUBE_OBJECT_H
00075 
00076 #include "img.h"
00077 #include "char_samp.h"
00078 #include "word_altlist.h"
00079 #include "beam_search.h"
00080 #include "cube_search_object.h"
00081 #include "tess_lang_model.h"
00082 #include "cube_reco_context.h"
00083 
00084 namespace tesseract {
00085 
00086 // Minimum aspect ratio needed to normalize a char_samp before recognition.
00087 static const float kMinNormalizationAspectRatio = 3.5;
00088 // Minimum probability a top alt choice must meet before having
00089 // deslanted processing applied to it.
00090 static const float kMinProbSkipDeslanted = 0.25;  // NOTE(review): the name suggests deslanting is skipped at/above this value - confirm against the implementation
00091 
00092 class CubeObject {
00093  public:
00094   // Constructors. They differ only in how the word image is specified:
00095   // a CharSamp, or a (left, top, wid, hgt) region of an IMAGE or a Pix.
00096   CubeObject(CubeRecoContext *cntxt, CharSamp *char_samp);
00097   CubeObject(CubeRecoContext *cntxt, IMAGE *img,
00098              int left, int top, int wid, int hgt);
00099   CubeObject(CubeRecoContext *cntxt, Pix *pix,
00100              int left, int top, int wid, int hgt);
00101   ~CubeObject();
00102 
00103   // Perform the word recognition using the specified language model. If none
00104   // is specified, the default language model in the CubeRecoContext is used.
00105   // Returns the sorted list of alternate word answers.
00106   WordAltList *RecognizeWord(LangModel *lang_mod = NULL);
00107   // Same as RecognizeWord, but recognizes the image as a phrase.
00108   WordAltList *RecognizePhrase(LangModel *lang_mod = NULL);
00109   // Computes the cost of a specific string. This is done by performing
00110   // recognition of a language model that allows only the specified word.
00111   // The alternate list(s) will be permanently modified.
00112   int WordCost(const char *str);
00113 
00114   // Returns the BeamSearch object that resulted from the last call to
00115   // RecognizeWord (the deslanted one when deslanted_ is set).
00116   inline BeamSearch *BeamObj() const {
00117     return (deslanted_ == true ? deslanted_beam_obj_ : beam_obj_);
00118   }
00119   // Returns the WordAltList object that resulted from the last call to
00120   // RecognizeWord (the deslanted one when deslanted_ is set).
00121   inline WordAltList *AlternateList() const {
00122     return (deslanted_ == true ? deslanted_alt_list_ : alt_list_);
00123   }
00124   // Returns the CubeSearchObject object that resulted from the last call to
00125   // RecognizeWord (the deslanted one when deslanted_ is set).
00126   inline CubeSearchObject *SrchObj() const {
00127     return (deslanted_ == true ? deslanted_srch_obj_ : srch_obj_);
00128   }
00129   // Returns the CharSamp object that resulted from the last call to
00130   // RecognizeWord (the deslanted one when deslanted_ is set). It is not necessarily
00131   // identical to the one passed at construction time as normalization might have occurred.
00132   inline CharSamp *CharSample() const {
00133     return (deslanted_ == true ? deslanted_char_samp_ : char_samp_);
00134   }
00135 
00136   // Sets the CharSamp ownership flag (own_char_samp_).
00137   inline void SetCharSampOwnership(bool own_char_samp) {
00138     own_char_samp_ = own_char_samp;
00139   }
00140 
00141  protected:
00142   // Normalize the CharSamp if its aspect ratio exceeds a threshold.
00143   bool Normalize();  // threshold: presumably kMinNormalizationAspectRatio, declared before this class - confirm
00144 
00145  private:
00146   // Minimum segment count needed to normalize a char_samp before recognition.
00147   static const int kMinNormalizationSegmentCnt = 4;
00148 
00149   // Data member initialization function.
00150   void Init();
00151   // Free alternate lists.
00152   void Cleanup();
00153   // Perform the actual recognition using the specified language model. If none
00154   // is specified, the default language model in the CubeRecoContext is used.
00155   // Returns the sorted list of alternate answers. Called by both
00156   // RecognizeWord (word_mode is true) and RecognizePhrase (word_mode is false).
00157   WordAltList *Recognize(LangModel *lang_mod, bool word_mode);
00158 
00159   CubeRecoContext *cntxt_;  // presumably the context passed to the constructor
00160   BeamSearch *beam_obj_;  // returned by BeamObj() when deslanted_ is false
00161   BeamSearch *deslanted_beam_obj_;  // returned by BeamObj() when deslanted_ is true
00162   bool offline_mode_;
00163   bool own_char_samp_;  // set by SetCharSampOwnership()
00164   bool deslanted_;  // selects the deslanted_* members in the accessors above
00165   CharSamp *char_samp_;  // returned by CharSample() when deslanted_ is false
00166   CharSamp *deslanted_char_samp_;  // returned by CharSample() when deslanted_ is true
00167   CubeSearchObject *srch_obj_;  // returned by SrchObj() when deslanted_ is false
00168   CubeSearchObject *deslanted_srch_obj_;  // returned by SrchObj() when deslanted_ is true
00169   WordAltList *alt_list_;  // returned by AlternateList() when deslanted_ is false
00170   WordAltList *deslanted_alt_list_;  // returned by AlternateList() when deslanted_ is true
00171 };
00172 }
00173 
00174 #endif  // CUBE_OBJECT_H
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines