Tesseract 3.01
/data/source/tesseract-ocr/api/resultiterator.h
Go to the documentation of this file.
00001 
00002 // File:        resultiterator.h
00003 // Description: Iterator for tesseract results that avoids using tesseract
00004 //              internal data structures.
00005 // Author:      Ray Smith
00006 // Created:     Fri Feb 26 11:01:06 PST 2010
00007 //
00008 // (C) Copyright 2010, Google Inc.
00009 // Licensed under the Apache License, Version 2.0 (the "License");
00010 // you may not use this file except in compliance with the License.
00011 // You may obtain a copy of the License at
00012 // http://www.apache.org/licenses/LICENSE-2.0
00013 // Unless required by applicable law or agreed to in writing, software
00014 // distributed under the License is distributed on an "AS IS" BASIS,
00015 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00016 // See the License for the specific language governing permissions and
00017 // limitations under the License.
00018 //
00020 
00021 #ifndef TESSERACT_API_RESULTITERATOR_H__
00022 #define TESSERACT_API_RESULTITERATOR_H__
00023 
00024 #include "pageiterator.h"
00025 
00026 class BLOB_CHOICE_IT;
00027 
00028 namespace tesseract {
00029 
00030 class Tesseract;
00031 
00032 // Class to iterate over tesseract results, providing access to all levels
00033 // of the page hierarchy, without including any tesseract headers or having
00034 // to handle any tesseract structures.
00035 // WARNING! This class points to data held within the TessBaseAPI class, and
00036 // therefore can only be used while the TessBaseAPI class still exists and
00037 // has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
00038 // DetectOS, or anything else that changes the internal PAGE_RES.
00039 // See apitypes.h for the definition of PageIteratorLevel.
00040 // See also base class PageIterator, which contains the bulk of the interface.
00041 // ResultIterator adds text-specific methods for access to OCR output.
00042 
00043 class ResultIterator : public PageIterator {
00044   friend class ChoiceIterator;
00045  public:
00046   // page_res and tesseract come directly from the BaseAPI.
00047   // The rectangle parameters are copied indirectly from the Thresholder,
00048   // via the BaseAPI. They represent the coordinates of some rectangle in an
00049   // original image (in top-left-origin coordinates) and therefore the top-left
00050   // needs to be added to any output boxes in order to specify coordinates
00051   // in the original image. See TessBaseAPI::SetRectangle.
00052   // The scale and scaled_yres are in case the Thresholder scaled the image
00053   // rectangle prior to thresholding. Any coordinates in tesseract's image
00054   // must be divided by scale before adding (rect_left, rect_top).
00055   // The scaled_yres indicates the effective resolution of the binary image
00056   // that tesseract has been given by the Thresholder.
00057   // Begin has already been called by the time the constructor returns.
00058   ResultIterator(PAGE_RES* page_res, Tesseract* tesseract,
00059                  int scale, int scaled_yres,
00060                  int rect_left, int rect_top,
00061                  int rect_width, int rect_height);
00062   virtual ~ResultIterator();
00063 
00064   // ResultIterators may be copied! This makes it possible to iterate over
00065   // all the objects at a lower level, while maintaining an iterator to
00066   // objects at a higher level. The copy operations DO NOT CALL Begin, so
00067   // iteration continues from the location of src.
00068   // TODO: For now the copy constructor and operator= only need the base class
00069   // versions, but if new data members are added, don't forget to add them!
00070 
00071   // ============= Moving around within the page ============.
00072 
00073   // See PageIterator.
00074 
00075   // ============= Accessing data ==============.
00076 
00077   // Returns the null-terminated UTF-8 encoded text string for the current
00078   // object at the given level. The caller must free it with delete [].
00079   char* GetUTF8Text(PageIteratorLevel level) const;
00080 
00081   // Returns the mean confidence of the current object at the given level.
00082   // The number should be interpreted as a percent probability (0.0f-100.0f).
00083   float Confidence(PageIteratorLevel level) const;
00084 
00085   // ============= Functions that refer to words only ============.
00086 
00087   // Returns the font attributes of the current word. If iterating at a higher
00088   // level object than words, e.g. textlines, then this will return the
00089   // attributes of the first word in that textline.
00090   // The actual return value is a string representing a font name. It points
00091   // to an internal table and SHOULD NOT BE DELETED. Lifespan is the same as
00092   // the iterator itself, i.e. rendered invalid by various members of
00093   // TessBaseAPI, including Init, SetImage, End or deleting the TessBaseAPI.
00094   // Pointsize is returned in printer's points (1/72 inch).
00095   const char* WordFontAttributes(bool* is_bold,
00096                                  bool* is_italic,
00097                                  bool* is_underlined,
00098                                  bool* is_monospace,
00099                                  bool* is_serif,
00100                                  bool* is_smallcaps,
00101                                  int* pointsize,
00102                                  int* font_id) const;
00103 
00104   // Returns true if the current word was found in a dictionary.
00105   bool WordIsFromDictionary() const;
00106 
00107   // Returns true if the current word is numeric.
00108   bool WordIsNumeric() const;
00109 
00110   // ============= Functions that refer to symbols only ============.
00111 
00112   // Returns true if the current symbol is a superscript.
00113   // If iterating at a higher level object than symbols, e.g. words, then
00114   // this will return the attributes of the first symbol in that word.
00115   bool SymbolIsSuperscript() const;
00116   // Returns true if the current symbol is a subscript.
00117   // If iterating at a higher level object than symbols, e.g. words, then
00118   // this will return the attributes of the first symbol in that word.
00119   bool SymbolIsSubscript() const;
00120   // Returns true if the current symbol is a dropcap.
00121   // If iterating at a higher level object than symbols, e.g. words, then
00122   // this will return the attributes of the first symbol in that word.
00123   bool SymbolIsDropcap() const;
00124 };
00125 
00126 // Class to iterate over the classifier choices for a single RIL_SYMBOL.
00127 class ChoiceIterator {
00128  public:
00129   // Construction is from a ResultIterator that points to the symbol of
00130   // interest. The ChoiceIterator allows a one-shot iteration over the
00131   // choices for this symbol and after that it is useless.
00132   explicit ChoiceIterator(const ResultIterator& result_it);
00133   ~ChoiceIterator();
00134 
00135   // Moves to the next choice for the symbol and returns false if there
00136   // are none left.
00137   bool Next();
00138 
00139   // ============= Accessing data ==============.
00140 
00141   // Returns the null-terminated UTF-8 encoded text string for the current
00142   // choice.
00143   // NOTE: Unlike ResultIterator::GetUTF8Text, the return points to an
00144   // internal structure and should NOT be delete[]ed to free after use.
00145   const char* GetUTF8Text() const;
00146 
00147   // Returns the confidence of the current choice.
00148   // The number should be interpreted as a percent probability (0.0f-100.0f).
00149   float Confidence() const;
00150 
00151  private:
00152   // Pointer to the Tesseract object owned by the API.
00153   Tesseract* tesseract_;
00154   // Iterator over the blob choices. NOTE(review): confirm who deletes it.
00155   BLOB_CHOICE_IT* choice_it_;
00156 };
00157 
00158 }  // namespace tesseract.
00159 
00160 #endif  // TESSERACT_API_RESULTITERATOR_H__
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines