Tesseract 3.01
|
00001 00002 // File: resultiterator.h 00003 // Description: Iterator for tesseract results that avoids using tesseract 00004 // internal data structures. 00005 // Author: Ray Smith 00006 // Created: Fri Feb 26 11:01:06 PST 2010 00007 // 00008 // (C) Copyright 2010, Google Inc. 00009 // Licensed under the Apache License, Version 2.0 (the "License"); 00010 // you may not use this file except in compliance with the License. 00011 // You may obtain a copy of the License at 00012 // http://www.apache.org/licenses/LICENSE-2.0 00013 // Unless required by applicable law or agreed to in writing, software 00014 // distributed under the License is distributed on an "AS IS" BASIS, 00015 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00016 // See the License for the specific language governing permissions and 00017 // limitations under the License. 00018 // 00020 00021 #ifndef TESSERACT_API_RESULTITERATOR_H__ 00022 #define TESSERACT_API_RESULTITERATOR_H__ 00023 00024 #include "pageiterator.h" 00025 00026 class BLOB_CHOICE_IT; 00027 00028 namespace tesseract { 00029 00030 class Tesseract; 00031 00032 // Class to iterate over tesseract results, providing access to all levels 00033 // of the page hierarchy, without including any tesseract headers or having 00034 // to handle any tesseract structures. 00035 // WARNING! This class points to data held within the TessBaseAPI class, and 00036 // therefore can only be used while the TessBaseAPI class still exists and 00037 // has not been subjected to a call of Init, SetImage, Recognize, Clear, End 00038 // DetectOS, or anything else that changes the internal PAGE_RES. 00039 // See apitypes.h for the definition of PageIteratorLevel. 00040 // See also base class PageIterator, which contains the bulk of the interface. 00041 // ResultIterator adds text-specific methods for access to OCR output. 00042 00043 class ResultIterator : public PageIterator { 00044 friend class ChoiceIterator; 00045 public: 00046 // page_res and tesseract come directly from the BaseAPI. 00047 // The rectangle parameters are copied indirectly from the Thresholder, 00048 // via the BaseAPI. They represent the coordinates of some rectangle in an 00049 // original image (in top-left-origin coordinates) and therefore the top-left 00050 // needs to be added to any output boxes in order to specify coordinates 00051 // in the original image. See TessBaseAPI::SetRectangle. 00052 // The scale and scaled_yres are in case the Thresholder scaled the image 00053 // rectangle prior to thresholding. Any coordinates in tesseract's image 00054 // must be divided by scale before adding (rect_left, rect_top). 00055 // The scaled_yres indicates the effective resolution of the binary image 00056 // that tesseract has been given by the Thresholder. 00057 // After the constructor, Begin has already been called. 00058 ResultIterator(PAGE_RES* page_res, Tesseract* tesseract, 00059 int scale, int scaled_yres, 00060 int rect_left, int rect_top, 00061 int rect_width, int rect_height); 00062 virtual ~ResultIterator(); 00063 00064 // ResultIterators may be copied! This makes it possible to iterate over 00065 // all the objects at a lower level, while maintaining an iterator to 00066 // objects at a higher level. These constructors DO NOT CALL Begin, so 00067 // iterations will continue from the location of src. 00068 // TODO: For now the copy constructor and operator= only need the base class 00069 // versions, but if new data members are added, don't forget to add them! 00070 00071 // ============= Moving around within the page ============. 00072 00073 // See PageIterator. 00074 00075 // ============= Accessing data ==============. 00076 00077 // Returns the null terminated UTF-8 encoded text string for the current 00078 // object at the given level. Use delete [] to free after use. 00079 char* GetUTF8Text(PageIteratorLevel level) const; 00080 00081 // Returns the mean confidence of the current object at the given level. 00082 // The number should be interpreted as a percent probability. (0.0f-100.0f) 00083 float Confidence(PageIteratorLevel level) const; 00084 00085 // ============= Functions that refer to words only ============. 00086 00087 // Returns the font attributes of the current word. If iterating at a higher 00088 // level object than words, eg textlines, then this will return the 00089 // attributes of the first word in that textline. 00090 // The actual return value is a string representing a font name. It points 00091 // to an internal table and SHOULD NOT BE DELETED. Lifespan is the same as 00092 // the iterator itself, ie rendered invalid by various members of 00093 // TessBaseAPI, including Init, SetImage, End or deleting the TessBaseAPI. 00094 // Pointsize is returned in printers points (1/72 inch.) 00095 const char* WordFontAttributes(bool* is_bold, 00096 bool* is_italic, 00097 bool* is_underlined, 00098 bool* is_monospace, 00099 bool* is_serif, 00100 bool* is_smallcaps, 00101 int* pointsize, 00102 int* font_id) const; 00103 00104 // Returns true if the current word was found in a dictionary. 00105 bool WordIsFromDictionary() const; 00106 00107 // Returns true if the current word is numeric. 00108 bool WordIsNumeric() const; 00109 00110 // ============= Functions that refer to symbols only ============. 00111 00112 // Returns true if the current symbol is a superscript. 00113 // If iterating at a higher level object than symbols, eg words, then 00114 // this will return the attributes of the first symbol in that word. 00115 bool SymbolIsSuperscript() const; 00116 // Returns true if the current symbol is a subscript. 00117 // If iterating at a higher level object than symbols, eg words, then 00118 // this will return the attributes of the first symbol in that word. 00119 bool SymbolIsSubscript() const; 00120 // Returns true if the current symbol is a dropcap. 00121 // If iterating at a higher level object than symbols, eg words, then 00122 // this will return the attributes of the first symbol in that word. 00123 bool SymbolIsDropcap() const; 00124 }; 00125 00126 // Class to iterate over the classifier choices for a single RIL_SYMBOL. 00127 class ChoiceIterator { 00128 public: 00129 // Construction is from a ResultIterator that points to the symbol of 00130 // interest. The ChoiceIterator allows a one-shot iteration over the 00131 // choices for this symbol and after that is is useless. 00132 explicit ChoiceIterator(const ResultIterator& result_it); 00133 ~ChoiceIterator(); 00134 00135 // Moves to the next choice for the symbol and returns false if there 00136 // are none left. 00137 bool Next(); 00138 00139 // ============= Accessing data ==============. 00140 00141 // Returns the null terminated UTF-8 encoded text string for the current 00142 // choice. 00143 // NOTE: Unlike ResultIterator::GetUTF8Text, the return points to an 00144 // internal structure and should NOT be delete[]ed to free after use. 00145 const char* GetUTF8Text() const; 00146 00147 // Returns the confidence of the current choice. 00148 // The number should be interpreted as a percent probability. (0.0f-100.0f) 00149 float Confidence() const; 00150 00151 private: 00152 // Pointer to the Tesseract object owned by the API. 00153 Tesseract* tesseract_; 00154 // Iterator over the blob choices. 00155 BLOB_CHOICE_IT* choice_it_; 00156 }; 00157 00158 } // namespace tesseract. 00159 00160 #endif // TESSERACT_API_RESULT_ITERATOR_H__