Tesseract 3.01
/data/source/tesseract-ocr/api/pageiterator.h
Go to the documentation of this file.
00001 
00002 // File:        pageiterator.h
00003 // Description: Iterator for tesseract page structure that avoids using
00004 //              tesseract internal data structures.
00005 // Author:      Ray Smith
00006 // Created:     Fri Feb 26 11:01:06 PST 2010
00007 //
00008 // (C) Copyright 2010, Google Inc.
00009 // Licensed under the Apache License, Version 2.0 (the "License");
00010 // you may not use this file except in compliance with the License.
00011 // You may obtain a copy of the License at
00012 // http://www.apache.org/licenses/LICENSE-2.0
00013 // Unless required by applicable law or agreed to in writing, software
00014 // distributed under the License is distributed on an "AS IS" BASIS,
00015 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00016 // See the License for the specific language governing permissions and
00017 // limitations under the License.
00018 //
00020 
00021 #ifndef TESSERACT_API_PAGEITERATOR_H__
00022 #define TESSERACT_API_PAGEITERATOR_H__
00023 
00024 #include "apitypes.h"
00025 
00026 class C_BLOB_IT;
00027 class PBLOB_IT;
00028 class PAGE_RES;
00029 class PAGE_RES_IT;
00030 class WERD;
00031 struct Pix;
00032 
00033 namespace tesseract {
00034 
00035 class Tesseract;
00036 
00037 // Class to iterate over tesseract page structure, providing access to all
00038 // levels of the page hierarchy, without including any tesseract headers or
00039 // having to handle any tesseract structures.
00040 // WARNING! This class points to data held within the TessBaseAPI class, and
00041 // therefore can only be used while the TessBaseAPI class still exists and
00042 // has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
00043 // DetectOS, or anything else that changes the internal PAGE_RES.
00044 // See apitypes.h for the definition of PageIteratorLevel.
00045 // See also ResultIterator, derived from PageIterator, which adds in the
00046 // ability to access OCR output with text-specific methods.
00047 
00048 class PageIterator {
00049  public:
00050   // page_res and tesseract come directly from the BaseAPI.
00051   // The rectangle parameters are copied indirectly from the Thresholder,
00052   // via the BaseAPI. They represent the coordinates of some rectangle in an
00053   // original image (in top-left-origin coordinates) and therefore the top-left
00054   // needs to be added to any output boxes in order to specify coordinates
00055   // in the original image. See TessBaseAPI::SetRectangle.
00056   // The scale and scaled_yres are in case the Thresholder scaled the image
00057   // rectangle prior to thresholding. Any coordinates in tesseract's image
00058   // must be divided by scale before adding (rect_left, rect_top).
00059   // The scaled_yres indicates the effective resolution of the binary image
00060   // that tesseract has been given by the Thresholder.
00061   // After the constructor, Begin has already been called.
00062   PageIterator(PAGE_RES* page_res, Tesseract* tesseract,
00063                int scale, int scaled_yres,
00064                int rect_left, int rect_top,
00065                int rect_width, int rect_height);
00066   virtual ~PageIterator();
00067 
00068   // Page/ResultIterators may be copied! This makes it possible to iterate over
00069   // all the objects at a lower level, while maintaining an iterator to
00070   // objects at a higher level. These constructors DO NOT CALL Begin, so
00071   // iterations will continue from the location of src.
00072   PageIterator(const PageIterator& src);
00073   const PageIterator& operator=(const PageIterator& src);
00074 
00075   // ============= Moving around within the page ============.
00076 
00077   // Moves the iterator to point to the start of the page to begin an iteration.
00078   void Begin();
00079 
00080   // Moves to the start of the next object at the given level in the
00081   // page hierarchy, and returns false if the end of the page was reached.
00082   // NOTE that RIL_SYMBOL will skip non-text blocks, but all other
00083   // PageIteratorLevel level values will visit each non-text block once.
00084   // Think of non text blocks as containing a single para, with a single line,
00085   // with a single imaginary word.
00086   // Calls to Next with different levels may be freely intermixed.
00087   // This function iterates words in right-to-left scripts correctly, if
00088   // the appropriate language has been loaded into Tesseract.
00089   bool Next(PageIteratorLevel level);
00090 
00091   // Returns true if the iterator is at the start of an object at the given
00092   // level. Possible uses include determining if a call to Next(RIL_WORD)
00093   // moved to the start of a RIL_PARA.
00094   bool IsAtBeginningOf(PageIteratorLevel level) const;
00095 
00096   // Returns whether the iterator is positioned at the last element in a
00097   // given level. (e.g. the last word in a line, the last line in a block)
00098   bool IsAtFinalElement(PageIteratorLevel level,
00099                         PageIteratorLevel element) const;
00100 
00101   // ============= Accessing data ==============.
00102   // Coordinate system:
00103   // Integer coordinates are at the cracks between the pixels.
00104   // The top-left corner of the top-left pixel in the image is at (0,0).
00105   // The bottom-right corner of the bottom-right pixel in the image is at
00106   // (width, height).
00107   // Every bounding box goes from the top-left of the top-left contained
00108   // pixel to the bottom-right of the bottom-right contained pixel, so
00109   // the bounding box of the single top-left pixel in the image is:
00110   // (0,0)->(1,1).
00111   // If an image rectangle has been set in the API, then returned coordinates
00112   // relate to the original (full) image, rather than the rectangle.
00113 
00114   // Returns the bounding rectangle of the current object at the given level.
00115   // See comment on coordinate system above.
00116   // Returns false if there is no such object at the current position.
00117   // The returned bounding box is guaranteed to match the size and position
00118   // of the image returned by GetBinaryImage, but may clip foreground pixels
00119   // from a grey image. The padding argument to GetImage can be used to expand
00120   // the image to include more foreground pixels. See GetImage below.
00121   bool BoundingBox(PageIteratorLevel level,
00122                    int* left, int* top, int* right, int* bottom) const;
00123 
00124   // Returns the type of the current block. See apitypes.h for PolyBlockType.
00125   PolyBlockType BlockType() const;
00126 
00127   // Returns a binary image of the current object at the given level.
00128   // The position and size match the return from BoundingBox.
00129   // Use pixDestroy to delete the image after use.
00130   Pix* GetBinaryImage(PageIteratorLevel level) const;
00131 
00132   // Returns an image of the current object at the given level in greyscale
00133   // if available in the input. To guarantee a binary image use GetBinaryImage.
00134   // NOTE that in order to give the best possible image, the bounds are
00135   // expanded slightly over the binary connected component, by the supplied
00136   // padding, so the top-left position of the returned image is returned
00137   // in (left,top). These will most likely not match the coordinates
00138   // returned by BoundingBox.
00139   // Use pixDestroy to delete the image after use.
00140   Pix* GetImage(PageIteratorLevel level, int padding,
00141                 int* left, int* top) const;
00142 
00143   // Returns the baseline of the current object at the given level.
00144   // The baseline is the line that passes through (x1, y1) and (x2, y2).
00145   // WARNING: with vertical text, baselines may be vertical!
00146   // Returns false if there is no baseline at the current position.
00147   bool Baseline(PageIteratorLevel level,
00148                 int* x1, int* y1, int* x2, int* y2) const;
00149 
00150   // Returns orientation for the block the iterator points to.
00151   //   orientation, writing_direction, textline_order: see publictypes.h
00152   //   deskew_angle: after rotating the block so the text orientation is
00153   //                 upright, how many radians does one have to rotate the
00154   //                 block anti-clockwise for it to be level?
00155   //                   -Pi/4 <= deskew_angle <= Pi/4
00156   void Orientation(tesseract::Orientation *orientation,
00157                    tesseract::WritingDirection *writing_direction,
00158                    tesseract::TextlineOrder *textline_order,
00159                    float *deskew_angle);
00160 
00161  protected:
00162   // Sets up the internal data for iterating the blobs of a new word, then
00163   // moves the iterator to the given offset.
00164   void BeginWord(int offset);
00165 
00166   // Pointer to the page_res owned by the API.
00167   PAGE_RES* page_res_;
00168   // Pointer to the Tesseract object owned by the API.
00169   Tesseract* tesseract_;
00170   // The iterator to the page_res_. Owned by this PageIterator.
00171   // A pointer just to avoid dragging in Tesseract includes.
00172   PAGE_RES_IT* it_;
00173   // The current input WERD being iterated. If there is an output from OCR,
00174   // then word_ is NULL. Owned by the API.
00175   WERD* word_;
00176   // The length of the current word_.
00177   int word_length_;
00178   // The current blob index within the word.
00179   int blob_index_;
00180   // Iterator to the blobs within the word. If NULL, then we are iterating
00181   // OCR results in the box_word.
00182   // Owned by this PageIterator.
00183   C_BLOB_IT* cblob_it_;
00184   // Parameters saved from the Thresholder. Needed to rebuild coordinates.
00185   int scale_;
00186   int scaled_yres_;
00187   int rect_left_;
00188   int rect_top_;
00189   int rect_width_;
00190   int rect_height_;
00191 };
00192 
00193 }  // namespace tesseract.
00194 
00195 #endif  // TESSERACT_API_PAGEITERATOR_H__
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines