Tesseract 3.01
/data/source/tesseract-ocr/textord/tabfind.h
Go to the documentation of this file.
00001 
00002 // File:        tabfind.h
00003 // Description: Subclass of BBGrid to find tabstops.
00004 // Author:      Ray Smith
00005 // Created:     Fri Mar 21 15:03:01 PST 2008
00006 //
00007 // (C) Copyright 2008, Google Inc.
00008 // Licensed under the Apache License, Version 2.0 (the "License");
00009 // you may not use this file except in compliance with the License.
00010 // You may obtain a copy of the License at
00011 // http://www.apache.org/licenses/LICENSE-2.0
00012 // Unless required by applicable law or agreed to in writing, software
00013 // distributed under the License is distributed on an "AS IS" BASIS,
00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015 // See the License for the specific language governing permissions and
00016 // limitations under the License.
00017 //
00019 
00020 #ifndef TESSERACT_TEXTORD_TABFIND_H__
00021 #define TESSERACT_TEXTORD_TABFIND_H__
00022 
00023 #include "alignedblob.h"
00024 #include "tesscallback.h"
00025 #include "tabvector.h"
00026 #include "linefind.h"
00027 
// Tunable parameters used by tab finding. Each declaration gives the
// variable name, a default value and a help string.
// NOTE(review): BOOL_VAR_H / double_VAR_H are macros defined outside this
// file; presumably they expand to declarations of the named variables -
// confirm against the params header.
00028 extern BOOL_VAR_H(textord_tabfind_force_vertical_text, false,
00029        "Force using vertical text page mode");
00030 extern BOOL_VAR_H(textord_tabfind_vertical_horizontal_mix, true,
00031        "find horizontal lines such as headers in vertical page mode");
00032 extern double_VAR_H(textord_tabfind_vertical_text_ratio, 0.5,
00033        "Fraction of textlines deemed vertical to use vertical page mode");
00034 extern double_VAR_H(textord_tabfind_aligned_gap_fraction, 0.75,
00035        "Fraction of height used as a minimum gap for aligned blobs.");
00036 
// Forward declarations. In the declarations visible in this header,
// BLOBNBOX, BLOBNBOX_LIST, TO_BLOCK and ScrollView are only used through
// pointers, so their full definitions are not needed here.
// NOTE(review): Pix is not referenced by any declaration visible in this
// header - confirm whether the forward declaration is still required.
00037 class BLOBNBOX;
00038 class BLOBNBOX_LIST;
00039 class TO_BLOCK;
00040 class ScrollView;
00041 struct Pix;
00042 
00043 namespace tesseract {
00044 
// Type of the callback returned by TabFind::WidthCB().
// NOTE(review): presumably TessResultCallback1<bool, int> takes one int
// argument and returns bool - confirm against tesscallback.h.
00045 typedef TessResultCallback1<bool, int> WidthCallback;
00046 
      // Forward declaration; not referenced by the declarations visible in
      // this header.
00047 struct AlignedBlobParams;
00048 
      // NOTE(review): the meaning of this factor is not visible in this
      // listing; presumably it relates to column width estimation (see
      // TabFind::column_widths_) - confirm against tabfind.cpp.
00050 const int kColumnWidthFactor = 20;
00051 
00061 class TabFind : public AlignedBlob {
        // Finds tab-stops on a page (the file header describes it as a
        // "Subclass of BBGrid to find tabstops"; presumably AlignedBlob itself
        // derives from BBGrid - confirm against alignedblob.h).
        // NOTE(review): several declarations below carry no comment in this
        // listing. Notes worded with "presumably" are inferred from names and
        // signatures only and should be confirmed against tabfind.cpp.
00062  public:
        // Constructor.
        // NOTE(review): gridsize, bleft and tright presumably describe the
        // grid; vlines presumably supplies vertical lines, (vertical_x,
        // vertical_y) an initial vertical direction and resolution the value
        // stored in resolution_ - confirm.
00063   TabFind(int gridsize, const ICOORD& bleft, const ICOORD& tright,
00064           TabVector_LIST* vlines, int vertical_x, int vertical_y,
00065           int resolution);
00066   virtual ~TabFind();
00067 
        // NOTE(review): presumably inserts every blob of the list into the
        // given grid, with h_spread/v_spread/large having the same meaning as
        // for InsertBlob; the effect of take_ownership is not visible here -
        // confirm.
00073   void InsertBlobList(bool h_spread, bool v_spread, bool large,
00074                       BLOBNBOX_LIST* blobs, bool take_ownership,
00075                       BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>* grid);
00076 
        // NOTE(review): presumably inserts a single blob into the given grid
        // and returns whether it was inserted - confirm.
00086   bool InsertBlob(bool h_spread, bool v_spread, bool large, BLOBNBOX* blob,
00087                   BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>* grid);
00088 
00089 
00090   // Returns the gutter width of the given TabVector between the given y limits.
00091   // Also returns x-shift to be added to the vector to clear any intersecting
00092   // blobs. The shift is deducted from the returned gutter.
00093   int GutterWidth(int bottom_y, int top_y, const TabVector& v,
00094                   int* required_shift);
        // Results are written through the gutter_width and neighbour_gap
        // pointers.
        // NOTE(review): presumably measured for bbox relative to a tab at
        // tab_x, searching no further than max_gutter - confirm.
00098   void GutterWidthAndNeighbourGap(int tab_x, int mean_height,
00099                                   int max_gutter, bool left,
00100                                   BLOBNBOX* bbox, int* gutter_width,
00101                                   int* neighbour_gap);
00102 
        // Public overload of AdjacentBlob. A private overload taking
        // additional ignore_images, top_y and bottom_y arguments is declared
        // further down in this class.
00108   BLOBNBOX* AdjacentBlob(const BLOBNBOX* bbox,
00109                          bool right_to_left, int gap_limit);
00110 
        // Returns a BlobRegionType for the given blob.
00115   BlobRegionType ComputeBlobType(BLOBNBOX* blob);
00116 
        // NOTE(review): presumably return the x-coordinate of the right/left
        // edge found for the box; the meaning of crossing and extended is not
        // visible here - confirm.
00123   int RightEdgeForBox(const TBOX& box, bool crossing, bool extended);
00127   int LeftEdgeForBox(const TBOX& box, bool crossing, bool extended);
00128 
        // As above but returning a TabVector pointer rather than an int.
        // SetupTabSearch (protected, below) is described as the helper that
        // sets up the search limits for these two functions.
00145   TabVector* RightTabForBox(const TBOX& box, bool crossing, bool extended);
00149   TabVector* LeftTabForBox(const TBOX& box, bool crossing, bool extended);
00150 
        // NOTE(review): presumably true if width is one of the commonly
        // occurring widths (see column_widths_ and ComputeColumnWidths) -
        // confirm.
00155   bool CommonWidth(int width);
        // Static predicates comparing two sizes.
        // NOTE(review): the thresholds that separate "different" from "very
        // different" are not visible here.
00160   static bool DifferentSizes(int size1, int size2);
00165   static bool VeryDifferentSizes(int size1, int size2);
00166 
        // Returns the width_cb_ member.
00170   WidthCallback* WidthCB() {
00171     return width_cb_;
00172   }
00173 
        // Returns a const reference to the image_origin_ member.
00177   const ICOORD& image_origin() const {
00178     return image_origin_;
00179   }
00180 
00181  protected:
        // Accessors returning pointers to the vectors_ and dead_vectors_
        // member lists.
00185   TabVector_LIST* vectors() {
00186     return &vectors_;
00187   }
00188   TabVector_LIST* dead_vectors() {
00189     return &dead_vectors_;
00190   }
00191 
        // NOTE(review): presumably the top-level entry point that finds the
        // tab vectors (DontFindTabVectors below is described as its
        // counterpart); the meaning of the bool result is not visible here -
        // confirm.
00196   bool FindTabVectors(TabVector_LIST* hlines,
00197                       BLOBNBOX_LIST* image_blobs, TO_BLOCK* block,
00198                       int min_gutter_width,
00199                       FCOORD* deskew, FCOORD* reskew);
00200 
00201   // Top-level function that does not find TabVectors in an input page block,
00202   // but instead sets up for single column mode.
00203   void DontFindTabVectors(BLOBNBOX_LIST* image_blobs,
00204                           TO_BLOCK* block, FCOORD* deskew, FCOORD* reskew);
00205   // Helper function to set up search limits for *TabForBox.
00206   void SetupTabSearch(int x, int y, int* min_key, int* max_key);
00207 
        // Takes and returns a ScrollView pointer.
        // NOTE(review): presumably draws the tab vectors in tab_win - confirm.
00211   ScrollView* DisplayTabVectors(ScrollView* tab_win);
00212 
00213   // First part of FindTabVectors, which may be used twice if the text
00214   // is mostly of vertical alignment.  If find_vertical_text flag is
00215   // true, this finds vertical textlines in possibly rotated blob space.
00216   // In other words, when the page has mostly vertical lines and is rotated,
00217   // setting this to true will find horizontal lines on the page.
        // NOTE(review): no find_vertical_text parameter appears in the
        // signature below, so the flag mentioned above is presumably stale or
        // refers to state set elsewhere - confirm against tabfind.cpp.
00218   ScrollView* FindInitialTabVectors(BLOBNBOX_LIST* image_blobs,
00219                                     int min_gutter_width, TO_BLOCK* block);
00220 
00221   // Apply the given rotation to the given list of blobs.
00222   static void RotateBlobList(const FCOORD& rotation, BLOBNBOX_LIST* blobs);
00223 
00224   // Flip the vertical and horizontal lines and rotate the grid ready
00225   // for working on the rotated image.
00226   // The min_gutter_width will be adjusted to the median gutter width between
00227   // vertical tabs to set a better threshold for tabboxes in the 2nd pass.
00228   void ResetForVerticalText(const FCOORD& rotate, const FCOORD& rerotate,
00229                             TabVector_LIST* horizontal_lines,
00230                             int* min_gutter_width);
00231 
00232  private:
00233   // For each box in the grid, decide whether it is a candidate tab-stop,
00234   // and if so add it to the tab_grid_.
00235   ScrollView* FindTabBoxes(int min_gutter_width);
00236 
00237   // Return true if this box looks like a candidate tab stop, and set
00238   // the appropriate tab type(s) to TT_UNCONFIRMED.
00239   bool TestBoxForTabs(BLOBNBOX* bbox, int min_gutter_width);
00240 
00241   // Fills the list of TabVector with the tabstops found in the grid,
00242   // and estimates the logical vertical direction.
00243   void FindAllTabVectors(int min_gutter_width);
00244   // Helper for FindAllTabVectors that finds the vectors of a particular type.
        // This is an overload of the protected FindTabVectors declared above.
00245   int FindTabVectors(int search_size_multiple,
00246                      TabAlignment alignment,
00247                      int min_gutter_width,
00248                      TabVector_LIST* vectors,
00249                      int* vertical_x, int* vertical_y);
00250   // Finds a vector corresponding to a tabstop running through the
00251   // given box of the given alignment type.
00252   // search_size_multiple is a multiple of height used to control
00253   // the size of the search.
00254   // vertical_x and y are updated with an estimate of the real
00255   // vertical direction. (skew finding.)
00256   // Returns NULL if no decent tabstop can be found.
00257   TabVector* FindTabVector(int search_size_multiple, int min_gutter_width,
00258                            TabAlignment alignment,
00259                            BLOBNBOX* bbox,
00260                            int* vertical_x, int* vertical_y);
00261 
00262   // Set the vertical_skew_ member from the given vector and refit
00263   // all vectors parallel to the skew vector.
00264   void SetVerticalSkewAndParellelize(int vertical_x, int vertical_y);
00265 
00266   // Sort all the current vectors using the vertical_skew_ vector.
00267   void SortVectors();
00268 
00269   // Evaluate all the current tab vectors.
00270   void EvaluateTabs();
00271 
00272   // Trace textlines from one side to the other of each tab vector, saving
00273   // the most frequent column widths found in a list so that a given width
00274   // can be tested for being a common width with a simple callback function.
00275   void ComputeColumnWidths(ScrollView* tab_win);
00276 
00277   // Set the region_type_ member for all the blobs in the grid.
00278   void ComputeBlobGoodness();
00279 
00280   // Set the region_type_ member of the blob, if not already known.
00281   void SetBlobRegionType(BLOBNBOX* blob);
00282 
00283   // Mark blobs as being in a vertical text line where that is the case.
00284   void MarkVerticalText();
00285 
00286   // Returns the median gutter width between pairs of matching tab vectors
00287   // assuming they are sorted left-to-right.  If there are too few data
00288   // points (< kMinLinesInColumn), then 0 is returned.
00289   int FindMedianGutterWidth(TabVector_LIST* tab_vectors);
00290 
00291   // If this box looks like it is on a textline in the given direction,
00292   // return the width of the textline-like group of blobs, and the number
00293   // of blobs found.
00294   // For more detail see FindTextlineSegment below.
00295   int FindTextlineWidth(bool right_to_left, BLOBNBOX* bbox, int* blob_count);
00296 
00297   // Search from the given tabstop bbox to the next opposite
00298   // tabstop bbox on the same text line, which may be itself.
00299   // Returns true if the search is successful, and sets
00300   // start_pt, end_pt to the fitted baseline, width to the measured
00301   // width of the text line (column width estimate.)
        // NOTE(review): the signature has no width parameter; it has
        // left_edge and right_edge output pointers instead, which presumably
        // receive the extent of the line - confirm and update the text above.
00302   bool TraceTextline(BLOBNBOX* bbox, ICOORD* start_pt, ICOORD* end_pt,
00303                      int* left_edge, int* right_edge);
00304 
00305   // Search from the given bbox in the given direction until the next tab
00306   // vector is found or a significant horizontal gap is found.
00307   // Returns the width of the line if the search is successful, (defined
00308   // as good coverage of the width and a good fitting baseline) and sets
00309   // start_pt, end_pt to the fitted baseline, left_blob, right_blob to
00310   // the ends of the line. Returns zero otherwise.
00311   // Sets blob_count to the number of blobs found on the line.
00312   // On input, either both left_vector and right_vector should be NULL,
00313   // indicating a basic search, or both left_vector and right_vector should
00314   // be not NULL and one of *left_vector and *right_vector should be not NULL,
00315   // in which case the search is strictly between tab vectors and will return
00316   // zero if a gap is found before the opposite tab vector is reached, or a
00317   // conflicting tab vector is found.
00318   // If ignore_images is true, then blobs with aligned_text() < 0 are treated
00319   // as if they do not exist.
        // NOTE(review): the first parameter is spelled right_to_lefts here but
        // right_to_left in the other declarations of this class; presumably a
        // typo with the same meaning - confirm.
00320   int FindTextlineSegment(bool right_to_lefts, bool ignore_images,
00321                           BLOBNBOX* bbox, int* blob_count,
00322                           ICOORD* start_pt, ICOORD* end_pt,
00323                           TabVector** left_vector, TabVector** right_vector,
00324                           BLOBNBOX** left_blob, BLOBNBOX** right_blob);
00325 
00326   // Find the next adjacent (to left or right) blob on this text line,
00327   // with the constraint that it must vertically significantly overlap
00328   // the [top_y, bottom_y] range.
00329   // If ignore_images is true, then blobs with aligned_text() < 0 are treated
00330   // as if they do not exist.
00331   BLOBNBOX* AdjacentBlob(const BLOBNBOX* bbox,
00332                          bool right_to_left, bool ignore_images,
00333                          int gap_limit, int top_y, int bottom_y);
00334 
00335   // Add a bi-directional partner relationship between the left
00336   // and the right. If one (or both) of the vectors is a separator,
00337   // extend a nearby extendable vector or create a new one of the
00338   // correct type, using the given left or right blob as a guide.
00339   void AddPartnerVector(BLOBNBOX* left_blob, BLOBNBOX* right_blob,
00340                         TabVector* left, TabVector* right);
00341 
        // NOTE(review): undocumented in this listing; presumably removes or
        // retires unwanted tab vectors (see dead_vectors_) - confirm.
00346   void CleanupTabs();
00347 
        // NOTE(review): undocumented in this listing; presumably deskews the
        // given lines, blobs and block, reporting the rotations through deskew
        // and reskew (see ComputeDeskewVectors below); the meaning of the bool
        // result is not visible here - confirm.
00353   bool Deskew(TabVector_LIST* hlines, BLOBNBOX_LIST* image_blobs,
00354               TO_BLOCK* block, FCOORD* deskew, FCOORD* reskew);
00355 
00356   // Compute the rotation required to deskew, and its inverse rotation.
00357   void ComputeDeskewVectors(FCOORD* deskew, FCOORD* reskew);
00358 
        // NOTE(review): undocumented in this listing; the constraints applied
        // are not visible here - see tabfind.cpp.
00363   void ApplyTabConstraints();
00364 
00365  protected:
00366   ICOORD vertical_skew_;          ///< Estimate of true vertical in this image.
00367   int resolution_;                ///< Of source image in pixels per inch.
00368  private:
00369   ICOORD image_origin_;           ///< Top-left of image in deskewed coords
00370   TabVector_LIST vectors_;        ///< List of rule line and tabstops.
00371   TabVector_IT v_it_;             ///< Iterator for searching vectors_.
00372   TabVector_LIST dead_vectors_;   ///< Separators and unpartnered tab vectors.
00373   ICOORDELT_LIST column_widths_;  ///< List of commonly occurring widths.
        // Callback pointer returned by WidthCB().
00375   WidthCallback* width_cb_;
        // Grid that FindTabBoxes adds candidate tab-stop boxes to.
00377   BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>* tab_grid_;
00378 };
00379 
00380 }  // namespace tesseract.
00381 
00382 #endif  // TESSERACT_TEXTORD_TABFIND_H__
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines