// Tesseract 3.01
00001 00002 // File: tabfind.h 00003 // Description: Subclass of BBGrid to find tabstops. 00004 // Author: Ray Smith 00005 // Created: Fri Mar 21 15:03:01 PST 2008 00006 // 00007 // (C) Copyright 2008, Google Inc. 00008 // Licensed under the Apache License, Version 2.0 (the "License"); 00009 // you may not use this file except in compliance with the License. 00010 // You may obtain a copy of the License at 00011 // http://www.apache.org/licenses/LICENSE-2.0 00012 // Unless required by applicable law or agreed to in writing, software 00013 // distributed under the License is distributed on an "AS IS" BASIS, 00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 // See the License for the specific language governing permissions and 00016 // limitations under the License. 00017 // 00019 00020 #ifndef TESSERACT_TEXTORD_TABFIND_H__ 00021 #define TESSERACT_TEXTORD_TABFIND_H__ 00022 00023 #include "alignedblob.h" 00024 #include "tesscallback.h" 00025 #include "tabvector.h" 00026 #include "linefind.h" 00027 00028 extern BOOL_VAR_H(textord_tabfind_force_vertical_text, false, 00029 "Force using vertical text page mode"); 00030 extern BOOL_VAR_H(textord_tabfind_vertical_horizontal_mix, true, 00031 "find horizontal lines such as headers in vertical page mode"); 00032 extern double_VAR_H(textord_tabfind_vertical_text_ratio, 0.5, 00033 "Fraction of textlines deemed vertical to use vertical page mode"); 00034 extern double_VAR_H(textord_tabfind_aligned_gap_fraction, 0.75, 00035 "Fraction of height used as a minimum gap for aligned blobs."); 00036 00037 class BLOBNBOX; 00038 class BLOBNBOX_LIST; 00039 class TO_BLOCK; 00040 class ScrollView; 00041 struct Pix; 00042 00043 namespace tesseract { 00044 00045 typedef TessResultCallback1<bool, int> WidthCallback; 00046 00047 struct AlignedBlobParams; 00048 00050 const int kColumnWidthFactor = 20; 00051 00061 class TabFind : public AlignedBlob { 00062 public: 00063 TabFind(int gridsize, const ICOORD& 
bleft, const ICOORD& tright, 00064 TabVector_LIST* vlines, int vertical_x, int vertical_y, 00065 int resolution); 00066 virtual ~TabFind(); 00067 00073 void InsertBlobList(bool h_spread, bool v_spread, bool large, 00074 BLOBNBOX_LIST* blobs, bool take_ownership, 00075 BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>* grid); 00076 00086 bool InsertBlob(bool h_spread, bool v_spread, bool large, BLOBNBOX* blob, 00087 BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>* grid); 00088 00089 00090 // Returns the gutter width of the given TabVector between the given y limits. 00091 // Also returns x-shift to be added to the vector to clear any intersecting 00092 // blobs. The shift is deducted from the returned gutter. 00093 int GutterWidth(int bottom_y, int top_y, const TabVector& v, 00094 int* required_shift); 00098 void GutterWidthAndNeighbourGap(int tab_x, int mean_height, 00099 int max_gutter, bool left, 00100 BLOBNBOX* bbox, int* gutter_width, 00101 int* neighbour_gap); 00102 00108 BLOBNBOX* AdjacentBlob(const BLOBNBOX* bbox, 00109 bool right_to_left, int gap_limit); 00110 00115 BlobRegionType ComputeBlobType(BLOBNBOX* blob); 00116 00123 int RightEdgeForBox(const TBOX& box, bool crossing, bool extended); 00127 int LeftEdgeForBox(const TBOX& box, bool crossing, bool extended); 00128 00145 TabVector* RightTabForBox(const TBOX& box, bool crossing, bool extended); 00149 TabVector* LeftTabForBox(const TBOX& box, bool crossing, bool extended); 00150 00155 bool CommonWidth(int width); 00160 static bool DifferentSizes(int size1, int size2); 00165 static bool VeryDifferentSizes(int size1, int size2); 00166 00170 WidthCallback* WidthCB() { 00171 return width_cb_; 00172 } 00173 00177 const ICOORD& image_origin() const { 00178 return image_origin_; 00179 } 00180 00181 protected: 00185 TabVector_LIST* vectors() { 00186 return &vectors_; 00187 } 00188 TabVector_LIST* dead_vectors() { 00189 return &dead_vectors_; 00190 } 00191 00196 bool FindTabVectors(TabVector_LIST* hlines, 00197 
BLOBNBOX_LIST* image_blobs, TO_BLOCK* block, 00198 int min_gutter_width, 00199 FCOORD* deskew, FCOORD* reskew); 00200 00201 // Top-level function to not find TabVectors in an input page block, 00202 // but setup for single column mode. 00203 void DontFindTabVectors(BLOBNBOX_LIST* image_blobs, 00204 TO_BLOCK* block, FCOORD* deskew, FCOORD* reskew); 00205 // Helper function to setup search limits for *TabForBox. 00206 void SetupTabSearch(int x, int y, int* min_key, int* max_key); 00207 00211 ScrollView* DisplayTabVectors(ScrollView* tab_win); 00212 00213 // First part of FindTabVectors, which may be used twice if the text 00214 // is mostly of vertical alignment. If find_vertical_text flag is 00215 // true, this finds vertical textlines in possibly rotated blob space. 00216 // In other words, when the page has mostly vertical lines and is rotated, 00217 // setting this to true will find horizontal lines on the page. 00218 ScrollView* FindInitialTabVectors(BLOBNBOX_LIST* image_blobs, 00219 int min_gutter_width, TO_BLOCK* block); 00220 00221 // Apply the given rotation to the given list of blobs. 00222 static void RotateBlobList(const FCOORD& rotation, BLOBNBOX_LIST* blobs); 00223 00224 // Flip the vertical and horizontal lines and rotate the grid ready 00225 // for working on the rotated image. 00226 // The min_gutter_width will be adjusted to the median gutter width between 00227 // vertical tabs to set a better threshold for tabboxes in the 2nd pass. 00228 void ResetForVerticalText(const FCOORD& rotate, const FCOORD& rerotate, 00229 TabVector_LIST* horizontal_lines, 00230 int* min_gutter_width); 00231 00232 private: 00233 // For each box in the grid, decide whether it is a candidate tab-stop, 00234 // and if so add it to the tab_grid_. 00235 ScrollView* FindTabBoxes(int min_gutter_width); 00236 00237 // Return true if this box looks like a candidate tab stop, and set 00238 // the appropriate tab type(s) to TT_UNCONFIRMED. 
00239 bool TestBoxForTabs(BLOBNBOX* bbox, int min_gutter_width); 00240 00241 // Fills the list of TabVector with the tabstops found in the grid, 00242 // and estimates the logical vertical direction. 00243 void FindAllTabVectors(int min_gutter_width); 00244 // Helper for FindAllTabVectors finds the vectors of a particular type. 00245 int FindTabVectors(int search_size_multiple, 00246 TabAlignment alignment, 00247 int min_gutter_width, 00248 TabVector_LIST* vectors, 00249 int* vertical_x, int* vertical_y); 00250 // Finds a vector corresponding to a tabstop running through the 00251 // given box of the given alignment type. 00252 // search_size_multiple is a multiple of height used to control 00253 // the size of the search. 00254 // vertical_x and y are updated with an estimate of the real 00255 // vertical direction. (skew finding.) 00256 // Returns NULL if no decent tabstop can be found. 00257 TabVector* FindTabVector(int search_size_multiple, int min_gutter_width, 00258 TabAlignment alignment, 00259 BLOBNBOX* bbox, 00260 int* vertical_x, int* vertical_y); 00261 00262 // Set the vertical_skew_ member from the given vector and refit 00263 // all vectors parallel to the skew vector. 00264 void SetVerticalSkewAndParellelize(int vertical_x, int vertical_y); 00265 00266 // Sort all the current vectors using the vertical_skew_ vector. 00267 void SortVectors(); 00268 00269 // Evaluate all the current tab vectors. 00270 void EvaluateTabs(); 00271 00272 // Trace textlines from one side to the other of each tab vector, saving 00273 // the most frequent column widths found in a list so that a given width 00274 // can be tested for being a common width with a simple callback function. 00275 void ComputeColumnWidths(ScrollView* tab_win); 00276 00277 // Set the region_type_ member for all the blobs in the grid. 00278 void ComputeBlobGoodness(); 00279 00280 // Set the region_type_ member of the blob, if not already known. 
00281 void SetBlobRegionType(BLOBNBOX* blob); 00282 00283 // Mark blobs as being in a vertical text line where that is the case. 00284 void MarkVerticalText(); 00285 00286 // Returns the median gutter width between pairs of matching tab vectors 00287 // assuming they are sorted left-to-right. If there are too few data 00288 // points (< kMinLinesInColumn), then 0 is returned. 00289 int FindMedianGutterWidth(TabVector_LIST* tab_vectors); 00290 00291 // If this box looks like it is on a textline in the given direction, 00292 // return the width of the textline-like group of blobs, and the number 00293 // of blobs found. 00294 // For more detail see FindTextlineSegment below. 00295 int FindTextlineWidth(bool right_to_left, BLOBNBOX* bbox, int* blob_count); 00296 00297 // Search from the given tabstop bbox to the next opposite 00298 // tabstop bbox on the same text line, which may be itself. 00299 // Returns true if the search is successful, and sets 00300 // start_pt, end_pt to the fitted baseline, width to the measured 00301 // width of the text line (column width estimate.) 00302 bool TraceTextline(BLOBNBOX* bbox, ICOORD* start_pt, ICOORD* end_pt, 00303 int* left_edge, int* right_edge); 00304 00305 // Search from the given bbox in the given direction until the next tab 00306 // vector is found or a significant horizontal gap is found. 00307 // Returns the width of the line if the search is successful, (defined 00308 // as good coverage of the width and a good fitting baseline) and sets 00309 // start_pt, end_pt to the fitted baseline, left_blob, right_blob to 00310 // the ends of the line. Returns zero otherwise. 00311 // Sets blob_count to the number of blobs found on the line. 
00312 // On input, either both left_vector and right_vector should be NULL, 00313 // indicating a basic search, or both left_vector and right_vector should 00314 // be not NULL and one of *left_vector and *right_vector should be not NULL, 00315 // in which case the search is strictly between tab vectors and will return 00316 // zero if a gap is found before the opposite tab vector is reached, or a 00317 // conflicting tab vector is found. 00318 // If ignore_images is true, then blobs with aligned_text() < 0 are treated 00319 // as if they do not exist. 00320 int FindTextlineSegment(bool right_to_lefts, bool ignore_images, 00321 BLOBNBOX* bbox, int* blob_count, 00322 ICOORD* start_pt, ICOORD* end_pt, 00323 TabVector** left_vector, TabVector** right_vector, 00324 BLOBNBOX** left_blob, BLOBNBOX** right_blob); 00325 00326 // Find the next adjacent (to left or right) blob on this text line, 00327 // with the constraint that it must vertically significantly overlap 00328 // the [top_y, bottom_y] range. 00329 // If ignore_images is true, then blobs with aligned_text() < 0 are treated 00330 // as if they do not exist. 00331 BLOBNBOX* AdjacentBlob(const BLOBNBOX* bbox, 00332 bool right_to_left, bool ignore_images, 00333 int gap_limit, int top_y, int bottom_y); 00334 00335 // Add a bi-directional partner relationship between the left 00336 // and the right. If one (or both) of the vectors is a separator, 00337 // extend a nearby extendable vector or create a new one of the 00338 // correct type, using the given left or right blob as a guide. 00339 void AddPartnerVector(BLOBNBOX* left_blob, BLOBNBOX* right_blob, 00340 TabVector* left, TabVector* right); 00341 00346 void CleanupTabs(); 00347 00353 bool Deskew(TabVector_LIST* hlines, BLOBNBOX_LIST* image_blobs, 00354 TO_BLOCK* block, FCOORD* deskew, FCOORD* reskew); 00355 00356 // Compute the rotation required to deskew, and its inverse rotation. 
00357 void ComputeDeskewVectors(FCOORD* deskew, FCOORD* reskew); 00358 00363 void ApplyTabConstraints(); 00364 00365 protected: 00366 ICOORD vertical_skew_; //< Estimate of true vertical in this image. 00367 int resolution_; //< Of source image in pixels per inch. 00368 private: 00369 ICOORD image_origin_; //< Top-left of image in deskewed coords 00370 TabVector_LIST vectors_; //< List of rule line and tabstops. 00371 TabVector_IT v_it_; //< Iterator for searching vectors_. 00372 TabVector_LIST dead_vectors_; //< Separators and unpartnered tab vectors. 00373 ICOORDELT_LIST column_widths_; //< List of commonly occurring widths. 00375 WidthCallback* width_cb_; 00377 BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>* tab_grid_; 00378 }; 00379 00380 } // namespace tesseract. 00381 00382 #endif // TESSERACT_TEXTORD_TABFIND_H__