// File:        tabfind.h
// Description: Subclass of BBGrid to find tabstops.
// Author:      Ray Smith
// Created:     Fri Mar 21 15:03:01 PST 2008
//
// (C) Copyright 2008, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

#ifndef TESSERACT_TEXTORD_TABFIND_H__
#define TESSERACT_TEXTORD_TABFIND_H__

#include "alignedblob.h"
#include "callback.h"
#include "tabvector.h"
#include "linefind.h"

// Forward declarations of types that are only used by pointer or reference
// in this header.
class BLOBNBOX;
class BLOBNBOX_LIST;
class TO_BLOCK;
class ScrollView;
struct Pix;

namespace tesseract {

// Type of the callback that TabFind hands out for testing an int for being
// a common column width.
typedef ResultCallback1<bool, int> WidthCallback;

struct AlignedBlobParams;

// Pixel resolution of column width estimates.
const int kColumnWidthFactor = 20;

// The TabFind class contains code to find tab-stops and maintain the
// vectors_ list of tab vectors.
// Also provides an interface to find neighbouring blobs
// in the grid of BLOBNBOXes that is used by multiple subclasses.
// Searching is a complex operation because of the need to enforce
// rule/separator lines, and tabstop boundaries, (when available), so
// as the holder of the list of TabVectors this class provides the functions.
class TabFind : public AlignedBlob {
 public:
  // NOTE(review): the constructor is undocumented in this header. Presumably
  // gridsize, bleft and tright describe the underlying grid, vlines supplies
  // vertical rule lines, and (vertical_x, vertical_y) is an initial estimate
  // of the vertical direction -- confirm against the definition.
  TabFind(int gridsize, const ICOORD& bleft, const ICOORD& tright,
          TabVector_LIST* vlines, int vertical_x, int vertical_y);
  virtual ~TabFind();

  // Insert a list of blobs into the given grid (not necessarily this).
  // If take_ownership is true, then the blobs are removed from the source list.
  // See InsertBlob for the other arguments.
  void InsertBlobList(bool h_spread, bool v_spread, bool large,
                      BLOBNBOX_LIST* blobs, bool take_ownership,
                      BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>* grid);

  // Insert a single blob into the given grid (not necessarily this).
  // If h_spread, then all cells covered horizontally by the box are
  // used, otherwise, just the bottom-left. Similarly for v_spread.
  // If large, then insert only if the bounding box doesn't intersect
  // anything else already in the grid. Returns true if the blob was inserted.
  // A side effect is that the left and right rule edges of the blob are
  // set according to the tab vectors in this (not grid).
  bool InsertBlob(bool h_spread, bool v_spread, bool large, BLOBNBOX* blob,
                  BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>* grid);

  // Find the gutter width and distance to inner neighbour for the given blob.
  // The results are written through gutter_width and neighbour_gap.
  void GutterWidthAndNeighbourGap(int tab_x, int mean_height,
                                  int max_gutter, bool left,
                                  BLOBNBOX* bbox, int* gutter_width,
                                  int* neighbour_gap);

  // Find the next adjacent (to left or right) blob on this text line,
  // with the constraint that it must vertically significantly overlap
  // the input box.
  BLOBNBOX* AdjacentBlob(const BLOBNBOX* bbox,
                         bool right_to_left, int gap_limit);

  // Compute and return, but do not set the type as being BRT_TEXT or
  // BRT_UNKNOWN according to how well it forms a text line.
  BlobRegionType ComputeBlobType(BLOBNBOX* blob);

  // Return the x-coord that corresponds to the right edge for the given
  // box. If there is a rule line to the right that vertically overlaps it,
  // then return the x-coord of the rule line, otherwise return the right
  // edge of the page. For details see RightTabForBox below.
  int RightEdgeForBox(const TBOX& box, bool crossing, bool extended);
  // As RightEdgeForBox, but finds the left Edge instead.
  int LeftEdgeForBox(const TBOX& box, bool crossing, bool extended);

  // Compute the rotation required to deskew, and its inverse rotation.
  // The results are written through deskew and reskew respectively.
  void ComputeDeskewVectors(FCOORD* deskew, FCOORD* reskew);

  // Return true if the given width is close to one of the common
  // widths in column_widths_.
  bool CommonWidth(int width);
  // Return true if the sizes are more than a
  // factor of 2 different.
  static bool DifferentSizes(int size1, int size2);

  // Return a callback for testing CommonWidth.
  // The returned pointer is the width_cb_ member itself, not a copy.
  WidthCallback* WidthCB() {
    return width_cb_;
  }

  // Return the coords at which to draw the image backdrop.
  const ICOORD& image_origin() const {
    return image_origin_;
  }

 protected:
  // Accessors
  // Returns a pointer to the vectors_ member list, so callers can modify it.
  TabVector_LIST* get_vectors() {
    return &vectors_;
  }

  // Top-level function to find TabVectors in an input page block.
  void FindTabVectors(int resolution, TabVector_LIST* hlines,
                      BLOBNBOX_LIST* image_blobs, TO_BLOCK* block,
                      FCOORD* reskew, FCOORD* rerotate);

  // Top-level function to not find TabVectors in an input page block,
  // but setup for single column mode.
  void DontFindTabVectors(int resolution, BLOBNBOX_LIST* image_blobs,
                          TO_BLOCK* block, FCOORD* reskew);

  // Return the TabVector that corresponds to the right edge for the given
  // box. If there is a TabVector to the right that vertically overlaps it,
  // then return it, otherwise return NULL. Note that Right and Left refer
  // to the position of the TabVector, not its type, ie RightTabForBox
  // returns the nearest TabVector to the right of the box, regardless of
  // its type.
  // If a TabVector crosses right through the box (as opposed to grazing one
  // edge or missing entirely), then crossing false will ignore such a line.
  // Crossing true will return the line for BOTH left and right edges.
  // If extended is true, then TabVectors are considered to extend to their
  // extended_start/end_y, otherwise, just the startpt_ and endpt_.
  // These functions make use of an internal iterator to the vectors_ list
  // for speed when used repeatedly on neighbouring boxes. The caveat is
  // that the iterator must be updated whenever the list is modified.
  TabVector* RightTabForBox(const TBOX& box, bool crossing, bool extended);
  // As RightTabForBox, but finds the left TabVector instead.
  TabVector* LeftTabForBox(const TBOX& box, bool crossing, bool extended);
  // Helper function to setup search limits for *TabForBox.
  // The limits are written through min_key and max_key.
  void SetupTabSearch(int x, int y, int* min_key, int* max_key);

  // Display the tab vectors found in this grid.
  ScrollView* DisplayTabVectors(ScrollView* tab_win);

 private:
  // First part of FindTabVectors, which may be used twice if the text
  // is mostly of vertical alignment.
  void FindInitialTabVectors(BLOBNBOX_LIST* image_blobs, TO_BLOCK* block);

  // For each box in the grid, decide whether it is a candidate tab-stop,
  // and if so add it to the tab_grid_.
  ScrollView* FindTabBoxes();

  // Return true if this box looks like a candidate tab stop, and set
  // the appropriate tab type(s) to TT_UNCONFIRMED.
  bool TestBoxForTabs(BLOBNBOX* bbox);

  // Fills the list of TabVector with the tabstops found in the grid,
  // and estimates the logical vertical direction.
  void FindAllTabVectors();
  // Helper for FindAllTabVectors finds the vectors of a particular type.
  int FindTabVectors(int search_size_multiple,
                     TabAlignment alignment,
                     TabVector_LIST* vectors,
                     int* vertical_x, int* vertical_y);
  // Finds a vector corresponding to a tabstop running through the
  // given box of the given alignment type.
  // search_size_multiple is a multiple of height used to control
  // the size of the search.
  // vertical_x and y are updated with an estimate of the real
  // vertical direction. (skew finding.)
  // Returns NULL if no decent tabstop can be found.
  TabVector* FindTabVector(int search_size_multiple,
                           TabAlignment alignment,
                           BLOBNBOX* bbox,
                           int* vertical_x, int* vertical_y);

  // Set the vertical_skew_ member from the given vector and refit
  // all vectors parallel to the skew vector.
  // NOTE(review): "Parellelize" in the name is a misspelling of
  // "Parallelize"; it is kept because the definition and any callers are
  // outside this header.
  void SetVerticalSkewAndParellelize(int vertical_x, int vertical_y);

  // Sort all the current vectors using the vertical_skew_ vector.
  void SortVectors();

  // Evaluate all the current tab vectors.
  void EvaluateTabs();

  // Trace textlines from one side to the other of each tab vector, saving
  // the most frequent column widths found in a list so that a given width
  // can be tested for being a common width with a simple callback function.
  void ComputeColumnWidths(ScrollView* tab_win);

  // Set the region_type_ member for all the blobs in the grid.
  void ComputeBlobGoodness();

  // Set the region_type_ member of the blob, if not already known.
  void SetBlobRegionType(BLOBNBOX* blob);

  // Mark blobs as being in a vertical text line where that is the case.
  void MarkVerticalText();

  // Returns true if the majority of the image is vertical text lines.
  bool TextMostlyVertical();

  // If this box looks like it is on a textline in the given direction,
  // return the width of the textline-like group of blobs, and the number
  // of blobs found.
  // For more detail see FindTextlineSegment below.
  int FindTextlineWidth(bool right_to_left, BLOBNBOX* bbox, int* blob_count);

  // Search from the given tabstop bbox to the next opposite
  // tabstop bbox on the same text line, which may be itself.
  // Returns true if the search is successful, and sets
  // start_pt, end_pt to the fitted baseline.
  // NOTE(review): the original comment said a "width" output receives the
  // measured width of the text line (column width estimate), but the declared
  // outputs are left_edge and right_edge; presumably they bound that width --
  // confirm against the definition.
  bool TraceTextline(BLOBNBOX* bbox, ICOORD* start_pt, ICOORD* end_pt,
                     int* left_edge, int* right_edge);

  // Search from the given bbox in the given direction until the next tab
  // vector is found or a significant horizontal gap is found.
  // Returns the width of the line if the search is successful, (defined
  // as good coverage of the width and a good fitting baseline) and sets
  // start_pt, end_pt to the fitted baseline, left_blob, right_blob to
  // the ends of the line. Returns zero otherwise.
  // Sets blob_count to the number of blobs found on the line.
  // On input, either both left_vector and right_vector should be NULL,
  // indicating a basic search, or both left_vector and right_vector should
  // be not NULL and one of *left_vector and *right_vector should be not NULL,
  // in which case the search is strictly between tab vectors and will return
  // zero if a gap is found before the opposite tab vector is reached, or a
  // conflicting tab vector is found.
  // If ignore_images is true, then blobs with aligned_text() < 0 are treated
  // as if they do not exist.
  // NOTE(review): the first parameter is spelled right_to_lefts here, while
  // the sibling declarations use right_to_left; presumably the same meaning.
  int FindTextlineSegment(bool right_to_lefts, bool ignore_images,
                          BLOBNBOX* bbox, int* blob_count,
                          ICOORD* start_pt, ICOORD* end_pt,
                          TabVector** left_vector, TabVector** right_vector,
                          BLOBNBOX** left_blob, BLOBNBOX** right_blob);

  // Find the next adjacent (to left or right) blob on this text line,
  // with the constraint that it must vertically significantly overlap
  // the [top_y, bottom_y] range.
  // If ignore_images is true, then blobs with aligned_text() < 0 are treated
  // as if they do not exist.
  BLOBNBOX* AdjacentBlob(const BLOBNBOX* bbox,
                         bool right_to_left, bool ignore_images,
                         int gap_limit, int top_y, int bottom_y);

  // Add a bi-directional partner relationship between the left
  // and the right. If one (or both) of the vectors is a separator,
  // extend a nearby extendable vector or create a new one of the
  // correct type, using the given left or right blob as a guide.
  void AddPartnerVector(BLOBNBOX* left_blob, BLOBNBOX* right_blob,
                        TabVector* left, TabVector* right);

  // Remove separators and unused tabs from the main vectors_ list
  // to the dead_vectors_ list.
  void CleanupTabs();

  // Deskew the tab vectors and blobs, computing the rotation and resetting
  // the stored vertical_skew_. The deskew inverse is returned in reskew.
  void Deskew(TabVector_LIST* hlines, BLOBNBOX_LIST* image_blobs,
              TO_BLOCK* block, FCOORD* reskew);

  // Restart everything and rotate the input blobs ready for vertical text.
  void ResetForVerticalText(TabVector_LIST* hlines, BLOBNBOX_LIST* image_blobs,
                            TO_BLOCK* block, FCOORD* rerotate);

  // Compute and apply constraints to the end positions of TabVectors so
  // that where possible partners end at the same y coordinate.
  void ApplyTabConstraints();

 protected:
  ICOORD vertical_skew_;          // Estimate of true vertical in this image.
  int resolution_;                // Of source image in pixels per inch.
 private:
  ICOORD image_origin_;           // Top-left of image in deskewed coords
  TabVector_LIST vectors_;        // List of rule line and tabstops.
  TabVector_IT v_it_;             // Iterator for searching vectors_.
  TabVector_LIST dead_vectors_;   // Separators and unpartnered tab vectors.
  ICOORDELT_LIST column_widths_;  // List of commonly occurring widths.
  // Callback to test an int for being a common width.
  // NOTE(review): raw pointer; who deletes it is not visible from this
  // header -- confirm in the destructor before copying a TabFind.
  WidthCallback* width_cb_;
  // Instance of the base class that contains only candidate tab stops.
  // NOTE(review): raw pointer; ownership is likewise not visible here.
  BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>* tab_grid_;
};

}  // namespace tesseract.

#endif  // TESSERACT_TEXTORD_TABFIND_H__