// Tesseract 3.01
00001 00002 // File: colfind.h 00003 // Description: Class to find columns in the grid of BLOBNBOXes. 00004 // Author: Ray Smith 00005 // Created: Thu Feb 21 14:04:01 PST 2008 00006 // 00007 // (C) Copyright 2008, Google Inc. 00008 // Licensed under the Apache License, Version 2.0 (the "License"); 00009 // you may not use this file except in compliance with the License. 00010 // You may obtain a copy of the License at 00011 // http://www.apache.org/licenses/LICENSE-2.0 00012 // Unless required by applicable law or agreed to in writing, software 00013 // distributed under the License is distributed on an "AS IS" BASIS, 00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 // See the License for the specific language governing permissions and 00016 // limitations under the License. 00017 // 00019 00020 #ifndef TESSERACT_TEXTORD_COLFIND_H__ 00021 #define TESSERACT_TEXTORD_COLFIND_H__ 00022 00023 #include "tabfind.h" 00024 #include "imagefind.h" 00025 #include "colpartitiongrid.h" 00026 #include "colpartitionset.h" 00027 #include "ocrblock.h" 00028 00029 class ScrollView; 00030 class TO_BLOCK; 00031 class STATS; 00032 class BLOCK_LIST; 00033 struct Boxa; 00034 struct Pixa; 00035 00036 namespace tesseract { 00037 00038 extern BOOL_VAR_H(textord_tabfind_find_tables, false, "run table detection"); 00039 00040 class StrokeWidth; 00041 class LineSpacing; 00042 class TempColumn_LIST; 00043 class ColSegment_LIST; 00044 class ColumnGroup_LIST; 00045 class ColPartitionSet; 00046 class ColPartitionSet_LIST; 00047 00048 // The ColumnFinder class finds columns in the grid. 00049 class ColumnFinder : public TabFind { 00050 public: 00051 // Gridsize is an estimate of the text size in the image. A suitable value 00052 // is in TO_BLOCK::line_size after find_components has been used to make 00053 // the blobs. 00054 // bleft and tright are the bounds of the image (rectangle) being processed. 
00055 // vlines is a (possibly empty) list of TabVector and vertical_x and y are 00056 // the sum logical vertical vector produced by LineFinder::FindVerticalLines. 00057 ColumnFinder(int gridsize, const ICOORD& bleft, const ICOORD& tright, 00058 int resolution, TabVector_LIST* vlines, TabVector_LIST* hlines, 00059 int vertical_x, int vertical_y); 00060 virtual ~ColumnFinder(); 00061 00062 // ====================================================================== 00063 // The main function of ColumnFinder is broken into pieces to facilitate 00064 // optional insertion of orientation and script detection in an efficient 00065 // way. The calling sequence IS MANDATORY however, whether or not 00066 // OSD is being used: 00067 // 1. Construction. 00068 // 2. IsVerticallyAlignedText. 00069 // 3. CorrectOrientation. 00070 // 4. FindBlocks. 00071 // 5. Destruction. Use of a single column finder for multiple images does not 00072 // make sense. 00073 // TODO(rays) break up column finder further into smaller classes, as 00074 // there is a lot more to it than column finding now. 00075 // ====================================================================== 00076 00077 // Tests for vertical alignment of text (returning true if so), and 00078 // generates a list of blobs for orientation and script detection. Note that 00079 // the vertical alignment may be due to text whose writing direction is 00080 // vertical, like say Japanese, or due to text whose writing direction is 00081 // horizontal but whose text appears vertically aligned because the image is 00082 // not the right way up. 00083 bool IsVerticallyAlignedText(TO_BLOCK* block, BLOBNBOX_CLIST* osd_blobs); 00084 00085 // Rotates the blobs and the TabVectors so that the gross writing direction 00086 // (text lines) are horizontal and lines are read down the page. 00087 // Applied rotation stored in rotation_. 
00088 // A second rotation is calculated for application during recognition to 00089 // make the rotated blobs upright for recognition. 00090 // Subsequent rotation stored in text_rotation_. 00091 // 00092 // Arguments: 00093 // vertical_text_lines is true if the text lines are vertical. 00094 // recognition_rotation [0..3] is the number of anti-clockwise 90 degree 00095 // rotations from osd required for the text to be upright and readable. 00096 void CorrectOrientation(TO_BLOCK* block, bool vertical_text_lines, 00097 int recognition_rotation); 00098 00099 // Finds the text and image blocks, returning them in the blocks and to_blocks 00100 // lists. (Each TO_BLOCK points to the basic BLOCK and adds more information.) 00101 // If boxa and pixa are not NULL, they are assumed to be the output of 00102 // ImageFinder::FindImages, and are used to generate image blocks. 00103 // The input boxa and pixa are destroyed. 00104 // Imageheight should be the pixel height of the original image. 00105 // The input block is the result of a call to find_components, and contains 00106 // the blobs found in the image. These blobs will be removed and placed 00107 // in the output blocks, while unused ones will be deleted. 00108 // If single_column is true, the input is treated as single column, but 00109 // it is still divided into blocks of equal line spacing/text size. 00110 // Returns -1 if the user requested retry with more debug info. 00111 int FindBlocks(bool single_column, int imageheight, 00112 TO_BLOCK* block, Boxa* boxa, Pixa* pixa, 00113 BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks); 00114 00115 // Get the rotation required to deskew, and its inverse rotation. 00116 void GetDeskewVectors(FCOORD* deskew, FCOORD* reskew); 00117 00118 private: 00119 // Displays the blob and block bounding boxes in a window called Blocks. 00120 void DisplayBlocks(BLOCK_LIST* blocks); 00121 // Displays the column edges at each grid y coordinate defined by 00122 // best_columns_. 
00123 void DisplayColumnBounds(PartSetVector* sets); 00124 00125 // Converts the arrays of Box/Pix to a list of C_OUTLINE, and then to blobs. 00126 // The output is a list of C_BLOBs for the images, but the C_OUTLINEs 00127 // contain no data. 00128 void ExtractImageBlobs(int image_height, Boxa* boxa, Pixa* pixa); 00129 00131 00132 // Creates the initial ColPartitions, and puts them in a ColPartitionSet 00133 // for each grid y coordinate, storing the ColPartitionSets in part_sets_. 00134 // After creating the ColPartitonSets, attempts to merge them where they 00135 // overlap and unique the BLOBNBOXes within. 00136 // The return value is the number of ColPartitionSets made. 00137 int MakeColumnPartitions(); 00138 // Partition the BLOBNBOXES horizontally at the given grid y, creating a 00139 // ColPartitionSet which is returned. NULL is returned if there are no 00140 // BLOBNBOXES at the given grid y. 00141 ColPartitionSet* PartitionsAtGridY(int grid_y); 00142 // Insert the blobs in the given list into the main grid and for 00143 // each one also make it a separate unknown partition. 00144 // If filter is true, use only the blobs that are above a threshold in 00145 // size or a non-isolated. 00146 void InsertSmallBlobsAsUnknowns(bool filter, BLOBNBOX_LIST* blobs); 00147 // Helper function for PartitionsAtGridY, with a long argument list. 00148 // This bbox is of unknown type, so it is added to an unk_partition. 00149 // If the edge is past the unk_right_margin then unk_partition has to be 00150 // completed and a new one made. See CompletePartition and StartPartition 00151 // for the other args. 00152 void ProcessUnknownBlob(int page_edge, BLOBNBOX* bbox, 00153 ColPartition** unk_partition, 00154 ColPartition_IT* unk_part_it, 00155 TabVector** unk_right_line, 00156 int* unk_right_margin, 00157 int* unk_prev_margin, 00158 bool* unk_edge_is_left); 00159 // Creates and returns a new ColPartition of the given start_type 00160 // and adds the given bbox to it. 
00161 // Also finds the left and right tabvectors that bound the textline, setting 00162 // the members of the returned ColPartition appropriately: 00163 // If the left tabvector is less constraining than the input left_margin 00164 // (assumed to be the right edge of the previous partition), then the 00165 // tabvector is ignored and the left_margin used instead. 00166 // If the right tabvector is more constraining than the input *right_margin, 00167 // (probably the right edge of the page), then the *right_margin is adjusted 00168 // to use the tabvector. 00169 // *edge_is_left is set to true if the right tabvector is good and used as the 00170 // margin, so we can include blobs that overhang the tabvector in this 00171 // partition. 00172 ColPartition* StartPartition(BlobRegionType start_type, int left_margin, 00173 BLOBNBOX* bbox, TabVector** right_line, 00174 int* right_margin, bool* edge_is_left); 00175 // Completes the given partition, and adds it to the given iterator. 00176 // The right_margin on input is the left edge of the next blob if there is 00177 // one. The right tab vector plus a margin is used as the right margin if 00178 // it is more constraining than the next blob, but if there are no more 00179 // blobs, we want the right margin to make it to the page edge. 00180 // The return value is the next left margin, being the right edge of the 00181 // bounding box of blobs. 00182 int CompletePartition(bool no_more_blobs, int page_edge, 00183 TabVector* right_line, int* right_margin, 00184 ColPartition** partition, ColPartition_IT* part_it); 00185 00186 00188 00189 // Makes an ordered list of candidates to partition the width of the page 00190 // into columns using the part_sets_. 00191 // See AddToColumnSetsIfUnique for the ordering. 00192 // If single_column, then it just makes a single page-wide fake column. 
00193 void MakeColumnCandidates(bool single_column); 00194 // Attempt to improve the column_candidates by expanding the columns 00195 // and adding new partitions from the partition sets in src_sets. 00196 // Src_sets may be equal to column_candidates, in which case it will 00197 // use them as a source to improve themselves. 00198 void ImproveColumnCandidates(PartSetVector* src_sets, 00199 PartSetVector* column_sets); 00200 // Prints debug information on the column candidates. 00201 void PrintColumnCandidates(const char* title); 00202 // Finds the optimal set of columns that cover the entire image with as 00203 // few changes in column partition as possible. 00204 void AssignColumns(); 00205 // Finds the biggest range in part_sets_ that has no assigned column, but 00206 // column assignment is possible. 00207 bool BiggestUnassignedRange(const bool* any_columns_possible, 00208 int* start, int* end); 00209 // Finds the modal compatible column_set_ index within the given range. 00210 int RangeModalColumnSet(int** column_set_costs, const int* assigned_costs, 00211 int start, int end); 00212 // Given that there are many column_set_id compatible columns in the range, 00213 // shrinks the range to the longest contiguous run of compatibility, allowing 00214 // gaps where no columns are possible, but not where competing columns are 00215 // possible. 00216 void ShrinkRangeToLongestRun(int** column_set_costs, 00217 const int* assigned_costs, 00218 const bool* any_columns_possible, 00219 int column_set_id, 00220 int* best_start, int* best_end); 00221 // Moves start in the direction of step, upto, but not including end while 00222 // the only incompatible regions are no more than kMaxIncompatibleColumnCount 00223 // in size, and the compatible regions beyond are bigger. 
00224 void ExtendRangePastSmallGaps(int** column_set_costs, 00225 const int* assigned_costs, 00226 const bool* any_columns_possible, 00227 int column_set_id, 00228 int step, int end, int* start); 00229 // Assigns the given column_set_id to the part_sets_ in the given range. 00230 void AssignColumnToRange(int column_set_id, int start, int end, 00231 int** column_set_costs, int* assigned_costs); 00232 00233 // Computes the mean_column_gap_. 00234 void ComputeMeanColumnGap(); 00235 00238 00239 // Removes the ColPartitions from part_sets_, the ColPartitionSets that 00240 // contain them, and puts them in the part_grid_ after ensuring that no 00241 // BLOBNBOX is owned by more than one of them. 00242 void MovePartitionsToGrid(); 00243 // Splits partitions that cross columns where they have nothing in the gap. 00244 void GridSplitPartitions(); 00245 // Merges partitions where there is vertical overlap, within a single column, 00246 // and the horizontal gap is small enough. 00247 void GridMergePartitions(); 00248 // Resolves unknown partitions from the unknown_parts_ list by merging them 00249 // with a close neighbour, inserting them into the grid with a known type, 00250 // or declaring them to be noise. 00251 void GridInsertUnknowns(); 00252 // Add horizontal line separators as partitions. 00253 void GridInsertHLinePartitions(); 00254 // Add vertical line separators as partitions. 00255 void GridInsertVLinePartitions(); 00256 // For every ColPartition in the grid, sets its type based on position 00257 // in the columns. 00258 void SetPartitionTypes(); 00259 // Only images remain with multiple types in a run of partners. 00260 // Sets the type of all in the group to the maximum of the group. 00261 void SmoothPartnerRuns(); 00262 00264 00265 // Helper functions for TransformToBlocks. 00266 // Add the part to the temp list in the correct order. 
00267 void AddToTempPartList(ColPartition* part, ColPartition_CLIST* temp_list); 00268 // Add everything from the temp list to the work_set assuming correct order. 00269 void EmptyTempPartList(ColPartition_CLIST* temp_list, 00270 WorkingPartSet_LIST* work_set); 00271 00272 // Transform the grid of partitions to the output blocks. 00273 void TransformToBlocks(BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks); 00274 00275 // Undo the deskew that was done in FindTabVectors, as recognition is done 00276 // without correcting blobs or blob outlines for skew. 00277 // Reskew the completed blocks to put them back to the original rotated coords 00278 // that were created by CorrectOrientation. 00279 // Blocks that were identified as vertical text (relative to the rotated 00280 // coordinates) are further rotated so the text lines are horizontal. 00281 // blob polygonal outlines are rotated to match the position of the blocks 00282 // that they are in, and their bounding boxes are recalculated to be accurate. 00283 // Record appropriate inverse transformations and required 00284 // classifier transformation in the blocks. 00285 void RotateAndReskewBlocks(TO_BLOCK_LIST* to_blocks); 00286 00287 00288 // Move all the small and noise blobs into the main blobs list of 00289 // the block from the to_blocks list that contains them. 00290 void MoveSmallBlobs(BLOBNBOX_LIST* bblobs, TO_BLOCK_LIST* to_blocks); 00291 00292 // The minimum gutter width to apply for finding columns. 00293 // Modified when vertical text is detected to prevent detection of 00294 // vertical text lines as columns. 00295 int min_gutter_width_; 00296 // The mean gap between columns over the page. 00297 int mean_column_gap_; 00298 // The rotation vector needed to convert original coords to deskewed. 00299 FCOORD deskew_; 00300 // The rotation vector needed to convert deskewed back to original coords. 00301 FCOORD reskew_; 00302 // The rotation vector used to rotate vertically oriented pages. 
00303 FCOORD rotation_; 00304 // The rotation vector needed to convert the rotated back to original coords. 00305 FCOORD rerotate_; 00306 // The additional rotation vector needed to rotate text for recognition. 00307 FCOORD text_rotation_; 00308 // The part_sets_ are the initial text-line-like partition of the grid, 00309 // and is a vector of ColPartitionSets. 00310 PartSetVector part_sets_; 00311 // The column_sets_ contain the ordered candidate ColPartitionSets that 00312 // define the possible divisions of the page into columns. 00313 PartSetVector column_sets_; 00314 // A simple array of pointers to the best assigned column division at 00315 // each grid y coordinate. 00316 ColPartitionSet** best_columns_; 00317 // The grid used for creating initial partitions with strokewidth. 00318 StrokeWidth* stroke_width_; 00319 // The grid used to hold ColPartitions after the columns have been determined. 00320 ColPartitionGrid part_grid_; 00321 // List of ColPartitions that are no longer needed after they have been 00322 // turned into regions, but are kept around because they are referenced 00323 // by the part_grid_. 00324 ColPartition_LIST good_parts_; 00325 // List of ColPartitions of unknown type. 00326 ColPartition_LIST unknown_parts_; 00327 // List of ColPartitions that have been declared noise. 00328 ColPartition_LIST noise_parts_; 00329 // The fake blobs that are made from the input boxa/pixa pair. 00330 BLOBNBOX_LIST image_bblobs_; 00331 // Horizontal line separators. 00332 TabVector_LIST horizontal_lines_; 00333 // Allow a subsequent instance to reuse the blocks window. 00334 // Not thread-safe, but multiple threads shouldn't be using windows anyway. 00335 static ScrollView* blocks_win_; 00336 }; 00337 00338 } // namespace tesseract. 00339 00340 #endif // TESSERACT_TEXTORD_COLFIND_H__