00001 00002 // File: colfind.h 00003 // Description: Class to find columns in the grid of BLOBNBOXes. 00004 // Author: Ray Smith 00005 // Created: Thu Feb 21 14:04:01 PST 2008 00006 // 00007 // (C) Copyright 2008, Google Inc. 00008 // Licensed under the Apache License, Version 2.0 (the "License"); 00009 // you may not use this file except in compliance with the License. 00010 // You may obtain a copy of the License at 00011 // http://www.apache.org/licenses/LICENSE-2.0 00012 // Unless required by applicable law or agreed to in writing, software 00013 // distributed under the License is distributed on an "AS IS" BASIS, 00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 // See the License for the specific language governing permissions and 00016 // limitations under the License. 00017 // 00019 00020 #ifndef TESSERACT_TEXTORD_COLFIND_H__ 00021 #define TESSERACT_TEXTORD_COLFIND_H__ 00022 00023 #include "tabfind.h" 00024 #include "tablefind.h" 00025 #include "imagefind.h" 00026 #include "colpartition.h" 00027 #include "colpartitionset.h" 00028 #include "ocrblock.h" 00029 00030 class ScrollView; 00031 class TO_BLOCK; 00032 class STATS; 00033 class BLOCK_LIST; 00034 struct Boxa; 00035 struct Pixa; 00036 00037 namespace tesseract { 00038 00039 class StrokeWidth; 00040 class LineSpacing; 00041 class TempColumn_LIST; 00042 class ColSegment_LIST; 00043 class ColumnGroup_LIST; 00044 class ColPartitionSet; 00045 class ColPartitionSet_LIST; 00046 00047 // The ColumnFinder class finds columns in the grid. 00048 class ColumnFinder : public TabFind { 00049 public: 00050 // Gridsize is an estimate of the text size in the image. A suitable value 00051 // is in TO_BLOCK::line_size after find_components has been used to make 00052 // the blobs. 00053 // bleft and tright are the bounds of the image (rectangle) being processed. 00054 // vlines is a (possibly empty) list of TabVector and vertical_x and y are 00055 // the sum logical vertical vector produced by LineFinder::FindVerticalLines. 00056 ColumnFinder(int gridsize, const ICOORD& bleft, const ICOORD& tright, 00057 TabVector_LIST* vlines, TabVector_LIST* hlines, 00058 int vertical_x, int vertical_y); 00059 virtual ~ColumnFinder(); 00060 00061 // Finds the text and image blocks, returning them in the blocks and to_blocks 00062 // lists. (Each TO_BLOCK points to the basic BLOCK and adds more information.) 00063 // If boxa and pixa are not NULL, they are assumed to be the output of 00064 // ImageFinder::FindImages, and are used to generate image blocks. 00065 // The input boxa and pixa are destroyed. 00066 // Imageheight and resolution should be the pixel height and resolution in 00067 // pixels per inch of the original image. 00068 // The input block is the result of a call to find_components, and contains 00069 // the blobs found in the image. These blobs will be removed and placed 00070 // in the output blocks, while unused ones will be deleted. 00071 // If single_column is true, the input is treated as single column, but 00072 // it is still divided into blocks of equal line spacing/text size. 00073 // Returns -1 if the user requested retry with more debug info. 00074 int FindBlocks(int imageheight, int resolution, bool single_column, 00075 TO_BLOCK* block, Boxa* boxa, Pixa* pixa, 00076 BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks); 00077 00078 private: 00079 // Displays the blob and block bounding boxes in a window called Blocks. 00080 void DisplayBlocks(BLOCK_LIST* blocks); 00081 // Displays the column edges at each grid y coordinate defined by 00082 // best_columns_. 00083 void DisplayColumnBounds(PartSetVector* sets); 00084 00085 // Converts the arrays of Box/Pix to a list of C_OUTLINE, and then to blobs. 00086 // The output is a list of C_BLOBs for the images, but the C_OUTLINEs 00087 // contain no data. 00088 void ExtractImageBlobs(int image_height, Boxa* boxa, Pixa* pixa); 00089 00091 00092 // Creates the initial ColPartitions, and puts them in a ColPartitionSet 00093 // for each grid y coordinate, storing the ColPartitionSets in part_sets_. 00094 // After creating the ColPartitonSets, attempts to merge them where they 00095 // overlap and unique the BLOBNBOXes within. 00096 // The return value is the number of ColPartitionSets made. 00097 int MakeColumnPartitions(); 00098 // Partition the BLOBNBOXES horizontally at the given grid y, creating a 00099 // ColPartitionSet which is returned. NULL is returned if there are no 00100 // BLOBNBOXES at the given grid y. 00101 ColPartitionSet* PartitionsAtGridY(int grid_y); 00102 // Insert the blobs in the given list into the main grid and for 00103 // each one also make it a separate unknown partition. 00104 // If filter is true, use only the blobs that are above a threshold in 00105 // size or a non-isolated. 00106 void InsertSmallBlobsAsUnknowns(bool filter, BLOBNBOX_LIST* blobs); 00107 // Helper function for PartitionsAtGridY, with a long argument list. 00108 // This bbox is of unknown type, so it is added to an unk_partition. 00109 // If the edge is past the unk_right_margin then unk_partition has to be 00110 // completed and a new one made. See CompletePartition and StartPartition 00111 // for the other args. 00112 void ProcessUnknownBlob(int page_edge, BLOBNBOX* bbox, 00113 ColPartition** unk_partition, 00114 ColPartition_IT* unk_part_it, 00115 TabVector** unk_right_line, 00116 int* unk_right_margin, 00117 int* unk_prev_margin, 00118 bool* unk_edge_is_left); 00119 // Creates and returns a new ColPartition of the given start_type 00120 // and adds the given bbox to it. 00121 // Also finds the left and right tabvectors that bound the textline, setting 00122 // the members of the returned ColPartition appropriately: 00123 // If the left tabvector is less constraining than the input left_margin 00124 // (assumed to be the right edge of the previous partition), then the 00125 // tabvector is ignored and the left_margin used instead. 00126 // If the right tabvector is more constraining than the input *right_margin, 00127 // (probably the right edge of the page), then the *right_margin is adjusted 00128 // to use the tabvector. 00129 // *edge_is_left is set to true if the right tabvector is good and used as the 00130 // margin, so we can include blobs that overhang the tabvector in this 00131 // partition. 00132 ColPartition* StartPartition(BlobRegionType start_type, int left_margin, 00133 BLOBNBOX* bbox, TabVector** right_line, 00134 int* right_margin, bool* edge_is_left); 00135 // Completes the given partition, and adds it to the given iterator. 00136 // The right_margin on input is the left edge of the next blob if there is 00137 // one. The right tab vector plus a margin is used as the right margin if 00138 // it is more constraining than the next blob, but if there are no more 00139 // blobs, we want the right margin to make it to the page edge. 00140 // The return value is the next left margin, being the right edge of the 00141 // bounding box of blobs. 00142 int CompletePartition(bool no_more_blobs, int page_edge, 00143 TabVector* right_line, int* right_margin, 00144 ColPartition** partition, ColPartition_IT* part_it); 00145 00146 00148 00149 // Makes an ordered list of candidates to partition the width of the page 00150 // into columns using the part_sets_. 00151 // See AddToColumnSetsIfUnique for the ordering. 00152 // If single_column, then it just makes a single page-wide fake column. 00153 void MakeColumnCandidates(bool single_column); 00154 // Attempt to improve the column_candidates by expanding the columns 00155 // and adding new partitions from the partition sets in src_sets. 00156 // Src_sets may be equal to column_candidates, in which case it will 00157 // use them as a source to improve themselves. 00158 void ImproveColumnCandidates(PartSetVector* src_sets, 00159 PartSetVector* column_sets); 00160 // Prints debug information on the column candidates. 00161 void PrintColumnCandidates(const char* title); 00162 // Finds the optimal set of columns that cover the entire image with as 00163 // few changes in column partition as possible. 00164 void AssignColumns(); 00165 // Finds the biggest range in part_sets_ that has no assigned column, but 00166 // column assignment is possible. 00167 bool BiggestUnassignedRange(const bool* any_columns_possible, 00168 int* start, int* end); 00169 // Finds the modal compatible column_set_ index within the given range. 00170 int RangeModalColumnSet(bool** possible_column_sets, 00171 int start, int end); 00172 // Given that there are many column_set_id compatible columns in the range, 00173 // shrinks the range to the longest contiguous run of compatibility, allowing 00174 // gaps where no columns are possible, but not where competing columns are 00175 // possible. 00176 void ShrinkRangeToLongestRun(bool** possible_column_sets, 00177 const bool* any_columns_possible, 00178 int column_set_id, 00179 int* best_start, int* best_end); 00180 // Moves start in the direction of step, upto, but not including end while 00181 // the only incompatible regions are no more than kMaxIncompatibleColumnCount 00182 // in size, and the compatible regions beyond are bigger. 00183 void ExtendRangePastSmallGaps(bool** possible_column_sets, 00184 const bool* any_columns_possible, 00185 int column_set_id, 00186 int step, int end, int* start); 00187 // Assigns the given column_set_id to the part_sets_ in the given range. 00188 void AssignColumnToRange(int column_set_id, int start, int end, 00189 bool** assigned_column_sets); 00190 00191 // Computes the mean_column_gap_. 00192 void ComputeMeanColumnGap(); 00193 00196 00197 // Removes the ColPartitions from part_sets_, the ColPartitionSets that 00198 // contain them, and puts them in the part_grid_ after ensuring that no 00199 // BLOBNBOX is owned by more than one of them. 00200 void MovePartitionsToGrid(); 00201 // Splits partitions that cross columns where they have nothing in the gap. 00202 void GridSplitPartitions(); 00203 // Merges partitions where there is vertical overlap, within a single column, 00204 // and the horizontal gap is small enough. 00205 void GridMergePartitions(); 00206 // Resolves unknown partitions from the unknown_parts_ list by merging them 00207 // with a close neighbour, inserting them into the grid with a known type, 00208 // or declaring them to be noise. 00209 void GridInsertUnknowns(); 00210 // Add horizontal line separators as partitions. 00211 void GridInsertHLinePartitions(); 00212 // Improves the margins of the ColPartitions in the grid by calling 00213 // FindPartitionMargins on each. 00214 void GridFindMargins(); 00215 // Improves the margins of the ColPartitions in the list by calling 00216 // FindPartitionMargins on each. 00217 void ListFindMargins(ColPartition_LIST* parts); 00218 // Improves the margins of the ColPartition by searching for 00219 // neighbours that vertically overlap significantly. 00220 void FindPartitionMargins(ColPartitionSet* columns, ColPartition* part); 00221 // Starting at x, and going in the specified direction, upto x_limit, finds 00222 // the margin for the given y range by searching sideways, 00223 // and ignoring not_this. 00224 int FindMargin(int x, bool right_to_left, int x_limit, 00225 int y_bottom, int y_top, const ColPartition* not_this); 00226 // For every ColPartition in the grid, sets its type based on position 00227 // in the columns. 00228 void SetPartitionTypes(); 00229 00232 00233 // For every ColPartition in the grid, finds its upper and lower neighbours. 00234 void FindPartitionPartners(); 00235 // Finds the best partner in the given direction for the given partition. 00236 // Stores the result with AddPartner. 00237 void FindPartitionPartners(bool upper, ColPartition* part); 00238 // For every ColPartition with multiple partners in the grid, reduces the 00239 // number of partners to 0 or 1. 00240 void RefinePartitionPartners(); 00241 // Only images remain with multiple types in a run of partners. 00242 // Sets the type of all in the group to the maximum of the group. 00243 void SmoothPartnerRuns(); 00244 00247 00248 // Copy cleaned partitions from part_grid_ to clean_part_grid_ and 00249 // insert dot-like noise into period_grid_ 00250 void GetCleanPartitions(TO_BLOCK* block); 00251 00252 // High level function to perform table detection 00253 void LocateTables(); 00254 00255 // Get Column segments from best_columns_ 00256 void GetColumnBlocks(ColSegment_LIST *col_segments); 00257 00258 // Group Column segments into consecutive single column regions. 00259 void GroupColumnBlocks(ColSegment_LIST *current_segments, 00260 ColSegment_LIST *col_segments); 00261 00262 // Check if two boxes are consecutive within the same column 00263 bool ConsecutiveBoxes(const TBOX &b1, const TBOX &b2); 00264 00265 // Set left, right and top, bottom spacings of each colpartition. 00266 // Left/right spacings are w.r.t the column boundaries 00267 // Top/bottom spacings are w.r.t. previous and next colpartitions 00268 void SetPartitionSpacings(); 00269 00270 // Set spacing and closest neighbors above and below a given colpartition. 00271 void SetVerticalSpacing(ColPartition* part); 00272 00273 // Set global spacing estimates 00274 void SetGlobalSpacings(); 00275 00276 // Mark partitions as table rows/cells. 00277 void GridMarkTablePartitions(); 00278 00279 // Check if the partition has at lease one large gap between words or no 00280 // significant gap at all 00281 bool HasWideOrNoInterWordGap(ColPartition* part); 00282 00283 // Check if a period lies in the inter-wrod gap in the parition boxes 00284 bool LiesInGap(BLOBNBOX* period, BLOBNBOX_CLIST* boxes); 00285 00286 // Filter individual text partitions marked as table partitions 00287 // consisting of paragraph endings, small section headings, and 00288 // headers and footers. 00289 void FilterFalseAlarms(); 00290 00291 // Mark all ColPartitions as table cells that have a table cell above 00292 // and below them 00293 void SmoothTablePartitionRuns(); 00294 00295 // Set the ratio of candidate table partitions in each column 00296 void SetColumnsType(ColSegment_LIST* col_segments); 00297 00298 // Move Column Blocks to col_seg_grid_ 00299 void MoveColSegmentsToGrid(ColSegment_LIST *segments, 00300 ColSegmentGrid *col_seg_grid); 00301 00302 // Merge Column Blocks that were split due to the presence of a table 00303 void GridMergeColumnBlocks(); 00304 00305 // Merge table cells into table columns 00306 void GetTableColumns(ColSegment_LIST *table_columns); 00307 00308 // Get Column segments from best_columns_ 00309 void GetTableRegions(ColSegment_LIST *table_columns, 00310 ColSegment_LIST *table_regions); 00311 00312 // Merge table regions corresponding to tables spanning multiple columns 00313 void GridMergeTableRegions(); 00314 bool BelongToOneTable(const TBOX &box1, const TBOX &box2); 00315 00316 // Adjust table boundaries by building a tight bounding box around all 00317 // ColPartitions contained in it. 00318 void AdjustTableBoundaries(); 00319 00320 // Checks whether the horizontal line belong to the table by looking at the 00321 // side spacing of extra ColParitions that will be included in the table 00322 // due to expansion 00323 bool HLineBelongsToTable(ColPartition* part, const TBOX& table_box); 00324 00325 // Look for isolated column headers above the given table box and 00326 // include them in the table 00327 void IncludeLeftOutColumnHeaders(TBOX& table_box); 00328 00329 // Remove false alarms consiting of a single column 00330 void DeleteSingleColumnTables(); 00331 00332 // Return true if at least one gap larger than the global x-height 00333 // exists in the horizontal projection 00334 bool GapInXProjection(int* xprojection, int length); 00335 00336 // Displays Colpartitions marked as table row. Overlays them on top of 00337 // part_grid_. 00338 void DisplayColSegments(ColSegment_LIST *cols, ScrollView* win, 00339 ScrollView::Color color); 00340 00341 // Displays the colpartitions using a new coloring on an existing window. 00342 // Note: This method is only for debug purpose during development and 00343 // would not be part of checked in code 00344 void DisplayColPartitions(ScrollView* win, 00345 ScrollView::Color color); 00346 00347 // Write ColParitions and Tables to a PIX image 00348 // Note: This method is only for debug purpose during development and 00349 // would not be part of checked in code 00350 void WriteToPix(); 00351 00352 // Merge all colpartitions in table regions to make them a single 00353 // colpartition and revert types of isolated table cells not 00354 // assigned to any table to their original types. 00355 void MakeTableBlocks(); 00356 00358 00359 // Helper functions for TransformToBlocks. 00360 // Add the part to the temp list in the correct order. 00361 void AddToTempPartList(ColPartition* part, ColPartition_CLIST* temp_list); 00362 // Add everything from the temp list to the work_set assuming correct order. 00363 void EmptyTempPartList(ColPartition_CLIST* temp_list, 00364 WorkingPartSet_LIST* work_set); 00365 00366 // Transform the grid of partitions to the output blocks. 00367 void TransformToBlocks(BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks); 00368 00369 // Reskew the completed blocks to put them back to the original coords. 00370 // (Blob outlines are not corrected for skew.) 00371 // Rotate blobs and blocks individually so text line direction is 00372 // horizontal. Record appropriate inverse transformations and required 00373 // classifier transformation in the blocks. 00374 void RotateAndReskewBlocks(TO_BLOCK_LIST* to_blocks); 00375 00376 00377 // Move all the small and noise blobs into the main blobs list of 00378 // the block from the to_blocks list that contains them. 00379 void MoveSmallBlobs(BLOBNBOX_LIST* bblobs, TO_BLOCK_LIST* to_blocks); 00380 00381 // The mean gap between columns over the page. 00382 int mean_column_gap_; 00383 // Estimate of median x-height over the page 00384 int global_median_xheight_; 00385 // Estimate of median ledding on the page 00386 int global_median_ledding_; 00387 // The rotation vector needed to convert deskewed back to original coords. 00388 FCOORD reskew_; 00389 // The rotation vector needed to convert the rotated back to original coords. 00390 FCOORD rerotate_; 00391 // The part_sets_ are the initial text-line-like partition of the grid, 00392 // and is a vector of ColPartitionSets. 00393 PartSetVector part_sets_; 00394 // The column_sets_ contain the ordered candidate ColPartitionSets that 00395 // define the possible divisions of the page into columns. 00396 PartSetVector column_sets_; 00397 // A simple array of pointers to the best assigned column division at 00398 // each grid y coordinate. 00399 ColPartitionSet** best_columns_; 00400 // The grid used to hold ColPartitions after the columns have been determined. 00401 ColPartitionGrid part_grid_; 00402 // Grid to hold cleaned colpartitions after removing all 00403 // colpartitions that consist of only noise blobs, and removing 00404 // noise blobs from remaining colpartitions. 00405 ColPartitionGrid clean_part_grid_; 00406 // Grid to hold periods, commas, i-dots etc. 00407 BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> period_grid_; 00408 // List of period blobs extracted from small and noise blobs 00409 BLOBNBOX_LIST period_blobs_; 00410 // Grid of page column blocks 00411 ColSegmentGrid col_seg_grid_; 00412 // Grid of detected tables 00413 ColSegmentGrid table_grid_; 00414 // List of ColPartitions that are no longer needed after they have been 00415 // turned into regions, but are kept around because they are referenced 00416 // by the part_grid_. 00417 ColPartition_LIST good_parts_; 00418 // List of ColPartitions of unknown type. 00419 ColPartition_LIST unknown_parts_; 00420 // List of ColPartitions that have been declared noise. 00421 ColPartition_LIST noise_parts_; 00422 // The fake blobs that are made from the input boxa/pixa pair. 00423 BLOBNBOX_LIST image_bblobs_; 00424 // Horizontal line separators. 00425 TabVector_LIST horizontal_lines_; 00426 // Allow a subsequent instance to reuse the blocks window. 00427 // Not thread-safe, but multiple threads shouldn't be using windows anyway. 00428 static ScrollView* blocks_win_; 00429 }; 00430 00431 } // namespace tesseract. 00432 00433 #endif // TESSERACT_TEXTORD_COLFIND_H__