Tesseract 3.01
/data/source/tesseract-ocr/textord/colfind.h
Go to the documentation of this file.
00001 
00002 // File:        colfind.h
00003 // Description: Class to find columns in the grid of BLOBNBOXes.
00004 // Author:      Ray Smith
00005 // Created:     Thu Feb 21 14:04:01 PST 2008
00006 //
00007 // (C) Copyright 2008, Google Inc.
00008 // Licensed under the Apache License, Version 2.0 (the "License");
00009 // you may not use this file except in compliance with the License.
00010 // You may obtain a copy of the License at
00011 // http://www.apache.org/licenses/LICENSE-2.0
00012 // Unless required by applicable law or agreed to in writing, software
00013 // distributed under the License is distributed on an "AS IS" BASIS,
00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015 // See the License for the specific language governing permissions and
00016 // limitations under the License.
00017 //
00019 
00020 #ifndef TESSERACT_TEXTORD_COLFIND_H__
00021 #define TESSERACT_TEXTORD_COLFIND_H__
00022 
00023 #include "tabfind.h"
00024 #include "imagefind.h"
00025 #include "colpartitiongrid.h"
00026 #include "colpartitionset.h"
00027 #include "ocrblock.h"
00028 
00029 class ScrollView;
00030 class TO_BLOCK;
00031 class STATS;
00032 class BLOCK_LIST;
00033 struct Boxa;
00034 struct Pixa;
00035 
00036 namespace tesseract {
00037 
00038 extern BOOL_VAR_H(textord_tabfind_find_tables, false, "run table detection");
00039 
00040 class StrokeWidth;
00041 class LineSpacing;
00042 class TempColumn_LIST;
00043 class ColSegment_LIST;
00044 class ColumnGroup_LIST;
00045 class ColPartitionSet;
00046 class ColPartitionSet_LIST;
00047 
00048 // The ColumnFinder class finds columns in the grid.
00049 class ColumnFinder : public TabFind {
00050  public:
00051   // Gridsize is an estimate of the text size in the image. A suitable value
00052   // is in TO_BLOCK::line_size after find_components has been used to make
00053   // the blobs.
00054   // bleft and tright are the bounds of the image (rectangle) being processed.
00055   // vlines is a (possibly empty) list of TabVector and vertical_x and y are
00056   // the sum logical vertical vector produced by LineFinder::FindVerticalLines.
00057   ColumnFinder(int gridsize, const ICOORD& bleft, const ICOORD& tright,
00058                int resolution, TabVector_LIST* vlines, TabVector_LIST* hlines,
00059                int vertical_x, int vertical_y);
00060   virtual ~ColumnFinder();
00061 
00062   // ======================================================================
00063   // The main function of ColumnFinder is broken into pieces to facilitate
00064   // optional insertion of orientation and script detection in an efficient
00065   // way. The calling sequence IS MANDATORY however, whether or not
00066   // OSD is being used:
00067   // 1. Construction.
00068   // 2. IsVerticallyAlignedText.
00069   // 3. CorrectOrientation.
00070   // 4. FindBlocks.
00071   // 5. Destruction. Use of a single column finder for multiple images does not
00072   //    make sense.
00073   // TODO(rays) break up column finder further into smaller classes, as
00074   // there is a lot more to it than column finding now.
00075   // ======================================================================
00076 
00077   // Tests for vertical alignment of text (returning true if so), and
00078   // generates a list of blobs for orientation and script detection. Note that
00079   // the vertical alignment may be due to text whose writing direction is
00080   // vertical, like say Japanese, or due to text whose writing direction is
00081   // horizontal but whose text appears vertically aligned because the image is
00082   // not the right way up.
00083   bool IsVerticallyAlignedText(TO_BLOCK* block, BLOBNBOX_CLIST* osd_blobs);
00084 
00085   // Rotates the blobs and the TabVectors so that the gross writing direction
00086   // (text lines) is horizontal and lines are read down the page.
00087   // Applied rotation stored in rotation_.
00088   // A second rotation is calculated for application during recognition to
00089   // make the rotated blobs upright for recognition.
00090   // Subsequent rotation stored in text_rotation_.
00091   //
00092   // Arguments:
00093   //   vertical_text_lines is true if the text lines are vertical.
00094   //   recognition_rotation [0..3] is the number of anti-clockwise 90 degree
00095   //   rotations from osd required for the text to be upright and readable.
00096   void CorrectOrientation(TO_BLOCK* block, bool vertical_text_lines,
00097                           int recognition_rotation);
00098 
00099   // Finds the text and image blocks, returning them in the blocks and to_blocks
00100   // lists. (Each TO_BLOCK points to the basic BLOCK and adds more information.)
00101   // If boxa and pixa are not NULL, they are assumed to be the output of
00102   // ImageFinder::FindImages, and are used to generate image blocks.
00103   // The input boxa and pixa are destroyed.
00104   // Imageheight should be the pixel height of the original image.
00105   // The input block is the result of a call to find_components, and contains
00106   // the blobs found in the image. These blobs will be removed and placed
00107   // in the output blocks, while unused ones will be deleted.
00108   // If single_column is true, the input is treated as single column, but
00109   // it is still divided into blocks of equal line spacing/text size.
00110   // Returns -1 if the user requested retry with more debug info.
00111   int FindBlocks(bool single_column, int imageheight,
00112                  TO_BLOCK* block, Boxa* boxa, Pixa* pixa,
00113                  BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks);
00114 
00115   // Get the rotation required to deskew, and its inverse rotation.
00116   void GetDeskewVectors(FCOORD* deskew, FCOORD* reskew);
00117 
00118  private:
00119   // Displays the blob and block bounding boxes in a window called Blocks.
00120   void DisplayBlocks(BLOCK_LIST* blocks);
00121   // Displays the column edges at each grid y coordinate defined by
00122   // best_columns_. NOTE(review): role of the sets argument is undocumented.
00123   void DisplayColumnBounds(PartSetVector* sets);
00124 
00125   // Converts the arrays of Box/Pix to a list of C_OUTLINE, and then to blobs.
00126   // The output is a list of C_BLOBs for the images, but the C_OUTLINEs
00127   // contain no data.
00128   void ExtractImageBlobs(int image_height, Boxa* boxa, Pixa* pixa);
00129 
00131 
00132   // Creates the initial ColPartitions, and puts them in a ColPartitionSet
00133   // for each grid y coordinate, storing the ColPartitionSets in part_sets_.
00134   // After creating the ColPartitionSets, attempts to merge them where they
00135   // overlap and unique the BLOBNBOXes within.
00136   // The return value is the number of ColPartitionSets made.
00137   int MakeColumnPartitions();
00138   // Partition the BLOBNBOXES horizontally at the given grid y, creating a
00139   // ColPartitionSet which is returned. NULL is returned if there are no
00140   // BLOBNBOXES at the given grid y.
00141   ColPartitionSet* PartitionsAtGridY(int grid_y);
00142   // Insert the blobs in the given list into the main grid and for
00143   // each one also make it a separate unknown partition.
00144   // If filter is true, use only the blobs that are above a threshold in
00145   // size or are non-isolated.
00146   void InsertSmallBlobsAsUnknowns(bool filter, BLOBNBOX_LIST* blobs);
00147   // Helper function for PartitionsAtGridY, with a long argument list.
00148   // This bbox is of unknown type, so it is added to an unk_partition.
00149   // If the edge is past the unk_right_margin then unk_partition has to be
00150   // completed and a new one made. See CompletePartition and StartPartition
00151   // for the other args.
00152   void ProcessUnknownBlob(int page_edge, BLOBNBOX* bbox,
00153                           ColPartition** unk_partition,
00154                           ColPartition_IT* unk_part_it,
00155                           TabVector** unk_right_line,
00156                           int* unk_right_margin,
00157                           int* unk_prev_margin,
00158                           bool* unk_edge_is_left);
00159   // Creates and returns a new ColPartition of the given start_type
00160   // and adds the given bbox to it.
00161   // Also finds the left and right tabvectors that bound the textline, setting
00162   // the members of the returned ColPartition appropriately:
00163   // If the left tabvector is less constraining than the input left_margin
00164   // (assumed to be the right edge of the previous partition), then the
00165   // tabvector is ignored and the left_margin used instead.
00166   // If the right tabvector is more constraining than the input *right_margin,
00167   // (probably the right edge of the page), then the *right_margin is adjusted
00168   // to use the tabvector.
00169   // *edge_is_left is set to true if the right tabvector is good and used as the
00170   // margin, so we can include blobs that overhang the tabvector in this
00171   // partition.
00172   ColPartition* StartPartition(BlobRegionType start_type, int left_margin,
00173                                BLOBNBOX* bbox, TabVector** right_line,
00174                                int* right_margin, bool* edge_is_left);
00175   // Completes the given partition, and adds it to the given iterator.
00176   // The right_margin on input is the left edge of the next blob if there is
00177   // one. The right tab vector plus a margin is used as the right margin if
00178   // it is more constraining than the next blob, but if there are no more
00179   // blobs, we want the right margin to make it to the page edge.
00180   // The return value is the next left margin, being the right edge of the
00181   // bounding box of blobs.
00182   int CompletePartition(bool no_more_blobs, int page_edge,
00183                         TabVector* right_line, int* right_margin,
00184                         ColPartition** partition, ColPartition_IT* part_it);
00185 
00186 
00188 
00189   // Makes an ordered list of candidates to partition the width of the page
00190   // into columns using the part_sets_.
00191   // See AddToColumnSetsIfUnique for the ordering.
00192   // If single_column, then it just makes a single page-wide fake column.
00193   void MakeColumnCandidates(bool single_column);
00194   // Attempt to improve the column_candidates by expanding the columns
00195   // and adding new partitions from the partition sets in src_sets.
00196   // Src_sets may be equal to column_candidates, in which case it will
00197   // use them as a source to improve themselves.
00198   void ImproveColumnCandidates(PartSetVector* src_sets,
00199                                PartSetVector* column_sets);
00200   // Prints debug information on the column candidates.
00201   void PrintColumnCandidates(const char* title);
00202   // Finds the optimal set of columns that cover the entire image with as
00203   // few changes in column partition as possible.
00204   void AssignColumns();
00205   // Finds the biggest range in part_sets_ that has no assigned column, but
00206   // column assignment is possible.
00207   bool BiggestUnassignedRange(const bool* any_columns_possible,
00208                               int* start, int* end);
00209   // Finds the modal compatible column_set_ index within the given range.
00210   int RangeModalColumnSet(int** column_set_costs, const int* assigned_costs,
00211                           int start, int end);
00212   // Given that there are many column_set_id compatible columns in the range,
00213   // shrinks the range to the longest contiguous run of compatibility, allowing
00214   // gaps where no columns are possible, but not where competing columns are
00215   // possible.
00216   void ShrinkRangeToLongestRun(int** column_set_costs,
00217                                const int* assigned_costs,
00218                                const bool* any_columns_possible,
00219                                int column_set_id,
00220                                int* best_start, int* best_end);
00221   // Moves start in the direction of step, up to, but not including end, while
00222   // the only incompatible regions are no more than kMaxIncompatibleColumnCount
00223   // in size, and the compatible regions beyond are bigger.
00224   void ExtendRangePastSmallGaps(int** column_set_costs,
00225                                 const int* assigned_costs,
00226                                 const bool* any_columns_possible,
00227                                 int column_set_id,
00228                                 int step, int end, int* start);
00229   // Assigns the given column_set_id to the part_sets_ in the given range.
00230   void AssignColumnToRange(int column_set_id, int start, int end,
00231                            int** column_set_costs, int* assigned_costs);
00232 
00233   // Computes the mean_column_gap_.
00234   void ComputeMeanColumnGap();
00235 
00238 
00239   // Removes the ColPartitions from part_sets_, the ColPartitionSets that
00240   // contain them, and puts them in the part_grid_ after ensuring that no
00241   // BLOBNBOX is owned by more than one of them.
00242   void MovePartitionsToGrid();
00243   // Splits partitions that cross columns where they have nothing in the gap.
00244   void GridSplitPartitions();
00245   // Merges partitions where there is vertical overlap, within a single column,
00246   // and the horizontal gap is small enough.
00247   void GridMergePartitions();
00248   // Resolves unknown partitions from the unknown_parts_ list by merging them
00249   // with a close neighbour, inserting them into the grid with a known type,
00250   // or declaring them to be noise.
00251   void GridInsertUnknowns();
00252   // Add horizontal line separators as partitions.
00253   void GridInsertHLinePartitions();
00254   // Add vertical line separators as partitions.
00255   void GridInsertVLinePartitions();
00256   // For every ColPartition in the grid, sets its type based on position
00257   // in the columns.
00258   void SetPartitionTypes();
00259   // Only images remain with multiple types in a run of partners.
00260   // Sets the type of all in the group to the maximum of the group.
00261   void SmoothPartnerRuns();
00262 
00264 
00265   // Helper functions for TransformToBlocks.
00266   // Add the part to the temp list in the correct order.
00267   void AddToTempPartList(ColPartition* part, ColPartition_CLIST* temp_list);
00268   // Add everything from the temp list to the work_set assuming correct order.
00269   void EmptyTempPartList(ColPartition_CLIST* temp_list,
00270                          WorkingPartSet_LIST* work_set);
00271 
00272   // Transform the grid of partitions to the output blocks.
00273   void TransformToBlocks(BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks);
00274 
00275   // Undo the deskew that was done in FindTabVectors, as recognition is done
00276   // without correcting blobs or blob outlines for skew.
00277   // Reskew the completed blocks to put them back to the original rotated coords
00278   // that were created by CorrectOrientation.
00279   // Blocks that were identified as vertical text (relative to the rotated
00280   // coordinates) are further rotated so the text lines are horizontal.
00281   // Blob polygonal outlines are rotated to match the position of the blocks
00282   // that they are in, and their bounding boxes are recalculated to be accurate.
00283   // Record appropriate inverse transformations and required
00284   // classifier transformation in the blocks.
00285   void RotateAndReskewBlocks(TO_BLOCK_LIST* to_blocks);
00286 
00287 
00288   // Move all the small and noise blobs into the main blobs list of
00289   // the block from the to_blocks list that contains them.
00290   void MoveSmallBlobs(BLOBNBOX_LIST* bblobs, TO_BLOCK_LIST* to_blocks);
00291 
00292   // The minimum gutter width to apply for finding columns.
00293   // Modified when vertical text is detected to prevent detection of
00294   // vertical text lines as columns.
00295   int min_gutter_width_;
00296   // The mean gap between columns over the page.
00297   int mean_column_gap_;
00298   // The rotation vector needed to convert original coords to deskewed.
00299   FCOORD deskew_;
00300   // The rotation vector needed to convert deskewed back to original coords.
00301   FCOORD reskew_;
00302   // The rotation vector used to rotate vertically oriented pages.
00303   FCOORD rotation_;
00304   // The rotation vector needed to convert the rotated back to original coords.
00305   FCOORD rerotate_;
00306   // The additional rotation vector needed to rotate text for recognition.
00307   FCOORD text_rotation_;
00308   // part_sets_ is the initial text-line-like partition of the grid,
00309   // held as a vector of ColPartitionSets.
00310   PartSetVector part_sets_;
00311   // The column_sets_ contain the ordered candidate ColPartitionSets that
00312   // define the possible divisions of the page into columns.
00313   PartSetVector column_sets_;
00314   // A simple array of pointers to the best assigned column division at
00315   // each grid y coordinate.
00316   ColPartitionSet** best_columns_;
00317   // The grid used for creating initial partitions with strokewidth.
00318   StrokeWidth* stroke_width_;
00319   // The grid used to hold ColPartitions after the columns have been determined.
00320   ColPartitionGrid part_grid_;
00321   // List of ColPartitions that are no longer needed after they have been
00322   // turned into regions, but are kept around because they are referenced
00323   // by the part_grid_.
00324   ColPartition_LIST good_parts_;
00325   // List of ColPartitions of unknown type.
00326   ColPartition_LIST unknown_parts_;
00327   // List of ColPartitions that have been declared noise.
00328   ColPartition_LIST noise_parts_;
00329   // The fake blobs that are made from the input boxa/pixa pair.
00330   BLOBNBOX_LIST image_bblobs_;
00331   // Horizontal line separators.
00332   TabVector_LIST horizontal_lines_;
00333   // Allow a subsequent instance to reuse the blocks window.
00334   // Not thread-safe, but multiple threads shouldn't be using windows anyway.
00335   static ScrollView* blocks_win_;
00336 };
00337 
00338 }  // namespace tesseract.
00339 
00340 #endif  // TESSERACT_TEXTORD_COLFIND_H__
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines