Tesseract 3.01
/data/source/tesseract-ocr/textord/colpartitiongrid.h
Go to the documentation of this file.
00001 
00002 // File:        colpartitiongrid.h
00003 // Description: Class collecting code that acts on a BBGrid of ColPartitions.
00004 // Author:      Ray Smith
00005 // Created:     Mon Oct 05 08:42:01 PDT 2009
00006 //
00007 // (C) Copyright 2009, Google Inc.
00008 // Licensed under the Apache License, Version 2.0 (the "License");
00009 // you may not use this file except in compliance with the License.
00010 // You may obtain a copy of the License at
00011 // http://www.apache.org/licenses/LICENSE-2.0
00012 // Unless required by applicable law or agreed to in writing, software
00013 // distributed under the License is distributed on an "AS IS" BASIS,
00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015 // See the License for the specific language governing permissions and
00016 // limitations under the License.
00017 //
00019 
00020 #ifndef TESSERACT_TEXTORD_COLPARTITIONGRID_H__
00021 #define TESSERACT_TEXTORD_COLPARTITIONGRID_H__
00022 
00023 #include "bbgrid.h"
00024 #include "colpartition.h"
00025 
00026 namespace tesseract {
00027 
00028 class TabFind;
00029 
00030 // ColPartitionGrid is a BBGrid instantiated on ColPartition (together with
00031 // its CLIST and iterator types). It collects functions that work on the grid.
00032 class ColPartitionGrid : public BBGrid<ColPartition,
00033                                        ColPartition_CLIST,
00034                                        ColPartition_C_IT> {
00035  public:
00036   ColPartitionGrid();  // Default constructor; takes no grid geometry.
00037   ColPartitionGrid(int gridsize, const ICOORD& bleft, const ICOORD& tright);
00038 
00039   ~ColPartitionGrid();
00040 
00041   // Handles a click event in a display window; x, y presumably locate the click.
00042   void HandleClick(int x, int y);
00043 
00044   // Finds all the ColPartitions in the grid that overlap with the given
00045   // box and returns them in parts, SortByBoxLeft(ed) and uniqued.
00046   // Any partition equal to not_this (which may be NULL) is excluded.
00047   void FindOverlappingPartitions(const TBOX& box, const ColPartition* not_this,
00048                                  ColPartition_CLIST* parts);
00049 
00050   // Finds and returns the best candidate ColPartition to merge with part,
00051   // selected from the candidates list, based on the minimum increase in
00052   // pairwise overlap among all the partitions overlapped by the combined box.
00053   // If overlap_increase is not NULL then it receives the increase in overlap
00054   // that would result from the merge. The roles of debug and confirm_cb are
00055   // not described here (TODO confirm); see colpartitiongrid.cpp for a diagram.
00056   ColPartition* BestMergeCandidate(
00057       const ColPartition* part, ColPartition_CLIST* candidates, bool debug,
00058       TessResultCallback2<bool, const ColPartition*,
00059                           const ColPartition*>* confirm_cb,
00060       int* overlap_increase);
00061 
00062   // Improves the margins of every ColPartition in the grid by calling
00063   // FindPartitionMargins on each one.
00064   void GridFindMargins(ColPartitionSet** best_columns);
00065 
00066   // Improves the margins of every ColPartition in the parts list by calling
00067   // FindPartitionMargins on each one.
00068   void ListFindMargins(ColPartitionSet** best_columns,
00069                        ColPartition_LIST* parts);
00070 
00071   // Finds and marks text partitions that represent figure captions.
00072   void FindFigureCaptions();
00073 
00076   // For every ColPartition in the grid, finds its upper and lower neighbours.
00077   void FindPartitionPartners();
00078   // Finds the best partner for the given partition in the direction selected
00079   // by upper. Stores the result with AddPartner.
00080   void FindPartitionPartners(bool upper, ColPartition* part);
00081   // For every ColPartition with multiple partners in the grid, reduces the
00082   // number of partners to 0 or 1. If get_desperate is true, goes to more
00083   // desperate merge methods to merge flowing text before breaking partnerships.
00084   void RefinePartitionPartners(bool get_desperate);
00085 
00086  private:
00087   // Improves the margins of the given ColPartition by searching for
00088   // neighbours that vertically overlap significantly.
00089   void FindPartitionMargins(ColPartitionSet* columns, ColPartition* part);
00090 
00091   // Starting at x, and going in the specified direction, up to x_limit, finds
00092   // the margin for the given y range (y_bottom to y_top) by searching sideways,
00093   // and ignoring not_this.
00094   int FindMargin(int x, bool right_to_left, int x_limit,
00095                  int y_bottom, int y_top, const ColPartition* not_this);
00096 };
00097 
00098 }  // namespace tesseract.
00099 
00100 #endif  // TESSERACT_TEXTORD_COLPARTITIONGRID_H__
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines