00001 00002 // File: tablefind.h 00003 // Description: Helper classes to find tables from ColPartitions. 00004 // Author: Faisal Shafait (faisal.shafait@dfki.de) 00005 // Created: Tue Jan 06 11:13:01 PST 2009 00006 // 00007 // (C) Copyright 2009, Google Inc. 00008 // Licensed under the Apache License, Version 2.0 (the "License"); 00009 // you may not use this file except in compliance with the License. 00010 // You may obtain a copy of the License at 00011 // http://www.apache.org/licenses/LICENSE-2.0 00012 // Unless required by applicable law or agreed to in writing, software 00013 // distributed under the License is distributed on an "AS IS" BASIS, 00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 // See the License for the specific language governing permissions and 00016 // limitations under the License. 00017 // 00019 00020 #ifndef TESSERACT_TEXTORD_TABLEFIND_H__ 00021 #define TESSERACT_TEXTORD_TABLEFIND_H__ 00022 00023 #include "rect.h" 00024 #include "elst.h" 00025 00026 namespace tesseract { 00027 00028 // Possible types for a column segment. 00029 enum ColSegType { 00030 COL_UNKNOWN, 00031 COL_TEXT, 00032 COL_TABLE, 00033 COL_MIXED, 00034 COL_COUNT 00035 }; 00036 00037 // ColSegment holds rectangular blocks that represent segmentation of a page 00038 // into regions containing single column text/table. 00039 class ColSegment; 00040 ELISTIZEH(ColSegment) 00041 CLISTIZEH(ColSegment) 00042 00043 class ColSegment : public ELIST_LINK { 00044 public: 00045 ColSegment() : num_table_cells_(0), num_text_cells_(0), 00046 type_(COL_UNKNOWN) { 00047 } 00048 ~ColSegment() { } 00049 00050 // Simple accessors and mutators 00051 const TBOX& bounding_box() const { 00052 return bounding_box_; 00053 } 00054 00055 void set_top(int y) { 00056 bounding_box_.set_top(y); 00057 } 00058 00059 void set_bottom(int y) { 00060 bounding_box_.set_bottom(y); 00061 } 00062 00063 void set_left(int x) { 00064 bounding_box_.set_left(x); 00065 } 00066 00067 void set_right(int x) { 00068 bounding_box_.set_right(x); 00069 } 00070 00071 void set_bounding_box(const TBOX& other) { 00072 bounding_box_ = other; 00073 } 00074 00075 int get_num_table_cells() { 00076 return num_table_cells_; 00077 } 00078 00079 // set the number of table colpartitions covered by the bounding_box_ 00080 void set_num_table_cells(int n) { 00081 num_table_cells_ = n; 00082 } 00083 00084 int get_num_text_cells() { 00085 return num_text_cells_; 00086 } 00087 00088 // set the number of text colpartitions covered by the bounding_box_ 00089 void set_num_text_cells(int n) { 00090 num_text_cells_ = n; 00091 } 00092 00093 ColSegType type() { 00094 return type_; 00095 } 00096 00097 // set the type of the block based on the ratio of table to text 00098 // colpartitions covered by it. 00099 void set_type(); 00100 00101 // Provides a color for BBGrid to draw the rectangle. 00102 ScrollView::Color BoxColor() const; 00103 00104 // Insert a rectangle into bounding_box_ 00105 void InsertBox(const TBOX& other); 00106 00107 private: 00108 // Initializes the bulk of the members to default values. 00109 void Init() { 00110 } 00111 00112 TBOX bounding_box_; // bounding box 00113 int num_table_cells_; 00114 int num_text_cells_; 00115 ColSegType type_; 00116 }; 00117 00118 // Typedef BBGrid of ColSegments 00119 typedef BBGrid<ColSegment, 00120 ColSegment_CLIST, 00121 ColSegment_C_IT> ColSegmentGrid; 00122 00123 00124 } // namespace tesseract. 00125 00126 #endif // TESSERACT_TEXTORD_TABLEFIND_H__