00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00020
00021 #ifndef TESSERACT_TEXTORD_COLPARTITION_H__
00022 #define TESSERACT_TEXTORD_COLPARTITION_H__
00023
00024 #include "bbgrid.h"
00025 #include "blobbox.h"
00026 #include "ndminx.h"
00027 #include "ocrblock.h"
00028 #include "rect.h"
00029 #include "scrollview.h"
00030 #include "tabfind.h"
00031 #include "tabvector.h"
00032
00033 namespace tesseract {
00034
00035
00036 class ColPartition;
00037 class ColPartitionSet;
00038 class WorkingPartSet;
00039 class WorkingPartSet_LIST;
00040
00041 ELIST2IZEH(ColPartition)
00042 CLISTIZEH(ColPartition)
00043
00044
00045
00046
00047
00048
00049
00050
00051 class ColPartition : public ELIST2_LINK {
00052 public:
00053 ColPartition() {
00054
00055
00056
00057 }
00058
00059
00060 ColPartition(BlobRegionType blob_type, const ICOORD& vertical);
00061
00062
00063 ColPartition(const ICOORD& vertical,
00064 int left, int bottom, int right, int top);
00065
00066
00067
00068
00069
00070
00071 static ColPartition* FakePartition(const TBOX& box);
00072
00073 ~ColPartition();
00074
00075
00076 const TBOX& bounding_box() const {
00077 return bounding_box_;
00078 }
00079 int left_margin() const {
00080 return left_margin_;
00081 }
00082 void set_left_margin(int margin) {
00083 left_margin_ = margin;
00084 }
00085 int right_margin() const {
00086 return right_margin_;
00087 }
00088 void set_right_margin(int margin) {
00089 right_margin_ = margin;
00090 }
00091 int median_top() const {
00092 return median_top_;
00093 }
00094 int median_bottom() const {
00095 return median_bottom_;
00096 }
00097 int median_size() const {
00098 return median_size_;
00099 }
00100 BlobRegionType blob_type() const {
00101 return blob_type_;
00102 }
00103 void set_blob_type(BlobRegionType t) {
00104 blob_type_ = t;
00105 }
00106 bool good_width() const {
00107 return good_width_;
00108 }
00109 bool good_column() const {
00110 return good_column_;
00111 }
00112 bool left_key_tab() const {
00113 return left_key_tab_;
00114 }
00115 int left_key() const {
00116 return left_key_;
00117 }
00118 bool right_key_tab() const {
00119 return right_key_tab_;
00120 }
00121 int right_key() const {
00122 return right_key_;
00123 }
00124 PolyBlockType type() const {
00125 return type_;
00126 }
00127 void set_type(PolyBlockType t) {
00128 type_ = t;
00129 }
00130 BLOBNBOX_CLIST* boxes() {
00131 return &boxes_;
00132 }
00133 ColPartition_CLIST* upper_partners() {
00134 return &upper_partners_;
00135 }
00136 ColPartition_CLIST* lower_partners() {
00137 return &lower_partners_;
00138 }
00139 void set_working_set(WorkingPartSet* working_set) {
00140 working_set_ = working_set;
00141 }
00142 ColPartitionSet* column_set() const {
00143 return column_set_;
00144 }
00145 void set_side_step(int step) {
00146 side_step_ = step;
00147 }
00148 int bottom_spacing() const {
00149 return bottom_spacing_;
00150 }
00151 void set_bottom_spacing(int spacing) {
00152 bottom_spacing_ = spacing;
00153 }
00154 int top_spacing() const {
00155 return top_spacing_;
00156 }
00157 void set_top_spacing(int spacing) {
00158 top_spacing_ = spacing;
00159 }
00160
00161 void set_table_type() {
00162 if (type_ != PT_TABLE) {
00163 type_before_table_ = type_;
00164 type_ = PT_TABLE;
00165 }
00166 }
00167 void clear_table_type() {
00168 if (type_ == PT_TABLE)
00169 type_ = type_before_table_;
00170 }
00171 bool inside_table_column() {
00172 return inside_table_column_;
00173 }
00174 void set_inside_table_column(bool val) {
00175 inside_table_column_ = val;
00176 }
00177 ColPartition* nearest_neighbor_above() const {
00178 return nearest_neighbor_above_;
00179 }
00180 void set_nearest_neighbor_above(ColPartition* part) {
00181 nearest_neighbor_above_ = part;
00182 }
00183 ColPartition* nearest_neighbor_below() const {
00184 return nearest_neighbor_below_;
00185 }
00186 void set_nearest_neighbor_below(ColPartition* part) {
00187 nearest_neighbor_below_ = part;
00188 }
00189 int space_above() const {
00190 return space_above_;
00191 }
00192 void set_space_above(int space) {
00193 space_above_ = space;
00194 }
00195 int space_below() const {
00196 return space_below_;
00197 }
00198 void set_space_below(int space) {
00199 space_below_ = space;
00200 }
00201 int space_to_left() const {
00202 return space_to_left_;
00203 }
00204 void set_space_to_left(int space) {
00205 space_to_left_ = space;
00206 }
00207 int space_to_right() const {
00208 return space_to_right_;
00209 }
00210 void set_space_to_right(int space) {
00211 space_to_right_ = space;
00212 }
00213
00214
00215
00216
00217 int MidY() const {
00218 return (bounding_box_.top() + bounding_box_.bottom()) / 2;
00219 }
00220
00221 int MedianY() const {
00222 return (median_top_ + median_bottom_) / 2;
00223 }
00224
00225 int SortKey(int x, int y) const {
00226 return TabVector::SortKey(vertical_, x, y);
00227 }
00228
00229 int XAtY(int sort_key, int y) const {
00230 return TabVector::XAtY(vertical_, sort_key, y);
00231 }
00232
00233 int KeyWidth(int left_key, int right_key) const {
00234 return (right_key - left_key) / vertical_.y();
00235 }
00236
00237 int ColumnWidth() const {
00238 return KeyWidth(left_key_, right_key_);
00239 }
00240
00241 int BoxLeftKey() const {
00242 return SortKey(bounding_box_.left(), MidY());
00243 }
00244
00245 int BoxRightKey() const {
00246 return SortKey(bounding_box_.right(), MidY());
00247 }
00248
00249 int LeftAtY(int y) const {
00250 return XAtY(left_key_, y);
00251 }
00252
00253 int RightAtY(int y) const {
00254 return XAtY(right_key_, y);
00255 }
00256
00257
00258 bool IsLeftOf(const ColPartition& other) const {
00259 return bounding_box_.right() < other.bounding_box_.right();
00260 }
00261
00262 bool ColumnContains(int x, int y) const {
00263 return LeftAtY(y) - 1 <= x && x <= RightAtY(y) + 1;
00264 }
00265
00266 bool IsEmpty() {
00267 return boxes_.empty();
00268 }
00269
00270 bool HOverlaps(const ColPartition& other) const {
00271 return bounding_box_.x_overlap(other.bounding_box_);
00272 }
00273
00274
00275 bool HCompatible(const ColPartition& other) const {
00276 return left_margin_ <= other.bounding_box_.left() &&
00277 bounding_box_.left() >= other.left_margin_ &&
00278 bounding_box_.right() <= other.right_margin_ &&
00279 right_margin_ >= other.bounding_box_.right();
00280 }
00281
00282 int VOverlap(const ColPartition& other) const {
00283 return MIN(median_top_, other.median_top_) -
00284 MAX(median_bottom_, other.median_bottom_);
00285 }
00286
00287 bool VOverlaps(const ColPartition& other) const {
00288 int overlap = VOverlap(other);
00289 int height = MIN(median_top_ - median_bottom_,
00290 other.median_top_ - other.median_bottom_);
00291 return overlap * 3 > height;
00292 }
00293
00294 bool TypesMatch(const ColPartition& other) const {
00295 return TypesMatch(blob_type_, other.blob_type_);
00296 }
00297 static bool TypesMatch(BlobRegionType type1, BlobRegionType type2) {
00298 return type1 == type2 ||
00299 (type1 < BRT_UNKNOWN && type2 < BRT_UNKNOWN);
00300 }
00301
00302
00303 bool IsLineType() {
00304 return POLY_BLOCK::IsLineType(type_);
00305 }
00306
00307 bool IsImageType() {
00308 return POLY_BLOCK::IsImageType(type_);
00309 }
00310
00311 bool IsTextType() {
00312 return POLY_BLOCK::IsTextType(type_);
00313 }
00314
00315
00316
00317
00318 void AddBox(BLOBNBOX* box);
00319
00320
00321
00322 void ClaimBoxes(WidthCallback* cb);
00323
00324
00325 void DeleteBoxes();
00326
00327
00328
00329
00330
00331
00332
00333 bool IsLegal();
00334
00335
00336 bool MatchingColumns(const ColPartition& other) const;
00337
00338
00339
00340
00341 void SetLeftTab(const TabVector* tab_vector);
00342 void SetRightTab(const TabVector* tab_vector);
00343
00344
00345
00346 void CopyLeftTab(const ColPartition& src, bool take_box);
00347 void CopyRightTab(const ColPartition& src, bool take_box);
00348
00349
00350
00351
00352 void AddPartner(bool upper, ColPartition* partner);
00353
00354
00355
00356 void RemovePartner(bool upper, ColPartition* partner);
00357
00358 ColPartition* SingletonPartner(bool upper);
00359
00360
00361 void Absorb(ColPartition* other, WidthCallback* cb);
00362
00363
00364
00365 bool Unique(ColPartition* other, WidthCallback* cb);
00366
00367
00368
00369 ColPartition* SplitAt(int split_x);
00370
00371
00372 void ComputeLimits();
00373
00374
00375 void SetPartitionType(ColPartitionSet* columns);
00376
00377
00378 void ColumnRange(ColPartitionSet* columns, int* first_col, int* last_col);
00379
00380
00381 void SetColumnGoodness(WidthCallback* cb);
00382
00383
00384
00385 void AddToWorkingSet(const ICOORD& bleft, const ICOORD& tright,
00386 int resolution, ColPartition_LIST* used_parts,
00387 WorkingPartSet_LIST* working_set);
00388
00389
00390
00391
00392
00393
00394
00395 static void LineSpacingBlocks(const ICOORD& bleft, const ICOORD& tright,
00396 int resolution,
00397 ColPartition_LIST* block_parts,
00398 ColPartition_LIST* used_parts,
00399 BLOCK_LIST* completed_blocks,
00400 TO_BLOCK_LIST* to_blocks);
00401
00402
00403 static TO_BLOCK* MakeBlock(const ICOORD& bleft, const ICOORD& tright,
00404 ColPartition_LIST* block_parts,
00405 ColPartition_LIST* used_parts);
00406
00407
00408
00409
00410 ColPartition* ShallowCopy() const;
00411
00412
00413 ScrollView::Color BoxColor() const;
00414
00415
00416 void Print();
00417
00418
00419 void SmoothPartnerRun(int working_set_count);
00420
00421
00422
00423 void RefinePartners(PolyBlockType type);
00424
00425 private:
00426
00427
00428 enum SpacingNeighbourhood {
00429 PN_ABOVE2,
00430 PN_ABOVE1,
00431 PN_UPPER,
00432 PN_LOWER,
00433 PN_BELOW1,
00434 PN_BELOW2,
00435 PN_COUNT
00436 };
00437
00438
00439 void RefinePartnersInternal(bool upper);
00440
00441
00442 void RefinePartnersByType(bool upper, ColPartition_CLIST* partners);
00443
00444
00445
00446
00447 void RefinePartnerShortcuts(bool upper, ColPartition_CLIST* partners);
00448
00449
00450 void RefineFlowingTextPartners(bool upper, ColPartition_CLIST* partners);
00451
00452 void RefinePartnersByOverlap(bool upper, ColPartition_CLIST* partners);
00453
00454
00455 bool ThisPartitionBetter(BLOBNBOX* bbox, const ColPartition& other);
00456
00457
00458
00459
00460 static void SmoothSpacings(int resolution, int page_height,
00461 ColPartition_LIST* parts);
00462
00463
00464
00465
00466 static bool OKSpacingBlip(int resolution, int median_spacing,
00467 ColPartition** parts);
00468
00469
00470
00471 bool SpacingEqual(int spacing, int resolution) const;
00472
00473
00474
00475 bool SpacingsEqual(const ColPartition& other, int resolution) const;
00476
00477
00478
00479
00480 bool SummedSpacingOK(const ColPartition& other,
00481 int spacing, int resolution) const;
00482
00483
00484
00485 int BottomSpacingMargin(int resolution) const;
00486
00487
00488
00489 int TopSpacingMargin(int resolution) const;
00490
00491
00492
00493 bool SizesSimilar(const ColPartition& other) const;
00494
00495
00496
00497
00498
00499
00500
00501 static void LeftEdgeRun(ColPartition_IT* part_it,
00502 ICOORD* start, ICOORD* end);
00503
00504
00505
00506
00507
00508
00509 static void RightEdgeRun(ColPartition_IT* part_it,
00510 ICOORD* start, ICOORD* end);
00511
00512
00513
00514
00515
00516 int left_margin_;
00517
00518 int right_margin_;
00519
00520 TBOX bounding_box_;
00521
00522 int median_bottom_;
00523 int median_top_;
00524
00525 int median_size_;
00526
00527 BlobRegionType blob_type_;
00528
00529 bool good_width_;
00530
00531 bool good_column_;
00532
00533 bool left_key_tab_;
00534
00535 bool right_key_tab_;
00536
00537
00538
00539
00540
00541
00542 int left_key_;
00543 int right_key_;
00544
00545 PolyBlockType type_;
00546
00547 BLOBNBOX_CLIST boxes_;
00548
00549 ICOORD vertical_;
00550
00551 ColPartition_CLIST upper_partners_;
00552
00553 ColPartition_CLIST lower_partners_;
00554
00555 WorkingPartSet* working_set_;
00556
00557
00558 bool block_owned_;
00559
00560
00561
00562
00563
00564
00565
00566 int first_column_;
00567 int last_column_;
00568
00569 ColPartitionSet* column_set_;
00570
00571 int side_step_;
00572 int top_spacing_;
00573 int bottom_spacing_;
00574
00575
00576
00577
00578 PolyBlockType type_before_table_;
00579 bool inside_table_column_;
00580
00581
00582 ColPartition* nearest_neighbor_above_;
00583
00584 ColPartition* nearest_neighbor_below_;
00585 int space_above_;
00586 int space_below_;
00587 int space_to_left_;
00588 int space_to_right_;
00589 };
00590
00591
00592 typedef BBGrid<ColPartition,
00593 ColPartition_CLIST,
00594 ColPartition_C_IT> ColPartitionGrid;
00595 typedef GridSearch<ColPartition,
00596 ColPartition_CLIST,
00597 ColPartition_C_IT> ColPartitionGridSearch;
00598
00599 }
00600
00601 #endif // TESSERACT_TEXTORD_COLPARTITION_H__