// Tesseract 3.01
00001 /********************************************************************** 00002 * File: blobbox.h (Formerly blobnbox.h) 00003 * Description: Code for the textord blob class. 00004 * Author: Ray Smith 00005 * Created: Thu Jul 30 09:08:51 BST 1992 00006 * 00007 * (C) Copyright 1992, Hewlett-Packard Ltd. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 * 00018 **********************************************************************/ 00019 00020 #ifndef BLOBBOX_H 00021 #define BLOBBOX_H 00022 00023 #include "clst.h" 00024 #include "elst2.h" 00025 #include "werd.h" 00026 #include "ocrblock.h" 00027 #include "statistc.h" 00028 00029 enum PITCH_TYPE 00030 { 00031 PITCH_DUNNO, //insufficient data 00032 PITCH_DEF_FIXED, //definitely fixed 00033 PITCH_MAYBE_FIXED, //could be 00034 PITCH_DEF_PROP, 00035 PITCH_MAYBE_PROP, 00036 PITCH_CORR_FIXED, 00037 PITCH_CORR_PROP 00038 }; 00039 00040 // The possible tab-stop types of each side of a BLOBNBOX. 00041 enum TabType { 00042 TT_NONE, // Not a tab. 00043 TT_DELETED, // Not a tab after detailed analysis. 00044 TT_UNCONFIRMED, // Initial designation of a tab-stop candidate. 00045 TT_FAKE, // Added by interpolation. 00046 TT_CONFIRMED, // Aligned with neighbours. 00047 TT_VLINE // Detected as a vertical line. 00048 }; 00049 00050 // The possible region types of a BLOBNBOX. 00051 // Note: keep all the text types > BRT_UNKNOWN and all the image types less. 
00052 // Keep in sync with kBlobTypes in colpartition.cpp and BoxColor, and the 00053 // *Type static functions below. 00054 enum BlobRegionType { 00055 BRT_NOISE, // Neither text nor image. 00056 BRT_HLINE, // Horizontal separator line. 00057 BRT_VLINE, // Vertical separator line. 00058 BRT_RECTIMAGE, // Rectangular image. 00059 BRT_POLYIMAGE, // Non-rectangular image. 00060 BRT_UNKNOWN, // Not determined yet. 00061 BRT_VERT_TEXT, // Vertical alignment, not necessarily vertically oriented. 00062 BRT_TEXT, // Convincing text. 00063 00064 BRT_COUNT // Number of possibilities. 00065 }; 00066 00067 // enum for elements of arrays that refer to neighbours. 00068 enum BlobNeighbourDir { 00069 BND_LEFT, 00070 BND_BELOW, 00071 BND_RIGHT, 00072 BND_ABOVE, 00073 BND_COUNT 00074 }; 00075 00076 // BlobTextFlowType indicates the quality of neighbouring information 00077 // related to a chain of connected components, either horizontally or 00078 // vertically. Also used by ColPartition for the collection of blobs 00079 // within, which should all have the same value in most cases. 00080 enum BlobTextFlowType { 00081 BTFT_NONE, // No text flow set yet. 00082 BTFT_NONTEXT, // Flow too poor to be likely text. 00083 BTFT_NEIGHBOURS, // Neighbours support flow in this direction. 00084 BTFT_CHAIN, // There is a weak chain of text in this direction. 00085 BTFT_STRONG_CHAIN, // There is a strong chain of text in this direction. 00086 BTFT_TEXT_ON_IMAGE, // There is a strong chain of text on an image. 00087 BTFT_LEADER, // Leader dots/dashes etc. 00088 BTFT_COUNT 00089 }; 00090 00091 // Returns true if type1 dominates type2 in a merge. Mostly determined by the 00092 // ordering of the enum, but NONTEXT dominates everything else, and LEADER 00093 // dominates nothing. 00094 // The function is anti-symmetric (t1 > t2) === !(t2 > t1), except that 00095 // this cannot be true if t1 == t2, so the result is undefined. 
00096 inline bool DominatesInMerge(BlobTextFlowType type1, BlobTextFlowType type2) { 00097 // NONTEXT dominates everything. 00098 if (type1 == BTFT_NONTEXT) return true; 00099 if (type2 == BTFT_NONTEXT) return false; 00100 // LEADER always loses. 00101 if (type1 == BTFT_LEADER) return false; 00102 if (type2 == BTFT_LEADER) return true; 00103 // With those out of the way, the ordering of the enum determines the result. 00104 return type1 >= type2; 00105 } 00106 00107 namespace tesseract { 00108 class ColPartition; 00109 } 00110 00111 class BLOBNBOX; 00112 ELISTIZEH (BLOBNBOX) 00113 class BLOBNBOX:public ELIST_LINK 00114 { 00115 public: 00116 BLOBNBOX() { 00117 ConstructionInit(); 00118 } 00119 explicit BLOBNBOX(C_BLOB *srcblob) { 00120 box = srcblob->bounding_box(); 00121 ConstructionInit(); 00122 cblob_ptr = srcblob; 00123 area = static_cast<int>(srcblob->area()); 00124 } 00125 static BLOBNBOX* RealBlob(C_OUTLINE* outline) { 00126 C_BLOB* blob = new C_BLOB(outline); 00127 return new BLOBNBOX(blob); 00128 } 00129 00130 void rotate_box(FCOORD rotation); 00131 void rotate(FCOORD rotation); 00132 void translate_box(ICOORD v) { 00133 if (IsDiacritic()) { 00134 box.move(v); 00135 base_char_top_ += v.y(); 00136 base_char_bottom_ += v.y(); 00137 } else { 00138 box.move(v); 00139 set_diacritic_box(box); 00140 } 00141 } 00142 void merge(BLOBNBOX *nextblob); 00143 void really_merge(BLOBNBOX* other); 00144 void chop( // fake chop blob 00145 BLOBNBOX_IT *start_it, // location of this 00146 BLOBNBOX_IT *blob_it, // iterator 00147 FCOORD rotation, // for landscape 00148 float xheight); // line height 00149 00150 void NeighbourGaps(int gaps[BND_COUNT]) const; 00151 void MinMaxGapsClipped(int* h_min, int* h_max, 00152 int* v_min, int* v_max) const; 00153 int GoodTextBlob() const; 00154 00155 // Returns true, and sets vert_possible/horz_possible if the blob has some 00156 // feature that makes it individually appear to flow one way. 
00157 // eg if it has a high aspect ratio, yet has a complex shape, such as a 00158 // joined word in Latin, Arabic, or Hindi, rather than being a -, I, l, 1. 00159 bool DefiniteIndividualFlow(); 00160 00161 // Returns true if there is no tabstop violation in merging this and other. 00162 bool ConfirmNoTabViolation(const BLOBNBOX& other) const; 00163 00164 // Returns true if other has a similar stroke width to this. 00165 bool MatchingStrokeWidth(const BLOBNBOX& other, 00166 double fractional_tolerance, 00167 double constant_tolerance) const; 00168 00169 // Returns a bounding box of the outline contained within the 00170 // given horizontal range. 00171 TBOX BoundsWithinLimits(int left, int right); 00172 00173 // Simple accessors. 00174 const TBOX& bounding_box() const { 00175 return box; 00176 } 00177 // Set the bounding box. Use with caution. 00178 // Normally use compute_bounding_box instead. 00179 void set_bounding_box(const TBOX& new_box) { 00180 box = new_box; 00181 base_char_top_ = box.top(); 00182 base_char_bottom_ = box.bottom(); 00183 } 00184 void compute_bounding_box() { 00185 box = cblob_ptr->bounding_box(); 00186 base_char_top_ = box.top(); 00187 base_char_bottom_ = box.bottom(); 00188 } 00189 const TBOX& reduced_box() const { 00190 return red_box; 00191 } 00192 void set_reduced_box(TBOX new_box) { 00193 red_box = new_box; 00194 reduced = TRUE; 00195 } 00196 inT32 enclosed_area() const { 00197 return area; 00198 } 00199 bool joined_to_prev() const { 00200 return joined != 0; 00201 } 00202 bool red_box_set() const { 00203 return reduced != 0; 00204 } 00205 int repeated_set() const { 00206 return repeated_set_; 00207 } 00208 void set_repeated_set(int set_id) { 00209 repeated_set_ = set_id; 00210 } 00211 C_BLOB *cblob() const { 00212 return cblob_ptr; 00213 } 00214 TabType left_tab_type() const { 00215 return left_tab_type_; 00216 } 00217 void set_left_tab_type(TabType new_type) { 00218 left_tab_type_ = new_type; 00219 } 00220 TabType right_tab_type() 
const { 00221 return right_tab_type_; 00222 } 00223 void set_right_tab_type(TabType new_type) { 00224 right_tab_type_ = new_type; 00225 } 00226 BlobRegionType region_type() const { 00227 return region_type_; 00228 } 00229 void set_region_type(BlobRegionType new_type) { 00230 region_type_ = new_type; 00231 } 00232 BlobTextFlowType flow() const { 00233 return flow_; 00234 } 00235 void set_flow(BlobTextFlowType value) { 00236 flow_ = value; 00237 } 00238 bool vert_possible() const { 00239 return vert_possible_; 00240 } 00241 void set_vert_possible(bool value) { 00242 vert_possible_ = value; 00243 } 00244 bool horz_possible() const { 00245 return horz_possible_; 00246 } 00247 void set_horz_possible(bool value) { 00248 horz_possible_ = value; 00249 } 00250 int left_rule() const { 00251 return left_rule_; 00252 } 00253 void set_left_rule(int new_left) { 00254 left_rule_ = new_left; 00255 } 00256 int right_rule() const { 00257 return right_rule_; 00258 } 00259 void set_right_rule(int new_right) { 00260 right_rule_ = new_right; 00261 } 00262 int left_crossing_rule() const { 00263 return left_crossing_rule_; 00264 } 00265 void set_left_crossing_rule(int new_left) { 00266 left_crossing_rule_ = new_left; 00267 } 00268 int right_crossing_rule() const { 00269 return right_crossing_rule_; 00270 } 00271 void set_right_crossing_rule(int new_right) { 00272 right_crossing_rule_ = new_right; 00273 } 00274 float horz_stroke_width() const { 00275 return horz_stroke_width_; 00276 } 00277 void set_horz_stroke_width(float width) { 00278 horz_stroke_width_ = width; 00279 } 00280 float vert_stroke_width() const { 00281 return vert_stroke_width_; 00282 } 00283 void set_vert_stroke_width(float width) { 00284 vert_stroke_width_ = width; 00285 } 00286 float area_stroke_width() const { 00287 return area_stroke_width_; 00288 } 00289 tesseract::ColPartition* owner() const { 00290 return owner_; 00291 } 00292 void set_owner(tesseract::ColPartition* new_owner) { 00293 owner_ = new_owner; 00294 } 
00295 bool leader_on_left() const { 00296 return leader_on_left_; 00297 } 00298 void set_leader_on_left(bool flag) { 00299 leader_on_left_ = flag; 00300 } 00301 bool leader_on_right() const { 00302 return leader_on_right_; 00303 } 00304 void set_leader_on_right(bool flag) { 00305 leader_on_right_ = flag; 00306 } 00307 BLOBNBOX* neighbour(BlobNeighbourDir n) const { 00308 return neighbours_[n]; 00309 } 00310 bool good_stroke_neighbour(BlobNeighbourDir n) const { 00311 return good_stroke_neighbours_[n]; 00312 } 00313 void set_neighbour(BlobNeighbourDir n, BLOBNBOX* neighbour, bool good) { 00314 neighbours_[n] = neighbour; 00315 good_stroke_neighbours_[n] = good; 00316 } 00317 bool IsDiacritic() const { 00318 return base_char_top_ != box.top() || base_char_bottom_ != box.bottom(); 00319 } 00320 int base_char_top() const { 00321 return base_char_top_; 00322 } 00323 int base_char_bottom() const { 00324 return base_char_bottom_; 00325 } 00326 void set_diacritic_box(const TBOX& diacritic_box) { 00327 base_char_top_ = diacritic_box.top(); 00328 base_char_bottom_ = diacritic_box.bottom(); 00329 } 00330 bool UniquelyVertical() const { 00331 return vert_possible_ && !horz_possible_; 00332 } 00333 bool UniquelyHorizontal() const { 00334 return horz_possible_ && !vert_possible_; 00335 } 00336 00337 // Returns true if the region type is text. 00338 static bool IsTextType(BlobRegionType type) { 00339 return type == BRT_TEXT || type == BRT_VERT_TEXT; 00340 } 00341 // Returns true if the region type is image. 00342 static bool IsImageType(BlobRegionType type) { 00343 return type == BRT_RECTIMAGE || type == BRT_POLYIMAGE; 00344 } 00345 // Returns true if the region type is line. 00346 static bool IsLineType(BlobRegionType type) { 00347 return type == BRT_HLINE || type == BRT_VLINE; 00348 } 00349 // Returns true if the region type cannot be merged. 
00350 static bool UnMergeableType(BlobRegionType type) { 00351 return IsLineType(type) || IsImageType(type); 00352 } 00353 00354 static ScrollView::Color TextlineColor(BlobRegionType region_type, 00355 BlobTextFlowType flow_type); 00356 00357 #ifndef GRAPHICS_DISABLED 00358 // Keep in sync with BlobRegionType. 00359 ScrollView::Color BoxColor() const; 00360 00361 void plot(ScrollView* window, // window to draw in 00362 ScrollView::Color blob_colour, // for outer bits 00363 ScrollView::Color child_colour) { // for holes 00364 if (cblob_ptr != NULL) 00365 cblob_ptr->plot(window, blob_colour, child_colour); 00366 } 00367 #endif 00368 00369 // Initializes the bulk of the members to default values for use at 00370 // construction time. 00371 void ConstructionInit() { 00372 cblob_ptr = NULL; 00373 area = 0; 00374 area_stroke_width_ = 0.0f; 00375 horz_stroke_width_ = 0.0f; 00376 vert_stroke_width_ = 0.0f; 00377 ReInit(); 00378 } 00379 // Initializes members set by StrokeWidth and beyond, without discarding 00380 // stored area and strokewidth values, which are expensive to calculate. 
00381 void ReInit() { 00382 joined = false; 00383 reduced = false; 00384 repeated_set_ = 0; 00385 left_tab_type_ = TT_NONE; 00386 right_tab_type_ = TT_NONE; 00387 region_type_ = BRT_UNKNOWN; 00388 flow_ = BTFT_NONE; 00389 left_rule_ = 0; 00390 right_rule_ = 0; 00391 left_crossing_rule_ = 0; 00392 right_crossing_rule_ = 0; 00393 if (area_stroke_width_ == 0.0f && area > 0 && cblob() != NULL) 00394 area_stroke_width_ = 2.0f * area / cblob()->perimeter(); 00395 owner_ = NULL; 00396 base_char_top_ = box.top(); 00397 base_char_bottom_ = box.bottom(); 00398 horz_possible_ = false; 00399 vert_possible_ = false; 00400 leader_on_left_ = false; 00401 leader_on_right_ = false; 00402 ClearNeighbours(); 00403 } 00404 00405 void ClearNeighbours() { 00406 for (int n = 0; n < BND_COUNT; ++n) { 00407 neighbours_[n] = NULL; 00408 good_stroke_neighbours_[n] = false; 00409 } 00410 } 00411 00412 private: 00413 C_BLOB *cblob_ptr; // edgestep blob 00414 TBOX box; // bounding box 00415 TBOX red_box; // bounding box 00416 int area:30; // enclosed area 00417 int joined:1; // joined to prev 00418 int reduced:1; // reduced box set 00419 int repeated_set_; // id of the set of repeated blobs 00420 TabType left_tab_type_; // Indicates tab-stop assessment 00421 TabType right_tab_type_; // Indicates tab-stop assessment 00422 BlobRegionType region_type_; // Type of region this blob belongs to 00423 BlobTextFlowType flow_; // Quality of text flow. 00424 inT16 left_rule_; // x-coord of nearest but not crossing rule line 00425 inT16 right_rule_; // x-coord of nearest but not crossing rule line 00426 inT16 left_crossing_rule_; // x-coord of nearest or crossing rule line 00427 inT16 right_crossing_rule_; // x-coord of nearest or crossing rule line 00428 inT16 base_char_top_; // y-coord of top/bottom of diacritic base, 00429 inT16 base_char_bottom_; // if it exists else top/bottom of this blob. 
00430 float horz_stroke_width_; // Median horizontal stroke width 00431 float vert_stroke_width_; // Median vertical stroke width 00432 float area_stroke_width_; // Stroke width from area/perimeter ratio. 00433 tesseract::ColPartition* owner_; // Who will delete me when I am not needed 00434 BLOBNBOX* neighbours_[BND_COUNT]; 00435 bool good_stroke_neighbours_[BND_COUNT]; 00436 bool horz_possible_; // Could be part of horizontal flow. 00437 bool vert_possible_; // Could be part of vertical flow. 00438 bool leader_on_left_; // There is a leader to the left. 00439 bool leader_on_right_; // There is a leader to the right. 00440 }; 00441 00442 class TO_ROW: public ELIST2_LINK 00443 { 00444 public: 00445 static const int kErrorWeight = 3; 00446 00447 TO_ROW() { 00448 clear(); 00449 } //empty 00450 TO_ROW( //constructor 00451 BLOBNBOX *blob, //from first blob 00452 float top, //of row //target height 00453 float bottom, 00454 float row_size); 00455 00456 float max_y() const { //access function 00457 return y_max; 00458 } 00459 float min_y() const { 00460 return y_min; 00461 } 00462 float mean_y() const { 00463 return (y_min + y_max) / 2.0f; 00464 } 00465 float initial_min_y() const { 00466 return initial_y_min; 00467 } 00468 float line_m() const { //access to line fit 00469 return m; 00470 } 00471 float line_c() const { 00472 return c; 00473 } 00474 float line_error() const { 00475 return error; 00476 } 00477 float parallel_c() const { 00478 return para_c; 00479 } 00480 float parallel_error() const { 00481 return para_error; 00482 } 00483 float believability() const { //baseline goodness 00484 return credibility; 00485 } 00486 float intercept() const { //real parallel_c 00487 return y_origin; 00488 } 00489 void add_blob( //put in row 00490 BLOBNBOX *blob, //blob to add 00491 float top, //of row //target height 00492 float bottom, 00493 float row_size); 00494 void insert_blob( //put in row in order 00495 BLOBNBOX *blob); 00496 00497 BLOBNBOX_LIST *blob_list() { //get list 
00498 return &blobs; 00499 } 00500 00501 void set_line( //set line spec 00502 float new_m, //line to set 00503 float new_c, 00504 float new_error) { 00505 m = new_m; 00506 c = new_c; 00507 error = new_error; 00508 } 00509 void set_parallel_line( //set fixed gradient line 00510 float gradient, //page gradient 00511 float new_c, 00512 float new_error) { 00513 para_c = new_c; 00514 para_error = new_error; 00515 credibility = 00516 (float) (blobs.length () - kErrorWeight * new_error); 00517 y_origin = (float) (new_c / sqrt (1 + gradient * gradient)); 00518 //real intercept 00519 } 00520 void set_limits( //set min,max 00521 float new_min, //bottom and 00522 float new_max) { //top of row 00523 y_min = new_min; 00524 y_max = new_max; 00525 } 00526 void compute_vertical_projection(); 00527 //get projection 00528 00529 bool rep_chars_marked() const { 00530 return num_repeated_sets_ != -1; 00531 } 00532 void clear_rep_chars_marked() { 00533 num_repeated_sets_ = -1; 00534 } 00535 int num_repeated_sets() const { 00536 return num_repeated_sets_; 00537 } 00538 void set_num_repeated_sets(int num_sets) { 00539 num_repeated_sets_ = num_sets; 00540 } 00541 00542 // true when dead 00543 BOOL8 merged; 00544 BOOL8 all_caps; // had no ascenders 00545 BOOL8 used_dm_model; // in guessing pitch 00546 inT16 projection_left; // start of projection 00547 inT16 projection_right; // start of projection 00548 PITCH_TYPE pitch_decision; // how strong is decision 00549 float fixed_pitch; // pitch or 0 00550 float fp_space; // sp if fixed pitch 00551 float fp_nonsp; // nonsp if fixed pitch 00552 float pr_space; // sp if prop 00553 float pr_nonsp; // non sp if prop 00554 float spacing; // to "next" row 00555 float xheight; // of line 00556 int xheight_evidence; // number of blobs of height xheight 00557 float ascrise; // ascenders 00558 float descdrop; // descenders 00559 inT32 min_space; // min size for real space 00560 inT32 max_nonspace; // max size of non-space 00561 inT32 space_threshold; // 
space vs nonspace 00562 float kern_size; // average non-space 00563 float space_size; // average space 00564 WERD_LIST rep_words; // repeated chars 00565 ICOORDELT_LIST char_cells; // fixed pitch cells 00566 QSPLINE baseline; // curved baseline 00567 STATS projection; // vertical projection 00568 00569 private: 00570 void clear(); // clear all values to reasonable defaults 00571 00572 BLOBNBOX_LIST blobs; //blobs in row 00573 float y_min; //coords 00574 float y_max; 00575 float initial_y_min; 00576 float m, c; //line spec 00577 float error; //line error 00578 float para_c; //constrained fit 00579 float para_error; 00580 float y_origin; //rotated para_c; 00581 float credibility; //baseline believability 00582 int num_repeated_sets_; // number of sets of repeated blobs 00583 // set to -1 if we have not searched 00584 // for repeated blobs in this row yet 00585 }; 00586 00587 ELIST2IZEH (TO_ROW) 00588 class TO_BLOCK:public ELIST_LINK 00589 { 00590 public: 00591 TO_BLOCK() : pitch_decision(PITCH_DUNNO) { 00592 clear(); 00593 } //empty 00594 TO_BLOCK( //constructor 00595 BLOCK *src_block); //real block 00596 ~TO_BLOCK(); 00597 00598 void clear(); // clear all scalar members. 00599 00600 TO_ROW_LIST *get_rows() { //access function 00601 return &row_list; 00602 } 00603 00604 // Rotate all the blobnbox lists and the underlying block. Then update the 00605 // median size statistic from the blobs list. 00606 void rotate(const FCOORD& rotation) { 00607 BLOBNBOX_LIST* blobnbox_list[] = {&blobs, &underlines, &noise_blobs, 00608 &small_blobs, &large_blobs, NULL}; 00609 for (BLOBNBOX_LIST** list = blobnbox_list; *list != NULL; ++list) { 00610 BLOBNBOX_IT it(*list); 00611 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 00612 it.data()->rotate(rotation); 00613 } 00614 } 00615 // Rotate the block 00616 ASSERT_HOST(block->poly_block() != NULL); 00617 block->rotate(rotation); 00618 // Update the median size statistic from the blobs list. 
00619 STATS widths(0, block->bounding_box().width()); 00620 STATS heights(0, block->bounding_box().height()); 00621 BLOBNBOX_IT blob_it(&blobs); 00622 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { 00623 widths.add(blob_it.data()->bounding_box().width(), 1); 00624 heights.add(blob_it.data()->bounding_box().height(), 1); 00625 } 00626 block->set_median_size(static_cast<int>(widths.median() + 0.5), 00627 static_cast<int>(heights.median() + 0.5)); 00628 } 00629 00630 void print_rows() { //debug info 00631 TO_ROW_IT row_it = &row_list; 00632 TO_ROW *row; 00633 00634 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); 00635 row_it.forward ()) { 00636 row = row_it.data (); 00637 printf ("Row range (%g,%g), para_c=%g, blobcount=" INT32FORMAT 00638 "\n", row->min_y (), row->max_y (), row->parallel_c (), 00639 row->blob_list ()->length ()); 00640 } 00641 } 00642 00643 // Draw the blobs on on the various lists in the block in different colors. 00644 #ifndef GRAPHICS_DISABLED 00645 void plot_graded_blobs(ScrollView* to_win); 00646 #endif 00647 00648 BLOBNBOX_LIST blobs; //medium size 00649 BLOBNBOX_LIST underlines; //underline blobs 00650 BLOBNBOX_LIST noise_blobs; //very small 00651 BLOBNBOX_LIST small_blobs; //fairly small 00652 BLOBNBOX_LIST large_blobs; //big blobs 00653 BLOCK *block; //real block 00654 PITCH_TYPE pitch_decision; //how strong is decision 00655 float line_spacing; //estimate 00656 // line_size is a lower-bound estimate of the font size in pixels of 00657 // the text in the block (with ascenders and descenders), being a small 00658 // (1.25) multiple of the median height of filtered blobs. 00659 // In most cases the font size will be bigger, but it will be closer 00660 // if the text is allcaps, or in a no-x-height script. 
00661 float line_size; //estimate 00662 float max_blob_size; //line assignment limit 00663 float baseline_offset; //phase shift 00664 float xheight; //median blob size 00665 float fixed_pitch; //pitch or 0 00666 float kern_size; //average non-space 00667 float space_size; //average space 00668 inT32 min_space; //min definite space 00669 inT32 max_nonspace; //max definite 00670 float fp_space; //sp if fixed pitch 00671 float fp_nonsp; //nonsp if fixed pitch 00672 float pr_space; //sp if prop 00673 float pr_nonsp; //non sp if prop 00674 TO_ROW *key_row; //starting row 00675 00676 private: 00677 TO_ROW_LIST row_list; //temporary rows 00678 }; 00679 00680 ELISTIZEH (TO_BLOCK) 00681 extern double_VAR_H (textord_error_weight, 3, 00682 "Weighting for error in believability"); 00683 void find_cblob_limits( //get y limits 00684 C_BLOB *blob, //blob to search 00685 float leftx, //x limits 00686 float rightx, 00687 FCOORD rotation, //for landscape 00688 float &ymin, //output y limits 00689 float &ymax); 00690 void find_cblob_vlimits( //get y limits 00691 C_BLOB *blob, //blob to search 00692 float leftx, //x limits 00693 float rightx, 00694 float &ymin, //output y limits 00695 float &ymax); 00696 void find_cblob_hlimits( //get x limits 00697 C_BLOB *blob, //blob to search 00698 float bottomy, //y limits 00699 float topy, 00700 float &xmin, //output x limits 00701 float &xymax); 00702 C_BLOB *crotate_cblob( //rotate it 00703 C_BLOB *blob, //blob to search 00704 FCOORD rotation //for landscape 00705 ); 00706 TBOX box_next( //get bounding box 00707 BLOBNBOX_IT *it //iterator to blobds 00708 ); 00709 TBOX box_next_pre_chopped( //get bounding box 00710 BLOBNBOX_IT *it //iterator to blobds 00711 ); 00712 void vertical_cblob_projection( //project outlines 00713 C_BLOB *blob, //blob to project 00714 STATS *stats //output 00715 ); 00716 void vertical_coutline_projection( //project outlines 00717 C_OUTLINE *outline, //outline to project 00718 STATS *stats //output 00719 ); 00720 #ifndef 
GRAPHICS_DISABLED 00721 void plot_blob_list(ScrollView* win, // window to draw in 00722 BLOBNBOX_LIST *list, // blob list 00723 ScrollView::Color body_colour, // colour to draw 00724 ScrollView::Color child_colour); // colour of child 00725 #endif // GRAPHICS_DISABLED 00726 #endif