Tesseract 3.01
/data/source/tesseract-ocr/ccstruct/blobbox.h
Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        blobbox.h  (Formerly blobnbox.h)
00003  * Description: Code for the textord blob class.
00004  * Author:                                      Ray Smith
00005  * Created:                                     Thu Jul 30 09:08:51 BST 1992
00006  *
00007  * (C) Copyright 1992, Hewlett-Packard Ltd.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
00020 #ifndef           BLOBBOX_H
00021 #define           BLOBBOX_H
00022 
00023 #include          "clst.h"
00024 #include          "elst2.h"
00025 #include          "werd.h"
00026 #include          "ocrblock.h"
00027 #include          "statistc.h"
00028 // Pitch decision stored in the pitch_decision member of TO_ROW and TO_BLOCK.
00029 enum PITCH_TYPE
00030 {
00031   PITCH_DUNNO,                   //insufficient data
00032   PITCH_DEF_FIXED,               //definitely fixed
00033   PITCH_MAYBE_FIXED,             //could be fixed
00034   PITCH_DEF_PROP,                //presumably definitely proportional - confirm
00035   PITCH_MAYBE_PROP,              //presumably could be proportional - confirm
00036   PITCH_CORR_FIXED,              //TODO(review): meaning of CORR not shown here
00037   PITCH_CORR_PROP                //TODO(review): meaning of CORR not shown here
00038 };
00039 
00040 // The possible tab-stop types of each side (left/right) of a BLOBNBOX.
00041 enum TabType {
00042   TT_NONE,         // Not a tab. BLOBNBOX::ReInit() sets both sides to this.
00043   TT_DELETED,      // Not a tab after detailed analysis.
00044   TT_UNCONFIRMED,  // Initial designation of a tab-stop candidate.
00045   TT_FAKE,         // Added by interpolation.
00046   TT_CONFIRMED,    // Aligned with neighbours.
00047   TT_VLINE         // Detected as a vertical line.
00048 };
00049 
00050 // The possible region types of a BLOBNBOX.
00051 // Note: keep all the text types > BRT_UNKNOWN and all the image types less.
00052 // Keep in sync with kBlobTypes in colpartition.cpp and BoxColor, and the
00053 // BLOBNBOX::IsTextType/IsImageType/IsLineType static functions below.
00054 enum BlobRegionType {
00055   BRT_NOISE,      // Neither text nor image.
00056   BRT_HLINE,      // Horizontal separator line.
00057   BRT_VLINE,      // Vertical separator line.
00058   BRT_RECTIMAGE,  // Rectangular image.
00059   BRT_POLYIMAGE,  // Non-rectangular image.
00060   BRT_UNKNOWN,    // Not determined yet. Set by BLOBNBOX::ReInit().
00061   BRT_VERT_TEXT,  // Vertical alignment, not necessarily vertically oriented.
00062   BRT_TEXT,       // Convincing text.
00063 
00064   BRT_COUNT       // Number of possibilities (not itself a region type).
00065 };
00066 
00067 // Direction index for the per-blob arrays that refer to neighbours.
00068 enum BlobNeighbourDir {
00069   BND_LEFT,
00070   BND_BELOW,
00071   BND_RIGHT,
00072   BND_ABOVE,
00073   BND_COUNT   // Number of directions; size of BLOBNBOX's neighbour arrays.
00074 };
00075 
00076 // BlobTextFlowType indicates the quality of neighbouring information
00077 // related to a chain of connected components, either horizontally or
00078 // vertically. Also used by ColPartition for the collection of blobs
00079 // within, which should all have the same value in most cases.
00080 enum BlobTextFlowType {  // NOTE: DominatesInMerge() depends on this order.
00081   BTFT_NONE,           // No text flow set yet. Set by BLOBNBOX::ReInit().
00082   BTFT_NONTEXT,        // Flow too poor to be likely text.
00083   BTFT_NEIGHBOURS,     // Neighbours support flow in this direction.
00084   BTFT_CHAIN,          // There is a weak chain of text in this direction.
00085   BTFT_STRONG_CHAIN,   // There is a strong chain of text in this direction.
00086   BTFT_TEXT_ON_IMAGE,  // There is a strong chain of text on an image.
00087   BTFT_LEADER,         // Leader dots/dashes etc.
00088   BTFT_COUNT           // Number of flow types (not itself a flow type).
00089 };
00090 
00091 // Returns true if type1 dominates type2 in a merge. Mostly determined by the
00092 // ordering of the enum, but NONTEXT dominates everything else, and LEADER
00093 // dominates nothing.
00094 // For type1 != type2 the function is anti-symmetric: f(t1, t2) == !f(t2, t1).
00095 // For equal types it returns false for LEADER and true for everything else.
00096 inline bool DominatesInMerge(BlobTextFlowType type1, BlobTextFlowType type2) {
00097   // NONTEXT dominates everything (including another NONTEXT).
00098   if (type1 == BTFT_NONTEXT) return true;
00099   if (type2 == BTFT_NONTEXT) return false;
00100   // LEADER always loses (a LEADER does not even dominate another LEADER).
00101   if (type1 == BTFT_LEADER) return false;
00102   if (type2 == BTFT_LEADER) return true;
00103   // With those out of the way, the ordering of the enum determines the result.
00104   return type1 >= type2;  // >= means equal remaining types count as dominating.
00105 }
00106 
00107 namespace tesseract {
00108 class ColPartition;  // Forward declaration: this header only holds pointers to it.
00109 }
00110 
00111 class BLOBNBOX;
00112 ELISTIZEH (BLOBNBOX)  // presumably declares the BLOBNBOX_LIST/BLOBNBOX_IT used below - confirm
00113 class BLOBNBOX:public ELIST_LINK  // A C_BLOB plus its box and layout-analysis state.
00114 {
00115   public:
00116     BLOBNBOX() {  // Empty blob: no C_BLOB attached, zero area.
00117       ConstructionInit();
00118     }
00119     explicit BLOBNBOX(C_BLOB *srcblob) {  // srcblob is dereferenced: must not be NULL.
00120       box = srcblob->bounding_box();  // Set first: ReInit() copies box top/bottom.
00121       ConstructionInit();  // Resets cblob_ptr and area, so both are set afterwards.
00122       cblob_ptr = srcblob;
00123       area = static_cast<int>(srcblob->area());
00124     }  // NOTE(review): area_stroke_width_ is still 0 here; a later ReInit() computes it.
00125     static BLOBNBOX* RealBlob(C_OUTLINE* outline) {  // New BLOBNBOX around a new C_BLOB.
00126       C_BLOB* blob = new C_BLOB(outline);
00127       return new BLOBNBOX(blob);  // Both objects are heap-allocated with new.
00128     }
00129 
00130     void rotate_box(FCOORD rotation);
00131     void rotate(FCOORD rotation);
00132     void translate_box(ICOORD v) {  // Moves box by v, keeping base-char limits in step.
00133       if (IsDiacritic()) {
00134         box.move(v);
00135         base_char_top_ += v.y();  // Limits differ from box: shift them with it.
00136         base_char_bottom_ += v.y();
00137       } else {
00138         box.move(v);
00139         set_diacritic_box(box);  // Limits equal box: re-copy so they stay equal.
00140       }
00141     }
00142     void merge(BLOBNBOX *nextblob);
00143     void really_merge(BLOBNBOX* other);
00144     void chop(                        // fake chop blob
00145               BLOBNBOX_IT *start_it,  // location of this
00146               BLOBNBOX_IT *blob_it,   // iterator
00147               FCOORD rotation,        // for landscape
00148               float xheight);         // line height
00149 
00150     void NeighbourGaps(int gaps[BND_COUNT]) const;  // presumably one gap per BlobNeighbourDir
00151     void MinMaxGapsClipped(int* h_min, int* h_max,
00152                            int* v_min, int* v_max) const;
00153     int GoodTextBlob() const;
00154 
00155     // Returns true, and sets vert_possible/horz_possible if the blob has some
00156     // feature that makes it individually appear to flow one way.
00157     // eg if it has a high aspect ratio, yet has a complex shape, such as a
00158     // joined word in Latin, Arabic, or Hindi, rather than being a -, I, l, 1.
00159     bool DefiniteIndividualFlow();
00160 
00161     // Returns true if there is no tabstop violation in merging this and other.
00162     bool ConfirmNoTabViolation(const BLOBNBOX& other) const;
00163 
00164     // Returns true if other has a similar stroke width to this.
00165     bool MatchingStrokeWidth(const BLOBNBOX& other,
00166                              double fractional_tolerance,
00167                              double constant_tolerance) const;
00168 
00169     // Returns a bounding box of the outline contained within the
00170     // given horizontal range.
00171     TBOX BoundsWithinLimits(int left, int right);
00172 
00173     // Simple accessors.
00174     const TBOX& bounding_box() const {
00175       return box;
00176     }
00177     // Set the bounding box (and reset the base-char limits to it). Use with
00178     // caution. Normally use compute_bounding_box instead.
00179     void set_bounding_box(const TBOX& new_box) {
00180       box = new_box;
00181       base_char_top_ = box.top();
00182       base_char_bottom_ = box.bottom();
00183     }
00184     void compute_bounding_box() {  // Dereferences cblob_ptr: requires a C_BLOB.
00185       box = cblob_ptr->bounding_box();
00186       base_char_top_ = box.top();  // Base-char limits are reset to the new box.
00187       base_char_bottom_ = box.bottom();
00188     }
00189     const TBOX& reduced_box() const {  // Only meaningful if red_box_set().
00190       return red_box;
00191     }
00192     void set_reduced_box(TBOX new_box) {
00193       red_box = new_box;
00194       reduced = TRUE;  // Marks the reduced box as set; cleared again by ReInit().
00195     }
00196     inT32 enclosed_area() const {
00197       return area;
00198     }
00199     bool joined_to_prev() const {
00200       return joined != 0;  // joined is a 1-bit field, so test against 0.
00201     }
00202     bool red_box_set() const {
00203       return reduced != 0;  // reduced is a 1-bit field, so test against 0.
00204     }
00205     int repeated_set() const {  // Id of the set of repeated blobs; ReInit() sets 0.
00206       return repeated_set_;
00207     }
00208     void set_repeated_set(int set_id) {
00209       repeated_set_ = set_id;
00210     }
00211     C_BLOB *cblob() const {  // May be NULL (default-constructed BLOBNBOX).
00212       return cblob_ptr;
00213     }
00214     TabType left_tab_type() const {
00215       return left_tab_type_;
00216     }
00217     void set_left_tab_type(TabType new_type) {
00218       left_tab_type_ = new_type;
00219     }
00220     TabType right_tab_type() const {
00221       return right_tab_type_;
00222     }
00223     void set_right_tab_type(TabType new_type) {
00224       right_tab_type_ = new_type;
00225     }
00226     BlobRegionType region_type() const {
00227       return region_type_;
00228     }
00229     void set_region_type(BlobRegionType new_type) {
00230       region_type_ = new_type;
00231     }
00232     BlobTextFlowType flow() const {
00233       return flow_;
00234     }
00235     void set_flow(BlobTextFlowType value) {
00236       flow_ = value;
00237     }
00238     bool vert_possible() const {
00239       return vert_possible_;
00240     }
00241     void set_vert_possible(bool value) {
00242       vert_possible_ = value;
00243     }
00244     bool horz_possible() const {
00245       return horz_possible_;
00246     }
00247     void set_horz_possible(bool value) {
00248       horz_possible_ = value;
00249     }
00250     int left_rule() const {
00251       return left_rule_;
00252     }
00253     void set_left_rule(int new_left) {  // NOTE(review): int stored in an inT16 member.
00254       left_rule_ = new_left;
00255     }
00256     int right_rule() const {
00257       return right_rule_;
00258     }
00259     void set_right_rule(int new_right) {  // NOTE(review): int stored in an inT16 member.
00260       right_rule_ = new_right;
00261     }
00262     int left_crossing_rule() const {
00263       return left_crossing_rule_;
00264     }
00265     void set_left_crossing_rule(int new_left) {  // NOTE(review): int stored in inT16.
00266       left_crossing_rule_ = new_left;
00267     }
00268     int right_crossing_rule() const {
00269       return right_crossing_rule_;
00270     }
00271     void set_right_crossing_rule(int new_right) {  // NOTE(review): int stored in inT16.
00272       right_crossing_rule_ = new_right;
00273     }
00274     float horz_stroke_width() const {
00275       return horz_stroke_width_;
00276     }
00277     void set_horz_stroke_width(float width) {
00278       horz_stroke_width_ = width;
00279     }
00280     float vert_stroke_width() const {
00281       return vert_stroke_width_;
00282     }
00283     void set_vert_stroke_width(float width) {
00284       vert_stroke_width_ = width;
00285     }
00286     float area_stroke_width() const {  // No setter in this header; see ReInit().
00287       return area_stroke_width_;
00288     }
00289     tesseract::ColPartition* owner() const {  // NULL after ReInit().
00290       return owner_;
00291     }
00292     void set_owner(tesseract::ColPartition* new_owner) {
00293       owner_ = new_owner;
00294     }
00295     bool leader_on_left() const {
00296       return leader_on_left_;
00297     }
00298     void set_leader_on_left(bool flag) {
00299       leader_on_left_ = flag;
00300     }
00301     bool leader_on_right() const {
00302       return leader_on_right_;
00303     }
00304     void set_leader_on_right(bool flag) {
00305       leader_on_right_ = flag;
00306     }
00307     BLOBNBOX* neighbour(BlobNeighbourDir n) const {  // n is not range-checked.
00308       return neighbours_[n];
00309     }
00310     bool good_stroke_neighbour(BlobNeighbourDir n) const {  // n is not range-checked.
00311       return good_stroke_neighbours_[n];
00312     }
00313     void set_neighbour(BlobNeighbourDir n, BLOBNBOX* neighbour, bool good) {
00314       neighbours_[n] = neighbour;  // Pointer and "good" flag are always set together.
00315       good_stroke_neighbours_[n] = good;
00316     }
00317     bool IsDiacritic() const {  // True if the base-char limits differ from the box.
00318       return base_char_top_ != box.top() || base_char_bottom_ != box.bottom();
00319     }
00320     int base_char_top() const {
00321       return base_char_top_;
00322     }
00323     int base_char_bottom() const {
00324       return base_char_bottom_;
00325     }
00326     void set_diacritic_box(const TBOX& diacritic_box) {  // Only top/bottom are kept.
00327       base_char_top_ = diacritic_box.top();
00328       base_char_bottom_ = diacritic_box.bottom();
00329     }
00330     bool UniquelyVertical() const {  // Vertical flow possible, horizontal not.
00331       return vert_possible_ && !horz_possible_;
00332     }
00333     bool UniquelyHorizontal() const {  // Horizontal flow possible, vertical not.
00334       return horz_possible_ && !vert_possible_;
00335     }
00336 
00337     // Returns true if the region type is text.
00338     static bool IsTextType(BlobRegionType type) {
00339       return type == BRT_TEXT || type == BRT_VERT_TEXT;
00340     }
00341     // Returns true if the region type is image.
00342     static bool IsImageType(BlobRegionType type) {
00343       return type == BRT_RECTIMAGE || type == BRT_POLYIMAGE;
00344     }
00345     // Returns true if the region type is line.
00346     static bool IsLineType(BlobRegionType type) {
00347       return type == BRT_HLINE || type == BRT_VLINE;
00348     }
00349     // Returns true if the region type cannot be merged (any line or image type).
00350     static bool UnMergeableType(BlobRegionType type) {
00351       return IsLineType(type) || IsImageType(type);
00352     }
00353 
00354     static ScrollView::Color TextlineColor(BlobRegionType region_type,
00355                                            BlobTextFlowType flow_type);
00356 
00357 #ifndef GRAPHICS_DISABLED
00358     // Keep in sync with BlobRegionType.
00359     ScrollView::Color BoxColor() const;
00360 
00361     void plot(ScrollView* window,                // window to draw in
00362               ScrollView::Color blob_colour,     // for outer bits
00363               ScrollView::Color child_colour) {  // for holes
00364       if (cblob_ptr != NULL)  // Nothing is drawn when there is no C_BLOB.
00365         cblob_ptr->plot(window, blob_colour, child_colour);
00366     }
00367 #endif
00368 
00369   // Initializes the bulk of the members to default values for use at
00370   // construction time. Does not touch box, which ReInit() reads.
00371   void ConstructionInit() {
00372     cblob_ptr = NULL;
00373     area = 0;
00374     area_stroke_width_ = 0.0f;
00375     horz_stroke_width_ = 0.0f;
00376     vert_stroke_width_ = 0.0f;
00377     ReInit();
00378   }
00379   // Initializes members set by StrokeWidth and beyond, without discarding
00380   // stored area and strokewidth values, which are expensive to calculate.
00381   void ReInit() {
00382     joined = false;
00383     reduced = false;
00384     repeated_set_ = 0;
00385     left_tab_type_ = TT_NONE;
00386     right_tab_type_ = TT_NONE;
00387     region_type_ = BRT_UNKNOWN;
00388     flow_ = BTFT_NONE;
00389     left_rule_ = 0;
00390     right_rule_ = 0;
00391     left_crossing_rule_ = 0;
00392     right_crossing_rule_ = 0;
00393     if (area_stroke_width_ == 0.0f && area > 0 && cblob() != NULL)  // Compute once only.
00394       area_stroke_width_ = 2.0f * area / cblob()->perimeter();
00395     owner_ = NULL;
00396     base_char_top_ = box.top();  // Limits equal to box: IsDiacritic() becomes false.
00397     base_char_bottom_ = box.bottom();
00398     horz_possible_ = false;
00399     vert_possible_ = false;
00400     leader_on_left_ = false;
00401     leader_on_right_ = false;
00402     ClearNeighbours();
00403   }
00404 
00405   void ClearNeighbours() {  // Sets every direction to no neighbour / not good.
00406     for (int n = 0; n < BND_COUNT; ++n) {
00407       neighbours_[n] = NULL;
00408       good_stroke_neighbours_[n] = false;
00409     }
00410   }
00411 
00412  private:
00413   C_BLOB *cblob_ptr;            // edgestep blob
00414   TBOX box;                     // bounding box
00415   TBOX red_box;                 // reduced bounding box, valid if reduced is set
00416   int area:30;                  // enclosed area
00417   int joined:1;                 // joined to prev (1-bit field: test with != 0)
00418   int reduced:1;                // reduced box set (1-bit field: test with != 0)
00419   int repeated_set_;            // id of the set of repeated blobs
00420   TabType left_tab_type_;       // Indicates tab-stop assessment
00421   TabType right_tab_type_;      // Indicates tab-stop assessment
00422   BlobRegionType region_type_;  // Type of region this blob belongs to
00423   BlobTextFlowType flow_;       // Quality of text flow.
00424   inT16 left_rule_;             // x-coord of nearest but not crossing rule line
00425   inT16 right_rule_;            // x-coord of nearest but not crossing rule line
00426   inT16 left_crossing_rule_;    // x-coord of nearest or crossing rule line
00427   inT16 right_crossing_rule_;   // x-coord of nearest or crossing rule line
00428   inT16 base_char_top_;         // y-coord of top/bottom of diacritic base,
00429   inT16 base_char_bottom_;      // if it exists else top/bottom of this blob.
00430   float horz_stroke_width_;     // Median horizontal stroke width
00431   float vert_stroke_width_;     // Median vertical stroke width
00432   float area_stroke_width_;     // Stroke width from area/perimeter ratio.
00433   tesseract::ColPartition* owner_;  // Who will delete me when I am not needed
00434   BLOBNBOX* neighbours_[BND_COUNT];         // Indexed by BlobNeighbourDir.
00435   bool good_stroke_neighbours_[BND_COUNT];  // Indexed by BlobNeighbourDir.
00436   bool horz_possible_;           // Could be part of horizontal flow.
00437   bool vert_possible_;           // Could be part of vertical flow.
00438   bool leader_on_left_;          // There is a leader to the left.
00439   bool leader_on_right_;         // There is a leader to the right.
00440 };
00441 
00442 class TO_ROW: public ELIST2_LINK  // A row of blobs with its line fits and spacing data.
00443 {
00444   public:
00445     static const int kErrorWeight = 3;  // Weight of the fit error in believability().
00446 
00447     TO_ROW() {
00448       clear();
00449     }                            //empty
00450     TO_ROW(                 //constructor
00451            BLOBNBOX *blob,  //from first blob
00452            float top,       //top of row
00453            float bottom,    //bottom of row
00454            float row_size); //presumably the target height - confirm
00455 
00456     float max_y() const {  //access function
00457       return y_max;
00458     }
00459     float min_y() const {
00460       return y_min;
00461     }
00462     float mean_y() const {  //midpoint of y_min and y_max
00463       return (y_min + y_max) / 2.0f;
00464     }
00465     float initial_min_y() const {
00466       return initial_y_min;
00467     }
00468     float line_m() const {  //access to line fit
00469       return m;
00470     }
00471     float line_c() const {
00472       return c;
00473     }
00474     float line_error() const {
00475       return error;
00476     }
00477     float parallel_c() const {
00478       return para_c;
00479     }
00480     float parallel_error() const {
00481       return para_error;
00482     }
00483     float believability() const {  //baseline goodness
00484       return credibility;
00485     }
00486     float intercept() const {  //real parallel_c
00487       return y_origin;
00488     }
00489     void add_blob(                 //put in row
00490                   BLOBNBOX *blob,  //blob to add
00491                   float top,       //top of row
00492                   float bottom,    //bottom of row
00493                   float row_size); //presumably the target height - confirm
00494     void insert_blob(  //put in row in order
00495                      BLOBNBOX *blob);
00496 
00497     BLOBNBOX_LIST *blob_list() {  //get list
00498       return &blobs;
00499     }
00500 
00501     void set_line(              //set line spec
00502                   float new_m,  //line to set
00503                   float new_c,
00504                   float new_error) {
00505       m = new_m;
00506       c = new_c;
00507       error = new_error;
00508     }
00509     void set_parallel_line(                 //set fixed gradient line
00510                            float gradient,  //page gradient
00511                            float new_c,
00512                            float new_error) {
00513       para_c = new_c;
00514       para_error = new_error;
00515       credibility =  //blob count minus the weighted fit error
00516         (float) (blobs.length () - kErrorWeight * new_error);
00517       y_origin = (float) (new_c / sqrt (1 + gradient * gradient));
00518       //real intercept; gradient is used only for this scaling
00519     }
00520     void set_limits(                  //set min,max
00521                     float new_min,    //bottom and
00522                     float new_max) {  //top of row
00523       y_min = new_min;
00524       y_max = new_max;
00525     }
00526     void compute_vertical_projection();
00527     //get projection
00528 
00529     bool rep_chars_marked() const {  // false while num_repeated_sets_ is -1
00530       return num_repeated_sets_ != -1;
00531     }
00532     void clear_rep_chars_marked() {  // back to the "not searched yet" state (-1)
00533       num_repeated_sets_ = -1;
00534     }
00535     int num_repeated_sets() const {
00536       return num_repeated_sets_;
00537     }
00538     void set_num_repeated_sets(int num_sets) {
00539       num_repeated_sets_ = num_sets;
00540     }
00541 
00542                                  // true when dead
00543     BOOL8 merged;
00544     BOOL8 all_caps;              // had no ascenders
00545     BOOL8 used_dm_model;         // in guessing pitch
00546     inT16 projection_left;       // start of projection
00547     inT16 projection_right;      // presumably the end of projection - confirm
00548     PITCH_TYPE pitch_decision;   // how strong is decision
00549     float fixed_pitch;           // pitch or 0
00550     float fp_space;              // sp if fixed pitch
00551     float fp_nonsp;              // nonsp if fixed pitch
00552     float pr_space;              // sp if prop
00553     float pr_nonsp;              // non sp if prop
00554     float spacing;               // to "next" row
00555     float xheight;               // of line
00556     int xheight_evidence;        // number of blobs of height xheight
00557     float ascrise;               // ascenders
00558     float descdrop;              // descenders
00559     inT32 min_space;             // min size for real space
00560     inT32 max_nonspace;          // max size of non-space
00561     inT32 space_threshold;       // space vs nonspace
00562     float kern_size;             // average non-space
00563     float space_size;            // average space
00564     WERD_LIST rep_words;         // repeated chars
00565     ICOORDELT_LIST char_cells;   // fixed pitch cells
00566     QSPLINE baseline;            // curved baseline
00567     STATS projection;            // vertical projection
00568 
00569   private:
00570     void clear();  // clear all values to reasonable defaults
00571 
00572     BLOBNBOX_LIST blobs;         //blobs in row
00573     float y_min;                 //coords
00574     float y_max;
00575     float initial_y_min;
00576     float m, c;                  //line spec
00577     float error;                 //line error
00578     float para_c;                //constrained fit
00579     float para_error;
00580     float y_origin;              //rotated para_c;
00581     float credibility;           //baseline believability
00582     int num_repeated_sets_;      // number of sets of repeated blobs
00583                                  // set to -1 if we have not searched
00584                                  // for repeated blobs in this row yet
00585 };
00586 
00587 ELIST2IZEH (TO_ROW)  // presumably declares the TO_ROW_LIST/TO_ROW_IT used by TO_BLOCK - confirm
00588 class TO_BLOCK:public ELIST_LINK  // A BLOCK plus its size-graded blob lists and rows.
00589 {
00590   public:
00591     TO_BLOCK() : pitch_decision(PITCH_DUNNO) {
00592       clear();
00593     }                            //empty
00594     TO_BLOCK(                    //constructor
00595              BLOCK *src_block);  //real block
00596     ~TO_BLOCK();
00597 
00598     void clear();  // clear all scalar members.
00599 
00600     TO_ROW_LIST *get_rows() {  //access function
00601       return &row_list;
00602     }
00603 
00604     // Rotate all the blobnbox lists and the underlying block. Then update the
00605     // median size statistic from the blobs list.
00606     void rotate(const FCOORD& rotation) {
00607       BLOBNBOX_LIST* blobnbox_list[] = {&blobs, &underlines, &noise_blobs,
00608                                         &small_blobs, &large_blobs, NULL};
00609       for (BLOBNBOX_LIST** list = blobnbox_list; *list != NULL; ++list) {  // NULL ends the array.
00610         BLOBNBOX_IT it(*list);
00611         for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00612           it.data()->rotate(rotation);
00613         }
00614       }
00615       // Rotate the block. Aborts via ASSERT_HOST if it has no poly_block.
00616       ASSERT_HOST(block->poly_block() != NULL);
00617       block->rotate(rotation);
00618       // Update the median size statistic from the blobs list (only blobs,
00619       // not the other lists), using the rotated block's box for the ranges.
00620       STATS widths(0, block->bounding_box().width());
00621       STATS heights(0, block->bounding_box().height());
00622       BLOBNBOX_IT blob_it(&blobs);
00623       for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
00624         widths.add(blob_it.data()->bounding_box().width(), 1);
00625         heights.add(blob_it.data()->bounding_box().height(), 1);
00626       }
00627       block->set_median_size(static_cast<int>(widths.median() + 0.5),  // + 0.5 rounds.
00628                              static_cast<int>(heights.median() + 0.5));
00629     }
00630 
00631     void print_rows() {  //debug info: one printf line per row
00632       TO_ROW_IT row_it = &row_list;
00633       TO_ROW *row;
00634 
00635       for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
00636       row_it.forward ()) {
00637         row = row_it.data ();
00638         printf ("Row range (%g,%g), para_c=%g, blobcount=" INT32FORMAT
00639           "\n", row->min_y (), row->max_y (), row->parallel_c (),
00640           row->blob_list ()->length ());
00641       }
00642     }
00643 
00644     // Draw the blobs on the various lists in the block in different colors.
00645 #ifndef GRAPHICS_DISABLED
00646     void plot_graded_blobs(ScrollView* to_win);
00647 #endif
00648 
00649     BLOBNBOX_LIST blobs;         //medium size
00650     BLOBNBOX_LIST underlines;    //underline blobs
00651     BLOBNBOX_LIST noise_blobs;   //very small
00652     BLOBNBOX_LIST small_blobs;   //fairly small
00653     BLOBNBOX_LIST large_blobs;   //big blobs
00654     BLOCK *block;                //real block
00655     PITCH_TYPE pitch_decision;   //how strong is decision
00656     float line_spacing;          //estimate
00657     // line_size is a lower-bound estimate of the font size in pixels of
00658     // the text in the block (with ascenders and descenders), being a small
00659     // (1.25) multiple of the median height of filtered blobs.
00660     // In most cases the font size will be bigger, but it will be closer
00661     // if the text is allcaps, or in a no-x-height script.
00662     float line_size;             //estimate
00663     float max_blob_size;         //line assignment limit
00664     float baseline_offset;       //phase shift
00665     float xheight;               //median blob size
00666     float fixed_pitch;           //pitch or 0
00667     float kern_size;             //average non-space
00668     float space_size;            //average space
00669     inT32 min_space;             //min definite space
00670     inT32 max_nonspace;          //max definite non-space
00671     float fp_space;              //sp if fixed pitch
00672     float fp_nonsp;              //nonsp if fixed pitch
00673     float pr_space;              //sp if prop
00674     float pr_nonsp;              //non sp if prop
00675     TO_ROW *key_row;             //starting row
00676 
00677    private:
00678     TO_ROW_LIST row_list;        //temporary rows
00679 };
00680 
00681 ELISTIZEH (TO_BLOCK)  // presumably declares TO_BLOCK_LIST/TO_BLOCK_IT - confirm
00681 extern double_VAR_H (textord_error_weight, 3,
00682 "Weighting for error in believability");  // NOTE(review): TO_ROW uses kErrorWeight, not this.
00683 void find_cblob_limits(                  //get y limits
00684                        C_BLOB *blob,     //blob to search
00685                        float leftx,      //x limits
00686                        float rightx,
00687                        FCOORD rotation,  //for landscape
00688                        float &ymin,      //output y limits
00689                        float &ymax);
00690 void find_cblob_vlimits(               //get y limits
00691                         C_BLOB *blob,  //blob to search
00692                         float leftx,   //x limits
00693                         float rightx,
00694                         float &ymin,   //output y limits
00695                         float &ymax);
00696 void find_cblob_hlimits(                //get x limits
00697                         C_BLOB *blob,   //blob to search
00698                         float bottomy,  //y limits
00699                         float topy,
00700                         float &xmin,    //output x limits
00701                         float &xymax);  //max x output (the name has a stray 'y')
00702 C_BLOB *crotate_cblob(                 //rotate it
00703                       C_BLOB *blob,    //blob to rotate
00704                       FCOORD rotation  //for landscape
00705                      );
00706 TBOX box_next(                 //get bounding box
00707              BLOBNBOX_IT *it  //iterator to blobs
00708             );
00709 TBOX box_next_pre_chopped(                 //get bounding box
00710                          BLOBNBOX_IT *it  //iterator to blobs
00711                         );
00712 void vertical_cblob_projection(               //project outlines
00713                                C_BLOB *blob,  //blob to project
00714                                STATS *stats   //output
00715                               );
00716 void vertical_coutline_projection(                     //project outlines
00717                                   C_OUTLINE *outline,  //outline to project
00718                                   STATS *stats         //output
00719                                  );
00720 #ifndef GRAPHICS_DISABLED
00721 void plot_blob_list(ScrollView* win,                   // window to draw in
00722                     BLOBNBOX_LIST *list,               // blob list
00723                     ScrollView::Color body_colour,     // colour to draw
00724                     ScrollView::Color child_colour);   // colour of child
00725 #endif  // GRAPHICS_DISABLED
00726 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines