Tesseract 3.01
/data/source/tesseract-ocr/ccstruct/blobs.h
Go to the documentation of this file.
00001 /* -*-C-*-
00002  ********************************************************************************
00003  *
00004  * File:        blobs.h  (Formerly blobs.h)
00005  * Description:  Blob definition
00006  * Author:       Mark Seaman, OCR Technology
00007  * Created:      Fri Oct 27 15:39:52 1989
00008  * Modified:     Thu Mar 28 15:33:38 1991 (Mark Seaman) marks@hpgrlt
00009  * Language:     C
00010  * Package:      N/A
00011  * Status:       Experimental (Do Not Distribute)
00012  *
00013  * (c) Copyright 1989, Hewlett-Packard Company.
00014  ** Licensed under the Apache License, Version 2.0 (the "License");
00015  ** you may not use this file except in compliance with the License.
00016  ** You may obtain a copy of the License at
00017  ** http://www.apache.org/licenses/LICENSE-2.0
00018  ** Unless required by applicable law or agreed to in writing, software
00019  ** distributed under the License is distributed on an "AS IS" BASIS,
00020  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00021  ** See the License for the specific language governing permissions and
00022  ** limitations under the License.
00023  *
00024  *********************************************************************************/
00025 
00026 #ifndef BLOBS_H
00027 #define BLOBS_H
00028 
00029 /*----------------------------------------------------------------------
00030               I n c l u d e s
00031 ----------------------------------------------------------------------*/
00032 #include "rect.h"
00033 #include "vecfuncs.h"
00034 
00035 class BLOCK;
00036 class C_BLOB;
00037 class DENORM;
00038 class ROW;
00039 class WERD;
00040 
00041 /*----------------------------------------------------------------------
00042               T y p e s
00043 ----------------------------------------------------------------------*/
// Number of entries in the EDGEPT::flags array (concavity, length etc.).
#define EDGEPTFLAGS 4
00045 
// Widths of pieces.
// NOTE(review): widths is declared with a single element; records holding
// more entries are presumably over-allocated by whoever builds them
// (see blobs_widths) -- confirm before changing this layout.
typedef struct WIDTH_RECORD {
  int num_chars;  // presumably the number of characters covered -- confirm.
  int widths[1];  // first width entry; see the layout note above.
} WIDTH_RECORD;
00051 
00052 struct TPOINT {
00053   void operator+=(const TPOINT& other) {
00054     x += other.x;
00055     y += other.y;
00056   }
00057   void operator/=(int divisor) {
00058     x /= divisor;
00059     y /= divisor;
00060   }
00061 
00062   inT16 x;                       // absolute x coord.
00063   inT16 y;                       // absolute y coord.
00064 };
00065 typedef TPOINT VECTOR;           // structure for coordinates.
00066 
00067 struct EDGEPT {
00068   EDGEPT() : next(NULL), prev(NULL) {
00069     memset(flags, 0, EDGEPTFLAGS * sizeof(flags[0]));
00070   }
00071   EDGEPT(const EDGEPT& src) : next(NULL), prev(NULL) {
00072     CopyFrom(src);
00073   }
00074   EDGEPT& operator=(const EDGEPT& src) {
00075     CopyFrom(src);
00076     return *this;
00077   }
00078   // Copies the data elements, but leaves the pointers untouched.
00079   void CopyFrom(const EDGEPT& src) {
00080     pos = src.pos;
00081     vec = src.vec;
00082     memcpy(flags, src.flags, EDGEPTFLAGS * sizeof(flags[0]));
00083   }
00084   // Accessors to hide or reveal a cut edge from feature extractors.
00085   void Hide() {
00086     flags[0] = true;
00087   }
00088   void Reveal() {
00089     flags[0] = false;
00090   }
00091   bool IsHidden() const {
00092     return flags[0] != 0;
00093   }
00094 
00095   TPOINT pos;                    // position
00096   VECTOR vec;                    // vector to next point
00097   // TODO(rays) Remove flags and replace with
00098   // is_hidden, runlength, dir, and fixed. The only use
00099   // of the flags other than is_hidden is in polyaprx.cpp.
00100   char flags[EDGEPTFLAGS];       // concavity, length etc
00101   EDGEPT* next;                  // anticlockwise element
00102   EDGEPT* prev;                  // clockwise element
00103 };
00104 
00105 struct TESSLINE {
00106   TESSLINE() : is_hole(false), loop(NULL), next(NULL) {}
00107   TESSLINE(const TESSLINE& src) : loop(NULL), next(NULL) {
00108     CopyFrom(src);
00109   }
00110   ~TESSLINE() {
00111     Clear();
00112   }
00113   TESSLINE& operator=(const TESSLINE& src) {
00114     CopyFrom(src);
00115     return *this;
00116   }
00117   // Consume the circular list of EDGEPTs to make a TESSLINE.
00118   static TESSLINE* BuildFromOutlineList(EDGEPT* outline);
00119   // Copies the data and the outline, but leaves next untouched.
00120   void CopyFrom(const TESSLINE& src);
00121   // Deletes owned data.
00122   void Clear();
00123   // Normalize in-place using the DENORM.
00124   void Normalize(const DENORM& denorm);
00125   // Rotates by the given rotation in place.
00126   void Rotate(const FCOORD rotation);
00127   // Moves by the given vec in place.
00128   void Move(const ICOORD vec);
00129   // Scales by the given factor in place.
00130   void Scale(float factor);
00131   // Sets up the start and vec members of the loop from the pos members.
00132   void SetupFromPos();
00133   // Recomputes the bounding box from the points in the loop.
00134   void ComputeBoundingBox();
00135   // Computes the min and max cross product of the outline points with the
00136   // given vec and returns the results in min_xp and max_xp. Geometrically
00137   // this is the left and right edge of the outline perpendicular to the
00138   // given direction, but to get the distance units correct, you would
00139   // have to divide by the modulus of vec.
00140   void MinMaxCrossProduct(const TPOINT vec, int* min_xp, int* max_xp) const;
00141 
00142   TBOX bounding_box() const;
00143   // Returns true if the point is contained within the outline box.
00144   bool Contains(const TPOINT& pt) {
00145     return topleft.x <= pt.x && pt.x <= botright.x &&
00146            botright.y <= pt.y && pt.y <= topleft.y;
00147   }
00148 
00149   void plot(ScrollView* window, ScrollView::Color color,
00150             ScrollView::Color child_color);
00151 
00152   int BBArea() const {
00153     return (botright.x - topleft.x) * (topleft.y - botright.y);
00154   }
00155 
00156   TPOINT topleft;                // Top left of loop.
00157   TPOINT botright;               // Bottom right of loop.
00158   TPOINT start;                  // Start of loop.
00159   bool is_hole;                  // True if this is a hole/child outline.
00160   EDGEPT *loop;                  // Edgeloop.
00161   TESSLINE *next;                // Next outline in blob.
00162 };                               // Outline structure.
00163 
00164 struct TBLOB {
00165   TBLOB() : outlines(NULL), next(NULL) {}
00166   TBLOB(const TBLOB& src) : outlines(NULL), next(NULL) {
00167     CopyFrom(src);
00168   }
00169   ~TBLOB() {
00170     Clear();
00171   }
00172   TBLOB& operator=(const TBLOB& src) {
00173     CopyFrom(src);
00174     return *this;
00175   }
00176   // Factory to build a TBLOB from a C_BLOB with polygonal
00177   // approximation along the way.
00178   static TBLOB* PolygonalCopy(C_BLOB* src);
00179   // Copies the data and the outlines, but leaves next untouched.
00180   void CopyFrom(const TBLOB& src);
00181   // Deletes owned data.
00182   void Clear();
00183   // Normalize in-place using the DENORM.
00184   void Normalize(const DENORM& denorm);
00185   // Rotates by the given rotation in place.
00186   void Rotate(const FCOORD rotation);
00187   // Moves by the given vec in place.
00188   void Move(const ICOORD vec);
00189   // Scales by the given factor in place.
00190   void Scale(float factor);
00191   // Recomputes the bounding boxes of the outlines.
00192   void ComputeBoundingBoxes();
00193 
00194   // Returns the number of outlines.
00195   int NumOutlines() const;
00196 
00197   TBOX bounding_box() const;
00198 
00199   void plot(ScrollView* window, ScrollView::Color color,
00200             ScrollView::Color child_color);
00201 
00202   int BBArea() const {
00203     int total_area = 0;
00204     for (TESSLINE* outline = outlines; outline != NULL; outline = outline->next)
00205       total_area += outline->BBArea();
00206     return total_area;
00207   }
00208 
00209   TESSLINE *outlines;            // List of outlines in blob.
00210   TBLOB *next;                   // Next blob in block.
00211 };                               // Blob structure.
00212 
00213 int count_blobs(TBLOB *blobs);
00214 
00215 struct TWERD {
00216   TWERD() : blobs(NULL), latin_script(false), next(NULL) {}
00217   TWERD(const TWERD& src) : blobs(NULL), next(NULL) {
00218     CopyFrom(src);
00219   }
00220   ~TWERD() {
00221     Clear();
00222   }
00223   TWERD& operator=(const TWERD& src) {
00224     CopyFrom(src);
00225     return *this;
00226   }
00227   // Factory to build a TWERD from a (C_BLOB) WERD, with polygonal
00228   // approximation along the way.
00229   static TWERD* PolygonalCopy(WERD* src);
00230   // Setup for Baseline normalization, recording the normalization in the
00231   // DENORM, but doesn't do any normalization.
00232   void SetupBLNormalize(const BLOCK* block, const ROW* row,
00233                         float x_height, bool numeric_mode,
00234                         DENORM* denorm) const;
00235   // Normalize in-place using the DENORM.
00236   void Normalize(const DENORM& denorm);
00237   // Copies the data and the blobs, but leaves next untouched.
00238   void CopyFrom(const TWERD& src);
00239   // Deletes owned data.
00240   void Clear();
00241   // Recomputes the bounding boxes of the blobs.
00242   void ComputeBoundingBoxes();
00243 
00244   // Returns the number of blobs in the word.
00245   int NumBlobs() const {
00246     return count_blobs(blobs);
00247   }
00248   TBOX bounding_box() const;
00249 
00250   // Merges the blobs from start to end, not including end, and deletes
00251   // the blobs between start and end.
00252   void MergeBlobs(int start, int end);
00253 
00254   void plot(ScrollView* window);
00255 
00256   TBLOB* blobs;                  // blobs in word.
00257   bool latin_script;             // This word is in a latin-based script.
00258   TWERD* next;                   // next word.
00259 };
00260 
00261 /*----------------------------------------------------------------------
00262               M a c r o s
00263 ----------------------------------------------------------------------*/
00264 /**********************************************************************
00265  * free_widths
00266  *
00267  * Free the memory taken up by a width array.
00268  **********************************************************************/
// Frees w with memfree unless it is null.
// The body is wrapped in do { } while (0) so the macro behaves as a single
// statement: a following `else` binds to the caller's `if`, not to the
// null check inside the macro. The argument is evaluated exactly once.
#define free_widths(w)                       \
  do {                                       \
    void *free_widths_arg_ = (w);            \
    if (free_widths_arg_) {                  \
      memfree(free_widths_arg_);             \
    }                                        \
  } while (0)
00271 
00272 /*----------------------------------------------------------------------
00273               F u n c t i o n s
00274 ----------------------------------------------------------------------*/
00275 // TODO(rays) This will become a member of TBLOB when TBLOB's definition
00276 // moves to blobs.h
00277 TBOX TBLOB_bounding_box(const TBLOB* blob);
00278 
00279 void blob_origin(TBLOB *blob,      /*blob to compute on */
00280                  TPOINT *origin);  /*return value */
00281 
00282                                  /*blob to compute on */
00283 void blob_bounding_box(const TBLOB *blob,
00284                        TPOINT *topleft,  // Bounding box.
00285                        TPOINT *botright);
00286 
00287 void blobs_bounding_box(TBLOB *blobs, TPOINT *topleft, TPOINT *botright); 
00288 
00289 void blobs_origin(TBLOB *blobs,     /*blob to compute on */
00290                   TPOINT *origin);  /*return value */
00291 
00292                                  /*blob to compute on */
00293 WIDTH_RECORD *blobs_widths(TBLOB *blobs); 
00294 
00295 bool divisible_blob(TBLOB *blob, bool italic_blob, TPOINT* location);
00296 
00297 void divide_blobs(TBLOB *blob, TBLOB *other_blob, bool italic_blob,
00298                   const TPOINT& location);
00299 
00300 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines