Tesseract 3.01
|
00001 /* -*-C-*- 00002 ******************************************************************************** 00003 * 00004 * File: blobs.h (Formerly blobs.h) 00005 * Description: Blob definition 00006 * Author: Mark Seaman, OCR Technology 00007 * Created: Fri Oct 27 15:39:52 1989 00008 * Modified: Thu Mar 28 15:33:38 1991 (Mark Seaman) marks@hpgrlt 00009 * Language: C 00010 * Package: N/A 00011 * Status: Experimental (Do Not Distribute) 00012 * 00013 * (c) Copyright 1989, Hewlett-Packard Company. 00014 ** Licensed under the Apache License, Version 2.0 (the "License"); 00015 ** you may not use this file except in compliance with the License. 00016 ** You may obtain a copy of the License at 00017 ** http://www.apache.org/licenses/LICENSE-2.0 00018 ** Unless required by applicable law or agreed to in writing, software 00019 ** distributed under the License is distributed on an "AS IS" BASIS, 00020 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00021 ** See the License for the specific language governing permissions and 00022 ** limitations under the License. 00023 * 00024 *********************************************************************************/ 00025 00026 #ifndef BLOBS_H 00027 #define BLOBS_H 00028 00029 /*---------------------------------------------------------------------- 00030 I n c l u d e s 00031 ----------------------------------------------------------------------*/ 00032 #include "rect.h" 00033 #include "vecfuncs.h" 00034 00035 class BLOCK; 00036 class C_BLOB; 00037 class DENORM; 00038 class ROW; 00039 class WERD; 00040 00041 /*---------------------------------------------------------------------- 00042 T y p e s 00043 ----------------------------------------------------------------------*/ 00044 #define EDGEPTFLAGS 4 /*concavity,length etc. */ 00045 00046 typedef struct 00047 { /* Widths of pieces */ 00048 int num_chars; 00049 int widths[1]; 00050 } WIDTH_RECORD; 00051 00052 struct TPOINT { 00053 void operator+=(const TPOINT& other) { 00054 x += other.x; 00055 y += other.y; 00056 } 00057 void operator/=(int divisor) { 00058 x /= divisor; 00059 y /= divisor; 00060 } 00061 00062 inT16 x; // absolute x coord. 00063 inT16 y; // absolute y coord. 00064 }; 00065 typedef TPOINT VECTOR; // structure for coordinates. 00066 00067 struct EDGEPT { 00068 EDGEPT() : next(NULL), prev(NULL) { 00069 memset(flags, 0, EDGEPTFLAGS * sizeof(flags[0])); 00070 } 00071 EDGEPT(const EDGEPT& src) : next(NULL), prev(NULL) { 00072 CopyFrom(src); 00073 } 00074 EDGEPT& operator=(const EDGEPT& src) { 00075 CopyFrom(src); 00076 return *this; 00077 } 00078 // Copies the data elements, but leaves the pointers untouched. 00079 void CopyFrom(const EDGEPT& src) { 00080 pos = src.pos; 00081 vec = src.vec; 00082 memcpy(flags, src.flags, EDGEPTFLAGS * sizeof(flags[0])); 00083 } 00084 // Accessors to hide or reveal a cut edge from feature extractors. 00085 void Hide() { 00086 flags[0] = true; 00087 } 00088 void Reveal() { 00089 flags[0] = false; 00090 } 00091 bool IsHidden() const { 00092 return flags[0] != 0; 00093 } 00094 00095 TPOINT pos; // position 00096 VECTOR vec; // vector to next point 00097 // TODO(rays) Remove flags and replace with 00098 // is_hidden, runlength, dir, and fixed. The only use 00099 // of the flags other than is_hidden is in polyaprx.cpp. 00100 char flags[EDGEPTFLAGS]; // concavity, length etc 00101 EDGEPT* next; // anticlockwise element 00102 EDGEPT* prev; // clockwise element 00103 }; 00104 00105 struct TESSLINE { 00106 TESSLINE() : is_hole(false), loop(NULL), next(NULL) {} 00107 TESSLINE(const TESSLINE& src) : loop(NULL), next(NULL) { 00108 CopyFrom(src); 00109 } 00110 ~TESSLINE() { 00111 Clear(); 00112 } 00113 TESSLINE& operator=(const TESSLINE& src) { 00114 CopyFrom(src); 00115 return *this; 00116 } 00117 // Consume the circular list of EDGEPTs to make a TESSLINE. 00118 static TESSLINE* BuildFromOutlineList(EDGEPT* outline); 00119 // Copies the data and the outline, but leaves next untouched. 00120 void CopyFrom(const TESSLINE& src); 00121 // Deletes owned data. 00122 void Clear(); 00123 // Normalize in-place using the DENORM. 00124 void Normalize(const DENORM& denorm); 00125 // Rotates by the given rotation in place. 00126 void Rotate(const FCOORD rotation); 00127 // Moves by the given vec in place. 00128 void Move(const ICOORD vec); 00129 // Scales by the given factor in place. 00130 void Scale(float factor); 00131 // Sets up the start and vec members of the loop from the pos members. 00132 void SetupFromPos(); 00133 // Recomputes the bounding box from the points in the loop. 00134 void ComputeBoundingBox(); 00135 // Computes the min and max cross product of the outline points with the 00136 // given vec and returns the results in min_xp and max_xp. Geometrically 00137 // this is the left and right edge of the outline perpendicular to the 00138 // given direction, but to get the distance units correct, you would 00139 // have to divide by the modulus of vec. 00140 void MinMaxCrossProduct(const TPOINT vec, int* min_xp, int* max_xp) const; 00141 00142 TBOX bounding_box() const; 00143 // Returns true if the point is contained within the outline box. 00144 bool Contains(const TPOINT& pt) { 00145 return topleft.x <= pt.x && pt.x <= botright.x && 00146 botright.y <= pt.y && pt.y <= topleft.y; 00147 } 00148 00149 void plot(ScrollView* window, ScrollView::Color color, 00150 ScrollView::Color child_color); 00151 00152 int BBArea() const { 00153 return (botright.x - topleft.x) * (topleft.y - botright.y); 00154 } 00155 00156 TPOINT topleft; // Top left of loop. 00157 TPOINT botright; // Bottom right of loop. 00158 TPOINT start; // Start of loop. 00159 bool is_hole; // True if this is a hole/child outline. 00160 EDGEPT *loop; // Edgeloop. 00161 TESSLINE *next; // Next outline in blob. 00162 }; // Outline structure. 00163 00164 struct TBLOB { 00165 TBLOB() : outlines(NULL), next(NULL) {} 00166 TBLOB(const TBLOB& src) : outlines(NULL), next(NULL) { 00167 CopyFrom(src); 00168 } 00169 ~TBLOB() { 00170 Clear(); 00171 } 00172 TBLOB& operator=(const TBLOB& src) { 00173 CopyFrom(src); 00174 return *this; 00175 } 00176 // Factory to build a TBLOB from a C_BLOB with polygonal 00177 // approximation along the way. 00178 static TBLOB* PolygonalCopy(C_BLOB* src); 00179 // Copies the data and the outlines, but leaves next untouched. 00180 void CopyFrom(const TBLOB& src); 00181 // Deletes owned data. 00182 void Clear(); 00183 // Normalize in-place using the DENORM. 00184 void Normalize(const DENORM& denorm); 00185 // Rotates by the given rotation in place. 00186 void Rotate(const FCOORD rotation); 00187 // Moves by the given vec in place. 00188 void Move(const ICOORD vec); 00189 // Scales by the given factor in place. 00190 void Scale(float factor); 00191 // Recomputes the bounding boxes of the outlines. 00192 void ComputeBoundingBoxes(); 00193 00194 // Returns the number of outlines. 00195 int NumOutlines() const; 00196 00197 TBOX bounding_box() const; 00198 00199 void plot(ScrollView* window, ScrollView::Color color, 00200 ScrollView::Color child_color); 00201 00202 int BBArea() const { 00203 int total_area = 0; 00204 for (TESSLINE* outline = outlines; outline != NULL; outline = outline->next) 00205 total_area += outline->BBArea(); 00206 return total_area; 00207 } 00208 00209 TESSLINE *outlines; // List of outlines in blob. 00210 TBLOB *next; // Next blob in block. 00211 }; // Blob structure. 00212 00213 int count_blobs(TBLOB *blobs); 00214 00215 struct TWERD { 00216 TWERD() : blobs(NULL), latin_script(false), next(NULL) {} 00217 TWERD(const TWERD& src) : blobs(NULL), next(NULL) { 00218 CopyFrom(src); 00219 } 00220 ~TWERD() { 00221 Clear(); 00222 } 00223 TWERD& operator=(const TWERD& src) { 00224 CopyFrom(src); 00225 return *this; 00226 } 00227 // Factory to build a TWERD from a (C_BLOB) WERD, with polygonal 00228 // approximation along the way. 00229 static TWERD* PolygonalCopy(WERD* src); 00230 // Setup for Baseline normalization, recording the normalization in the 00231 // DENORM, but doesn't do any normalization. 00232 void SetupBLNormalize(const BLOCK* block, const ROW* row, 00233 float x_height, bool numeric_mode, 00234 DENORM* denorm) const; 00235 // Normalize in-place using the DENORM. 00236 void Normalize(const DENORM& denorm); 00237 // Copies the data and the blobs, but leaves next untouched. 00238 void CopyFrom(const TWERD& src); 00239 // Deletes owned data. 00240 void Clear(); 00241 // Recomputes the bounding boxes of the blobs. 00242 void ComputeBoundingBoxes(); 00243 00244 // Returns the number of blobs in the word. 00245 int NumBlobs() const { 00246 return count_blobs(blobs); 00247 } 00248 TBOX bounding_box() const; 00249 00250 // Merges the blobs from start to end, not including end, and deletes 00251 // the blobs between start and end. 00252 void MergeBlobs(int start, int end); 00253 00254 void plot(ScrollView* window); 00255 00256 TBLOB* blobs; // blobs in word. 00257 bool latin_script; // This word is in a latin-based script. 00258 TWERD* next; // next word. 00259 }; 00260 00261 /*---------------------------------------------------------------------- 00262 M a c r o s 00263 ----------------------------------------------------------------------*/ 00264 /********************************************************************** 00265 * free_widths 00266 * 00267 * Free the memory taken up by a width array. 00268 **********************************************************************/ 00269 #define free_widths(w) \ 00270 if (w) memfree (w) 00271 00272 /*---------------------------------------------------------------------- 00273 F u n c t i o n s 00274 ----------------------------------------------------------------------*/ 00275 // TODO(rays) This will become a member of TBLOB when TBLOB's definition 00276 // moves to blobs.h 00277 TBOX TBLOB_bounding_box(const TBLOB* blob); 00278 00279 void blob_origin(TBLOB *blob, /*blob to compute on */ 00280 TPOINT *origin); /*return value */ 00281 00282 /*blob to compute on */ 00283 void blob_bounding_box(const TBLOB *blob, 00284 TPOINT *topleft, // Bounding box. 00285 TPOINT *botright); 00286 00287 void blobs_bounding_box(TBLOB *blobs, TPOINT *topleft, TPOINT *botright); 00288 00289 void blobs_origin(TBLOB *blobs, /*blob to compute on */ 00290 TPOINT *origin); /*return value */ 00291 00292 /*blob to compute on */ 00293 WIDTH_RECORD *blobs_widths(TBLOB *blobs); 00294 00295 bool divisible_blob(TBLOB *blob, bool italic_blob, TPOINT* location); 00296 00297 void divide_blobs(TBLOB *blob, TBLOB *other_blob, bool italic_blob, 00298 const TPOINT& location); 00299 00300 #endif