Tesseract 3.01
|
00001 /********************************************************************** 00002 * File: normalis.h (Formerly denorm.h) 00003 * Description: Code for the DENORM class. 00004 * Author: Ray Smith 00005 * Created: Thu Apr 23 09:22:43 BST 1992 00006 * 00007 * (C) Copyright 1992, Hewlett-Packard Ltd. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 * 00018 **********************************************************************/ 00019 00020 #ifndef NORMALIS_H 00021 #define NORMALIS_H 00022 00023 #include <stdio.h> 00024 #include "host.h" 00025 00026 const int kBlnCellHeight = 256; // Full-height for baseline normalization. 00027 const int kBlnXHeight = 128; // x-height for baseline normalization. 00028 const int kBlnBaselineOffset = 64; // offset for baseline normalization. 00029 00030 struct Pix; 00031 class ROW; // Forward decl 00032 class BLOCK; 00033 class FCOORD; 00034 struct TBLOB; 00035 class TBOX; 00036 struct TPOINT; 00037 00038 class DENORM_SEG { 00039 public: 00040 DENORM_SEG() {} 00041 00042 inT32 xstart; // start of segment 00043 inT32 ycoord; // y at segment 00044 float scale_factor; // normalized_x/scale_factor + x_center == original_x 00045 }; 00046 00047 class DENORM { 00048 public: 00049 DENORM(); 00050 DENORM(float x, float scaling, ROW *src); 00051 DENORM(float x, // from same pieces 00052 float scaling, 00053 double line_m, // default line: y = mx + c 00054 double line_c, 00055 inT16 seg_count, // no of segments 00056 DENORM_SEG *seg_pts, // actual segments 00057 BOOL8 using_row, // as baseline 00058 ROW *src); 00059 // Copying a DENORM is allowed. 00060 DENORM(const DENORM &); 00061 DENORM& operator=(const DENORM&); 00062 ~DENORM(); 00063 00064 // Setup for a baseline normalization. If there are segs, then they 00065 // are used, otherwise, if there is a row, that is used, otherwise the 00066 // bottom of the word_box is used for the baseline. 00067 void SetupBLNormalize(const BLOCK* block, const ROW* row, float x_height, 00068 const TBOX& word_box, 00069 int num_segs, const DENORM_SEG* segs); 00070 00071 // Setup the normalization transformation parameters. 00072 // The normalizations applied to a blob are as follows: 00073 // 1. An optional block layout rotation that was applied during layout 00074 // analysis to make the textlines horizontal. 00075 // 2. A normalization transformation (LocalNormTransform): 00076 // Subtract the "origin" 00077 // Apply an x,y scaling. 00078 // Apply an optional rotation. 00079 // Add back a final translation. 00080 // The origin is in the block-rotated space, and is usually something like 00081 // the x-middle of the word at the baseline. 00082 // 3. Zero or more further normalization transformations that are applied 00083 // in sequence, with a similar pattern to the first normalization transform. 00084 // 00085 // A DENORM holds the parameters of a single normalization, and can execute 00086 // both the LocalNormTransform (a forwards normalization), and the 00087 // LocalDenormTransform which is an inverse transform or de-normalization. 00088 // A DENORM may point to a predecessor DENORM, which is actually the earlier 00089 // normalization, so the full normalization sequence involves executing all 00090 // predecessors first and then the transform in "this". 00091 // Let x be image co-ordinates and that we have normalization classes A, B, C 00092 // where we first apply A then B then C to get normalized x': 00093 // x' = CBAx 00094 // Then the backwards (to original coordinates) would be: 00095 // x = A^-1 B^-1 C^-1 x' 00096 // and A = B->predecessor_ and B = C->predecessor_ 00097 // NormTransform executes all predecessors recursively, and then this. 00098 // NormTransform would be used to transform an image-based feature to 00099 // normalized space for use in a classifier 00100 // DenormTransform inverts this and then all predecessors. It can be 00101 // used to get back to the original image coordinates from normalized space. 00102 // The LocalNormTransform member executes just the transformation 00103 // in "this" without the layout rotation or any predecessors. It would be 00104 // used to run each successive normalization, eg the word normalization, 00105 // and later the character normalization. 00106 00107 // Arguments: 00108 // block: if not NULL, then this is the first transformation, and 00109 // block->re_rotation() needs to be used after the Denorm 00110 // transformation to get back to the image coords. 00111 // row: if not NULL, then row->baseline(x) is added to the y_origin, unless 00112 // segs is not NULL and num_segs > 0, in which case they are used. 00113 // rotation: if not NULL, apply this rotation after translation to the 00114 // origin and scaling. (Usually a classify rotation.) 00115 // predecessor: if not NULL, then predecessor has been applied to the 00116 // input space and needs to be undone to complete the inverse. 00117 // segs: if not NULL and num_segs > 0, then the segs provide the y_origin 00118 // and the y_scale at a given source x. 00119 // num_segs: the number of segs. 00120 // The above pointers are not owned by this DENORM and are assumed to live 00121 // longer than this denorm, except rotation and segs, which are deep 00122 // copied on input. 00123 // 00124 // x_origin: The x origin which will be mapped to final_xshift in the result. 00125 // y_origin: The y origin which will be mapped to final_yshift in the result. 00126 // Added to result of row->baseline(x) if not NULL. 00127 // 00128 // x_scale: scale factor for the x-coordinate. 00129 // y_scale: scale factor for the y-coordinate. Ignored if segs is given. 00130 // Note that these scale factors apply to the same x and y system as the 00131 // x-origin and y-origin apply, ie after any block rotation, but before 00132 // the rotation argument is applied. 00133 // 00134 // final_xshift: The x component of the final translation. 00135 // final_yshift: The y component of the final translation. 00136 // 00137 // In theory, any of the commonly used normalizations can be setup here: 00138 // * Traditional baseline normalization on a word: 00139 // SetupNormalization(block, row, NULL, NULL, NULL, 0, 00140 // box.x_middle(), 0.0f, 00141 // kBlnXHeight / x_height, kBlnXHeight / x_height, 00142 // 0, kBlnBaselineOffset); 00143 // * Numeric mode baseline normalization on a word: 00144 // SetupNormalization(block, NULL, NULL, NULL, segs, num_segs, 00145 // box.x_middle(), 0.0f, 00146 // kBlnXHeight / x_height, kBlnXHeight / x_height, 00147 // 0, kBlnBaselineOffset); 00148 // * Anisotropic character normalization used by IntFx. 00149 // SetupNormalization(NULL, NULL, NULL, denorm, NULL, 0, 00150 // centroid_x, centroid_y, 00151 // 51.2 / ry, 51.2 / rx, 128, 128); 00152 // * Normalize blob height to x-height (current OSD): 00153 // SetupNormalization(NULL, NULL, &rotation, NULL, NULL, 0, 00154 // box.rotational_x_middle(rotation), 00155 // box.rotational_y_middle(rotation), 00156 // kBlnXHeight / box.rotational_height(rotation), 00157 // kBlnXHeight / box.rotational_height(rotation), 00158 // 0, kBlnBaselineOffset); 00159 // * Secondary normalization for classification rotation (current): 00160 // FCOORD rotation = block->classify_rotation(); 00161 // float target_height = kBlnXHeight / CCStruct::kXHeightCapRatio; 00162 // SetupNormalization(NULL, NULL, &rotation, denorm, NULL, 0, 00163 // box.rotational_x_middle(rotation), 00164 // box.rotational_y_middle(rotation), 00165 // target_height / box.rotational_height(rotation), 00166 // target_height / box.rotational_height(rotation), 00167 // 0, kBlnBaselineOffset); 00168 // * Proposed new normalizations for CJK: Between them there is then 00169 // no need for further normalization at all, and the character fills the cell. 00170 // ** Replacement for baseline normalization on a word: 00171 // Scales height and width independently so that modal height and pitch 00172 // fill the cell respectively. 00173 // float cap_height = x_height / CCStruct::kXHeightCapRatio; 00174 // SetupNormalization(block, row, NULL, NULL, NULL, 0, 00175 // box.x_middle(), cap_height / 2.0f, 00176 // kBlnCellHeight / fixed_pitch, 00177 // kBlnCellHeight / cap_height, 00178 // 0, 0); 00179 // ** Secondary normalization for classification (with rotation) (proposed): 00180 // Requires a simple translation to the center of the appropriate character 00181 // cell, no further scaling and a simple rotation (or nothing) about the 00182 // cell center. 00183 // FCOORD rotation = block->classify_rotation(); 00184 // SetupNormalization(NULL, NULL, &rotation, denorm, NULL, 0, 00185 // fixed_pitch_cell_center, 00186 // 0.0f, 00187 // 1.0f, 00188 // 1.0f, 00189 // 0, 0); 00190 void SetupNormalization(const BLOCK* block, 00191 const ROW* row, 00192 const FCOORD* rotation, 00193 const DENORM* predecessor, 00194 const DENORM_SEG* segs, int num_segs, 00195 float x_origin, float y_origin, 00196 float x_scale, float y_scale, 00197 float final_xshift, float final_yshift); 00198 00199 // Transforms the given coords one step forward to normalized space, without 00200 // using any block rotation or predecessor. 00201 void LocalNormTransform(const TPOINT& pt, TPOINT* transformed) const; 00202 void LocalNormTransform(const FCOORD& pt, FCOORD* transformed) const; 00203 // Transforms the given coords forward to normalized space using the 00204 // full transformation sequence defined by the block rotation, the 00205 // predecessors, deepest first, and finally this. 00206 void NormTransform(const TPOINT& pt, TPOINT* transformed) const; 00207 void NormTransform(const FCOORD& pt, FCOORD* transformed) const; 00208 // Transforms the given coords one step back to source space, without 00209 // using to any block rotation or predecessor. 00210 void LocalDenormTransform(const TPOINT& pt, TPOINT* original) const; 00211 void LocalDenormTransform(const FCOORD& pt, FCOORD* original) const; 00212 // Transforms the given coords all the way back to source image space using 00213 // the full transformation sequence defined by this and its predecesors 00214 // recursively, shallowest first, and finally any block re_rotation. 00215 void DenormTransform(const TPOINT& pt, TPOINT* original) const; 00216 void DenormTransform(const FCOORD& pt, FCOORD* original) const; 00217 00218 // Normalize a blob using blob transformations. Less accurate, but 00219 // more accurately copies the old way. 00220 void LocalNormBlob(TBLOB* blob) const; 00221 00222 Pix* pix() const { 00223 return pix_; 00224 } 00225 void set_pix(Pix* pix) { 00226 pix_ = pix; 00227 } 00228 bool inverse() const { 00229 return inverse_; 00230 } 00231 void set_inverse(bool value) { 00232 inverse_ = value; 00233 } 00234 const DENORM* RootDenorm() const { 00235 if (predecessor_ != NULL) 00236 return predecessor_->RootDenorm(); 00237 return this; 00238 } 00239 // Accessors - perhaps should not be needed. 00240 float x_scale() const { 00241 return x_scale_; 00242 } 00243 float y_scale() const { 00244 return y_scale_; 00245 } 00246 const ROW *row() const { 00247 return row_; 00248 } 00249 void set_row(ROW* row) { 00250 row_ = row; 00251 } 00252 const BLOCK* block() const { 00253 return block_; 00254 } 00255 void set_block(const BLOCK* block) { 00256 block_ = block; 00257 } 00258 00259 private: 00260 // Free allocated memory and clear pointers. 00261 void Clear(); 00262 // Setup default values. 00263 void Init(); 00264 00265 // Returns the y-origin at the original (un-normalized) x. 00266 float YOriginAtOrigX(float orig_x) const; 00267 00268 // Returns the y-scale at the original (un-normalized) x. 00269 float YScaleAtOrigX(float orig_x) const; 00270 00271 // Deep copy the array of segments for use as a y_origin and y_scale. 00272 void SetSegments(const DENORM_SEG* new_segs, int seg_count); 00273 00274 // Finds the appropriate segment for a given original x-coord 00275 const DENORM_SEG* BinarySearchSegment(float orig_x) const; 00276 00277 // Best available image. 00278 Pix* pix_; 00279 // True if the source image is white-on-black. 00280 bool inverse_; 00281 // Block the word came from. If not null, block->re_rotation() takes the 00282 // "untransformed" coordinates even further back to the original image. 00283 const BLOCK* block_; 00284 // Row the word came from. If not null, row->baseline() is added to y_origin_. 00285 const ROW* row_; 00286 // Rotation to apply between translation to the origin and scaling. 00287 const FCOORD* rotation_; 00288 // Previous transformation in a chain. 00289 const DENORM* predecessor_; 00290 // Array of segments used to specify local y_origin_ and y_scale_. 00291 // Owned by the DENORM. 00292 DENORM_SEG *segs_; 00293 // Size of the segs_ array. 00294 int num_segs_; 00295 // x-coordinate to be mapped to final_xshift_ in the result. 00296 float x_origin_; 00297 // y-coordinate to be mapped to final_yshift_ in the result. 00298 float y_origin_; 00299 // Scale factors for x and y coords. Applied to pre-rotation system. 00300 float x_scale_; 00301 float y_scale_; 00302 // Destination coords of the x_origin_ and y_origin_. 00303 float final_xshift_; 00304 float final_yshift_; 00305 }; 00306 #endif