00001 /********************************************************************** 00002 * File: pageres.h (Formerly page_res.h) 00003 * Description: Results classes used by control.c 00004 * Author: Phil Cheatle 00005 * Created: Tue Sep 22 08:42:49 BST 1992 00006 * 00007 * (C) Copyright 1992, Hewlett-Packard Ltd. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 * 00018 **********************************************************************/ 00019 #ifndef PAGERES_H 00020 #define PAGERES_H 00021 00022 #include "elst.h" 00023 #include "ocrblock.h" 00024 #include "ocrrow.h" 00025 #include "werd.h" 00026 #include "ratngs.h" 00027 #include "rejctmap.h" 00028 #include "notdll.h" 00029 #include "notdll.h" 00030 00031 /* Forward declarations */ 00032 00033 class BLOCK_RES; 00034 00035 ELISTIZEH (BLOCK_RES) CLISTIZEH (BLOCK_RES) 00036 class 00037 ROW_RES; 00038 00039 ELISTIZEH (ROW_RES) 00040 class WERD_RES; 00041 00042 ELISTIZEH (WERD_RES) 00043 /************************************************************************* 00044 * PAGE_RES - Page results 00045 *************************************************************************/ 00046 class PAGE_RES //page result 00047 { 00048 public: 00049 inT32 char_count; 00050 inT32 rej_count; 00051 BLOCK_RES_LIST block_res_list; 00052 BOOL8 rejected; 00053 00054 PAGE_RES() { 00055 } //empty constructor 00056 00057 PAGE_RES( //simple constructor 00058 BLOCK_LIST *block_list); //real blocks 00059 00060 ~PAGE_RES () { //destructor 00061 } 00062 }; 00063 00064 /************************************************************************* 00065 * BLOCK_RES - Block results 00066 *************************************************************************/ 00067 00068 class BLOCK_RES:public ELIST_LINK 00069 //page block result 00070 { 00071 public: 00072 BLOCK * block; //real block 00073 inT32 char_count; //chars in block 00074 inT32 rej_count; //rejected chars 00075 inT16 font_class; // 00076 inT16 row_count; 00077 float x_height; 00078 BOOL8 font_assigned; // block already 00079 // processed 00080 BOOL8 bold; // all bold 00081 BOOL8 italic; // all italic 00082 00083 ROW_RES_LIST row_res_list; 00084 00085 BLOCK_RES() { 00086 } //empty constructor 00087 00088 BLOCK_RES( //simple constructor 00089 BLOCK *the_block); //real block 00090 00091 ~BLOCK_RES () { //destructor 00092 } 00093 }; 00094 00095 /************************************************************************* 00096 * ROW_RES - Row results 00097 *************************************************************************/ 00098 00099 class ROW_RES:public ELIST_LINK //row result 00100 { 00101 public: 00102 ROW * row; //real row 00103 inT32 char_count; //chars in block 00104 inT32 rej_count; //rejected chars 00105 inT32 whole_word_rej_count; //rejs in total rej wds 00106 WERD_RES_LIST word_res_list; 00107 float font_class_score; 00108 inT16 font_class; // 00109 inT32 italic; 00110 inT32 bold; 00111 inT8 font1; //primary font 00112 inT8 font1_count; //no of voters 00113 inT8 font2; //secondary font 00114 inT8 font2_count; //no of voters 00115 00116 ROW_RES() { 00117 } //empty constructor 00118 00119 ROW_RES( //simple constructor 00120 ROW *the_row); //real row 00121 00122 ~ROW_RES () { //destructor 00123 } 00124 }; 00125 00126 /************************************************************************* 00127 * WERD_RES - Word results 00128 *************************************************************************/ 00129 enum CRUNCH_MODE 00130 { 00131 CR_NONE, 00132 CR_KEEP_SPACE, 00133 CR_LOOSE_SPACE, 00134 CR_DELETE 00135 }; 00136 00137 class WERD_RES:public ELIST_LINK //word result 00138 { 00139 public: 00140 WERD * word; //non-bln real word 00141 WERD *outword; //bln best choice 00142 //segmentation 00143 DENORM denorm; //for use on outword 00144 WERD_CHOICE *best_choice; //tess output 00145 WERD_CHOICE *raw_choice; //top choice permuter 00146 WERD_CHOICE *ep_choice; //ep text 00147 REJMAP reject_map; //best_choice rejects 00148 BOOL8 tess_failed; 00149 /* 00150 If tess_failed is TRUE, one of the following tests failed when Tess 00151 returned: 00152 - The outword blob list was not the same length as the best_choice string; 00153 - The best_choice string contained ALL blanks; 00154 - The best_choice string was zero length 00155 */ 00156 BOOL8 tess_accepted; //Tess thinks its ok? 00157 BOOL8 tess_would_adapt; //Tess would adapt? 00158 BOOL8 done; //ready for output? 00159 inT8 italic; 00160 inT8 bold; 00161 inT8 font1; //primary font 00162 inT8 font1_count; //no of voters 00163 inT8 font2; //secondary font 00164 inT8 font2_count; //no of voters 00165 CRUNCH_MODE unlv_crunch_mode; 00166 float x_height; //Post match estimate 00167 float caps_height; //Post match estimate 00168 BOOL8 guessed_x_ht; 00169 BOOL8 guessed_caps_ht; 00170 /* 00171 To deal with fuzzy spaces we need to be able to combine "words" to form 00172 combinations when we suspect that the gap is a non-space. The (new) text 00173 ord code generates separate words for EVERY fuzzy gap - flags in the word 00174 indicate whether the gap is below the threshold (fuzzy kern) and is thus 00175 NOT a real word break by default, or above the threshold (fuzzy space) and 00176 this is a real word break by default. 00177 00178 The WERD_RES list contains all these words PLUS "combination" words built 00179 out of (copies of) the words split by fuzzy kerns. The separate parts have 00180 their "part_of_combo" flag set true and should be IGNORED on a default 00181 reading of the list. 00182 00183 Combination words are FOLLOWED by the sequence of part_of_combo words 00184 which they combine. 00185 */ 00186 BOOL8 combination; //of two fuzzy gap wds 00187 BOOL8 part_of_combo; //part of a combo 00188 BOOL8 reject_spaces; //Reject spacing? 00189 00190 WERD_RES() { 00191 } //empty constructor 00192 00193 WERD_RES( //simple constructor 00194 WERD *the_word) { //real word 00195 word = the_word; 00196 outword = NULL; 00197 best_choice = NULL; 00198 raw_choice = NULL; 00199 ep_choice = NULL; 00200 tess_failed = FALSE; 00201 tess_accepted = FALSE; 00202 tess_would_adapt = FALSE; 00203 done = FALSE; 00204 unlv_crunch_mode = CR_NONE; 00205 italic = FALSE; 00206 bold = FALSE; 00207 font1 = -1; 00208 font1_count = 0; 00209 font2 = -1; 00210 font2_count = 0; 00211 x_height = 0.0; 00212 caps_height = 0.0; 00213 guessed_x_ht = TRUE; 00214 guessed_caps_ht = TRUE; 00215 combination = FALSE; 00216 part_of_combo = FALSE; 00217 reject_spaces = FALSE; 00218 } 00219 WERD_RES(const WERD_RES &source) { 00220 *this = source; //see operator= 00221 } 00222 00223 ~WERD_RES (); //destructor 00224 00225 WERD_RES& operator=(const WERD_RES& source); //from this 00226 00227 static WERD_RES* deep_copy(const WERD_RES* src) { 00228 return new WERD_RES(*src); 00229 } 00230 00231 void copy_on( //copy blobs onto word 00232 WERD_RES *word_res) { //from this word 00233 word->set_flag (W_EOL, word_res->word->flag (W_EOL)); 00234 word->copy_on (word_res->word); 00235 } 00236 }; 00237 00238 /************************************************************************* 00239 * PAGE_RES_IT - Page results iterator 00240 *************************************************************************/ 00241 00242 class PAGE_RES_IT 00243 { 00244 public: 00245 PAGE_RES * page_res; //page being iterated 00246 00247 PAGE_RES_IT() { 00248 } //empty contructor 00249 00250 PAGE_RES_IT( //empty contructor 00251 PAGE_RES *the_page_res) { //page result 00252 page_res = the_page_res; 00253 restart_page(); //ready to scan 00254 } 00255 00256 WERD_RES *restart_page(); //get ready 00257 00258 WERD_RES *internal_forward( //get next word 00259 BOOL8 new_block); 00260 00261 WERD_RES *forward() { //get next word 00262 return internal_forward (FALSE); 00263 } 00264 00265 WERD_RES *forward_block(); //get first word in 00266 //next non-empty block 00267 WERD_RES *prev_word() { //previous word 00268 return prev_word_res; 00269 } 00270 ROW_RES *prev_row() { //row of prev word 00271 return prev_row_res; 00272 } 00273 BLOCK_RES *prev_block() { //block of prev word 00274 return prev_block_res; 00275 } 00276 WERD_RES *word() { //current word 00277 return word_res; 00278 } 00279 ROW_RES *row() { //row of current word 00280 return row_res; 00281 } 00282 BLOCK_RES *block() { //block of cur. word 00283 return block_res; 00284 } 00285 WERD_RES *next_word() { //next word 00286 return next_word_res; 00287 } 00288 ROW_RES *next_row() { //row of next word 00289 return next_row_res; 00290 } 00291 BLOCK_RES *next_block() { //block of next word 00292 return next_block_res; 00293 } 00294 void rej_stat_word(); //for page/block/row 00295 00296 private: 00297 WERD_RES * prev_word_res; //previous word 00298 ROW_RES *prev_row_res; //row of prev word 00299 BLOCK_RES *prev_block_res; //block of prev word 00300 00301 WERD_RES *word_res; //current word 00302 ROW_RES *row_res; //row of current word 00303 BLOCK_RES *block_res; //block of cur. word 00304 00305 WERD_RES *next_word_res; //next word 00306 ROW_RES *next_row_res; //row of next word 00307 BLOCK_RES *next_block_res; //block of next word 00308 00309 BLOCK_RES_IT block_res_it; //iterators 00310 ROW_RES_IT row_res_it; 00311 WERD_RES_IT word_res_it; 00312 }; 00313 #endif