Tesseract 3.01
|
00001 /********************************************************************** 00002 * File: pageres.h (Formerly page_res.h) 00003 * Description: Results classes used by control.c 00004 * Author: Phil Cheatle 00005 * Created: Tue Sep 22 08:42:49 BST 1992 00006 * 00007 * (C) Copyright 1992, Hewlett-Packard Ltd. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 * 00018 **********************************************************************/ 00019 #ifndef PAGERES_H 00020 #define PAGERES_H 00021 00022 #include "blobs.h" 00023 #include "boxword.h" 00024 #include "elst.h" 00025 #include "genericvector.h" 00026 #include "normalis.h" 00027 #include "ocrblock.h" 00028 #include "ocrrow.h" 00029 #include "ratngs.h" 00030 #include "rejctmap.h" 00031 #include "seam.h" 00032 #include "werd.h" 00033 00034 /* Forward declarations */ 00035 00036 class BLOCK_RES; 00037 00038 ELISTIZEH (BLOCK_RES) CLISTIZEH (BLOCK_RES) 00039 class 00040 ROW_RES; 00041 00042 ELISTIZEH (ROW_RES) 00043 class WERD_RES; 00044 00045 ELISTIZEH (WERD_RES) 00046 00047 /************************************************************************* 00048 * PAGE_RES - Page results 00049 *************************************************************************/ 00050 class PAGE_RES { // page result 00051 public: 00052 inT32 char_count; 00053 inT32 rej_count; 00054 BLOCK_RES_LIST block_res_list; 00055 BOOL8 rejected; 00056 // Updated every time PAGE_RES_IT iterating on this PAGE_RES moves to 00057 // the next word. This pointer is not owned by PAGE_RES class. 00058 WERD_CHOICE **prev_word_best_choice; 00059 00060 PAGE_RES() { 00061 } // empty constructor 00062 00063 PAGE_RES(BLOCK_LIST *block_list, // real blocks 00064 WERD_CHOICE **prev_word_best_choice_ptr); 00065 00066 ~PAGE_RES () { // destructor 00067 } 00068 }; 00069 00070 /************************************************************************* 00071 * BLOCK_RES - Block results 00072 *************************************************************************/ 00073 00074 class BLOCK_RES:public ELIST_LINK { 00075 public: 00076 BLOCK * block; // real block 00077 inT32 char_count; // chars in block 00078 inT32 rej_count; // rejected chars 00079 inT16 font_class; // 00080 inT16 row_count; 00081 float x_height; 00082 BOOL8 font_assigned; // block already 00083 // processed 00084 BOOL8 bold; // all bold 00085 BOOL8 italic; // all italic 00086 00087 ROW_RES_LIST row_res_list; 00088 00089 BLOCK_RES() { 00090 } // empty constructor 00091 00092 BLOCK_RES(BLOCK *the_block); // real block 00093 00094 ~BLOCK_RES () { // destructor 00095 } 00096 }; 00097 00098 /************************************************************************* 00099 * ROW_RES - Row results 00100 *************************************************************************/ 00101 00102 class ROW_RES:public ELIST_LINK { 00103 public: 00104 ROW * row; // real row 00105 inT32 char_count; // chars in block 00106 inT32 rej_count; // rejected chars 00107 inT32 whole_word_rej_count; // rejs in total rej wds 00108 WERD_RES_LIST word_res_list; 00109 00110 ROW_RES() { 00111 } // empty constructor 00112 00113 ROW_RES(bool right_to_left, ROW *the_row); // real row 00114 00115 ~ROW_RES() { // destructor 00116 } 00117 }; 00118 00119 /************************************************************************* 00120 * WERD_RES - Word results 00121 *************************************************************************/ 00122 enum CRUNCH_MODE 00123 { 00124 CR_NONE, 00125 CR_KEEP_SPACE, 00126 CR_LOOSE_SPACE, 00127 CR_DELETE 00128 }; 00129 00130 // WERD_RES is a collection of publicly accessible members that gathers 00131 // information about a word result. 00132 class WERD_RES : public ELIST_LINK { 00133 public: 00134 // Which word is which? 00135 // There are 3 coordinate spaces in use here: a possibly rotated pixel space, 00136 // the original image coordinate space, and the BLN space in which the 00137 // baseline of a word is at kBlnBaselineOffset, the xheight is kBlnXHeight, 00138 // and the x-middle of the word is at 0. 00139 // In the rotated pixel space, coordinates correspond to the input image, 00140 // but may be rotated about the origin by a multiple of 90 degrees, 00141 // and may therefore be negative. 00142 // In any case a rotation by denorm.block()->re_rotation() will take them 00143 // back to the original image. 00144 // The other differences between words all represent different stages of 00145 // processing. 00146 // 00147 // The word is the input C_BLOBs in the rotated pixel space. 00148 // word is NOT owned by the WERD_RES unless combination is true. 00149 // All the other word pointers ARE owned by the WERD_RES. 00150 WERD* word; // Input C_BLOB word. 00151 // The bln_boxes contains the bounding boxes (only) of the input word, in the 00152 // BLN space. The lengths of word and bln_boxes 00153 // match as they are both before any chopping. 00154 // TODO(rays) determine if docqual does anything useful and delete bln_boxes 00155 // if it doesn't. 00156 tesseract::BoxWord* bln_boxes; // BLN input bounding boxes. 00157 // The chopped_word is also in BLN space, and represents the fully chopped 00158 // character fragments that make up the word. 00159 // The length of chopped_word matches length of seam_array + 1 (if set). 00160 TWERD* chopped_word; // BLN chopped fragments output. 00161 SEAMS seam_array; // Seams matching chopped_word. 00162 // The rebuild_word is also in BLN space, but represents the final best 00163 // segmentation of the word. Its length is therefore the same as box_word. 00164 TWERD* rebuild_word; // BLN best segmented word. 00165 // The denorm provides the transformation to get back to the rotated image 00166 // coords from the chopped_word/rebuild_word BLN coords. 00167 DENORM denorm; // For use on chopped_word. 00168 // The box_word is in the original image coordinate space. It is the 00169 // bounding boxes of the rebuild_word, after denormalization. 00170 // The length of box_word matches rebuild_word, best_state (if set) and 00171 // correct_text (if set), as well as best_choice and represents the 00172 // number of classified units in the output. 00173 tesseract::BoxWord* box_word; // Denormalized output boxes. 00174 // The best_state stores the relationship between chopped_word and 00175 // rebuild_word. Each blob[i] in rebuild_word is composed of best_state[i] 00176 // adjacent blobs in chopped_word. The seams in seam_array are hidden 00177 // within a rebuild_word blob and revealed between them. 00178 GenericVector<int> best_state; // Number of blobs in each best blob. 00179 // The correct_text is used during training and adaption to carry the 00180 // text to the training system without the need for a unicharset. There 00181 // is one entry in the vector for each blob in rebuild_word and box_word. 00182 GenericVector<STRING> correct_text; 00183 // The truth_* fields below are used by the blamer to determine the source 00184 // of errors. 00185 // The truth_word (in the original image coordinate space) contains ground 00186 // truth bounding boxes for this WERD_RES. 00187 tesseract::BoxWord* truth_word; 00188 // The truth_text contains ground truth unichar for each 00189 // of the bounding boxes in truth_word. 00190 GenericVector<STRING> truth_text; 00191 WERD_CHOICE *best_choice; // tess output 00192 WERD_CHOICE *raw_choice; // top choice permuter 00193 WERD_CHOICE *ep_choice; // ep text TODO(rays) delete this. 00194 REJMAP reject_map; // best_choice rejects 00195 BOOL8 tess_failed; 00196 /* 00197 If tess_failed is TRUE, one of the following tests failed when Tess 00198 returned: 00199 - The outword blob list was not the same length as the best_choice string; 00200 - The best_choice string contained ALL blanks; 00201 - The best_choice string was zero length 00202 */ 00203 BOOL8 tess_accepted; // Tess thinks its ok? 00204 BOOL8 tess_would_adapt; // Tess would adapt? 00205 BOOL8 done; // ready for output? 00206 bool small_caps; // word appears to be small caps 00207 inT8 italic; 00208 inT8 bold; 00209 inT16 fontinfo_id; // primary font id (should be at least inT16) 00210 inT8 fontinfo_id_count; // number of votes 00211 inT16 fontinfo_id2; // secondary font id (should be at least inT16) 00212 inT8 fontinfo_id2_count; // number of votes 00213 CRUNCH_MODE unlv_crunch_mode; 00214 float x_height; // post match estimate 00215 float caps_height; // post match estimate 00216 BOOL8 guessed_x_ht; 00217 BOOL8 guessed_caps_ht; 00218 /* 00219 To deal with fuzzy spaces we need to be able to combine "words" to form 00220 combinations when we suspect that the gap is a non-space. The (new) text 00221 ord code generates separate words for EVERY fuzzy gap - flags in the word 00222 indicate whether the gap is below the threshold (fuzzy kern) and is thus 00223 NOT a real word break by default, or above the threshold (fuzzy space) and 00224 this is a real word break by default. 00225 00226 The WERD_RES list contains all these words PLUS "combination" words built 00227 out of (copies of) the words split by fuzzy kerns. The separate parts have 00228 their "part_of_combo" flag set true and should be IGNORED on a default 00229 reading of the list. 00230 00231 Combination words are FOLLOWED by the sequence of part_of_combo words 00232 which they combine. 00233 */ 00234 BOOL8 combination; //of two fuzzy gap wds 00235 BOOL8 part_of_combo; //part of a combo 00236 BOOL8 reject_spaces; //Reject spacing? 00237 // FontInfo ids for each unichar in best_choice. 00238 GenericVector<inT8> best_choice_fontinfo_ids; 00239 00240 WERD_RES() { 00241 InitPointers(); 00242 } 00243 WERD_RES( //simple constructor 00244 WERD *the_word) { //real word 00245 InitPointers(); 00246 word = the_word; 00247 tess_failed = FALSE; 00248 tess_accepted = FALSE; 00249 tess_would_adapt = FALSE; 00250 done = FALSE; 00251 unlv_crunch_mode = CR_NONE; 00252 small_caps = false; 00253 italic = FALSE; 00254 bold = FALSE; 00255 fontinfo_id = -1; 00256 fontinfo_id_count = 0; 00257 fontinfo_id2 = -1; 00258 fontinfo_id2_count = 0; 00259 x_height = 0.0; 00260 caps_height = 0.0; 00261 guessed_x_ht = TRUE; 00262 guessed_caps_ht = TRUE; 00263 combination = FALSE; 00264 part_of_combo = FALSE; 00265 reject_spaces = FALSE; 00266 } 00267 WERD_RES(const WERD_RES &source) { 00268 InitPointers(); 00269 *this = source; // see operator= 00270 } 00271 00272 ~WERD_RES(); 00273 void InitPointers(); 00274 void Clear(); 00275 void ClearResults(); 00276 00277 WERD_RES& operator=(const WERD_RES& source); //from this 00278 00279 void CopySimpleFields(const WERD_RES& source); 00280 00281 // Sets up the members used in recognition: 00282 // bln_boxes, chopped_word, seam_array, denorm, best_choice, raw_choice. 00283 // Returns false if the word is empty and sets up fake results. 00284 bool SetupForRecognition(const UNICHARSET& unicharset, 00285 bool numeric_mode, ROW *row, BLOCK* block); 00286 00287 // Builds the rebuild_word from the chopped_word and the best_state. 00288 void RebuildBestState(); 00289 00290 // Copies the chopped_word to the rebuild_word, faking a best_state as well. 00291 // Also sets up the output box_word. 00292 void CloneChoppedToRebuild(); 00293 00294 // Sets/replaces the box_word with one made from the rebuild_word. 00295 void SetupBoxWord(); 00296 00297 // Sets up the script positions in the output boxword using the best_choice 00298 // to get the unichars, and the unicharset to get the target positions. 00299 void SetScriptPositions(const UNICHARSET& unicharset); 00300 00301 // Classifies the word with some already-calculated BLOB_CHOICEs. 00302 // The choices are an array of blob_count pointers to BLOB_CHOICE, 00303 // providing a single classifier result for each blob. 00304 // The BLOB_CHOICEs are consumed and the word takes ownership. 00305 // The number of blobs in the outword must match blob_count. 00306 void FakeClassifyWord(const UNICHARSET& unicharset, int blob_count, 00307 BLOB_CHOICE** choices); 00308 00309 // Copies the best_choice strings to the correct_text for adaption/training. 00310 void BestChoiceToCorrectText(const UNICHARSET& unicharset); 00311 00312 // Merges 2 adjacent blobs in the result if the permanent callback 00313 // class_cb returns other than INVALID_UNICHAR_ID, AND the permanent 00314 // callback box_cb is NULL or returns true, setting the merged blob 00315 // result to the class returned from class_cb. 00316 // Returns true if anything was merged. 00317 bool ConditionalBlobMerge( 00318 const UNICHARSET& unicharset, 00319 TessResultCallback2<UNICHAR_ID, UNICHAR_ID, UNICHAR_ID>* class_cb, 00320 TessResultCallback2<bool, const TBOX&, const TBOX&>* box_cb, 00321 BLOB_CHOICE_LIST_CLIST *blob_choices); 00322 00323 static WERD_RES* deep_copy(const WERD_RES* src) { 00324 return new WERD_RES(*src); 00325 } 00326 00327 // Copy blobs from word_res onto this word (eliminating spaces between). 00328 // Since this may be called bidirectionally OR both the BOL and EOL flags. 00329 void copy_on(WERD_RES *word_res) { //from this word 00330 word->set_flag(W_BOL, word->flag(W_BOL) || word_res->word->flag(W_BOL)); 00331 word->set_flag(W_EOL, word->flag(W_EOL) || word_res->word->flag(W_EOL)); 00332 word->copy_on(word_res->word); 00333 } 00334 }; 00335 00336 /************************************************************************* 00337 * PAGE_RES_IT - Page results iterator 00338 *************************************************************************/ 00339 00340 class PAGE_RES_IT { 00341 public: 00342 PAGE_RES * page_res; // page being iterated 00343 00344 PAGE_RES_IT() { 00345 } // empty contructor 00346 00347 PAGE_RES_IT(PAGE_RES *the_page_res) { // page result 00348 page_res = the_page_res; 00349 restart_page(); // ready to scan 00350 } 00351 00352 WERD_RES *restart_page() { 00353 return start_page(false); // Skip empty blocks. 00354 } 00355 WERD_RES *restart_page_with_empties() { 00356 return start_page(true); // Allow empty blocks. 00357 } 00358 WERD_RES *start_page(bool empty_ok); 00359 00360 // ============ Methods that mutate the underling structures =========== 00361 // Note that these methods will potentially invalidate other PAGE_RES_ITs 00362 // and are intended to be used only while a single PAGE_RES_IT is active. 00363 // This problem needs to be taken into account if these mutation operators 00364 // are ever provided to PageIterator or its subclasses. 00365 00366 // Inserts the new_word and a corresponding WERD_RES before the current 00367 // position. The simple fields of the WERD_RES are copied from clone_res and 00368 // the resulting WERD_RES is returned for further setup with best_choice etc. 00369 WERD_RES* InsertCloneWord(const WERD_RES& clone_res, WERD* new_word); 00370 00371 // Deletes the current WERD_RES and its underlying WERD. 00372 void DeleteCurrentWord(); 00373 00374 WERD_RES *forward() { // Get next word. 00375 return internal_forward(false, false); 00376 } 00377 // Move forward, but allow empty blocks to show as single NULL words. 00378 WERD_RES *forward_with_empties() { 00379 return internal_forward(false, true); 00380 } 00381 00382 WERD_RES *forward_block(); // get first word in 00383 // next non-empty block 00384 WERD_RES *prev_word() const { // previous word 00385 return prev_word_res; 00386 } 00387 ROW_RES *prev_row() const { // row of prev word 00388 return prev_row_res; 00389 } 00390 BLOCK_RES *prev_block() const { // block of prev word 00391 return prev_block_res; 00392 } 00393 WERD_RES *word() const { // current word 00394 return word_res; 00395 } 00396 ROW_RES *row() const { // row of current word 00397 return row_res; 00398 } 00399 BLOCK_RES *block() const { // block of cur. word 00400 return block_res; 00401 } 00402 WERD_RES *next_word() const { // next word 00403 return next_word_res; 00404 } 00405 ROW_RES *next_row() const { // row of next word 00406 return next_row_res; 00407 } 00408 BLOCK_RES *next_block() const { // block of next word 00409 return next_block_res; 00410 } 00411 void rej_stat_word(); // for page/block/row 00412 00413 private: 00414 void ResetWordIterator(); 00415 WERD_RES *internal_forward(bool new_block, bool empty_ok); 00416 00417 WERD_RES * prev_word_res; // previous word 00418 ROW_RES *prev_row_res; // row of prev word 00419 BLOCK_RES *prev_block_res; // block of prev word 00420 00421 WERD_RES *word_res; // current word 00422 ROW_RES *row_res; // row of current word 00423 BLOCK_RES *block_res; // block of cur. word 00424 00425 WERD_RES *next_word_res; // next word 00426 ROW_RES *next_row_res; // row of next word 00427 BLOCK_RES *next_block_res; // block of next word 00428 00429 BLOCK_RES_IT block_res_it; // iterators 00430 ROW_RES_IT row_res_it; 00431 WERD_RES_IT word_res_it; 00432 }; 00433 #endif