Tesseract 3.01
/data/source/tesseract-ocr/ccstruct/pageres.h
Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        pageres.h  (Formerly page_res.h)
00003  * Description: Results classes used by control.c
00004  * Author:              Phil Cheatle
00005  * Created:     Tue Sep 22 08:42:49 BST 1992
00006  *
00007  * (C) Copyright 1992, Hewlett-Packard Ltd.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 #ifndef           PAGERES_H
00020 #define           PAGERES_H
00021 
00022 #include "blobs.h"
00023 #include "boxword.h"
00024 #include "elst.h"
00025 #include "genericvector.h"
00026 #include "normalis.h"
00027 #include "ocrblock.h"
00028 #include "ocrrow.h"
00029 #include "ratngs.h"
00030 #include "rejctmap.h"
00031 #include "seam.h"
00032 #include "werd.h"
00033 
00034 /* Forward declarations */
00035 
00036 class BLOCK_RES;
00037 
00038 ELISTIZEH (BLOCK_RES) CLISTIZEH (BLOCK_RES)
00039 class
00040 ROW_RES;
00041 
00042 ELISTIZEH (ROW_RES)
00043 class WERD_RES;
00044 
00045 ELISTIZEH (WERD_RES)
00046 
00047 /*************************************************************************
00048  * PAGE_RES - Page results
00049  *************************************************************************/
00050 class PAGE_RES {                 // page result
00051  public:
00052   inT32 char_count;
00053   inT32 rej_count;
00054   BLOCK_RES_LIST block_res_list;
00055   BOOL8 rejected;
00056   // Updated every time PAGE_RES_IT iterating on this PAGE_RES moves to
00057   // the next word. This pointer is not owned by PAGE_RES class.
00058   WERD_CHOICE **prev_word_best_choice;
00059 
00060   PAGE_RES() {
00061   }                            // empty constructor
00062 
00063   PAGE_RES(BLOCK_LIST *block_list,   // real blocks
00064            WERD_CHOICE **prev_word_best_choice_ptr);
00065 
00066   ~PAGE_RES () {               // destructor
00067   }
00068 };
00069 
00070 /*************************************************************************
00071  * BLOCK_RES - Block results
00072  *************************************************************************/
00073 
00074 class BLOCK_RES:public ELIST_LINK {
00075  public:
00076   BLOCK * block;               // real block
00077   inT32 char_count;            // chars in block
00078   inT32 rej_count;             // rejected chars
00079   inT16 font_class;            //
00080   inT16 row_count;
00081   float x_height;
00082   BOOL8 font_assigned;         // block already
00083   //      processed
00084   BOOL8 bold;                  // all bold
00085   BOOL8 italic;                // all italic
00086 
00087   ROW_RES_LIST row_res_list;
00088 
00089   BLOCK_RES() {
00090   }                            // empty constructor
00091 
00092   BLOCK_RES(BLOCK *the_block);  // real block
00093 
00094   ~BLOCK_RES () {              // destructor
00095   }
00096 };
00097 
00098 /*************************************************************************
00099  * ROW_RES - Row results
00100  *************************************************************************/
00101 
00102 class ROW_RES:public ELIST_LINK {
00103  public:
00104   ROW * row;                   // real row
00105   inT32 char_count;            // chars in block
00106   inT32 rej_count;             // rejected chars
00107   inT32 whole_word_rej_count;  // rejs in total rej wds
00108   WERD_RES_LIST word_res_list;
00109 
00110   ROW_RES() {
00111   }                            // empty constructor
00112 
00113   ROW_RES(bool right_to_left, ROW *the_row);  // real row
00114 
00115   ~ROW_RES() {                // destructor
00116   }
00117 };
00118 
/*************************************************************************
 * WERD_RES - Word results
 *************************************************************************/
// Crunch mode recorded per word in WERD_RES::unlv_crunch_mode.
// CR_NONE is the value the WERD_RES(WERD*) constructor assigns.
enum CRUNCH_MODE {
  CR_NONE = 0,
  CR_KEEP_SPACE = 1,
  CR_LOOSE_SPACE = 2,
  CR_DELETE = 3
};
00129 
00130 // WERD_RES is a collection of publicly accessible members that gathers
00131 // information about a word result.
00132 class WERD_RES : public ELIST_LINK {
00133  public:
00134   // Which word is which?
00135   // There are 3 coordinate spaces in use here: a possibly rotated pixel space,
00136   // the original image coordinate space, and the BLN space in which the
00137   // baseline of a word is at kBlnBaselineOffset, the xheight is kBlnXHeight,
00138   // and the x-middle of the word is at 0.
00139   // In the rotated pixel space, coordinates correspond to the input image,
00140   // but may be rotated about the origin by a multiple of 90 degrees,
00141   // and may therefore be negative.
00142   // In any case a rotation by denorm.block()->re_rotation() will take them
00143   // back to the original image.
00144   // The other differences between words all represent different stages of
00145   // processing.
00146   //
00147   // The word is the input C_BLOBs in the rotated pixel space.
00148   // word is NOT owned by the WERD_RES unless combination is true.
00149   // All the other word pointers ARE owned by the WERD_RES.
00150   WERD* word;                     // Input C_BLOB word.
00151   // The bln_boxes contains the bounding boxes (only) of the input word, in the
00152   // BLN space. The lengths of word and bln_boxes
00153   // match as they are both before any chopping.
00154   // TODO(rays) determine if docqual does anything useful and delete bln_boxes
00155   // if it doesn't.
00156   tesseract::BoxWord* bln_boxes;  // BLN input bounding boxes.
00157   // The chopped_word is also in BLN space, and represents the fully chopped
00158   // character fragments that make up the word.
00159   // The length of chopped_word matches length of seam_array + 1 (if set).
00160   TWERD* chopped_word;            // BLN chopped fragments output.
00161   SEAMS seam_array;               // Seams matching chopped_word.
00162   // The rebuild_word is also in BLN space, but represents the final best
00163   // segmentation of the word. Its length is therefore the same as box_word.
00164   TWERD* rebuild_word;            // BLN best segmented word.
00165   // The denorm provides the transformation to get back to the rotated image
00166   // coords from the chopped_word/rebuild_word BLN coords.
00167   DENORM denorm;                  // For use on chopped_word.
00168   // The box_word is in the original image coordinate space. It is the
00169   // bounding boxes of the rebuild_word, after denormalization.
00170   // The length of box_word matches rebuild_word, best_state (if set) and
00171   // correct_text (if set), as well as best_choice and represents the
00172   // number of classified units in the output.
00173   tesseract::BoxWord* box_word;   // Denormalized output boxes.
00174   // The best_state stores the relationship between chopped_word and
00175   // rebuild_word. Each blob[i] in rebuild_word is composed of best_state[i]
00176   // adjacent blobs in chopped_word. The seams in seam_array are hidden
00177   // within a rebuild_word blob and revealed between them.
00178   GenericVector<int> best_state;  // Number of blobs in each best blob.
00179   // The correct_text is used during training and adaption to carry the
00180   // text to the training system without the need for a unicharset. There
00181   // is one entry in the vector for each blob in rebuild_word and box_word.
00182   GenericVector<STRING> correct_text;
00183   // The truth_* fields below are used by the blamer to determine the source
00184   // of errors.
00185   // The truth_word (in the original image coordinate space) contains ground
00186   // truth bounding boxes for this WERD_RES.
00187   tesseract::BoxWord* truth_word;
00188   // The truth_text contains ground truth unichar for each
00189   // of the bounding boxes in truth_word.
00190   GenericVector<STRING> truth_text;
00191   WERD_CHOICE *best_choice;    // tess output
00192   WERD_CHOICE *raw_choice;     // top choice permuter
00193   WERD_CHOICE *ep_choice;      // ep text TODO(rays) delete this.
00194   REJMAP reject_map;           // best_choice rejects
00195   BOOL8 tess_failed;
00196   /*
00197     If tess_failed is TRUE, one of the following tests failed when Tess
00198     returned:
00199     - The outword blob list was not the same length as the best_choice string;
00200     - The best_choice string contained ALL blanks;
00201     - The best_choice string was zero length
00202   */
00203   BOOL8 tess_accepted;          // Tess thinks its ok?
00204   BOOL8 tess_would_adapt;       // Tess would adapt?
00205   BOOL8 done;                   // ready for output?
00206   bool small_caps;             // word appears to be small caps
00207   inT8 italic;
00208   inT8 bold;
00209   inT16 fontinfo_id;            // primary font id (should be at least inT16)
00210   inT8 fontinfo_id_count;       // number of votes
00211   inT16 fontinfo_id2;           // secondary font id (should be at least inT16)
00212   inT8 fontinfo_id2_count;      // number of votes
00213   CRUNCH_MODE unlv_crunch_mode;
00214   float x_height;              // post match estimate
00215   float caps_height;           // post match estimate
00216   BOOL8 guessed_x_ht;
00217   BOOL8 guessed_caps_ht;
00218   /*
00219     To deal with fuzzy spaces we need to be able to combine "words" to form
00220     combinations when we suspect that the gap is a non-space. The (new) text
00221     ord code generates separate words for EVERY fuzzy gap - flags in the word
00222     indicate whether the gap is below the threshold (fuzzy kern) and is thus
00223     NOT a real word break by default, or above the threshold (fuzzy space) and
00224     this is a real word break by default.
00225 
00226     The WERD_RES list contains all these words PLUS "combination" words built
00227     out of (copies of) the words split by fuzzy kerns. The separate parts have
00228     their "part_of_combo" flag set true and should be IGNORED on a default
00229     reading of the list.
00230 
00231     Combination words are FOLLOWED by the sequence of part_of_combo words
00232     which they combine.
00233   */
00234   BOOL8 combination;           //of two fuzzy gap wds
00235   BOOL8 part_of_combo;         //part of a combo
00236   BOOL8 reject_spaces;         //Reject spacing?
00237   // FontInfo ids for each unichar in best_choice.
00238   GenericVector<inT8> best_choice_fontinfo_ids;
00239 
00240   WERD_RES() {
00241     InitPointers();
00242   }
00243   WERD_RES(                   //simple constructor
00244            WERD *the_word) {  //real word
00245     InitPointers();
00246     word = the_word;
00247     tess_failed = FALSE;
00248     tess_accepted = FALSE;
00249     tess_would_adapt = FALSE;
00250     done = FALSE;
00251     unlv_crunch_mode = CR_NONE;
00252     small_caps = false;
00253     italic = FALSE;
00254     bold = FALSE;
00255     fontinfo_id = -1;
00256     fontinfo_id_count = 0;
00257     fontinfo_id2 = -1;
00258     fontinfo_id2_count = 0;
00259     x_height = 0.0;
00260     caps_height = 0.0;
00261     guessed_x_ht = TRUE;
00262     guessed_caps_ht = TRUE;
00263     combination = FALSE;
00264     part_of_combo = FALSE;
00265     reject_spaces = FALSE;
00266   }
00267   WERD_RES(const WERD_RES &source) {
00268     InitPointers();
00269     *this = source;            // see operator=
00270   }
00271 
00272   ~WERD_RES();
00273   void InitPointers();
00274   void Clear();
00275   void ClearResults();
00276 
00277   WERD_RES& operator=(const WERD_RES& source);  //from this
00278 
00279   void CopySimpleFields(const WERD_RES& source);
00280 
00281   // Sets up the members used in recognition:
00282   // bln_boxes, chopped_word, seam_array, denorm, best_choice, raw_choice.
00283   // Returns false if the word is empty and sets up fake results.
00284   bool SetupForRecognition(const UNICHARSET& unicharset,
00285                            bool numeric_mode, ROW *row, BLOCK* block);
00286 
00287   // Builds the rebuild_word from the chopped_word and the best_state.
00288   void RebuildBestState();
00289 
00290   // Copies the chopped_word to the rebuild_word, faking a best_state as well.
00291   // Also sets up the output box_word.
00292   void CloneChoppedToRebuild();
00293 
00294   // Sets/replaces the box_word with one made from the rebuild_word.
00295   void SetupBoxWord();
00296 
00297   // Sets up the script positions in the output boxword using the best_choice
00298   // to get the unichars, and the unicharset to get the target positions.
00299   void SetScriptPositions(const UNICHARSET& unicharset);
00300 
00301   // Classifies the word with some already-calculated BLOB_CHOICEs.
00302   // The choices are an array of blob_count pointers to BLOB_CHOICE,
00303   // providing a single classifier result for each blob.
00304   // The BLOB_CHOICEs are consumed and the word takes ownership.
00305   // The number of blobs in the outword must match blob_count.
00306   void FakeClassifyWord(const UNICHARSET& unicharset, int blob_count,
00307                         BLOB_CHOICE** choices);
00308 
00309   // Copies the best_choice strings to the correct_text for adaption/training.
00310   void BestChoiceToCorrectText(const UNICHARSET& unicharset);
00311 
00312   // Merges 2 adjacent blobs in the result if the permanent callback
00313   // class_cb returns other than INVALID_UNICHAR_ID, AND the permanent
00314   // callback box_cb is NULL or returns true, setting the merged blob
00315   // result to the class returned from class_cb.
00316   // Returns true if anything was merged.
00317   bool ConditionalBlobMerge(
00318       const UNICHARSET& unicharset,
00319       TessResultCallback2<UNICHAR_ID, UNICHAR_ID, UNICHAR_ID>* class_cb,
00320       TessResultCallback2<bool, const TBOX&, const TBOX&>* box_cb,
00321       BLOB_CHOICE_LIST_CLIST *blob_choices);
00322 
00323   static WERD_RES* deep_copy(const WERD_RES* src) {
00324     return new WERD_RES(*src);
00325   }
00326 
00327   // Copy blobs from word_res onto this word (eliminating spaces between).
00328   // Since this may be called bidirectionally OR both the BOL and EOL flags.
00329   void copy_on(WERD_RES *word_res) {  //from this word
00330     word->set_flag(W_BOL, word->flag(W_BOL) || word_res->word->flag(W_BOL));
00331     word->set_flag(W_EOL, word->flag(W_EOL) || word_res->word->flag(W_EOL));
00332     word->copy_on(word_res->word);
00333   }
00334 };
00335 
00336 /*************************************************************************
00337  * PAGE_RES_IT - Page results iterator
00338  *************************************************************************/
00339 
00340 class PAGE_RES_IT {
00341  public:
00342   PAGE_RES * page_res;         // page being iterated
00343 
00344   PAGE_RES_IT() {
00345   }                            // empty contructor
00346 
00347   PAGE_RES_IT(PAGE_RES *the_page_res) {    // page result
00348     page_res = the_page_res;
00349     restart_page();  // ready to scan
00350   }
00351 
00352   WERD_RES *restart_page() {
00353     return start_page(false);  // Skip empty blocks.
00354   }
00355   WERD_RES *restart_page_with_empties() {
00356     return start_page(true);  // Allow empty blocks.
00357   }
00358   WERD_RES *start_page(bool empty_ok);
00359 
00360   // ============ Methods that mutate the underling structures ===========
00361   // Note that these methods will potentially invalidate other PAGE_RES_ITs
00362   // and are intended to be used only while a single PAGE_RES_IT is  active.
00363   // This problem needs to be taken into account if these mutation operators
00364   // are ever provided to PageIterator or its subclasses.
00365 
00366   // Inserts the new_word and a corresponding WERD_RES before the current
00367   // position. The simple fields of the WERD_RES are copied from clone_res and
00368   // the resulting WERD_RES is returned for further setup with best_choice etc.
00369   WERD_RES* InsertCloneWord(const WERD_RES& clone_res, WERD* new_word);
00370 
00371   // Deletes the current WERD_RES and its underlying WERD.
00372   void DeleteCurrentWord();
00373 
00374   WERD_RES *forward() {  // Get next word.
00375     return internal_forward(false, false);
00376   }
00377   // Move forward, but allow empty blocks to show as single NULL words.
00378   WERD_RES *forward_with_empties() {
00379     return internal_forward(false, true);
00380   }
00381 
00382   WERD_RES *forward_block();  // get first word in
00383   // next non-empty block
00384   WERD_RES *prev_word() const {  // previous word
00385     return prev_word_res;
00386   }
00387   ROW_RES *prev_row() const {  // row of prev word
00388     return prev_row_res;
00389   }
00390   BLOCK_RES *prev_block() const {  // block of prev word
00391     return prev_block_res;
00392   }
00393   WERD_RES *word() const {  // current word
00394     return word_res;
00395   }
00396   ROW_RES *row() const {  // row of current word
00397     return row_res;
00398   }
00399   BLOCK_RES *block() const {  // block of cur. word
00400     return block_res;
00401   }
00402   WERD_RES *next_word() const {  // next word
00403     return next_word_res;
00404   }
00405   ROW_RES *next_row() const {  // row of next word
00406     return next_row_res;
00407   }
00408   BLOCK_RES *next_block() const {  // block of next word
00409     return next_block_res;
00410   }
00411   void rej_stat_word();  // for page/block/row
00412 
00413  private:
00414   void ResetWordIterator();
00415   WERD_RES *internal_forward(bool new_block, bool empty_ok);
00416 
00417   WERD_RES * prev_word_res;    // previous word
00418   ROW_RES *prev_row_res;       // row of prev word
00419   BLOCK_RES *prev_block_res;   // block of prev word
00420 
00421   WERD_RES *word_res;          // current word
00422   ROW_RES *row_res;            // row of current word
00423   BLOCK_RES *block_res;        // block of cur. word
00424 
00425   WERD_RES *next_word_res;     // next word
00426   ROW_RES *next_row_res;       // row of next word
00427   BLOCK_RES *next_block_res;   // block of next word
00428 
00429   BLOCK_RES_IT block_res_it;   // iterators
00430   ROW_RES_IT row_res_it;
00431   WERD_RES_IT word_res_it;
00432 };
00433 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines