Tesseract 3.01
/data/source/tesseract-ocr/textord/devanagari_processing.h
Go to the documentation of this file.
00001 // Copyright 2008 Google Inc. All Rights Reserved.
00002 // Author: shobhitsaxena@google.com (Shobhit Saxena)
00003 
00004 #ifndef TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
00005 #define TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
00006 
00007 #include "ocrblock.h"
00008 #include "params.h"
00009 
00010 struct Pix;
00011 struct Box;
00012 struct Boxa;
00013 
00014 extern
00015 INT_VAR_H(devanagari_split_debuglevel, 0,
00016           "Debug level for split shiro-rekha process.");
00017 
00018 extern
00019 BOOL_VAR_H(devanagari_split_debugimage, 0,
00020            "Whether to create a debug image for split shiro-rekha process.");
00021 
00022 class TBOX;
00023 class IMAGE;
00024 
00025 namespace tesseract {
00026 
00027 class PixelHistogram {
00028  public:
00029   PixelHistogram() {
00030     hist_ = NULL;
00031     length_ = 0;
00032   }
00033 
00034   ~PixelHistogram() {
00035     Clear();
00036   }
00037 
00038   void Clear() {
00039     if (hist_) {
00040       delete[] hist_;
00041     }
00042     length_ = 0;
00043   }
00044 
00045   int* const hist() const {
00046     return hist_;
00047   }
00048 
00049   int length() const {
00050     return length_;
00051   }
00052 
00053   // Methods to construct histograms from images. These clear any existing data.
00054   void ConstructVerticalCountHist(Pix* pix);
00055   void ConstructHorizontalCountHist(Pix* pix);
00056 
00057   // This method returns the global-maxima for the histogram. The frequency of
00058   // the global maxima is returned in count, if specified.
00059   int GetHistogramMaximum(int* count) const;
00060 
00061  private:
00062   int* hist_;
00063   int length_;
00064 };
00065 
00066 class ShiroRekhaSplitter {
00067  public:
00068   enum SplitStrategy {
00069     NO_SPLIT = 0,   // No splitting is performed for the phase.
00070     MINIMAL_SPLIT,  // Blobs are split minimally.
00071     MAXIMAL_SPLIT   // Blobs are split maximally.
00072   };
00073 
00074   ShiroRekhaSplitter();
00075   virtual ~ShiroRekhaSplitter();
00076 
00077   // Top-level method to perform splitting based on current settings.
00078   // Returns true if a split was actually performed.
00079   // If split_for_pageseg is true, the pageseg_split_strategy_ is used for
00080   // splitting. If false, the ocr_split_strategy_ is used.
00081   bool Split(bool split_for_pageseg);
00082 
00083   // This method changes the input page image and pix_binary to be the same as
00084   // the splitted image owned by this object.
00085   // Any of the parameters can be NULL.
00086   void CopySplittedImageTo(IMAGE* page_image, Pix** pix_binary) const;
00087 
00088   // This method changes the input page image and pix_binary to be the same as
00089   // the original image provided to this object.
00090   // Any of the parameters can be NULL.
00091   void CopyOriginalImageTo(IMAGE* page_image, Pix** pix_binary) const;
00092 
00093   // Clears the memory held by this object.
00094   void Clear();
00095 
00096   // Refreshes the words in the segmentation block list by using blobs in the
00097   // input blob list.
00098   // The segmentation block list must be set.
00099   void RefreshSegmentationWithNewBlobs(C_BLOB_LIST* new_blobs);
00100 
00101   // Returns true if the split strategies for pageseg and ocr are different.
00102   bool HasDifferentSplitStrategies() const {
00103     return pageseg_split_strategy_ != ocr_split_strategy_;
00104   }
00105 
00106   // This only keeps a copy of the block list pointer. At split call, the list
00107   // object should still be alive. This block list is used as a golden
00108   // segmentation when performing splitting.
00109   void set_segmentation_block_list(BLOCK_LIST* block_list) {
00110     segmentation_block_list_ = block_list;
00111   }
00112 
00113   static const int kUnspecifiedXheight = -1;
00114 
00115   void set_global_xheight(int xheight) {
00116     global_xheight_ = xheight;
00117   }
00118 
00119   void set_perform_close(bool perform) {
00120     perform_close_ = perform;
00121   }
00122 
00123   // Returns the image obtained from shiro-rekha splitting. The returned object
00124   // is owned by this class. Callers may want to clone the returned pix to keep
00125   // it alive beyond the life of ShiroRekhaSplitter object.
00126   Pix* splitted_image() {
00127     return splitted_image_;
00128   }
00129 
00130   // On setting the input image, a clone of it is owned by this class.
00131   void set_orig_pix(Pix* pix);
00132 
00133   // Returns the input image provided to the object. This object is owned by
00134   // this class. Callers may want to clone the returned pix to work with it.
00135   Pix* orig_pix() {
00136     return orig_pix_;
00137   }
00138 
00139   SplitStrategy ocr_split_strategy() const {
00140     return ocr_split_strategy_;
00141   }
00142 
00143   void set_ocr_split_strategy(SplitStrategy strategy) {
00144     ocr_split_strategy_ = strategy;
00145   }
00146 
00147   SplitStrategy pageseg_split_strategy() const {
00148     return pageseg_split_strategy_;
00149   }
00150 
00151   void set_pageseg_split_strategy(SplitStrategy strategy) {
00152     pageseg_split_strategy_ = strategy;
00153   }
00154 
00155   BLOCK_LIST* segmentation_block_list() {
00156     return segmentation_block_list_;
00157   }
00158 
00159   // This method dumps a debug image to the specified location.
00160   void DumpDebugImage(const char* filename) const;
00161 
00162   // This method returns the computed mode-height of blobs in the pix.
00163   // It also prunes very small blobs from calculation. Could be used to provide
00164   // a global xheight estimate for images which have the same point-size text.
00165   static int GetModeHeight(Pix* pix);
00166 
00167  private:
00168   // Method to perform a close operation on the input image. The xheight
00169   // estimate decides the size of sel used.
00170   static void PerformClose(Pix* pix, int xheight_estimate);
00171 
00172   // This method resolves the cc bbox to a particular row and returns the row's
00173   // xheight. This uses block_list_ if available, else just returns the
00174   // global_xheight_ estimate currently set in the object.
00175   int GetXheightForCC(Box* cc_bbox);
00176 
00177   // Returns a list of regions (boxes) which should be cleared in the original
00178   // image so as to perform shiro-rekha splitting. Pix is assumed to carry one
00179   // (or less) word only. Xheight measure could be the global estimate, the row
00180   // estimate, or unspecified. If unspecified, over splitting may occur, since a
00181   // conservative estimate of stroke width along with an associated multiplier
00182   // is used in its place. It is advisable to have a specified xheight when
00183   // splitting for classification/training.
00184   void SplitWordShiroRekha(SplitStrategy split_strategy,
00185                            Pix* pix,
00186                            int xheight,
00187                            int word_left,
00188                            int word_top,
00189                            Boxa* regions_to_clear);
00190 
00191   // Returns a new box object for the corresponding TBOX, based on the original
00192   // image's coordinate system.
00193   Box* GetBoxForTBOX(const TBOX& tbox) const;
00194 
00195   // This method returns y-extents of the shiro-rekha computed from the input
00196   // word image.
00197   static void GetShiroRekhaYExtents(Pix* word_pix,
00198                                     int* shirorekha_top,
00199                                     int* shirorekha_bottom,
00200                                     int* shirorekha_ylevel);
00201 
00202   Pix* orig_pix_;         // Just a clone of the input image passed.
00203   Pix* splitted_image_;   // Image produced after the last splitting round. The
00204                           // object is owned by this class.
00205   SplitStrategy pageseg_split_strategy_;
00206   SplitStrategy ocr_split_strategy_;
00207   Pix* debug_image_;
00208   // This block list is used as a golden segmentation when performing splitting.
00209   BLOCK_LIST* segmentation_block_list_;
00210   int global_xheight_;
00211   bool perform_close_;  // Whether a morphological close operation should be
00212                         // performed before CCs are run through splitting.
00213 };
00214 
00215 }
00216 #endif  // TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines