Tesseract 3.01
|
00001 // Copyright 2008 Google Inc. All Rights Reserved. 00002 // Author: shobhitsaxena@google.com (Shobhit Saxena) 00003 00004 #ifndef TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_ 00005 #define TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_ 00006 00007 #include "ocrblock.h" 00008 #include "params.h" 00009 00010 struct Pix; 00011 struct Box; 00012 struct Boxa; 00013 00014 extern 00015 INT_VAR_H(devanagari_split_debuglevel, 0, 00016 "Debug level for split shiro-rekha process."); 00017 00018 extern 00019 BOOL_VAR_H(devanagari_split_debugimage, 0, 00020 "Whether to create a debug image for split shiro-rekha process."); 00021 00022 class TBOX; 00023 class IMAGE; 00024 00025 namespace tesseract { 00026 00027 class PixelHistogram { 00028 public: 00029 PixelHistogram() { 00030 hist_ = NULL; 00031 length_ = 0; 00032 } 00033 00034 ~PixelHistogram() { 00035 Clear(); 00036 } 00037 00038 void Clear() { 00039 if (hist_) { 00040 delete[] hist_; 00041 } 00042 length_ = 0; 00043 } 00044 00045 int* const hist() const { 00046 return hist_; 00047 } 00048 00049 int length() const { 00050 return length_; 00051 } 00052 00053 // Methods to construct histograms from images. These clear any existing data. 00054 void ConstructVerticalCountHist(Pix* pix); 00055 void ConstructHorizontalCountHist(Pix* pix); 00056 00057 // This method returns the global-maxima for the histogram. The frequency of 00058 // the global maxima is returned in count, if specified. 00059 int GetHistogramMaximum(int* count) const; 00060 00061 private: 00062 int* hist_; 00063 int length_; 00064 }; 00065 00066 class ShiroRekhaSplitter { 00067 public: 00068 enum SplitStrategy { 00069 NO_SPLIT = 0, // No splitting is performed for the phase. 00070 MINIMAL_SPLIT, // Blobs are split minimally. 00071 MAXIMAL_SPLIT // Blobs are split maximally. 00072 }; 00073 00074 ShiroRekhaSplitter(); 00075 virtual ~ShiroRekhaSplitter(); 00076 00077 // Top-level method to perform splitting based on current settings. 00078 // Returns true if a split was actually performed. 00079 // If split_for_pageseg is true, the pageseg_split_strategy_ is used for 00080 // splitting. If false, the ocr_split_strategy_ is used. 00081 bool Split(bool split_for_pageseg); 00082 00083 // This method changes the input page image and pix_binary to be the same as 00084 // the splitted image owned by this object. 00085 // Any of the parameters can be NULL. 00086 void CopySplittedImageTo(IMAGE* page_image, Pix** pix_binary) const; 00087 00088 // This method changes the input page image and pix_binary to be the same as 00089 // the original image provided to this object. 00090 // Any of the parameters can be NULL. 00091 void CopyOriginalImageTo(IMAGE* page_image, Pix** pix_binary) const; 00092 00093 // Clears the memory held by this object. 00094 void Clear(); 00095 00096 // Refreshes the words in the segmentation block list by using blobs in the 00097 // input blob list. 00098 // The segmentation block list must be set. 00099 void RefreshSegmentationWithNewBlobs(C_BLOB_LIST* new_blobs); 00100 00101 // Returns true if the split strategies for pageseg and ocr are different. 00102 bool HasDifferentSplitStrategies() const { 00103 return pageseg_split_strategy_ != ocr_split_strategy_; 00104 } 00105 00106 // This only keeps a copy of the block list pointer. At split call, the list 00107 // object should still be alive. This block list is used as a golden 00108 // segmentation when performing splitting. 00109 void set_segmentation_block_list(BLOCK_LIST* block_list) { 00110 segmentation_block_list_ = block_list; 00111 } 00112 00113 static const int kUnspecifiedXheight = -1; 00114 00115 void set_global_xheight(int xheight) { 00116 global_xheight_ = xheight; 00117 } 00118 00119 void set_perform_close(bool perform) { 00120 perform_close_ = perform; 00121 } 00122 00123 // Returns the image obtained from shiro-rekha splitting. The returned object 00124 // is owned by this class. Callers may want to clone the returned pix to keep 00125 // it alive beyond the life of ShiroRekhaSplitter object. 00126 Pix* splitted_image() { 00127 return splitted_image_; 00128 } 00129 00130 // On setting the input image, a clone of it is owned by this class. 00131 void set_orig_pix(Pix* pix); 00132 00133 // Returns the input image provided to the object. This object is owned by 00134 // this class. Callers may want to clone the returned pix to work with it. 00135 Pix* orig_pix() { 00136 return orig_pix_; 00137 } 00138 00139 SplitStrategy ocr_split_strategy() const { 00140 return ocr_split_strategy_; 00141 } 00142 00143 void set_ocr_split_strategy(SplitStrategy strategy) { 00144 ocr_split_strategy_ = strategy; 00145 } 00146 00147 SplitStrategy pageseg_split_strategy() const { 00148 return pageseg_split_strategy_; 00149 } 00150 00151 void set_pageseg_split_strategy(SplitStrategy strategy) { 00152 pageseg_split_strategy_ = strategy; 00153 } 00154 00155 BLOCK_LIST* segmentation_block_list() { 00156 return segmentation_block_list_; 00157 } 00158 00159 // This method dumps a debug image to the specified location. 00160 void DumpDebugImage(const char* filename) const; 00161 00162 // This method returns the computed mode-height of blobs in the pix. 00163 // It also prunes very small blobs from calculation. Could be used to provide 00164 // a global xheight estimate for images which have the same point-size text. 00165 static int GetModeHeight(Pix* pix); 00166 00167 private: 00168 // Method to perform a close operation on the input image. The xheight 00169 // estimate decides the size of sel used. 00170 static void PerformClose(Pix* pix, int xheight_estimate); 00171 00172 // This method resolves the cc bbox to a particular row and returns the row's 00173 // xheight. This uses block_list_ if available, else just returns the 00174 // global_xheight_ estimate currently set in the object. 00175 int GetXheightForCC(Box* cc_bbox); 00176 00177 // Returns a list of regions (boxes) which should be cleared in the original 00178 // image so as to perform shiro-rekha splitting. Pix is assumed to carry one 00179 // (or less) word only. Xheight measure could be the global estimate, the row 00180 // estimate, or unspecified. If unspecified, over splitting may occur, since a 00181 // conservative estimate of stroke width along with an associated multiplier 00182 // is used in its place. It is advisable to have a specified xheight when 00183 // splitting for classification/training. 00184 void SplitWordShiroRekha(SplitStrategy split_strategy, 00185 Pix* pix, 00186 int xheight, 00187 int word_left, 00188 int word_top, 00189 Boxa* regions_to_clear); 00190 00191 // Returns a new box object for the corresponding TBOX, based on the original 00192 // image's coordinate system. 00193 Box* GetBoxForTBOX(const TBOX& tbox) const; 00194 00195 // This method returns y-extents of the shiro-rekha computed from the input 00196 // word image. 00197 static void GetShiroRekhaYExtents(Pix* word_pix, 00198 int* shirorekha_top, 00199 int* shirorekha_bottom, 00200 int* shirorekha_ylevel); 00201 00202 Pix* orig_pix_; // Just a clone of the input image passed. 00203 Pix* splitted_image_; // Image produced after the last splitting round. The 00204 // object is owned by this class. 00205 SplitStrategy pageseg_split_strategy_; 00206 SplitStrategy ocr_split_strategy_; 00207 Pix* debug_image_; 00208 // This block list is used as a golden segmentation when performing splitting. 00209 BLOCK_LIST* segmentation_block_list_; 00210 int global_xheight_; 00211 bool perform_close_; // Whether a morphological close operation should be 00212 // performed before CCs are run through splitting. 00213 }; 00214 00215 } 00216 #endif // TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_