Tesseract 3.01
/data/source/tesseract-ocr/ccutil/ocrclass.h
Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        ocrclass.h
00003  * Description: Class definitions and constants for the OCR API.
00004  * Author:                                      Hewlett-Packard Co
00005  *
00006  * (C) Copyright 1996, Hewlett-Packard Co.
00007  ** Licensed under the Apache License, Version 2.0 (the "License");
00008  ** you may not use this file except in compliance with the License.
00009  ** You may obtain a copy of the License at
00010  ** http://www.apache.org/licenses/LICENSE-2.0
00011  ** Unless required by applicable law or agreed to in writing, software
00012  ** distributed under the License is distributed on an "AS IS" BASIS,
00013  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00014  ** See the License for the specific language governing permissions and
00015  ** limitations under the License.
00016  *
00017  **********************************************************************/
00018 
00019 /**********************************************************************
00020  * This file contains typedefs for all the structures used by
00021  * the HP OCR interface.
00022  * The code is designed to be used with either a C or C++ compiler.
00023  * The structures are designed to allow them to be used with any
00024  * structure alignment up to 8.
00025  **********************************************************************/
00026 
00027 #ifndef            CCUTIL_OCRCLASS_H_
00028 #define            CCUTIL_OCRCLASS_H_
00029 
00030 #ifndef __GNUC__
00031 #ifdef __MSW32__
00032 #include          <windows.h>
00033 #include          "gettimeofday.h"
00034 #endif
00035 #else
00036 #include          <sys/time.h>
00037 #endif
00038 #include          <time.h>
00039 #include          "host.h"
00040 
/* Upper limits on the lengths of the name strings used by the interface.
   The arrays in this header that hold them are declared with one extra
   element (MAX_xxx + 1). */
#define MAX_FONT_NAME    34  /* font name */
#define MAX_OCR_NAME     32  /* engine name */
#define MAX_OCR_VERSION  17  /* engine version code */
00045 
/* Constraints on image dimensions. */
#define MIN_IMAGE_SIZE  64  /* smallest image that will be passed */
#define IMAGE_ROUNDING  32  /* every size is a multiple of this */
00049 
/* Maximum time, in seconds, allowed for each interface operation.
   Defining __SLOW_TIMES__ selects the longer set of limits. */
#ifdef __SLOW_TIMES__
#define STARTUP_TIMEOUT   100  /* start of OCR engine */
#define SHUTDOWN_TIMEOUT  50   /* end of OCR engine */
#define SENDIM_TIMEOUT    50   /* send of image */
#define RELEASE_TIMEOUT   50   /* release of semaphore */
#define READIM_TIMEOUT    100  /* read of image */
#define READTEXT_TIMEOUT  50   /* read of text */
#define PROGRESS_TIMEOUT  30   /* progress reporting period */
#else
#define STARTUP_TIMEOUT   10   /* start of OCR engine */
#define SHUTDOWN_TIMEOUT  6    /* end of OCR engine */
#define SENDIM_TIMEOUT    5    /* send of image */
#define RELEASE_TIMEOUT   5    /* release of semaphore */
#define READIM_TIMEOUT    10   /* read of image */
#define READTEXT_TIMEOUT  5    /* read of text */
#define PROGRESS_TIMEOUT  3    /* progress reporting period */
#endif
/* This limit has the same value in both configurations. */
#define BADTIMES_TIMEOUT  7    /* max lack of progress */
00071 
/* Language identifiers. The numeric values are the same as those
   used by RTF. */
#define LANGE_NONE       0x0400  /* no language */
#define LANGE_ALBANIAN   0x041c  /* Albanian */
#define LANGE_BRITISH    0x0809  /* International English */
#define LANGE_BULGARIAN  0x0402  /* Bulgarian */
#define LANGE_CROATIAN   0x041a  /* Croatian (Latin alphabet) */
#define LANGE_CZECH      0x0405  /* Czech */
#define LANGE_DANISH     0x0406  /* Danish */
#define LANGE_DUTCH      0x0413  /* Dutch */
#define LANGE_FINNISH    0x040b  /* Finnish */
#define LANGE_FRENCH     0x040c  /* French */
#define LANGE_GERMAN     0x0407  /* German */
#define LANGE_GREEK      0x0408  /* Greek */
#define LANGE_HUNGARIAN  0x040e  /* Hungarian */
#define LANGE_ITALIAN    0x0410  /* Italian */
#define LANGE_JAPANESE   0x0411  /* Japanese */
#define LANGE_KOREAN     0x0412  /* Korean */
#define LANGE_NORWEGIAN  0x0414  /* Bokmal */
#define LANGE_POLISH     0x0415  /* Polish */
#define LANGE_PORTUGESE  0x0416  /* Brazilian Portuguese */
#define LANGE_ROMANIAN   0x0418  /* Romanian */
#define LANGE_RUSSIAN    0x0419  /* Russian */
#define LANGE_SCHINESE   0x0804  /* Simplified Chinese */
#define LANGE_SLOVAK     0x041b  /* Slovak */
#define LANGE_SPANISH    0x040a  /* Castilian */
#define LANGE_SWEDISH    0x041d  /* Swedish */
#define LANGE_TCHINESE   0x0404  /* Traditional Chinese */
#define LANGE_TURKISH    0x041f  /* Turkish */
#define LANGE_USENGLISH  0x0409  /* American */
00101 
/* Font family codes. The numeric values are the same as those
   used by RTF. */
#define FFAM_NONE    0  /* unknown */
#define FFAM_ROMAN   1  /* serifed, proportional */
#define FFAM_SWISS   2  /* sans-serif, proportional */
#define FFAM_MODERN  3  /* fixed pitch */
00107 
/* Character set codes. The numeric values are the same as those
   used by RTF. */
#define CHSET_ANSI       0    /* Ansi efigs */
#define CHSET_SHIFT_JIS  128  /* JIS X 0208-1990 */
#define CHSET_KOREAN     129  /* KS C 5601-1992 */
#define CHSET_SCHINESE   134  /* GB 2312-80 */
#define CHSET_BIG5       136  /* Big Five */
#define CHSET_CYRILLIC   204  /* Cyrillic */
#define CHSET_EEUROPE    238  /* Eastern Europe */
00116 
/* Pitch codes. The numeric values are the same as those used by RTF. */
#define PITCH_DEF    0  /* default */
#define PITCH_FIXED  1  /* fixed pitch */
#define PITCH_VAR    2  /* variable pitch */
00121 
/* Character enhancement flags. Each one occupies its own bit; OR the
   wanted ones together to form the enhancement passed to
   ocr_append_char. */
#define EUC_BOLD         0x01  /* bold character */
#define EUC_ITALIC       0x02  /* italic character */
#define EUC_UNDERLINE    0x04  /* underlined character */
#define EUC_SUBSCRIPT    0x08  /* subscript character */
#define EUC_SUPERSCRIPT  0x10  /* superscript character */
00129 
/* Direction in which characters are rendered. */
enum OCR_CHAR_DIRECTION {
  OCR_CDIR_RIGHT_LEFT = 0,  /* horizontal, right to left */
  OCR_CDIR_LEFT_RIGHT = 1,  /* horizontal, left to right */
  OCR_CDIR_TOP_BOTTOM = 2,  /* vertical, top to bottom */
  OCR_CDIR_BOTTOM_TOP = 3   /* vertical, bottom to top */
};
00137 
/* Direction in which successive lines are rendered. */
enum OCR_LINE_DIRECTION {
  /* horizontal lines go down, vertical lines go right */
  OCR_LDIR_DOWN_RIGHT = 0,
  /* horizontal lines go up */
  OCR_LDIR_UP_LEFT = 1
};
00144 
/* Kind of line break associated with a character. */
enum OCR_NEWLINE_TYPE {
  OCR_NL_NONE = 0,     /* not a newline */
  OCR_NL_NEWLINE = 1,  /* a newline that does not start a new paragraph */
  OCR_NL_NEWPARA = 2   /* a newline that also starts a new paragraph */
};
00151 
/* Error codes, other than OKAY and HPERR, that the API functions
   can return. */
#define OCR_API_NO_MEM     (-2)  /* output buffer is full */
#define OCR_API_BAD_CHAR   (-3)  /* whitespace was sent to ocr_append_char */
#define OCR_API_BAD_STATE  (-4)  /* invalid call sequence */
00157 
/* Error codes used to pass errors back to the HP side
   (for calls to ocr_error). */
enum OCR_ERR_CODE {
  OCR_ERR_NONE = 0,         /* no error */
  OCR_ERR_CLEAN_EXIT = 1,   /* no error */
  OCR_ERR_NO_MEM = 2,       /* out of memory */
  OCR_ERR_FILE_READ = 3,    /* failed to read data file */
  OCR_ERR_TMP_WRITE = 4,    /* failed to write temp file */
  OCR_ERR_TMP_READ = 5,     /* failed to read temp file */
  OCR_ERR_BAD_DLL = 6,      /* missing or invalid dll subcomponent */
  OCR_ERR_BAD_EXE = 7,      /* missing or invalid exe subcomponent */
  OCR_ERR_BAD_LOAD = 8,     /* failed to load subcomponent */
  OCR_ERR_BAD_LANG = 9,     /* unable to recognize requested language */
  OCR_ERR_BAD_STATE = 10,   /* engine made a call out of sequence */
  OCR_ERR_INTERNAL1 = 11,   /* internal error, slot 1 */
  OCR_ERR_INTERNAL2 = 12,   /* internal error, slot 2 */
  OCR_ERR_INTERNAL3 = 13,   /* internal error, slot 3 */
  OCR_ERR_INTERNAL4 = 14,   /* internal error, slot 4 */
  OCR_ERR_INTERNAL5 = 15,   /* internal error, slot 5 */
  OCR_ERR_INTERNAL6 = 16,   /* internal error, slot 6 */
  OCR_ERR_INTERNAL7 = 17,   /* internal error, slot 7 */
  OCR_ERR_INTERNAL8 = 18,   /* internal error, slot 8 */
  OCR_ERR_TIMEOUT = 19      /* timed out in comms */
};
00181 
00182 /**********************************************************************
00183  * EFONT_DESC
00184  * Description of one font.
00185  * The information required is basically that used by RTF.
00186  * The name may be either a valid font on the system or the empty string.
00187  **********************************************************************/
00188 
00189 typedef struct {                  /*font description */
00190   uinT16 language;               /*default language */
00191   uinT8 font_family;             /*serif/not, fixed/not */
00192   uinT8 char_set;                /*character set standard */
00193   uinT8 pitch;                   /*fixed or prop */
00194   inT8 name[MAX_FONT_NAME + 1];  /*plain ascii name */
00195 } EFONT_DESC;                    /*font description */
00196 
00197 /**********************************************************************
00198  * EOCR_DESC
00199  * Description of the OCR engine provided at startup.
00200  * The name and version may be reported to the user at some point.
00201  * The fonts array should indicate the fonts that the OCR system
00202  * can recognize.
00203  **********************************************************************/
00204 
00205 typedef struct {                  /*startup info */
00206   inT32 protocol;                /*interface version */
00207   uinT32 font_count;             /*number of fonts */
00208   uinT16 language;               /*default language */
00209   uinT16 name[MAX_OCR_NAME + 1]; /*name of engine */
00210                                  /*version of engine */
00211   uinT16 version[MAX_OCR_VERSION + 1];
00212   EFONT_DESC fonts[1];           /*array of fonts */
00213 } EOCR_DESC;                     /*startup info */
00214 
00215 /**********************************************************************
00216  * ESTRIP_DESC
00217  * Description of the image strip as it is passed to the engine.
00218  * The image is always 1 bit, with 1=black.
00219  * The width is always a multiple of 32, so padding is always OK.
00220  * The height of the full image is always a multiple of 32.
00221  * The top y coordinate is 0, and increases down.
00222  * The top leftmost pixel is in the most significant bit of the first byte.
00223  **********************************************************************/
00224 
00225 typedef struct {                  /*bitmap strip */
00226   inT16 x_size;                  /*width in pixels */
00227   inT16 y_size;                  /*of full image */
00228   inT16 strip_size;              /*of this strip */
00229   inT16 resolution;              /*pixels per inch */
00230   uinT8 data[8];                 /*image data */
00231 } ESTRIP_DESC;                   /*bitmap strip */
00232 
00233 /**********************************************************************
00234  * EANYCODE_CHAR
00235  * Description of a single character. The character code is defined by
00236  * the character set of the current font.
00237  * Output text is sent as an array of these structures.
00238  * Spaces and line endings in the output are represented in the
00239  * structures of the surrounding characters. They are not directly
00240  * represented as characters.
00241  * The first character in a word has a positive value of blanks.
00242  * Missing information should be set to the defaults in the comments.
00243  * If word bounds are known, but not character bounds, then the top and
00244  * bottom of each character should be those of the word. The left of the
00245  * first and right of the last char in each word should be set. All other
00246  * lefts and rights should be set to -1.
00247  * If set, the values of right and bottom are left+width and top+height.
00248  * Most of the members come directly from the parameters to ocr_append_char.
00249  * The formatting member uses the enhancement parameter and combines the
00250  * line direction stuff into the top 3 bits.
00251  * The coding is 0=RL char, 1=LR char, 2=DR NL, 3=UL NL, 4=DR Para,
00252  * 5=UL Para, 6=TB char, 7=BT char. API users do not need to know what
00253  * the coding is, only that it is backwards compatible with the previous
00254  * version.
00255  **********************************************************************/
00256 
00257 typedef struct {                  /*single character */
00258 // It should be noted that the format for char_code for version 2.0 and beyond
00259 // is UTF8 which means that ASCII characters will come out as one structure but
00260 // other characters will be returned in two or more instances of this structure
00261 // with a single byte of the  UTF8 code in each, but each will have the same
00262 // bounding box. Programs which want to handle languagues with different
00263 // characters sets will need to handle extended characters appropriately, but
00264 // *all* code needs to be prepared to receive UTF8 coded characters for
00265 // characters such as bullet and fancy quotes.
00266   uinT16 char_code;              /*character itself */
00267   inT16 left;                    /*of char (-1) */
00268   inT16 right;                   /*of char (-1) */
00269   inT16 top;                     /*of char (-1) */
00270   inT16 bottom;                  /*of char (-1) */
00271   inT16 font_index;              /*what font (0) */
00272   uinT8 confidence;              /*0=perfect, 100=reject (0/100) */
00273   uinT8 point_size;              /*of char, 72=i inch, (10) */
00274   inT8 blanks;                   /*no of spaces before this char (1) */
00275   uinT8 formatting;              /*char formatting (0) */
00276 } EANYCODE_CHAR;                 /*single character */
00277 
00278 /**********************************************************************
00279  * ETEXT_DESC
00280  * Description of the output of the OCR engine.
00281  * This structure is used as both a progress monitor and the final
00282  * output header, since it needs to be a valid progress monitor while
00283  * the OCR engine is storing its output to shared memory.
00284  * During progress, all the buffer info is -1.
00285  * Progress starts at 0 and increases to 100 during OCR. No other constraint.
00286  * Every progress callback, the OCR engine must set ocr_alive to 1.
00287  * The HP side will set ocr_alive to 0. Repeated failure to reset
00288  * to 1 indicates that the OCR engine is dead.
00289  * If the cancel function is not null then it is called with the number of
00290  * user words found. If it returns true then operation is cancelled.
00291  **********************************************************************/
// Type of the optional cancellation callback. It is passed a caller
// supplied pointer and the number of user words found; returning true
// requests that the operation be cancelled.
typedef bool (*CANCEL_FUNC)(void *cancel_this, int words);
00293 
00294 class ETEXT_DESC {             // output header
00295  public:
00296   inT16 count;                 // chars in this buffer(0)
00297   inT16 progress;              // percent complete increasing (0-100)
00298   inT8 more_to_come;           // true if not last
00299   volatile inT8 ocr_alive;     // ocr sets to 1, HP 0
00300   inT8 err_code;               // for errcode use
00301   CANCEL_FUNC cancel;          // returns true to cancel
00302   void* cancel_this;           // this or other data for cancel
00303   struct timeval end_time;     // time to stop. expected to be set only by call
00304                                // to set_deadline_msecs()
00305   EANYCODE_CHAR text[1];       // character data
00306 
00307   ETEXT_DESC() : count(0), progress(0), more_to_come(0), ocr_alive(0),
00308                    err_code(0), cancel(NULL), cancel_this(NULL) {
00309     end_time.tv_sec = 0;
00310     end_time.tv_usec = 0;
00311   }
00312 
00313   // Sets the end time to be deadline_msecs milliseconds from now.
00314   void set_deadline_msecs(inT32 deadline_msecs) {
00315     gettimeofday(&end_time, NULL);
00316     inT32 deadline_secs = deadline_msecs / 1000;
00317     end_time.tv_sec += deadline_secs;
00318     end_time.tv_usec += (deadline_msecs -  deadline_secs * 1000) * 1000;
00319     if (end_time.tv_usec > 1000000) {
00320       end_time.tv_usec -= 1000000;
00321       ++end_time.tv_sec;
00322     }
00323   }
00324 
00325   // Returns false if we've not passed the end_time, or have not set a deadline.
00326   bool deadline_exceeded() const {
00327     if (end_time.tv_sec == 0 && end_time.tv_usec == 0) return false;
00328     struct timeval now;
00329     gettimeofday(&now, NULL);
00330     return (now.tv_sec > end_time.tv_sec || (now.tv_sec == end_time.tv_sec &&
00331                                              now.tv_usec > end_time.tv_usec));
00332   }
00333 };
00334 
00335 #endif  // CCUTIL_OCRCLASS_H_
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines