Tesseract 3.01
|
00001 /********************************************************************** 00002 * File: ocrclass.h 00003 * Description: Class definitions and constants for the OCR API. 00004 * Author: Hewlett-Packard Co 00005 * 00006 * (C) Copyright 1996, Hewlett-Packard Co. 00007 ** Licensed under the Apache License, Version 2.0 (the "License"); 00008 ** you may not use this file except in compliance with the License. 00009 ** You may obtain a copy of the License at 00010 ** http://www.apache.org/licenses/LICENSE-2.0 00011 ** Unless required by applicable law or agreed to in writing, software 00012 ** distributed under the License is distributed on an "AS IS" BASIS, 00013 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00014 ** See the License for the specific language governing permissions and 00015 ** limitations under the License. 00016 * 00017 **********************************************************************/ 00018 00019 /********************************************************************** 00020 * This file contains typedefs for all the structures used by 00021 * the HP OCR interface. 00022 * The code is designed to be used with either a C or C++ compiler. 00023 * The structures are designed to allow them to be used with any 00024 * structure alignment upto 8. 00025 **********************************************************************/ 00026 00027 #ifndef CCUTIL_OCRCLASS_H_ 00028 #define CCUTIL_OCRCLASS_H_ 00029 00030 #ifndef __GNUC__ 00031 #ifdef __MSW32__ 00032 #include <windows.h> 00033 #include "gettimeofday.h" 00034 #endif 00035 #else 00036 #include <sys/time.h> 00037 #endif 00038 #include <time.h> 00039 #include "host.h" 00040 00041 /*Maximum lengths of various strings*/ 00042 #define MAX_FONT_NAME 34 /*name of font */ 00043 #define MAX_OCR_NAME 32 /*name of engine */ 00044 #define MAX_OCR_VERSION 17 /*version code of engine */ 00045 00046 /*Image parameters*/ 00047 #define MIN_IMAGE_SIZE 64 /*smallest image that will be passed */ 00048 #define IMAGE_ROUNDING 32 /*all sizes are multiple of this */ 00049 00050 #if defined(__SLOW_TIMES__) 00051 /*Maximum timeouts of various functions (in secs)*/ 00052 #define STARTUP_TIMEOUT 100 /*start of OCR engine */ 00053 #define SHUTDOWN_TIMEOUT 50 /*end of OCR engine */ 00054 #define SENDIM_TIMEOUT 50 /*send of image */ 00055 #define RELEASE_TIMEOUT 50 /*release of semaphore */ 00056 #define READIM_TIMEOUT 100 /*read of image */ 00057 #define READTEXT_TIMEOUT 50 /*read of text */ 00058 #define PROGRESS_TIMEOUT 30 /*progress every 3 seconds */ 00059 #define BADTIMES_TIMEOUT 7 /*max lack of progress */ 00060 #else 00061 /*Maximum timeouts of various functions (in secs)*/ 00062 #define STARTUP_TIMEOUT 10 /*start of OCR engine */ 00063 #define SHUTDOWN_TIMEOUT 6 /*end of OCR engine */ 00064 #define SENDIM_TIMEOUT 5 /*send of image */ 00065 #define RELEASE_TIMEOUT 5 /*release of semaphore */ 00066 #define READIM_TIMEOUT 10 /*read of image */ 00067 #define READTEXT_TIMEOUT 5 /*read of text */ 00068 #define PROGRESS_TIMEOUT 3 /*progress every 3 seconds */ 00069 #define BADTIMES_TIMEOUT 7 /*max lack of progress */ 00070 #endif 00071 00072 /*language definitions are identical to RTF*/ 00073 #define LANGE_NONE 0x0400 /*no language */ 00074 #define LANGE_ALBANIAN 0x041c /*Albanian */ 00075 #define LANGE_BRITISH 0x0809 /*International English */ 00076 #define LANGE_BULGARIAN 0x0402 /*Bulgarian */ 00077 #define LANGE_CROATIAN 0x041a /*Croatian(latin alphabet) */ 00078 #define LANGE_CZECH 0x0405 /*Czech */ 00079 #define LANGE_DANISH 0x0406 /*Danish */ 00080 #define LANGE_DUTCH 0x0413 /*Dutch */ 00081 #define LANGE_FINNISH 0x040b /*Finnish */ 00082 #define LANGE_FRENCH 0x040c /*French */ 00083 #define LANGE_GERMAN 0x0407 /*German */ 00084 #define LANGE_GREEK 0x0408 /*Greek */ 00085 #define LANGE_HUNGARIAN 0x040e /*Hungarian */ 00086 #define LANGE_ITALIAN 0x0410 /*Italian */ 00087 #define LANGE_JAPANESE 0x0411 /*Japanese */ 00088 #define LANGE_KOREAN 0x0412 /*Korean */ 00089 #define LANGE_NORWEGIAN 0x0414 /*Bokmal */ 00090 #define LANGE_POLISH 0x0415 /*Polish */ 00091 #define LANGE_PORTUGESE 0x0416 /*Brazilian Portugese */ 00092 #define LANGE_ROMANIAN 0x0418 /*Romanian */ 00093 #define LANGE_RUSSIAN 0x0419 /*Russian */ 00094 #define LANGE_SCHINESE 0x0804 /*Simplified Chinese */ 00095 #define LANGE_SLOVAK 0x041b /*Slovak */ 00096 #define LANGE_SPANISH 0x040a /*Castilian */ 00097 #define LANGE_SWEDISH 0x041d /*Swedish */ 00098 #define LANGE_TCHINESE 0x0404 /*Traditional Chinese */ 00099 #define LANGE_TURKISH 0x041f /*Turkish */ 00100 #define LANGE_USENGLISH 0x0409 /*American */ 00101 00102 /*font family definitions are identical to RTF*/ 00103 #define FFAM_NONE 0 /*unknown */ 00104 #define FFAM_ROMAN 1 /*serifed prop */ 00105 #define FFAM_SWISS 2 /*sans-serif prop */ 00106 #define FFAM_MODERN 3 /*fixed pitch */ 00107 00108 /*character set definitions are identical to RTF*/ 00109 #define CHSET_ANSI 0 /*Ansi efigs */ 00110 #define CHSET_SHIFT_JIS 128 /*JIS X 0208-1990 */ 00111 #define CHSET_KOREAN 129 /*KS C 5601-1992 */ 00112 #define CHSET_SCHINESE 134 /*GB 2312-80 */ 00113 #define CHSET_BIG5 136 /*Big Five */ 00114 #define CHSET_CYRILLIC 204 /*Cyrillic */ 00115 #define CHSET_EEUROPE 238 /*Eastern Europe */ 00116 00117 /*pitch set definitions are identical to RTF*/ 00118 #define PITCH_DEF 0 /*default */ 00119 #define PITCH_FIXED 1 /*fixed pitch */ 00120 #define PITCH_VAR 2 /*variable pitch */ 00121 00122 /*Bitmasks for character enhancements. 00123 OR these together for enhancement in ocr_append_char*/ 00124 #define EUC_BOLD 1 /*bold character */ 00125 #define EUC_ITALIC 2 /*italic char */ 00126 #define EUC_UNDERLINE 4 /*underlined char */ 00127 #define EUC_SUBSCRIPT 8 /*subscript char */ 00128 #define EUC_SUPERSCRIPT 16 /*superscript char */ 00129 00130 /*enum for character rendering direction*/ 00131 enum OCR_CHAR_DIRECTION { 00132 OCR_CDIR_RIGHT_LEFT, /*right to left horizontal */ 00133 OCR_CDIR_LEFT_RIGHT, /*left to right horizontal */ 00134 OCR_CDIR_TOP_BOTTOM, /*top to bottom vertical */ 00135 OCR_CDIR_BOTTOM_TOP /*bottom to top vertical */ 00136 }; 00137 00138 /*enum for line rendering direction*/ 00139 enum OCR_LINE_DIRECTION { 00140 OCR_LDIR_DOWN_RIGHT, /*horizontal lines go down */ 00141 /*vertical lines go right */ 00142 OCR_LDIR_UP_LEFT /*horizontal lines go up */ 00143 }; 00144 00145 /*enum for newline type*/ 00146 enum OCR_NEWLINE_TYPE { 00147 OCR_NL_NONE, /*not a newline */ 00148 OCR_NL_NEWLINE, /*this is a newline but not new para */ 00149 OCR_NL_NEWPARA /*this is a newline and a new para */ 00150 }; 00151 00152 /*error codes that can be returned from the API functions other than OKAY 00153 and HPERR*/ 00154 #define OCR_API_NO_MEM (-2) /*filled output buffer */ 00155 #define OCR_API_BAD_CHAR (-3) /*whitespace sent to ocr_append_char */ 00156 #define OCR_API_BAD_STATE (-4) /*invalid call sequence */ 00157 00158 /*error codes used for passing errors back to the HP side*/ 00159 enum OCR_ERR_CODE { 00160 OCR_ERR_NONE, /*no error */ 00161 OCR_ERR_CLEAN_EXIT, /*no error */ 00162 OCR_ERR_NO_MEM, /*out of memory */ 00163 OCR_ERR_FILE_READ, /*failed to read data file */ 00164 OCR_ERR_TMP_WRITE, /*failed to write temp file */ 00165 OCR_ERR_TMP_READ, /*failed to read temp file */ 00166 OCR_ERR_BAD_DLL, /*missing or invalid dll subcomponent */ 00167 OCR_ERR_BAD_EXE, /*missing or invalid exe subcomponent */ 00168 OCR_ERR_BAD_LOAD, /*failed to load subcomponent */ 00169 OCR_ERR_BAD_LANG, /*unable to recognize requested language */ 00170 OCR_ERR_BAD_STATE, /*engine did call out of sequence */ 00171 OCR_ERR_INTERNAL1, /*internal error type 1 */ 00172 OCR_ERR_INTERNAL2, /*internal error type 1 */ 00173 OCR_ERR_INTERNAL3, /*internal error type 1 */ 00174 OCR_ERR_INTERNAL4, /*internal error type 1 */ 00175 OCR_ERR_INTERNAL5, /*internal error type 1 */ 00176 OCR_ERR_INTERNAL6, /*internal error type 1 */ 00177 OCR_ERR_INTERNAL7, /*internal error type 1 */ 00178 OCR_ERR_INTERNAL8, /*internal error type 1 */ 00179 OCR_ERR_TIMEOUT /*timed out in comms */ 00180 }; /*for calls to ocr_error */ 00181 00182 /********************************************************************** 00183 * EFONT_DESC 00184 * Description of one font. 00185 * The information required is basically that used by RTF. 00186 * The name may be either a valid font on the system or the empty string. 00187 **********************************************************************/ 00188 00189 typedef struct { /*font description */ 00190 uinT16 language; /*default language */ 00191 uinT8 font_family; /*serif/not, fixed/not */ 00192 uinT8 char_set; /*character set standard */ 00193 uinT8 pitch; /*fixed or prop */ 00194 inT8 name[MAX_FONT_NAME + 1]; /*plain ascii name */ 00195 } EFONT_DESC; /*font description */ 00196 00197 /********************************************************************** 00198 * EOCR_DESC 00199 * Description of the OCR engine provided at startup. 00200 * The name and version may be reported to the user at some point. 00201 * The fonts array should indicate the fonts that the OCR system 00202 * can recognize. 00203 **********************************************************************/ 00204 00205 typedef struct { /*startup info */ 00206 inT32 protocol; /*interface version */ 00207 uinT32 font_count; /*number of fonts */ 00208 uinT16 language; /*default language */ 00209 uinT16 name[MAX_OCR_NAME + 1]; /*name of engine */ 00210 /*version of engine */ 00211 uinT16 version[MAX_OCR_VERSION + 1]; 00212 EFONT_DESC fonts[1]; /*array of fonts */ 00213 } EOCR_DESC; /*startup info */ 00214 00215 /********************************************************************** 00216 * ESTRIP_DESC 00217 * Description of the image strip as it is passed to the engine. 00218 * The image is always 1 bit, with 1=black. 00219 * The width is always a multiple of 32, so padding is always OK. 00220 * The height of the full image is always a multiple of 32. 00221 * The top y coordinate is 0, and increases down. 00222 * The top leftmost pixel is in the most significant bit of the first byte. 00223 **********************************************************************/ 00224 00225 typedef struct { /*bitmap strip */ 00226 inT16 x_size; /*width in pixels */ 00227 inT16 y_size; /*of full image */ 00228 inT16 strip_size; /*of this strip */ 00229 inT16 resolution; /*pixels per inch */ 00230 uinT8 data[8]; /*image data */ 00231 } ESTRIP_DESC; /*bitmap strip */ 00232 00233 /********************************************************************** 00234 * EANYCODE_CHAR 00235 * Description of a single character. The character code is defined by 00236 * the character set of the current font. 00237 * Output text is sent as an array of these structures. 00238 * Spaces and line endings in the output are represented in the 00239 * structures of the surrounding characters. They are not directly 00240 * represented as characters. 00241 * The first character in a word has a positive value of blanks. 00242 * Missing information should be set to the defaults in the comments. 00243 * If word bounds are known, but not character bounds, then the top and 00244 * bottom of each character should be those of the word. The left of the 00245 * first and right of the last char in each word should be set. All other 00246 * lefts and rights should be set to -1. 00247 * If set, the values of right and bottom are left+width and top+height. 00248 * Most of the members come directly from the parameters to ocr_append_char. 00249 * The formatting member uses the enhancement parameter and combines the 00250 * line direction stuff into the top 3 bits. 00251 * The coding is 0=RL char, 1=LR char, 2=DR NL, 3=UL NL, 4=DR Para, 00252 * 5=UL Para, 6=TB char, 7=BT char. API users do not need to know what 00253 * the coding is, only that it is backwards compatible with the previous 00254 * version. 00255 **********************************************************************/ 00256 00257 typedef struct { /*single character */ 00258 // It should be noted that the format for char_code for version 2.0 and beyond 00259 // is UTF8 which means that ASCII characters will come out as one structure but 00260 // other characters will be returned in two or more instances of this structure 00261 // with a single byte of the UTF8 code in each, but each will have the same 00262 // bounding box. Programs which want to handle languagues with different 00263 // characters sets will need to handle extended characters appropriately, but 00264 // *all* code needs to be prepared to receive UTF8 coded characters for 00265 // characters such as bullet and fancy quotes. 00266 uinT16 char_code; /*character itself */ 00267 inT16 left; /*of char (-1) */ 00268 inT16 right; /*of char (-1) */ 00269 inT16 top; /*of char (-1) */ 00270 inT16 bottom; /*of char (-1) */ 00271 inT16 font_index; /*what font (0) */ 00272 uinT8 confidence; /*0=perfect, 100=reject (0/100) */ 00273 uinT8 point_size; /*of char, 72=i inch, (10) */ 00274 inT8 blanks; /*no of spaces before this char (1) */ 00275 uinT8 formatting; /*char formatting (0) */ 00276 } EANYCODE_CHAR; /*single character */ 00277 00278 /********************************************************************** 00279 * ETEXT_DESC 00280 * Description of the output of the OCR engine. 00281 * This structure is used as both a progress monitor and the final 00282 * output header, since it needs to be a valid progress monitor while 00283 * the OCR engine is storing its output to shared memory. 00284 * During progress, all the buffer info is -1. 00285 * Progress starts at 0 and increases to 100 during OCR. No other constraint. 00286 * Every progress callback, the OCR engine must set ocr_alive to 1. 00287 * The HP side will set ocr_alive to 0. Repeated failure to reset 00288 * to 1 indicates that the OCR engine is dead. 00289 * If the cancel function is not null then it is called with the number of 00290 * user words found. If it returns true then operation is cancelled. 00291 **********************************************************************/ 00292 typedef bool (*CANCEL_FUNC)(void* cancel_this, int words); 00293 00294 class ETEXT_DESC { // output header 00295 public: 00296 inT16 count; // chars in this buffer(0) 00297 inT16 progress; // percent complete increasing (0-100) 00298 inT8 more_to_come; // true if not last 00299 volatile inT8 ocr_alive; // ocr sets to 1, HP 0 00300 inT8 err_code; // for errcode use 00301 CANCEL_FUNC cancel; // returns true to cancel 00302 void* cancel_this; // this or other data for cancel 00303 struct timeval end_time; // time to stop. expected to be set only by call 00304 // to set_deadline_msecs() 00305 EANYCODE_CHAR text[1]; // character data 00306 00307 ETEXT_DESC() : count(0), progress(0), more_to_come(0), ocr_alive(0), 00308 err_code(0), cancel(NULL), cancel_this(NULL) { 00309 end_time.tv_sec = 0; 00310 end_time.tv_usec = 0; 00311 } 00312 00313 // Sets the end time to be deadline_msecs milliseconds from now. 00314 void set_deadline_msecs(inT32 deadline_msecs) { 00315 gettimeofday(&end_time, NULL); 00316 inT32 deadline_secs = deadline_msecs / 1000; 00317 end_time.tv_sec += deadline_secs; 00318 end_time.tv_usec += (deadline_msecs - deadline_secs * 1000) * 1000; 00319 if (end_time.tv_usec > 1000000) { 00320 end_time.tv_usec -= 1000000; 00321 ++end_time.tv_sec; 00322 } 00323 } 00324 00325 // Returns false if we've not passed the end_time, or have not set a deadline. 00326 bool deadline_exceeded() const { 00327 if (end_time.tv_sec == 0 && end_time.tv_usec == 0) return false; 00328 struct timeval now; 00329 gettimeofday(&now, NULL); 00330 return (now.tv_sec > end_time.tv_sec || (now.tv_sec == end_time.tv_sec && 00331 now.tv_usec > end_time.tv_usec)); 00332 } 00333 }; 00334 00335 #endif // CCUTIL_OCRCLASS_H_