// Tesseract 3.01
//
// File:        publictypes.h
// Description: Types used in both the API and internally
// Author:      Ray Smith
// Created:     Wed Mar 03 09:22:53 PST 2010
//
// (C) Copyright 2010, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

#ifndef TESSERACT_CCSTRUCT_PUBLICTYPES_H__
#define TESSERACT_CCSTRUCT_PUBLICTYPES_H__

// This file contains types that are used both by the API and internally
// to Tesseract. In order to decouple the API from Tesseract and prevent cyclic
// dependencies, THIS FILE SHOULD NOT DEPEND ON ANY OTHER PART OF TESSERACT.
// Restated: It is OK for low-level Tesseract files to include publictypes.h,
// but not for the low-level tesseract code to include top-level API code.
// This file should not use other Tesseract types, as that would drag
// their includes into the API-level.
// API-level code should include apitypes.h in preference to this file.

// Number of printers' points in an inch. The unit of the pointsize return.
const int kPointsPerInch = 72;

// Possible types for a POLY_BLOCK or ColPartition.
// Must be kept in sync with kPBColors in polyblk.cpp and PTIs*Type functions
// below, as well as kPolyBlockNames in publictypes.cpp.
// Used extensively by ColPartition, and POLY_BLOCK.
enum PolyBlockType {
  PT_UNKNOWN,        // Type is not yet known. Keep as the first element.
  PT_FLOWING_TEXT,   // Text that lives inside a column.
  PT_HEADING_TEXT,   // Text that spans more than one column.
  PT_PULLOUT_TEXT,   // Text that is in a cross-column pull-out region.
  PT_TABLE,          // Partition belonging to a table region.
  PT_VERTICAL_TEXT,  // Text-line runs vertically.
  PT_CAPTION_TEXT,   // Text that belongs to an image.
  PT_FLOWING_IMAGE,  // Image that lives inside a column.
  PT_HEADING_IMAGE,  // Image that spans more than one column.
  PT_PULLOUT_IMAGE,  // Image that is in a cross-column pull-out region.
  PT_HORZ_LINE,      // Horizontal Line.
  PT_VERT_LINE,      // Vertical Line.
  PT_NOISE,          // Lies outside of any column.
  PT_COUNT           // Number of types above. Keep as the last element.
};

// Returns true if PolyBlockType is of line type, either horizontal
// (PT_HORZ_LINE) or vertical (PT_VERT_LINE).
inline bool PTIsLineType(PolyBlockType type) {
  return type == PT_HORZ_LINE || type == PT_VERT_LINE;
}
// Returns true if PolyBlockType is of image type, i.e. one of
// PT_FLOWING_IMAGE, PT_HEADING_IMAGE or PT_PULLOUT_IMAGE.
inline bool PTIsImageType(PolyBlockType type) {
  return type == PT_FLOWING_IMAGE || type == PT_HEADING_IMAGE ||
         type == PT_PULLOUT_IMAGE;
}
// Returns true if PolyBlockType is of text type. Note that PT_TABLE is
// counted as text alongside the PT_*_TEXT values.
inline bool PTIsTextType(PolyBlockType type) {
  return type == PT_FLOWING_TEXT || type == PT_HEADING_TEXT ||
         type == PT_PULLOUT_TEXT || type == PT_TABLE ||
         type == PT_VERTICAL_TEXT || type == PT_CAPTION_TEXT;
}

// String name for each block type. Keep in sync with PolyBlockType.
extern const char* kPolyBlockNames[];

namespace tesseract {
// +------------------+  Orientation Example:
// | 1 Aaaa Aaaa Aaaa |  ====================
// | Aaa aa aaa aa    |  To left is a diagram of some (1) English and
// | aaaaaa A aa aaa. |  (2) Chinese text and a (3) photo credit.
// |                2 |
// |   #######  c c C |  Upright Latin characters are represented as A and a.
// |   #######  c c c |  '<' represents a latin character rotated
// | < #######  c c c |      anti-clockwise 90 degrees.
// | < #######  c   c |
// | < #######  .   c |  Upright Chinese characters are represented C and c.
// | 3 #######      c |
// +------------------+  NOTA BENE: enum values here should match goodoc.proto

// If you orient your head so that "up" aligns with Orientation,
// then the characters will appear "right side up" and readable.
//
// In the example above, both the English and Chinese paragraphs are oriented
// so their "up" is the top of the page (page up).  The photo credit is read
// with one's head turned leftward ("up" is to page left).
//
// The values of this enum match the convention of Tesseract's osdetect.h
enum Orientation {
  ORIENTATION_PAGE_UP = 0,
  ORIENTATION_PAGE_RIGHT = 1,
  ORIENTATION_PAGE_DOWN = 2,
  ORIENTATION_PAGE_LEFT = 3
};

// The grapheme clusters within a line of text are laid out logically
// in this direction, judged when looking at the text line rotated so that
// its Orientation is "page up".
//
// For English text, the writing direction is left-to-right.  For the
// Chinese text in the above example, the writing direction is top-to-bottom.
enum WritingDirection {
  WRITING_DIRECTION_LEFT_TO_RIGHT = 0,
  WRITING_DIRECTION_RIGHT_TO_LEFT = 1,
  WRITING_DIRECTION_TOP_TO_BOTTOM = 2
};

// The text lines are read in the given sequence.
//
// In English, the order is top-to-bottom.
// In Chinese, vertical text lines are read right-to-left.  Mongolian is
// written in vertical columns top to bottom like Chinese, but the lines
// are ordered left-to-right.
//
// Note that only some combinations make sense.  For example,
// WRITING_DIRECTION_LEFT_TO_RIGHT implies TEXTLINE_ORDER_TOP_TO_BOTTOM
enum TextlineOrder {
  TEXTLINE_ORDER_LEFT_TO_RIGHT = 0,
  TEXTLINE_ORDER_RIGHT_TO_LEFT = 1,
  TEXTLINE_ORDER_TOP_TO_BOTTOM = 2
};

// Possible modes for page layout analysis. These *must* be kept in order
// of decreasing amount of layout analysis to be done, except for OSD_ONLY,
// so that the inequality test macros below work.
enum PageSegMode {
  PSM_OSD_ONLY,
  PSM_AUTO_OSD,

  PSM_AUTO_ONLY,
  PSM_AUTO,
  PSM_SINGLE_COLUMN,
  PSM_SINGLE_BLOCK_VERT_TEXT,

  PSM_SINGLE_BLOCK,
  PSM_SINGLE_LINE,
  PSM_SINGLE_WORD,
  PSM_CIRCLE_WORD,
  PSM_SINGLE_CHAR,

  PSM_COUNT  // Number of modes above. Keep as the last element.
};

// Macros that act on a PageSegMode to determine whether components of
// layout analysis are enabled.
// *Depend critically on the order of elements of PageSegMode.*
// The enumerator names are used unqualified, so outside namespace tesseract
// the macros only compile where those names are visible.
//
// True for PSM_OSD_ONLY and PSM_AUTO_OSD.
#define PSM_OSD_ENABLED(pageseg_mode) ((pageseg_mode) <= PSM_AUTO_OSD)
// True for PSM_AUTO_OSD up to and including PSM_AUTO.
#define PSM_COL_FIND_ENABLED(pageseg_mode) \
  ((pageseg_mode) >= PSM_AUTO_OSD && (pageseg_mode) <= PSM_AUTO)
// True for PSM_AUTO_OSD up to and including PSM_SINGLE_COLUMN.
#define PSM_BLOCK_FIND_ENABLED(pageseg_mode) \
  ((pageseg_mode) >= PSM_AUTO_OSD && (pageseg_mode) <= PSM_SINGLE_COLUMN)
// True for PSM_AUTO_OSD up to and including PSM_SINGLE_BLOCK.
#define PSM_LINE_FIND_ENABLED(pageseg_mode) \
  ((pageseg_mode) >= PSM_AUTO_OSD && (pageseg_mode) <= PSM_SINGLE_BLOCK)
// True for PSM_AUTO_OSD up to and including PSM_SINGLE_LINE.
#define PSM_WORD_FIND_ENABLED(pageseg_mode) \
  ((pageseg_mode) >= PSM_AUTO_OSD && (pageseg_mode) <= PSM_SINGLE_LINE)

// enum of the elements of the page hierarchy, used in ResultIterator
// to provide functions that operate on each level without having to
// have 5x as many functions.
// NOTE: At present RIL_PARA and RIL_BLOCK are equivalent as there is
// no paragraph internally yet.
// TODO(rays) Add paragraph detection.
enum PageIteratorLevel {
  RIL_BLOCK,     // Block of text/image/separator line.
  RIL_PARA,      // Paragraph within a block.
  RIL_TEXTLINE,  // Line within a paragraph.
  RIL_WORD,      // Word within a textline.
  RIL_SYMBOL     // Symbol/character within a word.
};

// When Tesseract/Cube is initialized we can choose to instantiate/load/run
// only the Tesseract part, only the Cube part or both along with the combiner.
// The preference of which engine to use is stored in tessedit_ocr_engine_mode.
//
// ATTENTION: When modifying this enum, please make sure to make the
// appropriate changes to all the enums mirroring it (e.g. OCREngine in
// cityblock/workflow/detection/detection_storage.proto). Such enums will
// mention the connection to OcrEngineMode in the comments.
enum OcrEngineMode {
  OEM_TESSERACT_ONLY,           // Run Tesseract only - fastest
  OEM_CUBE_ONLY,                // Run Cube only - better accuracy, but slower
  OEM_TESSERACT_CUBE_COMBINED,  // Run both and combine results - best accuracy
  OEM_DEFAULT                   // Specify this mode when calling init_*(),
                                // to indicate that any of the above modes
                                // should be automatically inferred from the
                                // variables in the language-specific config,
                                // command-line configs, or if not specified
                                // in any of the above should be set to the
                                // default OEM_TESSERACT_ONLY.
};

}  // namespace tesseract.

#endif  // TESSERACT_CCSTRUCT_PUBLICTYPES_H__