Tesseract 3.01
/data/source/tesseract-ocr/ccstruct/publictypes.h
Go to the documentation of this file.
00001 
00002 // File:        publictypes.h
00003 // Description: Types used in both the API and internally
00004 // Author:      Ray Smith
00005 // Created:     Wed Mar 03 09:22:53 PST 2010
00006 //
00007 // (C) Copyright 2010, Google Inc.
00008 // Licensed under the Apache License, Version 2.0 (the "License");
00009 // you may not use this file except in compliance with the License.
00010 // You may obtain a copy of the License at
00011 // http://www.apache.org/licenses/LICENSE-2.0
00012 // Unless required by applicable law or agreed to in writing, software
00013 // distributed under the License is distributed on an "AS IS" BASIS,
00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015 // See the License for the specific language governing permissions and
00016 // limitations under the License.
00017 //
00019 
00020 #ifndef TESSERACT_CCSTRUCT_PUBLICTYPES_H__
00021 #define TESSERACT_CCSTRUCT_PUBLICTYPES_H__
00022 
00023 // This file contains types that are used both by the API and internally
00024 // to Tesseract. In order to decouple the API from Tesseract and prevent cyclic
00025 // dependencies, THIS FILE SHOULD NOT DEPEND ON ANY OTHER PART OF TESSERACT.
00026 // Restated: It is OK for low-level Tesseract files to include publictypes.h,
00027 // but not for the low-level tesseract code to include top-level API code.
00028 // This file should not use other Tesseract types, as that would drag
00029 // their includes into the API-level.
00030 // API-level code should include apitypes.h in preference to this file.
00031 
00032 // Number of printers' points in an inch: the unit of any returned pointsize.
00033 const int kPointsPerInch = 72;
00034 
00035 // Possible types for a POLY_BLOCK or ColPartition.
00036 // Must be kept in sync with kPBColors in polyblk.cpp and PTIs*Type functions
00037 // below, as well as kPolyBlockNames in publictypes.cpp.
00038 // Used extensively by ColPartition, and POLY_BLOCK.
00039 enum PolyBlockType {
00040   PT_UNKNOWN,        // Type is not yet known. Keep as the first element.
00041   PT_FLOWING_TEXT,   // Text that lives inside a column.
00042   PT_HEADING_TEXT,   // Text that spans more than one column.
00043   PT_PULLOUT_TEXT,   // Text that is in a cross-column pull-out region.
00044   PT_TABLE,          // Partition belonging to a table region.
00045   PT_VERTICAL_TEXT,  // Text-line runs vertically.
00046   PT_CAPTION_TEXT,   // Text that belongs to an image.
00047   PT_FLOWING_IMAGE,  // Image that lives inside a column.
00048   PT_HEADING_IMAGE,  // Image that spans more than one column.
00049   PT_PULLOUT_IMAGE,  // Image that is in a cross-column pull-out region.
00050   PT_HORZ_LINE,      // Horizontal Line.
00051   PT_VERT_LINE,      // Vertical Line.
00052   PT_NOISE,          // Lies outside of any column.
00053   PT_COUNT           // Number of types above. Keep as the last element.
00054 };
00055 
00056 // Returns true if PolyBlockType is of line type (horizontal or vertical).
00057 inline bool PTIsLineType(PolyBlockType type) {
00058   return type == PT_HORZ_LINE || type == PT_VERT_LINE;
00059 }
00060 // Returns true if PolyBlockType is of image type (flowing, heading or pullout).
00061 inline bool PTIsImageType(PolyBlockType type) {
00062   return type == PT_FLOWING_IMAGE || type == PT_HEADING_IMAGE ||
00063          type == PT_PULLOUT_IMAGE;
00064 }
00065 // Returns true if PolyBlockType is of text type; PT_TABLE counts as text.
00066 inline bool PTIsTextType(PolyBlockType type) {
00067   return type == PT_FLOWING_TEXT || type == PT_HEADING_TEXT ||
00068          type == PT_PULLOUT_TEXT || type == PT_TABLE ||
00069          type == PT_VERTICAL_TEXT || type == PT_CAPTION_TEXT;
00070 }
00071 
00072 // String name for each PolyBlockType (see publictypes.cpp). Keep in sync.
00073 extern const char* kPolyBlockNames[];
00074 
00075 namespace tesseract {
00076 //  +------------------+  Orientation Example:
00077 //  | 1 Aaaa Aaaa Aaaa |  ====================
00078 //  | Aaa aa aaa aa    |  To left is a diagram of some (1) English and
00079 //  | aaaaaa A aa aaa. |  (2) Chinese text and a (3) photo credit.
00080 //  |                2 |
00081 //  |   #######  c c C |  Upright Latin characters are represented as A and a.
00082 //  |   #######  c c c |  '<' represents a Latin character rotated
00083 //  | < #######  c c c |      anti-clockwise 90 degrees.
00084 //  | < #######  c   c |
00085 //  | < #######  .   c |  Upright Chinese characters are represented C and c.
00086 //  | 3 #######      c |
00087 //  +------------------+  NOTA BENE: enum values here should match goodoc.proto
00088 
00089 // If you orient your head so that "up" aligns with Orientation,
00090 // then the characters will appear "right side up" and readable.
00091 //
00092 // In the example above, both the English and Chinese paragraphs are oriented
00093 // so their "up" is the top of the page (page up).  The photo credit is read
00094 // with one's head turned leftward ("up" is to page left).
00095 //
00096 // The values of this enum match the convention of Tesseract's osdetect.h.
00097 enum Orientation {
00098   ORIENTATION_PAGE_UP = 0,     // Characters' "up" is the top of the page.
00099   ORIENTATION_PAGE_RIGHT = 1,  // Characters' "up" is to page right.
00100   ORIENTATION_PAGE_DOWN = 2,   // Characters' "up" is the bottom of the page.
00101   ORIENTATION_PAGE_LEFT = 3,   // Characters' "up" is to page left.
00102 };
00103 
00104 // The grapheme clusters within a line of text are laid out logically
00105 // in this direction, judged when looking at the text line rotated so that
00106 // its Orientation is "page up".
00107 //
00108 // For English text, the writing direction is left-to-right.  For the
00109 // Chinese text in the above example, the writing direction is top-to-bottom.
00110 enum WritingDirection {
00111   WRITING_DIRECTION_LEFT_TO_RIGHT = 0,  // E.g. English text.
00112   WRITING_DIRECTION_RIGHT_TO_LEFT = 1,
00113   WRITING_DIRECTION_TOP_TO_BOTTOM = 2,  // E.g. the Chinese text in the example.
00114 };
00115 
00116 // The text lines are read in the given sequence.
00117 //
00118 // In English, the order is top-to-bottom.
00119 // In Chinese, vertical text lines are read right-to-left.  Mongolian is
00120 // written in vertical columns top to bottom like Chinese, but the lines
00121 // are ordered left-to-right.
00122 //
00123 // Note that only some combinations make sense.  For example,
00124 // WRITING_DIRECTION_LEFT_TO_RIGHT implies TEXTLINE_ORDER_TOP_TO_BOTTOM.
00125 enum TextlineOrder {
00126   TEXTLINE_ORDER_LEFT_TO_RIGHT = 0,  // E.g. Mongolian vertical lines.
00127   TEXTLINE_ORDER_RIGHT_TO_LEFT = 1,  // E.g. Chinese vertical lines.
00128   TEXTLINE_ORDER_TOP_TO_BOTTOM = 2,  // E.g. English.
00129 };
00130 
00131 // Possible modes for page layout analysis. These *must* be kept in order
00132 // of decreasing amount of layout analysis to be done, except for OSD_ONLY,
00133 // so that the inequality test macros below work. Notes list what they enable.
00134 enum PageSegMode {
00135   PSM_OSD_ONLY,       // OSD only.
00136   PSM_AUTO_OSD,       // OSD, column, block, line and word finding.
00137 
00138   PSM_AUTO_ONLY,      // Column, block, line and word finding.
00139   PSM_AUTO,           // Column, block, line and word finding.
00140   PSM_SINGLE_COLUMN,  // Block, line and word finding.
00141   PSM_SINGLE_BLOCK_VERT_TEXT,  // Line and word finding.
00142 
00143   PSM_SINGLE_BLOCK,   // Line and word finding.
00144   PSM_SINGLE_LINE,    // Word finding.
00145   PSM_SINGLE_WORD,    // None of the macro-tested components.
00146   PSM_CIRCLE_WORD,    // None of the macro-tested components.
00147   PSM_SINGLE_CHAR,    // None of the macro-tested components.
00148 
00149   PSM_COUNT           // Number of modes above. Keep as the last element.
00150 };
00151 
00152 // Macros that act on a PageSegMode to determine whether components of
00153 // layout analysis are enabled. The argument may be evaluated twice.
00154 // *Depend critically on the order of elements of PageSegMode.*
00155 #define PSM_OSD_ENABLED(pageseg_mode) ((pageseg_mode) <= PSM_AUTO_OSD)
00156 #define PSM_COL_FIND_ENABLED(pageseg_mode) \
00157   ((pageseg_mode) >= PSM_AUTO_OSD && (pageseg_mode) <= PSM_AUTO)
00158 #define PSM_BLOCK_FIND_ENABLED(pageseg_mode) \
00159   ((pageseg_mode) >= PSM_AUTO_OSD && (pageseg_mode) <= PSM_SINGLE_COLUMN)
00160 #define PSM_LINE_FIND_ENABLED(pageseg_mode) \
00161   ((pageseg_mode) >= PSM_AUTO_OSD && (pageseg_mode) <= PSM_SINGLE_BLOCK)
00162 #define PSM_WORD_FIND_ENABLED(pageseg_mode) \
00163   ((pageseg_mode) >= PSM_AUTO_OSD && (pageseg_mode) <= PSM_SINGLE_LINE)
00164 
00165 // Enum of the levels of the page hierarchy, used in ResultIterator
00166 // to provide functions that operate on each level without having to
00167 // have 5x as many functions.
00168 // NOTE: At present RIL_PARA and RIL_BLOCK are equivalent as there is
00169 // no paragraph internally yet.
00170 // TODO(rays) Add paragraph detection.
00171 enum PageIteratorLevel {
00172   RIL_BLOCK,     // Block of text/image/separator line.
00173   RIL_PARA,      // Paragraph within a block (currently same as RIL_BLOCK).
00174   RIL_TEXTLINE,  // Line within a paragraph.
00175   RIL_WORD,      // Word within a textline.
00176   RIL_SYMBOL     // Symbol/character within a word.
00177 };
00178 
00179 // When Tesseract/Cube is initialized we can choose to instantiate/load/run
00180 // only the Tesseract part, only the Cube part or both along with the combiner.
00181 // The preference of which engine to use is stored in tessedit_ocr_engine_mode.
00182 //
00183 // ATTENTION: When modifying this enum, please make sure to make the
00184 // appropriate changes to all the enums mirroring it (e.g. OCREngine in
00185 // cityblock/workflow/detection/detection_storage.proto). Such enums will
00186 // mention the connection to OcrEngineMode in the comments.
00187 enum OcrEngineMode {
00188   OEM_TESSERACT_ONLY,           // Run Tesseract only - fastest
00189   OEM_CUBE_ONLY,                // Run Cube only - better accuracy, but slower
00190   OEM_TESSERACT_CUBE_COMBINED,  // Run both and combine results - best accuracy
00191   OEM_DEFAULT                   // Specify this mode when calling init_*(),
00192                                 // to indicate that any of the above modes
00193                                 // should be automatically inferred from the
00194                                 // variables in the language-specific config,
00195                                 // command-line configs or, if not specified
00196                                 // in any of those, should be set to the
00197                                 // default OEM_TESSERACT_ONLY.
00198 };
00199 
00200 }  // namespace tesseract.
00201 
00202 #endif  // TESSERACT_CCSTRUCT_PUBLICTYPES_H__
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines