// Tesseract 3.01
00001 00002 // File: osdetect.h 00003 // Description: Orientation and script detection. 00004 // Author: Samuel Charron 00005 // Ranjith Unnikrishnan 00006 // 00007 // (C) Copyright 2008, Google Inc. 00008 // Licensed under the Apache License, Version 2.0 (the "License"); 00009 // you may not use this file except in compliance with the License. 00010 // You may obtain a copy of the License at 00011 // http://www.apache.org/licenses/LICENSE-2.0 00012 // Unless required by applicable law or agreed to in writing, software 00013 // distributed under the License is distributed on an "AS IS" BASIS, 00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 // See the License for the specific language governing permissions and 00016 // limitations under the License. 00017 // 00019 00020 #ifndef TESSERACT_CCMAIN_OSDETECT_H__ 00021 #define TESSERACT_CCMAIN_OSDETECT_H__ 00022 00023 #include "strngs.h" 00024 #include "unicharset.h" 00025 00026 class TO_BLOCK_LIST; 00027 class BLOBNBOX; 00028 class BLOB_CHOICE_LIST; 00029 class BLOBNBOX_CLIST; 00030 00031 namespace tesseract { 00032 class Tesseract; 00033 } 00034 00035 // Max number of scripts in ICU + "NULL" + Japanese and Korean + Fraktur 00036 const int kMaxNumberOfScripts = 116 + 1 + 2 + 1; 00037 00038 struct OSBestResult { 00039 OSBestResult() : orientation_id(0), script_id(0), sconfidence(0.0), 00040 oconfidence(0.0) {} 00041 int orientation_id; 00042 int script_id; 00043 float sconfidence; 00044 float oconfidence; 00045 }; 00046 00047 struct OSResults { 00048 OSResults() { 00049 for (int i = 0; i < 4; ++i) { 00050 for (int j = 0; j < kMaxNumberOfScripts; ++j) 00051 scripts_na[i][j] = 0; 00052 orientations[i] = 0; 00053 } 00054 } 00055 void update_best_orientation(); 00056 void set_best_orientation(int orientation_id); 00057 void update_best_script(int orientation_id); 00058 00059 // Array holding scores for each orientation id [0,3]. 
00060 // Orientation ids [0..3] map to [0, 270, 180, 90] degree orientations of the 00061 // page respectively, where the values refer to the amount of clockwise 00062 // rotation to be applied to the page for the text to be upright and readable. 00063 float orientations[4]; 00064 // Script confidence scores for each of 4 possible orientations. 00065 float scripts_na[4][kMaxNumberOfScripts]; 00066 00067 UNICHARSET* unicharset; 00068 OSBestResult best_result; 00069 }; 00070 00071 class OrientationDetector { 00072 public: 00073 OrientationDetector(OSResults*); 00074 bool detect_blob(BLOB_CHOICE_LIST* scores); 00075 int get_orientation(); 00076 private: 00077 OSResults* osr_; 00078 }; 00079 00080 class ScriptDetector { 00081 public: 00082 ScriptDetector(OSResults*, tesseract::Tesseract* tess); 00083 void detect_blob(BLOB_CHOICE_LIST* scores); 00084 void get_script() ; 00085 bool must_stop(int orientation); 00086 private: 00087 OSResults* osr_; 00088 static const char* korean_script_; 00089 static const char* japanese_script_; 00090 static const char* fraktur_script_; 00091 int korean_id_; 00092 int japanese_id_; 00093 int katakana_id_; 00094 int hiragana_id_; 00095 int han_id_; 00096 int hangul_id_; 00097 int latin_id_; 00098 int fraktur_id_; 00099 tesseract::Tesseract* tess_; 00100 }; 00101 00102 int orientation_and_script_detection(STRING& filename, 00103 OSResults*, 00104 tesseract::Tesseract*); 00105 00106 int os_detect(TO_BLOCK_LIST* port_blocks, 00107 OSResults* osr, 00108 tesseract::Tesseract* tess); 00109 00110 int os_detect_blobs(BLOBNBOX_CLIST* blob_list, 00111 OSResults* osr, 00112 tesseract::Tesseract* tess); 00113 00114 bool os_detect_blob(BLOBNBOX* bbox, OrientationDetector* o, 00115 ScriptDetector* s, OSResults*, 00116 tesseract::Tesseract* tess); 00117 00118 // Helper method to convert an orientation index to its value in degrees. 
00119 // The value represents the amount of clockwise rotation in degrees that must be 00120 // applied for the text to be upright (readable). 00121 const int OrientationIdToValue(const int& id); 00122 00123 #endif // TESSERACT_CCMAIN_OSDETECT_H__