Tesseract 3.01
/data/source/tesseract-ocr/ccmain/osdetect.h
Go to the documentation of this file.
00001 
00002 // File:        osdetect.h
00003 // Description: Orientation and script detection.
00004 // Author:      Samuel Charron
00005 //              Ranjith Unnikrishnan
00006 //
00007 // (C) Copyright 2008, Google Inc.
00008 // Licensed under the Apache License, Version 2.0 (the "License");
00009 // you may not use this file except in compliance with the License.
00010 // You may obtain a copy of the License at
00011 // http://www.apache.org/licenses/LICENSE-2.0
00012 // Unless required by applicable law or agreed to in writing, software
00013 // distributed under the License is distributed on an "AS IS" BASIS,
00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015 // See the License for the specific language governing permissions and
00016 // limitations under the License.
00017 //
00019 
00020 #ifndef TESSERACT_CCMAIN_OSDETECT_H__
00021 #define TESSERACT_CCMAIN_OSDETECT_H__
00022 
#include <stddef.h>  // NULL

#include "strngs.h"
#include "unicharset.h"
00025 
00026 class TO_BLOCK_LIST;
00027 class BLOBNBOX;
00028 class BLOB_CHOICE_LIST;
00029 class BLOBNBOX_CLIST;
00030 
00031 namespace tesseract {
00032 class Tesseract;
00033 }
00034 
// Maximum number of scripts, built up term by term so each contribution
// stays visible.
const int kMaxNumberOfScripts =
    116 +  // scripts in ICU
    1 +    // "NULL"
    2 +    // Japanese and Korean
    1;     // Fraktur
00037 
// Pairs an orientation id and a script id with one confidence value each.
// A default-constructed instance has all four fields set to zero.
struct OSBestResult {
  int orientation_id;
  int script_id;
  float sconfidence;  // By name, the confidence that goes with script_id.
  float oconfidence;  // By name, the confidence that goes with orientation_id.

  OSBestResult()
      : orientation_id(0),
        script_id(0),
        sconfidence(0.0f),
        oconfidence(0.0f) {}
};
00046 
00047 struct OSResults {
00048   OSResults() {
00049     for (int i = 0; i < 4; ++i) {
00050       for (int j = 0; j < kMaxNumberOfScripts; ++j)
00051         scripts_na[i][j] = 0;
00052       orientations[i] = 0;
00053     }
00054   }
00055   void update_best_orientation();
00056   void set_best_orientation(int orientation_id);
00057   void update_best_script(int orientation_id);
00058 
00059   // Array holding scores for each orientation id [0,3].
00060   // Orientation ids [0..3] map to [0, 270, 180, 90] degree orientations of the
00061   // page respectively, where the values refer to the amount of clockwise
00062   // rotation to be applied to the page for the text to be upright and readable.
00063   float orientations[4];
00064   // Script confidence scores for each of 4 possible orientations.
00065   float scripts_na[4][kMaxNumberOfScripts];
00066 
00067   UNICHARSET* unicharset;
00068   OSBestResult best_result;
00069 };
00070 
00071 class OrientationDetector {
00072  public:
00073   OrientationDetector(OSResults*);
00074   bool detect_blob(BLOB_CHOICE_LIST* scores);
00075   int get_orientation();
00076  private:
00077   OSResults* osr_;
00078 };
00079 
00080 class ScriptDetector {
00081  public:
00082   ScriptDetector(OSResults*, tesseract::Tesseract* tess);
00083   void detect_blob(BLOB_CHOICE_LIST* scores);
00084   void get_script() ;
00085   bool must_stop(int orientation);
00086  private:
00087   OSResults* osr_;
00088   static const char* korean_script_;
00089   static const char* japanese_script_;
00090   static const char* fraktur_script_;
00091   int korean_id_;
00092   int japanese_id_;
00093   int katakana_id_;
00094   int hiragana_id_;
00095   int han_id_;
00096   int hangul_id_;
00097   int latin_id_;
00098   int fraktur_id_;
00099   tesseract::Tesseract* tess_;
00100 };
00101 
00102 int orientation_and_script_detection(STRING& filename,
00103                                      OSResults*,
00104                                      tesseract::Tesseract*);
00105 
00106 int os_detect(TO_BLOCK_LIST* port_blocks,
00107               OSResults* osr,
00108               tesseract::Tesseract* tess);
00109 
00110 int os_detect_blobs(BLOBNBOX_CLIST* blob_list,
00111                     OSResults* osr,
00112                     tesseract::Tesseract* tess);
00113 
00114 bool os_detect_blob(BLOBNBOX* bbox, OrientationDetector* o,
00115                     ScriptDetector* s, OSResults*,
00116                     tesseract::Tesseract* tess);
00117 
00118 // Helper method to convert an orientation index to its value in degrees.
00119 // The value represents the amount of clockwise rotation in degrees that must be
00120 // applied for the text to be upright (readable).
00121 const int OrientationIdToValue(const int& id);
00122 
00123 #endif  // TESSERACT_CCMAIN_OSDETECT_H__
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines