Tesseract 3.01
/data/source/tesseract-ocr/training/commontraining.h
Go to the documentation of this file.
00001 // Copyright 2008 Google Inc. All Rights Reserved.
00002 // Author: scharron@google.com (Samuel Charron)
00003 //
00004 // Licensed under the Apache License, Version 2.0 (the "License");
00005 // you may not use this file except in compliance with the License.
00006 // You may obtain a copy of the License at
00007 // http://www.apache.org/licenses/LICENSE-2.0
00008 // Unless required by applicable law or agreed to in writing, software
00009 // distributed under the License is distributed on an "AS IS" BASIS,
00010 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00011 // See the License for the specific language governing permissions and
00012 // limitations under the License.
00013 
00014 #ifndef TESSERACT_TRAINING_COMMONTRAINING_H__
00015 #define TESSERACT_TRAINING_COMMONTRAINING_H__
00016 
00017 #include "oldlist.h"
00018 #include "cluster.h"
00019 #include "intproto.h"
00020 #include "featdefs.h"
00021 
00023 // Macros ////////////////////////////////////////////////////////////////////
// Size, in chars, of the fixed name buffer CTFontName declared in this header.
00025 #define MAXNAMESIZE     80
00026 
00028 // Globals ///////////////////////////////////////////////////////////////////
// Boolean flag declared here and defined elsewhere.
// NOTE(review): the name suggests it toggles display of every sample;
// confirm against the defining .cpp file.
00030 extern BOOL8 ShowAllSamples;
00031 
// Clustering configuration and rounding accuracy shared by the training
// tools. Both are only declared here; each program that uses this header
// has to provide the definitions (see the original note below).
00032 // Must be defined in the file that "implements" commonTraining facilities.
00033 extern CLUSTERCONFIG Config;
00034 extern FLOAT32 RoundingAccuracy;
00035 
// Fixed-size name buffer of MAXNAMESIZE chars.
// NOTE(review): presumably holds the current training font name - confirm.
00036 extern char CTFontName[MAXNAMESIZE];
00037 // globals used for parsing command line arguments
// NOTE(review): presumably a directory path taken from the command line;
// the same identifier is also used as a parameter name of
// WriteTrainingSamples, which hides this global inside that function.
00038 extern char *Directory;
00039 
// Read-only C string declared here and defined elsewhere.
// NOTE(review): presumably a character label singled out for debugging -
// confirm against the definition.
00040 extern const char* test_ch;
00041 
// Read-only C strings; by their names, the paths of the unicharset file to
// read and the unicharset file to write (TODO confirm where they are set).
00042 extern const char *InputUnicharsetFile;
00043 extern const char *OutputUnicharsetFile;
00044 
// Read-only C strings; by their names, the paths of the font-info and
// x-heights input files (TODO confirm where they are set).
00045 extern const char *InputFontInfoFile;
00046 extern const char *InputXHeightsFile;
00047 
00049 // Structs ///////////////////////////////////////////////////////////////////
// Node that pairs a text label with a LIST and two integer counters.
// LABELEDLIST is the pointer-to-node type used by the functions below.
// NOTE(review): the per-field meanings are inferred from the field names -
// confirm against the implementation file.
00051 typedef struct
00052 {
00053   char  *Label;              // label text for this node
00054   int   SampleCount;         // presumably number of samples in List
00055   int   font_sample_count;   // presumably a per-font sample counter
00056   LIST  List;                // the list carried by this node
00057 }
00058 LABELEDLISTNODE, *LABELEDLIST;
00059 
// Node that pairs a text label with a CLASS_TYPE and a per-proto counter
// array of MAX_NUM_PROTOS ints. MERGE_CLASS is the pointer-to-node type.
// NOTE(review): the per-field meanings are inferred from the field names -
// confirm against the implementation file.
00060 typedef struct
00061 {
00062   char* Label;                      // label text for this class
00063   int   NumMerged[MAX_NUM_PROTOS];  // presumably merge count per proto
00064   CLASS_TYPE Class;                 // the class data for this label
00065 }MERGE_CLASS_NODE;
00066 typedef MERGE_CLASS_NODE* MERGE_CLASS;
00067 
00068 
00070 // Functions /////////////////////////////////////////////////////////////////
// Processes the program's command line.
//   argc - number of entries in argv
//   argv - argument strings
// NOTE(review): presumably fills in the option globals declared above
// (Directory, InputUnicharsetFile, ...) - confirm in the implementation.
00072 void ParseArguments(
00073     int         argc,
00074     char        **argv);
00075 
// Returns a file name taken from the argument vector.
//   Argc - number of entries in argv
//   argv - argument strings
// NOTE(review): presumably iterates over the non-option arguments and
// returns NULL once they are exhausted; ownership of the returned string
// is not visible from this header - confirm.
00076 char *GetNextFilename(int Argc, char** argv);
00077 
// Searches List for the node associated with Label and returns it as a
// LABELEDLIST (pointer to LABELEDLISTNODE).
//   List  - list to search
//   Label - label text to look for
// NOTE(review): presumably returns NULL when no node matches - confirm.
// NOTE(review): Label is a non-const char* here although NewLabeledList
// takes const char*; if the implementation only reads it, const char*
// would let callers pass string constants without a cast.
00078 LABELEDLIST FindList(
00079     LIST        List,
00080     char        *Label);
00081 
// Creates a LABELEDLIST node for the given label and returns it.
//   Label - label text (not modified through this pointer)
// NOTE(review): presumably the caller releases the result with
// FreeLabeledList - confirm ownership in the implementation.
00082 LABELEDLIST NewLabeledList(
00083     const char  *Label);
00084 
// Reads training samples from an open stream into *training_samples.
//   feature_defs     - feature definitions (read-only reference)
//   feature_name     - name of the feature type of interest
//   max_samples      - sample limit (NOTE(review): per-class or total, and
//                      the meaning of 0 or negative values, is not visible
//                      here - confirm)
//   linear_spread    - NOTE(review): meaning not visible from this header
//   circular_spread  - NOTE(review): meaning not visible from this header
//   unicharset       - character set passed by pointer (may be updated;
//                      confirm)
//   file             - input stream, opened by the caller
//   training_samples - in/out list receiving the samples
// NOTE(review): FILE and UNICHARSET are not obviously provided by the four
// headers this file includes directly; presumably they arrive transitively
// or from the including file - confirm the header is self-contained.
00085 void ReadTrainingSamples(const FEATURE_DEFS_STRUCT& feature_defs,
00086                          const char *feature_name, int max_samples,
00087                          float linear_spread, float circular_spread,
00088                          UNICHARSET* unicharset,
00089                          FILE* file, LIST* training_samples);
00090 
// Writes the training samples held in CharList to storage.
//   FeatureDefs          - feature definitions (read-only reference)
//   Directory            - output directory; note that this parameter
//                          hides the global variable Directory declared
//                          earlier in this header
//   CharList             - list of samples to write
//   program_feature_type - name of the feature type to write
// NOTE(review): output file naming and format are not visible here.
00091 void WriteTrainingSamples(
00092     const FEATURE_DEFS_STRUCT &FeatureDefs,
00093     char *Directory,
00094     LIST CharList,
00095     const char  *program_feature_type);
00096 
// Releases the training samples held in CharList.
//   CharList - list to free
// NOTE(review): presumably CharList must not be used after this call -
// confirm. The same prototype is declared a second time further down in
// this header.
00097 void FreeTrainingSamples(
00098     LIST        CharList);
00099 
// Releases a single LABELEDLIST node.
//   LabeledList - node to free
// NOTE(review): whether the node's Label and List members are freed too
// is not visible from this header - confirm.
00100 void FreeLabeledList(
00101     LABELEDLIST LabeledList);
00102 
// Releases a list of labeled classes.
//   ClassListList - list to free
// NOTE(review): presumably the elements are MERGE_CLASS nodes - confirm.
00103 void FreeLabeledClassList(
00104     LIST        ClassListList);
00105 
// Builds and returns a CLUSTERER for the samples of one labeled list.
//   FeatureDefs          - feature definitions (read-only reference)
//   CharSample           - labeled list supplying the samples
//   program_feature_type - name of the feature type to cluster on
// NOTE(review): ownership of the returned CLUSTERER is not visible here;
// presumably the caller must free it - confirm.
00106 CLUSTERER *SetUpForClustering(
00107     const FEATURE_DEFS_STRUCT &FeatureDefs,
00108     LABELEDLIST CharSample,
00109     const char  *program_feature_type);
00110 
// Filters ProtoList by significance and returns the resulting LIST.
//   ProtoList       - input list of prototypes
//   KeepSigProtos   - keep prototypes considered significant
//   KeepInsigProtos - keep prototypes considered insignificant
//   N               - NOTE(review): meaning not visible from this header
//                     (possibly a dimension count) - confirm
// NOTE(review): whether the result shares storage with ProtoList or is a
// new list is not visible here - confirm before freeing either.
00111 LIST RemoveInsignificantProtos(
00112     LIST        ProtoList,
00113     BOOL8       KeepSigProtos,
00114     BOOL8       KeepInsigProtos,
00115     int         N);
00116 
// Releases data in ProtoList that is no longer needed.
//   ProtoList - list of prototypes to clean up
// NOTE(review): exactly which members are released is not visible from
// this header - confirm in the implementation.
00117 void CleanUpUnusedData(
00118     LIST        ProtoList);
00119 
// Merges the insignificant prototypes of ProtoList.
//   ProtoList - list of prototypes to process
//   label     - label text for the class being processed
//   Clusterer - clusterer associated with these prototypes
//   Config    - clustering configuration, passed by pointer; note that
//               this parameter hides the global variable Config declared
//               earlier in this header
// NOTE(review): the merge criterion is not visible from this header.
00120 void MergeInsignificantProtos(
00121     LIST        ProtoList,
00122     const char  *label,
00123     CLUSTERER   *Clusterer,
00124     CLUSTERCONFIG *Config);
00125 
// Searches List for the class node associated with Label and returns it
// as a MERGE_CLASS (pointer to MERGE_CLASS_NODE).
//   List  - list to search
//   Label - label text to look for
// NOTE(review): presumably returns NULL when no node matches - confirm.
// NOTE(review): Label is a non-const char*; if it is only read, const
// char* would be the safer signature.
00126 MERGE_CLASS FindClass(
00127     LIST        List,
00128     char        *Label);
00129 
// Creates a MERGE_CLASS node for the given label and returns it.
//   Label - label text
// NOTE(review): whether Label is copied or retained by the new node, and
// who frees the node, is not visible from this header - confirm.
00130 MERGE_CLASS NewLabeledClass(
00131     char        *Label);
00132 
// NOTE(review): redundant redeclaration - an identical prototype for
// FreeTrainingSamples already appears earlier in this header. Repeating a
// function declaration is legal, but this copy can be removed.
00133 void FreeTrainingSamples(
00134     LIST        CharList);
00135 
// Prepares the classes in LabeledClassList for float-to-integer conversion.
//   unicharset       - character set (read-only reference)
//   LabeledClassList - list of labeled classes to prepare
// NOTE(review): the description is inferred from the function name;
// the actual processing is not visible from this header.
00136 void SetUpForFloat2Int(const UNICHARSET& unicharset, LIST LabeledClassList);
00137 
// Normalizes the floats pointed to by Values in place.
//   Values - array of floats to normalize
// NOTE(review): the number of elements read/written and the kind of
// normalization are not visible from this header - confirm before calling.
00138 void Normalize(
00139     float       *Values);
00140 
// Releases a list of normalization prototypes.
//   CharList - list to free
// NOTE(review): presumably the counterpart of AddToNormProtosList -
// confirm.
00141 void FreeNormProtoList(
00142     LIST        CharList);
00143 
// Adds the prototypes of one character to *NormProtoList.
//   NormProtoList - in/out list being extended
//   ProtoList     - prototypes to add
//   CharName      - name of the character the prototypes belong to
// NOTE(review): whether ProtoList and CharName are copied or retained is
// not visible from this header - confirm. CharName is a non-const char*;
// if it is only read, const char* would be the safer signature.
00144 void AddToNormProtosList(
00145     LIST*       NormProtoList,
00146     LIST        ProtoList,
00147     char        *CharName);
00148 
// Counts prototypes in ProtoList and returns the count.
//   ProtoList        - list of prototypes
//   CountSigProtos   - include prototypes considered significant
//   CountInsigProtos - include prototypes considered insignificant
00149 int NumberOfProtos(
00150     LIST        ProtoList,
00151     BOOL8       CountSigProtos,
00152     BOOL8       CountInsigProtos);
00153 
00154 
// Takes no arguments and returns nothing.
// NOTE(review): presumably allocates the storage used for normalization
// prototypes - confirm what is allocated and who frees it.
00155 void allocNormProtos();
00156 #endif  // TESSERACT_TRAINING_COMMONTRAINING_H__
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines