Tesseract 3.01
|
00001 // Copyright 2008 Google Inc. All Rights Reserved. 00002 // Author: scharron@google.com (Samuel Charron) 00003 // 00004 // Licensed under the Apache License, Version 2.0 (the "License"); 00005 // you may not use this file except in compliance with the License. 00006 // You may obtain a copy of the License at 00007 // http://www.apache.org/licenses/LICENSE-2.0 00008 // Unless required by applicable law or agreed to in writing, software 00009 // distributed under the License is distributed on an "AS IS" BASIS, 00010 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00011 // See the License for the specific language governing permissions and 00012 // limitations under the License. 00013 00014 #ifndef TESSERACT_TRAINING_COMMONTRAINING_H__ 00015 #define TESSERACT_TRAINING_COMMONTRAINING_H__ 00016 00017 #include "oldlist.h" 00018 #include "cluster.h" 00019 #include "intproto.h" 00020 #include "featdefs.h" 00021 00023 // Macros //////////////////////////////////////////////////////////////////// 00025 #define MAXNAMESIZE 80 00026 00028 // Globals /////////////////////////////////////////////////////////////////// 00030 extern BOOL8 ShowAllSamples; 00031 00032 // Must be defined in the file that "implements" commonTraining facilities. 00033 extern CLUSTERCONFIG Config; 00034 extern FLOAT32 RoundingAccuracy; 00035 00036 extern char CTFontName[MAXNAMESIZE]; 00037 // globals used for parsing command line arguments 00038 extern char *Directory; 00039 00040 extern const char* test_ch; 00041 00042 extern const char *InputUnicharsetFile; 00043 extern const char *OutputUnicharsetFile; 00044 00045 extern const char *InputFontInfoFile; 00046 extern const char *InputXHeightsFile; 00047 00049 // Structs /////////////////////////////////////////////////////////////////// 00051 typedef struct 00052 { 00053 char *Label; 00054 int SampleCount; 00055 int font_sample_count; 00056 LIST List; 00057 } 00058 LABELEDLISTNODE, *LABELEDLIST; 00059 00060 typedef struct 00061 { 00062 char* Label; 00063 int NumMerged[MAX_NUM_PROTOS]; 00064 CLASS_TYPE Class; 00065 }MERGE_CLASS_NODE; 00066 typedef MERGE_CLASS_NODE* MERGE_CLASS; 00067 00068 00070 // Functions ///////////////////////////////////////////////////////////////// 00072 void ParseArguments( 00073 int argc, 00074 char **argv); 00075 00076 char *GetNextFilename(int Argc, char** argv); 00077 00078 LABELEDLIST FindList( 00079 LIST List, 00080 char *Label); 00081 00082 LABELEDLIST NewLabeledList( 00083 const char *Label); 00084 00085 void ReadTrainingSamples(const FEATURE_DEFS_STRUCT& feature_defs, 00086 const char *feature_name, int max_samples, 00087 float linear_spread, float circular_spread, 00088 UNICHARSET* unicharset, 00089 FILE* file, LIST* training_samples); 00090 00091 void WriteTrainingSamples( 00092 const FEATURE_DEFS_STRUCT &FeatureDefs, 00093 char *Directory, 00094 LIST CharList, 00095 const char *program_feature_type); 00096 00097 void FreeTrainingSamples( 00098 LIST CharList); 00099 00100 void FreeLabeledList( 00101 LABELEDLIST LabeledList); 00102 00103 void FreeLabeledClassList( 00104 LIST ClassListList); 00105 00106 CLUSTERER *SetUpForClustering( 00107 const FEATURE_DEFS_STRUCT &FeatureDefs, 00108 LABELEDLIST CharSample, 00109 const char *program_feature_type); 00110 00111 LIST RemoveInsignificantProtos( 00112 LIST ProtoList, 00113 BOOL8 KeepSigProtos, 00114 BOOL8 KeepInsigProtos, 00115 int N); 00116 00117 void CleanUpUnusedData( 00118 LIST ProtoList); 00119 00120 void MergeInsignificantProtos( 00121 LIST ProtoList, 00122 const char *label, 00123 CLUSTERER *Clusterer, 00124 CLUSTERCONFIG *Config); 00125 00126 MERGE_CLASS FindClass( 00127 LIST List, 00128 char *Label); 00129 00130 MERGE_CLASS NewLabeledClass( 00131 char *Label); 00132 00133 void FreeTrainingSamples( 00134 LIST CharList); 00135 00136 void SetUpForFloat2Int(const UNICHARSET& unicharset, LIST LabeledClassList); 00137 00138 void Normalize( 00139 float *Values); 00140 00141 void FreeNormProtoList( 00142 LIST CharList); 00143 00144 void AddToNormProtosList( 00145 LIST* NormProtoList, 00146 LIST ProtoList, 00147 char *CharName); 00148 00149 int NumberOfProtos( 00150 LIST ProtoList, 00151 BOOL8 CountSigProtos, 00152 BOOL8 CountInsigProtos); 00153 00154 00155 void allocNormProtos(); 00156 #endif // TESSERACT_TRAINING_COMMONTRAINING_H__