00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #ifndef CHARSAMPLE_H
00022 #define CHARSAMPLE_H
00023
00024 #include "elst.h"
00025 #include "pageres.h"
00026 #include "memry.h"
00027 #include "notdll.h"
00028
// Score constant; expands to MAX_INT32, which is defined outside this header.
// NOTE(review): presumably flags an unusable match score - confirm at use sites.
#define BAD_SCORE   MAX_INT32

// Character literals '!' and '~'.
// NOTE(review): presumably the inclusive range of character codes handled
// by the sample code - confirm against the implementation file.
#define FIRST_CHAR  '!'
#define LAST_CHAR   '~'
00032
// Forward declaration only: the classes below take tesseract::Tesseract by
// pointer, so the full definition is not needed in this header.
namespace tesseract { class Tesseract; }
00036
// Tag type for the `type` member of CHAR_SAMPLES.
// No explicit values are given, so the enumerators are 0, 1 and 2 in the
// order listed.
enum ClusterType {
  UNKNOWN,
  BLOB_CLUSTER,
  IMAGE_CLUSTER
};
00039
00040 class CHAR_SAMPLE;
00041
00042 ELISTIZEH (CHAR_SAMPLE)
00043 class CHAR_SAMPLES;
00044
00045 ELISTIZEH (CHAR_SAMPLES)
00046 class CHAR_PROTO;
00047
00048 class CHAR_SAMPLE:public ELIST_LINK
00049 {
00050 public:
00051 CHAR_SAMPLE();
00052
00053 CHAR_SAMPLE(
00054 PBLOB *blob,
00055 DENORM *denorm,
00056 char c
00057 );
00058
00059 CHAR_SAMPLE(
00060 IMAGE *image,
00061 char c
00062 );
00063
00064 ~CHAR_SAMPLE () {
00065
00066 if (sample_image != NULL)
00067 delete sample_image;
00068 }
00069
00070 float match_sample(CHAR_SAMPLE *test_sample, BOOL8 updating,
00071 tesseract::Tesseract* tess);
00072
00073 inT32 n_matches() {
00074 return n_samples_matched;
00075 }
00076
00077 IMAGE *image() {
00078 return sample_image;
00079 }
00080
00081 PBLOB *blob() {
00082 return sample_blob;
00083 }
00084
00085 DENORM *denorm() {
00086 return sample_denorm;
00087 }
00088
00089 double mean_score();
00090
00091 double variance();
00092
00093 char character() {
00094 return ch;
00095 }
00096
00097 void print(FILE *f);
00098
00099 void reset_match_statistics();
00100
00101 NEWDELETE2 (CHAR_SAMPLE) private:
00102 IMAGE * sample_image;
00103 PBLOB *sample_blob;
00104 DENORM *sample_denorm;
00105 inT32 n_samples_matched;
00106 double total_match_scores;
00107 double sumsq_match_scores;
00108 char ch;
00109 };
00110
00111 class CHAR_SAMPLES:public ELIST_LINK
00112 {
00113 public:
00114 CHAR_SAMPLES();
00115
00116 CHAR_SAMPLES(CHAR_SAMPLE *sample);
00117
00118 ~CHAR_SAMPLES () {
00119 }
00120
00121 inT32 n_samples() {
00122 return samples.length ();
00123 }
00124
00125 void add_sample(CHAR_SAMPLE *sample, tesseract::Tesseract*);
00126
00127 void build_prototype();
00128
00129 void rebuild_prototype(inT32 new_xsize, inT32 new_ysize);
00130
00131 void add_sample_to_prototype(CHAR_SAMPLE *sample);
00132
00133 CHAR_PROTO *prototype() {
00134 return proto;
00135 }
00136
00137 void find_best_sample();
00138
00139 float match_score(CHAR_SAMPLE *sample, tesseract::Tesseract* tess);
00140
00141 float nn_match_score(CHAR_SAMPLE *sample, tesseract::Tesseract* tess);
00142
00143 char character() {
00144 return ch;
00145 }
00146
00147 void assign_to_char();
00148
00149 void print(FILE *f);
00150
00151 NEWDELETE2 (CHAR_SAMPLES) private:
00152 ClusterType type;
00153 char ch;
00154 CHAR_PROTO *proto;
00155 CHAR_SAMPLE *best_sample;
00156 CHAR_SAMPLE_LIST samples;
00157 };
00158
00159 class CHAR_PROTO
00160 {
00161 public:
00162 CHAR_PROTO();
00163
00164 CHAR_PROTO(inT32 x_size,
00165 inT32 y_size,
00166 inT32 n_samples,
00167 float initial_value,
00168 char c);
00169
00170 CHAR_PROTO(
00171 CHAR_SAMPLE *sample);
00172
00173 ~CHAR_PROTO ();
00174
00175 float match_sample(CHAR_SAMPLE *test_sample);
00176
00177 float match(CHAR_PROTO *test_proto);
00178
00179 inT32 n_samples() {
00180 return nsamples;
00181 }
00182
00183 inT32 x_size() {
00184 return xsize;
00185 }
00186
00187 inT32 y_size() {
00188 return ysize;
00189 }
00190
00191 float **data() {
00192 return proto;
00193 }
00194 char character() {
00195 return ch;
00196 }
00197
00198 void enlarge_prototype(inT32 new_xsize, inT32 new_ysize);
00199
00200 void add_sample(CHAR_SAMPLE *sample);
00201
00202 IMAGE *make_image();
00203
00204 void print(FILE *f);
00205
00206 NEWDELETE2 (CHAR_PROTO) private:
00207 inT32 xsize;
00208 inT32 ysize;
00209 float *proto_data;
00210 float **proto;
00211 inT32 nsamples;
00212 char ch;
00213 };
00214 #endif