00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00018
00019 #ifndef TESSERACT_WORDREC_WORDREC_H__
00020 #define TESSERACT_WORDREC_WORDREC_H__
00021
00022 #include "classify.h"
00023 #include "ratngs.h"
00024 #include "matrix.h"
00025 #include "seam.h"
00026 #include "callback.h"
00027 #include "associate.h"
00028 #include "badwords.h"
00029
// Forward declarations at global scope (outside namespace tesseract).
// This header only names these types through pointers in function
// signatures, so their full definitions are not required here.
00030 struct CHUNKS_RECORD;
00031 struct SEARCH_RECORD;
00032
00033 namespace tesseract {
// Wordrec extends Classify with the word-level recognition interface.
// Only dj_statistics() and dj_cleanup() are defined inline; every other
// member function is merely declared here, so the notes below are limited
// to what the signatures themselves show.
// NOTE(review): several functions return raw pointers to list/matrix types;
// who owns the returned objects is not visible in this header - confirm
// against the definitions before relying on it.
00034 class Wordrec : public Classify {
00035 public:
00036 Wordrec();
00037 ~Wordrec();
00038 void save_summary(inT32 elapsed_time);
00039
// Session set-up / tear-down and per-word recognition declarations.
00040 void program_editup(const char *textbase, bool init_permute);
00041 BLOB_CHOICE_LIST_VECTOR *cc_recog(TWERD *tessword,
00042 WERD_CHOICE *best_choice,
00043 WERD_CHOICE *best_raw_choice,
00044 BOOL8 tester,
00045 BOOL8 trainer,
00046 bool last_word_on_line);
// Parameter name spelled consistently with save_summary().
00047 void program_editdown(inT32 elapsed_time);
00048 void set_pass1();
00049 void set_pass2();
00050 int end_recog();
00051 int start_recog(const char *textbase);
// The fourth (void *) and fifth (TEXTROW *) parameters are unnamed in this
// declaration.
00052 BLOB_CHOICE_LIST *call_matcher(
00053 TBLOB *ptblob,
00054 TBLOB *tessblob,
00055 TBLOB *ntblob,
00056 void *,
00057 TEXTROW *
00058 );
00059
00060 void program_init();
00061
// Blob classification declarations.
00062 BLOB_CHOICE_LIST *classify_blob(TBLOB *pblob,
00063 TBLOB *blob,
00064 TBLOB *nblob,
00065 TEXTROW *row,
00066 const char *string,
00067 C_COL color);
00068 void update_blob_classifications(TWERD *word,
00069 const BLOB_CHOICE_LIST_VECTOR &choices);
00070
// Search over CHUNKS_RECORD / SEARCH_RECORD state. Both record types are
// only forward-declared for this header and are passed by pointer.
00071 BLOB_CHOICE_LIST_VECTOR *evaluate_chunks(CHUNKS_RECORD *chunks_record,
00072 SEARCH_STATE search_state);
00073 void update_ratings(const BLOB_CHOICE_LIST_VECTOR &new_choices,
00074 const CHUNKS_RECORD *chunks_record,
00075 const SEARCH_STATE search_state);
00076 inT16 evaluate_state(CHUNKS_RECORD *chunks_record,
00077 SEARCH_RECORD *the_search,
00078 DANGERR *fixpt);
00079 void best_first_search(CHUNKS_RECORD *chunks_record,
00080 WERD_CHOICE *best_choice,
00081 WERD_CHOICE *raw_choice,
00082 STATE *state,
00083 DANGERR *fixpt,
00084 STATE *best_state);
00085 void expand_node(FLOAT32 worst_priority,
00086 CHUNKS_RECORD *chunks_record,
00087 SEARCH_RECORD *the_search);
// Note: seam_list is taken by value (SEAMS) here and in
// join_blobs_and_classify(), but by pointer (SEAMS *) in the chopping
// functions further down.
00088 BLOB_CHOICE_LIST_VECTOR *rebuild_current_state(
00089 TBLOB *blobs,
00090 SEAMS seam_list,
00091 STATE *state,
00092 BLOB_CHOICE_LIST_VECTOR *char_choices,
00093 int fx,
00094 bool force_rebuild,
00095 const WERD_CHOICE &best_choice,
00096 const MATRIX *ratings);
00097 BLOB_CHOICE_LIST *join_blobs_and_classify(
00098 TBLOB *blobs, SEAMS seam_list,
00099 int x, int y, int fx, const MATRIX *ratings,
00100 BLOB_CHOICE_LIST_VECTOR *old_choices);
00101
00102
// Chopping declarations. blob_number and right_chop_index are pointers to
// caller-provided integers.
// NOTE(review): presumably these are outputs - confirm in the definitions.
00103 bool improve_one_blob(TWERD *word,
00104 BLOB_CHOICE_LIST_VECTOR *char_choices,
00105 int fx,
00106 inT32 *blob_number,
00107 SEAMS *seam_list,
00108 DANGERR *fixpt,
00109 bool split_next_to_fragment);
00110 void modify_blob_choice(BLOB_CHOICE_LIST *answer,
00111 int chop_index);
00112 bool chop_one_blob(TWERD *word,
00113 BLOB_CHOICE_LIST_VECTOR *char_choices,
00114 inT32 *blob_number,
00115 SEAMS *seam_list,
00116 int *right_chop_index);
// No 'register' specifier on the word parameter: it is not part of the
// function type, and the specifier is no longer permitted from C++17 on.
00117 BLOB_CHOICE_LIST_VECTOR *chop_word_main(TWERD *word,
00118 int fx,
00119 WERD_CHOICE *best_choice,
00120 WERD_CHOICE *raw_choice,
00121 BOOL8 tester,
00122 BOOL8 trainer);
00123 void improve_by_chopping(TWERD *word,
00124 BLOB_CHOICE_LIST_VECTOR *char_choices,
00125 int fx,
00126 STATE *best_state,
00127 WERD_CHOICE *best_choice,
00128 WERD_CHOICE *raw_choice,
00129 SEAMS *seam_list,
00130 DANGERR *fixpt,
00131 STATE *chop_states,
00132 inT32 *state_count);
00133 MATRIX *word_associator(TBLOB *blobs,
00134 SEAMS seams,
00135 STATE *state,
00136 int fxid,
00137 WERD_CHOICE *best_choice,
00138 WERD_CHOICE *raw_choice,
00139 char *correct,
00140 DANGERR *fixpt,
00141 STATE *best_state);
00142 inT16 select_blob_to_split(const BLOB_CHOICE_LIST_VECTOR &char_choices,
00143 float rating_ceiling,
00144 bool split_next_to_fragment);
00145
00146 void mfeature_init();
00147
// Piece-level classification; start and end are inT16 values.
// NOTE(review): presumably indices delimiting a run of pieces - confirm
// (including whether 'end' is inclusive) in the definitions.
00148 BLOB_CHOICE_LIST *classify_piece(TBLOB *pieces,
00149 SEAMS seams,
00150 inT16 start,
00151 inT16 end);
00152 BLOB_CHOICE_LIST *get_piece_rating(MATRIX *ratings,
00153 TBLOB *blobs,
00154 SEAMS seams,
00155 inT16 start,
00156 inT16 end);
00157
00158
// Forwards File first to PrintAdaptiveStatistics() and then to
// PrintBadWords(). File is passed through unchecked.
00159 void dj_statistics(FILE *File) {
00160 PrintAdaptiveStatistics(File);
00161 PrintBadWords(File);
00162 }
00163
// Calls EndAdaptiveClassifier(); does nothing else.
00164 void dj_cleanup() { EndAdaptiveClassifier(); }
00165
00166
00167
// Priority functions; each returns a FLOAT32.
00168 FLOAT32 prioritize_state(CHUNKS_RECORD *chunks_record,
00169 SEARCH_RECORD *the_search);
00170 FLOAT32 width_priority(CHUNKS_RECORD *chunks_record,
00171 STATE *state,
00172 int num_joints);
00173 FLOAT32 seamcut_priority(SEAMS seams,
00174 STATE *state,
00175 int num_joints);
00176 FLOAT32 rating_priority(CHUNKS_RECORD *chunks_record,
00177 STATE *state,
00178 int num_joints);
00179
00180
00181
// Public data members.
// NOTE(review): tess_denorm and tess_word are raw pointers; whether the
// constructor initialises them, and who owns the pointees, is not visible
// in this header.
00182 POLY_MATCHER tess_matcher;
00183 POLY_TESTER tess_tester;
00184 POLY_TESTER tess_trainer;
00185 DENORM *tess_denorm;
00186 WERD *tess_word;
00187 int dict_word(const WERD_CHOICE &word);
00188 };
00189
00190
00191
00192
// FRAGMENT is a list element (it derives from ELIST_LINK) that stores two
// ICOORD values, head and tail, together with two EDGEPT pointers, headpt
// and tailpt.
// NOTE(review): presumably head/tail hold the coordinates of headpt/tailpt;
// the two-argument constructor is defined elsewhere - confirm there.
00193 class FRAGMENT:public ELIST_LINK
00194 {
00195 public:
// Default constructor. The raw pointer members are value-initialised
// (i.e. set to null) so that a default-constructed element never carries
// indeterminate pointer values. head and tail use ICOORD's own default
// construction.
00196 FRAGMENT() : headpt(), tailpt() {
00197 }
// Constructs from a pair of edge points; defined outside this header.
00198 FRAGMENT(EDGEPT *head_pt,
00199 EDGEPT *tail_pt);
00200
00201 ICOORD head;
00202 ICOORD tail;
// Non-owning as far as this declaration shows: no destructor is declared.
00203 EDGEPT *headpt;
00204 EDGEPT *tailpt;
00205
// Project macro expanded with the class name; its expansion is not visible
// in this header.
00206 NEWDELETE2 (FRAGMENT)
00207 };
00208
// Project list macro expanded for FRAGMENT. Note there is no trailing
// semicolon, so the expansion runs straight into the next declaration.
// NOTE(review): FRAGMENT_LIST, used below, presumably comes from this
// expansion - confirm against the macro definition.
00209 ELISTIZEH (FRAGMENT)
// Takes a TBLOB* and returns a PBLOB*; defined outside this header.
// NOTE(review): ownership of the returned object is not visible here.
00210 PBLOB *make_ed_blob(
00211 TBLOB *tessblob
00212 );
// Takes a FRAGMENT_LIST* and returns an OUTLINE*; defined outside this
// header.
00213 OUTLINE *make_ed_outline(
00214 FRAGMENT_LIST *list
00215 );
// Takes a TESSLINE* and a FRAGMENT_LIST*; returns nothing.
// NOTE(review): presumably adds fragments derived from 'outline' to 'list' -
// confirm in the definition.
00216 void register_outline(
00217 TESSLINE *outline,
00218 FRAGMENT_LIST *list
00219 );
00220
00221 }
00222
00223 #endif // TESSERACT_WORDREC_WORDREC_H__