00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #ifndef BLOBBOX_H
00021 #define BLOBBOX_H
00022
00023 #include "varable.h"
00024 #include "clst.h"
00025 #include "elst2.h"
00026 #include "werd.h"
00027 #include "ocrblock.h"
00028 #include "statistc.h"
00029
00030 extern double_VAR_H (textord_error_weight, 3,
00031 "Weighting for error in believability");
00032
// Pitch classification stored in TO_ROW::pitch_decision and
// TO_BLOCK::pitch_decision.
// NOTE(review): the per-value meanings below are read off the enumerator
// names (fixed vs. proportional pitch); confirm against the pitch code.
enum PITCH_TYPE
{
  PITCH_DUNNO,        // No decision made (value 0).
  PITCH_DEF_FIXED,    // Presumably: definitely fixed pitch.
  PITCH_MAYBE_FIXED,  // Presumably: possibly fixed pitch.
  PITCH_DEF_PROP,     // Presumably: definitely proportional.
  PITCH_MAYBE_PROP,   // Presumably: possibly proportional.
  PITCH_CORR_FIXED,   // Presumably: corrected to fixed pitch.
  PITCH_CORR_PROP     // Presumably: corrected to proportional.
};
00043
00044
// Tab-stop status recorded separately for the left and right edge of a
// BLOBNBOX (see left_tab_type() / right_tab_type()).
// NOTE(review): per-value meanings are inferred from the names only.
enum TabType {
  TT_NONE,         // Initial value assigned by BLOBNBOX::Init().
  TT_DELETED,      // Presumably: a tab candidate that was removed.
  TT_UNCONFIRMED,  // Presumably: a candidate not yet confirmed.
  TT_FAKE,         // Presumably: a synthetic tab stop.
  TT_CONFIRMED,    // Presumably: an accepted tab stop.
  TT_VLINE         // Presumably: derived from a vertical line.
};
00053
00054
00055
00056
// Kind of page region a BLOBNBOX has been assigned to. Each value is
// mapped to a display colour by BLOBNBOX::BoxColor().
// NOTE(review): per-value meanings are inferred from the names only.
enum BlobRegionType {
  BRT_NOISE,      // Presumably: noise blob.
  BRT_HLINE,      // Presumably: horizontal line.
  BRT_RECTIMAGE,  // Presumably: rectangular image region.
  BRT_POLYIMAGE,  // Presumably: non-rectangular image region.
  BRT_UNKNOWN,    // Initial value assigned by BLOBNBOX::Init().
  BRT_VERT_TEXT,  // Presumably: vertical text.
  BRT_TEXT,       // Presumably: ordinary text.

  BRT_COUNT       // Sentinel: number of region types above; keep last.
};
00068
00069 namespace tesseract {
00070 class ColPartition;
00071 }
00072
00073 class BLOBNBOX;
00074 ELISTIZEH (BLOBNBOX)
00075 class BLOBNBOX:public ELIST_LINK
00076 {
00077 public:
00078 BLOBNBOX() {
00079 blob_ptr = NULL;
00080 cblob_ptr = NULL;
00081 area = 0;
00082 Init();
00083 }
00084 explicit BLOBNBOX(PBLOB *srcblob) {
00085 blob_ptr = srcblob;
00086 cblob_ptr = NULL;
00087 box = srcblob->bounding_box ();
00088 area = (int) srcblob->area ();
00089 Init();
00090 }
00091 explicit BLOBNBOX(C_BLOB *srcblob) {
00092 blob_ptr = NULL;
00093 cblob_ptr = srcblob;
00094 box = srcblob->bounding_box ();
00095 area = (int) srcblob->area ();
00096 Init();
00097 }
00098
00099 void rotate_box(FCOORD vec) {
00100 box.rotate(vec);
00101 }
00102 void translate_box(ICOORD v) {
00103 box.move(v);
00104 }
00105 void merge(BLOBNBOX *nextblob);
00106 void chop(
00107 BLOBNBOX_IT *start_it,
00108 BLOBNBOX_IT *blob_it,
00109 FCOORD rotation,
00110 float xheight);
00111
00112
00113 const TBOX& bounding_box() const {
00114 return box;
00115 }
00116 void compute_bounding_box() {
00117 box = cblob_ptr != NULL ? cblob_ptr->bounding_box()
00118 : blob_ptr->bounding_box();
00119 }
00120 const TBOX& reduced_box() const {
00121 return red_box;
00122 }
00123 void set_reduced_box(TBOX new_box) {
00124 red_box = new_box;
00125 reduced = TRUE;
00126 }
00127 inT32 enclosed_area() const {
00128 return area;
00129 }
00130 bool joined_to_prev() const {
00131 return joined != 0;
00132 }
00133 bool red_box_set() const {
00134 return reduced != 0;
00135 }
00136 int repeated_set() const {
00137 return repeated_set_;
00138 }
00139 void set_repeated_set(int set_id) {
00140 repeated_set_ = set_id;
00141 }
00142 PBLOB *blob() const {
00143 return blob_ptr;
00144 }
00145 C_BLOB *cblob() const {
00146 return cblob_ptr;
00147 }
00148 TabType left_tab_type() const {
00149 return left_tab_type_;
00150 }
00151 void set_left_tab_type(TabType new_type) {
00152 left_tab_type_ = new_type;
00153 }
00154 TabType right_tab_type() const {
00155 return right_tab_type_;
00156 }
00157 void set_right_tab_type(TabType new_type) {
00158 right_tab_type_ = new_type;
00159 }
00160 BlobRegionType region_type() const {
00161 return region_type_;
00162 }
00163 void set_region_type(BlobRegionType new_type) {
00164 region_type_ = new_type;
00165 }
00166 int left_rule() const {
00167 return left_rule_;
00168 }
00169 void set_left_rule(int new_left) {
00170 left_rule_ = new_left;
00171 }
00172 int right_rule() const {
00173 return right_rule_;
00174 }
00175 void set_right_rule(int new_right) {
00176 right_rule_ = new_right;
00177 }
00178 int left_crossing_rule() const {
00179 return left_crossing_rule_;
00180 }
00181 void set_left_crossing_rule(int new_left) {
00182 left_crossing_rule_ = new_left;
00183 }
00184 int right_crossing_rule() const {
00185 return right_crossing_rule_;
00186 }
00187 void set_right_crossing_rule(int new_right) {
00188 right_crossing_rule_ = new_right;
00189 }
00190 float horz_stroke_width() const {
00191 return horz_stroke_width_;
00192 }
00193 void set_horz_stroke_width(float width) {
00194 horz_stroke_width_ = width;
00195 }
00196 float vert_stroke_width() const {
00197 return vert_stroke_width_;
00198 }
00199 void set_vert_stroke_width(float width) {
00200 vert_stroke_width_ = width;
00201 }
00202 tesseract::ColPartition* owner() const {
00203 return owner_;
00204 }
00205 void set_owner(tesseract::ColPartition* new_owner) {
00206 owner_ = new_owner;
00207 }
00208 void set_noise_flag(bool flag) {
00209 noise_flag_ = flag;
00210 }
00211 bool noise_flag() const {
00212 return noise_flag_;
00213 }
00214
00215 #ifndef GRAPHICS_DISABLED
00216
00217 ScrollView::Color BoxColor() const {
00218 switch (region_type_) {
00219 case BRT_HLINE:
00220 return ScrollView::YELLOW;
00221 case BRT_RECTIMAGE:
00222 return ScrollView::RED;
00223 case BRT_POLYIMAGE:
00224 return ScrollView::ORANGE;
00225 case BRT_UNKNOWN:
00226 return ScrollView::CYAN;
00227 case BRT_VERT_TEXT:
00228 return ScrollView::GREEN;
00229 case BRT_TEXT:
00230 return ScrollView::BLUE;
00231 case BRT_NOISE:
00232 default:
00233 return ScrollView::GREY;
00234 }
00235 }
00236
00237 void plot(ScrollView* window,
00238 ScrollView::Color blob_colour,
00239 ScrollView::Color child_colour) {
00240 if (blob_ptr != NULL)
00241 blob_ptr->plot(window, blob_colour, child_colour);
00242 if (cblob_ptr != NULL)
00243 cblob_ptr->plot(window, blob_colour, child_colour);
00244 }
00245 #endif
00246
00247 NEWDELETE2(BLOBNBOX)
00248
00249 private:
00250
00251 void Init() {
00252 joined = false;
00253 reduced = false;
00254 repeated_set_ = 0;
00255 left_tab_type_ = TT_NONE;
00256 right_tab_type_ = TT_NONE;
00257 region_type_ = BRT_UNKNOWN;
00258 left_rule_ = 0;
00259 right_rule_ = 0;
00260 left_crossing_rule_ = 0;
00261 right_crossing_rule_ = 0;
00262 horz_stroke_width_ = 0.0f;
00263 vert_stroke_width_ = 0.0f;
00264 owner_ = NULL;
00265 noise_flag_ = false;
00266 }
00267
00268 PBLOB *blob_ptr;
00269 C_BLOB *cblob_ptr;
00270 TBOX box;
00271 TBOX red_box;
00272 int area:30;
00273 int joined:1;
00274 int reduced:1;
00275 int repeated_set_;
00276 TabType left_tab_type_;
00277 TabType right_tab_type_;
00278 BlobRegionType region_type_;
00279 inT16 left_rule_;
00280 inT16 right_rule_;
00281 inT16 left_crossing_rule_;
00282 inT16 right_crossing_rule_;
00283 float horz_stroke_width_;
00284 float vert_stroke_width_;
00285 tesseract::ColPartition* owner_;
00286
00287 bool noise_flag_;
00288 };
00289
00290 class TO_ROW:public ELIST2_LINK
00291 {
00292 public:
00293 TO_ROW() {
00294 num_repeated_sets_ = -1;
00295 }
00296 TO_ROW(
00297 BLOBNBOX *blob,
00298 float top,
00299 float bottom,
00300 float row_size);
00301
00302 float max_y() const {
00303 return y_max;
00304 }
00305 float min_y() const {
00306 return y_min;
00307 }
00308 float mean_y() const {
00309 return (y_min + y_max) / 2.0f;
00310 }
00311 float initial_min_y() const {
00312 return initial_y_min;
00313 }
00314 float line_m() const {
00315 return m;
00316 }
00317 float line_c() const {
00318 return c;
00319 }
00320 float line_error() const {
00321 return error;
00322 }
00323 float parallel_c() const {
00324 return para_c;
00325 }
00326 float parallel_error() const {
00327 return para_error;
00328 }
00329 float believability() const {
00330 return credibility;
00331 }
00332 float intercept() const {
00333 return y_origin;
00334 }
00335 void add_blob(
00336 BLOBNBOX *blob,
00337 float top,
00338 float bottom,
00339 float row_size);
00340 void insert_blob(
00341 BLOBNBOX *blob);
00342
00343 BLOBNBOX_LIST *blob_list() {
00344 return &blobs;
00345 }
00346
00347 void set_line(
00348 float new_m,
00349 float new_c,
00350 float new_error) {
00351 m = new_m;
00352 c = new_c;
00353 error = new_error;
00354 }
00355 void set_parallel_line(
00356 float gradient,
00357 float new_c,
00358 float new_error) {
00359 para_c = new_c;
00360 para_error = new_error;
00361 credibility =
00362 (float) (blobs.length () - textord_error_weight * new_error);
00363 y_origin = (float) (new_c / sqrt (1 + gradient * gradient));
00364
00365 }
00366 void set_limits(
00367 float new_min,
00368 float new_max) {
00369 y_min = new_min;
00370 y_max = new_max;
00371 }
00372 void compute_vertical_projection();
00373
00374
00375 bool rep_chars_marked() const {
00376 return num_repeated_sets_ != -1;
00377 }
00378 void clear_rep_chars_marked() {
00379 num_repeated_sets_ = -1;
00380 }
00381 int num_repeated_sets() const {
00382 return num_repeated_sets_;
00383 }
00384 void set_num_repeated_sets(int num_sets) {
00385 num_repeated_sets_ = num_sets;
00386 }
00387
00388
00389 NEWDELETE2 (TO_ROW) BOOL8 merged;
00390 BOOL8 all_caps;
00391 BOOL8 used_dm_model;
00392 inT16 projection_left;
00393 inT16 projection_right;
00394 PITCH_TYPE pitch_decision;
00395 float fixed_pitch;
00396 float fp_space;
00397 float fp_nonsp;
00398 float pr_space;
00399 float pr_nonsp;
00400 float spacing;
00401 float xheight;
00402 int xheight_evidence;
00403 float ascrise;
00404 float descdrop;
00405 inT32 min_space;
00406 inT32 max_nonspace;
00407 inT32 space_threshold;
00408 float kern_size;
00409 float space_size;
00410 WERD_LIST rep_words;
00411 ICOORDELT_LIST char_cells;
00412 QSPLINE baseline;
00413 STATS projection;
00414
00415 private:
00416 BLOBNBOX_LIST blobs;
00417 float y_min;
00418 float y_max;
00419 float initial_y_min;
00420 float m, c;
00421 float error;
00422 float para_c;
00423 float para_error;
00424 float y_origin;
00425 float credibility;
00426 int num_repeated_sets_;
00427
00428
00429 };
00430
00431 ELIST2IZEH (TO_ROW)
00432 class TO_BLOCK:public ELIST_LINK
00433 {
00434 public:
00435 TO_BLOCK() {
00436 }
00437 TO_BLOCK(
00438 BLOCK *src_block);
00439 ~TO_BLOCK();
00440
00441 TO_ROW_LIST *get_rows() {
00442 return &row_list;
00443 }
00444
00445 void print_rows() {
00446 TO_ROW_IT row_it = &row_list;
00447 TO_ROW *row;
00448
00449 for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
00450 row_it.forward ()) {
00451 row = row_it.data ();
00452 printf ("Row range (%g,%g), para_c=%g, blobcount=" INT32FORMAT
00453 "\n", row->min_y (), row->max_y (), row->parallel_c (),
00454 row->blob_list ()->length ());
00455 }
00456 }
00457
00458
00459 void plot_graded_blobs(ScrollView* to_win);
00460
00461 BLOBNBOX_LIST blobs;
00462 BLOBNBOX_LIST underlines;
00463 BLOBNBOX_LIST noise_blobs;
00464 BLOBNBOX_LIST small_blobs;
00465 BLOBNBOX_LIST large_blobs;
00466 BLOCK *block;
00467 PITCH_TYPE pitch_decision;
00468 float line_spacing;
00469 float line_size;
00470 float max_blob_size;
00471 float baseline_offset;
00472 float xheight;
00473 float fixed_pitch;
00474 float kern_size;
00475 float space_size;
00476 inT32 min_space;
00477 inT32 max_nonspace;
00478 float fp_space;
00479 float fp_nonsp;
00480 float pr_space;
00481 float pr_nonsp;
00482 TO_ROW *key_row;
00483
00484 NEWDELETE2 (TO_BLOCK) private:
00485 TO_ROW_LIST row_list;
00486 };
00487
00488 ELISTIZEH (TO_BLOCK)
00489 extern double_VAR_H (textord_error_weight, 3,
00490 "Weighting for error in believability");
00491 void find_blob_limits(
00492 PBLOB *blob,
00493 float leftx,
00494 float rightx,
00495 FCOORD rotation,
00496 float &ymin,
00497 float &ymax);
00498 void find_cblob_limits(
00499 C_BLOB *blob,
00500 float leftx,
00501 float rightx,
00502 FCOORD rotation,
00503 float &ymin,
00504 float &ymax);
00505 void find_cblob_vlimits(
00506 C_BLOB *blob,
00507 float leftx,
00508 float rightx,
00509 float &ymin,
00510 float &ymax);
00511 void find_cblob_hlimits(
00512 C_BLOB *blob,
00513 float bottomy,
00514 float topy,
00515 float &xmin,
00516 float &xymax);
00517 PBLOB *rotate_blob(
00518 PBLOB *blob,
00519 FCOORD rotation
00520 );
00521 PBLOB *rotate_cblob(
00522 C_BLOB *blob,
00523 float xheight,
00524 FCOORD rotation
00525 );
00526 C_BLOB *crotate_cblob(
00527 C_BLOB *blob,
00528 FCOORD rotation
00529 );
00530 TBOX box_next(
00531 BLOBNBOX_IT *it
00532 );
00533 TBOX box_next_pre_chopped(
00534 BLOBNBOX_IT *it
00535 );
00536 void vertical_blob_projection(
00537 PBLOB *blob,
00538 STATS *stats
00539 );
00540
00541 void vertical_outline_projection(OUTLINE *outline,
00542 STATS *stats
00543 );
00544 void vertical_cblob_projection(
00545 C_BLOB *blob,
00546 STATS *stats
00547 );
00548 void vertical_coutline_projection(
00549 C_OUTLINE *outline,
00550 STATS *stats
00551 );
00552 void plot_blob_list(ScrollView* win,
00553 BLOBNBOX_LIST *list,
00554 ScrollView::Color body_colour,
00555 ScrollView::Color child_colour);
00556
00557 #endif