00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #ifndef TORDMAIN_H
00021 #define TORDMAIN_H
00022
00023 #include <time.h>
00024 #include "varable.h"
00025 #include "ocrblock.h"
00026 #include "tessclas.h"
00027 #include "blobbox.h"
00028 #include "notdll.h"
00029
// Forward declaration only: within this header tesseract::Tesseract is used
// solely as a pointer type (the last parameter of textord_page), so the full
// class definition does not have to be included here.
00030 namespace tesseract {
00031 class Tesseract;
00032 }
00033
// Declarations of the textord tuning variables. Each *_VAR_H entry lists the
// variable name, a value and a help string. The macros are not defined in this
// file (presumably they come from "varable.h", and the value is presumably the
// default used by the matching definition elsewhere -- TODO confirm).

// Blob display and initial x-height estimation switches.
00034 extern BOOL_VAR_H (textord_show_blobs, FALSE, "Display unsorted blobs");
00035 extern BOOL_VAR_H (textord_new_initial_xheight, TRUE,
00036 "Use test xheight mechanism");
00037 extern BOOL_VAR_H (textord_exit_after, FALSE,
00038 "Exit after completing textord");
// Size thresholds / percentiles used when classifying blobs by size.
00039 extern INT_VAR_H (textord_max_noise_size, 7, "Pixel size of noise");
00040 extern double_VAR_H (textord_blob_size_bigile, 95,
00041 "Percentile for large blobs");
00042 extern double_VAR_H (textord_noise_area_ratio, 0.7,
00043 "Fraction of bounding box for noise");
00044 extern double_VAR_H (textord_blob_size_smallile, 20,
00045 "Percentile for small blobs");
00046 extern double_VAR_H (textord_initialx_ile, 0.75,
00047 "Ile of sizes for xheight guess");
// NOTE(review): the help string below is identical to textord_initialx_ile's;
// the variable name suggests an ascender-related value -- confirm the intended
// text against the definition.
00048 extern double_VAR_H (textord_initialasc_ile, 0.90,
00049 "Ile of sizes for xheight guess");
// textord_noise_* : parameters of the noise detection / rejection logic.
00050 extern INT_VAR_H (textord_noise_sizefraction, 10,
00051 "Fraction of size for maxima");
00052 extern double_VAR_H (textord_noise_sizelimit, 0.5,
00053 "Fraction of x for big t count");
00054 extern INT_VAR_H (textord_noise_translimit, 16,
00055 "Transitions for normal blob");
00056 extern double_VAR_H (textord_noise_normratio, 2.0,
00057 "Dot to norm ratio for deletion");
00058 extern BOOL_VAR_H (textord_noise_rejwords, TRUE, "Reject noise-like words");
00059 extern BOOL_VAR_H (textord_noise_rejrows, TRUE, "Reject noise-like rows");
00060 extern double_VAR_H (textord_noise_syfract, 0.2,
00061 "xh fract error for norm blobs");
00062 extern double_VAR_H (textord_noise_sxfract, 0.4,
00063 "xh fract width error for norm blobs");
00064 extern INT_VAR_H (textord_noise_sncount, 1, "super norm blobs to save row");
// NOTE(review): same help string as textord_noise_normratio above; the name
// suggests this one applies per row -- confirm against the definition.
00065 extern double_VAR_H (textord_noise_rowratio, 6.0,
00066 "Dot to norm ratio for deletion");
00067 extern BOOL_VAR_H (textord_noise_debug, FALSE, "Debug row garbage detector");
// textord_blshift_* : baseline-shift parameters (see help strings).
00068 extern double_VAR_H (textord_blshift_maxshift, 0.00, "Max baseline shift");
00069 extern double_VAR_H (textord_blshift_xfraction, 9.99,
00070 "Min size of baseline shift");
00071
// String variable for the image file extension; the value listed is ".tif".
// NOTE(review): the help string spells "Externsion" (likely "Extension"). It is
// a string literal, so it is left untouched here -- correct it together with
// the matching definition if desired.
00072 extern STRING_EVAR_H (tessedit_image_ext, ".tif", "Externsion for image file");
// Global of type clock_t (declared in <time.h>); defined elsewhere.
// Presumably stores the CPU-time reading taken at the previous timing
// checkpoint -- TODO confirm against the code that writes it.
00073 extern clock_t previous_cpu;
// Prototype only; the definition is not part of this header.
// Judging by the name, converts a set of TBLOBs into entries of a BLOCK_LIST --
// NOTE(review): confirm the details against the definition.
//   tessblobs - input blob(s), passed as a TBLOB pointer
//   filename  - C string; presumably the name of the source image -- TODO confirm
//   page_tr   - ICOORD passed by value; presumably the top-right page corner
//   do_shift  - BOOL8 flag; meaning not visible from this declaration
//   blocks    - BLOCK_LIST pointer; presumably receives the created blocks
00074 void make_blocks_from_blobs(
00075 TBLOB *tessblobs,
00076 const char *filename,
00077 ICOORD page_tr,
00078 BOOL8 do_shift,
00079 BLOCK_LIST *blocks
00080 );
// Prototype only; the definition is not part of this header.
// Takes a BLOCK_LIST, two TO_BLOCK_LISTs and a TBOX, all by pointer.
//   blocks      - block list to work on
//   land_blocks - presumably the landscape-oriented TO_BLOCKs -- TODO confirm
//   port_blocks - presumably the portrait-oriented TO_BLOCKs -- TODO confirm
//   page_box    - page bounding box; whether it is read, written or both is
//                 not visible from this declaration
00081 void find_components(
00082 BLOCK_LIST *blocks,
00083 TO_BLOCK_LIST *land_blocks,
00084 TO_BLOCK_LIST *port_blocks,
00085 TBOX *page_box);
// Prototype only. Takes a debug flag and a single BLOBNBOX by pointer;
// presumably computes and stores stroke-width information on that blob
// (inferred from the name -- confirm against the definition).
00086 void SetBlobStrokeWidth(bool debug, BLOBNBOX* blob);
// Prototype only; the definition is not part of this header.
// Takes the same three list types as the first three parameters of
// find_components. Judging by the name, distributes blobs among the blocks --
// NOTE(review): confirm which lists are inputs and which are outputs.
//   blocks      - BLOCK_LIST pointer
//   land_blocks - presumably landscape TO_BLOCKs -- TODO confirm
//   port_blocks - presumably portrait TO_BLOCKs -- TODO confirm
00087 void assign_blobs_to_blocks2(
00088 BLOCK_LIST *blocks,
00089 TO_BLOCK_LIST *land_blocks,
00090 TO_BLOCK_LIST *port_blocks
00091 );
// Prototype only; the definition is not part of this header.
//   page_tr    - ICOORD by value; presumably the top-right page corner
//   blocks     - TO_BLOCK_LIST whose blobs are (judging by the name) filtered
//   testing_on - BOOL8 flag; presumably enables debug/test output -- TODO confirm
00092 void filter_blobs(
00093 ICOORD page_tr,
00094 TO_BLOCK_LIST *blocks,
00095 BOOL8 testing_on
00096 );
// Prototype only; the definition is not part of this header.
// Takes four BLOBNBOX_LIST pointers and returns a float. Judging by the
// parameter names, blobs from src_list are sorted into noise/small/large
// lists -- NOTE(review): confirm, and confirm what the returned float means
// (not visible from this declaration).
00097 float filter_noise_blobs(
00098 BLOBNBOX_LIST *src_list,
00099 BLOBNBOX_LIST *noise_list,
00100 BLOBNBOX_LIST *small_list,
00101 BLOBNBOX_LIST *large_list
00102 );
// Prototype only. Signature is identical to filter_noise_blobs (four
// BLOBNBOX_LIST pointers, float result); presumably an alternative
// implementation of the same filtering step -- TODO confirm how the two
// differ and which one callers select.
00103 float filter_noise_blobs2(
00104 BLOBNBOX_LIST *src_list,
00105 BLOBNBOX_LIST *noise_list,
00106 BLOBNBOX_LIST *small_list,
00107 BLOBNBOX_LIST *large_list
00108 );
// Prototype only; the definition is not part of this header.
// Judging by the name, this is the page-level text-ordering entry point --
// NOTE(review): confirm against the definition.
//   page_tr     - ICOORD by value; presumably the top-right page corner
//   blocks      - BLOCK_LIST pointer
//   land_blocks - presumably landscape TO_BLOCKs -- TODO confirm
//   port_blocks - presumably portrait TO_BLOCKs -- TODO confirm
//   (unnamed)   - pointer to tesseract::Tesseract; the parameter has no name
//                 in this declaration
00109 void textord_page(
00110 ICOORD page_tr,
00111 BLOCK_LIST *blocks,
00112 TO_BLOCK_LIST *land_blocks,
00113 TO_BLOCK_LIST *port_blocks,
00114 tesseract::Tesseract*
00115 );
// Prototype only. Takes the BLOCK_LIST by pointer and returns nothing;
// presumably removes noise/empty content from the blocks (inferred from the
// name and the clean_* helpers declared below -- confirm against the definition).
00116 void cleanup_blocks(
00117 BLOCK_LIST *blocks
00118 );
// Prototype only. Takes one ROW by pointer and returns a BOOL8.
// NOTE(review): the meaning of the result (e.g. "row is noise" vs. "row was
// changed") is not visible from this declaration -- check the definition.
00119 BOOL8 clean_noise_from_row(
00120 ROW *row
00121 );
// Prototype only. Takes one ROW by pointer, returns nothing; judging by the
// name it operates on the words of that row -- confirm against the definition.
00122 void clean_noise_from_words(
00123 ROW *row
00124 );
00125
00126
// Prototype only. Same signature as clean_noise_from_words (ROW pointer in,
// no result); presumably targets small noise blobs specifically -- TODO confirm.
00127 void clean_small_noise_from_words(ROW *row);
// Prototype only. Takes one ROW by pointer and returns nothing; presumably
// adjusts that row's baseline in place (inferred from the name; possibly
// governed by the textord_blshift_* variables -- TODO confirm).
00128 void tweak_row_baseline(
00129 ROW *row
00130 );
// Prototype only. Takes two untyped (void*) items and returns an inT32;
// the shape suggests an ordering/comparison callback on blobs by y position
// (inferred from the name -- confirm the sign convention and the concrete
// item type against the definition).
// NOTE(review): the parameters are non-const void*, whereas qsort-style
// comparators take const void* -- verify how this function is passed around.
00131 inT32 blob_y_order(
00132 void *item1,
00133 void *item2);
00134 #endif