00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #ifndef MAKEROW_H
00021 #define MAKEROW_H
00022
00023 #include "varable.h"
00024 #include "ocrblock.h"
00025 #include "tessclas.h"
00026 #include "blobbox.h"
00027 #include "statistc.h"
00028 #include "notdll.h"
00029 #include "tesseractclass.h"
00030
// Outcome code returned by most_overlapping_row() (declared later in this
// header). The numeric values are the ones the compiler would assign
// implicitly; they are spelled out only for readability.
enum OVERLAP_STATE {
  ASSIGN = 0,   // presumably: attach the blob to the chosen row -- TODO confirm
  REJECT = 1,   // presumably: discard the blob -- TODO confirm
  NEW_ROW = 2   // presumably: start a new row for the blob -- TODO confirm
};
00037
// Classification of a row as produced by get_row_category() in this header.
// The numeric values match what implicit numbering would give.
enum ROW_CATEGORY {
  ROW_ASCENDERS_FOUND = 0,   // xheight > 0 and ascrise > 0
  ROW_DESCENDERS_FOUND = 1,  // xheight > 0, no ascrise, descdrop != 0
  ROW_UNKNOWN = 2,           // xheight > 0 but neither of the above
  ROW_INVALID = 3            // xheight <= 0
};
00044
00045 extern BOOL_VAR_H (textord_show_initial_rows, FALSE,
00046 "Display row accumulation");
00047 extern BOOL_VAR_H (textord_show_parallel_rows, FALSE,
00048 "Display page correlated rows");
00049 extern BOOL_VAR_H (textord_show_expanded_rows, FALSE,
00050 "Display rows after expanding");
00051 extern BOOL_VAR_H (textord_show_final_rows, FALSE,
00052 "Display rows after final fitting");
00053 extern BOOL_VAR_H (textord_show_final_blobs, FALSE,
00054 "Display blob bounds after pre-ass");
00055 extern BOOL_VAR_H (textord_test_landscape, FALSE, "Tests refer to land/port");
00056 extern BOOL_VAR_H (textord_parallel_baselines, TRUE,
00057 "Force parallel baselines");
00058 extern BOOL_VAR_H (textord_straight_baselines, FALSE,
00059 "Force straight baselines");
00060 extern BOOL_VAR_H (textord_quadratic_baselines, FALSE,
00061 "Use quadratic splines");
00062 extern BOOL_VAR_H (textord_old_baselines, TRUE, "Use old baseline algorithm");
00063 extern BOOL_VAR_H (textord_old_xheight, TRUE, "Use old xheight algorithm");
00064 extern BOOL_VAR_H (textord_fix_xheight_bug, TRUE, "Use spline baseline");
00065 extern BOOL_VAR_H (textord_fix_makerow_bug, TRUE,
00066 "Prevent multiple baselines");
00067 extern BOOL_VAR_H (textord_cblob_blockocc, TRUE,
00068 "Use new projection for underlines");
00069 extern BOOL_VAR_H (textord_debug_xheights, FALSE, "Test xheight algorithms");
00070 extern INT_VAR_H (textord_test_x, 0, "coord of test pt");
00071 extern INT_VAR_H (textord_test_y, 0, "coord of test pt");
00072 extern INT_VAR_H (textord_min_blobs_in_row, 4,
00073 "Min blobs before gradient counted");
00074 extern INT_VAR_H (textord_spline_minblobs, 8,
00075 "Min blobs in each spline segment");
00076 extern INT_VAR_H (textord_spline_medianwin, 6,
00077 "Size of window for spline segmentation");
00078 extern INT_VAR_H (textord_min_xheight, 10, "Min credible pixel xheight");
00079 extern double_VAR_H (textord_spline_shift_fraction, 0.02,
00080 "Fraction of line spacing for quad");
00081 extern double_VAR_H (textord_spline_outlier_fraction, 0.1,
00082 "Fraction of line spacing for outlier");
00083 extern double_VAR_H (textord_skew_ile, 0.5, "Ile of gradients for page skew");
00084 extern double_VAR_H (textord_skew_lag, 0.75,
00085 "Lag for skew on row accumulation");
00086 extern double_VAR_H (textord_linespace_iqrlimit, 0.2,
00087 "Max iqr/median for linespace");
00088 extern double_VAR_H (textord_width_limit, 8,
00089 "Max width of blobs to make rows");
00090 extern double_VAR_H (textord_chop_width, 1.5, "Max width before chopping");
00091 extern double_VAR_H (textord_merge_desc, 0.25,
00092 "Fraction of linespace for desc drop");
00093 extern double_VAR_H (textord_merge_x, 0.5,
00094 "Fraction of linespace for x height");
00095 extern double_VAR_H (textord_merge_asc, 0.25,
00096 "Fraction of linespace for asc height");
00097 extern double_VAR_H (textord_minxh, 0.25,
00098 "fraction of linesize for min xheight");
00099 extern double_VAR_H (textord_min_linesize, 1.25,
00100 "* blob height for initial linesize");
00101 extern double_VAR_H (textord_excess_blobsize, 1.3,
00102 "New row made if blob makes row this big");
00103 extern double_VAR_H (textord_occupancy_threshold, 0.4,
00104 "Fraction of neighbourhood");
00105 extern double_VAR_H (textord_underline_width, 2.0,
00106 "Multiple of line_size for underline");
00107 extern double_VAR_H(textord_min_blob_height_fraction, 0.75,
00108 "Min blob height/top to include blob top into xheight stats");
00109 extern double_VAR_H (textord_xheight_mode_fraction, 0.4,
00110 "Min pile height to make xheight");
00111 extern double_VAR_H (textord_ascheight_mode_fraction, 0.15,
00112 "Min pile height to make ascheight");
00113 extern double_VAR_H (textord_ascx_ratio_min, 1.2, "Min cap/xheight");
00114 extern double_VAR_H (textord_ascx_ratio_max, 1.7, "Max cap/xheight");
00115 extern double_VAR_H (textord_descx_ratio_min, 0.15, "Min desc/xheight");
00116 extern double_VAR_H (textord_descx_ratio_max, 0.6, "Max desc/xheight");
00117 extern double_VAR_H (textord_xheight_error_margin, 0.1, "Accepted variation");
00118
00119 inline void get_min_max_xheight(double block_linesize,
00120 int *min_height, int *max_height) {
00121 *min_height = static_cast<inT32>(floor(block_linesize * textord_minxh));
00122 if (*min_height < textord_min_xheight) *min_height = textord_min_xheight;
00123 *max_height = static_cast<inT32>(ceil(block_linesize * 3));
00124 }
00125
00126 inline ROW_CATEGORY get_row_category(const TO_ROW *row) {
00127 if (row->xheight <= 0) return ROW_INVALID;
00128 return (row->ascrise > 0) ? ROW_ASCENDERS_FOUND :
00129 (row->descdrop != 0) ? ROW_DESCENDERS_FOUND : ROW_UNKNOWN;
00130 }
00131
// Returns true if test lies in the closed band between num * (1 - margin)
// and num * (1 + margin).
//
// When num is negative the two scaled bounds come out in reverse order
// (num * (1 - margin) is the larger one), so they are swapped before the
// comparison. Without the swap no value could ever be accepted for a
// negative num, not even test == num. Results for num >= 0 are unchanged.
inline bool within_error_margin(float test, float num, float margin) {
  float lower = num * (1 - margin);
  float upper = num * (1 + margin);
  if (num < 0) {
    const float swapped = lower;
    lower = upper;
    upper = swapped;
  }
  return test >= lower && test <= upper;
}
00135
00136 void fill_heights(TO_ROW *row, float gradient, int min_height,
00137 int max_height, STATS *heights, STATS *floating_heights);
00138
00139 float make_single_row(ICOORD page_tr, TO_BLOCK* block,
00140 TO_BLOCK_LIST* blocks, tesseract::Tesseract* tess);
00141 float make_rows(
00142 ICOORD page_tr,
00143 BLOCK_LIST *blocks,
00144 TO_BLOCK_LIST *land_blocks,
00145 TO_BLOCK_LIST *port_blocks,
00146 tesseract::Tesseract* tess
00147 );
00148 void make_initial_textrows(
00149 ICOORD page_tr,
00150 TO_BLOCK *block,
00151 FCOORD rotation,
00152 BOOL8 testing_on
00153 );
00154 void fit_lms_line(
00155 TO_ROW *row
00156 );
00157 void compute_page_skew(
00158 TO_BLOCK_LIST *blocks,
00159 float &page_m,
00160 float &page_err
00161 );
00162 void cleanup_rows(
00163 ICOORD page_tr,
00164 TO_BLOCK *block,
00165 float gradient,
00166 FCOORD rotation,
00167 inT32 block_edge,
00168 BOOL8 testing_on,
00169 tesseract::Tesseract* tess
00170 );
00171 void delete_non_dropout_rows(
00172 TO_BLOCK *block,
00173 float gradient,
00174 FCOORD rotation,
00175 inT32 block_edge,
00176 BOOL8 testing_on
00177 );
00178 BOOL8 find_best_dropout_row(
00179 TO_ROW *row,
00180 inT32 distance,
00181 float dist_limit,
00182 inT32 line_index,
00183 TO_ROW_IT *row_it,
00184 BOOL8 testing_on
00185 );
00186 TBOX deskew_block_coords(
00187 TO_BLOCK *block,
00188 float gradient
00189 );
00190 void compute_line_occupation(
00191 TO_BLOCK *block,
00192 float gradient,
00193 inT32 min_y,
00194 inT32 max_y,
00195 inT32 *occupation,
00196 inT32 *deltas
00197 );
00198 void compute_occupation_threshold(
00199 inT32 low_window,
00200 inT32 high_window,
00201 inT32 line_count,
00202 inT32 *occupation,
00203 inT32 *thresholds
00204 );
00205 void compute_dropout_distances(
00206 inT32 *occupation,
00207 inT32 *thresholds,
00208 inT32 line_count
00209 );
00210 void expand_rows(
00211 ICOORD page_tr,
00212 TO_BLOCK *block,
00213 float gradient,
00214 FCOORD rotation,
00215 inT32 block_edge,
00216 BOOL8 testing_on
00217 );
00218 void adjust_row_limits(
00219 TO_BLOCK *block
00220 );
00221 void compute_row_stats(
00222 TO_BLOCK *block,
00223 BOOL8 testing_on
00224 );
00225 void compute_block_xheight(
00226 TO_BLOCK *block,
00227 float gradient,
00228 tesseract::Tesseract* tess
00229 );
00230 float median_block_xheight(
00231 TO_BLOCK *block,
00232 float gradient
00233 );
00234 void compute_row_xheight(
00235 TO_ROW *row,
00236 float gradient,
00237 int block_height,
00238 tesseract::Tesseract* tess
00239 );
00240
00241 int compute_xheight_from_modes(
00242 STATS *heights, STATS *floating_heights, int min_height,
00243 int max_height, float *xheight, float *ascrise);
00244
00245 inT32 compute_row_descdrop(
00246 TO_ROW *row,
00247 float gradient,
00248 int xheight_blob_count,
00249 STATS *heights
00250 );
00251 inT32 compute_height_modes(
00252 STATS *heights,
00253 inT32 min_height,
00254 inT32 max_height,
00255 inT32 *modes,
00256 inT32 maxmodes
00257 );
00258 void correct_row_xheight(
00259 TO_ROW *row,
00260 float xheight,
00261 float ascrise,
00262 float descdrop);
00263 void separate_underlines(
00264 TO_BLOCK *block,
00265 float gradient,
00266 FCOORD rotation,
00267 BOOL8 testing_on
00268 );
00269 void pre_associate_blobs(
00270 ICOORD page_tr,
00271 TO_BLOCK *block,
00272 FCOORD rotation,
00273 BOOL8 testing_on
00274 );
00275 void fit_parallel_rows(
00276 TO_BLOCK *block,
00277 float gradient,
00278 FCOORD rotation,
00279 inT32 block_edge,
00280 BOOL8 testing_on
00281 );
00282 void fit_parallel_lms(
00283 float gradient,
00284 TO_ROW *row
00285 );
00286 void make_spline_rows(
00287 TO_BLOCK *block,
00288 float gradient,
00289 FCOORD rotation,
00290 inT32 block_edge,
00291 BOOL8 testing_on,
00292 tesseract::Tesseract* tess
00293 );
00294 void make_baseline_spline(
00295 TO_ROW *row,
00296 TO_BLOCK *block
00297 );
00298 BOOL8 segment_baseline (
00299 TO_ROW * row,
00300 TO_BLOCK * block,
00301 inT32 & segments,
00302 inT32 xstarts[]
00303 );
00304 double *linear_spline_baseline (
00305 TO_ROW * row,
00306 TO_BLOCK * block,
00307 inT32 & segments,
00308 inT32 xstarts[]
00309 );
00310 void assign_blobs_to_rows(
00311 TO_BLOCK *block,
00312 float *gradient,
00313 int pass,
00314 BOOL8 reject_misses,
00315 BOOL8 make_new_rows,
00316 BOOL8 drawing_skew
00317 );
00318
00319 OVERLAP_STATE most_overlapping_row(TO_ROW_IT *row_it,
00320 TO_ROW *&best_row,
00321 float top,
00322 float bottom,
00323 float rowsize,
00324 BOOL8 testing_blob
00325 );
// Ordering callbacks taking two untyped item pointers and returning int.
// The signature matches the comparator shape used by qsort-style sorts;
// presumably negative / zero / positive for less / equal / greater --
// TODO confirm against the implementation.
int blob_x_order(const void *item1, const void *item2);
int row_y_order(const void *item1, const void *item2);
int row_spacing_order(const void *item1, const void *item2);
00335
00336 void mark_repeated_chars(TO_ROW *row, float block_xheight,
00337 tesseract::Tesseract *tess);
00338 #endif