Tesseract 3.01
|
#include <tabfind.h>
Public Member Functions | |
TabFind (int gridsize, const ICOORD &bleft, const ICOORD &tright, TabVector_LIST *vlines, int vertical_x, int vertical_y, int resolution) | |
virtual | ~TabFind () |
void | InsertBlobList (bool h_spread, bool v_spread, bool large, BLOBNBOX_LIST *blobs, bool take_ownership, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid) |
bool | InsertBlob (bool h_spread, bool v_spread, bool large, BLOBNBOX *blob, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid) |
int | GutterWidth (int bottom_y, int top_y, const TabVector &v, int *required_shift) |
void | GutterWidthAndNeighbourGap (int tab_x, int mean_height, int max_gutter, bool left, BLOBNBOX *bbox, int *gutter_width, int *neighbour_gap) |
BLOBNBOX * | AdjacentBlob (const BLOBNBOX *bbox, bool right_to_left, int gap_limit) |
BlobRegionType | ComputeBlobType (BLOBNBOX *blob) |
int | RightEdgeForBox (const TBOX &box, bool crossing, bool extended) |
int | LeftEdgeForBox (const TBOX &box, bool crossing, bool extended) |
TabVector * | RightTabForBox (const TBOX &box, bool crossing, bool extended) |
TabVector * | LeftTabForBox (const TBOX &box, bool crossing, bool extended) |
bool | CommonWidth (int width) |
WidthCallback * | WidthCB () |
const ICOORD & | image_origin () const |
Static Public Member Functions | |
static bool | DifferentSizes (int size1, int size2) |
static bool | VeryDifferentSizes (int size1, int size2) |
Protected Member Functions | |
TabVector_LIST * | vectors () |
TabVector_LIST * | dead_vectors () |
bool | FindTabVectors (TabVector_LIST *hlines, BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, int min_gutter_width, FCOORD *deskew, FCOORD *reskew) |
void | DontFindTabVectors (BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, FCOORD *deskew, FCOORD *reskew) |
void | SetupTabSearch (int x, int y, int *min_key, int *max_key) |
ScrollView * | DisplayTabVectors (ScrollView *tab_win) |
ScrollView * | FindInitialTabVectors (BLOBNBOX_LIST *image_blobs, int min_gutter_width, TO_BLOCK *block) |
void | ResetForVerticalText (const FCOORD &rotate, const FCOORD &rerotate, TabVector_LIST *horizontal_lines, int *min_gutter_width) |
Static Protected Member Functions | |
static void | RotateBlobList (const FCOORD &rotation, BLOBNBOX_LIST *blobs) |
Protected Attributes | |
ICOORD | vertical_skew_ |
int | resolution_ |
The TabFind class contains code to find tab-stops and maintain the vectors_ list of tab vectors. It also provides an interface to find neighbouring blobs in the grid of BLOBNBOXes that is used by multiple subclasses. Searching is a complex operation because of the need to enforce rule/separator lines and (when available) tab-stop boundaries, so this class, as the holder of the list of TabVectors, provides the search functions.
tesseract::TabFind::TabFind | ( | int | gridsize, |
const ICOORD & | bleft, | ||
const ICOORD & | tright, | ||
TabVector_LIST * | vlines, | ||
int | vertical_x, | ||
int | vertical_y, | ||
int | resolution | ||
) |
tesseract::TabFind::~TabFind | ( | ) | [virtual] |
BLOBNBOX * tesseract::TabFind::AdjacentBlob | ( | const BLOBNBOX * | bbox, |
bool | right_to_left, | ||
int | gap_limit | ||
) |
Find the next adjacent (to left or right) blob on this text line, with the constraint that it must vertically significantly overlap the input box.
bool tesseract::TabFind::CommonWidth | ( | int | width | ) |
Return true if the given width is close to one of the common widths in column_widths_.
BlobRegionType tesseract::TabFind::ComputeBlobType | ( | BLOBNBOX * | blob | ) |
Compute and return (but do not set) the type of the blob as BRT_TEXT or BRT_UNKNOWN, according to how well it forms a text line.
TabVector_LIST* tesseract::TabFind::dead_vectors | ( | ) | [inline, protected] |
bool tesseract::TabFind::DifferentSizes | ( | int | size1, |
int | size2 | ||
) | [static] |
Return true if the sizes differ by more than a factor of 2.
ScrollView * tesseract::TabFind::DisplayTabVectors | ( | ScrollView * | tab_win | ) | [protected] |
Display the tab vectors found in this grid.
void tesseract::TabFind::DontFindTabVectors | ( | BLOBNBOX_LIST * | image_blobs, |
TO_BLOCK * | block, | ||
FCOORD * | deskew, | ||
FCOORD * | reskew | ||
) | [protected] |
ScrollView * tesseract::TabFind::FindInitialTabVectors | ( | BLOBNBOX_LIST * | image_blobs, |
int | min_gutter_width, | ||
TO_BLOCK * | block | ||
) | [protected] |
bool tesseract::TabFind::FindTabVectors | ( | TabVector_LIST * | hlines, |
BLOBNBOX_LIST * | image_blobs, | ||
TO_BLOCK * | block, | ||
int | min_gutter_width, | ||
FCOORD * | deskew, | ||
FCOORD * | reskew | ||
) | [protected] |
Top-level function to find TabVectors in an input page block. Returns false if the detected skew angle is impossible.
int tesseract::TabFind::GutterWidth | ( | int | bottom_y, |
int | top_y, | ||
const TabVector & | v, | ||
int * | required_shift | ||
) |
void tesseract::TabFind::GutterWidthAndNeighbourGap | ( | int | tab_x, |
int | mean_height, | ||
int | max_gutter, | ||
bool | left, | ||
BLOBNBOX * | bbox, | ||
int * | gutter_width, | ||
int * | neighbour_gap | ||
) |
Find the gutter width and distance to inner neighbour for the given blob.
const ICOORD& tesseract::TabFind::image_origin | ( | ) | const [inline] |
Return the coords at which to draw the image backdrop.
bool tesseract::TabFind::InsertBlob | ( | bool | h_spread, |
bool | v_spread, | ||
bool | large, | ||
BLOBNBOX * | blob, | ||
BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > * | grid | ||
) |
Insert a single blob into the given grid (not necessarily this). If h_spread, then all cells covered horizontally by the box are used, otherwise, just the bottom-left. Similarly for v_spread. If large, then insert only if the bounding box doesn't intersect anything else already in the grid. Returns true if the blob was inserted. A side effect is that the left and right rule edges of the blob are set according to the tab vectors in this (not grid).
void tesseract::TabFind::InsertBlobList | ( | bool | h_spread, |
bool | v_spread, | ||
bool | large, | ||
BLOBNBOX_LIST * | blobs, | ||
bool | take_ownership, | ||
BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > * | grid | ||
) |
Insert a list of blobs into the given grid (not necessarily this). If take_ownership is true, then the blobs are removed from the source list. See InsertBlob for the other arguments.
int tesseract::TabFind::LeftEdgeForBox | ( | const TBOX & | box, |
bool | crossing, | ||
bool | extended | ||
) |
As RightEdgeForBox, but finds the left edge instead.
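TabVector * tesseract::TabFind::LeftTabForBox | ( | const TBOX & | box, |
bool | crossing, | ||
bool | extended | ||
) |
As RightTabForBox, but finds the left TabVector instead.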
void tesseract::TabFind::ResetForVerticalText | ( | const FCOORD & | rotate, |
const FCOORD & | rerotate, | ||
TabVector_LIST * | horizontal_lines, | ||
int * | min_gutter_width | ||
) | [protected] |
int tesseract::TabFind::RightEdgeForBox | ( | const TBOX & | box, |
bool | crossing, | ||
bool | extended | ||
) |
Return the x-coord that corresponds to the right edge for the given box. If there is a rule line to the right that vertically overlaps it, then return the x-coord of the rule line, otherwise return the right edge of the page. For details see RightTabForBox below.
TabVector * tesseract::TabFind::RightTabForBox | ( | const TBOX & | box, |
bool | crossing, | ||
bool | extended | ||
) |
Return the TabVector that corresponds to the right edge for the given box. If there is a TabVector to the right that vertically overlaps it, then return it, otherwise return NULL. Note that Right and Left refer to the position of the TabVector, not its type, i.e. RightTabForBox returns the nearest TabVector to the right of the box, regardless of its type. If a TabVector crosses right through the box (as opposed to grazing one edge or missing entirely), then crossing false will ignore such a line, whereas crossing true will return the line for BOTH left and right edges. If extended is true, then TabVectors are considered to extend to their extended_start/end_y, otherwise just to their startpt_ and endpt_. These functions make use of an internal iterator to the vectors_ list for speed when used repeatedly on neighbouring boxes. The caveat is that the iterator must be updated whenever the list is modified.
void tesseract::TabFind::RotateBlobList | ( | const FCOORD & | rotation, |
BLOBNBOX_LIST * | blobs | ||
) | [static, protected] |
void tesseract::TabFind::SetupTabSearch | ( | int | x, |
int | y, | ||
int * | min_key, | ||
int * | max_key | ||
) | [protected] |
TabVector_LIST* tesseract::TabFind::vectors | ( | ) | [inline, protected] |
Accessors
bool tesseract::TabFind::VeryDifferentSizes | ( | int | size1, |
int | size2 | ||
) | [static] |
Return true if the sizes differ by more than a factor of 5.
WidthCallback* tesseract::TabFind::WidthCB | ( | ) | [inline] |
Return a callback for testing CommonWidth.
int tesseract::TabFind::resolution_ [protected] |
ICOORD tesseract::TabFind::vertical_skew_ [protected] |