Tesseract
3.02
|
Go to the source code of this file.
Defines | |
#define | PROJECTION_MARGIN 10 |
#define | EXTERN |
Functions | |
void | restore_underlined_blobs (TO_BLOCK *block) |
TO_ROW * | most_overlapping_row (TO_ROW_LIST *rows, BLOBNBOX *blob) |
void | find_underlined_blobs (BLOBNBOX *u_line, QSPLINE *baseline, float xheight, float baseline_offset, ICOORDELT_LIST *chop_cells) |
void | vertical_cunderline_projection (C_OUTLINE *outline, QSPLINE *baseline, float xheight, float baseline_offset, STATS *lower_proj, STATS *middle_proj, STATS *upper_proj) |
Variables | |
EXTERN double | textord_underline_offset = 0.1 |
EXTERN bool | textord_restore_underlines = 1 |
#define EXTERN |
Definition at line 27 of file underlin.cpp.
#define PROJECTION_MARGIN 10 |
Definition at line 26 of file underlin.cpp.
void find_underlined_blobs | ( | BLOBNBOX * | u_line, |
QSPLINE * | baseline, | ||
float | xheight, | ||
float | baseline_offset, | ||
ICOORDELT_LIST * | chop_cells | ||
) |
Definition at line 179 of file underlin.cpp.
{
  // Finds the horizontal runs of the underline blob u_line that have ink in
  // the "middle" band (the band vertical_cunderline_projection accumulates
  // into middle_proj) and appends one ICOORDELT(start_x, end_x) per run to
  // chop_cells.
  //
  //   u_line          - underline blob; must have a non-NULL cblob().
  //   baseline        - baseline spline passed through to the projection.
  //   xheight         - height of the middle band above the offset baseline.
  //   baseline_offset - amount added to the baseline before banding.
  //   chop_cells      - output list; new cells are added via an iterator.
  TBOX box = u_line->bounding_box();
  ICOORDELT_IT cell_it = chop_cells;

  // One histogram bucket per x column covered by the blob's bounding box.
  // Only middle_proj is read below; the other two are required by the
  // projection routine's interface.
  STATS upper_proj(box.left(), box.right() + 1);
  STATS middle_proj(box.left(), box.right() + 1);
  STATS lower_proj(box.left(), box.right() + 1);

  ASSERT_HOST(u_line->cblob() != NULL);

  // Project every top-level outline of the blob (children are handled
  // recursively by vertical_cunderline_projection itself).
  C_OUTLINE_IT outline_it;
  outline_it.set_to_list(u_line->cblob()->out_list());
  for (outline_it.mark_cycle_pt(); !outline_it.cycled_list();
       outline_it.forward()) {
    vertical_cunderline_projection(outline_it.data(), baseline, xheight,
                                   baseline_offset, &lower_proj,
                                   &middle_proj, &upper_proj);
  }

  // Sweep left to right, emitting each maximal run of columns whose
  // middle-band count is positive.
  const inT32 right_edge = box.right();
  inT16 run_start = box.left();
  while (run_start < right_edge) {
    if (middle_proj.pile_count(run_start) > 0) {
      inT16 run_end = run_start + 1;
      while (run_end < right_edge && middle_proj.pile_count(run_end) > 0)
        run_end++;
      ICOORD blob_chop = ICOORD(run_start, run_end);
      cell_it.add_after_then_move(new ICOORDELT(blob_chop));
      // Column run_end is either empty or the right edge, so resume after it.
      run_start = run_end;
    }
    run_start++;
  }
}
TO_ROW* most_overlapping_row | ( | TO_ROW_LIST * | rows, | BLOBNBOX * | blob | ) |
Definition at line 122 of file underlin.cpp.
{
  // Picks the row from rows that best matches blob vertically, evaluated at
  // the blob's horizontal centre. Returns NULL when the row list is empty.
  //
  // NOTE(review): the two-phase scan appears to assume rows are ordered from
  // the top of the page downwards - confirm against the caller.
  TBOX blob_box = blob->bounding_box();
  inT16 x = (blob_box.left() + blob_box.right()) / 2;  // centre column

  TO_ROW_IT row_it = rows;
  TO_ROW *best_row = NULL;
  float bestover = (float) -MAX_INT32;

  if (row_it.empty())
    return NULL;

  TO_ROW *row = row_it.data();
  row_it.mark_cycle_pt();

  // Phase 1: step past rows whose (baseline + descdrop) is above the blob's
  // top, remembering the most recent one as a fallback candidate.
  // Note that descdrop is *added* in the score below, exactly as written in
  // the original expression.
  while (row->baseline.y(x) + row->descdrop > blob_box.top()
         && !row_it.cycled_list()) {
    best_row = row;
    bestover = blob_box.top() - row->baseline.y(x) + row->descdrop;
    row_it.forward();
    row = row_it.data();
  }

  // Phase 2: for rows whose (baseline + xheight + ascrise) reaches at least
  // the blob's bottom, score the vertical overlap as
  //   min(row top, blob top) - max(blob bottom, row bottom)
  // and keep the highest-scoring row.
  while (row->baseline.y(x) + row->xheight + row->ascrise >= blob_box.bottom()
         && !row_it.cycled_list()) {
    float overlap = row->baseline.y(x) + row->xheight + row->ascrise;
    if (blob_box.top() < overlap)
      overlap = blob_box.top();

    if (blob_box.bottom() > row->baseline.y(x) + row->descdrop)
      overlap -= blob_box.bottom();
    else
      overlap -= row->baseline.y(x) + row->descdrop;

    if (overlap > bestover) {
      bestover = overlap;
      best_row = row;
    }
    row_it.forward();
    row = row_it.data();
  }

  // Nothing overlapped: prefer the row the scan stopped on if it is closer
  // to the blob's bottom than the best candidate so far.
  if (bestover < 0
      && row->baseline.y(x) + row->xheight + row->ascrise
         - blob_box.bottom() > bestover)
    best_row = row;

  return best_row;
}
void restore_underlined_blobs | ( | TO_BLOCK * | block | ) |
Definition at line 39 of file underlin.cpp.
{
  // Walks every blob in block->underlines, cuts out the x-ranges reported by
  // find_underlined_blobs, inserts the cut-out pieces into the best matching
  // row, and puts whatever is left over back into block->underlines.
  //
  // Fix relative to the original: most_overlapping_row() can return NULL, and
  // the result used to be dereferenced unconditionally. Such an underline is
  // now kept untouched as a residual underline instead of crashing.
  inT16 chop_coord;                   // current chop boundary
  TBOX blob_box;                      // bounding box of the underline
  BLOBNBOX *u_line;                   // underline being processed
  TO_ROW *row;                        // best row for the underline
  ICOORDELT_LIST chop_cells;          // x-ranges to cut out
  BLOBNBOX_LIST residual_underlines;  // pieces that stay underlines
  C_OUTLINE_LIST left_coutlines;
  C_OUTLINE_LIST right_coutlines;
  ICOORDELT_IT cell_it = &chop_cells;
  BLOBNBOX_IT under_it = &block->underlines;
  BLOBNBOX_IT ru_it = &residual_underlines;

  if (block->get_rows()->empty())
    return;  // Don't crash if there are no rows.

  for (under_it.mark_cycle_pt(); !under_it.cycled_list();
       under_it.forward()) {
    u_line = under_it.extract();
    blob_box = u_line->bounding_box();
    row = most_overlapping_row(block->get_rows(), u_line);
    if (row == NULL) {
      // No usable row: keep the underline whole. It has already been
      // extracted, so park it with the residuals, which are moved back into
      // block->underlines at the end.
      ru_it.add_after_then_move(u_line);
      continue;
    }

    find_underlined_blobs(u_line, &row->baseline, row->xheight,
                          row->xheight * textord_underline_offset,
                          &chop_cells);

    cell_it.set_to_list(&chop_cells);
    for (cell_it.mark_cycle_pt(); !cell_it.cycled_list();
         cell_it.forward()) {
      chop_coord = cell_it.data()->x();
      // Only chop ranges wider than the allowed chop error.
      if (cell_it.data()->y() - chop_coord > textord_fp_chop_error + 1) {
        // Split at the start of the range: the left part stays an underline.
        split_to_blob(u_line, chop_coord, textord_fp_chop_error + 0.5,
                      &left_coutlines, &right_coutlines);
        if (!left_coutlines.empty()) {
          ru_it.add_after_then_move(
              new BLOBNBOX(new C_BLOB(&left_coutlines)));
        }
        // Split at the end of the range: this left part goes into the row.
        // NOTE(review): passing NULL presumably continues from the outlines
        // already held in right_coutlines - confirm in split_to_blob.
        chop_coord = cell_it.data()->y();
        split_to_blob(NULL, chop_coord, textord_fp_chop_error + 0.5,
                      &left_coutlines, &right_coutlines);
        if (!left_coutlines.empty()) {
          row->insert_blob(new BLOBNBOX(new C_BLOB(&left_coutlines)));
        } else {
          fprintf(stderr, "Error:no outlines after chopping from %d to %d from (%d,%d)->(%d,%d)\n", cell_it.data ()->x (), cell_it.data ()->y (), blob_box.left (), blob_box.bottom (), blob_box.right (), blob_box.top ());
          ASSERT_HOST(FALSE);
        }
        u_line = NULL;  // handed to split_to_blob; nothing left to delete
      }
      delete cell_it.extract();
    }

    // Whatever remains to the right of the last chop is still an underline.
    if (!right_coutlines.empty()) {
      split_to_blob(NULL, blob_box.right(), textord_fp_chop_error + 0.5,
                    &left_coutlines, &right_coutlines);
      if (!left_coutlines.empty())
        ru_it.add_after_then_move(new BLOBNBOX(new C_BLOB(&left_coutlines)));
    }

    // The underline was never chopped: discard it together with its blob.
    if (u_line != NULL) {
      if (u_line->cblob() != NULL)
        delete u_line->cblob();
      delete u_line;
    }
  }

  // Move the residual pieces back into the block's underline list.
  if (!ru_it.empty()) {
    ru_it.move_to_first();
    for (ru_it.mark_cycle_pt(); !ru_it.cycled_list(); ru_it.forward()) {
      under_it.add_after_then_move(ru_it.extract());
    }
  }
}
void vertical_cunderline_projection | ( | C_OUTLINE * | outline, |
QSPLINE * | baseline, | ||
float | xheight, | ||
float | baseline_offset, | ||
STATS * | lower_proj, | ||
STATS * | middle_proj, | ||
STATS * | upper_proj | ||
) |
Definition at line 224 of file underlin.cpp.
{
  // Accumulates the outline (and, recursively, its children) into three
  // per-column histograms, split by two horizontal lines:
  //   lower_y = baseline(x) + baseline_offset            (rounded)
  //   upper_y = baseline(x) + baseline_offset + xheight  (rounded)
  // Steps with positive x add negative amounts at column pos.x(); steps with
  // negative x add positive amounts at column pos.x() - 1. Vertical steps
  // contribute nothing.
  //
  // Fix relative to the original: the step counter was inT16 while the path
  // length is held in an inT32, so an outline longer than 32767 steps made
  // the counter wrap and the loop never finish. The counter is now inT32.
  ICOORD pos;               // current point on the outline
  ICOORD step;              // current edge step
  inT16 lower_y, upper_y;   // band limits at the current column
  inT32 length;             // number of steps in the outline
  inT32 stepindex;          // current step; same width as length
  C_OUTLINE_IT out_it = outline->child();

  pos = outline->start_pos();
  length = outline->pathlength();
  for (stepindex = 0; stepindex < length; stepindex++) {
    step = outline->step(stepindex);
    if (step.x() > 0) {
      // Rightward edge: subtract the height of pos.y(), split per band.
      lower_y = (inT16) floor(baseline->y(pos.x()) + baseline_offset + 0.5);
      upper_y = (inT16) floor(baseline->y(pos.x()) + baseline_offset
                              + xheight + 0.5);
      if (pos.y() >= lower_y) {
        lower_proj->add(pos.x(), -lower_y);
        if (pos.y() >= upper_y) {
          middle_proj->add(pos.x(), lower_y - upper_y);
          upper_proj->add(pos.x(), upper_y - pos.y());
        } else {
          middle_proj->add(pos.x(), lower_y - pos.y());
        }
      } else {
        lower_proj->add(pos.x(), -pos.y());
      }
    } else if (step.x() < 0) {
      // Leftward edge: add the height of pos.y() to the column being
      // entered (pos.x() - 1), split per band.
      lower_y = (inT16) floor(baseline->y(pos.x() - 1) + baseline_offset
                              + 0.5);
      upper_y = (inT16) floor(baseline->y(pos.x() - 1) + baseline_offset
                              + xheight + 0.5);
      if (pos.y() >= lower_y) {
        lower_proj->add(pos.x() - 1, lower_y);
        if (pos.y() >= upper_y) {
          middle_proj->add(pos.x() - 1, upper_y - lower_y);
          upper_proj->add(pos.x() - 1, pos.y() - upper_y);
        } else {
          middle_proj->add(pos.x() - 1, pos.y() - lower_y);
        }
      } else {
        lower_proj->add(pos.x() - 1, pos.y());
      }
    }
    pos += step;
  }

  // Child outlines are projected into the same histograms.
  for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) {
    vertical_cunderline_projection(out_it.data(), baseline, xheight,
                                   baseline_offset, lower_proj, middle_proj,
                                   upper_proj);
  }
}
EXTERN bool textord_restore_underlines = 1 |
"Chop underlines & put back"
Definition at line 31 of file underlin.cpp.
EXTERN double textord_underline_offset = 0.1 |
"Fraction of x to ignore"
Definition at line 29 of file underlin.cpp.