Tesseract
3.02
|
#include <time.h>
#include "params.h"
#include "ocrblock.h"
#include "blobs.h"
#include "blobbox.h"
#include "notdll.h"
Go to the source code of this file.
Namespaces | |
namespace | tesseract |
Functions | |
void | make_blocks_from_blobs (TBLOB *tessblobs, const char *filename, ICOORD page_tr, BOOL8 do_shift, BLOCK_LIST *blocks) |
void | SetBlobStrokeWidth (Pix *pix, BLOBNBOX *blob) |
void | assign_blobs_to_blocks2 (Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *port_blocks) |
void | textord_page (ICOORD page_tr, BLOCK_LIST *blocks, TO_BLOCK_LIST *land_blocks, TO_BLOCK_LIST *port_blocks, tesseract::Tesseract *) |
void | tweak_row_baseline (ROW *row, double blshift_maxshift, double blshift_xfraction) |
inT32 | blob_y_order (void *item1, void *item2) |
void assign_blobs_to_blocks2(Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *port_blocks)
Definition at line 156 of file tordmain.cpp.
{
  // For every BLOCK in `blocks`, allocate a TO_BLOCK wrapping it and append
  // that TO_BLOCK to `port_blocks`. The C_BLOBs held by the source block are
  // extracted from their lists, wrapped in newly allocated BLOBNBOXes, given
  // a stroke-width estimate from `pix`, and appended to the TO_BLOCK's lists.
  BLOCK_IT src_block_it = blocks;            // walks the input blocks
  C_BLOB_IT outline_it;                      // walks one source blob list
  BLOBNBOX_IT bbox_it;                       // appends to one destination list
  TO_BLOCK_IT dest_block_it = port_blocks;   // appends to the output list

  for (src_block_it.mark_cycle_pt(); !src_block_it.cycled_list();
       src_block_it.forward()) {
    BLOCK *src_block = src_block_it.data();
    TO_BLOCK *dest_block = new TO_BLOCK(src_block);

    // Accepted outlines: src_block->blob_list() -> dest_block->blobs.
    bbox_it.set_to_list(&dest_block->blobs);
    outline_it.set_to_list(src_block->blob_list());
    for (outline_it.mark_cycle_pt(); !outline_it.cycled_list();
         outline_it.forward()) {
      // extract() removes the C_BLOB from the source list before wrapping it.
      BLOBNBOX *wrapped = new BLOBNBOX(outline_it.extract());
      SetBlobStrokeWidth(pix, wrapped);
      bbox_it.add_after_then_move(wrapped);
    }

    // Rejected outlines: src_block->reject_blobs() -> dest_block->noise_blobs.
    // Keeping them in noise_blobs lets them be reconsidered and sorted back
    // into rows, so outlines that were rejected by mistake can be recovered.
    bbox_it.set_to_list(&dest_block->noise_blobs);
    outline_it.set_to_list(src_block->reject_blobs());
    for (outline_it.mark_cycle_pt(); !outline_it.cycled_list();
         outline_it.forward()) {
      BLOBNBOX *wrapped = new BLOBNBOX(outline_it.extract());
      SetBlobStrokeWidth(pix, wrapped);
      bbox_it.add_after_then_move(wrapped);
    }

    dest_block_it.add_after_then_move(dest_block);
  }
}
inT32 blob_y_order(void *item1, void *item2)
Definition at line 793 of file tordmain.cpp.
{
  // Comparison callback over two array slots that each hold a BLOBNBOX*.
  // Returns -1, 0 or 1. Ordering: larger bounding-box bottom first; on equal
  // bottoms, smaller bounding-box left first.
  BLOBNBOX *lhs = *static_cast<BLOBNBOX **>(item1);
  BLOBNBOX *rhs = *static_cast<BLOBNBOX **>(item2);
  const TBOX &lhs_box = lhs->bounding_box();
  const TBOX &rhs_box = rhs->bounding_box();

  // Primary key: bottom edge, descending.
  if (lhs_box.bottom() > rhs_box.bottom())
    return -1;
  if (lhs_box.bottom() < rhs_box.bottom())
    return 1;

  // Tie-break: left edge, ascending.
  if (lhs_box.left() < rhs_box.left())
    return -1;
  if (lhs_box.left() > rhs_box.left())
    return 1;

  return 0;
}
void make_blocks_from_blobs(TBLOB *tessblobs, const char *filename, ICOORD page_tr, BOOL8 do_shift, BLOCK_LIST *blocks)
void SetBlobStrokeWidth(Pix *pix, BLOBNBOX *blob)
Definition at line 56 of file tordmain.cpp.
{
  // Estimates the horizontal and vertical stroke width of `blob` from the
  // image `pix` and stores both on the blob via set_horz_stroke_width() and
  // set_vert_stroke_width(). A width of zero means "not enough information".
  //
  // Method: clip the blob's bounding box out of `pix`, build a distance map
  // of the clip, then histogram the widths implied by local maxima of the
  // distance along each row (horizontal) and each column (vertical). The
  // median of each histogram is the stroke width.

  // Cut the blob rectangle into a Pix.
  // NOTE(review): the y argument is pix_height - box.top(), presumably to
  // convert a bottom-up box coordinate to a top-down image row; confirm.
  int pix_height = pixGetHeight(pix);
  const TBOX& box = blob->bounding_box();
  int width = box.width();
  int height = box.height();
  Box* blob_pix_box = boxCreate(box.left(), pix_height - box.top(),
                                width, height);
  Pix* pix_blob = pixClipRectangle(pix, blob_pix_box, NULL);
  boxDestroy(&blob_pix_box);
  Pix* dist_pix = pixDistanceFunction(pix_blob, 4, 8, L_BOUNDARY_BG);
  pixDestroy(&pix_blob);
  if (dist_pix == NULL) {
    // Clipping or the distance transform failed (e.g. the box does not
    // intersect the image). There is nothing to measure, and reading the
    // pixel data below would dereference a null pointer, so store the same
    // "no information" value used for insufficient samples.
    blob->set_horz_stroke_width(0.0f);
    blob->set_vert_stroke_width(0.0f);
    return;
  }

  // Compute the stroke widths.
  uinT32* data = pixGetData(dist_pix);
  int wpl = pixGetWpl(dist_pix);

  // Horizontal width of stroke.
  STATS h_stats(0, width + 1);
  for (int y = 0; y < height; ++y) {
    uinT32* pixels = data + y*wpl;
    int prev_pixel = 0;
    int pixel = GET_DATA_BYTE(pixels, 0);
    for (int x = 1; x < width; ++x) {
      // Inside this loop `pixel` is the value at column x - 1 and
      // `next_pixel` the value at column x.
      int next_pixel = GET_DATA_BYTE(pixels, x);
      // We are looking for a pixel that is equal to its vertical neighbours,
      // yet greater than its left neighbour.
      if (prev_pixel < pixel &&
          (y == 0 || pixel == GET_DATA_BYTE(pixels - wpl, x - 1)) &&
          (y == height - 1 || pixel == GET_DATA_BYTE(pixels + wpl, x - 1))) {
        if (pixel > next_pixel) {
          // Single local max, so an odd width.
          h_stats.add(pixel * 2 - 1, 1);
        } else if (pixel == next_pixel && x + 1 < width &&
                   pixel > GET_DATA_BYTE(pixels, x + 1)) {
          // Double local max, so an even width.
          h_stats.add(pixel * 2, 1);
        }
      }
      prev_pixel = pixel;
      pixel = next_pixel;
    }
  }

  // Vertical width of stroke.
  STATS v_stats(0, height + 1);
  for (int x = 0; x < width; ++x) {
    int prev_pixel = 0;
    int pixel = GET_DATA_BYTE(data, x);
    for (int y = 1; y < height; ++y) {
      // Inside this loop `pixel` is the value at row y - 1 (pixels - wpl)
      // and `next_pixel` the value at row y.
      uinT32* pixels = data + y*wpl;
      int next_pixel = GET_DATA_BYTE(pixels, x);
      // We are looking for a pixel that is equal to its horizontal
      // neighbours, yet greater than its upper neighbour.
      if (prev_pixel < pixel &&
          (x == 0 || pixel == GET_DATA_BYTE(pixels - wpl, x - 1)) &&
          (x == width - 1 || pixel == GET_DATA_BYTE(pixels - wpl, x + 1))) {
        if (pixel > next_pixel) {
          // Single local max, so an odd width.
          v_stats.add(pixel * 2 - 1, 1);
        } else if (pixel == next_pixel && y + 1 < height &&
                   pixel > GET_DATA_BYTE(pixels + wpl, x)) {
          // Double local max, so an even width.
          v_stats.add(pixel * 2, 1);
        }
      }
      prev_pixel = pixel;
      pixel = next_pixel;
    }
  }
  pixDestroy(&dist_pix);

  // Store the horizontal and vertical width in the blob, keeping both
  // widths if there is enough information, otherwise only the one with
  // the most samples.
  // If there are insufficient samples, store zero, rather than using
  // 2*area/perimeter, as the numbers that gives do not match the numbers
  // from the distance method.
  const int min_samples = (width + height) / 4;
  if (h_stats.get_total() >= min_samples) {
    blob->set_horz_stroke_width(h_stats.ile(0.5f));
    if (v_stats.get_total() >= min_samples)
      blob->set_vert_stroke_width(v_stats.ile(0.5f));
    else
      blob->set_vert_stroke_width(0.0f);
  } else {
    if (v_stats.get_total() >= min_samples ||
        v_stats.get_total() > h_stats.get_total()) {
      blob->set_horz_stroke_width(0.0f);
      blob->set_vert_stroke_width(v_stats.ile(0.5f));
    } else {
      // Neither direction has enough samples and horizontal has at least as
      // many as vertical: use it only if it has more than two samples.
      blob->set_horz_stroke_width(h_stats.get_total() > 2 ? h_stats.ile(0.5f)
                                                          : 0.0f);
      blob->set_vert_stroke_width(0.0f);
    }
  }
}
void textord_page(ICOORD page_tr, BLOCK_LIST *blocks, TO_BLOCK_LIST *land_blocks, TO_BLOCK_LIST *port_blocks, tesseract::Tesseract *)
void tweak_row_baseline(ROW *row, double blshift_maxshift, double blshift_xfraction)
Definition at line 680 of file tordmain.cpp.
{
  // Rebuilds row->baseline as a new QSPLINE. Blobs whose bottom lies within
  // blshift_maxshift x-heights of the current baseline, and whose height
  // exceeds blshift_xfraction x-heights, get a flat segment (a = b = 0,
  // c = blob bottom) spanning the blob. Everywhere else the existing
  // baseline segments are copied across. Does nothing if the row has no blobs.

  // Count the blobs in the row to size the work arrays.
  WERD_IT word_it = row->word_list ();
  inT32 total_blobs = 0;
  for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {
    total_blobs += word_it.data ()->cblob_list ()->length ();
  }
  if (total_blobs == 0)
    return;

  // Worst case is one new segment per blob plus every original segment;
  // the starts array needs one extra entry for the final end coordinate.
  inT32 *seg_starts = (inT32 *) alloc_mem (
      (total_blobs + row->baseline.segments + 1) * sizeof (inT32));
  double *seg_coeffs = (double *) alloc_mem (
      (total_blobs + row->baseline.segments) * 3 * sizeof (double));

  inT32 src = 0;    // index into the existing baseline segments
  inT32 dest = 0;   // index of the output segment being built
  seg_starts[0] = row->baseline.xcoords[0];

  C_BLOB_IT blob_it;
  for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {
    blob_it.set_to_list (word_it.data ()->cblob_list ());
    for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
         blob_it.forward ()) {
      TBOX box = blob_it.data ()->bounding_box ();
      float mid_x = (box.left () + box.right ()) / 2.0;

      // Distance of the blob bottom from the baseline, in x-heights.
      float offset = box.bottom () - row->base_line (mid_x);
      offset = (offset < 0 ? -offset : offset) / row->x_height ();

      if (offset < blshift_maxshift &&
          box.height () / row->x_height () > blshift_xfraction) {
        // Close enough and tall enough: shift the baseline to this blob.
        if (seg_starts[dest] >= mid_x)
          seg_starts[dest] = box.left ();
        double *flat = seg_coeffs + dest * 3;
        flat[0] = 0;
        flat[1] = 0;
        flat[2] = box.bottom ();
        dest++;
        seg_starts[dest] = box.right () + 1;
      } else if (seg_starts[dest] <= mid_x) {
        // Keep the original baseline here: copy across every source segment
        // that ends at or before the blob centre...
        while (row->baseline.xcoords[src + 1] <= mid_x &&
               src < row->baseline.segments - 1) {
          // ...skipping any that end at or before the current output start.
          if (row->baseline.xcoords[src + 1] > seg_starts[dest]) {
            double *copied = seg_coeffs + dest * 3;
            copied[0] = row->baseline.quadratics[src].a;
            copied[1] = row->baseline.quadratics[src].b;
            copied[2] = row->baseline.quadratics[src].c;
            dest++;
            seg_starts[dest] = row->baseline.xcoords[src + 1];
          }
          src++;
        }
        // ...then the source segment reached by the loop above.
        double *current = seg_coeffs + dest * 3;
        current[0] = row->baseline.quadratics[src].a;
        current[1] = row->baseline.quadratics[src].b;
        current[2] = row->baseline.quadratics[src].c;
        dest++;
        seg_starts[dest] = row->baseline.xcoords[src + 1];
      }
    }
  }

  // Skip source segments that end at or before the current output start.
  while (src < row->baseline.segments &&
         row->baseline.xcoords[src + 1] <= seg_starts[dest])
    src++;

  // Copy whatever source segments remain.
  while (src < row->baseline.segments) {
    double *tail = seg_coeffs + dest * 3;
    tail[0] = row->baseline.quadratics[src].a;
    tail[1] = row->baseline.quadratics[src].b;
    tail[2] = row->baseline.quadratics[src].c;
    dest++;
    src++;
    seg_starts[dest] = row->baseline.xcoords[src];
  }

  // Turn the collected segments into the row's new spline.
  row->baseline = QSPLINE (dest, seg_starts, seg_coeffs);
  free_mem(seg_starts);
  free_mem(seg_coeffs);
}