Tesseract
3.02
|
#include <imagefind.h>
Static Public Member Functions | |
static Pix * | FindImages (Pix *pix) |
static void | ConnCompAndRectangularize (Pix *pix, Boxa **boxa, Pixa **pixa) |
static bool | pixNearlyRectangular (Pix *pix, double min_fraction, double max_fraction, double max_skew_gradient, int *x_start, int *y_start, int *x_end, int *y_end) |
static bool | BoundsWithinRect (Pix *pix, int *x_start, int *y_start, int *x_end, int *y_end) |
static double | ColorDistanceFromLine (const uinT8 *line1, const uinT8 *line2, const uinT8 *point) |
static uinT32 | ComposeRGB (uinT32 r, uinT32 g, uinT32 b) |
static uinT8 | ClipToByte (double pixel) |
static void | ComputeRectangleColors (const TBOX &rect, Pix *pix, int factor, Pix *color_map1, Pix *color_map2, Pix *rms_map, uinT8 *color1, uinT8 *color2) |
static bool | BlankImageInBetween (const TBOX &box1, const TBOX &box2, const TBOX &im_box, const FCOORD &rotation, Pix *pix) |
static int | CountPixelsInRotatedBox (TBOX box, const TBOX &im_box, const FCOORD &rotation, Pix *pix) |
static void | TransferImagePartsToImageMask (const FCOORD &rerotation, ColPartitionGrid *part_grid, Pix *image_mask) |
static void | FindImagePartitions (Pix *image_pix, const FCOORD &rotation, const FCOORD &rerotation, TO_BLOCK *block, TabFind *tab_grid, ColPartitionGrid *part_grid, ColPartition_LIST *big_parts) |
Definition at line 42 of file imagefind.h.
bool tesseract::ImageFind::BlankImageInBetween | ( | const TBOX & | box1, |
const TBOX & | box2, | ||
const TBOX & | im_box, | ||
const FCOORD & | rotation, | ||
Pix * | pix | ||
) | [static] |
Definition at line 553 of file imagefind.cpp.
{
  // Begin with the union of the two boxes, then shrink it along the axis
  // with the larger gap so that it spans only the space between them.
  TBOX search_box(box1);
  search_box += box2;
  const bool split_horizontally = box1.x_gap(box2) >= box1.y_gap(box2);
  if (split_horizontally) {
    // No positive horizontal gap: nothing lies in between.
    if (box1.x_gap(box2) <= 0)
      return true;
    search_box.set_left(MIN(box1.right(), box2.right()));
    search_box.set_right(MAX(box1.left(), box2.left()));
  } else {
    // No positive vertical gap: nothing lies in between.
    if (box1.y_gap(box2) <= 0)
      return true;
    search_box.set_top(MAX(box1.bottom(), box2.bottom()));
    search_box.set_bottom(MIN(box1.top(), box2.top()));
  }
  // The gap is blank when it contains no set pixels.
  const int pixels_in_gap =
      CountPixelsInRotatedBox(search_box, im_box, rotation, pix);
  return pixels_in_gap == 0;
}
bool tesseract::ImageFind::BoundsWithinRect | ( | Pix * | pix, |
int * | x_start, | ||
int * | y_start, | ||
int * | x_end, | ||
int * | y_end | ||
) | [static] |
Definition at line 309 of file imagefind.cpp.
{
  // Describe the caller's window as a Leptonica box and ask Leptonica to
  // clip it to the foreground of pix.
  Box* window = boxCreate(*x_start, *y_start,
                          *x_end - *x_start, *y_end - *y_start);
  Box* clipped = NULL;
  pixClipBoxToForeground(pix, window, NULL, &clipped);
  boxDestroy(&window);

  // No output box was produced: leave the caller's bounds untouched.
  if (clipped == NULL)
    return false;

  // Write the clipped bounds back through the in/out parameters.
  l_int32 x, y, width, height;
  boxGetGeometry(clipped, &x, &y, &width, &height);
  *x_start = x;
  *y_start = y;
  *x_end = x + width;
  *y_end = y + height;
  boxDestroy(&clipped);
  return true;
}
uinT8 tesseract::ImageFind::ClipToByte | ( | double | pixel | ) | [static] |
Definition at line 373 of file imagefind.cpp.
{
  // Clamps pixel to the range of a byte and truncates it to uinT8.
  // The lower bound is tested as a negated comparison on purpose: every
  // ordered comparison with NaN is false, so NaN is sent to 0 here instead
  // of reaching the cast below, where converting it to an integer type
  // would be undefined behavior.
  if (!(pixel >= 0.0))
    return 0;
  if (pixel >= 255.0)
    return 255;
  // 0.0 <= pixel < 255.0 at this point; the cast truncates towards zero.
  return static_cast<uinT8>(pixel);
}
double tesseract::ImageFind::ColorDistanceFromLine | ( | const uinT8 * | line1, |
const uinT8 * | line2, | ||
const uinT8 * | point | ||
) | [static] |
Definition at line 332 of file imagefind.cpp.
{
  // Express the line direction and the point relative to line1.
  int direction[kRGBRMSColors];
  int offset[kRGBRMSColors];
  for (int c = 0; c < kRGBRMSColors; ++c) {
    const int origin = static_cast<int>(line1[c]);
    direction[c] = static_cast<int>(line2[c]) - origin;
    offset[c] = static_cast<int>(point[c]) - origin;
  }
  // The alpha slot contributes nothing to the line direction.
  direction[L_ALPHA_CHANNEL] = 0;

  // Cross product of direction and offset over the three color channels.
  int cross[kRGBRMSColors];
  cross[COLOR_RED] = direction[COLOR_GREEN] * offset[COLOR_BLUE] -
                     direction[COLOR_BLUE] * offset[COLOR_GREEN];
  cross[COLOR_GREEN] = direction[COLOR_BLUE] * offset[COLOR_RED] -
                       direction[COLOR_RED] * offset[COLOR_BLUE];
  cross[COLOR_BLUE] = direction[COLOR_RED] * offset[COLOR_GREEN] -
                      direction[COLOR_GREEN] * offset[COLOR_RED];
  cross[L_ALPHA_CHANNEL] = 0;

  // Sum the squares of both vectors in floating point.
  double cross_sq = 0.0;
  double line_sq = 0.0;
  for (int c = 0; c < kRGBRMSColors; ++c) {
    cross_sq += static_cast<double>(cross[c]) * cross[c];
    line_sq += static_cast<double>(direction[c]) * direction[c];
  }

  // A zero-length line yields 0.0; otherwise the result is the squared
  // distance of the point from the line.
  return line_sq == 0.0 ? 0.0 : cross_sq / line_sq;
}
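uinT32 tesseract::ImageFind::ComposeRGB | ( | uinT32 | r, |
uinT32 | g, | ||
uinT32 | b | ||
) | [static] |
Definition at line 366 of file imagefind.cpp.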
{
  // Delegate to Leptonica, which writes the combined pixel value through
  // the output pointer.
  l_uint32 packed_pixel;
  composeRGBPixel(r, g, b, &packed_pixel);
  return packed_pixel;
}
void tesseract::ImageFind::ComputeRectangleColors | ( | const TBOX & | rect, |
Pix * | pix, | ||
int | factor, | ||
Pix * | color_map1, | ||
Pix * | color_map2, | ||
Pix * | rms_map, | ||
uinT8 * | color1, | ||
uinT8 * | color2 | ||
) | [static] |
Definition at line 391 of file imagefind.cpp.
{
  ASSERT_HOST(pix != NULL && pixGetDepth(pix) == 32);
  // Where the image allows it, grow the rectangle by 2 (scaled) pixels on
  // every side so that more background is sampled.
  const int width = pixGetWidth(pix);
  const int height = pixGetHeight(pix);
  const int left_pad = MAX(rect.left() - 2 * factor, 0) / factor;
  const int bottom_pad = MAX(rect.bottom() - 2 * factor, 0) / factor;
  int top_pad = (rect.top() + 2 * factor + (factor - 1)) / factor;
  top_pad = MIN(height, top_pad);
  int right_pad = (rect.right() + 2 * factor + (factor - 1)) / factor;
  right_pad = MIN(width, right_pad);
  const int width_pad = right_pad - left_pad;
  const int height_pad = top_pad - bottom_pad;
  if (width_pad < 1 || height_pad < 1 || width_pad + height_pad < 4)
    return;

  // Cut the padded rectangle out of pix. The y coordinate is flipped
  // because the box is measured from the top of the image.
  Box* scaled_box = boxCreate(left_pad, height - top_pad,
                              width_pad, height_pad);
  Pix* scaled = pixClipRectangle(pix, scaled_box, NULL);

  // Gather a histogram per color channel over the whole clipped image.
  // The assert guarantees one 32-bit word per pixel with no row padding,
  // so the image can be walked as one flat array.
  STATS red_stats(0, 256);
  STATS green_stats(0, 256);
  STATS blue_stats(0, 256);
  const int pixel_count = width_pad * height_pad;
  uinT32* pixel = pixGetData(scaled);
  ASSERT_HOST(pixGetWpl(scaled) == width_pad);
  for (int i = 0; i < pixel_count; ++i, ++pixel) {
    red_stats.add(GET_DATA_BYTE(pixel, COLOR_RED), 1);
    green_stats.add(GET_DATA_BYTE(pixel, COLOR_GREEN), 1);
    blue_stats.add(GET_DATA_BYTE(pixel, COLOR_BLUE), 1);
  }

  // Pick the channel with the widest 8th-ile range as the x axis.
  // 8th-iles rather than quartiles get closer to the true foreground
  // color, which is faint at best because the input image was pre-scaled.
  int best_l8 = static_cast<int>(red_stats.ile(0.125f));
  int best_u8 = static_cast<int>(ceil(red_stats.ile(0.875f)));
  int best_i8r = best_u8 - best_l8;
  int x_color = COLOR_RED;
  int y1_color = COLOR_GREEN;
  int y2_color = COLOR_BLUE;
  int lower = static_cast<int>(green_stats.ile(0.125f));
  int upper = static_cast<int>(ceil(green_stats.ile(0.875f)));
  if (upper - lower > best_i8r) {
    best_i8r = upper - lower;
    best_l8 = lower;
    best_u8 = upper;
    x_color = COLOR_GREEN;
    y1_color = COLOR_RED;
  }
  lower = static_cast<int>(blue_stats.ile(0.125f));
  upper = static_cast<int>(ceil(blue_stats.ile(0.875f)));
  if (upper - lower > best_i8r) {
    best_i8r = upper - lower;
    best_l8 = lower;
    best_u8 = upper;
    x_color = COLOR_BLUE;
    y1_color = COLOR_GREEN;
    y2_color = COLOR_RED;
  }

  if (best_i8r < kMinColorDifference) {
    // Too little spread: report a single color, the per-channel median,
    // in both outputs.
    color1[COLOR_RED] = ClipToByte(red_stats.median());
    color1[COLOR_GREEN] = ClipToByte(green_stats.median());
    color1[COLOR_BLUE] = ClipToByte(blue_stats.median());
    color1[L_ALPHA_CHANNEL] = 0;
    memcpy(color2, color1, 4);
  } else {
    // Fit each of the other two channels as a straight line against the
    // chosen x channel.
    LLSQ line1;
    LLSQ line2;
    uinT32* sample = pixGetData(scaled);
    for (int i = 0; i < pixel_count; ++i, ++sample) {
      const int x_value = GET_DATA_BYTE(sample, x_color);
      const int y1_value = GET_DATA_BYTE(sample, y1_color);
      const int y2_value = GET_DATA_BYTE(sample, y2_color);
      line1.add(x_value, y1_value);
      line2.add(x_value, y2_value);
    }
    const double m1 = line1.m();
    const double c1 = line1.c(m1);
    const double m2 = line2.m();
    const double c2 = line2.c(m2);
    double rms = line1.rms(m1, c1) + line2.rms(m2, c2);
    rms *= kRMSFitScaling;
    // The two colors are the fitted lines evaluated at the lower and upper
    // 8th-iles; the scaled rms of the fit goes in the alpha slot of both.
    color1[x_color] = ClipToByte(best_l8);
    color1[y1_color] = ClipToByte(m1 * best_l8 + c1 + 0.5);
    color1[y2_color] = ClipToByte(m2 * best_l8 + c2 + 0.5);
    color1[L_ALPHA_CHANNEL] = ClipToByte(rms);
    color2[x_color] = ClipToByte(best_u8);
    color2[y1_color] = ClipToByte(m1 * best_u8 + c1 + 0.5);
    color2[y2_color] = ClipToByte(m2 * best_u8 + c2 + 0.5);
    color2[L_ALPHA_CHANNEL] = ClipToByte(rms);
  }

  // Paint the results into the maps when a first map was supplied.
  if (color_map1 != NULL) {
    pixSetInRectArbitrary(color_map1, scaled_box,
                          ComposeRGB(color1[COLOR_RED],
                                     color1[COLOR_GREEN],
                                     color1[COLOR_BLUE]));
    pixSetInRectArbitrary(color_map2, scaled_box,
                          ComposeRGB(color2[COLOR_RED],
                                     color2[COLOR_GREEN],
                                     color2[COLOR_BLUE]));
    pixSetInRectArbitrary(rms_map, scaled_box, color1[L_ALPHA_CHANNEL]);
  }
  pixDestroy(&scaled);
  boxDestroy(&scaled_box);
}
void tesseract::ImageFind::ConnCompAndRectangularize | ( | Pix * | pix, |
Boxa ** | boxa, | ||
Pixa ** | pixa | ||
) | [static] |
Definition at line 134 of file imagefind.cpp.
{
  *boxa = NULL;
  *pixa = NULL;

  if (textord_tabfind_show_images)
    pixWrite("junkconncompimage.png", pix, IFF_PNG);
  // Split the mask image into its 8-connected image regions.
  *boxa = pixConnComp(pix, pixa, 8);
  // Rectangularize each region. A sharp edge in vertical and/or horizontal
  // occupancy suggests a probably rectangular image with unwanted bits
  // merged on, so such a region is clipped to the approximate rectangle.
  const int component_count = pixaGetCount(*pixa);
  for (int index = 0; index < component_count; ++index) {
    Pix* component = pixaGetPix(*pixa, index, L_CLONE);
    pixDisplayWrite(component, textord_tabfind_show_images);

    int x_start, x_end, y_start, y_end;
    const bool rectangular =
        pixNearlyRectangular(component, kMinRectangularFraction,
                             kMaxRectangularFraction, kMaxRectangularGradient,
                             &x_start, &y_start, &x_end, &y_end);
    if (!rectangular) {
      pixDestroy(&component);
      continue;
    }

    // Swap the component for a solid rectangle of the clipped size.
    const int rect_width = x_end - x_start;
    const int rect_height = y_end - y_start;
    Pix* simple_pix = pixCreate(rect_width, rect_height, 1);
    pixSetAll(simple_pix);
    pixDestroy(&component);
    // pixaReplacePix takes ownership of the simple_pix.
    pixaReplacePix(*pixa, index, simple_pix, NULL);
    component = pixaGetPix(*pixa, index, L_CLONE);

    // Move and resize the matching box so it agrees with the new pix.
    l_int32 x, y, width, height;
    boxaGetBoxGeometry(*boxa, index, &x, &y, &width, &height);
    Box* simple_box = boxCreate(x + x_start, y + y_start,
                                rect_width, rect_height);
    boxaReplaceBox(*boxa, index, simple_box);
    pixDestroy(&component);
  }
}
int tesseract::ImageFind::CountPixelsInRotatedBox | ( | TBOX | box, |
const TBOX & | im_box, | ||
const FCOORD & | rotation, | ||
Pix * | pix | ||
) | [static] |
Definition at line 574 of file imagefind.cpp.
{
  // Clip the box to the image box (in-place intersection); an empty
  // intersection contains no pixels.
  box &= im_box;
  if (box.null_box())
    return 0;

  // Rotate both the clipped box and a copy of the image box.
  box.rotate(rotation);
  TBOX rotated_im_box(im_box);
  rotated_im_box.rotate(rotation);

  // Copy the box's area out of pix. The source offset is measured from the
  // left and top edges of the rotated image box.
  const int clip_width = box.width();
  const int clip_height = box.height();
  const int src_x = box.left() - rotated_im_box.left();
  const int src_y = rotated_im_box.top() - box.top();
  Pix* rect_pix = pixCreate(clip_width, clip_height, 1);
  pixRasterop(rect_pix, 0, 0, clip_width, clip_height, PIX_SRC,
              pix, src_x, src_y);

  // Count the set pixels in the copy.
  l_int32 pixel_count;
  pixCountPixels(rect_pix, &pixel_count, NULL);
  pixDestroy(&rect_pix);
  return pixel_count;
}
void tesseract::ImageFind::FindImagePartitions | ( | Pix * | image_pix, |
const FCOORD & | rotation, | ||
const FCOORD & | rerotation, | ||
TO_BLOCK * | block, | ||
TabFind * | tab_grid, | ||
ColPartitionGrid * | part_grid, | ||
ColPartition_LIST * | big_parts | ||
) | [static] |
Definition at line 1275 of file imagefind.cpp.
{
  const int image_height = pixGetHeight(image_pix);
  Boxa* boxa;
  Pixa* pixa;
  ConnCompAndRectangularize(image_pix, &boxa, &pixa);

  // Visit each connected component of the image regions mask.
  const int box_count = boxaGetCount(boxa);
  for (int index = 0; index < box_count; ++index) {
    l_int32 x, y, width, height;
    boxaGetBoxGeometry(boxa, index, &x, &y, &width, &height);
    Pix* pix = pixaGetPix(pixa, index, L_CLONE);
    // Flip y (the Leptonica box is measured from the top of the image)
    // and rotate, so the box matches all partitions and blobs.
    TBOX im_box(x, image_height - y - height,
                x + width, image_height - y);
    im_box.rotate(rotation);

    ColPartitionGridSearch rectsearch(part_grid);
    rectsearch.SetUniqueMode(true);
    ColPartition_LIST part_list;
    DivideImageIntoParts(im_box, rotation, rerotation, pix,
                         &rectsearch, &part_list);
    if (textord_tabfind_show_images) {
      pixWrite("junkimagecomponent.png", pix, IFF_PNG);
      tprintf("Component has %d parts\n", part_list.length());
    }
    pixDestroy(&pix);
    if (part_list.empty())
      continue;

    ColPartition_IT part_it(&part_list);
    if (part_list.singleton()) {
      // The component was not chopped into a polygon to fit around text,
      // so try expanding it to merge fragmented image parts, as long as it
      // doesn't touch strong text.
      ColPartition* part = part_it.extract();
      TBOX text_box(im_box);
      MaximalImageBoundingBox(part_grid, &text_box);
      bool expanded;
      do {
        expanded = ExpandImageIntoParts(text_box, &rectsearch,
                                        part_grid, &part);
      } while (expanded);
      part_it.set_to_list(&part_list);
      part_it.add_after_then_move(part);
      im_box = part->bounding_box();
    }
    EliminateWeakParts(im_box, part_grid, big_parts, &part_list);

    // Move every remaining part from the list into the grid, partnering
    // each part with the one that follows it in the list.
    part_it.move_to_first();
    while (!part_it.empty()) {
      ColPartition* image_part = part_it.extract();
      im_box = image_part->bounding_box();
      part_grid->InsertBBox(true, true, image_part);
      if (!part_it.at_last()) {
        ColPartition* neighbour = part_it.data_relative(1);
        image_part->AddPartner(false, neighbour);
        neighbour->AddPartner(true, image_part);
      }
      part_it.forward();
    }
  }
  boxaDestroy(&boxa);
  pixaDestroy(&pixa);
  DeleteSmallImages(part_grid);
  if (textord_tabfind_show_images) {
    ScrollView* images_win = part_grid->MakeWindow(1000, 400, "With Images");
    part_grid->DisplayBoxes(images_win);
  }
}
Pix * tesseract::ImageFind::FindImages | ( | Pix * | pix | ) | [static] |
Definition at line 66 of file imagefind.cpp.
{
  // Small images are not worth examining: return an empty mask.
  const bool too_small = pixGetWidth(pix) < kMinImageFindSize ||
                         pixGetHeight(pix) < kMinImageFindSize;
  if (too_small)
    return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);

  // Work at half resolution.
  Pix* half_res = pixReduceRankBinaryCascade(pix, 1, 0, 0, 0);
  pixDisplayWrite(half_res, textord_tabfind_show_images);

  // Leptonica supplies the halftone mask directly.
  l_int32 ht_found = 0;
  Pix* half_mask = pixGenHalftoneMask(half_res, NULL, &ht_found,
                                      textord_tabfind_show_images);
  pixDestroy(&half_res);
  if (!ht_found && half_mask != NULL)
    pixDestroy(&half_mask);
  if (half_mask == NULL)
    return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);

  // Scale the mask back up by 2.
  Pix* halftone = pixExpandReplicate(half_mask, 2);
  pixDisplayWrite(halftone, textord_tabfind_show_images);
  pixDestroy(&half_mask);

  // Seed-fill to pick up pixels near the mask edges that were missed.
  Pix* filled = pixSeedfillBinary(NULL, halftone, pix, 8);
  pixOr(halftone, halftone, filled);
  pixDestroy(&filled);

  // Build fine and coarse masks that eliminate lines and bars that may be
  // joined to images.
  Pix* fine_mask = pixReduceRankBinaryCascade(halftone, 1, 1, 3, 3);
  pixDilateBrick(fine_mask, fine_mask, 5, 5);
  pixDisplayWrite(fine_mask, textord_tabfind_show_images);
  Pix* reduced = pixReduceRankBinaryCascade(halftone, 1, 1, 1, 1);
  Pix* reduced_more = pixReduceRankBinaryCascade(reduced, 3, 3, 3, 0);
  pixDestroy(&reduced);
  pixDilateBrick(reduced_more, reduced_more, 5, 5);
  Pix* coarse_mask = pixExpandReplicate(reduced_more, 8);
  pixDestroy(&reduced_more);
  pixDisplayWrite(coarse_mask, textord_tabfind_show_images);

  // Intersect the coarse mask with the fine mask.
  pixAnd(coarse_mask, coarse_mask, fine_mask);
  pixDestroy(&fine_mask);
  // Dilate a little to make sure everything is covered.
  pixDilateBrick(coarse_mask, coarse_mask, 3, 3);
  Pix* expanded_mask = pixExpandReplicate(coarse_mask, 16);
  pixDestroy(&coarse_mask);
  if (textord_tabfind_show_images)
    pixWrite("junkexpandedcoarsemask.png", expanded_mask, IFF_PNG);

  // Intersect the image mask with the line and bar remover.
  pixAnd(halftone, halftone, expanded_mask);
  pixDestroy(&expanded_mask);
  if (textord_tabfind_show_images)
    pixWrite("junkfinalimagemask.png", halftone, IFF_PNG);

  // Deliver the mask in an image of the same size as the input.
  Pix* result = pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
  pixOr(result, result, halftone);
  pixDestroy(&halftone);
  return result;
}
bool tesseract::ImageFind::pixNearlyRectangular | ( | Pix * | pix, |
double | min_fraction, | ||
double | max_fraction, | ||
double | max_skew_gradient, | ||
int * | x_start, | ||
int * | y_start, | ||
int * | x_end, | ||
int * | y_end | ||
) | [static] |
Definition at line 243 of file imagefind.cpp.
{
  ASSERT_HOST(pix != NULL);
  // Start with the whole image as the candidate rectangle.
  *x_start = 0;
  *x_end = pixGetWidth(pix);
  *y_start = 0;
  *y_end = pixGetHeight(pix);

  uinT32* data = pixGetData(pix);
  const int wpl = pixGetWpl(pix);
  bool left_done = false;
  bool right_done = false;
  bool top_done = false;
  bool bottom_done = false;
  bool any_cut;
  // Keep scanning until a full pass marks no new edge as done. Every scan
  // runs on every pass, including for edges already marked done.
  do {
    any_cut = false;

    // Top and bottom edges: thresholds scale with the current width.
    const int width = *x_end - *x_start;
    int min_count = static_cast<int>(width * min_fraction);
    int max_count = static_cast<int>(width * max_fraction);
    int edge_width = static_cast<int>(width * max_skew_gradient);
    const bool top_hit = HScanForEdge(data, wpl, *x_start, *x_end,
                                      min_count, edge_width, max_count,
                                      *y_end, 1, y_start);
    if (top_hit && !top_done) {
      top_done = true;
      any_cut = true;
    }
    // y_end is decremented for the reverse scan and restored afterwards.
    --(*y_end);
    const bool bottom_hit = HScanForEdge(data, wpl, *x_start, *x_end,
                                         min_count, edge_width, max_count,
                                         *y_start, -1, y_end);
    if (bottom_hit && !bottom_done) {
      bottom_done = true;
      any_cut = true;
    }
    ++(*y_end);

    // Left and right edges: thresholds scale with the current height.
    const int height = *y_end - *y_start;
    min_count = static_cast<int>(height * min_fraction);
    max_count = static_cast<int>(height * max_fraction);
    edge_width = static_cast<int>(height * max_skew_gradient);
    const bool left_hit = VScanForEdge(data, wpl, *y_start, *y_end,
                                       min_count, edge_width, max_count,
                                       *x_end, 1, x_start);
    if (left_hit && !left_done) {
      left_done = true;
      any_cut = true;
    }
    // x_end is decremented for the reverse scan and restored afterwards.
    --(*x_end);
    const bool right_hit = VScanForEdge(data, wpl, *y_start, *y_end,
                                        min_count, edge_width, max_count,
                                        *x_start, -1, x_end);
    if (right_hit && !right_done) {
      right_done = true;
      any_cut = true;
    }
    ++(*x_end);
  } while (any_cut);

  // The full rectangle is present only if all four edges showed the sharp
  // gradient in pixel density.
  return left_done && right_done && top_done && bottom_done;
}
void tesseract::ImageFind::TransferImagePartsToImageMask | ( | const FCOORD & | rerotation, |
ColPartitionGrid * | part_grid, | ||
Pix * | image_mask | ||
) | [static] |
Definition at line 1222 of file imagefind.cpp.
{
  // Pull the noise and image partitions out of the grid onto a temporary
  // list.
  ColPartition_LIST parts_list;
  ColPartition_IT part_it(&parts_list);
  ColPartitionGridSearch gsearch(part_grid);
  gsearch.StartFullSearch();
  for (ColPartition* part = gsearch.NextFullSearch(); part != NULL;
       part = gsearch.NextFullSearch()) {
    switch (part->blob_type()) {
      case BRT_NOISE:
      case BRT_RECTIMAGE:
      case BRT_POLYIMAGE:
        part_it.add_after_then_move(part);
        gsearch.RemoveBBox();
        break;
      default:
        // Every other partition type stays in the grid.
        break;
    }
  }
  // Render the listed partitions to the image mask.
  MarkAndDeleteImageParts(rerotation, part_grid, &parts_list, image_mask);
}