#include <imagefind.h>

Static Public Member Functions
static Pix *	FindImages (Pix *pix)
static void	ConnCompAndRectangularize (Pix *pix, Boxa **boxa, Pixa **pixa)
static bool	pixNearlyRectangular (Pix *pix, double min_fraction, double max_fraction, double max_skew_gradient, int *x_start, int *y_start, int *x_end, int *y_end)
static bool	BoundsWithinRect (Pix *pix, int *x_start, int *y_start, int *x_end, int *y_end)
static double	ColorDistanceFromLine (const uinT8 *line1, const uinT8 *line2, const uinT8 *point)
static uinT32	ComposeRGB (uinT32 r, uinT32 g, uinT32 b)
static uinT8	ClipToByte (double pixel)
static void	ComputeRectangleColors (const TBOX &rect, Pix *pix, int factor, Pix *color_map1, Pix *color_map2, Pix *rms_map, uinT8 *color1, uinT8 *color2)
static bool	BlankImageInBetween (const TBOX &box1, const TBOX &box2, const TBOX &im_box, const FCOORD &rotation, Pix *pix)
static int	CountPixelsInRotatedBox (TBOX box, const TBOX &im_box, const FCOORD &rotation, Pix *pix)
static void	TransferImagePartsToImageMask (const FCOORD &rerotation, ColPartitionGrid *part_grid, Pix *image_mask)
static void	FindImagePartitions (Pix *image_pix, const FCOORD &rotation, const FCOORD &rerotation, TO_BLOCK *block, TabFind *tab_grid, ColPartitionGrid *part_grid, ColPartition_LIST *big_parts)

Detailed Description

Definition at line 42 of file imagefind.h.

Member Function Documentation

bool tesseract::ImageFind::BlankImageInBetween	(	const TBOX &	box1,
		const TBOX &	box2,
		const TBOX &	im_box,
		const FCOORD &	rotation,
		Pix *	pix
	)		`[static]`

Definition at line 553 of file imagefind.cpp.

                                              {
  // Begin with the union of the two boxes; it is narrowed below to the strip
  // that lies between them along the axis with the larger gap.
  TBOX gap_box(box1);
  gap_box += box2;
  const int horizontal_gap = box1.x_gap(box2);
  const int vertical_gap = box1.y_gap(box2);
  if (horizontal_gap >= vertical_gap) {
    // A non-positive gap leaves no strip to inspect.
    if (horizontal_gap <= 0) return true;
    gap_box.set_left(MIN(box1.right(), box2.right()));
    gap_box.set_right(MAX(box1.left(), box2.left()));
  } else {
    if (vertical_gap <= 0) return true;
    gap_box.set_top(MAX(box1.bottom(), box2.bottom()));
    gap_box.set_bottom(MIN(box1.top(), box2.top()));
  }
  // The strip is blank when no pixels are counted inside it.
  const int pixels_in_gap =
      CountPixelsInRotatedBox(gap_box, im_box, rotation, pix);
  return pixels_in_gap == 0;
}

bool tesseract::ImageFind::BoundsWithinRect	(	Pix *	pix,
		int *	x_start,
		int *	y_start,
		int *	x_end,
		int *	y_end
	)		`[static]`

Definition at line 309 of file imagefind.cpp.

                                                         {
  // Describe the caller's current rectangle as a Leptonica box.
  Box* query_box = boxCreate(*x_start, *y_start,
                             *x_end - *x_start, *y_end - *y_start);
  Box* clipped_box = NULL;
  pixClipBoxToForeground(pix, query_box, NULL, &clipped_box);
  boxDestroy(&query_box);
  // No output box: report failure and leave the bounds untouched.
  if (clipped_box == NULL) return false;

  // Write the clipped geometry back through the in/out parameters.
  l_int32 left, top, clipped_width, clipped_height;
  boxGetGeometry(clipped_box, &left, &top, &clipped_width, &clipped_height);
  *x_start = left;
  *y_start = top;
  *x_end = left + clipped_width;
  *y_end = top + clipped_height;
  boxDestroy(&clipped_box);
  return true;
}

uinT8 tesseract::ImageFind::ClipToByte ( double pixel ) [static]

Definition at line 373 of file imagefind.cpp.

                                        {
  // Saturate at both ends of the byte range; anything in between is
  // converted by the cast.
  if (pixel >= 255.0) return 255;
  if (pixel < 0.0) return 0;
  return static_cast<uinT8>(pixel);
}

double tesseract::ImageFind::ColorDistanceFromLine	(	const uinT8 *	line1,
		const uinT8 *	line2,
		const uinT8 *	point
	)		`[static]`

Definition at line 332 of file imagefind.cpp.

                                                            {
  // Per channel: the direction of the line (line1 -> line2) and the offset
  // of the point from line1.
  int direction[kRGBRMSColors];
  int offset[kRGBRMSColors];
  for (int channel = 0; channel < kRGBRMSColors; ++channel) {
    const int origin = static_cast<int>(line1[channel]);
    direction[channel] = static_cast<int>(line2[channel]) - origin;
    offset[channel] = static_cast<int>(point[channel]) - origin;
  }
  // The alpha slot does not contribute to the line direction.
  direction[L_ALPHA_CHANNEL] = 0;

  // Cross product of direction and offset over the three color channels.
  int normal[kRGBRMSColors];
  normal[COLOR_RED] = direction[COLOR_GREEN] * offset[COLOR_BLUE] -
                      direction[COLOR_BLUE] * offset[COLOR_GREEN];
  normal[COLOR_GREEN] = direction[COLOR_BLUE] * offset[COLOR_RED] -
                        direction[COLOR_RED] * offset[COLOR_BLUE];
  normal[COLOR_BLUE] = direction[COLOR_RED] * offset[COLOR_GREEN] -
                       direction[COLOR_GREEN] * offset[COLOR_RED];
  normal[L_ALPHA_CHANNEL] = 0;

  // Squared magnitudes, accumulated in double.
  double normal_sq = 0.0;
  double direction_sq = 0.0;
  for (int channel = 0; channel < kRGBRMSColors; ++channel) {
    normal_sq += static_cast<double>(normal[channel]) * normal[channel];
    direction_sq +=
        static_cast<double>(direction[channel]) * direction[channel];
  }
  // A zero-length direction vector yields a distance of zero.
  if (direction_sq == 0.0) return 0.0;
  // |direction x offset|^2 / |direction|^2 is the squared distance.
  return normal_sq / direction_sq;
}

uinT32 tesseract::ImageFind::ComposeRGB	(	uinT32	r,
		uinT32	g,
		uinT32	b
	)		`[static]`

Definition at line 366 of file imagefind.cpp.

                                                         {
  // Thin wrapper: composeRGBPixel fills in the pixel through its out-param.
  l_uint32 packed_pixel;
  composeRGBPixel(r, g, b, &packed_pixel);
  return packed_pixel;
}

void tesseract::ImageFind::ComputeRectangleColors	(	const TBOX &	rect,
		Pix *	pix,
		int	factor,
		Pix *	color_map1,
		Pix *	color_map2,
		Pix *	rms_map,
		uinT8 *	color1,
		uinT8 *	color2
	)		`[static]`

Definition at line 391 of file imagefind.cpp.

                                                                     {
  // Derives two representative colors, color1 and color2 (4 bytes each,
  // indexed by COLOR_RED/COLOR_GREEN/COLOR_BLUE/L_ALPHA_CHANNEL), for the
  // part of the 32-bit pix covered by rect after dividing rect by factor and
  // padding it. If color_map1 is non-NULL the colors and the fit error are
  // also painted into color_map1, color_map2 and rms_map over that region.
  // Returns without touching the outputs if the padded region is too small.
  ASSERT_HOST(pix != NULL && pixGetDepth(pix) == 32);
  // Pad the rectangle outwards by 2 (scaled) pixels if possible to get more
  // background.
  int width = pixGetWidth(pix);
  int height = pixGetHeight(pix);
  // Left/bottom round down (clamped at 0); top/right round up (clamped to
  // the pix size).
  int left_pad = MAX(rect.left() - 2 * factor, 0) / factor;
  int top_pad = (rect.top() + 2 * factor + (factor - 1)) / factor;
  top_pad = MIN(height, top_pad);
  int right_pad = (rect.right() + 2 * factor + (factor - 1)) / factor;
  right_pad = MIN(width, right_pad);
  int bottom_pad = MAX(rect.bottom() - 2 * factor, 0) / factor;
  int width_pad = right_pad - left_pad;
  int height_pad = top_pad - bottom_pad;
  // Nothing is allocated yet, so this early exit needs no cleanup.
  if (width_pad < 1 || height_pad < 1 || width_pad + height_pad < 4)
    return;
  // Now crop the pix to the rectangle.
  // The box y is height - top_pad, i.e. measured from the opposite edge to
  // rect's top/bottom values (presumably TBOX is bottom-up and Leptonica is
  // top-down -- TODO confirm).
  Box* scaled_box = boxCreate(left_pad, height - top_pad,
                              width_pad, height_pad);
  Pix* scaled = pixClipRectangle(pix, scaled_box, NULL);

  // Compute stats over the whole image.
  STATS red_stats(0, 256);
  STATS green_stats(0, 256);
  STATS blue_stats(0, 256);
  uinT32* data = pixGetData(scaled);
  // The loops below walk data linearly, one word per pixel, which relies on
  // each row being exactly width_pad words long.
  ASSERT_HOST(pixGetWpl(scaled) == width_pad);
  for (int y = 0; y < height_pad; ++y) {
    for (int x = 0; x < width_pad; ++x, ++data) {
      int r = GET_DATA_BYTE(data, COLOR_RED);
      int g = GET_DATA_BYTE(data, COLOR_GREEN);
      int b = GET_DATA_BYTE(data, COLOR_BLUE);
      red_stats.add(r, 1);
      green_stats.add(g, 1);
      blue_stats.add(b, 1);
    }
  }
  // Find the RGB component with the greatest 8th-ile-range.
  // 8th-iles are used instead of quartiles to get closer to the true
  // foreground color, which is going to be faint at best because of the
  // pre-scaling of the input image.
  // Red is the initial candidate; green and then blue replace it only if
  // their range is strictly larger. x_color is the chosen component and
  // y1_color/y2_color are the other two.
  int best_l8 = static_cast<int>(red_stats.ile(0.125f));
  int best_u8 = static_cast<int>(ceil(red_stats.ile(0.875f)));
  int best_i8r = best_u8 - best_l8;
  int x_color = COLOR_RED;
  int y1_color = COLOR_GREEN;
  int y2_color = COLOR_BLUE;
  int l8 = static_cast<int>(green_stats.ile(0.125f));
  int u8 = static_cast<int>(ceil(green_stats.ile(0.875f)));
  if (u8 - l8 > best_i8r) {
    best_i8r = u8 - l8;
    best_l8 = l8;
    best_u8 = u8;
    x_color = COLOR_GREEN;
    y1_color = COLOR_RED;
  }
  l8 = static_cast<int>(blue_stats.ile(0.125f));
  u8 = static_cast<int>(ceil(blue_stats.ile(0.875f)));
  if (u8 - l8 > best_i8r) {
    best_i8r = u8 - l8;
    best_l8 = l8;
    best_u8 = u8;
    x_color = COLOR_BLUE;
    y1_color = COLOR_GREEN;
    y2_color = COLOR_RED;
  }
  if (best_i8r >= kMinColorDifference) {
    // Enough spread for two colors: fit each of the other two components as
    // a linear function of the chosen one.
    LLSQ line1;
    LLSQ line2;
    // Note: this data shadows the outer pointer and restarts at the first
    // pixel of scaled.
    uinT32* data = pixGetData(scaled);
    for (int im_y = 0; im_y < height_pad; ++im_y) {
      for (int im_x = 0; im_x < width_pad; ++im_x, ++data) {
        int x = GET_DATA_BYTE(data, x_color);
        int y1 = GET_DATA_BYTE(data, y1_color);
        int y2 = GET_DATA_BYTE(data, y2_color);
        line1.add(x, y1);
        line2.add(x, y2);
      }
    }
    double m1 = line1.m();
    double c1 = line1.c(m1);
    double m2 = line2.m();
    double c2 = line2.c(m2);
    // Combined fit error of both lines, scaled by kRMSFitScaling.
    double rms = line1.rms(m1, c1) + line2.rms(m2, c2);
    rms *= kRMSFitScaling;
    // Save the results.
    // color1 is evaluated at the lower 8th-ile and color2 at the upper one;
    // the + 0.5 rounds to nearest before ClipToByte converts. Both colors
    // carry the same clipped rms in the alpha slot.
    color1[x_color] = ClipToByte(best_l8);
    color1[y1_color] = ClipToByte(m1 * best_l8 + c1 + 0.5);
    color1[y2_color] = ClipToByte(m2 * best_l8 + c2 + 0.5);
    color1[L_ALPHA_CHANNEL] = ClipToByte(rms);
    color2[x_color] = ClipToByte(best_u8);
    color2[y1_color] = ClipToByte(m1 * best_u8 + c1 + 0.5);
    color2[y2_color] = ClipToByte(m2 * best_u8 + c2 + 0.5);
    color2[L_ALPHA_CHANNEL] = ClipToByte(rms);
  } else {
    // There is only one color.
    // Use the per-component medians, with zero in the alpha slot, for both
    // outputs.
    color1[COLOR_RED] = ClipToByte(red_stats.median());
    color1[COLOR_GREEN] = ClipToByte(green_stats.median());
    color1[COLOR_BLUE] = ClipToByte(blue_stats.median());
    color1[L_ALPHA_CHANNEL] = 0;
    memcpy(color2, color1, 4);
  }
  // NOTE(review): only color_map1 is NULL-checked; color_map2 and rms_map
  // are used unconditionally inside this branch.
  if (color_map1 != NULL) {
    pixSetInRectArbitrary(color_map1, scaled_box,
                          ComposeRGB(color1[COLOR_RED],
                              color1[COLOR_GREEN],
                              color1[COLOR_BLUE]));
    pixSetInRectArbitrary(color_map2, scaled_box,
                          ComposeRGB(color2[COLOR_RED],
                              color2[COLOR_GREEN],
                              color2[COLOR_BLUE]));
    pixSetInRectArbitrary(rms_map, scaled_box, color1[L_ALPHA_CHANNEL]);
  }
  // Release the temporary crop and its box.
  pixDestroy(&scaled);
  boxDestroy(&scaled_box);
}

void tesseract::ImageFind::ConnCompAndRectangularize	(	Pix *	pix,
		Boxa **	boxa,
		Pixa **	pixa
	)		`[static]`

Definition at line 134 of file imagefind.cpp.

                                                                            {
  // Both outputs start out NULL.
  *pixa = NULL;
  *boxa = NULL;

  if (textord_tabfind_show_images)
    pixWrite("junkconncompimage.png", pix, IFF_PNG);
  // Split the mask into its 8-connected components: one box and one pix each.
  *boxa = pixConnComp(pix, pixa, 8);

  // Any component that pixNearlyRectangular accepts is swapped for a solid
  // rectangle of the reported extent, and its box is adjusted to match.
  const int component_count = pixaGetCount(*pixa);
  for (int index = 0; index < component_count; ++index) {
    Pix* component = pixaGetPix(*pixa, index, L_CLONE);
    pixDisplayWrite(component, textord_tabfind_show_images);

    int left, right, top, bottom;
    const bool rectangular =
        pixNearlyRectangular(component, kMinRectangularFraction,
                             kMaxRectangularFraction, kMaxRectangularGradient,
                             &left, &top, &right, &bottom);
    if (!rectangular) {
      pixDestroy(&component);
      continue;
    }

    const int rect_width = right - left;
    const int rect_height = bottom - top;
    Pix* filled = pixCreate(rect_width, rect_height, 1);
    pixSetAll(filled);
    pixDestroy(&component);
    // Ownership of filled passes to the pixa here.
    pixaReplacePix(*pixa, index, filled, NULL);
    component = pixaGetPix(*pixa, index, L_CLONE);

    // Shift the stored box by the cut-off margins and give it the new size.
    l_int32 box_x, box_y, box_width, box_height;
    boxaGetBoxGeometry(*boxa, index, &box_x, &box_y, &box_width, &box_height);
    Box* rect_box = boxCreate(box_x + left, box_y + top,
                              rect_width, rect_height);
    boxaReplaceBox(*boxa, index, rect_box);
    pixDestroy(&component);
  }
}

int tesseract::ImageFind::CountPixelsInRotatedBox	(	TBOX	box,
		const TBOX &	im_box,
		const FCOORD &	rotation,
		Pix *	pix
	)		`[static]`

Definition at line 574 of file imagefind.cpp.

                                                                         {
  // Clip the query to the image box (in-place intersection); an empty result
  // contains no pixels.
  box &= im_box;
  if (box.null_box()) return 0;

  // Rotate the query and a copy of the image box by the same rotation so
  // that the offsets below are taken in the same frame.
  box.rotate(rotation);
  TBOX image_bounds(im_box);
  image_bounds.rotate(rotation);

  const int clip_width = box.width();
  const int clip_height = box.height();
  const int source_x = box.left() - image_bounds.left();
  const int source_y = image_bounds.top() - box.top();

  // Copy the region into a scratch 1-bit pix and count its set pixels.
  Pix* clipped = pixCreate(clip_width, clip_height, 1);
  pixRasterop(clipped, 0, 0, clip_width, clip_height,
              PIX_SRC, pix, source_x, source_y);
  l_int32 pixel_count;
  pixCountPixels(clipped, &pixel_count, NULL);
  pixDestroy(&clipped);
  return pixel_count;
}

void tesseract::ImageFind::FindImagePartitions	(	Pix *	image_pix,
		const FCOORD &	rotation,
		const FCOORD &	rerotation,
		TO_BLOCK *	block,
		TabFind *	tab_grid,
		ColPartitionGrid *	part_grid,
		ColPartition_LIST *	big_parts
	)		`[static]`

Definition at line 1275 of file imagefind.cpp.

                                                                 {
  // Splits image_pix into connected components, turns each component into
  // one or more ColPartitions (via DivideImageIntoParts), prunes them with
  // EliminateWeakParts, and inserts the survivors into part_grid, linking
  // consecutive parts of the same component as partners. Finally calls
  // DeleteSmallImages on the grid.
  // Note: the block and tab_grid parameters are not referenced in this body.
  int imageheight = pixGetHeight(image_pix);
  Boxa* boxa;
  Pixa* pixa;
  ConnCompAndRectangularize(image_pix, &boxa, &pixa);
  // Iterate the connected components in the image regions mask.
  int nboxes = boxaGetCount(boxa);
  for (int i = 0; i < nboxes; ++i) {
    l_int32 x, y, width, height;
    boxaGetBoxGeometry(boxa, i, &x, &y, &width, &height);
    Pix* pix = pixaGetPix(pixa, i, L_CLONE);
    // Build the TBOX with its y values subtracted from the image height
    // (presumably converting a top-down box to bottom-up coordinates --
    // TODO confirm).
    TBOX im_box(x, imageheight -y - height, x + width, imageheight - y);
    im_box.rotate(rotation);  // Now matches all partitions and blobs.
    ColPartitionGridSearch rectsearch(part_grid);
    rectsearch.SetUniqueMode(true);
    ColPartition_LIST part_list;
    DivideImageIntoParts(im_box, rotation, rerotation, pix,
                         &rectsearch, &part_list);
    if (textord_tabfind_show_images) {
      pixWrite("junkimagecomponent.png", pix, IFF_PNG);
      tprintf("Component has %d parts\n", part_list.length());
    }
    // The clone of the component pix is no longer needed.
    pixDestroy(&pix);
    if (!part_list.empty()) {
      ColPartition_IT part_it(&part_list);
      if (part_list.singleton()) {
        // We didn't have to chop it into a polygon to fit around text, so
        // try expanding it to merge fragmented image parts, as long as it
        // doesn't touch strong text.
        ColPartition* part = part_it.extract();
        TBOX text_box(im_box);
        MaximalImageBoundingBox(part_grid, &text_box);
        // Keep expanding until ExpandImageIntoParts reports no further
        // change; the loop body is intentionally empty.
        while (ExpandImageIntoParts(text_box, &rectsearch, part_grid, &part));
        // Put the (possibly replaced) part back into the list and adopt its
        // bounding box as the component box.
        part_it.set_to_list(&part_list);
        part_it.add_after_then_move(part);
        im_box = part->bounding_box();
      }
      EliminateWeakParts(im_box, part_grid, big_parts, &part_list);
      // Iterate the part_list and put the parts into the grid.
      for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
        ColPartition* image_part = part_it.extract();
        // NOTE(review): this im_box value is not read again inside the loop
        // body; it is overwritten or goes out of scope first.
        im_box = image_part->bounding_box();
        part_grid->InsertBBox(true, true, image_part);
        // Link this part with the next one still in the list, if any.
        if (!part_it.at_last()) {
          ColPartition* neighbour = part_it.data_relative(1);
          image_part->AddPartner(false, neighbour);
          neighbour->AddPartner(true, image_part);
        }
      }
    }
  }
  boxaDestroy(&boxa);
  pixaDestroy(&pixa);
  DeleteSmallImages(part_grid);
  if (textord_tabfind_show_images) {
    // NOTE(review): the window pointer is not released in this function;
    // confirm who owns it.
    ScrollView* images_win_ = part_grid->MakeWindow(1000, 400, "With Images");
    part_grid->DisplayBoxes(images_win_);
  }
}

Pix * tesseract::ImageFind::FindImages ( Pix * pix ) [static]

Definition at line 66 of file imagefind.cpp.

                                   {
  // Inputs smaller than the minimum in either dimension get an empty mask of
  // the same size.
  if (pixGetWidth(pix) < kMinImageFindSize ||
      pixGetHeight(pix) < kMinImageFindSize)
    return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);

  // Work at half resolution.
  Pix* half_res = pixReduceRankBinaryCascade(pix, 1, 0, 0, 0);
  pixDisplayWrite(half_res, textord_tabfind_show_images);

  // Ask Leptonica for the halftone mask of the reduced image.
  l_int32 ht_found = 0;
  Pix* halftone_half = pixGenHalftoneMask(half_res, NULL, &ht_found,
                                          textord_tabfind_show_images);
  pixDestroy(&half_res);
  // A mask returned without any halftone found is discarded; either way a
  // missing mask produces an empty result.
  if (halftone_half != NULL && !ht_found)
    pixDestroy(&halftone_half);
  if (halftone_half == NULL)
    return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);

  // Back up to the input resolution.
  Pix* halftone = pixExpandReplicate(halftone_half, 2);
  pixDisplayWrite(halftone, textord_tabfind_show_images);
  pixDestroy(&halftone_half);

  // Seed-fill from the mask into the input to pick up pixels near the mask
  // edges that were missed, then merge them into the mask.
  Pix* filled = pixSeedfillBinary(NULL, halftone, pix, 8);
  pixOr(halftone, halftone, filled);
  pixDestroy(&filled);

  // Build a fine and a coarse reduced mask to get rid of lines and bars that
  // may be joined to images.
  Pix* fine_mask = pixReduceRankBinaryCascade(halftone, 1, 1, 3, 3);
  pixDilateBrick(fine_mask, fine_mask, 5, 5);
  pixDisplayWrite(fine_mask, textord_tabfind_show_images);
  Pix* reduced_once = pixReduceRankBinaryCascade(halftone, 1, 1, 1, 1);
  Pix* reduced_twice = pixReduceRankBinaryCascade(reduced_once, 3, 3, 3, 0);
  pixDestroy(&reduced_once);
  pixDilateBrick(reduced_twice, reduced_twice, 5, 5);
  Pix* coarse_mask = pixExpandReplicate(reduced_twice, 8);
  pixDestroy(&reduced_twice);
  pixDisplayWrite(coarse_mask, textord_tabfind_show_images);
  // Keep only what both masks agree on.
  pixAnd(coarse_mask, coarse_mask, fine_mask);
  pixDestroy(&fine_mask);
  // Grow slightly so that nothing is missed.
  pixDilateBrick(coarse_mask, coarse_mask, 3, 3);
  Pix* full_res_mask = pixExpandReplicate(coarse_mask, 16);
  pixDestroy(&coarse_mask);
  if (textord_tabfind_show_images)
    pixWrite("junkexpandedcoarsemask.png", full_res_mask, IFF_PNG);
  // Restrict the halftone mask to the line-and-bar-free regions.
  pixAnd(halftone, halftone, full_res_mask);
  pixDestroy(&full_res_mask);
  if (textord_tabfind_show_images)
    pixWrite("junkfinalimagemask.png", halftone, IFF_PNG);

  // Copy into a fresh pix with exactly the input's dimensions.
  Pix* image_mask = pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
  pixOr(image_mask, image_mask, halftone);
  pixDestroy(&halftone);
  return image_mask;
}

bool tesseract::ImageFind::pixNearlyRectangular	(	Pix *	pix,
		double	min_fraction,
		double	max_fraction,
		double	max_skew_gradient,
		int *	x_start,
		int *	y_start,
		int *	x_end,
		int *	y_end
	)		`[static]`

Definition at line 243 of file imagefind.cpp.

                                                             {
  // Tries to find, inside pix, a rectangle [*x_start, *x_end) x
  // [*y_start, *y_end) bounded on all four sides by an edge that
  // HScanForEdge/VScanForEdge accept. The outputs start as the full pix
  // extent and are narrowed by the scan helpers. Returns true only if an
  // edge was found on every side.
  ASSERT_HOST(pix != NULL);
  *x_start = 0;
  *x_end = pixGetWidth(pix);
  *y_start = 0;
  *y_end = pixGetHeight(pix);

  uinT32* data = pixGetData(pix);
  int wpl = pixGetWpl(pix);
  bool any_cut = false;
  bool left_done = false;
  bool right_done = false;
  bool top_done = false;
  bool bottom_done = false;
  // Repeat while some side found its edge for the first time in the last
  // pass. Each *_done flag flips at most once, so any_cut can be set in at
  // most four passes.
  do {
    any_cut = false;
    // Find the top/bottom edges.
    // The thresholds are recomputed every pass from the current width.
    int width = *x_end - *x_start;
    int min_count = static_cast<int>(width * min_fraction);
    int max_count = static_cast<int>(width * max_fraction);
    int edge_width = static_cast<int>(width * max_skew_gradient);
    // The scan sits on the left of &&, so it runs (and may update *y_start)
    // even when top_done is already set; the flag only decides whether a hit
    // counts as a new cut. The same holds for the other three sides.
    if (HScanForEdge(data, wpl, *x_start, *x_end, min_count, edge_width,
                     max_count, *y_end, 1, y_start) && !top_done) {
      top_done = true;
      any_cut = true;
    }
    // *y_end is decremented before the reverse scan and incremented again
    // afterwards (presumably switching between an exclusive end and the
    // inclusive row the scan works on -- TODO confirm against HScanForEdge).
    --(*y_end);
    if (HScanForEdge(data, wpl, *x_start, *x_end, min_count, edge_width,
                     max_count, *y_start, -1, y_end) && !bottom_done) {
      bottom_done = true;
      any_cut = true;
    }
    ++(*y_end);

    // Find the left/right edges.
    // These thresholds use the height as just updated by the scans above.
    int height = *y_end - *y_start;
    min_count = static_cast<int>(height * min_fraction);
    max_count = static_cast<int>(height * max_fraction);
    edge_width = static_cast<int>(height * max_skew_gradient);
    if (VScanForEdge(data, wpl, *y_start, *y_end, min_count, edge_width,
                     max_count, *x_end, 1, x_start) && !left_done) {
      left_done = true;
      any_cut = true;
    }
    // Same decrement/restore pattern as for *y_end above.
    --(*x_end);
    if (VScanForEdge(data, wpl, *y_start, *y_end, min_count, edge_width,
                     max_count, *x_start, -1, x_end) && !right_done) {
      right_done = true;
      any_cut = true;
    }
    ++(*x_end);
  } while (any_cut);

  // All edges must satisfy the condition of sharp gradient in pixel density
  // in order for the full rectangle to be present.
  return left_done && right_done && top_done && bottom_done;
}

void tesseract::ImageFind::TransferImagePartsToImageMask	(	const FCOORD &	rerotation,
		ColPartitionGrid *	part_grid,
		Pix *	image_mask
	)		`[static]`

Definition at line 1222 of file imagefind.cpp.

                                                               {
  // Move every noise or image partition out of the grid onto a scratch list.
  ColPartition_LIST extracted_parts;
  ColPartition_IT extracted_it(&extracted_parts);
  ColPartitionGridSearch grid_search(part_grid);
  grid_search.StartFullSearch();
  for (ColPartition* candidate = grid_search.NextFullSearch();
       candidate != NULL;
       candidate = grid_search.NextFullSearch()) {
    const BlobRegionType region_type = candidate->blob_type();
    const bool noise_or_image = region_type == BRT_NOISE ||
                                region_type == BRT_RECTIMAGE ||
                                region_type == BRT_POLYIMAGE;
    if (!noise_or_image) continue;
    extracted_it.add_after_then_move(candidate);
    grid_search.RemoveBBox();
  }
  // Hand the collected partitions over to be rendered into image_mask.
  MarkAndDeleteImageParts(rerotation, part_grid, &extracted_parts,
                          image_mask);
}

The documentation for this class was generated from the following files:

tesseract-ocr/textord/imagefind.h
tesseract-ocr/textord/imagefind.cpp

Static Public Member Functions

Detailed Description

Member Function Documentation