Tesseract
3.02
|
00001 00002 // File: imagefind.h 00003 // Description: Class to find image and drawing regions in an image 00004 // and create a corresponding list of empty blobs. 00005 // Author: Ray Smith 00006 // Created: Fri Aug 01 10:50:01 PDT 2008 00007 // 00008 // (C) Copyright 2008, Google Inc. 00009 // Licensed under the Apache License, Version 2.0 (the "License"); 00010 // you may not use this file except in compliance with the License. 00011 // You may obtain a copy of the License at 00012 // http://www.apache.org/licenses/LICENSE-2.0 00013 // Unless required by applicable law or agreed to in writing, software 00014 // distributed under the License is distributed on an "AS IS" BASIS, 00015 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00016 // See the License for the specific language governing permissions and 00017 // limitations under the License. 00018 // 00020 00021 #ifndef TESSERACT_TEXTORD_IMAGEFIND_H__ 00022 #define TESSERACT_TEXTORD_IMAGEFIND_H__ 00023 00024 #include "host.h" 00025 00026 struct Boxa; 00027 struct Pix; 00028 struct Pixa; 00029 class TBOX; 00030 class FCOORD; 00031 class TO_BLOCK; 00032 class BLOBNBOX_LIST; 00033 00034 namespace tesseract { 00035 00036 class ColPartitionGrid; 00037 class ColPartition_LIST; 00038 class TabFind; 00039 00040 // The ImageFind class is a simple static function wrapper class that 00041 // exposes the FindImages function and some useful helper functions. 00042 class ImageFind { 00043 public: 00044 // Finds image regions within the BINARY source pix (page image) and returns 00045 // the image regions as a mask image. 00046 // The returned pix may be NULL, meaning no images found. 00047 // If not NULL, it must be PixDestroyed by the caller. 00048 static Pix* FindImages(Pix* pix); 00049 00050 // Generates a Boxa, Pixa pair from the input binary (image mask) pix, 00051 // analgous to pixConnComp, except that connected components which are nearly 00052 // rectangular are replaced with solid rectangles. 00053 // The returned boxa, pixa may be NULL, meaning no images found. 00054 // If not NULL, they must be destroyed by the caller. 00055 // Resolution of pix should match the source image (Tesseract::pix_binary_) 00056 // so the output coordinate systems match. 00057 static void ConnCompAndRectangularize(Pix* pix, Boxa** boxa, Pixa** pixa); 00058 00059 // Returns true if there is a rectangle in the source pix, such that all 00060 // pixel rows and column slices outside of it have less than 00061 // min_fraction of the pixels black, and within max_skew_gradient fraction 00062 // of the pixels on the inside, there are at least max_fraction of the 00063 // pixels black. In other words, the inside of the rectangle looks roughly 00064 // rectangular, and the outside of it looks like extra bits. 00065 // On return, the rectangle is defined by x_start, y_start, x_end and y_end. 00066 // Note: the algorithm is iterative, allowing it to slice off pixels from 00067 // one edge, allowing it to then slice off more pixels from another edge. 00068 static bool pixNearlyRectangular(Pix* pix, 00069 double min_fraction, double max_fraction, 00070 double max_skew_gradient, 00071 int* x_start, int* y_start, 00072 int* x_end, int* y_end); 00073 00074 // Given an input pix, and a bounding rectangle, the sides of the rectangle 00075 // are shrunk inwards until they bound any black pixels found within the 00076 // original rectangle. Returns false if the rectangle contains no black 00077 // pixels at all. 00078 static bool BoundsWithinRect(Pix* pix, int* x_start, int* y_start, 00079 int* x_end, int* y_end); 00080 00081 // Given a point in 3-D (RGB) space, returns the squared Euclidean distance 00082 // of the point from the given line, defined by a pair of points in the 3-D 00083 // (RGB) space, line1 and line2. 00084 static double ColorDistanceFromLine(const uinT8* line1, const uinT8* line2, 00085 const uinT8* point); 00086 00087 // Returns the leptonica combined code for the given RGB triplet. 00088 static uinT32 ComposeRGB(uinT32 r, uinT32 g, uinT32 b); 00089 00090 // Returns the input value clipped to a uinT8. 00091 static uinT8 ClipToByte(double pixel); 00092 00093 // Computes the light and dark extremes of color in the given rectangle of 00094 // the given pix, which is factor smaller than the coordinate system in rect. 00095 // The light and dark points are taken to be the upper and lower 8th-ile of 00096 // the most deviant of R, G and B. The value of the other 2 channels are 00097 // computed by linear fit against the most deviant. 00098 // The colors of the two point are returned in color1 and color2, with the 00099 // alpha channel set to a scaled mean rms of the fits. 00100 // If color_map1 is not null then it and color_map2 get rect pasted in them 00101 // with the two calculated colors, and rms map gets a pasted rect of the rms. 00102 // color_map1, color_map2 and rms_map are assumed to be the same scale as pix. 00103 static void ComputeRectangleColors(const TBOX& rect, Pix* pix, int factor, 00104 Pix* color_map1, Pix* color_map2, 00105 Pix* rms_map, 00106 uinT8* color1, uinT8* color2); 00107 00108 // Returns true if there are no black pixels in between the boxes. 00109 // The im_box must represent the bounding box of the pix in tesseract 00110 // coordinates, which may be negative, due to rotations to make the textlines 00111 // horizontal. The boxes are rotated by rotation, which should undo such 00112 // rotations, before mapping them onto the pix. 00113 static bool BlankImageInBetween(const TBOX& box1, const TBOX& box2, 00114 const TBOX& im_box, const FCOORD& rotation, 00115 Pix* pix); 00116 00117 // Returns the number of pixels in box in the pix. 00118 // The im_box must represent the bounding box of the pix in tesseract 00119 // coordinates, which may be negative, due to rotations to make the textlines 00120 // horizontal. The boxes are rotated by rotation, which should undo such 00121 // rotations, before mapping them onto the pix. 00122 static int CountPixelsInRotatedBox(TBOX box, const TBOX& im_box, 00123 const FCOORD& rotation, Pix* pix); 00124 00125 00126 // Locates all the image partitions in the part_grid, that were found by a 00127 // previous call to FindImagePartitions, marks them in the image_mask, 00128 // removes them from the grid, and deletes them. This makes it possble to 00129 // call FindImagePartitions again to produce less broken-up and less 00130 // overlapping image partitions. 00131 // rerotation specifies how to rotate the partition coords to match 00132 // the image_mask, since this function is used after orientation correction. 00133 static void TransferImagePartsToImageMask(const FCOORD& rerotation, 00134 ColPartitionGrid* part_grid, 00135 Pix* image_mask); 00136 00137 // Runs a CC analysis on the image_pix mask image, and creates 00138 // image partitions from them, cutting out strong text, and merging with 00139 // nearby image regions such that they don't interfere with text. 00140 // Rotation and rerotation specify how to rotate image coords to match 00141 // the blob and partition coords and back again. 00142 // The input/output part_grid owns all the created partitions, and 00143 // the partitions own all the fake blobs that belong in the partitions. 00144 // Since the other blobs in the other partitions will be owned by the block, 00145 // ColPartitionGrid::ReTypeBlobs must be called afterwards to fix this 00146 // situation and collect the image blobs. 00147 static void FindImagePartitions(Pix* image_pix, 00148 const FCOORD& rotation, 00149 const FCOORD& rerotation, 00150 TO_BLOCK* block, 00151 TabFind* tab_grid, 00152 ColPartitionGrid* part_grid, 00153 ColPartition_LIST* big_parts); 00154 }; 00155 00156 } // namespace tesseract. 00157 00158 #endif // TESSERACT_TEXTORD_LINEFIND_H__ 00159