// Tesseract 3.02
00001 00002 // File: thresholder.cpp 00003 // Description: Base API for thresolding images in tesseract. 00004 // Author: Ray Smith 00005 // Created: Mon May 12 11:28:15 PDT 2008 00006 // 00007 // (C) Copyright 2008, Google Inc. 00008 // Licensed under the Apache License, Version 2.0 (the "License"); 00009 // you may not use this file except in compliance with the License. 00010 // You may obtain a copy of the License at 00011 // http://www.apache.org/licenses/LICENSE-2.0 00012 // Unless required by applicable law or agreed to in writing, software 00013 // distributed under the License is distributed on an "AS IS" BASIS, 00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 // See the License for the specific language governing permissions and 00016 // limitations under the License. 00017 // 00019 00020 #include "allheaders.h" 00021 00022 #include "thresholder.h" 00023 00024 #include <string.h> 00025 00026 #include "img.h" 00027 #include "otsuthr.h" 00028 00029 namespace tesseract { 00030 00031 ImageThresholder::ImageThresholder() 00032 : pix_(NULL), 00033 image_data_(NULL), 00034 image_width_(0), image_height_(0), 00035 image_bytespp_(0), image_bytespl_(0), 00036 scale_(1), yres_(300), estimated_res_(300) { 00037 SetRectangle(0, 0, 0, 0); 00038 } 00039 00040 ImageThresholder::~ImageThresholder() { 00041 Clear(); 00042 } 00043 00044 // Destroy the Pix if there is one, freeing memory. 00045 void ImageThresholder::Clear() { 00046 if (pix_ != NULL) { 00047 pixDestroy(&pix_); 00048 pix_ = NULL; 00049 } 00050 image_data_ = NULL; 00051 } 00052 00053 // Return true if no image has been set. 00054 bool ImageThresholder::IsEmpty() const { 00055 if (pix_ != NULL) 00056 return false; 00057 return image_data_ == NULL; 00058 } 00059 00060 // SetImage makes a copy of only the metadata, not the underlying 00061 // image buffer. 
It promises to treat the source as read-only in either case, 00062 // but in return assumes that the Pix or image buffer remain valid 00063 // throughout the life of the ImageThresholder. 00064 // Greyscale of 8 and color of 24 or 32 bits per pixel may be given. 00065 // Palette color images will not work properly and must be converted to 00066 // 24 bit. 00067 // Binary images of 1 bit per pixel may also be given but they must be 00068 // byte packed with the MSB of the first byte being the first pixel, and a 00069 // one pixel is WHITE. For binary images set bytes_per_pixel=0. 00070 void ImageThresholder::SetImage(const unsigned char* imagedata, 00071 int width, int height, 00072 int bytes_per_pixel, int bytes_per_line) { 00073 if (pix_ != NULL) 00074 pixDestroy(&pix_); 00075 pix_ = NULL; 00076 image_data_ = imagedata; 00077 image_width_ = width; 00078 image_height_ = height; 00079 image_bytespp_ = bytes_per_pixel; 00080 image_bytespl_ = bytes_per_line; 00081 scale_ = 1; 00082 estimated_res_ = yres_ = 300; 00083 Init(); 00084 } 00085 00086 // Store the coordinates of the rectangle to process for later use. 00087 // Doesn't actually do any thresholding. 00088 void ImageThresholder::SetRectangle(int left, int top, int width, int height) { 00089 rect_left_ = left; 00090 rect_top_ = top; 00091 rect_width_ = width; 00092 rect_height_ = height; 00093 } 00094 00095 // Get enough parameters to be able to rebuild bounding boxes in the 00096 // original image (not just within the rectangle). 00097 // Left and top are enough with top-down coordinates, but 00098 // the height of the rectangle and the image are needed for bottom-up. 
00099 void ImageThresholder::GetImageSizes(int* left, int* top, 00100 int* width, int* height, 00101 int* imagewidth, int* imageheight) { 00102 *left = rect_left_; 00103 *top = rect_top_; 00104 *width = rect_width_; 00105 *height = rect_height_; 00106 *imagewidth = image_width_; 00107 *imageheight = image_height_; 00108 } 00109 00110 // NOTE: Opposite to SetImage for raw images, SetImage for Pix clones its 00111 // input, so the source pix may be pixDestroyed immediately after. 00112 void ImageThresholder::SetImage(const Pix* pix) { 00113 image_data_ = NULL; 00114 if (pix_ != NULL) 00115 pixDestroy(&pix_); 00116 Pix* src = const_cast<Pix*>(pix); 00117 int depth; 00118 pixGetDimensions(src, &image_width_, &image_height_, &depth); 00119 // Convert the image as necessary so it is one of binary, plain RGB, or 00120 // 8 bit with no colormap. 00121 if (depth > 1 && depth < 8) { 00122 pix_ = pixConvertTo8(src, false); 00123 } else if (pixGetColormap(src)) { 00124 pix_ = pixRemoveColormap(src, REMOVE_CMAP_BASED_ON_SRC); 00125 } else { 00126 pix_ = pixClone(src); 00127 } 00128 depth = pixGetDepth(pix_); 00129 image_bytespp_ = depth / 8; 00130 image_bytespl_ = pixGetWpl(pix_) * sizeof(l_uint32); 00131 scale_ = 1; 00132 estimated_res_ = yres_ = pixGetYRes(src); 00133 Init(); 00134 } 00135 00136 // Threshold the source image as efficiently as possible to the output Pix. 00137 // Creates a Pix and sets pix to point to the resulting pointer. 00138 // Caller must use pixDestroy to free the created Pix. 00139 void ImageThresholder::ThresholdToPix(Pix** pix) { 00140 if (pix_ != NULL) { 00141 if (image_bytespp_ == 0) { 00142 // We have a binary image, so it just has to be cloned. 00143 *pix = GetPixRect(); 00144 } else { 00145 if (image_bytespp_ == 4) { 00146 // Color data can just be passed direct. 
00147 const uinT32* data = pixGetData(pix_); 00148 OtsuThresholdRectToPix(reinterpret_cast<const uinT8*>(data), 00149 image_bytespp_, image_bytespl_, pix); 00150 } else { 00151 // Convert 8-bit to IMAGE and then pass its 00152 // buffer to the raw interface to complete the conversion. 00153 IMAGE temp_image; 00154 temp_image.FromPix(pix_); 00155 OtsuThresholdRectToPix(temp_image.get_buffer(), 00156 image_bytespp_, 00157 COMPUTE_IMAGE_XDIM(temp_image.get_xsize(), 00158 temp_image.get_bpp()), 00159 pix); 00160 } 00161 } 00162 return; 00163 } 00164 if (image_bytespp_ > 0) { 00165 // Threshold grey or color. 00166 OtsuThresholdRectToPix(image_data_, image_bytespp_, image_bytespl_, pix); 00167 } else { 00168 RawRectToPix(pix); 00169 } 00170 } 00171 00172 // Common initialization shared between SetImage methods. 00173 void ImageThresholder::Init() { 00174 SetRectangle(0, 0, image_width_, image_height_); 00175 } 00176 00177 // Get a clone/copy of the source image rectangle. 00178 // The returned Pix must be pixDestroyed. 00179 // This function will be used in the future by the page layout analysis, and 00180 // the layout analysis that uses it will only be available with Leptonica, 00181 // so there is no raw equivalent. 00182 Pix* ImageThresholder::GetPixRect() { 00183 if (pix_ != NULL) { 00184 if (IsFullImage()) { 00185 // Just clone the whole thing. 00186 return pixClone(pix_); 00187 } else { 00188 // Crop to the given rectangle. 00189 Box* box = boxCreate(rect_left_, rect_top_, rect_width_, rect_height_); 00190 Pix* cropped = pixClipRectangle(pix_, box, NULL); 00191 boxDestroy(&box); 00192 return cropped; 00193 } 00194 } 00195 // The input is raw, so we have to make a copy of it. 00196 Pix* raw_pix; 00197 RawRectToPix(&raw_pix); 00198 return raw_pix; 00199 } 00200 00201 // Get a clone/copy of the source image rectangle, reduced to greyscale. 00202 // The returned Pix must be pixDestroyed. 
00203 // This function will be used in the future by the page layout analysis, and 00204 // the layout analysis that uses it will only be available with Leptonica, 00205 // so there is no raw equivalent. 00206 Pix* ImageThresholder::GetPixRectGrey() { 00207 Pix* pix = GetPixRect(); // May have to be reduced to grey. 00208 int depth = pixGetDepth(pix); 00209 if (depth != 8) { 00210 Pix* result = depth < 8 ? pixConvertTo8(pix, false) 00211 : pixConvertRGBToLuminance(pix); 00212 pixDestroy(&pix); 00213 return result; 00214 } 00215 return pix; 00216 } 00217 00218 // Otsu threshold the rectangle, taking everything except the image buffer 00219 // pointer from the class, to the output Pix. 00220 void ImageThresholder::OtsuThresholdRectToPix(const unsigned char* imagedata, 00221 int bytes_per_pixel, 00222 int bytes_per_line, 00223 Pix** pix) const { 00224 int* thresholds; 00225 int* hi_values; 00226 OtsuThreshold(imagedata, bytes_per_pixel, bytes_per_line, 00227 rect_left_, rect_top_, rect_width_, rect_height_, 00228 &thresholds, &hi_values); 00229 00230 // Threshold the image to the given IMAGE. 00231 ThresholdRectToPix(imagedata, bytes_per_pixel, bytes_per_line, 00232 thresholds, hi_values, pix); 00233 delete [] thresholds; 00234 delete [] hi_values; 00235 } 00236 00237 // Threshold the rectangle, taking everything except the image buffer pointer 00238 // from the class, using thresholds/hi_values to the output IMAGE. 
00239 void ImageThresholder::ThresholdRectToPix(const unsigned char* imagedata, 00240 int bytes_per_pixel, 00241 int bytes_per_line, 00242 const int* thresholds, 00243 const int* hi_values, 00244 Pix** pix) const { 00245 *pix = pixCreate(rect_width_, rect_height_, 1); 00246 uinT32* pixdata = pixGetData(*pix); 00247 int wpl = pixGetWpl(*pix); 00248 const unsigned char* srcdata = imagedata + rect_top_* bytes_per_line + 00249 rect_left_ * bytes_per_pixel; 00250 for (int y = 0; y < rect_height_; ++y) { 00251 const uinT8* linedata = srcdata; 00252 uinT32* pixline = pixdata + y * wpl; 00253 for (int x = 0; x < rect_width_; ++x, linedata += bytes_per_pixel) { 00254 bool white_result = true; 00255 for (int ch = 0; ch < bytes_per_pixel; ++ch) { 00256 if (hi_values[ch] >= 0 && 00257 (linedata[ch] > thresholds[ch]) == (hi_values[ch] == 0)) { 00258 white_result = false; 00259 break; 00260 } 00261 } 00262 if (white_result) 00263 CLEAR_DATA_BIT(pixline, x); 00264 else 00265 SET_DATA_BIT(pixline, x); 00266 } 00267 srcdata += bytes_per_line; 00268 } 00269 } 00270 00271 // Copy the raw image rectangle, taking all data from the class, to the Pix. 00272 void ImageThresholder::RawRectToPix(Pix** pix) const { 00273 if (image_bytespp_ < 4) { 00274 // Go via a tesseract image structure (doesn't copy the data) 00275 // and use ToPix. 00276 IMAGE image; 00277 int bits_per_pixel = image_bytespp_ * 8; 00278 if (image_bytespp_ == 0) 00279 bits_per_pixel = 1; 00280 image.capture(const_cast<uinT8*>(image_data_), 00281 image_width_, rect_top_ + rect_height_, bits_per_pixel); 00282 if (IsFullImage()) { 00283 *pix = image.ToPix(); 00284 } else { 00285 IMAGE rect; 00286 rect.create(rect_width_, rect_height_, bits_per_pixel); 00287 // The capture chopped the image off at top+height, so copy 00288 // the rectangle with y = 0 to get a rectangle of height 00289 // starting at the bottom, since copy_sub_image uses bottom-up coords. 
00290 copy_sub_image(&image, rect_left_, 0, rect_width_, rect_height_, 00291 &rect, 0, 0, true); 00292 *pix = rect.ToPix(); 00293 } 00294 } else { 00295 *pix = pixCreate(rect_width_, rect_height_, 32); 00296 uinT32* data = pixGetData(*pix); 00297 int wpl = pixGetWpl(*pix); 00298 const uinT8* imagedata = image_data_ + rect_top_ * image_bytespl_ + 00299 rect_left_ * image_bytespp_; 00300 for (int y = 0; y < rect_height_; ++y) { 00301 const uinT8* linedata = imagedata; 00302 uinT32* line = data + y * wpl; 00303 for (int x = 0; x < rect_width_; ++x) { 00304 line[x] = (linedata[0] << 24) | (linedata[1] << 16) | 00305 (linedata[2] << 8) | linedata[3]; 00306 linedata += 4; 00307 } 00308 imagedata += image_bytespl_; 00309 } 00310 } 00311 } 00312 00313 } // namespace tesseract. 00314