Tesseract  3.02
tesseract-ocr/ccmain/thresholder.cpp
Go to the documentation of this file.
00001 
00002 // File:        thresholder.cpp
00003 // Description: Base API for thresholding images in tesseract.
00004 // Author:      Ray Smith
00005 // Created:     Mon May 12 11:28:15 PDT 2008
00006 //
00007 // (C) Copyright 2008, Google Inc.
00008 // Licensed under the Apache License, Version 2.0 (the "License");
00009 // you may not use this file except in compliance with the License.
00010 // You may obtain a copy of the License at
00011 // http://www.apache.org/licenses/LICENSE-2.0
00012 // Unless required by applicable law or agreed to in writing, software
00013 // distributed under the License is distributed on an "AS IS" BASIS,
00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015 // See the License for the specific language governing permissions and
00016 // limitations under the License.
00017 //
00019 
00020 #include "allheaders.h"
00021 
00022 #include "thresholder.h"
00023 
00024 #include <string.h>
00025 
00026 #include "img.h"
00027 #include "otsuthr.h"
00028 
00029 namespace tesseract {
00030 
00031 ImageThresholder::ImageThresholder()
00032   : pix_(NULL),
00033     image_data_(NULL),
00034     image_width_(0), image_height_(0),
00035     image_bytespp_(0), image_bytespl_(0),
00036     scale_(1), yres_(300), estimated_res_(300) {
00037   SetRectangle(0, 0, 0, 0);
00038 }
00039 
00040 ImageThresholder::~ImageThresholder() {
00041   Clear();
00042 }
00043 
00044 // Destroy the Pix if there is one, freeing memory.
00045 void ImageThresholder::Clear() {
00046   if (pix_ != NULL) {
00047     pixDestroy(&pix_);
00048     pix_ = NULL;
00049   }
00050   image_data_ = NULL;
00051 }
00052 
00053 // Return true if no image has been set.
00054 bool ImageThresholder::IsEmpty() const {
00055   if (pix_ != NULL)
00056     return false;
00057   return image_data_ == NULL;
00058 }
00059 
00060 // SetImage makes a copy of only the metadata, not the underlying
00061 // image buffer. It promises to treat the source as read-only in either case,
00062 // but in return assumes that the Pix or image buffer remain valid
00063 // throughout the life of the ImageThresholder.
00064 // Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
00065 // Palette color images will not work properly and must be converted to
00066 // 24 bit.
00067 // Binary images of 1 bit per pixel may also be given but they must be
00068 // byte packed with the MSB of the first byte being the first pixel, and a
00069 // one pixel is WHITE. For binary images set bytes_per_pixel=0.
00070 void ImageThresholder::SetImage(const unsigned char* imagedata,
00071                                 int width, int height,
00072                                 int bytes_per_pixel, int bytes_per_line) {
00073   if (pix_ != NULL)
00074     pixDestroy(&pix_);
00075   pix_ = NULL;
00076   image_data_ = imagedata;
00077   image_width_ = width;
00078   image_height_ = height;
00079   image_bytespp_ = bytes_per_pixel;
00080   image_bytespl_ = bytes_per_line;
00081   scale_ = 1;
00082   estimated_res_ = yres_ = 300;
00083   Init();
00084 }
00085 
00086 // Store the coordinates of the rectangle to process for later use.
00087 // Doesn't actually do any thresholding.
00088 void ImageThresholder::SetRectangle(int left, int top, int width, int height) {
00089   rect_left_ = left;
00090   rect_top_ = top;
00091   rect_width_ = width;
00092   rect_height_ = height;
00093 }
00094 
00095 // Get enough parameters to be able to rebuild bounding boxes in the
00096 // original image (not just within the rectangle).
00097 // Left and top are enough with top-down coordinates, but
00098 // the height of the rectangle and the image are needed for bottom-up.
00099 void ImageThresholder::GetImageSizes(int* left, int* top,
00100                                      int* width, int* height,
00101                                      int* imagewidth, int* imageheight) {
00102   *left = rect_left_;
00103   *top = rect_top_;
00104   *width = rect_width_;
00105   *height = rect_height_;
00106   *imagewidth = image_width_;
00107   *imageheight = image_height_;
00108 }
00109 
00110 // NOTE: Opposite to SetImage for raw images, SetImage for Pix clones its
00111 // input, so the source pix may be pixDestroyed immediately after.
00112 void ImageThresholder::SetImage(const Pix* pix) {
00113   image_data_ = NULL;
00114   if (pix_ != NULL)
00115     pixDestroy(&pix_);
00116   Pix* src = const_cast<Pix*>(pix);
00117   int depth;
00118   pixGetDimensions(src, &image_width_, &image_height_, &depth);
00119   // Convert the image as necessary so it is one of binary, plain RGB, or
00120   // 8 bit with no colormap.
00121   if (depth > 1 && depth < 8) {
00122     pix_ = pixConvertTo8(src, false);
00123   } else if (pixGetColormap(src)) {
00124     pix_ = pixRemoveColormap(src, REMOVE_CMAP_BASED_ON_SRC);
00125   } else {
00126     pix_ = pixClone(src);
00127   }
00128   depth = pixGetDepth(pix_);
00129   image_bytespp_ = depth / 8;
00130   image_bytespl_ = pixGetWpl(pix_) * sizeof(l_uint32);
00131   scale_ = 1;
00132   estimated_res_ = yres_ = pixGetYRes(src);
00133   Init();
00134 }
00135 
00136 // Threshold the source image as efficiently as possible to the output Pix.
00137 // Creates a Pix and sets pix to point to the resulting pointer.
00138 // Caller must use pixDestroy to free the created Pix.
00139 void ImageThresholder::ThresholdToPix(Pix** pix) {
00140   if (pix_ != NULL) {
00141     if (image_bytespp_ == 0) {
00142       // We have a binary image, so it just has to be cloned.
00143       *pix = GetPixRect();
00144     } else {
00145       if (image_bytespp_ == 4) {
00146         // Color data can just be passed direct.
00147         const uinT32* data = pixGetData(pix_);
00148         OtsuThresholdRectToPix(reinterpret_cast<const uinT8*>(data),
00149                                image_bytespp_, image_bytespl_, pix);
00150       } else {
00151         // Convert 8-bit to IMAGE and then pass its
00152         // buffer to the raw interface to complete the conversion.
00153         IMAGE temp_image;
00154         temp_image.FromPix(pix_);
00155         OtsuThresholdRectToPix(temp_image.get_buffer(),
00156                                image_bytespp_,
00157                                COMPUTE_IMAGE_XDIM(temp_image.get_xsize(),
00158                                                   temp_image.get_bpp()),
00159                                pix);
00160       }
00161     }
00162     return;
00163   }
00164   if (image_bytespp_ > 0) {
00165     // Threshold grey or color.
00166     OtsuThresholdRectToPix(image_data_, image_bytespp_, image_bytespl_, pix);
00167   } else {
00168     RawRectToPix(pix);
00169   }
00170 }
00171 
00172 // Common initialization shared between SetImage methods.
00173 void ImageThresholder::Init() {
00174   SetRectangle(0, 0, image_width_, image_height_);
00175 }
00176 
00177 // Get a clone/copy of the source image rectangle.
00178 // The returned Pix must be pixDestroyed.
00179 // This function will be used in the future by the page layout analysis, and
00180 // the layout analysis that uses it will only be available with Leptonica,
00181 // so there is no raw equivalent.
00182 Pix* ImageThresholder::GetPixRect() {
00183   if (pix_ != NULL) {
00184     if (IsFullImage()) {
00185       // Just clone the whole thing.
00186       return pixClone(pix_);
00187     } else {
00188       // Crop to the given rectangle.
00189       Box* box = boxCreate(rect_left_, rect_top_, rect_width_, rect_height_);
00190       Pix* cropped = pixClipRectangle(pix_, box, NULL);
00191       boxDestroy(&box);
00192       return cropped;
00193     }
00194   }
00195   // The input is raw, so we have to make a copy of it.
00196   Pix* raw_pix;
00197   RawRectToPix(&raw_pix);
00198   return raw_pix;
00199 }
00200 
00201 // Get a clone/copy of the source image rectangle, reduced to greyscale.
00202 // The returned Pix must be pixDestroyed.
00203 // This function will be used in the future by the page layout analysis, and
00204 // the layout analysis that uses it will only be available with Leptonica,
00205 // so there is no raw equivalent.
00206 Pix* ImageThresholder::GetPixRectGrey() {
00207   Pix* pix = GetPixRect();  // May have to be reduced to grey.
00208   int depth = pixGetDepth(pix);
00209   if (depth != 8) {
00210     Pix* result = depth < 8 ? pixConvertTo8(pix, false)
00211                             : pixConvertRGBToLuminance(pix);
00212     pixDestroy(&pix);
00213     return result;
00214   }
00215   return pix;
00216 }
00217 
00218 // Otsu threshold the rectangle, taking everything except the image buffer
00219 // pointer from the class, to the output Pix.
00220 void ImageThresholder::OtsuThresholdRectToPix(const unsigned char* imagedata,
00221                                               int bytes_per_pixel,
00222                                               int bytes_per_line,
00223                                               Pix** pix) const {
00224   int* thresholds;
00225   int* hi_values;
00226   OtsuThreshold(imagedata, bytes_per_pixel, bytes_per_line,
00227                 rect_left_, rect_top_, rect_width_, rect_height_,
00228                 &thresholds, &hi_values);
00229 
00230   // Threshold the image to the given IMAGE.
00231   ThresholdRectToPix(imagedata, bytes_per_pixel, bytes_per_line,
00232                      thresholds, hi_values, pix);
00233   delete [] thresholds;
00234   delete [] hi_values;
00235 }
00236 
00237 // Threshold the rectangle, taking everything except the image buffer pointer
00238 // from the class, using thresholds/hi_values to the output IMAGE.
00239 void ImageThresholder::ThresholdRectToPix(const unsigned char* imagedata,
00240                                           int bytes_per_pixel,
00241                                           int bytes_per_line,
00242                                           const int* thresholds,
00243                                           const int* hi_values,
00244                                           Pix** pix) const {
00245   *pix = pixCreate(rect_width_, rect_height_, 1);
00246   uinT32* pixdata = pixGetData(*pix);
00247   int wpl = pixGetWpl(*pix);
00248   const unsigned char* srcdata = imagedata + rect_top_* bytes_per_line +
00249                                  rect_left_ * bytes_per_pixel;
00250   for (int y = 0; y < rect_height_; ++y) {
00251     const uinT8* linedata = srcdata;
00252     uinT32* pixline = pixdata + y * wpl;
00253     for (int x = 0; x < rect_width_; ++x, linedata += bytes_per_pixel) {
00254       bool white_result = true;
00255       for (int ch = 0; ch < bytes_per_pixel; ++ch) {
00256         if (hi_values[ch] >= 0 &&
00257             (linedata[ch] > thresholds[ch]) == (hi_values[ch] == 0)) {
00258           white_result = false;
00259           break;
00260         }
00261       }
00262       if (white_result)
00263         CLEAR_DATA_BIT(pixline, x);
00264       else
00265         SET_DATA_BIT(pixline, x);
00266     }
00267     srcdata += bytes_per_line;
00268   }
00269 }
00270 
00271 // Copy the raw image rectangle, taking all data from the class, to the Pix.
00272 void ImageThresholder::RawRectToPix(Pix** pix) const {
00273   if (image_bytespp_ < 4) {
00274     // Go via a tesseract image structure (doesn't copy the data)
00275     // and use ToPix.
00276     IMAGE image;
00277     int bits_per_pixel = image_bytespp_ * 8;
00278     if (image_bytespp_ == 0)
00279       bits_per_pixel = 1;
00280     image.capture(const_cast<uinT8*>(image_data_),
00281                   image_width_, rect_top_ + rect_height_, bits_per_pixel);
00282     if (IsFullImage()) {
00283       *pix = image.ToPix();
00284     } else {
00285       IMAGE rect;
00286       rect.create(rect_width_, rect_height_, bits_per_pixel);
00287       // The capture chopped the image off at top+height, so copy
00288       // the rectangle with y = 0 to get a rectangle of height
00289       // starting at the bottom, since copy_sub_image uses bottom-up coords.
00290       copy_sub_image(&image, rect_left_, 0, rect_width_, rect_height_,
00291                      &rect, 0, 0, true);
00292       *pix = rect.ToPix();
00293     }
00294   } else {
00295     *pix = pixCreate(rect_width_, rect_height_, 32);
00296     uinT32* data = pixGetData(*pix);
00297     int wpl = pixGetWpl(*pix);
00298     const uinT8* imagedata = image_data_ + rect_top_ * image_bytespl_ +
00299                              rect_left_ * image_bytespp_;
00300     for (int y = 0; y < rect_height_; ++y) {
00301       const uinT8* linedata = imagedata;
00302       uinT32* line = data + y * wpl;
00303       for (int x = 0; x < rect_width_; ++x) {
00304         line[x] = (linedata[0] << 24) | (linedata[1] << 16) |
00305                   (linedata[2] << 8) | linedata[3];
00306         linedata += 4;
00307       }
00308       imagedata += image_bytespl_;
00309     }
00310   }
00311 }
00312 
00313 }  // namespace tesseract.
00314