Tesseract  3.02
tesseract-ocr/ccstruct/otsuthr.cpp
Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        otsuthr.cpp
00003  * Description: Simple Otsu thresholding for binarizing images.
00004  * Author:      Ray Smith
00005  * Created:     Fri Mar 07 12:31:01 PST 2008
00006  *
00007  * (C) Copyright 2008, Google Inc.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
00020 #include <string.h>
00021 #include "otsuthr.h"
00022 
00023 namespace tesseract {
00024 
00025 // Compute the Otsu threshold(s) for the given image rectangle, making one
00026 // for each channel. Each channel is always one byte per pixel.
00027 // Returns an array of threshold values and an array of hi_values, such
00028 // that a pixel value >threshold[channel] is considered foreground if
00029 // hi_values[channel] is 0 or background if 1. A hi_value of -1 indicates
00030 // that there is no apparent foreground. At least one hi_value will not be -1.
00031 // Delete thresholds and hi_values with delete [] after use.
00032 void OtsuThreshold(const unsigned char* imagedata,
00033                    int bytes_per_pixel, int bytes_per_line,
00034                    int left, int top, int width, int height,
00035                    int** thresholds, int** hi_values) {
00036   // Of all channels with no good hi_value, keep the best so we can always
00037   // produce at least one answer.
00038   int best_hi_value = 1;
00039   int best_hi_index = 0;
00040   bool any_good_hivalue = false;
00041   double best_hi_dist = 0.0;
00042   *thresholds = new int[bytes_per_pixel];
00043   *hi_values = new int[bytes_per_pixel];
00044 
00045   for (int ch = 0; ch < bytes_per_pixel; ++ch) {
00046     (*thresholds)[ch] = -1;
00047     (*hi_values)[ch] = -1;
00048     // Compute the histogram of the image rectangle.
00049     int histogram[kHistogramSize];
00050     HistogramRect(imagedata + ch, bytes_per_pixel, bytes_per_line,
00051                   left, top, width, height, histogram);
00052     int H;
00053     int best_omega_0;
00054     int best_t = OtsuStats(histogram, &H, &best_omega_0);
00055     if (best_omega_0 == 0 || best_omega_0 == H) {
00056        // This channel is empty.
00057        continue;
00058      }
00059     // To be a convincing foreground we must have a small fraction of H
00060     // or to be a convincing background we must have a large fraction of H.
00061     // In between we assume this channel contains no thresholding information.
00062     int hi_value = best_omega_0 < H * 0.5;
00063     (*thresholds)[ch] = best_t;
00064     if (best_omega_0 > H * 0.75) {
00065       any_good_hivalue = true;
00066       (*hi_values)[ch] = 0;
00067     } else if (best_omega_0 < H * 0.25) {
00068       any_good_hivalue = true;
00069       (*hi_values)[ch] = 1;
00070     } else {
00071       // In case all channels are like this, keep the best of the bad lot.
00072       double hi_dist = hi_value ? (H - best_omega_0) : best_omega_0;
00073       if (hi_dist > best_hi_dist) {
00074         best_hi_dist = hi_dist;
00075         best_hi_value = hi_value;
00076         best_hi_index = ch;
00077       }
00078     }
00079   }
00080   if (!any_good_hivalue) {
00081     // Use the best of the ones that were not good enough.
00082     (*hi_values)[best_hi_index] = best_hi_value;
00083   }
00084 }
00085 
00086 // Compute the histogram for the given image rectangle, and the given
00087 // channel. (Channel pointed to by imagedata.) Each channel is always
00088 // one byte per pixel.
00089 // Bytes per pixel is used to skip channels not being
00090 // counted with this call in a multi-channel (pixel-major) image.
00091 // Histogram is always a kHistogramSize(256) element array to count
00092 // occurrences of each pixel value.
00093 void HistogramRect(const unsigned char* imagedata,
00094                    int bytes_per_pixel, int bytes_per_line,
00095                    int left, int top, int width, int height,
00096                    int* histogram) {
00097   int bottom = top + height;
00098   memset(histogram, 0, sizeof(*histogram) * kHistogramSize);
00099   const unsigned char* pixels = imagedata +
00100                                 top * bytes_per_line +
00101                                 left * bytes_per_pixel;
00102   for (int y = top; y < bottom; ++y) {
00103     for (int x = 0; x < width; ++x) {
00104       ++histogram[pixels[x * bytes_per_pixel]];
00105     }
00106     pixels += bytes_per_line;
00107   }
00108 }
00109 
00110 // Compute the Otsu threshold(s) for the given histogram.
00111 // Also returns H = total count in histogram, and
00112 // omega0 = count of histogram below threshold.
00113 int OtsuStats(const int* histogram, int* H_out, int* omega0_out) {
00114   int H = 0;
00115   double mu_T = 0.0;
00116   for (int i = 0; i < kHistogramSize; ++i) {
00117     H += histogram[i];
00118     mu_T += static_cast<double>(i) * histogram[i];
00119   }
00120 
00121   // Now maximize sig_sq_B over t.
00122   // http://www.ctie.monash.edu.au/hargreave/Cornall_Terry_328.pdf
00123   int best_t = -1;
00124   int omega_0, omega_1;
00125   int best_omega_0 = 0;
00126   double best_sig_sq_B = 0.0;
00127   double mu_0, mu_1, mu_t;
00128   omega_0 = 0;
00129   mu_t = 0.0;
00130   for (int t = 0; t < kHistogramSize - 1; ++t) {
00131     omega_0 += histogram[t];
00132     mu_t += t * static_cast<double>(histogram[t]);
00133     if (omega_0 == 0)
00134       continue;
00135     omega_1 = H - omega_0;
00136     if (omega_1 == 0)
00137       break;
00138     mu_0 = mu_t / omega_0;
00139     mu_1 = (mu_T - mu_t) / omega_1;
00140     double sig_sq_B = mu_1 - mu_0;
00141     sig_sq_B *= sig_sq_B * omega_0 * omega_1;
00142     if (best_t < 0 || sig_sq_B > best_sig_sq_B) {
00143       best_sig_sq_B = sig_sq_B;
00144       best_t = t;
00145       best_omega_0 = omega_0;
00146     }
00147   }
00148   if (H_out != NULL) *H_out = H;
00149   if (omega0_out != NULL) *omega0_out = best_omega_0;
00150   return best_t;
00151 }
00152 
00153 }  // namespace tesseract.