Tesseract
3.02
|
00001 /********************************************************************** 00002 * File: otsuthr.cpp 00003 * Description: Simple Otsu thresholding for binarizing images. 00004 * Author: Ray Smith 00005 * Created: Fri Mar 07 12:31:01 PST 2008 00006 * 00007 * (C) Copyright 2008, Google Inc. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 * 00018 **********************************************************************/ 00019 00020 #include <string.h> 00021 #include "otsuthr.h" 00022 00023 namespace tesseract { 00024 00025 // Compute the Otsu threshold(s) for the given image rectangle, making one 00026 // for each channel. Each channel is always one byte per pixel. 00027 // Returns an array of threshold values and an array of hi_values, such 00028 // that a pixel value >threshold[channel] is considered foreground if 00029 // hi_values[channel] is 0 or background if 1. A hi_value of -1 indicates 00030 // that there is no apparent foreground. At least one hi_value will not be -1. 00031 // Delete thresholds and hi_values with delete [] after use. 00032 void OtsuThreshold(const unsigned char* imagedata, 00033 int bytes_per_pixel, int bytes_per_line, 00034 int left, int top, int width, int height, 00035 int** thresholds, int** hi_values) { 00036 // Of all channels with no good hi_value, keep the best so we can always 00037 // produce at least one answer. 00038 int best_hi_value = 1; 00039 int best_hi_index = 0; 00040 bool any_good_hivalue = false; 00041 double best_hi_dist = 0.0; 00042 *thresholds = new int[bytes_per_pixel]; 00043 *hi_values = new int[bytes_per_pixel]; 00044 00045 for (int ch = 0; ch < bytes_per_pixel; ++ch) { 00046 (*thresholds)[ch] = -1; 00047 (*hi_values)[ch] = -1; 00048 // Compute the histogram of the image rectangle. 00049 int histogram[kHistogramSize]; 00050 HistogramRect(imagedata + ch, bytes_per_pixel, bytes_per_line, 00051 left, top, width, height, histogram); 00052 int H; 00053 int best_omega_0; 00054 int best_t = OtsuStats(histogram, &H, &best_omega_0); 00055 if (best_omega_0 == 0 || best_omega_0 == H) { 00056 // This channel is empty. 00057 continue; 00058 } 00059 // To be a convincing foreground we must have a small fraction of H 00060 // or to be a convincing background we must have a large fraction of H. 00061 // In between we assume this channel contains no thresholding information. 00062 int hi_value = best_omega_0 < H * 0.5; 00063 (*thresholds)[ch] = best_t; 00064 if (best_omega_0 > H * 0.75) { 00065 any_good_hivalue = true; 00066 (*hi_values)[ch] = 0; 00067 } else if (best_omega_0 < H * 0.25) { 00068 any_good_hivalue = true; 00069 (*hi_values)[ch] = 1; 00070 } else { 00071 // In case all channels are like this, keep the best of the bad lot. 00072 double hi_dist = hi_value ? (H - best_omega_0) : best_omega_0; 00073 if (hi_dist > best_hi_dist) { 00074 best_hi_dist = hi_dist; 00075 best_hi_value = hi_value; 00076 best_hi_index = ch; 00077 } 00078 } 00079 } 00080 if (!any_good_hivalue) { 00081 // Use the best of the ones that were not good enough. 00082 (*hi_values)[best_hi_index] = best_hi_value; 00083 } 00084 } 00085 00086 // Compute the histogram for the given image rectangle, and the given 00087 // channel. (Channel pointed to by imagedata.) Each channel is always 00088 // one byte per pixel. 00089 // Bytes per pixel is used to skip channels not being 00090 // counted with this call in a multi-channel (pixel-major) image. 00091 // Histogram is always a kHistogramSize(256) element array to count 00092 // occurrences of each pixel value. 00093 void HistogramRect(const unsigned char* imagedata, 00094 int bytes_per_pixel, int bytes_per_line, 00095 int left, int top, int width, int height, 00096 int* histogram) { 00097 int bottom = top + height; 00098 memset(histogram, 0, sizeof(*histogram) * kHistogramSize); 00099 const unsigned char* pixels = imagedata + 00100 top * bytes_per_line + 00101 left * bytes_per_pixel; 00102 for (int y = top; y < bottom; ++y) { 00103 for (int x = 0; x < width; ++x) { 00104 ++histogram[pixels[x * bytes_per_pixel]]; 00105 } 00106 pixels += bytes_per_line; 00107 } 00108 } 00109 00110 // Compute the Otsu threshold(s) for the given histogram. 00111 // Also returns H = total count in histogram, and 00112 // omega0 = count of histogram below threshold. 00113 int OtsuStats(const int* histogram, int* H_out, int* omega0_out) { 00114 int H = 0; 00115 double mu_T = 0.0; 00116 for (int i = 0; i < kHistogramSize; ++i) { 00117 H += histogram[i]; 00118 mu_T += static_cast<double>(i) * histogram[i]; 00119 } 00120 00121 // Now maximize sig_sq_B over t. 00122 // http://www.ctie.monash.edu.au/hargreave/Cornall_Terry_328.pdf 00123 int best_t = -1; 00124 int omega_0, omega_1; 00125 int best_omega_0 = 0; 00126 double best_sig_sq_B = 0.0; 00127 double mu_0, mu_1, mu_t; 00128 omega_0 = 0; 00129 mu_t = 0.0; 00130 for (int t = 0; t < kHistogramSize - 1; ++t) { 00131 omega_0 += histogram[t]; 00132 mu_t += t * static_cast<double>(histogram[t]); 00133 if (omega_0 == 0) 00134 continue; 00135 omega_1 = H - omega_0; 00136 if (omega_1 == 0) 00137 break; 00138 mu_0 = mu_t / omega_0; 00139 mu_1 = (mu_T - mu_t) / omega_1; 00140 double sig_sq_B = mu_1 - mu_0; 00141 sig_sq_B *= sig_sq_B * omega_0 * omega_1; 00142 if (best_t < 0 || sig_sq_B > best_sig_sq_B) { 00143 best_sig_sq_B = sig_sq_B; 00144 best_t = t; 00145 best_omega_0 = omega_0; 00146 } 00147 } 00148 if (H_out != NULL) *H_out = H; 00149 if (omega0_out != NULL) *omega0_out = best_omega_0; 00150 return best_t; 00151 } 00152 00153 } // namespace tesseract.