Tesseract
3.02
|
00001 /* -*-C-*- 00002 ******************************************************************************** 00003 * 00004 * File: gradechop.c (Formerly gradechop.c) 00005 * Description: 00006 * Author: Mark Seaman, OCR Technology 00007 * Created: Fri Oct 16 14:37:00 1987 00008 * Modified: Tue Jul 30 16:06:27 1991 (Mark Seaman) marks@hpgrlt 00009 * Language: C 00010 * Package: N/A 00011 * Status: Reusable Software Component 00012 * 00013 * (c) Copyright 1987, Hewlett-Packard Company. 00014 ** Licensed under the Apache License, Version 2.0 (the "License"); 00015 ** you may not use this file except in compliance with the License. 00016 ** You may obtain a copy of the License at 00017 ** http://www.apache.org/licenses/LICENSE-2.0 00018 ** Unless required by applicable law or agreed to in writing, software 00019 ** distributed under the License is distributed on an "AS IS" BASIS, 00020 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00021 ** See the License for the specific language governing permissions and 00022 ** limitations under the License. 00023 * 00024 *********************************************************************************/ 00025 /*---------------------------------------------------------------------- 00026 I n c l u d e s 00027 ----------------------------------------------------------------------*/ 00028 #include "gradechop.h" 00029 #include "wordrec.h" 00030 #include "olutil.h" 00031 #include "chop.h" 00032 #include "ndminx.h" 00033 #include <math.h> 00034 00035 /*---------------------------------------------------------------------- 00036 T y p e s 00037 ----------------------------------------------------------------------*/ 00038 #define CENTER_GRADE_CAP 25.0 00039 00040 /*---------------------------------------------------------------------- 00041 M a c r o s 00042 ----------------------------------------------------------------------*/ 00043 /********************************************************************** 00044 * find_bounds_loop 00045 * 00046 * This is a macro to be used by set_outline_bounds. 00047 **********************************************************************/ 00048 00049 #define find_bounds_loop(point1,point2,x_min,x_max) \ 00050 x_min = point2->pos.x; \ 00051 x_max = point2->pos.x; \ 00052 \ 00053 this_point = point1; \ 00054 do { \ 00055 x_min = MIN (this_point->pos.x, x_min); \ 00056 x_max = MAX (this_point->pos.x, x_max); \ 00057 this_point = this_point->next; \ 00058 } \ 00059 while (this_point != point2 && this_point != point1) \ 00060 00061 00062 namespace tesseract { 00063 00064 /*---------------------------------------------------------------------- 00065 F u n c t i o n s 00066 ----------------------------------------------------------------------*/ 00067 /********************************************************************** 00068 * full_split_priority 00069 * 00070 * Assign a priority to this split based on the features that it has. 00071 * Part of the priority has already been calculated so just return the 00072 * additional amount for the bounding box type information. 00073 **********************************************************************/ 00074 PRIORITY Wordrec::full_split_priority(SPLIT *split, inT16 xmin, inT16 xmax) { 00075 BOUNDS_RECT rect; 00076 00077 set_outline_bounds (split->point1, split->point2, rect); 00078 00079 if (xmin < MIN (rect[0], rect[2]) && xmax > MAX (rect[1], rect[3])) 00080 return (999.0); 00081 00082 return (grade_overlap (rect) + 00083 grade_center_of_blob (rect) + grade_width_change (rect)); 00084 } 00085 00086 00087 /********************************************************************** 00088 * grade_center_of_blob 00089 * 00090 * Return a grade for the a split. Rank it on closeness to the center 00091 * of the original blob 00092 * 0 = "perfect" 00093 * 100 = "no way jay" 00094 **********************************************************************/ 00095 PRIORITY Wordrec::grade_center_of_blob(register BOUNDS_RECT rect) { 00096 register PRIORITY grade; 00097 00098 grade = (rect[1] - rect[0]) - (rect[3] - rect[2]); 00099 if (grade < 0) 00100 grade = -grade; 00101 00102 grade *= chop_center_knob; 00103 grade = MIN (CENTER_GRADE_CAP, grade); 00104 return (MAX (0.0, grade)); 00105 } 00106 00107 00108 /********************************************************************** 00109 * grade_overlap 00110 * 00111 * Return a grade for this split for the overlap of the resultant blobs. 00112 * 0 = "perfect" 00113 * 100 = "no way jay" 00114 **********************************************************************/ 00115 PRIORITY Wordrec::grade_overlap(register BOUNDS_RECT rect) { 00116 register PRIORITY grade; 00117 register inT16 width1; 00118 register inT16 width2; 00119 register inT16 overlap; 00120 00121 width1 = rect[3] - rect[2]; 00122 width2 = rect[1] - rect[0]; 00123 00124 overlap = MIN (rect[1], rect[3]) - MAX (rect[0], rect[2]); 00125 width1 = MIN (width1, width2); 00126 if (overlap == width1) 00127 return (100.0); /* Total overlap */ 00128 00129 width1 = 2 * overlap - width1; /* Extra penalty for too */ 00130 overlap += MAX (0, width1); /* much overlap */ 00131 00132 grade = overlap * chop_overlap_knob; 00133 00134 return (MAX (0.0, grade)); 00135 } 00136 00137 00138 /********************************************************************** 00139 * grade_split_length 00140 * 00141 * Return a grade for the length of this split. 00142 * 0 = "perfect" 00143 * 100 = "no way jay" 00144 **********************************************************************/ 00145 PRIORITY Wordrec::grade_split_length(register SPLIT *split) { 00146 register PRIORITY grade; 00147 register float split_length; 00148 00149 split_length = weighted_edgept_dist (split->point1, split->point2, 00150 chop_x_y_weight); 00151 00152 if (split_length <= 0) 00153 grade = 0; 00154 else 00155 grade = sqrt (split_length) * chop_split_dist_knob; 00156 00157 return (MAX (0.0, grade)); 00158 } 00159 00160 00161 /********************************************************************** 00162 * grade_sharpness 00163 * 00164 * Return a grade for the sharpness of this split. 00165 * 0 = "perfect" 00166 * 100 = "no way jay" 00167 **********************************************************************/ 00168 PRIORITY Wordrec::grade_sharpness(register SPLIT *split) { 00169 register PRIORITY grade; 00170 00171 grade = point_priority (split->point1) + point_priority (split->point2); 00172 00173 if (grade < -360.0) 00174 grade = 0; 00175 else 00176 grade += 360.0; 00177 00178 grade *= chop_sharpness_knob; /* Values 0 to -360 */ 00179 00180 return (grade); 00181 } 00182 00183 00184 /********************************************************************** 00185 * grade_width_change 00186 * 00187 * Return a grade for the change in width of the resultant blobs. 00188 * 0 = "perfect" 00189 * 100 = "no way jay" 00190 **********************************************************************/ 00191 PRIORITY Wordrec::grade_width_change(register BOUNDS_RECT rect) { 00192 register PRIORITY grade; 00193 register inT32 width1; 00194 register inT32 width2; 00195 00196 width1 = rect[3] - rect[2]; 00197 width2 = rect[1] - rect[0]; 00198 00199 grade = 20 - (MAX (rect[1], rect[3]) 00200 - MIN (rect[0], rect[2]) - MAX (width1, width2)); 00201 00202 grade *= chop_width_change_knob; 00203 00204 return (MAX (0.0, grade)); 00205 } 00206 00207 00208 /********************************************************************** 00209 * set_outline_bounds 00210 * 00211 * Set up the limits for the x coordinate of the outline. 00212 **********************************************************************/ 00213 void Wordrec::set_outline_bounds(register EDGEPT *point1, 00214 register EDGEPT *point2, 00215 BOUNDS_RECT rect) { 00216 register EDGEPT *this_point; 00217 register inT16 x_min; 00218 register inT16 x_max; 00219 00220 find_bounds_loop(point1, point2, x_min, x_max); 00221 00222 rect[0] = x_min; 00223 rect[1] = x_max; 00224 00225 find_bounds_loop(point2, point1, x_min, x_max); 00226 00227 rect[2] = x_min; 00228 rect[3] = x_max; 00229 } 00230 00231 } // namespace tesseract