Tesseract
3.02
|
00001 /****************************************************************************** 00002 ** Filename: picofeat.c 00003 ** Purpose: Definition of pico-features. 00004 ** Author: Dan Johnson 00005 ** History: 9/4/90, DSJ, Created. 00006 ** 00007 ** (c) Copyright Hewlett-Packard Company, 1988. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 ******************************************************************************/ 00021 #include "picofeat.h" 00022 00023 #include "classify.h" 00024 #include "efio.h" 00025 #include "featdefs.h" 00026 #include "fpoint.h" 00027 #include "mfoutline.h" 00028 #include "ocrfeatures.h" 00029 #include "params.h" 00030 #include "trainingsample.h" 00031 00032 #include <math.h> 00033 #include <stdio.h> 00034 00035 /*--------------------------------------------------------------------------- 00036 Variables 00037 ----------------------------------------------------------------------------*/ 00038 00039 double_VAR(classify_pico_feature_length, 0.05, "Pico Feature Length"); 00040 00041 /*--------------------------------------------------------------------------- 00042 Private Function Prototypes 00043 ----------------------------------------------------------------------------*/ 00044 void ConvertSegmentToPicoFeat(FPOINT *Start, 00045 FPOINT *End, 00046 FEATURE_SET FeatureSet); 00047 00048 void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet); 00049 00050 void NormalizePicoX(FEATURE_SET FeatureSet); 00051 00055 /*---------------------------------------------------------------------------*/ 00056 namespace tesseract { 00057 FEATURE_SET Classify::ExtractPicoFeatures(TBLOB *Blob) { 00058 /* 00059 ** Parameters: 00060 ** Blob blob to extract pico-features from 00061 ** LineStats statistics on text row blob is in 00062 ** Globals: 00063 ** classify_norm_method normalization method currently specified 00064 ** Operation: Dummy for now. 00065 ** Return: Pico-features for Blob. 00066 ** Exceptions: none 00067 ** History: 9/4/90, DSJ, Created. 00068 */ 00069 LIST Outlines; 00070 LIST RemainingOutlines; 00071 MFOUTLINE Outline; 00072 FEATURE_SET FeatureSet; 00073 FLOAT32 XScale, YScale; 00074 00075 FeatureSet = NewFeatureSet(MAX_PICO_FEATURES); 00076 Outlines = ConvertBlob(Blob); 00077 NormalizeOutlines(Outlines, &XScale, &YScale); 00078 RemainingOutlines = Outlines; 00079 iterate(RemainingOutlines) { 00080 Outline = (MFOUTLINE) first_node (RemainingOutlines); 00081 ConvertToPicoFeatures2(Outline, FeatureSet); 00082 } 00083 if (classify_norm_method == baseline) 00084 NormalizePicoX(FeatureSet); 00085 FreeOutlines(Outlines); 00086 return (FeatureSet); 00087 00088 } /* ExtractPicoFeatures */ 00089 } // namespace tesseract 00090 00094 /*---------------------------------------------------------------------------*/ 00095 void ConvertSegmentToPicoFeat(FPOINT *Start, 00096 FPOINT *End, 00097 FEATURE_SET FeatureSet) { 00098 /* 00099 ** Parameters: 00100 ** Start starting point of pico-feature 00101 ** End ending point of pico-feature 00102 ** FeatureSet set to add pico-feature to 00103 ** Globals: 00104 ** classify_pico_feature_length length of a single pico-feature 00105 ** Operation: This routine converts an entire segment of an outline 00106 ** into a set of pico features which are added to 00107 ** FeatureSet. The length of the segment is rounded to the 00108 ** nearest whole number of pico-features. The pico-features 00109 ** are spaced evenly over the entire segment. 00110 ** Return: none (results are placed in FeatureSet) 00111 ** Exceptions: none 00112 ** History: Tue Apr 30 15:44:34 1991, DSJ, Created. 00113 */ 00114 FEATURE Feature; 00115 FLOAT32 Angle; 00116 FLOAT32 Length; 00117 int NumFeatures; 00118 FPOINT Center; 00119 FPOINT Delta; 00120 int i; 00121 00122 Angle = NormalizedAngleFrom (Start, End, 1.0); 00123 Length = DistanceBetween (*Start, *End); 00124 NumFeatures = (int) floor (Length / classify_pico_feature_length + 0.5); 00125 if (NumFeatures < 1) 00126 NumFeatures = 1; 00127 00128 /* compute vector for one pico feature */ 00129 Delta.x = XDelta (*Start, *End) / NumFeatures; 00130 Delta.y = YDelta (*Start, *End) / NumFeatures; 00131 00132 /* compute position of first pico feature */ 00133 Center.x = Start->x + Delta.x / 2.0; 00134 Center.y = Start->y + Delta.y / 2.0; 00135 00136 /* compute each pico feature in segment and add to feature set */ 00137 for (i = 0; i < NumFeatures; i++) { 00138 Feature = NewFeature (&PicoFeatDesc); 00139 Feature->Params[PicoFeatDir] = Angle; 00140 Feature->Params[PicoFeatX] = Center.x; 00141 Feature->Params[PicoFeatY] = Center.y; 00142 AddFeature(FeatureSet, Feature); 00143 00144 Center.x += Delta.x; 00145 Center.y += Delta.y; 00146 } 00147 } /* ConvertSegmentToPicoFeat */ 00148 00149 00150 /*---------------------------------------------------------------------------*/ 00151 void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet) { 00152 /* 00153 ** Parameters: 00154 ** Outline outline to extract micro-features from 00155 ** FeatureSet set of features to add pico-features to 00156 ** Globals: 00157 ** classify_pico_feature_length 00158 ** length of features to be extracted 00159 ** Operation: 00160 ** This routine steps thru the specified outline and cuts it 00161 ** up into pieces of equal length. These pieces become the 00162 ** desired pico-features. Each segment in the outline 00163 ** is converted into an integral number of pico-features. 00164 ** Return: none (results are returned in FeatureSet) 00165 ** Exceptions: none 00166 ** History: 4/30/91, DSJ, Adapted from ConvertToPicoFeatures(). 00167 */ 00168 MFOUTLINE Next; 00169 MFOUTLINE First; 00170 MFOUTLINE Current; 00171 00172 if (DegenerateOutline(Outline)) 00173 return; 00174 00175 First = Outline; 00176 Current = First; 00177 Next = NextPointAfter(Current); 00178 do { 00179 /* note that an edge is hidden if the ending point of the edge is 00180 marked as hidden. This situation happens because the order of 00181 the outlines is reversed when they are converted from the old 00182 format. In the old format, a hidden edge is marked by the 00183 starting point for that edge. */ 00184 if (!(PointAt(Next)->Hidden)) 00185 ConvertSegmentToPicoFeat (&(PointAt(Current)->Point), 00186 &(PointAt(Next)->Point), FeatureSet); 00187 00188 Current = Next; 00189 Next = NextPointAfter(Current); 00190 } 00191 while (Current != First); 00192 00193 } /* ConvertToPicoFeatures2 */ 00194 00195 00196 /*---------------------------------------------------------------------------*/ 00197 void NormalizePicoX(FEATURE_SET FeatureSet) { 00198 /* 00199 ** Parameters: 00200 ** FeatureSet pico-features to be normalized 00201 ** Globals: none 00202 ** Operation: This routine computes the average x position over all 00203 ** of the pico-features in FeatureSet and then renormalizes 00204 ** the pico-features to force this average to be the x origin 00205 ** (i.e. x=0). 00206 ** Return: none (FeatureSet is changed) 00207 ** Exceptions: none 00208 ** History: Tue Sep 4 16:50:08 1990, DSJ, Created. 00209 */ 00210 int i; 00211 FEATURE Feature; 00212 FLOAT32 Origin = 0.0; 00213 00214 for (i = 0; i < FeatureSet->NumFeatures; i++) { 00215 Feature = FeatureSet->Features[i]; 00216 Origin += Feature->Params[PicoFeatX]; 00217 } 00218 Origin /= FeatureSet->NumFeatures; 00219 00220 for (i = 0; i < FeatureSet->NumFeatures; i++) { 00221 Feature = FeatureSet->Features[i]; 00222 Feature->Params[PicoFeatX] -= Origin; 00223 } 00224 } /* NormalizePicoX */ 00225 00226 /*---------------------------------------------------------------------------*/ 00227 FEATURE_SET ExtractIntCNFeatures(TBLOB *blob, const DENORM& denorm) { 00228 /* 00229 ** Parameters: 00230 ** blob blob to extract features from 00231 ** denorm normalization/denormalization parameters. 00232 ** Return: Integer character-normalized features for blob. 00233 ** Exceptions: none 00234 ** History: 8/8/2011, rays, Created. 00235 */ 00236 tesseract::TrainingSample* sample = GetIntFeatures( 00237 tesseract::NM_CHAR_ANISOTROPIC, blob, denorm); 00238 if (sample == NULL) return NULL; 00239 00240 int num_features = sample->num_features(); 00241 const INT_FEATURE_STRUCT* features = sample->features(); 00242 FEATURE_SET feature_set = NewFeatureSet(num_features); 00243 for (int f = 0; f < num_features; ++f) { 00244 FEATURE feature = NewFeature(&IntFeatDesc); 00245 00246 feature->Params[IntX] = features[f].X; 00247 feature->Params[IntY] = features[f].Y; 00248 feature->Params[IntDir] = features[f].Theta; 00249 AddFeature(feature_set, feature); 00250 } 00251 delete sample; 00252 00253 return feature_set; 00254 } /* ExtractIntCNFeatures */ 00255 00256 /*---------------------------------------------------------------------------*/ 00257 FEATURE_SET ExtractIntGeoFeatures(TBLOB *blob, const DENORM& denorm) { 00258 /* 00259 ** Parameters: 00260 ** blob blob to extract features from 00261 ** denorm normalization/denormalization parameters. 00262 ** Return: Geometric (top/bottom/width) features for blob. 00263 ** Exceptions: none 00264 ** History: 8/8/2011, rays, Created. 00265 */ 00266 tesseract::TrainingSample* sample = GetIntFeatures( 00267 tesseract::NM_CHAR_ANISOTROPIC, blob, denorm); 00268 if (sample == NULL) return NULL; 00269 00270 FEATURE_SET feature_set = NewFeatureSet(1); 00271 FEATURE feature = NewFeature(&IntFeatDesc); 00272 00273 feature->Params[GeoBottom] = sample->geo_feature(GeoBottom); 00274 feature->Params[GeoTop] = sample->geo_feature(GeoTop); 00275 feature->Params[GeoWidth] = sample->geo_feature(GeoWidth); 00276 AddFeature(feature_set, feature); 00277 delete sample; 00278 00279 return feature_set; 00280 } /* ExtractIntGeoFeatures */