Tesseract  3.02
tesseract-ocr/classify/picofeat.cpp
Go to the documentation of this file.
00001 /******************************************************************************
00002  **     Filename:    picofeat.c
00003  **     Purpose:     Definition of pico-features.
00004  **     Author:      Dan Johnson
00005  **     History:     9/4/90, DSJ, Created.
00006  **
00007  **     (c) Copyright Hewlett-Packard Company, 1988.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  ******************************************************************************/
00021 #include "picofeat.h"
00022 
00023 #include "classify.h"
00024 #include "efio.h"
00025 #include "featdefs.h"
00026 #include "fpoint.h"
00027 #include "mfoutline.h"
00028 #include "ocrfeatures.h"
00029 #include "params.h"
00030 #include "trainingsample.h"
00031 
00032 #include <math.h>
00033 #include <stdio.h>
00034 
00035 /*---------------------------------------------------------------------------
00036           Variables
00037 ----------------------------------------------------------------------------*/
00038 
/* Length of a single pico-feature.  ConvertSegmentToPicoFeat divides the
   length of each outline segment by this value to decide how many
   pico-features the segment is cut into.  0.05 is the value supplied at
   definition. */
double_VAR(classify_pico_feature_length, 0.05, "Pico Feature Length");
00040 
00041 /*---------------------------------------------------------------------------
00042           Private Function Prototypes
00043 ----------------------------------------------------------------------------*/
/* Cuts the segment Start->End into evenly spaced pico-features and adds
   them to FeatureSet. */
void ConvertSegmentToPicoFeat(FPOINT *Start,
                              FPOINT *End,
                              FEATURE_SET FeatureSet);

/* Walks Outline once and converts every non-hidden edge into pico-features
   that are added to FeatureSet. */
void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet);

/* Shifts the x parameter of every pico-feature in FeatureSet so that the
   average x becomes 0. */
void NormalizePicoX(FEATURE_SET FeatureSet);
00051 
00055 /*---------------------------------------------------------------------------*/
00056 namespace tesseract {
00057 FEATURE_SET Classify::ExtractPicoFeatures(TBLOB *Blob) {
00058 /*
00059  **     Parameters:
00060  **             Blob            blob to extract pico-features from
00061  **             LineStats       statistics on text row blob is in
00062  **     Globals:
00063  **             classify_norm_method    normalization method currently specified
00064  **     Operation: Dummy for now.
00065  **     Return: Pico-features for Blob.
00066  **     Exceptions: none
00067  **     History: 9/4/90, DSJ, Created.
00068  */
00069   LIST Outlines;
00070   LIST RemainingOutlines;
00071   MFOUTLINE Outline;
00072   FEATURE_SET FeatureSet;
00073   FLOAT32 XScale, YScale;
00074 
00075   FeatureSet = NewFeatureSet(MAX_PICO_FEATURES);
00076   Outlines = ConvertBlob(Blob);
00077   NormalizeOutlines(Outlines, &XScale, &YScale);
00078   RemainingOutlines = Outlines;
00079   iterate(RemainingOutlines) {
00080     Outline = (MFOUTLINE) first_node (RemainingOutlines);
00081     ConvertToPicoFeatures2(Outline, FeatureSet);
00082   }
00083   if (classify_norm_method == baseline)
00084     NormalizePicoX(FeatureSet);
00085   FreeOutlines(Outlines);
00086   return (FeatureSet);
00087 
00088 }                                /* ExtractPicoFeatures */
00089 }  // namespace tesseract
00090 
00094 /*---------------------------------------------------------------------------*/
00095 void ConvertSegmentToPicoFeat(FPOINT *Start,
00096                               FPOINT *End,
00097                               FEATURE_SET FeatureSet) {
00098 /*
00099  **     Parameters:
00100  **             Start           starting point of pico-feature
00101  **             End             ending point of pico-feature
00102  **             FeatureSet      set to add pico-feature to
00103  **     Globals:
00104  **             classify_pico_feature_length    length of a single pico-feature
00105  **     Operation: This routine converts an entire segment of an outline
00106  **             into a set of pico features which are added to
00107  **             FeatureSet.  The length of the segment is rounded to the
00108  **             nearest whole number of pico-features.  The pico-features
00109  **             are spaced evenly over the entire segment.
00110  **     Return: none (results are placed in FeatureSet)
00111  **     Exceptions: none
00112  **     History: Tue Apr 30 15:44:34 1991, DSJ, Created.
00113  */
00114   FEATURE Feature;
00115   FLOAT32 Angle;
00116   FLOAT32 Length;
00117   int NumFeatures;
00118   FPOINT Center;
00119   FPOINT Delta;
00120   int i;
00121 
00122   Angle = NormalizedAngleFrom (Start, End, 1.0);
00123   Length = DistanceBetween (*Start, *End);
00124   NumFeatures = (int) floor (Length / classify_pico_feature_length + 0.5);
00125   if (NumFeatures < 1)
00126     NumFeatures = 1;
00127 
00128   /* compute vector for one pico feature */
00129   Delta.x = XDelta (*Start, *End) / NumFeatures;
00130   Delta.y = YDelta (*Start, *End) / NumFeatures;
00131 
00132   /* compute position of first pico feature */
00133   Center.x = Start->x + Delta.x / 2.0;
00134   Center.y = Start->y + Delta.y / 2.0;
00135 
00136   /* compute each pico feature in segment and add to feature set */
00137   for (i = 0; i < NumFeatures; i++) {
00138     Feature = NewFeature (&PicoFeatDesc);
00139     Feature->Params[PicoFeatDir] = Angle;
00140     Feature->Params[PicoFeatX] = Center.x;
00141     Feature->Params[PicoFeatY] = Center.y;
00142     AddFeature(FeatureSet, Feature);
00143 
00144     Center.x += Delta.x;
00145     Center.y += Delta.y;
00146   }
00147 }                                /* ConvertSegmentToPicoFeat */
00148 
00149 
00150 /*---------------------------------------------------------------------------*/
00151 void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet) {
00152 /*
00153  **     Parameters:
00154  **             Outline         outline to extract micro-features from
00155  **             FeatureSet      set of features to add pico-features to
00156  **     Globals:
00157  **             classify_pico_feature_length
00158  **                             length of features to be extracted
00159  **     Operation:
00160  **             This routine steps thru the specified outline and cuts it
00161  **             up into pieces of equal length.  These pieces become the
00162  **             desired pico-features.  Each segment in the outline
00163  **             is converted into an integral number of pico-features.
00164  **     Return: none (results are returned in FeatureSet)
00165  **     Exceptions: none
00166  **     History: 4/30/91, DSJ, Adapted from ConvertToPicoFeatures().
00167  */
00168   MFOUTLINE Next;
00169   MFOUTLINE First;
00170   MFOUTLINE Current;
00171 
00172   if (DegenerateOutline(Outline))
00173     return;
00174 
00175   First = Outline;
00176   Current = First;
00177   Next = NextPointAfter(Current);
00178   do {
00179     /* note that an edge is hidden if the ending point of the edge is
00180        marked as hidden.  This situation happens because the order of
00181        the outlines is reversed when they are converted from the old
00182        format.  In the old format, a hidden edge is marked by the
00183        starting point for that edge. */
00184     if (!(PointAt(Next)->Hidden))
00185       ConvertSegmentToPicoFeat (&(PointAt(Current)->Point),
00186         &(PointAt(Next)->Point), FeatureSet);
00187 
00188     Current = Next;
00189     Next = NextPointAfter(Current);
00190   }
00191   while (Current != First);
00192 
00193 }                                /* ConvertToPicoFeatures2 */
00194 
00195 
00196 /*---------------------------------------------------------------------------*/
00197 void NormalizePicoX(FEATURE_SET FeatureSet) {
00198 /*
00199  **     Parameters:
00200  **             FeatureSet      pico-features to be normalized
00201  **     Globals: none
00202  **     Operation: This routine computes the average x position over all
00203  **             of the pico-features in FeatureSet and then renormalizes
00204  **             the pico-features to force this average to be the x origin
00205  **             (i.e. x=0).
00206  **     Return: none (FeatureSet is changed)
00207  **     Exceptions: none
00208  **     History: Tue Sep  4 16:50:08 1990, DSJ, Created.
00209  */
00210   int i;
00211   FEATURE Feature;
00212   FLOAT32 Origin = 0.0;
00213 
00214   for (i = 0; i < FeatureSet->NumFeatures; i++) {
00215     Feature = FeatureSet->Features[i];
00216     Origin += Feature->Params[PicoFeatX];
00217   }
00218   Origin /= FeatureSet->NumFeatures;
00219 
00220   for (i = 0; i < FeatureSet->NumFeatures; i++) {
00221     Feature = FeatureSet->Features[i];
00222     Feature->Params[PicoFeatX] -= Origin;
00223   }
00224 }                                /* NormalizePicoX */
00225 
00226 /*---------------------------------------------------------------------------*/
00227 FEATURE_SET ExtractIntCNFeatures(TBLOB *blob, const DENORM& denorm) {
00228 /*
00229  ** Parameters:
00230  **   blob    blob to extract features from
00231  **   denorm  normalization/denormalization parameters.
00232  ** Return: Integer character-normalized features for blob.
00233  ** Exceptions: none
00234  ** History: 8/8/2011, rays, Created.
00235  */
00236   tesseract::TrainingSample* sample = GetIntFeatures(
00237       tesseract::NM_CHAR_ANISOTROPIC, blob, denorm);
00238   if (sample == NULL) return NULL;
00239 
00240   int num_features = sample->num_features();
00241   const INT_FEATURE_STRUCT* features = sample->features();
00242   FEATURE_SET feature_set = NewFeatureSet(num_features);
00243   for (int f = 0; f < num_features; ++f) {
00244     FEATURE feature = NewFeature(&IntFeatDesc);
00245 
00246     feature->Params[IntX] = features[f].X;
00247     feature->Params[IntY] = features[f].Y;
00248     feature->Params[IntDir] = features[f].Theta;
00249     AddFeature(feature_set, feature);
00250   }
00251   delete sample;
00252 
00253   return feature_set;
00254 }                                /* ExtractIntCNFeatures */
00255 
00256 /*---------------------------------------------------------------------------*/
00257 FEATURE_SET ExtractIntGeoFeatures(TBLOB *blob, const DENORM& denorm) {
00258 /*
00259  ** Parameters:
00260  **   blob    blob to extract features from
00261  **   denorm  normalization/denormalization parameters.
00262  ** Return: Geometric (top/bottom/width) features for blob.
00263  ** Exceptions: none
00264  ** History: 8/8/2011, rays, Created.
00265  */
00266   tesseract::TrainingSample* sample = GetIntFeatures(
00267       tesseract::NM_CHAR_ANISOTROPIC, blob, denorm);
00268   if (sample == NULL) return NULL;
00269 
00270   FEATURE_SET feature_set = NewFeatureSet(1);
00271   FEATURE feature = NewFeature(&IntFeatDesc);
00272 
00273   feature->Params[GeoBottom] = sample->geo_feature(GeoBottom);
00274   feature->Params[GeoTop] = sample->geo_feature(GeoTop);
00275   feature->Params[GeoWidth] = sample->geo_feature(GeoWidth);
00276   AddFeature(feature_set, feature);
00277   delete sample;
00278 
00279   return feature_set;
00280 }                                /* ExtractIntGeoFeatures */