Tesseract  3.02
tesseract-ocr/classify/ocrfeatures.cpp
Go to the documentation of this file.
00001 /******************************************************************************
00002  **     Filename:    ocrfeatures.cpp
00003  **     Purpose:     Generic definition of a feature.
00004  **     Author:      Dan Johnson
00005  **     History:     Mon May 21 10:49:04 1990, DSJ, Created.
00006  **
00007  **     (c) Copyright Hewlett-Packard Company, 1988.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  ******************************************************************************/
00021 #include "ocrfeatures.h"
00022 #include "emalloc.h"
00023 #include "callcpp.h"
00024 #include "danerror.h"
00025 #include "freelist.h"
00026 #include "scanutils.h"
00027 
00028 #include <assert.h>
00029 #include <math.h>
00030 
00034 /*---------------------------------------------------------------------------*/
00035 BOOL8 AddFeature(FEATURE_SET FeatureSet, FEATURE Feature) {
00036 /*
00037  **     Parameters:
00038  **             FeatureSet      set of features to add Feature to
00039  **             Feature         feature to be added to FeatureSet
00040  **     Globals: none
00041  **     Operation: Add a feature to a feature set.  If the feature set is
00042  **             already full, FALSE is returned to indicate that the
00043  **             feature could not be added to the set; otherwise, TRUE is
00044  **             returned.
00045  **     Return: TRUE if feature added to set, FALSE if set is already full.
00046  **     Exceptions: none
00047  **     History: Tue May 22 17:22:23 1990, DSJ, Created.
00048  */
00049   if (FeatureSet->NumFeatures >= FeatureSet->MaxNumFeatures) {
00050     FreeFeature(Feature);
00051     return FALSE;
00052   }
00053 
00054   FeatureSet->Features[FeatureSet->NumFeatures++] = Feature;
00055   return TRUE;
00056 }                                /* AddFeature */
00057 
00058 /*---------------------------------------------------------------------------*/
00059 void FreeFeature(FEATURE Feature) {
00060 /*
00061  **     Parameters:
00062  **             Feature         feature to be deallocated.
00063  **     Globals: none
00064  **     Operation: Release the memory consumed by the specified feature.
00065  **     Return: none
00066  **     Exceptions: none
00067  **     History: Mon May 21 13:33:27 1990, DSJ, Created.
00068  */
00069   if (Feature) {
00070     free_struct (Feature, sizeof (FEATURE_STRUCT)
00071       + sizeof (FLOAT32) * (Feature->Type->NumParams - 1),
00072       "sizeof(FEATURE_STRUCT)+sizeof(FLOAT32)*(NumParamsIn(Feature)-1)");
00073   }
00074 
00075 }                                /* FreeFeature */
00076 
00077 
00078 /*---------------------------------------------------------------------------*/
00079 void FreeFeatureSet(FEATURE_SET FeatureSet) {
00080 /*
00081  **     Parameters:
00082  **             FeatureSet      set of features to be freed
00083  **     Globals: none
00084  **     Operation: Release the memory consumed by the specified feature
00085  **             set.  This routine also frees the memory consumed by the
00086  **             features contained in the set.
00087  **     Return: none
00088  **     Exceptions: none
00089  **     History: Mon May 21 13:59:46 1990, DSJ, Created.
00090  */
00091   int i;
00092 
00093   if (FeatureSet) {
00094     for (i = 0; i < FeatureSet->NumFeatures; i++)
00095       FreeFeature(FeatureSet->Features[i]);
00096     memfree(FeatureSet);
00097   }
00098 }                                /* FreeFeatureSet */
00099 
00100 
00101 /*---------------------------------------------------------------------------*/
00102 FEATURE NewFeature(const FEATURE_DESC_STRUCT* FeatureDesc) {
00103 /*
00104  **     Parameters:
00105  **             FeatureDesc     description of feature to be created.
00106  **     Globals: none
00107  **     Operation: Allocate and return a new feature of the specified
00108  **             type.
00109  **     Return: New feature.
00110  **     Exceptions: none
00111  **     History: Mon May 21 14:06:42 1990, DSJ, Created.
00112  */
00113   FEATURE Feature;
00114 
00115   Feature = (FEATURE) alloc_struct (sizeof (FEATURE_STRUCT) +
00116     (FeatureDesc->NumParams - 1) *
00117     sizeof (FLOAT32),
00118     "sizeof(FEATURE_STRUCT)+sizeof(FLOAT32)*(NumParamsIn(Feature)-1)");
00119   Feature->Type = FeatureDesc;
00120   return (Feature);
00121 
00122 }                                /* NewFeature */
00123 
00124 
00125 /*---------------------------------------------------------------------------*/
00126 FEATURE_SET NewFeatureSet(int NumFeatures) {
00127 /*
00128  **     Parameters:
00129  **             NumFeatures     maximum # of features to be put in feature set
00130  **     Globals: none
00131  **     Operation: Allocate and return a new feature set large enough to
00132  **             hold the specified number of features.
00133  **     Return: New feature set.
00134  **     Exceptions: none
00135  **     History: Mon May 21 14:22:40 1990, DSJ, Created.
00136  */
00137   FEATURE_SET FeatureSet;
00138 
00139   FeatureSet = (FEATURE_SET) Emalloc (sizeof (FEATURE_SET_STRUCT) +
00140     (NumFeatures - 1) * sizeof (FEATURE));
00141   FeatureSet->MaxNumFeatures = NumFeatures;
00142   FeatureSet->NumFeatures = 0;
00143   return (FeatureSet);
00144 
00145 }                                /* NewFeatureSet */
00146 
00147 
00148 /*---------------------------------------------------------------------------*/
00149 FEATURE ReadFeature(FILE *File, const FEATURE_DESC_STRUCT* FeatureDesc) {
00150 /*
00151  **     Parameters:
00152  **             File            open text file to read feature from
00153  **             FeatureDesc     specifies type of feature to read from File
00154  **     Globals: none
00155  **     Operation: Create a new feature of the specified type and read in
00156  **             the value of its parameters from File.  The extra penalty
00157  **             for the feature is also computed by calling the appropriate
00158  **             function for the specified feature type.  The correct text
00159  **             representation for a feature is a list of N floats where
00160  **             N is the number of parameters in the feature.
00161  **     Return: New feature read from File.
00162  **     Exceptions: ILLEGAL_FEATURE_PARAM if text file doesn't match expected format
00163  **     History: Wed May 23 08:53:16 1990, DSJ, Created.
00164  */
00165   FEATURE Feature;
00166   int i;
00167 
00168   Feature = NewFeature (FeatureDesc);
00169   for (i = 0; i < Feature->Type->NumParams; i++) {
00170     if (fscanf (File, "%f", &(Feature->Params[i])) != 1)
00171       DoError (ILLEGAL_FEATURE_PARAM, "Illegal feature parameter spec");
00172 #ifndef _WIN32
00173     assert (!isnan(Feature->Params[i]));
00174 #endif
00175   }
00176   return (Feature);
00177 
00178 }                                /* ReadFeature */
00179 
00180 
00181 /*---------------------------------------------------------------------------*/
00182 FEATURE_SET ReadFeatureSet(FILE *File, const FEATURE_DESC_STRUCT* FeatureDesc) {
00183 /*
00184  **     Parameters:
00185  **             File            open text file to read new feature set from
00186  **             FeatureDesc     specifies type of feature to read from File
00187  **     Globals: none
00188  **     Operation: Create a new feature set of the specified type and read in
00189  **             the features from File.  The correct text representation
00190  **             for a feature set is an integer which specifies the number (N)
00191  **             of features in a set followed by a list of N feature
00192  **             descriptions.
00193  **     Return: New feature set read from File.
00194  **     Exceptions: none
00195  **     History: Wed May 23 09:17:31 1990, DSJ, Created.
00196  */
00197   FEATURE_SET FeatureSet;
00198   int NumFeatures;
00199   int i;
00200 
00201   if (fscanf (File, "%d", &NumFeatures) != 1 || NumFeatures < 0)
00202     DoError (ILLEGAL_NUM_FEATURES, "Illegal number of features in set");
00203 
00204   FeatureSet = NewFeatureSet (NumFeatures);
00205   for (i = 0; i < NumFeatures; i++)
00206     AddFeature (FeatureSet, ReadFeature (File, FeatureDesc));
00207 
00208   return (FeatureSet);
00209 
00210 }                                /* ReadFeatureSet */
00211 
00212 
00213 /*---------------------------------------------------------------------------*/
00214 void WriteFeature(FILE *File, FEATURE Feature) {
00215 /*
00216  **     Parameters:
00217  **             File            open text file to write Feature to
00218  **             Feature         feature to write out to File
00219  **     Globals: none
00220  **     Operation: Write a textual representation of Feature to File.
00221  **             This representation is simply a list of the N parameters
00222  **             of the feature, terminated with a newline.  It is assumed
00223  **             that the ExtraPenalty field can be reconstructed from the
00224  **             parameters of the feature.  It is also assumed that the
00225  **             feature type information is specified or assumed elsewhere.
00226  **     Return: none
00227  **     Exceptions: none
00228  **     History: Wed May 23 09:28:18 1990, DSJ, Created.
00229  */
00230   int i;
00231 
00232   for (i = 0; i < Feature->Type->NumParams; i++) {
00233 #ifndef _WIN32
00234     assert(!isnan(Feature->Params[i]));
00235 #endif
00236     fprintf(File, " %g", Feature->Params[i]);
00237   }
00238   fprintf(File, "\n");
00239 }                                /* WriteFeature */
00240 
00241 
00242 /*---------------------------------------------------------------------------*/
00243 void WriteFeatureSet(FILE *File, FEATURE_SET FeatureSet) {
00244 /*
00245  **     Parameters:
00246  **             File            open text file to write FeatureSet to
00247  **             FeatureSet      feature set to write to File
00248  **     Globals: none
00249  **     Operation: Write a textual representation of FeatureSet to File.
00250  **             This representation is an integer specifying the number of
00251  **             features in the set, followed by a newline, followed by
00252  **             text representations for each feature in the set.
00253  **     Return: none
00254  **     Exceptions: none
00255  **     History: Wed May 23 10:06:03 1990, DSJ, Created.
00256  */
00257   int i;
00258 
00259   if (FeatureSet) {
00260     fprintf (File, "%d\n", FeatureSet->NumFeatures);
00261     for (i = 0; i < FeatureSet->NumFeatures; i++)
00262       WriteFeature (File, FeatureSet->Features[i]);
00263   }
00264 }                                /* WriteFeatureSet */
00265 
00266 
00267 /*---------------------------------------------------------------------------*/
00268 void WriteOldParamDesc(FILE *File, const FEATURE_DESC_STRUCT* FeatureDesc) {
00269 /*
00270  **     Parameters:
00271  **             File            open text file to write FeatureDesc to
00272  **             FeatureDesc     feature descriptor to write to File
00273  **     Globals: none
00274  **     Operation: Write a textual representation of FeatureDesc to File
00275  **             in the old format (i.e. the format used by the clusterer).
00276  **             This format is:
00277  **                     Number of Params
00278  **                     Description of Param 1
00279  **                     ...
00280  **     Return: none
00281  **     Exceptions: none
00282  **     History: Fri May 25 15:27:18 1990, DSJ, Created.
00283  */
00284   int i;
00285 
00286   fprintf (File, "%d\n", FeatureDesc->NumParams);
00287   for (i = 0; i < FeatureDesc->NumParams; i++) {
00288     if (FeatureDesc->ParamDesc[i].Circular)
00289       fprintf (File, "circular ");
00290     else
00291       fprintf (File, "linear   ");
00292 
00293     if (FeatureDesc->ParamDesc[i].NonEssential)
00294       fprintf (File, "non-essential  ");
00295     else
00296       fprintf (File, "essential      ");
00297 
00298     fprintf (File, "%f  %f\n",
00299       FeatureDesc->ParamDesc[i].Min, FeatureDesc->ParamDesc[i].Max);
00300   }
00301 }                                /* WriteOldParamDesc */