Tesseract  3.02
tesseract-ocr/classify/featdefs.cpp
Go to the documentation of this file.
00001 /******************************************************************************
00002  **     Filename:    featdefs.c
00003  **     Purpose:     Definitions of currently defined feature types.
00004  **     Author:      Dan Johnson
00005  **     History:     Mon May 21 10:26:21 1990, DSJ, Created.
00006  **
00007  **     (c) Copyright Hewlett-Packard Company, 1988.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  ******************************************************************************/
00018 /*-----------------------------------------------------------------------------
00019           Include Files and Type Defines
00020 -----------------------------------------------------------------------------*/
00021 #ifdef _MSC_VER
00022 #include "mathfix.h"
00023 #endif
00024 
00025 #include "featdefs.h"
00026 #include "emalloc.h"
00027 #include "danerror.h"
00028 #include "scanutils.h"
00029 
00030 #include <string.h>
00031 #include <stdio.h>
00032 
// Error code passed to DoError by ReadCharDescription when the leading
// feature-set count is missing or out of range.
#define ILLEGAL_NUM_SETS  3001

// Initial value assigned to the PicoFeatureLength variable below.
#define PICO_FEATURE_LENGTH 0.05

/*-----------------------------------------------------------------------------
        Global Data Definitions and Declarations
-----------------------------------------------------------------------------*/
// Name strings handed to DefineFeature for the four feature types that are
// listed in DescDefs (micro, char-norm, int and geo features respectively).
const char *kMicroFeatureType = "mf";
const char *kCNFeatureType = "cn";
const char *kIntFeatureType = "if";
const char *kGeoFeatureType = "tb";
00045 
00046 // Define all of the parameters for the MicroFeature type.
00047 StartParamDesc(MicroFeatureParams)
00048 DefineParam(0, 0, -0.5, 0.5)
00049 DefineParam(0, 0, -0.25, 0.75)
00050 DefineParam(0, 1, 0.0, 1.0)
00051 DefineParam(1, 0, 0.0, 1.0)
00052 DefineParam (0, 1, -0.5, 0.5)
00053 DefineParam (0, 1, -0.5, 0.5)
00054 EndParamDesc
00055 // Now define the feature type itself (see features.h for parameters).
00056 DefineFeature(MicroFeatureDesc, 5, 1, kMicroFeatureType, MicroFeatureParams)
00057 
00058 // Define all of the parameters for the NormFeat type.
00059 StartParamDesc (CharNormParams)
00060 DefineParam(0, 0, -0.25, 0.75)
00061 DefineParam(0, 1, 0.0, 1.0)
00062 DefineParam(0, 0, 0.0, 1.0)
00063 DefineParam(0, 0, 0.0, 1.0)
00064 EndParamDesc
00065 // Now define the feature type itself (see features.h for parameters).
00066 DefineFeature(CharNormDesc, 4, 0, kCNFeatureType, CharNormParams)
00067 
00068 // Define all of the parameters for the IntFeature type
00069 StartParamDesc(IntFeatParams)
00070 DefineParam(0, 0, 0.0, 255.0)
00071 DefineParam(0, 0, 0.0, 255.0)
00072 DefineParam(1, 0, 0.0, 255.0)
00073 EndParamDesc
00074 // Now define the feature type itself (see features.h for parameters).
00075 DefineFeature(IntFeatDesc, 2, 1, kIntFeatureType, IntFeatParams)
00076 
00077 // Define all of the parameters for the GeoFeature type
00078 StartParamDesc(GeoFeatParams)
00079 DefineParam(0, 0, 0.0, 255.0)
00080 DefineParam(0, 0, 0.0, 255.0)
00081 DefineParam(0, 0, 0.0, 255.0)
00082 EndParamDesc
00083 // Now define the feature type itself (see features.h for parameters).
00084 DefineFeature(GeoFeatDesc, 3, 0, kGeoFeatureType, GeoFeatParams)
00085 
00086 // Other features used for training the adaptive classifier, but not used
00087 // during normal training, therefore not in the DescDefs array.
00088 
00089 // Define all of the parameters for the PicoFeature type
00090 // define knob that can be used to adjust pico-feature length.
00091 FLOAT32 PicoFeatureLength = PICO_FEATURE_LENGTH;
00092 StartParamDesc(PicoFeatParams)
00093 DefineParam(0, 0, -0.25, 0.75)
00094 DefineParam(1, 0, 0.0, 1.0)
00095 DefineParam(0, 0, -0.5, 0.5)
00096 EndParamDesc
00097 // Now define the feature type itself (see features.h for parameters).
00098 DefineFeature(PicoFeatDesc, 2, 1, "pf", PicoFeatParams)
00099 
00100 // Define all of the parameters for the OutlineFeature type.
00101 StartParamDesc(OutlineFeatParams)
00102 DefineParam(0, 0, -0.5, 0.5)
00103 DefineParam(0, 0, -0.25, 0.75)
00104 DefineParam(0, 0, 0.0, 1.0)
00105 DefineParam(1, 0, 0.0, 1.0)
00106 EndParamDesc
00107 // Now define the feature type itself (see features.h for parameters).
00108 DefineFeature(OutlineFeatDesc, 3, 1, "of", OutlineFeatParams)
00109 
00110 // MUST be kept in-sync with ExtractorDefs in fxdefs.cpp.
00111 static const FEATURE_DESC_STRUCT *DescDefs[NUM_FEATURE_TYPES] = {
00112   &MicroFeatureDesc,
00113   &CharNormDesc,
00114   &IntFeatDesc,
00115   &GeoFeatDesc
00116 };
00117 
00118 /*-----------------------------------------------------------------------------
00119               Public Code
00120 -----------------------------------------------------------------------------*/
00121 void InitFeatureDefs(FEATURE_DEFS_STRUCT *featuredefs) {
00122   featuredefs->NumFeatureTypes = NUM_FEATURE_TYPES;
00123   for (int i = 0; i < NUM_FEATURE_TYPES; ++i) {
00124     featuredefs->FeatureDesc[i] = DescDefs[i];
00125   }
00126 }
00127 
00128 /*---------------------------------------------------------------------------*/
00141 void FreeCharDescription(CHAR_DESC CharDesc) {
00142   int i;
00143 
00144   if (CharDesc) {
00145     for (i = 0; i < CharDesc->NumFeatureSets; i++)
00146       FreeFeatureSet (CharDesc->FeatureSets[i]);
00147     Efree(CharDesc);
00148   }
00149 }                                /* FreeCharDescription */
00150 
00151 
00152 /*---------------------------------------------------------------------------*/
00164 CHAR_DESC NewCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs) {
00165   CHAR_DESC CharDesc;
00166   int i;
00167 
00168   CharDesc = (CHAR_DESC) Emalloc (sizeof (CHAR_DESC_STRUCT));
00169   CharDesc->NumFeatureSets = FeatureDefs.NumFeatureTypes;
00170 
00171   for (i = 0; i < CharDesc->NumFeatureSets; i++)
00172     CharDesc->FeatureSets[i] = NULL;
00173 
00174   return (CharDesc);
00175 
00176 }                                /* NewCharDescription */
00177 
00178 
00179 /*---------------------------------------------------------------------------*/
00200 void WriteCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs,
00201                           FILE *File, CHAR_DESC CharDesc) {
00202   int Type;
00203   int NumSetsToWrite = 0;
00204 
00205   for (Type = 0; Type < CharDesc->NumFeatureSets; Type++)
00206     if (CharDesc->FeatureSets[Type])
00207       NumSetsToWrite++;
00208 
00209   fprintf (File, " %d\n", NumSetsToWrite);
00210   for (Type = 0; Type < CharDesc->NumFeatureSets; Type++)
00211   if (CharDesc->FeatureSets[Type]) {
00212     fprintf (File, "%s ", (FeatureDefs.FeatureDesc[Type])->ShortName);
00213     WriteFeatureSet (File, CharDesc->FeatureSets[Type]);
00214   }
00215 }                                /* WriteCharDescription */
00216 
00217 // Return whether all of the fields of the given feature set
00218 // are well defined (not inf or nan).
00219 bool ValidCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs,
00220                           CHAR_DESC CharDesc) {
00221   bool anything_written = false;
00222   bool well_formed = true;
00223   for (int Type = 0; Type < CharDesc->NumFeatureSets; Type++) {
00224     if (CharDesc->FeatureSets[Type]) {
00225       for (int i = 0; i < CharDesc->FeatureSets[Type]->NumFeatures; i++) {
00226         FEATURE feat = CharDesc->FeatureSets[Type]->Features[i];
00227         for (int p = 0; p < feat->Type->NumParams; p++) {
00228           if (isnan(feat->Params[p]) || isinf(feat->Params[p]))
00229             well_formed = false;
00230           else
00231             anything_written = true;
00232         }
00233       }
00234     }
00235   }
00236   return anything_written && well_formed;
00237 }                                /* ValidCharDescription */
00238 
00239 /*---------------------------------------------------------------------------*/
00261 CHAR_DESC ReadCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs,
00262                               FILE *File) {
00263   int NumSetsToRead;
00264   char ShortName[FEAT_NAME_SIZE];
00265   CHAR_DESC CharDesc;
00266   int Type;
00267 
00268   if (fscanf (File, "%d", &NumSetsToRead) != 1 ||
00269     NumSetsToRead < 0 || NumSetsToRead > FeatureDefs.NumFeatureTypes)
00270     DoError (ILLEGAL_NUM_SETS, "Illegal number of feature sets");
00271 
00272   CharDesc = NewCharDescription(FeatureDefs);
00273   for (; NumSetsToRead > 0; NumSetsToRead--) {
00274     fscanf (File, "%s", ShortName);
00275     Type = ShortNameToFeatureType(FeatureDefs, ShortName);
00276     CharDesc->FeatureSets[Type] =
00277       ReadFeatureSet (File, FeatureDefs.FeatureDesc[Type]);
00278   }
00279   return (CharDesc);
00280 
00281 }                                // ReadCharDescription
00282 
00283 
00284 /*---------------------------------------------------------------------------*/
00300 int ShortNameToFeatureType(const FEATURE_DEFS_STRUCT &FeatureDefs,
00301                            const char *ShortName) {
00302   int i;
00303 
00304   for (i = 0; i < FeatureDefs.NumFeatureTypes; i++)
00305     if (!strcmp ((FeatureDefs.FeatureDesc[i]->ShortName), ShortName))
00306       return (i);
00307   DoError (ILLEGAL_SHORT_NAME, "Illegal short name for a feature");
00308   return 0;
00309 
00310 }                                // ShortNameToFeatureType