Tesseract  3.02
tesseract-ocr/classify/ocrfeatures.h
Go to the documentation of this file.
00001 /******************************************************************************
00002  **     Filename:    ocrfeatures.h
00003  **     Purpose:     Generic definition of a feature.
00004  **     Author:      Dan Johnson
00005  **     History:     Sun May 20 10:28:30 1990, DSJ, Created.
00006  **
00007  **     (c) Copyright Hewlett-Packard Company, 1988.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  ******************************************************************************/
00018 #ifndef   FEATURES_H
00019 #define   FEATURES_H
00020 
00024 #include "blobs.h"
00025 
00026 #include <stdio.h>
00027 
00028 class DENORM;
00029 
// Min and Max are used below as field names (PARAM_DESC) and as macro
// parameter names (DefineParam).
// NOTE(review): presumably these #undefs remove same-named macros leaked by
// earlier headers -- confirm.
00030 #undef Min
00031 #undef Max
// NOTE(review): FEAT_NAME_SIZE is not referenced elsewhere in this header;
// presumably a name buffer size -- confirm at its use sites.
00032 #define FEAT_NAME_SIZE    80
00033 
00034 // Trap error codes that can be raised by this module.
00035 #define ILLEGAL_FEATURE_PARAM 1000
00036 #define ILLEGAL_NUM_FEATURES  1001
00037 
00038 // A character is described by multiple sets of extracted features.  Each
00039 // set contains a number of features of a particular type, for example, a
00040 // set of bays, or a set of closures, or a set of microfeatures.  Each
00041 // feature consists of a number of parameters.  All features within a
00042 // feature set contain the same number of parameters.  All circular
00043 // parameters are required to be the first parameters in the feature.
00044 
// Describes one parameter (dimension) of a feature type.  The field order
// must match the positional initializer emitted by the DefineParam macro in
// this header, which also computes Range, HalfRange and MidRange from Min
// and Max.
00045 struct PARAM_DESC {
00046   inT8 Circular;                   // TRUE if dimension wraps around
00047   inT8 NonEssential;               // TRUE if dimension not used in searches
00048   FLOAT32 Min;                     // low end of range for circular dimensions
00049   FLOAT32 Max;                     // high end of range for circular dimensions
00050   FLOAT32 Range;                   // Max - Min
00051   FLOAT32 HalfRange;               // (Max - Min)/2
00052   FLOAT32 MidRange;                // (Max + Min)/2
00053 };
00054 
// Describes a feature type: the number of parameters each feature of this
// type has, a short name, and the per-parameter descriptions.  The field
// order must match the positional initializer emitted by the DefineFeature
// macro in this header.
00055 struct FEATURE_DESC_STRUCT {
00056   uinT16 NumParams;                // total # of params (DefineFeature stores NL + NC here)
00057   const char *ShortName;           // short name for feature
00058   const PARAM_DESC *ParamDesc;     // array - one per param
00059 };
// Pointer alias.  Note that it points to a non-const struct, whereas the
// routines declared in this header take const FEATURE_DESC_STRUCT * directly.
00060 typedef FEATURE_DESC_STRUCT *FEATURE_DESC;
00061 
// A single feature: a pointer to the description of its type, followed by
// its parameter values.
// NOTE(review): Params is declared with one element but documented as
// variable size; presumably instances are over-allocated to hold
// Type->NumParams values -- confirm in the NewFeature implementation.
00062 struct FEATURE_STRUCT {
00063   const FEATURE_DESC_STRUCT *Type;  // points to description of feature type
00064   FLOAT32 Params[1];                // variable size array - params for feature
00065 };
// Pointer alias used by the routines declared in this header.
00066 typedef FEATURE_STRUCT *FEATURE;
00067 
// A set of features: the current count, the capacity, and the feature
// pointers themselves.
// NOTE(review): Features is declared with one element but documented as
// variable size; presumably instances are over-allocated to hold
// MaxNumFeatures pointers -- confirm in the NewFeatureSet implementation.
00068 struct FEATURE_SET_STRUCT {
00069   uinT16 NumFeatures;            // number of features in set
00070   uinT16 MaxNumFeatures;         // maximum size of feature set
00071   FEATURE Features[1];           // variable size array of features
00072 };
// Pointer alias used by the routines declared in this header.
00073 typedef FEATURE_SET_STRUCT *FEATURE_SET;
00074 
00075 // A generic character description, typed as a char pointer. In reality, it
00076 // will be a pointer to some data structure. Paired feature extractors and
00077 // matchers need to agree on the data structure to be used; the high-level
00078 // classifier does not need to know the details of this data structure.
00079 typedef char *CHAR_FEATURES;
00080 
// Pointer to a feature-extraction function: it takes a TBLOB pointer and a
// const DENORM reference, and returns a FEATURE_SET.
00081 typedef FEATURE_SET (*FX_FUNC) (TBLOB *, const DENORM&);
00082 
// Holds the extraction function pointer (an FX_FUNC) of a feature extractor.
00083 struct FEATURE_EXT_STRUCT {
00084   FX_FUNC Extractor;             // func to extract features
00085 };
00086 
00087 /*----------------------------------------------------------------------
00088     Macros for defining the parameters of a new feature
00089 ----------------------------------------------------------------------*/
// Opens the definition of a const PARAM_DESC array named Name.  Follow it
// with one DefineParam(...) per parameter and close it with EndParamDesc.
00090 #define StartParamDesc(Name)    \
00091 const PARAM_DESC Name[] = {
00092 
// Emits one PARAM_DESC initializer with a trailing comma: the four arguments
// in field order, then Range = Max - Min, HalfRange = (Max - Min)/2.0 and
// MidRange = (Max + Min)/2.0.  Min and Max are each expanded several times,
// so pass expressions without side effects.
00093 #define DefineParam(Circular, NonEssential, Min, Max)   \
00094         {Circular, NonEssential, Min, Max,                      \
00095         (Max) - (Min), (((Max) - (Min))/2.0), (((Max) + (Min))/2.0)},
00096 
// Closes the array initializer opened by StartParamDesc.
00097 #define EndParamDesc  };
00098 
00099 /*----------------------------------------------------------------------
00100 Macro for describing a new feature.  The parameters of the macro
00101 are as follows:
00102 
00103 DefineFeature (Name, NumLinear, NumCircular, ShortName, ParamName)

  Name        - identifier of the const FEATURE_DESC_STRUCT being defined
  NumLinear,
  NumCircular - parameter counts; their sum is stored in NumParams
  ShortName   - stored in the ShortName field
  ParamName   - stored in the ParamDesc field (a PARAM_DESC array, e.g. one
                built with StartParamDesc / DefineParam / EndParamDesc)
00104 ----------------------------------------------------------------------*/
00105 #define DefineFeature(Name, NL, NC, SN, PN)             \
00106 const FEATURE_DESC_STRUCT Name = {                              \
00107         ((NL) + (NC)), SN, PN};
00108 
00109 /*----------------------------------------------------------------------
00110         Generic routines that work for all feature types
00111 ----------------------------------------------------------------------*/
// NOTE(review): only the declarations are visible in this header.  The
// summaries below are inferred from names and signatures -- confirm them
// against the implementation before relying on them.
//   AddFeature        - presumably adds Feature to FeatureSet; returns BOOL8
//   FreeFeature       - presumably releases one feature
//   FreeFeatureSet    - presumably releases a feature set
//   NewFeature        - presumably creates a feature of the type FeatureDesc
//   NewFeatureSet     - presumably creates a set with room for NumFeatures
//   ReadFeature       - presumably reads one feature of type FeatureDesc from File
//   ReadFeatureSet    - presumably reads a set of such features from File
//   WriteFeature      - presumably writes one feature to File
//   WriteFeatureSet   - presumably writes a feature set to File
//   WriteOldParamDesc - presumably writes FeatureDesc's parameter
//                       descriptions to File in an older format
00112 BOOL8 AddFeature(FEATURE_SET FeatureSet, FEATURE Feature);
00113 
00114 void FreeFeature(FEATURE Feature);
00115 
00116 void FreeFeatureSet(FEATURE_SET FeatureSet);
00117 
00118 FEATURE NewFeature(const FEATURE_DESC_STRUCT *FeatureDesc);
00119 
00120 FEATURE_SET NewFeatureSet(int NumFeatures);
00121 
00122 FEATURE ReadFeature(FILE *File, const FEATURE_DESC_STRUCT *FeatureDesc);
00123 
00124 FEATURE_SET ReadFeatureSet(FILE *File, const FEATURE_DESC_STRUCT *FeatureDesc);
00125 
00126 void WriteFeature(FILE *File, FEATURE Feature);
00127 
00128 void WriteFeatureSet(FILE *File, FEATURE_SET FeatureSet);
00129 
00130 void WriteOldParamDesc(FILE *File, const FEATURE_DESC_STRUCT *FeatureDesc);
00131 
00132 #endif