Tesseract
3.02
|
00001 /****************************************************************************** 00002 ** Filename: normfeat.c 00003 ** Purpose: Definition of char normalization features. 00004 ** Author: Dan Johnson 00005 ** History: 12/14/90, DSJ, Created. 00006 ** 00007 ** (c) Copyright Hewlett-Packard Company, 1988. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 ******************************************************************************/ 00021 #include "normfeat.h" 00022 00023 #include "intfx.h" 00024 #include "featdefs.h" 00025 #include "mfoutline.h" 00026 00031 // Return the length of the outline in baseline normalized form. 00032 FLOAT32 ActualOutlineLength(FEATURE Feature) { 00033 return (Feature->Params[CharNormLength] * LENGTH_COMPRESSION); 00034 } 00035 00036 00037 /*---------------------------------------------------------------------------*/ 00038 // Return the character normalization feature for a blob. 00039 // 00040 // The features returned are in a scale where the x-height has been 00041 // normalized to live in the region y = [-0.25 .. 0.25]. Example ranges 00042 // for English below are based on the Linux font collection on 2009-12-04: 00043 // 00044 // Params[CharNormY] 00045 // The y coordinate of the grapheme's centroid. 00046 // English: [-0.27, 0.71] 00047 // 00048 // Params[CharNormLength] 00049 // The length of the grapheme's outline (tiny segments discarded), 00050 // divided by 10.0=LENGTH_COMPRESSION. 00051 // English: [0.16, 0.85] 00052 // 00053 // Params[CharNormRx] 00054 // The radius of gyration about the x axis, as measured from CharNormY. 00055 // English: [0.011, 0.34] 00056 // 00057 // Params[CharNormRy] 00058 // The radius of gyration about the y axis, as measured from 00059 // the x center of the grapheme's bounding box. 00060 // English: [0.011, 0.31] 00061 // 00062 FEATURE_SET ExtractCharNormFeatures(TBLOB *blob, const DENORM& denorm) { 00063 FEATURE_SET feature_set = NewFeatureSet(1); 00064 FEATURE feature = NewFeature(&CharNormDesc); 00065 00066 INT_FEATURE_ARRAY blfeatures; 00067 INT_FEATURE_ARRAY cnfeatures; 00068 INT_FX_RESULT_STRUCT FXInfo; 00069 00070 ExtractIntFeat(blob, denorm, blfeatures, cnfeatures, &FXInfo); 00071 00072 feature->Params[CharNormY] = 00073 MF_SCALE_FACTOR * (FXInfo.Ymean - BASELINE_OFFSET); 00074 feature->Params[CharNormLength] = 00075 MF_SCALE_FACTOR * FXInfo.Length / LENGTH_COMPRESSION; 00076 feature->Params[CharNormRx] = MF_SCALE_FACTOR * FXInfo.Rx; 00077 feature->Params[CharNormRy] = MF_SCALE_FACTOR * FXInfo.Ry; 00078 00079 AddFeature(feature_set, feature); 00080 00081 return feature_set; 00082 } /* ExtractCharNormFeatures */