Tesseract  3.02
tesseract-ocr/classify/normfeat.cpp
Go to the documentation of this file.
00001 /******************************************************************************
00002  **     Filename:    normfeat.cpp
00003  **     Purpose:     Definition of char normalization features.
00004  **     Author:      Dan Johnson
00005  **     History:     12/14/90, DSJ, Created.
00006  **
00007  **     (c) Copyright Hewlett-Packard Company, 1988.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  ******************************************************************************/
00021 #include "normfeat.h"
00022 
00023 #include "intfx.h"
00024 #include "featdefs.h"
00025 #include "mfoutline.h"
00026 
00031 // Return the length of the outline in baseline normalized form.
00032 FLOAT32 ActualOutlineLength(FEATURE Feature) {
00033   return (Feature->Params[CharNormLength] * LENGTH_COMPRESSION);
00034 }
00035 
00036 
00037 /*---------------------------------------------------------------------------*/
00038 // Return the character normalization feature for a blob.
00039 //
00040 // The features returned are in a scale where the x-height has been
00041 // normalized to live in the region y = [-0.25 .. 0.25].  Example ranges
00042 // for English below are based on the Linux font collection on 2009-12-04:
00043 //
00044 //   Params[CharNormY]
00045 //     The y coordinate of the grapheme's centroid.
00046 //     English: [-0.27, 0.71]
00047 //
00048 //   Params[CharNormLength]
00049 //     The length of the grapheme's outline (tiny segments discarded),
00050 //     divided by 10.0=LENGTH_COMPRESSION.
00051 //     English: [0.16, 0.85]
00052 //
00053 //   Params[CharNormRx]
00054 //     The radius of gyration about the x axis, as measured from CharNormY.
00055 //     English: [0.011, 0.34]
00056 //
00057 //   Params[CharNormRy]
00058 //     The radius of gyration about the y axis, as measured from
00059 //     the x center of the grapheme's bounding box.
00060 //     English: [0.011, 0.31]
00061 //
00062 FEATURE_SET ExtractCharNormFeatures(TBLOB *blob, const DENORM& denorm) {
00063   FEATURE_SET feature_set = NewFeatureSet(1);
00064   FEATURE feature = NewFeature(&CharNormDesc);
00065 
00066   INT_FEATURE_ARRAY blfeatures;
00067   INT_FEATURE_ARRAY cnfeatures;
00068   INT_FX_RESULT_STRUCT FXInfo;
00069 
00070   ExtractIntFeat(blob, denorm, blfeatures, cnfeatures, &FXInfo);
00071 
00072   feature->Params[CharNormY] =
00073       MF_SCALE_FACTOR * (FXInfo.Ymean - BASELINE_OFFSET);
00074   feature->Params[CharNormLength] =
00075       MF_SCALE_FACTOR * FXInfo.Length / LENGTH_COMPRESSION;
00076   feature->Params[CharNormRx] = MF_SCALE_FACTOR * FXInfo.Rx;
00077   feature->Params[CharNormRy] = MF_SCALE_FACTOR * FXInfo.Ry;
00078 
00079   AddFeature(feature_set, feature);
00080 
00081   return feature_set;
00082 }                                /* ExtractCharNormFeatures */