Tesseract  3.02
tesseract-ocr/classify/outfeat.cpp
Go to the documentation of this file.
00001 /******************************************************************************
00002  **     Filename:    outfeat.cpp
00003  **     Purpose:     Definition of outline-features.
00004  **     Author:      Dan Johnson
00005  **     History:     11/13/90, DSJ, Created.
00006  **
00007  **     (c) Copyright Hewlett-Packard Company, 1988.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  ******************************************************************************/
00021 #include "outfeat.h"
00022 
00023 #include "classify.h"
00024 #include "efio.h"
00025 #include "featdefs.h"
00026 #include "mfoutline.h"
00027 #include "ocrfeatures.h"
00028 
00029 #include <stdio.h>
00030 
00034 /*---------------------------------------------------------------------------*/
00035 namespace tesseract {
00036 FEATURE_SET Classify::ExtractOutlineFeatures(TBLOB *Blob) {
00037 /*
00038  **     Parameters:
00039  **             Blob            blob to extract pico-features from
00040  **             LineStats       statistics on text row blob is in
00041  **     Globals: none
00042  **     Operation: Convert each segment in the outline to a feature
00043  **             and return the features.
00044  **     Return: Outline-features for Blob.
00045  **     Exceptions: none
00046  **     History: 11/13/90, DSJ, Created.
00047  **             05/24/91, DSJ, Updated for either char or baseline normalize.
00048  */
00049   LIST Outlines;
00050   LIST RemainingOutlines;
00051   MFOUTLINE Outline;
00052   FEATURE_SET FeatureSet;
00053   FLOAT32 XScale, YScale;
00054 
00055   FeatureSet = NewFeatureSet (MAX_OUTLINE_FEATURES);
00056   if (Blob == NULL)
00057     return (FeatureSet);
00058 
00059   Outlines = ConvertBlob (Blob);
00060 
00061   NormalizeOutlines(Outlines, &XScale, &YScale);
00062   RemainingOutlines = Outlines;
00063   iterate(RemainingOutlines) {
00064     Outline = (MFOUTLINE) first_node (RemainingOutlines);
00065     ConvertToOutlineFeatures(Outline, FeatureSet);
00066   }
00067   if (classify_norm_method == baseline)
00068     NormalizeOutlineX(FeatureSet);
00069   FreeOutlines(Outlines);
00070   return (FeatureSet);
00071 }                                /* ExtractOutlineFeatures */
00072 }  // namespace tesseract
00073 
00077 /*---------------------------------------------------------------------------*/
00078 void AddOutlineFeatureToSet(FPOINT *Start,
00079                             FPOINT *End,
00080                             FEATURE_SET FeatureSet) {
00081 /*
00082  **     Parameters:
00083  **             Start           starting point of outline-feature
00084  **             End             ending point of outline-feature
00085  **             FeatureSet      set to add outline-feature to
00086  **     Globals: none
00087  **     Operation: This routine computes the midpoint between Start and
00088  **             End to obtain the x,y position of the outline-feature.  It
00089  **             also computes the direction from Start to End as the
00090  **             direction of the outline-feature and the distance from
00091  **             Start to End as the length of the outline-feature.
00092  **             This feature is then
00093  **             inserted into the next feature slot in FeatureSet.
00094  **     Return: none (results are placed in FeatureSet)
00095  **     Exceptions: none
00096  **     History: 11/13/90, DSJ, Created.
00097  */
00098   FEATURE Feature;
00099 
00100   Feature = NewFeature(&OutlineFeatDesc);
00101   Feature->Params[OutlineFeatDir] = NormalizedAngleFrom(Start, End, 1.0);
00102   Feature->Params[OutlineFeatX] = AverageOf(Start->x, End->x);
00103   Feature->Params[OutlineFeatY] = AverageOf(Start->y, End->y);
00104   Feature->Params[OutlineFeatLength] = DistanceBetween(*Start, *End);
00105   AddFeature(FeatureSet, Feature);
00106 
00107 }                                /* AddOutlineFeatureToSet */
00108 
00109 
00110 /*---------------------------------------------------------------------------*/
00111 void ConvertToOutlineFeatures(MFOUTLINE Outline, FEATURE_SET FeatureSet) {
00112 /*
00113  **     Parameters:
00114  **             Outline         outline to extract outline-features from
00115  **             FeatureSet      set of features to add outline-features to
00116  **     Globals: none
00117  **     Operation:
00118  **             This routine steps converts each section in the specified
00119  **             outline to a feature described by its x,y position, length
00120  **             and angle.
00121  **     Return: none (results are returned in FeatureSet)
00122  **     Exceptions: none
00123  **     History: 11/13/90, DSJ, Created.
00124  **                     5/24/91, DSJ, Added hidden edge capability.
00125  */
00126   MFOUTLINE Next;
00127   MFOUTLINE First;
00128   FPOINT FeatureStart;
00129   FPOINT FeatureEnd;
00130 
00131   if (DegenerateOutline (Outline))
00132     return;
00133 
00134   First = Outline;
00135   Next = First;
00136   do {
00137     FeatureStart = PointAt(Next)->Point;
00138     Next = NextPointAfter(Next);
00139 
00140     /* note that an edge is hidden if the ending point of the edge is
00141        marked as hidden.  This situation happens because the order of
00142        the outlines is reversed when they are converted from the old
00143        format.  In the old format, a hidden edge is marked by the
00144        starting point for that edge. */
00145     if (!PointAt(Next)->Hidden) {
00146       FeatureEnd = PointAt(Next)->Point;
00147       AddOutlineFeatureToSet(&FeatureStart, &FeatureEnd, FeatureSet);
00148     }
00149   }
00150   while (Next != First);
00151 }                                /* ConvertToOutlineFeatures */
00152 
00153 
00154 /*---------------------------------------------------------------------------*/
00155 void NormalizeOutlineX(FEATURE_SET FeatureSet) {
00156 /*
00157  **     Parameters:
00158  **             FeatureSet      outline-features to be normalized
00159  **     Globals: none
00160  **     Operation: This routine computes the weighted average x position
00161  **             over all of the outline-features in FeatureSet and then
00162  **             renormalizes the outline-features to force this average
00163  **             to be the x origin (i.e. x=0).
00164  **     Return: none (FeatureSet is changed)
00165  **     Exceptions: none
00166  **     History: 11/13/90, DSJ, Created.
00167  */
00168   int i;
00169   FEATURE Feature;
00170   FLOAT32 Length;
00171   FLOAT32 TotalX = 0.0;
00172   FLOAT32 TotalWeight = 0.0;
00173   FLOAT32 Origin;
00174 
00175   if (FeatureSet->NumFeatures <= 0)
00176     return;
00177 
00178   for (i = 0; i < FeatureSet->NumFeatures; i++) {
00179     Feature = FeatureSet->Features[i];
00180     Length = Feature->Params[OutlineFeatLength];
00181     TotalX += Feature->Params[OutlineFeatX] * Length;
00182     TotalWeight += Length;
00183   }
00184   Origin = TotalX / TotalWeight;
00185 
00186   for (i = 0; i < FeatureSet->NumFeatures; i++) {
00187     Feature = FeatureSet->Features[i];
00188     Feature->Params[OutlineFeatX] -= Origin;
00189   }
00190 }                                /* NormalizeOutlineX */