Tesseract  3.02
tesseract-ocr/training/mergenf.cpp
Go to the documentation of this file.
00001 /******************************************************************************
00002 **  Filename:    MergeNF.c
00003 **  Purpose:     Program for merging similar nano-feature protos
00004 **  Author:      Dan Johnson
00005 **  History:     Wed Nov 21 09:55:23 1990, DSJ, Created.
00006 **
00007  ** (c) Copyright Hewlett-Packard Company, 1988.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017 ******************************************************************************/
00018 #include "mergenf.h"
00019 #include "host.h"
00020 #include "efio.h"
00021 #include "clusttool.h"
00022 #include "cluster.h"
00023 #include "oldlist.h"
00024 #include "protos.h"
00025 #include "ndminx.h"
00026 #include "ocrfeatures.h"
00027 #include "const.h"
00028 #include "featdefs.h"
00029 #include "intproto.h"
00030 #include "params.h"
00031 
00032 #include <stdio.h>
00033 #include <string.h>
00034 #include <math.h>
00035 
00036 
00037 /*-------------------once in subfeat---------------------------------*/
00038 double_VAR(training_angle_match_scale, 1.0, "Angle Match Scale ...");
00039 
00040 double_VAR(training_similarity_midpoint, 0.0075, "Similarity Midpoint ...");
00041 
00042 double_VAR(training_similarity_curl, 2.0, "Similarity Curl ...");
00043 
00044 /*-----------------------------once in fasttrain----------------------------------*/
00045 double_VAR(training_tangent_bbox_pad, 0.5, "Tangent bounding box pad ...");
00046 
00047 double_VAR(training_orthogonal_bbox_pad, 2.5, "Orthogonal bounding box pad ...");
00048 
00049 double_VAR(training_angle_pad, 45.0, "Angle pad ...");
00050 
00067 FLOAT32 CompareProtos(PROTO p1, PROTO p2) {
00068   FEATURE Feature;
00069   FLOAT32 WorstEvidence = WORST_EVIDENCE;
00070   FLOAT32 Evidence;
00071   FLOAT32 Angle, Length;
00072 
00073   /* if p1 and p2 are not close in length, don't let them match */
00074   Length = fabs (p1->Length - p2->Length);
00075   if (Length > MAX_LENGTH_MISMATCH)
00076     return (0.0);
00077 
00078   /* create a dummy pico-feature to be used for comparisons */
00079   Feature = NewFeature (&PicoFeatDesc);
00080   Feature->Params[PicoFeatDir] = p1->Angle;
00081 
00082   /* convert angle to radians */
00083   Angle = p1->Angle * 2.0 * PI;
00084 
00085   /* find distance from center of p1 to 1/2 picofeat from end */
00086   Length = p1->Length / 2.0 - GetPicoFeatureLength () / 2.0;
00087   if (Length < 0) Length = 0;
00088 
00089   /* set the dummy pico-feature at one end of p1 and match it to p2 */
00090   Feature->Params[PicoFeatX] = p1->X + cos (Angle) * Length;
00091   Feature->Params[PicoFeatY] = p1->Y + sin (Angle) * Length;
00092   if (DummyFastMatch (Feature, p2)) {
00093     Evidence = SubfeatureEvidence (Feature, p2);
00094     if (Evidence < WorstEvidence)
00095       WorstEvidence = Evidence;
00096   } else {
00097     FreeFeature(Feature);
00098     return 0.0;
00099   }
00100 
00101   /* set the dummy pico-feature at the other end of p1 and match it to p2 */
00102   Feature->Params[PicoFeatX] = p1->X - cos (Angle) * Length;
00103   Feature->Params[PicoFeatY] = p1->Y - sin (Angle) * Length;
00104   if (DummyFastMatch (Feature, p2)) {
00105     Evidence = SubfeatureEvidence (Feature, p2);
00106     if (Evidence < WorstEvidence)
00107       WorstEvidence = Evidence;
00108   } else {
00109     FreeFeature(Feature);
00110     return 0.0;
00111   }
00112 
00113   FreeFeature (Feature);
00114   return (WorstEvidence);
00115 
00116 } /* CompareProtos */
00117 
00133 void ComputeMergedProto (PROTO  p1,
00134                          PROTO  p2,
00135                          FLOAT32  w1,
00136                          FLOAT32  w2,
00137                          PROTO  MergedProto) {
00138   FLOAT32 TotalWeight;
00139 
00140   TotalWeight = w1 + w2;
00141   w1 /= TotalWeight;
00142   w2 /= TotalWeight;
00143 
00144   MergedProto->X = p1->X * w1 + p2->X * w2;
00145   MergedProto->Y = p1->Y * w1 + p2->Y * w2;
00146   MergedProto->Length = p1->Length * w1 + p2->Length * w2;
00147   MergedProto->Angle = p1->Angle * w1 + p2->Angle * w2;
00148   FillABC(MergedProto);
00149 } /* ComputeMergedProto */
00150 
00167 int FindClosestExistingProto(CLASS_TYPE Class, int NumMerged[],
00168                              PROTOTYPE  *Prototype) {
00169   PROTO_STRUCT  NewProto;
00170   PROTO_STRUCT  MergedProto;
00171   int   Pid;
00172   PROTO   Proto;
00173   int   BestProto;
00174   FLOAT32 BestMatch;
00175   FLOAT32 Match, OldMatch, NewMatch;
00176 
00177   MakeNewFromOld (&NewProto, Prototype);
00178 
00179   BestProto = NO_PROTO;
00180   BestMatch = WORST_MATCH_ALLOWED;
00181   for (Pid = 0; Pid < Class->NumProtos; Pid++) {
00182     Proto  = ProtoIn(Class, Pid);
00183     ComputeMergedProto(Proto, &NewProto,
00184       (FLOAT32) NumMerged[Pid], 1.0, &MergedProto);
00185     OldMatch = CompareProtos(Proto, &MergedProto);
00186     NewMatch = CompareProtos(&NewProto, &MergedProto);
00187     Match = MIN(OldMatch, NewMatch);
00188     if (Match > BestMatch) {
00189       BestProto = Pid;
00190       BestMatch = Match;
00191     }
00192   }
00193   return BestProto;
00194 } /* FindClosestExistingProto */
00195 
00208 void MakeNewFromOld(PROTO New, PROTOTYPE *Old) {
00209   New->X = CenterX(Old->Mean);
00210   New->Y = CenterY(Old->Mean);
00211   New->Length = LengthOf(Old->Mean);
00212   New->Angle = OrientationOf(Old->Mean);
00213   FillABC(New);
00214 } /* MakeNewFromOld */
00215 
00216 /*-------------------once in subfeat---------------------------------*/
00217 
00223 FLOAT32 SubfeatureEvidence(FEATURE Feature, PROTO Proto) {
00224   float       Distance;
00225   float       Dangle;
00226 
00227   Dangle   = Proto->Angle - Feature->Params[PicoFeatDir];
00228   if (Dangle < -0.5) Dangle += 1.0;
00229   if (Dangle >  0.5) Dangle -= 1.0;
00230   Dangle *= training_angle_match_scale;
00231 
00232   Distance = Proto->A * Feature->Params[PicoFeatX] +
00233     Proto->B * Feature->Params[PicoFeatY] +
00234     Proto->C;
00235 
00236   return (EvidenceOf (Distance * Distance + Dangle * Dangle));
00237 }
00238 
00247 double EvidenceOf (double Similarity) {
00248 
00249   Similarity /= training_similarity_midpoint;
00250 
00251   if (training_similarity_curl == 3)
00252     Similarity = Similarity * Similarity * Similarity;
00253   else if (training_similarity_curl == 2)
00254     Similarity = Similarity * Similarity;
00255   else
00256     Similarity = pow (Similarity, training_similarity_curl);
00257 
00258   return (1.0 / (1.0 + Similarity));
00259 }
00260 
00276 BOOL8 DummyFastMatch (
00277      FEATURE  Feature,
00278      PROTO  Proto)
00279 {
00280   FRECT   BoundingBox;
00281   FLOAT32 MaxAngleError;
00282   FLOAT32 AngleError;
00283 
00284   MaxAngleError = training_angle_pad / 360.0;
00285   AngleError = fabs (Proto->Angle - Feature->Params[PicoFeatDir]);
00286   if (AngleError > 0.5)
00287     AngleError = 1.0 - AngleError;
00288 
00289   if (AngleError > MaxAngleError)
00290     return (FALSE);
00291 
00292   ComputePaddedBoundingBox (Proto,
00293     training_tangent_bbox_pad * GetPicoFeatureLength (),
00294     training_orthogonal_bbox_pad * GetPicoFeatureLength (),
00295     &BoundingBox);
00296 
00297   return PointInside(&BoundingBox, Feature->Params[PicoFeatX],
00298                      Feature->Params[PicoFeatY]);
00299 } /* DummyFastMatch */
00300 
00318 void ComputePaddedBoundingBox (PROTO  Proto, FLOAT32  TangentPad,
00319                                FLOAT32  OrthogonalPad, FRECT  *BoundingBox) {
00320   FLOAT32 Pad, Length, Angle;
00321   FLOAT32 CosOfAngle, SinOfAngle;
00322 
00323   Length     = Proto->Length / 2.0 + TangentPad;
00324   Angle      = Proto->Angle * 2.0 * PI;
00325   CosOfAngle = fabs(cos(Angle));
00326   SinOfAngle = fabs(sin(Angle));
00327 
00328   Pad = MAX (CosOfAngle * Length, SinOfAngle * OrthogonalPad);
00329   BoundingBox->MinX = Proto->X - Pad;
00330   BoundingBox->MaxX = Proto->X + Pad;
00331 
00332   Pad = MAX(SinOfAngle * Length, CosOfAngle * OrthogonalPad);
00333   BoundingBox->MinY = Proto->Y - Pad;
00334   BoundingBox->MaxY = Proto->Y + Pad;
00335 
00336 } /* ComputePaddedBoundingBox */
00337 
00347 BOOL8 PointInside(FRECT *Rectangle, FLOAT32 X, FLOAT32  Y) {
00348   if (X < Rectangle->MinX) return (FALSE);
00349   if (X > Rectangle->MaxX) return (FALSE);
00350   if (Y < Rectangle->MinY) return (FALSE);
00351   if (Y > Rectangle->MaxY) return (FALSE);
00352   return (TRUE);
00353 
00354 } /* PointInside */