Tesseract  3.02
tesseract-ocr/classify/protos.cpp
Go to the documentation of this file.
00001 /* -*-C-*-
00002  ********************************************************************************
00003  *
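00004  * File:        protos.cpp  (Formerly protos.c)
00005  * Description: Routines for creating, growing, reading, writing and freeing prototype classes and their configurations.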
00006  * Author:       Mark Seaman, OCR Technology
00007  * Created:      Fri Oct 16 14:37:00 1987
00008  * Modified:     Mon Mar  4 14:51:24 1991 (Dan Johnson) danj@hpgrlj
00009  * Language:     C++
00010  * Package:      N/A
00011  * Status:       Reusable Software Component
00012  *
00013  * (c) Copyright 1987, Hewlett-Packard Company.
00014  ** Licensed under the Apache License, Version 2.0 (the "License");
00015  ** you may not use this file except in compliance with the License.
00016  ** You may obtain a copy of the License at
00017  ** http://www.apache.org/licenses/LICENSE-2.0
00018  ** Unless required by applicable law or agreed to in writing, software
00019  ** distributed under the License is distributed on an "AS IS" BASIS,
00020  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00021  ** See the License for the specific language governing permissions and
00022  ** limitations under the License.
00023  *
00024  *********************************************************************************/
00025 /*----------------------------------------------------------------------
00026               I n c l u d e s
00027 ----------------------------------------------------------------------*/
00028 #include "protos.h"
00029 #include "const.h"
00030 #include "emalloc.h"
00031 #include "freelist.h"
00032 #include "callcpp.h"
00033 #include "tprintf.h"
00034 #include "scanutils.h"
00035 #include "globals.h"
00036 #include "classify.h"
00037 #include "params.h"
00038 
00039 #include <stdio.h>
00040 #include <math.h>
00041 
/* Chunk size used by AddProtoToClass when it enlarges a class's prototype
   array: the new capacity is rounded to a multiple of this value. */
00042 #define PROTO_INCREMENT   32
/* Chunk size used by AddConfigToClass when it enlarges a class's
   configuration array: the new capacity is rounded to a multiple of this
   value. */
00043 #define CONFIG_INCREMENT  16
00044 
00045 /*----------------------------------------------------------------------
00046               V a r i a b l e s
00047 ----------------------------------------------------------------------*/
/* Table of class records filled in by ReadClassFromFile, which indexes it
   by unichar id. */
00048 CLASS_STRUCT TrainingData[NUMBER_OF_CLASSES];
00049 
/* Name of the file that Classify::ReadClassFile opens for reading; the
   default value is "MicroFeatures". */
00050 STRING_VAR(classify_training_file, "MicroFeatures", "Training file");
00051 
00052 /*----------------------------------------------------------------------
00053               F u n c t i o n s
00054 ----------------------------------------------------------------------*/
00063 int AddConfigToClass(CLASS_TYPE Class) {
00064   int NewNumConfigs;
00065   int NewConfig;
00066   int MaxNumProtos;
00067   BIT_VECTOR Config;
00068 
00069   MaxNumProtos = Class->MaxNumProtos;
00070 
00071   if (Class->NumConfigs >= Class->MaxNumConfigs) {
00072     /* add configs in CONFIG_INCREMENT chunks at a time */
00073     NewNumConfigs = (((Class->MaxNumConfigs + CONFIG_INCREMENT) /
00074       CONFIG_INCREMENT) * CONFIG_INCREMENT);
00075 
00076     Class->Configurations =
00077       (CONFIGS) Erealloc (Class->Configurations,
00078       sizeof (BIT_VECTOR) * NewNumConfigs);
00079 
00080     Class->MaxNumConfigs = NewNumConfigs;
00081   }
00082   NewConfig = Class->NumConfigs++;
00083   Config = NewBitVector (MaxNumProtos);
00084   Class->Configurations[NewConfig] = Config;
00085   zero_all_bits (Config, WordsInVectorOfSize (MaxNumProtos));
00086 
00087   return (NewConfig);
00088 }
00089 
00090 
00099 int AddProtoToClass(CLASS_TYPE Class) {
00100   int i;
00101   int Bit;
00102   int NewNumProtos;
00103   int NewProto;
00104   BIT_VECTOR Config;
00105 
00106   if (Class->NumProtos >= Class->MaxNumProtos) {
00107     /* add protos in PROTO_INCREMENT chunks at a time */
00108     NewNumProtos = (((Class->MaxNumProtos + PROTO_INCREMENT) /
00109       PROTO_INCREMENT) * PROTO_INCREMENT);
00110 
00111     Class->Prototypes = (PROTO) Erealloc (Class->Prototypes,
00112       sizeof (PROTO_STRUCT) *
00113       NewNumProtos);
00114 
00115     Class->MaxNumProtos = NewNumProtos;
00116 
00117     for (i = 0; i < Class->NumConfigs; i++) {
00118       Config = Class->Configurations[i];
00119       Class->Configurations[i] = ExpandBitVector (Config, NewNumProtos);
00120 
00121       for (Bit = Class->NumProtos; Bit < NewNumProtos; Bit++)
00122         reset_bit(Config, Bit);
00123     }
00124   }
00125   NewProto = Class->NumProtos++;
00126   if (Class->NumProtos > MAX_NUM_PROTOS) {
00127     tprintf("Ouch! number of protos = %d, vs max of %d!",
00128             Class->NumProtos, MAX_NUM_PROTOS);
00129   }
00130   return (NewProto);
00131 }
00132 
00133 
00142 FLOAT32 ClassConfigLength(CLASS_TYPE Class, BIT_VECTOR Config) {
00143   inT16 Pid;
00144   FLOAT32 TotalLength = 0;
00145 
00146   for (Pid = 0; Pid < Class->NumProtos; Pid++) {
00147     if (test_bit (Config, Pid)) {
00148 
00149       TotalLength += (ProtoIn (Class, Pid))->Length;
00150     }
00151   }
00152   return (TotalLength);
00153 }
00154 
00155 
00163 FLOAT32 ClassProtoLength(CLASS_TYPE Class) {
00164   inT16 Pid;
00165   FLOAT32 TotalLength = 0;
00166 
00167   for (Pid = 0; Pid < Class->NumProtos; Pid++) {
00168     TotalLength += (ProtoIn (Class, Pid))->Length;
00169   }
00170   return (TotalLength);
00171 }
00172 
00173 
00182 void CopyProto(PROTO Src, PROTO Dest) {
00183   Dest->X = Src->X;
00184   Dest->Y = Src->Y;
00185   Dest->Length = Src->Length;
00186   Dest->Angle = Src->Angle;
00187   Dest->A = Src->A;
00188   Dest->B = Src->B;
00189   Dest->C = Src->C;
00190 }
00191 
00192 
00193 /**********************************************************************
00194  * FillABC
00195  *
00196  * Fill in Protos A, B, C fields based on the X, Y, Angle fields.
00197  **********************************************************************/
00198 void FillABC(PROTO Proto) {
00199   FLOAT32 Slope, Intercept, Normalizer;
00200 
00201   Slope = tan (Proto->Angle * 2.0 * PI);
00202   Intercept = Proto->Y - Slope * Proto->X;
00203   Normalizer = 1.0 / sqrt (Slope * Slope + 1.0);
00204   Proto->A = Slope * Normalizer;
00205   Proto->B = -Normalizer;
00206   Proto->C = Intercept * Normalizer;
00207 }
00208 
00209 
00210 /**********************************************************************
00211  * FreeClass
00212  *
00213  * Deallocate the memory consumed by the specified class.
00214  **********************************************************************/
00215 void FreeClass(CLASS_TYPE Class) {
00216   if (Class) {
00217     FreeClassFields(Class);
00218     delete Class;
00219   }
00220 }
00221 
00222 
00223 /**********************************************************************
00224  * FreeClassFields
00225  *
00226  * Deallocate the memory consumed by subfields of the specified class.
00227  **********************************************************************/
00228 void FreeClassFields(CLASS_TYPE Class) {
00229   int i;
00230 
00231   if (Class) {
00232     if (Class->MaxNumProtos > 0)
00233       memfree (Class->Prototypes);
00234     if (Class->MaxNumConfigs > 0) {
00235       for (i = 0; i < Class->NumConfigs; i++)
00236         FreeBitVector (Class->Configurations[i]);
00237       memfree (Class->Configurations);
00238     }
00239   }
00240 }
00241 
00242 /**********************************************************************
00243  * NewClass
00244  *
00245  * Allocate a new class with enough memory to hold the specified number
00246  * of prototypes and configurations.
00247  **********************************************************************/
00248 CLASS_TYPE NewClass(int NumProtos, int NumConfigs) {
00249   CLASS_TYPE Class;
00250 
00251   Class = new CLASS_STRUCT;
00252 
00253   if (NumProtos > 0)
00254     Class->Prototypes = (PROTO) Emalloc (NumProtos * sizeof (PROTO_STRUCT));
00255 
00256   if (NumConfigs > 0)
00257     Class->Configurations = (CONFIGS) Emalloc (NumConfigs *
00258       sizeof (BIT_VECTOR));
00259   Class->MaxNumProtos = NumProtos;
00260   Class->MaxNumConfigs = NumConfigs;
00261   Class->NumProtos = 0;
00262   Class->NumConfigs = 0;
00263   return (Class);
00264 
00265 }
00266 
00267 
00268 /**********************************************************************
00269  * PrintProtos
00270  *
00271  * Print the list of prototypes in this class type.
00272  **********************************************************************/
00273 void PrintProtos(CLASS_TYPE Class) {
00274   inT16 Pid;
00275 
00276   for (Pid = 0; Pid < Class->NumProtos; Pid++) {
00277     cprintf ("Proto %d:\t", Pid);
00278     PrintProto (ProtoIn (Class, Pid));
00279     cprintf ("\t");
00280     PrintProtoLine (ProtoIn (Class, Pid));
00281     new_line();
00282   }
00283 }
00284 
00285 
00286 namespace tesseract {
00293 void Classify::ReadClassFile() {
00294  FILE *File;
00295  char TextLine[CHARS_PER_LINE];
00296  char unichar[CHARS_PER_LINE];
00297 
00298  cprintf ("Reading training data from '%s' ...",
00299           static_cast<STRING>(classify_training_file).string());
00300  fflush(stdout);
00301 
00302  File = open_file(static_cast<STRING>(classify_training_file).string(), "r");
00303  while (fgets (TextLine, CHARS_PER_LINE, File) != NULL) {
00304 
00305    sscanf(TextLine, "%s", unichar);
00306    ReadClassFromFile (File, unicharset.unichar_to_id(unichar));
00307    fgets(TextLine, CHARS_PER_LINE, File);
00308    fgets(TextLine, CHARS_PER_LINE, File);
00309  }
00310  fclose(File);
00311  new_line();
00312 }
00313 }  // namespace tesseract
00314 
00321 void ReadClassFromFile(FILE *File, UNICHAR_ID unichar_id) {
00322   CLASS_TYPE Class;
00323 
00324   Class = &TrainingData[unichar_id];
00325 
00326   ReadProtos(File, Class);
00327 
00328   ReadConfigs(File, Class);
00329 }
00330 
00337 void ReadConfigs(register FILE *File, CLASS_TYPE Class) {
00338   inT16 Cid;
00339   register inT16 Wid;
00340   register BIT_VECTOR ThisConfig;
00341   int NumWords;
00342   int NumConfigs;
00343 
00344   fscanf (File, "%d %d\n", &NumConfigs, &NumWords);
00345   Class->NumConfigs = NumConfigs;
00346   Class->MaxNumConfigs = NumConfigs;
00347   Class->Configurations =
00348     (CONFIGS) Emalloc (sizeof (BIT_VECTOR) * NumConfigs);
00349   NumWords = WordsInVectorOfSize (Class->NumProtos);
00350 
00351   for (Cid = 0; Cid < NumConfigs; Cid++) {
00352 
00353     ThisConfig = NewBitVector (Class->NumProtos);
00354     for (Wid = 0; Wid < NumWords; Wid++)
00355       fscanf (File, "%x", &ThisConfig[Wid]);
00356     Class->Configurations[Cid] = ThisConfig;
00357   }
00358 }
00359 
00360 
00367 void ReadProtos(register FILE *File, CLASS_TYPE Class) {
00368   register inT16 Pid;
00369   register PROTO Proto;
00370   int NumProtos;
00371 
00372   fscanf (File, "%d\n", &NumProtos);
00373   Class->NumProtos = NumProtos;
00374   Class->MaxNumProtos = NumProtos;
00375   Class->Prototypes = (PROTO) Emalloc (sizeof (PROTO_STRUCT) * NumProtos);
00376 
00377   for (Pid = 0; Pid < NumProtos; Pid++) {
00378     Proto = ProtoIn (Class, Pid);
00379     fscanf (File, "%f %f %f %f %f %f %f\n",
00380       &Proto->X,
00381       &Proto->Y,
00382       &Proto->Length,
00383       &Proto->Angle,
00384       &Proto->A,
00385       &Proto->B, &Proto->C);
00386   }
00387 }
00388 
00389 
00399 int SplitProto(CLASS_TYPE Class, int OldPid) {
00400   int i;
00401   int NewPid;
00402   BIT_VECTOR Config;
00403 
00404   NewPid = AddProtoToClass (Class);
00405 
00406   for (i = 0; i < Class->NumConfigs; i++) {
00407     Config = Class->Configurations[i];
00408     if (test_bit (Config, OldPid))
00409       SET_BIT(Config, NewPid);
00410   }
00411   return (NewPid);
00412 }
00413 
00414 
00425 void WriteOldConfigFile(FILE *File, CLASS_TYPE Class) {
00426   int Cid, Pid;
00427   BIT_VECTOR Config;
00428 
00429   fprintf (File, "%d %d\n", Class->NumConfigs, Class->NumProtos);
00430 
00431   for (Cid = 0; Cid < Class->NumConfigs; Cid++) {
00432     fprintf (File, "1 ");
00433 
00434     Config = Class->Configurations[Cid];
00435 
00436     for (Pid = 0; Pid < Class->NumProtos; Pid++) {
00437       if (test_bit (Config, Pid))
00438         fprintf (File, "1");
00439       else
00440         fprintf (File, "0");
00441     }
00442     fprintf (File, "\n");
00443   }
00444 }
00445 
00446 
00457 void WriteOldProtoFile(FILE *File, CLASS_TYPE Class) {
00458   int Pid;
00459   PROTO Proto;
00460 
00461   /* print old header */
00462   fprintf (File, "6\n");
00463   fprintf (File, "linear   essential      -0.500000   0.500000\n");
00464   fprintf (File, "linear   essential      -0.250000   0.750000\n");
00465   fprintf (File, "linear   essential       0.000000   1.000000\n");
00466   fprintf (File, "circular essential       0.000000   1.000000\n");
00467   fprintf (File, "linear   non-essential  -0.500000   0.500000\n");
00468   fprintf (File, "linear   non-essential  -0.500000   0.500000\n");
00469 
00470   for (Pid = 0; Pid < Class->NumProtos; Pid++) {
00471     Proto = ProtoIn (Class, Pid);
00472 
00473     fprintf (File, "significant   elliptical   1\n");
00474     fprintf (File, "     %9.6f %9.6f %9.6f %9.6f %9.6f %9.6f\n",
00475       Proto->X, Proto->Y,
00476       Proto->Length, Proto->Angle, 0.0, 0.0);
00477     fprintf (File, "     %9.6f %9.6f %9.6f %9.6f %9.6f %9.6f\n",
00478       0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001);
00479   }
00480 }