Tesseract  3.02
tesseract-ocr/classify/adaptive.cpp
Go to the documentation of this file.
00001 /******************************************************************************
00002  ** Filename:    adaptive.c
00003  ** Purpose:     Adaptive matcher.
00004  ** Author:      Dan Johnson
00005  ** History:     Fri Mar  8 10:00:21 1991, DSJ, Created.
00006  **
00007  ** (c) Copyright Hewlett-Packard Company, 1988.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  ******************************************************************************/
00018 
00019 /*----------------------------------------------------------------------------
00020           Include Files and Type Defines
00021 ----------------------------------------------------------------------------*/
00022 #include "adaptive.h"
00023 #include "emalloc.h"
00024 #include "freelist.h"
00025 #include "globals.h"
00026 #include "classify.h"
00027 
00028 #ifdef __UNIX__
00029 #include <assert.h>
00030 #endif
00031 #include <stdio.h>
00032 
00033 /*----------------------------------------------------------------------------
00034               Public Code
00035 ----------------------------------------------------------------------------*/
00036 /*---------------------------------------------------------------------------*/
00049 void AddAdaptedClass(ADAPT_TEMPLATES Templates,
00050                      ADAPT_CLASS Class,
00051                      CLASS_ID ClassId) {
00052   INT_CLASS IntClass;
00053 
00054   assert (Templates != NULL);
00055   assert (Class != NULL);
00056   assert (LegalClassId (ClassId));
00057   assert (UnusedClassIdIn (Templates->Templates, ClassId));
00058   assert (Class->NumPermConfigs == 0);
00059 
00060   IntClass = NewIntClass (1, 1);
00061   AddIntClass (Templates->Templates, ClassId, IntClass);
00062 
00063   assert (Templates->Class[ClassId] == NULL);
00064   Templates->Class[ClassId] = Class;
00065 
00066 }                                /* AddAdaptedClass */
00067 
00068 
00069 /*---------------------------------------------------------------------------*/
00080 void FreeTempConfig(TEMP_CONFIG Config) {
00081   assert (Config != NULL);
00082 
00083   destroy_nodes (Config->ContextsSeen, memfree);
00084   FreeBitVector (Config->Protos);
00085   free_struct (Config, sizeof (TEMP_CONFIG_STRUCT), "TEMP_CONFIG_STRUCT");
00086 
00087 }                                /* FreeTempConfig */
00088 
00089 /*---------------------------------------------------------------------------*/
00090 void FreeTempProto(void *arg) {
00091   PROTO proto = (PROTO) arg;
00092 
00093   free_struct (proto, sizeof (TEMP_PROTO_STRUCT), "TEMP_PROTO_STRUCT");
00094 }
00095 
00096 void FreePermConfig(PERM_CONFIG Config) {
00097   assert(Config != NULL);
00098   Efree(Config->Ambigs);
00099   free_struct(Config, sizeof(PERM_CONFIG_STRUCT), "PERM_CONFIG_STRUCT");
00100 }
00101 
00102 /*---------------------------------------------------------------------------*/
00113 ADAPT_CLASS NewAdaptedClass() {
00114   ADAPT_CLASS Class;
00115   int i;
00116 
00117   Class = (ADAPT_CLASS) Emalloc (sizeof (ADAPT_CLASS_STRUCT));
00118   Class->NumPermConfigs = 0;
00119   Class->MaxNumTimesSeen = 0;
00120   Class->TempProtos = NIL_LIST;
00121 
00122   Class->PermProtos = NewBitVector (MAX_NUM_PROTOS);
00123   Class->PermConfigs = NewBitVector (MAX_NUM_CONFIGS);
00124   zero_all_bits (Class->PermProtos, WordsInVectorOfSize (MAX_NUM_PROTOS));
00125   zero_all_bits (Class->PermConfigs, WordsInVectorOfSize (MAX_NUM_CONFIGS));
00126 
00127   for (i = 0; i < MAX_NUM_CONFIGS; i++)
00128     TempConfigFor (Class, i) = NULL;
00129 
00130   return (Class);
00131 
00132 }                                /* NewAdaptedClass */
00133 
00134 
00135 /*-------------------------------------------------------------------------*/
00136 void free_adapted_class(ADAPT_CLASS adapt_class) {
00137   int i;
00138 
00139   for (i = 0; i < MAX_NUM_CONFIGS; i++) {
00140     if (ConfigIsPermanent (adapt_class, i)
00141       && PermConfigFor (adapt_class, i) != NULL)
00142       FreePermConfig (PermConfigFor (adapt_class, i));
00143     else if (!ConfigIsPermanent (adapt_class, i)
00144       && TempConfigFor (adapt_class, i) != NULL)
00145       FreeTempConfig (TempConfigFor (adapt_class, i));
00146   }
00147   FreeBitVector (adapt_class->PermProtos);
00148   FreeBitVector (adapt_class->PermConfigs);
00149   destroy_nodes (adapt_class->TempProtos, FreeTempProto);
00150   Efree(adapt_class);
00151 }
00152 
00153 
00154 /*---------------------------------------------------------------------------*/
00155 namespace tesseract {
00167 ADAPT_TEMPLATES Classify::NewAdaptedTemplates(bool InitFromUnicharset) {
00168   ADAPT_TEMPLATES Templates;
00169   int i;
00170 
00171   Templates = (ADAPT_TEMPLATES) Emalloc (sizeof (ADAPT_TEMPLATES_STRUCT));
00172 
00173   Templates->Templates = NewIntTemplates ();
00174   Templates->NumPermClasses = 0;
00175   Templates->NumNonEmptyClasses = 0;
00176 
00177   /* Insert an empty class for each unichar id in unicharset */
00178   for (i = 0; i < MAX_NUM_CLASSES; i++) {
00179     Templates->Class[i] = NULL;
00180     if (InitFromUnicharset && i < unicharset.size()) {
00181       AddAdaptedClass(Templates, NewAdaptedClass(), i);
00182     }
00183   }
00184 
00185   return (Templates);
00186 
00187 }                                /* NewAdaptedTemplates */
00188 
00189 // Returns FontinfoId of the given config of the given adapted class.
00190 int Classify::GetFontinfoId(ADAPT_CLASS Class, uinT8 ConfigId) {
00191   return (ConfigIsPermanent(Class, ConfigId) ?
00192       PermConfigFor(Class, ConfigId)->FontinfoId :
00193       TempConfigFor(Class, ConfigId)->FontinfoId);
00194 }
00195 
00196 }  // namespace tesseract
00197 
00198 /*----------------------------------------------------------------------------*/
00199 void free_adapted_templates(ADAPT_TEMPLATES templates) {
00200 
00201   if (templates != NULL) {
00202     int i;
00203     for (i = 0; i < (templates->Templates)->NumClasses; i++)
00204       free_adapted_class (templates->Class[i]);
00205     free_int_templates (templates->Templates);
00206     Efree(templates);
00207   }
00208 }
00209 
00210 
00211 /*---------------------------------------------------------------------------*/
00223 TEMP_CONFIG NewTempConfig(int MaxProtoId, int FontinfoId) {
00224   TEMP_CONFIG Config;
00225   int NumProtos = MaxProtoId + 1;
00226 
00227   Config =
00228     (TEMP_CONFIG) alloc_struct (sizeof (TEMP_CONFIG_STRUCT),
00229     "TEMP_CONFIG_STRUCT");
00230   Config->Protos = NewBitVector (NumProtos);
00231 
00232   Config->NumTimesSeen = 1;
00233   Config->MaxProtoId = MaxProtoId;
00234   Config->ProtoVectorSize = WordsInVectorOfSize (NumProtos);
00235   Config->ContextsSeen = NIL_LIST;
00236   zero_all_bits (Config->Protos, Config->ProtoVectorSize);
00237   Config->FontinfoId = FontinfoId;
00238 
00239   return (Config);
00240 
00241 }                                /* NewTempConfig */
00242 
00243 
00244 /*---------------------------------------------------------------------------*/
00254 TEMP_PROTO NewTempProto() {
00255   return ((TEMP_PROTO)
00256     alloc_struct (sizeof (TEMP_PROTO_STRUCT), "TEMP_PROTO_STRUCT"));
00257 }                                /* NewTempProto */
00258 
00259 
00260 /*---------------------------------------------------------------------------*/
00261 namespace tesseract {
00273 void Classify::PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates) {
00274   int i;
00275   INT_CLASS IClass;
00276   ADAPT_CLASS AClass;
00277 
00278   #ifndef SECURE_NAMES
00279   fprintf (File, "\n\nSUMMARY OF ADAPTED TEMPLATES:\n\n");
00280   fprintf (File, "Num classes = %d;  Num permanent classes = %d\n\n",
00281            Templates->NumNonEmptyClasses, Templates->NumPermClasses);
00282   fprintf (File, "   Id  NC NPC  NP NPP\n");
00283   fprintf (File, "------------------------\n");
00284 
00285   for (i = 0; i < (Templates->Templates)->NumClasses; i++) {
00286     IClass = Templates->Templates->Class[i];
00287     AClass = Templates->Class[i];
00288     if (!IsEmptyAdaptedClass (AClass)) {
00289       fprintf (File, "%5d  %s %3d %3d %3d %3d\n",
00290         i, unicharset.id_to_unichar(i),
00291       IClass->NumConfigs, AClass->NumPermConfigs,
00292       IClass->NumProtos,
00293       IClass->NumProtos - count (AClass->TempProtos));
00294     }
00295   }
00296   #endif
00297   fprintf (File, "\n");
00298 
00299 }                                /* PrintAdaptedTemplates */
00300 }  // namespace tesseract
00301 
00302 
00303 /*---------------------------------------------------------------------------*/
00315 ADAPT_CLASS ReadAdaptedClass(FILE *File) {
00316   int NumTempProtos;
00317   int NumConfigs;
00318   int i;
00319   ADAPT_CLASS Class;
00320   TEMP_PROTO TempProto;
00321 
00322   /* first read high level adapted class structure */
00323   Class = (ADAPT_CLASS) Emalloc (sizeof (ADAPT_CLASS_STRUCT));
00324   fread ((char *) Class, sizeof (ADAPT_CLASS_STRUCT), 1, File);
00325 
00326   /* then read in the definitions of the permanent protos and configs */
00327   Class->PermProtos = NewBitVector (MAX_NUM_PROTOS);
00328   Class->PermConfigs = NewBitVector (MAX_NUM_CONFIGS);
00329   fread ((char *) Class->PermProtos, sizeof (uinT32),
00330     WordsInVectorOfSize (MAX_NUM_PROTOS), File);
00331   fread ((char *) Class->PermConfigs, sizeof (uinT32),
00332     WordsInVectorOfSize (MAX_NUM_CONFIGS), File);
00333 
00334   /* then read in the list of temporary protos */
00335   fread ((char *) &NumTempProtos, sizeof (int), 1, File);
00336   Class->TempProtos = NIL_LIST;
00337   for (i = 0; i < NumTempProtos; i++) {
00338     TempProto =
00339       (TEMP_PROTO) alloc_struct (sizeof (TEMP_PROTO_STRUCT),
00340       "TEMP_PROTO_STRUCT");
00341     fread ((char *) TempProto, sizeof (TEMP_PROTO_STRUCT), 1, File);
00342     Class->TempProtos = push_last (Class->TempProtos, TempProto);
00343   }
00344 
00345   /* then read in the adapted configs */
00346   fread ((char *) &NumConfigs, sizeof (int), 1, File);
00347   for (i = 0; i < NumConfigs; i++)
00348     if (test_bit (Class->PermConfigs, i))
00349       Class->Config[i].Perm = ReadPermConfig (File);
00350     else
00351       Class->Config[i].Temp = ReadTempConfig (File);
00352 
00353   return (Class);
00354 
00355 }                                /* ReadAdaptedClass */
00356 
00357 
00358 /*---------------------------------------------------------------------------*/
00359 namespace tesseract {
00371 ADAPT_TEMPLATES Classify::ReadAdaptedTemplates(FILE *File) {
00372   int i;
00373   ADAPT_TEMPLATES Templates;
00374 
00375   /* first read the high level adaptive template struct */
00376   Templates = (ADAPT_TEMPLATES) Emalloc (sizeof (ADAPT_TEMPLATES_STRUCT));
00377   fread ((char *) Templates, sizeof (ADAPT_TEMPLATES_STRUCT), 1, File);
00378 
00379   /* then read in the basic integer templates */
00380   Templates->Templates = ReadIntTemplates (File);
00381 
00382   /* then read in the adaptive info for each class */
00383   for (i = 0; i < (Templates->Templates)->NumClasses; i++) {
00384     Templates->Class[i] = ReadAdaptedClass (File);
00385   }
00386   return (Templates);
00387 
00388 }                                /* ReadAdaptedTemplates */
00389 }  // namespace tesseract
00390 
00391 
00392 /*---------------------------------------------------------------------------*/
00404 PERM_CONFIG ReadPermConfig(FILE *File) {
00405   PERM_CONFIG Config = (PERM_CONFIG) alloc_struct(sizeof(PERM_CONFIG_STRUCT),
00406                                                   "PERM_CONFIG_STRUCT");
00407   uinT8 NumAmbigs;
00408   fread ((char *) &NumAmbigs, sizeof(uinT8), 1, File);
00409   Config->Ambigs = (UNICHAR_ID *)Emalloc(sizeof(UNICHAR_ID) * (NumAmbigs + 1));
00410   fread(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs, File);
00411   Config->Ambigs[NumAmbigs] = -1;
00412   fread(&(Config->FontinfoId), sizeof(int), 1, File);
00413 
00414   return (Config);
00415 
00416 }                                /* ReadPermConfig */
00417 
00418 
00419 /*---------------------------------------------------------------------------*/
00431 TEMP_CONFIG ReadTempConfig(FILE *File) {
00432   TEMP_CONFIG Config;
00433 
00434   Config =
00435     (TEMP_CONFIG) alloc_struct (sizeof (TEMP_CONFIG_STRUCT),
00436     "TEMP_CONFIG_STRUCT");
00437   fread ((char *) Config, sizeof (TEMP_CONFIG_STRUCT), 1, File);
00438 
00439   Config->Protos = NewBitVector (Config->ProtoVectorSize * BITSINLONG);
00440   fread ((char *) Config->Protos, sizeof (uinT32),
00441     Config->ProtoVectorSize, File);
00442 
00443   return (Config);
00444 
00445 }                                /* ReadTempConfig */
00446 
00447 
00448 /*---------------------------------------------------------------------------*/
00461 void WriteAdaptedClass(FILE *File, ADAPT_CLASS Class, int NumConfigs) {
00462   int NumTempProtos;
00463   LIST TempProtos;
00464   int i;
00465 
00466   /* first write high level adapted class structure */
00467   fwrite ((char *) Class, sizeof (ADAPT_CLASS_STRUCT), 1, File);
00468 
00469   /* then write out the definitions of the permanent protos and configs */
00470   fwrite ((char *) Class->PermProtos, sizeof (uinT32),
00471     WordsInVectorOfSize (MAX_NUM_PROTOS), File);
00472   fwrite ((char *) Class->PermConfigs, sizeof (uinT32),
00473     WordsInVectorOfSize (MAX_NUM_CONFIGS), File);
00474 
00475   /* then write out the list of temporary protos */
00476   NumTempProtos = count (Class->TempProtos);
00477   fwrite ((char *) &NumTempProtos, sizeof (int), 1, File);
00478   TempProtos = Class->TempProtos;
00479   iterate (TempProtos) {
00480     void* proto = first_node(TempProtos);
00481     fwrite ((char *) proto, sizeof (TEMP_PROTO_STRUCT), 1, File);
00482   }
00483 
00484   /* then write out the adapted configs */
00485   fwrite ((char *) &NumConfigs, sizeof (int), 1, File);
00486   for (i = 0; i < NumConfigs; i++)
00487     if (test_bit (Class->PermConfigs, i))
00488       WritePermConfig (File, Class->Config[i].Perm);
00489     else
00490       WriteTempConfig (File, Class->Config[i].Temp);
00491 
00492 }                                /* WriteAdaptedClass */
00493 
00494 
00495 /*---------------------------------------------------------------------------*/
00496 namespace tesseract {
00507 void Classify::WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates) {
00508   int i;
00509 
00510   /* first write the high level adaptive template struct */
00511   fwrite ((char *) Templates, sizeof (ADAPT_TEMPLATES_STRUCT), 1, File);
00512 
00513   /* then write out the basic integer templates */
00514   WriteIntTemplates (File, Templates->Templates, unicharset);
00515 
00516   /* then write out the adaptive info for each class */
00517   for (i = 0; i < (Templates->Templates)->NumClasses; i++) {
00518     WriteAdaptedClass (File, Templates->Class[i],
00519       Templates->Templates->Class[i]->NumConfigs);
00520   }
00521 }                                /* WriteAdaptedTemplates */
00522 }  // namespace tesseract
00523 
00524 
00525 /*---------------------------------------------------------------------------*/
00537 void WritePermConfig(FILE *File, PERM_CONFIG Config) {
00538   uinT8 NumAmbigs = 0;
00539 
00540   assert (Config != NULL);
00541   while (Config->Ambigs[NumAmbigs] > 0) ++NumAmbigs;
00542 
00543   fwrite((char *) &NumAmbigs, sizeof(uinT8), 1, File);
00544   fwrite(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs, File);
00545   fwrite(&(Config->FontinfoId), sizeof(int), 1, File);
00546 }                                /* WritePermConfig */
00547 
00548 
00549 /*---------------------------------------------------------------------------*/
00561 void WriteTempConfig(FILE *File, TEMP_CONFIG Config) {
00562   assert (Config != NULL);
00563                                  /* contexts not yet implemented */
00564   assert (Config->ContextsSeen == NULL);
00565 
00566   fwrite ((char *) Config, sizeof (TEMP_CONFIG_STRUCT), 1, File);
00567   fwrite ((char *) Config->Protos, sizeof (uinT32),
00568     Config->ProtoVectorSize, File);
00569 
00570 }                                /* WriteTempConfig */