Tesseract
3.02
|
00001 /****************************************************************************** 00002 ** Filename: adaptive.c 00003 ** Purpose: Adaptive matcher. 00004 ** Author: Dan Johnson 00005 ** History: Fri Mar 8 10:00:21 1991, DSJ, Created. 00006 ** 00007 ** (c) Copyright Hewlett-Packard Company, 1988. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 ******************************************************************************/ 00018 00019 /*---------------------------------------------------------------------------- 00020 Include Files and Type Defines 00021 ----------------------------------------------------------------------------*/ 00022 #include "adaptive.h" 00023 #include "emalloc.h" 00024 #include "freelist.h" 00025 #include "globals.h" 00026 #include "classify.h" 00027 00028 #ifdef __UNIX__ 00029 #include <assert.h> 00030 #endif 00031 #include <stdio.h> 00032 00033 /*---------------------------------------------------------------------------- 00034 Public Code 00035 ----------------------------------------------------------------------------*/ 00036 /*---------------------------------------------------------------------------*/ 00049 void AddAdaptedClass(ADAPT_TEMPLATES Templates, 00050 ADAPT_CLASS Class, 00051 CLASS_ID ClassId) { 00052 INT_CLASS IntClass; 00053 00054 assert (Templates != NULL); 00055 assert (Class != NULL); 00056 assert (LegalClassId (ClassId)); 00057 assert (UnusedClassIdIn (Templates->Templates, ClassId)); 00058 assert (Class->NumPermConfigs == 0); 00059 00060 IntClass = NewIntClass (1, 1); 00061 AddIntClass (Templates->Templates, ClassId, IntClass); 00062 00063 assert (Templates->Class[ClassId] == NULL); 00064 Templates->Class[ClassId] = Class; 00065 00066 } /* AddAdaptedClass */ 00067 00068 00069 /*---------------------------------------------------------------------------*/ 00080 void FreeTempConfig(TEMP_CONFIG Config) { 00081 assert (Config != NULL); 00082 00083 destroy_nodes (Config->ContextsSeen, memfree); 00084 FreeBitVector (Config->Protos); 00085 free_struct (Config, sizeof (TEMP_CONFIG_STRUCT), "TEMP_CONFIG_STRUCT"); 00086 00087 } /* FreeTempConfig */ 00088 00089 /*---------------------------------------------------------------------------*/ 00090 void FreeTempProto(void *arg) { 00091 PROTO proto = (PROTO) arg; 00092 00093 free_struct (proto, sizeof (TEMP_PROTO_STRUCT), "TEMP_PROTO_STRUCT"); 00094 } 00095 00096 void FreePermConfig(PERM_CONFIG Config) { 00097 assert(Config != NULL); 00098 Efree(Config->Ambigs); 00099 free_struct(Config, sizeof(PERM_CONFIG_STRUCT), "PERM_CONFIG_STRUCT"); 00100 } 00101 00102 /*---------------------------------------------------------------------------*/ 00113 ADAPT_CLASS NewAdaptedClass() { 00114 ADAPT_CLASS Class; 00115 int i; 00116 00117 Class = (ADAPT_CLASS) Emalloc (sizeof (ADAPT_CLASS_STRUCT)); 00118 Class->NumPermConfigs = 0; 00119 Class->MaxNumTimesSeen = 0; 00120 Class->TempProtos = NIL_LIST; 00121 00122 Class->PermProtos = NewBitVector (MAX_NUM_PROTOS); 00123 Class->PermConfigs = NewBitVector (MAX_NUM_CONFIGS); 00124 zero_all_bits (Class->PermProtos, WordsInVectorOfSize (MAX_NUM_PROTOS)); 00125 zero_all_bits (Class->PermConfigs, WordsInVectorOfSize (MAX_NUM_CONFIGS)); 00126 00127 for (i = 0; i < MAX_NUM_CONFIGS; i++) 00128 TempConfigFor (Class, i) = NULL; 00129 00130 return (Class); 00131 00132 } /* NewAdaptedClass */ 00133 00134 00135 /*-------------------------------------------------------------------------*/ 00136 void free_adapted_class(ADAPT_CLASS adapt_class) { 00137 int i; 00138 00139 for (i = 0; i < MAX_NUM_CONFIGS; i++) { 00140 if (ConfigIsPermanent (adapt_class, i) 00141 && PermConfigFor (adapt_class, i) != NULL) 00142 FreePermConfig (PermConfigFor (adapt_class, i)); 00143 else if (!ConfigIsPermanent (adapt_class, i) 00144 && TempConfigFor (adapt_class, i) != NULL) 00145 FreeTempConfig (TempConfigFor (adapt_class, i)); 00146 } 00147 FreeBitVector (adapt_class->PermProtos); 00148 FreeBitVector (adapt_class->PermConfigs); 00149 destroy_nodes (adapt_class->TempProtos, FreeTempProto); 00150 Efree(adapt_class); 00151 } 00152 00153 00154 /*---------------------------------------------------------------------------*/ 00155 namespace tesseract { 00167 ADAPT_TEMPLATES Classify::NewAdaptedTemplates(bool InitFromUnicharset) { 00168 ADAPT_TEMPLATES Templates; 00169 int i; 00170 00171 Templates = (ADAPT_TEMPLATES) Emalloc (sizeof (ADAPT_TEMPLATES_STRUCT)); 00172 00173 Templates->Templates = NewIntTemplates (); 00174 Templates->NumPermClasses = 0; 00175 Templates->NumNonEmptyClasses = 0; 00176 00177 /* Insert an empty class for each unichar id in unicharset */ 00178 for (i = 0; i < MAX_NUM_CLASSES; i++) { 00179 Templates->Class[i] = NULL; 00180 if (InitFromUnicharset && i < unicharset.size()) { 00181 AddAdaptedClass(Templates, NewAdaptedClass(), i); 00182 } 00183 } 00184 00185 return (Templates); 00186 00187 } /* NewAdaptedTemplates */ 00188 00189 // Returns FontinfoId of the given config of the given adapted class. 00190 int Classify::GetFontinfoId(ADAPT_CLASS Class, uinT8 ConfigId) { 00191 return (ConfigIsPermanent(Class, ConfigId) ? 00192 PermConfigFor(Class, ConfigId)->FontinfoId : 00193 TempConfigFor(Class, ConfigId)->FontinfoId); 00194 } 00195 00196 } // namespace tesseract 00197 00198 /*----------------------------------------------------------------------------*/ 00199 void free_adapted_templates(ADAPT_TEMPLATES templates) { 00200 00201 if (templates != NULL) { 00202 int i; 00203 for (i = 0; i < (templates->Templates)->NumClasses; i++) 00204 free_adapted_class (templates->Class[i]); 00205 free_int_templates (templates->Templates); 00206 Efree(templates); 00207 } 00208 } 00209 00210 00211 /*---------------------------------------------------------------------------*/ 00223 TEMP_CONFIG NewTempConfig(int MaxProtoId, int FontinfoId) { 00224 TEMP_CONFIG Config; 00225 int NumProtos = MaxProtoId + 1; 00226 00227 Config = 00228 (TEMP_CONFIG) alloc_struct (sizeof (TEMP_CONFIG_STRUCT), 00229 "TEMP_CONFIG_STRUCT"); 00230 Config->Protos = NewBitVector (NumProtos); 00231 00232 Config->NumTimesSeen = 1; 00233 Config->MaxProtoId = MaxProtoId; 00234 Config->ProtoVectorSize = WordsInVectorOfSize (NumProtos); 00235 Config->ContextsSeen = NIL_LIST; 00236 zero_all_bits (Config->Protos, Config->ProtoVectorSize); 00237 Config->FontinfoId = FontinfoId; 00238 00239 return (Config); 00240 00241 } /* NewTempConfig */ 00242 00243 00244 /*---------------------------------------------------------------------------*/ 00254 TEMP_PROTO NewTempProto() { 00255 return ((TEMP_PROTO) 00256 alloc_struct (sizeof (TEMP_PROTO_STRUCT), "TEMP_PROTO_STRUCT")); 00257 } /* NewTempProto */ 00258 00259 00260 /*---------------------------------------------------------------------------*/ 00261 namespace tesseract { 00273 void Classify::PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates) { 00274 int i; 00275 INT_CLASS IClass; 00276 ADAPT_CLASS AClass; 00277 00278 #ifndef SECURE_NAMES 00279 fprintf (File, "\n\nSUMMARY OF ADAPTED TEMPLATES:\n\n"); 00280 fprintf (File, "Num classes = %d; Num permanent classes = %d\n\n", 00281 Templates->NumNonEmptyClasses, Templates->NumPermClasses); 00282 fprintf (File, " Id NC NPC NP NPP\n"); 00283 fprintf (File, "------------------------\n"); 00284 00285 for (i = 0; i < (Templates->Templates)->NumClasses; i++) { 00286 IClass = Templates->Templates->Class[i]; 00287 AClass = Templates->Class[i]; 00288 if (!IsEmptyAdaptedClass (AClass)) { 00289 fprintf (File, "%5d %s %3d %3d %3d %3d\n", 00290 i, unicharset.id_to_unichar(i), 00291 IClass->NumConfigs, AClass->NumPermConfigs, 00292 IClass->NumProtos, 00293 IClass->NumProtos - count (AClass->TempProtos)); 00294 } 00295 } 00296 #endif 00297 fprintf (File, "\n"); 00298 00299 } /* PrintAdaptedTemplates */ 00300 } // namespace tesseract 00301 00302 00303 /*---------------------------------------------------------------------------*/ 00315 ADAPT_CLASS ReadAdaptedClass(FILE *File) { 00316 int NumTempProtos; 00317 int NumConfigs; 00318 int i; 00319 ADAPT_CLASS Class; 00320 TEMP_PROTO TempProto; 00321 00322 /* first read high level adapted class structure */ 00323 Class = (ADAPT_CLASS) Emalloc (sizeof (ADAPT_CLASS_STRUCT)); 00324 fread ((char *) Class, sizeof (ADAPT_CLASS_STRUCT), 1, File); 00325 00326 /* then read in the definitions of the permanent protos and configs */ 00327 Class->PermProtos = NewBitVector (MAX_NUM_PROTOS); 00328 Class->PermConfigs = NewBitVector (MAX_NUM_CONFIGS); 00329 fread ((char *) Class->PermProtos, sizeof (uinT32), 00330 WordsInVectorOfSize (MAX_NUM_PROTOS), File); 00331 fread ((char *) Class->PermConfigs, sizeof (uinT32), 00332 WordsInVectorOfSize (MAX_NUM_CONFIGS), File); 00333 00334 /* then read in the list of temporary protos */ 00335 fread ((char *) &NumTempProtos, sizeof (int), 1, File); 00336 Class->TempProtos = NIL_LIST; 00337 for (i = 0; i < NumTempProtos; i++) { 00338 TempProto = 00339 (TEMP_PROTO) alloc_struct (sizeof (TEMP_PROTO_STRUCT), 00340 "TEMP_PROTO_STRUCT"); 00341 fread ((char *) TempProto, sizeof (TEMP_PROTO_STRUCT), 1, File); 00342 Class->TempProtos = push_last (Class->TempProtos, TempProto); 00343 } 00344 00345 /* then read in the adapted configs */ 00346 fread ((char *) &NumConfigs, sizeof (int), 1, File); 00347 for (i = 0; i < NumConfigs; i++) 00348 if (test_bit (Class->PermConfigs, i)) 00349 Class->Config[i].Perm = ReadPermConfig (File); 00350 else 00351 Class->Config[i].Temp = ReadTempConfig (File); 00352 00353 return (Class); 00354 00355 } /* ReadAdaptedClass */ 00356 00357 00358 /*---------------------------------------------------------------------------*/ 00359 namespace tesseract { 00371 ADAPT_TEMPLATES Classify::ReadAdaptedTemplates(FILE *File) { 00372 int i; 00373 ADAPT_TEMPLATES Templates; 00374 00375 /* first read the high level adaptive template struct */ 00376 Templates = (ADAPT_TEMPLATES) Emalloc (sizeof (ADAPT_TEMPLATES_STRUCT)); 00377 fread ((char *) Templates, sizeof (ADAPT_TEMPLATES_STRUCT), 1, File); 00378 00379 /* then read in the basic integer templates */ 00380 Templates->Templates = ReadIntTemplates (File); 00381 00382 /* then read in the adaptive info for each class */ 00383 for (i = 0; i < (Templates->Templates)->NumClasses; i++) { 00384 Templates->Class[i] = ReadAdaptedClass (File); 00385 } 00386 return (Templates); 00387 00388 } /* ReadAdaptedTemplates */ 00389 } // namespace tesseract 00390 00391 00392 /*---------------------------------------------------------------------------*/ 00404 PERM_CONFIG ReadPermConfig(FILE *File) { 00405 PERM_CONFIG Config = (PERM_CONFIG) alloc_struct(sizeof(PERM_CONFIG_STRUCT), 00406 "PERM_CONFIG_STRUCT"); 00407 uinT8 NumAmbigs; 00408 fread ((char *) &NumAmbigs, sizeof(uinT8), 1, File); 00409 Config->Ambigs = (UNICHAR_ID *)Emalloc(sizeof(UNICHAR_ID) * (NumAmbigs + 1)); 00410 fread(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs, File); 00411 Config->Ambigs[NumAmbigs] = -1; 00412 fread(&(Config->FontinfoId), sizeof(int), 1, File); 00413 00414 return (Config); 00415 00416 } /* ReadPermConfig */ 00417 00418 00419 /*---------------------------------------------------------------------------*/ 00431 TEMP_CONFIG ReadTempConfig(FILE *File) { 00432 TEMP_CONFIG Config; 00433 00434 Config = 00435 (TEMP_CONFIG) alloc_struct (sizeof (TEMP_CONFIG_STRUCT), 00436 "TEMP_CONFIG_STRUCT"); 00437 fread ((char *) Config, sizeof (TEMP_CONFIG_STRUCT), 1, File); 00438 00439 Config->Protos = NewBitVector (Config->ProtoVectorSize * BITSINLONG); 00440 fread ((char *) Config->Protos, sizeof (uinT32), 00441 Config->ProtoVectorSize, File); 00442 00443 return (Config); 00444 00445 } /* ReadTempConfig */ 00446 00447 00448 /*---------------------------------------------------------------------------*/ 00461 void WriteAdaptedClass(FILE *File, ADAPT_CLASS Class, int NumConfigs) { 00462 int NumTempProtos; 00463 LIST TempProtos; 00464 int i; 00465 00466 /* first write high level adapted class structure */ 00467 fwrite ((char *) Class, sizeof (ADAPT_CLASS_STRUCT), 1, File); 00468 00469 /* then write out the definitions of the permanent protos and configs */ 00470 fwrite ((char *) Class->PermProtos, sizeof (uinT32), 00471 WordsInVectorOfSize (MAX_NUM_PROTOS), File); 00472 fwrite ((char *) Class->PermConfigs, sizeof (uinT32), 00473 WordsInVectorOfSize (MAX_NUM_CONFIGS), File); 00474 00475 /* then write out the list of temporary protos */ 00476 NumTempProtos = count (Class->TempProtos); 00477 fwrite ((char *) &NumTempProtos, sizeof (int), 1, File); 00478 TempProtos = Class->TempProtos; 00479 iterate (TempProtos) { 00480 void* proto = first_node(TempProtos); 00481 fwrite ((char *) proto, sizeof (TEMP_PROTO_STRUCT), 1, File); 00482 } 00483 00484 /* then write out the adapted configs */ 00485 fwrite ((char *) &NumConfigs, sizeof (int), 1, File); 00486 for (i = 0; i < NumConfigs; i++) 00487 if (test_bit (Class->PermConfigs, i)) 00488 WritePermConfig (File, Class->Config[i].Perm); 00489 else 00490 WriteTempConfig (File, Class->Config[i].Temp); 00491 00492 } /* WriteAdaptedClass */ 00493 00494 00495 /*---------------------------------------------------------------------------*/ 00496 namespace tesseract { 00507 void Classify::WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates) { 00508 int i; 00509 00510 /* first write the high level adaptive template struct */ 00511 fwrite ((char *) Templates, sizeof (ADAPT_TEMPLATES_STRUCT), 1, File); 00512 00513 /* then write out the basic integer templates */ 00514 WriteIntTemplates (File, Templates->Templates, unicharset); 00515 00516 /* then write out the adaptive info for each class */ 00517 for (i = 0; i < (Templates->Templates)->NumClasses; i++) { 00518 WriteAdaptedClass (File, Templates->Class[i], 00519 Templates->Templates->Class[i]->NumConfigs); 00520 } 00521 } /* WriteAdaptedTemplates */ 00522 } // namespace tesseract 00523 00524 00525 /*---------------------------------------------------------------------------*/ 00537 void WritePermConfig(FILE *File, PERM_CONFIG Config) { 00538 uinT8 NumAmbigs = 0; 00539 00540 assert (Config != NULL); 00541 while (Config->Ambigs[NumAmbigs] > 0) ++NumAmbigs; 00542 00543 fwrite((char *) &NumAmbigs, sizeof(uinT8), 1, File); 00544 fwrite(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs, File); 00545 fwrite(&(Config->FontinfoId), sizeof(int), 1, File); 00546 } /* WritePermConfig */ 00547 00548 00549 /*---------------------------------------------------------------------------*/ 00561 void WriteTempConfig(FILE *File, TEMP_CONFIG Config) { 00562 assert (Config != NULL); 00563 /* contexts not yet implemented */ 00564 assert (Config->ContextsSeen == NULL); 00565 00566 fwrite ((char *) Config, sizeof (TEMP_CONFIG_STRUCT), 1, File); 00567 fwrite ((char *) Config->Protos, sizeof (uinT32), 00568 Config->ProtoVectorSize, File); 00569 00570 } /* WriteTempConfig */