Tesseract
3.02
|
00001 /* -*-C-*- 00002 ******************************************************************************** 00003 * 00004 * File: protos.c (Formerly protos.c) 00005 * Description: 00006 * Author: Mark Seaman, OCR Technology 00007 * Created: Fri Oct 16 14:37:00 1987 00008 * Modified: Mon Mar 4 14:51:24 1991 (Dan Johnson) danj@hpgrlj 00009 * Language: C 00010 * Package: N/A 00011 * Status: Reusable Software Component 00012 * 00013 * (c) Copyright 1987, Hewlett-Packard Company. 00014 ** Licensed under the Apache License, Version 2.0 (the "License"); 00015 ** you may not use this file except in compliance with the License. 00016 ** You may obtain a copy of the License at 00017 ** http://www.apache.org/licenses/LICENSE-2.0 00018 ** Unless required by applicable law or agreed to in writing, software 00019 ** distributed under the License is distributed on an "AS IS" BASIS, 00020 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00021 ** See the License for the specific language governing permissions and 00022 ** limitations under the License. 00023 * 00024 *********************************************************************************/ 00025 /*---------------------------------------------------------------------- 00026 I n c l u d e s 00027 ----------------------------------------------------------------------*/ 00028 #include "protos.h" 00029 #include "const.h" 00030 #include "emalloc.h" 00031 #include "freelist.h" 00032 #include "callcpp.h" 00033 #include "tprintf.h" 00034 #include "scanutils.h" 00035 #include "globals.h" 00036 #include "classify.h" 00037 #include "params.h" 00038 00039 #include <stdio.h> 00040 #include <math.h> 00041 00042 #define PROTO_INCREMENT 32 00043 #define CONFIG_INCREMENT 16 00044 00045 /*---------------------------------------------------------------------- 00046 V a r i a b l e s 00047 ----------------------------------------------------------------------*/ 00048 CLASS_STRUCT TrainingData[NUMBER_OF_CLASSES]; 00049 00050 STRING_VAR(classify_training_file, "MicroFeatures", "Training file"); 00051 00052 /*---------------------------------------------------------------------- 00053 F u n c t i o n s 00054 ----------------------------------------------------------------------*/ 00063 int AddConfigToClass(CLASS_TYPE Class) { 00064 int NewNumConfigs; 00065 int NewConfig; 00066 int MaxNumProtos; 00067 BIT_VECTOR Config; 00068 00069 MaxNumProtos = Class->MaxNumProtos; 00070 00071 if (Class->NumConfigs >= Class->MaxNumConfigs) { 00072 /* add configs in CONFIG_INCREMENT chunks at a time */ 00073 NewNumConfigs = (((Class->MaxNumConfigs + CONFIG_INCREMENT) / 00074 CONFIG_INCREMENT) * CONFIG_INCREMENT); 00075 00076 Class->Configurations = 00077 (CONFIGS) Erealloc (Class->Configurations, 00078 sizeof (BIT_VECTOR) * NewNumConfigs); 00079 00080 Class->MaxNumConfigs = NewNumConfigs; 00081 } 00082 NewConfig = Class->NumConfigs++; 00083 Config = NewBitVector (MaxNumProtos); 00084 Class->Configurations[NewConfig] = Config; 00085 zero_all_bits (Config, WordsInVectorOfSize (MaxNumProtos)); 00086 00087 return (NewConfig); 00088 } 00089 00090 00099 int AddProtoToClass(CLASS_TYPE Class) { 00100 int i; 00101 int Bit; 00102 int NewNumProtos; 00103 int NewProto; 00104 BIT_VECTOR Config; 00105 00106 if (Class->NumProtos >= Class->MaxNumProtos) { 00107 /* add protos in PROTO_INCREMENT chunks at a time */ 00108 NewNumProtos = (((Class->MaxNumProtos + PROTO_INCREMENT) / 00109 PROTO_INCREMENT) * PROTO_INCREMENT); 00110 00111 Class->Prototypes = (PROTO) Erealloc (Class->Prototypes, 00112 sizeof (PROTO_STRUCT) * 00113 NewNumProtos); 00114 00115 Class->MaxNumProtos = NewNumProtos; 00116 00117 for (i = 0; i < Class->NumConfigs; i++) { 00118 Config = Class->Configurations[i]; 00119 Class->Configurations[i] = ExpandBitVector (Config, NewNumProtos); 00120 00121 for (Bit = Class->NumProtos; Bit < NewNumProtos; Bit++) 00122 reset_bit(Config, Bit); 00123 } 00124 } 00125 NewProto = Class->NumProtos++; 00126 if (Class->NumProtos > MAX_NUM_PROTOS) { 00127 tprintf("Ouch! number of protos = %d, vs max of %d!", 00128 Class->NumProtos, MAX_NUM_PROTOS); 00129 } 00130 return (NewProto); 00131 } 00132 00133 00142 FLOAT32 ClassConfigLength(CLASS_TYPE Class, BIT_VECTOR Config) { 00143 inT16 Pid; 00144 FLOAT32 TotalLength = 0; 00145 00146 for (Pid = 0; Pid < Class->NumProtos; Pid++) { 00147 if (test_bit (Config, Pid)) { 00148 00149 TotalLength += (ProtoIn (Class, Pid))->Length; 00150 } 00151 } 00152 return (TotalLength); 00153 } 00154 00155 00163 FLOAT32 ClassProtoLength(CLASS_TYPE Class) { 00164 inT16 Pid; 00165 FLOAT32 TotalLength = 0; 00166 00167 for (Pid = 0; Pid < Class->NumProtos; Pid++) { 00168 TotalLength += (ProtoIn (Class, Pid))->Length; 00169 } 00170 return (TotalLength); 00171 } 00172 00173 00182 void CopyProto(PROTO Src, PROTO Dest) { 00183 Dest->X = Src->X; 00184 Dest->Y = Src->Y; 00185 Dest->Length = Src->Length; 00186 Dest->Angle = Src->Angle; 00187 Dest->A = Src->A; 00188 Dest->B = Src->B; 00189 Dest->C = Src->C; 00190 } 00191 00192 00193 /********************************************************************** 00194 * FillABC 00195 * 00196 * Fill in Protos A, B, C fields based on the X, Y, Angle fields. 00197 **********************************************************************/ 00198 void FillABC(PROTO Proto) { 00199 FLOAT32 Slope, Intercept, Normalizer; 00200 00201 Slope = tan (Proto->Angle * 2.0 * PI); 00202 Intercept = Proto->Y - Slope * Proto->X; 00203 Normalizer = 1.0 / sqrt (Slope * Slope + 1.0); 00204 Proto->A = Slope * Normalizer; 00205 Proto->B = -Normalizer; 00206 Proto->C = Intercept * Normalizer; 00207 } 00208 00209 00210 /********************************************************************** 00211 * FreeClass 00212 * 00213 * Deallocate the memory consumed by the specified class. 00214 **********************************************************************/ 00215 void FreeClass(CLASS_TYPE Class) { 00216 if (Class) { 00217 FreeClassFields(Class); 00218 delete Class; 00219 } 00220 } 00221 00222 00223 /********************************************************************** 00224 * FreeClassFields 00225 * 00226 * Deallocate the memory consumed by subfields of the specified class. 00227 **********************************************************************/ 00228 void FreeClassFields(CLASS_TYPE Class) { 00229 int i; 00230 00231 if (Class) { 00232 if (Class->MaxNumProtos > 0) 00233 memfree (Class->Prototypes); 00234 if (Class->MaxNumConfigs > 0) { 00235 for (i = 0; i < Class->NumConfigs; i++) 00236 FreeBitVector (Class->Configurations[i]); 00237 memfree (Class->Configurations); 00238 } 00239 } 00240 } 00241 00242 /********************************************************************** 00243 * NewClass 00244 * 00245 * Allocate a new class with enough memory to hold the specified number 00246 * of prototypes and configurations. 00247 **********************************************************************/ 00248 CLASS_TYPE NewClass(int NumProtos, int NumConfigs) { 00249 CLASS_TYPE Class; 00250 00251 Class = new CLASS_STRUCT; 00252 00253 if (NumProtos > 0) 00254 Class->Prototypes = (PROTO) Emalloc (NumProtos * sizeof (PROTO_STRUCT)); 00255 00256 if (NumConfigs > 0) 00257 Class->Configurations = (CONFIGS) Emalloc (NumConfigs * 00258 sizeof (BIT_VECTOR)); 00259 Class->MaxNumProtos = NumProtos; 00260 Class->MaxNumConfigs = NumConfigs; 00261 Class->NumProtos = 0; 00262 Class->NumConfigs = 0; 00263 return (Class); 00264 00265 } 00266 00267 00268 /********************************************************************** 00269 * PrintProtos 00270 * 00271 * Print the list of prototypes in this class type. 00272 **********************************************************************/ 00273 void PrintProtos(CLASS_TYPE Class) { 00274 inT16 Pid; 00275 00276 for (Pid = 0; Pid < Class->NumProtos; Pid++) { 00277 cprintf ("Proto %d:\t", Pid); 00278 PrintProto (ProtoIn (Class, Pid)); 00279 cprintf ("\t"); 00280 PrintProtoLine (ProtoIn (Class, Pid)); 00281 new_line(); 00282 } 00283 } 00284 00285 00286 namespace tesseract { 00293 void Classify::ReadClassFile() { 00294 FILE *File; 00295 char TextLine[CHARS_PER_LINE]; 00296 char unichar[CHARS_PER_LINE]; 00297 00298 cprintf ("Reading training data from '%s' ...", 00299 static_cast<STRING>(classify_training_file).string()); 00300 fflush(stdout); 00301 00302 File = open_file(static_cast<STRING>(classify_training_file).string(), "r"); 00303 while (fgets (TextLine, CHARS_PER_LINE, File) != NULL) { 00304 00305 sscanf(TextLine, "%s", unichar); 00306 ReadClassFromFile (File, unicharset.unichar_to_id(unichar)); 00307 fgets(TextLine, CHARS_PER_LINE, File); 00308 fgets(TextLine, CHARS_PER_LINE, File); 00309 } 00310 fclose(File); 00311 new_line(); 00312 } 00313 } // namespace tesseract 00314 00321 void ReadClassFromFile(FILE *File, UNICHAR_ID unichar_id) { 00322 CLASS_TYPE Class; 00323 00324 Class = &TrainingData[unichar_id]; 00325 00326 ReadProtos(File, Class); 00327 00328 ReadConfigs(File, Class); 00329 } 00330 00337 void ReadConfigs(register FILE *File, CLASS_TYPE Class) { 00338 inT16 Cid; 00339 register inT16 Wid; 00340 register BIT_VECTOR ThisConfig; 00341 int NumWords; 00342 int NumConfigs; 00343 00344 fscanf (File, "%d %d\n", &NumConfigs, &NumWords); 00345 Class->NumConfigs = NumConfigs; 00346 Class->MaxNumConfigs = NumConfigs; 00347 Class->Configurations = 00348 (CONFIGS) Emalloc (sizeof (BIT_VECTOR) * NumConfigs); 00349 NumWords = WordsInVectorOfSize (Class->NumProtos); 00350 00351 for (Cid = 0; Cid < NumConfigs; Cid++) { 00352 00353 ThisConfig = NewBitVector (Class->NumProtos); 00354 for (Wid = 0; Wid < NumWords; Wid++) 00355 fscanf (File, "%x", &ThisConfig[Wid]); 00356 Class->Configurations[Cid] = ThisConfig; 00357 } 00358 } 00359 00360 00367 void ReadProtos(register FILE *File, CLASS_TYPE Class) { 00368 register inT16 Pid; 00369 register PROTO Proto; 00370 int NumProtos; 00371 00372 fscanf (File, "%d\n", &NumProtos); 00373 Class->NumProtos = NumProtos; 00374 Class->MaxNumProtos = NumProtos; 00375 Class->Prototypes = (PROTO) Emalloc (sizeof (PROTO_STRUCT) * NumProtos); 00376 00377 for (Pid = 0; Pid < NumProtos; Pid++) { 00378 Proto = ProtoIn (Class, Pid); 00379 fscanf (File, "%f %f %f %f %f %f %f\n", 00380 &Proto->X, 00381 &Proto->Y, 00382 &Proto->Length, 00383 &Proto->Angle, 00384 &Proto->A, 00385 &Proto->B, &Proto->C); 00386 } 00387 } 00388 00389 00399 int SplitProto(CLASS_TYPE Class, int OldPid) { 00400 int i; 00401 int NewPid; 00402 BIT_VECTOR Config; 00403 00404 NewPid = AddProtoToClass (Class); 00405 00406 for (i = 0; i < Class->NumConfigs; i++) { 00407 Config = Class->Configurations[i]; 00408 if (test_bit (Config, OldPid)) 00409 SET_BIT(Config, NewPid); 00410 } 00411 return (NewPid); 00412 } 00413 00414 00425 void WriteOldConfigFile(FILE *File, CLASS_TYPE Class) { 00426 int Cid, Pid; 00427 BIT_VECTOR Config; 00428 00429 fprintf (File, "%d %d\n", Class->NumConfigs, Class->NumProtos); 00430 00431 for (Cid = 0; Cid < Class->NumConfigs; Cid++) { 00432 fprintf (File, "1 "); 00433 00434 Config = Class->Configurations[Cid]; 00435 00436 for (Pid = 0; Pid < Class->NumProtos; Pid++) { 00437 if (test_bit (Config, Pid)) 00438 fprintf (File, "1"); 00439 else 00440 fprintf (File, "0"); 00441 } 00442 fprintf (File, "\n"); 00443 } 00444 } 00445 00446 00457 void WriteOldProtoFile(FILE *File, CLASS_TYPE Class) { 00458 int Pid; 00459 PROTO Proto; 00460 00461 /* print old header */ 00462 fprintf (File, "6\n"); 00463 fprintf (File, "linear essential -0.500000 0.500000\n"); 00464 fprintf (File, "linear essential -0.250000 0.750000\n"); 00465 fprintf (File, "linear essential 0.000000 1.000000\n"); 00466 fprintf (File, "circular essential 0.000000 1.000000\n"); 00467 fprintf (File, "linear non-essential -0.500000 0.500000\n"); 00468 fprintf (File, "linear non-essential -0.500000 0.500000\n"); 00469 00470 for (Pid = 0; Pid < Class->NumProtos; Pid++) { 00471 Proto = ProtoIn (Class, Pid); 00472 00473 fprintf (File, "significant elliptical 1\n"); 00474 fprintf (File, " %9.6f %9.6f %9.6f %9.6f %9.6f %9.6f\n", 00475 Proto->X, Proto->Y, 00476 Proto->Length, Proto->Angle, 0.0, 0.0); 00477 fprintf (File, " %9.6f %9.6f %9.6f %9.6f %9.6f %9.6f\n", 00478 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001); 00479 } 00480 }