Tesseract
3.02
|
00001 /****************************************************************************** 00002 ** Filename: clustertool.c 00003 ** Purpose: Misc. tools for use with the clustering routines 00004 ** Author: Dan Johnson 00005 ** History: 6/6/89, DSJ, Created. 00006 ** 00007 ** (c) Copyright Hewlett-Packard Company, 1988. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 ******************************************************************************/ 00018 00019 //--------------------------Include Files---------------------------------- 00020 #include "clusttool.h" 00021 #include "const.h" 00022 #include "danerror.h" 00023 #include "emalloc.h" 00024 #include "scanutils.h" 00025 #include <stdio.h> 00026 #include <math.h> 00027 00028 //---------------Global Data Definitions and Declarations-------------------- 00029 #define TOKENSIZE 80 //max size of tokens read from an input file 00030 #define MAXSAMPLESIZE 65535 //max num of dimensions in feature space 00031 //#define MAXBLOCKSIZE 65535 //max num of samples in a character (block size) 00032 00033 /*--------------------------------------------------------------------------- 00034 Public Code 00035 -----------------------------------------------------------------------------*/ 00046 uinT16 ReadSampleSize(FILE *File) { 00047 int SampleSize; 00048 00049 if ((fscanf (File, "%d", &SampleSize) != 1) || 00050 (SampleSize < 0) || (SampleSize > MAXSAMPLESIZE)) 00051 DoError (ILLEGALSAMPLESIZE, "Illegal sample size"); 00052 return (SampleSize); 00053 } // ReadSampleSize 00054 00055 00068 PARAM_DESC *ReadParamDesc(FILE *File, uinT16 N) { 00069 int i; 00070 PARAM_DESC *ParamDesc; 00071 char Token[TOKENSIZE]; 00072 00073 ParamDesc = (PARAM_DESC *) Emalloc (N * sizeof (PARAM_DESC)); 00074 for (i = 0; i < N; i++) { 00075 if (fscanf (File, "%s", Token) != 1) 00076 DoError (ILLEGALCIRCULARSPEC, 00077 "Illegal circular/linear specification"); 00078 if (Token[0] == 'c') 00079 ParamDesc[i].Circular = TRUE; 00080 else 00081 ParamDesc[i].Circular = FALSE; 00082 00083 if (fscanf (File, "%s", Token) != 1) 00084 DoError (ILLEGALESSENTIALSPEC, 00085 "Illegal essential/non-essential spec"); 00086 if (Token[0] == 'e') 00087 ParamDesc[i].NonEssential = FALSE; 00088 else 00089 ParamDesc[i].NonEssential = TRUE; 00090 if (fscanf (File, "%f%f", &(ParamDesc[i].Min), &(ParamDesc[i].Max)) != 00091 2) 00092 DoError (ILLEGALMINMAXSPEC, "Illegal min or max specification"); 00093 ParamDesc[i].Range = ParamDesc[i].Max - ParamDesc[i].Min; 00094 ParamDesc[i].HalfRange = ParamDesc[i].Range / 2; 00095 ParamDesc[i].MidRange = (ParamDesc[i].Max + ParamDesc[i].Min) / 2; 00096 } 00097 return (ParamDesc); 00098 } // ReadParamDesc 00099 00100 00115 PROTOTYPE *ReadPrototype(FILE *File, uinT16 N) { 00116 char Token[TOKENSIZE]; 00117 int Status; 00118 PROTOTYPE *Proto; 00119 int SampleCount; 00120 int i; 00121 00122 if ((Status = fscanf (File, "%s", Token)) == 1) { 00123 Proto = (PROTOTYPE *) Emalloc (sizeof (PROTOTYPE)); 00124 Proto->Cluster = NULL; 00125 if (Token[0] == 's') 00126 Proto->Significant = TRUE; 00127 else 00128 Proto->Significant = FALSE; 00129 00130 Proto->Style = ReadProtoStyle (File); 00131 00132 if ((fscanf (File, "%d", &SampleCount) != 1) || (SampleCount < 0)) 00133 DoError (ILLEGALSAMPLECOUNT, "Illegal sample count"); 00134 Proto->NumSamples = SampleCount; 00135 00136 Proto->Mean = ReadNFloats (File, N, NULL); 00137 if (Proto->Mean == NULL) 00138 DoError (ILLEGALMEANSPEC, "Illegal prototype mean"); 00139 00140 switch (Proto->Style) { 00141 case spherical: 00142 if (ReadNFloats (File, 1, &(Proto->Variance.Spherical)) == NULL) 00143 DoError (ILLEGALVARIANCESPEC, "Illegal prototype variance"); 00144 Proto->Magnitude.Spherical = 00145 1.0 / sqrt ((double) (2.0 * PI * Proto->Variance.Spherical)); 00146 Proto->TotalMagnitude = 00147 pow (Proto->Magnitude.Spherical, (float) N); 00148 Proto->LogMagnitude = log ((double) Proto->TotalMagnitude); 00149 Proto->Weight.Spherical = 1.0 / Proto->Variance.Spherical; 00150 Proto->Distrib = NULL; 00151 break; 00152 case elliptical: 00153 Proto->Variance.Elliptical = ReadNFloats (File, N, NULL); 00154 if (Proto->Variance.Elliptical == NULL) 00155 DoError (ILLEGALVARIANCESPEC, "Illegal prototype variance"); 00156 Proto->Magnitude.Elliptical = 00157 (FLOAT32 *) Emalloc (N * sizeof (FLOAT32)); 00158 Proto->Weight.Elliptical = 00159 (FLOAT32 *) Emalloc (N * sizeof (FLOAT32)); 00160 Proto->TotalMagnitude = 1.0; 00161 for (i = 0; i < N; i++) { 00162 Proto->Magnitude.Elliptical[i] = 00163 1.0 / 00164 sqrt ((double) (2.0 * PI * Proto->Variance.Elliptical[i])); 00165 Proto->Weight.Elliptical[i] = 00166 1.0 / Proto->Variance.Elliptical[i]; 00167 Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i]; 00168 } 00169 Proto->LogMagnitude = log ((double) Proto->TotalMagnitude); 00170 Proto->Distrib = NULL; 00171 break; 00172 case mixed: 00173 Proto->Distrib = 00174 (DISTRIBUTION *) Emalloc (N * sizeof (DISTRIBUTION)); 00175 for (i = 0; i < N; i++) { 00176 if (fscanf (File, "%s", Token) != 1) 00177 DoError (ILLEGALDISTRIBUTION, 00178 "Illegal prototype distribution"); 00179 switch (Token[0]) { 00180 case 'n': 00181 Proto->Distrib[i] = normal; 00182 break; 00183 case 'u': 00184 Proto->Distrib[i] = uniform; 00185 break; 00186 case 'r': 00187 Proto->Distrib[i] = D_random; 00188 break; 00189 default: 00190 DoError (ILLEGALDISTRIBUTION, 00191 "Illegal prototype distribution"); 00192 } 00193 } 00194 Proto->Variance.Elliptical = ReadNFloats (File, N, NULL); 00195 if (Proto->Variance.Elliptical == NULL) 00196 DoError (ILLEGALVARIANCESPEC, "Illegal prototype variance"); 00197 Proto->Magnitude.Elliptical = 00198 (FLOAT32 *) Emalloc (N * sizeof (FLOAT32)); 00199 Proto->Weight.Elliptical = 00200 (FLOAT32 *) Emalloc (N * sizeof (FLOAT32)); 00201 Proto->TotalMagnitude = 1.0; 00202 for (i = 0; i < N; i++) { 00203 switch (Proto->Distrib[i]) { 00204 case normal: 00205 Proto->Magnitude.Elliptical[i] = 1.0 / 00206 sqrt ((double) 00207 (2.0 * PI * Proto->Variance.Elliptical[i])); 00208 Proto->Weight.Elliptical[i] = 00209 1.0 / Proto->Variance.Elliptical[i]; 00210 break; 00211 case uniform: 00212 case D_random: 00213 Proto->Magnitude.Elliptical[i] = 1.0 / 00214 (2.0 * Proto->Variance.Elliptical[i]); 00215 break; 00216 case DISTRIBUTION_COUNT: 00217 ASSERT_HOST(!"Distribution count not allowed!"); 00218 } 00219 Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i]; 00220 } 00221 Proto->LogMagnitude = log ((double) Proto->TotalMagnitude); 00222 break; 00223 } 00224 return (Proto); 00225 } 00226 else if (Status == EOF) 00227 return (NULL); 00228 else { 00229 DoError (ILLEGALSIGNIFICANCESPEC, "Illegal significance specification"); 00230 return (NULL); 00231 } 00232 } // ReadPrototype 00233 00234 00235 /* ReadProtoStyle ************************************************************* 00236 Parameters: File open text file to read prototype style from 00237 Globals: None 00238 Operation: This routine reads an single token from the specified 00239 text file and interprets it as a prototype specification. 00240 Return: Prototype style read from text file 00241 Exceptions: ILLEGALSTYLESPEC illegal prototype style specification 00242 History: 6/8/89, DSJ, Created. 00243 *******************************************************************************/ 00244 PROTOSTYLE ReadProtoStyle(FILE *File) { 00245 char Token[TOKENSIZE]; 00246 PROTOSTYLE Style; 00247 00248 if (fscanf (File, "%s", Token) != 1) 00249 DoError (ILLEGALSTYLESPEC, "Illegal prototype style specification"); 00250 switch (Token[0]) { 00251 case 's': 00252 Style = spherical; 00253 break; 00254 case 'e': 00255 Style = elliptical; 00256 break; 00257 case 'm': 00258 Style = mixed; 00259 break; 00260 case 'a': 00261 Style = automatic; 00262 break; 00263 default: 00264 Style = elliptical; 00265 DoError (ILLEGALSTYLESPEC, "Illegal prototype style specification"); 00266 } 00267 return (Style); 00268 } // ReadProtoStyle 00269 00270 00285 FLOAT32 * 00286 ReadNFloats (FILE * File, uinT16 N, FLOAT32 Buffer[]) { 00287 int i; 00288 int NumFloatsRead; 00289 00290 if (Buffer == NULL) 00291 Buffer = (FLOAT32 *) Emalloc (N * sizeof (FLOAT32)); 00292 00293 for (i = 0; i < N; i++) { 00294 NumFloatsRead = fscanf (File, "%f", &(Buffer[i])); 00295 if (NumFloatsRead != 1) { 00296 if ((NumFloatsRead == EOF) && (i == 0)) 00297 return (NULL); 00298 else 00299 DoError (ILLEGALFLOAT, "Illegal float specification"); 00300 } 00301 } 00302 return (Buffer); 00303 } // ReadNFloats 00304 00305 00317 void 00318 WriteParamDesc (FILE * File, uinT16 N, PARAM_DESC ParamDesc[]) { 00319 int i; 00320 00321 for (i = 0; i < N; i++) { 00322 if (ParamDesc[i].Circular) 00323 fprintf (File, "circular "); 00324 else 00325 fprintf (File, "linear "); 00326 00327 if (ParamDesc[i].NonEssential) 00328 fprintf (File, "non-essential "); 00329 else 00330 fprintf (File, "essential "); 00331 00332 fprintf (File, "%10.6f %10.6f\n", ParamDesc[i].Min, ParamDesc[i].Max); 00333 } 00334 } // WriteParamDesc 00335 00336 00348 void WritePrototype(FILE *File, uinT16 N, PROTOTYPE *Proto) { 00349 int i; 00350 00351 if (Proto->Significant) 00352 fprintf (File, "significant "); 00353 else 00354 fprintf (File, "insignificant "); 00355 WriteProtoStyle (File, (PROTOSTYLE) Proto->Style); 00356 fprintf (File, "%6d\n\t", Proto->NumSamples); 00357 WriteNFloats (File, N, Proto->Mean); 00358 fprintf (File, "\t"); 00359 00360 switch (Proto->Style) { 00361 case spherical: 00362 WriteNFloats (File, 1, &(Proto->Variance.Spherical)); 00363 break; 00364 case elliptical: 00365 WriteNFloats (File, N, Proto->Variance.Elliptical); 00366 break; 00367 case mixed: 00368 for (i = 0; i < N; i++) 00369 switch (Proto->Distrib[i]) { 00370 case normal: 00371 fprintf (File, " %9s", "normal"); 00372 break; 00373 case uniform: 00374 fprintf (File, " %9s", "uniform"); 00375 break; 00376 case D_random: 00377 fprintf (File, " %9s", "random"); 00378 break; 00379 case DISTRIBUTION_COUNT: 00380 ASSERT_HOST(!"Distribution count not allowed!"); 00381 } 00382 fprintf (File, "\n\t"); 00383 WriteNFloats (File, N, Proto->Variance.Elliptical); 00384 } 00385 } // WritePrototype 00386 00387 00399 void WriteNFloats(FILE * File, uinT16 N, FLOAT32 Array[]) { 00400 for (int i = 0; i < N; i++) 00401 fprintf(File, " %9.6f", Array[i]); 00402 fprintf(File, "\n"); 00403 } // WriteNFloats 00404 00405 00417 void WriteProtoStyle(FILE *File, PROTOSTYLE ProtoStyle) { 00418 switch (ProtoStyle) { 00419 case spherical: 00420 fprintf (File, "spherical"); 00421 break; 00422 case elliptical: 00423 fprintf (File, "elliptical"); 00424 break; 00425 case mixed: 00426 fprintf (File, "mixed"); 00427 break; 00428 case automatic: 00429 fprintf (File, "automatic"); 00430 break; 00431 } 00432 } // WriteProtoStyle 00433 00434 /*---------------------------------------------------------------------------*/ 00435 void WriteProtoList( 00436 FILE *File, 00437 uinT16 N, 00438 PARAM_DESC ParamDesc[], 00439 LIST ProtoList, 00440 BOOL8 WriteSigProtos, 00441 BOOL8 WriteInsigProtos) 00442 00443 /* 00444 ** Parameters: 00445 ** File open text file to write prototypes to 00446 ** N number of dimensions in feature space 00447 ** ParamDesc descriptions for each dimension 00448 ** ProtoList list of prototypes to be written 00449 ** WriteSigProtos TRUE to write out significant prototypes 00450 ** WriteInsigProtos TRUE to write out insignificants 00451 ** Globals: 00452 ** None 00453 ** Operation: 00454 ** This routine writes a textual description of each prototype 00455 ** in the prototype list to the specified file. It also 00456 ** writes a file header which includes the number of dimensions 00457 ** in feature space and the descriptions for each dimension. 00458 ** Return: 00459 ** None 00460 ** Exceptions: 00461 ** None 00462 ** History: 00463 ** 6/12/89, DSJ, Created. 00464 */ 00465 00466 { 00467 PROTOTYPE *Proto; 00468 00469 /* write file header */ 00470 fprintf(File,"%0d\n",N); 00471 WriteParamDesc(File,N,ParamDesc); 00472 00473 /* write prototypes */ 00474 iterate(ProtoList) 00475 { 00476 Proto = (PROTOTYPE *) first_node ( ProtoList ); 00477 if (( Proto->Significant && WriteSigProtos ) || 00478 ( ! Proto->Significant && WriteInsigProtos ) ) 00479 WritePrototype( File, N, Proto ); 00480 } 00481 } /* WriteProtoList */ 00482