Tesseract  3.02
tesseract-ocr/classify/clusttool.cpp File Reference
#include "clusttool.h"
#include "const.h"
#include "danerror.h"
#include "emalloc.h"
#include "scanutils.h"
#include <stdio.h>
#include <math.h>

Go to the source code of this file.

Defines

#define TOKENSIZE   80
#define MAXSAMPLESIZE   65535

Functions

uinT16 ReadSampleSize (FILE *File)
PARAM_DESCReadParamDesc (FILE *File, uinT16 N)
PROTOTYPEReadPrototype (FILE *File, uinT16 N)
PROTOSTYLE ReadProtoStyle (FILE *File)
FLOAT32ReadNFloats (FILE *File, uinT16 N, FLOAT32 Buffer[])
void WriteParamDesc (FILE *File, uinT16 N, PARAM_DESC ParamDesc[])
void WritePrototype (FILE *File, uinT16 N, PROTOTYPE *Proto)
void WriteNFloats (FILE *File, uinT16 N, FLOAT32 Array[])
void WriteProtoStyle (FILE *File, PROTOSTYLE ProtoStyle)
void WriteProtoList (FILE *File, uinT16 N, PARAM_DESC ParamDesc[], LIST ProtoList, BOOL8 WriteSigProtos, BOOL8 WriteInsigProtos)

Define Documentation

#define MAXSAMPLESIZE   65535

Definition at line 30 of file clusttool.cpp.

#define TOKENSIZE   80

Definition at line 29 of file clusttool.cpp.


Function Documentation

FLOAT32* ReadNFloats ( FILE *  File,
uinT16  N,
FLOAT32  Buffer[] 
)

ReadNFloats ************************************************************* Parameters: File open text file to read floats from N number of floats to read Buffer pointer to buffer to place floats into Globals: None Operation: This routine reads N floats from the specified text file and places them into Buffer. If Buffer is NULL, a buffer is created and passed back to the caller. If EOF is encountered before any floats can be read, NULL is returned. Return: Pointer to buffer holding floats or NULL if EOF Exceptions: ILLEGALFLOAT History: 6/6/89, DSJ, Created.

Definition at line 286 of file clusttool.cpp.

                                                      {
  int i;
  int NumFloatsRead;

  if (Buffer == NULL)
    Buffer = (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));

  for (i = 0; i < N; i++) {
    NumFloatsRead = fscanf (File, "%f", &(Buffer[i]));
    if (NumFloatsRead != 1) {
      if ((NumFloatsRead == EOF) && (i == 0))
        return (NULL);
      else
        DoError (ILLEGALFLOAT, "Illegal float specification");
    }
  }
  return (Buffer);
}                                // ReadNFloats
PARAM_DESC* ReadParamDesc ( FILE *  File,
uinT16  N 
)

ReadParamDesc ************************************************************* Parameters: File open text file to read N parameter descriptions from N number of parameter descriptions to read Globals: None Operation: This routine reads textual descriptions of sets of parameters which describe the characteristics of feature dimensions. Return: Pointer to an array of parameter descriptors. Exceptions: ILLEGALCIRCULARSPEC ILLEGALESSENTIALSPEC ILLEGALMINMAXSPEC History: 6/6/89, DSJ, Created.

Definition at line 68 of file clusttool.cpp.

                                                {
  int i;
  PARAM_DESC *ParamDesc;
  char Token[TOKENSIZE];

  ParamDesc = (PARAM_DESC *) Emalloc (N * sizeof (PARAM_DESC));
  for (i = 0; i < N; i++) {
    if (fscanf (File, "%s", Token) != 1)
      DoError (ILLEGALCIRCULARSPEC,
        "Illegal circular/linear specification");
    if (Token[0] == 'c')
      ParamDesc[i].Circular = TRUE;
    else
      ParamDesc[i].Circular = FALSE;

    if (fscanf (File, "%s", Token) != 1)
      DoError (ILLEGALESSENTIALSPEC,
        "Illegal essential/non-essential spec");
    if (Token[0] == 'e')
      ParamDesc[i].NonEssential = FALSE;
    else
      ParamDesc[i].NonEssential = TRUE;
    if (fscanf (File, "%f%f", &(ParamDesc[i].Min), &(ParamDesc[i].Max)) !=
      2)
      DoError (ILLEGALMINMAXSPEC, "Illegal min or max specification");
    ParamDesc[i].Range = ParamDesc[i].Max - ParamDesc[i].Min;
    ParamDesc[i].HalfRange = ParamDesc[i].Range / 2;
    ParamDesc[i].MidRange = (ParamDesc[i].Max + ParamDesc[i].Min) / 2;
  }
  return (ParamDesc);
}                                // ReadParamDesc
PROTOSTYLE ReadProtoStyle ( FILE *  File)

Definition at line 244 of file clusttool.cpp.

                                      {
  char Token[TOKENSIZE];
  PROTOSTYLE Style;

  if (fscanf (File, "%s", Token) != 1)
    DoError (ILLEGALSTYLESPEC, "Illegal prototype style specification");
  switch (Token[0]) {
    case 's':
      Style = spherical;
      break;
    case 'e':
      Style = elliptical;
      break;
    case 'm':
      Style = mixed;
      break;
    case 'a':
      Style = automatic;
      break;
    default:
      Style = elliptical;
      DoError (ILLEGALSTYLESPEC, "Illegal prototype style specification");
  }
  return (Style);
}                                // ReadProtoStyle
PROTOTYPE* ReadPrototype ( FILE *  File,
uinT16  N 
)

ReadPrototype ************************************************************* Parameters: File open text file to read prototype from N number of dimensions used in prototype Globals: None Operation: This routine reads a textual description of a prototype from the specified file. Return: List of prototypes Exceptions: ILLEGALSIGNIFICANCESPEC ILLEGALSAMPLECOUNT ILLEGALMEANSPEC ILLEGALVARIANCESPEC ILLEGALDISTRIBUTION History: 6/6/89, DSJ, Created.

Definition at line 115 of file clusttool.cpp.

                                               {
  char Token[TOKENSIZE];
  int Status;
  PROTOTYPE *Proto;
  int SampleCount;
  int i;

  if ((Status = fscanf (File, "%s", Token)) == 1) {
    Proto = (PROTOTYPE *) Emalloc (sizeof (PROTOTYPE));
    Proto->Cluster = NULL;
    if (Token[0] == 's')
      Proto->Significant = TRUE;
    else
      Proto->Significant = FALSE;

    Proto->Style = ReadProtoStyle (File);

    if ((fscanf (File, "%d", &SampleCount) != 1) || (SampleCount < 0))
      DoError (ILLEGALSAMPLECOUNT, "Illegal sample count");
    Proto->NumSamples = SampleCount;

    Proto->Mean = ReadNFloats (File, N, NULL);
    if (Proto->Mean == NULL)
      DoError (ILLEGALMEANSPEC, "Illegal prototype mean");

    switch (Proto->Style) {
      case spherical:
        if (ReadNFloats (File, 1, &(Proto->Variance.Spherical)) == NULL)
          DoError (ILLEGALVARIANCESPEC, "Illegal prototype variance");
        Proto->Magnitude.Spherical =
          1.0 / sqrt ((double) (2.0 * PI * Proto->Variance.Spherical));
        Proto->TotalMagnitude =
          pow (Proto->Magnitude.Spherical, (float) N);
        Proto->LogMagnitude = log ((double) Proto->TotalMagnitude);
        Proto->Weight.Spherical = 1.0 / Proto->Variance.Spherical;
        Proto->Distrib = NULL;
        break;
      case elliptical:
        Proto->Variance.Elliptical = ReadNFloats (File, N, NULL);
        if (Proto->Variance.Elliptical == NULL)
          DoError (ILLEGALVARIANCESPEC, "Illegal prototype variance");
        Proto->Magnitude.Elliptical =
          (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));
        Proto->Weight.Elliptical =
          (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));
        Proto->TotalMagnitude = 1.0;
        for (i = 0; i < N; i++) {
          Proto->Magnitude.Elliptical[i] =
            1.0 /
            sqrt ((double) (2.0 * PI * Proto->Variance.Elliptical[i]));
          Proto->Weight.Elliptical[i] =
            1.0 / Proto->Variance.Elliptical[i];
          Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i];
        }
        Proto->LogMagnitude = log ((double) Proto->TotalMagnitude);
        Proto->Distrib = NULL;
        break;
      case mixed:
        Proto->Distrib =
          (DISTRIBUTION *) Emalloc (N * sizeof (DISTRIBUTION));
        for (i = 0; i < N; i++) {
          if (fscanf (File, "%s", Token) != 1)
            DoError (ILLEGALDISTRIBUTION,
              "Illegal prototype distribution");
          switch (Token[0]) {
            case 'n':
              Proto->Distrib[i] = normal;
              break;
            case 'u':
              Proto->Distrib[i] = uniform;
              break;
            case 'r':
              Proto->Distrib[i] = D_random;
              break;
            default:
              DoError (ILLEGALDISTRIBUTION,
                "Illegal prototype distribution");
          }
        }
        Proto->Variance.Elliptical = ReadNFloats (File, N, NULL);
        if (Proto->Variance.Elliptical == NULL)
          DoError (ILLEGALVARIANCESPEC, "Illegal prototype variance");
        Proto->Magnitude.Elliptical =
          (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));
        Proto->Weight.Elliptical =
          (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));
        Proto->TotalMagnitude = 1.0;
        for (i = 0; i < N; i++) {
          switch (Proto->Distrib[i]) {
            case normal:
              Proto->Magnitude.Elliptical[i] = 1.0 /
                sqrt ((double)
                (2.0 * PI * Proto->Variance.Elliptical[i]));
              Proto->Weight.Elliptical[i] =
                1.0 / Proto->Variance.Elliptical[i];
              break;
            case uniform:
            case D_random:
              Proto->Magnitude.Elliptical[i] = 1.0 /
                (2.0 * Proto->Variance.Elliptical[i]);
              break;
            case DISTRIBUTION_COUNT:
              ASSERT_HOST(!"Distribution count not allowed!");
          }
          Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i];
        }
        Proto->LogMagnitude = log ((double) Proto->TotalMagnitude);
        break;
    }
    return (Proto);
  }
  else if (Status == EOF)
    return (NULL);
  else {
    DoError (ILLEGALSIGNIFICANCESPEC, "Illegal significance specification");
    return (NULL);
  }
}                                // ReadPrototype
uinT16 ReadSampleSize ( FILE *  File)

ReadSampleSize *********************************************************** Parameters: File open text file to read sample size from Globals: None Operation: This routine reads a single integer from the specified file and checks to ensure that it is between 0 and MAXSAMPLESIZE. Return: Sample size Exceptions: ILLEGALSAMPLESIZE illegal format or range History: 6/6/89, DSJ, Created.

Definition at line 46 of file clusttool.cpp.

                                  {
  int SampleSize;

  if ((fscanf (File, "%d", &SampleSize) != 1) ||
    (SampleSize < 0) || (SampleSize > MAXSAMPLESIZE))
    DoError (ILLEGALSAMPLESIZE, "Illegal sample size");
  return (SampleSize);
}                                // ReadSampleSize
void WriteNFloats ( FILE *  File,
uinT16  N,
FLOAT32  Array[] 
)

WriteNFloats *********************************************************** Parameters: File open text file to write N floats to N number of floats to write Array array of floats to write Globals: None Operation: This routine writes a text representation of N floats from an array to a file. All of the floats are placed on one line. Return: None Exceptions: None History: 6/6/89, DSJ, Created.

Definition at line 399 of file clusttool.cpp.

                                                          {
  for (int i = 0; i < N; i++)
    fprintf(File, " %9.6f", Array[i]);
  fprintf(File, "\n");
}                                // WriteNFloats
void WriteParamDesc ( FILE *  File,
uinT16  N,
PARAM_DESC  ParamDesc[] 
)

WriteParamDesc ************************************************************ Parameters: File open text file to write param descriptors to N number of param descriptors to write ParamDesc array of param descriptors to write Globals: None Operation: This routine writes an array of dimension descriptors to the specified text file. Return: None Exceptions: None History: 6/6/89, DSJ, Created.

Definition at line 318 of file clusttool.cpp.

                                                               {
  int i;

  for (i = 0; i < N; i++) {
    if (ParamDesc[i].Circular)
      fprintf (File, "circular ");
    else
      fprintf (File, "linear   ");

    if (ParamDesc[i].NonEssential)
      fprintf (File, "non-essential ");
    else
      fprintf (File, "essential     ");

    fprintf (File, "%10.6f %10.6f\n", ParamDesc[i].Min, ParamDesc[i].Max);
  }
}                                // WriteParamDesc
void WriteProtoList ( FILE *  File,
uinT16  N,
PARAM_DESC  ParamDesc[],
LIST  ProtoList,
BOOL8  WriteSigProtos,
BOOL8  WriteInsigProtos 
)

Definition at line 435 of file clusttool.cpp.

{
  PROTOTYPE     *Proto;

  /* write file header */
  fprintf(File,"%0d\n",N);
  WriteParamDesc(File,N,ParamDesc);

  /* write prototypes */
  iterate(ProtoList)
    {
      Proto = (PROTOTYPE *) first_node ( ProtoList );
      if (( Proto->Significant && WriteSigProtos )      ||
          ( ! Proto->Significant && WriteInsigProtos ) )
        WritePrototype( File, N, Proto );
    }
}       /* WriteProtoList */
void WriteProtoStyle ( FILE *  File,
PROTOSTYLE  ProtoStyle 
)

WriteProtoSyle ********************************************************** Parameters: File open text file to write prototype style to ProtoStyle prototype style to write Globals: None Operation: This routine writes to the specified text file a word which represents the ProtoStyle. It does not append a carriage return to the end. Return: None Exceptions: None History: 6/8/89, DSJ, Created.

Definition at line 417 of file clusttool.cpp.

                                                        {
  switch (ProtoStyle) {
    case spherical:
      fprintf (File, "spherical");
      break;
    case elliptical:
      fprintf (File, "elliptical");
      break;
    case mixed:
      fprintf (File, "mixed");
      break;
    case automatic:
      fprintf (File, "automatic");
      break;
  }
}                                // WriteProtoStyle
void WritePrototype ( FILE *  File,
uinT16  N,
PROTOTYPE Proto 
)

WritePrototype ************************************************************ Parameters: File open text file to write prototype to N number of dimensions in feature space Proto prototype to write out Globals: None Operation: This routine writes a textual description of a prototype to the specified text file. Return: None Exceptions: None History: 6/12/89, DSJ, Created.

Definition at line 348 of file clusttool.cpp.

                                                            {
  int i;

  if (Proto->Significant)
    fprintf (File, "significant   ");
  else
    fprintf (File, "insignificant ");
  WriteProtoStyle (File, (PROTOSTYLE) Proto->Style);
  fprintf (File, "%6d\n\t", Proto->NumSamples);
  WriteNFloats (File, N, Proto->Mean);
  fprintf (File, "\t");

  switch (Proto->Style) {
    case spherical:
      WriteNFloats (File, 1, &(Proto->Variance.Spherical));
      break;
    case elliptical:
      WriteNFloats (File, N, Proto->Variance.Elliptical);
      break;
    case mixed:
      for (i = 0; i < N; i++)
      switch (Proto->Distrib[i]) {
        case normal:
          fprintf (File, " %9s", "normal");
          break;
        case uniform:
          fprintf (File, " %9s", "uniform");
          break;
        case D_random:
          fprintf (File, " %9s", "random");
          break;
        case DISTRIBUTION_COUNT:
          ASSERT_HOST(!"Distribution count not allowed!");
      }
      fprintf (File, "\n\t");
      WriteNFloats (File, N, Proto->Variance.Elliptical);
  }
}                                // WritePrototype