Tesseract  3.02
tesseract-ocr/training/commontraining.cpp File Reference
#include "commontraining.h"
#include "base/init_google.h"
#include "base/commandlineflags.h"
#include "allheaders.h"
#include "ccutil.h"
#include "classify.h"
#include "oldlist.h"
#include "globals.h"
#include "mf.h"
#include "clusttool.h"
#include "cluster.h"
#include "tessopt.h"
#include "efio.h"
#include "emalloc.h"
#include "featdefs.h"
#include "fontinfo.h"
#include "intfeaturespace.h"
#include "mastertrainer.h"
#include "tessdatamanager.h"
#include "tprintf.h"
#include "freelist.h"
#include "params.h"
#include "shapetable.h"
#include "unicity_table.h"
#include <math.h>

Go to the source code of this file.

Namespaces

namespace  tesseract

Functions

 INT_PARAM_FLAG (debug_level, 0,"Level of Trainer debugging")
 INT_PARAM_FLAG (load_images, 0,"Load images with tr files")
 STRING_PARAM_FLAG (configfile,"","File to load more configs from")
 STRING_PARAM_FLAG (D,"","Directory to write output files to")
 STRING_PARAM_FLAG (F,"font_properties","File listing font properties")
 STRING_PARAM_FLAG (X,"","File listing font xheights")
 STRING_PARAM_FLAG (U,"unicharset","File to load unicharset from")
 STRING_PARAM_FLAG (O,"","File to write unicharset to")
 STRING_PARAM_FLAG (input_trainer,"","File to load trainer from")
 STRING_PARAM_FLAG (output_trainer,"","File to write trainer to")
 STRING_PARAM_FLAG (test_ch,"","UTF8 test character string")
void ParseArguments (int *argc, char ***argv)
ShapeTable * tesseract::LoadShapeTable (const STRING &file_prefix)
void tesseract::WriteShapeTable (const STRING &file_prefix, const ShapeTable &shape_table)
MasterTrainer * tesseract::LoadTrainingData (int argc, const char *const *argv, bool replication, ShapeTable **shape_table, STRING *file_prefix)
const char * GetNextFilename (int argc, const char *const *argv)
LABELEDLIST FindList (LIST List, char *Label)
LABELEDLIST NewLabeledList (const char *Label)
void ReadTrainingSamples (const FEATURE_DEFS_STRUCT &feature_defs, const char *feature_name, int max_samples, UNICHARSET *unicharset, FILE *file, LIST *training_samples)
void FreeTrainingSamples (LIST CharList)
void FreeLabeledList (LABELEDLIST LabeledList)
CLUSTERER * SetUpForClustering (const FEATURE_DEFS_STRUCT &FeatureDefs, LABELEDLIST char_sample, const char *program_feature_type)
void MergeInsignificantProtos (LIST ProtoList, const char *label, CLUSTERER *Clusterer, CLUSTERCONFIG *Config)
void CleanUpUnusedData (LIST ProtoList)
LIST RemoveInsignificantProtos (LIST ProtoList, BOOL8 KeepSigProtos, BOOL8 KeepInsigProtos, int N)
MERGE_CLASS FindClass (LIST List, const char *Label)
MERGE_CLASS NewLabeledClass (const char *Label)
void FreeLabeledClassList (LIST ClassList)
CLASS_STRUCT * SetUpForFloat2Int (const UNICHARSET &unicharset, LIST LabeledClassList)
void Normalize (float *Values)
void FreeNormProtoList (LIST CharList)
void AddToNormProtosList (LIST *NormProtoList, LIST ProtoList, char *CharName)
int NumberOfProtos (LIST ProtoList, BOOL8 CountSigProtos, BOOL8 CountInsigProtos)

Variables

CLUSTERCONFIG Config = { elliptical, 0.625, 0.05, 1.0, 1e-6, 0 }
const char * kUsage = "[flags] [ .tr files ... ]\n"
FEATURE_DEFS_STRUCT feature_defs
CCUtil ccutil

Function Documentation

void AddToNormProtosList ( LIST *  NormProtoList,
LIST  ProtoList,
char *  CharName 
)

Definition at line 935 of file commontraining.cpp.

{
  // Build a new labeled list named after the character and fill it with
  // every prototype found in ProtoList.
  LABELEDLIST labeled_protos = NewLabeledList(CharName);

  iterate(ProtoList)
  {
    PROTOTYPE* current = (PROTOTYPE *) first_node (ProtoList);
    labeled_protos->List = push(labeled_protos->List, current);
  }

  // Hand the finished labeled list to the caller's list of norm protos.
  *NormProtoList = push(*NormProtoList, labeled_protos);
}
void CleanUpUnusedData ( LIST  ProtoList)

Definition at line 683 of file commontraining.cpp.

{
  // For every prototype in the list, release the Variance, Magnitude and
  // Weight arrays (when present) and reset the pointers to NULL so they
  // cannot be released a second time.
  iterate(ProtoList)
  {
    PROTOTYPE* proto = (PROTOTYPE *) first_node (ProtoList);

    if (proto->Variance.Elliptical != NULL) {
      memfree(proto->Variance.Elliptical);
      proto->Variance.Elliptical = NULL;
    }

    if (proto->Magnitude.Elliptical != NULL) {
      memfree(proto->Magnitude.Elliptical);
      proto->Magnitude.Elliptical = NULL;
    }

    if (proto->Weight.Elliptical != NULL) {
      memfree(proto->Weight.Elliptical);
      proto->Weight.Elliptical = NULL;
    }
  }
}
MERGE_CLASS FindClass ( LIST  List,
const char *  Label 
)

Definition at line 778 of file commontraining.cpp.

{
  // Linear search: return the first MERGE_CLASS whose Label equals Label,
  // or NULL when no entry in List matches.
  iterate (List)
  {
    MERGE_CLASS candidate = (MERGE_CLASS) first_node (List);
    if (strcmp (candidate->Label, Label) != 0)
      continue;
    return candidate;
  }
  return NULL;

}       /* FindClass */
LABELEDLIST FindList ( LIST  List,
char *  Label 
)

Definition at line 385 of file commontraining.cpp.

{
  // Walk List and hand back the first labeled list whose Label compares
  // equal to Label; NULL signals that nothing matched.
  iterate (List)
  {
    LABELEDLIST entry = (LABELEDLIST) first_node (List);
    if (!strcmp (entry->Label, Label))
      return entry;
  }
  return NULL;

}       /* FindList */
void FreeLabeledClassList ( LIST  ClassList)

Definition at line 809 of file commontraining.cpp.

{
  // Release each labeled class (its label string, its class data and the
  // node object itself), then tear down the list that held them.
  iterate (ClassList)
  {
    MERGE_CLASS entry = (MERGE_CLASS) first_node (ClassList);
    free (entry->Label);
    FreeClass(entry->Class);
    delete entry;
  }
  destroy (ClassList);

}       /* FreeLabeledClassList */
void FreeLabeledList ( LABELEDLIST  LabeledList)

Definition at line 548 of file commontraining.cpp.

                                              {
/*
 **     Parameters:
 **             LabeledList     labeled list to be freed
 **     Globals: none
 **     Operation:
 **             Releases the label string, the list held by the labeled
 **             list, and the labeled list node itself.  The items stored
 **             in the list are not freed here.
 **     Return: none
 **     Exceptions: none
 **     History: Fri Aug 18 17:52:45 1989, DSJ, Created.
 */
  free(LabeledList->Label);
  destroy(LabeledList->List);
  free(LabeledList);
}  /* FreeLabeledList */
void FreeNormProtoList ( LIST  CharList)

Definition at line 919 of file commontraining.cpp.

{
  // Free every labeled list stored in CharList, then the list itself.
  iterate (CharList)
  {
    FreeLabeledList ((LABELEDLIST) first_node (CharList));
  }
  destroy (CharList);

}       // FreeNormProtoList
void FreeTrainingSamples ( LIST  CharList)

Definition at line 518 of file commontraining.cpp.

                                        {
/*
 **     Parameters:
 **             CharList        list of labeled lists of training samples
 **     Globals: none
 **     Operation:
 **             Frees every feature set held by every labeled list in
 **             CharList, then each labeled list, then CharList itself.
 **     Return: none
 **     Exceptions: none
 **     History: Fri Aug 18 17:44:27 1989, DSJ, Created.
 */
  iterate(CharList) {
    LABELEDLIST labeled = (LABELEDLIST) first_node(CharList);

    // Release the feature sets first; FreeLabeledList does not free items.
    LIST feature_sets = labeled->List;
    iterate(feature_sets) {
      FreeFeatureSet((FEATURE_SET) first_node(feature_sets));
    }

    FreeLabeledList(labeled);
  }
  destroy(CharList);
}  /* FreeTrainingSamples */
const char* GetNextFilename ( int  argc,
const char *const *  argv 
)

Definition at line 362 of file commontraining.cpp.

                                                                {
  /*
   **   Parameters:
   **           argc, argv      command line as left after option parsing
   **   Globals:
   **           tessoptind      index of the next unread argument; advanced
   **                           by one each time an argument is returned
   **   Operation:
   **           Hands back the next command line argument, or NULL once
   **           all arguments have been consumed.  Intended to be called
   **           only after ParseArguments has processed the options.
   **   Return: Next command line argument or NULL.
   **   Exceptions: none
   **   History: Fri Aug 18 09:34:12 1989, DSJ, Created.
   */
  if (tessoptind >= argc)
    return NULL;
  return argv[tessoptind++];
}       /* GetNextFilename */
INT_PARAM_FLAG ( debug_level  ,
0  ,
"Level of Trainer debugging"   
)
INT_PARAM_FLAG ( load_images  ,
0  ,
"Load images with tr files"   
)
void MergeInsignificantProtos ( LIST  ProtoList,
const char *  label,
CLUSTERER *  Clusterer,
CLUSTERCONFIG *  Config 
)

Definition at line 618 of file commontraining.cpp.

                                                                           {
  // Two passes over ProtoList:
  //   1. Every insignificant, not-yet-merged prototype looks for its nearest
  //      unmerged neighbour (by ComputeDistance on the means).  If that
  //      neighbour is also insignificant the two are merged into the
  //      neighbour; if it is significant the prototype is just flagged as
  //      Merged.
  //   2. Any insignificant, unmerged prototype that now holds at least
  //      Config->MinSamples * Clusterer->NumChar samples becomes significant.
  // The debug messages call insignificant protos "red" and significant ones
  // "green"; they are printed only when label equals the test_ch flag.
  PROTOTYPE     *Prototype;
  bool debug = strcmp(FLAGS_test_ch.c_str(), label) == 0;

  LIST pProtoList = ProtoList;
  iterate(pProtoList) {
    Prototype = (PROTOTYPE *) first_node (pProtoList);
    if (Prototype->Significant || Prototype->Merged)
      continue;
    // Only neighbours closer than this initial distance can be chosen.
    FLOAT32 best_dist = 0.125;
    PROTOTYPE* best_match = NULL;
    // Find the nearest alive prototype.
    LIST list_it = ProtoList;
    iterate(list_it) {
      PROTOTYPE* test_p = (PROTOTYPE *) first_node (list_it);
      if (test_p != Prototype && !test_p->Merged) {
        FLOAT32 dist = ComputeDistance(Clusterer->SampleSize,
                                       Clusterer->ParamDesc,
                                       Prototype->Mean, test_p->Mean);
        if (dist < best_dist) {
          best_match = test_p;
          best_dist = dist;
        }
      }
    }
    if (best_match != NULL && !best_match->Significant) {
      if (debug)
        tprintf("Merging red clusters (%d+%d) at %g,%g and %g,%g\n",
                best_match->NumSamples, Prototype->NumSamples,
                best_match->Mean[0], best_match->Mean[1],
                Prototype->Mean[0], Prototype->Mean[1]);
      // The merged mean is written back into best_match->Mean and the
      // returned value becomes best_match's new sample count.
      best_match->NumSamples = MergeClusters(Clusterer->SampleSize,
                                             Clusterer->ParamDesc,
                                             best_match->NumSamples,
                                             Prototype->NumSamples,
                                             best_match->Mean,
                                             best_match->Mean, Prototype->Mean);
      // The absorbed prototype keeps its list slot but is emptied and
      // flagged so later iterations skip it.
      Prototype->NumSamples = 0;
      Prototype->Merged = 1;
    } else if (best_match != NULL) {
      if (debug)
        tprintf("Red proto at %g,%g matched a green one at %g,%g\n",
                Prototype->Mean[0], Prototype->Mean[1],
                best_match->Mean[0], best_match->Mean[1]);
      // Nearest neighbour is significant: nothing is merged numerically,
      // the prototype is only flagged as Merged.
      Prototype->Merged = 1;
    }
  }
  // Mark significant those that now have enough samples.
  int min_samples = (inT32) (Config->MinSamples * Clusterer->NumChar);
  pProtoList = ProtoList;
  iterate(pProtoList) {
    Prototype = (PROTOTYPE *) first_node (pProtoList);
    // Process insignificant protos that do not match a green one
    if (!Prototype->Significant && Prototype->NumSamples >= min_samples &&
        !Prototype->Merged) {
      if (debug)
        tprintf("Red proto at %g,%g becoming green\n",
                Prototype->Mean[0], Prototype->Mean[1]);
      Prototype->Significant = true;
    }
  }
}       /* MergeInsignificantProtos */
MERGE_CLASS NewLabeledClass ( const char *  Label)

Definition at line 795 of file commontraining.cpp.

{
  // Allocate the node, give it a private copy of Label, and attach a new
  // class sized for MAX_NUM_PROTOS protos and MAX_NUM_CONFIGS configs.
  MERGE_CLASS node = new MERGE_CLASS_NODE;

  node->Label = (char*)Emalloc (strlen (Label)+1);
  strcpy (node->Label, Label);

  node->Class = NewClass (MAX_NUM_PROTOS, MAX_NUM_CONFIGS);
  return node;

}       /* NewLabeledClass */
LABELEDLIST NewLabeledList ( const char *  Label)

Definition at line 417 of file commontraining.cpp.

{
  // Allocate an empty labeled list carrying its own copy of Label, with
  // both sample counters starting at zero.
  LABELEDLIST node = (LABELEDLIST) Emalloc (sizeof (LABELEDLISTNODE));

  node->Label = (char*)Emalloc (strlen (Label)+1);
  strcpy (node->Label, Label);

  node->List = NIL_LIST;
  node->SampleCount = 0;
  node->font_sample_count = 0;
  return node;

}       /* NewLabeledList */
void Normalize ( float *  Values)

Definition at line 902 of file commontraining.cpp.

{
  // In-place conversion of a three-element array.
  //   On entry: Values[0], Values[1] are a point the line passes through
  //             (SetUpForFloat2Int passes a proto's X and Y) and Values[2]
  //             is an angle expressed as a fraction of a full turn (it is
  //             multiplied by 2 * PI before tan()).
  //   On exit:  Values[0], Values[1], Values[2] hold the line coefficients
  //             that SetUpForFloat2Int stores as A, B and C, scaled so
  //             that A*A + B*B == 1.
  // The `register` storage class was dropped from the locals: it is
  // deprecated since C++11 and ill-formed from C++17 on, and has no effect
  // on the generated code.
  float Slope;
  float Intercept;
  float Normalizer;

  Slope      = tan (Values [2] * 2 * PI);
  Intercept  = Values [1] - Slope * Values [0];
  Normalizer = 1 / sqrt (Slope * Slope + 1.0);

  Values [0] = Slope * Normalizer;
  Values [1] = - Normalizer;
  Values [2] = Intercept * Normalizer;
} // Normalize
int NumberOfProtos ( LIST  ProtoList,
BOOL8  CountSigProtos,
BOOL8  CountInsigProtos 
)

Definition at line 953 of file commontraining.cpp.

{
  // Count the prototypes in ProtoList, including significant ones only
  // when CountSigProtos is set and insignificant ones only when
  // CountInsigProtos is set.
  int count = 0;

  iterate(ProtoList)
  {
    PROTOTYPE *proto = (PROTOTYPE *) first_node ( ProtoList );
    const bool wanted = proto->Significant ? (CountSigProtos != 0)
                                           : (CountInsigProtos != 0);
    if (wanted)
      ++count;
  }
  return count;
}
void ParseArguments ( int *  argc,
char ***  argv 
)

Definition at line 88 of file commontraining.cpp.

                                             {
/*
 **     Parameters:
 **             argc    pointer to the number of command line arguments
 **             argv    pointer to the command line argument vector
 **     Globals:
 **             Config                  clustering parameters; the -C, -I,
 **                                     -M and -B options overwrite fields
 **             tessoptarg, tessoptind  defined by tessopt
 **             FLAGS_configfile, FLAGS_D, FLAGS_U, FLAGS_O, FLAGS_F,
 **             FLAGS_X                 string flags set from -c/-D/-U/-O/-F/-X
 **             ccutil                  receives params read from the
 **                                     config file
 **     Operation:
 **             Parses the command line.  When USE_STD_NAMESPACE is not
 **             defined, parsing is delegated to InitGoogle; otherwise the
 **             options are read one at a time with tessopt.  Afterwards,
 **             if a config file was named, its parameters are loaded.
 **     Return: none
 **     Exceptions: An unparsable numeric value or an unknown option prints
 **             the usage message to stderr and exits with status 2
 **             (tessopt branch only).
 **     History: 7/24/89, DSJ, Created.
 */
#ifndef USE_STD_NAMESPACE
  InitGoogle(kUsage, argc, argv, true);
  // NOTE(review): presumably InitGoogle removes the flags it consumed so
  // that the remaining arguments start at index 1 -- confirm.
  tessoptind = 1;
#else
  int    Option;
  int    ParametersRead;
  BOOL8  Error;

  Error = FALSE;
  // "S:" is accepted by the option string but no case below handles it,
  // so a -S argument is read and silently ignored.
  while ((Option = tessopt(*argc, *argv, "F:O:U:D:C:I:M:B:S:X:c:")) != EOF) {
    switch (Option) {
      // The four numeric options are clamped to the range [0, 1].
      case 'C':
        ParametersRead = sscanf(tessoptarg, "%lf", &(Config.Confidence) );
        if ( ParametersRead != 1 ) Error = TRUE;
        else if ( Config.Confidence > 1 ) Config.Confidence = 1;
        else if ( Config.Confidence < 0 ) Config.Confidence = 0;
        break;
      case 'I':
        ParametersRead = sscanf(tessoptarg, "%f", &(Config.Independence) );
        if ( ParametersRead != 1 ) Error = TRUE;
        else if ( Config.Independence > 1 ) Config.Independence = 1;
        else if ( Config.Independence < 0 ) Config.Independence = 0;
        break;
      case 'M':
        ParametersRead = sscanf(tessoptarg, "%f", &(Config.MinSamples) );
        if ( ParametersRead != 1 ) Error = TRUE;
        else if ( Config.MinSamples > 1 ) Config.MinSamples = 1;
        else if ( Config.MinSamples < 0 ) Config.MinSamples = 0;
        break;
      case 'B':
        ParametersRead = sscanf(tessoptarg, "%f", &(Config.MaxIllegal) );
        if ( ParametersRead != 1 ) Error = TRUE;
        else if ( Config.MaxIllegal > 1 ) Config.MaxIllegal = 1;
        else if ( Config.MaxIllegal < 0 ) Config.MaxIllegal = 0;
        break;
      // The remaining options just copy their argument into a string flag.
      case 'c':
        FLAGS_configfile.set_value(tessoptarg);
        break;
      case 'D':
        FLAGS_D.set_value(tessoptarg);
        break;
      case 'U':
        FLAGS_U.set_value(tessoptarg);
        break;
      case 'O':
        FLAGS_O.set_value(tessoptarg);
        break;
      case 'F':
        FLAGS_F.set_value(tessoptarg);
        break;
      case 'X':
        FLAGS_X.set_value(tessoptarg);
        break;
      case '?':
        Error = TRUE;
        break;
    }
    // Checked after every option, so the first bad option aborts the run.
    if (Error) {
      fprintf(stderr, "Usage: %s %s\n", (*argv)[0], kUsage);
      exit(2);
    }
  }
#endif
  // Set additional parameters from config file if specified.
  if (!FLAGS_configfile.empty()) {
    tesseract::ParamUtils::ReadParamsFile(
        FLAGS_configfile.c_str(),
        tesseract::SET_PARAM_CONSTRAINT_NON_INIT_ONLY,
        ccutil.params());
  }
}  // ParseArguments
void ReadTrainingSamples ( const FEATURE_DEFS_STRUCT &  feature_defs,
const char *  feature_name,
int  max_samples,
UNICHARSET *  unicharset,
FILE *  file,
LIST *  training_samples 
)

Definition at line 448 of file commontraining.cpp.

                                                             {
/*
**  Parameters:
**    feature_defs      feature definitions used to resolve feature_name
**                      and to parse each character description
**    feature_name      short name of the feature type to keep
**    max_samples       cap on the samples kept per character within this
**                      call; a value <= 0 disables the cap
**    unicharset        may be NULL; otherwise labels not yet present are
**                      inserted into it
**    file              open text file to read samples from
**    training_samples  in/out list of labeled lists, one per character
**  Globals: none
**  Operation:
**    This routine reads training samples from a file and appends the
**    feature sets of the requested type to the labeled list of the
**    character they belong to, creating that list when needed.  Feature
**    sets of every other type are freed.
**  Return: none
**  Exceptions: exits with status 1 if the unicharset grows beyond
**    MAX_NUM_CLASSES.
**  History: Fri Aug 18 13:11:39 1989, DSJ, Created.
**       Tue May 17 1998 simplifications to structure, eliminated
**        font, and feature specification levels of structure.
*/
  char    buffer[2048];
  char    unichar[UNICHAR_LEN + 1];
  char    label_format[32];
  LABELEDLIST char_sample;
  FEATURE_SET feature_samples;
  CHAR_DESC char_desc;
  int   i;
  int feature_type = ShortNameToFeatureType(feature_defs, feature_name);
  // Zero out the font_sample_count for all the classes.
  LIST it = *training_samples;
  iterate(it) {
    char_sample = reinterpret_cast<LABELEDLIST>(first_node(it));
    char_sample->font_sample_count = 0;
  }

  // Build the format "%*s %<UNICHAR_LEN>s".  The explicit field width stops
  // sscanf from writing past the end of unichar when a line carries a label
  // longer than UNICHAR_LEN bytes; such a label is truncated instead.
  snprintf(label_format, sizeof(label_format), "%%*s %%%ds",
           static_cast<int>(UNICHAR_LEN));

  while (fgets(buffer, 2048, file) != NULL) {
    if (buffer[0] == '\n')
      continue;

    // First token (skipped) is followed by the character label.
    sscanf(buffer, label_format, unichar);
    if (unicharset != NULL && !unicharset->contains_unichar(unichar)) {
      unicharset->unichar_insert(unichar);
      if (unicharset->size() > MAX_NUM_CLASSES) {
        tprintf("Error: Size of unicharset in training is "
                "greater than MAX_NUM_CLASSES\n");
        exit(1);
      }
    }
    char_sample = FindList(*training_samples, unichar);
    if (char_sample == NULL) {
      char_sample = NewLabeledList(unichar);
      *training_samples = push(*training_samples, char_sample);
    }
    // The description that follows the header line is read from the file.
    char_desc = ReadCharDescription(feature_defs, file);
    feature_samples = char_desc->FeatureSets[feature_type];
    if (char_sample->font_sample_count < max_samples || max_samples <= 0) {
      char_sample->List = push(char_sample->List, feature_samples);
      char_sample->SampleCount++;
      char_sample->font_sample_count++;
    } else {
      FreeFeatureSet(feature_samples);
    }
    // The requested feature set was either stored or freed above; release
    // the remaining ones and then the descriptor shell itself.
    for (i = 0; i < char_desc->NumFeatureSets; i++) {
      if (feature_type != i)
        FreeFeatureSet(char_desc->FeatureSets[i]);
    }
    free(char_desc);
  }
}  // ReadTrainingSamples
LIST RemoveInsignificantProtos ( LIST  ProtoList,
BOOL8  KeepSigProtos,
BOOL8  KeepInsigProtos,
int  N 
)

Definition at line 710 of file commontraining.cpp.

{
  // Builds a new list holding deep copies of the prototypes selected by
  // KeepSigProtos / KeepInsigProtos, frees the original list, and returns
  // the new one.  N is the number of elements in each per-prototype float
  // array (Mean and the optional Variance / Magnitude / Weight arrays).
  // The copies carry no Cluster or Distrib (both are set to NULL).
  LIST NewProtoList = NIL_LIST;
  LIST pProtoList;
  PROTOTYPE* Proto;
  PROTOTYPE* NewProto;
  int i;

  pProtoList = ProtoList;
  iterate(pProtoList)
  {
    Proto = (PROTOTYPE *) first_node (pProtoList);
    if ((Proto->Significant && KeepSigProtos) ||
        (!Proto->Significant && KeepInsigProtos))
    {
      NewProto = (PROTOTYPE *)Emalloc(sizeof(PROTOTYPE));

      NewProto->Mean = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
      NewProto->Significant = Proto->Significant;
      // Emalloc'd memory is not initialized here, so every field has to be
      // assigned explicitly; Merged used to be left indeterminate.
      NewProto->Merged = Proto->Merged;
      NewProto->Style = Proto->Style;
      NewProto->NumSamples = Proto->NumSamples;
      NewProto->Cluster = NULL;
      NewProto->Distrib = NULL;

      for (i=0; i < N; i++)
        NewProto->Mean[i] = Proto->Mean[i];
      if (Proto->Variance.Elliptical != NULL)
      {
        NewProto->Variance.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
        for (i=0; i < N; i++)
          NewProto->Variance.Elliptical[i] = Proto->Variance.Elliptical[i];
      }
      else
        NewProto->Variance.Elliptical = NULL;
      //---------------------------------------------
      if (Proto->Magnitude.Elliptical != NULL)
      {
        NewProto->Magnitude.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
        for (i=0; i < N; i++)
          NewProto->Magnitude.Elliptical[i] = Proto->Magnitude.Elliptical[i];
      }
      else
        NewProto->Magnitude.Elliptical = NULL;
      //------------------------------------------------
      if (Proto->Weight.Elliptical != NULL)
      {
        NewProto->Weight.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
        for (i=0; i < N; i++)
          NewProto->Weight.Elliptical[i] = Proto->Weight.Elliptical[i];
      }
      else
        NewProto->Weight.Elliptical = NULL;

      NewProto->TotalMagnitude = Proto->TotalMagnitude;
      NewProto->LogMagnitude = Proto->LogMagnitude;
      // push_last keeps the kept prototypes in their original order.
      NewProtoList = push_last(NewProtoList, NewProto);
    }
  }
  // The input list is consumed: callers must use the returned list.
  FreeProtoList(&ProtoList);
  return (NewProtoList);
}       /* RemoveInsignificantProtos */
CLUSTERER* SetUpForClustering ( const FEATURE_DEFS_STRUCT &  FeatureDefs,
LABELEDLIST  char_sample,
const char *  program_feature_type 
)

Definition at line 567 of file commontraining.cpp.

                                                                {
/*
 **     Parameters:
 **             FeatureDefs: feature definitions used to look up the
 **             descriptor of program_feature_type.
 **             char_sample: LABELEDLIST that holds all the feature information for a
 **             given character.
 **             program_feature_type: short name of the feature type to cluster.
 **     Globals:
 **             None
 **     Operation:
 **             Creates a clusterer sized for the selected feature type and
 **             feeds it one sample per feature of every feature set in
 **             char_sample.  All features of one feature set share the same
 **             CharID; the ID increases by one per feature set.
 **     Return:
 **             Pointer to new clusterer data structure.
 **     Exceptions:
 **             None
 **     History:
 **             8/16/89, DSJ, Created.
 */
  const int desc_index = ShortNameToFeatureType(FeatureDefs, program_feature_type);
  const uinT16 num_params = FeatureDefs.FeatureDesc[desc_index]->NumParams;
  CLUSTERER *clusterer =
      MakeClusterer(num_params, FeatureDefs.FeatureDesc[desc_index]->ParamDesc);

  // Scratch buffer reused for every sample; allocated on first use only.
  FLOAT32 *scratch = NULL;
  inT32 char_id = 0;
  LIST feature_sets = char_sample->List;
  iterate(feature_sets) {
    FEATURE_SET feature_set = (FEATURE_SET) first_node(feature_sets);
    for (int f = 0; f < feature_set->MaxNumFeatures; f++) {
      if (scratch == NULL)
        scratch = (FLOAT32 *)Emalloc(num_params * sizeof(FLOAT32));
      for (int p = 0; p < num_params; p++)
        scratch[p] = feature_set->Features[f]->Params[p];
      MakeSample (clusterer, scratch, char_id);
    }
    ++char_id;
  }
  if (scratch != NULL)
    free(scratch);
  return clusterer;

}       /* SetUpForClustering */
CLASS_STRUCT* SetUpForFloat2Int ( const UNICHARSET &  unicharset,
LIST  LabeledClassList 
)

SetUpForFloat2Int

Definition at line 839 of file commontraining.cpp.

                                                       {
  // Allocates an array of unicharset.size() CLASS_STRUCTs and, for every
  // labeled class in LabeledClassList, fills the slot indexed by the
  // unichar id of its label with copies of that class's protos and
  // configurations.  The array is allocated with new[] and returned to the
  // caller.
  MERGE_CLASS   MergeClass;
  CLASS_TYPE            Class;
  int                           NumProtos;
  int                           NumConfigs;
  int                           NumWords;
  int                           i, j;
  float                 Values[3];
  PROTO                 NewProto;
  PROTO                 OldProto;
  BIT_VECTOR            NewConfig;
  BIT_VECTOR            OldConfig;

  //    printf("Float2Int ...\n");

  CLASS_STRUCT* float_classes = new CLASS_STRUCT[unicharset.size()];
  iterate(LabeledClassList)
  {
    UnicityTableEqEq<int>   font_set;
    MergeClass = (MERGE_CLASS) first_node (LabeledClassList);
    Class = &float_classes[unicharset.unichar_to_id(MergeClass->Label)];
    NumProtos = MergeClass->Class->NumProtos;
    NumConfigs = MergeClass->Class->NumConfigs;
    // NOTE(review): move() presumably transfers the contents out of the
    // source class's font_set, leaving it empty -- confirm in
    // unicity_table.h.
    font_set.move(&MergeClass->Class->font_set);
    Class->NumProtos = NumProtos;
    Class->MaxNumProtos = NumProtos;
    Class->Prototypes = (PROTO) Emalloc (sizeof(PROTO_STRUCT) * NumProtos);
    for(i=0; i < NumProtos; i++)
    {
      NewProto = ProtoIn(Class, i);
      OldProto = ProtoIn(MergeClass->Class, i);
      // Normalize() turns (X, Y, Angle) into the coefficients stored
      // below as A, B and C.
      Values[0] = OldProto->X;
      Values[1] = OldProto->Y;
      Values[2] = OldProto->Angle;
      Normalize(Values);
      NewProto->X = OldProto->X;
      NewProto->Y = OldProto->Y;
      NewProto->Length = OldProto->Length;
      NewProto->Angle = OldProto->Angle;
      NewProto->A = Values[0];
      NewProto->B = Values[1];
      NewProto->C = Values[2];
    }

    Class->NumConfigs = NumConfigs;
    Class->MaxNumConfigs = NumConfigs;
    // Hand the font set taken from the source class over to the new class.
    Class->font_set.move(&font_set);
    Class->Configurations = (BIT_VECTOR*) Emalloc (sizeof(BIT_VECTOR) * NumConfigs);
    NumWords = WordsInVectorOfSize(NumProtos);
    for(i=0; i < NumConfigs; i++)
    {
      // Each configuration is a bit vector with one bit per proto; copy it
      // word by word into a freshly allocated vector.
      NewConfig = NewBitVector(NumProtos);
      OldConfig = MergeClass->Class->Configurations[i];
      for(j=0; j < NumWords; j++)
        NewConfig[j] = OldConfig[j];
      Class->Configurations[i] = NewConfig;
    }
  }
  return float_classes;
} // SetUpForFloat2Int
STRING_PARAM_FLAG ( configfile  ,
""  ,
"File to load more configs from"   
)
STRING_PARAM_FLAG ( D  ,
""  ,
"Directory to write output files to"   
)
STRING_PARAM_FLAG ( F  ,
"font_properties"  ,
"File listing font properties"   
)
STRING_PARAM_FLAG ( X  ,
""  ,
"File listing font xheights"   
)
STRING_PARAM_FLAG ( U  ,
"unicharset"  ,
"File to load unicharset from"   
)
STRING_PARAM_FLAG ( O  ,
""  ,
"File to write unicharset to"   
)
STRING_PARAM_FLAG ( input_trainer  ,
""  ,
"File to load trainer from"   
)
STRING_PARAM_FLAG ( output_trainer  ,
""  ,
"File to write trainer to"   
)
STRING_PARAM_FLAG ( test_ch  ,
""  ,
"UTF8 test character string"   
)

Variable Documentation

Definition at line 85 of file commontraining.cpp.

CLUSTERCONFIG Config = { elliptical, 0.625, 0.05, 1.0, 1e-6, 0 }

Definition at line 53 of file commontraining.cpp.

const char* kUsage = "[flags] [ .tr files ... ]\n"

Definition at line 71 of file commontraining.cpp.