Tesseract
3.02
|
Go to the source code of this file.
#define InitSampleSearch | ( | S, | |
C | |||
) | (((C)==NULL)?(S=NIL_LIST):(S=push(NIL_LIST,(C)))) |
enum DISTRIBUTION |
enum PROTOSTYLE |
Definition at line 44 of file cluster.h.
{ spherical, elliptical, mixed, automatic } PROTOSTYLE;
LIST ClusterSamples | ( | CLUSTERER * | Clusterer, |
CLUSTERCONFIG * | Config | ||
) |
ClusterSamples *********************************************************** Parameters: Clusterer data struct containing samples to be clustered Config parameters which control clustering process Operation: This routine first checks to see if the samples in this clusterer have already been clustered before; if so, it does not bother to recreate the cluster tree. It simply recomputes the prototypes based on the new Config info. If the samples have not been clustered before, the samples in the KD tree are formed into a cluster tree and then the prototypes are computed from the cluster tree. In either case this routine returns a pointer to a list of prototypes that best represent the samples given the constraints specified in Config. Return: Pointer to a list of prototypes Exceptions: None History: 5/29/89, DSJ, Created.
Definition at line 504 of file cluster.cpp.
{ //only create cluster tree if samples have never been clustered before if (Clusterer->Root == NULL) CreateClusterTree(Clusterer); //deallocate the old prototype list if one exists FreeProtoList (&Clusterer->ProtoList); Clusterer->ProtoList = NIL_LIST; //compute prototypes starting at the root node in the tree ComputePrototypes(Clusterer, Config); return (Clusterer->ProtoList); } // ClusterSamples
void FreeClusterer | ( | CLUSTERER * | Clusterer | ) |
FreeClusterer ************************************************************* Parameters: Clusterer pointer to data structure to be freed Operation: This routine frees all of the memory allocated to the specified data structure. It will not, however, free the memory used by the prototype list. The pointers to the clusters for each prototype in the list will be set to NULL to indicate that the cluster data structures no longer exist. Any sample lists that have been obtained via calls to GetSamples are no longer valid. Return: None Exceptions: None History: 6/6/89, DSJ, Created.
Definition at line 532 of file cluster.cpp.
{ if (Clusterer != NULL) { memfree (Clusterer->ParamDesc); if (Clusterer->KDTree != NULL) FreeKDTree (Clusterer->KDTree); if (Clusterer->Root != NULL) FreeCluster (Clusterer->Root); // Free up all used buckets structures. for (int d = 0; d < DISTRIBUTION_COUNT; ++d) { for (int c = 0; c < MAXBUCKETS + 1 - MINBUCKETS; ++c) if (Clusterer->bucket_cache[d][c] != NULL) FreeBuckets(Clusterer->bucket_cache[d][c]); } memfree(Clusterer); } } // FreeClusterer
void FreeProtoList | ( | LIST * | ProtoList | ) |
FreeProtoList ************************************************************ Parameters: ProtoList pointer to list of prototypes to be freed Operation: This routine frees all of the memory allocated to the specified list of prototypes. The clusters which are pointed to by the prototypes are not freed. Return: None Exceptions: None History: 6/6/89, DSJ, Created.
Definition at line 560 of file cluster.cpp.
{ destroy_nodes(*ProtoList, FreePrototype); } // FreeProtoList
void FreePrototype | ( | void * | arg | ) |
FreePrototype ************************************************************ Parameters: Prototype prototype data structure to be deallocated Operation: This routine deallocates the memory consumed by the specified prototype and modifies the corresponding cluster so that it is no longer marked as a prototype. The cluster is NOT deallocated by this routine. Return: None Exceptions: None History: 5/30/89, DSJ, Created.
Definition at line 575 of file cluster.cpp.
{ //PROTOTYPE *Prototype) PROTOTYPE *Prototype = (PROTOTYPE *) arg; // unmark the corresponding cluster (if there is one if (Prototype->Cluster != NULL) Prototype->Cluster->Prototype = FALSE; // deallocate the prototype statistics and then the prototype itself if (Prototype->Distrib != NULL) memfree (Prototype->Distrib); if (Prototype->Mean != NULL) memfree (Prototype->Mean); if (Prototype->Style != spherical) { if (Prototype->Variance.Elliptical != NULL) memfree (Prototype->Variance.Elliptical); if (Prototype->Magnitude.Elliptical != NULL) memfree (Prototype->Magnitude.Elliptical); if (Prototype->Weight.Elliptical != NULL) memfree (Prototype->Weight.Elliptical); } memfree(Prototype); } // FreePrototype
CLUSTERER* MakeClusterer | ( | inT16 | SampleSize, |
const PARAM_DESC | ParamDesc[] | ||
) |
MakeClusterer ********************************************************** Parameters: SampleSize number of dimensions in feature space ParamDesc description of each dimension Operation: This routine creates a new clusterer data structure, initializes it, and returns a pointer to it. Return: pointer to the new clusterer data structure Exceptions: None History: 5/29/89, DSJ, Created.
Definition at line 395 of file cluster.cpp.
{ CLUSTERER *Clusterer; int i; // allocate main clusterer data structure and init simple fields Clusterer = (CLUSTERER *) Emalloc (sizeof (CLUSTERER)); Clusterer->SampleSize = SampleSize; Clusterer->NumberOfSamples = 0; Clusterer->NumChar = 0; // init fields which will not be used initially Clusterer->Root = NULL; Clusterer->ProtoList = NIL_LIST; // maintain a copy of param descriptors in the clusterer data structure Clusterer->ParamDesc = (PARAM_DESC *) Emalloc (SampleSize * sizeof (PARAM_DESC)); for (i = 0; i < SampleSize; i++) { Clusterer->ParamDesc[i].Circular = ParamDesc[i].Circular; Clusterer->ParamDesc[i].NonEssential = ParamDesc[i].NonEssential; Clusterer->ParamDesc[i].Min = ParamDesc[i].Min; Clusterer->ParamDesc[i].Max = ParamDesc[i].Max; Clusterer->ParamDesc[i].Range = ParamDesc[i].Max - ParamDesc[i].Min; Clusterer->ParamDesc[i].HalfRange = Clusterer->ParamDesc[i].Range / 2; Clusterer->ParamDesc[i].MidRange = (ParamDesc[i].Max + ParamDesc[i].Min) / 2; } // allocate a kd tree to hold the samples Clusterer->KDTree = MakeKDTree (SampleSize, ParamDesc); // Initialize cache of histogram buckets to minimize recomputing them. for (int d = 0; d < DISTRIBUTION_COUNT; ++d) { for (int c = 0; c < MAXBUCKETS + 1 - MINBUCKETS; ++c) Clusterer->bucket_cache[d][c] = NULL; } return Clusterer; } // MakeClusterer
MakeSample *********************************************************** Parameters: Clusterer clusterer data structure to add sample to Feature feature to be added to clusterer CharID unique ident. of char that sample came from Operation: This routine creates a new sample data structure to hold the specified feature. This sample is added to the clusterer data structure (so that it knows which samples are to be clustered later), and a pointer to the sample is returned to the caller. Return: Pointer to the new sample data structure Exceptions: ALREADYCLUSTERED MakeSample can't be called after ClusterSamples has been called History: 5/29/89, DSJ, Created.
Definition at line 450 of file cluster.cpp.
{ SAMPLE *Sample; int i; // see if the samples have already been clustered - if so trap an error if (Clusterer->Root != NULL) DoError (ALREADYCLUSTERED, "Can't add samples after they have been clustered"); // allocate the new sample and initialize it Sample = (SAMPLE *) Emalloc (sizeof (SAMPLE) + (Clusterer->SampleSize - 1) * sizeof (FLOAT32)); Sample->Clustered = FALSE; Sample->Prototype = FALSE; Sample->SampleCount = 1; Sample->Left = NULL; Sample->Right = NULL; Sample->CharID = CharID; for (i = 0; i < Clusterer->SampleSize; i++) Sample->Mean[i] = Feature[i]; // add the sample to the KD tree - keep track of the total # of samples Clusterer->NumberOfSamples++; KDStore (Clusterer->KDTree, Sample->Mean, (char *) Sample); if (CharID >= Clusterer->NumChar) Clusterer->NumChar = CharID + 1; // execute hook for monitoring clustering operation // (*SampleCreationHook)( Sample ); return (Sample); } // MakeSample
Mean *********************************************************** Parameters: Proto prototype to return mean of Dimension dimension whose mean is to be returned Operation: This routine returns the mean of the specified prototype in the indicated dimension. Return: Mean of Prototype in Dimension Exceptions: none History: 7/6/89, DSJ, Created.
Definition at line 639 of file cluster.cpp.
{ return (Proto->Mean[Dimension]); } // Mean
inT32 MergeClusters | ( | inT16 | N, |
PARAM_DESC | ParamDesc[], | ||
inT32 | n1, | ||
inT32 | n2, | ||
FLOAT32 | m[], | ||
FLOAT32 | m1[], | ||
FLOAT32 | m2[] | ||
) |
MergeClusters ************************************************************ Parameters: N # of dimensions (size of arrays) ParamDesc array of dimension descriptions n1, n2 number of samples in each old cluster m array to hold mean of new cluster m1, m2 arrays containing means of old clusters Operation: This routine merges two clusters into one larger cluster. To do this it computes the number of samples in the new cluster and the mean of the new cluster. The ParamDesc information is used to ensure that circular dimensions are handled correctly. Return: The number of samples in the new cluster. Exceptions: None History: 5/31/89, DSJ, Created.
Definition at line 882 of file cluster.cpp.
{ inT32 i, n; n = n1 + n2; for (i = N; i > 0; i--, ParamDesc++, m++, m1++, m2++) { if (ParamDesc->Circular) { // if distance between means is greater than allowed // reduce upper point by one "rotation" to compute mean // then normalize the mean back into the accepted range if ((*m2 - *m1) > ParamDesc->HalfRange) { *m = (n1 * *m1 + n2 * (*m2 - ParamDesc->Range)) / n; if (*m < ParamDesc->Min) *m += ParamDesc->Range; } else if ((*m1 - *m2) > ParamDesc->HalfRange) { *m = (n1 * (*m1 - ParamDesc->Range) + n2 * *m2) / n; if (*m < ParamDesc->Min) *m += ParamDesc->Range; } else *m = (n1 * *m1 + n2 * *m2) / n; } else *m = (n1 * *m1 + n2 * *m2) / n; } return n; } // MergeClusters
NextSample ************************************************************ Parameters: SearchState ptr to list containing clusters to be searched Operation: This routine is used to find all of the samples which belong to a cluster. It starts by removing the top cluster on the cluster list (SearchState). If this cluster is a leaf it is returned. Otherwise, the right subcluster is pushed on the list and we continue the search in the left subcluster. This continues until a leaf is found. If all samples have been found, NULL is returned. InitSampleSearch() must be called before NextSample() to initialize the search. Return: Pointer to the next leaf cluster (sample) or NULL. Exceptions: None History: 6/16/89, DSJ, Created.
Definition at line 614 of file cluster.cpp.
{ CLUSTER *Cluster; if (*SearchState == NIL_LIST) return (NULL); Cluster = (CLUSTER *) first_node (*SearchState); *SearchState = pop (*SearchState); while (TRUE) { if (Cluster->Left == NULL) return (Cluster); *SearchState = push (*SearchState, Cluster->Right); Cluster = Cluster->Left; } } // NextSample
StandardDeviation ************************************************* Parameters: Proto prototype to return standard deviation of Dimension dimension whose stddev is to be returned Operation: This routine returns the standard deviation of the prototype in the indicated dimension. Return: Standard deviation of Prototype in Dimension Exceptions: none History: 7/6/89, DSJ, Created.
Definition at line 653 of file cluster.cpp.
{ switch (Proto->Style) { case spherical: return ((FLOAT32) sqrt ((double) Proto->Variance.Spherical)); case elliptical: return ((FLOAT32) sqrt ((double) Proto->Variance.Elliptical[Dimension])); case mixed: switch (Proto->Distrib[Dimension]) { case normal: return ((FLOAT32) sqrt ((double) Proto->Variance.Elliptical[Dimension])); case uniform: case D_random: return (Proto->Variance.Elliptical[Dimension]); case DISTRIBUTION_COUNT: ASSERT_HOST(!"Distribution count not allowed!"); } } return 0.0f; } // StandardDeviation