Tesseract  3.02
tesseract-ocr/classify/kdtree.cpp
Go to the documentation of this file.
00001 /******************************************************************************
00002  **  Filename:  kdtree.cpp
00003  **  Purpose:   Routines for managing K-D search trees
00004  **  Author:    Dan Johnson
00005  **  History:  3/10/89, DSJ, Created.
00006  **      5/23/89, DSJ, Added circular feature capability.
00007  **      7/13/89, DSJ, Made tree nodes invisible to outside.
00008  **
00009  **  (c) Copyright Hewlett-Packard Company, 1988.
00010  ** Licensed under the Apache License, Version 2.0 (the "License");
00011  ** you may not use this file except in compliance with the License.
00012  ** You may obtain a copy of the License at
00013  ** http://www.apache.org/licenses/LICENSE-2.0
00014  ** Unless required by applicable law or agreed to in writing, software
00015  ** distributed under the License is distributed on an "AS IS" BASIS,
00016  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00017  ** See the License for the specific language governing permissions and
00018  ** limitations under the License.
00019  ******************************************************************************/
00020 
00021 /*-----------------------------------------------------------------------------
00022           Include Files and Type Defines
00023 -----------------------------------------------------------------------------*/
00024 #include "kdtree.h"
00025 #include "const.h"
00026 #include "emalloc.h"
00027 #include "freelist.h"
00028 #include <stdio.h>
00029 #include <math.h>
00030 
00031 #define Magnitude(X)    ((X) < 0 ? -(X) : (X))
00032 #define NodeFound(N,K,D)  (( (N)->Key == (K) ) && ( (N)->Data == (D) ))
00033 
00034 /*-----------------------------------------------------------------------------
00035         Global Data Definitions and Declarations
00036 -----------------------------------------------------------------------------*/
00037 #define MINSEARCH -MAX_FLOAT32
00038 #define MAXSEARCH MAX_FLOAT32
00039 
00040 // Helper function to find the next essential dimension in a cycle.
00041 static int NextLevel(KDTREE *tree, int level) {
00042   do {
00043     ++level;
00044     if (level >= tree->KeySize)
00045       level = 0;
00046   } while (tree->KeyDesc[level].NonEssential);
00047   return level;
00048 }
00049 
00050 //-----------------------------------------------------------------------------
00051 // Store the k smallest-keyed key-value pairs.
00052 template<typename Key, typename Value>
00053 class MinK {
00054  public:
00055   MinK(Key max_key, int k);
00056   ~MinK();
00057 
00058   struct Element {
00059     Element() {}
00060     Element(const Key& k, const Value& v) : key(k), value(v) {}
00061 
00062     Key key;
00063     Value value;
00064   };
00065 
00066   bool insert(Key k, Value v);
00067   const Key& max_insertable_key();
00068 
00069   int elements_count() { return elements_count_; }
00070   const Element* elements() { return elements_; }
00071 
00072  private:
00073   const Key max_key_;  // the maximum possible Key
00074   Element* elements_;  // unsorted array of elements
00075   int elements_count_;  // the number of results collected so far
00076   int k_;  // the number of results we want from the search
00077   int max_index_;  // the index of the result with the largest key
00078 };
00079 
00080 template<typename Key, typename Value>
00081 MinK<Key, Value>::MinK(Key max_key, int k) :
00082   max_key_(max_key), elements_count_(0), k_(k < 1 ? 1 : k), max_index_(0) {
00083   elements_ = new Element[k_];
00084 }
00085 
00086 template<typename Key, typename Value>
00087 MinK<Key, Value>::~MinK() {
00088   delete []elements_;
00089 }
00090 
00091 template<typename Key, typename Value>
00092 const Key& MinK<Key, Value>::max_insertable_key() {
00093   if (elements_count_ < k_)
00094     return max_key_;
00095   return elements_[max_index_].key;
00096 }
00097 
00098 template<typename Key, typename Value>
00099 bool MinK<Key, Value>::insert(Key key, Value value) {
00100   if (elements_count_ < k_) {
00101     elements_[elements_count_++] = Element(key, value);
00102     if (key > elements_[max_index_].key)
00103       max_index_ = elements_count_ - 1;
00104     return true;
00105   } else if (key < elements_[max_index_].key) {
00106     // evict the largest element.
00107     elements_[max_index_] = Element(key, value);
00108     // recompute max_index_
00109     for (int i = 0; i < elements_count_; i++) {
00110       if (elements_[i].key > elements_[max_index_].key)
00111         max_index_ = i;
00112     }
00113     return true;
00114   }
00115   return false;
00116 }
00117 
00118 
00119 //-----------------------------------------------------------------------------
00120 // Helper class for searching for the k closest points to query_point in tree.
00121 class KDTreeSearch {
00122  public:
00123   KDTreeSearch(KDTREE* tree, FLOAT32 *query_point, int k_closest);
00124   ~KDTreeSearch();
00125 
00126   // Return the k nearest points' data.
00127   void Search(int *result_count, FLOAT32 *distances, void **results);
00128 
00129  private:
00130   void SearchRec(int Level, KDNODE *SubTree);
00131   bool BoxIntersectsSearch(FLOAT32 *lower, FLOAT32 *upper);
00132 
00133   KDTREE *tree_;
00134   FLOAT32 *query_point_;
00135   MinK<FLOAT32, void *>* results_;
00136   FLOAT32 *sb_min_;  // search box minimum
00137   FLOAT32 *sb_max_;  // search box maximum
00138 };
00139 
00140 KDTreeSearch::KDTreeSearch(KDTREE* tree, FLOAT32 *query_point, int k_closest) :
00141     tree_(tree),
00142     query_point_(query_point) {
00143   results_ = new MinK<FLOAT32, void *>(MAXSEARCH, k_closest);
00144   sb_min_ = new FLOAT32[tree->KeySize];
00145   sb_max_ = new FLOAT32[tree->KeySize];
00146 }
00147 
00148 KDTreeSearch::~KDTreeSearch() {
00149   delete results_;
00150   delete[] sb_min_;
00151   delete[] sb_max_;
00152 }
00153 
00154 // Locate the k_closest points to query_point_, and return their distances and
00155 // data into the given buffers.
00156 void KDTreeSearch::Search(int *result_count,
00157                           FLOAT32 *distances,
00158                           void **results) {
00159   if (tree_->Root.Left == NULL) {
00160     *result_count = 0;
00161   } else {
00162     for (int i = 0; i < tree_->KeySize; i++) {
00163       sb_min_[i] = tree_->KeyDesc[i].Min;
00164       sb_max_[i] = tree_->KeyDesc[i].Max;
00165     }
00166     SearchRec(0, tree_->Root.Left);
00167     int count = results_->elements_count();
00168     *result_count = count;
00169     for (int j = 0; j < count; j++) {
00170       distances[j] = (FLOAT32) sqrt((FLOAT64)results_->elements()[j].key);
00171       results[j] = results_->elements()[j].value;
00172     }
00173   }
00174 }
00175 
00176 /*-----------------------------------------------------------------------------
00177               Public Code
00178 -----------------------------------------------------------------------------*/
00179 /*---------------------------------------------------------------------------*/
00184 KDTREE *MakeKDTree(inT16 KeySize, const PARAM_DESC KeyDesc[]) {
00185   KDTREE *KDTree = (KDTREE *) Emalloc(
00186       sizeof(KDTREE) + (KeySize - 1) * sizeof(PARAM_DESC));
00187   for (int i = 0; i < KeySize; i++) {
00188     KDTree->KeyDesc[i].NonEssential = KeyDesc[i].NonEssential;
00189     KDTree->KeyDesc[i].Circular = KeyDesc[i].Circular;
00190     if (KeyDesc[i].Circular) {
00191       KDTree->KeyDesc[i].Min = KeyDesc[i].Min;
00192       KDTree->KeyDesc[i].Max = KeyDesc[i].Max;
00193       KDTree->KeyDesc[i].Range = KeyDesc[i].Max - KeyDesc[i].Min;
00194       KDTree->KeyDesc[i].HalfRange = KDTree->KeyDesc[i].Range / 2;
00195       KDTree->KeyDesc[i].MidRange = (KeyDesc[i].Max + KeyDesc[i].Min) / 2;
00196     } else {
00197       KDTree->KeyDesc[i].Min = MINSEARCH;
00198       KDTree->KeyDesc[i].Max = MAXSEARCH;
00199     }
00200   }
00201   KDTree->KeySize = KeySize;
00202   KDTree->Root.Left = NULL;
00203   KDTree->Root.Right = NULL;
00204   return KDTree;
00205 }
00206 
00207 
00208 /*---------------------------------------------------------------------------*/
00209 void KDStore(KDTREE *Tree, FLOAT32 *Key, void *Data) {
00222   int Level;
00223   KDNODE *Node;
00224   KDNODE **PtrToNode;
00225 
00226   PtrToNode = &(Tree->Root.Left);
00227   Node = *PtrToNode;
00228   Level = NextLevel(Tree, -1);
00229   while (Node != NULL) {
00230     if (Key[Level] < Node->BranchPoint) {
00231       PtrToNode = &(Node->Left);
00232       if (Key[Level] > Node->LeftBranch)
00233         Node->LeftBranch = Key[Level];
00234     }
00235     else {
00236       PtrToNode = &(Node->Right);
00237       if (Key[Level] < Node->RightBranch)
00238         Node->RightBranch = Key[Level];
00239     }
00240     Level = NextLevel(Tree, Level);
00241     Node = *PtrToNode;
00242   }
00243 
00244   *PtrToNode = MakeKDNode(Tree, Key, (void *) Data, Level);
00245 }                                /* KDStore */
00246 
00247 
00248 /*---------------------------------------------------------------------------*/
00268 void
00269 KDDelete (KDTREE * Tree, FLOAT32 Key[], void *Data) {
00270   int Level;
00271   KDNODE *Current;
00272   KDNODE *Father;
00273 
00274   /* initialize search at root of tree */
00275   Father = &(Tree->Root);
00276   Current = Father->Left;
00277   Level = NextLevel(Tree, -1);
00278 
00279   /* search tree for node to be deleted */
00280   while ((Current != NULL) && (!NodeFound (Current, Key, Data))) {
00281     Father = Current;
00282     if (Key[Level] < Current->BranchPoint)
00283       Current = Current->Left;
00284     else
00285       Current = Current->Right;
00286 
00287     Level = NextLevel(Tree, Level);
00288   }
00289 
00290   if (Current != NULL) {         /* if node to be deleted was found */
00291     if (Current == Father->Left) {
00292       Father->Left = NULL;
00293       Father->LeftBranch = Tree->KeyDesc[Level].Min;
00294     } else {
00295       Father->Right = NULL;
00296       Father->RightBranch = Tree->KeyDesc[Level].Max;
00297     }
00298 
00299     InsertNodes(Tree, Current->Left);
00300     InsertNodes(Tree, Current->Right);
00301     FreeSubTree(Current);
00302   }
00303 }                                /* KDDelete */
00304 
00305 
00306 /*---------------------------------------------------------------------------*/
00307 void KDNearestNeighborSearch(
00308     KDTREE *Tree, FLOAT32 Query[], int QuerySize, FLOAT32 MaxDistance,
00309     int *NumberOfResults, void **NBuffer, FLOAT32 DBuffer[]) {
00310 /*
00311  **  Parameters:
00312  **    Tree    ptr to K-D tree to be searched
00313  **    Query    ptr to query key (point in D-space)
00314  **    QuerySize  number of nearest neighbors to be found
00315  **    MaxDistance  all neighbors must be within this distance
00316  **    NBuffer    ptr to QuerySize buffer to hold nearest neighbors
00317  **    DBuffer    ptr to QuerySize buffer to hold distances
00318  **          from nearest neighbor to query point
00319  **  Operation:
00320  **    This routine searches the K-D tree specified by Tree and
00321  **    finds the QuerySize nearest neighbors of Query.  All neighbors
00322  **    must be within MaxDistance of Query.  The data contents of
00323  **    the nearest neighbors
00324  **    are placed in NBuffer and their distances from Query are
00325  **    placed in DBuffer.
00326  **  Return: Number of nearest neighbors actually found
00327  **  Exceptions: none
00328  **  History:
00329  **    3/10/89, DSJ, Created.
00330  **    7/13/89, DSJ, Return contents of node instead of node itself.
00331  */
00332   KDTreeSearch search(Tree, Query, QuerySize);
00333   search.Search(NumberOfResults, DBuffer, NBuffer);
00334 }
00335 
00336 
00337 /*---------------------------------------------------------------------------*/
00338 // Walk a given Tree with action.
00339 void KDWalk(KDTREE *Tree, void_proc action, void *context) {
00340   if (Tree->Root.Left != NULL)
00341     Walk(Tree, action, context, Tree->Root.Left, NextLevel(Tree, -1));
00342 }
00343 
00344 
00345 /*---------------------------------------------------------------------------*/
00346 void FreeKDTree(KDTREE *Tree) {
00347 /*
00348  **  Parameters:
00349  **    Tree  tree data structure to be released
00350  **  Operation:
00351  **    This routine frees all memory which is allocated to the
00352  **    specified KD-tree.  This includes the data structure for
00353  **    the kd-tree itself plus the data structures for each node
00354  **    in the tree.  It does not include the Key and Data items
00355  **    which are pointed to by the nodes.  This memory is left
00356  **    untouched.
00357  **  Return: none
00358  **  Exceptions: none
00359  **  History:
00360  **    5/26/89, DSJ, Created.
00361  */
00362   FreeSubTree(Tree->Root.Left);
00363   memfree(Tree);
00364 }                                /* FreeKDTree */
00365 
00366 
00367 /*-----------------------------------------------------------------------------
00368               Private Code
00369 -----------------------------------------------------------------------------*/
00370 /*---------------------------------------------------------------------------*/
00371 KDNODE *MakeKDNode(KDTREE *tree, FLOAT32 Key[], void *Data, int Index) {
00372 /*
00373  **  Parameters:
00374  **      tree  The tree to create the node for
00375  **      Key  Access key for new node in KD tree
00376  **      Data  ptr to data to be stored in new node
00377  **      Index  index of Key to branch on
00378  **  Operation:
00379  **    This routine allocates memory for a new K-D tree node
00380  **    and places the specified Key and Data into it.  The
00381  **    left and right subtree pointers for the node are
00382  **    initialized to empty subtrees.
00383  **  Return:
00384  **    pointer to new K-D tree node
00385  **  Exceptions:
00386  **    None
00387  **  History:
00388  **    3/11/89, DSJ, Created.
00389  */
00390   KDNODE *NewNode;
00391 
00392   NewNode = (KDNODE *) Emalloc (sizeof (KDNODE));
00393 
00394   NewNode->Key = Key;
00395   NewNode->Data = Data;
00396   NewNode->BranchPoint = Key[Index];
00397   NewNode->LeftBranch = tree->KeyDesc[Index].Min;
00398   NewNode->RightBranch = tree->KeyDesc[Index].Max;
00399   NewNode->Left = NULL;
00400   NewNode->Right = NULL;
00401 
00402   return NewNode;
00403 }                                /* MakeKDNode */
00404 
00405 
00406 /*---------------------------------------------------------------------------*/
00407 void FreeKDNode(KDNODE *Node) {
00408   memfree ((char *)Node);
00409 }
00410 
00411 
00412 /*---------------------------------------------------------------------------*/
00413 // Recursively accumulate the k_closest points to query_point_ into results_.
00414 //  Parameters:
00415 //      Level  level in tree of sub-tree to be searched
00416 //      SubTree  sub-tree to be searched
00417 void KDTreeSearch::SearchRec(int level, KDNODE *sub_tree) {
00418   if (level >= tree_->KeySize)
00419     level = 0;
00420 
00421   if (!BoxIntersectsSearch(sb_min_, sb_max_))
00422     return;
00423 
00424   results_->insert(DistanceSquared(tree_->KeySize, tree_->KeyDesc,
00425                                   query_point_, sub_tree->Key),
00426                    sub_tree->Data);
00427 
00428   if (query_point_[level] < sub_tree->BranchPoint) {
00429     if (sub_tree->Left != NULL) {
00430       FLOAT32 tmp = sb_max_[level];
00431       sb_max_[level] = sub_tree->LeftBranch;
00432       SearchRec(NextLevel(tree_, level), sub_tree->Left);
00433       sb_max_[level] = tmp;
00434     }
00435     if (sub_tree->Right != NULL) {
00436       FLOAT32 tmp = sb_min_[level];
00437       sb_min_[level] = sub_tree->RightBranch;
00438       SearchRec(NextLevel(tree_, level), sub_tree->Right);
00439       sb_min_[level] = tmp;
00440     }
00441   } else {
00442     if (sub_tree->Right != NULL) {
00443       FLOAT32 tmp = sb_min_[level];
00444       sb_min_[level] = sub_tree->RightBranch;
00445       SearchRec(NextLevel(tree_, level), sub_tree->Right);
00446       sb_min_[level] = tmp;
00447     }
00448     if (sub_tree->Left != NULL) {
00449       FLOAT32 tmp = sb_max_[level];
00450       sb_max_[level] = sub_tree->LeftBranch;
00451       SearchRec(NextLevel(tree_, level), sub_tree->Left);
00452       sb_max_[level] = tmp;
00453     }
00454   }
00455 }
00456 
00457 
00458 /*---------------------------------------------------------------------------*/
00459 // Returns the Euclidean distance squared between p1 and p2 for all essential
00460 // dimensions.
00461 //   Parameters:
00462 //       k      keys are in k-space
00463 //       dim    dimension descriptions (essential, circular, etc)
00464 //       p1,p2  two different points in K-D space
00465 FLOAT32 DistanceSquared(int k, PARAM_DESC *dim, FLOAT32 p1[], FLOAT32 p2[]) {
00466   FLOAT32 total_distance = 0;
00467 
00468   for (; k > 0; k--, p1++, p2++, dim++) {
00469     if (dim->NonEssential)
00470       continue;
00471 
00472     FLOAT32 dimension_distance = *p1 - *p2;
00473 
00474     /* if this dimension is circular - check wraparound distance */
00475     if (dim->Circular) {
00476       dimension_distance = Magnitude(dimension_distance);
00477       FLOAT32 wrap_distance = dim->Max - dim->Min - dimension_distance;
00478       dimension_distance = MIN(dimension_distance, wrap_distance);
00479     }
00480 
00481     total_distance += dimension_distance * dimension_distance;
00482   }
00483   return total_distance;
00484 }
00485 
00486 FLOAT32 ComputeDistance(int k, PARAM_DESC *dim, FLOAT32 p1[], FLOAT32 p2[]) {
00487   return sqrt(DistanceSquared(k, dim, p1, p2));
00488 }
00489 
00490 /*---------------------------------------------------------------------------*/
00491 // Return whether the query region (the smallest known circle about
00492 // query_point_ containing results->k_ points) intersects the box specified
00493 // between lower and upper.  For circular dimensions, we also check the point
00494 // one wrap distance away from the query.
00495 bool KDTreeSearch::BoxIntersectsSearch(FLOAT32 *lower, FLOAT32 *upper) {
00496   FLOAT32 *query = query_point_;
00497   FLOAT64 total_distance = 0.0;
00498   FLOAT64 radius_squared =
00499       results_->max_insertable_key() * results_->max_insertable_key();
00500   PARAM_DESC *dim = tree_->KeyDesc;
00501 
00502   for (int i = tree_->KeySize; i > 0; i--, dim++, query++, lower++, upper++) {
00503     if (dim->NonEssential)
00504       continue;
00505 
00506     FLOAT32 dimension_distance;
00507     if (*query < *lower)
00508       dimension_distance = *lower - *query;
00509     else if (*query > *upper)
00510       dimension_distance = *query - *upper;
00511     else
00512       dimension_distance = 0;
00513 
00514     /* if this dimension is circular - check wraparound distance */
00515     if (dim->Circular) {
00516       FLOAT32 wrap_distance = MAX_FLOAT32;
00517       if (*query < *lower)
00518         wrap_distance = *query + dim->Max - dim->Min - *upper;
00519       else if (*query > *upper)
00520         wrap_distance = *lower - (*query - (dim->Max - dim->Min));
00521       dimension_distance = MIN(dimension_distance, wrap_distance);
00522     }
00523 
00524     total_distance += dimension_distance * dimension_distance;
00525     if (total_distance >= radius_squared)
00526       return FALSE;
00527   }
00528   return TRUE;
00529 }
00530 
00531 
00532 /*---------------------------------------------------------------------------*/
00533 //  Walk a tree, calling action once on each node.
00534 //
00535 //  Parameters:
00536 //      tree  root of the tree being walked.
00537 //      action  action to be performed at every node
00538 //      context  action's context
00539 //      sub_tree  ptr to root of subtree to be walked
00540 //      level  current level in the tree for this node
00541 //  Operation:
00542 //      This routine walks thru the specified sub_tree and invokes action
00543 //      action at each node as follows:
00544 //        action(context, data, level)
00545 //      data  the data contents of the node being visited,
00546 //      level is the level of the node in the tree with the root being level 0.
00547 void Walk(KDTREE *tree, void_proc action, void *context,
00548           KDNODE *sub_tree, inT32 level) {
00549   (*action)(context, sub_tree->Data, level);
00550   if (sub_tree->Left != NULL)
00551     Walk(tree, action, context, sub_tree->Left, NextLevel(tree, level));
00552   if (sub_tree->Right != NULL)
00553     Walk(tree, action, context, sub_tree->Right, NextLevel(tree, level));
00554 }
00555 
00556 
00557 // Given a subtree nodes, insert all of its elements into tree.
00558 void InsertNodes(KDTREE *tree, KDNODE *nodes) {
00559   if (nodes == NULL)
00560     return;
00561 
00562   KDStore(tree, nodes->Key, nodes->Data);
00563   InsertNodes(tree, nodes->Left);
00564   InsertNodes(tree, nodes->Right);
00565 }
00566 
00567 // Free all of the nodes of a sub tree.
00568 void FreeSubTree(KDNODE *sub_tree) {
00569   if (sub_tree != NULL) {
00570     FreeSubTree(sub_tree->Left);
00571     FreeSubTree(sub_tree->Right);
00572     memfree(sub_tree);
00573   }
00574 }                                /* FreeSubTree */