Tesseract  3.02
tesseract-ocr/wordrec/matchtab.cpp
Go to the documentation of this file.
00001 /* -*-C-*-
00002  ********************************************************************************
00003  *
00004  * File:        matchtab.c  (Formerly matchtab.c)
00005  * Description:  Match table to retain blobs that were matched.
00006  * Author:       Mark Seaman, OCR Technology
00007  * Created:      Mon Jan 29 09:00:56 1990
00008  * Modified:     Tue Mar 19 15:09:06 1991 (Mark Seaman) marks@hpgrlt
00009  * Language:     C
00010  * Package:      N/A
00011  * Status:       Experimental (Do Not Distribute)
00012  *
00013  * (c) Copyright 1990, Hewlett-Packard Company.
00014  ** Licensed under the Apache License, Version 2.0 (the "License");
00015  ** you may not use this file except in compliance with the License.
00016  ** You may obtain a copy of the License at
00017  ** http://www.apache.org/licenses/LICENSE-2.0
00018  ** Unless required by applicable law or agreed to in writing, software
00019  ** distributed under the License is distributed on an "AS IS" BASIS,
00020  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00021  ** See the License for the specific language governing permissions and
00022  ** limitations under the License.
00023  *
00024  *********************************************************************************/
00025 #include "matchtab.h"
00026 
00027 #include "blobs.h"
00028 #include "callcpp.h"
00029 #include "elst.h"
00030 #include "freelist.h"
00031 #include "helpers.h"
00032 #include "ratngs.h"
00033 
00034 #define NUM_MATCH_ENTRIES 500    /* Entries in match_table */
00035 
00036 namespace tesseract {
00037 
00038 BlobMatchTable::BlobMatchTable()
00039   : been_initialized_(false), match_table_(NULL) {
00040   init_match_table();
00041 }
00042 
00043 BlobMatchTable::~BlobMatchTable() {
00044   end_match_table();
00045 }
00046 
00047 /**********************************************************************
00048  * init_match_table
00049  *
00050  * Create and clear a match table to be used to speed up the splitter.
00051  **********************************************************************/
00052 void BlobMatchTable::init_match_table() {
00053   if (been_initialized_) {
00054     /* Reclaim old choices */
00055     for (int x = 0; x < NUM_MATCH_ENTRIES; x++) {
00056       if (!IsEmpty(x)) {
00057         match_table_[x].rating->clear();
00058         delete match_table_[x].rating;
00059         // Reinitialize the entry.
00060         match_table_[x].box = TBOX();
00061         match_table_[x].rating = NULL;
00062       }
00063     }
00064   } else {
00065     /* Allocate memory once */
00066     match_table_ = new MATCH[NUM_MATCH_ENTRIES];
00067     been_initialized_ = true;
00068   }
00069 }
00070 
00071 void BlobMatchTable::end_match_table() {
00072   if (been_initialized_) {
00073     init_match_table();
00074     delete[] match_table_;
00075     match_table_ = NULL;
00076     been_initialized_ = false;
00077   }
00078 }
00079 
00080 
00081 /**********************************************************************
00082  * put_match
00083  *
00084  * Put a new blob and its corresponding match ratings into the match
00085  * table.
00086  **********************************************************************/
00087 void BlobMatchTable::put_match(TBLOB *blob, BLOB_CHOICE_LIST *ratings) {
00088   if (!blob) return;
00089   /* Hash into table */
00090   TBOX bbox(blob->bounding_box());
00091   int start = Hash(bbox);
00092 
00093   /* Look for empty */
00094   int x = start;
00095   do {
00096     if (IsEmpty(x)) {
00097       /* Add this entry */
00098       match_table_[x].box = bbox;
00099       // Copy ratings to match_table_[x].rating
00100       match_table_[x].rating = new BLOB_CHOICE_LIST();
00101       match_table_[x].rating->deep_copy(ratings, &BLOB_CHOICE::deep_copy);
00102       return;
00103     }
00104     if (++x >= NUM_MATCH_ENTRIES)
00105       x = 0;
00106   } while (x != start);
00107 
00108   cprintf ("error: Match table is full\n");
00109 }
00110 
00111 
00112 /**********************************************************************
00113  * get_match
00114  *
00115  * Look up this blob in the match table to see if it needs to be
00116  * matched.  If it is not present then NULL is returned.
00117  **********************************************************************/
00118 BLOB_CHOICE_LIST *BlobMatchTable::get_match(TBLOB *blob) {
00119   return get_match_by_box(blob->bounding_box());
00120 }
00121 
00122 /**********************************************************************
00123  * Hash
00124  *
00125  * The hash function we use to translate a bounding box to a starting
00126  * hash position in our array.
00127  **********************************************************************/
00128 int BlobMatchTable::Hash(const TBOX &box) const {
00129   int topleft = (box.top() << 16) + box.left();
00130   int botright = (box.bottom() << 16) + box.right();
00131   return Modulo(topleft + botright, NUM_MATCH_ENTRIES);
00132 }
00133 
00134 /**********************************************************************
00135  * IsEmpty
00136  *
00137  * Returns whether the idx entry in the array is still empty.
00138  **********************************************************************/
00139 bool BlobMatchTable::IsEmpty(int idx) const {
00140   return TBOX() == match_table_[idx].box &&
00141       NULL == match_table_[idx].rating;
00142 }
00143 
00144 /**********************************************************************
00145  * get_match_by_box
00146  *
00147  * Look up this blob in the match table to see if it needs to be
00148  * matched.  If it is not present then NULL is returned.
00149  **********************************************************************/
00150 BLOB_CHOICE_LIST *BlobMatchTable::get_match_by_box(const TBOX &box) {
00151   int start = Hash(box);
00152   int x = start;
00153   /* Search for match */
00154   do {
00155     /* Not found when blank */
00156     if (IsEmpty(x))
00157       break;
00158     /* Is this the match ? */
00159     if (match_table_[x].box == box) {
00160       BLOB_CHOICE_LIST *blist = new BLOB_CHOICE_LIST();
00161       blist->deep_copy(match_table_[x].rating, &BLOB_CHOICE::deep_copy);
00162       return blist;
00163     }
00164     if (++x >= NUM_MATCH_ENTRIES)
00165       x = 0;
00166   } while (x != start);
00167   return NULL;
00168 }
00169 
00170 /**********************************************************************
00171  * add_to_match
00172  *
00173  * Update ratings list in the match_table corresponding to the given
00174  * blob. The function assumes that:
00175  * -- the match table contains the initial non-NULL list with choices
00176  *    for the given blob
00177  * -- the new ratings list is a superset of the corresponding list in
00178  *    the match_table and the unichar ids of the blob choices in the
00179  *    list are unique.
00180  * The entries that appear in the new ratings list and not in the
00181  * old one are added to the old ratings list in the match_table.
00182  **********************************************************************/
00183 void BlobMatchTable::add_to_match(TBLOB *blob, BLOB_CHOICE_LIST *ratings) {
00184   TBOX bbox = blob->bounding_box();
00185   int start = Hash(bbox);
00186   int x = start;
00187   do {
00188     if (IsEmpty(x)) {
00189       fprintf(stderr, "Can not update uninitialized entry in match_table\n");
00190       ASSERT_HOST(!IsEmpty(x));
00191     }
00192     if (match_table_[x].box == bbox) {
00193       // Copy new ratings to match_table_[x].rating.
00194       BLOB_CHOICE_IT it;
00195       it.set_to_list(match_table_[x].rating);
00196       BLOB_CHOICE_IT new_it;
00197       new_it.set_to_list(ratings);
00198       assert(it.length() <= new_it.length());
00199       for (it.mark_cycle_pt(), new_it.mark_cycle_pt();
00200            !it.cycled_list() && !new_it.cycled_list(); new_it.forward()) {
00201         if (it.data()->unichar_id() == new_it.data()->unichar_id()) {
00202           it.forward();
00203         } else {
00204           it.add_before_stay_put(new BLOB_CHOICE(*(new_it.data())));
00205         }
00206       }
00207       return;
00208     }
00209     if (++x >= NUM_MATCH_ENTRIES)
00210       x = 0;
00211   } while (x != start);
00212 }
00213 
00214 }  // namespace tesseract