Tesseract  3.02
tesseract-ocr/cube/word_altlist.cpp
Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        word_altlist.cpp
00003  * Description: Implementation of the Word Alternate List Class
00004  * Author:    Ahmad Abdulkader
00005  * Created:   2008
00006  *
00007  * (C) Copyright 2008, Google Inc.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
00020 #include "word_altlist.h"
00021 
00022 namespace tesseract {
00023 WordAltList::WordAltList(int max_alt)
00024     : AltList(max_alt) {
00025   word_alt_ = NULL;
00026 }
00027 
00028 WordAltList::~WordAltList() {
00029   if (word_alt_ != NULL) {
00030     for (int alt_idx = 0; alt_idx < alt_cnt_; alt_idx++) {
00031       if (word_alt_[alt_idx] != NULL) {
00032         delete []word_alt_[alt_idx];
00033       }
00034     }
00035     delete []word_alt_;
00036     word_alt_ = NULL;
00037   }
00038 }
00039 
00040 // insert an alternate word with the specified cost and tag
00041 bool WordAltList::Insert(char_32 *word_str, int cost, void *tag) {
00042   if (word_alt_ == NULL || alt_cost_ == NULL) {
00043     word_alt_ = new char_32*[max_alt_];
00044     alt_cost_ = new int[max_alt_];
00045     alt_tag_ = new void *[max_alt_];
00046 
00047     if (word_alt_ == NULL || alt_cost_ == NULL || alt_tag_ == NULL) {
00048       return false;
00049     }
00050 
00051     memset(alt_tag_, 0, max_alt_ * sizeof(*alt_tag_));
00052   } else {
00053     // check if alt already exists
00054     for (int alt_idx = 0; alt_idx < alt_cnt_; alt_idx++) {
00055       if (CubeUtils::StrCmp(word_str, word_alt_[alt_idx]) == 0) {
00056         // update the cost if we have a lower one
00057         if (cost < alt_cost_[alt_idx]) {
00058           alt_cost_[alt_idx] = cost;
00059           alt_tag_[alt_idx] = tag;
00060         }
00061         return true;
00062       }
00063     }
00064   }
00065 
00066   // determine length of alternate
00067   int len = CubeUtils::StrLen(word_str);
00068 
00069   word_alt_[alt_cnt_] = new char_32[len + 1];
00070   if (word_alt_[alt_cnt_] == NULL) {
00071     return false;
00072   }
00073 
00074   if (len > 0) {
00075     memcpy(word_alt_[alt_cnt_], word_str, len * sizeof(*word_str));
00076   }
00077 
00078   word_alt_[alt_cnt_][len] = 0;
00079   alt_cost_[alt_cnt_] = cost;
00080   alt_tag_[alt_cnt_] = tag;
00081 
00082   alt_cnt_++;
00083 
00084   return true;
00085 }
00086 
00087 // sort the alternate in descending order based on the cost
00088 void WordAltList::Sort() {
00089   for (int alt_idx = 0; alt_idx < alt_cnt_; alt_idx++) {
00090     for (int alt = alt_idx + 1; alt < alt_cnt_; alt++) {
00091       if (alt_cost_[alt_idx] > alt_cost_[alt]) {
00092         char_32 *pchTemp = word_alt_[alt_idx];
00093         word_alt_[alt_idx] = word_alt_[alt];
00094         word_alt_[alt] = pchTemp;
00095 
00096         int temp = alt_cost_[alt_idx];
00097         alt_cost_[alt_idx] = alt_cost_[alt];
00098         alt_cost_[alt] = temp;
00099 
00100         void *tag = alt_tag_[alt_idx];
00101         alt_tag_[alt_idx] = alt_tag_[alt];
00102         alt_tag_[alt] = tag;
00103       }
00104     }
00105   }
00106 }
00107 
00108 void WordAltList::PrintDebug() {
00109   for (int alt_idx = 0; alt_idx < alt_cnt_; alt_idx++) {
00110     char_32 *word_32 = word_alt_[alt_idx];
00111     string word_str;
00112     CubeUtils::UTF32ToUTF8(word_32, &word_str);
00113     int num_unichars = CubeUtils::StrLen(word_32);
00114     fprintf(stderr, "Alt[%d]=%s (cost=%d, num_unichars=%d); unichars=", alt_idx,
00115             word_str.c_str(), alt_cost_[alt_idx], num_unichars);
00116     for (int i = 0; i < num_unichars; ++i)
00117       fprintf(stderr, "%d ", word_32[i]);
00118     fprintf(stderr, "\n");
00119   }
00120 }
00121 }  // namespace tesseract