// Tesseract 3.02
00001 /********************************************************************** 00002 * File: word_altlist.cpp 00003 * Description: Implementation of the Word Alternate List Class 00004 * Author: Ahmad Abdulkader 00005 * Created: 2008 00006 * 00007 * (C) Copyright 2008, Google Inc. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 * 00018 **********************************************************************/ 00019 00020 #include "word_altlist.h" 00021 00022 namespace tesseract { 00023 WordAltList::WordAltList(int max_alt) 00024 : AltList(max_alt) { 00025 word_alt_ = NULL; 00026 } 00027 00028 WordAltList::~WordAltList() { 00029 if (word_alt_ != NULL) { 00030 for (int alt_idx = 0; alt_idx < alt_cnt_; alt_idx++) { 00031 if (word_alt_[alt_idx] != NULL) { 00032 delete []word_alt_[alt_idx]; 00033 } 00034 } 00035 delete []word_alt_; 00036 word_alt_ = NULL; 00037 } 00038 } 00039 00040 // insert an alternate word with the specified cost and tag 00041 bool WordAltList::Insert(char_32 *word_str, int cost, void *tag) { 00042 if (word_alt_ == NULL || alt_cost_ == NULL) { 00043 word_alt_ = new char_32*[max_alt_]; 00044 alt_cost_ = new int[max_alt_]; 00045 alt_tag_ = new void *[max_alt_]; 00046 00047 if (word_alt_ == NULL || alt_cost_ == NULL || alt_tag_ == NULL) { 00048 return false; 00049 } 00050 00051 memset(alt_tag_, 0, max_alt_ * sizeof(*alt_tag_)); 00052 } else { 00053 // check if alt already exists 00054 for (int alt_idx = 0; 
alt_idx < alt_cnt_; alt_idx++) { 00055 if (CubeUtils::StrCmp(word_str, word_alt_[alt_idx]) == 0) { 00056 // update the cost if we have a lower one 00057 if (cost < alt_cost_[alt_idx]) { 00058 alt_cost_[alt_idx] = cost; 00059 alt_tag_[alt_idx] = tag; 00060 } 00061 return true; 00062 } 00063 } 00064 } 00065 00066 // determine length of alternate 00067 int len = CubeUtils::StrLen(word_str); 00068 00069 word_alt_[alt_cnt_] = new char_32[len + 1]; 00070 if (word_alt_[alt_cnt_] == NULL) { 00071 return false; 00072 } 00073 00074 if (len > 0) { 00075 memcpy(word_alt_[alt_cnt_], word_str, len * sizeof(*word_str)); 00076 } 00077 00078 word_alt_[alt_cnt_][len] = 0; 00079 alt_cost_[alt_cnt_] = cost; 00080 alt_tag_[alt_cnt_] = tag; 00081 00082 alt_cnt_++; 00083 00084 return true; 00085 } 00086 00087 // sort the alternate in descending order based on the cost 00088 void WordAltList::Sort() { 00089 for (int alt_idx = 0; alt_idx < alt_cnt_; alt_idx++) { 00090 for (int alt = alt_idx + 1; alt < alt_cnt_; alt++) { 00091 if (alt_cost_[alt_idx] > alt_cost_[alt]) { 00092 char_32 *pchTemp = word_alt_[alt_idx]; 00093 word_alt_[alt_idx] = word_alt_[alt]; 00094 word_alt_[alt] = pchTemp; 00095 00096 int temp = alt_cost_[alt_idx]; 00097 alt_cost_[alt_idx] = alt_cost_[alt]; 00098 alt_cost_[alt] = temp; 00099 00100 void *tag = alt_tag_[alt_idx]; 00101 alt_tag_[alt_idx] = alt_tag_[alt]; 00102 alt_tag_[alt] = tag; 00103 } 00104 } 00105 } 00106 } 00107 00108 void WordAltList::PrintDebug() { 00109 for (int alt_idx = 0; alt_idx < alt_cnt_; alt_idx++) { 00110 char_32 *word_32 = word_alt_[alt_idx]; 00111 string word_str; 00112 CubeUtils::UTF32ToUTF8(word_32, &word_str); 00113 int num_unichars = CubeUtils::StrLen(word_32); 00114 fprintf(stderr, "Alt[%d]=%s (cost=%d, num_unichars=%d); unichars=", alt_idx, 00115 word_str.c_str(), alt_cost_[alt_idx], num_unichars); 00116 for (int i = 0; i < num_unichars; ++i) 00117 fprintf(stderr, "%d ", word_32[i]); 00118 fprintf(stderr, "\n"); 00119 } 00120 } 00121 } // 
namespace tesseract