Tesseract
3.02
|
00001 /********************************************************************** 00002 * File: search_column.cpp 00003 * Description: Implementation of the Beam Search Column Class 00004 * Author: Ahmad Abdulkader 00005 * Created: 2008 00006 * 00007 * (C) Copyright 2008, Google Inc. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 * 00018 **********************************************************************/ 00019 00020 #include "search_column.h" 00021 #include <stdlib.h> 00022 00023 namespace tesseract { 00024 00025 SearchColumn::SearchColumn(int col_idx, int max_node) { 00026 col_idx_ = col_idx; 00027 node_cnt_ = 0; 00028 node_array_ = NULL; 00029 max_node_cnt_ = max_node; 00030 node_hash_table_ = NULL; 00031 init_ = false; 00032 min_cost_ = INT_MAX; 00033 max_cost_ = 0; 00034 } 00035 00036 // Cleanup data 00037 void SearchColumn::Cleanup() { 00038 if (node_array_ != NULL) { 00039 for (int node_idx = 0; node_idx < node_cnt_; node_idx++) { 00040 if (node_array_[node_idx] != NULL) { 00041 delete node_array_[node_idx]; 00042 } 00043 } 00044 00045 delete []node_array_; 00046 node_array_ = NULL; 00047 } 00048 FreeHashTable(); 00049 init_ = false; 00050 } 00051 00052 SearchColumn::~SearchColumn() { 00053 Cleanup(); 00054 } 00055 00056 // Initializations 00057 bool SearchColumn::Init() { 00058 if (init_ == true) { 00059 return true; 00060 } 00061 00062 // create hash table 00063 if (node_hash_table_ == NULL) { 00064 node_hash_table_ = new SearchNodeHashTable(); 00065 if (node_hash_table_ == NULL) { 00066 return false; 00067 } 00068 } 00069 00070 init_ = true; 00071 00072 return true; 00073 } 00074 00075 // Prune the nodes if necessary. Pruning is done such that a max 00076 // number of nodes is kept, i.e., the beam width 00077 void SearchColumn::Prune() { 00078 // no need to prune 00079 if (node_cnt_ <= max_node_cnt_) { 00080 return; 00081 } 00082 00083 // compute the cost histogram 00084 memset(score_bins_, 0, sizeof(score_bins_)); 00085 int cost_range = max_cost_ - min_cost_ + 1; 00086 for (int node_idx = 0; node_idx < node_cnt_; node_idx++) { 00087 int cost_bin = static_cast<int>( 00088 ((node_array_[node_idx]->BestCost() - min_cost_) * 00089 kScoreBins) / static_cast<double>(cost_range)); 00090 if (cost_bin >= kScoreBins) { 00091 cost_bin = kScoreBins - 1; 00092 } 00093 score_bins_[cost_bin]++; 00094 } 00095 00096 // determine the pruning cost by scanning the cost histogram from 00097 // least to greatest cost bins and finding the cost at which the 00098 // max number of nodes is exceeded 00099 int pruning_cost = 0; 00100 int new_node_cnt = 0; 00101 for (int cost_bin = 0; cost_bin < kScoreBins; cost_bin++) { 00102 if (new_node_cnt > 0 && 00103 (new_node_cnt + score_bins_[cost_bin]) > max_node_cnt_) { 00104 pruning_cost = min_cost_ + ((cost_bin * cost_range) / kScoreBins); 00105 break; 00106 } 00107 new_node_cnt += score_bins_[cost_bin]; 00108 } 00109 00110 // prune out all the nodes above this cost 00111 for (int node_idx = new_node_cnt = 0; node_idx < node_cnt_; node_idx++) { 00112 // prune this node out 00113 if (node_array_[node_idx]->BestCost() > pruning_cost || 00114 new_node_cnt > max_node_cnt_) { 00115 delete node_array_[node_idx]; 00116 } else { 00117 // keep it 00118 node_array_[new_node_cnt++] = node_array_[node_idx]; 00119 } 00120 } 00121 node_cnt_ = new_node_cnt; 00122 } 00123 00124 // sort all nodes 00125 void SearchColumn::Sort() { 00126 if (node_cnt_ > 0 && node_array_ != NULL) { 00127 qsort(node_array_, node_cnt_, sizeof(*node_array_), 00128 SearchNode::SearchNodeComparer); 00129 } 00130 } 00131 00132 // add a new node 00133 SearchNode *SearchColumn::AddNode(LangModEdge *edge, int reco_cost, 00134 SearchNode *parent_node, 00135 CubeRecoContext *cntxt) { 00136 // init if necessary 00137 if (init_ == false && Init() == false) { 00138 return NULL; 00139 } 00140 00141 // find out if we have an node with the same edge 00142 // look in the hash table 00143 SearchNode *new_node = node_hash_table_->Lookup(edge, parent_node); 00144 // node does not exist 00145 if (new_node == NULL) { 00146 new_node = new SearchNode(cntxt, parent_node, reco_cost, edge, col_idx_); 00147 if (new_node == NULL) { 00148 return NULL; 00149 } 00150 00151 // if the max node count has already been reached, check if the cost of 00152 // the new node exceeds the max cost. This indicates that it will be pruned 00153 // and so there is no point adding it 00154 if (node_cnt_ >= max_node_cnt_ && new_node->BestCost() > max_cost_) { 00155 delete new_node; 00156 return NULL; 00157 } 00158 00159 // expand the node buffer if necc 00160 if ((node_cnt_ % kNodeAllocChunk) == 0) { 00161 // alloc a new buff 00162 SearchNode **new_node_buff = 00163 new SearchNode *[node_cnt_ + kNodeAllocChunk]; 00164 if (new_node_buff == NULL) { 00165 delete new_node; 00166 return NULL; 00167 } 00168 00169 // free existing after copying contents 00170 if (node_array_ != NULL) { 00171 memcpy(new_node_buff, node_array_, node_cnt_ * sizeof(*new_node_buff)); 00172 delete []node_array_; 00173 } 00174 00175 node_array_ = new_node_buff; 00176 } 00177 00178 // add the node to the hash table only if it is non-OOD edge 00179 // because the langmod state is not unique 00180 if (edge->IsOOD() == false) { 00181 if (!node_hash_table_->Insert(edge, new_node)) { 00182 printf("Hash table full!!!"); 00183 delete new_node; 00184 return NULL; 00185 } 00186 } 00187 00188 node_array_[node_cnt_++] = new_node; 00189 00190 } else { 00191 // node exists before 00192 // if no update occurred, return NULL 00193 if (new_node->UpdateParent(parent_node, reco_cost, edge) == false) { 00194 new_node = NULL; 00195 } 00196 00197 // free the edge 00198 if (edge != NULL) { 00199 delete edge; 00200 } 00201 } 00202 00203 // update Min and Max Costs 00204 if (new_node != NULL) { 00205 if (min_cost_ > new_node->BestCost()) { 00206 min_cost_ = new_node->BestCost(); 00207 } 00208 00209 if (max_cost_ < new_node->BestCost()) { 00210 max_cost_ = new_node->BestCost(); 00211 } 00212 } 00213 00214 return new_node; 00215 } 00216 00217 SearchNode *SearchColumn::BestNode() { 00218 SearchNode *best_node = NULL; 00219 00220 for (int node_idx = 0; node_idx < node_cnt_; node_idx++) { 00221 if (best_node == NULL || 00222 best_node->BestCost() > node_array_[node_idx]->BestCost()) { 00223 best_node = node_array_[node_idx]; 00224 } 00225 } 00226 00227 return best_node; 00228 } 00229 } // namespace tesseract