Tesseract
3.02
/**********************************************************************
 * File:        cube_tuning_params.cpp
 * Description: Implementation of the CubeTuningParameters Class
 * Author:      Ahmad Abdulkader
 * Created:     2007
 *
 * (C) Copyright 2008, Google Inc.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/

#include <string>
#include <vector>
#include "cube_tuning_params.h"
#include "tuning_params.h"
#include "cube_utils.h"

namespace tesseract {
CubeTuningParams::CubeTuningParams() {
  reco_wgt_ = 1.0;
  size_wgt_ = 1.0;
  char_bigrams_wgt_ = 1.0;
  word_unigrams_wgt_ = 0.0;
  max_seg_per_char_ = 8;
  beam_width_ = 32;
  tp_classifier_ = NN;
  tp_feat_ = BMP;
  conv_grid_size_ = 32;
  hist_wind_wid_ = 0;
  max_word_aspect_ratio_ = 10.0;
  min_space_height_ratio_ = 0.2;
  max_space_height_ratio_ = 0.3;
  min_con_comp_size_ = 0;
  combiner_run_thresh_ = 1.0;
  combiner_classifier_thresh_ = 0.5;
  ood_wgt_ = 1.0;
  num_wgt_ = 1.0;
}

CubeTuningParams::~CubeTuningParams() {
}

// Create an object given the data file path and the language by loading
// the appropriate file.
CubeTuningParams *CubeTuningParams::Create(const string &data_file_path,
                                           const string &lang) {
  CubeTuningParams *obj = new CubeTuningParams();
  if (!obj) {
    fprintf(stderr, "Cube ERROR (CubeTuningParams::Create): unable to "
            "allocate new tuning params object\n");
    return NULL;
  }

  string tuning_params_file;
  tuning_params_file = data_file_path + lang;
  tuning_params_file += ".cube.params";

  if (!obj->Load(tuning_params_file)) {
    fprintf(stderr, "Cube ERROR (CubeTuningParams::Create): unable to "
            "load tuning parameters from %s\n", tuning_params_file.c_str());
    delete obj;
    obj = NULL;
  }

  return obj;
}

// Loads the params file
bool CubeTuningParams::Load(string tuning_params_file) {
  // load the file contents into a string
  string param_str;

  if (CubeUtils::ReadFileToString(tuning_params_file, &param_str) == false) {
    fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): unable to read "
            "file %s\n", tuning_params_file.c_str());
    return false;
  }

  // split into lines
  vector<string> str_vec;
  CubeUtils::SplitStringUsing(param_str, "\r\n", &str_vec);
  if (str_vec.size() < 8) {
    fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): number of rows "
            "in parameter file is too low\n");
    return false;
  }

  // for all entries
  for (int entry = 0; entry < str_vec.size(); entry++) {
    // tokenize
    vector<string> str_tok;

    // should be only two tokens
    CubeUtils::SplitStringUsing(str_vec[entry], "=", &str_tok);
    if (str_tok.size() != 2) {
      fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): invalid format in "
              "line: %s.\n", str_vec[entry].c_str());
      return false;
    }

    double val = 0;
    char peekchar = (str_tok[1].c_str())[0];
    if ((peekchar >= '0' && peekchar <= '9') ||
        peekchar == '-' || peekchar == '+' ||
        peekchar == '.') {
      // read the value
      if (sscanf(str_tok[1].c_str(), "%lf", &val) != 1) {
        fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): invalid format "
                "in line: %s.\n", str_vec[entry].c_str());
        return false;
      }
    }

    // token type
    if (str_tok[0] == "RecoWgt") {
      reco_wgt_ = val;
    } else if (str_tok[0] == "SizeWgt") {
      size_wgt_ = val;
    } else if (str_tok[0] == "CharBigramsWgt") {
      char_bigrams_wgt_ = val;
    } else if (str_tok[0] == "WordUnigramsWgt") {
      word_unigrams_wgt_ = val;
    } else if (str_tok[0] == "MaxSegPerChar") {
      max_seg_per_char_ = static_cast<int>(val);
    } else if (str_tok[0] == "BeamWidth") {
      beam_width_ = static_cast<int>(val);
    } else if (str_tok[0] == "Classifier") {
      if (str_tok[1] == "NN") {
        tp_classifier_ = TuningParams::NN;
      } else if (str_tok[1] == "HYBRID_NN") {
        tp_classifier_ = TuningParams::HYBRID_NN;
      } else {
        fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): invalid "
                "classifier type in line: %s.\n", str_vec[entry].c_str());
        return false;
      }
    } else if (str_tok[0] == "FeatureType") {
      if (str_tok[1] == "BMP") {
        tp_feat_ = TuningParams::BMP;
      } else if (str_tok[1] == "CHEBYSHEV") {
        tp_feat_ = TuningParams::CHEBYSHEV;
      } else if (str_tok[1] == "HYBRID") {
        tp_feat_ = TuningParams::HYBRID;
      } else {
        fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): invalid feature "
                "type in line: %s.\n", str_vec[entry].c_str());
        return false;
      }
    } else if (str_tok[0] == "ConvGridSize") {
      conv_grid_size_ = static_cast<int>(val);
    } else if (str_tok[0] == "HistWindWid") {
      hist_wind_wid_ = static_cast<int>(val);
    } else if (str_tok[0] == "MinConCompSize") {
      min_con_comp_size_ = static_cast<int>(val);
    } else if (str_tok[0] == "MaxWordAspectRatio") {
      max_word_aspect_ratio_ = val;
    } else if (str_tok[0] == "MinSpaceHeightRatio") {
      min_space_height_ratio_ = val;
    } else if (str_tok[0] == "MaxSpaceHeightRatio") {
      max_space_height_ratio_ = val;
    } else if (str_tok[0] == "CombinerRunThresh") {
      combiner_run_thresh_ = val;
    } else if (str_tok[0] == "CombinerClassifierThresh") {
      combiner_classifier_thresh_ = val;
    } else if (str_tok[0] == "OODWgt") {
      ood_wgt_ = val;
    } else if (str_tok[0] == "NumWgt") {
      num_wgt_ = val;
    } else {
      fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): unknown parameter "
              "in line: %s.\n", str_vec[entry].c_str());
      return false;
    }
  }

  return true;
}

// Save the parameters to a file
bool CubeTuningParams::Save(string file_name) {
  FILE *params_file = fopen(file_name.c_str(), "wb");
  if (params_file == NULL) {
    fprintf(stderr, "Cube ERROR (CubeTuningParams::Save): error opening file "
            "%s for write.\n", file_name.c_str());
    return false;
  }

  fprintf(params_file, "RecoWgt=%.4f\n", reco_wgt_);
"SizeWgt=%.4f\n", size_wgt_); 00199 fprintf(params_file, "CharBigramsWgt=%.4f\n", char_bigrams_wgt_); 00200 fprintf(params_file, "WordUnigramsWgt=%.4f\n", word_unigrams_wgt_); 00201 fprintf(params_file, "MaxSegPerChar=%d\n", max_seg_per_char_); 00202 fprintf(params_file, "BeamWidth=%d\n", beam_width_); 00203 fprintf(params_file, "ConvGridSize=%d\n", conv_grid_size_); 00204 fprintf(params_file, "HistWindWid=%d\n", hist_wind_wid_); 00205 fprintf(params_file, "MinConCompSize=%d\n", min_con_comp_size_); 00206 fprintf(params_file, "MaxWordAspectRatio=%.4f\n", max_word_aspect_ratio_); 00207 fprintf(params_file, "MinSpaceHeightRatio=%.4f\n", min_space_height_ratio_); 00208 fprintf(params_file, "MaxSpaceHeightRatio=%.4f\n", max_space_height_ratio_); 00209 fprintf(params_file, "CombinerRunThresh=%.4f\n", combiner_run_thresh_); 00210 fprintf(params_file, "CombinerClassifierThresh=%.4f\n", 00211 combiner_classifier_thresh_); 00212 fprintf(params_file, "OODWgt=%.4f\n", ood_wgt_); 00213 fprintf(params_file, "NumWgt=%.4f\n", num_wgt_); 00214 00215 fclose(params_file); 00216 return true; 00217 } 00218 }