Tesseract  3.02
tesseract-ocr/cube/cube_tuning_params.cpp
Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        cube_tuning_params.cpp
00003  * Description: Implementation of the CubeTuningParameters Class
00004  * Author:    Ahmad Abdulkader
00005  * Created:   2007
00006  *
00007  * (C) Copyright 2008, Google Inc.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
00020 #include <string>
00021 #include <vector>
00022 #include "cube_tuning_params.h"
00023 #include "tuning_params.h"
00024 #include "cube_utils.h"
00025 
00026 namespace tesseract {
00027 CubeTuningParams::CubeTuningParams() {
00028   reco_wgt_ = 1.0;
00029   size_wgt_ = 1.0;
00030   char_bigrams_wgt_ = 1.0;
00031   word_unigrams_wgt_ = 0.0;
00032   max_seg_per_char_ = 8;
00033   beam_width_ = 32;
00034   tp_classifier_ = NN;
00035   tp_feat_ = BMP;
00036   conv_grid_size_ = 32;
00037   hist_wind_wid_ = 0;
00038   max_word_aspect_ratio_ = 10.0;
00039   min_space_height_ratio_ = 0.2;
00040   max_space_height_ratio_ = 0.3;
00041   min_con_comp_size_ = 0;
00042   combiner_run_thresh_ = 1.0;
00043   combiner_classifier_thresh_ = 0.5;
00044   ood_wgt_ = 1.0;
00045   num_wgt_ = 1.0;
00046 
00047 }
00048 
00049 CubeTuningParams::~CubeTuningParams() {
00050 }
00051 
00052 // Create an Object given the data file path and the language by loading
00053 // the approporiate file
00054 CubeTuningParams *CubeTuningParams::Create(const string &data_file_path,
00055                                            const string &lang) {
00056   CubeTuningParams *obj = new CubeTuningParams();
00057   if (!obj) {
00058     fprintf(stderr, "Cube ERROR (CubeTuningParams::Create): unable to "
00059             "allocate new tuning params object\n");
00060     return NULL;
00061   }
00062 
00063   string tuning_params_file;
00064   tuning_params_file = data_file_path + lang;
00065   tuning_params_file += ".cube.params";
00066 
00067   if (!obj->Load(tuning_params_file)) {
00068     fprintf(stderr, "Cube ERROR (CubeTuningParams::Create): unable to "
00069             "load tuning parameters from %s\n", tuning_params_file.c_str());
00070     delete obj;
00071     obj = NULL;
00072   }
00073 
00074   return obj;
00075 }
00076 
00077 // Loads the params file
00078 bool CubeTuningParams::Load(string tuning_params_file) {
00079   // load the string into memory
00080   string param_str;
00081 
00082   if (CubeUtils::ReadFileToString(tuning_params_file, &param_str) == false) {
00083     fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): unable to read "
00084             "file %s\n", tuning_params_file.c_str());
00085     return false;
00086   }
00087 
00088   // split into lines
00089   vector<string> str_vec;
00090   CubeUtils::SplitStringUsing(param_str, "\r\n", &str_vec);
00091   if (str_vec.size() < 8) {
00092     fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): number of rows "
00093             "in parameter file is too low\n");
00094     return false;
00095   }
00096 
00097   // for all entries
00098   for (int entry = 0; entry < str_vec.size(); entry++) {
00099     // tokenize
00100     vector<string> str_tok;
00101 
00102     // should be only two tokens
00103     CubeUtils::SplitStringUsing(str_vec[entry], "=", &str_tok);
00104     if (str_tok.size() != 2) {
00105       fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): invalid format in "
00106               "line: %s.\n", str_vec[entry].c_str());
00107       return false;
00108     }
00109 
00110     double val = 0;
00111     char peekchar = (str_tok[1].c_str())[0];
00112     if ((peekchar >= '0' && peekchar <= '9') ||
00113          peekchar == '-' || peekchar == '+' ||
00114          peekchar == '.') {
00115       // read the value
00116       if (sscanf(str_tok[1].c_str(), "%lf", &val) != 1) {
00117         fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): invalid format "
00118                 "in line: %s.\n", str_vec[entry].c_str());
00119         return false;
00120       }
00121     }
00122 
00123     // token type
00124     if (str_tok[0] == "RecoWgt") {
00125       reco_wgt_ = val;
00126     } else if (str_tok[0] == "SizeWgt") {
00127       size_wgt_ = val;
00128     } else if (str_tok[0] == "CharBigramsWgt") {
00129       char_bigrams_wgt_ = val;
00130     } else if (str_tok[0] == "WordUnigramsWgt") {
00131       word_unigrams_wgt_ = val;
00132     } else if (str_tok[0] == "MaxSegPerChar") {
00133       max_seg_per_char_ = static_cast<int>(val);
00134     } else if (str_tok[0] == "BeamWidth") {
00135       beam_width_ = static_cast<int>(val);
00136     } else if (str_tok[0] == "Classifier") {
00137       if (str_tok[1] == "NN") {
00138         tp_classifier_ = TuningParams::NN;
00139       } else if (str_tok[1] == "HYBRID_NN") {
00140         tp_classifier_ = TuningParams::HYBRID_NN;
00141       } else {
00142         fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): invalid "
00143                 "classifier type in line: %s.\n", str_vec[entry].c_str());
00144         return false;
00145       }
00146     } else if (str_tok[0] == "FeatureType") {
00147       if (str_tok[1] == "BMP") {
00148         tp_feat_ = TuningParams::BMP;
00149       } else if (str_tok[1] == "CHEBYSHEV") {
00150         tp_feat_ = TuningParams::CHEBYSHEV;
00151       } else if (str_tok[1] == "HYBRID") {
00152         tp_feat_ = TuningParams::HYBRID;
00153       } else {
00154         fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): invalid feature "
00155                 "type in line: %s.\n", str_vec[entry].c_str());
00156         return false;
00157       }
00158     } else if (str_tok[0] == "ConvGridSize") {
00159       conv_grid_size_ = static_cast<int>(val);
00160     } else if (str_tok[0] == "HistWindWid") {
00161       hist_wind_wid_ = val;
00162     } else if (str_tok[0] == "MinConCompSize") {
00163       min_con_comp_size_ = val;
00164     } else if (str_tok[0] == "MaxWordAspectRatio") {
00165       max_word_aspect_ratio_ = val;
00166     } else if (str_tok[0] == "MinSpaceHeightRatio") {
00167       min_space_height_ratio_ = val;
00168     } else if (str_tok[0] == "MaxSpaceHeightRatio") {
00169       max_space_height_ratio_ = val;
00170     } else if (str_tok[0] == "CombinerRunThresh") {
00171       combiner_run_thresh_ = val;
00172     } else if (str_tok[0] == "CombinerClassifierThresh") {
00173       combiner_classifier_thresh_ = val;
00174     } else if (str_tok[0] == "OODWgt") {
00175       ood_wgt_ = val;
00176     } else if (str_tok[0] == "NumWgt") {
00177       num_wgt_ = val;
00178     } else {
00179       fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): unknown parameter "
00180               "in line: %s.\n", str_vec[entry].c_str());
00181       return false;
00182     }
00183   }
00184 
00185   return true;
00186 }
00187 
00188 // Save the parameters to a file
00189 bool CubeTuningParams::Save(string file_name) {
00190   FILE *params_file = fopen(file_name.c_str(), "wb");
00191   if (params_file == NULL) {
00192     fprintf(stderr, "Cube ERROR (CubeTuningParams::Save): error opening file "
00193             "%s for write.\n", file_name.c_str());
00194     return false;
00195   }
00196 
00197   fprintf(params_file, "RecoWgt=%.4f\n", reco_wgt_);
00198   fprintf(params_file, "SizeWgt=%.4f\n", size_wgt_);
00199   fprintf(params_file, "CharBigramsWgt=%.4f\n", char_bigrams_wgt_);
00200   fprintf(params_file, "WordUnigramsWgt=%.4f\n", word_unigrams_wgt_);
00201   fprintf(params_file, "MaxSegPerChar=%d\n", max_seg_per_char_);
00202   fprintf(params_file, "BeamWidth=%d\n", beam_width_);
00203   fprintf(params_file, "ConvGridSize=%d\n", conv_grid_size_);
00204   fprintf(params_file, "HistWindWid=%d\n", hist_wind_wid_);
00205   fprintf(params_file, "MinConCompSize=%d\n", min_con_comp_size_);
00206   fprintf(params_file, "MaxWordAspectRatio=%.4f\n", max_word_aspect_ratio_);
00207   fprintf(params_file, "MinSpaceHeightRatio=%.4f\n", min_space_height_ratio_);
00208   fprintf(params_file, "MaxSpaceHeightRatio=%.4f\n", max_space_height_ratio_);
00209   fprintf(params_file, "CombinerRunThresh=%.4f\n", combiner_run_thresh_);
00210   fprintf(params_file, "CombinerClassifierThresh=%.4f\n",
00211           combiner_classifier_thresh_);
00212   fprintf(params_file, "OODWgt=%.4f\n", ood_wgt_);
00213   fprintf(params_file, "NumWgt=%.4f\n", num_wgt_);
00214 
00215   fclose(params_file);
00216   return true;
00217 }
00218 }