Tesseract
3.02
|
/**********************************************************************
 * File:        tuning_params.h
 * Description: Declaration of the Tuning Parameters Base Class
 * Author:      Ahmad Abdulkader
 * Created:     2008
 *
 * (C) Copyright 2008, Google Inc.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/

// The TuningParams class abstracts all the parameters that can be learned or
// tuned during the training process. It is a base class that all TuningParams
// classes should inherit from.
#ifndef TUNING_PARAMS_H
#define TUNING_PARAMS_H

#include <string>
#ifdef USE_STD_NAMESPACE
using std::string;
#endif

namespace tesseract {
// Abstract base class holding the tunable parameters. It stores the values,
// exposes read accessors for all of them and setters for a subset, and leaves
// Save()/Load() to be implemented by subclasses.
class TuningParams {
 public:
  // Classifier types (see tp_classifier_).
  enum type_classifer {
    NN,
    HYBRID_NN
  };
  // Feature types (see tp_feat_).
  enum type_feature {
    BMP,
    CHEBYSHEV,
    HYBRID
  };

  // Gives every parameter a well-defined starting value (numeric members are
  // zero, enum members are their first enumerator) so that no accessor can
  // read an indeterminate value. The initializers follow the declaration
  // order of the members below.
  // NOTE(review): subclasses are presumably expected to overwrite these
  // placeholders with real values -- confirm against the implementations.
  TuningParams()
      : reco_wgt_(0.0),
        size_wgt_(0.0),
        char_bigrams_wgt_(0.0),
        word_unigrams_wgt_(0.0),
        max_seg_per_char_(0),
        beam_width_(0),
        tp_classifier_(NN),
        tp_feat_(BMP),
        conv_grid_size_(0),
        hist_wind_wid_(0),
        min_con_comp_size_(0),
        max_word_aspect_ratio_(0.0),
        min_space_height_ratio_(0.0),
        max_space_height_ratio_(0.0),
        combiner_run_thresh_(0.0),
        combiner_classifier_thresh_(0.0) {}
  virtual ~TuningParams() {}

  // Accessor functions. Each returns the current value of the corresponding
  // member; the two enum members are returned converted to int.
  inline double RecoWgt() const { return reco_wgt_; }
  inline double SizeWgt() const { return size_wgt_; }
  inline double CharBigramWgt() const { return char_bigrams_wgt_; }
  inline double WordUnigramWgt() const { return word_unigrams_wgt_; }
  inline int MaxSegPerChar() const { return max_seg_per_char_; }
  inline int BeamWidth() const { return beam_width_; }
  inline int TypeClassifier() const { return tp_classifier_; }
  inline int TypeFeature() const { return tp_feat_; }
  inline int ConvGridSize() const { return conv_grid_size_; }
  inline int HistWindWid() const { return hist_wind_wid_; }
  inline int MinConCompSize() const { return min_con_comp_size_; }
  inline double MaxWordAspectRatio() const { return max_word_aspect_ratio_; }
  inline double MinSpaceHeightRatio() const { return min_space_height_ratio_; }
  inline double MaxSpaceHeightRatio() const { return max_space_height_ratio_; }
  inline double CombinerRunThresh() const { return combiner_run_thresh_; }
  inline double CombinerClassifierThresh() const {
    return combiner_classifier_thresh_;
  }

  // Setter functions. Each stores its argument in the corresponding member
  // without any validation.
  inline void SetRecoWgt(double wgt) { reco_wgt_ = wgt; }
  inline void SetSizeWgt(double wgt) { size_wgt_ = wgt; }
  inline void SetCharBigramWgt(double wgt) { char_bigrams_wgt_ = wgt; }
  inline void SetWordUnigramWgt(double wgt) { word_unigrams_wgt_ = wgt; }
  inline void SetMaxSegPerChar(int max_seg_per_char) {
    max_seg_per_char_ = max_seg_per_char;
  }
  inline void SetBeamWidth(int beam_width) { beam_width_ = beam_width; }
  inline void SetTypeClassifier(type_classifer tp_classifier) {
    tp_classifier_ = tp_classifier;
  }
  inline void SetTypeFeature(type_feature tp_feat) { tp_feat_ = tp_feat; }
  inline void SetHistWindWid(int hist_wind_wid) {
    hist_wind_wid_ = hist_wind_wid;
  }

  // Pure virtual; every concrete subclass must implement both.
  // NOTE(review): presumably these write the parameters to / read them from
  // the file named by file_name and return true on success -- confirm against
  // the implementing subclasses.
  virtual bool Save(string file_name) = 0;
  virtual bool Load(string file_name) = 0;

 protected:
  // weight of recognition cost. This includes the language model cost
  double reco_wgt_;
  // weight of size cost
  double size_wgt_;
  // weight of character bigrams cost
  double char_bigrams_wgt_;
  // weight of word unigrams cost
  double word_unigrams_wgt_;
  // Maximum number of segments per character
  int max_seg_per_char_;
  // Beam width equal to the maximum number of nodes kept in the beam search
  // trellis column after pruning
  int beam_width_;
  // Classifier type: See enum type_classifer for classifier types
  type_classifer tp_classifier_;
  // Feature types: See enum type_feature for feature types
  type_feature tp_feat_;
  // Grid size to scale a grapheme bitmap used by the BMP feature type
  int conv_grid_size_;
  // Histogram window size as a ratio of the word height used in computing
  // the vertical pixel density histogram in the segmentation algorithm
  int hist_wind_wid_;
  // Minimum possible size of a connected component
  int min_con_comp_size_;
  // Maximum aspect ratio of a word (width / height)
  double max_word_aspect_ratio_;
  // Minimum ratio relative to the line height of a gap to be considered as
  // a word break
  double min_space_height_ratio_;
  // Maximum ratio relative to the line height of a gap to be considered as
  // a definite word break
  double max_space_height_ratio_;
  // When Cube and Tesseract are run in combined mode, only run
  // combiner classifier when tesseract confidence is below this
  // threshold. When Cube is run without Tesseract, this is ignored.
  double combiner_run_thresh_;
  // When Cube and tesseract are run in combined mode, threshold on
  // output of combiner binary classifier (chosen from ROC during
  // combiner training). When Cube is run without Tesseract, this is ignored.
  double combiner_classifier_thresh_;
};
}  // namespace tesseract

#endif  // TUNING_PARAMS_H