tesseract-doc/tuning__params_8h_source.html

00001 /**********************************************************************
00002  * File:        tuning_params.h
00003  * Description: Declaration of the Tuning Parameters Base Class
00004  * Author:    Ahmad Abdulkader
00005  * Created:   2008
00006  *
00007  * (C) Copyright 2008, Google Inc.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019
00020 // The TuningParams class abstracts all the parameters that can be learned or
00021 // tuned during the training process. It is a base class that all TuningParams
00022 // classes should inherit from.
00023
00024 #ifndef TUNING_PARAMS_H
00025 #define TUNING_PARAMS_H
00026
00027 #include <string>
00028 #ifdef USE_STD_NAMESPACE
00029 using std::string;
00030 #endif
00031
00032 namespace tesseract {
// Abstract base class holding every parameter that can be learned or tuned
// during training. Concrete subclasses supply the persistence logic by
// implementing Save() and Load().
//
// The default constructor puts every parameter into a deterministic state
// (zero for numeric values, the first enumerator for enums) so that an
// accessor called before a subclass has assigned real values never reads an
// indeterminate member.
class TuningParams {
 public:
  // Kind of classifier in use. (The spelling of the type name is kept as-is
  // because it is part of the public interface.)
  enum type_classifer {
    NN,
    HYBRID_NN
  };
  // Kind of feature extraction in use.
  enum type_feature {
    BMP,
    CHEBYSHEV,
    HYBRID
  };

  // Initializes all parameters to neutral values; the initializer order
  // mirrors the member declaration order below.
  TuningParams()
      : reco_wgt_(0.0),
        size_wgt_(0.0),
        char_bigrams_wgt_(0.0),
        word_unigrams_wgt_(0.0),
        max_seg_per_char_(0),
        beam_width_(0),
        tp_classifier_(NN),
        tp_feat_(BMP),
        conv_grid_size_(0),
        hist_wind_wid_(0),
        min_con_comp_size_(0),
        max_word_aspect_ratio_(0.0),
        min_space_height_ratio_(0.0),
        max_space_height_ratio_(0.0),
        combiner_run_thresh_(0.0),
        combiner_classifier_thresh_(0.0) {}
  virtual ~TuningParams() {}

  // Read accessors. The enum-valued parameters are returned as int.
  double RecoWgt() const { return reco_wgt_; }
  double SizeWgt() const { return size_wgt_; }
  double CharBigramWgt() const { return char_bigrams_wgt_; }
  double WordUnigramWgt() const { return word_unigrams_wgt_; }
  int MaxSegPerChar() const { return max_seg_per_char_; }
  int BeamWidth() const { return beam_width_; }
  int TypeClassifier() const { return tp_classifier_; }
  int TypeFeature() const { return tp_feat_; }
  int ConvGridSize() const { return conv_grid_size_; }
  int HistWindWid() const { return hist_wind_wid_; }
  int MinConCompSize() const { return min_con_comp_size_; }
  double MaxWordAspectRatio() const { return max_word_aspect_ratio_; }
  double MinSpaceHeightRatio() const { return min_space_height_ratio_; }
  double MaxSpaceHeightRatio() const { return max_space_height_ratio_; }
  double CombinerRunThresh() const { return combiner_run_thresh_; }
  double CombinerClassifierThresh() const {
    return combiner_classifier_thresh_;
  }

  // Write accessors. Each one stores its argument unchanged; no validation
  // is performed.
  void SetRecoWgt(double wgt) { reco_wgt_ = wgt; }
  void SetSizeWgt(double wgt) { size_wgt_ = wgt; }
  void SetCharBigramWgt(double wgt) { char_bigrams_wgt_ = wgt; }
  void SetWordUnigramWgt(double wgt) { word_unigrams_wgt_ = wgt; }
  void SetMaxSegPerChar(int max_seg_per_char) {
    max_seg_per_char_ = max_seg_per_char;
  }
  void SetBeamWidth(int beam_width) { beam_width_ = beam_width; }
  void SetTypeClassifier(type_classifer tp_classifier) {
    tp_classifier_ = tp_classifier;
  }
  void SetTypeFeature(type_feature tp_feat) { tp_feat_ = tp_feat; }
  void SetHistWindWid(int hist_wind_wid) {
    hist_wind_wid_ = hist_wind_wid;
  }

  // Persistence hooks implemented by subclasses. The bool result and the
  // meaning of file_name are defined by the implementing class.
  // The parameter type is spelled std::string so this header does not depend
  // on USE_STD_NAMESPACE being defined.
  virtual bool Save(std::string file_name) = 0;
  virtual bool Load(std::string file_name) = 0;

 protected:
  // weight of recognition cost. This includes the language model cost
  double reco_wgt_;
  // weight of size cost
  double size_wgt_;
  // weight of character bigrams cost
  double char_bigrams_wgt_;
  // weight of word unigrams cost
  double word_unigrams_wgt_;
  // Maximum number of segments per character
  int max_seg_per_char_;
  // Beam width equal to the maximum number of nodes kept in the beam search
  // trellis column after pruning
  int beam_width_;
  // Classifier type: See enum type_classifer for classifier types
  type_classifer tp_classifier_;
  // Feature types: See enum type_feature for feature types
  type_feature tp_feat_;
  // Grid size to scale a grapheme bitmap used by the BMP feature type
  int conv_grid_size_;
  // Histogram window size as a ratio of the word height used in computing
  // the vertical pixel density histogram in the segmentation algorithm
  int hist_wind_wid_;
  // Minimum possible size of a connected component
  int min_con_comp_size_;
  // Maximum aspect ratio of a word (width / height)
  double max_word_aspect_ratio_;
  // Minimum ratio relative to the line height of a gap to be considered as
  // a word break
  double min_space_height_ratio_;
  // Maximum ratio relative to the line height of a gap to be considered as
  // a definite word break
  double max_space_height_ratio_;
  // When Cube and Tesseract are run in combined mode, only run
  // combiner classifier when tesseract confidence is below this
  // threshold. When Cube is run without Tesseract, this is ignored.
  double combiner_run_thresh_;
  // When Cube and tesseract are run in combined mode, threshold on
  // output of combiner binary classifier (chosen from ROC during
  // combiner training). When Cube is run without Tesseract, this is ignored.
  double combiner_classifier_thresh_;
};
00127 }
00128
00129 #endif  // TUNING_PARAMS_H