Tesseract
3.02
|
/**********************************************************************
 * File: classifier_base.h
 * Description: Declaration of the Base Character Classifier
 * Author: Ahmad Abdulkader
 * Created: 2007
 *
 * (C) Copyright 2008, Google Inc.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/

// The CharClassifier class is the abstract class for any character/grapheme
// classifier.
00022 00023 #ifndef CHAR_CLASSIFIER_BASE_H 00024 #define CHAR_CLASSIFIER_BASE_H 00025 00026 #include <string> 00027 #include "char_samp.h" 00028 #include "char_altlist.h" 00029 #include "char_set.h" 00030 #include "feature_base.h" 00031 #include "lang_model.h" 00032 #include "tuning_params.h" 00033 00034 namespace tesseract { 00035 class CharClassifier { 00036 public: 00037 CharClassifier(CharSet *char_set, TuningParams *params, 00038 FeatureBase *feat_extract) { 00039 char_set_ = char_set; 00040 params_ = params; 00041 feat_extract_ = feat_extract; 00042 fold_sets_ = NULL; 00043 fold_set_cnt_ = 0; 00044 fold_set_len_ = NULL; 00045 init_ = false; 00046 case_sensitive_ = true; 00047 } 00048 00049 virtual ~CharClassifier() { 00050 if (fold_sets_ != NULL) { 00051 for (int fold_set = 0; fold_set < fold_set_cnt_; fold_set++) { 00052 if (fold_sets_[fold_set] != NULL) { 00053 delete []fold_sets_[fold_set]; 00054 } 00055 } 00056 delete []fold_sets_; 00057 fold_sets_ = NULL; 00058 } 00059 if (fold_set_len_ != NULL) { 00060 delete []fold_set_len_; 00061 fold_set_len_ = NULL; 00062 } 00063 if (feat_extract_ != NULL) { 00064 delete feat_extract_; 00065 feat_extract_ = NULL; 00066 } 00067 } 00068 00069 // pure virtual functions that need to be implemented by any inheriting class 00070 virtual CharAltList * Classify(CharSamp *char_samp) = 0; 00071 virtual int CharCost(CharSamp *char_samp) = 0; 00072 virtual bool Train(CharSamp *char_samp, int ClassID) = 0; 00073 virtual bool SetLearnParam(char *var_name, float val) = 0; 00074 virtual bool Init(const string &data_file_path, const string &lang, 00075 LangModel *lang_mod) = 0; 00076 00077 // accessors 00078 FeatureBase *FeatureExtractor() {return feat_extract_;} 00079 inline bool CaseSensitive() const { return case_sensitive_; } 00080 inline void SetCaseSensitive(bool case_sensitive) { 00081 case_sensitive_ = case_sensitive; 00082 } 00083 00084 protected: 00085 virtual void Fold() = 0; 00086 virtual bool LoadFoldingSets(const 
string &data_file_path, 00087 const string &lang, 00088 LangModel *lang_mod) = 0; 00089 FeatureBase *feat_extract_; 00090 CharSet *char_set_; 00091 TuningParams *params_; 00092 int **fold_sets_; 00093 int *fold_set_len_; 00094 int fold_set_cnt_; 00095 bool init_; 00096 bool case_sensitive_; 00097 }; 00098 } // tesseract 00099 00100 #endif // CHAR_CLASSIFIER_BASE_H