Tesseract  3.02
tesseract-ocr/cube/classifier_base.h
Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        classifier_base.h
00003  * Description: Declaration of the Base Character Classifier
00004  * Author:    Ahmad Abdulkader
00005  * Created:   2007
00006  *
00007  * (C) Copyright 2008, Google Inc.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
00020 // The CharClassifier class is the abstract class for any character/grapheme
00021 // classifier.
00022 
00023 #ifndef CHAR_CLASSIFIER_BASE_H
00024 #define CHAR_CLASSIFIER_BASE_H
00025 
00026 #include <string>
00027 #include "char_samp.h"
00028 #include "char_altlist.h"
00029 #include "char_set.h"
00030 #include "feature_base.h"
00031 #include "lang_model.h"
00032 #include "tuning_params.h"
00033 
00034 namespace tesseract {
00035 class CharClassifier {
00036  public:
00037   CharClassifier(CharSet *char_set, TuningParams *params,
00038                  FeatureBase *feat_extract) {
00039     char_set_ = char_set;
00040     params_ = params;
00041     feat_extract_ = feat_extract;
00042     fold_sets_ = NULL;
00043     fold_set_cnt_ = 0;
00044     fold_set_len_ = NULL;
00045     init_ = false;
00046     case_sensitive_ = true;
00047   }
00048 
00049   virtual ~CharClassifier() {
00050     if (fold_sets_  != NULL) {
00051       for (int fold_set = 0; fold_set < fold_set_cnt_; fold_set++) {
00052         if (fold_sets_[fold_set] != NULL) {
00053           delete []fold_sets_[fold_set];
00054         }
00055       }
00056       delete []fold_sets_;
00057       fold_sets_ = NULL;
00058     }
00059     if (fold_set_len_ != NULL) {
00060       delete []fold_set_len_;
00061       fold_set_len_ = NULL;
00062     }
00063     if (feat_extract_ != NULL) {
00064       delete feat_extract_;
00065       feat_extract_ = NULL;
00066     }
00067   }
00068 
00069   // pure virtual functions that need to be implemented by any inheriting class
00070   virtual CharAltList * Classify(CharSamp *char_samp) = 0;
00071   virtual int CharCost(CharSamp *char_samp) = 0;
00072   virtual bool Train(CharSamp *char_samp, int ClassID) = 0;
00073   virtual bool SetLearnParam(char *var_name, float val) = 0;
00074   virtual bool Init(const string &data_file_path, const string &lang,
00075                     LangModel *lang_mod) = 0;
00076 
00077   // accessors
00078   FeatureBase *FeatureExtractor() {return feat_extract_;}
00079   inline bool CaseSensitive() const { return case_sensitive_; }
00080   inline void SetCaseSensitive(bool case_sensitive) {
00081     case_sensitive_ = case_sensitive;
00082   }
00083 
00084  protected:
00085   virtual void Fold() = 0;
00086   virtual bool LoadFoldingSets(const string &data_file_path,
00087                                const string &lang,
00088                                LangModel *lang_mod) = 0;
00089   FeatureBase *feat_extract_;
00090   CharSet *char_set_;
00091   TuningParams *params_;
00092   int **fold_sets_;
00093   int *fold_set_len_;
00094   int fold_set_cnt_;
00095   bool init_;
00096   bool case_sensitive_;
00097 };
00098 }  // tesseract
00099 
00100 #endif  // CHAR_CLASSIFIER_BASE_H