Tesseract
3.02
|
00001 /********************************************************************** 00002 * File: conv_net_classifier.h 00003 * Description: Declaration of Convolutional-NeuralNet Character Classifier 00004 * Author: Ahmad Abdulkader 00005 * Created: 2007 00006 * 00007 * (C) Copyright 2008, Google Inc. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 * 00018 **********************************************************************/ 00019 00020 // The ConvNetCharClassifier inherits from the base classifier class: 00021 // "CharClassifierBase". It implements a Convolutional Neural Net classifier 00022 // instance of the base classifier. It uses the Tesseract Neural Net library 00023 // The Neural Net takes a scaled version of a bitmap and feeds it to a 00024 // Convolutional Neural Net as input and performs a FeedForward. Each output 00025 // of the net corresponds to class_id in the CharSet passed at construction 00026 // time. 00027 // Afterwards, the outputs of the Net are "folded" using the folding set 00028 // (if any) 00029 #ifndef CONV_NET_CLASSIFIER_H 00030 #define CONV_NET_CLASSIFIER_H 00031 00032 #include <string> 00033 #include "char_samp.h" 00034 #include "char_altlist.h" 00035 #include "char_set.h" 00036 #include "feature_base.h" 00037 #include "classifier_base.h" 00038 #include "neural_net.h" 00039 #include "lang_model.h" 00040 #include "tuning_params.h" 00041 00042 namespace tesseract { 00043 00044 // Folding Ratio is the ratio of the max-activation of members of a folding 00045 // set that is used to compute the min-activation of the rest of the set 00046 static const float kFoldingRatio = 0.75; 00047 00048 class ConvNetCharClassifier : public CharClassifier { 00049 public: 00050 ConvNetCharClassifier(CharSet *char_set, TuningParams *params, 00051 FeatureBase *feat_extract); 00052 virtual ~ConvNetCharClassifier(); 00053 // The main training function. Given a sample and a class ID the classifier 00054 // updates its parameters according to its learning algorithm. This function 00055 // is currently not implemented. TODO(ahmadab): implement end-2-end training 00056 virtual bool Train(CharSamp *char_samp, int ClassID); 00057 // A secondary function needed for training. Allows the trainer to set the 00058 // value of any train-time paramter. This function is currently not 00059 // implemented. TODO(ahmadab): implement end-2-end training 00060 virtual bool SetLearnParam(char *var_name, float val); 00061 // Externally sets the Neural Net used by the classifier. Used for training 00062 void SetNet(tesseract::NeuralNet *net); 00063 00064 // Classifies an input charsamp and return a CharAltList object containing 00065 // the possible candidates and corresponding scores 00066 virtual CharAltList * Classify(CharSamp *char_samp); 00067 // Computes the cost of a specific charsamp being a character (versus a 00068 // non-character: part-of-a-character OR more-than-one-character) 00069 virtual int CharCost(CharSamp *char_samp); 00070 00071 00072 private: 00073 // Neural Net object used for classification 00074 tesseract::NeuralNet *char_net_; 00075 // data buffers used to hold Neural Net inputs and outputs 00076 float *net_input_; 00077 float *net_output_; 00078 00079 // Init the classifier provided a data-path and a language string 00080 virtual bool Init(const string &data_file_path, const string &lang, 00081 LangModel *lang_mod); 00082 // Loads the NeuralNets needed for the classifier 00083 bool LoadNets(const string &data_file_path, const string &lang); 00084 // Loads the folding sets provided a data-path and a language string 00085 virtual bool LoadFoldingSets(const string &data_file_path, 00086 const string &lang, 00087 LangModel *lang_mod); 00088 // Folds the output of the NeuralNet using the loaded folding sets 00089 virtual void Fold(); 00090 // Scales the input char_samp and feeds it to the NeuralNet as input 00091 bool RunNets(CharSamp *char_samp); 00092 }; 00093 } 00094 #endif // CONV_NET_CLASSIFIER_H