Tesseract  3.02
tesseract-ocr/cube/conv_net_classifier.h
Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        conv_net_classifier.h
00003  * Description: Declaration of Convolutional-NeuralNet Character Classifier
00004  * Author:    Ahmad Abdulkader
00005  * Created:   2007
00006  *
00007  * (C) Copyright 2008, Google Inc.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
00020 // The ConvNetCharClassifier inherits from the base classifier class
00021 // "CharClassifier". It implements a Convolutional Neural Net classifier
00022 // instance of the base classifier. It uses the Tesseract Neural Net library.
00023 // The classifier takes a scaled version of a bitmap, feeds it to a
00024 // Convolutional Neural Net as input and performs a FeedForward. Each output
00025 // of the net corresponds to a class_id in the CharSet passed at construction
00026 // time.
00027 // Afterwards, the outputs of the Net are "folded" using the folding sets
00028 // (if any).
00029 #ifndef CONV_NET_CLASSIFIER_H
00030 #define CONV_NET_CLASSIFIER_H
00031 
00032 #include <string>
00033 #include "char_samp.h"
00034 #include "char_altlist.h"
00035 #include "char_set.h"
00036 #include "feature_base.h"
00037 #include "classifier_base.h"
00038 #include "neural_net.h"
00039 #include "lang_model.h"
00040 #include "tuning_params.h"
00041 
00042 namespace tesseract {
00043 
00044 // Folding ratio: the fraction of the max-activation of a folding set's
00045 // members that is used to compute the min-activation of the rest of the set.
00046 static const float kFoldingRatio = 0.75;
00047 // Convolutional neural-net character classifier built on CharClassifier.
00048 class ConvNetCharClassifier : public CharClassifier {
00049  public:
00050   ConvNetCharClassifier(CharSet *char_set, TuningParams *params,
00051       FeatureBase *feat_extract);
00052   virtual ~ConvNetCharClassifier();
00053   // The main training function. Given a sample and a class ID the classifier
00054   // updates its parameters according to its learning algorithm. This function
00055   // is currently not implemented. TODO(ahmadab): implement end-2-end training
00056   virtual bool Train(CharSamp *char_samp, int ClassID);
00057   // A secondary function needed for training. Allows the trainer to set the
00058   // value of any train-time parameter. This function is currently not
00059   // implemented. TODO(ahmadab): implement end-2-end training
00060   virtual bool SetLearnParam(char *var_name, float val);
00061   // Externally sets the Neural Net used by the classifier. Used for training.
00062   void SetNet(tesseract::NeuralNet *net);
00063 
00064   // Classifies an input CharSamp and returns a CharAltList object containing
00065   // the possible candidates and their corresponding scores.
00066   virtual CharAltList * Classify(CharSamp *char_samp);
00067   // Computes the cost of a specific CharSamp being a character (versus a
00068   // non-character: part-of-a-character OR more-than-one-character).
00069   virtual int CharCost(CharSamp *char_samp);
00070 
00071 
00072  private:
00073   // Neural Net object used for classification.
00074   tesseract::NeuralNet *char_net_;
00075   // Data buffers used to hold the Neural Net inputs and outputs.
00076   float *net_input_;
00077   float *net_output_;
00078 
00079   // Initializes the classifier given a data-path and a language string.
00080   virtual bool Init(const string &data_file_path, const string &lang,
00081                     LangModel *lang_mod);
00082   // Loads the NeuralNets needed for the classifier.
00083   bool LoadNets(const string &data_file_path, const string &lang);
00084   // Loads the folding sets given a data-path and a language string.
00085   virtual bool LoadFoldingSets(const string &data_file_path,
00086                                const string &lang,
00087                                LangModel *lang_mod);
00088   // Folds the output of the NeuralNet using the loaded folding sets.
00089   virtual void Fold();
00090   // Scales the input char_samp and feeds it to the NeuralNet as input.
00091   bool RunNets(CharSamp *char_samp);
00092 };
00093 }
00094 #endif  // CONV_NET_CLASSIFIER_H