Tesseract  3.02
tesseract-ocr/classify/trainingsample.h
Go to the documentation of this file.
00001 // Copyright 2010 Google Inc. All Rights Reserved.
00002 // Author: rays@google.com (Ray Smith)
00003 //
00004 // Licensed under the Apache License, Version 2.0 (the "License");
00005 // you may not use this file except in compliance with the License.
00006 // You may obtain a copy of the License at
00007 // http://www.apache.org/licenses/LICENSE-2.0
00008 // Unless required by applicable law or agreed to in writing, software
00009 // distributed under the License is distributed on an "AS IS" BASIS,
00010 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00011 // See the License for the specific language governing permissions and
00012 // limitations under the License.
00013 //
00015 
00016 #ifndef TESSERACT_TRAINING_TRAININGSAMPLE_H__
00017 #define TESSERACT_TRAINING_TRAININGSAMPLE_H__
00018 
00019 #include "elst.h"
00020 #include "featdefs.h"
00021 #include "intfx.h"
00022 #include "intmatcher.h"
00023 #include "matrix.h"
00024 #include "mf.h"
00025 #include "picofeat.h"
00026 #include "shapetable.h"
00027 #include "unicharset.h"
00028 
00029 struct Pix;
00030 
00031 namespace tesseract {
00032 
00033 class IntFeatureMap;
00034 class IntFeatureSpace;
00035 class ShapeTable;
00036 
// Length of the cn_feature_ array held by each TrainingSample.
const int kNumCNParams = 4;
// How many distinct y-shifts are available when randomizing features.
const int kSampleYShiftSize = 5;
// How many distinct scale factors are available when randomizing features.
const int kSampleScaleSize = 3;
// Count of usable (shift, scale) combinations for randomizing features.
// Two of the kSampleYShiftSize * kSampleScaleSize combinations are dropped:
// the first, because it moves the top excessively, and the last, because it
// is the identity transformation.
// WARNING: kSampleRandomSize must stay prime, otherwise samples get
// duplicated in a patterned way.
// E.g. with the current values (kSampleYShiftSize = 5 and
// kSampleScaleSize = 3) kSampleRandomSize is 13, which is prime.
const int kSampleRandomSize = (kSampleYShiftSize * kSampleScaleSize) - 2;
// ASSERT_IS_PRIME(kSampleRandomSize) !!
00052 
00053 class TrainingSample : public ELIST_LINK {
00054  public:
00055   TrainingSample()
00056     : class_id_(INVALID_UNICHAR_ID), font_id_(0), page_num_(0),
00057       num_features_(0), num_micro_features_(0),
00058       features_(NULL), micro_features_(NULL), weight_(1.0),
00059       max_dist_(0.0), sample_index_(0),
00060       features_are_indexed_(false), features_are_mapped_(false),
00061       is_error_(false) {
00062   }
00063   ~TrainingSample();
00064 
00065   // Saves the given features into a TrainingSample. The features are copied,
00066   // so may be deleted afterwards. Delete the return value after use.
00067   static TrainingSample* CopyFromFeatures(const INT_FX_RESULT_STRUCT& fx_info,
00068                                           const INT_FEATURE_STRUCT* features,
00069                                           int num_features);
00070   // Constructs and returns a copy "randomized" by the method given by
00071   // the randomizer index. If index is out of [0, kSampleRandomSize) then
00072   // an exact copy is returned.
00073   TrainingSample* RandomizedCopy(int index) const;
00074   // Constructs and returns an exact copy.
00075   TrainingSample* Copy() const;
00076 
00077   // WARNING! Serialize/DeSerialize do not save/restore the "cache" data
00078   // members, which is mostly the mapped features, and the weight.
00079   // It is assumed these can all be reconstructed from what is saved.
00080   // Writes to the given file. Returns false in case of error.
00081   bool Serialize(FILE* fp) const;
00082   // Creates from the given file. Returns NULL in case of error.
00083   // If swap is true, assumes a big/little-endian swap is needed.
00084   static TrainingSample* DeSerializeCreate(bool swap, FILE* fp);
00085   // Reads from the given file. Returns false in case of error.
00086   // If swap is true, assumes a big/little-endian swap is needed.
00087   bool DeSerialize(bool swap, FILE* fp);
00088 
00089   // Extracts the needed information from the CHAR_DESC_STRUCT.
00090   void ExtractCharDesc(int feature_type, int micro_type,
00091                        int cn_type, int geo_type,
00092                        CHAR_DESC_STRUCT* char_desc);
00093 
00094   // Sets the mapped_features_ from the features_ using the provided
00095   // feature_space to the indexed versions of the features.
00096   void IndexFeatures(const IntFeatureSpace& feature_space);
00097   // Sets the mapped_features_ from the features_ using the provided
00098   // feature_map.
00099   void MapFeatures(const IntFeatureMap& feature_map);
00100 
00101   // Returns a pix representing the sample. (Int features only.)
00102   Pix* RenderToPix(const UNICHARSET* unicharset) const;
00103   // Displays the features in the given window with the given color.
00104   void DisplayFeatures(ScrollView::Color color, ScrollView* window) const;
00105 
00106   // Returns a pix of the original sample image. The pix is padded all round
00107   // by padding wherever possible.
00108   // The returned Pix must be pixDestroyed after use.
00109   // If the input page_pix is NULL, NULL is returned.
00110   Pix* GetSamplePix(int padding, Pix* page_pix) const;
00111 
00112   // Accessors.
00113   UNICHAR_ID class_id() const {
00114     return class_id_;
00115   }
00116   void set_class_id(int id) {
00117     class_id_ = id;
00118   }
00119   int font_id() const {
00120     return font_id_;
00121   }
00122   void set_font_id(int id) {
00123     font_id_ = id;
00124   }
00125   int page_num() const {
00126     return page_num_;
00127   }
00128   void set_page_num(int page) {
00129     page_num_ = page;
00130   }
00131   const TBOX& bounding_box() const {
00132     return bounding_box_;
00133   }
00134   void set_bounding_box(const TBOX& box) {
00135     bounding_box_ = box;
00136   }
00137   int num_features() const {
00138     return num_features_;
00139   }
00140   const INT_FEATURE_STRUCT* features() const {
00141     return features_;
00142   }
00143   int num_micro_features() const {
00144     return num_micro_features_;
00145   }
00146   const MicroFeature* micro_features() const {
00147     return micro_features_;
00148   }
00149   float cn_feature(int index) const {
00150     return cn_feature_[index];
00151   }
00152   int geo_feature(int index) const {
00153     return geo_feature_[index];
00154   }
00155   double weight() const {
00156     return weight_;
00157   }
00158   void set_weight(double value) {
00159     weight_ = value;
00160   }
00161   double max_dist() const {
00162     return max_dist_;
00163   }
00164   void set_max_dist(double value) {
00165     max_dist_ = value;
00166   }
00167   int sample_index() const {
00168     return sample_index_;
00169   }
00170   void set_sample_index(int value) {
00171     sample_index_ = value;
00172   }
00173   bool features_are_mapped() const {
00174     return features_are_mapped_;
00175   }
00176   const GenericVector<int>& mapped_features() const {
00177     ASSERT_HOST(features_are_mapped_);
00178     return mapped_features_;
00179   }
00180   const GenericVector<int>& indexed_features() const {
00181     ASSERT_HOST(features_are_indexed_);
00182     return mapped_features_;
00183   }
00184   bool is_error() const {
00185     return is_error_;
00186   }
00187   void set_is_error(bool value) {
00188     is_error_ = value;
00189   }
00190 
00191  private:
00192   // Unichar id that this sample represents. There obviously must be a
00193   // reference UNICHARSET somewhere. Usually in TrainingSampleSet.
00194   UNICHAR_ID class_id_;
00195   // Font id in which this sample was printed. Refers to a fontinfo_table_ in
00196   // MasterTrainer.
00197   int font_id_;
00198   // Number of page that the sample came from.
00199   int page_num_;
00200   // Bounding box of sample in original image.
00201   TBOX bounding_box_;
00202   // Number of INT_FEATURE_STRUCT in features_ array.
00203   int num_features_;
00204   // Number of MicroFeature in micro_features_ array.
00205   int num_micro_features_;
00206   // Array of features.
00207   INT_FEATURE_STRUCT* features_;
00208   // Array of features.
00209   MicroFeature* micro_features_;
00210   // The one and only CN feature. Indexed by NORM_PARAM_NAME enum.
00211   float cn_feature_[kNumCNParams];
00212   // The one and only geometric feature. (Aims at replacing cn_feature_).
00213   // Indexed by GeoParams enum in picofeat.h
00214   int geo_feature_[GeoCount];
00215 
00216   // Non-serialized cache data.
00217   // Weight used for boosting training.
00218   double weight_;
00219   // Maximum distance to other samples of same class/font used in computing
00220   // the canonical sample.
00221   double max_dist_;
00222   // Global index of this sample.
00223   int sample_index_;
00224   // Indexed/mapped features, as indicated by the bools below.
00225   GenericVector<int> mapped_features_;
00226   bool features_are_indexed_;
00227   bool features_are_mapped_;
00228   // True if the last classification was an error by the current definition.
00229   bool is_error_;
00230 
00231   // Randomizing factors.
00232   static const int kYShiftValues[kSampleYShiftSize];
00233   static const double kScaleValues[kSampleScaleSize];
00234 };
00235 
00236 ELISTIZEH(TrainingSample)
00237 
00238 }  // namespace tesseract
00239 
00240 #endif  // TESSERACT_TRAINING_TRAININGSAMPLE_H__