// Tesseract 3.02
00001 // Copyright 2010 Google Inc. All Rights Reserved. 00002 // Author: rays@google.com (Ray Smith) 00003 // 00004 // Licensed under the Apache License, Version 2.0 (the "License"); 00005 // you may not use this file except in compliance with the License. 00006 // You may obtain a copy of the License at 00007 // http://www.apache.org/licenses/LICENSE-2.0 00008 // Unless required by applicable law or agreed to in writing, software 00009 // distributed under the License is distributed on an "AS IS" BASIS, 00010 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00011 // See the License for the specific language governing permissions and 00012 // limitations under the License. 00013 // 00015 00016 #ifndef TESSERACT_TRAINING_TRAININGSAMPLE_H__ 00017 #define TESSERACT_TRAINING_TRAININGSAMPLE_H__ 00018 00019 #include "elst.h" 00020 #include "featdefs.h" 00021 #include "intfx.h" 00022 #include "intmatcher.h" 00023 #include "matrix.h" 00024 #include "mf.h" 00025 #include "picofeat.h" 00026 #include "shapetable.h" 00027 #include "unicharset.h" 00028 00029 struct Pix; 00030 00031 namespace tesseract { 00032 00033 class IntFeatureMap; 00034 class IntFeatureSpace; 00035 class ShapeTable; 00036 00037 // Number of elements of cn_feature_. 00038 static const int kNumCNParams = 4; 00039 // Number of ways to shift the features when randomizing. 00040 static const int kSampleYShiftSize = 5; 00041 // Number of ways to scale the features when randomizing. 00042 static const int kSampleScaleSize = 3; 00043 // Total number of different ways to manipulate the features when randomizing. 00044 // The first and last combinations are removed to avoid an excessive 00045 // top movement (first) and an identity transformation (last). 00046 // WARNING: To avoid patterned duplication of samples, be sure to keep 00047 // kSampleRandomSize prime! 
00048 // Eg with current values (kSampleYShiftSize = 5 and TkSampleScaleSize = 3) 00049 // kSampleRandomSize is 13, which is prime. 00050 static const int kSampleRandomSize = kSampleYShiftSize * kSampleScaleSize - 2; 00051 // ASSERT_IS_PRIME(kSampleRandomSize) !! 00052 00053 class TrainingSample : public ELIST_LINK { 00054 public: 00055 TrainingSample() 00056 : class_id_(INVALID_UNICHAR_ID), font_id_(0), page_num_(0), 00057 num_features_(0), num_micro_features_(0), 00058 features_(NULL), micro_features_(NULL), weight_(1.0), 00059 max_dist_(0.0), sample_index_(0), 00060 features_are_indexed_(false), features_are_mapped_(false), 00061 is_error_(false) { 00062 } 00063 ~TrainingSample(); 00064 00065 // Saves the given features into a TrainingSample. The features are copied, 00066 // so may be deleted afterwards. Delete the return value after use. 00067 static TrainingSample* CopyFromFeatures(const INT_FX_RESULT_STRUCT& fx_info, 00068 const INT_FEATURE_STRUCT* features, 00069 int num_features); 00070 // Constructs and returns a copy "randomized" by the method given by 00071 // the randomizer index. If index is out of [0, kSampleRandomSize) then 00072 // an exact copy is returned. 00073 TrainingSample* RandomizedCopy(int index) const; 00074 // Constructs and returns an exact copy. 00075 TrainingSample* Copy() const; 00076 00077 // WARNING! Serialize/DeSerialize do not save/restore the "cache" data 00078 // members, which is mostly the mapped features, and the weight. 00079 // It is assumed these can all be reconstructed from what is saved. 00080 // Writes to the given file. Returns false in case of error. 00081 bool Serialize(FILE* fp) const; 00082 // Creates from the given file. Returns NULL in case of error. 00083 // If swap is true, assumes a big/little-endian swap is needed. 00084 static TrainingSample* DeSerializeCreate(bool swap, FILE* fp); 00085 // Reads from the given file. Returns false in case of error. 
00086 // If swap is true, assumes a big/little-endian swap is needed. 00087 bool DeSerialize(bool swap, FILE* fp); 00088 00089 // Extracts the needed information from the CHAR_DESC_STRUCT. 00090 void ExtractCharDesc(int feature_type, int micro_type, 00091 int cn_type, int geo_type, 00092 CHAR_DESC_STRUCT* char_desc); 00093 00094 // Sets the mapped_features_ from the features_ using the provided 00095 // feature_space to the indexed versions of the features. 00096 void IndexFeatures(const IntFeatureSpace& feature_space); 00097 // Sets the mapped_features_ from the features_ using the provided 00098 // feature_map. 00099 void MapFeatures(const IntFeatureMap& feature_map); 00100 00101 // Returns a pix representing the sample. (Int features only.) 00102 Pix* RenderToPix(const UNICHARSET* unicharset) const; 00103 // Displays the features in the given window with the given color. 00104 void DisplayFeatures(ScrollView::Color color, ScrollView* window) const; 00105 00106 // Returns a pix of the original sample image. The pix is padded all round 00107 // by padding wherever possible. 00108 // The returned Pix must be pixDestroyed after use. 00109 // If the input page_pix is NULL, NULL is returned. 00110 Pix* GetSamplePix(int padding, Pix* page_pix) const; 00111 00112 // Accessors. 
00113 UNICHAR_ID class_id() const { 00114 return class_id_; 00115 } 00116 void set_class_id(int id) { 00117 class_id_ = id; 00118 } 00119 int font_id() const { 00120 return font_id_; 00121 } 00122 void set_font_id(int id) { 00123 font_id_ = id; 00124 } 00125 int page_num() const { 00126 return page_num_; 00127 } 00128 void set_page_num(int page) { 00129 page_num_ = page; 00130 } 00131 const TBOX& bounding_box() const { 00132 return bounding_box_; 00133 } 00134 void set_bounding_box(const TBOX& box) { 00135 bounding_box_ = box; 00136 } 00137 int num_features() const { 00138 return num_features_; 00139 } 00140 const INT_FEATURE_STRUCT* features() const { 00141 return features_; 00142 } 00143 int num_micro_features() const { 00144 return num_micro_features_; 00145 } 00146 const MicroFeature* micro_features() const { 00147 return micro_features_; 00148 } 00149 float cn_feature(int index) const { 00150 return cn_feature_[index]; 00151 } 00152 int geo_feature(int index) const { 00153 return geo_feature_[index]; 00154 } 00155 double weight() const { 00156 return weight_; 00157 } 00158 void set_weight(double value) { 00159 weight_ = value; 00160 } 00161 double max_dist() const { 00162 return max_dist_; 00163 } 00164 void set_max_dist(double value) { 00165 max_dist_ = value; 00166 } 00167 int sample_index() const { 00168 return sample_index_; 00169 } 00170 void set_sample_index(int value) { 00171 sample_index_ = value; 00172 } 00173 bool features_are_mapped() const { 00174 return features_are_mapped_; 00175 } 00176 const GenericVector<int>& mapped_features() const { 00177 ASSERT_HOST(features_are_mapped_); 00178 return mapped_features_; 00179 } 00180 const GenericVector<int>& indexed_features() const { 00181 ASSERT_HOST(features_are_indexed_); 00182 return mapped_features_; 00183 } 00184 bool is_error() const { 00185 return is_error_; 00186 } 00187 void set_is_error(bool value) { 00188 is_error_ = value; 00189 } 00190 00191 private: 00192 // Unichar id that this sample 
represents. There obviously must be a 00193 // reference UNICHARSET somewhere. Usually in TrainingSampleSet. 00194 UNICHAR_ID class_id_; 00195 // Font id in which this sample was printed. Refers to a fontinfo_table_ in 00196 // MasterTrainer. 00197 int font_id_; 00198 // Number of page that the sample came from. 00199 int page_num_; 00200 // Bounding box of sample in original image. 00201 TBOX bounding_box_; 00202 // Number of INT_FEATURE_STRUCT in features_ array. 00203 int num_features_; 00204 // Number of MicroFeature in micro_features_ array. 00205 int num_micro_features_; 00206 // Array of features. 00207 INT_FEATURE_STRUCT* features_; 00208 // Array of features. 00209 MicroFeature* micro_features_; 00210 // The one and only CN feature. Indexed by NORM_PARAM_NAME enum. 00211 float cn_feature_[kNumCNParams]; 00212 // The one and only geometric feature. (Aims at replacing cn_feature_). 00213 // Indexed by GeoParams enum in picofeat.h 00214 int geo_feature_[GeoCount]; 00215 00216 // Non-serialized cache data. 00217 // Weight used for boosting training. 00218 double weight_; 00219 // Maximum distance to other samples of same class/font used in computing 00220 // the canonical sample. 00221 double max_dist_; 00222 // Global index of this sample. 00223 int sample_index_; 00224 // Indexed/mapped features, as indicated by the bools below. 00225 GenericVector<int> mapped_features_; 00226 bool features_are_indexed_; 00227 bool features_are_mapped_; 00228 // True if the last classification was an error by the current definition. 00229 bool is_error_; 00230 00231 // Randomizing factors. 00232 static const int kYShiftValues[kSampleYShiftSize]; 00233 static const double kScaleValues[kSampleScaleSize]; 00234 }; 00235 00236 ELISTIZEH(TrainingSample) 00237 00238 } // namespace tesseract 00239 00240 #endif // TESSERACT_TRAINING_TRAININGSAMPLE_H__