// Tesseract 3.02 — classify/trainingsample.cpp
// Copyright 2010 Google Inc. All Rights Reserved.
// Author: rays@google.com (Ray Smith)
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

#include "trainingsample.h"

#include <math.h>
#include "allheaders.h"
#include "helpers.h"
#include "intfeaturemap.h"
#include "normfeat.h"
#include "shapetable.h"

namespace tesseract {

ELISTIZE(TrainingSample)

// Center of randomizing operations: RandomizedCopy scales feature X/Y
// coordinates about this point (the middle of the 0..255 coordinate range).
const int kRandomizingCenter = 128;

// Randomizing factors.
// Vertical shifts (in 0..255 coordinate units) applied by RandomizedCopy.
const int TrainingSample::kYShiftValues[kSampleYShiftSize] = {
  6, 3, -3, -6, 0
};
// Scale factors applied about kRandomizingCenter by RandomizedCopy.
const double TrainingSample::kScaleValues[kSampleScaleSize] = {
  1.0625, 0.9375, 1.0
};

// Releases the owned feature arrays. delete[] on NULL is a safe no-op, so
// no guards are needed.
TrainingSample::~TrainingSample() {
  delete [] features_;
  delete [] micro_features_;
}

// WARNING! Serialize/DeSerialize do not save/restore the "cache" data
// members, which is mostly the mapped features, and the weight.
// It is assumed these can all be reconstructed from what is saved.
// Writes to the given file. Returns false in case of error.
00049 bool TrainingSample::Serialize(FILE* fp) const { 00050 if (fwrite(&class_id_, sizeof(class_id_), 1, fp) != 1) return false; 00051 if (fwrite(&font_id_, sizeof(font_id_), 1, fp) != 1) return false; 00052 if (fwrite(&page_num_, sizeof(page_num_), 1, fp) != 1) return false; 00053 if (!bounding_box_.Serialize(fp)) return false; 00054 if (fwrite(&num_features_, sizeof(num_features_), 1, fp) != 1) return false; 00055 if (fwrite(&num_micro_features_, sizeof(num_micro_features_), 1, fp) != 1) 00056 return false; 00057 if (fwrite(features_, sizeof(*features_), num_features_, fp) != num_features_) 00058 return false; 00059 if (fwrite(micro_features_, sizeof(*micro_features_), num_micro_features_, 00060 fp) != num_micro_features_) 00061 return false; 00062 if (fwrite(cn_feature_, sizeof(*cn_feature_), kNumCNParams, fp) != 00063 kNumCNParams) return false; 00064 if (fwrite(geo_feature_, sizeof(*geo_feature_), GeoCount, fp) != GeoCount) 00065 return false; 00066 return true; 00067 } 00068 00069 // Creates from the given file. Returns NULL in case of error. 00070 // If swap is true, assumes a big/little-endian swap is needed. 00071 TrainingSample* TrainingSample::DeSerializeCreate(bool swap, FILE* fp) { 00072 TrainingSample* sample = new TrainingSample; 00073 if (sample->DeSerialize(swap, fp)) return sample; 00074 delete sample; 00075 return NULL; 00076 } 00077 00078 // Reads from the given file. Returns false in case of error. 00079 // If swap is true, assumes a big/little-endian swap is needed. 
00080 bool TrainingSample::DeSerialize(bool swap, FILE* fp) { 00081 if (fread(&class_id_, sizeof(class_id_), 1, fp) != 1) return false; 00082 if (fread(&font_id_, sizeof(font_id_), 1, fp) != 1) return false; 00083 if (fread(&page_num_, sizeof(page_num_), 1, fp) != 1) return false; 00084 if (!bounding_box_.DeSerialize(swap, fp)) return false; 00085 if (fread(&num_features_, sizeof(num_features_), 1, fp) != 1) return false; 00086 if (fread(&num_micro_features_, sizeof(num_micro_features_), 1, fp) != 1) 00087 return false; 00088 if (swap) { 00089 ReverseN(&class_id_, sizeof(class_id_)); 00090 ReverseN(&num_features_, sizeof(num_features_)); 00091 ReverseN(&num_micro_features_, sizeof(num_micro_features_)); 00092 } 00093 delete [] features_; 00094 features_ = new INT_FEATURE_STRUCT[num_features_]; 00095 if (fread(features_, sizeof(*features_), num_features_, fp) != num_features_) 00096 return false; 00097 delete [] micro_features_; 00098 micro_features_ = new MicroFeature[num_micro_features_]; 00099 if (fread(micro_features_, sizeof(*micro_features_), num_micro_features_, 00100 fp) != num_micro_features_) 00101 return false; 00102 if (fread(cn_feature_, sizeof(*cn_feature_), kNumCNParams, fp) != 00103 kNumCNParams) return false; 00104 if (fread(geo_feature_, sizeof(*geo_feature_), GeoCount, fp) != GeoCount) 00105 return false; 00106 return true; 00107 } 00108 00109 // Saves the given features into a TrainingSample. 
00110 TrainingSample* TrainingSample::CopyFromFeatures( 00111 const INT_FX_RESULT_STRUCT& fx_info, const INT_FEATURE_STRUCT* features, 00112 int num_features) { 00113 TrainingSample* sample = new TrainingSample; 00114 sample->num_features_ = num_features; 00115 sample->features_ = new INT_FEATURE_STRUCT[num_features]; 00116 memcpy(sample->features_, features, num_features * sizeof(features[0])); 00117 sample->geo_feature_[GeoBottom] = fx_info.YBottom; 00118 sample->geo_feature_[GeoTop] = fx_info.YTop; 00119 sample->geo_feature_[GeoWidth] = fx_info.Width; 00120 sample->features_are_indexed_ = false; 00121 sample->features_are_mapped_ = false; 00122 return sample; 00123 } 00124 00125 // Constructs and returns a copy randomized by the method given by 00126 // the randomizer index. If index is out of [0, kSampleRandomSize) then 00127 // an exact copy is returned. 00128 TrainingSample* TrainingSample::RandomizedCopy(int index) const { 00129 TrainingSample* sample = Copy(); 00130 if (index >= 0 && index < kSampleRandomSize) { 00131 ++index; // Remove the first combination. 00132 int yshift = kYShiftValues[index / kSampleScaleSize]; 00133 double scaling = kScaleValues[index % kSampleScaleSize]; 00134 for (int i = 0; i < num_features_; ++i) { 00135 double result = (features_[i].X - kRandomizingCenter) * scaling; 00136 result += kRandomizingCenter; 00137 sample->features_[i].X = ClipToRange(static_cast<int>(result + 0.5), 0, 00138 MAX_UINT8); 00139 result = (features_[i].Y - kRandomizingCenter) * scaling; 00140 result += kRandomizingCenter + yshift; 00141 sample->features_[i].Y = ClipToRange(static_cast<int>(result + 0.5), 0, 00142 MAX_UINT8); 00143 } 00144 } 00145 return sample; 00146 } 00147 00148 // Constructs and returns an exact copy. 
00149 TrainingSample* TrainingSample::Copy() const { 00150 TrainingSample* sample = new TrainingSample; 00151 sample->class_id_ = class_id_; 00152 sample->font_id_ = font_id_; 00153 sample->weight_ = weight_; 00154 sample->sample_index_ = sample_index_; 00155 sample->num_features_ = num_features_; 00156 if (num_features_ > 0) { 00157 sample->features_ = new INT_FEATURE_STRUCT[num_features_]; 00158 memcpy(sample->features_, features_, num_features_ * sizeof(features_[0])); 00159 } 00160 sample->num_micro_features_ = num_micro_features_; 00161 if (num_micro_features_ > 0) { 00162 sample->micro_features_ = new MicroFeature[num_micro_features_]; 00163 memcpy(sample->micro_features_, micro_features_, 00164 num_micro_features_ * sizeof(micro_features_[0])); 00165 } 00166 memcpy(sample->cn_feature_, cn_feature_, sizeof(*cn_feature_) * kNumCNParams); 00167 memcpy(sample->geo_feature_, geo_feature_, sizeof(*geo_feature_) * GeoCount); 00168 return sample; 00169 } 00170 00171 // Extracts the needed information from the CHAR_DESC_STRUCT. 00172 void TrainingSample::ExtractCharDesc(int int_feature_type, 00173 int micro_type, 00174 int cn_type, 00175 int geo_type, 00176 CHAR_DESC_STRUCT* char_desc) { 00177 // Extract the INT features. 
00178 if (features_ != NULL) delete [] features_; 00179 FEATURE_SET_STRUCT* char_features = char_desc->FeatureSets[int_feature_type]; 00180 if (char_features == NULL) { 00181 tprintf("Error: no features to train on of type %s\n", 00182 kIntFeatureType); 00183 num_features_ = 0; 00184 features_ = NULL; 00185 } else { 00186 num_features_ = char_features->NumFeatures; 00187 features_ = new INT_FEATURE_STRUCT[num_features_]; 00188 for (int f = 0; f < num_features_; ++f) { 00189 features_[f].X = 00190 static_cast<uinT8>(char_features->Features[f]->Params[IntX]); 00191 features_[f].Y = 00192 static_cast<uinT8>(char_features->Features[f]->Params[IntY]); 00193 features_[f].Theta = 00194 static_cast<uinT8>(char_features->Features[f]->Params[IntDir]); 00195 features_[f].CP_misses = 0; 00196 } 00197 } 00198 // Extract the Micro features. 00199 if (micro_features_ != NULL) delete [] micro_features_; 00200 char_features = char_desc->FeatureSets[micro_type]; 00201 if (char_features == NULL) { 00202 tprintf("Error: no features to train on of type %s\n", 00203 kMicroFeatureType); 00204 num_micro_features_ = 0; 00205 micro_features_ = NULL; 00206 } else { 00207 num_micro_features_ = char_features->NumFeatures; 00208 micro_features_ = new MicroFeature[num_micro_features_]; 00209 for (int f = 0; f < num_micro_features_; ++f) { 00210 for (int d = 0; d < MFCount; ++d) { 00211 micro_features_[f][d] = char_features->Features[f]->Params[d]; 00212 } 00213 } 00214 } 00215 // Extract the CN feature. 
00216 char_features = char_desc->FeatureSets[cn_type]; 00217 if (char_features == NULL) { 00218 tprintf("Error: no CN feature to train on.\n"); 00219 } else { 00220 ASSERT_HOST(char_features->NumFeatures == 1); 00221 cn_feature_[CharNormY] = char_features->Features[0]->Params[CharNormY]; 00222 cn_feature_[CharNormLength] = 00223 char_features->Features[0]->Params[CharNormLength]; 00224 cn_feature_[CharNormRx] = char_features->Features[0]->Params[CharNormRx]; 00225 cn_feature_[CharNormRy] = char_features->Features[0]->Params[CharNormRy]; 00226 } 00227 // Extract the Geo feature. 00228 char_features = char_desc->FeatureSets[geo_type]; 00229 if (char_features == NULL) { 00230 tprintf("Error: no Geo feature to train on.\n"); 00231 } else { 00232 ASSERT_HOST(char_features->NumFeatures == 1); 00233 geo_feature_[GeoBottom] = char_features->Features[0]->Params[GeoBottom]; 00234 geo_feature_[GeoTop] = char_features->Features[0]->Params[GeoTop]; 00235 geo_feature_[GeoWidth] = char_features->Features[0]->Params[GeoWidth]; 00236 } 00237 features_are_indexed_ = false; 00238 features_are_mapped_ = false; 00239 } 00240 00241 // Sets the mapped_features_ from the features_ using the provided 00242 // feature_space to the indexed versions of the features. 00243 void TrainingSample::IndexFeatures(const IntFeatureSpace& feature_space) { 00244 GenericVector<int> indexed_features; 00245 feature_space.IndexAndSortFeatures(features_, num_features_, 00246 &mapped_features_); 00247 features_are_indexed_ = true; 00248 features_are_mapped_ = false; 00249 } 00250 00251 // Sets the mapped_features_ from the features using the provided 00252 // feature_map. 
00253 void TrainingSample::MapFeatures(const IntFeatureMap& feature_map) { 00254 GenericVector<int> indexed_features; 00255 feature_map.feature_space().IndexAndSortFeatures(features_, num_features_, 00256 &indexed_features); 00257 feature_map.MapIndexedFeatures(indexed_features, &mapped_features_); 00258 features_are_indexed_ = false; 00259 features_are_mapped_ = true; 00260 } 00261 00262 // Returns a pix representing the sample. (Int features only.) 00263 Pix* TrainingSample::RenderToPix(const UNICHARSET* unicharset) const { 00264 Pix* pix = pixCreate(kIntFeatureExtent, kIntFeatureExtent, 1); 00265 for (int f = 0; f < num_features_; ++f) { 00266 int start_x = features_[f].X; 00267 int start_y = kIntFeatureExtent - features_[f].Y; 00268 double dx = cos((features_[f].Theta / 256.0) * 2.0 * PI - PI); 00269 double dy = -sin((features_[f].Theta / 256.0) * 2.0 * PI - PI); 00270 for (int i = 0; i <= 5; ++i) { 00271 int x = static_cast<int>(start_x + dx * i); 00272 int y = static_cast<int>(start_y + dy * i); 00273 if (x >= 0 && x < 256 && y >= 0 && y < 256) 00274 pixSetPixel(pix, x, y, 1); 00275 } 00276 } 00277 if (unicharset != NULL) 00278 pixSetText(pix, unicharset->id_to_unichar(class_id_)); 00279 return pix; 00280 } 00281 00282 // Displays the features in the given window with the given color. 00283 void TrainingSample::DisplayFeatures(ScrollView::Color color, 00284 ScrollView* window) const { 00285 #ifndef GRAPHICS_DISABLED 00286 for (int f = 0; f < num_features_; ++f) { 00287 RenderIntFeature(window, &features_[f], color); 00288 } 00289 #endif // GRAPHICS_DISABLED 00290 } 00291 00292 // Returns a pix of the original sample image. The pix is padded all round 00293 // by padding wherever possible. 00294 // The returned Pix must be pixDestroyed after use. 00295 // If the input page_pix is NULL, NULL is returned. 
00296 Pix* TrainingSample::GetSamplePix(int padding, Pix* page_pix) const { 00297 if (page_pix == NULL) 00298 return NULL; 00299 int page_width = pixGetWidth(page_pix); 00300 int page_height = pixGetHeight(page_pix); 00301 TBOX padded_box = bounding_box(); 00302 padded_box.pad(padding, padding); 00303 // Clip the padded_box to the limits of the page 00304 TBOX page_box(0, 0, page_width, page_height); 00305 padded_box &= page_box; 00306 Box* box = boxCreate(page_box.left(), page_height - page_box.top(), 00307 page_box.width(), page_box.height()); 00308 Pix* sample_pix = pixClipRectangle(page_pix, box, NULL); 00309 boxDestroy(&box); 00310 return sample_pix; 00311 } 00312 00313 } // namespace tesseract