Tesseract  3.02
tesseract-ocr/classify/trainingsample.cpp
Go to the documentation of this file.
00001 // Copyright 2010 Google Inc. All Rights Reserved.
00002 // Author: rays@google.com (Ray Smith)
00003 //
00004 // Licensed under the Apache License, Version 2.0 (the "License");
00005 // you may not use this file except in compliance with the License.
00006 // You may obtain a copy of the License at
00007 // http://www.apache.org/licenses/LICENSE-2.0
00008 // Unless required by applicable law or agreed to in writing, software
00009 // distributed under the License is distributed on an "AS IS" BASIS,
00010 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00011 // See the License for the specific language governing permissions and
00012 // limitations under the License.
00013 //
00015 
00016 #include "trainingsample.h"
00017 
00018 #include <math.h>
00019 #include "allheaders.h"
00020 #include "helpers.h"
00021 #include "intfeaturemap.h"
00022 #include "normfeat.h"
00023 #include "shapetable.h"
00024 
00025 namespace tesseract {
00026 
// Expands the ELISTIZE macro (defined outside this file) for TrainingSample.
// NOTE(review): presumably this emits the list-element support code for the
// class; confirm against the macro definition.
ELISTIZE(TrainingSample)

// Center of randomizing operations. RandomizedCopy subtracts this value from
// each feature X/Y before scaling and adds it back afterwards, so the scaling
// is performed about this coordinate.
const int kRandomizingCenter = 128;

// Randomizing factors.
// Vertical offsets added to each feature Y by RandomizedCopy. The entry is
// selected by (index + 1) / kSampleScaleSize.
const int TrainingSample::kYShiftValues[kSampleYShiftSize] = {
    6, 3, -3, -6, 0
};
// Scale factors applied to each feature X and Y by RandomizedCopy. The entry
// is selected by (index + 1) % kSampleScaleSize.
const double TrainingSample::kScaleValues[kSampleScaleSize] = {
    1.0625, 0.9375, 1.0
};
00039 
00040 TrainingSample::~TrainingSample() {
00041   delete [] features_;
00042   delete [] micro_features_;
00043 }
00044 
00045 // WARNING! Serialize/DeSerialize do not save/restore the "cache" data
00046 // members, which is mostly the mapped features, and the weight.
00047 // It is assumed these can all be reconstructed from what is saved.
00048 // Writes to the given file. Returns false in case of error.
00049 bool TrainingSample::Serialize(FILE* fp) const {
00050   if (fwrite(&class_id_, sizeof(class_id_), 1, fp) != 1) return false;
00051   if (fwrite(&font_id_, sizeof(font_id_), 1, fp) != 1) return false;
00052   if (fwrite(&page_num_, sizeof(page_num_), 1, fp) != 1) return false;
00053   if (!bounding_box_.Serialize(fp)) return false;
00054   if (fwrite(&num_features_, sizeof(num_features_), 1, fp) != 1) return false;
00055   if (fwrite(&num_micro_features_, sizeof(num_micro_features_), 1, fp) != 1)
00056     return false;
00057   if (fwrite(features_, sizeof(*features_), num_features_, fp) != num_features_)
00058     return false;
00059   if (fwrite(micro_features_, sizeof(*micro_features_), num_micro_features_,
00060              fp) != num_micro_features_)
00061     return false;
00062   if (fwrite(cn_feature_, sizeof(*cn_feature_), kNumCNParams, fp) !=
00063       kNumCNParams) return false;
00064   if (fwrite(geo_feature_, sizeof(*geo_feature_), GeoCount, fp) != GeoCount)
00065     return false;
00066   return true;
00067 }
00068 
00069 // Creates from the given file. Returns NULL in case of error.
00070 // If swap is true, assumes a big/little-endian swap is needed.
00071 TrainingSample* TrainingSample::DeSerializeCreate(bool swap, FILE* fp) {
00072   TrainingSample* sample = new TrainingSample;
00073   if (sample->DeSerialize(swap, fp)) return sample;
00074   delete sample;
00075   return NULL;
00076 }
00077 
00078 // Reads from the given file. Returns false in case of error.
00079 // If swap is true, assumes a big/little-endian swap is needed.
00080 bool TrainingSample::DeSerialize(bool swap, FILE* fp) {
00081   if (fread(&class_id_, sizeof(class_id_), 1, fp) != 1) return false;
00082   if (fread(&font_id_, sizeof(font_id_), 1, fp) != 1) return false;
00083   if (fread(&page_num_, sizeof(page_num_), 1, fp) != 1) return false;
00084   if (!bounding_box_.DeSerialize(swap, fp)) return false;
00085   if (fread(&num_features_, sizeof(num_features_), 1, fp) != 1) return false;
00086   if (fread(&num_micro_features_, sizeof(num_micro_features_), 1, fp) != 1)
00087     return false;
00088   if (swap) {
00089     ReverseN(&class_id_, sizeof(class_id_));
00090     ReverseN(&num_features_, sizeof(num_features_));
00091     ReverseN(&num_micro_features_, sizeof(num_micro_features_));
00092   }
00093   delete [] features_;
00094   features_ = new INT_FEATURE_STRUCT[num_features_];
00095   if (fread(features_, sizeof(*features_), num_features_, fp) != num_features_)
00096     return false;
00097   delete [] micro_features_;
00098   micro_features_ = new MicroFeature[num_micro_features_];
00099   if (fread(micro_features_, sizeof(*micro_features_), num_micro_features_,
00100             fp) != num_micro_features_)
00101     return false;
00102   if (fread(cn_feature_, sizeof(*cn_feature_), kNumCNParams, fp) !=
00103             kNumCNParams) return false;
00104   if (fread(geo_feature_, sizeof(*geo_feature_), GeoCount, fp) != GeoCount)
00105     return false;
00106   return true;
00107 }
00108 
00109 // Saves the given features into a TrainingSample.
00110 TrainingSample* TrainingSample::CopyFromFeatures(
00111     const INT_FX_RESULT_STRUCT& fx_info, const INT_FEATURE_STRUCT* features,
00112     int num_features) {
00113   TrainingSample* sample = new TrainingSample;
00114   sample->num_features_ = num_features;
00115   sample->features_ = new INT_FEATURE_STRUCT[num_features];
00116   memcpy(sample->features_, features, num_features * sizeof(features[0]));
00117   sample->geo_feature_[GeoBottom] = fx_info.YBottom;
00118   sample->geo_feature_[GeoTop] = fx_info.YTop;
00119   sample->geo_feature_[GeoWidth] = fx_info.Width;
00120   sample->features_are_indexed_ = false;
00121   sample->features_are_mapped_ = false;
00122   return sample;
00123 }
00124 
00125 // Constructs and returns a copy randomized by the method given by
00126 // the randomizer index. If index is out of [0, kSampleRandomSize) then
00127 // an exact copy is returned.
00128 TrainingSample* TrainingSample::RandomizedCopy(int index) const {
00129   TrainingSample* sample = Copy();
00130   if (index >= 0 && index < kSampleRandomSize) {
00131     ++index;  // Remove the first combination.
00132     int yshift = kYShiftValues[index / kSampleScaleSize];
00133     double scaling = kScaleValues[index % kSampleScaleSize];
00134     for (int i = 0; i < num_features_; ++i) {
00135       double result = (features_[i].X - kRandomizingCenter) * scaling;
00136       result += kRandomizingCenter;
00137       sample->features_[i].X = ClipToRange(static_cast<int>(result + 0.5), 0,
00138                                            MAX_UINT8);
00139       result = (features_[i].Y - kRandomizingCenter) * scaling;
00140       result += kRandomizingCenter + yshift;
00141       sample->features_[i].Y = ClipToRange(static_cast<int>(result + 0.5), 0,
00142                                            MAX_UINT8);
00143     }
00144   }
00145   return sample;
00146 }
00147 
00148 // Constructs and returns an exact copy.
00149 TrainingSample* TrainingSample::Copy() const {
00150   TrainingSample* sample = new TrainingSample;
00151   sample->class_id_ = class_id_;
00152   sample->font_id_ = font_id_;
00153   sample->weight_ = weight_;
00154   sample->sample_index_ = sample_index_;
00155   sample->num_features_ = num_features_;
00156   if (num_features_ > 0) {
00157     sample->features_ = new INT_FEATURE_STRUCT[num_features_];
00158     memcpy(sample->features_, features_, num_features_ * sizeof(features_[0]));
00159   }
00160   sample->num_micro_features_ = num_micro_features_;
00161   if (num_micro_features_ > 0) {
00162     sample->micro_features_ = new MicroFeature[num_micro_features_];
00163     memcpy(sample->micro_features_, micro_features_,
00164            num_micro_features_ * sizeof(micro_features_[0]));
00165   }
00166   memcpy(sample->cn_feature_, cn_feature_, sizeof(*cn_feature_) * kNumCNParams);
00167   memcpy(sample->geo_feature_, geo_feature_, sizeof(*geo_feature_) * GeoCount);
00168   return sample;
00169 }
00170 
00171 // Extracts the needed information from the CHAR_DESC_STRUCT.
00172 void TrainingSample::ExtractCharDesc(int int_feature_type,
00173                                      int micro_type,
00174                                      int cn_type,
00175                                      int geo_type,
00176                                      CHAR_DESC_STRUCT* char_desc) {
00177   // Extract the INT features.
00178   if (features_ != NULL) delete [] features_;
00179   FEATURE_SET_STRUCT* char_features = char_desc->FeatureSets[int_feature_type];
00180   if (char_features == NULL) {
00181     tprintf("Error: no features to train on of type %s\n",
00182             kIntFeatureType);
00183     num_features_ = 0;
00184     features_ = NULL;
00185   } else {
00186     num_features_ = char_features->NumFeatures;
00187     features_ = new INT_FEATURE_STRUCT[num_features_];
00188     for (int f = 0; f < num_features_; ++f) {
00189       features_[f].X =
00190           static_cast<uinT8>(char_features->Features[f]->Params[IntX]);
00191       features_[f].Y =
00192           static_cast<uinT8>(char_features->Features[f]->Params[IntY]);
00193       features_[f].Theta =
00194           static_cast<uinT8>(char_features->Features[f]->Params[IntDir]);
00195       features_[f].CP_misses = 0;
00196     }
00197   }
00198   // Extract the Micro features.
00199   if (micro_features_ != NULL) delete [] micro_features_;
00200   char_features = char_desc->FeatureSets[micro_type];
00201   if (char_features == NULL) {
00202     tprintf("Error: no features to train on of type %s\n",
00203             kMicroFeatureType);
00204     num_micro_features_ = 0;
00205     micro_features_ = NULL;
00206   } else {
00207     num_micro_features_ = char_features->NumFeatures;
00208     micro_features_ = new MicroFeature[num_micro_features_];
00209     for (int f = 0; f < num_micro_features_; ++f) {
00210       for (int d = 0; d < MFCount; ++d) {
00211         micro_features_[f][d] = char_features->Features[f]->Params[d];
00212       }
00213     }
00214   }
00215   // Extract the CN feature.
00216   char_features = char_desc->FeatureSets[cn_type];
00217   if (char_features == NULL) {
00218     tprintf("Error: no CN feature to train on.\n");
00219   } else {
00220     ASSERT_HOST(char_features->NumFeatures == 1);
00221     cn_feature_[CharNormY] = char_features->Features[0]->Params[CharNormY];
00222     cn_feature_[CharNormLength] =
00223         char_features->Features[0]->Params[CharNormLength];
00224     cn_feature_[CharNormRx] = char_features->Features[0]->Params[CharNormRx];
00225     cn_feature_[CharNormRy] = char_features->Features[0]->Params[CharNormRy];
00226   }
00227   // Extract the Geo feature.
00228   char_features = char_desc->FeatureSets[geo_type];
00229   if (char_features == NULL) {
00230     tprintf("Error: no Geo feature to train on.\n");
00231   } else {
00232     ASSERT_HOST(char_features->NumFeatures == 1);
00233     geo_feature_[GeoBottom] = char_features->Features[0]->Params[GeoBottom];
00234     geo_feature_[GeoTop] = char_features->Features[0]->Params[GeoTop];
00235     geo_feature_[GeoWidth] = char_features->Features[0]->Params[GeoWidth];
00236   }
00237   features_are_indexed_ = false;
00238   features_are_mapped_ = false;
00239 }
00240 
00241 // Sets the mapped_features_ from the features_ using the provided
00242 // feature_space to the indexed versions of the features.
00243 void TrainingSample::IndexFeatures(const IntFeatureSpace& feature_space) {
00244   GenericVector<int> indexed_features;
00245   feature_space.IndexAndSortFeatures(features_, num_features_,
00246                                      &mapped_features_);
00247   features_are_indexed_ = true;
00248   features_are_mapped_ = false;
00249 }
00250 
00251 // Sets the mapped_features_ from the features using the provided
00252 // feature_map.
00253 void TrainingSample::MapFeatures(const IntFeatureMap& feature_map) {
00254   GenericVector<int> indexed_features;
00255   feature_map.feature_space().IndexAndSortFeatures(features_, num_features_,
00256                                                    &indexed_features);
00257   feature_map.MapIndexedFeatures(indexed_features, &mapped_features_);
00258   features_are_indexed_ = false;
00259   features_are_mapped_ = true;
00260 }
00261 
00262 // Returns a pix representing the sample. (Int features only.)
00263 Pix* TrainingSample::RenderToPix(const UNICHARSET* unicharset) const {
00264   Pix* pix = pixCreate(kIntFeatureExtent, kIntFeatureExtent, 1);
00265   for (int f = 0; f < num_features_; ++f) {
00266     int start_x = features_[f].X;
00267     int start_y = kIntFeatureExtent - features_[f].Y;
00268     double dx = cos((features_[f].Theta / 256.0) * 2.0 * PI - PI);
00269     double dy = -sin((features_[f].Theta / 256.0) * 2.0 * PI - PI);
00270     for (int i = 0; i <= 5; ++i) {
00271       int x = static_cast<int>(start_x + dx * i);
00272       int y = static_cast<int>(start_y + dy * i);
00273       if (x >= 0 && x < 256 && y >= 0 && y < 256)
00274         pixSetPixel(pix, x, y, 1);
00275     }
00276   }
00277   if (unicharset != NULL)
00278     pixSetText(pix, unicharset->id_to_unichar(class_id_));
00279   return pix;
00280 }
00281 
00282 // Displays the features in the given window with the given color.
00283 void TrainingSample::DisplayFeatures(ScrollView::Color color,
00284                                      ScrollView* window) const {
00285   #ifndef GRAPHICS_DISABLED
00286   for (int f = 0; f < num_features_; ++f) {
00287     RenderIntFeature(window, &features_[f], color);
00288   }
00289   #endif  // GRAPHICS_DISABLED
00290 }
00291 
00292 // Returns a pix of the original sample image. The pix is padded all round
00293 // by padding wherever possible.
00294 // The returned Pix must be pixDestroyed after use.
00295 // If the input page_pix is NULL, NULL is returned.
00296 Pix* TrainingSample::GetSamplePix(int padding, Pix* page_pix) const {
00297   if (page_pix == NULL)
00298     return NULL;
00299   int page_width = pixGetWidth(page_pix);
00300   int page_height = pixGetHeight(page_pix);
00301   TBOX padded_box = bounding_box();
00302   padded_box.pad(padding, padding);
00303   // Clip the padded_box to the limits of the page
00304   TBOX page_box(0, 0, page_width, page_height);
00305   padded_box &= page_box;
00306   Box* box = boxCreate(page_box.left(), page_height - page_box.top(),
00307                        page_box.width(), page_box.height());
00308   Pix* sample_pix = pixClipRectangle(page_pix, box, NULL);
00309   boxDestroy(&box);
00310   return sample_pix;
00311 }
00312 
00313 }  // namespace tesseract