Tesseract  3.02
tesseract::TrainingSample Class Reference

#include <trainingsample.h>

Inheritance diagram for tesseract::TrainingSample:
ELIST_LINK

List of all members.

Public Member Functions

 TrainingSample ()
 ~TrainingSample ()
TrainingSample * RandomizedCopy (int index) const
TrainingSample * Copy () const
bool Serialize (FILE *fp) const
bool DeSerialize (bool swap, FILE *fp)
void ExtractCharDesc (int feature_type, int micro_type, int cn_type, int geo_type, CHAR_DESC_STRUCT *char_desc)
void IndexFeatures (const IntFeatureSpace &feature_space)
void MapFeatures (const IntFeatureMap &feature_map)
Pix * RenderToPix (const UNICHARSET *unicharset) const
void DisplayFeatures (ScrollView::Color color, ScrollView *window) const
Pix * GetSamplePix (int padding, Pix *page_pix) const
UNICHAR_ID class_id () const
void set_class_id (int id)
int font_id () const
void set_font_id (int id)
int page_num () const
void set_page_num (int page)
const TBOX & bounding_box () const
void set_bounding_box (const TBOX &box)
int num_features () const
const INT_FEATURE_STRUCT * features () const
int num_micro_features () const
const MicroFeature * micro_features () const
float cn_feature (int index) const
int geo_feature (int index) const
double weight () const
void set_weight (double value)
double max_dist () const
void set_max_dist (double value)
int sample_index () const
void set_sample_index (int value)
bool features_are_mapped () const
const GenericVector< int > & mapped_features () const
const GenericVector< int > & indexed_features () const
bool is_error () const
void set_is_error (bool value)

Static Public Member Functions

static TrainingSample * CopyFromFeatures (const INT_FX_RESULT_STRUCT &fx_info, const INT_FEATURE_STRUCT *features, int num_features)
static TrainingSample * DeSerializeCreate (bool swap, FILE *fp)

Detailed Description

Definition at line 53 of file trainingsample.h.


Constructor & Destructor Documentation

tesseract::TrainingSample::TrainingSample ( ) [inline]

Definition at line 55 of file trainingsample.h.

    // Start as an empty sample: invalid class id, no feature arrays, unit
    // weight and every state flag cleared. Members not named in this list
    // (for example cn_feature_ and geo_feature_) are not set here.
    : class_id_(INVALID_UNICHAR_ID),
      font_id_(0),
      page_num_(0),
      num_features_(0),
      num_micro_features_(0),
      features_(NULL),
      micro_features_(NULL),
      weight_(1.0),
      max_dist_(0.0),
      sample_index_(0),
      features_are_indexed_(false),
      features_are_mapped_(false),
      is_error_(false) {
    // Nothing further to do once the members are initialized.
  }
tesseract::TrainingSample::~TrainingSample ( )

Definition at line 40 of file trainingsample.cpp.

                                {
  // Frees the two heap arrays held by this sample. delete [] on a NULL
  // pointer is a no-op, so a default-constructed sample needs no guard.
  delete [] features_;
  delete [] micro_features_;
}

Member Function Documentation

const TBOX& tesseract::TrainingSample::bounding_box ( ) const [inline]

Definition at line 131 of file trainingsample.h.

                                   {
    return bounding_box_;
  }
UNICHAR_ID tesseract::TrainingSample::class_id ( ) const [inline]

Definition at line 113 of file trainingsample.h.

                              {
    return class_id_;
  }
float tesseract::TrainingSample::cn_feature ( int  index) const [inline]

Definition at line 149 of file trainingsample.h.

                                    {
    return cn_feature_[index];
  }
TrainingSample * tesseract::TrainingSample::Copy ( ) const

Definition at line 149 of file trainingsample.cpp.

                                           {
  // Returns a sample allocated with new that duplicates this one.
  // Duplicated: class id, font id, weight, sample index, both feature arrays
  // (deep copies) and the CN / Geo parameter arrays. Every other member of
  // the result keeps the value given by the default constructor.
  TrainingSample* clone = new TrainingSample;

  // Scalar fields.
  clone->class_id_ = class_id_;
  clone->font_id_ = font_id_;
  clone->weight_ = weight_;
  clone->sample_index_ = sample_index_;

  // INT features: allocate only when there is something to copy, otherwise
  // the clone keeps its NULL array.
  clone->num_features_ = num_features_;
  if (num_features_ > 0) {
    clone->features_ = new INT_FEATURE_STRUCT[num_features_];
    memcpy(clone->features_, features_, sizeof(features_[0]) * num_features_);
  }

  // Micro features, handled the same way.
  clone->num_micro_features_ = num_micro_features_;
  if (num_micro_features_ > 0) {
    clone->micro_features_ = new MicroFeature[num_micro_features_];
    memcpy(clone->micro_features_, micro_features_,
           sizeof(micro_features_[0]) * num_micro_features_);
  }

  // Fixed-size parameter arrays are copied wholesale.
  memcpy(clone->cn_feature_, cn_feature_, kNumCNParams * sizeof(*cn_feature_));
  memcpy(clone->geo_feature_, geo_feature_, GeoCount * sizeof(*geo_feature_));
  return clone;
}
TrainingSample * tesseract::TrainingSample::CopyFromFeatures ( const INT_FX_RESULT_STRUCT &  fx_info,
const INT_FEATURE_STRUCT *  features,
int  num_features 
) [static]

Definition at line 110 of file trainingsample.cpp.

                      {
  // Creates a sample (allocated with new) that holds its own copy of the
  // num_features entries pointed to by features, and takes its three Geo
  // values from fx_info. All remaining members keep their default values.
  TrainingSample* result = new TrainingSample;

  // The features have been neither indexed nor mapped yet.
  result->features_are_indexed_ = false;
  result->features_are_mapped_ = false;

  // Geometry values come straight from the feature-extraction result.
  result->geo_feature_[GeoBottom] = fx_info.YBottom;
  result->geo_feature_[GeoTop] = fx_info.YTop;
  result->geo_feature_[GeoWidth] = fx_info.Width;

  // Private copy of the caller's feature array.
  result->num_features_ = num_features;
  result->features_ = new INT_FEATURE_STRUCT[num_features];
  memcpy(result->features_, features, sizeof(features[0]) * num_features);
  return result;
}
bool tesseract::TrainingSample::DeSerialize ( bool  swap,
FILE *  fp 
)

Definition at line 80 of file trainingsample.cpp.

                                                    {
  // Reads this sample from fp, in the same field order that Serialize
  // writes: class id, font id, page number, bounding box, the two feature
  // counts, the INT feature array, the micro feature array, then the CN and
  // Geo parameter arrays.
  //   swap: when true, the fields listed in the swap block below are
  //         byte-reversed with ReverseN, and the flag is also forwarded to
  //         bounding_box_.DeSerialize.
  // Returns false on any short read or if either stored count is negative.
  if (fread(&class_id_, sizeof(class_id_), 1, fp) != 1) return false;
  if (fread(&font_id_, sizeof(font_id_), 1, fp) != 1) return false;
  if (fread(&page_num_, sizeof(page_num_), 1, fp) != 1) return false;
  if (!bounding_box_.DeSerialize(swap, fp)) return false;
  if (fread(&num_features_, sizeof(num_features_), 1, fp) != 1) return false;
  if (fread(&num_micro_features_, sizeof(num_micro_features_), 1, fp) != 1)
    return false;
  if (swap) {
    // NOTE(review): font_id_ and page_num_ are read above but are not
    // reversed here, and neither are the feature payloads below - confirm
    // whether that is intentional.
    ReverseN(&class_id_, sizeof(class_id_));
    ReverseN(&num_features_, sizeof(num_features_));
    ReverseN(&num_micro_features_, sizeof(num_micro_features_));
  }
  // The counts come straight from the file. A negative value must never
  // reach new[] or fread, so treat it as a corrupt record and leave the
  // sample empty rather than half-updated.
  if (num_features_ < 0 || num_micro_features_ < 0) {
    delete [] features_;
    features_ = NULL;
    num_features_ = 0;
    delete [] micro_features_;
    micro_features_ = NULL;
    num_micro_features_ = 0;
    return false;
  }
  delete [] features_;
  features_ = new INT_FEATURE_STRUCT[num_features_];
  if (fread(features_, sizeof(*features_), num_features_, fp) !=
      static_cast<size_t>(num_features_))
    return false;
  delete [] micro_features_;
  micro_features_ = new MicroFeature[num_micro_features_];
  if (fread(micro_features_, sizeof(*micro_features_), num_micro_features_,
            fp) != static_cast<size_t>(num_micro_features_))
    return false;
  if (fread(cn_feature_, sizeof(*cn_feature_), kNumCNParams, fp) !=
            kNumCNParams) return false;
  if (fread(geo_feature_, sizeof(*geo_feature_), GeoCount, fp) != GeoCount)
    return false;
  return true;
}
TrainingSample * tesseract::TrainingSample::DeSerializeCreate ( bool  swap,
FILE *  fp 
) [static]

Definition at line 71 of file trainingsample.cpp.

                                                                     {
  // Factory wrapper around DeSerialize: returns a sample allocated with new
  // and filled from fp, or NULL (after deleting the temporary) when
  // DeSerialize reports failure.
  TrainingSample* loaded = new TrainingSample;
  if (!loaded->DeSerialize(swap, fp)) {
    delete loaded;
    loaded = NULL;
  }
  return loaded;
}
void tesseract::TrainingSample::DisplayFeatures ( ScrollView::Color  color,
ScrollView *  window 
) const

Definition at line 283 of file trainingsample.cpp.

                                                               {
  // Passes each of this sample's INT features to RenderIntFeature together
  // with window and color. The body is empty when GRAPHICS_DISABLED is
  // defined.
  #ifndef GRAPHICS_DISABLED
  int feature_idx = 0;
  while (feature_idx < num_features_) {
    RenderIntFeature(window, features_ + feature_idx, color);
    ++feature_idx;
  }
  #endif  // GRAPHICS_DISABLED
}
void tesseract::TrainingSample::ExtractCharDesc ( int  feature_type,
int  micro_type,
int  cn_type,
int  geo_type,
CHAR_DESC_STRUCT *  char_desc 
)

Definition at line 172 of file trainingsample.cpp.

                                                                  {
  // Rebuilds this sample's feature data from char_desc. Each *_type argument
  // is used as an index into char_desc->FeatureSets. The INT and micro
  // feature arrays are freed and re-created; the CN and Geo parameter arrays
  // are overwritten only when the matching feature set is present.
  // NOTE(review): the documented signature names the first parameter
  // feature_type, while this body reads int_feature_type - presumably the
  // name used by the definition in trainingsample.cpp; confirm they refer to
  // the same argument.
  // Extract the INT features.
  if (features_ != NULL) delete [] features_;
  FEATURE_SET_STRUCT* char_features = char_desc->FeatureSets[int_feature_type];
  if (char_features == NULL) {
    // No INT feature set: report it and leave the sample with no features.
    tprintf("Error: no features to train on of type %s\n",
            kIntFeatureType);
    num_features_ = 0;
    features_ = NULL;
  } else {
    num_features_ = char_features->NumFeatures;
    features_ = new INT_FEATURE_STRUCT[num_features_];
    for (int f = 0; f < num_features_; ++f) {
      // Each parameter value is narrowed to uinT8 for storage.
      features_[f].X =
          static_cast<uinT8>(char_features->Features[f]->Params[IntX]);
      features_[f].Y =
          static_cast<uinT8>(char_features->Features[f]->Params[IntY]);
      features_[f].Theta =
          static_cast<uinT8>(char_features->Features[f]->Params[IntDir]);
      features_[f].CP_misses = 0;
    }
  }
  // Extract the Micro features.
  if (micro_features_ != NULL) delete [] micro_features_;
  char_features = char_desc->FeatureSets[micro_type];
  if (char_features == NULL) {
    // No micro feature set: report it and leave the array empty.
    tprintf("Error: no features to train on of type %s\n",
            kMicroFeatureType);
    num_micro_features_ = 0;
    micro_features_ = NULL;
  } else {
    num_micro_features_ = char_features->NumFeatures;
    micro_features_ = new MicroFeature[num_micro_features_];
    for (int f = 0; f < num_micro_features_; ++f) {
      // Copy all MFCount parameters of each micro feature.
      for (int d = 0; d < MFCount; ++d) {
        micro_features_[f][d] = char_features->Features[f]->Params[d];
      }
    }
  }
  // Extract the CN feature.
  char_features = char_desc->FeatureSets[cn_type];
  if (char_features == NULL) {
    // cn_feature_ is left unchanged in this case.
    tprintf("Error: no CN feature to train on.\n");
  } else {
    // Exactly one CN feature is expected per character.
    ASSERT_HOST(char_features->NumFeatures == 1);
    cn_feature_[CharNormY] = char_features->Features[0]->Params[CharNormY];
    cn_feature_[CharNormLength] =
        char_features->Features[0]->Params[CharNormLength];
    cn_feature_[CharNormRx] = char_features->Features[0]->Params[CharNormRx];
    cn_feature_[CharNormRy] = char_features->Features[0]->Params[CharNormRy];
  }
  // Extract the Geo feature.
  char_features = char_desc->FeatureSets[geo_type];
  if (char_features == NULL) {
    // geo_feature_ is left unchanged in this case.
    tprintf("Error: no Geo feature to train on.\n");
  } else {
    // Exactly one Geo feature is expected per character.
    ASSERT_HOST(char_features->NumFeatures == 1);
    geo_feature_[GeoBottom] = char_features->Features[0]->Params[GeoBottom];
    geo_feature_[GeoTop] = char_features->Features[0]->Params[GeoTop];
    geo_feature_[GeoWidth] = char_features->Features[0]->Params[GeoWidth];
  }
  // The feature arrays were just replaced, so clear both the indexed and
  // the mapped flags.
  features_are_indexed_ = false;
  features_are_mapped_ = false;
}
const INT_FEATURE_STRUCT* tesseract::TrainingSample::features ( ) const [inline]

Definition at line 140 of file trainingsample.h.

                                             {
    return features_;
  }
bool tesseract::TrainingSample::features_are_mapped ( ) const [inline]

Definition at line 173 of file trainingsample.h.

                                   {
    return features_are_mapped_;
  }
int tesseract::TrainingSample::font_id ( ) const [inline]

Definition at line 119 of file trainingsample.h.

                      {
    return font_id_;
  }
int tesseract::TrainingSample::geo_feature ( int  index) const [inline]

Definition at line 152 of file trainingsample.h.

                                   {
    return geo_feature_[index];
  }
Pix * tesseract::TrainingSample::GetSamplePix ( int  padding,
Pix *  page_pix 
) const

Definition at line 296 of file trainingsample.cpp.

                                                                  {
  // Returns the region of page_pix covered by this sample's bounding box
  // after it has been padded with TBOX::pad(padding, padding) and clipped to
  // the page. The result is whatever pixClipRectangle returns.
  // Returns NULL when page_pix is NULL.
  if (page_pix == NULL)
    return NULL;
  int page_width = pixGetWidth(page_pix);
  int page_height = pixGetHeight(page_pix);
  TBOX padded_box = bounding_box();
  padded_box.pad(padding, padding);
  // Clip the padded_box to the limits of the page
  TBOX page_box(0, 0, page_width, page_height);
  padded_box &= page_box;
  // Build the clip rectangle from the padded, page-limited box. Using
  // page_box here would return the entire page instead of the sample.
  // The y origin is page_height - top(); presumably this converts from the
  // TBOX coordinate system to image rows - confirm against TBOX.
  Box* box = boxCreate(padded_box.left(), page_height - padded_box.top(),
                       padded_box.width(), padded_box.height());
  Pix* sample_pix = pixClipRectangle(page_pix, box, NULL);
  boxDestroy(&box);
  return sample_pix;
}
const GenericVector<int>& tesseract::TrainingSample::indexed_features ( ) const [inline]

Definition at line 180 of file trainingsample.h.

                                                     {
    ASSERT_HOST(features_are_indexed_);
    return mapped_features_;
  }
void tesseract::TrainingSample::IndexFeatures ( const IntFeatureSpace &  feature_space)

Definition at line 243 of file trainingsample.cpp.

                                                                       {
  // Fills mapped_features_ with the output of
  // feature_space.IndexAndSortFeatures for this sample's INT features, then
  // marks the sample as indexed and not mapped. The same vector backs both
  // indexed_features() and mapped_features(), so no separate local vector
  // is needed here.
  feature_space.IndexAndSortFeatures(features_, num_features_,
                                     &mapped_features_);
  features_are_indexed_ = true;
  features_are_mapped_ = false;
}
bool tesseract::TrainingSample::is_error ( ) const [inline]

Definition at line 184 of file trainingsample.h.

                        {
    return is_error_;
  }
void tesseract::TrainingSample::MapFeatures ( const IntFeatureMap &  feature_map)

Definition at line 253 of file trainingsample.cpp.

                                                                 {
  // Two-step conversion: first obtain indexed features from the map's
  // feature space into a temporary vector, then let feature_map translate
  // them into mapped_features_. Afterwards the sample is flagged as mapped
  // and not indexed.
  GenericVector<int> raw_indices;
  feature_map.feature_space().IndexAndSortFeatures(features_, num_features_,
                                                   &raw_indices);
  feature_map.MapIndexedFeatures(raw_indices, &mapped_features_);
  features_are_mapped_ = true;
  features_are_indexed_ = false;
}
const GenericVector<int>& tesseract::TrainingSample::mapped_features ( ) const [inline]

Definition at line 176 of file trainingsample.h.

                                                    {
    ASSERT_HOST(features_are_mapped_);
    return mapped_features_;
  }
double tesseract::TrainingSample::max_dist ( ) const [inline]

Definition at line 161 of file trainingsample.h.

                          {
    return max_dist_;
  }
const MicroFeature* tesseract::TrainingSample::micro_features ( ) const [inline]

Definition at line 146 of file trainingsample.h.

                                             {
    return micro_features_;
  }
int tesseract::TrainingSample::num_features ( ) const [inline]

Definition at line 137 of file trainingsample.h.

                           {
    return num_features_;
  }
int tesseract::TrainingSample::num_micro_features ( ) const [inline]

Definition at line 143 of file trainingsample.h.

                                 {
    return num_micro_features_;
  }
int tesseract::TrainingSample::page_num ( ) const [inline]

Definition at line 125 of file trainingsample.h.

                       {
    return page_num_;
  }
TrainingSample * tesseract::TrainingSample::RandomizedCopy ( int  index) const

Definition at line 128 of file trainingsample.cpp.

                                                              {
  // Returns a copy of this sample (made with Copy()) whose feature X and Y
  // coordinates are scaled about kRandomizingCenter, with an additional
  // shift applied to Y. index selects the shift / scale pair. An index
  // outside [0, kSampleRandomSize) yields an unmodified copy.
  TrainingSample* sample = Copy();
  if (index < 0 || index >= kSampleRandomSize)
    return sample;

  const int combination = index + 1;  // Remove the first combination.
  int yshift = kYShiftValues[combination / kSampleScaleSize];
  double scaling = kScaleValues[combination % kSampleScaleSize];
  for (int f = 0; f < num_features_; ++f) {
    // X: scale about the center, round to nearest and clip to a byte.
    double scaled_x = (features_[f].X - kRandomizingCenter) * scaling;
    scaled_x += kRandomizingCenter;
    sample->features_[f].X = ClipToRange(static_cast<int>(scaled_x + 0.5), 0,
                                         MAX_UINT8);
    // Y: same scaling, plus the vertical shift.
    double scaled_y = (features_[f].Y - kRandomizingCenter) * scaling;
    scaled_y += kRandomizingCenter + yshift;
    sample->features_[f].Y = ClipToRange(static_cast<int>(scaled_y + 0.5), 0,
                                         MAX_UINT8);
  }
  return sample;
}
Pix * tesseract::TrainingSample::RenderToPix ( const UNICHARSET *  unicharset) const

Definition at line 263 of file trainingsample.cpp.

                                                                   {
  // Draws this sample's INT features into a new 1-bit-deep pix of
  // kIntFeatureExtent x kIntFeatureExtent and returns it. Each feature is
  // drawn as 6 pixels starting at (X, kIntFeatureExtent - Y) and stepping
  // along the direction derived from Theta. When unicharset is non-NULL,
  // the pix text is set to the unichar string of class_id_.
  Pix* pix = pixCreate(kIntFeatureExtent, kIntFeatureExtent, 1);
  for (int f = 0; f < num_features_; ++f) {
    int start_x = features_[f].X;
    int start_y = kIntFeatureExtent - features_[f].Y;
    // Theta / 256.0 is scaled to a full turn and offset by -PI. The angle
    // is computed once and shared by both direction components.
    const double angle = (features_[f].Theta / 256.0) * 2.0 * PI - PI;
    double dx = cos(angle);
    double dy = -sin(angle);
    for (int i = 0; i <= 5; ++i) {
      int x = static_cast<int>(start_x + dx * i);
      int y = static_cast<int>(start_y + dy * i);
      // Bound against the extent the pix was created with, rather than a
      // separate literal, so the check always matches the image size.
      if (x >= 0 && x < kIntFeatureExtent && y >= 0 && y < kIntFeatureExtent)
        pixSetPixel(pix, x, y, 1);
    }
  }
  if (unicharset != NULL)
    pixSetText(pix, unicharset->id_to_unichar(class_id_));
  return pix;
}
int tesseract::TrainingSample::sample_index ( ) const [inline]

Definition at line 167 of file trainingsample.h.

                           {
    return sample_index_;
  }
bool tesseract::TrainingSample::Serialize ( FILE *  fp) const

Definition at line 49 of file trainingsample.cpp.

                                             {
  // Writes this sample to fp as raw binary and returns false as soon as any
  // write comes up short. The order of the writes below defines the record
  // layout, so it must not be changed independently of DeSerialize.
  // Header: class id, font id, page number, bounding box.
  if (fwrite(&class_id_, sizeof(class_id_), 1, fp) != 1) return false;
  if (fwrite(&font_id_, sizeof(font_id_), 1, fp) != 1) return false;
  if (fwrite(&page_num_, sizeof(page_num_), 1, fp) != 1) return false;
  if (!bounding_box_.Serialize(fp)) return false;
  // Both element counts are written before either array.
  if (fwrite(&num_features_, sizeof(num_features_), 1, fp) != 1) return false;
  if (fwrite(&num_micro_features_, sizeof(num_micro_features_), 1, fp) != 1)
    return false;
  // Variable-length payloads: INT features, then micro features.
  if (fwrite(features_, sizeof(*features_), num_features_, fp) != num_features_)
    return false;
  if (fwrite(micro_features_, sizeof(*micro_features_), num_micro_features_,
             fp) != num_micro_features_)
    return false;
  // Fixed-length parameter arrays: kNumCNParams CN values, GeoCount Geo
  // values.
  if (fwrite(cn_feature_, sizeof(*cn_feature_), kNumCNParams, fp) !=
      kNumCNParams) return false;
  if (fwrite(geo_feature_, sizeof(*geo_feature_), GeoCount, fp) != GeoCount)
    return false;
  return true;
}
void tesseract::TrainingSample::set_bounding_box ( const TBOX &  box) [inline]

Definition at line 134 of file trainingsample.h.

                                         {
    bounding_box_ = box;
  }
void tesseract::TrainingSample::set_class_id ( int  id) [inline]

Definition at line 116 of file trainingsample.h.

                            {
    class_id_ = id;
  }
void tesseract::TrainingSample::set_font_id ( int  id) [inline]

Definition at line 122 of file trainingsample.h.

                           {
    font_id_ = id;
  }
void tesseract::TrainingSample::set_is_error ( bool  value) [inline]

Definition at line 187 of file trainingsample.h.

                                {
    is_error_ = value;
  }
void tesseract::TrainingSample::set_max_dist ( double  value) [inline]

Definition at line 164 of file trainingsample.h.

                                  {
    max_dist_ = value;
  }
void tesseract::TrainingSample::set_page_num ( int  page) [inline]

Definition at line 128 of file trainingsample.h.

                              {
    page_num_ = page;
  }
void tesseract::TrainingSample::set_sample_index ( int  value) [inline]

Definition at line 170 of file trainingsample.h.

                                   {
    sample_index_ = value;
  }
void tesseract::TrainingSample::set_weight ( double  value) [inline]

Definition at line 158 of file trainingsample.h.

                                {
    weight_ = value;
  }
double tesseract::TrainingSample::weight ( ) const [inline]

Definition at line 155 of file trainingsample.h.

                        {
    return weight_;
  }

The documentation for this class was generated from the following files: