Tesseract  3.02
tesseract::SampleIterator Class Reference

#include <sampleiterator.h>

List of all members.

Public Member Functions

 SampleIterator ()
 ~SampleIterator ()
void Clear ()
void Init (const IndexMapBiDi *charset_map, const ShapeTable *shape_table, bool randomize, TrainingSampleSet *sample_set)
void Begin ()
bool AtEnd () const
const TrainingSample & GetSample () const
TrainingSample * MutableSample () const
int GlobalSampleIndex () const
int GetCompactClassID () const
int GetSparseClassID () const
void Next ()
int CompactCharsetSize () const
int SparseCharsetSize () const
const IndexMapBiDi & charset_map () const
const ShapeTable * shape_table () const
const TrainingSampleSet * sample_set () const
void MapSampleFeatures (const IntFeatureMap &feature_map)
int UniformSamples ()
double NormalizeSamples ()

Detailed Description

Definition at line 92 of file sampleiterator.h.


Constructor & Destructor Documentation

tesseract::SampleIterator::SampleIterator ( )

Definition at line 27 of file sampleiterator.cpp.

  // Starts with no charset map, shape table or sample set attached, with
  // randomization off and no owned shape table.
  : charset_map_(NULL),
    shape_table_(NULL),
    sample_set_(NULL),
    randomize_(false),
    owned_shape_table_(NULL) {
  num_shapes_ = 0;
  // Begin() resets the remaining index/count members and then calls Next().
  Begin();
}
tesseract::SampleIterator::~SampleIterator ( )

Definition at line 37 of file sampleiterator.cpp.

                                {
  // Clear() deletes owned_shape_table_; nothing else is released here.
  Clear();
}

Member Function Documentation

bool tesseract::SampleIterator::AtEnd ( ) const

Definition at line 99 of file sampleiterator.cpp.

                                 {
  // The iteration is finished once shape_index_ reaches or passes num_shapes_.
  return shape_index_ >= num_shapes_;
}
void tesseract::SampleIterator::Begin ( )

Definition at line 87 of file sampleiterator.cpp.

                           {
  // shape_index_ starts at -1 so that the increment in Next() lands on 0.
  shape_index_ = -1;
  // The counts are zeroed so that each bounds check in Next() falls through
  // to the next level (sample, font, unichar, shape) on the first call.
  shape_char_index_ = 0;
  num_shape_chars_ = 0;
  shape_font_index_ = 0;
  num_shape_fonts_ = 0;
  sample_index_ = 0;
  num_samples_ = 0;
  // Find the first indexable sample.
  Next();
}
const IndexMapBiDi& tesseract::SampleIterator::charset_map ( ) const [inline]

Definition at line 137 of file sampleiterator.h.

                                          {
    // Dereferences charset_map_ with no null check; the default constructor
    // leaves it NULL until Init() stores a value.
    return *charset_map_;
  }
void tesseract::SampleIterator::Clear ( )

Definition at line 41 of file sampleiterator.cpp.

                           {
  // Frees the shape table allocated in Init(), if any. The pointer is reset
  // to NULL so that a repeated Clear() does not delete it twice.
  delete owned_shape_table_;
  owned_shape_table_ = NULL;
}
int tesseract::SampleIterator::CompactCharsetSize ( ) const

Definition at line 196 of file sampleiterator.cpp.

                                             {
  // With a charset map, report its CompactSize(); without one, fall back to
  // SparseCharsetSize().
  return charset_map_ != NULL ? charset_map_->CompactSize()
                              : SparseCharsetSize();
}
int tesseract::SampleIterator::GetCompactClassID ( ) const

Definition at line 142 of file sampleiterator.cpp.

                                            {
  // With a charset map, shape_index_ is translated through SparseToCompact();
  // without one, the result is the same as GetSparseClassID().
  return charset_map_ != NULL ? charset_map_->SparseToCompact(shape_index_)
                              : GetSparseClassID();
}
const TrainingSample & tesseract::SampleIterator::GetSample ( ) const

Definition at line 103 of file sampleiterator.cpp.

                                                      {
  if (shape_table_ != NULL) {
    // Look the sample up by the (font, unichar) pair of the entry returned by
    // GetShapeEntry(), plus the sample index within that pair.
    const UnicharAndFonts* shape_entry = GetShapeEntry();
    int char_id = shape_entry->unichar_id;
    int font_id = shape_entry->font_ids[shape_font_index_];
    return *sample_set_->GetSample(font_id, char_id, sample_index_);
  } else {
    // No shape table: shape_index_ is passed directly as the sample index.
    return *sample_set_->GetSample(shape_index_);
  }
}
int tesseract::SampleIterator::GetSparseClassID ( ) const

Definition at line 150 of file sampleiterator.cpp.

                                           {
  // With a shape table the id is the current shape index; otherwise it is
  // the class_id() of the current sample.
  return shape_table_ != NULL ? shape_index_ : GetSample().class_id();
}
int tesseract::SampleIterator::GlobalSampleIndex ( ) const

Definition at line 127 of file sampleiterator.cpp.

                                            {
  if (shape_table_ != NULL) {
    // Ask the sample set for the global index of the current
    // (font, unichar, sample) position.
    const UnicharAndFonts* shape_entry = GetShapeEntry();
    int char_id = shape_entry->unichar_id;
    int font_id = shape_entry->font_ids[shape_font_index_];
    return sample_set_->GlobalSampleIndex(font_id, char_id, sample_index_);
  } else {
    // No shape table: shape_index_ is returned as the global index.
    return shape_index_;
  }
}
void tesseract::SampleIterator::Init ( const IndexMapBiDi * charset_map,
const ShapeTable * shape_table,
bool  randomize,
TrainingSampleSet * sample_set 
)

Definition at line 47 of file sampleiterator.cpp.

                                                         {
  // Release any shape table created by an earlier Init() before the members
  // are overwritten.
  Clear();
  charset_map_ = charset_map;
  shape_table_ = shape_table;
  sample_set_ = sample_set;
  randomize_ = randomize;
  if (shape_table_ == NULL && charset_map_ != NULL) {
    // The caller wishes to iterate by class. The easiest way to do this
    // is to create a dummy shape_table_ that we will own.
    int num_fonts = sample_set_->NumFonts();
    owned_shape_table_ = new ShapeTable(sample_set_->unicharset());
    int charsetsize = sample_set_->unicharset().size();
    for (int c = 0; c < charsetsize; ++c) {
      // We always add a shape for each character to keep the index in sync
      // with the unichar_id.
      // Font 0 is added unconditionally by AddShape(c, 0); the loop below
      // starts at font 1 and adds only fonts for which NumClassSamples()
      // reports at least one sample.
      int shape_id = owned_shape_table_->AddShape(c, 0);
      for (int f = 1; f < num_fonts; ++f) {
        if (sample_set_->NumClassSamples(f, c, true) > 0) {
          owned_shape_table_->AddToShape(shape_id, c, f);
        }
      }
    }
    shape_table_ = owned_shape_table_;
  }
  if (shape_table_ != NULL) {
    num_shapes_ = shape_table_->NumShapes();
  } else {
    // Without any shape table the iteration limit is a sample count:
    // num_samples() when randomize is set, num_raw_samples() otherwise.
    num_shapes_ = randomize ? sample_set_->num_samples()
                            : sample_set_->num_raw_samples();
  }
  // Position the iterator on the first sample of the new configuration.
  Begin();
}
void tesseract::SampleIterator::MapSampleFeatures ( const IntFeatureMap & feature_map)

Definition at line 211 of file sampleiterator.cpp.

                                                                       {
  // Restarts the iteration and calls MapFeatures(feature_map) on every sample
  // it visits. The iterator is left at the end when the loop finishes.
  for (Begin(); !AtEnd(); Next()) {
    TrainingSample* sample = MutableSample();
    sample->MapFeatures(feature_map);
  }
}
TrainingSample * tesseract::SampleIterator::MutableSample ( ) const

Definition at line 114 of file sampleiterator.cpp.

                                                    {
  // Same lookup as GetSample(), but returns a non-const pointer.
  if (shape_table_ != NULL) {
    const UnicharAndFonts* shape_entry = GetShapeEntry();
    int char_id = shape_entry->unichar_id;
    int font_id = shape_entry->font_ids[shape_font_index_];
    return sample_set_->MutableSample(font_id, char_id, sample_index_);
  } else {
    // No shape table: shape_index_ is passed directly as the sample index.
    return sample_set_->mutable_sample(shape_index_);
  }
}
void tesseract::SampleIterator::Next ( )

Definition at line 156 of file sampleiterator.cpp.

                          {
  // Advances by one sample. With a shape table the position is a nested
  // (shape, unichar, font, sample) tuple; without one it is just shape_index_.
  if (shape_table_ != NULL) {
    // Next sample in this class/font combination.
    ++sample_index_;
    if (sample_index_ < num_samples_)
      return;
    // Next font in this class in this shape.
    sample_index_ = 0;
    do {
      ++shape_font_index_;
      if (shape_font_index_ >= num_shape_fonts_) {
        // Next unichar in this shape.
        shape_font_index_ = 0;
        ++shape_char_index_;
        if (shape_char_index_ >= num_shape_chars_) {
          // Find the next shape that is mapped in the charset_map_.
          // When charset_map_ is NULL no shape is skipped.
          shape_char_index_ = 0;
          do {
            ++shape_index_;
          } while (shape_index_ < num_shapes_ &&
                   charset_map_ != NULL &&
                   charset_map_->SparseToCompact(shape_index_) < 0);
          if (shape_index_ >= num_shapes_)
            return;  // The end.
          num_shape_chars_ = shape_table_->GetShape(shape_index_).size();
        }
      }
      // Refresh the font and sample counts for the entry now pointed at; the
      // loop repeats while that (font, unichar) pair has no samples.
      const UnicharAndFonts* shape_entry = GetShapeEntry();
      num_shape_fonts_ = shape_entry->font_ids.size();
      int char_id = shape_entry->unichar_id;
      int font_id = shape_entry->font_ids[shape_font_index_];
      num_samples_ = sample_set_->NumClassSamples(font_id, char_id, randomize_);
    } while (num_samples_ == 0);
  } else {
    // We are just iterating over the samples.
    ++shape_index_;
  }
}
double tesseract::SampleIterator::NormalizeSamples ( )

Definition at line 233 of file sampleiterator.cpp.

                                        {
  // First pass: sum the weights of every sample the iterator visits.
  double total_weight = 0.0;
  // NOTE(review): sample_count is incremented below but never read in this
  // function.
  int sample_count = 0;
  for (Begin(); !AtEnd(); Next()) {
    const TrainingSample& sample = GetSample();
    total_weight += sample.weight();
    ++sample_count;
  }
  // Normalize samples.
  // Second pass: divide each weight by the total and track the smallest
  // result. The pass is skipped when the total is not positive, in which case
  // the weights are left untouched and 1.0 is returned.
  double min_assigned_sample_weight = 1.0;
  if (total_weight > 0.0) {
    for (Begin(); !AtEnd(); Next()) {
      TrainingSample* sample = MutableSample();
      double weight = sample->weight() / total_weight;
      if (weight < min_assigned_sample_weight)
        min_assigned_sample_weight = weight;
      sample->set_weight(weight);
    }
  }
  return min_assigned_sample_weight;
}
const TrainingSampleSet* tesseract::SampleIterator::sample_set ( ) const [inline]

Definition at line 144 of file sampleiterator.h.

                                              {
    // Returns the pointer stored by Init(); NULL after default construction.
    return sample_set_;
  }
const ShapeTable* tesseract::SampleIterator::shape_table ( ) const [inline]

Definition at line 140 of file sampleiterator.h.

                                        {
    // May be the caller's table or the dummy table that Init() builds and
    // owns; NULL after default construction.
    return shape_table_;
  }
int tesseract::SampleIterator::SparseCharsetSize ( ) const

Definition at line 202 of file sampleiterator.cpp.

                                            {
  // Preference order: the charset map's SparseSize(), then the shape table's
  // NumShapes(), then the sample set's charsetsize().
  return charset_map_ != NULL
      ? charset_map_->SparseSize()
      : (shape_table_ != NULL ? shape_table_->NumShapes()
                              : sample_set_->charsetsize());
}
int tesseract::SampleIterator::UniformSamples ( )

Definition at line 220 of file sampleiterator.cpp.

                                   {
  // Sets the weight of every visited sample to 1.0 and counts the samples.
  int num_good_samples = 0;
  for (Begin(); !AtEnd(); Next()) {
    TrainingSample* sample = MutableSample();
    sample->set_weight(1.0);
    ++num_good_samples;
  }
  // Rescale the now-equal weights; the value NormalizeSamples() returns is
  // discarded.
  NormalizeSamples();
  return num_good_samples;
}

The documentation for this class was generated from the following files: