Tesseract
3.02
|
#include <sampleiterator.h>
Public Member Functions
SampleIterator ()
~SampleIterator ()
void Clear ()
void Init (const IndexMapBiDi *charset_map, const ShapeTable *shape_table, bool randomize, TrainingSampleSet *sample_set)
void Begin ()
bool AtEnd () const
const TrainingSample & GetSample () const
TrainingSample * MutableSample () const
int GlobalSampleIndex () const
int GetCompactClassID () const
int GetSparseClassID () const
void Next ()
int CompactCharsetSize () const
int SparseCharsetSize () const
const IndexMapBiDi & charset_map () const
const ShapeTable * shape_table () const
const TrainingSampleSet * sample_set () const
void MapSampleFeatures (const IntFeatureMap &feature_map)
int UniformSamples ()
double NormalizeSamples ()
Definition at line 92 of file sampleiterator.h.
tesseract::SampleIterator::SampleIterator()
tesseract::SampleIterator::~SampleIterator()
Definition at line 37 of file sampleiterator.cpp.
// Destructor body: all cleanup is delegated to Clear().
{ Clear(); }
bool tesseract::SampleIterator::AtEnd() const
Definition at line 99 of file sampleiterator.cpp.
{
  // Iteration is finished once shape_index_ has reached (or passed)
  // num_shapes_.
  const bool exhausted = shape_index_ >= num_shapes_;
  return exhausted;
}
void tesseract::SampleIterator::Begin()
Definition at line 87 of file sampleiterator.cpp.
{
  // Reset every cursor and cached count. shape_index_ starts at -1 so that
  // the first increment performed inside Next() lands on index 0.
  shape_index_ = -1;
  shape_char_index_ = 0;
  num_shape_chars_ = 0;
  shape_font_index_ = 0;
  num_shape_fonts_ = 0;
  sample_index_ = 0;
  num_samples_ = 0;
  // Find the first indexable sample.
  Next();
}
const IndexMapBiDi& tesseract::SampleIterator::charset_map() const [inline]
Definition at line 137 of file sampleiterator.h.
// Returns the charset map by reference.
// NOTE(review): charset_map_ is dereferenced unconditionally here, while other
// members test it against NULL before use — presumably callers may only call
// this accessor when a map was supplied to Init(); confirm.
{
return *charset_map_;
}
void tesseract::SampleIterator::Clear()
Definition at line 41 of file sampleiterator.cpp.
{
  // Free the shape table this iterator owns (if any) and null the pointer
  // so that a later Clear() or destruction does not free it twice.
  delete owned_shape_table_;
  owned_shape_table_ = NULL;
}
int tesseract::SampleIterator::CompactCharsetSize() const
Definition at line 196 of file sampleiterator.cpp.
{
  // Without a charset map there is no compact space, so fall back to the
  // sparse size.
  if (charset_map_ == NULL) {
    return SparseCharsetSize();
  }
  return charset_map_->CompactSize();
}
int tesseract::SampleIterator::GetCompactClassID() const
Definition at line 142 of file sampleiterator.cpp.
{
  // Without a charset map the sparse class id is returned unchanged;
  // otherwise the current shape index is mapped through SparseToCompact().
  if (charset_map_ == NULL) {
    return GetSparseClassID();
  }
  return charset_map_->SparseToCompact(shape_index_);
}
const TrainingSample& tesseract::SampleIterator::GetSample() const
Definition at line 103 of file sampleiterator.cpp.
{
  // With no shape table, shape_index_ indexes the sample set directly.
  if (shape_table_ == NULL) {
    return *sample_set_->GetSample(shape_index_);
  }
  // Otherwise address the sample by (font, unichar, index within that pair),
  // taken from the current shape entry.
  const UnicharAndFonts* entry = GetShapeEntry();
  const int unichar = entry->unichar_id;
  const int font = entry->font_ids[shape_font_index_];
  return *sample_set_->GetSample(font, unichar, sample_index_);
}
int tesseract::SampleIterator::GetSparseClassID() const
Definition at line 150 of file sampleiterator.cpp.
int tesseract::SampleIterator::GlobalSampleIndex() const
Definition at line 127 of file sampleiterator.cpp.
{
  // With no shape table, the iterator position is itself the global index.
  if (shape_table_ == NULL) {
    return shape_index_;
  }
  // Otherwise ask the sample set to translate the current
  // (font, unichar, index) triple into its global index.
  const UnicharAndFonts* entry = GetShapeEntry();
  const int unichar = entry->unichar_id;
  const int font = entry->font_ids[shape_font_index_];
  return sample_set_->GlobalSampleIndex(font, unichar, sample_index_);
}
void tesseract::SampleIterator::Init(const IndexMapBiDi* charset_map, const ShapeTable* shape_table, bool randomize, TrainingSampleSet* sample_set)
Definition at line 47 of file sampleiterator.cpp.
{
  // Release any shape table owned from an earlier Init() before rebinding.
  Clear();
  charset_map_ = charset_map;
  shape_table_ = shape_table;
  sample_set_ = sample_set;
  randomize_ = randomize;
  if (shape_table_ == NULL && charset_map_ != NULL) {
    // The caller wishes to iterate by class. The easiest way to do this
    // is to create a dummy shape_table_ that we will own.
    int num_fonts = sample_set_->NumFonts();
    owned_shape_table_ = new ShapeTable(sample_set_->unicharset());
    int charsetsize = sample_set_->unicharset().size();
    for (int c = 0; c < charsetsize; ++c) {
      // We always add a shape for each character to keep the index in sync
      // with the unichar_id.
      int shape_id = owned_shape_table_->AddShape(c, 0);
      // AddShape(c, 0) above was already given font 0, so start at font 1
      // and add only fonts that have samples for this character.
      for (int f = 1; f < num_fonts; ++f) {
        if (sample_set_->NumClassSamples(f, c, true) > 0) {
          owned_shape_table_->AddToShape(shape_id, c, f);
        }
      }
    }
    shape_table_ = owned_shape_table_;
  }
  if (shape_table_ != NULL) {
    num_shapes_ = shape_table_->NumShapes();
  } else {
    // No shape table and no charset map: iterate over the samples
    // themselves, so the "shape" count is a sample count.
    num_shapes_ = randomize ? sample_set_->num_samples()
                            : sample_set_->num_raw_samples();
  }
  // Position the iterator on the first sample.
  Begin();
}
void tesseract::SampleIterator::MapSampleFeatures(const IntFeatureMap& feature_map)
Definition at line 211 of file sampleiterator.cpp.
{
  // Walk the whole iteration range from the start and apply feature_map
  // to every sample visited.
  Begin();
  while (!AtEnd()) {
    MutableSample()->MapFeatures(feature_map);
    Next();
  }
}
TrainingSample* tesseract::SampleIterator::MutableSample() const
Definition at line 114 of file sampleiterator.cpp.
{
  // With no shape table, shape_index_ indexes the sample set directly.
  if (shape_table_ == NULL) {
    return sample_set_->mutable_sample(shape_index_);
  }
  // Otherwise address the sample by (font, unichar, index within that pair),
  // taken from the current shape entry.
  const UnicharAndFonts* entry = GetShapeEntry();
  const int unichar = entry->unichar_id;
  const int font = entry->font_ids[shape_font_index_];
  return sample_set_->MutableSample(font, unichar, sample_index_);
}
void tesseract::SampleIterator::Next()
Definition at line 156 of file sampleiterator.cpp.
{
  // Advances to the next sample. With a shape table the cursor is nested:
  // shape -> unichar within shape -> font within unichar -> sample within
  // the (font, unichar) pair. Pairs with no samples are skipped.
  if (shape_table_ != NULL) {
    // Next sample in this class/font combination.
    ++sample_index_;
    if (sample_index_ < num_samples_)
      return;
    // Next font in this class in this shape.
    sample_index_ = 0;
    do {
      ++shape_font_index_;
      if (shape_font_index_ >= num_shape_fonts_) {
        // Next unichar in this shape.
        shape_font_index_ = 0;
        ++shape_char_index_;
        if (shape_char_index_ >= num_shape_chars_) {
          // Find the next shape that is mapped in the charset_map_.
          shape_char_index_ = 0;
          do {
            ++shape_index_;
          } while (shape_index_ < num_shapes_ &&
                   charset_map_ != NULL &&
                   charset_map_->SparseToCompact(shape_index_) < 0);
          if (shape_index_ >= num_shapes_)
            return;  // The end.
          num_shape_chars_ = shape_table_->GetShape(shape_index_).size();
        }
      }
      // Refresh the cached counts for the (unichar, font) pair now selected.
      const UnicharAndFonts* shape_entry = GetShapeEntry();
      num_shape_fonts_ = shape_entry->font_ids.size();
      int char_id = shape_entry->unichar_id;
      int font_id = shape_entry->font_ids[shape_font_index_];
      num_samples_ = sample_set_->NumClassSamples(font_id, char_id, randomize_);
    } while (num_samples_ == 0);
  } else {
    // We are just iterating over the samples.
    ++shape_index_;
  }
}
double tesseract::SampleIterator::NormalizeSamples()
Definition at line 233 of file sampleiterator.cpp.
{
  // Rescales the weights of all iterated samples so that they sum to 1 and
  // returns the smallest weight assigned. If the total weight is not
  // positive, no sample is modified and 1.0 is returned.
  //
  // First pass: sum the weights of every sample the iterator visits.
  double total_weight = 0.0;
  for (Begin(); !AtEnd(); Next()) {
    const TrainingSample& sample = GetSample();
    total_weight += sample.weight();
  }
  // Normalize samples.
  double min_assigned_sample_weight = 1.0;
  if (total_weight > 0.0) {
    for (Begin(); !AtEnd(); Next()) {
      TrainingSample* sample = MutableSample();
      double weight = sample->weight() / total_weight;
      if (weight < min_assigned_sample_weight)
        min_assigned_sample_weight = weight;
      sample->set_weight(weight);
    }
  }
  return min_assigned_sample_weight;
}
const TrainingSampleSet* tesseract::SampleIterator::sample_set() const [inline]
Definition at line 144 of file sampleiterator.h.
// Read-only accessor: returns the sample_set_ pointer as stored.
{
return sample_set_;
}
const ShapeTable* tesseract::SampleIterator::shape_table() const [inline]
Definition at line 140 of file sampleiterator.h.
// Read-only accessor: returns the shape_table_ pointer as stored. Other
// members compare shape_table_ against NULL, so the result may be NULL.
{
return shape_table_;
}
int tesseract::SampleIterator::SparseCharsetSize() const
Definition at line 202 of file sampleiterator.cpp.
int tesseract::SampleIterator::UniformSamples()
Definition at line 220 of file sampleiterator.cpp.
{
  // Give every iterated sample the same weight (1.0), counting them as we
  // go, then rescale through NormalizeSamples(). Returns the count.
  int visited = 0;
  Begin();
  while (!AtEnd()) {
    MutableSample()->set_weight(1.0);
    ++visited;
    Next();
  }
  NormalizeSamples();
  return visited;
}