|
Tesseract
3.02
|
#include <sampleiterator.h>
Public Member Functions | |
| SampleIterator () | |
| ~SampleIterator () | |
| void | Clear () |
| void | Init (const IndexMapBiDi *charset_map, const ShapeTable *shape_table, bool randomize, TrainingSampleSet *sample_set) |
| void | Begin () |
| bool | AtEnd () const |
| const TrainingSample & | GetSample () const |
| TrainingSample * | MutableSample () const |
| int | GlobalSampleIndex () const |
| int | GetCompactClassID () const |
| int | GetSparseClassID () const |
| void | Next () |
| int | CompactCharsetSize () const |
| int | SparseCharsetSize () const |
| const IndexMapBiDi & | charset_map () const |
| const ShapeTable * | shape_table () const |
| const TrainingSampleSet * | sample_set () const |
| void | MapSampleFeatures (const IntFeatureMap &feature_map) |
| int | UniformSamples () |
| double | NormalizeSamples () |
Definition at line 92 of file sampleiterator.h.
| tesseract::SampleIterator::SampleIterator | ( | ) |
| tesseract::SampleIterator::~SampleIterator | ( | ) |
Definition at line 37 of file sampleiterator.cpp.
{
// Destruction only needs Clear(), which deletes the shape table owned by
// this iterator (owned_shape_table_) and resets that pointer to NULL.
Clear();
}
| bool tesseract::SampleIterator::AtEnd | ( | ) | const |
Definition at line 99 of file sampleiterator.cpp.
{
// Iteration is finished once shape_index_ has reached num_shapes_.
// Init() sets num_shapes_ to the number of shapes when a shape table is in
// use, otherwise to the number of samples in the sample set.
return shape_index_ >= num_shapes_;
}
| void tesseract::SampleIterator::Begin | ( | ) |
Definition at line 87 of file sampleiterator.cpp.
{
  // Rewind all cursors and cached counts. shape_index_ is parked one step
  // before the first entry so that the Next() call below moves onto it.
  shape_index_ = -1;
  shape_char_index_ = shape_font_index_ = sample_index_ = 0;
  num_shape_chars_ = num_shape_fonts_ = num_samples_ = 0;
  // Advance to the first sample that can actually be indexed.
  Next();
}
| const IndexMapBiDi& tesseract::SampleIterator::charset_map | ( | ) | const [inline] |
Definition at line 137 of file sampleiterator.h.
{
// Returns the charset map by reference. This dereferences charset_map_
// unconditionally; other methods of this class test charset_map_ against
// NULL, so callers must only use this accessor when a map was supplied.
return *charset_map_;
}
| void tesseract::SampleIterator::Clear | ( | ) |
Definition at line 41 of file sampleiterator.cpp.
{
  // Release the shape table this iterator created for itself (if any) and
  // forget the pointer so a repeated Clear() has nothing left to free.
  if (owned_shape_table_ != NULL) {
    delete owned_shape_table_;
    owned_shape_table_ = NULL;
  }
}
| int tesseract::SampleIterator::CompactCharsetSize | ( | ) | const |
Definition at line 196 of file sampleiterator.cpp.
{
  // With no charset map there is no compact index space, so fall back to the
  // sparse size.
  if (charset_map_ == NULL)
    return SparseCharsetSize();
  return charset_map_->CompactSize();
}
| int tesseract::SampleIterator::GetCompactClassID | ( | ) | const |
Definition at line 142 of file sampleiterator.cpp.
{
  // With no charset map the sparse class id is returned as-is.
  if (charset_map_ == NULL)
    return GetSparseClassID();
  // Otherwise translate the current shape index through the map.
  return charset_map_->SparseToCompact(shape_index_);
}
| const TrainingSample & tesseract::SampleIterator::GetSample | ( | ) | const |
Definition at line 103 of file sampleiterator.cpp.
{
  // No shape table: shape_index_ is used directly as the sample index.
  if (shape_table_ == NULL)
    return *sample_set_->GetSample(shape_index_);

  // Shape table in use: address the sample by (font, unichar, sample index)
  // taken from the current shape entry.
  const UnicharAndFonts* entry = GetShapeEntry();
  const int unichar = entry->unichar_id;
  const int font = entry->font_ids[shape_font_index_];
  return *sample_set_->GetSample(font, unichar, sample_index_);
}
| int tesseract::SampleIterator::GetSparseClassID | ( | ) | const |
Definition at line 150 of file sampleiterator.cpp.
| int tesseract::SampleIterator::GlobalSampleIndex | ( | ) | const |
Definition at line 127 of file sampleiterator.cpp.
{
  // No shape table: the iteration index already is the global sample index.
  if (shape_table_ == NULL)
    return shape_index_;

  // Shape table in use: ask the sample set to convert the current
  // (font, unichar, sample index) triple into a global index.
  const UnicharAndFonts* entry = GetShapeEntry();
  const int unichar = entry->unichar_id;
  const int font = entry->font_ids[shape_font_index_];
  return sample_set_->GlobalSampleIndex(font, unichar, sample_index_);
}
| void tesseract::SampleIterator::Init | ( const IndexMapBiDi * charset_map, const ShapeTable * shape_table, bool randomize, TrainingSampleSet * sample_set ) |
Definition at line 47 of file sampleiterator.cpp.
{
  // Drop any shape table left over from a previous Init() before the
  // pointers are overwritten.
  Clear();
  charset_map_ = charset_map;
  shape_table_ = shape_table;
  sample_set_ = sample_set;
  randomize_ = randomize;

  // A charset map without a shape table means the caller wants to iterate by
  // class. That is implemented by building a private shape table holding one
  // shape per unichar, which this iterator then owns.
  const bool iterate_by_class =
      (shape_table_ == NULL) && (charset_map_ != NULL);
  if (iterate_by_class) {
    const int font_count = sample_set_->NumFonts();
    owned_shape_table_ = new ShapeTable(sample_set_->unicharset());
    const int class_count = sample_set_->unicharset().size();
    for (int unichar = 0; unichar < class_count; ++unichar) {
      // A shape is added for every unichar, even one without samples, so
      // that shape ids stay in step with unichar ids. Font 0 is always part
      // of the shape.
      const int shape = owned_shape_table_->AddShape(unichar, 0);
      // Remaining fonts join the shape only if they have samples.
      for (int font = 1; font < font_count; ++font) {
        if (sample_set_->NumClassSamples(font, unichar, true) <= 0)
          continue;
        owned_shape_table_->AddToShape(shape, unichar, font);
      }
    }
    shape_table_ = owned_shape_table_;
  }

  // num_shapes_ is the iteration bound: the shape count when iterating via a
  // shape table, otherwise the sample count chosen by the randomize flag.
  if (shape_table_ == NULL) {
    if (randomize) {
      num_shapes_ = sample_set_->num_samples();
    } else {
      num_shapes_ = sample_set_->num_raw_samples();
    }
  } else {
    num_shapes_ = shape_table_->NumShapes();
  }

  Begin();
}
| void tesseract::SampleIterator::MapSampleFeatures | ( | const IntFeatureMap & | feature_map | ) |
Definition at line 211 of file sampleiterator.cpp.
{
  // Walk the whole iteration range from the start and apply feature_map to
  // every sample reached.
  Begin();
  while (!AtEnd()) {
    TrainingSample* current = MutableSample();
    current->MapFeatures(feature_map);
    Next();
  }
}
| TrainingSample * tesseract::SampleIterator::MutableSample | ( | ) | const |
Definition at line 114 of file sampleiterator.cpp.
{
  // No shape table: shape_index_ is used directly as the sample index.
  if (shape_table_ == NULL)
    return sample_set_->mutable_sample(shape_index_);

  // Shape table in use: address the sample by (font, unichar, sample index)
  // taken from the current shape entry.
  const UnicharAndFonts* entry = GetShapeEntry();
  const int unichar = entry->unichar_id;
  const int font = entry->font_ids[shape_font_index_];
  return sample_set_->MutableSample(font, unichar, sample_index_);
}
| void tesseract::SampleIterator::Next | ( | ) |
Definition at line 156 of file sampleiterator.cpp.
{
// Advances the iterator by one sample. With a shape table the position is
// the tuple (shape_index_, shape_char_index_, shape_font_index_,
// sample_index_), stepped innermost-first; without one, only shape_index_
// moves. After running off the end, shape_index_ >= num_shapes_.
if (shape_table_ != NULL) {
// Next sample in this class/font combination.
++sample_index_;
if (sample_index_ < num_samples_)
return;
// Next font in this class in this shape.
sample_index_ = 0;
// Keep stepping font -> unichar -> shape until a combination that has at
// least one sample is found, or the shapes run out.
do {
++shape_font_index_;
if (shape_font_index_ >= num_shape_fonts_) {
// Next unichar in this shape.
shape_font_index_ = 0;
++shape_char_index_;
if (shape_char_index_ >= num_shape_chars_) {
// Find the next shape that is mapped in the charset_map_.
shape_char_index_ = 0;
// When charset_map_ is NULL the loop below runs exactly once (no shape is
// skipped); otherwise shapes whose SparseToCompact() result is negative
// are passed over.
do {
++shape_index_;
} while (shape_index_ < num_shapes_ &&
charset_map_ != NULL &&
charset_map_->SparseToCompact(shape_index_) < 0);
if (shape_index_ >= num_shapes_)
return; // The end.
num_shape_chars_ = shape_table_->GetShape(shape_index_).size();
}
}
// Refresh the cached font and sample counts for the new position.
// NOTE(review): GetShapeEntry() is defined elsewhere; presumably it returns
// the entry for the current shape_index_/shape_char_index_ -- confirm.
const UnicharAndFonts* shape_entry = GetShapeEntry();
num_shape_fonts_ = shape_entry->font_ids.size();
int char_id = shape_entry->unichar_id;
int font_id = shape_entry->font_ids[shape_font_index_];
num_samples_ = sample_set_->NumClassSamples(font_id, char_id, randomize_);
} while (num_samples_ == 0);
} else {
// We are just iterating over the samples.
++shape_index_;
}
}
| double tesseract::SampleIterator::NormalizeSamples | ( | ) |
Definition at line 233 of file sampleiterator.cpp.
{
  // Rescales the weight of every sample in the iteration range so that the
  // weights sum to 1, using two full passes over the range.
  //
  // Returns the smallest weight assigned. If the total weight is not
  // positive, no weights are modified and 1.0 is returned.

  // Pass 1: accumulate the total weight.
  double total_weight = 0.0;
  for (Begin(); !AtEnd(); Next()) {
    total_weight += GetSample().weight();
  }

  // Pass 2: divide each weight by the total, tracking the minimum result.
  // Skipped when the total is not positive, which also avoids a division by
  // zero.
  double min_assigned_sample_weight = 1.0;
  if (total_weight > 0.0) {
    for (Begin(); !AtEnd(); Next()) {
      TrainingSample* sample = MutableSample();
      const double weight = sample->weight() / total_weight;
      if (weight < min_assigned_sample_weight)
        min_assigned_sample_weight = weight;
      sample->set_weight(weight);
    }
  }
  return min_assigned_sample_weight;
}
| const TrainingSampleSet* tesseract::SampleIterator::sample_set | ( | ) | const [inline] |
Definition at line 144 of file sampleiterator.h.
{
// Read-only access to the sample set pointer stored by Init().
return sample_set_;
}
| const ShapeTable* tesseract::SampleIterator::shape_table | ( | ) | const [inline] |
Definition at line 140 of file sampleiterator.h.
{
// Returns the shape table currently used for iteration. Per Init(), this is
// either the caller-supplied table, the table built and owned by this
// iterator, or NULL when neither a shape table nor a charset map was given.
return shape_table_;
}
| int tesseract::SampleIterator::SparseCharsetSize | ( | ) | const |
Definition at line 202 of file sampleiterator.cpp.
| int tesseract::SampleIterator::UniformSamples | ( | ) |
Definition at line 220 of file sampleiterator.cpp.
{
  // Give every sample in the iteration range the same weight (1.0), then
  // normalize. Returns how many samples were visited.
  int visited = 0;
  Begin();
  while (!AtEnd()) {
    MutableSample()->set_weight(1.0);
    ++visited;
    Next();
  }
  NormalizeSamples();
  return visited;
}