Tesseract
3.02
|
#include <trainingsample.h>
Public Member Functions | |
TrainingSample () | |
~TrainingSample () | |
TrainingSample * | RandomizedCopy (int index) const |
TrainingSample * | Copy () const |
bool | Serialize (FILE *fp) const |
bool | DeSerialize (bool swap, FILE *fp) |
void | ExtractCharDesc (int feature_type, int micro_type, int cn_type, int geo_type, CHAR_DESC_STRUCT *char_desc) |
void | IndexFeatures (const IntFeatureSpace &feature_space) |
void | MapFeatures (const IntFeatureMap &feature_map) |
Pix * | RenderToPix (const UNICHARSET *unicharset) const |
void | DisplayFeatures (ScrollView::Color color, ScrollView *window) const |
Pix * | GetSamplePix (int padding, Pix *page_pix) const |
UNICHAR_ID | class_id () const |
void | set_class_id (int id) |
int | font_id () const |
void | set_font_id (int id) |
int | page_num () const |
void | set_page_num (int page) |
const TBOX & | bounding_box () const |
void | set_bounding_box (const TBOX &box) |
int | num_features () const |
const INT_FEATURE_STRUCT * | features () const |
int | num_micro_features () const |
const MicroFeature * | micro_features () const |
float | cn_feature (int index) const |
int | geo_feature (int index) const |
double | weight () const |
void | set_weight (double value) |
double | max_dist () const |
void | set_max_dist (double value) |
int | sample_index () const |
void | set_sample_index (int value) |
bool | features_are_mapped () const |
const GenericVector< int > & | mapped_features () const |
const GenericVector< int > & | indexed_features () const |
bool | is_error () const |
void | set_is_error (bool value) |
Static Public Member Functions | |
static TrainingSample * | CopyFromFeatures (const INT_FX_RESULT_STRUCT &fx_info, const INT_FEATURE_STRUCT *features, int num_features) |
static TrainingSample * | DeSerializeCreate (bool swap, FILE *fp) |
Definition at line 53 of file trainingsample.h.
tesseract::TrainingSample::TrainingSample | ( | ) | [inline] |
Definition at line 55 of file trainingsample.h.
tesseract::TrainingSample::~TrainingSample | ( | ) |
Definition at line 40 of file trainingsample.cpp.
// Destructor body: frees the features_ and micro_features_ arrays with delete [].
{ delete [] features_; delete [] micro_features_; }
const TBOX& tesseract::TrainingSample::bounding_box | ( | ) | const [inline] |
Definition at line 131 of file trainingsample.h.
{
// Read-only accessor: returns the bounding_box_ member.
return bounding_box_;
}
UNICHAR_ID tesseract::TrainingSample::class_id | ( | ) | const [inline] |
Definition at line 113 of file trainingsample.h.
{
// Read-only accessor: returns the class_id_ member.
return class_id_;
}
float tesseract::TrainingSample::cn_feature | ( | int | index | ) | const [inline] |
Definition at line 149 of file trainingsample.h.
{
// Returns element `index` of cn_feature_. The index is used as given;
// no range check is performed here.
return cn_feature_[index];
}
TrainingSample * tesseract::TrainingSample::Copy | ( | ) | const |
Definition at line 149 of file trainingsample.cpp.
{
  // Builds a new heap-allocated sample and transfers the identity fields,
  // both feature arrays and the CN/geo feature values from this object.
  // Members that are not assigned below (for example page_num_ and
  // bounding_box_) keep whatever `new TrainingSample` left in them.
  TrainingSample* clone = new TrainingSample;
  clone->class_id_ = class_id_;
  clone->font_id_ = font_id_;
  clone->weight_ = weight_;
  clone->sample_index_ = sample_index_;

  // The int feature array is duplicated only when there is something in it.
  clone->num_features_ = num_features_;
  if (num_features_ > 0) {
    clone->features_ = new INT_FEATURE_STRUCT[num_features_];
    memcpy(clone->features_, features_,
           num_features_ * sizeof(*features_));
  }

  // Same treatment for the micro feature array.
  clone->num_micro_features_ = num_micro_features_;
  if (num_micro_features_ > 0) {
    clone->micro_features_ = new MicroFeature[num_micro_features_];
    memcpy(clone->micro_features_, micro_features_,
           num_micro_features_ * sizeof(*micro_features_));
  }

  // cn_feature_ and geo_feature_ are copied as fixed-length blocks of
  // kNumCNParams and GeoCount elements respectively.
  memcpy(clone->cn_feature_, cn_feature_,
         sizeof(cn_feature_[0]) * kNumCNParams);
  memcpy(clone->geo_feature_, geo_feature_,
         sizeof(geo_feature_[0]) * GeoCount);
  return clone;
}
TrainingSample * tesseract::TrainingSample::CopyFromFeatures | ( | const INT_FX_RESULT_STRUCT & | fx_info, |
const INT_FEATURE_STRUCT * | features, | ||
int | num_features | ||
) | [static] |
Definition at line 110 of file trainingsample.cpp.
{
  // Creates a new sample that owns a private copy of the caller's feature
  // array and takes its three geo values from fx_info.
  TrainingSample* result = new TrainingSample;

  result->num_features_ = num_features;
  result->features_ = new INT_FEATURE_STRUCT[num_features];
  memcpy(result->features_, features, num_features * sizeof(*features));

  result->geo_feature_[GeoBottom] = fx_info.YBottom;
  result->geo_feature_[GeoTop] = fx_info.YTop;
  result->geo_feature_[GeoWidth] = fx_info.Width;

  // A freshly built sample has neither indexed nor mapped features yet.
  result->features_are_mapped_ = false;
  result->features_are_indexed_ = false;
  return result;
}
bool tesseract::TrainingSample::DeSerialize | ( | bool | swap, |
FILE * | fp | ||
) |
Definition at line 80 of file trainingsample.cpp.
{
  // Reads this sample from fp, in the same field order that Serialize
  // writes. Returns false as soon as any read comes up short or a count is
  // invalid; in that case the object may be only partially updated.
  if (fread(&class_id_, sizeof(class_id_), 1, fp) != 1) return false;
  if (fread(&font_id_, sizeof(font_id_), 1, fp) != 1) return false;
  if (fread(&page_num_, sizeof(page_num_), 1, fp) != 1) return false;
  if (!bounding_box_.DeSerialize(swap, fp)) return false;
  if (fread(&num_features_, sizeof(num_features_), 1, fp) != 1) return false;
  if (fread(&num_micro_features_, sizeof(num_micro_features_), 1, fp) != 1)
    return false;
  if (swap) {
    // NOTE(review): only these three fields are byte-reversed; font_id_,
    // page_num_ and the array payloads below are read as-is. Left unchanged
    // here -- confirm whether that is intentional.
    ReverseN(&class_id_, sizeof(class_id_));
    ReverseN(&num_features_, sizeof(num_features_));
    ReverseN(&num_micro_features_, sizeof(num_micro_features_));
  }

  // Reject corrupt counts before they reach new[] and fread: a negative
  // value would otherwise be converted to a huge unsigned size.
  if (num_features_ < 0 || num_micro_features_ < 0) return false;
  const size_t feature_count = static_cast<size_t>(num_features_);
  const size_t micro_count = static_cast<size_t>(num_micro_features_);

  // Replace any previously held arrays with ones sized for the new counts.
  delete [] features_;
  features_ = new INT_FEATURE_STRUCT[feature_count];
  if (fread(features_, sizeof(*features_), feature_count, fp) != feature_count)
    return false;

  delete [] micro_features_;
  micro_features_ = new MicroFeature[micro_count];
  if (fread(micro_features_, sizeof(*micro_features_), micro_count, fp) !=
      micro_count)
    return false;

  // Fixed-length trailers: kNumCNParams CN values, then GeoCount geo values.
  if (fread(cn_feature_, sizeof(*cn_feature_), kNumCNParams, fp) !=
      kNumCNParams)
    return false;
  if (fread(geo_feature_, sizeof(*geo_feature_), GeoCount, fp) != GeoCount)
    return false;
  return true;
}
TrainingSample * tesseract::TrainingSample::DeSerializeCreate | ( | bool | swap, |
FILE * | fp | ||
) | [static] |
Definition at line 71 of file trainingsample.cpp.
{
  // Factory form of DeSerialize: allocates a sample, fills it from fp, and
  // returns NULL (after freeing the allocation) if the read fails.
  TrainingSample* result = new TrainingSample;
  if (!result->DeSerialize(swap, fp)) {
    delete result;
    return NULL;
  }
  return result;
}
void tesseract::TrainingSample::DisplayFeatures | ( | ScrollView::Color | color, |
ScrollView * | window | ||
) | const |
Definition at line 283 of file trainingsample.cpp.
{
  // Passes each of the num_features_ int features to RenderIntFeature with
  // the given window and color. The whole body compiles to nothing when
  // GRAPHICS_DISABLED is defined.
#ifndef GRAPHICS_DISABLED
  for (int f = 0; f < num_features_; ++f) {
    RenderIntFeature(window, &features_[f], color);
  }
#endif  // GRAPHICS_DISABLED
}
void tesseract::TrainingSample::ExtractCharDesc | ( | int | feature_type, |
int | micro_type, | ||
int | cn_type, | ||
int | geo_type, | ||
CHAR_DESC_STRUCT * | char_desc | ||
) |
Definition at line 172 of file trainingsample.cpp.
{
  // Fills this sample from the feature sets held in char_desc. Each *_type
  // argument is used as an index into char_desc->FeatureSets. A missing
  // (NULL) feature set is reported through tprintf rather than treated as
  // fatal.
  // NOTE(review): the int-feature lookup below uses int_feature_type, while
  // the documented signature names the first parameter feature_type --
  // presumably the definition and the declaration name that parameter
  // differently; confirm.

  // Extract the INT features.
  delete [] features_;  // delete [] on a null pointer is a no-op.
  FEATURE_SET_STRUCT* char_features = char_desc->FeatureSets[int_feature_type];
  if (char_features == NULL) {
    tprintf("Error: no features to train on of type %s\n", kIntFeatureType);
    num_features_ = 0;
    features_ = NULL;
  } else {
    num_features_ = char_features->NumFeatures;
    features_ = new INT_FEATURE_STRUCT[num_features_];
    for (int f = 0; f < num_features_; ++f) {
      features_[f].X =
          static_cast<uinT8>(char_features->Features[f]->Params[IntX]);
      features_[f].Y =
          static_cast<uinT8>(char_features->Features[f]->Params[IntY]);
      features_[f].Theta =
          static_cast<uinT8>(char_features->Features[f]->Params[IntDir]);
      features_[f].CP_misses = 0;
    }
  }

  // Extract the Micro features.
  delete [] micro_features_;
  char_features = char_desc->FeatureSets[micro_type];
  if (char_features == NULL) {
    tprintf("Error: no features to train on of type %s\n", kMicroFeatureType);
    num_micro_features_ = 0;
    micro_features_ = NULL;
  } else {
    num_micro_features_ = char_features->NumFeatures;
    micro_features_ = new MicroFeature[num_micro_features_];
    for (int f = 0; f < num_micro_features_; ++f) {
      for (int d = 0; d < MFCount; ++d) {
        micro_features_[f][d] = char_features->Features[f]->Params[d];
      }
    }
  }

  // Extract the CN feature. When it is absent, cn_feature_ is left untouched.
  char_features = char_desc->FeatureSets[cn_type];
  if (char_features == NULL) {
    tprintf("Error: no CN feature to train on.\n");
  } else {
    ASSERT_HOST(char_features->NumFeatures == 1);
    cn_feature_[CharNormY] = char_features->Features[0]->Params[CharNormY];
    cn_feature_[CharNormLength] =
        char_features->Features[0]->Params[CharNormLength];
    cn_feature_[CharNormRx] = char_features->Features[0]->Params[CharNormRx];
    cn_feature_[CharNormRy] = char_features->Features[0]->Params[CharNormRy];
  }

  // Extract the Geo feature. When it is absent, geo_feature_ is left untouched.
  char_features = char_desc->FeatureSets[geo_type];
  if (char_features == NULL) {
    tprintf("Error: no Geo feature to train on.\n");
  } else {
    ASSERT_HOST(char_features->NumFeatures == 1);
    geo_feature_[GeoBottom] = char_features->Features[0]->Params[GeoBottom];
    geo_feature_[GeoTop] = char_features->Features[0]->Params[GeoTop];
    geo_feature_[GeoWidth] = char_features->Features[0]->Params[GeoWidth];
  }

  // New raw features invalidate any previously indexed or mapped form.
  features_are_indexed_ = false;
  features_are_mapped_ = false;
}
const INT_FEATURE_STRUCT* tesseract::TrainingSample::features | ( | ) | const [inline] |
Definition at line 140 of file trainingsample.h.
{
// Returns the features_ pointer itself; no copy of the array is made.
return features_;
}
bool tesseract::TrainingSample::features_are_mapped | ( | ) | const [inline] |
Definition at line 173 of file trainingsample.h.
{
// Returns the features_are_mapped_ flag (set to true by MapFeatures).
return features_are_mapped_;
}
int tesseract::TrainingSample::font_id | ( | ) | const [inline] |
Definition at line 119 of file trainingsample.h.
{
// Read-only accessor: returns the font_id_ member.
return font_id_;
}
int tesseract::TrainingSample::geo_feature | ( | int | index | ) | const [inline] |
Definition at line 152 of file trainingsample.h.
{
// Returns element `index` of geo_feature_. The index is used as given;
// no range check is performed here.
return geo_feature_[index];
}
Pix * tesseract::TrainingSample::GetSamplePix | ( | int | padding, |
Pix * | page_pix | ||
) | const |
Definition at line 296 of file trainingsample.cpp.
{
  // Returns a newly clipped Pix containing this sample's bounding box,
  // grown by `padding` on every side and limited to the page, or NULL when
  // page_pix is NULL.
  if (page_pix == NULL) return NULL;
  int page_width = pixGetWidth(page_pix);
  int page_height = pixGetHeight(page_pix);
  TBOX padded_box = bounding_box();
  padded_box.pad(padding, padding);
  // Clip the padded_box to the limits of the page
  TBOX page_box(0, 0, page_width, page_height);
  padded_box &= page_box;
  // Cut out the padded sample region. Using page_box here instead would
  // return the entire page and leave padded_box unused.
  // NOTE(review): the y argument is page_height - top(), presumably because
  // TBOX measures y upward while the Box is top-down -- confirm.
  Box* box = boxCreate(padded_box.left(), page_height - padded_box.top(),
                       padded_box.width(), padded_box.height());
  Pix* sample_pix = pixClipRectangle(page_pix, box, NULL);
  boxDestroy(&box);
  return sample_pix;
}
const GenericVector<int>& tesseract::TrainingSample::indexed_features | ( | ) | const [inline] |
Definition at line 180 of file trainingsample.h.
// Asserts that features_are_indexed_ is set, then returns mapped_features_.
// IndexFeatures writes its output into mapped_features_, so that member
// holds the indexed features whenever the asserted flag is true.
{ ASSERT_HOST(features_are_indexed_); return mapped_features_; }
void tesseract::TrainingSample::IndexFeatures | ( | const IntFeatureSpace & | feature_space | ) |
Definition at line 243 of file trainingsample.cpp.
{
  // Asks feature_space to index and sort the num_features_ raw features,
  // writing the result directly into mapped_features_ (the same vector that
  // indexed_features() returns), then marks the sample as indexed and not
  // mapped.
  feature_space.IndexAndSortFeatures(features_, num_features_,
                                     &mapped_features_);
  features_are_indexed_ = true;
  features_are_mapped_ = false;
}
bool tesseract::TrainingSample::is_error | ( | ) | const [inline] |
Definition at line 184 of file trainingsample.h.
{
// Read-only accessor: returns the is_error_ flag.
return is_error_;
}
void tesseract::TrainingSample::MapFeatures | ( | const IntFeatureMap & | feature_map | ) |
Definition at line 253 of file trainingsample.cpp.
{
  // Two-stage conversion: index and sort the raw features in the map's
  // feature space into a temporary, then translate that temporary through
  // feature_map into mapped_features_.
  GenericVector<int> sorted_indices;
  feature_map.feature_space().IndexAndSortFeatures(features_, num_features_,
                                                   &sorted_indices);
  feature_map.MapIndexedFeatures(sorted_indices, &mapped_features_);

  // mapped_features_ now holds mapped (not merely indexed) values.
  features_are_mapped_ = true;
  features_are_indexed_ = false;
}
const GenericVector<int>& tesseract::TrainingSample::mapped_features | ( | ) | const [inline] |
Definition at line 176 of file trainingsample.h.
// Asserts that features_are_mapped_ is set (MapFeatures sets it), then
// returns mapped_features_.
{ ASSERT_HOST(features_are_mapped_); return mapped_features_; }
double tesseract::TrainingSample::max_dist | ( | ) | const [inline] |
Definition at line 161 of file trainingsample.h.
{
// Read-only accessor: returns the max_dist_ member.
return max_dist_;
}
const MicroFeature* tesseract::TrainingSample::micro_features | ( | ) | const [inline] |
Definition at line 146 of file trainingsample.h.
{
// Returns the micro_features_ pointer itself; no copy of the array is made.
return micro_features_;
}
int tesseract::TrainingSample::num_features | ( | ) | const [inline] |
Definition at line 137 of file trainingsample.h.
{
// Read-only accessor: returns num_features_, the length used for features_.
return num_features_;
}
int tesseract::TrainingSample::num_micro_features | ( | ) | const [inline] |
Definition at line 143 of file trainingsample.h.
{
// Read-only accessor: returns num_micro_features_, the length used for
// micro_features_.
return num_micro_features_;
}
int tesseract::TrainingSample::page_num | ( | ) | const [inline] |
Definition at line 125 of file trainingsample.h.
{
// Read-only accessor: returns the page_num_ member.
return page_num_;
}
TrainingSample * tesseract::TrainingSample::RandomizedCopy | ( | int | index | ) | const |
Definition at line 128 of file trainingsample.cpp.
{
  // Returns a new sample produced by Copy(). For index in
  // [0, kSampleRandomSize) the copy's X and Y feature coordinates are scaled
  // about kRandomizingCenter, and Y is additionally shifted; any other index
  // yields an unmodified copy.
  TrainingSample* sample = Copy();
  if (index >= 0 && index < kSampleRandomSize) {
    ++index;  // Remove the first combination.
    // The incremented index selects one y-shift and one scale factor.
    int yshift = kYShiftValues[index / kSampleScaleSize];
    double scaling = kScaleValues[index % kSampleScaleSize];
    for (int i = 0; i < num_features_; ++i) {
      // Read from this object's features, write to the copy's; round to the
      // nearest int and clamp to [0, MAX_UINT8].
      double result = (features_[i].X - kRandomizingCenter) * scaling;
      result += kRandomizingCenter;
      sample->features_[i].X =
          ClipToRange(static_cast<int>(result + 0.5), 0, MAX_UINT8);
      result = (features_[i].Y - kRandomizingCenter) * scaling;
      result += kRandomizingCenter + yshift;
      sample->features_[i].Y =
          ClipToRange(static_cast<int>(result + 0.5), 0, MAX_UINT8);
    }
  }
  return sample;
}
Pix * tesseract::TrainingSample::RenderToPix | ( | const UNICHARSET * | unicharset | ) | const |
Definition at line 263 of file trainingsample.cpp.
{
  // Draws each int feature as a short stroke into a new 1-bit-deep pix of
  // kIntFeatureExtent x kIntFeatureExtent pixels. When unicharset is
  // non-NULL, the pix text is set to the unichar string for class_id_.
  Pix* pix = pixCreate(kIntFeatureExtent, kIntFeatureExtent, 1);
  for (int f = 0; f < num_features_; ++f) {
    int start_x = features_[f].X;
    // Y is subtracted from the extent, i.e. the image row order is flipped.
    int start_y = kIntFeatureExtent - features_[f].Y;
    // Theta / 256 is turned into an angle in [-PI, PI).
    const double angle = (features_[f].Theta / 256.0) * 2.0 * PI - PI;
    double dx = cos(angle);
    double dy = -sin(angle);
    // Plot the start point plus five unit steps along the direction.
    for (int i = 0; i <= 5; ++i) {
      int x = static_cast<int>(start_x + dx * i);
      int y = static_cast<int>(start_y + dy * i);
      // Bound against the size the pix was created with.
      if (x >= 0 && x < kIntFeatureExtent && y >= 0 && y < kIntFeatureExtent)
        pixSetPixel(pix, x, y, 1);
    }
  }
  if (unicharset != NULL)
    pixSetText(pix, unicharset->id_to_unichar(class_id_));
  return pix;
}
int tesseract::TrainingSample::sample_index | ( | ) | const [inline] |
Definition at line 167 of file trainingsample.h.
{
// Read-only accessor: returns the sample_index_ member.
return sample_index_;
}
bool tesseract::TrainingSample::Serialize | ( | FILE * | fp | ) | const |
Definition at line 49 of file trainingsample.cpp.
{
  // Writes this sample to fp and returns true only if every write succeeds.
  // Evaluation stops at the first failing write.

  // Identity fields.
  if (fwrite(&class_id_, sizeof(class_id_), 1, fp) != 1 ||
      fwrite(&font_id_, sizeof(font_id_), 1, fp) != 1 ||
      fwrite(&page_num_, sizeof(page_num_), 1, fp) != 1) {
    return false;
  }

  // Bounding box serializes itself.
  if (!bounding_box_.Serialize(fp)) {
    return false;
  }

  // Both array lengths are written before either array payload.
  if (fwrite(&num_features_, sizeof(num_features_), 1, fp) != 1 ||
      fwrite(&num_micro_features_, sizeof(num_micro_features_), 1, fp) != 1) {
    return false;
  }
  if (fwrite(features_, sizeof(*features_), num_features_, fp) !=
      num_features_) {
    return false;
  }
  if (fwrite(micro_features_, sizeof(*micro_features_), num_micro_features_,
             fp) != num_micro_features_) {
    return false;
  }

  // Fixed-length trailers: kNumCNParams CN values, then GeoCount geo values.
  if (fwrite(cn_feature_, sizeof(*cn_feature_), kNumCNParams, fp) !=
      kNumCNParams) {
    return false;
  }
  return fwrite(geo_feature_, sizeof(*geo_feature_), GeoCount, fp) == GeoCount;
}
void tesseract::TrainingSample::set_bounding_box | ( | const TBOX & | box | ) | [inline] |
Definition at line 134 of file trainingsample.h.
// Stores the caller-supplied box in bounding_box_.
{ bounding_box_ = box; }
void tesseract::TrainingSample::set_class_id | ( | int | id | ) | [inline] |
Definition at line 116 of file trainingsample.h.
// Stores the caller-supplied id in class_id_.
{ class_id_ = id; }
void tesseract::TrainingSample::set_font_id | ( | int | id | ) | [inline] |
Definition at line 122 of file trainingsample.h.
// Stores the caller-supplied id in font_id_.
{ font_id_ = id; }
void tesseract::TrainingSample::set_is_error | ( | bool | value | ) | [inline] |
Definition at line 187 of file trainingsample.h.
// Stores the caller-supplied flag in is_error_.
{ is_error_ = value; }
void tesseract::TrainingSample::set_max_dist | ( | double | value | ) | [inline] |
Definition at line 164 of file trainingsample.h.
// Stores the caller-supplied value in max_dist_.
{ max_dist_ = value; }
void tesseract::TrainingSample::set_page_num | ( | int | page | ) | [inline] |
Definition at line 128 of file trainingsample.h.
// Stores the caller-supplied page number in page_num_.
{ page_num_ = page; }
void tesseract::TrainingSample::set_sample_index | ( | int | value | ) | [inline] |
Definition at line 170 of file trainingsample.h.
// Stores the caller-supplied value in sample_index_.
{ sample_index_ = value; }
void tesseract::TrainingSample::set_weight | ( | double | value | ) | [inline] |
Definition at line 158 of file trainingsample.h.
// Stores the caller-supplied value in weight_.
{ weight_ = value; }
double tesseract::TrainingSample::weight | ( | ) | const [inline] |
Definition at line 155 of file trainingsample.h.
{
// Read-only accessor: returns the weight_ member.
return weight_;
}