Tesseract
3.02
|
#include <word_size_model.h>
Public Member Functions
WordSizeModel (CharSet *, bool contextual)
virtual ~WordSizeModel ()
int Cost (CharSamp **samp_array, int samp_cnt) const
bool Save (string file_name)
int FontCount () const
const FontPairSizeInfo * FontInfo () const
Static Public Member Functions
static WordSizeModel * Create (const string &data_file_path, const string &lang, CharSet *char_set, bool contextual)
static double PairCost (int width_0, int height_0, int top_0, int width_1, int height_1, int top_1, const PairSizeInfo &pair_info)
static int SizeCode (int cls_id, int start, int end)
Definition at line 53 of file word_size_model.h.
tesseract::WordSizeModel::WordSizeModel (CharSet *char_set, bool contextual)
Definition at line 28 of file word_size_model.cpp.
{
  // Record the construction arguments in the matching data members; nothing
  // else is initialized here.
  contextual_ = contextual;
  char_set_ = char_set;
}
tesseract::WordSizeModel::~WordSizeModel () [virtual]
Definition at line 33 of file word_size_model.cpp.
{
  // Release the per-font pair-size tables. For every font, element [0] of
  // pair_size_info is freed as an array, followed by the array of pointers
  // itself.
  // NOTE(review): presumably row 0 points at one backing allocation that
  // holds all rows -- confirm against the code that allocates these tables.
  const int font_cnt = static_cast<int>(font_pair_size_models_.size());
  for (int fnt = 0; fnt < font_cnt; fnt++) {
    // Bind by reference: copying the whole FontPairSizeInfo just to read a
    // single pointer is unnecessary work.
    const FontPairSizeInfo &fnt_info = font_pair_size_models_[fnt];
    // An entry without a table has nothing to free, and indexing [0] on a
    // null pointer would be invalid.
    if (fnt_info.pair_size_info == NULL) {
      continue;
    }
    delete []fnt_info.pair_size_info[0];
    delete []fnt_info.pair_size_info;
  }
}
int tesseract::WordSizeModel::Cost (CharSamp **samp_array, int samp_cnt) const
Definition at line 210 of file word_size_model.cpp.
{
  // Scores how well the relative sizes of the given samples match the stored
  // per-font pair-size tables.
  //   samp_array: array of samp_cnt sample pointers.
  //   samp_cnt:   number of entries in samp_array.
  // Returns 0 when fewer than two samples are given, WORST_COST when no font
  // produced a scored pair, and otherwise the smallest per-font mean pair
  // distance converted to int.

  // A pair needs at least two samples.
  if (samp_cnt < 2) {
    return 0;
  }

  double lowest_mean = static_cast<double>(WORST_COST);
  int winning_font = -1;

  for (int font_idx = 0; font_idx < font_pair_size_models_.size();
       font_idx++) {
    const FontPairSizeInfo &font_model = font_pair_size_models_[font_idx];
    double dist_sum = 0;
    int scored_pairs = 0;

    for (int first = 0; first < samp_cnt; first++) {
      CharSamp *samp_a = samp_array[first];
      const int cls_a = char_set_->ClassID(samp_a->StrLabel());
      // Samples whose class ID is below 1 take no part in any pair.
      if (cls_a < 1) {
        continue;
      }

      // Size code of the first sample: the plain class ID, or, in contextual
      // mode, the class ID combined with the first/last-character flags.
      int code_a = cls_a;
      if (contextual_) {
        code_a = SizeCode(cls_a,
                          samp_a->FirstChar() == 0 ? 0 : 1,
                          samp_a->LastChar() == 0 ? 0 : 1);
      }

      const int height_a = samp_a->Height();
      const int width_a = samp_a->Width();
      const int top_a = samp_a->Top();

      // Pair the first sample with every later sample.
      for (int second = first + 1; second < samp_cnt; second++) {
        CharSamp *samp_b = samp_array[second];
        const int cls_b = char_set_->ClassID(samp_b->StrLabel());
        if (cls_b < 1) {
          continue;
        }

        // Size code of the second sample, derived the same way as above.
        int code_b = cls_b;
        if (contextual_) {
          code_b = SizeCode(cls_b,
                            samp_b->FirstChar() == 0 ? 0 : 1,
                            samp_b->LastChar() == 0 ? 0 : 1);
        }

        const double pair_dist = PairCost(
            width_a, height_a, top_a,
            samp_b->Width(), samp_b->Height(), samp_b->Top(),
            font_model.pair_size_info[code_a][code_b]);

        // Only strictly positive distances contribute to the mean.
        if (pair_dist > 0) {
          dist_sum += pair_dist;
          scored_pairs++;
        }
      }  // second
    }  // first

    // A font with no scored pair cannot compete.
    if (scored_pairs == 0) {
      continue;
    }

    const double font_mean = dist_sum / scored_pairs;
    if (winning_font == -1 || font_mean < lowest_mean) {
      lowest_mean = font_mean;
      winning_font = font_idx;
    }
  }

  if (winning_font == -1) {
    return static_cast<int>(WORST_COST);
  }
  return static_cast<int>(lowest_mean);
}
WordSizeModel * tesseract::WordSizeModel::Create (const string &data_file_path, const string &lang, CharSet *char_set, bool contextual) [static]
Definition at line 41 of file word_size_model.cpp.
{
  // Factory: builds a model for the given character set and contextual flag,
  // then initializes it from data_file_path / lang. Returns the new object,
  // or NULL if allocation or Init() fails; a partly built object is deleted
  // before returning.
  WordSizeModel *model = new WordSizeModel(char_set, contextual);
  if (model == NULL) {
    fprintf(stderr, "Cube ERROR (WordSizeModel::Create): unable to allocate "
            "new word size model object\n");
    return NULL;
  }

  if (model->Init(data_file_path, lang)) {
    return model;
  }

  // Initialization failed: do not hand out an unusable object.
  delete model;
  return NULL;
}
int tesseract::WordSizeModel::FontCount () const [inline]
Definition at line 72 of file word_size_model.h.
{
  // Number of FontPairSizeInfo entries currently held, narrowed to int.
  return static_cast<int>(font_pair_size_models_.size());
}
const FontPairSizeInfo* tesseract::WordSizeModel::FontInfo () const [inline]
Definition at line 75 of file word_size_model.h.
{
  // Returns the address of the first FontPairSizeInfo entry, or NULL when no
  // entries are held. Indexing element 0 of an empty container is invalid,
  // so that case is reported explicitly instead.
  // NOTE(review): callers presumably read up to FontCount() entries through
  // this pointer, which assumes contiguous storage -- confirm.
  if (font_pair_size_models_.size() == 0) {
    return NULL;
  }
  return &font_pair_size_models_[0];
}
double tesseract::WordSizeModel::PairCost (int width_0, int height_0, int top_0, int width_1, int height_1, int top_1, const PairSizeInfo &pair_info) [static]
Definition at line 280 of file word_size_model.cpp.
{
  // Distance between an observed pair of character boxes and the model pair
  // described by pair_info.
  //   width_0/height_0/top_0: measurements of the first observed character.
  //   width_1/height_1/top_1: measurements of the second observed character.
  //   pair_info:              model measurements for this pair.
  // The observed pair is rescaled so that the first character's height
  // matches the model's hgt_0; the result is the sum of absolute differences
  // over the remaining dimensions. Returns 0.0 when no usable scale factor
  // exists.

  // A zero observed height would turn the scale factor below into a division
  // by zero and produce inf/NaN distances; report "no distance" instead.
  if (height_0 == 0) {
    return 0.0;
  }

  double scale_factor = static_cast<double>(pair_info.hgt_0) /
      static_cast<double>(height_0);
  double dist = 0.0;
  if (scale_factor > 0) {
    double norm_width_0 = width_0 * scale_factor;
    double norm_width_1 = width_1 * scale_factor;
    double norm_height_1 = height_1 * scale_factor;
    double norm_delta_top = (top_1 - top_0) * scale_factor;
    // accumulate the distance between the model character and the
    // predicted one on all dimensions of the pair
    dist += fabs(pair_info.wid_0 - norm_width_0);
    dist += fabs(pair_info.wid_1 - norm_width_1);
    dist += fabs(pair_info.hgt_1 - norm_height_1);
    dist += fabs(pair_info.delta_top - norm_delta_top);
  }
  return dist;
}
bool tesseract::WordSizeModel::Save (string file_name)
static int tesseract::WordSizeModel::SizeCode (int cls_id, int start, int end) [inline, static]
Definition at line 80 of file word_size_model.h.
{
  // Packs the three inputs into one code: cls_id shifted left by two bits,
  // plus end shifted left by one bit, plus start.
  const int class_part = cls_id << 2;
  const int end_part = end << 1;
  return class_part + end_part + start;
}