Tesseract  3.02
tesseract::WordSizeModel Class Reference

#include <word_size_model.h>

List of all members.

Public Member Functions

 WordSizeModel (CharSet *, bool contextual)
virtual ~WordSizeModel ()
int Cost (CharSamp **samp_array, int samp_cnt) const
bool Save (string file_name)
int FontCount () const
const FontPairSizeInfo * FontInfo () const

Static Public Member Functions

static WordSizeModel * Create (const string &data_file_path, const string &lang, CharSet *char_set, bool contextual)
static double PairCost (int width_0, int height_0, int top_0, int width_1, int height_1, int top_1, const PairSizeInfo &pair_info)
static int SizeCode (int cls_id, int start, int end)

Detailed Description

Definition at line 53 of file word_size_model.h.


Constructor & Destructor Documentation

tesseract::WordSizeModel::WordSizeModel ( CharSet *  char_set,
bool  contextual 
)

Definition at line 28 of file word_size_model.cpp.

                                                                {
  // Record the construction arguments; the two stores are independent.
  // contextual_ later decides whether Cost() derives size codes from a
  // character's position flags or uses the bare class id.
  this->contextual_ = contextual;
  this->char_set_ = char_set;
}
tesseract::WordSizeModel::~WordSizeModel ( ) [virtual]

Definition at line 33 of file word_size_model.cpp.

                              {
  // Free the pair-size table of every font model. For each table the block
  // reached through row 0 is released first, then the array of row pointers
  // (presumably all rows share that single block -- confirm against the
  // allocation code, which is not shown here).
  for (int fnt = 0; fnt < font_pair_size_models_.size(); fnt++) {
    // Bind by reference: copying the whole entry just to read its pointer
    // is unnecessary work.
    const FontPairSizeInfo &fnt_info = font_pair_size_models_[fnt];
    // A model whose table was never allocated must not be indexed.
    if (fnt_info.pair_size_info == NULL) {
      continue;
    }
    delete []fnt_info.pair_size_info[0];
    delete []fnt_info.pair_size_info;
  }
}

Member Function Documentation

int tesseract::WordSizeModel::Cost ( CharSamp **  samp_array,
int  samp_cnt 
) const

Definition at line 210 of file word_size_model.cpp.

                                                                 {
  // Fewer than two samples means there is no pair to score.
  if (samp_cnt < 2) {
    return 0;
  }
  int best_fnt = -1;
  double best_dist = static_cast<double>(WORST_COST);

  // Score the word against every font model and keep the lowest mean
  // pair distance.
  for (int fnt = 0; fnt < font_pair_size_models_.size(); fnt++) {
    const FontPairSizeInfo &font_model = font_pair_size_models_[fnt];
    double dist_sum = 0;
    int scored_pairs = 0;

    for (int idx_0 = 0; idx_0 < samp_cnt; idx_0++) {
      CharSamp *first = samp_array[idx_0];
      const int cls_0 = char_set_->ClassID(first->StrLabel());
      // Class ids below 1 are skipped.
      if (cls_0 < 1) {
        continue;
      }
      // Size code of the first sample: the bare class id, or the class id
      // combined with its first/last-character flags in contextual mode.
      int size_code_0 = cls_0;
      if (contextual_) {
        size_code_0 = SizeCode(cls_0,
                               first->FirstChar() != 0 ? 1 : 0,
                               first->LastChar() != 0 ? 1 : 0);
      }

      const int height_0 = first->Height();
      const int width_0 = first->Width();
      const int top_0 = first->Top();

      // Pair the first sample with every sample that follows it.
      for (int idx_1 = idx_0 + 1; idx_1 < samp_cnt; idx_1++) {
        CharSamp *second = samp_array[idx_1];
        const int cls_1 = char_set_->ClassID(second->StrLabel());
        if (cls_1 < 1) {
          continue;
        }
        // Size code of the second sample, derived the same way.
        int size_code_1 = cls_1;
        if (contextual_) {
          size_code_1 = SizeCode(cls_1,
                                 second->FirstChar() != 0 ? 1 : 0,
                                 second->LastChar() != 0 ? 1 : 0);
        }
        const double dist = PairCost(
            width_0, height_0, top_0, second->Width(),
            second->Height(), second->Top(),
            font_model.pair_size_info[size_code_0][size_code_1]);
        // Only strictly positive distances contribute to the mean.
        if (dist > 0) {
          dist_sum += dist;
          scored_pairs++;
        }
      }  // idx_1
    }  // idx_0

    // A font that scored no pair cannot be ranked.
    if (scored_pairs == 0) {
      continue;
    }
    const double mean_dist = dist_sum / scored_pairs;
    if (best_fnt == -1 || mean_dist < best_dist) {
      best_dist = mean_dist;
      best_fnt = fnt;
    }
  }

  // No font produced a score: fall back to the worst cost.
  return best_fnt == -1 ? static_cast<int>(WORST_COST)
                        : static_cast<int>(best_dist);
}
WordSizeModel * tesseract::WordSizeModel::Create ( const string &  data_file_path,
const string &  lang,
CharSet *  char_set,
bool  contextual 
) [static]

Definition at line 41 of file word_size_model.cpp.

                                                      {
  // Build the model first, then let Init() decide whether it is usable.
  WordSizeModel *model = new WordSizeModel(char_set, contextual);
  if (model == NULL) {
    fprintf(stderr, "Cube ERROR (WordSizeModel::Create): unable to allocate "
            "new word size model object\n");
    return NULL;
  }

  // Hand the model back only when initialization succeeds; otherwise
  // destroy it so the caller never sees a half-built object.
  if (model->Init(data_file_path, lang)) {
    return model;
  }
  delete model;
  return NULL;
}
int tesseract::WordSizeModel::FontCount ( ) const [inline]

Definition at line 72 of file word_size_model.h.

                               {
    // The number of stored font models, narrowed to the int return type.
    const int font_cnt = static_cast<int>(font_pair_size_models_.size());
    return font_cnt;
  }
const FontPairSizeInfo* tesseract::WordSizeModel::FontInfo ( ) const [inline]

Definition at line 75 of file word_size_model.h.

                                                  {
    // Returns a pointer to the first font model; the remaining models
    // follow it when the container stores its elements contiguously.
    // Indexing element 0 of an empty container is undefined, so report
    // "no fonts loaded" as NULL instead.
    if (font_pair_size_models_.size() == 0) {
      return NULL;
    }
    return &font_pair_size_models_[0];
  }
double tesseract::WordSizeModel::PairCost ( int  width_0,
int  height_0,
int  top_0,
int  width_1,
int  height_1,
int  top_1,
const PairSizeInfo &  pair_info 
) [static]

Definition at line 280 of file word_size_model.cpp.

                                                              {
  // Distance between an observed character pair and the model pair
  // described by pair_info. The observed pair is rescaled so that the
  // height of its first character matches the model height hgt_0; the
  // result is the sum of absolute differences of the remaining dimensions.
  // Returns 0.0 when no positive scale factor can be formed.

  // A zero observed height would divide by zero below and produce an
  // inf/NaN distance; treat it like any other unusable scale.
  if (height_0 == 0) {
    return 0.0;
  }
  double scale_factor = static_cast<double>(pair_info.hgt_0) /
      static_cast<double>(height_0);
  double dist = 0.0;
  if (scale_factor > 0) {
    double norm_width_0 = width_0 * scale_factor;
    double norm_width_1 = width_1 * scale_factor;
    double norm_height_1 = height_1 * scale_factor;
    double norm_delta_top = (top_1 - top_0) * scale_factor;

    // accumulate the distance between the model character and the
    // predicted one on all dimensions of the pair
    dist += fabs(pair_info.wid_0 - norm_width_0);
    dist += fabs(pair_info.wid_1 - norm_width_1);
    dist += fabs(pair_info.hgt_1 - norm_height_1);
    dist += fabs(pair_info.delta_top - norm_delta_top);
  }
  return dist;
}
bool tesseract::WordSizeModel::Save ( string  file_name)
static int tesseract::WordSizeModel::SizeCode ( int  cls_id,
int  start,
int  end 
) [inline, static]

Definition at line 80 of file word_size_model.h.

                                                             {
    // Combine the class id with the two position values: the class id is
    // shifted up by two bits, `end` by one bit, and `start` is added as is.
    const int class_part = cls_id << 2;
    const int end_part = end << 1;
    return class_part + end_part + start;
  }

The documentation for this class was generated from the following files: