Tesseract  3.02
tesseract::CharSet Class Reference

#include <char_set.h>

List of all members.

Public Member Functions

 CharSet ()
 ~CharSet ()
bool SharedUnicharset ()
int ClassID (const char_32 *str) const
int ClassID (char_32 ch) const
int UnicharID (const char_32 *str) const
int UnicharID (char_32 ch) const
const char_32 * ClassString (int class_id) const
int ClassCount () const
UNICHARSET * InternalUnicharset ()

Static Public Member Functions

static CharSet * Create (TessdataManager *tessdata_manager, UNICHARSET *tess_unicharset)

Detailed Description

Definition at line 42 of file char_set.h.


Constructor & Destructor Documentation

tesseract::CharSet::CharSet ( )

Definition at line 28 of file char_set.cpp.

                 {
  class_cnt_ = 0;
  class_strings_ = NULL;
  unicharset_map_ = NULL;
  init_ = false;

  // init hash table
  memset(hash_bin_size_, 0, sizeof(hash_bin_size_));
}
tesseract::CharSet::~CharSet ( )

Definition at line 38 of file char_set.cpp.

                  {
  if (class_strings_ != NULL) {
    for (int cls = 0; cls < class_cnt_; cls++) {
      if (class_strings_[cls] != NULL) {
        delete class_strings_[cls];
      }
    }
    delete []class_strings_;
    class_strings_ = NULL;
  }
  delete []unicharset_map_;
}

Member Function Documentation

int tesseract::CharSet::ClassCount ( ) const [inline]

Definition at line 111 of file char_set.h.

// Returns class_cnt_, the exclusive upper bound on valid class ids.
{ return class_cnt_; }
int tesseract::CharSet::ClassID ( const char_32 * str) const [inline]

Definition at line 54 of file char_set.h.

                                               {
    // Hash the string to a bucket, then scan that bucket for an exact match.
    // Returns the matching class id, or -1 when the string is not present.
    const int bucket = Hash(str);
    const int entries = hash_bin_size_[bucket];
    for (int slot = 0; slot < entries; ++slot) {
      const int candidate = hash_bins_[bucket][slot];
      if (class_strings_[candidate]->compare(str) == 0) {
        return candidate;
      }
    }
    // Empty bucket or no entry compared equal.
    return -1;
  }
int tesseract::CharSet::ClassID ( char_32  ch) const [inline]

Definition at line 65 of file char_set.h.

                                       {
    // Hash the code point to a bucket, then look for a class whose string
    // is exactly that one character. Returns the class id, or -1 if none.
    const int bucket = Hash(ch);
    const int entries = hash_bin_size_[bucket];
    for (int slot = 0; slot < entries; ++slot) {
      const int candidate = hash_bins_[bucket][slot];
      // First character must match and the string must have length 1.
      if ((*class_strings_[candidate])[0] == ch &&
          class_strings_[candidate]->length() == 1) {
        return candidate;
      }
    }
    // Empty bucket or no single-character entry matched.
    return -1;
  }
const char_32* tesseract::CharSet::ClassString ( int  class_id) const [inline]

Definition at line 104 of file char_set.h.

                                                         {
    // Valid ids are 0 .. class_cnt_ - 1; anything else yields NULL.
    const bool in_range = (class_id >= 0) && (class_id < class_cnt_);
    if (!in_range) {
      return NULL;
    }
    // Expose the stored string's buffer as a char_32 pointer.
    return reinterpret_cast<const char_32 *>(class_strings_[class_id]->c_str());
  }
CharSet * tesseract::CharSet::Create ( TessdataManager * tessdata_manager,
UNICHARSET * tess_unicharset
) [static]

Definition at line 54 of file char_set.cpp.

                                                      {
  // Factory: builds a CharSet from the unicharset data reachable through
  // tessdata_manager.
  //   tessdata_manager - source of the cube / tesseract unicharset streams.
  //   tess_unicharset  - tesseract's unicharset; used directly when no
  //                      separate cube unicharset exists, otherwise passed to
  //                      LoadSupportedCharList alongside the cube one.
  // Returns a heap-allocated CharSet with init_ set, or NULL on any failure.
  // On failure the partially built object is deleted, so nothing leaks.
  CharSet *char_set = new CharSet();
  if (char_set == NULL) {
    return NULL;
  }

  // First look for Cube's unicharset; if not there, use tesseract's.
  // The second seek only runs when the first one fails (short-circuit).
  const bool cube_unicharset_exists =
      tessdata_manager->SeekToStart(TESSDATA_CUBE_UNICHARSET);
  if (!cube_unicharset_exists &&
      !tessdata_manager->SeekToStart(TESSDATA_UNICHARSET)) {
    fprintf(stderr, "Cube ERROR (CharSet::Create): could not find "
            "either cube or tesseract unicharset\n");
    delete char_set;
    return NULL;
  }
  FILE *charset_fp = tessdata_manager->GetDataFilePtr();
  if (!charset_fp) {
    fprintf(stderr, "Cube ERROR (CharSet::Create): could not load "
            "a unicharset\n");
    delete char_set;
    return NULL;
  }

  // If we found a cube unicharset separate from tesseract's, load it and
  // map its unichars to tesseract's; if only one unicharset exists,
  // just load it.
  bool loaded;
  if (cube_unicharset_exists) {
    // NOTE(review): the result of load_from_file is not checked here;
    // confirm whether it reports failure and should gate `loaded`.
    char_set->cube_unicharset_.load_from_file(charset_fp);
    // Rewind to the start of the cube unicharset before the second pass.
    loaded = tessdata_manager->SeekToStart(TESSDATA_CUBE_UNICHARSET);
    loaded = loaded && char_set->LoadSupportedCharList(
        tessdata_manager->GetDataFilePtr(), tess_unicharset);
    char_set->unicharset_ = &char_set->cube_unicharset_;
  } else {
    loaded = char_set->LoadSupportedCharList(charset_fp, NULL);
    char_set->unicharset_ = tess_unicharset;
  }
  if (!loaded) {
    delete char_set;
    return NULL;
  }

  char_set->init_ = true;
  return char_set;
}
UNICHARSET* tesseract::CharSet::InternalUnicharset ( ) [inline]

Definition at line 121 of file char_set.h.

// Returns unicharset_, which Create() points at either the object's own
// cube_unicharset_ or the caller-supplied tesseract unicharset.
{ return unicharset_; }
bool tesseract::CharSet::SharedUnicharset ( ) [inline]

Definition at line 48 of file char_set.h.

// True when no class-id -> unichar-id map is allocated, i.e. UnicharID()
// returns class ids unchanged.
{ return (unicharset_map_ == NULL); }
int tesseract::CharSet::UnicharID ( const char_32 * str) const [inline]

Definition at line 80 of file char_set.h.

                                                 {
    // Resolve the string to a class id first; pass failures straight through.
    const int class_id = ClassID(str);
    if (class_id == INVALID_UNICHAR_ID) {
      return INVALID_UNICHAR_ID;
    }
    // With a map present, translate through it; otherwise the class id is
    // already the unichar id.
    return unicharset_map_ ? unicharset_map_[class_id] : class_id;
  }
int tesseract::CharSet::UnicharID ( char_32  ch) const [inline]

Definition at line 92 of file char_set.h.

                                         {
    // Resolve the code point to a class id first; pass failures straight
    // through.
    const int class_id = ClassID(ch);
    if (class_id == INVALID_UNICHAR_ID) {
      return INVALID_UNICHAR_ID;
    }
    // With a map present, translate through it; otherwise the class id is
    // already the unichar id.
    return unicharset_map_ ? unicharset_map_[class_id] : class_id;
  }

The documentation for this class was generated from the following files: