Tesseract  3.02
tesseract::CubeUtils Class Reference

#include <cube_utils.h>

List of all members.

Public Member Functions

 CubeUtils ()
 ~CubeUtils ()

Static Public Member Functions

static int Prob2Cost (double prob_val)
static double Cost2Prob (int cost)
static int StrLen (const char_32 *str)
static int StrCmp (const char_32 *str1, const char_32 *str2)
static char_32 * StrDup (const char_32 *str)
static CharSamp * CharSampleFromImg (IMAGE *img, int left, int top, int wid, int hgt)
static CharSamp * CharSampleFromPix (Pix *pix, int left, int top, int wid, int hgt)
static IMAGE * ImageFromCharSample (CharSamp *char_samp)
static Pix * PixFromCharSample (CharSamp *char_samp)
static bool ReadFileToString (const string &file_name, string *str)
static void SplitStringUsing (const string &str, const string &delims, vector< string > *str_vec)
static void UTF8ToUTF32 (const char *utf8_str, string_32 *str32)
static void UTF32ToUTF8 (const char_32 *utf32_str, string *str)
static bool IsCaseInvariant (const char_32 *str32, CharSet *char_set)
static char_32 * ToLower (const char_32 *str32, CharSet *char_set)
static char_32 * ToUpper (const char_32 *str32, CharSet *char_set)

Detailed Description

Definition at line 36 of file cube_utils.h.


Constructor & Destructor Documentation

tesseract::CubeUtils::CubeUtils ( )

Definition at line 28 of file cube_utils.cpp.

                     {
  // Empty body: construction performs no work.
}
tesseract::CubeUtils::~CubeUtils ( )

Definition at line 31 of file cube_utils.cpp.

                      {
  // Empty body: destruction performs no work.
}

Member Function Documentation

CharSamp * tesseract::CubeUtils::CharSampleFromImg ( IMAGE *  img,
int  left,
int  top,
int  wid,
int  hgt 
) [static]

Definition at line 121 of file cube_utils.cpp.

                                                         {
  // Pull the pixels of the requested rectangle out of the image; a NULL
  // result means there is nothing to build a sample from.
  unsigned char *raw_pixels = GetImageData(img, left, top, wid, hgt);
  if (raw_pixels == NULL)
    return NULL;

  // Build the sample from the extracted pixels, then release the temporary
  // buffer before handing the sample back.
  CharSamp *sample = CharSamp::FromRawData(left, top, wid, hgt, raw_pixels);
  delete []raw_pixels;
  return sample;
}
CharSamp * tesseract::CubeUtils::CharSampleFromPix ( Pix *  pix,
int  left,
int  top,
int  wid,
int  hgt 
) [static]

Definition at line 138 of file cube_utils.cpp.

                                                         {
  // Pull the pixels of the requested rectangle out of the Pix; a NULL
  // result means there is nothing to build a sample from.
  unsigned char *raw_pixels = GetImageData(pix, left, top, wid, hgt);
  if (raw_pixels == NULL)
    return NULL;

  // Build the sample from the extracted pixels, then release the temporary
  // buffer before handing the sample back.
  CharSamp *sample = CharSamp::FromRawData(left, top, wid, hgt, raw_pixels);
  delete []raw_pixels;
  return sample;
}
double tesseract::CubeUtils::Cost2Prob ( int  cost) [static]

Definition at line 43 of file cube_utils.cpp.

                                    {
  // Scale the negated cost down by PROB2COST_SCALE and exponentiate it.
  const double exponent = -cost / PROB2COST_SCALE;
  return exp(exponent);
}
IMAGE * tesseract::CubeUtils::ImageFromCharSample ( CharSamp *  char_samp) [static]

Definition at line 155 of file cube_utils.cpp.

                                                         {
  // Without a sample there is nothing to convert.
  if (char_samp == NULL)
    return NULL;

  // Geometry of the sample and its pixel buffer.
  const int stride = char_samp->Stride();
  const int wid = char_samp->Width();
  const int hgt = char_samp->Height();
  unsigned char *raw = char_samp->RawData();
  if (raw == NULL)
    return NULL;

  // Allocate the destination image object.
  IMAGE *image = new IMAGE();
  if (image == NULL)
    return NULL;

  // Make it a blank 1 bit-per-pixel image of the sample's size.
  if (image->create(wid, hgt, 1) == -1) {
    delete image;
    return NULL;
  }

  // Transfer one row at a time through a scratch line. Sample row y is
  // written to image line (hgt - 1 - y), so the row order is reversed.
  IMAGELINE row;
  row.init(wid);
  for (int y = 0; y < hgt; ++y) {
    const unsigned char *src_row = raw + y * stride;
    for (int x = 0; x < wid; ++x) {
      // Any non-zero sample value becomes 1; zero stays 0.
      row.pixels[x] = (src_row[x] != 0) ? 1 : 0;
    }
    image->fast_put_line(0, hgt - 1 - y, wid, &row);
  }

  return image;
}
bool tesseract::CubeUtils::IsCaseInvariant ( const char_32 *  str32,
CharSet *  char_set 
) [static]

Definition at line 361 of file cube_utils.cpp.

                                                                       {
  // Reports whether the casing of str32 is consistent: true when no
  // upper-case character is adjacent to a lower-case one, or when the first
  // character is upper-case and no later character is. With a NULL char_set
  // the C isupper()/islower() functions classify characters; otherwise the
  // char_set's UNICHARSET does.

  // An empty string has no case transitions. Returning early also keeps the
  // loops below, which start at index 1, from reading past the terminator.
  if (str32 == NULL || str32[0] == 0) {
    return true;
  }

  bool all_one_case = true;
  // Must start out false: it only becomes true when the first character is
  // upper-case, and the return statement reads it unconditionally.
  bool capitalized = false;
  bool prev_upper;
  bool prev_lower;
  bool cur_upper;
  bool cur_lower;

  if (!char_set) {
    // If cube char_set is missing, use C-locale-dependent functions
    // to determine case properties.
    // NOTE(review): isupper()/islower() are only defined for arguments
    // representable as unsigned char (or EOF); char_32 values outside that
    // range are passed through here unchanged -- confirm callers only use
    // this path for such characters.
    prev_upper = isupper(str32[0]);
    prev_lower = islower(str32[0]);
    capitalized = prev_upper;
    for (int c = 1; str32[c] != 0; ++c) {
      cur_upper = isupper(str32[c]);
      cur_lower = islower(str32[c]);
      if ((prev_upper && cur_lower) || (prev_lower && cur_upper))
        all_one_case = false;
      // Any upper-case character after the first breaks capitalization.
      if (cur_upper)
        capitalized = false;
      prev_upper = cur_upper;
      prev_lower = cur_lower;
    }
  } else {
    UNICHARSET *unicharset = char_set->InternalUnicharset();
    // Use UNICHARSET functions to determine case properties
    prev_upper = unicharset->get_isupper(char_set->ClassID(str32[0]));
    prev_lower = unicharset->get_islower(char_set->ClassID(str32[0]));
    capitalized = prev_upper;

    // Stop at the terminator rather than re-measuring the string with
    // StrLen() on every iteration.
    for (int c = 1; str32[c] != 0; ++c) {
      cur_upper = unicharset->get_isupper(char_set->ClassID(str32[c]));
      cur_lower = unicharset->get_islower(char_set->ClassID(str32[c]));
      if ((prev_upper && cur_lower) || (prev_lower && cur_upper))
        all_one_case = false;
      // Any upper-case character after the first breaks capitalization.
      if (cur_upper)
        capitalized = false;
      prev_upper = cur_upper;
      prev_lower = cur_lower;
    }
  }
  return all_one_case || capitalized;
}
Pix * tesseract::CubeUtils::PixFromCharSample ( CharSamp *  char_samp) [static]

Definition at line 199 of file cube_utils.cpp.

                                                     {
  // Builds a 1 bit-per-pixel Pix from a character sample. Returns NULL when
  // the sample is NULL, has no raw data, or the Pix cannot be created.

  // parameter check
  if (char_samp == NULL) {
    return NULL;
  }

  // Fetch the pixel buffer first, so a sample without raw data is rejected
  // before any Pix has been allocated (nothing to release on that path).
  unsigned char *line = char_samp->RawData();
  if (line == NULL) {
    return NULL;
  }

  // get the sample geometry
  int stride = char_samp->Stride();
  int wid = char_samp->Width();
  int hgt = char_samp->Height();

  Pix *pix = pixCreate(wid, hgt, 1);
  if (pix == NULL) {
    return NULL;
  }

  // copy the contents row by row; `line` advances one stride per row
  for (int y = 0; y < hgt ; y++, line += stride) {
    for (int x = 0; x < wid; x++) {
      // Non-zero sample values are written as 0, zero values as 255.
      if (line[x] != 0) {
        pixSetPixel(pix, x, y, 0);
      } else {
        pixSetPixel(pix, x, y, 255);
      }
    }
  }

  return pix;
}
int tesseract::CubeUtils::Prob2Cost ( double  prob_val) [static]

Definition at line 35 of file cube_utils.cpp.

                                        {
  // Probabilities below MIN_PROB are clamped to MIN_PROB_COST; everything
  // else maps to the negative log of the probability scaled by
  // PROB2COST_SCALE and truncated to an int.
  return (prob_val < MIN_PROB)
      ? MIN_PROB_COST
      : static_cast<int>(-log(prob_val) * PROB2COST_SCALE);
}
bool tesseract::CubeUtils::ReadFileToString ( const string &  file_name,
string *  str 
) [static]

Definition at line 268 of file cube_utils.cpp.

                                                                     {
  // Reads the entire contents of file_name into *str (binary mode).
  // Returns true only if the file could be opened, is non-empty, and was
  // read completely; on failure *str is left empty.
  str->clear();
  FILE *fp = fopen(file_name.c_str(), "rb");
  if (fp == NULL) {
    return false;
  }

  // get the size of the file; ftell() reports a long, so keep it as one
  // rather than truncating it to int
  long file_size = -1;
  if (fseek(fp, 0, SEEK_END) == 0) {
    file_size = ftell(fp);
  }
  // a failed seek/tell (-1) and an empty file are both treated as failure
  if (file_size < 1) {
    fclose(fp);
    return false;
  }
  const size_t byte_count = static_cast<size_t>(file_size);
  // adjust string size
  str->reserve(byte_count);
  // read the contents
  rewind(fp);
  char *buff = new char[byte_count];
  if (buff == NULL) {
    fclose(fp);
    return false;
  }
  // fread() returns a size_t count of the bytes actually read
  const size_t read_bytes = fread(buff, 1, byte_count, fp);
  const bool complete = (read_bytes == byte_count);
  if (complete) {
    str->append(buff, byte_count);
  }
  delete []buff;
  fclose(fp);
  return complete;
}
void tesseract::CubeUtils::SplitStringUsing ( const string &  str,
const string &  delims,
vector< string > *  str_vec 
) [static]

Definition at line 301 of file cube_utils.cpp.

                                                          {
  // Appends to *str_vec every maximal run of characters in str that contains
  // no delimiter; empty tokens are never produced.

  // Fast path for the common case of exactly one delimiter character.
  if (delims[0] != '\0' && delims[1] == '\0') {
    const char sep = delims[0];
    const char* cursor = str.data();
    const char* const limit = cursor + str.size();
    while (cursor != limit) {
      if (*cursor == sep) {
        // Skip over a delimiter.
        ++cursor;
        continue;
      }
      // Consume one token: advance until the next delimiter or the end.
      const char* token_begin = cursor;
      do {
        ++cursor;
      } while (cursor != limit && *cursor != sep);
      str_vec->push_back(string(token_begin, cursor - token_begin));
    }
    return;
  }

  // General case: any character of delims separates tokens.
  string::size_type token_begin = str.find_first_not_of(delims);
  while (token_begin != string::npos) {
    const string::size_type token_end = str.find_first_of(delims, token_begin);
    if (token_end == string::npos) {
      // Last token runs to the end of the string.
      str_vec->push_back(str.substr(token_begin));
      break;
    }
    str_vec->push_back(str.substr(token_begin, token_end - token_begin));
    token_begin = str.find_first_not_of(delims, token_end);
  }
}
int tesseract::CubeUtils::StrCmp ( const char_32 *  str1,
const char_32 *  str2 
) [static]

Definition at line 58 of file cube_utils.cpp.

                                                              {
  // Walk both strings in lock-step until one of them ends. The first
  // differing pair decides the result as the difference of the two values.
  int i = 0;
  while (str1[i] != 0 && str2[i] != 0) {
    if (str1[i] != str2[i]) {
      return str1[i] - str2[i];
    }
    ++i;
  }

  // At least one string ended here: the one with characters left is the
  // greater (1 / -1); if both ended the strings are equal (0).
  if (str1[i] != 0) {
    return 1;
  }
  return (str2[i] == 0) ? 0 : -1;
}
char_32 * tesseract::CubeUtils::StrDup ( const char_32 *  str) [static]

Definition at line 80 of file cube_utils.cpp.

                                               {
  // Allocate room for every character plus the terminating zero.
  const int char_count = StrLen(str32);
  char_32 *copy = new char_32[char_count + 1];
  if (copy == NULL)
    return NULL;

  // Copy the characters, then terminate the new string explicitly.
  memcpy(copy, str32, char_count * sizeof(*str32));
  copy[char_count] = 0;
  return copy;
}
int tesseract::CubeUtils::StrLen ( const char_32 *  str) [static]

Definition at line 48 of file cube_utils.cpp.

                                                {
  // A NULL pointer is reported as length 0.
  if (char_32_ptr == NULL)
    return 0;

  // Count characters up to, but not including, the terminating zero.
  int len = 0;
  while (char_32_ptr[len] != 0) {
    ++len;
  }
  return len;
}
char_32 * tesseract::CubeUtils::ToLower ( const char_32 *  str32,
CharSet *  char_set 
) [static]

Definition at line 415 of file cube_utils.cpp.

                                                                   {
  // Returns a newly allocated lower-case copy of str32, or NULL when no
  // char_set is given, when a character equals INVALID_UNICHAR_ID, or when
  // an upper-case character has no single-character lower-case form.
  if (!char_set)
    return NULL;

  UNICHARSET *unicharset = char_set->InternalUnicharset();
  int len = StrLen(str32);
  char_32 *result = new char_32[len + 1];
  if (!result)
    return NULL;

  for (int i = 0; i < len; ++i) {
    char_32 ch = str32[i];
    if (ch == INVALID_UNICHAR_ID) {
      delete [] result;
      return NULL;
    }

    // Characters that are not upper-case are copied through unchanged.
    if (!unicharset->get_isupper(char_set->ClassID(ch))) {
      result[i] = ch;
      continue;
    }

    // Look up the other-case form of an upper-case character.
    UNICHAR_ID uid_lower = unicharset->get_other_case(char_set->ClassID(ch));
    const char_32 *lower_str = char_set->ClassString(uid_lower);
    // The lower-case form must be exactly one character long.
    if (!lower_str || StrLen(lower_str) != 1) {
      delete [] result;
      return NULL;
    }
    result[i] = lower_str[0];
  }

  result[len] = 0;
  return result;
}
char_32 * tesseract::CubeUtils::ToUpper ( const char_32 *  str32,
CharSet *  char_set 
) [static]

Definition at line 448 of file cube_utils.cpp.

                                                                   {
  // Returns a newly allocated upper-case copy of str32, or NULL when no
  // char_set is given, when a character equals INVALID_UNICHAR_ID, or when
  // a lower-case character has no single-character upper-case form.
  if (!char_set)
    return NULL;

  UNICHARSET *unicharset = char_set->InternalUnicharset();
  int len = StrLen(str32);
  char_32 *result = new char_32[len + 1];
  if (!result)
    return NULL;

  for (int i = 0; i < len; ++i) {
    char_32 ch = str32[i];
    if (ch == INVALID_UNICHAR_ID) {
      delete [] result;
      return NULL;
    }

    // Characters that are not lower-case are copied through unchanged.
    if (!unicharset->get_islower(char_set->ClassID(ch))) {
      result[i] = ch;
      continue;
    }

    // Look up the other-case form of a lower-case character.
    UNICHAR_ID uid_upper = unicharset->get_other_case(char_set->ClassID(ch));
    const char_32 *upper_str = char_set->ClassString(uid_upper);
    // The upper-case form must be exactly one character long.
    if (!upper_str || StrLen(upper_str) != 1) {
      delete [] result;
      return NULL;
    }
    result[i] = upper_str[0];
  }

  result[len] = 0;
  return result;
}
void tesseract::CubeUtils::UTF32ToUTF8 ( const char_32 *  utf32_str,
string *  str 
) [static]

Definition at line 349 of file cube_utils.cpp.

                                                                 {
  // Rebuild *str from scratch, one input character at a time.
  str->clear();
  for (int i = 0; utf32_str[i] != 0; ++i) {
    UNICHAR uni_ch(utf32_str[i]);
    // utf8_str() hands back a buffer that this function deletes after
    // appending it; a NULL result contributes nothing.
    char *utf8 = uni_ch.utf8_str();
    if (utf8 == NULL)
      continue;
    str->append(utf8);
    delete []utf8;
  }
}
void tesseract::CubeUtils::UTF8ToUTF32 ( const char *  utf8_str,
string_32 *  str32 
) [static]

Definition at line 335 of file cube_utils.cpp.

                                                                  {
  // Converts the NUL-terminated UTF-8 string utf8_str into *str32, appending
  // one UTF-32 value per decoded sequence. Bytes that do not start a
  // sequence are skipped.
  str32->clear();
  int len = strlen(utf8_str);
  int step = 0;
  for (int ch = 0; ch < len; ch += step) {
    // utf8_step() gives the byte length of the sequence starting here.
    step = UNICHAR::utf8_step(utf8_str + ch);
    if (step > 0) {
      UNICHAR uni_ch(utf8_str + ch, step);
      (*str32) += uni_ch.first_uni();
    } else {
      // Not a usable lead byte. The loop advances by `step`, so leaving it
      // at 0 would spin forever on this byte; skip it and resynchronize on
      // the next one.
      step = 1;
    }
  }
}

The documentation for this class was generated from the following files: