Tesseract
3.02
|
#include <cube_utils.h>
Public Member Functions | |
CubeUtils () | |
~CubeUtils () | |
Static Public Member Functions | |
static int | Prob2Cost (double prob_val) |
static double | Cost2Prob (int cost) |
static int | StrLen (const char_32 *str) |
static int | StrCmp (const char_32 *str1, const char_32 *str2) |
static char_32 * | StrDup (const char_32 *str) |
static CharSamp * | CharSampleFromImg (IMAGE *img, int left, int top, int wid, int hgt) |
static CharSamp * | CharSampleFromPix (Pix *pix, int left, int top, int wid, int hgt) |
static IMAGE * | ImageFromCharSample (CharSamp *char_samp) |
static Pix * | PixFromCharSample (CharSamp *char_samp) |
static bool | ReadFileToString (const string &file_name, string *str) |
static void | SplitStringUsing (const string &str, const string &delims, vector< string > *str_vec) |
static void | UTF8ToUTF32 (const char *utf8_str, string_32 *str32) |
static void | UTF32ToUTF8 (const char_32 *utf32_str, string *str) |
static bool | IsCaseInvariant (const char_32 *str32, CharSet *char_set) |
static char_32 * | ToLower (const char_32 *str32, CharSet *char_set) |
static char_32 * | ToUpper (const char_32 *str32, CharSet *char_set) |
Definition at line 36 of file cube_utils.h.
tesseract::CubeUtils::CubeUtils | ( | ) |
Definition at line 28 of file cube_utils.cpp.
// Empty body: the constructor performs no work.
{ }
tesseract::CubeUtils::~CubeUtils | ( | ) |
Definition at line 31 of file cube_utils.cpp.
// Empty body: the destructor performs no work.
{ }
CharSamp * tesseract::CubeUtils::CharSampleFromImg | ( | IMAGE * | img, |
int | left, | ||
int | top, | ||
int | wid, | ||
int | hgt | ||
) | [static] |
Definition at line 121 of file cube_utils.cpp.
{
  // Builds a CharSamp from the (left, top, wid, hgt) rectangle of img.
  // Returns NULL when GetImageData yields no buffer; otherwise returns
  // whatever CharSamp::FromRawData produces.
  CharSamp *sample = NULL;
  unsigned char *raw_pixels = GetImageData(img, left, top, wid, hgt);
  if (raw_pixels != NULL) {
    sample = CharSamp::FromRawData(left, top, wid, hgt, raw_pixels);
    // The raw buffer was only needed to construct the sample.
    delete [] raw_pixels;
  }
  return sample;
}
CharSamp * tesseract::CubeUtils::CharSampleFromPix | ( | Pix * | pix, |
int | left, | ||
int | top, | ||
int | wid, | ||
int | hgt | ||
) | [static] |
Definition at line 138 of file cube_utils.cpp.
{
  // Builds a CharSamp from the (left, top, wid, hgt) rectangle of pix.
  // Returns NULL when GetImageData yields no buffer; otherwise returns
  // whatever CharSamp::FromRawData produces.
  CharSamp *sample = NULL;
  unsigned char *raw_pixels = GetImageData(pix, left, top, wid, hgt);
  if (raw_pixels != NULL) {
    sample = CharSamp::FromRawData(left, top, wid, hgt, raw_pixels);
    // The raw buffer was only needed to construct the sample.
    delete [] raw_pixels;
  }
  return sample;
}
double tesseract::CubeUtils::Cost2Prob | ( | int | cost | ) | [static] |
Definition at line 43 of file cube_utils.cpp.
// Converts a cost back to a probability as exp(-cost / PROB2COST_SCALE).
// PROB2COST_SCALE is defined outside this page.
{ return exp(-cost / PROB2COST_SCALE); }
IMAGE * tesseract::CubeUtils::ImageFromCharSample | ( | CharSamp * | char_samp | ) | [static] |
Definition at line 155 of file cube_utils.cpp.
{
  // Creates a new 1-bit-per-pixel IMAGE holding the contents of char_samp.
  // Returns NULL when char_samp is NULL, when it has no raw data, or when
  // the image cannot be allocated/created. The returned IMAGE is allocated
  // with new.
  if (char_samp == NULL) {
    return NULL;
  }
  const int stride = char_samp->Stride();
  const int wid = char_samp->Width();
  const int hgt = char_samp->Height();
  unsigned char *raw = char_samp->RawData();
  if (raw == NULL) {
    return NULL;
  }

  IMAGE *result = new IMAGE();
  if (result == NULL) {
    return NULL;
  }
  if (result->create(wid, hgt, 1) == -1) {
    // Could not create the blank B/W image.
    delete result;
    return NULL;
  }

  IMAGELINE row_pixels;
  row_pixels.init(wid);
  for (int y = 0; y < hgt; ++y) {
    const int row_start = y * stride;
    // Any non-zero source byte becomes 1, zero stays 0.
    for (int x = 0; x < wid; ++x) {
      row_pixels.pixels[x] = (raw[row_start + x] != 0) ? 1 : 0;
    }
    // Source row y is stored at image line (hgt - 1 - y), i.e. rows are
    // written in reverse vertical order.
    result->fast_put_line(0, hgt - 1 - y, wid, &row_pixels);
  }
  return result;
}
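bool tesseract::CubeUtils::IsCaseInvariant | ( | const char_32 * | str32, |
CharSet * | char_set | ||
) | [static] |
Definition at line 361 of file cube_utils.cpp.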
{
  // Decides whether str32 is "case invariant": either no upper<->lower switch
  // occurs between neighbouring characters, or the string is capitalized
  // (upper-case first character with no upper-case character after it).
  // With a NULL char_set the C-library isupper/islower are applied to each
  // char_32 value; otherwise the char_set's UNICHARSET is queried.

  // StrLen reports 0 for a NULL pointer, so this single check covers both a
  // NULL and an empty string; neither can contain a case switch. It also
  // keeps the scan below from reading past the terminator.
  const int len = StrLen(str32);
  if (len == 0) {
    return true;
  }

  UNICHARSET *unicharset = NULL;
  if (char_set) {
    unicharset = char_set->InternalUnicharset();
  }

  bool all_one_case = true;
  // Starts out false so the result is well defined when the first character
  // is not upper-case (it used to be read uninitialized in that case).
  bool capitalized = false;
  bool prev_upper = false;
  bool prev_lower = false;

  for (int c = 0; c < len; ++c) {
    bool cur_upper;
    bool cur_lower;
    if (!char_set) {
      // NOTE(review): isupper/islower require an argument representable as
      // unsigned char (or EOF); char_32 values outside that range are passed
      // through unchanged here, as before - confirm callers only reach this
      // path with such values.
      cur_upper = isupper(str32[c]) != 0;
      cur_lower = islower(str32[c]) != 0;
    } else {
      cur_upper = unicharset->get_isupper(char_set->ClassID(str32[c]));
      cur_lower = unicharset->get_islower(char_set->ClassID(str32[c]));
    }

    if (c == 0) {
      // Only an upper-case first character can make the string capitalized.
      capitalized = cur_upper;
    } else {
      if ((prev_upper && cur_lower) || (prev_lower && cur_upper)) {
        all_one_case = false;
      }
      // Any later upper-case character rules out "capitalized".
      if (cur_upper) {
        capitalized = false;
      }
    }
    prev_upper = cur_upper;
    prev_lower = cur_lower;
  }
  return all_one_case || capitalized;
}
Pix * tesseract::CubeUtils::PixFromCharSample | ( | CharSamp * | char_samp | ) | [static] |
Definition at line 199 of file cube_utils.cpp.
{
  // Creates a new 1-bit-per-pixel Pix holding the contents of char_samp.
  // Returns NULL when char_samp is NULL, when it has no raw data, or when
  // pixCreate fails.
  if (char_samp == NULL) {
    return NULL;
  }

  // get the raw data
  int stride = char_samp->Stride();
  int wid = char_samp->Width();
  int hgt = char_samp->Height();

  // Check the raw buffer before allocating the Pix, matching
  // ImageFromCharSample; otherwise a NULL buffer is dereferenced below.
  unsigned char *line = char_samp->RawData();
  if (line == NULL) {
    return NULL;
  }

  Pix *pix = pixCreate(wid, hgt, 1);
  if (pix == NULL) {
    return NULL;
  }

  // copy the contents: a non-zero source byte is written as pixel value 0,
  // a zero source byte as pixel value 255
  for (int y = 0; y < hgt ; y++, line += stride) {
    for (int x = 0; x < wid; x++) {
      if (line[x] != 0) {
        pixSetPixel(pix, x, y, 0);
      } else {
        pixSetPixel(pix, x, y, 255);
      }
    }
  }

  return pix;
}
int tesseract::CubeUtils::Prob2Cost | ( | double | prob_val | ) | [static] |
Definition at line 35 of file cube_utils.cpp.
{
  // Converts a probability to an integer cost: -log(prob_val) scaled by
  // PROB2COST_SCALE and truncated to int. Probabilities below MIN_PROB all
  // map to the fixed MIN_PROB_COST.
  if (!(prob_val < MIN_PROB)) {
    return static_cast<int>(-log(prob_val) * PROB2COST_SCALE);
  }
  return MIN_PROB_COST;
}
bool tesseract::CubeUtils::ReadFileToString | ( | const string & | file_name, |
string * | str | ||
) | [static] |
Definition at line 268 of file cube_utils.cpp.
{
  // Reads the whole of file_name (opened in binary mode) into *str.
  // *str is cleared first. Returns true only when the file could be opened,
  // is at least one byte long and was read completely; on any failure *str
  // is left empty and false is returned.
  str->clear();
  FILE *fp = fopen(file_name.c_str(), "rb");
  if (fp == NULL) {
    return false;
  }

  // get the size of the file
  if (fseek(fp, 0, SEEK_END) != 0) {
    fclose(fp);
    return false;
  }
  // ftell returns long (and -1 on error); keep the full width instead of
  // narrowing to int.
  const long file_size = ftell(fp);
  if (file_size < 1) {
    fclose(fp);
    return false;
  }
  const size_t byte_count = static_cast<size_t>(file_size);

  // adjust string size
  str->reserve(byte_count);

  // read the contents
  rewind(fp);
  char *buff = new char[byte_count];
  if (buff == NULL) {
    fclose(fp);
    return false;
  }
  const size_t read_bytes = fread(buff, 1, byte_count, fp);
  const bool complete = (read_bytes == byte_count);
  if (complete) {
    str->append(buff, byte_count);
  }
  delete [] buff;
  fclose(fp);
  return complete;
}
void tesseract::CubeUtils::SplitStringUsing | ( | const string & | str, |
const string & | delims, | ||
vector< string > * | str_vec | ||
) | [static] |
Definition at line 301 of file cube_utils.cpp.
{
  // Splits str at any character contained in delims and appends each
  // non-empty piece to *str_vec (existing entries are kept). Runs of
  // consecutive delimiters never produce empty pieces.

  // Fast path: delims consists of exactly one character.
  if (delims[0] != '\0' && delims[1] == '\0') {
    const char sep = delims[0];
    const char* cur = str.data();
    const char* const stop = cur + str.size();
    while (cur != stop) {
      if (*cur == sep) {
        ++cur;
        continue;
      }
      // cur sits on the first character of a piece; advance to its end.
      const char* const piece = cur;
      do {
        ++cur;
      } while (cur != stop && *cur != sep);
      str_vec->push_back(string(piece, cur - piece));
    }
    return;
  }

  // General path: any character of delims separates pieces.
  string::size_type piece_begin = str.find_first_not_of(delims);
  while (piece_begin != string::npos) {
    const string::size_type piece_end = str.find_first_of(delims, piece_begin);
    if (piece_end == string::npos) {
      // Last piece runs to the end of the string.
      str_vec->push_back(str.substr(piece_begin));
      return;
    }
    str_vec->push_back(str.substr(piece_begin, piece_end - piece_begin));
    piece_begin = str.find_first_not_of(delims, piece_end);
  }
}
Definition at line 58 of file cube_utils.cpp.
int tesseract::CubeUtils::StrLen | ( | const char_32 * | str | ) | [static] |
Definition at line 48 of file cube_utils.cpp.
{
  // Counts the char_32 elements that precede the terminating zero.
  // A NULL pointer is reported as length 0.
  if (char_32_ptr == NULL) {
    return 0;
  }
  int count = 0;
  while (char_32_ptr[count] != 0) {
    ++count;
  }
  return count;
}
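char_32 * tesseract::CubeUtils::ToLower | ( | const char_32 * | str32, |
CharSet * | char_set | ||
) | [static] |
Definition at line 415 of file cube_utils.cpp.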
{
  // Returns a newly allocated (new[]), zero-terminated copy of str32 in which
  // every character the UNICHARSET reports as upper-case is replaced by its
  // other-case form. Returns NULL when char_set is NULL, when a character
  // equals INVALID_UNICHAR_ID, or when an other-case form is missing or not
  // exactly one character long.
  if (char_set == NULL) {
    return NULL;
  }
  UNICHARSET *unicharset = char_set->InternalUnicharset();
  const int len = StrLen(str32);
  char_32 *result = new char_32[len + 1];
  if (result == NULL) {
    return NULL;
  }

  for (int pos = 0; pos < len; ++pos) {
    const char_32 src_ch = str32[pos];
    if (src_ch == INVALID_UNICHAR_ID) {
      delete [] result;
      return NULL;
    }
    if (!unicharset->get_isupper(char_set->ClassID(src_ch))) {
      // Not upper-case: copied through unchanged.
      result[pos] = src_ch;
      continue;
    }
    // Upper-case: look up the string of the other-case class.
    UNICHAR_ID other_id = unicharset->get_other_case(char_set->ClassID(src_ch));
    const char_32 *other_str = char_set->ClassString(other_id);
    // The replacement must be a single character to keep positions aligned.
    if (other_str == NULL || StrLen(other_str) != 1) {
      delete [] result;
      return NULL;
    }
    result[pos] = other_str[0];
  }
  result[len] = 0;
  return result;
}
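char_32 * tesseract::CubeUtils::ToUpper | ( | const char_32 * | str32, |
CharSet * | char_set | ||
) | [static] |
Definition at line 448 of file cube_utils.cpp.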
{
  // Returns a newly allocated (new[]), zero-terminated copy of str32 in which
  // every character the UNICHARSET reports as lower-case is replaced by its
  // other-case form. Returns NULL when char_set is NULL, when a character
  // equals INVALID_UNICHAR_ID, or when an other-case form is missing or not
  // exactly one character long.
  if (char_set == NULL) {
    return NULL;
  }
  UNICHARSET *unicharset = char_set->InternalUnicharset();
  const int len = StrLen(str32);
  char_32 *result = new char_32[len + 1];
  if (result == NULL) {
    return NULL;
  }

  for (int pos = 0; pos < len; ++pos) {
    const char_32 src_ch = str32[pos];
    if (src_ch == INVALID_UNICHAR_ID) {
      delete [] result;
      return NULL;
    }
    if (!unicharset->get_islower(char_set->ClassID(src_ch))) {
      // Not lower-case: copied through unchanged.
      result[pos] = src_ch;
      continue;
    }
    // Lower-case: look up the string of the other-case class.
    UNICHAR_ID other_id = unicharset->get_other_case(char_set->ClassID(src_ch));
    const char_32 *other_str = char_set->ClassString(other_id);
    // The replacement must be a single character to keep positions aligned.
    if (other_str == NULL || StrLen(other_str) != 1) {
      delete [] result;
      return NULL;
    }
    result[pos] = other_str[0];
  }
  result[len] = 0;
  return result;
}
void tesseract::CubeUtils::UTF32ToUTF8 | ( | const char_32 * | utf32_str, |
string * | str | ||
) | [static] |
Definition at line 349 of file cube_utils.cpp.
void tesseract::CubeUtils::UTF8ToUTF32 | ( | const char * | utf8_str, |
string_32 * | str32 | ||
) | [static] |
Definition at line 335 of file cube_utils.cpp.
{
  // Decodes the zero-terminated UTF-8 string utf8_str into *str32, which is
  // cleared first. One element is appended per decoded character (the value
  // returned by UNICHAR::first_uni()).
  str32->clear();
  const int len = static_cast<int>(strlen(utf8_str));
  int ch = 0;
  while (ch < len) {
    const int step = UNICHAR::utf8_step(utf8_str + ch);
    if (step <= 0) {
      // No usable step for this byte: skip just this byte. Advancing by the
      // non-positive step (as the loop used to) would never move the index
      // forward, so the loop would not terminate.
      ++ch;
      continue;
    }
    UNICHAR uni_ch(utf8_str + ch, step);
    (*str32) += uni_ch.first_uni();
    ch += step;
  }
}