|
Tesseract
3.02
|
#include <cube_utils.h>
Public Member Functions | |
| CubeUtils () | |
| ~CubeUtils () | |
Static Public Member Functions | |
| static int | Prob2Cost (double prob_val) |
| static double | Cost2Prob (int cost) |
| static int | StrLen (const char_32 *str) |
| static int | StrCmp (const char_32 *str1, const char_32 *str2) |
| static char_32 * | StrDup (const char_32 *str) |
| static CharSamp * | CharSampleFromImg (IMAGE *img, int left, int top, int wid, int hgt) |
| static CharSamp * | CharSampleFromPix (Pix *pix, int left, int top, int wid, int hgt) |
| static IMAGE * | ImageFromCharSample (CharSamp *char_samp) |
| static Pix * | PixFromCharSample (CharSamp *char_samp) |
| static bool | ReadFileToString (const string &file_name, string *str) |
| static void | SplitStringUsing (const string &str, const string &delims, vector< string > *str_vec) |
| static void | UTF8ToUTF32 (const char *utf8_str, string_32 *str32) |
| static void | UTF32ToUTF8 (const char_32 *utf32_str, string *str) |
| static bool | IsCaseInvariant (const char_32 *str32, CharSet *char_set) |
| static char_32 * | ToLower (const char_32 *str32, CharSet *char_set) |
| static char_32 * | ToUpper (const char_32 *str32, CharSet *char_set) |
Definition at line 36 of file cube_utils.h.
| tesseract::CubeUtils::CubeUtils | ( | ) |
Definition at line 28 of file cube_utils.cpp.
{
  // Intentionally empty: the constructor performs no work.
}
| tesseract::CubeUtils::~CubeUtils | ( | ) |
Definition at line 31 of file cube_utils.cpp.
{
  // Intentionally empty: the destructor performs no work.
}
| CharSamp * tesseract::CubeUtils::CharSampleFromImg | ( | IMAGE * | img, |
| int | left, | ||
| int | top, | ||
| int | wid, | ||
| int | hgt | ||
| ) | [static] |
Definition at line 121 of file cube_utils.cpp.
{
  // Builds a CharSamp from the (left, top, wid, hgt) region of img.
  // Returns NULL when the raw image data cannot be obtained; otherwise returns
  // whatever CharSamp::FromRawData() produces.
  unsigned char *raw_pixels = GetImageData(img, left, top, wid, hgt);
  if (raw_pixels != NULL) {
    CharSamp *sample = CharSamp::FromRawData(left, top, wid, hgt, raw_pixels);
    // The raw buffer is only needed while the sample is being built.
    delete []raw_pixels;
    return sample;
  }
  return NULL;
}
| CharSamp * tesseract::CubeUtils::CharSampleFromPix | ( | Pix * | pix, |
| int | left, | ||
| int | top, | ||
| int | wid, | ||
| int | hgt | ||
| ) | [static] |
Definition at line 138 of file cube_utils.cpp.
{
  // Builds a CharSamp from the (left, top, wid, hgt) region of pix.
  // Returns NULL when the raw image data cannot be obtained; otherwise returns
  // whatever CharSamp::FromRawData() produces.
  unsigned char *raw_pixels = GetImageData(pix, left, top, wid, hgt);
  if (raw_pixels != NULL) {
    CharSamp *sample = CharSamp::FromRawData(left, top, wid, hgt, raw_pixels);
    // The raw buffer is only needed while the sample is being built.
    delete []raw_pixels;
    return sample;
  }
  return NULL;
}
| double tesseract::CubeUtils::Cost2Prob | ( | int | cost | ) | [static] |
Definition at line 43 of file cube_utils.cpp.
{
// Converts a cost back to a probability: exp(-cost / PROB2COST_SCALE).
// NOTE(review): whether the division is floating-point depends on how
// PROB2COST_SCALE is defined, which is not visible here -- confirm.
return exp(-cost / PROB2COST_SCALE);
}
| IMAGE * tesseract::CubeUtils::ImageFromCharSample | ( | CharSamp * | char_samp | ) | [static] |
Definition at line 155 of file cube_utils.cpp.
{
  // Converts a character sample into a newly allocated 1 bit-per-pixel IMAGE.
  // Returns NULL if char_samp is NULL, has no raw data, or the image cannot
  // be created.
  if (char_samp == NULL) {
    return NULL;
  }
  // Fetch the sample geometry and its raw pixel buffer.
  int row_stride = char_samp->Stride();
  int width = char_samp->Width();
  int height = char_samp->Height();
  unsigned char *raw = char_samp->RawData();
  if (raw == NULL) {
    return NULL;
  }
  // Allocate the destination image object.
  IMAGE *image = new IMAGE();
  if (image == NULL) {
    return NULL;
  }
  // Create a blank image with 1 bit per pixel; give up on failure.
  if (image->create(width, height, 1) == -1) {
    delete image;
    return NULL;
  }
  // Transfer the pixels one row at a time through a scratch line.
  IMAGELINE row;
  row.init(width);
  for (int y = 0; y < height; y++) {
    const int row_start = y * row_stride;
    for (int x = 0; x < width; x++) {
      // Any non-zero sample byte becomes 1, zero stays 0.
      row.pixels[x] = (raw[row_start + x] != 0) ? 1 : 0;
    }
    // Source row y is written to image row (height - 1 - y), i.e. the rows
    // are stored in reverse vertical order.
    image->fast_put_line(0, height - 1 - y, width, &row);
  }
  return image;
}
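| bool tesseract::CubeUtils::IsCaseInvariant | ( | const char_32 * | str32, |
| CharSet * | char_set | ||
| ) | [static] |
Definition at line 361 of file cube_utils.cpp.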
{
  // Returns true when the case pattern of str32 is acceptable: either no pair
  // of adjacent characters switches between upper and lower case, or the
  // first character is upper-case and no later character is upper-case.
  // A NULL or empty string is reported as case invariant.
  if (str32 == NULL || str32[0] == 0) {
    // Nothing to inspect; returning here also keeps the loops below (which
    // start at index 1) from reading past the terminator.
    return true;
  }
  bool all_one_case = true;
  // Explicitly initialized so that the result is well-defined when the first
  // character is not upper-case.
  bool capitalized = false;
  bool prev_upper;
  bool prev_lower;
  bool cur_upper;
  bool cur_lower;
  if (!char_set) {
    // If cube char_set is missing, use C-locale-dependent functions on the
    // individual char_32 values to determine case properties.
    // NOTE(review): isupper()/islower() are only specified for values
    // representable as unsigned char (or EOF); confirm callers never pass
    // larger code points on this path.
    prev_upper = isupper(str32[0]);
    prev_lower = islower(str32[0]);
    capitalized = prev_upper;
    for (int c = 1; str32[c] != 0; ++c) {
      cur_upper = isupper(str32[c]);
      cur_lower = islower(str32[c]);
      // An upper->lower or lower->upper transition breaks "all one case".
      if ((prev_upper && cur_lower) || (prev_lower && cur_upper))
        all_one_case = false;
      // An upper-case character after the first breaks "capitalized".
      if (cur_upper)
        capitalized = false;
      prev_upper = cur_upper;
      prev_lower = cur_lower;
    }
  } else {
    UNICHARSET *unicharset = char_set->InternalUnicharset();
    // Use UNICHARSET functions to determine case properties
    prev_upper = unicharset->get_isupper(char_set->ClassID(str32[0]));
    prev_lower = unicharset->get_islower(char_set->ClassID(str32[0]));
    capitalized = prev_upper;
    // Walk to the terminating zero directly rather than re-evaluating
    // StrLen() on every iteration.
    for (int c = 1; str32[c] != 0; ++c) {
      cur_upper = unicharset->get_isupper(char_set->ClassID(str32[c]));
      cur_lower = unicharset->get_islower(char_set->ClassID(str32[c]));
      // An upper->lower or lower->upper transition breaks "all one case".
      if ((prev_upper && cur_lower) || (prev_lower && cur_upper))
        all_one_case = false;
      // An upper-case character after the first breaks "capitalized".
      if (cur_upper)
        capitalized = false;
      prev_upper = cur_upper;
      prev_lower = cur_lower;
    }
  }
  return all_one_case || capitalized;
}
| Pix * tesseract::CubeUtils::PixFromCharSample | ( | CharSamp * | char_samp | ) | [static] |
Definition at line 199 of file cube_utils.cpp.
{
  // Converts a character sample into a 1 bit-per-pixel Pix with the same
  // width and height. Returns NULL if char_samp is NULL, has no raw data, or
  // the Pix cannot be created.
  if (char_samp == NULL) {
    return NULL;
  }
  // Validate the raw buffer before allocating the Pix, so that a missing
  // buffer neither dereferences NULL in the copy loop nor leaves a freshly
  // created Pix behind.
  unsigned char *line = char_samp->RawData();
  if (line == NULL) {
    return NULL;
  }
  int stride = char_samp->Stride();
  int wid = char_samp->Width();
  int hgt = char_samp->Height();
  Pix *pix = pixCreate(wid, hgt, 1);
  if (pix == NULL) {
    return NULL;
  }
  // Copy the contents row by row; `line` advances by one stride per row.
  // Non-zero sample bytes are written as 0 and zero bytes as 255.
  for (int y = 0; y < hgt ; y++, line += stride) {
    for (int x = 0; x < wid; x++) {
      pixSetPixel(pix, x, y, (line[x] != 0) ? 0 : 255);
    }
  }
  return pix;
}
| int tesseract::CubeUtils::Prob2Cost | ( | double | prob_val | ) | [static] |
Definition at line 35 of file cube_utils.cpp.
{
  // Converts a probability to an integer cost: -log(prob_val) scaled by
  // PROB2COST_SCALE and truncated to int. Probabilities below MIN_PROB are
  // all assigned MIN_PROB_COST instead.
  if (!(prob_val < MIN_PROB)) {
    return static_cast<int>(-log(prob_val) * PROB2COST_SCALE);
  }
  return MIN_PROB_COST;
}
| bool tesseract::CubeUtils::ReadFileToString | ( | const string & | file_name, |
| string * | str | ||
| ) | [static] |
Definition at line 268 of file cube_utils.cpp.
{
  // Reads the entire contents of file_name into *str, opening the file in
  // binary mode. Returns true only if the file could be opened, is non-empty
  // and was read completely; on any failure *str is left empty.
  str->clear();
  FILE *fp = fopen(file_name.c_str(), "rb");
  if (fp == NULL) {
    return false;
  }
  // Get the size of the file by seeking to its end. ftell() returns a long,
  // so the size is kept in a long instead of being truncated to int.
  long file_size = -1;
  if (fseek(fp, 0, SEEK_END) == 0) {
    file_size = ftell(fp);
  }
  // Empty files and seek/tell failures are reported as errors.
  if (file_size < 1 || fseek(fp, 0, SEEK_SET) != 0) {
    fclose(fp);
    return false;
  }
  // Read straight into the string's storage; no temporary buffer or extra
  // copy is needed.
  str->resize(static_cast<size_t>(file_size));
  size_t read_bytes = fread(&(*str)[0], 1, str->size(), fp);
  fclose(fp);
  if (read_bytes != str->size()) {
    // Short read: do not hand back partial contents.
    str->clear();
    return false;
  }
  return true;
}
| void tesseract::CubeUtils::SplitStringUsing | ( | const string & | str, |
| const string & | delims, | ||
| vector< string > * | str_vec | ||
| ) | [static] |
Definition at line 301 of file cube_utils.cpp.
{
  // Splits str at any character found in delims and appends every non-empty
  // token to *str_vec. Runs of consecutive delimiters produce no empty tokens.

  // Fast path for the common case of a single delimiter character.
  if (delims[0] != '\0' && delims[1] == '\0') {
    const char sep = delims[0];
    const char* cur = str.data();
    const char* const stop = cur + str.size();
    while (cur != stop) {
      if (*cur == sep) {
        // Skip over a delimiter.
        ++cur;
        continue;
      }
      // Start of a token: advance to the next delimiter or end of input.
      const char* token = cur;
      do {
        ++cur;
      } while (cur != stop && *cur != sep);
      str_vec->push_back(string(token, cur - token));
    }
    return;
  }

  // General path: alternate between searching for the start of a token and
  // the delimiter that ends it.
  for (string::size_type tok_begin = str.find_first_not_of(delims);
       tok_begin != string::npos; ) {
    const string::size_type tok_end = str.find_first_of(delims, tok_begin);
    if (tok_end == string::npos) {
      // Last token runs to the end of the string.
      str_vec->push_back(str.substr(tok_begin));
      return;
    }
    str_vec->push_back(str.substr(tok_begin, (tok_end - tok_begin)));
    tok_begin = str.find_first_not_of(delims, tok_end);
  }
}
Definition at line 58 of file cube_utils.cpp.
| int tesseract::CubeUtils::StrLen | ( | const char_32 * | str | ) | [static] |
Definition at line 48 of file cube_utils.cpp.
{
  // Returns the number of elements before the terminating zero, or 0 for a
  // NULL pointer.
  if (char_32_ptr == NULL) {
    return 0;
  }
  int count = 0;
  while (char_32_ptr[count] != 0) {
    ++count;
  }
  return count;
}
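| char_32 * tesseract::CubeUtils::ToLower | ( | const char_32 * | str32, |
| CharSet * | char_set | ||
| ) | [static] |
Definition at line 415 of file cube_utils.cpp.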
{
  // Returns a newly allocated (new[]) zero-terminated copy of str32 in which
  // every character the unicharset reports as upper-case is replaced by its
  // other-case form. Returns NULL if char_set is missing, if a character
  // equals INVALID_UNICHAR_ID, or if an other-case form is not exactly one
  // character long.
  if (!char_set) {
    return NULL;
  }
  UNICHARSET *unicharset = char_set->InternalUnicharset();
  int len = StrLen(str32);
  char_32 *result = new char_32[len + 1];
  if (!result)
    return NULL;
  for (int pos = 0; pos < len; ++pos) {
    char_32 src_ch = str32[pos];
    if (src_ch == INVALID_UNICHAR_ID) {
      delete [] result;
      return NULL;
    }
    // Characters that are not upper-case are copied through unchanged.
    if (!unicharset->get_isupper(char_set->ClassID(src_ch))) {
      result[pos] = src_ch;
      continue;
    }
    // Look up the other-case counterpart of the upper-case character.
    UNICHAR_ID other_id = unicharset->get_other_case(char_set->ClassID(src_ch));
    const char_32 *other_str = char_set->ClassString(other_id);
    // The counterpart must be a single character to fit in one slot.
    if (!other_str || StrLen(other_str) != 1) {
      delete [] result;
      return NULL;
    }
    result[pos] = other_str[0];
  }
  result[len] = 0;
  return result;
}
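| char_32 * tesseract::CubeUtils::ToUpper | ( | const char_32 * | str32, |
| CharSet * | char_set | ||
| ) | [static] |
Definition at line 448 of file cube_utils.cpp.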
{
  // Returns a newly allocated (new[]) zero-terminated copy of str32 in which
  // every character the unicharset reports as lower-case is replaced by its
  // other-case form. Returns NULL if char_set is missing, if a character
  // equals INVALID_UNICHAR_ID, or if an other-case form is not exactly one
  // character long.
  if (!char_set) {
    return NULL;
  }
  UNICHARSET *unicharset = char_set->InternalUnicharset();
  int len = StrLen(str32);
  char_32 *result = new char_32[len + 1];
  if (!result)
    return NULL;
  for (int pos = 0; pos < len; ++pos) {
    char_32 src_ch = str32[pos];
    if (src_ch == INVALID_UNICHAR_ID) {
      delete [] result;
      return NULL;
    }
    // Characters that are not lower-case are copied through unchanged.
    if (!unicharset->get_islower(char_set->ClassID(src_ch))) {
      result[pos] = src_ch;
      continue;
    }
    // Look up the other-case counterpart of the lower-case character.
    UNICHAR_ID other_id = unicharset->get_other_case(char_set->ClassID(src_ch));
    const char_32 *other_str = char_set->ClassString(other_id);
    // The counterpart must be a single character to fit in one slot.
    if (!other_str || StrLen(other_str) != 1) {
      delete [] result;
      return NULL;
    }
    result[pos] = other_str[0];
  }
  result[len] = 0;
  return result;
}
| void tesseract::CubeUtils::UTF32ToUTF8 | ( | const char_32 * | utf32_str, |
| string * | str | ||
| ) | [static] |
Definition at line 349 of file cube_utils.cpp.
| void tesseract::CubeUtils::UTF8ToUTF32 | ( | const char * | utf8_str, |
| string_32 * | str32 | ||
| ) | [static] |
Definition at line 335 of file cube_utils.cpp.
{
  // Decodes the zero-terminated UTF-8 string utf8_str and stores the
  // resulting code points in *str32, replacing its previous contents.
  // Bytes for which UNICHAR::utf8_step() reports no valid sequence are
  // skipped. A NULL utf8_str yields an empty result.
  str32->clear();
  if (utf8_str == NULL) {
    return;
  }
  int len = strlen(utf8_str);
  int ch = 0;
  while (ch < len) {
    int step = UNICHAR::utf8_step(utf8_str + ch);
    if (step <= 0) {
      // No valid sequence starts here. Advance by one byte so the loop always
      // makes progress; adding a non-positive step would never terminate.
      ++ch;
      continue;
    }
    UNICHAR uni_ch(utf8_str + ch, step);
    (*str32) += uni_ch.first_uni();
    ch += step;
  }
}