|
Tesseract
3.02
|
#include <unicharset.h>
Public Member Functions | |
| void | set_all (const char *unichar, int pos, int total, bool natural) |
| void | set_unichar (const char *uch) |
| void | set_pos (int p) |
| void | set_total (int t) |
| const char * | get_unichar () const |
| int | get_pos () const |
| int | get_total () const |
| STRING | to_string () const |
| bool | equals (const char *other_unichar, int other_pos, int other_total) const |
| bool | equals (const CHAR_FRAGMENT *other) const |
| bool | is_continuation_of (const CHAR_FRAGMENT *fragment) const |
| bool | is_beginning () const |
| bool | is_ending () const |
| bool | is_natural () const |
| void | set_natural (bool value) |
Static Public Member Functions | |
| static STRING | to_string (const char *unichar, int pos, int total, bool natural) |
| static CHAR_FRAGMENT * | parse_from_string (const char *str) |
Static Public Attributes | |
| static const int | kMinLen = 6 |
| static const int | kMaxLen = 3 + UNICHAR_LEN + 2 |
| static const int | kMaxChunks = 5 |
Definition at line 37 of file unicharset.h.
| bool CHAR_FRAGMENT::equals | ( | const char * | other_unichar, |
| int | other_pos, | ||
| int | other_total | ||
| ) | const [inline] |
Definition at line 74 of file unicharset.h.
{
  // Two fragments match only when the unichar text, the position and the
  // total count are all identical. The text is compared first, as before.
  if (strcmp(this->unichar, other_unichar) != 0) {
    return false;
  }
  if (this->pos != other_pos) {
    return false;
  }
  return this->total == other_total;
}
| bool CHAR_FRAGMENT::equals | ( | const CHAR_FRAGMENT * | other | ) | const [inline] |
Definition at line 79 of file unicharset.h.
{
  // Unpack the other fragment and defer to the field-wise overload.
  const char *other_unichar = other->get_unichar();
  const int other_pos = other->get_pos();
  const int other_total = other->get_total();
  return this->equals(other_unichar, other_pos, other_total);
}
| int CHAR_FRAGMENT::get_pos | ( | ) | const [inline] |
Definition at line 60 of file unicharset.h.
{
  // Accessor for the stored pos value.
  return pos;
}
| int CHAR_FRAGMENT::get_total | ( | ) | const [inline] |
Definition at line 61 of file unicharset.h.
{
  // Accessor for the stored total value.
  return total;
}
| const char* CHAR_FRAGMENT::get_unichar | ( | ) | const [inline] |
Definition at line 59 of file unicharset.h.
{
  // Accessor for the stored unichar text; the pointer refers to this
  // object's own member, not to a copy.
  return unichar;
}
| bool CHAR_FRAGMENT::is_beginning | ( | ) | const [inline] |
Definition at line 94 of file unicharset.h.
{
  // A fragment is the beginning one when its pos is zero.
  return pos == 0;
}
| bool CHAR_FRAGMENT::is_continuation_of | ( | const CHAR_FRAGMENT * | fragment | ) | const [inline] |
Definition at line 87 of file unicharset.h.
{
  // This fragment continues the given one when both carry the same unichar
  // text and total, and this pos is exactly one past the other's pos.
  if (strcmp(this->unichar, fragment->get_unichar()) != 0) {
    return false;
  }
  if (this->total != fragment->get_total()) {
    return false;
  }
  return this->pos == fragment->get_pos() + 1;
}
| bool CHAR_FRAGMENT::is_ending | ( | ) | const [inline] |
Definition at line 97 of file unicharset.h.
{
  // A fragment is the ending one when its pos equals total - 1.
  const int last_pos = this->total - 1;
  return this->pos == last_pos;
}
| bool CHAR_FRAGMENT::is_natural | ( | ) | const [inline] |
Definition at line 102 of file unicharset.h.
{
  // Accessor for the stored natural flag.
  return this->natural;
}
| CHAR_FRAGMENT * CHAR_FRAGMENT::parse_from_string | ( | const char * | str | ) | [static] |
Definition at line 902 of file unicharset.cpp.
{
  // Parses the textual form of a fragment and returns a newly allocated
  // CHAR_FRAGMENT, or NULL when the text does not describe one.
  //
  // Accepted layout, as enforced by the checks below:
  //   kSeparator <unichar bytes> kSeparator <pos> (kSeparator | kNaturalFlag)
  //   <total>
  // where <pos> and <total> are base-10 integers read with strtol. Using
  // kNaturalFlag instead of the last separator sets the natural flag.
  //
  // The result is allocated with new; presumably the caller takes ownership
  // -- confirm against call sites.
  //
  // NOTE(review): this body names the parameter `string`, while the
  // declaration shown for this function names it `str`; the body is kept
  // consistent with its own definition.
  const char *ptr = string;
  const int len = static_cast<int>(strlen(string));
  if (len < kMinLen || *ptr != kSeparator) {
    return NULL;  // this string can not represent a fragment
  }
  ptr++;  // skip the leading separator
  const int remaining = len - 1;  // bytes that follow the leading separator

  // Measure the unichar: advance one UTF-8 character at a time until the
  // next separator. Offsets are compared instead of pointers so that no
  // pointer past the end of the buffer is ever formed.
  int step = 0;
  while (step < remaining && ptr[step] != kSeparator) {
    const int char_step = UNICHAR::utf8_step(ptr + step);
    if (char_step <= 0) {
      // A non-positive step would never advance and the loop would not
      // terminate (presumably reported for invalid UTF-8 -- confirm).
      return NULL;
    }
    step += char_step;
  }
  if (step == 0 || step > UNICHAR_LEN) {
    return NULL;  // no character for unichar or the character is too long
  }
  if (step >= remaining) {
    return NULL;  // ran off the end without finding the closing separator
  }
  char unichar[UNICHAR_LEN + 1];
  strncpy(unichar, ptr, step);
  unichar[step] = '\0';  // null terminate unichar
  ptr += step;  // now at the separator that closes the unichar

  int pos = 0;
  int total = 0;
  bool natural = false;
  for (int i = 0; i < 2; i++) {
    if (*ptr != kSeparator) {
      // Only the delimiter in front of the total may be the natural flag.
      if (i == 1 && *ptr == kNaturalFlag)
        natural = true;
      else
        return NULL;  // Failed to parse fragment representation.
    }
    ptr++;  // move past the delimiter to the number
    char *end_ptr = NULL;
    const int value = static_cast<int>(strtol(ptr, &end_ptr, 10));
    if (end_ptr == ptr) {
      return NULL;  // strtol consumed nothing: the number is missing
    }
    if (i == 0)
      pos = value;
    else
      total = value;
    ptr = end_ptr;
  }
  if (ptr != string + len) {
    return NULL;  // malformed fragment representation (trailing bytes)
  }
  CHAR_FRAGMENT *fragment = new CHAR_FRAGMENT();
  fragment->set_all(unichar, pos, total, natural);
  return fragment;
}
| void CHAR_FRAGMENT::set_all | ( | const char * | unichar, |
| int | pos, | ||
| int | total, | ||
| bool | natural | ||
| ) | [inline] |
Definition at line 47 of file unicharset.h.
{
  // Assign every field in one call by delegating to the individual setters.
  // The arguments share their names with the members, so each one is routed
  // through its setter rather than assigned directly.
  this->set_unichar(unichar);
  this->set_pos(pos);
  this->set_total(total);
  this->set_natural(natural);
}
| void CHAR_FRAGMENT::set_natural | ( | bool | value | ) | [inline] |
Definition at line 103 of file unicharset.h.
{
  // Store the natural flag.
  this->natural = value;
}
| void CHAR_FRAGMENT::set_pos | ( | int | p | ) | [inline] |
Definition at line 57 of file unicharset.h.
{
  // Store the pos value; no range checking is done here.
  pos = p;
}
| void CHAR_FRAGMENT::set_total | ( | int | t | ) | [inline] |
Definition at line 58 of file unicharset.h.
{
  // Store the total value; no range checking is done here.
  total = t;
}
| void CHAR_FRAGMENT::set_unichar | ( | const char * | uch | ) | [inline] |
Definition at line 53 of file unicharset.h.
{
  // Copy at most UNICHAR_LEN bytes of uch into the member buffer. strncpy
  // leaves the destination unterminated when the source is at least that
  // long, so the byte at index UNICHAR_LEN is always set to the terminator.
  char *dest = this->unichar;
  strncpy(dest, uch, UNICHAR_LEN);
  dest[UNICHAR_LEN] = '\0';
}
| STRING CHAR_FRAGMENT::to_string | ( | const char * | unichar, |
| int | pos, | ||
| int | total, | ||
| bool | natural | ||
| ) | [static] |
Definition at line 889 of file unicharset.cpp.
| STRING CHAR_FRAGMENT::to_string | ( | ) | const [inline] |
Definition at line 68 of file unicharset.h.
{
  // Instance convenience form: forwards this fragment's own fields to the
  // static four-argument to_string overload.
  return CHAR_FRAGMENT::to_string(this->unichar, this->pos, this->total,
                                  this->natural);
}
const int CHAR_FRAGMENT::kMaxChunks = 5 [static] |
Definition at line 44 of file unicharset.h.
const int CHAR_FRAGMENT::kMaxLen = 3 + UNICHAR_LEN + 2 [static] |
Definition at line 42 of file unicharset.h.
const int CHAR_FRAGMENT::kMinLen = 6 [static] |
Definition at line 40 of file unicharset.h.