Tesseract
3.02
|
Public Member Functions
UnicodeSpanSkipper (const UNICHARSET *unicharset, const WERD_CHOICE *word)
int SkipPunc (int pos)
int SkipDigits (int pos)
int SkipRomans (int pos)
int SkipAlpha (int pos)
Definition at line 294 of file paragraphs.cpp.
tesseract::UnicodeSpanSkipper::UnicodeSpanSkipper (const UNICHARSET *unicharset, const WERD_CHOICE *word) [inline]
Definition at line 296 of file paragraphs.cpp.
// Stores the unicharset and word pointers and caches the word's length.
: u_(unicharset),
  word_(word),
  wordlen_(word->length()) {}
int tesseract::UnicodeSpanSkipper::SkipAlpha (int pos)
Definition at line 335 of file paragraphs.cpp.
{
  // Step forward from pos for as long as the unicharset reports the
  // unichar at that position as alphabetic; stop at the end of the word.
  for (; pos < wordlen_; ++pos) {
    if (!u_->get_isalpha(word_->unichar_id(pos))) {
      break;
    }
  }
  // Index of the first non-alphabetic unichar (or wordlen_ if none).
  return pos;
}
int tesseract::UnicodeSpanSkipper::SkipDigits (int pos)
Definition at line 319 of file paragraphs.cpp.
{
  // Step forward from pos while the current unichar is either flagged as a
  // digit by the unicharset or accepted by IsDigitLike(); stop at the end
  // of the word.
  for (; pos < wordlen_; ++pos) {
    if (u_->get_isdigit(word_->unichar_id(pos))) {
      continue;
    }
    // Only consult IsDigitLike() when the unicharset did not already say
    // "digit", matching the short-circuit order of the two checks.
    if (!IsDigitLike(UnicodeFor(u_, word_, pos))) {
      break;
    }
  }
  // Index of the first unichar that is neither (or wordlen_ if none).
  return pos;
}
int tesseract::UnicodeSpanSkipper::SkipPunc (int pos)
Definition at line 314 of file paragraphs.cpp.
{
  // Step forward from pos for as long as the unicharset reports the
  // unichar at that position as punctuation; stop at the end of the word.
  for (; pos < wordlen_; ++pos) {
    if (!u_->get_ispunctuation(word_->unichar_id(pos))) {
      break;
    }
  }
  // Index of the first non-punctuation unichar (or wordlen_ if none).
  return pos;
}
int tesseract::UnicodeSpanSkipper::SkipRomans (int pos)
Definition at line 325 of file paragraphs.cpp.
{
  // Letters accepted as Roman-numeral characters, in both cases.
  // NOTE(review): 'c'/'C' (100) is not in this set; presumably intentional,
  // confirm before changing since it alters which words are skipped.
  const char *kRomans = "ivxlmdIVXLMD";
  // Advance pos over consecutive characters found in kRomans, stopping at
  // the end of the word or at the first character that is not in the set.
  while (pos < wordlen_) {
    int ch = UnicodeFor(u_, word_, pos);
    // strchr() treats the terminating NUL as part of the searched string,
    // so a code point of 0 (or a negative value whose low byte is a numeral
    // or zero) would otherwise be accepted as a Roman numeral. Reject
    // non-positive values up front; values >= 0xF0 are rejected as before.
    if (ch <= 0 || ch >= 0xF0 || strchr(kRomans, ch) == 0) break;
    pos++;
  }
  // Index of the first character not accepted (or wordlen_ if none).
  return pos;
}