|
Tesseract
3.02
|
Public Member Functions
| | UnicodeSpanSkipper (const UNICHARSET *unicharset, const WERD_CHOICE *word) |
| int | SkipPunc (int pos) |
| int | SkipDigits (int pos) |
| int | SkipRomans (int pos) |
| int | SkipAlpha (int pos) |
Definition at line 294 of file paragraphs.cpp.
tesseract::UnicodeSpanSkipper::UnicodeSpanSkipper (const UNICHARSET *unicharset, const WERD_CHOICE *word) [inline]
Definition at line 296 of file paragraphs.cpp.
// Stores the unicharset and word pointers and caches the word's length in
// wordlen_, which the Skip* methods use as their loop bound.
: u_(unicharset), word_(word), wordlen_(word->length()) {}
int tesseract::UnicodeSpanSkipper::SkipAlpha (int pos)
Definition at line 335 of file paragraphs.cpp.
{
  // Step forward while the unichar at pos is alphabetic according to the
  // unicharset; stop at the first non-alphabetic unichar or at wordlen_.
  // Returns the resulting index (unchanged if nothing was skipped).
  for (; pos < wordlen_; ++pos) {
    if (!u_->get_isalpha(word_->unichar_id(pos))) break;
  }
  return pos;
}
int tesseract::UnicodeSpanSkipper::SkipDigits (int pos)
Definition at line 319 of file paragraphs.cpp.
{
  // Step forward while the unichar at pos is either a digit according to the
  // unicharset or accepted by IsDigitLike(); stop at the first unichar that is
  // neither, or at wordlen_. Returns the resulting index.
  for (; pos < wordlen_; ++pos) {
    // Check the unicharset property first; IsDigitLike() is consulted only
    // when that check fails.
    if (u_->get_isdigit(word_->unichar_id(pos))) continue;
    if (!IsDigitLike(UnicodeFor(u_, word_, pos))) break;
  }
  return pos;
}
int tesseract::UnicodeSpanSkipper::SkipPunc (int pos)
Definition at line 314 of file paragraphs.cpp.
{
  // Step forward while the unichar at pos is punctuation according to the
  // unicharset; stop at the first non-punctuation unichar or at wordlen_.
  // Returns the resulting index (unchanged if nothing was skipped).
  for (; pos < wordlen_; ++pos) {
    if (!u_->get_ispunctuation(word_->unichar_id(pos))) break;
  }
  return pos;
}
int tesseract::UnicodeSpanSkipper::SkipRomans (int pos)
Definition at line 325 of file paragraphs.cpp.
{
  // Step forward while the code point at pos is one of the letters listed in
  // kRomans; stop at the first other code point or at wordlen_. Returns the
  // resulting index (unchanged if nothing was skipped).
  //
  // NOTE(review): kRomans has no 'c'/'C' (Roman 100). Left as-is because
  // callers may depend on the current set -- confirm whether it is intended.
  const char *kRomans = "ivxlmdIVXLMD";
  while (pos < wordlen_) {
    int ch = UnicodeFor(u_, word_, pos);
    // strchr() treats the terminating NUL as part of the string, so a code
    // point of 0 would "match" kRomans; it also narrows its argument to char,
    // so a negative value could alias a listed letter. Reject both up front,
    // along with anything at or above the original 0xF0 cut-off.
    if (ch <= 0 || ch >= 0xF0) break;
    if (strchr(kRomans, ch) == NULL) break;
    pos++;
  }
  return pos;
}