Public Member Functions
	UnicodeSpanSkipper (const UNICHARSET *unicharset, const WERD_CHOICE *word)
int	SkipPunc (int pos)
int	SkipDigits (int pos)
int	SkipRomans (int pos)
int	SkipAlpha (int pos)

Detailed Description

Definition at line 294 of file paragraphs.cpp.

Constructor & Destructor Documentation

tesseract::UnicodeSpanSkipper::UnicodeSpanSkipper	(	const UNICHARSET *	unicharset,
		const WERD_CHOICE *	word
	)		`[inline]`

Definition at line 296 of file paragraphs.cpp.

      : u_(unicharset), word_(word), wordlen_(word->length()) {
    // Both pointers are stored as given; the word length is read once here
    // and cached in wordlen_ for the Skip* scans.
  }

Member Function Documentation

int tesseract::UnicodeSpanSkipper::SkipAlpha ( int pos )

Definition at line 335 of file paragraphs.cpp.

                                         {
  // Advance from pos while the unichar under the cursor is reported as
  // alphabetic by u_; stop at the first one that is not, or at wordlen_.
  for (; pos < wordlen_; ++pos) {
    if (!u_->get_isalpha(word_->unichar_id(pos))) break;
  }
  // Index of the first non-alphabetic position (wordlen_ if none was found).
  return pos;
}

int tesseract::UnicodeSpanSkipper::SkipDigits ( int pos )

Definition at line 319 of file paragraphs.cpp.

                                          {
  // Advance from pos over every position that counts as a digit, either
  // because u_ flags its unichar id as one or because IsDigitLike accepts
  // the code returned by UnicodeFor for that position.
  for (; pos < wordlen_; ++pos) {
    const bool flagged_digit = u_->get_isdigit(word_->unichar_id(pos));
    // IsDigitLike is consulted only when the unicharset flag is not set.
    if (!flagged_digit && !IsDigitLike(UnicodeFor(u_, word_, pos))) break;
  }
  // Index of the first position that is not digit-like (wordlen_ if none).
  return pos;
}

int tesseract::UnicodeSpanSkipper::SkipPunc ( int pos )

Definition at line 314 of file paragraphs.cpp.

                                        {
  // Walk forward from pos and return the index of the first unichar that u_
  // does not report as punctuation.
  while (pos < wordlen_) {
    if (!u_->get_ispunctuation(word_->unichar_id(pos))) return pos;
    ++pos;
  }
  // Reached the end of the word (or pos was already at/after wordlen_).
  return pos;
}

int tesseract::UnicodeSpanSkipper::SkipRomans ( int pos )

Definition at line 325 of file paragraphs.cpp.

                                          {
  // Advance from pos over characters that belong to the roman-numeral letter
  // set below and return the index of the first character that does not.
  //
  // NOTE(review): 'c'/'C' is not in this set although the other roman
  // numeral letters are; left unchanged here - confirm whether that is
  // intentional.
  const char *kRomans = "ivxlmdIVXLMD";
  while (pos < wordlen_) {
    int ch = UnicodeFor(u_, word_, pos);
    // Reject non-positive codes before the lookup: strchr() treats the
    // terminating NUL of kRomans as part of the string, so a code of 0 would
    // otherwise be accepted as a "roman" character, and a negative code would
    // be narrowed to char and could alias one of the letters.
    // The 0xF0 upper bound is kept from the original check.
    if (ch <= 0 || ch >= 0xF0 || strchr(kRomans, ch) == 0) break;
    pos++;
  }
  return pos;
}

The documentation for this class was generated from the following file:

tesseract-ocr/ccmain/paragraphs.cpp

Public Member Functions

Detailed Description

Constructor & Destructor Documentation

Member Function Documentation