Tesseract  3.02
tesseract-ocr/ccutil/unicodes.h
Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        unicodes.h
00003  * Description: Unicode related machinery
00004  * Author:      David Eger
00005  * Created:     Wed Jun 15 16:37:50 PST 2011
00006  *
00007  * (C) Copyright 2011, Google, Inc.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
00020 #ifndef TESSERACT_CCUTIL_UNICODES_H__
00021 #define TESSERACT_CCUTIL_UNICODES_H__
00022 
00023 namespace tesseract {
00024 // String constants used by the Unicode machinery. They are only declared here (extern); the values are defined elsewhere.
00025 extern const char *kUTF8LineSeparator;  // NOTE(review): presumably U+2028 LINE SEPARATOR as UTF-8 bytes -- confirm against the definition
00026 extern const char *kUTF8ParagraphSeparator;  // NOTE(review): presumably U+2029 PARAGRAPH SEPARATOR as UTF-8 bytes -- confirm against the definition
00027 extern const char *kLRM;  // Left-to-Right Mark
00028 extern const char *kRLM;  // Right-to-Left Mark
00029 extern const char *kRLE;  // Right-to-Left Embedding
00030 extern const char *kPDF;  // Pop Directional Formatting
00031 
00032 // The following are confusable word-internal punctuation symbols,
00033 // which we normalize to the first variant when matching in dawgs.
00034 extern const char *kHyphenLikeUTF8[];  // array bound is not declared here; NOTE(review): check the definition for how the end of the list is marked
00035 extern const char *kApostropheLikeUTF8[];  // array bound is not declared here; NOTE(review): check the definition for how the end of the list is marked
00036 
00037 }  // namespace tesseract
00038 
00039 #endif  // TESSERACT_CCUTIL_UNICODES_H__