Tesseract
3.02
|
/**********************************************************************
 * File:        unicodes.h
 * Description: Unicode related machinery
 * Author:      David Eger
 * Created:     Wed Jun 15 16:37:50 PST 2011
 *
 * (C) Copyright 2011, Google, Inc.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/

// Include guard. The macro deliberately avoids a double underscore:
// identifiers containing "__" are reserved for the C++ implementation.
#ifndef TESSERACT_CCUTIL_UNICODES_H_
#define TESSERACT_CCUTIL_UNICODES_H_

namespace tesseract {

// Declarations only: the string values are defined in another translation
// unit (presumably the matching unicodes.cpp -- TODO confirm). The comments
// below describe the intent conveyed by each name.

// UTF-8 strings for the Unicode line and paragraph separator characters
// (presumably U+2028 and U+2029 -- verify against the definitions).
extern const char *kUTF8LineSeparator;
extern const char *kUTF8ParagraphSeparator;

// Bidirectional-text control characters.
extern const char *kLRM;  // Left-to-Right Mark
extern const char *kRLM;  // Right-to-Left Mark
extern const char *kRLE;  // Right-to-Left Embedding
extern const char *kPDF;  // Pop Directional Formatting

// The following are confusable internal word punctuation symbols
// which we normalize to the first variant when matching in dawgs.
// The array bounds are not declared here, so callers cannot take sizeof()
// of these arrays; how the end of each list is marked is determined by the
// definitions (NOTE(review): confirm the terminator convention there).
extern const char *kHyphenLikeUTF8[];
extern const char *kApostropheLikeUTF8[];

}  // namespace tesseract

#endif  // TESSERACT_CCUTIL_UNICODES_H_