Tesseract
3.02
|
00001 /********************************************************************** 00002 * File: cube_utils.h 00003 * Description: Declaration of the Cube Utilities Class 00004 * Author: Ahmad Abdulkader 00005 * Created: 2008 00006 * 00007 *(C) Copyright 2008, Google Inc. 00008 ** Licensed under the Apache License, Version 2.0(the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 * 00018 **********************************************************************/ 00019 00020 // The CubeUtils class provides miscellaneous utility and helper functions 00021 // to the rest of the Cube Engine 00022 00023 #ifndef CUBE_UTILS_H 00024 #define CUBE_UTILS_H 00025 00026 #include <vector> 00027 #include <string> 00028 00029 #include "allheaders.h" 00030 #include "const.h" 00031 #include "char_set.h" 00032 #include "char_samp.h" 00033 #include "img.h" 00034 00035 namespace tesseract { 00036 class CubeUtils { 00037 public: 00038 CubeUtils(); 00039 ~CubeUtils(); 00040 00041 // Converts a probability value to a cost by getting the -log() of the 00042 // probability value to a known base 00043 static int Prob2Cost(double prob_val); 00044 // Converts a cost to probability by getting the exp(-normalized cost) 00045 static double Cost2Prob(int cost); 00046 // Computes the length of a 32-bit char buffer 00047 static int StrLen(const char_32 *str); 00048 // Compares two 32-bit char buffers 00049 static int StrCmp(const char_32 *str1, const char_32 *str2); 00050 // Duplicates a 32-bit char buffer 00051 static char_32 *StrDup(const char_32 *str); 00052 // Creates a CharSamp from an IMAGE and a bounding box 00053 static CharSamp *CharSampleFromImg(IMAGE *img, 00054 int left, int top, int wid, int hgt); 00055 // Creates a CharSamp from an Pix and a bounding box 00056 static CharSamp *CharSampleFromPix(Pix *pix, 00057 int left, int top, int wid, int hgt); 00058 // Creates an IMAGE from a CharSamp 00059 static IMAGE *ImageFromCharSample(CharSamp *char_samp); 00060 // Creates a Pix from a CharSamp 00061 static Pix *PixFromCharSample(CharSamp *char_samp); 00062 // read the contents of a file to a string 00063 static bool ReadFileToString(const string &file_name, string *str); 00064 // split a string into vectors using any of the specified delimiters 00065 static void SplitStringUsing(const string &str, const string &delims, 00066 vector<string> *str_vec); 00067 // UTF-8 to UTF-32 convesion functions 00068 static void UTF8ToUTF32(const char *utf8_str, string_32 *str32); 00069 static void UTF32ToUTF8(const char_32 *utf32_str, string *str); 00070 // Returns true if input word has either 1) all-one-case, or 2) 00071 // first character upper-case, and remaining characters lower-case. 00072 // If char_set is not NULL, uses tesseract's unicharset functions 00073 // to determine case properties. Otherwise, uses C-locale-dependent 00074 // functions, which may be unreliable on non-ASCII characters. 00075 static bool IsCaseInvariant(const char_32 *str32, CharSet *char_set); 00076 // Returns char_32 pointer to the lower-case-transformed version of 00077 // the input string or NULL on error. If char_set is NULL returns NULL. 00078 // Return array must be freed by caller. 00079 static char_32 *ToLower(const char_32 *str32, CharSet *char_set); 00080 // Returns char_32 pointer to the upper-case-transformed version of 00081 // the input string or NULL on error. If char_set is NULL returns NULL. 00082 // Return array must be freed by caller. 00083 static char_32 *ToUpper(const char_32 *str32, CharSet *char_set); 00084 private: 00085 static unsigned char *GetImageData(IMAGE *img, 00086 int left, int top, int wid, int hgt); 00087 static unsigned char *GetImageData(Pix *pix, 00088 int left, int top, int wid, int hgt); 00089 }; 00090 } // namespace tesseract 00091 #endif // CUBE_UTILS_H