Tesseract
3.02
|
00001 00002 // File: fontinfo.h 00003 // Description: Font information classes abstracted from intproto.h/cpp. 00004 // Author: rays@google.com (Ray Smith) 00005 // Created: Tue May 17 17:08:01 PDT 2011 00006 // 00007 // (C) Copyright 2011, Google Inc. 00008 // Licensed under the Apache License, Version 2.0 (the "License"); 00009 // you may not use this file except in compliance with the License. 00010 // You may obtain a copy of the License at 00011 // http://www.apache.org/licenses/LICENSE-2.0 00012 // Unless required by applicable law or agreed to in writing, software 00013 // distributed under the License is distributed on an "AS IS" BASIS, 00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 // See the License for the specific language governing permissions and 00016 // limitations under the License. 00017 // 00019 00020 00021 #ifndef TESSERACT_CCSTRUCT_FONTINFO_H_ 00022 #define TESSERACT_CCSTRUCT_FONTINFO_H_ 00023 00024 #include "genericvector.h" 00025 #include "host.h" 00026 #include "unichar.h" 00027 00028 namespace tesseract { 00029 00030 // Struct for information about spacing between characters in a particular font. 00031 struct FontSpacingInfo { 00032 inT16 x_gap_before; 00033 inT16 x_gap_after; 00034 GenericVector<UNICHAR_ID> kerned_unichar_ids; 00035 GenericVector<inT16> kerned_x_gaps; 00036 }; 00037 00038 /* 00039 * font_properties contains properties about boldness, italicness, fixed pitch, 00040 * serif, fraktur 00041 */ 00042 struct FontInfo { 00043 FontInfo() : name(NULL), spacing_vec(NULL) {} 00044 ~FontInfo() {} 00045 // Reserves unicharset_size spots in spacing_vec. 00046 void init_spacing(int unicharset_size) { 00047 spacing_vec = new GenericVector<FontSpacingInfo *>(); 00048 spacing_vec->init_to_size(unicharset_size, NULL); 00049 } 00050 // Adds the given pointer to FontSpacingInfo to spacing_vec member 00051 // (FontInfo class takes ownership of the pointer). 00052 // Note: init_spacing should be called before calling this function. 00053 void add_spacing(UNICHAR_ID uch_id, FontSpacingInfo *spacing_info) { 00054 ASSERT_HOST(spacing_vec != NULL && spacing_vec->size() > uch_id); 00055 (*spacing_vec)[uch_id] = spacing_info; 00056 } 00057 00058 // Returns the pointer to FontSpacingInfo for the given UNICHAR_ID. 00059 const FontSpacingInfo *get_spacing(UNICHAR_ID uch_id) const { 00060 return (spacing_vec == NULL || spacing_vec->size() <= uch_id) ? 00061 NULL : (*spacing_vec)[uch_id]; 00062 } 00063 00064 // Fills spacing with the value of the x gap expected between the two given 00065 // UNICHAR_IDs. Returns true on success. 00066 bool get_spacing(UNICHAR_ID prev_uch_id, 00067 UNICHAR_ID uch_id, 00068 int *spacing) const { 00069 const FontSpacingInfo *prev_fsi = this->get_spacing(prev_uch_id); 00070 const FontSpacingInfo *fsi = this->get_spacing(uch_id); 00071 if (prev_fsi == NULL || fsi == NULL) return false; 00072 int i = 0; 00073 for (; i < prev_fsi->kerned_unichar_ids.size(); ++i) { 00074 if (prev_fsi->kerned_unichar_ids[i] == uch_id) break; 00075 } 00076 if (i < prev_fsi->kerned_unichar_ids.size()) { 00077 *spacing = prev_fsi->kerned_x_gaps[i]; 00078 } else { 00079 *spacing = prev_fsi->x_gap_after + fsi->x_gap_before; 00080 } 00081 return true; 00082 } 00083 00084 bool is_italic() const { return properties & 1; } 00085 bool is_bold() const { return (properties & 2) != 0; } 00086 bool is_fixed_pitch() const { return (properties & 4) != 0; } 00087 bool is_serif() const { return (properties & 8) != 0; } 00088 bool is_fraktur() const { return (properties & 16) != 0; } 00089 00090 char* name; 00091 uinT32 properties; 00092 // The universal_id is a field reserved for the initialization process 00093 // to assign a unique id number to all fonts loaded for the current 00094 // combination of languages. This id will then be returned by 00095 // ResultIterator::WordFontAttributes. 00096 inT32 universal_id; 00097 // Horizontal spacing between characters (indexed by UNICHAR_ID). 00098 GenericVector<FontSpacingInfo *> *spacing_vec; 00099 }; 00100 00101 // Every class (character) owns a FontSet that represents all the fonts that can 00102 // render this character. 00103 // Since almost all the characters from the same script share the same set of 00104 // fonts, the sets are shared over multiple classes (see 00105 // Classify::fontset_table_). Thus, a class only store an id to a set. 00106 // Because some fonts cannot render just one character of a set, there are a 00107 // lot of FontSet that differ only by one font. Rather than storing directly 00108 // the FontInfo in the FontSet structure, it's better to share FontInfos among 00109 // FontSets (Classify::fontinfo_table_). 00110 struct FontSet { 00111 int size; 00112 int* configs; // FontInfo ids 00113 }; 00114 00115 // Compare FontInfo structures. 00116 bool CompareFontInfo(const FontInfo& fi1, const FontInfo& fi2); 00117 // Compare FontSet structures. 00118 bool CompareFontSet(const FontSet& fs1, const FontSet& fs2); 00119 // Deletion callbacks for GenericVector. 00120 void FontInfoDeleteCallback(FontInfo f); 00121 void FontSetDeleteCallback(FontSet fs); 00122 00123 // Callbacks used by UnicityTable to read/write FontInfo/FontSet structures. 00124 bool read_info(FILE* f, FontInfo* fi, bool swap); 00125 bool write_info(FILE* f, const FontInfo& fi); 00126 bool read_spacing_info(FILE *f, FontInfo* fi, bool swap); 00127 bool write_spacing_info(FILE* f, const FontInfo& fi); 00128 bool read_set(FILE* f, FontSet* fs, bool swap); 00129 bool write_set(FILE* f, const FontSet& fs); 00130 00131 } // namespace tesseract. 00132 00133 #endif /* THIRD_PARTY_TESSERACT_CCSTRUCT_FONTINFO_H_ */