Tesseract
3.02
|
/**********************************************************************
 * File:        word_size_model.h
 * Description: Declaration of the Word Size Model Class
 * Author:    Ahmad Abdulkader
 * Created:   2008
 *
 * (C) Copyright 2008, Google Inc.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/

// The WordSizeModel class abstracts the geometrical relationships
// between characters/shapes in the same word (presumably of the same font).
// A non-parametric bigram model describes the three geometrical properties of a
// character pair:
// 1- Normalized Width
// 2- Normalized Top
// 3- Normalized Height
// These dimensions are computed for each character pair in a word. These are
// then compared to the same information for each of the fonts that the size
// model knows about. The WordSizeCost is the cost of the font that matches
// best.
00031 00032 #ifndef WORD_SIZE_MODEL_H 00033 #define WORD_SIZE_MODEL_H 00034 00035 #include <string> 00036 #include "char_samp.h" 00037 #include "char_set.h" 00038 00039 namespace tesseract { 00040 struct PairSizeInfo { 00041 int delta_top; 00042 int wid_0; 00043 int hgt_0; 00044 int wid_1; 00045 int hgt_1; 00046 }; 00047 00048 struct FontPairSizeInfo { 00049 string font_name; 00050 PairSizeInfo **pair_size_info; 00051 }; 00052 00053 class WordSizeModel { 00054 public: 00055 WordSizeModel(CharSet *, bool contextual); 00056 virtual ~WordSizeModel(); 00057 static WordSizeModel *Create(const string &data_file_path, 00058 const string &lang, 00059 CharSet *char_set, 00060 bool contextual); 00061 // Given a word and number of unichars, return the size cost, 00062 // minimized over all fonts in the size model. 00063 int Cost(CharSamp **samp_array, int samp_cnt) const; 00064 // Given dimensions of a pair of character samples and a font size 00065 // model for that character pair, return the pair's size cost for 00066 // the font. 00067 static double PairCost(int width_0, int height_0, int top_0, 00068 int width_1, int height_1, int top_1, 00069 const PairSizeInfo& pair_info); 00070 bool Save(string file_name); 00071 // Number of fonts in size model. 
00072 inline int FontCount() const { 00073 return font_pair_size_models_.size(); 00074 } 00075 inline const FontPairSizeInfo *FontInfo() const { 00076 return &font_pair_size_models_[0]; 00077 } 00078 // Helper functions to convert between size codes, class id and position 00079 // codes 00080 static inline int SizeCode(int cls_id, int start, int end) { 00081 return (cls_id << 2) + (end << 1) + start; 00082 } 00083 00084 private: 00085 // Scaling constant used to convert floating point ratios in size table 00086 // to fixed point 00087 static const int kShapeModelScale = 1000; 00088 static const int kExpectedTokenCount = 10; 00089 00090 // Language properties 00091 bool contextual_; 00092 CharSet *char_set_; 00093 // Size ratios table 00094 vector<FontPairSizeInfo> font_pair_size_models_; 00095 00096 // Initialize the word size model object 00097 bool Init(const string &data_file_path, const string &lang); 00098 }; 00099 } 00100 #endif // WORD_SIZE_MODEL_H