Tesseract  3.02
tesseract-ocr/cube/word_size_model.h
Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        word_size_model.h
00003  * Description: Declaration of the Word Size Model Class
00004  * Author:    Ahmad Abdulkader
00005  * Created:   2008
00006  *
00007  * (C) Copyright 2008, Google Inc.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
00020 // The WordSizeModel class abstracts the geometrical relationships
00021 // between characters/shapes in the same word (presumably of the same font).
00022 // A non-parametric bigram model describes the three geometrical properties of a
00023 // character pair:
00024 //   1- Normalized Width
00025 //   2- Normalized Top
00026 //   3- Normalized Height
00027 // These dimensions are computed for each character pair in a word. These are
00028 // then compared to the same information for each of the fonts that the size
00029 // model knows about. The WordSizeCost is the cost of the font that matches
00030 // best.
00031 
00032 #ifndef WORD_SIZE_MODEL_H
00033 #define WORD_SIZE_MODEL_H
00034 
#include <cstddef>
#include <string>
#include <vector>

#include "char_samp.h"
#include "char_set.h"
00038 
00039 namespace tesseract {
// Size statistics stored for one ordered pair of characters (character 0
// followed by character 1) in a font's size table.
// NOTE(review): the values are presumably normalized ratios converted to
// fixed point with WordSizeModel::kShapeModelScale -- confirm against the
// code that loads the size table.
// The struct is kept a plain aggregate (no constructors or initializers) so
// that its layout and brace-initialization are unchanged.
struct PairSizeInfo {
  int delta_top;  // presumably the top offset between the two characters
  int wid_0;      // width of character 0
  int hgt_0;      // height of character 0
  int wid_1;      // width of character 1
  int hgt_1;      // height of character 1
};
00047 
00048 struct FontPairSizeInfo {
00049   string font_name;
00050   PairSizeInfo **pair_size_info;
00051 };
00052 
00053 class WordSizeModel {
00054  public:
00055   WordSizeModel(CharSet *, bool contextual);
00056   virtual ~WordSizeModel();
00057   static WordSizeModel *Create(const string &data_file_path,
00058                                const string &lang,
00059                                CharSet *char_set,
00060                                bool contextual);
00061   // Given a word and number of unichars, return the size cost,
00062   // minimized over all fonts in the size model.
00063   int Cost(CharSamp **samp_array, int samp_cnt) const;
00064   // Given dimensions of a pair of character samples and a font size
00065   // model for that character pair, return the pair's size cost for
00066   // the font.
00067   static double PairCost(int width_0, int height_0, int top_0,
00068                          int width_1, int height_1, int top_1,
00069                          const PairSizeInfo& pair_info);
00070   bool Save(string file_name);
00071   // Number of fonts in size model.
00072   inline int FontCount() const {
00073     return font_pair_size_models_.size();
00074   }
00075   inline const FontPairSizeInfo *FontInfo() const {
00076     return &font_pair_size_models_[0];
00077   }
00078   // Helper functions to convert between size codes, class id and position
00079   // codes
00080   static inline int SizeCode(int cls_id, int start, int end) {
00081     return (cls_id << 2) + (end << 1) + start;
00082   }
00083 
00084  private:
00085   // Scaling constant used to convert floating point ratios in size table
00086   // to fixed point
00087   static const int kShapeModelScale = 1000;
00088   static const int kExpectedTokenCount = 10;
00089 
00090   // Language properties
00091   bool contextual_;
00092   CharSet *char_set_;
00093   // Size ratios table
00094   vector<FontPairSizeInfo> font_pair_size_models_;
00095 
00096   // Initialize the word size model object
00097   bool Init(const string &data_file_path, const string &lang);
00098 };
00099 }
00100 #endif  // WORD_SIZE_MODEL_H