Tesseract
3.02
|
00001 /********************************************************************** 00002 * File: beam_search.h 00003 * Description: Declaration of Beam Word Search Algorithm Class 00004 * Author: Ahmad Abdulkader 00005 * Created: 2007 00006 * 00007 * (C) Copyright 2008, Google Inc. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 * 00018 **********************************************************************/ 00019 00020 // The Beam Search class implements a Beam Search algorithm for the 00021 // N-best paths through the lattice of a search object using a language model 00022 // The search object is a segmented bitmap of a word image. The language model 00023 // is a state machine that defines valid sequences of characters 00024 // The cost of each path is the combined (product) probabilities of the 00025 // characters along the path. The character probabilities are computed using 00026 // the character classifier member of the RecoContext 00027 // The BeamSearch class itself holds the state of the last search it performed 00028 // using its "Search" method. Subsequent class to the Search method erase the 00029 // states of previously done searches 00030 00031 #ifndef BEAM_SEARCH_H 00032 #define BEAM_SEARCH_H 00033 00034 #include "search_column.h" 00035 #include "word_altlist.h" 00036 #include "search_object.h" 00037 #include "lang_model.h" 00038 #include "cube_utils.h" 00039 #include "cube_reco_context.h" 00040 #include "allheaders.h" 00041 00042 namespace tesseract { 00043 00044 class BeamSearch { 00045 public: 00046 explicit BeamSearch(CubeRecoContext *cntxt, bool word_mode = true); 00047 ~BeamSearch(); 00048 // Performs a beam seach in the specified search using the specified 00049 // language model; returns an alternate list of possible words as a result. 00050 WordAltList *Search(SearchObject *srch_obj, LangModel *lang_mod = NULL); 00051 // Returns the best node in the last column of last performed search. 00052 SearchNode *BestNode() const; 00053 // Returns the string corresponding to the specified alt. 00054 char_32 *Alt(int alt) const; 00055 // Backtracks from the specified lattice node and returns the corresponding 00056 // character-mapped segments, character count, char_32 result string, and 00057 // character bounding boxes (if char_boxes is not NULL). If the segments 00058 // cannot be constructed, returns NULL, and all result arguments 00059 // will be NULL. 00060 CharSamp **BackTrack(SearchObject *srch_obj, int node_index, 00061 int *char_cnt, char_32 **str32, Boxa **char_boxes) const; 00062 // Same as above, except it takes a pointer to a search node object 00063 // instead of node index. 00064 CharSamp **BackTrack(SearchObject *srch_obj, SearchNode *node, 00065 int *char_cnt, char_32 **str32, Boxa **char_boxes) const; 00066 // Returns the size cost of a specified string of a lattice 00067 // path that ends at the specified lattice node. 00068 int SizeCost(SearchObject *srch_obj, SearchNode *node, 00069 char_32 **str32 = NULL) const; 00070 // Returns the word unigram cost of the given string, possibly 00071 // stripping out a single trailing punctuation character. 00072 int WordUnigramCost(char_32 *str32, WordUnigrams* word_unigrams) const; 00073 00074 // Supplementary functions needed for visualization 00075 // Return column count of the lattice. 00076 inline int ColCnt() const { return col_cnt_; } 00077 // Returns the lattice column corresponding to the specified column index. 00078 SearchColumn *Column(int col_idx) const; 00079 // Return the index of the best node in the last column of the 00080 // best-cost path before the alternates list is sorted. 00081 inline int BestPresortedNodeIndex() const { 00082 return best_presorted_node_idx_; 00083 }; 00084 00085 private: 00086 // Maximum reasonable segmentation point count 00087 static const int kMaxSegPointCnt = 128; 00088 // Recognition context object; the context holds the character classifier 00089 // and the tuning parameters object 00090 CubeRecoContext *cntxt_; 00091 // Count of segmentation pts 00092 int seg_pt_cnt_; 00093 // Lattice column count; currently redundant with respect to seg_pt_cnt_ 00094 // but that might change in the future 00095 int col_cnt_; 00096 // Array of lattice columns 00097 SearchColumn **col_; 00098 // Run in word or phrase mode 00099 bool word_mode_; 00100 // Node index of best-cost node, before alternates are merged and sorted 00101 int best_presorted_node_idx_; 00102 // Cleans up beam search state 00103 void Cleanup(); 00104 // Creates a Word alternate list from the results in the lattice. 00105 // This function computes a cost for each node in the final column 00106 // of the lattice, which is a weighted average of several costs: 00107 // size cost, character bigram cost, word unigram cost, and 00108 // recognition cost from the beam search. The weights are the 00109 // CubeTuningParams, which are learned together with the character 00110 // classifiers. 00111 WordAltList *CreateWordAltList(SearchObject *srch_obj); 00112 // Creates a set of children nodes emerging from a parent node based on 00113 // the character alternate list and the language model. 00114 void CreateChildren(SearchColumn *out_col, LangModel *lang_mod, 00115 SearchNode *parent_node, LangModEdge *lm_parent_edge, 00116 CharAltList *char_alt_list, int extra_cost); 00117 // Backtracks from the given lattice node and returns the corresponding 00118 // char mapped segments, character count, and character bounding boxes (if 00119 // char_boxes is not NULL). If the segments cannot be constructed, 00120 // returns NULL, and all result arguments will be NULL. 00121 CharSamp **SplitByNode(SearchObject *srch_obj, SearchNode *srch_node, 00122 int* char_cnt, Boxa **char_boxes) const; 00123 }; 00124 } 00125 00126 #endif // BEAM_SEARCH_H