Tesseract  3.02
tesseract-ocr/cube/beam_search.h
Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        beam_search.h
00003  * Description: Declaration of Beam Word Search Algorithm Class
00004  * Author:    Ahmad Abdulkader
00005  * Created:   2007
00006  *
00007  * (C) Copyright 2008, Google Inc.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
00020 // The Beam Search class implements a Beam Search algorithm for the
00021 // N-best paths through the lattice of a search object using a language model
00022 // The search object is a segmented bitmap of a word image. The language model
00023 // is a state machine that defines valid sequences of characters
00024 // The cost of each path is the combined (product) probabilities of the
00025 // characters along the path. The character probabilities are computed using
00026 // the character classifier member of the RecoContext
00027 // The BeamSearch class itself holds the state of the last search it performed
00028 // using its "Search" method. Subsequent class to the Search method erase the
00029 // states of previously done searches
00030 
00031 #ifndef BEAM_SEARCH_H
00032 #define BEAM_SEARCH_H
00033 
00034 #include "search_column.h"
00035 #include "word_altlist.h"
00036 #include "search_object.h"
00037 #include "lang_model.h"
00038 #include "cube_utils.h"
00039 #include "cube_reco_context.h"
00040 #include "allheaders.h"
00041 
00042 namespace tesseract {
00043 
00044 class BeamSearch {
00045  public:
00046   explicit BeamSearch(CubeRecoContext *cntxt, bool word_mode = true);
00047   ~BeamSearch();
00048   // Performs a beam seach in the specified search using the specified
00049   // language model; returns an alternate list of possible words as a result.
00050   WordAltList *Search(SearchObject *srch_obj, LangModel *lang_mod = NULL);
00051   // Returns the best node in the last column of last performed search.
00052   SearchNode *BestNode() const;
00053   // Returns the string corresponding to the specified alt.
00054   char_32 *Alt(int alt) const;
00055   // Backtracks from the specified lattice node and returns the corresponding
00056   // character-mapped segments, character count, char_32 result string, and
00057   // character bounding boxes (if char_boxes is not NULL). If the segments
00058   // cannot be constructed, returns NULL, and all result arguments
00059   // will be NULL.
00060   CharSamp **BackTrack(SearchObject *srch_obj, int node_index,
00061                        int *char_cnt, char_32 **str32, Boxa **char_boxes) const;
00062   // Same as above, except it takes a pointer to a search node object
00063   // instead of node index.
00064   CharSamp **BackTrack(SearchObject *srch_obj, SearchNode *node,
00065                        int *char_cnt, char_32 **str32, Boxa **char_boxes) const;
00066   // Returns the size cost of a specified string of a lattice
00067   // path that ends at the specified lattice node.
00068   int SizeCost(SearchObject *srch_obj, SearchNode *node,
00069                char_32 **str32 = NULL) const;
00070   // Returns the word unigram cost of the given string, possibly
00071   // stripping out a single trailing punctuation character.
00072   int WordUnigramCost(char_32 *str32, WordUnigrams* word_unigrams) const;
00073 
00074   // Supplementary functions needed for visualization
00075   // Return column count of the lattice.
00076   inline int ColCnt() const { return col_cnt_; }
00077   // Returns the lattice column corresponding to the specified column index.
00078   SearchColumn *Column(int col_idx) const;
00079   // Return the index of the best node in the last column of the
00080   // best-cost path before the alternates list is sorted.
00081   inline int BestPresortedNodeIndex() const {
00082     return best_presorted_node_idx_;
00083   };
00084 
00085  private:
00086   // Maximum reasonable segmentation point count
00087   static const int kMaxSegPointCnt = 128;
00088   // Recognition context object; the context holds the character classifier
00089   // and the tuning parameters object
00090   CubeRecoContext *cntxt_;
00091   // Count of segmentation pts
00092   int seg_pt_cnt_;
00093   // Lattice column count; currently redundant with respect to seg_pt_cnt_
00094   // but that might change in the future
00095   int col_cnt_;
00096   // Array of lattice columns
00097   SearchColumn **col_;
00098   // Run in word or phrase mode
00099   bool word_mode_;
00100   // Node index of best-cost node, before alternates are merged and sorted
00101   int best_presorted_node_idx_;
00102   // Cleans up beam search state
00103   void Cleanup();
00104   // Creates a Word alternate list from the results in the lattice.
00105   // This function computes a cost for each node in the final column
00106   // of the lattice, which is a weighted average of several costs:
00107   // size cost, character bigram cost, word unigram cost, and
00108   // recognition cost from the beam search. The weights are the
00109   // CubeTuningParams, which are learned together with the character
00110   // classifiers.
00111   WordAltList *CreateWordAltList(SearchObject *srch_obj);
00112   // Creates a set of children nodes emerging from a parent node based on
00113   // the character alternate list and the language model.
00114   void CreateChildren(SearchColumn *out_col, LangModel *lang_mod,
00115                       SearchNode *parent_node, LangModEdge *lm_parent_edge,
00116                       CharAltList *char_alt_list, int extra_cost);
00117   // Backtracks from the given lattice node and returns the corresponding
00118   // char mapped segments, character count, and character bounding boxes (if
00119   // char_boxes is not NULL). If the segments cannot be constructed,
00120   // returns NULL, and all result arguments will be NULL.
00121   CharSamp **SplitByNode(SearchObject *srch_obj, SearchNode *srch_node,
00122                          int* char_cnt, Boxa **char_boxes) const;
00123 };
00124 }
00125 
00126 #endif  // BEAM_SEARCH_H