Tesseract  3.02
tesseract::BeamSearch Class Reference

#include <beam_search.h>

List of all members.

Public Member Functions

 BeamSearch (CubeRecoContext *cntxt, bool word_mode=true)
 ~BeamSearch ()
WordAltListSearch (SearchObject *srch_obj, LangModel *lang_mod=NULL)
SearchNodeBestNode () const
char_32Alt (int alt) const
CharSamp ** BackTrack (SearchObject *srch_obj, int node_index, int *char_cnt, char_32 **str32, Boxa **char_boxes) const
CharSamp ** BackTrack (SearchObject *srch_obj, SearchNode *node, int *char_cnt, char_32 **str32, Boxa **char_boxes) const
int SizeCost (SearchObject *srch_obj, SearchNode *node, char_32 **str32=NULL) const
int WordUnigramCost (char_32 *str32, WordUnigrams *word_unigrams) const
int ColCnt () const
SearchColumnColumn (int col_idx) const
int BestPresortedNodeIndex () const

Detailed Description

Definition at line 44 of file beam_search.h.


Constructor & Destructor Documentation

tesseract::BeamSearch::BeamSearch ( CubeRecoContext cntxt,
bool  word_mode = true 
) [explicit]

Definition at line 27 of file beam_search.cpp.

                                                             {
  cntxt_ = cntxt;
  seg_pt_cnt_ = 0;
  col_cnt_ = 1;
  col_ = NULL;
  word_mode_ = word_mode;
}
tesseract::BeamSearch::~BeamSearch ( )

Definition at line 47 of file beam_search.cpp.

                        {
  Cleanup();
}

Member Function Documentation

char_32 * tesseract::BeamSearch::Alt ( int  alt) const

Definition at line 298 of file beam_search.cpp.

                                      {
  // get the last column of the lattice
  if (col_cnt_ <= 0)
    return NULL;

  SearchColumn *srch_col = col_[col_cnt_ - 1];
  if (!srch_col)
    return NULL;

  // point to the last node in the selected path
  if (alt >= srch_col->NodeCount() || srch_col->Nodes() == NULL) {
    return NULL;
  }

  SearchNode *srch_node = srch_col->Nodes()[alt];
  if (!srch_node)
    return  NULL;

  // get string
  char_32 *str32 = srch_node->PathString();
  if (!str32)
    return NULL;

  return str32;
}
CharSamp ** tesseract::BeamSearch::BackTrack ( SearchObject srch_obj,
int  node_index,
int *  char_cnt,
char_32 **  str32,
Boxa **  char_boxes 
) const

Definition at line 328 of file beam_search.cpp.

                                                          {
  // get the last column of the lattice
  if (col_cnt_ <= 0)
    return NULL;
  SearchColumn *srch_col = col_[col_cnt_ - 1];
  if (!srch_col)
    return NULL;

  // point to the last node in the selected path
  if (node_index >= srch_col->NodeCount() || !srch_col->Nodes())
    return NULL;

  SearchNode *srch_node = srch_col->Nodes()[node_index];
  if (!srch_node)
    return NULL;
  return BackTrack(srch_obj, srch_node, char_cnt, str32, char_boxes);
}
CharSamp ** tesseract::BeamSearch::BackTrack ( SearchObject srch_obj,
SearchNode node,
int *  char_cnt,
char_32 **  str32,
Boxa **  char_boxes 
) const

Definition at line 352 of file beam_search.cpp.

                                                          {
  if (!srch_node)
    return NULL;

  if (str32) {
    if (*str32)
      delete [](*str32);  // clear existing value
    *str32 = srch_node->PathString();
    if (!*str32)
      return NULL;
  }

  if (char_boxes && *char_boxes) {
    boxaDestroy(char_boxes);  // clear existing value
  }

  CharSamp **chars;
  chars = SplitByNode(srch_obj, srch_node, char_cnt, char_boxes);
  if (!chars && str32)
    delete []*str32;
  return chars;
}
SearchNode * tesseract::BeamSearch::BestNode ( ) const

Definition at line 286 of file beam_search.cpp.

                                       {
  if (col_cnt_ < 1 || !col_ || !col_[col_cnt_ - 1])
    return NULL;

  int node_cnt = col_[col_cnt_ - 1]->NodeCount();
  SearchNode **srch_nodes = col_[col_cnt_ - 1]->Nodes();
  if (node_cnt < 1 || !srch_nodes || !srch_nodes[0])
    return NULL;
  return srch_nodes[0];
}
int tesseract::BeamSearch::BestPresortedNodeIndex ( ) const [inline]

Definition at line 81 of file beam_search.h.

                                            {
    return best_presorted_node_idx_;
  };
int tesseract::BeamSearch::ColCnt ( ) const [inline]

Definition at line 76 of file beam_search.h.

{ return col_cnt_; }
SearchColumn * tesseract::BeamSearch::Column ( int  col_idx) const

Definition at line 279 of file beam_search.cpp.

                                              {
  if (col < 0 || col >= col_cnt_ || !col_)
    return NULL;
  return col_[col];
}
WordAltList * tesseract::BeamSearch::Search ( SearchObject srch_obj,
LangModel lang_mod = NULL 
)

Definition at line 98 of file beam_search.cpp.

                                                                            {
  // verifications
  if (!lang_mod)
    lang_mod = cntxt_->LangMod();
  if (!lang_mod) {
    fprintf(stderr, "Cube ERROR (BeamSearch::Search): could not construct "
            "LangModel\n");
    return NULL;
  }

  // free existing state
  Cleanup();

  // get seg pt count
  seg_pt_cnt_ = srch_obj->SegPtCnt();
  if (seg_pt_cnt_ < 0) {
    return NULL;
  }
  col_cnt_ = seg_pt_cnt_ + 1;

  // disregard suspicious cases
  if (seg_pt_cnt_ > 128) {
    fprintf(stderr, "Cube ERROR (BeamSearch::Search): segment point count is "
            "suspiciously high; bailing out\n");
    return NULL;
  }

  // alloc memory for columns
  col_ = new SearchColumn *[col_cnt_];
  if (!col_) {
    fprintf(stderr, "Cube ERROR (BeamSearch::Search): could not construct "
            "SearchColumn array\n");
    return NULL;
  }
  memset(col_, 0, col_cnt_ * sizeof(*col_));

  // for all possible segments
  for (int end_seg = 1; end_seg <= (seg_pt_cnt_ + 1); end_seg++) {
    // create a search column
    col_[end_seg - 1] = new SearchColumn(end_seg - 1,
                                         cntxt_->Params()->BeamWidth());
    if (!col_[end_seg - 1]) {
      fprintf(stderr, "Cube ERROR (BeamSearch::Search): could not construct "
              "SearchColumn for column %d\n", end_seg - 1);
      return NULL;
    }

    // for all possible start segments
    int init_seg = MAX(0, end_seg - cntxt_->Params()->MaxSegPerChar());
    for (int strt_seg = init_seg; strt_seg < end_seg; strt_seg++) {
      int parent_nodes_cnt;
      SearchNode **parent_nodes;

      // for the root segment, we do not have a parent
      if (strt_seg == 0) {
        parent_nodes_cnt = 1;
        parent_nodes = NULL;
      } else {
        // for all the existing nodes in the starting column
        parent_nodes_cnt = col_[strt_seg - 1]->NodeCount();
        parent_nodes = col_[strt_seg - 1]->Nodes();
      }

      // run the shape recognizer
      CharAltList *char_alt_list = srch_obj->RecognizeSegment(strt_seg - 1,
                                                              end_seg - 1);
      // for all the possible parents
      for (int parent_idx = 0; parent_idx < parent_nodes_cnt; parent_idx++) {
        // point to the parent node
        SearchNode *parent_node = !parent_nodes ? NULL
            : parent_nodes[parent_idx];
        LangModEdge *lm_parent_edge = !parent_node ? lang_mod->Root()
            : parent_node->LangModelEdge();

        // compute the cost of not having spaces within the segment range
        int contig_cost = srch_obj->NoSpaceCost(strt_seg - 1, end_seg - 1);

        // In phrase mode, compute the cost of not having a space before
        // this character
        int no_space_cost = 0;
        if (!word_mode_ && strt_seg > 0) {
          no_space_cost = srch_obj->NoSpaceCost(strt_seg - 1);
        }

        // if the no space cost is low enough
        if ((contig_cost + no_space_cost) < MIN_PROB_COST) {
          // Add the children nodes
          CreateChildren(col_[end_seg - 1], lang_mod, parent_node,
                         lm_parent_edge, char_alt_list,
                         contig_cost + no_space_cost);
        }

        // In phrase mode and if not starting at the root
        if (!word_mode_ && strt_seg > 0) {  // parent_node must be non-NULL
          // consider starting a new word for nodes that are valid EOW
          if (parent_node->LangModelEdge()->IsEOW()) {
            // get the space cost
            int space_cost = srch_obj->SpaceCost(strt_seg - 1);
            // if the space cost is low enough
            if ((contig_cost + space_cost) < MIN_PROB_COST) {
              // Restart the language model and add nodes as children to the
              // space node.
              CreateChildren(col_[end_seg - 1], lang_mod, parent_node, NULL,
                             char_alt_list, contig_cost + space_cost);
            }
          }
        }
      }  // parent
    }  // strt_seg

    // prune the column nodes
    col_[end_seg - 1]->Prune();

    // Free the column hash table. No longer needed
    col_[end_seg - 1]->FreeHashTable();
  }  // end_seg

  WordAltList *alt_list = CreateWordAltList(srch_obj);
  return alt_list;
}
int tesseract::BeamSearch::SizeCost ( SearchObject srch_obj,
SearchNode node,
char_32 **  str32 = NULL 
) const

Definition at line 472 of file beam_search.cpp.

                                                {
  CharSamp **chars = NULL;
  int char_cnt = 0;
  if (!node)
    return 0;
  // Backtrack to get string and character segmentation
  chars = BackTrack(srch_obj, node, &char_cnt, str32, NULL);
  if (!chars)
    return WORST_COST;
  int size_cost = (cntxt_->SizeModel() == NULL) ? 0 :
      cntxt_->SizeModel()->Cost(chars, char_cnt);
  delete []chars;
  return size_cost;
}
int tesseract::BeamSearch::WordUnigramCost ( char_32 str32,
WordUnigrams word_unigrams 
) const

The documentation for this class was generated from the following files: