Tesseract  3.02
tesseract::WordListLangModel Class Reference

#include <word_list_lang_model.h>

Inheritance diagram for tesseract::WordListLangModel:
tesseract::LangModel

List of all members.

Public Member Functions

 WordListLangModel (CubeRecoContext *cntxt)
 ~WordListLangModel ()
LangModEdge * Root ()
LangModEdge ** GetEdges (CharAltList *alt_list, LangModEdge *edge, int *edge_cnt)
bool IsValidSequence (const char_32 *sequence, bool eow_flag, LangModEdge **edges)
bool IsLeadingPunc (char_32 ch)
bool IsTrailingPunc (char_32 ch)
bool IsDigit (char_32 ch)
bool AddString (const char *char_ptr)
bool AddString32 (const char_32 *char_32_ptr)

Static Public Member Functions

static void WordVariants (const CharSet &char_set, const UNICHARSET *uchset, string_32 str32, vector< WERD_CHOICE * > *word_variants)

Detailed Description

Definition at line 39 of file word_list_lang_model.h.


Constructor & Destructor Documentation

tesseract::WordListLangModel::WordListLangModel ( CubeRecoContext *  cntxt) [explicit]

Definition at line 29 of file word_list_lang_model.cpp.

                                                           {
  // Remember the recognition context supplied by the caller.
  cntxt_ = cntxt;
  // No DAWG exists yet and the model starts out uninitialized; AddString()
  // and GetEdges() check init_ and call Init() on first use.
  dawg_ = NULL;
  init_ = false;
}
tesseract::WordListLangModel::~WordListLangModel ( )

Definition at line 35 of file word_list_lang_model.cpp.

                                      {
  // All teardown is delegated to Cleanup().
  Cleanup();
}

Member Function Documentation

bool tesseract::WordListLangModel::AddString ( const char *  char_ptr)

Definition at line 167 of file word_list_lang_model.cpp.

                                                      {
  // Lazily set the model up on first use; bail out if that fails.
  if (init_ == false) {
    if (Init() == false) {
      return false;
    }
  }

  // Convert the UTF-8 input to UTF-32; an empty conversion result is
  // rejected, anything else is handed on to AddString32().
  string_32 utf32_word;
  CubeUtils::UTF8ToUTF32(char_ptr, &utf32_word);
  if (utf32_word.length() >= 1) {
    return AddString32(utf32_word.c_str());
  }
  return false;
}
bool tesseract::WordListLangModel::AddString32 ( const char_32 *  char_32_ptr)

Definition at line 181 of file word_list_lang_model.cpp.

                                                              {
  // Adds a UTF-32 string to the word list. Returns false for a NULL input
  // or when the model cannot be initialized, true otherwise (including the
  // case where no variant was produced and nothing was added).
  if (char_32_ptr == NULL) {
    return false;
  }
  // The constructor leaves dawg_ NULL and this method is public, so apply
  // the same lazy-initialization guard AddString() and GetEdges() use
  // before dawg_ is dereferenced below.
  // NOTE(review): assumes Init() is what creates dawg_ -- confirm.
  if (!init_ && !Init()) {
    return false;
  }
  // get all the word variants
  vector<WERD_CHOICE *> word_variants;
  WordVariants(*(cntxt_->CharacterSet()), cntxt_->TessUnicharset(),
               char_32_ptr, &word_variants);

  if (!word_variants.empty()) {
    // find the shortest variant (the earliest one wins on ties)
    size_t shortest_word = 0;
    for (size_t word = 1; word < word_variants.size(); word++) {
      if (word_variants[shortest_word]->length() >
          word_variants[word]->length()) {
        shortest_word = word;
      }
    }
    // only add the shortest grapheme interpretation of string to the word list
    dawg_->add_word_to_dawg(*word_variants[shortest_word]);
  }
  // the variants are owned by this function once WordVariants() returns
  for (size_t i = 0; i < word_variants.size(); i++) {
    delete word_variants[i];
  }
  return true;
}
LangModEdge ** tesseract::WordListLangModel::GetEdges ( CharAltList *  alt_list,
LangModEdge *  edge,
int *  edge_cnt 
) [virtual]

Implements tesseract::LangModel.

Definition at line 71 of file word_list_lang_model.cpp.

                                                         {
  // Returns a newly allocated array of the edges that leave `edge` (or that
  // leave the starting point when `edge` is NULL) and stores their count in
  // *edge_cnt. Returns NULL when initialization fails or when there is no
  // next node to expand.

  // initialize if necessary
  if (init_ == false) {
    if (Init() == false) {
      // The return type is a pointer: report failure with NULL, not `false`
      // (a bool literal is not a null pointer constant in C++11 and later).
      return NULL;
    }
  }

  (*edge_cnt) = 0;

  EDGE_REF edge_ref;

  TessLangModEdge *tess_lm_edge = reinterpret_cast<TessLangModEdge *>(edge);

  if (tess_lm_edge == NULL) {
    // no incoming edge: expand from edge reference 0
    edge_ref = 0;
  } else {
    edge_ref = tess_lm_edge->EndEdge();

    // advance node
    edge_ref = dawg_->next_node(edge_ref);
    if (edge_ref == 0) {
      // nothing follows this edge; *edge_cnt stays 0
      return NULL;
    }
  }

  // allocate memory for edges
  LangModEdge **edge_array = new LangModEdge *[kMaxEdge];
  // NOTE(review): a throwing operator new never yields NULL, so this check
  // only matters in builds where allocation failure returns NULL -- confirm.
  if (edge_array == NULL) {
    return NULL;
  }

  // now get all the emerging edges
  (*edge_cnt) += TessLangModEdge::CreateChildren(cntxt_, dawg_, edge_ref,
                                                 edge_array + (*edge_cnt));

  return edge_array;
}
bool tesseract::WordListLangModel::IsDigit ( char_32  ch) [inline, virtual]

Implements tesseract::LangModel.

Definition at line 58 of file word_list_lang_model.h.

{ return false; }  // not yet implemented: always returns false, ch is ignored
bool tesseract::WordListLangModel::IsLeadingPunc ( char_32  ch) [inline, virtual]

Implements tesseract::LangModel.

Definition at line 56 of file word_list_lang_model.h.

{ return false; }  // not yet implemented: always returns false, ch is ignored
bool tesseract::WordListLangModel::IsTrailingPunc ( char_32  ch) [inline, virtual]

Implements tesseract::LangModel.

Definition at line 57 of file word_list_lang_model.h.

{ return false; }  // not yet implemented: always returns false, ch is ignored
bool tesseract::WordListLangModel::IsValidSequence ( const char_32 *  sequence,
bool  eow_flag,
LangModEdge **  edges 
) [virtual]

Implements tesseract::LangModel.

Definition at line 114 of file word_list_lang_model.cpp.

                                                                            {
  // Stub: every sequence is reported as invalid; none of the arguments is
  // read and `edges` is left untouched.
  return false;
}
LangModEdge * tesseract::WordListLangModel::Root ( ) [virtual]

Implements tesseract::LangModel.

Definition at line 66 of file word_list_lang_model.cpp.

                                      {
  // This model hands out no root edge object; NULL is always returned.
  // GetEdges() accepts a NULL edge and then expands from edge reference 0.
  return NULL;
}
void tesseract::WordListLangModel::WordVariants ( const CharSet &  char_set,
const UNICHARSET *  uchset,
string_32  str32,
vector< WERD_CHOICE * > *  word_variants 
) [static]

Definition at line 154 of file word_list_lang_model.cpp.

                                                                           {
  // Free and drop whatever the caller's vector currently holds so that it
  // ends up containing only the variants produced below.
  for (vector<WERD_CHOICE *>::iterator it = word_variants->begin();
       it != word_variants->end(); ++it) {
    delete *it;
  }
  word_variants->clear();

  // Hand over to the overload that takes a prefix and a word built so far,
  // seeding it with a default-constructed prefix and a fresh WERD_CHOICE
  // created from uchset.
  string_32 seed_prefix;
  WERD_CHOICE seed_word(uchset);
  WordVariants(char_set, seed_prefix, &seed_word, str32, word_variants);
}

The documentation for this class was generated from the following files: