Tesseract 3.02
#include <word_list_lang_model.h>
Public Member Functions
WordListLangModel (CubeRecoContext *cntxt)
~WordListLangModel ()
LangModEdge * Root ()
LangModEdge ** GetEdges (CharAltList *alt_list, LangModEdge *edge, int *edge_cnt)
bool IsValidSequence (const char_32 *sequence, bool eow_flag, LangModEdge **edges)
bool IsLeadingPunc (char_32 ch)
bool IsTrailingPunc (char_32 ch)
bool IsDigit (char_32 ch)
bool AddString (const char *char_ptr)
bool AddString32 (const char_32 *char_32_ptr)
Static Public Member Functions
static void WordVariants (const CharSet &char_set, const UNICHARSET *uchset, string_32 str32, vector< WERD_CHOICE * > *word_variants)
Definition at line 39 of file word_list_lang_model.h.
tesseract::WordListLangModel::WordListLangModel (CubeRecoContext *cntxt) [explicit]
Definition at line 29 of file word_list_lang_model.cpp.
{
  // Start in the uninitialized state: no DAWG is held yet, and only the
  // context pointer supplied by the caller is recorded.
  init_ = false;
  dawg_ = NULL;
  cntxt_ = cntxt;
}
tesseract::WordListLangModel::~WordListLangModel ()
Definition at line 35 of file word_list_lang_model.cpp.
{
  // All teardown work is delegated to Cleanup().
  Cleanup();
}
bool tesseract::WordListLangModel::AddString (const char *char_ptr)
Definition at line 167 of file word_list_lang_model.cpp.
{
  // Adds a UTF-8 encoded string to the word list.
  // Returns false if initialization fails, if char_ptr is NULL, or if the
  // string converts to an empty UTF-32 string; otherwise returns whatever
  // AddString32 returns for the converted string.

  // Initialize lazily on first use; give up if that fails.
  if (!init_ && !Init()) {
    return false;
  }
  // Reject a missing input up front, mirroring the NULL check performed by
  // AddString32, rather than handing NULL to the UTF-8 converter.
  if (char_ptr == NULL) {
    return false;
  }
  // Convert to UTF-32; an empty conversion result is not added.
  string_32 str32;
  CubeUtils::UTF8ToUTF32(char_ptr, &str32);
  if (str32.length() < 1) {
    return false;
  }
  return AddString32(str32.c_str());
}
bool tesseract::WordListLangModel::AddString32 (const char_32 *char_32_ptr)
Definition at line 181 of file word_list_lang_model.cpp.
{
  // Adds a UTF-32 string to the word list.
  // Returns false if char_32_ptr is NULL or if lazy initialization fails.
  // Returns true otherwise, including when no word variant was produced and
  // therefore nothing was added.
  if (char_32_ptr == NULL) {
    return false;
  }
  // Initialize lazily, as AddString and GetEdges do. This method is public
  // and dereferences dawg_, which the constructor leaves NULL.
  // NOTE(review): presumes Init() is what sets up dawg_ -- confirm.
  if (!init_ && !Init()) {
    return false;
  }
  // get all the word variants
  vector<WERD_CHOICE *> word_variants;
  WordVariants(*(cntxt_->CharacterSet()), cntxt_->TessUnicharset(),
               char_32_ptr, &word_variants);
  if (!word_variants.empty()) {
    // find the shortest variant (the first one wins on ties)
    size_t shortest_word = 0;
    for (size_t word = 1; word < word_variants.size(); word++) {
      if (word_variants[shortest_word]->length() >
          word_variants[word]->length()) {
        shortest_word = word;
      }
    }
    // only add the shortest grapheme interpretation of string to the word list
    dawg_->add_word_to_dawg(*word_variants[shortest_word]);
  }
  // The variants were allocated for this call only; release all of them.
  for (size_t i = 0; i < word_variants.size(); i++) {
    delete word_variants[i];
  }
  return true;
}
LangModEdge ** tesseract::WordListLangModel::GetEdges (CharAltList *alt_list, LangModEdge *edge, int *edge_cnt) [virtual]
Implements tesseract::LangModel.
Definition at line 71 of file word_list_lang_model.cpp.
{
  // Returns a newly allocated array holding the edges that emerge from
  // `edge` (or from edge reference 0 when `edge` is NULL) and stores their
  // count in *edge_cnt. Returns NULL, with *edge_cnt set to 0, when
  // initialization fails or when advancing past `edge` yields edge
  // reference 0. The array is allocated with new[] and handed to the caller.
  // alt_list is not used by this implementation.

  // Report zero edges on every early-exit path, including failed init.
  (*edge_cnt) = 0;

  // initialize if necessary
  if (!init_ && !Init()) {
    return NULL;  // pointer return type: NULL rather than `false`
  }

  EDGE_REF edge_ref;
  TessLangModEdge *tess_lm_edge = reinterpret_cast<TessLangModEdge *>(edge);
  if (tess_lm_edge == NULL) {
    edge_ref = 0;
  } else {
    // advance node
    edge_ref = dawg_->next_node(tess_lm_edge->EndEdge());
    if (edge_ref == 0) {
      return NULL;
    }
  }

  // allocate memory for edges
  LangModEdge **edge_array = new LangModEdge *[kMaxEdge];
  // Kept from the original; only reachable if operator new can return NULL
  // in this build configuration.
  if (edge_array == NULL) {
    return NULL;
  }

  // now get all the emerging edges (count starts at 0, so fill from the
  // beginning of the array)
  (*edge_cnt) = TessLangModEdge::CreateChildren(cntxt_, dawg_, edge_ref,
                                                edge_array);
  return edge_array;
}
bool tesseract::WordListLangModel::IsDigit (char_32 ch) [inline, virtual]
Implements tesseract::LangModel.
Definition at line 58 of file word_list_lang_model.h.
{
  // Not yet implemented: reports false for every character.
  return false;
}
bool tesseract::WordListLangModel::IsLeadingPunc (char_32 ch) [inline, virtual]
Implements tesseract::LangModel.
Definition at line 56 of file word_list_lang_model.h.
{
  // Not yet implemented: reports false for every character.
  return false;
}
bool tesseract::WordListLangModel::IsTrailingPunc (char_32 ch) [inline, virtual]
Implements tesseract::LangModel.
Definition at line 57 of file word_list_lang_model.h.
{
  // Not yet implemented: reports false for every character.
  return false;
}
bool tesseract::WordListLangModel::IsValidSequence (const char_32 *sequence, bool eow_flag, LangModEdge **edges) [virtual]
Implements tesseract::LangModel.
Definition at line 114 of file word_list_lang_model.cpp.
{
  // Always reports false; none of the arguments are inspected.
  return false;
}
LangModEdge * tesseract::WordListLangModel::Root () [virtual]
Implements tesseract::LangModel.
Definition at line 66 of file word_list_lang_model.cpp.
{
  // This model exposes no root edge object; NULL is always returned.
  return NULL;
}
void tesseract::WordListLangModel::WordVariants (const CharSet &char_set, const UNICHARSET *uchset, string_32 str32, vector< WERD_CHOICE * > *word_variants) [static]
Definition at line 154 of file word_list_lang_model.cpp.
{
  // Delete whatever entries the caller left in the output vector, then
  // empty it so it only holds results from this call.
  for (vector<WERD_CHOICE *>::iterator it = word_variants->begin();
       it != word_variants->end(); ++it) {
    delete *it;
  }
  word_variants->clear();

  // Seed the five-argument overload with an empty prefix string and a
  // fresh WERD_CHOICE built on uchset; it fills word_variants.
  WERD_CHOICE word_so_far(uchset);
  string_32 prefix_str32;
  WordVariants(char_set, prefix_str32, &word_so_far, str32, word_variants);
}