|
Tesseract
3.02
|
#include <word_list_lang_model.h>
Public Member Functions | |
| WordListLangModel (CubeRecoContext *cntxt) | |
| ~WordListLangModel () | |
| LangModEdge * | Root () |
| LangModEdge ** | GetEdges (CharAltList *alt_list, LangModEdge *edge, int *edge_cnt) |
| bool | IsValidSequence (const char_32 *sequence, bool eow_flag, LangModEdge **edges) |
| bool | IsLeadingPunc (char_32 ch) |
| bool | IsTrailingPunc (char_32 ch) |
| bool | IsDigit (char_32 ch) |
| bool | AddString (const char *char_ptr) |
| bool | AddString32 (const char_32 *char_32_ptr) |
Static Public Member Functions | |
| static void | WordVariants (const CharSet &char_set, const UNICHARSET *uchset, string_32 str32, vector< WERD_CHOICE * > *word_variants) |
Definition at line 39 of file word_list_lang_model.h.
tesseract::WordListLangModel::WordListLangModel (CubeRecoContext *cntxt) [explicit]
Definition at line 29 of file word_list_lang_model.cpp.
{
  // Start in the "not initialized" state with no dawg attached; AddString()
  // and GetEdges() call Init() on demand when init_ is false (presumably
  // that is where the dawg gets created).
  init_ = false;
  dawg_ = NULL;
  // Remember the recognition context handed in by the caller.
  cntxt_ = cntxt;
}
tesseract::WordListLangModel::~WordListLangModel ()
Definition at line 35 of file word_list_lang_model.cpp.
{
// All teardown is delegated to Cleanup(), which is defined outside this
// excerpt.
Cleanup();
}
bool tesseract::WordListLangModel::AddString (const char *char_ptr)
Definition at line 167 of file word_list_lang_model.cpp.
{
  // Adds a UTF-8 encoded word to the word list.
  //   char_ptr: NUL-terminated UTF-8 string to add.
  // Returns false if initialization fails, if char_ptr is NULL, or if the
  // string converts to an empty UTF-32 string; otherwise returns whatever
  // AddString32() reports.

  // Initialize on first use.
  if (!init_ && !Init()) {
    return false;
  }
  // Reject a NULL input up front, mirroring the guard in AddString32(); the
  // UTF-8 converter is not visibly guaranteed to tolerate a NULL pointer.
  if (char_ptr == NULL) {
    return false;
  }
  string_32 str32;
  CubeUtils::UTF8ToUTF32(char_ptr, &str32);
  // Nothing to add for an empty string.
  if (str32.length() < 1) {
    return false;
  }
  return AddString32(str32.c_str());
}
bool tesseract::WordListLangModel::AddString32 (const char_32 *char_32_ptr)
Definition at line 181 of file word_list_lang_model.cpp.
{
  // Adds a UTF-32 encoded word to the word list.
  //   char_32_ptr: NUL-terminated UTF-32 string to add.
  // Returns false if char_32_ptr is NULL or initialization fails, and true
  // otherwise (including when no word variant could be produced, in which
  // case nothing is added).
  if (char_32_ptr == NULL) {
    return false;
  }
  // This method is public and dereferences dawg_ below, so it needs the same
  // lazy-initialization guard that AddString() and GetEdges() use; without it
  // a direct call before initialization would dereference a NULL dawg_.
  if (!init_ && !Init()) {
    return false;
  }

  // Get all the word variants.
  typedef vector<WERD_CHOICE *>::size_type VariantIndex;
  vector<WERD_CHOICE *> word_variants;
  WordVariants(*(cntxt_->CharacterSet()), cntxt_->TessUnicharset(),
               char_32_ptr, &word_variants);

  if (!word_variants.empty()) {
    // Find the shortest variant; on ties the earliest one wins.
    VariantIndex shortest_word = 0;
    for (VariantIndex word = 1; word < word_variants.size(); ++word) {
      if (word_variants[shortest_word]->length() >
          word_variants[word]->length()) {
        shortest_word = word;
      }
    }
    // Only add the shortest grapheme interpretation of the string to the
    // word list.
    dawg_->add_word_to_dawg(*word_variants[shortest_word]);
  }

  // The variants were allocated for us by WordVariants(); release them all.
  for (VariantIndex i = 0; i < word_variants.size(); ++i) {
    delete word_variants[i];
  }
  return true;
}
LangModEdge ** tesseract::WordListLangModel::GetEdges (CharAltList *alt_list, LangModEdge *edge, int *edge_cnt) [virtual]
Implements tesseract::LangModel.
Definition at line 71 of file word_list_lang_model.cpp.
{
  // Returns a newly allocated array holding the edges that leave `edge`
  // (or that leave the start of the dawg when `edge` is NULL).
  //   alt_list: not used by this implementation.
  //   edge:     edge to expand, or NULL to start from edge reference 0.
  //   edge_cnt: out-parameter; receives the number of edges written to the
  //             returned array and is 0 whenever NULL is returned.
  // Returns NULL if initialization fails, if the node after `edge` is 0, or
  // if the array cannot be allocated. The array is allocated with new[]
  // (kMaxEdge slots) and handed to the caller.

  // Publish an empty result first so that every early exit below leaves the
  // caller's counter in a defined state.
  (*edge_cnt) = 0;

  // Initialize on first use. The return type is a pointer, so failure is
  // reported as NULL rather than the boolean `false`.
  if (!init_ && !Init()) {
    return NULL;
  }

  EDGE_REF edge_ref = 0;
  TessLangModEdge *tess_lm_edge = reinterpret_cast<TessLangModEdge *>(edge);
  if (tess_lm_edge != NULL) {
    // Advance from the end of the given edge to the node that follows it.
    edge_ref = dawg_->next_node(tess_lm_edge->EndEdge());
    if (edge_ref == 0) {
      return NULL;
    }
  }

  // Allocate memory for the edges.
  LangModEdge **edge_array = new LangModEdge *[kMaxEdge];
  if (edge_array == NULL) {
    return NULL;
  }

  // Now collect all the edges emerging from edge_ref.
  (*edge_cnt) += TessLangModEdge::CreateChildren(cntxt_, dawg_, edge_ref,
                                                 edge_array + (*edge_cnt));
  return edge_array;
}
bool tesseract::WordListLangModel::IsDigit (char_32 ch) [inline, virtual]
Implements tesseract::LangModel.
Definition at line 58 of file word_list_lang_model.h.
{ return false; } // not yet implemented: ch is ignored and false is always returned
bool tesseract::WordListLangModel::IsLeadingPunc (char_32 ch) [inline, virtual]
Implements tesseract::LangModel.
Definition at line 56 of file word_list_lang_model.h.
{ return false; } // not yet implemented: ch is ignored and false is always returned
bool tesseract::WordListLangModel::IsTrailingPunc (char_32 ch) [inline, virtual]
Implements tesseract::LangModel.
Definition at line 57 of file word_list_lang_model.h.
{ return false; } // not yet implemented: ch is ignored and false is always returned
bool tesseract::WordListLangModel::IsValidSequence (const char_32 *sequence, bool eow_flag, LangModEdge **edges) [virtual]
Implements tesseract::LangModel.
Definition at line 114 of file word_list_lang_model.cpp.
{
// Unconditionally returns false: none of the arguments (sequence, eow_flag,
// edges) are examined and edges is never written.
return false;
}
LangModEdge * tesseract::WordListLangModel::Root () [virtual]
Implements tesseract::LangModel.
Definition at line 66 of file word_list_lang_model.cpp.
{
// This model never hands out a root edge object; it always returns NULL.
// NOTE(review): GetEdges() treats a NULL edge as "start from edge reference
// 0", which looks like the intended pairing -- confirm against callers.
return NULL;
}
void tesseract::WordListLangModel::WordVariants (const CharSet &char_set, const UNICHARSET *uchset, string_32 str32, vector< WERD_CHOICE * > *word_variants) [static]
Definition at line 154 of file word_list_lang_model.cpp.
{
  // Fills *word_variants with the WERD_CHOICE variants of str32.
  // Any pointers already stored in *word_variants are deleted and the vector
  // is emptied before new results are produced, so the caller must not keep
  // references to the previous contents.
  for (vector<WERD_CHOICE *>::iterator it = word_variants->begin();
       it != word_variants->end(); ++it) {
    delete *it;
  }
  word_variants->clear();

  // Hand off to the five-argument overload, starting from a default
  // constructed prefix string and a fresh WERD_CHOICE bound to uchset.
  string_32 empty_prefix;
  WERD_CHOICE partial_word(uchset);
  WordVariants(char_set, empty_prefix, &partial_word, str32, word_variants);
}