Tesseract  3.02
tesseract-ocr/training/dawg2wordlist.cpp File Reference
#include "dawg.h"
#include "host.h"
#include "tesscallback.h"
#include "trie.h"
#include "unicharset.h"

Go to the source code of this file.

Classes

class  WordOutputter

Functions

tesseract::Dawg * LoadSquishedDawg (const UNICHARSET &unicharset, const char *filename)
int WriteDawgAsWordlist (const UNICHARSET &unicharset, const tesseract::Dawg *dawg, const char *outfile_name)
int main (int argc, char *argv[])

Variables

const int kDictDebugLevel = 1

Function Documentation

tesseract::Dawg* LoadSquishedDawg ( const UNICHARSET &  unicharset,
const char *  filename 
)

Definition at line 28 of file dawg2wordlist.cpp.

                                                        {
  // Reads a squished dawg from `filename` and returns a heap-allocated
  // tesseract::Dawg, or NULL when the file cannot be opened. The caller
  // receives the pointer produced by `new`. Note: `unicharset` is not
  // referenced anywhere in this body.

  // Open in binary mode and bail out early on failure.
  FILE *input = fopen(filename, "rb");
  if (!input) {
    tprintf("Could not open %s for reading.\n", filename);
    return NULL;
  }

  // Debug level handed to the SquishedDawg constructor.
  const int debug_level = 1;

  tprintf("Loading word list from %s\n", filename);
  tesseract::Dawg *loaded = new tesseract::SquishedDawg(
      input, tesseract::DAWG_TYPE_WORD, "eng", SYSTEM_DAWG_PERM,
      debug_level);
  tprintf("Word list loaded.\n");

  // The stream is closed once construction has finished.
  fclose(input);
  return loaded;
}
int main ( int  argc,
char *  argv[] 
)

Definition at line 70 of file dawg2wordlist.cpp.

                                 {
  // Entry point: loads a unicharset and a squished dawg, then writes the
  // dawg's words to a word list file. Returns 1 on a usage or load error,
  // otherwise whatever WriteDawgAsWordlist returns.

  // Exactly three arguments must follow the program name.
  if (argc != 4) {
    tprintf("Print all the words in a given dawg.\n");
    tprintf("Usage: %s <unicharset> <dawgfile> <wordlistfile>\n",
            argv[0]);
    return 1;
  }

  const char *unicharset_path = argv[1];
  const char *dawg_path = argv[2];
  const char *wordlist_path = argv[3];

  UNICHARSET charset;
  if (!charset.load_from_file(unicharset_path)) {
    tprintf("Error loading unicharset from %s.\n", unicharset_path);
    return 1;
  }

  tesseract::Dawg *dawg = LoadSquishedDawg(charset, dawg_path);
  if (!dawg) {
    tprintf("Error loading dictionary from %s.\n", dawg_path);
    return 1;
  }

  // Write the words out, then free the dawg before reporting the status.
  const int status = WriteDawgAsWordlist(charset, dawg, wordlist_path);
  delete dawg;
  return status;
}
int WriteDawgAsWordlist ( const UNICHARSET &  unicharset,
const tesseract::Dawg *  dawg,
const char *  outfile_name 
)

Definition at line 54 of file dawg2wordlist.cpp.

                                                  {
  // Writes the words of `dawg` to the file `outfile_name` by iterating the
  // dawg with a callback bound to WordOutputter::output_word. Returns 1 if
  // the output file cannot be opened, otherwise the result of fclose().

  FILE *sink = fopen(outfile_name, "wb");
  if (!sink) {
    tprintf("Could not open %s for writing.\n", outfile_name);
    return 1;
  }

  // Bind the outputter's member function as a permanent callback; a
  // permanent callback is deleted explicitly below once iteration is done.
  WordOutputter writer(sink);
  TessCallback1<const char *> *emit_word =
      NewPermanentTessCallback(&writer, &WordOutputter::output_word);
  dawg->iterate_words(unicharset, emit_word);
  delete emit_word;

  // The close status doubles as the function's return value.
  const int close_status = fclose(sink);
  return close_status;
}

Variable Documentation

const int kDictDebugLevel = 1

Definition at line 26 of file dawg2wordlist.cpp.