Tesseract  3.02
tesseract-ocr/training/ambiguous_words.cpp File Reference
#include <stdio.h>
#include "baseapi.h"
#include "helpers.h"
#include "strngs.h"
#include "dict.h"
#include "tesseractclass.h"

Go to the source code of this file.

Functions

int main (int argc, char **argv)

Function Documentation

int main ( int  argc,
char **  argv 
)

Reads each word from the input word list and calls Dict::NoDangerousAmbig() on it, so that the ambiguities found are recorded in the output file.

Definition at line 32 of file ambiguous_words.cpp.

                                {
  // Command-line tool: reads a word list and calls Dict::NoDangerousAmbig()
  // on every word so that ambiguities are recorded in the file named by the
  // output_ambig_words_file variable.
  //
  // Usage: <prog> [-l lang] tessdata_dir wordlist_file output_file
  // The language defaults to "eng" when -l is not given.
  // Returns 0 on success and 1 on bad arguments, on Tesseract initialization
  // failure, or when the input word list cannot be opened.

  // Parse input arguments: either exactly three positional arguments, or
  // "-l lang" followed by the three positional arguments.
  if (argc != 4 && (argc != 6 || strcmp(argv[1], "-l") != 0)) {
    printf("Usage: %s [-l lang] tessdata_dir wordlist_file"
           " output_ambiguious_wordlist_file\n", argv[0]);
    return 1;
  }
  // Index of the last consumed argument; the positional arguments follow it.
  int argv_offset = 0;
  STRING lang;
  if (argc == 6) {
    lang = argv[2];
    argv_offset = 2;  // skip "-l" and the language name
  } else {
    lang = "eng";
  }
  const char *tessdata_dir = argv[++argv_offset];
  const char *input_file_str = argv[++argv_offset];
  const char *output_file_str = argv[++argv_offset];

  // Initialize Tesseract, passing the output file name in through the
  // output_ambig_words_file variable.
  tesseract::TessBaseAPI api;
  GenericVector<STRING> vars_vec;
  GenericVector<STRING> vars_values;
  vars_vec.push_back("output_ambig_words_file");
  vars_values.push_back(output_file_str);
  // Init() reports failure with a non-zero result; do not go on to use the
  // dictionary of an engine that failed to load its data.
  if (api.Init(tessdata_dir, lang.string(), tesseract::OEM_TESSERACT_ONLY,
               NULL, NULL, &vars_vec, &vars_values, false) != 0) {
    tprintf("Failed to initialize Tesseract from %s for language %s\n",
            tessdata_dir, lang.string());
    return 1;
  }
  tesseract::Dict &dict = api.tesseract()->getDict();
  FILE *input_file = fopen(input_file_str, "rb");
  if (input_file == NULL) {
    tprintf("Failed to open input wordlist file %s\n", input_file_str);
    // Return rather than exit() so that the destructors of api and the
    // vectors above still run.
    return 1;
  }
  char str[CHARS_PER_LINE];

  // Read word list and call Dict::NoDangerousAmbig() for each word
  // to record ambiguities in the output file.
  while (fgets(str, CHARS_PER_LINE, input_file) != NULL) {
    chomp_string(str);  // remove newline
    WERD_CHOICE word(str, dict.getUnicharset());
    dict.NoDangerousAmbig(&word, NULL, false, NULL, NULL);
  }
  // Clean up.
  fclose(input_file);
  return 0;
}