Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00022
00023
00024 #include <stdio.h>
00025
00026 #include "baseapi.h"
00027 #include "helpers.h"
00028 #include "strngs.h"
00029 #include "dict.h"
00030 #include "tesseractclass.h"
00031
00032 int main(int argc, char** argv) {
00033
00034
00035 if (argc != 4 && (argc != 6 || strcmp(argv[1], "-l") != 0)) {
00036 printf("Usage: %s [-l lang] tessdata_dir wordlist_file"
00037 " output_ambiguious_wordlist_file\n", argv[0]);
00038 return 1;
00039 }
00040 int argv_offset = 0;
00041 STRING lang;
00042 if (argc == 6) {
00043 lang = argv[2];
00044 argv_offset = 2;
00045 } else {
00046 lang = "eng";
00047 }
00048 const char *tessdata_dir = argv[++argv_offset];
00049 const char *input_file_str = argv[++argv_offset];
00050 const char *output_file_str = argv[++argv_offset];
00051
00052
00053 tesseract::TessBaseAPI api;
00054 GenericVector<STRING> vars_vec;
00055 GenericVector<STRING> vars_values;
00056 vars_vec.push_back("output_ambig_words_file");
00057 vars_values.push_back(output_file_str);
00058 api.Init(tessdata_dir, lang.string(), tesseract::OEM_TESSERACT_ONLY,
00059 NULL, NULL, &vars_vec, &vars_values, false);
00060 tesseract::Dict &dict = api.tesseract()->getDict();
00061 FILE *input_file = fopen(input_file_str, "rb");
00062 if (input_file == NULL) {
00063 tprintf("Failed to open input wordlist file %s\n", input_file_str);
00064 exit(1);
00065 }
00066 char str[CHARS_PER_LINE];
00067
00068
00069
00070 while (fgets(str, CHARS_PER_LINE, input_file) != NULL) {
00071 chomp_string(str);
00072 WERD_CHOICE word(str, dict.getUnicharset());
00073 dict.NoDangerousAmbig(&word, NULL, false, NULL, NULL);
00074 }
00075
00076 fclose(input_file);
00077 }