Tesseract  3.02
tesseract-ocr/training/combine_tessdata.cpp File Reference
#include "tessdatamanager.h"

Go to the source code of this file.

Functions

int main (int argc, char **argv)

Function Documentation

int main ( int  argc,
char **  argv 
)

---------------------------------------------------------------------------- Public Function Prototypes ----------------------------------------------------------------------------

Definition at line 66 of file combine_tessdata.cpp.

                                {
  // Command-line driver with four modes, selected by argv (see the usage
  // text at the bottom of this function):
  //   <prefix>                          combine component files into one
  //                                     traineddata file
  //   -e <traineddata> <component>...   extract the named components
  //   -u <traineddata> <output_prefix>  unpack every component
  //   -o <traineddata> <component>...   overwrite components in place
  // Returns 1 when the arguments match no mode; the -o mode calls exit(1)
  // if the temporary rename fails. All other paths return 0.
  if (argc == 2) {
    printf("Combining tessdata files\n");
    // The output name is the given prefix followed by the traineddata suffix.
    STRING output_file = argv[1];
    output_file += kTrainedDataSuffix;
    if (!tesseract::TessdataManager::CombineDataFiles(
        argv[1], output_file.string())) {
      printf("Error combining tessdata files into %s\n",
             output_file.string());
      // Check the length before looking at the last character: an empty
      // prefix has no last character, and indexing at strlen()-1 would read
      // outside the argument string. An empty prefix is reported as missing
      // its period as well.
      const size_t prefix_len = strlen(argv[1]);
      if (prefix_len == 0 || argv[1][prefix_len - 1] != '.')
        printf("Hint: the prefix is missing a period (.)\n");
      // NOTE(review): the exit status stays 0 on this failure path, as in
      // the original; confirm whether callers would prefer a non-zero status.
    }
  } else if (argc >= 4 && (strcmp(argv[1], "-e") == 0 ||
                           strcmp(argv[1], "-u") == 0)) {
    // Initialize TessdataManager with the data in the given traineddata file.
    tesseract::TessdataManager tm;
    tm.Init(argv[2], 0);
    printf("Extracting tessdata components from %s\n", argv[2]);
    if (strcmp(argv[1], "-e") == 0) {
      // Extract only the components named on the command line.
      for (int i = 3; i < argc; ++i) {
        if (tm.ExtractToFile(argv[i])) {
          printf("Wrote %s\n", argv[i]);
        } else {
          printf("Not extracting %s, since this component"
                 " is not present\n", argv[i]);
        }
      }
    } else {  // extract all the components
      // Try every known component suffix appended to the output prefix;
      // components that are not extracted are skipped without a message.
      for (int i = 0; i < tesseract::TESSDATA_NUM_ENTRIES; ++i) {
        STRING filename = argv[3];
        filename += tesseract::kTessdataFileSuffixes[i];
        if (tm.ExtractToFile(filename.string())) {
          printf("Wrote %s\n", filename.string());
        }
      }
    }
    tm.End();
  } else if (argc >= 4 && strcmp(argv[1], "-o") == 0) {
    // Rename the current traineddata file to a temporary name.
    const char *new_traineddata_filename = argv[2];
    STRING traineddata_filename = new_traineddata_filename;
    traineddata_filename += ".__tmp__";
    if (rename(new_traineddata_filename, traineddata_filename.string()) != 0) {
      tprintf("Failed to create a temporary file %s\n",
              traineddata_filename.string());
      exit(1);
    }

    // Initialize TessdataManager with the data in the given traineddata file.
    tesseract::TessdataManager tm;
    tm.Init(traineddata_filename.string(), 0);

    // Write the updated traineddata file under the original name, passing
    // the remaining command-line arguments as the replacement components.
    tm.OverwriteComponents(new_traineddata_filename, argv+3, argc-3);
    tm.End();
  } else {
    printf("Usage for combining tessdata components:\n"
           "%s language_data_path_prefix (e.g. tessdata/eng.)\n", argv[0]);
    printf("Usage for extracting tessdata components:\n"
           "%s -e traineddata_file [output_component_file...]\n", argv[0]);
    printf("Usage for overwriting tessdata components:\n"
           "%s -o traineddata_file [input_component_file...]\n", argv[0]);
    printf("Usage for unpacking all tessdata components:\n"
           "%s -u traineddata_file output_path_prefix"
           " (e.g. /tmp/eng.)\n", argv[0]);
    return 1;
  }
  // Same status the original produced by falling off the end of main.
  return 0;
}