|
Tesseract
3.02
|
#include "allheaders.h"#include "baseapi.h"#include "strngs.h"#include "tesseractmain.h"#include "tprintf.h"Go to the source code of this file.
Defines | |
| #define | _(x) (x) |
Functions | |
| int | main (int argc, char **argv) |
| #define _ | ( | x | ) | (x) |
Definition at line 35 of file tesseractmain.cpp.
| int main | ( | int | argc, |
| char ** | argv | ||
| ) |
---------------------------------------------------------------------------- Public Function Prototypes ----------------------------------------------------------------------------
Definition at line 49 of file tesseractmain.cpp.
{
#ifdef USING_GETTEXT
setlocale (LC_ALL, "");
bindtextdomain (PACKAGE, LOCALEDIR);
textdomain (PACKAGE);
#endif
if ((argc == 2 && strcmp(argv[1], "-v") == 0) ||
(argc == 2 && strcmp(argv[1], "--version") == 0)) {
char *versionStrP;
fprintf(stderr, "tesseract %s\n", tesseract::TessBaseAPI::Version());
versionStrP = getLeptonicaVersion();
fprintf(stderr, " %s\n", versionStrP);
lept_free(versionStrP);
versionStrP = getImagelibVersions();
fprintf(stderr, " %s\n", versionStrP);
lept_free(versionStrP);
exit(0);
}
// Make the order of args a bit more forgiving than it used to be.
const char* lang = "eng";
const char* image = NULL;
const char* output = NULL;
tesseract::PageSegMode pagesegmode = tesseract::PSM_AUTO;
int arg = 1;
while (arg < argc && (output == NULL || argv[arg][0] == '-')) {
if (strcmp(argv[arg], "-l") == 0 && arg + 1 < argc) {
lang = argv[arg + 1];
++arg;
} else if (strcmp(argv[arg], "-psm") == 0 && arg + 1 < argc) {
pagesegmode = static_cast<tesseract::PageSegMode>(atoi(argv[arg + 1]));
++arg;
} else if (image == NULL) {
image = argv[arg];
} else if (output == NULL) {
output = argv[arg];
}
++arg;
}
if (output == NULL) {
fprintf(stderr, _("Usage:%s imagename outputbase [-l lang] "
"[-psm pagesegmode] [configfile...]\n"), argv[0]);
fprintf(stderr,
_("pagesegmode values are:\n"
"0 = Orientation and script detection (OSD) only.\n"
"1 = Automatic page segmentation with OSD.\n"
"2 = Automatic page segmentation, but no OSD, or OCR\n"
"3 = Fully automatic page segmentation, but no OSD. (Default)\n"
"4 = Assume a single column of text of variable sizes.\n"
"5 = Assume a single uniform block of vertically aligned text.\n"
"6 = Assume a single uniform block of text.\n"
"7 = Treat the image as a single text line.\n"
"8 = Treat the image as a single word.\n"
"9 = Treat the image as a single word in a circle.\n"
"10 = Treat the image as a single character.\n"));
fprintf(stderr, _("-l lang and/or -psm pagesegmode must occur before any"
"configfile.\n"));
exit(1);
}
tesseract::TessBaseAPI api;
api.SetOutputName(output);
int rc = api.Init(argv[0], lang, tesseract::OEM_DEFAULT,
&(argv[arg]), argc - arg, NULL, NULL, false);
if (rc) {
fprintf(stderr, "Could not initialize tesseract.\n");
exit(1);
}
// We have 2 possible sources of pagesegmode: a config file and
// the command line. For backwards compatability reasons, the
// default in tesseract is tesseract::PSM_SINGLE_BLOCK, but the
// default for this program is tesseract::PSM_AUTO. We will let
// the config file take priority, so the command-line default
// can take priority over the tesseract default, so we use the
// value from the command line only if the retrieved mode
// is still tesseract::PSM_SINGLE_BLOCK, indicating no change
// in any config file. Therefore the only way to force
// tesseract::PSM_SINGLE_BLOCK is from the command line.
// It would be simpler if we could set the value before Init,
// but that doesn't work.
if (api.GetPageSegMode() == tesseract::PSM_SINGLE_BLOCK)
api.SetPageSegMode(pagesegmode);
tprintf("Tesseract Open Source OCR Engine v%s with Leptonica\n",
tesseract::TessBaseAPI::Version());
FILE* fin = fopen(image, "rb");
if (fin == NULL) {
printf("Cannot open input file: %s\n", image);
exit(2);
}
fclose(fin);
PIX *pixs;
if ((pixs = pixRead(image)) == NULL) {
printf("Unsupported image type.\n");
exit(3);
}
pixDestroy(&pixs);
STRING text_out;
if (!api.ProcessPages(image, NULL, 0, &text_out)) {
printf("Error during processing.\n");
}
bool output_hocr = false;
api.GetBoolVariable("tessedit_create_hocr", &output_hocr);
bool output_box = false;
api.GetBoolVariable("tessedit_create_boxfile", &output_box);
STRING outfile = output;
outfile += output_hocr ? ".html" : output_box ? ".box" : ".txt";
FILE* fout = fopen(outfile.string(), "wb");
if (fout == NULL) {
printf("Cannot create output file %s\n", outfile.string());
exit(1);
}
fwrite(text_out.string(), 1, text_out.length(), fout);
fclose(fout);
return 0; // Normal exit
}