Tesseract  3.02
tesseract-ocr/api/tesseractmain.cpp
Go to the documentation of this file.
00001 /**********************************************************************
00002 * File:        tessedit.cpp  (Formerly tessedit.c)
00003 * Description: Main program for merge of tess and editor.
00004 * Author:                  Ray Smith
00005 * Created:                 Tue Jan 07 15:21:46 GMT 1992
00006 *
00007 * (C) Copyright 1992, Hewlett-Packard Ltd.
00008 ** Licensed under the Apache License, Version 2.0 (the "License");
00009 ** you may not use this file except in compliance with the License.
00010 ** You may obtain a copy of the License at
00011 ** http://www.apache.org/licenses/LICENSE-2.0
00012 ** Unless required by applicable law or agreed to in writing, software
00013 ** distributed under the License is distributed on an "AS IS" BASIS,
00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015 ** See the License for the specific language governing permissions and
00016 ** limitations under the License.
00017 *
00018 **********************************************************************/
00019 
00020 // #define USE_VLD //Uncomment for Visual Leak Detector.
00021 #if (defined _MSC_VER && defined USE_VLD)
00022 #include "mfcpch.h"
00023 #include <vld.h>
00024 #endif
00025 
00026 // Include automatically generated configuration file if running autoconf
00027 #ifdef HAVE_CONFIG_H
00028 #include "config_auto.h"
00029 #endif
00030 #ifdef USING_GETTEXT
00031 #include <libintl.h>
00032 #include <locale.h>
00033 #define _(x) gettext(x)
00034 #else
00035 #define _(x) (x)
00036 #endif
00037 
00038 #include "allheaders.h"
00039 #include "baseapi.h"
00040 #include "strngs.h"
00041 #include "tesseractmain.h"
00042 #include "tprintf.h"
00043 
00044 /**********************************************************************
00045  *  main()
00046  *
00047  **********************************************************************/
00048 
00049 int main(int argc, char **argv) {
00050 #ifdef USING_GETTEXT
00051   setlocale (LC_ALL, "");
00052   bindtextdomain (PACKAGE, LOCALEDIR);
00053   textdomain (PACKAGE);
00054 #endif
00055   if ((argc == 2 && strcmp(argv[1], "-v") == 0) ||
00056       (argc == 2 && strcmp(argv[1], "--version") == 0)) {
00057     char *versionStrP;
00058 
00059     fprintf(stderr, "tesseract %s\n", tesseract::TessBaseAPI::Version());
00060     
00061     versionStrP = getLeptonicaVersion();
00062     fprintf(stderr, " %s\n", versionStrP);
00063     lept_free(versionStrP);
00064     
00065     versionStrP = getImagelibVersions();
00066     fprintf(stderr, "  %s\n", versionStrP);
00067     lept_free(versionStrP);
00068 
00069     exit(0);
00070   }
00071   // Make the order of args a bit more forgiving than it used to be.
00072   const char* lang = "eng";
00073   const char* image = NULL;
00074   const char* output = NULL;
00075   tesseract::PageSegMode pagesegmode = tesseract::PSM_AUTO;
00076   int arg = 1;
00077   while (arg < argc && (output == NULL || argv[arg][0] == '-')) {
00078     if (strcmp(argv[arg], "-l") == 0 && arg + 1 < argc) {
00079       lang = argv[arg + 1];
00080       ++arg;
00081     } else if (strcmp(argv[arg], "-psm") == 0 && arg + 1 < argc) {
00082       pagesegmode = static_cast<tesseract::PageSegMode>(atoi(argv[arg + 1]));
00083       ++arg;
00084     } else if (image == NULL) {
00085       image = argv[arg];
00086     } else if (output == NULL) {
00087       output = argv[arg];
00088     }
00089     ++arg;
00090   }
00091   if (output == NULL) {
00092     fprintf(stderr, _("Usage:%s imagename outputbase [-l lang] "
00093                       "[-psm pagesegmode] [configfile...]\n"), argv[0]);
00094     fprintf(stderr,
00095             _("pagesegmode values are:\n"
00096               "0 = Orientation and script detection (OSD) only.\n"
00097               "1 = Automatic page segmentation with OSD.\n"
00098               "2 = Automatic page segmentation, but no OSD, or OCR\n"
00099               "3 = Fully automatic page segmentation, but no OSD. (Default)\n"
00100               "4 = Assume a single column of text of variable sizes.\n"
00101               "5 = Assume a single uniform block of vertically aligned text.\n"
00102               "6 = Assume a single uniform block of text.\n"
00103               "7 = Treat the image as a single text line.\n"
00104               "8 = Treat the image as a single word.\n"
00105               "9 = Treat the image as a single word in a circle.\n"
00106               "10 = Treat the image as a single character.\n"));
00107     fprintf(stderr, _("-l lang and/or -psm pagesegmode must occur before any"
00108                       "configfile.\n"));
00109     exit(1);
00110   }
00111 
00112   tesseract::TessBaseAPI  api;
00113 
00114   api.SetOutputName(output);
00115 
00116   int rc = api.Init(argv[0], lang, tesseract::OEM_DEFAULT,
00117            &(argv[arg]), argc - arg, NULL, NULL, false);
00118   if (rc) {
00119     fprintf(stderr, "Could not initialize tesseract.\n");
00120     exit(1);
00121   }
00122    
00123   // We have 2 possible sources of pagesegmode: a config file and
00124   // the command line. For backwards compatability reasons, the
00125   // default in tesseract is tesseract::PSM_SINGLE_BLOCK, but the
00126   // default for this program is tesseract::PSM_AUTO. We will let
00127   // the config file take priority, so the command-line default
00128   // can take priority over the tesseract default, so we use the
00129   // value from the command line only if the retrieved mode
00130   // is still tesseract::PSM_SINGLE_BLOCK, indicating no change
00131   // in any config file. Therefore the only way to force
00132   // tesseract::PSM_SINGLE_BLOCK is from the command line.
00133   // It would be simpler if we could set the value before Init,
00134   // but that doesn't work.
00135   if (api.GetPageSegMode() == tesseract::PSM_SINGLE_BLOCK)
00136     api.SetPageSegMode(pagesegmode);
00137   tprintf("Tesseract Open Source OCR Engine v%s with Leptonica\n",
00138            tesseract::TessBaseAPI::Version());
00139 
00140 
00141   FILE* fin = fopen(image, "rb");
00142   if (fin == NULL) {
00143     printf("Cannot open input file: %s\n", image);
00144     exit(2);
00145   }
00146   fclose(fin);
00147 
00148   PIX   *pixs;
00149   if ((pixs = pixRead(image)) == NULL) {
00150     printf("Unsupported image type.\n");
00151     exit(3);
00152   }
00153   pixDestroy(&pixs);
00154 
00155   STRING text_out;
00156   if (!api.ProcessPages(image, NULL, 0, &text_out)) {
00157     printf("Error during processing.\n");
00158   }
00159   bool output_hocr = false;
00160   api.GetBoolVariable("tessedit_create_hocr", &output_hocr);
00161   bool output_box = false;
00162   api.GetBoolVariable("tessedit_create_boxfile", &output_box);
00163   STRING outfile = output;
00164   outfile += output_hocr ? ".html" : output_box ? ".box" : ".txt";
00165   FILE* fout = fopen(outfile.string(), "wb");
00166   if (fout == NULL) {
00167     printf("Cannot create output file %s\n", outfile.string());
00168     exit(1);
00169   }
00170   fwrite(text_out.string(), 1, text_out.length(), fout);
00171   fclose(fout);
00172 
00173   return 0;                      // Normal exit
00174 }
00175 
00176 #ifdef _WIN32
00177 
00178 char szAppName[] = "Tesseract";   //app name
00179 int initialized = 0;
00180 
00181 /**********************************************************************
00182 * WinMain
00183 *
00184 * Main function for a windows program.
00185 **********************************************************************/
00186 
00187 int WINAPI WinMain(  //main for windows //command line
00188         HINSTANCE hInstance,
00189         HINSTANCE hPrevInstance,
00190         LPSTR lpszCmdLine,
00191         int nCmdShow) {
00192   WNDCLASS wc;
00193   HWND hwnd;
00194   MSG msg;
00195 
00196   char **argv;
00197   char *argsin[2];
00198   int argc;
00199   int exit_code;
00200 
00201   wc.style = CS_NOCLOSE | CS_OWNDC;
00202   wc.lpfnWndProc = (WNDPROC) WndProc;
00203   wc.cbClsExtra = 0;
00204   wc.cbWndExtra = 0;
00205   wc.hInstance = hInstance;
00206   wc.hIcon = NULL;         //LoadIcon (NULL, IDI_APPLICATION);
00207   wc.hCursor = NULL;       //LoadCursor (NULL, IDC_ARROW);
00208   wc.hbrBackground = (HBRUSH) (COLOR_WINDOW + 1);
00209   wc.lpszMenuName = NULL;
00210   wc.lpszClassName = szAppName;
00211 
00212   RegisterClass(&wc);
00213 
00214   hwnd = CreateWindow (szAppName, szAppName,
00215                        WS_OVERLAPPEDWINDOW | WS_DISABLED,
00216                        CW_USEDEFAULT, CW_USEDEFAULT, CW_USEDEFAULT,
00217                        CW_USEDEFAULT, HWND_DESKTOP, NULL, hInstance, NULL);
00218 
00219   argsin[0] = strdup (szAppName);
00220   argsin[1] = strdup (lpszCmdLine);
00221   /*allocate memory for the args. There can never be more than half*/
00222   /*the total number of characters in the arguments.*/
00223   argv = (char **)malloc(((strlen(argsin[0]) + strlen(argsin[1])) / 2 + 1) *
00224                          sizeof(char *));
00225 
00226   /*now construct argv as it should be for C.*/
00227   argc = parse_args (2, argsin, argv);
00228 
00229   //  ShowWindow (hwnd, nCmdShow);
00230   //  UpdateWindow (hwnd);
00231 
00232   if (initialized) {
00233     exit_code = main (argc, argv);
00234     free (argsin[0]);
00235     free (argsin[1]);
00236     free(argv);
00237     return exit_code;
00238   }
00239   while (GetMessage (&msg, NULL, 0, 0)) {
00240     TranslateMessage(&msg);
00241     DispatchMessage(&msg);
00242     if (initialized) {
00243       exit_code = main (argc, argv);
00244       break;
00245     }
00246     else
00247       exit_code = msg.wParam;
00248   }
00249   free (argsin[0]);
00250   free (argsin[1]);
00251   free(argv);
00252   return exit_code;
00253 }
00254 
00255 
00256 /**********************************************************************
00257 * WndProc
00258 *
00259 * Function to respond to messages.
00260 **********************************************************************/
00261 
00262 LONG WINAPI WndProc(            //message handler
00263         HWND hwnd,              //window with message
00264         UINT msg,               //message typ
00265         WPARAM wParam,
00266         LPARAM lParam) {
00267   HDC hdc;
00268 
00269   if (msg == WM_CREATE) {
00270     //
00271     // Create a rendering context.
00272     //
00273     hdc = GetDC (hwnd);
00274     ReleaseDC(hwnd, hdc);
00275     initialized = 1;
00276     return 0;
00277   }
00278   return DefWindowProc (hwnd, msg, wParam, lParam);
00279 }
00280 
00281 
/**********************************************************************
* parse_args
*
* Split each input string into whitespace-separated words.
*
* Spaces, newlines and tabs separate words. The input strings are
* modified in place: the first separator after each word is overwritten
* with '\0', and arglist receives pointers into the input strings, in
* order. The caller must provide an arglist large enough for every word.
*
* Returns the number of entries written to arglist.
**********************************************************************/

int
parse_args (                     /*refine arg list */
        int argc,                /*no of input args */
        char *argv[],            /*input args */
        char *arglist[]          /*output args */
        ) {
  static const char kSeparators[] = " \n\t";
  int word_count = 0;            /*words found so far */

  for (int index = 0; index < argc; ++index) {
    char *cursor = argv[index];
    for (;;) {
      cursor += strspn(cursor, kSeparators);    /*skip leading separators */
      if (*cursor == '\0')
        break;                                  /*nothing left in this string */
      arglist[word_count++] = cursor;           /*start of a new word */
      cursor += strcspn(cursor, kSeparators);   /*advance to the word's end */
      if (*cursor == '\0')
        break;                                  /*word ran to end of string */
      *cursor++ = '\0';                         /*terminate it and move on */
    }
  }
  return word_count;
}
00320 #endif