Tesseract  3.02
tesseract-ocr/training/classifier_tester.cpp File Reference
#include "base/commandlineflags.h"
#include "baseapi.h"
#include "commontraining.h"
#include "cubeclassifier.h"
#include "mastertrainer.h"
#include "params.h"
#include "strngs.h"
#include "tessclassifier.h"

Go to the source code of this file.

Enumerations

enum  ClassifierName {
  CN_PRUNER, CN_FULL, CN_CUBE, CN_CUBETESS,
  CN_COUNT
}

Functions

 STRING_PARAM_FLAG (classifier,"","Classifier to test")
 STRING_PARAM_FLAG (lang,"eng","Language to test")
 STRING_PARAM_FLAG (tessdata_dir,"","Directory of traineddata files")
int main (int argc, char **argv)

Variables

const char * names [] = {"pruner", "full", "cube", "cubetess", NULL }

Enumeration Type Documentation

enum ClassifierName

Enumerator:
CN_PRUNER 
CN_FULL 
CN_CUBE 
CN_CUBETESS 
CN_COUNT 

Definition at line 34 of file classifier_tester.cpp.


Function Documentation

int main ( int  argc,
char **  argv 
)

Entry point of the classifier tester: decodes the classifier flag, loads the training data, initializes Tesseract, and runs the selected classifier over the training samples. Returns 0 on success and 1 on failure.

Definition at line 65 of file classifier_tester.cpp.

                                {
  // Entry point of the classifier tester.
  //   1. Parse the command line flags.
  //   2. Map FLAGS_classifier onto a ClassifierName.
  //   3. Load the training samples.
  //   4. Initialize Tesseract with the engine mode the classifier needs.
  //   5. Build the requested ShapeClassifier and test it on the samples.
  // Returns 0 on success and 1 on any failure.
  ParseArguments(&argc, &argv);
  // Decode the classifier string. names[] is indexed by ClassifierName, so
  // the index of the matching entry is the enum value; CN_COUNT doubles as
  // the "no match" sentinel.
  ClassifierName classifier = CN_COUNT;
  for (int c = 0; c < CN_COUNT; ++c) {
    if (strcmp(FLAGS_classifier.c_str(), names[c]) == 0) {
      classifier = static_cast<ClassifierName>(c);
      break;
    }
  }
  if (classifier == CN_COUNT) {
    fprintf(stderr, "Invalid classifier name:%s\n", FLAGS_classifier.c_str());
    return 1;
  }

  STRING file_prefix;
  tesseract::MasterTrainer* trainer = tesseract::LoadTrainingData(
      argc, argv, true, NULL, &file_prefix);
  // Do not dereference the trainer if loading failed.
  if (trainer == NULL) {
    fprintf(stderr, "Failed to load training data!\n");
    return 1;
  }
  // We want to test junk as well if it is available.
  trainer->IncludeJunk();
  // We want to test with replicated samples too.
  trainer->ReplicateAndRandomizeSamplesIfRequired();

  // We need to initialize tesseract to test. The cube-based classifiers
  // need the combined engine mode; the others use plain Tesseract.
  tesseract::TessBaseAPI api;
  tesseract::OcrEngineMode engine_mode = tesseract::OEM_TESSERACT_ONLY;
  if (classifier == CN_CUBE || classifier == CN_CUBETESS)
    engine_mode = tesseract::OEM_TESSERACT_CUBE_COMBINED;
  if (api.Init(FLAGS_tessdata_dir.c_str(), FLAGS_lang.c_str(),
               engine_mode) < 0) {
    fprintf(stderr, "Tesseract initialization failed!\n");
    delete trainer;  // Owned by this function; release it on the error path.
    return 1;
  }
  tesseract::ShapeClassifier* shape_classifier = NULL;
  // The API only exposes a const Tesseract*, but the classifiers below take
  // a mutable pointer, hence the const_cast.
  tesseract::Tesseract* tesseract =
      const_cast<tesseract::Tesseract*>(api.tesseract());
  tesseract::Classify* classify =
      reinterpret_cast<tesseract::Classify*>(tesseract);
  // Copy the shape_table from the classifier and add the space character if
  // not already present to count junk.
  // NOTE(review): this local copy is not referenced again in this function;
  // it is kept unchanged here - confirm whether it is still needed.
  tesseract::ShapeTable shape_table;
  shape_table.set_unicharset(classify->shape_table()->unicharset());
  shape_table.AppendMasterShapes(*classify->shape_table());
  if (shape_table.FindShape(0, -1) < 0)
    shape_table.AddShape(0, 0);
  // Instantiate the classifier selected on the command line.
  if (classifier == CN_PRUNER) {
    shape_classifier = new tesseract::TessClassifier(true, classify);
  } else if (classifier == CN_FULL) {
    shape_classifier = new tesseract::TessClassifier(false, classify);
  } else if (classifier == CN_CUBE) {
    shape_classifier = new tesseract::CubeClassifier(tesseract);
  } else if (classifier == CN_CUBETESS) {
    shape_classifier = new tesseract::CubeTessClassifier(tesseract);
  } else {
    // Defensive: only reachable if a ClassifierName is added without a
    // matching branch above.
    fprintf(stderr, "%s tester not yet implemented\n",
            FLAGS_classifier.c_str());
    delete trainer;
    return 1;
  }
  tprintf("Testing classifier %s:\n", FLAGS_classifier.c_str());
  trainer->TestClassifierOnSamples(3, false, shape_classifier, NULL);
  if (classifier != CN_CUBE && classifier != CN_CUBETESS) {
    // Test with replicated samples as well.
    trainer->TestClassifierOnSamples(3, true, shape_classifier, NULL);
  }
  delete shape_classifier;
  delete trainer;

  return 0;
} /* main */
STRING_PARAM_FLAG ( classifier  ,
""  ,
"Classifier to test"   
)
STRING_PARAM_FLAG ( lang  ,
"eng"  ,
"Language to test"   
)
STRING_PARAM_FLAG ( tessdata_dir  ,
""  ,
"Directory of traineddata files"   
)

Variable Documentation

const char* names[] = {"pruner", "full", "cube", "cubetess", NULL }

Definition at line 42 of file classifier_tester.cpp.