Tesseract  3.02
tesseract-ocr/training/shapeclustering.cpp
Go to the documentation of this file.
00001 // Copyright 2011 Google Inc. All Rights Reserved.
00002 // Author: rays@google.com (Ray Smith)
00003 
00004 // Licensed under the Apache License, Version 2.0 (the "License");
00005 // you may not use this file except in compliance with the License.
00006 // You may obtain a copy of the License at
00007 // http://www.apache.org/licenses/LICENSE-2.0
00008 // Unless required by applicable law or agreed to in writing, software
00009 // distributed under the License is distributed on an "AS IS" BASIS,
00010 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00011 // See the License for the specific language governing permissions and
00012 // limitations under the License.
00013 
00014 //  Filename: shapeclustering.cpp
00015 //  Purpose:  Generates a master shape table to merge similarly-shaped
00016 //            training data of whole, partial or multiple characters.
00017 //  Author:   Ray Smith
00018 
00019 #ifndef USE_STD_NAMESPACE
00020 #include "base/commandlineflags.h"
00021 #endif
00022 #include "commontraining.h"
00023 #include "mastertrainer.h"
00024 #include "params.h"
00025 #include "strngs.h"
00026 
// Command-line flags that select the debug modes handled in main().
// The quoted help strings are runtime text and are left untouched.
//
// display_cloud_font: defaults to -1; main() takes the DisplaySamples path
// when this is >= 0, passing it alongside canonical_class1.
00027 INT_PARAM_FLAG(display_cloud_font, -1,
00028                "Display cloud of this font, canonical_class1");
// display_canonical_font: defaults to -1; passed to DisplaySamples alongside
// canonical_class2.
00029 INT_PARAM_FLAG(display_canonical_font, -1,
00030                "Display canonical sample of this font, canonical_class2");
// canonical_class1: defaults to empty; when non-empty (and display_cloud_font
// is negative) main() calls DebugCanonical with canonical_class1 and
// canonical_class2.
00031 STRING_PARAM_FLAG(canonical_class1, "", "Class to show ambigs for");
// canonical_class2: defaults to empty; the second class argument given to
// both DisplaySamples and DebugCanonical.
00032 STRING_PARAM_FLAG(canonical_class2, "", "Class to show ambigs for");
00033 
00034 // Loads training data, if requested displays debug information, otherwise
00035 // creates the master shape table by shape clustering and writes it to a file.
00036 // If FLAGS_display_cloud_font is set, then the cloud features of
00037 // FLAGS_canonical_class1/FLAGS_display_cloud_font are shown in green ON TOP
00038 // OF the red canonical features of FLAGS_canonical_class2/
00039 // FLAGS_display_canonical_font, so as to show which canonical features are
00040 // NOT in the cloud.
00041 // Otherwise, if FLAGS_canonical_class1 is set, prints a table of font-wise
00042 // cluster distances between FLAGS_canonical_class1 and FLAGS_canonical_class2.
00043 int main(int argc, char **argv) {
00044   ParseArguments(&argc, &argv);
00045 
00046   STRING file_prefix;
00047   tesseract::MasterTrainer* trainer = tesseract::LoadTrainingData(
00048       argc, argv, false, NULL, &file_prefix);
00049 
00050   if (FLAGS_display_cloud_font >= 0) {
00051         #ifndef GRAPHICS_DISABLED 
00052     trainer->DisplaySamples(FLAGS_canonical_class1.c_str(),
00053                             FLAGS_display_cloud_font,
00054                             FLAGS_canonical_class2.c_str(),
00055                             FLAGS_display_canonical_font);
00056     #endif  // GRAPHICS_DISABLED
00057     return 0;
00058   } else if (!FLAGS_canonical_class1.empty()) {
00059     trainer->DebugCanonical(FLAGS_canonical_class1.c_str(),
00060                             FLAGS_canonical_class2.c_str());
00061     return 0;
00062   }
00063   trainer->SetupMasterShapes();
00064   WriteShapeTable(file_prefix, trainer->master_shapes());
00065   delete trainer;
00066 
00067   return 0;
00068 } /* main */
00069