Tesseract  3.02
tesseract-ocr/ccmain/osdetect.h
Go to the documentation of this file.
00001 
00002 // File:        osdetect.h
00003 // Description: Orientation and script detection.
00004 // Author:      Samuel Charron
00005 //              Ranjith Unnikrishnan
00006 //
00007 // (C) Copyright 2008, Google Inc.
00008 // Licensed under the Apache License, Version 2.0 (the "License");
00009 // you may not use this file except in compliance with the License.
00010 // You may obtain a copy of the License at
00011 // http://www.apache.org/licenses/LICENSE-2.0
00012 // Unless required by applicable law or agreed to in writing, software
00013 // distributed under the License is distributed on an "AS IS" BASIS,
00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015 // See the License for the specific language governing permissions and
00016 // limitations under the License.
00017 //
00019 
00020 #ifndef TESSERACT_CCMAIN_OSDETECT_H__
00021 #define TESSERACT_CCMAIN_OSDETECT_H__
00022 
00023 #include "strngs.h"
00024 #include "unicharset.h"
00025 
00026 class TO_BLOCK_LIST;
00027 class BLOBNBOX;
00028 class BLOB_CHOICE_LIST;
00029 class BLOBNBOX_CLIST;
00030 
00031 namespace tesseract {
00032 class Tesseract;
00033 }
00034 
// Upper bound on the number of script ids, built up term by term.
const int kMaxNumberOfScripts =
    116 +  // scripts in ICU
    1 +    // the "NULL" script
    2 +    // Japanese and Korean
    1;     // Fraktur
00037 
// Holds one orientation id, one script id and a confidence value for each.
struct OSBestResult {
  // Every id and confidence starts out as zero.
  OSBestResult()
      : orientation_id(0),
        script_id(0),
        sconfidence(0.0f),
        oconfidence(0.0f) {}

  int orientation_id;
  int script_id;
  // By name, sconfidence pairs with script_id and oconfidence with
  // orientation_id -- NOTE(review): confirm against osdetect.cpp.
  float sconfidence;
  float oconfidence;
};
00046 
00047 struct OSResults {
00048   OSResults() : unicharset(NULL) {
00049     for (int i = 0; i < 4; ++i) {
00050       for (int j = 0; j < kMaxNumberOfScripts; ++j)
00051         scripts_na[i][j] = 0;
00052       orientations[i] = 0;
00053     }
00054   }
00055   void update_best_orientation();
00056   // Set the estimate of the orientation to the given id.
00057   void set_best_orientation(int orientation_id);
00058   // Update/Compute the best estimate of the script assuming the given
00059   // orientation id.
00060   void update_best_script(int orientation_id);
00061   // Return the index of the script with the highest score for this orientation.
00062   int get_best_script(int orientation_id) const;
00063   // Accumulate scores with given OSResults instance and update the best script.
00064   void accumulate(const OSResults& osr);
00065 
00066   // Print statistics.
00067   void print_scores(void) const;
00068   void print_scores(int orientation_id) const;
00069 
00070   // Array holding scores for each orientation id [0,3].
00071   // Orientation ids [0..3] map to [0, 270, 180, 90] degree orientations of the
00072   // page respectively, where the values refer to the amount of clockwise
00073   // rotation to be applied to the page for the text to be upright and readable.
00074   float orientations[4];
00075   // Script confidence scores for each of 4 possible orientations.
00076   float scripts_na[4][kMaxNumberOfScripts];
00077 
00078   UNICHARSET* unicharset;
00079   OSBestResult best_result;
00080 };
00081 
00082 class OrientationDetector {
00083  public:
00084   OrientationDetector(OSResults*);
00085   bool detect_blob(BLOB_CHOICE_LIST* scores);
00086   int get_orientation();
00087  private:
00088   OSResults* osr_;
00089 };
00090 
00091 class ScriptDetector {
00092  public:
00093   ScriptDetector(OSResults*, tesseract::Tesseract* tess);
00094   void detect_blob(BLOB_CHOICE_LIST* scores);
00095   void get_script() ;
00096   bool must_stop(int orientation);
00097  private:
00098   OSResults* osr_;
00099   static const char* korean_script_;
00100   static const char* japanese_script_;
00101   static const char* fraktur_script_;
00102   int korean_id_;
00103   int japanese_id_;
00104   int katakana_id_;
00105   int hiragana_id_;
00106   int han_id_;
00107   int hangul_id_;
00108   int latin_id_;
00109   int fraktur_id_;
00110   tesseract::Tesseract* tess_;
00111 };
00112 
00113 int orientation_and_script_detection(STRING& filename,
00114                                      OSResults*,
00115                                      tesseract::Tesseract*);
00116 
00117 int os_detect(TO_BLOCK_LIST* port_blocks,
00118               OSResults* osr,
00119               tesseract::Tesseract* tess);
00120 
00121 int os_detect_blobs(BLOBNBOX_CLIST* blob_list,
00122                     OSResults* osr,
00123                     tesseract::Tesseract* tess);
00124 
00125 bool os_detect_blob(BLOBNBOX* bbox, OrientationDetector* o,
00126                     ScriptDetector* s, OSResults*,
00127                     tesseract::Tesseract* tess);
00128 
// Converts an orientation index into degrees: the clockwise rotation that
// has to be applied for the text to be upright (readable).
// NOTE(review): presumably the same id -> degrees mapping documented for
// OSResults::orientations ([0..3] -> [0, 270, 180, 90]) -- confirm against the
// definition.
const int OrientationIdToValue(const int& id);
00133 
00134 #endif  // TESSERACT_CCMAIN_OSDETECT_H__