// Tesseract 3.02
00001 00002 // File: osdetect.h 00003 // Description: Orientation and script detection. 00004 // Author: Samuel Charron 00005 // Ranjith Unnikrishnan 00006 // 00007 // (C) Copyright 2008, Google Inc. 00008 // Licensed under the Apache License, Version 2.0 (the "License"); 00009 // you may not use this file except in compliance with the License. 00010 // You may obtain a copy of the License at 00011 // http://www.apache.org/licenses/LICENSE-2.0 00012 // Unless required by applicable law or agreed to in writing, software 00013 // distributed under the License is distributed on an "AS IS" BASIS, 00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 // See the License for the specific language governing permissions and 00016 // limitations under the License. 00017 // 00019 00020 #ifndef TESSERACT_CCMAIN_OSDETECT_H__ 00021 #define TESSERACT_CCMAIN_OSDETECT_H__ 00022 00023 #include "strngs.h" 00024 #include "unicharset.h" 00025 00026 class TO_BLOCK_LIST; 00027 class BLOBNBOX; 00028 class BLOB_CHOICE_LIST; 00029 class BLOBNBOX_CLIST; 00030 00031 namespace tesseract { 00032 class Tesseract; 00033 } 00034 00035 // Max number of scripts in ICU + "NULL" + Japanese and Korean + Fraktur 00036 const int kMaxNumberOfScripts = 116 + 1 + 2 + 1; 00037 00038 struct OSBestResult { 00039 OSBestResult() : orientation_id(0), script_id(0), sconfidence(0.0), 00040 oconfidence(0.0) {} 00041 int orientation_id; 00042 int script_id; 00043 float sconfidence; 00044 float oconfidence; 00045 }; 00046 00047 struct OSResults { 00048 OSResults() : unicharset(NULL) { 00049 for (int i = 0; i < 4; ++i) { 00050 for (int j = 0; j < kMaxNumberOfScripts; ++j) 00051 scripts_na[i][j] = 0; 00052 orientations[i] = 0; 00053 } 00054 } 00055 void update_best_orientation(); 00056 // Set the estimate of the orientation to the given id. 
00057 void set_best_orientation(int orientation_id); 00058 // Update/Compute the best estimate of the script assuming the given 00059 // orientation id. 00060 void update_best_script(int orientation_id); 00061 // Return the index of the script with the highest score for this orientation. 00062 int get_best_script(int orientation_id) const; 00063 // Accumulate scores with given OSResults instance and update the best script. 00064 void accumulate(const OSResults& osr); 00065 00066 // Print statistics. 00067 void print_scores(void) const; 00068 void print_scores(int orientation_id) const; 00069 00070 // Array holding scores for each orientation id [0,3]. 00071 // Orientation ids [0..3] map to [0, 270, 180, 90] degree orientations of the 00072 // page respectively, where the values refer to the amount of clockwise 00073 // rotation to be applied to the page for the text to be upright and readable. 00074 float orientations[4]; 00075 // Script confidence scores for each of 4 possible orientations. 
00076 float scripts_na[4][kMaxNumberOfScripts]; 00077 00078 UNICHARSET* unicharset; 00079 OSBestResult best_result; 00080 }; 00081 00082 class OrientationDetector { 00083 public: 00084 OrientationDetector(OSResults*); 00085 bool detect_blob(BLOB_CHOICE_LIST* scores); 00086 int get_orientation(); 00087 private: 00088 OSResults* osr_; 00089 }; 00090 00091 class ScriptDetector { 00092 public: 00093 ScriptDetector(OSResults*, tesseract::Tesseract* tess); 00094 void detect_blob(BLOB_CHOICE_LIST* scores); 00095 void get_script() ; 00096 bool must_stop(int orientation); 00097 private: 00098 OSResults* osr_; 00099 static const char* korean_script_; 00100 static const char* japanese_script_; 00101 static const char* fraktur_script_; 00102 int korean_id_; 00103 int japanese_id_; 00104 int katakana_id_; 00105 int hiragana_id_; 00106 int han_id_; 00107 int hangul_id_; 00108 int latin_id_; 00109 int fraktur_id_; 00110 tesseract::Tesseract* tess_; 00111 }; 00112 00113 int orientation_and_script_detection(STRING& filename, 00114 OSResults*, 00115 tesseract::Tesseract*); 00116 00117 int os_detect(TO_BLOCK_LIST* port_blocks, 00118 OSResults* osr, 00119 tesseract::Tesseract* tess); 00120 00121 int os_detect_blobs(BLOBNBOX_CLIST* blob_list, 00122 OSResults* osr, 00123 tesseract::Tesseract* tess); 00124 00125 bool os_detect_blob(BLOBNBOX* bbox, OrientationDetector* o, 00126 ScriptDetector* s, OSResults*, 00127 tesseract::Tesseract* tess); 00128 00129 // Helper method to convert an orientation index to its value in degrees. 00130 // The value represents the amount of clockwise rotation in degrees that must be 00131 // applied for the text to be upright (readable). 00132 const int OrientationIdToValue(const int& id); 00133 00134 #endif // TESSERACT_CCMAIN_OSDETECT_H__