Tesseract
3.02
|
#include <osdetect.h>
Public Member Functions | |
ScriptDetector (OSResults *, tesseract::Tesseract *tess) | |
void | detect_blob (BLOB_CHOICE_LIST *scores) |
void | get_script () |
bool | must_stop (int orientation) |
Definition at line 91 of file osdetect.h.
ScriptDetector::ScriptDetector(OSResults *osr, tesseract::Tesseract *tess)
Definition at line 419 of file osdetect.cpp.
{
  // Keep hold of the results accumulator and of the recognizer whose
  // unicharset is queried for script ids.
  osr_ = osr;
  tess_ = tess;

  // Look up (or register) every script this detector cares about and cache
  // the id that add_script() hands back. The call order is deliberately the
  // same as before: the returned ids may depend on it.
  // `tess` is the same pointer that was just stored in tess_.
  katakana_id_ = tess->unicharset.add_script(katakana_script);
  hiragana_id_ = tess->unicharset.add_script(hiragana_script);
  han_id_      = tess->unicharset.add_script(han_script);
  hangul_id_   = tess->unicharset.add_script(hangul_script);
  japanese_id_ = tess->unicharset.add_script(japanese_script_);
  korean_id_   = tess->unicharset.add_script(korean_script_);
  latin_id_    = tess->unicharset.add_script(latin_script);
  fraktur_id_  = tess->unicharset.add_script(fraktur_script_);
}
void ScriptDetector::detect_blob(BLOB_CHOICE_LIST *scores)
Definition at line 435 of file osdetect.cpp.
{
  // Accumulates script evidence from one blob's classifier results.
  //
  // `scores` is read as an array of four choice lists (scores + 0 .. + 3),
  // one per orientation. For each orientation the choices are walked in list
  // order, considering only the first choice seen for each script. If exactly
  // one script ends up within kNonAmbiguousMargin of the first match, the
  // blob counts as non-ambiguous and that script's tally in
  // osr_->scripts_na[orientation] is incremented (with the Fraktur and
  // Japanese/Korean adjustments below).
  // NOTE(review): the logic presumes each list is ordered best-first --
  // confirm against the classifier that fills `scores`.
  for (int i = 0; i < 4; ++i) {
    // Scripts already seen for this orientation; zeroed on every pass.
    bool done[kMaxNumberOfScripts] = {false};

    BLOB_CHOICE_IT choice_it;
    choice_it.set_to_list(scores + i);

    float prev_score = -1;         // Negated certainty of the first match.
    int script_count = 0;          // Distinct scripts within the margin.
    int prev_id = -1;              // Script id of the first match.
    int prev_fontinfo_id = -1;     // Font of the first match (Fraktur test).
    const char* prev_unichar = ""; // Text of the first match.

    for (choice_it.mark_cycle_pt(); !choice_it.cycled_list();
         choice_it.forward()) {
      BLOB_CHOICE* choice = choice_it.data();
      const int id = choice->script_id();

      // Guard the fixed-size `done` table: an id outside
      // [0, kMaxNumberOfScripts) would otherwise index out of bounds.
      if (id < 0 || id >= kMaxNumberOfScripts) continue;

      // Script already processed before.
      if (done[id]) continue;
      done[id] = true;

      const char* unichar =
          tess_->unicharset.id_to_unichar(choice->unichar_id());

      if (prev_score < 0) {
        // Save data from the first match.
        prev_score = -choice->certainty();
        script_count = 1;
        prev_id = id;
        prev_unichar = unichar;
        prev_fontinfo_id = choice->fontinfo_id();
      } else if (-choice->certainty() < prev_score + kNonAmbiguousMargin) {
        // Another script scores close to the first match.
        ++script_count;
      }

      // Stop scanning once a digit is reached while the first match is a
      // single-byte string.
      // NOTE(review): the length test is on prev_unichar but the digit test
      // is on the current unichar; kept exactly as in the original.
      if (strlen(prev_unichar) == 1)
        if (unichar[0] >= '0' && unichar[0] <= '9')
          break;

      // If script_count is >= 2 the character is ambiguous; the remaining
      // matches cannot change that, so skip them.
      if (script_count >= 2)
        break;
    }

    // Character is non ambiguous.
    if (script_count == 1) {
      // Update the score of the winning script.
      osr_->scripts_na[i][prev_id] += 1.0;

      // Workaround for Fraktur: a Latin match whose font is flagged as
      // Fraktur is moved from the Latin tally to the Fraktur tally.
      if (prev_id == latin_id_ && prev_fontinfo_id >= 0) {
        const tesseract::FontInfo& fi =
            tess_->get_fontinfo_table().get(prev_fontinfo_id);
        if (fi.is_fraktur()) {
          osr_->scripts_na[i][prev_id] -= 1.0;
          osr_->scripts_na[i][fraktur_id_] += 1.0;
        }
      }

      // Update Japanese / Korean pseudo-scripts.
      if (prev_id == katakana_id_)
        osr_->scripts_na[i][japanese_id_] += 1.0;
      if (prev_id == hiragana_id_)
        osr_->scripts_na[i][japanese_id_] += 1.0;
      if (prev_id == hangul_id_)
        osr_->scripts_na[i][korean_id_] += 1.0;
      if (prev_id == han_id_) {
        // Han contributes a fractional weight to both pseudo-scripts.
        osr_->scripts_na[i][korean_id_] += kHanRatioInKorean;
        osr_->scripts_na[i][japanese_id_] += kHanRatioInJapanese;
      }
    }
  }  // iterate over each orientation
}
void ScriptDetector::get_script()
bool ScriptDetector::must_stop(int orientation)
Definition at line 526 of file osdetect.cpp.
{
  // Refresh the best-script result for the given orientation first; the
  // confidence checked below is read only after that update has run.
  osr_->update_best_script(orientation);

  // Report "stop" once the script confidence of the best result exceeds 1.
  const bool confident_enough = osr_->best_result.sconfidence > 1;
  return confident_enough;
}