Tesseract  3.02
ScriptDetector Class Reference

#include <osdetect.h>

List of all members.

Public Member Functions

 ScriptDetector (OSResults *, tesseract::Tesseract *tess)
void detect_blob (BLOB_CHOICE_LIST *scores)
void get_script ()
bool must_stop (int orientation)

Detailed Description

Definition at line 91 of file osdetect.h.


Constructor & Destructor Documentation

ScriptDetector::ScriptDetector ( OSResults *  osr,
tesseract::Tesseract *  tess 
)

Definition at line 419 of file osdetect.cpp.

                                                                       {
  // Store the caller-supplied results object and Tesseract instance. Only
  // the pointers are kept; nothing is copied here.
  osr_ = osr;
  tess_ = tess;
  // Cache the id that unicharset.add_script() returns for each script name
  // this class refers to later, so detect_blob() can compare ids directly.
  // NOTE(review): add_script() presumably returns the existing id when the
  // script is already registered and appends it otherwise -- confirm in
  // UNICHARSET. The call order is kept as is in case ids depend on it.
  katakana_id_ = tess_->unicharset.add_script(katakana_script);
  hiragana_id_ = tess_->unicharset.add_script(hiragana_script);
  han_id_ = tess_->unicharset.add_script(han_script);
  hangul_id_ = tess_->unicharset.add_script(hangul_script);
  // The three names with a trailing underscore (japanese_script_,
  // korean_script_, fraktur_script_) are declared outside this listing.
  japanese_id_ = tess_->unicharset.add_script(japanese_script_);
  korean_id_ = tess_->unicharset.add_script(korean_script_);
  latin_id_ = tess_->unicharset.add_script(latin_script);
  fraktur_id_ = tess_->unicharset.add_script(fraktur_script_);
}

Member Function Documentation

void ScriptDetector::detect_blob ( BLOB_CHOICE_LIST *  scores)

Definition at line 435 of file osdetect.cpp.

                                                         {
  // Accumulates script evidence for one blob into osr_->scripts_na.
  //
  // scores: array of four BLOB_CHOICE_LISTs, one per orientation (indexed
  //         below as scores + i).
  //
  // For each orientation the choices are walked in list order, looking at
  // the first choice of every distinct script. If only one script is in
  // contention (no second script within kNonAmbiguousMargin of the first),
  // that script's counter for the orientation is incremented, followed by
  // the Fraktur and Japanese/Korean pseudo-script adjustments.
  bool done[kMaxNumberOfScripts];
  for (int i = 0; i < 4; ++i) {
    // Reset the "script already seen" flags for this orientation.
    for (int j = 0; j < kMaxNumberOfScripts; ++j)
      done[j] = false;

    BLOB_CHOICE_IT choice_it;
    choice_it.set_to_list(scores + i);

    // State captured from the first match; prev_score < 0 means "no first
    // match recorded yet".
    float prev_score = -1;
    int script_count = 0;
    int prev_id = -1;
    int prev_fontinfo_id = -1;
    const char* prev_unichar = "";

    for (choice_it.mark_cycle_pt(); !choice_it.cycled_list();
         choice_it.forward()) {
      BLOB_CHOICE* choice = choice_it.data();
      int id = choice->script_id();
      // Script already processed before.
      if (done[id]) continue;
      done[id] = true;

      const char* unichar =
          tess_->unicharset.id_to_unichar(choice->unichar_id());
      if (prev_score < 0) {
        // Save data from the first match.
        prev_score = -choice->certainty();
        script_count = 1;
        prev_id = id;
        prev_unichar = unichar;
        prev_fontinfo_id = choice->fontinfo_id();
      } else if (-choice->certainty() < prev_score + kNonAmbiguousMargin) {
        // A different script scores within the margin of the first match.
        ++script_count;
      }

      // Stop scanning when the first match is a single-byte character and
      // the current choice starts with an ASCII digit.
      if (strlen(prev_unichar) == 1 && unichar[0] >= '0' && unichar[0] <= '9')
        break;

      // if script_count is >= 2, character is ambiguous, skip other matches
      // since they are useless.
      if (script_count >= 2)
        break;
    }

    // Character is non ambiguous
    if (script_count == 1) {
      // Update the score of the winning script
      osr_->scripts_na[i][prev_id] += 1.0;

      // Workaround for Fraktur: when the winning Latin choice carries a
      // Fraktur font, move the vote from Latin to the Fraktur pseudo-script.
      if (prev_id == latin_id_ && prev_fontinfo_id >= 0) {
        const tesseract::FontInfo &fi =
            tess_->get_fontinfo_table().get(prev_fontinfo_id);
        //printf("Font: %s i:%i b:%i f:%i s:%i k:%i (%s)\n", fi.name,
        //       fi.is_italic(), fi.is_bold(), fi.is_fixed_pitch(),
        //       fi.is_serif(), fi.is_fraktur(),
        //       prev_unichar);
        if (fi.is_fraktur()) {
          osr_->scripts_na[i][prev_id] -= 1.0;
          osr_->scripts_na[i][fraktur_id_] += 1.0;
        }
      }

      // Update Japanese / Korean pseudo-scripts
      if (prev_id == katakana_id_)
        osr_->scripts_na[i][japanese_id_] += 1.0;
      if (prev_id == hiragana_id_)
        osr_->scripts_na[i][japanese_id_] += 1.0;
      if (prev_id == hangul_id_)
        osr_->scripts_na[i][korean_id_] += 1.0;
      // Han contributes a fractional vote to both pseudo-scripts.
      if (prev_id == han_id_) {
        osr_->scripts_na[i][korean_id_] += kHanRatioInKorean;
        osr_->scripts_na[i][japanese_id_] += kHanRatioInJapanese;
      }
    }
  }  // iterate over each orientation
}
void ScriptDetector::get_script ( )
bool ScriptDetector::must_stop ( int  orientation)

Definition at line 526 of file osdetect.cpp.

                                              {
  // Have the results object update its best script for this orientation,
  // then report whether the resulting script confidence is above 1.
  osr_->update_best_script(orientation);
  const bool confident_enough = osr_->best_result.sconfidence > 1;
  return confident_enough;
}

The documentation for this class was generated from the following files: