Tesseract  3.02
tesseract::TesseractCubeCombiner Class Reference

#include <tesseract_cube_combiner.h>

List of all members.

Public Member Functions

 TesseractCubeCombiner (CubeRecoContext *cube_cntxt)
virtual ~TesseractCubeCombiner ()
float CombineResults (WERD_RES *tess_res, CubeObject *cube_obj)
float CombineResults (WERD_RES *tess_res, CubeObject *cube_obj, WordAltList *alt_list)
bool ComputeCombinerFeatures (const string &tess_res, int tess_confidence, CubeObject *cube_obj, WordAltList *cube_alt_list, vector< double > *features, bool *agreement)
bool ValidWord (const string &str)
bool LoadCombinerNet ()

Detailed Description

Definition at line 47 of file tesseract_cube_combiner.h.


Constructor & Destructor Documentation

tesseract::TesseractCubeCombiner::TesseractCubeCombiner ( CubeRecoContext cube_cntxt) [explicit]

Definition at line 39 of file tesseract_cube_combiner.cpp.

                                                                        {
  cube_cntxt_ = cube_cntxt;
  combiner_net_ = NULL;
}
tesseract::TesseractCubeCombiner::~TesseractCubeCombiner ( ) [virtual]

Definition at line 44 of file tesseract_cube_combiner.cpp.

                                              {
  if (combiner_net_ != NULL) {
    delete combiner_net_;
    combiner_net_ = NULL;
  }
}

Member Function Documentation

float tesseract::TesseractCubeCombiner::CombineResults ( WERD_RES tess_res,
CubeObject cube_obj 
)

Definition at line 242 of file tesseract_cube_combiner.cpp.

                                                                  {
  // If no combiner is loaded or the cube object is undefined,
  // tesseract wins with probability 1.0
  if (combiner_net_ == NULL || cube_obj == NULL) {
    tprintf("Cube WARNING (TesseractCubeCombiner::CombineResults): "
            "Cube objects not initialized; defaulting to Tesseract\n");
    return 1.0;
  }

  // Retrieve the alternate list from the CubeObject's current state.
  // If the alt list empty, tesseract wins with probability 1.0
  WordAltList *cube_alt_list = cube_obj->AlternateList();
  if (cube_alt_list == NULL)
    cube_alt_list = cube_obj->RecognizeWord();
  if (cube_alt_list == NULL || cube_alt_list->AltCount() <= 0) {
    tprintf("Cube WARNING (TesseractCubeCombiner::CombineResults): "
            "Cube returned no results; defaulting to Tesseract\n");
    return 1.0;
  }
  return CombineResults(tess_res, cube_obj, cube_alt_list);
}
float tesseract::TesseractCubeCombiner::CombineResults ( WERD_RES tess_res,
CubeObject cube_obj,
WordAltList alt_list 
)

Definition at line 270 of file tesseract_cube_combiner.cpp.

                                                                        {
  // If no combiner is loaded or the cube object is undefined, or the
  // alt list is empty, tesseract wins with probability 1.0
  if (combiner_net_ == NULL || cube_obj == NULL ||
      cube_alt_list == NULL || cube_alt_list->AltCount() <= 0) {
    tprintf("Cube WARNING (TesseractCubeCombiner::CombineResults): "
            "Cube result cannot be retrieved; defaulting to Tesseract\n");
    return 1.0;
  }

  // Tesseract result string, tesseract confidence, and cost of
  // tesseract result according to cube
  string tess_str = tess_res->best_choice->unichar_string().string();
  // Map certainty [-20.0, 0.0] to confidence [0, 100]
  int tess_confidence = MIN(100, MAX(1, static_cast<int>(
      100 + (5 * tess_res->best_choice->certainty()))));

  // Compute the combiner features. If feature computation fails or
  // answers are identical, tesseract wins with probability 1.0
  vector<double> features;
  bool agreement;
  bool combiner_success = ComputeCombinerFeatures(tess_str, tess_confidence,
                                                  cube_obj, cube_alt_list,
                                                  &features, &agreement);
  if (!combiner_success || agreement)
    return 1.0;

  // Classify combiner feature vector and return output (probability
  // of tesseract class).
  double net_out[2];
  if (!combiner_net_->FeedForward(&features[0], net_out))
    return 1.0;
  return net_out[1];
}
bool tesseract::TesseractCubeCombiner::ComputeCombinerFeatures ( const string &  tess_res,
int  tess_confidence,
CubeObject cube_obj,
WordAltList cube_alt_list,
vector< double > *  features,
bool *  agreement 
)

Definition at line 130 of file tesseract_cube_combiner.cpp.

                                                                     {
  features->clear();
  *agreement = false;
  if (cube_alt_list == NULL || cube_alt_list->AltCount() <= 0)
    return false;

  // Get Cube's best string; return false if empty
  char_32 *cube_best_str32 = cube_alt_list->Alt(0);
  if (cube_best_str32 == NULL || CubeUtils::StrLen(cube_best_str32) < 1)
    return false;
  string cube_best_str;
  int cube_best_cost = cube_alt_list->AltCost(0);
  int cube_best_bigram_cost = 0;
  bool cube_best_bigram_cost_valid = true;
  if (cube_cntxt_->Bigrams())
    cube_best_bigram_cost = cube_cntxt_->Bigrams()->
        Cost(cube_best_str32, cube_cntxt_->CharacterSet());
  else
    cube_best_bigram_cost_valid = false;
  CubeUtils::UTF32ToUTF8(cube_best_str32, &cube_best_str);

  // Get Tesseract's UTF32 string
  string_32 tess_str32;
  CubeUtils::UTF8ToUTF32(tess_str.c_str(), &tess_str32);

  // Compute agreement flag
  *agreement = (tess_str.compare(cube_best_str) == 0);

  // Get Cube's second best string; if empty, return false
  char_32 *cube_next_best_str32;
  string cube_next_best_str;
  int cube_next_best_cost = WORST_COST;
  if (cube_alt_list->AltCount() > 1) {
    cube_next_best_str32 = cube_alt_list->Alt(1);
    if (cube_next_best_str32 == NULL ||
        CubeUtils::StrLen(cube_next_best_str32) == 0) {
      return false;
    }
    cube_next_best_cost = cube_alt_list->AltCost(1);
    CubeUtils::UTF32ToUTF8(cube_next_best_str32, &cube_next_best_str);
  }
  // Rank of Tesseract's top result in Cube's alternate list
  int tess_rank = 0;
  for (tess_rank = 0; tess_rank < cube_alt_list->AltCount(); tess_rank++) {
    string alt_str;
    CubeUtils::UTF32ToUTF8(cube_alt_list->Alt(tess_rank), &alt_str);
    if (alt_str == tess_str)
      break;
  }

  // Cube's cost for tesseract's result. Note that this modifies the
  // state of cube_obj, including its alternate list by calling RecognizeWord()
  int tess_cost = cube_obj->WordCost(tess_str.c_str());
  // Cube's bigram cost of Tesseract's string
  int tess_bigram_cost = 0;
  int tess_bigram_cost_valid = true;
  if (cube_cntxt_->Bigrams())
    tess_bigram_cost = cube_cntxt_->Bigrams()->
        Cost(tess_str32.c_str(), cube_cntxt_->CharacterSet());
  else
    tess_bigram_cost_valid = false;

  // Tesseract confidence
  features->push_back(tess_confidence);
  // Cube cost of Tesseract string
  features->push_back(tess_cost);
  // Cube Rank of Tesseract string
  features->push_back(tess_rank);
  // length of Tesseract OCR string
  features->push_back(tess_str.length());
  // Tesseract OCR string in dictionary
  features->push_back(ValidWord(tess_str));
  if (tess_bigram_cost_valid) {
    // bigram cost of Tesseract string
    features->push_back(tess_bigram_cost);
  }
  // Cube tess_cost of Cube best string
  features->push_back(cube_best_cost);
  // Cube tess_cost of Cube next best string
  features->push_back(cube_next_best_cost);
  // length of Cube string
  features->push_back(cube_best_str.length());
  // Cube string in dictionary
  features->push_back(ValidWord(cube_best_str));
  if (cube_best_bigram_cost_valid) {
    // bigram cost of Cube string
    features->push_back(cube_best_bigram_cost);
  }
  // case-insensitive string comparison, including punctuation
  int compare_nocase_punc = CompareStrings(cube_best_str.c_str(),
                                           tess_str.c_str(), false, true);
  features->push_back(compare_nocase_punc == 0);
  // case-sensitive string comparison, ignoring punctuation
  int compare_case_nopunc = CompareStrings(cube_best_str.c_str(),
                                           tess_str.c_str(), true, false);
  features->push_back(compare_case_nopunc == 0);
  // case-insensitive string comparison, ignoring punctuation
  int compare_nocase_nopunc = CompareStrings(cube_best_str.c_str(),
                                             tess_str.c_str(), true, true);
  features->push_back(compare_nocase_nopunc == 0);
  return true;
}
bool tesseract::TesseractCubeCombiner::LoadCombinerNet ( )

Definition at line 51 of file tesseract_cube_combiner.cpp.

                                            {
  ASSERT_HOST(cube_cntxt_);
  // Compute the path of the combiner net
  string data_path;
  cube_cntxt_->GetDataFilePath(&data_path);
  string net_file_name =  data_path + cube_cntxt_->Lang() +
                          ".tesseract_cube.nn";

  // Return false if file does not exist
  FILE *fp = fopen(net_file_name.c_str(), "rb");
  if (fp == NULL)
    return false;
  else
    fclose(fp);

  // Load and validate net
  combiner_net_ = NeuralNet::FromFile(net_file_name);
  if (combiner_net_ == NULL) {
    tprintf("Could not read combiner net file %s", net_file_name.c_str());
    return false;
  } else if (combiner_net_->out_cnt() != 2) {
    tprintf("Invalid combiner net file %s! Output count != 2\n",
            net_file_name.c_str());
    delete combiner_net_;
    combiner_net_ = NULL;
    return false;
  }
  return true;
}
bool tesseract::TesseractCubeCombiner::ValidWord ( const string &  str)

Definition at line 122 of file tesseract_cube_combiner.cpp.

                                                       {
  return (cube_cntxt_->TesseractObject()->getDict().valid_word(str.c_str())
          > 0);
}

The documentation for this class was generated from the following files: