Tesseract  3.02
WERD_RES Class Reference

#include <pageres.h>

Inheritance diagram for WERD_RES:
ELIST_LINK

List of all members.

Public Member Functions

 WERD_RES ()
 WERD_RES (WERD *the_word)
 WERD_RES (const WERD_RES &source)
 ~WERD_RES ()
const char *const BestUTF8 (int blob_index, bool in_rtl_context) const
const char *const RawUTF8 (int blob_index) const
UNICHARSET::Direction SymbolDirection (int blob_index) const
bool AnyRtlCharsInWord () const
bool AnyLtrCharsInWord () const
bool UnicharsInReadingOrder () const
void InitNonPointers ()
void InitPointers ()
void Clear ()
void ClearResults ()
WERD_RES & operator= (const WERD_RES &source)
void CopySimpleFields (const WERD_RES &source)
void InitForRetryRecognition (const WERD_RES &source)
bool SetupForTessRecognition (const UNICHARSET &unicharset_in, tesseract::Tesseract *tesseract, Pix *pix, bool numeric_mode, bool use_body_size, ROW *row, BLOCK *block)
bool SetupForCubeRecognition (const UNICHARSET &unicharset_in, tesseract::Tesseract *tesseract, const BLOCK *block)
void SetupFake (const UNICHARSET &uch)
void SetupWordScript (const UNICHARSET &unicharset_in)
void SetupBlamerBundle ()
void ConsumeWordResults (WERD_RES *word)
void ReplaceBestChoice (const WERD_CHOICE &choice, const GenericVector< int > &segmentation_state)
void RebuildBestState ()
void CloneChoppedToRebuild ()
void SetupBoxWord ()
void SetScriptPositions ()
void WithoutFootnoteSpan (int *start, int *end) const
void WithoutFootnoteSpan (const WERD_CHOICE &choice, const GenericVector< int > &state, int *start, int *end) const
void FakeClassifyWord (int blob_count, BLOB_CHOICE **choices)
void BestChoiceToCorrectText ()
bool ConditionalBlobMerge (TessResultCallback2< UNICHAR_ID, UNICHAR_ID, UNICHAR_ID > *class_cb, TessResultCallback2< bool, const TBOX &, const TBOX & > *box_cb, BLOB_CHOICE_LIST_CLIST *blob_choices)
UNICHAR_ID BothQuotes (UNICHAR_ID id1, UNICHAR_ID id2)
void fix_quotes (BLOB_CHOICE_LIST_CLIST *blob_choices)
UNICHAR_ID BothHyphens (UNICHAR_ID id1, UNICHAR_ID id2)
bool HyphenBoxesOverlap (const TBOX &box1, const TBOX &box2)
void fix_hyphens (BLOB_CHOICE_LIST_CLIST *blob_choices)
UNICHAR_ID BothSpaces (UNICHAR_ID id1, UNICHAR_ID id2)
void merge_tess_fails ()
void copy_on (WERD_RES *word_res)
bool PiecesAllNatural (int start, int count) const

Static Public Member Functions

static WERD_RES * deep_copy (const WERD_RES *src)

Public Attributes

WERD * word
tesseract::BoxWord * bln_boxes
DENORM denorm
const UNICHARSET * uch_set
TWERD * chopped_word
SEAMS seam_array
WERD_CHOICE * best_choice
WERD_CHOICE * raw_choice
GenericVector< WERD_CHOICE * > alt_choices
GenericVector< GenericVector< int > > alt_states
BlamerBundle * blamer_bundle
TWERD * rebuild_word
tesseract::BoxWord * box_word
GenericVector< int > best_state
GenericVector< STRING > correct_text
tesseract::Tesseract * tesseract
WERD_CHOICE * ep_choice
REJMAP reject_map
BOOL8 tess_failed
BOOL8 tess_accepted
BOOL8 tess_would_adapt
BOOL8 done
bool small_caps
inT8 italic
inT8 bold
const FontInfo * fontinfo
const FontInfo * fontinfo2
inT8 fontinfo_id_count
inT8 fontinfo_id2_count
BOOL8 guessed_x_ht
BOOL8 guessed_caps_ht
CRUNCH_MODE unlv_crunch_mode
float x_height
float caps_height
BOOL8 combination
BOOL8 part_of_combo
BOOL8 reject_spaces
GenericVector< inT8 > best_choice_fontinfo_ids

Detailed Description

Definition at line 314 of file pageres.h.


Constructor & Destructor Documentation

WERD_RES::WERD_RES ( ) [inline]

Definition at line 456 of file pageres.h.

WERD_RES::WERD_RES ( WERD *  the_word) [inline]

Definition at line 460 of file pageres.h.

                           {
    InitNonPointers();
    InitPointers();
    word = the_word;
  }
WERD_RES::WERD_RES ( const WERD_RES &  source) [inline]

Definition at line 465 of file pageres.h.

                                   {
    InitPointers();
    *this = source;            // see operator=
  }
WERD_RES::~WERD_RES ( )

Definition at line 746 of file pageres.cpp.

                     {
  Clear();
}

Member Function Documentation

bool WERD_RES::AnyLtrCharsInWord ( ) const [inline]

Definition at line 523 of file pageres.h.

                                 {
    if (uch_set == NULL || best_choice == NULL || best_choice->length() < 1)
      return false;
    for (int id = 0; id < best_choice->length(); id++) {
      int unichar_id = best_choice->unichar_id(id);
      if (unichar_id < 0 || unichar_id >= uch_set->size())
        continue;  // Ignore illegal chars.
      UNICHARSET::Direction dir = uch_set->get_direction(unichar_id);
      if (dir == UNICHARSET::U_LEFT_TO_RIGHT)
        return true;
    }
    return false;
  }
bool WERD_RES::AnyRtlCharsInWord ( ) const [inline]

Definition at line 506 of file pageres.h.

                                 {
    if (uch_set == NULL || best_choice == NULL || best_choice->length() < 1)
      return false;
    for (int id = 0; id < best_choice->length(); id++) {
      int unichar_id = best_choice->unichar_id(id);
      if (unichar_id < 0 || unichar_id >= uch_set->size())
        continue;  // Ignore illegal chars.
      UNICHARSET::Direction dir =
          uch_set->get_direction(unichar_id);
      if (dir == UNICHARSET::U_RIGHT_TO_LEFT ||
          dir == UNICHARSET::U_RIGHT_TO_LEFT_ARABIC ||
          dir == UNICHARSET::U_ARABIC_NUMBER)
        return true;
    }
    return false;
  }
void WERD_RES::BestChoiceToCorrectText ( )

Definition at line 572 of file pageres.cpp.

                                       {
  correct_text.clear();
  ASSERT_HOST(best_choice != NULL);
  for (int i = 0; i < best_choice->length(); ++i) {
    UNICHAR_ID choice_id = best_choice->unichar_id(i);
    const char* blob_choice = uch_set->id_to_unichar(choice_id);
    correct_text.push_back(STRING(blob_choice));
  }
}
const char* const WERD_RES::BestUTF8 ( int  blob_index,
bool  in_rtl_context 
) const [inline]

Definition at line 477 of file pageres.h.

                                                                        {
    if (blob_index < 0 || blob_index >= best_choice->length())
      return NULL;
    UNICHAR_ID id = best_choice->unichar_id(blob_index);
    if (id < 0 || id >= uch_set->size() || id == INVALID_UNICHAR_ID)
      return NULL;
    UNICHAR_ID mirrored = uch_set->get_mirror(id);
    if (in_rtl_context && mirrored > 0 && mirrored != INVALID_UNICHAR_ID)
      id = mirrored;
    return uch_set->id_to_unichar_ext(id);
  }
UNICHAR_ID WERD_RES::BothHyphens ( UNICHAR_ID  id1,
UNICHAR_ID  id2 
)

Definition at line 683 of file pageres.cpp.

                                                               {
  const char *ch = uch_set->id_to_unichar(id1);
  const char *next_ch = uch_set->id_to_unichar(id2);
  if (strlen(ch) == 1 && strlen(next_ch) == 1 &&
      (*ch == '-' || *ch == '~') && (*next_ch == '-' || *next_ch == '~'))
    return uch_set->unichar_to_id("-");
  return INVALID_UNICHAR_ID;
}
UNICHAR_ID WERD_RES::BothQuotes ( UNICHAR_ID  id1,
UNICHAR_ID  id2 
)

Definition at line 660 of file pageres.cpp.

                                                              {
  const char *ch = uch_set->id_to_unichar(id1);
  const char *next_ch = uch_set->id_to_unichar(id2);
  if (is_simple_quote(ch, strlen(ch)) &&
      is_simple_quote(next_ch, strlen(next_ch)))
    return uch_set->unichar_to_id("\"");
  return INVALID_UNICHAR_ID;
}
UNICHAR_ID WERD_RES::BothSpaces ( UNICHAR_ID  id1,
UNICHAR_ID  id2 
)

Definition at line 713 of file pageres.cpp.

                                                              {
  if (id1 == id2 && id1 == uch_set->unichar_to_id(" "))
    return id1;
  else
    return INVALID_UNICHAR_ID;
}
void WERD_RES::Clear ( )

Definition at line 789 of file pageres.cpp.

                     {
  if (word != NULL && combination) {
    delete word;
  }
  word = NULL;
  delete blamer_bundle;
  blamer_bundle = NULL;
  ClearResults();
}
void WERD_RES::ClearResults ( )
void WERD_RES::CloneChoppedToRebuild ( )

Definition at line 480 of file pageres.cpp.

                                     {
  if (rebuild_word != NULL)
    delete rebuild_word;
  rebuild_word = new TWERD(*chopped_word);
  SetupBoxWord();
  int word_len = box_word->length();
  best_state.reserve(word_len);
  correct_text.reserve(word_len);
  for (int i = 0; i < word_len; ++i) {
    best_state.push_back(1);
    correct_text.push_back(STRING(""));
  }
}
bool WERD_RES::ConditionalBlobMerge ( TessResultCallback2< UNICHAR_ID, UNICHAR_ID, UNICHAR_ID > *  class_cb,
TessResultCallback2< bool, const TBOX &, const TBOX & > *  box_cb,
BLOB_CHOICE_LIST_CLIST *  blob_choices 
)

Definition at line 587 of file pageres.cpp.

                                          {
  bool modified = false;
  for (int i = 0; i + 1 < best_choice->length(); ++i) {
    UNICHAR_ID new_id = class_cb->Run(best_choice->unichar_id(i),
                                      best_choice->unichar_id(i+1));
    if (new_id != INVALID_UNICHAR_ID &&
        (box_cb == NULL || box_cb->Run(box_word->BlobBox(i),
                                       box_word->BlobBox(i + 1)))) {
      if (reject_map.length() == best_choice->length())
        reject_map.remove_pos(i);
      best_choice->set_unichar_id(new_id, i);
      best_choice->remove_unichar_id(i + 1);
      raw_choice->set_unichar_id(new_id, i);
      raw_choice->remove_unichar_id(i + 1);
      modified = true;
      rebuild_word->MergeBlobs(i, i + 2);
      box_word->MergeBoxes(i, i + 2);
      if (i + 1 < best_state.length()) {
        best_state[i] += best_state[i + 1];
        best_state.remove(i + 1);
      }

      BLOB_CHOICE_LIST_C_IT blob_choices_it(blob_choices);
      for (int j = 0; j < i; ++j)
        blob_choices_it.forward();
      BLOB_CHOICE_IT it1(blob_choices_it.data());            // first choices
      BLOB_CHOICE_LIST* target_choices = blob_choices_it.data_relative(1);
      BLOB_CHOICE_IT it2(target_choices);  // second choices
      float certainty = it2.data()->certainty();
      float rating = it2.data()->rating();
      if (it1.data()->certainty() < certainty) {
        certainty = it1.data()->certainty();
        rating = it1.data()->rating();
        target_choices = blob_choices_it.data();
        blob_choices_it.forward();
      }
      delete blob_choices_it.extract();  // get rid of spare
      // TODO(rays) Fix the choices so they contain the desired result.
      // Do we really need to ? Only needed for fix_quotes, which should be
      // going away.
    }
  }
  delete class_cb;
  delete box_cb;
  return modified;
}
void WERD_RES::ConsumeWordResults ( WERD_RES *  word)

Definition at line 411 of file pageres.cpp.

                                                {
  denorm = word->denorm;
  MovePointerData(&chopped_word, &word->chopped_word);
  MovePointerData(&rebuild_word, &word->rebuild_word);
  MovePointerData(&box_word, &word->box_word);
  if (seam_array != NULL)
    free_seam_list(seam_array);
  seam_array = word->seam_array;
  word->seam_array = NULL;
  best_state.move(&word->best_state);
  correct_text.move(&word->correct_text);
  MovePointerData(&best_choice, &word->best_choice);
  MovePointerData(&raw_choice, &word->raw_choice);
  alt_choices.delete_data_pointers();
  alt_choices.move(&word->alt_choices);
  alt_states.move(&word->alt_states);
  reject_map = word->reject_map;
  if (word->blamer_bundle != NULL) {
    assert(blamer_bundle != NULL);
    blamer_bundle->CopyResults(*(word->blamer_bundle));
  }
  CopySimpleFields(*word);
}
void WERD_RES::copy_on ( WERD_RES *  word_res) [inline]

Definition at line 674 of file pageres.h.

                                   {  //from this word
    word->set_flag(W_BOL, word->flag(W_BOL) || word_res->word->flag(W_BOL));
    word->set_flag(W_EOL, word->flag(W_EOL) || word_res->word->flag(W_EOL));
    word->copy_on(word_res->word);
  }
void WERD_RES::CopySimpleFields ( const WERD_RES &  source)
static WERD_RES* WERD_RES::deep_copy ( const WERD_RES *  src) [inline, static]

Definition at line 668 of file pageres.h.

                                                  {
    return new WERD_RES(*src);
  }
void WERD_RES::FakeClassifyWord ( int  blob_count,
BLOB_CHOICE **  choices 
)

Definition at line 549 of file pageres.cpp.

                                                                     {
  // Setup the WERD_RES.
  ASSERT_HOST(box_word != NULL);
  ASSERT_HOST(blob_count == box_word->length());
  ASSERT_HOST(best_choice != NULL);
  BLOB_CHOICE_LIST_CLIST* word_choices = new BLOB_CHOICE_LIST_CLIST;
  BLOB_CHOICE_LIST_C_IT bc_it(word_choices);
  for (int c = 0; c < blob_count; ++c) {
    best_choice->append_unichar_id(
        choices[c]->unichar_id(), 1,
        choices[c]->rating(), choices[c]->certainty());
    BLOB_CHOICE_LIST* choice_list = new BLOB_CHOICE_LIST;
    BLOB_CHOICE_IT choice_it(choice_list);
    choice_it.add_after_then_move(choices[c]);
    bc_it.add_after_then_move(choice_list);
  }
  best_choice->set_blob_choices(word_choices);
  delete raw_choice;
  raw_choice = new WERD_CHOICE(*best_choice);
  reject_map.initialise(blob_count);
}
void WERD_RES::fix_hyphens ( BLOB_CHOICE_LIST_CLIST *  blob_choices)

Definition at line 700 of file pageres.cpp.

                                                               {
  if (!uch_set->contains_unichar("-") ||
      !uch_set->get_enabled(uch_set->unichar_to_id("-")))
    return;  // Don't create it if it is disallowed.

  ConditionalBlobMerge(
      NewPermanentTessCallback(this, &WERD_RES::BothHyphens),
      NewPermanentTessCallback(this, &WERD_RES::HyphenBoxesOverlap),
      blob_choices);
}
void WERD_RES::fix_quotes ( BLOB_CHOICE_LIST_CLIST *  blob_choices)

Definition at line 670 of file pageres.cpp.

                                                              {
  if (!uch_set->contains_unichar("\"") ||
      !uch_set->get_enabled(uch_set->unichar_to_id("\"")))
    return;  // Don't create it if it is disallowed.

  ConditionalBlobMerge(
      NewPermanentTessCallback(this, &WERD_RES::BothQuotes),
      NULL,
      blob_choices);
}
bool WERD_RES::HyphenBoxesOverlap ( const TBOX &  box1,
const TBOX &  box2 
)

Definition at line 694 of file pageres.cpp.

                                                                    {
  return box1.right() >= box2.left();
}
void WERD_RES::InitForRetryRecognition ( const WERD_RES &  source)

Definition at line 260 of file pageres.cpp.

                                                             {
  word = source.word;
  CopySimpleFields(source);
  if (source.blamer_bundle != NULL) {
    blamer_bundle = new BlamerBundle();
    blamer_bundle->CopyTruth(*source.blamer_bundle);
  }
}
void WERD_RES::InitNonPointers ( )

Definition at line 750 of file pageres.cpp.

                               {
  tess_failed = FALSE;
  tess_accepted = FALSE;
  tess_would_adapt = FALSE;
  done = FALSE;
  unlv_crunch_mode = CR_NONE;
  small_caps = false;
  italic = FALSE;
  bold = FALSE;
  // The fontinfos and tesseract count as non-pointers as they point to
  // data owned elsewhere.
  fontinfo = NULL;
  fontinfo2 = NULL;
  tesseract = NULL;
  fontinfo_id_count = 0;
  fontinfo_id2_count = 0;
  x_height = 0.0;
  caps_height = 0.0;
  guessed_x_ht = TRUE;
  guessed_caps_ht = TRUE;
  combination = FALSE;
  part_of_combo = FALSE;
  reject_spaces = FALSE;
}
void WERD_RES::InitPointers ( )
void WERD_RES::merge_tess_fails ( )
WERD_RES & WERD_RES::operator= ( const WERD_RES &  source)

Definition at line 177 of file pageres.cpp.

                                                     {
  this->ELIST_LINK::operator=(source);
  Clear();
  if (source.combination) {
    word = new WERD;
    *word = *(source.word);      // deep copy
  } else {
    word = source.word;          // pt to same word
  }
  if (source.bln_boxes != NULL)
    bln_boxes = new tesseract::BoxWord(*source.bln_boxes);
  if (source.chopped_word != NULL)
    chopped_word = new TWERD(*source.chopped_word);
  if (source.rebuild_word != NULL)
    rebuild_word = new TWERD(*source.rebuild_word);
  // TODO(rays) Do we ever need to copy the seam_array?
  denorm = source.denorm;
  if (source.box_word != NULL)
    box_word = new tesseract::BoxWord(*source.box_word);
  best_state = source.best_state;
  correct_text = source.correct_text;

  if (source.best_choice != NULL) {
    best_choice = new WERD_CHOICE(*source.best_choice);
    raw_choice = new WERD_CHOICE(*source.raw_choice);
    best_choice_fontinfo_ids = source.best_choice_fontinfo_ids;
  }
  else {
    best_choice = NULL;
    raw_choice = NULL;
    if (!best_choice_fontinfo_ids.empty()) {
      best_choice_fontinfo_ids.clear();
    }
  }
  for (int i = 0; i < source.alt_choices.length(); ++i) {
    const WERD_CHOICE *choice = source.alt_choices[i];
    ASSERT_HOST(choice != NULL);
    alt_choices.push_back(new WERD_CHOICE(*choice));
  }
  alt_states = source.alt_states;
  if (source.ep_choice != NULL) {
    ep_choice = new WERD_CHOICE(*source.ep_choice);
  } else {
    ep_choice = NULL;
  }
  reject_map = source.reject_map;
  combination = source.combination;
  part_of_combo = source.part_of_combo;
  CopySimpleFields(source);
  if (source.blamer_bundle != NULL) {
    blamer_bundle =  new BlamerBundle(*(source.blamer_bundle));
  }
  return *this;
}
bool WERD_RES::PiecesAllNatural ( int  start,
int  count 
) const

Definition at line 733 of file pageres.cpp.

                                                          {
  // all seams must have no splits.
  for (int index = start; index < start + count - 1; ++index) {
    if (index >= 0 && index < array_count(seam_array)) {
      SEAM* seam = reinterpret_cast<SEAM *>(array_value(seam_array, index));
      if (seam != NULL && seam->split1 != NULL)
        return false;
    }
  }
  return true;
}
const char* const WERD_RES::RawUTF8 ( int  blob_index) const [inline]

Definition at line 489 of file pageres.h.

                                                  {
    if (blob_index < 0 || blob_index >= raw_choice->length())
      return NULL;
    UNICHAR_ID id = raw_choice->unichar_id(blob_index);
    if (id < 0 || id >= uch_set->size() || id == INVALID_UNICHAR_ID)
      return NULL;
    return uch_set->id_to_unichar(id);
  }
void WERD_RES::RebuildBestState ( )

Definition at line 452 of file pageres.cpp.

                                {
  if (rebuild_word != NULL)
    delete rebuild_word;
  rebuild_word = new TWERD;
  if (seam_array == NULL) {
    seam_array = start_seam_list(chopped_word->blobs);
  }
  TBLOB* prev_blob = NULL;
  int start = 0;
  for (int i = 0; i < best_state.size(); ++i) {
    int length = best_state[i];
    join_pieces(chopped_word->blobs, seam_array, start, start + length - 1);
    TBLOB* blob = chopped_word->blobs;
    for (int i = 0; i < start; ++i)
      blob = blob->next;
    TBLOB* copy_blob = new TBLOB(*blob);
    if (prev_blob == NULL)
      rebuild_word->blobs = copy_blob;
    else
      prev_blob->next = copy_blob;
    prev_blob = copy_blob;
    break_pieces(blob, seam_array, start, start + length - 1);
    start += length;
  }
}
void WERD_RES::ReplaceBestChoice ( const WERD_CHOICE &  choice,
const GenericVector< int > &  segmentation_state 
)

Definition at line 436 of file pageres.cpp.

                                                  {
  delete best_choice;
  best_choice = new WERD_CHOICE(choice);
  best_state = segmentation_state;
  RebuildBestState();
  SetupBoxWord();
  // Make up a fake reject map of the right length to keep the
  // rejection pass happy.
  reject_map.initialise(segmentation_state.length());
  done = tess_accepted = tess_would_adapt = true;
  SetScriptPositions();
}
void WERD_RES::SetScriptPositions ( )
void WERD_RES::SetupBlamerBundle ( )

Definition at line 379 of file pageres.cpp.

                                 {
  if (blamer_bundle != NULL) {
    blamer_bundle->norm_box_tolerance = kBlamerBoxTolerance * denorm.x_scale();
    TPOINT topleft;
    TPOINT botright;
    TPOINT norm_topleft;
    TPOINT norm_botright;
    for (int b = 0; b < blamer_bundle->truth_word.length(); ++b) {
      const TBOX &box = blamer_bundle->truth_word.BlobBox(b);
      topleft.x = box.left();
      topleft.y = box.top();
      botright.x = box.right();
      botright.y = box.bottom();
      denorm.NormTransform(topleft, &norm_topleft);
      denorm.NormTransform(botright, &norm_botright);
      TBOX norm_box(norm_topleft.x, norm_botright.y,
                    norm_botright.x, norm_topleft.y);
      blamer_bundle->norm_truth_word.InsertBox(b, norm_box);
    }
  }
}
void WERD_RES::SetupFake ( const UNICHARSET &  uch)

Definition at line 340 of file pageres.cpp.

                                                        {
  ClearResults();
  SetupWordScript(unicharset_in);
  chopped_word = new TWERD;
  rebuild_word = new TWERD;
  bln_boxes = new tesseract::BoxWord;
  box_word = new tesseract::BoxWord;
  int blob_count = word->cblob_list()->length();
  best_choice = new WERD_CHOICE("", NULL, 10.0f, -1.0f,
                                TOP_CHOICE_PERM, unicharset_in);
  raw_choice = new WERD_CHOICE("", NULL, 10.0f, -1.0f,
                               TOP_CHOICE_PERM, unicharset_in);
  if (blob_count > 0) {
    BLOB_CHOICE** fake_choices = new BLOB_CHOICE*[blob_count];
    // For non-text blocks, just pass any blobs through to the box_word
    // and call the word failed with a fake classification.
    C_BLOB_IT b_it(word->cblob_list());
    int blob_id = 0;
    for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
      TBOX box = b_it.data()->bounding_box();
      box_word->InsertBox(box_word->length(), box);
      fake_choices[blob_id++] = new BLOB_CHOICE(0, 10.0f, -1.0f,
                                                -1, -1, -1, 0, 0, false);
    }
    FakeClassifyWord(blob_count, fake_choices);
    delete [] fake_choices;
  }
  tess_failed = true;
}
bool WERD_RES::SetupForCubeRecognition ( const UNICHARSET &  unicharset_in,
tesseract::Tesseract *  tesseract,
const BLOCK *  block 
)

Definition at line 317 of file pageres.cpp.

                                                           {
  tesseract = tess;
  POLY_BLOCK* pb = block != NULL ? block->poly_block() : NULL;
  if (pb != NULL && !pb->IsText()) {
    // Ignore words in graphic regions.
    SetupFake(unicharset_in);
    word->set_flag(W_REP_CHAR, false);
    return false;
  }
  ClearResults();
  SetupWordScript(unicharset_in);
  TBOX word_box = word->bounding_box();
  denorm.SetupNormalization(block, NULL, NULL, NULL, NULL, 0,
                            word_box.left(), word_box.bottom(),
                            1.0f, 1.0f, 0.0f, 0.0f);
  SetupBlamerBundle();
  return true;
}
bool WERD_RES::SetupForTessRecognition ( const UNICHARSET &  unicharset_in,
tesseract::Tesseract *  tesseract,
Pix *  pix,
bool  numeric_mode,
bool  use_body_size,
ROW *  row,
BLOCK *  block 
)

Definition at line 272 of file pageres.cpp.

                                                           {
  tesseract = tess;
  POLY_BLOCK* pb = block != NULL ? block->poly_block() : NULL;
  if (word->cblob_list()->empty() || (pb != NULL && !pb->IsText())) {
    // Empty words occur when all the blobs have been moved to the rej_blobs
    // list, which seems to occur frequently in junk.
    SetupFake(unicharset_in);
    word->set_flag(W_REP_CHAR, false);
    return false;
  }
  ClearResults();
  SetupWordScript(unicharset_in);
  chopped_word = TWERD::PolygonalCopy(word);
  if (use_body_size && row->body_size() > 0.0f) {
    chopped_word->SetupBLNormalize(block, row, row->body_size(),
                                   numeric_mode, &denorm);
  } else {
    chopped_word->SetupBLNormalize(block, row, x_height, numeric_mode, &denorm);
  }
  // The image will be 8-bit grey if the input was grey or color. Note that in
  // a grey image 0 is black and 255 is white. If the input was binary, then
  // the pix will be binary and 0 is white, with 1 being black.
  // To tell the difference pixGetDepth() will return 8 or 1.
  denorm.set_pix(pix);
  // The inverse flag will be true iff the word has been determined to be white
  // on black, and is independent of whether the pix is 8 bit or 1 bit.
  denorm.set_inverse(word->flag(W_INVERSE));
  chopped_word->Normalize(denorm);
  bln_boxes = tesseract::BoxWord::CopyFromNormalized(NULL, chopped_word);
  seam_array = start_seam_list(chopped_word->blobs);
  best_choice = new WERD_CHOICE(&unicharset_in);
  best_choice->make_bad();
  raw_choice = new WERD_CHOICE(&unicharset_in);
  raw_choice->make_bad();
  SetupBlamerBundle();
  return true;
}
void WERD_RES::SetupWordScript ( const UNICHARSET &  unicharset_in)

Definition at line 370 of file pageres.cpp.

                                                    {
  uch_set = &uch;
  int script = uch.default_sid();
  word->set_script_id(script);
  word->set_flag(W_SCRIPT_HAS_XHEIGHT, uch.script_has_xheight());
  word->set_flag(W_SCRIPT_IS_LATIN, script == uch.latin_sid());
}
UNICHARSET::Direction WERD_RES::SymbolDirection ( int  blob_index) const [inline]

Definition at line 498 of file pageres.h.

                                                            {
    if (best_choice == NULL ||
        blob_index >= best_choice->length() ||
        blob_index < 0)
      return UNICHARSET::U_OTHER_NEUTRAL;
    return uch_set->get_direction(best_choice->unichar_id(blob_index));
  }
bool WERD_RES::UnicharsInReadingOrder ( ) const [inline]

Definition at line 540 of file pageres.h.

void WERD_RES::WithoutFootnoteSpan ( int *  start,
int *  end 
) const

Definition at line 510 of file pageres.cpp.

                                                               {
  int end = best_choice->length();
  while (end > 0 &&
         uch_set->get_isdigit(best_choice->unichar_ids()[end - 1]) &&
         box_word->BlobPosition(end - 1) == tesseract::SP_SUPERSCRIPT) {
    end--;
  }
  int start = 0;
  while (start < end &&
         uch_set->get_isdigit(best_choice->unichar_ids()[start]) &&
         box_word->BlobPosition(start) == tesseract::SP_SUPERSCRIPT) {
    start++;
  }
  *pstart = start;
  *pend = end;
}
void WERD_RES::WithoutFootnoteSpan ( const WERD_CHOICE &  choice,
const GenericVector< int > &  state,
int *  start,
int *  end 
) const

Definition at line 527 of file pageres.cpp.

                                  {
  int len = word.length();
  *pstart = 0;
  *pend = len;
  if (len < 2) return;
  if (!word.unicharset()->get_isdigit(word.unichar_ids()[len - 1]) &&
      !word.unicharset()->get_isdigit(word.unichar_ids()[0])) return;

  // ok, now that we know the word ends in digits, do the expensive bit of
  // figuring out if they're superscript.
  WERD_RES copy(*this);
  copy.ReplaceBestChoice(word, state);
  copy.WithoutFootnoteSpan(pstart, pend);
}

Member Data Documentation

Definition at line 364 of file pageres.h.

Definition at line 359 of file pageres.h.

Definition at line 392 of file pageres.h.

Definition at line 367 of file pageres.h.

Definition at line 343 of file pageres.h.

Definition at line 422 of file pageres.h.

Definition at line 387 of file pageres.h.

Definition at line 432 of file pageres.h.

Definition at line 357 of file pageres.h.

Definition at line 450 of file pageres.h.

Definition at line 346 of file pageres.h.

Definition at line 419 of file pageres.h.

Definition at line 407 of file pageres.h.

Definition at line 424 of file pageres.h.

Definition at line 425 of file pageres.h.

Definition at line 427 of file pageres.h.

Definition at line 426 of file pageres.h.

Definition at line 429 of file pageres.h.

Definition at line 428 of file pageres.h.

Definition at line 421 of file pageres.h.

Definition at line 451 of file pageres.h.

Definition at line 360 of file pageres.h.

Definition at line 381 of file pageres.h.

Definition at line 408 of file pageres.h.

Definition at line 452 of file pageres.h.

Definition at line 358 of file pageres.h.

Definition at line 420 of file pageres.h.

Definition at line 417 of file pageres.h.

Definition at line 409 of file pageres.h.

Definition at line 418 of file pageres.h.

Definition at line 403 of file pageres.h.

Definition at line 348 of file pageres.h.

Definition at line 430 of file pageres.h.

Definition at line 334 of file pageres.h.

Definition at line 431 of file pageres.h.


The documentation for this class was generated from the following files: