Tesseract  3.02
WERD_CHOICE Class Reference

#include <ratngs.h>

List of all members.

Public Member Functions

 WERD_CHOICE (const UNICHARSET *unicharset)
 WERD_CHOICE (const UNICHARSET *unicharset, int reserved)
 WERD_CHOICE (const char *src_string, const char *src_lengths, float src_rating, float src_certainty, uinT8 src_permuter, const UNICHARSET &unicharset)
 WERD_CHOICE (const char *src_string, const UNICHARSET &unicharset)
 WERD_CHOICE (const WERD_CHOICE &word)
 ~WERD_CHOICE ()
const UNICHARSET * unicharset () const
int length () const
const UNICHAR_ID * unichar_ids () const
const UNICHAR_ID unichar_id (int index) const
const char * fragment_lengths () const
const char fragment_length (int index) const
float rating () const
float certainty () const
uinT8 permuter () const
const char * permuter_name () const
bool fragment_mark () const
BLOB_CHOICE_LIST_CLIST * blob_choices ()
void set_unichar_id (UNICHAR_ID unichar_id, int index)
void set_fragment_length (char flen, int index)
void set_rating (float new_val)
void set_certainty (float new_val)
void set_permuter (uinT8 perm)
void set_fragment_mark (bool new_fragment_mark)
void set_length (int len)
void set_blob_choices (BLOB_CHOICE_LIST_CLIST *blob_choices)
void double_the_size ()
 Make more space in the unichar_ids_ and fragment_lengths_ arrays.
void init (int reserved)
void init (const char *src_string, const char *src_lengths, float src_rating, float src_certainty, uinT8 src_permuter)
void make_bad ()
 Set the fields in this choice to be default (bad) values.
void append_unichar_id_space_allocated (UNICHAR_ID unichar_id, char fragment_length, float rating, float certainty)
void append_unichar_id (UNICHAR_ID unichar_id, char fragment_length, float rating, float certainty)
void set_unichar_id (UNICHAR_ID unichar_id, char fragment_length, float rating, float certainty, int index)
bool contains_unichar_id (UNICHAR_ID unichar_id) const
void remove_unichar_ids (int index, int num)
void remove_last_unichar_id ()
void remove_unichar_id (int index)
bool has_rtl_unichar_id () const
void reverse_and_mirror_unichar_ids ()
void punct_stripped (int *start_core, int *end_core) const
WERD_CHOICE shallow_copy (int start, int end) const
void string_and_lengths (STRING *word_str, STRING *word_lengths_str) const
const STRING debug_string () const
bool set_unichars_in_script_order (bool in_script_order)
bool unichars_in_script_order () const
const STRING & unichar_string () const
const STRING & unichar_lengths () const
const void print () const
const void print (const char *msg) const
WERD_CHOICE & operator+= (const WERD_CHOICE &second)
WERD_CHOICE & operator= (const WERD_CHOICE &source)

Static Public Attributes

static const float kBadRating = 100000.0

Detailed Description

Definition at line 177 of file ratngs.h.


Constructor & Destructor Documentation

WERD_CHOICE::WERD_CHOICE ( const UNICHARSET *  unicharset) [inline]

Definition at line 181 of file ratngs.h.

    : unicharset_(unicharset) { this->init(8); }
WERD_CHOICE::WERD_CHOICE ( const UNICHARSET *  unicharset,
int  reserved 
) [inline]

Definition at line 183 of file ratngs.h.

    : unicharset_(unicharset) { this->init(reserved); }
WERD_CHOICE::WERD_CHOICE ( const char *  src_string,
const char *  src_lengths,
float  src_rating,
float  src_certainty,
uinT8  src_permuter,
const UNICHARSET &  unicharset 
) [inline]

Definition at line 185 of file ratngs.h.

    : unicharset_(&unicharset) {
    this->init(src_string, src_lengths, src_rating,
               src_certainty, src_permuter);
  }
WERD_CHOICE::WERD_CHOICE ( const char *  src_string,
const UNICHARSET &  unicharset 
)

WERD_CHOICE::WERD_CHOICE

Constructor to build a WERD_CHOICE from the given string. The function assumes that src_string is not NULL.

Definition at line 112 of file ratngs.cpp.

    : unicharset_(&unicharset){
  STRING src_lengths;
  const char *ptr = src_string;
  const char *end = src_string + strlen(src_string);
  int step = unicharset.step(ptr);
  for (; ptr < end && step > 0;
       step = unicharset.step(ptr), src_lengths += step, ptr += step);
  if (step != 0 && ptr == end) {
    this->init(src_string, src_lengths.string(),
               0.0, 0.0, NO_PERM);
  } else {  // there must have been an invalid unichar in the string
    this->init(8);
    this->make_bad();
  }
}
WERD_CHOICE::WERD_CHOICE ( const WERD_CHOICE &  word) [inline]

Definition at line 196 of file ratngs.h.

                                       : unicharset_(word.unicharset_) {
    this->init(word.length());
    this->operator=(word);
  }
WERD_CHOICE::~WERD_CHOICE ( )

WERD_CHOICE::~WERD_CHOICE

Definition at line 168 of file ratngs.cpp.

                          {
  delete[] unichar_ids_;
  delete[] fragment_lengths_;
  delete_blob_choices();
}

Member Function Documentation

void WERD_CHOICE::append_unichar_id ( UNICHAR_ID  unichar_id,
char  fragment_length,
float  rating,
float  certainty 
)

append_unichar_id

Make sure there is enough space in the word for the new unichar id and call append_unichar_id_space_allocated().

Definition at line 313 of file ratngs.cpp.

                                   {
  if (length_ == reserved_) {
    this->double_the_size();
  }
  this->append_unichar_id_space_allocated(unichar_id, fragment_length,
                                          rating, certainty);
}
void WERD_CHOICE::append_unichar_id_space_allocated ( UNICHAR_ID  unichar_id,
char  fragment_length,
float  rating,
float  certainty 
) [inline]

This function assumes that there is enough space reserved in the WERD_CHOICE for adding another unichar. This is an efficient alternative to append_unichar_id().

Definition at line 322 of file ratngs.h.

                                     {
    assert(reserved_ > length_);
    length_++;
    this->set_unichar_id(unichar_id, fragment_length,
                         rating, certainty, length_-1);
  }
BLOB_CHOICE_LIST_CLIST* WERD_CHOICE::blob_choices ( ) [inline]

Definition at line 235 of file ratngs.h.

                                                {
    return blob_choices_;
  }
float WERD_CHOICE::certainty ( ) const [inline]

Definition at line 225 of file ratngs.h.

                                 {
    return certainty_;
  }
bool WERD_CHOICE::contains_unichar_id ( UNICHAR_ID  unichar_id) const

contains_unichar_id

Returns true if unichar_ids_ contains the given unichar_id, false otherwise.

Definition at line 197 of file ratngs.cpp.

                                                                 {
  for (int i = 0; i < length_; ++i) {
    if (unichar_ids_[i] == unichar_id) {
      return true;
    }
  }
  return false;
}
const STRING WERD_CHOICE::debug_string ( ) const [inline]

Definition at line 364 of file ratngs.h.

                                    {
    STRING word_str;
    for (int i = 0; i < length_; ++i) {
      word_str += unicharset_->debug_str(unichar_ids_[i]);
      word_str += " ";
    }
    return word_str;
  }
void WERD_CHOICE::double_the_size ( ) [inline]

Make more space in the unichar_ids_ and fragment_lengths_ arrays.

Definition at line 268 of file ratngs.h.

                                {
    if (reserved_ > 0) {
      unichar_ids_ = GenericVector<UNICHAR_ID>::double_the_size_memcpy(
          reserved_, unichar_ids_);
      fragment_lengths_ = GenericVector<char>::double_the_size_memcpy(
          reserved_, fragment_lengths_);
      reserved_ *= 2;
    } else {
      unichar_ids_ = new UNICHAR_ID[1];
      fragment_lengths_ = new char[1];
      reserved_ = 1;
    }
  }
const char WERD_CHOICE::fragment_length ( int  index) const [inline]

Definition at line 218 of file ratngs.h.

                                                     {
    assert(index < length_);
    return fragment_lengths_[index];
  }
const char* WERD_CHOICE::fragment_lengths ( ) const [inline]

Definition at line 215 of file ratngs.h.

                                              {
    return fragment_lengths_;
  }
bool WERD_CHOICE::fragment_mark ( ) const [inline]

Definition at line 232 of file ratngs.h.

                                    {
    return fragment_mark_;
  }
bool WERD_CHOICE::has_rtl_unichar_id ( ) const

has_rtl_unichar_id

Returns true if unichar_ids contain at least one "strongly" RTL unichar.

Definition at line 276 of file ratngs.cpp.

                                           {
  int i;
  for (i = 0; i < length_; ++i) {
    UNICHARSET::Direction dir = unicharset_->get_direction(unichar_ids_[i]);
    if (dir == UNICHARSET::U_RIGHT_TO_LEFT ||
        dir == UNICHARSET::U_RIGHT_TO_LEFT_ARABIC) {
      return true;
    }
  }
  return false;
}
void WERD_CHOICE::init ( int  reserved) [inline]

Initializes WERD_CHOICE - reserves `reserved` slots in the unichar_ids_ and fragment_lengths_ arrays (no allocation is made when `reserved` is not positive). Sets other values to default (blank) values.

Definition at line 284 of file ratngs.h.

                                 {
    reserved_ = reserved;
    if (reserved > 0) {
      unichar_ids_ = new UNICHAR_ID[reserved];
      fragment_lengths_ = new char[reserved];
    } else {
      unichar_ids_ = NULL;
      fragment_lengths_ = NULL;
    }
    length_ = 0;
    rating_ = 0.0;
    certainty_ = MAX_FLOAT32;
    permuter_ = NO_PERM;
    fragment_mark_ = false;
    blob_choices_ = NULL;
    unichars_in_script_order_ = false;  // Tesseract is strict left-to-right.
  }
void WERD_CHOICE::init ( const char *  src_string,
const char *  src_lengths,
float  src_rating,
float  src_certainty,
uinT8  src_permuter 
)

Helper function to build a WERD_CHOICE from the given string, fragment lengths, rating, certainty and permuter. The function assumes that src_string is not NULL. src_lengths argument could be NULL, in which case the unichars in src_string are assumed to all be of length 1.

WERD_CHOICE::init

Helper function to build a WERD_CHOICE from the given string, fragment lengths, rating, certainty and permuter.

The function assumes that src_string is not NULL. src_lengths argument could be NULL, in which case the unichars in src_string are assumed to all be of length 1.

Definition at line 140 of file ratngs.cpp.

                                           {
  int src_string_len = strlen(src_string);
  if (src_string_len == 0) {
    this->init(8);
  } else {
    this->init(src_lengths ? strlen(src_lengths): src_string_len);
    length_ = reserved_;
    int offset = 0;
    for (int i = 0; i < length_; ++i) {
      int unichar_length = src_lengths ? src_lengths[i] : 1;
      unichar_ids_[i] =
          unicharset_->unichar_to_id(src_string+offset, unichar_length);
      fragment_lengths_[i] = 1;
      offset += unichar_length;
    }
  }
  rating_ = src_rating;
  certainty_ = src_certainty;
  permuter_ = src_permuter;
}
int WERD_CHOICE::length ( ) const [inline]

Definition at line 205 of file ratngs.h.

                            {
    return length_;
  }
void WERD_CHOICE::make_bad ( ) [inline]

Set the fields in this choice to be default (bad) values.

Definition at line 312 of file ratngs.h.

                         {
    length_ = 0;
    rating_ = kBadRating;
    certainty_ = -MAX_FLOAT32;
    fragment_mark_ = false;
  }
WERD_CHOICE & WERD_CHOICE::operator+= ( const WERD_CHOICE &  second)

WERD_CHOICE::operator+=

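Concatenate a second word choice onto the end of this one. The ratings are added and the certainty is the minimum of the two. If this word's permuter is NO_PERM it takes the second word's permuter; otherwise, if the second word's permuter is not NO_PERM and differs from this one's, the permuter is set to COMPOUND_PERM.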

Definition at line 330 of file ratngs.cpp.

                                                                 {
  // TODO(daria): find out why the choice was cleared this way if any
  // of the pieces are empty. Add the description of this behavior
  // to the comments.
  // if (word_string.length () == 0 || second.word_string.length () == 0) {
  //   word_string = NULL;          //make it empty
  //   word_lengths = NULL;
  //   delete_blob_choices();
  // } else {
  ASSERT_HOST(unicharset_ == second.unicharset_);
  while (reserved_ < length_ + second.length()) {
    this->double_the_size();
  }
  const UNICHAR_ID *other_unichar_ids = second.unichar_ids();
  const char *other_fragment_lengths = second.fragment_lengths();
  for (int i = 0; i < second.length(); ++i) {
    unichar_ids_[length_ + i] = other_unichar_ids[i];
    fragment_lengths_[length_ + i] = other_fragment_lengths[i];
  }
  length_ += second.length();
  rating_ += second.rating();  // add ratings
  if (second.certainty() < certainty_) // take min
    certainty_ = second.certainty();
  if (permuter_ == NO_PERM) {
    permuter_ = second.permuter();
  } else if (second.permuter() != NO_PERM &&
             second.permuter() != permuter_) {
    permuter_ = COMPOUND_PERM;
  }

  // Append a deep copy of second blob_choices if it exists.
  if (second.blob_choices_ != NULL) {
    if (this->blob_choices_ == NULL)
      this->blob_choices_ = new BLOB_CHOICE_LIST_CLIST;

    BLOB_CHOICE_LIST_C_IT this_blob_choices_it;
    BLOB_CHOICE_LIST_C_IT second_blob_choices_it;

    this_blob_choices_it.set_to_list(this->blob_choices_);
    this_blob_choices_it.move_to_last();

    second_blob_choices_it.set_to_list(second.blob_choices_);

    for (second_blob_choices_it.mark_cycle_pt();
         !second_blob_choices_it.cycled_list();
         second_blob_choices_it.forward()) {

      BLOB_CHOICE_LIST* blob_choices_copy = new BLOB_CHOICE_LIST();
      blob_choices_copy->deep_copy(second_blob_choices_it.data(),
                                   &BLOB_CHOICE::deep_copy);

      this_blob_choices_it.add_after_then_move(blob_choices_copy);
    }
  }
  return *this;
}
WERD_CHOICE & WERD_CHOICE::operator= ( const WERD_CHOICE &  source)

WERD_CHOICE::operator=

Allocate enough memory to hold a copy of source and copy over all the information from source to this WERD_CHOICE.

Definition at line 394 of file ratngs.cpp.

                                                             {
  while (reserved_ < source.length()) {
    this->double_the_size();
  }

  unicharset_ = source.unicharset_;
  const UNICHAR_ID *other_unichar_ids = source.unichar_ids();
  const char *other_fragment_lengths = source.fragment_lengths();
  for (int i = 0; i < source.length(); ++i) {
    unichar_ids_[i] = other_unichar_ids[i];
    fragment_lengths_[i] = other_fragment_lengths[i];
  }
  length_ = source.length();
  rating_ = source.rating();
  certainty_ = source.certainty();
  permuter_ = source.permuter();
  fragment_mark_ = source.fragment_mark();

  // Delete existing blob_choices
  this->delete_blob_choices();

  // Deep copy blob_choices of source
  if (source.blob_choices_ != NULL) {
    BLOB_CHOICE_LIST_C_IT this_blob_choices_it;
    BLOB_CHOICE_LIST_C_IT source_blob_choices_it;

    this->blob_choices_ = new BLOB_CHOICE_LIST_CLIST();

    this_blob_choices_it.set_to_list(this->blob_choices_);
    source_blob_choices_it.set_to_list(source.blob_choices_);

    for (source_blob_choices_it.mark_cycle_pt();
         !source_blob_choices_it.cycled_list();
         source_blob_choices_it.forward()) {

      BLOB_CHOICE_LIST* blob_choices_copy = new BLOB_CHOICE_LIST();
      blob_choices_copy->deep_copy(source_blob_choices_it.data(),
                                   &BLOB_CHOICE::deep_copy);

      this_blob_choices_it.add_after_then_move(blob_choices_copy);
    }
  }
  return *this;
}
uinT8 WERD_CHOICE::permuter ( ) const [inline]

Definition at line 228 of file ratngs.h.

                                {
    return permuter_;
  }
const char * WERD_CHOICE::permuter_name ( ) const

Definition at line 174 of file ratngs.cpp.

                                             {
  return kPermuterTypeNames[permuter_];
}
const void WERD_CHOICE::print ( ) const [inline]

Definition at line 397 of file ratngs.h.

{ this->print(""); }
const void WERD_CHOICE::print ( const char *  msg) const

WERD_CHOICE::print

Print WERD_CHOICE to stdout.

Definition at line 457 of file ratngs.cpp.

                                                   {
  tprintf("%s WERD_CHOICE:\n", msg);
  tprintf("length_ %d reserved_ %d permuter_ %d\n",
         length_, reserved_, permuter_);
  tprintf("rating_ %.4f certainty_ %.4f", rating_, certainty_);
  if (fragment_mark_) {
    tprintf(" fragment_mark_ true");
  }
  tprintf("\n");
  if (unichar_string_.length() > 0) {
    tprintf("unichar_string_ %s unichar_lengths_ %s\n",
            unichar_string_.string(), unichar_lengths_.string());
  }
  tprintf("unichar_ids: ");
  int i;
  for (i = 0; i < length_; ++i) {
    tprintf("%d ", unichar_ids_[i]);
  }
  tprintf("\nfragment_lengths_: ");
  for (i = 0; i < length_; ++i) {
    tprintf("%d ", fragment_lengths_[i]);
  }
  tprintf("\n");
  fflush(stdout);
}
void WERD_CHOICE::punct_stripped ( int *  start,
int *  end 
) const

punct_stripped

Returns the half-open interval of unichar_id indices [start, end) which enclose the core portion of this word -- the part after stripping punctuation from the left and right.

Definition at line 245 of file ratngs.cpp.

                                                           {
  *start = 0;
  *end = length() - 1;
  while (*start < length() &&
         unicharset()->get_ispunctuation(unichar_id(*start))) {
    (*start)++;
  }
  while (*end > -1 &&
         unicharset()->get_ispunctuation(unichar_id(*end))) {
    (*end)--;
  }
  (*end)++;
}
float WERD_CHOICE::rating ( ) const [inline]

Definition at line 222 of file ratngs.h.

                              {
    return rating_;
  }
void WERD_CHOICE::remove_last_unichar_id ( ) [inline]

Definition at line 347 of file ratngs.h.

{ --length_; }
void WERD_CHOICE::remove_unichar_id ( int  index) [inline]

Definition at line 348 of file ratngs.h.

                                           {
    this->remove_unichar_ids(index, 1);
  }
void WERD_CHOICE::remove_unichar_ids ( int  start,
int  num 
)

remove_unichar_ids

Removes num unichar ids starting from index start from unichar_ids_ and updates length_ and fragment_lengths_ to reflect this change. Note: this function does not modify rating_ and certainty_.

Definition at line 213 of file ratngs.cpp.

                                                       {
  ASSERT_HOST(start >= 0 && start + num <= length_);
  for (int i = start; i+num < length_; ++i) {
    unichar_ids_[i] = unichar_ids_[i+num];
    fragment_lengths_[i] = fragment_lengths_[i+num];
  }
  length_ -= num;
}
void WERD_CHOICE::reverse_and_mirror_unichar_ids ( )

reverse_and_mirror_unichar_ids

Reverses and mirrors unichars in unichar_ids.

Definition at line 227 of file ratngs.cpp.

                                                 {
  for (int i = 0; i < length_/2; ++i) {
    UNICHAR_ID tmp_id = unichar_ids_[i];
    unichar_ids_[i] = unicharset_->get_mirror(unichar_ids_[length_-1-i]);
    unichar_ids_[length_-1-i] = unicharset_->get_mirror(tmp_id);
  }
  if (length_ % 2 != 0) {
    unichar_ids_[length_/2] = unicharset_->get_mirror(unichar_ids_[length_/2]);
  }
}
void WERD_CHOICE::set_blob_choices ( BLOB_CHOICE_LIST_CLIST *  blob_choices)

WERD_CHOICE::set_blob_choices

Delete current blob_choices. Set the blob_choices to the given new list.

Definition at line 184 of file ratngs.cpp.

                                                                       {
  if (blob_choices_ != blob_choices) {
    delete_blob_choices();
    blob_choices_ = blob_choices;
  }
}
void WERD_CHOICE::set_certainty ( float  new_val) [inline]

Definition at line 249 of file ratngs.h.

                                           {
    certainty_ = new_val;
  }
void WERD_CHOICE::set_fragment_length ( char  flen,
int  index 
) [inline]

Definition at line 242 of file ratngs.h.

                                                        {
    assert(index < length_);
    fragment_lengths_[index] = flen;
  }
void WERD_CHOICE::set_fragment_mark ( bool  new_fragment_mark) [inline]

Definition at line 255 of file ratngs.h.

                                                        {
    fragment_mark_ = new_fragment_mark;
  }
void WERD_CHOICE::set_length ( int  len) [inline]

Definition at line 261 of file ratngs.h.

                                  {
    ASSERT_HOST(reserved_ >= len);
    length_ = len;
  }
void WERD_CHOICE::set_permuter ( uinT8  perm) [inline]

Definition at line 252 of file ratngs.h.

                                       {
    permuter_ = perm;
  }
void WERD_CHOICE::set_rating ( float  new_val) [inline]

Definition at line 246 of file ratngs.h.

                                        {
    rating_ = new_val;
  }
void WERD_CHOICE::set_unichar_id ( UNICHAR_ID  unichar_id,
int  index 
) [inline]

Definition at line 238 of file ratngs.h.

                                                               {
    assert(index < length_);
    unichar_ids_[index] = unichar_id;
  }
void WERD_CHOICE::set_unichar_id ( UNICHAR_ID  unichar_id,
char  fragment_length,
float  rating,
float  certainty,
int  index 
) [inline]

Definition at line 334 of file ratngs.h.

                                                                       {
    assert(index < length_);
    unichar_ids_[index] = unichar_id;
    fragment_lengths_[index] = fragment_length;
    rating_ += rating;
    if (certainty < certainty_) {
      certainty_ = certainty;
    }
  }
bool WERD_CHOICE::set_unichars_in_script_order ( bool  in_script_order) [inline]

Definition at line 376 of file ratngs.h.

                                                          {
    return unichars_in_script_order_ = in_script_order;
  }
WERD_CHOICE WERD_CHOICE::shallow_copy ( int  start,
int  end 
) const

Definition at line 259 of file ratngs.cpp.

                                                              {
  ASSERT_HOST(start >= 0 && start <= length_);
  ASSERT_HOST(end >= 0 && end <= length_);
  if (end < start) { end = start; }
  WERD_CHOICE retval(unicharset_, end - start);
  for (int i = start; i < end; i++) {
    retval.append_unichar_id_space_allocated(
        unichar_ids_[i], fragment_lengths_[i], 0.0f, 0.0f);
  }
  return retval;
}
void WERD_CHOICE::string_and_lengths ( STRING *  word_str,
STRING *  word_lengths_str 
) const

string_and_lengths

Populates the given word_str with unichars from unichar_ids_ and, if word_lengths_str is not NULL, populates word_lengths_str with the corresponding unichar lengths.

Definition at line 294 of file ratngs.cpp.

                                                                     {
  *word_str = "";
  if (word_lengths_str != NULL) *word_lengths_str = "";
  for (int i = 0; i < length_; ++i) {
    const char *ch = unicharset_->id_to_unichar_ext(unichar_ids_[i]);
    *word_str += ch;
    if (word_lengths_str != NULL) {
      *word_lengths_str += strlen(ch);
    }
  }
}
const UNICHAR_ID WERD_CHOICE::unichar_id ( int  index) const [inline]

Definition at line 211 of file ratngs.h.

                                                      {
    assert(index < length_);
    return unichar_ids_[index];
  }
const UNICHAR_ID* WERD_CHOICE::unichar_ids ( ) const [inline]

Definition at line 208 of file ratngs.h.

                                               {
    return unichar_ids_;
  }
const STRING& WERD_CHOICE::unichar_lengths ( ) const [inline]

Definition at line 393 of file ratngs.h.

                                        {
    this->string_and_lengths(&unichar_string_, &unichar_lengths_);
    return unichar_lengths_;
  }
const STRING& WERD_CHOICE::unichar_string ( ) const [inline]

Definition at line 386 of file ratngs.h.

                                       {
    this->string_and_lengths(&unichar_string_, &unichar_lengths_);
    return unichar_string_;
  }
bool WERD_CHOICE::unichars_in_script_order ( ) const [inline]

Definition at line 380 of file ratngs.h.

                                        {
    return unichars_in_script_order_;
  }
const UNICHARSET* WERD_CHOICE::unicharset ( ) const [inline]

Definition at line 202 of file ratngs.h.

                                       {
    return unicharset_;
  }

Member Data Documentation

const float WERD_CHOICE::kBadRating = 100000.0 [static]

Definition at line 179 of file ratngs.h.


The documentation for this class was generated from the following files: