Tesseract  3.02
WERD Class Reference

#include <werd.h>

Inheritance diagram for WERD:

List of all members.

Public Member Functions

 WERD ()
 WERD (C_BLOB_LIST *blob_list, uinT8 blanks, const char *text)
 WERD (C_BLOB_LIST *blob_list, WERD *clone)
WERD * ConstructFromSingleBlob (bool bol, bool eol, C_BLOB *blob)
 ~WERD ()
WERD & operator= (const WERD &source)
WERD * ConstructWerdWithNewBlobs (C_BLOB_LIST *all_blobs, C_BLOB_LIST *orphan_blobs)
C_BLOB_LIST * rej_cblob_list ()
C_BLOB_LIST * cblob_list ()
uinT8 space ()
void set_blanks (uinT8 new_blanks)
int script_id () const
void set_script_id (int id)
TBOX bounding_box ()
const char * text () const
void set_text (const char *new_text)
BOOL8 flag (WERD_FLAGS mask) const
void set_flag (WERD_FLAGS mask, BOOL8 value)
BOOL8 display_flag (uinT8 flag) const
void set_display_flag (uinT8 flag, BOOL8 value)
WERD * shallow_copy ()
void move (const ICOORD vec)
void join_on (WERD *other)
void copy_on (WERD *other)
void print ()
void plot (ScrollView *window, ScrollView::Color colour)
void plot (ScrollView *window)
void plot_rej_blobs (ScrollView *window)

Static Public Member Functions

static ScrollView::Color NextColor (ScrollView::Color colour)

Detailed Description

Definition at line 65 of file werd.h.

Constructor & Destructor Documentation

WERD::WERD ( ) [inline]

Definition at line 67 of file werd.h.

WERD::WERD ( C_BLOB_LIST *  blob_list,
uinT8  blank_count,
const char *  text 
)


Constructor to build a WERD from a list of C_BLOBs.

Parameters:
  blob_list    The C_BLOBs (in word order) are not copied; we take its elements and put them in our lists.
  blank_count  Blanks in front of the word.
  text         Correct text; outlives this WERD.

Definition at line 44 of file werd.cpp.

  : blanks(blank_count),
    correct(text) {
  C_BLOB_IT start_it = blob_list;
  C_BLOB_IT end_it = blob_list;
  C_BLOB_IT rej_cblob_it = &rej_cblobs;
  C_OUTLINE_IT c_outline_it;
  inT16 inverted_vote = 0;
  inT16 non_inverted_vote = 0;

  // Move blob_list's elements into cblobs.
  while (!end_it.at_last())
  cblobs.assign_to_sublist(&start_it, &end_it);

    Set white on black flag for the WERD, moving any duff blobs onto the
    rej_cblobs list.
    First, walk the cblobs checking the inverse flag for each outline of each
    cblob. If a cblob has inconsistent flag settings for its different
    outlines, move the blob to the reject list. Otherwise, increment the
    appropriate w-on-b or b-on-w vote for the word.

    Now set the inversion flag for the WERD by maximum vote.

    Walk the blobs again, moving any blob whose inversion flag does not agree
    with the consensus onto the reject list.
  if (start_it.empty())
  for (start_it.mark_cycle_pt(); !start_it.cycled_list(); start_it.forward()) {
    BOOL8 reject_blob = FALSE;
    BOOL8 blob_inverted;

    blob_inverted = c_outline_it.data()->flag(COUT_INVERSE);
    for (c_outline_it.mark_cycle_pt();
         !c_outline_it.cycled_list() && !reject_blob;
         c_outline_it.forward()) {
      reject_blob = c_outline_it.data()->flag(COUT_INVERSE) != blob_inverted;
    if (reject_blob) {
    } else {
      if (blob_inverted)

  flags.set_bit(W_INVERSE, (inverted_vote > non_inverted_vote));

  if (start_it.empty())
  for (start_it.mark_cycle_pt(); !start_it.cycled_list(); start_it.forward()) {
    if (c_outline_it.data()->flag(COUT_INVERSE) != flags.bit(W_INVERSE))
WERD::WERD ( C_BLOB_LIST *  blob_list,
WERD *  clone 
)


Constructor to build a WERD from a list of C_BLOBs. The C_BLOBs are not copied so the source list is emptied.

Definition at line 118 of file werd.cpp.

  : flags(clone->flags),
    correct(clone->correct) {
  C_BLOB_IT start_it = blob_list;  // iterator
  C_BLOB_IT end_it = blob_list;    // another

  while (!end_it.at_last ())
    end_it.forward ();           //move to last
  ((C_BLOB_LIST *) (&cblobs))->assign_to_sublist (&start_it, &end_it);
  //move to our list
  blanks = clone->blanks;
  //      fprintf(stderr,"Wrong constructor!!!!\n");
WERD::~WERD ( ) [inline]

Definition at line 83 of file werd.h.


Member Function Documentation

TBOX WERD::bounding_box ( )


Return the bounding box of the WERD. This is quite a mess to compute! Originally, reject cblobs were excluded; however, this led to bugs when the words on the row were re-sorted. The original words were built with reject blobs included, and the FUZZY SPACE flags were set accordingly. If ALL the blobs in a word are rejected, the bounding box for the word is NULL, which breaks the sort and can lead to the first word in a row being erroneously marked as FUZZY space.

Definition at line 159 of file werd.cpp.

  TBOX box;                       // box being built
  C_BLOB_IT rej_cblob_it = &rej_cblobs;  // rejected blobs

  for (rej_cblob_it.mark_cycle_pt(); !rej_cblob_it.cycled_list();
       rej_cblob_it.forward()) {
    box += rej_cblob_it.data()->bounding_box();

  C_BLOB_IT it = &cblobs;    // blobs of WERD
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    box += it.data()->bounding_box();
  return box;
C_BLOB_LIST* WERD::cblob_list ( ) [inline]

Definition at line 105 of file werd.h.

                              {  // get compact blobs
      return &cblobs;
WERD * WERD::ConstructFromSingleBlob ( bool  bol,
bool  eol,
C_BLOB *  blob 
)

Definition at line 136 of file werd.cpp.

  C_BLOB_LIST temp_blobs;
  C_BLOB_IT temp_it(&temp_blobs);
  WERD* blob_word = new WERD(&temp_blobs, this);
  blob_word->set_flag(W_BOL, bol);
  blob_word->set_flag(W_EOL, eol);
  return blob_word;
WERD * WERD::ConstructWerdWithNewBlobs ( C_BLOB_LIST *  all_blobs,
C_BLOB_LIST *  orphan_blobs 
)


This method returns a new werd constructed using the blobs in the input all_blobs list which correspond to the blobs in this werd object. The blobs used to construct the new word are consumed and removed from the input all_blobs list. Returns NULL if the word could not be constructed. Original blobs for which no matches were found are appended to the output list orphan_blobs.

Definition at line 399 of file werd.cpp.

  C_BLOB_LIST current_blob_list;
  C_BLOB_IT werd_blobs_it(&current_blob_list);
  // Add the word's c_blobs.

  // New blob list. These contain the blobs which will form the new word.
  C_BLOB_LIST new_werd_blobs;
  C_BLOB_IT new_blobs_it(&new_werd_blobs);

  // not_found_blobs contains the list of current word's blobs for which a
  // corresponding blob wasn't found in the input all_blobs list.
  C_BLOB_LIST not_found_blobs;
  C_BLOB_IT not_found_it(&not_found_blobs);

  for (werd_blobs_it.mark_cycle_pt(); !werd_blobs_it.cycled_list();
       werd_blobs_it.forward()) {
    C_BLOB* werd_blob = werd_blobs_it.extract();
    TBOX werd_blob_box = werd_blob->bounding_box();
    bool found = false;
    // Now find the corresponding blob for this blob in the all_blobs
    // list. For now, follow the inefficient method of pairwise
    // comparisons. Ideally, one can pre-bucket the blobs by row.
    C_BLOB_IT all_blobs_it(all_blobs);
    for (all_blobs_it.mark_cycle_pt(); !all_blobs_it.cycled_list();
         all_blobs_it.forward()) {
      C_BLOB* a_blob = all_blobs_it.data();
      // Compute the overlap of the two blobs. If major, a_blob should
      // be added to the new blobs list.
      TBOX a_blob_box = a_blob->bounding_box();
      if (a_blob_box.null_box()) {
        tprintf("Bounding box couldn't be ascertained\n");
      if (werd_blob_box.contains(a_blob_box) ||
          werd_blob_box.major_overlap(a_blob_box)) {
        // Old blobs are from minimal splits, therefore are expected to be
        // bigger. The new small blobs should cover a significant portion.
        // This is it.
        found = true;
    if (!found) {
    } else {
      delete werd_blob;
  // Iterate over all not found blobs. Some of them may be due to
  // under-segmentation (which is OK, since the corresponding blob is already
  // in the list in that case).
  for (not_found_it.mark_cycle_pt(); !not_found_it.cycled_list();
       not_found_it.forward()) {
    C_BLOB* not_found = not_found_it.data();
    TBOX not_found_box = not_found->bounding_box();
    C_BLOB_IT existing_blobs_it(new_blobs_it);
    for (existing_blobs_it.mark_cycle_pt(); !existing_blobs_it.cycled_list();
         existing_blobs_it.forward()) {
      C_BLOB* a_blob = existing_blobs_it.data();
      TBOX a_blob_box = a_blob->bounding_box();
      if ((not_found_box.major_overlap(a_blob_box) ||
           a_blob_box.major_overlap(not_found_box)) &&
           not_found_box.y_overlap(a_blob_box)) {
        // Already taken care of.
        delete not_found_it.extract();
  if (orphan_blobs) {
    C_BLOB_IT orphan_blobs_it(orphan_blobs);

  // New blobs are ready. Create a new werd object with these.
  WERD* new_werd = NULL;
  if (!new_werd_blobs.empty()) {
    new_werd = new WERD(&new_werd_blobs, this);
  } else {
    // Add the blobs back to this word so that it can be reused.
    C_BLOB_IT this_list_it(cblob_list());
  return new_werd;
void WERD::copy_on ( WERD *  other)


Copy blobs from other word onto this one.

Definition at line 219 of file werd.cpp.

  bool reversed = other->bounding_box().left() < bounding_box().left();
  C_BLOB_IT c_blob_it(&cblobs);
  C_BLOB_LIST c_blobs;

  c_blobs.deep_copy(&other->cblobs, &C_BLOB::deep_copy);
  if (reversed) {
  } else {
  if (!other->rej_cblobs.empty()) {
    C_BLOB_IT rej_c_blob_it(&rej_cblobs);
    C_BLOB_LIST new_rej_c_blobs;

    new_rej_c_blobs.deep_copy(&other->rej_cblobs, &C_BLOB::deep_copy);
    if (reversed) {
    } else {
BOOL8 WERD::display_flag ( uinT8  flag) const [inline]

Definition at line 130 of file werd.h.

{ return disp_flags.bit(flag); }
BOOL8 WERD::flag ( WERD_FLAGS  mask) const [inline]

Definition at line 127 of file werd.h.

{ return flags.bit(mask); }
void WERD::join_on ( WERD *  other)


Join other word onto this one. Delete the old word.

Definition at line 196 of file werd.cpp.

  C_BLOB_IT blob_it(&cblobs);
  C_BLOB_IT src_it(&other->cblobs);
  C_BLOB_IT rej_cblob_it(&rej_cblobs);
  C_BLOB_IT src_rej_it(&other->rej_cblobs);

  while (!src_it.empty()) {
  while (!src_rej_it.empty()) {
void WERD::move ( const ICOORD  vec)



Definition at line 183 of file werd.cpp.

  C_BLOB_IT cblob_it(&cblobs);  // cblob iterator

  for (cblob_it.mark_cycle_pt(); !cblob_it.cycled_list(); cblob_it.forward())
ScrollView::Color WERD::NextColor ( ScrollView::Color  colour) [static]

Definition at line 292 of file werd.cpp.

  ScrollView::Color next = static_cast<ScrollView::Color>(colour + 1);
  if (next >= LAST_COLOUR || next < FIRST_COLOUR)
    next = FIRST_COLOUR;
  return next;
WERD & WERD::operator= ( const WERD &  source)


Assign a word, DEEP copying the blob list

Definition at line 356 of file werd.cpp.

  this->ELIST2_LINK::operator= (source);
  blanks = source.blanks;
  flags = source.flags;
  script_id_ = source.script_id_;
  dummy = source.dummy;
  correct = source.correct;
  if (!cblobs.empty())
  cblobs.deep_copy(&source.cblobs, &C_BLOB::deep_copy);

  if (!rej_cblobs.empty())
  rej_cblobs.deep_copy(&source.rej_cblobs, &C_BLOB::deep_copy);
  return *this;
void WERD::plot ( ScrollView *  window,
ScrollView::Color  colour 
)


Draw the WERD in the given colour.

Definition at line 282 of file werd.cpp.

  C_BLOB_IT it = &cblobs;
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    it.data()->plot(window, colour, colour);
void WERD::plot ( ScrollView *  window)


Draw the WERD in rainbow colours in window.

Definition at line 306 of file werd.cpp.

  ScrollView::Color colour = FIRST_COLOUR;
  C_BLOB_IT it = &cblobs;
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    it.data()->plot(window, colour, CHILD_COLOUR);
    colour = NextColor(colour);
void WERD::plot_rej_blobs ( ScrollView *  window)


Draw the WERD rejected blobs in window - ALWAYS GREY

Definition at line 324 of file werd.cpp.

  C_BLOB_IT it = &rej_cblobs;
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    it.data()->plot(window, ScrollView::GREY, ScrollView::GREY);
void WERD::print ( )


Display members

Definition at line 251 of file werd.cpp.

  tprintf("Blanks= %d\n", blanks);
  tprintf("Flags = %d = 0%o\n", flags.val, flags.val);
  tprintf("   W_SEGMENTED = %s\n", flags.bit(W_SEGMENTED) ? "TRUE" : "FALSE ");
  tprintf("   W_ITALIC = %s\n", flags.bit(W_ITALIC) ? "TRUE" : "FALSE ");
  tprintf("   W_BOL = %s\n", flags.bit(W_BOL) ? "TRUE" : "FALSE ");
  tprintf("   W_EOL = %s\n", flags.bit(W_EOL) ? "TRUE" : "FALSE ");
  tprintf("   W_NORMALIZED = %s\n",
          flags.bit(W_NORMALIZED) ? "TRUE" : "FALSE ");
  tprintf("   W_SCRIPT_HAS_XHEIGHT = %s\n",
          flags.bit(W_SCRIPT_HAS_XHEIGHT) ? "TRUE" : "FALSE ");
  tprintf("   W_SCRIPT_IS_LATIN = %s\n",
          flags.bit(W_SCRIPT_IS_LATIN) ? "TRUE" : "FALSE ");
  tprintf("   W_DONT_CHOP = %s\n", flags.bit(W_DONT_CHOP) ? "TRUE" : "FALSE ");
  tprintf("   W_REP_CHAR = %s\n", flags.bit(W_REP_CHAR) ? "TRUE" : "FALSE ");
  tprintf("   W_FUZZY_SP = %s\n", flags.bit(W_FUZZY_SP) ? "TRUE" : "FALSE ");
  tprintf("   W_FUZZY_NON = %s\n", flags.bit(W_FUZZY_NON) ? "TRUE" : "FALSE ");
  tprintf("Correct= %s\n", correct.string());
  tprintf("Rejected cblob count = %d\n", rej_cblobs.length());
  tprintf("Script = %d\n", script_id_);
C_BLOB_LIST* WERD::rej_cblob_list ( ) [inline]

Definition at line 100 of file werd.h.

                                  {  // compact format
      return &rej_cblobs;
int WERD::script_id ( ) const [inline]

Definition at line 115 of file werd.h.

      return script_id_;
void WERD::set_blanks ( uinT8  new_blanks) [inline]

Definition at line 112 of file werd.h.

      blanks = new_blanks;
void WERD::set_display_flag ( uinT8  flag,
BOOL8  value 
) [inline]

Definition at line 131 of file werd.h.

      disp_flags.set_bit(flag, value);
void WERD::set_flag ( WERD_FLAGS  mask,
BOOL8  value 
) [inline]

Definition at line 128 of file werd.h.

{ flags.set_bit(mask, value); }
void WERD::set_script_id ( int  id) [inline]

Definition at line 118 of file werd.h.

      script_id_ = id;
void WERD::set_text ( const char *  new_text) [inline]

Definition at line 125 of file werd.h.

{ correct = new_text; }
WERD * WERD::shallow_copy ( )


Make a shallow copy of a word

Definition at line 339 of file werd.cpp.

  WERD *new_word = new WERD;

  new_word->blanks = blanks;
  new_word->flags = flags;
  new_word->dummy = dummy;
  new_word->correct = correct;
  return new_word;
uinT8 WERD::space ( ) [inline]

Definition at line 109 of file werd.h.

                  {  // access function
      return blanks;
const char* WERD::text ( ) const [inline]

Definition at line 124 of file werd.h.

{ return correct.string(); }

The documentation for this class was generated from the following files: