Tesseract  3.02
tesseract-ocr/ccstruct/ocrblock.cpp File Reference
#include "mfcpch.h"
#include <stdlib.h>
#include "blckerr.h"
#include "ocrblock.h"
#include "stepblob.h"
#include "tprintf.h"

Go to the source code of this file.

Defines

#define BLOCK_LABEL_HEIGHT   150
#define ROW_SPACING   5

Functions

int decreasing_top_order (const void *row1, const void *row2)
bool LeftMargin (ICOORDELT_LIST *segments, int x, int *margin)
bool RightMargin (ICOORDELT_LIST *segments, int x, int *margin)
void PrintSegmentationStats (BLOCK_LIST *block_list)
void ExtractBlobsFromSegmentation (BLOCK_LIST *blocks, C_BLOB_LIST *output_blob_list)
void RefreshWordBlobsFromNewBlobs (BLOCK_LIST *block_list, C_BLOB_LIST *new_blobs, C_BLOB_LIST *not_found_blobs)

Define Documentation

#define BLOCK_LABEL_HEIGHT   150

Definition at line 27 of file ocrblock.cpp.

#define ROW_SPACING   5

Function Documentation

int decreasing_top_order ( const void *  row1,
const void *  row2 
)

decreasing_top_order

Sort Comparator: Return <0 if row1 top > row2 top (i.e. rows are ordered by decreasing top), 0 if the tops are equal, >0 otherwise.

Definition at line 72 of file ocrblock.cpp.

                                           {
  // Each argument is treated as a pointer to a ROW* element.
  ROW* first_row = *static_cast<ROW* const*>(row1);
  ROW* second_row = *static_cast<ROW* const*>(row2);
  // The result is negative when row1's top exceeds row2's top, zero when the
  // tops are equal and positive when row1's top is the smaller one.
  return second_row->bounding_box().top() - first_row->bounding_box().top();
}
void ExtractBlobsFromSegmentation ( BLOCK_LIST *  blocks,
C_BLOB_LIST *  output_blob_list 
)

Definition at line 433 of file ocrblock.cpp.

                                                                 {
  // Walks every word of every row of every block and appends the word's
  // blob list, followed by its rejected-blob list, to output_blob_list.
  // NOTE(review): add_list_after presumably transfers the elements out of
  // the word's own lists rather than copying them - confirm against the
  // list template before relying on the words keeping their blobs.
  C_BLOB_IT dest_it(output_blob_list);
  BLOCK_IT blk_it(blocks);
  for (blk_it.mark_cycle_pt(); !blk_it.cycled_list(); blk_it.forward()) {
    ROW_IT line_it(blk_it.data()->row_list());
    for (line_it.mark_cycle_pt(); !line_it.cycled_list(); line_it.forward()) {
      WERD_IT word_it(line_it.data()->word_list());
      for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
        WERD* word = word_it.data();
        // Reposition at the tail before each splice so the output keeps the
        // order in which the lists were visited.
        dest_it.move_to_last();
        dest_it.add_list_after(word->cblob_list());
        dest_it.move_to_last();
        dest_it.add_list_after(word->rej_cblob_list());
      }
    }
  }
}
bool LeftMargin ( ICOORDELT_LIST *  segments,
int  x,
int *  margin 
)

Definition at line 244 of file ocrblock.cpp.

                                                              {
  // Finds the smallest non-negative value of (x - segment.x()) over all
  // segments and stores it in *margin. Returns true if at least one segment
  // produced a non-negative value; otherwise *margin is left at 0 and the
  // function returns false.
  *margin = 0;
  if (segments->empty())
    return false;
  bool any_match = false;
  ICOORDELT_IT it(segments);
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    const int distance = x - it.data()->x();
    if (distance < 0)
      continue;  // This segment's x lies beyond x; it cannot be a margin.
    // The first candidate is always taken; later ones only if smaller.
    if (!any_match || distance < *margin)
      *margin = distance;
    any_match = true;
  }
  return any_match;
}
void PrintSegmentationStats ( BLOCK_LIST *  block_list)

Definition at line 400 of file ocrblock.cpp.

                                                    {
  // Counts the blocks, rows, words and blobs reachable from block_list and
  // reports the four totals through tprintf. Only each word's cblob_list()
  // contributes to the blob total.
  int block_count = 0;
  int row_count = 0;
  int word_count = 0;
  int blob_count = 0;
  BLOCK_IT blk_it(block_list);
  for (blk_it.mark_cycle_pt(); !blk_it.cycled_list(); blk_it.forward()) {
    ++block_count;
    ROW_IT line_it(blk_it.data()->row_list());
    for (line_it.mark_cycle_pt(); !line_it.cycled_list(); line_it.forward()) {
      ++row_count;
      WERD_IT word_it(line_it.data()->word_list());
      for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
        ++word_count;
        blob_count += word_it.data()->cblob_list()->length();
      }
    }
  }
  tprintf("Block list stats:\nBlocks = %d\nRows = %d\nWords = %d\nBlobs = %d\n",
          block_count, row_count, word_count, blob_count);
}
void RefreshWordBlobsFromNewBlobs ( BLOCK_LIST *  block_list,
C_BLOB_LIST *  new_blobs,
C_BLOB_LIST *  not_found_blobs 
)

Definition at line 468 of file ocrblock.cpp.

                                                                {
  // Rebuilds the word list of every row in block_list. Each existing word is
  // asked to construct a replacement from new_blobs (with not_found_blobs
  // passed through to the same call); when it returns one, the old word is
  // deleted, otherwise the old word is kept. The per-row order of words is
  // preserved because results are appended in visiting order.
  //
  // Now iterate over all the blobs in the segmentation_block_list_, and just
  // replace the corresponding c-blobs inside the werds.
  BLOCK_IT block_it(block_list);
  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
    BLOCK* block = block_it.data();
    // Iterate over all rows in the block.
    ROW_IT row_it(block->row_list());
    for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
      ROW* row = row_it.data();
      // Iterate over all werds in the row.
      WERD_IT werd_it(row->word_list());
      // Temporary list that collects this row's replacement (or retained)
      // words before they are put back into the row.
      WERD_LIST new_words;
      WERD_IT new_words_it(&new_words);
      for (werd_it.mark_cycle_pt(); !werd_it.cycled_list(); werd_it.forward()) {
        // NOTE(review): extract() presumably unlinks the word from the row's
        // list without destroying it, leaving this function responsible for
        // it - confirm against the list iterator's contract.
        WERD* werd = werd_it.extract();
        WERD* new_werd = werd->ConstructWerdWithNewBlobs(new_blobs,
                                                         not_found_blobs);
        if (new_werd) {
          // Insert this new werd into the actual row's werd-list. Remove the
          // existing one.
          new_words_it.add_after_then_move(new_werd);
          delete werd;
        } else {
          // Reinsert the older word back, for lack of better options.
          // This is critical since dropping the words messes up segmentation:
          // eg. 1st word in the row might otherwise have W_FUZZY_NON turned on.
          new_words_it.add_after_then_move(werd);
        }
      }
      // Get rid of the old word list & replace it with the new one.
      // NOTE(review): every word was extracted in the loop above, so the
      // row's list is presumably already empty at this point - confirm
      // whether this clear() is purely defensive.
      row->word_list()->clear();
      werd_it.move_to_first();
      werd_it.add_list_after(&new_words);
    }
  }
}
bool RightMargin ( ICOORDELT_LIST *  segments,
int  x,
int *  margin 
)

Definition at line 274 of file ocrblock.cpp.

                                                               {
  // Finds the smallest non-negative value of (segment.x() + segment.y() - x)
  // over all segments and stores it in *margin. Returns true if at least one
  // segment produced a non-negative value; otherwise *margin is left at 0
  // and the function returns false.
  // NOTE(review): x() + y() presumably denotes the segment's right end, with
  // y() holding its length - confirm against the code that fills the list.
  *margin = 0;
  if (segments->empty())
    return false;
  bool any_match = false;
  ICOORDELT_IT it(segments);
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    const int distance = it.data()->x() + it.data()->y() - x;
    if (distance < 0)
      continue;  // This segment's end lies before x; it cannot be a margin.
    // The first candidate is always taken; later ones only if smaller.
    if (!any_match || distance < *margin)
      *margin = distance;
    any_match = true;
  }
  return any_match;
}