#include <textord.h>

Public Member Functions
	Textord (CCStruct *ccstruct)
	~Textord ()
void	TextordPage (PageSegMode pageseg_mode, int width, int height, Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
void	CleanupSingleRowResult (PageSegMode pageseg_mode, PAGE_RES *page_res)
bool	use_cjk_fp_model () const
void	set_use_cjk_fp_model (bool flag)
void	to_spacing (ICOORD page_tr, TO_BLOCK_LIST *blocks)
ROW *	make_prop_words (TO_ROW *row, FCOORD rotation)
ROW *	make_blob_words (TO_ROW *row, FCOORD rotation)
void	find_components (Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
void	filter_blobs (ICOORD page_tr, TO_BLOCK_LIST *blocks, BOOL8 testing_on)
Public Attributes
bool	textord_single_height_mode = false
bool	tosp_old_to_method = false
bool	tosp_old_to_constrain_sp_kn = false
bool	tosp_only_use_prop_rows = true
bool	tosp_force_wordbreak_on_punct = false
bool	tosp_use_pre_chopping = false
bool	tosp_old_to_bug_fix = false
bool	tosp_block_use_cert_spaces = true
bool	tosp_row_use_cert_spaces = true
bool	tosp_narrow_blobs_not_cert = true
bool	tosp_row_use_cert_spaces1 = true
bool	tosp_recovery_isolated_row_stats = true
bool	tosp_only_small_gaps_for_kern = false
bool	tosp_all_flips_fuzzy = false
bool	tosp_fuzzy_limit_all = true
bool	tosp_stats_use_xht_gaps = true
bool	tosp_use_xht_gaps = true
bool	tosp_only_use_xht_gaps = false
bool	tosp_rule_9_test_punct = false
bool	tosp_flip_fuzz_kn_to_sp = true
bool	tosp_flip_fuzz_sp_to_kn = true
bool	tosp_improve_thresh = false
int	tosp_debug_level = 0
int	tosp_enough_space_samples_for_median = 3
int	tosp_redo_kern_limit = 10
int	tosp_few_samples = 40
int	tosp_short_row = 20
int	tosp_sanity_method = 1
double	tosp_old_sp_kn_th_factor = 2.0
double	tosp_threshold_bias1 = 0
double	tosp_threshold_bias2 = 0
double	tosp_narrow_fraction = 0.3
double	tosp_narrow_aspect_ratio = 0.48
double	tosp_wide_fraction = 0.52
double	tosp_wide_aspect_ratio = 0.0
double	tosp_fuzzy_space_factor = 0.6
double	tosp_fuzzy_space_factor1 = 0.5
double	tosp_fuzzy_space_factor2 = 0.72
double	tosp_gap_factor = 0.83
double	tosp_kern_gap_factor1 = 2.0
double	tosp_kern_gap_factor2 = 1.3
double	tosp_kern_gap_factor3 = 2.5
double	tosp_ignore_big_gaps = -1
double	tosp_ignore_very_big_gaps = 3.5
double	tosp_rep_space = 1.6
double	tosp_enough_small_gaps = 0.65
double	tosp_table_kn_sp_ratio = 2.25
double	tosp_table_xht_sp_ratio = 0.33
double	tosp_table_fuzzy_kn_sp_ratio = 3.0
double	tosp_fuzzy_kn_fraction = 0.5
double	tosp_fuzzy_sp_fraction = 0.5
double	tosp_min_sane_kn_sp = 1.5
double	tosp_init_guess_kn_mult = 2.2
double	tosp_init_guess_xht_mult = 0.28
double	tosp_max_sane_kn_thresh = 5.0
double	tosp_flip_caution = 0.0
double	tosp_large_kerning = 0.19
double	tosp_dont_fool_with_small_kerns = -1
double	tosp_near_lh_edge = 0
double	tosp_silly_kn_sp_gap = 0.2
double	tosp_pass_wide_fuzz_sp_to_context = 0.75
bool	textord_no_rejects = false
bool	textord_show_blobs = false
bool	textord_show_boxes = false
int	textord_max_noise_size = 7
double	textord_blob_size_bigile = 95
double	textord_noise_area_ratio = 0.7
double	textord_blob_size_smallile = 20
double	textord_initialx_ile = 0.75
double	textord_initialasc_ile = 0.90
int	textord_noise_sizefraction = 10
double	textord_noise_sizelimit = 0.5
int	textord_noise_translimit = 16
double	textord_noise_normratio = 2.0
bool	textord_noise_rejwords = true
bool	textord_noise_rejrows = true
double	textord_noise_syfract = 0.2
double	textord_noise_sxfract = 0.4
double	textord_noise_hfract = 1.0/64
int	textord_noise_sncount = 1
double	textord_noise_rowratio = 6.0
bool	textord_noise_debug = false
double	textord_blshift_maxshift = 0.00
double	textord_blshift_xfraction = 9.99

Detailed Description

Definition at line 39 of file textord.h.

Constructor & Destructor Documentation

tesseract::Textord::Textord ( CCStruct * ccstruct ) [explicit]

Definition at line 29 of file textord.cpp.

    // Member-initializer list for Textord(CCStruct*).
    // Stores the supplied CCStruct pointer, starts with the CJK fixed-pitch
    // model flag set to false, and then initializes every tunable parameter
    // member. Each *_MEMBER macro is given the member name, its default value,
    // a help string, and ccstruct_->params(). The constructor body is empty.
    : ccstruct_(ccstruct), use_cjk_fp_model_(false),
      // makerow.cpp ///////////////////////////////////////////
      BOOL_MEMBER(textord_single_height_mode, false,
                  "Script has no xheight, so use a single mode",
                  ccstruct_->params()),
      // tospace.cpp ///////////////////////////////////////////
      // NOTE(review): the help text below is identical to the one used for
      // tosp_use_pre_chopping further down - looks like a copy/paste; confirm.
      BOOL_MEMBER(tosp_old_to_method, false, "Space stats use prechopping?",
                  ccstruct_->params()),
      BOOL_MEMBER(tosp_old_to_constrain_sp_kn, false,
                  "Constrain relative values of inter and intra-word gaps for "
                  "old_to_method.",
                  ccstruct_->params()),
      BOOL_MEMBER(tosp_only_use_prop_rows, true,
                  "Block stats to use fixed pitch rows?",
                  ccstruct_->params()),
      BOOL_MEMBER(tosp_force_wordbreak_on_punct, false,
                  "Force word breaks on punct to break long lines in non-space "
                  "delimited langs",
                  ccstruct_->params()),
      BOOL_MEMBER(tosp_use_pre_chopping, false,
                  "Space stats use prechopping?",
                  ccstruct_->params()),
      BOOL_MEMBER(tosp_old_to_bug_fix, false, "Fix suspected bug in old code",
                  ccstruct_->params()),
      // The next four parameters all share the same help string.
      BOOL_MEMBER(tosp_block_use_cert_spaces, true,
                  "Only stat OBVIOUS spaces",
                  ccstruct_->params()),
      BOOL_MEMBER(tosp_row_use_cert_spaces, true, "Only stat OBVIOUS spaces",
                  ccstruct_->params()),
      BOOL_MEMBER(tosp_narrow_blobs_not_cert, true,
            "Only stat OBVIOUS spaces",
                  ccstruct_->params()),
      BOOL_MEMBER(tosp_row_use_cert_spaces1, true, "Only stat OBVIOUS spaces",
                  ccstruct_->params()),
      BOOL_MEMBER(tosp_recovery_isolated_row_stats, true,
                  "Use row alone when inadequate cert spaces",
                  ccstruct_->params()),
      BOOL_MEMBER(tosp_only_small_gaps_for_kern, false, "Better guess",
                  ccstruct_->params()),
      BOOL_MEMBER(tosp_all_flips_fuzzy, false, "Pass ANY flip to context?",
                  ccstruct_->params()),
      BOOL_MEMBER(tosp_fuzzy_limit_all, true,
                  "Dont restrict kn->sp fuzzy limit to tables",
                  ccstruct_->params()),
      BOOL_MEMBER(tosp_stats_use_xht_gaps, true,
                  "Use within xht gap for wd breaks",
                  ccstruct_->params()),
      BOOL_MEMBER(tosp_use_xht_gaps, true, "Use within xht gap for wd breaks",
                  ccstruct_->params()),
      BOOL_MEMBER(tosp_only_use_xht_gaps, false,
                  "Only use within xht gap for wd breaks",
                  ccstruct_->params()),
      BOOL_MEMBER(tosp_rule_9_test_punct, false,
                  "Dont chng kn to space next to punct",
                  ccstruct_->params()),
      BOOL_MEMBER(tosp_flip_fuzz_kn_to_sp, true, "Default flip",
                  ccstruct_->params()),
      BOOL_MEMBER(tosp_flip_fuzz_sp_to_kn, true, "Default flip",
                  ccstruct_->params()),
      BOOL_MEMBER(tosp_improve_thresh, false, "Enable improvement heuristic",
                  ccstruct_->params()),
      INT_MEMBER(tosp_debug_level, 0, "Debug data",
                 ccstruct_->params()),
      INT_MEMBER(tosp_enough_space_samples_for_median, 3,
           "or should we use mean",
                 ccstruct_->params()),
      INT_MEMBER(tosp_redo_kern_limit, 10,
                 "No.samples reqd to reestimate for row",
                 ccstruct_->params()),
      INT_MEMBER(tosp_few_samples, 40,
                 "No.gaps reqd with 1 large gap to treat as a table",
                 ccstruct_->params()),
      INT_MEMBER(tosp_short_row, 20,
                 "No.gaps reqd with few cert spaces to use certs",
                 ccstruct_->params()),
      INT_MEMBER(tosp_sanity_method, 1, "How to avoid being silly",
                 ccstruct_->params()),
      double_MEMBER(tosp_old_sp_kn_th_factor, 2.0,
                    "Factor for defining space threshold in terms of space and "
                    "kern sizes",
                    ccstruct_->params()),
      double_MEMBER(tosp_threshold_bias1, 0,
                    "how far between kern and space?",
                    ccstruct_->params()),
      double_MEMBER(tosp_threshold_bias2, 0,
                    "how far between kern and space?",
                    ccstruct_->params()),
      double_MEMBER(tosp_narrow_fraction, 0.3, "Fract of xheight for narrow",
                    ccstruct_->params()),
      double_MEMBER(tosp_narrow_aspect_ratio, 0.48,
                    "narrow if w/h less than this",
                    ccstruct_->params()),
      double_MEMBER(tosp_wide_fraction, 0.52, "Fract of xheight for wide",
                    ccstruct_->params()),
      // NOTE(review): help text says "less than" for the *wide* ratio, the same
      // wording as the narrow ratio above - possibly a copy/paste; confirm.
      double_MEMBER(tosp_wide_aspect_ratio, 0.0, "wide if w/h less than this",
                    ccstruct_->params()),
      double_MEMBER(tosp_fuzzy_space_factor, 0.6,
                    "Fract of xheight for fuzz sp",
                    ccstruct_->params()),
      double_MEMBER(tosp_fuzzy_space_factor1, 0.5,
                    "Fract of xheight for fuzz sp",
                    ccstruct_->params()),
      double_MEMBER(tosp_fuzzy_space_factor2, 0.72,
                    "Fract of xheight for fuzz sp",
                    ccstruct_->params()),
      double_MEMBER(tosp_gap_factor, 0.83, "gap ratio to flip sp->kern",
                    ccstruct_->params()),
      double_MEMBER(tosp_kern_gap_factor1, 2.0, "gap ratio to flip kern->sp",
                    ccstruct_->params()),
      double_MEMBER(tosp_kern_gap_factor2, 1.3, "gap ratio to flip kern->sp",
                    ccstruct_->params()),
      double_MEMBER(tosp_kern_gap_factor3, 2.5, "gap ratio to flip kern->sp",
                    ccstruct_->params()),
      double_MEMBER(tosp_ignore_big_gaps, -1, "xht multiplier",
                    ccstruct_->params()),
      double_MEMBER(tosp_ignore_very_big_gaps, 3.5, "xht multiplier",
                    ccstruct_->params()),
      double_MEMBER(tosp_rep_space, 1.6, "rep gap multiplier for space",
                    ccstruct_->params()),
      double_MEMBER(tosp_enough_small_gaps, 0.65,
                    "Fract of kerns reqd for isolated row stats",
                    ccstruct_->params()),
      double_MEMBER(tosp_table_kn_sp_ratio, 2.25,
                    "Min difference of kn & sp in table",
                    ccstruct_->params()),
      double_MEMBER(tosp_table_xht_sp_ratio, 0.33,
                    "Expect spaces bigger than this",
                    ccstruct_->params()),
      double_MEMBER(tosp_table_fuzzy_kn_sp_ratio, 3.0,
                    "Fuzzy if less than this",
                    ccstruct_->params()),
      double_MEMBER(tosp_fuzzy_kn_fraction, 0.5, "New fuzzy kn alg",
                    ccstruct_->params()),
      double_MEMBER(tosp_fuzzy_sp_fraction, 0.5, "New fuzzy sp alg",
                    ccstruct_->params()),
      double_MEMBER(tosp_min_sane_kn_sp, 1.5,
                    "Dont trust spaces less than this time kn",
                    ccstruct_->params()),
      double_MEMBER(tosp_init_guess_kn_mult, 2.2,
                    "Thresh guess - mult kn by this",
                    ccstruct_->params()),
      double_MEMBER(tosp_init_guess_xht_mult, 0.28,
                    "Thresh guess - mult xht by this",
                    ccstruct_->params()),
      double_MEMBER(tosp_max_sane_kn_thresh, 5.0,
                    "Multiplier on kn to limit thresh",
                    ccstruct_->params()),
      double_MEMBER(tosp_flip_caution, 0.0,
                    "Dont autoflip kn to sp when large separation",
                    ccstruct_->params()),
      double_MEMBER(tosp_large_kerning, 0.19,
                    "Limit use of xht gap with large kns",
                    ccstruct_->params()),
      double_MEMBER(tosp_dont_fool_with_small_kerns, -1,
                    "Limit use of xht gap with odd small kns",
                    ccstruct_->params()),
      double_MEMBER(tosp_near_lh_edge, 0,
                    "Dont reduce box if the top left is non blank",
                    ccstruct_->params()),
      double_MEMBER(tosp_silly_kn_sp_gap, 0.2,
                    "Dont let sp minus kn get too small",
                    ccstruct_->params()),
      double_MEMBER(tosp_pass_wide_fuzz_sp_to_context, 0.75,
                    "How wide fuzzies need context",
                    ccstruct_->params()),
      // tordmain.cpp ///////////////////////////////////////////
      BOOL_MEMBER(textord_no_rejects, false, "Don't remove noise blobs",
                  ccstruct_->params()),
      BOOL_MEMBER(textord_show_blobs, false, "Display unsorted blobs",
                  ccstruct_->params()),
      // NOTE(review): same help text as textord_show_blobs above; this flag
      // gates the plot_box_list() calls in filter_blobs - confirm the wording.
      BOOL_MEMBER(textord_show_boxes, false, "Display unsorted blobs",
                  ccstruct_->params()),
      INT_MEMBER(textord_max_noise_size, 7, "Pixel size of noise",
                  ccstruct_->params()),
      double_MEMBER(textord_blob_size_bigile, 95, "Percentile for large blobs",
                    ccstruct_->params()),
      double_MEMBER(textord_noise_area_ratio, 0.7,
                    "Fraction of bounding box for noise",
                    ccstruct_->params()),
      double_MEMBER(textord_blob_size_smallile, 20,
                    "Percentile for small blobs",
                    ccstruct_->params()),
      double_MEMBER(textord_initialx_ile, 0.75,
                    "Ile of sizes for xheight guess",
                    ccstruct_->params()),
      double_MEMBER(textord_initialasc_ile, 0.90,
                    "Ile of sizes for xheight guess",
                    ccstruct_->params()),
      INT_MEMBER(textord_noise_sizefraction, 10,
                 "Fraction of size for maxima",
                 ccstruct_->params()),
      double_MEMBER(textord_noise_sizelimit, 0.5,
                    "Fraction of x for big t count",
                    ccstruct_->params()),
      INT_MEMBER(textord_noise_translimit, 16, "Transitions for normal blob",
                 ccstruct_->params()),
      double_MEMBER(textord_noise_normratio, 2.0,
                    "Dot to norm ratio for deletion",
                    ccstruct_->params()),
      BOOL_MEMBER(textord_noise_rejwords, true, "Reject noise-like words",
                  ccstruct_->params()),
      BOOL_MEMBER(textord_noise_rejrows, true, "Reject noise-like rows",
                  ccstruct_->params()),
      double_MEMBER(textord_noise_syfract, 0.2,
                    "xh fract height error for norm blobs",
                    ccstruct_->params()),
      double_MEMBER(textord_noise_sxfract, 0.4,
                    "xh fract width error for norm blobs",
                    ccstruct_->params()),
      double_MEMBER(textord_noise_hfract, 1.0/64,
                    "Height fraction to discard outlines as speckle noise",
                    ccstruct_->params()),
      INT_MEMBER(textord_noise_sncount, 1, "super norm blobs to save row",
                 ccstruct_->params()),
      // Shares its help string with textord_noise_normratio above.
      double_MEMBER(textord_noise_rowratio, 6.0,
                    "Dot to norm ratio for deletion",
                    ccstruct_->params()),
      BOOL_MEMBER(textord_noise_debug, false, "Debug row garbage detector",
                  ccstruct_->params()),
      double_MEMBER(textord_blshift_maxshift, 0.00, "Max baseline shift",
                    ccstruct_->params()),
      double_MEMBER(textord_blshift_xfraction, 9.99,
                    "Min size of baseline shift",
                    ccstruct_->params()) {
}

tesseract::Textord::~Textord ( )

Definition at line 256 of file textord.cpp.

{
  // Empty body: the destructor performs no explicit cleanup of its own.
}

Member Function Documentation

void tesseract::Textord::CleanupSingleRowResult	(	PageSegMode	pageseg_mode,
		PAGE_RES *	page_res
	)

Definition at line 334 of file textord.cpp.

                                                         {
  // Keeps only the words of the single best row in page_res.
  // Does nothing when PSM_LINE_FIND_ENABLED(pageseg_mode) is true.
  // "Best" is the row whose words have the highest mean
  // best_choice->certainty().
  if (PSM_LINE_FIND_ENABLED(pageseg_mode)) {
    return;  // No cleanup required.
  }

  PAGE_RES_IT res_it(page_res);

  // Pass 1: accumulate certainty per row and remember the highest mean.
  ROW_RES* winning_row = NULL;
  float winning_conf = 0.0f;
  float conf_sum = 0.0f;
  int words_in_row = 0;
  res_it.restart_page();
  while (res_it.word() != NULL) {
    WERD_RES* current = res_it.word();
    conf_sum += current->best_choice->certainty();
    words_in_row++;
    if (res_it.next_row() != res_it.row()) {
      // This was the last word of its row: close the running average.
      conf_sum /= words_in_row;
      if (winning_row == NULL || conf_sum > winning_conf) {
        winning_row = res_it.row();
        winning_conf = conf_sum;
      }
      words_in_row = 0;
      conf_sum = 0.0f;
    }
    res_it.forward();
  }

  // Pass 2: drop every word that does not belong to the winning row.
  res_it.restart_page();
  while (res_it.word() != NULL) {
    if (res_it.row() != winning_row) {
      res_it.DeleteCurrentWord();
    }
    res_it.forward();
  }
}

void tesseract::Textord::filter_blobs	(	ICOORD	page_tr,
		TO_BLOCK_LIST *	blocks,
		BOOL8	testing_on
	)

Definition at line 239 of file tordmain.cpp.

                                             {
  // For every TO_BLOCK in `blocks`: run filter_noise_blobs() over its blob
  // lists, derive line_size / line_spacing / max_blob_size from the result,
  // and (graphics builds only, when testing_on is set) plot the blobs.
  TO_BLOCK_IT to_block_it = blocks;

  #ifndef GRAPHICS_DISABLED
  // Wipe an already-open debug window before drawing into it again.
  if (to_win != NULL) {
    to_win->Clear();
  }
  #endif  // GRAPHICS_DISABLED

  to_block_it.mark_cycle_pt();
  while (!to_block_it.cycled_list()) {
    TO_BLOCK *current = to_block_it.data();

    current->line_size = filter_noise_blobs(&current->blobs,
                                            &current->noise_blobs,
                                            &current->small_blobs,
                                            &current->large_blobs);
    // Spacing is computed from the unscaled line_size, before the
    // textord_min_linesize factor is applied below.
    current->line_spacing = current->line_size *
        (tesseract::CCStruct::kDescenderFraction +
         tesseract::CCStruct::kXHeightFraction +
         2 * tesseract::CCStruct::kAscenderFraction) /
         tesseract::CCStruct::kXHeightFraction;
    current->line_size *= textord_min_linesize;
    current->max_blob_size = current->line_size * textord_excess_blobsize;

    #ifndef GRAPHICS_DISABLED
    if (textord_show_blobs && testing_on) {
      // The window is created lazily on first use.
      if (to_win == NULL) {
        create_to_win(page_tr);
      }
      current->plot_graded_blobs(to_win);
    }
    if (textord_show_boxes && testing_on) {
      if (to_win == NULL) {
        create_to_win(page_tr);
      }
      plot_box_list(to_win, &current->noise_blobs, ScrollView::WHITE);
      plot_box_list(to_win, &current->small_blobs, ScrollView::WHITE);
      plot_box_list(to_win, &current->large_blobs, ScrollView::WHITE);
      plot_box_list(to_win, &current->blobs, ScrollView::WHITE);
    }
    #endif  // GRAPHICS_DISABLED

    to_block_it.forward();
  }
}

void tesseract::Textord::find_components	(	Pix *	pix,
		BLOCK_LIST *	blocks,
		TO_BLOCK_LIST *	to_blocks
	)

Definition at line 208 of file tordmain.cpp.

                                                        {
  // Extracts edges for the blocks of `blocks`, assigns the resulting blobs to
  // `to_blocks`, then runs filter_blobs() on them. Returns early (after
  // printing a message) if either image dimension exceeds MAX_INT16.
  int img_width = pixGetWidth(pix);
  int img_height = pixGetHeight(pix);
  const bool oversized = img_width > MAX_INT16 || img_height > MAX_INT16;
  if (oversized) {
    tprintf("Input image too large! (%d, %d)\n", img_width, img_height);
    return;  // Can't handle it.
  }

  set_global_loc_code(LOC_EDGE_PROG);

  BLOCK_IT blk_it(blocks);
  blk_it.mark_cycle_pt();
  while (!blk_it.cycled_list()) {
    BLOCK* blk = blk_it.data();
    // Only a block that has a poly_block which is not text is skipped.
    const bool skip_block =
        blk->poly_block() != NULL && !blk->poly_block()->IsText();
    if (!skip_block) {
      extract_edges(pix, blk);
    }
    blk_it.forward();
  }

  assign_blobs_to_blocks2(pix, blocks, to_blocks);

  // Top-right corner of the page, as required by filter_blobs().
  ICOORD top_right(img_width, img_height);
  filter_blobs(top_right, to_blocks, !textord_test_landscape);
}

ROW * tesseract::Textord::make_blob_words	(	TO_ROW *	row,
		FCOORD	rotation
	)

Definition at line 1183 of file tospace.cpp.

                                {
  // Builds a ROW in which each blob of row->blob_list() (together with any
  // following blobs flagged joined_to_prev()) becomes its own WERD.
  // Returns the newly allocated ROW, or NULL when the row's blob list is
  // empty.
  // Note: `rotation` is not referenced in this body, and `coeffs` is filled in
  // but never read here.
  bool bol;                      // start of line
  ROW *real_row;                 // output row
  C_OUTLINE_IT cout_it;
  C_BLOB_LIST cblobs;
  C_BLOB_IT cblob_it = &cblobs;
  WERD_LIST words;
  WERD_IT word_it;               // new words
  WERD *word;                    // new word
  double coeffs[3];              // quadratic
  BLOBNBOX *bblob;               // current blob
  TBOX blob_box;                 // bounding box
  BLOBNBOX_IT box_it;            // iterator
  inT16 word_count = 0;

  cblob_it.set_to_list(&cblobs);
  box_it.set_to_list(row->blob_list());
  word_it.set_to_list(&words);
  bol = TRUE;
  if (!box_it.empty()) {

    do {
      bblob = box_it.data();
      blob_box = bblob->bounding_box();
      if (bblob->joined_to_prev()) {
        if (bblob->cblob() != NULL) {
          // Append this blob's outlines to the blob most recently added to
          // cblobs, then delete the C_BLOB they came from.
          cout_it.set_to_list(cblob_it.data()->out_list());
          cout_it.move_to_last();
          cout_it.add_list_after(bblob->cblob()->out_list());
          delete bblob->cblob();
        }
      } else {
        if (bblob->cblob() != NULL)
          cblob_it.add_after_then_move(bblob->cblob());
      }
      // Step to the following blob: whether it is joined to the one just
      // handled decides if the collected blobs are emitted as a word now.
      box_it.forward();         // next one
      bblob = box_it.data();
      blob_box = bblob->bounding_box();

      if (!bblob->joined_to_prev() && !cblobs.empty()) {
        // Same constructor call shape as make_prop_words, with a literal 1
        // in the position where that function passes its blank count.
        word = new WERD(&cblobs, 1, NULL);
        word_count++;
        word_it.add_after_then_move(word);
        if (bol) {
          // Only the first word created gets the beginning-of-line flag.
          word->set_flag(W_BOL, TRUE);
          bol = FALSE;
        }
        if (box_it.at_first()) { // at end of line
          word->set_flag(W_EOL, TRUE);
        }
      }
    }
    while (!box_it.at_first()); // until back at start
    /* Setup the row with created words. */
    coeffs[0] = 0;
    coeffs[1] = row->line_m();
    coeffs[2] = row->line_c();
    real_row = new ROW(row, (inT16) row->kern_size, (inT16) row->space_size);
    word_it.set_to_list(real_row->word_list());
                                 //put words in row
    word_it.add_list_after(&words);
    real_row->recalc_bounding_box();
    if (tosp_debug_level > 4) {
      tprintf ("Row:Made %d words in row ((%d,%d)(%d,%d))\n",
        word_count,
        real_row->bounding_box().left(),
        real_row->bounding_box().bottom(),
        real_row->bounding_box().right(),
        real_row->bounding_box().top());
    }
    return real_row;
  }
  return NULL;
}

ROW * tesseract::Textord::make_prop_words	(	TO_ROW *	row,
		FCOORD	rotation
	)

Definition at line 886 of file tospace.cpp.

                                {
  // Builds the output ROW for a proportionally spaced text row.
  //
  // Walks row->blob_list(), merging blobs flagged joined_to_prev() into the
  // preceding blob, and starts a new WERD whenever make_a_word_break() asks
  // for one, the end of the blob list is reached, or a pre-built repeated
  // character word from row->rep_words has to be inserted before the next
  // blob. Remaining repeated-character words are appended after the loop.
  //
  // Returns a newly allocated ROW holding the words, or NULL when the row's
  // blob list is empty.
  //
  // Note: `rotation` is not referenced in this body; `xstarts` and `coeffs`
  // are written but never read here.
  BOOL8 bol;                     //start of line
  /* prev_ values are for start of word being built. non prev_ values are for
  the gap between the word being built and the next one. */
  BOOL8 prev_fuzzy_sp;           //probably space
  BOOL8 prev_fuzzy_non;          //probably not
  uinT8 prev_blanks;             //in front of word
  BOOL8 fuzzy_sp;                //probably space
  BOOL8 fuzzy_non;               //probably not
  uinT8 blanks;                  //in front of word
  BOOL8 prev_gap_was_a_space = FALSE;
  BOOL8 break_at_next_gap = FALSE;
  ROW *real_row;                 //output row
  C_OUTLINE_IT cout_it;
  C_BLOB_LIST cblobs;
  C_BLOB_IT cblob_it = &cblobs;
  WERD_LIST words;
  WERD_IT word_it;               //new words
  WERD *word;                    //new word
  WERD_IT rep_char_it;           //repeated char words
  inT32 next_rep_char_word_right = MAX_INT32;
  float repetition_spacing;      //gap between repetitions
  inT32 xstarts[2];              //row ends
  double coeffs[3];              //quadratic
  inT32 prev_x;                  //end of prev blob
  BLOBNBOX *bblob;               //current blob
  TBOX blob_box;                  //bounding box
  BLOBNBOX_IT box_it;            //iterator
  TBOX prev_blob_box;
  TBOX next_blob_box;
  inT16 prev_gap = MAX_INT16;
  inT16 current_gap = MAX_INT16;
  inT16 next_gap = MAX_INT16;
  inT16 prev_within_xht_gap = MAX_INT16;
  inT16 current_within_xht_gap = MAX_INT16;
  inT16 next_within_xht_gap = MAX_INT16;
  inT16 word_count = 0;

  // MAX_INT32 acts as "no repeated-char word pending": no blob's left edge
  // can compare greater than it.
  rep_char_it.set_to_list (&(row->rep_words));
  if (!rep_char_it.empty ()) {
    next_rep_char_word_right =
      rep_char_it.data ()->bounding_box ().right ();
  }

  prev_x = -MAX_INT16;
  cblob_it.set_to_list (&cblobs);
  box_it.set_to_list (row->blob_list ());
  word_it.set_to_list (&words);
  bol = TRUE;
  prev_blanks = 0;
  prev_fuzzy_sp = FALSE;
  prev_fuzzy_non = FALSE;
  if (!box_it.empty ()) {
    xstarts[0] = box_it.data ()->bounding_box ().left ();
    if (xstarts[0] > next_rep_char_word_right) {
      /* We need to insert a repeated char word at the start of the row */
      word = rep_char_it.extract ();
      word_it.add_after_then_move (word);
      /* Set spaces before repeated char word */
      word->set_flag (W_BOL, TRUE);
      bol = FALSE;
      word->set_blanks (0);
                                 //NO uncertainty
      word->set_flag (W_FUZZY_SP, FALSE);
      word->set_flag (W_FUZZY_NON, FALSE);
      xstarts[0] = word->bounding_box ().left ();
      /* Set spaces after repeated char word (and leave current word set) */
      repetition_spacing = find_mean_blob_spacing (word);
      current_gap = box_it.data ()->bounding_box ().left () -
        next_rep_char_word_right;
      current_within_xht_gap = current_gap;
      if (current_gap > tosp_rep_space * repetition_spacing) {
        prev_blanks = (uinT8) floor (current_gap / row->space_size);
        if (prev_blanks < 1)
          prev_blanks = 1;
      }
      else
        prev_blanks = 0;
      if (tosp_debug_level > 5)
        tprintf ("Repch wd at BOL(%d, %d). rep spacing %5.2f;  Rgap:%d  ",
          box_it.data ()->bounding_box ().left (),
          box_it.data ()->bounding_box ().bottom (),
          repetition_spacing, current_gap);
      prev_fuzzy_sp = FALSE;
      prev_fuzzy_non = FALSE;
      if (rep_char_it.empty ()) {
        next_rep_char_word_right = MAX_INT32;
      }
      else {
        rep_char_it.forward ();
        next_rep_char_word_right =
          rep_char_it.data ()->bounding_box ().right ();
      }
    }

    // Prime the "next gap" values before entering the main loop.
    peek_at_next_gap(row,
                     box_it,
                     next_blob_box,
                     next_gap,
                     next_within_xht_gap);
    do {
      bblob = box_it.data ();
      blob_box = bblob->bounding_box ();
      if (bblob->joined_to_prev ()) {
        if (bblob->cblob () != NULL) {
          // Append this blob's outlines to the blob most recently added to
          // cblobs, then delete the C_BLOB they came from.
          cout_it.set_to_list (cblob_it.data ()->out_list ());
          cout_it.move_to_last ();
          cout_it.add_list_after (bblob->cblob ()->out_list ());
          delete bblob->cblob ();
        }
      } else {
        if (bblob->cblob() != NULL)
          cblob_it.add_after_then_move (bblob->cblob ());
        prev_x = blob_box.right ();
      }
      box_it.forward ();         //next one
      bblob = box_it.data ();
      blob_box = bblob->bounding_box ();

      if (!bblob->joined_to_prev() && bblob->cblob() != NULL) {
        /* Real Blob - not multiple outlines or pre-chopped */
        // Slide the prev/current/next gap window forward by one blob.
        prev_gap = current_gap;
        prev_within_xht_gap = current_within_xht_gap;
        prev_blob_box = next_blob_box;
        current_gap = next_gap;
        current_within_xht_gap = next_within_xht_gap;
        peek_at_next_gap(row,
                         box_it,
                         next_blob_box,
                         next_gap,
                         next_within_xht_gap);

        inT16 prev_gap_arg = prev_gap;
        inT16 next_gap_arg = next_gap;
        if (tosp_only_use_xht_gaps) {
          prev_gap_arg = prev_within_xht_gap;
          next_gap_arg = next_within_xht_gap;
        }
        // Decide if a word-break should be inserted
        // (make_a_word_break is skipped by short-circuit when a repeated-char
        // word is due; that branch below assigns blanks/fuzzy_* itself.)
        if (blob_box.left () > next_rep_char_word_right ||
            make_a_word_break(row, blob_box, prev_gap_arg, prev_blob_box,
                              current_gap, current_within_xht_gap,
                              next_blob_box, next_gap_arg,
                              blanks, fuzzy_sp, fuzzy_non,
                              prev_gap_was_a_space,
                              break_at_next_gap) ||
            box_it.at_first()) {
          /* Form a new word out of the blobs collected */
          word = new WERD (&cblobs, prev_blanks, NULL);
          word_count++;
          word_it.add_after_then_move (word);
          if (bol) {
            word->set_flag (W_BOL, TRUE);
            bol = FALSE;
          }
          if (prev_fuzzy_sp)
                                 //probably space
            word->set_flag (W_FUZZY_SP, TRUE);
          else if (prev_fuzzy_non)
            word->set_flag (W_FUZZY_NON, TRUE);
          //probably not

          if (blob_box.left () > next_rep_char_word_right) {
            /* We need to insert a repeated char word */
            word = rep_char_it.extract ();
            word_it.add_after_then_move (word);

            /* Set spaces before repeated char word */
            repetition_spacing = find_mean_blob_spacing (word);
            current_gap = word->bounding_box ().left () - prev_x;
            current_within_xht_gap = current_gap;
            if (current_gap > tosp_rep_space * repetition_spacing) {
              blanks =
                (uinT8) floor (current_gap / row->space_size);
              if (blanks < 1)
                blanks = 1;
            }
            else
              blanks = 0;
            if (tosp_debug_level > 5)
              tprintf
                ("Repch wd (%d,%d) rep gap %5.2f;  Lgap:%d (%d blanks);",
                word->bounding_box ().left (),
                word->bounding_box ().bottom (),
                repetition_spacing, current_gap, blanks);
            word->set_blanks (blanks);
                                 //NO uncertainty
            word->set_flag (W_FUZZY_SP, FALSE);
            word->set_flag (W_FUZZY_NON, FALSE);

            /* Set spaces after repeated char word (and leave current word set) */
            current_gap =
              blob_box.left () - next_rep_char_word_right;
            if (current_gap > tosp_rep_space * repetition_spacing) {
              blanks = (uinT8) (current_gap / row->space_size);
              if (blanks < 1)
                blanks = 1;
            }
            else
              blanks = 0;
            if (tosp_debug_level > 5)
              tprintf (" Rgap:%d (%d blanks)\n",
                current_gap, blanks);
            fuzzy_sp = FALSE;
            fuzzy_non = FALSE;

            if (rep_char_it.empty ()) {
              next_rep_char_word_right = MAX_INT32;
            }
            else {
              rep_char_it.forward ();
              next_rep_char_word_right =
                rep_char_it.data ()->bounding_box ().right ();
            }
          }

          if (box_it.at_first () && rep_char_it.empty ()) {
                                 //at end of line
            word->set_flag (W_EOL, TRUE);
            xstarts[1] = prev_x;
          }
          else {
            // Carry the gap decision forward: it describes the space in front
            // of the next word to be built.
            prev_blanks = blanks;
            prev_fuzzy_sp = fuzzy_sp;
            prev_fuzzy_non = fuzzy_non;
          }
        }
      }
    }
    while (!box_it.at_first ()); //until back at start

    /* Insert any further repeated char words */
    while (!rep_char_it.empty ()) {
      word = rep_char_it.extract ();
      word_it.add_after_then_move (word);

      /* Set spaces before repeated char word */
      repetition_spacing = find_mean_blob_spacing (word);
      current_gap = word->bounding_box ().left () - prev_x;
      if (current_gap > tosp_rep_space * repetition_spacing) {
        blanks = (uinT8) floor (current_gap / row->space_size);
        if (blanks < 1)
          blanks = 1;
      }
      else
        blanks = 0;
      // repetition_spacing is a float, so it needs a floating-point
      // conversion (as in the other two "Repch wd" messages); printing it
      // with %d mismatches the variadic arguments.
      if (tosp_debug_level > 5)
        tprintf
          ("Repch wd at EOL (%d,%d). rep spacing %5.2f; Lgap:%d (%d blanks)\n",
          word->bounding_box ().left (), word->bounding_box ().bottom (),
          repetition_spacing, current_gap, blanks);
      word->set_blanks (blanks);
                                 //NO uncertainty
      word->set_flag (W_FUZZY_SP, FALSE);
      word->set_flag (W_FUZZY_NON, FALSE);
      prev_x = word->bounding_box ().right ();
      if (rep_char_it.empty ()) {
                                 //at end of line
        word->set_flag (W_EOL, TRUE);
        xstarts[1] = prev_x;
      }
      else {
        rep_char_it.forward ();
      }
    }
    coeffs[0] = 0;
    coeffs[1] = row->line_m ();
    coeffs[2] = row->line_c ();
    real_row = new ROW (row,
      (inT16) row->kern_size, (inT16) row->space_size);
    word_it.set_to_list (real_row->word_list ());
                                 //put words in row
    word_it.add_list_after (&words);
    real_row->recalc_bounding_box ();

    if (tosp_debug_level > 4) {
      tprintf ("Row: Made %d words in row ((%d,%d)(%d,%d))\n",
        word_count,
        real_row->bounding_box ().left (),
        real_row->bounding_box ().bottom (),
        real_row->bounding_box ().right (),
        real_row->bounding_box ().top ());
    }
    return real_row;
  }
  return NULL;
}

void tesseract::Textord::set_use_cjk_fp_model ( bool flag ) [inline]

Definition at line 56 of file textord.h.

                                       {
    // Store the caller's choice in the use_cjk_fp_model_ member, which
    // use_cjk_fp_model() reads back.
    use_cjk_fp_model_ = flag;
  }

void tesseract::Textord::TextordPage	(	PageSegMode	pageseg_mode,
		int	width,
		int	height,
		Pix *	pix,
		BLOCK_LIST *	blocks,
		TO_BLOCK_LIST *	to_blocks
	)

Definition at line 260 of file textord.cpp.

                                                                        {
  // Remember the page's top-right corner; every later stage receives it.
  page_tr_.set_x(width);
  page_tr_.set_y(height);

  if (!to_blocks->empty()) {
    // AutoPageSeg already found the components, so only the TO_BLOCKs need
    // to be set up, which filter_blobs does the same way as find_components.
    filter_blobs(page_tr_, to_blocks, true);
  } else {
    // AutoPageSeg was not used: the components still have to be found.
    find_components(pix, blocks, to_blocks);
  }
  ASSERT_HOST(!to_blocks->empty());

  if (pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT) {
    const FCOORD anticlockwise90(0.0f, 1.0f);
    const FCOORD clockwise90(0.0f, -1.0f);
    TO_BLOCK_IT vert_it(to_blocks);
    vert_it.mark_cycle_pt();
    while (!vert_it.cycled_list()) {
      TO_BLOCK* vert_to_block = vert_it.data();
      BLOCK* inner_block = vert_to_block->block;
      // Give the block a fake poly_block built from its bounding box.
      inner_block->set_poly_block(
          new POLY_BLOCK(inner_block->bounding_box(), PT_VERTICAL_TEXT));
      // Rotate the to_block together with its block and blobnbox lists.
      vert_to_block->rotate(anticlockwise90);
      // The block's own rotation values follow the layout-analysis
      // convention for vertical text.
      inner_block->set_re_rotation(clockwise90);
      inner_block->set_classify_rotation(clockwise90);
      vert_it.forward();
    }
  }

  TO_BLOCK_IT first_it(to_blocks);
  TO_BLOCK* first_to_block = first_it.data();

  // Make the rows. Line finding uses the old-fashioned row maker; otherwise
  // SINGLE_LINE, SINGLE_WORD and SINGLE_CHAR all need exactly one row.
  float gradient = PSM_LINE_FIND_ENABLED(pageseg_mode)
                       ? make_rows(page_tr_, to_blocks)
                       : make_single_row(page_tr_, first_to_block, to_blocks);

  // Fit baselines to the rows (only the old mode is available).
  fit_rows(gradient, page_tr_, to_blocks);

  // Make the words in the lines.
  if (!PSM_WORD_FIND_ENABLED(pageseg_mode)) {
    // SINGLE_WORD and SINGLE_CHAR put every blob into one word; in
    // SINGLE_CHAR mode all the outlines additionally go into one blob.
    TO_BLOCK* word_to_block = first_it.data();
    make_single_word(pageseg_mode == PSM_SINGLE_CHAR,
                     word_to_block->get_rows(),
                     word_to_block->block->row_list());
  } else {
    // SINGLE_LINE uses the old word maker on its single line.
    make_words(this, page_tr_, gradient, blocks, to_blocks);
  }

  // Remove empties.
  cleanup_blocks(blocks);

  // Compute per-row margins for every block; they are used later for
  // paragraph detection.
  BLOCK_IT margin_it(blocks);
  margin_it.mark_cycle_pt();
  while (!margin_it.cycled_list()) {
    margin_it.data()->compute_row_margins();
    margin_it.forward();
  }
#ifndef GRAPHICS_DISABLED
  close_to_win();
#endif
}

void tesseract::Textord::to_spacing	(	ICOORD	page_tr,
		TO_BLOCK_LIST *	blocks
	)

Definition at line 35 of file tospace.cpp.

                           {
  // Block-level results handed to block_spacing_stats to be filled in.
  // They deliberately live outside the block loop so their storage is
  // shared by all iterations.
  inT16 block_space_gap_width;       // estimated width of real spaces
  inT16 block_non_space_gap_width;   // estimated width of non-space gaps
  BOOL8 old_text_ord_proportional;   // old fixed/prop result

  TO_BLOCK_IT block_it(blocks);
  int block_index = 1;               // 1-based, used in debug output
  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
       block_it.forward(), ++block_index) {
    TO_BLOCK *block = block_it.data();
    // Map of big vertical gaps in this block; released at the end of the
    // iteration.
    GAPMAP *gapmap = new GAPMAP(block);
    block_spacing_stats(block, gapmap, old_text_ord_proportional,
                        block_space_gap_width, block_non_space_gap_width);

    // Keep the block-level space and non-space gap widths in a sensible
    // relation: when the space width is less than 3 times the non-space
    // width, the non-space width is pulled down to a third of the space
    // width. This helps Arabic and Hindi, where the non-space gap width is
    // often over-estimated and should not be trusted. A similar ratio is
    // applied inside block_spacing_stats.
    if (tosp_old_to_method && tosp_old_to_constrain_sp_kn) {
      if (static_cast<float>(block_space_gap_width) /
              block_non_space_gap_width < 3.0) {
        block_non_space_gap_width =
            static_cast<inT16>(floor(block_space_gap_width / 3.0));
      }
    }

    TO_ROW_IT row_it(block->get_rows());
    int row_index = 1;               // 1-based, used in debug output
    for (row_it.mark_cycle_pt(); !row_it.cycled_list();
         row_it.forward(), ++row_index) {
      TO_ROW *row = row_it.data();
      const bool proportional = row->pitch_decision == PITCH_DEF_PROP ||
                                row->pitch_decision == PITCH_CORR_PROP;
      if (proportional) {
        // Only proportional rows get row-level spacing statistics.
        if (tosp_debug_level > 0 && !old_text_ord_proportional) {
          tprintf("Block %d Row %d: Now Proportional\n",
                  block_index, row_index);
        }
        row_spacing_stats(row, gapmap, block_index, row_index,
                          block_space_gap_width, block_non_space_gap_width);
      } else if (tosp_debug_level > 0 && old_text_ord_proportional) {
        tprintf("Block %d Row %d: Now Fixed Pitch Decision:%d fp flag:%f\n",
                block_index, row_index, row->pitch_decision,
                row->fixed_pitch);
      }
#ifndef GRAPHICS_DISABLED
      if (textord_show_initial_words) {
        plot_word_decisions(to_win, static_cast<inT16>(row->fixed_pitch),
                            row);
      }
#endif
    }
    delete gapmap;
  }
}

bool tesseract::Textord::use_cjk_fp_model ( ) const [inline]

Definition at line 53 of file textord.h.

                                {
    // Report the current value of the use_cjk_fp_model_ member, as last
    // stored by set_use_cjk_fp_model().
    return use_cjk_fp_model_;
  }

Member Data Documentation

double tesseract::Textord::textord_blob_size_bigile = 95

"Percentile for large blobs"

Definition at line 330 of file textord.h.

double tesseract::Textord::textord_blob_size_smallile = 20

"Percentile for small blobs"

Definition at line 333 of file textord.h.

double tesseract::Textord::textord_blshift_maxshift = 0.00

"Max baseline shift"

Definition at line 350 of file textord.h.

double tesseract::Textord::textord_blshift_xfraction = 9.99

"Min size of baseline shift"

Definition at line 351 of file textord.h.

double tesseract::Textord::textord_initialasc_ile = 0.90

"Ile of sizes for xheight guess"

Definition at line 335 of file textord.h.

double tesseract::Textord::textord_initialx_ile = 0.75

"Ile of sizes for xheight guess"

Definition at line 334 of file textord.h.

int tesseract::Textord::textord_max_noise_size = 7

"Pixel size of noise"

Definition at line 329 of file textord.h.

bool tesseract::Textord::textord_no_rejects = false

"Don't remove noise blobs"

Definition at line 326 of file textord.h.

double tesseract::Textord::textord_noise_area_ratio = 0.7

"Fraction of bounding box for noise"

Definition at line 332 of file textord.h.

bool tesseract::Textord::textord_noise_debug = 0

"Debug row garbage detector"

Definition at line 349 of file textord.h.

double tesseract::Textord::textord_noise_hfract = 1.0/64

"Height fraction to discard outlines as speckle noise"

Definition at line 346 of file textord.h.

double tesseract::Textord::textord_noise_normratio = 2.0

"Dot to norm ratio for deletion"

Definition at line 339 of file textord.h.

bool tesseract::Textord::textord_noise_rejrows = true

"Reject noise-like rows"

Definition at line 341 of file textord.h.

bool tesseract::Textord::textord_noise_rejwords = true

"Reject noise-like words"

Definition at line 340 of file textord.h.

double tesseract::Textord::textord_noise_rowratio = 6.0

"Dot to norm ratio for deletion"

Definition at line 348 of file textord.h.

int tesseract::Textord::textord_noise_sizefraction = 10

"Fraction of size for maxima"

Definition at line 336 of file textord.h.

double tesseract::Textord::textord_noise_sizelimit = 0.5

"Fraction of x for big t count"

Definition at line 337 of file textord.h.

int tesseract::Textord::textord_noise_sncount = 1

"super norm blobs to save row"

Definition at line 347 of file textord.h.

double tesseract::Textord::textord_noise_sxfract = 0.4

"xh fract width error for norm blobs"

Definition at line 344 of file textord.h.

double tesseract::Textord::textord_noise_syfract = 0.2

"xh fract error for norm blobs"

Definition at line 342 of file textord.h.

int tesseract::Textord::textord_noise_translimit = 16

"Transitions for normal blob"

Definition at line 338 of file textord.h.

bool tesseract::Textord::textord_show_blobs = false

"Display unsorted blobs"

Definition at line 327 of file textord.h.

bool tesseract::Textord::textord_show_boxes = false

"Display boxes"

Definition at line 328 of file textord.h.

bool tesseract::Textord::textord_single_height_mode = false

"Script has no xheight, so use a single mode for horizontal text"

Definition at line 214 of file textord.h.

bool tesseract::Textord::tosp_all_flips_fuzzy = false

"Pass ANY flip to context?"

Definition at line 240 of file textord.h.

bool tesseract::Textord::tosp_block_use_cert_spaces = true

"Only stat OBVIOUS spaces"

Definition at line 230 of file textord.h.

int tesseract::Textord::tosp_debug_level = 0

"Debug data"

Definition at line 255 of file textord.h.

double tesseract::Textord::tosp_dont_fool_with_small_kerns = -1

"Limit use of xht gap with odd small kns"

Definition at line 318 of file textord.h.

double tesseract::Textord::tosp_enough_small_gaps = 0.65

"Fract of kerns reqd for isolated row stats"

Definition at line 296 of file textord.h.

int tesseract::Textord::tosp_enough_space_samples_for_median = 3

"or should we use mean"

Definition at line 257 of file textord.h.

int tesseract::Textord::tosp_few_samples = 40

"No.gaps reqd with 1 large gap to treat as a table"

Definition at line 261 of file textord.h.

double tesseract::Textord::tosp_flip_caution = 0.0

"Dont autoflip kn to sp when large separation"

Definition at line 314 of file textord.h.

bool tesseract::Textord::tosp_flip_fuzz_kn_to_sp = true

"Default flip"

Definition at line 251 of file textord.h.

bool tesseract::Textord::tosp_flip_fuzz_sp_to_kn = true

"Default flip"

Definition at line 252 of file textord.h.

bool tesseract::Textord::tosp_force_wordbreak_on_punct = false

"Force word breaks on punct to break long lines in non-space " "delimited langs"

Definition at line 224 of file textord.h.

double tesseract::Textord::tosp_fuzzy_kn_fraction = 0.5

"New fuzzy kn alg"

Definition at line 303 of file textord.h.

bool tesseract::Textord::tosp_fuzzy_limit_all = true

"Dont restrict kn->sp fuzzy limit to tables"

Definition at line 242 of file textord.h.

double tesseract::Textord::tosp_fuzzy_sp_fraction = 0.5

"New fuzzy sp alg"

Definition at line 304 of file textord.h.

double tesseract::Textord::tosp_fuzzy_space_factor = 0.6

"Fract of xheight for fuzz sp"

Definition at line 280 of file textord.h.

double tesseract::Textord::tosp_fuzzy_space_factor1 = 0.5

"Fract of xheight for fuzz sp"

Definition at line 282 of file textord.h.

double tesseract::Textord::tosp_fuzzy_space_factor2 = 0.72

"Fract of xheight for fuzz sp"

Definition at line 284 of file textord.h.

double tesseract::Textord::tosp_gap_factor = 0.83

"gap ratio to flip sp->kern"

Definition at line 285 of file textord.h.

double tesseract::Textord::tosp_ignore_big_gaps = -1

"xht multiplier"

Definition at line 292 of file textord.h.

double tesseract::Textord::tosp_ignore_very_big_gaps = 3.5

"xht multiplier"

Definition at line 293 of file textord.h.

bool tesseract::Textord::tosp_improve_thresh = false

"Enable improvement heuristic"

Definition at line 254 of file textord.h.

double tesseract::Textord::tosp_init_guess_kn_mult = 2.2

"Thresh guess - mult kn by this"

Definition at line 308 of file textord.h.

double tesseract::Textord::tosp_init_guess_xht_mult = 0.28

"Thresh guess - mult xht by this"

Definition at line 310 of file textord.h.

double tesseract::Textord::tosp_kern_gap_factor1 = 2.0

"gap ratio to flip kern->sp"

Definition at line 287 of file textord.h.

double tesseract::Textord::tosp_kern_gap_factor2 = 1.3

"gap ratio to flip kern->sp"

Definition at line 289 of file textord.h.

double tesseract::Textord::tosp_kern_gap_factor3 = 2.5

"gap ratio to flip kern->sp"

Definition at line 291 of file textord.h.

double tesseract::Textord::tosp_large_kerning = 0.19

"Limit use of xht gap with large kns"

Definition at line 316 of file textord.h.

double tesseract::Textord::tosp_max_sane_kn_thresh = 5.0

"Multiplier on kn to limit thresh"

Definition at line 312 of file textord.h.

double tesseract::Textord::tosp_min_sane_kn_sp = 1.5

"Dont trust spaces less than this time kn"

Definition at line 306 of file textord.h.

double tesseract::Textord::tosp_narrow_aspect_ratio = 0.48

"narrow if w/h less than this"

Definition at line 275 of file textord.h.

bool tesseract::Textord::tosp_narrow_blobs_not_cert = true

"Only stat OBVIOUS spaces"

Definition at line 234 of file textord.h.

double tesseract::Textord::tosp_narrow_fraction = 0.3

"Fract of xheight for narrow"

Definition at line 273 of file textord.h.

double tesseract::Textord::tosp_near_lh_edge = 0

"Dont reduce box if the top left is non blank"

Definition at line 320 of file textord.h.

double tesseract::Textord::tosp_old_sp_kn_th_factor = 2.0

"Factor for defining space threshold in terms of space and " "kern sizes"

Definition at line 267 of file textord.h.

bool tesseract::Textord::tosp_old_to_bug_fix = false

"Fix suspected bug in old code"

Definition at line 228 of file textord.h.

bool tesseract::Textord::tosp_old_to_constrain_sp_kn = false

"Constrain relative values of inter and intra-word gaps for " "old_to_method."

Definition at line 219 of file textord.h.

bool tesseract::Textord::tosp_old_to_method = false

"Space stats use prechopping?"

Definition at line 216 of file textord.h.

bool tesseract::Textord::tosp_only_small_gaps_for_kern = false

"Better guess"

Definition at line 239 of file textord.h.

bool tesseract::Textord::tosp_only_use_prop_rows = true

"Block stats to use fixed pitch rows?"

Definition at line 221 of file textord.h.

bool tesseract::Textord::tosp_only_use_xht_gaps = false

"Only use within xht gap for wd breaks"

Definition at line 248 of file textord.h.

double tesseract::Textord::tosp_pass_wide_fuzz_sp_to_context = 0.75

"How wide fuzzies need context"

Definition at line 324 of file textord.h.

bool tesseract::Textord::tosp_recovery_isolated_row_stats = true

"Use row alone when inadequate cert spaces"

Definition at line 238 of file textord.h.

int tesseract::Textord::tosp_redo_kern_limit = 10

"No.samples reqd to reestimate for row"

Definition at line 259 of file textord.h.

double tesseract::Textord::tosp_rep_space = 1.6

"rep gap multiplier for space"

Definition at line 294 of file textord.h.

bool tesseract::Textord::tosp_row_use_cert_spaces = true

"Only stat OBVIOUS spaces"

Definition at line 232 of file textord.h.

bool tesseract::Textord::tosp_row_use_cert_spaces1 = true

"Only stat OBVIOUS spaces"

Definition at line 236 of file textord.h.

bool tesseract::Textord::tosp_rule_9_test_punct = false

"Dont chng kn to space next to punct"

Definition at line 250 of file textord.h.

int tesseract::Textord::tosp_sanity_method = 1

"How to avoid being silly"

Definition at line 264 of file textord.h.

int tesseract::Textord::tosp_short_row = 20

"No.gaps reqd with few cert spaces to use certs"

Definition at line 263 of file textord.h.

double tesseract::Textord::tosp_silly_kn_sp_gap = 0.2

"Dont let sp minus kn get too small"

Definition at line 322 of file textord.h.

bool tesseract::Textord::tosp_stats_use_xht_gaps = true

"Use within xht gap for wd breaks"

Definition at line 244 of file textord.h.

double tesseract::Textord::tosp_table_fuzzy_kn_sp_ratio = 3.0

"Fuzzy if less than this"

Definition at line 302 of file textord.h.

double tesseract::Textord::tosp_table_kn_sp_ratio = 2.25

"Min difference of kn & sp in table"

Definition at line 298 of file textord.h.

double tesseract::Textord::tosp_table_xht_sp_ratio = 0.33

"Expect spaces bigger than this"

Definition at line 300 of file textord.h.

double tesseract::Textord::tosp_threshold_bias1 = 0

"how far between kern and space?"

Definition at line 269 of file textord.h.

double tesseract::Textord::tosp_threshold_bias2 = 0

"how far between kern and space?"

Definition at line 271 of file textord.h.

bool tesseract::Textord::tosp_use_pre_chopping = false

"Space stats use prechopping?"

Definition at line 226 of file textord.h.

bool tesseract::Textord::tosp_use_xht_gaps = true

"Use within xht gap for wd breaks"

Definition at line 246 of file textord.h.

double tesseract::Textord::tosp_wide_aspect_ratio = 0.0

"wide if w/h less than this"

Definition at line 278 of file textord.h.

double tesseract::Textord::tosp_wide_fraction = 0.52

"Fract of xheight for wide"

Definition at line 276 of file textord.h.

The documentation for this class was generated from the following files:

tesseract-ocr/textord/textord.h
tesseract-ocr/textord/makerow.cpp
tesseract-ocr/textord/oldbasel.cpp
tesseract-ocr/textord/textord.cpp
tesseract-ocr/textord/tordmain.cpp
tesseract-ocr/textord/tospace.cpp

Public Member Functions

Public Attributes

Detailed Description

Constructor & Destructor Documentation

Member Function Documentation

Member Data Documentation