Tesseract  3.02
tesseract-ocr/dict/permute.h File Reference
#include "ratngs.h"
#include "params.h"
#include "unicharset.h"

Go to the source code of this file.


class  tesseract::PermuterState


namespace  tesseract


#define MAX_PERM_LENGTH   128


void adjust_non_word (const char *word, const char *word_lengths, float rating, float *new_rating, float *adjust_factor)
const char * choose_il1 (const char *first_char, const char *second_char, const char *third_char, const char *prev_char, const char *next_char, const char *next_next_char)


int fragments_debug = 0
int segment_debug = 0
bool permute_debug = 0
bool permute_script_word = 0
bool permute_fixed_length_dawg = 0
bool segment_segcost_rating = 0
double segment_reward_script = 0.95
bool permute_chartype_word = 0
double segment_reward_chartype = 0.97
double segment_reward_ngram_best_choice = 0.99
int max_permuter_attempts = 100000
int permute_only_top

Define Documentation

#define MAX_PERM_LENGTH   128

Definition at line 36 of file permute.h.

Function Documentation

void adjust_non_word ( const char *  word,
const char *  word_lengths,
float  rating,
float *  new_rating,
float *  adjust_factor )
const char* choose_il1 ( const char *  first_char,
const char *  second_char,
const char *  third_char,
const char *  prev_char,
const char *  next_char,
const char *  next_next_char )

Variable Documentation

int fragments_debug = 0

"Debug character fragments"

int max_permuter_attempts = 100000

"Maximum number of different character choices to consider during permutation. This limit is especially useful when user patterns are specified, since overly generic patterns can result in dawg search exploring an overly large number of options."

bool permute_chartype_word = 0

"Turn on character type (property) consistency permuter"

bool permute_debug = 0

"char permutation debug"

bool permute_fixed_length_dawg = 0

"Turn on fixed-length phrasebook search permuter"

bool permute_script_word = 0

"Turn on word script consistency permuter"

int segment_debug = 0

"Debug the whole segmentation process"

double segment_reward_chartype = 0.97

"Score multiplier for char type consistency within a word."

double segment_reward_ngram_best_choice = 0.99

"Score multiplier for ngram permuter's best choice (only used in the Han script path)."

double segment_reward_script = 0.95

"Score multiplier for script consistency within a word. Being a 'reward' factor, it should be <= 1. Smaller value implies bigger reward."

bool segment_segcost_rating = 0

"incorporate segmentation cost in word rating?"