Tesseract  3.02
tesseract-ocr/ccmain/paragraphs.cpp File Reference
#include <ctype.h>
#include "genericvector.h"
#include "helpers.h"
#include "mutableiterator.h"
#include "ocrpara.h"
#include "pageres.h"
#include "paragraphs.h"
#include "paragraphs_internal.h"
#include "publictypes.h"
#include "ratngs.h"
#include "rect.h"
#include "statistc.h"
#include "strngs.h"
#include "tprintf.h"
#include "unicharset.h"
#include "unicodes.h"

Go to the source code of this file.

Classes

class  tesseract::UnicodeSpanSkipper
struct  tesseract::Cluster
class  tesseract::SimpleClusterer
struct  tesseract::GeometricClassifierState
struct  tesseract::Interval

Namespaces

namespace  tesseract

Functions

template<typename T >
void tesseract::SimpleSwap (T &a, T &b)
STRING tesseract::RtlEmbed (const STRING &word, bool rtlify)
bool tesseract::IsLatinLetter (int ch)
bool tesseract::IsDigitLike (int ch)
bool tesseract::IsOpeningPunct (int ch)
bool tesseract::IsTerminalPunct (int ch)
const char * tesseract::SkipChars (const char *str, const char *toskip)
const char * tesseract::SkipChars (const char *str, bool(*skip)(int))
const char * tesseract::SkipOne (const char *str, const char *toskip)
bool tesseract::LikelyListNumeral (const STRING &word)
bool tesseract::LikelyListMark (const STRING &word)
bool tesseract::AsciiLikelyListItem (const STRING &word)
int tesseract::UnicodeFor (const UNICHARSET *u, const WERD_CHOICE *werd, int pos)
bool tesseract::LikelyListMarkUnicode (int ch)
bool tesseract::UniLikelyListItem (const UNICHARSET *u, const WERD_CHOICE *werd)
void tesseract::LeftWordAttributes (const UNICHARSET *unicharset, const WERD_CHOICE *werd, const STRING &utf8, bool *is_list, bool *starts_idea, bool *ends_idea)
void tesseract::RightWordAttributes (const UNICHARSET *unicharset, const WERD_CHOICE *werd, const STRING &utf8, bool *is_list, bool *starts_idea, bool *ends_idea)
int tesseract::ClosestCluster (const GenericVector< Cluster > &clusters, int value)
void tesseract::CalculateTabStops (GenericVector< RowScratchRegisters > *rows, int row_start, int row_end, int tolerance, GenericVector< Cluster > *left_tabs, GenericVector< Cluster > *right_tabs)
void tesseract::MarkRowsWithModel (GenericVector< RowScratchRegisters > *rows, int row_start, int row_end, const ParagraphModel *model, bool ltr, int eop_threshold)
void tesseract::GeometricClassifyThreeTabStopTextBlock (int debug_level, GeometricClassifierState &s, ParagraphTheory *theory)
void tesseract::GeometricClassify (int debug_level, GenericVector< RowScratchRegisters > *rows, int row_start, int row_end, ParagraphTheory *theory)
bool tesseract::ValidFirstLine (const GenericVector< RowScratchRegisters > *rows, int row, const ParagraphModel *model)
bool tesseract::ValidBodyLine (const GenericVector< RowScratchRegisters > *rows, int row, const ParagraphModel *model)
bool tesseract::CrownCompatible (const GenericVector< RowScratchRegisters > *rows, int a, int b, const ParagraphModel *model)
void tesseract::DiscardUnusedModels (const GenericVector< RowScratchRegisters > &rows, ParagraphTheory *theory)
void tesseract::DowngradeWeakestToCrowns (int debug_level, ParagraphTheory *theory, GenericVector< RowScratchRegisters > *rows)
void tesseract::RecomputeMarginsAndClearHypotheses (GenericVector< RowScratchRegisters > *rows, int start, int end, int percentile)
int tesseract::InterwordSpace (const GenericVector< RowScratchRegisters > &rows, int row_start, int row_end)
bool tesseract::FirstWordWouldHaveFit (const RowScratchRegisters &before, const RowScratchRegisters &after, tesseract::ParagraphJustification justification)
bool tesseract::FirstWordWouldHaveFit (const RowScratchRegisters &before, const RowScratchRegisters &after)
bool tesseract::TextSupportsBreak (const RowScratchRegisters &before, const RowScratchRegisters &after)
bool tesseract::LikelyParagraphStart (const RowScratchRegisters &before, const RowScratchRegisters &after)
bool tesseract::LikelyParagraphStart (const RowScratchRegisters &before, const RowScratchRegisters &after, tesseract::ParagraphJustification j)
ParagraphModel tesseract::InternalParagraphModelByOutline (const GenericVector< RowScratchRegisters > *rows, int start, int end, int tolerance, bool *consistent)
ParagraphModel tesseract::ParagraphModelByOutline (int debug_level, const GenericVector< RowScratchRegisters > *rows, int start, int end, int tolerance)
bool tesseract::RowsFitModel (const GenericVector< RowScratchRegisters > *rows, int start, int end, const ParagraphModel *model)
void tesseract::MarkStrongEvidence (GenericVector< RowScratchRegisters > *rows, int row_start, int row_end)
void tesseract::ModelStrongEvidence (int debug_level, GenericVector< RowScratchRegisters > *rows, int row_start, int row_end, bool allow_flush_models, ParagraphTheory *theory)
void tesseract::StrongEvidenceClassify (int debug_level, GenericVector< RowScratchRegisters > *rows, int row_start, int row_end, ParagraphTheory *theory)
void tesseract::SeparateSimpleLeaderLines (GenericVector< RowScratchRegisters > *rows, int row_start, int row_end, ParagraphTheory *theory)
void tesseract::ConvertHypothesizedModelRunsToParagraphs (int debug_level, const GenericVector< RowScratchRegisters > &rows, GenericVector< PARA * > *row_owners, ParagraphTheory *theory)
bool tesseract::RowIsStranded (const GenericVector< RowScratchRegisters > &rows, int row)
void tesseract::LeftoverSegments (const GenericVector< RowScratchRegisters > &rows, GenericVector< Interval > *to_fix, int row_start, int row_end)
void tesseract::CanonicalizeDetectionResults (GenericVector< PARA * > *row_owners, PARA_LIST *paragraphs)
void tesseract::DetectParagraphs (int debug_level, GenericVector< RowInfo > *row_infos, GenericVector< PARA * > *row_owners, PARA_LIST *paragraphs, GenericVector< ParagraphModel * > *models)
void tesseract::InitializeRowInfo (const MutableIterator &it, RowInfo *info)
void tesseract::DetectParagraphs (int debug_level, const MutableIterator *block_start, GenericVector< ParagraphModel * > *models)

Variables

const int tesseract::kStrayLinePer = 6
const ParagraphModel * tesseract::kCrownLeft = reinterpret_cast<ParagraphModel *>(0xDEAD111F)
const ParagraphModel * tesseract::kCrownRight = reinterpret_cast<ParagraphModel *>(0xDEAD888F)