Tesseract
3.02
|
#include "mfcpch.h"
#include "ccstruct.h"
#include "statistc.h"
#include "quadlsq.h"
#include "detlinefit.h"
#include "makerow.h"
#include "drawtord.h"
#include "oldbasel.h"
#include "textord.h"
#include "tprintf.h"
Go to the source code of this file.
Namespaces | |
namespace | tesseract |
Defines | |
#define | EXTERN |
#define | TURNLIMIT 1 |
#define | X_HEIGHT_FRACTION 0.7 |
#define | DESCENDER_FRACTION 0.5 |
#define | MIN_ASC_FRACTION 0.20 |
#define | MIN_DESC_FRACTION 0.25 |
#define | MINASCRISE 2.0 |
#define | MAXHEIGHTVARIANCE 0.15 |
#define | MAXHEIGHT 300 |
#define | MAXOVERLAP 0.1 |
#define | MAXBADRUN 2 |
#define | HEIGHTBUCKETS 200 |
#define | DELTAHEIGHT 5.0 |
#define | GOODHEIGHT 5 |
#define | MAXLOOPS 10 |
#define | MODENUM 10 |
#define | MAXPARTS 6 |
#define | SPLINESIZE 23 |
#define | ABS(x) ((x)<0 ? (-(x)) : (x)) |
Functions | |
int | get_blob_coords (TO_ROW *row, inT32 lineheight, TBOX *blobcoords, BOOL8 &holed_line, int &outcount) |
void | make_first_baseline (TBOX blobcoords[], int blobcount, int xcoords[], int ycoords[], QSPLINE *spline, QSPLINE *baseline, float jumplimit) |
void | make_holed_baseline (TBOX blobcoords[], int blobcount, QSPLINE *spline, QSPLINE *baseline, float gradient) |
int | partition_line (TBOX blobcoords[], int blobcount, int *numparts, char partids[], int partsizes[], QSPLINE *spline, float jumplimit, float ydiffs[]) |
void | merge_oldbl_parts (TBOX blobcoords[], int blobcount, char partids[], int partsizes[], int biggestpart, float jumplimit) |
int | get_ydiffs (TBOX blobcoords[], int blobcount, QSPLINE *spline, float ydiffs[]) |
int | choose_partition (register float diff, float partdiffs[], int lastpart, float jumplimit, float *drift, float *lastdelta, int *partcount) |
int | partition_coords (TBOX blobcoords[], int blobcount, char partids[], int bestpart, int xcoords[], int ycoords[]) |
*merge_partitions(partids,partcount,blobcount,bestpart) discards funny looking | |
int | segment_spline (TBOX blobcoords[], int blobcount, int xcoords[], int ycoords[], int degree, int pointcount, int xstarts[]) |
BOOL8 | split_stepped_spline (QSPLINE *baseline, float jumplimit, int xcoords[], int xstarts[], int &segments) |
void | insert_spline_point (int xstarts[], int segment, int coord1, int coord2, int &segments) |
void | find_lesser_parts (TO_ROW *row, TBOX blobcoords[], int blobcount, char partids[], int partsizes[], int partcount, int bestpart) |
void | old_first_xheight (TO_ROW *row, TBOX blobcoords[], int initialheight, int blobcount, QSPLINE *baseline, float jumplimit) |
void | make_first_xheight (TO_ROW *row, TBOX blobcoords[], int lineheight, int init_lineheight, int blobcount, QSPLINE *baseline, float jumplimit) |
void | find_top_modes (STATS *stats, int statnum, int modelist[], int modenum) |
void | pick_x_height (TO_ROW *row, int modelist[], int lefts[], int rights[], STATS *heightstat, int mode_threshold) |
Variables | |
EXTERN bool | textord_really_old_xheight = 0 |
EXTERN bool | textord_oldbl_debug = 0 |
EXTERN bool | textord_debug_baselines = 0 |
EXTERN bool | textord_oldbl_paradef = 1 |
EXTERN bool | textord_oldbl_split_splines = 1 |
EXTERN bool | textord_oldbl_merge_parts = 1 |
EXTERN bool | oldbl_corrfix = 1 |
EXTERN bool | oldbl_xhfix = 0 |
EXTERN bool | textord_ocropus_mode = 0 |
EXTERN double | oldbl_xhfract = 0.4 |
EXTERN int | oldbl_holed_losscount = 10 |
EXTERN double | oldbl_dot_error_size = 1.26 |
EXTERN double | textord_oldbl_jumplimit = 0.15 |
const int | kMinModeFactorOcropus = 32 |
const int | kMinModeFactor = 12 |
#define ABS | ( | x | ) | ((x)<0 ? (-(x)) : (x)) |
Definition at line 74 of file oldbasel.cpp.
#define DELTAHEIGHT 5.0 |
Definition at line 67 of file oldbasel.cpp.
#define DESCENDER_FRACTION 0.5 |
Definition at line 58 of file oldbasel.cpp.
#define EXTERN |
Definition at line 36 of file oldbasel.cpp.
#define GOODHEIGHT 5 |
Definition at line 68 of file oldbasel.cpp.
#define HEIGHTBUCKETS 200 |
Definition at line 66 of file oldbasel.cpp.
#define MAXBADRUN 2 |
Definition at line 65 of file oldbasel.cpp.
#define MAXHEIGHT 300 |
Definition at line 63 of file oldbasel.cpp.
#define MAXHEIGHTVARIANCE 0.15 |
Definition at line 62 of file oldbasel.cpp.
#define MAXLOOPS 10 |
Definition at line 69 of file oldbasel.cpp.
#define MAXOVERLAP 0.1 |
Definition at line 64 of file oldbasel.cpp.
#define MAXPARTS 6 |
Definition at line 71 of file oldbasel.cpp.
#define MIN_ASC_FRACTION 0.20 |
Definition at line 59 of file oldbasel.cpp.
#define MIN_DESC_FRACTION 0.25 |
Definition at line 60 of file oldbasel.cpp.
#define MINASCRISE 2.0 |
Definition at line 61 of file oldbasel.cpp.
#define MODENUM 10 |
Definition at line 70 of file oldbasel.cpp.
#define SPLINESIZE 23 |
Definition at line 72 of file oldbasel.cpp.
#define TURNLIMIT 1 |
Definition at line 56 of file oldbasel.cpp.
#define X_HEIGHT_FRACTION 0.7 |
Definition at line 57 of file oldbasel.cpp.
int choose_partition | ( | register float | diff, |
float | partdiffs[], | ||
int | lastpart, | ||
float | jumplimit, | ||
float * | drift, | ||
float * | lastdelta, | ||
int * | partcount | ||
) |
Definition at line 963 of file oldbasel.cpp.
{ register int partition; /*partition no */ int bestpart; /*best new partition */ float bestdelta; /*best gap from a part */ float delta; /*diff from part */ if (lastpart < 0) { partdiffs[0] = diff; lastpart = 0; /*first point */ *drift = 0.0f; *lastdelta = 0.0f; } /*adjusted diff from part */ delta = diff - partdiffs[lastpart] - *drift; if (textord_oldbl_debug) { tprintf ("Diff=%.2f, Delta=%.3f, Drift=%.3f, ", diff, delta, *drift); } if (ABS (delta) > jumplimit / 2) { /*delta on part 0 */ bestdelta = diff - partdiffs[0] - *drift; bestpart = 0; /*0 best so far */ for (partition = 1; partition < *partcount; partition++) { delta = diff - partdiffs[partition] - *drift; if (ABS (delta) < ABS (bestdelta)) { bestdelta = delta; bestpart = partition; /*part with nearest jump */ } } delta = bestdelta; /*too far away */ if (ABS (bestdelta) > jumplimit && *partcount < MAXPARTS) { /*and spare part left */ bestpart = (*partcount)++; /*best was new one */ /*start new one */ partdiffs[bestpart] = diff - *drift; delta = 0.0f; } } else { bestpart = lastpart; /*best was last one */ } if (bestpart == lastpart && (ABS (delta - *lastdelta) < jumplimit / 2 || ABS (delta) < jumplimit / 2)) /*smooth the drift */ *drift = (3 * *drift + delta) / 3; *lastdelta = delta; if (textord_oldbl_debug) { tprintf ("P=%d\n", bestpart); } return bestpart; }
void find_lesser_parts | ( | TO_ROW * | row, |
TBOX | blobcoords[], | ||
int | blobcount, | ||
char | partids[], | ||
int | partsizes[], | ||
int | partcount, | ||
int | bestpart | ||
) |
Definition at line 1377 of file oldbasel.cpp.
{ register int blobindex; /*index of blob */ register int partition; /*current partition */ int xcentre; /*centre of blob */ int poscount; /*count of best up step */ int negcount; /*count of best down step */ float partsteps[MAXPARTS]; /*average step to part */ float bestpos; /*best up step */ float bestneg; /*best down step */ int runlength; /*length of bad run */ int biggestrun; /*biggest bad run */ biggestrun = 0; for (partition = 0; partition < partcount; partition++) partsteps[partition] = 0.0; /*zero accumulators */ for (runlength = 0, blobindex = 0; blobindex < blobcount; blobindex++) { xcentre = (blobcoords[blobindex].left () + blobcoords[blobindex].right ()) >> 1; /*in other parts */ if (partids[blobindex] != bestpart) { runlength++; /*run of non bests */ if (runlength > biggestrun) biggestrun = runlength; partsteps[partids[blobindex]] += blobcoords[blobindex].bottom () - row->baseline.y (xcentre); } else runlength = 0; } if (biggestrun > MAXBADRUN) row->xheight = -1.0f; /*failed */ else row->xheight = 1.0f; /*success */ poscount = negcount = 0; bestpos = bestneg = 0.0; /*no step yet */ for (partition = 0; partition < partcount; partition++) { if (partition != bestpart) { //by jetsoft divide by zero possible if (partsizes[partition]==0) partsteps[partition]=0; else partsteps[partition] /= partsizes[partition]; // if (partsteps[partition] >= MINASCRISE && partsizes[partition] > poscount) { /*ascender rise */ bestpos = partsteps[partition]; /*2nd most popular */ poscount = partsizes[partition]; } if (partsteps[partition] <= -MINASCRISE && partsizes[partition] > negcount) { /*ascender rise */ bestneg = partsteps[partition]; /*2nd most popular */ negcount = partsizes[partition]; } } } /*average x-height */ partsteps[bestpart] /= blobcount; row->descdrop = bestneg; }
void find_top_modes | ( | STATS * | stats, |
int | statnum, | ||
int | modelist[], | ||
int | modenum | ||
) |
Definition at line 1632 of file oldbasel.cpp.
{ int mode_count; int last_i = 0; int last_max = MAX_INT32; int i; int mode; int total_max = 0; int mode_factor = textord_ocropus_mode ? kMinModeFactorOcropus : kMinModeFactor; for (mode_count = 0; mode_count < modenum; mode_count++) { mode = 0; for (i = 0; i < statnum; i++) { if (stats->pile_count (i) > stats->pile_count (mode)) { if ((stats->pile_count (i) < last_max) || ((stats->pile_count (i) == last_max) && (i > last_i))) { mode = i; } } } last_i = mode; last_max = stats->pile_count (last_i); total_max += last_max; if (last_max <= total_max / mode_factor) mode = 0; modelist[mode_count] = mode; } }
int get_blob_coords | ( | TO_ROW * | row, |
inT32 | lineheight, | ||
TBOX * | blobcoords, | ||
BOOL8 & | holed_line, | ||
int & | outcount | ||
) |
Definition at line 447 of file oldbasel.cpp.
{ //blobs BLOBNBOX_IT blob_it = row->blob_list (); register int blobindex; /*no along text line */ int losscount; //lost blobs int maxlosscount; //greatest lost blobs /*height stat collection */ STATS heightstat (0, MAXHEIGHT); if (blob_it.empty ()) return 0; //none maxlosscount = 0; losscount = 0; blob_it.mark_cycle_pt (); blobindex = 0; do { blobcoords[blobindex] = box_next_pre_chopped (&blob_it); if (blobcoords[blobindex].height () > lineheight * 0.25) heightstat.add (blobcoords[blobindex].height (), 1); if (blobindex == 0 || blobcoords[blobindex].height () > lineheight * 0.25 || blob_it.cycled_list ()) { blobindex++; /*no of merged blobs */ losscount = 0; } else { if (blobcoords[blobindex].height () < blobcoords[blobindex].width () * oldbl_dot_error_size && blobcoords[blobindex].width () < blobcoords[blobindex].height () * oldbl_dot_error_size) { //counts as dot blobindex++; losscount = 0; } else { losscount++; //lost it if (losscount > maxlosscount) //remember max maxlosscount = losscount; } } } while (!blob_it.cycled_list ()); holed_line = maxlosscount > oldbl_holed_losscount; outcount = blobindex; /*total blobs */ if (heightstat.get_total () > 1) /*guess x-height */ return (int) heightstat.ile (0.25); else return blobcoords[0].height (); }
Definition at line 912 of file oldbasel.cpp.
{ register int blobindex; /*current blob */ int xcentre; /*xcoord */ int lastx; /*last xcentre */ float diffsum; /*sum of diffs */ float diff; /*current difference */ float drift; /*sum of spline steps */ float bestsum; /*smallest diffsum */ int bestindex; /*index of bestsum */ diffsum = 0.0f; bestindex = 0; bestsum = (float) MAX_INT32; drift = 0.0f; lastx = blobcoords[0].left (); /*do each blob in row */ for (blobindex = 0; blobindex < blobcount; blobindex++) { /*centre of blob */ xcentre = (blobcoords[blobindex].left () + blobcoords[blobindex].right ()) >> 1; //step functions in spline drift += spline->step (lastx, xcentre); lastx = xcentre; diff = blobcoords[blobindex].bottom (); diff -= spline->y (xcentre); diff += drift; ydiffs[blobindex] = diff; /*store difference */ if (blobindex > 2) /*remove old one */ diffsum -= ABS (ydiffs[blobindex - 3]); diffsum += ABS (diff); /*add new one */ if (blobindex >= 2 && diffsum < bestsum) { bestsum = diffsum; /*find min sum */ bestindex = blobindex - 1; /*middle of set */ } } return bestindex; }
/**
 * insert_spline_point
 *
 * Definition at line 1353 of file oldbasel.cpp.
 *
 * Replace the boundary xstarts[segment] by the two boundaries coord1 and
 * coord2, moving every later boundary up one slot and incrementing
 * `segments`.
 *
 * @param xstarts  boundary array; must have room for index segments + 1
 * @param segment  index of the boundary to replace
 * @param coord1   new value for xstarts[segment]
 * @param coord2   new value for xstarts[segment + 1]
 * @param segments in/out segment count, incremented by one
 */
void insert_spline_point(int xstarts[], int segment, int coord1, int coord2,
                         int &segments) {
  // Shuffle entries (segment, segments] up by one, working from the top
  // so nothing is overwritten before it has been copied.
  int index = segments;
  while (index > segment) {
    xstarts[index + 1] = xstarts[index];
    --index;
  }
  ++segments;
  xstarts[segment] = coord1;
  xstarts[segment + 1] = coord2;
}
void make_first_baseline | ( | TBOX | blobcoords[], |
int | blobcount, | ||
int | xcoords[], | ||
int | ycoords[], | ||
QSPLINE * | spline, | ||
QSPLINE * | baseline, | ||
float | jumplimit | ||
) |
Definition at line 517 of file oldbasel.cpp.
{
  /* Build a first approximation to the row's baseline in *baseline.
   *
   * If the supplied spline is NULL, has fewer than 3 segments, or its inner
   * x range misses more than MAXOVERLAP of the row width at either end, the
   * baseline is fitted from the blob bottoms (unless textord_oldbl_paradef
   * is set, in which case the function returns with *baseline, xcoords and
   * ycoords all untouched). Otherwise *baseline is a copy of the spline
   * shifted vertically so that it passes through the bottom of the first
   * blob at that blob's right edge.
   *
   * On the fitting path xcoords/ycoords are filled with blob centres and
   * bottoms. jumplimit is a by-value parameter and is scaled locally. */
  int leftedge;                  /*left edge of line */
  int rightedge;                 /*right edge of line */
  int blobindex;                 /*current blob */
  int segment;                   /*current segment */
  float prevy, thisy, nexty;     /*3 y coords */
  float y1, y2, y3;              /*3 smooth blobs */
  float maxmax, minmin;          /*absolute limits */
  int x2 = 0;                    /*right edge of old y3 */
  int ycount;                    /*no of ycoords in use */
  float yturns[SPLINESIZE];      /*y coords of turn pts */
  int xturns[SPLINESIZE];        /*xcoords of turn pts */
  int xstarts[SPLINESIZE + 1];
  int segments;                  //no of segments
  ICOORD shift;                  //shift of spline

  prevy = 0;
                                 /*left edge of row */
  leftedge = blobcoords[0].left ();
                                 /*right edge of line */
  rightedge = blobcoords[blobcount - 1].right ();
  if (spline == NULL             /*no given spline */
    || spline->segments < 3      /*or trivial */
                                 /*or too non-overlap */
    || spline->xcoords[1] > leftedge + MAXOVERLAP * (rightedge - leftedge)
    || spline->xcoords[spline->segments - 1] < rightedge
    - MAXOVERLAP * (rightedge - leftedge)) {
    if (textord_oldbl_paradef)
      return;                    //use default
    /* First pass: one segment spanning the row, fitted to the bottoms. */
    xstarts[0] = blobcoords[0].left () - 1;
    for (blobindex = 0; blobindex < blobcount; blobindex++) {
      xcoords[blobindex] = (blobcoords[blobindex].left ()
        + blobcoords[blobindex].right ()) / 2;
      ycoords[blobindex] = blobcoords[blobindex].bottom ();
    }
    xstarts[1] = blobcoords[blobcount - 1].right () + 1;
    segments = 1;                /*no of segments */

                                 /*linear */
    *baseline = QSPLINE (xstarts, segments, xcoords, ycoords, blobcount, 1);

    if (blobcount >= 3) {
      /* Look for turning points in the residuals from that first fit,
       * using only "smooth" points whose residual is within jumplimit of
       * both neighbours. */
      y1 = y2 = y3 = 0.0f;
      ycount = 0;
      segment = 0;               /*no of segments */
      maxmax = minmin = 0.0f;
      thisy = ycoords[0] - baseline->y (xcoords[0]);
      nexty = ycoords[1] - baseline->y (xcoords[1]);
      for (blobindex = 2; blobindex < blobcount; blobindex++) {
        prevy = thisy;           /*shift ycoords */
        thisy = nexty;
        nexty = ycoords[blobindex] - baseline->y (xcoords[blobindex]);
                                 /*middle of smooth y */
        if (ABS (thisy - prevy) < jumplimit && ABS (thisy - nexty) < jumplimit) {
          y1 = y2;               /*shift window */
          y2 = y3;
          y3 = thisy;            /*middle point */
          ycount++;
                                 /*local max */
          if (ycount >= 3 && ((y1 < y2 && y2 >= y3)
                                 /*local min */
            || (y1 > y2 && y2 <= y3))) {
            /* Record at most SPLINESIZE - 2 turning points. */
            if (segment < SPLINESIZE - 2) {
                                 /*turning pt */
              xturns[segment] = x2;
              yturns[segment] = y2;
              segment++;         /*no of spline segs */
            }
          }
          if (ycount == 1) {
            maxmax = minmin = y3;/*initialise limits */
          }
          else {
            if (y3 > maxmax)
              maxmax = y3;       /*biggest max */
            if (y3 < minmin)
              minmin = y3;       /*smallest min */
          }
                                 /*possible turning pt */
          x2 = blobcoords[blobindex - 1].right ();
        }
      }

      jumplimit *= 1.2;
                                 /*must be wavy */
      if (maxmax - minmin > jumplimit) {
        /* Residual range is large enough: turn the significant turning
         * points into segment boundaries and refit. */
        ycount = segment;        /*no of segments */
        for (blobindex = 0, segment = 1; blobindex < ycount; blobindex++) {
          if (yturns[blobindex] > minmin + jumplimit
            || yturns[blobindex] < maxmax - jumplimit) {
                                 /*significant peak */
            if (segment == 1
              || yturns[blobindex] > prevy + jumplimit
              || yturns[blobindex] < prevy - jumplimit) {
                                 /*different to previous */
              xstarts[segment] = xturns[blobindex];
              segment++;
              prevy = yturns[blobindex];
            }
                                 /*bigger max */
            else if ((prevy > minmin + jumplimit && yturns[blobindex] > prevy)
                                 /*smaller min */
              || (prevy < maxmax - jumplimit && yturns[blobindex] < prevy)) {
              xstarts[segment - 1] = xturns[blobindex];
                                 /*improved previous */
              prevy = yturns[blobindex];
            }
          }
        }
        xstarts[segment] = blobcoords[blobcount - 1].right () + 1;
        segments = segment;      /*no of segments */
                                 /*linear */
        *baseline = QSPLINE (xstarts, segments, xcoords, ycoords, blobcount, 1);
      }
    }
  }
  else {
    /* Usable spline supplied: copy it and shift it vertically onto the
     * first blob. */
    *baseline = *spline;         /*copy it */
    shift = ICOORD (0, (inT16) (blobcoords[0].bottom ()
      - spline->y (blobcoords[0].right ())));
    baseline->move (shift);
  }
}
void make_first_xheight | ( | TO_ROW * | row, |
TBOX | blobcoords[], | ||
int | lineheight, | ||
int | init_lineheight, | ||
int | blobcount, | ||
QSPLINE * | baseline, | ||
float | jumplimit | ||
) |
Definition at line 1547 of file oldbasel.cpp.
{ STATS heightstat (0, HEIGHTBUCKETS); int lefts[HEIGHTBUCKETS]; int rights[HEIGHTBUCKETS]; int modelist[MODENUM]; int blobindex; int mode_count; //blobs to count in thr int sign_bit; int mode_threshold; const int kBaselineTouch = 2; // This really should change with resolution. const int kGoodStrength = 8; // Strength of baseline-touching heights. const float kMinHeight = 0.25; // Min fraction of lineheight to use. sign_bit = row->xheight > 0 ? 1 : -1; memset(lefts, 0, HEIGHTBUCKETS * sizeof(lefts[0])); memset(rights, 0, HEIGHTBUCKETS * sizeof(rights[0])); mode_count = 0; for (blobindex = 0; blobindex < blobcount; blobindex++) { int xcenter = (blobcoords[blobindex].left () + blobcoords[blobindex].right ()) / 2; float base = baseline->y(xcenter); float bottomdiff = fabs(base - blobcoords[blobindex].bottom()); int strength = textord_ocropus_mode && bottomdiff <= kBaselineTouch ? kGoodStrength : 1; int height = static_cast<int>(blobcoords[blobindex].top () - base + 0.5); if (blobcoords[blobindex].height () > init_lineheight * kMinHeight) { if (height > lineheight * oldbl_xhfract && height > textord_min_xheight) { heightstat.add (height, strength); if (height < HEIGHTBUCKETS) { if (xcenter > rights[height]) rights[height] = xcenter; if (xcenter > 0 && (lefts[height] == 0 || xcenter < lefts[height])) lefts[height] = xcenter; } } mode_count += strength; } } mode_threshold = (int) (blobcount * 0.1); if (oldbl_dot_error_size > 1 || oldbl_xhfix) mode_threshold = (int) (mode_count * 0.1); if (textord_oldbl_debug) { tprintf ("blobcount=%d, mode_count=%d, mode_t=%d\n", blobcount, mode_count, mode_threshold); } find_top_modes(&heightstat, HEIGHTBUCKETS, modelist, MODENUM); if (textord_oldbl_debug) { for (blobindex = 0; blobindex < MODENUM; blobindex++) tprintf ("mode[%d]=%d ", blobindex, modelist[blobindex]); tprintf ("\n"); } pick_x_height(row, modelist, lefts, rights, &heightstat, mode_threshold); if (textord_oldbl_debug) tprintf ("Output xheight=%g\n", row->xheight); if 
(row->xheight < 0 && textord_oldbl_debug) tprintf ("warning: Row Line height < 0; %4.2f\n", row->xheight); if (sign_bit < 0) row->xheight = -row->xheight; }
void make_holed_baseline | ( | TBOX | blobcoords[], |
int | blobcount, | ||
QSPLINE * | spline, | ||
QSPLINE * | baseline, | ||
float | gradient | ||
) |
Definition at line 660 of file oldbasel.cpp.
{ int leftedge; /*left edge of line */ int rightedge; /*right edge of line */ int blobindex; /*current blob */ float x; //centre of row ICOORD shift; //shift of spline tesseract::DetLineFit lms; // straight baseline inT32 xstarts[2]; //straight line double coeffs[3]; float c; //line parameter /*left edge of row */ leftedge = blobcoords[0].left (); /*right edge of line */ rightedge = blobcoords[blobcount - 1].right(); for (blobindex = 0; blobindex < blobcount; blobindex++) { lms.Add(ICOORD((blobcoords[blobindex].left() + blobcoords[blobindex].right()) / 2, blobcoords[blobindex].bottom())); } lms.ConstrainedFit(gradient, &c); xstarts[0] = leftedge; xstarts[1] = rightedge; coeffs[0] = 0; coeffs[1] = gradient; coeffs[2] = c; *baseline = QSPLINE (1, xstarts, coeffs); if (spline != NULL /*no given spline */ && spline->segments >= 3 /*or trivial */ /*or too non-overlap */ && spline->xcoords[1] <= leftedge + MAXOVERLAP * (rightedge - leftedge) && spline->xcoords[spline->segments - 1] >= rightedge - MAXOVERLAP * (rightedge - leftedge)) { *baseline = *spline; /*copy it */ x = (leftedge + rightedge) / 2.0; shift = ICOORD (0, (inT16) (gradient * x + c - spline->y (x))); baseline->move (shift); } }
void merge_oldbl_parts | ( | TBOX | blobcoords[], |
int | blobcount, | ||
char | partids[], | ||
int | partsizes[], | ||
int | biggestpart, | ||
float | jumplimit | ||
) |
Definition at line 799 of file oldbasel.cpp.
{
  /* Re-absorb runs of blobs into the biggest partition.
   *
   * Scans the blobs left to right. Each time the partition id changes, the
   * run that just ended is examined if it was not in biggestpart and is
   * longer than MAXBADRUN: a line is fitted through the bottoms of the run,
   * and the nearest biggestpart blob on each side is tested against it. If
   * any tested blob lies within jumplimit of the fitted line, every blob of
   * the run is relabelled as biggestpart.
   *
   * A run that reaches the end of the row is never examined, because the
   * check is only triggered by a change of partition id. */
  BOOL8 found_one;               //found a bestpart blob
  BOOL8 close_one;               //found was close enough
  register int blobindex;        /*no along text line */
  int prevpart;                  //previous iteration
  int runlength;                 //no in this part
  float diff;                    /*difference from line */
  int startx;                    /*index of start blob */
  int test_blob;                 //another index
  FCOORD coord;                  //blob coordinate
  float m, c;                    //fitted line
  QLSQ stats;                    //line stuff

  prevpart = biggestpart;
  runlength = 0;
  startx = 0;
  for (blobindex = 0; blobindex < blobcount; blobindex++) {
    if (partids[blobindex] != prevpart) {
      //                      tprintf("Partition change at (%d,%d) from %d to %d after run of %d\n",
      //                              blobcoords[blobindex].left(),blobcoords[blobindex].bottom(),
      //                              prevpart,partids[blobindex],runlength);
      if (prevpart != biggestpart && runlength > MAXBADRUN) {
        /* Fit a line through the bottoms of blobs [startx, blobindex). */
        stats.clear ();
        for (test_blob = startx; test_blob < blobindex; test_blob++) {
          coord = FCOORD ((blobcoords[test_blob].left ()
            + blobcoords[test_blob].right ()) / 2.0,
            blobcoords[test_blob].bottom ());
          stats.add (coord.x (), coord.y ());
        }
        stats.fit (1);
        m = stats.get_b ();
        c = stats.get_c ();
        if (textord_oldbl_debug)
          tprintf ("Fitted line y=%g x + %g\n", m, c);
        found_one = FALSE;
        close_one = FALSE;
        /* Walk outwards from the run, one step at a time in both
         * directions, until a biggestpart blob is found on either side or
         * both ends of the row have been passed. */
        for (test_blob = 1; !found_one
          && (startx - test_blob >= 0
          || blobindex + test_blob <= blobcount); test_blob++) {
          /* Blob test_blob places before the start of the run. */
          if (startx - test_blob >= 0
            && partids[startx - test_blob] == biggestpart) {
            found_one = TRUE;
            coord = FCOORD ((blobcoords[startx - test_blob].left ()
              + blobcoords[startx - test_blob].right ()) / 2.0,
              blobcoords[startx - test_blob].bottom ());
            diff = m * coord.x () + c - coord.y ();
            if (textord_oldbl_debug)
              tprintf ("Diff of common blob to suspect part=%g at (%g,%g)\n", diff, coord.x (), coord.y ());
            if (diff < jumplimit && -diff < jumplimit)
              close_one = TRUE;
          }
          /* Blob test_blob - 1 places after the end of the run (the first
           * one tested is blobindex itself). */
          if (blobindex + test_blob <= blobcount
            && partids[blobindex + test_blob - 1] == biggestpart) {
            found_one = TRUE;
            coord = FCOORD ((blobcoords[blobindex + test_blob - 1].
              left () + blobcoords[blobindex + test_blob - 1].right ()) / 2.0,
              blobcoords[blobindex + test_blob - 1].bottom ());
            diff = m * coord.x () + c - coord.y ();
            if (textord_oldbl_debug)
              tprintf ("Diff of common blob to suspect part=%g at (%g,%g)\n", diff, coord.x (), coord.y ());
            if (diff < jumplimit && -diff < jumplimit)
              close_one = TRUE;
          }
        }
        if (close_one) {
          if (textord_oldbl_debug)
            tprintf ("Merged %d blobs back into part %d from %d starting at (%d,%d)\n", runlength, biggestpart, prevpart, blobcoords[startx].left (), blobcoords[startx].bottom ());
                                 //switch sides
          /* NOTE(review): the run is removed from partsizes[prevpart] but
           * partsizes[biggestpart] is not increased - confirm this is
           * intended. */
          partsizes[prevpart] -= runlength;
          for (test_blob = startx; test_blob < blobindex; test_blob++)
            partids[test_blob] = biggestpart;
        }
      }
      /* Start tracking the new run. */
      prevpart = partids[blobindex];
      runlength = 1;
      startx = blobindex;
    }
    else
      runlength++;
  }
}
void old_first_xheight | ( | TO_ROW * | row, |
TBOX | blobcoords[], | ||
int | initialheight, | ||
int | blobcount, | ||
QSPLINE * | baseline, | ||
float | jumplimit | ||
) |
Definition at line 1462 of file oldbasel.cpp.
{ register int blobindex; /*current blob */ /*height statistics */ STATS heightstat (0, MAXHEIGHT); int height; /*height of blob */ int xcentre; /*centre of blob */ int lineheight; /*approx xheight */ float ascenders; /*ascender sum */ int asccount; /*no of ascenders */ float xsum; /*xheight sum */ int xcount; /*xheight count */ register float diff; /*height difference */ if (blobcount > 1) { for (blobindex = 0; blobindex < blobcount; blobindex++) { xcentre = (blobcoords[blobindex].left () + blobcoords[blobindex].right ()) / 2; /*height of blob */ height = (int) (blobcoords[blobindex].top () - baseline->y (xcentre) + 0.5); if (height > initialheight * oldbl_xhfract && height > textord_min_xheight) heightstat.add (height, 1); } if (heightstat.get_total () > 3) { lineheight = (int) heightstat.ile (0.25); if (lineheight <= 0) lineheight = (int) heightstat.ile (0.5); } else lineheight = initialheight; } else { lineheight = (int) (blobcoords[0].top () - baseline->y ((blobcoords[0].left () + blobcoords[0].right ()) / 2) + 0.5); } xsum = 0.0f; xcount = 0; for (ascenders = 0.0f, asccount = 0, blobindex = 0; blobindex < blobcount; blobindex++) { xcentre = (blobcoords[blobindex].left () + blobcoords[blobindex].right ()) / 2; diff = blobcoords[blobindex].top () - baseline->y (xcentre); /*is it ascender */ if (diff > lineheight + jumplimit) { ascenders += diff; asccount++; /*count ascenders */ } else if (diff > lineheight - jumplimit) { xsum += diff; /*mean xheight */ xcount++; } } if (xcount > 0) xsum /= xcount; /*average xheight */ else xsum = (float) lineheight; /*guess it */ row->xheight *= xsum; if (asccount > 0) row->ascrise = ascenders / asccount - xsum; else row->ascrise = 0.0f; /*had none */ if (row->xheight == 0) row->xheight = -1.0f; }
int partition_coords | ( | TBOX | blobcoords[], |
int | blobcount, | ||
char | partids[], | ||
int | bestpart, | ||
int | xcoords[], | ||
int | ycoords[] | ||
) |
*merge_partitions(partids,partcount,blobcount,bestpart) discards funny looking
Definition at line 1090 of file oldbasel.cpp.
{ register int blobindex; /*no along text line */ int pointcount; /*no of points */ pointcount = 0; for (blobindex = 0; blobindex < blobcount; blobindex++) { if (partids[blobindex] == bestpart) { /*centre of blob */ xcoords[pointcount] = (blobcoords[blobindex].left () + blobcoords[blobindex].right ()) >> 1; ycoords[pointcount++] = blobcoords[blobindex].bottom (); } } return pointcount; /*no of points found */ }
int partition_line | ( | TBOX | blobcoords[], |
int | blobcount, | ||
int * | numparts, | ||
char | partids[], | ||
int | partsizes[], | ||
QSPLINE * | spline, | ||
float | jumplimit, | ||
float | ydiffs[] | ||
) |
Definition at line 718 of file oldbasel.cpp.
{ register int blobindex; /*no along text line */ int bestpart; /*best new partition */ int biggestpart; /*part with most members */ float diff; /*difference from line */ int startx; /*index of start blob */ float partdiffs[MAXPARTS]; /*step between parts */ for (bestpart = 0; bestpart < MAXPARTS; bestpart++) partsizes[bestpart] = 0; /*zero them all */ startx = get_ydiffs (blobcoords, blobcount, spline, ydiffs); *numparts = 1; /*1 partition */ bestpart = -1; /*first point */ float drift = 0.0f; float last_delta = 0.0f; for (blobindex = startx; blobindex < blobcount; blobindex++) { /*do each blob in row */ diff = ydiffs[blobindex]; /*diff from line */ if (textord_oldbl_debug) { tprintf ("%d(%d,%d), ", blobindex, blobcoords[blobindex].left (), blobcoords[blobindex].bottom ()); } bestpart = choose_partition(diff, partdiffs, bestpart, jumplimit, &drift, &last_delta, numparts); /*record partition */ partids[blobindex] = bestpart; partsizes[bestpart]++; /*another in it */ } bestpart = -1; /*first point */ drift = 0.0f; last_delta = 0.0f; partsizes[0]--; /*doing 1st pt again */ /*do each blob in row */ for (blobindex = startx; blobindex >= 0; blobindex--) { diff = ydiffs[blobindex]; /*diff from line */ if (textord_oldbl_debug) { tprintf ("%d(%d,%d), ", blobindex, blobcoords[blobindex].left (), blobcoords[blobindex].bottom ()); } bestpart = choose_partition(diff, partdiffs, bestpart, jumplimit, &drift, &last_delta, numparts); /*record partition */ partids[blobindex] = bestpart; partsizes[bestpart]++; /*another in it */ } for (biggestpart = 0, bestpart = 1; bestpart < *numparts; bestpart++) if (partsizes[bestpart] >= partsizes[biggestpart]) biggestpart = bestpart; /*new biggest */ if (textord_oldbl_merge_parts) merge_oldbl_parts(blobcoords, blobcount, partids, partsizes, biggestpart, jumplimit); return biggestpart; /*biggest partition */ }
void pick_x_height | ( | TO_ROW * | row, |
int | modelist[], | ||
int | lefts[], | ||
int | rights[], | ||
STATS * | heightstat, | ||
int | mode_threshold | ||
) |
Definition at line 1672 of file oldbasel.cpp.
{
  /* Choose row->xheight and row->ascrise from the list of height modes.
   *
   * First looks for a pair of modes (x, y) whose ratio y/x lies strictly
   * between 1.2 and 1.8, with mode x holding more than mode_threshold
   * entries; x is then taken as the x-height and y as the ascender height,
   * each nudged upwards while a suitable neighbouring mode holds more than
   * half as many entries. If no such pair exists, the first mode alone is
   * used and ascrise is set to 0. An x-height of 0 is reported as -1.0f.
   *
   * In textord_ocropus_mode a pair is also required to overlap
   * horizontally, judged from lefts[] / rights[] indexed by mode value. */
  int x;
  int y;
  int z;
  float ratio;
  int found_one_bigger = FALSE;
  int best_x_height = 0;
  int best_asc = 0;
  int num_in_best;

  for (x = 0; x < MODENUM; x++) {
    for (y = 0; y < MODENUM; y++) {
      /* Check for two modes */
      if (modelist[x] && modelist[y] &&
          heightstat->pile_count (modelist[x]) > mode_threshold &&
          (!textord_ocropus_mode ||
           MIN(rights[modelist[x]], rights[modelist[y]]) >
           MAX(lefts[modelist[x]], lefts[modelist[y]]))) {
        ratio = (float) modelist[y] / (float) modelist[x];
        if (1.2 < ratio && ratio < 1.8) {
          /* Two modes found */
          best_x_height = modelist[x];
          num_in_best = heightstat->pile_count (modelist[x]);

          /* Try to get one higher */
          do {
            found_one_bigger = FALSE;
            for (z = 0; z < MODENUM; z++) {
              /* NOTE(review): the overlap test below still uses modes x
               * and y, not z - confirm this is intended. */
              if (modelist[z] == best_x_height + 1 &&
                  (!textord_ocropus_mode ||
                   MIN(rights[modelist[x]], rights[modelist[y]]) >
                   MAX(lefts[modelist[x]], lefts[modelist[y]]))) {
                ratio = (float) modelist[y] / (float) modelist[z];
                if ((1.2 < ratio && ratio < 1.8) &&
                    /* Should be half of best */
                    heightstat->pile_count (modelist[z]) > num_in_best * 0.5) {
                  best_x_height++;
                  found_one_bigger = TRUE;
                  break;
                }
              }
            }
          }
          while (found_one_bigger);

          /* try to get a higher ascender */
          best_asc = modelist[y];
          num_in_best = heightstat->pile_count (modelist[y]);

          /* Try to get one higher */
          do {
            found_one_bigger = FALSE;
            for (z = 0; z < MODENUM; z++) {
              /* NOTE(review): as above, the overlap test uses x and y. */
              if (modelist[z] > best_asc &&
                  (!textord_ocropus_mode ||
                   MIN(rights[modelist[x]], rights[modelist[y]]) >
                   MAX(lefts[modelist[x]], lefts[modelist[y]]))) {
                ratio = (float) modelist[z] / (float) best_x_height;
                if ((1.2 < ratio && ratio < 1.8) &&
                    /* Should be half of best */
                    heightstat->pile_count (modelist[z]) > num_in_best * 0.5) {
                  best_asc = modelist[z];
                  found_one_bigger = TRUE;
                  break;
                }
              }
            }
          }
          while (found_one_bigger);

          row->xheight = (float) best_x_height;
          row->ascrise = (float) best_asc - best_x_height;
          return;
        }
      }
    }
  }

  best_x_height = modelist[0];   /* Single Mode found */
  num_in_best = heightstat->pile_count (best_x_height);
  do {
    /* Try to get one higher */
    found_one_bigger = FALSE;
    for (z = 1; z < MODENUM; z++) {
      /* Should be half of best */
      if ((modelist[z] == best_x_height + 1) &&
          (heightstat->pile_count (modelist[z]) > num_in_best * 0.5)) {
        best_x_height++;
        found_one_bigger = TRUE;
        break;
      }
    }
  }
  while (found_one_bigger);

  row->ascrise = 0.0f;
  row->xheight = (float) best_x_height;
  /* 0 is reported as -1.0f. */
  if (row->xheight == 0)
    row->xheight = -1.0f;
}
int segment_spline | ( | TBOX | blobcoords[], |
int | blobcount, | ||
int | xcoords[], | ||
int | ycoords[], | ||
int | degree, | ||
int | pointcount, | ||
int | xstarts[] | ||
) |
Definition at line 1121 of file oldbasel.cpp.
{
  /* Choose the segment boundaries (xstarts) for a spline through the points
   * (xcoords, ycoords).
   *
   * Turning points are local extrema of ycoords that differ from the
   * previous opposite extremum by more than TURNLIMIT. A boundary is placed
   * between each pair of successive turning points. xstarts[0] is one left
   * of the first point and the final boundary is one right of the last
   * point. Returns the number of segments; this is 1 when degree < 2, when
   * pointcount <= 3, or when no turning points are found.
   *
   * xcoords[0] and xcoords[pointcount - 1] are read unconditionally. */
  register int ptindex;          /*no along text line */
  register int segment;          /*partition no */
  int lastmin, lastmax;          /*possible turn points */
  int turnpoints[SPLINESIZE];    /*good turning points */
  int turncount;                 /*no of turning points */
  int max_x;                     //max specified coord

  xstarts[0] = xcoords[0] - 1;   //leftmost defined pt
  max_x = xcoords[pointcount - 1] + 1;
  /* Below degree 2 the turning-point search is skipped entirely. */
  if (degree < 2)
    pointcount = 0;
  turncount = 0;                 /*no turning points yet */
  if (pointcount > 3) {
    ptindex = 1;
    lastmax = lastmin = 0;       /*start with first one */
    while (ptindex < pointcount - 1 && turncount < SPLINESIZE - 1) {
                                 /*minimum */
      if (ycoords[ptindex - 1] > ycoords[ptindex]
        && ycoords[ptindex] <= ycoords[ptindex + 1]) {
        if (ycoords[ptindex] < ycoords[lastmax] - TURNLIMIT) {
          /* Far enough below the last maximum: that maximum becomes a
           * turning point, unless it is already the latest one. */
          if (turncount == 0 || turnpoints[turncount - 1] != lastmax)
                                 /*new max point */
            turnpoints[turncount++] = lastmax;
          lastmin = ptindex;     /*latest minimum */
        }
        else if (ycoords[ptindex] < ycoords[lastmin]) {
          lastmin = ptindex;     /*lower minimum */
        }
      }

                                 /*maximum */
      if (ycoords[ptindex - 1] < ycoords[ptindex]
        && ycoords[ptindex] >= ycoords[ptindex + 1]) {
        if (ycoords[ptindex] > ycoords[lastmin] + TURNLIMIT) {
          /* Far enough above the last minimum: that minimum becomes a
           * turning point, unless it is already the latest one. */
          if (turncount == 0 || turnpoints[turncount - 1] != lastmin)
                                 /*new min point */
            turnpoints[turncount++] = lastmin;
          lastmax = ptindex;     /*latest maximum */
        }
        else if (ycoords[ptindex] > ycoords[lastmax]) {
          lastmax = ptindex;     /*higher maximum */
        }
      }
      ptindex++;
    }
    /* ptindex is now the point at which the scan stopped; treat it as a
     * possible final extremum. */
                                 /*possible global min */
    if (ycoords[ptindex] < ycoords[lastmax] - TURNLIMIT
      && (turncount == 0 || turnpoints[turncount - 1] != lastmax)) {
      if (turncount < SPLINESIZE - 1)
                                 /*2 more turns */
        turnpoints[turncount++] = lastmax;
      if (turncount < SPLINESIZE - 1)
        turnpoints[turncount++] = ptindex;
    }
    else if (ycoords[ptindex] > ycoords[lastmin] + TURNLIMIT
                                 /*possible global max */
      && (turncount == 0 || turnpoints[turncount - 1] != lastmin)) {
      if (turncount < SPLINESIZE - 1)
                                 /*2 more turns */
        turnpoints[turncount++] = lastmin;
      if (turncount < SPLINESIZE - 1)
        turnpoints[turncount++] = ptindex;
    }
    else if (turncount > 0 && turnpoints[turncount - 1] == lastmin
      && turncount < SPLINESIZE - 1) {
      /* Latest turning point is a minimum: close with the higher of the
       * stopping point and the last maximum. */
      if (ycoords[ptindex] > ycoords[lastmax])
        turnpoints[turncount++] = ptindex;
      else
        turnpoints[turncount++] = lastmax;
    }
    else if (turncount > 0 && turnpoints[turncount - 1] == lastmax
      && turncount < SPLINESIZE - 1) {
      /* Latest turning point is a maximum: close with the lower of the
       * stopping point and the last minimum. */
      if (ycoords[ptindex] < ycoords[lastmin])
        turnpoints[turncount++] = ptindex;
      else
        turnpoints[turncount++] = lastmin;
    }
  }

  if (textord_oldbl_debug && turncount > 0)
    tprintf ("First turn is %d at (%d,%d)\n",
      turnpoints[0], xcoords[turnpoints[0]], ycoords[turnpoints[0]]);
  for (segment = 1; segment < turncount; segment++) {
                                 /*centre y coord */
    /* lastmax is reused here to hold the mid y between the two turns. */
    lastmax = (ycoords[turnpoints[segment - 1]]
      + ycoords[turnpoints[segment]]) / 2;

    /* fix alg so that it works with both rising and falling sections */
    if (ycoords[turnpoints[segment - 1]] < ycoords[turnpoints[segment]])
                                 /*find rising y centre */
      for (ptindex = turnpoints[segment - 1] + 1;
        ptindex < turnpoints[segment]
        && ycoords[ptindex + 1] <= lastmax; ptindex++);
    else
                                 /*find falling y centre */
      for (ptindex = turnpoints[segment - 1] + 1;
        ptindex < turnpoints[segment]
        && ycoords[ptindex + 1] >= lastmax; ptindex++);

                                 /*centre x */
    /* Rounded mean of four x values: the two points around the mid-y
     * crossing and the two turning points. */
    xstarts[segment] = (xcoords[ptindex - 1] + xcoords[ptindex]
      + xcoords[turnpoints[segment - 1]]
      + xcoords[turnpoints[segment]] + 2) / 4;
                                 /*halfway between turns */
    if (textord_oldbl_debug)
      tprintf ("Turn %d is %d at (%d,%d), mid pt is %d@%d, final @%d\n",
        segment, turnpoints[segment],
        xcoords[turnpoints[segment]], ycoords[turnpoints[segment]],
        ptindex - 1, xcoords[ptindex - 1], xstarts[segment]);
  }

  /* segment is at least 1 here, so there is always a closing boundary. */
  xstarts[segment] = max_x;
  return segment;                /*no of splines */
}
BOOL8 split_stepped_spline(QSPLINE *baseline, float jumplimit, int xcoords[], int xstarts[], int &segments)
Definition at line 1246 of file oldbasel.cpp.
{
  // Looks at every interior segment boundary of the baseline spline and,
  // where the magnitude of baseline->step() between the midpoints of the two
  // neighbouring segments exceeds jumplimit, calls insert_spline_point() with
  // two new x positions for that boundary. segments is passed on to
  // insert_spline_point() by reference. Returns TRUE if at least one boundary
  // was split this way, otherwise FALSE.
  BOOL8 split_any = FALSE;  // return value
  int seg;                  // boundary being examined
  int first = 0;            // first point index of the left segment
  int mid, last;            // point indices at the boundary / right limit
  int left_idx, right_idx;  // point indices chosen for the two new breaks
  float left_x, right_x;    // ideal x positions of the two new breaks
  float step;               // magnitude of the spline step

  for (seg = 1; seg < segments - 1; seg++) {
    const double left_mid = (xstarts[seg - 1] + xstarts[seg]) / 2.0;
    const double right_mid = (xstarts[seg] + xstarts[seg + 1]) / 2.0;

    step = baseline->step(left_mid, right_mid);
    if (step < 0)
      step = -step;
    if (!(step > jumplimit))
      continue;  // step is small enough; leave this boundary alone

    // Locate the points at the start, centre and end of the segment pair.
    // first carries over from earlier boundaries.
    while (xcoords[first] < xstarts[seg - 1])
      first++;
    mid = first;
    while (xcoords[mid] < xstarts[seg])
      mid++;
    last = mid;
    while (xcoords[last] < xstarts[seg + 1])
      last++;

    if (segments >= SPLINESIZE) {
      if (textord_debug_baselines)
        tprintf("Too many segments to resegment spline!!\n");
      continue;
    }

    if (last - first < textord_spline_medianwin * 3) {
      if (textord_debug_baselines) {
        tprintf("Resegmenting spline failed - insufficient pts (%d,%d,%d,%d)\n",
                first, mid, last, (inT32) textord_spline_medianwin);
      }
      continue;
    }

    // Keep the centre at least 1.5 median windows away from both ends.
    while (mid - first < textord_spline_medianwin * 3 / 2)
      mid++;
    while (last - mid < textord_spline_medianwin * 3 / 2)
      mid--;

    // Start one third of the way in from each end, by index and by x.
    left_idx = (first + first + mid) / 3;
    right_idx = (mid + last + last) / 3;
    left_x = (xcoords[first] * 2 + xcoords[mid]) / 3.0;
    right_x = (xcoords[mid] + xcoords[last] * 2) / 3.0;

    // Move the left index towards left_x, within the window limits, then
    // step back one point if the previous point is nearer to left_x.
    while (xcoords[left_idx] > left_x
           && left_idx - first > textord_spline_medianwin)
      left_idx--;
    while (xcoords[left_idx] < left_x
           && mid - left_idx > textord_spline_medianwin / 2)
      left_idx++;
    if (xcoords[left_idx] - left_x > left_x - xcoords[left_idx - 1])
      left_idx--;

    // The same adjustment for the right index.
    while (xcoords[right_idx] > right_x
           && right_idx - mid > textord_spline_medianwin / 2)
      right_idx--;
    while (xcoords[right_idx] < right_x
           && last - right_idx > textord_spline_medianwin)
      right_idx++;
    if (xcoords[right_idx] - right_x > right_x - xcoords[right_idx - 1])
      right_idx--;

    // Each new break lies halfway between the chosen point and the one
    // before it.
    const int left_break = (xcoords[left_idx - 1] + xcoords[left_idx]) / 2;
    const int right_break = (xcoords[right_idx - 1] + xcoords[right_idx]) / 2;

    if (textord_debug_baselines)
      tprintf("Splitting spline at %d with step %g at (%d,%d)\n",
              xstarts[seg], baseline->step(left_mid, right_mid),
              left_break, right_break);

    insert_spline_point(xstarts, seg, left_break, right_break, segments);
    split_any = TRUE;
  }

  return split_any;
}
const int kMinModeFactor = 12 |
Definition at line 1629 of file oldbasel.cpp.
const int kMinModeFactorOcropus = 32 |
Definition at line 1628 of file oldbasel.cpp.
EXTERN bool oldbl_corrfix = 1 |
"Improve correlation of heights"
Definition at line 45 of file oldbasel.cpp.
EXTERN double oldbl_dot_error_size = 1.26 |
"Max aspect ratio of a dot"
Definition at line 52 of file oldbasel.cpp.
EXTERN int oldbl_holed_losscount = 10 |
"Max lost before fallback line used"
Definition at line 51 of file oldbasel.cpp.
EXTERN bool oldbl_xhfix = 0 |
"Fix bug in modes threshold for xheights"
Definition at line 47 of file oldbasel.cpp.
EXTERN double oldbl_xhfract = 0.4 |
"Fraction of est allowed in calc"
Definition at line 49 of file oldbasel.cpp.
EXTERN bool textord_debug_baselines = 0 |
"Debug baseline generation"
Definition at line 41 of file oldbasel.cpp.
EXTERN bool textord_ocropus_mode = 0 |
"Make baselines for ocropus"
Definition at line 48 of file oldbasel.cpp.
EXTERN bool textord_oldbl_debug = 0 |
"Debug old baseline generation"
Definition at line 40 of file oldbasel.cpp.
EXTERN double textord_oldbl_jumplimit = 0.15 |
"X fraction for new partition"
Definition at line 54 of file oldbasel.cpp.
EXTERN bool textord_oldbl_merge_parts = 1 |
"Merge suspect partitions"
Definition at line 44 of file oldbasel.cpp.
EXTERN bool textord_oldbl_paradef = 1 |
"Use para default mechanism"
Definition at line 42 of file oldbasel.cpp.
EXTERN bool textord_oldbl_split_splines = 1 |
"Split stepped splines"
Definition at line 43 of file oldbasel.cpp.
EXTERN bool textord_really_old_xheight = 0 |
"Use original wiseowl xheight"
Definition at line 39 of file oldbasel.cpp.