Tesseract  3.02
tesseract-ocr/textord/oldbasel.h File Reference
#include "params.h"
#include "blobbox.h"
#include "notdll.h"

Go to the source code of this file.

Functions

int get_blob_coords (TO_ROW *row, inT32 lineheight, TBOX *blobcoords, BOOL8 &holed_line, int &outcount)
void make_first_baseline (TBOX blobcoords[], int blobcount, int xcoords[], int ycoords[], QSPLINE *spline, QSPLINE *baseline, float jumplimit)
void make_holed_baseline (TBOX blobcoords[], int blobcount, QSPLINE *spline, QSPLINE *baseline, float gradient)
int partition_line (TBOX blobcoords[], int blobcount, int *numparts, char partids[], int partsizes[], QSPLINE *spline, float jumplimit, float ydiffs[])
void merge_oldbl_parts (TBOX blobcoords[], int blobcount, char partids[], int partsizes[], int biggestpart, float jumplimit)
int get_ydiffs (TBOX blobcoords[], int blobcount, QSPLINE *spline, float ydiffs[])
int choose_partition (register float diff, float partdiffs[], int lastpart, float jumplimit, float *drift, float *last_delta, int *partcount)
int partition_coords (TBOX blobcoords[], int blobcount, char partids[], int bestpart, int xcoords[], int ycoords[])
 *merge_partitions(partids,partcount,blobcount,bestpart) discards funny looking
int segment_spline (TBOX blobcoords[], int blobcount, int xcoords[], int ycoords[], int degree, int pointcount, int xstarts[])
BOOL8 split_stepped_spline (QSPLINE *baseline, float jumplimit, int xcoords[], int xstarts[], int &segments)
void insert_spline_point (int xstarts[], int segment, int coord1, int coord2, int &segments)
void find_lesser_parts (TO_ROW *row, TBOX blobcoords[], int blobcount, char partids[], int partsizes[], int partcount, int bestpart)
void old_first_xheight (TO_ROW *row, TBOX blobcoords[], int initialheight, int blobcount, QSPLINE *baseline, float jumplimit)
void make_first_xheight (TO_ROW *row, TBOX blobcoords[], int lineheight, int init_lineheight, int blobcount, QSPLINE *baseline, float jumplimit)
int * make_height_array (TBOX blobcoords[], int blobcount, QSPLINE *baseline)
void find_top_modes (STATS *stats, int statnum, int modelist[], int modenum)
void pick_x_height (TO_ROW *row, int modelist[], int lefts[], int rights[], STATS *heightstat, int mode_threshold)

Variables

bool textord_really_old_xheight = 0
bool textord_oldbl_debug = 0
bool textord_debug_baselines = 0
bool textord_oldbl_paradef = 1
bool textord_oldbl_split_splines = 1
bool textord_oldbl_merge_parts = 1
bool oldbl_xhfix = 0
int oldbl_holed_losscount = 10
double oldbl_dot_error_size = 1.26
double textord_oldbl_jumplimit = 0.15

Function Documentation

int choose_partition ( register float  diff,
float  partdiffs[],
int  lastpart,
float  jumplimit,
float *  drift,
float *  last_delta,
int *  partcount 
)

Definition at line 963 of file oldbasel.cpp.

  {
  /* Assigns one blob, given its offset `diff` from the spline, to a
   * partition and returns that partition's index.
   *
   * partdiffs[] holds the reference offset of each partition, *partcount the
   * number of partitions in use (a new one may be appended here), *drift the
   * running drift estimate and *lastdelta the delta of the previous call.
   * (`lastdelta` is the parameter listed as `last_delta` in the declaration.)
   * A negative lastpart marks the first blob of a pass: partition 0 is
   * re-seeded with this blob's offset and the drift state is reset. */
  if (lastpart < 0) {
    lastpart = 0;
    partdiffs[0] = diff;
    *drift = 0.0f;
    *lastdelta = 0.0f;
  }

  /* Offset from the previous blob's partition, drift removed. */
  float delta = diff - partdiffs[lastpart] - *drift;
  if (textord_oldbl_debug) {
    tprintf ("Diff=%.2f, Delta=%.3f, Drift=%.3f, ", diff, delta, *drift);
  }

  /* Default: stay in the previous partition. */
  int bestpart = lastpart;
  if (ABS (delta) > jumplimit / 2) {
    /* Too far from the previous partition: find the nearest existing one. */
    bestpart = 0;
    float nearest = diff - partdiffs[0] - *drift;
    for (int part = 1; part < *partcount; ++part) {
      const float gap = diff - partdiffs[part] - *drift;
      if (ABS (gap) < ABS (nearest)) {
        nearest = gap;
        bestpart = part;
      }
    }
    delta = nearest;

    /* Even the nearest is beyond jumplimit and there is a spare slot:
     * open a new partition anchored at this blob. */
    if (ABS (nearest) > jumplimit && *partcount < MAXPARTS) {
      bestpart = *partcount;
      ++*partcount;
      partdiffs[bestpart] = diff - *drift;
      delta = 0.0f;
    }
  }

  /* Update the drift only when the blob stayed in the same partition and the
   * delta is either small or close to the previous delta.  Note that the
   * expression below evaluates to *drift + delta / 3. */
  if (bestpart == lastpart
      && (ABS (delta - *lastdelta) < jumplimit / 2
          || ABS (delta) < jumplimit / 2)) {
    *drift = (3 * *drift + delta) / 3;
  }
  *lastdelta = delta;

  if (textord_oldbl_debug) {
    tprintf ("P=%d\n", bestpart);
  }

  return bestpart;
}
void find_lesser_parts ( TO_ROW *  row,
TBOX  blobcoords[],
int  blobcount,
char  partids[],
int  partsizes[],
int  partcount,
int  bestpart 
)

Definition at line 1377 of file oldbasel.cpp.

  {
  /* Examines the blobs that are NOT in the best partition and records two
   * results in the row:
   *   row->xheight  = -1.0f if the longest run of consecutive non-best blobs
   *                   exceeds MAXBADRUN, otherwise 1.0f;
   *   row->descdrop = the average step (blob bottom minus row->baseline) of
   *                   the most populated partition whose average step is
   *                   <= -MINASCRISE, or 0 if there is none.
   *
   * The original body also tracked a "best up step" (bestpos/poscount) and
   * divided partsteps[bestpart] by blobcount at the end; neither value was
   * read afterwards, so that dead code has been dropped.  All writes to
   * `row` are unchanged. */
  float partsteps[MAXPARTS];     /*summed, then averaged, step per part */
  int partition;                 /*current partition */

  for (partition = 0; partition < partcount; partition++)
    partsteps[partition] = 0.0f; /*zero accumulators */

  int runlength = 0;             /*length of current non-best run */
  int biggestrun = 0;            /*longest non-best run seen */
  for (int blobindex = 0; blobindex < blobcount; blobindex++) {
    if (partids[blobindex] == bestpart) {
      runlength = 0;             /*run broken by a best-part blob */
      continue;
    }
    runlength++;
    if (runlength > biggestrun)
      biggestrun = runlength;
                                 /*centre of blob */
    const int xcentre = (blobcoords[blobindex].left ()
      + blobcoords[blobindex].right ()) >> 1;
                                 /*accumulate offset from row baseline */
    partsteps[partids[blobindex]] += blobcoords[blobindex].bottom ()
      - row->baseline.y (xcentre);
  }

  if (biggestrun > MAXBADRUN)
    row->xheight = -1.0f;        /*failed */
  else
    row->xheight = 1.0f;         /*success */

  int negcount = 0;              /*size of best down-step partition */
  float bestneg = 0.0f;          /*best down step; 0 = none found */
  for (partition = 0; partition < partcount; partition++) {
    if (partition == bestpart)
      continue;

                                 /*average step; guard empty partitions */
    if (partsizes[partition] == 0)
      partsteps[partition] = 0;
    else
      partsteps[partition] /= partsizes[partition];

    if (partsteps[partition] <= -MINASCRISE
      && partsizes[partition] > negcount) {
      bestneg = partsteps[partition];
      negcount = partsizes[partition];
    }
  }
  row->descdrop = bestneg;
}
void find_top_modes ( STATS *  stats,
int  statnum,
int  modelist[],
int  modenum 
)

Definition at line 1632 of file oldbasel.cpp.

  {
  /* Writes modenum entries into modelist[]: successive "modes" of the
   * histogram `stats` over buckets [0, statnum), in non-increasing order of
   * pile count.  Each pass picks a bucket whose count is below the previous
   * pick's count, or equal to it at a higher bucket index.  A pick whose
   * count is not more than (running total of picked counts) / mode factor is
   * stored as 0. */
  const int min_factor = textord_ocropus_mode ?
                         kMinModeFactorOcropus : kMinModeFactor;
  int prev_bucket = 0;           // bucket chosen on the previous pass
  int prev_count = MAX_INT32;    // its pile count (no limit on first pass)
  int running_total = 0;         // sum of the counts chosen so far

  for (int rank = 0; rank < modenum; ++rank) {
    int pick = 0;
    for (int bucket = 0; bucket < statnum; ++bucket) {
      // Only a strictly larger pile than the current pick can replace it.
      if (stats->pile_count (bucket) <= stats->pile_count (pick))
        continue;
      // ...and it must rank after the previous pass's pick.
      if (stats->pile_count (bucket) < prev_count ||
          (stats->pile_count (bucket) == prev_count &&
           bucket > prev_bucket)) {
        pick = bucket;
      }
    }

    // Remember the real pick before it is possibly zeroed below.
    prev_bucket = pick;
    prev_count = stats->pile_count (pick);
    running_total += prev_count;

    if (prev_count <= running_total / min_factor)
      pick = 0;                  // too weak to report
    modelist[rank] = pick;
  }
}
int get_blob_coords ( TO_ROW *  row,
inT32  lineheight,
TBOX *  blobcoords,
BOOL8 &  holed_line,
int &  outcount 
)

Definition at line 447 of file oldbasel.cpp.

                     {
  /* Copies bounding boxes of the row's blobs into blobcoords[], skipping
   * small blobs that are not roughly square, and returns a height estimate.
   *
   * Outputs:
   *   blobcoords[0..outcount-1]  the boxes that were kept;
   *   outcount                   number of boxes kept;
   *   holed_line                 true when the longest run of consecutively
   *                              skipped boxes exceeds oldbl_holed_losscount.
   * Returns 0 immediately (leaving holed_line and outcount untouched) when
   * the row has no blobs.  Otherwise returns heightstat.ile (0.25) when more
   * than one "tall" box was counted (presumably the lower-quartile height --
   * confirm against STATS::ile), else the height of blobcoords[0].
   *
   * NOTE(review): nothing here bounds blobindex by the capacity of
   * blobcoords[]; the caller must supply a large enough array. */
                                 //blobs
  BLOBNBOX_IT blob_it = row->blob_list ();
  register int blobindex;        /*no along text line */
  int losscount;                 //lost blobs
  int maxlosscount;              //greatest lost blobs
                                 /*height stat collection */
  STATS heightstat (0, MAXHEIGHT);

  if (blob_it.empty ())
    return 0;                    //none
  maxlosscount = 0;
  losscount = 0;
  blob_it.mark_cycle_pt ();
  blobindex = 0;
  do {
    /* Each box is written to the current slot; the slot is only "kept"
     * when blobindex is advanced below, otherwise the next box overwrites
     * it. */
    blobcoords[blobindex] = box_next_pre_chopped (&blob_it);
    /* Only boxes taller than a quarter of lineheight feed the statistic. */
    if (blobcoords[blobindex].height () > lineheight * 0.25)
      heightstat.add (blobcoords[blobindex].height (), 1);
    /* Always keep the first box, any tall box, and the box after which the
     * iterator has cycled (i.e. the last one). */
    if (blobindex == 0
      || blobcoords[blobindex].height () > lineheight * 0.25
    || blob_it.cycled_list ()) {
      blobindex++;               /*no of merged blobs */
      losscount = 0;
    }
    else {
      /* Small box: keep it only if width and height are within a factor of
       * oldbl_dot_error_size of each other. */
      if (blobcoords[blobindex].height ()
        < blobcoords[blobindex].width () * oldbl_dot_error_size
        && blobcoords[blobindex].width ()
      < blobcoords[blobindex].height () * oldbl_dot_error_size) {
                                 //counts as dot
        blobindex++;
        losscount = 0;
      }
      else {
        losscount++;             //lost it
        if (losscount > maxlosscount)
                                 //remember max
            maxlosscount = losscount;
      }
    }
  }
  while (!blob_it.cycled_list ());

  holed_line = maxlosscount > oldbl_holed_losscount;
  outcount = blobindex;          /*total blobs */

  if (heightstat.get_total () > 1)
                                 /*guess x-height */
    return (int) heightstat.ile (0.25);
  else
    return blobcoords[0].height ();
}
int get_ydiffs ( TBOX  blobcoords[],
int  blobcount,
QSPLINE *  spline,
float  ydiffs[] 
)

Definition at line 912 of file oldbasel.cpp.

  {
  /* Fills ydiffs[i] with the offset of blob i's bottom from the spline at
   * the blob's centre x, plus the accumulated spline steps crossed so far.
   * Returns the index of the middle blob of the three consecutive blobs whose
   * absolute offsets sum to the smallest value (0 if there are fewer than
   * three blobs). */
  float window_sum = 0.0f;                 /* sum of |offset| over last <=3 blobs */
  float smallest_sum = (float) MAX_INT32;  /* best window sum so far */
  float step_total = 0.0f;                 /* accumulated spline steps */
  int best_middle = 0;
  int prev_x = blobcoords[0].left ();

  for (int i = 0; i < blobcount; ++i) {
    const int mid_x = (blobcoords[i].left () + blobcoords[i].right ()) >> 1;

    /* Add any step in the spline between the previous centre and this one. */
    step_total += spline->step (prev_x, mid_x);
    prev_x = mid_x;

    float offset = blobcoords[i].bottom ();
    offset -= spline->y (mid_x);
    offset += step_total;
    ydiffs[i] = offset;

    /* Slide the three-blob window: drop the oldest term, add the newest. */
    if (i > 2)
      window_sum -= ABS (ydiffs[i - 3]);
    window_sum += ABS (offset);

    if (i >= 2 && window_sum < smallest_sum) {
      smallest_sum = window_sum;
      best_middle = i - 1;       /* middle blob of the window */
    }
  }
  return best_middle;
}
void insert_spline_point ( int  xstarts[],
int  segment,
int  coord1,
int  coord2,
int &  segments 
)

Definition at line 1353 of file oldbasel.cpp.

  {
  // Shifts xstarts[segment+1 .. segments] up by one slot, then stores coord1
  // at xstarts[segment] and coord2 at xstarts[segment+1].  The previous
  // xstarts[segment] is therefore replaced by coord1, and `segments` grows
  // by one.
  int slot = segments;
  while (slot > segment) {
    xstarts[slot + 1] = xstarts[slot];
    --slot;
  }
  ++segments;
  xstarts[segment] = coord1;
  xstarts[segment + 1] = coord2;
}
void make_first_baseline ( TBOX  blobcoords[],
int  blobcount,
int  xcoords[],
int  ycoords[],
QSPLINE *  spline,
QSPLINE *  baseline,
float  jumplimit 
)

Definition at line 517 of file oldbasel.cpp.

  {
  /* Produces a first baseline for the row in *baseline.
   *
   * If `spline` is usable (non-NULL, at least 3 segments, and overlapping the
   * row to within MAXOVERLAP of its width at both ends) it is copied and
   * shifted vertically so that its value at the right edge of the first blob
   * matches that blob's bottom.
   *
   * Otherwise, when textord_oldbl_paradef is set the function returns at once
   * and leaves *baseline (and xcoords/ycoords) untouched.  When it is not
   * set, xcoords/ycoords are filled with blob centres/bottoms and a
   * single-segment QSPLINE is fitted (final constructor argument 1 --
   * "linear" per the original comment).  With 3 or more blobs the residuals
   * are then scanned for turning points; if their range exceeds
   * 1.2 * jumplimit the row is re-fitted with segment boundaries at the
   * significant turning points. */
  int leftedge;                  /*left edge of line */
  int rightedge;                 /*right edge of line */
  int blobindex;                 /*current blob */
  int segment;                   /*current segment */
  float prevy, thisy, nexty;     /*3 y coords */
  float y1, y2, y3;              /*3 smooth blobs */
  float maxmax, minmin;          /*absolute limits */
  int x2 = 0;                    /*right edge of old y3 */
  int ycount;                    /*no of ycoords in use */
  float yturns[SPLINESIZE];      /*y coords of turn pts */
  int xturns[SPLINESIZE];        /*xcoords of turn pts */
  int xstarts[SPLINESIZE + 1];
  int segments;                  //no of segments
  ICOORD shift;                  //shift of spline

  prevy = 0;
                                 /*left edge of row */
  leftedge = blobcoords[0].left ();
                                 /*right edge of line */
  rightedge = blobcoords[blobcount - 1].right ();
  if (spline == NULL             /*no given spline */
    || spline->segments < 3      /*or trivial */
                                 /*or too non-overlap */
    || spline->xcoords[1] > leftedge + MAXOVERLAP * (rightedge - leftedge)
    || spline->xcoords[spline->segments - 1] < rightedge
  - MAXOVERLAP * (rightedge - leftedge)) {
    if (textord_oldbl_paradef)
      return;                    //use default
    /* One segment spanning the row, one pixel beyond each end. */
    xstarts[0] = blobcoords[0].left () - 1;
    for (blobindex = 0; blobindex < blobcount; blobindex++) {
      xcoords[blobindex] = (blobcoords[blobindex].left ()
        + blobcoords[blobindex].right ()) / 2;
      ycoords[blobindex] = blobcoords[blobindex].bottom ();
    }
    xstarts[1] = blobcoords[blobcount - 1].right () + 1;
    segments = 1;                /*no of segments */

                                 /*linear */
    *baseline = QSPLINE (xstarts, segments, xcoords, ycoords, blobcount, 1);

    if (blobcount >= 3) {
      /* Walk the residuals (blob bottom minus fitted line) three at a time.
       * A blob is "smooth" when it is within jumplimit of both neighbours;
       * y1,y2,y3 hold the last three smooth residuals. */
      y1 = y2 = y3 = 0.0f;
      ycount = 0;
      segment = 0;               /*no of segments */
      maxmax = minmin = 0.0f;
      thisy = ycoords[0] - baseline->y (xcoords[0]);
      nexty = ycoords[1] - baseline->y (xcoords[1]);
      for (blobindex = 2; blobindex < blobcount; blobindex++) {
        prevy = thisy;           /*shift ycoords */
        thisy = nexty;
        nexty = ycoords[blobindex] - baseline->y (xcoords[blobindex]);
                                 /*middle of smooth y */
        if (ABS (thisy - prevy) < jumplimit && ABS (thisy - nexty) < jumplimit) {
          y1 = y2;               /*shift window */
          y2 = y3;
          y3 = thisy;            /*middle point */
          ycount++;
                                 /*local max */
          if (ycount >= 3 && ((y1 < y2 && y2 >= y3)
                                 /*local min */
          || (y1 > y2 && y2 <= y3))) {
            /* Record the turning point at y2; x2 is the right edge noted
             * when y2 was the newest smooth value.  The bound keeps room in
             * xturns/yturns. */
            if (segment < SPLINESIZE - 2) {
                                 /*turning pt */
              xturns[segment] = x2;
              yturns[segment] = y2;
              segment++;         /*no of spline segs */
            }
          }
          if (ycount == 1) {
            maxmax = minmin = y3;/*initialise limits */
          }
          else {
            if (y3 > maxmax)
              maxmax = y3;       /*biggest max */
            if (y3 < minmin)
              minmin = y3;       /*smallest min */
          }
                                 /*possible turning pt */
          x2 = blobcoords[blobindex - 1].right ();
        }
      }

      /* From here on jumplimit is the local, enlarged copy. */
      jumplimit *= 1.2;
                                 /*must be wavy */
      if (maxmax - minmin > jumplimit) {
        ycount = segment;        /*no of segments */
        /* Re-use blobindex to index the recorded turning points and segment
         * to count xstarts entries (xstarts[0] was set above). */
        for (blobindex = 0, segment = 1; blobindex < ycount;
        blobindex++) {
          if (yturns[blobindex] > minmin + jumplimit
          || yturns[blobindex] < maxmax - jumplimit) {
                                 /*significant peak */
            if (segment == 1
              || yturns[blobindex] > prevy + jumplimit
            || yturns[blobindex] < prevy - jumplimit) {
                                 /*different to previous */
              xstarts[segment] = xturns[blobindex];
              segment++;
              prevy = yturns[blobindex];
            }
                                 /*bigger max */
            else if ((prevy > minmin + jumplimit && yturns[blobindex] > prevy)
                                 /*smaller min */
            || (prevy < maxmax - jumplimit && yturns[blobindex] < prevy)) {
              /* Same peak as before but more extreme: move the boundary. */
              xstarts[segment - 1] = xturns[blobindex];
                                 /*improved previous */
              prevy = yturns[blobindex];
            }
          }
        }
        xstarts[segment] = blobcoords[blobcount - 1].right () + 1;
        segments = segment;      /*no of segments */
                                 /*linear */
        *baseline = QSPLINE (xstarts, segments, xcoords, ycoords, blobcount, 1);
      }
    }
  }
  else {
    *baseline = *spline;         /*copy it */
    /* Vertical-only shift: first blob's bottom minus the spline's value at
     * that blob's right edge. */
    shift = ICOORD (0, (inT16) (blobcoords[0].bottom ()
      - spline->y (blobcoords[0].right ())));
    baseline->move (shift);
  }
}
void make_first_xheight ( TO_ROW *  row,
TBOX  blobcoords[],
int  lineheight,
int  init_lineheight,
int  blobcount,
QSPLINE *  baseline,
float  jumplimit 
)

Definition at line 1547 of file oldbasel.cpp.

  {
  /* Builds a histogram of blob heights above `baseline`, extracts its top
   * MODENUM modes with find_top_modes() and lets pick_x_height() set
   * row->xheight from them.
   *
   * The sign of row->xheight on entry is remembered; if it was not positive
   * the value produced by pick_x_height() is negated before returning.
   * The `jumplimit` parameter is not used in this body. */
  STATS heightstat (0, HEIGHTBUCKETS);
  int lefts[HEIGHTBUCKETS];      // per height: smallest positive xcenter seen
  int rights[HEIGHTBUCKETS];     // per height: largest xcenter seen
  int modelist[MODENUM];
  int blobindex;
  int mode_count;                //blobs to count in thr
  int sign_bit;
  int mode_threshold;
  const int kBaselineTouch = 2;  // This really should change with resolution.
  const int kGoodStrength = 8;  // Strength of baseline-touching heights.
  const float kMinHeight = 0.25;  // Min fraction of lineheight to use.

  sign_bit = row->xheight > 0 ? 1 : -1;

  memset(lefts, 0, HEIGHTBUCKETS * sizeof(lefts[0]));
  memset(rights, 0, HEIGHTBUCKETS * sizeof(rights[0]));
  mode_count = 0;
  for (blobindex = 0; blobindex < blobcount; blobindex++) {
    int xcenter = (blobcoords[blobindex].left () +
        blobcoords[blobindex].right ()) / 2;
    float base = baseline->y(xcenter);
    float bottomdiff = fabs(base - blobcoords[blobindex].bottom());
    // Parses as (textord_ocropus_mode && bottomdiff <= kBaselineTouch) ? ... :
    // blobs whose bottom is within kBaselineTouch of the baseline get extra
    // weight, but only in ocropus mode; every other blob has weight 1.
    int strength = textord_ocropus_mode &&
                   bottomdiff <= kBaselineTouch ? kGoodStrength : 1;
    // Height of the blob top above the baseline, rounded (truncating cast).
    int height = static_cast<int>(blobcoords[blobindex].top () - base + 0.5);
    // Ignore blobs whose own box is small relative to init_lineheight.
    if (blobcoords[blobindex].height () > init_lineheight * kMinHeight) {
      if (height > lineheight * oldbl_xhfract
        && height > textord_min_xheight) {
        heightstat.add (height, strength);
        // lefts/rights only have HEIGHTBUCKETS entries.
        if (height < HEIGHTBUCKETS) {
          if (xcenter > rights[height])
            rights[height] = xcenter;
          // 0 in lefts[] means "not set yet".
          if (xcenter > 0 && (lefts[height] == 0 || xcenter < lefts[height]))
            lefts[height] = xcenter;
        }
      }
      // Counted even when the height itself was rejected above.
      mode_count += strength;
    }
  }

  // Threshold is 10% of all blobs, or 10% of the weighted count of
  // sufficiently large blobs when either option below applies.
  mode_threshold = (int) (blobcount * 0.1);
  if (oldbl_dot_error_size > 1 || oldbl_xhfix)
    mode_threshold = (int) (mode_count * 0.1);

  if (textord_oldbl_debug) {
    tprintf ("blobcount=%d, mode_count=%d, mode_t=%d\n",
      blobcount, mode_count, mode_threshold);
  }
  find_top_modes(&heightstat, HEIGHTBUCKETS, modelist, MODENUM);
  if (textord_oldbl_debug) {
    for (blobindex = 0; blobindex < MODENUM; blobindex++)
      tprintf ("mode[%d]=%d ", blobindex, modelist[blobindex]);
    tprintf ("\n");
  }
  pick_x_height(row, modelist, lefts, rights, &heightstat, mode_threshold);

  if (textord_oldbl_debug)
    tprintf ("Output xheight=%g\n", row->xheight);
  if (row->xheight < 0 && textord_oldbl_debug)
    tprintf ("warning: Row Line height < 0; %4.2f\n", row->xheight);

  // Restore the sign the row had on entry.
  if (sign_bit < 0)
    row->xheight = -row->xheight;
}
int* make_height_array ( TBOX  blobcoords[],
int  blobcount,
QSPLINE *  baseline 
)
void make_holed_baseline ( TBOX  blobcoords[],
int  blobcount,
QSPLINE *  spline,
QSPLINE *  baseline,
float  gradient 
)

Definition at line 660 of file oldbasel.cpp.

  {
  /* Sets *baseline for a row with gaps ("holed" line).
   *
   * A straight line of the given gradient is fitted through the blobs'
   * (centre x, bottom) points and stored as a one-segment QSPLINE.  If
   * `spline` is usable -- non-NULL, at least 3 segments, and overlapping the
   * row to within MAXOVERLAP of its width at both ends -- that spline is used
   * instead, shifted vertically so it meets the fitted line at the middle of
   * the row. */
  const int row_left = blobcoords[0].left ();
  const int row_right = blobcoords[blobcount - 1].right ();

  // Fit y = gradient * x + intercept with the gradient held fixed.
  tesseract::DetLineFit fitter;
  for (int i = 0; i < blobcount; ++i) {
    fitter.Add(ICOORD((blobcoords[i].left() + blobcoords[i].right()) / 2,
                      blobcoords[i].bottom()));
  }
  float intercept;
  fitter.ConstrainedFit(gradient, &intercept);

  // Straight-line spline over [row_left, row_right].
  inT32 xstarts[2];
  xstarts[0] = row_left;
  xstarts[1] = row_right;
  double coeffs[3];
  coeffs[0] = 0;
  coeffs[1] = gradient;
  coeffs[2] = intercept;
  *baseline = QSPLINE (1, xstarts, coeffs);

  // Keep the straight line unless the supplied spline is usable.
  if (spline == NULL
    || spline->segments < 3
    || spline->xcoords[1] > row_left + MAXOVERLAP * (row_right - row_left)
    || spline->xcoords[spline->segments - 1] < row_right
       - MAXOVERLAP * (row_right - row_left)) {
    return;
  }

  *baseline = *spline;
  const float mid_x = (row_left + row_right) / 2.0;
  ICOORD shift = ICOORD (0, (inT16) (gradient * mid_x + intercept
                                     - spline->y (mid_x)));
  baseline->move (shift);
}
void merge_oldbl_parts ( TBOX  blobcoords[],
int  blobcount,
char  partids[],
int  partsizes[],
int  biggestpart,
float  jumplimit 
)

Definition at line 799 of file oldbasel.cpp.

  {
  /* Tries to merge long runs of blobs from other partitions back into
   * `biggestpart`.
   *
   * The blobs are scanned left to right.  Whenever the partition id changes,
   * the run that just ended is examined if it did not belong to biggestpart
   * and is longer than MAXBADRUN: a line is fitted to the run's
   * (centre x, bottom) points, the nearest biggestpart blob on either side is
   * located, and if the fitted line passes within jumplimit of that blob's
   * bottom the whole run is relabelled as biggestpart.
   *
   * Things visible in this body worth knowing:
   *  - on a merge partsizes[prevpart] is reduced by the run length, but
   *    partsizes[biggestpart] is not increased;
   *  - a run is only examined when a later blob has a different id, so a
   *    run that extends to the last blob is never examined. */
  BOOL8 found_one;               //found a bestpart blob
  BOOL8 close_one;               //found was close enough
  register int blobindex;        /*no along text line */
  int prevpart;                  //previous iteration
  int runlength;                 //no in this part
  float diff;                    /*difference from line */
  int startx;                    /*index of start blob */
  int test_blob;                 //another index
  FCOORD coord;                  //blob coordinate
  float m, c;                    //fitted line
  QLSQ stats;                    //line stuff

  prevpart = biggestpart;
  runlength = 0;
  startx = 0;
  for (blobindex = 0; blobindex < blobcount; blobindex++) {
    if (partids[blobindex] != prevpart) {
      //                      tprintf("Partition change at (%d,%d) from %d to %d after run of %d\n",
      //                              blobcoords[blobindex].left(),blobcoords[blobindex].bottom(),
      //                              prevpart,partids[blobindex],runlength);
      if (prevpart != biggestpart && runlength > MAXBADRUN) {
        /* Fit a line to the run [startx, blobindex). */
        stats.clear ();
        for (test_blob = startx; test_blob < blobindex; test_blob++) {
          coord = FCOORD ((blobcoords[test_blob].left ()
            + blobcoords[test_blob].right ()) / 2.0,
            blobcoords[test_blob].bottom ());
          stats.add (coord.x (), coord.y ());
        }
        stats.fit (1);
        m = stats.get_b ();
        c = stats.get_c ();
        if (textord_oldbl_debug)
          tprintf ("Fitted line y=%g x + %g\n", m, c);
        found_one = FALSE;
        close_one = FALSE;
        /* Search outwards, test_blob steps at a time, to the left of the
         * run (startx - test_blob) and to the right of it
         * (blobindex + test_blob - 1, i.e. starting at blobindex itself),
         * stopping at the first distance where a biggestpart blob is found
         * on either side. */
        for (test_blob = 1; !found_one
          && (startx - test_blob >= 0
        || blobindex + test_blob <= blobcount); test_blob++) {
          if (startx - test_blob >= 0
          && partids[startx - test_blob] == biggestpart) {
            found_one = TRUE;
            coord = FCOORD ((blobcoords[startx - test_blob].left ()
              + blobcoords[startx -
              test_blob].right ()) /
              2.0,
              blobcoords[startx -
              test_blob].bottom ());
            /* Vertical distance from the fitted line to this blob. */
            diff = m * coord.x () + c - coord.y ();
            if (textord_oldbl_debug)
              tprintf
                ("Diff of common blob to suspect part=%g at (%g,%g)\n",
                diff, coord.x (), coord.y ());
            if (diff < jumplimit && -diff < jumplimit)
              close_one = TRUE;
          }
          if (blobindex + test_blob <= blobcount
          && partids[blobindex + test_blob - 1] == biggestpart) {
            found_one = TRUE;
            coord =
              FCOORD ((blobcoords[blobindex + test_blob - 1].
              left () + blobcoords[blobindex + test_blob -
              1].right ()) / 2.0,
              blobcoords[blobindex + test_blob -
              1].bottom ());
            diff = m * coord.x () + c - coord.y ();
            if (textord_oldbl_debug)
              tprintf
                ("Diff of common blob to suspect part=%g at (%g,%g)\n",
                diff, coord.x (), coord.y ());
            if (diff < jumplimit && -diff < jumplimit)
              close_one = TRUE;
          }
        }
        if (close_one) {
          if (textord_oldbl_debug)
            tprintf
              ("Merged %d blobs back into part %d from %d starting at (%d,%d)\n",
              runlength, biggestpart, prevpart,
              blobcoords[startx].left (),
              blobcoords[startx].bottom ());
                                 //switch sides
          partsizes[prevpart] -= runlength;
          for (test_blob = startx; test_blob < blobindex; test_blob++)
            partids[test_blob] = biggestpart;
        }
      }
      /* Start a new run at this blob. */
      prevpart = partids[blobindex];
      runlength = 1;
      startx = blobindex;
    }
    else
      runlength++;
  }
}
void old_first_xheight ( TO_ROW *  row,
TBOX  blobcoords[],
int  initialheight,
int  blobcount,
QSPLINE *  baseline,
float  jumplimit 
)

Definition at line 1462 of file oldbasel.cpp.

  {
  /* Estimates the x-height and ascender rise of a row from the heights of
   * its blob tops above `baseline`.
   *
   * On return row->xheight has been MULTIPLIED by the mean x-height (so the
   * sign/scale it had on entry is carried through; a zero product is
   * replaced by -1.0f) and row->ascrise holds the mean ascender height minus
   * the mean x-height, or 0 when no ascenders were found. */
  STATS heightstat (0, MAXHEIGHT);
  int approx_xheight;            /* first guess used to classify blobs */

  if (blobcount <= 1) {
    /* Single blob: use its own (rounded) height above the baseline. */
    approx_xheight = (int) (blobcoords[0].top ()
      - baseline->y ((blobcoords[0].left ()
      + blobcoords[0].right ()) / 2) +
      0.5);
  }
  else {
    for (int i = 0; i < blobcount; ++i) {
      const int mid_x = (blobcoords[i].left ()
        + blobcoords[i].right ()) / 2;
      const int rise = (int) (blobcoords[i].top () - baseline->y (mid_x) + 0.5);
      if (rise > initialheight * oldbl_xhfract
        && rise > textord_min_xheight)
        heightstat.add (rise, 1);
    }
    /* With too few samples fall back to the caller's initial height. */
    approx_xheight = initialheight;
    if (heightstat.get_total () > 3) {
      approx_xheight = (int) heightstat.ile (0.25);
      if (approx_xheight <= 0)
        approx_xheight = (int) heightstat.ile (0.5);
    }
  }

  /* Classify each blob as ascender, x-height, or neither, and accumulate. */
  float asc_sum = 0.0f;
  int asc_count = 0;
  float x_sum = 0.0f;
  int x_count = 0;
  for (int i = 0; i < blobcount; ++i) {
    const int mid_x = (blobcoords[i].left ()
      + blobcoords[i].right ()) / 2;
    const float rise = blobcoords[i].top () - baseline->y (mid_x);
    if (rise > approx_xheight + jumplimit) {
      asc_sum += rise;           /* taller than x-height band */
      asc_count++;
    }
    else if (rise > approx_xheight - jumplimit) {
      x_sum += rise;             /* inside x-height band */
      x_count++;
    }
  }

  float mean_x;
  if (x_count > 0)
    mean_x = x_sum / x_count;
  else
    mean_x = (float) approx_xheight;   /* nothing in band: use the guess */

  row->xheight *= mean_x;
  if (asc_count > 0)
    row->ascrise = asc_sum / asc_count - mean_x;
  else
    row->ascrise = 0.0f;
  if (row->xheight == 0)
    row->xheight = -1.0f;
}
int partition_coords ( TBOX  blobcoords[],
int  blobcount,
char  partids[],
int  bestpart,
int  xcoords[],
int  ycoords[] 
)

*merge_partitions(partids,partcount,blobcount,bestpart) discards funny looking

Definition at line 1090 of file oldbasel.cpp.

  {
  /* Collects the (centre x, bottom y) of every blob whose partition id
   * equals bestpart into xcoords[]/ycoords[], packed from index 0, and
   * returns how many points were written. */
  int kept = 0;
  for (int i = 0; i < blobcount; ++i) {
    if (partids[i] != bestpart)
      continue;                  /* blob belongs to another partition */
    xcoords[kept] = (blobcoords[i].left () + blobcoords[i].right ()) >> 1;
    ycoords[kept] = blobcoords[i].bottom ();
    ++kept;
  }
  return kept;
}
int partition_line ( TBOX  blobcoords[],
int  blobcount,
int *  numparts,
char  partids[],
int  partsizes[],
QSPLINE *  spline,
float  jumplimit,
float  ydiffs[] 
)

Definition at line 718 of file oldbasel.cpp.

  {
  /* Splits the blobs of a row into partitions according to their offsets
   * from `spline` and returns the index of the largest partition.
   *
   * ydiffs[] is filled by get_ydiffs(), which also supplies the seed blob.
   * Blobs are assigned with choose_partition() in two passes that both start
   * at the seed: first rightwards to the end of the row, then leftwards to
   * blob 0.  partids[] receives each blob's partition, partsizes[] the
   * member counts and *numparts the number of partitions created.  When
   * textord_oldbl_merge_parts is set, merge_oldbl_parts() is run on the
   * result before returning. */
  float partdiffs[MAXPARTS];     /* reference offset of each partition */

  for (int part = 0; part < MAXPARTS; ++part)
    partsizes[part] = 0;

  const int seed = get_ydiffs (blobcoords, blobcount, spline, ydiffs);
  *numparts = 1;

  /* Pass 1: seed -> last blob.  current == -1 tells choose_partition that
   * this is the first blob of the pass. */
  int current = -1;
  float drift = 0.0f;
  float last_delta = 0.0f;
  for (int i = seed; i < blobcount; ++i) {
    const float diff = ydiffs[i];
    if (textord_oldbl_debug) {
      tprintf ("%d(%d,%d), ", i,
        blobcoords[i].left (),
        blobcoords[i].bottom ());
    }
    current = choose_partition(diff, partdiffs, current, jumplimit,
                               &drift, &last_delta, numparts);
    partids[i] = current;
    partsizes[current]++;
  }

  /* Pass 2: seed -> blob 0.  The seed blob is processed a second time, so
   * partition 0's count is reduced by one beforehand to compensate. */
  current = -1;
  drift = 0.0f;
  last_delta = 0.0f;
  partsizes[0]--;
  for (int i = seed; i >= 0; --i) {
    const float diff = ydiffs[i];
    if (textord_oldbl_debug) {
      tprintf ("%d(%d,%d), ", i,
        blobcoords[i].left (),
        blobcoords[i].bottom ());
    }
    current = choose_partition(diff, partdiffs, current, jumplimit,
                               &drift, &last_delta, numparts);
    partids[i] = current;
    partsizes[current]++;
  }

  /* Largest partition; on ties the higher index wins (>=). */
  int biggestpart = 0;
  for (int part = 1; part < *numparts; ++part) {
    if (partsizes[part] >= partsizes[biggestpart])
      biggestpart = part;
  }

  if (textord_oldbl_merge_parts) {
    merge_oldbl_parts(blobcoords, blobcount, partids, partsizes,
                      biggestpart, jumplimit);
  }
  return biggestpart;
}
void pick_x_height ( TO_ROW row,
int  modelist[],
int  lefts[],
int  rights[],
STATS heightstat,
int  mode_threshold 
)

Definition at line 1672 of file oldbasel.cpp.

                                       {
  int x;
  int y;
  int z;
  float ratio;
  int found_one_bigger = FALSE;
  int best_x_height = 0;
  int best_asc = 0;
  int num_in_best;

  for (x = 0; x < MODENUM; x++) {
    for (y = 0; y < MODENUM; y++) {
      /* Check for two modes */
      if (modelist[x] && modelist[y] &&
          heightstat->pile_count (modelist[x]) > mode_threshold &&
          (!textord_ocropus_mode ||
           MIN(rights[modelist[x]], rights[modelist[y]]) >
           MAX(lefts[modelist[x]], lefts[modelist[y]]))) {
        ratio = (float) modelist[y] / (float) modelist[x];
        if (1.2 < ratio && ratio < 1.8) {
          /* Two modes found */
          best_x_height = modelist[x];
          num_in_best = heightstat->pile_count (modelist[x]);

          /* Try to get one higher */
          do {
            found_one_bigger = FALSE;
            for (z = 0; z < MODENUM; z++) {
              if (modelist[z] == best_x_height + 1 &&
                  (!textord_ocropus_mode ||
                    MIN(rights[modelist[x]], rights[modelist[y]]) >
                    MAX(lefts[modelist[x]], lefts[modelist[y]]))) {
                ratio = (float) modelist[y] / (float) modelist[z];
                if ((1.2 < ratio && ratio < 1.8) &&
                               /* Should be half of best */
                    heightstat->pile_count (modelist[z]) >
                    num_in_best * 0.5) {
                  best_x_height++;
                  found_one_bigger = TRUE;
                  break;
                }
              }
            }
          }
          while (found_one_bigger);

          /* try to get a higher ascender */

          best_asc = modelist[y];
          num_in_best = heightstat->pile_count (modelist[y]);

          /* Try to get one higher */
          do {
            found_one_bigger = FALSE;
            for (z = 0; z < MODENUM; z++) {
              if (modelist[z] > best_asc &&
                  (!textord_ocropus_mode ||
                    MIN(rights[modelist[x]], rights[modelist[y]]) >
                    MAX(lefts[modelist[x]], lefts[modelist[y]]))) {
                ratio = (float) modelist[z] / (float) best_x_height;
                if ((1.2 < ratio && ratio < 1.8) &&
                               /* Should be half of best */
                    heightstat->pile_count (modelist[z]) >
                    num_in_best * 0.5) {
                  best_asc = modelist[z];
                  found_one_bigger = TRUE;
                  break;
                }
              }
            }
          }
          while (found_one_bigger);

          row->xheight = (float) best_x_height;
          row->ascrise = (float) best_asc - best_x_height;
          return;
        }
      }
    }
  }

  best_x_height = modelist[0];   /* Single Mode found */
  num_in_best = heightstat->pile_count (best_x_height);
  do {
                                 /* Try to get one higher */
    found_one_bigger = FALSE;
    for (z = 1; z < MODENUM; z++) {
      /* Should be half of best */
      if ((modelist[z] == best_x_height + 1) &&
      (heightstat->pile_count (modelist[z]) > num_in_best * 0.5)) {
        best_x_height++;
        found_one_bigger = TRUE;
        break;
      }
    }
  }
  while (found_one_bigger);

  row->ascrise = 0.0f;
  row->xheight = (float) best_x_height;
  if (row->xheight == 0)
    row->xheight = -1.0f;
}
int segment_spline ( TBOX  blobcoords[],
int  blobcount,
int  xcoords[],
int  ycoords[],
int  degree,
int  pointcount,
int  xstarts[] 
)

Definition at line 1121 of file oldbasel.cpp.

  {
  register int ptindex;          /*no along text line */
  register int segment;          /*partition no */
  int lastmin, lastmax;          /*possible turn points */
  int turnpoints[SPLINESIZE];    /*good turning points */
  int turncount;                 /*no of turning points */
  int max_x;                     //max specified coord

  xstarts[0] = xcoords[0] - 1;   //leftmost defined pt
  max_x = xcoords[pointcount - 1] + 1;
  if (degree < 2)
    pointcount = 0;
  turncount = 0;                 /*no turning points yet */
  if (pointcount > 3) {
    ptindex = 1;
    lastmax = lastmin = 0;       /*start with first one */
    while (ptindex < pointcount - 1 && turncount < SPLINESIZE - 1) {
                                 /*minimum */
      if (ycoords[ptindex - 1] > ycoords[ptindex] && ycoords[ptindex] <= ycoords[ptindex + 1]) {
        if (ycoords[ptindex] < ycoords[lastmax] - TURNLIMIT) {
          if (turncount == 0 || turnpoints[turncount - 1] != lastmax)
                                 /*new max point */
            turnpoints[turncount++] = lastmax;
          lastmin = ptindex;     /*latest minimum */
        }
        else if (ycoords[ptindex] < ycoords[lastmin]) {
          lastmin = ptindex;     /*lower minimum */
        }
      }

                                 /*maximum */
      if (ycoords[ptindex - 1] < ycoords[ptindex] && ycoords[ptindex] >= ycoords[ptindex + 1]) {
        if (ycoords[ptindex] > ycoords[lastmin] + TURNLIMIT) {
          if (turncount == 0 || turnpoints[turncount - 1] != lastmin)
                                 /*new min point */
            turnpoints[turncount++] = lastmin;
          lastmax = ptindex;     /*latest maximum */
        }
        else if (ycoords[ptindex] > ycoords[lastmax]) {
          lastmax = ptindex;     /*higher maximum */
        }
      }
      ptindex++;
    }
                                 /*possible global min */
    if (ycoords[ptindex] < ycoords[lastmax] - TURNLIMIT
    && (turncount == 0 || turnpoints[turncount - 1] != lastmax)) {
      if (turncount < SPLINESIZE - 1)
                                 /*2 more turns */
        turnpoints[turncount++] = lastmax;
      if (turncount < SPLINESIZE - 1)
        turnpoints[turncount++] = ptindex;
    }
    else if (ycoords[ptindex] > ycoords[lastmin] + TURNLIMIT
      /*possible global max */
    && (turncount == 0 || turnpoints[turncount - 1] != lastmin)) {
      if (turncount < SPLINESIZE - 1)
                                 /*2 more turns */
        turnpoints[turncount++] = lastmin;
      if (turncount < SPLINESIZE - 1)
        turnpoints[turncount++] = ptindex;
    }
    else if (turncount > 0 && turnpoints[turncount - 1] == lastmin
    && turncount < SPLINESIZE - 1) {
      if (ycoords[ptindex] > ycoords[lastmax])
        turnpoints[turncount++] = ptindex;
      else
        turnpoints[turncount++] = lastmax;
    }
    else if (turncount > 0 && turnpoints[turncount - 1] == lastmax
    && turncount < SPLINESIZE - 1) {
      if (ycoords[ptindex] < ycoords[lastmin])
        turnpoints[turncount++] = ptindex;
      else
        turnpoints[turncount++] = lastmin;
    }
  }

  if (textord_oldbl_debug && turncount > 0)
    tprintf ("First turn is %d at (%d,%d)\n",
      turnpoints[0], xcoords[turnpoints[0]], ycoords[turnpoints[0]]);
  for (segment = 1; segment < turncount; segment++) {
                                 /*centre y coord */
    lastmax = (ycoords[turnpoints[segment - 1]] + ycoords[turnpoints[segment]]) / 2;

    /* fix alg so that it works with both rising and falling sections */
    if (ycoords[turnpoints[segment - 1]] < ycoords[turnpoints[segment]])
                                 /*find rising y centre */
      for (ptindex = turnpoints[segment - 1] + 1; ptindex < turnpoints[segment] && ycoords[ptindex + 1] <= lastmax; ptindex++);
    else
                                 /*find falling y centre */
      for (ptindex = turnpoints[segment - 1] + 1; ptindex < turnpoints[segment] && ycoords[ptindex + 1] >= lastmax; ptindex++);

                                 /*centre x */
    xstarts[segment] = (xcoords[ptindex - 1] + xcoords[ptindex]
      + xcoords[turnpoints[segment - 1]]
      + xcoords[turnpoints[segment]] + 2) / 4;
    /*halfway between turns */
    if (textord_oldbl_debug)
      tprintf ("Turn %d is %d at (%d,%d), mid pt is %d@%d, final @%d\n",
        segment, turnpoints[segment],
        xcoords[turnpoints[segment]], ycoords[turnpoints[segment]],
        ptindex - 1, xcoords[ptindex - 1], xstarts[segment]);
  }

  xstarts[segment] = max_x;
  return segment;                /*no of splines */
}
/**
 * @name split_stepped_spline
 *
 * Look for interior segment boundaries where the baseline steps by more than
 * jumplimit and replace each such boundary with two new split points.
 *
 * For every boundary xstarts[segment] (1 <= segment < segments - 1) the step
 * is measured with baseline->step() between the midpoints of the two
 * adjacent segments. When its magnitude exceeds jumplimit, the points of
 * xcoords lying in the two segments are located and, provided
 * segments < SPLINESIZE and the two segments together hold at least
 * 3 * textord_spline_medianwin points, two split coordinates roughly one
 * third and two thirds of the way across are passed to insert_spline_point().
 *
 * Definition at line 1246 of file oldbasel.cpp.
 *
 * @param baseline  spline whose steps are examined
 * @param jumplimit largest step magnitude tolerated at a boundary
 * @param xcoords   x coordinates of the points; scanned upwards with no
 *                  length check (NOTE(review): presumably sorted ascending
 *                  and reaching the last xstarts value - confirm with caller)
 * @param xstarts   segment boundaries, updated via insert_spline_point()
 * @param segments  segment count, passed by reference to
 *                  insert_spline_point() (presumably updated there - that
 *                  function is not visible here)
 * @return TRUE if at least one boundary was split
 */
BOOL8 split_stepped_spline(QSPLINE *baseline,
                           float jumplimit,
                           int xcoords[],
                           int xstarts[],
                           int &segments) {
  BOOL8 doneany;                 //return value
  int segment;                   /*partition no */
  int startindex, centreindex, endindex;
  float leftcoord, rightcoord;
  int leftindex, rightindex;
  float step;                    //spline step

  doneany = FALSE;
  startindex = 0;                //only ever advances across iterations
  for (segment = 1; segment < segments - 1; segment++) {
    step = baseline->step ((xstarts[segment - 1] + xstarts[segment]) / 2.0,
      (xstarts[segment] + xstarts[segment + 1]) / 2.0);
    if (step < 0)
      step = -step;
    if (step > jumplimit) {
      /* first point index at or beyond each of the three boundaries */
      while (xcoords[startindex] < xstarts[segment - 1])
        startindex++;
      centreindex = startindex;
      while (xcoords[centreindex] < xstarts[segment])
        centreindex++;
      endindex = centreindex;
      while (xcoords[endindex] < xstarts[segment + 1])
        endindex++;
      if (segments >= SPLINESIZE) {
        if (textord_debug_baselines)
          tprintf ("Too many segments to resegment spline!!\n");
      }
      else if (endindex - startindex >= textord_spline_medianwin * 3) {
        /* keep the centre at least 1.5 windows away from either end */
        while (centreindex - startindex <
          textord_spline_medianwin * 3 / 2)
          centreindex++;
        while (endindex - centreindex <
          textord_spline_medianwin * 3 / 2)
          centreindex--;
        /* initial guesses one third of the way in from each end */
        leftindex = (startindex + startindex + centreindex) / 3;
        rightindex = (centreindex + endindex + endindex) / 3;
        leftcoord =
          (xcoords[startindex] * 2 + xcoords[centreindex]) / 3.0;
        rightcoord =
          (xcoords[centreindex] + xcoords[endindex] * 2) / 3.0;
        /* move leftindex towards leftcoord within its window limits */
        while (xcoords[leftindex] > leftcoord
          && leftindex - startindex > textord_spline_medianwin)
          leftindex--;
        while (xcoords[leftindex] < leftcoord
          && centreindex - leftindex >
          textord_spline_medianwin / 2)
          leftindex++;
        /* step back if the previous point is nearer to leftcoord */
        if (xcoords[leftindex] - leftcoord >
          leftcoord - xcoords[leftindex - 1])
          leftindex--;
        /* same adjustment for the right-hand split */
        while (xcoords[rightindex] > rightcoord
          && rightindex - centreindex >
          textord_spline_medianwin / 2)
          rightindex--;
        while (xcoords[rightindex] < rightcoord
          && endindex - rightindex > textord_spline_medianwin)
          rightindex++;
        if (xcoords[rightindex] - rightcoord >
          rightcoord - xcoords[rightindex - 1])
          rightindex--;
        /* step is re-evaluated here so the signed value is printed */
        if (textord_debug_baselines)
          tprintf ("Splitting spline at %d with step %g at (%d,%d)\n",
            xstarts[segment],
            baseline->
            step ((xstarts[segment - 1] +
            xstarts[segment]) / 2.0,
            (xstarts[segment] +
            xstarts[segment + 1]) / 2.0),
            (xcoords[leftindex - 1] + xcoords[leftindex]) / 2,
            (xcoords[rightindex - 1] + xcoords[rightindex]) / 2);
        /* split points lie midway between neighbouring points */
        insert_spline_point (xstarts, segment,
          (xcoords[leftindex - 1] +
          xcoords[leftindex]) / 2,
          (xcoords[rightindex - 1] +
          xcoords[rightindex]) / 2, segments);
        doneany = TRUE;
      }
      else if (textord_debug_baselines) {
        tprintf
          ("Resegmenting spline failed - insufficient pts (%d,%d,%d,%d)\n",
          startindex, centreindex, endindex,
          (inT32) textord_spline_medianwin);
      }
    }
  }
  return doneany;
}

Variable Documentation

double oldbl_dot_error_size = 1.26

"Max aspect ratio of a dot"

Definition at line 52 of file oldbasel.cpp.

"Max lost before fallback line used"

Definition at line 51 of file oldbasel.cpp.

bool oldbl_xhfix = 0

"Fix bug in modes threshold for xheights"

Definition at line 47 of file oldbasel.cpp.

bool textord_debug_baselines

"Debug baseline generation"

Definition at line 41 of file oldbasel.cpp.

bool textord_oldbl_debug

"Debug old baseline generation"

Definition at line 40 of file oldbasel.cpp.

double textord_oldbl_jumplimit = 0.15

"X fraction for new partition"

Definition at line 54 of file oldbasel.cpp.

bool textord_oldbl_merge_parts

"Merge suspect partitions"

Definition at line 44 of file oldbasel.cpp.

"Use para default mechanism"

Definition at line 42 of file oldbasel.cpp.

"Split stepped splines"

Definition at line 43 of file oldbasel.cpp.

"Use original wiseowl xheight"

Definition at line 39 of file oldbasel.cpp.