Tesseract  3.02
tesseract-ocr/ccstruct/polyaprx.cpp
Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        polyaprx.cpp  (Formerly polygon.c)
00003  * Description: Code for polygonal approximation from old edgeprog.
00004  * Author:      Ray Smith
00005  * Created:     Thu Nov 25 11:42:04 GMT 1993
00006  *
00007  * (C) Copyright 1993, Hewlett-Packard Ltd.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
00020 #include "mfcpch.h"
00021 #include          <stdio.h>
00022 #ifdef __UNIX__
00023 #include          <assert.h>
00024 #endif
00025 #define FASTEDGELENGTH    256
00026 #include          "polyaprx.h"
00027 #include          "params.h"
00028 #include          "tprintf.h"
00029 
00030 #define EXTERN
00031 
00032 EXTERN BOOL_VAR (poly_debug, FALSE, "Debug old poly");
00033 EXTERN BOOL_VAR (poly_wide_objects_better, TRUE,
00034 "More accurate approx on wide things");
00035 
00036 
00037 #define FIXED       4            /*OUTLINE point is fixed */
00038 
00039 #define RUNLENGTH     1          /*length of run */
00040 
00041 #define DIR         2            /*direction of run */
00042 
00043 #define FLAGS       0
00044 
00045 #define fixed_dist      20       //really an int_variable
00046 #define approx_dist     15       //really an int_variable
00047 
00048 const int par1 = 4500 / (approx_dist * approx_dist);
00049 const int par2 = 6750 / (approx_dist * approx_dist);
00050 
00051 
00052 /**********************************************************************
00053  * tesspoly_outline
00054  *
00055  * Approximate an outline from chain codes form using the old tess algorithm.
00056  **********************************************************************/
00057 
00058 
00059 TESSLINE* ApproximateOutline(C_OUTLINE* c_outline) {
00060   EDGEPT *edgept;                // converted steps
00061   TBOX loop_box;                  // bounding box
00062   inT32 area;                    // loop area
00063   EDGEPT stack_edgepts[FASTEDGELENGTH];  // converted path
00064   EDGEPT* edgepts = stack_edgepts;
00065 
00066   // Use heap memory if the stack buffer is not big enough.
00067   if (c_outline->pathlength() > FASTEDGELENGTH)
00068     edgepts = new EDGEPT[c_outline->pathlength()];
00069 
00070   loop_box = c_outline->bounding_box();
00071   area = loop_box.height();
00072   if (!poly_wide_objects_better && loop_box.width() > area)
00073     area = loop_box.width();
00074   area *= area;
00075   edgept = edgesteps_to_edgepts(c_outline, edgepts);
00076   fix2(edgepts, area);
00077   edgept = poly2 (edgepts, area);  // 2nd approximation.
00078   EDGEPT* startpt = edgept;
00079   EDGEPT* result = NULL;
00080   EDGEPT* prev_result = NULL;
00081   do {
00082     EDGEPT* new_pt = new EDGEPT;
00083     new_pt->pos = edgept->pos;
00084     new_pt->prev = prev_result;
00085     if (prev_result == NULL) {
00086       result = new_pt;
00087     } else {
00088       prev_result->next = new_pt;
00089       new_pt->prev = prev_result;
00090     }
00091     prev_result = new_pt;
00092     edgept = edgept->next;
00093   }
00094   while (edgept != startpt);
00095   prev_result->next = result;
00096   result->prev = prev_result;
00097   if (edgepts != stack_edgepts)
00098     delete [] edgepts;
00099   return TESSLINE::BuildFromOutlineList(result);
00100 }
00101 
00102 
00103 /**********************************************************************
00104  * edgesteps_to_edgepts
00105  *
00106  * Convert a C_OUTLINE to EDGEPTs.
00107  **********************************************************************/
00108 
00109 EDGEPT *
00110 edgesteps_to_edgepts (           //convert outline
00111 C_OUTLINE * c_outline,           //input
00112 EDGEPT edgepts[]                 //output is array
00113 ) {
00114   inT32 length;                  //steps in path
00115   ICOORD pos;                    //current coords
00116   inT32 stepindex;               //current step
00117   inT32 stepinc;                 //increment
00118   inT32 epindex;                 //current EDGEPT
00119   inT32 count;                   //repeated steps
00120   ICOORD vec;                    //for this 8 step
00121   ICOORD prev_vec;
00122   inT8 epdir;                    //of this step
00123   DIR128 prevdir;                //prvious dir
00124   DIR128 dir;                    //of this step
00125 
00126   pos = c_outline->start_pos (); //start of loop
00127   length = c_outline->pathlength ();
00128   stepindex = 0;
00129   epindex = 0;
00130   prevdir = -1;
00131   count = 0;
00132   do {
00133     dir = c_outline->step_dir (stepindex);
00134     vec = c_outline->step (stepindex);
00135     if (stepindex < length - 1
00136     && c_outline->step_dir (stepindex + 1) - dir == -32) {
00137       dir += 128 - 16;
00138       vec += c_outline->step (stepindex + 1);
00139       stepinc = 2;
00140     }
00141     else
00142       stepinc = 1;
00143     if (count == 0) {
00144       prevdir = dir;
00145       prev_vec = vec;
00146     }
00147     if (prevdir.get_dir () != dir.get_dir ()) {
00148       edgepts[epindex].pos.x = pos.x ();
00149       edgepts[epindex].pos.y = pos.y ();
00150       prev_vec *= count;
00151       edgepts[epindex].vec.x = prev_vec.x ();
00152       edgepts[epindex].vec.y = prev_vec.y ();
00153       pos += prev_vec;
00154       edgepts[epindex].flags[RUNLENGTH] = count;
00155       edgepts[epindex].prev = &edgepts[epindex - 1];
00156       edgepts[epindex].flags[FLAGS] = 0;
00157       edgepts[epindex].next = &edgepts[epindex + 1];
00158       prevdir += 64;
00159       epdir = (DIR128) 0 - prevdir;
00160       epdir >>= 4;
00161       epdir &= 7;
00162       edgepts[epindex].flags[DIR] = epdir;
00163       epindex++;
00164       prevdir = dir;
00165       prev_vec = vec;
00166       count = 1;
00167     }
00168     else
00169       count++;
00170     stepindex += stepinc;
00171   }
00172   while (stepindex < length);
00173   edgepts[epindex].pos.x = pos.x ();
00174   edgepts[epindex].pos.y = pos.y ();
00175   prev_vec *= count;
00176   edgepts[epindex].vec.x = prev_vec.x ();
00177   edgepts[epindex].vec.y = prev_vec.y ();
00178   pos += prev_vec;
00179   edgepts[epindex].flags[RUNLENGTH] = count;
00180   edgepts[epindex].flags[FLAGS] = 0;
00181   edgepts[epindex].prev = &edgepts[epindex - 1];
00182   edgepts[epindex].next = &edgepts[0];
00183   prevdir += 64;
00184   epdir = (DIR128) 0 - prevdir;
00185   epdir >>= 4;
00186   epdir &= 7;
00187   edgepts[epindex].flags[DIR] = epdir;
00188   edgepts[0].prev = &edgepts[epindex];
00189   ASSERT_HOST (pos.x () == c_outline->start_pos ().x ()
00190     && pos.y () == c_outline->start_pos ().y ());
00191   return &edgepts[0];
00192 }
00193 
00194 
00195 /**********************************************************************
00196  *fix2(start,area) fixes points on the outline according to a trial method*
00197  **********************************************************************/
00198 
00199 //#pragma OPT_LEVEL 1                                                                           /*stop compiler bugs*/
00200 
00201 void fix2(                //polygonal approx
00202           EDGEPT *start,  /*loop to approimate */
00203           int area) {
00204   register EDGEPT *edgept;       /*current point */
00205   register EDGEPT *edgept1;
00206   register EDGEPT *loopstart;    /*modified start of loop */
00207   register EDGEPT *linestart;    /*start of line segment */
00208   register int dir1, dir2;       /*directions of line */
00209   register int sum1, sum2;       /*lengths in dir1,dir2 */
00210   int stopped;                   /*completed flag */
00211   int fixed_count;               //no of fixed points
00212   int d01, d12, d23, gapmin;
00213   TPOINT d01vec, d12vec, d23vec;
00214   register EDGEPT *edgefix, *startfix;
00215   register EDGEPT *edgefix0, *edgefix1, *edgefix2, *edgefix3;
00216 
00217   edgept = start;                /*start of loop */
00218   while (((edgept->flags[DIR] - edgept->prev->flags[DIR] + 1) & 7) < 3
00219     && (dir1 =
00220     (edgept->prev->flags[DIR] - edgept->next->flags[DIR]) & 7) != 2
00221     && dir1 != 6)
00222     edgept = edgept->next;       /*find suitable start */
00223   loopstart = edgept;            /*remember start */
00224 
00225   stopped = 0;                   /*not finished yet */
00226   edgept->flags[FLAGS] |= FIXED; /*fix it */
00227   do {
00228     linestart = edgept;          /*possible start of line */
00229     dir1 = edgept->flags[DIR];   /*first direction */
00230                                  /*length of dir1 */
00231     sum1 = edgept->flags[RUNLENGTH];
00232     edgept = edgept->next;
00233     dir2 = edgept->flags[DIR];   /*2nd direction */
00234                                  /*length in dir2 */
00235     sum2 = edgept->flags[RUNLENGTH];
00236     if (((dir1 - dir2 + 1) & 7) < 3) {
00237       while (edgept->prev->flags[DIR] == edgept->next->flags[DIR]) {
00238         edgept = edgept->next;   /*look at next */
00239         if (edgept->flags[DIR] == dir1)
00240                                  /*sum lengths */
00241           sum1 += edgept->flags[RUNLENGTH];
00242         else
00243           sum2 += edgept->flags[RUNLENGTH];
00244       }
00245 
00246       if (edgept == loopstart)
00247         stopped = 1;             /*finished */
00248       if (sum2 + sum1 > 2
00249         && linestart->prev->flags[DIR] == dir2
00250         && (linestart->prev->flags[RUNLENGTH] >
00251       linestart->flags[RUNLENGTH] || sum2 > sum1)) {
00252                                  /*start is back one */
00253         linestart = linestart->prev;
00254         linestart->flags[FLAGS] |= FIXED;
00255       }
00256 
00257       if (((edgept->next->flags[DIR] - edgept->flags[DIR] + 1) & 7) >= 3
00258         || (edgept->flags[DIR] == dir1 && sum1 >= sum2)
00259         || ((edgept->prev->flags[RUNLENGTH] < edgept->flags[RUNLENGTH]
00260         || (edgept->flags[DIR] == dir2 && sum2 >= sum1))
00261           && linestart->next != edgept))
00262         edgept = edgept->next;
00263     }
00264                                  /*sharp bend */
00265     edgept->flags[FLAGS] |= FIXED;
00266   }
00267                                  /*do whole loop */
00268   while (edgept != loopstart && !stopped);
00269 
00270   edgept = start;
00271   do {
00272     if (((edgept->flags[RUNLENGTH] >= 8) &&
00273       (edgept->flags[DIR] != 2) && (edgept->flags[DIR] != 6)) ||
00274       ((edgept->flags[RUNLENGTH] >= 8) &&
00275     ((edgept->flags[DIR] == 2) || (edgept->flags[DIR] == 6)))) {
00276       edgept->flags[FLAGS] |= FIXED;
00277       edgept1 = edgept->next;
00278       edgept1->flags[FLAGS] |= FIXED;
00279     }
00280     edgept = edgept->next;
00281   }
00282   while (edgept != start);
00283 
00284   edgept = start;
00285   do {
00286                                  /*single fixed step */
00287     if (edgept->flags[FLAGS] & FIXED && edgept->flags[RUNLENGTH] == 1
00288                                  /*and neighours free */
00289       && edgept->next->flags[FLAGS] & FIXED && (edgept->prev->flags[FLAGS] & FIXED) == 0
00290                                  /*same pair of dirs */
00291       && (edgept->next->next->flags[FLAGS] & FIXED) == 0 && edgept->prev->flags[DIR] == edgept->next->flags[DIR] && edgept->prev->prev->flags[DIR] == edgept->next->next->flags[DIR]
00292     && ((edgept->prev->flags[DIR] - edgept->flags[DIR] + 1) & 7) < 3) {
00293                                  /*unfix it */
00294       edgept->flags[FLAGS] &= ~FIXED;
00295       edgept->next->flags[FLAGS] &= ~FIXED;
00296     }
00297     edgept = edgept->next;       /*do all points */
00298   }
00299   while (edgept != start);       /*until finished */
00300 
00301   stopped = 0;
00302   if (area < 450)
00303     area = 450;
00304 
00305   gapmin = area * fixed_dist * fixed_dist / 44000;
00306 
00307   edgept = start;
00308   fixed_count = 0;
00309   do {
00310     if (edgept->flags[FLAGS] & FIXED)
00311       fixed_count++;
00312     edgept = edgept->next;
00313   }
00314   while (edgept != start);
00315   while ((edgept->flags[FLAGS] & FIXED) == 0)
00316     edgept = edgept->next;
00317   edgefix0 = edgept;
00318 
00319   edgept = edgept->next;
00320   while ((edgept->flags[FLAGS] & FIXED) == 0)
00321     edgept = edgept->next;
00322   edgefix1 = edgept;
00323 
00324   edgept = edgept->next;
00325   while ((edgept->flags[FLAGS] & FIXED) == 0)
00326     edgept = edgept->next;
00327   edgefix2 = edgept;
00328 
00329   edgept = edgept->next;
00330   while ((edgept->flags[FLAGS] & FIXED) == 0)
00331     edgept = edgept->next;
00332   edgefix3 = edgept;
00333 
00334   startfix = edgefix2;
00335 
00336   do {
00337     if (fixed_count <= 3)
00338       break;                     //already too few
00339     point_diff (d12vec, edgefix1->pos, edgefix2->pos);
00340     d12 = LENGTH (d12vec);
00341     // TODO(rays) investigate this change:
00342     // Only unfix a point if it is part of a low-curvature section
00343     // of outline and the total angle change of the outlines is
00344     // less than 90 degrees, ie the scalar product is positive.
00345     // if (d12 <= gapmin && SCALAR(edgefix0->vec, edgefix2->vec) > 0) {
00346     if (d12 <= gapmin) {
00347       point_diff (d01vec, edgefix0->pos, edgefix1->pos);
00348       d01 = LENGTH (d01vec);
00349       point_diff (d23vec, edgefix2->pos, edgefix3->pos);
00350       d23 = LENGTH (d23vec);
00351       if (d01 > d23) {
00352         edgefix2->flags[FLAGS] &= ~FIXED;
00353         fixed_count--;
00354       }
00355       else {
00356         edgefix1->flags[FLAGS] &= ~FIXED;
00357         fixed_count--;
00358         edgefix1 = edgefix2;
00359       }
00360     }
00361     else {
00362       edgefix0 = edgefix1;
00363       edgefix1 = edgefix2;
00364     }
00365     edgefix2 = edgefix3;
00366     edgept = edgept->next;
00367     while ((edgept->flags[FLAGS] & FIXED) == 0) {
00368       if (edgept == startfix)
00369         stopped = 1;
00370       edgept = edgept->next;
00371     }
00372     edgefix3 = edgept;
00373     edgefix = edgefix2;
00374   }
00375   while ((edgefix != startfix) && (!stopped));
00376 }
00377 
00378 
00379 //#pragma OPT_LEVEL 2                                                                           /*stop compiler bugs*/
00380 
00381 /**********************************************************************
00382  *poly2(startpt,area,path) applies a second approximation to the outline
00383  *using the points which have been fixed by the first approximation*
00384  **********************************************************************/
00385 
00386 EDGEPT *poly2(                  //second poly
00387               EDGEPT *startpt,  /*start of loop */
00388               int area          /*area of blob box */
00389              ) {
00390   register EDGEPT *edgept;       /*current outline point */
00391   EDGEPT *loopstart;             /*starting point */
00392   register EDGEPT *linestart;    /*start of line */
00393   register int edgesum;          /*correction count */
00394 
00395   if (area < 1200)
00396     area = 1200;                 /*minimum value */
00397 
00398   loopstart = NULL;              /*not found it yet */
00399   edgept = startpt;              /*start of loop */
00400 
00401   do {
00402                                  /*current point fixed */
00403     if (edgept->flags[FLAGS] & FIXED
00404                                  /*and next not */
00405     && (edgept->next->flags[FLAGS] & FIXED) == 0) {
00406       loopstart = edgept;        /*start of repoly */
00407       break;
00408     }
00409     edgept = edgept->next;       /*next point */
00410   }
00411   while (edgept != startpt);     /*until found or finished */
00412 
00413   if (loopstart == NULL && (startpt->flags[FLAGS] & FIXED) == 0) {
00414                                  /*fixed start of loop */
00415     startpt->flags[FLAGS] |= FIXED;
00416     loopstart = startpt;         /*or start of loop */
00417   }
00418   if (loopstart) {
00419     do {
00420       edgept = loopstart;        /*first to do */
00421       do {
00422         linestart = edgept;
00423         edgesum = 0;             /*sum of lengths */
00424         do {
00425                                  /*sum lengths */
00426           edgesum += edgept->flags[RUNLENGTH];
00427           edgept = edgept->next; /*move on */
00428         }
00429         while ((edgept->flags[FLAGS] & FIXED) == 0
00430           && edgept != loopstart && edgesum < 126);
00431         if (poly_debug)
00432           tprintf
00433             ("Poly2:starting at (%d,%d)+%d=(%d,%d),%d to (%d,%d)\n",
00434             linestart->pos.x, linestart->pos.y, linestart->flags[DIR],
00435             linestart->vec.x, linestart->vec.y, edgesum, edgept->pos.x,
00436             edgept->pos.y);
00437                                  /*reapproximate */
00438         cutline(linestart, edgept, area);
00439 
00440         while ((edgept->next->flags[FLAGS] & FIXED)
00441           && edgept != loopstart)
00442           edgept = edgept->next; /*look for next non-fixed */
00443       }
00444                                  /*do all the loop */
00445       while (edgept != loopstart);
00446       edgesum = 0;
00447       do {
00448         if (edgept->flags[FLAGS] & FIXED)
00449           edgesum++;
00450         edgept = edgept->next;
00451       }
00452                                  //count fixed pts
00453       while (edgept != loopstart);
00454       if (edgesum < 3)
00455         area /= 2;               //must have 3 pts
00456     }
00457     while (edgesum < 3);
00458     do {
00459       linestart = edgept;
00460       do {
00461         edgept = edgept->next;
00462       }
00463       while ((edgept->flags[FLAGS] & FIXED) == 0);
00464       linestart->next = edgept;
00465       edgept->prev = linestart;
00466       linestart->vec.x = edgept->pos.x - linestart->pos.x;
00467       linestart->vec.y = edgept->pos.y - linestart->pos.y;
00468     }
00469     while (edgept != loopstart);
00470   }
00471   else
00472     edgept = startpt;            /*start of loop */
00473 
00474   loopstart = edgept;            /*new start */
00475   return loopstart;              /*correct exit */
00476 }
00477 
00478 
00479 /**********************************************************************
00480  *cutline(first,last,area) straightens out a line by partitioning
00481  *and joining the ends by a straight line*
00482  **********************************************************************/
00483 
00484 void cutline(                //recursive refine
00485              EDGEPT *first,  /*ends of line */
00486              EDGEPT *last,
00487              int area        /*area of object */
00488             ) {
00489   register EDGEPT *edge;         /*current edge */
00490   TPOINT vecsum;                 /*vector sum */
00491   int vlen;                      /*approx length of vecsum */
00492   TPOINT vec;                    /*accumulated vector */
00493   EDGEPT *maxpoint;              /*worst point */
00494   int maxperp;                   /*max deviation */
00495   register int perp;             /*perp distance */
00496   int ptcount;                   /*no of points */
00497   int squaresum;                 /*sum of perps */
00498 
00499   edge = first;                  /*start of line */
00500   if (edge->next == last)
00501     return;                      /*simple line */
00502 
00503                                  /*vector sum */
00504   vecsum.x = last->pos.x - edge->pos.x;
00505   vecsum.y = last->pos.y - edge->pos.y;
00506   if (vecsum.x == 0 && vecsum.y == 0) {
00507                                  /*special case */
00508     vecsum.x = -edge->prev->vec.x;
00509     vecsum.y = -edge->prev->vec.y;
00510   }
00511                                  /*absolute value */
00512   vlen = vecsum.x > 0 ? vecsum.x : -vecsum.x;
00513   if (vecsum.y > vlen)
00514     vlen = vecsum.y;             /*maximum */
00515   else if (-vecsum.y > vlen)
00516     vlen = -vecsum.y;            /*absolute value */
00517 
00518   vec.x = edge->vec.x;           /*accumulated vector */
00519   vec.y = edge->vec.y;
00520   maxperp = 0;                   /*none yet */
00521   squaresum = ptcount = 0;
00522   edge = edge->next;             /*move to actual point */
00523   maxpoint = edge;               /*in case there isn't one */
00524   do {
00525     perp = CROSS (vec, vecsum);  /*get perp distance */
00526     if (perp != 0) {
00527       perp *= perp;              /*squared deviation */
00528     }
00529     squaresum += perp;           /*sum squares */
00530     ptcount++;                   /*count points */
00531     if (poly_debug)
00532       tprintf ("Cutline:Final perp=%d\n", perp);
00533     if (perp > maxperp) {
00534       maxperp = perp;
00535       maxpoint = edge;           /*find greatest deviation */
00536     }
00537     vec.x += edge->vec.x;        /*accumulate vectors */
00538     vec.y += edge->vec.y;
00539     edge = edge->next;
00540   }
00541   while (edge != last);          /*test all line */
00542 
00543   perp = LENGTH (vecsum);
00544   ASSERT_HOST (perp != 0);
00545 
00546   if (maxperp < 256 * MAX_INT16) {
00547     maxperp <<= 8;
00548     maxperp /= perp;             /*true max perp */
00549   }
00550   else {
00551     maxperp /= perp;
00552     maxperp <<= 8;               /*avoid overflow */
00553   }
00554   if (squaresum < 256 * MAX_INT16)
00555                                  /*mean squared perp */
00556     perp = (squaresum << 8) / (perp * ptcount);
00557   else
00558                                  /*avoid overflow */
00559     perp = (squaresum / perp << 8) / ptcount;
00560 
00561   if (poly_debug)
00562     tprintf ("Cutline:A=%d, max=%.2f(%.2f%%), msd=%.2f(%.2f%%)\n",
00563       area, maxperp / 256.0, maxperp * 200.0 / area,
00564       perp / 256.0, perp * 300.0 / area);
00565   if (maxperp * par1 >= 10 * area || perp * par2 >= 10 * area || vlen >= 126) {
00566     maxpoint->flags[FLAGS] |= FIXED;
00567                                  /*partitions */
00568     cutline(first, maxpoint, area);
00569     cutline(maxpoint, last, area);
00570   }
00571 }