Tesseract  3.02
tesseract-ocr/textord/scanedg.cpp
Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        scanedg.c  (Formerly scanedge.c)
00003  * Description: Raster scanning crack based edge extractor.
00004  * Author:                                      Ray Smith
00005  * Created:                                     Fri Mar 22 16:11:50 GMT 1991
00006  *
00007  * (C) Copyright 1991, Hewlett-Packard Ltd.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
00020 #include "mfcpch.h"
00021 #include          "edgloop.h"
00022 #include          "scanedg.h"
00023 
00024 #define WHITE_PIX     1          /*thresholded colours */
00025 #define BLACK_PIX     0
00026                                  /*W->B->W */
00027 #define FLIP_COLOUR(pix)  (1-(pix))
00028 
00029 /**********************************************************************
00030  * block_edges
00031  *
00032  * Extract edges from a PDBLK.
00033  **********************************************************************/
00034 
00035 void block_edges(IMAGE *t_image,       // thresholded image
00036                  PDBLK *block,         // block in image
00037                  C_OUTLINE_IT* outline_it) {
00038   uinT8 margin;                  // margin colour
00039   inT16 x;                       // line coords
00040   inT16 y;                       // current line
00041   ICOORD bleft;                  // bounding box
00042   ICOORD tright;
00043   ICOORD block_bleft;            // bounding box
00044   ICOORD block_tright;
00045   int xindex;                    // index to pixel
00046   BLOCK_LINE_IT line_it = block; // line iterator
00047   IMAGELINE bwline;              // thresholded line
00048                                  // lines in progress
00049   CRACKEDGE **ptrline = new CRACKEDGE*[t_image->get_xsize()+1];
00050   CRACKEDGE *free_cracks = NULL;
00051 
00052   block->bounding_box(bleft, tright);  // block box
00053   block_bleft = bleft;
00054   block_tright = tright;
00055   for (x = tright.x() - bleft.x(); x >= 0; x--)
00056     ptrline[x] = NULL;           //no lines in progress
00057 
00058   bwline.init(t_image->get_xsize());
00059 
00060   margin = WHITE_PIX;
00061 
00062   for (y = tright.y() - 1; y >= bleft.y() - 1; y--) {
00063     if (y >= block_bleft.y() && y < block_tright.y()) {
00064       t_image->get_line(bleft.x(), y, tright.x() - bleft.x(), &bwline, 0);
00065       make_margins(block, &line_it, bwline.pixels, margin, bleft.x(),
00066                    tright.x(), y);
00067     } else {
00068       x = tright.x() - bleft.x();
00069       for (xindex = 0; xindex < x; xindex++)
00070         bwline.pixels[xindex] = margin;
00071     }
00072     line_edges(bleft.x(), y, tright.x() - bleft.x(),
00073                margin, bwline.pixels, ptrline, &free_cracks, outline_it);
00074   }
00075 
00076   free_crackedges(free_cracks);  // really free them
00077   delete[] ptrline;
00078 }
00079 
00080 
00081 /**********************************************************************
00082  * make_margins
00083  *
00084  * Get an image line and set to margin non-text pixels.
00085  **********************************************************************/
00086 
00087 void make_margins(                         //get a line
00088                   PDBLK *block,            //block in image
00089                   BLOCK_LINE_IT *line_it,  //for old style
00090                   uinT8 *pixels,           //pixels to strip
00091                   uinT8 margin,            //white-out pixel
00092                   inT16 left,              //block edges
00093                   inT16 right,
00094                   inT16 y                  //line coord
00095                  ) {
00096   PB_LINE_IT *lines;
00097   ICOORDELT_LIST *segments;      //bits of a line
00098   ICOORDELT_IT seg_it;
00099   inT32 start;                   //of segment
00100   inT16 xext;                    //of segment
00101   int xindex;                    //index to pixel
00102 
00103   if (block->poly_block () != NULL) {
00104     lines = new PB_LINE_IT (block->poly_block ());
00105     segments = lines->get_line (y);
00106     if (!segments->empty ()) {
00107       seg_it.set_to_list (segments);
00108       seg_it.mark_cycle_pt ();
00109       start = seg_it.data ()->x ();
00110       xext = seg_it.data ()->y ();
00111       for (xindex = left; xindex < right; xindex++) {
00112         if (xindex >= start && !seg_it.cycled_list ()) {
00113           xindex = start + xext - 1;
00114           seg_it.forward ();
00115           start = seg_it.data ()->x ();
00116           xext = seg_it.data ()->y ();
00117         }
00118         else
00119           pixels[xindex - left] = margin;
00120       }
00121     }
00122     else {
00123       for (xindex = left; xindex < right; xindex++)
00124         pixels[xindex - left] = margin;
00125     }
00126     delete segments;
00127     delete lines;
00128   }
00129   else {
00130     start = line_it->get_line (y, xext);
00131     for (xindex = left; xindex < start; xindex++)
00132       pixels[xindex - left] = margin;
00133     for (xindex = start + xext; xindex < right; xindex++)
00134       pixels[xindex - left] = margin;
00135   }
00136 }
00137 
00138 
00139 /**********************************************************************
00140  * whiteout_block
00141  *
00142  * Extract edges from a PDBLK.
00143  **********************************************************************/
00144 
00145 void whiteout_block(                 //clean it
00146                     IMAGE *t_image,  //threshold image
00147                     PDBLK *block     //block in image
00148                    ) {
00149   inT16 x;                       //line coords
00150   inT16 y;                       //current line
00151   inT16 xext;                    //line width
00152   int xindex;                    //index to pixel
00153   uinT8 *dest;                   //destination pixel
00154   TBOX block_box;                 //bounding box
00155   BLOCK_LINE_IT line_it = block; //line iterator
00156   IMAGELINE bwline;              //thresholded line
00157 
00158   block_box = block->bounding_box ();
00159   for (y = block_box.bottom (); y < block_box.top (); y++) {
00160                                  //find line limits
00161     x = line_it.get_line (y, xext);
00162     t_image->get_line (x, y, xext, &bwline, 0);
00163     dest = bwline.pixels;        //destination pixel
00164     for (xindex = 0; xindex < xext; xindex++)
00165       *dest++ = 1;
00166     t_image->put_line (x, y, xext, &bwline, 0);
00167   }
00168 }
00169 
00170 
00171 /**********************************************************************
00172  * line_edges
00173  *
00174  * Scan a line for edges and update the edges in progress.
00175  * When edges close into loops, send them for approximation.
00176  **********************************************************************/
00177 
00178 void line_edges(inT16 x,                         // coord of line start
00179                 inT16 y,                         // coord of line
00180                 inT16 xext,                      // width of line
00181                 uinT8 uppercolour,               // start of prev line
00182                 uinT8 * bwpos,                   // thresholded line
00183                 CRACKEDGE ** prevline,           // edges in progress
00184                 CRACKEDGE **free_cracks,
00185                 C_OUTLINE_IT* outline_it) {
00186   CrackPos pos = {free_cracks, x, y };
00187   int xmax;                      // max x coord
00188   int colour;                    // of current pixel
00189   int prevcolour;                // of previous pixel
00190   CRACKEDGE *current;            // current h edge
00191   CRACKEDGE *newcurrent;         // new h edge
00192 
00193   xmax = x + xext;               // max allowable coord
00194   prevcolour = uppercolour;      // forced plain margin
00195   current = NULL;                // nothing yet
00196 
00197                                  // do each pixel
00198   for (; pos.x < xmax; pos.x++, prevline++) {
00199     colour = *bwpos++;           // current pixel
00200     if (*prevline != NULL) {
00201                                  // changed above
00202                                  // change colour
00203       uppercolour = FLIP_COLOUR(uppercolour);
00204       if (colour == prevcolour) {
00205         if (colour == uppercolour) {
00206                                  // finish a line
00207           join_edges(current, *prevline, free_cracks, outline_it);
00208           current = NULL;        // no edge now
00209         } else {
00210                                  // new horiz edge
00211           current = h_edge(uppercolour - colour, *prevline, &pos);
00212         }
00213         *prevline = NULL;        // no change this time
00214       } else {
00215         if (colour == uppercolour)
00216           *prevline = v_edge(colour - prevcolour, *prevline, &pos);
00217                                  // 8 vs 4 connection
00218         else if (colour == WHITE_PIX) {
00219           join_edges(current, *prevline, free_cracks, outline_it);
00220           current = h_edge(uppercolour - colour, NULL, &pos);
00221           *prevline = v_edge(colour - prevcolour, current, &pos);
00222         } else {
00223           newcurrent = h_edge(uppercolour - colour, *prevline, &pos);
00224           *prevline = v_edge(colour - prevcolour, current, &pos);
00225           current = newcurrent;  // right going h edge
00226         }
00227         prevcolour = colour;     // remember new colour
00228       }
00229     } else {
00230       if (colour != prevcolour) {
00231         *prevline = current = v_edge(colour - prevcolour, current, &pos);
00232         prevcolour = colour;
00233       }
00234       if (colour != uppercolour)
00235         current = h_edge(uppercolour - colour, current, &pos);
00236       else
00237         current = NULL;          // no edge now
00238     }
00239   }
00240   if (current != NULL) {
00241                                  // out of block
00242     if (*prevline != NULL) {     // got one to join to?
00243       join_edges(current, *prevline, free_cracks, outline_it);
00244       *prevline = NULL;          // tidy now
00245     } else {
00246                                  // fake vertical
00247       *prevline = v_edge(FLIP_COLOUR(prevcolour)-prevcolour, current, &pos);
00248     }
00249   } else if (*prevline != NULL) {
00250                                  //continue fake
00251     *prevline = v_edge(FLIP_COLOUR(prevcolour)-prevcolour, *prevline, &pos);
00252   }
00253 }
00254 
00255 
00256 /**********************************************************************
00257  * h_edge
00258  *
00259  * Create a new horizontal CRACKEDGE and join it to the given edge.
00260  **********************************************************************/
00261 
00262 CRACKEDGE *h_edge(int sign,                       // sign of edge
00263                   CRACKEDGE* join,                // edge to join to
00264                   CrackPos* pos) {
00265   CRACKEDGE *newpt;              // return value
00266 
00267   if (*pos->free_cracks != NULL) {
00268     newpt = *pos->free_cracks;
00269     *pos->free_cracks = newpt->next;  // get one fast
00270   } else {
00271     newpt = new CRACKEDGE;
00272   }
00273   newpt->pos.set_y(pos->y + 1);       // coords of pt
00274   newpt->stepy = 0;              // edge is horizontal
00275 
00276   if (sign > 0) {
00277     newpt->pos.set_x(pos->x + 1);     // start location
00278     newpt->stepx = -1;
00279     newpt->stepdir = 0;
00280   } else {
00281     newpt->pos.set_x(pos->x);        // start location
00282     newpt->stepx = 1;
00283     newpt->stepdir = 2;
00284   }
00285 
00286   if (join == NULL) {
00287     newpt->next = newpt;         // ptrs to other ends
00288     newpt->prev = newpt;
00289   } else {
00290     if (newpt->pos.x() + newpt->stepx == join->pos.x()
00291     && newpt->pos.y() == join->pos.y()) {
00292       newpt->prev = join->prev;  // update other ends
00293       newpt->prev->next = newpt;
00294       newpt->next = join;        // join up
00295       join->prev = newpt;
00296     } else {
00297       newpt->next = join->next;  // update other ends
00298       newpt->next->prev = newpt;
00299       newpt->prev = join;        // join up
00300       join->next = newpt;
00301     }
00302   }
00303   return newpt;
00304 }
00305 
00306 
00307 /**********************************************************************
00308  * v_edge
00309  *
00310  * Create a new vertical CRACKEDGE and join it to the given edge.
00311  **********************************************************************/
00312 
00313 CRACKEDGE *v_edge(int sign,                       // sign of edge
00314                   CRACKEDGE* join,
00315                   CrackPos* pos) {
00316   CRACKEDGE *newpt;              // return value
00317 
00318   if (*pos->free_cracks != NULL) {
00319     newpt = *pos->free_cracks;
00320     *pos->free_cracks = newpt->next;  // get one fast
00321   } else {
00322     newpt = new CRACKEDGE;
00323   }
00324   newpt->pos.set_x(pos->x);           // coords of pt
00325   newpt->stepx = 0;              // edge is vertical
00326 
00327   if (sign > 0) {
00328     newpt->pos.set_y(pos->y);         // start location
00329     newpt->stepy = 1;
00330     newpt->stepdir = 3;
00331   } else {
00332     newpt->pos.set_y(pos->y + 1);     // start location
00333     newpt->stepy = -1;
00334     newpt->stepdir = 1;
00335   }
00336 
00337   if (join == NULL) {
00338     newpt->next = newpt;         //ptrs to other ends
00339     newpt->prev = newpt;
00340   } else {
00341     if (newpt->pos.x() == join->pos.x()
00342     && newpt->pos.y() + newpt->stepy == join->pos.y()) {
00343       newpt->prev = join->prev;  // update other ends
00344       newpt->prev->next = newpt;
00345       newpt->next = join;        // join up
00346       join->prev = newpt;
00347     } else {
00348       newpt->next = join->next;  // update other ends
00349       newpt->next->prev = newpt;
00350       newpt->prev = join;        // join up
00351       join->next = newpt;
00352     }
00353   }
00354   return newpt;
00355 }
00356 
00357 
00358 /**********************************************************************
00359  * join_edges
00360  *
00361  * Join 2 edges together. Send the outline for approximation when a
00362  * closed loop is formed.
00363  **********************************************************************/
00364 
00365 void join_edges(CRACKEDGE *edge1,  // edges to join
00366                 CRACKEDGE *edge2,   // no specific order
00367                 CRACKEDGE **free_cracks,
00368                 C_OUTLINE_IT* outline_it) {
00369   if (edge1->pos.x() + edge1->stepx != edge2->pos.x()
00370   || edge1->pos.y() + edge1->stepy != edge2->pos.y()) {
00371     CRACKEDGE *tempedge = edge1;
00372     edge1 = edge2;               // swap araound
00373     edge2 = tempedge;
00374   }
00375 
00376   if (edge1->next == edge2) {
00377                                  // already closed
00378     complete_edge(edge1, outline_it);
00379                                  // attach freelist to end
00380     edge1->prev->next = *free_cracks;
00381     *free_cracks = edge1;         // and free list
00382   } else {
00383                                  // update opposite ends
00384     edge2->prev->next = edge1->next;
00385     edge1->next->prev = edge2->prev;
00386     edge1->next = edge2;         // make joins
00387     edge2->prev = edge1;
00388   }
00389 }
00390 
00391 
00392 /**********************************************************************
00393  * free_crackedges
00394  *
00395  * Really free the CRACKEDGEs by giving them back to delete.
00396  **********************************************************************/
00397 
00398 void free_crackedges(CRACKEDGE *start) {
00399   CRACKEDGE *current;            // current edge to free
00400   CRACKEDGE *next;               // next one to free
00401 
00402   for (current = start; current != NULL; current = next) {
00403     next = current->next;
00404     delete current;              // delete them all
00405   }
00406 }