Tesseract  3.02
tesseract-ocr/wordrec/findseam.cpp
Go to the documentation of this file.
00001 /* -*-C-*-
00002  ********************************************************************************
00003  *
00004  * File:        findseam.c  (Formerly findseam.c)
00005  * Description:
00006  * Author:       Mark Seaman, OCR Technology
00007  * Created:      Fri Oct 16 14:37:00 1987
00008  * Modified:     Tue Jul 30 15:44:59 1991 (Mark Seaman) marks@hpgrlt
00009  * Language:     C
00010  * Package:      N/A
00011  * Status:       Reusable Software Component
00012  *
00013  * (c) Copyright 1987, Hewlett-Packard Company.
00014  ** Licensed under the Apache License, Version 2.0 (the "License");
00015  ** you may not use this file except in compliance with the License.
00016  ** You may obtain a copy of the License at
00017  ** http://www.apache.org/licenses/LICENSE-2.0
00018  ** Unless required by applicable law or agreed to in writing, software
00019  ** distributed under the License is distributed on an "AS IS" BASIS,
00020  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00021  ** See the License for the specific language governing permissions and
00022  ** limitations under the License.
00023  *
00024  *********************************************************************************/
00025 /*----------------------------------------------------------------------
00026               I n c l u d e s
00027 ----------------------------------------------------------------------*/
00028 #include "findseam.h"
00029 #include "gradechop.h"
00030 #include "olutil.h"
00031 #include "plotedges.h"
00032 #include "outlines.h"
00033 #include "freelist.h"
00034 #include "seam.h"
00035 #include "wordrec.h"
00036 
00037 // Include automatically generated configuration file if running autoconf.
00038 #ifdef HAVE_CONFIG_H
00039 #include "config_auto.h"
00040 #endif
00041 
00042 /*----------------------------------------------------------------------
00043               T y p e s
00044 ----------------------------------------------------------------------*/
00045 #define SPLIT_CLOSENESS        20/* Difference in x value */
00046                                  /* How many to keep */
00047 #define MAX_NUM_SEAMS          150
00048                                  /* How many to keep */
00049 #define MAX_OLD_SEAMS          150
00050 #define NO_FULL_PRIORITY       -1/* Special marker for pri. */
00051                                  /* Evalute right away */
00052 #define BAD_PRIORITY           9999.0
00053 
00054 /*----------------------------------------------------------------------
00055               M a c r o s
00056 ----------------------------------------------------------------------*/
00057 /**********************************************************************
00058  * add_seam_to_queue
00059  *
00060  * Add this seam value to the seam queue.  If the heap is already full
00061  * then nothing is done.
00062  **********************************************************************/
00063 
00064 #define add_seam_to_queue(seams,seam,priority)  \
00065 if (seam)\
00066 {\
00067                 if (HeapFull(seams))\
00068                         junk_worst_seam(seams,seam,priority);\
00069                 else\
00070                         HeapPush (seams, priority, (char*) seam);\
00071         }
00072 
00073 /**********************************************************************
00074  * best_seam_priority
00075  *
00076  * Return the best priority value on the queue.
00077  **********************************************************************/
00078 
00079 #define best_seam_priority(seam_queue)   \
00080 (HeapEmpty (seam_queue) ?              \
00081         NO_FULL_PRIORITY       :              \
00082         ((SEAM*) seam_queue_element(seam_queue, 0))->priority)
00083 
00084 /**********************************************************************
00085  * create_seam_queue
00086  *
00087  * Create a new seam queue with no elements in it.
00088  **********************************************************************/
00089 
00090 #define create_seam_queue(seam_queue)     \
00091 (seam_queue = MakeHeap (MAX_NUM_SEAMS))
00092 
00093 /**********************************************************************
00094  * create_seam_pile
00095  *
00096  * Create a new seam pile with no elements in it.
00097  **********************************************************************/
00098 
00099 #define create_seam_pile(seam_pile)     \
00100 (seam_pile = array_new (MAX_OLD_SEAMS))
00101 
00102 /**********************************************************************
00103  * delete_seam_queue
00104  *
00105  * Delete a seam queue along with all the seam structures associated
00106  * with it.
00107  **********************************************************************/
00108 
00109 #define delete_seam_queue(seam_queue)      \
00110 (FreeHeapData (seam_queue, delete_seam), \
00111         seam_queue = NULL)                      \
00112 
00113 
00114 /**********************************************************************
00115  * pop_next_seam
00116  *
00117  * Remove the next seam from the queue.  Put the seam and priority
00118  * values in the requested variables.  If there was nothing to pop
00119  * then return FALSE, else return TRUE.
00120  **********************************************************************/
00121 
00122 #define pop_next_seam(seams,seam,priority)  \
00123 (HeapPop (seams,&priority,&seam) == TESS_HEAP_OK)   \
00124 
00125 
00126 /**********************************************************************
00127  * seam_queue_element
00128  *
00129  * Return the element from the seam queue at the requested index.
00130  **********************************************************************/
00131 
00132 #define seam_queue_element(seam_queue,index)  \
00133 ((index < SizeOfHeap (seam_queue)) ?        \
00134         HeapDataFor (seam_queue, index)   :        \
00135         NULL)                                      \
00136 
00137 
00138 /*----------------------------------------------------------------------
00139               F u n c t i o n s
00140 ----------------------------------------------------------------------*/
00141 namespace tesseract {
00142 
00143 /**********************************************************************
00144  * junk_worst_seam
00145  *
00146  * Delete the worst seam from the queue because it is full.
00147  **********************************************************************/
00148 void Wordrec::junk_worst_seam(SEAM_QUEUE seams, SEAM *new_seam,
00149                               float new_priority) {
00150   SEAM *seam;
00151   float priority;
00152 
00153   HeapPopWorst(seams, &priority, &seam);
00154   if (priority > new_priority) {
00155     delete_seam(seam);  /*get rid of it */
00156     HeapPush (seams, new_priority, (char *) new_seam);
00157   }
00158   else {
00159     delete_seam(new_seam);
00160     HeapPush (seams, priority, (char *) seam);
00161   }
00162 }
00163 
00164 
00165 /**********************************************************************
00166  * choose_best_seam
00167  *
00168  * Choose the best seam that can be created by assembling this a
00169  * collection of splits.  A queue of all the possible seams is
00170  * maintained.  Each new split received is placed in that queue with
00171  * its partial priority value.  These values in the seam queue are
00172  * evaluated and combined until a good enough seam is found.  If no
00173  * further good seams are being found then this function returns to the
00174  * caller, who will send more splits.  If this function is called with
00175  * a split of NULL, then no further splits can be supplied by the
00176  * caller.
00177  **********************************************************************/
00178 void Wordrec::choose_best_seam(SEAM_QUEUE seam_queue,
00179                                SEAM_PILE *seam_pile,
00180                                SPLIT *split,
00181                                PRIORITY priority,
00182                                SEAM **seam_result,
00183                                TBLOB *blob) {
00184   SEAM *seam;
00185   char str[80];
00186   float my_priority;
00187   /* Add seam of split */
00188   my_priority = priority;
00189   if (split != NULL) {
00190     TPOINT split_point = split->point1->pos;
00191     split_point += split->point2->pos;
00192     split_point /= 2;
00193     seam = new_seam(my_priority, split_point, split, NULL, NULL);
00194     if (chop_debug > 1)
00195       print_seam ("Partial priority    ", seam);
00196     add_seam_to_queue (seam_queue, seam, (float) my_priority);
00197 
00198     if (my_priority > chop_good_split)
00199       return;
00200   }
00201 
00202   TBOX bbox = blob->bounding_box();
00203   /* Queue loop */
00204   while (pop_next_seam (seam_queue, seam, my_priority)) {
00205     /* Set full priority */
00206     my_priority = seam_priority (seam, bbox.left(), bbox.right());
00207     if (chop_debug) {
00208       sprintf (str, "Full my_priority %0.0f,  ", my_priority);
00209       print_seam(str, seam);
00210     }
00211 
00212     if ((*seam_result == NULL || /* Replace answer */
00213     (*seam_result)->priority > my_priority) && my_priority < chop_ok_split) {
00214       /* No crossing */
00215       if (constrained_split (seam->split1, blob)) {
00216         delete_seam(*seam_result);
00217         clone_seam(*seam_result, seam);
00218         (*seam_result)->priority = my_priority;
00219       }
00220       else {
00221         delete_seam(seam);
00222         seam = NULL;
00223         my_priority = BAD_PRIORITY;
00224       }
00225     }
00226 
00227     if (my_priority < chop_good_split) {
00228       if (seam)
00229         delete_seam(seam);
00230       return;                    /* Made good answer */
00231     }
00232 
00233     if (seam) {
00234                                  /* Combine with others */
00235       if (array_count (*seam_pile) < MAX_NUM_SEAMS
00236       /*|| tessedit_truncate_chopper==0 */ ) {
00237         combine_seam(seam_queue, *seam_pile, seam);
00238         *seam_pile = array_push (*seam_pile, seam);
00239       }
00240       else
00241         delete_seam(seam);
00242     }
00243 
00244     my_priority = best_seam_priority (seam_queue);
00245     if ((my_priority > chop_ok_split) ||
00246       (my_priority > chop_good_split && split))
00247       return;
00248   }
00249 }
00250 
00251 
00252 /**********************************************************************
00253  * combine_seam
00254  *
00255  * Find other seams to combine with this one.  The new seams that result
00256  * from this union should be added to the seam queue.  The return value
00257  * tells whether or not any additional seams were added to the queue.
00258  **********************************************************************/
00259 void Wordrec::combine_seam(SEAM_QUEUE seam_queue, SEAM_PILE seam_pile,
00260                            SEAM *seam) {
00261   register inT16 x;
00262   register inT16 dist;
00263   inT16 bottom1, top1;
00264   inT16 bottom2, top2;
00265 
00266   SEAM *new_one;
00267   SEAM *this_one;
00268 
00269   bottom1 = seam->split1->point1->pos.y;
00270   if (seam->split1->point2->pos.y >= bottom1)
00271     top1 = seam->split1->point2->pos.y;
00272   else {
00273     top1 = bottom1;
00274     bottom1 = seam->split1->point2->pos.y;
00275   }
00276   if (seam->split2 != NULL) {
00277     bottom2 = seam->split2->point1->pos.y;
00278     if (seam->split2->point2->pos.y >= bottom2)
00279       top2 = seam->split2->point2->pos.y;
00280     else {
00281       top2 = bottom2;
00282       bottom2 = seam->split2->point2->pos.y;
00283     }
00284   }
00285   else {
00286     bottom2 = bottom1;
00287     top2 = top1;
00288   }
00289   array_loop(seam_pile, x) {
00290     this_one = (SEAM *) array_value (seam_pile, x);
00291     dist = seam->location.x - this_one->location.x;
00292     if (-SPLIT_CLOSENESS < dist &&
00293       dist < SPLIT_CLOSENESS &&
00294     seam->priority + this_one->priority < chop_ok_split) {
00295       inT16 split1_point1_y = this_one->split1->point1->pos.y;
00296       inT16 split1_point2_y = this_one->split1->point2->pos.y;
00297       inT16 split2_point1_y = 0;
00298       inT16 split2_point2_y = 0;
00299       if (this_one->split2) {
00300         split2_point1_y = this_one->split2->point1->pos.y;
00301         split2_point2_y = this_one->split2->point2->pos.y;
00302       }
00303       if (
00305         (
00306           /* this_one->split1 always exists */
00307           (
00308             ((split1_point1_y >= top1 && split1_point2_y >= top1) ||
00309              (split1_point1_y <= bottom1 && split1_point2_y <= bottom1))
00310             &&
00311             ((split1_point1_y >= top2 && split1_point2_y >= top2) ||
00312              (split1_point1_y <= bottom2 && split1_point2_y <= bottom2))
00313           )
00314         )
00315         &&
00316         (
00317           this_one->split2 == NULL ||
00318           (
00319             ((split2_point1_y >= top1 && split2_point2_y >= top1) ||
00320              (split2_point1_y <= bottom1 && split2_point2_y <= bottom1))
00321             &&
00322             ((split2_point1_y >= top2 && split2_point2_y >= top2) ||
00323              (split2_point1_y <= bottom2 && split2_point2_y <= bottom2))
00324           )
00325         )
00326       ) {
00327         new_one = join_two_seams (seam, this_one);
00328         if (chop_debug > 1)
00329           print_seam ("Combo priority       ", new_one);
00330         add_seam_to_queue (seam_queue, new_one, new_one->priority);
00331       }
00332     }
00333   }
00334 }
00335 
00336 
00337 /**********************************************************************
00338  * constrained_split
00339  *
00340  * Constrain this split to obey certain rules.  It must not cross any
00341  * inner outline.  It must not cut off a small chunk of the outline.
00342  **********************************************************************/
00343 inT16 Wordrec::constrained_split(SPLIT *split, TBLOB *blob) {
00344   TESSLINE *outline;
00345 
00346   if (is_little_chunk (split->point1, split->point2))
00347     return (FALSE);
00348 
00349   for (outline = blob->outlines; outline; outline = outline->next) {
00350     if (split_bounds_overlap (split, outline) &&
00351     crosses_outline (split->point1, split->point2, outline->loop)) {
00352       return (FALSE);
00353     }
00354   }
00355   return (TRUE);
00356 }
00357 
00358 
00359 /**********************************************************************
00360  * delete_seam_pile
00361  *
00362  * Delete the seams that are held in the seam pile.  Destroy the splits
00363  * that are referenced by these seams.
00364  **********************************************************************/
00365 void Wordrec::delete_seam_pile(SEAM_PILE seam_pile) {
00366   inT16 x;
00367 
00368   array_loop(seam_pile, x) {
00369     delete_seam ((SEAM *) array_value (seam_pile, x));
00370   }
00371   array_free(seam_pile);
00372 }
00373 
00374 /**********************************************************************
00375  * pick_good_seam
00376  *
00377  * Find and return a good seam that will split this blob into two pieces.
00378  * Work from the outlines provided.
00379  **********************************************************************/
00380 SEAM *Wordrec::pick_good_seam(TBLOB *blob) {
00381   SEAM_QUEUE seam_queue;
00382   SEAM_PILE seam_pile;
00383   POINT_GROUP point_heap;
00384   PRIORITY priority;
00385   EDGEPT *edge;
00386   EDGEPT *points[MAX_NUM_POINTS];
00387   EDGEPT_CLIST new_points;
00388   SEAM *seam = NULL;
00389   TESSLINE *outline;
00390   inT16 num_points = 0;
00391 
00392 #ifndef GRAPHICS_DISABLED
00393   if (chop_debug > 2)
00394     wordrec_display_splits.set_value(true);
00395 
00396   draw_blob_edges(blob);
00397 #endif
00398 
00399   point_heap = MakeHeap (MAX_NUM_POINTS);
00400   for (outline = blob->outlines; outline; outline = outline->next)
00401     prioritize_points(outline, point_heap);
00402 
00403   while (HeapPop (point_heap, &priority, &edge) == TESS_HEAP_OK) {
00404     if (num_points < MAX_NUM_POINTS)
00405       points[num_points++] = (EDGEPT *) edge;
00406   }
00407   FreeHeap(point_heap);
00408 
00409   /* Initialize queue & pile */
00410   create_seam_pile(seam_pile);
00411   create_seam_queue(seam_queue);
00412 
00413   try_point_pairs(points, num_points, seam_queue, &seam_pile, &seam, blob);
00414   try_vertical_splits(points, num_points, &new_points,
00415                       seam_queue, &seam_pile, &seam, blob);
00416 
00417   if (seam == NULL) {
00418     choose_best_seam(seam_queue, &seam_pile, NULL, BAD_PRIORITY, &seam, blob);
00419   }
00420   else if (seam->priority > chop_good_split) {
00421     choose_best_seam (seam_queue, &seam_pile, NULL, seam->priority,
00422       &seam, blob);
00423   }
00424 
00425   EDGEPT_C_IT it(&new_points);
00426   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00427     EDGEPT *inserted_point = it.data();
00428     if (!point_used_by_seam(seam, inserted_point)) {
00429       remove_edgept(inserted_point);
00430     }
00431   }
00432 
00433   delete_seam_queue(seam_queue);
00434   delete_seam_pile(seam_pile);
00435 
00436   if (seam) {
00437     if (seam->priority > chop_ok_split) {
00438       delete_seam(seam);
00439       seam = NULL;
00440     }
00441 #ifndef GRAPHICS_DISABLED
00442     else if (wordrec_display_splits) {
00443       if (seam->split1)
00444         mark_split (seam->split1);
00445       if (seam->split2)
00446         mark_split (seam->split2);
00447       if (seam->split3)
00448         mark_split (seam->split3);
00449       if (chop_debug > 2) {
00450         update_edge_window();
00451         edge_window_wait();
00452       }
00453     }
00454 #endif
00455   }
00456 
00457   if (chop_debug)
00458     wordrec_display_splits.set_value(false);
00459 
00460   return (seam);
00461 }
00462 
00463 
00464 /**********************************************************************
00465  * seam_priority
00466  *
00467  * Assign a full priority value to the seam.
00468  **********************************************************************/
00469 PRIORITY Wordrec::seam_priority(SEAM *seam, inT16 xmin, inT16 xmax) {
00470   PRIORITY priority;
00471 
00472   if (seam->split1 == NULL)
00473     priority = 0;
00474 
00475   else if (seam->split2 == NULL) {
00476     priority = (seam->priority +
00477       full_split_priority (seam->split1, xmin, xmax));
00478   }
00479 
00480   else if (seam->split3 == NULL) {
00481     split_outline (seam->split2->point1, seam->split2->point2);
00482     priority = (seam->priority +
00483       full_split_priority (seam->split1, xmin, xmax));
00484     unsplit_outlines (seam->split2->point1, seam->split2->point2);
00485   }
00486 
00487   else {
00488     split_outline (seam->split2->point1, seam->split2->point2);
00489     split_outline (seam->split3->point1, seam->split3->point2);
00490     priority = (seam->priority +
00491       full_split_priority (seam->split1, xmin, xmax));
00492     unsplit_outlines (seam->split3->point1, seam->split3->point2);
00493     unsplit_outlines (seam->split2->point1, seam->split2->point2);
00494   }
00495 
00496   return (priority);
00497 }
00498 
00499 
00500 /**********************************************************************
00501  * try_point_pairs
00502  *
00503  * Try all the splits that are produced by pairing critical points
00504  * together.  See if any of them are suitable for use.  Use a seam
00505  * queue and seam pile that have already been initialized and used.
00506  **********************************************************************/
00507 void Wordrec::try_point_pairs (EDGEPT * points[MAX_NUM_POINTS],
00508                                inT16 num_points,
00509                                SEAM_QUEUE seam_queue,
00510                                SEAM_PILE * seam_pile,
00511                                SEAM ** seam,
00512                                TBLOB * blob) {
00513   inT16 x;
00514   inT16 y;
00515   SPLIT *split;
00516   PRIORITY priority;
00517 
00518   for (x = 0; x < num_points; x++) {
00519     for (y = x + 1; y < num_points; y++) {
00520 
00521       if (points[y] &&
00522           weighted_edgept_dist(points[x], points[y],
00523                                chop_x_y_weight) < chop_split_length &&
00524           points[x] != points[y]->next &&
00525           points[y] != points[x]->next &&
00526           !is_exterior_point(points[x], points[y]) &&
00527           !is_exterior_point(points[y], points[x])) {
00528         split = new_split (points[x], points[y]);
00529         priority = partial_split_priority (split);
00530 
00531         choose_best_seam(seam_queue, seam_pile, split, priority, seam, blob);
00532       }
00533     }
00534   }
00535 
00536 }
00537 
00538 
00539 /**********************************************************************
00540  * try_vertical_splits
00541  *
00542  * Try all the splits that are produced by vertical projection to see
00543  * if any of them are suitable for use.  Use a seam queue and seam pile
00544  * that have already been initialized and used.
00545  * Return in new_points a collection of points that were inserted into
00546  * the blob while examining vertical splits and which may safely be
00547  * removed once a seam is chosen if they are not part of the seam.
00548  **********************************************************************/
00549 void Wordrec::try_vertical_splits(EDGEPT * points[MAX_NUM_POINTS],
00550                                   inT16 num_points,
00551                                   EDGEPT_CLIST *new_points,
00552                                   SEAM_QUEUE seam_queue,
00553                                   SEAM_PILE * seam_pile,
00554                                   SEAM ** seam,
00555                                   TBLOB * blob) {
00556   EDGEPT *vertical_point = NULL;
00557   SPLIT *split;
00558   inT16 x;
00559   PRIORITY priority;
00560   TESSLINE *outline;
00561 
00562   for (x = 0; x < num_points; x++) {
00563     vertical_point = NULL;
00564     for (outline = blob->outlines; outline; outline = outline->next) {
00565       vertical_projection_point(points[x], outline->loop,
00566                                 &vertical_point, new_points);
00567     }
00568 
00569     if (vertical_point &&
00570       points[x] != vertical_point->next &&
00571       vertical_point != points[x]->next &&
00572       weighted_edgept_dist(points[x], vertical_point,
00573                            chop_x_y_weight) < chop_split_length) {
00574 
00575       split = new_split (points[x], vertical_point);
00576       priority = partial_split_priority (split);
00577 
00578       choose_best_seam(seam_queue, seam_pile, split, priority, seam, blob);
00579     }
00580   }
00581 }
00582 
00583 }