Tesseract
3.02
|
00001 /* -*-C-*- 00002 ******************************************************************************** 00003 * 00004 * File: findseam.c (Formerly findseam.c) 00005 * Description: 00006 * Author: Mark Seaman, OCR Technology 00007 * Created: Fri Oct 16 14:37:00 1987 00008 * Modified: Tue Jul 30 15:44:59 1991 (Mark Seaman) marks@hpgrlt 00009 * Language: C 00010 * Package: N/A 00011 * Status: Reusable Software Component 00012 * 00013 * (c) Copyright 1987, Hewlett-Packard Company. 00014 ** Licensed under the Apache License, Version 2.0 (the "License"); 00015 ** you may not use this file except in compliance with the License. 00016 ** You may obtain a copy of the License at 00017 ** http://www.apache.org/licenses/LICENSE-2.0 00018 ** Unless required by applicable law or agreed to in writing, software 00019 ** distributed under the License is distributed on an "AS IS" BASIS, 00020 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00021 ** See the License for the specific language governing permissions and 00022 ** limitations under the License. 00023 * 00024 *********************************************************************************/ 00025 /*---------------------------------------------------------------------- 00026 I n c l u d e s 00027 ----------------------------------------------------------------------*/ 00028 #include "findseam.h" 00029 #include "gradechop.h" 00030 #include "olutil.h" 00031 #include "plotedges.h" 00032 #include "outlines.h" 00033 #include "freelist.h" 00034 #include "seam.h" 00035 #include "wordrec.h" 00036 00037 // Include automatically generated configuration file if running autoconf. 00038 #ifdef HAVE_CONFIG_H 00039 #include "config_auto.h" 00040 #endif 00041 00042 /*---------------------------------------------------------------------- 00043 T y p e s 00044 ----------------------------------------------------------------------*/ 00045 #define SPLIT_CLOSENESS 20/* Difference in x value */ 00046 /* How many to keep */ 00047 #define MAX_NUM_SEAMS 150 00048 /* How many to keep */ 00049 #define MAX_OLD_SEAMS 150 00050 #define NO_FULL_PRIORITY -1/* Special marker for pri. */ 00051 /* Evalute right away */ 00052 #define BAD_PRIORITY 9999.0 00053 00054 /*---------------------------------------------------------------------- 00055 M a c r o s 00056 ----------------------------------------------------------------------*/ 00057 /********************************************************************** 00058 * add_seam_to_queue 00059 * 00060 * Add this seam value to the seam queue. If the heap is already full 00061 * then nothing is done. 00062 **********************************************************************/ 00063 00064 #define add_seam_to_queue(seams,seam,priority) \ 00065 if (seam)\ 00066 {\ 00067 if (HeapFull(seams))\ 00068 junk_worst_seam(seams,seam,priority);\ 00069 else\ 00070 HeapPush (seams, priority, (char*) seam);\ 00071 } 00072 00073 /********************************************************************** 00074 * best_seam_priority 00075 * 00076 * Return the best priority value on the queue. 00077 **********************************************************************/ 00078 00079 #define best_seam_priority(seam_queue) \ 00080 (HeapEmpty (seam_queue) ? \ 00081 NO_FULL_PRIORITY : \ 00082 ((SEAM*) seam_queue_element(seam_queue, 0))->priority) 00083 00084 /********************************************************************** 00085 * create_seam_queue 00086 * 00087 * Create a new seam queue with no elements in it. 00088 **********************************************************************/ 00089 00090 #define create_seam_queue(seam_queue) \ 00091 (seam_queue = MakeHeap (MAX_NUM_SEAMS)) 00092 00093 /********************************************************************** 00094 * create_seam_pile 00095 * 00096 * Create a new seam pile with no elements in it. 00097 **********************************************************************/ 00098 00099 #define create_seam_pile(seam_pile) \ 00100 (seam_pile = array_new (MAX_OLD_SEAMS)) 00101 00102 /********************************************************************** 00103 * delete_seam_queue 00104 * 00105 * Delete a seam queue along with all the seam structures associated 00106 * with it. 00107 **********************************************************************/ 00108 00109 #define delete_seam_queue(seam_queue) \ 00110 (FreeHeapData (seam_queue, delete_seam), \ 00111 seam_queue = NULL) \ 00112 00113 00114 /********************************************************************** 00115 * pop_next_seam 00116 * 00117 * Remove the next seam from the queue. Put the seam and priority 00118 * values in the requested variables. If there was nothing to pop 00119 * then return FALSE, else return TRUE. 00120 **********************************************************************/ 00121 00122 #define pop_next_seam(seams,seam,priority) \ 00123 (HeapPop (seams,&priority,&seam) == TESS_HEAP_OK) \ 00124 00125 00126 /********************************************************************** 00127 * seam_queue_element 00128 * 00129 * Return the element from the seam queue at the requested index. 00130 **********************************************************************/ 00131 00132 #define seam_queue_element(seam_queue,index) \ 00133 ((index < SizeOfHeap (seam_queue)) ? \ 00134 HeapDataFor (seam_queue, index) : \ 00135 NULL) \ 00136 00137 00138 /*---------------------------------------------------------------------- 00139 F u n c t i o n s 00140 ----------------------------------------------------------------------*/ 00141 namespace tesseract { 00142 00143 /********************************************************************** 00144 * junk_worst_seam 00145 * 00146 * Delete the worst seam from the queue because it is full. 00147 **********************************************************************/ 00148 void Wordrec::junk_worst_seam(SEAM_QUEUE seams, SEAM *new_seam, 00149 float new_priority) { 00150 SEAM *seam; 00151 float priority; 00152 00153 HeapPopWorst(seams, &priority, &seam); 00154 if (priority > new_priority) { 00155 delete_seam(seam); /*get rid of it */ 00156 HeapPush (seams, new_priority, (char *) new_seam); 00157 } 00158 else { 00159 delete_seam(new_seam); 00160 HeapPush (seams, priority, (char *) seam); 00161 } 00162 } 00163 00164 00165 /********************************************************************** 00166 * choose_best_seam 00167 * 00168 * Choose the best seam that can be created by assembling this a 00169 * collection of splits. A queue of all the possible seams is 00170 * maintained. Each new split received is placed in that queue with 00171 * its partial priority value. These values in the seam queue are 00172 * evaluated and combined until a good enough seam is found. If no 00173 * further good seams are being found then this function returns to the 00174 * caller, who will send more splits. If this function is called with 00175 * a split of NULL, then no further splits can be supplied by the 00176 * caller. 00177 **********************************************************************/ 00178 void Wordrec::choose_best_seam(SEAM_QUEUE seam_queue, 00179 SEAM_PILE *seam_pile, 00180 SPLIT *split, 00181 PRIORITY priority, 00182 SEAM **seam_result, 00183 TBLOB *blob) { 00184 SEAM *seam; 00185 char str[80]; 00186 float my_priority; 00187 /* Add seam of split */ 00188 my_priority = priority; 00189 if (split != NULL) { 00190 TPOINT split_point = split->point1->pos; 00191 split_point += split->point2->pos; 00192 split_point /= 2; 00193 seam = new_seam(my_priority, split_point, split, NULL, NULL); 00194 if (chop_debug > 1) 00195 print_seam ("Partial priority ", seam); 00196 add_seam_to_queue (seam_queue, seam, (float) my_priority); 00197 00198 if (my_priority > chop_good_split) 00199 return; 00200 } 00201 00202 TBOX bbox = blob->bounding_box(); 00203 /* Queue loop */ 00204 while (pop_next_seam (seam_queue, seam, my_priority)) { 00205 /* Set full priority */ 00206 my_priority = seam_priority (seam, bbox.left(), bbox.right()); 00207 if (chop_debug) { 00208 sprintf (str, "Full my_priority %0.0f, ", my_priority); 00209 print_seam(str, seam); 00210 } 00211 00212 if ((*seam_result == NULL || /* Replace answer */ 00213 (*seam_result)->priority > my_priority) && my_priority < chop_ok_split) { 00214 /* No crossing */ 00215 if (constrained_split (seam->split1, blob)) { 00216 delete_seam(*seam_result); 00217 clone_seam(*seam_result, seam); 00218 (*seam_result)->priority = my_priority; 00219 } 00220 else { 00221 delete_seam(seam); 00222 seam = NULL; 00223 my_priority = BAD_PRIORITY; 00224 } 00225 } 00226 00227 if (my_priority < chop_good_split) { 00228 if (seam) 00229 delete_seam(seam); 00230 return; /* Made good answer */ 00231 } 00232 00233 if (seam) { 00234 /* Combine with others */ 00235 if (array_count (*seam_pile) < MAX_NUM_SEAMS 00236 /*|| tessedit_truncate_chopper==0 */ ) { 00237 combine_seam(seam_queue, *seam_pile, seam); 00238 *seam_pile = array_push (*seam_pile, seam); 00239 } 00240 else 00241 delete_seam(seam); 00242 } 00243 00244 my_priority = best_seam_priority (seam_queue); 00245 if ((my_priority > chop_ok_split) || 00246 (my_priority > chop_good_split && split)) 00247 return; 00248 } 00249 } 00250 00251 00252 /********************************************************************** 00253 * combine_seam 00254 * 00255 * Find other seams to combine with this one. The new seams that result 00256 * from this union should be added to the seam queue. The return value 00257 * tells whether or not any additional seams were added to the queue. 00258 **********************************************************************/ 00259 void Wordrec::combine_seam(SEAM_QUEUE seam_queue, SEAM_PILE seam_pile, 00260 SEAM *seam) { 00261 register inT16 x; 00262 register inT16 dist; 00263 inT16 bottom1, top1; 00264 inT16 bottom2, top2; 00265 00266 SEAM *new_one; 00267 SEAM *this_one; 00268 00269 bottom1 = seam->split1->point1->pos.y; 00270 if (seam->split1->point2->pos.y >= bottom1) 00271 top1 = seam->split1->point2->pos.y; 00272 else { 00273 top1 = bottom1; 00274 bottom1 = seam->split1->point2->pos.y; 00275 } 00276 if (seam->split2 != NULL) { 00277 bottom2 = seam->split2->point1->pos.y; 00278 if (seam->split2->point2->pos.y >= bottom2) 00279 top2 = seam->split2->point2->pos.y; 00280 else { 00281 top2 = bottom2; 00282 bottom2 = seam->split2->point2->pos.y; 00283 } 00284 } 00285 else { 00286 bottom2 = bottom1; 00287 top2 = top1; 00288 } 00289 array_loop(seam_pile, x) { 00290 this_one = (SEAM *) array_value (seam_pile, x); 00291 dist = seam->location.x - this_one->location.x; 00292 if (-SPLIT_CLOSENESS < dist && 00293 dist < SPLIT_CLOSENESS && 00294 seam->priority + this_one->priority < chop_ok_split) { 00295 inT16 split1_point1_y = this_one->split1->point1->pos.y; 00296 inT16 split1_point2_y = this_one->split1->point2->pos.y; 00297 inT16 split2_point1_y = 0; 00298 inT16 split2_point2_y = 0; 00299 if (this_one->split2) { 00300 split2_point1_y = this_one->split2->point1->pos.y; 00301 split2_point2_y = this_one->split2->point2->pos.y; 00302 } 00303 if ( 00305 ( 00306 /* this_one->split1 always exists */ 00307 ( 00308 ((split1_point1_y >= top1 && split1_point2_y >= top1) || 00309 (split1_point1_y <= bottom1 && split1_point2_y <= bottom1)) 00310 && 00311 ((split1_point1_y >= top2 && split1_point2_y >= top2) || 00312 (split1_point1_y <= bottom2 && split1_point2_y <= bottom2)) 00313 ) 00314 ) 00315 && 00316 ( 00317 this_one->split2 == NULL || 00318 ( 00319 ((split2_point1_y >= top1 && split2_point2_y >= top1) || 00320 (split2_point1_y <= bottom1 && split2_point2_y <= bottom1)) 00321 && 00322 ((split2_point1_y >= top2 && split2_point2_y >= top2) || 00323 (split2_point1_y <= bottom2 && split2_point2_y <= bottom2)) 00324 ) 00325 ) 00326 ) { 00327 new_one = join_two_seams (seam, this_one); 00328 if (chop_debug > 1) 00329 print_seam ("Combo priority ", new_one); 00330 add_seam_to_queue (seam_queue, new_one, new_one->priority); 00331 } 00332 } 00333 } 00334 } 00335 00336 00337 /********************************************************************** 00338 * constrained_split 00339 * 00340 * Constrain this split to obey certain rules. It must not cross any 00341 * inner outline. It must not cut off a small chunk of the outline. 00342 **********************************************************************/ 00343 inT16 Wordrec::constrained_split(SPLIT *split, TBLOB *blob) { 00344 TESSLINE *outline; 00345 00346 if (is_little_chunk (split->point1, split->point2)) 00347 return (FALSE); 00348 00349 for (outline = blob->outlines; outline; outline = outline->next) { 00350 if (split_bounds_overlap (split, outline) && 00351 crosses_outline (split->point1, split->point2, outline->loop)) { 00352 return (FALSE); 00353 } 00354 } 00355 return (TRUE); 00356 } 00357 00358 00359 /********************************************************************** 00360 * delete_seam_pile 00361 * 00362 * Delete the seams that are held in the seam pile. Destroy the splits 00363 * that are referenced by these seams. 00364 **********************************************************************/ 00365 void Wordrec::delete_seam_pile(SEAM_PILE seam_pile) { 00366 inT16 x; 00367 00368 array_loop(seam_pile, x) { 00369 delete_seam ((SEAM *) array_value (seam_pile, x)); 00370 } 00371 array_free(seam_pile); 00372 } 00373 00374 /********************************************************************** 00375 * pick_good_seam 00376 * 00377 * Find and return a good seam that will split this blob into two pieces. 00378 * Work from the outlines provided. 00379 **********************************************************************/ 00380 SEAM *Wordrec::pick_good_seam(TBLOB *blob) { 00381 SEAM_QUEUE seam_queue; 00382 SEAM_PILE seam_pile; 00383 POINT_GROUP point_heap; 00384 PRIORITY priority; 00385 EDGEPT *edge; 00386 EDGEPT *points[MAX_NUM_POINTS]; 00387 EDGEPT_CLIST new_points; 00388 SEAM *seam = NULL; 00389 TESSLINE *outline; 00390 inT16 num_points = 0; 00391 00392 #ifndef GRAPHICS_DISABLED 00393 if (chop_debug > 2) 00394 wordrec_display_splits.set_value(true); 00395 00396 draw_blob_edges(blob); 00397 #endif 00398 00399 point_heap = MakeHeap (MAX_NUM_POINTS); 00400 for (outline = blob->outlines; outline; outline = outline->next) 00401 prioritize_points(outline, point_heap); 00402 00403 while (HeapPop (point_heap, &priority, &edge) == TESS_HEAP_OK) { 00404 if (num_points < MAX_NUM_POINTS) 00405 points[num_points++] = (EDGEPT *) edge; 00406 } 00407 FreeHeap(point_heap); 00408 00409 /* Initialize queue & pile */ 00410 create_seam_pile(seam_pile); 00411 create_seam_queue(seam_queue); 00412 00413 try_point_pairs(points, num_points, seam_queue, &seam_pile, &seam, blob); 00414 try_vertical_splits(points, num_points, &new_points, 00415 seam_queue, &seam_pile, &seam, blob); 00416 00417 if (seam == NULL) { 00418 choose_best_seam(seam_queue, &seam_pile, NULL, BAD_PRIORITY, &seam, blob); 00419 } 00420 else if (seam->priority > chop_good_split) { 00421 choose_best_seam (seam_queue, &seam_pile, NULL, seam->priority, 00422 &seam, blob); 00423 } 00424 00425 EDGEPT_C_IT it(&new_points); 00426 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 00427 EDGEPT *inserted_point = it.data(); 00428 if (!point_used_by_seam(seam, inserted_point)) { 00429 remove_edgept(inserted_point); 00430 } 00431 } 00432 00433 delete_seam_queue(seam_queue); 00434 delete_seam_pile(seam_pile); 00435 00436 if (seam) { 00437 if (seam->priority > chop_ok_split) { 00438 delete_seam(seam); 00439 seam = NULL; 00440 } 00441 #ifndef GRAPHICS_DISABLED 00442 else if (wordrec_display_splits) { 00443 if (seam->split1) 00444 mark_split (seam->split1); 00445 if (seam->split2) 00446 mark_split (seam->split2); 00447 if (seam->split3) 00448 mark_split (seam->split3); 00449 if (chop_debug > 2) { 00450 update_edge_window(); 00451 edge_window_wait(); 00452 } 00453 } 00454 #endif 00455 } 00456 00457 if (chop_debug) 00458 wordrec_display_splits.set_value(false); 00459 00460 return (seam); 00461 } 00462 00463 00464 /********************************************************************** 00465 * seam_priority 00466 * 00467 * Assign a full priority value to the seam. 00468 **********************************************************************/ 00469 PRIORITY Wordrec::seam_priority(SEAM *seam, inT16 xmin, inT16 xmax) { 00470 PRIORITY priority; 00471 00472 if (seam->split1 == NULL) 00473 priority = 0; 00474 00475 else if (seam->split2 == NULL) { 00476 priority = (seam->priority + 00477 full_split_priority (seam->split1, xmin, xmax)); 00478 } 00479 00480 else if (seam->split3 == NULL) { 00481 split_outline (seam->split2->point1, seam->split2->point2); 00482 priority = (seam->priority + 00483 full_split_priority (seam->split1, xmin, xmax)); 00484 unsplit_outlines (seam->split2->point1, seam->split2->point2); 00485 } 00486 00487 else { 00488 split_outline (seam->split2->point1, seam->split2->point2); 00489 split_outline (seam->split3->point1, seam->split3->point2); 00490 priority = (seam->priority + 00491 full_split_priority (seam->split1, xmin, xmax)); 00492 unsplit_outlines (seam->split3->point1, seam->split3->point2); 00493 unsplit_outlines (seam->split2->point1, seam->split2->point2); 00494 } 00495 00496 return (priority); 00497 } 00498 00499 00500 /********************************************************************** 00501 * try_point_pairs 00502 * 00503 * Try all the splits that are produced by pairing critical points 00504 * together. See if any of them are suitable for use. Use a seam 00505 * queue and seam pile that have already been initialized and used. 00506 **********************************************************************/ 00507 void Wordrec::try_point_pairs (EDGEPT * points[MAX_NUM_POINTS], 00508 inT16 num_points, 00509 SEAM_QUEUE seam_queue, 00510 SEAM_PILE * seam_pile, 00511 SEAM ** seam, 00512 TBLOB * blob) { 00513 inT16 x; 00514 inT16 y; 00515 SPLIT *split; 00516 PRIORITY priority; 00517 00518 for (x = 0; x < num_points; x++) { 00519 for (y = x + 1; y < num_points; y++) { 00520 00521 if (points[y] && 00522 weighted_edgept_dist(points[x], points[y], 00523 chop_x_y_weight) < chop_split_length && 00524 points[x] != points[y]->next && 00525 points[y] != points[x]->next && 00526 !is_exterior_point(points[x], points[y]) && 00527 !is_exterior_point(points[y], points[x])) { 00528 split = new_split (points[x], points[y]); 00529 priority = partial_split_priority (split); 00530 00531 choose_best_seam(seam_queue, seam_pile, split, priority, seam, blob); 00532 } 00533 } 00534 } 00535 00536 } 00537 00538 00539 /********************************************************************** 00540 * try_vertical_splits 00541 * 00542 * Try all the splits that are produced by vertical projection to see 00543 * if any of them are suitable for use. Use a seam queue and seam pile 00544 * that have already been initialized and used. 00545 * Return in new_points a collection of points that were inserted into 00546 * the blob while examining vertical splits and which may safely be 00547 * removed once a seam is chosen if they are not part of the seam. 00548 **********************************************************************/ 00549 void Wordrec::try_vertical_splits(EDGEPT * points[MAX_NUM_POINTS], 00550 inT16 num_points, 00551 EDGEPT_CLIST *new_points, 00552 SEAM_QUEUE seam_queue, 00553 SEAM_PILE * seam_pile, 00554 SEAM ** seam, 00555 TBLOB * blob) { 00556 EDGEPT *vertical_point = NULL; 00557 SPLIT *split; 00558 inT16 x; 00559 PRIORITY priority; 00560 TESSLINE *outline; 00561 00562 for (x = 0; x < num_points; x++) { 00563 vertical_point = NULL; 00564 for (outline = blob->outlines; outline; outline = outline->next) { 00565 vertical_projection_point(points[x], outline->loop, 00566 &vertical_point, new_points); 00567 } 00568 00569 if (vertical_point && 00570 points[x] != vertical_point->next && 00571 vertical_point != points[x]->next && 00572 weighted_edgept_dist(points[x], vertical_point, 00573 chop_x_y_weight) < chop_split_length) { 00574 00575 split = new_split (points[x], vertical_point); 00576 priority = partial_split_priority (split); 00577 00578 choose_best_seam(seam_queue, seam_pile, split, priority, seam, blob); 00579 } 00580 } 00581 } 00582 00583 }