Tesseract
3.02
|
00001 /********************************************************************** 00002 * File: wordseg.cpp (Formerly wspace.c) 00003 * Description: Code to segment the blobs into words. 00004 * Author: Ray Smith 00005 * Created: Fri Oct 16 11:32:28 BST 1992 00006 * 00007 * (C) Copyright 1992, Hewlett-Packard Ltd. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 * 00018 **********************************************************************/ 00019 00020 #include "mfcpch.h" 00021 #ifdef __UNIX__ 00022 #include <assert.h> 00023 #endif 00024 #include "stderr.h" 00025 #include "blobbox.h" 00026 #include "statistc.h" 00027 #include "drawtord.h" 00028 #include "makerow.h" 00029 #include "pitsync1.h" 00030 #include "tovars.h" 00031 #include "topitch.h" 00032 #include "cjkpitch.h" 00033 #include "textord.h" 00034 #include "fpchop.h" 00035 #include "wordseg.h" 00036 00037 // Include automatically generated configuration file if running autoconf. 00038 #ifdef HAVE_CONFIG_H 00039 #include "config_auto.h" 00040 #endif 00041 00042 #define EXTERN 00043 00044 EXTERN BOOL_VAR(textord_fp_chopping, TRUE, "Do fixed pitch chopping"); 00045 EXTERN BOOL_VAR(textord_force_make_prop_words, FALSE, 00046 "Force proportional word segmentation on all rows"); 00047 EXTERN BOOL_VAR(textord_chopper_test, FALSE, 00048 "Chopper is being tested."); 00049 00050 #define FIXED_WIDTH_MULTIPLE 5 00051 #define BLOCK_STATS_CLUSTERS 10 00052 00053 00061 void make_single_word(bool one_blob, TO_ROW_LIST *rows, ROW_LIST* real_rows) { 00062 TO_ROW_IT to_row_it(rows); 00063 ROW_IT row_it(real_rows); 00064 for (to_row_it.mark_cycle_pt(); !to_row_it.cycled_list(); 00065 to_row_it.forward()) { 00066 TO_ROW* row = to_row_it.data(); 00067 // The blobs have to come out of the BLOBNBOX into the C_BLOB_LIST ready 00068 // to create the word. 00069 C_BLOB_LIST cblobs; 00070 C_BLOB_IT cblob_it(&cblobs); 00071 BLOBNBOX_IT box_it(row->blob_list()); 00072 for (;!box_it.empty(); box_it.forward()) { 00073 BLOBNBOX* bblob= box_it.extract(); 00074 if (bblob->joined_to_prev() || (one_blob && !cblob_it.empty())) { 00075 if (bblob->cblob() != NULL) { 00076 C_OUTLINE_IT cout_it(cblob_it.data()->out_list()); 00077 cout_it.move_to_last(); 00078 cout_it.add_list_after(bblob->cblob()->out_list()); 00079 delete bblob->cblob(); 00080 } 00081 } else { 00082 if (bblob->cblob() != NULL) 00083 cblob_it.add_after_then_move(bblob->cblob()); 00084 } 00085 delete bblob; 00086 } 00087 // Convert the TO_ROW to a ROW. 00088 ROW* real_row = new ROW(row, static_cast<inT16>(row->kern_size), 00089 static_cast<inT16>(row->space_size)); 00090 WERD_IT word_it(real_row->word_list()); 00091 WERD* word = new WERD(&cblobs, 0, NULL); 00092 word->set_flag(W_BOL, TRUE); 00093 word->set_flag(W_EOL, TRUE); 00094 word->set_flag(W_DONT_CHOP, one_blob); 00095 word_it.add_after_then_move(word); 00096 row_it.add_after_then_move(real_row); 00097 } 00098 } 00099 00105 void make_words(tesseract::Textord *textord, 00106 ICOORD page_tr, // top right 00107 float gradient, // page skew 00108 BLOCK_LIST *blocks, // block list 00109 TO_BLOCK_LIST *port_blocks) { // output list 00110 TO_BLOCK_IT block_it; // iterator 00111 TO_BLOCK *block; // current block 00112 00113 if (textord->use_cjk_fp_model()) { 00114 compute_fixed_pitch_cjk(page_tr, port_blocks); 00115 } else { 00116 compute_fixed_pitch(page_tr, port_blocks, gradient, FCOORD(0.0f, -1.0f), 00117 !(BOOL8) textord_test_landscape); 00118 } 00119 textord->to_spacing(page_tr, port_blocks); 00120 block_it.set_to_list(port_blocks); 00121 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { 00122 block = block_it.data(); 00123 make_real_words(textord, block, FCOORD(1.0f, 0.0f)); 00124 } 00125 } 00126 00127 00135 void set_row_spaces( //find space sizes 00136 TO_BLOCK *block, //block to do 00137 FCOORD rotation, //for drawing 00138 BOOL8 testing_on //correct orientation 00139 ) { 00140 inT32 maxwidth; //of widest space 00141 TO_ROW *row; //current row 00142 TO_ROW_IT row_it = block->get_rows (); 00143 00144 if (row_it.empty ()) 00145 return; //empty block 00146 maxwidth = (inT32) ceil (block->xheight * textord_words_maxspace); 00147 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { 00148 row = row_it.data (); 00149 if (row->fixed_pitch == 0) { 00150 // if (!textord_test_mode 00151 // && row_words(block,row,maxwidth,rotation,testing_on)==0 00152 // || textord_test_mode 00153 // && row_words2(block,row,maxwidth,rotation,testing_on)==0) 00154 // { 00155 row->min_space = 00156 (inT32) ceil (row->pr_space - 00157 (row->pr_space - 00158 row->pr_nonsp) * textord_words_definite_spread); 00159 row->max_nonspace = 00160 (inT32) floor (row->pr_nonsp + 00161 (row->pr_space - 00162 row->pr_nonsp) * textord_words_definite_spread); 00163 if (testing_on && textord_show_initial_words) { 00164 tprintf ("Assigning defaults %d non, %d space to row at %g\n", 00165 row->max_nonspace, row->min_space, row->intercept ()); 00166 } 00167 row->space_threshold = (row->max_nonspace + row->min_space) / 2; 00168 row->space_size = row->pr_space; 00169 row->kern_size = row->pr_nonsp; 00170 // } 00171 } 00172 #ifndef GRAPHICS_DISABLED 00173 if (textord_show_initial_words && testing_on) { 00174 plot_word_decisions (to_win, (inT16) row->fixed_pitch, row); 00175 } 00176 #endif 00177 } 00178 } 00179 00180 00187 inT32 row_words( //compute space size 00188 TO_BLOCK *block, //block it came from 00189 TO_ROW *row, //row to operate on 00190 inT32 maxwidth, //max expected space size 00191 FCOORD rotation, //for drawing 00192 BOOL8 testing_on //for debug 00193 ) { 00194 BOOL8 testing_row; //contains testpt 00195 BOOL8 prev_valid; //if decent size 00196 BOOL8 this_valid; //current blob big enough 00197 inT32 prev_x; //end of prev blob 00198 inT32 min_gap; //min interesting gap 00199 inT32 cluster_count; //no of clusters 00200 inT32 gap_index; //which cluster 00201 inT32 smooth_factor; //for smoothing stats 00202 BLOBNBOX *blob; //current blob 00203 float lower, upper; //clustering parameters 00204 float gaps[3]; //gap clusers 00205 ICOORD testpt; 00206 TBOX blob_box; //bounding box 00207 //iterator 00208 BLOBNBOX_IT blob_it = row->blob_list (); 00209 STATS gap_stats (0, maxwidth); 00210 STATS cluster_stats[4]; //clusters 00211 00212 testpt = ICOORD (textord_test_x, textord_test_y); 00213 smooth_factor = 00214 (inT32) (block->xheight * textord_wordstats_smooth_factor + 1.5); 00215 // if (testing_on) 00216 // tprintf("Row smooth factor=%d\n",smooth_factor); 00217 prev_valid = FALSE; 00218 prev_x = -MAX_INT32; 00219 testing_row = FALSE; 00220 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) { 00221 blob = blob_it.data (); 00222 blob_box = blob->bounding_box (); 00223 if (blob_box.contains (testpt)) 00224 testing_row = TRUE; 00225 gap_stats.add (blob_box.width (), 1); 00226 } 00227 min_gap = (inT32) floor (gap_stats.ile (textord_words_width_ile)); 00228 gap_stats.clear (); 00229 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) { 00230 blob = blob_it.data (); 00231 if (!blob->joined_to_prev ()) { 00232 blob_box = blob->bounding_box (); 00233 // this_valid=blob_box.width()>=min_gap; 00234 this_valid = TRUE; 00235 if (this_valid && prev_valid 00236 && blob_box.left () - prev_x < maxwidth) { 00237 gap_stats.add (blob_box.left () - prev_x, 1); 00238 } 00239 prev_x = blob_box.right (); 00240 prev_valid = this_valid; 00241 } 00242 } 00243 if (gap_stats.get_total () == 0) { 00244 row->min_space = 0; //no evidence 00245 row->max_nonspace = 0; 00246 return 0; 00247 } 00248 gap_stats.smooth (smooth_factor); 00249 lower = row->xheight * textord_words_initial_lower; 00250 upper = row->xheight * textord_words_initial_upper; 00251 cluster_count = gap_stats.cluster (lower, upper, 00252 textord_spacesize_ratioprop, 3, 00253 cluster_stats); 00254 while (cluster_count < 2 && ceil (lower) < floor (upper)) { 00255 //shrink gap 00256 upper = (upper * 3 + lower) / 4; 00257 lower = (lower * 3 + upper) / 4; 00258 cluster_count = gap_stats.cluster (lower, upper, 00259 textord_spacesize_ratioprop, 3, 00260 cluster_stats); 00261 } 00262 if (cluster_count < 2) { 00263 row->min_space = 0; //no evidence 00264 row->max_nonspace = 0; 00265 return 0; 00266 } 00267 for (gap_index = 0; gap_index < cluster_count; gap_index++) 00268 gaps[gap_index] = cluster_stats[gap_index + 1].ile (0.5); 00269 //get medians 00270 if (cluster_count > 2) { 00271 if (testing_on && textord_show_initial_words) { 00272 tprintf ("Row at %g has 3 sizes of gap:%g,%g,%g\n", 00273 row->intercept (), 00274 cluster_stats[1].ile (0.5), 00275 cluster_stats[2].ile (0.5), cluster_stats[3].ile (0.5)); 00276 } 00277 lower = gaps[0]; 00278 if (gaps[1] > lower) { 00279 upper = gaps[1]; //prefer most frequent 00280 if (upper < block->xheight * textord_words_min_minspace 00281 && gaps[2] > gaps[1]) { 00282 upper = gaps[2]; 00283 } 00284 } 00285 else if (gaps[2] > lower 00286 && gaps[2] >= block->xheight * textord_words_min_minspace) 00287 upper = gaps[2]; 00288 else if (lower >= block->xheight * textord_words_min_minspace) { 00289 upper = lower; //not nice 00290 lower = gaps[1]; 00291 if (testing_on && textord_show_initial_words) { 00292 tprintf ("Had to switch most common from lower to upper!!\n"); 00293 gap_stats.print(); 00294 } 00295 } 00296 else { 00297 row->min_space = 0; //no evidence 00298 row->max_nonspace = 0; 00299 return 0; 00300 } 00301 } 00302 else { 00303 if (gaps[1] < gaps[0]) { 00304 if (testing_on && textord_show_initial_words) { 00305 tprintf ("Had to switch most common from lower to upper!!\n"); 00306 gap_stats.print(); 00307 } 00308 lower = gaps[1]; 00309 upper = gaps[0]; 00310 } 00311 else { 00312 upper = gaps[1]; 00313 lower = gaps[0]; 00314 } 00315 } 00316 if (upper < block->xheight * textord_words_min_minspace) { 00317 row->min_space = 0; //no evidence 00318 row->max_nonspace = 0; 00319 return 0; 00320 } 00321 if (upper * 3 < block->min_space * 2 + block->max_nonspace 00322 || lower * 3 > block->min_space * 2 + block->max_nonspace) { 00323 if (testing_on && textord_show_initial_words) { 00324 tprintf ("Disagreement between block and row at %g!!\n", 00325 row->intercept ()); 00326 tprintf ("Lower=%g, upper=%g, Stats:\n", lower, upper); 00327 gap_stats.print(); 00328 } 00329 } 00330 row->min_space = 00331 (inT32) ceil (upper - (upper - lower) * textord_words_definite_spread); 00332 row->max_nonspace = 00333 (inT32) floor (lower + (upper - lower) * textord_words_definite_spread); 00334 row->space_threshold = (row->max_nonspace + row->min_space) / 2; 00335 row->space_size = upper; 00336 row->kern_size = lower; 00337 if (testing_on && textord_show_initial_words) { 00338 if (testing_row) { 00339 tprintf ("GAP STATS\n"); 00340 gap_stats.print(); 00341 tprintf ("SPACE stats\n"); 00342 cluster_stats[2].print_summary(); 00343 tprintf ("NONSPACE stats\n"); 00344 cluster_stats[1].print_summary(); 00345 } 00346 tprintf ("Row at %g has minspace=%d(%g), max_non=%d(%g)\n", 00347 row->intercept (), row->min_space, upper, 00348 row->max_nonspace, lower); 00349 } 00350 return cluster_stats[2].get_total (); 00351 } 00352 00353 00360 inT32 row_words2( //compute space size 00361 TO_BLOCK *block, //block it came from 00362 TO_ROW *row, //row to operate on 00363 inT32 maxwidth, //max expected space size 00364 FCOORD rotation, //for drawing 00365 BOOL8 testing_on //for debug 00366 ) { 00367 BOOL8 testing_row; //contains testpt 00368 BOOL8 prev_valid; //if decent size 00369 BOOL8 this_valid; //current blob big enough 00370 inT32 prev_x; //end of prev blob 00371 inT32 min_width; //min interesting width 00372 inT32 valid_count; //good gaps 00373 inT32 total_count; //total gaps 00374 inT32 cluster_count; //no of clusters 00375 inT32 prev_count; //previous cluster_count 00376 inT32 gap_index; //which cluster 00377 inT32 smooth_factor; //for smoothing stats 00378 BLOBNBOX *blob; //current blob 00379 float lower, upper; //clustering parameters 00380 ICOORD testpt; 00381 TBOX blob_box; //bounding box 00382 //iterator 00383 BLOBNBOX_IT blob_it = row->blob_list (); 00384 STATS gap_stats (0, maxwidth); 00385 //gap sizes 00386 float gaps[BLOCK_STATS_CLUSTERS]; 00387 STATS cluster_stats[BLOCK_STATS_CLUSTERS + 1]; 00388 //clusters 00389 00390 testpt = ICOORD (textord_test_x, textord_test_y); 00391 smooth_factor = 00392 (inT32) (block->xheight * textord_wordstats_smooth_factor + 1.5); 00393 // if (testing_on) 00394 // tprintf("Row smooth factor=%d\n",smooth_factor); 00395 prev_valid = FALSE; 00396 prev_x = -MAX_INT16; 00397 testing_row = FALSE; 00398 //min blob size 00399 min_width = (inT32) block->pr_space; 00400 total_count = 0; 00401 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) { 00402 blob = blob_it.data (); 00403 if (!blob->joined_to_prev ()) { 00404 blob_box = blob->bounding_box (); 00405 this_valid = blob_box.width () >= min_width; 00406 this_valid = TRUE; 00407 if (this_valid && prev_valid 00408 && blob_box.left () - prev_x < maxwidth) { 00409 gap_stats.add (blob_box.left () - prev_x, 1); 00410 } 00411 total_count++; //count possibles 00412 prev_x = blob_box.right (); 00413 prev_valid = this_valid; 00414 } 00415 } 00416 valid_count = gap_stats.get_total (); 00417 if (valid_count < total_count * textord_words_minlarge) { 00418 gap_stats.clear (); 00419 prev_x = -MAX_INT16; 00420 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); 00421 blob_it.forward ()) { 00422 blob = blob_it.data (); 00423 if (!blob->joined_to_prev ()) { 00424 blob_box = blob->bounding_box (); 00425 if (blob_box.left () - prev_x < maxwidth) { 00426 gap_stats.add (blob_box.left () - prev_x, 1); 00427 } 00428 prev_x = blob_box.right (); 00429 } 00430 } 00431 } 00432 if (gap_stats.get_total () == 0) { 00433 row->min_space = 0; //no evidence 00434 row->max_nonspace = 0; 00435 return 0; 00436 } 00437 00438 cluster_count = 0; 00439 lower = block->xheight * words_initial_lower; 00440 upper = block->xheight * words_initial_upper; 00441 gap_stats.smooth (smooth_factor); 00442 do { 00443 prev_count = cluster_count; 00444 cluster_count = gap_stats.cluster (lower, upper, 00445 textord_spacesize_ratioprop, 00446 BLOCK_STATS_CLUSTERS, cluster_stats); 00447 } 00448 while (cluster_count > prev_count && cluster_count < BLOCK_STATS_CLUSTERS); 00449 if (cluster_count < 1) { 00450 row->min_space = 0; 00451 row->max_nonspace = 0; 00452 return 0; 00453 } 00454 for (gap_index = 0; gap_index < cluster_count; gap_index++) 00455 gaps[gap_index] = cluster_stats[gap_index + 1].ile (0.5); 00456 //get medians 00457 if (testing_on) { 00458 tprintf ("cluster_count=%d:", cluster_count); 00459 for (gap_index = 0; gap_index < cluster_count; gap_index++) 00460 tprintf (" %g(%d)", gaps[gap_index], 00461 cluster_stats[gap_index + 1].get_total ()); 00462 tprintf ("\n"); 00463 } 00464 00465 //Try to find proportional non-space and space for row. 00466 for (gap_index = 0; gap_index < cluster_count 00467 && gaps[gap_index] > block->max_nonspace; gap_index++); 00468 if (gap_index < cluster_count) 00469 lower = gaps[gap_index]; //most frequent below 00470 else { 00471 if (testing_on) 00472 tprintf ("No cluster below block threshold!, using default=%g\n", 00473 block->pr_nonsp); 00474 lower = block->pr_nonsp; 00475 } 00476 for (gap_index = 0; gap_index < cluster_count 00477 && gaps[gap_index] <= block->max_nonspace; gap_index++); 00478 if (gap_index < cluster_count) 00479 upper = gaps[gap_index]; //most frequent above 00480 else { 00481 if (testing_on) 00482 tprintf ("No cluster above block threshold!, using default=%g\n", 00483 block->pr_space); 00484 upper = block->pr_space; 00485 } 00486 row->min_space = 00487 (inT32) ceil (upper - (upper - lower) * textord_words_definite_spread); 00488 row->max_nonspace = 00489 (inT32) floor (lower + (upper - lower) * textord_words_definite_spread); 00490 row->space_threshold = (row->max_nonspace + row->min_space) / 2; 00491 row->space_size = upper; 00492 row->kern_size = lower; 00493 if (testing_on) { 00494 if (testing_row) { 00495 tprintf ("GAP STATS\n"); 00496 gap_stats.print(); 00497 tprintf ("SPACE stats\n"); 00498 cluster_stats[2].print_summary(); 00499 tprintf ("NONSPACE stats\n"); 00500 cluster_stats[1].print_summary(); 00501 } 00502 tprintf ("Row at %g has minspace=%d(%g), max_non=%d(%g)\n", 00503 row->intercept (), row->min_space, upper, 00504 row->max_nonspace, lower); 00505 } 00506 return 1; 00507 } 00508 00509 00516 void make_real_words( 00517 tesseract::Textord *textord, 00518 TO_BLOCK *block, //block to do 00519 FCOORD rotation //for drawing 00520 ) { 00521 TO_ROW *row; //current row 00522 TO_ROW_IT row_it = block->get_rows (); 00523 ROW *real_row = NULL; //output row 00524 ROW_IT real_row_it = block->block->row_list (); 00525 00526 if (row_it.empty ()) 00527 return; //empty block 00528 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { 00529 row = row_it.data (); 00530 if (row->blob_list ()->empty () && !row->rep_words.empty ()) { 00531 real_row = make_rep_words (row, block); 00532 } else if (!row->blob_list()->empty()) { 00533 // In a fixed pitch document, some lines may be detected as fixed pitch 00534 // while others don't, and will go through different path. 00535 // For non-space delimited language like CJK, fixed pitch chop always 00536 // leave the entire line as one word. We can force consistent chopping 00537 // with force_make_prop_words flag. 00538 POLY_BLOCK* pb = block->block->poly_block(); 00539 if (textord_chopper_test) { 00540 real_row = textord->make_blob_words (row, rotation); 00541 } else if (textord_force_make_prop_words || 00542 (pb != NULL && !pb->IsText()) || 00543 row->pitch_decision == PITCH_DEF_PROP || 00544 row->pitch_decision == PITCH_CORR_PROP) { 00545 real_row = textord->make_prop_words (row, rotation); 00546 } else if (row->pitch_decision == PITCH_DEF_FIXED || 00547 row->pitch_decision == PITCH_CORR_FIXED) { 00548 real_row = fixed_pitch_words (row, rotation); 00549 } else { 00550 ASSERT_HOST(FALSE); 00551 } 00552 } 00553 if (real_row != NULL) { 00554 //put row in block 00555 real_row_it.add_after_then_move (real_row); 00556 } 00557 } 00558 block->block->set_stats (block->fixed_pitch == 0, (inT16) block->kern_size, 00559 (inT16) block->space_size, 00560 (inT16) block->fixed_pitch); 00561 block->block->check_pitch (); 00562 } 00563 00564 00572 ROW *make_rep_words( //make a row 00573 TO_ROW *row, //row to convert 00574 TO_BLOCK *block //block it lives in 00575 ) { 00576 inT32 xstarts[2]; //ends of row 00577 ROW *real_row; //output row 00578 TBOX word_box; //bounding box 00579 double coeffs[3]; //spline 00580 //iterator 00581 WERD_IT word_it = &row->rep_words; 00582 00583 if (word_it.empty ()) 00584 return NULL; 00585 word_box = word_it.data ()->bounding_box (); 00586 for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) 00587 word_box += word_it.data ()->bounding_box (); 00588 xstarts[0] = word_box.left (); 00589 xstarts[1] = word_box.right (); 00590 coeffs[0] = 0; 00591 coeffs[1] = row->line_m (); 00592 coeffs[2] = row->line_c (); 00593 row->xheight = block->xheight; 00594 real_row = new ROW(row, 00595 (inT16) block->kern_size, (inT16) block->space_size); 00596 word_it.set_to_list (real_row->word_list ()); 00597 //put words in row 00598 word_it.add_list_after (&row->rep_words); 00599 real_row->recalc_bounding_box (); 00600 return real_row; 00601 } 00602 00603 00611 WERD *make_real_word(BLOBNBOX_IT *box_it, //iterator 00612 inT32 blobcount, //no of blobs to use 00613 BOOL8 bol, //start of line 00614 uinT8 blanks //no of blanks 00615 ) { 00616 C_OUTLINE_IT cout_it; 00617 C_BLOB_LIST cblobs; 00618 C_BLOB_IT cblob_it = &cblobs; 00619 WERD *word; // new word 00620 BLOBNBOX *bblob; // current blob 00621 inT32 blobindex; // in row 00622 00623 for (blobindex = 0; blobindex < blobcount; blobindex++) { 00624 bblob = box_it->extract(); 00625 if (bblob->joined_to_prev()) { 00626 if (bblob->cblob() != NULL) { 00627 cout_it.set_to_list(cblob_it.data()->out_list()); 00628 cout_it.move_to_last(); 00629 cout_it.add_list_after(bblob->cblob()->out_list()); 00630 delete bblob->cblob(); 00631 } 00632 } 00633 else { 00634 if (bblob->cblob() != NULL) 00635 cblob_it.add_after_then_move(bblob->cblob()); 00636 } 00637 delete bblob; 00638 box_it->forward(); // next one 00639 } 00640 00641 if (blanks < 1) 00642 blanks = 1; 00643 00644 word = new WERD(&cblobs, blanks, NULL); 00645 00646 if (bol) 00647 word->set_flag(W_BOL, TRUE); 00648 if (box_it->at_first()) 00649 word->set_flag(W_EOL, TRUE); // at end of line 00650 00651 return word; 00652 }