Tesseract
3.02
|
00001 /********************************************************************** 00002 * File: underlin.cpp (Formerly undrline.c) 00003 * Description: Code to chop blobs apart from underlines. 00004 * Author: Ray Smith 00005 * Created: Mon Aug 8 11:14:00 BST 1994 00006 * 00007 * (C) Copyright 1994, Hewlett-Packard Ltd. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 * 00018 **********************************************************************/ 00019 00020 #include "mfcpch.h" 00021 #ifdef __UNIX__ 00022 #include <assert.h> 00023 #endif 00024 #include "underlin.h" 00025 00026 #define PROJECTION_MARGIN 10 //arbitrary 00027 #define EXTERN 00028 00029 EXTERN double_VAR (textord_underline_offset, 0.1, "Fraction of x to ignore"); 00030 EXTERN BOOL_VAR (textord_restore_underlines, TRUE, 00031 "Chop underlines & put back"); 00032 00033 /********************************************************************** 00034 * restore_underlined_blobs 00035 * 00036 * Find underlined blobs and put them back in the row. 00037 **********************************************************************/ 00038 00039 void restore_underlined_blobs( //get chop points 00040 TO_BLOCK *block //block to do 00041 ) { 00042 inT16 chop_coord; //chop boundary 00043 TBOX blob_box; //of underline 00044 BLOBNBOX *u_line; //underline bit 00045 TO_ROW *row; //best row for blob 00046 ICOORDELT_LIST chop_cells; //blobs to cut out 00047 //real underlines 00048 BLOBNBOX_LIST residual_underlines; 00049 C_OUTLINE_LIST left_coutlines; 00050 C_OUTLINE_LIST right_coutlines; 00051 ICOORDELT_IT cell_it = &chop_cells; 00052 //under lines 00053 BLOBNBOX_IT under_it = &block->underlines; 00054 BLOBNBOX_IT ru_it = &residual_underlines; 00055 00056 if (block->get_rows()->empty()) 00057 return; // Don't crash if there are no rows. 00058 for (under_it.mark_cycle_pt (); !under_it.cycled_list (); 00059 under_it.forward ()) { 00060 u_line = under_it.extract (); 00061 blob_box = u_line->bounding_box (); 00062 row = most_overlapping_row (block->get_rows (), u_line); 00063 find_underlined_blobs (u_line, &row->baseline, row->xheight, 00064 row->xheight * textord_underline_offset, 00065 &chop_cells); 00066 cell_it.set_to_list (&chop_cells); 00067 for (cell_it.mark_cycle_pt (); !cell_it.cycled_list (); 00068 cell_it.forward ()) { 00069 chop_coord = cell_it.data ()->x (); 00070 if (cell_it.data ()->y () - chop_coord > textord_fp_chop_error + 1) { 00071 split_to_blob (u_line, chop_coord, 00072 textord_fp_chop_error + 0.5, 00073 &left_coutlines, 00074 &right_coutlines); 00075 if (!left_coutlines.empty()) { 00076 ru_it.add_after_then_move(new BLOBNBOX(new C_BLOB(&left_coutlines))); 00077 } 00078 chop_coord = cell_it.data ()->y (); 00079 split_to_blob(NULL, chop_coord, textord_fp_chop_error + 0.5, 00080 &left_coutlines, &right_coutlines); 00081 if (!left_coutlines.empty()) { 00082 row->insert_blob(new BLOBNBOX(new C_BLOB(&left_coutlines))); 00083 } else { 00084 fprintf(stderr, 00085 "Error:no outlines after chopping from %d to %d from (%d,%d)->(%d,%d)\n", 00086 cell_it.data ()->x (), cell_it.data ()->y (), 00087 blob_box.left (), blob_box.bottom (), 00088 blob_box.right (), blob_box.top ()); 00089 ASSERT_HOST(FALSE); 00090 } 00091 u_line = NULL; //no more blobs to add 00092 } 00093 delete cell_it.extract(); 00094 } 00095 if (!right_coutlines.empty ()) { 00096 split_to_blob(NULL, blob_box.right(), textord_fp_chop_error + 0.5, 00097 &left_coutlines, &right_coutlines); 00098 if (!left_coutlines.empty()) 00099 ru_it.add_after_then_move(new BLOBNBOX(new C_BLOB(&left_coutlines))); 00100 } 00101 if (u_line != NULL) { 00102 if (u_line->cblob() != NULL) 00103 delete u_line->cblob(); 00104 delete u_line; 00105 } 00106 } 00107 if (!ru_it.empty()) { 00108 ru_it.move_to_first(); 00109 for (ru_it.mark_cycle_pt(); !ru_it.cycled_list(); ru_it.forward()) { 00110 under_it.add_after_then_move(ru_it.extract()); 00111 } 00112 } 00113 } 00114 00115 00116 /********************************************************************** 00117 * most_overlapping_row 00118 * 00119 * Return the row which most overlaps the blob. 00120 **********************************************************************/ 00121 00122 TO_ROW *most_overlapping_row( //find best row 00123 TO_ROW_LIST *rows, //list of rows 00124 BLOBNBOX *blob //blob to place 00125 ) { 00126 inT16 x = (blob->bounding_box ().left () 00127 + blob->bounding_box ().right ()) / 2; 00128 TO_ROW_IT row_it = rows; //row iterator 00129 TO_ROW *row; //current row 00130 TO_ROW *best_row; //output row 00131 float overlap; //of blob & row 00132 float bestover; //best overlap 00133 00134 best_row = NULL; 00135 bestover = (float) -MAX_INT32; 00136 if (row_it.empty ()) 00137 return NULL; 00138 row = row_it.data (); 00139 row_it.mark_cycle_pt (); 00140 while (row->baseline.y (x) + row->descdrop > blob->bounding_box ().top () 00141 && !row_it.cycled_list ()) { 00142 best_row = row; 00143 bestover = 00144 blob->bounding_box ().top () - row->baseline.y (x) + row->descdrop; 00145 row_it.forward (); 00146 row = row_it.data (); 00147 } 00148 while (row->baseline.y (x) + row->xheight + row->ascrise 00149 >= blob->bounding_box ().bottom () && !row_it.cycled_list ()) { 00150 overlap = row->baseline.y (x) + row->xheight + row->ascrise; 00151 if (blob->bounding_box ().top () < overlap) 00152 overlap = blob->bounding_box ().top (); 00153 if (blob->bounding_box ().bottom () > 00154 row->baseline.y (x) + row->descdrop) 00155 overlap -= blob->bounding_box ().bottom (); 00156 else 00157 overlap -= row->baseline.y (x) + row->descdrop; 00158 if (overlap > bestover) { 00159 bestover = overlap; 00160 best_row = row; 00161 } 00162 row_it.forward (); 00163 row = row_it.data (); 00164 } 00165 if (bestover < 0 00166 && row->baseline.y (x) + row->xheight + row->ascrise 00167 - blob->bounding_box ().bottom () > bestover) 00168 best_row = row; 00169 return best_row; 00170 } 00171 00172 00173 /********************************************************************** 00174 * find_underlined_blobs 00175 * 00176 * Find the start and end coords of blobs in the underline. 00177 **********************************************************************/ 00178 00179 void find_underlined_blobs( //get chop points 00180 BLOBNBOX *u_line, //underlined unit 00181 QSPLINE *baseline, //actual baseline 00182 float xheight, //height of line 00183 float baseline_offset, //amount to shrinke it 00184 ICOORDELT_LIST *chop_cells //places to chop 00185 ) { 00186 inT16 x, y; //sides of blob 00187 ICOORD blob_chop; //sides of blob 00188 TBOX blob_box = u_line->bounding_box (); 00189 //cell iterator 00190 ICOORDELT_IT cell_it = chop_cells; 00191 STATS upper_proj (blob_box.left (), blob_box.right () + 1); 00192 STATS middle_proj (blob_box.left (), blob_box.right () + 1); 00193 STATS lower_proj (blob_box.left (), blob_box.right () + 1); 00194 C_OUTLINE_IT out_it; //outlines of blob 00195 00196 ASSERT_HOST (u_line->cblob () != NULL); 00197 00198 out_it.set_to_list (u_line->cblob ()->out_list ()); 00199 for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) { 00200 vertical_cunderline_projection (out_it.data (), 00201 baseline, xheight, baseline_offset, 00202 &lower_proj, &middle_proj, &upper_proj); 00203 } 00204 00205 for (x = blob_box.left (); x < blob_box.right (); x++) { 00206 if (middle_proj.pile_count (x) > 0) { 00207 for (y = x + 1; 00208 y < blob_box.right () && middle_proj.pile_count (y) > 0; y++); 00209 blob_chop = ICOORD (x, y); 00210 cell_it.add_after_then_move (new ICOORDELT (blob_chop)); 00211 x = y; 00212 } 00213 } 00214 } 00215 00216 00217 /********************************************************************** 00218 * vertical_cunderline_projection 00219 * 00220 * Compute the vertical projection of a outline from its outlines 00221 * and add to the given STATS. 00222 **********************************************************************/ 00223 00224 void vertical_cunderline_projection( //project outlines 00225 C_OUTLINE *outline, //outline to project 00226 QSPLINE *baseline, //actual baseline 00227 float xheight, //height of line 00228 float baseline_offset, //amount to shrinke it 00229 STATS *lower_proj, //below baseline 00230 STATS *middle_proj, //centre region 00231 STATS *upper_proj //top region 00232 ) { 00233 ICOORD pos; //current point 00234 ICOORD step; //edge step 00235 inT16 lower_y, upper_y; //region limits 00236 inT32 length; //of outline 00237 inT16 stepindex; //current step 00238 C_OUTLINE_IT out_it = outline->child (); 00239 00240 pos = outline->start_pos (); 00241 length = outline->pathlength (); 00242 for (stepindex = 0; stepindex < length; stepindex++) { 00243 step = outline->step (stepindex); 00244 if (step.x () > 0) { 00245 lower_y = 00246 (inT16) floor (baseline->y (pos.x ()) + baseline_offset + 0.5); 00247 upper_y = 00248 (inT16) floor (baseline->y (pos.x ()) + baseline_offset + 00249 xheight + 0.5); 00250 if (pos.y () >= lower_y) { 00251 lower_proj->add (pos.x (), -lower_y); 00252 if (pos.y () >= upper_y) { 00253 middle_proj->add (pos.x (), lower_y - upper_y); 00254 upper_proj->add (pos.x (), upper_y - pos.y ()); 00255 } 00256 else 00257 middle_proj->add (pos.x (), lower_y - pos.y ()); 00258 } 00259 else 00260 lower_proj->add (pos.x (), -pos.y ()); 00261 } 00262 else if (step.x () < 0) { 00263 lower_y = 00264 (inT16) floor (baseline->y (pos.x () - 1) + baseline_offset + 00265 0.5); 00266 upper_y = 00267 (inT16) floor (baseline->y (pos.x () - 1) + baseline_offset + 00268 xheight + 0.5); 00269 if (pos.y () >= lower_y) { 00270 lower_proj->add (pos.x () - 1, lower_y); 00271 if (pos.y () >= upper_y) { 00272 middle_proj->add (pos.x () - 1, upper_y - lower_y); 00273 upper_proj->add (pos.x () - 1, pos.y () - upper_y); 00274 } 00275 else 00276 middle_proj->add (pos.x () - 1, pos.y () - lower_y); 00277 } 00278 else 00279 lower_proj->add (pos.x () - 1, pos.y ()); 00280 } 00281 pos += step; 00282 } 00283 00284 for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) { 00285 vertical_cunderline_projection (out_it.data (), 00286 baseline, xheight, baseline_offset, 00287 lower_proj, middle_proj, upper_proj); 00288 } 00289 }