Tesseract  3.02
tesseract-ocr/textord/underlin.cpp
Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        underlin.cpp  (Formerly undrline.c)
00003  * Description: Code to chop blobs apart from underlines.
00004  * Author:              Ray Smith
00005  * Created:             Mon Aug  8 11:14:00 BST 1994
00006  *
00007  * (C) Copyright 1994, Hewlett-Packard Ltd.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
00020 #include "mfcpch.h"
00021 #ifdef __UNIX__
00022 #include          <assert.h>
00023 #endif
00024 #include          "underlin.h"
00025 
00026 #define PROJECTION_MARGIN 10     //arbitrary
00027 #define EXTERN
00028 
00029 EXTERN double_VAR (textord_underline_offset, 0.1, "Fraction of x to ignore");
00030 EXTERN BOOL_VAR (textord_restore_underlines, TRUE,
00031 "Chop underlines & put back");
00032 
00033 /**********************************************************************
00034  * restore_underlined_blobs
00035  *
00036  * Find underlined blobs and put them back in the row.
00037  **********************************************************************/
00038 
00039 void restore_underlined_blobs(                 //get chop points
00040                               TO_BLOCK *block  //block to do
00041                              ) {
00042   inT16 chop_coord;              //chop boundary
00043   TBOX blob_box;                  //of underline
00044   BLOBNBOX *u_line;              //underline bit
00045   TO_ROW *row;                   //best row for blob
00046   ICOORDELT_LIST chop_cells;     //blobs to cut out
00047                                  //real underlines
00048   BLOBNBOX_LIST residual_underlines;
00049   C_OUTLINE_LIST left_coutlines;
00050   C_OUTLINE_LIST right_coutlines;
00051   ICOORDELT_IT cell_it = &chop_cells;
00052                                  //under lines
00053   BLOBNBOX_IT under_it = &block->underlines;
00054   BLOBNBOX_IT ru_it = &residual_underlines;
00055 
00056   if (block->get_rows()->empty())
00057     return;  // Don't crash if there are no rows.
00058   for (under_it.mark_cycle_pt (); !under_it.cycled_list ();
00059   under_it.forward ()) {
00060     u_line = under_it.extract ();
00061     blob_box = u_line->bounding_box ();
00062     row = most_overlapping_row (block->get_rows (), u_line);
00063     find_underlined_blobs (u_line, &row->baseline, row->xheight,
00064       row->xheight * textord_underline_offset,
00065       &chop_cells);
00066     cell_it.set_to_list (&chop_cells);
00067     for (cell_it.mark_cycle_pt (); !cell_it.cycled_list ();
00068     cell_it.forward ()) {
00069       chop_coord = cell_it.data ()->x ();
00070       if (cell_it.data ()->y () - chop_coord > textord_fp_chop_error + 1) {
00071         split_to_blob (u_line, chop_coord,
00072           textord_fp_chop_error + 0.5,
00073           &left_coutlines,
00074           &right_coutlines);
00075         if (!left_coutlines.empty()) {
00076           ru_it.add_after_then_move(new BLOBNBOX(new C_BLOB(&left_coutlines)));
00077         }
00078         chop_coord = cell_it.data ()->y ();
00079         split_to_blob(NULL, chop_coord, textord_fp_chop_error + 0.5,
00080                       &left_coutlines, &right_coutlines);
00081         if (!left_coutlines.empty()) {
00082           row->insert_blob(new BLOBNBOX(new C_BLOB(&left_coutlines)));
00083         } else {
00084           fprintf(stderr,
00085             "Error:no outlines after chopping from %d to %d from (%d,%d)->(%d,%d)\n",
00086             cell_it.data ()->x (), cell_it.data ()->y (),
00087             blob_box.left (), blob_box.bottom (),
00088             blob_box.right (), blob_box.top ());
00089           ASSERT_HOST(FALSE);
00090         }
00091         u_line = NULL;           //no more blobs to add
00092       }
00093       delete cell_it.extract();
00094     }
00095     if (!right_coutlines.empty ()) {
00096       split_to_blob(NULL, blob_box.right(), textord_fp_chop_error + 0.5,
00097                     &left_coutlines, &right_coutlines);
00098       if (!left_coutlines.empty())
00099         ru_it.add_after_then_move(new BLOBNBOX(new C_BLOB(&left_coutlines)));
00100     }
00101     if (u_line != NULL) {
00102       if (u_line->cblob() != NULL)
00103         delete u_line->cblob();
00104       delete u_line;
00105     }
00106   }
00107   if (!ru_it.empty()) {
00108     ru_it.move_to_first();
00109     for (ru_it.mark_cycle_pt(); !ru_it.cycled_list(); ru_it.forward()) {
00110       under_it.add_after_then_move(ru_it.extract());
00111     }
00112   }
00113 }
00114 
00115 
00116 /**********************************************************************
00117  * most_overlapping_row
00118  *
00119  * Return the row which most overlaps the blob.
00120  **********************************************************************/
00121 
00122 TO_ROW *most_overlapping_row(                    //find best row
00123                              TO_ROW_LIST *rows,  //list of rows
00124                              BLOBNBOX *blob      //blob to place
00125                             ) {
00126   inT16 x = (blob->bounding_box ().left ()
00127     + blob->bounding_box ().right ()) / 2;
00128   TO_ROW_IT row_it = rows;       //row iterator
00129   TO_ROW *row;                   //current row
00130   TO_ROW *best_row;              //output row
00131   float overlap;                 //of blob & row
00132   float bestover;                //best overlap
00133 
00134   best_row = NULL;
00135   bestover = (float) -MAX_INT32;
00136   if (row_it.empty ())
00137     return NULL;
00138   row = row_it.data ();
00139   row_it.mark_cycle_pt ();
00140   while (row->baseline.y (x) + row->descdrop > blob->bounding_box ().top ()
00141   && !row_it.cycled_list ()) {
00142     best_row = row;
00143     bestover =
00144       blob->bounding_box ().top () - row->baseline.y (x) + row->descdrop;
00145     row_it.forward ();
00146     row = row_it.data ();
00147   }
00148   while (row->baseline.y (x) + row->xheight + row->ascrise
00149   >= blob->bounding_box ().bottom () && !row_it.cycled_list ()) {
00150     overlap = row->baseline.y (x) + row->xheight + row->ascrise;
00151     if (blob->bounding_box ().top () < overlap)
00152       overlap = blob->bounding_box ().top ();
00153     if (blob->bounding_box ().bottom () >
00154       row->baseline.y (x) + row->descdrop)
00155       overlap -= blob->bounding_box ().bottom ();
00156     else
00157       overlap -= row->baseline.y (x) + row->descdrop;
00158     if (overlap > bestover) {
00159       bestover = overlap;
00160       best_row = row;
00161     }
00162     row_it.forward ();
00163     row = row_it.data ();
00164   }
00165   if (bestover < 0
00166     && row->baseline.y (x) + row->xheight + row->ascrise
00167     - blob->bounding_box ().bottom () > bestover)
00168     best_row = row;
00169   return best_row;
00170 }
00171 
00172 
00173 /**********************************************************************
00174  * find_underlined_blobs
00175  *
00176  * Find the start and end coords of blobs in the underline.
00177  **********************************************************************/
00178 
00179 void find_underlined_blobs(                            //get chop points
00180                            BLOBNBOX *u_line,           //underlined unit
00181                            QSPLINE *baseline,          //actual baseline
00182                            float xheight,              //height of line
00183                            float baseline_offset,      //amount to shrinke it
00184                            ICOORDELT_LIST *chop_cells  //places to chop
00185                           ) {
00186   inT16 x, y;                    //sides of blob
00187   ICOORD blob_chop;              //sides of blob
00188   TBOX blob_box = u_line->bounding_box ();
00189                                  //cell iterator
00190   ICOORDELT_IT cell_it = chop_cells;
00191   STATS upper_proj (blob_box.left (), blob_box.right () + 1);
00192   STATS middle_proj (blob_box.left (), blob_box.right () + 1);
00193   STATS lower_proj (blob_box.left (), blob_box.right () + 1);
00194   C_OUTLINE_IT out_it;           //outlines of blob
00195 
00196   ASSERT_HOST (u_line->cblob () != NULL);
00197 
00198   out_it.set_to_list (u_line->cblob ()->out_list ());
00199   for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
00200     vertical_cunderline_projection (out_it.data (),
00201       baseline, xheight, baseline_offset,
00202       &lower_proj, &middle_proj, &upper_proj);
00203   }
00204 
00205   for (x = blob_box.left (); x < blob_box.right (); x++) {
00206     if (middle_proj.pile_count (x) > 0) {
00207       for (y = x + 1;
00208         y < blob_box.right () && middle_proj.pile_count (y) > 0; y++);
00209       blob_chop = ICOORD (x, y);
00210       cell_it.add_after_then_move (new ICOORDELT (blob_chop));
00211       x = y;
00212     }
00213   }
00214 }
00215 
00216 
00217 /**********************************************************************
00218  * vertical_cunderline_projection
00219  *
00220  * Compute the vertical projection of a outline from its outlines
00221  * and add to the given STATS.
00222  **********************************************************************/
00223 
00224 void vertical_cunderline_projection(                        //project outlines
00225                                     C_OUTLINE *outline,     //outline to project
00226                                     QSPLINE *baseline,      //actual baseline
00227                                     float xheight,          //height of line
00228                                     float baseline_offset,  //amount to shrinke it
00229                                     STATS *lower_proj,      //below baseline
00230                                     STATS *middle_proj,     //centre region
00231                                     STATS *upper_proj       //top region
00232                                    ) {
00233   ICOORD pos;                    //current point
00234   ICOORD step;                   //edge step
00235   inT16 lower_y, upper_y;        //region limits
00236   inT32 length;                  //of outline
00237   inT16 stepindex;               //current step
00238   C_OUTLINE_IT out_it = outline->child ();
00239 
00240   pos = outline->start_pos ();
00241   length = outline->pathlength ();
00242   for (stepindex = 0; stepindex < length; stepindex++) {
00243     step = outline->step (stepindex);
00244     if (step.x () > 0) {
00245       lower_y =
00246         (inT16) floor (baseline->y (pos.x ()) + baseline_offset + 0.5);
00247       upper_y =
00248         (inT16) floor (baseline->y (pos.x ()) + baseline_offset +
00249         xheight + 0.5);
00250       if (pos.y () >= lower_y) {
00251         lower_proj->add (pos.x (), -lower_y);
00252         if (pos.y () >= upper_y) {
00253           middle_proj->add (pos.x (), lower_y - upper_y);
00254           upper_proj->add (pos.x (), upper_y - pos.y ());
00255         }
00256         else
00257           middle_proj->add (pos.x (), lower_y - pos.y ());
00258       }
00259       else
00260         lower_proj->add (pos.x (), -pos.y ());
00261     }
00262     else if (step.x () < 0) {
00263       lower_y =
00264         (inT16) floor (baseline->y (pos.x () - 1) + baseline_offset +
00265         0.5);
00266       upper_y =
00267         (inT16) floor (baseline->y (pos.x () - 1) + baseline_offset +
00268         xheight + 0.5);
00269       if (pos.y () >= lower_y) {
00270         lower_proj->add (pos.x () - 1, lower_y);
00271         if (pos.y () >= upper_y) {
00272           middle_proj->add (pos.x () - 1, upper_y - lower_y);
00273           upper_proj->add (pos.x () - 1, pos.y () - upper_y);
00274         }
00275         else
00276           middle_proj->add (pos.x () - 1, pos.y () - lower_y);
00277       }
00278       else
00279         lower_proj->add (pos.x () - 1, pos.y ());
00280     }
00281     pos += step;
00282   }
00283 
00284   for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
00285     vertical_cunderline_projection (out_it.data (),
00286       baseline, xheight, baseline_offset,
00287       lower_proj, middle_proj, upper_proj);
00288   }
00289 }