Tesseract  3.02
tesseract-ocr/ccstruct/blobbox.cpp
Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        blobbox.cpp  (Formerly blobnbox.c)
00003  * Description: Code for the textord blob class.
00004  * Author:                                      Ray Smith
00005  * Created:                                     Thu Jul 30 09:08:51 BST 1992
00006  *
00007  * (C) Copyright 1992, Hewlett-Packard Ltd.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
00020 #include "mfcpch.h"
00021 #include "blobbox.h"
00022 #include "helpers.h"
00023 
// Margin added on each side of the row's bounding box when
// TO_ROW::compute_vertical_projection sets the projection range.
#define PROJECTION_MARGIN 10     //arbitrary
#define EXTERN

// Instantiate the list classes for the element types used in this file.
ELISTIZE (BLOBNBOX) ELIST2IZE (TO_ROW) ELISTIZE (TO_BLOCK)

// Up to 30 degrees is allowed for rotations of diacritic blobs.
// 0.866 is approximately cos(30 degrees); BLOBNBOX::rotate_box asserts that
// the x component of the rotation vector is at least this value.
const double kCosSmallAngle = 0.866;
// Min aspect ratio for a joined word to indicate an obvious flow direction.
const double kDefiniteAspectRatio = 2.0;
// Multiple of short length in perimeter to make a joined word.
const double kComplexShapePerimeterRatio = 1.5;
// Min multiple of linesize for medium-sized blobs in ReFilterBlobs.
const double kMinMediumSizeRatio = 0.25;
// Max multiple of linesize for medium-sized blobs in ReFilterBlobs.
const double kMaxMediumSizeRatio = 4.0;

00039 
00040 // Rotates the box and the underlying blob.
00041 void BLOBNBOX::rotate(FCOORD rotation) {
00042   cblob_ptr->rotate(rotation);
00043   rotate_box(rotation);
00044   compute_bounding_box();
00045 }
00046 
00047 // Reflect the box in the y-axis, leaving the underlying blob untouched.
00048 void BLOBNBOX::reflect_box_in_y_axis() {
00049   int left = -box.right();
00050   box.set_right(-box.left());
00051   box.set_left(left);
00052 }
00053 
00054 // Rotates the box by the angle given by rotation.
00055 // If the blob is a diacritic, then only small rotations for skew
00056 // correction can be applied.
00057 void BLOBNBOX::rotate_box(FCOORD rotation) {
00058   if (IsDiacritic()) {
00059     ASSERT_HOST(rotation.x() >= kCosSmallAngle)
00060     ICOORD top_pt((box.left() + box.right()) / 2, base_char_top_);
00061     ICOORD bottom_pt(top_pt.x(), base_char_bottom_);
00062     top_pt.rotate(rotation);
00063     base_char_top_ = top_pt.y();
00064     bottom_pt.rotate(rotation);
00065     base_char_bottom_ = bottom_pt.y();
00066     box.rotate(rotation);
00067   } else {
00068     box.rotate(rotation);
00069     set_diacritic_box(box);
00070   }
00071 }
00072 
00073 /**********************************************************************
00074  * BLOBNBOX::merge
00075  *
00076  * Merge this blob with the given blob, which should be after this.
00077  **********************************************************************/
00078 void BLOBNBOX::merge(                    //merge blobs
00079                      BLOBNBOX *nextblob  //blob to join with
00080                     ) {
00081   box += nextblob->box;          //merge boxes
00082   set_diacritic_box(box);
00083   nextblob->joined = TRUE;
00084 }
00085 
00086 
00087 // Merge this with other, taking the outlines from other.
00088 // Other is not deleted, but left for the caller to handle.
00089 void BLOBNBOX::really_merge(BLOBNBOX* other) {
00090   if (cblob_ptr != NULL && other->cblob_ptr != NULL) {
00091     C_OUTLINE_IT ol_it(cblob_ptr->out_list());
00092     ol_it.add_list_after(other->cblob_ptr->out_list());
00093   }
00094   compute_bounding_box();
00095 }
00096 
00097 
00098 /**********************************************************************
00099  * BLOBNBOX::chop
00100  *
00101  * Chop this blob into equal sized pieces using the x height as a guide.
00102  * The blob is not actually chopped. Instead, fake blobs are inserted
00103  * with the relevant bounding boxes.
00104  **********************************************************************/
00105 
00106 void BLOBNBOX::chop(                        //chop blobs
00107                     BLOBNBOX_IT *start_it,  //location of this
00108                     BLOBNBOX_IT *end_it,    //iterator
00109                     FCOORD rotation,        //for landscape
00110                     float xheight           //of line
00111                    ) {
00112   inT16 blobcount;               //no of blobs
00113   BLOBNBOX *newblob;             //fake blob
00114   BLOBNBOX *blob;                //current blob
00115   inT16 blobindex;               //number of chop
00116   inT16 leftx;                   //left edge of blob
00117   float blobwidth;               //width of each
00118   float rightx;                  //right edge to scan
00119   float ymin, ymax;              //limits of new blob
00120   float test_ymin, test_ymax;    //limits of part blob
00121   ICOORD bl, tr;                 //corners of box
00122   BLOBNBOX_IT blob_it;           //blob iterator
00123 
00124                                  //get no of chops
00125   blobcount = (inT16) floor (box.width () / xheight);
00126   if (blobcount > 1 && cblob_ptr != NULL) {
00127                                  //width of each
00128     blobwidth = (float) (box.width () + 1) / blobcount;
00129     for (blobindex = blobcount - 1, rightx = box.right ();
00130     blobindex >= 0; blobindex--, rightx -= blobwidth) {
00131       ymin = (float) MAX_INT32;
00132       ymax = (float) -MAX_INT32;
00133       blob_it = *start_it;
00134       do {
00135         blob = blob_it.data ();
00136         find_cblob_vlimits(blob->cblob_ptr, rightx - blobwidth,
00137                            rightx,
00138             /*rotation, */ test_ymin, test_ymax);
00139         blob_it.forward ();
00140         UpdateRange(test_ymin, test_ymax, &ymin, &ymax);
00141       }
00142       while (blob != end_it->data ());
00143       if (ymin < ymax) {
00144         leftx = (inT16) floor (rightx - blobwidth);
00145         if (leftx < box.left ())
00146           leftx = box.left ();   //clip to real box
00147         bl = ICOORD (leftx, (inT16) floor (ymin));
00148         tr = ICOORD ((inT16) ceil (rightx), (inT16) ceil (ymax));
00149         if (blobindex == 0)
00150           box = TBOX (bl, tr);    //change box
00151         else {
00152           newblob = new BLOBNBOX;
00153                                  //box is all it has
00154           newblob->box = TBOX (bl, tr);
00155                                  //stay on current
00156           newblob->base_char_top_ = tr.y();
00157           newblob->base_char_bottom_ = bl.y();
00158           end_it->add_after_stay_put (newblob);
00159         }
00160       }
00161     }
00162   }
00163 }
00164 
00165 // Returns the box gaps between this and its neighbours_ in an array
00166 // indexed by BlobNeighbourDir.
00167 void BLOBNBOX::NeighbourGaps(int gaps[BND_COUNT]) const {
00168   for (int dir = 0; dir < BND_COUNT; ++dir) {
00169     gaps[dir] = MAX_INT16;
00170     BLOBNBOX* neighbour = neighbours_[dir];
00171     if (neighbour != NULL) {
00172       TBOX n_box = neighbour->bounding_box();
00173       if (dir == BND_LEFT || dir == BND_RIGHT) {
00174         gaps[dir] = box.x_gap(n_box);
00175       } else {
00176         gaps[dir] = box.y_gap(n_box);
00177       }
00178     }
00179   }
00180 }
00181 // Returns the min and max horizontal and vertical gaps (from NeighbourGaps)
00182 // modified so that if the max exceeds the max dimension of the blob, and
00183 // the min is less, the max is replaced with the min.
00184 // The objective is to catch cases where there is only a single neighbour
00185 // and avoid reporting the other gap as a ridiculously large number
00186 void BLOBNBOX::MinMaxGapsClipped(int* h_min, int* h_max,
00187                                  int* v_min, int* v_max) const {
00188   int max_dimension = MAX(box.width(), box.height());
00189   int gaps[BND_COUNT];
00190   NeighbourGaps(gaps);
00191   *h_min = MIN(gaps[BND_LEFT], gaps[BND_RIGHT]);
00192   *h_max = MAX(gaps[BND_LEFT], gaps[BND_RIGHT]);
00193   if (*h_max > max_dimension && *h_min < max_dimension) *h_max = *h_min;
00194   *v_min = MIN(gaps[BND_ABOVE], gaps[BND_BELOW]);
00195   *v_max = MAX(gaps[BND_ABOVE], gaps[BND_BELOW]);
00196   if (*v_max > max_dimension && *v_min < max_dimension) *v_max = *v_min;
00197 }
00198 
00199 // NULLs out any neighbours that are DeletableNoise to remove references.
00200 void BLOBNBOX::CleanNeighbours() {
00201   for (int dir = 0; dir < BND_COUNT; ++dir) {
00202     BLOBNBOX* neighbour = neighbours_[dir];
00203     if (neighbour != NULL && neighbour->DeletableNoise()) {
00204       neighbours_[dir] = NULL;
00205       good_stroke_neighbours_[dir] = false;
00206     }
00207   }
00208 }
00209 
00210 // Returns positive if there is at least one side neighbour that has a similar
00211 // stroke width and is not on the other side of a rule line.
00212 int BLOBNBOX::GoodTextBlob() const {
00213   int score = 0;
00214   for (int dir = 0; dir < BND_COUNT; ++dir) {
00215     BlobNeighbourDir bnd = static_cast<BlobNeighbourDir>(dir);
00216     if (good_stroke_neighbour(bnd))
00217       ++score;
00218   }
00219   return score;
00220 }
00221 
00222 // Returns the number of side neighbours that are of type BRT_NOISE.
00223 int BLOBNBOX::NoisyNeighbours() const {
00224   int count = 0;
00225   for (int dir = 0; dir < BND_COUNT; ++dir) {
00226     BlobNeighbourDir bnd = static_cast<BlobNeighbourDir>(dir);
00227     BLOBNBOX* blob = neighbour(bnd);
00228     if (blob != NULL && blob->region_type() == BRT_NOISE)
00229       ++count;
00230   }
00231   return count;
00232 }
00233 
00234 // Returns true, and sets vert_possible/horz_possible if the blob has some
00235 // feature that makes it individually appear to flow one way.
00236 // eg if it has a high aspect ratio, yet has a complex shape, such as a
00237 // joined word in Latin, Arabic, or Hindi, rather than being a -, I, l, 1 etc.
00238 bool BLOBNBOX::DefiniteIndividualFlow() {
00239   int box_perimeter = 2 * (box.height() + box.width());
00240   if (box.width() > box.height() * kDefiniteAspectRatio) {
00241     // Attempt to distinguish a wide joined word from a dash.
00242     // If it is a dash, then its perimeter is approximately
00243     // 2 * (box width + stroke width), but more if the outline is noisy,
00244     // so perimeter - 2*(box width + stroke width) should be close to zero.
00245     // A complex shape such as a joined word should have a much larger value.
00246     int perimeter = cblob()->perimeter();
00247     if (vert_stroke_width() > 0)
00248       perimeter -= 2 * vert_stroke_width();
00249     else
00250       perimeter -= 4 * cblob()->area() / perimeter;
00251     perimeter -= 2 * box.width();
00252     // Use a multiple of the box perimeter as a threshold.
00253     if (perimeter > kComplexShapePerimeterRatio * box_perimeter) {
00254       set_vert_possible(false);
00255       set_horz_possible(true);
00256       return true;
00257     }
00258   }
00259   if (box.height() > box.width() * kDefiniteAspectRatio) {
00260     // As above, but for a putative vertical word vs a I/1/l.
00261     int perimeter = cblob()->perimeter();
00262     if (horz_stroke_width() > 0)
00263       perimeter -= 2 * horz_stroke_width();
00264     else
00265       perimeter -= 4 * cblob()->area() / perimeter;
00266     perimeter -= 2 * box.height();
00267     if (perimeter > kComplexShapePerimeterRatio * box_perimeter) {
00268       set_vert_possible(true);
00269       set_horz_possible(false);
00270       return true;
00271     }
00272   }
00273   return false;
00274 }
00275 
00276 // Returns true if there is no tabstop violation in merging this and other.
00277 bool BLOBNBOX::ConfirmNoTabViolation(const BLOBNBOX& other) const {
00278   if (box.left() < other.box.left() && box.left() < other.left_rule_)
00279     return false;
00280   if (other.box.left() < box.left() && other.box.left() < left_rule_)
00281     return false;
00282   if (box.right() > other.box.right() && box.right() > other.right_rule_)
00283     return false;
00284   if (other.box.right() > box.right() && other.box.right() > right_rule_)
00285     return false;
00286   return true;
00287 }
00288 
00289 // Returns true if other has a similar stroke width to this.
00290 bool BLOBNBOX::MatchingStrokeWidth(const BLOBNBOX& other,
00291                                    double fractional_tolerance,
00292                                    double constant_tolerance) const {
00293   // The perimeter-based width is used as a backup in case there is
00294   // no information in the blob.
00295   double p_width = area_stroke_width();
00296   double n_p_width = other.area_stroke_width();
00297   float h_tolerance = horz_stroke_width_ * fractional_tolerance
00298                      + constant_tolerance;
00299   float v_tolerance = vert_stroke_width_ * fractional_tolerance
00300                      + constant_tolerance;
00301   double p_tolerance = p_width * fractional_tolerance
00302                      + constant_tolerance;
00303   bool h_zero = horz_stroke_width_ == 0.0f || other.horz_stroke_width_ == 0.0f;
00304   bool v_zero = vert_stroke_width_ == 0.0f || other.vert_stroke_width_ == 0.0f;
00305   bool h_ok = !h_zero && NearlyEqual(horz_stroke_width_,
00306                                      other.horz_stroke_width_, h_tolerance);
00307   bool v_ok = !v_zero && NearlyEqual(vert_stroke_width_,
00308                                      other.vert_stroke_width_, v_tolerance);
00309   bool p_ok = h_zero && v_zero && NearlyEqual(p_width, n_p_width, p_tolerance);
00310   // For a match, at least one of the horizontal and vertical widths
00311   // must match, and the other one must either match or be zero.
00312   // Only if both are zero will we look at the perimeter metric.
00313   return p_ok || ((v_ok || h_ok) && (h_ok || h_zero) && (v_ok || v_zero));
00314 }
00315 
00316 // Returns a bounding box of the outline contained within the
00317 // given horizontal range.
00318 TBOX BLOBNBOX::BoundsWithinLimits(int left, int right) {
00319   FCOORD no_rotation(1.0f, 0.0f);
00320   float top = box.top();
00321   float bottom = box.bottom();
00322   if (cblob_ptr != NULL) {
00323     find_cblob_limits(cblob_ptr, static_cast<float>(left),
00324                       static_cast<float>(right), no_rotation,
00325                       bottom, top);
00326   }
00327 
00328   if (top < bottom) {
00329     top = box.top();
00330     bottom = box.bottom();
00331   }
00332   FCOORD bot_left(left, bottom);
00333   FCOORD top_right(right, top);
00334   TBOX shrunken_box(bot_left);
00335   TBOX shrunken_box2(top_right);
00336   shrunken_box += shrunken_box2;
00337   return shrunken_box;
00338 }
00339 
00340 // Helper to call CleanNeighbours on all blobs on the list.
00341 void BLOBNBOX::CleanNeighbours(BLOBNBOX_LIST* blobs) {
00342   BLOBNBOX_IT blob_it(blobs);
00343   for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
00344     blob_it.data()->CleanNeighbours();
00345   }
00346 }
00347 
00348 // Helper to delete all the deletable blobs on the list.
00349 void BLOBNBOX::DeleteNoiseBlobs(BLOBNBOX_LIST* blobs) {
00350   BLOBNBOX_IT blob_it(blobs);
00351   for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
00352     BLOBNBOX* blob = blob_it.data();
00353     if (blob->DeletableNoise()) {
00354       delete blob->cblob();
00355       delete blob_it.extract();
00356     }
00357   }
00358 }
00359 
00360 #ifndef GRAPHICS_DISABLED
00361 // Helper to draw all the blobs on the list in the given body_colour,
00362 // with child outlines in the child_colour.
00363 void BLOBNBOX::PlotBlobs(BLOBNBOX_LIST* list,
00364                          ScrollView::Color body_colour,
00365                          ScrollView::Color child_colour,
00366                          ScrollView* win) {
00367   BLOBNBOX_IT it(list);
00368   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00369     it.data()->plot(win, body_colour, child_colour);
00370   }
00371 }
00372 
00373 // Helper to draw only DeletableNoise blobs (unowned, BRT_NOISE) on the
00374 // given list in the given body_colour, with child outlines in the
00375 // child_colour.
00376 void BLOBNBOX::PlotNoiseBlobs(BLOBNBOX_LIST* list,
00377                               ScrollView::Color body_colour,
00378                               ScrollView::Color child_colour,
00379                               ScrollView* win) {
00380   BLOBNBOX_IT it(list);
00381   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00382     BLOBNBOX* blob = it.data();
00383     if (blob->DeletableNoise())
00384       blob->plot(win, body_colour, child_colour);
00385   }
00386 }
00387 
00388 ScrollView::Color BLOBNBOX::TextlineColor(BlobRegionType region_type,
00389                                           BlobTextFlowType flow_type) {
00390   switch (region_type) {
00391     case BRT_HLINE:
00392       return ScrollView::BROWN;
00393     case BRT_VLINE:
00394       return ScrollView::DARK_GREEN;
00395     case BRT_RECTIMAGE:
00396       return ScrollView::RED;
00397     case BRT_POLYIMAGE:
00398       return ScrollView::ORANGE;
00399     case BRT_UNKNOWN:
00400       return flow_type == BTFT_NONTEXT ? ScrollView::CYAN : ScrollView::WHITE;
00401     case BRT_VERT_TEXT:
00402       if (flow_type == BTFT_STRONG_CHAIN || flow_type == BTFT_TEXT_ON_IMAGE)
00403         return ScrollView::GREEN;
00404       if (flow_type == BTFT_CHAIN)
00405         return ScrollView::LIME_GREEN;
00406       return ScrollView::YELLOW;
00407     case BRT_TEXT:
00408       if (flow_type == BTFT_STRONG_CHAIN)
00409         return ScrollView::BLUE;
00410       if (flow_type == BTFT_TEXT_ON_IMAGE)
00411         return ScrollView::LIGHT_BLUE;
00412       if (flow_type == BTFT_CHAIN)
00413         return ScrollView::MEDIUM_BLUE;
00414       if (flow_type == BTFT_LEADER)
00415         return ScrollView::WHEAT;
00416       if (flow_type == BTFT_NONTEXT)
00417         return ScrollView::PINK;
00418       return ScrollView::MAGENTA;
00419     default:
00420       return ScrollView::GREY;
00421   }
00422 }
00423 
00424 // Keep in sync with BlobRegionType.
00425 ScrollView::Color BLOBNBOX::BoxColor() const {
00426   return TextlineColor(region_type_, flow_);
00427 }
00428 #endif
00429 /**********************************************************************
00430  * find_cblob_limits
00431  *
00432  * Scan the outlines of the cblob to locate the y min and max
00433  * between the given x limits.
00434  **********************************************************************/
00435 
00436 void find_cblob_limits(                  //get y limits
00437                        C_BLOB *blob,     //blob to search
00438                        float leftx,      //x limits
00439                        float rightx,
00440                        FCOORD rotation,  //for landscape
00441                        float &ymin,      //output y limits
00442                        float &ymax) {
00443   inT16 stepindex;               //current point
00444   ICOORD pos;                    //current coords
00445   ICOORD vec;                    //rotated step
00446   C_OUTLINE *outline;            //current outline
00447                                  //outlines
00448   C_OUTLINE_IT out_it = blob->out_list ();
00449 
00450   ymin = (float) MAX_INT32;
00451   ymax = (float) -MAX_INT32;
00452   for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
00453     outline = out_it.data ();
00454     pos = outline->start_pos (); //get coords
00455     pos.rotate (rotation);
00456     for (stepindex = 0; stepindex < outline->pathlength (); stepindex++) {
00457                                  //inside
00458       if (pos.x () >= leftx && pos.x () <= rightx) {
00459         UpdateRange(pos.y(), &ymin, &ymax);
00460       }
00461       vec = outline->step (stepindex);
00462       vec.rotate (rotation);
00463       pos += vec;                //move to next
00464     }
00465   }
00466 }
00467 
00468 
00469 /**********************************************************************
00470  * find_cblob_vlimits
00471  *
00472  * Scan the outlines of the cblob to locate the y min and max
00473  * between the given x limits.
00474  **********************************************************************/
00475 
00476 void find_cblob_vlimits(               //get y limits
00477                         C_BLOB *blob,  //blob to search
00478                         float leftx,   //x limits
00479                         float rightx,
00480                         float &ymin,   //output y limits
00481                         float &ymax) {
00482   inT16 stepindex;               //current point
00483   ICOORD pos;                    //current coords
00484   ICOORD vec;                    //rotated step
00485   C_OUTLINE *outline;            //current outline
00486                                  //outlines
00487   C_OUTLINE_IT out_it = blob->out_list ();
00488 
00489   ymin = (float) MAX_INT32;
00490   ymax = (float) -MAX_INT32;
00491   for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
00492     outline = out_it.data ();
00493     pos = outline->start_pos (); //get coords
00494     for (stepindex = 0; stepindex < outline->pathlength (); stepindex++) {
00495                                  //inside
00496       if (pos.x () >= leftx && pos.x () <= rightx) {
00497         UpdateRange(pos.y(), &ymin, &ymax);
00498       }
00499       vec = outline->step (stepindex);
00500       pos += vec;                //move to next
00501     }
00502   }
00503 }
00504 
00505 
00506 /**********************************************************************
00507  * find_cblob_hlimits
00508  *
00509  * Scan the outlines of the cblob to locate the x min and max
00510  * between the given y limits.
00511  **********************************************************************/
00512 
00513 void find_cblob_hlimits(                //get x limits
00514                         C_BLOB *blob,   //blob to search
00515                         float bottomy,  //y limits
00516                         float topy,
00517                         float &xmin,    //output x limits
00518                         float &xmax) {
00519   inT16 stepindex;               //current point
00520   ICOORD pos;                    //current coords
00521   ICOORD vec;                    //rotated step
00522   C_OUTLINE *outline;            //current outline
00523                                  //outlines
00524   C_OUTLINE_IT out_it = blob->out_list ();
00525 
00526   xmin = (float) MAX_INT32;
00527   xmax = (float) -MAX_INT32;
00528   for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
00529     outline = out_it.data ();
00530     pos = outline->start_pos (); //get coords
00531     for (stepindex = 0; stepindex < outline->pathlength (); stepindex++) {
00532                                  //inside
00533       if (pos.y () >= bottomy && pos.y () <= topy) {
00534         UpdateRange(pos.x(), &xmin, &xmax);
00535       }
00536       vec = outline->step (stepindex);
00537       pos += vec;                //move to next
00538     }
00539   }
00540 }
00541 
00542 /**********************************************************************
00543  * crotate_cblob
00544  *
00545  * Rotate the copy by the given vector and return a C_BLOB.
00546  **********************************************************************/
00547 
00548 C_BLOB *crotate_cblob(                 //rotate it
00549                       C_BLOB *blob,    //blob to search
00550                       FCOORD rotation  //for landscape
00551                      ) {
00552   C_OUTLINE_LIST out_list;       //output outlines
00553                                  //input outlines
00554   C_OUTLINE_IT in_it = blob->out_list ();
00555                                  //output outlines
00556   C_OUTLINE_IT out_it = &out_list;
00557 
00558   for (in_it.mark_cycle_pt (); !in_it.cycled_list (); in_it.forward ()) {
00559     out_it.add_after_then_move (new C_OUTLINE (in_it.data (), rotation));
00560   }
00561   return new C_BLOB (&out_list);
00562 }
00563 
00564 
00565 /**********************************************************************
00566  * box_next
00567  *
00568  * Compute the bounding box of this blob with merging of x overlaps
00569  * but no pre-chopping.
00570  * Then move the iterator on to the start of the next blob.
00571  **********************************************************************/
00572 
00573 TBOX box_next(                 //get bounding box
00574              BLOBNBOX_IT *it  //iterator to blobds
00575             ) {
00576   BLOBNBOX *blob;                //current blob
00577   TBOX result;                    //total box
00578 
00579   blob = it->data ();
00580   result = blob->bounding_box ();
00581   do {
00582     it->forward ();
00583     blob = it->data ();
00584     if (blob->cblob() == NULL)
00585                                  //was pre-chopped
00586       result += blob->bounding_box ();
00587   }
00588                                  //until next real blob
00589   while ((blob->cblob() == NULL) || blob->joined_to_prev());
00590   return result;
00591 }
00592 
00593 
00594 /**********************************************************************
00595  * box_next_pre_chopped
00596  *
00597  * Compute the bounding box of this blob with merging of x overlaps
00598  * but WITH pre-chopping.
00599  * Then move the iterator on to the start of the next pre-chopped blob.
00600  **********************************************************************/
00601 
00602 TBOX box_next_pre_chopped(                 //get bounding box
00603                          BLOBNBOX_IT *it  //iterator to blobds
00604                         ) {
00605   BLOBNBOX *blob;                //current blob
00606   TBOX result;                    //total box
00607 
00608   blob = it->data ();
00609   result = blob->bounding_box ();
00610   do {
00611     it->forward ();
00612     blob = it->data ();
00613   }
00614                                  //until next real blob
00615   while (blob->joined_to_prev ());
00616   return result;
00617 }
00618 
00619 
00620 /**********************************************************************
00621  * TO_ROW::TO_ROW
00622  *
00623  * Constructor to make a row from a blob.
00624  **********************************************************************/
00625 
00626 TO_ROW::TO_ROW (                 //constructor
00627 BLOBNBOX * blob,                 //first blob
00628 float top,                       //corrected top
00629 float bottom,                    //of row
00630 float row_size                   //ideal
00631 ) {
00632   clear();
00633   y_min = bottom;
00634   y_max = top;
00635   initial_y_min = bottom;
00636 
00637   float diff;                    //in size
00638   BLOBNBOX_IT it = &blobs;       //list of blobs
00639 
00640   it.add_to_end (blob);
00641   diff = top - bottom - row_size;
00642   if (diff > 0) {
00643     y_max -= diff / 2;
00644     y_min += diff / 2;
00645   }
00646                                  //very small object
00647   else if ((top - bottom) * 3 < row_size) {
00648     diff = row_size / 3 + bottom - top;
00649     y_max += diff / 2;
00650     y_min -= diff / 2;
00651   }
00652 }
00653 
00654 
00655 /**********************************************************************
00656  * TO_ROW:add_blob
00657  *
00658  * Add the blob to the end of the row.
00659  **********************************************************************/
00660 
00661 void TO_ROW::add_blob(                 //constructor
00662                       BLOBNBOX *blob,  //first blob
00663                       float top,       //corrected top
00664                       float bottom,    //of row
00665                       float row_size   //ideal
00666                      ) {
00667   float allowed;                 //allowed expansion
00668   float available;               //expansion
00669   BLOBNBOX_IT it = &blobs;       //list of blobs
00670 
00671   it.add_to_end (blob);
00672   allowed = row_size + y_min - y_max;
00673   if (allowed > 0) {
00674     available = top > y_max ? top - y_max : 0;
00675     if (bottom < y_min)
00676                                  //total available
00677         available += y_min - bottom;
00678     if (available > 0) {
00679       available += available;    //do it gradually
00680       if (available < allowed)
00681         available = allowed;
00682       if (bottom < y_min)
00683         y_min -= (y_min - bottom) * allowed / available;
00684       if (top > y_max)
00685         y_max += (top - y_max) * allowed / available;
00686     }
00687   }
00688 }
00689 
00690 
00691 /**********************************************************************
00692  * TO_ROW:insert_blob
00693  *
00694  * Add the blob to the row in the correct position.
00695  **********************************************************************/
00696 
00697 void TO_ROW::insert_blob(                //constructor
00698                          BLOBNBOX *blob  //first blob
00699                         ) {
00700   BLOBNBOX_IT it = &blobs;       //list of blobs
00701 
00702   if (it.empty ())
00703     it.add_before_then_move (blob);
00704   else {
00705     it.mark_cycle_pt ();
00706     while (!it.cycled_list ()
00707       && it.data ()->bounding_box ().left () <=
00708       blob->bounding_box ().left ())
00709       it.forward ();
00710     if (it.cycled_list ())
00711       it.add_to_end (blob);
00712     else
00713       it.add_before_stay_put (blob);
00714   }
00715 }
00716 
00717 
00718 /**********************************************************************
00719  * TO_ROW::compute_vertical_projection
00720  *
00721  * Compute the vertical projection of a TO_ROW from its blobs.
00722  **********************************************************************/
00723 
00724 void TO_ROW::compute_vertical_projection() {  //project whole row
00725   TBOX row_box;                   //bound of row
00726   BLOBNBOX *blob;                //current blob
00727   TBOX blob_box;                  //bounding box
00728   BLOBNBOX_IT blob_it = blob_list ();
00729 
00730   if (blob_it.empty ())
00731     return;
00732   row_box = blob_it.data ()->bounding_box ();
00733   for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ())
00734     row_box += blob_it.data ()->bounding_box ();
00735 
00736   projection.set_range (row_box.left () - PROJECTION_MARGIN,
00737     row_box.right () + PROJECTION_MARGIN);
00738   projection_left = row_box.left () - PROJECTION_MARGIN;
00739   projection_right = row_box.right () + PROJECTION_MARGIN;
00740   for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
00741     blob = blob_it.data();
00742     if (blob->cblob() != NULL)
00743       vertical_cblob_projection(blob->cblob(), &projection);
00744   }
00745 }
00746 
00747 
00748 /**********************************************************************
00749  * TO_ROW::clear
00750  *
00751  * Zero out all scalar members.
00752  **********************************************************************/
00753 void TO_ROW::clear() {
00754   all_caps = 0;
00755   used_dm_model = 0;
00756   projection_left = 0;
00757   projection_right = 0;
00758   pitch_decision = PITCH_DUNNO;
00759   fixed_pitch = 0.0;
00760   fp_space = 0.0;
00761   fp_nonsp = 0.0;
00762   pr_space = 0.0;
00763   pr_nonsp = 0.0;
00764   spacing = 0.0;
00765   xheight = 0.0;
00766   xheight_evidence = 0;
00767   body_size = 0.0;
00768   ascrise = 0.0;
00769   descdrop = 0.0;
00770   min_space = 0;
00771   max_nonspace = 0;
00772   space_threshold = 0;
00773   kern_size = 0.0;
00774   space_size = 0.0;
00775   y_min = 0.0;
00776   y_max = 0.0;
00777   initial_y_min = 0.0;
00778   m = 0.0;
00779   c = 0.0;
00780   error = 0.0;
00781   para_c = 0.0;
00782   para_error = 0.0;
00783   y_origin = 0.0;
00784   credibility = 0.0;
00785   num_repeated_sets_ = -1;
00786 }
00787 
00788 
00789 /**********************************************************************
00790  * vertical_cblob_projection
00791  *
00792  * Compute the vertical projection of a cblob from its outlines
00793  * and add to the given STATS.
00794  **********************************************************************/
00795 
00796 void vertical_cblob_projection(               //project outlines
00797                                C_BLOB *blob,  //blob to project
00798                                STATS *stats   //output
00799                               ) {
00800                                  //outlines of blob
00801   C_OUTLINE_IT out_it = blob->out_list ();
00802 
00803   for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
00804     vertical_coutline_projection (out_it.data (), stats);
00805   }
00806 }
00807 
00808 
00809 /**********************************************************************
00810  * vertical_coutline_projection
00811  *
00812  * Compute the vertical projection of a outline from its outlines
00813  * and add to the given STATS.
00814  **********************************************************************/
00815 
00816 void vertical_coutline_projection(                     //project outlines
00817                                   C_OUTLINE *outline,  //outline to project
00818                                   STATS *stats         //output
00819                                  ) {
00820   ICOORD pos;                    //current point
00821   ICOORD step;                   //edge step
00822   inT32 length;                  //of outline
00823   inT16 stepindex;               //current step
00824   C_OUTLINE_IT out_it = outline->child ();
00825 
00826   pos = outline->start_pos ();
00827   length = outline->pathlength ();
00828   for (stepindex = 0; stepindex < length; stepindex++) {
00829     step = outline->step (stepindex);
00830     if (step.x () > 0) {
00831      stats->add (pos.x (), -pos.y ());
00832     } else if (step.x () < 0) {
00833       stats->add (pos.x () - 1, pos.y ());
00834     }
00835     pos += step;
00836   }
00837 
00838   for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
00839     vertical_coutline_projection (out_it.data (), stats);
00840   }
00841 }
00842 
00843 
00844 /**********************************************************************
00845  * TO_BLOCK::TO_BLOCK
00846  *
00847  * Constructor to make a TO_BLOCK from a real block.
00848  **********************************************************************/
00849 
00850 TO_BLOCK::TO_BLOCK(                  //make a block
00851                    BLOCK *src_block  //real block
00852                   ) {
00853   clear();
00854   block = src_block;
00855 }
00856 
00857 static void clear_blobnboxes(BLOBNBOX_LIST* boxes) {
00858   BLOBNBOX_IT it = boxes;
00859   // A BLOBNBOX generally doesn't own its blobs, so if they do, you
00860   // have to delete them explicitly.
00861   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00862     BLOBNBOX* box = it.data();
00863     if (box->cblob() != NULL)
00864       delete box->cblob();
00865   }
00866 }
00867 
00868 /**********************************************************************
00869  * TO_BLOCK::clear
00870  *
00871  * Zero out all scalar members.
00872  **********************************************************************/
00873 void TO_BLOCK::clear() {
00874   block = NULL;
00875   pitch_decision = PITCH_DUNNO;
00876   line_spacing = 0.0;
00877   line_size = 0.0;
00878   max_blob_size = 0.0;
00879   baseline_offset = 0.0;
00880   xheight = 0.0;
00881   fixed_pitch = 0.0;
00882   kern_size = 0.0;
00883   space_size = 0.0;
00884   min_space = 0;
00885   max_nonspace = 0;
00886   fp_space = 0.0;
00887   fp_nonsp = 0.0;
00888   pr_space = 0.0;
00889   pr_nonsp = 0.0;
00890   key_row = NULL;
00891 }
00892 
00893 
00894 TO_BLOCK::~TO_BLOCK() {
00895   // Any residual BLOBNBOXes at this stage own their blobs, so delete them.
00896   clear_blobnboxes(&blobs);
00897   clear_blobnboxes(&underlines);
00898   clear_blobnboxes(&noise_blobs);
00899   clear_blobnboxes(&small_blobs);
00900   clear_blobnboxes(&large_blobs);
00901 }
00902 
00903 // Helper function to divide the input blobs over noise, small, medium
00904 // and large lists. Blobs small in height and (small in width or large in width)
00905 // go in the noise list. Dash (-) candidates go in the small list, and
00906 // medium and large are by height.
00907 // SIDE-EFFECT: reset all blobs to initial state by calling Init().
00908 static void SizeFilterBlobs(int min_height, int max_height,
00909                             BLOBNBOX_LIST* src_list,
00910                             BLOBNBOX_LIST* noise_list,
00911                             BLOBNBOX_LIST* small_list,
00912                             BLOBNBOX_LIST* medium_list,
00913                             BLOBNBOX_LIST* large_list) {
00914   BLOBNBOX_IT noise_it(noise_list);
00915   BLOBNBOX_IT small_it(small_list);
00916   BLOBNBOX_IT medium_it(medium_list);
00917   BLOBNBOX_IT large_it(large_list);
00918   for (BLOBNBOX_IT src_it(src_list); !src_it.empty(); src_it.forward()) {
00919     BLOBNBOX* blob = src_it.extract();
00920     blob->ReInit();
00921     int width = blob->bounding_box().width();
00922     int height = blob->bounding_box().height();
00923     if (height < min_height  &&
00924         (width < min_height || width > max_height))
00925       noise_it.add_after_then_move(blob);
00926     else if (height > max_height)
00927       large_it.add_after_then_move(blob);
00928     else if (height < min_height)
00929       small_it.add_after_then_move(blob);
00930     else
00931       medium_it.add_after_then_move(blob);
00932   }
00933 }
00934 
00935 // Reorganize the blob lists with a different definition of small, medium
00936 // and large, compared to the original definition.
00937 // Height is still the primary filter key, but medium width blobs of small
00938 // height become small, and very wide blobs of small height stay noise, along
00939 // with small dot-shaped blobs.
00940 void TO_BLOCK::ReSetAndReFilterBlobs() {
00941   int min_height = IntCastRounded(kMinMediumSizeRatio * line_size);
00942   int max_height = IntCastRounded(kMaxMediumSizeRatio * line_size);
00943   BLOBNBOX_LIST noise_list;
00944   BLOBNBOX_LIST small_list;
00945   BLOBNBOX_LIST medium_list;
00946   BLOBNBOX_LIST large_list;
00947   SizeFilterBlobs(min_height, max_height, &blobs,
00948                   &noise_list, &small_list, &medium_list, &large_list);
00949   SizeFilterBlobs(min_height, max_height, &large_blobs,
00950                   &noise_list, &small_list, &medium_list, &large_list);
00951   SizeFilterBlobs(min_height, max_height, &small_blobs,
00952                   &noise_list, &small_list, &medium_list, &large_list);
00953   SizeFilterBlobs(min_height, max_height, &noise_blobs,
00954                   &noise_list, &small_list, &medium_list, &large_list);
00955   BLOBNBOX_IT blob_it(&blobs);
00956   blob_it.add_list_after(&medium_list);
00957   blob_it.set_to_list(&large_blobs);
00958   blob_it.add_list_after(&large_list);
00959   blob_it.set_to_list(&small_blobs);
00960   blob_it.add_list_after(&small_list);
00961   blob_it.set_to_list(&noise_blobs);
00962   blob_it.add_list_after(&noise_list);
00963 }
00964 
00965 // Deletes noise blobs from all lists where not owned by a ColPartition.
00966 void TO_BLOCK::DeleteUnownedNoise() {
00967   BLOBNBOX::CleanNeighbours(&blobs);
00968   BLOBNBOX::CleanNeighbours(&small_blobs);
00969   BLOBNBOX::CleanNeighbours(&noise_blobs);
00970   BLOBNBOX::CleanNeighbours(&large_blobs);
00971   BLOBNBOX::DeleteNoiseBlobs(&blobs);
00972   BLOBNBOX::DeleteNoiseBlobs(&small_blobs);
00973   BLOBNBOX::DeleteNoiseBlobs(&noise_blobs);
00974   BLOBNBOX::DeleteNoiseBlobs(&large_blobs);
00975 }
00976 
00977 #ifndef GRAPHICS_DISABLED
00978 // Draw the noise blobs from all lists in red.
00979 void TO_BLOCK::plot_noise_blobs(ScrollView* win) {
00980   BLOBNBOX::PlotNoiseBlobs(&noise_blobs, ScrollView::RED, ScrollView::RED, win);
00981   BLOBNBOX::PlotNoiseBlobs(&small_blobs, ScrollView::RED, ScrollView::RED, win);
00982   BLOBNBOX::PlotNoiseBlobs(&large_blobs, ScrollView::RED, ScrollView::RED, win);
00983   BLOBNBOX::PlotNoiseBlobs(&blobs, ScrollView::RED, ScrollView::RED, win);
00984 }
00985 
00986 // Draw the blobs on the various lists in the block in different colors.
00987 void TO_BLOCK::plot_graded_blobs(ScrollView* win) {
00988   BLOBNBOX::PlotBlobs(&noise_blobs, ScrollView::CORAL, ScrollView::BLUE, win);
00989   BLOBNBOX::PlotBlobs(&small_blobs, ScrollView::GOLDENROD, ScrollView::YELLOW,
00990                       win);
00991   BLOBNBOX::PlotBlobs(&large_blobs, ScrollView::DARK_GREEN, ScrollView::YELLOW,
00992                       win);
00993   BLOBNBOX::PlotBlobs(&blobs, ScrollView::WHITE, ScrollView::BROWN, win);
00994 }
00995 
00996 /**********************************************************************
00997  * plot_blob_list
00998  *
00999  * Draw a list of blobs.
01000  **********************************************************************/
01001 
01002 void plot_blob_list(ScrollView* win,                   // window to draw in
01003                     BLOBNBOX_LIST *list,               // blob list
01004                     ScrollView::Color body_colour,     // colour to draw
01005                     ScrollView::Color child_colour) {  // colour of child
01006   BLOBNBOX_IT it = list;
01007   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
01008     it.data()->plot(win, body_colour, child_colour);
01009   }
01010 }
01011 #endif  // GRAPHICS_DISABLED