Tesseract
3.02
|
00001 /********************************************************************** 00002 * File: blobbox.cpp (Formerly blobnbox.c) 00003 * Description: Code for the textord blob class. 00004 * Author: Ray Smith 00005 * Created: Thu Jul 30 09:08:51 BST 1992 00006 * 00007 * (C) Copyright 1992, Hewlett-Packard Ltd. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 * 00018 **********************************************************************/ 00019 00020 #include "mfcpch.h" 00021 #include "blobbox.h" 00022 #include "helpers.h" 00023 00024 #define PROJECTION_MARGIN 10 //arbitrary 00025 #define EXTERN 00026 00027 ELISTIZE (BLOBNBOX) ELIST2IZE (TO_ROW) ELISTIZE (TO_BLOCK) 00028 00029 // Upto 30 degrees is allowed for rotations of diacritic blobs. 00030 const double kCosSmallAngle = 0.866; 00031 // Min aspect ratio for a joined word to indicate an obvious flow direction. 00032 const double kDefiniteAspectRatio = 2.0; 00033 // Multiple of short length in perimeter to make a joined word. 00034 const double kComplexShapePerimeterRatio = 1.5; 00035 // Min multiple of linesize for medium-sized blobs in ReFilterBlobs. 00036 const double kMinMediumSizeRatio = 0.25; 00037 // Max multiple of linesize for medium-sized blobs in ReFilterBlobs. 00038 const double kMaxMediumSizeRatio = 4.0; 00039 00040 // Rotates the box and the underlying blob. 00041 void BLOBNBOX::rotate(FCOORD rotation) { 00042 cblob_ptr->rotate(rotation); 00043 rotate_box(rotation); 00044 compute_bounding_box(); 00045 } 00046 00047 // Reflect the box in the y-axis, leaving the underlying blob untouched. 00048 void BLOBNBOX::reflect_box_in_y_axis() { 00049 int left = -box.right(); 00050 box.set_right(-box.left()); 00051 box.set_left(left); 00052 } 00053 00054 // Rotates the box by the angle given by rotation. 00055 // If the blob is a diacritic, then only small rotations for skew 00056 // correction can be applied. 00057 void BLOBNBOX::rotate_box(FCOORD rotation) { 00058 if (IsDiacritic()) { 00059 ASSERT_HOST(rotation.x() >= kCosSmallAngle) 00060 ICOORD top_pt((box.left() + box.right()) / 2, base_char_top_); 00061 ICOORD bottom_pt(top_pt.x(), base_char_bottom_); 00062 top_pt.rotate(rotation); 00063 base_char_top_ = top_pt.y(); 00064 bottom_pt.rotate(rotation); 00065 base_char_bottom_ = bottom_pt.y(); 00066 box.rotate(rotation); 00067 } else { 00068 box.rotate(rotation); 00069 set_diacritic_box(box); 00070 } 00071 } 00072 00073 /********************************************************************** 00074 * BLOBNBOX::merge 00075 * 00076 * Merge this blob with the given blob, which should be after this. 00077 **********************************************************************/ 00078 void BLOBNBOX::merge( //merge blobs 00079 BLOBNBOX *nextblob //blob to join with 00080 ) { 00081 box += nextblob->box; //merge boxes 00082 set_diacritic_box(box); 00083 nextblob->joined = TRUE; 00084 } 00085 00086 00087 // Merge this with other, taking the outlines from other. 00088 // Other is not deleted, but left for the caller to handle. 00089 void BLOBNBOX::really_merge(BLOBNBOX* other) { 00090 if (cblob_ptr != NULL && other->cblob_ptr != NULL) { 00091 C_OUTLINE_IT ol_it(cblob_ptr->out_list()); 00092 ol_it.add_list_after(other->cblob_ptr->out_list()); 00093 } 00094 compute_bounding_box(); 00095 } 00096 00097 00098 /********************************************************************** 00099 * BLOBNBOX::chop 00100 * 00101 * Chop this blob into equal sized pieces using the x height as a guide. 00102 * The blob is not actually chopped. Instead, fake blobs are inserted 00103 * with the relevant bounding boxes. 00104 **********************************************************************/ 00105 00106 void BLOBNBOX::chop( //chop blobs 00107 BLOBNBOX_IT *start_it, //location of this 00108 BLOBNBOX_IT *end_it, //iterator 00109 FCOORD rotation, //for landscape 00110 float xheight //of line 00111 ) { 00112 inT16 blobcount; //no of blobs 00113 BLOBNBOX *newblob; //fake blob 00114 BLOBNBOX *blob; //current blob 00115 inT16 blobindex; //number of chop 00116 inT16 leftx; //left edge of blob 00117 float blobwidth; //width of each 00118 float rightx; //right edge to scan 00119 float ymin, ymax; //limits of new blob 00120 float test_ymin, test_ymax; //limits of part blob 00121 ICOORD bl, tr; //corners of box 00122 BLOBNBOX_IT blob_it; //blob iterator 00123 00124 //get no of chops 00125 blobcount = (inT16) floor (box.width () / xheight); 00126 if (blobcount > 1 && cblob_ptr != NULL) { 00127 //width of each 00128 blobwidth = (float) (box.width () + 1) / blobcount; 00129 for (blobindex = blobcount - 1, rightx = box.right (); 00130 blobindex >= 0; blobindex--, rightx -= blobwidth) { 00131 ymin = (float) MAX_INT32; 00132 ymax = (float) -MAX_INT32; 00133 blob_it = *start_it; 00134 do { 00135 blob = blob_it.data (); 00136 find_cblob_vlimits(blob->cblob_ptr, rightx - blobwidth, 00137 rightx, 00138 /*rotation, */ test_ymin, test_ymax); 00139 blob_it.forward (); 00140 UpdateRange(test_ymin, test_ymax, &ymin, &ymax); 00141 } 00142 while (blob != end_it->data ()); 00143 if (ymin < ymax) { 00144 leftx = (inT16) floor (rightx - blobwidth); 00145 if (leftx < box.left ()) 00146 leftx = box.left (); //clip to real box 00147 bl = ICOORD (leftx, (inT16) floor (ymin)); 00148 tr = ICOORD ((inT16) ceil (rightx), (inT16) ceil (ymax)); 00149 if (blobindex == 0) 00150 box = TBOX (bl, tr); //change box 00151 else { 00152 newblob = new BLOBNBOX; 00153 //box is all it has 00154 newblob->box = TBOX (bl, tr); 00155 //stay on current 00156 newblob->base_char_top_ = tr.y(); 00157 newblob->base_char_bottom_ = bl.y(); 00158 end_it->add_after_stay_put (newblob); 00159 } 00160 } 00161 } 00162 } 00163 } 00164 00165 // Returns the box gaps between this and its neighbours_ in an array 00166 // indexed by BlobNeighbourDir. 00167 void BLOBNBOX::NeighbourGaps(int gaps[BND_COUNT]) const { 00168 for (int dir = 0; dir < BND_COUNT; ++dir) { 00169 gaps[dir] = MAX_INT16; 00170 BLOBNBOX* neighbour = neighbours_[dir]; 00171 if (neighbour != NULL) { 00172 TBOX n_box = neighbour->bounding_box(); 00173 if (dir == BND_LEFT || dir == BND_RIGHT) { 00174 gaps[dir] = box.x_gap(n_box); 00175 } else { 00176 gaps[dir] = box.y_gap(n_box); 00177 } 00178 } 00179 } 00180 } 00181 // Returns the min and max horizontal and vertical gaps (from NeighbourGaps) 00182 // modified so that if the max exceeds the max dimension of the blob, and 00183 // the min is less, the max is replaced with the min. 00184 // The objective is to catch cases where there is only a single neighbour 00185 // and avoid reporting the other gap as a ridiculously large number 00186 void BLOBNBOX::MinMaxGapsClipped(int* h_min, int* h_max, 00187 int* v_min, int* v_max) const { 00188 int max_dimension = MAX(box.width(), box.height()); 00189 int gaps[BND_COUNT]; 00190 NeighbourGaps(gaps); 00191 *h_min = MIN(gaps[BND_LEFT], gaps[BND_RIGHT]); 00192 *h_max = MAX(gaps[BND_LEFT], gaps[BND_RIGHT]); 00193 if (*h_max > max_dimension && *h_min < max_dimension) *h_max = *h_min; 00194 *v_min = MIN(gaps[BND_ABOVE], gaps[BND_BELOW]); 00195 *v_max = MAX(gaps[BND_ABOVE], gaps[BND_BELOW]); 00196 if (*v_max > max_dimension && *v_min < max_dimension) *v_max = *v_min; 00197 } 00198 00199 // NULLs out any neighbours that are DeletableNoise to remove references. 00200 void BLOBNBOX::CleanNeighbours() { 00201 for (int dir = 0; dir < BND_COUNT; ++dir) { 00202 BLOBNBOX* neighbour = neighbours_[dir]; 00203 if (neighbour != NULL && neighbour->DeletableNoise()) { 00204 neighbours_[dir] = NULL; 00205 good_stroke_neighbours_[dir] = false; 00206 } 00207 } 00208 } 00209 00210 // Returns positive if there is at least one side neighbour that has a similar 00211 // stroke width and is not on the other side of a rule line. 00212 int BLOBNBOX::GoodTextBlob() const { 00213 int score = 0; 00214 for (int dir = 0; dir < BND_COUNT; ++dir) { 00215 BlobNeighbourDir bnd = static_cast<BlobNeighbourDir>(dir); 00216 if (good_stroke_neighbour(bnd)) 00217 ++score; 00218 } 00219 return score; 00220 } 00221 00222 // Returns the number of side neighbours that are of type BRT_NOISE. 00223 int BLOBNBOX::NoisyNeighbours() const { 00224 int count = 0; 00225 for (int dir = 0; dir < BND_COUNT; ++dir) { 00226 BlobNeighbourDir bnd = static_cast<BlobNeighbourDir>(dir); 00227 BLOBNBOX* blob = neighbour(bnd); 00228 if (blob != NULL && blob->region_type() == BRT_NOISE) 00229 ++count; 00230 } 00231 return count; 00232 } 00233 00234 // Returns true, and sets vert_possible/horz_possible if the blob has some 00235 // feature that makes it individually appear to flow one way. 00236 // eg if it has a high aspect ratio, yet has a complex shape, such as a 00237 // joined word in Latin, Arabic, or Hindi, rather than being a -, I, l, 1 etc. 00238 bool BLOBNBOX::DefiniteIndividualFlow() { 00239 int box_perimeter = 2 * (box.height() + box.width()); 00240 if (box.width() > box.height() * kDefiniteAspectRatio) { 00241 // Attempt to distinguish a wide joined word from a dash. 00242 // If it is a dash, then its perimeter is approximately 00243 // 2 * (box width + stroke width), but more if the outline is noisy, 00244 // so perimeter - 2*(box width + stroke width) should be close to zero. 00245 // A complex shape such as a joined word should have a much larger value. 00246 int perimeter = cblob()->perimeter(); 00247 if (vert_stroke_width() > 0) 00248 perimeter -= 2 * vert_stroke_width(); 00249 else 00250 perimeter -= 4 * cblob()->area() / perimeter; 00251 perimeter -= 2 * box.width(); 00252 // Use a multiple of the box perimeter as a threshold. 00253 if (perimeter > kComplexShapePerimeterRatio * box_perimeter) { 00254 set_vert_possible(false); 00255 set_horz_possible(true); 00256 return true; 00257 } 00258 } 00259 if (box.height() > box.width() * kDefiniteAspectRatio) { 00260 // As above, but for a putative vertical word vs a I/1/l. 00261 int perimeter = cblob()->perimeter(); 00262 if (horz_stroke_width() > 0) 00263 perimeter -= 2 * horz_stroke_width(); 00264 else 00265 perimeter -= 4 * cblob()->area() / perimeter; 00266 perimeter -= 2 * box.height(); 00267 if (perimeter > kComplexShapePerimeterRatio * box_perimeter) { 00268 set_vert_possible(true); 00269 set_horz_possible(false); 00270 return true; 00271 } 00272 } 00273 return false; 00274 } 00275 00276 // Returns true if there is no tabstop violation in merging this and other. 00277 bool BLOBNBOX::ConfirmNoTabViolation(const BLOBNBOX& other) const { 00278 if (box.left() < other.box.left() && box.left() < other.left_rule_) 00279 return false; 00280 if (other.box.left() < box.left() && other.box.left() < left_rule_) 00281 return false; 00282 if (box.right() > other.box.right() && box.right() > other.right_rule_) 00283 return false; 00284 if (other.box.right() > box.right() && other.box.right() > right_rule_) 00285 return false; 00286 return true; 00287 } 00288 00289 // Returns true if other has a similar stroke width to this. 00290 bool BLOBNBOX::MatchingStrokeWidth(const BLOBNBOX& other, 00291 double fractional_tolerance, 00292 double constant_tolerance) const { 00293 // The perimeter-based width is used as a backup in case there is 00294 // no information in the blob. 00295 double p_width = area_stroke_width(); 00296 double n_p_width = other.area_stroke_width(); 00297 float h_tolerance = horz_stroke_width_ * fractional_tolerance 00298 + constant_tolerance; 00299 float v_tolerance = vert_stroke_width_ * fractional_tolerance 00300 + constant_tolerance; 00301 double p_tolerance = p_width * fractional_tolerance 00302 + constant_tolerance; 00303 bool h_zero = horz_stroke_width_ == 0.0f || other.horz_stroke_width_ == 0.0f; 00304 bool v_zero = vert_stroke_width_ == 0.0f || other.vert_stroke_width_ == 0.0f; 00305 bool h_ok = !h_zero && NearlyEqual(horz_stroke_width_, 00306 other.horz_stroke_width_, h_tolerance); 00307 bool v_ok = !v_zero && NearlyEqual(vert_stroke_width_, 00308 other.vert_stroke_width_, v_tolerance); 00309 bool p_ok = h_zero && v_zero && NearlyEqual(p_width, n_p_width, p_tolerance); 00310 // For a match, at least one of the horizontal and vertical widths 00311 // must match, and the other one must either match or be zero. 00312 // Only if both are zero will we look at the perimeter metric. 00313 return p_ok || ((v_ok || h_ok) && (h_ok || h_zero) && (v_ok || v_zero)); 00314 } 00315 00316 // Returns a bounding box of the outline contained within the 00317 // given horizontal range. 00318 TBOX BLOBNBOX::BoundsWithinLimits(int left, int right) { 00319 FCOORD no_rotation(1.0f, 0.0f); 00320 float top = box.top(); 00321 float bottom = box.bottom(); 00322 if (cblob_ptr != NULL) { 00323 find_cblob_limits(cblob_ptr, static_cast<float>(left), 00324 static_cast<float>(right), no_rotation, 00325 bottom, top); 00326 } 00327 00328 if (top < bottom) { 00329 top = box.top(); 00330 bottom = box.bottom(); 00331 } 00332 FCOORD bot_left(left, bottom); 00333 FCOORD top_right(right, top); 00334 TBOX shrunken_box(bot_left); 00335 TBOX shrunken_box2(top_right); 00336 shrunken_box += shrunken_box2; 00337 return shrunken_box; 00338 } 00339 00340 // Helper to call CleanNeighbours on all blobs on the list. 00341 void BLOBNBOX::CleanNeighbours(BLOBNBOX_LIST* blobs) { 00342 BLOBNBOX_IT blob_it(blobs); 00343 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { 00344 blob_it.data()->CleanNeighbours(); 00345 } 00346 } 00347 00348 // Helper to delete all the deletable blobs on the list. 00349 void BLOBNBOX::DeleteNoiseBlobs(BLOBNBOX_LIST* blobs) { 00350 BLOBNBOX_IT blob_it(blobs); 00351 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { 00352 BLOBNBOX* blob = blob_it.data(); 00353 if (blob->DeletableNoise()) { 00354 delete blob->cblob(); 00355 delete blob_it.extract(); 00356 } 00357 } 00358 } 00359 00360 #ifndef GRAPHICS_DISABLED 00361 // Helper to draw all the blobs on the list in the given body_colour, 00362 // with child outlines in the child_colour. 00363 void BLOBNBOX::PlotBlobs(BLOBNBOX_LIST* list, 00364 ScrollView::Color body_colour, 00365 ScrollView::Color child_colour, 00366 ScrollView* win) { 00367 BLOBNBOX_IT it(list); 00368 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 00369 it.data()->plot(win, body_colour, child_colour); 00370 } 00371 } 00372 00373 // Helper to draw only DeletableNoise blobs (unowned, BRT_NOISE) on the 00374 // given list in the given body_colour, with child outlines in the 00375 // child_colour. 00376 void BLOBNBOX::PlotNoiseBlobs(BLOBNBOX_LIST* list, 00377 ScrollView::Color body_colour, 00378 ScrollView::Color child_colour, 00379 ScrollView* win) { 00380 BLOBNBOX_IT it(list); 00381 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 00382 BLOBNBOX* blob = it.data(); 00383 if (blob->DeletableNoise()) 00384 blob->plot(win, body_colour, child_colour); 00385 } 00386 } 00387 00388 ScrollView::Color BLOBNBOX::TextlineColor(BlobRegionType region_type, 00389 BlobTextFlowType flow_type) { 00390 switch (region_type) { 00391 case BRT_HLINE: 00392 return ScrollView::BROWN; 00393 case BRT_VLINE: 00394 return ScrollView::DARK_GREEN; 00395 case BRT_RECTIMAGE: 00396 return ScrollView::RED; 00397 case BRT_POLYIMAGE: 00398 return ScrollView::ORANGE; 00399 case BRT_UNKNOWN: 00400 return flow_type == BTFT_NONTEXT ? ScrollView::CYAN : ScrollView::WHITE; 00401 case BRT_VERT_TEXT: 00402 if (flow_type == BTFT_STRONG_CHAIN || flow_type == BTFT_TEXT_ON_IMAGE) 00403 return ScrollView::GREEN; 00404 if (flow_type == BTFT_CHAIN) 00405 return ScrollView::LIME_GREEN; 00406 return ScrollView::YELLOW; 00407 case BRT_TEXT: 00408 if (flow_type == BTFT_STRONG_CHAIN) 00409 return ScrollView::BLUE; 00410 if (flow_type == BTFT_TEXT_ON_IMAGE) 00411 return ScrollView::LIGHT_BLUE; 00412 if (flow_type == BTFT_CHAIN) 00413 return ScrollView::MEDIUM_BLUE; 00414 if (flow_type == BTFT_LEADER) 00415 return ScrollView::WHEAT; 00416 if (flow_type == BTFT_NONTEXT) 00417 return ScrollView::PINK; 00418 return ScrollView::MAGENTA; 00419 default: 00420 return ScrollView::GREY; 00421 } 00422 } 00423 00424 // Keep in sync with BlobRegionType. 00425 ScrollView::Color BLOBNBOX::BoxColor() const { 00426 return TextlineColor(region_type_, flow_); 00427 } 00428 #endif 00429 /********************************************************************** 00430 * find_cblob_limits 00431 * 00432 * Scan the outlines of the cblob to locate the y min and max 00433 * between the given x limits. 00434 **********************************************************************/ 00435 00436 void find_cblob_limits( //get y limits 00437 C_BLOB *blob, //blob to search 00438 float leftx, //x limits 00439 float rightx, 00440 FCOORD rotation, //for landscape 00441 float &ymin, //output y limits 00442 float &ymax) { 00443 inT16 stepindex; //current point 00444 ICOORD pos; //current coords 00445 ICOORD vec; //rotated step 00446 C_OUTLINE *outline; //current outline 00447 //outlines 00448 C_OUTLINE_IT out_it = blob->out_list (); 00449 00450 ymin = (float) MAX_INT32; 00451 ymax = (float) -MAX_INT32; 00452 for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) { 00453 outline = out_it.data (); 00454 pos = outline->start_pos (); //get coords 00455 pos.rotate (rotation); 00456 for (stepindex = 0; stepindex < outline->pathlength (); stepindex++) { 00457 //inside 00458 if (pos.x () >= leftx && pos.x () <= rightx) { 00459 UpdateRange(pos.y(), &ymin, &ymax); 00460 } 00461 vec = outline->step (stepindex); 00462 vec.rotate (rotation); 00463 pos += vec; //move to next 00464 } 00465 } 00466 } 00467 00468 00469 /********************************************************************** 00470 * find_cblob_vlimits 00471 * 00472 * Scan the outlines of the cblob to locate the y min and max 00473 * between the given x limits. 00474 **********************************************************************/ 00475 00476 void find_cblob_vlimits( //get y limits 00477 C_BLOB *blob, //blob to search 00478 float leftx, //x limits 00479 float rightx, 00480 float &ymin, //output y limits 00481 float &ymax) { 00482 inT16 stepindex; //current point 00483 ICOORD pos; //current coords 00484 ICOORD vec; //rotated step 00485 C_OUTLINE *outline; //current outline 00486 //outlines 00487 C_OUTLINE_IT out_it = blob->out_list (); 00488 00489 ymin = (float) MAX_INT32; 00490 ymax = (float) -MAX_INT32; 00491 for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) { 00492 outline = out_it.data (); 00493 pos = outline->start_pos (); //get coords 00494 for (stepindex = 0; stepindex < outline->pathlength (); stepindex++) { 00495 //inside 00496 if (pos.x () >= leftx && pos.x () <= rightx) { 00497 UpdateRange(pos.y(), &ymin, &ymax); 00498 } 00499 vec = outline->step (stepindex); 00500 pos += vec; //move to next 00501 } 00502 } 00503 } 00504 00505 00506 /********************************************************************** 00507 * find_cblob_hlimits 00508 * 00509 * Scan the outlines of the cblob to locate the x min and max 00510 * between the given y limits. 00511 **********************************************************************/ 00512 00513 void find_cblob_hlimits( //get x limits 00514 C_BLOB *blob, //blob to search 00515 float bottomy, //y limits 00516 float topy, 00517 float &xmin, //output x limits 00518 float &xmax) { 00519 inT16 stepindex; //current point 00520 ICOORD pos; //current coords 00521 ICOORD vec; //rotated step 00522 C_OUTLINE *outline; //current outline 00523 //outlines 00524 C_OUTLINE_IT out_it = blob->out_list (); 00525 00526 xmin = (float) MAX_INT32; 00527 xmax = (float) -MAX_INT32; 00528 for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) { 00529 outline = out_it.data (); 00530 pos = outline->start_pos (); //get coords 00531 for (stepindex = 0; stepindex < outline->pathlength (); stepindex++) { 00532 //inside 00533 if (pos.y () >= bottomy && pos.y () <= topy) { 00534 UpdateRange(pos.x(), &xmin, &xmax); 00535 } 00536 vec = outline->step (stepindex); 00537 pos += vec; //move to next 00538 } 00539 } 00540 } 00541 00542 /********************************************************************** 00543 * crotate_cblob 00544 * 00545 * Rotate the copy by the given vector and return a C_BLOB. 00546 **********************************************************************/ 00547 00548 C_BLOB *crotate_cblob( //rotate it 00549 C_BLOB *blob, //blob to search 00550 FCOORD rotation //for landscape 00551 ) { 00552 C_OUTLINE_LIST out_list; //output outlines 00553 //input outlines 00554 C_OUTLINE_IT in_it = blob->out_list (); 00555 //output outlines 00556 C_OUTLINE_IT out_it = &out_list; 00557 00558 for (in_it.mark_cycle_pt (); !in_it.cycled_list (); in_it.forward ()) { 00559 out_it.add_after_then_move (new C_OUTLINE (in_it.data (), rotation)); 00560 } 00561 return new C_BLOB (&out_list); 00562 } 00563 00564 00565 /********************************************************************** 00566 * box_next 00567 * 00568 * Compute the bounding box of this blob with merging of x overlaps 00569 * but no pre-chopping. 00570 * Then move the iterator on to the start of the next blob. 00571 **********************************************************************/ 00572 00573 TBOX box_next( //get bounding box 00574 BLOBNBOX_IT *it //iterator to blobds 00575 ) { 00576 BLOBNBOX *blob; //current blob 00577 TBOX result; //total box 00578 00579 blob = it->data (); 00580 result = blob->bounding_box (); 00581 do { 00582 it->forward (); 00583 blob = it->data (); 00584 if (blob->cblob() == NULL) 00585 //was pre-chopped 00586 result += blob->bounding_box (); 00587 } 00588 //until next real blob 00589 while ((blob->cblob() == NULL) || blob->joined_to_prev()); 00590 return result; 00591 } 00592 00593 00594 /********************************************************************** 00595 * box_next_pre_chopped 00596 * 00597 * Compute the bounding box of this blob with merging of x overlaps 00598 * but WITH pre-chopping. 00599 * Then move the iterator on to the start of the next pre-chopped blob. 00600 **********************************************************************/ 00601 00602 TBOX box_next_pre_chopped( //get bounding box 00603 BLOBNBOX_IT *it //iterator to blobds 00604 ) { 00605 BLOBNBOX *blob; //current blob 00606 TBOX result; //total box 00607 00608 blob = it->data (); 00609 result = blob->bounding_box (); 00610 do { 00611 it->forward (); 00612 blob = it->data (); 00613 } 00614 //until next real blob 00615 while (blob->joined_to_prev ()); 00616 return result; 00617 } 00618 00619 00620 /********************************************************************** 00621 * TO_ROW::TO_ROW 00622 * 00623 * Constructor to make a row from a blob. 00624 **********************************************************************/ 00625 00626 TO_ROW::TO_ROW ( //constructor 00627 BLOBNBOX * blob, //first blob 00628 float top, //corrected top 00629 float bottom, //of row 00630 float row_size //ideal 00631 ) { 00632 clear(); 00633 y_min = bottom; 00634 y_max = top; 00635 initial_y_min = bottom; 00636 00637 float diff; //in size 00638 BLOBNBOX_IT it = &blobs; //list of blobs 00639 00640 it.add_to_end (blob); 00641 diff = top - bottom - row_size; 00642 if (diff > 0) { 00643 y_max -= diff / 2; 00644 y_min += diff / 2; 00645 } 00646 //very small object 00647 else if ((top - bottom) * 3 < row_size) { 00648 diff = row_size / 3 + bottom - top; 00649 y_max += diff / 2; 00650 y_min -= diff / 2; 00651 } 00652 } 00653 00654 00655 /********************************************************************** 00656 * TO_ROW:add_blob 00657 * 00658 * Add the blob to the end of the row. 00659 **********************************************************************/ 00660 00661 void TO_ROW::add_blob( //constructor 00662 BLOBNBOX *blob, //first blob 00663 float top, //corrected top 00664 float bottom, //of row 00665 float row_size //ideal 00666 ) { 00667 float allowed; //allowed expansion 00668 float available; //expansion 00669 BLOBNBOX_IT it = &blobs; //list of blobs 00670 00671 it.add_to_end (blob); 00672 allowed = row_size + y_min - y_max; 00673 if (allowed > 0) { 00674 available = top > y_max ? top - y_max : 0; 00675 if (bottom < y_min) 00676 //total available 00677 available += y_min - bottom; 00678 if (available > 0) { 00679 available += available; //do it gradually 00680 if (available < allowed) 00681 available = allowed; 00682 if (bottom < y_min) 00683 y_min -= (y_min - bottom) * allowed / available; 00684 if (top > y_max) 00685 y_max += (top - y_max) * allowed / available; 00686 } 00687 } 00688 } 00689 00690 00691 /********************************************************************** 00692 * TO_ROW:insert_blob 00693 * 00694 * Add the blob to the row in the correct position. 00695 **********************************************************************/ 00696 00697 void TO_ROW::insert_blob( //constructor 00698 BLOBNBOX *blob //first blob 00699 ) { 00700 BLOBNBOX_IT it = &blobs; //list of blobs 00701 00702 if (it.empty ()) 00703 it.add_before_then_move (blob); 00704 else { 00705 it.mark_cycle_pt (); 00706 while (!it.cycled_list () 00707 && it.data ()->bounding_box ().left () <= 00708 blob->bounding_box ().left ()) 00709 it.forward (); 00710 if (it.cycled_list ()) 00711 it.add_to_end (blob); 00712 else 00713 it.add_before_stay_put (blob); 00714 } 00715 } 00716 00717 00718 /********************************************************************** 00719 * TO_ROW::compute_vertical_projection 00720 * 00721 * Compute the vertical projection of a TO_ROW from its blobs. 00722 **********************************************************************/ 00723 00724 void TO_ROW::compute_vertical_projection() { //project whole row 00725 TBOX row_box; //bound of row 00726 BLOBNBOX *blob; //current blob 00727 TBOX blob_box; //bounding box 00728 BLOBNBOX_IT blob_it = blob_list (); 00729 00730 if (blob_it.empty ()) 00731 return; 00732 row_box = blob_it.data ()->bounding_box (); 00733 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) 00734 row_box += blob_it.data ()->bounding_box (); 00735 00736 projection.set_range (row_box.left () - PROJECTION_MARGIN, 00737 row_box.right () + PROJECTION_MARGIN); 00738 projection_left = row_box.left () - PROJECTION_MARGIN; 00739 projection_right = row_box.right () + PROJECTION_MARGIN; 00740 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) { 00741 blob = blob_it.data(); 00742 if (blob->cblob() != NULL) 00743 vertical_cblob_projection(blob->cblob(), &projection); 00744 } 00745 } 00746 00747 00748 /********************************************************************** 00749 * TO_ROW::clear 00750 * 00751 * Zero out all scalar members. 00752 **********************************************************************/ 00753 void TO_ROW::clear() { 00754 all_caps = 0; 00755 used_dm_model = 0; 00756 projection_left = 0; 00757 projection_right = 0; 00758 pitch_decision = PITCH_DUNNO; 00759 fixed_pitch = 0.0; 00760 fp_space = 0.0; 00761 fp_nonsp = 0.0; 00762 pr_space = 0.0; 00763 pr_nonsp = 0.0; 00764 spacing = 0.0; 00765 xheight = 0.0; 00766 xheight_evidence = 0; 00767 body_size = 0.0; 00768 ascrise = 0.0; 00769 descdrop = 0.0; 00770 min_space = 0; 00771 max_nonspace = 0; 00772 space_threshold = 0; 00773 kern_size = 0.0; 00774 space_size = 0.0; 00775 y_min = 0.0; 00776 y_max = 0.0; 00777 initial_y_min = 0.0; 00778 m = 0.0; 00779 c = 0.0; 00780 error = 0.0; 00781 para_c = 0.0; 00782 para_error = 0.0; 00783 y_origin = 0.0; 00784 credibility = 0.0; 00785 num_repeated_sets_ = -1; 00786 } 00787 00788 00789 /********************************************************************** 00790 * vertical_cblob_projection 00791 * 00792 * Compute the vertical projection of a cblob from its outlines 00793 * and add to the given STATS. 00794 **********************************************************************/ 00795 00796 void vertical_cblob_projection( //project outlines 00797 C_BLOB *blob, //blob to project 00798 STATS *stats //output 00799 ) { 00800 //outlines of blob 00801 C_OUTLINE_IT out_it = blob->out_list (); 00802 00803 for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) { 00804 vertical_coutline_projection (out_it.data (), stats); 00805 } 00806 } 00807 00808 00809 /********************************************************************** 00810 * vertical_coutline_projection 00811 * 00812 * Compute the vertical projection of a outline from its outlines 00813 * and add to the given STATS. 00814 **********************************************************************/ 00815 00816 void vertical_coutline_projection( //project outlines 00817 C_OUTLINE *outline, //outline to project 00818 STATS *stats //output 00819 ) { 00820 ICOORD pos; //current point 00821 ICOORD step; //edge step 00822 inT32 length; //of outline 00823 inT16 stepindex; //current step 00824 C_OUTLINE_IT out_it = outline->child (); 00825 00826 pos = outline->start_pos (); 00827 length = outline->pathlength (); 00828 for (stepindex = 0; stepindex < length; stepindex++) { 00829 step = outline->step (stepindex); 00830 if (step.x () > 0) { 00831 stats->add (pos.x (), -pos.y ()); 00832 } else if (step.x () < 0) { 00833 stats->add (pos.x () - 1, pos.y ()); 00834 } 00835 pos += step; 00836 } 00837 00838 for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) { 00839 vertical_coutline_projection (out_it.data (), stats); 00840 } 00841 } 00842 00843 00844 /********************************************************************** 00845 * TO_BLOCK::TO_BLOCK 00846 * 00847 * Constructor to make a TO_BLOCK from a real block. 00848 **********************************************************************/ 00849 00850 TO_BLOCK::TO_BLOCK( //make a block 00851 BLOCK *src_block //real block 00852 ) { 00853 clear(); 00854 block = src_block; 00855 } 00856 00857 static void clear_blobnboxes(BLOBNBOX_LIST* boxes) { 00858 BLOBNBOX_IT it = boxes; 00859 // A BLOBNBOX generally doesn't own its blobs, so if they do, you 00860 // have to delete them explicitly. 00861 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 00862 BLOBNBOX* box = it.data(); 00863 if (box->cblob() != NULL) 00864 delete box->cblob(); 00865 } 00866 } 00867 00868 /********************************************************************** 00869 * TO_BLOCK::clear 00870 * 00871 * Zero out all scalar members. 00872 **********************************************************************/ 00873 void TO_BLOCK::clear() { 00874 block = NULL; 00875 pitch_decision = PITCH_DUNNO; 00876 line_spacing = 0.0; 00877 line_size = 0.0; 00878 max_blob_size = 0.0; 00879 baseline_offset = 0.0; 00880 xheight = 0.0; 00881 fixed_pitch = 0.0; 00882 kern_size = 0.0; 00883 space_size = 0.0; 00884 min_space = 0; 00885 max_nonspace = 0; 00886 fp_space = 0.0; 00887 fp_nonsp = 0.0; 00888 pr_space = 0.0; 00889 pr_nonsp = 0.0; 00890 key_row = NULL; 00891 } 00892 00893 00894 TO_BLOCK::~TO_BLOCK() { 00895 // Any residual BLOBNBOXes at this stage own their blobs, so delete them. 00896 clear_blobnboxes(&blobs); 00897 clear_blobnboxes(&underlines); 00898 clear_blobnboxes(&noise_blobs); 00899 clear_blobnboxes(&small_blobs); 00900 clear_blobnboxes(&large_blobs); 00901 } 00902 00903 // Helper function to divide the input blobs over noise, small, medium 00904 // and large lists. Blobs small in height and (small in width or large in width) 00905 // go in the noise list. Dash (-) candidates go in the small list, and 00906 // medium and large are by height. 00907 // SIDE-EFFECT: reset all blobs to initial state by calling Init(). 00908 static void SizeFilterBlobs(int min_height, int max_height, 00909 BLOBNBOX_LIST* src_list, 00910 BLOBNBOX_LIST* noise_list, 00911 BLOBNBOX_LIST* small_list, 00912 BLOBNBOX_LIST* medium_list, 00913 BLOBNBOX_LIST* large_list) { 00914 BLOBNBOX_IT noise_it(noise_list); 00915 BLOBNBOX_IT small_it(small_list); 00916 BLOBNBOX_IT medium_it(medium_list); 00917 BLOBNBOX_IT large_it(large_list); 00918 for (BLOBNBOX_IT src_it(src_list); !src_it.empty(); src_it.forward()) { 00919 BLOBNBOX* blob = src_it.extract(); 00920 blob->ReInit(); 00921 int width = blob->bounding_box().width(); 00922 int height = blob->bounding_box().height(); 00923 if (height < min_height && 00924 (width < min_height || width > max_height)) 00925 noise_it.add_after_then_move(blob); 00926 else if (height > max_height) 00927 large_it.add_after_then_move(blob); 00928 else if (height < min_height) 00929 small_it.add_after_then_move(blob); 00930 else 00931 medium_it.add_after_then_move(blob); 00932 } 00933 } 00934 00935 // Reorganize the blob lists with a different definition of small, medium 00936 // and large, compared to the original definition. 00937 // Height is still the primary filter key, but medium width blobs of small 00938 // height become small, and very wide blobs of small height stay noise, along 00939 // with small dot-shaped blobs. 00940 void TO_BLOCK::ReSetAndReFilterBlobs() { 00941 int min_height = IntCastRounded(kMinMediumSizeRatio * line_size); 00942 int max_height = IntCastRounded(kMaxMediumSizeRatio * line_size); 00943 BLOBNBOX_LIST noise_list; 00944 BLOBNBOX_LIST small_list; 00945 BLOBNBOX_LIST medium_list; 00946 BLOBNBOX_LIST large_list; 00947 SizeFilterBlobs(min_height, max_height, &blobs, 00948 &noise_list, &small_list, &medium_list, &large_list); 00949 SizeFilterBlobs(min_height, max_height, &large_blobs, 00950 &noise_list, &small_list, &medium_list, &large_list); 00951 SizeFilterBlobs(min_height, max_height, &small_blobs, 00952 &noise_list, &small_list, &medium_list, &large_list); 00953 SizeFilterBlobs(min_height, max_height, &noise_blobs, 00954 &noise_list, &small_list, &medium_list, &large_list); 00955 BLOBNBOX_IT blob_it(&blobs); 00956 blob_it.add_list_after(&medium_list); 00957 blob_it.set_to_list(&large_blobs); 00958 blob_it.add_list_after(&large_list); 00959 blob_it.set_to_list(&small_blobs); 00960 blob_it.add_list_after(&small_list); 00961 blob_it.set_to_list(&noise_blobs); 00962 blob_it.add_list_after(&noise_list); 00963 } 00964 00965 // Deletes noise blobs from all lists where not owned by a ColPartition. 00966 void TO_BLOCK::DeleteUnownedNoise() { 00967 BLOBNBOX::CleanNeighbours(&blobs); 00968 BLOBNBOX::CleanNeighbours(&small_blobs); 00969 BLOBNBOX::CleanNeighbours(&noise_blobs); 00970 BLOBNBOX::CleanNeighbours(&large_blobs); 00971 BLOBNBOX::DeleteNoiseBlobs(&blobs); 00972 BLOBNBOX::DeleteNoiseBlobs(&small_blobs); 00973 BLOBNBOX::DeleteNoiseBlobs(&noise_blobs); 00974 BLOBNBOX::DeleteNoiseBlobs(&large_blobs); 00975 } 00976 00977 #ifndef GRAPHICS_DISABLED 00978 // Draw the noise blobs from all lists in red. 00979 void TO_BLOCK::plot_noise_blobs(ScrollView* win) { 00980 BLOBNBOX::PlotNoiseBlobs(&noise_blobs, ScrollView::RED, ScrollView::RED, win); 00981 BLOBNBOX::PlotNoiseBlobs(&small_blobs, ScrollView::RED, ScrollView::RED, win); 00982 BLOBNBOX::PlotNoiseBlobs(&large_blobs, ScrollView::RED, ScrollView::RED, win); 00983 BLOBNBOX::PlotNoiseBlobs(&blobs, ScrollView::RED, ScrollView::RED, win); 00984 } 00985 00986 // Draw the blobs on the various lists in the block in different colors. 00987 void TO_BLOCK::plot_graded_blobs(ScrollView* win) { 00988 BLOBNBOX::PlotBlobs(&noise_blobs, ScrollView::CORAL, ScrollView::BLUE, win); 00989 BLOBNBOX::PlotBlobs(&small_blobs, ScrollView::GOLDENROD, ScrollView::YELLOW, 00990 win); 00991 BLOBNBOX::PlotBlobs(&large_blobs, ScrollView::DARK_GREEN, ScrollView::YELLOW, 00992 win); 00993 BLOBNBOX::PlotBlobs(&blobs, ScrollView::WHITE, ScrollView::BROWN, win); 00994 } 00995 00996 /********************************************************************** 00997 * plot_blob_list 00998 * 00999 * Draw a list of blobs. 01000 **********************************************************************/ 01001 01002 void plot_blob_list(ScrollView* win, // window to draw in 01003 BLOBNBOX_LIST *list, // blob list 01004 ScrollView::Color body_colour, // colour to draw 01005 ScrollView::Color child_colour) { // colour of child 01006 BLOBNBOX_IT it = list; 01007 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 01008 it.data()->plot(win, body_colour, child_colour); 01009 } 01010 } 01011 #endif // GRAPHICS_DISABLED