Tesseract
3.02
|
00001 /* -*-C-*- 00002 ******************************************************************************** 00003 * 00004 * File: blobs.c (Formerly blobs.c) 00005 * Description: Blob definition 00006 * Author: Mark Seaman, OCR Technology 00007 * Created: Fri Oct 27 15:39:52 1989 00008 * Modified: Thu Mar 28 15:33:26 1991 (Mark Seaman) marks@hpgrlt 00009 * Language: C 00010 * Package: N/A 00011 * Status: Experimental (Do Not Distribute) 00012 * 00013 * (c) Copyright 1989, Hewlett-Packard Company. 00014 ** Licensed under the Apache License, Version 2.0 (the "License"); 00015 ** you may not use this file except in compliance with the License. 00016 ** You may obtain a copy of the License at 00017 ** http://www.apache.org/licenses/LICENSE-2.0 00018 ** Unless required by applicable law or agreed to in writing, software 00019 ** distributed under the License is distributed on an "AS IS" BASIS, 00020 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00021 ** See the License for the specific language governing permissions and 00022 ** limitations under the License. 00023 * 00024 *********************************************************************************/ 00025 00026 /*---------------------------------------------------------------------- 00027 I n c l u d e s 00028 ----------------------------------------------------------------------*/ 00029 #include "mfcpch.h" 00030 #include "blobs.h" 00031 #include "ccstruct.h" 00032 #include "clst.h" 00033 #include "cutil.h" 00034 #include "emalloc.h" 00035 #include "helpers.h" 00036 #include "ndminx.h" 00037 #include "normalis.h" 00038 #include "ocrblock.h" 00039 #include "ocrrow.h" 00040 #include "points.h" 00041 #include "polyaprx.h" 00042 #include "structures.h" 00043 #include "werd.h" 00044 00045 using tesseract::CCStruct; 00046 00047 // A Vector representing the "vertical" direction when measuring the 00048 // divisiblity of blobs into multiple blobs just by separating outlines. 00049 // See divisible_blob below for the use. 00050 const TPOINT kDivisibleVerticalUpright(0, 1); 00051 // A vector representing the "vertical" direction for italic text for use 00052 // when separating outlines. Using it actually deteriorates final accuracy, 00053 // so it is only used for ApplyBoxes chopping to get a better segmentation. 00054 const TPOINT kDivisibleVerticalItalic(1, 5); 00055 00056 /*---------------------------------------------------------------------- 00057 F u n c t i o n s 00058 ----------------------------------------------------------------------*/ 00059 00060 CLISTIZE(EDGEPT); 00061 00062 // Consume the circular list of EDGEPTs to make a TESSLINE. 00063 TESSLINE* TESSLINE::BuildFromOutlineList(EDGEPT* outline) { 00064 TESSLINE* result = new TESSLINE; 00065 result->loop = outline; 00066 result->SetupFromPos(); 00067 return result; 00068 } 00069 00070 // Copies the data and the outline, but leaves next untouched. 00071 void TESSLINE::CopyFrom(const TESSLINE& src) { 00072 Clear(); 00073 topleft = src.topleft; 00074 botright = src.botright; 00075 start = src.start; 00076 is_hole = src.is_hole; 00077 if (src.loop != NULL) { 00078 EDGEPT* prevpt = NULL; 00079 EDGEPT* newpt = NULL; 00080 EDGEPT* srcpt = src.loop; 00081 do { 00082 newpt = new EDGEPT(*srcpt); 00083 if (prevpt == NULL) { 00084 loop = newpt; 00085 } else { 00086 newpt->prev = prevpt; 00087 prevpt->next = newpt; 00088 } 00089 prevpt = newpt; 00090 srcpt = srcpt->next; 00091 } while (srcpt != src.loop); 00092 loop->prev = newpt; 00093 newpt->next = loop; 00094 } 00095 } 00096 00097 // Deletes owned data. 00098 void TESSLINE::Clear() { 00099 if (loop == NULL) 00100 return; 00101 00102 EDGEPT* this_edge = loop; 00103 do { 00104 EDGEPT* next_edge = this_edge->next; 00105 delete this_edge; 00106 this_edge = next_edge; 00107 } while (this_edge != loop); 00108 loop = NULL; 00109 } 00110 00111 // Normalize in-place using the DENORM. 00112 void TESSLINE::Normalize(const DENORM& denorm) { 00113 EDGEPT* pt = loop; 00114 do { 00115 denorm.LocalNormTransform(pt->pos, &pt->pos); 00116 pt = pt->next; 00117 } while (pt != loop); 00118 SetupFromPos(); 00119 } 00120 00121 // Rotates by the given rotation in place. 00122 void TESSLINE::Rotate(const FCOORD rot) { 00123 EDGEPT* pt = loop; 00124 do { 00125 int tmp = static_cast<int>(floor(pt->pos.x * rot.x() - 00126 pt->pos.y * rot.y() + 0.5)); 00127 pt->pos.y = static_cast<int>(floor(pt->pos.y * rot.x() + 00128 pt->pos.x * rot.y() + 0.5)); 00129 pt->pos.x = tmp; 00130 pt = pt->next; 00131 } while (pt != loop); 00132 SetupFromPos(); 00133 } 00134 00135 // Moves by the given vec in place. 00136 void TESSLINE::Move(const ICOORD vec) { 00137 EDGEPT* pt = loop; 00138 do { 00139 pt->pos.x += vec.x(); 00140 pt->pos.y += vec.y(); 00141 pt = pt->next; 00142 } while (pt != loop); 00143 SetupFromPos(); 00144 } 00145 00146 // Scales by the given factor in place. 00147 void TESSLINE::Scale(float factor) { 00148 EDGEPT* pt = loop; 00149 do { 00150 pt->pos.x = static_cast<int>(floor(pt->pos.x * factor + 0.5)); 00151 pt->pos.y = static_cast<int>(floor(pt->pos.y * factor + 0.5)); 00152 pt = pt->next; 00153 } while (pt != loop); 00154 SetupFromPos(); 00155 } 00156 00157 // Sets up the start and vec members of the loop from the pos members. 00158 void TESSLINE::SetupFromPos() { 00159 EDGEPT* pt = loop; 00160 do { 00161 pt->vec.x = pt->next->pos.x - pt->pos.x; 00162 pt->vec.y = pt->next->pos.y - pt->pos.y; 00163 pt = pt->next; 00164 } while (pt != loop); 00165 start = pt->pos; 00166 ComputeBoundingBox(); 00167 } 00168 00169 // Recomputes the bounding box from the points in the loop. 00170 void TESSLINE::ComputeBoundingBox() { 00171 int minx = MAX_INT32; 00172 int miny = MAX_INT32; 00173 int maxx = -MAX_INT32; 00174 int maxy = -MAX_INT32; 00175 00176 // Find boundaries. 00177 start = loop->pos; 00178 EDGEPT* this_edge = loop; 00179 do { 00180 if (!this_edge->IsHidden() || !this_edge->prev->IsHidden()) { 00181 if (this_edge->pos.x < minx) 00182 minx = this_edge->pos.x; 00183 if (this_edge->pos.y < miny) 00184 miny = this_edge->pos.y; 00185 if (this_edge->pos.x > maxx) 00186 maxx = this_edge->pos.x; 00187 if (this_edge->pos.y > maxy) 00188 maxy = this_edge->pos.y; 00189 } 00190 this_edge = this_edge->next; 00191 } while (this_edge != loop); 00192 // Reset bounds. 00193 topleft.x = minx; 00194 topleft.y = maxy; 00195 botright.x = maxx; 00196 botright.y = miny; 00197 } 00198 00199 // Computes the min and max cross product of the outline points with the 00200 // given vec and returns the results in min_xp and max_xp. Geometrically 00201 // this is the left and right edge of the outline perpendicular to the 00202 // given direction, but to get the distance units correct, you would 00203 // have to divide by the modulus of vec. 00204 void TESSLINE::MinMaxCrossProduct(const TPOINT vec, 00205 int* min_xp, int* max_xp) const { 00206 *min_xp = MAX_INT32; 00207 *max_xp = MIN_INT32; 00208 EDGEPT* this_edge = loop; 00209 do { 00210 if (!this_edge->IsHidden() || !this_edge->prev->IsHidden()) { 00211 int product = CROSS(this_edge->pos, vec); 00212 UpdateRange(product, min_xp, max_xp); 00213 } 00214 this_edge = this_edge->next; 00215 } while (this_edge != loop); 00216 } 00217 00218 TBOX TESSLINE::bounding_box() const { 00219 return TBOX(topleft.x, botright.y, botright.x, topleft.y); 00220 } 00221 00222 void TESSLINE::plot(ScrollView* window, ScrollView::Color color, 00223 ScrollView::Color child_color) { 00224 #ifndef GRAPHICS_DISABLED 00225 if (is_hole) 00226 window->Pen(child_color); 00227 else 00228 window->Pen(color); 00229 window->SetCursor(start.x, start.y); 00230 EDGEPT* pt = loop; 00231 do { 00232 bool prev_hidden = pt->IsHidden(); 00233 pt = pt->next; 00234 if (prev_hidden) 00235 window->SetCursor(pt->pos.x, pt->pos.y); 00236 else 00237 window->DrawTo(pt->pos.x, pt->pos.y); 00238 } while (pt != loop); 00239 #endif // GRAPHICS_DISABLED 00240 } 00241 00242 // Iterate the given list of outlines, converting to TESSLINE by polygonal 00243 // approximation and recursively any children, returning the current tail 00244 // of the resulting list of TESSLINEs. 00245 static TESSLINE** ApproximateOutlineList(C_OUTLINE_LIST* outlines, 00246 bool children, 00247 TESSLINE** tail) { 00248 C_OUTLINE_IT ol_it(outlines); 00249 for (ol_it.mark_cycle_pt(); !ol_it.cycled_list(); ol_it.forward()) { 00250 C_OUTLINE* outline = ol_it.data(); 00251 TESSLINE* tessline = ApproximateOutline(outline); 00252 tessline->is_hole = children; 00253 *tail = tessline; 00254 tail = &tessline->next; 00255 if (!outline->child()->empty()) { 00256 tail = ApproximateOutlineList(outline->child(), true, tail); 00257 } 00258 } 00259 return tail; 00260 } 00261 00262 // Factory to build a TBLOB from a C_BLOB with polygonal 00263 // approximation along the way. 00264 TBLOB* TBLOB::PolygonalCopy(C_BLOB* src) { 00265 C_OUTLINE_IT ol_it = src->out_list(); 00266 TBLOB* tblob = new TBLOB; 00267 ApproximateOutlineList(src->out_list(), false, &tblob->outlines); 00268 return tblob; 00269 } 00270 00271 // Normalizes the blob for classification only if needed. 00272 // (Normally this means a non-zero classify rotation.) 00273 // If no Normalization is needed, then NULL is returned, and the denorm is 00274 // unchanged. Otherwise a new TBLOB is returned and the denorm points to 00275 // a new DENORM. In this case, both the TBLOB and DENORM must be deleted. 00276 TBLOB* TBLOB::ClassifyNormalizeIfNeeded(const DENORM** denorm) const { 00277 TBLOB* rotated_blob = NULL; 00278 // If necessary, copy the blob and rotate it. The rotation is always 00279 // +/- 90 degrees, as 180 was already taken care of. 00280 if ((*denorm)->block() != NULL && 00281 (*denorm)->block()->classify_rotation().y() != 0.0) { 00282 TBOX box = bounding_box(); 00283 int x_middle = (box.left() + box.right()) / 2; 00284 int y_middle = (box.top() + box.bottom()) / 2; 00285 rotated_blob = new TBLOB(*this); 00286 const FCOORD& rotation = (*denorm)->block()->classify_rotation(); 00287 DENORM* norm = new DENORM; 00288 // Move the rotated blob back to the same y-position so that we 00289 // can still distinguish similar glyphs with differeny y-position. 00290 float target_y = kBlnBaselineOffset + 00291 (rotation.y() > 0 ? x_middle - box.left() : box.right() - x_middle); 00292 norm->SetupNormalization(NULL, NULL, &rotation, *denorm, NULL, 0, 00293 x_middle, y_middle, 1.0f, 1.0f, 0.0f, target_y); 00294 // x_middle, y_middle, 1.0f, 1.0f, 0.0f, y_middle); 00295 rotated_blob->Normalize(*norm); 00296 *denorm = norm; 00297 } 00298 return rotated_blob; 00299 } 00300 00301 // Copies the data and the outline, but leaves next untouched. 00302 void TBLOB::CopyFrom(const TBLOB& src) { 00303 Clear(); 00304 TESSLINE* prev_outline = NULL; 00305 for (TESSLINE* srcline = src.outlines; srcline != NULL; 00306 srcline = srcline->next) { 00307 TESSLINE* new_outline = new TESSLINE(*srcline); 00308 if (outlines == NULL) 00309 outlines = new_outline; 00310 else 00311 prev_outline->next = new_outline; 00312 prev_outline = new_outline; 00313 } 00314 } 00315 00316 // Deletes owned data. 00317 void TBLOB::Clear() { 00318 for (TESSLINE* next_outline = NULL; outlines != NULL; 00319 outlines = next_outline) { 00320 next_outline = outlines->next; 00321 delete outlines; 00322 } 00323 } 00324 // Normalize in-place using the DENORM. 00325 void TBLOB::Normalize(const DENORM& denorm) { 00326 // TODO(rays) outline->Normalize is more accurate, but breaks tests due 00327 // the changes it makes. Reinstate this code with a retraining. 00328 #if 1 00329 for (TESSLINE* outline = outlines; outline != NULL; outline = outline->next) { 00330 outline->Normalize(denorm); 00331 } 00332 #else 00333 denorm.LocalNormBlob(this); 00334 #endif 00335 } 00336 00337 // Rotates by the given rotation in place. 00338 void TBLOB::Rotate(const FCOORD rotation) { 00339 for (TESSLINE* outline = outlines; outline != NULL; outline = outline->next) { 00340 outline->Rotate(rotation); 00341 } 00342 } 00343 00344 // Moves by the given vec in place. 00345 void TBLOB::Move(const ICOORD vec) { 00346 for (TESSLINE* outline = outlines; outline != NULL; outline = outline->next) { 00347 outline->Move(vec); 00348 } 00349 } 00350 00351 // Scales by the given factor in place. 00352 void TBLOB::Scale(float factor) { 00353 for (TESSLINE* outline = outlines; outline != NULL; outline = outline->next) { 00354 outline->Scale(factor); 00355 } 00356 } 00357 00358 // Recomputes the bounding boxes of the outlines. 00359 void TBLOB::ComputeBoundingBoxes() { 00360 for (TESSLINE* outline = outlines; outline != NULL; outline = outline->next) { 00361 outline->ComputeBoundingBox(); 00362 } 00363 } 00364 00365 // Returns the number of outlines. 00366 int TBLOB::NumOutlines() const { 00367 int result = 0; 00368 for (TESSLINE* outline = outlines; outline != NULL; outline = outline->next) 00369 ++result; 00370 return result; 00371 } 00372 00373 /********************************************************************** 00374 * TBLOB::bounding_box() 00375 * 00376 * Compute the bounding_box of a compound blob, defined to be the 00377 * bounding box of the union of all top-level outlines in the blob. 00378 **********************************************************************/ 00379 TBOX TBLOB::bounding_box() const { 00380 if (outlines == NULL) 00381 return TBOX(0, 0, 0, 0); 00382 TESSLINE *outline = outlines; 00383 TBOX box = outline->bounding_box(); 00384 for (outline = outline->next; outline != NULL; outline = outline->next) { 00385 box += outline->bounding_box(); 00386 } 00387 return box; 00388 } 00389 00390 void TBLOB::plot(ScrollView* window, ScrollView::Color color, 00391 ScrollView::Color child_color) { 00392 for (TESSLINE* outline = outlines; outline != NULL; outline = outline->next) 00393 outline->plot(window, color, child_color); 00394 } 00395 00396 // Factory to build a TWERD from a (C_BLOB) WERD, with polygonal 00397 // approximation along the way. 00398 TWERD* TWERD::PolygonalCopy(WERD* src) { 00399 TWERD* tessword = new TWERD; 00400 tessword->latin_script = src->flag(W_SCRIPT_IS_LATIN); 00401 C_BLOB_IT b_it(src->cblob_list()); 00402 TBLOB *tail = NULL; 00403 for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { 00404 C_BLOB* blob = b_it.data(); 00405 TBLOB* tblob = TBLOB::PolygonalCopy(blob); 00406 if (tail == NULL) { 00407 tessword->blobs = tblob; 00408 } else { 00409 tail->next = tblob; 00410 } 00411 tail = tblob; 00412 } 00413 return tessword; 00414 } 00415 00416 // Normalize in-place and record the normalization in the DENORM. 00417 void TWERD::SetupBLNormalize(const BLOCK* block, const ROW* row, 00418 float x_height, bool numeric_mode, 00419 DENORM* denorm) const { 00420 int num_segments = 0; 00421 DENORM_SEG* segs = NULL; 00422 if (numeric_mode) { 00423 segs = new DENORM_SEG[NumBlobs()]; 00424 for (TBLOB* blob = blobs; blob != NULL; blob = blob->next) { 00425 TBOX blob_box = blob->bounding_box(); 00426 float factor = kBlnXHeight / x_height; 00427 factor = ClipToRange(kBlnXHeight * 4.0f / (3 * blob_box.height()), 00428 factor, factor * 1.5f); 00429 segs[num_segments].xstart = blob_box.left(); 00430 segs[num_segments].ycoord = blob_box.bottom(); 00431 segs[num_segments++].scale_factor = factor; 00432 } 00433 } 00434 denorm->SetupBLNormalize(block, row, x_height, bounding_box(), 00435 num_segments, segs); 00436 delete [] segs; 00437 } 00438 00439 // Normalize in-place using the DENORM. 00440 void TWERD::Normalize(const DENORM& denorm) { 00441 for (TBLOB* blob = blobs; blob != NULL; blob = blob->next) { 00442 blob->Normalize(denorm); 00443 } 00444 } 00445 00446 // Copies the data and the blobs, but leaves next untouched. 00447 void TWERD::CopyFrom(const TWERD& src) { 00448 Clear(); 00449 latin_script = src.latin_script; 00450 TBLOB* prev_blob = NULL; 00451 for (TBLOB* srcblob = src.blobs; srcblob != NULL; srcblob = srcblob->next) { 00452 TBLOB* new_blob = new TBLOB(*srcblob); 00453 if (blobs == NULL) 00454 blobs = new_blob; 00455 else 00456 prev_blob->next = new_blob; 00457 prev_blob = new_blob; 00458 } 00459 } 00460 00461 // Deletes owned data. 00462 void TWERD::Clear() { 00463 for (TBLOB* next_blob = NULL; blobs != NULL; blobs = next_blob) { 00464 next_blob = blobs->next; 00465 delete blobs; 00466 } 00467 } 00468 00469 // Recomputes the bounding boxes of the blobs. 00470 void TWERD::ComputeBoundingBoxes() { 00471 for (TBLOB* blob = blobs; blob != NULL; blob = blob->next) { 00472 blob->ComputeBoundingBoxes(); 00473 } 00474 } 00475 00476 TBOX TWERD::bounding_box() const { 00477 TBOX result; 00478 for (TBLOB* blob = blobs; blob != NULL; blob = blob->next) { 00479 TBOX box = blob->bounding_box(); 00480 result += box; 00481 } 00482 return result; 00483 } 00484 00485 // Merges the blobs from start to end, not including end, and deletes 00486 // the blobs between start and end. 00487 void TWERD::MergeBlobs(int start, int end) { 00488 TBLOB* blob = blobs; 00489 for (int i = 0; i < start && blob != NULL; ++i) 00490 blob = blob->next; 00491 if (blob == NULL || blob->next == NULL) 00492 return; 00493 TBLOB* next_blob = blob->next; 00494 TESSLINE* outline = blob->outlines; 00495 for (int i = start + 1; i < end && next_blob != NULL; ++i) { 00496 // Take the outlines from the next blob. 00497 if (outline == NULL) { 00498 blob->outlines = next_blob->outlines; 00499 outline = blob->outlines; 00500 } else { 00501 while (outline->next != NULL) 00502 outline = outline->next; 00503 outline->next = next_blob->outlines; 00504 next_blob->outlines = NULL; 00505 } 00506 // Delete the next blob and move on. 00507 TBLOB* dead_blob = next_blob; 00508 next_blob = next_blob->next; 00509 blob->next = next_blob; 00510 delete dead_blob; 00511 } 00512 } 00513 00514 void TWERD::plot(ScrollView* window) { 00515 ScrollView::Color color = WERD::NextColor(ScrollView::BLACK); 00516 for (TBLOB* blob = blobs; blob != NULL; blob = blob->next) { 00517 blob->plot(window, color, ScrollView::BROWN); 00518 color = WERD::NextColor(color); 00519 } 00520 } 00521 00522 /********************************************************************** 00523 * blob_origin 00524 * 00525 * Compute the origin of a compound blob, define to be the centre 00526 * of the bounding box. 00527 **********************************************************************/ 00528 void blob_origin(TBLOB *blob, /*blob to compute on */ 00529 TPOINT *origin) { /*return value */ 00530 TBOX bbox = blob->bounding_box(); 00531 *origin = (bbox.topleft() + bbox.botright()) / 2; 00532 } 00533 00534 /********************************************************************** 00535 * blobs_widths 00536 * 00537 * Compute the widths of a list of blobs. Return an array of the widths 00538 * and gaps. 00539 **********************************************************************/ 00540 WIDTH_RECORD *blobs_widths(TBLOB *blobs) { /*blob to compute on */ 00541 WIDTH_RECORD *width_record; 00542 TPOINT topleft; /*bounding box */ 00543 TPOINT botright; 00544 int i = 0; 00545 int blob_end; 00546 int num_blobs = count_blobs (blobs); 00547 00548 /* Get memory */ 00549 width_record = (WIDTH_RECORD *) memalloc (sizeof (int) * num_blobs * 2); 00550 width_record->num_chars = num_blobs; 00551 00552 TBOX bbox = blobs->bounding_box(); 00553 width_record->widths[i++] = bbox.width(); 00554 /* First width */ 00555 blob_end = bbox.right(); 00556 00557 for (TBLOB* blob = blobs->next; blob != NULL; blob = blob->next) { 00558 TBOX curbox = blob->bounding_box(); 00559 width_record->widths[i++] = curbox.left() - blob_end; 00560 width_record->widths[i++] = curbox.width(); 00561 blob_end = curbox.right(); 00562 } 00563 return width_record; 00564 } 00565 00566 00567 /********************************************************************** 00568 * count_blobs 00569 * 00570 * Return a count of the number of blobs attached to this one. 00571 **********************************************************************/ 00572 int count_blobs(TBLOB *blobs) { 00573 int x = 0; 00574 00575 for (TBLOB* b = blobs; b != NULL; b = b->next) 00576 x++; 00577 return x; 00578 } 00579 00580 /********************************************************************** 00581 * divisible_blob 00582 * 00583 * Returns true if the blob contains multiple outlines than can be 00584 * separated using divide_blobs. Sets the location to be used in the 00585 * call to divide_blobs. 00586 **********************************************************************/ 00587 bool divisible_blob(TBLOB *blob, bool italic_blob, TPOINT* location) { 00588 if (blob->outlines == NULL || blob->outlines->next == NULL) 00589 return false; // Need at least 2 outlines for it to be possible. 00590 int max_gap = 0; 00591 TPOINT vertical = italic_blob ? kDivisibleVerticalItalic 00592 : kDivisibleVerticalUpright; 00593 for (TESSLINE* outline1 = blob->outlines; outline1 != NULL; 00594 outline1 = outline1->next) { 00595 if (outline1->is_hole) 00596 continue; // Holes do not count as separable. 00597 TPOINT mid_pt1( 00598 static_cast<inT16>((outline1->topleft.x + outline1->botright.x) / 2), 00599 static_cast<inT16>((outline1->topleft.y + outline1->botright.y) / 2)); 00600 int mid_prod1 = CROSS(mid_pt1, vertical); 00601 int min_prod1, max_prod1; 00602 outline1->MinMaxCrossProduct(vertical, &min_prod1, &max_prod1); 00603 for (TESSLINE* outline2 = outline1->next; outline2 != NULL; 00604 outline2 = outline2->next) { 00605 if (outline2->is_hole) 00606 continue; // Holes do not count as separable. 00607 TPOINT mid_pt2( 00608 static_cast<inT16>((outline2->topleft.x + outline2->botright.x) / 2), 00609 static_cast<inT16>((outline2->topleft.y + outline2->botright.y) / 2)); 00610 int mid_prod2 = CROSS(mid_pt2, vertical); 00611 int min_prod2, max_prod2; 00612 outline2->MinMaxCrossProduct(vertical, &min_prod2, &max_prod2); 00613 int mid_gap = abs(mid_prod2 - mid_prod1); 00614 int overlap = MIN(max_prod1, max_prod2) - MAX(min_prod1, min_prod2); 00615 if (mid_gap - overlap / 4 > max_gap) { 00616 max_gap = mid_gap - overlap / 4; 00617 *location = mid_pt1; 00618 *location += mid_pt2; 00619 *location /= 2; 00620 } 00621 } 00622 } 00623 // Use the y component of the vertical vector as an approximation to its 00624 // length. 00625 return max_gap > vertical.y; 00626 } 00627 00628 /********************************************************************** 00629 * divide_blobs 00630 * 00631 * Create two blobs by grouping the outlines in the appropriate blob. 00632 * The outlines that are beyond the location point are moved to the 00633 * other blob. The ones whose x location is less than that point are 00634 * retained in the original blob. 00635 **********************************************************************/ 00636 void divide_blobs(TBLOB *blob, TBLOB *other_blob, bool italic_blob, 00637 const TPOINT& location) { 00638 TPOINT vertical = italic_blob ? kDivisibleVerticalItalic 00639 : kDivisibleVerticalUpright; 00640 TESSLINE *outline1 = NULL; 00641 TESSLINE *outline2 = NULL; 00642 00643 TESSLINE *outline = blob->outlines; 00644 blob->outlines = NULL; 00645 int location_prod = CROSS(location, vertical); 00646 00647 while (outline != NULL) { 00648 TPOINT mid_pt( 00649 static_cast<inT16>((outline->topleft.x + outline->botright.x) / 2), 00650 static_cast<inT16>((outline->topleft.y + outline->botright.y) / 2)); 00651 int mid_prod = CROSS(mid_pt, vertical); 00652 if (mid_prod < location_prod) { 00653 // Outline is in left blob. 00654 if (outline1) 00655 outline1->next = outline; 00656 else 00657 blob->outlines = outline; 00658 outline1 = outline; 00659 } else { 00660 // Outline is in right blob. 00661 if (outline2) 00662 outline2->next = outline; 00663 else 00664 other_blob->outlines = outline; 00665 outline2 = outline; 00666 } 00667 outline = outline->next; 00668 } 00669 00670 if (outline1) 00671 outline1->next = NULL; 00672 if (outline2) 00673 outline2->next = NULL; 00674 }