Tesseract  3.02
tesseract-ocr/ccstruct/blobs.cpp
Go to the documentation of this file.
00001 /* -*-C-*-
00002  ********************************************************************************
00003  *
00004  * File:        blobs.c  (Formerly blobs.c)
00005  * Description:  Blob definition
00006  * Author:       Mark Seaman, OCR Technology
00007  * Created:      Fri Oct 27 15:39:52 1989
00008  * Modified:     Thu Mar 28 15:33:26 1991 (Mark Seaman) marks@hpgrlt
00009  * Language:     C
00010  * Package:      N/A
00011  * Status:       Experimental (Do Not Distribute)
00012  *
00013  * (c) Copyright 1989, Hewlett-Packard Company.
00014  ** Licensed under the Apache License, Version 2.0 (the "License");
00015  ** you may not use this file except in compliance with the License.
00016  ** You may obtain a copy of the License at
00017  ** http://www.apache.org/licenses/LICENSE-2.0
00018  ** Unless required by applicable law or agreed to in writing, software
00019  ** distributed under the License is distributed on an "AS IS" BASIS,
00020  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00021  ** See the License for the specific language governing permissions and
00022  ** limitations under the License.
00023  *
00024  *********************************************************************************/
00025 
00026 /*----------------------------------------------------------------------
00027               I n c l u d e s
00028 ----------------------------------------------------------------------*/
00029 #include "mfcpch.h"
00030 #include "blobs.h"
00031 #include "ccstruct.h"
00032 #include "clst.h"
00033 #include "cutil.h"
00034 #include "emalloc.h"
00035 #include "helpers.h"
00036 #include "ndminx.h"
00037 #include "normalis.h"
00038 #include "ocrblock.h"
00039 #include "ocrrow.h"
00040 #include "points.h"
00041 #include "polyaprx.h"
00042 #include "structures.h"
00043 #include "werd.h"
00044 
00045 using tesseract::CCStruct;
00046 
00047 // A Vector representing the "vertical" direction when measuring the
00048 // divisiblity of blobs into multiple blobs just by separating outlines.
00049 // See divisible_blob below for the use.
00050 const TPOINT kDivisibleVerticalUpright(0, 1);
00051 // A vector representing the "vertical" direction for italic text for use
00052 // when separating outlines. Using it actually deteriorates final accuracy,
00053 // so it is only used for ApplyBoxes chopping to get a better segmentation.
00054 const TPOINT kDivisibleVerticalItalic(1, 5);
00055 
00056 /*----------------------------------------------------------------------
00057               F u n c t i o n s
00058 ----------------------------------------------------------------------*/
00059 
00060 CLISTIZE(EDGEPT);
00061 
00062 // Consume the circular list of EDGEPTs to make a TESSLINE.
00063 TESSLINE* TESSLINE::BuildFromOutlineList(EDGEPT* outline) {
00064   TESSLINE* result = new TESSLINE;
00065   result->loop = outline;
00066   result->SetupFromPos();
00067   return result;
00068 }
00069 
00070 // Copies the data and the outline, but leaves next untouched.
00071 void TESSLINE::CopyFrom(const TESSLINE& src) {
00072   Clear();
00073   topleft = src.topleft;
00074   botright = src.botright;
00075   start = src.start;
00076   is_hole = src.is_hole;
00077   if (src.loop != NULL) {
00078     EDGEPT* prevpt = NULL;
00079     EDGEPT* newpt = NULL;
00080     EDGEPT* srcpt = src.loop;
00081     do {
00082       newpt = new EDGEPT(*srcpt);
00083       if (prevpt == NULL) {
00084         loop = newpt;
00085       } else {
00086         newpt->prev = prevpt;
00087         prevpt->next = newpt;
00088       }
00089       prevpt = newpt;
00090       srcpt = srcpt->next;
00091     } while (srcpt != src.loop);
00092     loop->prev = newpt;
00093     newpt->next = loop;
00094   }
00095 }
00096 
00097 // Deletes owned data.
00098 void TESSLINE::Clear() {
00099   if (loop == NULL)
00100     return;
00101 
00102   EDGEPT* this_edge = loop;
00103   do {
00104     EDGEPT* next_edge = this_edge->next;
00105     delete this_edge;
00106     this_edge = next_edge;
00107   } while (this_edge != loop);
00108   loop = NULL;
00109 }
00110 
00111 // Normalize in-place using the DENORM.
00112 void TESSLINE::Normalize(const DENORM& denorm) {
00113   EDGEPT* pt = loop;
00114   do {
00115     denorm.LocalNormTransform(pt->pos, &pt->pos);
00116     pt = pt->next;
00117   } while (pt != loop);
00118   SetupFromPos();
00119 }
00120 
00121 // Rotates by the given rotation in place.
00122 void TESSLINE::Rotate(const FCOORD rot) {
00123   EDGEPT* pt = loop;
00124   do {
00125     int tmp = static_cast<int>(floor(pt->pos.x * rot.x() -
00126                                      pt->pos.y * rot.y() + 0.5));
00127     pt->pos.y = static_cast<int>(floor(pt->pos.y * rot.x() +
00128                                        pt->pos.x * rot.y() + 0.5));
00129     pt->pos.x = tmp;
00130     pt = pt->next;
00131   } while (pt != loop);
00132   SetupFromPos();
00133 }
00134 
00135 // Moves by the given vec in place.
00136 void TESSLINE::Move(const ICOORD vec) {
00137   EDGEPT* pt = loop;
00138   do {
00139     pt->pos.x += vec.x();
00140     pt->pos.y += vec.y();
00141     pt = pt->next;
00142   } while (pt != loop);
00143   SetupFromPos();
00144 }
00145 
00146 // Scales by the given factor in place.
00147 void TESSLINE::Scale(float factor) {
00148   EDGEPT* pt = loop;
00149   do {
00150     pt->pos.x = static_cast<int>(floor(pt->pos.x * factor + 0.5));
00151     pt->pos.y = static_cast<int>(floor(pt->pos.y * factor + 0.5));
00152     pt = pt->next;
00153   } while (pt != loop);
00154   SetupFromPos();
00155 }
00156 
00157 // Sets up the start and vec members of the loop from the pos members.
00158 void TESSLINE::SetupFromPos() {
00159   EDGEPT* pt = loop;
00160   do {
00161     pt->vec.x = pt->next->pos.x - pt->pos.x;
00162     pt->vec.y = pt->next->pos.y - pt->pos.y;
00163     pt = pt->next;
00164   } while (pt != loop);
00165   start = pt->pos;
00166   ComputeBoundingBox();
00167 }
00168 
00169 // Recomputes the bounding box from the points in the loop.
00170 void TESSLINE::ComputeBoundingBox() {
00171   int minx = MAX_INT32;
00172   int miny = MAX_INT32;
00173   int maxx = -MAX_INT32;
00174   int maxy = -MAX_INT32;
00175 
00176   // Find boundaries.
00177   start = loop->pos;
00178   EDGEPT* this_edge = loop;
00179   do {
00180     if (!this_edge->IsHidden() || !this_edge->prev->IsHidden()) {
00181       if (this_edge->pos.x < minx)
00182         minx = this_edge->pos.x;
00183       if (this_edge->pos.y < miny)
00184         miny = this_edge->pos.y;
00185       if (this_edge->pos.x > maxx)
00186         maxx = this_edge->pos.x;
00187       if (this_edge->pos.y > maxy)
00188         maxy = this_edge->pos.y;
00189     }
00190     this_edge = this_edge->next;
00191   } while (this_edge != loop);
00192   // Reset bounds.
00193   topleft.x = minx;
00194   topleft.y = maxy;
00195   botright.x = maxx;
00196   botright.y = miny;
00197 }
00198 
00199 // Computes the min and max cross product of the outline points with the
00200 // given vec and returns the results in min_xp and max_xp. Geometrically
00201 // this is the left and right edge of the outline perpendicular to the
00202 // given direction, but to get the distance units correct, you would
00203 // have to divide by the modulus of vec.
00204 void TESSLINE::MinMaxCrossProduct(const TPOINT vec,
00205                                   int* min_xp, int* max_xp) const {
00206   *min_xp = MAX_INT32;
00207   *max_xp = MIN_INT32;
00208   EDGEPT* this_edge = loop;
00209   do {
00210     if (!this_edge->IsHidden() || !this_edge->prev->IsHidden()) {
00211       int product = CROSS(this_edge->pos, vec);
00212       UpdateRange(product, min_xp, max_xp);
00213     }
00214     this_edge = this_edge->next;
00215   } while (this_edge != loop);
00216 }
00217 
00218 TBOX TESSLINE::bounding_box() const {
00219   return TBOX(topleft.x, botright.y, botright.x, topleft.y);
00220 }
00221 
00222 void TESSLINE::plot(ScrollView* window, ScrollView::Color color,
00223                     ScrollView::Color child_color) {
00224   #ifndef GRAPHICS_DISABLED
00225   if (is_hole)
00226     window->Pen(child_color);
00227   else
00228     window->Pen(color);
00229   window->SetCursor(start.x, start.y);
00230   EDGEPT* pt = loop;
00231   do {
00232     bool prev_hidden = pt->IsHidden();
00233     pt = pt->next;
00234     if (prev_hidden)
00235       window->SetCursor(pt->pos.x, pt->pos.y);
00236     else
00237       window->DrawTo(pt->pos.x, pt->pos.y);
00238   } while (pt != loop);
00239   #endif  // GRAPHICS_DISABLED
00240 }
00241 
00242 // Iterate the given list of outlines, converting to TESSLINE by polygonal
00243 // approximation and recursively any children, returning the current tail
00244 // of the resulting list of TESSLINEs.
00245 static TESSLINE** ApproximateOutlineList(C_OUTLINE_LIST* outlines,
00246                                          bool children,
00247                                          TESSLINE** tail) {
00248   C_OUTLINE_IT ol_it(outlines);
00249   for (ol_it.mark_cycle_pt(); !ol_it.cycled_list(); ol_it.forward()) {
00250     C_OUTLINE* outline = ol_it.data();
00251     TESSLINE* tessline = ApproximateOutline(outline);
00252     tessline->is_hole = children;
00253     *tail = tessline;
00254     tail = &tessline->next;
00255     if (!outline->child()->empty()) {
00256       tail = ApproximateOutlineList(outline->child(), true, tail);
00257     }
00258   }
00259   return tail;
00260 }
00261 
00262 // Factory to build a TBLOB from a C_BLOB with polygonal
00263 // approximation along the way.
00264 TBLOB* TBLOB::PolygonalCopy(C_BLOB* src) {
00265   C_OUTLINE_IT ol_it = src->out_list();
00266   TBLOB* tblob = new TBLOB;
00267   ApproximateOutlineList(src->out_list(), false, &tblob->outlines);
00268   return tblob;
00269 }
00270 
00271 // Normalizes the blob for classification only if needed.
00272 // (Normally this means a non-zero classify rotation.)
00273 // If no Normalization is needed, then NULL is returned, and the denorm is
00274 // unchanged. Otherwise a new TBLOB is returned and the denorm points to
00275 // a new DENORM. In this case, both the TBLOB and DENORM must be deleted.
00276 TBLOB* TBLOB::ClassifyNormalizeIfNeeded(const DENORM** denorm) const {
00277   TBLOB* rotated_blob = NULL;
00278   // If necessary, copy the blob and rotate it. The rotation is always
00279   // +/- 90 degrees, as 180 was already taken care of.
00280   if ((*denorm)->block() != NULL &&
00281       (*denorm)->block()->classify_rotation().y() != 0.0) {
00282     TBOX box = bounding_box();
00283     int x_middle = (box.left() + box.right()) / 2;
00284     int y_middle = (box.top() + box.bottom()) / 2;
00285     rotated_blob = new TBLOB(*this);
00286     const FCOORD& rotation = (*denorm)->block()->classify_rotation();
00287     DENORM* norm = new DENORM;
00288     // Move the rotated blob back to the same y-position so that we
00289     // can still distinguish similar glyphs with differeny y-position.
00290     float target_y = kBlnBaselineOffset +
00291         (rotation.y() > 0 ? x_middle - box.left() : box.right() - x_middle);
00292     norm->SetupNormalization(NULL, NULL, &rotation, *denorm, NULL, 0,
00293                              x_middle, y_middle, 1.0f, 1.0f, 0.0f, target_y);
00294     //                             x_middle, y_middle, 1.0f, 1.0f, 0.0f, y_middle);
00295     rotated_blob->Normalize(*norm);
00296     *denorm = norm;
00297   }
00298   return rotated_blob;
00299 }
00300 
00301 // Copies the data and the outline, but leaves next untouched.
00302 void TBLOB::CopyFrom(const TBLOB& src) {
00303   Clear();
00304   TESSLINE* prev_outline = NULL;
00305   for (TESSLINE* srcline = src.outlines; srcline != NULL;
00306        srcline = srcline->next) {
00307     TESSLINE* new_outline = new TESSLINE(*srcline);
00308     if (outlines == NULL)
00309       outlines = new_outline;
00310     else
00311       prev_outline->next = new_outline;
00312     prev_outline = new_outline;
00313   }
00314 }
00315 
00316 // Deletes owned data.
00317 void TBLOB::Clear() {
00318   for (TESSLINE* next_outline = NULL; outlines != NULL;
00319        outlines = next_outline) {
00320     next_outline = outlines->next;
00321     delete outlines;
00322   }
00323 }
00324 // Normalize in-place using the DENORM.
00325 void TBLOB::Normalize(const DENORM& denorm) {
00326   // TODO(rays) outline->Normalize is more accurate, but breaks tests due
00327   // the changes it makes. Reinstate this code with a retraining.
00328 #if 1
00329   for (TESSLINE* outline = outlines; outline != NULL; outline = outline->next) {
00330     outline->Normalize(denorm);
00331   }
00332 #else
00333   denorm.LocalNormBlob(this);
00334 #endif
00335 }
00336 
00337 // Rotates by the given rotation in place.
00338 void TBLOB::Rotate(const FCOORD rotation) {
00339   for (TESSLINE* outline = outlines; outline != NULL; outline = outline->next) {
00340     outline->Rotate(rotation);
00341   }
00342 }
00343 
00344 // Moves by the given vec in place.
00345 void TBLOB::Move(const ICOORD vec) {
00346   for (TESSLINE* outline = outlines; outline != NULL; outline = outline->next) {
00347     outline->Move(vec);
00348   }
00349 }
00350 
00351 // Scales by the given factor in place.
00352 void TBLOB::Scale(float factor) {
00353   for (TESSLINE* outline = outlines; outline != NULL; outline = outline->next) {
00354     outline->Scale(factor);
00355   }
00356 }
00357 
00358 // Recomputes the bounding boxes of the outlines.
00359 void TBLOB::ComputeBoundingBoxes() {
00360   for (TESSLINE* outline = outlines; outline != NULL; outline = outline->next) {
00361     outline->ComputeBoundingBox();
00362   }
00363 }
00364 
00365 // Returns the number of outlines.
00366 int TBLOB::NumOutlines() const {
00367   int result = 0;
00368   for (TESSLINE* outline = outlines; outline != NULL; outline = outline->next)
00369     ++result;
00370   return result;
00371 }
00372 
00373 /**********************************************************************
00374  * TBLOB::bounding_box()
00375  *
00376  * Compute the bounding_box of a compound blob, defined to be the
00377  * bounding box of the union of all top-level outlines in the blob.
00378  **********************************************************************/
00379 TBOX TBLOB::bounding_box() const {
00380   if (outlines == NULL)
00381     return TBOX(0, 0, 0, 0);
00382   TESSLINE *outline = outlines;
00383   TBOX box = outline->bounding_box();
00384   for (outline = outline->next; outline != NULL; outline = outline->next) {
00385     box += outline->bounding_box();
00386   }
00387   return box;
00388 }
00389 
00390 void TBLOB::plot(ScrollView* window, ScrollView::Color color,
00391                  ScrollView::Color child_color) {
00392   for (TESSLINE* outline = outlines; outline != NULL; outline = outline->next)
00393     outline->plot(window, color, child_color);
00394 }
00395 
00396 // Factory to build a TWERD from a (C_BLOB) WERD, with polygonal
00397 // approximation along the way.
00398 TWERD* TWERD::PolygonalCopy(WERD* src) {
00399   TWERD* tessword = new TWERD;
00400   tessword->latin_script = src->flag(W_SCRIPT_IS_LATIN);
00401   C_BLOB_IT b_it(src->cblob_list());
00402   TBLOB *tail = NULL;
00403   for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
00404     C_BLOB* blob = b_it.data();
00405     TBLOB* tblob = TBLOB::PolygonalCopy(blob);
00406     if (tail == NULL) {
00407       tessword->blobs = tblob;
00408     } else {
00409       tail->next = tblob;
00410     }
00411     tail = tblob;
00412   }
00413   return tessword;
00414 }
00415 
00416 // Normalize in-place and record the normalization in the DENORM.
00417 void TWERD::SetupBLNormalize(const BLOCK* block, const ROW* row,
00418                              float x_height, bool numeric_mode,
00419                              DENORM* denorm) const {
00420   int num_segments = 0;
00421   DENORM_SEG* segs = NULL;
00422   if (numeric_mode) {
00423     segs = new DENORM_SEG[NumBlobs()];
00424     for (TBLOB* blob = blobs; blob != NULL; blob = blob->next) {
00425       TBOX blob_box = blob->bounding_box();
00426       float factor = kBlnXHeight / x_height;
00427       factor = ClipToRange(kBlnXHeight * 4.0f / (3 * blob_box.height()),
00428                            factor, factor * 1.5f);
00429       segs[num_segments].xstart = blob_box.left();
00430       segs[num_segments].ycoord = blob_box.bottom();
00431       segs[num_segments++].scale_factor = factor;
00432     }
00433   }
00434   denorm->SetupBLNormalize(block, row, x_height, bounding_box(),
00435                            num_segments, segs);
00436   delete [] segs;
00437 }
00438 
00439 // Normalize in-place using the DENORM.
00440 void TWERD::Normalize(const DENORM& denorm) {
00441   for (TBLOB* blob = blobs; blob != NULL; blob = blob->next) {
00442     blob->Normalize(denorm);
00443   }
00444 }
00445 
00446 // Copies the data and the blobs, but leaves next untouched.
00447 void TWERD::CopyFrom(const TWERD& src) {
00448   Clear();
00449   latin_script = src.latin_script;
00450   TBLOB* prev_blob = NULL;
00451   for (TBLOB* srcblob = src.blobs; srcblob != NULL; srcblob = srcblob->next) {
00452     TBLOB* new_blob = new TBLOB(*srcblob);
00453     if (blobs == NULL)
00454       blobs = new_blob;
00455     else
00456       prev_blob->next = new_blob;
00457     prev_blob = new_blob;
00458   }
00459 }
00460 
00461 // Deletes owned data.
00462 void TWERD::Clear() {
00463   for (TBLOB* next_blob = NULL; blobs != NULL; blobs = next_blob) {
00464     next_blob = blobs->next;
00465     delete blobs;
00466   }
00467 }
00468 
00469 // Recomputes the bounding boxes of the blobs.
00470 void TWERD::ComputeBoundingBoxes() {
00471   for (TBLOB* blob = blobs; blob != NULL; blob = blob->next) {
00472     blob->ComputeBoundingBoxes();
00473   }
00474 }
00475 
00476 TBOX TWERD::bounding_box() const {
00477   TBOX result;
00478   for (TBLOB* blob = blobs; blob != NULL; blob = blob->next) {
00479     TBOX box = blob->bounding_box();
00480     result += box;
00481   }
00482   return result;
00483 }
00484 
00485 // Merges the blobs from start to end, not including end, and deletes
00486 // the blobs between start and end.
00487 void TWERD::MergeBlobs(int start, int end) {
00488   TBLOB* blob = blobs;
00489   for (int i = 0; i < start && blob != NULL; ++i)
00490     blob = blob->next;
00491   if (blob == NULL || blob->next == NULL)
00492     return;
00493   TBLOB* next_blob = blob->next;
00494   TESSLINE* outline = blob->outlines;
00495   for (int i = start + 1; i < end && next_blob != NULL; ++i) {
00496     // Take the outlines from the next blob.
00497     if (outline == NULL) {
00498       blob->outlines = next_blob->outlines;
00499       outline = blob->outlines;
00500     } else {
00501       while (outline->next != NULL)
00502         outline = outline->next;
00503       outline->next = next_blob->outlines;
00504       next_blob->outlines = NULL;
00505     }
00506     // Delete the next blob and move on.
00507     TBLOB* dead_blob = next_blob;
00508     next_blob = next_blob->next;
00509     blob->next = next_blob;
00510     delete dead_blob;
00511   }
00512 }
00513 
00514 void TWERD::plot(ScrollView* window) {
00515   ScrollView::Color color = WERD::NextColor(ScrollView::BLACK);
00516   for (TBLOB* blob = blobs; blob != NULL; blob = blob->next) {
00517     blob->plot(window, color, ScrollView::BROWN);
00518     color = WERD::NextColor(color);
00519   }
00520 }
00521 
00522 /**********************************************************************
00523  * blob_origin
00524  *
00525  * Compute the origin of a compound blob, define to be the centre
00526  * of the bounding box.
00527  **********************************************************************/
00528 void blob_origin(TBLOB *blob,       /*blob to compute on */
00529                  TPOINT *origin) {  /*return value */
00530   TBOX bbox = blob->bounding_box();
00531   *origin = (bbox.topleft() + bbox.botright()) / 2;
00532 }
00533 
00534 /**********************************************************************
00535  * blobs_widths
00536  *
00537  * Compute the widths of a list of blobs. Return an array of the widths
00538  * and gaps.
00539  **********************************************************************/
00540 WIDTH_RECORD *blobs_widths(TBLOB *blobs) {  /*blob to compute on */
00541   WIDTH_RECORD *width_record;
00542   TPOINT topleft;                /*bounding box */
00543   TPOINT botright;
00544   int i = 0;
00545   int blob_end;
00546   int num_blobs = count_blobs (blobs);
00547 
00548   /* Get memory */
00549   width_record = (WIDTH_RECORD *) memalloc (sizeof (int) * num_blobs * 2);
00550   width_record->num_chars = num_blobs;
00551 
00552   TBOX bbox = blobs->bounding_box();
00553   width_record->widths[i++] = bbox.width();
00554   /* First width */
00555   blob_end = bbox.right();
00556 
00557   for (TBLOB* blob = blobs->next; blob != NULL; blob = blob->next) {
00558     TBOX curbox = blob->bounding_box();
00559     width_record->widths[i++] = curbox.left() - blob_end;
00560     width_record->widths[i++] = curbox.width();
00561     blob_end = curbox.right();
00562   }
00563   return width_record;
00564 }
00565 
00566 
00567 /**********************************************************************
00568  * count_blobs
00569  *
00570  * Return a count of the number of blobs attached to this one.
00571  **********************************************************************/
00572 int count_blobs(TBLOB *blobs) { 
00573   int x = 0;
00574 
00575   for (TBLOB* b = blobs; b != NULL; b = b->next)
00576     x++;
00577   return x;
00578 }
00579 
00580 /**********************************************************************
00581  * divisible_blob
00582  *
00583  * Returns true if the blob contains multiple outlines than can be
00584  * separated using divide_blobs. Sets the location to be used in the
00585  * call to divide_blobs.
00586  **********************************************************************/
00587 bool divisible_blob(TBLOB *blob, bool italic_blob, TPOINT* location) {
00588   if (blob->outlines == NULL || blob->outlines->next == NULL)
00589     return false;  // Need at least 2 outlines for it to be possible.
00590   int max_gap = 0;
00591   TPOINT vertical = italic_blob ? kDivisibleVerticalItalic
00592                                 : kDivisibleVerticalUpright;
00593   for (TESSLINE* outline1 = blob->outlines; outline1 != NULL;
00594        outline1 = outline1->next) {
00595     if (outline1->is_hole)
00596       continue;  // Holes do not count as separable.
00597     TPOINT mid_pt1(
00598       static_cast<inT16>((outline1->topleft.x + outline1->botright.x) / 2),
00599       static_cast<inT16>((outline1->topleft.y + outline1->botright.y) / 2));
00600     int mid_prod1 = CROSS(mid_pt1, vertical);
00601     int min_prod1, max_prod1;
00602     outline1->MinMaxCrossProduct(vertical, &min_prod1, &max_prod1);
00603     for (TESSLINE* outline2 = outline1->next; outline2 != NULL;
00604          outline2 = outline2->next) {
00605       if (outline2->is_hole)
00606         continue;  // Holes do not count as separable.
00607       TPOINT mid_pt2(
00608         static_cast<inT16>((outline2->topleft.x + outline2->botright.x) / 2),
00609         static_cast<inT16>((outline2->topleft.y + outline2->botright.y) / 2));
00610       int mid_prod2 = CROSS(mid_pt2, vertical);
00611       int min_prod2, max_prod2;
00612       outline2->MinMaxCrossProduct(vertical, &min_prod2, &max_prod2);
00613       int mid_gap = abs(mid_prod2 - mid_prod1);
00614       int overlap = MIN(max_prod1, max_prod2) - MAX(min_prod1, min_prod2);
00615       if (mid_gap - overlap / 4 > max_gap) {
00616         max_gap = mid_gap - overlap / 4;
00617         *location = mid_pt1;
00618         *location += mid_pt2;
00619         *location /= 2;
00620       }
00621     }
00622   }
00623   // Use the y component of the vertical vector as an approximation to its
00624   // length.
00625   return max_gap > vertical.y;
00626 }
00627 
00628 /**********************************************************************
00629  * divide_blobs
00630  *
00631  * Create two blobs by grouping the outlines in the appropriate blob.
00632  * The outlines that are beyond the location point are moved to the
00633  * other blob.  The ones whose x location is less than that point are
00634  * retained in the original blob.
00635  **********************************************************************/
00636 void divide_blobs(TBLOB *blob, TBLOB *other_blob, bool italic_blob,
00637                   const TPOINT& location) {
00638   TPOINT vertical = italic_blob ? kDivisibleVerticalItalic
00639                                 : kDivisibleVerticalUpright;
00640   TESSLINE *outline1 = NULL;
00641   TESSLINE *outline2 = NULL;
00642 
00643   TESSLINE *outline = blob->outlines;
00644   blob->outlines = NULL;
00645   int location_prod = CROSS(location, vertical);
00646 
00647   while (outline != NULL) {
00648     TPOINT mid_pt(
00649       static_cast<inT16>((outline->topleft.x + outline->botright.x) / 2),
00650       static_cast<inT16>((outline->topleft.y + outline->botright.y) / 2));
00651     int mid_prod = CROSS(mid_pt, vertical);
00652     if (mid_prod < location_prod) {
00653       // Outline is in left blob.
00654       if (outline1)
00655         outline1->next = outline;
00656       else
00657         blob->outlines = outline;
00658       outline1 = outline;
00659     } else {
00660       // Outline is in right blob.
00661       if (outline2)
00662         outline2->next = outline;
00663       else
00664         other_blob->outlines = outline;
00665       outline2 = outline;
00666     }
00667     outline = outline->next;
00668   }
00669 
00670   if (outline1)
00671     outline1->next = NULL;
00672   if (outline2)
00673     outline2->next = NULL;
00674 }