Tesseract
3.02
|
00001 00002 // File: linefind.cpp 00003 // Description: Class to find vertical lines in an image and create 00004 // a corresponding list of empty blobs. 00005 // Author: Ray Smith 00006 // Created: Thu Mar 20 09:49:01 PDT 2008 00007 // 00008 // (C) Copyright 2008, Google Inc. 00009 // Licensed under the Apache License, Version 2.0 (the "License"); 00010 // you may not use this file except in compliance with the License. 00011 // You may obtain a copy of the License at 00012 // http://www.apache.org/licenses/LICENSE-2.0 00013 // Unless required by applicable law or agreed to in writing, software 00014 // distributed under the License is distributed on an "AS IS" BASIS, 00015 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00016 // See the License for the specific language governing permissions and 00017 // limitations under the License. 00018 // 00020 00021 #ifdef _MSC_VER 00022 #pragma warning(disable:4244) // Conversion warnings 00023 #endif 00024 00025 #include "linefind.h" 00026 #include "alignedblob.h" 00027 #include "tabvector.h" 00028 #include "blobbox.h" 00029 #include "edgblob.h" 00030 // This entire file is dependent upon leptonica. If you don't have it, 00031 // then the code doesn't do anything useful. 00032 #ifdef HAVE_CONFIG_H 00033 #include "config_auto.h" 00034 #endif 00035 #include "allheaders.h" 00036 00037 namespace tesseract { 00038 00040 const int kThinLineFraction = 20; 00042 const int kMinLineLengthFraction = 4; 00044 const int kCrackSpacing = 100; 00046 const int kLineFindGridSize = 50; 00047 // Min width of a line in pixels to be considered thick. 00048 const int kMinThickLineWidth = 12; 00049 // Max size of line residue. (The pixels that fail the long thin opening, and 00050 // therefore don't make it to the candidate line mask, but are nevertheless 00051 // part of the line.) 00052 const int kMaxLineResidue = 6; 00053 // Min length in inches of a line segment that exceeds kMinThickLineWidth in 00054 // thickness. (Such lines shouldn't break by simple image degradation.) 00055 const double kThickLengthMultiple = 0.75; 00056 // Max fraction of line box area that can be occupied by non-line pixels. 00057 const double kMaxNonLineDensity = 0.25; 00058 // Max height of a music stave in inches. 00059 const double kMaxStaveHeight = 1.0; 00060 // Minimum fraction of pixels in a music rectangle connected to the staves. 00061 const double kMinMusicPixelFraction = 0.75; 00062 00063 // Erases the unused blobs from the line_pix image, taking into account 00064 // whether this was a horizontal or vertical line set. 00065 static void RemoveUnusedLineSegments(bool horizontal_lines, 00066 BLOBNBOX_LIST* line_bblobs, 00067 Pix* line_pix) { 00068 int height = pixGetHeight(line_pix); 00069 BLOBNBOX_IT bbox_it(line_bblobs); 00070 for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) { 00071 BLOBNBOX* blob = bbox_it.data(); 00072 if (blob->left_tab_type() == TT_MAYBE_ALIGNED) { 00073 const TBOX& box = blob->bounding_box(); 00074 Box* pixbox = NULL; 00075 if (horizontal_lines) { 00076 // Horizontal lines are in tess format and also have x and y flipped 00077 // (to use FindVerticalAlignment) so we have to flip x and y and then 00078 // convert to Leptonica by height - flipped x (ie the right edge). 00079 // See GetLineBoxes for more explanation. 00080 pixbox = boxCreate(box.bottom(), height - box.right(), 00081 box.height(), box.width()); 00082 00083 } else { 00084 // For vertical lines, just flip upside-down to convert to Leptonica. 00085 // The y position of the box in Leptonica terms is the distance from 00086 // the top of the image to the top of the box. 00087 pixbox = boxCreate(box.left(), height - box.top(), 00088 box.width(), box.height()); 00089 } 00090 pixClearInRect(line_pix, pixbox); 00091 boxDestroy(&pixbox); 00092 } 00093 } 00094 } 00095 00096 // Helper subtracts the line_pix image from the src_pix, and removes residue 00097 // as well by removing components that touch the line, but are not in the 00098 // non_line_pix mask. It is assumed that the non_line_pix mask has already 00099 // been prepared to required accuracy. 00100 static void SubtractLinesAndResidue(Pix* line_pix, Pix* non_line_pix, 00101 int resolution, Pix* src_pix) { 00102 // First remove the lines themselves. 00103 pixSubtract(src_pix, src_pix, line_pix); 00104 // Subtract the non-lines from the image to get the residue. 00105 Pix* residue_pix = pixSubtract(NULL, src_pix, non_line_pix); 00106 // Dilate the lines so they touch the residue. 00107 Pix* fat_line_pix = pixDilateBrick(NULL, line_pix, 3, 3); 00108 // Seed fill the fat lines to get all the residue. 00109 pixSeedfillBinary(fat_line_pix, fat_line_pix, residue_pix, 8); 00110 // Subtract the residue from the original image. 00111 pixSubtract(src_pix, src_pix, fat_line_pix); 00112 pixDestroy(&fat_line_pix); 00113 pixDestroy(&residue_pix); 00114 } 00115 00116 // Returns the maximum strokewidth in the given binary image by doubling 00117 // the maximum of the distance function. 00118 static int MaxStrokeWidth(Pix* pix) { 00119 Pix* dist_pix = pixDistanceFunction(pix, 4, 8, L_BOUNDARY_BG); 00120 int width = pixGetWidth(dist_pix); 00121 int height = pixGetHeight(dist_pix); 00122 int wpl = pixGetWpl(dist_pix); 00123 l_uint32* data = pixGetData(dist_pix); 00124 // Find the maximum value in the distance image. 00125 int max_dist = 0; 00126 for (int y = 0; y < height; ++y) { 00127 for (int x = 0; x < width; ++x) { 00128 int pixel = GET_DATA_BYTE(data, x); 00129 if (pixel > max_dist) 00130 max_dist = pixel; 00131 } 00132 data += wpl; 00133 } 00134 pixDestroy(&dist_pix); 00135 return max_dist * 2; 00136 } 00137 00138 // Returns the number of components in the intersection_pix touched by line_box. 00139 static int NumTouchingIntersections(Box* line_box, Pix* intersection_pix) { 00140 if (intersection_pix == NULL) return 0; 00141 Pix* rect_pix = pixClipRectangle(intersection_pix, line_box, NULL); 00142 Boxa* boxa = pixConnComp(rect_pix, NULL, 8); 00143 pixDestroy(&rect_pix); 00144 if (boxa == NULL) return false; 00145 int result = boxaGetCount(boxa); 00146 boxaDestroy(&boxa); 00147 return result; 00148 } 00149 00150 // Returns the number of black pixels found in the box made by adding the line 00151 // width to both sides of the line bounding box. (Increasing the smallest 00152 // dimension of the bounding box.) 00153 static int CountPixelsAdjacentToLine(int line_width, Box* line_box, 00154 Pix* nonline_pix) { 00155 l_int32 x, y, box_width, box_height; 00156 boxGetGeometry(line_box, &x, &y, &box_width, &box_height); 00157 if (box_width > box_height) { 00158 // horizontal line. 00159 int bottom = MIN(pixGetHeight(nonline_pix), y + box_height + line_width); 00160 y = MAX(0, y - line_width); 00161 box_height = bottom - y; 00162 } else { 00163 // Vertical line. 00164 int right = MIN(pixGetWidth(nonline_pix), x + box_width + line_width); 00165 x = MAX(0, x - line_width); 00166 box_width = right - x; 00167 } 00168 Box* box = boxCreate(x, y, box_width, box_height); 00169 Pix* rect_pix = pixClipRectangle(nonline_pix, box, NULL); 00170 boxDestroy(&box); 00171 l_int32 result; 00172 pixCountPixels(rect_pix, &result, NULL); 00173 pixDestroy(&rect_pix); 00174 return result; 00175 } 00176 00177 // Helper erases false-positive line segments from the input/output line_pix. 00178 // 1. Since thick lines shouldn't really break up, we can eliminate some false 00179 // positives by marking segments that are at least kMinThickLineWidth 00180 // thickness, yet have a length less than min_thick_length. 00181 // 2. Lines that don't have at least 2 intersections with other lines and have 00182 // a lot of neighbouring non-lines are probably not lines (perhaps arabic 00183 // or Hindi words, or underlines.) 00184 // Bad line components are erased from line_pix. 00185 // Returns the number of remaining connected components. 00186 static int FilterFalsePositives(int resolution, Pix* nonline_pix, 00187 Pix* intersection_pix, Pix* line_pix) { 00188 int min_thick_length = static_cast<int>(resolution * kThickLengthMultiple); 00189 Pixa* pixa = NULL; 00190 Boxa* boxa = pixConnComp(line_pix, &pixa, 8); 00191 // Iterate over the boxes to remove false positives. 00192 int nboxes = boxaGetCount(boxa); 00193 int remaining_boxes = nboxes; 00194 for (int i = 0; i < nboxes; ++i) { 00195 Box* box = boxaGetBox(boxa, i, L_CLONE); 00196 l_int32 x, y, box_width, box_height; 00197 boxGetGeometry(box, &x, &y, &box_width, &box_height); 00198 Pix* comp_pix = pixaGetPix(pixa, i, L_CLONE); 00199 int max_width = MaxStrokeWidth(comp_pix); 00200 pixDestroy(&comp_pix); 00201 bool bad_line = false; 00202 // If the length is too short to stand-alone as a line, and the box width 00203 // is thick enough, and the stroke width is thick enough it is bad. 00204 if (box_width >= kMinThickLineWidth && box_height >= kMinThickLineWidth && 00205 box_width < min_thick_length && box_height < min_thick_length && 00206 max_width > kMinThickLineWidth) { 00207 // Too thick for the length. 00208 bad_line = true; 00209 } 00210 if (!bad_line && 00211 (intersection_pix == NULL || 00212 NumTouchingIntersections(box, intersection_pix) < 2)) { 00213 // Test non-line density near the line. 00214 int nonline_count = CountPixelsAdjacentToLine(max_width, box, 00215 nonline_pix); 00216 if (nonline_count > box_height * box_width * kMaxNonLineDensity) 00217 bad_line = true; 00218 } 00219 if (bad_line) { 00220 // Not a good line. 00221 pixClearInRect(line_pix, box); 00222 --remaining_boxes; 00223 } 00224 boxDestroy(&box); 00225 } 00226 pixaDestroy(&pixa); 00227 boxaDestroy(&boxa); 00228 return remaining_boxes; 00229 } 00230 00231 // Finds vertical and horizontal line objects in the given pix. 00232 // Uses the given resolution to determine size thresholds instead of any 00233 // that may be present in the pix. 00234 // The output vertical_x and vertical_y contain a sum of the output vectors, 00235 // thereby giving the mean vertical direction. 00236 // If pix_music_mask != NULL, and music is detected, a mask of the staves 00237 // and anything that is connected (bars, notes etc.) will be returned in 00238 // pix_music_mask, the mask subtracted from pix, and the lines will not 00239 // appear in v_lines or h_lines. 00240 // The output vectors are owned by the list and Frozen (cannot refit) by 00241 // having no boxes, as there is no need to refit or merge separator lines. 00242 // The detected lines are removed from the pix. 00243 void LineFinder::FindAndRemoveLines(int resolution, bool debug, Pix* pix, 00244 int* vertical_x, int* vertical_y, 00245 Pix** pix_music_mask, 00246 TabVector_LIST* v_lines, 00247 TabVector_LIST* h_lines) { 00248 if (pix == NULL || vertical_x == NULL || vertical_y == NULL) { 00249 tprintf("Error in parameters for LineFinder::FindAndRemoveLines\n"); 00250 return; 00251 } 00252 Pix* pix_vline = NULL; 00253 Pix* pix_non_vline = NULL; 00254 Pix* pix_hline = NULL; 00255 Pix* pix_non_hline = NULL; 00256 Pix* pix_intersections = NULL; 00257 Pixa* pixa_display = debug ? pixaCreate(0) : NULL; 00258 GetLineMasks(resolution, pix, &pix_vline, &pix_non_vline, &pix_hline, 00259 &pix_non_hline, &pix_intersections, pix_music_mask, 00260 pixa_display); 00261 // Find lines, convert to TabVector_LIST and remove those that are used. 00262 FindAndRemoveVLines(resolution, pix_intersections, vertical_x, vertical_y, 00263 &pix_vline, pix_non_vline, pix, v_lines); 00264 if (pix_hline != NULL) { 00265 // Recompute intersections and re-filter false positive h-lines. 00266 if (pix_vline != NULL) 00267 pixAnd(pix_intersections, pix_vline, pix_hline); 00268 else 00269 pixDestroy(&pix_intersections); 00270 if (!FilterFalsePositives(resolution, pix_non_hline, pix_intersections, 00271 pix_hline)) { 00272 pixDestroy(&pix_hline); 00273 } 00274 } 00275 FindAndRemoveHLines(resolution, pix_intersections, *vertical_x, *vertical_y, 00276 &pix_hline, pix_non_hline, pix, h_lines); 00277 if (pixa_display != NULL && pix_vline != NULL) 00278 pixaAddPix(pixa_display, pix_vline, L_CLONE); 00279 if (pixa_display != NULL && pix_hline != NULL) 00280 pixaAddPix(pixa_display, pix_hline, L_CLONE); 00281 if (pix_vline != NULL && pix_hline != NULL) { 00282 // Remove joins (intersections) where lines cross, and the residue. 00283 // Recalculate the intersections, since some lines have been deleted. 00284 pixAnd(pix_intersections, pix_vline, pix_hline); 00285 // Fatten up the intersections and seed-fill to get the intersection 00286 // residue. 00287 Pix* pix_join_residue = pixDilateBrick(NULL, pix_intersections, 5, 5); 00288 pixSeedfillBinary(pix_join_residue, pix_join_residue, pix, 8); 00289 // Now remove the intersection residue. 00290 pixSubtract(pix, pix, pix_join_residue); 00291 pixDestroy(&pix_join_residue); 00292 } 00293 // Remove any detected music. 00294 if (pix_music_mask != NULL && *pix_music_mask != NULL) { 00295 if (pixa_display != NULL) 00296 pixaAddPix(pixa_display, *pix_music_mask, L_CLONE); 00297 pixSubtract(pix, pix, *pix_music_mask); 00298 } 00299 if (pixa_display != NULL) 00300 pixaAddPix(pixa_display, pix, L_CLONE); 00301 00302 pixDestroy(&pix_vline); 00303 pixDestroy(&pix_non_vline); 00304 pixDestroy(&pix_hline); 00305 pixDestroy(&pix_non_hline); 00306 pixDestroy(&pix_intersections); 00307 if (pixa_display != NULL) { 00308 #if LIBLEPT_MINOR_VERSION >= 69 || LIBLEPT_MAJOR_VERSION > 1 00309 pixaConvertToPdf(pixa_display, resolution, 1.0f, 0, 0, "LineFinding", 00310 "vhlinefinding.pdf"); 00311 #endif 00312 pixaDestroy(&pixa_display); 00313 } 00314 } 00315 00316 // Converts the Boxa array to a list of C_BLOB, getting rid of severely 00317 // overlapping outlines and those that are children of a bigger one. 00318 // The output is a list of C_BLOBs that are owned by the list. 00319 // The C_OUTLINEs in the C_BLOBs contain no outline data - just empty 00320 // bounding boxes. The Boxa is consumed and destroyed. 00321 void LineFinder::ConvertBoxaToBlobs(int image_width, int image_height, 00322 Boxa** boxes, C_BLOB_LIST* blobs) { 00323 C_OUTLINE_LIST outlines; 00324 C_OUTLINE_IT ol_it = &outlines; 00325 // Iterate the boxes to convert to outlines. 00326 int nboxes = boxaGetCount(*boxes); 00327 for (int i = 0; i < nboxes; ++i) { 00328 l_int32 x, y, width, height; 00329 boxaGetBoxGeometry(*boxes, i, &x, &y, &width, &height); 00330 // Make a C_OUTLINE from the leptonica box. This is a bit of a hack, 00331 // as there is no outline, just a bounding box, but with some very 00332 // small changes to coutln.cpp, it works nicely. 00333 ICOORD top_left(x, y); 00334 ICOORD bot_right(x + width, y + height); 00335 CRACKEDGE startpt; 00336 startpt.pos = top_left; 00337 C_OUTLINE* outline = new C_OUTLINE(&startpt, top_left, bot_right, 0); 00338 ol_it.add_after_then_move(outline); 00339 } 00340 // Use outlines_to_blobs to convert the outlines to blobs and find 00341 // overlapping and contained objects. The output list of blobs in the block 00342 // has all the bad ones filtered out and deleted. 00343 BLOCK block; 00344 ICOORD page_tl(0, 0); 00345 ICOORD page_br(image_width, image_height); 00346 outlines_to_blobs(&block, page_tl, page_br, &outlines); 00347 // Transfer the created blobs to the output list. 00348 C_BLOB_IT blob_it(blobs); 00349 blob_it.add_list_after(block.blob_list()); 00350 // The boxes aren't needed any more. 00351 boxaDestroy(boxes); 00352 } 00353 00354 // Finds vertical line objects in pix_vline and removes the from src_pix. 00355 // Uses the given resolution to determine size thresholds instead of any 00356 // that may be present in the pix. 00357 // The output vertical_x and vertical_y contain a sum of the output vectors, 00358 // thereby giving the mean vertical direction. 00359 // The output vectors are owned by the list and Frozen (cannot refit) by 00360 // having no boxes, as there is no need to refit or merge separator lines. 00361 // If no good lines are found, pix_vline is destroyed. 00362 // None of the input pointers may be NULL, and if *pix_vline is NULL then 00363 // the function does nothing. 00364 void LineFinder::FindAndRemoveVLines(int resolution, 00365 Pix* pix_intersections, 00366 int* vertical_x, int* vertical_y, 00367 Pix** pix_vline, Pix* pix_non_vline, 00368 Pix* src_pix, TabVector_LIST* vectors) { 00369 if (pix_vline == NULL || *pix_vline == NULL) return; 00370 C_BLOB_LIST line_cblobs; 00371 BLOBNBOX_LIST line_bblobs; 00372 GetLineBoxes(false, *pix_vline, pix_intersections, 00373 &line_cblobs, &line_bblobs); 00374 int width = pixGetWidth(src_pix); 00375 int height = pixGetHeight(src_pix); 00376 ICOORD bleft(0, 0); 00377 ICOORD tright(width, height); 00378 FindLineVectors(bleft, tright, &line_bblobs, vertical_x, vertical_y, vectors); 00379 if (!vectors->empty()) { 00380 RemoveUnusedLineSegments(false, &line_bblobs, *pix_vline); 00381 SubtractLinesAndResidue(*pix_vline, pix_non_vline, resolution, src_pix); 00382 ICOORD vertical; 00383 vertical.set_with_shrink(*vertical_x, *vertical_y); 00384 TabVector::MergeSimilarTabVectors(vertical, vectors, NULL); 00385 } else { 00386 pixDestroy(pix_vline); 00387 } 00388 } 00389 00390 // Finds horizontal line objects in pix_hline and removes them from src_pix. 00391 // Uses the given resolution to determine size thresholds instead of any 00392 // that may be present in the pix. 00393 // The output vertical_x and vertical_y contain a sum of the output vectors, 00394 // thereby giving the mean vertical direction. 00395 // The output vectors are owned by the list and Frozen (cannot refit) by 00396 // having no boxes, as there is no need to refit or merge separator lines. 00397 // If no good lines are found, pix_hline is destroyed. 00398 // None of the input pointers may be NULL, and if *pix_hline is NULL then 00399 // the function does nothing. 00400 void LineFinder::FindAndRemoveHLines(int resolution, 00401 Pix* pix_intersections, 00402 int vertical_x, int vertical_y, 00403 Pix** pix_hline, Pix* pix_non_hline, 00404 Pix* src_pix, TabVector_LIST* vectors) { 00405 if (pix_hline == NULL || *pix_hline == NULL) return; 00406 C_BLOB_LIST line_cblobs; 00407 BLOBNBOX_LIST line_bblobs; 00408 GetLineBoxes(true, *pix_hline, pix_intersections, &line_cblobs, &line_bblobs); 00409 int width = pixGetWidth(src_pix); 00410 int height = pixGetHeight(src_pix); 00411 ICOORD bleft(0, 0); 00412 ICOORD tright(height, width); 00413 FindLineVectors(bleft, tright, &line_bblobs, &vertical_x, &vertical_y, 00414 vectors); 00415 if (!vectors->empty()) { 00416 RemoveUnusedLineSegments(true, &line_bblobs, *pix_hline); 00417 SubtractLinesAndResidue(*pix_hline, pix_non_hline, resolution, src_pix); 00418 ICOORD vertical; 00419 vertical.set_with_shrink(vertical_x, vertical_y); 00420 TabVector::MergeSimilarTabVectors(vertical, vectors, NULL); 00421 // Iterate the vectors to flip them. x and y were flipped for horizontal 00422 // lines, so FindLineVectors can work just with the vertical case. 00423 // See GetLineBoxes for more on the flip. 00424 TabVector_IT h_it(vectors); 00425 for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) { 00426 h_it.data()->XYFlip(); 00427 } 00428 } else { 00429 pixDestroy(pix_hline); 00430 } 00431 } 00432 00433 // Finds vertical lines in the given list of BLOBNBOXes. bleft and tright 00434 // are the bounds of the image on which the input line_bblobs were found. 00435 // The input line_bblobs list is const really. 00436 // The output vertical_x and vertical_y are the total of all the vectors. 00437 // The output list of TabVector makes no reference to the input BLOBNBOXes. 00438 void LineFinder::FindLineVectors(const ICOORD& bleft, const ICOORD& tright, 00439 BLOBNBOX_LIST* line_bblobs, 00440 int* vertical_x, int* vertical_y, 00441 TabVector_LIST* vectors) { 00442 BLOBNBOX_IT bbox_it(line_bblobs); 00443 int b_count = 0; 00444 // Put all the blobs into the grid to find the lines, and move the blobs 00445 // to the output lists. 00446 AlignedBlob blob_grid(kLineFindGridSize, bleft, tright); 00447 for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) { 00448 BLOBNBOX* bblob = bbox_it.data(); 00449 bblob->set_left_tab_type(TT_MAYBE_ALIGNED); 00450 bblob->set_left_rule(bleft.x()); 00451 bblob->set_right_rule(tright.x()); 00452 bblob->set_left_crossing_rule(bleft.x()); 00453 bblob->set_right_crossing_rule(tright.x()); 00454 blob_grid.InsertBBox(false, true, bblob); 00455 ++b_count; 00456 } 00457 if (b_count == 0) 00458 return; 00459 00460 // Search the entire grid, looking for vertical line vectors. 00461 BlobGridSearch lsearch(&blob_grid); 00462 BLOBNBOX* bbox; 00463 TabVector_IT vector_it(vectors); 00464 *vertical_x = 0; 00465 *vertical_y = 1; 00466 lsearch.StartFullSearch(); 00467 while ((bbox = lsearch.NextFullSearch()) != NULL) { 00468 if (bbox->left_tab_type() == TT_MAYBE_ALIGNED) { 00469 const TBOX& box = bbox->bounding_box(); 00470 if (AlignedBlob::WithinTestRegion(2, box.left(), box.bottom())) 00471 tprintf("Finding line vector starting at bbox (%d,%d)\n", 00472 box.left(), box.bottom()); 00473 AlignedBlobParams align_params(*vertical_x, *vertical_y, box.width()); 00474 TabVector* vector = blob_grid.FindVerticalAlignment(align_params, bbox, 00475 vertical_x, 00476 vertical_y); 00477 if (vector != NULL) { 00478 vector->Freeze(); 00479 vector_it.add_to_end(vector); 00480 } 00481 } 00482 } 00483 } 00484 00485 // Returns a Pix music mask if music is detected. 00486 // Any vertical line that has at least 5 intersections in sufficient density 00487 // is taken to be a bar. Bars are used as a seed and the entire touching 00488 // component is added to the output music mask and subtracted from the lines. 00489 // Returns NULL and does minimal work if no music is found. 00490 static Pix* FilterMusic(int resolution, Pix* pix_closed, 00491 Pix* pix_vline, Pix* pix_hline, 00492 l_int32* v_empty, l_int32* h_empty) { 00493 int max_stave_height = static_cast<int>(resolution * kMaxStaveHeight); 00494 Pix* intersection_pix = pixAnd(NULL, pix_vline, pix_hline); 00495 Boxa* boxa = pixConnComp(pix_vline, NULL, 8); 00496 // Iterate over the boxes to find music bars. 00497 int nboxes = boxaGetCount(boxa); 00498 Pix* music_mask = NULL; 00499 for (int i = 0; i < nboxes; ++i) { 00500 Box* box = boxaGetBox(boxa, i, L_CLONE); 00501 l_int32 x, y, box_width, box_height; 00502 boxGetGeometry(box, &x, &y, &box_width, &box_height); 00503 int joins = NumTouchingIntersections(box, intersection_pix); 00504 // Test for the join density being at least 5 per max_stave_height, 00505 // ie (joins-1)/box_height >= (5-1)/max_stave_height. 00506 if (joins >= 5 && (joins - 1) * max_stave_height >= 4 * box_height) { 00507 // This is a music bar. Add to the mask. 00508 if (music_mask == NULL) 00509 music_mask = pixCreate(pixGetWidth(pix_vline), pixGetHeight(pix_vline), 00510 1); 00511 pixSetInRect(music_mask, box); 00512 } 00513 boxDestroy(&box); 00514 } 00515 boxaDestroy(&boxa); 00516 pixDestroy(&intersection_pix); 00517 if (music_mask != NULL) { 00518 // The mask currently contains just the bars. Use the mask as a seed 00519 // and the pix_closed as the mask for a seedfill to get all the 00520 // intersecting staves. 00521 pixSeedfillBinary(music_mask, music_mask, pix_closed, 8); 00522 // Filter out false positives. CCs in the music_mask should be the vast 00523 // majority of the pixels in their bounding boxes, as we expect just a 00524 // tiny amount of text, a few phrase marks, and crescendo etc left. 00525 Boxa* boxa = pixConnComp(music_mask, NULL, 8); 00526 // Iterate over the boxes to find music components. 00527 int nboxes = boxaGetCount(boxa); 00528 for (int i = 0; i < nboxes; ++i) { 00529 Box* box = boxaGetBox(boxa, i, L_CLONE); 00530 Pix* rect_pix = pixClipRectangle(music_mask, box, NULL); 00531 l_int32 music_pixels; 00532 pixCountPixels(rect_pix, &music_pixels, NULL); 00533 pixDestroy(&rect_pix); 00534 rect_pix = pixClipRectangle(pix_closed, box, NULL); 00535 l_int32 all_pixels; 00536 pixCountPixels(rect_pix, &all_pixels, NULL); 00537 pixDestroy(&rect_pix); 00538 if (music_pixels < kMinMusicPixelFraction * all_pixels) { 00539 // False positive. Delete from the music mask. 00540 pixClearInRect(music_mask, box); 00541 } 00542 boxDestroy(&box); 00543 } 00544 l_int32 no_remaining_music; 00545 boxaDestroy(&boxa); 00546 pixZero(music_mask, &no_remaining_music); 00547 if (no_remaining_music) { 00548 pixDestroy(&music_mask); 00549 } else { 00550 pixSubtract(pix_vline, pix_vline, music_mask); 00551 pixSubtract(pix_hline, pix_hline, music_mask); 00552 // We may have deleted all the lines 00553 pixZero(pix_vline, v_empty); 00554 pixZero(pix_hline, h_empty); 00555 } 00556 } 00557 return music_mask; 00558 } 00559 00560 // Most of the heavy lifting of line finding. Given src_pix and its separate 00561 // resolution, returns image masks: 00562 // pix_vline candidate vertical lines. 00563 // pix_non_vline pixels that didn't look like vertical lines. 00564 // pix_hline candidate horizontal lines. 00565 // pix_non_hline pixels that didn't look like horizontal lines. 00566 // pix_intersections pixels where vertical and horizontal lines meet. 00567 // pix_music_mask candidate music staves. 00568 // This function promises to initialize all the output (2nd level) pointers, 00569 // but any of the returns that are empty will be NULL on output. 00570 // None of the input (1st level) pointers may be NULL except pix_music_mask, 00571 // which will disable music detection, and pixa_display. 00572 void LineFinder::GetLineMasks(int resolution, Pix* src_pix, 00573 Pix** pix_vline, Pix** pix_non_vline, 00574 Pix** pix_hline, Pix** pix_non_hline, 00575 Pix** pix_intersections, Pix** pix_music_mask, 00576 Pixa* pixa_display) { 00577 int max_line_width = resolution / kThinLineFraction; 00578 int min_line_length = resolution / kMinLineLengthFraction; 00579 if (pixa_display != NULL) { 00580 tprintf("Image resolution = %d, max line width = %d, min length=%d\n", 00581 resolution, max_line_width, min_line_length); 00582 } 00583 int closing_brick = max_line_width / 3; 00584 00585 // Close up small holes, making it less likely that false alarms are found 00586 // in thickened text (as it will become more solid) and also smoothing over 00587 // some line breaks and nicks in the edges of the lines. 00588 Pix* pix_closed = pixCloseBrick(NULL, src_pix, closing_brick, closing_brick); 00589 if (pixa_display != NULL) 00590 pixaAddPix(pixa_display, pix_closed, L_CLONE); 00591 // Open up with a big box to detect solid areas, which can then be subtracted. 00592 // This is very generous and will leave in even quite wide lines. 00593 Pix* pix_solid = pixOpenBrick(NULL, pix_closed, max_line_width, 00594 max_line_width); 00595 if (pixa_display != NULL) 00596 pixaAddPix(pixa_display, pix_solid, L_CLONE); 00597 Pix* pix_hollow = pixSubtract(NULL, pix_closed, pix_solid); 00598 pixDestroy(&pix_solid); 00599 // Now open up in both directions independently to find lines of at least 00600 // 1 inch/kMinLineLengthFraction in length. 00601 if (pixa_display != NULL) 00602 pixaAddPix(pixa_display, pix_hollow, L_CLONE); 00603 *pix_vline = pixOpenBrick(NULL, pix_hollow, 1, min_line_length); 00604 *pix_hline = pixOpenBrick(NULL, pix_hollow, min_line_length, 1); 00605 pixDestroy(&pix_hollow); 00606 // Lines are sufficiently rare, that it is worth checking for a zero image. 00607 l_int32 v_empty = 0; 00608 l_int32 h_empty = 0; 00609 pixZero(*pix_vline, &v_empty); 00610 pixZero(*pix_hline, &h_empty); 00611 if (pix_music_mask != NULL) { 00612 if (!v_empty && !h_empty) { 00613 *pix_music_mask = FilterMusic(resolution, pix_closed, 00614 *pix_vline, *pix_hline, 00615 &v_empty, &h_empty); 00616 } else { 00617 *pix_music_mask = NULL; 00618 } 00619 } 00620 pixDestroy(&pix_closed); 00621 Pix* pix_nonlines = NULL; 00622 *pix_intersections = NULL; 00623 Pix* extra_non_hlines = NULL; 00624 if (!v_empty) { 00625 // Subtract both line candidates from the source to get definite non-lines. 00626 pix_nonlines = pixSubtract(NULL, src_pix, *pix_vline); 00627 if (!h_empty) { 00628 pixSubtract(pix_nonlines, pix_nonlines, *pix_hline); 00629 // Intersections are a useful indicator for likelihood of being a line. 00630 *pix_intersections = pixAnd(NULL, *pix_vline, *pix_hline); 00631 // Candidate vlines are not hlines (apart from the intersections) 00632 // and vice versa. 00633 extra_non_hlines = pixSubtract(NULL, *pix_vline, *pix_intersections); 00634 } 00635 *pix_non_vline = pixErodeBrick(NULL, pix_nonlines, kMaxLineResidue, 1); 00636 pixSeedfillBinary(*pix_non_vline, *pix_non_vline, pix_nonlines, 8); 00637 if (!h_empty) { 00638 // Candidate hlines are not vlines. 00639 pixOr(*pix_non_vline, *pix_non_vline, *pix_hline); 00640 pixSubtract(*pix_non_vline, *pix_non_vline, *pix_intersections); 00641 } 00642 if (!FilterFalsePositives(resolution, *pix_non_vline, *pix_intersections, 00643 *pix_vline)) 00644 pixDestroy(pix_vline); // No candidates left. 00645 } else { 00646 // No vertical lines. 00647 pixDestroy(pix_vline); 00648 *pix_non_vline = NULL; 00649 if (!h_empty) { 00650 pix_nonlines = pixSubtract(NULL, src_pix, *pix_hline); 00651 } 00652 } 00653 if (h_empty) { 00654 pixDestroy(pix_hline); 00655 *pix_non_hline = NULL; 00656 if (v_empty) { 00657 return; 00658 } 00659 } else { 00660 *pix_non_hline = pixErodeBrick(NULL, pix_nonlines, 1, kMaxLineResidue); 00661 pixSeedfillBinary(*pix_non_hline, *pix_non_hline, pix_nonlines, 8); 00662 if (extra_non_hlines != NULL) { 00663 pixOr(*pix_non_hline, *pix_non_hline, extra_non_hlines); 00664 pixDestroy(&extra_non_hlines); 00665 } 00666 if (!FilterFalsePositives(resolution, *pix_non_hline, *pix_intersections, 00667 *pix_hline)) 00668 pixDestroy(pix_hline); // No candidates left. 00669 } 00670 if (pixa_display != NULL) { 00671 if (*pix_vline != NULL) pixaAddPix(pixa_display, *pix_vline, L_CLONE); 00672 if (*pix_hline != NULL) pixaAddPix(pixa_display, *pix_hline, L_CLONE); 00673 if (pix_nonlines != NULL) pixaAddPix(pixa_display, pix_nonlines, L_CLONE); 00674 if (*pix_non_vline != NULL) 00675 pixaAddPix(pixa_display, *pix_non_vline, L_CLONE); 00676 if (*pix_non_hline != NULL) 00677 pixaAddPix(pixa_display, *pix_non_hline, L_CLONE); 00678 if (*pix_intersections != NULL) 00679 pixaAddPix(pixa_display, *pix_intersections, L_CLONE); 00680 if (pix_music_mask != NULL && *pix_music_mask != NULL) 00681 pixaAddPix(pixa_display, *pix_music_mask, L_CLONE); 00682 } 00683 pixDestroy(&pix_nonlines); 00684 } 00685 00686 // Returns a list of boxes corresponding to the candidate line segments. Sets 00687 // the line_crossings member of the boxes so we can later determin the number 00688 // of intersections touched by a full line. 00689 void LineFinder::GetLineBoxes(bool horizontal_lines, 00690 Pix* pix_lines, Pix* pix_intersections, 00691 C_BLOB_LIST* line_cblobs, 00692 BLOBNBOX_LIST* line_bblobs) { 00693 // Put a single pixel crack in every line at an arbitrary spacing, 00694 // so they break up and the bounding boxes can be used to get the 00695 // direction accurately enough without needing outlines. 00696 int wpl = pixGetWpl(pix_lines); 00697 int width = pixGetWidth(pix_lines); 00698 int height = pixGetHeight(pix_lines); 00699 l_uint32* data = pixGetData(pix_lines); 00700 if (horizontal_lines) { 00701 for (int y = 0; y < height; ++y, data += wpl) { 00702 for (int x = kCrackSpacing; x < width; x += kCrackSpacing) { 00703 CLEAR_DATA_BIT(data, x); 00704 } 00705 } 00706 } else { 00707 for (int y = kCrackSpacing; y < height; y += kCrackSpacing) { 00708 memset(data + wpl * y, 0, wpl * sizeof(*data)); 00709 } 00710 } 00711 // Get the individual connected components 00712 Boxa* boxa = pixConnComp(pix_lines, NULL, 8); 00713 ConvertBoxaToBlobs(width, height, &boxa, line_cblobs); 00714 // Make the BLOBNBOXes from the C_BLOBs. 00715 C_BLOB_IT blob_it(line_cblobs); 00716 BLOBNBOX_IT bbox_it(line_bblobs); 00717 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { 00718 C_BLOB* cblob = blob_it.data(); 00719 BLOBNBOX* bblob = new BLOBNBOX(cblob); 00720 bbox_it.add_to_end(bblob); 00721 // Determine whether the line segment touches two intersections. 00722 const TBOX& bbox = bblob->bounding_box(); 00723 Box* box = boxCreate(bbox.left(), bbox.bottom(), 00724 bbox.width(), bbox.height()); 00725 bblob->set_line_crossings(NumTouchingIntersections(box, pix_intersections)); 00726 boxDestroy(&box); 00727 // Transform the bounding box prior to finding lines. To save writing 00728 // two line finders, flip x and y for horizontal lines and re-use the 00729 // tab-stop detection code. For vertical lines we still have to flip the 00730 // y-coordinates to switch from leptonica coords to tesseract coords. 00731 if (horizontal_lines) { 00732 // Note that we have Leptonica coords stored in a Tesseract box, so that 00733 // bbox.bottom(), being the MIN y coord, is actually the top, so to get 00734 // back to Leptonica coords in RemoveUnusedLineSegments, we have to 00735 // use height - box.right() as the top, which looks very odd. 00736 TBOX new_box(height - bbox.top(), bbox.left(), 00737 height - bbox.bottom(), bbox.right()); 00738 bblob->set_bounding_box(new_box); 00739 } else { 00740 TBOX new_box(bbox.left(), height - bbox.top(), 00741 bbox.right(), height - bbox.bottom()); 00742 bblob->set_bounding_box(new_box); 00743 } 00744 } 00745 } 00746 00747 } // namespace tesseract. 00748