Tesseract
3.02
|
00001 00002 // File: ccnontextdetect.cpp 00003 // Description: Connected-Component-based photo (non-text) detection. 00004 // Copyright 2011 Google Inc. All Rights Reserved. 00005 // Author: rays@google.com (Ray Smith) 00006 // Created: Sat Jun 11 10:12:01 PST 2011 00007 // 00008 // Licensed under the Apache License, Version 2.0 (the "License"); 00009 // you may not use this file except in compliance with the License. 00010 // You may obtain a copy of the License at 00011 // http://www.apache.org/licenses/LICENSE-2.0 00012 // Unless required by applicable law or agreed to in writing, software 00013 // distributed under the License is distributed on an "AS IS" BASIS, 00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 // See the License for the specific language governing permissions and 00016 // limitations under the License. 00017 // 00019 00020 #include "ccnontextdetect.h" 00021 #include "imagefind.h" 00022 #include "strokewidth.h" 00023 00024 namespace tesseract { 00025 00026 // Max number of neighbour small objects per squared gridsize before a grid 00027 // cell becomes image. 00028 const double kMaxSmallNeighboursPerPix = 1.0 / 32; 00029 // Max number of small blobs a large blob may overlap before it is rejected 00030 // and determined to be image. 00031 const int kMaxLargeOverlapsWithSmall = 3; 00032 // Max number of small blobs a medium blob may overlap before it is rejected 00033 // and determined to be image. Larger than for large blobs as medium blobs 00034 // may be complex Chinese characters. Very large Chinese characters are going 00035 // to overlap more medium blobs than small. 00036 const int kMaxMediumOverlapsWithSmall = 12; 00037 // Max number of normal blobs a large blob may overlap before it is rejected 00038 // and determined to be image. This is set higher to allow for drop caps, which 00039 // may overlap a lot of good text blobs. 00040 const int kMaxLargeOverlapsWithMedium = 12; 00041 // Multiplier of original noise_count used to test for the case of spreading 00042 // noise beyond where it should really be. 00043 const int kOriginalNoiseMultiple = 8; 00044 // Pixel padding for noise blobs when rendering on the image 00045 // mask to encourage them to join together. Make it too big and images 00046 // will fatten out too much and have to be clipped to text. 00047 const int kNoisePadding = 4; 00048 // Fraction of max_noise_count_ to be added to the noise count if there is 00049 // photo mask in the background. 00050 const double kPhotoOffsetFraction = 0.375; 00051 // Min ratio of perimeter^2/16area for a "good" blob in estimating noise 00052 // density. Good blobs are supposed to be highly likely real text. 00053 // We consider a square to have unit ratio, where A=(p/4)^2, hence the factor 00054 // of 16. Digital circles are weird and have a minimum ratio of pi/64, not 00055 // the 1/(4pi) that you would expect. 00056 const double kMinGoodTextPARatio = 1.5; 00057 00058 CCNonTextDetect::CCNonTextDetect(int gridsize, 00059 const ICOORD& bleft, const ICOORD& tright) 00060 : BlobGrid(gridsize, bleft, tright), 00061 max_noise_count_(static_cast<int>(kMaxSmallNeighboursPerPix * 00062 gridsize * gridsize)), 00063 noise_density_(NULL) { 00064 // TODO(rays) break max_noise_count_ out into an area-proportional 00065 // value, as now plus an additive constant for the number of text blobs 00066 // in the 3x3 neigbourhood - maybe 9. 00067 } 00068 00069 CCNonTextDetect::~CCNonTextDetect() { 00070 delete noise_density_; 00071 } 00072 00073 // Creates and returns a Pix with the same resolution as the original 00074 // in which 1 (black) pixels represent likely non text (photo, line drawing) 00075 // areas of the page, deleting from the blob_block the blobs that were 00076 // determined to be non-text. 00077 // The photo_map is used to bias the decision towards non-text, rather than 00078 // supplying definite decision. 00079 // The blob_block is the usual result of connected component analysis, 00080 // holding the detected blobs. 00081 // The returned Pix should be PixDestroyed after use. 00082 Pix* CCNonTextDetect::ComputeNonTextMask(bool debug, Pix* photo_map, 00083 TO_BLOCK* blob_block) { 00084 // Insert the smallest blobs into the grid. 00085 InsertBlobList(&blob_block->small_blobs); 00086 InsertBlobList(&blob_block->noise_blobs); 00087 // Add the medium blobs that don't have a good strokewidth neighbour. 00088 // Those that do go into good_grid as an antidote to spreading beyond the 00089 // real reaches of a noise region. 00090 BlobGrid good_grid(gridsize(), bleft(), tright()); 00091 BLOBNBOX_IT blob_it(&blob_block->blobs); 00092 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { 00093 BLOBNBOX* blob = blob_it.data(); 00094 double perimeter_area_ratio = blob->cblob()->perimeter() / 4.0; 00095 perimeter_area_ratio *= perimeter_area_ratio / blob->enclosed_area(); 00096 if (blob->GoodTextBlob() == 0 || perimeter_area_ratio < kMinGoodTextPARatio) 00097 InsertBBox(true, true, blob); 00098 else 00099 good_grid.InsertBBox(true, true, blob); 00100 } 00101 noise_density_ = ComputeNoiseDensity(debug, photo_map, &good_grid); 00102 good_grid.Clear(); // Not needed any more. 00103 Pix* pix = noise_density_->ThresholdToPix(max_noise_count_); 00104 if (debug) { 00105 pixWrite("junknoisemask.png", pix, IFF_PNG); 00106 } 00107 ScrollView* win = NULL; 00108 if (debug) { 00109 win = MakeWindow(0, 400, "Photo Mask Blobs"); 00110 } 00111 // Large and medium blobs are not text if they overlap with "a lot" of small 00112 // blobs. 00113 MarkAndDeleteNonTextBlobs(&blob_block->large_blobs, 00114 kMaxLargeOverlapsWithSmall, 00115 win, ScrollView::DARK_GREEN, pix); 00116 MarkAndDeleteNonTextBlobs(&blob_block->blobs, kMaxMediumOverlapsWithSmall, 00117 win, ScrollView::WHITE, pix); 00118 // Clear the grid of small blobs and insert the medium blobs. 00119 Clear(); 00120 InsertBlobList(&blob_block->blobs); 00121 MarkAndDeleteNonTextBlobs(&blob_block->large_blobs, 00122 kMaxLargeOverlapsWithMedium, 00123 win, ScrollView::DARK_GREEN, pix); 00124 // Clear again before we start deleting the blobs in the grid. 00125 Clear(); 00126 MarkAndDeleteNonTextBlobs(&blob_block->noise_blobs, -1, 00127 win, ScrollView::CORAL, pix); 00128 MarkAndDeleteNonTextBlobs(&blob_block->small_blobs, -1, 00129 win, ScrollView::GOLDENROD, pix); 00130 MarkAndDeleteNonTextBlobs(&blob_block->blobs, -1, 00131 win, ScrollView::WHITE, pix); 00132 if (debug) { 00133 #ifndef GRAPHICS_DISABLED 00134 win->Update(); 00135 #endif // GRAPHICS_DISABLED 00136 pixWrite("junkccphotomask.png", pix, IFF_PNG); 00137 #ifndef GRAPHICS_DISABLED 00138 delete win->AwaitEvent(SVET_DESTROY); 00139 #endif // GRAPHICS_DISABLED 00140 delete win; 00141 } 00142 return pix; 00143 } 00144 00145 // Computes and returns the noise_density IntGrid, at the same gridsize as 00146 // this by summing the number of small elements in a 3x3 neighbourhood of 00147 // each grid cell. good_grid is filled with blobs that are considered most 00148 // likely good text, and this is filled with small and medium blobs that are 00149 // more likely non-text. 00150 // The photo_map is used to bias the decision towards non-text, rather than 00151 // supplying definite decision. 00152 IntGrid* CCNonTextDetect::ComputeNoiseDensity(bool debug, Pix* photo_map, 00153 BlobGrid* good_grid) { 00154 IntGrid* noise_counts = CountCellElements(); 00155 IntGrid* noise_density = noise_counts->NeighbourhoodSum(); 00156 IntGrid* good_counts = good_grid->CountCellElements(); 00157 // Now increase noise density in photo areas, to bias the decision and 00158 // minimize hallucinated text on image, but trim the noise_density where 00159 // there are good blobs and the original count is low in non-photo areas, 00160 // indicating that most of the result came from neighbouring cells. 00161 int height = pixGetHeight(photo_map); 00162 int photo_offset = IntCastRounded(max_noise_count_ * kPhotoOffsetFraction); 00163 for (int y = 0; y < gridheight(); ++y) { 00164 for (int x = 0; x < gridwidth(); ++x) { 00165 int noise = noise_density->GridCellValue(x, y); 00166 if (max_noise_count_ < noise + photo_offset && 00167 noise <= max_noise_count_) { 00168 // Test for photo. 00169 int left = x * gridsize(); 00170 int right = left + gridsize(); 00171 int bottom = height - y * gridsize(); 00172 int top = bottom - gridsize(); 00173 if (ImageFind::BoundsWithinRect(photo_map, &left, &top, &right, 00174 &bottom)) { 00175 noise_density->SetGridCell(x, y, noise + photo_offset); 00176 } 00177 } 00178 if (debug && noise > max_noise_count_ && 00179 good_counts->GridCellValue(x, y) > 0) { 00180 tprintf("At %d, %d, noise = %d, good=%d, orig=%d, thr=%d\n", 00181 x * gridsize(), y * gridsize(), 00182 noise_density->GridCellValue(x, y), 00183 good_counts->GridCellValue(x, y), 00184 noise_counts->GridCellValue(x, y), max_noise_count_); 00185 } 00186 if (noise > max_noise_count_ && 00187 good_counts->GridCellValue(x, y) > 0 && 00188 noise_counts->GridCellValue(x, y) * kOriginalNoiseMultiple <= 00189 max_noise_count_) { 00190 noise_density->SetGridCell(x, y, 0); 00191 } 00192 } 00193 } 00194 delete noise_counts; 00195 delete good_counts; 00196 return noise_density; 00197 } 00198 00199 // Helper to expand a box in one of the 4 directions by the given pad, 00200 // provided it does not expand into any cell with a zero noise density. 00201 // If that is not possible, try expanding all round by a small constant. 00202 static TBOX AttemptBoxExpansion(const TBOX& box, const IntGrid& noise_density, 00203 int pad) { 00204 TBOX expanded_box(box); 00205 expanded_box.set_right(box.right() + pad); 00206 if (!noise_density.AnyZeroInRect(expanded_box)) 00207 return expanded_box; 00208 expanded_box = box; 00209 expanded_box.set_left(box.left() - pad); 00210 if (!noise_density.AnyZeroInRect(expanded_box)) 00211 return expanded_box; 00212 expanded_box = box; 00213 expanded_box.set_top(box.top() + pad); 00214 if (!noise_density.AnyZeroInRect(expanded_box)) 00215 return expanded_box; 00216 expanded_box = box; 00217 expanded_box.set_bottom(box.bottom() + pad); 00218 if (!noise_density.AnyZeroInRect(expanded_box)) 00219 return expanded_box; 00220 expanded_box = box; 00221 expanded_box.pad(kNoisePadding, kNoisePadding); 00222 if (!noise_density.AnyZeroInRect(expanded_box)) 00223 return expanded_box; 00224 return box; 00225 } 00226 00227 // Tests each blob in the list to see if it is certain non-text using 2 00228 // conditions: 00229 // 1. blob overlaps a cell with high value in noise_density_ (previously set 00230 // by ComputeNoiseDensity). 00231 // OR 2. The blob overlaps more than max_blob_overlaps in *this grid. This 00232 // condition is disabled with max_blob_overlaps == -1. 00233 // If it does, the blob is declared non-text, and is used to mark up the 00234 // nontext_mask. Such blobs are fully deleted, and non-noise blobs have their 00235 // neighbours reset, as they may now point to deleted data. 00236 // WARNING: The blobs list blobs may be in the *this grid, but they are 00237 // not removed. If any deleted blobs might be in *this, then this must be 00238 // Clear()ed immediately after MarkAndDeleteNonTextBlobs is called. 00239 // If the win is not NULL, deleted blobs are drawn on it in red, and kept 00240 // blobs are drawn on it in ok_color. 00241 void CCNonTextDetect::MarkAndDeleteNonTextBlobs(BLOBNBOX_LIST* blobs, 00242 int max_blob_overlaps, 00243 ScrollView* win, 00244 ScrollView::Color ok_color, 00245 Pix* nontext_mask) { 00246 int imageheight = tright().y() - bleft().x(); 00247 BLOBNBOX_IT blob_it(blobs); 00248 BLOBNBOX_LIST dead_blobs; 00249 BLOBNBOX_IT dead_it(&dead_blobs); 00250 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { 00251 BLOBNBOX* blob = blob_it.data(); 00252 TBOX box = blob->bounding_box(); 00253 if (!noise_density_->RectMostlyOverThreshold(box, max_noise_count_) && 00254 (max_blob_overlaps < 0 || 00255 !BlobOverlapsTooMuch(blob, max_blob_overlaps))) { 00256 blob->ClearNeighbours(); 00257 #ifndef GRAPHICS_DISABLED 00258 if (win != NULL) 00259 blob->plot(win, ok_color, ok_color); 00260 #endif // GRAPHICS_DISABLED 00261 } else { 00262 if (noise_density_->AnyZeroInRect(box)) { 00263 // There is a danger that the bounding box may overlap real text, so 00264 // we need to render the outline. 00265 Pix* blob_pix = blob->cblob()->render_outline(); 00266 pixRasterop(nontext_mask, box.left(), imageheight - box.top(), 00267 box.width(), box.height(), PIX_SRC | PIX_DST, 00268 blob_pix, 0, 0); 00269 pixDestroy(&blob_pix); 00270 } else { 00271 if (box.area() < gridsize() * gridsize()) { 00272 // It is a really bad idea to make lots of small components in the 00273 // photo mask, so try to join it to a bigger area by expanding the 00274 // box in a way that does not touch any zero noise density cell. 00275 box = AttemptBoxExpansion(box, *noise_density_, gridsize()); 00276 } 00277 // All overlapped cells are non-zero, so just mark the rectangle. 00278 pixRasterop(nontext_mask, box.left(), imageheight - box.top(), 00279 box.width(), box.height(), PIX_SET, NULL, 0, 0); 00280 } 00281 #ifndef GRAPHICS_DISABLED 00282 if (win != NULL) 00283 blob->plot(win, ScrollView::RED, ScrollView::RED); 00284 #endif // GRAPHICS_DISABLED 00285 // It is safe to delete the cblob now, as it isn't used by the grid 00286 // or BlobOverlapsTooMuch, and the BLOBNBOXes will go away with the 00287 // dead_blobs list. 00288 // TODO(rays) delete the delete when the BLOBNBOX destructor deletes 00289 // the cblob. 00290 delete blob->cblob(); 00291 dead_it.add_to_end(blob_it.extract()); 00292 } 00293 } 00294 } 00295 00296 // Returns true if the given blob overlaps more than max_overlaps blobs 00297 // in the current grid. 00298 bool CCNonTextDetect::BlobOverlapsTooMuch(BLOBNBOX* blob, int max_overlaps) { 00299 // Search the grid to see what intersects it. 00300 // Setup a Rectangle search for overlapping this blob. 00301 BlobGridSearch rsearch(this); 00302 TBOX box = blob->bounding_box(); 00303 rsearch.StartRectSearch(box); 00304 rsearch.SetUniqueMode(true); 00305 BLOBNBOX* neighbour; 00306 int overlap_count = 0; 00307 while (overlap_count <= max_overlaps && 00308 (neighbour = rsearch.NextRectSearch()) != NULL) { 00309 if (box.major_overlap(neighbour->bounding_box())) { 00310 ++overlap_count; 00311 if (overlap_count > max_overlaps) 00312 return true; 00313 } 00314 } 00315 return false; 00316 } 00317 00318 } // namespace tesseract.