// Tesseract 3.02
00001 00002 // File: ccnontextdetect.h 00003 // Description: Connected-Component-based non-text detection. 00004 // Copyright 2011 Google Inc. All Rights Reserved. 00005 // Author: rays@google.com (Ray Smith) 00006 // Created: Sat Jun 11 09:52:01 PST 2011 00007 // 00008 // Licensed under the Apache License, Version 2.0 (the "License"); 00009 // you may not use this file except in compliance with the License. 00010 // You may obtain a copy of the License at 00011 // http://www.apache.org/licenses/LICENSE-2.0 00012 // Unless required by applicable law or agreed to in writing, software 00013 // distributed under the License is distributed on an "AS IS" BASIS, 00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 // See the License for the specific language governing permissions and 00016 // limitations under the License. 00017 // 00019 00020 #ifndef TESSERACT_TEXTORD_CCPHOTODETECT_H_ 00021 #define TESSERACT_TEXTORD_CCPHOTODETECT_H_ 00022 00023 #include "blobgrid.h" 00024 #include "scrollview.h" 00025 00026 namespace tesseract { 00027 00028 // The CCNonTextDetect class contains grid-based operations on blobs to create 00029 // a full-resolution image mask analogous yet complementary to 00030 // pixGenHalftoneMask as it is better at line-drawings, graphs and charts. 00031 class CCNonTextDetect : public BlobGrid { 00032 public: 00033 CCNonTextDetect(int gridsize, const ICOORD& bleft, const ICOORD& tright); 00034 virtual ~CCNonTextDetect(); 00035 00036 // Creates and returns a Pix with the same resolution as the original 00037 // in which 1 (black) pixels represent likely non text (photo, line drawing) 00038 // areas of the page, deleting from the blob_block the blobs that were 00039 // determined to be non-text. 00040 // The photo_map (binary image mask) is used to bias the decision towards 00041 // non-text, rather than supplying a definite decision. 
00042 // The blob_block is the usual result of connected component analysis, 00043 // holding the detected blobs. 00044 // The returned Pix should be PixDestroyed after use. 00045 Pix* ComputeNonTextMask(bool debug, Pix* photo_map, TO_BLOCK* blob_block); 00046 00047 private: 00048 // Computes and returns the noise_density IntGrid, at the same gridsize as 00049 // this by summing the number of small elements in a 3x3 neighbourhood of 00050 // each grid cell. good_grid is filled with blobs that are considered most 00051 // likely good text, and this is filled with small and medium blobs that are 00052 // more likely non-text. 00053 // The photo_map is used to bias the decision towards non-text, rather than 00054 // supplying definite decision. 00055 IntGrid* ComputeNoiseDensity(bool debug, Pix* photo_map, BlobGrid* good_grid); 00056 00057 // Tests each blob in the list to see if it is certain non-text using 2 00058 // conditions: 00059 // 1. blob overlaps a cell with high value in noise_density_ (previously set 00060 // by ComputeNoiseDensity). 00061 // OR 2. The blob overlaps more than max_blob_overlaps in *this grid. This 00062 // condition is disabled with max_blob_overlaps == -1. 00063 // If it does, the blob is declared non-text, and is used to mark up the 00064 // nontext_mask. Such blobs are fully deleted, and non-noise blobs have their 00065 // neighbours reset, as they may now point to deleted data. 00066 // WARNING: The blobs list blobs may be in the *this grid, but they are 00067 // not removed. If any deleted blobs might be in *this, then this must be 00068 // Clear()ed immediately after MarkAndDeleteNonTextBlobs is called. 
00069 // If the win is not NULL, deleted blobs are drawn on it in red, and kept 00070 void MarkAndDeleteNonTextBlobs(BLOBNBOX_LIST* blobs, 00071 int max_blob_overlaps, 00072 ScrollView* win, ScrollView::Color ok_color, 00073 Pix* nontext_mask); 00074 // Returns true if the given blob overlaps more than max_overlaps blobs 00075 // in the current grid. 00076 bool BlobOverlapsTooMuch(BLOBNBOX* blob, int max_overlaps); 00077 00078 // Max entry in noise_density_ before the cell is declared noisy. 00079 int max_noise_count_; 00080 // Completed noise density map, which we keep around to use for secondary 00081 // noise detection. 00082 IntGrid* noise_density_; 00083 }; 00084 00085 } // namespace tesseract. 00086 00087 #endif // TESSERACT_TEXTORD_CCPHOTODETECT_H_