Tesseract
3.02
|
00001 /* -*-C-*- 00002 ******************************************************************************** 00003 * 00004 * File: blobs.h (Formerly blobs.h) 00005 * Description: Blob definition 00006 * Author: Mark Seaman, OCR Technology 00007 * Created: Fri Oct 27 15:39:52 1989 00008 * Modified: Thu Mar 28 15:33:38 1991 (Mark Seaman) marks@hpgrlt 00009 * Language: C 00010 * Package: N/A 00011 * Status: Experimental (Do Not Distribute) 00012 * 00013 * (c) Copyright 1989, Hewlett-Packard Company. 00014 ** Licensed under the Apache License, Version 2.0 (the "License"); 00015 ** you may not use this file except in compliance with the License. 00016 ** You may obtain a copy of the License at 00017 ** http://www.apache.org/licenses/LICENSE-2.0 00018 ** Unless required by applicable law or agreed to in writing, software 00019 ** distributed under the License is distributed on an "AS IS" BASIS, 00020 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00021 ** See the License for the specific language governing permissions and 00022 ** limitations under the License. 00023 * 00024 *********************************************************************************/ 00025 00026 #ifndef BLOBS_H 00027 #define BLOBS_H 00028 00029 /*---------------------------------------------------------------------- 00030 I n c l u d e s 00031 ----------------------------------------------------------------------*/ 00032 #include "clst.h" 00033 #include "rect.h" 00034 #include "vecfuncs.h" 00035 00036 class BLOCK; 00037 class C_BLOB; 00038 class DENORM; 00039 class ROW; 00040 class WERD; 00041 00042 /*---------------------------------------------------------------------- 00043 T y p e s 00044 ----------------------------------------------------------------------*/ 00045 #define EDGEPTFLAGS 4 /*concavity,length etc. */ 00046 00047 typedef struct 00048 { /* Widths of pieces */ 00049 int num_chars; 00050 int widths[1]; 00051 } WIDTH_RECORD; 00052 00053 struct TPOINT { 00054 TPOINT(): x(0), y(0) {} 00055 TPOINT(inT16 vx, inT16 vy) : x(vx), y(vy) {} 00056 TPOINT(const ICOORD &ic) : x(ic.x()), y(ic.y()) {} 00057 00058 void operator+=(const TPOINT& other) { 00059 x += other.x; 00060 y += other.y; 00061 } 00062 void operator/=(int divisor) { 00063 x /= divisor; 00064 y /= divisor; 00065 } 00066 00067 inT16 x; // absolute x coord. 00068 inT16 y; // absolute y coord. 00069 }; 00070 typedef TPOINT VECTOR; // structure for coordinates. 00071 00072 struct EDGEPT { 00073 EDGEPT() : next(NULL), prev(NULL) { 00074 memset(flags, 0, EDGEPTFLAGS * sizeof(flags[0])); 00075 } 00076 EDGEPT(const EDGEPT& src) : next(NULL), prev(NULL) { 00077 CopyFrom(src); 00078 } 00079 EDGEPT& operator=(const EDGEPT& src) { 00080 CopyFrom(src); 00081 return *this; 00082 } 00083 // Copies the data elements, but leaves the pointers untouched. 00084 void CopyFrom(const EDGEPT& src) { 00085 pos = src.pos; 00086 vec = src.vec; 00087 memcpy(flags, src.flags, EDGEPTFLAGS * sizeof(flags[0])); 00088 } 00089 // Accessors to hide or reveal a cut edge from feature extractors. 00090 void Hide() { 00091 flags[0] = true; 00092 } 00093 void Reveal() { 00094 flags[0] = false; 00095 } 00096 bool IsHidden() const { 00097 return flags[0] != 0; 00098 } 00099 00100 TPOINT pos; // position 00101 VECTOR vec; // vector to next point 00102 // TODO(rays) Remove flags and replace with 00103 // is_hidden, runlength, dir, and fixed. The only use 00104 // of the flags other than is_hidden is in polyaprx.cpp. 00105 char flags[EDGEPTFLAGS]; // concavity, length etc 00106 EDGEPT* next; // anticlockwise element 00107 EDGEPT* prev; // clockwise element 00108 }; 00109 00110 // For use in chop and findseam to keep a list of which EDGEPTs were inserted. 00111 CLISTIZEH(EDGEPT); 00112 00113 struct TESSLINE { 00114 TESSLINE() : is_hole(false), loop(NULL), next(NULL) {} 00115 TESSLINE(const TESSLINE& src) : loop(NULL), next(NULL) { 00116 CopyFrom(src); 00117 } 00118 ~TESSLINE() { 00119 Clear(); 00120 } 00121 TESSLINE& operator=(const TESSLINE& src) { 00122 CopyFrom(src); 00123 return *this; 00124 } 00125 // Consume the circular list of EDGEPTs to make a TESSLINE. 00126 static TESSLINE* BuildFromOutlineList(EDGEPT* outline); 00127 // Copies the data and the outline, but leaves next untouched. 00128 void CopyFrom(const TESSLINE& src); 00129 // Deletes owned data. 00130 void Clear(); 00131 // Normalize in-place using the DENORM. 00132 void Normalize(const DENORM& denorm); 00133 // Rotates by the given rotation in place. 00134 void Rotate(const FCOORD rotation); 00135 // Moves by the given vec in place. 00136 void Move(const ICOORD vec); 00137 // Scales by the given factor in place. 00138 void Scale(float factor); 00139 // Sets up the start and vec members of the loop from the pos members. 00140 void SetupFromPos(); 00141 // Recomputes the bounding box from the points in the loop. 00142 void ComputeBoundingBox(); 00143 // Computes the min and max cross product of the outline points with the 00144 // given vec and returns the results in min_xp and max_xp. Geometrically 00145 // this is the left and right edge of the outline perpendicular to the 00146 // given direction, but to get the distance units correct, you would 00147 // have to divide by the modulus of vec. 00148 void MinMaxCrossProduct(const TPOINT vec, int* min_xp, int* max_xp) const; 00149 00150 TBOX bounding_box() const; 00151 // Returns true if the point is contained within the outline box. 00152 bool Contains(const TPOINT& pt) { 00153 return topleft.x <= pt.x && pt.x <= botright.x && 00154 botright.y <= pt.y && pt.y <= topleft.y; 00155 } 00156 00157 void plot(ScrollView* window, ScrollView::Color color, 00158 ScrollView::Color child_color); 00159 00160 int BBArea() const { 00161 return (botright.x - topleft.x) * (topleft.y - botright.y); 00162 } 00163 00164 TPOINT topleft; // Top left of loop. 00165 TPOINT botright; // Bottom right of loop. 00166 TPOINT start; // Start of loop. 00167 bool is_hole; // True if this is a hole/child outline. 00168 EDGEPT *loop; // Edgeloop. 00169 TESSLINE *next; // Next outline in blob. 00170 }; // Outline structure. 00171 00172 struct TBLOB { 00173 TBLOB() : outlines(NULL), next(NULL) {} 00174 TBLOB(const TBLOB& src) : outlines(NULL), next(NULL) { 00175 CopyFrom(src); 00176 } 00177 ~TBLOB() { 00178 Clear(); 00179 } 00180 TBLOB& operator=(const TBLOB& src) { 00181 CopyFrom(src); 00182 return *this; 00183 } 00184 // Factory to build a TBLOB from a C_BLOB with polygonal 00185 // approximation along the way. 00186 static TBLOB* PolygonalCopy(C_BLOB* src); 00187 // Normalizes the blob for classification only if needed. 00188 // (Normally this means a non-zero classify rotation.) 00189 // If no Normalization is needed, then NULL is returned, and the denorm is 00190 // unchanged. Otherwise a new TBLOB is returned and the denorm points to 00191 // a new DENORM. In this case, both the TBLOB and DENORM must be deleted. 00192 TBLOB* ClassifyNormalizeIfNeeded(const DENORM** denorm) const; 00193 // Copies the data and the outlines, but leaves next untouched. 00194 void CopyFrom(const TBLOB& src); 00195 // Deletes owned data. 00196 void Clear(); 00197 // Normalize in-place using the DENORM. 00198 void Normalize(const DENORM& denorm); 00199 // Rotates by the given rotation in place. 00200 void Rotate(const FCOORD rotation); 00201 // Moves by the given vec in place. 00202 void Move(const ICOORD vec); 00203 // Scales by the given factor in place. 00204 void Scale(float factor); 00205 // Recomputes the bounding boxes of the outlines. 00206 void ComputeBoundingBoxes(); 00207 00208 // Returns the number of outlines. 00209 int NumOutlines() const; 00210 00211 TBOX bounding_box() const; 00212 00213 void plot(ScrollView* window, ScrollView::Color color, 00214 ScrollView::Color child_color); 00215 00216 int BBArea() const { 00217 int total_area = 0; 00218 for (TESSLINE* outline = outlines; outline != NULL; outline = outline->next) 00219 total_area += outline->BBArea(); 00220 return total_area; 00221 } 00222 00223 TESSLINE *outlines; // List of outlines in blob. 00224 TBLOB *next; // Next blob in block. 00225 }; // Blob structure. 00226 00227 int count_blobs(TBLOB *blobs); 00228 00229 struct TWERD { 00230 TWERD() : blobs(NULL), latin_script(false), next(NULL) {} 00231 TWERD(const TWERD& src) : blobs(NULL), next(NULL) { 00232 CopyFrom(src); 00233 } 00234 ~TWERD() { 00235 Clear(); 00236 } 00237 TWERD& operator=(const TWERD& src) { 00238 CopyFrom(src); 00239 return *this; 00240 } 00241 // Factory to build a TWERD from a (C_BLOB) WERD, with polygonal 00242 // approximation along the way. 00243 static TWERD* PolygonalCopy(WERD* src); 00244 // Setup for Baseline normalization, recording the normalization in the 00245 // DENORM, but doesn't do any normalization. 00246 void SetupBLNormalize(const BLOCK* block, const ROW* row, 00247 float x_height, bool numeric_mode, 00248 DENORM* denorm) const; 00249 // Normalize in-place using the DENORM. 00250 void Normalize(const DENORM& denorm); 00251 // Copies the data and the blobs, but leaves next untouched. 00252 void CopyFrom(const TWERD& src); 00253 // Deletes owned data. 00254 void Clear(); 00255 // Recomputes the bounding boxes of the blobs. 00256 void ComputeBoundingBoxes(); 00257 00258 // Returns the number of blobs in the word. 00259 int NumBlobs() const { 00260 return count_blobs(blobs); 00261 } 00262 TBOX bounding_box() const; 00263 00264 // Merges the blobs from start to end, not including end, and deletes 00265 // the blobs between start and end. 00266 void MergeBlobs(int start, int end); 00267 00268 void plot(ScrollView* window); 00269 00270 TBLOB* blobs; // blobs in word. 00271 bool latin_script; // This word is in a latin-based script. 00272 TWERD* next; // next word. 00273 }; 00274 00275 /*---------------------------------------------------------------------- 00276 M a c r o s 00277 ----------------------------------------------------------------------*/ 00278 /********************************************************************** 00279 * free_widths 00280 * 00281 * Free the memory taken up by a width array. 00282 **********************************************************************/ 00283 #define free_widths(w) \ 00284 if (w) memfree (w) 00285 00286 /*---------------------------------------------------------------------- 00287 F u n c t i o n s 00288 ----------------------------------------------------------------------*/ 00289 // TODO(rays) This will become a member of TBLOB when TBLOB's definition 00290 // moves to blobs.h 00291 00292 // Returns the center of blob's bounding box in origin. 00293 void blob_origin(TBLOB *blob, TPOINT *origin); 00294 00295 /*blob to compute on */ 00296 WIDTH_RECORD *blobs_widths(TBLOB *blobs); 00297 00298 bool divisible_blob(TBLOB *blob, bool italic_blob, TPOINT* location); 00299 00300 void divide_blobs(TBLOB *blob, TBLOB *other_blob, bool italic_blob, 00301 const TPOINT& location); 00302 00303 #endif