Tesseract
3.02
|
00001 /********************************************************************** 00002 * File: normalis.h (Formerly denorm.h) 00003 * Description: Code for the DENORM class. 00004 * Author: Ray Smith 00005 * Created: Thu Apr 23 09:22:43 BST 1992 00006 * 00007 * (C) Copyright 1992, Hewlett-Packard Ltd. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 * 00018 **********************************************************************/ 00019 00020 #ifndef NORMALIS_H 00021 #define NORMALIS_H 00022 00023 #include <stdio.h> 00024 #include "host.h" 00025 00026 const int kBlnCellHeight = 256; // Full-height for baseline normalization. 00027 const int kBlnXHeight = 128; // x-height for baseline normalization. 00028 const int kBlnBaselineOffset = 64; // offset for baseline normalization. 00029 00030 struct Pix; 00031 class ROW; // Forward decl 00032 class BLOCK; 00033 class FCOORD; 00034 class TBLOB; 00035 class TBOX; 00036 class TPOINT; 00037 class UNICHARSET; 00038 00039 namespace tesseract { 00040 // Possible normalization methods. Use NEGATIVE values as these also 00041 // double up as markers for the last sub-classifier. 00042 enum NormalizationMode { 00043 NM_BASELINE = -3, // The original BL normalization mode. 00044 NM_CHAR_ISOTROPIC = -2, // Character normalization but isotropic. 00045 NM_CHAR_ANISOTROPIC = -1 // The original CN normalization mode. 00046 }; 00047 00048 } // namespace tesseract. 00049 00050 class DENORM_SEG { 00051 public: 00052 DENORM_SEG() {} 00053 00054 inT32 xstart; // start of segment 00055 inT32 ycoord; // y at segment 00056 float scale_factor; // normalized_x/scale_factor + x_center == original_x 00057 }; 00058 00059 class DENORM { 00060 public: 00061 DENORM(); 00062 DENORM(float x, float scaling, ROW *src); 00063 DENORM(float x, // from same pieces 00064 float scaling, 00065 double line_m, // default line: y = mx + c 00066 double line_c, 00067 inT16 seg_count, // no of segments 00068 DENORM_SEG *seg_pts, // actual segments 00069 BOOL8 using_row, // as baseline 00070 ROW *src); 00071 // Copying a DENORM is allowed. 00072 DENORM(const DENORM &); 00073 DENORM& operator=(const DENORM&); 00074 ~DENORM(); 00075 00076 // Setup for a baseline normalization. If there are segs, then they 00077 // are used, otherwise, if there is a row, that is used, otherwise the 00078 // bottom of the word_box is used for the baseline. 00079 void SetupBLNormalize(const BLOCK* block, const ROW* row, float x_height, 00080 const TBOX& word_box, 00081 int num_segs, const DENORM_SEG* segs); 00082 00083 // Setup the normalization transformation parameters. 00084 // The normalizations applied to a blob are as follows: 00085 // 1. An optional block layout rotation that was applied during layout 00086 // analysis to make the textlines horizontal. 00087 // 2. A normalization transformation (LocalNormTransform): 00088 // Subtract the "origin" 00089 // Apply an x,y scaling. 00090 // Apply an optional rotation. 00091 // Add back a final translation. 00092 // The origin is in the block-rotated space, and is usually something like 00093 // the x-middle of the word at the baseline. 00094 // 3. Zero or more further normalization transformations that are applied 00095 // in sequence, with a similar pattern to the first normalization transform. 00096 // 00097 // A DENORM holds the parameters of a single normalization, and can execute 00098 // both the LocalNormTransform (a forwards normalization), and the 00099 // LocalDenormTransform which is an inverse transform or de-normalization. 00100 // A DENORM may point to a predecessor DENORM, which is actually the earlier 00101 // normalization, so the full normalization sequence involves executing all 00102 // predecessors first and then the transform in "this". 00103 // Let x be image co-ordinates and that we have normalization classes A, B, C 00104 // where we first apply A then B then C to get normalized x': 00105 // x' = CBAx 00106 // Then the backwards (to original coordinates) would be: 00107 // x = A^-1 B^-1 C^-1 x' 00108 // and A = B->predecessor_ and B = C->predecessor_ 00109 // NormTransform executes all predecessors recursively, and then this. 00110 // NormTransform would be used to transform an image-based feature to 00111 // normalized space for use in a classifier 00112 // DenormTransform inverts this and then all predecessors. It can be 00113 // used to get back to the original image coordinates from normalized space. 00114 // The LocalNormTransform member executes just the transformation 00115 // in "this" without the layout rotation or any predecessors. It would be 00116 // used to run each successive normalization, eg the word normalization, 00117 // and later the character normalization. 00118 00119 // Arguments: 00120 // block: if not NULL, then this is the first transformation, and 00121 // block->re_rotation() needs to be used after the Denorm 00122 // transformation to get back to the image coords. 00123 // row: if not NULL, then row->baseline(x) is added to the y_origin, unless 00124 // segs is not NULL and num_segs > 0, in which case they are used. 00125 // rotation: if not NULL, apply this rotation after translation to the 00126 // origin and scaling. (Usually a classify rotation.) 00127 // predecessor: if not NULL, then predecessor has been applied to the 00128 // input space and needs to be undone to complete the inverse. 00129 // segs: if not NULL and num_segs > 0, then the segs provide the y_origin 00130 // and the y_scale at a given source x. 00131 // num_segs: the number of segs. 00132 // The above pointers are not owned by this DENORM and are assumed to live 00133 // longer than this denorm, except rotation and segs, which are deep 00134 // copied on input. 00135 // 00136 // x_origin: The x origin which will be mapped to final_xshift in the result. 00137 // y_origin: The y origin which will be mapped to final_yshift in the result. 00138 // Added to result of row->baseline(x) if not NULL. 00139 // 00140 // x_scale: scale factor for the x-coordinate. 00141 // y_scale: scale factor for the y-coordinate. Ignored if segs is given. 00142 // Note that these scale factors apply to the same x and y system as the 00143 // x-origin and y-origin apply, ie after any block rotation, but before 00144 // the rotation argument is applied. 00145 // 00146 // final_xshift: The x component of the final translation. 00147 // final_yshift: The y component of the final translation. 00148 // 00149 // In theory, any of the commonly used normalizations can be setup here: 00150 // * Traditional baseline normalization on a word: 00151 // SetupNormalization(block, row, NULL, NULL, NULL, 0, 00152 // box.x_middle(), 0.0f, 00153 // kBlnXHeight / x_height, kBlnXHeight / x_height, 00154 // 0, kBlnBaselineOffset); 00155 // * Numeric mode baseline normalization on a word: 00156 // SetupNormalization(block, NULL, NULL, NULL, segs, num_segs, 00157 // box.x_middle(), 0.0f, 00158 // kBlnXHeight / x_height, kBlnXHeight / x_height, 00159 // 0, kBlnBaselineOffset); 00160 // * Anisotropic character normalization used by IntFx. 00161 // SetupNormalization(NULL, NULL, NULL, denorm, NULL, 0, 00162 // centroid_x, centroid_y, 00163 // 51.2 / ry, 51.2 / rx, 128, 128); 00164 // * Normalize blob height to x-height (current OSD): 00165 // SetupNormalization(NULL, NULL, &rotation, NULL, NULL, 0, 00166 // box.rotational_x_middle(rotation), 00167 // box.rotational_y_middle(rotation), 00168 // kBlnXHeight / box.rotational_height(rotation), 00169 // kBlnXHeight / box.rotational_height(rotation), 00170 // 0, kBlnBaselineOffset); 00171 // * Secondary normalization for classification rotation (current): 00172 // FCOORD rotation = block->classify_rotation(); 00173 // float target_height = kBlnXHeight / CCStruct::kXHeightCapRatio; 00174 // SetupNormalization(NULL, NULL, &rotation, denorm, NULL, 0, 00175 // box.rotational_x_middle(rotation), 00176 // box.rotational_y_middle(rotation), 00177 // target_height / box.rotational_height(rotation), 00178 // target_height / box.rotational_height(rotation), 00179 // 0, kBlnBaselineOffset); 00180 // * Proposed new normalizations for CJK: Between them there is then 00181 // no need for further normalization at all, and the character fills the cell. 00182 // ** Replacement for baseline normalization on a word: 00183 // Scales height and width independently so that modal height and pitch 00184 // fill the cell respectively. 00185 // float cap_height = x_height / CCStruct::kXHeightCapRatio; 00186 // SetupNormalization(block, row, NULL, NULL, NULL, 0, 00187 // box.x_middle(), cap_height / 2.0f, 00188 // kBlnCellHeight / fixed_pitch, 00189 // kBlnCellHeight / cap_height, 00190 // 0, 0); 00191 // ** Secondary normalization for classification (with rotation) (proposed): 00192 // Requires a simple translation to the center of the appropriate character 00193 // cell, no further scaling and a simple rotation (or nothing) about the 00194 // cell center. 00195 // FCOORD rotation = block->classify_rotation(); 00196 // SetupNormalization(NULL, NULL, &rotation, denorm, NULL, 0, 00197 // fixed_pitch_cell_center, 00198 // 0.0f, 00199 // 1.0f, 00200 // 1.0f, 00201 // 0, 0); 00202 void SetupNormalization(const BLOCK* block, 00203 const ROW* row, 00204 const FCOORD* rotation, 00205 const DENORM* predecessor, 00206 const DENORM_SEG* segs, int num_segs, 00207 float x_origin, float y_origin, 00208 float x_scale, float y_scale, 00209 float final_xshift, float final_yshift); 00210 00211 // Transforms the given coords one step forward to normalized space, without 00212 // using any block rotation or predecessor. 00213 void LocalNormTransform(const TPOINT& pt, TPOINT* transformed) const; 00214 void LocalNormTransform(const FCOORD& pt, FCOORD* transformed) const; 00215 // Transforms the given coords forward to normalized space using the 00216 // full transformation sequence defined by the block rotation, the 00217 // predecessors, deepest first, and finally this. 00218 void NormTransform(const TPOINT& pt, TPOINT* transformed) const; 00219 void NormTransform(const FCOORD& pt, FCOORD* transformed) const; 00220 // Transforms the given coords one step back to source space, without 00221 // using to any block rotation or predecessor. 00222 void LocalDenormTransform(const TPOINT& pt, TPOINT* original) const; 00223 void LocalDenormTransform(const FCOORD& pt, FCOORD* original) const; 00224 // Transforms the given coords all the way back to source image space using 00225 // the full transformation sequence defined by this and its predecesors 00226 // recursively, shallowest first, and finally any block re_rotation. 00227 void DenormTransform(const TPOINT& pt, TPOINT* original) const; 00228 void DenormTransform(const FCOORD& pt, FCOORD* original) const; 00229 00230 // Normalize a blob using blob transformations. Less accurate, but 00231 // more accurately copies the old way. 00232 void LocalNormBlob(TBLOB* blob) const; 00233 00234 // Fills in the x-height range accepted by the given unichar_id, given its 00235 // bounding box in the usual baseline-normalized coordinates, with some 00236 // initial crude x-height estimate (such as word size) and this denoting the 00237 // transformation that was used. Returns false, and an empty range if the 00238 // bottom is a mis-fit. Returns true and empty [0, 0] range if the bottom 00239 // fits, but the top is impossible. 00240 bool XHeightRange(int unichar_id, const UNICHARSET& unicharset, 00241 const TBOX& bbox, inT16* min_xht, inT16* max_xht) const; 00242 00243 Pix* pix() const { 00244 return pix_; 00245 } 00246 void set_pix(Pix* pix) { 00247 pix_ = pix; 00248 } 00249 bool inverse() const { 00250 return inverse_; 00251 } 00252 void set_inverse(bool value) { 00253 inverse_ = value; 00254 } 00255 const DENORM* RootDenorm() const { 00256 if (predecessor_ != NULL) 00257 return predecessor_->RootDenorm(); 00258 return this; 00259 } 00260 const DENORM* predecessor() const { 00261 return predecessor_; 00262 } 00263 // Accessors - perhaps should not be needed. 00264 float x_scale() const { 00265 return x_scale_; 00266 } 00267 float y_scale() const { 00268 return y_scale_; 00269 } 00270 const ROW *row() const { 00271 return row_; 00272 } 00273 void set_row(ROW* row) { 00274 row_ = row; 00275 } 00276 const BLOCK* block() const { 00277 return block_; 00278 } 00279 void set_block(const BLOCK* block) { 00280 block_ = block; 00281 } 00282 00283 private: 00284 // Free allocated memory and clear pointers. 00285 void Clear(); 00286 // Setup default values. 00287 void Init(); 00288 00289 // Returns the y-origin at the original (un-normalized) x. 00290 float YOriginAtOrigX(float orig_x) const; 00291 00292 // Returns the y-scale at the original (un-normalized) x. 00293 float YScaleAtOrigX(float orig_x) const; 00294 00295 // Deep copy the array of segments for use as a y_origin and y_scale. 00296 void SetSegments(const DENORM_SEG* new_segs, int seg_count); 00297 00298 // Finds the appropriate segment for a given original x-coord 00299 const DENORM_SEG* BinarySearchSegment(float orig_x) const; 00300 00301 // Best available image. 00302 Pix* pix_; 00303 // True if the source image is white-on-black. 00304 bool inverse_; 00305 // Block the word came from. If not null, block->re_rotation() takes the 00306 // "untransformed" coordinates even further back to the original image. 00307 const BLOCK* block_; 00308 // Row the word came from. If not null, row->baseline() is added to y_origin_. 00309 const ROW* row_; 00310 // Rotation to apply between translation to the origin and scaling. 00311 const FCOORD* rotation_; 00312 // Previous transformation in a chain. 00313 const DENORM* predecessor_; 00314 // Array of segments used to specify local y_origin_ and y_scale_. 00315 // Owned by the DENORM. 00316 DENORM_SEG *segs_; 00317 // Size of the segs_ array. 00318 int num_segs_; 00319 // x-coordinate to be mapped to final_xshift_ in the result. 00320 float x_origin_; 00321 // y-coordinate to be mapped to final_yshift_ in the result. 00322 float y_origin_; 00323 // Scale factors for x and y coords. Applied to pre-rotation system. 00324 float x_scale_; 00325 float y_scale_; 00326 // Destination coords of the x_origin_ and y_origin_. 00327 float final_xshift_; 00328 float final_yshift_; 00329 }; 00330 #endif