Tesseract
3.02
|
00001 /********************************************************************** 00002 * File: normalis.cpp (Formerly denorm.c) 00003 * Description: Code for the DENORM class. 00004 * Author: Ray Smith 00005 * Created: Thu Apr 23 09:22:43 BST 1992 00006 * 00007 * (C) Copyright 1992, Hewlett-Packard Ltd. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 * 00018 **********************************************************************/ 00019 #include "mfcpch.h" // Precompiled header include must be first. 00020 00021 #include "normalis.h" 00022 00023 #include <stdlib.h> 00024 00025 #include "allheaders.h" 00026 #include "blobs.h" 00027 #include "helpers.h" 00028 #include "ocrblock.h" 00029 #include "unicharset.h" 00030 #include "werd.h" 00031 00032 00033 DENORM::DENORM() { 00034 Init(); 00035 } 00036 00037 // TODO(rays) Abolish all non-default constructors. 00038 DENORM::DENORM(float x, float scaling, ROW *src) { 00039 Init(); 00040 x_origin_ = x; // just copy 00041 y_origin_ = 0.0f; 00042 x_scale_ = y_scale_ = scaling; 00043 row_ = src; 00044 } 00045 00046 DENORM::DENORM(float x, // from same pieces 00047 float scaling, 00048 double line_m, // default line: y = mx + c 00049 double line_c, 00050 inT16 seg_count, // no of segments 00051 DENORM_SEG *seg_pts, // actual segments 00052 BOOL8 using_row, // as baseline 00053 ROW *src) { 00054 Init(); 00055 x_origin_ = x; // just copy 00056 y_origin_ = line_c; 00057 ASSERT_HOST(line_m == 0.0); 00058 x_scale_ = y_scale_ = scaling; 00059 SetSegments(seg_pts, seg_count); 00060 } 00061 00062 00063 DENORM::DENORM(const DENORM &src) { 00064 num_segs_ = 0; 00065 segs_ = NULL; 00066 rotation_ = NULL; 00067 *this = src; 00068 } 00069 00070 00071 DENORM & DENORM::operator=(const DENORM & src) { 00072 Clear(); 00073 inverse_ = src.inverse_; 00074 pix_ = src.pix_; 00075 block_ = src.block_; 00076 row_ = src.row_; 00077 if (src.rotation_ == NULL) 00078 rotation_ = NULL; 00079 else 00080 rotation_ = new FCOORD(*src.rotation_); 00081 predecessor_ = src.predecessor_; 00082 SetSegments(src.segs_, src.num_segs_); 00083 x_origin_ = src.x_origin_; 00084 y_origin_ = src.y_origin_; 00085 x_scale_ = src.x_scale_; 00086 y_scale_ = src.y_scale_; 00087 final_xshift_ = src.final_xshift_; 00088 final_yshift_ = src.final_yshift_; 00089 return *this; 00090 } 00091 00092 DENORM::~DENORM() { 00093 Clear(); 00094 } 00095 00096 // Setup for a baseline normalization. If there are segs, then they 00097 // are used, otherwise, if there is a row, that is used, otherwise the 00098 // bottom of the word_box is used for the baseline. 00099 void DENORM::SetupBLNormalize(const BLOCK* block, const ROW* row, 00100 float x_height, const TBOX& word_box, 00101 int num_segs, const DENORM_SEG* segs) { 00102 float scale = kBlnXHeight / x_height; 00103 float x_origin = (word_box.left() + word_box.right()) / 2.0f; 00104 float y_origin = 0.0f; 00105 if (num_segs == 0 && row == NULL) { 00106 y_origin = word_box.bottom(); 00107 } 00108 SetupNormalization(block, row, NULL, NULL, segs, num_segs, 00109 x_origin, y_origin, scale, scale, 00110 0.0f, static_cast<float>(kBlnBaselineOffset)); 00111 } 00112 00113 // Initializes the denorm for a transformation. For details see the large 00114 // comment in normalis.h. 00115 // Arguments: 00116 // block: if not NULL, then this is the first transformation, and 00117 // block->re_rotation() needs to be used after the Denorm 00118 // transformation to get back to the image coords. 00119 // row: if not NULL, then row->baseline(x) is added to the y_origin, unless 00120 // segs is not NULL and num_segs > 0, in which case they are used. 00121 // rotation: if not NULL, apply this rotation after translation to the 00122 // origin and scaling. (Usually a classify rotation.) 00123 // predecessor: if not NULL, then predecessor has been applied to the 00124 // input space and needs to be undone to complete the inverse. 00125 // segs: if not NULL and num_segs > 0, then the segs provide the y_origin 00126 // and the y_scale at a given source x. 00127 // num_segs: the number of segs. 00128 // The above pointers are not owned by this DENORM and are assumed to live 00129 // longer than this denorm, except rotation, which is deep copied on input. 00130 // 00131 // x_origin: The x origin which will be mapped to final_xshift in the result. 00132 // y_origin: The y origin which will be mapped to final_yshift in the result. 00133 // Added to result of row->baseline(x) if not NULL. 00134 // 00135 // x_scale: scale factor for the x-coordinate. 00136 // y_scale: scale factor for the y-coordinate. Ignored if segs is given. 00137 // Note that these scale factors apply to the same x and y system as the 00138 // x-origin and y-origin apply, ie after any block rotation, but before 00139 // the rotation argument is applied. 00140 // 00141 // final_xshift: The x component of the final translation. 00142 // final_yshift: The y component of the final translation. 00143 void DENORM::SetupNormalization(const BLOCK* block, 00144 const ROW* row, 00145 const FCOORD* rotation, 00146 const DENORM* predecessor, 00147 const DENORM_SEG* segs, int num_segs, 00148 float x_origin, float y_origin, 00149 float x_scale, float y_scale, 00150 float final_xshift, float final_yshift) { 00151 Clear(); 00152 block_ = block; 00153 row_ = row; 00154 if (rotation == NULL) 00155 rotation_ = NULL; 00156 else 00157 rotation_ = new FCOORD(*rotation); 00158 predecessor_ = predecessor; 00159 SetSegments(segs, num_segs); 00160 x_origin_ = x_origin; 00161 y_origin_ = y_origin; 00162 x_scale_ = x_scale; 00163 y_scale_ = y_scale; 00164 final_xshift_ = final_xshift; 00165 final_yshift_ = final_yshift; 00166 } 00167 00168 // Transforms the given coords one step forward to normalized space, without 00169 // using any block rotation or predecessor. 00170 void DENORM::LocalNormTransform(const TPOINT& pt, TPOINT* transformed) const { 00171 FCOORD src_pt(pt.x, pt.y); 00172 FCOORD float_result; 00173 LocalNormTransform(src_pt, &float_result); 00174 transformed->x = IntCastRounded(float_result.x()); 00175 transformed->y = IntCastRounded(float_result.y()); 00176 } 00177 void DENORM::LocalNormTransform(const FCOORD& pt, FCOORD* transformed) const { 00178 FCOORD translated(pt.x() - x_origin_, pt.y() - YOriginAtOrigX(pt.x())); 00179 translated.set_x(translated.x() * x_scale_); 00180 translated.set_y(translated.y() * YScaleAtOrigX(pt.x())); 00181 if (rotation_ != NULL) 00182 translated.rotate(*rotation_); 00183 transformed->set_x(translated.x() + final_xshift_); 00184 transformed->set_y(translated.y() + final_yshift_); 00185 } 00186 00187 // Transforms the given coords forward to normalized space using the 00188 // full transformation sequence defined by the block rotation, the 00189 // predecessors, deepest first, and finally this. 00190 void DENORM::NormTransform(const TPOINT& pt, TPOINT* transformed) const { 00191 FCOORD src_pt(pt.x, pt.y); 00192 FCOORD float_result; 00193 NormTransform(src_pt, &float_result); 00194 transformed->x = IntCastRounded(float_result.x()); 00195 transformed->y = IntCastRounded(float_result.y()); 00196 } 00197 void DENORM::NormTransform(const FCOORD& pt, FCOORD* transformed) const { 00198 FCOORD src_pt(pt); 00199 if (predecessor_ != NULL) { 00200 predecessor_->NormTransform(pt, &src_pt); 00201 } else if (block_ != NULL) { 00202 FCOORD fwd_rotation(block_->re_rotation().x(), -block_->re_rotation().y()); 00203 src_pt.rotate(fwd_rotation); 00204 } 00205 LocalNormTransform(src_pt, transformed); 00206 } 00207 00208 // Transforms the given coords one step back to source space, without 00209 // using to any block rotation or predecessor. 00210 void DENORM::LocalDenormTransform(const TPOINT& pt, TPOINT* original) const { 00211 FCOORD src_pt(pt.x, pt.y); 00212 FCOORD float_result; 00213 LocalDenormTransform(src_pt, &float_result); 00214 original->x = IntCastRounded(float_result.x()); 00215 original->y = IntCastRounded(float_result.y()); 00216 } 00217 void DENORM::LocalDenormTransform(const FCOORD& pt, FCOORD* original) const { 00218 FCOORD rotated(pt.x() - final_xshift_, pt.y() - final_yshift_); 00219 if (rotation_ != NULL) { 00220 FCOORD inverse_rotation(rotation_->x(), -rotation_->y()); 00221 rotated.rotate(inverse_rotation); 00222 } 00223 original->set_x(rotated.x() / x_scale_ + x_origin_); 00224 float y_scale = y_scale_; 00225 if (num_segs_ > 0) 00226 y_scale = YScaleAtOrigX(original->x()); 00227 original->set_y(rotated.y() / y_scale + YOriginAtOrigX(original->x())); 00228 } 00229 00230 // Transforms the given coords all the way back to source image space using 00231 // the full transformation sequence defined by this and its predecesors 00232 // recursively, shallowest first, and finally any block re_rotation. 00233 void DENORM::DenormTransform(const TPOINT& pt, TPOINT* original) const { 00234 FCOORD src_pt(pt.x, pt.y); 00235 FCOORD float_result; 00236 DenormTransform(src_pt, &float_result); 00237 original->x = IntCastRounded(float_result.x()); 00238 original->y = IntCastRounded(float_result.y()); 00239 } 00240 void DENORM::DenormTransform(const FCOORD& pt, FCOORD* original) const { 00241 LocalDenormTransform(pt, original); 00242 if (predecessor_ != NULL) { 00243 predecessor_->DenormTransform(*original, original); 00244 } else if (block_ != NULL) { 00245 original->rotate(block_->re_rotation()); 00246 } 00247 } 00248 00249 // Normalize a blob using blob transformations. Less accurate, but 00250 // more accurately copies the old way. 00251 void DENORM::LocalNormBlob(TBLOB* blob) const { 00252 TBOX blob_box = blob->bounding_box(); 00253 float x_center = (blob_box.left() + blob_box.right()) / 2.0f; 00254 ICOORD translation(-IntCastRounded(x_origin_), 00255 -IntCastRounded(YOriginAtOrigX(x_center))); 00256 blob->Move(translation); 00257 // Note that the old way of scaling only allowed for a single 00258 // scale factor. 00259 float scale = YScaleAtOrigX(x_center); 00260 if (scale != 1.0f) 00261 blob->Scale(scale); 00262 if (rotation_ != NULL) 00263 blob->Rotate(*rotation_); 00264 translation.set_x(IntCastRounded(final_xshift_)); 00265 translation.set_y(IntCastRounded(final_yshift_)); 00266 blob->Move(translation); 00267 } 00268 00269 // Fills in the x-height range accepted by the given unichar_id, given its 00270 // bounding box in the usual baseline-normalized coordinates, with some 00271 // initial crude x-height estimate (such as word size) and this denoting the 00272 // transformation that was used. Returns false, and an empty range if the 00273 // bottom is a mis-fit. Returns true and empty [0, 0] range if the bottom 00274 // fits, but the top is impossible. 00275 bool DENORM::XHeightRange(int unichar_id, const UNICHARSET& unicharset, 00276 const TBOX& bbox, 00277 inT16* min_xht, inT16* max_xht) const { 00278 // Clip the top and bottom to the limit of normalized feature space. 00279 int top = ClipToRange<int>(bbox.top(), 0, kBlnCellHeight - 1); 00280 int bottom = ClipToRange<int>(bbox.bottom(), 0, kBlnCellHeight - 1); 00281 // A tolerance of yscale corresponds to 1 pixel in the image. 00282 double tolerance = y_scale(); 00283 int min_bottom, max_bottom, min_top, max_top; 00284 unicharset.get_top_bottom(unichar_id, &min_bottom, &max_bottom, 00285 &min_top, &max_top); 00286 // Default returns indicate a mis-fit. 00287 *min_xht = 0; 00288 *max_xht = 0; 00289 // Chars with a misfitting bottom might be sub/superscript/dropcap, or might 00290 // just be wrongly classified. Return an empty range so they have to be 00291 // good to be considered. 00292 if (bottom < min_bottom - tolerance || bottom > max_bottom + tolerance) { 00293 return false; 00294 } 00295 // To help very high cap/xheight ratio fonts accept the correct x-height, 00296 // and to allow the large caps in small caps to accept the xheight of the 00297 // small caps, add kBlnBaselineOffset to chars with a maximum max. 00298 if (max_top == kBlnCellHeight - 1) 00299 max_top += kBlnBaselineOffset; 00300 int height = top - kBlnBaselineOffset; 00301 double min_height = min_top - kBlnBaselineOffset - tolerance; 00302 double max_height = max_top - kBlnBaselineOffset + tolerance; 00303 if (min_height <= 0.0) { 00304 if (height <= 0 || max_height > 0) 00305 *max_xht = MAX_INT16; // Anything will do. 00306 } else if (height > 0) { 00307 int result = IntCastRounded(height * kBlnXHeight / y_scale() / min_height); 00308 *max_xht = static_cast<inT16>(ClipToRange(result, 0, MAX_INT16)); 00309 } 00310 if (max_height > 0.0 && height > 0) { 00311 int result = IntCastRounded(height * kBlnXHeight / y_scale() / max_height); 00312 *min_xht = static_cast<inT16>(ClipToRange(result, 0, MAX_INT16)); 00313 } 00314 return true; 00315 } 00316 00317 // ============== Private Code ====================== 00318 00319 // Free allocated memory and clear pointers. 00320 void DENORM::Clear() { 00321 if (segs_ != NULL) { 00322 delete [] segs_; 00323 segs_ = NULL; 00324 num_segs_ = 0; 00325 } 00326 if (rotation_ != NULL) { 00327 delete rotation_; 00328 rotation_ = NULL; 00329 } 00330 } 00331 00332 // Setup default values. 00333 void DENORM::Init() { 00334 inverse_ = false; 00335 pix_ = NULL; 00336 block_ = NULL; 00337 row_ = NULL; 00338 rotation_ = NULL; 00339 predecessor_ = NULL; 00340 segs_ = NULL; 00341 num_segs_ = 0; 00342 x_origin_ = 0.0f; 00343 y_origin_ = 0.0f; 00344 x_scale_ = 1.0f; 00345 y_scale_ = 1.0f; 00346 final_xshift_ = 0.0f; 00347 final_yshift_ = static_cast<float>(kBlnBaselineOffset); 00348 } 00349 00350 // Returns the y-origin at the original (un-normalized) x. 00351 float DENORM::YOriginAtOrigX(float orig_x) const { 00352 if (num_segs_ > 0) { 00353 const DENORM_SEG* seg = BinarySearchSegment(orig_x); 00354 if (seg->ycoord != -MAX_INT32) { 00355 return seg->ycoord; 00356 } 00357 } 00358 if (row_ != NULL) 00359 return row_->base_line(orig_x) + y_origin_; 00360 else 00361 return y_origin_; 00362 } 00363 00364 // Returns the y-scale at the original (un-normalized) x. 00365 float DENORM::YScaleAtOrigX(float orig_x) const { 00366 if (num_segs_ > 0) { 00367 const DENORM_SEG* seg = BinarySearchSegment(orig_x); 00368 if (seg->scale_factor > 0.0) 00369 return seg->scale_factor; 00370 } 00371 return y_scale_; 00372 } 00373 00374 00375 // Compare two segments by xstart for use with qsort(3) and bsearch(3) 00376 static int CompareSegByXStart(const DENORM_SEG* a, const DENORM_SEG* b) { 00377 if (a->xstart < b->xstart) 00378 return -1; 00379 else if (a->xstart > b->xstart) 00380 return 1; 00381 return 0; 00382 } 00383 00384 // Deep copy the array of segments for use as a y_origin and y_scale. 00385 void DENORM::SetSegments(const DENORM_SEG* new_segs, int seg_count) { 00386 if (segs_ != NULL) 00387 delete [] segs_; 00388 if (seg_count > 0) { 00389 segs_ = new DENORM_SEG[seg_count]; 00390 memcpy(segs_, new_segs, seg_count * sizeof(new_segs[0])); 00391 // It is possible, if infrequent that the segments may be out of order. 00392 // since we are searching with a binary search, keep them in order. 00393 qsort(segs_, num_segs_, sizeof(segs_[0]), 00394 reinterpret_cast<int(*)(const void*, const void*)>( 00395 &CompareSegByXStart)); 00396 } else { 00397 num_segs_ = 0; 00398 segs_ = NULL; 00399 } 00400 } 00401 00402 // Finds the appropriate segment for a given original x-coord 00403 const DENORM_SEG* DENORM::BinarySearchSegment(float orig_x) const { 00404 int bottom, top, middle; // binary search 00405 bottom = 0; 00406 top = num_segs_; 00407 do { 00408 middle = (bottom + top) / 2; 00409 if (segs_[middle].xstart > orig_x) 00410 top = middle; 00411 else 00412 bottom = middle; 00413 } 00414 while (top - bottom > 1); 00415 return &segs_[bottom]; 00416 }