Tesseract  3.02
tesseract-ocr/ccstruct/normalis.cpp
Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        normalis.cpp  (Formerly denorm.c)
00003  * Description: Code for the DENORM class.
00004  * Author:      Ray Smith
00005  * Created:     Thu Apr 23 09:22:43 BST 1992
00006  *
00007  * (C) Copyright 1992, Hewlett-Packard Ltd.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 #include "mfcpch.h"  // Precompiled header include must be first.
00020 
00021 #include "normalis.h"
00022 
00023 #include <stdlib.h>
00024 
00025 #include "allheaders.h"
00026 #include "blobs.h"
00027 #include "helpers.h"
00028 #include "ocrblock.h"
00029 #include "unicharset.h"
00030 #include "werd.h"
00031 
00032 
00033 DENORM::DENORM() {
00034   Init();
00035 }
00036 
00037 // TODO(rays) Abolish all non-default constructors.
00038 DENORM::DENORM(float x, float scaling, ROW *src) {
00039   Init();
00040   x_origin_ = x;              // just copy
00041   y_origin_ = 0.0f;
00042   x_scale_ = y_scale_ = scaling;
00043   row_ = src;
00044 }
00045 
00046 DENORM::DENORM(float x,              // from same pieces
00047                float scaling,
00048                double line_m,        // default line: y = mx + c
00049                double line_c,
00050                inT16 seg_count,      // no of segments
00051                DENORM_SEG *seg_pts,  // actual segments
00052                BOOL8 using_row,      // as baseline
00053                ROW *src) {
00054   Init();
00055   x_origin_ = x;                  // just copy
00056   y_origin_ = line_c;
00057   ASSERT_HOST(line_m == 0.0);
00058   x_scale_ = y_scale_ = scaling;
00059   SetSegments(seg_pts, seg_count);
00060 }
00061 
00062 
00063 DENORM::DENORM(const DENORM &src) {
00064   num_segs_ = 0;
00065   segs_ = NULL;
00066   rotation_ = NULL;
00067   *this = src;
00068 }
00069 
00070 
00071 DENORM & DENORM::operator=(const DENORM & src) {
00072   Clear();
00073   inverse_ = src.inverse_;
00074   pix_ = src.pix_;
00075   block_ = src.block_;
00076   row_ = src.row_;
00077   if (src.rotation_ == NULL)
00078     rotation_ = NULL;
00079   else
00080     rotation_ = new FCOORD(*src.rotation_);
00081   predecessor_ = src.predecessor_;
00082   SetSegments(src.segs_, src.num_segs_);
00083   x_origin_ = src.x_origin_;
00084   y_origin_ = src.y_origin_;
00085   x_scale_ = src.x_scale_;
00086   y_scale_ = src.y_scale_;
00087   final_xshift_ = src.final_xshift_;
00088   final_yshift_ = src.final_yshift_;
00089   return *this;
00090 }
00091 
00092 DENORM::~DENORM() {
00093   Clear();
00094 }
00095 
00096 // Setup for a baseline normalization. If there are segs, then they
00097 // are used, otherwise, if there is a row, that is used, otherwise the
00098 // bottom of the word_box is used for the baseline.
00099 void DENORM::SetupBLNormalize(const BLOCK* block, const ROW* row,
00100                               float x_height, const TBOX& word_box,
00101                               int num_segs, const DENORM_SEG* segs) {
00102   float scale = kBlnXHeight / x_height;
00103   float x_origin = (word_box.left() + word_box.right()) / 2.0f;
00104   float y_origin = 0.0f;
00105   if (num_segs == 0 && row == NULL) {
00106     y_origin = word_box.bottom();
00107   }
00108   SetupNormalization(block, row, NULL, NULL, segs, num_segs,
00109                      x_origin, y_origin, scale, scale,
00110                      0.0f, static_cast<float>(kBlnBaselineOffset));
00111 }
00112 
00113 // Initializes the denorm for a transformation. For details see the large
00114 // comment in normalis.h.
00115 // Arguments:
00116 // block: if not NULL, then this is the first transformation, and
00117 //        block->re_rotation() needs to be used after the Denorm
00118 //        transformation to get back to the image coords.
00119 // row: if not NULL, then row->baseline(x) is added to the y_origin, unless
00120 //      segs is not NULL and num_segs > 0, in which case they are used.
00121 // rotation: if not NULL, apply this rotation after translation to the
00122 //           origin and scaling. (Usually a classify rotation.)
00123 // predecessor: if not NULL, then predecessor has been applied to the
00124 //              input space and needs to be undone to complete the inverse.
00125 // segs: if not NULL and num_segs > 0, then the segs provide the y_origin
00126 //       and the y_scale at a given source x.
00127 // num_segs: the number of segs.
00128 // The above pointers are not owned by this DENORM and are assumed to live
00129 // longer than this denorm, except rotation, which is deep copied on input.
00130 //
00131 // x_origin: The x origin which will be mapped to final_xshift in the result.
00132 // y_origin: The y origin which will be mapped to final_yshift in the result.
00133 //           Added to result of row->baseline(x) if not NULL.
00134 //
00135 // x_scale: scale factor for the x-coordinate.
00136 // y_scale: scale factor for the y-coordinate. Ignored if segs is given.
00137 // Note that these scale factors apply to the same x and y system as the
00138 // x-origin and y-origin apply, ie after any block rotation, but before
00139 // the rotation argument is applied.
00140 //
00141 // final_xshift: The x component of the final translation.
00142 // final_yshift: The y component of the final translation.
00143 void DENORM::SetupNormalization(const BLOCK* block,
00144                                 const ROW* row,
00145                                 const FCOORD* rotation,
00146                                 const DENORM* predecessor,
00147                                 const DENORM_SEG* segs, int num_segs,
00148                                 float x_origin, float y_origin,
00149                                 float x_scale, float y_scale,
00150                                 float final_xshift, float final_yshift) {
00151   Clear();
00152   block_ = block;
00153   row_ = row;
00154   if (rotation == NULL)
00155     rotation_ = NULL;
00156   else
00157     rotation_ = new FCOORD(*rotation);
00158   predecessor_ = predecessor;
00159   SetSegments(segs, num_segs);
00160   x_origin_ = x_origin;
00161   y_origin_ = y_origin;
00162   x_scale_ = x_scale;
00163   y_scale_ = y_scale;
00164   final_xshift_ = final_xshift;
00165   final_yshift_ = final_yshift;
00166 }
00167 
00168 // Transforms the given coords one step forward to normalized space, without
00169 // using any block rotation or predecessor.
00170 void DENORM::LocalNormTransform(const TPOINT& pt, TPOINT* transformed) const {
00171   FCOORD src_pt(pt.x, pt.y);
00172   FCOORD float_result;
00173   LocalNormTransform(src_pt, &float_result);
00174   transformed->x = IntCastRounded(float_result.x());
00175   transformed->y = IntCastRounded(float_result.y());
00176 }
00177 void DENORM::LocalNormTransform(const FCOORD& pt, FCOORD* transformed) const {
00178   FCOORD translated(pt.x() - x_origin_, pt.y() - YOriginAtOrigX(pt.x()));
00179   translated.set_x(translated.x() * x_scale_);
00180   translated.set_y(translated.y() * YScaleAtOrigX(pt.x()));
00181   if (rotation_ != NULL)
00182     translated.rotate(*rotation_);
00183   transformed->set_x(translated.x() + final_xshift_);
00184   transformed->set_y(translated.y() + final_yshift_);
00185 }
00186 
00187 // Transforms the given coords forward to normalized space using the
00188 // full transformation sequence defined by the block rotation, the
00189 // predecessors, deepest first, and finally this.
00190 void DENORM::NormTransform(const TPOINT& pt, TPOINT* transformed) const {
00191   FCOORD src_pt(pt.x, pt.y);
00192   FCOORD float_result;
00193   NormTransform(src_pt, &float_result);
00194   transformed->x = IntCastRounded(float_result.x());
00195   transformed->y = IntCastRounded(float_result.y());
00196 }
00197 void DENORM::NormTransform(const FCOORD& pt, FCOORD* transformed) const {
00198   FCOORD src_pt(pt);
00199   if (predecessor_ != NULL) {
00200     predecessor_->NormTransform(pt, &src_pt);
00201   } else if (block_ != NULL) {
00202     FCOORD fwd_rotation(block_->re_rotation().x(), -block_->re_rotation().y());
00203     src_pt.rotate(fwd_rotation);
00204   }
00205   LocalNormTransform(src_pt, transformed);
00206 }
00207 
00208 // Transforms the given coords one step back to source space, without
00209 // using to any block rotation or predecessor.
00210 void DENORM::LocalDenormTransform(const TPOINT& pt, TPOINT* original) const {
00211   FCOORD src_pt(pt.x, pt.y);
00212   FCOORD float_result;
00213   LocalDenormTransform(src_pt, &float_result);
00214   original->x = IntCastRounded(float_result.x());
00215   original->y = IntCastRounded(float_result.y());
00216 }
00217 void DENORM::LocalDenormTransform(const FCOORD& pt, FCOORD* original) const {
00218   FCOORD rotated(pt.x() - final_xshift_, pt.y() - final_yshift_);
00219   if (rotation_ != NULL) {
00220     FCOORD inverse_rotation(rotation_->x(), -rotation_->y());
00221     rotated.rotate(inverse_rotation);
00222   }
00223   original->set_x(rotated.x() / x_scale_ + x_origin_);
00224   float y_scale = y_scale_;
00225   if (num_segs_ > 0)
00226     y_scale = YScaleAtOrigX(original->x());
00227   original->set_y(rotated.y() / y_scale + YOriginAtOrigX(original->x()));
00228 }
00229 
00230 // Transforms the given coords all the way back to source image space using
00231 // the full transformation sequence defined by this and its predecesors
00232 // recursively, shallowest first, and finally any block re_rotation.
00233 void DENORM::DenormTransform(const TPOINT& pt, TPOINT* original) const {
00234   FCOORD src_pt(pt.x, pt.y);
00235   FCOORD float_result;
00236   DenormTransform(src_pt, &float_result);
00237   original->x = IntCastRounded(float_result.x());
00238   original->y = IntCastRounded(float_result.y());
00239 }
00240 void DENORM::DenormTransform(const FCOORD& pt, FCOORD* original) const {
00241   LocalDenormTransform(pt, original);
00242   if (predecessor_ != NULL) {
00243     predecessor_->DenormTransform(*original, original);
00244   } else if (block_ != NULL) {
00245     original->rotate(block_->re_rotation());
00246   }
00247 }
00248 
00249 // Normalize a blob using blob transformations. Less accurate, but
00250 // more accurately copies the old way.
00251 void DENORM::LocalNormBlob(TBLOB* blob) const {
00252   TBOX blob_box = blob->bounding_box();
00253   float x_center = (blob_box.left() + blob_box.right()) / 2.0f;
00254   ICOORD translation(-IntCastRounded(x_origin_),
00255                      -IntCastRounded(YOriginAtOrigX(x_center)));
00256   blob->Move(translation);
00257   // Note that the old way of scaling only allowed for a single
00258   // scale factor.
00259   float scale = YScaleAtOrigX(x_center);
00260   if (scale != 1.0f)
00261     blob->Scale(scale);
00262   if (rotation_ != NULL)
00263     blob->Rotate(*rotation_);
00264   translation.set_x(IntCastRounded(final_xshift_));
00265   translation.set_y(IntCastRounded(final_yshift_));
00266   blob->Move(translation);
00267 }
00268 
00269 // Fills in the x-height range accepted by the given unichar_id, given its
00270 // bounding box in the usual baseline-normalized coordinates, with some
00271 // initial crude x-height estimate (such as word size) and this denoting the
00272 // transformation that was used. Returns false, and an empty range if the
00273 // bottom is a mis-fit. Returns true and empty [0, 0] range if the bottom
00274 // fits, but the top is impossible.
00275 bool DENORM::XHeightRange(int unichar_id, const UNICHARSET& unicharset,
00276                           const TBOX& bbox,
00277                           inT16* min_xht, inT16* max_xht) const {
00278   // Clip the top and bottom to the limit of normalized feature space.
00279   int top = ClipToRange<int>(bbox.top(), 0, kBlnCellHeight - 1);
00280   int bottom = ClipToRange<int>(bbox.bottom(), 0, kBlnCellHeight - 1);
00281   // A tolerance of yscale corresponds to 1 pixel in the image.
00282   double tolerance = y_scale();
00283   int min_bottom, max_bottom, min_top, max_top;
00284   unicharset.get_top_bottom(unichar_id, &min_bottom, &max_bottom,
00285                             &min_top, &max_top);
00286   // Default returns indicate a mis-fit.
00287   *min_xht = 0;
00288   *max_xht = 0;
00289   // Chars with a misfitting bottom might be sub/superscript/dropcap, or might
00290   // just be wrongly classified. Return an empty range so they have to be
00291   // good to be considered.
00292   if (bottom < min_bottom - tolerance || bottom > max_bottom + tolerance) {
00293     return false;
00294   }
00295   // To help very high cap/xheight ratio fonts accept the correct x-height,
00296   // and to allow the large caps in small caps to accept the xheight of the
00297   // small caps, add kBlnBaselineOffset to chars with a maximum max.
00298   if (max_top == kBlnCellHeight - 1)
00299     max_top += kBlnBaselineOffset;
00300   int height = top - kBlnBaselineOffset;
00301   double min_height = min_top - kBlnBaselineOffset - tolerance;
00302   double max_height = max_top - kBlnBaselineOffset + tolerance;
00303   if (min_height <= 0.0) {
00304     if (height <= 0 || max_height > 0)
00305       *max_xht = MAX_INT16;  // Anything will do.
00306   } else if (height > 0) {
00307     int result = IntCastRounded(height * kBlnXHeight / y_scale() / min_height);
00308     *max_xht = static_cast<inT16>(ClipToRange(result, 0, MAX_INT16));
00309   }
00310   if (max_height > 0.0 && height > 0) {
00311     int result = IntCastRounded(height * kBlnXHeight / y_scale() / max_height);
00312     *min_xht = static_cast<inT16>(ClipToRange(result, 0, MAX_INT16));
00313   }
00314   return true;
00315 }
00316 
00317 // ============== Private Code ======================
00318 
00319 // Free allocated memory and clear pointers.
00320 void DENORM::Clear() {
00321   if (segs_ != NULL) {
00322     delete [] segs_;
00323     segs_ = NULL;
00324     num_segs_ = 0;
00325   }
00326   if (rotation_ != NULL) {
00327     delete rotation_;
00328     rotation_ = NULL;
00329   }
00330 }
00331 
00332 // Setup default values.
00333 void DENORM::Init() {
00334   inverse_ = false;
00335   pix_ = NULL;
00336   block_ = NULL;
00337   row_ = NULL;
00338   rotation_ = NULL;
00339   predecessor_ = NULL;
00340   segs_ = NULL;
00341   num_segs_ = 0;
00342   x_origin_ = 0.0f;
00343   y_origin_ = 0.0f;
00344   x_scale_ = 1.0f;
00345   y_scale_ = 1.0f;
00346   final_xshift_ = 0.0f;
00347   final_yshift_ = static_cast<float>(kBlnBaselineOffset);
00348 }
00349 
00350 // Returns the y-origin at the original (un-normalized) x.
00351 float DENORM::YOriginAtOrigX(float orig_x) const {
00352   if (num_segs_ > 0) {
00353     const DENORM_SEG* seg = BinarySearchSegment(orig_x);
00354     if (seg->ycoord != -MAX_INT32) {
00355       return seg->ycoord;
00356     }
00357   }
00358   if (row_ != NULL)
00359     return row_->base_line(orig_x) + y_origin_;
00360   else
00361     return y_origin_;
00362 }
00363 
00364 // Returns the y-scale at the original (un-normalized) x.
00365 float DENORM::YScaleAtOrigX(float orig_x) const {
00366   if (num_segs_ > 0) {
00367     const DENORM_SEG* seg = BinarySearchSegment(orig_x);
00368     if (seg->scale_factor > 0.0)
00369       return seg->scale_factor;
00370   }
00371   return y_scale_;
00372 }
00373 
00374 
00375 // Compare two segments by xstart for use with qsort(3) and bsearch(3)
00376 static int CompareSegByXStart(const DENORM_SEG* a, const DENORM_SEG* b) {
00377   if (a->xstart < b->xstart)
00378     return -1;
00379   else if (a->xstart > b->xstart)
00380     return 1;
00381   return 0;
00382 }
00383 
00384 // Deep copy the array of segments for use as a y_origin and y_scale.
00385 void DENORM::SetSegments(const DENORM_SEG* new_segs, int seg_count) {
00386   if (segs_ != NULL)
00387     delete [] segs_;
00388   if (seg_count > 0) {
00389     segs_ = new DENORM_SEG[seg_count];
00390     memcpy(segs_, new_segs, seg_count * sizeof(new_segs[0]));
00391     // It is possible, if infrequent that the segments may be out of order.
00392     // since we are searching with a binary search, keep them in order.
00393     qsort(segs_, num_segs_, sizeof(segs_[0]),
00394           reinterpret_cast<int(*)(const void*, const void*)>(
00395               &CompareSegByXStart));
00396   } else {
00397     num_segs_ = 0;
00398     segs_ = NULL;
00399   }
00400 }
00401 
00402 // Finds the appropriate segment for a given original x-coord
00403 const DENORM_SEG* DENORM::BinarySearchSegment(float orig_x) const {
00404   int bottom, top, middle;       // binary search
00405   bottom = 0;
00406   top = num_segs_;
00407   do {
00408     middle = (bottom + top) / 2;
00409     if (segs_[middle].xstart > orig_x)
00410       top = middle;
00411     else
00412       bottom = middle;
00413   }
00414   while (top - bottom > 1);
00415   return &segs_[bottom];
00416 }