tesseract-doc/normalis_8h_source.html

00001 /**********************************************************************
00002  * File:        normalis.h  (Formerly denorm.h)
00003  * Description: Code for the DENORM class.
00004  * Author:      Ray Smith
00005  * Created:     Thu Apr 23 09:22:43 BST 1992
00006  *
00007  * (C) Copyright 1992, Hewlett-Packard Ltd.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019
00020 #ifndef           NORMALIS_H
00021 #define           NORMALIS_H
00022
00023 #include <stdio.h>
00024 #include "host.h"
00025
00026 const int kBlnCellHeight = 256;     // Full cell height in baseline-normalized space.
00027 const int kBlnXHeight = 128;        // Target x-height in baseline-normalized space.
00028 const int kBlnBaselineOffset = 64;  // y offset of the baseline (used as final_yshift).
00029
00030 struct Pix;
00031 class ROW;                          // Forward decl
00032 class BLOCK;
00033 class FCOORD;
00034 class TBLOB;
00035 class TBOX;
00036 class TPOINT;
00037 class UNICHARSET;
00038
00039 namespace tesseract {
00040 // Possible normalization methods. The values are deliberately NEGATIVE
00041 // because they also double up as markers for the last sub-classifier.
00042 enum NormalizationMode {
00043   NM_BASELINE = -3,         // The original baseline (BL) normalization mode.
00044   NM_CHAR_ISOTROPIC = -2,   // Character normalization, but isotropic.
00045   NM_CHAR_ANISOTROPIC = -1  // The original character (CN) normalization mode.
00046 };
00047
00048 }  // namespace tesseract.
00049
00050 class DENORM_SEG {  // A segment giving the y origin and scale DENORM uses at a source x.
00051  public:
00052   DENORM_SEG() : xstart(0), ycoord(0), scale_factor(0.0f) {}
00053   // Members are zeroed above so a default-constructed segment is never indeterminate.
00054   inT32 xstart;         // start of segment
00055   inT32 ycoord;         // y at segment
00056   float scale_factor;   // normalized_x/scale_factor + x_center == original_x
00057 };
00058
00059 class DENORM {  // One normalization step plus its inverse; may chain to a predecessor.
00060  public:
00061   DENORM();
00062   DENORM(float x, float scaling, ROW *src);
00063   DENORM(float x,              // from same pieces
00064          float scaling,
00065          double line_m,        // default line: y = mx + c
00066          double line_c,
00067          inT16 seg_count,      // no of segments
00068          DENORM_SEG *seg_pts,  // actual segments
00069          BOOL8 using_row,      // as baseline
00070          ROW *src);
00071   // Copying a DENORM is allowed.
00072   DENORM(const DENORM &);
00073   DENORM& operator=(const DENORM&);
00074   ~DENORM();
00075
00076   // Setup for a baseline normalization. If there are segs, then they
00077   // are used, otherwise, if there is a row, that is used, otherwise the
00078   // bottom of the word_box is used for the baseline.
00079   void SetupBLNormalize(const BLOCK* block, const ROW* row, float x_height,
00080                         const TBOX& word_box,
00081                         int num_segs, const DENORM_SEG* segs);
00082
00083   // Setup the normalization transformation parameters.
00084   // The normalizations applied to a blob are as follows:
00085   // 1. An optional block layout rotation that was applied during layout
00086   // analysis to make the textlines horizontal.
00087   // 2. A normalization transformation (LocalNormTransform):
00088   // Subtract the "origin"
00089   // Apply an x,y scaling.
00090   // Apply an optional rotation.
00091   // Add back a final translation.
00092   // The origin is in the block-rotated space, and is usually something like
00093   // the x-middle of the word at the baseline.
00094   // 3. Zero or more further normalization transformations that are applied
00095   // in sequence, with a similar pattern to the first normalization transform.
00096   //
00097   // A DENORM holds the parameters of a single normalization, and can execute
00098   // both the LocalNormTransform (a forwards normalization), and the
00099   // LocalDenormTransform which is an inverse transform or de-normalization.
00100   // A DENORM may point to a predecessor DENORM, which is actually the earlier
00101   // normalization, so the full normalization sequence involves executing all
00102   // predecessors first and then the transform in "this".
00103   // Let x be image coordinates, and suppose we have normalization classes
00104   // A, B, C where we first apply A then B then C to get normalized x':
00105   // x' = CBAx
00106   // Then the backwards (to original coordinates) would be:
00107   // x = A^-1 B^-1 C^-1 x'
00108   // and A = B->predecessor_ and B = C->predecessor_
00109   // NormTransform executes all predecessors recursively, and then this.
00110   // NormTransform would be used to transform an image-based feature to
00111   // normalized space for use in a classifier
00112   // DenormTransform inverts this and then all predecessors. It can be
00113   // used to get back to the original image coordinates from normalized space.
00114   // The LocalNormTransform member executes just the transformation
00115   // in "this" without the layout rotation or any predecessors. It would be
00116   // used to run each successive normalization, eg the word normalization,
00117   // and later the character normalization.
00118
00119   // Arguments:
00120   // block: if not NULL, then this is the first transformation, and
00121   //        block->re_rotation() needs to be used after the Denorm
00122   //        transformation to get back to the image coords.
00123   // row: if not NULL, then row->baseline(x) is added to the y_origin, unless
00124   //      segs is not NULL and num_segs > 0, in which case they are used.
00125   // rotation: if not NULL, apply this rotation after translation to the
00126   //           origin and scaling. (Usually a classify rotation.)
00127   // predecessor: if not NULL, then predecessor has been applied to the
00128   //              input space and needs to be undone to complete the inverse.
00129   // segs: if not NULL and num_segs > 0, then the segs provide the y_origin
00130   //       and the y_scale at a given source x.
00131   // num_segs: the number of segs.
00132   // The above pointers are not owned by this DENORM and are assumed to live
00133   // longer than this denorm, except rotation and segs, which are deep
00134   // copied on input.
00135   //
00136   // x_origin: The x origin which will be mapped to final_xshift in the result.
00137   // y_origin: The y origin which will be mapped to final_yshift in the result.
00138   //           Added to result of row->baseline(x) if not NULL.
00139   //
00140   // x_scale: scale factor for the x-coordinate.
00141   // y_scale: scale factor for the y-coordinate. Ignored if segs is given.
00142   // Note that these scale factors apply to the same x and y system as the
00143   // x-origin and y-origin apply, ie after any block rotation, but before
00144   // the rotation argument is applied.
00145   //
00146   // final_xshift: The x component of the final translation.
00147   // final_yshift: The y component of the final translation.
00148   //
00149   // In theory, any of the commonly used normalizations can be setup here:
00150   // * Traditional baseline normalization on a word:
00151   // SetupNormalization(block, row, NULL, NULL, NULL, 0,
00152   //                    box.x_middle(), 0.0f,
00153   //                    kBlnXHeight / x_height, kBlnXHeight / x_height,
00154   //                    0, kBlnBaselineOffset);
00155   // * Numeric mode baseline normalization on a word:
00156   // SetupNormalization(block, NULL, NULL, NULL, segs, num_segs,
00157   //                    box.x_middle(), 0.0f,
00158   //                    kBlnXHeight / x_height, kBlnXHeight / x_height,
00159   //                    0, kBlnBaselineOffset);
00160   // * Anisotropic character normalization used by IntFx.
00161   // SetupNormalization(NULL, NULL, NULL, denorm, NULL, 0,
00162   //                    centroid_x, centroid_y,
00163   //                    51.2 / ry, 51.2 / rx, 128, 128);
00164   // * Normalize blob height to x-height (current OSD):
00165   // SetupNormalization(NULL, NULL, &rotation, NULL, NULL, 0,
00166   //                    box.rotational_x_middle(rotation),
00167   //                    box.rotational_y_middle(rotation),
00168   //                    kBlnXHeight / box.rotational_height(rotation),
00169   //                    kBlnXHeight / box.rotational_height(rotation),
00170   //                    0, kBlnBaselineOffset);
00171   // * Secondary normalization for classification rotation (current):
00172   // FCOORD rotation = block->classify_rotation();
00173   // float target_height = kBlnXHeight / CCStruct::kXHeightCapRatio;
00174   // SetupNormalization(NULL, NULL, &rotation, denorm, NULL, 0,
00175   //                    box.rotational_x_middle(rotation),
00176   //                    box.rotational_y_middle(rotation),
00177   //                    target_height / box.rotational_height(rotation),
00178   //                    target_height / box.rotational_height(rotation),
00179   //                    0, kBlnBaselineOffset);
00180   // * Proposed new normalizations for CJK: Between them there is then
00181   // no need for further normalization at all, and the character fills the cell.
00182   // ** Replacement for baseline normalization on a word:
00183   // Scales height and width independently so that modal height and pitch
00184   // fill the cell respectively.
00185   // float cap_height = x_height / CCStruct::kXHeightCapRatio;
00186   // SetupNormalization(block, row, NULL, NULL, NULL, 0,
00187   //                    box.x_middle(), cap_height / 2.0f,
00188   //                    kBlnCellHeight / fixed_pitch,
00189   //                    kBlnCellHeight / cap_height,
00190   //                    0, 0);
00191   // ** Secondary normalization for classification (with rotation) (proposed):
00192   // Requires a simple translation to the center of the appropriate character
00193   // cell, no further scaling and a simple rotation (or nothing) about the
00194   // cell center.
00195   // FCOORD rotation = block->classify_rotation();
00196   // SetupNormalization(NULL, NULL, &rotation, denorm, NULL, 0,
00197   //                    fixed_pitch_cell_center,
00198   //                    0.0f,
00199   //                    1.0f,
00200   //                    1.0f,
00201   //                    0, 0);
00202   void SetupNormalization(const BLOCK* block,
00203                           const ROW* row,
00204                           const FCOORD* rotation,
00205                           const DENORM* predecessor,
00206                           const DENORM_SEG* segs, int num_segs,
00207                           float x_origin, float y_origin,
00208                           float x_scale, float y_scale,
00209                           float final_xshift, float final_yshift);
00210
00211   // Transforms the given coords one step forward to normalized space, without
00212   // using any block rotation or predecessor.
00213   void LocalNormTransform(const TPOINT& pt, TPOINT* transformed) const;
00214   void LocalNormTransform(const FCOORD& pt, FCOORD* transformed) const;
00215   // Transforms the given coords forward to normalized space using the
00216   // full transformation sequence defined by the block rotation, the
00217   // predecessors, deepest first, and finally this.
00218   void NormTransform(const TPOINT& pt, TPOINT* transformed) const;
00219   void NormTransform(const FCOORD& pt, FCOORD* transformed) const;
00220   // Transforms the given coords one step back to source space, without
00221   // using any block rotation or predecessor.
00222   void LocalDenormTransform(const TPOINT& pt, TPOINT* original) const;
00223   void LocalDenormTransform(const FCOORD& pt, FCOORD* original) const;
00224   // Transforms the given coords all the way back to source image space using
00225   // the full transformation sequence defined by this and its predecessors
00226   // recursively, shallowest first, and finally any block re_rotation.
00227   void DenormTransform(const TPOINT& pt, TPOINT* original) const;
00228   void DenormTransform(const FCOORD& pt, FCOORD* original) const;
00229
00230   // Normalize a blob using blob transformations. Less accurate, but
00231   // more accurately copies the old way.
00232   void LocalNormBlob(TBLOB* blob) const;
00233
00234   // Fills in the x-height range accepted by the given unichar_id, given its
00235   // bounding box in the usual baseline-normalized coordinates, with some
00236   // initial crude x-height estimate (such as word size) and this denoting the
00237   // transformation that was used. Returns false, and an empty range if the
00238   // bottom is a mis-fit. Returns true and empty [0, 0] range if the bottom
00239   // fits, but the top is impossible.
00240   bool XHeightRange(int unichar_id, const UNICHARSET& unicharset,
00241                     const TBOX& bbox, inT16* min_xht, inT16* max_xht) const;
00242
00243   Pix* pix() const {
00244     return pix_;
00245   }
00246   void set_pix(Pix* pix) {
00247     pix_ = pix;
00248   }
00249   bool inverse() const {
00250     return inverse_;
00251   }
00252   void set_inverse(bool value) {
00253     inverse_ = value;
00254   }
00255   const DENORM* RootDenorm() const {
00256     if (predecessor_ != NULL)
00257       return predecessor_->RootDenorm();
00258     return this;
00259   }
00260   const DENORM* predecessor() const {
00261     return predecessor_;
00262   }
00263   // Accessors - perhaps should not be needed.
00264   float x_scale() const {
00265     return x_scale_;
00266   }
00267   float y_scale() const {
00268     return y_scale_;
00269   }
00270   const ROW *row() const {
00271     return row_;
00272   }
00273   void set_row(const ROW* row) {  // const, matching row_ and set_block().
00274     row_ = row;
00275   }
00276   const BLOCK* block() const {
00277     return block_;
00278   }
00279   void set_block(const BLOCK* block) {
00280     block_ = block;
00281   }
00282
00283  private:
00284   // Free allocated memory and clear pointers.
00285   void Clear();
00286   // Setup default values.
00287   void Init();
00288
00289   // Returns the y-origin at the original (un-normalized) x.
00290   float YOriginAtOrigX(float orig_x) const;
00291
00292   // Returns the y-scale at the original (un-normalized) x.
00293   float YScaleAtOrigX(float orig_x) const;
00294
00295   // Deep copy the array of segments for use as a y_origin and y_scale.
00296   void SetSegments(const DENORM_SEG* new_segs, int seg_count);
00297
00298   // Finds the appropriate segment for a given original x-coord
00299   const DENORM_SEG* BinarySearchSegment(float orig_x) const;
00300
00301   // Best available image.
00302   Pix* pix_;
00303   // True if the source image is white-on-black.
00304   bool inverse_;
00305   // Block the word came from. If not null, block->re_rotation() takes the
00306   // "untransformed" coordinates even further back to the original image.
00307   const BLOCK* block_;
00308   // Row the word came from. If not null, row->baseline() is added to y_origin_.
00309   const ROW* row_;
00310   // Rotation applied after origin translation and scaling; deep copied on input.
00311   const FCOORD* rotation_;
00312   // Previous transformation in a chain.
00313   const DENORM* predecessor_;
00314   // Array of segments used to specify local y_origin_ and y_scale_.
00315   // Owned by the DENORM.
00316   DENORM_SEG *segs_;
00317   // Size of the segs_ array.
00318   int num_segs_;
00319   // x-coordinate to be mapped to final_xshift_ in the result.
00320   float x_origin_;
00321   // y-coordinate to be mapped to final_yshift_ in the result.
00322   float y_origin_;
00323   // Scale factors for x and y coords. Applied to pre-rotation system.
00324   float x_scale_;
00325   float y_scale_;
00326   // Destination coords of the x_origin_ and y_origin_.
00327   float final_xshift_;
00328   float final_yshift_;
00329 };
00330 #endif