Tesseract  3.02
tesseract-ocr/cube/cube_line_segmenter.h
Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        cube_line_segmenter.h
00003  * Description: Declaration of the Cube Line Segmenter Class
00004  * Author:    Ahmad Abdulkader
00005  * Created:   2007
00006  *
00007  * (C) Copyright 2008, Google Inc.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
00020 // TODO(ahmadab)
00021 // This is really a makeshift line segmenter that works well for Arabic.
00022 // It should eventually be replaced by Ray Smith's page segmenter.
00023 // There are lots of magic numbers below that were determined empirically
00024 // but have not been thoroughly tested.
00025 
00026 #ifndef CUBE_LINE_SEGMENTER_H
00027 #define CUBE_LINE_SEGMENTER_H
00028 
00029 #include "cube_reco_context.h"
00030 #include "allheaders.h"
00031 
00032 namespace tesseract {
00033 
00034 class CubeLineSegmenter {
00035  public:
00036   CubeLineSegmenter(CubeRecoContext *cntxt, Pix *img);
00037   ~CubeLineSegmenter();
00038 
00039   // Accessor functions
00040   Pix *PostProcessedImage() {
00041     if (init_ == false && Init() == false) {
00042       return NULL;
00043     }
00044     return img_;
00045   }
00046   int ColumnCnt() {
00047     if (init_ == false && Init() == false) {
00048       return NULL;
00049     }
00050     return columns_->n;
00051   }
00052   Box *Column(int col) {
00053     if (init_ == false && Init() == false) {
00054       return NULL;
00055     }
00056 
00057     return columns_->boxa->box[col];
00058   }
00059   int LineCnt() {
00060     if (init_ == false && Init() == false) {
00061       return NULL;
00062     }
00063 
00064     return line_cnt_;
00065   }
00066   Pixa *ConComps() {
00067     if (init_ == false && Init() == false) {
00068       return NULL;
00069     }
00070 
00071     return con_comps_;
00072   }
00073   Pixaa *Columns() {
00074     if (init_ == false && Init() == false) {
00075       return NULL;
00076     }
00077 
00078     return columns_;
00079   }
00080   inline double AlefHgtEst() { return est_alef_hgt_; }
00081   inline double DotHgtEst() { return est_dot_hgt_; }
00082   Pix *Line(int line, Box **line_box);
00083 
00084  private:
00085   static const float kMinValidLineHgtRatio;
00086   static const int kLineSepMorphMinHgt;
00087   static const int kHgtBins;
00088   static const int kMaxConnCompHgt;
00089   static const int kMaxConnCompWid;
00090   static const int kMaxHorzAspectRatio;
00091   static const int kMaxVertAspectRatio;
00092   static const int kMinWid;
00093   static const int kMinHgt;
00094   static const double kMaxValidLineRatio;
00095 
00096   // Cube Reco context
00097   CubeRecoContext *cntxt_;
00098   // Original image
00099   Pix *orig_img_;
00100   // Post processed image
00101   Pix *img_;
00102   // Init flag
00103   bool init_;
00104   // Output Line and column info
00105   int line_cnt_;
00106   Pixaa *columns_;
00107   Pixa *con_comps_;
00108   Pixa *lines_pixa_;
00109   // Estimates for sizes of ALEF and DOT needed for Arabic analysis
00110   double est_alef_hgt_;
00111   double est_dot_hgt_;
00112 
00113   // Init the page analysis
00114   bool Init();
00115   // Performs line segmentation
00116   bool LineSegment();
00117   // Cleanup function
00118   Pix *CleanUp(Pix *pix);
00119   // compute validity ratio for a line
00120   double ValidityRatio(Pix *line_mask_pix, Box *line_box);
00121   // validate line
00122   bool ValidLine(Pix *line_mask_pix, Box *line_box);
00123   // split a line continuously until valid or fail
00124   Pixa *SplitLine(Pix *line_mask_pix, Box *line_box);
00125   // do a desperate attempt at cracking lines
00126   Pixa *CrackLine(Pix *line_mask_pix, Box *line_box);
00127   Pixa *CrackLine(Pix *line_mask_pix, Box *line_box, int line_cnt);
00128   // Checks of a line is too small
00129   bool SmallLine(Box *line_box);
00130   // Compute the connected components in a line
00131   Boxa * ComputeLineConComps(Pix *line_mask_pix, Box *line_box,
00132                              Pixa **con_comps_pixa);
00133   // create a union of two arbitrary pix
00134   Pix *PixUnion(Pix *dest_pix, Box *dest_box, Pix *src_pix, Box *src_box);
00135   // create a union of a pixa subset
00136   Pix *Pixa2Pix(Pixa *pixa, Box **dest_box, int start_pix, int pix_cnt);
00137   // create a union of a pixa
00138   Pix *Pixa2Pix(Pixa *pixa, Box **dest_box);
00139   // merges a number of lines into one line given a bounding box and a mask
00140   bool MergeLine(Pix *line_mask_pix, Box *line_box,
00141                  Pixa *lines, Boxaa *lines_con_comps);
00142   // Creates new set of lines from the computed columns
00143   bool AddLines(Pixa *lines);
00144   // Estimate the parameters of the font(s) used in the page
00145   bool EstimateFontParams();
00146   // perform a vertical Closing with the specified threshold
00147   // returning the resulting conn comps as a pixa
00148   Pixa *VerticalClosing(Pix *pix, int thresold, Boxa **boxa);
00149   // Index the specific pixa using RTL reading order
00150   int *IndexRTL(Pixa *pixa);
00151   // Implements a rudimentary page & line segmenter
00152   bool FindLines();
00153 };
00154 }
00155 
00156 #endif  // CUBE_LINE_SEGMENTER_H