Tesseract  3.02
tesseract-ocr/cube/cube_search_object.h
Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        cube_search_object.h
00003  * Description: Declaration of the Cube Search Object Class
00004  * Author:    Ahmad Abdulkader
00005  * Created:   2007
00006  *
00007  * (C) Copyright 2008, Google Inc.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
00020 // The CubeSearchObject class represents a char_samp (a word bitmap) that is
00021 // being searched for characters (or recognizable entities).
00022 // The class detects the connected components and performs an oversegmentation
00023 // on each ConComp. The result is a list of segments that are ordered
00024 // in reading order.
00025 // The class provides methods that inquire about the number of segments, the
00026 // CharSamp corresponding to any segment range and the recognition results
00027 // of any segment range.
00028 // An object of class CubeSearchObject is used by the BeamSearch algorithm
00029 // to recognize a CharSamp into a list of word alternates.
00030 
00031 #ifndef CUBE_SEARCH_OBJECT_H
00032 #define CUBE_SEARCH_OBJECT_H
00033 
00034 #include "search_object.h"
00035 #include "char_samp.h"
00036 #include "conv_net_classifier.h"
00037 #include "cube_reco_context.h"
00038 #include "allheaders.h"
00039 
00040 namespace tesseract {
00041 class CubeSearchObject : public SearchObject {
00042  public:
00043   CubeSearchObject(CubeRecoContext *cntxt, CharSamp *samp);
00044   ~CubeSearchObject();
00045 
00046   // Returns the segmentation point count of the CharSamp owned by the class
00047   int SegPtCnt();
00048   // Recognizes the set of segments given by the specified range and returns
00049   // a list of possible alternate answers
00050   CharAltList * RecognizeSegment(int start_pt, int end_pt);
00051   // Returns the CharSamp corresponding to the specified segment range
00052   CharSamp *CharSample(int start_pt, int end_pt);
00053   // Returns a leptonica box corresponding to the specified segment range
00054   Box *CharBox(int start_pt, int end_pt);
00055   // Returns the cost of having a space before the specified segmentation pt
00056   int SpaceCost(int seg_pt);
00057   // Returns the cost of not having a space before the specified
00058   // segmentation pt
00059   int NoSpaceCost(int seg_pt);
00060   // Returns the cost of not having any spaces within the specified range
00061   // of segmentation points
00062   int NoSpaceCost(int seg_pt, int end_pt);
00063 
00064  private:
00065   // Maximum reasonable segment count
00066   static const int kMaxSegmentCnt = 128;
00067   // Use cropped samples (declared here; the value is defined outside this header)
00068   static const bool kUseCroppedChars;
00069 
00070   // reading order flag
00071   bool rtl_;
00072   // cached dimensions of char samp
00073   int left_;
00074   int itop_;
00075   int wid_;
00076   int hgt_;
00077   // minimum and maximum possible inter-segment gaps for spaces
00078   int min_spc_gap_;
00079   int max_spc_gap_;
00080   // initialization flag
00081   bool init_;
00082   // maximum segments per character: Cached from tuning parameters object
00083   int max_seg_per_char_;
00084   // char sample to be processed
00085   CharSamp *samp_;
00086   // segment count
00087   int segment_cnt_;
00088   // segments of the processed char samp
00089   ConComp **segments_;
00090   // Cache data members:
00091   // There are two caches kept; a CharSamp cache and a CharAltList cache.
00092   // Each is a 2-D array of CharSamp and CharAltList pointers respectively,
00093   // hence the triple pointer.
00094   CharAltList ***reco_cache_;
00095   CharSamp ***samp_cache_;
00096   // Cached costs of space and no-space after every segment. Computed only
00097   // in phrase mode
00098   int *space_cost_;
00099   int *no_space_cost_;
00100 
00101   // Initializes and allocates variables, performs segmentation
00102   bool Init();
00103   // Cleanup
00104   void Cleanup();
00105   // Performs segmentation of the bitmap by detecting connected components,
00106   // segmenting each connected component using windowed vertical pixel density
00107   // histogram and sorting the resulting segments in reading order.
00108   // Returns true on success
00109   bool Segment();
00110   // Validates a segment range: true iff -1 <= start_pt < end_pt <= segment_cnt_ and end_pt - start_pt <= max_seg_per_char_
00111   inline bool IsValidSegmentRange(int start_pt, int end_pt) {
00112     return (end_pt > start_pt && start_pt >= -1 && start_pt < segment_cnt_ &&
00113             end_pt >= 0 && end_pt <= segment_cnt_ &&
00114             end_pt <= (start_pt + max_seg_per_char_));
00115   }
00116   // Computes the space and no-space costs at gaps between segments.
00117   // Returns true on success
00118   bool ComputeSpaceCosts();
00119 };
00120 }
00121 
00122 #endif  // CUBE_SEARCH_OBJECT_H