Tesseract  3.02
tesseract-ocr/textord/workingpartset.h
Go to the documentation of this file.
00001 
00002 // File:        workingpartset.h
00003 // Description: Class to hold a working set of partitions of the page
00004 //              during construction of text/image regions.
00005 // Author:      Ray Smith
00006 // Created:     Tue Oct 28 17:21:01 PDT 2008
00007 //
00008 // (C) Copyright 2008, Google Inc.
00009 // Licensed under the Apache License, Version 2.0 (the "License");
00010 // you may not use this file except in compliance with the License.
00011 // You may obtain a copy of the License at
00012 // http://www.apache.org/licenses/LICENSE-2.0
00013 // Unless required by applicable law or agreed to in writing, software
00014 // distributed under the License is distributed on an "AS IS" BASIS,
00015 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00016 // See the License for the specific language governing permissions and
00017 // limitations under the License.
00018 //
00020 
00021 #ifndef TESSERACT_TEXTORD_WORKINGPARSET_H__
00022 #define TESSERACT_TEXTORD_WORKINGPARSET_H__
00023 
00024 #include "blobbox.h"       // For TO_BLOCK_LIST and BLOCK_LIST.
00025 #include "colpartition.h"  // For ColPartition_LIST.
00026 
00027 namespace tesseract {
00028 
00029 // WorkingPartSet holds a working set of ColPartitions during transformation
00030 // from the grid-based storage to regions in logical reading order, and is
00031 // therefore only used during construction of the regions.
00032 class WorkingPartSet : public ELIST_LINK {
00033  public:
00034   WorkingPartSet() {
00035   }
00036   explicit WorkingPartSet(ColPartition* column)
00037     : column_(column), latest_part_(NULL), part_it_(&part_set_) {
00038   }
00039 
00040   // Simple accessors.
00041   ColPartition* column() const {
00042     return column_;
00043   }
00044   void set_column(ColPartition* col) {
00045     column_ = col;
00046   }
00047 
00048   // Add the partition to this WorkingPartSet. Partitions are generally
00049   // stored in the order in which they are received, but if the partition
00050   // has a SingletonPartner, make sure that it stays with its partner.
00051   void AddPartition(ColPartition* part);
00052 
00053   // Make blocks out of any partitions in this WorkingPartSet, and append
00054   // them to the end of the blocks list. bleft, tright and resolution give
00055   // the bounds and resolution of the source image, so that blocks can be
00056   // made to fit in the bounds.
00057   // All ColPartitions go in the used_parts list, as they need to be kept
00058   // around, but are no longer needed.
00059   void ExtractCompletedBlocks(const ICOORD& bleft, const ICOORD& tright,
00060                               int resolution, ColPartition_LIST* used_parts,
00061                               BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks);
00062 
00063   // Insert the given blocks at the front of the completed_blocks_ list so
00064   // they can be kept in the correct reading order.
00065   void InsertCompletedBlocks(BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks);
00066 
00067  private:
00068   // Convert the part_set_ into blocks, starting a new block at a break
00069   // in partnerships, or a change in linespacing (for text).
00070   void MakeBlocks(const ICOORD& bleft, const ICOORD& tright, int resolution,
00071                   ColPartition_LIST* used_parts);
00072 
00073   // The column that this working set applies to. Used by the caller.
00074   ColPartition* column_;
00075   // The most recently added partition.
00076   ColPartition* latest_part_;
00077   // All the partitions in the block that is currently under construction.
00078   ColPartition_LIST part_set_;
00079   // Iteratorn on part_set_ pointing to the most recent addition.
00080   ColPartition_IT part_it_;
00081   // The blocks that have been made so far and belong before the current block.
00082   BLOCK_LIST completed_blocks_;
00083   TO_BLOCK_LIST to_blocks_;
00084 };
00085 
00086 ELISTIZEH(WorkingPartSet)
00087 
00088 }  // namespace tesseract.
00089 
00090 #endif  // TESSERACT_TEXTORD_WORKINGPARSET_H__
00091