Tesseract
3.02
|
00001 00002 // File: workingpartset.h 00003 // Description: Class to hold a working set of partitions of the page 00004 // during construction of text/image regions. 00005 // Author: Ray Smith 00006 // Created: Tue Ocr 28 17:21:01 PDT 2008 00007 // 00008 // (C) Copyright 2008, Google Inc. 00009 // Licensed under the Apache License, Version 2.0 (the "License"); 00010 // you may not use this file except in compliance with the License. 00011 // You may obtain a copy of the License at 00012 // http://www.apache.org/licenses/LICENSE-2.0 00013 // Unless required by applicable law or agreed to in writing, software 00014 // distributed under the License is distributed on an "AS IS" BASIS, 00015 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00016 // See the License for the specific language governing permissions and 00017 // limitations under the License. 00018 // 00020 00021 #ifndef TESSERACT_TEXTORD_WORKINGPARSET_H__ 00022 #define TESSERACT_TEXTORD_WORKINGPARSET_H__ 00023 00024 #include "blobbox.h" // For TO_BLOCK_LIST and BLOCK_LIST. 00025 #include "colpartition.h" // For ColPartition_LIST. 00026 00027 namespace tesseract { 00028 00029 // WorkingPartSet holds a working set of ColPartitions during transformation 00030 // from the grid-based storage to regions in logical reading order, and is 00031 // therefore only used during construction of the regions. 00032 class WorkingPartSet : public ELIST_LINK { 00033 public: 00034 WorkingPartSet() { 00035 } 00036 explicit WorkingPartSet(ColPartition* column) 00037 : column_(column), latest_part_(NULL), part_it_(&part_set_) { 00038 } 00039 00040 // Simple accessors. 00041 ColPartition* column() const { 00042 return column_; 00043 } 00044 void set_column(ColPartition* col) { 00045 column_ = col; 00046 } 00047 00048 // Add the partition to this WorkingPartSet. Partitions are generally 00049 // stored in the order in which they are received, but if the partition 00050 // has a SingletonPartner, make sure that it stays with its partner. 00051 void AddPartition(ColPartition* part); 00052 00053 // Make blocks out of any partitions in this WorkingPartSet, and append 00054 // them to the end of the blocks list. bleft, tright and resolution give 00055 // the bounds and resolution of the source image, so that blocks can be 00056 // made to fit in the bounds. 00057 // All ColPartitions go in the used_parts list, as they need to be kept 00058 // around, but are no longer needed. 00059 void ExtractCompletedBlocks(const ICOORD& bleft, const ICOORD& tright, 00060 int resolution, ColPartition_LIST* used_parts, 00061 BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks); 00062 00063 // Insert the given blocks at the front of the completed_blocks_ list so 00064 // they can be kept in the correct reading order. 00065 void InsertCompletedBlocks(BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks); 00066 00067 private: 00068 // Convert the part_set_ into blocks, starting a new block at a break 00069 // in partnerships, or a change in linespacing (for text). 00070 void MakeBlocks(const ICOORD& bleft, const ICOORD& tright, int resolution, 00071 ColPartition_LIST* used_parts); 00072 00073 // The column that this working set applies to. Used by the caller. 00074 ColPartition* column_; 00075 // The most recently added partition. 00076 ColPartition* latest_part_; 00077 // All the partitions in the block that is currently under construction. 00078 ColPartition_LIST part_set_; 00079 // Iteratorn on part_set_ pointing to the most recent addition. 00080 ColPartition_IT part_it_; 00081 // The blocks that have been made so far and belong before the current block. 00082 BLOCK_LIST completed_blocks_; 00083 TO_BLOCK_LIST to_blocks_; 00084 }; 00085 00086 ELISTIZEH(WorkingPartSet) 00087 00088 } // namespace tesseract. 00089 00090 #endif // TESSERACT_TEXTORD_WORKINGPARSET_H__ 00091