Tesseract
3.02
|
#include <shapetable.h>
Public Member Functions | |
ShapeTable () | |
ShapeTable (const UNICHARSET &unicharset) | |
bool | Serialize (FILE *fp) const |
bool | DeSerialize (bool swap, FILE *fp) |
int | NumShapes () const |
const UNICHARSET & | unicharset () const |
void | set_unicharset (const UNICHARSET &unicharset) |
STRING | DebugStr (int shape_id) const |
STRING | SummaryStr () const |
int | AddShape (int unichar_id, int font_id) |
int | AddShape (const Shape &other) |
void | DeleteShape (int shape_id) |
void | AddToShape (int shape_id, int unichar_id, int font_id) |
void | AddShapeToShape (int shape_id, const Shape &other) |
int | FindShape (int unichar_id, int font_id) const |
void | GetFirstUnicharAndFont (int shape_id, int *unichar_id, int *font_id) const |
const Shape & | GetShape (int shape_id) const |
Shape * | MutableShape (int shape_id) |
int | BuildFromShape (const Shape &shape, const ShapeTable &master_shapes) |
bool | AlreadyMerged (int shape_id1, int shape_id2) |
bool | AnyMultipleUnichars () |
int | MaxNumUnichars () const |
void | ForceFontMerges (int start, int end) |
int | MasterUnicharCount (int shape_id) const |
int | MasterFontCount (int shape_id) const |
int | MergedUnicharCount (int shape_id1, int shape_id2) const |
void | MergeShapes (int shape_id1, int shape_id2) |
void | AppendMasterShapes (const ShapeTable &other) |
int | NumMasterShapes () const |
int | MasterDestinationIndex (int shape_id) const |
Definition at line 126 of file shapetable.h.
tesseract::ShapeTable::ShapeTable | ( | ) |
Definition at line 175 of file shapetable.cpp.
// Default construction: unicharset_ starts out as NULL, so no unicharset is
// attached until one is supplied later.
: unicharset_(NULL) { }
tesseract::ShapeTable::ShapeTable | ( | const UNICHARSET & | unicharset | ) | [explicit] |
Definition at line 177 of file shapetable.cpp.
// Stores the address of the caller's unicharset in unicharset_; the
// UNICHARSET object itself is not copied here.
: unicharset_(&unicharset) { }
int tesseract::ShapeTable::AddShape | ( | int | unichar_id, |
int | font_id | ||
) |
Definition at line 237 of file shapetable.cpp.
{
  // Builds a new heap-allocated shape holding just (unichar_id, font_id),
  // appends it to the table, and returns the index it was stored at (the
  // table size before the append).
  const int new_index = shape_table_.size();
  Shape* fresh_shape = new Shape;
  fresh_shape->AddToShape(unichar_id, font_id);
  shape_table_.push_back(fresh_shape);
  return new_index;
}
int tesseract::ShapeTable::AddShape | ( | const Shape & | other | ) |
Definition at line 247 of file shapetable.cpp.
{
  // Appends a heap-allocated copy of other to the table and returns the
  // index it was stored at (the table size before the append).
  const int new_index = shape_table_.size();
  Shape* duplicate = new Shape(other);
  shape_table_.push_back(duplicate);
  return new_index;
}
void tesseract::ShapeTable::AddShapeToShape | ( | int | shape_id, |
const Shape & | other | ||
) |
Definition at line 269 of file shapetable.cpp.
{
  // Forwards to Shape::AddShape on the table entry at shape_id.
  // shape_id is used as an index without any range check.
  shape_table_[shape_id]->AddShape(other);
}
void tesseract::ShapeTable::AddToShape | ( | int | shape_id, |
int | unichar_id, | ||
int | font_id | ||
) |
Definition at line 263 of file shapetable.cpp.
{
  // Forwards (unichar_id, font_id) to Shape::AddToShape on the table entry
  // at shape_id. shape_id is used as an index without any range check.
  shape_table_[shape_id]->AddToShape(unichar_id, font_id);
}
bool tesseract::ShapeTable::AlreadyMerged | ( | int | shape_id1, |
int | shape_id2 | ||
) |
Definition at line 330 of file shapetable.cpp.
{
  // Two shapes count as already merged when MasterDestinationIndex gives
  // the same result for both.
  const int master1 = MasterDestinationIndex(shape_id1);
  const int master2 = MasterDestinationIndex(shape_id2);
  return master1 == master2;
}
bool tesseract::ShapeTable::AnyMultipleUnichars | ( | ) |
Definition at line 335 of file shapetable.cpp.
{
  // Returns true as soon as a shape that is its own master destination is
  // found with size() greater than 1; returns false if there is none.
  const int shape_count = NumShapes();
  for (int id = 0; id < shape_count; ++id) {
    if (MasterDestinationIndex(id) == id && GetShape(id).size() > 1)
      return true;
  }
  return false;
}
void tesseract::ShapeTable::AppendMasterShapes | ( | const ShapeTable & | other | ) |
Definition at line 431 of file shapetable.cpp.
{
  // Copies into this table every shape of other whose destination_index()
  // is negative; all other shapes are skipped.
  for (int i = 0; i < other.shape_table_.size(); ++i) {
    const Shape* candidate = other.shape_table_[i];
    if (candidate->destination_index() >= 0) continue;
    AddShape(*candidate);
  }
}
int tesseract::ShapeTable::BuildFromShape | ( | const Shape & | shape, |
const ShapeTable & | master_shapes | ||
) |
Definition at line 305 of file shapetable.cpp.
{
  // For each (unichar, font) pair in shape that FindShape cannot locate in
  // this table, adds a new single-entry shape. If master_shapes holds a shape
  // for that pair which is a proper subset of shape, the new entry is extended
  // with the whole master shape. Returns how many entries were extended.
  //
  // The statements are laid out one per line so that the line comment below
  // ends where it should and does not swallow the code that follows it.
  int num_masters = 0;
  for (int u_ind = 0; u_ind < shape.size(); ++u_ind) {
    const int c = shape[u_ind].unichar_id;
    for (int f_ind = 0; f_ind < shape[u_ind].font_ids.size(); ++f_ind) {
      const int f = shape[u_ind].font_ids[f_ind];
      if (FindShape(c, f) >= 0) continue;  // Pair is already in this table.
      const int shape_id = AddShape(c, f);
      const int master_id = master_shapes.FindShape(c, f);
      if (master_id < 0 || shape.size() <= 1) continue;
      const Shape& master = master_shapes.GetShape(master_id);
      if (master.IsSubsetOf(shape) && !shape.IsSubsetOf(master)) {
        // Add everything else from the master shape.
        shape_table_[shape_id]->AddShape(master);
        ++num_masters;
      }
    }
  }
  return num_masters;
}
STRING tesseract::ShapeTable::DebugStr | ( | int | shape_id | ) | const |
Definition at line 194 of file shapetable.cpp.
{
  // Produces a text description of one shape: its id, then for every
  // unichar its id, its string from unicharset_, the number of fonts and the
  // list of font ids. An out-of-range shape_id yields "INVALID_UNICHAR_ID".
  // NOTE(review): unicharset_ is dereferenced without a NULL check.
  const bool in_range = shape_id >= 0 && shape_id < shape_table_.size();
  if (!in_range) return STRING("INVALID_UNICHAR_ID");

  const Shape& shape = GetShape(shape_id);
  STRING text;
  text.add_str_int("Shape", shape_id);
  const int unichar_count = shape.size();
  for (int u = 0; u < unichar_count; ++u) {
    text.add_str_int(" c_id=", shape[u].unichar_id);
    text += "=";
    text += unicharset_->id_to_unichar(shape[u].unichar_id);
    text.add_str_int(", ", shape[u].font_ids.size());
    text += " fonts =";
    const int font_count = shape[u].font_ids.size();
    for (int k = 0; k < font_count; ++k) {
      text.add_str_int(" ", shape[u].font_ids[k]);
    }
  }
  return text;
}
void tesseract::ShapeTable::DeleteShape | ( | int | shape_id | ) |
Definition at line 255 of file shapetable.cpp.
{
  // Frees the shape stored at shape_id, clears the slot to NULL, then
  // removes the slot from the table.
  Shape*& slot = shape_table_[shape_id];
  delete slot;
  slot = NULL;
  shape_table_.remove(shape_id);
}
bool tesseract::ShapeTable::DeSerialize | ( | bool | swap, |
FILE * | fp | ||
) |
Definition at line 188 of file shapetable.cpp.
{
  // Reads the shape table from fp, passing swap through to the underlying
  // container. Returns true on success, false on failure.
  return shape_table_.DeSerialize(swap, fp);
}
int tesseract::ShapeTable::FindShape | ( | int | unichar_id, |
int | font_id | ||
) | const |
Definition at line 278 of file shapetable.cpp.
{
  // Linear search for the first shape containing unichar_id with font_id.
  // A negative font_id matches any font. Returns the shape index, or -1
  // if no shape matches.
  //
  // The statements are laid out one per line so that the line comment below
  // ends where it should and does not swallow the font loop and final return.
  for (int s = 0; s < shape_table_.size(); ++s) {
    const Shape& shape = GetShape(s);
    for (int c = 0; c < shape.size(); ++c) {
      if (shape[c].unichar_id != unichar_id) continue;
      if (font_id < 0)
        return s;  // We don't care about the font.
      for (int f = 0; f < shape[c].font_ids.size(); ++f) {
        if (shape[c].font_ids[f] == font_id)
          return s;
      }
    }
  }
  return -1;
}
void tesseract::ShapeTable::ForceFontMerges | ( | int | start, |
int | end | ||
) |
Definition at line 359 of file shapetable.cpp.
{
  // Within [start, end), merges every pair of shapes that are each their own
  // master destination, each hold exactly one unichar, and share that
  // unichar_id. Afterwards the table is rebuilt from a fresh ShapeTable that
  // received AppendMasterShapes(*this).
  for (int first = start; first < end; ++first) {
    if (MasterDestinationIndex(first) != first) continue;
    if (GetShape(first).size() != 1) continue;
    const int unichar_id = GetShape(first)[0].unichar_id;
    for (int second = first + 1; second < end; ++second) {
      if (MasterDestinationIndex(second) != second) continue;
      if (GetShape(second).size() != 1) continue;
      if (GetShape(second)[0].unichar_id != unichar_id) continue;
      MergeShapes(first, second);
    }
  }
  ShapeTable compacted(*unicharset_);
  compacted.AppendMasterShapes(*this);
  *this = compacted;
}
void tesseract::ShapeTable::GetFirstUnicharAndFont | ( | int | shape_id, |
int * | unichar_id, | ||
int * | font_id | ||
) | const |
Definition at line 296 of file shapetable.cpp.
{
  // Writes the unichar_id of the shape's entry 0 and that entry's font id 0
  // to the two output pointers. Nothing is range-checked or NULL-checked
  // here.
  const Shape& shape = *shape_table_[shape_id];
  *unichar_id = shape[0].unichar_id;
  *font_id = shape[0].font_ids[0];
}
const Shape& tesseract::ShapeTable::GetShape | ( | int | shape_id | ) | const [inline] |
Definition at line 179 of file shapetable.h.
{
  // Read-only access to the shape stored at shape_id (no range check).
  const Shape* entry = shape_table_[shape_id];
  return *entry;
}
int tesseract::ShapeTable::MasterDestinationIndex | ( | int | shape_id | ) | const |
Definition at line 419 of file shapetable.cpp.
{
  // Follows destination_index() links starting at shape_id and returns the
  // index of the shape at the end of the chain. A shape whose destination is
  // negative or is itself is treated as the end of the chain.
  //
  // The statements are laid out one per line so that each line comment ends
  // where it should and does not swallow the code that follows it.
  const int dest_id = shape_table_[shape_id]->destination_index();
  if (dest_id == shape_id || dest_id < 0)
    return shape_id;  // Is master already.
  const int master_id = shape_table_[dest_id]->destination_index();
  if (master_id == dest_id || master_id < 0)
    return dest_id;  // Dest is the master and shape_id points to it.
  // Longer chain: continue from the destination's own destination.
  return MasterDestinationIndex(master_id);
}
int tesseract::ShapeTable::MasterFontCount | ( | int | shape_id | ) | const |
Definition at line 383 of file shapetable.cpp.
{
  // Sums font_ids.size() over every unichar entry of the shape found at
  // MasterDestinationIndex(shape_id).
  const Shape& master = GetShape(MasterDestinationIndex(shape_id));
  int total_fonts = 0;
  const int unichar_count = master.size();
  for (int u = 0; u < unichar_count; ++u) {
    total_fonts += master[u].font_ids.size();
  }
  return total_fonts;
}
int tesseract::ShapeTable::MasterUnicharCount | ( | int | shape_id | ) | const |
Definition at line 377 of file shapetable.cpp.
{
  // Returns size() of the shape found at MasterDestinationIndex(shape_id).
  const Shape& master = GetShape(MasterDestinationIndex(shape_id));
  return master.size();
}
int tesseract::ShapeTable::MaxNumUnichars | ( | ) | const |
Definition at line 346 of file shapetable.cpp.
int tesseract::ShapeTable::MergedUnicharCount | ( | int | shape_id1, |
int | shape_id2 | ||
) | const |
Definition at line 394 of file shapetable.cpp.
{
  // Returns the size() the combined shape would have if the masters of
  // shape_id1 and shape_id2 were merged. The table itself is not modified:
  // the merge is performed on a temporary copy.
  //
  // The statements are laid out one per line so that the leading line comment
  // no longer swallows the whole body.
  // Do it the easy way for now.
  const int master_id1 = MasterDestinationIndex(shape_id1);
  const int master_id2 = MasterDestinationIndex(shape_id2);
  Shape combined_shape(*shape_table_[master_id1]);
  combined_shape.AddShape(*shape_table_[master_id2]);
  return combined_shape.size();
}
void tesseract::ShapeTable::MergeShapes | ( | int | shape_id1, |
int | shape_id2 | ||
) |
Definition at line 404 of file shapetable.cpp.
{
  // Merges the master of shape_id2 into the master of shape_id1 and logs the
  // result via tprintf.
  //
  // The statements are laid out one per line so that each line comment ends
  // where it should and does not swallow the code that follows it.
  const int master_id1 = MasterDestinationIndex(shape_id1);
  const int master_id2 = MasterDestinationIndex(shape_id2);
  // Point master_id2 (and all merged shapes) to master_id1.
  shape_table_[master_id2]->set_destination_index(master_id1);
  // Add all the shapes of master_id2 to master_id1.
  shape_table_[master_id1]->AddShape(*shape_table_[master_id2]);
  tprintf("Merged shape %d->%d, %d->%d, now with %d unichars: %s\n",
          shape_id1, master_id1, shape_id2, master_id2,
          shape_table_[master_id1]->size(),
          DebugStr(master_id1).string());
}
Shape* tesseract::ShapeTable::MutableShape | ( | int | shape_id | ) | [inline] |
Definition at line 182 of file shapetable.h.
{
  // Hands back the stored pointer for shape_id so the caller can modify the
  // shape in place (no range check).
  Shape* entry = shape_table_[shape_id];
  return entry;
}
int tesseract::ShapeTable::NumMasterShapes | ( | ) | const |
Definition at line 440 of file shapetable.cpp.
{
  // Counts the table entries whose destination_index() is negative.
  int master_count = 0;
  const int table_size = shape_table_.size();
  for (int i = 0; i < table_size; ++i) {
    const bool has_no_destination = shape_table_[i]->destination_index() < 0;
    if (has_no_destination) ++master_count;
  }
  return master_count;
}
int tesseract::ShapeTable::NumShapes | ( | ) | const [inline] |
Definition at line 140 of file shapetable.h.
{
  // Total number of entries currently held in shape_table_.
  const int entry_count = shape_table_.size();
  return entry_count;
}
bool tesseract::ShapeTable::Serialize | ( | FILE * | fp | ) | const |
Definition at line 182 of file shapetable.cpp.
{
  // Writes the shape table to fp through the underlying container.
  // Returns true on success, false on failure.
  return shape_table_.Serialize(fp);
}
void tesseract::ShapeTable::set_unicharset | ( | const UNICHARSET & | unicharset | ) | [inline] |
Definition at line 148 of file shapetable.h.
{
  // Re-points unicharset_ at the caller's object; only the address is
  // stored, the UNICHARSET is not copied here.
  const UNICHARSET* incoming = &unicharset;
  unicharset_ = incoming;
}
STRING tesseract::ShapeTable::SummaryStr | ( | ) | const |
Definition at line 214 of file shapetable.cpp.
{
  // Gathers three statistics over the shapes that are their own master
  // destination: how many there are, the largest size(), and how many have
  // size() greater than 1. Returns them formatted into one STRING.
  int master_total = 0;
  int multi_total = 0;
  int largest = 0;
  const int table_size = shape_table_.size();
  for (int id = 0; id < table_size; ++id) {
    if (MasterDestinationIndex(id) == id) {
      ++master_total;
      const int unichar_count = GetShape(id).size();
      if (unichar_count > 1) ++multi_total;
      if (unichar_count > largest) largest = unichar_count;
    }
  }
  STRING summary;
  summary.add_str_int("Number of shapes = ", master_total);
  summary.add_str_int(" max unichars = ", largest);
  summary.add_str_int(" number with multiple unichars = ", multi_total);
  return summary;
}
const UNICHARSET& tesseract::ShapeTable::unicharset | ( | ) | const [inline] |
Definition at line 143 of file shapetable.h.
{
  // Dereferences the stored unicharset_ pointer; there is no NULL check.
  const UNICHARSET* charset = unicharset_;
  return *charset;
}