// Tesseract 3.02
00001 /********************************************************************** 00002 * File: tess_lang_mod_edge.cpp 00003 * Description: Implementation of the Tesseract Language Model Edge Class 00004 * Author: Ahmad Abdulkader 00005 * Created: 2008 00006 * 00007 * (C) Copyright 2008, Google Inc. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 * 00018 **********************************************************************/ 00019 00020 #include "tess_lang_mod_edge.h" 00021 #include "const.h" 00022 #include "unichar.h" 00023 00024 00025 00026 namespace tesseract { 00027 // OOD constructor 00028 TessLangModEdge::TessLangModEdge(CubeRecoContext *cntxt, int class_id) { 00029 root_ = false; 00030 cntxt_ = cntxt; 00031 dawg_ = NULL; 00032 start_edge_ = 0; 00033 end_edge_ = 0; 00034 edge_mask_ = 0; 00035 class_id_ = class_id; 00036 str_ = cntxt_->CharacterSet()->ClassString(class_id); 00037 path_cost_ = Cost(); 00038 } 00039 00040 // leading, trailing punc constructor and single byte UTF char 00041 TessLangModEdge::TessLangModEdge(CubeRecoContext *cntxt, 00042 const Dawg *dawg, EDGE_REF edge_idx, int class_id) { 00043 root_ = false; 00044 cntxt_ = cntxt; 00045 dawg_ = dawg; 00046 start_edge_ = edge_idx; 00047 end_edge_ = edge_idx; 00048 edge_mask_ = 0; 00049 class_id_ = class_id; 00050 str_ = cntxt_->CharacterSet()->ClassString(class_id); 00051 path_cost_ = Cost(); 00052 } 00053 00054 // dict constructor: multi byte UTF char 
00055 TessLangModEdge::TessLangModEdge(CubeRecoContext *cntxt, const Dawg *dawg, 00056 EDGE_REF start_edge_idx, EDGE_REF end_edge_idx, 00057 int class_id) { 00058 root_ = false; 00059 cntxt_ = cntxt; 00060 dawg_ = dawg; 00061 start_edge_ = start_edge_idx; 00062 end_edge_ = end_edge_idx; 00063 edge_mask_ = 0; 00064 class_id_ = class_id; 00065 str_ = cntxt_->CharacterSet()->ClassString(class_id); 00066 path_cost_ = Cost(); 00067 } 00068 00069 char *TessLangModEdge::Description() const { 00070 char *char_ptr = new char[256]; 00071 if (!char_ptr) { 00072 return NULL; 00073 } 00074 00075 char dawg_str[256]; 00076 char edge_str[32]; 00077 if (dawg_ == (Dawg *)DAWG_OOD) { 00078 strcpy(dawg_str, "OOD"); 00079 } else if (dawg_ == (Dawg *)DAWG_NUMBER) { 00080 strcpy(dawg_str, "NUM"); 00081 } else if (dawg_->permuter() == SYSTEM_DAWG_PERM) { 00082 strcpy(dawg_str, "Main"); 00083 } else if (dawg_->permuter() == USER_DAWG_PERM) { 00084 strcpy(dawg_str, "User"); 00085 } else if (dawg_->permuter() == DOC_DAWG_PERM) { 00086 strcpy(dawg_str, "Doc"); 00087 } else { 00088 strcpy(dawg_str, "N/A"); 00089 } 00090 00091 sprintf(edge_str, "%d", static_cast<int>(start_edge_)); 00092 if (IsLeadingPuncEdge(edge_mask_)) { 00093 strcat(edge_str, "-LP"); 00094 } 00095 if (IsTrailingPuncEdge(edge_mask_)) { 00096 strcat(edge_str, "-TP"); 00097 } 00098 sprintf(char_ptr, "%s(%s)%s, Wtd Dawg Cost=%d", 00099 dawg_str, edge_str, IsEOW() ? 
"-EOW-" : "", path_cost_); 00100 00101 return char_ptr; 00102 } 00103 00104 int TessLangModEdge::CreateChildren(CubeRecoContext *cntxt, 00105 const Dawg *dawg, 00106 NODE_REF parent_node, 00107 LangModEdge **edge_array) { 00108 int edge_cnt = 0; 00109 NodeChildVector vec; 00110 dawg->unichar_ids_of(parent_node, &vec); // find all children of the parent 00111 for (int i = 0; i < vec.size(); ++i) { 00112 const NodeChild &child = vec[i]; 00113 if (child.unichar_id == INVALID_UNICHAR_ID) continue; 00114 edge_array[edge_cnt] = 00115 new TessLangModEdge(cntxt, dawg, child.edge_ref, child.unichar_id); 00116 if (edge_array[edge_cnt] != NULL) edge_cnt++; 00117 } 00118 return edge_cnt; 00119 } 00120 }