Tesseract  3.02
tesseract-ocr/cube/tess_lang_mod_edge.cpp
Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        tess_lang_mod_edge.cpp
00003  * Description: Implementation of the Tesseract Language Model Edge Class
00004  * Author:    Ahmad Abdulkader
00005  * Created:   2008
00006  *
00007  * (C) Copyright 2008, Google Inc.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
#include "tess_lang_mod_edge.h"

#include <stdio.h>
#include <string.h>

#include <new>

#include "const.h"
#include "unichar.h"
00023 
00024 
00025 
00026 namespace tesseract {
00027 // OOD constructor
00028 TessLangModEdge::TessLangModEdge(CubeRecoContext *cntxt, int class_id) {
00029   root_ = false;
00030   cntxt_ = cntxt;
00031   dawg_ = NULL;
00032   start_edge_ = 0;
00033   end_edge_ = 0;
00034   edge_mask_ = 0;
00035   class_id_ = class_id;
00036   str_ = cntxt_->CharacterSet()->ClassString(class_id);
00037   path_cost_ = Cost();
00038 }
00039 
00040 // leading, trailing punc constructor and single byte UTF char
00041 TessLangModEdge::TessLangModEdge(CubeRecoContext *cntxt,
00042     const Dawg *dawg, EDGE_REF edge_idx, int class_id) {
00043   root_ = false;
00044   cntxt_ = cntxt;
00045   dawg_ = dawg;
00046   start_edge_ = edge_idx;
00047   end_edge_ = edge_idx;
00048   edge_mask_ = 0;
00049   class_id_ = class_id;
00050   str_ = cntxt_->CharacterSet()->ClassString(class_id);
00051   path_cost_ = Cost();
00052 }
00053 
00054 // dict constructor: multi byte UTF char
00055 TessLangModEdge::TessLangModEdge(CubeRecoContext *cntxt, const Dawg *dawg,
00056                                  EDGE_REF start_edge_idx, EDGE_REF end_edge_idx,
00057                                  int class_id) {
00058   root_ = false;
00059   cntxt_ = cntxt;
00060   dawg_ = dawg;
00061   start_edge_ = start_edge_idx;
00062   end_edge_ = end_edge_idx;
00063   edge_mask_ = 0;
00064   class_id_ = class_id;
00065   str_ = cntxt_->CharacterSet()->ClassString(class_id);
00066   path_cost_ = Cost();
00067 }
00068 
00069 char *TessLangModEdge::Description() const {
00070   char *char_ptr = new char[256];
00071   if (!char_ptr) {
00072     return NULL;
00073   }
00074 
00075   char dawg_str[256];
00076   char edge_str[32];
00077   if (dawg_ == (Dawg *)DAWG_OOD) {
00078     strcpy(dawg_str, "OOD");
00079   } else if (dawg_ == (Dawg *)DAWG_NUMBER) {
00080     strcpy(dawg_str, "NUM");
00081   } else if (dawg_->permuter() == SYSTEM_DAWG_PERM) {
00082     strcpy(dawg_str, "Main");
00083   } else if (dawg_->permuter() == USER_DAWG_PERM) {
00084     strcpy(dawg_str, "User");
00085   } else if (dawg_->permuter() == DOC_DAWG_PERM) {
00086     strcpy(dawg_str, "Doc");
00087   } else {
00088     strcpy(dawg_str, "N/A");
00089   }
00090 
00091   sprintf(edge_str, "%d", static_cast<int>(start_edge_));
00092   if (IsLeadingPuncEdge(edge_mask_)) {
00093     strcat(edge_str, "-LP");
00094   }
00095   if (IsTrailingPuncEdge(edge_mask_)) {
00096     strcat(edge_str, "-TP");
00097   }
00098   sprintf(char_ptr, "%s(%s)%s, Wtd Dawg Cost=%d",
00099           dawg_str, edge_str, IsEOW() ? "-EOW-" : "", path_cost_);
00100 
00101   return char_ptr;
00102 }
00103 
00104 int TessLangModEdge::CreateChildren(CubeRecoContext *cntxt,
00105                                     const Dawg *dawg,
00106                                     NODE_REF parent_node,
00107                                     LangModEdge **edge_array) {
00108   int edge_cnt = 0;
00109   NodeChildVector vec;
00110   dawg->unichar_ids_of(parent_node, &vec);  // find all children of the parent
00111   for (int i = 0; i < vec.size(); ++i) {
00112     const NodeChild &child = vec[i];
00113     if (child.unichar_id == INVALID_UNICHAR_ID) continue;
00114     edge_array[edge_cnt] =
00115       new TessLangModEdge(cntxt, dawg, child.edge_ref, child.unichar_id);
00116     if (edge_array[edge_cnt] != NULL) edge_cnt++;
00117   }
00118   return edge_cnt;
00119 }
00120 }