Tesseract
3.02
|
00001 /* -*-C-*- 00002 ******************************************************************************** 00003 * File: hyphen.c (Formerly hyphen.c) 00004 * Description: Functions for maintaining information about hyphenated words. 00005 * Author: Mark Seaman, OCR Technology 00006 * Created: Fri Oct 16 14:37:00 1987 00007 * Modified: Thu Mar 14 11:09:43 1991 (Mark Seaman) marks@hpgrlt 00008 * Language: C 00009 * Package: N/A 00010 * Status: Reusable Software Component 00011 * 00012 * (c) Copyright 1987, Hewlett-Packard Company. 00013 ** Licensed under the Apache License, Version 2.0 (the "License"); 00014 ** you may not use this file except in compliance with the License. 00015 ** You may obtain a copy of the License at 00016 ** http://www.apache.org/licenses/LICENSE-2.0 00017 ** Unless required by applicable law or agreed to in writing, software 00018 ** distributed under the License is distributed on an "AS IS" BASIS, 00019 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00020 ** See the License for the specific language governing permissions and 00021 ** limitations under the License. 00022 * 00023 *********************************************************************************/ 00024 00025 #include "dict.h" 00026 00027 namespace tesseract { 00028 00029 // Unless the previous word was the last one on the line, and the current 00030 // one is not (thus it is the first one on the line), erase hyphen_word_, 00031 // clear hyphen_active_dawgs_, hyphen_constraints_ update last_word_on_line_. 00032 void Dict::reset_hyphen_vars(bool last_word_on_line) { 00033 if (!(last_word_on_line_ == true && last_word_on_line == false)) { 00034 if (hyphen_word_ != NULL) { 00035 delete hyphen_word_; 00036 hyphen_word_ = NULL; 00037 hyphen_active_dawgs_.clear(); 00038 hyphen_constraints_.clear(); 00039 } 00040 } 00041 if (hyphen_debug_level) { 00042 tprintf("reset_hyphen_vars: last_word_on_line %d -> %d\n", 00043 last_word_on_line_, last_word_on_line); 00044 } 00045 last_word_on_line_ = last_word_on_line; 00046 } 00047 00048 // Update hyphen_word_, and copy the given DawgInfoVectors into 00049 // hyphen_active_dawgs_ and hyphen_constraints_. 00050 void Dict::set_hyphen_word(const WERD_CHOICE &word, 00051 const DawgInfoVector &active_dawgs, 00052 const DawgInfoVector &constraints) { 00053 if (hyphen_word_ == NULL) { 00054 hyphen_word_ = new WERD_CHOICE(word.unicharset()); 00055 hyphen_word_->make_bad(); 00056 } 00057 if (hyphen_word_->rating() > word.rating()) { 00058 *hyphen_word_ = word; 00059 // Remove the last unichar id as it is a hyphen, and remove 00060 // any unichar_string/lengths that are present. 00061 hyphen_word_->remove_last_unichar_id(); 00062 hyphen_active_dawgs_ = active_dawgs; 00063 hyphen_constraints_ = constraints; 00064 } 00065 if (hyphen_debug_level) { 00066 hyphen_word_->print("set_hyphen_word: "); 00067 } 00068 } 00069 } // namespace tesseract