Tesseract  3.02
tesseract-ocr/dict/hyphen.cpp
Go to the documentation of this file.
00001 /* -*-C-*-
00002  ********************************************************************************
00003  * File:        hyphen.cpp  (Formerly hyphen.c)
00004  * Description: Functions for maintaining information about hyphenated words.
00005  * Author:       Mark Seaman, OCR Technology
00006  * Created:      Fri Oct 16 14:37:00 1987
00007  * Modified:     Thu Mar 14 11:09:43 1991 (Mark Seaman) marks@hpgrlt
00008  * Language:     C
00009  * Package:      N/A
00010  * Status:       Reusable Software Component
00011  *
00012  * (c) Copyright 1987, Hewlett-Packard Company.
00013  ** Licensed under the Apache License, Version 2.0 (the "License");
00014  ** you may not use this file except in compliance with the License.
00015  ** You may obtain a copy of the License at
00016  ** http://www.apache.org/licenses/LICENSE-2.0
00017  ** Unless required by applicable law or agreed to in writing, software
00018  ** distributed under the License is distributed on an "AS IS" BASIS,
00019  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00020  ** See the License for the specific language governing permissions and
00021  ** limitations under the License.
00022  *
00023  *********************************************************************************/
00024 
00025 #include "dict.h"
00026 
00027 namespace tesseract {
00028 
00029 // Unless the previous word was the last one on the line, and the current
00030 // one is not (thus it is the first one on the line), erase hyphen_word_,
00031 // clear hyphen_active_dawgs_, hyphen_constraints_ update last_word_on_line_.
00032 void Dict::reset_hyphen_vars(bool last_word_on_line) {
00033   if (!(last_word_on_line_ == true && last_word_on_line == false)) {
00034     if (hyphen_word_ != NULL) {
00035       delete hyphen_word_;
00036       hyphen_word_ = NULL;
00037       hyphen_active_dawgs_.clear();
00038       hyphen_constraints_.clear();
00039     }
00040   }
00041   if (hyphen_debug_level) {
00042     tprintf("reset_hyphen_vars: last_word_on_line %d -> %d\n",
00043             last_word_on_line_, last_word_on_line);
00044   }
00045   last_word_on_line_ = last_word_on_line;
00046 }
00047 
00048 // Update hyphen_word_, and copy the given DawgInfoVectors into
00049 // hyphen_active_dawgs_ and hyphen_constraints_.
00050 void Dict::set_hyphen_word(const WERD_CHOICE &word,
00051                            const DawgInfoVector &active_dawgs,
00052                            const DawgInfoVector &constraints) {
00053   if (hyphen_word_ == NULL) {
00054     hyphen_word_ = new WERD_CHOICE(word.unicharset());
00055     hyphen_word_->make_bad();
00056   }
00057   if (hyphen_word_->rating() > word.rating()) {
00058     *hyphen_word_ = word;
00059     // Remove the last unichar id as it is a hyphen, and remove
00060     // any unichar_string/lengths that are present.
00061     hyphen_word_->remove_last_unichar_id();
00062     hyphen_active_dawgs_ = active_dawgs;
00063     hyphen_constraints_ = constraints;
00064   }
00065   if (hyphen_debug_level) {
00066     hyphen_word_->print("set_hyphen_word: ");
00067   }
00068 }
00069 }  // namespace tesseract