Tesseract
3.02
|
00001 /********************************************************************** 00002 * File: adaptions.cpp (Formerly adaptions.c) 00003 * Description: Functions used to adapt to blobs already confidently 00004 * identified 00005 * Author: Chris Newton 00006 * Created: Thu Oct 7 10:17:28 BST 1993 00007 * 00008 * (C) Copyright 1992, Hewlett-Packard Ltd. 00009 ** Licensed under the Apache License, Version 2.0 (the "License"); 00010 ** you may not use this file except in compliance with the License. 00011 ** You may obtain a copy of the License at 00012 ** http://www.apache.org/licenses/LICENSE-2.0 00013 ** Unless required by applicable law or agreed to in writing, software 00014 ** distributed under the License is distributed on an "AS IS" BASIS, 00015 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00016 ** See the License for the specific language governing permissions and 00017 ** limitations under the License. 00018 * 00019 **********************************************************************/ 00020 00021 #ifdef _MSC_VER 00022 #pragma warning(disable:4244) // Conversion warnings 00023 #pragma warning(disable:4305) // int/float warnings 00024 #endif 00025 00026 #include "mfcpch.h" 00027 00028 #ifdef __UNIX__ 00029 #include <assert.h> 00030 #endif 00031 #include <ctype.h> 00032 #include <string.h> 00033 #include "tessbox.h" 00034 #include "tessvars.h" 00035 #include "memry.h" 00036 #include "imgs.h" 00037 #include "scaleimg.h" 00038 #include "reject.h" 00039 #include "control.h" 00040 #include "stopper.h" 00041 #include "secname.h" 00042 #include "tesseractclass.h" 00043 00044 // Include automatically generated configuration file if running autoconf. 00045 #ifdef HAVE_CONFIG_H 00046 #include "config_auto.h" 00047 #endif 00048 00049 namespace tesseract { 00050 BOOL8 Tesseract::word_adaptable( //should we adapt? 00051 WERD_RES *word, 00052 uinT16 mode) { 00053 if (tessedit_adaption_debug) { 00054 tprintf("Running word_adaptable() for %s rating %.4f certainty %.4f\n", 00055 word->best_choice == NULL ? "" : 00056 word->best_choice->unichar_string().string(), 00057 word->best_choice->rating(), word->best_choice->certainty()); 00058 } 00059 00060 BOOL8 status = FALSE; 00061 BITS16 flags(mode); 00062 00063 enum MODES 00064 { 00065 ADAPTABLE_WERD, 00066 ACCEPTABLE_WERD, 00067 CHECK_DAWGS, 00068 CHECK_SPACES, 00069 CHECK_ONE_ELL_CONFLICT, 00070 CHECK_AMBIG_WERD 00071 }; 00072 00073 /* 00074 0: NO adaption 00075 */ 00076 if (mode == 0) { 00077 if (tessedit_adaption_debug) tprintf("adaption disabled\n"); 00078 return FALSE; 00079 } 00080 00081 if (flags.bit (ADAPTABLE_WERD)) { 00082 status |= word->tess_would_adapt; // result of Classify::AdaptableWord() 00083 if (tessedit_adaption_debug && !status) { 00084 tprintf("tess_would_adapt bit is false\n"); 00085 } 00086 } 00087 00088 if (flags.bit (ACCEPTABLE_WERD)) { 00089 status |= word->tess_accepted; 00090 if (tessedit_adaption_debug && !status) { 00091 tprintf("tess_accepted bit is false\n"); 00092 } 00093 } 00094 00095 if (!status) { // If not set then 00096 return FALSE; // ignore other checks 00097 } 00098 00099 if (flags.bit (CHECK_DAWGS) && 00100 (word->best_choice->permuter () != SYSTEM_DAWG_PERM) && 00101 (word->best_choice->permuter () != FREQ_DAWG_PERM) && 00102 (word->best_choice->permuter () != USER_DAWG_PERM) && 00103 (word->best_choice->permuter () != NUMBER_PERM)) { 00104 if (tessedit_adaption_debug) tprintf("word not in dawgs\n"); 00105 return FALSE; 00106 } 00107 00108 if (flags.bit (CHECK_ONE_ELL_CONFLICT) && one_ell_conflict (word, FALSE)) { 00109 if (tessedit_adaption_debug) tprintf("word has ell conflict\n"); 00110 return FALSE; 00111 } 00112 00113 if (flags.bit (CHECK_SPACES) && 00114 (strchr(word->best_choice->unichar_string().string(), ' ') != NULL)) { 00115 if (tessedit_adaption_debug) tprintf("word contains spaces\n"); 00116 return FALSE; 00117 } 00118 00119 // if (flags.bit (CHECK_AMBIG_WERD) && test_ambig_word (word)) 00120 if (flags.bit (CHECK_AMBIG_WERD) && 00121 !getDict().NoDangerousAmbig(word->best_choice, NULL, false, NULL, NULL)) { 00122 if (tessedit_adaption_debug) tprintf("word is ambiguous\n"); 00123 return FALSE; 00124 } 00125 00126 // Do not adapt to words that are composed from fragments if 00127 // tessedit_adapt_to_char_fragments is false. 00128 if (!tessedit_adapt_to_char_fragments) { 00129 const char *fragment_lengths = word->best_choice->fragment_lengths(); 00130 if (fragment_lengths != NULL && *fragment_lengths != '\0') { 00131 for (int i = 0; i < word->best_choice->length(); ++i) { 00132 if (fragment_lengths[i] > 1) { 00133 if (tessedit_adaption_debug) tprintf("won't adapt to fragments\n"); 00134 return false; // found a character composed from fragments 00135 } 00136 } 00137 } 00138 } 00139 00140 if (tessedit_adaption_debug) { 00141 tprintf("returning status %d\n", status); 00142 } 00143 return status; 00144 } 00145 00146 } // namespace tesseract