Tesseract
3.02
|
00001 /********************************************************************** 00002 * File: rejctmap.cpp (Formerly rejmap.c) 00003 * Description: REJ and REJMAP class functions. 00004 * Author: Phil Cheatle 00005 * Created: Thu Jun 9 13:46:38 BST 1994 00006 * 00007 * (C) Copyright 1994, Hewlett-Packard Ltd. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 * 00018 **********************************************************************/ 00019 00020 #include "mfcpch.h" 00021 #include "hosthplb.h" 00022 //#include "basefile.h" 00023 #include "rejctmap.h" 00024 #include "secname.h" 00025 #include "params.h" 00026 00027 BOOL8 REJ::perm_rejected() { //Is char perm reject? 00028 return (flag (R_TESS_FAILURE) || 00029 flag (R_SMALL_XHT) || 00030 flag (R_EDGE_CHAR) || 00031 flag (R_1IL_CONFLICT) || 00032 flag (R_POSTNN_1IL) || 00033 flag (R_REJ_CBLOB) || 00034 flag (R_BAD_REPETITION) || flag (R_MM_REJECT)); 00035 } 00036 00037 00038 BOOL8 REJ::rej_before_nn_accept() { 00039 return flag (R_POOR_MATCH) || 00040 flag (R_NOT_TESS_ACCEPTED) || 00041 flag (R_CONTAINS_BLANKS) || flag (R_BAD_PERMUTER); 00042 } 00043 00044 00045 BOOL8 REJ::rej_between_nn_and_mm() { 00046 return flag (R_HYPHEN) || 00047 flag (R_DUBIOUS) || 00048 flag (R_NO_ALPHANUMS) || flag (R_MOSTLY_REJ) || flag (R_XHT_FIXUP); 00049 } 00050 00051 00052 BOOL8 REJ::rej_between_mm_and_quality_accept() { 00053 return flag (R_BAD_QUALITY); 00054 } 00055 00056 00057 BOOL8 REJ::rej_between_quality_and_minimal_rej_accept() { 00058 return flag (R_DOC_REJ) || 00059 flag (R_BLOCK_REJ) || flag (R_ROW_REJ) || flag (R_UNLV_REJ); 00060 } 00061 00062 00063 BOOL8 REJ::rej_before_mm_accept() { 00064 return rej_between_nn_and_mm () || 00065 (rej_before_nn_accept () && 00066 !flag (R_NN_ACCEPT) && !flag (R_HYPHEN_ACCEPT)); 00067 } 00068 00069 00070 BOOL8 REJ::rej_before_quality_accept() { 00071 return rej_between_mm_and_quality_accept () || 00072 (!flag (R_MM_ACCEPT) && rej_before_mm_accept ()); 00073 } 00074 00075 00076 BOOL8 REJ::rejected() { //Is char rejected? 00077 if (flag (R_MINIMAL_REJ_ACCEPT)) 00078 return FALSE; 00079 else 00080 return (perm_rejected () || 00081 rej_between_quality_and_minimal_rej_accept () || 00082 (!flag (R_QUALITY_ACCEPT) && rej_before_quality_accept ())); 00083 } 00084 00085 00086 BOOL8 REJ::accept_if_good_quality() { //potential rej? 00087 return (rejected () && 00088 !perm_rejected () && 00089 flag (R_BAD_PERMUTER) && 00090 !flag (R_POOR_MATCH) && 00091 !flag (R_NOT_TESS_ACCEPTED) && 00092 !flag (R_CONTAINS_BLANKS) && 00093 (!rej_between_nn_and_mm () && 00094 !rej_between_mm_and_quality_accept () && 00095 !rej_between_quality_and_minimal_rej_accept ())); 00096 } 00097 00098 00099 void REJ::setrej_tess_failure() { //Tess generated blank 00100 set_flag(R_TESS_FAILURE); 00101 } 00102 00103 00104 void REJ::setrej_small_xht() { //Small xht char/wd 00105 set_flag(R_SMALL_XHT); 00106 } 00107 00108 00109 void REJ::setrej_edge_char() { //Close to image edge 00110 set_flag(R_EDGE_CHAR); 00111 } 00112 00113 00114 void REJ::setrej_1Il_conflict() { //Initial reject map 00115 set_flag(R_1IL_CONFLICT); 00116 } 00117 00118 00119 void REJ::setrej_postNN_1Il() { //1Il after NN 00120 set_flag(R_POSTNN_1IL); 00121 } 00122 00123 00124 void REJ::setrej_rej_cblob() { //Insert duff blob 00125 set_flag(R_REJ_CBLOB); 00126 } 00127 00128 00129 void REJ::setrej_mm_reject() { //Matrix matcher 00130 set_flag(R_MM_REJECT); 00131 } 00132 00133 00134 void REJ::setrej_bad_repetition() { //Odd repeated char 00135 set_flag(R_BAD_REPETITION); 00136 } 00137 00138 00139 void REJ::setrej_poor_match() { //Failed Rays heuristic 00140 set_flag(R_POOR_MATCH); 00141 } 00142 00143 00144 void REJ::setrej_not_tess_accepted() { 00145 //TEMP reject_word 00146 set_flag(R_NOT_TESS_ACCEPTED); 00147 } 00148 00149 00150 void REJ::setrej_contains_blanks() { 00151 //TEMP reject_word 00152 set_flag(R_CONTAINS_BLANKS); 00153 } 00154 00155 00156 void REJ::setrej_bad_permuter() { //POTENTIAL reject_word 00157 set_flag(R_BAD_PERMUTER); 00158 } 00159 00160 00161 void REJ::setrej_hyphen() { //PostNN dubious hyphen or . 00162 set_flag(R_HYPHEN); 00163 } 00164 00165 00166 void REJ::setrej_dubious() { //PostNN dubious limit 00167 set_flag(R_DUBIOUS); 00168 } 00169 00170 00171 void REJ::setrej_no_alphanums() { //TEMP reject_word 00172 set_flag(R_NO_ALPHANUMS); 00173 } 00174 00175 00176 void REJ::setrej_mostly_rej() { //TEMP reject_word 00177 set_flag(R_MOSTLY_REJ); 00178 } 00179 00180 00181 void REJ::setrej_xht_fixup() { //xht fixup 00182 set_flag(R_XHT_FIXUP); 00183 } 00184 00185 00186 void REJ::setrej_bad_quality() { //TEMP reject_word 00187 set_flag(R_BAD_QUALITY); 00188 } 00189 00190 00191 void REJ::setrej_doc_rej() { //TEMP reject_word 00192 set_flag(R_DOC_REJ); 00193 } 00194 00195 00196 void REJ::setrej_block_rej() { //TEMP reject_word 00197 set_flag(R_BLOCK_REJ); 00198 } 00199 00200 00201 void REJ::setrej_row_rej() { //TEMP reject_word 00202 set_flag(R_ROW_REJ); 00203 } 00204 00205 00206 void REJ::setrej_unlv_rej() { //TEMP reject_word 00207 set_flag(R_UNLV_REJ); 00208 } 00209 00210 00211 void REJ::setrej_hyphen_accept() { //NN Flipped a char 00212 set_flag(R_HYPHEN_ACCEPT); 00213 } 00214 00215 00216 void REJ::setrej_nn_accept() { //NN Flipped a char 00217 set_flag(R_NN_ACCEPT); 00218 } 00219 00220 00221 void REJ::setrej_mm_accept() { //Matrix matcher 00222 set_flag(R_MM_ACCEPT); 00223 } 00224 00225 00226 void REJ::setrej_quality_accept() { //Quality flip a char 00227 set_flag(R_QUALITY_ACCEPT); 00228 } 00229 00230 00231 void REJ::setrej_minimal_rej_accept() { 00232 //Accept all except blank 00233 set_flag(R_MINIMAL_REJ_ACCEPT); 00234 } 00235 00236 00237 void REJ::full_print(FILE *fp) { 00238 #ifndef SECURE_NAMES 00239 00240 fprintf (fp, "R_TESS_FAILURE: %s\n", flag (R_TESS_FAILURE) ? "T" : "F"); 00241 fprintf (fp, "R_SMALL_XHT: %s\n", flag (R_SMALL_XHT) ? "T" : "F"); 00242 fprintf (fp, "R_EDGE_CHAR: %s\n", flag (R_EDGE_CHAR) ? "T" : "F"); 00243 fprintf (fp, "R_1IL_CONFLICT: %s\n", flag (R_1IL_CONFLICT) ? "T" : "F"); 00244 fprintf (fp, "R_POSTNN_1IL: %s\n", flag (R_POSTNN_1IL) ? "T" : "F"); 00245 fprintf (fp, "R_REJ_CBLOB: %s\n", flag (R_REJ_CBLOB) ? "T" : "F"); 00246 fprintf (fp, "R_MM_REJECT: %s\n", flag (R_MM_REJECT) ? "T" : "F"); 00247 fprintf (fp, "R_BAD_REPETITION: %s\n", flag (R_BAD_REPETITION) ? "T" : "F"); 00248 fprintf (fp, "R_POOR_MATCH: %s\n", flag (R_POOR_MATCH) ? "T" : "F"); 00249 fprintf (fp, "R_NOT_TESS_ACCEPTED: %s\n", 00250 flag (R_NOT_TESS_ACCEPTED) ? "T" : "F"); 00251 fprintf (fp, "R_CONTAINS_BLANKS: %s\n", 00252 flag (R_CONTAINS_BLANKS) ? "T" : "F"); 00253 fprintf (fp, "R_BAD_PERMUTER: %s\n", flag (R_BAD_PERMUTER) ? "T" : "F"); 00254 fprintf (fp, "R_HYPHEN: %s\n", flag (R_HYPHEN) ? "T" : "F"); 00255 fprintf (fp, "R_DUBIOUS: %s\n", flag (R_DUBIOUS) ? "T" : "F"); 00256 fprintf (fp, "R_NO_ALPHANUMS: %s\n", flag (R_NO_ALPHANUMS) ? "T" : "F"); 00257 fprintf (fp, "R_MOSTLY_REJ: %s\n", flag (R_MOSTLY_REJ) ? "T" : "F"); 00258 fprintf (fp, "R_XHT_FIXUP: %s\n", flag (R_XHT_FIXUP) ? "T" : "F"); 00259 fprintf (fp, "R_BAD_QUALITY: %s\n", flag (R_BAD_QUALITY) ? "T" : "F"); 00260 fprintf (fp, "R_DOC_REJ: %s\n", flag (R_DOC_REJ) ? "T" : "F"); 00261 fprintf (fp, "R_BLOCK_REJ: %s\n", flag (R_BLOCK_REJ) ? "T" : "F"); 00262 fprintf (fp, "R_ROW_REJ: %s\n", flag (R_ROW_REJ) ? "T" : "F"); 00263 fprintf (fp, "R_UNLV_REJ: %s\n", flag (R_UNLV_REJ) ? "T" : "F"); 00264 fprintf (fp, "R_HYPHEN_ACCEPT: %s\n", flag (R_HYPHEN_ACCEPT) ? "T" : "F"); 00265 fprintf (fp, "R_NN_ACCEPT: %s\n", flag (R_NN_ACCEPT) ? "T" : "F"); 00266 fprintf (fp, "R_MM_ACCEPT: %s\n", flag (R_MM_ACCEPT) ? "T" : "F"); 00267 fprintf (fp, "R_QUALITY_ACCEPT: %s\n", flag (R_QUALITY_ACCEPT) ? "T" : "F"); 00268 fprintf (fp, "R_MINIMAL_REJ_ACCEPT: %s\n", 00269 flag (R_MINIMAL_REJ_ACCEPT) ? "T" : "F"); 00270 #endif 00271 } 00272 00273 00274 //The REJMAP class has been hacked to use alloc_struct instead of new []. 00275 //This is to reduce memory fragmentation only as it is rather kludgy. 00276 //alloc_struct by-passes the call to the contsructor of REJ on each 00277 //array element. Although the constructor is empty, the BITS16 members 00278 //do have a constructor which sets all the flags to 0. The memset 00279 //replaces this functionality. 00280 00281 REJMAP::REJMAP( //classwise copy 00282 const REJMAP &source) { 00283 REJ *to; 00284 REJ *from = source.ptr; 00285 int i; 00286 00287 len = source.length (); 00288 00289 if (len > 0) { 00290 ptr = (REJ *) alloc_struct (len * sizeof (REJ), "REJ"); 00291 to = ptr; 00292 for (i = 0; i < len; i++) { 00293 *to = *from; 00294 to++; 00295 from++; 00296 } 00297 } 00298 else 00299 ptr = NULL; 00300 } 00301 00302 00303 REJMAP & REJMAP::operator= ( //assign REJMAP 00304 const REJMAP & source //from this 00305 ) { 00306 REJ * 00307 to; 00308 REJ * 00309 from = source.ptr; 00310 int 00311 i; 00312 00313 initialise (source.len); 00314 to = ptr; 00315 for (i = 0; i < len; i++) { 00316 *to = *from; 00317 to++; 00318 from++; 00319 } 00320 return *this; 00321 } 00322 00323 00324 void REJMAP::initialise( //Redefine map 00325 inT16 length) { 00326 if (ptr != NULL) 00327 free_struct (ptr, len * sizeof (REJ), "REJ"); 00328 len = length; 00329 if (len > 0) 00330 ptr = (REJ *) memset (alloc_struct (len * sizeof (REJ), "REJ"), 00331 0, len * sizeof (REJ)); 00332 else 00333 ptr = NULL; 00334 } 00335 00336 00337 inT16 REJMAP::accept_count() { //How many accepted? 00338 int i; 00339 inT16 count = 0; 00340 00341 for (i = 0; i < len; i++) { 00342 if (ptr[i].accepted ()) 00343 count++; 00344 } 00345 return count; 00346 } 00347 00348 00349 BOOL8 REJMAP::recoverable_rejects() { //Any non perm rejs? 00350 int i; 00351 00352 for (i = 0; i < len; i++) { 00353 if (ptr[i].recoverable ()) 00354 return TRUE; 00355 } 00356 return FALSE; 00357 } 00358 00359 00360 BOOL8 REJMAP::quality_recoverable_rejects() { //Any potential rejs? 00361 int i; 00362 00363 for (i = 0; i < len; i++) { 00364 if (ptr[i].accept_if_good_quality ()) 00365 return TRUE; 00366 } 00367 return FALSE; 00368 } 00369 00370 00371 void REJMAP::remove_pos( //Cut out an element 00372 inT16 pos //element to remove 00373 ) { 00374 REJ *new_ptr; //new, smaller map 00375 int i; 00376 00377 ASSERT_HOST (pos >= 0); 00378 ASSERT_HOST (pos < len); 00379 ASSERT_HOST (len > 0); 00380 00381 len--; 00382 if (len > 0) 00383 new_ptr = (REJ *) memset (alloc_struct (len * sizeof (REJ), "REJ"), 00384 0, len * sizeof (REJ)); 00385 else 00386 new_ptr = NULL; 00387 00388 for (i = 0; i < pos; i++) 00389 new_ptr[i] = ptr[i]; //copy pre pos 00390 00391 for (; pos < len; pos++) 00392 new_ptr[pos] = ptr[pos + 1]; //copy post pos 00393 00394 //delete old map 00395 free_struct (ptr, (len + 1) * sizeof (REJ), "REJ"); 00396 ptr = new_ptr; 00397 } 00398 00399 00400 void REJMAP::print(FILE *fp) { 00401 int i; 00402 char buff[512]; 00403 00404 for (i = 0; i < len; i++) { 00405 buff[i] = ptr[i].display_char (); 00406 } 00407 buff[i] = '\0'; 00408 fprintf (fp, "\"%s\"", buff); 00409 } 00410 00411 00412 void REJMAP::full_print(FILE *fp) { 00413 int i; 00414 00415 for (i = 0; i < len; i++) { 00416 ptr[i].full_print (fp); 00417 fprintf (fp, "\n"); 00418 } 00419 } 00420 00421 00422 void REJMAP::rej_word_small_xht() { //Reject whole word 00423 int i; 00424 00425 for (i = 0; i < len; i++) { 00426 ptr[i].setrej_small_xht (); 00427 } 00428 } 00429 00430 00431 void REJMAP::rej_word_tess_failure() { //Reject whole word 00432 int i; 00433 00434 for (i = 0; i < len; i++) { 00435 ptr[i].setrej_tess_failure (); 00436 } 00437 } 00438 00439 00440 void REJMAP::rej_word_not_tess_accepted() { //Reject whole word 00441 int i; 00442 00443 for (i = 0; i < len; i++) { 00444 if (ptr[i].accepted()) ptr[i].setrej_not_tess_accepted(); 00445 } 00446 } 00447 00448 00449 void REJMAP::rej_word_contains_blanks() { //Reject whole word 00450 int i; 00451 00452 for (i = 0; i < len; i++) { 00453 if (ptr[i].accepted()) ptr[i].setrej_contains_blanks(); 00454 } 00455 } 00456 00457 00458 void REJMAP::rej_word_bad_permuter() { //Reject whole word 00459 int i; 00460 00461 for (i = 0; i < len; i++) { 00462 if (ptr[i].accepted()) ptr[i].setrej_bad_permuter (); 00463 } 00464 } 00465 00466 00467 void REJMAP::rej_word_xht_fixup() { //Reject whole word 00468 int i; 00469 00470 for (i = 0; i < len; i++) { 00471 if (ptr[i].accepted()) ptr[i].setrej_xht_fixup(); 00472 } 00473 } 00474 00475 00476 void REJMAP::rej_word_no_alphanums() { //Reject whole word 00477 int i; 00478 00479 for (i = 0; i < len; i++) { 00480 if (ptr[i].accepted()) ptr[i].setrej_no_alphanums(); 00481 } 00482 } 00483 00484 00485 void REJMAP::rej_word_mostly_rej() { //Reject whole word 00486 int i; 00487 00488 for (i = 0; i < len; i++) { 00489 if (ptr[i].accepted()) ptr[i].setrej_mostly_rej(); 00490 } 00491 } 00492 00493 00494 void REJMAP::rej_word_bad_quality() { //Reject whole word 00495 int i; 00496 00497 for (i = 0; i < len; i++) { 00498 if (ptr[i].accepted()) ptr[i].setrej_bad_quality(); 00499 } 00500 } 00501 00502 00503 void REJMAP::rej_word_doc_rej() { //Reject whole word 00504 int i; 00505 00506 for (i = 0; i < len; i++) { 00507 if (ptr[i].accepted()) ptr[i].setrej_doc_rej(); 00508 } 00509 } 00510 00511 00512 void REJMAP::rej_word_block_rej() { //Reject whole word 00513 int i; 00514 00515 for (i = 0; i < len; i++) { 00516 if (ptr[i].accepted()) ptr[i].setrej_block_rej(); 00517 } 00518 } 00519 00520 00521 void REJMAP::rej_word_row_rej() { //Reject whole word 00522 int i; 00523 00524 for (i = 0; i < len; i++) { 00525 if (ptr[i].accepted()) ptr[i].setrej_row_rej(); 00526 } 00527 }