Tesseract  3.02
tesseract-ocr/ccstruct/rejctmap.cpp
Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        rejctmap.cpp  (Formerly rejmap.c)
00003  * Description: REJ and REJMAP class functions.
00004  * Author:              Phil Cheatle
00005  * Created:             Thu Jun  9 13:46:38 BST 1994
00006  *
00007  * (C) Copyright 1994, Hewlett-Packard Ltd.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
00020 #include "mfcpch.h"
00021 #include          "hosthplb.h"
00022 //#include                                      "basefile.h"
00023 #include          "rejctmap.h"
00024 #include          "secname.h"
00025 #include          "params.h"
00026 
00027 BOOL8 REJ::perm_rejected() {  //Is char perm reject?
00028   return (flag (R_TESS_FAILURE) ||
00029     flag (R_SMALL_XHT) ||
00030     flag (R_EDGE_CHAR) ||
00031     flag (R_1IL_CONFLICT) ||
00032     flag (R_POSTNN_1IL) ||
00033     flag (R_REJ_CBLOB) ||
00034     flag (R_BAD_REPETITION) || flag (R_MM_REJECT));
00035 }
00036 
00037 
00038 BOOL8 REJ::rej_before_nn_accept() {
00039   return flag (R_POOR_MATCH) ||
00040     flag (R_NOT_TESS_ACCEPTED) ||
00041     flag (R_CONTAINS_BLANKS) || flag (R_BAD_PERMUTER);
00042 }
00043 
00044 
00045 BOOL8 REJ::rej_between_nn_and_mm() {
00046   return flag (R_HYPHEN) ||
00047     flag (R_DUBIOUS) ||
00048     flag (R_NO_ALPHANUMS) || flag (R_MOSTLY_REJ) || flag (R_XHT_FIXUP);
00049 }
00050 
00051 
00052 BOOL8 REJ::rej_between_mm_and_quality_accept() {
00053   return flag (R_BAD_QUALITY);
00054 }
00055 
00056 
00057 BOOL8 REJ::rej_between_quality_and_minimal_rej_accept() {
00058   return flag (R_DOC_REJ) ||
00059     flag (R_BLOCK_REJ) || flag (R_ROW_REJ) || flag (R_UNLV_REJ);
00060 }
00061 
00062 
00063 BOOL8 REJ::rej_before_mm_accept() {
00064   return rej_between_nn_and_mm () ||
00065     (rej_before_nn_accept () &&
00066     !flag (R_NN_ACCEPT) && !flag (R_HYPHEN_ACCEPT));
00067 }
00068 
00069 
00070 BOOL8 REJ::rej_before_quality_accept() {
00071   return rej_between_mm_and_quality_accept () ||
00072     (!flag (R_MM_ACCEPT) && rej_before_mm_accept ());
00073 }
00074 
00075 
00076 BOOL8 REJ::rejected() {  //Is char rejected?
00077   if (flag (R_MINIMAL_REJ_ACCEPT))
00078     return FALSE;
00079   else
00080     return (perm_rejected () ||
00081       rej_between_quality_and_minimal_rej_accept () ||
00082       (!flag (R_QUALITY_ACCEPT) && rej_before_quality_accept ()));
00083 }
00084 
00085 
00086 BOOL8 REJ::accept_if_good_quality() {  //potential rej?
00087   return (rejected () &&
00088     !perm_rejected () &&
00089     flag (R_BAD_PERMUTER) &&
00090     !flag (R_POOR_MATCH) &&
00091     !flag (R_NOT_TESS_ACCEPTED) &&
00092     !flag (R_CONTAINS_BLANKS) &&
00093     (!rej_between_nn_and_mm () &&
00094      !rej_between_mm_and_quality_accept () &&
00095      !rej_between_quality_and_minimal_rej_accept ()));
00096 }
00097 
00098 
00099 void REJ::setrej_tess_failure() {  //Tess generated blank
00100   set_flag(R_TESS_FAILURE);
00101 }
00102 
00103 
00104 void REJ::setrej_small_xht() {  //Small xht char/wd
00105   set_flag(R_SMALL_XHT);
00106 }
00107 
00108 
00109 void REJ::setrej_edge_char() {  //Close to image edge
00110   set_flag(R_EDGE_CHAR);
00111 }
00112 
00113 
00114 void REJ::setrej_1Il_conflict() {  //Initial reject map
00115   set_flag(R_1IL_CONFLICT);
00116 }
00117 
00118 
00119 void REJ::setrej_postNN_1Il() {  //1Il after NN
00120   set_flag(R_POSTNN_1IL);
00121 }
00122 
00123 
00124 void REJ::setrej_rej_cblob() {  //Insert duff blob
00125   set_flag(R_REJ_CBLOB);
00126 }
00127 
00128 
00129 void REJ::setrej_mm_reject() {  //Matrix matcher
00130   set_flag(R_MM_REJECT);
00131 }
00132 
00133 
00134 void REJ::setrej_bad_repetition() {  //Odd repeated char
00135   set_flag(R_BAD_REPETITION);
00136 }
00137 
00138 
00139 void REJ::setrej_poor_match() {  //Failed Rays heuristic
00140   set_flag(R_POOR_MATCH);
00141 }
00142 
00143 
00144 void REJ::setrej_not_tess_accepted() {
00145                                  //TEMP reject_word
00146   set_flag(R_NOT_TESS_ACCEPTED);
00147 }
00148 
00149 
00150 void REJ::setrej_contains_blanks() {
00151                                  //TEMP reject_word
00152   set_flag(R_CONTAINS_BLANKS);
00153 }
00154 
00155 
00156 void REJ::setrej_bad_permuter() {  //POTENTIAL reject_word
00157   set_flag(R_BAD_PERMUTER);
00158 }
00159 
00160 
00161 void REJ::setrej_hyphen() {  //PostNN dubious hyphen or .
00162   set_flag(R_HYPHEN);
00163 }
00164 
00165 
00166 void REJ::setrej_dubious() {  //PostNN dubious limit
00167   set_flag(R_DUBIOUS);
00168 }
00169 
00170 
00171 void REJ::setrej_no_alphanums() {  //TEMP reject_word
00172   set_flag(R_NO_ALPHANUMS);
00173 }
00174 
00175 
00176 void REJ::setrej_mostly_rej() {  //TEMP reject_word
00177   set_flag(R_MOSTLY_REJ);
00178 }
00179 
00180 
00181 void REJ::setrej_xht_fixup() {  //xht fixup
00182   set_flag(R_XHT_FIXUP);
00183 }
00184 
00185 
00186 void REJ::setrej_bad_quality() {  //TEMP reject_word
00187   set_flag(R_BAD_QUALITY);
00188 }
00189 
00190 
00191 void REJ::setrej_doc_rej() {  //TEMP reject_word
00192   set_flag(R_DOC_REJ);
00193 }
00194 
00195 
00196 void REJ::setrej_block_rej() {  //TEMP reject_word
00197   set_flag(R_BLOCK_REJ);
00198 }
00199 
00200 
00201 void REJ::setrej_row_rej() {  //TEMP reject_word
00202   set_flag(R_ROW_REJ);
00203 }
00204 
00205 
00206 void REJ::setrej_unlv_rej() {  //TEMP reject_word
00207   set_flag(R_UNLV_REJ);
00208 }
00209 
00210 
00211 void REJ::setrej_hyphen_accept() {  //NN Flipped a char
00212   set_flag(R_HYPHEN_ACCEPT);
00213 }
00214 
00215 
00216 void REJ::setrej_nn_accept() {  //NN Flipped a char
00217   set_flag(R_NN_ACCEPT);
00218 }
00219 
00220 
00221 void REJ::setrej_mm_accept() {  //Matrix matcher
00222   set_flag(R_MM_ACCEPT);
00223 }
00224 
00225 
00226 void REJ::setrej_quality_accept() {  //Quality flip a char
00227   set_flag(R_QUALITY_ACCEPT);
00228 }
00229 
00230 
00231 void REJ::setrej_minimal_rej_accept() {
00232                                  //Accept all except blank
00233   set_flag(R_MINIMAL_REJ_ACCEPT);
00234 }
00235 
00236 
00237 void REJ::full_print(FILE *fp) {
00238   #ifndef SECURE_NAMES
00239 
00240   fprintf (fp, "R_TESS_FAILURE: %s\n", flag (R_TESS_FAILURE) ? "T" : "F");
00241   fprintf (fp, "R_SMALL_XHT: %s\n", flag (R_SMALL_XHT) ? "T" : "F");
00242   fprintf (fp, "R_EDGE_CHAR: %s\n", flag (R_EDGE_CHAR) ? "T" : "F");
00243   fprintf (fp, "R_1IL_CONFLICT: %s\n", flag (R_1IL_CONFLICT) ? "T" : "F");
00244   fprintf (fp, "R_POSTNN_1IL: %s\n", flag (R_POSTNN_1IL) ? "T" : "F");
00245   fprintf (fp, "R_REJ_CBLOB: %s\n", flag (R_REJ_CBLOB) ? "T" : "F");
00246   fprintf (fp, "R_MM_REJECT: %s\n", flag (R_MM_REJECT) ? "T" : "F");
00247   fprintf (fp, "R_BAD_REPETITION: %s\n", flag (R_BAD_REPETITION) ? "T" : "F");
00248   fprintf (fp, "R_POOR_MATCH: %s\n", flag (R_POOR_MATCH) ? "T" : "F");
00249   fprintf (fp, "R_NOT_TESS_ACCEPTED: %s\n",
00250     flag (R_NOT_TESS_ACCEPTED) ? "T" : "F");
00251   fprintf (fp, "R_CONTAINS_BLANKS: %s\n",
00252     flag (R_CONTAINS_BLANKS) ? "T" : "F");
00253   fprintf (fp, "R_BAD_PERMUTER: %s\n", flag (R_BAD_PERMUTER) ? "T" : "F");
00254   fprintf (fp, "R_HYPHEN: %s\n", flag (R_HYPHEN) ? "T" : "F");
00255   fprintf (fp, "R_DUBIOUS: %s\n", flag (R_DUBIOUS) ? "T" : "F");
00256   fprintf (fp, "R_NO_ALPHANUMS: %s\n", flag (R_NO_ALPHANUMS) ? "T" : "F");
00257   fprintf (fp, "R_MOSTLY_REJ: %s\n", flag (R_MOSTLY_REJ) ? "T" : "F");
00258   fprintf (fp, "R_XHT_FIXUP: %s\n", flag (R_XHT_FIXUP) ? "T" : "F");
00259   fprintf (fp, "R_BAD_QUALITY: %s\n", flag (R_BAD_QUALITY) ? "T" : "F");
00260   fprintf (fp, "R_DOC_REJ: %s\n", flag (R_DOC_REJ) ? "T" : "F");
00261   fprintf (fp, "R_BLOCK_REJ: %s\n", flag (R_BLOCK_REJ) ? "T" : "F");
00262   fprintf (fp, "R_ROW_REJ: %s\n", flag (R_ROW_REJ) ? "T" : "F");
00263   fprintf (fp, "R_UNLV_REJ: %s\n", flag (R_UNLV_REJ) ? "T" : "F");
00264   fprintf (fp, "R_HYPHEN_ACCEPT: %s\n", flag (R_HYPHEN_ACCEPT) ? "T" : "F");
00265   fprintf (fp, "R_NN_ACCEPT: %s\n", flag (R_NN_ACCEPT) ? "T" : "F");
00266   fprintf (fp, "R_MM_ACCEPT: %s\n", flag (R_MM_ACCEPT) ? "T" : "F");
00267   fprintf (fp, "R_QUALITY_ACCEPT: %s\n", flag (R_QUALITY_ACCEPT) ? "T" : "F");
00268   fprintf (fp, "R_MINIMAL_REJ_ACCEPT: %s\n",
00269     flag (R_MINIMAL_REJ_ACCEPT) ? "T" : "F");
00270   #endif
00271 }
00272 
00273 
00274 //The REJMAP class has been hacked to use alloc_struct instead of new [].
00275 //This is to reduce memory fragmentation only as it is rather kludgy.
00276 //alloc_struct by-passes the call to the constructor of REJ on each
00277 //array element. Although the constructor is empty, the BITS16 members
00278 //do have a constructor which sets all the flags to 0. The memset
00279 //replaces this functionality.
00280 
00281 REJMAP::REJMAP(  //classwise copy
00282                const REJMAP &source) {
00283   REJ *to;
00284   REJ *from = source.ptr;
00285   int i;
00286 
00287   len = source.length ();
00288 
00289   if (len > 0) {
00290     ptr = (REJ *) alloc_struct (len * sizeof (REJ), "REJ");
00291     to = ptr;
00292     for (i = 0; i < len; i++) {
00293       *to = *from;
00294       to++;
00295       from++;
00296     }
00297   }
00298   else
00299     ptr = NULL;
00300 }
00301 
00302 
00303 REJMAP & REJMAP::operator= (     //assign REJMAP
00304 const REJMAP & source            //from this
00305 ) {
00306   REJ *
00307     to;
00308   REJ *
00309     from = source.ptr;
00310   int
00311     i;
00312 
00313   initialise (source.len);
00314   to = ptr;
00315   for (i = 0; i < len; i++) {
00316     *to = *from;
00317     to++;
00318     from++;
00319   }
00320   return *this;
00321 }
00322 
00323 
00324 void REJMAP::initialise(  //Redefine map
00325                         inT16 length) {
00326   if (ptr != NULL)
00327     free_struct (ptr, len * sizeof (REJ), "REJ");
00328   len = length;
00329   if (len > 0)
00330     ptr = (REJ *) memset (alloc_struct (len * sizeof (REJ), "REJ"),
00331       0, len * sizeof (REJ));
00332   else
00333     ptr = NULL;
00334 }
00335 
00336 
00337 inT16 REJMAP::accept_count() {  //How many accepted?
00338   int i;
00339   inT16 count = 0;
00340 
00341   for (i = 0; i < len; i++) {
00342     if (ptr[i].accepted ())
00343       count++;
00344   }
00345   return count;
00346 }
00347 
00348 
00349 BOOL8 REJMAP::recoverable_rejects() {  //Any non perm rejs?
00350   int i;
00351 
00352   for (i = 0; i < len; i++) {
00353     if (ptr[i].recoverable ())
00354       return TRUE;
00355   }
00356   return FALSE;
00357 }
00358 
00359 
00360 BOOL8 REJMAP::quality_recoverable_rejects() {  //Any potential rejs?
00361   int i;
00362 
00363   for (i = 0; i < len; i++) {
00364     if (ptr[i].accept_if_good_quality ())
00365       return TRUE;
00366   }
00367   return FALSE;
00368 }
00369 
00370 
00371 void REJMAP::remove_pos(           //Cut out an element
00372                         inT16 pos  //element to remove
00373                        ) {
00374   REJ *new_ptr;                  //new, smaller map
00375   int i;
00376 
00377   ASSERT_HOST (pos >= 0);
00378   ASSERT_HOST (pos < len);
00379   ASSERT_HOST (len > 0);
00380 
00381   len--;
00382   if (len > 0)
00383     new_ptr = (REJ *) memset (alloc_struct (len * sizeof (REJ), "REJ"),
00384       0, len * sizeof (REJ));
00385   else
00386     new_ptr = NULL;
00387 
00388   for (i = 0; i < pos; i++)
00389     new_ptr[i] = ptr[i];         //copy pre pos
00390 
00391   for (; pos < len; pos++)
00392     new_ptr[pos] = ptr[pos + 1]; //copy post pos
00393 
00394                                  //delete old map
00395   free_struct (ptr, (len + 1) * sizeof (REJ), "REJ");
00396   ptr = new_ptr;
00397 }
00398 
00399 
00400 void REJMAP::print(FILE *fp) {
00401   int i;
00402   char buff[512];
00403 
00404   for (i = 0; i < len; i++) {
00405     buff[i] = ptr[i].display_char ();
00406   }
00407   buff[i] = '\0';
00408   fprintf (fp, "\"%s\"", buff);
00409 }
00410 
00411 
00412 void REJMAP::full_print(FILE *fp) {
00413   int i;
00414 
00415   for (i = 0; i < len; i++) {
00416     ptr[i].full_print (fp);
00417     fprintf (fp, "\n");
00418   }
00419 }
00420 
00421 
00422 void REJMAP::rej_word_small_xht() {  //Reject whole word
00423   int i;
00424 
00425   for (i = 0; i < len; i++) {
00426     ptr[i].setrej_small_xht ();
00427   }
00428 }
00429 
00430 
00431 void REJMAP::rej_word_tess_failure() {  //Reject whole word
00432   int i;
00433 
00434   for (i = 0; i < len; i++) {
00435     ptr[i].setrej_tess_failure ();
00436   }
00437 }
00438 
00439 
00440 void REJMAP::rej_word_not_tess_accepted() {  //Reject whole word
00441   int i;
00442 
00443   for (i = 0; i < len; i++) {
00444     if (ptr[i].accepted()) ptr[i].setrej_not_tess_accepted();
00445   }
00446 }
00447 
00448 
00449 void REJMAP::rej_word_contains_blanks() {  //Reject whole word
00450   int i;
00451 
00452   for (i = 0; i < len; i++) {
00453     if (ptr[i].accepted()) ptr[i].setrej_contains_blanks();
00454   }
00455 }
00456 
00457 
00458 void REJMAP::rej_word_bad_permuter() {  //Reject whole word
00459   int i;
00460 
00461   for (i = 0; i < len; i++) {
00462     if (ptr[i].accepted()) ptr[i].setrej_bad_permuter ();
00463   }
00464 }
00465 
00466 
00467 void REJMAP::rej_word_xht_fixup() {  //Reject whole word
00468   int i;
00469 
00470   for (i = 0; i < len; i++) {
00471     if (ptr[i].accepted()) ptr[i].setrej_xht_fixup();
00472   }
00473 }
00474 
00475 
00476 void REJMAP::rej_word_no_alphanums() {  //Reject whole word
00477   int i;
00478 
00479   for (i = 0; i < len; i++) {
00480     if (ptr[i].accepted()) ptr[i].setrej_no_alphanums();
00481   }
00482 }
00483 
00484 
00485 void REJMAP::rej_word_mostly_rej() {  //Reject whole word
00486   int i;
00487 
00488   for (i = 0; i < len; i++) {
00489     if (ptr[i].accepted()) ptr[i].setrej_mostly_rej();
00490   }
00491 }
00492 
00493 
00494 void REJMAP::rej_word_bad_quality() {  //Reject whole word
00495   int i;
00496 
00497   for (i = 0; i < len; i++) {
00498     if (ptr[i].accepted()) ptr[i].setrej_bad_quality();
00499   }
00500 }
00501 
00502 
00503 void REJMAP::rej_word_doc_rej() {  //Reject whole word
00504   int i;
00505 
00506   for (i = 0; i < len; i++) {
00507     if (ptr[i].accepted()) ptr[i].setrej_doc_rej();
00508   }
00509 }
00510 
00511 
00512 void REJMAP::rej_word_block_rej() {  //Reject whole word
00513   int i;
00514 
00515   for (i = 0; i < len; i++) {
00516     if (ptr[i].accepted()) ptr[i].setrej_block_rej();
00517   }
00518 }
00519 
00520 
00521 void REJMAP::rej_word_row_rej() {  //Reject whole word
00522   int i;
00523 
00524   for (i = 0; i < len; i++) {
00525     if (ptr[i].accepted()) ptr[i].setrej_row_rej();
00526   }
00527 }