Tesseract
3.02
|
// Copyright 2006 Google Inc.
// All Rights Reserved.
// Author: renn
//
// The fscanf, vfscanf and creat functions are implemented so that their
// functionality is mostly like their stdio counterparts. However, currently
// these functions do not use any buffering, making them rather slow.
// File streams are thus processed one character at a time.
// Although the implementations of the scanf functions do lack a few minor
// features, they should be sufficient for their use in tesseract.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifdef EMBEDDED

#include <ctype.h>
#include <stdarg.h>
#include <stddef.h>
#include <inttypes.h>
#include <string.h>
#include <limits.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>

#include "scanutils.h"
#include "tprintf.h"

// Per-directive flags collected while parsing a '%' conversion.
enum Flags {
  FL_SPLAT  = 0x01,   // Drop the value, do not assign
  FL_INV    = 0x02,   // Character-set with inverse
  FL_WIDTH  = 0x04,   // Field width specified
  FL_MINUS  = 0x08,   // Negative number
};

// Size class of the destination object, adjusted by length modifiers.
enum Ranks {
  RANK_CHAR     = -2,
  RANK_SHORT    = -1,
  RANK_INT      = 0,
  RANK_LONG     = 1,
  RANK_LONGLONG = 2,
  RANK_PTR      = INT_MAX  // Special value used for pointers
};

const enum Ranks kMinRank = RANK_CHAR;
const enum Ranks kMaxRank = RANK_LONGLONG;

const enum Ranks kIntMaxRank  = RANK_LONGLONG;
const enum Ranks kSizeTRank   = RANK_LONG;
const enum Ranks kPtrDiffRank = RANK_LONG;

// Reason why format processing stopped early.
enum Bail {
  BAIL_NONE = 0,    // No error condition
  BAIL_EOF,         // Hit EOF
  BAIL_ERR          // Conversion mismatch
};

// Bits per unsigned long and number of words needed to hold one bit per
// unsigned char value. Both are integral constant expressions so that the
// scanset bitmap can be an ordinary fixed-size array.
static const size_t kLongBits = CHAR_BIT * sizeof(unsigned long);
static const size_t kMatchMapWords =
    ((1 << CHAR_BIT) + (kLongBits - 1)) / kLongBits;

// Helper functions ------------------------------------------------------------

// Returns the number of bits in a long.
inline size_t LongBit() {
  return CHAR_BIT * sizeof(long);
}

// Consumes whitespace from s. The first non-space character is pushed back
// and returned; EOF is returned if the stream ends first.
static inline int SkipSpace(FILE *s) {
  int p;
  do {
    p = fgetc(s);
  } while (isspace(p));
  ungetc(p, s);  // Make sure next char is available for reading
  return p;
}

// Sets the given bit in a bitmap made of unsigned longs.
static inline void SetBit(unsigned long *bitmap, unsigned int bit) {
  bitmap[bit / LongBit()] |= 1UL << (bit % LongBit());
}

// Returns 1 if the given bit is set in the bitmap, 0 otherwise.
static inline int TestBit(const unsigned long *bitmap, unsigned int bit) {
  return static_cast<int>((bitmap[bit / LongBit()] >> (bit % LongBit())) & 1UL);
}

// Returns the value of ch as a digit in bases up to 36 ('0'-'9', then
// letters of either case counting from 10), or -1 if ch is not such a digit.
static inline int DigitValue(int ch) {
  if (ch >= '0' && ch <= '9')
    return ch - '0';
  if (ch >= 'A' && ch <= 'Z')
    return ch - 'A' + 10;
  if (ch >= 'a' && ch <= 'z')
    return ch - 'a' + 10;
  return -1;
}

// Returns the numeric base implied by an integer conversion character.
// 0 means "detect from the prefix" (used for %i and %p).
static int IntegerBase(char conv) {
  switch (conv) {
    case 'd':
    case 'u':
      return 10;
    case 'o':
      return 8;
    case 'x':
    case 'X':
      return 16;
    default:
      return 0;
  }
}

// Reads an optionally signed integer from s. Leading whitespace is skipped.
// base == 0 selects hexadecimal for a "0x"/"0X" prefix, octal for a leading
// '0' and decimal otherwise; base == 16 accepts an optional "0x"/"0X" prefix.
// The first character that is not part of the number is pushed back.
// If got_digit is not NULL it is set to whether at least one digit was read.
// A negative number is returned as its unsigned (modular) negation.
static uintmax_t ScanUMax(FILE* s, int base, bool* got_digit) {
  bool minus = false;
  bool any_digit = false;
  uintmax_t v = 0;
  int d;
  int c = fgetc(s);

  while (isspace(c))
    c = fgetc(s);

  // Single optional + or -
  if (c == '-' || c == '+') {
    minus = (c == '-');
    c = fgetc(s);
  }

  // Assign correct base
  if (base == 0 || base == 16) {
    if (c == '0') {
      any_digit = true;  // The leading zero is itself a valid digit
      c = fgetc(s);
      if (c == 'x' || c == 'X') {
        base = 16;
        c = fgetc(s);
      } else if (base == 0) {
        base = 8;
      }
    } else if (base == 0) {
      base = 10;
    }
  }

  // Actual number parsing
  for (; c != EOF && (d = DigitValue(c)) >= 0 && d < base; c = fgetc(s)) {
    v = v * base + d;
    any_digit = true;
  }

  ungetc(c, s);
  if (got_digit != NULL)
    *got_digit = any_digit;
  return minus ? (0 - v) : v;
}

// Reads an optionally signed decimal number with optional fraction from s.
// Leading whitespace is skipped and the first unused character is pushed
// back. Exponents are not supported; a warning is printed if one follows.
// If got_digit is not NULL it is set to whether at least one digit was read.
static double ScanFloat(FILE* s, bool* got_digit) {
  bool minus = false;
  bool any_digit = false;
  double whole = 0.0;
  double frac = 0.0;
  double scale = 1.0;
  int c = fgetc(s);

  while (isspace(c))
    c = fgetc(s);

  // Single optional + or -
  if (c == '-' || c == '+') {
    minus = (c == '-');
    c = fgetc(s);
  }

  // Only decimal digits belong to the number; DigitValue() would also
  // accept letters and swallow a following 'e' or other text.
  for (; c != EOF && isdigit(c); c = fgetc(s)) {
    whole = whole * 10.0 + (c - '0');
    any_digit = true;
  }
  if (c == '.') {
    for (c = fgetc(s); c != EOF && isdigit(c); c = fgetc(s)) {
      frac = frac * 10.0 + (c - '0');
      scale *= 10.0;
      any_digit = true;
    }
  }
  if (c == 'e' || c == 'E')
    tprintf("WARNING: Scientific Notation not supported!");

  ungetc(c, s);
  if (got_digit != NULL)
    *got_digit = any_digit;

  const double f = whole + frac / scale;
  return minus ? -f : f;
}

// IO (re-)implementations -----------------------------------------------------

// Reads an unsigned integer in the given base from s, see ScanUMax().
// Returns 0 if no digits could be read.
uintmax_t streamtoumax(FILE* s, int base) {
  return ScanUMax(s, base, NULL);
}

// Reads a decimal floating point number from s, see ScanFloat().
// Returns 0.0 if no digits could be read.
double streamtofloat(FILE* s) {
  return ScanFloat(s, NULL);
}

// Parses a decimal floating point number from the start of the string s.
// Leading whitespace and a single sign are accepted; exponents are not
// supported and only trigger a warning. Returns 0.0 if there are no digits.
double strtofloat(const char* s) {
  bool minus = false;
  double whole = 0.0;
  double frac = 0.0;
  double scale = 1.0;

  while (*s && isspace(static_cast<unsigned char>(*s)))
    s++;

  // Single optional + or -
  if (*s == '-' || *s == '+') {
    minus = (*s == '-');
    s++;
  }

  // Actual number parsing (decimal digits only)
  for (; *s >= '0' && *s <= '9'; s++)
    whole = whole * 10.0 + (*s - '0');
  if (*s == '.') {
    for (++s; *s >= '0' && *s <= '9'; s++) {
      frac = frac * 10.0 + (*s - '0');
      scale *= 10.0;
    }
  }
  if (*s == 'e' || *s == 'E')
    tprintf("WARNING: Scientific Notation not supported!");

  const double f = whole + frac / scale;
  return minus ? -f : f;
}

// Variadic front end for vfscanf().
int fscanf(FILE* stream, const char *format, ...) {
  va_list ap;
  int rv;

  va_start(ap, format);
  rv = vfscanf(stream, format, ap);
  va_end(ap);

  return rv;
}

// Unbuffered re-implementation of vfscanf. Supported directives:
//   %d %i %o %u %x %X %p %n   integers (length modifiers h l j z t L q)
//   %f %g %G %e %E            decimal floats without exponent (l, L)
//   %c %s %[...]              characters, words and scansets
//   %%                        literal percent sign
// A '*' flag suppresses assignment. A field width is honored for %c, %s and
// %[ and is ignored for numeric conversions. Unlike the standard function,
// leading whitespace in the stream is always skipped before the format is
// processed.
// Returns the number of assigned conversions, or -1 (EOF) if the input ended
// before the first conversion.
int vfscanf(FILE* stream, const char *format, va_list ap) {
  const char *p = format;
  char ch;
  int q = 0;
  uintmax_t val = 0;
  int rank = RANK_INT;    // Default rank
  unsigned int width = UINT_MAX;
  int flags = 0;
  enum {
    ST_NORMAL,        // Ground state
    ST_FLAGS,         // Special flags
    ST_WIDTH,         // Field width
    ST_MODIFIERS,     // Length or conversion modifiers
    ST_MATCH_INIT,    // Initial state of %[ sequence
    ST_MATCH,         // Main state of %[ sequence
    ST_MATCH_RANGE,   // After - in a %[ sequence
  } state = ST_NORMAL;
  char *sarg = NULL;    // %s %c or %[ string argument, NULL when suppressed
  enum Bail bail = BAIL_NONE;
  int converted = 0;    // Successful (assigned) conversions
  unsigned long matchmap[kMatchMapWords];
  int matchinv = 0;     // Is match map inverted?
  unsigned char range_start = 0;  // Most recent scanset member
  const long start_off = ftell(stream);

  // Skip leading spaces
  SkipSpace(stream);

  while ((ch = *p++) && !bail) {
    bool run_match = false;  // Set once the closing ']' of a scanset is seen

    switch (state) {
      case ST_NORMAL:
        if (ch == '%') {
          state = ST_FLAGS;
          flags = 0;
          rank = RANK_INT;
          width = UINT_MAX;
        } else if (isspace(static_cast<unsigned char>(ch))) {
          SkipSpace(stream);
        } else {
          // Literal character: compare as unsigned char, leave a
          // mismatching character in the stream.
          q = fgetc(stream);
          if (q != static_cast<unsigned char>(ch)) {
            ungetc(q, stream);
            bail = (q == EOF) ? BAIL_EOF : BAIL_ERR;
          }
        }
        break;

      case ST_FLAGS:
        if (ch == '*') {
          flags |= FL_SPLAT;
        } else if (ch >= '0' && ch <= '9') {
          width = (ch - '0');
          state = ST_WIDTH;
          flags |= FL_WIDTH;
        } else {
          state = ST_MODIFIERS;
          p--;      // Process this character again
        }
        break;

      case ST_WIDTH:
        if (ch >= '0' && ch <= '9') {
          width = width * 10 + (ch - '0');
        } else {
          state = ST_MODIFIERS;
          p--;      // Process this character again
        }
        break;

      case ST_MODIFIERS:
        switch (ch) {
          // Length modifiers - nonterminal sequences
          case 'h':
            rank--;     // Shorter rank
            break;
          case 'l':
            rank++;     // Longer rank
            break;
          case 'j':
            rank = kIntMaxRank;
            break;
          case 'z':
            rank = kSizeTRank;
            break;
          case 't':
            rank = kPtrDiffRank;
            break;
          case 'L':
          case 'q':
            rank = RANK_LONGLONG;   // long double/long long
            break;

          default: {
            // Output modifiers - terminal sequences
            state = ST_NORMAL;      // Next state will be normal
            if (rank < kMinRank)    // Canonicalize rank
              rank = kMinRank;
            else if (rank > kMaxRank)
              rank = kMaxRank;
            const bool assign = !(flags & FL_SPLAT);

            switch (ch) {
              case 'P':     // Upper case pointer
              case 'p':     // Pointer
              case 'i':     // Base-independent integer
              case 'd':     // Decimal integer
              case 'o':     // Octal integer
              case 'u':     // Unsigned decimal integer
              case 'x':     // Hexadecimal integer
              case 'X':
              case 'n': {   // Number of characters consumed
                if (ch == 'n') {
                  val = static_cast<uintmax_t>(ftell(stream) - start_off);
                } else {
                  if (ch == 'p' || ch == 'P')
                    rank = RANK_PTR;
                  if (SkipSpace(stream) == EOF) {
                    bail = BAIL_EOF;
                    break;
                  }
                  bool got_digit = false;
                  val = ScanUMax(stream, IntegerBase(ch), &got_digit);
                  if (!got_digit) {
                    bail = BAIL_ERR;   // Nothing numeric in the input
                    break;
                  }
                  if (assign)
                    converted++;
                }

                // Signed values arrive as their modular negation, so storing
                // through the unsigned type of matching size is correct.
                if (assign) {
                  switch (rank) {
                    case RANK_CHAR:
                      *va_arg(ap, unsigned char *)
                        = static_cast<unsigned char>(val);
                      break;
                    case RANK_SHORT:
                      *va_arg(ap, unsigned short *)
                        = static_cast<unsigned short>(val);
                      break;
                    case RANK_INT:
                      *va_arg(ap, unsigned int *)
                        = static_cast<unsigned int>(val);
                      break;
                    case RANK_LONG:
                      *va_arg(ap, unsigned long *)
                        = static_cast<unsigned long>(val);
                      break;
                    case RANK_LONGLONG:
                      *va_arg(ap, unsigned long long *)
                        = static_cast<unsigned long long>(val);
                      break;
                    case RANK_PTR:
                      *va_arg(ap, void **)
                        = reinterpret_cast<void *>(static_cast<uintptr_t>(val));
                      break;
                  }
                }
                break;
              }

              case 'f':     // Preliminary float value parsing
              case 'g':
              case 'G':
              case 'e':
              case 'E': {
                if (SkipSpace(stream) == EOF) {
                  bail = BAIL_EOF;
                  break;
                }
                bool got_digit = false;
                const double fval = ScanFloat(stream, &got_digit);
                if (!got_digit) {
                  bail = BAIL_ERR;     // Nothing numeric in the input
                  break;
                }
                if (assign) {
                  switch (rank) {
                    case RANK_LONG:
                      *va_arg(ap, double *) = fval;
                      break;
                    case RANK_LONGLONG:
                      *va_arg(ap, long double *) = fval;
                      break;
                    default:
                      *va_arg(ap, float *) = static_cast<float>(fval);
                      break;
                  }
                  converted++;
                }
                break;
              }

              case 'c': {   // Character
                width = (flags & FL_WIDTH) ? width : 1;  // Default width == 1
                sarg = assign ? va_arg(ap, char *) : NULL;
                while (width--) {
                  q = fgetc(stream);
                  if (q == EOF) {
                    bail = BAIL_EOF;
                    break;
                  }
                  if (sarg != NULL)
                    *sarg++ = static_cast<char>(q);
                }
                if (!bail && assign)
                  converted++;
                break;
              }

              case 's': {   // String
                if (SkipSpace(stream) == EOF) {
                  bail = BAIL_EOF;
                  break;
                }
                sarg = assign ? va_arg(ap, char *) : NULL;
                unsigned int count = 0;
                while (width--) {
                  q = fgetc(stream);
                  if (q == EOF || isspace(q)) {
                    ungetc(q, stream);
                    break;
                  }
                  if (sarg != NULL)
                    *sarg++ = static_cast<char>(q);
                  ++count;
                }
                if (count == 0) {
                  bail = BAIL_EOF;
                } else if (sarg != NULL) {
                  *sarg = '\0';   // Terminate output
                  converted++;
                }
                break;
              }

              case '[':     // Character range
                sarg = assign ? va_arg(ap, char *) : NULL;
                state = ST_MATCH_INIT;
                matchinv = 0;
                memset(matchmap, 0, sizeof matchmap);
                break;

              case '%':     // %% sequence
                q = fgetc(stream);
                if (q != '%') {
                  ungetc(q, stream);
                  bail = (q == EOF) ? BAIL_EOF : BAIL_ERR;
                }
                break;

              default:      // Anything else
                bail = BAIL_ERR;    // Unknown sequence
                break;
            }
            break;
          }
        }
        break;

      case ST_MATCH_INIT:   // Initial state for %[ match
        if (ch == '^' && !(flags & FL_INV)) {
          matchinv = 1;
          flags |= FL_INV;  // A second '^' is an ordinary set member
        } else {
          // First member is taken literally, even if it is ']'.
          range_start = static_cast<unsigned char>(ch);
          SetBit(matchmap, range_start);
          state = ST_MATCH;
        }
        break;

      case ST_MATCH:        // Main state for %[ match
        if (ch == ']') {
          run_match = true;
        } else if (ch == '-') {
          // range_start still holds the member preceding the '-'.
          state = ST_MATCH_RANGE;
        } else {
          range_start = static_cast<unsigned char>(ch);
          SetBit(matchmap, range_start);
        }
        break;

      case ST_MATCH_RANGE:  // %[ match after -
        if (ch == ']') {
          // '-' was the last character, so it is a literal member.
          SetBit(matchmap, static_cast<unsigned char>('-'));
          run_match = true;
        } else {
          const unsigned char range_end = static_cast<unsigned char>(ch);
          for (unsigned int i = range_start; i < range_end; i++)
            SetBit(matchmap, i);
          SetBit(matchmap, range_end);  // Ranges include their end point
          range_start = range_end;
          state = ST_MATCH;
        }
        break;
    }

    if (run_match) {        // Match expression finished
      state = ST_NORMAL;    // The rest of the format is ordinary text again
      unsigned int count = 0;
      q = 0;
      while (width--) {
        q = fgetc(stream);
        if (q == EOF ||
            !(TestBit(matchmap, static_cast<unsigned int>(q)) ^ matchinv)) {
          ungetc(q, stream);
          break;
        }
        if (sarg != NULL)
          *sarg++ = static_cast<char>(q);
        ++count;
      }
      if (count == 0) {
        bail = (q == EOF) ? BAIL_EOF : BAIL_ERR;
      } else if (sarg != NULL) {
        *sarg = '\0';
        converted++;
      }
    }
  }

  if (bail == BAIL_EOF && !converted)
    converted = -1;     // Return EOF (-1)

  return converted;
}

// Creates (or truncates) pathname for writing with the given permission
// bits and returns the descriptor from open().
int creat(const char *pathname, mode_t mode) {
  return open(pathname, O_CREAT | O_TRUNC | O_WRONLY, mode);
}

#endif  // EMBEDDED