Tesseract  3.02
tesseract-ocr/image/imgtiff.cpp
Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        imgtiff.c  (Formerly tiff.c)
00003  * Description: Max format image reader/writer.
00004  * Author:      Ray Smith
00005  * Created:     Mon Jun 11 14:00:21 BST 1990
00006  *
00007  * (C) Copyright 1990, Hewlett-Packard Ltd.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
00020 #include          "mfcpch.h"     //precompiled headers
00021 
00022 #include <stdio.h>
00023 /*
00024 ** Include automatically generated configuration file if running autoconf
00025 */
00026 #ifdef HAVE_CONFIG_H
00027 #include "config_auto.h"
00028 #if defined(MOTOROLA_BYTE_ORDER) || defined(WORDS_BIGENDIAN)
00029 #define __MOTO__  // Big-endian.
00030 #endif
00031 #endif
00032 
00033 #include          "imgtiff.h"
00034 #include          "helpers.h"
00035 
// Byte-order identifiers: each is the value of the first two bytes of a tiff
// file written in that format.
#define INTEL 0x4949  // File written in Intel byte order.
#define MOTO 0x4d4d   // File written in Motorola byte order.
00038 
00039 /*************************************************************************
00040  * NOTE ON BIG-ENDIAN vs LITTLE-ENDIAN
00041  *
00042  * Intel machines store numbers with LSByte in the left position.
00043  * Motorola     (and PA_RISC) machines use the opposite byte ordering.
00044  *
00045  * This code is written so that:
00046  *   a) it will compile and run on EITHER machine type   AND
00047  *   b) the program (on either machine) will process tiff file written in either
00048  *      Motorola or Intel format.
00049  *
00050  * The code is compiled with a __NATIVE__ define which is either MOTO or INTEL.
00051  * MOTO and INTEL are defined (above) to be the value of the first two bytes of
00052  * a tiff file in either format. (This identifies the filetype).
00053  *
00054  * Subsequent reads and writes normally just reverse the byte order if the
00055  * machine type (__NATIVE__) is not equal to the filetype determined from the
00056  * first two bytes of the tiff file.
00057  *
00058  * A special case is the "value" field of the tag structure. This can contain
00059  * EITHER a 16bit or a 32bit value, according to the "type" field. The 4 cases
00060  * of machine type / file type combinations need to be treated differently in
00061  * the case of 16 bit values.
00062  *************************************************************************/
00063 
#define ENTRIES 19  // Number of entries.
#define START 8     // Start of the tag table.
00066 
00067 typedef struct
00068 {
00069   uinT16 tag;                    //entry tag
00070   uinT16 type;
00071   uinT32 length;
00072   inT32 value;
00073 } TIFFENTRY;                     //tiff tag entry
00074 
00075 
00076 // CountTiffPages
00077 // Returns the number of pages in the file if it is a tiff file, otherwise 0.
00078 // WARNING: requires __MOTO__ to be #defined on a big-endian system.
00079 // On linux this is handled by configure - see above.
00080 int CountTiffPages(FILE* fp) {
00081   if (fp == NULL) return 0;
00082   // Read header
00083   inT16 filetype = 0;
00084   if (fread(&filetype, sizeof(filetype), 1, fp) != 1 ||
00085       (filetype != INTEL && filetype != MOTO)) {
00086     return 0;
00087   }
00088   fseek(fp, 4L, SEEK_SET);
00089   int npages = 0;
00090   do {
00091     inT32 start;                   // Start of tiff directory.
00092     if (fread(&start, sizeof(start), 1, fp) != 1) {
00093       return npages;
00094     }
00095     if (filetype != __NATIVE__)
00096       ReverseN(&start, sizeof(start));
00097     if (start <= 0) {
00098       return npages;
00099     }
00100     fseek(fp, start, SEEK_SET);
00101     inT16 entries;                 // No of tiff entries.
00102     if (fread(&entries, sizeof(entries), 1, fp) != 1) {
00103       return npages;
00104     }
00105     if (filetype != __NATIVE__)
00106       ReverseN(&entries, sizeof(entries));
00107     // Skip the tags and get to the next start.
00108     fseek(fp, entries * sizeof(TIFFENTRY), SEEK_CUR);
00109     ++npages;
00110   } while (1);
00111   return 0;
00112 }
00113