Coverage Report

Created: 2025-06-13 06:48

/src/leptonica/src/tiffio.c
Line
Count
Source (jump to first uncovered line)
1
/*====================================================================*
2
 -  Copyright (C) 2001 Leptonica.  All rights reserved.
3
 -
4
 -  Redistribution and use in source and binary forms, with or without
5
 -  modification, are permitted provided that the following conditions
6
 -  are met:
7
 -  1. Redistributions of source code must retain the above copyright
8
 -     notice, this list of conditions and the following disclaimer.
9
 -  2. Redistributions in binary form must reproduce the above
10
 -     copyright notice, this list of conditions and the following
11
 -     disclaimer in the documentation and/or other materials
12
 -     provided with the distribution.
13
 -
14
 -  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15
 -  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16
 -  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17
 -  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL ANY
18
 -  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19
 -  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20
 -  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21
 -  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22
 -  OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23
 -  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24
 -  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25
 *====================================================================*/
26
27
/*!
28
 * \file tiffio.c
29
 * <pre>
30
 *
31
 *     TIFFClientOpen() wrappers for FILE*:
32
 *      static tsize_t    lept_read_proc()
33
 *      static tsize_t    lept_write_proc()
34
 *      static toff_t     lept_seek_proc()
35
 *      static int        lept_close_proc()
36
 *      static toff_t     lept_size_proc()
37
 *
38
 *     Reading tiff:
39
 *             PIX       *pixReadTiff()             [ special top level ]
40
 *             PIX       *pixReadStreamTiff()
41
 *      static PIX       *pixReadFromTiffStream()
42
 *
43
 *     Writing tiff:
44
 *             l_int32    pixWriteTiff()            [ special top level ]
45
 *             l_int32    pixWriteTiffCustom()      [ special top level ]
46
 *             l_int32    pixWriteStreamTiff()
47
 *             l_int32    pixWriteStreamTiffWA()
48
 *      static l_int32    pixWriteToTiffStream()
49
 *      static l_int32    writeCustomTiffTags()
50
 *
51
 *     Reading and writing multipage tiff
52
 *             PIX       *pixReadFromMultipageTiff()
53
 *             PIXA      *pixaReadMultipageTiff()   [ special top level ]
54
 *             l_int32    pixaWriteMultipageTiff()  [ special top level ]
55
 *             l_int32    writeMultipageTiff()      [ special top level ]
56
 *             l_int32    writeMultipageTiffSA()
57
 *
58
 *     Information about tiff file
59
 *             l_int32    fprintTiffInfo()
60
 *             l_int32    tiffGetCount()
61
 *             l_int32    getTiffResolution()
62
 *      static l_int32    getTiffStreamResolution()
63
 *             l_int32    readHeaderTiff()
64
 *             l_int32    freadHeaderTiff()
65
 *             l_int32    readHeaderMemTiff()
66
 *      static l_int32    tiffReadHeaderTiff()
67
 *             l_int32    findTiffCompression()
68
 *      static l_int32    getTiffCompressedFormat()
69
 *
70
 *     Extraction of tiff g4 data:
71
 *             l_int32    extractG4DataFromFile()
72
 *
73
 *     Open tiff stream from file stream
74
 *      static TIFF      *fopenTiff()
75
 *
76
 *     Wrapper for TIFFOpen:
77
 *      static TIFF      *openTiff()
78
 *
79
 *     Memory I/O: reading memory --> pix and writing pix --> memory
80
 *        Ten static low-level memstream functions
81
 *           static L_MEMSTREAM  *memstreamCreateForRead()
82
 *           static L_MEMSTREAM  *memstreamCreateForWrite()
83
 *           static tsize_t       tiffReadCallback()
84
 *           static tsize_t       tiffWriteCallback()
85
 *           static toff_t        tiffSeekCallback()
86
 *           static l_int32       tiffCloseCallback()
87
 *           static toff_t        tiffSizeCallback()
88
 *           static l_int32       tiffMapCallback()
89
 *           static void          tiffUnmapCallback()
90
 *           static TIFF         *fopenTiffMemstream()
91
 *
92
 *           PIX       *pixReadMemTiff();
93
 *           PIX       *pixReadMemFromMultipageTiff();
94
 *           PIXA      *pixaReadMemMultipageTiff()    [ special top level ]
95
 *           l_int32    pixaWriteMemMultipageTiff()   [ special top level ]
96
 *           l_int32    pixWriteMemTiff();
97
 *           l_int32    pixWriteMemTiffCustom();
98
 *
99
 *  Note 1: To include all necessary functions, use libtiff version 3.7.4
100
 *          (from 2005) or later.
101
 *  Note 2: What compression methods in tiff are supported?
102
 *          * We support most methods that are fully implemented in the
103
 *            tiff library, such as G3, G4, RLE and LZW.
104
 *          * The exception is the old-style jpeg tiff format (OJPEG), which
105
 *            is not supported.
106
 *          * We support two formats requiring external libraries: ZIP and JPEG
107
 *            All computers should have the zip library.
108
 *          * At present we do not support WEBP in tiff, which uses
109
 *            libwebp and was added in tifflib 4.1.0 in 2019.
110
 *  Note 3: We set the pad bits to 0 before writing in pixWriteToTiffStream().
111
 *          Although they don't affect the raster image after decompression,
112
 *          it is sometimes convenient to use a golden file with a
113
 *          byte-by-byte check to verify invariance.  The issue came up
114
 *          on Windows for 2 and 4 bpp images.
115
 * </pre>
116
 */
117
118
#ifdef HAVE_CONFIG_H
119
#include <config_auto.h>
120
#endif  /* HAVE_CONFIG_H */
121
122
#include <string.h>
123
#include <math.h>   /* for isnan */
124
#include <sys/types.h>
125
#ifndef _MSC_VER
126
#include <unistd.h>
127
#else  /* _MSC_VER */
128
#include <io.h>
129
#endif  /* _MSC_VER */
130
#include <fcntl.h>
131
#include "allheaders.h"
132
133
/* ---------------------------------------------------------*/
134
#if  HAVE_LIBTIFF && HAVE_LIBJPEG   /* defined in environ.h */
135
/* ---------------------------------------------------------*/
136
137
#include "tiff.h"
138
#include "tiffio.h"
139
140
static const l_int32  DefaultResolution = 300;   /* ppi */
141
static const l_int32  ManyPagesInTiffFile = 3000;  /* warn if big */
142
143
    /* Verified that libtiff makes valid g4 files of this size.
     * pixReadFromTiffStream() rejects images that exceed either limit. */
144
static const l_int32  MaxTiffWidth = 1 << 20;  /* 1M pixels */
145
static const l_int32  MaxTiffHeight = 1 << 20;  /* 1M pixels */
146
147
    /* Check g4 data size */
148
static const size_t  MaxNumTiffBytes = (1 << 28) - 1;  /* 2^28 - 1 bytes; just under 256 MB */
149
150
    /* All functions with TIFF interfaces are static. */
151
static PIX      *pixReadFromTiffStream(TIFF *tif);
152
static l_int32   getTiffStreamResolution(TIFF *tif, l_int32 *pxres,
153
                                         l_int32 *pyres);
154
static l_int32   tiffReadHeaderTiff(TIFF *tif, l_int32 *pwidth,
155
                                    l_int32 *pheight, l_int32 *pbps,
156
                                    l_int32 *pspp, l_int32 *pres,
157
                                    l_int32 *pcmap, l_int32 *pformat);
158
static l_int32   writeCustomTiffTags(TIFF *tif, NUMA *natags,
159
                                     SARRAY *savals, SARRAY  *satypes,
160
                                     NUMA *nasizes);
161
static l_int32   pixWriteToTiffStream(TIFF *tif, PIX *pix, l_int32 comptype,
162
                                      NUMA *natags, SARRAY *savals,
163
                                      SARRAY *satypes, NUMA *nasizes);
164
static TIFF     *fopenTiff(FILE *fp, const char *modestring);
165
static TIFF     *openTiff(const char *filename, const char *modestring);
166
167
    /* Static helper for tiff compression type */
168
static l_int32   getTiffCompressedFormat(l_uint16 tiffcomp);
169
170
    /* Static function for memory I/O */
171
static TIFF     *fopenTiffMemstream(const char *filename, const char *operation,
172
                                    l_uint8 **pdata, size_t *pdatasize);
173
174
    /* This structure defines a transform to be performed on a TIFF image
175
     * (note that the same transformation can be represented in
176
     * several different ways using this structure since
177
     * vflip + hflip + counterclockwise == clockwise). */
178
/* Fields are applied by pixReadFromTiffStream() in the order
 * vflip, then hflip, then rotate. */
struct tiff_transform {
179
    int vflip;    /* if non-zero, image needs a vertical flip */
180
    int hflip;    /* if non-zero, image needs a horizontal flip */
181
    int rotate;   /* -1 -> counterclockwise 90-degree rotation,
182
                      0 -> no rotation
183
                      1 -> clockwise 90-degree rotation */
184
};
185
186
    /* This describes the transformations needed for a given orientation
187
     * tag.  The tag values start at 1, so you need to subtract 1 to get a
188
     * valid index into this array.  It is only valid when not using
189
     * TIFFReadRGBAImageOriented(). */
190
/* Each row is {vflip, hflip, rotate}; row k holds the transform for
 * orientation tag value k + 1. */
static struct tiff_transform tiff_orientation_transforms[] = {
191
    {0, 0, 0},    /* orientation 1: no change */
192
    {0, 1, 0},    /* orientation 2: horizontal flip */
193
    {1, 1, 0},    /* orientation 3: vertical flip and horizontal flip */
194
    {1, 0, 0},    /* orientation 4: vertical flip */
195
    {0, 1, -1},   /* orientation 5: horizontal flip, rotate counterclockwise */
196
    {0, 0, 1},    /* orientation 6: rotate clockwise */
197
    {0, 1, 1},    /* orientation 7: horizontal flip, rotate clockwise */
198
    {0, 0, -1}    /* orientation 8: rotate counterclockwise */
199
};
200
201
    /* Same as above, except that these transformations are only valid
202
     * when using TIFFReadRGBAImageOriented().  Transformations
203
     * were determined empirically.  See the libtiff mailing list for
204
     * more discussion: http://www.asmail.be/msg0054683875.html  */
205
/* Each row is {vflip, hflip, rotate}; row k holds the transform for
 * orientation tag value k + 1.  Selected by pixReadFromTiffStream()
 * when the raster was read with TIFFReadRGBAImageOriented(). */
static struct tiff_transform tiff_partial_orientation_transforms[] = {
206
    {0, 0, 0},    /* orientation 1: no change */
207
    {0, 0, 0},    /* orientation 2: no change */
208
    {0, 0, 0},    /* orientation 3: no change */
209
    {0, 0, 0},    /* orientation 4: no change */
210
    {0, 1, -1},   /* orientation 5: horizontal flip, rotate counterclockwise */
211
    {0, 1, 1},    /* orientation 6: horizontal flip, rotate clockwise */
212
    {1, 0, 1},    /* orientation 7: vertical flip, rotate clockwise */
213
    {0, 1, -1}    /* orientation 8: horizontal flip, rotate counterclockwise */
214
};
215
216
217
/*-----------------------------------------------------------------------*
218
 *             TIFFClientOpen() wrappers for FILE*                       *
219
 *             Provided by Jürgen Buchmüller                             *
220
 *                                                                       *
221
 *  We previously used TIFFFdOpen(), which used low-level file           *
222
 *  descriptors.  It had portability issues with Windows, along          *
223
 *  with other limitations from lack of stream control operations.       *
224
 *  These callbacks to TIFFClientOpen() avoid the problems.              *
225
 *                                                                       *
226
 *  Jürgen made the functions use 64 bit file operations where possible  *
227
 *  or required, namely for seek and size. On Windows there are specific *
228
 *  _fseeki64() and _ftelli64() functions.  On unix it is common to look *
229
 *  for a macro _LARGEFILE64_SOURCE being defined, which makes available *
230
 *  the off64_t type, and to use fseeko() and ftello() in this case.     *
231
 *-----------------------------------------------------------------------*/
232
static tsize_t
233
lept_read_proc(thandle_t  cookie,
234
               tdata_t    buff,
235
               tsize_t    size)
236
0
{
237
0
    FILE* fp = (FILE *)cookie;
238
0
    tsize_t done;
239
0
    if (!buff || !cookie || !fp)
240
0
        return (tsize_t)-1;
241
0
    done = fread(buff, 1, size, fp);
242
0
    return done;
243
0
}
244
245
static tsize_t
246
lept_write_proc(thandle_t  cookie,
247
                tdata_t    buff,
248
                tsize_t    size)
249
0
{
250
0
    FILE* fp = (FILE *)cookie;
251
0
    tsize_t done;
252
0
    if (!buff || !cookie || !fp)
253
0
        return (tsize_t)-1;
254
0
    done = fwrite(buff, 1, size, fp);
255
0
    return done;
256
0
}
257
258
static toff_t
259
lept_seek_proc(thandle_t  cookie,
260
               toff_t     offs,
261
               int        whence)
262
0
{
263
0
    FILE* fp = (FILE *)cookie;
264
#if defined(_MSC_VER)
265
    __int64 pos = 0;
266
    if (!cookie || !fp)
267
        return (tsize_t)-1;
268
    switch (whence) {
269
    case SEEK_SET:
270
        pos = 0;
271
        break;
272
    case SEEK_CUR:
273
        pos = ftell(fp);
274
        break;
275
    case SEEK_END:
276
        _fseeki64(fp, 0, SEEK_END);
277
        pos = _ftelli64(fp);
278
        break;
279
    }
280
    pos = (__int64)(pos + offs);
281
    _fseeki64(fp, pos, SEEK_SET);
282
    if (pos == _ftelli64(fp))
283
        return (tsize_t)pos;
284
#elif defined(_LARGEFILE64_SOURCE)
285
    off64_t pos = 0;
286
    if (!cookie || !fp)
287
        return (tsize_t)-1;
288
    switch (whence) {
289
    case SEEK_SET:
290
        pos = 0;
291
        break;
292
    case SEEK_CUR:
293
        pos = ftello(fp);
294
        break;
295
    case SEEK_END:
296
        fseeko(fp, 0, SEEK_END);
297
        pos = ftello(fp);
298
        break;
299
    }
300
    pos = (off64_t)(pos + offs);
301
    fseeko(fp, pos, SEEK_SET);
302
    if (pos == ftello(fp))
303
        return (tsize_t)pos;
304
#else
305
0
    off_t pos = 0;
306
0
    if (!cookie || !fp)
307
0
        return (tsize_t)-1;
308
0
    switch (whence) {
309
0
    case SEEK_SET:
310
0
        pos = 0;
311
0
        break;
312
0
    case SEEK_CUR:
313
0
        pos = ftell(fp);
314
0
        break;
315
0
    case SEEK_END:
316
0
        fseek(fp, 0, SEEK_END);
317
0
        pos = ftell(fp);
318
0
        break;
319
0
    }
320
0
    pos = (off_t)(pos + offs);
321
0
    fseek(fp, pos, SEEK_SET);
322
0
    if (pos == ftell(fp))
323
0
        return (tsize_t)pos;
324
0
#endif
325
0
    return (tsize_t)-1;
326
0
}
327
328
static int
329
lept_close_proc(thandle_t  cookie)
330
0
{
331
0
    FILE* fp = (FILE *)cookie;
332
0
    if (!cookie || !fp)
333
0
        return 0;
334
0
    fseek(fp, 0, SEEK_SET);
335
0
    return 0;
336
0
}
337
338
static toff_t
339
lept_size_proc(thandle_t  cookie)
340
0
{
341
0
    FILE* fp = (FILE *)cookie;
342
#if defined(_MSC_VER)
343
    __int64 pos;
344
    __int64 size;
345
    if (!cookie || !fp)
346
        return (tsize_t)-1;
347
    pos = _ftelli64(fp);
348
    _fseeki64(fp, 0, SEEK_END);
349
    size = _ftelli64(fp);
350
    _fseeki64(fp, pos, SEEK_SET);
351
#elif defined(_LARGEFILE64_SOURCE)
352
    off64_t pos;
353
    off64_t size;
354
    if (!fp)
355
        return (tsize_t)-1;
356
    pos = ftello(fp);
357
    fseeko(fp, 0, SEEK_END);
358
    size = ftello(fp);
359
    fseeko(fp, pos, SEEK_SET);
360
#else
361
0
    off_t pos;
362
0
    off_t size;
363
0
    if (!cookie || !fp)
364
0
        return (tsize_t)-1;
365
0
    pos = ftell(fp);
366
0
    fseek(fp, 0, SEEK_END);
367
0
    size = ftell(fp);
368
0
    fseek(fp, pos, SEEK_SET);
369
0
#endif
370
0
    return (toff_t)size;
371
0
}
372
373
374
/*--------------------------------------------------------------*
375
 *                      Reading from file                       *
376
 *--------------------------------------------------------------*/
377
/*!
378
 * \brief   pixReadTiff()
379
 *
380
 * \param[in]    filename
381
 * \param[in]    n           page number 0 based
382
 * \return  pix, or NULL on error
383
 *
384
 * <pre>
385
 * Notes:
386
 *      (1) This is a version of pixRead(), specialized for tiff
387
 *          files, that allows specification of the page to be returned
388
 *      (2) No warning messages on failure, because of how multi-page
389
 *          TIFF reading works. You are supposed to keep trying until
390
 *          it stops working.
391
 * </pre>
392
 */
393
PIX *
394
pixReadTiff(const char  *filename,
395
            l_int32      n)
396
0
{
397
0
FILE  *fp;
398
0
PIX   *pix;
399
400
0
    if (!filename)
401
0
        return (PIX *)ERROR_PTR("filename not defined", __func__, NULL);
402
403
0
    if ((fp = fopenReadStream(filename)) == NULL)
404
0
        return (PIX *)ERROR_PTR_1("image file not found",
405
0
                                  filename, __func__, NULL);
406
0
    pix = pixReadStreamTiff(fp, n);
407
0
    fclose(fp);
408
0
    return pix;
409
0
}
410
411
412
/*--------------------------------------------------------------*
413
 *                     Reading from stream                      *
414
 *--------------------------------------------------------------*/
415
/*!
416
 * \brief   pixReadStreamTiff()
417
 *
418
 * \param[in]    fp    file stream
419
 * \param[in]    n     page number: 0 based
420
 * \return  pix, or NULL on error or if there are no more images in the file
421
 *
422
 * <pre>
423
 * Notes:
424
 *      (1) No warning messages on failure, because of how multi-page
425
 *          TIFF reading works. You are supposed to keep trying until
426
 *          it stops working.
427
 * </pre>
428
 */
429
PIX *
430
pixReadStreamTiff(FILE    *fp,
431
                  l_int32  n)
432
0
{
433
0
PIX   *pix;
434
0
TIFF  *tif;
435
436
0
    if (!fp)
437
0
        return (PIX *)ERROR_PTR("stream not defined", __func__, NULL);
438
439
0
    if ((tif = fopenTiff(fp, "r")) == NULL)
440
0
        return (PIX *)ERROR_PTR("tif not opened", __func__, NULL);
441
442
0
    if (TIFFSetDirectory(tif, n) == 0) {
443
0
        TIFFCleanup(tif);
444
0
        return NULL;
445
0
    }
446
0
    if ((pix = pixReadFromTiffStream(tif)) == NULL) {
447
0
        TIFFCleanup(tif);
448
0
        return NULL;
449
0
    }
450
0
    TIFFCleanup(tif);
451
0
    return pix;
452
0
}
453
454
455
/*!
456
 * \brief   pixReadFromTiffStream()
457
 *
458
 * \param[in]    tif    TIFF handle
459
 * \return  pix, or NULL on error
460
 *
461
 * <pre>
462
 * Notes:
463
 *      (1) We can read the following images (up to 32 bits/pixel):
464
 *          1 spp (grayscale): 1, 2, 4, 8, 16 bps
465
 *          1 spp (colormapped): 1, 2, 4, 8 bps
466
 *          2 spp (gray+alpha): 8 bps
467
 *          3 spp (rgb) and 4 spp (rgba): 8 or 16 bps
468
 *          Note that 16 bps rgb and rgba are converted to 8 bps in the pix.
469
 *      (2) In particular, we do not support
470
 *             16 bps for spp == 2
471
 *              4 bps for spp == 3 or spp == 4.
472
 *      (3) We only support uint image data.
473
 *      (4) We do not support tiled format, old-style jpeg encoding,
474
 *          or webp encoded tiff.
475
 *      (5) 2 bpp gray+alpha are rasterized as 32 bit/pixel rgba, with
476
 *          the gray value replicated in r, g and b.
477
 *      (6) For colormapped images, we support 8 bits/color in the palette.
478
 *          Tiff colormaps have 16 bits/color, and we reduce them to 8.
479
 *      (7) Quoting the libtiff documentation at
480
 *               http://libtiff.maptools.org/libtiff.html
481
 *          "libtiff provides a high-level interface for reading image data
482
 *          from a TIFF file. This interface handles the details of data
483
 *          organization and format for a wide variety of TIFF files;
484
 *          at least the large majority of those files that one would
485
 *          normally encounter. Image data is, by default, returned as
486
 *          ABGR pixels packed into 32-bit words (8 bits per sample).
487
 *          Rectangular rasters can be read or data can be intercepted
488
 *          at an intermediate level and packed into memory in a format
489
 *          more suitable to the application. The library handles all
490
 *          the details of the format of data stored on disk and,
491
 *          in most cases, if any colorspace conversions are required:
492
 *          bilevel to RGB, greyscale to RGB, CMYK to RGB, YCbCr to RGB,
493
 *          16-bit samples to 8-bit samples, associated/unassociated alpha,
494
 *          etc."
495
 * </pre>
496
 */
497
static PIX *
498
pixReadFromTiffStream(TIFF  *tif)
499
0
{
500
0
char      *text;
501
0
l_uint8   *linebuf, *data, *rowptr;
502
0
l_uint16   spp, bps, photometry, tiffcomp, orientation, sample_fmt;
503
0
l_uint16  *redmap, *greenmap, *bluemap;
504
0
l_int32    d, wpl, bpl, comptype, i, j, k, ncolors, rval, gval, bval, aval;
505
0
l_int32    xres, yres, tiffbpl, packedbpl, half_size, twothirds_size;
506
0
l_uint32   w, h, tiffword, read_oriented;
507
0
l_uint32  *line, *ppixel, *tiffdata, *pixdata;
508
0
PIX       *pix, *pix1;
509
0
PIXCMAP   *cmap;
510
511
0
    if (!tif)
512
0
        return (PIX *)ERROR_PTR("tif not defined", __func__, NULL);
513
514
0
    read_oriented = 0;
515
516
        /* Only accept uint image data:
517
         *   SAMPLEFORMAT_UINT = 1;
518
         *   SAMPLEFORMAT_INT = 2;
519
         *   SAMPLEFORMAT_IEEEFP = 3;
520
         *   SAMPLEFORMAT_VOID = 4;   */
521
0
    TIFFGetFieldDefaulted(tif, TIFFTAG_SAMPLEFORMAT, &sample_fmt);
522
0
    if (sample_fmt != SAMPLEFORMAT_UINT) {
523
0
        L_ERROR("sample format = %d is not uint\n", __func__, sample_fmt);
524
0
        return NULL;
525
0
    }
526
527
        /* Can't read tiff in tiled format. For what is involved, see, e.g:
528
         *   https://www.cs.rochester.edu/~nelson/courses/vision/\
529
         *     resources/tiff/libtiff.html#Tiles
530
         * A tiled tiff can be converted to a normal (strip) tif:
531
         *   tiffcp -s <input-tiled-tif> <output-strip-tif>    */
532
0
    if (TIFFIsTiled(tif)) {
533
0
        L_ERROR("tiled format is not supported\n", __func__);
534
0
        return NULL;
535
0
    }
536
537
        /* Old style jpeg is not supported.  We tried supporting 8 bpp.
538
         * TIFFReadScanline() fails on this format, so we used RGBA
539
         * reading, which generates a 4 spp image, and pulled out the
540
         * red component.  However, there were problems with double-frees
541
         * in cleanup.  For RGB, tiffbpl is exactly half the size that
542
         * you would expect for the raster data in a scanline, which
543
         * is 3 * w.  */
544
0
    TIFFGetFieldDefaulted(tif, TIFFTAG_COMPRESSION, &tiffcomp);
545
0
    if (tiffcomp == COMPRESSION_OJPEG) {
546
0
        L_ERROR("old style jpeg format is not supported\n", __func__);
547
0
        return NULL;
548
0
    }
549
550
        /* webp in tiff is in 4.1.0 and not yet supported in Adobe registry */
551
0
#if defined(COMPRESSION_WEBP)
552
0
    if (tiffcomp == COMPRESSION_WEBP) {
553
0
        L_ERROR("webp in tiff not generally supported yet\n", __func__);
554
0
        return NULL;
555
0
    }
556
0
#endif  /* COMPRESSION_WEBP */
557
558
        /* Use default fields for bps and spp */
559
0
    TIFFGetFieldDefaulted(tif, TIFFTAG_BITSPERSAMPLE, &bps);
560
0
    TIFFGetFieldDefaulted(tif, TIFFTAG_SAMPLESPERPIXEL, &spp);
561
0
    if (bps != 1 && bps != 2 && bps != 4 && bps != 8 && bps != 16) {
562
0
        L_ERROR("invalid bps = %d\n", __func__, bps);
563
0
        return NULL;
564
0
    }
565
0
    if (spp == 2 && bps != 8) {
566
0
        L_ERROR("for 2 spp, only handle 8 bps; this is %d bps\n",
567
0
                __func__, bps);
568
0
        return NULL;
569
0
    }
570
0
    if ((spp == 3 || spp == 4) && bps < 8) {
571
0
        L_ERROR("for 3 and 4 spp, only handle 8 and 16 bps; this is %d bps\n",
572
0
                __func__, bps);
573
0
        return NULL;
574
0
    }
575
0
    if (spp == 1) {
576
0
        d = bps;
577
0
    } else if (spp == 2) {  /* gray plus alpha */
578
0
        d = 32;  /* will convert to RGBA */
579
0
    } else if (spp == 3 || spp == 4) {
580
0
        d = 32;
581
0
    } else {
582
0
        L_ERROR("spp = %d; not in {1,2,3,4}\n", __func__, spp);
583
0
        return NULL;
584
0
    }
585
586
0
    TIFFGetField(tif, TIFFTAG_IMAGEWIDTH, &w);
587
0
    TIFFGetField(tif, TIFFTAG_IMAGELENGTH, &h);
588
0
    if (w > MaxTiffWidth) {
589
0
        L_ERROR("width = %d pixels; too large\n", __func__, w);
590
0
        return NULL;
591
0
    }
592
0
    if (h > MaxTiffHeight) {
593
0
        L_ERROR("height = %d pixels; too large\n", __func__, h);
594
0
        return NULL;
595
0
    }
596
597
        /* The relation between the size of a byte buffer required to hold
598
           a raster of image pixels (packedbpl) and the size of the tiff
599
           buffer (tiffbuf) is either 1:1 or approximately 1.5:1 or 2:1,
600
           depending on how the data is stored and subsampled.  For security,
601
           we test this relation between tiffbuf and the image parameters
602
           w, spp and bps. */
603
0
    tiffbpl = TIFFScanlineSize(tif);
604
0
    packedbpl = (bps * spp * w + 7) / 8;
605
0
    half_size = (L_ABS(2 * tiffbpl - packedbpl) <= 8);
606
0
    twothirds_size = (L_ABS(3 * tiffbpl - 2 * packedbpl) <= 8);
607
#if 0
608
    if (half_size)
609
        L_INFO("half_size: packedbpl = %d is approx. twice tiffbpl = %d\n",
610
               __func__, packedbpl, tiffbpl);
611
    if (twothirds_size)
612
        L_INFO("twothirds_size: packedbpl = %d is approx. 1.5 tiffbpl = %d\n",
613
               __func__, packedbpl, tiffbpl);
614
    lept_stderr("tiffbpl = %d, packedbpl = %d, bps = %d, spp = %d, w = %d\n",
615
                tiffbpl, packedbpl, bps, spp, w);
616
#endif
617
0
    if (tiffbpl != packedbpl && !half_size && !twothirds_size) {
618
0
        L_ERROR("invalid tiffbpl: tiffbpl = %d, packedbpl = %d, "
619
0
                "bps = %d, spp = %d, w = %d\n",
620
0
                __func__, tiffbpl, packedbpl, bps, spp, w);
621
0
        return NULL;
622
0
    }
623
624
        /* Use a linebuf that will hold all the pixels generated
625
           by tiff when reading (decompressing) a scanline. */
626
0
    if ((pix = pixCreate(w, h, d)) == NULL)
627
0
        return (PIX *)ERROR_PTR("pix not made", __func__, NULL);
628
0
    pixSetInputFormat(pix, IFF_TIFF);
629
0
    data = (l_uint8 *)pixGetData(pix);
630
0
    wpl = pixGetWpl(pix);
631
0
    bpl = 4 * wpl;
632
0
    if (spp == 1) {
633
0
        linebuf = (l_uint8 *)LEPT_CALLOC(4 * wpl, sizeof(l_uint8));
634
0
        for (i = 0; i < h; i++) {
635
0
            if (TIFFReadScanline(tif, linebuf, i, 0) < 0) {
636
0
                LEPT_FREE(linebuf);
637
0
                pixDestroy(&pix);
638
0
                L_ERROR("spp = 1, read fail at line %d\n", __func__, i);
639
0
                return NULL;
640
0
            }
641
0
            memcpy(data, linebuf, tiffbpl);
642
0
            data += bpl;
643
0
        }
644
0
        if (bps <= 8)
645
0
            pixEndianByteSwap(pix);
646
0
        else   /* bps == 16 */
647
0
            pixEndianTwoByteSwap(pix);
648
0
        LEPT_FREE(linebuf);
649
0
    } else if (spp == 2 && bps == 8) {  /* gray plus alpha */
650
0
        L_INFO("gray+alpha is not supported; converting to RGBA\n", __func__);
651
0
        pixSetSpp(pix, 4);
652
0
        linebuf = (l_uint8 *)LEPT_CALLOC(4 * wpl, sizeof(l_uint8));
653
0
        pixdata = pixGetData(pix);
654
0
        for (i = 0; i < h; i++) {
655
0
            if (TIFFReadScanline(tif, linebuf, i, 0) < 0) {
656
0
                LEPT_FREE(linebuf);
657
0
                pixDestroy(&pix);
658
0
                L_ERROR("spp = 2, read fail at line %d\n", __func__, i);
659
0
                return NULL;
660
0
            }
661
0
            rowptr = linebuf;
662
0
            ppixel = pixdata + i * wpl;
663
0
            for (j = k = 0; j < w; j++) {
664
                    /* Copy gray value into r, g and b */
665
0
                SET_DATA_BYTE(ppixel, COLOR_RED, rowptr[k]);
666
0
                SET_DATA_BYTE(ppixel, COLOR_GREEN, rowptr[k]);
667
0
                SET_DATA_BYTE(ppixel, COLOR_BLUE, rowptr[k++]);
668
0
                SET_DATA_BYTE(ppixel, L_ALPHA_CHANNEL, rowptr[k++]);
669
0
                ppixel++;
670
0
            }
671
0
        }
672
0
        LEPT_FREE(linebuf);
673
0
    } else {  /* rgb and rgba */
674
0
        if ((tiffdata = (l_uint32 *)LEPT_CALLOC((size_t)w * h,
675
0
                                                 sizeof(l_uint32))) == NULL) {
676
0
            pixDestroy(&pix);
677
0
            return (PIX *)ERROR_PTR("calloc fail for tiffdata", __func__, NULL);
678
0
        }
679
            /* TIFFReadRGBAImageOriented() converts to 8 bps */
680
0
        if (!TIFFReadRGBAImageOriented(tif, w, h, tiffdata,
681
0
                                       ORIENTATION_TOPLEFT, 0)) {
682
0
            LEPT_FREE(tiffdata);
683
0
            pixDestroy(&pix);
684
0
            return (PIX *)ERROR_PTR("failed to read tiffdata", __func__, NULL);
685
0
        } else {
686
0
            read_oriented = 1;
687
0
        }
688
689
0
        if (spp == 4) pixSetSpp(pix, 4);
690
0
        line = pixGetData(pix);
691
0
        for (i = 0; i < h; i++, line += wpl) {
692
0
            for (j = 0, ppixel = line; j < w; j++) {
693
                    /* TIFFGet* are macros */
694
0
                tiffword = tiffdata[i * w + j];
695
0
                rval = TIFFGetR(tiffword);
696
0
                gval = TIFFGetG(tiffword);
697
0
                bval = TIFFGetB(tiffword);
698
0
                if (spp == 3) {
699
0
                    composeRGBPixel(rval, gval, bval, ppixel);
700
0
                } else {  /* spp == 4 */
701
0
                    aval = TIFFGetA(tiffword);
702
0
                    composeRGBAPixel(rval, gval, bval, aval, ppixel);
703
0
                }
704
0
                ppixel++;
705
0
            }
706
0
        }
707
0
        LEPT_FREE(tiffdata);
708
0
    }
709
710
0
    if (getTiffStreamResolution(tif, &xres, &yres) == 0) {
711
0
        pixSetXRes(pix, xres);
712
0
        pixSetYRes(pix, yres);
713
0
    }
714
715
        /* Find and save the compression type */
716
0
    comptype = getTiffCompressedFormat(tiffcomp);
717
0
    pixSetInputFormat(pix, comptype);
718
719
0
    if (TIFFGetField(tif, TIFFTAG_COLORMAP, &redmap, &greenmap, &bluemap)) {
720
            /* Save the colormap as a pix cmap.  Because the
721
             * tiff colormap components are 16 bit unsigned,
722
             * and go from black (0) to white (0xffff), the
723
             * the pix cmap takes the most significant byte. */
724
0
        if (bps > 8) {
725
0
            pixDestroy(&pix);
726
0
            return (PIX *)ERROR_PTR("colormap size > 256", __func__, NULL);
727
0
        }
728
0
        if ((cmap = pixcmapCreate(bps)) == NULL) {
729
0
            pixDestroy(&pix);
730
0
            return (PIX *)ERROR_PTR("colormap not made", __func__, NULL);
731
0
        }
732
0
        ncolors = 1 << bps;
733
0
        for (i = 0; i < ncolors; i++)
734
0
            pixcmapAddColor(cmap, redmap[i] >> 8, greenmap[i] >> 8,
735
0
                            bluemap[i] >> 8);
736
0
        if (pixSetColormap(pix, cmap)) {
737
0
            pixDestroy(&pix);
738
0
            return (PIX *)ERROR_PTR("invalid colormap", __func__, NULL);
739
0
        }
740
741
            /* Remove the colormap for 1 bpp. */
742
0
        if (bps == 1) {
743
0
            pix1 = pixRemoveColormap(pix, REMOVE_CMAP_BASED_ON_SRC);
744
0
            pixDestroy(&pix);
745
0
            pix = pix1;
746
0
        }
747
0
    } else {   /* No colormap: check photometry and invert if necessary */
748
0
        if (!TIFFGetField(tif, TIFFTAG_PHOTOMETRIC, &photometry)) {
749
                /* Guess default photometry setting.  Assume min_is_white
750
                 * if compressed 1 bpp; min_is_black otherwise. */
751
0
            if (tiffcomp == COMPRESSION_CCITTFAX3 ||
752
0
                tiffcomp == COMPRESSION_CCITTFAX4 ||
753
0
                tiffcomp == COMPRESSION_CCITTRLE ||
754
0
                tiffcomp == COMPRESSION_CCITTRLEW) {
755
0
                photometry = PHOTOMETRIC_MINISWHITE;
756
0
            } else {
757
0
                photometry = PHOTOMETRIC_MINISBLACK;
758
0
            }
759
0
        }
760
0
        if ((d == 1 && photometry == PHOTOMETRIC_MINISBLACK) ||
761
0
            (d == 8 && photometry == PHOTOMETRIC_MINISWHITE))
762
0
            pixInvert(pix, pix);
763
0
    }
764
765
0
    if (TIFFGetField(tif, TIFFTAG_ORIENTATION, &orientation)) {
766
0
        if (orientation >= 1 && orientation <= 8) {
767
0
            struct tiff_transform *transform = (read_oriented) ?
768
0
                &tiff_partial_orientation_transforms[orientation - 1] :
769
0
                &tiff_orientation_transforms[orientation - 1];
770
0
            if (transform->vflip) pixFlipTB(pix, pix);
771
0
            if (transform->hflip) pixFlipLR(pix, pix);
772
0
            if (transform->rotate) {
773
0
                PIX *oldpix = pix;
774
0
                pix = pixRotate90(oldpix, transform->rotate);
775
0
                pixDestroy(&oldpix);
776
0
            }
777
0
        }
778
0
    }
779
780
0
    text = NULL;
781
0
    TIFFGetField(tif, TIFFTAG_IMAGEDESCRIPTION, &text);
782
0
    if (text) pixSetText(pix, text);
783
0
    return pix;
784
0
}
785
786
787
/*--------------------------------------------------------------*
788
 *                       Writing to file                        *
789
 *--------------------------------------------------------------*/
790
/*!
791
 * \brief   pixWriteTiff()
792
 *
793
 * \param[in]    filename   to write to
794
 * \param[in]    pix        any depth, colormap will be removed
795
 * \param[in]    comptype   IFF_TIFF, IFF_TIFF_RLE, IFF_TIFF_PACKBITS,
796
 *                          IFF_TIFF_G3, IFF_TIFF_G4,
797
 *                          IFF_TIFF_LZW, IFF_TIFF_ZIP, IFF_TIFF_JPEG
798
 * \param[in]    modestr    "a" or "w"
799
 * \return  0 if OK, 1 on error
800
 *
801
 * <pre>
802
 * Notes:
803
 *      (1) For multipage tiff, write the first pix with mode "w" and
804
 *          all subsequent pix with mode "a".
805
 *      (2) For multipage tiff, there is considerable overhead in the
806
 *          machinery to append an image and add the directory entry,
807
 *          and the time required for each image increases linearly
808
 *          with the number of images in the file.
809
 * </pre>
810
 */
811
l_ok
812
pixWriteTiff(const char  *filename,
813
             PIX         *pix,
814
             l_int32      comptype,
815
             const char  *modestr)
816
0
{
817
0
    return pixWriteTiffCustom(filename, pix, comptype, modestr,
818
0
                              NULL, NULL, NULL, NULL);
819
0
}
820
821
822
/*!
823
 * \brief   pixWriteTiffCustom()
824
 *
825
 * \param[in]    filename   to write to
826
 * \param[in]    pix
827
 * \param[in]    comptype   IFF_TIFF, IFF_TIFF_RLE, IFF_TIFF_PACKBITS,
828
 *                          IFF_TIFF_G3, IFF_TIFF_G4,
829
 *                          IFF_TIFF_LZW, IFF_TIFF_ZIP, IFF_TIFF_JPEG
830
 * \param[in]    modestr    "a" or "w"
831
 * \param[in]    natags [optional] NUMA of custom tiff tags
832
 * \param[in]    savals [optional] SARRAY of values
833
 * \param[in]    satypes [optional] SARRAY of types
834
 * \param[in]    nasizes [optional] NUMA of sizes
835
 * \return  0 if OK, 1 on error
836
 *
837
 *  Usage:
838
 *      1 This writes a page image to a tiff file, with optional
839
 *          extra tags defined in tiff.h
840
 *      2 For multipage tiff, write the first pix with mode "w" and
841
 *          all subsequent pix with mode "a".
842
 *      3 For the custom tiff tags:
843
 *          a The three arrays {natags, savals, satypes} must all be
844
 *              either NULL or defined and of equal size.
845
 *          b If they are defined, the tags are an array of integers,
846
 *              the vals are an array of values in string format, and
847
 *              the types are an array of types in string format.
848
 *          c All valid tags are definined in tiff.h.
849
 *          d The types allowed are the set of strings:
850
 *                "char*"
851
 *                "l_uint8*"
852
 *                "l_uint16"
853
 *                "l_uint32"
854
 *                "l_int32"
855
 *                "l_float64"
856
 *                "l_uint16-l_uint16" note the dash; use it between the
857
 *                                    two l_uint16 vals in the val string
858
 *              Of these, "char*" and "l_uint16" are the most commonly used.
859
 *          e The last array, nasizes, is also optional.  It is for
860
 *              tags that take an array of bytes for a value, a number of
861
 *              elements in the array, and a type that is either "char*"
862
 *              or "l_uint8*" probably either will work.
863
 *              Use NULL if there are no such tags.
864
 *          f VERY IMPORTANT: if there are any tags that require the
865
 *              extra size value, stored in nasizes, they must be
866
 *              written first!
867
 */
868
l_ok
869
pixWriteTiffCustom(const char  *filename,
870
                   PIX         *pix,
871
                   l_int32      comptype,
872
                   const char  *modestr,
873
                   NUMA        *natags,
874
                   SARRAY      *savals,
875
                   SARRAY      *satypes,
876
                   NUMA        *nasizes)
877
0
{
878
0
l_int32  ret;
879
0
TIFF    *tif;
880
881
0
    if (!filename)
882
0
        return ERROR_INT("filename not defined", __func__, 1);
883
0
    if (!pix)
884
0
        return ERROR_INT("pix not defined", __func__, 1);
885
886
0
    if ((tif = openTiff(filename, modestr)) == NULL)
887
0
        return ERROR_INT("tif not opened", __func__, 1);
888
0
    ret = pixWriteToTiffStream(tif, pix, comptype, natags, savals,
889
0
                               satypes, nasizes);
890
0
    TIFFClose(tif);
891
0
    return ret;
892
0
}
893
894
895
/*--------------------------------------------------------------*
896
 *                       Writing to stream                      *
897
 *--------------------------------------------------------------*/
898
/*!
899
 * \brief   pixWriteStreamTiff()
900
 *
901
 * \param[in]    fp       file stream
902
 * \param[in]    pix
903
 * \param[in]    comptype IFF_TIFF, IFF_TIFF_RLE, IFF_TIFF_PACKBITS,
904
 *                        IFF_TIFF_G3, IFF_TIFF_G4,
905
 *                        IFF_TIFF_LZW, IFF_TIFF_ZIP, IFF_TIFF_JPEG
906
 * \return  0 if OK, 1 on error
907
 *
908
 * <pre>
909
 * Notes:
910
 *      (1) This writes a single image to a file stream opened for writing.
911
 *      (2) If the pix has a colormap, it is preserved in the output file.
912
 *      (3) For images with bpp > 1, this resets the comptype, if
913
 *          necessary, to write uncompressed data.
914
 *      (4) G3 and G4 are only defined for 1 bpp.
915
 *      (5) We only allow PACKBITS for bpp = 1, because for bpp > 1
916
 *          it typically expands images that are not synthetically generated.
917
 *      (6) G4 compression is typically about twice as good as G3.
918
 *          G4 is excellent for binary compression of text/line-art,
919
 *          but terrible for halftones and dithered patterns.  (In
920
 *          fact, G4 on halftones can give a file that is larger
921
 *          than uncompressed!)  If a binary image has dithered
922
 *          regions, it is usually better to compress with png.
923
 * </pre>
924
 */
925
l_ok
926
pixWriteStreamTiff(FILE    *fp,
927
                   PIX     *pix,
928
                   l_int32  comptype)
929
0
{
930
0
    return pixWriteStreamTiffWA(fp, pix, comptype, "w");
931
0
}
932
933
934
/*!
935
 * \brief   pixWriteStreamTiffWA()
936
 *
937
 * \param[in]    fp       file stream opened for append or write
938
 * \param[in]    pix
939
 * \param[in]    comptype IFF_TIFF, IFF_TIFF_RLE, IFF_TIFF_PACKBITS,
940
 *                        IFF_TIFF_G3, IFF_TIFF_G4,
941
 *                        IFF_TIFF_LZW, IFF_TIFF_ZIP, IFF_TIFF_JPEG
942
 * \param[in]    modestr  "w" or "a"
943
 * \return  0 if OK, 1 on error
944
 *
945
 * <pre>
946
 * Notes:
947
 *      (1) See pixWriteStreamTiff()
948
 * </pre>
949
 */
950
l_ok
951
pixWriteStreamTiffWA(FILE        *fp,
952
                     PIX         *pix,
953
                     l_int32      comptype,
954
                     const char  *modestr)
955
0
{
956
0
TIFF  *tif;
957
958
0
    if (!fp)
959
0
        return ERROR_INT("stream not defined", __func__, 1 );
960
0
    if (!pix)
961
0
        return ERROR_INT("pix not defined", __func__, 1 );
962
0
    if (strcmp(modestr, "w") && strcmp(modestr, "a")) {
963
0
        L_ERROR("modestr = %s; not 'w' or 'a'\n", __func__, modestr);
964
0
        return 1;
965
0
    }
966
967
0
    if (pixGetDepth(pix) != 1 && comptype != IFF_TIFF &&
968
0
        comptype != IFF_TIFF_LZW && comptype != IFF_TIFF_ZIP &&
969
0
        comptype != IFF_TIFF_JPEG) {
970
0
        L_WARNING("invalid compression type %d for bpp > 1; using TIFF_ZIP\n",
971
0
                  __func__, comptype);
972
0
        comptype = IFF_TIFF_ZIP;
973
0
    }
974
975
0
    if ((tif = fopenTiff(fp, modestr)) == NULL)
976
0
        return ERROR_INT("tif not opened", __func__, 1);
977
978
0
    if (pixWriteToTiffStream(tif, pix, comptype, NULL, NULL, NULL, NULL)) {
979
0
        TIFFCleanup(tif);
980
0
        return ERROR_INT("tif write error", __func__, 1);
981
0
    }
982
983
0
    TIFFCleanup(tif);
984
0
    return 0;
985
0
}
986
987
988
/*!
989
 * \brief   pixWriteToTiffStream()
990
 *
991
 * \param[in]    tif       data structure, opened to a file
992
 * \param[in]    pix
993
 * \param[in]    comptype  IFF_TIFF: for any image; no compression
994
 *                         IFF_TIFF_RLE, IFF_TIFF_PACKBITS: for 1 bpp only
995
 *                         IFF_TIFF_G4 and IFF_TIFF_G3: for 1 bpp only
996
 *                         IFF_TIFF_LZW, IFF_TIFF_ZIP: lossless for any image
997
 *                         IFF_TIFF_JPEG: lossy 8 bpp gray or rgb
998
 * \param[in]    natags    [optional] NUMA of custom tiff tags
999
 * \param[in]    savals    [optional] SARRAY of values
1000
 * \param[in]    satypes   [optional] SARRAY of types
1001
 * \param[in]    nasizes   [optional] NUMA of sizes
1002
 * \return  0 if OK, 1 on error
1003
 *
1004
 * <pre>
1005
 * Notes:
1006
 *      (1) This static function should only be called through higher
1007
 *          level functions in this file; namely, pixWriteTiffCustom(),
1008
 *          pixWriteTiff(), pixWriteStreamTiff(), pixWriteMemTiff()
1009
 *          and pixWriteMemTiffCustom().
1010
 *      (2) We only allow PACKBITS for bpp = 1, because for bpp > 1
1011
 *          it typically expands images that are not synthetically generated.
1012
 *      (3) See pixWriteTiffCustom() for details on how to use
1013
 *          the last four parameters for customized tiff tags.
1014
 *      (4) The only valid pixel depths in leptonica are 1, 2, 4, 8, 16
1015
 *          and 32.  However, it is possible, and in some cases desirable,
1016
 *          to write out a tiff file using an rgb pix that has 24 bpp.
1017
 *          This can be created by appending the raster data for a 24 bpp
1018
 *          image (with proper scanline padding) directly to a 24 bpp
1019
 *          pix that was created without a data array.  See note in
1020
 *          pixWriteStreamPng() for an example.
1021
 * </pre>
1022
 */
1023
static l_int32
1024
pixWriteToTiffStream(TIFF    *tif,
1025
                     PIX     *pix,
1026
                     l_int32  comptype,
1027
                     NUMA    *natags,
1028
                     SARRAY  *savals,
1029
                     SARRAY  *satypes,
1030
                     NUMA    *nasizes)
1031
0
{
1032
0
l_uint8   *linebuf, *data;
1033
0
l_uint16   redmap[256], greenmap[256], bluemap[256];
1034
0
l_int32    w, h, d, spp, i, j, k, wpl, bpl, tiffbpl, ncolors, cmapsize;
1035
0
l_int32   *rmap, *gmap, *bmap;
1036
0
l_int32    xres, yres;
1037
0
l_uint32  *line, *ppixel;
1038
0
PIX       *pixt;
1039
0
PIXCMAP   *cmap;
1040
0
char      *text;
1041
1042
0
    if (!tif)
1043
0
        return ERROR_INT("tif stream not defined", __func__, 1);
1044
0
    if (!pix)
1045
0
        return ERROR_INT( "pix not defined", __func__, 1 );
1046
1047
0
    pixSetPadBits(pix, 0);
1048
0
    pixGetDimensions(pix, &w, &h, &d);
1049
0
    spp = pixGetSpp(pix);
1050
0
    xres = pixGetXRes(pix);
1051
0
    yres = pixGetYRes(pix);
1052
0
    if (xres == 0) xres = DefaultResolution;
1053
0
    if (yres == 0) yres = DefaultResolution;
1054
1055
        /* ------------------ Write out the header -------------  */
1056
0
    TIFFSetField(tif, TIFFTAG_RESOLUTIONUNIT, (l_uint32)RESUNIT_INCH);
1057
0
    TIFFSetField(tif, TIFFTAG_XRESOLUTION, (l_float64)xres);
1058
0
    TIFFSetField(tif, TIFFTAG_YRESOLUTION, (l_float64)yres);
1059
1060
0
    TIFFSetField(tif, TIFFTAG_IMAGEWIDTH, (l_uint32)w);
1061
0
    TIFFSetField(tif, TIFFTAG_IMAGELENGTH, (l_uint32)h);
1062
0
    TIFFSetField(tif, TIFFTAG_ORIENTATION, ORIENTATION_TOPLEFT);
1063
1064
0
    if ((text = pixGetText(pix)) != NULL)
1065
0
        TIFFSetField(tif, TIFFTAG_IMAGEDESCRIPTION, text);
1066
1067
0
    if (d == 1 && !pixGetColormap(pix)) {
1068
            /* If d == 1, preserve the colormap.  Note that when
1069
             * d == 1 pix with colormaps are read, the colormaps
1070
             * are removed.  The only pix in leptonica that have
1071
             * colormaps are made programmatically. */
1072
0
        TIFFSetField(tif, TIFFTAG_PHOTOMETRIC, PHOTOMETRIC_MINISWHITE);
1073
0
    } else if ((d == 32 && spp == 3) || d == 24) {
1074
0
        TIFFSetField(tif, TIFFTAG_PHOTOMETRIC, PHOTOMETRIC_RGB);
1075
0
        TIFFSetField(tif, TIFFTAG_SAMPLESPERPIXEL, (l_uint16)3);
1076
0
        TIFFSetField(tif, TIFFTAG_BITSPERSAMPLE,
1077
0
                       (l_uint16)8, (l_uint16)8, (l_uint16)8);
1078
0
    } else if (d == 32 && spp == 4) {
1079
0
        l_uint16  val[1];
1080
0
        val[0] = EXTRASAMPLE_ASSOCALPHA;
1081
0
        TIFFSetField(tif, TIFFTAG_EXTRASAMPLES, (l_uint16)1, &val);
1082
0
        TIFFSetField(tif, TIFFTAG_PHOTOMETRIC, PHOTOMETRIC_RGB);
1083
0
        TIFFSetField(tif, TIFFTAG_SAMPLESPERPIXEL, (l_uint16)4);
1084
0
        TIFFSetField(tif, TIFFTAG_BITSPERSAMPLE,
1085
0
                     (l_uint16)8, (l_uint16)8, (l_uint16)8, (l_uint16)8);
1086
0
    } else if (d == 16) {  /* we only support spp = 1, bps = 16 */
1087
0
        TIFFSetField(tif, TIFFTAG_PHOTOMETRIC, PHOTOMETRIC_MINISBLACK);
1088
0
    } else if ((cmap = pixGetColormap(pix)) == NULL) {
1089
0
        TIFFSetField(tif, TIFFTAG_PHOTOMETRIC, PHOTOMETRIC_MINISBLACK);
1090
0
    } else {  /* Save colormap in the tiff; not more than 256 colors */
1091
0
        if (d > 8) {
1092
0
            L_ERROR("d = %d > 8 with colormap!; reducing to 8\n", __func__, d);
1093
0
            d = 8;
1094
0
        }
1095
0
        pixcmapToArrays(cmap, &rmap, &gmap, &bmap, NULL);
1096
0
        ncolors = pixcmapGetCount(cmap);
1097
0
        ncolors = L_MIN(256, ncolors);  /* max 256 */
1098
0
        cmapsize = 1 << d;
1099
0
        cmapsize = L_MIN(256, cmapsize);  /* power of 2; max 256 */
1100
0
        if (ncolors > cmapsize) {
1101
0
            L_WARNING("too many colors in cmap for tiff; truncating\n",
1102
0
                      __func__);
1103
0
            ncolors = cmapsize;
1104
0
        }
1105
0
        for (i = 0; i < ncolors; i++) {
1106
0
            redmap[i] = (rmap[i] << 8) | rmap[i];
1107
0
            greenmap[i] = (gmap[i] << 8) | gmap[i];
1108
0
            bluemap[i] = (bmap[i] << 8) | bmap[i];
1109
0
        }
1110
0
        for (i = ncolors; i < cmapsize; i++)  /* init, even though not used */
1111
0
            redmap[i] = greenmap[i] = bluemap[i] = 0;
1112
0
        LEPT_FREE(rmap);
1113
0
        LEPT_FREE(gmap);
1114
0
        LEPT_FREE(bmap);
1115
1116
0
        TIFFSetField(tif, TIFFTAG_PHOTOMETRIC, PHOTOMETRIC_PALETTE);
1117
0
        TIFFSetField(tif, TIFFTAG_SAMPLESPERPIXEL, (l_uint16)1);
1118
0
        TIFFSetField(tif, TIFFTAG_BITSPERSAMPLE, (l_uint16)d);
1119
0
        TIFFSetField(tif, TIFFTAG_COLORMAP, redmap, greenmap, bluemap);
1120
0
    }
1121
1122
0
    if (d <= 16) {
1123
0
        TIFFSetField(tif, TIFFTAG_BITSPERSAMPLE, (l_uint16)d);
1124
0
        TIFFSetField(tif, TIFFTAG_SAMPLESPERPIXEL, (l_uint16)1);
1125
0
    }
1126
1127
0
    TIFFSetField(tif, TIFFTAG_PLANARCONFIG, PLANARCONFIG_CONTIG);
1128
0
    if (comptype == IFF_TIFF) {  /* no compression */
1129
0
        TIFFSetField(tif, TIFFTAG_COMPRESSION, COMPRESSION_NONE);
1130
0
    } else if (comptype == IFF_TIFF_G4) {
1131
0
        TIFFSetField(tif, TIFFTAG_COMPRESSION, COMPRESSION_CCITTFAX4);
1132
0
    } else if (comptype == IFF_TIFF_G3) {
1133
0
        TIFFSetField(tif, TIFFTAG_COMPRESSION, COMPRESSION_CCITTFAX3);
1134
0
    } else if (comptype == IFF_TIFF_RLE) {
1135
0
        TIFFSetField(tif, TIFFTAG_COMPRESSION, COMPRESSION_CCITTRLE);
1136
0
    } else if (comptype == IFF_TIFF_PACKBITS) {
1137
0
        TIFFSetField(tif, TIFFTAG_COMPRESSION, COMPRESSION_PACKBITS);
1138
0
    } else if (comptype == IFF_TIFF_LZW) {
1139
0
        TIFFSetField(tif, TIFFTAG_COMPRESSION, COMPRESSION_LZW);
1140
0
    } else if (comptype == IFF_TIFF_ZIP) {
1141
0
        TIFFSetField(tif, TIFFTAG_COMPRESSION, COMPRESSION_ADOBE_DEFLATE);
1142
0
    } else if (comptype == IFF_TIFF_JPEG) {
1143
0
        TIFFSetField(tif, TIFFTAG_COMPRESSION, COMPRESSION_JPEG);
1144
0
    } else {
1145
0
        L_WARNING("unknown tiff compression; using none\n", __func__);
1146
0
        TIFFSetField(tif, TIFFTAG_COMPRESSION, COMPRESSION_NONE);
1147
0
    }
1148
1149
        /* This is a no-op if arrays are NULL */
1150
0
    writeCustomTiffTags(tif, natags, savals, satypes, nasizes);
1151
1152
        /* ------------- Write out the image data -------------  */
1153
0
    tiffbpl = TIFFScanlineSize(tif);
1154
0
    wpl = pixGetWpl(pix);
1155
0
    bpl = 4 * wpl;
1156
0
    if (tiffbpl > bpl)
1157
0
        lept_stderr("Big trouble: tiffbpl = %d, bpl = %d\n", tiffbpl, bpl);
1158
0
    if ((linebuf = (l_uint8 *)LEPT_CALLOC(1, bpl)) == NULL)
1159
0
        return ERROR_INT("calloc fail for linebuf", __func__, 1);
1160
1161
        /* Use single strip for image */
1162
0
    TIFFSetField(tif, TIFFTAG_ROWSPERSTRIP, h);
1163
1164
0
    if (d != 24 && d != 32) {
1165
0
        if (d == 16)
1166
0
            pixt = pixEndianTwoByteSwapNew(pix);
1167
0
        else
1168
0
            pixt = pixEndianByteSwapNew(pix);
1169
0
        data = (l_uint8 *)pixGetData(pixt);
1170
0
        for (i = 0; i < h; i++, data += bpl) {
1171
0
            memcpy(linebuf, data, tiffbpl);
1172
0
            if (TIFFWriteScanline(tif, linebuf, i, 0) < 0)
1173
0
                break;
1174
0
        }
1175
0
        pixDestroy(&pixt);
1176
0
    } else if (d == 24) {  /* See note 4 above: special case of 24 bpp rgb */
1177
0
        for (i = 0; i < h; i++) {
1178
0
            line = pixGetData(pix) + i * wpl;
1179
0
            if (TIFFWriteScanline(tif, (l_uint8 *)line, i, 0) < 0)
1180
0
                break;
1181
0
        }
1182
0
    } else {  /* 32 bpp rgb or rgba */
1183
0
        for (i = 0; i < h; i++) {
1184
0
            line = pixGetData(pix) + i * wpl;
1185
0
            for (j = 0, k = 0, ppixel = line; j < w; j++) {
1186
0
                linebuf[k++] = GET_DATA_BYTE(ppixel, COLOR_RED);
1187
0
                linebuf[k++] = GET_DATA_BYTE(ppixel, COLOR_GREEN);
1188
0
                linebuf[k++] = GET_DATA_BYTE(ppixel, COLOR_BLUE);
1189
0
                if (spp == 4)
1190
0
                    linebuf[k++] = GET_DATA_BYTE(ppixel, L_ALPHA_CHANNEL);
1191
0
                ppixel++;
1192
0
            }
1193
0
            if (TIFFWriteScanline(tif, linebuf, i, 0) < 0)
1194
0
                break;
1195
0
        }
1196
0
    }
1197
1198
/*    TIFFWriteDirectory(tif); */
1199
0
    LEPT_FREE(linebuf);
1200
1201
0
    return 0;
1202
0
}
1203
1204
1205
/*!
1206
 * \brief   writeCustomTiffTags()
1207
 *
1208
 * \param[in]    tif
1209
 * \param[in]    natags   [optional] NUMA of custom tiff tags
1210
 * \param[in]    savals   [optional] SARRAY of values
1211
 * \param[in]    satypes  [optional] SARRAY of types
1212
 * \param[in]    nasizes  [optional] NUMA of sizes
1213
 * \return  0 if OK, 1 on error
1214
 *
1215
 * <pre>
1216
 * Notes:
1217
 *      (1) This static function should be called indirectly through
1218
 *          higher level functions, such as pixWriteTiffCustom(),
1219
 *          which call pixWriteToTiffStream().  See details in
1220
 *          pixWriteTiffCustom() for using the 4 input arrays.
1221
 *      (2) This is a no-op if the first 3 arrays are all NULL.
1222
 *      (3) Otherwise, the first 3 arrays must be defined and all
1223
 *          of equal size.
1224
 *      (4) The fourth array is always optional.
1225
 *      (5) The most commonly used types are "char*" and "u_int16".
1226
 *          See tiff.h for a full listing of the tiff tags.
1227
 *          Note that many of these tags, in particular the bit tags,
1228
 *          are intended to be private, and cannot be set by this function.
1229
 *          Examples are the STRIPOFFSETS and STRIPBYTECOUNTS tags,
1230
 *          which are bit tags that are automatically set in the header,
1231
 *          and can be extracted using tiffdump.
1232
 * </pre>
1233
 */
1234
static l_int32
1235
writeCustomTiffTags(TIFF    *tif,
1236
                    NUMA    *natags,
1237
                    SARRAY  *savals,
1238
                    SARRAY  *satypes,
1239
                    NUMA    *nasizes)
1240
0
{
1241
0
char      *sval, *type;
1242
0
l_int32    i, n, ns, size, tagval, val;
1243
0
l_float64  dval;
1244
0
l_uint32   uval, uval2;
1245
1246
0
    if (!tif)
1247
0
        return ERROR_INT("tif stream not defined", __func__, 1);
1248
0
    if (!natags && !savals && !satypes)
1249
0
        return 0;
1250
0
    if (!natags || !savals || !satypes)
1251
0
        return ERROR_INT("not all arrays defined", __func__, 1);
1252
0
    n = numaGetCount(natags);
1253
0
    if ((sarrayGetCount(savals) != n) || (sarrayGetCount(satypes) != n))
1254
0
        return ERROR_INT("not all sa the same size", __func__, 1);
1255
1256
        /* The sized arrays (4 args to TIFFSetField) are written first */
1257
0
    if (nasizes) {
1258
0
        ns = numaGetCount(nasizes);
1259
0
        if (ns > n)
1260
0
            return ERROR_INT("too many 4-arg tag calls", __func__, 1);
1261
0
        for (i = 0; i < ns; i++) {
1262
0
            numaGetIValue(natags, i, &tagval);
1263
0
            sval = sarrayGetString(savals, i, L_NOCOPY);
1264
0
            type = sarrayGetString(satypes, i, L_NOCOPY);
1265
0
            numaGetIValue(nasizes, i, &size);
1266
0
            if (strcmp(type, "char*") && strcmp(type, "l_uint8*"))
1267
0
                L_WARNING("array type not char* or l_uint8*; ignore\n",
1268
0
                          __func__);
1269
0
            TIFFSetField(tif, tagval, size, sval);
1270
0
        }
1271
0
    } else {
1272
0
        ns = 0;
1273
0
    }
1274
1275
        /* The typical tags (3 args to TIFFSetField) are now written */
1276
0
    for (i = ns; i < n; i++) {
1277
0
        numaGetIValue(natags, i, &tagval);
1278
0
        sval = sarrayGetString(savals, i, L_NOCOPY);
1279
0
        type = sarrayGetString(satypes, i, L_NOCOPY);
1280
0
        if (!strcmp(type, "char*") || !strcmp(type, "const char*")) {
1281
0
            TIFFSetField(tif, tagval, sval);
1282
0
        } else if (!strcmp(type, "l_uint16")) {
1283
0
            if (sscanf(sval, "%u", &uval) == 1) {
1284
0
                TIFFSetField(tif, tagval, (l_uint16)uval);
1285
0
            } else {
1286
0
                lept_stderr("val %s not of type %s\n", sval, type);
1287
0
                return ERROR_INT("custom tag(s) not written", __func__, 1);
1288
0
            }
1289
0
        } else if (!strcmp(type, "l_uint32")) {
1290
0
            if (sscanf(sval, "%u", &uval) == 1) {
1291
0
                TIFFSetField(tif, tagval, uval);
1292
0
            } else {
1293
0
                lept_stderr("val %s not of type %s\n", sval, type);
1294
0
                return ERROR_INT("custom tag(s) not written", __func__, 1);
1295
0
            }
1296
0
        } else if (!strcmp(type, "l_int32")) {
1297
0
            if (sscanf(sval, "%d", &val) == 1) {
1298
0
                TIFFSetField(tif, tagval, val);
1299
0
            } else {
1300
0
                lept_stderr("val %s not of type %s\n", sval, type);
1301
0
                return ERROR_INT("custom tag(s) not written", __func__, 1);
1302
0
            }
1303
0
        } else if (!strcmp(type, "l_float64")) {
1304
0
            if (sscanf(sval, "%lf", &dval) == 1) {
1305
0
                TIFFSetField(tif, tagval, dval);
1306
0
            } else {
1307
0
                lept_stderr("val %s not of type %s\n", sval, type);
1308
0
                return ERROR_INT("custom tag(s) not written", __func__, 1);
1309
0
            }
1310
0
        } else if (!strcmp(type, "l_uint16-l_uint16")) {
1311
0
            if (sscanf(sval, "%u-%u", &uval, &uval2) == 2) {
1312
0
                TIFFSetField(tif, tagval, (l_uint16)uval, (l_uint16)uval2);
1313
0
            } else {
1314
0
                lept_stderr("val %s not of type %s\n", sval, type);
1315
0
                return ERROR_INT("custom tag(s) not written", __func__, 1);
1316
0
            }
1317
0
        } else {
1318
0
            lept_stderr("unknown type %s\n",type);
1319
0
            return ERROR_INT("unknown type; tag(s) not written", __func__, 1);
1320
0
        }
1321
0
    }
1322
0
    return 0;
1323
0
}
1324
1325
1326
/*--------------------------------------------------------------*
1327
 *               Reading and writing multipage tiff             *
1328
 *--------------------------------------------------------------*/
1329
/*!
1330
 * \brief   pixReadFromMultipageTiff()
1331
 *
1332
 * \param[in]      fname     filename
1333
 * \param[in,out]  poffset   set offset to 0 for first image
1334
 * \return  pix, or NULL on error or if previous call returned the last image
1335
 *
1336
 * <pre>
1337
 * Notes:
1338
 *      (1) This allows overhead for traversal of a multipage tiff file
1339
 *          to be linear in the number of images.  This will also work
1340
 *          with a singlepage tiff file.
1341
 *      (2) No TIFF internal data structures are exposed to the caller
1342
 *          (thanks to Jeff Breidenbach).
1343
 *      (3) offset is the byte offset of a particular image in a multipage
1344
 *          tiff file. To get the first image in the file, input the
1345
 *          special offset value of 0.
1346
 *      (4) The offset is updated to point to the next image, for a
1347
 *          subsequent call.
1348
 *      (5) On the last image, the offset returned is 0.  Exit the loop
1349
 *          when the returned offset is 0.
1350
 *      (6) For reading a multipage tiff from a memory buffer, see
1351
 *            pixReadMemFromMultipageTiff()
1352
 *      (7) Example usage for reading all the images in the tif file:
1353
 *            size_t offset = 0;
1354
 *            do {
1355
 *                Pix *pix = pixReadFromMultipageTiff(filename, &offset);
1356
 *                // do something with pix
1357
 *            } while (offset != 0);
1358
 * </pre>
1359
 */
1360
PIX *
1361
pixReadFromMultipageTiff(const char  *fname,
1362
                         size_t      *poffset)
1363
0
{
1364
0
l_int32  retval;
1365
0
size_t   offset;
1366
0
PIX     *pix;
1367
0
TIFF    *tif;
1368
1369
0
    if (!fname)
1370
0
        return (PIX *)ERROR_PTR("fname not defined", __func__, NULL);
1371
0
    if (!poffset)
1372
0
        return (PIX *)ERROR_PTR("&offset not defined", __func__, NULL);
1373
1374
0
    if ((tif = openTiff(fname, "r")) == NULL) {
1375
0
        L_ERROR("tif open failed for %s\n", __func__, fname);
1376
0
        return NULL;
1377
0
    }
1378
1379
        /* Set ptrs in the TIFF to the beginning of the image */
1380
0
    offset = *poffset;
1381
0
    retval = (offset == 0) ? TIFFSetDirectory(tif, 0)
1382
0
                            : TIFFSetSubDirectory(tif, offset);
1383
0
    if (retval == 0) {
1384
0
        TIFFClose(tif);
1385
0
        return NULL;
1386
0
    }
1387
1388
0
    if ((pix = pixReadFromTiffStream(tif)) == NULL) {
1389
0
        TIFFClose(tif);
1390
0
        return NULL;
1391
0
    }
1392
1393
        /* Advance to the next image and return the new offset */
1394
0
    TIFFReadDirectory(tif);
1395
0
    *poffset = TIFFCurrentDirOffset(tif);
1396
0
    TIFFClose(tif);
1397
0
    return pix;
1398
0
}
1399
1400
1401
/*!
1402
 * \brief   pixaReadMultipageTiff()
1403
 *
1404
 * \param[in]    filename    input tiff file
1405
 * \return  pixa of page images, or NULL on error
1406
 */
1407
PIXA *
1408
pixaReadMultipageTiff(const char  *filename)
1409
0
{
1410
0
l_int32  i, npages;
1411
0
FILE    *fp;
1412
0
PIX     *pix;
1413
0
PIXA    *pixa;
1414
0
TIFF    *tif;
1415
1416
0
    if (!filename)
1417
0
        return (PIXA *)ERROR_PTR("filename not defined", __func__, NULL);
1418
1419
0
    if ((fp = fopenReadStream(filename)) == NULL)
1420
0
        return (PIXA *)ERROR_PTR_1("stream not opened",
1421
0
                                   filename, __func__, NULL);
1422
0
    if (fileFormatIsTiff(fp)) {
1423
0
        tiffGetCount(fp, &npages);
1424
0
        L_INFO(" Tiff: %d pages\n", __func__, npages);
1425
0
    } else {
1426
0
        return (PIXA *)ERROR_PTR_1("file is not tiff",
1427
0
                                   filename, __func__, NULL);
1428
0
    }
1429
1430
0
    if ((tif = fopenTiff(fp, "r")) == NULL)
1431
0
        return (PIXA *)ERROR_PTR_1("tif not opened",
1432
0
                                   filename, __func__, NULL);
1433
1434
0
    pixa = pixaCreate(npages);
1435
0
    pix = NULL;
1436
0
    for (i = 0; i < npages; i++) {
1437
0
        if ((pix = pixReadFromTiffStream(tif)) != NULL) {
1438
0
            pixaAddPix(pixa, pix, L_INSERT);
1439
0
        } else {
1440
0
            L_WARNING("pix not read for page %d\n", __func__, i);
1441
0
        }
1442
1443
            /* Advance to the next directory (i.e., the next image) */
1444
0
        if (TIFFReadDirectory(tif) == 0)
1445
0
            break;
1446
0
    }
1447
1448
0
    fclose(fp);
1449
0
    TIFFCleanup(tif);
1450
0
    return pixa;
1451
0
}
1452
1453
1454
/*!
1455
 * \brief   pixaWriteMultipageTiff()
1456
 *
1457
 * \param[in]    fname      input tiff file
1458
 * \param[in]    pixa       any depth; colormap will be removed
1459
 * \return  0 if OK, 1 on error
1460
 *
1461
 * <pre>
1462
 * Notes:
1463
 *      (1) The tiff directory overhead is O(n^2).  I have not been
1464
 *          able to reduce it to O(n).  The overhead for n = 2000 is
1465
 *          about 1 second.
1466
 * </pre>
1467
 */
1468
l_ok
1469
pixaWriteMultipageTiff(const char  *fname,
1470
                       PIXA        *pixa)
1471
0
{
1472
0
const char  *modestr;
1473
0
l_int32      i, n;
1474
0
PIX         *pix1;
1475
1476
0
    if (!fname)
1477
0
        return ERROR_INT("fname not defined", __func__, 1);
1478
0
    if (!pixa)
1479
0
        return ERROR_INT("pixa not defined", __func__, 1);
1480
1481
0
    n = pixaGetCount(pixa);
1482
0
    for (i = 0; i < n; i++) {
1483
0
        modestr = (i == 0) ? "w" : "a";
1484
0
        pix1 = pixaGetPix(pixa, i, L_CLONE);
1485
0
        if (pixGetDepth(pix1) == 1)
1486
0
            pixWriteTiff(fname, pix1, IFF_TIFF_G4, modestr);
1487
0
        else
1488
0
            pixWriteTiff(fname, pix1, IFF_TIFF_ZIP, modestr);
1489
0
        pixDestroy(&pix1);
1490
0
    }
1491
1492
0
    return 0;
1493
0
}
1494
1495
1496
/*!
1497
 * \brief   writeMultipageTiff()
1498
 *
1499
 * \param[in]    dirin   input directory
1500
 * \param[in]    substr  [optional] substring filter on filenames; can be NULL
1501
 * \param[in]    fileout output multipage tiff file
1502
 * \return  0 if OK, 1 on error
1503
 *
1504
 * <pre>
1505
 * Notes:
1506
 *      (1) This writes a set of image files in a directory out
1507
 *          as a multipage tiff file.  The images can be in any
1508
 *          initial file format.
1509
 *      (2) Images with a colormap have the colormap removed before
1510
 *          re-encoding as tiff.
1511
 *      (3) All images are encoded losslessly.  Those with 1 bpp are
1512
 *          encoded 'g4'.  The rest are encoded as 'zip' (flate encoding).
1513
 *          Because it is lossless, this is an expensive method for
1514
 *          saving most rgb images.
1515
 *      (4) The tiff directory overhead is quadratic in the number of
1516
 *          images.  To avoid this for very large numbers of images to be
1517
 *          written, apply the method used in pixaWriteMultipageTiff().
1518
 * </pre>
1519
 */
1520
l_ok
1521
writeMultipageTiff(const char  *dirin,
1522
                   const char  *substr,
1523
                   const char  *fileout)
1524
0
{
1525
0
SARRAY  *sa;
1526
1527
0
    if (!dirin)
1528
0
        return ERROR_INT("dirin not defined", __func__, 1);
1529
0
    if (!fileout)
1530
0
        return ERROR_INT("fileout not defined", __func__, 1);
1531
1532
        /* Get all filtered and sorted full pathnames. */
1533
0
    sa = getSortedPathnamesInDirectory(dirin, substr, 0, 0);
1534
1535
        /* Generate the tiff file */
1536
0
    writeMultipageTiffSA(sa, fileout);
1537
0
    sarrayDestroy(&sa);
1538
0
    return 0;
1539
0
}
1540
1541
1542
/*!
1543
 * \brief   writeMultipageTiffSA()
1544
 *
1545
 * \param[in]    sa       string array of full path names
1546
 * \param[in]    fileout  output ps file
1547
 * \return  0 if OK, 1 on error
1548
 *
1549
 * <pre>
1550
 * Notes:
1551
 *      (1) See writeMultipageTiff()
1552
 * </pre>
1553
 */
1554
l_ok
1555
writeMultipageTiffSA(SARRAY      *sa,
1556
                     const char  *fileout)
1557
0
{
1558
0
char        *fname;
1559
0
const char  *op;
1560
0
l_int32      i, nfiles, firstfile, format;
1561
0
PIX         *pix;
1562
1563
0
    if (!sa)
1564
0
        return ERROR_INT("sa not defined", __func__, 1);
1565
0
    if (!fileout)
1566
0
        return ERROR_INT("fileout not defined", __func__, 1);
1567
1568
0
    nfiles = sarrayGetCount(sa);
1569
0
    firstfile = TRUE;
1570
0
    for (i = 0; i < nfiles; i++) {
1571
0
        op = (firstfile) ? "w" : "a";
1572
0
        fname = sarrayGetString(sa, i, L_NOCOPY);
1573
0
        findFileFormat(fname, &format);
1574
0
        if (format == IFF_UNKNOWN) {
1575
0
            L_INFO("format of %s not known\n", __func__, fname);
1576
0
            continue;
1577
0
        }
1578
1579
0
        if ((pix = pixRead(fname)) == NULL) {
1580
0
            L_WARNING("pix not made for file: %s\n", __func__, fname);
1581
0
            continue;
1582
0
        }
1583
0
        if (pixGetDepth(pix) == 1)
1584
0
            pixWriteTiff(fileout, pix, IFF_TIFF_G4, op);
1585
0
        else
1586
0
            pixWriteTiff(fileout, pix, IFF_TIFF_ZIP, op);
1587
0
        firstfile = FALSE;
1588
0
        pixDestroy(&pix);
1589
0
    }
1590
1591
0
    return 0;
1592
0
}
1593
1594
1595
/*--------------------------------------------------------------*
1596
 *                    Print info to stream                      *
1597
 *--------------------------------------------------------------*/
1598
/*!
1599
 * \brief   fprintTiffInfo()
1600
 *
1601
 * \param[in]    fpout    stream for output of tag data
1602
 * \param[in]    tiffile  input
1603
 * \return  0 if OK; 1 on error
1604
 */
1605
l_ok
1606
fprintTiffInfo(FILE        *fpout,
1607
               const char  *tiffile)
1608
0
{
1609
0
TIFF  *tif;
1610
1611
0
    if (!tiffile)
1612
0
        return ERROR_INT("tiffile not defined", __func__, 1);
1613
0
    if (!fpout)
1614
0
        return ERROR_INT("stream out not defined", __func__, 1);
1615
1616
0
    if ((tif = openTiff(tiffile, "rb")) == NULL)
1617
0
        return ERROR_INT("tif not open for read", __func__, 1);
1618
1619
0
    TIFFPrintDirectory(tif, fpout, 0);
1620
0
    TIFFClose(tif);
1621
1622
0
    return 0;
1623
0
}
1624
1625
1626
/*--------------------------------------------------------------*
1627
 *                        Get page count                        *
1628
 *--------------------------------------------------------------*/
1629
/*!
1630
 * \brief   tiffGetCount()
1631
 *
1632
 * \param[in]    fp   file stream opened for read
1633
 * \param[out]   pn   number of images
1634
 * \return  0 if OK; 1 on error
1635
 */
1636
l_ok
1637
tiffGetCount(FILE     *fp,
1638
             l_int32  *pn)
1639
0
{
1640
0
l_int32  i;
1641
0
TIFF    *tif;
1642
1643
0
    if (!fp)
1644
0
        return ERROR_INT("stream not defined", __func__, 1);
1645
0
    if (!pn)
1646
0
        return ERROR_INT("&n not defined", __func__, 1);
1647
0
    *pn = 0;
1648
1649
0
    if ((tif = fopenTiff(fp, "r")) == NULL)
1650
0
        return ERROR_INT("tif not open for read", __func__, 1);
1651
1652
0
    for (i = 1; ; i++) {
1653
0
        if (TIFFReadDirectory(tif) == 0)
1654
0
            break;
1655
0
        if (i == ManyPagesInTiffFile + 1) {
1656
0
            L_WARNING("big file: more than %d pages\n", __func__,
1657
0
                      ManyPagesInTiffFile);
1658
0
        }
1659
0
    }
1660
0
    *pn = i;
1661
0
    TIFFCleanup(tif);
1662
0
    return 0;
1663
0
}
1664
1665
1666
/*--------------------------------------------------------------*
1667
 *                   Get resolution from tif                    *
1668
 *--------------------------------------------------------------*/
1669
/*!
1670
 * \brief   getTiffResolution()
1671
 *
1672
 * \param[in]    fp            file stream opened for read
1673
 * \param[out]   pxres, pyres  resolution in ppi
1674
 * \return  0 if OK; 1 on error
1675
 *
1676
 * <pre>
1677
 * Notes:
1678
 *      (1) If neither resolution field is set, this is not an error;
1679
 *          the returned resolution values are 0 (designating 'unknown').
1680
 * </pre>
1681
 */
1682
l_ok
1683
getTiffResolution(FILE     *fp,
1684
                  l_int32  *pxres,
1685
                  l_int32  *pyres)
1686
0
{
1687
0
TIFF  *tif;
1688
1689
0
    if (!pxres || !pyres)
1690
0
        return ERROR_INT("&xres and &yres not both defined", __func__, 1);
1691
0
    *pxres = *pyres = 0;
1692
0
    if (!fp)
1693
0
        return ERROR_INT("stream not opened", __func__, 1);
1694
1695
0
    if ((tif = fopenTiff(fp, "r")) == NULL)
1696
0
        return ERROR_INT("tif not open for read", __func__, 1);
1697
0
    getTiffStreamResolution(tif, pxres, pyres);
1698
0
    TIFFCleanup(tif);
1699
0
    return 0;
1700
0
}
1701
1702
1703
/*!
1704
 * \brief   getTiffStreamResolution()
1705
 *
1706
 * \param[in]    tif            TIFF handle opened for read
1707
 * \param[out]   pxres, pyres   resolution in ppi
1708
 * \return  0 if OK; 1 on error
1709
 *
1710
 * <pre>
1711
 * Notes:
1712
 *      (1) If neither resolution field is set, this is not an error;
1713
 *          the returned resolution values are 0 (designating 'unknown').
1714
 * </pre>
1715
 */
1716
static l_int32
1717
getTiffStreamResolution(TIFF     *tif,
1718
                        l_int32  *pxres,
1719
                        l_int32  *pyres)
1720
0
{
1721
0
l_uint16   resunit;
1722
0
l_int32    foundxres, foundyres;
1723
0
l_float32  fxres, fyres;
1724
1725
0
    if (!tif)
1726
0
        return ERROR_INT("tif not opened", __func__, 1);
1727
0
    if (!pxres || !pyres)
1728
0
        return ERROR_INT("&xres and &yres not both defined", __func__, 1);
1729
0
    *pxres = *pyres = 0;
1730
1731
0
    TIFFGetFieldDefaulted(tif, TIFFTAG_RESOLUTIONUNIT, &resunit);
1732
0
    foundxres = TIFFGetField(tif, TIFFTAG_XRESOLUTION, &fxres);
1733
0
    foundyres = TIFFGetField(tif, TIFFTAG_YRESOLUTION, &fyres);
1734
0
    if (!foundxres && !foundyres) return 1;
1735
0
    if (isnan(fxres) || isnan(fyres)) return 1;
1736
0
    if (!foundxres && foundyres)
1737
0
        fxres = fyres;
1738
0
    else if (foundxres && !foundyres)
1739
0
        fyres = fxres;
1740
1741
        /* Avoid overflow into int32; set max fxres and fyres to 5 x 10^8 */
1742
0
    if (fxres < 0 || fxres > (1L << 29) || fyres < 0 || fyres > (1L << 29))
1743
0
        return ERROR_INT("fxres and/or fyres values are invalid", __func__, 1);
1744
1745
0
    if (resunit == RESUNIT_CENTIMETER) {  /* convert to ppi */
1746
0
        *pxres = (l_int32)(2.54 * fxres + 0.5);
1747
0
        *pyres = (l_int32)(2.54 * fyres + 0.5);
1748
0
    } else {
1749
0
        *pxres = (l_int32)(fxres + 0.5);
1750
0
        *pyres = (l_int32)(fyres + 0.5);
1751
0
    }
1752
1753
0
    return 0;
1754
0
}
1755
1756
1757
/*--------------------------------------------------------------*
1758
 *              Get some tiff header information                *
1759
 *--------------------------------------------------------------*/
1760
/*!
1761
 * \brief   readHeaderTiff()
1762
 *
1763
 * \param[in]    filename
1764
 * \param[in]    n          page image number: 0-based
1765
 * \param[out]   pw         [optional] width
1766
 * \param[out]   ph         [optional] height
1767
 * \param[out]   pbps       [optional] bits per sample -- 1, 2, 4 or 8
1768
 * \param[out]   pspp       [optional] samples per pixel -- 1 or 3
1769
 * \param[out]   pres       [optional] resolution in x dir; NULL to ignore
1770
 * \param[out]   pcmap      [optional] colormap exists; input NULL to ignore
1771
 * \param[out]   pformat    [optional] tiff format; input NULL to ignore
1772
 * \return  0 if OK, 1 on error
1773
 *
1774
 * <pre>
1775
 * Notes:
1776
 *      (1) If there is a colormap, cmap is returned as 1; else 0.
1777
 *      (2) If %n is equal to or greater than the number of images, returns 1.
1778
 * </pre>
1779
 */
1780
l_ok
1781
readHeaderTiff(const char *filename,
1782
               l_int32     n,
1783
               l_int32    *pw,
1784
               l_int32    *ph,
1785
               l_int32    *pbps,
1786
               l_int32    *pspp,
1787
               l_int32    *pres,
1788
               l_int32    *pcmap,
1789
               l_int32    *pformat)
1790
0
{
1791
0
l_int32  ret;
1792
0
FILE    *fp;
1793
1794
0
    if (pw) *pw = 0;
1795
0
    if (ph) *ph = 0;
1796
0
    if (pbps) *pbps = 0;
1797
0
    if (pspp) *pspp = 0;
1798
0
    if (pres) *pres = 0;
1799
0
    if (pcmap) *pcmap = 0;
1800
0
    if (pformat) *pformat = 0;
1801
0
    if (!filename)
1802
0
        return ERROR_INT("filename not defined", __func__, 1);
1803
0
    if (!pw && !ph && !pbps && !pspp && !pres && !pcmap && !pformat)
1804
0
        return ERROR_INT("no results requested", __func__, 1);
1805
1806
0
    if ((fp = fopenReadStream(filename)) == NULL)
1807
0
        return ERROR_INT_1("image file not found", filename, __func__, 1);
1808
0
    ret = freadHeaderTiff(fp, n, pw, ph, pbps, pspp, pres, pcmap, pformat);
1809
0
    fclose(fp);
1810
0
    return ret;
1811
0
}
1812
1813
1814
/*!
1815
 * \brief   freadHeaderTiff()
1816
 *
1817
 * \param[in]    fp       file stream
1818
 * \param[in]    n        page image number: 0-based
1819
 * \param[out]   pw       [optional] width
1820
 * \param[out]   ph       [optional] height
1821
 * \param[out]   pbps     [optional] bits per sample -- 1, 2, 4 or 8
1822
 * \param[out]   pspp     [optional] samples per pixel -- 1 or 3
1823
 * \param[out]   pres     [optional] resolution in x dir; NULL to ignore
1824
 * \param[out]   pcmap    [optional] colormap exists; input NULL to ignore
1825
 * \param[out]   pformat  [optional] tiff format; input NULL to ignore
1826
 * \return  0 if OK, 1 on error
1827
 *
1828
 * <pre>
1829
 * Notes:
1830
 *      (1) If there is a colormap, cmap is returned as 1; else 0.
1831
 *      (2) If %n is equal to or greater than the number of images, returns 1.
1832
 * </pre>
1833
 */
1834
l_ok
1835
freadHeaderTiff(FILE     *fp,
1836
                l_int32   n,
1837
                l_int32  *pw,
1838
                l_int32  *ph,
1839
                l_int32  *pbps,
1840
                l_int32  *pspp,
1841
                l_int32  *pres,
1842
                l_int32  *pcmap,
1843
                l_int32  *pformat)
1844
0
{
1845
0
l_int32  i, ret, format;
1846
0
TIFF    *tif;
1847
1848
0
    if (pw) *pw = 0;
1849
0
    if (ph) *ph = 0;
1850
0
    if (pbps) *pbps = 0;
1851
0
    if (pspp) *pspp = 0;
1852
0
    if (pres) *pres = 0;
1853
0
    if (pcmap) *pcmap = 0;
1854
0
    if (pformat) *pformat = 0;
1855
0
    if (!fp)
1856
0
        return ERROR_INT("stream not defined", __func__, 1);
1857
0
    if (n < 0)
1858
0
        return ERROR_INT("image index must be >= 0", __func__, 1);
1859
0
    if (!pw && !ph && !pbps && !pspp && !pres && !pcmap && !pformat)
1860
0
        return ERROR_INT("no results requested", __func__, 1);
1861
1862
0
    findFileFormatStream(fp, &format);
1863
0
    if (!L_FORMAT_IS_TIFF(format))
1864
0
        return ERROR_INT("file not tiff format", __func__, 1);
1865
1866
0
    if ((tif = fopenTiff(fp, "r")) == NULL)
1867
0
        return ERROR_INT("tif not open for read", __func__, 1);
1868
1869
0
    for (i = 0; i < n; i++) {
1870
0
        if (TIFFReadDirectory(tif) == 0)
1871
0
            return ERROR_INT("image n not found in file", __func__, 1);
1872
0
    }
1873
1874
0
    ret = tiffReadHeaderTiff(tif, pw, ph, pbps, pspp, pres, pcmap, pformat);
1875
0
    TIFFCleanup(tif);
1876
0
    return ret;
1877
0
}
1878
1879
1880
/*!
1881
 * \brief   readHeaderMemTiff()
1882
 *
1883
 * \param[in]    cdata     const; tiff-encoded
1884
 * \param[in]    size      size of data
1885
 * \param[in]    n         page image number: 0-based
1886
 * \param[out]   pw        [optional] width
1887
 * \param[out]   ph        [optional] height
1888
 * \param[out]   pbps      [optional] bits per sample -- 1, 2, 4 or 8
1889
 * \param[out]   pspp      [optional] samples per pixel -- 1 or 3
1890
 * \param[out]   pres      [optional] resolution in x dir; NULL to ignore
1891
 * \param[out]   pcmap     [optional] colormap exists; input NULL to ignore
1892
 * \param[out]   pformat   [optional] tiff format; input NULL to ignore
1893
 * \return  0 if OK, 1 on error
1894
 *
1895
 * <pre>
1896
 * Notes:
1897
 *      (1) Use TIFFClose(); TIFFCleanup() doesn't free internal memstream.
1898
 *      (2) Returns res = 0 if not set in the file.
1899
 * </pre>
1900
 */
1901
l_ok
1902
readHeaderMemTiff(const l_uint8  *cdata,
1903
                  size_t          size,
1904
                  l_int32         n,
1905
                  l_int32        *pw,
1906
                  l_int32        *ph,
1907
                  l_int32        *pbps,
1908
                  l_int32        *pspp,
1909
                  l_int32        *pres,
1910
                  l_int32        *pcmap,
1911
                  l_int32        *pformat)
1912
0
{
1913
0
l_uint8  *data;
1914
0
l_int32   i, ret;
1915
0
TIFF     *tif;
1916
1917
0
    if (pw) *pw = 0;
1918
0
    if (ph) *ph = 0;
1919
0
    if (pbps) *pbps = 0;
1920
0
    if (pspp) *pspp = 0;
1921
0
    if (pres) *pres = 0;
1922
0
    if (pcmap) *pcmap = 0;
1923
0
    if (pformat) *pformat = 0;
1924
0
    if (!pw && !ph && !pbps && !pspp && !pres && !pcmap && !pformat)
1925
0
        return ERROR_INT("no results requested", __func__, 1);
1926
0
    if (!cdata)
1927
0
        return ERROR_INT("cdata not defined", __func__, 1);
1928
1929
        /* Open a tiff stream to memory */
1930
0
    data = (l_uint8 *)cdata;  /* we're really not going to change this */
1931
0
    if ((tif = fopenTiffMemstream("tifferror", "r", &data, &size)) == NULL)
1932
0
        return ERROR_INT("tiff stream not opened", __func__, 1);
1933
1934
0
    for (i = 0; i < n; i++) {
1935
0
        if (TIFFReadDirectory(tif) == 0) {
1936
0
            TIFFClose(tif);
1937
0
            return ERROR_INT("image n not found in file", __func__, 1);
1938
0
        }
1939
0
    }
1940
1941
0
    ret = tiffReadHeaderTiff(tif, pw, ph, pbps, pspp, pres, pcmap, pformat);
1942
0
    TIFFClose(tif);
1943
0
    return ret;
1944
0
}
1945
1946
1947
/*!
1948
 * \brief   tiffReadHeaderTiff()
1949
 *
1950
 * \param[in]    tif
1951
 * \param[out]   pw        [optional] width
1952
 * \param[out]   ph        [optional] height
1953
 * \param[out]   pbps      [optional] bits per sample -- 1, 2, 4 or 8
1954
 * \param[out]   pspp      [optional] samples per pixel -- 1 or 3
1955
 * \param[out]   pres      [optional] resolution in x dir; NULL to ignore
1956
 * \param[out]   pcmap     [optional] cmap exists; input NULL to ignore
1957
 * \param[out]   pformat   [optional] tiff format; input NULL to ignore
1958
 * \return  0 if OK, 1 on error
1959
 */
1960
static l_int32
1961
tiffReadHeaderTiff(TIFF     *tif,
1962
                   l_int32  *pw,
1963
                   l_int32  *ph,
1964
                   l_int32  *pbps,
1965
                   l_int32  *pspp,
1966
                   l_int32  *pres,
1967
                   l_int32  *pcmap,
1968
                   l_int32  *pformat)
1969
0
{
1970
0
l_uint16   tiffcomp;
1971
0
l_uint16   bps, spp;
1972
0
l_uint16  *rmap, *gmap, *bmap;
1973
0
l_int32    xres, yres;
1974
0
l_uint32   w, h;
1975
1976
0
    if (!tif)
1977
0
        return ERROR_INT("tif not opened", __func__, 1);
1978
1979
0
    TIFFGetField(tif, TIFFTAG_IMAGEWIDTH, &w);
1980
0
    TIFFGetField(tif, TIFFTAG_IMAGELENGTH, &h);
1981
0
    TIFFGetFieldDefaulted(tif, TIFFTAG_BITSPERSAMPLE, &bps);
1982
0
    TIFFGetFieldDefaulted(tif, TIFFTAG_SAMPLESPERPIXEL, &spp);
1983
0
    if (w < 1 || h < 1)
1984
0
        return ERROR_INT("tif w and h not both > 0", __func__, 1);
1985
0
    if (bps != 1 && bps != 2 && bps != 4 && bps != 8 && bps != 16)
1986
0
        return ERROR_INT("bps not in set {1,2,4,8,16}", __func__, 1);
1987
0
    if (spp != 1 && spp != 2 && spp != 3 && spp != 4)
1988
0
        return ERROR_INT("spp not in set {1,2,3,4}", __func__, 1);
1989
0
    if (pw) *pw = w;
1990
0
    if (ph) *ph = h;
1991
0
    if (pbps) *pbps = bps;
1992
0
    if (pspp) *pspp = spp;
1993
0
    if (pres) {
1994
0
        if (getTiffStreamResolution(tif, &xres, &yres) == 0)
1995
0
            *pres = (l_int32)xres;
1996
0
    }
1997
0
    if (pcmap) {
1998
0
        if (TIFFGetField(tif, TIFFTAG_COLORMAP, &rmap, &gmap, &bmap))
1999
0
            *pcmap = 1;
2000
0
    }
2001
0
    if (pformat) {
2002
0
        TIFFGetFieldDefaulted(tif, TIFFTAG_COMPRESSION, &tiffcomp);
2003
0
        *pformat = getTiffCompressedFormat(tiffcomp);
2004
0
    }
2005
0
    return 0;
2006
0
}
2007
2008
2009
/*!
2010
 * \brief   findTiffCompression()
2011
 *
2012
 * \param[in]    fp         file stream; must be rewound to BOF
2013
 * \param[out]   pcomptype  compression type
2014
 * \return  0 if OK, 1 on error
2015
 *
2016
 * <pre>
2017
 * Notes:
2018
 *      (1) The returned compression type is that defined in
2019
 *          the enum in imageio.h.  It is not the tiff flag value.
2020
 *      (2) The compression type is initialized to IFF_UNKNOWN.
2021
 *          If it is not one of the specified types, the returned
2022
 *          type is IFF_TIFF, which indicates no compression.
2023
 *      (3) When this function is called, the stream must be at BOF.
2024
 *          If the opened stream is to be used again to read the
2025
 *          file, it must be rewound to BOF after calling this function.
2026
 * </pre>
2027
 */
2028
l_ok
2029
findTiffCompression(FILE     *fp,
2030
                    l_int32  *pcomptype)
2031
0
{
2032
0
l_uint16  tiffcomp;
2033
0
TIFF     *tif;
2034
2035
0
    if (!pcomptype)
2036
0
        return ERROR_INT("&comptype not defined", __func__, 1);
2037
0
    *pcomptype = IFF_UNKNOWN;  /* init */
2038
0
    if (!fp)
2039
0
        return ERROR_INT("stream not defined", __func__, 1);
2040
2041
0
    if ((tif = fopenTiff(fp, "r")) == NULL)
2042
0
        return ERROR_INT("tif not opened", __func__, 1);
2043
0
    TIFFGetFieldDefaulted(tif, TIFFTAG_COMPRESSION, &tiffcomp);
2044
0
    *pcomptype = getTiffCompressedFormat(tiffcomp);
2045
0
    TIFFCleanup(tif);
2046
0
    return 0;
2047
0
}
2048
2049
2050
/*!
2051
 * \brief   getTiffCompressedFormat()
2052
 *
2053
 * \param[in]    tiffcomp    defined in tiff.h
2054
 * \return  compression format defined in imageio.h
2055
 *
2056
 * <pre>
2057
 * Notes:
2058
 *      (1) The input must be the actual tiff compression type
2059
 *          returned by a tiff library call.  It should always be
2060
 *          a valid tiff type.
2061
 *      (2) The return type is defined in the enum in imageio.h.
2062
 * </pre>
2063
 */
2064
static l_int32
2065
getTiffCompressedFormat(l_uint16  tiffcomp)
2066
0
{
2067
0
l_int32  comptype;
2068
2069
0
    switch (tiffcomp)
2070
0
    {
2071
0
    case COMPRESSION_CCITTFAX4:
2072
0
        comptype = IFF_TIFF_G4;
2073
0
        break;
2074
0
    case COMPRESSION_CCITTFAX3:
2075
0
        comptype = IFF_TIFF_G3;
2076
0
        break;
2077
0
    case COMPRESSION_CCITTRLE:
2078
0
        comptype = IFF_TIFF_RLE;
2079
0
        break;
2080
0
    case COMPRESSION_PACKBITS:
2081
0
        comptype = IFF_TIFF_PACKBITS;
2082
0
        break;
2083
0
    case COMPRESSION_LZW:
2084
0
        comptype = IFF_TIFF_LZW;
2085
0
        break;
2086
0
    case COMPRESSION_ADOBE_DEFLATE:
2087
0
        comptype = IFF_TIFF_ZIP;
2088
0
        break;
2089
0
    case COMPRESSION_JPEG:
2090
0
        comptype = IFF_TIFF_JPEG;
2091
0
        break;
2092
0
    default:
2093
0
        comptype = IFF_TIFF;
2094
0
        break;
2095
0
    }
2096
0
    return comptype;
2097
0
}
2098
2099
2100
/*--------------------------------------------------------------*
2101
 *                   Extraction of tiff g4 data                 *
2102
 *--------------------------------------------------------------*/
2103
/*!
2104
 * \brief   extractG4DataFromFile()
2105
 *
2106
 * \param[in]    filein
2107
 * \param[out]   pdata         binary data of ccitt g4 encoded stream
2108
 * \param[out]   pnbytes       size of binary data
2109
 * \param[out]   pw            [optional] image width
2110
 * \param[out]   ph            [optional] image height
2111
 * \param[out]   pminisblack   [optional] boolean
2112
 * \return  0 if OK, 1 on error
2113
 */
2114
l_ok
2115
extractG4DataFromFile(const char  *filein,
2116
                      l_uint8    **pdata,
2117
                      size_t      *pnbytes,
2118
                      l_int32     *pw,
2119
                      l_int32     *ph,
2120
                      l_int32     *pminisblack)
2121
0
{
2122
0
l_uint8  *inarray, *data;
2123
0
l_uint16  minisblack, comptype;  /* accessors require l_uint16 */
2124
0
l_int32   istiff;
2125
0
l_uint32  w, h, rowsperstrip;  /* accessors require l_uint32 */
2126
0
l_uint32  diroff;
2127
0
size_t    fbytes, nbytes;
2128
0
FILE     *fpin;
2129
0
TIFF     *tif;
2130
2131
0
    if (!pdata)
2132
0
        return ERROR_INT("&data not defined", __func__, 1);
2133
0
    if (!pnbytes)
2134
0
        return ERROR_INT("&nbytes not defined", __func__, 1);
2135
0
    if (!pw && !ph && !pminisblack)
2136
0
        return ERROR_INT("no output data requested", __func__, 1);
2137
0
    *pdata = NULL;
2138
0
    *pnbytes = 0;
2139
2140
0
    if ((fpin = fopenReadStream(filein)) == NULL)
2141
0
        return ERROR_INT_1("stream not opened to file", filein, __func__, 1);
2142
0
    istiff = fileFormatIsTiff(fpin);
2143
0
    fclose(fpin);
2144
0
    if (!istiff)
2145
0
        return ERROR_INT_1("filein not tiff", filein, __func__, 1);
2146
2147
0
    if ((inarray = l_binaryRead(filein, &fbytes)) == NULL)
2148
0
        return ERROR_INT_1("inarray not made", filein, __func__, 1);
2149
2150
        /* Get metadata about the image */
2151
0
    if ((tif = openTiff(filein, "rb")) == NULL) {
2152
0
        LEPT_FREE(inarray);
2153
0
        return ERROR_INT_1("tif not open for read", filein, __func__, 1);
2154
0
    }
2155
0
    TIFFGetField(tif, TIFFTAG_COMPRESSION, &comptype);
2156
0
    if (comptype != COMPRESSION_CCITTFAX4) {
2157
0
        LEPT_FREE(inarray);
2158
0
        TIFFClose(tif);
2159
0
        return ERROR_INT_1("filein is not g4 compressed", filein, __func__, 1);
2160
0
    }
2161
2162
0
    TIFFGetField(tif, TIFFTAG_IMAGEWIDTH, &w);
2163
0
    TIFFGetField(tif, TIFFTAG_IMAGELENGTH, &h);
2164
0
    TIFFGetField(tif, TIFFTAG_ROWSPERSTRIP, &rowsperstrip);
2165
0
    if (h != rowsperstrip)
2166
0
        L_WARNING("more than 1 strip\n", __func__);
2167
        /* From the standard:
2168
             TIFFTAG_PHOTOMETRIC = 0 (false) -->  min value is white.
2169
             TIFFTAG_PHOTOMETRIC = 1 (true) -->  min value is black.
2170
           Most 1 bpp tiffs have the tag value 0 (black is 1),
2171
           because there are fewer black pixels than white pixels,
2172
           so it makes sense to encode runs of black pixels.  */
2173
0
    TIFFGetField(tif, TIFFTAG_PHOTOMETRIC, &minisblack);
2174
/*    TIFFPrintDirectory(tif, stderr, 0); */
2175
0
    TIFFClose(tif);
2176
0
    if (pw) *pw = (l_int32)w;
2177
0
    if (ph) *ph = (l_int32)h;
2178
0
    if (pminisblack) *pminisblack = (l_int32)minisblack;
2179
2180
        /* The header has 8 bytes: the first 2 are the magic number,
2181
         * the next 2 are the version, and the last 4 are the
2182
         * offset to the first directory.  That's what we want here.
2183
         * We have to test the byte order before decoding 4 bytes! */
2184
0
    if (inarray[0] == 0x4d) {  /* big-endian */
2185
0
        diroff = (inarray[4] << 24) | (inarray[5] << 16) |
2186
0
                 (inarray[6] << 8) | inarray[7];
2187
0
    } else  {   /* inarray[0] == 0x49 :  little-endian */
2188
0
        diroff = (inarray[7] << 24) | (inarray[6] << 16) |
2189
0
                 (inarray[5] << 8) | inarray[4];
2190
0
    }
2191
/*    lept_stderr(" diroff = %d, %x\n", diroff, diroff); */
2192
2193
        /* Extract the ccittg4 encoded data from the tiff file.
2194
         * We skip the 8 byte header and take nbytes of data,
2195
         * up to the beginning of the directory (at diroff)  */
2196
0
    nbytes = diroff - 8;
2197
0
    if (nbytes > MaxNumTiffBytes) {
2198
0
        LEPT_FREE(inarray);
2199
0
        L_ERROR("requesting %zu bytes > %zu\n", __func__,
2200
0
                nbytes, MaxNumTiffBytes);
2201
0
        return 1;
2202
0
    }
2203
0
    *pnbytes = nbytes;
2204
0
    if ((data = (l_uint8 *)LEPT_CALLOC(nbytes, sizeof(l_uint8))) == NULL) {
2205
0
        LEPT_FREE(inarray);
2206
0
        return ERROR_INT("data not allocated", __func__, 1);
2207
0
    }
2208
0
    *pdata = data;
2209
0
    memcpy(data, inarray + 8, nbytes);
2210
0
    LEPT_FREE(inarray);
2211
2212
0
    return 0;
2213
0
}
2214
2215
2216
/*--------------------------------------------------------------*
2217
 *               Open tiff stream from file stream              *
2218
 *--------------------------------------------------------------*/
2219
/*!
2220
 * \brief   fopenTiff()
2221
 *
2222
 * \param[in]    fp           file stream
2223
 * \param[in]    modestring   "r", "w", ...
2224
 * \return  tiff data structure, opened for a file descriptor
2225
 *
2226
 * <pre>
2227
 * Notes:
2228
 *      (1) Why is this here?  Leffler did not provide a function that
2229
 *          takes a stream and gives a TIFF.  He only gave one that
2230
 *          generates a TIFF starting with a file descriptor.  So we
2231
 *          need to make it here, because it is useful to have functions
2232
 *          that take a stream as input.
2233
 *      (2) We use TIFFClientOpen() together with a set of static wrapper
2234
 *          functions which map TIFF read, write, seek, close and size.
2235
 *          to functions expecting a cookie of type stream (i.e. FILE *).
2236
 *          This implementation was contributed by Jürgen Buchmüller.
2237
 * </pre>
2238
 */
2239
static TIFF *
2240
fopenTiff(FILE        *fp,
2241
          const char  *modestring)
2242
0
{
2243
0
    if (!fp)
2244
0
        return (TIFF *)ERROR_PTR("stream not opened", __func__, NULL);
2245
0
    if (!modestring)
2246
0
        return (TIFF *)ERROR_PTR("modestring not defined", __func__, NULL);
2247
2248
0
    TIFFSetWarningHandler(NULL);  /* disable warnings */
2249
0
    TIFFSetErrorHandler(NULL);  /* disable error messages */
2250
2251
0
    fseek(fp, 0, SEEK_SET);
2252
0
    return TIFFClientOpen("TIFFstream", modestring, (thandle_t)fp,
2253
0
                          lept_read_proc, lept_write_proc, lept_seek_proc,
2254
0
                          lept_close_proc, lept_size_proc, NULL, NULL);
2255
0
}
2256
2257
2258
/*--------------------------------------------------------------*
2259
 *                      Wrapper for TIFFOpen                    *
2260
 *--------------------------------------------------------------*/
2261
/*!
2262
 * \brief   openTiff()
2263
 *
2264
 * \param[in]    filename
2265
 * \param[in]    modestring   "r", "w", ...
2266
 * \return  tiff data structure
2267
 *
2268
 * <pre>
2269
 * Notes:
2270
 *      (1) This handles multi-platform file naming.
2271
 * </pre>
2272
 */
2273
static TIFF *
2274
openTiff(const char  *filename,
2275
         const char  *modestring)
2276
0
{
2277
0
char  *fname;
2278
0
TIFF  *tif;
2279
2280
0
    if (!filename)
2281
0
        return (TIFF *)ERROR_PTR("filename not defined", __func__, NULL);
2282
0
    if (!modestring)
2283
0
        return (TIFF *)ERROR_PTR("modestring not defined", __func__, NULL);
2284
2285
0
    TIFFSetWarningHandler(NULL);  /* disable warnings */
2286
0
    TIFFSetErrorHandler(NULL);  /* disable error messages */
2287
2288
0
    fname = genPathname(filename, NULL);
2289
0
    tif = TIFFOpen(fname, modestring);
2290
0
    LEPT_FREE(fname);
2291
0
    return tif;
2292
0
}
2293
2294
2295
/*----------------------------------------------------------------------*
2296
 *     Memory I/O: reading memory --> pix and writing pix --> memory    *
2297
 *----------------------------------------------------------------------*/
2298
/*  It would be nice to use open_memstream() and fmemopen()
2299
 *  for writing and reading to memory, rsp.  These functions manage
2300
 *  memory for writes and reads that use a file streams interface.
2301
 *  Unfortunately, the tiff library only has an interface for reading
2302
 *  and writing to file descriptors, not to file streams.  The tiff
2303
 *  library procedure is to open a "tiff stream" and read/write to it.
2304
 *  The library provides a client interface for managing the I/O
2305
 *  from memory, which requires seven callbacks.  See the TIFFClientOpen
2306
 *  man page for callback signatures.  Adam Langley provided the code
2307
 *  to do this.  */
2308
2309
/*!
2310
 * \brief   Memory stream buffer used with TIFFClientOpen()
2311
 *
2312
 *  The L_Memstram %buffer has different functions in writing and reading.
2313
 *
2314
 *     * In reading, it is assigned to the data and read from as
2315
 *       the tiff library uncompresses the data and generates the pix.
2316
 *       The %offset points to the current read position in the data,
2317
 *       and the %hw always gives the number of bytes of data.
2318
 *       The %outdata and %outsize ptrs are not used.
2319
 *       When finished, tiffCloseCallback() simply frees the L_Memstream.
2320
 *
2321
 *     * In writing, it accepts the data that the tiff library
2322
 *       produces when a pix is compressed.  the buffer points to a
2323
 *       malloced area of %bufsize bytes.  The current writing position
2324
 *       in the buffer is %offset and the most ever written is %hw.
2325
 *       The buffer is expanded as necessary.  When finished,
2326
 *       tiffCloseCallback() assigns the %outdata and %outsize ptrs
2327
 *       to the %buffer and %bufsize results, and frees the L_Memstream.
2328
 */
2329
struct L_Memstream
2330
{
2331
    l_uint8   *buffer;    /* expands to hold data when written to;         */
2332
                          /* fixed size when read from.                    */
2333
    size_t     bufsize;   /* current size allocated when written to;       */
2334
                          /* fixed size of input data when read from.      */
2335
    size_t     offset;    /* byte offset from beginning of buffer.         */
2336
    size_t     hw;        /* high-water mark; max bytes in buffer.         */
2337
    l_uint8  **poutdata;  /* input param for writing; data goes here.      */
2338
    size_t    *poutsize;  /* input param for writing; data size goes here. */
2339
};
2340
typedef struct L_Memstream  L_MEMSTREAM;
2341
2342
2343
    /* These are static functions for memory I/O */
2344
static L_MEMSTREAM *memstreamCreateForRead(l_uint8 *indata, size_t pinsize);
2345
static L_MEMSTREAM *memstreamCreateForWrite(l_uint8 **poutdata,
2346
                                            size_t *poutsize);
2347
static tsize_t tiffReadCallback(thandle_t handle, tdata_t data, tsize_t length);
2348
static tsize_t tiffWriteCallback(thandle_t handle, tdata_t data,
2349
                                 tsize_t length);
2350
static toff_t tiffSeekCallback(thandle_t handle, toff_t offset, l_int32 whence);
2351
static l_int32 tiffCloseCallback(thandle_t handle);
2352
static toff_t tiffSizeCallback(thandle_t handle);
2353
static l_int32 tiffMapCallback(thandle_t handle, tdata_t *data, toff_t *length);
2354
static void tiffUnmapCallback(thandle_t handle, tdata_t data, toff_t length);
2355
2356
2357
static L_MEMSTREAM *
2358
memstreamCreateForRead(l_uint8  *indata,
2359
                       size_t    insize)
2360
0
{
2361
0
L_MEMSTREAM  *mstream;
2362
2363
0
    mstream = (L_MEMSTREAM *)LEPT_CALLOC(1, sizeof(L_MEMSTREAM));
2364
0
    mstream->buffer = indata;   /* handle to input data array */
2365
0
    mstream->bufsize = insize;  /* amount of input data */
2366
0
    mstream->hw = insize;       /* high-water mark fixed at input data size */
2367
0
    mstream->offset = 0;        /* offset always starts at 0 */
2368
0
    return mstream;
2369
0
}
2370
2371
2372
static L_MEMSTREAM *
2373
memstreamCreateForWrite(l_uint8  **poutdata,
2374
                        size_t    *poutsize)
2375
0
{
2376
0
L_MEMSTREAM  *mstream;
2377
2378
0
    mstream = (L_MEMSTREAM *)LEPT_CALLOC(1, sizeof(L_MEMSTREAM));
2379
0
    mstream->buffer = (l_uint8 *)LEPT_CALLOC(8 * 1024, 1);
2380
0
    mstream->bufsize = 8 * 1024;
2381
0
    mstream->poutdata = poutdata;  /* used only at end of write */
2382
0
    mstream->poutsize = poutsize;  /* ditto  */
2383
0
    mstream->hw = mstream->offset = 0;
2384
0
    return mstream;
2385
0
}
2386
2387
2388
static tsize_t
2389
tiffReadCallback(thandle_t  handle,
2390
                 tdata_t    data,
2391
                 tsize_t    length)
2392
0
{
2393
0
L_MEMSTREAM  *mstream;
2394
0
size_t        amount;
2395
2396
0
    mstream = (L_MEMSTREAM *)handle;
2397
0
    amount = L_MIN((size_t)length, mstream->hw - mstream->offset);
2398
2399
        /* Fuzzed files can create this condition! */
2400
0
    if (mstream->offset + amount < amount ||  /* overflow */
2401
0
        mstream->offset + amount > mstream->hw) {
2402
0
        lept_stderr("Bad file: amount too big: %zu\n", amount);
2403
0
        return 0;
2404
0
    }
2405
2406
0
    memcpy(data, mstream->buffer + mstream->offset, amount);
2407
0
    mstream->offset += amount;
2408
0
    return amount;
2409
0
}
2410
2411
2412
static tsize_t
2413
tiffWriteCallback(thandle_t  handle,
2414
                  tdata_t    data,
2415
                  tsize_t    length)
2416
0
{
2417
0
L_MEMSTREAM  *mstream;
2418
0
size_t        newsize;
2419
2420
        /* reallocNew() uses calloc to initialize the array.
2421
         * If malloc is used instead, for some of the encoding methods,
2422
         * not all the data in 'bufsize' bytes in the buffer will
2423
         * have been initialized by the end of the compression. */
2424
0
    mstream = (L_MEMSTREAM *)handle;
2425
0
    if (mstream->offset + length > mstream->bufsize) {
2426
0
        newsize = 2 * (mstream->offset + length);
2427
0
        mstream->buffer = (l_uint8 *)reallocNew((void **)&mstream->buffer,
2428
0
                                                mstream->hw, newsize);
2429
0
        mstream->bufsize = newsize;
2430
0
    }
2431
2432
0
    memcpy(mstream->buffer + mstream->offset, data, length);
2433
0
    mstream->offset += length;
2434
0
    mstream->hw = L_MAX(mstream->offset, mstream->hw);
2435
0
    return length;
2436
0
}
2437
2438
2439
static toff_t
2440
tiffSeekCallback(thandle_t  handle,
2441
                 toff_t     offset,
2442
                 l_int32    whence)
2443
0
{
2444
0
L_MEMSTREAM  *mstream;
2445
2446
0
    mstream = (L_MEMSTREAM *)handle;
2447
0
    switch (whence) {
2448
0
        case SEEK_SET:
2449
/*            lept_stderr("seek_set: offset = %d\n", offset); */
2450
0
            if((size_t)offset != offset) {  /* size_t overflow on uint32 */
2451
0
                return (toff_t)ERROR_INT("too large offset value", __func__, 1);
2452
0
            }
2453
0
            mstream->offset = offset;
2454
0
            break;
2455
0
        case SEEK_CUR:
2456
/*            lept_stderr("seek_cur: offset = %d\n", offset); */
2457
0
            mstream->offset += offset;
2458
0
            break;
2459
0
        case SEEK_END:
2460
/*            lept_stderr("seek end: hw = %d, offset = %d\n",
2461
                    mstream->hw, offset); */
2462
0
            mstream->offset = mstream->hw - offset;  /* offset >= 0 */
2463
0
            break;
2464
0
        default:
2465
0
            return (toff_t)ERROR_INT("bad whence value", __func__,
2466
0
                                     mstream->offset);
2467
0
    }
2468
2469
0
    return mstream->offset;
2470
0
}
2471
2472
2473
static l_int32
2474
tiffCloseCallback(thandle_t  handle)
2475
0
{
2476
0
L_MEMSTREAM  *mstream;
2477
2478
0
    mstream = (L_MEMSTREAM *)handle;
2479
0
    if (mstream->poutdata) {   /* writing: save the output data */
2480
0
        *mstream->poutdata = mstream->buffer;
2481
0
        *mstream->poutsize = mstream->hw;
2482
0
    }
2483
0
    LEPT_FREE(mstream);  /* never free the buffer! */
2484
0
    return 0;
2485
0
}
2486
2487
2488
static toff_t
2489
tiffSizeCallback(thandle_t  handle)
2490
0
{
2491
0
L_MEMSTREAM  *mstream;
2492
2493
0
    mstream = (L_MEMSTREAM *)handle;
2494
0
    return mstream->hw;
2495
0
}
2496
2497
2498
static l_int32
2499
tiffMapCallback(thandle_t  handle,
2500
                tdata_t   *data,
2501
                toff_t    *length)
2502
0
{
2503
0
L_MEMSTREAM  *mstream;
2504
2505
0
    mstream = (L_MEMSTREAM *)handle;
2506
0
    *data = mstream->buffer;
2507
0
    *length = mstream->hw;
2508
0
    return 0;
2509
0
}
2510
2511
2512
static void
2513
tiffUnmapCallback(thandle_t  handle,
2514
                  tdata_t    data,
2515
                  toff_t     length)
2516
0
{
2517
0
    return;
2518
0
}
2519
2520
2521
/*!
2522
 * \brief   fopenTiffMemstream()
2523
 *
2524
 * \param[in]    filename    for error output; can be ""
2525
 * \param[in]    operation   "w" for write, "r" for read
2526
 * \param[out]   pdata       written data
2527
 * \param[out]   pdatasize   size of written data
2528
 * \return  tiff data structure, opened for write to memory
2529
 *
2530
 * <pre>
2531
 * Notes:
2532
 *      (1) This wraps up a number of callbacks for either:
2533
 *            * reading from tiff in memory buffer --> pix
2534
 *            * writing from pix --> tiff in memory buffer
2535
 *      (2) After use, the memstream is automatically destroyed when
2536
 *          TIFFClose() is called.  TIFFCleanup() doesn't free the memstream.
2537
 *      (3) This does not work in append mode, and in write mode it
2538
 *          does not append.
2539
 * </pre>
2540
 */
2541
static TIFF *
2542
fopenTiffMemstream(const char  *filename,
2543
                   const char  *operation,
2544
                   l_uint8    **pdata,
2545
                   size_t      *pdatasize)
2546
0
{
2547
0
L_MEMSTREAM  *mstream;
2548
0
TIFF         *tif;
2549
2550
0
    if (!filename)
2551
0
        return (TIFF *)ERROR_PTR("filename not defined", __func__, NULL);
2552
0
    if (!operation)
2553
0
        return (TIFF *)ERROR_PTR("operation not defined", __func__, NULL);
2554
0
    if (!pdata)
2555
0
        return (TIFF *)ERROR_PTR("&data not defined", __func__, NULL);
2556
0
    if (!pdatasize)
2557
0
        return (TIFF *)ERROR_PTR("&datasize not defined", __func__, NULL);
2558
0
    if (strcmp(operation, "r") && strcmp(operation, "w"))
2559
0
        return (TIFF *)ERROR_PTR("op not 'r' or 'w'", __func__, NULL);
2560
2561
0
    if (!strcmp(operation, "r"))
2562
0
        mstream = memstreamCreateForRead(*pdata, *pdatasize);
2563
0
    else
2564
0
        mstream = memstreamCreateForWrite(pdata, pdatasize);
2565
0
    if (!mstream)
2566
0
        return (TIFF *)ERROR_PTR("mstream not made", __func__, NULL);
2567
2568
0
    TIFFSetWarningHandler(NULL);  /* disable warnings */
2569
0
    TIFFSetErrorHandler(NULL);  /* disable error messages */
2570
2571
0
    tif = TIFFClientOpen(filename, operation, (thandle_t)mstream,
2572
0
                         tiffReadCallback, tiffWriteCallback,
2573
0
                         tiffSeekCallback, tiffCloseCallback,
2574
0
                         tiffSizeCallback, tiffMapCallback,
2575
0
                         tiffUnmapCallback);
2576
0
    if (!tif)
2577
0
        LEPT_FREE(mstream);
2578
0
    return tif;
2579
0
}
2580
2581
2582
/*!
2583
 * \brief   pixReadMemTiff()
2584
 *
2585
 * \param[in]    cdata    const; tiff-encoded
2586
 * \param[in]    size     size of cdata
2587
 * \param[in]    n        page image number: 0-based
2588
 * \return  pix, or NULL on error
2589
 *
2590
 * <pre>
2591
 * Notes:
2592
 *      (1) This is a version of pixReadTiff(), where the data is read
2593
 *          from a memory buffer and uncompressed.
2594
 *      (2) Use TIFFClose(); TIFFCleanup() doesn't free internal memstream.
2595
 *      (3) No warning messages on failure, because of how multi-page
2596
 *          TIFF reading works. You are supposed to keep trying until
2597
 *          it stops working.
2598
 *      (4) Tiff directory overhead is linear in the input page number.
2599
 *          If reading many images, use pixReadMemFromMultipageTiff().
2600
 * </pre>
2601
 */
2602
PIX *
2603
pixReadMemTiff(const l_uint8  *cdata,
2604
               size_t          size,
2605
               l_int32         n)
2606
0
{
2607
0
l_uint8  *data;
2608
0
l_int32   i;
2609
0
PIX      *pix;
2610
0
TIFF     *tif;
2611
2612
0
    if (!cdata)
2613
0
        return (PIX *)ERROR_PTR("cdata not defined", __func__, NULL);
2614
2615
0
    data = (l_uint8 *)cdata;  /* we're really not going to change this */
2616
0
    if ((tif = fopenTiffMemstream("tifferror", "r", &data, &size)) == NULL)
2617
0
        return (PIX *)ERROR_PTR("tiff stream not opened", __func__, NULL);
2618
2619
0
    pix = NULL;
2620
0
    for (i = 0; ; i++) {
2621
0
        if (i == n) {
2622
0
            if ((pix = pixReadFromTiffStream(tif)) == NULL) {
2623
0
                TIFFClose(tif);
2624
0
                return NULL;
2625
0
            }
2626
0
            pixSetInputFormat(pix, IFF_TIFF);
2627
0
            break;
2628
0
        }
2629
0
        if (TIFFReadDirectory(tif) == 0)
2630
0
            break;
2631
0
        if (i == ManyPagesInTiffFile + 1) {
2632
0
            L_WARNING("big file: more than %d pages\n", __func__,
2633
0
                      ManyPagesInTiffFile);
2634
0
        }
2635
0
    }
2636
2637
0
    TIFFClose(tif);
2638
0
    return pix;
2639
0
}
2640
2641
2642
/*!
2643
 * \brief   pixReadMemFromMultipageTiff()
2644
 *
2645
 * \param[in]    cdata      const; tiff-encoded
2646
 * \param[in]    size       size of cdata
2647
 * \param[in,out]  poffset  set offset to 0 for first image
2648
 * \return  pix, or NULL on error or if previous call returned the last image
2649
 *
2650
 * <pre>
2651
 * Notes:
2652
 *      (1) This is a read-from-memory version of pixReadFromMultipageTiff().
2653
 *          See that function for usage.
2654
 *      (2) If reading sequentially from the tiff data, this is more
2655
 *          efficient than pixReadMemTiff(), which has an overhead
2656
 *          proportional to the image index n.
2657
 *      (3) Example usage for reading all the images:
2658
 *            size_t offset = 0;
2659
 *            do {
2660
 *                Pix *pix = pixReadMemFromMultipageTiff(data, size, &offset);
2661
 *                // do something with pix
2662
 *            } while (offset != 0);
2663
 * </pre>
2664
 */
2665
PIX *
2666
pixReadMemFromMultipageTiff(const l_uint8  *cdata,
2667
                            size_t          size,
2668
                            size_t         *poffset)
2669
0
{
2670
0
l_uint8  *data;
2671
0
l_int32   retval;
2672
0
size_t    offset;
2673
0
PIX      *pix;
2674
0
TIFF     *tif;
2675
2676
0
    if (!cdata)
2677
0
        return (PIX *)ERROR_PTR("cdata not defined", __func__, NULL);
2678
0
    if (!poffset)
2679
0
        return (PIX *)ERROR_PTR("&offset not defined", __func__, NULL);
2680
2681
0
    data = (l_uint8 *)cdata;  /* we're really not going to change this */
2682
0
    if ((tif = fopenTiffMemstream("tifferror", "r", &data, &size)) == NULL)
2683
0
        return (PIX *)ERROR_PTR("tiff stream not opened", __func__, NULL);
2684
2685
        /* Set ptrs in the TIFF to the beginning of the image */
2686
0
    offset = *poffset;
2687
0
    retval = (offset == 0) ? TIFFSetDirectory(tif, 0)
2688
0
                           : TIFFSetSubDirectory(tif, offset);
2689
0
    if (retval == 0) {
2690
0
        TIFFClose(tif);
2691
0
        return NULL;
2692
0
    }
2693
2694
0
    if ((pix = pixReadFromTiffStream(tif)) == NULL) {
2695
0
        TIFFClose(tif);
2696
0
        return NULL;
2697
0
    }
2698
2699
        /* Advance to the next image and return the new offset */
2700
0
    TIFFReadDirectory(tif);
2701
0
    *poffset = TIFFCurrentDirOffset(tif);
2702
0
    TIFFClose(tif);
2703
0
    return pix;
2704
0
}
2705
2706
2707
/*!
2708
 * \brief   pixaReadMemMultipageTiff()
2709
 *
2710
 * \param[in]    data    const; multiple pages; tiff-encoded
2711
 * \param[in]    size    size of cdata
2712
 * \return  pixa, or NULL on error
2713
 *
2714
 * <pre>
2715
 * Notes:
2716
 *      (1) This is an O(n) read-from-memory version of pixaReadMultipageTiff().
2717
 * </pre>
2718
 */
2719
PIXA *
2720
pixaReadMemMultipageTiff(const l_uint8  *data,
2721
                         size_t          size)
2722
0
{
2723
0
size_t  offset;
2724
0
PIX    *pix;
2725
0
PIXA   *pixa;
2726
2727
0
    if (!data)
2728
0
        return (PIXA *)ERROR_PTR("data not defined", __func__, NULL);
2729
2730
0
    offset = 0;
2731
0
    pixa = pixaCreate(0);
2732
0
    do {
2733
0
        pix = pixReadMemFromMultipageTiff(data, size, &offset);
2734
0
        pixaAddPix(pixa, pix, L_INSERT);
2735
0
    } while (offset != 0);
2736
0
    return pixa;
2737
0
}
2738
2739
2740
/*!
2741
 * \brief   pixaWriteMemMultipageTiff()
2742
 *
2743
 * \param[out]    pdata   const; tiff-encoded
2744
 * \param[out]    psize   size of data
2745
 * \param[in]     pixa    any depth; colormap will be removed
2746
 * \return  0 if OK, 1 on error
2747
 *
2748
 * <pre>
2749
 * Notes:
2750
 *      (1) fopenTiffMemstream() does not work in append mode, so we
2751
 *          must work-around with a temporary file.
2752
 *      (2) Getting a file stream from
2753
 *            open_memstream((char **)pdata, psize)
2754
 *          does not work with the tiff directory.
2755
 * </pre>
2756
 */
2757
l_ok
2758
pixaWriteMemMultipageTiff(l_uint8  **pdata,
2759
                          size_t    *psize,
2760
                          PIXA      *pixa)
2761
0
{
2762
0
const char  *modestr;
2763
0
l_int32      i, n;
2764
0
FILE        *fp;
2765
0
PIX         *pix1;
2766
2767
0
    if (pdata) *pdata = NULL;
2768
0
    if (!pdata)
2769
0
        return ERROR_INT("pdata not defined", __func__, 1);
2770
0
    if (!pixa)
2771
0
        return ERROR_INT("pixa not defined", __func__, 1);
2772
2773
#ifdef _WIN32
2774
    if ((fp = fopenWriteWinTempfile()) == NULL)
2775
        return ERROR_INT("tmpfile stream not opened", __func__, 1);
2776
#else
2777
0
    if ((fp = tmpfile()) == NULL)
2778
0
        return ERROR_INT("tmpfile stream not opened", __func__, 1);
2779
0
#endif  /* _WIN32 */
2780
2781
0
    n = pixaGetCount(pixa);
2782
0
    for (i = 0; i < n; i++) {
2783
0
        modestr = (i == 0) ? "w" : "a";
2784
0
        pix1 = pixaGetPix(pixa, i, L_CLONE);
2785
0
        if (pixGetDepth(pix1) == 1)
2786
0
            pixWriteStreamTiffWA(fp, pix1, IFF_TIFF_G4, modestr);
2787
0
        else
2788
0
            pixWriteStreamTiffWA(fp, pix1, IFF_TIFF_ZIP, modestr);
2789
0
        pixDestroy(&pix1);
2790
0
    }
2791
2792
0
    rewind(fp);
2793
0
    *pdata = l_binaryReadStream(fp, psize);
2794
0
    fclose(fp);
2795
0
    return 0;
2796
0
}
2797
2798
2799
/*!
2800
 * \brief   pixWriteMemTiff()
2801
 *
2802
 * \param[out]   pdata     data of tiff compressed image
2803
 * \param[out]   psize     size of returned data
2804
 * \param[in]    pix
2805
 * \param[in]    comptype  IFF_TIFF, IFF_TIFF_RLE, IFF_TIFF_PACKBITS,
2806
 *                         IFF_TIFF_G3, IFF_TIFF_G4,
2807
 *                         IFF_TIFF_LZW, IFF_TIFF_ZIP, IFF_TIFF_JPEG
2808
 * \return  0 if OK, 1 on error
2809
 *
2810
 *  Usage:
2811
 *      1) See pixWriteTiff(.  This version writes to
2812
 *          memory instead of to a file.
2813
 */
2814
l_ok
2815
pixWriteMemTiff(l_uint8  **pdata,
2816
                size_t    *psize,
2817
                PIX       *pix,
2818
                l_int32    comptype)
2819
0
{
2820
0
    return pixWriteMemTiffCustom(pdata, psize, pix, comptype,
2821
0
                                 NULL, NULL, NULL, NULL);
2822
0
}
2823
2824
2825
/*!
2826
 * \brief   pixWriteMemTiffCustom()
2827
 *
2828
 * \param[out]   pdata     data of tiff compressed image
2829
 * \param[out]   psize     size of returned data
2830
 * \param[in]    pix
2831
 * \param[in]    comptype  IFF_TIFF, IFF_TIFF_RLE, IFF_TIFF_PACKBITS,
2832
 *                         IFF_TIFF_G3, IFF_TIFF_G4,
2833
 *                         IFF_TIFF_LZW, IFF_TIFF_ZIP, IFF_TIFF_JPEG
2834
 * \param[in]    natags    [optional] NUMA of custom tiff tags
2835
 * \param[in]    savals    [optional] SARRAY of values
2836
 * \param[in]    satypes   [optional] SARRAY of types
2837
 * \param[in]    nasizes   [optional] NUMA of sizes
2838
 * \return  0 if OK, 1 on error
2839
 *
2840
 *  Usage:
2841
 *      1) See pixWriteTiffCustom(.  This version writes to
2842
 *          memory instead of to a file.
2843
 *      2) Use TIFFClose(); TIFFCleanup( doesn't free internal memstream.
2844
 */
2845
l_ok
2846
pixWriteMemTiffCustom(l_uint8  **pdata,
2847
                      size_t    *psize,
2848
                      PIX       *pix,
2849
                      l_int32    comptype,
2850
                      NUMA      *natags,
2851
                      SARRAY    *savals,
2852
                      SARRAY    *satypes,
2853
                      NUMA      *nasizes)
2854
0
{
2855
0
l_int32  ret;
2856
0
TIFF    *tif;
2857
2858
0
    if (!pdata)
2859
0
        return ERROR_INT("&data not defined", __func__, 1);
2860
0
    if (!psize)
2861
0
        return ERROR_INT("&size not defined", __func__, 1);
2862
0
    if (!pix)
2863
0
        return ERROR_INT("&pix not defined", __func__, 1);
2864
0
    if (pixGetDepth(pix) != 1 && comptype != IFF_TIFF &&
2865
0
        comptype != IFF_TIFF_LZW && comptype != IFF_TIFF_ZIP &&
2866
0
        comptype != IFF_TIFF_JPEG) {
2867
0
        L_WARNING("invalid compression type for bpp > 1\n", __func__);
2868
0
        comptype = IFF_TIFF_ZIP;
2869
0
    }
2870
2871
0
    if ((tif = fopenTiffMemstream("tifferror", "w", pdata, psize)) == NULL)
2872
0
        return ERROR_INT("tiff stream not opened", __func__, 1);
2873
0
    ret = pixWriteToTiffStream(tif, pix, comptype, natags, savals,
2874
0
                               satypes, nasizes);
2875
2876
0
    TIFFClose(tif);
2877
0
    return ret;
2878
0
}
2879
2880
/* ---------------------------------------*/
2881
#endif  /* HAVE_LIBTIFF && HAVE_LIBJPEG   */
2882
/* ---------------------------------------*/