Coverage Report

Created: 2025-06-13 07:15

/src/leptonica/src/readfile.c
Line
Count
Source (jump to first uncovered line)
1
/*====================================================================*
2
 -  Copyright (C) 2001 Leptonica.  All rights reserved.
3
 -
4
 -  Redistribution and use in source and binary forms, with or without
5
 -  modification, are permitted provided that the following conditions
6
 -  are met:
7
 -  1. Redistributions of source code must retain the above copyright
8
 -     notice, this list of conditions and the following disclaimer.
9
 -  2. Redistributions in binary form must reproduce the above
10
 -     copyright notice, this list of conditions and the following
11
 -     disclaimer in the documentation and/or other materials
12
 -     provided with the distribution.
13
 -
14
 -  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15
 -  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16
 -  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17
 -  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL ANY
18
 -  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19
 -  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20
 -  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21
 -  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22
 -  OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23
 -  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24
 -  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25
 *====================================================================*/
26
27
/*!
28
 * \file readfile.c:  reads image on file into memory
29
 * <pre>
30
 *
31
 *      Top-level functions for reading images from file
32
 *           PIXA      *pixaReadFiles()
33
 *           PIXA      *pixaReadFilesSA()
34
 *           PIX       *pixRead()
35
 *           PIX       *pixReadWithHint()
36
 *           PIX       *pixReadIndexed()
37
 *           PIX       *pixReadStream()
38
 *
39
 *      Read header information from file
40
 *           l_int32    pixReadHeader()
41
 *
42
 *      Format finders
43
 *           l_int32    findFileFormat()
44
 *           l_int32    findFileFormatStream()
45
 *           l_int32    findFileFormatBuffer()
46
 *           l_int32    fileFormatIsTiff()
47
 *
48
 *      Read from memory
49
 *           PIX       *pixReadMem()
50
 *           l_int32    pixReadHeaderMem()
51
 *
52
 *      Output image file information
53
 *           void       writeImageFileInfo()
54
 *
55
 *      Test function for I/O with different formats
56
 *           l_int32    ioFormatTest()
57
 *
58
 *  Supported file formats:
59
 *  (1) Reading is supported without any external libraries:
60
 *          bmp
61
 *          pnm   (including pbm, pgm, etc)
62
 *          spix  (raw serialized)
63
 *  (2) Reading is supported with installation of external libraries:
64
 *          png
65
 *          jpg   (standard jfif version)
66
 *          tiff  (including most varieties of compression)
67
 *          gif
68
 *          webp
69
 *          jp2 (jpeg 2000)
70
 *  (3) Other file types will get an "unknown format" error.
71
 * </pre>
72
 */
73
74
#ifdef HAVE_CONFIG_H
75
#include <config_auto.h>
76
#endif  /* HAVE_CONFIG_H */
77
78
#include <string.h>
79
#include "allheaders.h"
80
81
    /* Pathnames of the files written by ioFormatTest(). */
static const char *FILE_BMP = "/tmp/lept/format/file.bmp";
static const char *FILE_PNG = "/tmp/lept/format/file.png";
static const char *FILE_PNM = "/tmp/lept/format/file.pnm";
static const char *FILE_G3 = "/tmp/lept/format/file_g3.tif";
static const char *FILE_G4 = "/tmp/lept/format/file_g4.tif";
static const char *FILE_RLE = "/tmp/lept/format/file_rle.tif";
static const char *FILE_PB = "/tmp/lept/format/file_packbits.tif";
static const char *FILE_LZW = "/tmp/lept/format/file_lzw.tif";
static const char *FILE_ZIP = "/tmp/lept/format/file_zip.tif";
static const char *FILE_TIFF_JPEG = "/tmp/lept/format/file_jpeg.tif";
static const char *FILE_TIFF = "/tmp/lept/format/file.tif";
static const char *FILE_JPG = "/tmp/lept/format/file.jpg";
static const char *FILE_GIF = "/tmp/lept/format/file.gif";
static const char *FILE_WEBP = "/tmp/lept/format/file.webp";
static const char *FILE_JP2K = "/tmp/lept/format/file.jp2";

    /* Leading signature bytes of the two jp2 formats.  Each format has
     * its own codec:
     *    JP2K_CODESTREAM  <-->  OPJ_CODEC_J2K (L_J2K_CODEC)
     *    JP2K_IMAGE_DATA  <-->  OPJ_CODEC_JP2 (L_JP2_CODEC)          */
static const unsigned char JP2K_CODESTREAM[4] = {
    0xff, 0x4f, 0xff, 0x51
};
static const unsigned char JP2K_IMAGE_DATA[12] = {
    0x00, 0x00, 0x00, 0x0c,
    0x6a, 0x50, 0x20, 0x20,
    0x0d, 0x0a, 0x87, 0x0a
};
105
106
107
/*---------------------------------------------------------------------*
108
 *          Top-level functions for reading images from file           *
109
 *---------------------------------------------------------------------*/
110
/*!
111
 * \brief   pixaReadFiles()
112
 *
113
 * \param[in]    dirname
114
 * \param[in]    substr   [optional] substring filter on filenames; can be null
115
 * \return  pixa, or NULL on error
116
 *
117
 * <pre>
118
 * Notes:
119
 *      (1) %dirname is the full path for the directory.
120
 *      (2) %substr is the part of the file name (excluding
121
 *          the directory) that is to be matched.  All matching
122
 *          filenames are read into the Pixa.  If substr is NULL,
123
 *          all filenames are read into the Pixa.
124
 * </pre>
125
 */
126
PIXA *
127
pixaReadFiles(const char  *dirname,
128
              const char  *substr)
129
0
{
130
0
PIXA    *pixa;
131
0
SARRAY  *sa;
132
133
0
    if (!dirname)
134
0
        return (PIXA *)ERROR_PTR("dirname not defined", __func__, NULL);
135
136
0
    if ((sa = getSortedPathnamesInDirectory(dirname, substr, 0, 0)) == NULL)
137
0
        return (PIXA *)ERROR_PTR("sa not made", __func__, NULL);
138
139
0
    pixa = pixaReadFilesSA(sa);
140
0
    sarrayDestroy(&sa);
141
0
    return pixa;
142
0
}
143
144
145
/*!
146
 * \brief   pixaReadFilesSA()
147
 *
148
 * \param[in]    sa     full pathnames for all files
149
 * \return  pixa, or NULL on error
150
 */
151
PIXA *
152
pixaReadFilesSA(SARRAY  *sa)
153
0
{
154
0
char    *str;
155
0
l_int32  i, n;
156
0
PIX     *pix;
157
0
PIXA    *pixa;
158
159
0
    if (!sa)
160
0
        return (PIXA *)ERROR_PTR("sa not defined", __func__, NULL);
161
162
0
    n = sarrayGetCount(sa);
163
0
    pixa = pixaCreate(n);
164
0
    for (i = 0; i < n; i++) {
165
0
        str = sarrayGetString(sa, i, L_NOCOPY);
166
0
        if ((pix = pixRead(str)) == NULL) {
167
0
            L_WARNING("pix not read from file %s\n", __func__, str);
168
0
            continue;
169
0
        }
170
0
        pixaAddPix(pixa, pix, L_INSERT);
171
0
    }
172
173
0
    return pixa;
174
0
}
175
176
177
/*!
178
 * \brief   pixRead()
179
 *
180
 * \param[in]    filename    with full pathname or in local directory
181
 * \return  pix if OK; NULL on error
182
 *
183
 * <pre>
184
 * Notes:
185
 *      (1) See at top of file for supported formats.
186
 * </pre>
187
 */
188
PIX *
189
pixRead(const char  *filename)
190
0
{
191
0
FILE  *fp;
192
0
PIX   *pix;
193
194
0
    if (!filename)
195
0
        return (PIX *)ERROR_PTR("filename not defined", __func__, NULL);
196
197
0
    if ((fp = fopenReadStream(filename)) == NULL)
198
0
        return (PIX*)ERROR_PTR_1("image file not found",
199
0
                                 filename, __func__, NULL);
200
0
    pix = pixReadStream(fp, 0);
201
0
    fclose(fp);
202
0
    if (!pix)
203
0
        return (PIX *)ERROR_PTR_1("pix not read", filename, __func__, NULL);
204
0
    return pix;
205
0
}
206
207
208
/*!
209
 * \brief   pixReadWithHint()
210
 *
211
 * \param[in]    filename    with full pathname or in local directory
212
 * \param[in]    hint        bitwise OR of L_HINT_* values for jpeg;
213
 *                           use 0 for no hint
214
 * \return  pix if OK; NULL on error
215
 *
216
 * <pre>
217
 * Notes:
218
 *      (1) The hint is not binding, but may be used to optimize jpeg decoding.
219
 *          Use 0 for no hinting.
220
 * </pre>
221
 */
222
PIX *
223
pixReadWithHint(const char  *filename,
224
                l_int32      hint)
225
0
{
226
0
FILE  *fp;
227
0
PIX   *pix;
228
229
0
    if (!filename)
230
0
        return (PIX *)ERROR_PTR("filename not defined", __func__, NULL);
231
232
0
    if ((fp = fopenReadStream(filename)) == NULL)
233
0
        return (PIX *)ERROR_PTR_1("image file not found",
234
0
                                  filename, __func__, NULL);
235
0
    pix = pixReadStream(fp, hint);
236
0
    fclose(fp);
237
238
0
    if (!pix)
239
0
        return (PIX *)ERROR_PTR_1("image not returned",
240
0
                                  filename, __func__, NULL);
241
0
    return pix;
242
0
}
243
244
245
/*!
246
 * \brief   pixReadIndexed()
247
 *
248
 * \param[in]    sa      string array of full pathnames
249
 * \param[in]    index   into pathname array
250
 * \return  pix if OK; null if not found
251
 *
252
 * <pre>
253
 * Notes:
254
 *      (1) This function is useful for selecting image files from a
255
 *          directory, where the integer %index is embedded into
256
 *          the file name.
257
 *      (2) This is typically done by generating the sarray using
258
 *          getNumberedPathnamesInDirectory(), so that the %index
259
 *          pathname would have the number %index in it.  The size
260
 *          of the sarray should be the largest number (plus 1) appearing
261
 *          in the file names, respecting the constraints in the
262
 *          call to getNumberedPathnamesInDirectory().
263
 *      (3) Consequently, for some indices into the sarray, there may
264
 *          be no pathnames in the directory containing that number.
265
 *          By convention, we place empty C strings ("") in those
266
 *          locations in the sarray, and it is not an error if such
267
 *          a string is encountered and no pix is returned.
268
 *          Therefore, the caller must verify that a pix is returned.
269
 *      (4) See convertSegmentedPagesToPS() in src/psio1.c for an
270
 *          example of usage.
271
 * </pre>
272
 */
273
PIX *
274
pixReadIndexed(SARRAY  *sa,
275
               l_int32  index)
276
0
{
277
0
char    *fname;
278
0
l_int32  n;
279
0
PIX     *pix;
280
281
0
    if (!sa)
282
0
        return (PIX *)ERROR_PTR("sa not defined", __func__, NULL);
283
0
    n = sarrayGetCount(sa);
284
0
    if (index < 0 || index >= n)
285
0
        return (PIX *)ERROR_PTR("index out of bounds", __func__, NULL);
286
287
0
    fname = sarrayGetString(sa, index, L_NOCOPY);
288
0
    if (fname[0] == '\0')
289
0
        return NULL;
290
291
0
    if ((pix = pixRead(fname)) == NULL) {
292
0
        L_ERROR("pix not read from file %s\n", __func__, fname);
293
0
        return NULL;
294
0
    }
295
296
0
    return pix;
297
0
}
298
299
300
/*!
301
 * \brief   pixReadStream()
302
 *
303
 * \param[in]    fp      file stream
304
 * \param[in]    hint    bitwise OR of L_HINT_* values for jpeg; 0 for no hint
305
 * \return  pix if OK; NULL on error
306
 *
307
 * <pre>
308
 * Notes:
309
 *      (1) The hint only applies to jpeg.
310
 * </pre>
311
 */
312
PIX *
313
pixReadStream(FILE    *fp,
314
              l_int32  hint)
315
0
{
316
0
l_int32   format, ret, valid;
317
0
l_uint8  *comment;
318
0
PIX      *pix;
319
0
PIXCMAP  *cmap;
320
321
0
    if (!fp)
322
0
        return (PIX *)ERROR_PTR("stream not defined", __func__, NULL);
323
0
    pix = NULL;
324
325
0
    findFileFormatStream(fp, &format);
326
0
    switch (format)
327
0
    {
328
0
    case IFF_BMP:
329
0
        if ((pix = pixReadStreamBmp(fp)) == NULL )
330
0
            return (PIX *)ERROR_PTR( "bmp: no pix returned", __func__, NULL);
331
0
        break;
332
333
0
    case IFF_JFIF_JPEG:
334
0
        if ((pix = pixReadStreamJpeg(fp, 0, 1, NULL, hint)) == NULL)
335
0
            return (PIX *)ERROR_PTR( "jpeg: no pix returned", __func__, NULL);
336
0
        ret = fgetJpegComment(fp, &comment);
337
0
        if (!ret && comment)
338
0
            pixSetText(pix, (char *)comment);
339
0
        LEPT_FREE(comment);
340
0
        break;
341
342
0
    case IFF_PNG:
343
0
        if ((pix = pixReadStreamPng(fp)) == NULL)
344
0
            return (PIX *)ERROR_PTR("png: no pix returned", __func__, NULL);
345
0
        break;
346
347
0
    case IFF_TIFF:
348
0
    case IFF_TIFF_PACKBITS:
349
0
    case IFF_TIFF_RLE:
350
0
    case IFF_TIFF_G3:
351
0
    case IFF_TIFF_G4:
352
0
    case IFF_TIFF_LZW:
353
0
    case IFF_TIFF_ZIP:
354
0
    case IFF_TIFF_JPEG:
355
0
        if ((pix = pixReadStreamTiff(fp, 0)) == NULL)  /* page 0 by default */
356
0
            return (PIX *)ERROR_PTR("tiff: no pix returned", __func__, NULL);
357
0
        break;
358
359
0
    case IFF_PNM:
360
0
        if ((pix = pixReadStreamPnm(fp)) == NULL)
361
0
            return (PIX *)ERROR_PTR("pnm: no pix returned", __func__, NULL);
362
0
        break;
363
364
0
    case IFF_GIF:
365
0
        if ((pix = pixReadStreamGif(fp)) == NULL)
366
0
            return (PIX *)ERROR_PTR("gif: no pix returned", __func__, NULL);
367
0
        break;
368
369
0
    case IFF_JP2:
370
0
        if ((pix = pixReadStreamJp2k(fp, 1, NULL, 0, 0)) == NULL)
371
0
            return (PIX *)ERROR_PTR("jp2: no pix returned", __func__, NULL);
372
0
        break;
373
374
0
    case IFF_WEBP:
375
0
        if ((pix = pixReadStreamWebP(fp)) == NULL)
376
0
            return (PIX *)ERROR_PTR("webp: no pix returned", __func__, NULL);
377
0
        break;
378
379
0
    case IFF_PS:
380
0
        L_ERROR("PostScript reading is not supported\n", __func__);
381
0
        return NULL;
382
383
0
    case IFF_LPDF:
384
0
        L_ERROR("Pdf reading is not supported\n", __func__);
385
0
        return NULL;
386
387
0
    case IFF_SPIX:
388
0
        if ((pix = pixReadStreamSpix(fp)) == NULL)
389
0
            return (PIX *)ERROR_PTR("spix: no pix returned", __func__, NULL);
390
0
        break;
391
392
0
    case IFF_UNKNOWN:
393
0
        return (PIX *)ERROR_PTR( "Unknown format: no pix returned",
394
0
                __func__, NULL);
395
0
    }
396
397
0
    if (pix) {
398
0
        pixSetInputFormat(pix, format);
399
0
        if ((cmap = pixGetColormap(pix))) {
400
0
            pixcmapIsValid(cmap, pix, &valid);
401
0
            if (!valid) {
402
0
                pixDestroy(&pix);
403
0
                return (PIX *)ERROR_PTR("invalid colormap", __func__, NULL);
404
0
            }
405
0
        }
406
0
    }
407
0
    return pix;
408
0
}
409
410
411
412
/*---------------------------------------------------------------------*
413
 *                     Read header information from file               *
414
 *---------------------------------------------------------------------*/
415
/*!
416
 * \brief   pixReadHeader()
417
 *
418
 * \param[in]    filename    with full pathname or in local directory
419
 * \param[out]   pformat     [optional] file format
420
 * \param[out]   pw, ph      [optional] width and height
421
 * \param[out]   pbps        [optional] bits/sample
422
 * \param[out]   pspp        [optional] samples/pixel 1, 3 or 4
423
 * \param[out]   piscmap     [optional] 1 if cmap exists; 0 otherwise
424
 * \return  0 if OK, 1 on error
425
 *
426
 * <pre>
427
 * Notes:
428
 *      (1) This reads the actual headers for jpeg, png, tiff and pnm.
429
 *          For bmp and gif, we cheat and read the entire file into a pix,
430
 *          from which we extract the "header" information.
431
 * </pre>
432
 */
433
l_ok
434
pixReadHeader(const char  *filename,
435
              l_int32     *pformat,
436
              l_int32     *pw,
437
              l_int32     *ph,
438
              l_int32     *pbps,
439
              l_int32     *pspp,
440
              l_int32     *piscmap)
441
0
{
442
0
l_int32  format, ret, w, h, d, bps, spp, iscmap;
443
0
l_int32  type;  /* ignored */
444
0
FILE    *fp;
445
0
PIX     *pix;
446
447
0
    if (pw) *pw = 0;
448
0
    if (ph) *ph = 0;
449
0
    if (pbps) *pbps = 0;
450
0
    if (pspp) *pspp = 0;
451
0
    if (piscmap) *piscmap = 0;
452
0
    if (pformat) *pformat = 0;
453
0
    iscmap = 0;  /* init to false */
454
0
    if (!filename)
455
0
        return ERROR_INT("filename not defined", __func__, 1);
456
457
0
    if ((fp = fopenReadStream(filename)) == NULL)
458
0
        return ERROR_INT_1("image file not found", filename, __func__, 1);
459
0
    findFileFormatStream(fp, &format);
460
0
    fclose(fp);
461
462
0
    switch (format)
463
0
    {
464
0
    case IFF_BMP:  /* cheating: reading the entire file */
465
0
        if ((pix = pixRead(filename)) == NULL)
466
0
            return ERROR_INT_1( "bmp: pix not read", filename, __func__, 1);
467
0
        pixGetDimensions(pix, &w, &h, &d);
468
0
        bps = (d == 32) ? 8 : d;
469
0
        spp = pixGetSpp(pix);
470
0
        iscmap = (pixGetColormap(pix)) ? 1 : 0;
471
0
        pixDestroy(&pix);
472
0
        break;
473
474
0
    case IFF_JFIF_JPEG:
475
0
        ret = readHeaderJpeg(filename, &w, &h, &spp, NULL, NULL);
476
0
        bps = 8;
477
0
        if (ret)
478
0
            return ERROR_INT_1("jpeg: no header info returned",
479
0
                               filename, __func__, 1);
480
0
        break;
481
482
0
    case IFF_PNG:
483
0
        ret = readHeaderPng(filename, &w, &h, &bps, &spp, &iscmap);
484
0
        if (ret)
485
0
            return ERROR_INT_1("png: no header info returned",
486
0
                               filename, __func__, 1);
487
0
        break;
488
489
0
    case IFF_TIFF:
490
0
    case IFF_TIFF_PACKBITS:
491
0
    case IFF_TIFF_RLE:
492
0
    case IFF_TIFF_G3:
493
0
    case IFF_TIFF_G4:
494
0
    case IFF_TIFF_LZW:
495
0
    case IFF_TIFF_ZIP:
496
0
    case IFF_TIFF_JPEG:
497
            /* Reading page 0 by default; possibly redefine format */
498
0
        ret = readHeaderTiff(filename, 0, &w, &h, &bps, &spp, NULL, &iscmap,
499
0
                             &format);
500
0
        if (ret)
501
0
            return ERROR_INT_1("tiff: no header info returned",
502
0
                               filename, __func__, 1);
503
0
        break;
504
505
0
    case IFF_PNM:
506
0
        ret = readHeaderPnm(filename, &w, &h, &d, &type, &bps, &spp);
507
0
        if (ret)
508
0
            return ERROR_INT_1("pnm: no header info returned",
509
0
                               filename, __func__, 1);
510
0
        break;
511
512
0
    case IFF_GIF:  /* cheating: reading the entire file */
513
0
        if ((pix = pixRead(filename)) == NULL)
514
0
            return ERROR_INT_1( "gif: pix not read", filename, __func__, 1);
515
0
        pixGetDimensions(pix, &w, &h, &d);
516
0
        pixDestroy(&pix);
517
0
        iscmap = 1;  /* always colormapped; max 256 colors */
518
0
        spp = 1;
519
0
        bps = d;
520
0
        break;
521
522
0
    case IFF_JP2:
523
0
        ret = readHeaderJp2k(filename, &w, &h, &bps, &spp, NULL);
524
0
        break;
525
526
0
    case IFF_WEBP:
527
0
        if (readHeaderWebP(filename, &w, &h, &spp))
528
0
            return ERROR_INT_1("webp: no header info returned",
529
0
                               filename, __func__, 1);
530
0
        bps = 8;
531
0
        break;
532
533
0
    case IFF_PS:
534
0
        if (pformat) *pformat = format;
535
0
        return ERROR_INT("PostScript reading is not supported\n", __func__, 1);
536
537
0
    case IFF_LPDF:
538
0
        if (pformat) *pformat = format;
539
0
        return ERROR_INT("Pdf reading is not supported\n", __func__, 1);
540
541
0
    case IFF_SPIX:
542
0
        ret = readHeaderSpix(filename, &w, &h, &bps, &spp, &iscmap);
543
0
        if (ret)
544
0
            return ERROR_INT_1("spix: no header info returned",
545
0
                               filename, __func__, 1);
546
0
        break;
547
548
0
    case IFF_UNKNOWN:
549
0
        return ERROR_INT_1("unknown format in file", filename, __func__, 1);
550
0
    }
551
552
0
    if (pw) *pw = w;
553
0
    if (ph) *ph = h;
554
0
    if (pbps) *pbps = bps;
555
0
    if (pspp) *pspp = spp;
556
0
    if (piscmap) *piscmap = iscmap;
557
0
    if (pformat) *pformat = format;
558
0
    return 0;
559
0
}
560
561
562
/*---------------------------------------------------------------------*
563
 *                            Format finders                           *
564
 *---------------------------------------------------------------------*/
565
/*!
566
 * \brief   findFileFormat()
567
 *
568
 * \param[in]    filename
569
 * \param[out]   pformat    found format
570
 * \return  0 if OK, 1 on error or if format is not recognized
571
 */
572
l_ok
573
findFileFormat(const char  *filename,
574
               l_int32     *pformat)
575
0
{
576
0
l_int32  ret;
577
0
FILE    *fp;
578
579
0
    if (!pformat)
580
0
        return ERROR_INT("&format not defined", __func__, 1);
581
0
    *pformat = IFF_UNKNOWN;
582
0
    if (!filename)
583
0
        return ERROR_INT("filename not defined", __func__, 1);
584
585
0
    if ((fp = fopenReadStream(filename)) == NULL)
586
0
        return ERROR_INT_1("image file not found", filename, __func__, 1);
587
0
    ret = findFileFormatStream(fp, pformat);
588
0
    fclose(fp);
589
0
    return ret;
590
0
}
591
592
593
/*!
594
 * \brief   findFileFormatStream()
595
 *
596
 * \param[in]    fp        file stream
597
 * \param[out]   pformat   found format
598
 * \return  0 if OK, 1 on error or if format is not recognized
599
 *
600
 * <pre>
601
 * Notes:
602
 *      (1) Important: Side effect -- this resets fp to BOF.
603
 * </pre>
604
 */
605
l_ok
606
findFileFormatStream(FILE     *fp,
607
                     l_int32  *pformat)
608
0
{
609
0
l_uint8  firstbytes[13];
610
0
l_int32  format;
611
612
0
    if (!pformat)
613
0
        return ERROR_INT("&format not defined", __func__, 1);
614
0
    *pformat = IFF_UNKNOWN;
615
0
    if (!fp)
616
0
        return ERROR_INT("stream not defined", __func__, 1);
617
618
0
    rewind(fp);
619
0
    if (fnbytesInFile(fp) < 12)
620
0
        return ERROR_INT("truncated file", __func__, 1);
621
622
0
    if (fread(&firstbytes, 1, 12, fp) != 12)
623
0
        return ERROR_INT("failed to read first 12 bytes of file", __func__, 1);
624
0
    firstbytes[12] = 0;
625
0
    rewind(fp);
626
627
0
    findFileFormatBuffer(firstbytes, &format);
628
0
    if (format == IFF_TIFF) {
629
0
        findTiffCompression(fp, &format);
630
0
        rewind(fp);
631
0
    }
632
0
    *pformat = format;
633
0
    if (format == IFF_UNKNOWN)
634
0
        return 1;
635
0
    else
636
0
        return 0;
637
0
}
638
639
640
/*!
641
 * \brief   findFileFormatBuffer()
642
 *
643
 * \param[in]    buf       byte buffer at least 12 bytes in size; we can't check
644
 * \param[out]   pformat   found format
645
 * \return  0 if OK, 1 on error or if format is not recognized
646
 *
647
 * <pre>
648
 * Notes:
649
 *      (1) This determines the file format from the first 12 bytes in
650
 *          the compressed data stream, which are stored in memory.
651
 *      (2) For tiff files, this returns IFF_TIFF.  The specific tiff
652
 *          compression is then determined using findTiffCompression().
653
 * </pre>
654
 */
655
l_ok
656
findFileFormatBuffer(const l_uint8  *buf,
657
                     l_int32        *pformat)
658
142k
{
659
142k
l_uint16  twobytepw;
660
661
142k
    if (!pformat)
662
0
        return ERROR_INT("&format not defined", __func__, 1);
663
142k
    *pformat = IFF_UNKNOWN;
664
142k
    if (!buf)
665
0
        return ERROR_INT("byte buffer not defined", __func__, 0);
666
667
        /* Check the bmp and tiff 2-byte header ids */
668
142k
    ((char *)(&twobytepw))[0] = buf[0];
669
142k
    ((char *)(&twobytepw))[1] = buf[1];
670
671
142k
    if (convertOnBigEnd16(twobytepw) == BMP_ID) {
672
142k
        *pformat = IFF_BMP;
673
142k
        return 0;
674
142k
    }
675
676
4
    if (twobytepw == TIFF_BIGEND_ID || twobytepw == TIFF_LITTLEEND_ID) {
677
4
        *pformat = IFF_TIFF;
678
4
        return 0;
679
4
    }
680
681
        /* Check for the p*m 2-byte header ids */
682
0
    if ((buf[0] == 'P' && buf[1] == '4') || /* newer packed */
683
0
        (buf[0] == 'P' && buf[1] == '1')) {  /* old ASCII format */
684
0
        *pformat = IFF_PNM;
685
0
        return 0;
686
0
    }
687
688
0
    if ((buf[0] == 'P' && buf[1] == '5') || /* newer */
689
0
        (buf[0] == 'P' && buf[1] == '2')) {  /* old */
690
0
        *pformat = IFF_PNM;
691
0
        return 0;
692
0
    }
693
694
0
    if ((buf[0] == 'P' && buf[1] == '6') || /* newer */
695
0
        (buf[0] == 'P' && buf[1] == '3')) {  /* old */
696
0
        *pformat = IFF_PNM;
697
0
        return 0;
698
0
    }
699
700
0
    if (buf[0] == 'P' && buf[1] == '7') {  /* new arbitrary (PAM) */
701
0
        *pformat = IFF_PNM;
702
0
        return 0;
703
0
    }
704
705
        /*  Consider the first 11 bytes of the standard JFIF JPEG header:
706
         *    - The first two bytes are the most important:  0xffd8.
707
         *    - The next two bytes are the jfif marker: 0xffe0.
708
         *      Not all jpeg files have this marker.
709
         *    - The next two bytes are the header length.
710
         *    - The next 5 bytes are a null-terminated string.
711
         *      For JFIF, the string is "JFIF", naturally.  For others it
712
         *      can be "Exif" or just about anything else.
713
         *    - Because of all this variability, we only check the first
714
         *      two byte marker.  All jpeg files are identified as
715
         *      IFF_JFIF_JPEG.  */
716
0
    if (buf[0] == 0xff && buf[1] == 0xd8) {
717
0
        *pformat = IFF_JFIF_JPEG;
718
0
        return 0;
719
0
    }
720
721
        /* Check for the 8 byte PNG signature (png_signature in png.c):
722
         *       {137, 80, 78, 71, 13, 10, 26, 10}      */
723
0
    if (buf[0] == 137 && buf[1] == 80  && buf[2] == 78  && buf[3] == 71  &&
724
0
        buf[4] == 13  && buf[5] == 10  && buf[6] == 26  && buf[7] == 10) {
725
0
        *pformat = IFF_PNG;
726
0
        return 0;
727
0
    }
728
729
        /* Look for "GIF87a" or "GIF89a" */
730
0
    if (buf[0] == 'G' && buf[1] == 'I' && buf[2] == 'F' && buf[3] == '8' &&
731
0
        (buf[4] == '7' || buf[4] == '9') && buf[5] == 'a') {
732
0
        *pformat = IFF_GIF;
733
0
        return 0;
734
0
    }
735
736
        /* Check for both types of jp2k file */
737
0
    if (memcmp(buf, JP2K_CODESTREAM, 4) == 0 ||
738
0
        memcmp(buf, JP2K_IMAGE_DATA, 12) == 0) {
739
0
        *pformat = IFF_JP2;
740
0
        return 0;
741
0
    }
742
743
        /* Check for webp */
744
0
    if (buf[0] == 'R' && buf[1] == 'I' && buf[2] == 'F' && buf[3] == 'F' &&
745
0
        buf[8] == 'W' && buf[9] == 'E' && buf[10] == 'B' && buf[11] == 'P') {
746
0
        *pformat = IFF_WEBP;
747
0
        return 0;
748
0
    }
749
750
        /* Check for ps */
751
0
    if (buf[0] == '%' && buf[1] == '!' && buf[2] == 'P' && buf[3] == 'S' &&
752
0
        buf[4] == '-' && buf[5] == 'A' && buf[6] == 'd' && buf[7] == 'o' &&
753
0
        buf[8] == 'b' && buf[9] == 'e') {
754
0
        *pformat = IFF_PS;
755
0
        return 0;
756
0
    }
757
758
        /* Check for pdf */
759
0
    if (buf[0] == '%' && buf[1] == 'P' && buf[2] == 'D' && buf[3] == 'F' &&
760
0
        buf[4] == '-' && buf[5] == '1') {
761
0
        *pformat = IFF_LPDF;
762
0
        return 0;
763
0
    }
764
765
        /* Check for "spix" serialized pix */
766
0
    if (buf[0] == 's' && buf[1] == 'p' && buf[2] == 'i' && buf[3] == 'x') {
767
0
        *pformat = IFF_SPIX;
768
0
        return 0;
769
0
    }
770
771
        /* File format identifier not found; unknown */
772
0
    return 1;
773
0
}
774
775
776
/*!
777
 * \brief   fileFormatIsTiff()
778
 *
779
 * \param[in]    fp    file stream
780
 * \return  1 if file is tiff; 0 otherwise or on error
781
 */
782
l_int32
783
fileFormatIsTiff(FILE  *fp)
784
0
{
785
0
l_int32  format;
786
787
0
    if (!fp)
788
0
        return ERROR_INT("stream not defined", __func__, 0);
789
790
0
    findFileFormatStream(fp, &format);
791
0
    if (format == IFF_TIFF || format == IFF_TIFF_PACKBITS ||
792
0
        format == IFF_TIFF_RLE || format == IFF_TIFF_G3 ||
793
0
        format == IFF_TIFF_G4 || format == IFF_TIFF_LZW ||
794
0
        format == IFF_TIFF_ZIP || format == IFF_TIFF_JPEG)
795
0
        return 1;
796
0
    else
797
0
        return 0;
798
0
}
799
800
801
/*---------------------------------------------------------------------*
802
 *                            Read from memory                         *
803
 *---------------------------------------------------------------------*/
804
/*!
805
 * \brief   pixReadMem()
806
 *
807
 * \param[in]    data    const; encoded
808
 * \param[in]    size    size of data
809
 * \return  pix, or NULL on error
810
 *
811
 * <pre>
812
 * Notes:
813
 *      (1) This is a variation of pixReadStream(), where the data is read
814
 *          from a memory buffer rather than a file.
815
 *      (2) On Windows, this only reads tiff formatted files directly from
816
 *          memory.  For other formats, it writes to a temp file and
817
 *          decompresses from file.
818
 *      (3) findFileFormatBuffer() requires up to 12 bytes to decide on
819
 *          the format.  That determines the constraint here.  But in
820
 *          fact the data must contain the entire compressed string for
821
 *          the image.
822
 * </pre>
823
 */
824
PIX *
825
pixReadMem(const l_uint8  *data,
826
           size_t          size)
827
142k
{
828
142k
l_int32   format, valid;
829
142k
PIX      *pix;
830
142k
PIXCMAP  *cmap;
831
832
142k
    if (!data)
833
0
        return (PIX *)ERROR_PTR("data not defined", __func__, NULL);
834
142k
    if (size < 12)
835
0
        return (PIX *)ERROR_PTR("size < 12", __func__, NULL);
836
142k
    pix = NULL;
837
838
142k
    findFileFormatBuffer(data, &format);
839
142k
    switch (format)
840
142k
    {
841
142k
    case IFF_BMP:
842
142k
        if ((pix = pixReadMemBmp(data, size)) == NULL )
843
0
            return (PIX *)ERROR_PTR( "bmp: no pix returned", __func__, NULL);
844
142k
        break;
845
846
142k
    case IFF_JFIF_JPEG:
847
0
        if ((pix = pixReadMemJpeg(data, size, 0, 1, NULL, 0)) == NULL)
848
0
            return (PIX *)ERROR_PTR( "jpeg: no pix returned", __func__, NULL);
849
0
        break;
850
851
0
    case IFF_PNG:
852
0
        if ((pix = pixReadMemPng(data, size)) == NULL)
853
0
            return (PIX *)ERROR_PTR("png: no pix returned", __func__, NULL);
854
0
        break;
855
856
4
    case IFF_TIFF:
857
4
    case IFF_TIFF_PACKBITS:
858
4
    case IFF_TIFF_RLE:
859
4
    case IFF_TIFF_G3:
860
4
    case IFF_TIFF_G4:
861
4
    case IFF_TIFF_LZW:
862
4
    case IFF_TIFF_ZIP:
863
            /* Reading page 0 by default */
864
4
        if ((pix = pixReadMemTiff(data, size, 0)) == NULL)
865
0
            return (PIX *)ERROR_PTR("tiff: no pix returned", __func__, NULL);
866
4
        break;
867
868
4
    case IFF_PNM:
869
0
        if ((pix = pixReadMemPnm(data, size)) == NULL)
870
0
            return (PIX *)ERROR_PTR("pnm: no pix returned", __func__, NULL);
871
0
        break;
872
873
0
    case IFF_GIF:
874
0
        if ((pix = pixReadMemGif(data, size)) == NULL)
875
0
            return (PIX *)ERROR_PTR("gif: no pix returned", __func__, NULL);
876
0
        break;
877
878
0
    case IFF_JP2:
879
0
        if ((pix = pixReadMemJp2k(data, size, 1, NULL, 0, 0)) == NULL)
880
0
            return (PIX *)ERROR_PTR("jp2k: no pix returned", __func__, NULL);
881
0
        break;
882
883
0
    case IFF_WEBP:
884
0
        if ((pix = pixReadMemWebP(data, size)) == NULL)
885
0
            return (PIX *)ERROR_PTR("webp: no pix returned", __func__, NULL);
886
0
        break;
887
888
0
    case IFF_PS:
889
0
        L_ERROR("PostScript reading is not supported\n", __func__);
890
0
        return NULL;
891
892
0
    case IFF_LPDF:
893
0
        L_ERROR("Pdf reading is not supported\n", __func__);
894
0
        return NULL;
895
896
0
    case IFF_SPIX:
897
0
        if ((pix = pixReadMemSpix(data, size)) == NULL)
898
0
            return (PIX *)ERROR_PTR("spix: no pix returned", __func__, NULL);
899
0
        break;
900
901
0
    case IFF_UNKNOWN:
902
0
        return (PIX *)ERROR_PTR("Unknown format: no pix returned",
903
142k
                __func__, NULL);
904
142k
    }
905
906
        /* Set the input format.  For tiff reading from memory we lose
907
         * the actual input format; for 1 bpp, default to G4.  Also
908
         * verify that the colormap is valid.  */
909
142k
    if (pix) {
910
142k
        if (format == IFF_TIFF && pixGetDepth(pix) == 1)
911
4
            format = IFF_TIFF_G4;
912
142k
        pixSetInputFormat(pix, format);
913
142k
        if ((cmap = pixGetColormap(pix))) {
914
142k
            pixcmapIsValid(cmap, pix, &valid);
915
142k
            if (!valid) {
916
0
                pixDestroy(&pix);
917
0
                return (PIX *)ERROR_PTR("invalid colormap", __func__, NULL);
918
0
            }
919
142k
        }
920
142k
        pixSetPadBits(pix, 0);
921
142k
    }
922
142k
    return pix;
923
142k
}
924
925
926
/*!
927
 * \brief   pixReadHeaderMem()
928
 *
929
 * \param[in]    data       const; encoded
930
 * \param[in]    size       size of data
931
 * \param[out]   pformat    [optional] image format
932
 * \param[out]   pw, ph     [optional] width and height
933
 * \param[out]   pbps       [optional] bits/sample
934
 * \param[out]   pspp       [optional] samples/pixel 1, 3 or 4
935
 * \param[out]   piscmap    [optional] 1 if cmap exists; 0 otherwise
936
 * \return  0 if OK, 1 on error
937
 *
938
 * <pre>
939
 * Notes:
940
 *      (1) This reads the actual headers for jpeg, png, tiff, jp2k and pnm.
941
 *          For bmp and gif, we cheat and read all the data into a pix,
942
 *          from which we extract the "header" information.
943
 *      (2) The amount of data required depends on the format.  For
944
 *          png, it requires less than 30 bytes, but for jpeg it can
945
 *          require most of the compressed file.  In practice, the data
946
 *          is typically the entire compressed file in memory.
947
 *      (3) findFileFormatBuffer() requires up to 12 bytes to decide on
948
 *          the format, which we require.
949
 * </pre>
950
 */
951
l_ok
952
pixReadHeaderMem(const l_uint8  *data,
953
                 size_t          size,
954
                 l_int32        *pformat,
955
                 l_int32        *pw,
956
                 l_int32        *ph,
957
                 l_int32        *pbps,
958
                 l_int32        *pspp,
959
                 l_int32        *piscmap)
960
0
{
961
0
l_int32  format, ret, w, h, d, bps, spp, iscmap;
962
0
l_int32  type;  /* not used */
963
0
PIX     *pix;
964
965
0
    if (pw) *pw = 0;
966
0
    if (ph) *ph = 0;
967
0
    if (pbps) *pbps = 0;
968
0
    if (pspp) *pspp = 0;
969
0
    if (piscmap) *piscmap = 0;
970
0
    if (pformat) *pformat = 0;
971
0
    iscmap = 0;  /* init to false */
972
0
    if (!data)
973
0
        return ERROR_INT("data not defined", __func__, 1);
974
0
    if (size < 12)
975
0
        return ERROR_INT("size < 12", __func__, 1);
976
977
0
    findFileFormatBuffer(data, &format);
978
979
0
    switch (format)
980
0
    {
981
0
    case IFF_BMP:  /* cheating: read the pix */
982
0
        if ((pix = pixReadMemBmp(data, size)) == NULL)
983
0
            return ERROR_INT( "bmp: pix not read", __func__, 1);
984
0
        pixGetDimensions(pix, &w, &h, &d);
985
0
        pixDestroy(&pix);
986
0
        bps = (d == 32) ? 8 : d;
987
0
        spp = (d == 32) ? 3 : 1;
988
0
        break;
989
990
0
    case IFF_JFIF_JPEG:
991
0
        ret = readHeaderMemJpeg(data, size, &w, &h, &spp, NULL, NULL);
992
0
        bps = 8;
993
0
        if (ret)
994
0
            return ERROR_INT( "jpeg: no header info returned", __func__, 1);
995
0
        break;
996
997
0
    case IFF_PNG:
998
0
        ret = readHeaderMemPng(data, size, &w, &h, &bps, &spp, &iscmap);
999
0
        if (ret)
1000
0
            return ERROR_INT( "png: no header info returned", __func__, 1);
1001
0
        break;
1002
1003
0
    case IFF_TIFF:
1004
0
    case IFF_TIFF_PACKBITS:
1005
0
    case IFF_TIFF_RLE:
1006
0
    case IFF_TIFF_G3:
1007
0
    case IFF_TIFF_G4:
1008
0
    case IFF_TIFF_LZW:
1009
0
    case IFF_TIFF_ZIP:
1010
0
    case IFF_TIFF_JPEG:
1011
            /* Reading page 0 by default; possibly redefine format */
1012
0
        ret = readHeaderMemTiff(data, size, 0, &w, &h, &bps, &spp,
1013
0
                                NULL, &iscmap, &format);
1014
0
        if (ret)
1015
0
            return ERROR_INT( "tiff: no header info returned", __func__, 1);
1016
0
        break;
1017
1018
0
    case IFF_PNM:
1019
0
        ret = readHeaderMemPnm(data, size, &w, &h, &d, &type, &bps, &spp);
1020
0
        if (ret)
1021
0
            return ERROR_INT( "pnm: no header info returned", __func__, 1);
1022
0
        break;
1023
1024
0
    case IFF_GIF:  /* cheating: read the pix */
1025
0
        if ((pix = pixReadMemGif(data, size)) == NULL)
1026
0
            return ERROR_INT( "gif: pix not read", __func__, 1);
1027
0
        pixGetDimensions(pix, &w, &h, &d);
1028
0
        pixDestroy(&pix);
1029
0
        iscmap = 1;  /* always colormapped; max 256 colors */
1030
0
        spp = 1;
1031
0
        bps = d;
1032
0
        break;
1033
1034
0
    case IFF_JP2:
1035
0
        ret = readHeaderMemJp2k(data, size, &w, &h, &bps, &spp, NULL);
1036
0
        break;
1037
1038
0
    case IFF_WEBP:
1039
0
        bps = 8;
1040
0
        ret = readHeaderMemWebP(data, size, &w, &h, &spp);
1041
0
        break;
1042
1043
0
    case IFF_PS:
1044
0
        if (pformat) *pformat = format;
1045
0
        return ERROR_INT("PostScript reading is not supported\n", __func__, 1);
1046
1047
0
    case IFF_LPDF:
1048
0
        if (pformat) *pformat = format;
1049
0
        return ERROR_INT("Pdf reading is not supported\n", __func__, 1);
1050
1051
0
    case IFF_SPIX:
1052
0
        ret = sreadHeaderSpix((l_uint32 *)data, size, &w, &h, &bps,
1053
0
                               &spp, &iscmap);
1054
0
        if (ret)
1055
0
            return ERROR_INT( "pnm: no header info returned", __func__, 1);
1056
0
        break;
1057
1058
0
    case IFF_UNKNOWN:
1059
0
        return ERROR_INT("unknown format; no data returned", __func__, 1);
1060
0
    }
1061
1062
0
    if (pw) *pw = w;
1063
0
    if (ph) *ph = h;
1064
0
    if (pbps) *pbps = bps;
1065
0
    if (pspp) *pspp = spp;
1066
0
    if (piscmap) *piscmap = iscmap;
1067
0
    if (pformat) *pformat = format;
1068
0
    return 0;
1069
0
}
1070
1071
1072
/*---------------------------------------------------------------------*
1073
 *                    Output image file information                    *
1074
 *---------------------------------------------------------------------*/
1075
extern const char *ImageFileFormatExtensions[];
1076
1077
/*!
1078
 * \brief   writeImageFileInfo()
1079
 *
1080
 * \param[in]    filename    input file
1081
 * \param[in]    fpout       output file stream
1082
 * \param[in]    headeronly  1 to read only the header; 0 to read both
1083
 *                           the header and the input file
1084
 * \return  0 if OK; 1 on error
1085
 *
1086
 * <pre>
1087
 * Notes:
1088
 *      (1) If headeronly == 0 and the image has spp == 4,this will
1089
 *          also call pixDisplayLayersRGBA() to display the image
1090
 *          in three views.
1091
 *      (2) This is a debug function that changes the value of
1092
 *          var_PNG_STRIP_16_TO_8 to 1 (the default).
1093
 * </pre>
1094
 */
1095
l_ok
1096
writeImageFileInfo(const char  *filename,
1097
                   FILE        *fpout,
1098
                   l_int32      headeronly)
1099
0
{
1100
0
char     *text;
1101
0
l_int32   w, h, d, wpl, count, npages, color;
1102
0
l_int32   format, bps, spp, iscmap, xres, yres, transparency;
1103
0
FILE     *fpin;
1104
0
PIX      *pix, *pixt;
1105
0
PIXCMAP  *cmap;
1106
1107
0
    if (!filename)
1108
0
        return ERROR_INT("filename not defined", __func__, 1);
1109
0
    if (!fpout)
1110
0
        return ERROR_INT("stream not defined", __func__, 1);
1111
1112
        /* Read the header */
1113
0
    if (pixReadHeader(filename, &format, &w, &h, &bps, &spp, &iscmap)) {
1114
0
        L_ERROR("failure to read header of %s\n", __func__, filename);
1115
0
        return 1;
1116
0
    }
1117
0
    fprintf(fpout, "===============================================\n"
1118
0
                    "Reading the header:\n");
1119
0
    fprintf(fpout, "  input image format type: %s\n",
1120
0
            ImageFileFormatExtensions[format]);
1121
0
    fprintf(fpout, "  w = %d, h = %d, bps = %d, spp = %d, iscmap = %d\n",
1122
0
            w, h, bps, spp, iscmap);
1123
1124
0
    findFileFormat(filename, &format);
1125
0
    if (format == IFF_JP2) {
1126
0
        fpin = fopenReadStream(filename);
1127
0
        fgetJp2kResolution(fpin, &xres, &yres);
1128
0
        if (fpin) fclose(fpin);
1129
0
        fprintf(fpout, "  xres = %d, yres = %d\n", xres, yres);
1130
0
    } else if (format == IFF_PNG) {
1131
0
        fpin = fopenReadStream(filename);
1132
0
        fgetPngResolution(fpin, &xres, &yres);
1133
0
        if (fpin) fclose(fpin);
1134
0
        fprintf(fpout, "  xres = %d, yres = %d\n", xres, yres);
1135
0
        if (iscmap) {
1136
0
            fpin = fopenReadStream(filename);
1137
0
            fgetPngColormapInfo(fpin, &cmap, &transparency);
1138
0
            if (fpin) fclose(fpin);
1139
0
            if (transparency)
1140
0
                fprintf(fpout, "  colormap has transparency\n");
1141
0
            else
1142
0
                fprintf(fpout, "  colormap does not have transparency\n");
1143
0
            pixcmapWriteStream(fpout, cmap);
1144
0
            pixcmapDestroy(&cmap);
1145
0
        }
1146
0
    } else if (format == IFF_JFIF_JPEG) {
1147
0
        fpin = fopenReadStream(filename);
1148
0
        fgetJpegResolution(fpin, &xres, &yres);
1149
0
        if (fpin) fclose(fpin);
1150
0
        fprintf(fpout, "  xres = %d, yres = %d\n", xres, yres);
1151
0
    }
1152
1153
0
    if (headeronly)
1154
0
        return 0;
1155
1156
        /* Read the full image.  Note that when we read an image that
1157
         * has transparency in a colormap, we convert it to RGBA. */
1158
0
    fprintf(fpout, "===============================================\n"
1159
0
                    "Reading the full image:\n");
1160
1161
        /* Preserve 16 bpp if the format is png */
1162
0
    if (format == IFF_PNG && bps == 16)
1163
0
        l_pngSetReadStrip16To8(0);
1164
1165
0
    if ((pix = pixRead(filename)) == NULL) {
1166
0
        L_ERROR("failure to read full image of %s\n", __func__, filename);
1167
0
        return 1;
1168
0
    }
1169
1170
0
    format = pixGetInputFormat(pix);
1171
0
    pixGetDimensions(pix, &w, &h, &d);
1172
0
    wpl = pixGetWpl(pix);
1173
0
    spp = pixGetSpp(pix);
1174
0
    fprintf(fpout, "  input image format type: %s\n",
1175
0
            ImageFileFormatExtensions[format]);
1176
0
    fprintf(fpout, "  w = %d, h = %d, d = %d, spp = %d, wpl = %d\n",
1177
0
            w, h, d, spp, wpl);
1178
0
    fprintf(fpout, "  xres = %d, yres = %d\n",
1179
0
            pixGetXRes(pix), pixGetYRes(pix));
1180
1181
0
    text = pixGetText(pix);
1182
0
    if (text)  /*  not null */
1183
0
        fprintf(fpout, "  text: %s\n", text);
1184
1185
0
    cmap = pixGetColormap(pix);
1186
0
    if (cmap) {
1187
0
        pixcmapHasColor(cmap, &color);
1188
0
        if (color)
1189
0
            fprintf(fpout, "  colormap exists and has color values:");
1190
0
        else
1191
0
            fprintf(fpout, "  colormap exists and has only gray values:");
1192
0
        pixcmapWriteStream(fpout, pixGetColormap(pix));
1193
0
    }
1194
0
    else
1195
0
        fprintf(fpout, "  colormap does not exist\n");
1196
1197
0
    if (format == IFF_TIFF || format == IFF_TIFF_G4 ||
1198
0
        format == IFF_TIFF_G3 || format == IFF_TIFF_PACKBITS) {
1199
0
        fprintf(fpout, "  Tiff header information:\n");
1200
0
        fpin = fopenReadStream(filename);
1201
0
        tiffGetCount(fpin, &npages);
1202
0
        if (fpin) fclose(fpin);
1203
0
        if (npages == 1)
1204
0
            fprintf(fpout, "    One page in file\n");
1205
0
        else
1206
0
            fprintf(fpout, "    %d pages in file\n", npages);
1207
0
        fprintTiffInfo(fpout, filename);
1208
0
    }
1209
1210
0
    if (d == 1) {
1211
0
        pixCountPixels(pix, &count, NULL);
1212
0
        pixGetDimensions(pix, &w, &h, NULL);
1213
0
        fprintf(fpout, "  1 bpp: foreground pixel fraction ON/Total = %g\n",
1214
0
                (l_float32)count / (l_float32)(w * h));
1215
0
    }
1216
0
    fprintf(fpout, "===============================================\n");
1217
1218
        /* If there is an alpha component, visualize it.  Note that when
1219
         * alpha == 0, the rgb layer is transparent.  We visualize the
1220
         * result when a white background is visible through the
1221
         * transparency layer. */
1222
0
    if (pixGetSpp(pix) == 4) {
1223
0
        pixt = pixDisplayLayersRGBA(pix, 0xffffff00, 600);
1224
0
        pixDisplay(pixt, 100, 100);
1225
0
        pixDestroy(&pixt);
1226
0
    }
1227
1228
0
    if (format == IFF_PNG && bps == 16)
1229
0
        l_pngSetReadStrip16To8(1);  /* return to default if format is png */
1230
1231
0
    pixDestroy(&pix);
1232
0
    return 0;
1233
0
}
1234
1235
1236
/*---------------------------------------------------------------------*
1237
 *             Test function for I/O with different formats            *
1238
 *---------------------------------------------------------------------*/
1239
/*!
1240
 * \brief   ioFormatTest()
1241
 *
1242
 * \param[in]    filename    input image file
1243
 * \return  0 if OK; 1 on error or if the test fails
1244
 *
1245
 * <pre>
1246
 * Notes:
1247
 *      (1) This writes and reads a set of output files losslessly
1248
 *          in different formats to /tmp/format/, and tests that the
1249
 *          result before and after is unchanged.
1250
 *      (2) This should work properly on input images of any depth,
1251
 *          with and without colormaps.
1252
 *      (3) All supported formats are tested for bmp, png, tiff and
1253
 *          non-ascii pnm.  Ascii pnm also works (but who'd ever want
1254
 *          to use it?)   We allow 2 bpp bmp, although it's not
1255
 *          supported elsewhere.  And we don't support reading
1256
 *          16 bpp png, although this can be turned on in pngio.c.
1257
 *      (4) This silently skips png or tiff testing if HAVE_LIBPNG
1258
 *          or HAVE_LIBTIFF are 0, respectively.
1259
 * </pre>
1260
 */
1261
l_ok
1262
ioFormatTest(const char  *filename)
1263
0
{
1264
0
l_int32    w, h, d, depth, equal, problems;
1265
0
l_float32  diff;
1266
0
BOX       *box;
1267
0
PIX       *pixs, *pixc, *pix1, *pix2;
1268
0
PIXCMAP   *cmap;
1269
1270
0
    if (!filename)
1271
0
        return ERROR_INT("filename not defined", __func__, 1);
1272
1273
        /* Read the input file and limit the size */
1274
0
    if ((pix1 = pixRead(filename)) == NULL)
1275
0
        return ERROR_INT("pix1 not made", __func__, 1);
1276
0
    pixGetDimensions(pix1, &w, &h, NULL);
1277
0
    if (w > 250 && h > 250) {  /* take the central 250 x 250 region */
1278
0
        box = boxCreate(w / 2 - 125, h / 2 - 125, 250, 250);
1279
0
        pixs = pixClipRectangle(pix1, box, NULL);
1280
0
        boxDestroy(&box);
1281
0
    } else {
1282
0
        pixs = pixClone(pix1);
1283
0
    }
1284
0
    pixDestroy(&pix1);
1285
1286
0
    lept_mkdir("lept/format");
1287
1288
        /* Note that the reader automatically removes colormaps
1289
         * from 1 bpp BMP images, but not from 8 bpp BMP images.
1290
         * Therefore, if our 8 bpp image initially doesn't have a
1291
         * colormap, we are going to need to remove it from any
1292
         * pix read from a BMP file. */
1293
0
    pixc = pixClone(pixs);  /* laziness */
1294
1295
        /* This does not test the alpha layer pixels, because most
1296
         * formats don't support it.  Remove any alpha.  */
1297
0
    if (pixGetSpp(pixc) == 4)
1298
0
        pixSetSpp(pixc, 3);
1299
0
    cmap = pixGetColormap(pixc);  /* colormap; can be NULL */
1300
0
    d = pixGetDepth(pixc);
1301
1302
0
    problems = FALSE;
1303
1304
        /* ----------------------- BMP -------------------------- */
1305
1306
        /* BMP works for 1, 2, 4, 8 and 32 bpp images.
1307
         * It always writes colormaps for 1 and 8 bpp, so we must
1308
         * remove it after readback if the input image doesn't have
1309
         * a colormap.  Although we can write/read 2 bpp BMP, nobody
1310
         * else can read them! */
1311
0
    if (d == 1 || d == 8) {
1312
0
        L_INFO("write/read bmp\n", __func__);
1313
0
        pixWrite(FILE_BMP, pixc, IFF_BMP);
1314
0
        pix1 = pixRead(FILE_BMP);
1315
0
        if (!cmap)
1316
0
            pix2 = pixRemoveColormap(pix1, REMOVE_CMAP_BASED_ON_SRC);
1317
0
        else
1318
0
            pix2 = pixClone(pix1);
1319
0
        pixEqual(pixc, pix2, &equal);
1320
0
        if (!equal) {
1321
0
            L_INFO("   **** bad bmp image: d = %d ****\n", __func__, d);
1322
0
            problems = TRUE;
1323
0
        }
1324
0
        pixDestroy(&pix1);
1325
0
        pixDestroy(&pix2);
1326
0
    }
1327
1328
0
    if (d == 2 || d == 4 || d == 32) {
1329
0
        L_INFO("write/read bmp\n", __func__);
1330
0
        pixWrite(FILE_BMP, pixc, IFF_BMP);
1331
0
        pix1 = pixRead(FILE_BMP);
1332
0
        pixEqual(pixc, pix1, &equal);
1333
0
        if (!equal) {
1334
0
            L_INFO("   **** bad bmp image: d = %d ****\n", __func__, d);
1335
0
            problems = TRUE;
1336
0
        }
1337
0
        pixDestroy(&pix1);
1338
0
    }
1339
1340
        /* ----------------------- PNG -------------------------- */
1341
#if HAVE_LIBPNG
1342
        /* PNG works for all depths, but here, because we strip
1343
         * 16 --> 8 bpp on reading, we don't test png for 16 bpp. */
1344
    if (d != 16) {
1345
        L_INFO("write/read png\n", __func__);
1346
        pixWrite(FILE_PNG, pixc, IFF_PNG);
1347
        pix1 = pixRead(FILE_PNG);
1348
        pixEqual(pixc, pix1, &equal);
1349
        if (!equal) {
1350
            L_INFO("   **** bad png image: d = %d ****\n", __func__, d);
1351
            problems = TRUE;
1352
        }
1353
        pixDestroy(&pix1);
1354
    }
1355
#endif  /* HAVE_LIBPNG */
1356
1357
        /* ----------------------- TIFF -------------------------- */
1358
0
#if HAVE_LIBTIFF && HAVE_LIBJPEG
1359
        /* TIFF works for 1, 2, 4, 8, 16 and 32 bpp images.
1360
         * Because 8 bpp tiff always writes 256 entry colormaps, the
1361
         * colormap sizes may be different for 8 bpp images with
1362
         * colormap; we are testing if the image content is the same.
1363
         * Likewise, the 2 and 4 bpp tiff images with colormaps
1364
         * have colormap sizes 4 and 16, rsp.  This test should
1365
         * work properly on the content, regardless of the number
1366
         * of color entries in pixc. */
1367
1368
        /* tiff uncompressed works for all pixel depths */
1369
0
    L_INFO("write/read uncompressed tiff\n", __func__);
1370
0
    pixWrite(FILE_TIFF, pixc, IFF_TIFF);
1371
0
    pix1 = pixRead(FILE_TIFF);
1372
0
    pixEqual(pixc, pix1, &equal);
1373
0
    if (!equal) {
1374
0
        L_INFO("   **** bad tiff uncompressed image: d = %d ****\n",
1375
0
               __func__, d);
1376
0
        problems = TRUE;
1377
0
    }
1378
0
    pixDestroy(&pix1);
1379
1380
        /* tiff lzw works for all pixel depths */
1381
0
    L_INFO("write/read lzw compressed tiff\n", __func__);
1382
0
    pixWrite(FILE_LZW, pixc, IFF_TIFF_LZW);
1383
0
    pix1 = pixRead(FILE_LZW);
1384
0
    pixEqual(pixc, pix1, &equal);
1385
0
    if (!equal) {
1386
0
        L_INFO("   **** bad tiff lzw compressed image: d = %d ****\n",
1387
0
               __func__, d);
1388
0
        problems = TRUE;
1389
0
    }
1390
0
    pixDestroy(&pix1);
1391
1392
        /* tiff adobe deflate (zip) works for all pixel depths */
1393
0
    L_INFO("write/read zip compressed tiff\n", __func__);
1394
0
    pixWrite(FILE_ZIP, pixc, IFF_TIFF_ZIP);
1395
0
    pix1 = pixRead(FILE_ZIP);
1396
0
    pixEqual(pixc, pix1, &equal);
1397
0
    if (!equal) {
1398
0
        L_INFO("   **** bad tiff zip compressed image: d = %d ****\n",
1399
0
               __func__, d);
1400
0
        problems = TRUE;
1401
0
    }
1402
0
    pixDestroy(&pix1);
1403
1404
        /* tiff jpeg encoding works for grayscale and rgb */
1405
0
    if (d == 8 || d == 32) {
1406
0
        PIX  *pixc1;
1407
0
        L_INFO("write/read jpeg compressed tiff\n", __func__);
1408
0
        if (d == 8 && pixGetColormap(pixc)) {
1409
0
            pixc1 = pixRemoveColormap(pixc, REMOVE_CMAP_BASED_ON_SRC);
1410
0
            pixWrite(FILE_TIFF_JPEG, pixc1, IFF_TIFF_JPEG);
1411
0
            if ((pix1 = pixRead(FILE_TIFF_JPEG)) == NULL) {
1412
0
                L_INFO(" did not read FILE_TIFF_JPEG\n", __func__);
1413
0
                problems = TRUE;
1414
0
            }
1415
0
            pixDestroy(&pixc1);
1416
0
        } else {
1417
0
            pixWrite(FILE_TIFF_JPEG, pixc, IFF_TIFF_JPEG);
1418
0
            pix1 = pixRead(FILE_TIFF_JPEG);
1419
0
            if (d == 8) {
1420
0
                pixCompareGray(pix1, pixc, L_COMPARE_ABS_DIFF, 0, NULL, &diff,
1421
0
                               NULL, NULL);
1422
0
            } else {
1423
0
                pixCompareRGB(pix1, pixc, L_COMPARE_ABS_DIFF, 0, NULL, &diff,
1424
0
                              NULL, NULL);
1425
0
            }
1426
0
            if (diff > 8.0) {
1427
0
                L_INFO("   **** bad tiff jpeg compressed image: "
1428
0
                       "d = %d, diff = %5.2f ****\n", __func__, d, diff);
1429
0
                problems = TRUE;
1430
0
            }
1431
0
        }
1432
0
        pixDestroy(&pix1);
1433
0
    }
1434
1435
        /* tiff g4, g3, rle and packbits work for 1 bpp */
1436
0
    if (d == 1) {
1437
0
        L_INFO("write/read g4 compressed tiff\n", __func__);
1438
0
        pixWrite(FILE_G4, pixc, IFF_TIFF_G4);
1439
0
        pix1 = pixRead(FILE_G4);
1440
0
        pixEqual(pixc, pix1, &equal);
1441
0
        if (!equal) {
1442
0
            L_INFO("   **** bad tiff g4 image ****\n", __func__);
1443
0
            problems = TRUE;
1444
0
        }
1445
0
        pixDestroy(&pix1);
1446
1447
0
        L_INFO("write/read g3 compressed tiff\n", __func__);
1448
0
        pixWrite(FILE_G3, pixc, IFF_TIFF_G3);
1449
0
        pix1 = pixRead(FILE_G3);
1450
0
        pixEqual(pixc, pix1, &equal);
1451
0
        if (!equal) {
1452
0
            L_INFO("   **** bad tiff g3 image ****\n", __func__);
1453
0
            problems = TRUE;
1454
0
        }
1455
0
        pixDestroy(&pix1);
1456
1457
0
        L_INFO("write/read rle compressed tiff\n", __func__);
1458
0
        pixWrite(FILE_RLE, pixc, IFF_TIFF_RLE);
1459
0
        pix1 = pixRead(FILE_RLE);
1460
0
        pixEqual(pixc, pix1, &equal);
1461
0
        if (!equal) {
1462
0
            L_INFO("   **** bad tiff rle image: d = %d ****\n", __func__, d);
1463
0
            problems = TRUE;
1464
0
        }
1465
0
        pixDestroy(&pix1);
1466
1467
0
        L_INFO("write/read packbits compressed tiff\n", __func__);
1468
0
        pixWrite(FILE_PB, pixc, IFF_TIFF_PACKBITS);
1469
0
        pix1 = pixRead(FILE_PB);
1470
0
        pixEqual(pixc, pix1, &equal);
1471
0
        if (!equal) {
1472
0
            L_INFO("   **** bad tiff packbits image: d = %d ****\n",
1473
0
                   __func__, d);
1474
0
            problems = TRUE;
1475
0
        }
1476
0
        pixDestroy(&pix1);
1477
0
    }
1478
0
#endif  /* HAVE_LIBTIFF && HAVE_LIBJPEG */
1479
1480
        /* ----------------------- PNM -------------------------- */
1481
1482
        /* pnm works for 1, 2, 4, 8, 16 and 32 bpp.
1483
         * pnm doesn't have colormaps, so when we write colormapped
1484
         * pix out as pnm, the colormap is removed.  Thus for the test,
1485
         * we must remove the colormap from pixc before testing.  */
1486
0
    L_INFO("write/read pnm\n", __func__);
1487
0
    pixWrite(FILE_PNM, pixc, IFF_PNM);
1488
0
    pix1 = pixRead(FILE_PNM);
1489
0
    if (cmap)
1490
0
        pix2 = pixRemoveColormap(pixc, REMOVE_CMAP_BASED_ON_SRC);
1491
0
    else
1492
0
        pix2 = pixClone(pixc);
1493
0
    pixEqual(pix1, pix2, &equal);
1494
0
    if (!equal) {
1495
0
        L_INFO("   **** bad pnm image: d = %d ****\n", __func__, d);
1496
0
        problems = TRUE;
1497
0
    }
1498
0
    pixDestroy(&pix1);
1499
0
    pixDestroy(&pix2);
1500
1501
        /* ----------------------- GIF -------------------------- */
1502
#if HAVE_LIBGIF
1503
        /* GIF works for only 1 and 8 bpp, colormapped */
1504
    if (d != 8 || !cmap)
1505
        pix1 = pixConvertTo8(pixc, 1);
1506
    else
1507
        pix1 = pixClone(pixc);
1508
    L_INFO("write/read gif\n", __func__);
1509
    pixWrite(FILE_GIF, pix1, IFF_GIF);
1510
    pix2 = pixRead(FILE_GIF);
1511
    pixEqual(pix1, pix2, &equal);
1512
    if (!equal) {
1513
        L_INFO("   **** bad gif image: d = %d ****\n", __func__, d);
1514
        problems = TRUE;
1515
    }
1516
    pixDestroy(&pix1);
1517
    pixDestroy(&pix2);
1518
#endif  /* HAVE_LIBGIF */
1519
1520
        /* ----------------------- JPEG ------------------------- */
1521
0
#if HAVE_LIBJPEG
1522
        /* JPEG works for only 8 bpp gray and rgb */
1523
0
    if (cmap || d > 8)
1524
0
        pix1 = pixConvertTo32(pixc);
1525
0
    else
1526
0
        pix1 = pixConvertTo8(pixc, 0);
1527
0
    depth = pixGetDepth(pix1);
1528
0
    L_INFO("write/read jpeg\n", __func__);
1529
0
    pixWrite(FILE_JPG, pix1, IFF_JFIF_JPEG);
1530
0
    pix2 = pixRead(FILE_JPG);
1531
0
    if (depth == 8) {
1532
0
        pixCompareGray(pix1, pix2, L_COMPARE_ABS_DIFF, 0, NULL, &diff,
1533
0
                       NULL, NULL);
1534
0
    } else {
1535
0
        pixCompareRGB(pix1, pix2, L_COMPARE_ABS_DIFF, 0, NULL, &diff,
1536
0
                      NULL, NULL);
1537
0
    }
1538
0
    if (diff > 8.0) {
1539
0
        L_INFO("   **** bad jpeg image: d = %d, diff = %5.2f ****\n",
1540
0
               __func__, depth, diff);
1541
0
        problems = TRUE;
1542
0
    }
1543
0
    pixDestroy(&pix1);
1544
0
    pixDestroy(&pix2);
1545
0
#endif  /* HAVE_LIBJPEG */
1546
1547
        /* ----------------------- WEBP ------------------------- */
1548
0
#if HAVE_LIBWEBP
1549
        /* WEBP works for rgb and rgba */
1550
0
    if (cmap || d <= 16)
1551
0
        pix1 = pixConvertTo32(pixc);
1552
0
    else
1553
0
        pix1 = pixClone(pixc);
1554
0
    depth = pixGetDepth(pix1);
1555
0
    L_INFO("write/read webp\n", __func__);
1556
0
    pixWrite(FILE_WEBP, pix1, IFF_WEBP);
1557
0
    pix2 = pixRead(FILE_WEBP);
1558
0
    pixCompareRGB(pix1, pix2, L_COMPARE_ABS_DIFF, 0, NULL, &diff, NULL, NULL);
1559
0
    if (diff > 5.0) {
1560
0
        L_INFO("   **** bad webp image: d = %d, diff = %5.2f ****\n",
1561
0
               __func__, depth, diff);
1562
0
        problems = TRUE;
1563
0
    }
1564
0
    pixDestroy(&pix1);
1565
0
    pixDestroy(&pix2);
1566
0
#endif  /* HAVE_LIBWEBP */
1567
1568
        /* ----------------------- JP2K ------------------------- */
1569
#if HAVE_LIBJP2K
1570
        /* JP2K works for only 8 bpp gray, rgb and rgba */
1571
    if (cmap || d > 8)
1572
        pix1 = pixConvertTo32(pixc);
1573
    else
1574
        pix1 = pixConvertTo8(pixc, 0);
1575
    depth = pixGetDepth(pix1);
1576
    L_INFO("write/read jp2k\n", __func__);
1577
    pixWrite(FILE_JP2K, pix1, IFF_JP2);
1578
    pix2 = pixRead(FILE_JP2K);
1579
    if (depth == 8) {
1580
        pixCompareGray(pix1, pix2, L_COMPARE_ABS_DIFF, 0, NULL, &diff,
1581
                       NULL, NULL);
1582
    } else {
1583
        pixCompareRGB(pix1, pix2, L_COMPARE_ABS_DIFF, 0, NULL, &diff,
1584
                      NULL, NULL);
1585
    }
1586
    lept_stderr("diff = %7.3f\n", diff);
1587
    if (diff > 7.0) {
1588
        L_INFO("   **** bad jp2k image: d = %d, diff = %5.2f ****\n",
1589
               __func__, depth, diff);
1590
        problems = TRUE;
1591
    }
1592
    pixDestroy(&pix1);
1593
    pixDestroy(&pix2);
1594
#endif  /* HAVE_LIBJP2K */
1595
1596
0
    if (problems == FALSE)
1597
0
        L_INFO("All formats read and written OK!\n", __func__);
1598
1599
0
    pixDestroy(&pixc);
1600
0
    pixDestroy(&pixs);
1601
0
    return problems;
1602
0
}