Coverage Report

Created: 2024-07-27 06:27

/src/leptonica/src/readfile.c
Line
Count
Source (jump to first uncovered line)
1
/*====================================================================*
2
 -  Copyright (C) 2001 Leptonica.  All rights reserved.
3
 -
4
 -  Redistribution and use in source and binary forms, with or without
5
 -  modification, are permitted provided that the following conditions
6
 -  are met:
7
 -  1. Redistributions of source code must retain the above copyright
8
 -     notice, this list of conditions and the following disclaimer.
9
 -  2. Redistributions in binary form must reproduce the above
10
 -     copyright notice, this list of conditions and the following
11
 -     disclaimer in the documentation and/or other materials
12
 -     provided with the distribution.
13
 -
14
 -  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15
 -  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16
 -  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17
 -  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL ANY
18
 -  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19
 -  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20
 -  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21
 -  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22
 -  OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23
 -  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24
 -  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25
 *====================================================================*/
26
27
/*!
28
 * \file readfile.c:  reads image on file into memory
29
 * <pre>
30
 *
31
 *      Top-level functions for reading images from file
32
 *           PIXA      *pixaReadFiles()
33
 *           PIXA      *pixaReadFilesSA()
34
 *           PIX       *pixRead()
35
 *           PIX       *pixReadWithHint()
36
 *           PIX       *pixReadIndexed()
37
 *           PIX       *pixReadStream()
38
 *
39
 *      Read header information from file
40
 *           l_int32    pixReadHeader()
41
 *
42
 *      Format finders
43
 *           l_int32    findFileFormat()
44
 *           l_int32    findFileFormatStream()
45
 *           l_int32    findFileFormatBuffer()
46
 *           l_int32    fileFormatIsTiff()
47
 *
48
 *      Read from memory
49
 *           PIX       *pixReadMem()
50
 *           l_int32    pixReadHeaderMem()
51
 *
52
 *      Output image file information
53
 *           void       writeImageFileInfo()
54
 *
55
 *      Test function for I/O with different formats
56
 *           l_int32    ioFormatTest()
57
 *
58
 *  Supported file formats:
59
 *  (1) Reading is supported without any external libraries:
60
 *          bmp
61
 *          pnm   (including pbm, pgm, etc)
62
 *          spix  (raw serialized)
63
 *  (2) Reading is supported with installation of external libraries:
64
 *          png
65
 *          jpg   (standard jfif version)
66
 *          tiff  (including most varieties of compression)
67
 *          gif
68
 *          webp
69
 *          jp2 (jpeg 2000)
70
 *  (3) Other file types will get an "unknown format" error.
71
 * </pre>
72
 */
73
74
#ifdef HAVE_CONFIG_H
75
#include <config_auto.h>
76
#endif  /* HAVE_CONFIG_H */
77
78
#include <string.h>
79
#include "allheaders.h"
80
81
    /* Output pathnames for ioFormatTest(): one file per image format or
     * tiff compression type, all located under /tmp/lept/format/. */
82
static const char *FILE_BMP  =  "/tmp/lept/format/file.bmp";
83
static const char *FILE_PNG  =  "/tmp/lept/format/file.png";
84
static const char *FILE_PNM  =  "/tmp/lept/format/file.pnm";
85
static const char *FILE_G3   =  "/tmp/lept/format/file_g3.tif";
86
static const char *FILE_G4   =  "/tmp/lept/format/file_g4.tif";
87
static const char *FILE_RLE  =  "/tmp/lept/format/file_rle.tif";
88
static const char *FILE_PB   =  "/tmp/lept/format/file_packbits.tif";
89
static const char *FILE_LZW  =  "/tmp/lept/format/file_lzw.tif";
90
static const char *FILE_ZIP  =  "/tmp/lept/format/file_zip.tif";
91
static const char *FILE_TIFF_JPEG =  "/tmp/lept/format/file_jpeg.tif";
92
static const char *FILE_TIFF =  "/tmp/lept/format/file.tif";
93
static const char *FILE_JPG  =  "/tmp/lept/format/file.jpg";
94
static const char *FILE_GIF  =  "/tmp/lept/format/file.gif";
95
static const char *FILE_WEBP =  "/tmp/lept/format/file.webp";
96
static const char *FILE_JP2K =  "/tmp/lept/format/file.jp2";
97
98
    /* There are two jp2 formats, and two codecs associated with them:
     *    OPJ_CODEC_J2K (L_J2K_CODEC) is associated with JP2K_CODESTREAM
     *    OPJ_CODEC_JP2 (L_JP2_CODEC) is associated with JP2K_IMAGE_DATA
     * findFileFormatBuffer() compares the start of the data against these
     * signatures (the first 4 bytes for JP2K_CODESTREAM, the first
     * 12 bytes for JP2K_IMAGE_DATA); a match on either gives IFF_JP2.  */
101
static const unsigned char JP2K_CODESTREAM[4] = { 0xff, 0x4f, 0xff, 0x51 };
102
static const unsigned char JP2K_IMAGE_DATA[12] = { 0x00, 0x00, 0x00, 0x0c,
103
                                                   0x6a, 0x50, 0x20, 0x20,
104
                                                   0x0d, 0x0a, 0x87, 0x0a };
105
106
107
/*---------------------------------------------------------------------*
108
 *          Top-level functions for reading images from file           *
109
 *---------------------------------------------------------------------*/
110
/*!
111
 * \brief   pixaReadFiles()
112
 *
113
 * \param[in]    dirname
114
 * \param[in]    substr   [optional] substring filter on filenames; can be null
115
 * \return  pixa, or NULL on error
116
 *
117
 * <pre>
118
 * Notes:
119
 *      (1) %dirname is the full path for the directory.
120
 *      (2) %substr is the part of the file name (excluding
121
 *          the directory) that is to be matched.  All matching
122
 *          filenames are read into the Pixa.  If substr is NULL,
123
 *          all filenames are read into the Pixa.
124
 * </pre>
125
 */
126
PIXA *
127
pixaReadFiles(const char  *dirname,
128
              const char  *substr)
129
0
{
130
0
PIXA    *pixa;
131
0
SARRAY  *sa;
132
133
0
    if (!dirname)
134
0
        return (PIXA *)ERROR_PTR("dirname not defined", __func__, NULL);
135
136
0
    if ((sa = getSortedPathnamesInDirectory(dirname, substr, 0, 0)) == NULL)
137
0
        return (PIXA *)ERROR_PTR("sa not made", __func__, NULL);
138
139
0
    pixa = pixaReadFilesSA(sa);
140
0
    sarrayDestroy(&sa);
141
0
    return pixa;
142
0
}
143
144
145
/*!
146
 * \brief   pixaReadFilesSA()
147
 *
148
 * \param[in]    sa     full pathnames for all files
149
 * \return  pixa, or NULL on error
150
 */
151
PIXA *
152
pixaReadFilesSA(SARRAY  *sa)
153
0
{
154
0
char    *str;
155
0
l_int32  i, n;
156
0
PIX     *pix;
157
0
PIXA    *pixa;
158
159
0
    if (!sa)
160
0
        return (PIXA *)ERROR_PTR("sa not defined", __func__, NULL);
161
162
0
    n = sarrayGetCount(sa);
163
0
    pixa = pixaCreate(n);
164
0
    for (i = 0; i < n; i++) {
165
0
        str = sarrayGetString(sa, i, L_NOCOPY);
166
0
        if ((pix = pixRead(str)) == NULL) {
167
0
            L_WARNING("pix not read from file %s\n", __func__, str);
168
0
            continue;
169
0
        }
170
0
        pixaAddPix(pixa, pix, L_INSERT);
171
0
    }
172
173
0
    return pixa;
174
0
}
175
176
177
/*!
178
 * \brief   pixRead()
179
 *
180
 * \param[in]    filename    with full pathname or in local directory
181
 * \return  pix if OK; NULL on error
182
 *
183
 * <pre>
184
 * Notes:
185
 *      (1) See at top of file for supported formats.
186
 * </pre>
187
 */
188
PIX *
189
pixRead(const char  *filename)
190
0
{
191
0
FILE  *fp;
192
0
PIX   *pix;
193
194
0
    if (!filename)
195
0
        return (PIX *)ERROR_PTR("filename not defined", __func__, NULL);
196
197
0
    if ((fp = fopenReadStream(filename)) == NULL)
198
0
        return (PIX*)ERROR_PTR_1("image file not found",
199
0
                                 filename, __func__, NULL);
200
0
    pix = pixReadStream(fp, 0);
201
0
    fclose(fp);
202
0
    if (!pix)
203
0
        return (PIX *)ERROR_PTR_1("pix not read", filename, __func__, NULL);
204
0
    return pix;
205
0
}
206
207
208
/*!
209
 * \brief   pixReadWithHint()
210
 *
211
 * \param[in]    filename    with full pathname or in local directory
212
 * \param[in]    hint        bitwise OR of L_HINT_* values for jpeg;
213
 *                           use 0 for no hint
214
 * \return  pix if OK; NULL on error
215
 *
216
 * <pre>
217
 * Notes:
218
 *      (1) The hint is not binding, but may be used to optimize jpeg decoding.
219
 *          Use 0 for no hinting.
220
 * </pre>
221
 */
222
PIX *
223
pixReadWithHint(const char  *filename,
224
                l_int32      hint)
225
0
{
226
0
FILE  *fp;
227
0
PIX   *pix;
228
229
0
    if (!filename)
230
0
        return (PIX *)ERROR_PTR("filename not defined", __func__, NULL);
231
232
0
    if ((fp = fopenReadStream(filename)) == NULL)
233
0
        return (PIX *)ERROR_PTR_1("image file not found",
234
0
                                  filename, __func__, NULL);
235
0
    pix = pixReadStream(fp, hint);
236
0
    fclose(fp);
237
238
0
    if (!pix)
239
0
        return (PIX *)ERROR_PTR_1("image not returned",
240
0
                                  filename, __func__, NULL);
241
0
    return pix;
242
0
}
243
244
245
/*!
246
 * \brief   pixReadIndexed()
247
 *
248
 * \param[in]    sa      string array of full pathnames
249
 * \param[in]    index   into pathname array
250
 * \return  pix if OK; null if not found
251
 *
252
 * <pre>
253
 * Notes:
254
 *      (1) This function is useful for selecting image files from a
255
 *          directory, where the integer %index is embedded into
256
 *          the file name.
257
 *      (2) This is typically done by generating the sarray using
258
 *          getNumberedPathnamesInDirectory(), so that the %index
259
 *          pathname would have the number %index in it.  The size
260
 *          of the sarray should be the largest number (plus 1) appearing
261
 *          in the file names, respecting the constraints in the
262
 *          call to getNumberedPathnamesInDirectory().
263
 *      (3) Consequently, for some indices into the sarray, there may
264
 *          be no pathnames in the directory containing that number.
265
 *          By convention, we place empty C strings ("") in those
266
 *          locations in the sarray, and it is not an error if such
267
 *          a string is encountered and no pix is returned.
268
 *          Therefore, the caller must verify that a pix is returned.
269
 *      (4) See convertSegmentedPagesToPS() in src/psio1.c for an
270
 *          example of usage.
271
 * </pre>
272
 */
273
PIX *
274
pixReadIndexed(SARRAY  *sa,
275
               l_int32  index)
276
0
{
277
0
char    *fname;
278
0
l_int32  n;
279
0
PIX     *pix;
280
281
0
    if (!sa)
282
0
        return (PIX *)ERROR_PTR("sa not defined", __func__, NULL);
283
0
    n = sarrayGetCount(sa);
284
0
    if (index < 0 || index >= n)
285
0
        return (PIX *)ERROR_PTR("index out of bounds", __func__, NULL);
286
287
0
    fname = sarrayGetString(sa, index, L_NOCOPY);
288
0
    if (fname[0] == '\0')
289
0
        return NULL;
290
291
0
    if ((pix = pixRead(fname)) == NULL) {
292
0
        L_ERROR("pix not read from file %s\n", __func__, fname);
293
0
        return NULL;
294
0
    }
295
296
0
    return pix;
297
0
}
298
299
300
/*!
301
 * \brief   pixReadStream()
302
 *
303
 * \param[in]    fp      file stream
304
 * \param[in]    hint    bitwise OR of L_HINT_* values for jpeg; 0 for no hint
305
 * \return  pix if OK; NULL on error
306
 *
307
 * <pre>
308
 * Notes:
309
 *      (1) The hint only applies to jpeg.
310
 * </pre>
311
 */
312
PIX *
313
pixReadStream(FILE    *fp,
314
              l_int32  hint)
315
0
{
316
0
l_int32   format, ret, valid;
317
0
l_uint8  *comment;
318
0
PIX      *pix;
319
0
PIXCMAP  *cmap;
320
321
0
    if (!fp)
322
0
        return (PIX *)ERROR_PTR("stream not defined", __func__, NULL);
323
0
    pix = NULL;
324
325
0
    findFileFormatStream(fp, &format);
326
0
    switch (format)
327
0
    {
328
0
    case IFF_BMP:
329
0
        if ((pix = pixReadStreamBmp(fp)) == NULL )
330
0
            return (PIX *)ERROR_PTR( "bmp: no pix returned", __func__, NULL);
331
0
        break;
332
333
0
    case IFF_JFIF_JPEG:
334
0
        if ((pix = pixReadStreamJpeg(fp, 0, 1, NULL, hint)) == NULL)
335
0
            return (PIX *)ERROR_PTR( "jpeg: no pix returned", __func__, NULL);
336
0
        ret = fgetJpegComment(fp, &comment);
337
0
        if (!ret && comment)
338
0
            pixSetText(pix, (char *)comment);
339
0
        LEPT_FREE(comment);
340
0
        break;
341
342
0
    case IFF_PNG:
343
0
        if ((pix = pixReadStreamPng(fp)) == NULL)
344
0
            return (PIX *)ERROR_PTR("png: no pix returned", __func__, NULL);
345
0
        break;
346
347
0
    case IFF_TIFF:
348
0
    case IFF_TIFF_PACKBITS:
349
0
    case IFF_TIFF_RLE:
350
0
    case IFF_TIFF_G3:
351
0
    case IFF_TIFF_G4:
352
0
    case IFF_TIFF_LZW:
353
0
    case IFF_TIFF_ZIP:
354
0
    case IFF_TIFF_JPEG:
355
0
        if ((pix = pixReadStreamTiff(fp, 0)) == NULL)  /* page 0 by default */
356
0
            return (PIX *)ERROR_PTR("tiff: no pix returned", __func__, NULL);
357
0
        break;
358
359
0
    case IFF_PNM:
360
0
        if ((pix = pixReadStreamPnm(fp)) == NULL)
361
0
            return (PIX *)ERROR_PTR("pnm: no pix returned", __func__, NULL);
362
0
        break;
363
364
0
    case IFF_GIF:
365
0
        if ((pix = pixReadStreamGif(fp)) == NULL)
366
0
            return (PIX *)ERROR_PTR("gif: no pix returned", __func__, NULL);
367
0
        break;
368
369
0
    case IFF_JP2:
370
0
        if ((pix = pixReadStreamJp2k(fp, 1, NULL, 0, 0)) == NULL)
371
0
            return (PIX *)ERROR_PTR("jp2: no pix returned", __func__, NULL);
372
0
        break;
373
374
0
    case IFF_WEBP:
375
0
        if ((pix = pixReadStreamWebP(fp)) == NULL)
376
0
            return (PIX *)ERROR_PTR("webp: no pix returned", __func__, NULL);
377
0
        break;
378
379
0
    case IFF_PS:
380
0
        L_ERROR("PostScript reading is not supported\n", __func__);
381
0
        return NULL;
382
383
0
    case IFF_LPDF:
384
0
        L_ERROR("Pdf reading is not supported\n", __func__);
385
0
        return NULL;
386
387
0
    case IFF_SPIX:
388
0
        if ((pix = pixReadStreamSpix(fp)) == NULL)
389
0
            return (PIX *)ERROR_PTR("spix: no pix returned", __func__, NULL);
390
0
        break;
391
392
0
    case IFF_UNKNOWN:
393
0
        return (PIX *)ERROR_PTR( "Unknown format: no pix returned",
394
0
                __func__, NULL);
395
0
        break;
396
0
    }
397
398
0
    if (pix) {
399
0
        pixSetInputFormat(pix, format);
400
0
        if ((cmap = pixGetColormap(pix))) {
401
0
            pixcmapIsValid(cmap, pix, &valid);
402
0
            if (!valid) {
403
0
                pixDestroy(&pix);
404
0
                return (PIX *)ERROR_PTR("invalid colormap", __func__, NULL);
405
0
            }
406
0
        }
407
0
    }
408
0
    return pix;
409
0
}
410
411
412
413
/*---------------------------------------------------------------------*
414
 *                     Read header information from file               *
415
 *---------------------------------------------------------------------*/
416
/*!
417
 * \brief   pixReadHeader()
418
 *
419
 * \param[in]    filename    with full pathname or in local directory
420
 * \param[out]   pformat     [optional] file format
421
 * \param[out]   pw, ph      [optional] width and height
422
 * \param[out]   pbps        [optional] bits/sample
423
 * \param[out]   pspp        [optional] samples/pixel 1, 3 or 4
424
 * \param[out]   piscmap     [optional] 1 if cmap exists; 0 otherwise
425
 * \return  0 if OK, 1 on error
426
 *
427
 * <pre>
428
 * Notes:
429
 *      (1) This reads the actual headers for jpeg, png, tiff and pnm.
430
 *          For bmp and gif, we cheat and read the entire file into a pix,
431
 *          from which we extract the "header" information.
432
 * </pre>
433
 */
434
l_ok
435
pixReadHeader(const char  *filename,
436
              l_int32     *pformat,
437
              l_int32     *pw,
438
              l_int32     *ph,
439
              l_int32     *pbps,
440
              l_int32     *pspp,
441
              l_int32     *piscmap)
442
0
{
443
0
l_int32  format, ret, w, h, d, bps, spp, iscmap;
444
0
l_int32  type;  /* ignored */
445
0
FILE    *fp;
446
0
PIX     *pix;
447
448
0
    if (pw) *pw = 0;
449
0
    if (ph) *ph = 0;
450
0
    if (pbps) *pbps = 0;
451
0
    if (pspp) *pspp = 0;
452
0
    if (piscmap) *piscmap = 0;
453
0
    if (pformat) *pformat = 0;
454
0
    iscmap = 0;  /* init to false */
455
0
    if (!filename)
456
0
        return ERROR_INT("filename not defined", __func__, 1);
457
458
0
    if ((fp = fopenReadStream(filename)) == NULL)
459
0
        return ERROR_INT_1("image file not found", filename, __func__, 1);
460
0
    findFileFormatStream(fp, &format);
461
0
    fclose(fp);
462
463
0
    switch (format)
464
0
    {
465
0
    case IFF_BMP:  /* cheating: reading the entire file */
466
0
        if ((pix = pixRead(filename)) == NULL)
467
0
            return ERROR_INT_1( "bmp: pix not read", filename, __func__, 1);
468
0
        pixGetDimensions(pix, &w, &h, &d);
469
0
        bps = (d == 32) ? 8 : d;
470
0
        spp = pixGetSpp(pix);
471
0
        iscmap = (pixGetColormap(pix)) ? 1 : 0;
472
0
        pixDestroy(&pix);
473
0
        break;
474
475
0
    case IFF_JFIF_JPEG:
476
0
        ret = readHeaderJpeg(filename, &w, &h, &spp, NULL, NULL);
477
0
        bps = 8;
478
0
        if (ret)
479
0
            return ERROR_INT_1("jpeg: no header info returned",
480
0
                               filename, __func__, 1);
481
0
        break;
482
483
0
    case IFF_PNG:
484
0
        ret = readHeaderPng(filename, &w, &h, &bps, &spp, &iscmap);
485
0
        if (ret)
486
0
            return ERROR_INT_1("png: no header info returned",
487
0
                               filename, __func__, 1);
488
0
        break;
489
490
0
    case IFF_TIFF:
491
0
    case IFF_TIFF_PACKBITS:
492
0
    case IFF_TIFF_RLE:
493
0
    case IFF_TIFF_G3:
494
0
    case IFF_TIFF_G4:
495
0
    case IFF_TIFF_LZW:
496
0
    case IFF_TIFF_ZIP:
497
0
    case IFF_TIFF_JPEG:
498
            /* Reading page 0 by default; possibly redefine format */
499
0
        ret = readHeaderTiff(filename, 0, &w, &h, &bps, &spp, NULL, &iscmap,
500
0
                             &format);
501
0
        if (ret)
502
0
            return ERROR_INT_1("tiff: no header info returned",
503
0
                               filename, __func__, 1);
504
0
        break;
505
506
0
    case IFF_PNM:
507
0
        ret = readHeaderPnm(filename, &w, &h, &d, &type, &bps, &spp);
508
0
        if (ret)
509
0
            return ERROR_INT_1("pnm: no header info returned",
510
0
                               filename, __func__, 1);
511
0
        break;
512
513
0
    case IFF_GIF:  /* cheating: reading the entire file */
514
0
        if ((pix = pixRead(filename)) == NULL)
515
0
            return ERROR_INT_1( "gif: pix not read", filename, __func__, 1);
516
0
        pixGetDimensions(pix, &w, &h, &d);
517
0
        pixDestroy(&pix);
518
0
        iscmap = 1;  /* always colormapped; max 256 colors */
519
0
        spp = 1;
520
0
        bps = d;
521
0
        break;
522
523
0
    case IFF_JP2:
524
0
        ret = readHeaderJp2k(filename, &w, &h, &bps, &spp, NULL);
525
0
        break;
526
527
0
    case IFF_WEBP:
528
0
        if (readHeaderWebP(filename, &w, &h, &spp))
529
0
            return ERROR_INT_1("webp: no header info returned",
530
0
                               filename, __func__, 1);
531
0
        bps = 8;
532
0
        break;
533
534
0
    case IFF_PS:
535
0
        if (pformat) *pformat = format;
536
0
        return ERROR_INT("PostScript reading is not supported\n", __func__, 1);
537
538
0
    case IFF_LPDF:
539
0
        if (pformat) *pformat = format;
540
0
        return ERROR_INT("Pdf reading is not supported\n", __func__, 1);
541
542
0
    case IFF_SPIX:
543
0
        ret = readHeaderSpix(filename, &w, &h, &bps, &spp, &iscmap);
544
0
        if (ret)
545
0
            return ERROR_INT_1("spix: no header info returned",
546
0
                               filename, __func__, 1);
547
0
        break;
548
549
0
    case IFF_UNKNOWN:
550
0
        return ERROR_INT_1("unknown format in file", filename, __func__, 1);
551
0
    }
552
553
0
    if (pw) *pw = w;
554
0
    if (ph) *ph = h;
555
0
    if (pbps) *pbps = bps;
556
0
    if (pspp) *pspp = spp;
557
0
    if (piscmap) *piscmap = iscmap;
558
0
    if (pformat) *pformat = format;
559
0
    return 0;
560
0
}
561
562
563
/*---------------------------------------------------------------------*
564
 *                            Format finders                           *
565
 *---------------------------------------------------------------------*/
566
/*!
567
 * \brief   findFileFormat()
568
 *
569
 * \param[in]    filename
570
 * \param[out]   pformat    found format
571
 * \return  0 if OK, 1 on error or if format is not recognized
572
 */
573
l_ok
574
findFileFormat(const char  *filename,
575
               l_int32     *pformat)
576
0
{
577
0
l_int32  ret;
578
0
FILE    *fp;
579
580
0
    if (!pformat)
581
0
        return ERROR_INT("&format not defined", __func__, 1);
582
0
    *pformat = IFF_UNKNOWN;
583
0
    if (!filename)
584
0
        return ERROR_INT("filename not defined", __func__, 1);
585
586
0
    if ((fp = fopenReadStream(filename)) == NULL)
587
0
        return ERROR_INT_1("image file not found", filename, __func__, 1);
588
0
    ret = findFileFormatStream(fp, pformat);
589
0
    fclose(fp);
590
0
    return ret;
591
0
}
592
593
594
/*!
595
 * \brief   findFileFormatStream()
596
 *
597
 * \param[in]    fp        file stream
598
 * \param[out]   pformat   found format
599
 * \return  0 if OK, 1 on error or if format is not recognized
600
 *
601
 * <pre>
602
 * Notes:
603
 *      (1) Important: Side effect -- this resets fp to BOF.
604
 * </pre>
605
 */
606
l_ok
607
findFileFormatStream(FILE     *fp,
608
                     l_int32  *pformat)
609
0
{
610
0
l_uint8  firstbytes[13];
611
0
l_int32  format;
612
613
0
    if (!pformat)
614
0
        return ERROR_INT("&format not defined", __func__, 1);
615
0
    *pformat = IFF_UNKNOWN;
616
0
    if (!fp)
617
0
        return ERROR_INT("stream not defined", __func__, 1);
618
619
0
    rewind(fp);
620
0
    if (fnbytesInFile(fp) < 12)
621
0
        return ERROR_INT("truncated file", __func__, 1);
622
623
0
    if (fread(&firstbytes, 1, 12, fp) != 12)
624
0
        return ERROR_INT("failed to read first 12 bytes of file", __func__, 1);
625
0
    firstbytes[12] = 0;
626
0
    rewind(fp);
627
628
0
    findFileFormatBuffer(firstbytes, &format);
629
0
    if (format == IFF_TIFF) {
630
0
        findTiffCompression(fp, &format);
631
0
        rewind(fp);
632
0
    }
633
0
    *pformat = format;
634
0
    if (format == IFF_UNKNOWN)
635
0
        return 1;
636
0
    else
637
0
        return 0;
638
0
}
639
640
641
/*!
642
 * \brief   findFileFormatBuffer()
643
 *
644
 * \param[in]    buf       byte buffer at least 12 bytes in size; we can't check
645
 * \param[out]   pformat   found format
646
 * \return  0 if OK, 1 on error or if format is not recognized
647
 *
648
 * <pre>
649
 * Notes:
650
 *      (1) This determines the file format from the first 12 bytes in
651
 *          the compressed data stream, which are stored in memory.
652
 *      (2) For tiff files, this returns IFF_TIFF.  The specific tiff
653
 *          compression is then determined using findTiffCompression().
654
 * </pre>
655
 */
656
l_ok
657
findFileFormatBuffer(const l_uint8  *buf,
658
                     l_int32        *pformat)
659
0
{
660
0
l_uint16  twobytepw;
661
662
0
    if (!pformat)
663
0
        return ERROR_INT("&format not defined", __func__, 1);
664
0
    *pformat = IFF_UNKNOWN;
665
0
    if (!buf)
666
0
        return ERROR_INT("byte buffer not defined", __func__, 0);
667
668
        /* Check the bmp and tiff 2-byte header ids */
669
0
    ((char *)(&twobytepw))[0] = buf[0];
670
0
    ((char *)(&twobytepw))[1] = buf[1];
671
672
0
    if (convertOnBigEnd16(twobytepw) == BMP_ID) {
673
0
        *pformat = IFF_BMP;
674
0
        return 0;
675
0
    }
676
677
0
    if (twobytepw == TIFF_BIGEND_ID || twobytepw == TIFF_LITTLEEND_ID) {
678
0
        *pformat = IFF_TIFF;
679
0
        return 0;
680
0
    }
681
682
        /* Check for the p*m 2-byte header ids */
683
0
    if ((buf[0] == 'P' && buf[1] == '4') || /* newer packed */
684
0
        (buf[0] == 'P' && buf[1] == '1')) {  /* old ASCII format */
685
0
        *pformat = IFF_PNM;
686
0
        return 0;
687
0
    }
688
689
0
    if ((buf[0] == 'P' && buf[1] == '5') || /* newer */
690
0
        (buf[0] == 'P' && buf[1] == '2')) {  /* old */
691
0
        *pformat = IFF_PNM;
692
0
        return 0;
693
0
    }
694
695
0
    if ((buf[0] == 'P' && buf[1] == '6') || /* newer */
696
0
        (buf[0] == 'P' && buf[1] == '3')) {  /* old */
697
0
        *pformat = IFF_PNM;
698
0
        return 0;
699
0
    }
700
701
0
    if (buf[0] == 'P' && buf[1] == '7') {  /* new arbitrary (PAM) */
702
0
        *pformat = IFF_PNM;
703
0
        return 0;
704
0
    }
705
706
        /*  Consider the first 11 bytes of the standard JFIF JPEG header:
707
         *    - The first two bytes are the most important:  0xffd8.
708
         *    - The next two bytes are the jfif marker: 0xffe0.
709
         *      Not all jpeg files have this marker.
710
         *    - The next two bytes are the header length.
711
         *    - The next 5 bytes are a null-terminated string.
712
         *      For JFIF, the string is "JFIF", naturally.  For others it
713
         *      can be "Exif" or just about anything else.
714
         *    - Because of all this variability, we only check the first
715
         *      two byte marker.  All jpeg files are identified as
716
         *      IFF_JFIF_JPEG.  */
717
0
    if (buf[0] == 0xff && buf[1] == 0xd8) {
718
0
        *pformat = IFF_JFIF_JPEG;
719
0
        return 0;
720
0
    }
721
722
        /* Check for the 8 byte PNG signature (png_signature in png.c):
723
         *       {137, 80, 78, 71, 13, 10, 26, 10}      */
724
0
    if (buf[0] == 137 && buf[1] == 80  && buf[2] == 78  && buf[3] == 71  &&
725
0
        buf[4] == 13  && buf[5] == 10  && buf[6] == 26  && buf[7] == 10) {
726
0
        *pformat = IFF_PNG;
727
0
        return 0;
728
0
    }
729
730
        /* Look for "GIF87a" or "GIF89a" */
731
0
    if (buf[0] == 'G' && buf[1] == 'I' && buf[2] == 'F' && buf[3] == '8' &&
732
0
        (buf[4] == '7' || buf[4] == '9') && buf[5] == 'a') {
733
0
        *pformat = IFF_GIF;
734
0
        return 0;
735
0
    }
736
737
        /* Check for both types of jp2k file */
738
0
    if (memcmp(buf, JP2K_CODESTREAM, 4) == 0 ||
739
0
        memcmp(buf, JP2K_IMAGE_DATA, 12) == 0) {
740
0
        *pformat = IFF_JP2;
741
0
        return 0;
742
0
    }
743
744
        /* Check for webp */
745
0
    if (buf[0] == 'R' && buf[1] == 'I' && buf[2] == 'F' && buf[3] == 'F' &&
746
0
        buf[8] == 'W' && buf[9] == 'E' && buf[10] == 'B' && buf[11] == 'P') {
747
0
        *pformat = IFF_WEBP;
748
0
        return 0;
749
0
    }
750
751
        /* Check for ps */
752
0
    if (buf[0] == '%' && buf[1] == '!' && buf[2] == 'P' && buf[3] == 'S' &&
753
0
        buf[4] == '-' && buf[5] == 'A' && buf[6] == 'd' && buf[7] == 'o' &&
754
0
        buf[8] == 'b' && buf[9] == 'e') {
755
0
        *pformat = IFF_PS;
756
0
        return 0;
757
0
    }
758
759
        /* Check for pdf */
760
0
    if (buf[0] == '%' && buf[1] == 'P' && buf[2] == 'D' && buf[3] == 'F' &&
761
0
        buf[4] == '-' && buf[5] == '1') {
762
0
        *pformat = IFF_LPDF;
763
0
        return 0;
764
0
    }
765
766
        /* Check for "spix" serialized pix */
767
0
    if (buf[0] == 's' && buf[1] == 'p' && buf[2] == 'i' && buf[3] == 'x') {
768
0
        *pformat = IFF_SPIX;
769
0
        return 0;
770
0
    }
771
772
        /* File format identifier not found; unknown */
773
0
    return 1;
774
0
}
775
776
777
/*!
778
 * \brief   fileFormatIsTiff()
779
 *
780
 * \param[in]    fp    file stream
781
 * \return  1 if file is tiff; 0 otherwise or on error
782
 */
783
l_int32
784
fileFormatIsTiff(FILE  *fp)
785
0
{
786
0
l_int32  format;
787
788
0
    if (!fp)
789
0
        return ERROR_INT("stream not defined", __func__, 0);
790
791
0
    findFileFormatStream(fp, &format);
792
0
    if (format == IFF_TIFF || format == IFF_TIFF_PACKBITS ||
793
0
        format == IFF_TIFF_RLE || format == IFF_TIFF_G3 ||
794
0
        format == IFF_TIFF_G4 || format == IFF_TIFF_LZW ||
795
0
        format == IFF_TIFF_ZIP || format == IFF_TIFF_JPEG)
796
0
        return 1;
797
0
    else
798
0
        return 0;
799
0
}
800
801
802
/*---------------------------------------------------------------------*
803
 *                            Read from memory                         *
804
 *---------------------------------------------------------------------*/
805
/*!
806
 * \brief   pixReadMem()
807
 *
808
 * \param[in]    data    const; encoded
809
 * \param[in]    size    size of data
810
 * \return  pix, or NULL on error
811
 *
812
 * <pre>
813
 * Notes:
814
 *      (1) This is a variation of pixReadStream(), where the data is read
815
 *          from a memory buffer rather than a file.
816
 *      (2) On Windows, this only reads tiff formatted files directly from
817
 *          memory.  For other formats, it writes to a temp file and
818
 *          decompresses from file.
819
 *      (3) findFileFormatBuffer() requires up to 12 bytes to decide on
820
 *          the format.  That determines the constraint here.  But in
821
 *          fact the data must contain the entire compressed string for
822
 *          the image.
823
 * </pre>
824
 */
825
PIX *
826
pixReadMem(const l_uint8  *data,
827
           size_t          size)
828
0
{
829
0
l_int32   format, valid;
830
0
PIX      *pix;
831
0
PIXCMAP  *cmap;
832
833
0
    if (!data)
834
0
        return (PIX *)ERROR_PTR("data not defined", __func__, NULL);
835
0
    if (size < 12)
836
0
        return (PIX *)ERROR_PTR("size < 12", __func__, NULL);
837
0
    pix = NULL;
838
839
0
    findFileFormatBuffer(data, &format);
840
0
    switch (format)
841
0
    {
842
0
    case IFF_BMP:
843
0
        if ((pix = pixReadMemBmp(data, size)) == NULL )
844
0
            return (PIX *)ERROR_PTR( "bmp: no pix returned", __func__, NULL);
845
0
        break;
846
847
0
    case IFF_JFIF_JPEG:
848
0
        if ((pix = pixReadMemJpeg(data, size, 0, 1, NULL, 0)) == NULL)
849
0
            return (PIX *)ERROR_PTR( "jpeg: no pix returned", __func__, NULL);
850
0
        break;
851
852
0
    case IFF_PNG:
853
0
        if ((pix = pixReadMemPng(data, size)) == NULL)
854
0
            return (PIX *)ERROR_PTR("png: no pix returned", __func__, NULL);
855
0
        break;
856
857
0
    case IFF_TIFF:
858
0
    case IFF_TIFF_PACKBITS:
859
0
    case IFF_TIFF_RLE:
860
0
    case IFF_TIFF_G3:
861
0
    case IFF_TIFF_G4:
862
0
    case IFF_TIFF_LZW:
863
0
    case IFF_TIFF_ZIP:
864
            /* Reading page 0 by default */
865
0
        if ((pix = pixReadMemTiff(data, size, 0)) == NULL)
866
0
            return (PIX *)ERROR_PTR("tiff: no pix returned", __func__, NULL);
867
0
        break;
868
869
0
    case IFF_PNM:
870
0
        if ((pix = pixReadMemPnm(data, size)) == NULL)
871
0
            return (PIX *)ERROR_PTR("pnm: no pix returned", __func__, NULL);
872
0
        break;
873
874
0
    case IFF_GIF:
875
0
        if ((pix = pixReadMemGif(data, size)) == NULL)
876
0
            return (PIX *)ERROR_PTR("gif: no pix returned", __func__, NULL);
877
0
        break;
878
879
0
    case IFF_JP2:
880
0
        if ((pix = pixReadMemJp2k(data, size, 1, NULL, 0, 0)) == NULL)
881
0
            return (PIX *)ERROR_PTR("jp2k: no pix returned", __func__, NULL);
882
0
        break;
883
884
0
    case IFF_WEBP:
885
0
        if ((pix = pixReadMemWebP(data, size)) == NULL)
886
0
            return (PIX *)ERROR_PTR("webp: no pix returned", __func__, NULL);
887
0
        break;
888
889
0
    case IFF_PS:
890
0
        L_ERROR("PostScript reading is not supported\n", __func__);
891
0
        return NULL;
892
893
0
    case IFF_LPDF:
894
0
        L_ERROR("Pdf reading is not supported\n", __func__);
895
0
        return NULL;
896
897
0
    case IFF_SPIX:
898
0
        if ((pix = pixReadMemSpix(data, size)) == NULL)
899
0
            return (PIX *)ERROR_PTR("spix: no pix returned", __func__, NULL);
900
0
        break;
901
902
0
    case IFF_UNKNOWN:
903
0
        return (PIX *)ERROR_PTR("Unknown format: no pix returned",
904
0
                __func__, NULL);
905
0
        break;
906
0
    }
907
908
        /* Set the input format.  For tiff reading from memory we lose
909
         * the actual input format; for 1 bpp, default to G4.  Also
910
         * verify that the colormap is valid.  */
911
0
    if (pix) {
912
0
        if (format == IFF_TIFF && pixGetDepth(pix) == 1)
913
0
            format = IFF_TIFF_G4;
914
0
        pixSetInputFormat(pix, format);
915
0
        if ((cmap = pixGetColormap(pix))) {
916
0
            pixcmapIsValid(cmap, pix, &valid);
917
0
            if (!valid) {
918
0
                pixDestroy(&pix);
919
0
                return (PIX *)ERROR_PTR("invalid colormap", __func__, NULL);
920
0
            }
921
0
        }
922
0
        pixSetPadBits(pix, 0);
923
0
    }
924
0
    return pix;
925
0
}
926
927
928
/*!
929
 * \brief   pixReadHeaderMem()
930
 *
931
 * \param[in]    data       const; encoded
932
 * \param[in]    size       size of data
933
 * \param[out]   pformat    [optional] image format
934
 * \param[out]   pw, ph     [optional] width and height
935
 * \param[out]   pbps       [optional] bits/sample
936
 * \param[out]   pspp       [optional] samples/pixel 1, 3 or 4
937
 * \param[out]   piscmap    [optional] 1 if cmap exists; 0 otherwise
938
 * \return  0 if OK, 1 on error
939
 *
940
 * <pre>
941
 * Notes:
942
 *      (1) This reads the actual headers for jpeg, png, tiff, jp2k and pnm.
943
 *          For bmp and gif, we cheat and read all the data into a pix,
944
 *          from which we extract the "header" information.
945
 *      (2) The amount of data required depends on the format.  For
946
 *          png, it requires less than 30 bytes, but for jpeg it can
947
 *          require most of the compressed file.  In practice, the data
948
 *          is typically the entire compressed file in memory.
949
 *      (3) findFileFormatBuffer() requires up to 12 bytes to decide on
950
 *          the format, which we require.
951
 * </pre>
952
 */
953
l_ok
954
pixReadHeaderMem(const l_uint8  *data,
955
                 size_t          size,
956
                 l_int32        *pformat,
957
                 l_int32        *pw,
958
                 l_int32        *ph,
959
                 l_int32        *pbps,
960
                 l_int32        *pspp,
961
                 l_int32        *piscmap)
962
0
{
963
0
l_int32  format, ret, w, h, d, bps, spp, iscmap;
964
0
l_int32  type;  /* not used */
965
0
PIX     *pix;
966
967
0
    if (pw) *pw = 0;
968
0
    if (ph) *ph = 0;
969
0
    if (pbps) *pbps = 0;
970
0
    if (pspp) *pspp = 0;
971
0
    if (piscmap) *piscmap = 0;
972
0
    if (pformat) *pformat = 0;
973
0
    iscmap = 0;  /* init to false */
974
0
    if (!data)
975
0
        return ERROR_INT("data not defined", __func__, 1);
976
0
    if (size < 12)
977
0
        return ERROR_INT("size < 12", __func__, 1);
978
979
0
    findFileFormatBuffer(data, &format);
980
981
0
    switch (format)
982
0
    {
983
0
    case IFF_BMP:  /* cheating: read the pix */
984
0
        if ((pix = pixReadMemBmp(data, size)) == NULL)
985
0
            return ERROR_INT( "bmp: pix not read", __func__, 1);
986
0
        pixGetDimensions(pix, &w, &h, &d);
987
0
        pixDestroy(&pix);
988
0
        bps = (d == 32) ? 8 : d;
989
0
        spp = (d == 32) ? 3 : 1;
990
0
        break;
991
992
0
    case IFF_JFIF_JPEG:
993
0
        ret = readHeaderMemJpeg(data, size, &w, &h, &spp, NULL, NULL);
994
0
        bps = 8;
995
0
        if (ret)
996
0
            return ERROR_INT( "jpeg: no header info returned", __func__, 1);
997
0
        break;
998
999
0
    case IFF_PNG:
1000
0
        ret = readHeaderMemPng(data, size, &w, &h, &bps, &spp, &iscmap);
1001
0
        if (ret)
1002
0
            return ERROR_INT( "png: no header info returned", __func__, 1);
1003
0
        break;
1004
1005
0
    case IFF_TIFF:
1006
0
    case IFF_TIFF_PACKBITS:
1007
0
    case IFF_TIFF_RLE:
1008
0
    case IFF_TIFF_G3:
1009
0
    case IFF_TIFF_G4:
1010
0
    case IFF_TIFF_LZW:
1011
0
    case IFF_TIFF_ZIP:
1012
0
    case IFF_TIFF_JPEG:
1013
            /* Reading page 0 by default; possibly redefine format */
1014
0
        ret = readHeaderMemTiff(data, size, 0, &w, &h, &bps, &spp,
1015
0
                                NULL, &iscmap, &format);
1016
0
        if (ret)
1017
0
            return ERROR_INT( "tiff: no header info returned", __func__, 1);
1018
0
        break;
1019
1020
0
    case IFF_PNM:
1021
0
        ret = readHeaderMemPnm(data, size, &w, &h, &d, &type, &bps, &spp);
1022
0
        if (ret)
1023
0
            return ERROR_INT( "pnm: no header info returned", __func__, 1);
1024
0
        break;
1025
1026
0
    case IFF_GIF:  /* cheating: read the pix */
1027
0
        if ((pix = pixReadMemGif(data, size)) == NULL)
1028
0
            return ERROR_INT( "gif: pix not read", __func__, 1);
1029
0
        pixGetDimensions(pix, &w, &h, &d);
1030
0
        pixDestroy(&pix);
1031
0
        iscmap = 1;  /* always colormapped; max 256 colors */
1032
0
        spp = 1;
1033
0
        bps = d;
1034
0
        break;
1035
1036
0
    case IFF_JP2:
1037
0
        ret = readHeaderMemJp2k(data, size, &w, &h, &bps, &spp, NULL);
1038
0
        break;
1039
1040
0
    case IFF_WEBP:
1041
0
        bps = 8;
1042
0
        ret = readHeaderMemWebP(data, size, &w, &h, &spp);
1043
0
        break;
1044
1045
0
    case IFF_PS:
1046
0
        if (pformat) *pformat = format;
1047
0
        return ERROR_INT("PostScript reading is not supported\n", __func__, 1);
1048
1049
0
    case IFF_LPDF:
1050
0
        if (pformat) *pformat = format;
1051
0
        return ERROR_INT("Pdf reading is not supported\n", __func__, 1);
1052
1053
0
    case IFF_SPIX:
1054
0
        ret = sreadHeaderSpix((l_uint32 *)data, size, &w, &h, &bps,
1055
0
                               &spp, &iscmap);
1056
0
        if (ret)
1057
0
            return ERROR_INT( "pnm: no header info returned", __func__, 1);
1058
0
        break;
1059
1060
0
    case IFF_UNKNOWN:
1061
0
        return ERROR_INT("unknown format; no data returned", __func__, 1);
1062
0
        break;
1063
0
    }
1064
1065
0
    if (pw) *pw = w;
1066
0
    if (ph) *ph = h;
1067
0
    if (pbps) *pbps = bps;
1068
0
    if (pspp) *pspp = spp;
1069
0
    if (piscmap) *piscmap = iscmap;
1070
0
    if (pformat) *pformat = format;
1071
0
    return 0;
1072
0
}
1073
1074
1075
/*---------------------------------------------------------------------*
1076
 *                    Output image file information                    *
1077
 *---------------------------------------------------------------------*/
1078
/* Defined in another source file.  Below, it is indexed by an image
 * format value to get the string printed as the "format type". */
extern const char *ImageFileFormatExtensions[];
1079
1080
/*!
1081
 * \brief   writeImageFileInfo()
1082
 *
1083
 * \param[in]    filename    input file
1084
 * \param[in]    fpout       output file stream
1085
 * \param[in]    headeronly  1 to read only the header; 0 to read both
1086
 *                           the header and the input file
1087
 * \return  0 if OK; 1 on error
1088
 *
1089
 * <pre>
1090
 * Notes:
1091
 *      (1) If headeronly == 0 and the image has spp == 4,this will
1092
 *          also call pixDisplayLayersRGBA() to display the image
1093
 *          in three views.
1094
 *      (2) This is a debug function that changes the value of
1095
 *          var_PNG_STRIP_16_TO_8 to 1 (the default).
1096
 * </pre>
1097
 */
1098
l_ok
1099
writeImageFileInfo(const char  *filename,
1100
                   FILE        *fpout,
1101
                   l_int32      headeronly)
1102
0
{
1103
0
char     *text;
1104
0
l_int32   w, h, d, wpl, count, npages, color;
1105
0
l_int32   format, bps, spp, iscmap, xres, yres, transparency;
1106
0
FILE     *fpin;
1107
0
PIX      *pix, *pixt;
1108
0
PIXCMAP  *cmap;
1109
1110
0
    if (!filename)
1111
0
        return ERROR_INT("filename not defined", __func__, 1);
1112
0
    if (!fpout)
1113
0
        return ERROR_INT("stream not defined", __func__, 1);
1114
1115
        /* Read the header */
1116
0
    if (pixReadHeader(filename, &format, &w, &h, &bps, &spp, &iscmap)) {
1117
0
        L_ERROR("failure to read header of %s\n", __func__, filename);
1118
0
        return 1;
1119
0
    }
1120
0
    fprintf(fpout, "===============================================\n"
1121
0
                    "Reading the header:\n");
1122
0
    fprintf(fpout, "  input image format type: %s\n",
1123
0
            ImageFileFormatExtensions[format]);
1124
0
    fprintf(fpout, "  w = %d, h = %d, bps = %d, spp = %d, iscmap = %d\n",
1125
0
            w, h, bps, spp, iscmap);
1126
1127
0
    findFileFormat(filename, &format);
1128
0
    if (format == IFF_JP2) {
1129
0
        fpin = fopenReadStream(filename);
1130
0
        fgetJp2kResolution(fpin, &xres, &yres);
1131
0
        if (fpin) fclose(fpin);
1132
0
        fprintf(fpout, "  xres = %d, yres = %d\n", xres, yres);
1133
0
    } else if (format == IFF_PNG) {
1134
0
        fpin = fopenReadStream(filename);
1135
0
        fgetPngResolution(fpin, &xres, &yres);
1136
0
        if (fpin) fclose(fpin);
1137
0
        fprintf(fpout, "  xres = %d, yres = %d\n", xres, yres);
1138
0
        if (iscmap) {
1139
0
            fpin = fopenReadStream(filename);
1140
0
            fgetPngColormapInfo(fpin, &cmap, &transparency);
1141
0
            if (fpin) fclose(fpin);
1142
0
            if (transparency)
1143
0
                fprintf(fpout, "  colormap has transparency\n");
1144
0
            else
1145
0
                fprintf(fpout, "  colormap does not have transparency\n");
1146
0
            pixcmapWriteStream(fpout, cmap);
1147
0
            pixcmapDestroy(&cmap);
1148
0
        }
1149
0
    } else if (format == IFF_JFIF_JPEG) {
1150
0
        fpin = fopenReadStream(filename);
1151
0
        fgetJpegResolution(fpin, &xres, &yres);
1152
0
        if (fpin) fclose(fpin);
1153
0
        fprintf(fpout, "  xres = %d, yres = %d\n", xres, yres);
1154
0
    }
1155
1156
0
    if (headeronly)
1157
0
        return 0;
1158
1159
        /* Read the full image.  Note that when we read an image that
1160
         * has transparency in a colormap, we convert it to RGBA. */
1161
0
    fprintf(fpout, "===============================================\n"
1162
0
                    "Reading the full image:\n");
1163
1164
        /* Preserve 16 bpp if the format is png */
1165
0
    if (format == IFF_PNG && bps == 16)
1166
0
        l_pngSetReadStrip16To8(0);
1167
1168
0
    if ((pix = pixRead(filename)) == NULL) {
1169
0
        L_ERROR("failure to read full image of %s\n", __func__, filename);
1170
0
        return 1;
1171
0
    }
1172
1173
0
    format = pixGetInputFormat(pix);
1174
0
    pixGetDimensions(pix, &w, &h, &d);
1175
0
    wpl = pixGetWpl(pix);
1176
0
    spp = pixGetSpp(pix);
1177
0
    fprintf(fpout, "  input image format type: %s\n",
1178
0
            ImageFileFormatExtensions[format]);
1179
0
    fprintf(fpout, "  w = %d, h = %d, d = %d, spp = %d, wpl = %d\n",
1180
0
            w, h, d, spp, wpl);
1181
0
    fprintf(fpout, "  xres = %d, yres = %d\n",
1182
0
            pixGetXRes(pix), pixGetYRes(pix));
1183
1184
0
    text = pixGetText(pix);
1185
0
    if (text)  /*  not null */
1186
0
        fprintf(fpout, "  text: %s\n", text);
1187
1188
0
    cmap = pixGetColormap(pix);
1189
0
    if (cmap) {
1190
0
        pixcmapHasColor(cmap, &color);
1191
0
        if (color)
1192
0
            fprintf(fpout, "  colormap exists and has color values:");
1193
0
        else
1194
0
            fprintf(fpout, "  colormap exists and has only gray values:");
1195
0
        pixcmapWriteStream(fpout, pixGetColormap(pix));
1196
0
    }
1197
0
    else
1198
0
        fprintf(fpout, "  colormap does not exist\n");
1199
1200
0
    if (format == IFF_TIFF || format == IFF_TIFF_G4 ||
1201
0
        format == IFF_TIFF_G3 || format == IFF_TIFF_PACKBITS) {
1202
0
        fprintf(fpout, "  Tiff header information:\n");
1203
0
        fpin = fopenReadStream(filename);
1204
0
        tiffGetCount(fpin, &npages);
1205
0
        if (fpin) fclose(fpin);
1206
0
        if (npages == 1)
1207
0
            fprintf(fpout, "    One page in file\n");
1208
0
        else
1209
0
            fprintf(fpout, "    %d pages in file\n", npages);
1210
0
        fprintTiffInfo(fpout, filename);
1211
0
    }
1212
1213
0
    if (d == 1) {
1214
0
        pixCountPixels(pix, &count, NULL);
1215
0
        pixGetDimensions(pix, &w, &h, NULL);
1216
0
        fprintf(fpout, "  1 bpp: foreground pixel fraction ON/Total = %g\n",
1217
0
                (l_float32)count / (l_float32)(w * h));
1218
0
    }
1219
0
    fprintf(fpout, "===============================================\n");
1220
1221
        /* If there is an alpha component, visualize it.  Note that when
1222
         * alpha == 0, the rgb layer is transparent.  We visualize the
1223
         * result when a white background is visible through the
1224
         * transparency layer. */
1225
0
    if (pixGetSpp(pix) == 4) {
1226
0
        pixt = pixDisplayLayersRGBA(pix, 0xffffff00, 600);
1227
0
        pixDisplay(pixt, 100, 100);
1228
0
        pixDestroy(&pixt);
1229
0
    }
1230
1231
0
    if (format == IFF_PNG && bps == 16)
1232
0
        l_pngSetReadStrip16To8(1);  /* return to default if format is png */
1233
1234
0
    pixDestroy(&pix);
1235
0
    return 0;
1236
0
}
1237
1238
1239
/*---------------------------------------------------------------------*
1240
 *             Test function for I/O with different formats            *
1241
 *---------------------------------------------------------------------*/
1242
/*!
1243
 * \brief   ioFormatTest()
1244
 *
1245
 * \param[in]    filename    input image file
1246
 * \return  0 if OK; 1 on error or if the test fails
1247
 *
1248
 * <pre>
1249
 * Notes:
1250
 *      (1) This writes and reads a set of output files losslessly
1251
 *          in different formats to /tmp/format/, and tests that the
1252
 *          result before and after is unchanged.
1253
 *      (2) This should work properly on input images of any depth,
1254
 *          with and without colormaps.
1255
 *      (3) All supported formats are tested for bmp, png, tiff and
1256
 *          non-ascii pnm.  Ascii pnm also works (but who'd ever want
1257
 *          to use it?)   We allow 2 bpp bmp, although it's not
1258
 *          supported elsewhere.  And we don't support reading
1259
 *          16 bpp png, although this can be turned on in pngio.c.
1260
 *      (4) This silently skips png or tiff testing if HAVE_LIBPNG
1261
 *          or HAVE_LIBTIFF are 0, respectively.
1262
 * </pre>
1263
 */
1264
l_ok
1265
ioFormatTest(const char  *filename)
1266
0
{
1267
0
l_int32    w, h, d, depth, equal, problems;
1268
0
l_float32  diff;
1269
0
BOX       *box;
1270
0
PIX       *pixs, *pixc, *pix1, *pix2;
1271
0
PIXCMAP   *cmap;
1272
1273
0
    if (!filename)
1274
0
        return ERROR_INT("filename not defined", __func__, 1);
1275
1276
        /* Read the input file and limit the size */
1277
0
    if ((pix1 = pixRead(filename)) == NULL)
1278
0
        return ERROR_INT("pix1 not made", __func__, 1);
1279
0
    pixGetDimensions(pix1, &w, &h, NULL);
1280
0
    if (w > 250 && h > 250) {  /* take the central 250 x 250 region */
1281
0
        box = boxCreate(w / 2 - 125, h / 2 - 125, 250, 250);
1282
0
        pixs = pixClipRectangle(pix1, box, NULL);
1283
0
        boxDestroy(&box);
1284
0
    } else {
1285
0
        pixs = pixClone(pix1);
1286
0
    }
1287
0
    pixDestroy(&pix1);
1288
1289
0
    lept_mkdir("lept/format");
1290
1291
        /* Note that the reader automatically removes colormaps
1292
         * from 1 bpp BMP images, but not from 8 bpp BMP images.
1293
         * Therefore, if our 8 bpp image initially doesn't have a
1294
         * colormap, we are going to need to remove it from any
1295
         * pix read from a BMP file. */
1296
0
    pixc = pixClone(pixs);  /* laziness */
1297
1298
        /* This does not test the alpha layer pixels, because most
1299
         * formats don't support it.  Remove any alpha.  */
1300
0
    if (pixGetSpp(pixc) == 4)
1301
0
        pixSetSpp(pixc, 3);
1302
0
    cmap = pixGetColormap(pixc);  /* colormap; can be NULL */
1303
0
    d = pixGetDepth(pixc);
1304
1305
0
    problems = FALSE;
1306
1307
        /* ----------------------- BMP -------------------------- */
1308
1309
        /* BMP works for 1, 2, 4, 8 and 32 bpp images.
1310
         * It always writes colormaps for 1 and 8 bpp, so we must
1311
         * remove it after readback if the input image doesn't have
1312
         * a colormap.  Although we can write/read 2 bpp BMP, nobody
1313
         * else can read them! */
1314
0
    if (d == 1 || d == 8) {
1315
0
        L_INFO("write/read bmp\n", __func__);
1316
0
        pixWrite(FILE_BMP, pixc, IFF_BMP);
1317
0
        pix1 = pixRead(FILE_BMP);
1318
0
        if (!cmap)
1319
0
            pix2 = pixRemoveColormap(pix1, REMOVE_CMAP_BASED_ON_SRC);
1320
0
        else
1321
0
            pix2 = pixClone(pix1);
1322
0
        pixEqual(pixc, pix2, &equal);
1323
0
        if (!equal) {
1324
0
            L_INFO("   **** bad bmp image: d = %d ****\n", __func__, d);
1325
0
            problems = TRUE;
1326
0
        }
1327
0
        pixDestroy(&pix1);
1328
0
        pixDestroy(&pix2);
1329
0
    }
1330
1331
0
    if (d == 2 || d == 4 || d == 32) {
1332
0
        L_INFO("write/read bmp\n", __func__);
1333
0
        pixWrite(FILE_BMP, pixc, IFF_BMP);
1334
0
        pix1 = pixRead(FILE_BMP);
1335
0
        pixEqual(pixc, pix1, &equal);
1336
0
        if (!equal) {
1337
0
            L_INFO("   **** bad bmp image: d = %d ****\n", __func__, d);
1338
0
            problems = TRUE;
1339
0
        }
1340
0
        pixDestroy(&pix1);
1341
0
    }
1342
1343
        /* ----------------------- PNG -------------------------- */
1344
0
#if HAVE_LIBPNG
1345
        /* PNG works for all depths, but here, because we strip
1346
         * 16 --> 8 bpp on reading, we don't test png for 16 bpp. */
1347
0
    if (d != 16) {
1348
0
        L_INFO("write/read png\n", __func__);
1349
0
        pixWrite(FILE_PNG, pixc, IFF_PNG);
1350
0
        pix1 = pixRead(FILE_PNG);
1351
0
        pixEqual(pixc, pix1, &equal);
1352
0
        if (!equal) {
1353
0
            L_INFO("   **** bad png image: d = %d ****\n", __func__, d);
1354
0
            problems = TRUE;
1355
0
        }
1356
0
        pixDestroy(&pix1);
1357
0
    }
1358
0
#endif  /* HAVE_LIBPNG */
1359
1360
        /* ----------------------- TIFF -------------------------- */
1361
0
#if HAVE_LIBTIFF && HAVE_LIBJPEG
1362
        /* TIFF works for 1, 2, 4, 8, 16 and 32 bpp images.
1363
         * Because 8 bpp tiff always writes 256 entry colormaps, the
1364
         * colormap sizes may be different for 8 bpp images with
1365
         * colormap; we are testing if the image content is the same.
1366
         * Likewise, the 2 and 4 bpp tiff images with colormaps
1367
         * have colormap sizes 4 and 16, rsp.  This test should
1368
         * work properly on the content, regardless of the number
1369
         * of color entries in pixc. */
1370
1371
        /* tiff uncompressed works for all pixel depths */
1372
0
    L_INFO("write/read uncompressed tiff\n", __func__);
1373
0
    pixWrite(FILE_TIFF, pixc, IFF_TIFF);
1374
0
    pix1 = pixRead(FILE_TIFF);
1375
0
    pixEqual(pixc, pix1, &equal);
1376
0
    if (!equal) {
1377
0
        L_INFO("   **** bad tiff uncompressed image: d = %d ****\n",
1378
0
               __func__, d);
1379
0
        problems = TRUE;
1380
0
    }
1381
0
    pixDestroy(&pix1);
1382
1383
        /* tiff lzw works for all pixel depths */
1384
0
    L_INFO("write/read lzw compressed tiff\n", __func__);
1385
0
    pixWrite(FILE_LZW, pixc, IFF_TIFF_LZW);
1386
0
    pix1 = pixRead(FILE_LZW);
1387
0
    pixEqual(pixc, pix1, &equal);
1388
0
    if (!equal) {
1389
0
        L_INFO("   **** bad tiff lzw compressed image: d = %d ****\n",
1390
0
               __func__, d);
1391
0
        problems = TRUE;
1392
0
    }
1393
0
    pixDestroy(&pix1);
1394
1395
        /* tiff adobe deflate (zip) works for all pixel depths */
1396
0
    L_INFO("write/read zip compressed tiff\n", __func__);
1397
0
    pixWrite(FILE_ZIP, pixc, IFF_TIFF_ZIP);
1398
0
    pix1 = pixRead(FILE_ZIP);
1399
0
    pixEqual(pixc, pix1, &equal);
1400
0
    if (!equal) {
1401
0
        L_INFO("   **** bad tiff zip compressed image: d = %d ****\n",
1402
0
               __func__, d);
1403
0
        problems = TRUE;
1404
0
    }
1405
0
    pixDestroy(&pix1);
1406
1407
        /* tiff jpeg encoding works for grayscale and rgb */
1408
0
    if (d == 8 || d == 32) {
1409
0
        PIX  *pixc1;
1410
0
        L_INFO("write/read jpeg compressed tiff\n", __func__);
1411
0
        if (d == 8 && pixGetColormap(pixc)) {
1412
0
            pixc1 = pixRemoveColormap(pixc, REMOVE_CMAP_BASED_ON_SRC);
1413
0
            pixWrite(FILE_TIFF_JPEG, pixc1, IFF_TIFF_JPEG);
1414
0
            if ((pix1 = pixRead(FILE_TIFF_JPEG)) == NULL) {
1415
0
                L_INFO(" did not read FILE_TIFF_JPEG\n", __func__);
1416
0
                problems = TRUE;
1417
0
            }
1418
0
            pixDestroy(&pixc1);
1419
0
        } else {
1420
0
            pixWrite(FILE_TIFF_JPEG, pixc, IFF_TIFF_JPEG);
1421
0
            pix1 = pixRead(FILE_TIFF_JPEG);
1422
0
            if (d == 8) {
1423
0
                pixCompareGray(pix1, pixc, L_COMPARE_ABS_DIFF, 0, NULL, &diff,
1424
0
                               NULL, NULL);
1425
0
            } else {
1426
0
                pixCompareRGB(pix1, pixc, L_COMPARE_ABS_DIFF, 0, NULL, &diff,
1427
0
                              NULL, NULL);
1428
0
            }
1429
0
            if (diff > 8.0) {
1430
0
                L_INFO("   **** bad tiff jpeg compressed image: "
1431
0
                       "d = %d, diff = %5.2f ****\n", __func__, d, diff);
1432
0
                problems = TRUE;
1433
0
            }
1434
0
        }
1435
0
        pixDestroy(&pix1);
1436
0
    }
1437
1438
        /* tiff g4, g3, rle and packbits work for 1 bpp */
1439
0
    if (d == 1) {
1440
0
        L_INFO("write/read g4 compressed tiff\n", __func__);
1441
0
        pixWrite(FILE_G4, pixc, IFF_TIFF_G4);
1442
0
        pix1 = pixRead(FILE_G4);
1443
0
        pixEqual(pixc, pix1, &equal);
1444
0
        if (!equal) {
1445
0
            L_INFO("   **** bad tiff g4 image ****\n", __func__);
1446
0
            problems = TRUE;
1447
0
        }
1448
0
        pixDestroy(&pix1);
1449
1450
0
        L_INFO("write/read g3 compressed tiff\n", __func__);
1451
0
        pixWrite(FILE_G3, pixc, IFF_TIFF_G3);
1452
0
        pix1 = pixRead(FILE_G3);
1453
0
        pixEqual(pixc, pix1, &equal);
1454
0
        if (!equal) {
1455
0
            L_INFO("   **** bad tiff g3 image ****\n", __func__);
1456
0
            problems = TRUE;
1457
0
        }
1458
0
        pixDestroy(&pix1);
1459
1460
0
        L_INFO("write/read rle compressed tiff\n", __func__);
1461
0
        pixWrite(FILE_RLE, pixc, IFF_TIFF_RLE);
1462
0
        pix1 = pixRead(FILE_RLE);
1463
0
        pixEqual(pixc, pix1, &equal);
1464
0
        if (!equal) {
1465
0
            L_INFO("   **** bad tiff rle image: d = %d ****\n", __func__, d);
1466
0
            problems = TRUE;
1467
0
        }
1468
0
        pixDestroy(&pix1);
1469
1470
0
        L_INFO("write/read packbits compressed tiff\n", __func__);
1471
0
        pixWrite(FILE_PB, pixc, IFF_TIFF_PACKBITS);
1472
0
        pix1 = pixRead(FILE_PB);
1473
0
        pixEqual(pixc, pix1, &equal);
1474
0
        if (!equal) {
1475
0
            L_INFO("   **** bad tiff packbits image: d = %d ****\n",
1476
0
                   __func__, d);
1477
0
            problems = TRUE;
1478
0
        }
1479
0
        pixDestroy(&pix1);
1480
0
    }
1481
0
#endif  /* HAVE_LIBTIFF && HAVE_LIBJPEG */
1482
1483
        /* ----------------------- PNM -------------------------- */
1484
1485
        /* pnm works for 1, 2, 4, 8, 16 and 32 bpp.
1486
         * pnm doesn't have colormaps, so when we write colormapped
1487
         * pix out as pnm, the colormap is removed.  Thus for the test,
1488
         * we must remove the colormap from pixc before testing.  */
1489
0
    L_INFO("write/read pnm\n", __func__);
1490
0
    pixWrite(FILE_PNM, pixc, IFF_PNM);
1491
0
    pix1 = pixRead(FILE_PNM);
1492
0
    if (cmap)
1493
0
        pix2 = pixRemoveColormap(pixc, REMOVE_CMAP_BASED_ON_SRC);
1494
0
    else
1495
0
        pix2 = pixClone(pixc);
1496
0
    pixEqual(pix1, pix2, &equal);
1497
0
    if (!equal) {
1498
0
        L_INFO("   **** bad pnm image: d = %d ****\n", __func__, d);
1499
0
        problems = TRUE;
1500
0
    }
1501
0
    pixDestroy(&pix1);
1502
0
    pixDestroy(&pix2);
1503
1504
        /* ----------------------- GIF -------------------------- */
1505
#if HAVE_LIBGIF
1506
        /* GIF works for only 1 and 8 bpp, colormapped */
1507
    if (d != 8 || !cmap)
1508
        pix1 = pixConvertTo8(pixc, 1);
1509
    else
1510
        pix1 = pixClone(pixc);
1511
    L_INFO("write/read gif\n", __func__);
1512
    pixWrite(FILE_GIF, pix1, IFF_GIF);
1513
    pix2 = pixRead(FILE_GIF);
1514
    pixEqual(pix1, pix2, &equal);
1515
    if (!equal) {
1516
        L_INFO("   **** bad gif image: d = %d ****\n", __func__, d);
1517
        problems = TRUE;
1518
    }
1519
    pixDestroy(&pix1);
1520
    pixDestroy(&pix2);
1521
#endif  /* HAVE_LIBGIF */
1522
1523
        /* ----------------------- JPEG ------------------------- */
1524
0
#if HAVE_LIBJPEG
1525
        /* JPEG works for only 8 bpp gray and rgb */
1526
0
    if (cmap || d > 8)
1527
0
        pix1 = pixConvertTo32(pixc);
1528
0
    else
1529
0
        pix1 = pixConvertTo8(pixc, 0);
1530
0
    depth = pixGetDepth(pix1);
1531
0
    L_INFO("write/read jpeg\n", __func__);
1532
0
    pixWrite(FILE_JPG, pix1, IFF_JFIF_JPEG);
1533
0
    pix2 = pixRead(FILE_JPG);
1534
0
    if (depth == 8) {
1535
0
        pixCompareGray(pix1, pix2, L_COMPARE_ABS_DIFF, 0, NULL, &diff,
1536
0
                       NULL, NULL);
1537
0
    } else {
1538
0
        pixCompareRGB(pix1, pix2, L_COMPARE_ABS_DIFF, 0, NULL, &diff,
1539
0
                      NULL, NULL);
1540
0
    }
1541
0
    if (diff > 8.0) {
1542
0
        L_INFO("   **** bad jpeg image: d = %d, diff = %5.2f ****\n",
1543
0
               __func__, depth, diff);
1544
0
        problems = TRUE;
1545
0
    }
1546
0
    pixDestroy(&pix1);
1547
0
    pixDestroy(&pix2);
1548
0
#endif  /* HAVE_LIBJPEG */
1549
1550
        /* ----------------------- WEBP ------------------------- */
1551
0
#if HAVE_LIBWEBP
1552
        /* WEBP works for rgb and rgba */
1553
0
    if (cmap || d <= 16)
1554
0
        pix1 = pixConvertTo32(pixc);
1555
0
    else
1556
0
        pix1 = pixClone(pixc);
1557
0
    depth = pixGetDepth(pix1);
1558
0
    L_INFO("write/read webp\n", __func__);
1559
0
    pixWrite(FILE_WEBP, pix1, IFF_WEBP);
1560
0
    pix2 = pixRead(FILE_WEBP);
1561
0
    pixCompareRGB(pix1, pix2, L_COMPARE_ABS_DIFF, 0, NULL, &diff, NULL, NULL);
1562
0
    if (diff > 5.0) {
1563
0
        L_INFO("   **** bad webp image: d = %d, diff = %5.2f ****\n",
1564
0
               __func__, depth, diff);
1565
0
        problems = TRUE;
1566
0
    }
1567
0
    pixDestroy(&pix1);
1568
0
    pixDestroy(&pix2);
1569
0
#endif  /* HAVE_LIBWEBP */
1570
1571
        /* ----------------------- JP2K ------------------------- */
1572
#if HAVE_LIBJP2K
1573
        /* JP2K works for only 8 bpp gray, rgb and rgba */
1574
    if (cmap || d > 8)
1575
        pix1 = pixConvertTo32(pixc);
1576
    else
1577
        pix1 = pixConvertTo8(pixc, 0);
1578
    depth = pixGetDepth(pix1);
1579
    L_INFO("write/read jp2k\n", __func__);
1580
    pixWrite(FILE_JP2K, pix1, IFF_JP2);
1581
    pix2 = pixRead(FILE_JP2K);
1582
    if (depth == 8) {
1583
        pixCompareGray(pix1, pix2, L_COMPARE_ABS_DIFF, 0, NULL, &diff,
1584
                       NULL, NULL);
1585
    } else {
1586
        pixCompareRGB(pix1, pix2, L_COMPARE_ABS_DIFF, 0, NULL, &diff,
1587
                      NULL, NULL);
1588
    }
1589
    lept_stderr("diff = %7.3f\n", diff);
1590
    if (diff > 7.0) {
1591
        L_INFO("   **** bad jp2k image: d = %d, diff = %5.2f ****\n",
1592
               __func__, depth, diff);
1593
        problems = TRUE;
1594
    }
1595
    pixDestroy(&pix1);
1596
    pixDestroy(&pix2);
1597
#endif  /* HAVE_LIBJP2K */
1598
1599
0
    if (problems == FALSE)
1600
0
        L_INFO("All formats read and written OK!\n", __func__);
1601
1602
0
    pixDestroy(&pixc);
1603
0
    pixDestroy(&pixs);
1604
0
    return problems;
1605
0
}