Coverage Report

Created: 2024-02-28 06:46

/src/leptonica/src/pdfio1.c
Line
Count
Source (jump to first uncovered line)
1
/*====================================================================*
2
 -  Copyright (C) 2001 Leptonica.  All rights reserved.
3
 -
4
 -  Redistribution and use in source and binary forms, with or without
5
 -  modification, are permitted provided that the following conditions
6
 -  are met:
7
 -  1. Redistributions of source code must retain the above copyright
8
 -     notice, this list of conditions and the following disclaimer.
9
 -  2. Redistributions in binary form must reproduce the above
10
 -     copyright notice, this list of conditions and the following
11
 -     disclaimer in the documentation and/or other materials
12
 -     provided with the distribution.
13
 -
14
 -  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15
 -  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16
 -  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17
 -  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL ANY
18
 -  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19
 -  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20
 -  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21
 -  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22
 -  OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23
 -  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24
 -  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25
 *====================================================================*/
26
27
/*!
28
 * \file pdfio1.c
29
 * <pre>
30
 *
31
 *    Higher-level operations for generating pdf from images.
32
 *    Use poppler's pdftoppm or pdfimages to invert the process,
33
 *    extracting raster images from pdf.
34
 *
35
 *    |=============================================================|
36
 *    |                        Important notes                      |
37
 *    |=============================================================|
38
 *    | Some of these functions require I/O libraries such as       |
39
 *    | libtiff, libjpeg, libpng, libz and libopenjp2.  If you do   |
40
 *    | not have these libraries, some calls will fail.  For        |
41
 *    | example, if you do not have libopenjp2, you cannot write a  |
42
 *    | pdf where transcoding is required to incorporate a          |
43
 *    | jp2k image.                                                 |
44
 *    |                                                             |
45
 *    | You can manually deactivate all pdf writing by setting      |
46
 *    | this in environ.h:                                          |
47
 *    | \code                                                       |
48
 *    |      #define  USE_PDFIO     0                               |
49
 *    | \endcode                                                    |
50
 *    | This will link the stub file pdfiostub.c.                   |
51
 *    |=============================================================|
52
 *
53
 *     Set 1. These functions convert a set of image files
54
 *     to a multi-page pdf file, with one image on each page.
55
 *     All images are rendered at the same (input) resolution.
56
 *     The images can be specified as being in a directory, or they
57
 *     can be in an sarray.  The output pdf can be either a file
58
 *     or an array of bytes in memory.
59
 *
60
 *     Set 2. These functions are a special case of set 1, where
61
 *     no scaling or change in quality is required.  For jpeg, jp2k and
62
 *     tiffg4 images, the bytes in each file can be directly incorporated
63
 *     into the output pdf, and the wrapping up of multiple image
64
 *     files is very fast.  For non-interlaced png, the data bytes
65
 *     including the predictors can also be written directly into the
66
 *     flate pdf data.  For other image formats transcoding is required,
67
 *     where the image data is first decompressed and then flate (gzip),
68
 *     DCT (jpeg) or tiffg4 (1 bpp) encodings are generated.
69
 *
70
 *     Set 3. These functions convert a set of images in memory
71
 *     to a multi-page pdf, with one image on each page.  The pdf
72
 *     output can be either a file or an array of bytes in memory.
73
 *
74
 *     Set 4. These functions implement a pdf output "device driver"
75
 *     for wrapping (encoding) any number of images on a single page
76
 *     in pdf.  The input can be either an image file or a Pix;
77
 *     the pdf output can be either a file or an array of bytes in memory.
78
 *
79
 *     Set 5. These "segmented" functions take a set of image
80
 *     files, along with optional segmentation information, and
81
 *     generate a multi-page pdf file, where each page consists
82
 *     in general of a mixed raster pdf of image and non-image regions.
83
 *     The segmentation information for each page can be input as
84
 *     either a mask over the image parts, or as a Boxa of those
85
 *     regions.
86
 *
87
 *     Set 6. These "segmented" functions convert an image and
88
 *     an optional Boxa of image regions into a mixed raster pdf file
89
 *     for the page.  The input image can be either a file or a Pix.
90
 *
91
 *     Set 7. These functions take a set of single-page pdf files
92
 *     and concatenate them into a multi-page pdf.  The input can be
93
 *     a set of either single page pdf files or pdf 'strings' in memory.
94
 *     The output can be either a file or an array of bytes in memory.
95
 *
96
 *     The images in the pdf file can be rendered using a pdf viewer,
97
 *     such as evince, gv, xpdf or acroread.
98
 *
99
 *     Reference on the pdf file format:
100
 *         http://www.adobe.com/devnet/pdf/pdf_reference_archive.html
101
 *
102
 *     1. Convert specified image files to pdf (one image file per page)
103
 *          l_int32             convertFilesToPdf()
104
 *          l_int32             saConvertFilesToPdf()
105
 *          l_int32             saConvertFilesToPdfData()
106
 *          l_int32             selectDefaultPdfEncoding()
107
 *
108
 *     2. Convert specified image files to pdf without scaling
109
 *          l_int32             convertUnscaledFilesToPdf()
110
 *          l_int32             saConvertUnscaledFilesToPdf()
111
 *          l_int32             saConvertUnscaledFilesToPdfData()
112
 *          l_int32             convertUnscaledToPdfData()
113
 *
114
 *     3. Convert multiple images to pdf (one image per page)
115
 *          l_int32             pixaConvertToPdf()
116
 *          l_int32             pixaConvertToPdfData()
117
 *
118
 *     4. Single page, multi-image converters
119
 *          l_int32             convertToPdf()
120
 *          l_int32             convertImageDataToPdf()
121
 *          l_int32             convertToPdfData()
122
 *          l_int32             convertImageDataToPdfData()
123
 *          l_int32             pixConvertToPdf()
124
 *          l_int32             pixWriteStreamPdf()
125
 *          l_int32             pixWriteMemPdf()
126
 *
127
 *     5. Segmented multi-page, multi-image converter
128
 *          l_int32             convertSegmentedFilesToPdf()
129
 *          BOXAA              *convertNumberedMasksToBoxaa()
130
 *
131
 *     6. Segmented single page, multi-image converters
132
 *          l_int32             convertToPdfSegmented()
133
 *          l_int32             pixConvertToPdfSegmented()
134
 *          l_int32             convertToPdfDataSegmented()
135
 *          l_int32             pixConvertToPdfDataSegmented()
136
 *
137
 *     7. Multipage concatenation
138
 *          l_int32             concatenatePdf()
139
 *          l_int32             saConcatenatePdf()
140
 *          l_int32             ptraConcatenatePdf()
141
 *          l_int32             concatenatePdfToData()
142
 *          l_int32             saConcatenatePdfToData()
143
 *
144
 *     The top-level multi-image functions can be visualized as follows:
145
 *          Output pdf data to file:
146
 *             convertToPdf()  and  convertImageDataToPdf()
147
 *                     --> pixConvertToPdf()
148
 *                           --> pixConvertToPdfData()
149
 *
150
 *          Output pdf data to array in memory:
151
 *             convertToPdfData()  and  convertImageDataToPdfData()
152
 *                     --> pixConvertToPdfData()
153
 *
154
 *     The top-level segmented image functions can be visualized as follows:
155
 *          Output pdf data to file:
156
 *             convertToPdfSegmented()
157
 *                     --> pixConvertToPdfSegmented()
158
 *                           --> pixConvertToPdfDataSegmented()
159
 *
160
 *          Output pdf data to array in memory:
161
 *             convertToPdfDataSegmented()
162
 *                     --> pixConvertToPdfDataSegmented()
163
 *
164
 *     For multi-page concatenation, there are three different types of input
165
 *        (1) directory and optional filename filter
166
 *        (2) sarray of filenames
167
 *        (3) ptra of byte arrays of pdf data
168
 *     and two types of output for the concatenated pdf data
169
 *        (1) filename
170
 *        (2) data array and size
171
 *     High-level interfaces are given for each of the six combinations.
172
 *
173
 *     Note: When wrapping small images into pdf, it is useful to give
174
 *     them a relatively low resolution value, to avoid rounding errors
175
 *     when rendering the images.  For example, if you want an image
176
 *     of width w pixels to be 5 inches wide on a screen, choose a
177
 *     resolution w/5.
178
 *
179
 *     The very fast functions in section (2) require neither transcoding
180
 *     nor parsing of the compressed jpeg file.  With three types of image
181
 *     compression, the compressed strings can be incorporated into
182
 *     the pdf data without decompression and re-encoding: jpeg, jp2k
183
 *     and png.  The DCTDecode and JPXDecode filters can handle the
184
 *     entire jpeg and jp2k encoded string as a byte array in the pdf file.
185
 *     The FlateDecode filter can handle the png compressed image data,
186
 *     including predictors that occur as the first byte in each
187
 *     raster line, but it is necessary to store only the png IDAT chunk
188
 *     data in the pdf array.  The alternative for wrapping png images
189
 *     is to transcode them: uncompress into a raster (a pix) and then
190
 *     gzip the raster data.  This typically results in a larger pdf file
191
 *     because it doesn't use the two-dimensional png predictor.
192
 *     Colormaps, which are found in png PLTE chunks, must always be
193
 *     pulled out and included separately in the pdf.  For CCITT-G4
194
 *     compression, you can not simply include a tiff G4 file -- you must
195
 *     either parse it and extract the G4 compressed data within it,
196
 *     or uncompress to a raster and G4 compress again.
197
 * </pre>
198
 */
199
200
#ifdef HAVE_CONFIG_H
201
#include <config_auto.h>
202
#endif  /* HAVE_CONFIG_H */
203
204
#include <string.h>
205
#include <math.h>
206
#include "allheaders.h"
207
208
/* --------------------------------------------*/
209
#if  USE_PDFIO   /* defined in environ.h */
210
 /* --------------------------------------------*/
211
212
    /* Typical scan resolution in ppi (pixels/inch) */
213
static const l_int32  DefaultInputRes = 300;  /* pixels per inch */
214
215
/*---------------------------------------------------------------------*
216
 *    Convert specified image files to pdf (one image file per page)   *
217
 *---------------------------------------------------------------------*/
218
/*!
219
 * \brief   convertFilesToPdf()
220
 *
221
 * \param[in]    dirname       directory name containing images
222
 * \param[in]    substr        [optional] substring filter on filenames;
223
 *                             can be null
224
 * \param[in]    res           input resolution of all images
225
 * \param[in]    scalefactor   scaling factor applied to each image; > 0.0
226
 * \param[in]    type          encoding type (L_JPEG_ENCODE, L_G4_ENCODE,
227
 *                             L_FLATE_ENCODE, L_JP2K_ENCODE or
228
 *                             L_DEFAULT_ENCODE for default)
229
 * \param[in]    quality       for jpeg: 1-100; 0 for default (75)
230
 *                             for jp2k: 27-45; 0 for default (34)
231
 * \param[in]    title         [optional] pdf title; can be null
232
 * \param[in]    fileout       pdf file of all images
233
 * \return  0 if OK, 1 on error
234
 *
235
 * <pre>
236
 * Notes:
237
 *      (1) If %substr is not NULL, only image filenames that contain
238
 *          the substring can be used.  If %substr == NULL, all files
239
 *          in the directory are used.
240
 *      (2) The files in the directory, after optional filtering by
241
 *          the substring, are lexically sorted in increasing order
242
 *          before concatenation.
243
 *      (3) The scalefactor is applied to each image before encoding.
244
 *          If you enter a value <= 0.0, it will be set to 1.0.
245
 *      (4) Specifying one of the four encoding types for %type forces
246
 *          all images to be compressed with that type.  Use 0 to have
247
 *          the type determined for each image based on depth and whether
248
 *          or not it has a colormap.
249
 * </pre>
250
 */
251
l_ok
252
convertFilesToPdf(const char  *dirname,
253
                  const char  *substr,
254
                  l_int32      res,
255
                  l_float32    scalefactor,
256
                  l_int32      type,
257
                  l_int32      quality,
258
                  const char  *title,
259
                  const char  *fileout)
260
0
{
261
0
l_int32  ret;
262
0
SARRAY  *sa;
263
264
0
    if (!dirname)
265
0
        return ERROR_INT("dirname not defined", __func__, 1);
266
0
    if (!fileout)
267
0
        return ERROR_INT("fileout not defined", __func__, 1);
268
269
0
    if ((sa = getSortedPathnamesInDirectory(dirname, substr, 0, 0)) == NULL)
270
0
        return ERROR_INT("sa not made", __func__, 1);
271
0
    ret = saConvertFilesToPdf(sa, res, scalefactor, type, quality,
272
0
                              title, fileout);
273
0
    sarrayDestroy(&sa);
274
0
    return ret;
275
0
}
276
277
278
/*!
279
 * \brief   saConvertFilesToPdf()
280
 *
281
 * \param[in]    sa            string array of pathnames for images
282
 * \param[in]    res           input resolution of all images
283
 * \param[in]    scalefactor   scaling factor applied to each image; > 0.0
284
 * \param[in]    type          encoding type (L_JPEG_ENCODE, L_G4_ENCODE,
285
 *                             L_FLATE_ENCODE, L_JP2K_ENCODE or
286
 *                             L_DEFAULT_ENCODE for default)
287
 * \param[in]    quality       for jpeg: 1-100; 0 for default (75)
288
 *                             for jp2k: 27-45; 0 for default (34)
289
 * \param[in]    title         [optional] pdf title; can be null
290
 * \param[in]    fileout       pdf file of all images
291
 * \return  0 if OK, 1 on error
292
 *
293
 * <pre>
294
 * Notes:
295
 *      (1) See convertFilesToPdf().
296
 * </pre>
297
 */
298
l_ok
299
saConvertFilesToPdf(SARRAY      *sa,
300
                    l_int32      res,
301
                    l_float32    scalefactor,
302
                    l_int32      type,
303
                    l_int32      quality,
304
                    const char  *title,
305
                    const char  *fileout)
306
0
{
307
0
l_uint8  *data;
308
0
l_int32   ret;
309
0
size_t    nbytes;
310
311
0
    if (!sa)
312
0
        return ERROR_INT("sa not defined", __func__, 1);
313
314
0
    ret = saConvertFilesToPdfData(sa, res, scalefactor, type, quality,
315
0
                                  title, &data, &nbytes);
316
0
    if (ret) {
317
0
        if (data) LEPT_FREE(data);
318
0
        return ERROR_INT("pdf data not made", __func__, 1);
319
0
    }
320
321
0
    ret = l_binaryWrite(fileout, "w", data, nbytes);
322
0
    LEPT_FREE(data);
323
0
    if (ret)
324
0
        L_ERROR("pdf data not written to file\n", __func__);
325
0
    return ret;
326
0
}
327
328
329
/*!
330
 * \brief   saConvertFilesToPdfData()
331
 *
332
 * \param[in]    sa            string array of pathnames for images
333
 * \param[in]    res           input resolution of all images
334
 * \param[in]    scalefactor   scaling factor applied to each image; > 0.0
335
 * \param[in]    type          encoding type (L_JPEG_ENCODE, L_G4_ENCODE,
336
 *                             L_FLATE_ENCODE, L_JP2K_ENCODE or
337
 *                             L_DEFAULT_ENCODE for default)
338
 * \param[in]    quality       for jpeg: 1-100; 0 for default (75)
339
 *                             for jp2k: 27-45; 0 for default (34)
340
 * \param[in]    title         [optional] pdf title; can be null
341
 * \param[out]   pdata         output pdf data (of all images
342
 * \param[out]   pnbytes       size of output pdf data
343
 * \return  0 if OK, 1 on error
344
 *
345
 * <pre>
346
 * Notes:
347
 *      (1) See convertFilesToPdf().
348
 * </pre>
349
 */
350
l_ok
351
saConvertFilesToPdfData(SARRAY      *sa,
352
                        l_int32      res,
353
                        l_float32    scalefactor,
354
                        l_int32      type,
355
                        l_int32      quality,
356
                        const char  *title,
357
                        l_uint8    **pdata,
358
                        size_t      *pnbytes)
359
0
{
360
0
char     *fname;
361
0
l_uint8  *imdata;
362
0
l_int32   i, n, ret, pagetype, npages, scaledres;
363
0
size_t    imbytes;
364
0
L_BYTEA  *ba;
365
0
PIX      *pixs, *pix;
366
0
L_PTRA   *pa_data;
367
368
0
    if (!pdata)
369
0
        return ERROR_INT("&data not defined", __func__, 1);
370
0
    *pdata = NULL;
371
0
    if (!pnbytes)
372
0
        return ERROR_INT("&nbytes not defined", __func__, 1);
373
0
    *pnbytes = 0;
374
0
    if (!sa)
375
0
        return ERROR_INT("sa not defined", __func__, 1);
376
0
    if (scalefactor <= 0.0) scalefactor = 1.0;
377
0
    if (type != L_JPEG_ENCODE && type != L_G4_ENCODE &&
378
0
        type != L_FLATE_ENCODE && type != L_JP2K_ENCODE) {
379
0
        type = L_DEFAULT_ENCODE;
380
0
    }
381
382
        /* Generate all the encoded pdf strings */
383
0
    n = sarrayGetCount(sa);
384
0
    pa_data = ptraCreate(n);
385
0
    for (i = 0; i < n; i++) {
386
0
        if (i && (i % 10 == 0)) lept_stderr(".. %d ", i);
387
0
        fname = sarrayGetString(sa, i, L_NOCOPY);
388
0
        if ((pixs = pixRead(fname)) == NULL) {
389
0
            L_ERROR("image not readable from file %s\n", __func__, fname);
390
0
            continue;
391
0
        }
392
0
        if (scalefactor != 1.0)
393
0
            pix = pixScale(pixs, scalefactor, scalefactor);
394
0
        else
395
0
            pix = pixClone(pixs);
396
0
        pixDestroy(&pixs);
397
0
        scaledres = (l_int32)(res * scalefactor);
398
399
            /* Select the encoding type */
400
0
        if (type != L_DEFAULT_ENCODE) {
401
0
            pagetype = type;
402
0
        } else if (selectDefaultPdfEncoding(pix, &pagetype) != 0) {
403
0
            pixDestroy(&pix);
404
0
            L_ERROR("encoding type selection failed for file %s\n",
405
0
                    __func__, fname);
406
0
            continue;
407
0
        }
408
409
0
        ret = pixConvertToPdfData(pix, pagetype, quality, &imdata, &imbytes,
410
0
                                  0, 0, scaledres, title, NULL, 0);
411
0
        pixDestroy(&pix);
412
0
        if (ret) {
413
0
            LEPT_FREE(imdata);
414
0
            L_ERROR("pdf encoding failed for %s\n", __func__, fname);
415
0
            continue;
416
0
        }
417
0
        ba = l_byteaInitFromMem(imdata, imbytes);
418
0
        LEPT_FREE(imdata);
419
0
        ptraAdd(pa_data, ba);
420
0
    }
421
0
    ptraGetActualCount(pa_data, &npages);
422
0
    if (npages == 0) {
423
0
        L_ERROR("no pdf files made\n", __func__);
424
0
        ptraDestroy(&pa_data, FALSE, FALSE);
425
0
        return 1;
426
0
    }
427
428
        /* Concatenate them */
429
0
    lept_stderr("\nconcatenating ... ");
430
0
    ret = ptraConcatenatePdfToData(pa_data, NULL, pdata, pnbytes);
431
0
    lept_stderr("done\n");
432
433
0
    ptraGetActualCount(pa_data, &npages);  /* recalculate in case it changes */
434
0
    for (i = 0; i < npages; i++) {
435
0
        ba = (L_BYTEA *)ptraRemove(pa_data, i, L_NO_COMPACTION);
436
0
        l_byteaDestroy(&ba);
437
0
    }
438
0
    ptraDestroy(&pa_data, FALSE, FALSE);
439
0
    return ret;
440
0
}
441
442
443
/*!
444
 * \brief   selectDefaultPdfEncoding()
445
 *
446
 * \param[in]    pix
447
 * \param[out]   ptype     L_G4_ENCODE, L_JPEG_ENCODE, L_FLATE_ENCODE
448
 * \return  0 if OK, 1 on error
449
 *
450
 * <pre>
451
 * Notes:
452
 *      (1) This attempts to choose an encoding for the pix that results
453
 *          in the smallest file, assuming that if jpeg encoded, it will
454
 *          use quality = 75.  The decision is approximate, in that
455
 *          (a) all colormapped images will be losslessly encoded with
456
 *          gzip (flate), and (b) an image with less than about 20 colors
457
 *          is likely to be smaller if flate encoded than if encoded
458
 *          as a jpeg (dct).  For example, an image made by pixScaleToGray3()
459
 *          will have 10 colors, and flate encoding will give about
460
 *          twice the compression as jpeg with quality = 75.
461
 *      (2) We could have used L_JP2K_ENCODE instead of L_JPEG_ENCODE.
462
 *          However, the jp2k compression is not much better than jpeg, and
463
 *          the jpeg library is more commonly available than the jp2k library.
464
 * </pre>
465
 */
466
l_ok
467
selectDefaultPdfEncoding(PIX      *pix,
468
                         l_int32  *ptype)
469
0
{
470
0
l_int32   w, h, d, factor, ncolors;
471
0
PIXCMAP  *cmap;
472
473
0
    if (!ptype)
474
0
        return ERROR_INT("&type not defined", __func__, 1);
475
0
    *ptype = L_FLATE_ENCODE;  /* default universal encoding */
476
0
    if (!pix)
477
0
        return ERROR_INT("pix not defined", __func__, 1);
478
0
    pixGetDimensions(pix, &w, &h, &d);
479
0
    cmap = pixGetColormap(pix);
480
0
    if (d == 8 && !cmap) {
481
0
        factor = L_MAX(1, (l_int32)sqrt((l_float64)(w * h) / 20000.));
482
0
        pixNumColors(pix, factor, &ncolors);
483
0
        if (ncolors < 20)
484
0
            *ptype = L_FLATE_ENCODE;
485
0
        else
486
0
            *ptype = L_JPEG_ENCODE;
487
0
    } else if (d == 1) {
488
0
        *ptype = L_G4_ENCODE;
489
0
    } else if (cmap || d == 2 || d == 4) {
490
0
        *ptype = L_FLATE_ENCODE;
491
0
    } else if (d == 8 || d == 32) {
492
0
        *ptype = L_JPEG_ENCODE;
493
0
    } else if (d == 16) {
494
0
        *ptype = L_FLATE_ENCODE;
495
0
    } else {
496
0
        return ERROR_INT("type selection failure", __func__, 1);
497
0
    }
498
499
0
    return 0;
500
0
}
501
502
503
/*---------------------------------------------------------------------*
504
 *          Convert specified image files to pdf without scaling       *
505
 *---------------------------------------------------------------------*/
506
/*!
507
 * \brief   convertUnscaledFilesToPdf()
508
 *
509
 * \param[in]    dirname   directory name containing images
510
 * \param[in]    substr    [optional] substring filter on filenames;
511
 *                         can be null
512
 * \param[in]    title     [optional] pdf title; can be null
513
 * \param[in]    fileout   pdf file of all images
514
 * \return  0 if OK, 1 on error
515
 *
516
 * <pre>
517
 * Notes:
518
 *      (1) If %substr is not NULL, only image filenames that contain
519
 *          the substring can be used.  If %substr == NULL, all files
520
 *          in the directory are used.
521
 *      (2) The files in the directory, after optional filtering by
522
 *          the substring, are lexically sorted in increasing order
523
 *          before concatenation.
524
 *      (3) This is very fast for jpeg, jp2k and some png files,
525
 *          because the compressed data is wrapped up and concatenated.
526
 *          For other types of png, the images must be read and recompressed.
527
 * </pre>
528
 */
529
l_ok
530
convertUnscaledFilesToPdf(const char  *dirname,
531
                          const char  *substr,
532
                          const char  *title,
533
                          const char  *fileout)
534
0
{
535
0
l_int32  ret;
536
0
SARRAY  *sa;
537
538
0
    if (!dirname)
539
0
        return ERROR_INT("dirname not defined", __func__, 1);
540
0
    if (!fileout)
541
0
        return ERROR_INT("fileout not defined", __func__, 1);
542
543
0
    if ((sa = getSortedPathnamesInDirectory(dirname, substr, 0, 0)) == NULL)
544
0
        return ERROR_INT("sa not made", __func__, 1);
545
0
    ret = saConvertUnscaledFilesToPdf(sa, title, fileout);
546
0
    sarrayDestroy(&sa);
547
0
    return ret;
548
0
}
549
550
551
/*!
552
 * \brief   saConvertUnscaledFilesToPdf()
553
 *
554
 * \param[in]    sa        string array of pathnames for images
555
 * \param[in]    title     [optional] pdf title; can be null
556
 * \param[in]    fileout   pdf file of all images
557
 * \return  0 if OK, 1 on error
558
 *
559
 * <pre>
560
 * Notes:
561
 *      (1) See convertUnscaledFilesToPdf().
562
 * </pre>
563
 */
564
l_ok
565
saConvertUnscaledFilesToPdf(SARRAY      *sa,
566
                            const char  *title,
567
                            const char  *fileout)
568
0
{
569
0
l_uint8  *data;
570
0
l_int32   ret;
571
0
size_t    nbytes;
572
573
0
    if (!sa)
574
0
        return ERROR_INT("sa not defined", __func__, 1);
575
576
0
    ret = saConvertUnscaledFilesToPdfData(sa, title, &data, &nbytes);
577
0
    if (ret) {
578
0
        if (data) LEPT_FREE(data);
579
0
        return ERROR_INT("pdf data not made", __func__, 1);
580
0
    }
581
582
0
    ret = l_binaryWrite(fileout, "w", data, nbytes);
583
0
    LEPT_FREE(data);
584
0
    if (ret)
585
0
        L_ERROR("pdf data not written to file\n", __func__);
586
0
    return ret;
587
0
}
588
589
590
/*!
591
 * \brief   saConvertUnscaledFilesToPdfData()
592
 *
593
 * \param[in]    sa        string array of pathnames for image files
594
 * \param[in]    title     [optional] pdf title; can be null
595
 * \param[out]   pdata     output pdf data (of all images)
596
 * \param[out]   pnbytes   size of output pdf data
597
 * \return  0 if OK, 1 on error
598
 *
599
 * <pre>
600
 * Notes:
601
 *      (1) This is very fast for jpeg, jp2k and some png files,
602
 *          because the compressed data is wrapped up and concatenated.
603
 *          For other types of png, the images must be read and recompressed.
604
 * </pre>
605
 */
606
l_ok
607
saConvertUnscaledFilesToPdfData(SARRAY      *sa,
608
                                const char  *title,
609
                                l_uint8    **pdata,
610
                                size_t      *pnbytes)
611
0
{
612
0
char         *fname;
613
0
l_uint8      *imdata;
614
0
l_int32       i, n, ret, npages;
615
0
size_t        imbytes;
616
0
L_BYTEA      *ba;
617
0
L_PTRA       *pa_data;
618
619
0
    if (!pdata)
620
0
        return ERROR_INT("&data not defined", __func__, 1);
621
0
    *pdata = NULL;
622
0
    if (!pnbytes)
623
0
        return ERROR_INT("&nbytes not defined", __func__, 1);
624
0
    *pnbytes = 0;
625
0
    if (!sa)
626
0
        return ERROR_INT("sa not defined", __func__, 1);
627
628
        /* Generate all the encoded pdf strings */
629
0
    n = sarrayGetCount(sa);
630
0
    pa_data = ptraCreate(n);
631
0
    for (i = 0; i < n; i++) {
632
0
        if (i && (i % 10 == 0)) lept_stderr(".. %d ", i);
633
0
        fname = sarrayGetString(sa, i, L_NOCOPY);
634
635
            /* Generate the pdf data */
636
0
        if (convertUnscaledToPdfData(fname, title, &imdata, &imbytes))
637
0
            continue;
638
639
            /* ... and add it to the array of single page data */
640
0
        ba = l_byteaInitFromMem(imdata, imbytes);
641
0
        if (imdata) LEPT_FREE(imdata);
642
0
        ptraAdd(pa_data, ba);
643
0
    }
644
0
    ptraGetActualCount(pa_data, &npages);
645
0
    if (npages == 0) {
646
0
        L_ERROR("no pdf files made\n", __func__);
647
0
        ptraDestroy(&pa_data, FALSE, FALSE);
648
0
        return 1;
649
0
    }
650
651
        /* Concatenate to generate a multipage pdf */
652
0
    lept_stderr("\nconcatenating ... ");
653
0
    ret = ptraConcatenatePdfToData(pa_data, NULL, pdata, pnbytes);
654
0
    lept_stderr("done\n");
655
656
        /* Clean up */
657
0
    ptraGetActualCount(pa_data, &npages);  /* maybe failed to read some files */
658
0
    for (i = 0; i < npages; i++) {
659
0
        ba = (L_BYTEA *)ptraRemove(pa_data, i, L_NO_COMPACTION);
660
0
        l_byteaDestroy(&ba);
661
0
    }
662
0
    ptraDestroy(&pa_data, FALSE, FALSE);
663
0
    return ret;
664
0
}
665
666
667
/*!
668
 * \brief   convertUnscaledToPdfData()
669
 *
670
 * \param[in]    fname      of image file in all formats
671
 * \param[in]    title      [optional] pdf title; can be null
672
 * \param[out]   pdata      output pdf data for image
673
 * \param[out]   pnbytes    size of output pdf data
674
 * \return  0 if OK, 1 on error
675
 *
676
 * <pre>
677
 * Notes:
678
 *      (1) This is very fast for jpeg, jp2k and some png files,
679
 *          because the compressed data is wrapped up and concatenated.
680
 *          For other types of png, the images must be read and recompressed.
681
 * </pre>
682
 */
683
l_ok
684
convertUnscaledToPdfData(const char  *fname,
685
                         const char  *title,
686
                         l_uint8    **pdata,
687
                         size_t      *pnbytes)
688
0
{
689
0
l_int32       format;
690
0
L_COMP_DATA  *cid;
691
692
0
    if (!pdata)
693
0
        return ERROR_INT("&data not defined", __func__, 1);
694
0
    *pdata = NULL;
695
0
    if (!pnbytes)
696
0
        return ERROR_INT("&nbytes not defined", __func__, 1);
697
0
    *pnbytes = 0;
698
0
    if (!fname)
699
0
        return ERROR_INT("fname not defined", __func__, 1);
700
701
0
    findFileFormat(fname, &format);
702
0
    if (format == IFF_UNKNOWN) {
703
0
        L_WARNING("file %s format is unknown; skip\n", __func__, fname);
704
0
        return 1;
705
0
    }
706
0
    if (format == IFF_PS || format == IFF_LPDF) {
707
0
        L_WARNING("file %s format is %d; skip\n", __func__, fname, format);
708
0
        return 1;
709
0
    }
710
711
        /* Generate the image data required for pdf generation, always
712
         * in binary (not ascii85) coding.  Note that jpeg, jp2k and some
713
         * png files are not transcoded.  */
714
0
    l_generateCIDataForPdf(fname, NULL, 0, &cid);
715
0
    if (!cid) {
716
0
        L_ERROR("file %s format is %d; unreadable\n", __func__, fname, format);
717
0
        return 1;
718
0
    }
719
720
        /* Generate the pdf string for this page (image).  This destroys
721
         * the cid by attaching it to an lpd and destroying the lpd. */
722
0
    cidConvertToPdfData(cid, title, pdata, pnbytes);
723
0
    return 0;
724
0
}
725
726
727
/*---------------------------------------------------------------------*
728
 *          Convert multiple images to pdf (one image per page)        *
729
 *---------------------------------------------------------------------*/
730
/*!
731
 * \brief   pixaConvertToPdf()
732
 *
733
 * \param[in]    pixa          containing images all at the same resolution
734
 * \param[in]    res           override the resolution of each input image,
735
 *                             in ppi; use 0 to respect the resolution
736
 *                             embedded in the input images
737
 * \param[in]    scalefactor   scaling factor applied to each image; > 0.0
738
 * \param[in]    type          encoding type (L_JPEG_ENCODE, L_G4_ENCODE,
739
 *                             L_FLATE_ENCODE, L_JP2K_ENCODE, or
740
 *                             L_DEFAULT_ENCODE for default)
741
 * \param[in]    quality       for jpeg: 1-100; 0 for default (75)
742
 *                             for jp2k: 27-45; 0 for default (34)
743
 * \param[in]    title         [optional] pdf title; can be null
744
 * \param[in]    fileout       pdf file of all images
745
 * \return  0 if OK, 1 on error
746
 *
747
 * <pre>
748
 * Notes:
749
 *      (1) The images are encoded with G4 if 1 bpp; JPEG if 8 bpp without
750
 *          colormap and many colors, or 32 bpp; FLATE for anything else.
751
 *      (2) The scalefactor must be > 0.0; otherwise it is set to 1.0.
752
 *      (3) Specifying one of the three encoding types for %type forces
753
 *          all images to be compressed with that type.  Use 0 to have
754
 *          the type determined for each image based on depth and whether
755
 *          or not it has a colormap.
756
 * </pre>
757
 */
758
l_ok
759
pixaConvertToPdf(PIXA        *pixa,
760
                 l_int32      res,
761
                 l_float32    scalefactor,
762
                 l_int32      type,
763
                 l_int32      quality,
764
                 const char  *title,
765
                 const char  *fileout)
766
0
{
767
0
l_uint8  *data;
768
0
l_int32   ret;
769
0
size_t    nbytes;
770
771
0
    if (!pixa)
772
0
        return ERROR_INT("pixa not defined", __func__, 1);
773
774
0
    ret = pixaConvertToPdfData(pixa, res, scalefactor, type, quality,
775
0
                               title, &data, &nbytes);
776
0
    if (ret) {
777
0
        LEPT_FREE(data);
778
0
        return ERROR_INT("conversion to pdf failed", __func__, 1);
779
0
    }
780
781
0
    ret = l_binaryWrite(fileout, "w", data, nbytes);
782
0
    LEPT_FREE(data);
783
0
    if (ret)
784
0
        L_ERROR("pdf data not written to file\n", __func__);
785
0
    return ret;
786
0
}
787
788
789
/*!
790
 * \brief   pixaConvertToPdfData()
791
 *
792
 * \param[in]    pixa           containing images all at the same resolution
793
 * \param[in]    res            input resolution of all images
794
 * \param[in]    scalefactor    scaling factor applied to each image; > 0.0; <50
795
 * \param[in]    type           encoding type (L_JPEG_ENCODE, L_G4_ENCODE,
796
 *                              L_FLATE_ENCODE, L_JP2K_ENCODE, or
797
 *                              L_DEFAULT_ENCODE for default)
798
 * \param[in]    quality        for jpeg: 1-100; 0 for default (75)
799
 *                              for jp2k: 27-45; 0 for default (34)
800
 * \param[in]    title          [optional] pdf title; can be null
801
 * \param[out]   pdata          output pdf data of all images
802
 * \param[out]   pnbytes        size of output pdf data
803
 * \return  0 if OK, 1 on error
804
 *
805
 * <pre>
806
 * Notes:
807
 *      (1) See pixaConvertToPdf().
808
 * </pre>
809
 */
810
l_ok
811
pixaConvertToPdfData(PIXA        *pixa,
812
                     l_int32      res,
813
                     l_float32    scalefactor,
814
                     l_int32      type,
815
                     l_int32      quality,
816
                     const char  *title,
817
                     l_uint8    **pdata,
818
                     size_t      *pnbytes)
819
0
{
820
0
l_uint8  *imdata;
821
0
l_int32   i, n, ret, scaledres, pagetype;
822
0
size_t    imbytes;
823
0
L_BYTEA  *ba;
824
0
PIX      *pixs, *pix;
825
0
L_PTRA   *pa_data;
826
827
0
    if (!pdata)
828
0
        return ERROR_INT("&data not defined", __func__, 1);
829
0
    *pdata = NULL;
830
0
    if (!pnbytes)
831
0
        return ERROR_INT("&nbytes not defined", __func__, 1);
832
0
    *pnbytes = 0;
833
0
    if (!pixa)
834
0
        return ERROR_INT("pixa not defined", __func__, 1);
835
0
    if (scalefactor <= 0.0) scalefactor = 1.0;
836
0
    if (scalefactor >= 50.0)
837
0
        return ERROR_INT("scalefactor too large", __func__, 1);
838
0
    if (type != L_DEFAULT_ENCODE && type != L_JPEG_ENCODE &&
839
0
        type != L_G4_ENCODE && type != L_FLATE_ENCODE &&
840
0
        type != L_JP2K_ENCODE) {
841
0
        L_WARNING("invalid compression type; using per-page default\n",
842
0
                  __func__);
843
0
        type = L_DEFAULT_ENCODE;
844
0
    }
845
0
    if (quality < 0 || quality > 100)
846
0
        return ERROR_INT("invalid quality", __func__, 1);
847
848
        /* Generate all the encoded pdf strings */
849
0
    n = pixaGetCount(pixa);
850
0
    pa_data = ptraCreate(n);
851
0
    for (i = 0; i < n; i++) {
852
0
        if ((pixs = pixaGetPix(pixa, i, L_CLONE)) == NULL) {
853
0
            L_ERROR("pixs[%d] not retrieved\n", __func__, i);
854
0
            continue;
855
0
        }
856
0
        if (scalefactor != 1.0)
857
0
            pix = pixScale(pixs, scalefactor, scalefactor);
858
0
        else
859
0
            pix = pixClone(pixs);
860
0
        pixDestroy(&pixs);
861
0
        if (!pix) {
862
0
            L_ERROR("pix[%d] not made\n", __func__, i);
863
0
            continue;
864
0
        }
865
0
        scaledres = (l_int32)(res * scalefactor);
866
867
            /* Select the encoding type */
868
0
        if (type != L_DEFAULT_ENCODE) {
869
0
            pagetype = type;
870
0
        } else if (selectDefaultPdfEncoding(pix, &pagetype) != 0) {
871
0
            L_ERROR("encoding type selection failed for pix[%d]\n",
872
0
                        __func__, i);
873
0
            pixDestroy(&pix);
874
0
            continue;
875
0
        }
876
877
0
        ret = pixConvertToPdfData(pix, pagetype, quality, &imdata, &imbytes,
878
0
                                  0, 0, scaledres, title, NULL, 0);
879
0
        pixDestroy(&pix);
880
0
        if (ret) {
881
0
            LEPT_FREE(imdata);
882
0
            L_ERROR("pdf encoding failed for pix[%d]\n", __func__, i);
883
0
            continue;
884
0
        }
885
0
        ba = l_byteaInitFromMem(imdata, imbytes);
886
0
        LEPT_FREE(imdata);
887
0
        ptraAdd(pa_data, ba);
888
0
    }
889
0
    ptraGetActualCount(pa_data, &n);
890
0
    if (n == 0) {
891
0
        L_ERROR("no pdf files made\n", __func__);
892
0
        ptraDestroy(&pa_data, FALSE, FALSE);
893
0
        return 1;
894
0
    }
895
896
        /* Concatenate them */
897
0
    ret = ptraConcatenatePdfToData(pa_data, NULL, pdata, pnbytes);
898
899
0
    ptraGetActualCount(pa_data, &n);  /* recalculate in case it changes */
900
0
    for (i = 0; i < n; i++) {
901
0
        ba = (L_BYTEA *)ptraRemove(pa_data, i, L_NO_COMPACTION);
902
0
        l_byteaDestroy(&ba);
903
0
    }
904
0
    ptraDestroy(&pa_data, FALSE, FALSE);
905
0
    return ret;
906
0
}
907
908
909
/*---------------------------------------------------------------------*
910
 *                Single page, multi-image converters                  *
911
 *---------------------------------------------------------------------*/
912
/*!
913
 * \brief   convertToPdf()
914
 *
915
 * \param[in]      filein       input image file -- any format
916
 * \param[in]      type         encoding type (L_JPEG_ENCODE, L_G4_ENCODE,
917
 *                              L_FLATE_ENCODE, or L_JP2K_ENCODE)
918
 * \param[in]      quality      for jpeg: 1-100; 0 for default (75)
919
 *                              for jp2k: 27-45; 0 for default (34)
920
 * \param[in]      fileout      output pdf file; only required on last
921
 *                              image on page
922
 * \param[in]      x, y         location of lower-left corner of image,
923
 *                              in pixels, relative to the PostScript origin
924
 *                              (0,0) at the lower-left corner of the page
925
 * \param[in]      res          override the resolution of the input image,
926
 *                              in ppi; use 0 to respect the resolution
927
 *                              embedded in the input images
928
 * \param[in]      title        [optional] pdf title; can be null
929
 * \param[in,out]  plpd         ptr to lpd, which is created on the first
930
 *                              invocation and returned until last image is
931
 *                              processed, at which time it is destroyed
932
 * \param[in]      position     in image sequence: L_FIRST_IMAGE, L_NEXT_IMAGE,
933
 *                              L_LAST_IMAGE
934
 * \return  0 if OK, 1 on error
935
 *
936
 * <pre>
937
 * Notes:
938
 *      (1) To wrap only one image in pdf, input %plpd = NULL, and
939
 *          the value of %position will be ignored:
940
 *            convertToPdf(...  type, quality, x, y, res, NULL, 0);
941
 *      (2) To wrap multiple images on a single pdf page, this is called
942
 *          once for each successive image.  Do it this way:
943
 *            L_PDF_DATA   *lpd;
944
 *            convertToPdf(...  type, quality, x, y, res, &lpd, L_FIRST_IMAGE);
945
 *            convertToPdf(...  type, quality, x, y, res, &lpd, L_NEXT_IMAGE);
946
 *            ...
947
 *            convertToPdf(...  type, quality, x, y, res, &lpd, L_LAST_IMAGE);
948
 *          This will write the result to the value of %fileout specified
949
 *          in the first call; succeeding values of %fileout are ignored.
950
 *          On the last call: the pdf data bytes are computed and written
951
 *          to %fileout, lpd is destroyed internally, and the returned
952
 *          value of lpd is null.  So the client has nothing to clean up.
953
 *      (3) (a) Set %res == 0 to respect the resolution embedded in the
954
 *              image file.  If no resolution is embedded, it will be set
955
 *              to the default value.
956
 *          (b) Set %res to some other value to override the file resolution.
957
 *      (4) (a) If the input %res and the resolution of the output device
958
 *              are equal, the image will be "displayed" at the same size
959
 *              as the original.
960
 *          (b) If the input %res is 72, the output device will render
961
 *              the image at 1 pt/pixel.
962
 *          (c) Some possible choices for the default input pix resolution are:
963
 *                 72 ppi     Render pix on any output device at one pt/pixel
964
 *                 96 ppi     Windows default for generated display images
965
 *                300 ppi     Typical default for scanned images.
966
 *              We choose 300, which is sensible for rendering page images.
967
 *              However,  images come from a variety of sources, and
968
 *              some are explicitly created for viewing on a display.
969
 * </pre>
970
 */
971
l_ok
972
convertToPdf(const char   *filein,
973
             l_int32       type,
974
             l_int32       quality,
975
             const char   *fileout,
976
             l_int32       x,
977
             l_int32       y,
978
             l_int32       res,
979
             const char   *title,
980
             L_PDF_DATA  **plpd,
981
             l_int32       position)
982
0
{
983
0
l_uint8  *data;
984
0
l_int32   ret;
985
0
size_t    nbytes;
986
987
0
    if (!filein)
988
0
        return ERROR_INT("filein not defined", __func__, 1);
989
0
    if (!plpd || (position == L_LAST_IMAGE)) {
990
0
        if (!fileout)
991
0
            return ERROR_INT("fileout not defined", __func__, 1);
992
0
    }
993
994
0
    if (convertToPdfData(filein, type, quality, &data, &nbytes, x, y,
995
0
                         res, title, plpd, position))
996
0
        return ERROR_INT("pdf data not made", __func__, 1);
997
998
0
    if (!plpd || (position == L_LAST_IMAGE)) {
999
0
        ret = l_binaryWrite(fileout, "w", data, nbytes);
1000
0
        LEPT_FREE(data);
1001
0
        if (ret)
1002
0
            return ERROR_INT("pdf data not written to file", __func__, 1);
1003
0
    }
1004
1005
0
    return 0;
1006
0
}
1007
1008
1009
/*!
1010
 * \brief   convertImageDataToPdf()
1011
 *
1012
 * \param[in]      imdata       array of formatted image data; e.g., png, jpeg
1013
 * \param[in]      size         size of image data
1014
 * \param[in]      type         encoding type (L_JPEG_ENCODE, L_G4_ENCODE,
1015
 *                              L_FLATE_ENCODE, or L_JP2K_ENCODE)
1016
 * \param[in]      quality      for jpeg: 1-100; 0 for default (75)
1017
 *                              for jp2k: 27-45; 0 for default (34)
1018
 * \param[in]      fileout      output pdf file; only required on last
1019
 *                              image on page
1020
 * \param[in]      x, y         location of lower-left corner of image,
1021
 *                              in pixels, relative to the PostScript origin
1022
 *                              (0,0) at the lower-left corner of the page
1023
 * \param[in]      res          override the resolution of the input image,
1024
 *                              in ppi; use 0 to respect the resolution
1025
 *                              embedded in the input images
1026
 * \param[in]      title        [optional] pdf title; can be null
1027
 * \param[in,out]  plpd         ptr to lpd, which is created on the first
1028
 *                              invocation and returned until last image is
1029
 *                              processed, at which time it is destroyed
1030
 * \param[in]      position     in image sequence: L_FIRST_IMAGE, L_NEXT_IMAGE,
1031
 *                              L_LAST_IMAGE
1032
 * \return  0 if OK, 1 on error
1033
 *
1034
 * <pre>
1035
 * Notes:
1036
 *      (1) If %res == 0 and the input resolution field is 0,
1037
 *          this will use DefaultInputRes.
1038
 *      (2) See comments in convertToPdf().
1039
 * </pre>
1040
 */
1041
l_ok
1042
convertImageDataToPdf(l_uint8      *imdata,
1043
                      size_t        size,
1044
                      l_int32       type,
1045
                      l_int32       quality,
1046
                      const char   *fileout,
1047
                      l_int32       x,
1048
                      l_int32       y,
1049
                      l_int32       res,
1050
                      const char   *title,
1051
                      L_PDF_DATA  **plpd,
1052
                      l_int32       position)
1053
0
{
1054
0
l_int32  ret;
1055
0
PIX     *pix;
1056
1057
0
    if (!imdata)
1058
0
        return ERROR_INT("image data not defined", __func__, 1);
1059
0
    if (!plpd || (position == L_LAST_IMAGE)) {
1060
0
        if (!fileout)
1061
0
            return ERROR_INT("fileout not defined", __func__, 1);
1062
0
    }
1063
1064
0
    if ((pix = pixReadMem(imdata, size)) == NULL)
1065
0
        return ERROR_INT("pix not read", __func__, 1);
1066
0
    if (type != L_JPEG_ENCODE && type != L_G4_ENCODE &&
1067
0
        type != L_FLATE_ENCODE && type != L_JP2K_ENCODE) {
1068
0
        selectDefaultPdfEncoding(pix, &type);
1069
0
    }
1070
0
    ret = pixConvertToPdf(pix, type, quality, fileout, x, y, res,
1071
0
                          title, plpd, position);
1072
0
    pixDestroy(&pix);
1073
0
    return ret;
1074
0
}
1075
1076
1077
/*!
1078
 * \brief   convertToPdfData()
1079
 *
1080
 * \param[in]      filein       input image file -- any format
1081
 * \param[in]      type         encoding type (L_JPEG_ENCODE, L_G4_ENCODE,
1082
 *                              L_FLATE_ENCODE, or L_JP2K_ENCODE)
1083
 * \param[in]      quality      for jpeg: 1-100; 0 for default (75)
1084
 *                              for jp2k: 27-45; 0 for default (34)
1085
 * \param[out]     pdata        pdf data in memory
1086
 * \param[out]     pnbytes      number of bytes in pdf data
1087
 * \param[in]      x, y         location of lower-left corner of image,
1088
 *                              in pixels, relative to the PostScript origin
1089
 *                              (0,0) at the lower-left corner of the page
1090
 * \param[in]      res          override the resolution of the input image,
1091
 *                              in ppi; use 0 to respect the resolution
1092
 *                              embedded in the input images
1093
 * \param[in]      title        [optional] pdf title; can be null
1094
 * \param[in,out]  plpd         ptr to lpd, which is created on the first
1095
 *                              invocation and returned until last image is
1096
 *                              processed, at which time it is destroyed
1097
 * \param[in]      position     in image sequence: L_FIRST_IMAGE, L_NEXT_IMAGE,
1098
 *                              L_LAST_IMAGE
1099
 * \return  0 if OK, 1 on error
1100
 *
1101
 * <pre>
1102
 * Notes:
1103
 *      (1) If %res == 0 and the input resolution field is 0,
1104
 *          this will use DefaultInputRes.
1105
 *      (2) See comments in convertToPdf().
1106
 * </pre>
1107
 */
1108
l_ok
1109
convertToPdfData(const char   *filein,
1110
                 l_int32       type,
1111
                 l_int32       quality,
1112
                 l_uint8     **pdata,
1113
                 size_t       *pnbytes,
1114
                 l_int32       x,
1115
                 l_int32       y,
1116
                 l_int32       res,
1117
                 const char   *title,
1118
                 L_PDF_DATA  **plpd,
1119
                 l_int32       position)
1120
0
{
1121
0
PIX  *pix;
1122
1123
0
    if (!pdata)
1124
0
        return ERROR_INT("&data not defined", __func__, 1);
1125
0
    *pdata = NULL;
1126
0
    if (!pnbytes)
1127
0
        return ERROR_INT("&nbytes not defined", __func__, 1);
1128
0
    *pnbytes = 0;
1129
0
    if (!filein)
1130
0
        return ERROR_INT("filein not defined", __func__, 1);
1131
1132
0
    if ((pix = pixRead(filein)) == NULL)
1133
0
        return ERROR_INT("pix not made", __func__, 1);
1134
1135
0
    pixConvertToPdfData(pix, type, quality, pdata, pnbytes,
1136
0
                        x, y, res, title, plpd, position);
1137
0
    pixDestroy(&pix);
1138
0
    return 0;
1139
0
}
1140
1141
1142
/*!
1143
 * \brief   convertImageDataToPdfData()
1144
 *
1145
 * \param[in]    imdata       array of formatted image data; e.g., png, jpeg
1146
 * \param[in]    size         size of image data
1147
 * \param[in]    type         encoding type (L_JPEG_ENCODE, L_G4_ENCODE,
1148
 *                            L_FLATE_ENCODE, or L_JP2K_ENCODE)
1149
 * \param[in]    quality      for jpeg: 1-100; 0 for default (75)
1150
 *                            for jp2k: 27-45; 0 for default (34)
1151
 * \param[out]   pdata        pdf data in memory
1152
 * \param[out]   pnbytes      number of bytes in pdf data
1153
 * \param[in]    x, y         location of lower-left corner of image,
1154
 *                            in pixels, relative to the PostScript origin
1155
 *                            (0,0) at the lower-left corner of the page
1156
 * \param[in]    res          override the resolution of the input image,
1157
 *                            in ppi; use 0 to respect the resolution
1158
 *                            embedded in the input images
1159
 * \param[in]    title        [optional] pdf title; can be null
1160
 * \param[out]   plpd         ptr to lpd, which is created on the first
1161
 *                            invocation and returned until last image is
1162
 *                            processed, at which time it is destroyed
1163
 * \param[in]    position     in image sequence: L_FIRST_IMAGE, L_NEXT_IMAGE,
1164
 *                            L_LAST_IMAGE
1165
 * \return  0 if OK, 1 on error
1166
 *
1167
 * <pre>
1168
 * Notes:
1169
 *      (1) If %res == 0 and the input resolution field is 0,
1170
 *          this will use DefaultInputRes.
1171
 *      (2) See comments in convertToPdf().
1172
 * </pre>
1173
 */
1174
l_ok
1175
convertImageDataToPdfData(l_uint8      *imdata,
1176
                          size_t        size,
1177
                          l_int32       type,
1178
                          l_int32       quality,
1179
                          l_uint8     **pdata,
1180
                          size_t       *pnbytes,
1181
                          l_int32       x,
1182
                          l_int32       y,
1183
                          l_int32       res,
1184
                          const char   *title,
1185
                          L_PDF_DATA  **plpd,
1186
                          l_int32       position)
1187
0
{
1188
0
l_int32  ret;
1189
0
PIX     *pix;
1190
1191
0
    if (!pdata)
1192
0
        return ERROR_INT("&data not defined", __func__, 1);
1193
0
    *pdata = NULL;
1194
0
    if (!pnbytes)
1195
0
        return ERROR_INT("&nbytes not defined", __func__, 1);
1196
0
    *pnbytes = 0;
1197
0
    if (!imdata)
1198
0
        return ERROR_INT("image data not defined", __func__, 1);
1199
0
    if (plpd) {  /* part of multi-page invocation */
1200
0
        if (position == L_FIRST_IMAGE)
1201
0
            *plpd = NULL;
1202
0
    }
1203
1204
0
    if ((pix = pixReadMem(imdata, size)) == NULL)
1205
0
        return ERROR_INT("pix not read", __func__, 1);
1206
0
    if (type != L_JPEG_ENCODE && type != L_G4_ENCODE &&
1207
0
        type != L_FLATE_ENCODE && type != L_JP2K_ENCODE) {
1208
0
        selectDefaultPdfEncoding(pix, &type);
1209
0
    }
1210
0
    ret = pixConvertToPdfData(pix, type, quality, pdata, pnbytes,
1211
0
                              x, y, res, title, plpd, position);
1212
0
    pixDestroy(&pix);
1213
0
    return ret;
1214
0
}
1215
1216
1217
/*!
1218
 * \brief   pixConvertToPdf()
1219
 *
1220
 * \param[in]      pix
1221
 * \param[in]      type         encoding type (L_JPEG_ENCODE, L_G4_ENCODE,
1222
 *                              L_FLATE_ENCODE, L_JP2K_ENCODE)
1223
 * \param[in]      quality      for jpeg: 1-100; 0 for default (75)
1224
 *                              for jp2k: 27-45; 0 for default (34)
1225
 * \param[in]      fileout      output pdf file; only required on last
1226
 *                              image on page
1227
 * \param[in]      x, y         location of lower-left corner of image,
1228
 *                              in pixels, relative to the PostScript origin
1229
 *                              (0,0) at the lower-left corner of the page
1230
 * \param[in]      res          override the resolution of the input image,
1231
 *                              in ppi; use 0 to respect the resolution
1232
 *                              embedded in the input images
1233
 * \param[in]      title        [optional] pdf title; can be null
1234
 * \param[in,out]  plpd         ptr to lpd, which is created on the first
1235
 *                              invocation and returned until last image is
1236
 *                              processed, at which time it is destroyed
1237
 * \param[in]      position     in image sequence: L_FIRST_IMAGE, L_NEXT_IMAGE,
1238
 *                              L_LAST_IMAGE
1239
 * \return  0 if OK, 1 on error
1240
 *
1241
 * <pre>
1242
 * Notes:
1243
 *      (1) If %res == 0 and the input resolution field is 0,
1244
 *          this will use DefaultInputRes.
1245
 *      (2) This only writes data to fileout if it is the last
1246
 *          image to be written on the page.
1247
 *      (3) See comments in convertToPdf().
1248
 * </pre>
1249
 */
1250
l_ok
1251
pixConvertToPdf(PIX          *pix,
1252
                l_int32       type,
1253
                l_int32       quality,
1254
                const char   *fileout,
1255
                l_int32       x,
1256
                l_int32       y,
1257
                l_int32       res,
1258
                const char   *title,
1259
                L_PDF_DATA  **plpd,
1260
                l_int32       position)
1261
0
{
1262
0
l_uint8  *data;
1263
0
l_int32   ret;
1264
0
size_t    nbytes;
1265
1266
0
    if (!pix)
1267
0
        return ERROR_INT("pix not defined", __func__, 1);
1268
0
    if (!plpd || (position == L_LAST_IMAGE)) {
1269
0
        if (!fileout)
1270
0
            return ERROR_INT("fileout not defined", __func__, 1);
1271
0
    }
1272
1273
0
    if (pixConvertToPdfData(pix, type, quality, &data, &nbytes,
1274
0
                            x, y, res, title, plpd, position)) {
1275
0
        LEPT_FREE(data);
1276
0
        return ERROR_INT("pdf data not made", __func__, 1);
1277
0
    }
1278
1279
0
    if (!plpd || (position == L_LAST_IMAGE)) {
1280
0
        ret = l_binaryWrite(fileout, "w", data, nbytes);
1281
0
        LEPT_FREE(data);
1282
0
        if (ret)
1283
0
            return ERROR_INT("pdf data not written to file", __func__, 1);
1284
0
    }
1285
0
    return 0;
1286
0
}
1287
1288
1289
/*!
1290
 * \brief   pixWriteStreamPdf()
1291
 *
1292
 * \param[in]    fp       file stream opened for writing
1293
 * \param[in]    pix      all depths, cmap OK
1294
 * \param[in]    res      override the resolution of the input image, in ppi;
1295
 *                        use 0 to respect the resolution embedded in the input
1296
 * \param[in]    title    [optional] pdf title; can be null
1297
 * \return  0 if OK, 1 on error
1298
 *
1299
 * <pre>
1300
 * Notes:
1301
 *      (1) This is the simplest interface for writing a single image
1302
 *          with pdf encoding to a stream.  It uses G4 encoding for 1 bpp,
1303
 *          JPEG encoding for 8 bpp (no cmap) and 32 bpp, and FLATE
1304
 *          encoding for everything else.
1305
 * </pre>
1306
 */
1307
l_ok
1308
pixWriteStreamPdf(FILE        *fp,
1309
                  PIX         *pix,
1310
                  l_int32      res,
1311
                  const char  *title)
1312
0
{
1313
0
l_uint8  *data;
1314
0
size_t    nbytes, nbytes_written;
1315
1316
0
    if (!fp)
1317
0
        return ERROR_INT("stream not opened", __func__, 1);
1318
0
    if (!pix)
1319
0
        return ERROR_INT("pix not defined", __func__, 1);
1320
1321
0
    if (pixWriteMemPdf(&data, &nbytes, pix, res, title) != 0) {
1322
0
        LEPT_FREE(data);
1323
0
        return ERROR_INT("pdf data not made", __func__, 1);
1324
0
    }
1325
1326
0
    nbytes_written = fwrite(data, 1, nbytes, fp);
1327
0
    LEPT_FREE(data);
1328
0
    if (nbytes != nbytes_written)
1329
0
        return ERROR_INT("failure writing pdf data to stream", __func__, 1);
1330
0
    return 0;
1331
0
}
1332
1333
1334
/*!
1335
 * \brief   pixWriteMemPdf()
1336
 *
1337
 * \param[out]   pdata      pdf as byte array
1338
 * \param[out]   pnbytes    number of bytes in pdf array
1339
 * \param[in]    pix        all depths, cmap OK
1340
 * \param[in]    res        override the resolution of the input image, in ppi;
1341
 *                          use 0 to respect the res embedded in the input
1342
 * \param[in]    title      [optional] pdf title; can be null
1343
 * \return  0 if OK, 1 on error
1344
 *
1345
 * <pre>
1346
 * Notes:
1347
 *      (1) This is the simplest interface for writing a single image
1348
 *          with pdf encoding to memory.  It uses G4 encoding for 1 bpp,
1349
 *          and makes a guess whether to use JPEG or FLATE encoding for
1350
 *          everything else.
1351
 * </pre>
1352
 */
1353
l_ok
1354
pixWriteMemPdf(l_uint8    **pdata,
1355
               size_t      *pnbytes,
1356
               PIX         *pix,
1357
               l_int32      res,
1358
               const char  *title)
1359
0
{
1360
0
l_int32  ret, type;
1361
1362
0
    if (pdata) *pdata = NULL;
1363
0
    if (pnbytes) *pnbytes = 0;
1364
0
    if (!pdata || !pnbytes)
1365
0
        return ERROR_INT("&data or &nbytes not defined", __func__, 1);
1366
0
    if (!pix)
1367
0
        return ERROR_INT("pix not defined", __func__, 1);
1368
1369
0
    selectDefaultPdfEncoding(pix, &type);
1370
0
    ret = pixConvertToPdfData(pix, type, 75, pdata, pnbytes,
1371
0
                              0, 0, res, title, NULL, 0);
1372
0
    if (ret)
1373
0
        return ERROR_INT("pdf data not made", __func__, 1);
1374
0
    return 0;
1375
0
}
1376
1377
1378
/*---------------------------------------------------------------------*
1379
 *            Segmented multi-page, multi-image converter              *
1380
 *---------------------------------------------------------------------*/
1381
/*!
1382
 * \brief   convertSegmentedFilesToPdf()
1383
 *
1384
 * \param[in]    dirname       directory name containing images
1385
 * \param[in]    substr        [optional] substring filter on filenames;
1386
 *                             can be null
1387
 * \param[in]    res           input resolution of all images
1388
 * \param[in]    type          compression type for non-image regions; the
1389
 *                             image regions are always compressed with
1390
 *                             L_JPEG_ENCODE
1391
 * \param[in]    thresh        used for converting gray --> 1 bpp with
1392
 *                             L_G4_ENCODE
1393
 * \param[in]    baa           [optional] boxaa of image regions
1394
 * \param[in]    quality       used for JPEG only; 0 for default (75)
1395
 * \param[in]    scalefactor   scaling factor applied to each image region
1396
 * \param[in]    title         [optional] pdf title; can be null
1397
 * \param[in]    fileout       pdf file of all images
1398
 * \return  0 if OK, 1 on error
1399
 *
1400
 * <pre>
1401
 * Notes:
1402
 *      (1) If %substr is not NULL, only image filenames that contain
1403
 *          the substring can be used.  If %substr == NULL, all files
1404
 *          in the directory are used.
1405
 *      (2) The files in the directory, after optional filtering by
1406
 *          the substring, are lexically sorted in increasing order
1407
 *          before concatenation.
1408
 *      (3) The images are encoded with G4 if 1 bpp; JPEG if 8 bpp without
1409
 *          colormap and many colors, or 32 bpp; FLATE for anything else.
1410
 *      (4) The boxaa, if it exists, contains one boxa of "image regions"
1411
 *          for each image file.  The boxa must be aligned with the
1412
 *          sorted set of images.
1413
 *      (5) The scalefactor is applied to each image region.  It is
1414
 *          typically < 1.0, to save bytes in the final pdf, because
1415
 *          the resolution is often not critical in non-text regions.
1416
 *      (6) If the non-image regions have pixel depth > 1 and the encoding
1417
 *          type is G4, they are automatically scaled up by 2x and
1418
 *          thresholded.  Otherwise, no scaling is performed on them.
1419
 *      (7) Note that this function can be used to generate multipage
1420
 *          G4 compressed pdf from any input, by using %boxaa == NULL
1421
 *          and %type == L_G4_ENCODE.
1422
 * </pre>
1423
 */
1424
l_ok
1425
convertSegmentedFilesToPdf(const char  *dirname,
1426
                           const char  *substr,
1427
                           l_int32      res,
1428
                           l_int32      type,
1429
                           l_int32      thresh,
1430
                           BOXAA       *baa,
1431
                           l_int32      quality,
1432
                           l_float32    scalefactor,
1433
                           const char  *title,
1434
                           const char  *fileout)
1435
0
{
1436
0
char     *fname;
1437
0
l_uint8  *imdata, *data;
1438
0
l_int32   i, npages, nboxa, nboxes, ret;
1439
0
size_t    imbytes, databytes;
1440
0
BOXA     *boxa;
1441
0
L_BYTEA  *ba;
1442
0
L_PTRA   *pa_data;
1443
0
SARRAY   *sa;
1444
1445
0
    if (!dirname)
1446
0
        return ERROR_INT("dirname not defined", __func__, 1);
1447
0
    if (!fileout)
1448
0
        return ERROR_INT("fileout not defined", __func__, 1);
1449
1450
0
    if ((sa = getNumberedPathnamesInDirectory(dirname, substr, 0, 0, 10000))
1451
0
            == NULL)
1452
0
        return ERROR_INT("sa not made", __func__, 1);
1453
1454
0
    npages = sarrayGetCount(sa);
1455
        /* If necessary, extend the boxaa, which is page-aligned with
1456
         * the image files, to be as large as the set of images. */
1457
0
    if (baa) {
1458
0
        nboxa = boxaaGetCount(baa);
1459
0
        if (nboxa < npages) {
1460
0
            boxa = boxaCreate(1);
1461
0
            boxaaExtendWithInit(baa, npages, boxa);
1462
0
            boxaDestroy(&boxa);
1463
0
        }
1464
0
    }
1465
1466
        /* Generate and save all the encoded pdf strings */
1467
0
    pa_data = ptraCreate(npages);
1468
0
    for (i = 0; i < npages; i++) {
1469
0
        fname = sarrayGetString(sa, i, L_NOCOPY);
1470
0
        if (!strcmp(fname, "")) continue;
1471
0
        boxa = NULL;
1472
0
        if (baa) {
1473
0
            boxa = boxaaGetBoxa(baa, i, L_CLONE);
1474
0
            nboxes = boxaGetCount(boxa);
1475
0
            if (nboxes == 0)
1476
0
                boxaDestroy(&boxa);
1477
0
        }
1478
0
        ret = convertToPdfDataSegmented(fname, res, type, thresh, boxa,
1479
0
                                        quality, scalefactor, title,
1480
0
                                        &imdata, &imbytes);
1481
0
        boxaDestroy(&boxa);  /* safe; in case nboxes > 0 */
1482
0
        if (ret) {
1483
0
            L_ERROR("pdf encoding failed for %s\n", __func__, fname);
1484
0
            continue;
1485
0
        }
1486
0
        ba = l_byteaInitFromMem(imdata, imbytes);
1487
0
        if (imdata) LEPT_FREE(imdata);
1488
0
        ptraAdd(pa_data, ba);
1489
0
    }
1490
0
    sarrayDestroy(&sa);
1491
1492
0
    ptraGetActualCount(pa_data, &npages);
1493
0
    if (npages == 0) {
1494
0
        L_ERROR("no pdf files made\n", __func__);
1495
0
        ptraDestroy(&pa_data, FALSE, FALSE);
1496
0
        return 1;
1497
0
    }
1498
1499
        /* Concatenate */
1500
0
    ret = ptraConcatenatePdfToData(pa_data, NULL, &data, &databytes);
1501
1502
        /* Clean up */
1503
0
    ptraGetActualCount(pa_data, &npages);  /* recalculate in case it changes */
1504
0
    for (i = 0; i < npages; i++) {
1505
0
        ba = (L_BYTEA *)ptraRemove(pa_data, i, L_NO_COMPACTION);
1506
0
        l_byteaDestroy(&ba);
1507
0
    }
1508
0
    ptraDestroy(&pa_data, FALSE, FALSE);
1509
1510
0
    if (ret) {
1511
0
        if (data) LEPT_FREE(data);
1512
0
        return ERROR_INT("pdf data not made", __func__, 1);
1513
0
    }
1514
1515
0
    ret = l_binaryWrite(fileout, "w", data, databytes);
1516
0
    LEPT_FREE(data);
1517
0
    if (ret)
1518
0
        L_ERROR("pdf data not written to file\n", __func__);
1519
0
    return ret;
1520
0
}
1521
1522
1523
/*!
1524
 * \brief   convertNumberedMasksToBoxaa()
1525
 *
1526
 * \param[in]    dirname   directory name containing mask images
1527
 * \param[in]    substr    [optional] substring filter on filenames;
1528
 *                         can be null 
1529
 * \param[in]    numpre    number of characters in name before number
1530
 * \param[in]    numpost   number of characters in name after number,
1531
 *                         up to a dot before an extension
1532
 * \return  boxaa of mask regions, or NULL on error
1533
 *
1534
 * <pre>
1535
 * Notes:
1536
 *      (1) This is conveniently used to generate the input boxaa
1537
 *          for convertSegmentedFilesToPdf().  It guarantees that the
1538
 *          boxa will be aligned with the page images, even if some
1539
 *          of the boxa are empty.
1540
 * </pre>
1541
 */
1542
BOXAA *
1543
convertNumberedMasksToBoxaa(const char  *dirname,
1544
                            const char  *substr,
1545
                            l_int32      numpre,
1546
                            l_int32      numpost)
1547
0
{
1548
0
char    *fname;
1549
0
l_int32  i, n;
1550
0
BOXA    *boxa;
1551
0
BOXAA   *baa;
1552
0
PIX     *pix;
1553
0
SARRAY  *sa;
1554
1555
0
    if (!dirname)
1556
0
        return (BOXAA *)ERROR_PTR("dirname not defined", __func__, NULL);
1557
1558
0
    if ((sa = getNumberedPathnamesInDirectory(dirname, substr, numpre,
1559
0
                                              numpost, 10000)) == NULL)
1560
0
        return (BOXAA *)ERROR_PTR("sa not made", __func__, NULL);
1561
1562
        /* Generate and save all the encoded pdf strings */
1563
0
    n = sarrayGetCount(sa);
1564
0
    baa = boxaaCreate(n);
1565
0
    boxa = boxaCreate(1);
1566
0
    boxaaInitFull(baa, boxa);
1567
0
    boxaDestroy(&boxa);
1568
0
    for (i = 0; i < n; i++) {
1569
0
        fname = sarrayGetString(sa, i, L_NOCOPY);
1570
0
        if (!strcmp(fname, "")) continue;
1571
0
        if ((pix = pixRead(fname)) == NULL) {
1572
0
            L_WARNING("invalid image on page %d\n", __func__, i);
1573
0
            continue;
1574
0
        }
1575
0
        boxa = pixConnComp(pix, NULL, 8);
1576
0
        boxaaReplaceBoxa(baa, i, boxa);
1577
0
        pixDestroy(&pix);
1578
0
    }
1579
1580
0
    sarrayDestroy(&sa);
1581
0
    return baa;
1582
0
}
1583
1584
1585
/*---------------------------------------------------------------------*
1586
 *            Segmented single page, multi-image converters            *
1587
 *---------------------------------------------------------------------*/
1588
/*!
1589
 * \brief   convertToPdfSegmented()
1590
 *
1591
 * \param[in]    filein        input image file -- any format
1592
 * \param[in]    res           input image resolution; typ. 300 ppi;
1593
 *                             use 0 for default
1594
 * \param[in]    type          compression type for non-image regions; image
1595
 *                             regions are always compressed with L_JPEG_ENCODE
1596
 * \param[in]    thresh        for converting gray --> 1 bpp with L_G4_ENCODE
1597
 * \param[in]    boxa          [optional] of image regions; can be null
1598
 * \param[in]    quality       used for jpeg image regions; 0 for default
1599
 * \param[in]    scalefactor   used for jpeg regions; must be <= 1.0
1600
 * \param[in]    title         [optional] pdf title; can be null
1601
 * \param[in]    fileout       output pdf file
1602
 * \return  0 if OK, 1 on error
1603
 *
1604
 * <pre>
1605
 * Notes:
1606
 *      (1) If there are no image regions, set %boxa == NULL;
1607
 *          %quality and %scalefactor are ignored.
1608
 *      (2) Typically, %scalefactor is < 1.0, because the image regions
1609
 *          can be rendered at a lower resolution (for better compression)
1610
 *          than the text regions.  If %scalefactor == 0, we use 1.0.
1611
 *          If the input image is 1 bpp and scalefactor < 1.0, we
1612
 *          use scaleToGray() to downsample the image regions to gray
1613
 *          before compressing them.
1614
 *      (3) If the compression type for non-image regions is L_G4_ENCODE
1615
 *          and bpp > 1, the image is upscaled 2x and thresholded
1616
 *          to 1 bpp.  That is the only situation where %thresh is used.
1617
 *      (4) The parameter %quality is only used for image regions.
1618
 *          If %type == L_JPEG_ENCODE, default jpeg quality (75) is
1619
 *          used for the non-image regions.
1620
 *      (5) Processing matrix for non-image regions.
1621
 *
1622
 *          Input           G4              JPEG                FLATE
1623
 *          ----------|---------------------------------------------------
1624
 *          1 bpp     |  1x, 1 bpp       1x flate, 1 bpp     1x, 1 bpp
1625
 *                    |
1626
 *          cmap      |  2x, 1 bpp       1x flate, cmap      1x, cmap
1627
 *                    |
1628
 *          2,4 bpp   |  2x, 1 bpp       1x flate            1x, 2,4 bpp
1629
 *          no cmap   |                  2,4 bpp
1630
 *                    |
1631
 *          8,32 bpp  |  2x, 1 bpp       1x (jpeg)           1x, 8,32 bpp
1632
 *          no cmap   |                  8,32 bpp
1633
 *
1634
 *          Summary:
1635
 *          (a) if G4 is requested, G4 is used, with 2x upscaling
1636
 *              for all cases except 1 bpp.
1637
 *          (b) if JPEG is requested, use flate encoding for all cases
1638
 *              except 8 bpp without cmap and 32 bpp (rgb).
1639
 *          (c) if FLATE is requested, use flate with no transformation
1640
 *              of the raster data.
1641
 *      (6) Calling options/sequence for these functions:
1642
 *              file  -->  file      (convertToPdfSegmented)
1643
 *                  pix  -->  file      (pixConvertToPdfSegmented)
1644
 *                      pix  -->  data      (pixConvertToPdfDataSegmented)
1645
 *              file  -->  data      (convertToPdfDataSegmented)
1646
 *                      pix  -->  data      (pixConvertToPdfDataSegmented)
1647
 * </pre>
1648
 */
1649
l_ok
1650
convertToPdfSegmented(const char  *filein,
1651
                      l_int32      res,
1652
                      l_int32      type,
1653
                      l_int32      thresh,
1654
                      BOXA        *boxa,
1655
                      l_int32      quality,
1656
                      l_float32    scalefactor,
1657
                      const char  *title,
1658
                      const char  *fileout)
1659
0
{
1660
0
l_int32  ret;
1661
0
PIX     *pixs;
1662
1663
0
    if (!filein)
1664
0
        return ERROR_INT("filein not defined", __func__, 1);
1665
0
    if (!fileout)
1666
0
        return ERROR_INT("fileout not defined", __func__, 1);
1667
0
    if (type != L_G4_ENCODE && type != L_JPEG_ENCODE &&
1668
0
        type != L_FLATE_ENCODE)
1669
0
        return ERROR_INT("invalid conversion type", __func__, 1);
1670
0
    if (boxa && scalefactor > 1.0) {
1671
0
        L_WARNING("setting scalefactor to 1.0\n", __func__);
1672
0
        scalefactor = 1.0;
1673
0
    }
1674
1675
0
    if ((pixs = pixRead(filein)) == NULL)
1676
0
        return ERROR_INT("pixs not made", __func__, 1);
1677
1678
0
    ret = pixConvertToPdfSegmented(pixs, res, type, thresh, boxa, quality,
1679
0
                                   scalefactor, title, fileout);
1680
0
    pixDestroy(&pixs);
1681
0
    return ret;
1682
0
}
1683
1684
1685
/*!
1686
 * \brief   pixConvertToPdfSegmented()
1687
 *
1688
 * \param[in]    pixs          any depth, cmap OK
1689
 * \param[in]    res           input image resolution; typ. 300 ppi;
1690
 *                             use 0 for default
1691
 * \param[in]    type          compression type for non-image regions; image
1692
 *                             regions are always compressed with L_JPEG_ENCODE
1693
 * \param[in]    thresh        for converting gray --> 1 bpp with L_G4_ENCODE
1694
 * \param[in]    boxa          [optional] of image regions; can be null
1695
 * \param[in]    quality       used for jpeg image regions; 0 for default
1696
 * \param[in]    scalefactor   used for jpeg regions; must be <= 1.0
1697
 * \param[in]    title         [optional] pdf title; can be null
1698
 * \param[in]    fileout       output pdf file
1699
 * \return  0 if OK, 1 on error
1700
 *
1701
 * <pre>
1702
 * Notes:
1703
 *      (1) See convertToPdfSegmented() for details.
1704
 * </pre>
1705
 */
1706
l_ok
1707
pixConvertToPdfSegmented(PIX         *pixs,
1708
                         l_int32      res,
1709
                         l_int32      type,
1710
                         l_int32      thresh,
1711
                         BOXA        *boxa,
1712
                         l_int32      quality,
1713
                         l_float32    scalefactor,
1714
                         const char  *title,
1715
                         const char  *fileout)
1716
0
{
1717
0
l_uint8  *data;
1718
0
l_int32   ret;
1719
0
size_t    nbytes;
1720
1721
0
    if (!pixs)
1722
0
        return ERROR_INT("pixs not defined", __func__, 1);
1723
0
    if (!fileout)
1724
0
        return ERROR_INT("fileout not defined", __func__, 1);
1725
0
    if (type != L_G4_ENCODE && type != L_JPEG_ENCODE &&
1726
0
        type != L_FLATE_ENCODE)
1727
0
        return ERROR_INT("invalid conversion type", __func__, 1);
1728
0
    if (boxa && scalefactor > 1.0) {
1729
0
        L_WARNING("setting scalefactor to 1.0\n", __func__);
1730
0
        scalefactor = 1.0;
1731
0
    }
1732
1733
0
    ret = pixConvertToPdfDataSegmented(pixs, res, type, thresh, boxa, quality,
1734
0
                                       scalefactor, title, &data, &nbytes);
1735
0
    if (ret)
1736
0
        return ERROR_INT("pdf generation failure", __func__, 1);
1737
1738
0
    ret = l_binaryWrite(fileout, "w", data, nbytes);
1739
0
    if (data) LEPT_FREE(data);
1740
0
    return ret;
1741
0
}
1742
1743
1744
/*!
1745
 * \brief   convertToPdfDataSegmented()
1746
 *
1747
 * \param[in]    filein        input image file -- any format
1748
 * \param[in]    res           input image resolution; typ. 300 ppi;
1749
 *                             use 0 for default
1750
 * \param[in]    type          compression type for non-image regions; image
1751
 *                             regions are always compressed with L_JPEG_ENCODE
1752
 * \param[in]    thresh        for converting gray --> 1 bpp with L_G4_ENCODE
1753
 * \param[in]    boxa          [optional] image regions; can be null
1754
 * \param[in]    quality       used for jpeg image regions; 0 for default
1755
 * \param[in]    scalefactor   used for jpeg regions; must be <= 1.0
1756
 * \param[in]    title         [optional] pdf title; can be null
1757
 * \param[out]   pdata         pdf data in memory
1758
 * \param[out]   pnbytes       number of bytes in pdf data
1759
 * \return  0 if OK, 1 on error
1760
 *
1761
 * <pre>
1762
 * Notes:
1763
 *      (1) If there are no image regions, set %boxa == NULL;
1764
 *          %quality and %scalefactor are ignored.
1765
 *      (2) Typically, %scalefactor is < 1.0, because the image regions
 *          can be rendered at a lower resolution (for better compression)
 *          than the text regions.
1766
 * </pre>
1767
 */
1768
l_ok
1769
convertToPdfDataSegmented(const char  *filein,
1770
                          l_int32      res,
1771
                          l_int32      type,
1772
                          l_int32      thresh,
1773
                          BOXA        *boxa,
1774
                          l_int32      quality,
1775
                          l_float32    scalefactor,
1776
                          const char  *title,
1777
                          l_uint8    **pdata,
1778
                          size_t      *pnbytes)
1779
0
{
1780
0
l_int32  ret;
1781
0
PIX     *pixs;
1782
1783
0
    if (!pdata)
1784
0
        return ERROR_INT("&data not defined", __func__, 1);
1785
0
    *pdata = NULL;
1786
0
    if (!pnbytes)
1787
0
        return ERROR_INT("&nbytes not defined", __func__, 1);
1788
0
    *pnbytes = 0;
1789
0
    if (!filein)
1790
0
        return ERROR_INT("filein not defined", __func__, 1);
1791
0
    if (type != L_G4_ENCODE && type != L_JPEG_ENCODE &&
1792
0
        type != L_FLATE_ENCODE)
1793
0
        return ERROR_INT("invalid conversion type", __func__, 1);
1794
0
    if (boxa && scalefactor > 1.0) {
1795
0
        L_WARNING("setting scalefactor to 1.0\n", __func__);
1796
0
        scalefactor = 1.0;
1797
0
    }
1798
1799
0
    if ((pixs = pixRead(filein)) == NULL)
1800
0
        return ERROR_INT("pixs not made", __func__, 1);
1801
1802
0
    ret = pixConvertToPdfDataSegmented(pixs, res, type, thresh, boxa,
1803
0
                                       quality, scalefactor, title,
1804
0
                                       pdata, pnbytes);
1805
0
    pixDestroy(&pixs);
1806
0
    return ret;
1807
0
}
1808
1809
1810
/*!
1811
 * \brief   pixConvertToPdfDataSegmented()
1812
 *
1813
 * \param[in]    pixs          any depth, cmap OK
1814
 * \param[in]    res           input image resolution; typ. 300 ppi;
1815
 *                             use 0 for default
1816
 * \param[in]    type          compression type for non-image regions; image
1817
 *                             regions are always compressed with L_JPEG_ENCODE
1818
 * \param[in]    thresh        for converting gray --> 1 bpp with L_G4_ENCODE
1819
 * \param[in]    boxa          [optional] of image regions; can be null
1820
 * \param[in]    quality       used for jpeg image regions; 0 for default
1821
 * \param[in]    scalefactor   used for jpeg regions; must be <= 1.0
1822
 * \param[in]    title         [optional] pdf title; can be null
1823
 * \param[out]   pdata         pdf data in memory
1824
 * \param[out]   pnbytes       number of bytes in pdf data
1825
 * \return  0 if OK, 1 on error
1826
 *
1827
 * <pre>
1828
 * Notes:
1829
 *      (1) See convertToPdfSegmented() for details.
1830
 * </pre>
1831
 */
1832
l_ok
1833
pixConvertToPdfDataSegmented(PIX         *pixs,
1834
                             l_int32      res,
1835
                             l_int32      type,
1836
                             l_int32      thresh,
1837
                             BOXA        *boxa,
1838
                             l_int32      quality,
1839
                             l_float32    scalefactor,
1840
                             const char  *title,
1841
                             l_uint8    **pdata,
1842
                             size_t      *pnbytes)
1843
0
{
1844
0
l_int32      i, nbox, seq, bx, by, bw, bh, upscale;
1845
0
l_float32    scale;
1846
0
BOX         *box, *boxc, *box2;
1847
0
PIX         *pix, *pixt1, *pixt2, *pixt3, *pixt4, *pixt5, *pixt6;
1848
0
PIXCMAP     *cmap;
1849
0
L_PDF_DATA  *lpd;
1850
1851
0
    if (!pdata)
1852
0
        return ERROR_INT("&data not defined", __func__, 1);
1853
0
    *pdata = NULL;
1854
0
    if (!pnbytes)
1855
0
        return ERROR_INT("&nbytes not defined", __func__, 1);
1856
0
    *pnbytes = 0;
1857
0
    if (!pixs)
1858
0
        return ERROR_INT("pixs not defined", __func__, 1);
1859
0
    if (type != L_G4_ENCODE && type != L_JPEG_ENCODE &&
1860
0
        type != L_FLATE_ENCODE)
1861
0
        return ERROR_INT("invalid conversion type", __func__, 1);
1862
0
    if (boxa && (scalefactor <= 0.0 || scalefactor > 1.0)) {
1863
0
        L_WARNING("setting scalefactor to 1.0\n", __func__);
1864
0
        scalefactor = 1.0;
1865
0
    }
1866
1867
        /* Adjust scalefactor so that the product with res gives an integer */
1868
0
    if (res <= 0)
1869
0
        res = DefaultInputRes;
1870
0
    scale = (l_float32)((l_int32)(scalefactor * res + 0.5)) / (l_float32)res;
1871
0
    cmap = pixGetColormap(pixs);
1872
1873
        /* Simple case: single image to be encoded */
1874
0
    if (!boxa || boxaGetCount(boxa) == 0) {
1875
0
        if (pixGetDepth(pixs) > 1 && type == L_G4_ENCODE) {
1876
0
            if (cmap)
1877
0
                pixt1 = pixRemoveColormap(pixs, REMOVE_CMAP_TO_GRAYSCALE);
1878
0
            else
1879
0
                pixt1 = pixConvertTo8(pixs, FALSE);
1880
0
            pixt2 = pixScaleGray2xLIThresh(pixt1, thresh);
1881
0
            pixConvertToPdfData(pixt2, type, quality, pdata, pnbytes,
1882
0
                                0, 0, 2 * res, title, NULL, 0);
1883
0
            pixDestroy(&pixt1);
1884
0
            pixDestroy(&pixt2);
1885
0
        } else {
1886
0
            pixConvertToPdfData(pixs, type, quality, pdata, pnbytes,
1887
0
                                0, 0, res, title, NULL, 0);
1888
0
        }
1889
0
        return 0;
1890
0
    }
1891
1892
        /* Multiple images to be encoded.  If %type == L_G4_ENCODE,
1893
         * jpeg encode a version of pixs that is blanked in the non-image
1894
         * regions, and paint the scaled non-image part onto it through a mask.
1895
         * Otherwise, we must put the non-image part down first and
1896
         * then render all the image regions separately on top of it,
1897
         * at their own resolution. */
1898
0
    pixt1 = pixSetBlackOrWhiteBoxa(pixs, boxa, L_SET_WHITE);  /* non-image */
1899
0
    nbox = boxaGetCount(boxa);
1900
0
    if (type == L_G4_ENCODE) {
1901
0
        pixt2 = pixCreateTemplate(pixs);  /* only image regions */
1902
0
        pixSetBlackOrWhite(pixt2, L_SET_WHITE);
1903
0
        for (i = 0; i < nbox; i++) {
1904
0
             box = boxaGetBox(boxa, i, L_CLONE);
1905
0
             pix = pixClipRectangle(pixs, box, &boxc);
1906
0
             boxGetGeometry(boxc, &bx, &by, &bw, &bh);
1907
0
             pixRasterop(pixt2, bx, by, bw, bh, PIX_SRC, pix, 0, 0);
1908
0
             pixDestroy(&pix);
1909
0
             boxDestroy(&box);
1910
0
             boxDestroy(&boxc);
1911
0
        }
1912
0
        pixt3 = pixRemoveColormap(pixt2, REMOVE_CMAP_BASED_ON_SRC);
1913
0
        if (pixGetDepth(pixt3) == 1)
1914
0
            pixt4 = pixScaleToGray(pixt3, scale);
1915
0
        else
1916
0
            pixt4 = pixScale(pixt3, scale, scale);
1917
0
        pixConvertToPdfData(pixt4, L_JPEG_ENCODE, quality, pdata, pnbytes,
1918
0
                            0, 0, (l_int32)(scale * res), title,
1919
0
                            &lpd, L_FIRST_IMAGE);
1920
1921
0
        if (pixGetDepth(pixt1) == 1) {
1922
0
            pixt5 = pixClone(pixt1);
1923
0
            upscale = 1;
1924
0
        } else {
1925
0
            pixt6 = pixConvertTo8(pixt1, 0);
1926
0
            pixt5 = pixScaleGray2xLIThresh(pixt6, thresh);
1927
0
            pixDestroy(&pixt6);
1928
0
            upscale = 2;
1929
0
        }
1930
0
        pixConvertToPdfData(pixt5, L_G4_ENCODE, quality, pdata, pnbytes,
1931
0
                            0, 0, upscale * res, title, &lpd, L_LAST_IMAGE);
1932
0
        pixDestroy(&pixt2);
1933
0
        pixDestroy(&pixt3);
1934
0
        pixDestroy(&pixt4);
1935
0
        pixDestroy(&pixt5);
1936
0
    } else {
1937
            /* Put the non-image part down first.  This is the full
1938
               size of the page, so we can use it to find the page
1939
               height in pixels, which is required for determining
1940
               the LL corner of the image relative to the LL corner
1941
               of the page. */
1942
0
        pixConvertToPdfData(pixt1, type, quality, pdata, pnbytes, 0, 0,
1943
0
                            res, title, &lpd, L_FIRST_IMAGE);
1944
0
        for (i = 0; i < nbox; i++) {
1945
0
            box = boxaGetBox(boxa, i, L_CLONE);
1946
0
            pixt2 = pixClipRectangle(pixs, box, &boxc);
1947
0
            pixt3 = pixRemoveColormap(pixt2, REMOVE_CMAP_BASED_ON_SRC);
1948
0
            if (pixGetDepth(pixt3) == 1)
1949
0
                pixt4 = pixScaleToGray(pixt3, scale);
1950
0
            else
1951
0
                pixt4 = pixScale(pixt3, scale, scale);
1952
0
            box2 = boxTransform(boxc, 0, 0, scale, scale);
1953
0
            boxGetGeometry(box2, &bx, &by, NULL, &bh);
1954
0
            seq = (i == nbox - 1) ? L_LAST_IMAGE : L_NEXT_IMAGE;
1955
0
            pixConvertToPdfData(pixt4, L_JPEG_ENCODE, quality, pdata, pnbytes,
1956
0
                                bx, by, (l_int32)(scale * res), title,
1957
0
                                &lpd, seq);
1958
0
            pixDestroy(&pixt2);
1959
0
            pixDestroy(&pixt3);
1960
0
            pixDestroy(&pixt4);
1961
0
            boxDestroy(&box);
1962
0
            boxDestroy(&boxc);
1963
0
            boxDestroy(&box2);
1964
0
        }
1965
0
    }
1966
1967
0
    pixDestroy(&pixt1);
1968
0
    return 0;
1969
0
}
1970
1971
1972
/*---------------------------------------------------------------------*
1973
 *                         Multi-page concatenation                    *
1974
 *---------------------------------------------------------------------*/
1975
/*!
1976
 * \brief   concatenatePdf()
1977
 *
1978
 * \param[in]    dirname   directory name containing single-page pdf files
1979
 * \param[in]    substr    [optional] substring filter on filenames;
1980
 *                         can be null
1981
 * \param[in]    fileout   concatenated pdf file
1982
 * \return  0 if OK, 1 on error
1983
 *
1984
 * <pre>
1985
 * Notes:
1986
 *      (1) This only works with leptonica-formatted single-page pdf files.
1987
 *      (2) If %substr is not NULL, only filenames that contain
1988
 *          the substring can be returned.  If %substr == NULL,
1989
 *          none of the filenames are filtered out.
1990
 *      (3) The files in the directory, after optional filtering by
1991
 *          the substring, are lexically sorted in increasing order
1992
 *          before concatenation.
1993
 * </pre>
1994
 */
1995
l_ok
1996
concatenatePdf(const char  *dirname,
1997
               const char  *substr,
1998
               const char  *fileout)
1999
0
{
2000
0
l_int32  ret;
2001
0
SARRAY  *sa;
2002
2003
0
    if (!dirname)
2004
0
        return ERROR_INT("dirname not defined", __func__, 1);
2005
0
    if (!fileout)
2006
0
        return ERROR_INT("fileout not defined", __func__, 1);
2007
2008
0
    if ((sa = getSortedPathnamesInDirectory(dirname, substr, 0, 0)) == NULL)
2009
0
        return ERROR_INT("sa not made", __func__, 1);
2010
0
    ret = saConcatenatePdf(sa, fileout);
2011
0
    sarrayDestroy(&sa);
2012
0
    return ret;
2013
0
}
2014
2015
2016
/*!
2017
 * \brief   saConcatenatePdf()
2018
 *
2019
 * \param[in]    sa        string array of pathnames for single-page pdf files
2020
 * \param[in]    fileout   concatenated pdf file
2021
 * \return  0 if OK, 1 on error
2022
 *
2023
 * <pre>
2024
 * Notes:
2025
 *      (1) This only works with leptonica-formatted single-page pdf files.
2026
 * </pre>
2027
 */
2028
l_ok
2029
saConcatenatePdf(SARRAY      *sa,
2030
                 const char  *fileout)
2031
0
{
2032
0
l_uint8  *data;
2033
0
l_int32   ret;
2034
0
size_t    nbytes;
2035
2036
0
    if (!sa)
2037
0
        return ERROR_INT("sa not defined", __func__, 1);
2038
0
    if (!fileout)
2039
0
        return ERROR_INT("fileout not defined", __func__, 1);
2040
2041
0
    ret = saConcatenatePdfToData(sa, &data, &nbytes);
2042
0
    if (ret)
2043
0
        return ERROR_INT("pdf data not made", __func__, 1);
2044
0
    ret = l_binaryWrite(fileout, "w", data, nbytes);
2045
0
    LEPT_FREE(data);
2046
0
    return ret;
2047
0
}
2048
2049
2050
/*!
2051
 * \brief   ptraConcatenatePdf()
2052
 *
2053
 * \param[in]    pa       array of pdf strings, each for a single-page pdf file
2054
 * \param[in]    fileout  concatenated pdf file
2055
 * \return  0 if OK, 1 on error
2056
 *
2057
 * <pre>
2058
 * Notes:
2059
 *      (1) This only works with leptonica-formatted single-page pdf files.
2060
 * </pre>
2061
 */
2062
l_ok
2063
ptraConcatenatePdf(L_PTRA      *pa,
2064
                   const char  *fileout)
2065
0
{
2066
0
l_uint8  *data;
2067
0
l_int32   ret;
2068
0
size_t    nbytes;
2069
2070
0
    if (!pa)
2071
0
        return ERROR_INT("pa not defined", __func__, 1);
2072
0
    if (!fileout)
2073
0
        return ERROR_INT("fileout not defined", __func__, 1);
2074
2075
0
    ret = ptraConcatenatePdfToData(pa, NULL, &data, &nbytes);
2076
0
    if (ret)
2077
0
        return ERROR_INT("pdf data not made", __func__, 1);
2078
0
    ret = l_binaryWrite(fileout, "w", data, nbytes);
2079
0
    LEPT_FREE(data);
2080
0
    return ret;
2081
0
}
2082
2083
2084
/*!
2085
 * \brief   concatenatePdfToData()
2086
 *
2087
 * \param[in]    dirname   directory name containing single-page pdf files
2088
 * \param[in]    substr    [optional] substring filter on filenames;
2089
 *                         can be null
2090
 * \param[out]   pdata     concatenated pdf data in memory
2091
 * \param[out]   pnbytes   number of bytes in pdf data
2092
 * \return  0 if OK, 1 on error
2093
 *
2094
 * <pre>
2095
 * Notes:
2096
 *      (1) This only works with leptonica-formatted single-page pdf files.
2097
 *      (2) If %substr is not NULL, only filenames that contain
2098
 *          the substring can be returned.  If %substr == NULL,
2099
 *          none of the filenames are filtered out.
2100
 *      (3) The files in the directory, after optional filtering by
2101
 *          the substring, are lexically sorted in increasing order
2102
 *          before concatenation.
2103
 * </pre>
2104
 */
2105
l_ok
2106
concatenatePdfToData(const char  *dirname,
2107
                     const char  *substr,
2108
                     l_uint8    **pdata,
2109
                     size_t      *pnbytes)
2110
0
{
2111
0
l_int32  ret;
2112
0
SARRAY  *sa;
2113
2114
0
    if (!pdata)
2115
0
        return ERROR_INT("&data not defined", __func__, 1);
2116
0
    *pdata = NULL;
2117
0
    if (!pnbytes)
2118
0
        return ERROR_INT("&nbytes not defined", __func__, 1);
2119
0
    *pnbytes = 0;
2120
0
    if (!dirname)
2121
0
        return ERROR_INT("dirname not defined", __func__, 1);
2122
2123
0
    if ((sa = getSortedPathnamesInDirectory(dirname, substr, 0, 0)) == NULL)
2124
0
        return ERROR_INT("sa not made", __func__, 1);
2125
0
    ret = saConcatenatePdfToData(sa, pdata, pnbytes);
2126
0
    sarrayDestroy(&sa);
2127
0
    return ret;
2128
0
}
2129
2130
2131
/*!
2132
 * \brief   saConcatenatePdfToData()
2133
 *
2134
 * \param[in]    sa        string array of pathnames for single-page pdf files
2135
 * \param[out]   pdata     concatenated pdf data in memory
2136
 * \param[out]   pnbytes   number of bytes in pdf data
2137
 * \return  0 if OK, 1 on error
2138
 *
2139
 * <pre>
2140
 * Notes:
2141
 *      (1) This only works with leptonica-formatted single-page pdf files.
2142
 * </pre>
2143
 */
2144
l_ok
2145
saConcatenatePdfToData(SARRAY    *sa,
2146
                       l_uint8  **pdata,
2147
                       size_t    *pnbytes)
2148
0
{
2149
0
char     *fname;
2150
0
l_int32   i, npages, ret;
2151
0
L_BYTEA  *bas;
2152
0
L_PTRA   *pa_data;  /* input pdf data for each page */
2153
2154
0
    if (!pdata)
2155
0
        return ERROR_INT("&data not defined", __func__, 1);
2156
0
    *pdata = NULL;
2157
0
    if (!pnbytes)
2158
0
        return ERROR_INT("&nbytes not defined", __func__, 1);
2159
0
    *pnbytes = 0;
2160
0
    if (!sa)
2161
0
        return ERROR_INT("sa not defined", __func__, 1);
2162
2163
        /* Read the pdf files into memory */
2164
0
    if ((npages = sarrayGetCount(sa)) == 0)
2165
0
        return ERROR_INT("no filenames found", __func__, 1);
2166
0
    pa_data = ptraCreate(npages);
2167
0
    for (i = 0; i < npages; i++) {
2168
0
        fname = sarrayGetString(sa, i, L_NOCOPY);
2169
0
        bas = l_byteaInitFromFile(fname);
2170
0
        ptraAdd(pa_data, bas);
2171
0
    }
2172
2173
0
    ret = ptraConcatenatePdfToData(pa_data, sa, pdata, pnbytes);
2174
2175
        /* Cleanup: some pages could have been removed */
2176
0
    ptraGetActualCount(pa_data, &npages);
2177
0
    for (i = 0; i < npages; i++) {
2178
0
        bas = (L_BYTEA *)ptraRemove(pa_data, i, L_NO_COMPACTION);
2179
0
        l_byteaDestroy(&bas);
2180
0
    }
2181
0
    ptraDestroy(&pa_data, FALSE, FALSE);
2182
0
    return ret;
2183
0
}
2184
2185
/* --------------------------------------------*/
2186
#endif  /* USE_PDFIO */
2187
/* --------------------------------------------*/