/src/leptonica/src/pdfio2.c
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /*====================================================================*  | 
2  |  |  -  Copyright (C) 2001 Leptonica.  All rights reserved.  | 
3  |  |  -  | 
4  |  |  -  Redistribution and use in source and binary forms, with or without  | 
5  |  |  -  modification, are permitted provided that the following conditions  | 
6  |  |  -  are met:  | 
7  |  |  -  1. Redistributions of source code must retain the above copyright  | 
8  |  |  -     notice, this list of conditions and the following disclaimer.  | 
9  |  |  -  2. Redistributions in binary form must reproduce the above  | 
10  |  |  -     copyright notice, this list of conditions and the following  | 
11  |  |  -     disclaimer in the documentation and/or other materials  | 
12  |  |  -     provided with the distribution.  | 
13  |  |  -  | 
14  |  |  -  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS  | 
15  |  |  -  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT  | 
16  |  |  -  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR  | 
17  |  |  -  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL ANY  | 
18  |  |  -  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,  | 
19  |  |  -  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,  | 
20  |  |  -  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR  | 
21  |  |  -  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY  | 
22  |  |  -  OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING  | 
23  |  |  -  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS  | 
24  |  |  -  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.  | 
25  |  |  *====================================================================*/  | 
26  |  |  | 
27  |  | /*!  | 
28  |  |  * \file pdfio2.c  | 
29  |  |  * <pre>  | 
30  |  |  *  | 
31  |  |  *    Lower-level operations for generating pdf.  | 
32  |  |  *  | 
33  |  |  *     Intermediate function for single page, multi-image conversion  | 
34  |  |  *          l_int32              pixConvertToPdfData()  | 
35  |  |  *  | 
36  |  |  *     Intermediate function for generating multipage pdf output  | 
37  |  |  *          l_int32              ptraConcatenatePdfToData()  | 
38  |  |  *  | 
39  |  |  *     Convert tiff multipage to pdf file  | 
40  |  |  *          l_int32              convertTiffMultipageToPdf()  | 
41  |  |  *  | 
42  |  |  *     Generates the CID, transcoding under some conditions  | 
43  |  |  *          l_int32              l_generateCIDataForPdf()  | 
44  |  |  *          l_int32              l_generateCIData()  | 
45  |  |  *  | 
46  |  |  *       Lower-level CID generation without transcoding  | 
47  |  |  *          L_COMP_DATA         *l_generateFlateDataPdf()  | 
48  |  |  *          L_COMP_DATA         *l_generateJpegData()  | 
49  |  |  *          L_COMP_DATA         *l_generateJpegDataMem()  | 
50  |  |  *          static L_COMP_DATA  *l_generateJp2kData()  | 
51  |  |  *          L_COMP_DATA         *l_generateG4Data()  | 
52  |  |  *  | 
53  |  |  *       Lower-level CID generation with transcoding  | 
54  |  |  *          l_int32              pixGenerateCIData()  | 
55  |  |  *          L_COMP_DATA         *l_generateFlateData()  | 
56  |  |  *          static L_COMP_DATA  *pixGenerateFlateData()  | 
57  |  |  *          static L_COMP_DATA  *pixGenerateJpegData()  | 
58  |  |  *          static L_COMP_DATA  *pixGenerateJp2kData()  | 
59  |  |  *          static L_COMP_DATA  *pixGenerateG4Data()  | 
60  |  |  *  | 
61  |  |  *       Other CID operations  | 
62  |  |  *          l_int32              cidConvertToPdfData()  | 
63  |  |  *          void                 l_CIDataDestroy()  | 
64  |  |  *  | 
65  |  |  *     Helper functions for generating the output pdf string  | 
66  |  |  *          static l_int32       l_generatePdf()  | 
67  |  |  *          static void          generateFixedStringsPdf()  | 
68  |  |  *          static char         *generateEscapeString()  | 
69  |  |  *          static void          generateMediaboxPdf()  | 
70  |  |  *          static l_int32       generatePageStringPdf()  | 
71  |  |  *          static l_int32       generateContentStringPdf()  | 
72  |  |  *          static l_int32       generatePreXStringsPdf()  | 
73  |  |  *          static l_int32       generateColormapStringsPdf()  | 
74  |  |  *          static void          generateTrailerPdf()  | 
75  |  |  *          static char         *makeTrailerStringPdf()  | 
76  |  |  *          static l_int32       generateOutputDataPdf()  | 
77  |  |  *  | 
78  |  |  *     Helper functions for generating multipage pdf output  | 
79  |  |  *          static l_int32       parseTrailerPdf()  | 
80  |  |  *          static char         *generatePagesObjStringPdf()  | 
81  |  |  *          static L_BYTEA      *substituteObjectNumbers()  | 
82  |  |  *  | 
83  |  |  *     Create/destroy/access pdf data  | 
84  |  |  *          static L_PDF_DATA   *pdfdataCreate()  | 
85  |  |  *          static void          pdfdataDestroy()  | 
86  |  |  *          static L_COMP_DATA  *pdfdataGetCid()  | 
87  |  |  *  | 
88  |  |  *     Find number of pages in a pdf  | 
89  |  |  *          l_int32              getPdfPageCount()  | 
90  |  |  *  | 
91  |  |  *     Find widths and heights of pages and media boxes in a pdf  | 
92  |  |  *          l_int32              getPdfPageSizes()  | 
93  |  |  *          l_int32              getPdfMediaBoxSizes()  | 
94  |  |  *  | 
95  |  |  *     Find effective resolution of images rendered from a pdf  | 
96  |  |  *          l_int32              getPdfRendererResolution()  | 
97  |  |  *  | 
98  |  |  *     Set flags for special modes  | 
99  |  |  *          void                 l_pdfSetG4ImageMask()  | 
100  |  |  *          void                 l_pdfSetDateAndVersion()  | 
101  |  |  *  | 
102  |  |  * </pre>  | 
103  |  |  */  | 
104  |  |  | 
105  |  | #ifdef HAVE_CONFIG_H  | 
106  |  | #include <config_auto.h>  | 
107  |  | #endif  /* HAVE_CONFIG_H */  | 
108  |  |  | 
109  |  | #include <string.h>  | 
110  |  | #include <math.h>  | 
111  |  | #include "allheaders.h"  | 
112  |  |  | 
113  |  | /* --------------------------------------------*/  | 
114  |  | #if  USE_PDFIO   /* defined in environ.h */  | 
115  |  |  /* --------------------------------------------*/  | 
116  |  |  | 
117  |  |     /* Typical scan resolution in ppi (pixels/inch) */  | 
118  |  | static const l_int32  DefaultInputRes = 300;  | 
119  |  |  | 
120  |  |     /* Static helpers */  | 
121  |  | static L_COMP_DATA  *l_generateJp2kData(const char *fname);  | 
122  |  | static L_COMP_DATA  *pixGenerateFlateData(PIX *pixs, l_int32 ascii85flag);  | 
123  |  | static L_COMP_DATA  *pixGenerateJpegData(PIX *pixs, l_int32 ascii85flag,  | 
124  |  |                                          l_int32 quality);  | 
125  |  | static L_COMP_DATA  *pixGenerateJp2kData(PIX *pixs, l_int32 quality);  | 
126  |  | static L_COMP_DATA  *pixGenerateG4Data(PIX *pixs, l_int32 ascii85flag);  | 
127  |  |  | 
128  |  | static l_int32       l_generatePdf(l_uint8 **pdata, size_t *pnbytes,  | 
129  |  |                                    L_PDF_DATA  *lpd);  | 
130  |  | static void          generateFixedStringsPdf(L_PDF_DATA *lpd);  | 
131  |  | static char         *generateEscapeString(const char  *str);  | 
132  |  | static void          generateMediaboxPdf(L_PDF_DATA *lpd);  | 
133  |  | static l_int32       generatePageStringPdf(L_PDF_DATA *lpd);  | 
134  |  | static l_int32       generateContentStringPdf(L_PDF_DATA *lpd);  | 
135  |  | static l_int32       generatePreXStringsPdf(L_PDF_DATA *lpd);  | 
136  |  | static l_int32       generateColormapStringsPdf(L_PDF_DATA *lpd);  | 
137  |  | static void          generateTrailerPdf(L_PDF_DATA *lpd);  | 
138  |  | static char         *makeTrailerStringPdf(L_DNA *daloc);  | 
139  |  | static l_int32       generateOutputDataPdf(l_uint8 **pdata, size_t *pnbytes,  | 
140  |  |                                        L_PDF_DATA *lpd);  | 
141  |  |  | 
142  |  | static l_int32       parseTrailerPdf(L_BYTEA *bas, L_DNA **pda);  | 
143  |  | static char         *generatePagesObjStringPdf(NUMA *napage);  | 
144  |  | static L_BYTEA      *substituteObjectNumbers(L_BYTEA *bas, NUMA *na_objs);  | 
145  |  |  | 
146  |  | static L_PDF_DATA   *pdfdataCreate(const char *title);  | 
147  |  | static void          pdfdataDestroy(L_PDF_DATA **plpd);  | 
148  |  | static L_COMP_DATA  *pdfdataGetCid(L_PDF_DATA *lpd, l_int32 index);  | 
149  |  |  | 
150  |  |  | 
151  |  | /* ---------------- Defaults for rendering options ----------------- */  | 
152  |  |     /* Output G4 as writing through image mask; this is the default */  | 
153  |  | static l_int32   var_WRITE_G4_IMAGE_MASK = 1;  | 
154  |  |     /* Write date/time and lib version into pdf; this is the default */  | 
155  |  | static l_int32   var_WRITE_DATE_AND_VERSION = 1;  | 
156  |  |  | 
157  |  | #define L_SMALLBUF   256  | 
158  |  | #define L_BIGBUF    2048   /* must be able to hold hex colormap */  | 
159  |  |  | 
160  |  |  | 
161  |  | #ifndef  NO_CONSOLE_IO  | 
162  |  | #define  DEBUG_MULTIPAGE      0  | 
163  |  | #endif  /* ~NO_CONSOLE_IO */  | 
164  |  |  | 
165  |  |  | 
166  |  | /*---------------------------------------------------------------------*  | 
167  |  |  *    Intermediate function for single page, multi-image conversion    *  | 
168  |  |  *---------------------------------------------------------------------*/  | 
169  |  | /*!  | 
170  |  |  * \brief   pixConvertToPdfData()  | 
171  |  |  *  | 
172  |  |  * \param[in]      pix       all depths; cmap OK  | 
173  |  |  * \param[in]      type      L_G4_ENCODE, L_JPEG_ENCODE, L_FLATE_ENCODE,  | 
174  |  |  *                           L_JP2K_ENCODE  | 
175  |  |  * \param[in]      quality   for jpeg: 1-100; 0 for default (75)  | 
176  |  |  *                           for jp2k: 27-45; 0 for default (34)  | 
177  |  |  * \param[out]     pdata     pdf array  | 
178  |  |  * \param[out]     pnbytes   number of bytes in pdf array  | 
179  |  |  * \param[in]      x, y      location of lower-left corner of image, in pixels,  | 
180  |  |  *                           relative to the PostScript origin (0,0) at  | 
181  |  |  *                           the lower-left corner of the page)  | 
182  |  |  * \param[in]      res       override the resolution of the input image, in ppi;  | 
183  |  |  *                           use 0 to respect resolution embedded in the input  | 
184  |  |  * \param[in]      title     [optional] pdf title; can be null  | 
185  |  |  * \param[in,out]  plpd      ptr to lpd; created on the first invocation and  | 
186  |  |  *                           returned until last image is processed  | 
187  |  |  * \param[in]      position  in image sequence: L_FIRST_IMAGE, L_NEXT_IMAGE,  | 
188  |  |  *                           L_LAST_IMAGE  | 
189  |  |  * \return  0 if OK, 1 on error  | 
190  |  |  *  | 
191  |  |  * <pre>  | 
192  |  |  * Notes:  | 
193  |  |  *      (1) If %res == 0 and the input resolution field from the pix is 0,  | 
194  |  |  *          this will use DefaultInputRes.  | 
195  |  |  *      (2) This only writes %data if it is the last image to be  | 
196  |  |  *          written on the page.  | 
197  |  |  *      (3) See comments in convertToPdf().  | 
198  |  |  * </pre>  | 
199  |  |  */  | 
200  |  | l_ok  | 
201  |  | pixConvertToPdfData(PIX          *pix,  | 
202  |  |                     l_int32       type,  | 
203  |  |                     l_int32       quality,  | 
204  |  |                     l_uint8     **pdata,  | 
205  |  |                     size_t       *pnbytes,  | 
206  |  |                     l_int32       x,  | 
207  |  |                     l_int32       y,  | 
208  |  |                     l_int32       res,  | 
209  |  |                     const char   *title,  | 
210  |  |                     L_PDF_DATA  **plpd,  | 
211  |  |                     l_int32       position)  | 
212  | 0  | { | 
213  | 0  | l_int32       pixres, w, h, ret;  | 
214  | 0  | l_float32     xpt, ypt, wpt, hpt;  | 
215  | 0  | L_COMP_DATA  *cid = NULL;  | 
216  | 0  | L_PDF_DATA   *lpd = NULL;  | 
217  |  | 
  | 
218  | 0  |     if (!pdata)  | 
219  | 0  |         return ERROR_INT("&data not defined", __func__, 1); | 
220  | 0  |     *pdata = NULL;  | 
221  | 0  |     if (!pnbytes)  | 
222  | 0  |         return ERROR_INT("&nbytes not defined", __func__, 1); | 
223  | 0  |     *pnbytes = 0;  | 
224  | 0  |     if (!pix)  | 
225  | 0  |         return ERROR_INT("pix not defined", __func__, 1); | 
226  | 0  |     if (type != L_JPEG_ENCODE && type != L_G4_ENCODE &&  | 
227  | 0  |         type != L_FLATE_ENCODE && type != L_JP2K_ENCODE) { | 
228  | 0  |         selectDefaultPdfEncoding(pix, &type);  | 
229  | 0  |     }  | 
230  | 0  |     if (quality < 0 || quality > 100)  | 
231  | 0  |         return ERROR_INT("invalid quality", __func__, 1); | 
232  |  |  | 
233  | 0  |     if (plpd) {  /* part of multi-page invocation */ | 
234  | 0  |         if (position == L_FIRST_IMAGE)  | 
235  | 0  |             *plpd = NULL;  | 
236  | 0  |     }  | 
237  |  |  | 
238  |  |         /* Generate the compressed image data.  It must NOT  | 
239  |  |          * be ascii85 encoded. */  | 
240  | 0  |     pixGenerateCIData(pix, type, quality, 0, &cid);  | 
241  | 0  |     if (!cid)  | 
242  | 0  |         return ERROR_INT("cid not made", __func__, 1); | 
243  |  |  | 
244  |  |         /* Get media box in pts.  Guess the input image resolution  | 
245  |  |          * based on the input parameter %res, the resolution data in  | 
246  |  |          * the pix, and the size of the image. */  | 
247  | 0  |     pixres = cid->res;  | 
248  | 0  |     w = cid->w;  | 
249  | 0  |     h = cid->h;  | 
250  | 0  |     if (res <= 0.0)  | 
251  | 0  |         res = (pixres > 0) ? pixres : DefaultInputRes;  | 
252  | 0  |     xpt = x * 72.f / res;  | 
253  | 0  |     ypt = y * 72.f / res;  | 
254  | 0  |     wpt = w * 72.f / res;  | 
255  | 0  |     hpt = h * 72.f / res;  | 
256  |  |  | 
257  |  |         /* Set up lpd */  | 
258  | 0  |     if (!plpd) {  /* single image */ | 
259  | 0  |         if ((lpd = pdfdataCreate(title)) == NULL)  | 
260  | 0  |             return ERROR_INT("lpd not made", __func__, 1); | 
261  | 0  |     } else if (position == L_FIRST_IMAGE) {  /* first of multiple images */ | 
262  | 0  |         if ((lpd = pdfdataCreate(title)) == NULL)  | 
263  | 0  |             return ERROR_INT("lpd not made", __func__, 1); | 
264  | 0  |         *plpd = lpd;  | 
265  | 0  |     } else {  /* not the first of multiple images */ | 
266  | 0  |         lpd = *plpd;  | 
267  | 0  |     }  | 
268  |  |  | 
269  |  |         /* Add the data to the lpd */  | 
270  | 0  |     ptraAdd(lpd->cida, cid);  | 
271  | 0  |     lpd->n++;  | 
272  | 0  |     ptaAddPt(lpd->xy, xpt, ypt);  | 
273  | 0  |     ptaAddPt(lpd->wh, wpt, hpt);  | 
274  |  |  | 
275  |  |         /* If a single image or the last of multiple images,  | 
276  |  |          * generate the pdf and destroy the lpd */  | 
277  | 0  |     if (!plpd || (position == L_LAST_IMAGE)) { | 
278  | 0  |         ret = l_generatePdf(pdata, pnbytes, lpd);  | 
279  | 0  |         pdfdataDestroy(&lpd);  | 
280  | 0  |         if (plpd) *plpd = NULL;  | 
281  | 0  |         if (ret)  | 
282  | 0  |             return ERROR_INT("pdf output not made", __func__, 1); | 
283  | 0  |     }  | 
284  |  |  | 
285  | 0  |     return 0;  | 
286  | 0  | }  | 
287  |  |  | 
288  |  |  | 
289  |  | /*---------------------------------------------------------------------*  | 
290  |  |  *      Intermediate function for generating multipage pdf output      *  | 
291  |  |  *---------------------------------------------------------------------*/  | 
292  |  | /*!  | 
293  |  |  * \brief   ptraConcatenatePdfToData()  | 
294  |  |  *  | 
295  |  |  * \param[in]    pa_data    ptra array of pdf strings, each for a  | 
296  |  |  *                          single-page pdf file  | 
297  |  |  * \param[in]    sa         [optional] string array of pathnames for  | 
298  |  |  *                          input pdf files; can be null  | 
299  |  |  * \param[out]   pdata      concatenated pdf data in memory  | 
300  |  |  * \param[out]   pnbytes    number of bytes in pdf data  | 
301  |  |  * \return  0 if OK, 1 on error  | 
302  |  |  *  | 
303  |  |  * <pre>  | 
304  |  |  * Notes:  | 
305  |  |  *      (1) This only works with leptonica-formatted single-page pdf files.  | 
306  |  |  *          pdf files generated by other programs will have unpredictable  | 
307  |  |  *          (and usually bad) results.  The requirements for each pdf file:  | 
308  |  |  *            (a) The Catalog and Info objects are the first two.  | 
309  |  |  *            (b) Object 3 is Pages  | 
310  |  |  *            (c) Object 4 is Page  | 
311  |  |  *            (d) The remaining objects are Contents, XObjects, and ColorSpace  | 
312  |  |  *      (2) We remove trailers from each page, and append the full trailer  | 
313  |  |  *          for all pages at the end.  | 
314  |  |  *      (3) For all but the first file, remove the ID and the first 3  | 
315  |  |  *          objects (catalog, info, pages), so that each subsequent  | 
316  |  |  *          file has only objects of these classes:  | 
317  |  |  *              Page, Contents, XObject, ColorSpace (Indexed RGB).  | 
318  |  |  *          For those objects, we substitute these refs to objects  | 
319  |  |  *          in the local file:  | 
320  |  |  *              Page:  Parent(object 3), Contents, XObject(typically multiple)  | 
321  |  |  *              XObject:  [ColorSpace if indexed]  | 
322  |  |  *          The Pages object on the first page (object 3) has a Kids array  | 
323  |  |  *          of references to all the Page objects, with a Count equal  | 
324  |  |  *          to the number of pages.  Each Page object refers back to  | 
325  |  |  *          this parent.  | 
326  |  |  * </pre>  | 
327  |  |  */  | 
328  |  | l_ok  | 
329  |  | ptraConcatenatePdfToData(L_PTRA    *pa_data,  | 
330  |  |                          SARRAY    *sa,  | 
331  |  |                          l_uint8  **pdata,  | 
332  |  |                          size_t    *pnbytes)  | 
333  | 0  | { | 
334  | 0  | char     *fname, *str_pages, *str_trailer;  | 
335  | 0  | l_uint8  *pdfdata, *data;  | 
336  | 0  | l_int32   i, j, index, nobj, npages;  | 
337  | 0  | l_int32  *sizes, *locs;  | 
338  | 0  | size_t    size;  | 
339  | 0  | L_BYTEA  *bas, *bad, *bat1, *bat2;  | 
340  | 0  | L_DNA    *da_locs, *da_sizes, *da_outlocs, *da;  | 
341  | 0  | L_DNAA   *daa_locs;  /* object locations on each page */  | 
342  | 0  | NUMA     *na_objs, *napage;  | 
343  | 0  | NUMAA    *naa_objs;  /* object mapping numbers to new values */  | 
344  |  | 
  | 
345  | 0  |     if (!pdata)  | 
346  | 0  |         return ERROR_INT("&data not defined", __func__, 1); | 
347  | 0  |     *pdata = NULL;  | 
348  | 0  |     if (!pnbytes)  | 
349  | 0  |         return ERROR_INT("&nbytes not defined", __func__, 1); | 
350  | 0  |     *pnbytes = 0;  | 
351  | 0  |     if (!pa_data)  | 
352  | 0  |         return ERROR_INT("pa_data not defined", __func__, 1); | 
353  |  |  | 
354  |  |         /* Parse the files and find the object locations.  | 
355  |  |          * Remove file data that cannot be parsed. */  | 
356  | 0  |     ptraGetActualCount(pa_data, &npages);  | 
357  | 0  |     daa_locs = l_dnaaCreate(npages);  | 
358  | 0  |     for (i = 0; i < npages; i++) { | 
359  | 0  |         bas = (L_BYTEA *)ptraGetPtrToItem(pa_data, i);  | 
360  | 0  |         if (parseTrailerPdf(bas, &da_locs) != 0) { | 
361  | 0  |             bas = (L_BYTEA *)ptraRemove(pa_data, i, L_NO_COMPACTION);  | 
362  | 0  |             l_byteaDestroy(&bas);  | 
363  | 0  |             if (sa) { | 
364  | 0  |                 fname = sarrayGetString(sa, i, L_NOCOPY);  | 
365  | 0  |                 L_ERROR("can't parse file %s; skipping\n", __func__, fname); | 
366  | 0  |             } else { | 
367  | 0  |                 L_ERROR("can't parse file %d; skipping\n", __func__, i); | 
368  | 0  |             }  | 
369  | 0  |         } else { | 
370  | 0  |             l_dnaaAddDna(daa_locs, da_locs, L_INSERT);  | 
371  | 0  |         }  | 
372  | 0  |     }  | 
373  |  |  | 
374  |  |         /* Recompute npages in case some of the files were not pdf */  | 
375  | 0  |     ptraCompactArray(pa_data);  | 
376  | 0  |     ptraGetActualCount(pa_data, &npages);  | 
377  | 0  |     if (npages == 0) { | 
378  | 0  |         l_dnaaDestroy(&daa_locs);  | 
379  | 0  |         return ERROR_INT("no parsable pdf files found", __func__, 1); | 
380  | 0  |     }  | 
381  |  |  | 
382  |  |         /* Find the mapping from initial to final object numbers */  | 
383  | 0  |     naa_objs = numaaCreate(npages);  /* stores final object numbers */  | 
384  | 0  |     napage = numaCreate(npages);  /* stores "Page" object numbers */  | 
385  | 0  |     index = 0;  | 
386  | 0  |     for (i = 0; i < npages; i++) { | 
387  | 0  |         da = l_dnaaGetDna(daa_locs, i, L_CLONE);  | 
388  | 0  |         nobj = l_dnaGetCount(da);  | 
389  | 0  |         if (i == 0) { | 
390  | 0  |             numaAddNumber(napage, 4);  /* object 4 on first page */  | 
391  | 0  |             na_objs = numaMakeSequence(0.0, 1.0, nobj - 1);  | 
392  | 0  |             index = nobj - 1;  | 
393  | 0  |         } else {  /* skip the first 3 objects in each file */ | 
394  | 0  |             numaAddNumber(napage, index);  /* Page object is first we add */  | 
395  | 0  |             na_objs = numaMakeConstant(0.0, nobj - 1);  | 
396  | 0  |             numaReplaceNumber(na_objs, 3, 3);  /* refers to parent of all */  | 
397  | 0  |             for (j = 4; j < nobj - 1; j++)  | 
398  | 0  |                 numaSetValue(na_objs, j, index++);  | 
399  | 0  |         }  | 
400  | 0  |         numaaAddNuma(naa_objs, na_objs, L_INSERT);  | 
401  | 0  |         l_dnaDestroy(&da);  | 
402  | 0  |     }  | 
403  |  |  | 
404  |  |         /* Make the Pages object (#3) */  | 
405  | 0  |     str_pages = generatePagesObjStringPdf(napage);  | 
406  |  |  | 
407  |  |         /* Build the output */  | 
408  | 0  |     bad = l_byteaCreate(5000);  | 
409  | 0  |     da_outlocs = l_dnaCreate(0);  /* locations of all output objects */  | 
410  | 0  |     for (i = 0; i < npages; i++) { | 
411  | 0  |         bas = (L_BYTEA *)ptraGetPtrToItem(pa_data, i);  | 
412  | 0  |         pdfdata = l_byteaGetData(bas, &size);  | 
413  | 0  |         da_locs = l_dnaaGetDna(daa_locs, i, L_CLONE);  /* locs on this page */  | 
414  | 0  |         na_objs = numaaGetNuma(naa_objs, i, L_CLONE);  /* obj # on this page */  | 
415  | 0  |         nobj = l_dnaGetCount(da_locs) - 1;  | 
416  | 0  |         da_sizes = l_dnaDiffAdjValues(da_locs);  /* object sizes on this page */  | 
417  | 0  |         sizes = l_dnaGetIArray(da_sizes);  | 
418  | 0  |         locs = l_dnaGetIArray(da_locs);  | 
419  | 0  |         if (i == 0) { | 
420  | 0  |             l_byteaAppendData(bad, pdfdata, sizes[0]);  | 
421  | 0  |             l_byteaAppendData(bad, pdfdata + locs[1], sizes[1]);  | 
422  | 0  |             l_byteaAppendData(bad, pdfdata + locs[2], sizes[2]);  | 
423  | 0  |             l_byteaAppendString(bad, str_pages);  | 
424  | 0  |             for (j = 0; j < 4; j++)  | 
425  | 0  |                 l_dnaAddNumber(da_outlocs, locs[j]);  | 
426  | 0  |         }  | 
427  | 0  |         for (j = 4; j < nobj; j++) { | 
428  | 0  |             l_dnaAddNumber(da_outlocs, l_byteaGetSize(bad));  | 
429  | 0  |             bat1 = l_byteaInitFromMem(pdfdata + locs[j], sizes[j]);  | 
430  | 0  |             bat2 = substituteObjectNumbers(bat1, na_objs);  | 
431  | 0  |             data = l_byteaGetData(bat2, &size);  | 
432  | 0  |             l_byteaAppendData(bad, data, size);  | 
433  | 0  |             l_byteaDestroy(&bat1);  | 
434  | 0  |             l_byteaDestroy(&bat2);  | 
435  | 0  |         }  | 
436  | 0  |         if (i == npages - 1)  /* last one */  | 
437  | 0  |             l_dnaAddNumber(da_outlocs, l_byteaGetSize(bad));  | 
438  | 0  |         LEPT_FREE(sizes);  | 
439  | 0  |         LEPT_FREE(locs);  | 
440  | 0  |         l_dnaDestroy(&da_locs);  | 
441  | 0  |         numaDestroy(&na_objs);  | 
442  | 0  |         l_dnaDestroy(&da_sizes);  | 
443  | 0  |     }  | 
444  |  |  | 
445  |  |         /* Add the trailer */  | 
446  | 0  |     str_trailer = makeTrailerStringPdf(da_outlocs);  | 
447  | 0  |     l_byteaAppendString(bad, str_trailer);  | 
448  |  |  | 
449  |  |         /* Transfer the output data */  | 
450  | 0  |     *pdata = l_byteaCopyData(bad, pnbytes);  | 
451  | 0  |     l_byteaDestroy(&bad);  | 
452  |  | 
  | 
453  |  | #if  DEBUG_MULTIPAGE  | 
454  |  |     lept_stderr("******** object mapper **********"); | 
455  |  |     numaaWriteStream(stderr, naa_objs);  | 
456  |  |  | 
457  |  |     lept_stderr("******** Page object numbers ***********"); | 
458  |  |     numaWriteStderr(napage);  | 
459  |  |  | 
460  |  |     lept_stderr("******** Pages object ***********\n"); | 
461  |  |     lept_stderr("%s\n", str_pages); | 
462  |  | #endif  /* DEBUG_MULTIPAGE */  | 
463  |  | 
  | 
464  | 0  |     numaDestroy(&napage);  | 
465  | 0  |     numaaDestroy(&naa_objs);  | 
466  | 0  |     l_dnaDestroy(&da_outlocs);  | 
467  | 0  |     l_dnaaDestroy(&daa_locs);  | 
468  | 0  |     LEPT_FREE(str_pages);  | 
469  | 0  |     LEPT_FREE(str_trailer);  | 
470  | 0  |     return 0;  | 
471  | 0  | }  | 
472  |  |  | 
473  |  |  | 
474  |  | /*---------------------------------------------------------------------*  | 
475  |  |  *                  Convert tiff multipage to pdf file                 *  | 
476  |  |  *---------------------------------------------------------------------*/  | 
477  |  | /*!  | 
478  |  |  * \brief   convertTiffMultipageToPdf()  | 
479  |  |  *  | 
480  |  |  * \param[in]    filein    (tiff)  | 
481  |  |  * \param[in]    fileout   (pdf)  | 
482  |  |  * \return  0 if OK, 1 on error  | 
483  |  |  *  | 
484  |  |  * <pre>  | 
485  |  |  * Notes:  | 
486  |  |  *      (1) A multipage tiff file can also be converted to PS, using  | 
487  |  |  *          convertTiffMultipageToPS()  | 
488  |  |  * </pre>  | 
489  |  |  */  | 
490  |  | l_ok  | 
491  |  | convertTiffMultipageToPdf(const char  *filein,  | 
492  |  |                           const char  *fileout)  | 
493  | 0  | { | 
494  | 0  | l_int32  istiff;  | 
495  | 0  | PIXA    *pixa;  | 
496  | 0  | FILE    *fp;  | 
497  |  | 
  | 
498  | 0  |     if ((fp = fopenReadStream(filein)) == NULL)  | 
499  | 0  |         return ERROR_INT_1("file not found", filein, __func__, 1); | 
500  | 0  |     istiff = fileFormatIsTiff(fp);  | 
501  | 0  |     fclose(fp);  | 
502  | 0  |     if (!istiff)  | 
503  | 0  |         return ERROR_INT_1("file not tiff format", filein, __func__, 1); | 
504  |  |  | 
505  | 0  |     pixa = pixaReadMultipageTiff(filein);  | 
506  | 0  |     pixaConvertToPdf(pixa, 0, 1.0, 0, 0, "weasel2", fileout);  | 
507  | 0  |     pixaDestroy(&pixa);  | 
508  | 0  |     return 0;  | 
509  | 0  | }  | 
510  |  |  | 
511  |  |  | 
512  |  | /*---------------------------------------------------------------------*  | 
513  |  |  *                          CID-based operations                       *  | 
514  |  |  *---------------------------------------------------------------------*/  | 
515  |  | /*!  | 
516  |  |  * \brief   l_generateCIDataForPdf()  | 
517  |  |  *  | 
518  |  |  * \param[in]    fname      [optional] can be null  | 
519  |  |  * \param[in]    pix        [optional] can be null  | 
520  |  |  * \param[in]    quality    for jpeg if transcoded: 1-100; 0 for default (75)  | 
521  |  |  *                          for jp2k if transcoded: 27-45; 0 for default (34)  | 
522  |  |  * \param[out]   pcid       compressed data  | 
523  |  |  * \return  0 if OK, 1 on error  | 
524  |  |  *  | 
525  |  |  * <pre>  | 
526  |  |  * Notes:  | 
527  |  |  *      (1) You must set either filename or pix.  | 
528  |  |  *      (2) Given an image file and optionally a pix raster of that data,  | 
529  |  |  *          this provides a CID that is compatible with PDF, preferably  | 
530  |  |  *          without transcoding.  | 
531  |  |  *      (3) The pix is included for efficiency, in case transcoding  | 
532  |  |  *          is required and the pix is available to the caller.  | 
533  |  |  *      (4) We don't try to open files named "stdin" or "-" for Tesseract  | 
534  |  |  *          compatibility reasons. We may remove this restriction  | 
535  |  |  *          in the future.  | 
536  |  |  *      (5) Note that tiff-g4 must be transcoded to properly handle byte  | 
537  |  |  *          order and perhaps photometry (e.g., min-is-black).  For a  | 
538  |  |  *          multipage tiff file, data will only be extracted from the  | 
539  |  |  *          first page, so this should not be invoked.  | 
540  |  |  * </pre>  | 
541  |  |  */  | 
542  |  | l_ok  | 
543  |  | l_generateCIDataForPdf(const char    *fname,  | 
544  |  |                        PIX           *pix,  | 
545  |  |                        l_int32        quality,  | 
546  |  |                        L_COMP_DATA  **pcid)  | 
547  | 0  | { | 
548  | 0  | l_int32       format, type;  | 
549  | 0  | L_COMP_DATA  *cid;  | 
550  | 0  | PIX          *pixt;  | 
551  |  | 
  | 
552  | 0  |     if (!pcid)  | 
553  | 0  |         return ERROR_INT("&cid not defined", __func__, 1); | 
554  | 0  |     *pcid = cid = NULL;  | 
555  | 0  |     if (!fname && !pix)  | 
556  | 0  |         return ERROR_INT("neither fname nor pix are defined", __func__, 1); | 
557  |  |  | 
558  |  |         /* If a compressed file is given that is not 'stdin', see if we  | 
559  |  |          * can generate the pdf output without transcoding. */  | 
560  | 0  |     if (fname && strcmp(fname, "-") != 0 && strcmp(fname, "stdin") != 0) { | 
561  | 0  |         findFileFormat(fname, &format);  | 
562  | 0  |         if (format == IFF_UNKNOWN)  | 
563  | 0  |             L_WARNING("file %s format is unknown\n", __func__, fname); | 
564  | 0  |         if (format == IFF_PS || format == IFF_LPDF) { | 
565  | 0  |             L_ERROR("file %s is unsupported format %d\n", | 
566  | 0  |                   __func__, fname, format);  | 
567  | 0  |             return 1;  | 
568  | 0  |         }  | 
569  | 0  |         if (format == IFF_JFIF_JPEG) { | 
570  | 0  |             cid = l_generateJpegData(fname, 0);  | 
571  | 0  |         } else if (format == IFF_JP2) { | 
572  | 0  |             cid = l_generateJp2kData(fname);  | 
573  | 0  |         } else if (format == IFF_PNG) { | 
574  | 0  |             cid = l_generateFlateDataPdf(fname, pix);  | 
575  | 0  |         }  | 
576  | 0  |     }  | 
577  |  |  | 
578  |  |         /* Otherwise, use the pix to generate the pdf output */  | 
579  | 0  |     if  (!cid) { | 
580  | 0  |         if (!pix)  | 
581  | 0  |             pixt = pixRead(fname);  | 
582  | 0  |         else  | 
583  | 0  |             pixt = pixClone(pix);  | 
584  | 0  |         if (!pixt)  | 
585  | 0  |             return ERROR_INT("pixt not made", __func__, 1); | 
586  | 0  |         if (selectDefaultPdfEncoding(pixt, &type)) { | 
587  | 0  |             pixDestroy(&pixt);  | 
588  | 0  |             return 1;  | 
589  | 0  |         }  | 
590  | 0  |         pixGenerateCIData(pixt, type, quality, 0, &cid);  | 
591  | 0  |         pixDestroy(&pixt);  | 
592  | 0  |         if (!cid)  | 
593  | 0  |             return ERROR_INT("cid not made from pix", __func__, 1); | 
594  | 0  |     }  | 
595  | 0  |     *pcid = cid;  | 
596  | 0  |     return 0;  | 
597  | 0  | }  | 
598  |  |  | 
599  |  |  | 
600  |  | /*!  | 
601  |  |  * \brief   l_generateCIData()  | 
602  |  |  *  | 
603  |  |  * \param[in]    fname  | 
604  |  |  * \param[in]    type       L_G4_ENCODE, L_JPEG_ENCODE, L_FLATE_ENCODE,  | 
605  |  |  *                          L_JP2K_ENCODE  | 
606  |  |  * \param[in]    quality    for jpeg if transcoded: 1-100; 0 for default (75)  | 
607  |  |  *                          for jp2k if transcoded: 27-45; 0 for default (34)  | 
608  |  |  * \param[in]    ascii85    0 for binary; 1 for ascii85-encoded  | 
609  |  |  * \param[out]   pcid       compressed data  | 
610  |  |  * \return  0 if OK, 1 on error  | 
611  |  |  *  | 
612  |  |  * <pre>  | 
613  |  |  * Notes:  | 
614  |  |  *      (1) This can be used for both PostScript and pdf.  | 
615  |  |  *      (1) Set ascii85:  | 
616  |  |  *           ~ 0 for binary data (PDF only)  | 
617  |  |  *           ~ 1 for ascii85 (5 for 4) encoded binary data (PostScript only)  | 
618  |  |  *      (2) This attempts to compress according to the requested type.  | 
619  |  |  *          If this can't be done, it falls back to ordinary flate encoding.  | 
620  |  |  *      (3) This differs from l_generateCIDataForPdf(), which determines  | 
621  |  |  *          the file format and only works for pdf.  | 
622  |  |  * </pre>  | 
623  |  |  */  | 
624  |  | l_ok  | 
625  |  | l_generateCIData(const char    *fname,  | 
626  |  |                  l_int32        type,  | 
627  |  |                  l_int32        quality,  | 
628  |  |                  l_int32        ascii85,  | 
629  |  |                  L_COMP_DATA  **pcid)  | 
630  | 0  | { | 
631  | 0  | l_int32       format, d, bps, spp, iscmap;  | 
632  | 0  | L_COMP_DATA  *cid;  | 
633  | 0  | PIX          *pix;  | 
634  |  | 
  | 
635  | 0  |     if (!pcid)  | 
636  | 0  |         return ERROR_INT("&cid not defined", __func__, 1); | 
637  | 0  |     *pcid = NULL;  | 
638  | 0  |     if (!fname)  | 
639  | 0  |         return ERROR_INT("fname not defined", __func__, 1); | 
640  | 0  |     if (type != L_G4_ENCODE && type != L_JPEG_ENCODE &&  | 
641  | 0  |         type != L_FLATE_ENCODE && type != L_JP2K_ENCODE)  | 
642  | 0  |         return ERROR_INT("invalid conversion type", __func__, 1); | 
643  | 0  |     if (ascii85 != 0 && ascii85 != 1)  | 
644  | 0  |         return ERROR_INT("invalid ascii85", __func__, 1); | 
645  |  |  | 
646  |  |         /* Sanity check on requested encoding */  | 
647  | 0  |     pixReadHeader(fname, &format, NULL, NULL, &bps, &spp, &iscmap);  | 
648  | 0  |     d = bps * spp;  | 
649  | 0  |     if (d == 24) d = 32;  | 
650  | 0  |     if (iscmap && type != L_FLATE_ENCODE) { | 
651  | 0  |         L_WARNING("pixs has cmap; using flate encoding\n", __func__); | 
652  | 0  |         type = L_FLATE_ENCODE;  | 
653  | 0  |     } else if (d < 8 && type == L_JPEG_ENCODE) { | 
654  | 0  |         L_WARNING("pixs has < 8 bpp; using flate encoding\n", __func__); | 
655  | 0  |         type = L_FLATE_ENCODE;  | 
656  | 0  |     } else if (d < 8 && type == L_JP2K_ENCODE) { | 
657  | 0  |         L_WARNING("pixs has < 8 bpp; using flate encoding\n", __func__); | 
658  | 0  |         type = L_FLATE_ENCODE;  | 
659  | 0  |     } else if (d > 1 && type == L_G4_ENCODE) { | 
660  | 0  |         L_WARNING("pixs has > 1 bpp; using flate encoding\n", __func__); | 
661  | 0  |         type = L_FLATE_ENCODE;  | 
662  | 0  |     }  | 
663  |  | 
  | 
664  | 0  |     if (type == L_JPEG_ENCODE) { | 
665  | 0  |         if (format == IFF_JFIF_JPEG) {  /* do not transcode */ | 
666  | 0  |             cid = l_generateJpegData(fname, ascii85);  | 
667  | 0  |         } else { | 
668  | 0  |             if ((pix = pixRead(fname)) == NULL)  | 
669  | 0  |                 return ERROR_INT("pix not returned for JPEG", __func__, 1); | 
670  | 0  |             cid = pixGenerateJpegData(pix, ascii85, quality);  | 
671  | 0  |             pixDestroy(&pix);  | 
672  | 0  |         }  | 
673  | 0  |         if (!cid)  | 
674  | 0  |             return ERROR_INT("jpeg data not made", __func__, 1); | 
675  | 0  |     } else if (type == L_JP2K_ENCODE) { | 
676  | 0  |         if (format == IFF_JP2) {  /* do not transcode */ | 
677  | 0  |             cid = l_generateJp2kData(fname);  | 
678  | 0  |         } else { | 
679  | 0  |             if ((pix = pixRead(fname)) == NULL)  | 
680  | 0  |                 return ERROR_INT("pix not returned for JP2K", __func__, 1); | 
681  | 0  |             cid = pixGenerateJp2kData(pix, quality);  | 
682  | 0  |             pixDestroy(&pix);  | 
683  | 0  |         }  | 
684  | 0  |         if (!cid)  | 
685  | 0  |             return ERROR_INT("jp2k data not made", __func__, 1); | 
686  | 0  |     } else if (type == L_G4_ENCODE) { | 
687  | 0  |         if ((pix = pixRead(fname)) == NULL)  | 
688  | 0  |             return ERROR_INT("pix not returned for G4", __func__, 1); | 
689  | 0  |         cid = pixGenerateG4Data(pix, ascii85);  | 
690  | 0  |         pixDestroy(&pix);  | 
691  | 0  |         if (!cid)  | 
692  | 0  |             return ERROR_INT("g4 data not made", __func__, 1); | 
693  | 0  |     } else if (type == L_FLATE_ENCODE) { | 
694  | 0  |         if ((cid = l_generateFlateData(fname, ascii85)) == NULL)  | 
695  | 0  |             return ERROR_INT("flate data not made", __func__, 1); | 
696  | 0  |     } else { | 
697  | 0  |         return ERROR_INT("invalid conversion type", __func__, 1); | 
698  | 0  |     }  | 
699  | 0  |     *pcid = cid;  | 
700  |  | 
  | 
701  | 0  |     return 0;  | 
702  | 0  | }  | 
703  |  |  | 
704  |  |  | 
705  |  | /*---------------------------------------------------------------------*  | 
706  |  |  *                     Low-level CID-based operations                  *  | 
707  |  |  *---------------------------------------------------------------------*/  | 
708  |  | /*!  | 
709  |  |  * \brief   l_generateFlateDataPdf()  | 
710  |  |  *  | 
711  |  |  * \param[in]    fname     preferably png  | 
712  |  |  * \param[in]    pixs      [optional] can be null  | 
713  |  |  * \return  cid containing png data, or NULL on error  | 
714  |  |  *  | 
715  |  |  * <pre>  | 
716  |  |  * Notes:  | 
717  |  |  *      (1) If you hand this a png file, you are going to get  | 
718  |  |  *          png predictors embedded in the flate data. So it has  | 
719  |  |  *          come to this. http://xkcd.com/1022/  | 
720  |  |  *      (2) Exception: if the png is interlaced or if it is RGBA,  | 
721  |  |  *          it will be transcoded.  | 
722  |  |  *      (3) If transcoding is required, this will not have to read from  | 
723  |  |  *          file if a pix is input.  | 
724  |  |  * </pre>  | 
725  |  |  */  | 
726  |  | L_COMP_DATA *  | 
727  |  | l_generateFlateDataPdf(const char  *fname,  | 
728  |  |                        PIX         *pixs)  | 
729  | 0  | { | 
730  | 0  | l_uint8      *pngcomp = NULL;  /* entire PNG compressed file */  | 
731  | 0  | l_uint8      *datacomp = NULL;  /* gzipped raster data */  | 
732  | 0  | l_uint8      *cmapdata = NULL;  /* uncompressed colormap */  | 
733  | 0  | char         *cmapdatahex = NULL;  /* hex ascii uncompressed colormap */  | 
734  | 0  | l_uint32      i, j, n;  | 
735  | 0  | l_int32       format, interlaced;  | 
736  | 0  | l_int32       ncolors;  /* in colormap */  | 
737  | 0  | l_int32       bps;  /* bits/sample: usually 8 */  | 
738  | 0  | l_int32       spp;  /* samples/pixel: 1-grayscale/cmap); 3-rgb; 4-rgba */  | 
739  | 0  | l_int32       w, h, cmapflag;  | 
740  | 0  | l_int32       xres, yres;  | 
741  | 0  | size_t        nbytescomp = 0, nbytespng = 0;  | 
742  | 0  | FILE         *fp;  | 
743  | 0  | L_COMP_DATA  *cid;  | 
744  | 0  | PIX          *pix;  | 
745  | 0  | PIXCMAP      *cmap = NULL;  | 
746  |  | 
  | 
747  | 0  |     if (!fname)  | 
748  | 0  |         return (L_COMP_DATA *)ERROR_PTR("fname not defined", __func__, NULL); | 
749  |  |  | 
750  | 0  |     findFileFormat(fname, &format);  | 
751  | 0  |     spp = 0;  /* init to spp != 4 if not png */  | 
752  | 0  |     interlaced = 0;  /* initialize to no interlacing */  | 
753  | 0  |     bps = 0;  /* initialize to a nonsense value */  | 
754  | 0  |     if (format == IFF_PNG) { | 
755  | 0  |         isPngInterlaced(fname, &interlaced);  | 
756  | 0  |         if (readHeaderPng(fname, NULL, NULL, &bps, &spp, NULL))  | 
757  | 0  |             return (L_COMP_DATA *)ERROR_PTR("bad png input", __func__, NULL); | 
758  | 0  |     }  | 
759  |  |  | 
760  |  |         /* PDF is capable of inlining some types of PNG files, but not all  | 
761  |  |            of them. We need to transcode anything with interlacing, an  | 
762  |  |            alpha channel, or 1 bpp (which would otherwise be photo-inverted).  | 
763  |  |  | 
764  |  |            Note: any PNG image file with an alpha channel is converted on  | 
765  |  |            reading to RGBA (spp == 4). This includes the (gray + alpha) format  | 
766  |  |            with spp == 2.  Because of the conversion, readHeaderPng() gives  | 
767  |  |            spp = 2, whereas pixGetSpp() gives spp = 4 on the converted pix. */  | 
768  | 0  |     if (format != IFF_PNG ||  | 
769  | 0  |        (format == IFF_PNG && (interlaced || bps == 1 || spp == 4 || spp == 2)))  | 
770  | 0  |     {  /* lgtm+ analyzer needed the logic expanded */ | 
771  | 0  |         if (!pixs)  | 
772  | 0  |             pix = pixRead(fname);  | 
773  | 0  |         else  | 
774  | 0  |             pix = pixClone(pixs);  | 
775  | 0  |         if (!pix)  | 
776  | 0  |             return (L_COMP_DATA *)ERROR_PTR("pix not made", __func__, NULL); | 
777  | 0  |         cid = pixGenerateFlateData(pix, 0);  | 
778  | 0  |         pixDestroy(&pix);  | 
779  | 0  |         return cid;  | 
780  | 0  |     }  | 
781  |  |  | 
782  |  |         /* It's png.  Generate the pdf data without transcoding.  | 
783  |  |          * Implementation by Jeff Breidenbach.  | 
784  |  |          * First, read the metadata */  | 
785  | 0  |     if ((fp = fopenReadStream(fname)) == NULL)  | 
786  | 0  |         return (L_COMP_DATA *)ERROR_PTR_1("stream not opened", | 
787  | 0  |                                           fname, __func__, NULL);  | 
788  | 0  |     freadHeaderPng(fp, &w, &h, &bps, &spp, &cmapflag);  | 
789  | 0  |     fgetPngResolution(fp, &xres, &yres);  | 
790  | 0  |     fclose(fp);  | 
791  |  |  | 
792  |  |         /* We get pdf corruption when inlining the data from 16 bpp png. */  | 
793  | 0  |     if (bps == 16)  | 
794  | 0  |         return l_generateFlateData(fname, 0);  | 
795  |  |  | 
796  |  |         /* Read the entire png file */  | 
797  | 0  |     if ((pngcomp = l_binaryRead(fname, &nbytespng)) == NULL)  | 
798  | 0  |         return (L_COMP_DATA *)ERROR_PTR_1("unable to read file", | 
799  | 0  |                                           fname, __func__, NULL);  | 
800  |  |  | 
801  |  |         /* Extract flate data, copying portions of it to memory, including  | 
802  |  |          * the predictor information in a byte at the beginning of each  | 
803  |  |          * raster line.  The flate data makes up the vast majority of  | 
804  |  |          * the png file, so after extraction we expect datacomp to  | 
805  |  |          * be nearly full (i.e., nbytescomp will be only slightly less  | 
806  |  |          * than nbytespng).  Also extract the colormap if present. */  | 
807  | 0  |     if ((datacomp = (l_uint8 *)LEPT_CALLOC(1, nbytespng)) == NULL) { | 
808  | 0  |         LEPT_FREE(pngcomp);  | 
809  | 0  |         return (L_COMP_DATA *)ERROR_PTR("unable to allocate memory", | 
810  | 0  |                                         __func__, NULL);  | 
811  | 0  |     }  | 
812  |  |  | 
813  |  |         /* Parse the png file.  Each chunk consists of:  | 
814  |  |          *    length: 4 bytes  | 
815  |  |          *    name:   4 bytes (e.g., "IDAT")  | 
816  |  |          *    data:   n bytes  | 
817  |  |          *    CRC:    4 bytes  | 
818  |  |          * Start at the beginning of the data section of the first chunk,  | 
819  |  |          * byte 16, because the png file begins with 8 bytes of header,  | 
820  |  |          * followed by the first 8 bytes of the first chunk  | 
821  |  |          * (length and name).  On each loop, increment by 12 bytes to  | 
822  |  |          * skip over the CRC, length and name of the next chunk. */  | 
823  | 0  |     for (i = 16; i < nbytespng; i += 12) {  /* do each successive chunk */ | 
824  |  |             /* Get the chunk length */  | 
825  | 0  |         n  = pngcomp[i - 8] << 24;  | 
826  | 0  |         n += pngcomp[i - 7] << 16;  | 
827  | 0  |         n += pngcomp[i - 6] << 8;  | 
828  | 0  |         n += pngcomp[i - 5] << 0;  | 
829  | 0  |         if (n >= nbytespng - i) {  /* "n + i" can overflow */ | 
830  | 0  |             LEPT_FREE(pngcomp);  | 
831  | 0  |             LEPT_FREE(datacomp);  | 
832  | 0  |             pixcmapDestroy(&cmap);  | 
833  | 0  |             L_ERROR("invalid png: i = %d, n = %d, nbytes = %zu\n", __func__, | 
834  | 0  |                     i, n, nbytespng);  | 
835  | 0  |             return NULL;  | 
836  | 0  |         }  | 
837  |  |  | 
838  |  |             /* Is it a data chunk? */  | 
839  | 0  |         if (memcmp(pngcomp + i - 4, "IDAT", 4) == 0) { | 
840  | 0  |             memcpy(datacomp + nbytescomp, pngcomp + i, n);  | 
841  | 0  |             nbytescomp += n;  | 
842  | 0  |         }  | 
843  |  |  | 
844  |  |             /* Is it a palette chunk? */  | 
845  | 0  |         if (cmapflag && !cmap &&  | 
846  | 0  |             memcmp(pngcomp + i - 4, "PLTE", 4) == 0) { | 
847  | 0  |             if ((n / 3) > (1 << bps)) { | 
848  | 0  |                 LEPT_FREE(pngcomp);  | 
849  | 0  |                 LEPT_FREE(datacomp);  | 
850  | 0  |                 pixcmapDestroy(&cmap);  | 
851  | 0  |                 L_ERROR("invalid png: i = %d, n = %d, cmapsize = %d\n", | 
852  | 0  |                         __func__, i, n, (1 << bps));  | 
853  | 0  |                 return NULL;  | 
854  | 0  |             }  | 
855  | 0  |             cmap = pixcmapCreate(bps);  | 
856  | 0  |             for (j = i; j < i + n; j += 3) { | 
857  | 0  |                 pixcmapAddColor(cmap, pngcomp[j], pngcomp[j + 1],  | 
858  | 0  |                                 pngcomp[j + 2]);  | 
859  | 0  |             }  | 
860  | 0  |         }  | 
861  | 0  |         i += n;  /* move to the end of the data chunk */  | 
862  | 0  |     }  | 
863  | 0  |     LEPT_FREE(pngcomp);  | 
864  |  | 
  | 
865  | 0  |     if (nbytescomp == 0) { | 
866  | 0  |         LEPT_FREE(datacomp);  | 
867  | 0  |         pixcmapDestroy(&cmap);  | 
868  | 0  |         return (L_COMP_DATA *)ERROR_PTR("invalid PNG file", __func__, NULL); | 
869  | 0  |     }  | 
870  |  |  | 
871  |  |         /* Extract and encode the colormap data as hexascii  */  | 
872  | 0  |     ncolors = 0;  | 
873  | 0  |     if (cmap) { | 
874  | 0  |         pixcmapSerializeToMemory(cmap, 3, &ncolors, &cmapdata);  | 
875  | 0  |         pixcmapDestroy(&cmap);  | 
876  | 0  |         if (!cmapdata) { | 
877  | 0  |             LEPT_FREE(datacomp);  | 
878  | 0  |             return (L_COMP_DATA *)ERROR_PTR("cmapdata not made", | 
879  | 0  |                                             __func__, NULL);  | 
880  | 0  |         }  | 
881  | 0  |         cmapdatahex = pixcmapConvertToHex(cmapdata, ncolors);  | 
882  | 0  |         LEPT_FREE(cmapdata);  | 
883  | 0  |     }  | 
884  |  |  | 
885  |  |         /* Note that this is the only situation where the predictor  | 
886  |  |          * field of the CID is set to 1.  Adobe's predictor values on  | 
887  |  |          * p. 76 of pdf_reference_1-7.pdf give 1 for no predictor and  | 
888  |  |          * 10-14 for inline predictors, the specifics of which are  | 
889  |  |          * ignored by the pdf interpreter, which just needs to know that  | 
890  |  |          * the first byte on each compressed scanline is some predictor  | 
891  |  |          * whose type can be inferred from the byte itself.  */  | 
892  | 0  |     cid = (L_COMP_DATA *)LEPT_CALLOC(1, sizeof(L_COMP_DATA));  | 
893  | 0  |     cid->datacomp = datacomp;  | 
894  | 0  |     cid->type = L_FLATE_ENCODE;  | 
895  | 0  |     cid->cmapdatahex = cmapdatahex;  | 
896  | 0  |     cid->nbytescomp = nbytescomp;  | 
897  | 0  |     cid->ncolors = ncolors;  | 
898  | 0  |     cid->predictor = TRUE;  | 
899  | 0  |     cid->w = w;  | 
900  | 0  |     cid->h = h;  | 
901  | 0  |     cid->bps = bps;  | 
902  | 0  |     cid->spp = spp;  | 
903  | 0  |     cid->res = xres;  | 
904  | 0  |     return cid;  | 
905  | 0  | }  | 
906  |  |  | 
907  |  |  | 
908  |  | /*!  | 
909  |  |  * \brief   l_generateJpegData()  | 
910  |  |  *  | 
911  |  |  * \param[in]    fname           of jpeg file  | 
912  |  |  * \param[in]    ascii85flag     0 for jpeg; 1 for ascii85-encoded jpeg  | 
913  |  |  * \return  cid containing jpeg data, or NULL on error  | 
914  |  |  *  | 
915  |  |  * <pre>  | 
916  |  |  * Notes:  | 
917  |  |  *      (1) Set ascii85flag:  | 
918  |  |  *           ~ 0 for binary data (PDF only)  | 
919  |  |  *           ~ 1 for ascii85 (5 for 4) encoded binary data (PostScript only)  | 
920  |  |  *      (2) Most of this function is repeated in l_generateJpegMemData(),  | 
921  |  |  *          which is required in pixacompFastConvertToPdfData().  | 
922  |  |  * </pre>  | 
923  |  |  */  | 
924  |  | L_COMP_DATA *  | 
925  |  | l_generateJpegData(const char  *fname,  | 
926  |  |                    l_int32      ascii85flag)  | 
927  | 0  | { | 
928  | 0  | char         *data85 = NULL;  /* ascii85 encoded jpeg compressed file */  | 
929  | 0  | l_uint8      *data = NULL;  | 
930  | 0  | l_int32       w, h, xres, yres, bps, spp;  | 
931  | 0  | size_t        nbytes, nbytes85;  | 
932  | 0  | L_COMP_DATA  *cid;  | 
933  | 0  | FILE         *fp;  | 
934  |  | 
  | 
935  | 0  |     if (!fname)  | 
936  | 0  |         return (L_COMP_DATA *)ERROR_PTR("fname not defined", __func__, NULL); | 
937  |  |  | 
938  | 0  |     if (ascii85flag != 0 && ascii85flag != 1)  | 
939  | 0  |         return (L_COMP_DATA *)ERROR_PTR("wrong ascii85flags", __func__, NULL); | 
940  |  |  | 
941  |  |         /* Read the metadata */  | 
942  | 0  |     if (readHeaderJpeg(fname, &w, &h, &spp, NULL, NULL))  | 
943  | 0  |         return (L_COMP_DATA *)ERROR_PTR("bad jpeg metadata", __func__, NULL); | 
944  | 0  |     bps = 8;  | 
945  | 0  |     if ((fp = fopenReadStream(fname)) == NULL)  | 
946  | 0  |         return (L_COMP_DATA *)ERROR_PTR_1("stream not opened", | 
947  | 0  |                                           fname, __func__, NULL);  | 
948  | 0  |     fgetJpegResolution(fp, &xres, &yres);  | 
949  | 0  |     fclose(fp);  | 
950  |  |  | 
951  |  |         /* Read the entire jpeg file.  The returned jpeg data in memory  | 
952  |  |          * starts with ffd8 and ends with ffd9 */  | 
953  | 0  |     if ((data = l_binaryRead(fname, &nbytes)) == NULL)  | 
954  | 0  |         return (L_COMP_DATA *)ERROR_PTR_1("data not extracted", | 
955  | 0  |                                           fname, __func__, NULL);  | 
956  |  |  | 
957  |  |         /* Optionally, encode the compressed data */  | 
958  | 0  |     if (ascii85flag == 1) { | 
959  | 0  |         data85 = encodeAscii85(data, nbytes, &nbytes85);  | 
960  | 0  |         LEPT_FREE(data);  | 
961  | 0  |         if (!data85)  | 
962  | 0  |             return (L_COMP_DATA *)ERROR_PTR_1("data85 not made", | 
963  | 0  |                                               fname, __func__, NULL);  | 
964  | 0  |         else  | 
965  | 0  |             data85[nbytes85 - 1] = '\0';  /* remove the newline */  | 
966  | 0  |     }  | 
967  |  |  | 
968  | 0  |     cid = (L_COMP_DATA *)LEPT_CALLOC(1, sizeof(L_COMP_DATA));  | 
969  | 0  |     if (ascii85flag == 0) { | 
970  | 0  |         cid->datacomp = data;  | 
971  | 0  |     } else {  /* ascii85 */ | 
972  | 0  |         cid->data85 = data85;  | 
973  | 0  |         cid->nbytes85 = nbytes85;  | 
974  | 0  |     }  | 
975  | 0  |     cid->type = L_JPEG_ENCODE;  | 
976  | 0  |     cid->nbytescomp = nbytes;  | 
977  | 0  |     cid->w = w;  | 
978  | 0  |     cid->h = h;  | 
979  | 0  |     cid->bps = bps;  | 
980  | 0  |     cid->spp = spp;  | 
981  | 0  |     cid->res = xres;  | 
982  | 0  |     return cid;  | 
983  | 0  | }  | 
984  |  |  | 
985  |  |  | 
986  |  | /*!  | 
987  |  |  * \brief   l_generateJpegDataMem()  | 
988  |  |  *  | 
989  |  |  * \param[in]    data           of jpeg-encoded file  | 
990  |  |  * \param[in]    nbytes         size of jpeg-encoded file  | 
991  |  |  * \param[in]    ascii85flag    0 for jpeg; 1 for ascii85-encoded jpeg  | 
992  |  |  * \return  cid containing jpeg data, or NULL on error  | 
993  |  |  *  | 
994  |  |  * <pre>  | 
995  |  |  * Notes:  | 
996  |  |  *      (1) Set ascii85flag:  | 
997  |  |  *           ~ 0 for binary data (PDF only)  | 
998  |  |  *           ~ 1 for ascii85 (5 for 4) encoded binary data (PostScript only)  | 
999  |  |  * </pre>  | 
1000  |  |  */  | 
1001  |  | L_COMP_DATA *  | 
1002  |  | l_generateJpegDataMem(l_uint8  *data,  | 
1003  |  |                       size_t    nbytes,  | 
1004  |  |                       l_int32   ascii85flag)  | 
1005  | 0  | { | 
1006  | 0  | char         *data85 = NULL;  /* ascii85 encoded jpeg compressed file */  | 
1007  | 0  | l_int32       w, h, xres, yres, bps, spp;  | 
1008  | 0  | size_t        nbytes85;  | 
1009  | 0  | L_COMP_DATA  *cid;  | 
1010  |  | 
  | 
1011  | 0  |     if (!data)  | 
1012  | 0  |         return (L_COMP_DATA *)ERROR_PTR("data not defined", __func__, NULL); | 
1013  |  |  | 
1014  |  |         /* Read the metadata */  | 
1015  | 0  |     if (readHeaderMemJpeg(data, nbytes, &w, &h, &spp, NULL, NULL)) { | 
1016  | 0  |         LEPT_FREE(data);  | 
1017  | 0  |         return (L_COMP_DATA *)ERROR_PTR("bad jpeg metadata", __func__, NULL); | 
1018  | 0  |     }  | 
1019  | 0  |     bps = 8;  | 
1020  | 0  |     readResolutionMemJpeg(data, nbytes, &xres, &yres);  | 
1021  |  |  | 
1022  |  |         /* Optionally, encode the compressed data */  | 
1023  | 0  |     if (ascii85flag == 1) { | 
1024  | 0  |         data85 = encodeAscii85(data, nbytes, &nbytes85);  | 
1025  | 0  |         LEPT_FREE(data);  | 
1026  | 0  |         if (!data85)  | 
1027  | 0  |             return (L_COMP_DATA *)ERROR_PTR("data85 not made", __func__, NULL); | 
1028  | 0  |         else  | 
1029  | 0  |             data85[nbytes85 - 1] = '\0';  /* remove the newline */  | 
1030  | 0  |     }  | 
1031  |  |  | 
1032  | 0  |     cid = (L_COMP_DATA *)LEPT_CALLOC(1, sizeof(L_COMP_DATA));  | 
1033  | 0  |     if (ascii85flag == 0) { | 
1034  | 0  |         cid->datacomp = data;  | 
1035  | 0  |     } else {  /* ascii85 */ | 
1036  | 0  |         cid->data85 = data85;  | 
1037  | 0  |         cid->nbytes85 = nbytes85;  | 
1038  | 0  |     }  | 
1039  | 0  |     cid->type = L_JPEG_ENCODE;  | 
1040  | 0  |     cid->nbytescomp = nbytes;  | 
1041  | 0  |     cid->w = w;  | 
1042  | 0  |     cid->h = h;  | 
1043  | 0  |     cid->bps = bps;  | 
1044  | 0  |     cid->spp = spp;  | 
1045  | 0  |     cid->res = xres;  | 
1046  | 0  |     return cid;  | 
1047  | 0  | }  | 
1048  |  |  | 
1049  |  |  | 
1050  |  | /*!  | 
1051  |  |  * \brief   l_generateJp2kData()  | 
1052  |  |  *  | 
1053  |  |  * \param[in]    fname     of jp2k file  | 
1054  |  |  * \return  cid containing jp2k data, or NULL on error  | 
1055  |  |  *  | 
1056  |  |  * <pre>  | 
1057  |  |  * Notes:  | 
1058  |  |  *      (1) This is only called after the file is verified to be jp2k.  | 
1059  |  |  * </pre>  | 
1060  |  |  */  | 
1061  |  | static L_COMP_DATA *  | 
1062  |  | l_generateJp2kData(const char  *fname)  | 
1063  | 0  | { | 
1064  | 0  | l_int32       w, h, bps, spp, xres, yres;  | 
1065  | 0  | size_t        nbytes;  | 
1066  | 0  | L_COMP_DATA  *cid;  | 
1067  | 0  | FILE         *fp;  | 
1068  |  | 
  | 
1069  | 0  |     if (!fname)  | 
1070  | 0  |         return (L_COMP_DATA *)ERROR_PTR("fname not defined", __func__, NULL); | 
1071  |  |  | 
1072  | 0  |     if (readHeaderJp2k(fname, &w, &h, &bps, &spp, NULL))  | 
1073  | 0  |         return (L_COMP_DATA *)ERROR_PTR("bad jp2k metadata", __func__, NULL); | 
1074  |  |  | 
1075  |  |         /* The returned jp2k data in memory is the entire jp2k file */  | 
1076  | 0  |     cid = (L_COMP_DATA *)LEPT_CALLOC(1, sizeof(L_COMP_DATA));  | 
1077  | 0  |     if ((cid->datacomp = l_binaryRead(fname, &nbytes)) == NULL) { | 
1078  | 0  |         l_CIDataDestroy(&cid);  | 
1079  | 0  |         return (L_COMP_DATA *)ERROR_PTR("data not extracted", __func__, NULL); | 
1080  | 0  |     }  | 
1081  |  |  | 
1082  | 0  |     xres = yres = 0;  | 
1083  | 0  |     if ((fp = fopenReadStream(fname)) != NULL) { | 
1084  | 0  |         fgetJp2kResolution(fp, &xres, &yres);  | 
1085  | 0  |         fclose(fp);  | 
1086  | 0  |     }  | 
1087  | 0  |     cid->type = L_JP2K_ENCODE;  | 
1088  | 0  |     cid->nbytescomp = nbytes;  | 
1089  | 0  |     cid->w = w;  | 
1090  | 0  |     cid->h = h;  | 
1091  | 0  |     cid->bps = bps;  | 
1092  | 0  |     cid->spp = spp;  | 
1093  | 0  |     cid->res = xres;  | 
1094  | 0  |     return cid;  | 
1095  | 0  | }  | 
1096  |  |  | 
1097  |  |  | 
1098  |  | /*!  | 
1099  |  |  * \brief   l_generateG4Data()  | 
1100  |  |  *  | 
1101  |  |  * \param[in]    fname          of g4 compressed file  | 
1102  |  |  * \param[in]    ascii85flag    0 for g4 compressed; 1 for ascii85-encoded g4  | 
1103  |  |  * \return  cid g4 compressed image data, or NULL on error  | 
1104  |  |  *  | 
1105  |  |  * <pre>  | 
1106  |  |  * Notes:  | 
1107  |  |  *      (1) Set ascii85flag:  | 
1108  |  |  *           ~ 0 for binary data (PDF only)  | 
1109  |  |  *           ~ 1 for ascii85 (5 for 4) encoded binary data (PostScript only)  | 
1110  |  |  *      (2) This does not work for multipage tiff files.  | 
1111  |  |  * </pre>  | 
1112  |  |  */  | 
1113  |  | L_COMP_DATA *  | 
1114  |  | l_generateG4Data(const char  *fname,  | 
1115  |  |                  l_int32      ascii85flag)  | 
1116  | 0  | { | 
1117  | 0  | l_uint8      *datacomp = NULL;  /* g4 compressed raster data */  | 
1118  | 0  | char         *data85 = NULL;  /* ascii85 encoded g4 compressed data */  | 
1119  | 0  | l_int32       w, h, xres, yres, npages;  | 
1120  | 0  | l_int32       minisblack;  /* TRUE or FALSE */  | 
1121  | 0  | size_t        nbytes85, nbytescomp;  | 
1122  | 0  | L_COMP_DATA  *cid;  | 
1123  | 0  | FILE         *fp;  | 
1124  |  | 
  | 
1125  | 0  |     if (!fname)  | 
1126  | 0  |         return (L_COMP_DATA *)ERROR_PTR("fname not defined", __func__, NULL); | 
1127  |  |  | 
1128  |  |         /* Make sure this is a single page tiff file */  | 
1129  | 0  |     if ((fp = fopenReadStream(fname)) == NULL)  | 
1130  | 0  |         return (L_COMP_DATA *)ERROR_PTR_1("stream not opened", | 
1131  | 0  |                                           fname, __func__, NULL);  | 
1132  | 0  |     tiffGetCount(fp, &npages);  | 
1133  | 0  |     fclose(fp);  | 
1134  | 0  |     if (npages != 1) { | 
1135  | 0  |         L_ERROR(" %d page tiff; only works with 1 page (file: %s)\n", __func__, npages, fname); | 
1136  | 0  |         return NULL;  | 
1137  | 0  |     }  | 
1138  |  |  | 
1139  |  |         /* Read the resolution */  | 
1140  | 0  |     if ((fp = fopenReadStream(fname)) == NULL)  | 
1141  | 0  |         return (L_COMP_DATA *)ERROR_PTR_1("stream not opened", | 
1142  | 0  |                                           fname, __func__, NULL);  | 
1143  | 0  |     getTiffResolution(fp, &xres, &yres);  | 
1144  | 0  |     fclose(fp);  | 
1145  |  |  | 
1146  |  |         /* The returned ccitt g4 data in memory is the block of  | 
1147  |  |          * bytes in the tiff file, starting after 8 bytes and  | 
1148  |  |          * ending before the directory. */  | 
1149  | 0  |     if (extractG4DataFromFile(fname, &datacomp, &nbytescomp,  | 
1150  | 0  |                               &w, &h, &minisblack)) { | 
1151  | 0  |         return (L_COMP_DATA *)ERROR_PTR_1("datacomp not extracted", | 
1152  | 0  |                                           fname, __func__, NULL);  | 
1153  | 0  |     }  | 
1154  |  |  | 
1155  |  |         /* Optionally, encode the compressed data */  | 
1156  | 0  |     if (ascii85flag == 1) { | 
1157  | 0  |         data85 = encodeAscii85(datacomp, nbytescomp, &nbytes85);  | 
1158  | 0  |         LEPT_FREE(datacomp);  | 
1159  | 0  |         if (!data85)  | 
1160  | 0  |             return (L_COMP_DATA *)ERROR_PTR_1("data85 not made", | 
1161  | 0  |                                               fname, __func__, NULL);  | 
1162  | 0  |         else  | 
1163  | 0  |             data85[nbytes85 - 1] = '\0';  /* remove the newline */  | 
1164  | 0  |     }  | 
1165  |  |  | 
1166  | 0  |     cid = (L_COMP_DATA *)LEPT_CALLOC(1, sizeof(L_COMP_DATA));  | 
1167  | 0  |     if (ascii85flag == 0) { | 
1168  | 0  |         cid->datacomp = datacomp;  | 
1169  | 0  |     } else {  /* ascii85 */ | 
1170  | 0  |         cid->data85 = data85;  | 
1171  | 0  |         cid->nbytes85 = nbytes85;  | 
1172  | 0  |     }  | 
1173  | 0  |     cid->type = L_G4_ENCODE;  | 
1174  | 0  |     cid->nbytescomp = nbytescomp;  | 
1175  | 0  |     cid->w = w;  | 
1176  | 0  |     cid->h = h;  | 
1177  | 0  |     cid->bps = 1;  | 
1178  | 0  |     cid->spp = 1;  | 
1179  | 0  |     cid->minisblack = minisblack;  | 
1180  | 0  |     cid->res = xres;  | 
1181  | 0  |     return cid;  | 
1182  | 0  | }  | 
1183  |  |  | 
1184  |  |  | 
1185  |  | /*!  | 
1186  |  |  * \brief   pixGenerateCIData()  | 
1187  |  |  *  | 
1188  |  |  * \param[in]    pixs       8 or 32 bpp, no colormap  | 
1189  |  |  * \param[in]    type       L_G4_ENCODE, L_JPEG_ENCODE, L_FLATE_ENCODE or  | 
1190  |  |  *                          L_JP2K_ENCODE  | 
1191  |  |  * \param[in]    quality    for jpeg if transcoded: 1-100; 0 for default (75)  | 
1192  |  |  *                          for jp2k if transcoded: 27-45; 0 for default (34)  | 
1193  |  |  * \param[in]    ascii85    0 for binary; 1 for ascii85-encoded  | 
1194  |  |  * \param[out]   pcid       compressed data  | 
1195  |  |  * \return  0 if OK, 1 on error  | 
1196  |  |  *  | 
1197  |  |  * <pre>  | 
1198  |  |  * Notes:  | 
1199  |  |  *      (1) Set ascii85:  | 
1200  |  |  *           ~ 0 for binary data (PDF only)  | 
1201  |  |  *           ~ 1 for ascii85 (5 for 4) encoded binary data (PostScript only)  | 
1202  |  |  *      (2) Do not accept images with an asperity ratio greater than 10.  | 
1203  |  |  * </pre>  | 
1204  |  |  */  | 
1205  |  | l_ok  | 
1206  |  | pixGenerateCIData(PIX           *pixs,  | 
1207  |  |                   l_int32        type,  | 
1208  |  |                   l_int32        quality,  | 
1209  |  |                   l_int32        ascii85,  | 
1210  |  |                   L_COMP_DATA  **pcid)  | 
1211  | 0  | { | 
1212  | 0  | l_int32   w, h, d, maxAsp;  | 
1213  | 0  | PIXCMAP  *cmap;  | 
1214  |  | 
  | 
1215  | 0  |     if (!pcid)  | 
1216  | 0  |         return ERROR_INT("&cid not defined", __func__, 1); | 
1217  | 0  |     *pcid = NULL;  | 
1218  | 0  |     if (!pixs)  | 
1219  | 0  |         return ERROR_INT("pixs not defined", __func__, 1); | 
1220  | 0  |     if (type != L_G4_ENCODE && type != L_JPEG_ENCODE &&  | 
1221  | 0  |         type != L_FLATE_ENCODE && type != L_JP2K_ENCODE) { | 
1222  | 0  |         selectDefaultPdfEncoding(pixs, &type);  | 
1223  | 0  |     }  | 
1224  | 0  |     if (ascii85 != 0 && ascii85 != 1)  | 
1225  | 0  |         return ERROR_INT("invalid ascii85", __func__, 1); | 
1226  | 0  |     pixGetDimensions(pixs, &w, &h, NULL);  | 
1227  | 0  |     if (w == 0 || h == 0)  | 
1228  | 0  |         return ERROR_INT("invalid w or h", __func__, 1); | 
1229  | 0  |     maxAsp = L_MAX(w / h, h / w);  | 
1230  | 0  |     if (maxAsp > 10)  | 
1231  | 0  |         return ERROR_INT("max asperity > 10", __func__, 1); | 
1232  |  |  | 
1233  |  |         /* Conditionally modify the encoding type if libz is  | 
1234  |  |          * available and the requested library is missing. */  | 
1235  | 0  | #if defined(HAVE_LIBZ)  | 
1236  |  | # if !defined(HAVE_LIBJPEG)  | 
1237  |  |     if (type == L_JPEG_ENCODE) { | 
1238  |  |         L_WARNING("no libjpeg; using flate encoding\n", __func__); | 
1239  |  |         type = L_FLATE_ENCODE;  | 
1240  |  |     }  | 
1241  |  | # endif /* !defined(HAVE_LIBJPEG) */  | 
1242  | 0  | # if !defined(HAVE_LIBJP2K)  | 
1243  | 0  |     if (type == L_JP2K_ENCODE) { | 
1244  | 0  |         L_WARNING("no libjp2k; using flate encoding\n", __func__); | 
1245  | 0  |         type = L_FLATE_ENCODE;  | 
1246  | 0  |     }  | 
1247  | 0  | # endif /* !defined(HAVE_LIBJP2K) */  | 
1248  |  | # if !defined(HAVE_LIBTIFF)  | 
1249  |  |     if (type == L_G4_ENCODE) { | 
1250  |  |         L_WARNING("no libtiff; using flate encoding\n", __func__); | 
1251  |  |         type = L_FLATE_ENCODE;  | 
1252  |  |     }  | 
1253  |  | # endif /* !defined(HAVE_LIBTIFF) */  | 
1254  | 0  | #endif /* defined(HAVE_LIBZ) */  | 
1255  |  |  | 
1256  |  |         /* Sanity check on requested encoding */  | 
1257  | 0  |     d = pixGetDepth(pixs);  | 
1258  | 0  |     cmap = pixGetColormap(pixs);  | 
1259  | 0  |     if (cmap && type != L_FLATE_ENCODE) { | 
1260  | 0  |         L_WARNING("pixs has cmap; using flate encoding\n", __func__); | 
1261  | 0  |         type = L_FLATE_ENCODE;  | 
1262  | 0  |     } else if (d < 8 && (type == L_JPEG_ENCODE || type == L_JP2K_ENCODE)) { | 
1263  | 0  |         L_WARNING("pixs has < 8 bpp; using flate encoding\n", __func__); | 
1264  | 0  |         type = L_FLATE_ENCODE;  | 
1265  | 0  |     } else if (d > 1 && type == L_G4_ENCODE) { | 
1266  | 0  |         L_WARNING("pixs has > 1 bpp; using flate encoding\n", __func__); | 
1267  | 0  |         type = L_FLATE_ENCODE;  | 
1268  | 0  |     }  | 
1269  |  | 
  | 
1270  | 0  |     if (type == L_JPEG_ENCODE) { | 
1271  | 0  |         if ((*pcid = pixGenerateJpegData(pixs, ascii85, quality)) == NULL)  | 
1272  | 0  |             return ERROR_INT("jpeg data not made", __func__, 1); | 
1273  | 0  |     } else if (type == L_JP2K_ENCODE) { | 
1274  | 0  |         if ((*pcid = pixGenerateJp2kData(pixs, quality)) == NULL)  | 
1275  | 0  |             return ERROR_INT("jp2k data not made", __func__, 1); | 
1276  | 0  |     } else if (type == L_G4_ENCODE) { | 
1277  | 0  |         if ((*pcid = pixGenerateG4Data(pixs, ascii85)) == NULL)  | 
1278  | 0  |             return ERROR_INT("g4 data not made", __func__, 1); | 
1279  | 0  |     } else {  /* type == L_FLATE_ENCODE */ | 
1280  | 0  |         if ((*pcid = pixGenerateFlateData(pixs, ascii85)) == NULL)  | 
1281  | 0  |             return ERROR_INT("flate data not made", __func__, 1); | 
1282  | 0  |     }  | 
1283  | 0  |     return 0;  | 
1284  | 0  | }  | 
1285  |  |  | 
1286  |  |  | 
1287  |  | /*!  | 
1288  |  |  * \brief   l_generateFlateData()  | 
1289  |  |  *  | 
1290  |  |  * \param[in]    fname  | 
1291  |  |  * \param[in]    ascii85flag    0 for gzipped; 1 for ascii85-encoded gzipped  | 
1292  |  |  * \return  cid flate compressed image data, or NULL on error  | 
1293  |  |  *  | 
1294  |  |  * <pre>  | 
1295  |  |  * Notes:  | 
1296  |  |  *      (1) The input image is converted to one of these 4 types:  | 
1297  |  |  *           ~ 1 bpp  | 
1298  |  |  *           ~ 8 bpp, no colormap  | 
1299  |  |  *           ~ 8 bpp, colormap  | 
1300  |  |  *           ~ 32 bpp rgb  | 
1301  |  |  *      (2) Set ascii85flag:  | 
1302  |  |  *           ~ 0 for binary data (PDF only)  | 
1303  |  |  *           ~ 1 for ascii85 (5 for 4) encoded binary data (PostScript only)  | 
1304  |  |  *      (3) Always transcodes (i.e., first decodes the png file)  | 
1305  |  |  * </pre>  | 
1306  |  |  */  | 
1307  |  | L_COMP_DATA *  | 
1308  |  | l_generateFlateData(const char  *fname,  | 
1309  |  |                     l_int32      ascii85flag)  | 
1310  | 0  | { | 
1311  | 0  | L_COMP_DATA  *cid;  | 
1312  | 0  | PIX          *pixs;  | 
1313  |  | 
  | 
1314  | 0  |     if (!fname)  | 
1315  | 0  |         return (L_COMP_DATA *)ERROR_PTR("fname not defined", __func__, NULL); | 
1316  |  |  | 
1317  | 0  |     if ((pixs = pixRead(fname)) == NULL)  | 
1318  | 0  |         return (L_COMP_DATA *)ERROR_PTR("pixs not made", __func__, NULL); | 
1319  | 0  |     cid = pixGenerateFlateData(pixs, ascii85flag);  | 
1320  | 0  |     pixDestroy(&pixs);  | 
1321  | 0  |     return cid;  | 
1322  | 0  | }  | 
1323  |  |  | 
1324  |  |  | 
1325  |  | /*!  | 
1326  |  |  * \brief   pixGenerateFlateData()  | 
1327  |  |  *  | 
1328  |  |  * \param[in]    pixs  | 
1329  |  |  * \param[in]    ascii85flag 0    for gzipped; 1 for ascii85-encoded gzipped  | 
1330  |  |  * \return  cid flate compressed image data, or NULL on error  | 
1331  |  |  *  | 
1332  |  |  * <pre>  | 
1333  |  |  * Notes:  | 
1334  |  |  *     (1) If called with an RGBA pix (spp == 4), the alpha channel  | 
1335  |  |  *         will be removed, projecting a white backgrouond through  | 
1336  |  |  *         any transparency.  | 
1337  |  |  *     (2) If called with a colormapped pix, any transparency in the  | 
1338  |  |  *         alpha component in the colormap will be ignored, as it is  | 
1339  |  |  *         for all leptonica operations on colormapped pix.  | 
1340  |  |  * </pre>  | 
1341  |  |  */  | 
1342  |  | static L_COMP_DATA *  | 
1343  |  | pixGenerateFlateData(PIX     *pixs,  | 
1344  |  |                      l_int32  ascii85flag)  | 
1345  | 0  | { | 
1346  | 0  | l_uint8      *data = NULL;  /* uncompressed raster data in required format */  | 
1347  | 0  | l_uint8      *datacomp = NULL;  /* gzipped raster data */  | 
1348  | 0  | char         *data85 = NULL;  /* ascii85 encoded gzipped raster data */  | 
1349  | 0  | l_uint8      *cmapdata = NULL;  /* uncompressed colormap */  | 
1350  | 0  | char         *cmapdata85 = NULL;  /* ascii85 encoded uncompressed colormap */  | 
1351  | 0  | char         *cmapdatahex = NULL;  /* hex ascii uncompressed colormap */  | 
1352  | 0  | l_int32       ncolors;  /* in colormap; not used if cmapdata85 is null */  | 
1353  | 0  | l_int32       bps;  /* bits/sample: usually 8 */  | 
1354  | 0  | l_int32       spp;  /* samples/pixel: 1-grayscale/cmap); 3-rgb */  | 
1355  | 0  | l_int32       w, h, d, cmapflag;  | 
1356  | 0  | size_t        ncmapbytes85 = 0;  | 
1357  | 0  | size_t        nbytes85 = 0;  | 
1358  | 0  | size_t        nbytes, nbytescomp;  | 
1359  | 0  | L_COMP_DATA  *cid;  | 
1360  | 0  | PIX          *pixt;  | 
1361  | 0  | PIXCMAP      *cmap;  | 
1362  |  | 
  | 
1363  | 0  |     if (!pixs)  | 
1364  | 0  |         return (L_COMP_DATA *)ERROR_PTR("pixs not defined", __func__, NULL); | 
1365  |  |  | 
1366  |  |         /* Convert the image to one of these 4 types:  | 
1367  |  |          *     1 bpp  | 
1368  |  |          *     8 bpp, no colormap  | 
1369  |  |          *     8 bpp, colormap  | 
1370  |  |          *     32 bpp rgb    */  | 
1371  | 0  |     pixGetDimensions(pixs, &w, &h, &d);  | 
1372  | 0  |     cmap = pixGetColormap(pixs);  | 
1373  | 0  |     cmapflag = (cmap) ? 1 : 0;  | 
1374  | 0  |     if (d == 2 || d == 4 || d == 16) { | 
1375  | 0  |         pixt = pixConvertTo8(pixs, cmapflag);  | 
1376  | 0  |         cmap = pixGetColormap(pixt);  | 
1377  | 0  |         d = pixGetDepth(pixt);  | 
1378  | 0  |     } else if (d == 32 && pixGetSpp(pixs) == 4) {  /* remove alpha */ | 
1379  | 0  |         pixt = pixAlphaBlendUniform(pixs, 0xffffff00);  | 
1380  | 0  |     } else { | 
1381  | 0  |         pixt = pixClone(pixs);  | 
1382  | 0  |     }  | 
1383  | 0  |     if (!pixt)  | 
1384  | 0  |         return (L_COMP_DATA *)ERROR_PTR("pixt not made", __func__, NULL); | 
1385  | 0  |     spp = (d == 32) ? 3 : 1;  | 
1386  | 0  |     bps = (d == 32) ? 8 : d;  | 
1387  |  |  | 
1388  |  |         /* Extract and encode the colormap data as both ascii85 and hexascii  */  | 
1389  | 0  |     ncolors = 0;  | 
1390  | 0  |     if (cmap) { | 
1391  | 0  |         pixcmapSerializeToMemory(cmap, 3, &ncolors, &cmapdata);  | 
1392  | 0  |         if (!cmapdata) { | 
1393  | 0  |             pixDestroy(&pixt);  | 
1394  | 0  |             return (L_COMP_DATA *)ERROR_PTR("cmapdata not made", | 
1395  | 0  |                                             __func__, NULL);  | 
1396  | 0  |         }  | 
1397  |  |  | 
1398  | 0  |         cmapdata85 = encodeAscii85(cmapdata, 3 * ncolors, &ncmapbytes85);  | 
1399  | 0  |         cmapdatahex = pixcmapConvertToHex(cmapdata, ncolors);  | 
1400  | 0  |         LEPT_FREE(cmapdata);  | 
1401  | 0  |     }  | 
1402  |  |  | 
1403  |  |         /* Extract and compress the raster data */  | 
1404  | 0  |     pixGetRasterData(pixt, &data, &nbytes);  | 
1405  | 0  |     pixDestroy(&pixt);  | 
1406  | 0  |     if (!data) { | 
1407  | 0  |         LEPT_FREE(cmapdata85);  | 
1408  | 0  |         LEPT_FREE(cmapdatahex);  | 
1409  | 0  |         return (L_COMP_DATA *)ERROR_PTR("data not returned", __func__, NULL); | 
1410  | 0  |     }  | 
1411  | 0  |     datacomp = zlibCompress(data, nbytes, &nbytescomp);  | 
1412  | 0  |     LEPT_FREE(data);  | 
1413  | 0  |     if (!datacomp) { | 
1414  | 0  |         LEPT_FREE(cmapdata85);  | 
1415  | 0  |         LEPT_FREE(cmapdatahex);  | 
1416  | 0  |         return (L_COMP_DATA *)ERROR_PTR("datacomp not made", __func__, NULL); | 
1417  | 0  |     }  | 
1418  |  |  | 
1419  |  |         /* Optionally, encode the compressed data */  | 
1420  | 0  |     if (ascii85flag == 1) { | 
1421  | 0  |         data85 = encodeAscii85(datacomp, nbytescomp, &nbytes85);  | 
1422  | 0  |         LEPT_FREE(datacomp);  | 
1423  | 0  |         if (!data85) { | 
1424  | 0  |             LEPT_FREE(cmapdata85);  | 
1425  | 0  |             LEPT_FREE(cmapdatahex);  | 
1426  | 0  |             return (L_COMP_DATA *)ERROR_PTR("data85 not made", __func__, NULL); | 
1427  | 0  |         } else { | 
1428  | 0  |             data85[nbytes85 - 1] = '\0';  /* remove the newline */  | 
1429  | 0  |         }  | 
1430  | 0  |     }  | 
1431  |  |  | 
1432  | 0  |     cid = (L_COMP_DATA *)LEPT_CALLOC(1, sizeof(L_COMP_DATA));  | 
1433  | 0  |     if (ascii85flag == 0) { | 
1434  | 0  |         cid->datacomp = datacomp;  | 
1435  | 0  |     } else {  /* ascii85 */ | 
1436  | 0  |         cid->data85 = data85;  | 
1437  | 0  |         cid->nbytes85 = nbytes85;  | 
1438  | 0  |     }  | 
1439  | 0  |     cid->type = L_FLATE_ENCODE;  | 
1440  | 0  |     cid->cmapdatahex = cmapdatahex;  | 
1441  | 0  |     cid->cmapdata85 = cmapdata85;  | 
1442  | 0  |     cid->nbytescomp = nbytescomp;  | 
1443  | 0  |     cid->ncolors = ncolors;  | 
1444  | 0  |     cid->w = w;  | 
1445  | 0  |     cid->h = h;  | 
1446  | 0  |     cid->bps = bps;  | 
1447  | 0  |     cid->spp = spp;  | 
1448  | 0  |     cid->res = pixGetXRes(pixs);  | 
1449  | 0  |     cid->nbytes = nbytes;  /* only for debugging */  | 
1450  | 0  |     return cid;  | 
1451  | 0  | }  | 
1452  |  |  | 
1453  |  |  | 
1454  |  | /*!  | 
1455  |  |  * \brief   pixGenerateJpegData()  | 
1456  |  |  *  | 
1457  |  |  * \param[in]    pixs           8, 16 or 32 bpp, no colormap  | 
1458  |  |  * \param[in]    ascii85flag    0 for jpeg; 1 for ascii85-encoded jpeg  | 
1459  |  |  * \param[in]    quality        0 for default, which is 75  | 
1460  |  |  * \return  cid jpeg compressed data, or NULL on error  | 
1461  |  |  *  | 
1462  |  |  * <pre>  | 
1463  |  |  * Notes:  | 
1464  |  |  *      (1) Set ascii85flag:  | 
1465  |  |  *           ~ 0 for binary data (PDF only)  | 
1466  |  |  *           ~ 1 for ascii85 (5 for 4) encoded binary data (PostScript only)  | 
1467  |  |  *      (2) If 16 bpp, convert first to 8 bpp, using the MSB  | 
1468  |  |  * </pre>  | 
1469  |  |  */  | 
1470  |  | static L_COMP_DATA *  | 
1471  |  | pixGenerateJpegData(PIX     *pixs,  | 
1472  |  |                     l_int32  ascii85flag,  | 
1473  |  |                     l_int32  quality)  | 
1474  | 0  | { | 
1475  | 0  | l_int32       d;  | 
1476  | 0  | char         *fname;  | 
1477  | 0  | L_COMP_DATA  *cid;  | 
1478  |  | 
  | 
1479  | 0  |     if (!pixs)  | 
1480  | 0  |         return (L_COMP_DATA *)ERROR_PTR("pixs not defined", __func__, NULL); | 
1481  | 0  |     if (pixGetColormap(pixs))  | 
1482  | 0  |         return (L_COMP_DATA *)ERROR_PTR("pixs has colormap", __func__, NULL); | 
1483  | 0  |     d = pixGetDepth(pixs);  | 
1484  | 0  |     if (d != 8 && d != 16 && d != 32)  | 
1485  | 0  |         return (L_COMP_DATA *)ERROR_PTR("pixs not 8, 16 or 32 bpp", | 
1486  | 0  |                 __func__, NULL);  | 
1487  |  |  | 
1488  |  |         /* Compress to a temp jpeg file */  | 
1489  | 0  |     fname = l_makeTempFilename();  | 
1490  | 0  |     if (pixWriteJpeg(fname, pixs, quality, 0)) { | 
1491  | 0  |         LEPT_FREE(fname);  | 
1492  | 0  |         return NULL;  | 
1493  | 0  |     }  | 
1494  |  |  | 
1495  |  |         /* Generate the data */  | 
1496  | 0  |     cid = l_generateJpegData(fname, ascii85flag);  | 
1497  | 0  |     if (lept_rmfile(fname) != 0)  | 
1498  | 0  |         L_ERROR("temp file %s was not deleted\n", __func__, fname); | 
1499  | 0  |     LEPT_FREE(fname);  | 
1500  | 0  |     return cid;  | 
1501  | 0  | }  | 
1502  |  |  | 
1503  |  |  | 
1504  |  | /*!  | 
1505  |  |  * \brief   pixGenerateJp2kData()  | 
1506  |  |  *  | 
1507  |  |  * \param[in]    pixs           8 or 32 bpp, no colormap  | 
1508  |  |  * \param[in]    quality        0 for default, which is 34  | 
1509  |  |  * \return  cid jp2k compressed data, or NULL on error  | 
1510  |  |  *  | 
1511  |  |  * <pre>  | 
1512  |  |  * Notes:  | 
1513  |  |  *      (1) The quality can be set between 27 (very poor) and 45  | 
1514  |  |  *          (nearly perfect).  Use 0 for default (34). Use 100 for lossless,  | 
1515  |  |  *          but this is very expensive and not recommended.  | 
1516  |  |  * </pre>  | 
1517  |  |  */  | 
1518  |  | static L_COMP_DATA *  | 
1519  |  | pixGenerateJp2kData(PIX     *pixs,  | 
1520  |  |                     l_int32  quality)  | 
1521  | 0  | { | 
1522  | 0  | l_int32       d;  | 
1523  | 0  | char         *fname;  | 
1524  | 0  | L_COMP_DATA  *cid;  | 
1525  |  | 
  | 
1526  | 0  |     if (!pixs)  | 
1527  | 0  |         return (L_COMP_DATA *)ERROR_PTR("pixs not defined", __func__, NULL); | 
1528  | 0  |     if (pixGetColormap(pixs))  | 
1529  | 0  |         return (L_COMP_DATA *)ERROR_PTR("pixs has colormap", __func__, NULL); | 
1530  | 0  |     d = pixGetDepth(pixs);  | 
1531  | 0  |     if (d != 8 && d != 32)  | 
1532  | 0  |         return (L_COMP_DATA *)ERROR_PTR("pixs not 8 or 32 bpp", __func__, NULL); | 
1533  |  |  | 
1534  |  |         /* Compress to a temp jp2k file */  | 
1535  | 0  |     fname = l_makeTempFilename();  | 
1536  | 0  |     if (pixWriteJp2k(fname, pixs, quality, 5, 0, 0)) { | 
1537  | 0  |         LEPT_FREE(fname);  | 
1538  | 0  |         return NULL;  | 
1539  | 0  |     }  | 
1540  |  |  | 
1541  |  |         /* Generate the data */  | 
1542  | 0  |     cid = l_generateJp2kData(fname);  | 
1543  | 0  |     if (lept_rmfile(fname) != 0)  | 
1544  | 0  |         L_ERROR("temp file %s was not deleted\n", __func__, fname); | 
1545  | 0  |     LEPT_FREE(fname);  | 
1546  | 0  |     return cid;  | 
1547  | 0  | }  | 
1548  |  |  | 
1549  |  |  | 
1550  |  | /*!  | 
1551  |  |  * \brief   pixGenerateG4Data()  | 
1552  |  |  *  | 
1553  |  |  * \param[in]    pixs           1 bpp, no colormap  | 
1554  |  |  * \param[in]    ascii85flag    0 for gzipped; 1 for ascii85-encoded gzipped  | 
1555  |  |  * \return  cid g4 compressed image data, or NULL on error  | 
1556  |  |  *  | 
1557  |  |  * <pre>  | 
1558  |  |  * Notes:  | 
1559  |  |  *      (1) Set ascii85flag:  | 
1560  |  |  *           ~ 0 for binary data (PDF only)  | 
1561  |  |  *           ~ 1 for ascii85 (5 for 4) encoded binary data (PostScript only)  | 
1562  |  |  * </pre>  | 
1563  |  |  */  | 
1564  |  | static L_COMP_DATA *  | 
1565  |  | pixGenerateG4Data(PIX     *pixs,  | 
1566  |  |                   l_int32  ascii85flag)  | 
1567  | 0  | { | 
1568  | 0  | char         *fname;  | 
1569  | 0  | L_COMP_DATA  *cid;  | 
1570  |  | 
  | 
1571  | 0  |     if (!pixs)  | 
1572  | 0  |         return (L_COMP_DATA *)ERROR_PTR("pixs not defined", __func__, NULL); | 
1573  | 0  |     if (pixGetDepth(pixs) != 1)  | 
1574  | 0  |         return (L_COMP_DATA *)ERROR_PTR("pixs not 1 bpp", __func__, NULL); | 
1575  | 0  |     if (pixGetColormap(pixs))  | 
1576  | 0  |         return (L_COMP_DATA *)ERROR_PTR("pixs has colormap", __func__, NULL); | 
1577  |  |  | 
1578  |  |         /* Compress to a temp tiff g4 file */  | 
1579  | 0  |     fname = l_makeTempFilename();  | 
1580  | 0  |     if (pixWrite(fname, pixs, IFF_TIFF_G4)) { | 
1581  | 0  |         LEPT_FREE(fname);  | 
1582  | 0  |         return NULL;  | 
1583  | 0  |     }  | 
1584  |  |  | 
1585  | 0  |     cid = l_generateG4Data(fname, ascii85flag);  | 
1586  | 0  |     if (lept_rmfile(fname) != 0)  | 
1587  | 0  |         L_ERROR("temp file %s was not deleted\n", __func__, fname); | 
1588  | 0  |     LEPT_FREE(fname);  | 
1589  | 0  |     return cid;  | 
1590  | 0  | }  | 
1591  |  |  | 
1592  |  |  | 
1593  |  | /*!  | 
1594  |  |  * \brief   cidConvertToPdfData()  | 
1595  |  |  *  | 
1596  |  |  * \param[in]    cid       compressed image data  | 
1597  |  |  * \param[in]    title     [optional] pdf title; can be null  | 
1598  |  |  * \param[out]   pdata     output pdf data for image  | 
1599  |  |  * \param[out]   pnbytes   size of output pdf data  | 
1600  |  |  * \return  0 if OK, 1 on error  | 
1601  |  |  *  | 
1602  |  |  * <pre>  | 
1603  |  |  * Notes:  | 
1604  |  |  *      (1) Caller must not destroy the cid.  It is absorbed in the  | 
1605  |  |  *          lpd and destroyed by this function.  | 
1606  |  |  * </pre>  | 
1607  |  |  */  | 
1608  |  | l_ok  | 
1609  |  | cidConvertToPdfData(L_COMP_DATA  *cid,  | 
1610  |  |                     const char   *title,  | 
1611  |  |                     l_uint8     **pdata,  | 
1612  |  |                     size_t       *pnbytes)  | 
1613  | 0  | { | 
1614  | 0  | l_int32      res, ret;  | 
1615  | 0  | l_float32    wpt, hpt;  | 
1616  | 0  | L_PDF_DATA  *lpd = NULL;  | 
1617  |  | 
  | 
1618  | 0  |     if (!pdata || !pnbytes)  | 
1619  | 0  |         return ERROR_INT("&data and &nbytes not both defined", __func__, 1); | 
1620  | 0  |     *pdata = NULL;  | 
1621  | 0  |     *pnbytes = 0;  | 
1622  | 0  |     if (!cid)  | 
1623  | 0  |         return ERROR_INT("cid not defined", __func__, 1); | 
1624  |  |  | 
1625  |  |         /* Get media box parameters, in pts */  | 
1626  | 0  |     res = cid->res;  | 
1627  | 0  |     if (res <= 0)  | 
1628  | 0  |         res = DefaultInputRes;  | 
1629  | 0  |     wpt = cid->w * 72.f / res;  | 
1630  | 0  |     hpt = cid->h * 72.f / res;  | 
1631  |  |  | 
1632  |  |         /* Set up the pdf data struct (lpd) */  | 
1633  | 0  |     if ((lpd = pdfdataCreate(title)) == NULL)  | 
1634  | 0  |         return ERROR_INT("lpd not made", __func__, 1); | 
1635  | 0  |     ptraAdd(lpd->cida, cid);  | 
1636  | 0  |     lpd->n++;  | 
1637  | 0  |     ptaAddPt(lpd->xy, 0, 0);   /* xpt = ypt = 0 */  | 
1638  | 0  |     ptaAddPt(lpd->wh, wpt, hpt);  | 
1639  |  |  | 
1640  |  |         /* Generate the pdf string and destroy the lpd */  | 
1641  | 0  |     ret = l_generatePdf(pdata, pnbytes, lpd);  | 
1642  | 0  |     pdfdataDestroy(&lpd);  | 
1643  | 0  |     if (ret)  | 
1644  | 0  |         return ERROR_INT("pdf output not made", __func__, 1); | 
1645  | 0  |     return 0;  | 
1646  | 0  | }  | 
1647  |  |  | 
1648  |  |  | 
1649  |  | /*!  | 
1650  |  |  * \brief   l_CIDataDestroy()  | 
1651  |  |  *  | 
1652  |  |  * \param[in,out]   pcid     will be set to null before returning  | 
1653  |  |  * \return  void  | 
1654  |  |  */  | 
1655  |  | void  | 
1656  |  | l_CIDataDestroy(L_COMP_DATA  **pcid)  | 
1657  | 0  | { | 
1658  | 0  | L_COMP_DATA  *cid;  | 
1659  |  | 
  | 
1660  | 0  |     if (pcid == NULL) { | 
1661  | 0  |         L_WARNING("ptr address is null!\n", __func__); | 
1662  | 0  |         return;  | 
1663  | 0  |     }  | 
1664  | 0  |     if ((cid = *pcid) == NULL)  | 
1665  | 0  |         return;  | 
1666  |  |  | 
1667  | 0  |     if (cid->datacomp) LEPT_FREE(cid->datacomp);  | 
1668  | 0  |     if (cid->data85) LEPT_FREE(cid->data85);  | 
1669  | 0  |     if (cid->cmapdata85) LEPT_FREE(cid->cmapdata85);  | 
1670  | 0  |     if (cid->cmapdatahex) LEPT_FREE(cid->cmapdatahex);  | 
1671  | 0  |     LEPT_FREE(cid);  | 
1672  | 0  |     *pcid = NULL;  | 
1673  | 0  | }  | 
1674  |  |  | 
1675  |  |  | 
1676  |  | /*---------------------------------------------------------------------*  | 
1677  |  |  *         Helper functions for generating the output pdf string       *  | 
1678  |  |  *---------------------------------------------------------------------*/  | 
1679  |  | /*!  | 
1680  |  |  * \brief   l_generatePdf()  | 
1681  |  |  *  | 
1682  |  |  * \param[out]   pdata     pdf array  | 
1683  |  |  * \param[out]   pnbytes   number of bytes in pdf array  | 
1684  |  |  * \param[in]    lpd       all the required input image data  | 
1685  |  |  * \return  0 if OK, 1 on error  | 
1686  |  |  *  | 
1687  |  |  * <pre>  | 
1688  |  |  * Notes:  | 
1689  |  |  *      (1) On error, no data is returned.  | 
1690  |  |  *      (2) The objects are:  | 
1691  |  |  *            1: Catalog  | 
1692  |  |  *            2: Info  | 
1693  |  |  *            3: Pages  | 
1694  |  |  *            4: Page  | 
1695  |  |  *            5: Contents  (rendering command)  | 
1696  |  |  *            6 to 6+n-1: n XObjects  | 
1697  |  |  *            6+n to 6+n+m-1: m colormaps  | 
1698  |  |  * </pre>  | 
1699  |  |  */  | 
1700  |  | static l_int32  | 
1701  |  | l_generatePdf(l_uint8    **pdata,  | 
1702  |  |               size_t      *pnbytes,  | 
1703  |  |               L_PDF_DATA  *lpd)  | 
1704  | 0  | { | 
1705  | 0  |     if (!pdata)  | 
1706  | 0  |         return ERROR_INT("&data not defined", __func__, 1); | 
1707  | 0  |     *pdata = NULL;  | 
1708  | 0  |     if (!pnbytes)  | 
1709  | 0  |         return ERROR_INT("&nbytes not defined", __func__, 1); | 
1710  | 0  |     *pnbytes = 0;  | 
1711  | 0  |     if (!lpd)  | 
1712  | 0  |         return ERROR_INT("lpd not defined", __func__, 1); | 
1713  |  |  | 
1714  | 0  |     generateFixedStringsPdf(lpd);  | 
1715  | 0  |     generateMediaboxPdf(lpd);  | 
1716  | 0  |     generatePageStringPdf(lpd);  | 
1717  | 0  |     generateContentStringPdf(lpd);  | 
1718  | 0  |     generatePreXStringsPdf(lpd);  | 
1719  | 0  |     generateColormapStringsPdf(lpd);  | 
1720  | 0  |     generateTrailerPdf(lpd);  | 
1721  | 0  |     return generateOutputDataPdf(pdata, pnbytes, lpd);  | 
1722  | 0  | }  | 
1723  |  |  | 
1724  |  |  | 
1725  |  | static void  | 
1726  |  | generateFixedStringsPdf(L_PDF_DATA  *lpd)  | 
1727  | 0  | { | 
1728  | 0  | char     buf[L_SMALLBUF];  | 
1729  | 0  | char    *version, *datestr;  | 
1730  | 0  | SARRAY  *sa;  | 
1731  |  |  | 
1732  |  |         /* Accumulate data for the header and objects 1-3 */  | 
1733  | 0  |     lpd->id = stringNew("%PDF-1.5\n"); | 
1734  | 0  |     l_dnaAddNumber(lpd->objsize, strlen(lpd->id));  | 
1735  |  | 
  | 
1736  | 0  |     lpd->obj1 = stringNew("1 0 obj\n" | 
1737  | 0  |                           "<<\n"  | 
1738  | 0  |                           "/Type /Catalog\n"  | 
1739  | 0  |                           "/Pages 3 0 R\n"  | 
1740  | 0  |                           ">>\n"  | 
1741  | 0  |                           "endobj\n");  | 
1742  | 0  |     l_dnaAddNumber(lpd->objsize, strlen(lpd->obj1));  | 
1743  |  | 
  | 
1744  | 0  |     sa = sarrayCreate(0);  | 
1745  | 0  |     sarrayAddString(sa, "2 0 obj\n"  | 
1746  | 0  |                         "<<\n", L_COPY);  | 
1747  | 0  |     if (var_WRITE_DATE_AND_VERSION) { | 
1748  | 0  |         datestr = l_getFormattedDate();  | 
1749  | 0  |         snprintf(buf, sizeof(buf), "/CreationDate (D:%s)\n", datestr);  | 
1750  | 0  |         sarrayAddString(sa, buf, L_COPY);  | 
1751  | 0  |         LEPT_FREE(datestr);  | 
1752  | 0  |         version = getLeptonicaVersion();  | 
1753  | 0  |         snprintf(buf, sizeof(buf),  | 
1754  | 0  |                  "/Producer (leptonica: %s)\n", version);  | 
1755  | 0  |         LEPT_FREE(version);  | 
1756  | 0  |     } else { | 
1757  | 0  |         snprintf(buf, sizeof(buf), "/Producer (leptonica)\n");  | 
1758  | 0  |     }  | 
1759  | 0  |     sarrayAddString(sa, buf, L_COPY);  | 
1760  | 0  |     if (lpd->title) { | 
1761  | 0  |         char *hexstr;  | 
1762  | 0  |         if ((hexstr = generateEscapeString(lpd->title)) != NULL) { | 
1763  | 0  |             snprintf(buf, sizeof(buf), "/Title %s\n", hexstr);  | 
1764  | 0  |             sarrayAddString(sa, buf, L_COPY);  | 
1765  | 0  |         } else { | 
1766  | 0  |             L_ERROR("title string is not ascii\n", __func__); | 
1767  | 0  |         }  | 
1768  | 0  |         LEPT_FREE(hexstr);  | 
1769  | 0  |     }  | 
1770  | 0  |     sarrayAddString(sa, ">>\n"  | 
1771  | 0  |                                 "endobj\n", L_COPY);  | 
1772  | 0  |     lpd->obj2 = sarrayToString(sa, 0);  | 
1773  | 0  |     l_dnaAddNumber(lpd->objsize, strlen(lpd->obj2));  | 
1774  | 0  |     sarrayDestroy(&sa);  | 
1775  |  | 
  | 
1776  | 0  |     lpd->obj3 = stringNew("3 0 obj\n" | 
1777  | 0  |                           "<<\n"  | 
1778  | 0  |                           "/Type /Pages\n"  | 
1779  | 0  |                           "/Kids [ 4 0 R ]\n"  | 
1780  | 0  |                           "/Count 1\n"  | 
1781  | 0  |                           ">>\n");  | 
1782  | 0  |     l_dnaAddNumber(lpd->objsize, strlen(lpd->obj3));  | 
1783  |  |  | 
1784  |  |         /* Do the post-datastream string */  | 
1785  | 0  |     lpd->poststream = stringNew("\n" | 
1786  | 0  |                                 "endstream\n"  | 
1787  | 0  |                                 "endobj\n");  | 
1788  | 0  | }  | 
1789  |  |  | 
1790  |  |  | 
/*!
 * \brief   generateEscapeString()
 *
 * \param[in]   str      input string
 * \return   hex escape string, or null on error
 *
 * <pre>
 * Notes:
 *      (1) If the input string is not ascii, returns null.  A byte
 *          is taken to be non-ascii if its value, as an unsigned
 *          char, exceeds 0x7f.  This does not depend on whether
 *          plain char is signed on the platform.
 *      (2) This takes an input ascii string and generates a hex
 *          ascii output string with 4 bytes out for each byte in.
 *          The feff code at the beginning tells the pdf interpreter
 *          that the data is to be interpreted as big-endian, 4 bytes
 *          at a time.  For ascii, the first two bytes are 0 and the
 *          last two bytes are less than 0x80.
 *      (3) The output has the form "<feffXXXX...XXXX>".  The caller
 *          owns the returned string.
 * </pre>
 */
static char  *
generateEscapeString(const char  *str)
{
char    *buffer;
size_t   i, nchar, buflen, pos;

    if (!str)
        return (char *)ERROR_PTR("str not defined", __func__, NULL);
    nchar = strlen(str);
    for (i = 0; i < nchar; i++) {
        if ((unsigned char)str[i] > 0x7f)
            return (char *)ERROR_PTR("str not all ascii", __func__, NULL);
    }

        /* 5 bytes for "<feff", 4 per character, 1 for ">", 1 for the
         * terminating nul; the remainder is slack. */
    buflen = 4 * nchar + 10;
    if ((buffer = (char *)LEPT_CALLOC(buflen, sizeof(char))) == NULL)
        return (char *)ERROR_PTR("buffer not made", __func__, NULL);

        /* Write each piece at a running offset */
    snprintf(buffer, buflen, "<feff");
    pos = 5;
    for (i = 0; i < nchar; i++) {
        snprintf(buffer + pos, buflen - pos, "%04x",
                 (unsigned int)(unsigned char)str[i]);
        pos += 4;
    }
    snprintf(buffer + pos, buflen - pos, ">");
    return buffer;
}
1833  |  |  | 
1834  |  |  | 
1835  |  | static void  | 
1836  |  | generateMediaboxPdf(L_PDF_DATA  *lpd)  | 
1837  | 0  | { | 
1838  | 0  | l_int32    i;  | 
1839  | 0  | l_float32  xpt, ypt, wpt, hpt, maxx, maxy;  | 
1840  |  |  | 
1841  |  |         /* First get the full extent of all the images.  | 
1842  |  |          * This is the mediabox, in pts. */  | 
1843  | 0  |     maxx = maxy = 0;  | 
1844  | 0  |     for (i = 0; i < lpd->n; i++) { | 
1845  | 0  |         ptaGetPt(lpd->xy, i, &xpt, &ypt);  | 
1846  | 0  |         ptaGetPt(lpd->wh, i, &wpt, &hpt);  | 
1847  | 0  |         maxx = L_MAX(maxx, xpt + wpt);  | 
1848  | 0  |         maxy = L_MAX(maxy, ypt + hpt);  | 
1849  | 0  |     }  | 
1850  |  | 
  | 
1851  | 0  |     lpd->mediabox = boxCreate(0, 0, (l_int32)(maxx + 0.5),  | 
1852  | 0  |                               (l_int32)(maxy + 0.5));  | 
1853  |  |  | 
1854  |  |         /* ypt is in standard image coordinates: the location of  | 
1855  |  |          * the UL image corner with respect to the UL media box corner.  | 
1856  |  |          * Rewrite each ypt for PostScript coordinates: the location of  | 
1857  |  |          * the LL image corner with respect to the LL media box corner. */  | 
1858  | 0  |     for (i = 0; i < lpd->n; i++) { | 
1859  | 0  |         ptaGetPt(lpd->xy, i, &xpt, &ypt);  | 
1860  | 0  |         ptaGetPt(lpd->wh, i, &wpt, &hpt);  | 
1861  | 0  |         ptaSetPt(lpd->xy, i, xpt, maxy - ypt - hpt);  | 
1862  | 0  |     }  | 
1863  | 0  | }  | 
1864  |  |  | 
1865  |  |  | 
1866  |  | static l_int32  | 
1867  |  | generatePageStringPdf(L_PDF_DATA  *lpd)  | 
1868  | 0  | { | 
1869  | 0  | char    *buf;  | 
1870  | 0  | char    *xstr;  | 
1871  | 0  | l_int32  bufsize, i, wpt, hpt;  | 
1872  | 0  | SARRAY  *sa;  | 
1873  |  |  | 
1874  |  |         /* Allocate 1000 bytes for the boilerplate text, and  | 
1875  |  |          * 50 bytes for each reference to an image in the  | 
1876  |  |          * ProcSet array.  */  | 
1877  | 0  |     bufsize = 1000 + 50 * lpd->n;  | 
1878  | 0  |     if ((buf = (char *)LEPT_CALLOC(bufsize, sizeof(char))) == NULL)  | 
1879  | 0  |         return ERROR_INT("calloc fail for buf", __func__, 1); | 
1880  |  |  | 
1881  | 0  |     boxGetGeometry(lpd->mediabox, NULL, NULL, &wpt, &hpt);  | 
1882  | 0  |     sa = sarrayCreate(lpd->n);  | 
1883  | 0  |     for (i = 0; i < lpd->n; i++) { | 
1884  | 0  |         snprintf(buf, bufsize, "/Im%d %d 0 R   ", i + 1, 6 + i);  | 
1885  | 0  |         sarrayAddString(sa, buf, L_COPY);  | 
1886  | 0  |     }  | 
1887  | 0  |     xstr = sarrayToString(sa, 0);  | 
1888  | 0  |     sarrayDestroy(&sa);  | 
1889  | 0  |     if (!xstr) { | 
1890  | 0  |         LEPT_FREE(buf);  | 
1891  | 0  |         return ERROR_INT("xstr not made", __func__, 1); | 
1892  | 0  |     }  | 
1893  |  |  | 
1894  | 0  |     snprintf(buf, bufsize, "4 0 obj\n"  | 
1895  | 0  |                            "<<\n"  | 
1896  | 0  |                            "/Type /Page\n"  | 
1897  | 0  |                            "/Parent 3 0 R\n"  | 
1898  | 0  |                            "/MediaBox [%d %d %d %d]\n"  | 
1899  | 0  |                            "/Contents 5 0 R\n"  | 
1900  | 0  |                            "/Resources\n"  | 
1901  | 0  |                            "<<\n"  | 
1902  | 0  |                            "/XObject << %s >>\n"  | 
1903  | 0  |                            "/ProcSet [ /ImageB /ImageI /ImageC ]\n"  | 
1904  | 0  |                            ">>\n"  | 
1905  | 0  |                            ">>\n"  | 
1906  | 0  |                            "endobj\n",  | 
1907  | 0  |                            0, 0, wpt, hpt, xstr);  | 
1908  |  | 
  | 
1909  | 0  |     lpd->obj4 = stringNew(buf);  | 
1910  | 0  |     l_dnaAddNumber(lpd->objsize, strlen(lpd->obj4));  | 
1911  | 0  |     sarrayDestroy(&sa);  | 
1912  | 0  |     LEPT_FREE(buf);  | 
1913  | 0  |     LEPT_FREE(xstr);  | 
1914  | 0  |     return 0;  | 
1915  | 0  | }  | 
1916  |  |  | 
1917  |  |  | 
1918  |  | static l_int32  | 
1919  |  | generateContentStringPdf(L_PDF_DATA  *lpd)  | 
1920  | 0  | { | 
1921  | 0  | char      *buf;  | 
1922  | 0  | char      *cstr;  | 
1923  | 0  | l_int32    i, bufsize;  | 
1924  | 0  | l_float32  xpt, ypt, wpt, hpt;  | 
1925  | 0  | SARRAY    *sa;  | 
1926  |  | 
  | 
1927  | 0  |     bufsize = 1000 + 200 * lpd->n;  | 
1928  | 0  |     if ((buf = (char *)LEPT_CALLOC(bufsize, sizeof(char))) == NULL)  | 
1929  | 0  |         return ERROR_INT("calloc fail for buf", __func__, 1); | 
1930  |  |  | 
1931  | 0  |     sa = sarrayCreate(lpd->n);  | 
1932  | 0  |     for (i = 0; i < lpd->n; i++) { | 
1933  | 0  |         ptaGetPt(lpd->xy, i, &xpt, &ypt);  | 
1934  | 0  |         ptaGetPt(lpd->wh, i, &wpt, &hpt);  | 
1935  | 0  |         snprintf(buf, bufsize,  | 
1936  | 0  |                  "q %.4f %.4f %.4f %.4f %.4f %.4f cm /Im%d Do Q\n",  | 
1937  | 0  |                  wpt, 0.0, 0.0, hpt, xpt, ypt, i + 1);  | 
1938  | 0  |         sarrayAddString(sa, buf, L_COPY);  | 
1939  | 0  |     }  | 
1940  | 0  |     cstr = sarrayToString(sa, 0);  | 
1941  | 0  |     sarrayDestroy(&sa);  | 
1942  | 0  |     if (!cstr) { | 
1943  | 0  |         LEPT_FREE(buf);  | 
1944  | 0  |         return ERROR_INT("cstr not made", __func__, 1); | 
1945  | 0  |     }  | 
1946  |  |  | 
1947  | 0  |     snprintf(buf, bufsize, "5 0 obj\n"  | 
1948  | 0  |                            "<< /Length %d >>\n"  | 
1949  | 0  |                            "stream\n"  | 
1950  | 0  |                            "%s"  | 
1951  | 0  |                            "endstream\n"  | 
1952  | 0  |                            "endobj\n",  | 
1953  | 0  |                            (l_int32)strlen(cstr), cstr);  | 
1954  |  | 
  | 
1955  | 0  |     lpd->obj5 = stringNew(buf);  | 
1956  | 0  |     l_dnaAddNumber(lpd->objsize, strlen(lpd->obj5));  | 
1957  | 0  |     sarrayDestroy(&sa);  | 
1958  | 0  |     LEPT_FREE(buf);  | 
1959  | 0  |     LEPT_FREE(cstr);  | 
1960  | 0  |     return 0;  | 
1961  | 0  | }  | 
1962  |  |  | 
1963  |  |  | 
1964  |  | static l_int32  | 
1965  |  | generatePreXStringsPdf(L_PDF_DATA  *lpd)  | 
1966  | 0  | { | 
1967  | 0  | char          buff[256];  | 
1968  | 0  | char          buf[L_BIGBUF];  | 
1969  | 0  | char         *cstr, *bstr, *fstr, *pstr, *xstr, *photometry;  | 
1970  | 0  | l_int32       i, cmindex;  | 
1971  | 0  | L_COMP_DATA  *cid;  | 
1972  | 0  | SARRAY       *sa;  | 
1973  |  | 
  | 
1974  | 0  |     sa = lpd->saprex;  | 
1975  | 0  |     cmindex = 6 + lpd->n;  /* starting value */  | 
1976  | 0  |     for (i = 0; i < lpd->n; i++) { | 
1977  | 0  |         pstr = cstr = NULL;  | 
1978  | 0  |         if ((cid = pdfdataGetCid(lpd, i)) == NULL)  | 
1979  | 0  |             return ERROR_INT("cid not found", __func__, 1); | 
1980  |  |  | 
1981  | 0  |         if (cid->type == L_G4_ENCODE) { | 
1982  | 0  |             if (var_WRITE_G4_IMAGE_MASK) { | 
1983  | 0  |                 cstr = stringNew("/ImageMask true\n" | 
1984  | 0  |                                  "/ColorSpace /DeviceGray");  | 
1985  | 0  |             } else { | 
1986  | 0  |                 cstr = stringNew("/ColorSpace /DeviceGray"); | 
1987  | 0  |             }  | 
1988  | 0  |             bstr = stringNew("/BitsPerComponent 1\n" | 
1989  | 0  |                              "/Interpolate true");  | 
1990  |  |                 /* Note: the reversal is deliberate.  The BlackIs1 flag  | 
1991  |  |                  * is misleadingly named: it says whether to invert the  | 
1992  |  |                  * image on decoding because the black pixels are 0,  | 
1993  |  |                  * not whether the black pixels are 1!  The default for  | 
1994  |  |                  * BlackIs1 is "false", which means "don't invert because  | 
1995  |  |                  * black is 1."  Yikes. */  | 
1996  | 0  |             photometry = (cid->minisblack) ? stringNew("true") | 
1997  | 0  |                                            : stringNew("false"); | 
1998  | 0  |             snprintf(buff, sizeof(buff),  | 
1999  | 0  |                      "/Filter /CCITTFaxDecode\n"  | 
2000  | 0  |                      "/DecodeParms\n"  | 
2001  | 0  |                      "<<\n"  | 
2002  | 0  |                      "/BlackIs1 %s\n"  | 
2003  | 0  |                      "/K -1\n"  | 
2004  | 0  |                      "/Columns %d\n"  | 
2005  | 0  |                      ">>", photometry, cid->w);  | 
2006  | 0  |             fstr = stringNew(buff);  | 
2007  | 0  |             LEPT_FREE(photometry);  | 
2008  | 0  |         } else if (cid->type == L_JPEG_ENCODE) { | 
2009  | 0  |             if (cid->spp == 1)  | 
2010  | 0  |                 cstr = stringNew("/ColorSpace /DeviceGray"); | 
2011  | 0  |             else if (cid->spp == 3)  | 
2012  | 0  |                 cstr = stringNew("/ColorSpace /DeviceRGB"); | 
2013  | 0  |             else if (cid->spp == 4)   /* pdf supports cmyk */  | 
2014  | 0  |                 cstr = stringNew("/ColorSpace /DeviceCMYK"); | 
2015  | 0  |             else  | 
2016  | 0  |                 L_ERROR("in jpeg: spp != 1, 3 or 4\n", __func__); | 
2017  | 0  |             bstr = stringNew("/BitsPerComponent 8"); | 
2018  | 0  |             fstr = stringNew("/Filter /DCTDecode"); | 
2019  | 0  |         } else if (cid->type == L_JP2K_ENCODE) { | 
2020  | 0  |             if (cid->spp == 1)  | 
2021  | 0  |                 cstr = stringNew("/ColorSpace /DeviceGray"); | 
2022  | 0  |             else if (cid->spp == 3)  | 
2023  | 0  |                 cstr = stringNew("/ColorSpace /DeviceRGB"); | 
2024  | 0  |             else  | 
2025  | 0  |                 L_ERROR("in jp2k: spp != 1 && spp != 3\n", __func__); | 
2026  | 0  |             bstr = stringNew("/BitsPerComponent 8"); | 
2027  | 0  |             fstr = stringNew("/Filter /JPXDecode"); | 
2028  | 0  |         } else {  /* type == L_FLATE_ENCODE */ | 
2029  | 0  |             if (cid->ncolors > 0) {  /* cmapped */ | 
2030  | 0  |                 snprintf(buff, sizeof(buff), "/ColorSpace %d 0 R", cmindex++);  | 
2031  | 0  |                 cstr = stringNew(buff);  | 
2032  | 0  |             } else { | 
2033  | 0  |                 if (cid->spp == 1 && cid->bps == 1)  | 
2034  | 0  |                     cstr = stringNew("/ColorSpace /DeviceGray\n" | 
2035  | 0  |                                      "/Decode [1 0]");  | 
2036  | 0  |                 else if (cid->spp == 1)  /* 8 bpp */  | 
2037  | 0  |                     cstr = stringNew("/ColorSpace /DeviceGray"); | 
2038  | 0  |                 else if (cid->spp == 3)  | 
2039  | 0  |                     cstr = stringNew("/ColorSpace /DeviceRGB"); | 
2040  | 0  |                 else  | 
2041  | 0  |                     L_ERROR("unknown colorspace: spp = %d\n", | 
2042  | 0  |                             __func__, cid->spp);  | 
2043  | 0  |             }  | 
2044  | 0  |             snprintf(buff, sizeof(buff), "/BitsPerComponent %d", cid->bps);  | 
2045  | 0  |             bstr = stringNew(buff);  | 
2046  | 0  |             fstr = stringNew("/Filter /FlateDecode"); | 
2047  | 0  |             if (cid->predictor == TRUE) { | 
2048  | 0  |                 snprintf(buff, sizeof(buff),  | 
2049  | 0  |                          "/DecodeParms\n"  | 
2050  | 0  |                          "<<\n"  | 
2051  | 0  |                          "  /Columns %d\n"  | 
2052  | 0  |                          "  /Predictor 14\n"  | 
2053  | 0  |                          "  /Colors %d\n"  | 
2054  | 0  |                          "  /BitsPerComponent %d\n"  | 
2055  | 0  |                          ">>\n", cid->w, cid->spp, cid->bps);  | 
2056  | 0  |                 pstr = stringNew(buff);  | 
2057  | 0  |             }  | 
2058  | 0  |         }  | 
2059  | 0  |         if (!pstr)  /* no decode parameters */  | 
2060  | 0  |             pstr = stringNew(""); | 
2061  |  | 
  | 
2062  | 0  |         snprintf(buf, sizeof(buf),  | 
2063  | 0  |                  "%d 0 obj\n"  | 
2064  | 0  |                  "<<\n"  | 
2065  | 0  |                  "/Length %zu\n"  | 
2066  | 0  |                  "/Subtype /Image\n"  | 
2067  | 0  |                  "%s\n"  /* colorspace */  | 
2068  | 0  |                  "/Width %d\n"  | 
2069  | 0  |                  "/Height %d\n"  | 
2070  | 0  |                  "%s\n"  /* bits/component */  | 
2071  | 0  |                  "%s\n"  /* filter */  | 
2072  | 0  |                  "%s"   /* decode parms; can be empty */  | 
2073  | 0  |                  ">>\n"  | 
2074  | 0  |                  "stream\n",  | 
2075  | 0  |                  6 + i, cid->nbytescomp, cstr,  | 
2076  | 0  |                  cid->w, cid->h, bstr, fstr, pstr);  | 
2077  | 0  |         xstr = stringNew(buf);  | 
2078  | 0  |         sarrayAddString(sa, xstr, L_INSERT);  | 
2079  | 0  |         l_dnaAddNumber(lpd->objsize,  | 
2080  | 0  |                       strlen(xstr) + cid->nbytescomp + strlen(lpd->poststream));  | 
2081  | 0  |         LEPT_FREE(cstr);  | 
2082  | 0  |         LEPT_FREE(bstr);  | 
2083  | 0  |         LEPT_FREE(fstr);  | 
2084  | 0  |         LEPT_FREE(pstr);  | 
2085  | 0  |     }  | 
2086  |  |  | 
2087  | 0  |     return 0;  | 
2088  | 0  | }  | 
2089  |  |  | 
2090  |  |  | 
2091  |  | static l_int32  | 
2092  |  | generateColormapStringsPdf(L_PDF_DATA  *lpd)  | 
2093  | 0  | { | 
2094  | 0  | char          buf[L_BIGBUF];  | 
2095  | 0  | char         *cmstr;  | 
2096  | 0  | l_int32       i, cmindex, ncmap;  | 
2097  | 0  | L_COMP_DATA  *cid;  | 
2098  | 0  | SARRAY       *sa;  | 
2099  |  |  | 
2100  |  |         /* In our canonical format, we have 5 objects, followed  | 
2101  |  |          * by n XObjects, followed by m colormaps, so the index of  | 
2102  |  |          * the first colormap object is 6 + n. */  | 
2103  | 0  |     sa = lpd->sacmap;  | 
2104  | 0  |     cmindex = 6 + lpd->n;  /* starting value */  | 
2105  | 0  |     ncmap = 0;  | 
2106  | 0  |     for (i = 0; i < lpd->n; i++) { | 
2107  | 0  |         if ((cid = pdfdataGetCid(lpd, i)) == NULL)  | 
2108  | 0  |             return ERROR_INT("cid not found", __func__, 1); | 
2109  | 0  |         if (cid->ncolors == 0) continue;  | 
2110  |  |  | 
2111  | 0  |         ncmap++;  | 
2112  | 0  |         snprintf(buf, sizeof(buf), "%d 0 obj\n"  | 
2113  | 0  |                                    "[ /Indexed /DeviceRGB\n"  | 
2114  | 0  |                                    "%d\n"  | 
2115  | 0  |                                    "%s\n"  | 
2116  | 0  |                                    "]\n"  | 
2117  | 0  |                                    "endobj\n",  | 
2118  | 0  |                                    cmindex, cid->ncolors - 1, cid->cmapdatahex);  | 
2119  | 0  |         cmindex++;  | 
2120  | 0  |         cmstr = stringNew(buf);  | 
2121  | 0  |         l_dnaAddNumber(lpd->objsize, strlen(cmstr));  | 
2122  | 0  |         sarrayAddString(sa, cmstr, L_INSERT);  | 
2123  | 0  |     }  | 
2124  |  |  | 
2125  | 0  |     lpd->ncmap = ncmap;  | 
2126  | 0  |     return 0;  | 
2127  | 0  | }  | 
2128  |  |  | 
2129  |  |  | 
2130  |  | static void  | 
2131  |  | generateTrailerPdf(L_PDF_DATA  *lpd)  | 
2132  | 0  | { | 
2133  | 0  | l_int32  i, n, size, linestart;  | 
2134  | 0  | L_DNA   *daloc, *dasize;  | 
2135  |  |  | 
2136  |  |         /* Let nobj be the number of numbered objects.  These numbered  | 
2137  |  |          * objects are indexed by their pdf number in arrays naloc[]  | 
2138  |  |          * and nasize[].  The 0th object is the 9 byte header.  Then  | 
2139  |  |          * the number of objects in nasize, which includes the header,  | 
2140  |  |          * is n = nobj + 1.  The array naloc[] has n + 1 elements,  | 
2141  |  |          * because it includes as the last element the starting  | 
2142  |  |          * location of xref.  The indexing of these objects, their  | 
2143  |  |          * starting locations and sizes are:  | 
2144  |  |          *  | 
2145  |  |          *     Object number         Starting location         Size  | 
2146  |  |          *     -------------         -----------------     --------------  | 
2147  |  |          *          0                   daloc[0] = 0       dasize[0] = 9  | 
2148  |  |          *          1                   daloc[1] = 9       dasize[1] = 49  | 
2149  |  |          *          n                   daloc[n]           dasize[n]  | 
2150  |  |          *          xref                daloc[n+1]  | 
2151  |  |          *  | 
2152  |  |          * We first generate daloc.  | 
2153  |  |          */  | 
2154  | 0  |     dasize = lpd->objsize;  | 
2155  | 0  |     daloc = lpd->objloc;  | 
2156  | 0  |     linestart = 0;  | 
2157  | 0  |     l_dnaAddNumber(daloc, linestart);  /* header */  | 
2158  | 0  |     n = l_dnaGetCount(dasize);  | 
2159  | 0  |     for (i = 0; i < n; i++) { | 
2160  | 0  |         l_dnaGetIValue(dasize, i, &size);  | 
2161  | 0  |         linestart += size;  | 
2162  | 0  |         l_dnaAddNumber(daloc, linestart);  | 
2163  | 0  |     }  | 
2164  | 0  |     l_dnaGetIValue(daloc, n, &lpd->xrefloc);  /* save it */  | 
2165  |  |  | 
2166  |  |         /* Now make the actual trailer string */  | 
2167  | 0  |     lpd->trailer = makeTrailerStringPdf(daloc);  | 
2168  | 0  | }  | 
2169  |  |  | 
2170  |  |  | 
2171  |  | static char *  | 
2172  |  | makeTrailerStringPdf(L_DNA  *daloc)  | 
2173  | 0  | { | 
2174  | 0  | char    *outstr;  | 
2175  | 0  | char     buf[L_BIGBUF];  | 
2176  | 0  | l_int32  i, n, linestart, xrefloc;  | 
2177  | 0  | SARRAY  *sa;  | 
2178  |  | 
  | 
2179  | 0  |     if (!daloc)  | 
2180  | 0  |         return (char *)ERROR_PTR("daloc not defined", __func__, NULL); | 
2181  | 0  |     n = l_dnaGetCount(daloc) - 1;  /* numbered objects + 1 (yes, +1) */  | 
2182  |  | 
  | 
2183  | 0  |     sa = sarrayCreate(0);  | 
2184  | 0  |     snprintf(buf, sizeof(buf), "xref\n"  | 
2185  | 0  |                                "0 %d\n"  | 
2186  | 0  |                                "0000000000 65535 f \n", n);  | 
2187  | 0  |     sarrayAddString(sa, buf, L_COPY);  | 
2188  | 0  |     for (i = 1; i < n; i++) { | 
2189  | 0  |         l_dnaGetIValue(daloc, i, &linestart);  | 
2190  | 0  |         snprintf(buf, sizeof(buf), "%010d 00000 n \n", linestart);  | 
2191  | 0  |         sarrayAddString(sa, buf, L_COPY);  | 
2192  | 0  |     }  | 
2193  |  | 
  | 
2194  | 0  |     l_dnaGetIValue(daloc, n, &xrefloc);  | 
2195  | 0  |     snprintf(buf, sizeof(buf), "trailer\n"  | 
2196  | 0  |                                "<<\n"  | 
2197  | 0  |                                "/Size %d\n"  | 
2198  | 0  |                                "/Root 1 0 R\n"  | 
2199  | 0  |                                "/Info 2 0 R\n"  | 
2200  | 0  |                                ">>\n"  | 
2201  | 0  |                                "startxref\n"  | 
2202  | 0  |                                "%d\n"  | 
2203  | 0  |                                "%%%%EOF\n", n, xrefloc);  | 
2204  | 0  |     sarrayAddString(sa, buf, L_COPY);  | 
2205  | 0  |     outstr = sarrayToString(sa, 0);  | 
2206  | 0  |     sarrayDestroy(&sa);  | 
2207  | 0  |     return outstr;  | 
2208  | 0  | }  | 
2209  |  |  | 
2210  |  |  | 
2211  |  | /*!  | 
2212  |  |  * \brief   generateOutputDataPdf()  | 
2213  |  |  *  | 
2214  |  |  * \param[out]   pdata      pdf data array  | 
2215  |  |  * \param[out]   pnbytes    size of pdf data array  | 
2216  |  |  * \param[in]    lpd        input data used to make pdf  | 
2217  |  |  * \return  0 if OK, 1 on error  | 
2218  |  |  *  | 
2219  |  |  * <pre>  | 
2220  |  |  * Notes:  | 
2221  |  |  *      (1) Only called from l_generatePdf().  On error, no data is returned.  | 
2222  |  |  * </pre>  | 
2223  |  |  */  | 
2224  |  | static l_int32  | 
2225  |  | generateOutputDataPdf(l_uint8    **pdata,  | 
2226  |  |                       size_t      *pnbytes,  | 
2227  |  |                       L_PDF_DATA  *lpd)  | 
2228  | 0  | { | 
2229  | 0  | char         *str;  | 
2230  | 0  | l_uint8      *data;  | 
2231  | 0  | l_int32       nimages, i, len;  | 
2232  | 0  | l_int32      *sizes, *locs;  | 
2233  | 0  | size_t        nbytes;  | 
2234  | 0  | L_COMP_DATA  *cid;  | 
2235  |  | 
  | 
2236  | 0  |     if (!pdata)  | 
2237  | 0  |         return ERROR_INT("&data not defined", __func__, 1); | 
2238  | 0  |     *pdata = NULL;  | 
2239  | 0  |     if (!pnbytes)  | 
2240  | 0  |         return ERROR_INT("&nbytes not defined", __func__, 1); | 
2241  | 0  |     nbytes = lpd->xrefloc + strlen(lpd->trailer);  | 
2242  | 0  |     *pnbytes = nbytes;  | 
2243  | 0  |     if ((data = (l_uint8 *)LEPT_CALLOC(nbytes, sizeof(l_uint8))) == NULL)  | 
2244  | 0  |         return ERROR_INT("calloc fail for data", __func__, 1); | 
2245  | 0  |     *pdata = data;  | 
2246  |  | 
  | 
2247  | 0  |     sizes = l_dnaGetIArray(lpd->objsize);  | 
2248  | 0  |     locs = l_dnaGetIArray(lpd->objloc);  | 
2249  | 0  |     memcpy(data, lpd->id, sizes[0]);  | 
2250  | 0  |     memcpy(data + locs[1], lpd->obj1, sizes[1]);  | 
2251  | 0  |     memcpy(data + locs[2], lpd->obj2, sizes[2]);  | 
2252  | 0  |     memcpy(data + locs[3], lpd->obj3, sizes[3]);  | 
2253  | 0  |     memcpy(data + locs[4], lpd->obj4, sizes[4]);  | 
2254  | 0  |     memcpy(data + locs[5], lpd->obj5, sizes[5]);  | 
2255  |  |  | 
2256  |  |         /* Each image has 3 parts: variable preamble, the compressed  | 
2257  |  |          * data stream, and the fixed poststream. */  | 
2258  | 0  |     nimages = lpd->n;  | 
2259  | 0  |     for (i = 0; i < nimages; i++) { | 
2260  | 0  |         if ((cid = pdfdataGetCid(lpd, i)) == NULL) {  /* should not happen */ | 
2261  | 0  |             LEPT_FREE(sizes);  | 
2262  | 0  |             LEPT_FREE(locs);  | 
2263  | 0  |             return ERROR_INT("cid not found", __func__, 1); | 
2264  | 0  |         }  | 
2265  | 0  |         str = sarrayGetString(lpd->saprex, i, L_NOCOPY);  | 
2266  | 0  |         len = strlen(str);  | 
2267  | 0  |         memcpy(data + locs[6 + i], str, len);  | 
2268  | 0  |         memcpy(data + locs[6 + i] + len,  | 
2269  | 0  |                cid->datacomp, cid->nbytescomp);  | 
2270  | 0  |         memcpy(data + locs[6 + i] + len + cid->nbytescomp,  | 
2271  | 0  |                lpd->poststream, strlen(lpd->poststream));  | 
2272  | 0  |     }  | 
2273  |  |  | 
2274  |  |         /* Each colormap is simply a stored string */  | 
2275  | 0  |     for (i = 0; i < lpd->ncmap; i++) { | 
2276  | 0  |         str = sarrayGetString(lpd->sacmap, i, L_NOCOPY);  | 
2277  | 0  |         memcpy(data + locs[6 + nimages + i], str, strlen(str));  | 
2278  | 0  |     }  | 
2279  |  |  | 
2280  |  |         /* And finally the trailer */  | 
2281  | 0  |     memcpy(data + lpd->xrefloc, lpd->trailer, strlen(lpd->trailer));  | 
2282  | 0  |     LEPT_FREE(sizes);  | 
2283  | 0  |     LEPT_FREE(locs);  | 
2284  | 0  |     return 0;  | 
2285  | 0  | }  | 
2286  |  |  | 
2287  |  |  | 
2288  |  | /*---------------------------------------------------------------------*  | 
2289  |  |  *          Helper functions for generating multipage pdf output       *  | 
2290  |  |  *---------------------------------------------------------------------*/  | 
2291  |  | /*!  | 
2292  |  |  * \brief   parseTrailerPdf()  | 
2293  |  |  *  | 
2294  |  |  * \param[in]    bas     lba of a pdf file  | 
2295  |  |  * \param[out]   pda     byte locations of the beginning of each object  | 
2296  |  |  * \return  0 if OK, 1 on error  | 
2297  |  |  */  | 
2298  |  | static l_int32  | 
2299  |  | parseTrailerPdf(L_BYTEA  *bas,  | 
2300  |  |                 L_DNA   **pda)  | 
2301  | 0  | { | 
2302  | 0  | char     *str;  | 
2303  | 0  | l_uint8   nl = '\n';  | 
2304  | 0  | l_uint8  *data;  | 
2305  | 0  | l_int32   i, j, start, startloc, xrefloc, found, loc, nobj, objno, trailer_ok;  | 
2306  | 0  | size_t    size;  | 
2307  | 0  | L_DNA    *da, *daobj, *daxref;  | 
2308  | 0  | SARRAY   *sa;  | 
2309  |  | 
  | 
2310  | 0  |     if (!pda)  | 
2311  | 0  |         return ERROR_INT("&da not defined", __func__, 1); | 
2312  | 0  |     *pda = NULL;  | 
2313  | 0  |     if (!bas)  | 
2314  | 0  |         return ERROR_INT("bas not defined", __func__, 1); | 
2315  | 0  |     data = l_byteaGetData(bas, &size);  | 
2316  | 0  |     if (memcmp(data, "%PDF-1.", 7) != 0)  | 
2317  | 0  |         return ERROR_INT("PDF header signature not found", __func__, 1); | 
2318  |  |  | 
2319  |  |         /* Search for "startxref" starting 50 bytes from the EOF */  | 
2320  | 0  |     start = 0;  | 
2321  | 0  |     if (size > 50)  | 
2322  | 0  |         start = size - 50;  | 
2323  | 0  |     arrayFindSequence(data + start, size - start,  | 
2324  | 0  |                       (l_uint8 *)"startxref\n", 10, &loc, &found);  | 
2325  | 0  |     if (!found)  | 
2326  | 0  |         return ERROR_INT("startxref not found!", __func__, 1); | 
2327  | 0  |     if (sscanf((char *)(data + start + loc + 10), "%d\n", &xrefloc) != 1)  | 
2328  | 0  |         return ERROR_INT("xrefloc not found!", __func__, 1); | 
2329  | 0  |     if (xrefloc < 0 || xrefloc >= size)  | 
2330  | 0  |         return ERROR_INT("invalid xrefloc!", __func__, 1); | 
2331  | 0  |     sa = sarrayCreateLinesFromString((char *)(data + xrefloc), 0);  | 
2332  | 0  |     str = sarrayGetString(sa, 1, L_NOCOPY);  | 
2333  | 0  |     if ((sscanf(str, "0 %d", &nobj)) != 1) { | 
2334  | 0  |         sarrayDestroy(&sa);  | 
2335  | 0  |         return ERROR_INT("nobj not found", __func__, 1); | 
2336  | 0  |     }  | 
2337  |  |  | 
2338  |  |         /* Get starting locations.  The numa index is the  | 
2339  |  |          * object number.  loc[0] is the ID; loc[nobj + 1] is xrefloc.  */  | 
2340  | 0  |     da = l_dnaCreate(nobj + 1);  | 
2341  | 0  |     *pda = da;  | 
2342  | 0  |     for (i = 0; i < nobj; i++) { | 
2343  | 0  |         str = sarrayGetString(sa, i + 2, L_NOCOPY);  | 
2344  | 0  |         sscanf(str, "%d", &startloc);  | 
2345  | 0  |         l_dnaAddNumber(da, startloc);  | 
2346  | 0  |     }  | 
2347  | 0  |     l_dnaAddNumber(da, xrefloc);  | 
2348  |  | 
  | 
2349  |  | #if  DEBUG_MULTIPAGE  | 
2350  |  |     lept_stderr("************** Trailer string ************\n"); | 
2351  |  |     lept_stderr("xrefloc = %d", xrefloc); | 
2352  |  |     sarrayWriteStderr(sa);  | 
2353  |  |  | 
2354  |  |     lept_stderr("************** Object locations ************"); | 
2355  |  |     l_dnaWriteStderr(da);  | 
2356  |  | #endif  /* DEBUG_MULTIPAGE */  | 
2357  | 0  |     sarrayDestroy(&sa);  | 
2358  |  |  | 
2359  |  |         /* Verify correct parsing */  | 
2360  | 0  |     trailer_ok = TRUE;  | 
2361  | 0  |     for (i = 1; i < nobj; i++) { | 
2362  | 0  |         l_dnaGetIValue(da, i, &startloc);  | 
2363  | 0  |         if ((sscanf((char *)(data + startloc), "%d 0 obj", &objno)) != 1) { | 
2364  | 0  |             L_ERROR("bad trailer for object %d\n", __func__, i); | 
2365  | 0  |             trailer_ok = FALSE;  | 
2366  | 0  |             break;  | 
2367  | 0  |         }  | 
2368  | 0  |     }  | 
2369  |  |  | 
2370  |  |         /* If the trailer is broken, reconstruct the correct obj locations */  | 
2371  | 0  |     if (!trailer_ok) { | 
2372  | 0  |         L_INFO("rebuilding pdf trailer\n", __func__); | 
2373  | 0  |         l_dnaEmpty(da);  | 
2374  | 0  |         l_dnaAddNumber(da, 0);  | 
2375  | 0  |         l_byteaFindEachSequence(bas, (l_uint8 *)" 0 obj\n", 7, &daobj);  | 
2376  | 0  |         nobj = l_dnaGetCount(daobj);  | 
2377  | 0  |         for (i = 0; i < nobj; i++) { | 
2378  | 0  |             l_dnaGetIValue(daobj, i, &loc);  | 
2379  | 0  |             for (j = loc - 1; j > 0; j--) { | 
2380  | 0  |                 if (data[j] == nl)  | 
2381  | 0  |                     break;  | 
2382  | 0  |             }  | 
2383  | 0  |             l_dnaAddNumber(da, j + 1);  | 
2384  | 0  |         }  | 
2385  | 0  |         l_byteaFindEachSequence(bas, (l_uint8 *)"xref", 4, &daxref);  | 
2386  | 0  |         l_dnaGetIValue(daxref, 0, &loc);  | 
2387  | 0  |         l_dnaAddNumber(da, loc);  | 
2388  | 0  |         l_dnaDestroy(&daobj);  | 
2389  | 0  |         l_dnaDestroy(&daxref);  | 
2390  | 0  |     }  | 
2391  |  | 
  | 
2392  | 0  |     return 0;  | 
2393  | 0  | }  | 
2394  |  |  | 
2395  |  |  | 
2396  |  | static char *  | 
2397  |  | generatePagesObjStringPdf(NUMA  *napage)  | 
2398  | 0  | { | 
2399  | 0  | char    *str;  | 
2400  | 0  | char    *buf;  | 
2401  | 0  | l_int32  i, n, index, bufsize;  | 
2402  | 0  | SARRAY  *sa;  | 
2403  |  | 
  | 
2404  | 0  |     if (!napage)  | 
2405  | 0  |         return (char *)ERROR_PTR("napage not defined", __func__, NULL); | 
2406  |  |  | 
2407  | 0  |     n = numaGetCount(napage);  | 
2408  | 0  |     bufsize = 100 + 16 * n;  /* large enough to hold the output string */  | 
2409  | 0  |     buf = (char *)LEPT_CALLOC(bufsize, sizeof(char));  | 
2410  | 0  |     sa = sarrayCreate(n);  | 
2411  | 0  |     for (i = 0; i < n; i++) { | 
2412  | 0  |         numaGetIValue(napage, i, &index);  | 
2413  | 0  |         snprintf(buf, bufsize, " %d 0 R ", index);  | 
2414  | 0  |         sarrayAddString(sa, buf, L_COPY);  | 
2415  | 0  |     }  | 
2416  |  | 
  | 
2417  | 0  |     str = sarrayToString(sa, 0);  | 
2418  | 0  |     snprintf(buf, bufsize - 1, "3 0 obj\n"  | 
2419  | 0  |                                "<<\n"  | 
2420  | 0  |                                "/Type /Pages\n"  | 
2421  | 0  |                                "/Kids [%s]\n"  | 
2422  | 0  |                                "/Count %d\n"  | 
2423  | 0  |                                ">>\n"  | 
2424  | 0  |                                "endobj\n",  | 
2425  | 0  |                                str, n);  | 
2426  | 0  |     sarrayDestroy(&sa);  | 
2427  | 0  |     LEPT_FREE(str);  | 
2428  | 0  |     return buf;  | 
2429  | 0  | }  | 
2430  |  |  | 
2431  |  |  | 
2432  |  | /*!  | 
2433  |  |  * \brief   substituteObjectNumbers()  | 
2434  |  |  *  | 
2435  |  |  * \param[in]   bas        lba of a pdf object  | 
2436  |  |  * \param[in]   na_objs    object number mapping array  | 
2437  |  |  * \return    bad   lba of rewritten pdf for the object  | 
2438  |  |  *  | 
2439  |  |  * <pre>  | 
2440  |  |  * Notes:  | 
2441  |  |  *      (1) Interpret the first set of bytes as the object number,  | 
2442  |  |  *          map to the new number, and write it out.  | 
2443  |  |  *      (2) Find all occurrences of this 4-byte sequence: " 0 R"  | 
2444  |  |  *      (3) Find the location and value of the integer preceding this,  | 
2445  |  |  *          and map it to the new value.  | 
2446  |  |  *      (4) Rewrite the object with new object numbers.  | 
2447  |  |  * </pre>  | 
2448  |  |  */  | 
2449  |  | static L_BYTEA *  | 
2450  |  | substituteObjectNumbers(L_BYTEA  *bas,  | 
2451  |  |                         NUMA     *na_objs)  | 
2452  | 0  | { | 
2453  | 0  | l_uint8   space = ' ';  | 
2454  | 0  | l_uint8  *datas;  | 
2455  | 0  | l_uint8   buf[32];  /* only needs to hold one integer in ascii format */  | 
2456  | 0  | l_int32   start, nrepl, i, j, nobjs, objin, objout, found;  | 
2457  | 0  | l_int32  *objs, *matches;  | 
2458  | 0  | size_t    size;  | 
2459  | 0  | L_BYTEA  *bad;  | 
2460  | 0  | L_DNA    *da_match;  | 
2461  |  | 
  | 
2462  | 0  |     if (!bas)  | 
2463  | 0  |         return (L_BYTEA *)ERROR_PTR("bas not defined", __func__, NULL); | 
2464  | 0  |     if (!na_objs)  | 
2465  | 0  |         return (L_BYTEA *)ERROR_PTR("na_objs not defined", __func__, NULL); | 
2466  |  |  | 
2467  | 0  |     datas = l_byteaGetData(bas, &size);  | 
2468  | 0  |     bad = l_byteaCreate(100);  | 
2469  | 0  |     objs = numaGetIArray(na_objs);  /* object number mapper */  | 
2470  | 0  |     nobjs = numaGetCount(na_objs);  /* use for sanity checking */  | 
2471  |  |  | 
2472  |  |         /* Substitute the object number on the first line */  | 
2473  | 0  |     sscanf((char *)datas, "%d", &objin);  | 
2474  | 0  |     if (objin < 0 || objin >= nobjs) { | 
2475  | 0  |         L_ERROR("index %d into array of size %d\n", __func__, objin, nobjs); | 
2476  | 0  |         LEPT_FREE(objs);  | 
2477  | 0  |         return bad;  | 
2478  | 0  |     }  | 
2479  | 0  |     objout = objs[objin];  | 
2480  | 0  |     snprintf((char *)buf, 32, "%d", objout);  | 
2481  | 0  |     l_byteaAppendString(bad, (char *)buf);  | 
2482  |  |  | 
2483  |  |         /* Find the set of matching locations for object references */  | 
2484  | 0  |     arrayFindSequence(datas, size, &space, 1, &start, &found);  | 
2485  | 0  |     da_match = arrayFindEachSequence(datas, size, (l_uint8 *)" 0 R", 4);  | 
2486  | 0  |     if (!da_match) { | 
2487  | 0  |         l_byteaAppendData(bad, datas + start, size - start);  | 
2488  | 0  |         LEPT_FREE(objs);  | 
2489  | 0  |         return bad;  | 
2490  | 0  |     }  | 
2491  |  |  | 
2492  |  |         /* Substitute all the object reference numbers */  | 
2493  | 0  |     nrepl = l_dnaGetCount(da_match);  | 
2494  | 0  |     matches = l_dnaGetIArray(da_match);  | 
2495  | 0  |     for (i = 0; i < nrepl; i++) { | 
2496  |  |             /* Find the first space before the object number */  | 
2497  | 0  |         for (j = matches[i] - 1; j > 0; j--) { | 
2498  | 0  |             if (datas[j] == space)  | 
2499  | 0  |                 break;  | 
2500  | 0  |         }  | 
2501  |  |             /* Copy bytes from 'start' up to the object number */  | 
2502  | 0  |         l_byteaAppendData(bad, datas + start, j - start + 1);  | 
2503  | 0  |         sscanf((char *)(datas + j + 1), "%d", &objin);  | 
2504  | 0  |         if (objin < 0 || objin >= nobjs) { | 
2505  | 0  |             L_ERROR("index %d into array of size %d\n", __func__, objin, nobjs); | 
2506  | 0  |             LEPT_FREE(objs);  | 
2507  | 0  |             LEPT_FREE(matches);  | 
2508  | 0  |             l_dnaDestroy(&da_match);  | 
2509  | 0  |             return bad;  | 
2510  | 0  |         }  | 
2511  | 0  |         objout = objs[objin];  | 
2512  | 0  |         snprintf((char *)buf, 32, "%d", objout);  | 
2513  | 0  |         l_byteaAppendString(bad, (char *)buf);  | 
2514  | 0  |         start = matches[i];  | 
2515  | 0  |     }  | 
2516  | 0  |     l_byteaAppendData(bad, datas + start, size - start);  | 
2517  |  | 
  | 
2518  | 0  |     LEPT_FREE(objs);  | 
2519  | 0  |     LEPT_FREE(matches);  | 
2520  | 0  |     l_dnaDestroy(&da_match);  | 
2521  | 0  |     return bad;  | 
2522  | 0  | }  | 
2523  |  |  | 
2524  |  |  | 
2525  |  | /*---------------------------------------------------------------------*  | 
2526  |  |  *                     Create/destroy/access pdf data                  *  | 
2527  |  |  *---------------------------------------------------------------------*/  | 
2528  |  | static L_PDF_DATA *  | 
2529  |  | pdfdataCreate(const char  *title)  | 
2530  | 0  | { | 
2531  | 0  | L_PDF_DATA *lpd;  | 
2532  |  | 
  | 
2533  | 0  |     lpd = (L_PDF_DATA *)LEPT_CALLOC(1, sizeof(L_PDF_DATA));  | 
2534  | 0  |     if (title) lpd->title = stringNew(title);  | 
2535  | 0  |     lpd->cida = ptraCreate(10);  | 
2536  | 0  |     lpd->xy = ptaCreate(10);  | 
2537  | 0  |     lpd->wh = ptaCreate(10);  | 
2538  | 0  |     lpd->saprex = sarrayCreate(10);  | 
2539  | 0  |     lpd->sacmap = sarrayCreate(10);  | 
2540  | 0  |     lpd->objsize = l_dnaCreate(20);  | 
2541  | 0  |     lpd->objloc = l_dnaCreate(20);  | 
2542  | 0  |     return lpd;  | 
2543  | 0  | }  | 
2544  |  |  | 
2545  |  | static void  | 
2546  |  | pdfdataDestroy(L_PDF_DATA  **plpd)  | 
2547  | 0  | { | 
2548  | 0  | l_int32       i;  | 
2549  | 0  | L_COMP_DATA  *cid;  | 
2550  | 0  | L_PDF_DATA   *lpd;  | 
2551  |  | 
  | 
2552  | 0  |     if (plpd== NULL) { | 
2553  | 0  |         L_WARNING("ptr address is null!\n", __func__); | 
2554  | 0  |         return;  | 
2555  | 0  |     }  | 
2556  | 0  |     if ((lpd = *plpd) == NULL)  | 
2557  | 0  |         return;  | 
2558  |  |  | 
2559  | 0  |     if (lpd->title) LEPT_FREE(lpd->title);  | 
2560  | 0  |     for (i = 0; i < lpd->n; i++) { | 
2561  | 0  |         cid = (L_COMP_DATA *)ptraRemove(lpd->cida, i, L_NO_COMPACTION);  | 
2562  | 0  |         l_CIDataDestroy(&cid);  | 
2563  | 0  |     }  | 
2564  |  | 
  | 
2565  | 0  |     ptraDestroy(&lpd->cida, 0, 0);  | 
2566  | 0  |     if (lpd->id) LEPT_FREE(lpd->id);  | 
2567  | 0  |     if (lpd->obj1) LEPT_FREE(lpd->obj1);  | 
2568  | 0  |     if (lpd->obj2) LEPT_FREE(lpd->obj2);  | 
2569  | 0  |     if (lpd->obj3) LEPT_FREE(lpd->obj3);  | 
2570  | 0  |     if (lpd->obj4) LEPT_FREE(lpd->obj4);  | 
2571  | 0  |     if (lpd->obj5) LEPT_FREE(lpd->obj5);  | 
2572  | 0  |     if (lpd->poststream) LEPT_FREE(lpd->poststream);  | 
2573  | 0  |     if (lpd->trailer) LEPT_FREE(lpd->trailer);  | 
2574  | 0  |     if (lpd->xy) ptaDestroy(&lpd->xy);  | 
2575  | 0  |     if (lpd->wh) ptaDestroy(&lpd->wh);  | 
2576  | 0  |     if (lpd->mediabox) boxDestroy(&lpd->mediabox);  | 
2577  | 0  |     if (lpd->saprex) sarrayDestroy(&lpd->saprex);  | 
2578  | 0  |     if (lpd->sacmap) sarrayDestroy(&lpd->sacmap);  | 
2579  | 0  |     if (lpd->objsize) l_dnaDestroy(&lpd->objsize);  | 
2580  | 0  |     if (lpd->objloc) l_dnaDestroy(&lpd->objloc);  | 
2581  | 0  |     LEPT_FREE(lpd);  | 
2582  | 0  |     *plpd = NULL;  | 
2583  | 0  | }  | 
2584  |  |  | 
2585  |  |  | 
2586  |  | static L_COMP_DATA *  | 
2587  |  | pdfdataGetCid(L_PDF_DATA  *lpd,  | 
2588  |  |               l_int32      index)  | 
2589  | 0  | { | 
2590  | 0  |     if (!lpd)  | 
2591  | 0  |         return (L_COMP_DATA *)ERROR_PTR("lpd not defined", __func__, NULL); | 
2592  | 0  |     if (index < 0 || index >= lpd->n)  | 
2593  | 0  |         return (L_COMP_DATA *)ERROR_PTR("invalid image index", __func__, NULL); | 
2594  |  |  | 
2595  | 0  |     return (L_COMP_DATA *)ptraGetPtrToItem(lpd->cida, index);  | 
2596  | 0  | }  | 
2597  |  |  | 
2598  |  |  | 
2599  |  | /*---------------------------------------------------------------------*  | 
2600  |  |  *                     Find number of pages in a pdf                   *  | 
2601  |  |  *---------------------------------------------------------------------*/  | 
2602  |  | /*!  | 
2603  |  |  * \brief   getPdfPageCount()  | 
2604  |  |  *  | 
2605  |  |  * \param[in]    fname      filename  | 
2606  |  |  * \param[out]   pnpages    number of pages  | 
2607  |  |  * \return  0 if OK, 1 on error  | 
2608  |  |  *  | 
2609  |  |  * <pre>  | 
2610  |  |  * Notes:  | 
2611  |  |  *      (1) Looks for the argument of the first instance of /Count in the file.  | 
2612  |  |  *      (2) This first reads 10000 bytes from the beginning of the file.  | 
2613  |  |  *          If "/Count" is not in that string, it reads the entire file  | 
2614  |  |  *          and looks for "/Count".  | 
2615  |  |  *      (3) This will not work on encrypted pdf files or on files where  | 
2616  |  |  *          the "/Count" field is binary compressed.  Not finding the  | 
2617  |  |  *          "/Count" field is not an error, but a warning is given.  | 
2618  |  |  * </pre>  | 
2619  |  |  */  | 
2620  |  | l_ok  | 
2621  |  | getPdfPageCount(const char  *fname,  | 
2622  |  |                 l_int32     *pnpages)  | 
2623  | 0  | { | 
2624  | 0  | l_uint8  *data;  | 
2625  | 0  | l_int32   format, loc, ret, npages, found;  | 
2626  | 0  | size_t    nread;  | 
2627  |  | 
  | 
2628  | 0  |     if (!pnpages)  | 
2629  | 0  |         return ERROR_INT("&npages not defined", __func__, 1); | 
2630  | 0  |     *pnpages = 0;  | 
2631  | 0  |     if (!fname)  | 
2632  | 0  |         return ERROR_INT("fname not defined", __func__, 1); | 
2633  |  |  | 
2634  |  |         /* Make sure this a pdf file */  | 
2635  | 0  |     findFileFormat(fname, &format);  | 
2636  | 0  |     if (format != IFF_LPDF)  | 
2637  | 0  |         return ERROR_INT("file is not pdf", __func__, 1); | 
2638  |  |  | 
2639  |  |         /* Read 10000 bytes from the beginning of the file */  | 
2640  | 0  |     if ((data = l_binaryReadSelect(fname, 0, 10000, &nread))  | 
2641  | 0  |                  == NULL)  | 
2642  | 0  |         return ERROR_INT("partial data not read", __func__, 1); | 
2643  |  |  | 
2644  |  |         /* Find the location of the first instance of "/Count".  | 
2645  |  |          * If it is not found, try reading the entire file and  | 
2646  |  |          * looking again. */  | 
2647  | 0  |     arrayFindSequence(data, nread, (const l_uint8 *)"/Count",  | 
2648  | 0  |           strlen("/Count"), &loc, &found); | 
2649  | 0  |     if (!found) { | 
2650  | 0  |         lept_stderr("Reading entire file looking for '/Count'\n"); | 
2651  | 0  |         LEPT_FREE(data);  | 
2652  | 0  |         if ((data = l_binaryRead(fname, &nread)) == NULL)  | 
2653  | 0  |             return ERROR_INT("full data not read", __func__, 1); | 
2654  | 0  |         arrayFindSequence(data, nread, (const l_uint8 *)"/Count",  | 
2655  | 0  |              strlen("/Count"), &loc, &found); | 
2656  | 0  |         if (!found) { | 
2657  | 0  |             LEPT_FREE(data);  | 
2658  | 0  |             L_WARNING("/Count not found\n", __func__); | 
2659  | 0  |             return 0;  | 
2660  | 0  |         }  | 
2661  | 0  |     }  | 
2662  |  |  | 
2663  |  |         /* Unlikely: make sure we can read the count field */  | 
2664  | 0  |     if (nread - loc < 12)  { /* haven't read enough to capture page count */ | 
2665  | 0  |         LEPT_FREE(data);  | 
2666  | 0  |         return ERROR_INT("data may not include page count field", __func__, 1); | 
2667  | 0  |     }  | 
2668  |  |  | 
2669  |  |         /* Read the page count; if not found, puts garbage in npages */  | 
2670  | 0  |     ret = sscanf((char *)&data[loc], "/Count %d", &npages);  | 
2671  | 0  |     LEPT_FREE(data);  | 
2672  | 0  |     if (ret != 1)  | 
2673  | 0  |         return ERROR_INT("npages not found", __func__, 1); | 
2674  | 0  |     *pnpages = npages;  | 
2675  |  | /*    lept_stderr("bytes read = %d, loc = %d, npages = %d\n", | 
2676  |  |                 nread, loc, *pnpages);  */  | 
2677  | 0  |     return 0;  | 
2678  | 0  | }  | 
2679  |  |  | 
2680  |  |  | 
2681  |  | /*---------------------------------------------------------------------*  | 
2682  |  |  *      Find widths and heights of pages and media boxes in a pdf      *  | 
2683  |  |  *---------------------------------------------------------------------*/  | 
2684  |  | /*!  | 
2685  |  |  * \brief   getPdfPageSizes()  | 
2686  |  |  *  | 
2687  |  |  * \param[in]    fname        filename  | 
2688  |  |  * \param[out]   pnaw         [optional] array of page widths  | 
2689  |  |  * \param[out]   pnah         [optional] array of page heights  | 
2690  |  |  * \param[out]   pmedw        [optional] median page width  | 
2691  |  |  * \param[out]   pmedh        [optional] median page height  | 
2692  |  |  * \return  0 if OK, 1 on error  | 
2693  |  |  *  | 
2694  |  |  * <pre>  | 
2695  |  |  * Notes:  | 
2696  |  |  *      (1) Finds the arguments of each instance of '/Width' and '/Height'  | 
2697  |  |  *          in the file.  | 
2698  |  |  *      (2) This will not work on encrypted pdf files or on files where  | 
2699  |  |  *          the "/Width" and "/Height" fields are binary compressed.  | 
2700  |  |  *          Not finding the "/Width" and /Height" fields is not an error,  | 
2701  |  |  *          but a warning is given.  | 
2702  |  |  * </pre>  | 
2703  |  |  */  | 
2704  |  | l_ok  | 
2705  |  | getPdfPageSizes(const char  *fname,  | 
2706  |  |                 NUMA       **pnaw,  | 
2707  |  |                 NUMA       **pnah,  | 
2708  |  |                 l_int32     *pmedw,  | 
2709  |  |                 l_int32     *pmedh)  | 
2710  | 0  | { | 
2711  | 0  | l_uint8   *data;  | 
2712  | 0  | l_int32    i, nw, nh, format, ret, loc, width, height;  | 
2713  | 0  | l_float32  fval;  | 
2714  | 0  | size_t     nread;  | 
2715  | 0  | L_DNA     *dnaw;  /* width locations */  | 
2716  | 0  | L_DNA     *dnah;  /* height locations */  | 
2717  | 0  | NUMA      *naw;   /* widths */  | 
2718  | 0  | NUMA      *nah;   /* heights */  | 
2719  |  | 
  | 
2720  | 0  |     if (pnaw) *pnaw = NULL;  | 
2721  | 0  |     if (pnah) *pnah = NULL;  | 
2722  | 0  |     if (pmedw) *pmedw = 0;  | 
2723  | 0  |     if (pmedh) *pmedh = 0;  | 
2724  | 0  |     if (!pnaw && !pnah && !pmedw && !pmedh)  | 
2725  | 0  |         return ERROR_INT("no output requested", __func__, 1); | 
2726  | 0  |     if (!fname)  | 
2727  | 0  |         return ERROR_INT("fname not defined", __func__, 1); | 
2728  |  |  | 
2729  |  |         /* Make sure this a pdf file */  | 
2730  | 0  |     findFileFormat(fname, &format);  | 
2731  | 0  |     if (format != IFF_LPDF)  | 
2732  | 0  |         return ERROR_INT("file is not pdf", __func__, 1); | 
2733  |  |  | 
2734  |  |         /* Read the file into memory and find all locations of  | 
2735  |  |          * '/Width' and '/Height' */  | 
2736  | 0  |     if ((data = l_binaryRead(fname, &nread)) == NULL)  | 
2737  | 0  |         return ERROR_INT("full data not read", __func__, 1); | 
2738  | 0  |     dnaw = arrayFindEachSequence(data, nread, (const l_uint8 *)"/Width",  | 
2739  | 0  |                                  strlen("/Width")); | 
2740  | 0  |     dnah = arrayFindEachSequence(data, nread, (const l_uint8 *)"/Height",  | 
2741  | 0  |                                  strlen("/Height")); | 
2742  | 0  |     if (!dnaw)  | 
2743  | 0  |         L_WARNING("unable to find widths\n", __func__); | 
2744  | 0  |     if (!dnah)  | 
2745  | 0  |         L_WARNING("unable to find heights\n", __func__); | 
2746  | 0  |     if (!dnaw && !dnah) { | 
2747  | 0  |         LEPT_FREE(data);  | 
2748  | 0  |         L_WARNING("no fields found\n", __func__); | 
2749  | 0  |         return 0;  | 
2750  | 0  |     }  | 
2751  |  |  | 
2752  |  |         /* Find the page widths and heights */  | 
2753  | 0  |     nw = l_dnaGetCount(dnaw);  | 
2754  | 0  |     naw = numaCreate(nw);  | 
2755  | 0  |     for (i = 0; i < nw; i++) { | 
2756  | 0  |         l_dnaGetIValue(dnaw, i, &loc);  | 
2757  | 0  |         ret = sscanf((char *)&data[loc], "/Width %d", &width);  | 
2758  | 0  |         if (ret != 1) { | 
2759  | 0  |             L_ERROR("width not found for item %d at loc %d\n", | 
2760  | 0  |                     __func__, i, loc);  | 
2761  | 0  |             continue;  | 
2762  | 0  |         }  | 
2763  | 0  |         numaAddNumber(naw, width);  | 
2764  | 0  |     }  | 
2765  | 0  |     nh = l_dnaGetCount(dnah);  | 
2766  | 0  |     nah = numaCreate(nh);  | 
2767  | 0  |     for (i = 0; i < nh; i++) { | 
2768  | 0  |         l_dnaGetIValue(dnah, i, &loc);  | 
2769  | 0  |         ret = sscanf((char *)&data[loc], "/Height %d", &height);  | 
2770  | 0  |         if (ret != 1) { | 
2771  | 0  |             L_ERROR("height not found for item %d at loc %d\n", | 
2772  | 0  |                     __func__, i, loc);  | 
2773  | 0  |             continue;  | 
2774  | 0  |         }  | 
2775  | 0  |         numaAddNumber(nah, height);  | 
2776  | 0  |     }  | 
2777  |  | 
  | 
2778  | 0  |     LEPT_FREE(data);  | 
2779  | 0  |     l_dnaDestroy(&dnaw);  | 
2780  | 0  |     l_dnaDestroy(&dnah);  | 
2781  | 0  |     if (pmedw) { | 
2782  | 0  |         numaGetMedian(naw, &fval);  | 
2783  | 0  |         *pmedw = lept_roundftoi(fval);  | 
2784  | 0  |     }  | 
2785  | 0  |     if (pnaw)  | 
2786  | 0  |         *pnaw = naw;  | 
2787  | 0  |     else  | 
2788  | 0  |         numaDestroy(&naw);  | 
2789  | 0  |     if (pmedh) { | 
2790  | 0  |         numaGetMedian(nah, &fval);  | 
2791  | 0  |         *pmedh = lept_roundftoi(fval);  | 
2792  | 0  |     }  | 
2793  | 0  |     if (pnah)  | 
2794  | 0  |         *pnah = nah;  | 
2795  | 0  |     else  | 
2796  | 0  |         numaDestroy(&nah);  | 
2797  | 0  |     return 0;  | 
2798  | 0  | }  | 
2799  |  |  | 
2800  |  |  | 
2801  |  | /*!  | 
2802  |  |  * \brief   getPdfMediaBoxSizes()  | 
2803  |  |  *  | 
2804  |  |  * \param[in]    fname        filename  | 
2805  |  |  * \param[out]   pnaw         [optional] array of mediabox widths  | 
2806  |  |  * \param[out]   pnah         [optional] array of mediabox heights  | 
2807  |  |  * \param[out]   pmedw        [optional] median mediabox width  | 
2808  |  |  * \param[out]   pmedh        [optional] median mediabox height  | 
2809  |  |  * \return  0 if OK, 1 on error  | 
2810  |  |  *  | 
2811  |  |  * <pre>  | 
2812  |  |  * Notes:  | 
2813  |  |  *      (1) Finds the arguments of each instance of '/MediaBox' in the file.  | 
2814  |  |  *      (2) This will not work on encrypted pdf files or on files where  | 
2815  |  |  *          the "/MediaBoxes" field is binary compressed.  Not finding  | 
2816  |  |  *          the "/MediaBoxes" field is not an error, but a warning is given.  | 
2817  |  |  *      (3) This is useful for determining if the media boxes are  | 
2818  |  |  *          incorrectly assigned, such as assuming the resolution is 72 ppi.  | 
2819  |  |  *          If that happens and the input the the renderer assumes the  | 
2820  |  |  *          resolution is 300 ppi, the rendered images will be over 4x too  | 
2821  |  |  *          large in each dimension.  | 
2822  |  |  *      (4) An image dimension of 11 inches corresponds to a MediaBox  | 
2823  |  |  *          parameter of 792.  We consider a value > 850 to be oversized  | 
2824  |  |  *          and not to be taken literally.  | 
2825  |  |  * </pre>  | 
2826  |  |  */  | 
2827  |  | l_ok  | 
2828  |  | getPdfMediaBoxSizes(const char  *fname,  | 
2829  |  |                     NUMA       **pnaw,  | 
2830  |  |                     NUMA       **pnah,  | 
2831  |  |                     l_int32     *pmedw,  | 
2832  |  |                     l_int32     *pmedh)  | 
2833  | 0  | { | 
2834  | 0  | l_uint8   *data;  | 
2835  | 0  | l_int32    i, n, format, ret, loc;  | 
2836  | 0  | l_float32  fval, ignore1, ignore2, w, h;  | 
2837  | 0  | size_t     nread;  | 
2838  | 0  | L_DNA     *dna;   /* mediabox locations */  | 
2839  | 0  | NUMA      *naw;   /* mediabox widths */  | 
2840  | 0  | NUMA      *nah;   /* mediabox heights */  | 
2841  |  | 
  | 
2842  | 0  |     if (pnaw) *pnaw = NULL;  | 
2843  | 0  |     if (pnah) *pnah = NULL;  | 
2844  | 0  |     if (pmedw) *pmedw = 0;  | 
2845  | 0  |     if (pmedh) *pmedh = 0;  | 
2846  | 0  |     if (!pnaw && !pnah && !pmedw && !pmedh)  | 
2847  | 0  |         return ERROR_INT("no output requested", __func__, 1); | 
2848  | 0  |     if (!fname)  | 
2849  | 0  |         return ERROR_INT("fname not defined", __func__, 1); | 
2850  |  |  | 
2851  |  |         /* Make sure this a pdf file */  | 
2852  | 0  |     findFileFormat(fname, &format);  | 
2853  | 0  |     if (format != IFF_LPDF)  | 
2854  | 0  |         return ERROR_INT("file is not pdf", __func__, 1); | 
2855  |  |  | 
2856  |  |         /* Read the file into memory and find all locations of '/MediaBox' */  | 
2857  | 0  |     if ((data = l_binaryRead(fname, &nread)) == NULL)  | 
2858  | 0  |         return ERROR_INT("full data not read", __func__, 1); | 
2859  | 0  |     dna = arrayFindEachSequence(data, nread, (const l_uint8 *)"/MediaBox",  | 
2860  | 0  |                                 strlen("/MediaBox")); | 
2861  | 0  |     if (!dna) { | 
2862  | 0  |         LEPT_FREE(data);  | 
2863  | 0  |         L_WARNING("no mediaboxes found\n", __func__); | 
2864  | 0  |         return 1;  | 
2865  | 0  |     }  | 
2866  |  |  | 
2867  |  |         /* Find the mediabox widths and heights */  | 
2868  | 0  |     n = l_dnaGetCount(dna);  | 
2869  | 0  |     naw = numaCreate(n);  | 
2870  | 0  |     nah = numaCreate(n);  | 
2871  | 0  |     for (i = 0; i < n; i++) { | 
2872  | 0  |         l_dnaGetIValue(dna, i, &loc);  | 
2873  | 0  |         ret = sscanf((char *)&data[loc], "/MediaBox [ %f %f %f %f",  | 
2874  | 0  |                      &ignore1, &ignore2, &w, &h);  | 
2875  | 0  |         if (ret != 4) { | 
2876  | 0  |             L_ERROR("mediabox sizes not found for item %d at loc %d\n", | 
2877  | 0  |                     __func__, i, loc);  | 
2878  | 0  |             continue;  | 
2879  | 0  |         }  | 
2880  | 0  |         numaAddNumber(naw, w);  | 
2881  | 0  |         numaAddNumber(nah, h);  | 
2882  | 0  |     }  | 
2883  | 0  |     LEPT_FREE(data);  | 
2884  | 0  |     l_dnaDestroy(&dna);  | 
2885  |  | 
  | 
2886  | 0  |     if (pmedw) { | 
2887  | 0  |         numaGetMedian(naw, &fval);  | 
2888  | 0  |         *pmedw = lept_roundftoi(fval);  | 
2889  | 0  |         if (*pmedw > 850) lept_stderr("oversize width: %d\n", *pmedw); | 
2890  | 0  |     }  | 
2891  | 0  |     if (pnaw)  | 
2892  | 0  |         *pnaw = naw;  | 
2893  | 0  |     else  | 
2894  | 0  |         numaDestroy(&naw);  | 
2895  | 0  |     if (pmedh) { | 
2896  | 0  |         numaGetMedian(nah, &fval);  | 
2897  | 0  |         *pmedh = lept_roundftoi(fval);  | 
2898  | 0  |         if (*pmedh > 850) lept_stderr("oversize height: %d\n", *pmedh); | 
2899  | 0  |     }  | 
2900  | 0  |     if (pnah)  | 
2901  | 0  |         *pnah = nah;  | 
2902  | 0  |     else  | 
2903  | 0  |         numaDestroy(&nah);  | 
2904  | 0  |     return 0;  | 
2905  | 0  | }  | 
2906  |  |  | 
2907  |  |  | 
2908  |  | /*---------------------------------------------------------------------*  | 
2909  |  |  *       Find effective resolution of images rendered from a pdf       *  | 
2910  |  |  *---------------------------------------------------------------------*/  | 
2911  |  | /*!  | 
2912  |  |  * \brief   getPdfRendererResolution()  | 
2913  |  |  *  | 
2914  |  |  * \param[in]    infile       filename of input pdf file  | 
2915  |  |  * \param[in]    outdir       directory of rendered output images  | 
2916  |  |  * \param[out]   pres         desired resolution to use with renderer  | 
2917  |  |  * \return  0 if OK, 1 on error  | 
2918  |  |  *  | 
2919  |  |  * <pre>  | 
2920  |  |  * Notes:  | 
2921  |  |  *      (1) Finds the input resolution to pdftoppm that will generate  | 
2922  |  |  *          images with a maximum dimension of about 3300 pixels,  | 
2923  |  |  *          representing a full page at 300 ppi.  | 
2924  |  |  *      (2) It is most important is to make sure the renderer does  | 
2925  |  |  *          not make huge images because of an error in /MediaBox.  | 
2926  |  |  *          An image dimension of 11 inches corresponds to a MediaBox  | 
2927  |  |  *          parameter of 792.  We consider a value > 850 to be oversized  | 
2928  |  |  *          and not to be taken literally.  If the mediaboxes are  | 
2929  |  |  *          oversized, choose an appropriate lower resolution.  | 
2930  |  |  *      (3) If the mediaboxes are not accessible, render an image at  | 
2931  |  |  *          a low known resolution (say, 72 ppi) and based on the image  | 
2932  |  |  *          size, determine the resolution necessary to make an image  | 
2933  |  |  *          with 3300 pixels in the largest dimension.  | 
2934  |  |  *      (4) Requires pdftoppm, so this is disabled on windows for now.  | 
2935  |  |  *      (5) Requires the ability to call an external program, so it is  | 
2936  |  |  *          necessary to call setLeptDebugOK(1) before this function.  | 
2937  |  |  * </pre>  | 
2938  |  |  */  | 
2939  |  | l_ok  | 
2940  |  | getPdfRendererResolution(const char  *infile,  | 
2941  |  |                          const char  *outdir,  | 
2942  |  |                          l_int32     *pres)  | 
2943  | 0  | { | 
2944  | 0  | char      buf[256];  | 
2945  | 0  | char     *tail, *basename, *fname;  | 
2946  | 0  | l_int32   ret, res, medw, medh, medmax, npages, pageno, w, h;  | 
2947  | 0  | SARRAY   *sa;  | 
2948  |  | 
  | 
2949  | 0  |     if (!pres)  | 
2950  | 0  |         return ERROR_INT("&res not defined", __func__, 1); | 
2951  | 0  |     *pres = 300;  /* default */  | 
2952  |  | 
  | 
2953  |  | #ifdef _WIN32  | 
2954  |  |     L_INFO("Requires pdftoppm, so this is disabled on windows.\n" | 
2955  |  |            "Returns default resolution 300 ppi", __func__);  | 
2956  |  |     return 0;  | 
2957  |  | #endif  /* _WIN32 */  | 
2958  |  | 
  | 
2959  | 0  |     if (!LeptDebugOK) { | 
2960  | 0  |         L_INFO("Running pdftoppm is disabled; " | 
2961  | 0  |                "use setLeptDebugOK(1) to enable\n"  | 
2962  | 0  |                "returns default resolution 300 ppi\n", __func__);  | 
2963  | 0  |         return 1;  | 
2964  | 0  |     }  | 
2965  |  |  | 
2966  | 0  |     if (!infile)  | 
2967  | 0  |         return ERROR_INT("infile not defined", __func__, 1); | 
2968  | 0  |     if (!outdir)  | 
2969  | 0  |         return ERROR_INT("outdir not defined", __func__, 1); | 
2970  |  |  | 
2971  | 0  |     res = 300;  /* default value */  | 
2972  | 0  |     ret = getPdfMediaBoxSizes(infile, NULL, NULL, &medw, &medh);  | 
2973  | 0  |     if (ret == 0) {  /* Check for oversize mediaboxes */ | 
2974  | 0  |         lept_stderr("Media Box medians: medw = %d, medh = %d\n", medw, medh); | 
2975  | 0  |         medmax = L_MAX(medw, medh);  | 
2976  | 0  |         if (medmax > 850) { | 
2977  | 0  |             res = 300 * ((l_float32)792 / (l_float32)medmax);  | 
2978  | 0  |             lept_stderr(" Oversize media box; use resolution = %d\n", res); | 
2979  | 0  |             *pres = res;  | 
2980  | 0  |         }  | 
2981  | 0  |         return 0;  | 
2982  | 0  |     }  | 
2983  |  |  | 
2984  |  |         /* No mediaboxes; render one page and measure the max dimension */  | 
2985  | 0  |     lept_stderr("Media Box dimensions not found\n"); | 
2986  | 0  |     getPdfPageCount(infile, &npages);  | 
2987  | 0  |     pageno = (npages > 0) ? (npages + 1) / 2 : 1;  | 
2988  | 0  |     splitPathAtDirectory(infile, NULL, &tail);  | 
2989  | 0  |     splitPathAtExtension(tail, &basename, NULL);  | 
2990  | 0  |     snprintf(buf, sizeof(buf), "pdftoppm -f %d -l %d -r 72 %s %s/%s",  | 
2991  | 0  |              pageno, pageno, infile, outdir, basename);  | 
2992  | 0  |     LEPT_FREE(tail);  | 
2993  | 0  |     LEPT_FREE(basename);  | 
2994  | 0  |     callSystemDebug(buf);  /* pdftoppm */  | 
2995  |  |  | 
2996  |  |         /* Get the page size */  | 
2997  | 0  |     sa = getSortedPathnamesInDirectory(outdir, NULL, 0, 0);  | 
2998  | 0  |     fname = sarrayGetString(sa, 0, L_NOCOPY);  | 
2999  | 0  |     pixReadHeader(fname, NULL, &w, &h, NULL, NULL, NULL);  | 
3000  | 0  |     sarrayDestroy(&sa);  | 
3001  | 0  |     if (w > 0 && h > 0) { | 
3002  | 0  |         res = L_MIN((72 * 3300 / L_MAX(w, h)), 600);  | 
3003  | 0  |         *pres = res;  | 
3004  | 0  |         lept_stderr("Use resolution = %d\n", res); | 
3005  | 0  |     } else { | 
3006  | 0  |         L_ERROR("page size not found; assuming res = 300\n", __func__); | 
3007  | 0  |     }  | 
3008  |  | 
  | 
3009  | 0  |     return 0;  | 
3010  | 0  | }  | 
3011  |  |  | 
3012  |  |  | 
3013  |  | /*---------------------------------------------------------------------*  | 
3014  |  |  *                      Set flags for special modes                    *  | 
3015  |  |  *---------------------------------------------------------------------*/  | 
3016  |  | /*!  | 
3017  |  |  * \brief   l_pdfSetG4ImageMask()  | 
3018  |  |  *  | 
3019  |  |  * \param[in]    flag    1 for writing g4 data as fg only through a mask;  | 
3020  |  |  *                       0 for writing fg and bg  | 
3021  |  |  * \return  void  | 
3022  |  |  *  | 
3023  |  |  * <pre>  | 
3024  |  |  * Notes:  | 
3025  |  |  *      (1) The default is for writing only the fg (through the mask).  | 
3026  |  |  *          That way when you write a 1 bpp image, the bg is transparent,  | 
3027  |  |  *          so any previously written image remains visible behind it.  | 
3028  |  |  * </pre>  | 
3029  |  |  */  | 
3030  |  | void  | 
3031  |  | l_pdfSetG4ImageMask(l_int32  flag)  | 
3032  | 0  | { | 
3033  | 0  |     var_WRITE_G4_IMAGE_MASK = flag;  | 
3034  | 0  | }  | 
3035  |  |  | 
3036  |  |  | 
3037  |  | /*!  | 
3038  |  |  * \brief   l_pdfSetDateAndVersion()  | 
3039  |  |  *  | 
3040  |  |  * \param[in]    flag    1 for writing date/time and leptonica version;  | 
3041  |  |  *                       0 for omitting this from the metadata  | 
3042  |  |  * \return  void  | 
3043  |  |  *  | 
3044  |  |  * <pre>  | 
3045  |  |  * Notes:  | 
3046  |  |  *      (1) The default is for writing this data.  For regression tests  | 
3047  |  |  *          that compare output against golden files, it is useful to omit.  | 
3048  |  |  * </pre>  | 
3049  |  |  */  | 
3050  |  | void  | 
3051  |  | l_pdfSetDateAndVersion(l_int32  flag)  | 
3052  | 0  | { | 
3053  | 0  |     var_WRITE_DATE_AND_VERSION = flag;  | 
3054  | 0  | }  | 
3055  |  |  | 
3056  |  | /* --------------------------------------------*/  | 
3057  |  | #endif  /* USE_PDFIO */  | 
3058  |  | /* --------------------------------------------*/  |