/src/leptonica/src/pdfio1.c
Line | Count | Source (jump to first uncovered line) |
1 | | /*====================================================================* |
2 | | - Copyright (C) 2001 Leptonica. All rights reserved. |
3 | | - |
4 | | - Redistribution and use in source and binary forms, with or without |
5 | | - modification, are permitted provided that the following conditions |
6 | | - are met: |
7 | | - 1. Redistributions of source code must retain the above copyright |
8 | | - notice, this list of conditions and the following disclaimer. |
9 | | - 2. Redistributions in binary form must reproduce the above |
10 | | - copyright notice, this list of conditions and the following |
11 | | - disclaimer in the documentation and/or other materials |
12 | | - provided with the distribution. |
13 | | - |
14 | | - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
15 | | - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
16 | | - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
17 | | - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY |
18 | | - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
19 | | - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
20 | | - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
21 | | - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
22 | | - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
23 | | - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
24 | | - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
25 | | *====================================================================*/ |
26 | | |
27 | | /*! |
28 | | * \file pdfio1.c |
29 | | * <pre> |
30 | | * |
31 | | * Higher-level operations for generating pdf from images. |
32 | | * Use poppler's pdftoppm or pdfimages to invert the process, |
33 | | * extracting raster images from pdf. |
34 | | * |
35 | | * |=============================================================| |
36 | | * | Important notes | |
37 | | * |=============================================================| |
38 | | * | Some of these functions require I/O libraries such as | |
39 | | * | libtiff, libjpeg, libpng, libz and libopenjp2. If you do | |
40 | | * | not have these libraries, some calls will fail. For | |
41 | | * | example, if you do not have libopenjp2, you cannot write a | |
42 | | * | pdf where transcoding is required to incorporate a | |
43 | | * | jp2k image. | |
44 | | * | | |
45 | | * | You can manually deactivate all pdf writing by setting | |
46 | | * | this in environ.h: | |
47 | | * | \code | |
48 | | * | #define USE_PDFIO 0 | |
49 | | * | \endcode | |
50 | | * | This will link the stub file pdfiostub.c. | |
51 | | * |=============================================================| |
52 | | * |
53 | | * Set 1. These functions convert a set of image files |
54 | | * to a multi-page pdf file, with one image on each page. |
55 | | * All images are rendered at the same (input) resolution. |
56 | | * The images can be specified as being in a directory, or they |
57 | | * can be in an sarray. The output pdf can be either a file |
58 | | * or an array of bytes in memory. |
59 | | * |
60 | | * Set 2. These functions are a special case of set 1, where |
61 | | * no scaling or change in quality is required. For jpeg, jp2k and |
62 | | * tiffg4 images, the bytes in each file can be directly incorporated |
63 | | * into the output pdf, and the wrapping up of multiple image |
64 | | * files is very fast. For non-interlaced png, the data bytes |
65 | | * including the predictors can also be written directly into the |
66 | | * flate pdf data. For other image formats transcoding is required, |
67 | | * where the image data is first decompressed and then flate (gzip), |
68 | | * DCT (jpeg) or tiffg4 (1 bpp) encodings are generated. |
69 | | * |
70 | | * Set 3. These functions convert a set of images in memory |
71 | | * to a multi-page pdf, with one image on each page. The pdf |
72 | | * output can be either a file or an array of bytes in memory. |
73 | | * |
74 | | * Set 4. These functions implement a pdf output "device driver" |
75 | | * for wrapping (encoding) any number of images on a single page |
76 | | * in pdf. The input can be either an image file or a Pix; |
77 | | * the pdf output can be either a file or an array of bytes in memory. |
78 | | * |
79 | | * Set 5. These "segmented" functions take a set of image |
80 | | * files, along with optional segmentation information, and |
81 | | * generate a multi-page pdf file, where each page consists |
82 | | * in general of a mixed raster pdf of image and non-image regions. |
83 | | * The segmentation information for each page can be input as |
84 | | * either a mask over the image parts, or as a Boxa of those |
85 | | * regions. |
86 | | * |
87 | | * Set 6. These "segmented" functions convert an image and |
88 | | * an optional Boxa of image regions into a mixed raster pdf file |
89 | | * for the page. The input image can be either a file or a Pix. |
90 | | * |
91 | | * Set 7. These functions take a set of single-page pdf files |
92 | | * and concatenate them into a multi-page pdf. The input can be |
93 | | * a set of either single page pdf files or pdf 'strings' in memory. |
94 | | * The output can be either a file or an array of bytes in memory. |
95 | | * |
96 | | * The images in the pdf file can be rendered using a pdf viewer, |
97 | | * such as evince, gv, xpdf or acroread. |
98 | | * |
99 | | * Reference on the pdf file format: |
100 | | * http://www.adobe.com/devnet/pdf/pdf_reference_archive.html |
101 | | * |
102 | | * 1. Convert specified image files to pdf (one image file per page) |
103 | | * l_int32 convertFilesToPdf() |
104 | | * l_int32 saConvertFilesToPdf() |
105 | | * l_int32 saConvertFilesToPdfData() |
106 | | * l_int32 selectDefaultPdfEncoding() |
107 | | * |
108 | | * 2. Convert specified image files to pdf without scaling |
109 | | * l_int32 convertUnscaledFilesToPdf() |
110 | | * l_int32 saConvertUnscaledFilesToPdf() |
111 | | * l_int32 saConvertUnscaledFilesToPdfData() |
112 | | * l_int32 convertUnscaledToPdfData() |
113 | | * |
114 | | * 3. Convert multiple images to pdf (one image per page) |
115 | | * l_int32 pixaConvertToPdf() |
116 | | * l_int32 pixaConvertToPdfData() |
117 | | * |
118 | | * 4. Single page, multi-image converters |
119 | | * l_int32 convertToPdf() |
120 | | * l_int32 convertImageDataToPdf() |
121 | | * l_int32 convertToPdfData() |
122 | | * l_int32 convertImageDataToPdfData() |
123 | | * l_int32 pixConvertToPdf() |
124 | | * l_int32 pixWriteStreamPdf() |
125 | | * l_int32 pixWriteMemPdf() |
126 | | * |
127 | | * 5. Segmented multi-page, multi-image converter |
128 | | * l_int32 convertSegmentedFilesToPdf() |
129 | | * BOXAA *convertNumberedMasksToBoxaa() |
130 | | * |
131 | | * 6. Segmented single page, multi-image converters |
132 | | * l_int32 convertToPdfSegmented() |
133 | | * l_int32 pixConvertToPdfSegmented() |
134 | | * l_int32 convertToPdfDataSegmented() |
135 | | * l_int32 pixConvertToPdfDataSegmented() |
136 | | * |
137 | | * 7. Multipage concatenation |
138 | | * l_int32 concatenatePdf() |
139 | | * l_int32 saConcatenatePdf() |
140 | | * l_int32 ptraConcatenatePdf() |
141 | | * l_int32 concatenatePdfToData() |
142 | | * l_int32 saConcatenatePdfToData() |
143 | | * |
144 | | * The top-level multi-image functions can be visualized as follows: |
145 | | * Output pdf data to file: |
146 | | * convertToPdf() and convertImageDataToPdf() |
147 | | * --> pixConvertToPdf() |
148 | | * --> pixConvertToPdfData() |
149 | | * |
150 | | * Output pdf data to array in memory: |
151 | | * convertToPdfData() and convertImageDataToPdfData() |
152 | | * --> pixConvertToPdfData() |
153 | | * |
154 | | * The top-level segmented image functions can be visualized as follows: |
155 | | * Output pdf data to file: |
156 | | * convertToPdfSegmented() |
157 | | * --> pixConvertToPdfSegmented() |
158 | | * --> pixConvertToPdfDataSegmented() |
159 | | * |
160 | | * Output pdf data to array in memory: |
161 | | * convertToPdfDataSegmented() |
162 | | * --> pixConvertToPdfDataSegmented() |
163 | | * |
164 | | * For multi-page concatenation, there are three different types of input |
165 | | * (1) directory and optional filename filter |
166 | | * (2) sarray of filenames |
167 | | * (3) ptra of byte arrays of pdf data |
168 | | * and two types of output for the concatenated pdf data |
169 | | * (1) filename |
170 | | * (2) data array and size |
171 | | * High-level interfaces are given for each of the six combinations. |
172 | | * |
173 | | * Note: When wrapping small images into pdf, it is useful to give |
174 | | * them a relatively low resolution value, to avoid rounding errors |
175 | | * when rendering the images. For example, if you want an image |
176 | | * of width w pixels to be 5 inches wide on a screen, choose a |
177 | | * resolution w/5. |
178 | | * |
179 | | * The very fast functions in section (2) require neither transcoding |
180 | | * nor parsing of the compressed jpeg file. With three types of image |
181 | | * compression, the compressed strings can be incorporated into |
182 | | * the pdf data without decompression and re-encoding: jpeg, jp2k |
183 | | * and png. The DCTDecode and JPXDecode filters can handle the |
184 | | * entire jpeg and jp2k encoded string as a byte array in the pdf file. |
185 | | * The FlateDecode filter can handle the png compressed image data, |
186 | | * including predictors that occur as the first byte in each |
187 | | * raster line, but it is necessary to store only the png IDAT chunk |
188 | | * data in the pdf array. The alternative for wrapping png images |
189 | | * is to transcode them: uncompress into a raster (a pix) and then |
190 | | * gzip the raster data. This typically results in a larger pdf file |
191 | | * because it doesn't use the two-dimensional png predictor. |
192 | | * Colormaps, which are found in png PLTE chunks, must always be |
193 | | * pulled out and included separately in the pdf. For CCITT-G4 |
194 | | * compression, you can not simply include a tiff G4 file -- you must |
195 | | * either parse it and extract the G4 compressed data within it, |
196 | | * or uncompress to a raster and G4 compress again. |
197 | | * </pre> |
198 | | */ |
199 | | |
200 | | #ifdef HAVE_CONFIG_H |
201 | | #include <config_auto.h> |
202 | | #endif /* HAVE_CONFIG_H */ |
203 | | |
204 | | #include <string.h> |
205 | | #include <math.h> |
206 | | #include "allheaders.h" |
207 | | |
208 | | /* --------------------------------------------*/ |
209 | | #if USE_PDFIO /* defined in environ.h */ |
210 | | /* --------------------------------------------*/ |
211 | | |
212 | | /* Typical scan resolution in ppi (pixels/inch) */ |
213 | | static const l_int32 DefaultInputRes = 300; |
214 | | |
215 | | /*---------------------------------------------------------------------* |
216 | | * Convert specified image files to pdf (one image file per page) * |
217 | | *---------------------------------------------------------------------*/ |
218 | | /*! |
219 | | * \brief convertFilesToPdf() |
220 | | * |
221 | | * \param[in] dirname directory name containing images |
222 | | * \param[in] substr [optional] substring filter on filenames; |
223 | | * can be null |
224 | | * \param[in] res input resolution of all images |
225 | | * \param[in] scalefactor scaling factor applied to each image; > 0.0 |
226 | | * \param[in] type encoding type (L_JPEG_ENCODE, L_G4_ENCODE, |
227 | | * L_FLATE_ENCODE, L_JP2K_ENCODE or |
228 | | * L_DEFAULT_ENCODE for default) |
229 | | * \param[in] quality for jpeg: 1-100; 0 for default (75) |
230 | | * for jp2k: 27-45; 0 for default (34) |
231 | | * \param[in] title [optional] pdf title; can be null |
232 | | * \param[in] fileout pdf file of all images |
233 | | * \return 0 if OK, 1 on error |
234 | | * |
235 | | * <pre> |
236 | | * Notes: |
237 | | * (1) If %substr is not NULL, only image filenames that contain |
238 | | * the substring can be used. If %substr == NULL, all files |
239 | | * in the directory are used. |
240 | | * (2) The files in the directory, after optional filtering by |
241 | | * the substring, are lexically sorted in increasing order |
242 | | * before concatenation. |
243 | | * (3) The scalefactor is applied to each image before encoding. |
244 | | * If you enter a value <= 0.0, it will be set to 1.0. |
245 | | * (4) Specifying one of the four encoding types for %type forces |
246 | | * all images to be compressed with that type. Use 0 to have |
247 | | * the type determined for each image based on depth and whether |
248 | | * or not it has a colormap. |
249 | | * </pre> |
250 | | */ |
251 | | l_ok |
252 | | convertFilesToPdf(const char *dirname, |
253 | | const char *substr, |
254 | | l_int32 res, |
255 | | l_float32 scalefactor, |
256 | | l_int32 type, |
257 | | l_int32 quality, |
258 | | const char *title, |
259 | | const char *fileout) |
260 | 0 | { |
261 | 0 | l_int32 ret; |
262 | 0 | SARRAY *sa; |
263 | |
|
264 | 0 | if (!dirname) |
265 | 0 | return ERROR_INT("dirname not defined", __func__, 1); |
266 | 0 | if (!fileout) |
267 | 0 | return ERROR_INT("fileout not defined", __func__, 1); |
268 | | |
269 | 0 | if ((sa = getSortedPathnamesInDirectory(dirname, substr, 0, 0)) == NULL) |
270 | 0 | return ERROR_INT("sa not made", __func__, 1); |
271 | 0 | ret = saConvertFilesToPdf(sa, res, scalefactor, type, quality, |
272 | 0 | title, fileout); |
273 | 0 | sarrayDestroy(&sa); |
274 | 0 | return ret; |
275 | 0 | } |
276 | | |
277 | | |
278 | | /*! |
279 | | * \brief saConvertFilesToPdf() |
280 | | * |
281 | | * \param[in] sa string array of pathnames for images |
282 | | * \param[in] res input resolution of all images |
283 | | * \param[in] scalefactor scaling factor applied to each image; > 0.0 |
284 | | * \param[in] type encoding type (L_JPEG_ENCODE, L_G4_ENCODE, |
285 | | * L_FLATE_ENCODE, L_JP2K_ENCODE or |
286 | | * L_DEFAULT_ENCODE for default) |
287 | | * \param[in] quality for jpeg: 1-100; 0 for default (75) |
288 | | * for jp2k: 27-45; 0 for default (34) |
289 | | * \param[in] title [optional] pdf title; can be null |
290 | | * \param[in] fileout pdf file of all images |
291 | | * \return 0 if OK, 1 on error |
292 | | * |
293 | | * <pre> |
294 | | * Notes: |
295 | | * (1) See convertFilesToPdf(). |
296 | | * </pre> |
297 | | */ |
298 | | l_ok |
299 | | saConvertFilesToPdf(SARRAY *sa, |
300 | | l_int32 res, |
301 | | l_float32 scalefactor, |
302 | | l_int32 type, |
303 | | l_int32 quality, |
304 | | const char *title, |
305 | | const char *fileout) |
306 | 0 | { |
307 | 0 | l_uint8 *data; |
308 | 0 | l_int32 ret; |
309 | 0 | size_t nbytes; |
310 | |
|
311 | 0 | if (!sa) |
312 | 0 | return ERROR_INT("sa not defined", __func__, 1); |
313 | | |
314 | 0 | ret = saConvertFilesToPdfData(sa, res, scalefactor, type, quality, |
315 | 0 | title, &data, &nbytes); |
316 | 0 | if (ret) { |
317 | 0 | if (data) LEPT_FREE(data); |
318 | 0 | return ERROR_INT("pdf data not made", __func__, 1); |
319 | 0 | } |
320 | | |
321 | 0 | ret = l_binaryWrite(fileout, "w", data, nbytes); |
322 | 0 | LEPT_FREE(data); |
323 | 0 | if (ret) |
324 | 0 | L_ERROR("pdf data not written to file\n", __func__); |
325 | 0 | return ret; |
326 | 0 | } |
327 | | |
328 | | |
329 | | /*! |
330 | | * \brief saConvertFilesToPdfData() |
331 | | * |
332 | | * \param[in] sa string array of pathnames for images |
333 | | * \param[in] res input resolution of all images |
334 | | * \param[in] scalefactor scaling factor applied to each image; > 0.0 |
335 | | * \param[in] type encoding type (L_JPEG_ENCODE, L_G4_ENCODE, |
336 | | * L_FLATE_ENCODE, L_JP2K_ENCODE or |
337 | | * L_DEFAULT_ENCODE for default) |
338 | | * \param[in] quality for jpeg: 1-100; 0 for default (75) |
339 | | * for jp2k: 27-45; 0 for default (34) |
340 | | * \param[in] title [optional] pdf title; can be null |
341 | | * \param[out] pdata output pdf data (of all images |
342 | | * \param[out] pnbytes size of output pdf data |
343 | | * \return 0 if OK, 1 on error |
344 | | * |
345 | | * <pre> |
346 | | * Notes: |
347 | | * (1) See convertFilesToPdf(). |
348 | | * </pre> |
349 | | */ |
350 | | l_ok |
351 | | saConvertFilesToPdfData(SARRAY *sa, |
352 | | l_int32 res, |
353 | | l_float32 scalefactor, |
354 | | l_int32 type, |
355 | | l_int32 quality, |
356 | | const char *title, |
357 | | l_uint8 **pdata, |
358 | | size_t *pnbytes) |
359 | 0 | { |
360 | 0 | char *fname; |
361 | 0 | l_uint8 *imdata; |
362 | 0 | l_int32 i, n, ret, pagetype, npages, scaledres; |
363 | 0 | size_t imbytes; |
364 | 0 | L_BYTEA *ba; |
365 | 0 | PIX *pixs, *pix; |
366 | 0 | L_PTRA *pa_data; |
367 | |
|
368 | 0 | if (!pdata) |
369 | 0 | return ERROR_INT("&data not defined", __func__, 1); |
370 | 0 | *pdata = NULL; |
371 | 0 | if (!pnbytes) |
372 | 0 | return ERROR_INT("&nbytes not defined", __func__, 1); |
373 | 0 | *pnbytes = 0; |
374 | 0 | if (!sa) |
375 | 0 | return ERROR_INT("sa not defined", __func__, 1); |
376 | 0 | if (scalefactor <= 0.0) scalefactor = 1.0; |
377 | 0 | if (type != L_JPEG_ENCODE && type != L_G4_ENCODE && |
378 | 0 | type != L_FLATE_ENCODE && type != L_JP2K_ENCODE) { |
379 | 0 | type = L_DEFAULT_ENCODE; |
380 | 0 | } |
381 | | |
382 | | /* Generate all the encoded pdf strings */ |
383 | 0 | n = sarrayGetCount(sa); |
384 | 0 | pa_data = ptraCreate(n); |
385 | 0 | for (i = 0; i < n; i++) { |
386 | 0 | if (i && (i % 10 == 0)) lept_stderr(".. %d ", i); |
387 | 0 | fname = sarrayGetString(sa, i, L_NOCOPY); |
388 | 0 | if ((pixs = pixRead(fname)) == NULL) { |
389 | 0 | L_ERROR("image not readable from file %s\n", __func__, fname); |
390 | 0 | continue; |
391 | 0 | } |
392 | 0 | if (scalefactor != 1.0) |
393 | 0 | pix = pixScale(pixs, scalefactor, scalefactor); |
394 | 0 | else |
395 | 0 | pix = pixClone(pixs); |
396 | 0 | pixDestroy(&pixs); |
397 | 0 | scaledres = (l_int32)(res * scalefactor); |
398 | | |
399 | | /* Select the encoding type */ |
400 | 0 | if (type != L_DEFAULT_ENCODE) { |
401 | 0 | pagetype = type; |
402 | 0 | } else if (selectDefaultPdfEncoding(pix, &pagetype) != 0) { |
403 | 0 | pixDestroy(&pix); |
404 | 0 | L_ERROR("encoding type selection failed for file %s\n", |
405 | 0 | __func__, fname); |
406 | 0 | continue; |
407 | 0 | } |
408 | | |
409 | 0 | ret = pixConvertToPdfData(pix, pagetype, quality, &imdata, &imbytes, |
410 | 0 | 0, 0, scaledres, title, NULL, 0); |
411 | 0 | pixDestroy(&pix); |
412 | 0 | if (ret) { |
413 | 0 | LEPT_FREE(imdata); |
414 | 0 | L_ERROR("pdf encoding failed for %s\n", __func__, fname); |
415 | 0 | continue; |
416 | 0 | } |
417 | 0 | ba = l_byteaInitFromMem(imdata, imbytes); |
418 | 0 | LEPT_FREE(imdata); |
419 | 0 | ptraAdd(pa_data, ba); |
420 | 0 | } |
421 | 0 | ptraGetActualCount(pa_data, &npages); |
422 | 0 | if (npages == 0) { |
423 | 0 | L_ERROR("no pdf files made\n", __func__); |
424 | 0 | ptraDestroy(&pa_data, FALSE, FALSE); |
425 | 0 | return 1; |
426 | 0 | } |
427 | | |
428 | | /* Concatenate them */ |
429 | 0 | lept_stderr("\nconcatenating ... "); |
430 | 0 | ret = ptraConcatenatePdfToData(pa_data, NULL, pdata, pnbytes); |
431 | 0 | lept_stderr("done\n"); |
432 | |
|
433 | 0 | ptraGetActualCount(pa_data, &npages); /* recalculate in case it changes */ |
434 | 0 | for (i = 0; i < npages; i++) { |
435 | 0 | ba = (L_BYTEA *)ptraRemove(pa_data, i, L_NO_COMPACTION); |
436 | 0 | l_byteaDestroy(&ba); |
437 | 0 | } |
438 | 0 | ptraDestroy(&pa_data, FALSE, FALSE); |
439 | 0 | return ret; |
440 | 0 | } |
441 | | |
442 | | |
443 | | /*! |
444 | | * \brief selectDefaultPdfEncoding() |
445 | | * |
446 | | * \param[in] pix |
447 | | * \param[out] ptype L_G4_ENCODE, L_JPEG_ENCODE, L_FLATE_ENCODE |
448 | | * \return 0 if OK, 1 on error |
449 | | * |
450 | | * <pre> |
451 | | * Notes: |
452 | | * (1) This attempts to choose an encoding for the pix that results |
453 | | * in the smallest file, assuming that if jpeg encoded, it will |
454 | | * use quality = 75. The decision is approximate, in that |
455 | | * (a) all colormapped images will be losslessly encoded with |
456 | | * gzip (flate), and (b) an image with less than about 20 colors |
457 | | * is likely to be smaller if flate encoded than if encoded |
458 | | * as a jpeg (dct). For example, an image made by pixScaleToGray3() |
459 | | * will have 10 colors, and flate encoding will give about |
460 | | * twice the compression as jpeg with quality = 75. |
461 | | * (2) We could have used L_JP2K_ENCODE instead of L_JPEG_ENCODE. |
462 | | * However, the jp2k compression is not much better than jpeg, and |
463 | | * the jpeg library is more commonly available than the jp2k library. |
464 | | * </pre> |
465 | | */ |
466 | | l_ok |
467 | | selectDefaultPdfEncoding(PIX *pix, |
468 | | l_int32 *ptype) |
469 | 0 | { |
470 | 0 | l_int32 w, h, d, factor, ncolors; |
471 | 0 | PIXCMAP *cmap; |
472 | |
|
473 | 0 | if (!ptype) |
474 | 0 | return ERROR_INT("&type not defined", __func__, 1); |
475 | 0 | *ptype = L_FLATE_ENCODE; /* default universal encoding */ |
476 | 0 | if (!pix) |
477 | 0 | return ERROR_INT("pix not defined", __func__, 1); |
478 | 0 | pixGetDimensions(pix, &w, &h, &d); |
479 | 0 | cmap = pixGetColormap(pix); |
480 | 0 | if (d == 8 && !cmap) { |
481 | 0 | factor = L_MAX(1, (l_int32)sqrt((l_float64)(w * h) / 20000.)); |
482 | 0 | pixNumColors(pix, factor, &ncolors); |
483 | 0 | if (ncolors < 20) |
484 | 0 | *ptype = L_FLATE_ENCODE; |
485 | 0 | else |
486 | 0 | *ptype = L_JPEG_ENCODE; |
487 | 0 | } else if (d == 1) { |
488 | 0 | *ptype = L_G4_ENCODE; |
489 | 0 | } else if (cmap || d == 2 || d == 4) { |
490 | 0 | *ptype = L_FLATE_ENCODE; |
491 | 0 | } else if (d == 8 || d == 32) { |
492 | 0 | *ptype = L_JPEG_ENCODE; |
493 | 0 | } else if (d == 16) { |
494 | 0 | *ptype = L_FLATE_ENCODE; |
495 | 0 | } else { |
496 | 0 | return ERROR_INT("type selection failure", __func__, 1); |
497 | 0 | } |
498 | | |
499 | 0 | return 0; |
500 | 0 | } |
501 | | |
502 | | |
503 | | /*---------------------------------------------------------------------* |
504 | | * Convert specified image files to pdf without scaling * |
505 | | *---------------------------------------------------------------------*/ |
506 | | /*! |
507 | | * \brief convertUnscaledFilesToPdf() |
508 | | * |
509 | | * \param[in] dirname directory name containing images |
510 | | * \param[in] substr [optional] substring filter on filenames; |
511 | | * can be null |
512 | | * \param[in] title [optional] pdf title; can be null |
513 | | * \param[in] fileout pdf file of all images |
514 | | * \return 0 if OK, 1 on error |
515 | | * |
516 | | * <pre> |
517 | | * Notes: |
518 | | * (1) If %substr is not NULL, only image filenames that contain |
519 | | * the substring can be used. If %substr == NULL, all files |
520 | | * in the directory are used. |
521 | | * (2) The files in the directory, after optional filtering by |
522 | | * the substring, are lexically sorted in increasing order |
523 | | * before concatenation. |
524 | | * (3) This is very fast for jpeg, jp2k and some png files, |
525 | | * because the compressed data is wrapped up and concatenated. |
526 | | * For other types of png, the images must be read and recompressed. |
527 | | * </pre> |
528 | | */ |
529 | | l_ok |
530 | | convertUnscaledFilesToPdf(const char *dirname, |
531 | | const char *substr, |
532 | | const char *title, |
533 | | const char *fileout) |
534 | 0 | { |
535 | 0 | l_int32 ret; |
536 | 0 | SARRAY *sa; |
537 | |
|
538 | 0 | if (!dirname) |
539 | 0 | return ERROR_INT("dirname not defined", __func__, 1); |
540 | 0 | if (!fileout) |
541 | 0 | return ERROR_INT("fileout not defined", __func__, 1); |
542 | | |
543 | 0 | if ((sa = getSortedPathnamesInDirectory(dirname, substr, 0, 0)) == NULL) |
544 | 0 | return ERROR_INT("sa not made", __func__, 1); |
545 | 0 | ret = saConvertUnscaledFilesToPdf(sa, title, fileout); |
546 | 0 | sarrayDestroy(&sa); |
547 | 0 | return ret; |
548 | 0 | } |
549 | | |
550 | | |
551 | | /*! |
552 | | * \brief saConvertUnscaledFilesToPdf() |
553 | | * |
554 | | * \param[in] sa string array of pathnames for images |
555 | | * \param[in] title [optional] pdf title; can be null |
556 | | * \param[in] fileout pdf file of all images |
557 | | * \return 0 if OK, 1 on error |
558 | | * |
559 | | * <pre> |
560 | | * Notes: |
561 | | * (1) See convertUnscaledFilesToPdf(). |
562 | | * </pre> |
563 | | */ |
564 | | l_ok |
565 | | saConvertUnscaledFilesToPdf(SARRAY *sa, |
566 | | const char *title, |
567 | | const char *fileout) |
568 | 0 | { |
569 | 0 | l_uint8 *data; |
570 | 0 | l_int32 ret; |
571 | 0 | size_t nbytes; |
572 | |
|
573 | 0 | if (!sa) |
574 | 0 | return ERROR_INT("sa not defined", __func__, 1); |
575 | | |
576 | 0 | ret = saConvertUnscaledFilesToPdfData(sa, title, &data, &nbytes); |
577 | 0 | if (ret) { |
578 | 0 | if (data) LEPT_FREE(data); |
579 | 0 | return ERROR_INT("pdf data not made", __func__, 1); |
580 | 0 | } |
581 | | |
582 | 0 | ret = l_binaryWrite(fileout, "w", data, nbytes); |
583 | 0 | LEPT_FREE(data); |
584 | 0 | if (ret) |
585 | 0 | L_ERROR("pdf data not written to file\n", __func__); |
586 | 0 | return ret; |
587 | 0 | } |
588 | | |
589 | | |
590 | | /*! |
591 | | * \brief saConvertUnscaledFilesToPdfData() |
592 | | * |
593 | | * \param[in] sa string array of pathnames for image files |
594 | | * \param[in] title [optional] pdf title; can be null |
595 | | * \param[out] pdata output pdf data (of all images) |
596 | | * \param[out] pnbytes size of output pdf data |
597 | | * \return 0 if OK, 1 on error |
598 | | * |
599 | | * <pre> |
600 | | * Notes: |
601 | | * (1) This is very fast for jpeg, jp2k and some png files, |
602 | | * because the compressed data is wrapped up and concatenated. |
603 | | * For other types of png, the images must be read and recompressed. |
604 | | * </pre> |
605 | | */ |
606 | | l_ok |
607 | | saConvertUnscaledFilesToPdfData(SARRAY *sa, |
608 | | const char *title, |
609 | | l_uint8 **pdata, |
610 | | size_t *pnbytes) |
611 | 0 | { |
612 | 0 | char *fname; |
613 | 0 | l_uint8 *imdata; |
614 | 0 | l_int32 i, n, ret, npages; |
615 | 0 | size_t imbytes; |
616 | 0 | L_BYTEA *ba; |
617 | 0 | L_PTRA *pa_data; |
618 | |
|
619 | 0 | if (!pdata) |
620 | 0 | return ERROR_INT("&data not defined", __func__, 1); |
621 | 0 | *pdata = NULL; |
622 | 0 | if (!pnbytes) |
623 | 0 | return ERROR_INT("&nbytes not defined", __func__, 1); |
624 | 0 | *pnbytes = 0; |
625 | 0 | if (!sa) |
626 | 0 | return ERROR_INT("sa not defined", __func__, 1); |
627 | | |
628 | | /* Generate all the encoded pdf strings */ |
629 | 0 | n = sarrayGetCount(sa); |
630 | 0 | pa_data = ptraCreate(n); |
631 | 0 | for (i = 0; i < n; i++) { |
632 | 0 | if (i && (i % 10 == 0)) lept_stderr(".. %d ", i); |
633 | 0 | fname = sarrayGetString(sa, i, L_NOCOPY); |
634 | | |
635 | | /* Generate the pdf data */ |
636 | 0 | if (convertUnscaledToPdfData(fname, title, &imdata, &imbytes)) |
637 | 0 | continue; |
638 | | |
639 | | /* ... and add it to the array of single page data */ |
640 | 0 | ba = l_byteaInitFromMem(imdata, imbytes); |
641 | 0 | if (imdata) LEPT_FREE(imdata); |
642 | 0 | ptraAdd(pa_data, ba); |
643 | 0 | } |
644 | 0 | ptraGetActualCount(pa_data, &npages); |
645 | 0 | if (npages == 0) { |
646 | 0 | L_ERROR("no pdf files made\n", __func__); |
647 | 0 | ptraDestroy(&pa_data, FALSE, FALSE); |
648 | 0 | return 1; |
649 | 0 | } |
650 | | |
651 | | /* Concatenate to generate a multipage pdf */ |
652 | 0 | lept_stderr("\nconcatenating ... "); |
653 | 0 | ret = ptraConcatenatePdfToData(pa_data, NULL, pdata, pnbytes); |
654 | 0 | lept_stderr("done\n"); |
655 | | |
656 | | /* Clean up */ |
657 | 0 | ptraGetActualCount(pa_data, &npages); /* maybe failed to read some files */ |
658 | 0 | for (i = 0; i < npages; i++) { |
659 | 0 | ba = (L_BYTEA *)ptraRemove(pa_data, i, L_NO_COMPACTION); |
660 | 0 | l_byteaDestroy(&ba); |
661 | 0 | } |
662 | 0 | ptraDestroy(&pa_data, FALSE, FALSE); |
663 | 0 | return ret; |
664 | 0 | } |
665 | | |
666 | | |
667 | | /*! |
668 | | * \brief convertUnscaledToPdfData() |
669 | | * |
670 | | * \param[in] fname of image file in all formats |
671 | | * \param[in] title [optional] pdf title; can be null |
672 | | * \param[out] pdata output pdf data for image |
673 | | * \param[out] pnbytes size of output pdf data |
674 | | * \return 0 if OK, 1 on error |
675 | | * |
676 | | * <pre> |
677 | | * Notes: |
678 | | * (1) This is very fast for jpeg, jp2k and some png files, |
679 | | * because the compressed data is wrapped up and concatenated. |
680 | | * For other types of png, the images must be read and recompressed. |
681 | | * </pre> |
682 | | */ |
683 | | l_ok |
684 | | convertUnscaledToPdfData(const char *fname, |
685 | | const char *title, |
686 | | l_uint8 **pdata, |
687 | | size_t *pnbytes) |
688 | 0 | { |
689 | 0 | l_int32 format; |
690 | 0 | L_COMP_DATA *cid; |
691 | |
|
692 | 0 | if (!pdata) |
693 | 0 | return ERROR_INT("&data not defined", __func__, 1); |
694 | 0 | *pdata = NULL; |
695 | 0 | if (!pnbytes) |
696 | 0 | return ERROR_INT("&nbytes not defined", __func__, 1); |
697 | 0 | *pnbytes = 0; |
698 | 0 | if (!fname) |
699 | 0 | return ERROR_INT("fname not defined", __func__, 1); |
700 | | |
701 | 0 | findFileFormat(fname, &format); |
702 | 0 | if (format == IFF_UNKNOWN) { |
703 | 0 | L_WARNING("file %s format is unknown; skip\n", __func__, fname); |
704 | 0 | return 1; |
705 | 0 | } |
706 | 0 | if (format == IFF_PS || format == IFF_LPDF) { |
707 | 0 | L_WARNING("file %s format is %d; skip\n", __func__, fname, format); |
708 | 0 | return 1; |
709 | 0 | } |
710 | | |
711 | | /* Generate the image data required for pdf generation, always |
712 | | * in binary (not ascii85) coding. Note that jpeg, jp2k and some |
713 | | * png files are not transcoded. */ |
714 | 0 | l_generateCIDataForPdf(fname, NULL, 0, &cid); |
715 | 0 | if (!cid) { |
716 | 0 | L_ERROR("file %s format is %d; unreadable\n", __func__, fname, format); |
717 | 0 | return 1; |
718 | 0 | } |
719 | | |
720 | | /* Generate the pdf string for this page (image). This destroys |
721 | | * the cid by attaching it to an lpd and destroying the lpd. */ |
722 | 0 | cidConvertToPdfData(cid, title, pdata, pnbytes); |
723 | 0 | return 0; |
724 | 0 | } |
725 | | |
726 | | |
727 | | /*---------------------------------------------------------------------* |
728 | | * Convert multiple images to pdf (one image per page) * |
729 | | *---------------------------------------------------------------------*/ |
730 | | /*! |
731 | | * \brief pixaConvertToPdf() |
732 | | * |
733 | | * \param[in] pixa containing images all at the same resolution |
734 | | * \param[in] res override the resolution of each input image, |
735 | | * in ppi; use 0 to respect the resolution |
736 | | * embedded in the input images |
737 | | * \param[in] scalefactor scaling factor applied to each image; > 0.0 |
738 | | * \param[in] type encoding type (L_JPEG_ENCODE, L_G4_ENCODE, |
739 | | * L_FLATE_ENCODE, L_JP2K_ENCODE, or |
740 | | * L_DEFAULT_ENCODE for default) |
741 | | * \param[in] quality for jpeg: 1-100; 0 for default (75) |
742 | | * for jp2k: 27-45; 0 for default (34) |
743 | | * \param[in] title [optional] pdf title; can be null |
744 | | * \param[in] fileout pdf file of all images |
745 | | * \return 0 if OK, 1 on error |
746 | | * |
747 | | * <pre> |
748 | | * Notes: |
749 | | * (1) The images are encoded with G4 if 1 bpp; JPEG if 8 bpp without |
750 | | * colormap and many colors, or 32 bpp; FLATE for anything else. |
751 | | * (2) The scalefactor must be > 0.0; otherwise it is set to 1.0. |
752 | | * (3) Specifying one of the three encoding types for %type forces |
753 | | * all images to be compressed with that type. Use 0 to have |
754 | | * the type determined for each image based on depth and whether |
755 | | * or not it has a colormap. |
756 | | * </pre> |
757 | | */ |
758 | | l_ok |
759 | | pixaConvertToPdf(PIXA *pixa, |
760 | | l_int32 res, |
761 | | l_float32 scalefactor, |
762 | | l_int32 type, |
763 | | l_int32 quality, |
764 | | const char *title, |
765 | | const char *fileout) |
766 | 0 | { |
767 | 0 | l_uint8 *data; |
768 | 0 | l_int32 ret; |
769 | 0 | size_t nbytes; |
770 | |
|
771 | 0 | if (!pixa) |
772 | 0 | return ERROR_INT("pixa not defined", __func__, 1); |
773 | | |
774 | 0 | ret = pixaConvertToPdfData(pixa, res, scalefactor, type, quality, |
775 | 0 | title, &data, &nbytes); |
776 | 0 | if (ret) { |
777 | 0 | LEPT_FREE(data); |
778 | 0 | return ERROR_INT("conversion to pdf failed", __func__, 1); |
779 | 0 | } |
780 | | |
781 | 0 | ret = l_binaryWrite(fileout, "w", data, nbytes); |
782 | 0 | LEPT_FREE(data); |
783 | 0 | if (ret) |
784 | 0 | L_ERROR("pdf data not written to file\n", __func__); |
785 | 0 | return ret; |
786 | 0 | } |
787 | | |
788 | | |
789 | | /*! |
790 | | * \brief pixaConvertToPdfData() |
791 | | * |
792 | | * \param[in] pixa containing images all at the same resolution |
793 | | * \param[in] res input resolution of all images |
794 | | * \param[in] scalefactor scaling factor applied to each image; > 0.0; <50 |
795 | | * \param[in] type encoding type (L_JPEG_ENCODE, L_G4_ENCODE, |
796 | | * L_FLATE_ENCODE, L_JP2K_ENCODE, or |
797 | | * L_DEFAULT_ENCODE for default) |
798 | | * \param[in] quality for jpeg: 1-100; 0 for default (75) |
799 | | * for jp2k: 27-45; 0 for default (34) |
800 | | * \param[in] title [optional] pdf title; can be null |
801 | | * \param[out] pdata output pdf data of all images |
802 | | * \param[out] pnbytes size of output pdf data |
803 | | * \return 0 if OK, 1 on error |
804 | | * |
805 | | * <pre> |
806 | | * Notes: |
807 | | * (1) See pixaConvertToPdf(). |
808 | | * </pre> |
809 | | */ |
810 | | l_ok |
811 | | pixaConvertToPdfData(PIXA *pixa, |
812 | | l_int32 res, |
813 | | l_float32 scalefactor, |
814 | | l_int32 type, |
815 | | l_int32 quality, |
816 | | const char *title, |
817 | | l_uint8 **pdata, |
818 | | size_t *pnbytes) |
819 | 0 | { |
820 | 0 | l_uint8 *imdata; |
821 | 0 | l_int32 i, n, ret, scaledres, pagetype; |
822 | 0 | size_t imbytes; |
823 | 0 | L_BYTEA *ba; |
824 | 0 | PIX *pixs, *pix; |
825 | 0 | L_PTRA *pa_data; |
826 | |
|
827 | 0 | if (!pdata) |
828 | 0 | return ERROR_INT("&data not defined", __func__, 1); |
829 | 0 | *pdata = NULL; |
830 | 0 | if (!pnbytes) |
831 | 0 | return ERROR_INT("&nbytes not defined", __func__, 1); |
832 | 0 | *pnbytes = 0; |
833 | 0 | if (!pixa) |
834 | 0 | return ERROR_INT("pixa not defined", __func__, 1); |
835 | 0 | if (scalefactor <= 0.0) scalefactor = 1.0; |
836 | 0 | if (scalefactor >= 50.0) |
837 | 0 | return ERROR_INT("scalefactor too large", __func__, 1); |
838 | 0 | if (type != L_DEFAULT_ENCODE && type != L_JPEG_ENCODE && |
839 | 0 | type != L_G4_ENCODE && type != L_FLATE_ENCODE && |
840 | 0 | type != L_JP2K_ENCODE) { |
841 | 0 | L_WARNING("invalid compression type; using per-page default\n", |
842 | 0 | __func__); |
843 | 0 | type = L_DEFAULT_ENCODE; |
844 | 0 | } |
845 | 0 | if (quality < 0 || quality > 100) |
846 | 0 | return ERROR_INT("invalid quality", __func__, 1); |
847 | | |
848 | | /* Generate all the encoded pdf strings */ |
849 | 0 | n = pixaGetCount(pixa); |
850 | 0 | pa_data = ptraCreate(n); |
851 | 0 | for (i = 0; i < n; i++) { |
852 | 0 | if ((pixs = pixaGetPix(pixa, i, L_CLONE)) == NULL) { |
853 | 0 | L_ERROR("pixs[%d] not retrieved\n", __func__, i); |
854 | 0 | continue; |
855 | 0 | } |
856 | 0 | if (scalefactor != 1.0) |
857 | 0 | pix = pixScale(pixs, scalefactor, scalefactor); |
858 | 0 | else |
859 | 0 | pix = pixClone(pixs); |
860 | 0 | pixDestroy(&pixs); |
861 | 0 | if (!pix) { |
862 | 0 | L_ERROR("pix[%d] not made\n", __func__, i); |
863 | 0 | continue; |
864 | 0 | } |
865 | 0 | scaledres = (l_int32)(res * scalefactor); |
866 | | |
867 | | /* Select the encoding type */ |
868 | 0 | if (type != L_DEFAULT_ENCODE) { |
869 | 0 | pagetype = type; |
870 | 0 | } else if (selectDefaultPdfEncoding(pix, &pagetype) != 0) { |
871 | 0 | L_ERROR("encoding type selection failed for pix[%d]\n", |
872 | 0 | __func__, i); |
873 | 0 | pixDestroy(&pix); |
874 | 0 | continue; |
875 | 0 | } |
876 | | |
877 | 0 | ret = pixConvertToPdfData(pix, pagetype, quality, &imdata, &imbytes, |
878 | 0 | 0, 0, scaledres, title, NULL, 0); |
879 | 0 | pixDestroy(&pix); |
880 | 0 | if (ret) { |
881 | 0 | LEPT_FREE(imdata); |
882 | 0 | L_ERROR("pdf encoding failed for pix[%d]\n", __func__, i); |
883 | 0 | continue; |
884 | 0 | } |
885 | 0 | ba = l_byteaInitFromMem(imdata, imbytes); |
886 | 0 | LEPT_FREE(imdata); |
887 | 0 | ptraAdd(pa_data, ba); |
888 | 0 | } |
889 | 0 | ptraGetActualCount(pa_data, &n); |
890 | 0 | if (n == 0) { |
891 | 0 | L_ERROR("no pdf files made\n", __func__); |
892 | 0 | ptraDestroy(&pa_data, FALSE, FALSE); |
893 | 0 | return 1; |
894 | 0 | } |
895 | | |
896 | | /* Concatenate them */ |
897 | 0 | ret = ptraConcatenatePdfToData(pa_data, NULL, pdata, pnbytes); |
898 | |
|
899 | 0 | ptraGetActualCount(pa_data, &n); /* recalculate in case it changes */ |
900 | 0 | for (i = 0; i < n; i++) { |
901 | 0 | ba = (L_BYTEA *)ptraRemove(pa_data, i, L_NO_COMPACTION); |
902 | 0 | l_byteaDestroy(&ba); |
903 | 0 | } |
904 | 0 | ptraDestroy(&pa_data, FALSE, FALSE); |
905 | 0 | return ret; |
906 | 0 | } |
907 | | |
908 | | |
909 | | /*---------------------------------------------------------------------* |
910 | | * Single page, multi-image converters * |
911 | | *---------------------------------------------------------------------*/ |
912 | | /*! |
913 | | * \brief convertToPdf() |
914 | | * |
915 | | * \param[in] filein input image file -- any format |
916 | | * \param[in] type encoding type (L_JPEG_ENCODE, L_G4_ENCODE, |
917 | | * L_FLATE_ENCODE, or L_JP2K_ENCODE) |
918 | | * \param[in] quality for jpeg: 1-100; 0 for default (75) |
919 | | * for jp2k: 27-45; 0 for default (34) |
920 | | * \param[in] fileout output pdf file; only required on last |
921 | | * image on page |
922 | | * \param[in] x, y location of lower-left corner of image, |
923 | | * in pixels, relative to the PostScript origin |
924 | | * (0,0) at the lower-left corner of the page |
925 | | * \param[in] res override the resolution of the input image, |
926 | | * in ppi; use 0 to respect the resolution |
927 | | * embedded in the input images |
928 | | * \param[in] title [optional] pdf title; can be null |
929 | | * \param[in,out] plpd ptr to lpd, which is created on the first |
930 | | * invocation and returned until last image is |
931 | | * processed, at which time it is destroyed |
932 | | * \param[in] position in image sequence: L_FIRST_IMAGE, L_NEXT_IMAGE, |
933 | | * L_LAST_IMAGE |
934 | | * \return 0 if OK, 1 on error |
935 | | * |
936 | | * <pre> |
937 | | * Notes: |
938 | | * (1) To wrap only one image in pdf, input %plpd = NULL, and |
939 | | * the value of %position will be ignored: |
940 | | * convertToPdf(... type, quality, x, y, res, NULL, 0); |
941 | | * (2) To wrap multiple images on a single pdf page, this is called |
942 | | * once for each successive image. Do it this way: |
943 | | * L_PDF_DATA *lpd; |
944 | | * convertToPdf(... type, quality, x, y, res, &lpd, L_FIRST_IMAGE); |
945 | | * convertToPdf(... type, quality, x, y, res, &lpd, L_NEXT_IMAGE); |
946 | | * ... |
947 | | * convertToPdf(... type, quality, x, y, res, &lpd, L_LAST_IMAGE); |
948 | | * This will write the result to the value of %fileout specified |
949 | | * in the first call; succeeding values of %fileout are ignored. |
950 | | * On the last call: the pdf data bytes are computed and written |
951 | | * to %fileout, lpd is destroyed internally, and the returned |
952 | | * value of lpd is null. So the client has nothing to clean up. |
953 | | * (3) (a) Set %res == 0 to respect the resolution embedded in the |
954 | | * image file. If no resolution is embedded, it will be set |
955 | | * to the default value. |
956 | | * (b) Set %res to some other value to override the file resolution. |
957 | | * (4) (a) If the input %res and the resolution of the output device |
958 | | * are equal, the image will be "displayed" at the same size |
959 | | * as the original. |
960 | | * (b) If the input %res is 72, the output device will render |
961 | | * the image at 1 pt/pixel. |
962 | | * (c) Some possible choices for the default input pix resolution are: |
963 | | * 72 ppi Render pix on any output device at one pt/pixel |
964 | | * 96 ppi Windows default for generated display images |
965 | | * 300 ppi Typical default for scanned images. |
966 | | * We choose 300, which is sensible for rendering page images. |
967 | | * However, images come from a variety of sources, and |
968 | | * some are explicitly created for viewing on a display. |
969 | | * </pre> |
970 | | */ |
971 | | l_ok |
972 | | convertToPdf(const char *filein, |
973 | | l_int32 type, |
974 | | l_int32 quality, |
975 | | const char *fileout, |
976 | | l_int32 x, |
977 | | l_int32 y, |
978 | | l_int32 res, |
979 | | const char *title, |
980 | | L_PDF_DATA **plpd, |
981 | | l_int32 position) |
982 | 0 | { |
983 | 0 | l_uint8 *data; |
984 | 0 | l_int32 ret; |
985 | 0 | size_t nbytes; |
986 | |
|
987 | 0 | if (!filein) |
988 | 0 | return ERROR_INT("filein not defined", __func__, 1); |
989 | 0 | if (!plpd || (position == L_LAST_IMAGE)) { |
990 | 0 | if (!fileout) |
991 | 0 | return ERROR_INT("fileout not defined", __func__, 1); |
992 | 0 | } |
993 | | |
994 | 0 | if (convertToPdfData(filein, type, quality, &data, &nbytes, x, y, |
995 | 0 | res, title, plpd, position)) |
996 | 0 | return ERROR_INT("pdf data not made", __func__, 1); |
997 | | |
998 | 0 | if (!plpd || (position == L_LAST_IMAGE)) { |
999 | 0 | ret = l_binaryWrite(fileout, "w", data, nbytes); |
1000 | 0 | LEPT_FREE(data); |
1001 | 0 | if (ret) |
1002 | 0 | return ERROR_INT("pdf data not written to file", __func__, 1); |
1003 | 0 | } |
1004 | | |
1005 | 0 | return 0; |
1006 | 0 | } |
1007 | | |
1008 | | |
1009 | | /*! |
1010 | | * \brief convertImageDataToPdf() |
1011 | | * |
1012 | | * \param[in] imdata array of formatted image data; e.g., png, jpeg |
1013 | | * \param[in] size size of image data |
1014 | | * \param[in] type encoding type (L_JPEG_ENCODE, L_G4_ENCODE, |
1015 | | * L_FLATE_ENCODE, or L_JP2K_ENCODE) |
1016 | | * \param[in] quality for jpeg: 1-100; 0 for default (75) |
1017 | | * for jp2k: 27-45; 0 for default (34) |
1018 | | * \param[in] fileout output pdf file; only required on last |
1019 | | * image on page |
1020 | | * \param[in] x, y location of lower-left corner of image, |
1021 | | * in pixels, relative to the PostScript origin |
1022 | | * (0,0) at the lower-left corner of the page |
1023 | | * \param[in] res override the resolution of the input image, |
1024 | | * in ppi; use 0 to respect the resolution |
1025 | | * embedded in the input images |
1026 | | * \param[in] title [optional] pdf title; can be null |
1027 | | * \param[in,out] plpd ptr to lpd, which is created on the first |
1028 | | * invocation and returned until last image is |
1029 | | * processed, at which time it is destroyed |
1030 | | * \param[in] position in image sequence: L_FIRST_IMAGE, L_NEXT_IMAGE, |
1031 | | * L_LAST_IMAGE |
1032 | | * \return 0 if OK, 1 on error |
1033 | | * |
1034 | | * <pre> |
1035 | | * Notes: |
1036 | | * (1) If %res == 0 and the input resolution field is 0, |
1037 | | * this will use DefaultInputRes. |
1038 | | * (2) See comments in convertToPdf(). |
1039 | | * </pre> |
1040 | | */ |
1041 | | l_ok |
1042 | | convertImageDataToPdf(l_uint8 *imdata, |
1043 | | size_t size, |
1044 | | l_int32 type, |
1045 | | l_int32 quality, |
1046 | | const char *fileout, |
1047 | | l_int32 x, |
1048 | | l_int32 y, |
1049 | | l_int32 res, |
1050 | | const char *title, |
1051 | | L_PDF_DATA **plpd, |
1052 | | l_int32 position) |
1053 | 0 | { |
1054 | 0 | l_int32 ret; |
1055 | 0 | PIX *pix; |
1056 | |
|
1057 | 0 | if (!imdata) |
1058 | 0 | return ERROR_INT("image data not defined", __func__, 1); |
1059 | 0 | if (!plpd || (position == L_LAST_IMAGE)) { |
1060 | 0 | if (!fileout) |
1061 | 0 | return ERROR_INT("fileout not defined", __func__, 1); |
1062 | 0 | } |
1063 | | |
1064 | 0 | if ((pix = pixReadMem(imdata, size)) == NULL) |
1065 | 0 | return ERROR_INT("pix not read", __func__, 1); |
1066 | 0 | if (type != L_JPEG_ENCODE && type != L_G4_ENCODE && |
1067 | 0 | type != L_FLATE_ENCODE && type != L_JP2K_ENCODE) { |
1068 | 0 | selectDefaultPdfEncoding(pix, &type); |
1069 | 0 | } |
1070 | 0 | ret = pixConvertToPdf(pix, type, quality, fileout, x, y, res, |
1071 | 0 | title, plpd, position); |
1072 | 0 | pixDestroy(&pix); |
1073 | 0 | return ret; |
1074 | 0 | } |
1075 | | |
1076 | | |
1077 | | /*! |
1078 | | * \brief convertToPdfData() |
1079 | | * |
1080 | | * \param[in] filein input image file -- any format |
1081 | | * \param[in] type encoding type (L_JPEG_ENCODE, L_G4_ENCODE, |
1082 | | * L_FLATE_ENCODE, or L_JP2K_ENCODE) |
1083 | | * \param[in] quality for jpeg: 1-100; 0 for default (75) |
1084 | | * for jp2k: 27-45; 0 for default (34) |
1085 | | * \param[out] pdata pdf data in memory |
1086 | | * \param[out] pnbytes number of bytes in pdf data |
1087 | | * \param[in] x, y location of lower-left corner of image, |
1088 | | * in pixels, relative to the PostScript origin |
1089 | | * (0,0) at the lower-left corner of the page |
1090 | | * \param[in] res override the resolution of the input image, |
1091 | | * in ppi; use 0 to respect the resolution |
1092 | | * embedded in the input images |
1093 | | * \param[in] title [optional] pdf title; can be null |
1094 | | * \param[in,out] plpd ptr to lpd, which is created on the first |
1095 | | * invocation and returned until last image is |
1096 | | * processed, at which time it is destroyed |
1097 | | * \param[in] position in image sequence: L_FIRST_IMAGE, L_NEXT_IMAGE, |
1098 | | * L_LAST_IMAGE |
1099 | | * \return 0 if OK, 1 on error |
1100 | | * |
1101 | | * <pre> |
1102 | | * Notes: |
1103 | | * (1) If %res == 0 and the input resolution field is 0, |
1104 | | * this will use DefaultInputRes. |
1105 | | * (2) See comments in convertToPdf(). |
1106 | | * </pre> |
1107 | | */ |
1108 | | l_ok |
1109 | | convertToPdfData(const char *filein, |
1110 | | l_int32 type, |
1111 | | l_int32 quality, |
1112 | | l_uint8 **pdata, |
1113 | | size_t *pnbytes, |
1114 | | l_int32 x, |
1115 | | l_int32 y, |
1116 | | l_int32 res, |
1117 | | const char *title, |
1118 | | L_PDF_DATA **plpd, |
1119 | | l_int32 position) |
1120 | 0 | { |
1121 | 0 | PIX *pix; |
1122 | |
|
1123 | 0 | if (!pdata) |
1124 | 0 | return ERROR_INT("&data not defined", __func__, 1); |
1125 | 0 | *pdata = NULL; |
1126 | 0 | if (!pnbytes) |
1127 | 0 | return ERROR_INT("&nbytes not defined", __func__, 1); |
1128 | 0 | *pnbytes = 0; |
1129 | 0 | if (!filein) |
1130 | 0 | return ERROR_INT("filein not defined", __func__, 1); |
1131 | | |
1132 | 0 | if ((pix = pixRead(filein)) == NULL) |
1133 | 0 | return ERROR_INT("pix not made", __func__, 1); |
1134 | | |
1135 | 0 | pixConvertToPdfData(pix, type, quality, pdata, pnbytes, |
1136 | 0 | x, y, res, title, plpd, position); |
1137 | 0 | pixDestroy(&pix); |
1138 | 0 | return 0; |
1139 | 0 | } |
1140 | | |
1141 | | |
1142 | | /*! |
1143 | | * \brief convertImageDataToPdfData() |
1144 | | * |
1145 | | * \param[in] imdata array of formatted image data; e.g., png, jpeg |
1146 | | * \param[in] size size of image data |
1147 | | * \param[in] type encoding type (L_JPEG_ENCODE, L_G4_ENCODE, |
1148 | | * L_FLATE_ENCODE, or L_JP2K_ENCODE) |
1149 | | * \param[in] quality for jpeg: 1-100; 0 for default (75) |
1150 | | * for jp2k: 27-45; 0 for default (34) |
1151 | | * \param[out] pdata pdf data in memory |
1152 | | * \param[out] pnbytes number of bytes in pdf data |
1153 | | * \param[in] x, y location of lower-left corner of image, |
1154 | | * in pixels, relative to the PostScript origin |
1155 | | * (0,0) at the lower-left corner of the page |
1156 | | * \param[in] res override the resolution of the input image, |
1157 | | * in ppi; use 0 to respect the resolution |
1158 | | * embedded in the input images |
1159 | | * \param[in] title [optional] pdf title; can be null |
1160 | | * \param[out] plpd ptr to lpd, which is created on the first |
1161 | | * invocation and returned until last image is |
1162 | | * processed, at which time it is destroyed |
1163 | | * \param[in] position in image sequence: L_FIRST_IMAGE, L_NEXT_IMAGE, |
1164 | | * L_LAST_IMAGE |
1165 | | * \return 0 if OK, 1 on error |
1166 | | * |
1167 | | * <pre> |
1168 | | * Notes: |
1169 | | * (1) If %res == 0 and the input resolution field is 0, |
1170 | | * this will use DefaultInputRes. |
1171 | | * (2) See comments in convertToPdf(). |
1172 | | * </pre> |
1173 | | */ |
1174 | | l_ok |
1175 | | convertImageDataToPdfData(l_uint8 *imdata, |
1176 | | size_t size, |
1177 | | l_int32 type, |
1178 | | l_int32 quality, |
1179 | | l_uint8 **pdata, |
1180 | | size_t *pnbytes, |
1181 | | l_int32 x, |
1182 | | l_int32 y, |
1183 | | l_int32 res, |
1184 | | const char *title, |
1185 | | L_PDF_DATA **plpd, |
1186 | | l_int32 position) |
1187 | 0 | { |
1188 | 0 | l_int32 ret; |
1189 | 0 | PIX *pix; |
1190 | |
|
1191 | 0 | if (!pdata) |
1192 | 0 | return ERROR_INT("&data not defined", __func__, 1); |
1193 | 0 | *pdata = NULL; |
1194 | 0 | if (!pnbytes) |
1195 | 0 | return ERROR_INT("&nbytes not defined", __func__, 1); |
1196 | 0 | *pnbytes = 0; |
1197 | 0 | if (!imdata) |
1198 | 0 | return ERROR_INT("image data not defined", __func__, 1); |
1199 | 0 | if (plpd) { /* part of multi-page invocation */ |
1200 | 0 | if (position == L_FIRST_IMAGE) |
1201 | 0 | *plpd = NULL; |
1202 | 0 | } |
1203 | |
|
1204 | 0 | if ((pix = pixReadMem(imdata, size)) == NULL) |
1205 | 0 | return ERROR_INT("pix not read", __func__, 1); |
1206 | 0 | if (type != L_JPEG_ENCODE && type != L_G4_ENCODE && |
1207 | 0 | type != L_FLATE_ENCODE && type != L_JP2K_ENCODE) { |
1208 | 0 | selectDefaultPdfEncoding(pix, &type); |
1209 | 0 | } |
1210 | 0 | ret = pixConvertToPdfData(pix, type, quality, pdata, pnbytes, |
1211 | 0 | x, y, res, title, plpd, position); |
1212 | 0 | pixDestroy(&pix); |
1213 | 0 | return ret; |
1214 | 0 | } |
1215 | | |
1216 | | |
1217 | | /*! |
1218 | | * \brief pixConvertToPdf() |
1219 | | * |
1220 | | * \param[in] pix |
1221 | | * \param[in] type encoding type (L_JPEG_ENCODE, L_G4_ENCODE, |
1222 | | * L_FLATE_ENCODE, L_JP2K_ENCODE) |
1223 | | * \param[in] quality for jpeg: 1-100; 0 for default (75) |
1224 | | * for jp2k: 27-45; 0 for default (34) |
1225 | | * \param[in] fileout output pdf file; only required on last |
1226 | | * image on page |
1227 | | * \param[in] x, y location of lower-left corner of image, |
1228 | | * in pixels, relative to the PostScript origin |
1229 | | * (0,0) at the lower-left corner of the page |
1230 | | * \param[in] res override the resolution of the input image, |
1231 | | * in ppi; use 0 to respect the resolution |
1232 | | * embedded in the input images |
1233 | | * \param[in] title [optional] pdf title; can be null |
1234 | | * \param[in,out] plpd ptr to lpd, which is created on the first |
1235 | | * invocation and returned until last image is |
1236 | | * processed, at which time it is destroyed |
1237 | | * \param[in] position in image sequence: L_FIRST_IMAGE, L_NEXT_IMAGE, |
1238 | | * L_LAST_IMAGE |
1239 | | * \return 0 if OK, 1 on error |
1240 | | * |
1241 | | * <pre> |
1242 | | * Notes: |
1243 | | * (1) If %res == 0 and the input resolution field is 0, |
1244 | | * this will use DefaultInputRes. |
1245 | | * (2) This only writes data to fileout if it is the last |
1246 | | * image to be written on the page. |
1247 | | * (3) See comments in convertToPdf(). |
1248 | | * </pre> |
1249 | | */ |
1250 | | l_ok |
1251 | | pixConvertToPdf(PIX *pix, |
1252 | | l_int32 type, |
1253 | | l_int32 quality, |
1254 | | const char *fileout, |
1255 | | l_int32 x, |
1256 | | l_int32 y, |
1257 | | l_int32 res, |
1258 | | const char *title, |
1259 | | L_PDF_DATA **plpd, |
1260 | | l_int32 position) |
1261 | 0 | { |
1262 | 0 | l_uint8 *data; |
1263 | 0 | l_int32 ret; |
1264 | 0 | size_t nbytes; |
1265 | |
|
1266 | 0 | if (!pix) |
1267 | 0 | return ERROR_INT("pix not defined", __func__, 1); |
1268 | 0 | if (!plpd || (position == L_LAST_IMAGE)) { |
1269 | 0 | if (!fileout) |
1270 | 0 | return ERROR_INT("fileout not defined", __func__, 1); |
1271 | 0 | } |
1272 | | |
1273 | 0 | if (pixConvertToPdfData(pix, type, quality, &data, &nbytes, |
1274 | 0 | x, y, res, title, plpd, position)) { |
1275 | 0 | LEPT_FREE(data); |
1276 | 0 | return ERROR_INT("pdf data not made", __func__, 1); |
1277 | 0 | } |
1278 | | |
1279 | 0 | if (!plpd || (position == L_LAST_IMAGE)) { |
1280 | 0 | ret = l_binaryWrite(fileout, "w", data, nbytes); |
1281 | 0 | LEPT_FREE(data); |
1282 | 0 | if (ret) |
1283 | 0 | return ERROR_INT("pdf data not written to file", __func__, 1); |
1284 | 0 | } |
1285 | 0 | return 0; |
1286 | 0 | } |
1287 | | |
1288 | | |
1289 | | /*! |
1290 | | * \brief pixWriteStreamPdf() |
1291 | | * |
1292 | | * \param[in] fp file stream opened for writing |
1293 | | * \param[in] pix all depths, cmap OK |
1294 | | * \param[in] res override the resolution of the input image, in ppi; |
1295 | | * use 0 to respect the resolution embedded in the input |
1296 | | * \param[in] title [optional] pdf title; can be null |
1297 | | * \return 0 if OK, 1 on error |
1298 | | * |
1299 | | * <pre> |
1300 | | * Notes: |
1301 | | * (1) This is the simplest interface for writing a single image |
1302 | | * with pdf encoding to a stream. It uses G4 encoding for 1 bpp, |
1303 | | * JPEG encoding for 8 bpp (no cmap) and 32 bpp, and FLATE |
1304 | | * encoding for everything else. |
1305 | | * </pre> |
1306 | | */ |
1307 | | l_ok |
1308 | | pixWriteStreamPdf(FILE *fp, |
1309 | | PIX *pix, |
1310 | | l_int32 res, |
1311 | | const char *title) |
1312 | 0 | { |
1313 | 0 | l_uint8 *data; |
1314 | 0 | size_t nbytes, nbytes_written; |
1315 | |
|
1316 | 0 | if (!fp) |
1317 | 0 | return ERROR_INT("stream not opened", __func__, 1); |
1318 | 0 | if (!pix) |
1319 | 0 | return ERROR_INT("pix not defined", __func__, 1); |
1320 | | |
1321 | 0 | if (pixWriteMemPdf(&data, &nbytes, pix, res, title) != 0) { |
1322 | 0 | LEPT_FREE(data); |
1323 | 0 | return ERROR_INT("pdf data not made", __func__, 1); |
1324 | 0 | } |
1325 | | |
1326 | 0 | nbytes_written = fwrite(data, 1, nbytes, fp); |
1327 | 0 | LEPT_FREE(data); |
1328 | 0 | if (nbytes != nbytes_written) |
1329 | 0 | return ERROR_INT("failure writing pdf data to stream", __func__, 1); |
1330 | 0 | return 0; |
1331 | 0 | } |
1332 | | |
1333 | | |
1334 | | /*! |
1335 | | * \brief pixWriteMemPdf() |
1336 | | * |
1337 | | * \param[out] pdata pdf as byte array |
1338 | | * \param[out] pnbytes number of bytes in pdf array |
1339 | | * \param[in] pix all depths, cmap OK |
1340 | | * \param[in] res override the resolution of the input image, in ppi; |
1341 | | * use 0 to respect the res embedded in the input |
1342 | | * \param[in] title [optional] pdf title; can be null |
1343 | | * \return 0 if OK, 1 on error |
1344 | | * |
1345 | | * <pre> |
1346 | | * Notes: |
1347 | | * (1) This is the simplest interface for writing a single image |
1348 | | * with pdf encoding to memory. It uses G4 encoding for 1 bpp, |
1349 | | * and makes a guess whether to use JPEG or FLATE encoding for |
1350 | | * everything else. |
1351 | | * </pre> |
1352 | | */ |
1353 | | l_ok |
1354 | | pixWriteMemPdf(l_uint8 **pdata, |
1355 | | size_t *pnbytes, |
1356 | | PIX *pix, |
1357 | | l_int32 res, |
1358 | | const char *title) |
1359 | 0 | { |
1360 | 0 | l_int32 ret, type; |
1361 | |
|
1362 | 0 | if (pdata) *pdata = NULL; |
1363 | 0 | if (pnbytes) *pnbytes = 0; |
1364 | 0 | if (!pdata || !pnbytes) |
1365 | 0 | return ERROR_INT("&data or &nbytes not defined", __func__, 1); |
1366 | 0 | if (!pix) |
1367 | 0 | return ERROR_INT("pix not defined", __func__, 1); |
1368 | | |
1369 | 0 | selectDefaultPdfEncoding(pix, &type); |
1370 | 0 | ret = pixConvertToPdfData(pix, type, 75, pdata, pnbytes, |
1371 | 0 | 0, 0, res, title, NULL, 0); |
1372 | 0 | if (ret) |
1373 | 0 | return ERROR_INT("pdf data not made", __func__, 1); |
1374 | 0 | return 0; |
1375 | 0 | } |
1376 | | |
1377 | | |
1378 | | /*---------------------------------------------------------------------* |
1379 | | * Segmented multi-page, multi-image converter * |
1380 | | *---------------------------------------------------------------------*/ |
1381 | | /*! |
1382 | | * \brief convertSegmentedFilesToPdf() |
1383 | | * |
1384 | | * \param[in] dirname directory name containing images |
1385 | | * \param[in] substr [optional] substring filter on filenames; |
1386 | | * can be null |
1387 | | * \param[in] res input resolution of all images |
1388 | | * \param[in] type compression type for non-image regions; the |
1389 | | * image regions are always compressed with |
1390 | | * L_JPEG_ENCODE |
1391 | | * \param[in] thresh used for converting gray --> 1 bpp with |
1392 | | * L_G4_ENCODE |
1393 | | * \param[in] baa [optional] boxaa of image regions |
1394 | | * \param[in] quality used for JPEG only; 0 for default (75) |
1395 | | * \param[in] scalefactor scaling factor applied to each image region |
1396 | | * \param[in] title [optional] pdf title; can be null |
1397 | | * \param[in] fileout pdf file of all images |
1398 | | * \return 0 if OK, 1 on error |
1399 | | * |
1400 | | * <pre> |
1401 | | * Notes: |
1402 | | * (1) If %substr is not NULL, only image filenames that contain |
1403 | | * the substring can be used. If %substr == NULL, all files |
1404 | | * in the directory are used. |
1405 | | * (2) The files in the directory, after optional filtering by |
1406 | | * the substring, are lexically sorted in increasing order |
1407 | | * before concatenation. |
1408 | | * (3) The images are encoded with G4 if 1 bpp; JPEG if 8 bpp without |
1409 | | * colormap and many colors, or 32 bpp; FLATE for anything else. |
1410 | | * (4) The boxaa, if it exists, contains one boxa of "image regions" |
1411 | | * for each image file. The boxa must be aligned with the |
1412 | | * sorted set of images. |
1413 | | * (5) The scalefactor is applied to each image region. It is |
1414 | | * typically < 1.0, to save bytes in the final pdf, because |
1415 | | * the resolution is often not critical in non-text regions. |
1416 | | * (6) If the non-image regions have pixel depth > 1 and the encoding |
1417 | | * type is G4, they are automatically scaled up by 2x and |
1418 | | * thresholded. Otherwise, no scaling is performed on them. |
1419 | | * (7) Note that this function can be used to generate multipage |
1420 | | * G4 compressed pdf from any input, by using %boxaa == NULL |
1421 | | * and %type == L_G4_ENCODE. |
1422 | | * </pre> |
1423 | | */ |
1424 | | l_ok |
1425 | | convertSegmentedFilesToPdf(const char *dirname, |
1426 | | const char *substr, |
1427 | | l_int32 res, |
1428 | | l_int32 type, |
1429 | | l_int32 thresh, |
1430 | | BOXAA *baa, |
1431 | | l_int32 quality, |
1432 | | l_float32 scalefactor, |
1433 | | const char *title, |
1434 | | const char *fileout) |
1435 | 0 | { |
1436 | 0 | char *fname; |
1437 | 0 | l_uint8 *imdata, *data; |
1438 | 0 | l_int32 i, npages, nboxa, nboxes, ret; |
1439 | 0 | size_t imbytes, databytes; |
1440 | 0 | BOXA *boxa; |
1441 | 0 | L_BYTEA *ba; |
1442 | 0 | L_PTRA *pa_data; |
1443 | 0 | SARRAY *sa; |
1444 | |
|
1445 | 0 | if (!dirname) |
1446 | 0 | return ERROR_INT("dirname not defined", __func__, 1); |
1447 | 0 | if (!fileout) |
1448 | 0 | return ERROR_INT("fileout not defined", __func__, 1); |
1449 | | |
1450 | 0 | if ((sa = getNumberedPathnamesInDirectory(dirname, substr, 0, 0, 10000)) |
1451 | 0 | == NULL) |
1452 | 0 | return ERROR_INT("sa not made", __func__, 1); |
1453 | | |
1454 | 0 | npages = sarrayGetCount(sa); |
1455 | | /* If necessary, extend the boxaa, which is page-aligned with |
1456 | | * the image files, to be as large as the set of images. */ |
1457 | 0 | if (baa) { |
1458 | 0 | nboxa = boxaaGetCount(baa); |
1459 | 0 | if (nboxa < npages) { |
1460 | 0 | boxa = boxaCreate(1); |
1461 | 0 | boxaaExtendWithInit(baa, npages, boxa); |
1462 | 0 | boxaDestroy(&boxa); |
1463 | 0 | } |
1464 | 0 | } |
1465 | | |
1466 | | /* Generate and save all the encoded pdf strings */ |
1467 | 0 | pa_data = ptraCreate(npages); |
1468 | 0 | for (i = 0; i < npages; i++) { |
1469 | 0 | fname = sarrayGetString(sa, i, L_NOCOPY); |
1470 | 0 | if (!strcmp(fname, "")) continue; |
1471 | 0 | boxa = NULL; |
1472 | 0 | if (baa) { |
1473 | 0 | boxa = boxaaGetBoxa(baa, i, L_CLONE); |
1474 | 0 | nboxes = boxaGetCount(boxa); |
1475 | 0 | if (nboxes == 0) |
1476 | 0 | boxaDestroy(&boxa); |
1477 | 0 | } |
1478 | 0 | ret = convertToPdfDataSegmented(fname, res, type, thresh, boxa, |
1479 | 0 | quality, scalefactor, title, |
1480 | 0 | &imdata, &imbytes); |
1481 | 0 | boxaDestroy(&boxa); /* safe; in case nboxes > 0 */ |
1482 | 0 | if (ret) { |
1483 | 0 | L_ERROR("pdf encoding failed for %s\n", __func__, fname); |
1484 | 0 | continue; |
1485 | 0 | } |
1486 | 0 | ba = l_byteaInitFromMem(imdata, imbytes); |
1487 | 0 | if (imdata) LEPT_FREE(imdata); |
1488 | 0 | ptraAdd(pa_data, ba); |
1489 | 0 | } |
1490 | 0 | sarrayDestroy(&sa); |
1491 | |
|
1492 | 0 | ptraGetActualCount(pa_data, &npages); |
1493 | 0 | if (npages == 0) { |
1494 | 0 | L_ERROR("no pdf files made\n", __func__); |
1495 | 0 | ptraDestroy(&pa_data, FALSE, FALSE); |
1496 | 0 | return 1; |
1497 | 0 | } |
1498 | | |
1499 | | /* Concatenate */ |
1500 | 0 | ret = ptraConcatenatePdfToData(pa_data, NULL, &data, &databytes); |
1501 | | |
1502 | | /* Clean up */ |
1503 | 0 | ptraGetActualCount(pa_data, &npages); /* recalculate in case it changes */ |
1504 | 0 | for (i = 0; i < npages; i++) { |
1505 | 0 | ba = (L_BYTEA *)ptraRemove(pa_data, i, L_NO_COMPACTION); |
1506 | 0 | l_byteaDestroy(&ba); |
1507 | 0 | } |
1508 | 0 | ptraDestroy(&pa_data, FALSE, FALSE); |
1509 | |
|
1510 | 0 | if (ret) { |
1511 | 0 | if (data) LEPT_FREE(data); |
1512 | 0 | return ERROR_INT("pdf data not made", __func__, 1); |
1513 | 0 | } |
1514 | | |
1515 | 0 | ret = l_binaryWrite(fileout, "w", data, databytes); |
1516 | 0 | LEPT_FREE(data); |
1517 | 0 | if (ret) |
1518 | 0 | L_ERROR("pdf data not written to file\n", __func__); |
1519 | 0 | return ret; |
1520 | 0 | } |
1521 | | |
1522 | | |
1523 | | /*! |
1524 | | * \brief convertNumberedMasksToBoxaa() |
1525 | | * |
1526 | | * \param[in] dirname directory name containing mask images |
1527 | | * \param[in] substr [optional] substring filter on filenames; |
1528 | | * can be null |
1529 | | * \param[in] numpre number of characters in name before number |
1530 | | * \param[in] numpost number of characters in name after number, |
1531 | | * up to a dot before an extension |
1532 | | * \return boxaa of mask regions, or NULL on error |
1533 | | * |
1534 | | * <pre> |
1535 | | * Notes: |
1536 | | * (1) This is conveniently used to generate the input boxaa |
1537 | | * for convertSegmentedFilesToPdf(). It guarantees that the |
1538 | | * boxa will be aligned with the page images, even if some |
1539 | | * of the boxa are empty. |
1540 | | * </pre> |
1541 | | */ |
1542 | | BOXAA * |
1543 | | convertNumberedMasksToBoxaa(const char *dirname, |
1544 | | const char *substr, |
1545 | | l_int32 numpre, |
1546 | | l_int32 numpost) |
1547 | 0 | { |
1548 | 0 | char *fname; |
1549 | 0 | l_int32 i, n; |
1550 | 0 | BOXA *boxa; |
1551 | 0 | BOXAA *baa; |
1552 | 0 | PIX *pix; |
1553 | 0 | SARRAY *sa; |
1554 | |
|
1555 | 0 | if (!dirname) |
1556 | 0 | return (BOXAA *)ERROR_PTR("dirname not defined", __func__, NULL); |
1557 | | |
1558 | 0 | if ((sa = getNumberedPathnamesInDirectory(dirname, substr, numpre, |
1559 | 0 | numpost, 10000)) == NULL) |
1560 | 0 | return (BOXAA *)ERROR_PTR("sa not made", __func__, NULL); |
1561 | | |
1562 | | /* Generate and save all the encoded pdf strings */ |
1563 | 0 | n = sarrayGetCount(sa); |
1564 | 0 | baa = boxaaCreate(n); |
1565 | 0 | boxa = boxaCreate(1); |
1566 | 0 | boxaaInitFull(baa, boxa); |
1567 | 0 | boxaDestroy(&boxa); |
1568 | 0 | for (i = 0; i < n; i++) { |
1569 | 0 | fname = sarrayGetString(sa, i, L_NOCOPY); |
1570 | 0 | if (!strcmp(fname, "")) continue; |
1571 | 0 | if ((pix = pixRead(fname)) == NULL) { |
1572 | 0 | L_WARNING("invalid image on page %d\n", __func__, i); |
1573 | 0 | continue; |
1574 | 0 | } |
1575 | 0 | boxa = pixConnComp(pix, NULL, 8); |
1576 | 0 | boxaaReplaceBoxa(baa, i, boxa); |
1577 | 0 | pixDestroy(&pix); |
1578 | 0 | } |
1579 | |
|
1580 | 0 | sarrayDestroy(&sa); |
1581 | 0 | return baa; |
1582 | 0 | } |
1583 | | |
1584 | | |
1585 | | /*---------------------------------------------------------------------* |
1586 | | * Segmented single page, multi-image converters * |
1587 | | *---------------------------------------------------------------------*/ |
1588 | | /*! |
1589 | | * \brief convertToPdfSegmented() |
1590 | | * |
1591 | | * \param[in] filein input image file -- any format |
1592 | | * \param[in] res input image resolution; typ. 300 ppi; |
1593 | | * use 0 for default |
1594 | | * \param[in] type compression type for non-image regions; image |
1595 | | * regions are always compressed with L_JPEG_ENCODE |
1596 | | * \param[in] thresh for converting gray --> 1 bpp with L_G4_ENCODE |
1597 | | * \param[in] boxa [optional] of image regions; can be null |
1598 | | * \param[in] quality used for jpeg image regions; 0 for default |
1599 | | * \param[in] scalefactor used for jpeg regions; must be <= 1.0 |
1600 | | * \param[in] title [optional] pdf title; can be null |
1601 | | * \param[in] fileout output pdf file |
1602 | | * \return 0 if OK, 1 on error |
1603 | | * |
1604 | | * <pre> |
1605 | | * Notes: |
1606 | | * (1) If there are no image regions, set %boxa == NULL; |
1607 | | * %quality and %scalefactor are ignored. |
1608 | | * (2) Typically, %scalefactor is < 1.0, because the image regions |
1609 | | * can be rendered at a lower resolution (for better compression) |
1610 | | * than the text regions. If %scalefactor == 0, we use 1.0. |
1611 | | * If the input image is 1 bpp and scalefactor < 1.0, we |
1612 | | * use scaleToGray() to downsample the image regions to gray |
1613 | | * before compressing them. |
1614 | | * (3) If the compression type for non-image regions is L_G4_ENCODE |
1615 | | * and bpp > 1, the image is upscaled 2x and thresholded |
1616 | | * to 1 bpp. That is the only situation where %thresh is used. |
1617 | | * (4) The parameter %quality is only used for image regions. |
1618 | | * If %type == L_JPEG_ENCODE, default jpeg quality (75) is |
1619 | | * used for the non-image regions. |
1620 | | * (5) Processing matrix for non-image regions. |
1621 | | * |
1622 | | * Input G4 JPEG FLATE |
1623 | | * ----------|--------------------------------------------------- |
1624 | | * 1 bpp | 1x, 1 bpp 1x flate, 1 bpp 1x, 1 bpp |
1625 | | * | |
1626 | | * cmap | 2x, 1 bpp 1x flate, cmap 1x, cmap |
1627 | | * | |
1628 | | * 2,4 bpp | 2x, 1 bpp 1x flate 1x, 2,4 bpp |
1629 | | * no cmap | 2,4 bpp |
1630 | | * | |
1631 | | * 8,32 bpp | 2x, 1 bpp 1x (jpeg) 1x, 8,32 bpp |
1632 | | * no cmap | 8,32 bpp |
1633 | | * |
1634 | | * Summary: |
1635 | | * (a) if G4 is requested, G4 is used, with 2x upscaling |
1636 | | * for all cases except 1 bpp. |
1637 | | * (b) if JPEG is requested, use flate encoding for all cases |
1638 | | * except 8 bpp without cmap and 32 bpp (rgb). |
1639 | | * (c) if FLATE is requested, use flate with no transformation |
1640 | | * of the raster data. |
1641 | | * (6) Calling options/sequence for these functions: |
1642 | | * file --> file (convertToPdfSegmented) |
1643 | | * pix --> file (pixConvertToPdfSegmented) |
1644 | | * pix --> data (pixConvertToPdfDataSegmented) |
1645 | | * file --> data (convertToPdfDataSegmented) |
1646 | | * pix --> data (pixConvertToPdfDataSegmented) |
1647 | | * </pre> |
1648 | | */ |
1649 | | l_ok |
1650 | | convertToPdfSegmented(const char *filein, |
1651 | | l_int32 res, |
1652 | | l_int32 type, |
1653 | | l_int32 thresh, |
1654 | | BOXA *boxa, |
1655 | | l_int32 quality, |
1656 | | l_float32 scalefactor, |
1657 | | const char *title, |
1658 | | const char *fileout) |
1659 | 0 | { |
1660 | 0 | l_int32 ret; |
1661 | 0 | PIX *pixs; |
1662 | |
|
1663 | 0 | if (!filein) |
1664 | 0 | return ERROR_INT("filein not defined", __func__, 1); |
1665 | 0 | if (!fileout) |
1666 | 0 | return ERROR_INT("fileout not defined", __func__, 1); |
1667 | 0 | if (type != L_G4_ENCODE && type != L_JPEG_ENCODE && |
1668 | 0 | type != L_FLATE_ENCODE) |
1669 | 0 | return ERROR_INT("invalid conversion type", __func__, 1); |
1670 | 0 | if (boxa && scalefactor > 1.0) { |
1671 | 0 | L_WARNING("setting scalefactor to 1.0\n", __func__); |
1672 | 0 | scalefactor = 1.0; |
1673 | 0 | } |
1674 | |
|
1675 | 0 | if ((pixs = pixRead(filein)) == NULL) |
1676 | 0 | return ERROR_INT("pixs not made", __func__, 1); |
1677 | | |
1678 | 0 | ret = pixConvertToPdfSegmented(pixs, res, type, thresh, boxa, quality, |
1679 | 0 | scalefactor, title, fileout); |
1680 | 0 | pixDestroy(&pixs); |
1681 | 0 | return ret; |
1682 | 0 | } |
1683 | | |
1684 | | |
1685 | | /*! |
1686 | | * \brief pixConvertToPdfSegmented() |
1687 | | * |
1688 | | * \param[in] pixs any depth, cmap OK |
1689 | | * \param[in] res input image resolution; typ. 300 ppi; |
1690 | | * use 0 for default |
1691 | | * \param[in] type compression type for non-image regions; image |
1692 | | * regions are always compressed with L_JPEG_ENCODE |
1693 | | * \param[in] thresh for converting gray --> 1 bpp with L_G4_ENCODE |
1694 | | * \param[in] boxa [optional] of image regions; can be null |
1695 | | * \param[in] quality used for jpeg image regions; 0 for default |
1696 | | * \param[in] scalefactor used for jpeg regions; must be <= 1.0 |
1697 | | * \param[in] title [optional] pdf title; can be null |
1698 | | * \param[in] fileout output pdf file |
1699 | | * \return 0 if OK, 1 on error |
1700 | | * |
1701 | | * <pre> |
1702 | | * Notes: |
1703 | | * (1) See convertToPdfSegmented() for details. |
1704 | | * </pre> |
1705 | | */ |
1706 | | l_ok |
1707 | | pixConvertToPdfSegmented(PIX *pixs, |
1708 | | l_int32 res, |
1709 | | l_int32 type, |
1710 | | l_int32 thresh, |
1711 | | BOXA *boxa, |
1712 | | l_int32 quality, |
1713 | | l_float32 scalefactor, |
1714 | | const char *title, |
1715 | | const char *fileout) |
1716 | 0 | { |
1717 | 0 | l_uint8 *data; |
1718 | 0 | l_int32 ret; |
1719 | 0 | size_t nbytes; |
1720 | |
|
1721 | 0 | if (!pixs) |
1722 | 0 | return ERROR_INT("pixs not defined", __func__, 1); |
1723 | 0 | if (!fileout) |
1724 | 0 | return ERROR_INT("fileout not defined", __func__, 1); |
1725 | 0 | if (type != L_G4_ENCODE && type != L_JPEG_ENCODE && |
1726 | 0 | type != L_FLATE_ENCODE) |
1727 | 0 | return ERROR_INT("invalid conversion type", __func__, 1); |
1728 | 0 | if (boxa && scalefactor > 1.0) { |
1729 | 0 | L_WARNING("setting scalefactor to 1.0\n", __func__); |
1730 | 0 | scalefactor = 1.0; |
1731 | 0 | } |
1732 | |
|
1733 | 0 | ret = pixConvertToPdfDataSegmented(pixs, res, type, thresh, boxa, quality, |
1734 | 0 | scalefactor, title, &data, &nbytes); |
1735 | 0 | if (ret) |
1736 | 0 | return ERROR_INT("pdf generation failure", __func__, 1); |
1737 | | |
1738 | 0 | ret = l_binaryWrite(fileout, "w", data, nbytes); |
1739 | 0 | if (data) LEPT_FREE(data); |
1740 | 0 | return ret; |
1741 | 0 | } |
1742 | | |
1743 | | |
1744 | | /*! |
1745 | | * \brief convertToPdfDataSegmented() |
1746 | | * |
1747 | | * \param[in] filein input image file -- any format |
1748 | | * \param[in] res input image resolution; typ. 300 ppi; |
1749 | | * use 0 for default |
1750 | | * \param[in] type compression type for non-image regions; image |
1751 | | * regions are always compressed with L_JPEG_ENCODE |
1752 | | * \param[in] thresh for converting gray --> 1 bpp with L_G4_ENCODE |
1753 | | * \param[in] boxa [optional] image regions; can be null |
1754 | | * \param[in] quality used for jpeg image regions; 0 for default |
1755 | | * \param[in] scalefactor used for jpeg regions; must be <= 1.0 |
1756 | | * \param[in] title [optional] pdf title; can be null |
1757 | | * \param[out] pdata pdf data in memory |
1758 | | * \param[out] pnbytes number of bytes in pdf data |
1759 | | * \return 0 if OK, 1 on error |
1760 | | * |
1761 | | * <pre> |
1762 | | * Notes: |
1763 | | * (1) If there are no image regions, set %boxa == NULL; |
1764 | | * %quality and %scalefactor are ignored. |
1765 | | * (2) Typically, %scalefactor is < 1.0. The image regions are |
1766 | | * </pre> |
1767 | | */ |
1768 | | l_ok |
1769 | | convertToPdfDataSegmented(const char *filein, |
1770 | | l_int32 res, |
1771 | | l_int32 type, |
1772 | | l_int32 thresh, |
1773 | | BOXA *boxa, |
1774 | | l_int32 quality, |
1775 | | l_float32 scalefactor, |
1776 | | const char *title, |
1777 | | l_uint8 **pdata, |
1778 | | size_t *pnbytes) |
1779 | 0 | { |
1780 | 0 | l_int32 ret; |
1781 | 0 | PIX *pixs; |
1782 | |
|
1783 | 0 | if (!pdata) |
1784 | 0 | return ERROR_INT("&data not defined", __func__, 1); |
1785 | 0 | *pdata = NULL; |
1786 | 0 | if (!pnbytes) |
1787 | 0 | return ERROR_INT("&nbytes not defined", __func__, 1); |
1788 | 0 | *pnbytes = 0; |
1789 | 0 | if (!filein) |
1790 | 0 | return ERROR_INT("filein not defined", __func__, 1); |
1791 | 0 | if (type != L_G4_ENCODE && type != L_JPEG_ENCODE && |
1792 | 0 | type != L_FLATE_ENCODE) |
1793 | 0 | return ERROR_INT("invalid conversion type", __func__, 1); |
1794 | 0 | if (boxa && scalefactor > 1.0) { |
1795 | 0 | L_WARNING("setting scalefactor to 1.0\n", __func__); |
1796 | 0 | scalefactor = 1.0; |
1797 | 0 | } |
1798 | |
|
1799 | 0 | if ((pixs = pixRead(filein)) == NULL) |
1800 | 0 | return ERROR_INT("pixs not made", __func__, 1); |
1801 | | |
1802 | 0 | ret = pixConvertToPdfDataSegmented(pixs, res, type, thresh, boxa, |
1803 | 0 | quality, scalefactor, title, |
1804 | 0 | pdata, pnbytes); |
1805 | 0 | pixDestroy(&pixs); |
1806 | 0 | return ret; |
1807 | 0 | } |
1808 | | |
1809 | | |
1810 | | /*! |
1811 | | * \brief pixConvertToPdfDataSegmented() |
1812 | | * |
1813 | | * \param[in] pixs any depth, cmap OK |
1814 | | * \param[in] res input image resolution; typ. 300 ppi; |
1815 | | * use 0 for default |
1816 | | * \param[in] type compression type for non-image regions; image |
1817 | | * regions are always compressed with L_JPEG_ENCODE |
1818 | | * \param[in] thresh for converting gray --> 1 bpp with L_G4_ENCODE |
1819 | | * \param[in] boxa [optional] of image regions; can be null |
1820 | | * \param[in] quality used for jpeg image regions; 0 for default |
1821 | | * \param[in] scalefactor used for jpeg regions; must be <= 1.0 |
1822 | | * \param[in] title [optional] pdf title; can be null |
1823 | | * \param[out] pdata pdf data in memory |
1824 | | * \param[out] pnbytes number of bytes in pdf data |
1825 | | * \return 0 if OK, 1 on error |
1826 | | * |
1827 | | * <pre> |
1828 | | * Notes: |
1829 | | * (1) See convertToPdfSegmented() for details. |
1830 | | * </pre> |
1831 | | */ |
1832 | | l_ok |
1833 | | pixConvertToPdfDataSegmented(PIX *pixs, |
1834 | | l_int32 res, |
1835 | | l_int32 type, |
1836 | | l_int32 thresh, |
1837 | | BOXA *boxa, |
1838 | | l_int32 quality, |
1839 | | l_float32 scalefactor, |
1840 | | const char *title, |
1841 | | l_uint8 **pdata, |
1842 | | size_t *pnbytes) |
1843 | 0 | { |
1844 | 0 | l_int32 i, nbox, seq, bx, by, bw, bh, upscale; |
1845 | 0 | l_float32 scale; |
1846 | 0 | BOX *box, *boxc, *box2; |
1847 | 0 | PIX *pix, *pixt1, *pixt2, *pixt3, *pixt4, *pixt5, *pixt6; |
1848 | 0 | PIXCMAP *cmap; |
1849 | 0 | L_PDF_DATA *lpd; |
1850 | |
|
1851 | 0 | if (!pdata) |
1852 | 0 | return ERROR_INT("&data not defined", __func__, 1); |
1853 | 0 | *pdata = NULL; |
1854 | 0 | if (!pnbytes) |
1855 | 0 | return ERROR_INT("&nbytes not defined", __func__, 1); |
1856 | 0 | *pnbytes = 0; |
1857 | 0 | if (!pixs) |
1858 | 0 | return ERROR_INT("pixs not defined", __func__, 1); |
1859 | 0 | if (type != L_G4_ENCODE && type != L_JPEG_ENCODE && |
1860 | 0 | type != L_FLATE_ENCODE) |
1861 | 0 | return ERROR_INT("invalid conversion type", __func__, 1); |
1862 | 0 | if (boxa && (scalefactor <= 0.0 || scalefactor > 1.0)) { |
1863 | 0 | L_WARNING("setting scalefactor to 1.0\n", __func__); |
1864 | 0 | scalefactor = 1.0; |
1865 | 0 | } |
1866 | | |
1867 | | /* Adjust scalefactor so that the product with res gives an integer */ |
1868 | 0 | if (res <= 0) |
1869 | 0 | res = DefaultInputRes; |
1870 | 0 | scale = (l_float32)((l_int32)(scalefactor * res + 0.5)) / (l_float32)res; |
1871 | 0 | cmap = pixGetColormap(pixs); |
1872 | | |
1873 | | /* Simple case: single image to be encoded */ |
1874 | 0 | if (!boxa || boxaGetCount(boxa) == 0) { |
1875 | 0 | if (pixGetDepth(pixs) > 1 && type == L_G4_ENCODE) { |
1876 | 0 | if (cmap) |
1877 | 0 | pixt1 = pixRemoveColormap(pixs, REMOVE_CMAP_TO_GRAYSCALE); |
1878 | 0 | else |
1879 | 0 | pixt1 = pixConvertTo8(pixs, FALSE); |
1880 | 0 | pixt2 = pixScaleGray2xLIThresh(pixt1, thresh); |
1881 | 0 | pixConvertToPdfData(pixt2, type, quality, pdata, pnbytes, |
1882 | 0 | 0, 0, 2 * res, title, NULL, 0); |
1883 | 0 | pixDestroy(&pixt1); |
1884 | 0 | pixDestroy(&pixt2); |
1885 | 0 | } else { |
1886 | 0 | pixConvertToPdfData(pixs, type, quality, pdata, pnbytes, |
1887 | 0 | 0, 0, res, title, NULL, 0); |
1888 | 0 | } |
1889 | 0 | return 0; |
1890 | 0 | } |
1891 | | |
1892 | | /* Multiple images to be encoded. If %type == L_G4_ENCODE, |
1893 | | * jpeg encode a version of pixs that is blanked in the non-image |
1894 | | * regions, and paint the scaled non-image part onto it through a mask. |
1895 | | * Otherwise, we must put the non-image part down first and |
1896 | | * then render all the image regions separately on top of it, |
1897 | | * at their own resolution. */ |
1898 | 0 | pixt1 = pixSetBlackOrWhiteBoxa(pixs, boxa, L_SET_WHITE); /* non-image */ |
1899 | 0 | nbox = boxaGetCount(boxa); |
1900 | 0 | if (type == L_G4_ENCODE) { |
1901 | 0 | pixt2 = pixCreateTemplate(pixs); /* only image regions */ |
1902 | 0 | pixSetBlackOrWhite(pixt2, L_SET_WHITE); |
1903 | 0 | for (i = 0; i < nbox; i++) { |
1904 | 0 | box = boxaGetBox(boxa, i, L_CLONE); |
1905 | 0 | pix = pixClipRectangle(pixs, box, &boxc); |
1906 | 0 | boxGetGeometry(boxc, &bx, &by, &bw, &bh); |
1907 | 0 | pixRasterop(pixt2, bx, by, bw, bh, PIX_SRC, pix, 0, 0); |
1908 | 0 | pixDestroy(&pix); |
1909 | 0 | boxDestroy(&box); |
1910 | 0 | boxDestroy(&boxc); |
1911 | 0 | } |
1912 | 0 | pixt3 = pixRemoveColormap(pixt2, REMOVE_CMAP_BASED_ON_SRC); |
1913 | 0 | if (pixGetDepth(pixt3) == 1) |
1914 | 0 | pixt4 = pixScaleToGray(pixt3, scale); |
1915 | 0 | else |
1916 | 0 | pixt4 = pixScale(pixt3, scale, scale); |
1917 | 0 | pixConvertToPdfData(pixt4, L_JPEG_ENCODE, quality, pdata, pnbytes, |
1918 | 0 | 0, 0, (l_int32)(scale * res), title, |
1919 | 0 | &lpd, L_FIRST_IMAGE); |
1920 | |
|
1921 | 0 | if (pixGetDepth(pixt1) == 1) { |
1922 | 0 | pixt5 = pixClone(pixt1); |
1923 | 0 | upscale = 1; |
1924 | 0 | } else { |
1925 | 0 | pixt6 = pixConvertTo8(pixt1, 0); |
1926 | 0 | pixt5 = pixScaleGray2xLIThresh(pixt6, thresh); |
1927 | 0 | pixDestroy(&pixt6); |
1928 | 0 | upscale = 2; |
1929 | 0 | } |
1930 | 0 | pixConvertToPdfData(pixt5, L_G4_ENCODE, quality, pdata, pnbytes, |
1931 | 0 | 0, 0, upscale * res, title, &lpd, L_LAST_IMAGE); |
1932 | 0 | pixDestroy(&pixt2); |
1933 | 0 | pixDestroy(&pixt3); |
1934 | 0 | pixDestroy(&pixt4); |
1935 | 0 | pixDestroy(&pixt5); |
1936 | 0 | } else { |
1937 | | /* Put the non-image part down first. This is the full |
1938 | | size of the page, so we can use it to find the page |
1939 | | height in pixels, which is required for determining |
1940 | | the LL corner of the image relative to the LL corner |
1941 | | of the page. */ |
1942 | 0 | pixConvertToPdfData(pixt1, type, quality, pdata, pnbytes, 0, 0, |
1943 | 0 | res, title, &lpd, L_FIRST_IMAGE); |
1944 | 0 | for (i = 0; i < nbox; i++) { |
1945 | 0 | box = boxaGetBox(boxa, i, L_CLONE); |
1946 | 0 | pixt2 = pixClipRectangle(pixs, box, &boxc); |
1947 | 0 | pixt3 = pixRemoveColormap(pixt2, REMOVE_CMAP_BASED_ON_SRC); |
1948 | 0 | if (pixGetDepth(pixt3) == 1) |
1949 | 0 | pixt4 = pixScaleToGray(pixt3, scale); |
1950 | 0 | else |
1951 | 0 | pixt4 = pixScale(pixt3, scale, scale); |
1952 | 0 | box2 = boxTransform(boxc, 0, 0, scale, scale); |
1953 | 0 | boxGetGeometry(box2, &bx, &by, NULL, &bh); |
1954 | 0 | seq = (i == nbox - 1) ? L_LAST_IMAGE : L_NEXT_IMAGE; |
1955 | 0 | pixConvertToPdfData(pixt4, L_JPEG_ENCODE, quality, pdata, pnbytes, |
1956 | 0 | bx, by, (l_int32)(scale * res), title, |
1957 | 0 | &lpd, seq); |
1958 | 0 | pixDestroy(&pixt2); |
1959 | 0 | pixDestroy(&pixt3); |
1960 | 0 | pixDestroy(&pixt4); |
1961 | 0 | boxDestroy(&box); |
1962 | 0 | boxDestroy(&boxc); |
1963 | 0 | boxDestroy(&box2); |
1964 | 0 | } |
1965 | 0 | } |
1966 | |
|
1967 | 0 | pixDestroy(&pixt1); |
1968 | 0 | return 0; |
1969 | 0 | } |
1970 | | |
1971 | | |
1972 | | /*---------------------------------------------------------------------* |
1973 | | * Multi-page concatenation * |
1974 | | *---------------------------------------------------------------------*/ |
1975 | | /*! |
1976 | | * \brief concatenatePdf() |
1977 | | * |
1978 | | * \param[in] dirname directory name containing single-page pdf files |
1979 | | * \param[in] substr [optional] substring filter on filenames; |
1980 | | * can be null |
1981 | | * \param[in] fileout concatenated pdf file |
1982 | | * \return 0 if OK, 1 on error |
1983 | | * |
1984 | | * <pre> |
1985 | | * Notes: |
1986 | | * (1) This only works with leptonica-formatted single-page pdf files. |
1987 | | * (2) If %substr is not NULL, only filenames that contain |
1988 | | * the substring can be returned. If %substr == NULL, |
1989 | | * none of the filenames are filtered out. |
1990 | | * (3) The files in the directory, after optional filtering by |
1991 | | * the substring, are lexically sorted in increasing order |
1992 | | * before concatenation. |
1993 | | * </pre> |
1994 | | */ |
1995 | | l_ok |
1996 | | concatenatePdf(const char *dirname, |
1997 | | const char *substr, |
1998 | | const char *fileout) |
1999 | 0 | { |
2000 | 0 | l_int32 ret; |
2001 | 0 | SARRAY *sa; |
2002 | |
|
2003 | 0 | if (!dirname) |
2004 | 0 | return ERROR_INT("dirname not defined", __func__, 1); |
2005 | 0 | if (!fileout) |
2006 | 0 | return ERROR_INT("fileout not defined", __func__, 1); |
2007 | | |
2008 | 0 | if ((sa = getSortedPathnamesInDirectory(dirname, substr, 0, 0)) == NULL) |
2009 | 0 | return ERROR_INT("sa not made", __func__, 1); |
2010 | 0 | ret = saConcatenatePdf(sa, fileout); |
2011 | 0 | sarrayDestroy(&sa); |
2012 | 0 | return ret; |
2013 | 0 | } |
2014 | | |
2015 | | |
2016 | | /*! |
2017 | | * \brief saConcatenatePdf() |
2018 | | * |
2019 | | * \param[in] sa string array of pathnames for single-page pdf files |
2020 | | * \param[in] fileout concatenated pdf file |
2021 | | * \return 0 if OK, 1 on error |
2022 | | * |
2023 | | * <pre> |
2024 | | * Notes: |
2025 | | * (1) This only works with leptonica-formatted single-page pdf files. |
2026 | | * </pre> |
2027 | | */ |
2028 | | l_ok |
2029 | | saConcatenatePdf(SARRAY *sa, |
2030 | | const char *fileout) |
2031 | 0 | { |
2032 | 0 | l_uint8 *data; |
2033 | 0 | l_int32 ret; |
2034 | 0 | size_t nbytes; |
2035 | |
|
2036 | 0 | if (!sa) |
2037 | 0 | return ERROR_INT("sa not defined", __func__, 1); |
2038 | 0 | if (!fileout) |
2039 | 0 | return ERROR_INT("fileout not defined", __func__, 1); |
2040 | | |
2041 | 0 | ret = saConcatenatePdfToData(sa, &data, &nbytes); |
2042 | 0 | if (ret) |
2043 | 0 | return ERROR_INT("pdf data not made", __func__, 1); |
2044 | 0 | ret = l_binaryWrite(fileout, "w", data, nbytes); |
2045 | 0 | LEPT_FREE(data); |
2046 | 0 | return ret; |
2047 | 0 | } |
2048 | | |
2049 | | |
2050 | | /*! |
2051 | | * \brief ptraConcatenatePdf() |
2052 | | * |
2053 | | * \param[in] pa array of pdf strings, each for a single-page pdf file |
2054 | | * \param[in] fileout concatenated pdf file |
2055 | | * \return 0 if OK, 1 on error |
2056 | | * |
2057 | | * <pre> |
2058 | | * Notes: |
2059 | | * (1) This only works with leptonica-formatted single-page pdf files. |
2060 | | * </pre> |
2061 | | */ |
2062 | | l_ok |
2063 | | ptraConcatenatePdf(L_PTRA *pa, |
2064 | | const char *fileout) |
2065 | 0 | { |
2066 | 0 | l_uint8 *data; |
2067 | 0 | l_int32 ret; |
2068 | 0 | size_t nbytes; |
2069 | |
|
2070 | 0 | if (!pa) |
2071 | 0 | return ERROR_INT("pa not defined", __func__, 1); |
2072 | 0 | if (!fileout) |
2073 | 0 | return ERROR_INT("fileout not defined", __func__, 1); |
2074 | | |
2075 | 0 | ret = ptraConcatenatePdfToData(pa, NULL, &data, &nbytes); |
2076 | 0 | if (ret) |
2077 | 0 | return ERROR_INT("pdf data not made", __func__, 1); |
2078 | 0 | ret = l_binaryWrite(fileout, "w", data, nbytes); |
2079 | 0 | LEPT_FREE(data); |
2080 | 0 | return ret; |
2081 | 0 | } |
2082 | | |
2083 | | |
2084 | | /*! |
2085 | | * \brief concatenatePdfToData() |
2086 | | * |
2087 | | * \param[in] dirname directory name containing single-page pdf files |
2088 | | * \param[in] substr [optional] substring filter on filenames; |
2089 | | * can be null |
2090 | | * \param[out] pdata concatenated pdf data in memory |
2091 | | * \param[out] pnbytes number of bytes in pdf data |
2092 | | * \return 0 if OK, 1 on error |
2093 | | * |
2094 | | * <pre> |
2095 | | * Notes: |
2096 | | * (1) This only works with leptonica-formatted single-page pdf files. |
2097 | | * (2) If %substr is not NULL, only filenames that contain |
2098 | | * the substring can be returned. If %substr == NULL, |
2099 | | * none of the filenames are filtered out. |
2100 | | * (3) The files in the directory, after optional filtering by |
2101 | | * the substring, are lexically sorted in increasing order |
2102 | | * before concatenation. |
2103 | | * </pre> |
2104 | | */ |
2105 | | l_ok |
2106 | | concatenatePdfToData(const char *dirname, |
2107 | | const char *substr, |
2108 | | l_uint8 **pdata, |
2109 | | size_t *pnbytes) |
2110 | 0 | { |
2111 | 0 | l_int32 ret; |
2112 | 0 | SARRAY *sa; |
2113 | |
|
2114 | 0 | if (!pdata) |
2115 | 0 | return ERROR_INT("&data not defined", __func__, 1); |
2116 | 0 | *pdata = NULL; |
2117 | 0 | if (!pnbytes) |
2118 | 0 | return ERROR_INT("&nbytes not defined", __func__, 1); |
2119 | 0 | *pnbytes = 0; |
2120 | 0 | if (!dirname) |
2121 | 0 | return ERROR_INT("dirname not defined", __func__, 1); |
2122 | | |
2123 | 0 | if ((sa = getSortedPathnamesInDirectory(dirname, substr, 0, 0)) == NULL) |
2124 | 0 | return ERROR_INT("sa not made", __func__, 1); |
2125 | 0 | ret = saConcatenatePdfToData(sa, pdata, pnbytes); |
2126 | 0 | sarrayDestroy(&sa); |
2127 | 0 | return ret; |
2128 | 0 | } |
2129 | | |
2130 | | |
2131 | | /*! |
2132 | | * \brief saConcatenatePdfToData() |
2133 | | * |
2134 | | * \param[in] sa string array of pathnames for single-page pdf files |
2135 | | * \param[out] pdata concatenated pdf data in memory |
2136 | | * \param[out] pnbytes number of bytes in pdf data |
2137 | | * \return 0 if OK, 1 on error |
2138 | | * |
2139 | | * <pre> |
2140 | | * Notes: |
2141 | | * (1) This only works with leptonica-formatted single-page pdf files. |
2142 | | * </pre> |
2143 | | */ |
2144 | | l_ok |
2145 | | saConcatenatePdfToData(SARRAY *sa, |
2146 | | l_uint8 **pdata, |
2147 | | size_t *pnbytes) |
2148 | 0 | { |
2149 | 0 | char *fname; |
2150 | 0 | l_int32 i, npages, ret; |
2151 | 0 | L_BYTEA *bas; |
2152 | 0 | L_PTRA *pa_data; /* input pdf data for each page */ |
2153 | |
|
2154 | 0 | if (!pdata) |
2155 | 0 | return ERROR_INT("&data not defined", __func__, 1); |
2156 | 0 | *pdata = NULL; |
2157 | 0 | if (!pnbytes) |
2158 | 0 | return ERROR_INT("&nbytes not defined", __func__, 1); |
2159 | 0 | *pnbytes = 0; |
2160 | 0 | if (!sa) |
2161 | 0 | return ERROR_INT("sa not defined", __func__, 1); |
2162 | | |
2163 | | /* Read the pdf files into memory */ |
2164 | 0 | if ((npages = sarrayGetCount(sa)) == 0) |
2165 | 0 | return ERROR_INT("no filenames found", __func__, 1); |
2166 | 0 | pa_data = ptraCreate(npages); |
2167 | 0 | for (i = 0; i < npages; i++) { |
2168 | 0 | fname = sarrayGetString(sa, i, L_NOCOPY); |
2169 | 0 | bas = l_byteaInitFromFile(fname); |
2170 | 0 | ptraAdd(pa_data, bas); |
2171 | 0 | } |
2172 | |
|
2173 | 0 | ret = ptraConcatenatePdfToData(pa_data, sa, pdata, pnbytes); |
2174 | | |
2175 | | /* Cleanup: some pages could have been removed */ |
2176 | 0 | ptraGetActualCount(pa_data, &npages); |
2177 | 0 | for (i = 0; i < npages; i++) { |
2178 | 0 | bas = (L_BYTEA *)ptraRemove(pa_data, i, L_NO_COMPACTION); |
2179 | 0 | l_byteaDestroy(&bas); |
2180 | 0 | } |
2181 | 0 | ptraDestroy(&pa_data, FALSE, FALSE); |
2182 | 0 | return ret; |
2183 | 0 | } |
2184 | | |
2185 | | /* --------------------------------------------*/ |
2186 | | #endif /* USE_PDFIO */ |
2187 | | /* --------------------------------------------*/ |