Coverage Report

Created: 2024-05-20 06:23

/src/mupdf/source/fitz/output-docx.c
Line
Count
Source (jump to first uncovered line)
1
// Copyright (C) 2004-2021 Artifex Software, Inc.
2
//
3
// This file is part of MuPDF.
4
//
5
// MuPDF is free software: you can redistribute it and/or modify it under the
6
// terms of the GNU Affero General Public License as published by the Free
7
// Software Foundation, either version 3 of the License, or (at your option)
8
// any later version.
9
//
10
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13
// details.
14
//
15
// You should have received a copy of the GNU Affero General Public License
16
// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17
//
18
// Alternative licensing terms are available from the licensor.
19
// For commercial licensing, see <https://www.artifex.com/> or contact
20
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21
// CA 94129, USA, for further information.
22
23
#include "mupdf/fitz.h"
24
25
#if FZ_ENABLE_DOCX_OUTPUT
26
27
#include "glyphbox.h"
28
#include "extract/extract.h"
29
#include "extract/buffer.h"
30
31
#include <assert.h>
32
#include <errno.h>
33
#include <string.h>
34
35
36
typedef struct
37
{
38
  fz_document_writer super;
39
  extract_alloc_t *alloc;
40
41
  /*
42
   * .ctx is needed for the callbacks we get from the Extract library, for
43
   * example s_realloc_fn(). Each of our main device callbacks sets .ctx on
44
   * entry, and resets back to NULL before returning.
45
   */
46
  fz_context *ctx;
47
48
  fz_output *output;
49
  extract_t *extract;
50
  int spacing;
51
  int rotation;
52
  int images;
53
  int mediabox_clip;
54
  fz_rect mediabox; /* As passed to writer_begin_page(). */
55
  char output_cache[1024];
56
} fz_docx_writer;
57
58
59
typedef struct
60
{
61
  fz_device super;
62
  fz_docx_writer *writer;
63
} fz_docx_device;
64
65
66
static void dev_text(fz_context *ctx, fz_device *dev_, const fz_text *text, fz_matrix ctm,
67
  fz_colorspace *colorspace, const float *color, float alpha, fz_color_params color_params)
68
0
{
69
0
  fz_docx_device *dev = (fz_docx_device*) dev_;
70
0
  fz_text_span *span;
71
0
  assert(!dev->writer->ctx);
72
0
  dev->writer->ctx = ctx;
73
0
  fz_try(ctx)
74
0
  {
75
0
    for (span = text->head; span; span = span->next)
76
0
    {
77
0
      int i;
78
0
      fz_matrix combined, trm;
79
0
      fz_rect bbox;
80
81
0
      combined = fz_concat(span->trm, ctm);
82
83
0
      bbox = span->font->bbox;
84
0
      if (extract_span_begin(
85
0
          dev->writer->extract,
86
0
          span->font->name,
87
0
          span->font->flags.is_bold,
88
0
          span->font->flags.is_italic,
89
0
          span->wmode,
90
0
          combined.a,
91
0
          combined.b,
92
0
          combined.c,
93
0
          combined.d,
94
0
          bbox.x0,
95
0
          bbox.y0,
96
0
          bbox.x1,
97
0
          bbox.y1))
98
0
      {
99
0
        fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to begin span");
100
0
      }
101
102
0
      trm = span->trm;
103
0
      for (i=0; i<span->len; ++i)
104
0
      {
105
0
        fz_text_item *item = &span->items[i];
106
0
        float adv = 0;
107
0
        fz_rect bounds;
108
0
        fz_matrix combined;
109
110
0
        trm.e = item->x;
111
0
        trm.f = item->y;
112
0
        combined = fz_concat(trm, ctm);
113
114
0
        if (dev->writer->mediabox_clip)
115
0
          if (fz_glyph_entirely_outside_box(ctx, &ctm, span, item, &dev->writer->mediabox))
116
0
            continue;
117
118
0
        if (span->items[i].gid >= 0)
119
0
          adv = fz_advance_glyph(ctx, span->font, span->items[i].gid, span->wmode);
120
121
0
        bounds = fz_bound_glyph(ctx, span->font, span->items[i].gid, combined);
122
0
        if (extract_add_char(dev->writer->extract, combined.e, combined.f, item->ucs, adv,
123
0
              bounds.x0, bounds.y0, bounds.x1, bounds.y1))
124
0
          fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to add char");
125
0
      }
126
127
0
      if (extract_span_end(dev->writer->extract))
128
0
        fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to end span");
129
0
    }
130
0
  }
131
0
  fz_always(ctx)
132
0
  {
133
0
    dev->writer->ctx = NULL;
134
0
  }
135
0
  fz_catch(ctx)
136
0
  {
137
0
    fz_rethrow(ctx);
138
0
  }
139
0
}
140
141
static void dev_fill_text(fz_context *ctx, fz_device *dev_, const fz_text *text, fz_matrix ctm,
142
  fz_colorspace *colorspace, const float *color, float alpha, fz_color_params color_params)
143
0
{
144
0
  dev_text(ctx, dev_, text, ctm, colorspace, color, alpha, color_params);
145
0
}
146
147
static void dev_stroke_text(fz_context *ctx, fz_device *dev_, const fz_text *text, const fz_stroke_state *stroke, fz_matrix ctm,
148
  fz_colorspace *colorspace, const float *color, float alpha, fz_color_params color_params)
149
0
{
150
0
  dev_text(ctx, dev_, text, ctm, colorspace, color, alpha, color_params);
151
0
}
152
153
/*
 * clip_text device callback: <scissor> is ignored and the text is sent to
 * dev_text() with no colour information.
 */
static void
dev_clip_text(fz_context *ctx, fz_device *dev_, const fz_text *text, fz_matrix ctm, fz_rect scissor)
{
	dev_text(ctx, dev_, text, ctm, NULL /*colorspace*/, NULL /*color*/, 0 /*alpha*/, fz_default_color_params);
}
157
158
/*
 * clip_stroke_text device callback: <stroke> and <scissor> are ignored and
 * the text is sent to dev_text() with no colour information.
 */
static void
dev_clip_stroke_text(fz_context *ctx, fz_device *dev_, const fz_text *text,
	const fz_stroke_state *stroke, fz_matrix ctm, fz_rect scissor)
{
	dev_text(ctx, dev_, text, ctm, NULL /*colorspace*/, NULL /*color*/, 0 /*alpha*/, fz_default_color_params);
}
162
163
static void
164
dev_ignore_text(fz_context *ctx, fz_device *dev_, const fz_text *text, fz_matrix ctm)
165
0
{
166
0
}
167
168
static void writer_image_free(void *handle, void *image_data)
169
0
{
170
0
  fz_docx_writer *writer = handle;
171
0
  fz_free(writer->ctx, image_data);
172
0
}
173
174
static void dev_fill_image(fz_context *ctx, fz_device *dev_, fz_image *img, fz_matrix ctm, float alpha, fz_color_params color_params)
175
0
{
176
0
  fz_docx_device *dev = (fz_docx_device*) dev_;
177
0
  const char *type = NULL;
178
0
  fz_compressed_buffer *compressed = fz_compressed_image_buffer(ctx, img);
179
180
0
  assert(!dev->writer->ctx);
181
0
  dev->writer->ctx = ctx;
182
0
  fz_try(ctx)
183
0
  {
184
0
    if (compressed)
185
0
    {
186
0
      if (0) { /* For alignment */ }
187
0
      else if (compressed->params.type == FZ_IMAGE_RAW) type = "raw";
188
0
      else if (compressed->params.type == FZ_IMAGE_FAX) type = "fax";
189
0
      else if (compressed->params.type == FZ_IMAGE_FLATE) type = "flate";
190
0
      else if (compressed->params.type == FZ_IMAGE_LZW) type = "lzw";
191
0
      else if (compressed->params.type == FZ_IMAGE_BMP) type = "bmp";
192
0
      else if (compressed->params.type == FZ_IMAGE_GIF) type = "gif";
193
0
      else if (compressed->params.type == FZ_IMAGE_JBIG2) type = "jbig2";
194
0
      else if (compressed->params.type == FZ_IMAGE_JPEG) type = "jpeg";
195
0
      else if (compressed->params.type == FZ_IMAGE_JPX) type = "jpx";
196
0
      else if (compressed->params.type == FZ_IMAGE_JXR) type = "jxr";
197
0
      else if (compressed->params.type == FZ_IMAGE_PNG) type = "png";
198
0
      else if (compressed->params.type == FZ_IMAGE_PNM) type = "pnm";
199
0
      else if (compressed->params.type == FZ_IMAGE_TIFF) type = "tiff";
200
201
0
      if (type)
202
0
      {
203
        /* Write out raw data. */
204
0
        unsigned char *data;
205
0
        size_t datasize = fz_buffer_extract(ctx, compressed->buffer, &data);
206
0
        if (extract_add_image(
207
0
            dev->writer->extract,
208
0
            type,
209
0
            ctm.e /*x*/,
210
0
            ctm.f /*y*/,
211
0
            img->w /*w*/,
212
0
            img->h /*h*/,
213
0
            data,
214
0
            datasize,
215
0
            writer_image_free,
216
0
            dev->writer
217
0
            ))
218
0
        {
219
0
          fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to add image type=%s", type);
220
0
        }
221
0
      }
222
0
      else
223
0
      {
224
        /* We don't recognise this image type, so ignore. */
225
0
      }
226
0
    }
227
0
    else
228
0
    {
229
      /*
230
       * Compressed data not available, so we could write out
231
       * raw pixel values. But for now we ignore.
232
       */
233
0
    }
234
0
  }
235
0
  fz_always(ctx)
236
0
  {
237
0
    dev->writer->ctx = NULL;
238
0
  }
239
0
  fz_catch(ctx)
240
0
  {
241
0
    fz_rethrow(ctx);
242
0
  }
243
0
}
244
245
/*
246
 * Support for sending information to Extract when walking stroke/fill path
247
 * with fz_walk_path().
248
 */
249
typedef struct
250
{
251
  fz_path_walker walker;
252
  extract_t *extract;
253
} walker_info_t;
254
255
/* fz_path_walker moveto callback; <arg> is the extract_t to update. */
static void
s_moveto(fz_context *ctx, void *arg, float x, float y)
{
	if (extract_moveto((extract_t *) arg, x, y) != 0)
	{
		fz_throw(ctx, FZ_ERROR_LIBRARY, "extract_moveto() failed");
	}
}
261
262
/* fz_path_walker lineto callback; <arg> is the extract_t to update. */
static void
s_lineto(fz_context *ctx, void *arg, float x, float y)
{
	if (extract_lineto((extract_t *) arg, x, y) != 0)
	{
		fz_throw(ctx, FZ_ERROR_LIBRARY, "extract_lineto() failed");
	}
}
268
269
/*
 * fz_path_walker curveto callback; <arg> is the extract_t to update.
 *
 * The curve itself is not sent to Extract. We only move to its end point
 * (x3, y3) so that subsequent straight lines are handled correctly; the
 * control points are ignored.
 */
static void
s_curveto(fz_context *ctx, void *arg, float x1, float y1,
		float x2, float y2, float x3, float y3)
{
	if (extract_moveto((extract_t *) arg, x3, y3) != 0)
	{
		fz_throw(ctx, FZ_ERROR_LIBRARY, "extract_moveto() failed");
	}
}
278
279
/* fz_path_walker closepath callback; <arg> is the extract_t to update. */
static void
s_closepath(fz_context *ctx, void *arg)
{
	if (extract_closepath((extract_t *) arg) != 0)
	{
		fz_throw(ctx, FZ_ERROR_LIBRARY, "extract_closepath() failed");
	}
}
285
286
/*
287
 * Calls extract_*() path functions on <path> using fz_walk_path() and the
288
 * above callbacks.
289
 */
290
static void s_walk_path(fz_context *ctx, fz_docx_device *dev, extract_t *extract, const fz_path *path)
291
0
{
292
0
  fz_path_walker walker;
293
0
  walker.moveto = s_moveto;
294
0
  walker.lineto = s_lineto;
295
0
  walker.curveto = s_curveto;
296
0
  walker.closepath = s_closepath;
297
0
  walker.quadto = NULL;
298
0
  walker.curvetov = NULL;
299
0
  walker.curvetoy = NULL;
300
0
  walker.rectto = NULL;
301
302
0
  assert(dev->writer->ctx == ctx);
303
0
  fz_walk_path(ctx, path, &walker, extract /*arg*/);
304
0
}
305
306
void dev_fill_path(fz_context *ctx, fz_device *dev_, const fz_path *path, int even_odd,
307
    fz_matrix matrix, fz_colorspace * colorspace, const float *color, float alpha,
308
    fz_color_params color_params)
309
0
{
310
0
  fz_docx_device *dev = (fz_docx_device*) dev_;
311
0
  extract_t *extract = dev->writer->extract;
312
313
0
  assert(!dev->writer->ctx);
314
0
  dev->writer->ctx = ctx;
315
316
0
  fz_try(ctx)
317
0
  {
318
0
    if (extract_fill_begin(
319
0
        extract,
320
0
        matrix.a,
321
0
        matrix.b,
322
0
        matrix.c,
323
0
        matrix.d,
324
0
        matrix.e,
325
0
        matrix.f,
326
0
        color[0]
327
0
        ))
328
0
      fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to begin fill");
329
0
    s_walk_path(ctx, dev, extract, path);
330
0
    if (extract_fill_end(extract))
331
0
      fz_throw(ctx, FZ_ERROR_LIBRARY, "extract_fill_end() failed");
332
0
  }
333
0
  fz_always(ctx)
334
0
  {
335
0
    dev->writer->ctx = NULL;
336
0
  }
337
0
  fz_catch(ctx)
338
0
  {
339
0
    fz_rethrow(ctx);
340
0
  }
341
0
}
342
343
344
static void
345
dev_stroke_path(fz_context *ctx, fz_device *dev_, const fz_path *path,
346
    const fz_stroke_state *stroke, fz_matrix in_ctm,
347
    fz_colorspace *colorspace_in, const float *color, float alpha,
348
    fz_color_params color_params)
349
0
{
350
0
  fz_docx_device *dev = (fz_docx_device*) dev_;
351
0
  extract_t *extract = dev->writer->extract;
352
353
0
  assert(!dev->writer->ctx);
354
0
  dev->writer->ctx = ctx;
355
0
  fz_try(ctx)
356
0
  {
357
0
    if (extract_stroke_begin(
358
0
        extract,
359
0
        in_ctm.a,
360
0
        in_ctm.b,
361
0
        in_ctm.c,
362
0
        in_ctm.d,
363
0
        in_ctm.e,
364
0
        in_ctm.f,
365
0
        stroke->linewidth,
366
0
        color[0]
367
0
        ))
368
0
      fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to begin stroke");
369
0
    s_walk_path(ctx, dev, extract, path);
370
0
    if (extract_stroke_end(extract))
371
0
      fz_throw(ctx, FZ_ERROR_LIBRARY, "extract_stroke_end() failed");
372
0
  }
373
0
  fz_always(ctx)
374
0
  {
375
0
    dev->writer->ctx = NULL;
376
0
  }
377
0
  fz_catch(ctx)
378
0
  {
379
0
    fz_rethrow(ctx);
380
0
  }
381
0
}
382
383
static extract_struct_t
384
fz_struct_to_extract(fz_structure type)
385
0
{
386
0
  switch (type)
387
0
  {
388
0
  default:
389
0
    return extract_struct_INVALID;
390
391
0
  case FZ_STRUCTURE_DOCUMENT:
392
0
    return extract_struct_DOCUMENT;
393
0
  case FZ_STRUCTURE_PART:
394
0
    return extract_struct_PART;
395
0
  case FZ_STRUCTURE_ART:
396
0
    return extract_struct_ART;
397
0
  case FZ_STRUCTURE_SECT:
398
0
    return extract_struct_SECT;
399
0
  case FZ_STRUCTURE_DIV:
400
0
    return extract_struct_DIV;
401
0
  case FZ_STRUCTURE_BLOCKQUOTE:
402
0
    return extract_struct_BLOCKQUOTE;
403
0
  case FZ_STRUCTURE_CAPTION:
404
0
    return extract_struct_CAPTION;
405
0
  case FZ_STRUCTURE_TOC:
406
0
    return extract_struct_TOC;
407
0
  case FZ_STRUCTURE_TOCI:
408
0
    return extract_struct_TOCI;
409
0
  case FZ_STRUCTURE_INDEX:
410
0
    return extract_struct_INDEX;
411
0
  case FZ_STRUCTURE_NONSTRUCT:
412
0
    return extract_struct_NONSTRUCT;
413
0
  case FZ_STRUCTURE_PRIVATE:
414
0
    return extract_struct_PRIVATE;
415
  /* Grouping elements (PDF 2.0 - Table 364) */
416
0
  case FZ_STRUCTURE_DOCUMENTFRAGMENT:
417
0
    return extract_struct_DOCUMENTFRAGMENT;
418
  /* Grouping elements (PDF 2.0 - Table 365) */
419
0
  case FZ_STRUCTURE_ASIDE:
420
0
    return extract_struct_ASIDE;
421
  /* Grouping elements (PDF 2.0 - Table 366) */
422
0
  case FZ_STRUCTURE_TITLE:
423
0
    return extract_struct_TITLE;
424
0
  case FZ_STRUCTURE_FENOTE:
425
0
    return extract_struct_FENOTE;
426
  /* Grouping elements (PDF 2.0 - Table 367) */
427
0
  case FZ_STRUCTURE_SUB:
428
0
    return extract_struct_SUB;
429
430
  /* Paragraphlike elements (PDF 1.7 - Table 10.21) */
431
0
  case FZ_STRUCTURE_P:
432
0
    return extract_struct_P;
433
0
  case FZ_STRUCTURE_H:
434
0
    return extract_struct_H;
435
0
  case FZ_STRUCTURE_H1:
436
0
    return extract_struct_H1;
437
0
  case FZ_STRUCTURE_H2:
438
0
    return extract_struct_H2;
439
0
  case FZ_STRUCTURE_H3:
440
0
    return extract_struct_H3;
441
0
  case FZ_STRUCTURE_H4:
442
0
    return extract_struct_H4;
443
0
  case FZ_STRUCTURE_H5:
444
0
    return extract_struct_H5;
445
0
  case FZ_STRUCTURE_H6:
446
0
    return extract_struct_H6;
447
448
  /* List elements (PDF 1.7 - Table 10.23) */
449
0
  case FZ_STRUCTURE_LIST:
450
0
    return extract_struct_LIST;
451
0
  case FZ_STRUCTURE_LISTITEM:
452
0
    return extract_struct_LISTITEM;
453
0
  case FZ_STRUCTURE_LABEL:
454
0
    return extract_struct_LABEL;
455
0
  case FZ_STRUCTURE_LISTBODY:
456
0
    return extract_struct_LISTBODY;
457
458
  /* Table elements (PDF 1.7 - Table 10.24) */
459
0
  case FZ_STRUCTURE_TABLE:
460
0
    return extract_struct_TABLE;
461
0
  case FZ_STRUCTURE_TR:
462
0
    return extract_struct_TR;
463
0
  case FZ_STRUCTURE_TH:
464
0
    return extract_struct_TH;
465
0
  case FZ_STRUCTURE_TD:
466
0
    return extract_struct_TD;
467
0
  case FZ_STRUCTURE_THEAD:
468
0
    return extract_struct_THEAD;
469
0
  case FZ_STRUCTURE_TBODY:
470
0
    return extract_struct_TBODY;
471
0
  case FZ_STRUCTURE_TFOOT:
472
0
    return extract_struct_TFOOT;
473
474
  /* Inline elements (PDF 1.7 - Table 10.25) */
475
0
  case FZ_STRUCTURE_SPAN:
476
0
    return extract_struct_SPAN;
477
0
  case FZ_STRUCTURE_QUOTE:
478
0
    return extract_struct_QUOTE;
479
0
  case FZ_STRUCTURE_NOTE:
480
0
    return extract_struct_NOTE;
481
0
  case FZ_STRUCTURE_REFERENCE:
482
0
    return extract_struct_REFERENCE;
483
0
  case FZ_STRUCTURE_BIBENTRY:
484
0
    return extract_struct_BIBENTRY;
485
0
  case FZ_STRUCTURE_CODE:
486
0
    return extract_struct_CODE;
487
0
  case FZ_STRUCTURE_LINK:
488
0
    return extract_struct_LINK;
489
0
  case FZ_STRUCTURE_ANNOT:
490
0
    return extract_struct_ANNOT;
491
  /* Inline elements (PDF 2.0 - Table 368) */
492
0
  case FZ_STRUCTURE_EM:
493
0
    return extract_struct_EM;
494
0
  case FZ_STRUCTURE_STRONG:
495
0
    return extract_struct_STRONG;
496
497
  /* Ruby inline element (PDF 1.7 - Table 10.26) */
498
0
  case FZ_STRUCTURE_RUBY:
499
0
    return extract_struct_RUBY;
500
0
  case FZ_STRUCTURE_RB:
501
0
    return extract_struct_RB;
502
0
  case FZ_STRUCTURE_RT:
503
0
    return extract_struct_RT;
504
0
  case FZ_STRUCTURE_RP:
505
0
    return extract_struct_RP;
506
507
  /* Warichu inline element (PDF 1.7 - Table 10.26) */
508
0
  case FZ_STRUCTURE_WARICHU:
509
0
    return extract_struct_WARICHU;
510
0
  case FZ_STRUCTURE_WT:
511
0
    return extract_struct_WT;
512
0
  case FZ_STRUCTURE_WP:
513
0
    return extract_struct_WP;
514
515
  /* Illustration elements (PDF 1.7 - Table 10.27) */
516
0
  case FZ_STRUCTURE_FIGURE:
517
0
    return extract_struct_FIGURE;
518
0
  case FZ_STRUCTURE_FORMULA:
519
0
    return extract_struct_FORMULA;
520
0
  case FZ_STRUCTURE_FORM:
521
0
    return extract_struct_FORM;
522
523
  /* Artifact structure type (PDF 2.0 - Table 375) */
524
0
  case FZ_STRUCTURE_ARTIFACT:
525
0
    return extract_struct_ARTIFACT;
526
0
  }
527
0
}
528
529
static void
530
dev_begin_structure(fz_context *ctx, fz_device *dev_, fz_structure standard, const char *raw, int idx)
531
0
{
532
0
  fz_docx_device *dev = (fz_docx_device *)dev_;
533
0
  extract_t *extract = dev->writer->extract;
534
535
0
  assert(!dev->writer->ctx);
536
0
  dev->writer->ctx = ctx;
537
0
  fz_try(ctx)
538
0
  {
539
0
    if (extract_begin_struct(extract, fz_struct_to_extract(standard), idx, -1))
540
0
      fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to begin struct");
541
0
  }
542
0
  fz_always(ctx)
543
0
    dev->writer->ctx = NULL;
544
0
  fz_catch(ctx)
545
0
    fz_rethrow(ctx);
546
0
}
547
548
static void
549
dev_end_structure(fz_context *ctx, fz_device *dev_)
550
0
{
551
0
  fz_docx_device *dev = (fz_docx_device *)dev_;
552
0
  extract_t *extract = dev->writer->extract;
553
554
0
  assert(!dev->writer->ctx);
555
0
  dev->writer->ctx = ctx;
556
0
  fz_try(ctx)
557
0
  {
558
0
    if (extract_end_struct(extract))
559
0
      fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to end struct");
560
0
  }
561
0
  fz_always(ctx)
562
0
    dev->writer->ctx = NULL;
563
0
  fz_catch(ctx)
564
0
    fz_rethrow(ctx);
565
0
}
566
567
568
/*
 * begin_page writer callback: records <mediabox>, starts a new Extract page
 * and returns a new device whose callbacks feed that page.
 *
 * Throws FZ_ERROR_LIBRARY if extract_page_begin() fails.
 */
static fz_device *
writer_begin_page(fz_context *ctx, fz_document_writer *writer_, fz_rect mediabox)
{
	fz_docx_writer *writer = (fz_docx_writer*) writer_;
	fz_docx_device *dev;

	assert(!writer->ctx);
	writer->ctx = ctx;
	writer->mediabox = mediabox;

	fz_var(dev);
	fz_try(ctx)
	{
		fz_device *base;

		if (extract_page_begin(writer->extract, mediabox.x0, mediabox.y0, mediabox.x1, mediabox.y1) != 0)
		{
			fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to begin page");
		}

		dev = fz_new_derived_device(ctx, fz_docx_device);
		dev->writer = writer;

		base = &dev->super;
		/* Text */
		base->fill_text = dev_fill_text;
		base->stroke_text = dev_stroke_text;
		base->clip_text = dev_clip_text;
		base->clip_stroke_text = dev_clip_stroke_text;
		base->ignore_text = dev_ignore_text;
		/* Images and paths */
		base->fill_image = dev_fill_image;
		base->fill_path = dev_fill_path;
		base->stroke_path = dev_stroke_path;
		/* Structure */
		base->begin_structure = dev_begin_structure;
		base->end_structure = dev_end_structure;
	}
	fz_always(ctx)
	{
		writer->ctx = NULL;
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}
	return &dev->super;
}
603
604
/*
 * end_page writer callback: closes <dev>, ends the Extract page and runs
 * extract_process() with the writer's spacing/rotation/images settings.
 * <dev> is dropped whether or not an error occurs.
 */
static void
writer_end_page(fz_context *ctx, fz_document_writer *writer_, fz_device *dev)
{
	fz_docx_writer *writer = (fz_docx_writer*) writer_;

	assert(!writer->ctx);
	writer->ctx = ctx;
	fz_try(ctx)
	{
		fz_close_device(ctx, dev);

		if (extract_page_end(writer->extract) != 0)
		{
			fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to end page");
		}
		if (extract_process(writer->extract, writer->spacing, writer->rotation, writer->images) != 0)
		{
			fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to process page");
		}
	}
	fz_always(ctx)
	{
		writer->ctx = NULL;
		fz_drop_device(ctx, dev);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}
}
628
629
static int buffer_write(void *handle, const void *source, size_t numbytes, size_t *o_actual)
630
/*
631
 * extract_buffer_t callback that calls fz_write_data(). <source> will be docx
632
 * archive data.
633
 */
634
0
{
635
0
  int e = 0;
636
0
  fz_docx_writer *writer = handle;
637
0
  fz_var(e);
638
0
  fz_try(writer->ctx)
639
0
  {
640
0
    fz_write_data(writer->ctx, writer->output, source, numbytes);
641
0
    *o_actual = numbytes;
642
0
  }
643
0
  fz_catch(writer->ctx)
644
0
  {
645
0
    errno = EIO;
646
0
    e = -1;
647
0
  }
648
0
  return e;
649
0
}
650
651
static int buffer_cache(void *handle, void **o_cache, size_t *o_numbytes)
652
/*
653
 * extract_buffer_t cache function. We simply return writer->output_cache.
654
 */
655
0
{
656
0
  fz_docx_writer *writer = handle;
657
0
  *o_cache = writer->output_cache;
658
0
  *o_numbytes = sizeof(writer->output_cache);
659
0
  return 0;
660
0
}
661
662
/*
 * close writer callback: generates the final document from everything sent
 * to Extract, writes it to writer->output and closes the output.
 *
 * Throws FZ_ERROR_LIBRARY if creating, writing or closing the Extract output
 * buffer fails. The Extract instance is ended on both success and failure.
 */
static void
writer_close(fz_context *ctx, fz_document_writer *writer_)
{
	fz_docx_writer *writer = (fz_docx_writer*) writer_;
	extract_buffer_t *sink = NULL;

	fz_var(sink);
	fz_var(writer);
	assert(!writer->ctx);
	writer->ctx = ctx;
	fz_try(ctx)
	{
		/*
		 * extract_write() needs an extract_buffer_t; create one whose
		 * callbacks (buffer_write/buffer_cache) forward the data to
		 * writer->output.
		 */
		int failed = extract_buffer_open(
				writer->alloc,
				writer,
				NULL /*fn_read*/,
				buffer_write,
				buffer_cache,
				NULL /*fn_close*/,
				&sink
				);
		if (failed)
		{
			fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to create extract_buffer_output: %s", strerror(errno));
		}
		if (extract_write(writer->extract, sink) != 0)
		{
			fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to generate docx content: %s", strerror(errno));
		}
		if (extract_buffer_close(&sink) != 0)
		{
			fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to close extract_buffer: %s", strerror(errno));
		}

		extract_end(&writer->extract);
		fz_close_output(ctx, writer->output);
		writer->ctx = NULL;
	}
	fz_catch(ctx)
	{
		/*
		 * fz_close_output() is not called here because it can throw;
		 * on this error path the output is left for writer_drop() to
		 * release with fz_drop_output().
		 */
		extract_buffer_close(&sink);
		extract_end(&writer->extract);
		writer->ctx = NULL;
		fz_rethrow(ctx);
	}
}
712
713
/*
 * drop writer callback: drops the output, ends the Extract instance and
 * destroys the Extract allocator. writer->ctx must be NULL on entry; it is
 * set for the duration of the Extract calls and cleared again afterwards.
 */
static void
writer_drop(fz_context *ctx, fz_document_writer *writer_)
{
	fz_docx_writer *writer = (fz_docx_writer*) writer_;

	fz_drop_output(ctx, writer->output);
	writer->output = NULL;

	assert(!writer->ctx);

	/* The Extract calls below may call back into s_realloc_fn(), which needs .ctx. */
	writer->ctx = ctx;
	extract_end(&writer->extract);
	extract_alloc_destroy(&writer->alloc);
	writer->ctx = NULL;
}
724
725
726
/*
 * Reads the yes/no option <name> from <options>.
 *
 * Returns 1 for "yes", 0 for "no", and <default_> if the option is absent.
 * Throws FZ_ERROR_SYNTAX for any other value.
 */
static int
get_bool_option(fz_context *ctx, const char *options, const char *name, int default_)
{
	const char *text;

	if (!fz_has_option(ctx, options, name, &text))
		return default_;

	if (fz_option_eq(text, "yes"))
		return 1;
	if (fz_option_eq(text, "no"))
		return 0;

	fz_throw(ctx, FZ_ERROR_SYNTAX, "option '%s' should be yes or no in options='%s'", name, options);
}
738
739
/*
 * Reads the numeric option <name> from <options>.
 *
 * Returns the leading number of the option's value, or <default_> if the
 * option is absent. Text after the number (for example further options) is
 * ignored.
 *
 * Throws FZ_ERROR_SYNTAX if the value does not start with a number. This
 * matches get_bool_option(), which also rejects malformed values rather than
 * silently treating them as 0.
 */
static double get_double_option(fz_context *ctx, const char *options, const char *name, double default_)
{
	const char *value;
	char *end;
	double ret;

	if (!fz_has_option(ctx, options, name, &value))
		return default_;

	/* strtod() reports "nothing parsed" by leaving end == value; atof() cannot. */
	ret = strtod(value, &end);
	if (end == value)
		fz_throw(ctx, FZ_ERROR_SYNTAX, "option '%s' should be a number in options='%s'", name, options);

	return ret;
}
750
751
static void *s_realloc_fn(void *state, void *prev, size_t size)
752
0
{
753
0
  fz_docx_writer *writer = state;
754
0
  assert(writer);
755
0
  assert(writer->ctx);
756
0
  return fz_realloc_no_throw(writer->ctx, prev, size);
757
0
}
758
759
/* Will drop <out> if an error occurs. */
760
static fz_document_writer *fz_new_docx_writer_internal(fz_context *ctx, fz_output *out,
761
    const char *options, extract_format_t format)
762
0
{
763
0
  fz_docx_writer *writer = NULL;
764
765
0
  fz_var(writer);
766
767
0
  fz_try(ctx)
768
0
  {
769
0
    double space_guess = get_double_option(ctx, options, "space-guess", 0);
770
0
    writer = fz_new_derived_document_writer(
771
0
        ctx,
772
0
        fz_docx_writer,
773
0
        writer_begin_page,
774
0
        writer_end_page,
775
0
        writer_close,
776
0
        writer_drop
777
0
        );
778
0
    writer->ctx = ctx;
779
0
    writer->output = out;
780
0
    if (get_bool_option(ctx, options, "html", 0)) format = extract_format_HTML;
781
0
    if (get_bool_option(ctx, options, "text", 0)) format = extract_format_TEXT;
782
0
    if (get_bool_option(ctx, options, "json", 0)) format = extract_format_JSON;
783
0
    if (extract_alloc_create(s_realloc_fn, writer, &writer->alloc))
784
0
      fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to create extract_alloc instance");
785
0
    if (extract_begin(writer->alloc, format, &writer->extract))
786
0
      fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to create extract instance");
787
0
    if (space_guess)
788
0
      extract_set_space_guess(writer->extract, space_guess);
789
0
    writer->spacing = get_bool_option(ctx, options, "spacing", 0);
790
0
    writer->rotation = get_bool_option(ctx, options, "rotation", 1);
791
0
    writer->images = get_bool_option(ctx, options, "images", 1);
792
0
    writer->mediabox_clip = get_bool_option(ctx, options, "mediabox-clip", 1);
793
0
    if (extract_set_layout_analysis(writer->extract, get_bool_option(ctx, options, "analyse", 0)))
794
0
      fz_throw(ctx, FZ_ERROR_LIBRARY, "extract_enable_analysis failed.");
795
0
    {
796
0
      const char* v;
797
0
      if (fz_has_option(ctx, options, "tables-csv-format", &v))
798
0
      {
799
0
        size_t len = strlen(v) + 1; /* Might include trailing options. */
800
0
        char* formatbuf = fz_malloc(ctx, len);
801
0
        fz_copy_option(ctx, v, formatbuf, len);
802
0
        fprintf(stderr, "tables-csv-format: %s\n", formatbuf);
803
0
        if (extract_tables_csv_format(writer->extract, formatbuf))
804
0
        {
805
0
          fz_free(ctx, formatbuf);
806
0
          fz_throw(ctx, FZ_ERROR_LIBRARY, "extract_tables_csv_format() failed.");
807
0
        }
808
0
        fz_free(ctx, formatbuf);
809
0
      }
810
0
    }
811
0
    writer->ctx = NULL;
812
0
  }
813
0
  fz_catch(ctx)
814
0
  {
815
    /* fz_drop_document_writer() drops its output so we only need to call
816
    fz_drop_output() if we failed before creating the writer. */
817
0
    if (writer)
818
0
    {
819
0
      writer->ctx = ctx;
820
0
      fz_drop_document_writer(ctx, &writer->super);
821
0
      writer->ctx = NULL;
822
0
    }
823
0
    else
824
0
      fz_drop_output(ctx, out);
825
0
    fz_rethrow(ctx);
826
0
  }
827
0
  return &writer->super;
828
0
}
829
830
/*
 * Creates a DOCX writer that writes to <out>. <out> is dropped if creation
 * fails (see fz_new_docx_writer_internal()).
 */
fz_document_writer *
fz_new_docx_writer_with_output(fz_context *ctx, fz_output *out, const char *options)
{
	fz_document_writer *wri = fz_new_docx_writer_internal(ctx, out, options, extract_format_DOCX);

	return wri;
}
834
835
/* Creates a DOCX writer that writes to the file at <path> (not appending). */
fz_document_writer *
fz_new_docx_writer(fz_context *ctx, const char *path, const char *options)
{
	fz_output *out = fz_new_output_with_path(ctx, path, 0 /*append*/);
	fz_document_writer *wri;

	/* If this throws there is nothing for us to clean up: the callee
	always drops <out> when it fails. */
	wri = fz_new_docx_writer_internal(ctx, out, options, extract_format_DOCX);
	return wri;
}
842
843
#if FZ_ENABLE_ODT_OUTPUT
844
845
/*
 * Creates an ODT writer that writes to <out>. <out> is dropped if creation
 * fails (see fz_new_docx_writer_internal()).
 */
fz_document_writer *
fz_new_odt_writer_with_output(fz_context *ctx, fz_output *out, const char *options)
{
	fz_document_writer *wri = fz_new_docx_writer_internal(ctx, out, options, extract_format_ODT);

	return wri;
}
849
850
/* Creates an ODT writer that writes to the file at <path> (not appending). */
fz_document_writer *
fz_new_odt_writer(fz_context *ctx, const char *path, const char *options)
{
	fz_output *out = fz_new_output_with_path(ctx, path, 0 /*append*/);
	fz_document_writer *wri;

	/* If this throws there is nothing for us to clean up: the callee
	always drops <out> when it fails. */
	wri = fz_new_docx_writer_internal(ctx, out, options, extract_format_ODT);
	return wri;
}
857
858
#else
859
860
/*
 * Stub used when ODT output is disabled: always throws FZ_ERROR_UNSUPPORTED.
 *
 * <out> is dropped before throwing, matching the enabled implementation,
 * which always drops <out> when it fails; otherwise the output would leak.
 */
fz_document_writer *fz_new_odt_writer_with_output(fz_context *ctx, fz_output *out, const char *options)
{
	fz_drop_output(ctx, out);
	fz_throw(ctx, FZ_ERROR_UNSUPPORTED, "ODT writer not enabled");
	return NULL;
}
865
866
/* Stub used when ODT output is disabled: always throws FZ_ERROR_UNSUPPORTED. */
fz_document_writer *
fz_new_odt_writer(fz_context *ctx, const char *path, const char *options)
{
	fz_throw(ctx, FZ_ERROR_UNSUPPORTED, "ODT writer not enabled");
	/* Not reached when fz_throw() does not return. */
	return NULL;
}
871
872
#endif
873
874
#else
875
876
/*
 * Stub used when DOCX output is disabled: always throws FZ_ERROR_UNSUPPORTED.
 *
 * <out> is dropped before throwing, matching the enabled implementation,
 * which always drops <out> when it fails; otherwise the output would leak.
 */
fz_document_writer *fz_new_odt_writer_with_output(fz_context *ctx, fz_output *out, const char *options)
{
	fz_drop_output(ctx, out);
	fz_throw(ctx, FZ_ERROR_UNSUPPORTED, "DOCX/ODT writer not enabled");
	return NULL;
}
881
882
/* Stub used when DOCX output is disabled: always throws FZ_ERROR_UNSUPPORTED. */
fz_document_writer *
fz_new_odt_writer(fz_context *ctx, const char *path, const char *options)
{
	fz_throw(ctx, FZ_ERROR_UNSUPPORTED, "DOCX/ODT writer not enabled");
	/* Not reached when fz_throw() does not return. */
	return NULL;
}
887
888
/*
 * Stub used when DOCX output is disabled: always throws FZ_ERROR_UNSUPPORTED.
 *
 * <out> is dropped before throwing, matching the enabled implementation,
 * which always drops <out> when it fails; otherwise the output would leak.
 */
fz_document_writer *fz_new_docx_writer_with_output(fz_context *ctx, fz_output *out, const char *options)
{
	fz_drop_output(ctx, out);
	fz_throw(ctx, FZ_ERROR_UNSUPPORTED, "DOCX writer not enabled");
	return NULL;
}
893
894
/* Stub used when DOCX output is disabled: always throws FZ_ERROR_UNSUPPORTED. */
fz_document_writer *
fz_new_docx_writer(fz_context *ctx, const char *path, const char *options)
{
	fz_throw(ctx, FZ_ERROR_UNSUPPORTED, "DOCX writer not enabled");
	/* Not reached when fz_throw() does not return. */
	return NULL;
}
899
900
#endif