Coverage Report

Created: 2025-07-23 08:13

/src/poppler/cpp/poppler-page.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (C) 2009-2010, Pino Toscano <pino@kde.org>
3
 * Copyright (C) 2017-2020, Albert Astals Cid <aacid@kde.org>
4
 * Copyright (C) 2017, Jason Alan Palmer <jalanpalmer@gmail.com>
5
 * Copyright (C) 2018, 2020, Suzuki Toshiya <mpsuzuki@hiroshima-u.ac.jp>
6
 * Copyright (C) 2018, 2020, Adam Reichold <adam.reichold@t-online.de>
7
 * Copyright (C) 2018, Zsombor Hollay-Horvath <hollay.horvath@gmail.com>
8
 * Copyright (C) 2018, Aleksey Nikolaev <nae202@gmail.com>
9
 * Copyright (C) 2020, Jiri Jakes <freedesktop@jirijakes.eu>
10
 *
11
 * This program is free software; you can redistribute it and/or modify
12
 * it under the terms of the GNU General Public License as published by
13
 * the Free Software Foundation; either version 2, or (at your option)
14
 * any later version.
15
 *
16
 * This program is distributed in the hope that it will be useful,
17
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19
 * GNU General Public License for more details.
20
 *
21
 * You should have received a copy of the GNU General Public License
22
 * along with this program; if not, write to the Free Software
23
 * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.
24
 */
25
26
/**
27
 \file poppler-page.h
28
 */
29
#include "poppler-page.h"
30
#include "poppler-page-transition.h"
31
32
#include "poppler-document-private.h"
33
#include "poppler-page-private.h"
34
#include "poppler-private.h"
35
#include "poppler-font-private.h"
36
#include "poppler-font.h"
37
38
#include "TextOutputDev.h"
39
40
#include <algorithm>
41
#include <memory>
42
#include <utility>
43
44
using namespace poppler;
45
46
211k
/*
 * Stores the owning document and the page index, and looks the page object
 * up in the document catalog (the catalog is queried with _index + 1).
 * The transition and the font info cache start out unset.
 */
page_private::page_private(document_private *_doc, int _index)
    : doc(_doc),
      page(doc->doc->getCatalog()->getPage(_index + 1)),
      index(_index),
      transition(nullptr),
      font_info_cache_initialized(false)
{
}
47
48
/*
 * Frees the transition object created on demand by page::transition().
 * Deleting a null pointer is a no-op, so no check is needed.
 */
page_private::~page_private()
{
    delete transition;
}
52
53
void page_private::init_font_info_cache()
54
1.46M
{
55
1.46M
    if (font_info_cache_initialized) {
56
1.45M
        return;
57
1.45M
    }
58
59
8.96k
    poppler::font_iterator it(index, doc);
60
61
8.96k
    if (it.has_next()) {
62
8.96k
        font_info_cache = it.next();
63
8.96k
    }
64
65
8.96k
    font_info_cache_initialized = true;
66
8.96k
}
67
68
/**
69
 \class poppler::page poppler-page.h "poppler/cpp/poppler-page.h"
70
71
 A page in a PDF %document.
72
 */
73
74
/**
75
 \enum poppler::page::orientation_enum
76
77
 The possible orientations of a page.
78
*/
79
80
/**
81
 \enum poppler::page::search_direction_enum
82
83
 The direction/action to follow when performing a text search.
84
*/
85
86
/**
87
 \enum poppler::page::text_layout_enum
88
89
 A layout of the text of a page.
90
*/
91
92
211k
/*
 * Creates the page object for the given index of the document, allocating
 * the private data that is released again in the destructor.
 */
page::page(document_private *doc, int index)
    : d(new page_private(doc, index))
{
}
93
94
/**
95
 Destructor.
96
 */
97
page::~page()
98
211k
{
99
211k
    delete d;
100
211k
}
101
102
/**
103
 \returns the orientation of the page
104
 */
105
page::orientation_enum page::orientation() const
106
0
{
107
0
    const int rotation = d->page->getRotate();
108
0
    switch (rotation) {
109
0
    case 90:
110
0
        return landscape;
111
0
        break;
112
0
    case 180:
113
0
        return upside_down;
114
0
        break;
115
0
    case 270:
116
0
        return seascape;
117
0
        break;
118
0
    default:
119
0
        return portrait;
120
0
    }
121
0
}
122
123
/**
124
 The eventual duration the page can be hinted to be shown in a presentation.
125
126
 If this value is positive (usually different than -1) then a PDF viewer, when
127
 showing the page in a presentation, should show the page for at most for this
128
 number of seconds, and then switch to the next page (if any). Note this is
129
 purely a presentation attribute, it has no influence on the behaviour.
130
131
 \returns the duration time (in seconds) of the page
132
 */
133
double page::duration() const
134
0
{
135
0
    return d->page->getDuration();
136
0
}
137
138
/**
139
 Returns the size of one rect of the page.
140
141
 \returns the size of the specified page rect
142
 */
143
rectf page::page_rect(page_box_enum box) const
144
13.8k
{
145
13.8k
    const PDFRectangle *r = nullptr;
146
13.8k
    switch (box) {
147
0
    case media_box:
148
0
        r = d->page->getMediaBox();
149
0
        break;
150
13.8k
    case crop_box:
151
13.8k
        r = d->page->getCropBox();
152
13.8k
        break;
153
0
    case bleed_box:
154
0
        r = d->page->getBleedBox();
155
0
        break;
156
0
    case trim_box:
157
0
        r = d->page->getTrimBox();
158
0
        break;
159
0
    case art_box:
160
0
        r = d->page->getArtBox();
161
0
        break;
162
13.8k
    }
163
13.8k
    if (r) {
164
13.8k
        return detail::pdfrectangle_to_rectf(*r);
165
13.8k
    }
166
0
    return rectf();
167
13.8k
}
168
169
/**
170
 \returns the label of the page, if any
171
 */
172
ustring page::label() const
173
35.0k
{
174
35.0k
    GooString goo;
175
35.0k
    if (!d->doc->doc->getCatalog()->indexToLabel(d->index, &goo)) {
176
684
        return ustring();
177
684
    }
178
179
34.3k
    return detail::unicode_GooString_to_ustring(&goo);
180
35.0k
}
181
182
/**
183
 The transition from this page to the next one.
184
185
 If it is set, then a PDF viewer in a presentation should perform the
186
 specified transition effect when switching from this page to the next one.
187
188
 \returns the transition effect for the switch to the next page, if any
189
 */
190
page_transition *page::transition() const
191
0
{
192
0
    if (!d->transition) {
193
0
        Object o = d->page->getTrans();
194
0
        if (o.isDict()) {
195
0
            d->transition = new page_transition(&o);
196
0
        }
197
0
    }
198
0
    return d->transition;
199
0
}
200
201
/**
202
 Search the page for some text.
203
204
 \param text the text to search
205
 \param[in,out] r the area where to start search, which will be set to the area
206
                  of the match (if any)
207
 \param direction in which direction search for text
208
 \param case_sensitivity whether search in a case sensitive way
209
 \param rotation the rotation assumed for the page
210
 */
211
bool page::search(const ustring &text, rectf &r, search_direction_enum direction, case_sensitivity_enum case_sensitivity, rotation_enum rotation) const
212
13.8k
{
213
13.8k
    const size_t len = text.length();
214
215
13.8k
    if (len == 0) {
216
745
        return false;
217
745
    }
218
219
13.1k
    std::vector<Unicode> u(len);
220
225k
    for (size_t i = 0; i < len; ++i) {
221
212k
        u[i] = text[i];
222
212k
    }
223
224
13.1k
    const bool sCase = case_sensitivity == case_sensitive;
225
13.1k
    const int rotation_value = (int)rotation * 90;
226
227
13.1k
    bool found = false;
228
13.1k
    double rect_left = r.left();
229
13.1k
    double rect_top = r.top();
230
13.1k
    double rect_right = r.right();
231
13.1k
    double rect_bottom = r.bottom();
232
233
13.1k
    TextOutputDev td(nullptr, true, 0, false, false);
234
13.1k
    d->doc->doc->displayPage(&td, d->index + 1, 72, 72, rotation_value, false, true, false);
235
13.1k
    TextPage *text_page = td.takeText();
236
237
13.1k
    switch (direction) {
238
13.1k
    case search_from_top:
239
13.1k
        found = text_page->findText(&u[0], len, true, true, false, false, sCase, false, false, &rect_left, &rect_top, &rect_right, &rect_bottom);
240
13.1k
        break;
241
0
    case search_next_result:
242
0
        found = text_page->findText(&u[0], len, false, true, true, false, sCase, false, false, &rect_left, &rect_top, &rect_right, &rect_bottom);
243
0
        break;
244
0
    case search_previous_result:
245
0
        found = text_page->findText(&u[0], len, false, true, true, false, sCase, true, false, &rect_left, &rect_top, &rect_right, &rect_bottom);
246
0
        break;
247
13.1k
    }
248
249
13.1k
    text_page->decRefCnt();
250
13.1k
    r.set_left(rect_left);
251
13.1k
    r.set_top(rect_top);
252
13.1k
    r.set_right(rect_right);
253
13.1k
    r.set_bottom(rect_bottom);
254
255
13.1k
    return found;
256
13.1k
}
257
258
/**
259
 Returns the text in the page, in its physical layout.
260
261
 \param r if not empty, it will be extracted the text in it; otherwise, the
262
          text of the whole page
263
264
 \returns the text of the page in the specified rect or in the whole page
265
 */
266
ustring page::text(const rectf &r) const
267
0
{
268
0
    return text(r, physical_layout);
269
0
}
270
271
static void appendToGooString(void *stream, const char *text, int len)
272
0
{
273
0
    ((GooString *)stream)->append(text, len);
274
0
}
275
276
/**
277
 Returns the text in the page.
278
279
 \param rect if not empty, it will be extracted the text in it; otherwise, the
280
             text of the whole page
281
 \param layout_mode the layout of the text
282
283
 \returns the text of the page in the specified rect or in the whole page
284
285
 \since 0.16
286
 */
287
ustring page::text(const rectf &r, text_layout_enum layout_mode) const
288
0
{
289
0
    std::unique_ptr<GooString> out(new GooString());
290
0
    const bool use_raw_order = (layout_mode == raw_order_layout);
291
0
    const bool use_physical_layout = (layout_mode == physical_layout);
292
0
    TextOutputDev td(&appendToGooString, out.get(), use_physical_layout, 0, use_raw_order, false);
293
0
    if (r.is_empty()) {
294
0
        d->doc->doc->displayPage(&td, d->index + 1, 72, 72, 0, false, true, false);
295
0
    } else {
296
0
        d->doc->doc->displayPageSlice(&td, d->index + 1, 72, 72, 0, false, true, false, r.left(), r.top(), r.width(), r.height());
297
0
    }
298
0
    return ustring::from_utf8(out->c_str());
299
0
}
300
301
/*
302
 * text_box_font_info object for text_box
303
 */
304
1.46M
text_box_font_info_data::~text_box_font_info_data() = default;
305
306
/*
307
 * text_box object for page::text_list()
308
 */
309
1.46M
text_box_data::~text_box_data() = default;
310
311
2.92M
/*
 * Construction, move and destruction of text_box.
 *
 * A text_box is built around a text_box_data pointer stored in m_data; the
 * move operations and the destructor are the compiler-generated ones,
 * defined here out of line.
 */
text_box::text_box(text_box_data *data) : m_data { data } { }

text_box::text_box(text_box &&a) noexcept = default;

text_box &text_box::operator=(text_box &&a) noexcept = default;

text_box::~text_box() = default;
317
318
/*
 * Returns a copy of the text stored for this box.
 */
ustring text_box::text() const
{
    return m_data->text;
}
322
323
/*
 * Returns the bounding box stored for this box.
 */
rectf text_box::bbox() const
{
    return m_data->bbox;
}
327
328
int text_box::rotation() const
329
0
{
330
0
    return m_data->rotation;
331
0
}
332
333
/*
 * Returns the bounding box of the i-th character of this box, or an
 * all-zero rectangle when i is past the last stored character box.
 */
rectf text_box::char_bbox(size_t i) const
{
    const auto &boxes = m_data->char_bboxes;
    if (i >= boxes.size()) {
        return rectf(0, 0, 0, 0);
    }
    return boxes[i];
}
340
341
bool text_box::has_space_after() const
342
0
{
343
0
    return m_data->has_space_after;
344
0
}
345
346
bool text_box::has_font_info() const
347
0
{
348
0
    return (m_data->text_box_font != nullptr);
349
0
}
350
351
/*
 * Returns the writing mode of the i-th glyph of this box.
 *
 * invalid_wmode is returned when the box carries no font information, and
 * also when i is negative or not smaller than the number of stored writing
 * modes (an out-of-range index used to read outside the vector).
 */
text_box::writing_mode_enum text_box::get_wmode(int i) const
{
    if (!this->has_font_info()) {
        return text_box::invalid_wmode;
    }

    const auto &wmodes = m_data->text_box_font->wmodes;
    if (i < 0 || static_cast<size_t>(i) >= wmodes.size()) {
        return text_box::invalid_wmode;
    }

    return wmodes[static_cast<size_t>(i)];
}
359
360
double text_box::get_font_size() const
361
0
{
362
0
    if (this->has_font_info()) {
363
0
        return m_data->text_box_font->font_size;
364
0
    } else {
365
0
        return -1;
366
0
    }
367
0
}
368
369
std::string text_box::get_font_name(int i) const
370
0
{
371
0
    if (!this->has_font_info()) {
372
0
        return std::string("*ignored*");
373
0
    }
374
375
0
    int j = m_data->text_box_font->glyph_to_cache_index[i];
376
0
    if (j < 0) {
377
0
        return std::string("");
378
0
    }
379
0
    return m_data->text_box_font->font_info_cache[j].name();
380
0
}
381
382
std::vector<text_box> page::text_list(int opt_flag) const
383
18.3k
{
384
18.3k
    std::vector<text_box> output_list;
385
386
    /* config values are same with Qt5 Page::TextList() */
387
18.3k
    auto output_dev = std::make_unique<TextOutputDev>(nullptr, /* char* fileName */
388
18.3k
                                                      false, /* bool physLayoutA */
389
18.3k
                                                      0, /* double fixedPitchA */
390
18.3k
                                                      false, /* bool rawOrderA */
391
18.3k
                                                      false /* bool append */
392
18.3k
    );
393
394
    /*
395
     * config values are same with Qt5 Page::TextList(),
396
     * but rotation is fixed to zero.
397
     * Few people use non-zero values.
398
     */
399
18.3k
    d->doc->doc->displayPageSlice(output_dev.get(), d->index + 1, /* page */
400
18.3k
                                  72, 72, 0, /* hDPI, vDPI, rot */
401
18.3k
                                  false, false, false, /* useMediaBox, crop, printing */
402
18.3k
                                  -1, -1, -1, -1, /* sliceX, sliceY, sliceW, sliceH */
403
18.3k
                                  nullptr, nullptr, /* abortCheckCbk(), abortCheckCbkData */
404
18.3k
                                  nullptr, nullptr, /* annotDisplayDecideCbk(), annotDisplayDecideCbkData */
405
18.3k
                                  true); /* copyXRef */
406
407
18.3k
    if (std::unique_ptr<TextWordList> word_list { output_dev->makeWordList() }) {
408
409
18.3k
        output_list.reserve(word_list->getLength());
410
1.47M
        for (int i = 0; i < word_list->getLength(); i++) {
411
1.46M
            TextWord *word = word_list->get(i);
412
413
1.46M
            std::unique_ptr<GooString> gooWord { word->getText() };
414
1.46M
            ustring ustr = ustring::from_utf8(gooWord->c_str());
415
416
1.46M
            double xMin, yMin, xMax, yMax;
417
1.46M
            word->getBBox(&xMin, &yMin, &xMax, &yMax);
418
419
1.46M
            text_box tb { new text_box_data { ustr, { xMin, yMin, xMax - xMin, yMax - yMin }, word->getRotation(), {}, word->hasSpaceAfter() == true, nullptr } };
420
421
1.46M
            std::unique_ptr<text_box_font_info_data> tb_font_info = nullptr;
422
1.46M
            if (opt_flag & page::text_list_include_font) {
423
1.46M
                d->init_font_info_cache();
424
425
1.46M
                std::unique_ptr<text_box_font_info_data> tb_font { new text_box_font_info_data {
426
1.46M
                        word->getFontSize(), // double font_size
427
1.46M
                        {}, // std::vector<text_box::writing_mode> wmodes;
428
1.46M
                        d->font_info_cache, // std::vector<font_info> font_info_cache;
429
1.46M
                        {} // std::vector<int> glyph_to_cache_index;
430
1.46M
                } };
431
432
1.46M
                tb_font_info = std::move(tb_font);
433
1.46M
            };
434
435
1.46M
            tb.m_data->char_bboxes.reserve(word->getLength());
436
8.40M
            for (int j = 0; j < word->getLength(); j++) {
437
6.94M
                word->getCharBBox(j, &xMin, &yMin, &xMax, &yMax);
438
6.94M
                tb.m_data->char_bboxes.emplace_back(xMin, yMin, xMax - xMin, yMax - yMin);
439
6.94M
            }
440
441
1.46M
            if (tb_font_info && d->font_info_cache_initialized) {
442
1.46M
                tb_font_info->glyph_to_cache_index.reserve(word->getLength());
443
8.40M
                for (int j = 0; j < word->getLength(); j++) {
444
6.94M
                    const TextFontInfo *cur_text_font_info = word->getFontInfo(j);
445
446
                    // filter-out the invalid WMode value here.
447
6.94M
                    switch (cur_text_font_info->getWMode()) {
448
6.93M
                    case 0:
449
6.93M
                        tb_font_info->wmodes.push_back(text_box::horizontal_wmode);
450
6.93M
                        break;
451
7.69k
                    case 1:
452
7.69k
                        tb_font_info->wmodes.push_back(text_box::vertical_wmode);
453
7.69k
                        break;
454
36
                    default:
455
36
                        tb_font_info->wmodes.push_back(text_box::invalid_wmode);
456
6.94M
                    };
457
458
6.94M
                    tb_font_info->glyph_to_cache_index.push_back(-1);
459
18.3M
                    for (size_t k = 0; k < tb_font_info->font_info_cache.size(); k++) {
460
18.3M
                        if (cur_text_font_info->matches(&(tb_font_info->font_info_cache[k].d->ref))) {
461
6.94M
                            tb_font_info->glyph_to_cache_index[j] = k;
462
6.94M
                            break;
463
6.94M
                        }
464
18.3M
                    }
465
6.94M
                }
466
1.46M
                tb.m_data->text_box_font = std::move(tb_font_info);
467
1.46M
            }
468
469
1.46M
            output_list.push_back(std::move(tb));
470
1.46M
        }
471
18.3k
    }
472
473
18.3k
    return output_list;
474
18.3k
}
475
476
std::vector<text_box> page::text_list() const
477
0
{
478
0
    return text_list(0);
479
0
}