/src/poppler/cpp/poppler-page.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (C) 2009-2010, Pino Toscano <pino@kde.org> |
3 | | * Copyright (C) 2017-2020, Albert Astals Cid <aacid@kde.org> |
4 | | * Copyright (C) 2017, Jason Alan Palmer <jalanpalmer@gmail.com> |
5 | | * Copyright (C) 2018, 2020, Suzuki Toshiya <mpsuzuki@hiroshima-u.ac.jp> |
6 | | * Copyright (C) 2018, 2020, Adam Reichold <adam.reichold@t-online.de> |
7 | | * Copyright (C) 2018, Zsombor Hollay-Horvath <hollay.horvath@gmail.com> |
8 | | * Copyright (C) 2018, Aleksey Nikolaev <nae202@gmail.com> |
9 | | * Copyright (C) 2020, Jiri Jakes <freedesktop@jirijakes.eu> |
10 | | * |
11 | | * This program is free software; you can redistribute it and/or modify |
12 | | * it under the terms of the GNU General Public License as published by |
13 | | * the Free Software Foundation; either version 2, or (at your option) |
14 | | * any later version. |
15 | | * |
16 | | * This program is distributed in the hope that it will be useful, |
17 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
18 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
19 | | * GNU General Public License for more details. |
20 | | * |
21 | | * You should have received a copy of the GNU General Public License |
22 | | * along with this program; if not, write to the Free Software |
23 | | * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. |
24 | | */ |
25 | | |
26 | | /** |
27 | | \file poppler-page.h |
28 | | */ |
29 | | #include "poppler-page.h" |
30 | | #include "poppler-page-transition.h" |
31 | | |
32 | | #include "poppler-document-private.h" |
33 | | #include "poppler-page-private.h" |
34 | | #include "poppler-private.h" |
35 | | #include "poppler-font-private.h" |
36 | | #include "poppler-font.h" |
37 | | |
38 | | #include "TextOutputDev.h" |
39 | | |
40 | | #include <algorithm> |
41 | | #include <memory> |
42 | | #include <utility> |
43 | | |
44 | | using namespace poppler; |
45 | | |
46 | 211k | page_private::page_private(document_private *_doc, int _index) : doc(_doc), page(doc->doc->getCatalog()->getPage(_index + 1)), index(_index), transition(nullptr), font_info_cache_initialized(false) { } |
47 | | |
48 | | page_private::~page_private() |
49 | 211k | { |
50 | 211k | delete transition; |
51 | 211k | } |
52 | | |
53 | | void page_private::init_font_info_cache() |
54 | 1.46M | { |
55 | 1.46M | if (font_info_cache_initialized) { |
56 | 1.45M | return; |
57 | 1.45M | } |
58 | | |
59 | 8.96k | poppler::font_iterator it(index, doc); |
60 | | |
61 | 8.96k | if (it.has_next()) { |
62 | 8.96k | font_info_cache = it.next(); |
63 | 8.96k | } |
64 | | |
65 | 8.96k | font_info_cache_initialized = true; |
66 | 8.96k | } |
67 | | |
68 | | /** |
69 | | \class poppler::page poppler-page.h "poppler/cpp/poppler-page.h" |
70 | | |
71 | | A page in a PDF %document. |
72 | | */ |
73 | | |
74 | | /** |
75 | | \enum poppler::page::orientation_enum |
76 | | |
77 | | The possible orientation of a page. |
78 | | */ |
79 | | |
80 | | /** |
81 | | \enum poppler::page::search_direction_enum |
82 | | |
83 | | The direction/action to follow when performing a text search. |
84 | | */ |
85 | | |
86 | | /** |
87 | | \enum poppler::page::text_layout_enum |
88 | | |
89 | | A layout of the text of a page. |
90 | | */ |
91 | | |
92 | 211k | page::page(document_private *doc, int index) : d(new page_private(doc, index)) { } |
93 | | |
94 | | /** |
95 | | Destructor. |
96 | | */ |
97 | | page::~page() |
98 | 211k | { |
99 | 211k | delete d; |
100 | 211k | } |
101 | | |
102 | | /** |
103 | | \returns the orientation of the page |
104 | | */ |
105 | | page::orientation_enum page::orientation() const |
106 | 0 | { |
107 | 0 | const int rotation = d->page->getRotate(); |
108 | 0 | switch (rotation) { |
109 | 0 | case 90: |
110 | 0 | return landscape; |
111 | 0 | break; |
112 | 0 | case 180: |
113 | 0 | return upside_down; |
114 | 0 | break; |
115 | 0 | case 270: |
116 | 0 | return seascape; |
117 | 0 | break; |
118 | 0 | default: |
119 | 0 | return portrait; |
120 | 0 | } |
121 | 0 | } |
122 | | |
123 | | /** |
124 | | The eventual duration the page can be hinted to be shown in a presentation. |
125 | | |
126 | | If this value is positive (usually different than -1) then a PDF viewer, when |
127 | | showing the page in a presentation, should show the page for at most for this |
128 | | number of seconds, and then switch to the next page (if any). Note this is |
129 | | purely a presentation attribute, it has no influence on the behaviour. |
130 | | |
131 | | \returns the duration time (in seconds) of the page |
132 | | */ |
133 | | double page::duration() const |
134 | 0 | { |
135 | 0 | return d->page->getDuration(); |
136 | 0 | } |
137 | | |
138 | | /** |
139 | | Returns the size of one rect of the page. |
140 | | |
141 | | \returns the size of the specified page rect |
142 | | */ |
143 | | rectf page::page_rect(page_box_enum box) const |
144 | 13.8k | { |
145 | 13.8k | const PDFRectangle *r = nullptr; |
146 | 13.8k | switch (box) { |
147 | 0 | case media_box: |
148 | 0 | r = d->page->getMediaBox(); |
149 | 0 | break; |
150 | 13.8k | case crop_box: |
151 | 13.8k | r = d->page->getCropBox(); |
152 | 13.8k | break; |
153 | 0 | case bleed_box: |
154 | 0 | r = d->page->getBleedBox(); |
155 | 0 | break; |
156 | 0 | case trim_box: |
157 | 0 | r = d->page->getTrimBox(); |
158 | 0 | break; |
159 | 0 | case art_box: |
160 | 0 | r = d->page->getArtBox(); |
161 | 0 | break; |
162 | 13.8k | } |
163 | 13.8k | if (r) { |
164 | 13.8k | return detail::pdfrectangle_to_rectf(*r); |
165 | 13.8k | } |
166 | 0 | return rectf(); |
167 | 13.8k | } |
168 | | |
169 | | /** |
170 | | \returns the label of the page, if any |
171 | | */ |
172 | | ustring page::label() const |
173 | 35.0k | { |
174 | 35.0k | GooString goo; |
175 | 35.0k | if (!d->doc->doc->getCatalog()->indexToLabel(d->index, &goo)) { |
176 | 684 | return ustring(); |
177 | 684 | } |
178 | | |
179 | 34.3k | return detail::unicode_GooString_to_ustring(&goo); |
180 | 35.0k | } |
181 | | |
182 | | /** |
183 | | The transition from this page to the next one. |
184 | | |
185 | | If it is set, then a PDF viewer in a presentation should perform the |
186 | | specified transition effect when switching from this page to the next one. |
187 | | |
188 | | \returns the transition effect for the switch to the next page, if any |
189 | | */ |
190 | | page_transition *page::transition() const |
191 | 0 | { |
192 | 0 | if (!d->transition) { |
193 | 0 | Object o = d->page->getTrans(); |
194 | 0 | if (o.isDict()) { |
195 | 0 | d->transition = new page_transition(&o); |
196 | 0 | } |
197 | 0 | } |
198 | 0 | return d->transition; |
199 | 0 | } |
200 | | |
201 | | /** |
202 | | Search the page for some text. |
203 | | |
204 | | \param text the text to search |
205 | | \param[in,out] r the area where to start search, which will be set to the area |
206 | | of the match (if any) |
207 | | \param direction in which direction search for text |
208 | | \param case_sensitivity whether search in a case sensitive way |
209 | | \param rotation the rotation assumed for the page |
210 | | */ |
211 | | bool page::search(const ustring &text, rectf &r, search_direction_enum direction, case_sensitivity_enum case_sensitivity, rotation_enum rotation) const |
212 | 13.8k | { |
213 | 13.8k | const size_t len = text.length(); |
214 | | |
215 | 13.8k | if (len == 0) { |
216 | 745 | return false; |
217 | 745 | } |
218 | | |
219 | 13.1k | std::vector<Unicode> u(len); |
220 | 225k | for (size_t i = 0; i < len; ++i) { |
221 | 212k | u[i] = text[i]; |
222 | 212k | } |
223 | | |
224 | 13.1k | const bool sCase = case_sensitivity == case_sensitive; |
225 | 13.1k | const int rotation_value = (int)rotation * 90; |
226 | | |
227 | 13.1k | bool found = false; |
228 | 13.1k | double rect_left = r.left(); |
229 | 13.1k | double rect_top = r.top(); |
230 | 13.1k | double rect_right = r.right(); |
231 | 13.1k | double rect_bottom = r.bottom(); |
232 | | |
233 | 13.1k | TextOutputDev td(nullptr, true, 0, false, false); |
234 | 13.1k | d->doc->doc->displayPage(&td, d->index + 1, 72, 72, rotation_value, false, true, false); |
235 | 13.1k | TextPage *text_page = td.takeText(); |
236 | | |
237 | 13.1k | switch (direction) { |
238 | 13.1k | case search_from_top: |
239 | 13.1k | found = text_page->findText(&u[0], len, true, true, false, false, sCase, false, false, &rect_left, &rect_top, &rect_right, &rect_bottom); |
240 | 13.1k | break; |
241 | 0 | case search_next_result: |
242 | 0 | found = text_page->findText(&u[0], len, false, true, true, false, sCase, false, false, &rect_left, &rect_top, &rect_right, &rect_bottom); |
243 | 0 | break; |
244 | 0 | case search_previous_result: |
245 | 0 | found = text_page->findText(&u[0], len, false, true, true, false, sCase, true, false, &rect_left, &rect_top, &rect_right, &rect_bottom); |
246 | 0 | break; |
247 | 13.1k | } |
248 | | |
249 | 13.1k | text_page->decRefCnt(); |
250 | 13.1k | r.set_left(rect_left); |
251 | 13.1k | r.set_top(rect_top); |
252 | 13.1k | r.set_right(rect_right); |
253 | 13.1k | r.set_bottom(rect_bottom); |
254 | | |
255 | 13.1k | return found; |
256 | 13.1k | } |
257 | | |
258 | | /** |
259 | | Returns the text in the page, in its physical layout. |
260 | | |
261 | | \param r if not empty, it will be extracted the text in it; otherwise, the |
262 | | text of the whole page |
263 | | |
264 | | \returns the text of the page in the specified rect or in the whole page |
265 | | */ |
266 | | ustring page::text(const rectf &r) const |
267 | 0 | { |
268 | 0 | return text(r, physical_layout); |
269 | 0 | } |
270 | | |
271 | | static void appendToGooString(void *stream, const char *text, int len) |
272 | 0 | { |
273 | 0 | ((GooString *)stream)->append(text, len); |
274 | 0 | } |
275 | | |
276 | | /** |
277 | | Returns the text in the page. |
278 | | |
279 | | \param rect if not empty, it will be extracted the text in it; otherwise, the |
280 | | text of the whole page |
281 | | \param layout_mode the layout of the text |
282 | | |
283 | | \returns the text of the page in the specified rect or in the whole page |
284 | | |
285 | | \since 0.16 |
286 | | */ |
287 | | ustring page::text(const rectf &r, text_layout_enum layout_mode) const |
288 | 0 | { |
289 | 0 | std::unique_ptr<GooString> out(new GooString()); |
290 | 0 | const bool use_raw_order = (layout_mode == raw_order_layout); |
291 | 0 | const bool use_physical_layout = (layout_mode == physical_layout); |
292 | 0 | TextOutputDev td(&appendToGooString, out.get(), use_physical_layout, 0, use_raw_order, false); |
293 | 0 | if (r.is_empty()) { |
294 | 0 | d->doc->doc->displayPage(&td, d->index + 1, 72, 72, 0, false, true, false); |
295 | 0 | } else { |
296 | 0 | d->doc->doc->displayPageSlice(&td, d->index + 1, 72, 72, 0, false, true, false, r.left(), r.top(), r.width(), r.height()); |
297 | 0 | } |
298 | 0 | return ustring::from_utf8(out->c_str()); |
299 | 0 | } |
300 | | |
301 | | /* |
302 | | * text_box_font_info object for text_box |
303 | | */ |
304 | 1.46M | text_box_font_info_data::~text_box_font_info_data() = default; |
305 | | |
306 | | /* |
307 | | * text_box object for page::text_list() |
308 | | */ |
309 | 1.46M | text_box_data::~text_box_data() = default; |
310 | | |
311 | 2.92M | text_box::~text_box() = default; |
312 | | |
313 | 0 | text_box &text_box::operator=(text_box &&a) noexcept = default; |
314 | 1.46M | text_box::text_box(text_box &&a) noexcept = default; |
315 | | |
316 | 1.46M | text_box::text_box(text_box_data *data) : m_data { data } { } |
317 | | |
318 | | ustring text_box::text() const |
319 | 0 | { |
320 | 0 | return m_data->text; |
321 | 0 | } |
322 | | |
323 | | rectf text_box::bbox() const |
324 | 0 | { |
325 | 0 | return m_data->bbox; |
326 | 0 | } |
327 | | |
328 | | int text_box::rotation() const |
329 | 0 | { |
330 | 0 | return m_data->rotation; |
331 | 0 | } |
332 | | |
333 | | rectf text_box::char_bbox(size_t i) const |
334 | 0 | { |
335 | 0 | if (i < m_data->char_bboxes.size()) { |
336 | 0 | return m_data->char_bboxes[i]; |
337 | 0 | } |
338 | 0 | return rectf(0, 0, 0, 0); |
339 | 0 | } |
340 | | |
341 | | bool text_box::has_space_after() const |
342 | 0 | { |
343 | 0 | return m_data->has_space_after; |
344 | 0 | } |
345 | | |
346 | | bool text_box::has_font_info() const |
347 | 0 | { |
348 | 0 | return (m_data->text_box_font != nullptr); |
349 | 0 | } |
350 | | |
351 | | text_box::writing_mode_enum text_box::get_wmode(int i) const |
352 | 0 | { |
353 | 0 | if (this->has_font_info()) { |
354 | 0 | return m_data->text_box_font->wmodes[i]; |
355 | 0 | } else { |
356 | 0 | return text_box::invalid_wmode; |
357 | 0 | } |
358 | 0 | } |
359 | | |
360 | | double text_box::get_font_size() const |
361 | 0 | { |
362 | 0 | if (this->has_font_info()) { |
363 | 0 | return m_data->text_box_font->font_size; |
364 | 0 | } else { |
365 | 0 | return -1; |
366 | 0 | } |
367 | 0 | } |
368 | | |
369 | | std::string text_box::get_font_name(int i) const |
370 | 0 | { |
371 | 0 | if (!this->has_font_info()) { |
372 | 0 | return std::string("*ignored*"); |
373 | 0 | } |
374 | | |
375 | 0 | int j = m_data->text_box_font->glyph_to_cache_index[i]; |
376 | 0 | if (j < 0) { |
377 | 0 | return std::string(""); |
378 | 0 | } |
379 | 0 | return m_data->text_box_font->font_info_cache[j].name(); |
380 | 0 | } |
381 | | |
382 | | std::vector<text_box> page::text_list(int opt_flag) const |
383 | 18.3k | { |
384 | 18.3k | std::vector<text_box> output_list; |
385 | | |
386 | | /* config values are same with Qt5 Page::TextList() */ |
387 | 18.3k | auto output_dev = std::make_unique<TextOutputDev>(nullptr, /* char* fileName */ |
388 | 18.3k | false, /* bool physLayoutA */ |
389 | 18.3k | 0, /* double fixedPitchA */ |
390 | 18.3k | false, /* bool rawOrderA */ |
391 | 18.3k | false /* bool append */ |
392 | 18.3k | ); |
393 | | |
394 | | /* |
395 | | * config values are same with Qt5 Page::TextList(), |
396 | | * but rotation is fixed to zero. |
397 | | * Few people use non-zero values. |
398 | | */ |
399 | 18.3k | d->doc->doc->displayPageSlice(output_dev.get(), d->index + 1, /* page */ |
400 | 18.3k | 72, 72, 0, /* hDPI, vDPI, rot */ |
401 | 18.3k | false, false, false, /* useMediaBox, crop, printing */ |
402 | 18.3k | -1, -1, -1, -1, /* sliceX, sliceY, sliceW, sliceH */ |
403 | 18.3k | nullptr, nullptr, /* abortCheckCbk(), abortCheckCbkData */ |
404 | 18.3k | nullptr, nullptr, /* annotDisplayDecideCbk(), annotDisplayDecideCbkData */ |
405 | 18.3k | true); /* copyXRef */ |
406 | | |
407 | 18.3k | if (std::unique_ptr<TextWordList> word_list { output_dev->makeWordList() }) { |
408 | | |
409 | 18.3k | output_list.reserve(word_list->getLength()); |
410 | 1.47M | for (int i = 0; i < word_list->getLength(); i++) { |
411 | 1.46M | TextWord *word = word_list->get(i); |
412 | | |
413 | 1.46M | std::unique_ptr<GooString> gooWord { word->getText() }; |
414 | 1.46M | ustring ustr = ustring::from_utf8(gooWord->c_str()); |
415 | | |
416 | 1.46M | double xMin, yMin, xMax, yMax; |
417 | 1.46M | word->getBBox(&xMin, &yMin, &xMax, &yMax); |
418 | | |
419 | 1.46M | text_box tb { new text_box_data { ustr, { xMin, yMin, xMax - xMin, yMax - yMin }, word->getRotation(), {}, word->hasSpaceAfter() == true, nullptr } }; |
420 | | |
421 | 1.46M | std::unique_ptr<text_box_font_info_data> tb_font_info = nullptr; |
422 | 1.46M | if (opt_flag & page::text_list_include_font) { |
423 | 1.46M | d->init_font_info_cache(); |
424 | | |
425 | 1.46M | std::unique_ptr<text_box_font_info_data> tb_font { new text_box_font_info_data { |
426 | 1.46M | word->getFontSize(), // double font_size |
427 | 1.46M | {}, // std::vector<text_box::writing_mode> wmodes; |
428 | 1.46M | d->font_info_cache, // std::vector<font_info> font_info_cache; |
429 | 1.46M | {} // std::vector<int> glyph_to_cache_index; |
430 | 1.46M | } }; |
431 | | |
432 | 1.46M | tb_font_info = std::move(tb_font); |
433 | 1.46M | }; |
434 | | |
435 | 1.46M | tb.m_data->char_bboxes.reserve(word->getLength()); |
436 | 8.40M | for (int j = 0; j < word->getLength(); j++) { |
437 | 6.94M | word->getCharBBox(j, &xMin, &yMin, &xMax, &yMax); |
438 | 6.94M | tb.m_data->char_bboxes.emplace_back(xMin, yMin, xMax - xMin, yMax - yMin); |
439 | 6.94M | } |
440 | | |
441 | 1.46M | if (tb_font_info && d->font_info_cache_initialized) { |
442 | 1.46M | tb_font_info->glyph_to_cache_index.reserve(word->getLength()); |
443 | 8.40M | for (int j = 0; j < word->getLength(); j++) { |
444 | 6.94M | const TextFontInfo *cur_text_font_info = word->getFontInfo(j); |
445 | | |
446 | | // filter-out the invalid WMode value here. |
447 | 6.94M | switch (cur_text_font_info->getWMode()) { |
448 | 6.93M | case 0: |
449 | 6.93M | tb_font_info->wmodes.push_back(text_box::horizontal_wmode); |
450 | 6.93M | break; |
451 | 7.69k | case 1: |
452 | 7.69k | tb_font_info->wmodes.push_back(text_box::vertical_wmode); |
453 | 7.69k | break; |
454 | 36 | default: |
455 | 36 | tb_font_info->wmodes.push_back(text_box::invalid_wmode); |
456 | 6.94M | }; |
457 | | |
458 | 6.94M | tb_font_info->glyph_to_cache_index.push_back(-1); |
459 | 18.3M | for (size_t k = 0; k < tb_font_info->font_info_cache.size(); k++) { |
460 | 18.3M | if (cur_text_font_info->matches(&(tb_font_info->font_info_cache[k].d->ref))) { |
461 | 6.94M | tb_font_info->glyph_to_cache_index[j] = k; |
462 | 6.94M | break; |
463 | 6.94M | } |
464 | 18.3M | } |
465 | 6.94M | } |
466 | 1.46M | tb.m_data->text_box_font = std::move(tb_font_info); |
467 | 1.46M | } |
468 | | |
469 | 1.46M | output_list.push_back(std::move(tb)); |
470 | 1.46M | } |
471 | 18.3k | } |
472 | | |
473 | 18.3k | return output_list; |
474 | 18.3k | } |
475 | | |
476 | | std::vector<text_box> page::text_list() const |
477 | 0 | { |
478 | 0 | return text_list(0); |
479 | 0 | } |