/src/qpdf/libqpdf/ContentNormalizer.cc
Line | Count | Source |
1 | | #include <qpdf/ContentNormalizer.hh> |
2 | | |
3 | | #include <qpdf/QPDFObjectHandle_private.hh> |
4 | | #include <qpdf/QUtil.hh> |
5 | | |
6 | | using namespace qpdf; |
7 | | |
8 | | void |
9 | | ContentNormalizer::handleToken(QPDFTokenizer::Token const& token) |
10 | 82.5M | { |
11 | 82.5M | QPDFTokenizer::token_type_e token_type = token.getType(); |
12 | | |
13 | 82.5M | if (token_type == QPDFTokenizer::tt_bad) { |
14 | 3.24M | this->any_bad_tokens = true; |
15 | 3.24M | this->last_token_was_bad = true; |
16 | 79.3M | } else if (token_type != QPDFTokenizer::tt_eof) { |
17 | 79.3M | this->last_token_was_bad = false; |
18 | 79.3M | } |
19 | | |
20 | 82.5M | switch (token_type) { |
21 | 35.0M | case QPDFTokenizer::tt_space: |
22 | 35.0M | { |
23 | 35.0M | std::string const& value = token.getRawValue(); |
24 | 35.0M | auto size = value.size(); |
25 | 35.0M | size_t pos = 0; |
26 | 35.0M | auto r_pos = value.find('\r'); |
27 | 39.4M | while (r_pos != std::string::npos) { |
28 | 4.86M | if (pos != r_pos) { |
29 | 4.29M | write(&value[pos], r_pos - pos); |
30 | 4.29M | } |
31 | 4.86M | if (++r_pos >= size) { |
32 | 470k | write("\n"); |
33 | 470k | return; |
34 | 470k | } |
35 | 4.39M | if (value[r_pos] != '\n') { |
36 | 4.38M | write("\n"); |
37 | 4.38M | } |
38 | 4.39M | pos = r_pos; |
39 | 4.39M | r_pos = value.find('\r', pos); |
40 | 4.39M | } |
41 | 34.6M | if (pos < size) { |
42 | 34.6M | write(&value[pos], size - pos); |
43 | 34.6M | } |
44 | 34.6M | } |
45 | 0 | return; |
46 | | |
47 | 301k | case QPDFTokenizer::tt_string: |
48 | | // Replacing string and name tokens in this way normalizes their representation as this will |
49 | | // automatically handle quoting of unprintable characters, etc. |
50 | 301k | write(QPDFObjectHandle::newString(token.getValue()).unparse()); |
51 | 301k | break; |
52 | | |
53 | 1.42M | case QPDFTokenizer::tt_name: |
54 | 1.42M | write(Name::normalize(token.getValue())); |
55 | 1.42M | break; |
56 | | |
57 | 45.7M | default: |
58 | 45.7M | writeToken(token); |
59 | 45.7M | return; |
60 | 82.5M | } |
61 | | |
62 | | // tt_string or tt_name |
63 | 1.72M | std::string const& value = token.getRawValue(); |
64 | 1.72M | if (value.find('\r') != std::string::npos || value.find('\n') != std::string::npos) { |
65 | 44.9k | write("\n"); |
66 | 44.9k | } |
67 | 1.72M | } |