/src/qpdf/libqpdf/ContentNormalizer.cc
Line | Count | Source (jump to first uncovered line) |
1 | | #include <qpdf/ContentNormalizer.hh> |
2 | | |
3 | | #include <qpdf/QPDF_Name.hh> |
4 | | #include <qpdf/QUtil.hh> |
5 | | |
6 | | ContentNormalizer::ContentNormalizer() : |
7 | | any_bad_tokens(false), |
8 | | last_token_was_bad(false) |
9 | 0 | { |
10 | 0 | } |
11 | | |
12 | | void |
13 | | ContentNormalizer::handleToken(QPDFTokenizer::Token const& token) |
14 | 0 | { |
15 | 0 | QPDFTokenizer::token_type_e token_type = token.getType(); |
16 | |
|
17 | 0 | if (token_type == QPDFTokenizer::tt_bad) { |
18 | 0 | this->any_bad_tokens = true; |
19 | 0 | this->last_token_was_bad = true; |
20 | 0 | } else if (token_type != QPDFTokenizer::tt_eof) { |
21 | 0 | this->last_token_was_bad = false; |
22 | 0 | } |
23 | |
|
24 | 0 | switch (token_type) { |
25 | 0 | case QPDFTokenizer::tt_space: |
26 | 0 | { |
27 | 0 | std::string const& value = token.getRawValue(); |
28 | 0 | auto size = value.size(); |
29 | 0 | size_t pos = 0; |
30 | 0 | auto r_pos = value.find('\r'); |
31 | 0 | while (r_pos != std::string::npos) { |
32 | 0 | if (pos != r_pos) { |
33 | 0 | write(&value[pos], r_pos - pos); |
34 | 0 | } |
35 | 0 | if (++r_pos >= size) { |
36 | 0 | write("\n"); |
37 | 0 | return; |
38 | 0 | } |
39 | 0 | if (value[r_pos] != '\n') { |
40 | 0 | write("\n"); |
41 | 0 | } |
42 | 0 | pos = r_pos; |
43 | 0 | r_pos = value.find('\r', pos); |
44 | 0 | } |
45 | 0 | if (pos < size) { |
46 | 0 | write(&value[pos], size - pos); |
47 | 0 | } |
48 | 0 | } |
49 | 0 | return; |
50 | | |
51 | 0 | case QPDFTokenizer::tt_string: |
52 | | // Replacing string and name tokens in this way normalizes their representation as this will |
53 | | // automatically handle quoting of unprintable characters, etc. |
54 | 0 | write(QPDFObjectHandle::newString(token.getValue()).unparse()); |
55 | 0 | break; |
56 | | |
57 | 0 | case QPDFTokenizer::tt_name: |
58 | 0 | write(QPDF_Name::normalizeName(token.getValue())); |
59 | 0 | break; |
60 | | |
61 | 0 | default: |
62 | 0 | writeToken(token); |
63 | 0 | return; |
64 | 0 | } |
65 | | |
66 | | // tt_string or tt_name |
67 | 0 | std::string const& value = token.getRawValue(); |
68 | 0 | if (value.find('\r') != std::string::npos || value.find('\n') != std::string::npos) { |
69 | 0 | write("\n"); |
70 | 0 | } |
71 | 0 | } |
72 | | |
73 | | bool |
74 | | ContentNormalizer::anyBadTokens() const |
75 | 0 | { |
76 | 0 | return this->any_bad_tokens; |
77 | 0 | } |
78 | | |
79 | | bool |
80 | | ContentNormalizer::lastTokenWasBad() const |
81 | 0 | { |
82 | 0 | return this->last_token_was_bad; |
83 | 0 | } |