/src/logging-log4cxx/src/main/cpp/transform.cpp
Line | Count | Source |
1 | | /* |
2 | | * Licensed to the Apache Software Foundation (ASF) under one or more |
3 | | * contributor license agreements. See the NOTICE file distributed with |
4 | | * this work for additional information regarding copyright ownership. |
5 | | * The ASF licenses this file to You under the Apache License, Version 2.0 |
6 | | * (the "License"); you may not use this file except in compliance with |
7 | | * the License. You may obtain a copy of the License at |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | */ |
17 | | |
18 | | #include <log4cxx/logstring.h> |
19 | | #include <log4cxx/helpers/transform.h> |
20 | | #include <log4cxx/helpers/transcoder.h> |
21 | | #include <log4cxx/helpers/widelife.h> |
22 | | #include <functional> |
23 | | |
24 | | using namespace LOG4CXX_NS; |
25 | | using namespace LOG4CXX_NS::helpers; |
26 | | |
27 | | namespace |
28 | | { |
29 | | using CharProcessor = std::function<void(LogString&, int)>; |
30 | | |
31 | | // Allowable XML 1.0 characters are: |
32 | | // #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] |
33 | | void appendValidCharacters(LogString& buf, const LogString& input, CharProcessor handler = {}) |
34 | 28.2k | { |
35 | 28.2k | static const unsigned int specials[] = |
36 | 28.2k | { 0x22 /* " */ |
37 | 28.2k | , 0x26 /* & */ |
38 | 28.2k | , 0x3C /* < */ |
39 | 28.2k | , 0x3E /* > */ |
40 | 28.2k | , 0x00 |
41 | 28.2k | }; |
42 | 28.2k | auto start = input.begin(); |
43 | 1.26M | for (auto nextCodePoint = start; input.end() != nextCodePoint; ) |
44 | 1.24M | { |
45 | 1.24M | auto lastCodePoint = nextCodePoint; |
46 | 1.24M | auto ch = Transcoder::decode(input, nextCodePoint); |
47 | 1.24M | if (nextCodePoint == lastCodePoint) // failed to decode input? |
48 | 1.23k | nextCodePoint = input.end(); |
49 | 1.23M | else if ((0x20 <= ch && ch <= 0xD7FF) && |
50 | 971k | specials[0] != ch && |
51 | 669k | specials[1] != ch && |
52 | 585k | specials[2] != ch && |
53 | 583k | specials[3] != ch) |
54 | 579k | { |
55 | 579k | continue; |
56 | 579k | } |
57 | 659k | else if ((0x9 == ch || 0xA == ch || 0xD == ch) || |
58 | 659k | (0xE000 <= ch && ch <= 0xFFFD) || |
59 | 659k | (0x10000 <= ch && ch <= 0x10FFFF)) |
60 | 262 | { |
61 | 262 | continue; |
62 | 262 | } |
63 | | |
64 | 660k | if (start != lastCodePoint) |
65 | 2.88k | buf.append(start, lastCodePoint); |
66 | 660k | start = nextCodePoint; |
67 | 660k | switch (ch) |
68 | 660k | { |
69 | 266k | case 0: // Do not output a NUL character |
70 | 266k | break; |
71 | 301k | case 0x22: |
72 | 301k | buf.append(LOG4CXX_STR(""")); |
73 | 301k | break; |
74 | | |
75 | 84.4k | case 0x26: |
76 | 84.4k | buf.append(LOG4CXX_STR("&")); |
77 | 84.4k | break; |
78 | | |
79 | 1.70k | case 0x3C: |
80 | 1.70k | buf.append(LOG4CXX_STR("<")); |
81 | 1.70k | break; |
82 | | |
83 | 3.78k | case 0x3E: |
84 | 3.78k | buf.append(LOG4CXX_STR(">")); |
85 | 3.78k | break; |
86 | | |
87 | 1.23k | case 0xFFFF: // invalid sequence |
88 | 1.23k | Transform::appendCharacterReference(buf, 0xFFFD); // The Unicode replacement character |
89 | 1.23k | break; |
90 | | |
91 | 765 | default: |
92 | 765 | if (handler) |
93 | 0 | handler(buf, ch); |
94 | 765 | break; |
95 | 660k | } |
96 | 660k | } |
97 | 28.2k | buf.append(start, input.end()); |
98 | 28.2k | } |
99 | | |
100 | | } // namespace |
101 | | |
102 | | void Transform::appendEscapingCDATA( |
103 | | LogString& buf, const LogString& input) |
104 | 6.51k | { |
105 | 6.51k | static const LogString CDATA_END(LOG4CXX_STR("]]>")); |
106 | 6.51k | const LogString::size_type CDATA_END_LEN = 3; |
107 | 6.51k | static const LogString CDATA_EMBEDED_END(LOG4CXX_STR("]]><![CDATA[")); |
108 | 6.51k | auto start = input.begin(); |
109 | 1.28M | for (auto nextCodePoint = start; input.end() != nextCodePoint; ) |
110 | 1.28M | { |
111 | 1.28M | bool cdataEnd = false; |
112 | 1.28M | auto lastCodePoint = nextCodePoint; |
113 | 1.28M | auto ch = Transcoder::decode(input, nextCodePoint); |
114 | 1.28M | if (nextCodePoint == lastCodePoint) // failed to decode input? |
115 | 912 | { |
116 | 912 | nextCodePoint = input.end(); |
117 | 912 | ch = 0xFFFD; // The Unicode replacement character |
118 | 912 | } |
119 | 1.27M | else if (CDATA_END[0] == ch && input.end() != nextCodePoint) |
120 | 30.2k | { |
121 | 30.2k | lastCodePoint = nextCodePoint; |
122 | 30.2k | if (CDATA_END[1] != Transcoder::decode(input, nextCodePoint) || |
123 | 29.4k | input.end() == nextCodePoint || |
124 | 29.4k | CDATA_END[2] != Transcoder::decode(input, nextCodePoint)) |
125 | 1.73k | { |
126 | 1.73k | nextCodePoint = lastCodePoint; |
127 | 1.73k | continue; |
128 | 1.73k | } |
129 | 28.5k | lastCodePoint = nextCodePoint; |
130 | 28.5k | cdataEnd = true; |
131 | 28.5k | } |
132 | 1.24M | else if ((0x20 <= ch && ch <= 0xD7FF) || |
133 | 269k | (0x9 == ch || 0xA == ch || 0xD == ch) || |
134 | 268k | (0xE000 <= ch && ch <= 0xFFFD) || |
135 | 268k | (0x10000 <= ch && ch <= 0x10FFFF)) |
136 | 981k | { |
137 | 981k | continue; |
138 | 981k | } |
139 | | |
140 | 297k | if (start != lastCodePoint) |
141 | 29.7k | buf.append(start, lastCodePoint); |
142 | 297k | if (cdataEnd) |
143 | 28.5k | buf.append(CDATA_EMBEDED_END); |
144 | 269k | else if (0 != ch) |
145 | 1.63k | appendCharacterReference(buf, ch); |
146 | 297k | start = nextCodePoint; |
147 | 297k | } |
148 | 6.51k | buf.append(start, input.end()); |
149 | 6.51k | } |
150 | | |
151 | | void Transform::appendCharacterReference(LogString& buf, unsigned int ch) |
152 | 2.86k | { |
153 | 2.86k | auto toHexDigit = [](int ch) -> int |
154 | 9.90k | { |
155 | 9.90k | return (10 <= ch ? (0x61 - 10) : 0x30) + ch; |
156 | 9.90k | }; |
157 | 2.86k | buf.push_back('&'); |
158 | 2.86k | buf.push_back('#'); |
159 | 2.86k | buf.push_back('x'); |
160 | 2.86k | if (0xFFFFFFF < ch) |
161 | 0 | buf.push_back(toHexDigit((ch & 0xF0000000) >> 28)); |
162 | 2.86k | if (0xFFFFFF < ch) |
163 | 0 | buf.push_back(toHexDigit((ch & 0xF000000) >> 24)); |
164 | 2.86k | if (0xFFFFF < ch) |
165 | 0 | buf.push_back(toHexDigit((ch & 0xF00000) >> 20)); |
166 | 2.86k | if (0xFFFF < ch) |
167 | 0 | buf.push_back(toHexDigit((ch & 0xF0000) >> 16)); |
168 | 2.86k | if (0xFFF < ch) |
169 | 2.15k | buf.push_back(toHexDigit((ch & 0xF000) >> 12)); |
170 | 2.86k | if (0xFF < ch) |
171 | 2.15k | buf.push_back(toHexDigit((ch & 0xF00) >> 8)); |
172 | 2.86k | if (0xF < ch) |
173 | 2.72k | buf.push_back(toHexDigit((ch & 0xF0) >> 4)); |
174 | 2.86k | buf.push_back(toHexDigit(ch & 0xF)); |
175 | 2.86k | buf.push_back(';'); |
176 | 2.86k | } |
177 | | |
178 | | void Transform::appendEscapingTags(LogString& buf, const LogString& input) |
179 | 0 | { |
180 | 0 | appendValidCharacters(buf, input, appendCharacterReference); |
181 | 0 | } |
182 | | |
183 | | void Transform::appendLegalCharacters(LogString& buf, const LogString& input) |
184 | 28.2k | { |
185 | 28.2k | appendValidCharacters(buf, input); |
186 | 28.2k | } |