Coverage Report

Created: 2026-06-15 06:22

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/logging-log4cxx/src/main/cpp/transform.cpp
Line
Count
Source
1
/*
2
 * Licensed to the Apache Software Foundation (ASF) under one or more
3
 * contributor license agreements.  See the NOTICE file distributed with
4
 * this work for additional information regarding copyright ownership.
5
 * The ASF licenses this file to You under the Apache License, Version 2.0
6
 * (the "License"); you may not use this file except in compliance with
7
 * the License.  You may obtain a copy of the License at
8
 *
9
 *      http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 */
17
18
#include <log4cxx/logstring.h>
19
#include <log4cxx/helpers/transform.h>
20
#include <log4cxx/helpers/transcoder.h>
21
#include <log4cxx/helpers/widelife.h>
22
#include <functional>
23
24
using namespace LOG4CXX_NS;
25
using namespace LOG4CXX_NS::helpers;
26
27
namespace
28
{
29
// Callback invoked with an output buffer and a code point.
// A true result means the callback dealt with the code point itself;
// a false result leaves the caller to apply its own fallback.
using CharProcessor = std::function<bool(LogString&, int)>;
30
31
// Allowable XML 1.0 characters are:
32
// #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
33
void appendValidCharacters(LogString& buf, const LogString& input, CharProcessor handler = {}, bool handleValidCharacters = false)
34
0
{
35
0
  static const unsigned int specials[] =
36
0
    { 0x22 /* " */
37
0
    , 0x26 /* & */
38
0
    , 0x3C /* < */
39
0
    , 0x3E /* > */
40
0
    , 0x00
41
0
    };
42
0
  auto start = input.begin();
43
0
  for (auto nextCodePoint = start; input.end() != nextCodePoint; )
44
0
  {
45
0
    auto lastCodePoint = nextCodePoint;
46
0
    auto ch = Transcoder::decode(input, nextCodePoint);
47
0
    if (nextCodePoint == lastCodePoint) // failed to decode input?
48
0
    {
49
      // Skip the undecodable run and keep escaping the remaining input
50
      // instead of discarding it; the run collapses to one replacement.
51
0
      for (++nextCodePoint; nextCodePoint != input.end(); ++nextCodePoint)
52
0
      {
53
0
        auto probe = nextCodePoint;
54
0
        Transcoder::decode(input, probe);
55
0
        if (probe != nextCodePoint) // next unit starts a decodable sequence
56
0
          break;
57
0
      }
58
0
    }
59
0
    else if (0xD800 <= ch && ch <= 0xDFFF)
60
0
    {
61
      // RFC 3629 ยง3 explicitly forbids surrogate-half values in UTF-8
62
0
      ch = 0xFFFF;
63
0
    }
64
0
    else if (((0x20 <= ch && ch <= 0xD7FF) &&
65
0
        specials[0] != ch &&
66
0
        specials[1] != ch &&
67
0
        specials[2] != ch &&
68
0
        specials[3] != ch) ||
69
0
      (0x9 == ch || 0xA == ch || 0xD == ch) ||
70
0
      (0xE000 <= ch && ch <= 0xFFFD) ||
71
0
      (0x10000 <= ch && ch <= 0x10FFFF))
72
0
    {
73
0
      LogString escaped;
74
0
      if (handleValidCharacters && handler && handler(escaped, ch))
75
0
      {
76
0
        if (start != lastCodePoint)
77
0
          buf.append(start, lastCodePoint);
78
0
        buf.append(escaped);
79
0
        start = nextCodePoint;
80
0
      }
81
0
      continue;
82
0
    }
83
84
0
    if (start != lastCodePoint)
85
0
      buf.append(start, lastCodePoint);
86
0
    start = nextCodePoint;
87
0
    switch (ch)
88
0
    {
89
0
      case 0: // Do not output a NUL character
90
0
        break;
91
0
      case 0x22:
92
0
        buf.append(LOG4CXX_STR("&quot;"));
93
0
        break;
94
95
0
      case 0x26:
96
0
        buf.append(LOG4CXX_STR("&amp;"));
97
0
        break;
98
99
0
      case 0x3C:
100
0
        buf.append(LOG4CXX_STR("&lt;"));
101
0
        break;
102
103
0
      case 0x3E:
104
0
        buf.append(LOG4CXX_STR("&gt;"));
105
0
        break;
106
107
0
      case 0xFFFF: // invalid sequence
108
0
        Transform::appendCharacterReference(buf, 0xFFFD); // The Unicode replacement character
109
0
        break;
110
111
0
      default:
112
0
        if (handler && !handler(buf, ch))
113
0
          Transform::appendCharacterReference(buf, ch);
114
0
        break;
115
0
    }
116
0
  }
117
0
  buf.append(start, input.end());
118
0
}
119
120
// CharProcessor that appends a numeric character reference for \c ch
// to \c buf and always reports the code point as handled.
bool appendCharacterReferenceHandler(LogString& buf, int ch)
121
0
{
122
0
  Transform::appendCharacterReference(buf, ch);
123
0
  return true;
124
0
}
125
126
// CharProcessor used for attribute values: an apostrophe, tab, line feed
// or carriage return is appended to \c buf as a numeric character reference
// (result true); any other code point is left untouched (result false).
bool appendAttributeCharacterReference(LogString& buf, int ch)
{
  switch (ch)
  {
    case 0x9:  // tab
    case 0xA:  // line feed
    case 0xD:  // carriage return
    case 0x27: // apostrophe
      Transform::appendCharacterReference(buf, ch);
      return true;

    default:
      return false;
  }
}
136
137
} // namespace
138
139
void Transform::appendEscapingCDATA(
140
  LogString& buf, const LogString& input)
141
0
{
142
0
  static const LogString CDATA_END(LOG4CXX_STR("]]>"));
143
0
  const LogString::size_type CDATA_END_LEN = 3;
144
0
  static const LogString CDATA_EMBEDED_END(LOG4CXX_STR("]]&gt;<![CDATA["));
145
0
  auto start = input.begin();
146
0
  for (auto nextCodePoint = start; input.end() != nextCodePoint; )
147
0
  {
148
0
    bool cdataEnd = false;
149
0
    auto lastCodePoint = nextCodePoint;
150
0
    auto ch = Transcoder::decode(input, nextCodePoint);
151
0
    if (nextCodePoint == lastCodePoint) // failed to decode input?
152
0
    {
153
      // Skip the undecodable run and keep escaping the remaining input
154
      // instead of discarding it; the run collapses to one replacement.
155
0
      for (++nextCodePoint; nextCodePoint != input.end(); ++nextCodePoint)
156
0
      {
157
0
        auto probe = nextCodePoint;
158
0
        Transcoder::decode(input, probe);
159
0
        if (probe != nextCodePoint) // next unit starts a decodable sequence
160
0
          break;
161
0
      }
162
0
      ch = 0xFFFD; // The Unicode replacement character
163
0
    }
164
0
    else if (CDATA_END[0] == ch && input.end() != nextCodePoint)
165
0
    {
166
0
      lastCodePoint = nextCodePoint;
167
0
      if (CDATA_END[1] != Transcoder::decode(input, nextCodePoint) ||
168
0
        input.end() == nextCodePoint ||
169
0
        CDATA_END[2] != Transcoder::decode(input, nextCodePoint))
170
0
      {
171
0
        nextCodePoint = lastCodePoint;
172
0
        continue;
173
0
      }
174
0
      lastCodePoint = nextCodePoint;
175
0
      cdataEnd = true;
176
0
    }
177
0
    else if ((0x20 <= ch && ch <= 0xD7FF) ||
178
0
        (0x9 == ch || 0xA == ch || 0xD == ch) ||
179
0
        (0xE000 <= ch && ch <= 0xFFFD) ||
180
0
        (0x10000 <= ch && ch <= 0x10FFFF))
181
0
    {
182
0
      continue;
183
0
    }
184
185
0
    if (start != lastCodePoint)
186
0
      buf.append(start, lastCodePoint);
187
0
    if (cdataEnd)
188
0
      buf.append(CDATA_EMBEDED_END);
189
0
    else if (0 != ch)
190
0
      appendCharacterReference(buf, ch);
191
0
    start = nextCodePoint;
192
0
  }
193
0
  buf.append(start, input.end());
194
0
}
195
196
// Appends the numeric character reference for \c ch to \c buf,
// i.e. "&#x" followed by the lower-case hexadecimal value of \c ch
// without leading zeros (at least one digit) and a closing ';'.
void Transform::appendCharacterReference(LogString& buf, unsigned int ch)
{
  // Maps a value in the range 0..15 onto '0'..'9' or 'a'..'f'
  const auto hexDigit = [](int nibble) -> int
  {
    if (nibble < 10)
      return 0x30 + nibble;
    return 0x61 + (nibble - 10);
  };

  buf.push_back('&');
  buf.push_back('#');
  buf.push_back('x');

  // From the most significant nibble down, output a digit only when
  // some bit at or above that nibble is set.
  for (int shift = 28; 0 < shift; shift -= 4)
  {
    const unsigned int upperBits = ch >> shift;
    if (0 != upperBits)
      buf.push_back(hexDigit(upperBits & 0xF));
  }

  // The least significant digit is always present
  buf.push_back(hexDigit(ch & 0xF));
  buf.push_back(';');
}
222
223
// Appends \c input to \c buf through appendValidCharacters using
// appendCharacterReferenceHandler, so code points outside the allowable
// XML 1.0 ranges are written as numeric character references.
void Transform::appendEscapingTags(LogString& buf, const LogString& input)
224
0
{
225
0
  appendValidCharacters(buf, input, appendCharacterReferenceHandler);
226
0
}
227
228
// Appends \c input to \c buf through appendValidCharacters using
// appendAttributeCharacterReference with valid-character handling enabled,
// so an apostrophe, tab, line feed or carriage return is also written
// as a numeric character reference.
void Transform::appendEscapingAttribute(LogString& buf, const LogString& input)
229
0
{
230
0
  appendValidCharacters(buf, input, appendAttributeCharacterReference, true);
231
0
}
232
233
// Appends \c input to \c buf through appendValidCharacters without a
// handler, so no numeric character reference is produced for a code point
// outside the allowable XML 1.0 ranges.
void Transform::appendLegalCharacters(LogString& buf, const LogString& input)
234
0
{
235
0
  appendValidCharacters(buf, input);
236
0
}