Coverage Report

Created: 2026-06-15 06:22

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/logging-log4cxx/src/main/cpp/transcoder.cpp
Line
Count
Source
1
/*
2
 * Licensed to the Apache Software Foundation (ASF) under one or more
3
 * contributor license agreements.  See the NOTICE file distributed with
4
 * this work for additional information regarding copyright ownership.
5
 * The ASF licenses this file to You under the Apache License, Version 2.0
6
 * (the "License"); you may not use this file except in compliance with
7
 * the License.  You may obtain a copy of the License at
8
 *
9
 *      http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 */
17
18
#include <log4cxx/logstring.h>
19
#include <log4cxx/helpers/transcoder.h>
20
#include <log4cxx/helpers/pool.h>
21
#include <stdlib.h>
22
#include <log4cxx/helpers/exception.h>
23
#include <log4cxx/helpers/bytebuffer.h>
24
#include <log4cxx/helpers/charsetdecoder.h>
25
#include <log4cxx/helpers/charsetencoder.h>
26
#include <log4cxx/helpers/stringhelper.h>
27
#include <log4cxx/helpers/loglog.h>
28
#include <vector>
29
#include <cstring>
30
#if !defined(LOG4CXX)
31
  #define LOG4CXX 1
32
#endif
33
#include <log4cxx/private/log4cxx_private.h>
34
35
#if LOG4CXX_CFSTRING_API
36
  #include <CoreFoundation/CFString.h>
37
#endif
38
39
using namespace LOG4CXX_NS;
40
using namespace LOG4CXX_NS::helpers;
41
42
43
void Transcoder::decodeUTF8(const std::string& src, LogString& dst)
44
0
{
45
0
  std::string::const_iterator iter = src.begin();
46
47
0
  while (iter != src.end())
48
0
  {
49
0
    std::string::const_iterator start = iter;
50
0
    unsigned int sv = decode(src, iter);
51
52
0
    if (sv != 0xFFFF)
53
0
    {
54
0
      encode(sv, dst);
55
0
    }
56
0
    else
57
0
    {
58
0
      dst.append(1, LOSSCHAR);
59
60
      // decode() returns 0xFFFF both for a decode error (iter left at
61
      // start) and for a successfully decoded U+FFFF (iter already
62
      // advanced past EF BF BF).  Only advance here in the former case,
63
      // otherwise the byte following U+FFFF is skipped and, at end of
64
      // input, iter is pushed past src.end().
65
0
      if (iter == start)
66
0
      {
67
0
        iter++;
68
0
      }
69
0
    }
70
0
  }
71
0
}
72
73
void Transcoder::encodeUTF8(const LogString& src, std::string& dst)
74
0
{
75
0
#if LOG4CXX_LOGCHAR_IS_UTF8
76
0
  dst.append(src);
77
#else
78
  LogString::const_iterator iter = src.begin();
79
80
  while (iter != src.end())
81
  {
82
    unsigned int sv = decode(src, iter);
83
84
    if (sv != 0xFFFF)
85
    {
86
      encode(sv, dst);
87
    }
88
    else
89
    {
90
      dst.append(1, LOSSCHAR);
91
      iter++;
92
    }
93
  }
94
95
#endif
96
0
}
97
98
#if LOG4CXX_ABI_VERSION <= 15
99
char* Transcoder::encodeUTF8(const LogString& src, Pool& p)
100
0
{
101
0
#if LOG4CXX_LOGCHAR_IS_UTF8
102
0
  return p.pstrdup(src);
103
#else
104
  std::string tmp;
105
  encodeUTF8(src, tmp);
106
  return p.pstrdup(tmp);
107
#endif
108
0
}
109
#endif
110
111
void Transcoder::encodeUTF8(unsigned int sv, ByteBuffer& dst)
112
0
{
113
0
  size_t bytes = encodeUTF8(sv, dst.current());
114
0
  dst.increment_position(bytes);
115
0
}
116
117
118
/**
 * Stores the UTF-8 encoding of ch at dst and returns the byte count.
 *
 * One to four bytes are written depending on the magnitude of ch.  A
 * value above 0x10FFFF is written as EF BF BF (the encoding of U+FFFF)
 * and 3 is returned.  No terminator is written and dst is not bounds
 * checked, so it must provide room for four bytes.
 */
size_t Transcoder::encodeUTF8(unsigned int ch, char* dst)
{
  // One byte: 0xxxxxxx
  if (ch < 0x80)
  {
    dst[0] = static_cast<char>(ch);
    return 1;
  }

  // Two bytes: 110xxxxx 10xxxxxx
  if (ch < 0x800)
  {
    dst[0] = static_cast<char>(0xC0 | (ch >> 6));
    dst[1] = static_cast<char>(0x80 | (ch & 0x3F));
    return 2;
  }

  // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
  if (ch < 0x10000)
  {
    dst[0] = static_cast<char>(0xE0 | (ch >> 12));
    dst[1] = static_cast<char>(0x80 | ((ch >> 6) & 0x3F));
    dst[2] = static_cast<char>(0x80 | (ch & 0x3F));
    return 3;
  }

  // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
  if (ch <= 0x10FFFF)
  {
    dst[0] = static_cast<char>(0xF0 | (ch >> 18));
    dst[1] = static_cast<char>(0x80 | ((ch >> 12) & 0x3F));
    dst[2] = static_cast<char>(0x80 | ((ch >> 6) & 0x3F));
    dst[3] = static_cast<char>(0x80 | (ch & 0x3F));
    return 4;
  }

  // Beyond the last code point: emit the UTF-8 form of 0xFFFF.
  dst[0] = static_cast<char>(0xEF);
  dst[1] = static_cast<char>(0xBF);
  dst[2] = static_cast<char>(0xBF);
  return 3;
}
157
158
void Transcoder::encodeUTF16BE(unsigned int sv, ByteBuffer& dst)
159
0
{
160
0
  size_t bytes = encodeUTF16BE(sv, dst.current());
161
0
  dst.increment_position(bytes);
162
0
}
163
164
165
/**
 * Stores the big-endian UTF-16 encoding of ch at dst and returns the
 * number of bytes written.
 *
 * Values up to 0xFFFF occupy two bytes, values up to 0x10FFFF are split
 * into a surrogate pair (four bytes), and anything larger is written as
 * FF FF (two bytes).  dst is not bounds checked.
 */
size_t Transcoder::encodeUTF16BE(unsigned int ch, char* dst)
{
  if (ch > 0x10FFFF)
  {
    // Not a code point: emit 0xFFFF.
    dst[1] = static_cast<char>(0xFF);
    dst[0] = dst[1];
    return 2;
  }

  if (ch > 0xFFFF)
  {
    // Supplementary plane: high surrogate first, then low surrogate,
    // each with its most significant byte first.
    const unsigned int offset = ch - 0x10000;
    const unsigned int high = 0xD800 + (offset >> 10);
    const unsigned int low = 0xDC00 + (offset & 0x3FF);
    dst[0] = static_cast<char>(high >> 8);
    dst[1] = static_cast<char>(high & 0xFF);
    dst[2] = static_cast<char>(low >> 8);
    dst[3] = static_cast<char>(low & 0xFF);
    return 4;
  }

  // Single 16-bit unit, most significant byte first.
  dst[0] = static_cast<char>(ch >> 8);
  dst[1] = static_cast<char>(ch & 0xFF);
  return 2;
}
187
188
void Transcoder::encodeUTF16LE(unsigned int sv, ByteBuffer& dst)
189
0
{
190
0
  size_t bytes = encodeUTF16LE(sv, dst.current());
191
0
  dst.increment_position(bytes);
192
0
}
193
194
/**
 * Stores the little-endian UTF-16 encoding of ch at dst and returns the
 * number of bytes written.
 *
 * Values up to 0xFFFF occupy two bytes, values up to 0x10FFFF are split
 * into a surrogate pair (four bytes), and anything larger is written as
 * FF FF (two bytes).  dst is not bounds checked.
 */
size_t Transcoder::encodeUTF16LE(unsigned int ch, char* dst)
{
  if (ch > 0x10FFFF)
  {
    // Not a code point: emit 0xFFFF.
    dst[1] = static_cast<char>(0xFF);
    dst[0] = dst[1];
    return 2;
  }

  if (ch > 0xFFFF)
  {
    // Supplementary plane: high surrogate first, then low surrogate,
    // each with its least significant byte first.
    const unsigned int offset = ch - 0x10000;
    const unsigned int high = 0xD800 + (offset >> 10);
    const unsigned int low = 0xDC00 + (offset & 0x3FF);
    dst[0] = static_cast<char>(high & 0xFF);
    dst[1] = static_cast<char>(high >> 8);
    dst[2] = static_cast<char>(low & 0xFF);
    dst[3] = static_cast<char>(low >> 8);
    return 4;
  }

  // Single 16-bit unit, least significant byte first.
  dst[0] = static_cast<char>(ch & 0xFF);
  dst[1] = static_cast<char>(ch >> 8);
  return 2;
}
216
217
218
unsigned int Transcoder::decode(const std::string& src,
219
  std::string::const_iterator& iter)
220
948k
{
221
948k
  auto offset = iter - src.begin();
222
948k
  auto remaining = src.size() - offset;
223
948k
  ByteBuffer buf(const_cast<char*>(src.data() + offset), remaining);
224
948k
  auto result = CharsetDecoder::getUTF8CodePoint(buf);
225
948k
  iter += remaining - buf.remaining();
226
948k
  return result;
227
948k
}
228
229
230
void Transcoder::encode(unsigned int sv, std::string& dst)
231
0
{
232
0
  char tmp[8];
233
0
  size_t bytes = encodeUTF8(sv, tmp);
234
0
  dst.append(tmp, bytes);
235
0
}
236
237
238
void Transcoder::decode(const std::string& src, LogString& dst)
239
39.2k
{
240
39.2k
#if LOG4CXX_CHARSET_UTF8 && LOG4CXX_LOGCHAR_IS_UTF8
241
39.2k
  dst.append(src);
242
#else
243
  static CharsetDecoderPtr decoder(CharsetDecoder::getDefaultDecoder());
244
  dst.reserve(dst.size() + src.size());
245
  std::string::const_iterator iter = src.begin();
246
#if !LOG4CXX_CHARSET_EBCDIC
247
248
  for (;
249
    iter != src.end() && ((unsigned char) *iter) < 0x80;
250
    iter++)
251
  {
252
    dst.append(1, *iter);
253
  }
254
255
#endif
256
257
  if (iter != src.end())
258
  {
259
    size_t offset = iter - src.begin();
260
    ByteBuffer buf(const_cast<char*>(src.data() + offset), src.size() - offset);
261
262
    while (buf.remaining() > 0)
263
    {
264
      log4cxx_status_t stat = decoder->decode(buf, dst);
265
266
      if (CharsetDecoder::isError(stat))
267
      {
268
        dst.append(1, LOSSCHAR);
269
        buf.increment_position(1);
270
      }
271
    }
272
273
    decoder->decode(buf, dst);
274
  }
275
276
#endif
277
39.2k
}
278
279
#if LOG4CXX_ABI_VERSION <= 15
280
char* Transcoder::encode(const LogString& src, Pool& p)
281
0
{
282
0
#if LOG4CXX_CHARSET_UTF8 && LOG4CXX_LOGCHAR_IS_UTF8
283
0
  return p.pstrdup(src);
284
#else
285
  std::string tmp;
286
  encode(src, tmp);
287
  return p.pstrdup(tmp);
288
#endif
289
0
}
290
#endif
291
292
293
void Transcoder::encode(const LogString& src, std::string& dst)
294
2.21k
{
295
2.21k
#if LOG4CXX_CHARSET_UTF8 && LOG4CXX_LOGCHAR_IS_UTF8
296
2.21k
  dst.append(src);
297
#else
298
  static CharsetEncoderPtr encoder(CharsetEncoder::getDefaultEncoder());
299
  dst.reserve(dst.size() + src.size());
300
  LogString::const_iterator iter = src.begin();
301
#if !LOG4CXX_CHARSET_EBCDIC
302
303
  for (;
304
    iter != src.end() && ((unsigned int) *iter) < 0x80;
305
    iter++)
306
  {
307
    dst.append(1, static_cast<char>(*iter));
308
  }
309
310
#endif
311
312
  if (iter != src.end())
313
  {
314
    char buf[BUFSIZE];
315
    ByteBuffer out(buf, BUFSIZE);
316
317
    while (iter != src.end())
318
    {
319
      log4cxx_status_t stat = encoder->encode(src, iter, out);
320
      out.flip();
321
      dst.append(out.data(), out.limit());
322
      out.clear();
323
324
      if (CharsetEncoder::isError(stat))
325
      {
326
        dst.append(1, LOSSCHAR);
327
        iter++;
328
      }
329
    }
330
331
    encoder->encode(src, iter, out);
332
  }
333
334
#endif
335
2.21k
}
336
337
338
/**
 * Reads one code point from a sequence of UTF-16 code units.
 *
 * @param in   the container being read; only in.end() is consulted.
 * @param iter position of the next unit; must not equal in.end().
 * @return the unit itself when it is not a surrogate (iter advances by
 *         one, except for the value 0xFFFF which leaves iter untouched),
 *         the combined scalar value for a high surrogate followed by a
 *         low surrogate (iter advances by two), or 0xFFFF with iter
 *         untouched for an unpaired surrogate.
 */
template<class String, class Iterator>
static unsigned int decodeUTF16(const String& in, Iterator& iter)
{
  const unsigned int lead = *iter;
  const bool isSurrogate = (lead >= 0xD800 && lead <= 0xDFFF);

  if (!isSurrogate)
  {
    // 0xFFFF doubles as the failure value, so the caller is left to
    // step over it just as it does for a real failure.
    if (lead != 0xFFFF)
    {
      iter++;
    }

    return lead;
  }

  // A low surrogate cannot start a pair.
  if (lead >= 0xDC00)
  {
    return 0xFFFF;
  }

  // High surrogate: a low surrogate must follow immediately.
  const Iterator next = iter + 1;

  if (next == in.end())
  {
    return 0xFFFF;
  }

  const unsigned int trail = *next;

  if (trail < 0xDC00 || trail > 0xDFFF)
  {
    return 0xFFFF;
  }

  iter += 2;
  return (lead - 0xD800) * 0x400 + (trail - 0xDC00) + 0x10000;
}
385
386
/**
 * Appends the UTF-16 encoding of sv to dst.
 *
 * @param sv  the value to encode.
 * @param dst a string-like container of 16-bit (or wider) units that
 *            provides append(count, unit) and a value_type.
 *
 * Values below 0x10000 are appended as a single unit, values up to
 * 0x10FFFF as a high/low surrogate pair.  Anything larger is not a code
 * point and is appended as the single unit 0xFFFF, matching what
 * encodeUTF16BE/encodeUTF16LE write for out-of-range input, instead of
 * being folded onto an unrelated surrogate pair.
 */
template<class String>
static void encodeUTF16(unsigned int sv, String& dst)
{
  typedef typename String::value_type unit_type;

  if (sv < 0x10000)
  {
    dst.append(1, static_cast<unit_type>(sv));
  }
  else if (sv <= 0x10FFFF)
  {
    // Split the 20 bits above the BMP across the two surrogates.
    const unsigned int offset = sv - 0x10000;
    const unsigned int hs = 0xD800 + (offset >> 10);
    const unsigned int ls = 0xDC00 + (offset & 0x3FF);
    dst.append(1, static_cast<unit_type>(hs));
    dst.append(1, static_cast<unit_type>(ls));
  }
  else
  {
    dst.append(1, static_cast<unit_type>(0xFFFF));
  }
}
403
404
405
406
#if LOG4CXX_WCHAR_T_API || LOG4CXX_LOGCHAR_IS_WCHAR || defined(WIN32) || defined(_WIN32)
407
void Transcoder::decode(const std::wstring& src, LogString& dst)
408
0
{
409
#if LOG4CXX_LOGCHAR_IS_WCHAR
410
  dst.append(src);
411
#else
412
0
  std::wstring::const_iterator i = src.begin();
413
414
0
  while (i != src.end())
415
0
  {
416
0
    std::wstring::const_iterator start = i;
417
0
    unsigned int cp = decode(src, i);
418
419
0
    if (cp != 0xFFFF)
420
0
    {
421
0
      encode(cp, dst);
422
0
    }
423
0
    else
424
0
    {
425
0
      dst.append(1, LOSSCHAR);
426
0
      if (i == start)
427
0
      {
428
0
        i++;
429
0
      }
430
0
    }
431
0
  }
432
433
0
#endif
434
0
}
435
436
void Transcoder::encode(const LogString& src, std::wstring& dst)
437
0
{
438
#if LOG4CXX_LOGCHAR_IS_WCHAR
439
  dst.append(src);
440
#else
441
442
0
  for (LogString::const_iterator i = src.begin(); i != src.end();)
443
0
  {
444
0
    LogString::const_iterator start = i;
445
0
    unsigned int cp = Transcoder::decode(src, i);
446
447
0
    if (cp != 0xFFFF)
448
0
    {
449
0
      encode(cp, dst);
450
0
    }
451
0
    else
452
0
    {
453
0
      dst.append(1, LOSSCHAR);
454
0
      if (i == start)
455
0
      {
456
0
        i++;
457
0
      }
458
0
    }
459
0
  }
460
461
0
#endif
462
0
}
463
464
#if LOG4CXX_ABI_VERSION <= 15
465
wchar_t* Transcoder::wencode(const LogString& src, Pool& p)
466
0
{
467
#if LOG4CXX_LOGCHAR_IS_WCHAR
468
  const std::wstring& tmp = src;
469
#else
470
0
  std::wstring tmp;
471
0
  encode(src, tmp);
472
0
#endif
473
0
  wchar_t* dst = (wchar_t*) p.palloc((tmp.length() + 1) * sizeof(wchar_t));
474
0
  dst[tmp.length()] = 0;
475
0
  std::memcpy(dst, tmp.data(), tmp.length() * sizeof(wchar_t));
476
0
  return dst;
477
0
}
478
#endif
479
480
/**
 * Reads one code point from the wide string in at iter.
 *
 * Where __STDC_ISO_10646__ is defined the element at iter is returned
 * as-is and iter always advances by one.  Otherwise the elements are
 * handled as UTF-16 by decodeUTF16(), which may leave iter untouched
 * when it returns 0xFFFF.
 */
unsigned int Transcoder::decode(const std::wstring& in,
  std::wstring::const_iterator& iter)
{
#ifdef __STDC_ISO_10646__
  const unsigned int codePoint = *iter;
  iter++;
  return codePoint;
#else
  return decodeUTF16(in, iter);
#endif
}
489
490
491
/**
 * Appends the code point sv to the wide string dst.
 *
 * Where __STDC_ISO_10646__ is defined, or wchar_t is four bytes wide, sv
 * is appended as a single element.  Otherwise it is appended in UTF-16
 * form via encodeUTF16().
 */
void Transcoder::encode(unsigned int sv, std::wstring& dst)
{
#ifdef __STDC_ISO_10646__
  dst.append(1, sv);
#else

  if (sizeof(wchar_t) != 4)
  {
    encodeUTF16(sv, dst);
  }
  else
  {
    dst.append(1, sv);
  }

#endif
}
508
509
#endif
510
511
512
513
#if LOG4CXX_UNICHAR_API || LOG4CXX_LOGCHAR_IS_UNICHAR
514
void Transcoder::decode(const std::basic_string<UniChar>& src, LogString& dst)
515
{
516
#if LOG4CXX_LOGCHAR_IS_UNICHAR
517
  dst.append(src);
518
#else
519
520
  for (std::basic_string<UniChar>::const_iterator i = src.begin();
521
    i != src.end();)
522
  {
523
    std::basic_string<UniChar>::const_iterator start = i;
524
    unsigned int cp = decode(src, i);
525
526
    if (cp != 0xFFFF)
527
    {
528
      encode(cp, dst);
529
    }
530
    else
531
    {
532
      dst.append(1, LOSSCHAR);
533
      if (i == start)
534
      {
535
        i++;
536
      }
537
    }
538
  }
539
540
#endif
541
}
542
543
void Transcoder::encode(const LogString& src, std::basic_string<UniChar>& dst)
544
{
545
#if LOG4CXX_LOGCHAR_IS_UNICHAR
546
  dst.append(src);
547
#else
548
549
  for (LogString::const_iterator i = src.begin();
550
    i != src.end();)
551
  {
552
    LogString::const_iterator start = i;
553
    unsigned int cp = decode(src, i);
554
555
    if (cp != 0xFFFF)
556
    {
557
      encode(cp, dst);
558
    }
559
    else
560
    {
561
      encode(LOSSCHAR, dst);
562
      if (i == start)
563
      {
564
        i++;
565
      }
566
    }
567
  }
568
569
#endif
570
}
571
572
unsigned int Transcoder::decode(const std::basic_string<UniChar>& in,
573
  std::basic_string<UniChar>::const_iterator& iter)
574
{
575
  return decodeUTF16(in, iter);
576
}
577
578
void Transcoder::encode(unsigned int sv, std::basic_string<UniChar>& dst)
579
{
580
  encodeUTF16(sv, dst);
581
}
582
583
#endif
584
585
#if LOG4CXX_CFSTRING_API
586
void Transcoder::decode(const CFStringRef& src, LogString& dst)
587
{
588
  auto length = CFStringGetLength(src);
589
#if defined(_DEBUG)
590
  if (LogLog::isDebugEnabled())
591
  {
592
    LogString msg(LOG4CXX_STR("Transcoder::decodeCFString"));
593
    msg += LOG4CXX_STR(" length ");
594
    StringHelper::toString((size_t)length, msg);
595
    LogLog::debug(msg);
596
  }
597
#endif
598
599
  if (length > 0)
600
  {
601
    std::vector<unsigned short> tmp(length);
602
    CFStringGetCharacters(src, CFRangeMake(0, length), &tmp[0]);
603
    for (auto i = tmp.begin(); i != tmp.end(); )
604
    {
605
      auto start = i;
606
      unsigned int cp = decodeUTF16(tmp, i);
607
      if (cp != 0xFFFF)
608
      {
609
        encode(cp, dst);
610
      }
611
      else
612
      {
613
        dst.append(1, LOSSCHAR);
614
        if (i == start)
615
        {
616
          i++;
617
        }
618
      }
619
    }
620
  }
621
}
622
623
/**
 * Creates a CoreFoundation string holding the content of src.
 *
 * src is read one code point at a time with Transcoder::decode() and each
 * code point is converted to UTF-16 with encodeUTF16(); a decode result
 * of 0xFFFF is replaced by LOSSCHAR.  Decoding first (rather than passing
 * each element of src straight to encodeUTF16()) matters when logchar is
 * a UTF-8 byte: a multi-byte character would otherwise be emitted as one
 * UTF-16 unit per byte.
 *
 * @return the result of CFStringCreateWithCharacters(); ownership follows
 *         the usual CoreFoundation "Create" rule, so the caller releases it.
 */
CFStringRef Transcoder::encode(const LogString& src)
{
  std::basic_string<unsigned short> tmp;

  for (LogString::const_iterator i = src.begin(); i != src.end();)
  {
    LogString::const_iterator start = i;
    unsigned int cp = Transcoder::decode(src, i);

    if (cp != 0xFFFF)
    {
      encodeUTF16(cp, tmp);
    }
    else
    {
      encodeUTF16(LOSSCHAR, tmp);

      // Step forward only if decode() left the position untouched.
      if (i == start)
      {
        i++;
      }
    }
  }

  return CFStringCreateWithCharacters(kCFAllocatorDefault, tmp.data(), tmp.size());
}
630
#endif // #if LOG4CXX_CFSTRING_API
631
632
633
/**
 * Converts a single char to a logchar.
 *
 * In EBCDIC builds the character is run through
 * decode(const std::string&, LogString&) and the first resulting element
 * is returned; otherwise val is returned unchanged.
 */
logchar Transcoder::decode(char val)
{
#if !LOG4CXX_CHARSET_EBCDIC
  return val;
#else
  LogString converted;
  Transcoder::decode(std::string(1, val), converted);
  return converted[0];
#endif
}
643
644
/**
 * Converts a zero-terminated char string to a LogString.
 *
 * @param val the text to convert; a null pointer yields an empty
 *            LogString instead of constructing a string from null,
 *            which is undefined behaviour.
 *
 * When logchar is UTF-8 (and the build is not EBCDIC) the text is copied
 * as-is; otherwise it goes through decode(const std::string&, LogString&).
 */
LogString Transcoder::decode(const char* val)
{
  if (!val)
  {
    return LogString();
  }

#if LOG4CXX_LOGCHAR_IS_UTF8 && !LOG4CXX_CHARSET_EBCDIC
  return val;
#else
  LogString dst;
  Transcoder::decode(val, dst);
  return dst;
#endif
}
654
655
656
std::string Transcoder::encodeCharsetName(const LogString& val)
657
0
{
658
0
  char asciiTable[] = { ' ', '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.', '/',
659
0
      '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?',
660
0
      '@', 'A', 'B', 'C', 'D', 'E', 'F',  'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
661
0
      'P', 'Q', 'R', 'S', 'T', 'U', 'V',  'W', 'X', 'Y', 'Z', '[', '\\', ']', '^', '_',
662
0
      '`', 'a', 'b', 'c', 'd', 'e', 'f',  'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
663
0
      'p', 'q', 'r', 's', 't', 'u', 'v',  'w', 'x', 'y', 'z', '{', '|', '}', '~'
664
0
    };
665
0
  std::string out;
666
667
0
  for (auto& item : val)
668
0
  {
669
0
    if (item >= 0x20 && item < 0x7F)
670
0
    {
671
0
      out.append(1, asciiTable[item - 0x20]);
672
0
    }
673
0
    else
674
0
    {
675
0
      out.append(1, LOSSCHAR);
676
0
    }
677
0
  }
678
679
0
  return out;
680
0
}