Coverage Report

Created: 2026-06-10 06:58

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/logging-log4cxx/src/main/cpp/transcoder.cpp
Line
Count
Source
1
/*
2
 * Licensed to the Apache Software Foundation (ASF) under one or more
3
 * contributor license agreements.  See the NOTICE file distributed with
4
 * this work for additional information regarding copyright ownership.
5
 * The ASF licenses this file to You under the Apache License, Version 2.0
6
 * (the "License"); you may not use this file except in compliance with
7
 * the License.  You may obtain a copy of the License at
8
 *
9
 *      http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 */
17
18
#include <log4cxx/logstring.h>
19
#include <log4cxx/helpers/transcoder.h>
20
#include <log4cxx/helpers/pool.h>
21
#include <stdlib.h>
22
#include <log4cxx/helpers/exception.h>
23
#include <log4cxx/helpers/bytebuffer.h>
24
#include <log4cxx/helpers/charsetdecoder.h>
25
#include <log4cxx/helpers/charsetencoder.h>
26
#include <log4cxx/helpers/stringhelper.h>
27
#include <log4cxx/helpers/loglog.h>
28
#include <vector>
29
#include <cstring>
30
#if !defined(LOG4CXX)
31
  #define LOG4CXX 1
32
#endif
33
#include <log4cxx/private/log4cxx_private.h>
34
35
#if LOG4CXX_CFSTRING_API
36
  #include <CoreFoundation/CFString.h>
37
#endif
38
39
using namespace LOG4CXX_NS;
40
using namespace LOG4CXX_NS::helpers;
41
42
43
void Transcoder::decodeUTF8(const std::string& src, LogString& dst)
44
0
{
45
0
  std::string::const_iterator iter = src.begin();
46
47
0
  while (iter != src.end())
48
0
  {
49
0
    std::string::const_iterator start = iter;
50
0
    unsigned int sv = decode(src, iter);
51
52
0
    if (sv != 0xFFFF)
53
0
    {
54
0
      encode(sv, dst);
55
0
    }
56
0
    else
57
0
    {
58
0
      dst.append(1, LOSSCHAR);
59
60
      // decode() returns 0xFFFF both for a decode error (iter left at
61
      // start) and for a successfully decoded U+FFFF (iter already
62
      // advanced past EF BF BF).  Only advance here in the former case,
63
      // otherwise the byte following U+FFFF is skipped and, at end of
64
      // input, iter is pushed past src.end().
65
0
      if (iter == start)
66
0
      {
67
0
        iter++;
68
0
      }
69
0
    }
70
0
  }
71
0
}
72
73
void Transcoder::encodeUTF8(const LogString& src, std::string& dst)
74
0
{
75
#if LOG4CXX_LOGCHAR_IS_UTF8
76
  dst.append(src);
77
#else
78
0
  LogString::const_iterator iter = src.begin();
79
80
0
  while (iter != src.end())
81
0
  {
82
0
    unsigned int sv = decode(src, iter);
83
84
0
    if (sv != 0xFFFF)
85
0
    {
86
0
      encode(sv, dst);
87
0
    }
88
0
    else
89
0
    {
90
0
      dst.append(1, LOSSCHAR);
91
0
      iter++;
92
0
    }
93
0
  }
94
95
0
#endif
96
0
}
97
98
#if LOG4CXX_ABI_VERSION <= 15
99
char* Transcoder::encodeUTF8(const LogString& src, Pool& p)
100
0
{
101
#if LOG4CXX_LOGCHAR_IS_UTF8
102
  return p.pstrdup(src);
103
#else
104
0
  std::string tmp;
105
0
  encodeUTF8(src, tmp);
106
0
  return p.pstrdup(tmp);
107
0
#endif
108
0
}
109
#endif
110
111
void Transcoder::encodeUTF8(unsigned int sv, ByteBuffer& dst)
112
0
{
113
0
  size_t bytes = encodeUTF8(sv, dst.current());
114
0
  dst.increment_position(bytes);
115
0
}
116
117
118
/**
 * Writes the UTF-8 encoding of ch to dst.
 *
 * @param ch  value to encode; anything above 0x10FFFF is written as the
 *            three bytes EF BF BF (the encoding of 0xFFFF).
 * @param dst output; between one and four bytes are written starting at
 *            dst[0].
 * @return the number of bytes written (1 to 4).
 */
size_t Transcoder::encodeUTF8(unsigned int ch, char* dst)
{
  // One byte: 0xxxxxxx
  if (ch < 0x80)
  {
    dst[0] = static_cast<char>(ch);
    return 1;
  }

  // Two bytes: 110xxxxx 10xxxxxx
  if (ch < 0x800)
  {
    dst[0] = static_cast<char>(0xC0 + (ch >> 6));
    dst[1] = static_cast<char>(0x80 + (ch & 0x3F));
    return 2;
  }

  // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
  if (ch < 0x10000)
  {
    dst[0] = static_cast<char>(0xE0 + (ch >> 12));
    dst[1] = static_cast<char>(0x80 + ((ch >> 6) & 0x3F));
    dst[2] = static_cast<char>(0x80 + (ch & 0x3F));
    return 3;
  }

  // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
  if (ch <= 0x10FFFF)
  {
    dst[0] = static_cast<char>(0xF0 + (ch >> 18));
    dst[1] = static_cast<char>(0x80 + ((ch >> 12) & 0x3F));
    dst[2] = static_cast<char>(0x80 + ((ch >> 6) & 0x3F));
    dst[3] = static_cast<char>(0x80 + (ch & 0x3F));
    return 4;
  }

  // Out of range: output the UTF-8 encoding of 0xFFFF.
  dst[0] = static_cast<char>(0xEF);
  dst[1] = static_cast<char>(0xBF);
  dst[2] = static_cast<char>(0xBF);
  return 3;
}
157
158
void Transcoder::encodeUTF16BE(unsigned int sv, ByteBuffer& dst)
159
0
{
160
0
  size_t bytes = encodeUTF16BE(sv, dst.current());
161
0
  dst.increment_position(bytes);
162
0
}
163
164
165
/**
 * Writes the big-endian UTF-16 encoding of ch to dst.
 *
 * @param ch  value to encode; anything above 0x10FFFF is written as the
 *            two bytes FF FF.
 * @param dst output; two or four bytes are written starting at dst[0].
 * @return the number of bytes written (2 or 4).
 */
size_t Transcoder::encodeUTF16BE(unsigned int ch, char* dst)
{
  // A single 16 bit unit, most significant byte first.
  if (ch <= 0xFFFF)
  {
    dst[0] = static_cast<char>(ch >> 8);
    dst[1] = static_cast<char>(ch & 0xFF);
    return 2;
  }

  if (ch > 0x10FFFF)
  {
    dst[0] = dst[1] = static_cast<char>(0xFF);
    return 2;
  }

  // Surrogate pair: the 20 bits of (ch - 0x10000) are split into a
  // high unit (0xD800 + upper 10 bits) and a low unit (0xDC00 + lower
  // 10 bits), each written most significant byte first.
  const unsigned int highUnit = 0xD800 + ((ch - 0x10000) >> 10);
  const unsigned int lowUnit = 0xDC00 + (ch & 0x3FF);
  dst[0] = static_cast<char>(highUnit >> 8);
  dst[1] = static_cast<char>(highUnit & 0xFF);
  dst[2] = static_cast<char>(lowUnit >> 8);
  dst[3] = static_cast<char>(lowUnit & 0xFF);
  return 4;
}
187
188
void Transcoder::encodeUTF16LE(unsigned int sv, ByteBuffer& dst)
189
0
{
190
0
  size_t bytes = encodeUTF16LE(sv, dst.current());
191
0
  dst.increment_position(bytes);
192
0
}
193
194
/**
 * Writes the little-endian UTF-16 encoding of ch to dst.
 *
 * @param ch  value to encode; anything above 0x10FFFF is written as the
 *            two bytes FF FF.
 * @param dst output; two or four bytes are written starting at dst[0].
 * @return the number of bytes written (2 or 4).
 */
size_t Transcoder::encodeUTF16LE(unsigned int ch, char* dst)
{
  // A single 16 bit unit, least significant byte first.
  if (ch <= 0xFFFF)
  {
    dst[0] = static_cast<char>(ch & 0xFF);
    dst[1] = static_cast<char>(ch >> 8);
    return 2;
  }

  if (ch > 0x10FFFF)
  {
    dst[0] = dst[1] = static_cast<char>(0xFF);
    return 2;
  }

  // Surrogate pair: the 20 bits of (ch - 0x10000) are split into a
  // high unit (0xD800 + upper 10 bits) and a low unit (0xDC00 + lower
  // 10 bits), each written least significant byte first.
  const unsigned int highUnit = 0xD800 + ((ch - 0x10000) >> 10);
  const unsigned int lowUnit = 0xDC00 + (ch & 0x3FF);
  dst[0] = static_cast<char>(highUnit & 0xFF);
  dst[1] = static_cast<char>(highUnit >> 8);
  dst[2] = static_cast<char>(lowUnit & 0xFF);
  dst[3] = static_cast<char>(lowUnit >> 8);
  return 4;
}
216
217
218
unsigned int Transcoder::decode(const std::string& src,
219
  std::string::const_iterator& iter)
220
0
{
221
0
  auto offset = iter - src.begin();
222
0
  auto remaining = src.size() - offset;
223
0
  ByteBuffer buf(const_cast<char*>(src.data() + offset), remaining);
224
0
  auto result = CharsetDecoder::getUTF8CodePoint(buf);
225
0
  iter += remaining - buf.remaining();
226
0
  return result;
227
0
}
228
229
230
void Transcoder::encode(unsigned int sv, std::string& dst)
231
0
{
232
0
  char tmp[8];
233
0
  size_t bytes = encodeUTF8(sv, tmp);
234
0
  dst.append(tmp, bytes);
235
0
}
236
237
238
void Transcoder::decode(const std::string& src, LogString& dst)
239
166k
{
240
#if LOG4CXX_CHARSET_UTF8 && LOG4CXX_LOGCHAR_IS_UTF8
241
  dst.append(src);
242
#else
243
166k
  static CharsetDecoderPtr decoder(CharsetDecoder::getDefaultDecoder());
244
166k
  dst.reserve(dst.size() + src.size());
245
166k
  std::string::const_iterator iter = src.begin();
246
166k
#if !LOG4CXX_CHARSET_EBCDIC
247
248
166k
  for (;
249
4.66M
    iter != src.end() && ((unsigned char) *iter) < 0x80;
250
4.49M
    iter++)
251
4.49M
  {
252
4.49M
    dst.append(1, *iter);
253
4.49M
  }
254
255
166k
#endif
256
257
166k
  if (iter != src.end())
258
41.8k
  {
259
41.8k
    size_t offset = iter - src.begin();
260
41.8k
    ByteBuffer buf(const_cast<char*>(src.data() + offset), src.size() - offset);
261
262
2.61M
    while (buf.remaining() > 0)
263
2.57M
    {
264
2.57M
      log4cxx_status_t stat = decoder->decode(buf, dst);
265
266
2.57M
      if (CharsetDecoder::isError(stat))
267
2.54M
      {
268
2.54M
        dst.append(1, LOSSCHAR);
269
2.54M
        buf.increment_position(1);
270
2.54M
      }
271
2.57M
    }
272
273
41.8k
    decoder->decode(buf, dst);
274
41.8k
  }
275
276
166k
#endif
277
166k
}
278
279
#if LOG4CXX_ABI_VERSION <= 15
280
char* Transcoder::encode(const LogString& src, Pool& p)
281
0
{
282
#if LOG4CXX_CHARSET_UTF8 && LOG4CXX_LOGCHAR_IS_UTF8
283
  return p.pstrdup(src);
284
#else
285
0
  std::string tmp;
286
0
  encode(src, tmp);
287
0
  return p.pstrdup(tmp);
288
0
#endif
289
0
}
290
#endif
291
292
293
void Transcoder::encode(const LogString& src, std::string& dst)
294
4
{
295
#if LOG4CXX_CHARSET_UTF8 && LOG4CXX_LOGCHAR_IS_UTF8
296
  dst.append(src);
297
#else
298
4
  static CharsetEncoderPtr encoder(CharsetEncoder::getDefaultEncoder());
299
4
  dst.reserve(dst.size() + src.size());
300
4
  LogString::const_iterator iter = src.begin();
301
4
#if !LOG4CXX_CHARSET_EBCDIC
302
303
4
  for (;
304
90
    iter != src.end() && ((unsigned int) *iter) < 0x80;
305
86
    iter++)
306
86
  {
307
86
    dst.append(1, static_cast<char>(*iter));
308
86
  }
309
310
4
#endif
311
312
4
  if (iter != src.end())
313
0
  {
314
0
    char buf[BUFSIZE];
315
0
    ByteBuffer out(buf, BUFSIZE);
316
317
0
    while (iter != src.end())
318
0
    {
319
0
      log4cxx_status_t stat = encoder->encode(src, iter, out);
320
0
      out.flip();
321
0
      dst.append(out.data(), out.limit());
322
0
      out.clear();
323
324
0
      if (CharsetEncoder::isError(stat))
325
0
      {
326
0
        dst.append(1, LOSSCHAR);
327
0
        iter++;
328
0
      }
329
0
    }
330
331
0
    encoder->encode(src, iter, out);
332
0
  }
333
334
4
#endif
335
4
}
336
337
338
/**
 * Reads one value from a sequence of UTF-16 code units.
 *
 * @param in   the sequence being read; only in.end() is used.
 * @param iter current position within in.  It is advanced by one for a
 *             unit outside the surrogate range (other than 0xFFFF), by
 *             two for a high surrogate followed by a low surrogate, and
 *             is left unchanged in every other case.
 * @return the decoded value, or 0xFFFF when iter is at in.end(), when
 *         the unit is an unpaired surrogate, or when the unit itself is
 *         0xFFFF.
 */
template<class String, class Iterator>
static unsigned int decodeUTF16(const String& in, Iterator& iter)
{
  //
  //   nothing left to read: report failure rather than
  //   dereferencing the end iterator
  //
  if (iter == in.end())
  {
    return 0xFFFF;
  }

  unsigned int ch1 = *iter;

  //
  //   if not surrogate pair
  //
  if (ch1 < 0xD800 || ch1 > 0xDFFF)
  {
    //
    //  then advance iterator and return the value
    //  (0xFFFF is returned without advancing)
    //
    if (ch1 != 0xFFFF)
    {
      iter++;
    }

    return ch1;
  }
  else if (ch1 < 0xDC00)
  {
    //
    //  started with high-surrogate value
    //     if there is an additional unit
    Iterator iter2 = iter + 1;

    if (iter2 != in.end())
    {
      unsigned int ch2 = *iter2;

      //
      //    if it is a matching low surrogate then
      //       advance the iterator and return the scalar value
      if (ch2 >= 0xDC00 && ch2 <= 0xDFFF)
      {
        iter += 2;
        return (ch1 - 0xD800) * 0x400 + (ch2 - 0xDC00) + 0x10000;
      }
    }
  }

  //
  //    unrecognized value, do not advance iterator
  //
  return 0xFFFF;
}
385
386
/**
 * Appends the UTF-16 code unit(s) for sv to dst.
 *
 * @param sv  value to encode.  Values below 0x10000 are appended as a
 *            single unit, values up to 0x10FFFF as a high/low surrogate
 *            pair.  Larger values cannot be represented in UTF-16 and
 *            are appended as the single unit 0xFFFF, which is also what
 *            encodeUTF16BE and encodeUTF16LE write for such input.
 * @param dst receives the code unit(s) (appended).
 */
template<class String>
static void encodeUTF16(unsigned int sv, String& dst)
{
  typedef typename String::value_type unit_type;

  if (sv < 0x10000)
  {
    dst.append(1, static_cast<unit_type>(sv));
  }
  else if (sv <= 0x10FFFF)
  {
    // Split the 20 bits of (sv - 0x10000) over the two surrogates.
    const unsigned int offset = sv - 0x10000;
    const unsigned short hs = static_cast<unsigned short>(0xD800 + (offset >> 10));
    const unsigned short ls = static_cast<unsigned short>(0xDC00 + (offset & 0x3FF));
    dst.append(1, static_cast<unit_type>(hs));
    dst.append(1, static_cast<unit_type>(ls));
  }
  else
  {
    // Out of range: do not let truncation turn it into the surrogate
    // pair of an unrelated character.
    dst.append(1, static_cast<unit_type>(0xFFFF));
  }
}
403
404
405
406
#if LOG4CXX_WCHAR_T_API || LOG4CXX_LOGCHAR_IS_WCHAR || defined(WIN32) || defined(_WIN32)
407
void Transcoder::decode(const std::wstring& src, LogString& dst)
408
0
{
409
0
#if LOG4CXX_LOGCHAR_IS_WCHAR
410
0
  dst.append(src);
411
#else
412
  std::wstring::const_iterator i = src.begin();
413
414
  while (i != src.end())
415
  {
416
    std::wstring::const_iterator start = i;
417
    unsigned int cp = decode(src, i);
418
419
    if (cp != 0xFFFF)
420
    {
421
      encode(cp, dst);
422
    }
423
    else
424
    {
425
      dst.append(1, LOSSCHAR);
426
      if (i == start)
427
      {
428
        i++;
429
      }
430
    }
431
  }
432
433
#endif
434
0
}
435
436
void Transcoder::encode(const LogString& src, std::wstring& dst)
437
0
{
438
0
#if LOG4CXX_LOGCHAR_IS_WCHAR
439
0
  dst.append(src);
440
#else
441
442
  for (LogString::const_iterator i = src.begin(); i != src.end();)
443
  {
444
    LogString::const_iterator start = i;
445
    unsigned int cp = Transcoder::decode(src, i);
446
447
    if (cp != 0xFFFF)
448
    {
449
      encode(cp, dst);
450
    }
451
    else
452
    {
453
      dst.append(1, LOSSCHAR);
454
      if (i == start)
455
      {
456
        i++;
457
      }
458
    }
459
  }
460
461
#endif
462
0
}
463
464
#if LOG4CXX_ABI_VERSION <= 15
465
wchar_t* Transcoder::wencode(const LogString& src, Pool& p)
466
0
{
467
0
#if LOG4CXX_LOGCHAR_IS_WCHAR
468
0
  const std::wstring& tmp = src;
469
#else
470
  std::wstring tmp;
471
  encode(src, tmp);
472
#endif
473
0
  wchar_t* dst = (wchar_t*) p.palloc((tmp.length() + 1) * sizeof(wchar_t));
474
0
  dst[tmp.length()] = 0;
475
0
  std::memcpy(dst, tmp.data(), tmp.length() * sizeof(wchar_t));
476
0
  return dst;
477
0
}
478
#endif
479
480
/**
 * Reads one value from the wide string in at iter.
 *
 * When __STDC_ISO_10646__ is defined the element at iter is returned
 * as is and iter always moves forward by one; otherwise the work is
 * delegated to decodeUTF16.
 *
 * @param in   wide character input.
 * @param iter position within in; advanced as described above.
 * @return the value read.
 */
unsigned int Transcoder::decode(const std::wstring& in,
  std::wstring::const_iterator& iter)
{
#if defined(__STDC_ISO_10646__)
  const unsigned int value = *iter;
  ++iter;
  return value;
#else
  return decodeUTF16(in, iter);
#endif
}
489
490
491
/**
 * Appends sv to the wide string dst.
 *
 * When __STDC_ISO_10646__ is defined, or wchar_t is four bytes wide,
 * sv is appended as a single wchar_t; otherwise it is appended through
 * encodeUTF16.
 *
 * @param sv  value to append.
 * @param dst receives the wide character(s) (appended).
 */
void Transcoder::encode(unsigned int sv, std::wstring& dst)
{
#if defined(__STDC_ISO_10646__)
  dst.append(1, static_cast<wchar_t>(sv));
#else

  if (sizeof(wchar_t) != 4)
  {
    encodeUTF16(sv, dst);
  }
  else
  {
    dst.append(1, static_cast<wchar_t>(sv));
  }

#endif
}
508
509
#endif
510
511
512
513
#if LOG4CXX_UNICHAR_API || LOG4CXX_LOGCHAR_IS_UNICHAR
514
void Transcoder::decode(const std::basic_string<UniChar>& src, LogString& dst)
515
{
516
#if LOG4CXX_LOGCHAR_IS_UNICHAR
517
  dst.append(src);
518
#else
519
520
  for (std::basic_string<UniChar>::const_iterator i = src.begin();
521
    i != src.end();)
522
  {
523
    std::basic_string<UniChar>::const_iterator start = i;
524
    unsigned int cp = decode(src, i);
525
526
    if (cp != 0xFFFF)
527
    {
528
      encode(cp, dst);
529
    }
530
    else
531
    {
532
      dst.append(1, LOSSCHAR);
533
      if (i == start)
534
      {
535
        i++;
536
      }
537
    }
538
  }
539
540
#endif
541
}
542
543
void Transcoder::encode(const LogString& src, std::basic_string<UniChar>& dst)
544
{
545
#if LOG4CXX_LOGCHAR_IS_UNICHAR
546
  dst.append(src);
547
#else
548
549
  for (LogString::const_iterator i = src.begin();
550
    i != src.end();)
551
  {
552
    LogString::const_iterator start = i;
553
    unsigned int cp = decode(src, i);
554
555
    if (cp != 0xFFFF)
556
    {
557
      encode(cp, dst);
558
    }
559
    else
560
    {
561
      encode(LOSSCHAR, dst);
562
      if (i == start)
563
      {
564
        i++;
565
      }
566
    }
567
  }
568
569
#endif
570
}
571
572
unsigned int Transcoder::decode(const std::basic_string<UniChar>& in,
573
  std::basic_string<UniChar>::const_iterator& iter)
574
{
575
  return decodeUTF16(in, iter);
576
}
577
578
void Transcoder::encode(unsigned int sv, std::basic_string<UniChar>& dst)
579
{
580
  encodeUTF16(sv, dst);
581
}
582
583
#endif
584
585
#if LOG4CXX_CFSTRING_API
586
void Transcoder::decode(const CFStringRef& src, LogString& dst)
587
{
588
  auto length = CFStringGetLength(src);
589
#if defined(_DEBUG)
590
  if (LogLog::isDebugEnabled())
591
  {
592
    LogString msg(LOG4CXX_STR("Transcoder::decodeCFString"));
593
    msg += LOG4CXX_STR(" length ");
594
    StringHelper::toString((size_t)length, msg);
595
    LogLog::debug(msg);
596
  }
597
#endif
598
599
  if (length > 0)
600
  {
601
    std::vector<unsigned short> tmp(length);
602
    CFStringGetCharacters(src, CFRangeMake(0, length), &tmp[0]);
603
    for (auto i = tmp.begin(); i != tmp.end(); )
604
    {
605
      auto start = i;
606
      unsigned int cp = decodeUTF16(tmp, i);
607
      if (cp != 0xFFFF)
608
      {
609
        encode(cp, dst);
610
      }
611
      else
612
      {
613
        dst.append(1, LOSSCHAR);
614
        if (i == start)
615
        {
616
          i++;
617
        }
618
      }
619
    }
620
  }
621
}
622
623
/**
 * Creates a CFString holding the content of src.
 *
 * src is read one code point at a time with Transcoder::decode(src,
 * iter) rather than one code unit at a time, so that a LogString made
 * of multi-unit sequences (for example UTF-8 bytes) is not passed to
 * encodeUTF16 unit by unit.  A result of 0xFFFF is written as LOSSCHAR.
 *
 * @param src text in the internal LogString encoding.
 * @return the string returned by CFStringCreateWithCharacters.
 *         NOTE(review): presumably the caller must release it - confirm.
 */
CFStringRef Transcoder::encode(const LogString& src)
{
  std::basic_string<unsigned short> tmp;

  for (LogString::const_iterator i = src.begin(); i != src.end();)
  {
    LogString::const_iterator start = i;
    unsigned int cp = Transcoder::decode(src, i);

    if (cp != 0xFFFF)
    {
      encodeUTF16(cp, tmp);
    }
    else
    {
      encodeUTF16(LOSSCHAR, tmp);

      // Step over the unit only if decode() did not move already.
      if (i == start)
      {
        i++;
      }
    }
  }

  return CFStringCreateWithCharacters(kCFAllocatorDefault, tmp.data(), tmp.size());
}
630
#endif // #if LOG4CXX_CFSTRING_API
631
632
633
/**
 * Converts a single char to a logchar.
 *
 * On EBCDIC builds the char is run through decode(std::string,
 * LogString) and the first resulting element is returned; on all other
 * builds the value is returned unchanged.
 *
 * @param val character to convert.
 * @return the converted character.
 */
logchar Transcoder::decode(char val)
{
#if LOG4CXX_CHARSET_EBCDIC
  LogString converted;
  Transcoder::decode(std::string(1, val), converted);
  return converted[0];
#else
  return val;
#endif
}
643
644
/**
 * Converts a C string to a LogString.
 *
 * When logchar is UTF-8 and the charset is not EBCDIC the LogString is
 * constructed directly from val; otherwise val is converted with
 * Transcoder::decode(val, result).
 *
 * @param val zero terminated input.
 * @return the converted text.
 */
LogString Transcoder::decode(const char* val)
{
#if LOG4CXX_LOGCHAR_IS_UTF8 && !LOG4CXX_CHARSET_EBCDIC
  return val;
#else
  LogString result;
  Transcoder::decode(val, result);
  return result;
#endif
}
654
655
656
std::string Transcoder::encodeCharsetName(const LogString& val)
657
0
{
658
0
  char asciiTable[] = { ' ', '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.', '/',
659
0
      '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?',
660
0
      '@', 'A', 'B', 'C', 'D', 'E', 'F',  'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
661
0
      'P', 'Q', 'R', 'S', 'T', 'U', 'V',  'W', 'X', 'Y', 'Z', '[', '\\', ']', '^', '_',
662
0
      '`', 'a', 'b', 'c', 'd', 'e', 'f',  'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
663
0
      'p', 'q', 'r', 's', 't', 'u', 'v',  'w', 'x', 'y', 'z', '{', '|', '}', '~'
664
0
    };
665
0
  std::string out;
666
667
0
  for (auto& item : val)
668
0
  {
669
0
    if (item >= 0x20 && item < 0x7F)
670
0
    {
671
0
      out.append(1, asciiTable[item - 0x20]);
672
0
    }
673
0
    else
674
0
    {
675
0
      out.append(1, LOSSCHAR);
676
0
    }
677
0
  }
678
679
0
  return out;
680
0
}