Coverage Report

Created: 2025-07-01 06:08

/src/logging-log4cxx/src/main/cpp/transcoder.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Licensed to the Apache Software Foundation (ASF) under one or more
3
 * contributor license agreements.  See the NOTICE file distributed with
4
 * this work for additional information regarding copyright ownership.
5
 * The ASF licenses this file to You under the Apache License, Version 2.0
6
 * (the "License"); you may not use this file except in compliance with
7
 * the License.  You may obtain a copy of the License at
8
 *
9
 *      http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 */
17
18
#include <log4cxx/logstring.h>
19
#include <log4cxx/helpers/transcoder.h>
20
#include <log4cxx/helpers/pool.h>
21
#include <stdlib.h>
22
#include <log4cxx/helpers/exception.h>
23
#include <log4cxx/helpers/bytebuffer.h>
24
#include <log4cxx/helpers/charsetdecoder.h>
25
#include <log4cxx/helpers/charsetencoder.h>
26
#include <log4cxx/helpers/stringhelper.h>
27
#include <log4cxx/helpers/loglog.h>
28
#include <vector>
29
#include <cstring>
30
#if !defined(LOG4CXX)
31
  #define LOG4CXX 1
32
#endif
33
#include <log4cxx/private/log4cxx_private.h>
34
35
#if LOG4CXX_CFSTRING_API
36
  #include <CoreFoundation/CFString.h>
37
#endif
38
39
using namespace LOG4CXX_NS;
40
using namespace LOG4CXX_NS::helpers;
41
42
43
void Transcoder::decodeUTF8(const std::string& src, LogString& dst)
44
0
{
45
0
  std::string::const_iterator iter = src.begin();
46
47
0
  while (iter != src.end())
48
0
  {
49
0
    unsigned int sv = decode(src, iter);
50
51
0
    if (sv != 0xFFFF)
52
0
    {
53
0
      encode(sv, dst);
54
0
    }
55
0
    else
56
0
    {
57
0
      dst.append(1, LOSSCHAR);
58
0
      iter++;
59
0
    }
60
0
  }
61
0
}
62
63
void Transcoder::encodeUTF8(const LogString& src, std::string& dst)
64
0
{
65
#if LOG4CXX_LOGCHAR_IS_UTF8
66
  dst.append(src);
67
#else
68
0
  LogString::const_iterator iter = src.begin();
69
70
0
  while (iter != src.end())
71
0
  {
72
0
    unsigned int sv = decode(src, iter);
73
74
0
    if (sv != 0xFFFF)
75
0
    {
76
0
      encode(sv, dst);
77
0
    }
78
0
    else
79
0
    {
80
0
      dst.append(1, LOSSCHAR);
81
0
      iter++;
82
0
    }
83
0
  }
84
85
0
#endif
86
0
}
87
88
char* Transcoder::encodeUTF8(const LogString& src, Pool& p)
89
0
{
90
#if LOG4CXX_LOGCHAR_IS_UTF8
91
  return p.pstrdup(src);
92
#else
93
0
  std::string tmp;
94
0
  encodeUTF8(src, tmp);
95
0
  return p.pstrdup(tmp);
96
0
#endif
97
0
}
98
99
100
void Transcoder::encodeUTF8(unsigned int sv, ByteBuffer& dst)
101
0
{
102
0
  size_t bytes = encodeUTF8(sv, dst.current());
103
0
  dst.position(dst.position() + bytes);
104
0
}
105
106
107
//
//   Stores the UTF-8 form of ch at dst and returns the number of
//   bytes stored (1 to 4).  A value above 0x10FFFF is stored as
//   the three byte encoding of 0xFFFF.
//   dst must have room for four bytes; no bounds check is done here.
//
size_t Transcoder::encodeUTF8(unsigned int ch, char* dst)
{
  if (ch < 0x80)
  {
    // single byte (7 bit) value
    dst[0] = static_cast<char>(ch);
    return 1;
  }

  if (ch < 0x800)
  {
    // 110xxxxx 10xxxxxx
    dst[0] = static_cast<char>(0xC0 | (ch >> 6));
    dst[1] = static_cast<char>(0x80 | (ch & 0x3F));
    return 2;
  }

  if (ch < 0x10000)
  {
    // 1110xxxx 10xxxxxx 10xxxxxx
    dst[0] = static_cast<char>(0xE0 | (ch >> 12));
    dst[1] = static_cast<char>(0x80 | ((ch >> 6) & 0x3F));
    dst[2] = static_cast<char>(0x80 | (ch & 0x3F));
    return 3;
  }

  if (ch > 0x10FFFF)
  {
    //
    //  output UTF-8 encoding of 0xFFFF
    //
    dst[0] = static_cast<char>(0xEF);
    dst[1] = static_cast<char>(0xBF);
    dst[2] = static_cast<char>(0xBF);
    return 3;
  }

  // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
  dst[0] = static_cast<char>(0xF0 | (ch >> 18));
  dst[1] = static_cast<char>(0x80 | ((ch >> 12) & 0x3F));
  dst[2] = static_cast<char>(0x80 | ((ch >> 6) & 0x3F));
  dst[3] = static_cast<char>(0x80 | (ch & 0x3F));
  return 4;
}
146
147
void Transcoder::encodeUTF16BE(unsigned int sv, ByteBuffer& dst)
148
0
{
149
0
  size_t bytes = encodeUTF16BE(sv, dst.current());
150
0
  dst.position(dst.position() + bytes);
151
0
}
152
153
154
//
//   Stores the big-endian UTF-16 form of ch at dst and returns the
//   number of bytes stored: 2 for values up to 0xFFFF, 4 (a surrogate
//   pair) for values up to 0x10FFFF.  Any larger value is stored as
//   the two bytes 0xFF 0xFF.
//   dst must have room for four bytes; no bounds check is done here.
//
size_t Transcoder::encodeUTF16BE(unsigned int ch, char* dst)
{
  if (ch <= 0xFFFF)
  {
    dst[0] = (char) (ch >> 8);
    dst[1] = (char) (ch & 0xFF);
    return 2;
  }

  if (ch <= 0x10FFFF)
  {
    // w holds the four bits of (plane - 1) that start the high surrogate
    unsigned char w = (unsigned char) ((ch >> 16) - 1);
    // high surrogate: 110110ww wwxxxxxx
    dst[0] = (char) (0xD8 + (w >> 2));
    dst[1] = (char) (((w & 0x03) << 6) + ((ch >> 10) & 0x3F));
    // low surrogate: 110111xx xxxxxxxx carries the low ten bits of ch,
    // so its first byte needs bits 8 and 9 of ch
    dst[2] = (char) (0xDC + ((ch >> 8) & 0x03));
    dst[3] = (char) (ch & 0xFF);
    return 4;
  }

  // not a Unicode scalar value
  dst[0] = dst[1] = (char) 0xFF;
  return 2;
}
176
177
void Transcoder::encodeUTF16LE(unsigned int sv, ByteBuffer& dst)
178
0
{
179
0
  size_t bytes = encodeUTF16LE(sv, dst.current());
180
0
  dst.position(dst.position() + bytes);
181
0
}
182
183
//
//   Stores the little-endian UTF-16 form of ch at dst and returns the
//   number of bytes stored: 2 for values up to 0xFFFF, 4 (a surrogate
//   pair) for values up to 0x10FFFF.  Any larger value is stored as
//   the two bytes 0xFF 0xFF.
//   dst must have room for four bytes; no bounds check is done here.
//
size_t Transcoder::encodeUTF16LE(unsigned int ch, char* dst)
{
  if (ch <= 0xFFFF)
  {
    dst[1] = (char) (ch >> 8);
    dst[0] = (char) (ch & 0xFF);
    return 2;
  }

  if (ch <= 0x10FFFF)
  {
    // w holds the four bits of (plane - 1) that start the high surrogate
    unsigned char w = (unsigned char) ((ch >> 16) - 1);
    // high surrogate 110110ww wwxxxxxx, least significant byte first
    dst[1] = (char) (0xD8 + (w >> 2));
    dst[0] = (char) (((w & 0x03) << 6) + ((ch >> 10) & 0x3F));
    // low surrogate 110111xx xxxxxxxx carries the low ten bits of ch,
    // so its most significant byte needs bits 8 and 9 of ch
    dst[3] = (char) (0xDC + ((ch >> 8) & 0x03));
    dst[2] = (char) (ch & 0xFF);
    return 4;
  }

  // not a Unicode scalar value
  dst[0] = dst[1] = (char) 0xFF;
  return 2;
}
205
206
207
//
//   Decodes one UTF-8 sequence of src starting at iter.
//
//   On success the code point is returned and iter is positioned
//   after the sequence.  On failure 0xFFFF is returned and iter
//   is left where it was on entry, so the caller decides how far to skip.
//   Failure covers: a continuation or invalid byte in lead position,
//   a truncated sequence, a non-continuation byte inside the sequence,
//   an overlong form, a value above 0x10FFFF and the value 0xFFFF itself
//   (which could not be told apart from the failure marker).
//   Surrogate values (0xD800..0xDFFF) are not rejected here.
//
//   iter must not be equal to src.end() on entry.
//
unsigned int Transcoder::decode(const std::string& src,
  std::string::const_iterator& iter)
{
  const std::string::const_iterator start(iter);
  const std::string::const_iterator end(src.end());
  const unsigned char ch1 = *(iter++);

  if (ch1 <= 0x7F)
  {
    return ch1;
  }

  //
  //   classify the lead byte: payload bits, number of continuation
  //   bytes and the smallest value that needs this sequence length
  //
  unsigned int sv = 0;
  unsigned int pending = 0;
  unsigned int smallest = 0;

  if ((ch1 & 0xE0) == 0xC0)
  {
    sv = ch1 & 0x1F;
    pending = 1;
    smallest = 0x80;
  }
  else if ((ch1 & 0xF0) == 0xE0)
  {
    sv = ch1 & 0x0F;
    pending = 2;
    smallest = 0x800;
  }
  else if ((ch1 & 0xF8) == 0xF0)
  {
    sv = ch1 & 0x07;
    pending = 3;
    smallest = 0x10000;
  }
  else
  {
    //
    //   should not have continuation character (or 0xF8..0xFF) here
    //
    iter = start;
    return 0xFFFF;
  }

  for (; pending > 0; --pending)
  {
    if (iter == end)
    {
      // sequence is cut short by the end of the string
      iter = start;
      return 0xFFFF;
    }

    const unsigned char next = *(iter++);

    //
    //   should be continuation
    //
    if ((next & 0xC0) != 0x80)
    {
      iter = start;
      return 0xFFFF;
    }

    sv = (sv << 6) | (next & 0x3F);
  }

  if (sv < smallest || sv > 0x10FFFF || sv == 0xFFFF)
  {
    iter = start;
    return 0xFFFF;
  }

  return sv;
}
304
305
306
void Transcoder::encode(unsigned int sv, std::string& dst)
307
0
{
308
0
  char tmp[8];
309
0
  size_t bytes = encodeUTF8(sv, tmp);
310
0
  dst.append(tmp, bytes);
311
0
}
312
313
314
void Transcoder::decode(const std::string& src, LogString& dst)
315
837k
{
316
#if LOG4CXX_CHARSET_UTF8 && LOG4CXX_LOGCHAR_IS_UTF8
317
  dst.append(src);
318
#else
319
837k
  static CharsetDecoderPtr decoder(CharsetDecoder::getDefaultDecoder());
320
837k
  dst.reserve(dst.size() + src.size());
321
837k
  std::string::const_iterator iter = src.begin();
322
837k
#if !LOG4CXX_CHARSET_EBCDIC
323
324
837k
  for (;
325
16.6M
    iter != src.end() && ((unsigned char) *iter) < 0x80;
326
15.8M
    iter++)
327
15.8M
  {
328
15.8M
    dst.append(1, *iter);
329
15.8M
  }
330
331
837k
#endif
332
333
837k
  if (iter != src.end())
334
27.3k
  {
335
27.3k
    size_t offset = iter - src.begin();
336
27.3k
    ByteBuffer buf(const_cast<char*>(src.data() + offset), src.size() - offset);
337
338
2.58M
    while (buf.remaining() > 0)
339
2.55M
    {
340
2.55M
      log4cxx_status_t stat = decoder->decode(buf, dst);
341
342
2.55M
      if (CharsetDecoder::isError(stat))
343
2.53M
      {
344
2.53M
        dst.append(1, LOSSCHAR);
345
2.53M
        buf.position(buf.position() + 1);
346
2.53M
      }
347
2.55M
    }
348
349
27.3k
    decoder->decode(buf, dst);
350
27.3k
  }
351
352
837k
#endif
353
837k
}
354
355
char* Transcoder::encode(const LogString& src, Pool& p)
356
1
{
357
#if LOG4CXX_CHARSET_UTF8 && LOG4CXX_LOGCHAR_IS_UTF8
358
  return p.pstrdup(src);
359
#else
360
1
  std::string tmp;
361
1
  encode(src, tmp);
362
1
  return p.pstrdup(tmp);
363
1
#endif
364
1
}
365
366
367
368
void Transcoder::encode(const LogString& src, std::string& dst)
369
2
{
370
#if LOG4CXX_CHARSET_UTF8 && LOG4CXX_LOGCHAR_IS_UTF8
371
  dst.append(src);
372
#else
373
2
  static CharsetEncoderPtr encoder(CharsetEncoder::getDefaultEncoder());
374
2
  dst.reserve(dst.size() + src.size());
375
2
  LogString::const_iterator iter = src.begin();
376
2
#if !LOG4CXX_CHARSET_EBCDIC
377
378
2
  for (;
379
45
    iter != src.end() && ((unsigned int) *iter) < 0x80;
380
43
    iter++)
381
43
  {
382
43
    dst.append(1, *iter);
383
43
  }
384
385
2
#endif
386
387
2
  if (iter != src.end())
388
0
  {
389
0
    char buf[BUFSIZE];
390
0
    ByteBuffer out(buf, BUFSIZE);
391
392
0
    while (iter != src.end())
393
0
    {
394
0
      log4cxx_status_t stat = encoder->encode(src, iter, out);
395
0
      out.flip();
396
0
      dst.append(out.data(), out.limit());
397
0
      out.clear();
398
399
0
      if (CharsetEncoder::isError(stat))
400
0
      {
401
0
        dst.append(1, LOSSCHAR);
402
0
        iter++;
403
0
      }
404
0
    }
405
406
0
    encoder->encode(src, iter, out);
407
0
  }
408
409
2
#endif
410
2
}
411
412
413
//
//   Decodes one code point of the UTF-16 sequence in starting at iter.
//
//   A value outside the surrogate range is returned as is and iter is
//   advanced by one, except that iter stays put when the value is 0xFFFF.
//   A high surrogate followed by a low surrogate is combined,
//   and iter is advanced by two.
//   Anything else (unpaired surrogate) returns 0xFFFF without moving iter.
//
//   iter must not be equal to in.end() on entry.
//
template<class String, class Iterator>
static unsigned int decodeUTF16(const String& in, Iterator& iter)
{
  const unsigned int lead = *iter;
  const bool isSurrogate = (lead >= 0xD800 && lead <= 0xDFFF);

  if (!isSurrogate)
  {
    // 0xFFFF doubles as the failure marker, so it is never consumed
    if (lead != 0xFFFF)
    {
      iter++;
    }

    return lead;
  }

  if (lead >= 0xDC00)
  {
    // low surrogate without a preceding high surrogate
    return 0xFFFF;
  }

  const Iterator following = iter + 1;

  if (following == in.end())
  {
    // high surrogate is the last unit
    return 0xFFFF;
  }

  const unsigned int trail = *following;

  if (trail < 0xDC00 || trail > 0xDFFF)
  {
    // high surrogate is not followed by a low surrogate
    return 0xFFFF;
  }

  iter += 2;
  return (lead - 0xD800) * 0x400 + (trail - 0xDC00) + 0x10000;
}
460
461
//
//   Appends the UTF-16 form of sv to dst:
//   one unit for values below 0x10000,
//   a surrogate pair for values up to 0x10FFFF,
//   and the single unit 0xFFFF for anything larger
//   (such a value has no UTF-16 form; without this check the
//   plane bits would wrap around and yield an unrelated character).
//
template<class String>
static void encodeUTF16(unsigned int sv, String& dst)
{
  typedef typename String::value_type Unit;

  if (sv < 0x10000)
  {
    dst.append(1, static_cast<Unit>(sv));
  }
  else if (sv <= 0x10FFFF)
  {
    // twenty bits are split into two groups of ten
    const unsigned int offset = sv - 0x10000;
    const unsigned short hs = static_cast<unsigned short>(0xD800 + (offset >> 10));
    const unsigned short ls = static_cast<unsigned short>(0xDC00 + (offset & 0x3FF));
    dst.append(1, static_cast<Unit>(hs));
    dst.append(1, static_cast<Unit>(ls));
  }
  else
  {
    // not a Unicode scalar value
    dst.append(1, static_cast<Unit>(0xFFFF));
  }
}
478
479
480
481
#if LOG4CXX_WCHAR_T_API || LOG4CXX_LOGCHAR_IS_WCHAR_T || defined(WIN32) || defined(_WIN32)
482
void Transcoder::decode(const std::wstring& src, LogString& dst)
483
0
{
484
#if LOG4CXX_LOGCHAR_IS_WCHAR_T
485
  dst.append(src, len);
486
#else
487
0
  std::wstring::const_iterator i = src.begin();
488
489
0
  while (i != src.end())
490
0
  {
491
0
    unsigned int cp = decode(src, i);
492
493
0
    if (cp != 0xFFFF)
494
0
    {
495
0
      encode(cp, dst);
496
0
    }
497
0
    else
498
0
    {
499
0
      dst.append(1, LOSSCHAR);
500
0
      i++;
501
0
    }
502
0
  }
503
504
0
#endif
505
0
}
506
507
void Transcoder::encode(const LogString& src, std::wstring& dst)
508
0
{
509
#if LOG4CXX_LOGCHAR_IS_WCHAR_T
510
  dst.append(src);
511
#else
512
513
0
  for (LogString::const_iterator i = src.begin(); i != src.end();)
514
0
  {
515
0
    unsigned int cp = Transcoder::decode(src, i);
516
517
0
    if (cp != 0xFFFF)
518
0
    {
519
0
      encode(cp, dst);
520
0
    }
521
0
    else
522
0
    {
523
0
      dst.append(1, LOSSCHAR);
524
0
      i++;
525
0
    }
526
0
  }
527
528
0
#endif
529
0
}
530
531
wchar_t* Transcoder::wencode(const LogString& src, Pool& p)
532
0
{
533
#if LOG4CXX_LOGCHAR_IS_WCHAR_T
534
  std::wstring& tmp = src;
535
#else
536
0
  std::wstring tmp;
537
0
  encode(src, tmp);
538
0
#endif
539
0
  wchar_t* dst = (wchar_t*) p.palloc((tmp.length() + 1) * sizeof(wchar_t));
540
0
  dst[tmp.length()] = 0;
541
0
  std::memcpy(dst, tmp.data(), tmp.length() * sizeof(wchar_t));
542
0
  return dst;
543
0
}
544
545
546
//
//   Decodes one code point of the wide string in starting at iter.
//   When __STDC_ISO_10646__ is defined each wchar_t is taken as one
//   code point, otherwise the content is handled as UTF-16.
//
//   0xFFFF is the failure marker: in that case iter is not advanced,
//   which is what the callers that skip one unit after a failure expect.
//
//   iter must not be equal to in.end() on entry.
//
unsigned int Transcoder::decode(const std::wstring& in,
  std::wstring::const_iterator& iter)
{
#if defined(__STDC_ISO_10646__)
  const unsigned int sv = *iter;

  // same contract as decodeUTF16: never consume the failure marker
  if (sv != 0xFFFF)
  {
    ++iter;
  }

  return sv;
#else
  return decodeUTF16(in, iter);
#endif
}
555
556
557
//
//   Appends the code point sv to the wide string dst,
//   either as a single wchar_t (__STDC_ISO_10646__ defined or
//   wchar_t four bytes wide) or in UTF-16 form.
//
void Transcoder::encode(unsigned int sv, std::wstring& dst)
{
#if defined(__STDC_ISO_10646__)
  dst.append(1, sv);
#else

  if (sizeof(wchar_t) != 4)
  {
    // wchar_t too narrow for every code point: use UTF-16 units
    encodeUTF16(sv, dst);
  }
  else
  {
    dst.append(1, sv);
  }

#endif
}
574
575
#endif
576
577
578
579
#if LOG4CXX_UNICHAR_API || LOG4CXX_LOGCHAR_IS_UNICHAR
580
void Transcoder::decode(const std::basic_string<UniChar>& src, LogString& dst)
581
{
582
#if LOG4CXX_LOGCHAR_IS_UNICHAR
583
  dst.append(src);
584
#else
585
586
  for (std::basic_string<UniChar>::const_iterator i = src.begin();
587
    i != src.end();)
588
  {
589
    unsigned int cp = decode(src, i);
590
    encode(cp, dst);
591
  }
592
593
#endif
594
}
595
596
void Transcoder::encode(const LogString& src, std::basic_string<UniChar>& dst)
597
{
598
#if LOG4CXX_LOGCHAR_IS_UNICHAR
599
  dst.append(src);
600
#else
601
602
  for (LogString::const_iterator i = src.begin();
603
    i != src.end();)
604
  {
605
    unsigned int cp = decode(src, i);
606
    encode(cp, dst);
607
  }
608
609
#endif
610
}
611
612
unsigned int Transcoder::decode(const std::basic_string<UniChar>& in,
613
  std::basic_string<UniChar>::const_iterator& iter)
614
{
615
  return decodeUTF16(in, iter);
616
}
617
618
void Transcoder::encode(unsigned int sv, std::basic_string<UniChar>& dst)
619
{
620
  encodeUTF16(sv, dst);
621
}
622
623
#endif
624
625
#if LOG4CXX_CFSTRING_API
626
void Transcoder::decode(const CFStringRef& src, LogString& dst)
627
{
628
  auto length = CFStringGetLength(src);
629
#if defined(_DEBUG)
630
  if (LogLog::isDebugEnabled())
631
  {
632
    Pool pool;
633
    LogString msg(LOG4CXX_STR("Transcoder::decodeCFString"));
634
    msg += LOG4CXX_STR(" length ");
635
    StringHelper::toString((size_t)length, pool, msg);
636
    LogLog::debug(msg);
637
  }
638
#endif
639
640
  if (length > 0)
641
  {
642
    std::vector<unsigned short> tmp(length);
643
    CFStringGetCharacters(src, CFRangeMake(0, length), &tmp[0]);
644
    for (auto i = tmp.begin(); i != tmp.end(); )
645
    {
646
      unsigned int cp = decodeUTF16(tmp, i);
647
      encode(cp, dst);
648
    }
649
  }
650
}
651
652
//
//   Creates a CFString holding the content of src.
//   src is decoded one code point at a time (a logchar is not
//   necessarily a whole code point) and converted to UTF-16 units;
//   every undecodable unit is replaced by one LOSSCHAR.
//   The result comes from CFStringCreateWithCharacters().
//
CFStringRef Transcoder::encode(const LogString& src)
{
  std::basic_string<unsigned short> tmp;

  for (LogString::const_iterator i = src.begin(); i != src.end();)
  {
    unsigned int cp = Transcoder::decode(src, i);

    if (cp != 0xFFFF)
    {
      encodeUTF16(cp, tmp);
    }
    else
    {
      // decode() does not advance on failure: skip the unit here
      tmp.append(1, static_cast<unsigned short>(LOSSCHAR));
      ++i;
    }
  }

  return CFStringCreateWithCharacters(kCFAllocatorDefault, tmp.data(), tmp.size());
}
659
#endif // #if LOG4CXX_CFSTRING_API
660
661
662
//
//   Converts a single char to logchar.
//   In EBCDIC builds the value goes through the string decoder and the
//   first resulting character is returned, otherwise val is returned as is.
//
logchar Transcoder::decode(char val)
{
#if LOG4CXX_CHARSET_EBCDIC
  LogString converted;
  Transcoder::decode(std::string(1, val), converted);
  return converted[0];
#else
  return val;
#endif
}
672
673
//
//   Returns the C string val in LogString form.
//   When logchar is UTF-8 (and the charset is not EBCDIC) a LogString is
//   built from val directly, otherwise val goes through the string decoder.
//
LogString Transcoder::decode(const char* val)
{
#if LOG4CXX_LOGCHAR_IS_UTF8 && !LOG4CXX_CHARSET_EBCDIC
  return val;
#else
  LogString converted;
  Transcoder::decode(val, converted);
  return converted;
#endif
}
683
684
685
std::string Transcoder::encodeCharsetName(const LogString& val)
686
0
{
687
0
  char asciiTable[] = { ' ', '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.', '/',
688
0
      '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?',
689
0
      '@', 'A', 'B', 'C', 'D', 'E', 'F',  'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
690
0
      'P', 'Q', 'R', 'S', 'T', 'U', 'V',  'W', 'X', 'Y', 'Z', '[', '\\', ']', '^', '_',
691
0
      '`', 'a', 'b', 'c', 'd', 'e', 'f',  'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
692
0
      'p', 'q', 'r', 's', 't', 'u', 'v',  'w', 'x', 'y', 'z', '{', '|', '}', '~'
693
0
    };
694
0
  std::string out;
695
696
0
  for (auto& item : val)
697
0
  {
698
0
    if (item >= 0x20 && item < 0x7F)
699
0
    {
700
0
      out.append(1, asciiTable[item - 0x20]);
701
0
    }
702
0
    else
703
0
    {
704
0
      out.append(1, LOSSCHAR);
705
0
    }
706
0
  }
707
708
0
  return out;
709
0
}