Coverage Report

Created: 2026-04-12 06:23

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/logging-log4cxx/src/main/cpp/charsetdecoder.cpp
Line
Count
Source
1
/*
2
 * Licensed to the Apache Software Foundation (ASF) under one or more
3
 * contributor license agreements.  See the NOTICE file distributed with
4
 * this work for additional information regarding copyright ownership.
5
 * The ASF licenses this file to You under the Apache License, Version 2.0
6
 * (the "License"); you may not use this file except in compliance with
7
 * the License.  You may obtain a copy of the License at
8
 *
9
 *      http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 */
17
#define NOMINMAX /* tell windows not to define min/max macros */
18
#include <log4cxx/private/string_c11.h>
19
#include <log4cxx/logstring.h>
20
#include <log4cxx/helpers/charsetdecoder.h>
21
#include <log4cxx/helpers/bytebuffer.h>
22
#include <log4cxx/helpers/exception.h>
23
#include <log4cxx/helpers/pool.h>
24
#include <log4cxx/helpers/loglog.h>
25
#include <apr_xlate.h>
26
#if !defined(LOG4CXX)
27
  #define LOG4CXX 1
28
#endif
29
#include <log4cxx/private/log4cxx_private.h>
30
#include <locale.h>
31
#include <apr_portable.h>
32
#include <log4cxx/helpers/stringhelper.h>
33
#include <log4cxx/helpers/transcoder.h>
34
#include <mutex>
35
36
using namespace LOG4CXX_NS;
37
using namespace LOG4CXX_NS::helpers;
38
39
IMPLEMENT_LOG4CXX_OBJECT(CharsetDecoder)
40
41
42
namespace LOG4CXX_NS
43
{
44
namespace helpers
45
{
46
47
#if APR_HAS_XLATE
48
/**
49
 *  Converts from an arbitrary encoding to LogString
50
 *    using apr_xlate.  Requires real iconv implementation,
51
*    apr-iconv will crash in use.
52
 */
53
class APRCharsetDecoder : public CharsetDecoder
54
{
55
  public:
56
    /**
57
     *  Creates a new instance.
58
     *  @param frompage name of source encoding.
59
     */
60
0
    APRCharsetDecoder(const LogString& frompage) : pool()
61
0
    {
62
0
#if LOG4CXX_LOGCHAR_IS_WCHAR
63
0
      const char* topage = "WCHAR_T";
64
0
#endif
65
#if LOG4CXX_LOGCHAR_IS_UTF8
66
      const char* topage = "UTF-8";
67
#endif
68
#if LOG4CXX_LOGCHAR_IS_UNICHAR
69
      const char* topage = "UTF-16";
70
#endif
71
0
      std::string fpage(Transcoder::encodeCharsetName(frompage));
72
0
      apr_status_t stat = apr_xlate_open(&convset,
73
0
          topage,
74
0
          fpage.c_str(),
75
0
          pool.getAPRPool());
76
77
0
      if (stat != APR_SUCCESS)
78
0
      {
79
0
        throw IllegalArgumentException(frompage);
80
0
      }
81
0
    }
82
83
    /**
84
     *  Destructor.
85
     */
86
    virtual ~APRCharsetDecoder()
87
0
    {
88
0
    }
89
90
    virtual log4cxx_status_t decode(ByteBuffer& in,
91
      LogString& out)
92
0
    {
93
0
      enum { BUFSIZE = 256 };
94
0
      logchar buf[BUFSIZE];
95
0
      const apr_size_t initial_outbytes_left = BUFSIZE * sizeof(logchar);
96
0
      apr_status_t stat = APR_SUCCESS;
97
98
0
      if (in.remaining() == 0)
99
0
      {
100
0
        size_t outbytes_left = initial_outbytes_left;
101
0
        {
102
0
          std::lock_guard<std::mutex> lock(mutex);
103
0
          stat = apr_xlate_conv_buffer((apr_xlate_t*) convset,
104
0
              NULL, NULL, (char*) buf, &outbytes_left);
105
0
        }
106
0
        out.append(buf, (initial_outbytes_left - outbytes_left) / sizeof(logchar));
107
0
      }
108
0
      else
109
0
      {
110
0
        while (in.remaining() > 0 && stat == APR_SUCCESS)
111
0
        {
112
0
          size_t inbytes_left = in.remaining();
113
0
          size_t initial_inbytes_left = inbytes_left;
114
0
          apr_size_t outbytes_left = initial_outbytes_left;
115
0
          {
116
0
            std::lock_guard<std::mutex> lock(mutex);
117
0
            stat = apr_xlate_conv_buffer((apr_xlate_t*) convset,
118
0
                in.current(),
119
0
                &inbytes_left,
120
0
                (char*) buf,
121
0
                &outbytes_left);
122
0
          }
123
0
          out.append(buf, (initial_outbytes_left - outbytes_left) / sizeof(logchar));
124
0
          if (inbytes_left == initial_inbytes_left && stat == APR_SUCCESS)
125
0
          {
126
0
            stat = APR_BADCH;
127
0
            break;
128
0
          }
129
0
          in.increment_position(initial_inbytes_left - inbytes_left);
130
0
        }
131
0
      }
132
133
0
      return stat;
134
0
    }
135
136
  private:
137
    APRCharsetDecoder(const APRCharsetDecoder&);
138
    APRCharsetDecoder& operator=(const APRCharsetDecoder&);
139
    LOG4CXX_NS::helpers::Pool pool;
140
    std::mutex mutex;
141
    apr_xlate_t* convset;
142
};
143
144
#endif
145
146
#if LOG4CXX_LOGCHAR_IS_WCHAR && LOG4CXX_HAS_MBSRTOWCS
147
/**
148
*    Converts from the default multi-byte string to
149
*        LogString using mbstowcs.
150
*
151
*/
152
class MbstowcsCharsetDecoder : public CharsetDecoder
153
{
154
  public:
155
    MbstowcsCharsetDecoder()
156
0
    {
157
0
    }
158
159
    virtual ~MbstowcsCharsetDecoder()
160
0
    {
161
0
    }
162
163
  private:
164
    inline log4cxx_status_t append(LogString& out, const wchar_t* buf)
165
0
    {
166
0
      out.append(buf);
167
0
      return APR_SUCCESS;
168
0
    }
169
170
    virtual log4cxx_status_t decode(ByteBuffer& in,
171
      LogString& out)
172
0
    {
173
0
      log4cxx_status_t stat = APR_SUCCESS;
174
0
      enum { BUFSIZE = 256 };
175
0
      wchar_t wbuf[BUFSIZE];
176
0
      char cbuf[BUFSIZE*4];
177
0
178
0
      mbstate_t mbstate;
179
0
      memset(&mbstate, 0, sizeof(mbstate));
180
0
181
0
      while (in.remaining() > 0)
182
0
      {
183
0
        const char* src = in.current();
184
0
185
0
        if (*src == 0)
186
0
        {
187
0
          out.append(1, (logchar) 0);
188
0
          in.increment_position(1);
189
0
        }
190
0
        else
191
0
        {
192
0
          auto available = std::min(sizeof (cbuf) - 1, in.remaining());
193
0
          strncpy(cbuf, src, available);
194
0
          cbuf[available] = 0;
195
0
          src = cbuf;
196
0
          size_t wCharCount = mbsrtowcs(wbuf,
197
0
              &src,
198
0
              BUFSIZE - 1,
199
0
              &mbstate);
200
0
          auto converted = src - cbuf;
201
0
          in.increment_position(converted);
202
0
203
0
          if (wCharCount == (size_t) -1) // Illegal byte sequence?
204
0
          {
205
0
            LogString msg(LOG4CXX_STR("Illegal byte sequence at "));
206
0
            msg.append(std::to_wstring(in.position()));
207
0
            msg.append(LOG4CXX_STR(" of "));
208
0
            msg.append(std::to_wstring(in.limit()));
209
0
            LogLog::warn(msg);
210
0
            stat = APR_BADCH;
211
0
            break;
212
0
          }
213
0
          else
214
0
          {
215
0
            // FIX: Check for incomplete sequence infinite loop.
216
0
            // If mbsrtowcs returns success (>=0) but converted 0 bytes while data remains,
217
0
            // we are stuck (e.g. incomplete multibyte char at EOF).
218
0
            if (converted == 0 && in.remaining() > 0)
219
0
            {
220
0
              LogString msg(LOG4CXX_STR("Incomplete multibyte sequence at end of buffer"));
221
0
              LogLog::warn(msg);
222
0
              stat = APR_BADCH;
223
0
              break; // Break the infinite loop
224
0
            }
225
0
226
0
            wbuf[wCharCount] = 0;
227
0
            stat = append(out, wbuf);
228
0
          }
229
0
        }
230
0
      }
231
0
232
0
      return stat;
233
0
    }
234
235
236
237
  private:
238
    MbstowcsCharsetDecoder(const MbstowcsCharsetDecoder&);
239
    MbstowcsCharsetDecoder& operator=(const MbstowcsCharsetDecoder&);
240
};
241
#endif
242
243
244
/**
245
*    Decoder used when the external and internal charsets
246
*    are the same.
247
*
248
*/
249
class TrivialCharsetDecoder : public CharsetDecoder
250
{
251
  public:
252
    TrivialCharsetDecoder()
253
0
    {
254
0
    }
255
256
    virtual ~TrivialCharsetDecoder()
257
0
    {
258
0
    }
259
260
    virtual log4cxx_status_t decode(ByteBuffer& in,
261
      LogString& out)
262
0
    {
263
0
      size_t remaining = in.remaining();
264
0
265
0
      if ( remaining > 0)
266
0
      {
267
0
        auto src = in.current();
268
0
        auto count = remaining / sizeof(logchar);
269
0
        out.append(reinterpret_cast<const logchar*>(src), count);
270
0
        in.increment_position(remaining);
271
0
      }
272
0
273
0
      return APR_SUCCESS;
274
0
    }
275
276
277
278
  private:
279
    TrivialCharsetDecoder(const TrivialCharsetDecoder&);
280
    TrivialCharsetDecoder& operator=(const TrivialCharsetDecoder&);
281
};
282
283
/**
284
*    Converts from UTF-8 to std::wstring
285
*
286
*/
287
class UTF8CharsetDecoder : public CharsetDecoder
288
{
289
  public:
290
    UTF8CharsetDecoder()
291
1
    {
292
1
    }
293
294
    virtual ~UTF8CharsetDecoder()
295
0
    {
296
0
    }
297
298
  private:
299
    virtual log4cxx_status_t decode(ByteBuffer& in,
300
      LogString& out)
301
256k
    {
302
256k
      auto availableByteCount = in.remaining();
303
256k
      std::string tmp(in.current(), availableByteCount);
304
256k
      std::string::const_iterator nextCodePoint = tmp.begin();
305
306
1.78M
      while (nextCodePoint != tmp.end())
307
1.77M
      {
308
1.77M
        auto lastCodePoint = nextCodePoint;
309
1.77M
        auto sv = Transcoder::decode(tmp, nextCodePoint);
310
311
1.77M
        if (sv == 0xFFFF || nextCodePoint == lastCodePoint)
312
252k
        {
313
252k
          size_t offset = nextCodePoint - tmp.begin();
314
252k
          in.increment_position(offset);
315
252k
          return APR_BADCH;
316
252k
        }
317
1.52M
        else
318
1.52M
        {
319
1.52M
          Transcoder::encode(sv, out);
320
1.52M
        }
321
1.77M
      }
322
323
3.88k
      in.increment_position(availableByteCount);
324
325
3.88k
      return APR_SUCCESS;
326
256k
    }
327
328
  private:
329
    UTF8CharsetDecoder(const UTF8CharsetDecoder&);
330
    UTF8CharsetDecoder& operator=(const UTF8CharsetDecoder&);
331
};
332
333
/**
334
*    Converts from ISO-8859-1 to LogString.
335
*
336
*/
337
class ISOLatinCharsetDecoder : public CharsetDecoder
338
{
339
  public:
340
    ISOLatinCharsetDecoder()
341
0
    {
342
0
    }
343
344
    virtual ~ISOLatinCharsetDecoder()
345
0
    {
346
0
    }
347
348
  private:
349
    virtual log4cxx_status_t decode(ByteBuffer& in,
350
      LogString& out)
351
0
    {
352
0
      auto availableByteCount = in.remaining();
353
0
      auto src = in.current();
354
0
      auto srcEnd = src + availableByteCount;
355
356
0
      while (src < srcEnd)
357
0
      {
358
0
        auto sv = static_cast<unsigned int>(*src++);
359
0
        Transcoder::encode(sv, out);
360
0
      }
361
0
      in.increment_position(availableByteCount);
362
363
0
      return APR_SUCCESS;
364
0
    }
365
366
367
368
  private:
369
    ISOLatinCharsetDecoder(const ISOLatinCharsetDecoder&);
370
    ISOLatinCharsetDecoder& operator=(const ISOLatinCharsetDecoder&);
371
};
372
373
374
/**
375
*    Converts from US-ASCII to LogString.
376
*
377
*/
378
class USASCIICharsetDecoder : public CharsetDecoder
379
{
380
  public:
381
    USASCIICharsetDecoder()
382
0
    {
383
0
    }
384
385
    virtual ~USASCIICharsetDecoder()
386
0
    {
387
0
    }
388
389
  private:
390
391
    virtual log4cxx_status_t decode(ByteBuffer& in,
392
      LogString& out)
393
0
    {
394
0
      log4cxx_status_t stat = APR_SUCCESS;
395
396
0
      auto availableByteCount = in.remaining();
397
0
      auto src = in.current();
398
0
      auto srcEnd = src + availableByteCount;
399
0
      size_t byteCount = 0;
400
0
      while (src < srcEnd)
401
0
      {
402
0
        auto sv = static_cast<unsigned int>(*src++);
403
404
0
        if (sv < 0x80)
405
0
        {
406
0
          ++byteCount;
407
0
          Transcoder::encode(sv, out);
408
0
        }
409
0
        else
410
0
        {
411
0
          stat = APR_BADCH;
412
0
          break;
413
0
        }
414
0
      }
415
0
      in.increment_position(byteCount);
416
417
0
      return stat;
418
0
    }
419
420
421
422
  private:
423
    USASCIICharsetDecoder(const USASCIICharsetDecoder&);
424
    USASCIICharsetDecoder& operator=(const USASCIICharsetDecoder&);
425
};
426
427
/**
428
 *    Charset decoder that uses current locale settings.
429
 */
430
class LocaleCharsetDecoder : public CharsetDecoder
431
{
432
  public:
433
0
    LocaleCharsetDecoder() : state()
434
0
    {
435
0
    }
436
    log4cxx_status_t decode(ByteBuffer& in, LogString& out) override
437
0
    {
438
0
      log4cxx_status_t result = APR_SUCCESS;
439
0
      auto p = in.current();
440
0
      auto availableByteCount = in.remaining();
441
0
      size_t byteCount = 0;
442
0
#if !LOG4CXX_CHARSET_EBCDIC
443
0
      if (std::mbsinit(&this->state)) // ByteBuffer not partially decoded?
444
0
      {
445
        // Copy single byte characters
446
0
        for (; byteCount < availableByteCount && static_cast<unsigned int>(*p) < 0x80; ++byteCount, ++p)
447
0
        {
448
0
          out.append(1, *p);
449
0
        }
450
0
      }
451
0
#endif
452
      // Decode characters that may be represented by multiple bytes
453
0
      while (byteCount < availableByteCount)
454
0
      {
455
0
        wchar_t ch = 0;
456
0
        size_t n = std::mbrtowc(&ch, p, availableByteCount - byteCount, &this->state);
457
0
        if (0 == n) // NULL encountered?
458
0
        {
459
0
          ++byteCount;
460
0
          break;
461
0
        }
462
0
        if (static_cast<std::size_t>(-1) == n) // decoding error?
463
0
        {
464
0
          result = APR_BADCH;
465
0
          break;
466
0
        }
467
0
        if (static_cast<std::size_t>(-2) == n) // incomplete sequence?
468
0
        {
469
0
          break;
470
0
        }
471
0
        Transcoder::encode(static_cast<unsigned int>(ch), out);
472
0
        byteCount += n;
473
0
        p += n;
474
0
      }
475
0
      in.increment_position(byteCount);
476
0
      return result;
477
0
    }
478
479
  private:
480
    std::mbstate_t state;
481
};
482
483
484
485
} // namespace helpers
486
487
}  //namespace log4cxx
488
489
490
// Base-class constructor: performs no work of its own.
CharsetDecoder::CharsetDecoder() = default;
493
494
495
// Base-class destructor: performs no work of its own.
CharsetDecoder::~CharsetDecoder() = default;
498
499
/**
 *  Allocates the decoder selected at compile time by the
 *  LOG4CXX_CHARSET_* and LOG4CXX_LOGCHAR_IS_* settings.
 *  @return a decoder allocated with new.
 */
CharsetDecoder* CharsetDecoder::createDefaultDecoder()
{
#if LOG4CXX_CHARSET_UTF8 && LOG4CXX_LOGCHAR_IS_UTF8
  // External and internal encodings are both UTF-8.
  return new TrivialCharsetDecoder();
#elif LOG4CXX_CHARSET_UTF8
  return new UTF8CharsetDecoder();
#elif LOG4CXX_CHARSET_ISO88591 || defined(_WIN32_WCE)
  return new ISOLatinCharsetDecoder();
#elif LOG4CXX_CHARSET_USASCII
  return new USASCIICharsetDecoder();
#elif LOG4CXX_LOGCHAR_IS_WCHAR && LOG4CXX_HAS_MBSRTOWCS
  return new MbstowcsCharsetDecoder();
#else
  return new LocaleCharsetDecoder();
#endif
}
517
518
/**
 *  Returns the shared default decoder, creating it on first use.
 *  @return the cached decoder, or a newly created one when the cached
 *  pointer is null.
 */
CharsetDecoderPtr CharsetDecoder::getDefaultDecoder()
{
  static WideLife<CharsetDecoderPtr> decoder(createDefaultDecoder());

  //
  //  The cached pointer is null if this is invoked after static
  //     variable destruction (e.g. logging is called in the
  //     destructor of a static object).
  //
  const bool cacheIsEmpty = (decoder.value() == 0);

  if (!cacheIsEmpty)
  {
    return decoder;
  }

  //  Fall back to a decoder created for this call.
  return CharsetDecoderPtr( createDefaultDecoder() );
}
534
535
/**
 *  Creates a new UTF-8 decoder.
 *  @return a newly allocated UTF8CharsetDecoder.
 */
CharsetDecoderPtr CharsetDecoder::getUTF8Decoder()
{
  CharsetDecoderPtr utf8Decoder = std::make_shared<UTF8CharsetDecoder>();
  return utf8Decoder;
}
539
540
/**
 *  Creates a new ISO-8859-1 decoder.
 *  @return a newly allocated ISOLatinCharsetDecoder.
 */
CharsetDecoderPtr CharsetDecoder::getISOLatinDecoder()
{
  CharsetDecoderPtr latinDecoder = std::make_shared<ISOLatinCharsetDecoder>();
  return latinDecoder;
}
544
545
546
/**
 *  Creates a decoder for the named charset.
 *  Names handled directly are the UTF-8, US-ASCII and ISO-8859-1 aliases
 *  listed below plus "LOCALE"; any other name is handed to
 *  APRCharsetDecoder when APR_HAS_XLATE is set.
 *  @param charset name of the source encoding.
 *  @return a newly allocated decoder.
 *  @throws IllegalArgumentException if the name is not handled directly and
 *  APR_HAS_XLATE is not set.
 */
CharsetDecoderPtr CharsetDecoder::getDecoder(const LogString& charset)
{
  // True when charset matches the given upper/lower-case spelling pair.
  const auto isNamed = [&charset](const logchar* upper, const logchar* lower) -> bool
  {
    return StringHelper::equalsIgnoreCase(charset, upper, lower);
  };

  if (isNamed(LOG4CXX_STR("UTF-8"), LOG4CXX_STR("utf-8")) ||
    isNamed(LOG4CXX_STR("UTF8"), LOG4CXX_STR("utf8")) ||
    isNamed(LOG4CXX_STR("CP65001"), LOG4CXX_STR("cp65001")))
  {
#if LOG4CXX_LOGCHAR_IS_UTF8
    return std::make_shared<TrivialCharsetDecoder>();
#else
    return std::make_shared<UTF8CharsetDecoder>();
#endif
  }

  if (isNamed(LOG4CXX_STR("C"), LOG4CXX_STR("c")) ||
    charset == LOG4CXX_STR("646") ||
    isNamed(LOG4CXX_STR("US-ASCII"), LOG4CXX_STR("us-ascii")) ||
    isNamed(LOG4CXX_STR("ISO646-US"), LOG4CXX_STR("iso646-US")) ||
    isNamed(LOG4CXX_STR("ANSI_X3.4-1968"), LOG4CXX_STR("ansi_x3.4-1968")) ||
    isNamed(LOG4CXX_STR("CP20127"), LOG4CXX_STR("cp20127")))
  {
    return std::make_shared<USASCIICharsetDecoder>();
  }

  if (isNamed(LOG4CXX_STR("ISO-8859-1"), LOG4CXX_STR("iso-8859-1")) ||
    isNamed(LOG4CXX_STR("ISO-LATIN-1"), LOG4CXX_STR("iso-latin-1")) ||
    isNamed(LOG4CXX_STR("CP1252"), LOG4CXX_STR("cp1252")))
  {
    return std::make_shared<ISOLatinCharsetDecoder>();
  }

  if (isNamed(LOG4CXX_STR("LOCALE"), LOG4CXX_STR("locale")))
  {
    return std::make_shared<LocaleCharsetDecoder>();
  }

  // Not one of the built-in names.
#if APR_HAS_XLATE
  return std::make_shared<APRCharsetDecoder>(charset);
#else
  throw IllegalArgumentException(charset);
#endif
}
584
585
/**
 *  Decodes a character array by wrapping it in a ByteBuffer and calling
 *  decode(ByteBuffer&, LogString&).
 *  @param in the bytes to decode.
 *  @param maxByteCount upper bound passed to strnlen_s when measuring \c in.
 *  @param out receives the decoded characters.
 *  @return the status returned by decode(ByteBuffer&, LogString&).
 */
log4cxx_status_t CharsetDecoder::decode(const char* in, size_t maxByteCount, LogString& out)
{
  const size_t byteCount = strnlen_s(in, maxByteCount);
  // ByteBuffer is given a non-const pointer, as in the C-style cast this replaces.
  ByteBuffer source(const_cast<char*>(in), byteCount);
  return decode(source, out);
}
590
591
592
593
594
595