Coverage Report

Created: 2025-10-10 06:52

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/logging-log4cxx/src/main/cpp/charsetdecoder.cpp
Line
Count
Source
1
/*
2
 * Licensed to the Apache Software Foundation (ASF) under one or more
3
 * contributor license agreements.  See the NOTICE file distributed with
4
 * this work for additional information regarding copyright ownership.
5
 * The ASF licenses this file to You under the Apache License, Version 2.0
6
 * (the "License"); you may not use this file except in compliance with
7
 * the License.  You may obtain a copy of the License at
8
 *
9
 *      http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 */
17
#define NOMINMAX /* tell windows not to define min/max macros */
18
#include <log4cxx/private/string_c11.h>
19
#include <log4cxx/logstring.h>
20
#include <log4cxx/helpers/charsetdecoder.h>
21
#include <log4cxx/helpers/bytebuffer.h>
22
#include <log4cxx/helpers/exception.h>
23
#include <log4cxx/helpers/pool.h>
24
#include <log4cxx/helpers/loglog.h>
25
#include <apr_xlate.h>
26
#if !defined(LOG4CXX)
27
  #define LOG4CXX 1
28
#endif
29
#include <log4cxx/private/log4cxx_private.h>
30
#include <locale.h>
31
#include <apr_portable.h>
32
#include <log4cxx/helpers/stringhelper.h>
33
#include <log4cxx/helpers/transcoder.h>
34
#include <mutex>
35
36
using namespace LOG4CXX_NS;
37
using namespace LOG4CXX_NS::helpers;
38
39
IMPLEMENT_LOG4CXX_OBJECT(CharsetDecoder)
40
41
42
namespace LOG4CXX_NS
43
{
44
namespace helpers
45
{
46
47
#if APR_HAS_XLATE
48
/**
49
 *  Converts from an arbitrary encoding to LogString
50
 *    using apr_xlate.  Requires real iconv implementation,
51
*    apr-iconv will crash in use.
52
 */
53
class APRCharsetDecoder : public CharsetDecoder
54
{
55
  public:
56
    /**
57
     *  Creates a new instance.
58
     *  @param frompage name of source encoding.
59
     */
60
0
    APRCharsetDecoder(const LogString& frompage) : pool()
61
0
    {
62
#if LOG4CXX_LOGCHAR_IS_WCHAR
63
      const char* topage = "WCHAR_T";
64
#endif
65
0
#if LOG4CXX_LOGCHAR_IS_UTF8
66
0
      const char* topage = "UTF-8";
67
0
#endif
68
#if LOG4CXX_LOGCHAR_IS_UNICHAR
69
      const char* topage = "UTF-16";
70
#endif
71
0
      std::string fpage(Transcoder::encodeCharsetName(frompage));
72
0
      apr_status_t stat = apr_xlate_open(&convset,
73
0
          topage,
74
0
          fpage.c_str(),
75
0
          pool.getAPRPool());
76
77
0
      if (stat != APR_SUCCESS)
78
0
      {
79
0
        throw IllegalArgumentException(frompage);
80
0
      }
81
0
    }
82
83
    /**
84
     *  Destructor.
85
     */
86
    virtual ~APRCharsetDecoder()
87
0
    {
88
0
    }
89
90
    virtual log4cxx_status_t decode(ByteBuffer& in,
91
      LogString& out)
92
0
    {
93
0
      enum { BUFSIZE = 256 };
94
0
      logchar buf[BUFSIZE];
95
0
      const apr_size_t initial_outbytes_left = BUFSIZE * sizeof(logchar);
96
0
      apr_status_t stat = APR_SUCCESS;
97
98
0
      if (in.remaining() == 0)
99
0
      {
100
0
        size_t outbytes_left = initial_outbytes_left;
101
0
        {
102
0
          std::lock_guard<std::mutex> lock(mutex);
103
0
          stat = apr_xlate_conv_buffer((apr_xlate_t*) convset,
104
0
              NULL, NULL, (char*) buf, &outbytes_left);
105
0
        }
106
0
        out.append(buf, (initial_outbytes_left - outbytes_left) / sizeof(logchar));
107
0
      }
108
0
      else
109
0
      {
110
0
        while (in.remaining() > 0 && stat == APR_SUCCESS)
111
0
        {
112
0
          size_t inbytes_left = in.remaining();
113
0
          size_t initial_inbytes_left = inbytes_left;
114
0
          size_t pos = in.position();
115
0
          apr_size_t outbytes_left = initial_outbytes_left;
116
0
          {
117
0
            std::lock_guard<std::mutex> lock(mutex);
118
0
            stat = apr_xlate_conv_buffer((apr_xlate_t*) convset,
119
0
                in.data() + pos,
120
0
                &inbytes_left,
121
0
                (char*) buf,
122
0
                &outbytes_left);
123
0
          }
124
0
          out.append(buf, (initial_outbytes_left - outbytes_left) / sizeof(logchar));
125
0
          in.position(pos + (initial_inbytes_left - inbytes_left));
126
0
        }
127
0
      }
128
129
0
      return stat;
130
0
    }
131
132
  private:
133
    APRCharsetDecoder(const APRCharsetDecoder&);
134
    APRCharsetDecoder& operator=(const APRCharsetDecoder&);
135
    LOG4CXX_NS::helpers::Pool pool;
136
    std::mutex mutex;
137
    apr_xlate_t* convset;
138
};
139
140
#endif
141
142
#if LOG4CXX_LOGCHAR_IS_WCHAR && LOG4CXX_HAS_MBSRTOWCS
143
/**
144
*    Converts from the default multi-byte string to
145
*        LogString using mbstowcs.
146
*
147
*/
148
class MbstowcsCharsetDecoder : public CharsetDecoder
149
{
150
  public:
151
    MbstowcsCharsetDecoder()
152
    {
153
    }
154
155
    virtual ~MbstowcsCharsetDecoder()
156
    {
157
    }
158
159
  private:
160
    inline log4cxx_status_t append(LogString& out, const wchar_t* buf)
161
    {
162
      out.append(buf);
163
      return APR_SUCCESS;
164
    }
165
166
    virtual log4cxx_status_t decode(ByteBuffer& in,
167
      LogString& out)
168
    {
169
      log4cxx_status_t stat = APR_SUCCESS;
170
      enum { BUFSIZE = 256 };
171
      wchar_t wbuf[BUFSIZE];
172
      char cbuf[BUFSIZE*4];
173
174
      mbstate_t mbstate;
175
      memset(&mbstate, 0, sizeof(mbstate));
176
177
      while (in.remaining() > 0)
178
      {
179
        const char* src = in.current();
180
181
        if (*src == 0)
182
        {
183
          out.append(1, (logchar) 0);
184
          in.position(in.position() + 1);
185
        }
186
        else
187
        {
188
          auto available = std::min(sizeof (cbuf) - 1, in.remaining());
189
          strncpy(cbuf, src, available);
190
          cbuf[available] = 0;
191
          src = cbuf;
192
          size_t wCharCount = mbsrtowcs(wbuf,
193
              &src,
194
              BUFSIZE - 1,
195
              &mbstate);
196
          auto converted = src - cbuf;
197
          in.position(in.position() + converted);
198
199
          if (wCharCount == (size_t) -1) // Illegal byte sequence?
200
          {
201
            LogString msg(LOG4CXX_STR("Illegal byte sequence at "));
202
            msg.append(std::to_wstring(in.position()));
203
            msg.append(LOG4CXX_STR(" of "));
204
            msg.append(std::to_wstring(in.limit()));
205
            LogLog::warn(msg);
206
            stat = APR_BADCH;
207
            break;
208
          }
209
          else
210
          {
211
            wbuf[wCharCount] = 0;
212
            stat = append(out, wbuf);
213
          }
214
        }
215
      }
216
217
      return stat;
218
    }
219
220
221
222
  private:
223
    MbstowcsCharsetDecoder(const MbstowcsCharsetDecoder&);
224
    MbstowcsCharsetDecoder& operator=(const MbstowcsCharsetDecoder&);
225
};
226
#endif
227
228
229
/**
230
*    Decoder used when the external and internal charsets
231
*    are the same.
232
*
233
*/
234
class TrivialCharsetDecoder : public CharsetDecoder
235
{
236
  public:
237
    TrivialCharsetDecoder()
238
0
    {
239
0
    }
240
241
    virtual ~TrivialCharsetDecoder()
242
0
    {
243
0
    }
244
245
    virtual log4cxx_status_t decode(ByteBuffer& in,
246
      LogString& out)
247
0
    {
248
0
      size_t remaining = in.remaining();
249
250
0
      if ( remaining > 0)
251
0
      {
252
0
        const logchar* src = (const logchar*) (in.data() + in.position());
253
0
        size_t count = remaining / sizeof(logchar);
254
0
        out.append(src, count);
255
0
        in.position(in.position() + remaining);
256
0
      }
257
258
0
      return APR_SUCCESS;
259
0
    }
260
261
262
263
  private:
264
    TrivialCharsetDecoder(const TrivialCharsetDecoder&);
265
    TrivialCharsetDecoder& operator=(const TrivialCharsetDecoder&);
266
};
267
268
269
#if LOG4CXX_LOGCHAR_IS_UTF8
270
typedef TrivialCharsetDecoder UTF8CharsetDecoder;
271
#else
272
/**
273
*    Converts from UTF-8 to std::wstring
274
*
275
*/
276
class UTF8CharsetDecoder : public CharsetDecoder
277
{
278
  public:
279
    UTF8CharsetDecoder()
280
    {
281
    }
282
283
    virtual ~UTF8CharsetDecoder()
284
    {
285
    }
286
287
  private:
288
    virtual log4cxx_status_t decode(ByteBuffer& in,
289
      LogString& out)
290
    {
291
      if (in.remaining() > 0)
292
      {
293
        std::string tmp(in.current(), in.remaining());
294
        std::string::const_iterator iter = tmp.begin();
295
296
        while (iter != tmp.end())
297
        {
298
          unsigned int sv = Transcoder::decode(tmp, iter);
299
300
          if (sv == 0xFFFF)
301
          {
302
            size_t offset = iter - tmp.begin();
303
            in.position(in.position() + offset);
304
            return APR_BADARG;
305
          }
306
          else
307
          {
308
            Transcoder::encode(sv, out);
309
          }
310
        }
311
312
        in.position(in.limit());
313
      }
314
315
      return APR_SUCCESS;
316
    }
317
318
  private:
319
    UTF8CharsetDecoder(const UTF8CharsetDecoder&);
320
    UTF8CharsetDecoder& operator=(const UTF8CharsetDecoder&);
321
};
322
#endif
323
324
/**
325
*    Converts from ISO-8859-1 to LogString.
326
*
327
*/
328
class ISOLatinCharsetDecoder : public CharsetDecoder
329
{
330
  public:
331
    ISOLatinCharsetDecoder()
332
0
    {
333
0
    }
334
335
    virtual ~ISOLatinCharsetDecoder()
336
0
    {
337
0
    }
338
339
  private:
340
    virtual log4cxx_status_t decode(ByteBuffer& in,
341
      LogString& out)
342
0
    {
343
0
      if (in.remaining() > 0)
344
0
      {
345
346
0
        const unsigned char* src = (unsigned char*) in.current();
347
0
        const unsigned char* srcEnd = src + in.remaining();
348
349
0
        while (src < srcEnd)
350
0
        {
351
0
          unsigned int sv = *(src++);
352
0
          Transcoder::encode(sv, out);
353
0
        }
354
355
0
        in.position(in.limit());
356
0
      }
357
358
0
      return APR_SUCCESS;
359
0
    }
360
361
362
363
  private:
364
    ISOLatinCharsetDecoder(const ISOLatinCharsetDecoder&);
365
    ISOLatinCharsetDecoder& operator=(const ISOLatinCharsetDecoder&);
366
};
367
368
369
/**
370
*    Converts from US-ASCII to LogString.
371
*
372
*/
373
class USASCIICharsetDecoder : public CharsetDecoder
374
{
375
  public:
376
    USASCIICharsetDecoder()
377
0
    {
378
0
    }
379
380
    virtual ~USASCIICharsetDecoder()
381
0
    {
382
0
    }
383
384
  private:
385
386
    virtual log4cxx_status_t decode(ByteBuffer& in,
387
      LogString& out)
388
0
    {
389
0
      log4cxx_status_t stat = APR_SUCCESS;
390
391
0
      if (in.remaining() > 0)
392
0
      {
393
394
0
        const unsigned char* src = (unsigned char*) in.current();
395
0
        const unsigned char* srcEnd = src + in.remaining();
396
397
0
        while (src < srcEnd)
398
0
        {
399
0
          unsigned char sv = *src;
400
401
0
          if (sv < 0x80)
402
0
          {
403
0
            src++;
404
0
            Transcoder::encode(sv, out);
405
0
          }
406
0
          else
407
0
          {
408
0
            stat = APR_BADARG;
409
0
            break;
410
0
          }
411
0
        }
412
413
0
        in.position(src - (const unsigned char*) in.data());
414
0
      }
415
416
0
      return stat;
417
0
    }
418
419
420
421
  private:
422
    USASCIICharsetDecoder(const USASCIICharsetDecoder&);
423
    USASCIICharsetDecoder& operator=(const USASCIICharsetDecoder&);
424
};
425
426
/**
427
 *    Charset decoder that uses current locale settings.
428
 */
429
class LocaleCharsetDecoder : public CharsetDecoder
430
{
431
  public:
432
0
    LocaleCharsetDecoder() : state()
433
0
    {
434
0
    }
435
    log4cxx_status_t decode(ByteBuffer& in, LogString& out) override
436
0
    {
437
0
      log4cxx_status_t result = APR_SUCCESS;
438
0
      const char* p = in.current();
439
0
      size_t i = in.position();
440
0
      size_t remain = in.limit() - i;
441
0
#if !LOG4CXX_CHARSET_EBCDIC
442
0
      if (std::mbsinit(&this->state)) // ByteBuffer not partially decoded?
443
0
      {
444
        // Copy single byte characters
445
0
        for (; 0 < remain && ((unsigned int) *p) < 0x80; --remain, ++i, p++)
446
0
        {
447
0
          out.append(1, *p);
448
0
        }
449
0
      }
450
0
#endif
451
      // Decode characters that may be represented by multiple bytes
452
0
      while (0 < remain)
453
0
      {
454
0
        wchar_t ch = 0;
455
0
        size_t n = std::mbrtowc(&ch, p, remain, &this->state);
456
0
        if (0 == n) // NULL encountered?
457
0
        {
458
0
          ++i;
459
0
          break;
460
0
        }
461
0
        if (static_cast<std::size_t>(-1) == n) // decoding error?
462
0
        {
463
0
          result = APR_BADARG;
464
0
          break;
465
0
        }
466
0
        if (static_cast<std::size_t>(-2) == n) // incomplete sequence?
467
0
        {
468
0
          break;
469
0
        }
470
0
        Transcoder::encode(static_cast<unsigned int>(ch), out);
471
0
        remain -= n;
472
0
        i += n;
473
0
        p += n;
474
0
      }
475
0
      in.position(i);
476
0
      return result;
477
0
    }
478
479
  private:
480
    std::mbstate_t state;
481
};
482
483
484
485
} // namespace helpers
486
487
}  //namespace log4cxx
488
489
490
// Base-class constructor: nothing to initialise here.
CharsetDecoder::CharsetDecoder() = default;
493
494
495
// Base-class destructor: nothing to release here.
CharsetDecoder::~CharsetDecoder() = default;
498
499
/**
 *  Allocates the decoder selected at compile time by the
 *  LOG4CXX_CHARSET_* / LOG4CXX_LOGCHAR_IS_* configuration macros.
 *
 *  @return a decoder created with \c new.
 */
CharsetDecoder* CharsetDecoder::createDefaultDecoder()
{
  CharsetDecoder* created =
#if LOG4CXX_CHARSET_UTF8
    new UTF8CharsetDecoder();
#elif LOG4CXX_CHARSET_ISO88591 || defined(_WIN32_WCE)
    new ISOLatinCharsetDecoder();
#elif LOG4CXX_CHARSET_USASCII
    new USASCIICharsetDecoder();
#elif LOG4CXX_LOGCHAR_IS_WCHAR && LOG4CXX_HAS_MBSRTOWCS
    new MbstowcsCharsetDecoder();
#else
    new LocaleCharsetDecoder();
#endif
  return created;
}
513
514
/**
 *  Provides the decoder for the default charset.
 *
 *  @return the function-local static decoder, or a newly created one
 *  when the static instance no longer holds a decoder.
 */
CharsetDecoderPtr CharsetDecoder::getDefaultDecoder()
{
  static WideLife<CharsetDecoderPtr> sharedInstance(createDefaultDecoder());

  if (sharedInstance.value() != 0)
  {
    return sharedInstance;
  }

  //
  //  Reached when invoked after static variable destruction
  //     (if logging is called in the destructor of a static object):
  //     hand out a new decoder instead.
  //
  return CharsetDecoderPtr( createDefaultDecoder() );
}
530
531
/**
 *  Provides a decoder for UTF-8 input.
 *
 *  @return the function-local static decoder, or a newly created one
 *  when the static instance no longer holds a decoder.
 */
CharsetDecoderPtr CharsetDecoder::getUTF8Decoder()
{
  static WideLife<CharsetDecoderPtr> sharedInstance(new UTF8CharsetDecoder());

  if (sharedInstance.value() != 0)
  {
    return sharedInstance;
  }

  //
  //  Reached when invoked after static variable destruction
  //     (if logging is called in the destructor of a static object):
  //     hand out a new decoder instead.
  //
  return std::make_shared<UTF8CharsetDecoder>();
}
547
548
/**
 *  Creates a decoder for ISO-8859-1 input.
 *
 *  @return a new ISOLatinCharsetDecoder on every call.
 */
CharsetDecoderPtr CharsetDecoder::getISOLatinDecoder()
{
  CharsetDecoderPtr latinDecoder = std::make_shared<ISOLatinCharsetDecoder>();
  return latinDecoder;
}
552
553
554
/**
 *  Creates a decoder for the named character set.
 *
 *  Names are compared ignoring case, except "646" which must match exactly.
 *  Aliases of UTF-8, US-ASCII and ISO-8859-1, and the name "LOCALE",
 *  map to the built-in decoders. Any other name is handed to
 *  APRCharsetDecoder when APR_HAS_XLATE is set.
 *
 *  @param charset name of the source encoding.
 *  @return a newly created decoder.
 *  @throws IllegalArgumentException if the name is not one of the built-in
 *  aliases and either APR_HAS_XLATE is not set or APRCharsetDecoder
 *  cannot open a converter for it.
 */
CharsetDecoderPtr CharsetDecoder::getDecoder(const LogString& charset)
{
  if (StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("UTF-8"), LOG4CXX_STR("utf-8")) ||
    StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("UTF8"), LOG4CXX_STR("utf8")) ||
    StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("CP65001"), LOG4CXX_STR("cp65001")))
  {
    return std::make_shared<UTF8CharsetDecoder>();
  }
  else if (StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("C"), LOG4CXX_STR("c")) ||
    charset == LOG4CXX_STR("646") ||
    StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("US-ASCII"), LOG4CXX_STR("us-ascii")) ||
    // The third argument must be the all-lower-case spelling, otherwise
    // "iso646-us" is not recognised.
    StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("ISO646-US"), LOG4CXX_STR("iso646-us")) ||
    StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("ANSI_X3.4-1968"), LOG4CXX_STR("ansi_x3.4-1968")) ||
    StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("CP20127"), LOG4CXX_STR("cp20127")))
  {
    return std::make_shared<USASCIICharsetDecoder>();
  }
  else if (StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("ISO-8859-1"), LOG4CXX_STR("iso-8859-1")) ||
    StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("ISO-LATIN-1"), LOG4CXX_STR("iso-latin-1")) ||
    StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("CP1252"), LOG4CXX_STR("cp1252")))
  {
    return std::make_shared<ISOLatinCharsetDecoder>();
  }
  else if (StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("LOCALE"), LOG4CXX_STR("locale")))
  {
    return std::make_shared<LocaleCharsetDecoder>();
  }

#if APR_HAS_XLATE
  return std::make_shared<APRCharsetDecoder>(charset);
#else
  throw IllegalArgumentException(charset);
#endif
}
588
589
/**
 *  Decodes a character array by wrapping it in a ByteBuffer and
 *  forwarding to decode(ByteBuffer&, LogString&).
 *
 *  @param in source bytes; the length used is strnlen_s(in, maxByteCount).
 *  @param maxByteCount upper bound on the number of bytes examined.
 *  @param out receives the converted characters.
 *  @return the status returned by the ByteBuffer overload.
 */
log4cxx_status_t CharsetDecoder::decode(const char* in, size_t maxByteCount, LogString& out)
{
  const size_t byteCount = strnlen_s(in, maxByteCount);
  // ByteBuffer takes a non-const pointer, hence the cast.
  ByteBuffer wrapped(const_cast<char*>(in), byteCount);
  return decode(wrapped, out);
}
594
595
596
597
598
599