Coverage Report

Created: 2025-07-01 06:08

/src/logging-log4cxx/src/main/cpp/charsetdecoder.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Licensed to the Apache Software Foundation (ASF) under one or more
3
 * contributor license agreements.  See the NOTICE file distributed with
4
 * this work for additional information regarding copyright ownership.
5
 * The ASF licenses this file to You under the Apache License, Version 2.0
6
 * (the "License"); you may not use this file except in compliance with
7
 * the License.  You may obtain a copy of the License at
8
 *
9
 *      http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 */
17
#define NOMINMAX /* tell windows not to define min/max macros */
18
#include <log4cxx/logstring.h>
19
#include <log4cxx/helpers/charsetdecoder.h>
20
#include <log4cxx/helpers/bytebuffer.h>
21
#include <log4cxx/helpers/exception.h>
22
#include <log4cxx/helpers/pool.h>
23
#include <log4cxx/helpers/loglog.h>
24
#include <apr_xlate.h>
25
#if !defined(LOG4CXX)
26
  #define LOG4CXX 1
27
#endif
28
#include <log4cxx/private/log4cxx_private.h>
29
#include <locale.h>
30
#include <apr_portable.h>
31
#include <log4cxx/helpers/stringhelper.h>
32
#include <log4cxx/helpers/transcoder.h>
33
#include <mutex>
34
35
using namespace LOG4CXX_NS;
36
using namespace LOG4CXX_NS::helpers;
37
38
IMPLEMENT_LOG4CXX_OBJECT(CharsetDecoder)
39
40
41
namespace LOG4CXX_NS
42
{
43
namespace helpers
44
{
45
46
#if APR_HAS_XLATE
47
/**
48
 *  Converts from an arbitrary encoding to LogString
49
 *    using apr_xlate.  Requires real iconv implementation,
50
*    apr-iconv will crash in use.
51
 */
52
class APRCharsetDecoder : public CharsetDecoder
53
{
54
  public:
55
    /**
56
     *  Creates a new instance.
57
     *  @param frompage name of source encoding.
58
     */
59
0
    APRCharsetDecoder(const LogString& frompage) : pool()
60
0
    {
61
#if LOG4CXX_LOGCHAR_IS_WCHAR
62
      const char* topage = "WCHAR_T";
63
#endif
64
0
#if LOG4CXX_LOGCHAR_IS_UTF8
65
0
      const char* topage = "UTF-8";
66
0
#endif
67
#if LOG4CXX_LOGCHAR_IS_UNICHAR
68
      const char* topage = "UTF-16";
69
#endif
70
0
      std::string fpage(Transcoder::encodeCharsetName(frompage));
71
0
      apr_status_t stat = apr_xlate_open(&convset,
72
0
          topage,
73
0
          fpage.c_str(),
74
0
          pool.getAPRPool());
75
76
0
      if (stat != APR_SUCCESS)
77
0
      {
78
0
        throw IllegalArgumentException(frompage);
79
0
      }
80
0
    }
81
82
    /**
83
     *  Destructor.
84
     */
85
    virtual ~APRCharsetDecoder()
86
0
    {
87
0
    }
88
89
    virtual log4cxx_status_t decode(ByteBuffer& in,
90
      LogString& out)
91
0
    {
92
0
      enum { BUFSIZE = 256 };
93
0
      logchar buf[BUFSIZE];
94
0
      const apr_size_t initial_outbytes_left = BUFSIZE * sizeof(logchar);
95
0
      apr_status_t stat = APR_SUCCESS;
96
97
0
      if (in.remaining() == 0)
98
0
      {
99
0
        size_t outbytes_left = initial_outbytes_left;
100
0
        {
101
0
          std::lock_guard<std::mutex> lock(mutex);
102
0
          stat = apr_xlate_conv_buffer((apr_xlate_t*) convset,
103
0
              NULL, NULL, (char*) buf, &outbytes_left);
104
0
        }
105
0
        out.append(buf, (initial_outbytes_left - outbytes_left) / sizeof(logchar));
106
0
      }
107
0
      else
108
0
      {
109
0
        while (in.remaining() > 0 && stat == APR_SUCCESS)
110
0
        {
111
0
          size_t inbytes_left = in.remaining();
112
0
          size_t initial_inbytes_left = inbytes_left;
113
0
          size_t pos = in.position();
114
0
          apr_size_t outbytes_left = initial_outbytes_left;
115
0
          {
116
0
            std::lock_guard<std::mutex> lock(mutex);
117
0
            stat = apr_xlate_conv_buffer((apr_xlate_t*) convset,
118
0
                in.data() + pos,
119
0
                &inbytes_left,
120
0
                (char*) buf,
121
0
                &outbytes_left);
122
0
          }
123
0
          out.append(buf, (initial_outbytes_left - outbytes_left) / sizeof(logchar));
124
0
          in.position(pos + (initial_inbytes_left - inbytes_left));
125
0
        }
126
0
      }
127
128
0
      return stat;
129
0
    }
130
131
  private:
132
    APRCharsetDecoder(const APRCharsetDecoder&);
133
    APRCharsetDecoder& operator=(const APRCharsetDecoder&);
134
    LOG4CXX_NS::helpers::Pool pool;
135
    std::mutex mutex;
136
    apr_xlate_t* convset;
137
};
138
139
#endif
140
141
#if LOG4CXX_LOGCHAR_IS_WCHAR && LOG4CXX_HAS_MBSRTOWCS
142
/**
143
*    Converts from the default multi-byte string to
144
*        LogString using mbstowcs.
145
*
146
*/
147
class MbstowcsCharsetDecoder : public CharsetDecoder
148
{
149
  public:
150
    MbstowcsCharsetDecoder()
151
    {
152
    }
153
154
    virtual ~MbstowcsCharsetDecoder()
155
    {
156
    }
157
158
  private:
159
    inline log4cxx_status_t append(LogString& out, const wchar_t* buf)
160
    {
161
      out.append(buf);
162
      return APR_SUCCESS;
163
    }
164
165
    virtual log4cxx_status_t decode(ByteBuffer& in,
166
      LogString& out)
167
    {
168
      log4cxx_status_t stat = APR_SUCCESS;
169
      enum { BUFSIZE = 256 };
170
      wchar_t wbuf[BUFSIZE];
171
      char cbuf[BUFSIZE*4];
172
173
      mbstate_t mbstate;
174
      memset(&mbstate, 0, sizeof(mbstate));
175
176
      while (in.remaining() > 0)
177
      {
178
        const char* src = in.current();
179
180
        if (*src == 0)
181
        {
182
          out.append(1, (logchar) 0);
183
          in.position(in.position() + 1);
184
        }
185
        else
186
        {
187
          auto available = std::min(sizeof (cbuf) - 1, in.remaining());
188
          strncpy(cbuf, src, available);
189
          cbuf[available] = 0;
190
          src = cbuf;
191
          size_t wCharCount = mbsrtowcs(wbuf,
192
              &src,
193
              BUFSIZE - 1,
194
              &mbstate);
195
          auto converted = src - cbuf;
196
          in.position(in.position() + converted);
197
198
          if (wCharCount == (size_t) -1) // Illegal byte sequence?
199
          {
200
            LogString msg(LOG4CXX_STR("Illegal byte sequence at "));
201
            msg.append(std::to_wstring(in.position()));
202
            msg.append(LOG4CXX_STR(" of "));
203
            msg.append(std::to_wstring(in.limit()));
204
            LogLog::warn(msg);
205
            stat = APR_BADCH;
206
            break;
207
          }
208
          else
209
          {
210
            wbuf[wCharCount] = 0;
211
            stat = append(out, wbuf);
212
          }
213
        }
214
      }
215
216
      return stat;
217
    }
218
219
220
221
  private:
222
    MbstowcsCharsetDecoder(const MbstowcsCharsetDecoder&);
223
    MbstowcsCharsetDecoder& operator=(const MbstowcsCharsetDecoder&);
224
};
225
#endif
226
227
228
/**
229
*    Decoder used when the external and internal charsets
230
*    are the same.
231
*
232
*/
233
class TrivialCharsetDecoder : public CharsetDecoder
234
{
235
  public:
236
    TrivialCharsetDecoder()
237
0
    {
238
0
    }
239
240
    virtual ~TrivialCharsetDecoder()
241
0
    {
242
0
    }
243
244
    virtual log4cxx_status_t decode(ByteBuffer& in,
245
      LogString& out)
246
0
    {
247
0
      size_t remaining = in.remaining();
248
249
0
      if ( remaining > 0)
250
0
      {
251
0
        const logchar* src = (const logchar*) (in.data() + in.position());
252
0
        size_t count = remaining / sizeof(logchar);
253
0
        out.append(src, count);
254
0
        in.position(in.position() + remaining);
255
0
      }
256
257
0
      return APR_SUCCESS;
258
0
    }
259
260
261
262
  private:
263
    TrivialCharsetDecoder(const TrivialCharsetDecoder&);
264
    TrivialCharsetDecoder& operator=(const TrivialCharsetDecoder&);
265
};
266
267
268
#if LOG4CXX_LOGCHAR_IS_UTF8
269
typedef TrivialCharsetDecoder UTF8CharsetDecoder;
270
#else
271
/**
272
*    Converts from UTF-8 to std::wstring
273
*
274
*/
275
class UTF8CharsetDecoder : public CharsetDecoder
276
{
277
  public:
278
    UTF8CharsetDecoder()
279
    {
280
    }
281
282
    virtual ~UTF8CharsetDecoder()
283
    {
284
    }
285
286
  private:
287
    virtual log4cxx_status_t decode(ByteBuffer& in,
288
      LogString& out)
289
    {
290
      if (in.remaining() > 0)
291
      {
292
        std::string tmp(in.current(), in.remaining());
293
        std::string::const_iterator iter = tmp.begin();
294
295
        while (iter != tmp.end())
296
        {
297
          unsigned int sv = Transcoder::decode(tmp, iter);
298
299
          if (sv == 0xFFFF)
300
          {
301
            size_t offset = iter - tmp.begin();
302
            in.position(in.position() + offset);
303
            return APR_BADARG;
304
          }
305
          else
306
          {
307
            Transcoder::encode(sv, out);
308
          }
309
        }
310
311
        in.position(in.limit());
312
      }
313
314
      return APR_SUCCESS;
315
    }
316
317
  private:
318
    UTF8CharsetDecoder(const UTF8CharsetDecoder&);
319
    UTF8CharsetDecoder& operator=(const UTF8CharsetDecoder&);
320
};
321
#endif
322
323
/**
324
*    Converts from ISO-8859-1 to LogString.
325
*
326
*/
327
class ISOLatinCharsetDecoder : public CharsetDecoder
328
{
329
  public:
330
    ISOLatinCharsetDecoder()
331
0
    {
332
0
    }
333
334
    virtual ~ISOLatinCharsetDecoder()
335
0
    {
336
0
    }
337
338
  private:
339
    virtual log4cxx_status_t decode(ByteBuffer& in,
340
      LogString& out)
341
0
    {
342
0
      if (in.remaining() > 0)
343
0
      {
344
345
0
        const unsigned char* src = (unsigned char*) in.current();
346
0
        const unsigned char* srcEnd = src + in.remaining();
347
348
0
        while (src < srcEnd)
349
0
        {
350
0
          unsigned int sv = *(src++);
351
0
          Transcoder::encode(sv, out);
352
0
        }
353
354
0
        in.position(in.limit());
355
0
      }
356
357
0
      return APR_SUCCESS;
358
0
    }
359
360
361
362
  private:
363
    ISOLatinCharsetDecoder(const ISOLatinCharsetDecoder&);
364
    ISOLatinCharsetDecoder& operator=(const ISOLatinCharsetDecoder&);
365
};
366
367
368
/**
369
*    Converts from US-ASCII to LogString.
370
*
371
*/
372
class USASCIICharsetDecoder : public CharsetDecoder
373
{
374
  public:
375
    USASCIICharsetDecoder()
376
0
    {
377
0
    }
378
379
    virtual ~USASCIICharsetDecoder()
380
0
    {
381
0
    }
382
383
  private:
384
385
    virtual log4cxx_status_t decode(ByteBuffer& in,
386
      LogString& out)
387
0
    {
388
0
      log4cxx_status_t stat = APR_SUCCESS;
389
390
0
      if (in.remaining() > 0)
391
0
      {
392
393
0
        const unsigned char* src = (unsigned char*) in.current();
394
0
        const unsigned char* srcEnd = src + in.remaining();
395
396
0
        while (src < srcEnd)
397
0
        {
398
0
          unsigned char sv = *src;
399
400
0
          if (sv < 0x80)
401
0
          {
402
0
            src++;
403
0
            Transcoder::encode(sv, out);
404
0
          }
405
0
          else
406
0
          {
407
0
            stat = APR_BADARG;
408
0
            break;
409
0
          }
410
0
        }
411
412
0
        in.position(src - (const unsigned char*) in.data());
413
0
      }
414
415
0
      return stat;
416
0
    }
417
418
419
420
  private:
421
    USASCIICharsetDecoder(const USASCIICharsetDecoder&);
422
    USASCIICharsetDecoder& operator=(const USASCIICharsetDecoder&);
423
};
424
425
/**
426
 *    Charset decoder that uses current locale settings.
427
 */
428
class LocaleCharsetDecoder : public CharsetDecoder
429
{
430
  public:
431
0
    LocaleCharsetDecoder() : state()
432
0
    {
433
0
    }
434
    log4cxx_status_t decode(ByteBuffer& in, LogString& out) override
435
0
    {
436
0
      log4cxx_status_t result = APR_SUCCESS;
437
0
      const char* p = in.current();
438
0
      size_t i = in.position();
439
0
      size_t remain = in.limit() - i;
440
0
#if !LOG4CXX_CHARSET_EBCDIC
441
0
      if (std::mbsinit(&this->state)) // ByteBuffer not partially decoded?
442
0
      {
443
        // Copy single byte characters
444
0
        for (; 0 < remain && ((unsigned int) *p) < 0x80; --remain, ++i, p++)
445
0
        {
446
0
          out.append(1, *p);
447
0
        }
448
0
      }
449
0
#endif
450
      // Decode characters that may be represented by multiple bytes
451
0
      while (0 < remain)
452
0
      {
453
0
        wchar_t ch = 0;
454
0
        size_t n = std::mbrtowc(&ch, p, remain, &this->state);
455
0
        if (0 == n) // NULL encountered?
456
0
        {
457
0
          ++i;
458
0
          break;
459
0
        }
460
0
        if (static_cast<std::size_t>(-1) == n) // decoding error?
461
0
        {
462
0
          result = APR_BADARG;
463
0
          break;
464
0
        }
465
0
        if (static_cast<std::size_t>(-2) == n) // incomplete sequence?
466
0
        {
467
0
          break;
468
0
        }
469
0
        Transcoder::encode(static_cast<unsigned int>(ch), out);
470
0
        remain -= n;
471
0
        i += n;
472
0
        p += n;
473
0
      }
474
0
      in.position(i);
475
0
      return result;
476
0
    }
477
478
  private:
479
    std::mbstate_t state;
480
};
481
482
483
484
} // namespace helpers
485
486
}  //namespace log4cxx
487
488
489
// Base class constructor: performs no work of its own.
CharsetDecoder::CharsetDecoder() = default;
492
493
494
// Base class destructor: performs no work of its own.
CharsetDecoder::~CharsetDecoder() = default;
497
498
/**
 *  Creates a new decoder of the type chosen at compile time by the
 *  LOG4CXX_CHARSET_* / LOG4CXX_LOGCHAR_* configuration.
 *  @return a pointer to a decoder allocated with new.
 */
CharsetDecoder* CharsetDecoder::createDefaultDecoder()
{
#if LOG4CXX_CHARSET_UTF8
  typedef UTF8CharsetDecoder DefaultDecoder;
#elif LOG4CXX_CHARSET_ISO88591 || defined(_WIN32_WCE)
  typedef ISOLatinCharsetDecoder DefaultDecoder;
#elif LOG4CXX_CHARSET_USASCII
  typedef USASCIICharsetDecoder DefaultDecoder;
#elif LOG4CXX_LOGCHAR_IS_WCHAR && LOG4CXX_HAS_MBSRTOWCS
  typedef MbstowcsCharsetDecoder DefaultDecoder;
#else
  typedef LocaleCharsetDecoder DefaultDecoder;
#endif
  return new DefaultDecoder();
}
512
513
/**
 *  Returns the shared default decoder, which is created on first use.
 */
CharsetDecoderPtr CharsetDecoder::getDefaultDecoder()
{
  static WideLife<CharsetDecoderPtr> sharedDecoder(createDefaultDecoder());

  if (sharedDecoder.value() != 0)
  {
    return sharedDecoder;
  }

  //
  //  Invoked after static variable destruction
  //     (logging was called in the destructor of a static object),
  //     so hand out a newly created decoder instead.
  //
  return CharsetDecoderPtr( createDefaultDecoder() );
}
529
530
/**
 *  Returns the shared UTF-8 decoder, which is created on first use.
 */
CharsetDecoderPtr CharsetDecoder::getUTF8Decoder()
{
  static WideLife<CharsetDecoderPtr> sharedDecoder(new UTF8CharsetDecoder());

  if (sharedDecoder.value() != 0)
  {
    return sharedDecoder;
  }

  //
  //  Invoked after static variable destruction
  //     (logging was called in the destructor of a static object),
  //     so hand out a newly created decoder instead.
  //
  return std::make_shared<UTF8CharsetDecoder>();
}
546
547
/**
 *  Returns a newly created ISO-8859-1 decoder on every call.
 */
CharsetDecoderPtr CharsetDecoder::getISOLatinDecoder()
{
  CharsetDecoderPtr latinDecoder = std::make_shared<ISOLatinCharsetDecoder>();
  return latinDecoder;
}
551
552
553
/**
 *  Returns a newly created decoder for the named charset.
 *  The built-in UTF-8, US-ASCII, ISO-8859-1 and LOCALE decoders are chosen
 *  by comparing \c charset against the upper/lower case alias pairs below.
 *  Any other name is passed to APRCharsetDecoder when APR_HAS_XLATE is set.
 *  @param charset name of the source encoding.
 *  @return a decoder for \c charset.
 *  @throws IllegalArgumentException for an unrecognised name when
 *  APR_HAS_XLATE is not set, or when APRCharsetDecoder cannot open the
 *  conversion.
 */
CharsetDecoderPtr CharsetDecoder::getDecoder(const LogString& charset)
{
  if (StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("UTF-8"), LOG4CXX_STR("utf-8")) ||
    StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("UTF8"), LOG4CXX_STR("utf8")) ||
    StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("CP65001"), LOG4CXX_STR("cp65001")))
  {
    return std::make_shared<UTF8CharsetDecoder>();
  }

  // The lower case alias must be entirely lower case, like every other
  // pair here; presumably "iso646-us" would not be recognised otherwise.
  if (StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("C"), LOG4CXX_STR("c")) ||
    charset == LOG4CXX_STR("646") ||
    StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("US-ASCII"), LOG4CXX_STR("us-ascii")) ||
    StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("ISO646-US"), LOG4CXX_STR("iso646-us")) ||
    StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("ANSI_X3.4-1968"), LOG4CXX_STR("ansi_x3.4-1968")) ||
    StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("CP20127"), LOG4CXX_STR("cp20127")))
  {
    return std::make_shared<USASCIICharsetDecoder>();
  }

  if (StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("ISO-8859-1"), LOG4CXX_STR("iso-8859-1")) ||
    StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("ISO-LATIN-1"), LOG4CXX_STR("iso-latin-1")) ||
    StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("CP1252"), LOG4CXX_STR("cp1252")))
  {
    return std::make_shared<ISOLatinCharsetDecoder>();
  }

  if (StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("LOCALE"), LOG4CXX_STR("locale")))
  {
    return std::make_shared<LocaleCharsetDecoder>();
  }

#if APR_HAS_XLATE
  return std::make_shared<APRCharsetDecoder>(charset);
#else
  throw IllegalArgumentException(charset);
#endif
}
587
588
589
590
591
592