Coverage Report

Created: 2025-07-01 06:08

/src/logging-log4cxx/src/main/cpp/charsetencoder.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Licensed to the Apache Software Foundation (ASF) under one or more
3
 * contributor license agreements.  See the NOTICE file distributed with
4
 * this work for additional information regarding copyright ownership.
5
 * The ASF licenses this file to You under the Apache License, Version 2.0
6
 * (the "License"); you may not use this file except in compliance with
7
 * the License.  You may obtain a copy of the License at
8
 *
9
 *      http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 */
17
#include <log4cxx/logstring.h>
18
#include <log4cxx/helpers/charsetencoder.h>
19
#include <log4cxx/helpers/bytebuffer.h>
20
#include <log4cxx/helpers/exception.h>
21
#include <apr_xlate.h>
22
#include <log4cxx/helpers/stringhelper.h>
23
#include <log4cxx/helpers/transcoder.h>
24
#include <algorithm>
25
26
#if !defined(LOG4CXX)
27
  #define LOG4CXX 1
28
#endif
29
30
#include <log4cxx/private/log4cxx_private.h>
31
#include <apr_portable.h>
32
#include <mutex>
33
34
#ifdef LOG4CXX_HAS_WCSTOMBS
35
  #include <stdlib.h>
36
#endif
37
38
using namespace LOG4CXX_NS;
39
using namespace LOG4CXX_NS::helpers;
40
41
IMPLEMENT_LOG4CXX_OBJECT(CharsetEncoder)
42
43
namespace LOG4CXX_NS
44
{
45
46
namespace helpers
47
{
48
49
#if APR_HAS_XLATE
50
/**
51
* A character encoder implemented using apr_xlate.
52
*/
53
class APRCharsetEncoder : public CharsetEncoder
54
{
55
  public:
56
0
    APRCharsetEncoder(const LogString& topage) : pool()
57
0
    {
58
#if LOG4CXX_LOGCHAR_IS_WCHAR
59
      const char* frompage = "WCHAR_T";
60
#endif
61
0
#if LOG4CXX_LOGCHAR_IS_UTF8
62
0
      const char* frompage = "UTF-8";
63
0
#endif
64
#if LOG4CXX_LOGCHAR_IS_UNICHAR
65
      const char* frompage = "UTF-16";
66
#endif
67
0
      std::string tpage(Transcoder::encodeCharsetName(topage));
68
0
      apr_status_t stat = apr_xlate_open(&convset,
69
0
          tpage.c_str(),
70
0
          frompage,
71
0
          pool.getAPRPool());
72
73
0
      if (stat != APR_SUCCESS)
74
0
      {
75
0
        throw IllegalArgumentException(topage);
76
0
      }
77
0
    }
78
79
    virtual ~APRCharsetEncoder()
80
0
    {
81
0
    }
82
83
    virtual log4cxx_status_t encode(const LogString& in,
84
      LogString::const_iterator& iter,
85
      ByteBuffer& out)
86
0
    {
87
0
      apr_status_t stat;
88
0
      size_t outbytes_left = out.remaining();
89
0
      size_t initial_outbytes_left = outbytes_left;
90
0
      size_t position = out.position();
91
92
0
      if (iter == in.end())
93
0
      {
94
0
        std::lock_guard<std::mutex> lock(mutex);
95
0
        stat = apr_xlate_conv_buffer(convset, NULL, NULL,
96
0
            out.data() + position, &outbytes_left);
97
0
      }
98
0
      else
99
0
      {
100
0
        LogString::size_type inOffset = (iter - in.begin());
101
0
        apr_size_t inbytes_left =
102
0
          (in.size() - inOffset) * sizeof(LogString::value_type);
103
0
        apr_size_t initial_inbytes_left = inbytes_left;
104
0
        {
105
0
          std::lock_guard<std::mutex> lock(mutex);
106
0
          stat = apr_xlate_conv_buffer(convset,
107
0
              (const char*) (in.data() + inOffset),
108
0
              &inbytes_left,
109
0
              out.data() + position,
110
0
              &outbytes_left);
111
0
        }
112
0
        iter += ((initial_inbytes_left - inbytes_left) / sizeof(LogString::value_type));
113
0
      }
114
115
0
      out.position(out.position() + (initial_outbytes_left - outbytes_left));
116
0
      return stat;
117
0
    }
118
119
  private:
120
    APRCharsetEncoder(const APRCharsetEncoder&);
121
    APRCharsetEncoder& operator=(const APRCharsetEncoder&);
122
    Pool pool;
123
    std::mutex mutex;
124
    apr_xlate_t* convset;
125
};
126
#endif
127
128
#if LOG4CXX_LOGCHAR_IS_WCHAR && LOG4CXX_HAS_WCSTOMBS
129
/**
130
 *  A character encoder implemented using wcstombs.
131
*/
132
class WcstombsCharsetEncoder : public CharsetEncoder
133
{
134
  public:
135
    WcstombsCharsetEncoder()
136
    {
137
    }
138
139
    /**
140
     *   Converts a wchar_t to the default external multibyte encoding.
141
     */
142
    log4cxx_status_t encode(const LogString& in,
143
      LogString::const_iterator& iter,
144
      ByteBuffer& out)
145
    {
146
      log4cxx_status_t stat = APR_SUCCESS;
147
148
      if (iter != in.end())
149
      {
150
        size_t outbytes_left = out.remaining();
151
        size_t position = out.position();
152
        std::wstring::size_type inOffset = (iter - in.begin());
153
        enum { BUFSIZE = 256 };
154
        wchar_t buf[BUFSIZE];
155
        size_t chunkSize = BUFSIZE - 1;
156
157
        if (chunkSize * MB_LEN_MAX > outbytes_left)
158
        {
159
          chunkSize = outbytes_left / MB_LEN_MAX;
160
        }
161
162
        if (chunkSize > in.length() - inOffset)
163
        {
164
          chunkSize = in.length() - inOffset;
165
        }
166
167
        memset(buf, 0, BUFSIZE * sizeof(wchar_t));
168
        memcpy(buf,
169
          in.data() + inOffset,
170
          chunkSize * sizeof(wchar_t));
171
        size_t converted = wcstombs(out.data() + position, buf, outbytes_left);
172
173
        if (converted == (size_t) -1)
174
        {
175
          stat = APR_BADARG;
176
177
          //
178
          //   if unconvertable character was encountered
179
          //       repeatedly halve source to get fragment that
180
          //       can be converted
181
          for (chunkSize /= 2;
182
            chunkSize > 0;
183
            chunkSize /= 2)
184
          {
185
            buf[chunkSize] = 0;
186
            converted = wcstombs(out.data() + position, buf, outbytes_left);
187
188
            if (converted != (size_t) -1)
189
            {
190
              iter += chunkSize;
191
              out.position(out.position() + converted);
192
              break;
193
            }
194
          }
195
        }
196
        else
197
        {
198
          iter += chunkSize;
199
          out.position(out.position() + converted);
200
        }
201
      }
202
203
      return stat;
204
    }
205
206
207
208
  private:
209
    WcstombsCharsetEncoder(const WcstombsCharsetEncoder&);
210
    WcstombsCharsetEncoder& operator=(const WcstombsCharsetEncoder&);
211
};
212
#endif
213
214
215
/**
216
*   Encodes a LogString to US-ASCII.
217
*/
218
class USASCIICharsetEncoder : public CharsetEncoder
219
{
220
  public:
221
    USASCIICharsetEncoder()
222
0
    {
223
0
    }
224
225
    virtual log4cxx_status_t encode(const LogString& in,
226
      LogString::const_iterator& iter,
227
      ByteBuffer& out)
228
0
    {
229
0
      log4cxx_status_t stat = APR_SUCCESS;
230
231
0
      if (iter != in.end())
232
0
      {
233
0
        while (out.remaining() > 0 && iter != in.end())
234
0
        {
235
0
          LogString::const_iterator prev(iter);
236
0
          unsigned int sv = Transcoder::decode(in, iter);
237
238
0
          if (sv <= 0x7F)
239
0
          {
240
0
            out.put((char) sv);
241
0
          }
242
0
          else
243
0
          {
244
0
            iter = prev;
245
0
            stat = APR_BADARG;
246
0
            break;
247
0
          }
248
0
        }
249
0
      }
250
251
0
      return stat;
252
0
    }
253
254
  private:
255
    USASCIICharsetEncoder(const USASCIICharsetEncoder&);
256
    USASCIICharsetEncoder& operator=(const USASCIICharsetEncoder&);
257
};
258
259
/**
260
*   Converts a LogString to ISO-8859-1.
261
*/
262
class ISOLatinCharsetEncoder : public CharsetEncoder
263
{
264
  public:
265
    ISOLatinCharsetEncoder()
266
0
    {
267
0
    }
268
269
    virtual log4cxx_status_t encode(const LogString& in,
270
      LogString::const_iterator& iter,
271
      ByteBuffer& out)
272
0
    {
273
0
      log4cxx_status_t stat = APR_SUCCESS;
274
275
0
      if (iter != in.end())
276
0
      {
277
0
        while (out.remaining() > 0 && iter != in.end())
278
0
        {
279
0
          LogString::const_iterator prev(iter);
280
0
          unsigned int sv = Transcoder::decode(in, iter);
281
282
0
          if (sv <= 0xFF)
283
0
          {
284
0
            out.put((char) sv);
285
0
          }
286
0
          else
287
0
          {
288
0
            iter = prev;
289
0
            stat = APR_BADARG;
290
0
            break;
291
0
          }
292
0
        }
293
0
      }
294
295
0
      return stat;
296
0
    }
297
298
  private:
299
    ISOLatinCharsetEncoder(const ISOLatinCharsetEncoder&);
300
    ISOLatinCharsetEncoder& operator=(const ISOLatinCharsetEncoder&);
301
};
302
303
/**
304
*   Encodes a LogString to a byte array when the encodings are identical.
305
*/
306
class TrivialCharsetEncoder : public CharsetEncoder
307
{
308
  public:
309
    TrivialCharsetEncoder()
310
0
    {
311
0
    }
312
313
314
    virtual log4cxx_status_t encode(const LogString& in,
315
      LogString::const_iterator& iter,
316
      ByteBuffer& out)
317
0
    {
318
0
      if (iter != in.end())
319
0
      {
320
0
        size_t requested = in.length() - (iter - in.begin());
321
322
0
        if (requested > out.remaining() / sizeof(logchar))
323
0
        {
324
0
          requested = out.remaining() / sizeof(logchar);
325
0
        }
326
327
0
        memcpy(out.current(),
328
0
          (const char*) in.data() + (iter - in.begin()),
329
0
          requested * sizeof(logchar));
330
0
        iter += requested;
331
0
        out.position(out.position() + requested * sizeof(logchar));
332
0
      }
333
334
0
      return APR_SUCCESS;
335
0
    }
336
337
  private:
338
    TrivialCharsetEncoder(const TrivialCharsetEncoder&);
339
    TrivialCharsetEncoder& operator=(const TrivialCharsetEncoder&);
340
};
341
342
#if LOG4CXX_LOGCHAR_IS_UTF8
343
typedef TrivialCharsetEncoder UTF8CharsetEncoder;
344
#else
345
/**
346
 *  Converts a LogString to UTF-8.
347
 */
348
class UTF8CharsetEncoder : public CharsetEncoder
349
{
350
  public:
351
    UTF8CharsetEncoder()
352
    {
353
    }
354
355
    virtual log4cxx_status_t encode(const LogString& in,
356
      LogString::const_iterator& iter,
357
      ByteBuffer& out)
358
    {
359
      while (iter != in.end() && out.remaining() >= 8)
360
      {
361
        unsigned int sv = Transcoder::decode(in, iter);
362
363
        if (sv == 0xFFFF)
364
        {
365
          return APR_BADARG;
366
        }
367
368
        Transcoder::encodeUTF8(sv, out);
369
      }
370
371
      return APR_SUCCESS;
372
    }
373
374
  private:
375
    UTF8CharsetEncoder(const UTF8CharsetEncoder&);
376
    UTF8CharsetEncoder& operator=(const UTF8CharsetEncoder&);
377
};
378
#endif
379
380
/**
381
 *   Encodes a LogString to UTF16-BE.
382
 */
383
class UTF16BECharsetEncoder : public CharsetEncoder
384
{
385
  public:
386
    UTF16BECharsetEncoder()
387
0
    {
388
0
    }
389
390
    virtual log4cxx_status_t encode(const LogString& in,
391
      LogString::const_iterator& iter,
392
      ByteBuffer& out)
393
0
    {
394
0
      while (iter != in.end() && out.remaining() >= 4)
395
0
      {
396
0
        unsigned int sv = Transcoder::decode(in, iter);
397
398
0
        if (sv == 0xFFFF)
399
0
        {
400
0
          return APR_BADARG;
401
0
        }
402
403
0
        Transcoder::encodeUTF16BE(sv, out);
404
0
      }
405
406
0
      return APR_SUCCESS;
407
0
    }
408
409
  private:
410
    UTF16BECharsetEncoder(const UTF16BECharsetEncoder&);
411
    UTF16BECharsetEncoder& operator=(const UTF16BECharsetEncoder&);
412
};
413
414
/**
415
 *   Encodes a LogString to UTF16-LE.
416
 */
417
class UTF16LECharsetEncoder : public CharsetEncoder
418
{
419
  public:
420
    UTF16LECharsetEncoder()
421
0
    {
422
0
    }
423
424
425
    virtual log4cxx_status_t encode(const LogString& in,
426
      LogString::const_iterator& iter,
427
      ByteBuffer& out)
428
0
    {
429
0
      while (iter != in.end() && out.remaining() >= 4)
430
0
      {
431
0
        unsigned int sv = Transcoder::decode(in, iter);
432
433
0
        if (sv == 0xFFFF)
434
0
        {
435
0
          return APR_BADARG;
436
0
        }
437
438
0
        Transcoder::encodeUTF16LE(sv, out);
439
0
      }
440
441
0
      return APR_SUCCESS;
442
0
    }
443
  private:
444
    UTF16LECharsetEncoder(const UTF16LECharsetEncoder&);
445
    UTF16LECharsetEncoder& operator=(const UTF16LECharsetEncoder&);
446
};
447
448
/**
449
 *    Charset encoder that uses current locale settings.
450
 */
451
class LocaleCharsetEncoder : public CharsetEncoder
452
{
453
  public:
454
0
    LocaleCharsetEncoder() : state()
455
0
    {
456
0
    }
457
    log4cxx_status_t encode
458
      ( const LogString&           in
459
      , LogString::const_iterator& iter
460
      , ByteBuffer&                out
461
      ) override
462
0
    {
463
0
      log4cxx_status_t result = APR_SUCCESS;
464
0
#if !LOG4CXX_CHARSET_EBCDIC
465
0
      char* current = out.current();
466
0
      size_t remain = out.remaining();
467
0
      if (std::mbsinit(&this->state)) // ByteBuffer not partially encoded?
468
0
      {
469
        // Copy single byte characters
470
0
        for (;
471
0
          iter != in.end() && ((unsigned int) *iter) < 0x80 && 0 < remain;
472
0
          iter++, remain--, current++)
473
0
        {
474
0
          *current = *iter;
475
0
        }
476
0
      }
477
0
#endif
478
      // Encode characters that may require multiple bytes
479
0
      while (iter != in.end() && MB_CUR_MAX <= remain)
480
0
      {
481
0
        auto ch = Transcoder::decode(in, iter);
482
0
        auto n = std::wcrtomb(current, ch, &this->state);
483
0
        if (static_cast<std::size_t>(-1) == n) // not a valid wide character?
484
0
        {
485
0
          result = APR_BADARG;
486
0
          break;
487
0
        }
488
0
        remain -= n;
489
0
        current += n;
490
0
      }
491
0
      out.position(current - out.data());
492
0
      return result;
493
0
    }
494
495
  private:
496
    std::mbstate_t state;
497
};
498
499
500
} // namespace helpers
501
502
}  //namespace log4cxx
503
504
505
506
/** Constructs the encoder base; it performs no work of its own. */
CharsetEncoder::CharsetEncoder() = default;
509
510
/** Destroys the encoder base; it performs no work of its own. */
CharsetEncoder::~CharsetEncoder() = default;
513
514
/**
 * Returns the process-wide default encoder, created on first use by
 * createDefaultEncoder().
 *
 * @return the cached encoder, or a freshly created one when the cached
 *         pointer is null.
 */
CharsetEncoderPtr CharsetEncoder::getDefaultEncoder()
{
  static WideLife<CharsetEncoderPtr> encoder(createDefaultEncoder());

  if (encoder.value() != 0)
  {
    return encoder;
  }

  //
  //  Reached if invoked after static variable destruction
  //     (if logging is called in the destructor of a static object):
  //     hand out a new encoder instead of the released one.
  //
  return CharsetEncoderPtr( createDefaultEncoder() );
}
530
531
/**
 * Allocates the encoder selected by the build configuration.
 *
 * The LOG4CXX_CHARSET_* macros are checked first, then the wcstombs based
 * encoder, and the locale based encoder is the fallback.
 *
 * @return a new encoder; this function does not retain the pointer.
 */
CharsetEncoder* CharsetEncoder::createDefaultEncoder()
{
#if LOG4CXX_CHARSET_UTF8
  typedef UTF8CharsetEncoder DefaultEncoderType;
#elif LOG4CXX_CHARSET_ISO88591
  typedef ISOLatinCharsetEncoder DefaultEncoderType;
#elif LOG4CXX_CHARSET_USASCII
  typedef USASCIICharsetEncoder DefaultEncoderType;
#elif LOG4CXX_LOGCHAR_IS_WCHAR && LOG4CXX_HAS_WCSTOMBS
  typedef WcstombsCharsetEncoder DefaultEncoderType;
#else
  typedef LocaleCharsetEncoder DefaultEncoderType;
#endif
  return new DefaultEncoderType();
}
545
546
547
/**
 * Creates an encoder that produces UTF-8.
 *
 * @return a newly allocated UTF8CharsetEncoder on every call.
 */
CharsetEncoderPtr CharsetEncoder::getUTF8Encoder()
{
  CharsetEncoderPtr utf8Encoder = std::make_shared<UTF8CharsetEncoder>();
  return utf8Encoder;
}
551
552
553
554
/**
 * Creates an encoder for the named character set.
 *
 * The built-in encoders are matched by name (with several aliases each);
 * any other name is passed to apr_xlate when APR_HAS_XLATE is set.
 *
 * @param charset name of the target character set.
 * @return a newly allocated encoder.
 * @throws IllegalArgumentException if the name is not one of the built-in
 *         aliases and either apr_xlate is unavailable or it rejects the name.
 */
CharsetEncoderPtr CharsetEncoder::getEncoder(const LogString& charset)
{
  if (StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("UTF-8"), LOG4CXX_STR("utf-8"))
    || StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("CP65001"), LOG4CXX_STR("cp65001")))
  {
    return std::make_shared<UTF8CharsetEncoder>();
  }

  if (StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("C"), LOG4CXX_STR("c"))
    || charset == LOG4CXX_STR("646")
    || StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("US-ASCII"), LOG4CXX_STR("us-ascii"))
    // the second literal must be entirely lower case, otherwise the
    // spelling "iso646-us" is not recognised
    || StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("ISO646-US"), LOG4CXX_STR("iso646-us"))
    || StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("ANSI_X3.4-1968"), LOG4CXX_STR("ansi_x3.4-1968"))
    || StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("CP20127"), LOG4CXX_STR("cp20127")))
  {
    return std::make_shared<USASCIICharsetEncoder>();
  }

  if (StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("ISO-8859-1"), LOG4CXX_STR("iso-8859-1"))
    || StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("ISO-LATIN-1"), LOG4CXX_STR("iso-latin-1"))
    || StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("CP1252"), LOG4CXX_STR("cp1252")))
  {
    return std::make_shared<ISOLatinCharsetEncoder>();
  }

  // a plain "UTF-16" request is served by the big-endian encoder
  if (StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("UTF-16BE"), LOG4CXX_STR("utf-16be"))
    || StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("UTF-16"), LOG4CXX_STR("utf-16"))
    || StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("CP1200"), LOG4CXX_STR("cp1200")))
  {
    return std::make_shared<UTF16BECharsetEncoder>();
  }

  if (StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("UTF-16LE"), LOG4CXX_STR("utf-16le")))
  {
    return std::make_shared<UTF16LECharsetEncoder>();
  }

  if (StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("LOCALE"), LOG4CXX_STR("locale")))
  {
    return std::make_shared<LocaleCharsetEncoder>();
  }

#if APR_HAS_XLATE
  return std::make_shared<APRCharsetEncoder>(charset);
#else
  throw IllegalArgumentException(charset);
#endif
}
597
598
599
/** Base implementation: does nothing. */
void CharsetEncoder::reset()
{
  // intentionally empty
}
602
603
/** Base implementation: does nothing and leaves the buffer untouched. */
void CharsetEncoder::flush(ByteBuffer&)
{
  // intentionally empty
}
606
607
608
void CharsetEncoder::encode(CharsetEncoderPtr& enc,
609
  const LogString& src,
610
  LogString::const_iterator& iter,
611
  ByteBuffer& dst)
612
0
{
613
0
  log4cxx_status_t stat = enc->encode(src, iter, dst);
614
615
0
  if (stat != APR_SUCCESS && iter != src.end())
616
0
  {
617
#if LOG4CXX_LOGCHAR_IS_WCHAR || LOG4CXX_LOGCHAR_IS_UNICHAR
618
    iter++;
619
#elif LOG4CXX_LOGCHAR_IS_UTF8
620
621
    //  advance past this character and all continuation characters
622
0
    while ((*(++iter) & 0xC0) == 0x80);
623
624
#else
625
#error logchar is unrecognized
626
#endif
627
0
    dst.put(Transcoder::LOSSCHAR);
628
0
  }
629
0
}
630
631
bool CharsetEncoder::isTriviallyCopyable(const LogString& src, const CharsetEncoderPtr& enc)
632
0
{
633
0
  bool result;
634
0
#if !LOG4CXX_CHARSET_EBCDIC
635
0
  if (dynamic_cast<LocaleCharsetEncoder*>(enc.get()))
636
0
  {
637
0
    result = src.end() == std::find_if(src.begin(), src.end()
638
0
      , [](const logchar& ch) -> bool { return 0x80 <= (unsigned int)ch; });
639
0
  }
640
0
  else
641
0
#endif
642
0
    result = !!dynamic_cast<TrivialCharsetEncoder*>(enc.get());
643
0
  return result;
644
0
}