Coverage Report

Created: 2026-04-12 06:23

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/logging-log4cxx/src/main/cpp/charsetencoder.cpp
Line
Count
Source
1
/*
2
 * Licensed to the Apache Software Foundation (ASF) under one or more
3
 * contributor license agreements.  See the NOTICE file distributed with
4
 * this work for additional information regarding copyright ownership.
5
 * The ASF licenses this file to You under the Apache License, Version 2.0
6
 * (the "License"); you may not use this file except in compliance with
7
 * the License.  You may obtain a copy of the License at
8
 *
9
 *      http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 */
17
#include <log4cxx/logstring.h>
18
#include <log4cxx/helpers/charsetencoder.h>
19
#include <log4cxx/helpers/bytebuffer.h>
20
#include <log4cxx/helpers/exception.h>
21
#include <apr_xlate.h>
22
#include <log4cxx/helpers/stringhelper.h>
23
#include <log4cxx/helpers/transcoder.h>
24
#include <algorithm>
25
26
#if !defined(LOG4CXX)
27
  #define LOG4CXX 1
28
#endif
29
30
#include <log4cxx/private/log4cxx_private.h>
31
#include <apr_portable.h>
32
#include <mutex>
33
34
#ifdef LOG4CXX_HAS_WCSTOMBS
35
  #include <stdlib.h>
36
#endif
37
38
using namespace LOG4CXX_NS;
39
using namespace LOG4CXX_NS::helpers;
40
41
IMPLEMENT_LOG4CXX_OBJECT(CharsetEncoder)
42
43
namespace LOG4CXX_NS
44
{
45
46
namespace helpers
47
{
48
49
#if APR_HAS_XLATE
50
/**
51
* A character encoder implemented using apr_xlate.
52
*/
53
class APRCharsetEncoder : public CharsetEncoder
54
{
55
  public:
56
0
    APRCharsetEncoder(const LogString& topage) : pool()
57
0
    {
58
0
#if LOG4CXX_LOGCHAR_IS_WCHAR
59
0
      const char* frompage = "WCHAR_T";
60
0
#endif
61
#if LOG4CXX_LOGCHAR_IS_UTF8
62
      const char* frompage = "UTF-8";
63
#endif
64
#if LOG4CXX_LOGCHAR_IS_UNICHAR
65
      const char* frompage = "UTF-16";
66
#endif
67
0
      std::string tpage(Transcoder::encodeCharsetName(topage));
68
0
      apr_status_t stat = apr_xlate_open(&convset,
69
0
          tpage.c_str(),
70
0
          frompage,
71
0
          pool.getAPRPool());
72
73
0
      if (stat != APR_SUCCESS)
74
0
      {
75
0
        throw IllegalArgumentException(topage);
76
0
      }
77
0
    }
78
79
    virtual ~APRCharsetEncoder()
80
0
    {
81
0
    }
82
83
    virtual log4cxx_status_t encode(const LogString& in,
84
      LogString::const_iterator& iter,
85
      ByteBuffer& out)
86
0
    {
87
0
      apr_status_t stat;
88
0
      size_t outbytes_left = out.remaining();
89
0
      size_t initial_outbytes_left = outbytes_left;
90
0
      size_t position = out.position();
91
92
0
      if (iter == in.end())
93
0
      {
94
0
        std::lock_guard<std::mutex> lock(mutex);
95
0
        stat = apr_xlate_conv_buffer(convset, NULL, NULL,
96
0
            out.data() + position, &outbytes_left);
97
0
      }
98
0
      else
99
0
      {
100
0
        LogString::size_type inOffset = (iter - in.begin());
101
0
        apr_size_t inbytes_left =
102
0
          (in.size() - inOffset) * sizeof(LogString::value_type);
103
0
        apr_size_t initial_inbytes_left = inbytes_left;
104
0
        {
105
0
          std::lock_guard<std::mutex> lock(mutex);
106
0
          stat = apr_xlate_conv_buffer(convset,
107
0
              (const char*) (in.data() + inOffset),
108
0
              &inbytes_left,
109
0
              out.data() + position,
110
0
              &outbytes_left);
111
0
        }
112
0
        iter += ((initial_inbytes_left - inbytes_left) / sizeof(LogString::value_type));
113
0
      }
114
115
0
      out.increment_position((initial_outbytes_left - outbytes_left));
116
0
      return stat;
117
0
    }
118
119
  private:
120
    APRCharsetEncoder(const APRCharsetEncoder&);
121
    APRCharsetEncoder& operator=(const APRCharsetEncoder&);
122
    Pool pool;
123
    std::mutex mutex;
124
    apr_xlate_t* convset;
125
};
126
#endif
127
128
#if LOG4CXX_LOGCHAR_IS_WCHAR && LOG4CXX_HAS_WCSTOMBS
129
/**
130
 *  A character encoder implemented using wcstombs.
131
*/
132
class WcstombsCharsetEncoder : public CharsetEncoder
133
{
134
  public:
135
    WcstombsCharsetEncoder()
136
0
    {
137
0
    }
138
139
    /**
140
     *   Converts a wchar_t to the default external multibyte encoding.
141
     */
142
    log4cxx_status_t encode(const LogString& in,
143
      LogString::const_iterator& iter,
144
      ByteBuffer& out)
145
0
    {
146
0
      log4cxx_status_t stat = APR_SUCCESS;
147
0
148
0
      if (iter != in.end())
149
0
      {
150
0
        size_t outbytes_left = out.remaining();
151
0
        size_t position = out.position();
152
0
        std::wstring::size_type inOffset = (iter - in.begin());
153
0
        enum { BUFSIZE = 256 };
154
0
        wchar_t buf[BUFSIZE];
155
0
        size_t chunkSize = BUFSIZE - 1;
156
0
157
0
        if (chunkSize * MB_LEN_MAX > outbytes_left)
158
0
        {
159
0
          chunkSize = outbytes_left / MB_LEN_MAX;
160
0
        }
161
0
162
0
        if (chunkSize > in.length() - inOffset)
163
0
        {
164
0
          chunkSize = in.length() - inOffset;
165
0
        }
166
0
167
0
        memset(buf, 0, BUFSIZE * sizeof(wchar_t));
168
0
        memcpy(buf,
169
0
          in.data() + inOffset,
170
0
          chunkSize * sizeof(wchar_t));
171
0
        size_t converted = wcstombs(out.data() + position, buf, outbytes_left);
172
0
173
0
        if (converted == (size_t) -1)
174
0
        {
175
0
          stat = APR_BADARG;
176
0
177
0
          //
178
0
          //   if unconvertable character was encountered
179
0
          //       repeatedly halve source to get fragment that
180
0
          //       can be converted
181
0
          for (chunkSize /= 2;
182
0
            chunkSize > 0;
183
0
            chunkSize /= 2)
184
0
          {
185
0
            buf[chunkSize] = 0;
186
0
            converted = wcstombs(out.data() + position, buf, outbytes_left);
187
0
188
0
            if (converted != (size_t) -1)
189
0
            {
190
0
              iter += chunkSize;
191
0
              out.increment_position(converted);
192
0
              break;
193
0
            }
194
0
          }
195
0
        }
196
0
        else
197
0
        {
198
0
          iter += chunkSize;
199
0
          out.increment_position(converted);
200
0
        }
201
0
      }
202
0
203
0
      return stat;
204
0
    }
205
206
207
208
  private:
209
    WcstombsCharsetEncoder(const WcstombsCharsetEncoder&);
210
    WcstombsCharsetEncoder& operator=(const WcstombsCharsetEncoder&);
211
};
212
#endif
213
214
215
/**
216
*   Encodes a LogString to US-ASCII.
217
*/
218
class USASCIICharsetEncoder : public CharsetEncoder
219
{
220
  public:
221
    USASCIICharsetEncoder()
222
0
    {
223
0
    }
224
225
    virtual log4cxx_status_t encode(const LogString& in,
226
      LogString::const_iterator& iter,
227
      ByteBuffer& out)
228
0
    {
229
0
      log4cxx_status_t stat = APR_SUCCESS;
230
231
0
      if (iter != in.end())
232
0
      {
233
0
        while (out.remaining() > 0 && iter != in.end())
234
0
        {
235
0
          LogString::const_iterator prev(iter);
236
0
          unsigned int sv = Transcoder::decode(in, iter);
237
238
0
          if (sv <= 0x7F)
239
0
          {
240
0
            out.put((char) sv);
241
0
          }
242
0
          else
243
0
          {
244
0
            iter = prev;
245
0
            stat = APR_BADARG;
246
0
            break;
247
0
          }
248
0
        }
249
0
      }
250
251
0
      return stat;
252
0
    }
253
254
  private:
255
    USASCIICharsetEncoder(const USASCIICharsetEncoder&);
256
    USASCIICharsetEncoder& operator=(const USASCIICharsetEncoder&);
257
};
258
259
/**
260
*   Converts a LogString to ISO-8859-1.
261
*/
262
class ISOLatinCharsetEncoder : public CharsetEncoder
263
{
264
  public:
265
    ISOLatinCharsetEncoder()
266
0
    {
267
0
    }
268
269
    virtual log4cxx_status_t encode(const LogString& in,
270
      LogString::const_iterator& iter,
271
      ByteBuffer& out)
272
0
    {
273
0
      log4cxx_status_t stat = APR_SUCCESS;
274
275
0
      if (iter != in.end())
276
0
      {
277
0
        while (out.remaining() > 0 && iter != in.end())
278
0
        {
279
0
          LogString::const_iterator prev(iter);
280
0
          unsigned int sv = Transcoder::decode(in, iter);
281
282
0
          if (sv <= 0xFF)
283
0
          {
284
0
            out.put((char) sv);
285
0
          }
286
0
          else
287
0
          {
288
0
            iter = prev;
289
0
            stat = APR_BADARG;
290
0
            break;
291
0
          }
292
0
        }
293
0
      }
294
295
0
      return stat;
296
0
    }
297
298
  private:
299
    ISOLatinCharsetEncoder(const ISOLatinCharsetEncoder&);
300
    ISOLatinCharsetEncoder& operator=(const ISOLatinCharsetEncoder&);
301
};
302
303
/**
304
*   Encodes a LogString to a byte array when the encodings are identical.
305
*/
306
class TrivialCharsetEncoder : public CharsetEncoder
307
{
308
  public:
309
    TrivialCharsetEncoder()
310
0
    {
311
0
    }
312
313
314
    virtual log4cxx_status_t encode(const LogString& in,
315
      LogString::const_iterator& iter,
316
      ByteBuffer& out)
317
0
    {
318
0
      if (iter != in.end())
319
0
      {
320
0
        size_t requested = in.length() - (iter - in.begin());
321
0
322
0
        if (requested > out.remaining() / sizeof(logchar))
323
0
        {
324
0
          requested = out.remaining() / sizeof(logchar);
325
0
        }
326
0
327
0
        memcpy(out.current(),
328
0
          (const char*) in.data() + (iter - in.begin()),
329
0
          requested * sizeof(logchar));
330
0
        iter += requested;
331
0
        out.increment_position(requested * sizeof(logchar));
332
0
      }
333
0
334
0
      return APR_SUCCESS;
335
0
    }
336
337
  private:
338
    TrivialCharsetEncoder(const TrivialCharsetEncoder&);
339
    TrivialCharsetEncoder& operator=(const TrivialCharsetEncoder&);
340
};
341
342
#if LOG4CXX_LOGCHAR_IS_UTF8
343
typedef TrivialCharsetEncoder UTF8CharsetEncoder;
344
#else
345
/**
346
 *  Converts a LogString to UTF-8.
347
 */
348
class UTF8CharsetEncoder : public CharsetEncoder
349
{
350
  public:
351
    UTF8CharsetEncoder()
352
1
    {
353
1
    }
354
355
    virtual log4cxx_status_t encode(const LogString& in,
356
      LogString::const_iterator& iter,
357
      ByteBuffer& out)
358
4.30k
    {
359
91.7k
      while (iter != in.end() && out.remaining() >= 8)
360
87.4k
      {
361
87.4k
        unsigned int sv = Transcoder::decode(in, iter);
362
363
87.4k
        if (sv == 0xFFFF)
364
0
        {
365
0
          return APR_BADARG;
366
0
        }
367
368
87.4k
        Transcoder::encodeUTF8(sv, out);
369
87.4k
      }
370
371
4.30k
      return APR_SUCCESS;
372
4.30k
    }
373
374
  private:
375
    UTF8CharsetEncoder(const UTF8CharsetEncoder&);
376
    UTF8CharsetEncoder& operator=(const UTF8CharsetEncoder&);
377
};
378
#endif
379
380
/**
381
 *   Encodes a LogString to UTF16-BE.
382
 */
383
class UTF16BECharsetEncoder : public CharsetEncoder
384
{
385
  public:
386
    UTF16BECharsetEncoder()
387
0
    {
388
0
    }
389
390
    virtual log4cxx_status_t encode(const LogString& in,
391
      LogString::const_iterator& iter,
392
      ByteBuffer& out)
393
0
    {
394
0
      while (iter != in.end() && out.remaining() >= 4)
395
0
      {
396
0
        unsigned int sv = Transcoder::decode(in, iter);
397
398
0
        if (sv == 0xFFFF)
399
0
        {
400
0
          return APR_BADARG;
401
0
        }
402
403
0
        Transcoder::encodeUTF16BE(sv, out);
404
0
      }
405
406
0
      return APR_SUCCESS;
407
0
    }
408
409
  private:
410
    UTF16BECharsetEncoder(const UTF16BECharsetEncoder&);
411
    UTF16BECharsetEncoder& operator=(const UTF16BECharsetEncoder&);
412
};
413
414
/**
415
 *   Encodes a LogString to UTF16-LE.
416
 */
417
class UTF16LECharsetEncoder : public CharsetEncoder
418
{
419
  public:
420
    UTF16LECharsetEncoder()
421
0
    {
422
0
    }
423
424
425
    virtual log4cxx_status_t encode(const LogString& in,
426
      LogString::const_iterator& iter,
427
      ByteBuffer& out)
428
0
    {
429
0
      while (iter != in.end() && out.remaining() >= 4)
430
0
      {
431
0
        unsigned int sv = Transcoder::decode(in, iter);
432
433
0
        if (sv == 0xFFFF)
434
0
        {
435
0
          return APR_BADARG;
436
0
        }
437
438
0
        Transcoder::encodeUTF16LE(sv, out);
439
0
      }
440
441
0
      return APR_SUCCESS;
442
0
    }
443
  private:
444
    UTF16LECharsetEncoder(const UTF16LECharsetEncoder&);
445
    UTF16LECharsetEncoder& operator=(const UTF16LECharsetEncoder&);
446
};
447
448
/**
449
 *    Charset encoder that uses current locale settings.
450
 */
451
class LocaleCharsetEncoder : public CharsetEncoder
452
{
453
  public:
454
0
    LocaleCharsetEncoder() : state()
455
0
    {
456
0
    }
457
    log4cxx_status_t encode
458
      ( const LogString&           in
459
      , LogString::const_iterator& nextCodePoint
460
      , ByteBuffer&                out
461
      ) override
462
0
    {
463
0
      log4cxx_status_t result = APR_SUCCESS;
464
0
#if !LOG4CXX_CHARSET_EBCDIC
465
0
      char* current = out.current();
466
0
      size_t availableByteCount = out.remaining();
467
0
      size_t byteCount = 0;
468
0
      if (std::mbsinit(&this->state)) // ByteBuffer not partially encoded?
469
0
      {
470
        // Copy single byte characters
471
0
        for (;
472
0
          nextCodePoint != in.end() && byteCount < availableByteCount && static_cast<unsigned int>(*nextCodePoint) < 0x80;
473
0
          ++nextCodePoint, ++byteCount, ++current)
474
0
        {
475
0
          *current = static_cast<char>(*nextCodePoint);
476
0
        }
477
0
      }
478
0
#endif
479
      // Encode characters that may require multiple bytes
480
0
      while (nextCodePoint != in.end() && byteCount < availableByteCount && MB_CUR_MAX <= (availableByteCount - byteCount))
481
0
      {
482
0
        LogString::const_iterator lastCodePoint = nextCodePoint;
483
0
        auto ch = Transcoder::decode(in, nextCodePoint);
484
0
        if (nextCodePoint == lastCodePoint) // invalid input sequence?
485
0
          nextCodePoint = in.end();
486
0
        auto n = std::wcrtomb(current, ch, &this->state);
487
0
        if (static_cast<std::size_t>(-1) == n) // not a valid wide character?
488
0
        {
489
0
          result = APR_BADARG;
490
0
          break;
491
0
        }
492
0
        byteCount += n;
493
0
        current += n;
494
0
      }
495
0
      out.increment_position(byteCount);
496
0
      return result;
497
0
    }
498
499
  private:
500
    std::mbstate_t state;
501
};
502
503
504
} // namespace helpers
505
506
}  //namespace log4cxx
507
508
509
510
/** Constructs the base encoder; there is nothing to initialize. */
CharsetEncoder::CharsetEncoder() {}

/** Destroys the base encoder; there is nothing to release. */
CharsetEncoder::~CharsetEncoder() {}
517
518
/**
 * Provides the encoder created by createDefaultEncoder().
 *
 * @return the encoder held in a function-local static, or a newly created
 *         encoder when that static no longer holds a value.
 */
CharsetEncoderPtr CharsetEncoder::getDefaultEncoder()
{
  static WideLife<CharsetEncoderPtr> encoder(createDefaultEncoder());

  if (encoder.value() != 0)
  {
    return encoder;
  }

  //
  //  Invoked after static variable destruction
  //     (logging was called in the destructor of a static object),
  //     so create a new encoder.
  //
  return CharsetEncoderPtr( createDefaultEncoder() );
}
534
535
/**
 * Allocates the encoder matching the charset selected at compile time.
 *
 * @return a new encoder object (returned as a raw pointer).
 */
CharsetEncoder* CharsetEncoder::createDefaultEncoder()
{
  // Select the concrete type first, then allocate it in one place.
#if LOG4CXX_CHARSET_UTF8
  typedef UTF8CharsetEncoder DefaultEncoderType;
#elif LOG4CXX_CHARSET_ISO88591
  typedef ISOLatinCharsetEncoder DefaultEncoderType;
#elif LOG4CXX_CHARSET_USASCII
  typedef USASCIICharsetEncoder DefaultEncoderType;
#elif LOG4CXX_LOGCHAR_IS_WCHAR && LOG4CXX_HAS_WCSTOMBS
  typedef WcstombsCharsetEncoder DefaultEncoderType;
#else
  typedef LocaleCharsetEncoder DefaultEncoderType;
#endif
  return new DefaultEncoderType();
}
549
550
551
/**
 * Creates an encoder producing UTF-8.
 *
 * @return a newly allocated UTF8CharsetEncoder.
 */
CharsetEncoderPtr CharsetEncoder::getUTF8Encoder()
{
  CharsetEncoderPtr utf8Encoder = std::make_shared<UTF8CharsetEncoder>();
  return utf8Encoder;
}
555
556
557
558
/**
 * Creates an encoder for the named charset.
 *
 * The names handled directly are compared against an upper-case and a
 * lower-case spelling. Any other name is passed to the apr_xlate based
 * encoder when APR_HAS_XLATE is set.
 *
 * @param charset the charset name.
 * @return a newly allocated encoder.
 * @throws IllegalArgumentException if the name is not handled directly and
 *         either APR_HAS_XLATE is not set or the apr_xlate encoder rejects it.
 */
CharsetEncoderPtr CharsetEncoder::getEncoder(const LogString& charset)
{
  if (StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("UTF-8"), LOG4CXX_STR("utf-8"))
    || StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("CP65001"), LOG4CXX_STR("cp65001")))
  {
    return std::make_shared<UTF8CharsetEncoder>();
  }

  // The lower-case alias must be entirely lower case ("iso646-us"),
  // like every other pair in this function.
  if (StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("C"), LOG4CXX_STR("c")) ||
    charset == LOG4CXX_STR("646") ||
    StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("US-ASCII"), LOG4CXX_STR("us-ascii")) ||
    StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("ISO646-US"), LOG4CXX_STR("iso646-us")) ||
    StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("ANSI_X3.4-1968"), LOG4CXX_STR("ansi_x3.4-1968")) ||
    StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("CP20127"), LOG4CXX_STR("cp20127")))
  {
    return std::make_shared<USASCIICharsetEncoder>();
  }

  if (StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("ISO-8859-1"), LOG4CXX_STR("iso-8859-1")) ||
    StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("ISO-LATIN-1"), LOG4CXX_STR("iso-latin-1")) ||
    StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("CP1252"), LOG4CXX_STR("cp1252")))
  {
    return std::make_shared<ISOLatinCharsetEncoder>();
  }

  // NOTE(review): "UTF-16" and "CP1200" select the big-endian encoder - confirm this is intended.
  if (StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("UTF-16BE"), LOG4CXX_STR("utf-16be"))
    || StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("UTF-16"), LOG4CXX_STR("utf-16"))
    || StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("CP1200"), LOG4CXX_STR("cp1200")))
  {
    return std::make_shared<UTF16BECharsetEncoder>();
  }

  if (StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("UTF-16LE"), LOG4CXX_STR("utf-16le")))
  {
    return std::make_shared<UTF16LECharsetEncoder>();
  }

  if (StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("LOCALE"), LOG4CXX_STR("locale")))
  {
    return std::make_shared<LocaleCharsetEncoder>();
  }

#if APR_HAS_XLATE
  return std::make_shared<APRCharsetEncoder>(charset);
#else
  throw IllegalArgumentException(charset);
#endif
}
601
602
603
void CharsetEncoder::reset()
604
0
{
605
0
}
606
607
void CharsetEncoder::flush(ByteBuffer& /* out */ )
608
0
{
609
0
}
610
611
612
void CharsetEncoder::encode(CharsetEncoderPtr& enc,
613
  const LogString& src,
614
  LogString::const_iterator& iter,
615
  ByteBuffer& dst)
616
0
{
617
0
  log4cxx_status_t stat = enc->encode(src, iter, dst);
618
619
0
  if (stat != APR_SUCCESS && iter != src.end())
620
0
  {
621
0
#if LOG4CXX_LOGCHAR_IS_WCHAR || LOG4CXX_LOGCHAR_IS_UNICHAR
622
0
    iter++;
623
#elif LOG4CXX_LOGCHAR_IS_UTF8
624
625
    //  advance past this character and all continuation characters
626
    while ((*(++iter) & 0xC0) == 0x80);
627
628
#else
629
#error logchar is unrecognized
630
#endif
631
0
    dst.put(Transcoder::LOSSCHAR);
632
0
  }
633
0
}
634
635
bool CharsetEncoder::isTriviallyCopyable(const LogString& src, const CharsetEncoderPtr& enc)
636
0
{
637
0
  bool result;
638
0
#if !LOG4CXX_CHARSET_EBCDIC
639
0
  if (dynamic_cast<LocaleCharsetEncoder*>(enc.get()))
640
0
  {
641
0
    result = src.end() == std::find_if(src.begin(), src.end()
642
0
      , [](const logchar& ch) -> bool { return 0x80 <= (unsigned int)ch; });
643
0
  }
644
0
  else
645
0
#endif
646
0
    result = !!dynamic_cast<TrivialCharsetEncoder*>(enc.get());
647
0
  return result;
648
0
}