Coverage Report

Created: 2025-06-13 06:30

/src/wxwidgets/include/wx/strconv.h
Line
Count
Source (jump to first uncovered line)
1
///////////////////////////////////////////////////////////////////////////////
2
// Name:        wx/strconv.h
3
// Purpose:     conversion routines for char sets and Unicode
4
// Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin
5
// Created:     29/01/98
6
// Copyright:   (c) 1998 Ove Kaaven, Robert Roebling
7
//              (c) 1998-2006 Vadim Zeitlin
8
// Licence:     wxWindows licence
9
///////////////////////////////////////////////////////////////////////////////
10
11
#ifndef _WX_STRCONV_H_
12
#define _WX_STRCONV_H_
13
14
#include "wx/defs.h"
15
#include "wx/chartype.h"
16
#include "wx/buffer.h"
17
18
class WXDLLIMPEXP_FWD_BASE wxString;
19
20
// the error value returned by wxMBConv methods
21
58.8k
#define wxCONV_FAILED ((size_t)-1)
22
23
// ----------------------------------------------------------------------------
24
// wxMBConv (abstract base class for conversions)
25
// ----------------------------------------------------------------------------
26
27
// When deriving a new class from wxMBConv you must reimplement ToWChar() and
28
// FromWChar() methods which are not pure virtual only for historical reasons,
29
// don't let the fact that the existing classes implement MB2WC/WC2MB() instead
30
// confuse you.
31
//
32
// For many encodings you must override GetMaxCharLen().
33
//
34
// You also have to implement Clone() to allow copying the conversions
35
// polymorphically.
36
//
37
// And you might need to override GetMBNulLen() as well.
38
class WXDLLIMPEXP_BASE wxMBConv
39
{
40
public:
41
    // The functions doing actual conversion from/to narrow to/from wide
42
    // character strings.
43
    //
44
    // On success, the return value is the length (i.e. the number of
45
    // characters, not bytes) of the converted string including any trailing
46
    // L'\0' or (possibly multiple) '\0'(s). If the conversion fails or if
47
    // there is not enough space for everything, including the trailing NUL
48
    // character(s), in the output buffer, wxCONV_FAILED is returned.
49
    //
50
    // In the special case when dst is null (the value of dstLen is ignored
51
    // then) the return value is the length of the needed buffer but nothing
52
    // happens otherwise. If srcLen is wxNO_LEN, the entire string, up to and
53
    // including the trailing NUL(s), is converted, otherwise exactly srcLen
54
    // bytes are.
55
    //
56
    // Typical usage:
57
    //
58
    //          size_t dstLen = conv.ToWChar(nullptr, 0, src);
59
    //          if ( dstLen == wxCONV_FAILED )
60
    //              ... handle error ...
61
    //          wchar_t *wbuf = new wchar_t[dstLen];
62
    //          conv.ToWChar(wbuf, dstLen, src);
63
    //          ... work with wbuf ...
64
    //          delete [] wbuf;
65
    //
66
    virtual size_t ToWChar(wchar_t *dst, size_t dstLen,
67
                           const char *src, size_t srcLen = wxNO_LEN) const;
68
69
    virtual size_t FromWChar(char *dst, size_t dstLen,
70
                             const wchar_t *src, size_t srcLen = wxNO_LEN) const;
71
72
73
    // Convenience functions for translating NUL-terminated strings: return
74
    // the buffer containing the converted string or empty buffer if the
75
    // conversion failed.
76
    wxWCharBuffer cMB2WC(const char *in) const
77
0
        { return DoConvertMB2WC(in, wxNO_LEN); }
78
    wxCharBuffer cWC2MB(const wchar_t *in) const
79
0
        { return DoConvertWC2MB(in, wxNO_LEN); }
80
81
    wxWCharBuffer cMB2WC(const wxScopedCharBuffer& in) const
82
0
        { return DoConvertMB2WC(in, in.length()); }
83
    wxCharBuffer cWC2MB(const wxScopedWCharBuffer& in) const
84
0
        { return DoConvertWC2MB(in, in.length()); }
85
86
87
    // Convenience functions for converting strings which may contain embedded
88
    // NULs and don't have to be NUL-terminated.
89
    //
90
    // inLen is the length of the buffer including trailing NUL if any or
91
    // wxNO_LEN if the input is NUL-terminated.
92
    //
93
    // outLen receives, if not null, the length of the converted string or 0 if
94
    // the conversion failed (returning 0 and not -1 in this case makes it
95
    // difficult to distinguish between failed conversion and empty input but
96
    // this is done for backwards compatibility). Notice that the rules for
97
    // whether outLen accounts or not for the last NUL are the same as for
98
    // To/FromWChar() above: if inLen is specified, outLen is exactly the
99
    // number of characters converted, whether the last one of them was NUL or
100
    // not. But if inLen == wxNO_LEN then outLen doesn't account for the last
101
    // NUL even though it is present.
102
    wxWCharBuffer
103
        cMB2WC(const char *in, size_t inLen, size_t *outLen) const;
104
    wxCharBuffer
105
        cWC2MB(const wchar_t *in, size_t inLen, size_t *outLen) const;
106
107
    // Obsolete convenience functions.
108
0
    wxWCharBuffer cMB2WX(const char *psz) const { return cMB2WC(psz); }
109
0
    wxCharBuffer cWX2MB(const wchar_t *psz) const { return cWC2MB(psz); }
110
0
    const wchar_t* cWC2WX(const wchar_t *psz) const { return psz; }
111
0
    const wchar_t* cWX2WC(const wchar_t *psz) const { return psz; }
112
113
    // return the maximum number of bytes that can be required to encode a
114
    // single character in this encoding, e.g. 4 for UTF-8
115
0
    virtual size_t GetMaxCharLen() const { return 1; }
116
117
    // this function is used in the implementation of cMB2WC() to distinguish
118
    // between the following cases:
119
    //
120
    //      a) var width encoding with strings terminated by a single NUL
121
    //         (usual multibyte encodings): return 1 in this case
122
    //      b) fixed width encoding with 2 bytes/char and so terminated by
123
    //         2 NULs (UTF-16/UCS-2 and variants): return 2 in this case
124
    //      c) fixed width encoding with 4 bytes/char and so terminated by
125
    //         4 NULs (UTF-32/UCS-4 and variants): return 4 in this case
126
    //
127
    // anything else is not supported currently and (size_t)-1 should be returned
128
12
    virtual size_t GetMBNulLen() const { return 1; }
129
130
    // return the maximal value currently returned by GetMBNulLen() for any
131
    // encoding
132
0
    static size_t GetMaxMBNulLen() { return 4 /* for UTF-32 */; }
133
134
    // return true if the converter's charset is UTF-8, i.e. char* strings
135
    // decoded using this object can be directly copied to wxString's internal
136
    // storage without converting to WC and then back to UTF-8 MB string
137
0
    virtual bool IsUTF8() const { return false; }
138
139
    // The old conversion functions. The existing classes currently mostly
140
    // implement these ones but we're in transition to using To/FromWChar()
141
    // instead and any new classes should implement just the new functions.
142
    // For now, however, we provide default implementation of To/FromWChar() in
143
    // this base class in terms of MB2WC/WC2MB() to avoid having to rewrite all
144
    // the conversions at once.
145
    //
146
    // On success, the return value is the length (i.e. the number of
147
    // characters, not bytes) not counting the trailing NUL(s) of the converted
148
    // string. On failure, (size_t)-1 is returned. In the special case when
149
    // outputBuf is null the return value is the same one but nothing is
150
    // written to the buffer.
151
    //
152
    // Note that outLen is the length of the output buffer, not the length of
153
    // the input (which is always supposed to be terminated by one or more
154
    // NULs, as appropriate for the encoding)!
155
    virtual size_t MB2WC(wchar_t *out, const char *in, size_t outLen) const;
156
    virtual size_t WC2MB(char *out, const wchar_t *in, size_t outLen) const;
157
158
159
    // make a heap-allocated copy of this object
160
    wxNODISCARD virtual wxMBConv *Clone() const = 0;
161
162
    // virtual dtor for any base class
163
0
    virtual ~wxMBConv() = default;
164
165
private:
166
    // Common part of single argument cWC2MB() and cMB2WC() overloads above.
167
    wxCharBuffer DoConvertWC2MB(const wchar_t* pwz, size_t srcLen) const;
168
    wxWCharBuffer DoConvertMB2WC(const char* psz, size_t srcLen) const;
169
};
170
171
// ----------------------------------------------------------------------------
172
// wxMBConvLibc uses standard mbstowcs() and wcstombs() functions for
173
//              conversion (hence it depends on the current locale)
174
// ----------------------------------------------------------------------------
175
176
class WXDLLIMPEXP_BASE wxMBConvLibc : public wxMBConv
177
{
178
public:
179
    virtual size_t MB2WC(wchar_t *outputBuf, const char *psz, size_t outputSize) const override;
180
    virtual size_t WC2MB(char *outputBuf, const wchar_t *psz, size_t outputSize) const override;
181
182
0
    wxNODISCARD virtual wxMBConv *Clone() const override { return new wxMBConvLibc; }
183
184
0
    virtual bool IsUTF8() const override { return wxLocaleIsUtf8; }
185
};
186
187
#ifdef __UNIX__
188
189
// ----------------------------------------------------------------------------
190
// wxConvBrokenFileNames is made for Unix in Unicode mode when
191
// files are accidentally written in an encoding which is not
192
// the system encoding. Typically, the system encoding will be
193
// UTF8 but there might be files stored in ISO8859-1 on disk.
194
// ----------------------------------------------------------------------------
195
196
class WXDLLIMPEXP_BASE wxConvBrokenFileNames : public wxMBConv
197
{
198
public:
199
    wxConvBrokenFileNames(const wxString& charset);
200
    wxConvBrokenFileNames(const wxConvBrokenFileNames& conv)
201
0
        : wxMBConv(),
202
0
          m_conv(conv.m_conv ? conv.m_conv->Clone() : nullptr)
203
0
    {
204
0
    }
205
0
    virtual ~wxConvBrokenFileNames() { delete m_conv; }
206
207
    virtual size_t MB2WC(wchar_t *out, const char *in, size_t outLen) const override
208
0
    {
209
0
        return m_conv->MB2WC(out, in, outLen);
210
0
    }
211
212
    virtual size_t WC2MB(char *out, const wchar_t *in, size_t outLen) const override
213
0
    {
214
0
        return m_conv->WC2MB(out, in, outLen);
215
0
    }
216
217
    virtual size_t GetMBNulLen() const override
218
0
    {
219
        // simply forward to the wrapped conversion object (GetMBNulLen() is public, no cast is involved)
220
0
        return m_conv->GetMBNulLen();
221
0
    }
222
223
0
    virtual bool IsUTF8() const override { return m_conv->IsUTF8(); }
224
225
0
    wxNODISCARD virtual wxMBConv *Clone() const override { return new wxConvBrokenFileNames(*this); }
226
227
private:
228
    // the conversion object we forward to
229
    wxMBConv *m_conv;
230
231
    wxDECLARE_NO_ASSIGN_CLASS(wxConvBrokenFileNames);
232
};
233
234
#endif // __UNIX__
235
236
// ----------------------------------------------------------------------------
237
// wxMBConvUTF7 (for conversion using UTF7 encoding)
238
// ----------------------------------------------------------------------------
239
240
class WXDLLIMPEXP_BASE wxMBConvUTF7 : public wxMBConv
241
{
242
public:
243
2
    wxMBConvUTF7() = default;
244
245
    // compiler-generated copy ctor, assignment operator and dtor are ok
246
    // (assuming it's ok to copy the shift state -- not really sure about it)
247
248
    virtual size_t ToWChar(wchar_t *dst, size_t dstLen,
249
                           const char *src, size_t srcLen = wxNO_LEN) const override;
250
    virtual size_t FromWChar(char *dst, size_t dstLen,
251
                             const wchar_t *src, size_t srcLen = wxNO_LEN) const override;
252
253
0
    virtual size_t GetMaxCharLen() const override { return 4; }
254
255
0
    wxNODISCARD virtual wxMBConv *Clone() const override { return new wxMBConvUTF7; }
256
257
private:
258
    // UTF-7 decoder/encoder may be in direct mode or in shifted mode after a
259
    // '+' (and until the '-' or any other non-base64 character)
260
    struct StateMode
261
    {
262
        enum Mode
263
        {
264
            Direct,     // pass through state
265
            Shifted     // after a '+' (and before '-')
266
        };
267
    };
268
269
    // the current decoder state: this is only used by ToWChar() if srcLen
270
    // parameter is not wxNO_LEN, when working on the entire NUL-terminated
271
    // strings we neither update nor use the state
272
    class DecoderState : private StateMode
273
    {
274
    private:
275
        // current state: this one is private as we want to enforce the use of
276
        // ToDirect/ToShifted() methods below
277
        Mode mode;
278
279
    public:
280
        // the initial state is direct
281
2
        DecoderState() { mode = Direct; accum = bit = msb = 0; isLSB = false; }
282
283
        // switch to/from shifted mode
284
0
        void ToDirect() { mode = Direct; }
285
0
        void ToShifted() { mode = Shifted; accum = bit = 0; isLSB = false; }
286
287
0
        bool IsDirect() const { return mode == Direct; }
288
0
        bool IsShifted() const { return mode == Shifted; }
289
290
291
        // these variables are only used in shifted mode
292
293
        unsigned int accum; // accumulator of the bits we've already got
294
        unsigned int bit;   // the number of bits consumed mod 8
295
        unsigned char msb;  // the high byte of UTF-16 word
296
        bool isLSB;         // whether we're decoding LSB or MSB of UTF-16 word
297
    };
298
299
    DecoderState m_stateDecoder;
300
301
302
    // encoder state is simpler as we always receive entire Unicode characters
303
    // on input
304
    class EncoderState : private StateMode
305
    {
306
    private:
307
        Mode mode;
308
309
    public:
310
2
        EncoderState() { mode = Direct; accum = bit = 0; }
311
312
0
        void ToDirect() { mode = Direct; }
313
0
        void ToShifted() { mode = Shifted; accum = bit = 0; }
314
315
0
        bool IsDirect() const { return mode == Direct; }
316
0
        bool IsShifted() const { return mode == Shifted; }
317
318
        unsigned int accum;
319
        unsigned int bit;
320
    };
321
322
    EncoderState m_stateEncoder;
323
};
324
325
// ----------------------------------------------------------------------------
326
// wxMBConvUTF8 (for conversion using UTF8 encoding)
327
// ----------------------------------------------------------------------------
328
329
// this is the real UTF-8 conversion class, it has to be called "strict UTF-8"
330
// for compatibility reasons: the wxMBConvUTF8 class below also supports lossy
331
// conversions if it is created with non default options
332
class WXDLLIMPEXP_BASE wxMBConvStrictUTF8 : public wxMBConv
333
{
334
public:
335
    // compiler-generated default ctor and other methods are ok
336
337
    virtual size_t ToWChar(wchar_t *dst, size_t dstLen,
338
                           const char *src, size_t srcLen = wxNO_LEN) const override;
339
    virtual size_t FromWChar(char *dst, size_t dstLen,
340
                             const wchar_t *src, size_t srcLen = wxNO_LEN) const override;
341
342
0
    virtual size_t GetMaxCharLen() const override { return 4; }
343
344
0
    wxNODISCARD virtual wxMBConv *Clone() const override { return new wxMBConvStrictUTF8(); }
345
346
    // NB: other mapping modes are not, strictly speaking, UTF-8, so we can't
347
    //     take the shortcut in that case
348
0
    virtual bool IsUTF8() const override { return true; }
349
};
350
351
class WXDLLIMPEXP_BASE wxMBConvUTF8 : public wxMBConvStrictUTF8
352
{
353
public:
354
    enum
355
    {
356
        MAP_INVALID_UTF8_NOT = 0,
357
        MAP_INVALID_UTF8_TO_PUA = 1,
358
        MAP_INVALID_UTF8_TO_OCTAL = 2
359
    };
360
361
0
    wxMBConvUTF8(int options = MAP_INVALID_UTF8_NOT) : m_options(options) { }
362
363
    virtual size_t ToWChar(wchar_t *dst, size_t dstLen,
364
                           const char *src, size_t srcLen = wxNO_LEN) const override;
365
    virtual size_t FromWChar(char *dst, size_t dstLen,
366
                             const wchar_t *src, size_t srcLen = wxNO_LEN) const override;
367
368
0
    virtual size_t GetMaxCharLen() const override { return 4; }
369
370
0
    wxNODISCARD virtual wxMBConv *Clone() const override { return new wxMBConvUTF8(m_options); }
371
372
    // NB: other mapping modes are not, strictly speaking, UTF-8, so we can't
373
    //     take the shortcut in that case
374
0
    virtual bool IsUTF8() const override { return m_options == MAP_INVALID_UTF8_NOT; }
375
376
private:
377
    int m_options;
378
};
379
380
// ----------------------------------------------------------------------------
381
// wxMBConvUTF16Base: for both LE and BE variants
382
// ----------------------------------------------------------------------------
383
384
class WXDLLIMPEXP_BASE wxMBConvUTF16Base : public wxMBConv
385
{
386
public:
387
    enum { BYTES_PER_CHAR = 2 };
388
389
0
    virtual size_t GetMBNulLen() const override { return BYTES_PER_CHAR; }
390
391
protected:
392
    // return the length of the buffer using srcLen if it's not wxNO_LEN and
393
    // computing the length ourselves if it is; also checks that the length is
394
    // even if specified as we need an entire number of UTF-16 characters and
395
    // returns wxNO_LEN which indicates error if it is odd
396
    static size_t GetLength(const char *src, size_t srcLen);
397
};
398
399
// ----------------------------------------------------------------------------
400
// wxMBConvUTF16LE (for conversion using UTF16 Little Endian encoding)
401
// ----------------------------------------------------------------------------
402
403
class WXDLLIMPEXP_BASE wxMBConvUTF16LE : public wxMBConvUTF16Base
404
{
405
public:
406
    virtual size_t ToWChar(wchar_t *dst, size_t dstLen,
407
                           const char *src, size_t srcLen = wxNO_LEN) const override;
408
    virtual size_t FromWChar(char *dst, size_t dstLen,
409
                             const wchar_t *src, size_t srcLen = wxNO_LEN) const override;
410
0
    virtual size_t GetMaxCharLen() const override { return 4; }
411
0
    wxNODISCARD virtual wxMBConv *Clone() const override { return new wxMBConvUTF16LE; }
412
};
413
414
// ----------------------------------------------------------------------------
415
// wxMBConvUTF16BE (for conversion using UTF16 Big Endian encoding)
416
// ----------------------------------------------------------------------------
417
418
class WXDLLIMPEXP_BASE wxMBConvUTF16BE : public wxMBConvUTF16Base
419
{
420
public:
421
    virtual size_t ToWChar(wchar_t *dst, size_t dstLen,
422
                           const char *src, size_t srcLen = wxNO_LEN) const override;
423
    virtual size_t FromWChar(char *dst, size_t dstLen,
424
                             const wchar_t *src, size_t srcLen = wxNO_LEN) const override;
425
0
    virtual size_t GetMaxCharLen() const override { return 4; }
426
0
    wxNODISCARD virtual wxMBConv *Clone() const override { return new wxMBConvUTF16BE; }
427
};
428
429
// ----------------------------------------------------------------------------
430
// wxMBConvUTF32Base: base class for both LE and BE variants
431
// ----------------------------------------------------------------------------
432
433
class WXDLLIMPEXP_BASE wxMBConvUTF32Base : public wxMBConv
434
{
435
public:
436
    enum { BYTES_PER_CHAR = 4 };
437
438
0
    virtual size_t GetMBNulLen() const override { return BYTES_PER_CHAR; }
439
440
protected:
441
    // this is similar to wxMBConvUTF16Base method with the same name except
442
    // that, of course, it verifies that length is divisible by 4 if given and
443
    // not by 2
444
    static size_t GetLength(const char *src, size_t srcLen);
445
};
446
447
// ----------------------------------------------------------------------------
448
// wxMBConvUTF32LE (for conversion using UTF32 Little Endian encoding)
449
// ----------------------------------------------------------------------------
450
451
class WXDLLIMPEXP_BASE wxMBConvUTF32LE : public wxMBConvUTF32Base
452
{
453
public:
454
    virtual size_t ToWChar(wchar_t *dst, size_t dstLen,
455
                           const char *src, size_t srcLen = wxNO_LEN) const override;
456
    virtual size_t FromWChar(char *dst, size_t dstLen,
457
                             const wchar_t *src, size_t srcLen = wxNO_LEN) const override;
458
0
    virtual size_t GetMaxCharLen() const override { return 4; }
459
0
    wxNODISCARD virtual wxMBConv *Clone() const override { return new wxMBConvUTF32LE; }
460
};
461
462
// ----------------------------------------------------------------------------
463
// wxMBConvUTF32BE (for conversion using UTF32 Big Endian encoding)
464
// ----------------------------------------------------------------------------
465
466
class WXDLLIMPEXP_BASE wxMBConvUTF32BE : public wxMBConvUTF32Base
467
{
468
public:
469
    virtual size_t ToWChar(wchar_t *dst, size_t dstLen,
470
                           const char *src, size_t srcLen = wxNO_LEN) const override;
471
    virtual size_t FromWChar(char *dst, size_t dstLen,
472
                             const wchar_t *src, size_t srcLen = wxNO_LEN) const override;
473
0
    virtual size_t GetMaxCharLen() const override { return 4; }
474
0
    wxNODISCARD virtual wxMBConv *Clone() const override { return new wxMBConvUTF32BE; }
475
};
476
477
// ----------------------------------------------------------------------------
478
// wxCSConv (for conversion based on loadable char sets)
479
// ----------------------------------------------------------------------------
480
481
#include "wx/fontenc.h"
482
483
class WXDLLIMPEXP_BASE wxCSConv : public wxMBConv
484
{
485
public:
486
    // we can be created either from charset name or from an encoding constant
487
    // but we can't have both at once
488
    wxCSConv(const wxString& charset);
489
    wxCSConv(wxFontEncoding encoding);
490
491
    wxCSConv(const wxCSConv& conv);
492
    virtual ~wxCSConv();
493
494
    wxCSConv& operator=(const wxCSConv& conv);
495
496
    virtual size_t ToWChar(wchar_t *dst, size_t dstLen,
497
                           const char *src, size_t srcLen = wxNO_LEN) const override;
498
    virtual size_t FromWChar(char *dst, size_t dstLen,
499
                             const wchar_t *src, size_t srcLen = wxNO_LEN) const override;
500
    virtual size_t GetMBNulLen() const override;
501
502
    virtual bool IsUTF8() const override;
503
504
0
    wxNODISCARD virtual wxMBConv *Clone() const override { return new wxCSConv(*this); }
505
506
    void Clear();
507
508
    // return true if the conversion could be initialized successfully
509
    bool IsOk() const;
510
511
private:
512
    // common part of all ctors
513
    void Init();
514
515
    // Creates the conversion to use, called from all ctors to initialize
516
    // m_convReal.
517
    wxMBConv *DoCreate() const;
518
519
    // Set the name (may be only called when m_name == nullptr), makes copy of
520
    // the charset string.
521
    void SetName(const char *charset);
522
523
    // Set m_encoding field respecting the rules below, i.e. making sure it has
524
    // a valid value if m_name == nullptr (thus this should be always called after
525
    // SetName()).
526
    //
527
    // Input encoding may be valid or not.
528
    void SetEncoding(wxFontEncoding encoding);
529
530
531
    // The encoding we use is specified by the two fields below:
532
    //
533
    //  1. If m_name != nullptr, m_encoding corresponds to it if it's one of
534
    //     encodings we know about (i.e. member of wxFontEncoding) or is
535
    //     wxFONTENCODING_SYSTEM otherwise.
536
    //
537
    //  2. If m_name == nullptr, m_encoding is always valid, i.e. not one of
538
    //     wxFONTENCODING_{SYSTEM,DEFAULT,MAX}.
539
    char *m_name;
540
    wxFontEncoding m_encoding;
541
542
    // The conversion object for our encoding or nullptr if we failed to create it
543
    // in which case we fall back to hard-coded ISO8859-1 conversion.
544
    wxMBConv *m_convReal;
545
};
546
547
// ----------------------------------------------------------------------------
548
// wxWhateverWorksConv: use whatever encoding works for the input
549
// ----------------------------------------------------------------------------
550
551
class WXDLLIMPEXP_BASE wxWhateverWorksConv : public wxMBConv
552
{
553
public:
554
    wxWhateverWorksConv()
555
2
    {
556
2
    }
557
558
    // Try to interpret the string as UTF-8, if it fails fall back to the
559
    // current locale encoding (wxConvLibc) and if this fails as well,
560
    // interpret it as wxConvISO8859_1 (which is used because it never fails
561
    // and this conversion is used when we really, really must produce
562
    // something on output).
563
    virtual size_t
564
    ToWChar(wchar_t *dst, size_t dstLen,
565
            const char *src, size_t srcLen = wxNO_LEN) const override;
566
567
    // Try to encode the string using the current locale encoding (wxConvLibc)
568
    // and fall back to UTF-8 (which never fails) if it doesn't work. Note that
569
    // we never use wxConvISO8859_1 here as we prefer to fall back on UTF-8
570
    // even for the strings containing only code points representable in 8859-1.
571
    virtual size_t
572
    FromWChar(char *dst, size_t dstLen,
573
              const wchar_t *src, size_t srcLen = wxNO_LEN) const override;
574
575
    // Use the value for UTF-8 here to make sure we try to decode up to 4 bytes
576
    // as UTF-8 before giving up.
577
0
    virtual size_t GetMaxCharLen() const override { return 4; }
578
579
    wxNODISCARD virtual wxMBConv *Clone() const override
580
0
    {
581
0
        return new wxWhateverWorksConv();
582
0
    }
583
};
584
585
// ----------------------------------------------------------------------------
586
// declare predefined conversion objects
587
// ----------------------------------------------------------------------------
588
589
// Note: this macro is an implementation detail (see the comment in
590
// strconv.cpp). The wxGet_XXX() and wxGet_XXXPtr() functions shouldn't be
591
// used by user code and neither should XXXPtr, use the wxConvXXX macro
592
// instead.
593
#define WX_DECLARE_GLOBAL_CONV(klass, name)                             \
594
    extern WXDLLIMPEXP_DATA_BASE(klass*) name##Ptr;                     \
595
    extern WXDLLIMPEXP_BASE klass* wxGet_##name##Ptr();                 \
596
    inline klass& wxGet_##name()                                        \
597
12.5k
    {                                                                   \
598
12.5k
        if ( !name##Ptr )                                               \
599
12.5k
            name##Ptr = wxGet_##name##Ptr();                            \
600
12.5k
        return *name##Ptr;                                              \
601
12.5k
    }
wxGet_wxConvLocal()
Line
Count
Source
597
2.67k
    {                                                                   \
598
2.67k
        if ( !name##Ptr )                                               \
599
2.67k
            name##Ptr = wxGet_##name##Ptr();                            \
600
2.67k
        return *name##Ptr;                                              \
601
2.67k
    }
wxGet_wxConvLibc()
Line
Count
Source
597
6
    {                                                                   \
598
6
        if ( !name##Ptr )                                               \
599
6
            name##Ptr = wxGet_##name##Ptr();                            \
600
6
        return *name##Ptr;                                              \
601
6
    }
Unexecuted instantiation: wxGet_wxConvISO8859_1()
wxGet_wxConvUTF8()
Line
Count
Source
597
9.82k
    {                                                                   \
598
9.82k
        if ( !name##Ptr )                                               \
599
9.82k
            name##Ptr = wxGet_##name##Ptr();                            \
600
9.82k
        return *name##Ptr;                                              \
601
9.82k
    }
Unexecuted instantiation: wxGet_wxConvUTF7()
Unexecuted instantiation: wxGet_wxConvWhateverWorks()
602
603
604
// conversion to be used with all standard functions affected by locale, e.g.
605
// strtol(), strftime(), ...
606
WX_DECLARE_GLOBAL_CONV(wxMBConv, wxConvLibc)
607
0
#define wxConvLibc wxGet_wxConvLibc()
608
609
// conversion ISO-8859-1/UTF-7/UTF-8 <-> wchar_t
610
WX_DECLARE_GLOBAL_CONV(wxCSConv, wxConvISO8859_1)
611
0
#define wxConvISO8859_1 wxGet_wxConvISO8859_1()
612
613
WX_DECLARE_GLOBAL_CONV(wxMBConvStrictUTF8, wxConvUTF8)
614
9.82k
#define wxConvUTF8 wxGet_wxConvUTF8()
615
616
WX_DECLARE_GLOBAL_CONV(wxMBConvUTF7, wxConvUTF7)
617
#define wxConvUTF7 wxGet_wxConvUTF7()
618
619
// conversion used when we may not afford to lose data when outputting Unicode
620
// strings (should be avoided in the other direction as it can misinterpret the
621
// input encoding)
622
WX_DECLARE_GLOBAL_CONV(wxWhateverWorksConv, wxConvWhateverWorks)
623
0
#define wxConvWhateverWorks wxGet_wxConvWhateverWorks()
624
625
// conversion used for the file names on the systems where they're not Unicode
626
// (basically anything except Windows)
627
//
628
// this is used by all file functions, can be changed by the application
629
//
630
// by default UTF-8 under Mac OS X and wxConvLibc elsewhere (but it's not used
631
// under Windows normally)
632
extern WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName;
633
634
// backwards compatible define
635
0
#define wxConvFile (*wxConvFileName)
636
637
// the current conversion object, may be set to any conversion, is used by
638
// default in a couple of places inside wx (initially same as wxConvLibc)
639
extern WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent;
640
641
// the conversion corresponding to the current locale
642
WX_DECLARE_GLOBAL_CONV(wxCSConv, wxConvLocal)
643
0
#define wxConvLocal wxGet_wxConvLocal()
644
645
// the conversion corresponding to the encoding of the standard UI elements
646
//
647
// by default this is the same as wxConvLocal but may be changed if the program
648
// needs to use a fixed encoding
649
extern WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvUI;
650
651
#undef WX_DECLARE_GLOBAL_CONV
652
653
// ----------------------------------------------------------------------------
654
// endianness-dependent conversions
655
// ----------------------------------------------------------------------------
656
657
#ifdef WORDS_BIGENDIAN
658
    typedef wxMBConvUTF16BE wxMBConvUTF16;
659
    typedef wxMBConvUTF32BE wxMBConvUTF32;
660
#else
661
    typedef wxMBConvUTF16LE wxMBConvUTF16;
662
    typedef wxMBConvUTF32LE wxMBConvUTF32;
663
#endif
664
665
// ----------------------------------------------------------------------------
666
// filename conversion macros
667
// ----------------------------------------------------------------------------
668
669
// filenames are multibyte on Unix and widechar on Windows
670
#if wxMBFILES
671
0
    #define wxFNCONV(name) wxConvFileName->cWX2MB(name)
672
    #define wxFNSTRINGCAST wxMBSTRINGCAST
673
#else
674
#if defined(__WXOSX__) && wxMBFILES
675
    #define wxFNCONV(name) wxConvFileName->cWC2MB( wxConvLocal.cWX2WC(name) )
676
#else
677
    #define wxFNCONV(name) name
678
#endif
679
    #define wxFNSTRINGCAST WXSTRINGCAST
680
#endif
681
682
// ----------------------------------------------------------------------------
683
// macros for the most common conversions
684
// ----------------------------------------------------------------------------
685
686
#define wxConvertWX2MB(s)   wxConvCurrent->cWX2MB(s)
687
#define wxConvertMB2WX(s)   wxConvCurrent->cMB2WX(s)
688
689
// these functions should be used when the conversions really, really have
690
// to succeed (usually because we pass their results to a standard C
691
// function which would crash if we passed nullptr to it), so these functions
692
// always return a valid pointer if their argument is non-null
693
694
inline wxWCharBuffer wxSafeConvertMB2WX(const char *s)
695
0
{
696
0
    return wxConvWhateverWorks.cMB2WC(s);
697
0
}
698
699
inline wxCharBuffer wxSafeConvertWX2MB(const wchar_t *ws)
700
0
{
701
0
    return wxConvWhateverWorks.cWC2MB(ws);
702
0
}
703
704
// Macro that indicates the default encoding for converting C strings
705
// to wxString. There are 3 possible cases:
706
//
707
//  - In UTF-8-only build, all strings are supposed to use UTF-8.
708
//  - If wxNO_IMPLICIT_WXSTRING_ENCODING is defined, the conversion must be
709
//    always specified explicitly. This forbids error prone implicit
710
//    conversions (note that this is incompatible with UTF-8-only build).
711
//  - Otherwise strings are considered to use current locale encoding.
712
//
713
// It is used to provide a default value for const wxMBConv& parameters, i.e.
714
// its intended use is:
715
// wxString(const char *data, ...,
716
//          const wxMBConv &conv wxSTRING_DEFAULT_CONV_ARG);
717
#if wxUSE_UTF8_LOCALE_ONLY
718
#define wxSTRING_DEFAULT_CONV_ARG = wxConvUTF8
719
#elif defined(wxNO_IMPLICIT_WXSTRING_ENCODING)
720
#define wxSTRING_DEFAULT_CONV_ARG
721
#else
722
#define wxSTRING_DEFAULT_CONV_ARG = wxConvLibc
723
#endif
724
725
#endif // _WX_STRCONV_H_
726