Coverage Report

Created: 2026-04-20 06:11

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Python/pystrhex.c
Line
Count
Source
1
/* Format bytes as hexadecimal */
2
3
#include "Python.h"
4
#include "pycore_strhex.h"        // _Py_strhex_with_sep()
5
#include "pycore_unicodeobject.h" // _PyUnicode_CheckConsistency()
6
7
/* Scalar hexlify: convert len bytes to 2*len hex characters.
8
   Uses table lookup via Py_hexdigits for the conversion. */
9
static inline void
10
_Py_hexlify_scalar(const unsigned char *src, Py_UCS1 *dst, Py_ssize_t len)
11
182
{
12
    /* Various optimizations like using math instead of a table lookup,
13
       manually unrolling the loop, storing the global table pointer locally,
14
       and doing wider dst writes have been tried and benchmarked; all produced
15
       nearly identical performance on gcc 15.  Using a 256 entry uint16_t
16
       table was a bit slower.  So we keep our old simple and obvious code. */
17
182
    for (Py_ssize_t i = 0; i < len; i++) {
18
0
        unsigned char c = src[i];
19
0
        *dst++ = Py_hexdigits[c >> 4];
20
0
        *dst++ = Py_hexdigits[c & 0x0f];
21
0
    }
22
182
}
23
24
/* Portable SIMD optimization for hexlify using GCC/Clang vector extensions.
25
   Uses __builtin_shufflevector for portable interleave that compiles to
26
   native SIMD instructions (SSE2 punpcklbw/punpckhbw on x86-64 [always],
27
   NEON zip1/zip2 on ARM64 [always], & vzip on ARM32 when compiler flags
28
   for the target microarch allow it [try -march=native if running 32-bit
29
   on an RPi3 or later]).
30
31
   Performance:
32
   - For more common small data it varies between 1.1-3x faster.
33
   - Up to 11x faster on larger data than the scalar code.
34
35
   While faster is possible for big data using AVX2 or AVX512, that
36
   adds a ton of complication. Who ever really hexes huge data?
37
   The 16-64 byte boosts align nicely with md5 - sha512 hexdigests.
38
*/
39
#ifdef HAVE_EFFICIENT_BUILTIN_SHUFFLEVECTOR
40
41
/* Sixteen unsigned byte lanes packed into one 128-bit vector. */
typedef unsigned char v16u8 __attribute__((__vector_size__(16)));

/* Sixteen signed byte lanes packed into one 128-bit vector.
   Only used for lane-wise "greater than" tests: a signed compare lowers
   to pcmpgtb on x86-64, whereas an unsigned compare needs the slower
   psubusb+pcmpeqb pair.  On ARM NEON both forms cost the same. */
typedef signed char v16s8 __attribute__((__vector_size__(16)));
48
49
/* Splat a byte value across all 16 lanes */
50
static inline v16u8
51
v16u8_splat(unsigned char x)
52
728
{
53
728
    return (v16u8){x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x};
54
728
}
55
56
static inline v16s8
57
v16s8_splat(signed char x)
58
182
{
59
182
    return (v16s8){x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x};
60
182
}
61
62
/* Portable SIMD hexlify: converts 16 bytes to 32 hex chars per iteration.
63
   Compiles to native SSE2 on x86-64, NEON on ARM64 (and some ARM32). */
64
static void
65
_Py_hexlify_simd(const unsigned char *src, Py_UCS1 *dst, Py_ssize_t len)
66
182
{
67
182
    const v16u8 mask_0f = v16u8_splat(0x0f);
68
182
    const v16u8 ascii_0 = v16u8_splat('0');
69
182
    const v16u8 offset = v16u8_splat('a' - '0' - 10);  /* 0x27 */
70
182
    const v16u8 four = v16u8_splat(4);
71
182
    const v16s8 nine = v16s8_splat(9);
72
73
182
    Py_ssize_t i = 0;
74
75
    /* Process 16 bytes at a time */
76
728
    for (; i + 16 <= len; i += 16, dst += 32) {
77
        /* Load 16 bytes (memcpy for safe unaligned access) */
78
546
        v16u8 data;
79
546
        memcpy(&data, src + i, 16);
80
81
        /* Extract high and low nibbles using vector operators */
82
546
        v16u8 hi = (data >> four) & mask_0f;
83
546
        v16u8 lo = data & mask_0f;
84
85
        /* Compare > 9 using signed comparison for efficient codegen.
86
           Nibble values 0-15 are safely in signed byte range.
87
           This generates pcmpgtb on x86-64, avoiding the slower
88
           psubusb+pcmpeqb sequence from unsigned comparison. */
89
546
        v16u8 hi_gt9 = (v16u8)((v16s8)hi > nine);
90
546
        v16u8 lo_gt9 = (v16u8)((v16s8)lo > nine);
91
92
        /* Convert nibbles to hex ASCII */
93
546
        hi = hi + ascii_0 + (hi_gt9 & offset);
94
546
        lo = lo + ascii_0 + (lo_gt9 & offset);
95
96
        /* Interleave hi/lo nibbles using portable shufflevector.
97
           This compiles to punpcklbw/punpckhbw on x86-64, zip1/zip2 on ARM64,
98
           or vzip on ARM32. */
99
546
        v16u8 result0 = __builtin_shufflevector(hi, lo,
100
546
            0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23);
101
546
        v16u8 result1 = __builtin_shufflevector(hi, lo,
102
546
            8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31);
103
104
        /* Store 32 hex characters */
105
546
        memcpy(dst, &result0, 16);
106
546
        memcpy(dst + 16, &result1, 16);
107
546
    }
108
109
    /* Scalar fallback for remaining 0-15 bytes */
110
182
    _Py_hexlify_scalar(src + i, dst, len - i);
111
182
}
112
113
#endif /* HAVE_EFFICIENT_BUILTIN_SHUFFLEVECTOR */
114
115
static PyObject *
116
_Py_strhex_impl(const char* argbuf, Py_ssize_t arglen,
117
                PyObject* sep, Py_ssize_t bytes_per_sep_group,
118
                int return_bytes)
119
182
{
120
182
    assert(arglen >= 0);
121
122
182
    Py_UCS1 sep_char = 0;
123
182
    if (sep) {
124
0
        Py_ssize_t seplen = PyObject_Length((PyObject*)sep);
125
0
        if (seplen < 0) {
126
0
            return NULL;
127
0
        }
128
0
        if (seplen != 1) {
129
0
            PyErr_SetString(PyExc_ValueError, "sep must be length 1.");
130
0
            return NULL;
131
0
        }
132
0
        if (PyUnicode_Check(sep)) {
133
0
            if (PyUnicode_KIND(sep) != PyUnicode_1BYTE_KIND) {
134
0
                PyErr_SetString(PyExc_ValueError, "sep must be ASCII.");
135
0
                return NULL;
136
0
            }
137
0
            sep_char = PyUnicode_READ_CHAR(sep, 0);
138
0
        }
139
0
        else if (PyBytes_Check(sep)) {
140
0
            sep_char = PyBytes_AS_STRING(sep)[0];
141
0
        }
142
0
        else {
143
0
            PyErr_SetString(PyExc_TypeError, "sep must be str or bytes.");
144
0
            return NULL;
145
0
        }
146
0
        if (sep_char > 127 && !return_bytes) {
147
0
            PyErr_SetString(PyExc_ValueError, "sep must be ASCII.");
148
0
            return NULL;
149
0
        }
150
0
    }
151
182
    else {
152
182
        bytes_per_sep_group = 0;
153
182
    }
154
182
    size_t abs_bytes_per_sep = _Py_ABS_CAST(size_t, bytes_per_sep_group);
155
182
    Py_ssize_t resultlen = 0;
156
182
    if (bytes_per_sep_group && arglen > 0) {
157
        /* How many sep characters we'll be inserting. */
158
0
        resultlen = (arglen - 1) / abs_bytes_per_sep;
159
0
    }
160
    /* Bounds checking for our Py_ssize_t indices. */
161
182
    if (arglen >= PY_SSIZE_T_MAX / 2 - resultlen) {
162
0
        return PyErr_NoMemory();
163
0
    }
164
182
    resultlen += arglen * 2;
165
166
182
    if ((size_t)abs_bytes_per_sep >= (size_t)arglen) {
167
0
        bytes_per_sep_group = 0;
168
0
        abs_bytes_per_sep = 0;
169
0
    }
170
171
182
    PyObject *retval;
172
182
    Py_UCS1 *retbuf;
173
182
    if (return_bytes) {
174
        /* If _PyBytes_FromSize() were public we could avoid malloc+copy. */
175
0
        retval = PyBytes_FromStringAndSize(NULL, resultlen);
176
0
        if (!retval) {
177
0
            return NULL;
178
0
        }
179
0
        retbuf = (Py_UCS1 *)PyBytes_AS_STRING(retval);
180
0
    }
181
182
    else {
182
182
        retval = PyUnicode_New(resultlen, 127);
183
182
        if (!retval) {
184
0
            return NULL;
185
0
        }
186
182
        retbuf = PyUnicode_1BYTE_DATA(retval);
187
182
    }
188
189
    /* Hexlify */
190
182
    Py_ssize_t i, j;
191
182
    unsigned char c;
192
193
182
    if (bytes_per_sep_group == 0) {
194
182
#ifdef HAVE_EFFICIENT_BUILTIN_SHUFFLEVECTOR
195
182
        if (arglen >= 16) {
196
182
            _Py_hexlify_simd((const unsigned char *)argbuf, retbuf, arglen);
197
182
        }
198
0
        else
199
0
#endif
200
0
        {
201
0
            _Py_hexlify_scalar((const unsigned char *)argbuf, retbuf, arglen);
202
0
        }
203
182
    }
204
0
    else {
205
        /* The number of complete chunk+sep periods */
206
0
        Py_ssize_t chunks = (arglen - 1) / abs_bytes_per_sep;
207
0
        Py_ssize_t chunk;
208
0
        size_t k;
209
210
0
        if (bytes_per_sep_group < 0) {
211
0
            i = j = 0;
212
0
            for (chunk = 0; chunk < chunks; chunk++) {
213
0
                for (k = 0; k < abs_bytes_per_sep; k++) {
214
0
                    c = argbuf[i++];
215
0
                    retbuf[j++] = Py_hexdigits[c >> 4];
216
0
                    retbuf[j++] = Py_hexdigits[c & 0x0f];
217
0
                }
218
0
                retbuf[j++] = sep_char;
219
0
            }
220
0
            while (i < arglen) {
221
0
                c = argbuf[i++];
222
0
                retbuf[j++] = Py_hexdigits[c >> 4];
223
0
                retbuf[j++] = Py_hexdigits[c & 0x0f];
224
0
            }
225
0
            assert(j == resultlen);
226
0
        }
227
0
        else {
228
0
            i = arglen - 1;
229
0
            j = resultlen - 1;
230
0
            for (chunk = 0; chunk < chunks; chunk++) {
231
0
                for (k = 0; k < abs_bytes_per_sep; k++) {
232
0
                    c = argbuf[i--];
233
0
                    retbuf[j--] = Py_hexdigits[c & 0x0f];
234
0
                    retbuf[j--] = Py_hexdigits[c >> 4];
235
0
                }
236
0
                retbuf[j--] = sep_char;
237
0
            }
238
0
            while (i >= 0) {
239
0
                c = argbuf[i--];
240
0
                retbuf[j--] = Py_hexdigits[c & 0x0f];
241
0
                retbuf[j--] = Py_hexdigits[c >> 4];
242
0
            }
243
0
            assert(j == -1);
244
0
        }
245
0
    }
246
247
#ifdef Py_DEBUG
248
    if (!return_bytes) {
249
        assert(_PyUnicode_CheckConsistency(retval, 1));
250
    }
251
#endif
252
253
182
    return retval;
254
182
}
255
256
/* Hex-encode `arglen` bytes at `argbuf` into a str, with no separator.
   Forwards to _Py_strhex_impl() and returns its result unchanged. */
PyObject *
_Py_strhex(const char* argbuf, Py_ssize_t arglen)
{
    const int return_bytes = 0;  /* produce a str */
    return _Py_strhex_impl(argbuf, arglen,
                           /* sep */ NULL, /* bytes_per_sep_group */ 0,
                           return_bytes);
}
260
261
/* Same as above but returns a bytes() instead of str() to avoid the
262
 * need to decode the str() when bytes are needed. */
263
PyObject* _Py_strhex_bytes(const char* argbuf, Py_ssize_t arglen)
264
0
{
265
0
    return _Py_strhex_impl(argbuf, arglen, NULL, 0, 1);
266
0
}
267
268
/* These variants include support for a separator between every N bytes: */
269
270
/* Hex-encode `arglen` bytes at `argbuf` into a str, inserting `sep`
   between groups of `bytes_per_group` bytes.  `sep` and `bytes_per_group`
   are passed straight through to _Py_strhex_impl(), which validates them. */
PyObject *
_Py_strhex_with_sep(const char* argbuf, Py_ssize_t arglen,
                    PyObject* sep, Py_ssize_t bytes_per_group)
{
    const int return_bytes = 0;  /* produce a str */
    return _Py_strhex_impl(argbuf, arglen, sep, bytes_per_group,
                           return_bytes);
}
275
276
/* Same as above but returns a bytes() instead of str() to avoid the
277
 * need to decode the str() when bytes are needed. */
278
PyObject* _Py_strhex_bytes_with_sep(const char* argbuf, Py_ssize_t arglen,
279
                                    PyObject* sep, Py_ssize_t bytes_per_group)
280
0
{
281
0
    return _Py_strhex_impl(argbuf, arglen, sep, bytes_per_group, 1);
282
0
}