Coverage Report

Created: 2026-02-26 06:53

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Python/pystrhex.c
Line
Count
Source
1
/* Format bytes as hexadecimal */
2
3
#include "Python.h"
4
#include "pycore_strhex.h"        // _Py_strhex_with_sep()
5
#include "pycore_unicodeobject.h" // _PyUnicode_CheckConsistency()
6
7
/* Scalar hexlify: convert len bytes to 2*len hex characters.
8
   Uses table lookup via Py_hexdigits for the conversion. */
9
static inline void
10
_Py_hexlify_scalar(const unsigned char *src, Py_UCS1 *dst, Py_ssize_t len)
11
90
{
12
    /* Various optimizations like using math instead of a table lookup,
13
       manually unrolling the loop, storing the global table pointer locally,
14
       and doing wider dst writes have been tried and benchmarked; all produced
15
       nearly identical performance on gcc 15.  Using a 256 entry uint16_t
16
       table was a bit slower.  So we keep our old simple and obvious code. */
17
90
    for (Py_ssize_t i = 0; i < len; i++) {
18
0
        unsigned char c = src[i];
19
0
        *dst++ = Py_hexdigits[c >> 4];
20
0
        *dst++ = Py_hexdigits[c & 0x0f];
21
0
    }
22
90
}
23
24
/* Portable SIMD optimization for hexlify using GCC/Clang vector extensions.
25
   Uses __builtin_shufflevector for portable interleave that compiles to
26
   native SIMD instructions (SSE2 punpcklbw/punpckhbw on x86-64 [always],
27
   NEON zip1/zip2 on ARM64 [always], & vzip on ARM32 when compiler flags
28
   for the target microarch allow it [try -march=native if running 32-bit
29
   on an RPi3 or later]).
30
31
   Performance:
32
   - For more common small data it varies between 1.1-3x faster.
33
   - Up to 11x faster on larger data than the scalar code.
34
35
   While faster is possible for big data using AVX2 or AVX512, that
36
   adds a ton of complication. Who ever really hexes huge data?
37
   The 16-64 byte boosts align nicely with md5 - sha512 hexdigests.
38
*/
39
#ifdef HAVE_EFFICIENT_BUILTIN_SHUFFLEVECTOR
40
41
/* 128-bit vector of 16 unsigned bytes */
42
typedef unsigned char v16u8 __attribute__((vector_size(16)));
43
/* 128-bit vector of 16 signed bytes - for efficient comparison.
44
   Using signed comparison generates pcmpgtb on x86-64 instead of
45
   the slower psubusb+pcmpeqb sequence from unsigned comparison.
46
   ARM NEON performs the same either way. */
47
typedef signed char v16s8 __attribute__((vector_size(16)));
48
49
/* Splat a byte value across all 16 lanes */
50
static inline v16u8
51
v16u8_splat(unsigned char x)
52
270
{
53
270
    return (v16u8){x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x};
54
270
}
55
56
static inline v16s8
57
v16s8_splat(signed char x)
58
90
{
59
90
    return (v16s8){x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x};
60
90
}
61
62
/* Portable SIMD hexlify: converts 16 bytes to 32 hex chars per iteration.
63
   Compiles to native SSE2 on x86-64, NEON on ARM64 (and some ARM32). */
64
static void
65
_Py_hexlify_simd(const unsigned char *src, Py_UCS1 *dst, Py_ssize_t len)
66
90
{
67
90
    const v16u8 mask_0f = v16u8_splat(0x0f);
68
90
    const v16u8 ascii_0 = v16u8_splat('0');
69
90
    const v16u8 offset = v16u8_splat('a' - '0' - 10);  /* 0x27 */
70
90
    const v16s8 nine = v16s8_splat(9);
71
72
90
    Py_ssize_t i = 0;
73
74
    /* Process 16 bytes at a time */
75
360
    for (; i + 16 <= len; i += 16, dst += 32) {
76
        /* Load 16 bytes (memcpy for safe unaligned access) */
77
270
        v16u8 data;
78
270
        memcpy(&data, src + i, 16);
79
80
        /* Extract high and low nibbles using vector operators */
81
270
        v16u8 hi = (data >> 4) & mask_0f;
82
270
        v16u8 lo = data & mask_0f;
83
84
        /* Compare > 9 using signed comparison for efficient codegen.
85
           Nibble values 0-15 are safely in signed byte range.
86
           This generates pcmpgtb on x86-64, avoiding the slower
87
           psubusb+pcmpeqb sequence from unsigned comparison. */
88
270
        v16u8 hi_gt9 = (v16u8)((v16s8)hi > nine);
89
270
        v16u8 lo_gt9 = (v16u8)((v16s8)lo > nine);
90
91
        /* Convert nibbles to hex ASCII */
92
270
        hi = hi + ascii_0 + (hi_gt9 & offset);
93
270
        lo = lo + ascii_0 + (lo_gt9 & offset);
94
95
        /* Interleave hi/lo nibbles using portable shufflevector.
96
           This compiles to punpcklbw/punpckhbw on x86-64, zip1/zip2 on ARM64,
97
           or vzip on ARM32. */
98
270
        v16u8 result0 = __builtin_shufflevector(hi, lo,
99
270
            0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23);
100
270
        v16u8 result1 = __builtin_shufflevector(hi, lo,
101
270
            8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31);
102
103
        /* Store 32 hex characters */
104
270
        memcpy(dst, &result0, 16);
105
270
        memcpy(dst + 16, &result1, 16);
106
270
    }
107
108
    /* Scalar fallback for remaining 0-15 bytes */
109
90
    _Py_hexlify_scalar(src + i, dst, len - i);
110
90
}
111
112
#endif /* HAVE_EFFICIENT_BUILTIN_SHUFFLEVECTOR */
113
114
static PyObject *_Py_strhex_impl(const char* argbuf, const Py_ssize_t arglen,
115
                                 PyObject* sep, int bytes_per_sep_group,
116
                                 const int return_bytes)
117
90
{
118
90
    assert(arglen >= 0);
119
120
90
    Py_UCS1 sep_char = 0;
121
90
    if (sep) {
122
0
        Py_ssize_t seplen = PyObject_Length((PyObject*)sep);
123
0
        if (seplen < 0) {
124
0
            return NULL;
125
0
        }
126
0
        if (seplen != 1) {
127
0
            PyErr_SetString(PyExc_ValueError, "sep must be length 1.");
128
0
            return NULL;
129
0
        }
130
0
        if (PyUnicode_Check(sep)) {
131
0
            if (PyUnicode_KIND(sep) != PyUnicode_1BYTE_KIND) {
132
0
                PyErr_SetString(PyExc_ValueError, "sep must be ASCII.");
133
0
                return NULL;
134
0
            }
135
0
            sep_char = PyUnicode_READ_CHAR(sep, 0);
136
0
        }
137
0
        else if (PyBytes_Check(sep)) {
138
0
            sep_char = PyBytes_AS_STRING(sep)[0];
139
0
        }
140
0
        else {
141
0
            PyErr_SetString(PyExc_TypeError, "sep must be str or bytes.");
142
0
            return NULL;
143
0
        }
144
0
        if (sep_char > 127 && !return_bytes) {
145
0
            PyErr_SetString(PyExc_ValueError, "sep must be ASCII.");
146
0
            return NULL;
147
0
        }
148
0
    }
149
90
    else {
150
90
        bytes_per_sep_group = 0;
151
90
    }
152
90
    unsigned int abs_bytes_per_sep = _Py_ABS_CAST(unsigned int, bytes_per_sep_group);
153
90
    Py_ssize_t resultlen = 0;
154
90
    if (bytes_per_sep_group && arglen > 0) {
155
        /* How many sep characters we'll be inserting. */
156
0
        resultlen = (arglen - 1) / abs_bytes_per_sep;
157
0
    }
158
    /* Bounds checking for our Py_ssize_t indices. */
159
90
    if (arglen >= PY_SSIZE_T_MAX / 2 - resultlen) {
160
0
        return PyErr_NoMemory();
161
0
    }
162
90
    resultlen += arglen * 2;
163
164
90
    if ((size_t)abs_bytes_per_sep >= (size_t)arglen) {
165
0
        bytes_per_sep_group = 0;
166
0
        abs_bytes_per_sep = 0;
167
0
    }
168
169
90
    PyObject *retval;
170
90
    Py_UCS1 *retbuf;
171
90
    if (return_bytes) {
172
        /* If _PyBytes_FromSize() were public we could avoid malloc+copy. */
173
0
        retval = PyBytes_FromStringAndSize(NULL, resultlen);
174
0
        if (!retval) {
175
0
            return NULL;
176
0
        }
177
0
        retbuf = (Py_UCS1 *)PyBytes_AS_STRING(retval);
178
0
    }
179
90
    else {
180
90
        retval = PyUnicode_New(resultlen, 127);
181
90
        if (!retval) {
182
0
            return NULL;
183
0
        }
184
90
        retbuf = PyUnicode_1BYTE_DATA(retval);
185
90
    }
186
187
    /* Hexlify */
188
90
    Py_ssize_t i, j;
189
90
    unsigned char c;
190
191
90
    if (bytes_per_sep_group == 0) {
192
90
#ifdef HAVE_EFFICIENT_BUILTIN_SHUFFLEVECTOR
193
90
        if (arglen >= 16) {
194
90
            _Py_hexlify_simd((const unsigned char *)argbuf, retbuf, arglen);
195
90
        }
196
0
        else
197
0
#endif
198
0
        {
199
0
            _Py_hexlify_scalar((const unsigned char *)argbuf, retbuf, arglen);
200
0
        }
201
90
    }
202
0
    else {
203
        /* The number of complete chunk+sep periods */
204
0
        Py_ssize_t chunks = (arglen - 1) / abs_bytes_per_sep;
205
0
        Py_ssize_t chunk;
206
0
        unsigned int k;
207
208
0
        if (bytes_per_sep_group < 0) {
209
0
            i = j = 0;
210
0
            for (chunk = 0; chunk < chunks; chunk++) {
211
0
                for (k = 0; k < abs_bytes_per_sep; k++) {
212
0
                    c = argbuf[i++];
213
0
                    retbuf[j++] = Py_hexdigits[c >> 4];
214
0
                    retbuf[j++] = Py_hexdigits[c & 0x0f];
215
0
                }
216
0
                retbuf[j++] = sep_char;
217
0
            }
218
0
            while (i < arglen) {
219
0
                c = argbuf[i++];
220
0
                retbuf[j++] = Py_hexdigits[c >> 4];
221
0
                retbuf[j++] = Py_hexdigits[c & 0x0f];
222
0
            }
223
0
            assert(j == resultlen);
224
0
        }
225
0
        else {
226
0
            i = arglen - 1;
227
0
            j = resultlen - 1;
228
0
            for (chunk = 0; chunk < chunks; chunk++) {
229
0
                for (k = 0; k < abs_bytes_per_sep; k++) {
230
0
                    c = argbuf[i--];
231
0
                    retbuf[j--] = Py_hexdigits[c & 0x0f];
232
0
                    retbuf[j--] = Py_hexdigits[c >> 4];
233
0
                }
234
0
                retbuf[j--] = sep_char;
235
0
            }
236
0
            while (i >= 0) {
237
0
                c = argbuf[i--];
238
0
                retbuf[j--] = Py_hexdigits[c & 0x0f];
239
0
                retbuf[j--] = Py_hexdigits[c >> 4];
240
0
            }
241
0
            assert(j == -1);
242
0
        }
243
0
    }
244
245
#ifdef Py_DEBUG
246
    if (!return_bytes) {
247
        assert(_PyUnicode_CheckConsistency(retval, 1));
248
    }
249
#endif
250
251
90
    return retval;
252
90
}
253
254
PyObject * _Py_strhex(const char* argbuf, const Py_ssize_t arglen)
255
90
{
256
90
    return _Py_strhex_impl(argbuf, arglen, NULL, 0, 0);
257
90
}
258
259
/* Same as above but returns a bytes() instead of str() to avoid the
260
 * need to decode the str() when bytes are needed. */
261
PyObject* _Py_strhex_bytes(const char* argbuf, const Py_ssize_t arglen)
262
0
{
263
0
    return _Py_strhex_impl(argbuf, arglen, NULL, 0, 1);
264
0
}
265
266
/* These variants include support for a separator between every N bytes: */
267
268
PyObject* _Py_strhex_with_sep(const char* argbuf, const Py_ssize_t arglen,
269
                              PyObject* sep, const int bytes_per_group)
270
0
{
271
0
    return _Py_strhex_impl(argbuf, arglen, sep, bytes_per_group, 0);
272
0
}
273
274
/* Same as above but returns a bytes() instead of str() to avoid the
275
 * need to decode the str() when bytes are needed. */
276
PyObject* _Py_strhex_bytes_with_sep(const char* argbuf, const Py_ssize_t arglen,
277
                                    PyObject* sep, const int bytes_per_group)
278
0
{
279
0
    return _Py_strhex_impl(argbuf, arglen, sep, bytes_per_group, 1);
280
0
}