/src/cpython/Python/pystrhex.c
Line | Count | Source |
1 | | /* Format bytes as hexadecimal */ |
2 | | |
3 | | #include "Python.h" |
4 | | #include "pycore_strhex.h" // _Py_strhex_with_sep() |
5 | | #include "pycore_unicodeobject.h" // _PyUnicode_CheckConsistency() |
6 | | |
7 | | static PyObject *_Py_strhex_impl(const char* argbuf, const Py_ssize_t arglen, |
8 | | PyObject* sep, int bytes_per_sep_group, |
9 | | const int return_bytes) |
10 | 0 | { |
11 | 0 | assert(arglen >= 0); |
12 | |
|
13 | 0 | Py_UCS1 sep_char = 0; |
14 | 0 | if (sep) { |
15 | 0 | Py_ssize_t seplen = PyObject_Length((PyObject*)sep); |
16 | 0 | if (seplen < 0) { |
17 | 0 | return NULL; |
18 | 0 | } |
19 | 0 | if (seplen != 1) { |
20 | 0 | PyErr_SetString(PyExc_ValueError, "sep must be length 1."); |
21 | 0 | return NULL; |
22 | 0 | } |
23 | 0 | if (PyUnicode_Check(sep)) { |
24 | 0 | if (PyUnicode_KIND(sep) != PyUnicode_1BYTE_KIND) { |
25 | 0 | PyErr_SetString(PyExc_ValueError, "sep must be ASCII."); |
26 | 0 | return NULL; |
27 | 0 | } |
28 | 0 | sep_char = PyUnicode_READ_CHAR(sep, 0); |
29 | 0 | } |
30 | 0 | else if (PyBytes_Check(sep)) { |
31 | 0 | sep_char = PyBytes_AS_STRING(sep)[0]; |
32 | 0 | } |
33 | 0 | else { |
34 | 0 | PyErr_SetString(PyExc_TypeError, "sep must be str or bytes."); |
35 | 0 | return NULL; |
36 | 0 | } |
37 | 0 | if (sep_char > 127 && !return_bytes) { |
38 | 0 | PyErr_SetString(PyExc_ValueError, "sep must be ASCII."); |
39 | 0 | return NULL; |
40 | 0 | } |
41 | 0 | } |
42 | 0 | else { |
43 | 0 | bytes_per_sep_group = 0; |
44 | 0 | } |
45 | 0 | unsigned int abs_bytes_per_sep = _Py_ABS_CAST(unsigned int, bytes_per_sep_group); |
46 | 0 | Py_ssize_t resultlen = 0; |
47 | 0 | if (bytes_per_sep_group && arglen > 0) { |
48 | | /* How many sep characters we'll be inserting. */ |
49 | 0 | resultlen = (arglen - 1) / abs_bytes_per_sep; |
50 | 0 | } |
51 | | /* Bounds checking for our Py_ssize_t indices. */ |
52 | 0 | if (arglen >= PY_SSIZE_T_MAX / 2 - resultlen) { |
53 | 0 | return PyErr_NoMemory(); |
54 | 0 | } |
55 | 0 | resultlen += arglen * 2; |
56 | |
|
57 | 0 | if ((size_t)abs_bytes_per_sep >= (size_t)arglen) { |
58 | 0 | bytes_per_sep_group = 0; |
59 | 0 | abs_bytes_per_sep = 0; |
60 | 0 | } |
61 | |
|
62 | 0 | PyObject *retval; |
63 | 0 | Py_UCS1 *retbuf; |
64 | 0 | if (return_bytes) { |
65 | | /* If _PyBytes_FromSize() were public we could avoid malloc+copy. */ |
66 | 0 | retval = PyBytes_FromStringAndSize(NULL, resultlen); |
67 | 0 | if (!retval) { |
68 | 0 | return NULL; |
69 | 0 | } |
70 | 0 | retbuf = (Py_UCS1 *)PyBytes_AS_STRING(retval); |
71 | 0 | } |
72 | 0 | else { |
73 | 0 | retval = PyUnicode_New(resultlen, 127); |
74 | 0 | if (!retval) { |
75 | 0 | return NULL; |
76 | 0 | } |
77 | 0 | retbuf = PyUnicode_1BYTE_DATA(retval); |
78 | 0 | } |
79 | | |
80 | | /* Hexlify */ |
81 | 0 | Py_ssize_t i, j; |
82 | 0 | unsigned char c; |
83 | |
|
84 | 0 | if (bytes_per_sep_group == 0) { |
85 | 0 | for (i = j = 0; i < arglen; ++i) { |
86 | 0 | assert((j + 1) < resultlen); |
87 | 0 | c = argbuf[i]; |
88 | 0 | retbuf[j++] = Py_hexdigits[c >> 4]; |
89 | 0 | retbuf[j++] = Py_hexdigits[c & 0x0f]; |
90 | 0 | } |
91 | 0 | assert(j == resultlen); |
92 | 0 | } |
93 | 0 | else { |
94 | | /* The number of complete chunk+sep periods */ |
95 | 0 | Py_ssize_t chunks = (arglen - 1) / abs_bytes_per_sep; |
96 | 0 | Py_ssize_t chunk; |
97 | 0 | unsigned int k; |
98 | |
|
99 | 0 | if (bytes_per_sep_group < 0) { |
100 | 0 | i = j = 0; |
101 | 0 | for (chunk = 0; chunk < chunks; chunk++) { |
102 | 0 | for (k = 0; k < abs_bytes_per_sep; k++) { |
103 | 0 | c = argbuf[i++]; |
104 | 0 | retbuf[j++] = Py_hexdigits[c >> 4]; |
105 | 0 | retbuf[j++] = Py_hexdigits[c & 0x0f]; |
106 | 0 | } |
107 | 0 | retbuf[j++] = sep_char; |
108 | 0 | } |
109 | 0 | while (i < arglen) { |
110 | 0 | c = argbuf[i++]; |
111 | 0 | retbuf[j++] = Py_hexdigits[c >> 4]; |
112 | 0 | retbuf[j++] = Py_hexdigits[c & 0x0f]; |
113 | 0 | } |
114 | 0 | assert(j == resultlen); |
115 | 0 | } |
116 | 0 | else { |
117 | 0 | i = arglen - 1; |
118 | 0 | j = resultlen - 1; |
119 | 0 | for (chunk = 0; chunk < chunks; chunk++) { |
120 | 0 | for (k = 0; k < abs_bytes_per_sep; k++) { |
121 | 0 | c = argbuf[i--]; |
122 | 0 | retbuf[j--] = Py_hexdigits[c & 0x0f]; |
123 | 0 | retbuf[j--] = Py_hexdigits[c >> 4]; |
124 | 0 | } |
125 | 0 | retbuf[j--] = sep_char; |
126 | 0 | } |
127 | 0 | while (i >= 0) { |
128 | 0 | c = argbuf[i--]; |
129 | 0 | retbuf[j--] = Py_hexdigits[c & 0x0f]; |
130 | 0 | retbuf[j--] = Py_hexdigits[c >> 4]; |
131 | 0 | } |
132 | 0 | assert(j == -1); |
133 | 0 | } |
134 | 0 | } |
135 | |
|
136 | | #ifdef Py_DEBUG |
137 | | if (!return_bytes) { |
138 | | assert(_PyUnicode_CheckConsistency(retval, 1)); |
139 | | } |
140 | | #endif |
141 | |
|
142 | 0 | return retval; |
143 | 0 | } |
144 | | |
145 | | PyObject * _Py_strhex(const char* argbuf, const Py_ssize_t arglen) |
146 | 0 | { |
147 | 0 | return _Py_strhex_impl(argbuf, arglen, NULL, 0, 0); |
148 | 0 | } |
149 | | |
150 | | /* Same as above but returns a bytes() instead of str() to avoid the |
151 | | * need to decode the str() when bytes are needed. */ |
152 | | PyObject* _Py_strhex_bytes(const char* argbuf, const Py_ssize_t arglen) |
153 | 0 | { |
154 | 0 | return _Py_strhex_impl(argbuf, arglen, NULL, 0, 1); |
155 | 0 | } |
156 | | |
157 | | /* These variants include support for a separator between every N bytes: */ |
158 | | |
159 | | PyObject* _Py_strhex_with_sep(const char* argbuf, const Py_ssize_t arglen, |
160 | | PyObject* sep, const int bytes_per_group) |
161 | 0 | { |
162 | 0 | return _Py_strhex_impl(argbuf, arglen, sep, bytes_per_group, 0); |
163 | 0 | } |
164 | | |
165 | | /* Same as above but returns a bytes() instead of str() to avoid the |
166 | | * need to decode the str() when bytes are needed. */ |
167 | | PyObject* _Py_strhex_bytes_with_sep(const char* argbuf, const Py_ssize_t arglen, |
168 | | PyObject* sep, const int bytes_per_group) |
169 | 0 | { |
170 | 0 | return _Py_strhex_impl(argbuf, arglen, sep, bytes_per_group, 1); |
171 | 0 | } |