/src/lighttpd1.4/src/buffer.c
Line | Count | Source (jump to first uncovered line) |
1 | | #include "first.h" |
2 | | |
3 | | #include "buffer.h" |
4 | | |
5 | | #include <stdlib.h> |
6 | | #include <string.h> |
7 | | #include "sys-time.h" /* strftime() */ |
8 | | |
9 | | static const char hex_chars_lc[] = "0123456789abcdef"; |
10 | | static const char hex_chars_uc[] = "0123456789ABCDEF"; |
11 | | |
12 | | |
13 | | __attribute_noinline__ |
14 | 3.40k | buffer* buffer_init(void) { |
15 | | #if 0 /* buffer_init() and chunk_init() can be hot, |
16 | | * so avoid the additional hop of indirection */ |
17 | | return ck_calloc(1, sizeof(buffer)); |
18 | | #else |
19 | 3.40k | buffer * const b = calloc(1, sizeof(*b)); |
20 | 3.40k | force_assert(b); |
21 | 3.40k | return b; |
22 | 3.40k | #endif |
23 | 3.40k | } |
24 | | |
25 | 3.40k | void buffer_free(buffer *b) { |
26 | 3.40k | if (NULL == b) return; |
27 | 3.40k | free(b->ptr); |
28 | 3.40k | free(b); |
29 | 3.40k | } |
30 | | |
31 | 0 | void buffer_free_ptr(buffer *b) { |
32 | 0 | free(b->ptr); |
33 | 0 | b->ptr = NULL; |
34 | 0 | b->used = 0; |
35 | 0 | b->size = 0; |
36 | 0 | } |
37 | | |
38 | 0 | void buffer_move(buffer * restrict b, buffer * restrict src) { |
39 | 0 | buffer tmp; |
40 | 0 | buffer_clear(b); |
41 | 0 | tmp = *src; *src = *b; *b = tmp; |
42 | 0 | } |
43 | | |
44 | | /* make sure buffer is at least "size" big + 1 for '\0'. keep old data */ |
45 | | __attribute_cold__ |
46 | | __attribute_noinline__ |
47 | | __attribute_nonnull__() |
48 | | __attribute_returns_nonnull__ |
49 | 3.53k | static char* buffer_realloc(buffer * const restrict b, const size_t len) { |
50 | 7.06k | #define BUFFER_PIECE_SIZE 64uL /*(must be power-of-2)*/ |
51 | 3.53k | size_t sz = (len + 1 + BUFFER_PIECE_SIZE-1) & ~(BUFFER_PIECE_SIZE-1); |
52 | 3.53k | force_assert(sz > len); |
53 | 3.53k | if ((sz & (sz-1)) && sz < INT_MAX) {/* not power-2; huge val not expected */ |
54 | | /*(optimizer should recognize this and use ffs or clz or equivalent)*/ |
55 | 1.25k | const size_t psz = sz; |
56 | 8.62k | for (sz = 256; sz < psz; sz <<= 1) ; |
57 | 1.25k | } |
58 | 3.53k | sz |= 1; /*(extra +1 for '\0' when needed buffer size is exact power-2)*/ |
59 | | |
60 | 3.53k | b->size = sz; |
61 | 3.53k | b->ptr = realloc(b->ptr, sz); |
62 | | |
63 | 3.53k | force_assert(NULL != b->ptr); |
64 | 3.53k | return b->ptr; |
65 | 3.53k | } |
66 | | |
67 | | __attribute_cold__ |
68 | | __attribute_noinline__ |
69 | | __attribute_nonnull__() |
70 | | __attribute_returns_nonnull__ |
71 | 3.53k | static char* buffer_alloc_replace(buffer * const restrict b, const size_t size) { |
72 | | /*(discard old data so realloc() does not copy)*/ |
73 | 3.53k | if (NULL != b->ptr) { |
74 | 505 | free(b->ptr); |
75 | 505 | b->ptr = NULL; |
76 | 505 | } |
77 | | /*(note: if size larger than one lshift, use size instead of power-2)*/ |
78 | 3.53k | const size_t bsize2x = (b->size & ~1uL) << 1; |
79 | 3.53k | return buffer_realloc(b, bsize2x > size ? bsize2x-1 : size); |
80 | 3.53k | } |
81 | | |
82 | 1.32k | char* buffer_string_prepare_copy(buffer * const b, const size_t size) { |
83 | 1.32k | b->used = 0; |
84 | | #ifdef __COVERITY__ /*(b->ptr is not NULL if b->size is not 0)*/ |
85 | | force_assert(size >= b->size || b->ptr); |
86 | | #endif |
87 | 1.32k | return (size < b->size) |
88 | 1.32k | ? b->ptr |
89 | 1.32k | : buffer_alloc_replace(b, size); |
90 | 1.32k | } |
91 | | |
92 | | __attribute_cold__ |
93 | | __attribute_noinline__ |
94 | | __attribute_nonnull__() |
95 | | __attribute_returns_nonnull__ |
96 | 0 | static char* buffer_string_prepare_append_resize(buffer * const restrict b, const size_t size) { |
97 | 0 | if (b->used < 2) { /* buffer_is_blank(b) */ |
98 | 0 | char * const s = buffer_string_prepare_copy(b, size); |
99 | 0 | *s = '\0'; /*(for case (1 == b->used))*/ |
100 | 0 | return s; |
101 | 0 | } |
102 | | |
103 | | /* not empty, b->used already includes a terminating 0 */ |
104 | | /*(note: if size larger than one lshift, use size instead of power-2)*/ |
105 | 0 | const size_t bsize2x = (b->size & ~1uL) << 1; |
106 | 0 | const size_t req_size = (bsize2x - b->used > size) |
107 | 0 | ? bsize2x-1 |
108 | 0 | : b->used + size; |
109 | | |
110 | | /* check for overflow: unsigned overflow is defined to wrap around */ |
111 | 0 | force_assert(req_size >= b->used); |
112 | | |
113 | 0 | return buffer_realloc(b, req_size) + b->used - 1; |
114 | 0 | } |
115 | | |
116 | 0 | char* buffer_string_prepare_append(buffer * const b, const size_t size) { |
117 | 0 | const uint32_t len = b->used ? b->used-1 : 0; |
118 | 0 | return (b->size - len >= size + 1) |
119 | 0 | ? b->ptr + len |
120 | 0 | : buffer_string_prepare_append_resize(b, size); |
121 | 0 | } |
122 | | |
123 | | /*(prefer smaller code than inlining buffer_extend in many places in buffer.c)*/ |
124 | | __attribute_noinline__ |
125 | | char* |
126 | | buffer_extend (buffer * const b, const size_t x) |
127 | 132 | { |
128 | | /* extend buffer to append x (reallocate by power-2 (or larger), if needed) |
129 | | * (combine buffer_string_prepare_append() and buffer_commit()) |
130 | | * (future: might make buffer.h static inline func for HTTP/1.1 performance) |
131 | | * pre-sets '\0' byte and b->used (unlike buffer_string_prepare_append())*/ |
132 | | #if 0 |
133 | | char * const s = buffer_string_prepare_append(b, x); |
134 | | b->used += x + (0 == b->used); |
135 | | #else |
136 | 132 | const uint32_t len = b->used ? b->used-1 : 0; |
137 | 132 | char * const s = (b->size - len >= x + 1) |
138 | 132 | ? b->ptr + len |
139 | 132 | : buffer_string_prepare_append_resize(b, x); |
140 | 132 | b->used = len+x+1; |
141 | 132 | #endif |
142 | 132 | s[x] = '\0'; |
143 | 132 | return s; |
144 | 132 | } |
145 | | |
146 | | void buffer_commit(buffer *b, size_t size) |
147 | 0 | { |
148 | 0 | size_t sz = b->used; |
149 | 0 | if (0 == sz) sz = 1; |
150 | |
|
151 | 0 | #if __has_builtin(__builtin_add_overflow) |
152 | 0 | if (__builtin_add_overflow(size, sz, &sz)) |
153 | 0 | ck_assert_failed(__FILE__, __LINE__, "add overflow"); |
154 | | #else |
155 | | if (size > 0) { |
156 | | /* check for overflow: unsigned overflow is defined to wrap around */ |
157 | | sz += size; |
158 | | force_assert(sz > size); |
159 | | } |
160 | | #endif |
161 | | |
162 | 0 | b->used = sz; |
163 | 0 | b->ptr[sz - 1] = '\0'; |
164 | 0 | } |
165 | | |
166 | | __attribute_cold__ /*(reduce code size due to inlining)*/ |
167 | 0 | void buffer_copy_string(buffer * restrict b, const char * restrict s) { |
168 | 0 | if (__builtin_expect( (NULL == s), 0)) s = ""; |
169 | 0 | buffer_copy_string_len(b, s, strlen(s)); |
170 | 0 | } |
171 | | |
172 | 3.15k | void buffer_copy_string_len(buffer * const restrict b, const char * const restrict s, const size_t len) { |
173 | 3.15k | b->used = len + 1; |
174 | 3.15k | char * const restrict d = (len < b->size) |
175 | 3.15k | ? b->ptr |
176 | 3.15k | : buffer_alloc_replace(b, len); |
177 | 3.15k | d[len] = '\0'; |
178 | 3.15k | memcpy(d, s, len); |
179 | 3.15k | } |
180 | | |
181 | | __attribute_cold__ /*(reduce code size due to inlining)*/ |
182 | 0 | void buffer_append_string(buffer * restrict b, const char * restrict s) { |
183 | 0 | if (__builtin_expect( (NULL == s), 0)) s = ""; |
184 | 0 | buffer_append_string_len(b, s, strlen(s)); |
185 | 0 | } |
186 | | |
187 | | /** |
188 | | * append a string to the end of the buffer |
189 | | * |
190 | | * the resulting buffer is terminated with a '\0' |
191 | | * s is treated as a un-terminated string (a \0 is handled a normal character) |
192 | | * |
193 | | * @param b a buffer |
194 | | * @param s the string |
195 | | * @param len size of the string (without the terminating \0) |
196 | | */ |
197 | | |
198 | 132 | void buffer_append_string_len(buffer * const restrict b, const char * const restrict s, const size_t len) { |
199 | 132 | memcpy(buffer_extend(b, len), s, len); |
200 | 132 | } |
201 | | |
202 | 0 | void buffer_append_str2(buffer * const restrict b, const char * const s1, const size_t len1, const char * const s2, const size_t len2) { |
203 | 0 | char * const restrict s = buffer_extend(b, len1+len2); |
204 | 0 | #ifdef HAVE_MEMPCPY |
205 | 0 | mempcpy(mempcpy(s, s1, len1), s2, len2); |
206 | | #else |
207 | | memcpy(s, s1, len1); |
208 | | memcpy(s+len1, s2, len2); |
209 | | #endif |
210 | 0 | } |
211 | | |
212 | 0 | void buffer_append_str3(buffer * const restrict b, const char * const s1, const size_t len1, const char * const s2, const size_t len2, const char * const s3, const size_t len3) { |
213 | 0 | char * restrict s = buffer_extend(b, len1+len2+len3); |
214 | 0 | #ifdef HAVE_MEMPCPY |
215 | 0 | mempcpy(mempcpy(mempcpy(s, s1, len1), s2, len2), s3, len3); |
216 | | #else |
217 | | memcpy(s, s1, len1); |
218 | | memcpy((s+=len1), s2, len2); |
219 | | memcpy((s+=len2), s3, len3); |
220 | | #endif |
221 | 0 | } |
222 | | |
223 | 0 | void buffer_append_iovec(buffer * const restrict b, const struct const_iovec * const iov, const size_t n) { |
224 | 0 | size_t len = 0; |
225 | 0 | for (size_t i = 0; i < n; ++i) |
226 | 0 | len += iov[i].iov_len; |
227 | 0 | char *s = buffer_extend(b, len); |
228 | 0 | for (size_t i = 0; i < n; ++i) { |
229 | 0 | if (0 == iov[i].iov_len) continue; |
230 | 0 | #ifdef HAVE_MEMPCPY |
231 | 0 | s = mempcpy(s, iov[i].iov_base, iov[i].iov_len); |
232 | | #else |
233 | | memcpy(s, iov[i].iov_base, iov[i].iov_len); |
234 | | s += iov[i].iov_len; |
235 | | #endif |
236 | 0 | } |
237 | 0 | } |
238 | | |
239 | 0 | void buffer_append_path_len(buffer * restrict b, const char * restrict a, size_t alen) { |
240 | 0 | char * restrict s = buffer_string_prepare_append(b, alen+1); |
241 | | #ifdef _WIN32 |
242 | | const int aslash = (alen && (a[0] == '/' || a[0] == '\\')); |
243 | | if (b->used > 1 && (s[-1] == '/' || s[-1] == '\\')) |
244 | | #else |
245 | 0 | const int aslash = (alen && a[0] == '/'); |
246 | 0 | if (b->used > 1 && s[-1] == '/') |
247 | 0 | #endif |
248 | 0 | { |
249 | 0 | if (aslash) { |
250 | 0 | ++a; |
251 | 0 | --alen; |
252 | 0 | } |
253 | 0 | } |
254 | 0 | else { |
255 | 0 | if (0 == b->used) b->used = 1; |
256 | 0 | if (!aslash) { |
257 | 0 | *s++ = '/'; |
258 | 0 | ++b->used; |
259 | 0 | } |
260 | 0 | } |
261 | 0 | b->used += alen; |
262 | 0 | s[alen] = '\0'; |
263 | 0 | memcpy(s, a, alen); |
264 | 0 | } |
265 | | |
266 | | void |
267 | | buffer_copy_path_len2 (buffer * const restrict b, const char * const restrict s1, size_t len1, const char * const restrict s2, size_t len2) |
268 | 0 | { |
269 | | /*(similar to buffer_copy_string_len(b, s1, len1) but combined allocation)*/ |
270 | 0 | memcpy(buffer_string_prepare_copy(b, len1+len2+1), s1, len1); |
271 | 0 | b->used = len1 + 1; /*('\0' byte will be written below)*/ |
272 | |
|
273 | 0 | buffer_append_path_len(b, s2, len2);/*(choice: not inlined, special-cased)*/ |
274 | 0 | } |
275 | | |
276 | | void |
277 | | buffer_copy_string_len_lc (buffer * const restrict b, const char * const restrict s, const size_t len) |
278 | 0 | { |
279 | 0 | char * const restrict d = buffer_string_prepare_copy(b, len); |
280 | 0 | b->used = len+1; |
281 | 0 | d[len] = '\0'; |
282 | 0 | for (size_t i = 0; i < len; ++i) |
283 | 0 | d[i] = (!light_isupper(s[i])) ? s[i] : s[i] | 0x20; |
284 | 0 | } |
285 | | |
286 | 0 | void buffer_append_uint_hex_lc(buffer *b, uintmax_t value) { |
287 | 0 | char *buf; |
288 | 0 | unsigned int shift = 0; |
289 | |
|
290 | 0 | { |
291 | 0 | uintmax_t copy = value; |
292 | 0 | do { |
293 | 0 | copy >>= 8; |
294 | 0 | shift += 8; /* counting bits */ |
295 | 0 | } while (0 != copy); |
296 | 0 | } |
297 | |
|
298 | 0 | buf = buffer_extend(b, shift >> 2); /*nibbles (4 bits)*/ |
299 | |
|
300 | 0 | while (shift > 0) { |
301 | 0 | shift -= 4; |
302 | 0 | *(buf++) = hex_chars_lc[(value >> shift) & 0x0F]; |
303 | 0 | } |
304 | 0 | } |
305 | | |
306 | | __attribute_nonnull__() |
307 | | __attribute_returns_nonnull__ |
308 | 0 | static char* utostr(char buf[LI_ITOSTRING_LENGTH], uintmax_t val) { |
309 | 0 | char *cur = buf+LI_ITOSTRING_LENGTH; |
310 | 0 | uintmax_t x; |
311 | 0 | do { |
312 | 0 | *(--cur) = (char) ('0' + (int)(val - (x = val/10) * 10)); |
313 | 0 | } while (0 != (val = x)); /* val % 10 */ |
314 | 0 | return cur; |
315 | 0 | } |
316 | | |
317 | | __attribute_nonnull__() |
318 | | __attribute_returns_nonnull__ |
319 | 0 | static char* itostr(char buf[LI_ITOSTRING_LENGTH], intmax_t val) { |
320 | | /* absolute value not defined for INTMAX_MIN, but can take absolute |
321 | | * value of any negative number via twos complement cast to unsigned. |
322 | | * negative sign is prepended after (now unsigned) value is converted |
323 | | * to string */ |
324 | 0 | uintmax_t uval = val >= 0 ? (uintmax_t)val : ((uintmax_t)~val) + 1; |
325 | 0 | char *cur = utostr(buf, uval); |
326 | 0 | if (val < 0) *(--cur) = '-'; |
327 | |
|
328 | 0 | return cur; |
329 | 0 | } |
330 | | |
331 | 0 | void buffer_append_int(buffer *b, intmax_t val) { |
332 | 0 | char buf[LI_ITOSTRING_LENGTH]; |
333 | 0 | const char * const str = itostr(buf, val); |
334 | 0 | buffer_append_string_len(b, str, buf+sizeof(buf) - str); |
335 | 0 | } |
336 | | |
337 | 0 | void buffer_append_strftime(buffer * const restrict b, const char * const restrict format, const struct tm * const restrict tm) { |
338 | | /*(localtime_r() or gmtime_r() producing tm should not have failed)*/ |
339 | 0 | if (__builtin_expect( (NULL == tm), 0)) return; |
340 | | |
341 | | /*(expecting typical format strings to result in < 64 bytes needed; |
342 | | * skipping buffer_string_space() calculation and providing fixed size)*/ |
343 | 0 | size_t rv = strftime(buffer_string_prepare_append(b, 63), 64, format, tm); |
344 | | |
345 | | /* 0 (in some apis) signals the string may have been too small; |
346 | | * but the format could also just have lead to an empty string */ |
347 | 0 | if (__builtin_expect( (0 == rv), 0) || __builtin_expect( (rv > 63), 0)) { |
348 | | /* unexpected; give it a second try with a larger string */ |
349 | 0 | rv = strftime(buffer_string_prepare_append(b, 4095), 4096, format, tm); |
350 | 0 | if (__builtin_expect( (rv > 4095), 0))/*(input format was ridiculous)*/ |
351 | 0 | return; |
352 | 0 | } |
353 | | |
354 | | /*buffer_commit(b, rv);*/ |
355 | 0 | b->used += (uint32_t)rv + (0 == b->used); |
356 | 0 | } |
357 | | |
358 | | |
359 | 0 | size_t li_itostrn(char *buf, size_t buf_len, intmax_t val) { |
360 | 0 | char p_buf[LI_ITOSTRING_LENGTH]; |
361 | 0 | char* const str = itostr(p_buf, val); |
362 | 0 | size_t len = (size_t)(p_buf+sizeof(p_buf)-str); |
363 | 0 | force_assert(len <= buf_len); |
364 | 0 | memcpy(buf, str, len); |
365 | 0 | return len; |
366 | 0 | } |
367 | | |
368 | 0 | size_t li_utostrn(char *buf, size_t buf_len, uintmax_t val) { |
369 | 0 | char p_buf[LI_ITOSTRING_LENGTH]; |
370 | 0 | char* const str = utostr(p_buf, val); |
371 | 0 | size_t len = (size_t)(p_buf+sizeof(p_buf)-str); |
372 | 0 | force_assert(len <= buf_len); |
373 | 0 | memcpy(buf, str, len); |
374 | 0 | return len; |
375 | 0 | } |
376 | | |
/* nibble value (0-15) to lowercase hex digit character */
#define li_ntox_lc(n) ((n) <= 9 ? (n) + '0' : (n) + 'a' - 10)

/* hex digit character c to nibble n; evaluates to non-zero on success.
 * c (char) and n (nibble) MUST be unsigned integer types: the range checks
 * rely on the subtraction wrapping to a large value for out-of-range input */
#define li_cton(c,n) \
  (((n) = (c) - '0') <= 9 || (((n) = ((c)&0xdf) - 'A') <= 5 ? ((n) += 10) : 0))

/* Convert one hex digit character (0-9, A-F, a-f) to its value 0-15.
 * Any other input yields 0xFF (converted to the char return type).
 */
char hex2int(unsigned char hex) {
    unsigned char nibble;
    if (li_cton(hex,nibble))
        return (char)nibble;
    return 0xFF;
}
390 | | |
/* Validate and convert a hex string to binary, e.g. a 32-char MD5 hex string
 * to a 16-byte digest, or a 64-char SHA-256 / SHA-512-256 hex string to a
 * 32-byte digest.
 *
 * bin     destination for the decoded bytes
 * binlen  capacity of bin in bytes
 * hexstr  hex digits (upper or lower case); need not be '\0'-terminated
 * len     number of hex digits at hexstr; must be even
 *
 * Returns 0 on success, or -1 if len is odd, the decoded data would not fit
 * into bin, or a non-hex character is found.  (On an invalid character,
 * bytes decoded before it have already been stored in bin.)
 */
int li_hex2bin (unsigned char * const bin, const size_t binlen, const char * const hexstr, const size_t len)
{
    /* an odd length would make the loop read hexstr[len], one past the
     * input; comparing len/2 (rather than binlen*2) avoids shift overflow */
    if ((len & 1) || (len >> 1) > binlen) return -1;
    for (size_t i = 0; i < len; i+=2) {
        int hi = hexstr[i];
        int lo = hexstr[i+1];
        /* letters: force lowercase with |= 0x20, then a single unsigned
         * range check for 'a'..'f' */
        if ('0' <= hi && hi <= '9')                    hi -= '0';
        else if ((uint32_t)(hi |= 0x20)-'a' <= 'f'-'a')hi += -'a' + 10;
        else                                           return -1;
        if ('0' <= lo && lo <= '9')                    lo -= '0';
        else if ((uint32_t)(lo |= 0x20)-'a' <= 'f'-'a')lo += -'a' + 10;
        else                                           return -1;
        bin[(i >> 1)] = (unsigned char)((hi << 4) | lo);
    }
    return 0;
}
409 | | |
410 | | |
411 | | __attribute_noinline__ |
412 | 0 | int buffer_eq_icase_ssn(const char * const a, const char * const b, const size_t len) { |
413 | 0 | for (size_t i = 0; i < len; ++i) { |
414 | 0 | unsigned int ca = ((unsigned char *)a)[i]; |
415 | 0 | unsigned int cb = ((unsigned char *)b)[i]; |
416 | 0 | if (ca != cb && ((ca ^ cb) != 0x20 || !light_isalpha(ca))) return 0; |
417 | 0 | } |
418 | 0 | return 1; |
419 | 0 | } |
420 | | |
/* Case-insensitive comparison of two length-delimited strings.
 * 1 = equal; 0 = not equal.  short string sizes expected (< INT_MAX) */
int buffer_eq_icase_ss(const char * const a, const size_t alen, const char * const b, const size_t blen) {
    if (alen != blen)
        return 0;
    return buffer_eq_icase_ssn(a, b, blen);
}
425 | | |
426 | 0 | int buffer_eq_icase_slen(const buffer * const b, const char * const s, const size_t slen) { |
427 | | /* Note: b must be initialized, i.e. 0 != b->used; uninitialized is not eq*/ |
428 | | /* 1 = equal; 0 = not equal */ /* short string sizes expected (< INT_MAX) */ |
429 | 0 | return (b->used == slen + 1) ? buffer_eq_icase_ssn(b->ptr, s, slen) : 0; |
430 | 0 | } |
431 | | |
432 | 0 | int buffer_eq_slen(const buffer * const b, const char * const s, const size_t slen) { |
433 | | /* Note: b must be initialized, i.e. 0 != b->used; uninitialized is not eq*/ |
434 | | /* 1 = equal; 0 = not equal */ /* short string sizes expected (< INT_MAX) */ |
435 | 0 | return (b->used == slen + 1 && 0 == memcmp(b->ptr, s, slen)); |
436 | 0 | } |
437 | | |
438 | | |
439 | | /** |
440 | | * check if two buffer contain the same data |
441 | | */ |
442 | | |
443 | 0 | int buffer_is_equal(const buffer *a, const buffer *b) { |
444 | | /* 1 = equal; 0 = not equal */ |
445 | 0 | return (a->used == b->used && 0 == memcmp(a->ptr, b->ptr, a->used)); |
446 | 0 | } |
447 | | |
448 | | |
449 | 0 | void li_tohex_lc(char * const restrict buf, size_t buf_len, const char * const restrict s, size_t s_len) { |
450 | 0 | force_assert(s_len <= (buf_len >> 1)); |
451 | | |
452 | 0 | for (size_t i = 0; i < s_len; ++i) { |
453 | 0 | buf[2*i] = hex_chars_lc[(s[i] >> 4) & 0x0F]; |
454 | 0 | buf[2*i+1] = hex_chars_lc[s[i] & 0x0F]; |
455 | 0 | } |
456 | 0 | } |
457 | | |
458 | 0 | void li_tohex_uc(char * const restrict buf, size_t buf_len, const char * const restrict s, size_t s_len) { |
459 | 0 | force_assert(s_len <= (buf_len >> 1)); |
460 | | |
461 | 0 | for (size_t i = 0; i < s_len; ++i) { |
462 | 0 | buf[2*i] = hex_chars_uc[(s[i] >> 4) & 0x0F]; |
463 | 0 | buf[2*i+1] = hex_chars_uc[s[i] & 0x0F]; |
464 | 0 | } |
465 | 0 | } |
466 | | |
467 | | |
468 | | void buffer_substr_replace (buffer * const restrict b, const size_t offset, |
469 | | const size_t len, const buffer * const restrict replace) |
470 | 0 | { |
471 | 0 | const size_t blen = buffer_clen(b); |
472 | 0 | const size_t rlen = buffer_clen(replace); |
473 | |
|
474 | 0 | if (rlen > len) { |
475 | 0 | buffer_extend(b, rlen-len); |
476 | 0 | memmove(b->ptr+offset+rlen, b->ptr+offset+len, blen-offset-len); |
477 | 0 | } |
478 | |
|
479 | 0 | memcpy(b->ptr+offset, replace->ptr, rlen); |
480 | |
|
481 | 0 | if (rlen < len) { |
482 | 0 | memmove(b->ptr+offset+rlen, b->ptr+offset+len, blen-offset-len); |
483 | 0 | buffer_truncate(b, blen-len+rlen); |
484 | 0 | } |
485 | 0 | } |
486 | | |
487 | | |
488 | 0 | void buffer_append_string_encoded_hex_lc(buffer * const restrict b, const char * const restrict s, size_t len) { |
489 | 0 | unsigned char * const p = (unsigned char *)buffer_extend(b, len*2); |
490 | 0 | for (size_t i = 0; i < len; ++i) { |
491 | 0 | p[(i<<1)] = hex_chars_lc[(s[i] >> 4) & 0x0F]; |
492 | 0 | p[(i<<1)+1] = hex_chars_lc[(s[i]) & 0x0F]; |
493 | 0 | } |
494 | 0 | } |
495 | | |
496 | 0 | void buffer_append_string_encoded_hex_uc(buffer * const restrict b, const char * const restrict s, size_t len) { |
497 | 0 | unsigned char * const p = (unsigned char *)buffer_extend(b, len*2); |
498 | 0 | for (size_t i = 0; i < len; ++i) { |
499 | 0 | p[(i<<1)] = hex_chars_uc[(s[i] >> 4) & 0x0F]; |
500 | 0 | p[(i<<1)+1] = hex_chars_uc[(s[i]) & 0x0F]; |
501 | 0 | } |
502 | 0 | } |
503 | | |
504 | | |
/* Lookup table indexed by byte value: 1 = byte must be escaped.
 * Escapes everything except: ! ( ) * - . 0-9 A-Z _ a-z ~
 * (unlike encoded_chars_rel_uri, '/' IS escaped here) */
static const char encoded_chars_rel_uri_part[] = {
    /* x0-x3     x4-x7       x8-xB       xC-xF */
    1,1,1,1,    1,1,1,1,    1,1,1,1,    1,1,1,1,  /* 0x: control chars */
    1,1,1,1,    1,1,1,1,    1,1,1,1,    1,1,1,1,  /* 1x: control chars */
    1,0,1,1,    1,1,1,1,    0,0,0,1,    1,0,0,1,  /* 2x: space " # $ % & ' + , / */
    0,0,0,0,    0,0,0,0,    0,0,1,1,    1,1,1,1,  /* 3x: : ; < = > ? */
    1,0,0,0,    0,0,0,0,    0,0,0,0,    0,0,0,0,  /* 4x: @ */
    0,0,0,0,    0,0,0,0,    0,0,0,1,    1,1,1,0,  /* 5x: [ \ ] ^ */
    1,0,0,0,    0,0,0,0,    0,0,0,0,    0,0,0,0,  /* 6x: ` */
    0,0,0,0,    0,0,0,0,    0,0,0,1,    1,1,0,1,  /* 7x: { | } DEL */
    1,1,1,1,    1,1,1,1,    1,1,1,1,    1,1,1,1,  /* 8x: non-ASCII */
    1,1,1,1,    1,1,1,1,    1,1,1,1,    1,1,1,1,  /* 9x */
    1,1,1,1,    1,1,1,1,    1,1,1,1,    1,1,1,1,  /* Ax */
    1,1,1,1,    1,1,1,1,    1,1,1,1,    1,1,1,1,  /* Bx */
    1,1,1,1,    1,1,1,1,    1,1,1,1,    1,1,1,1,  /* Cx */
    1,1,1,1,    1,1,1,1,    1,1,1,1,    1,1,1,1,  /* Dx */
    1,1,1,1,    1,1,1,1,    1,1,1,1,    1,1,1,1,  /* Ex */
    1,1,1,1,    1,1,1,1,    1,1,1,1,    1,1,1,1,  /* Fx */
};
527 | | |
/* Lookup table indexed by byte value: 1 = byte must be escaped.
 * Escapes everything except: ! ( ) * - . / 0-9 A-Z _ a-z ~
 * (same as encoded_chars_rel_uri_part, but '/' is left as-is) */
static const char encoded_chars_rel_uri[] = {
    /* x0-x3     x4-x7       x8-xB       xC-xF */
    1,1,1,1,    1,1,1,1,    1,1,1,1,    1,1,1,1,  /* 0x: control chars */
    1,1,1,1,    1,1,1,1,    1,1,1,1,    1,1,1,1,  /* 1x: control chars */
    1,0,1,1,    1,1,1,1,    0,0,0,1,    1,0,0,0,  /* 2x: space " # $ % & ' + , */
    0,0,0,0,    0,0,0,0,    0,0,1,1,    1,1,1,1,  /* 3x: : ; < = > ? */
    1,0,0,0,    0,0,0,0,    0,0,0,0,    0,0,0,0,  /* 4x: @ */
    0,0,0,0,    0,0,0,0,    0,0,0,1,    1,1,1,0,  /* 5x: [ \ ] ^ */
    1,0,0,0,    0,0,0,0,    0,0,0,0,    0,0,0,0,  /* 6x: ` */
    0,0,0,0,    0,0,0,0,    0,0,0,1,    1,1,0,1,  /* 7x: { | } DEL */
    1,1,1,1,    1,1,1,1,    1,1,1,1,    1,1,1,1,  /* 8x: non-ASCII */
    1,1,1,1,    1,1,1,1,    1,1,1,1,    1,1,1,1,  /* 9x */
    1,1,1,1,    1,1,1,1,    1,1,1,1,    1,1,1,1,  /* Ax */
    1,1,1,1,    1,1,1,1,    1,1,1,1,    1,1,1,1,  /* Bx */
    1,1,1,1,    1,1,1,1,    1,1,1,1,    1,1,1,1,  /* Cx */
    1,1,1,1,    1,1,1,1,    1,1,1,1,    1,1,1,1,  /* Dx */
    1,1,1,1,    1,1,1,1,    1,1,1,1,    1,1,1,1,  /* Ex */
    1,1,1,1,    1,1,1,1,    1,1,1,1,    1,1,1,1,  /* Fx */
};
550 | | |
/* Lookup table indexed by byte value: 1 = byte must be escaped.
 * Escapes control chars, " & ' < > ` DEL, and every byte >= 0x80. */
static const char encoded_chars_html[] = {
    /* x0-x3     x4-x7       x8-xB       xC-xF */
    1,1,1,1,    1,1,1,1,    1,1,1,1,    1,1,1,1,  /* 0x: control chars */
    1,1,1,1,    1,1,1,1,    1,1,1,1,    1,1,1,1,  /* 1x: control chars */
    0,0,1,0,    0,0,1,1,    0,0,0,0,    0,0,0,0,  /* 2x: " & ' */
    0,0,0,0,    0,0,0,0,    0,0,0,0,    1,0,1,0,  /* 3x: < > */
    0,0,0,0,    0,0,0,0,    0,0,0,0,    0,0,0,0,  /* 4x */
    0,0,0,0,    0,0,0,0,    0,0,0,0,    0,0,0,0,  /* 5x */
    1,0,0,0,    0,0,0,0,    0,0,0,0,    0,0,0,0,  /* 6x: ` */
    0,0,0,0,    0,0,0,0,    0,0,0,0,    0,0,0,1,  /* 7x: DEL */
    1,1,1,1,    1,1,1,1,    1,1,1,1,    1,1,1,1,  /* 8x: non-ASCII */
    1,1,1,1,    1,1,1,1,    1,1,1,1,    1,1,1,1,  /* 9x */
    1,1,1,1,    1,1,1,1,    1,1,1,1,    1,1,1,1,  /* Ax */
    1,1,1,1,    1,1,1,1,    1,1,1,1,    1,1,1,1,  /* Bx */
    1,1,1,1,    1,1,1,1,    1,1,1,1,    1,1,1,1,  /* Cx */
    1,1,1,1,    1,1,1,1,    1,1,1,1,    1,1,1,1,  /* Dx */
    1,1,1,1,    1,1,1,1,    1,1,1,1,    1,1,1,1,  /* Ex */
    1,1,1,1,    1,1,1,1,    1,1,1,1,    1,1,1,1,  /* Fx */
};
572 | | |
/* Lookup table indexed by byte value: 1 = byte must be escaped.
 * Escapes control chars, " & ' < > ` and DEL; bytes >= 0x80 are left as-is
 * (this is the only difference from encoded_chars_html). */
static const char encoded_chars_minimal_xml[] = {
    /* x0-x3     x4-x7       x8-xB       xC-xF */
    1,1,1,1,    1,1,1,1,    1,1,1,1,    1,1,1,1,  /* 0x: control chars */
    1,1,1,1,    1,1,1,1,    1,1,1,1,    1,1,1,1,  /* 1x: control chars */
    0,0,1,0,    0,0,1,1,    0,0,0,0,    0,0,0,0,  /* 2x: " & ' */
    0,0,0,0,    0,0,0,0,    0,0,0,0,    1,0,1,0,  /* 3x: < > */
    0,0,0,0,    0,0,0,0,    0,0,0,0,    0,0,0,0,  /* 4x */
    0,0,0,0,    0,0,0,0,    0,0,0,0,    0,0,0,0,  /* 5x */
    1,0,0,0,    0,0,0,0,    0,0,0,0,    0,0,0,0,  /* 6x: ` */
    0,0,0,0,    0,0,0,0,    0,0,0,0,    0,0,0,1,  /* 7x: DEL */
    0,0,0,0,    0,0,0,0,    0,0,0,0,    0,0,0,0,  /* 8x: non-ASCII */
    0,0,0,0,    0,0,0,0,    0,0,0,0,    0,0,0,0,  /* 9x */
    0,0,0,0,    0,0,0,0,    0,0,0,0,    0,0,0,0,  /* Ax */
    0,0,0,0,    0,0,0,0,    0,0,0,0,    0,0,0,0,  /* Bx */
    0,0,0,0,    0,0,0,0,    0,0,0,0,    0,0,0,0,  /* Cx */
    0,0,0,0,    0,0,0,0,    0,0,0,0,    0,0,0,0,  /* Dx */
    0,0,0,0,    0,0,0,0,    0,0,0,0,    0,0,0,0,  /* Ex */
    0,0,0,0,    0,0,0,0,    0,0,0,0,    0,0,0,0,  /* Fx */
};
594 | | |
595 | | static const char * const encoded_chars_maps[] = { |
596 | | encoded_chars_rel_uri, /* .[ENCODING_REL_URI] */ |
597 | | encoded_chars_rel_uri_part, /* .[ENCODING_REL_URI_PART] */ |
598 | | encoded_chars_html, /* .[ENCODING_HTML] */ |
599 | | encoded_chars_minimal_xml /* .[ENCODING_MINIMAL_XML] */ |
600 | | }; |
601 | | |
602 | | |
603 | 0 | void buffer_append_string_encoded(buffer * const restrict b, const char * const restrict s, size_t len, buffer_encoding_t encoding) { |
604 | 0 | if (__builtin_expect( (0 == len), 0)) return; |
605 | | |
606 | 0 | const unsigned char *ds; |
607 | 0 | const unsigned char * const end = (const unsigned char *)s + len; |
608 | 0 | size_t dlen = 0; |
609 | 0 | const char * const map = encoded_chars_maps[encoding]; |
610 | | |
611 | | /* count to-be-encoded-characters: +3 for REL_URI*; +6 for HTML/XML */ |
612 | 0 | ds = (const unsigned char *)s; |
613 | 0 | do { |
614 | 0 | dlen += !map[*ds] ? 1 : (encoding <= ENCODING_REL_URI_PART) ? 3 : 6; |
615 | 0 | } while (++ds < end); |
616 | |
|
617 | 0 | if (dlen == len) { /*(short-circuit; nothing to encode)*/ |
618 | 0 | buffer_append_string_len(b, s, len); |
619 | 0 | return; |
620 | 0 | } |
621 | | |
622 | 0 | unsigned char * restrict d = (unsigned char *)buffer_extend(b, dlen); |
623 | 0 | ds = (const unsigned char *)s; |
624 | 0 | do { |
625 | 0 | if (!map[*ds]) |
626 | 0 | *d++ = *ds; |
627 | 0 | else if (encoding <= ENCODING_REL_URI_PART) { |
628 | 0 | d[0] = '%'; |
629 | 0 | d[1] = hex_chars_uc[*ds >> 4]; |
630 | 0 | d[2] = hex_chars_uc[*ds & 0x0F]; |
631 | 0 | d += 3; |
632 | 0 | } |
633 | 0 | else { |
634 | 0 | d[0] = '&'; |
635 | 0 | d[1] = '#'; |
636 | 0 | d[2] = 'x'; |
637 | 0 | d[3] = hex_chars_uc[*ds >> 4]; |
638 | 0 | d[4] = hex_chars_uc[*ds & 0x0F]; |
639 | 0 | d[5] = ';'; |
640 | 0 | d += 6; |
641 | 0 | } |
642 | 0 | } while (++ds < end); |
643 | 0 | } |
644 | | |
645 | 0 | void buffer_append_string_c_escaped(buffer * const restrict b, const char * const restrict s, size_t len) { |
646 | 0 | if (__builtin_expect( (0 == len), 0)) return; |
647 | | |
648 | 0 | const unsigned char *ds; |
649 | 0 | const unsigned char * const end = (const unsigned char *)s + len; |
650 | 0 | size_t dlen = 0; |
651 | | |
652 | | /* count to-be-encoded-characters: +2 for \t \n \r; +4 for other encs */ |
653 | 0 | ds = (const unsigned char *)s; |
654 | 0 | do { |
655 | 0 | dlen += light_isprint(*ds) |
656 | 0 | ? 1 |
657 | 0 | : (*ds == '\t' || *ds == '\n' || *ds == '\r') ? 2 : 4; |
658 | 0 | } while (++ds < end); |
659 | |
|
660 | 0 | if (dlen == len) { /*(short-circuit; nothing to encode)*/ |
661 | 0 | buffer_append_string_len(b, s, len); |
662 | 0 | return; |
663 | 0 | } |
664 | | |
665 | 0 | unsigned char * restrict d = (unsigned char *)buffer_extend(b, dlen); |
666 | 0 | ds = (const unsigned char *)s; |
667 | 0 | do { |
668 | 0 | if (light_isprint(*ds)) |
669 | 0 | *d++ = *ds; |
670 | 0 | else { /* CTLs or non-ASCII characters */ |
671 | 0 | d[0] = '\\'; |
672 | 0 | switch (*ds) { |
673 | 0 | case '\t': case '\n': case '\r': |
674 | 0 | d[1] = "0000000abtnvfr"[*ds]; |
675 | 0 | d += 2; |
676 | 0 | break; |
677 | 0 | default: |
678 | 0 | d[1] = 'x'; |
679 | 0 | d[2] = hex_chars_lc[*ds >> 4]; |
680 | 0 | d[3] = hex_chars_lc[*ds & 0x0F]; |
681 | 0 | d += 4; |
682 | 0 | break; |
683 | 0 | } |
684 | 0 | } |
685 | 0 | } while (++ds < end); |
686 | 0 | } |
687 | | |
688 | | |
689 | | void |
690 | | buffer_append_bs_escaped (buffer * const restrict b, |
691 | | const char * restrict s, const size_t len) |
692 | 0 | { |
693 | | /* replaces non-printable chars with escaped string |
694 | | * default: \xHH where HH is the hex representation of the byte |
695 | | * exceptions: " => \", \ => \\, whitespace chars => \n \t etc. */ |
696 | | /* Intended for use escaping string to be surrounded by double-quotes */ |
697 | | /* Performs single pass over string and is optimized for ASCII; |
698 | | * non-ASCII escaping might be slightly sped up by walking input twice, |
699 | | * first to calculate escaped length and extend the destination b, and |
700 | | * second to do the escaping. (This non-ASCII optim is not done here) */ |
701 | 0 | buffer_string_prepare_append(b, len); |
702 | 0 | for (const char * const end = s+len; s < end; ++s) { |
703 | 0 | const char * const ptr = s; |
704 | 0 | while (light_isprint(*s) && *s != '"' && *s != '\\' && ++s < end) ; |
705 | 0 | if (s - ptr) buffer_append_string_len(b, ptr, s - ptr); |
706 | |
|
707 | 0 | if (s == end) |
708 | 0 | return; |
709 | | |
710 | | /* ('\a', '\v' shortcuts are technically not json-escaping) */ |
711 | | /* ('\0' is also omitted due to the possibility of string corruption if |
712 | | * the receiver supports decoding octal escapes (\000) and the escaped |
713 | | * string contains \0 followed by two digits not part of escaping)*/ |
714 | | |
715 | 0 | unsigned int c = *(const unsigned char *)s; |
716 | 0 | char *d; |
717 | 0 | switch (c) { |
718 | 0 | case '\a':case '\b':case '\t':case '\n':case '\v':case '\f':case '\r': |
719 | 0 | c = "0000000abtnvfr"[c]; |
720 | 0 | __attribute_fallthrough__ |
721 | 0 | case '"': case '\\': |
722 | 0 | d = buffer_extend(b, 2); |
723 | 0 | d[0] = '\\'; |
724 | 0 | d[1] = c; |
725 | 0 | break; |
726 | 0 | default: |
727 | | /* non printable char => \xHH */ |
728 | 0 | d = buffer_extend(b, 4); |
729 | 0 | d[0] = '\\'; |
730 | 0 | d[1] = 'x'; |
731 | 0 | d[2] = hex_chars_uc[c >> 4]; |
732 | 0 | d[3] = hex_chars_uc[c & 0xF]; |
733 | 0 | break; |
734 | 0 | } |
735 | 0 | } |
736 | 0 | } |
737 | | |
738 | | |
739 | | void |
740 | | buffer_append_bs_escaped_json (buffer * const restrict b, |
741 | | const char * restrict s, const size_t len) |
742 | 0 | { |
743 | | /* replaces non-printable chars with escaped string |
744 | | * json: \u00HH where HH is the hex representation of the byte |
745 | | * exceptions: " => \", \ => \\, whitespace chars => \n \t etc. */ |
746 | | /* Intended for use escaping string to be surrounded by double-quotes */ |
747 | 0 | buffer_string_prepare_append(b, len); |
748 | 0 | for (const char * const end = s+len; s < end; ++s) { |
749 | 0 | const char * const ptr = s; |
750 | 0 | while (!light_iscntrl(*s) && *s != '"' && *s != '\\' && ++s < end) ; |
751 | 0 | if (s - ptr) buffer_append_string_len(b, ptr, s - ptr); |
752 | |
|
753 | 0 | if (s == end) |
754 | 0 | return; |
755 | | |
756 | | /* ('\a', '\v' shortcuts are technically not json-escaping) */ |
757 | | /* ('\0' is also omitted due to the possibility of string corruption if |
758 | | * the receiver supports decoding octal escapes (\000) and the escaped |
759 | | * string contains \0 followed by two digits not part of escaping)*/ |
760 | | |
761 | 0 | unsigned int c = *(const unsigned char *)s; |
762 | 0 | char *d; |
763 | 0 | switch (c) { |
764 | 0 | case '\a':case '\b':case '\t':case '\n':case '\v':case '\f':case '\r': |
765 | 0 | c = "0000000abtnvfr"[c]; |
766 | 0 | __attribute_fallthrough__ |
767 | 0 | case '"': case '\\': |
768 | 0 | d = buffer_extend(b, 2); |
769 | 0 | d[0] = '\\'; |
770 | 0 | d[1] = c; |
771 | 0 | break; |
772 | 0 | default: |
773 | 0 | d = buffer_extend(b, 6); |
774 | 0 | d[0] = '\\'; |
775 | 0 | d[1] = 'u'; |
776 | 0 | d[2] = '0'; |
777 | 0 | d[3] = '0'; |
778 | 0 | d[4] = hex_chars_uc[c >> 4]; |
779 | 0 | d[5] = hex_chars_uc[c & 0xF]; |
780 | 0 | break; |
781 | 0 | } |
782 | 0 | } |
783 | 0 | } |
784 | | |
785 | | |
786 | | /* decodes url-special-chars inplace. |
787 | | * replaces non-printable characters with '_' |
788 | | * (If this is used on a portion of query string, then query string should be |
789 | | * split on '&', and '+' replaced with ' ' before calling this routine) |
790 | | */ |
791 | | |
792 | 1.70k | void buffer_urldecode_path(buffer * const b) { |
793 | 1.70k | const size_t len = buffer_clen(b); |
794 | 1.70k | char *src = len ? memchr(b->ptr, '%', len) : NULL; |
795 | 1.70k | if (NULL == src) return; |
796 | | |
797 | 1.36k | char *dst = src; |
798 | 46.6M | do { |
799 | | /* *src == '%' */ |
800 | 46.6M | unsigned char high = ((unsigned char *)src)[1]; |
801 | 46.6M | unsigned char low = high ? hex2int(((unsigned char *)src)[2]) : 0xFF; |
802 | 46.6M | if (0xFF != (high = hex2int(high)) && 0xFF != low) { |
803 | 46.6M | high = (high << 4) | low; /* map ctrls to '_' */ |
804 | 46.6M | *dst = (high >= 32 && high != 127) ? high : '_'; |
805 | 46.6M | src += 2; |
806 | 46.6M | } /* else ignore this '%'; leave as-is and move on */ |
807 | | |
808 | 52.3M | while ((*++dst = *++src) != '%' && *src) ; |
809 | 46.6M | } while (*src); |
810 | 1.36k | b->used = (dst - b->ptr) + 1; |
811 | 1.36k | } |
812 | | |
813 | 0 | int buffer_is_valid_UTF8(const buffer *b) { |
814 | | /* https://www.w3.org/International/questions/qa-forms-utf-8 */ |
815 | | /*assert(b->used);*//*(b->ptr must exist and be '\0'-terminated)*/ |
816 | 0 | const unsigned char *c = (unsigned char *)b->ptr; |
817 | 0 | while (*c) { |
818 | | |
819 | | /*(note: includes ctrls)*/ |
820 | 0 | if ( c[0] < 0x80 ) { ++c; continue; } |
821 | | |
822 | 0 | if ( 0xc2 <= c[0] && c[0] <= 0xdf |
823 | 0 | && 0x80 <= c[1] && c[1] <= 0xbf ) { c+=2; continue; } |
824 | | |
825 | 0 | if ( ( ( 0xe0 == c[0] |
826 | 0 | && 0xa0 <= c[1] && c[1] <= 0xbf) |
827 | 0 | || ( 0xe1 <= c[0] && c[0] <= 0xef && c[0] != 0xed |
828 | 0 | && 0x80 <= c[1] && c[1] <= 0xbf) |
829 | 0 | || ( 0xed == c[0] |
830 | 0 | && 0x80 <= c[1] && c[1] <= 0x9f) ) |
831 | 0 | && 0x80 <= c[2] && c[2] <= 0xbf ) { c+=3; continue; } |
832 | | |
833 | 0 | if ( ( ( 0xf0 == c[0] |
834 | 0 | && 0x90 <= c[1] && c[1] <= 0xbf) |
835 | 0 | || ( 0xf1 <= c[0] && c[0] <= 0xf3 |
836 | 0 | && 0x80 <= c[1] && c[1] <= 0xbf) |
837 | 0 | || ( 0xf4 == c[0] |
838 | 0 | && 0x80 <= c[1] && c[1] <= 0x8f) ) |
839 | 0 | && 0x80 <= c[2] && c[2] <= 0xbf |
840 | 0 | && 0x80 <= c[3] && c[3] <= 0xbf ) { c+=4; continue; } |
841 | | |
842 | 0 | return 0; /* invalid */ |
843 | 0 | } |
844 | 0 | return 1; /* valid */ |
845 | 0 | } |
846 | | |
847 | | /* - special case: empty string returns empty string |
848 | | * - on windows or cygwin: replace \ with / |
849 | | * - strip leading spaces |
850 | | * - prepends "/" if not present already |
851 | | * - resolve "/../", "//" and "/./" the usual way: |
852 | | * the first one removes a preceding component, the other two |
853 | | * get compressed to "/". |
854 | | * - "/." and "/.." at the end are similar, but always leave a trailing |
855 | | * "/" |
856 | | * |
857 | | * /blah/.. gets / |
858 | | * /blah/../foo gets /foo |
859 | | * /abc/./xyz gets /abc/xyz |
860 | | * /abc//xyz gets /abc/xyz |
861 | | */ |
862 | | |
863 | | void buffer_path_simplify(buffer *b) |
864 | 365 | { |
865 | 365 | char *out = b->ptr; |
866 | 365 | char * const end = b->ptr + b->used - 1; |
867 | | |
868 | 365 | if (__builtin_expect( (buffer_is_blank(b)), 0)) { |
869 | 0 | buffer_blank(b); |
870 | 0 | return; |
871 | 0 | } |
872 | | |
873 | | #if defined(_WIN32) || defined(__CYGWIN__) |
874 | | /* cygwin is treating \ and / the same, so we have to that too */ |
875 | | for (char *p = b->ptr; *p; p++) { |
876 | | if (*p == '\\') *p = '/'; |
877 | | } |
878 | | #endif |
879 | | |
880 | 365 | *end = '/'; /*(end of path modified to avoid need to check '\0')*/ |
881 | | |
882 | 365 | char *walk = out; |
883 | 365 | if (__builtin_expect( (*walk == '/'), 1)) { |
884 | | /* scan to detect (potential) need for path simplification |
885 | | * (repeated '/' or "/.") */ |
886 | 551 | do { |
887 | 551 | if (*++walk == '.' || *walk == '/') |
888 | 131 | break; |
889 | 661k | do { ++walk; } while (*walk != '/'); |
890 | 420 | } while (walk != end); |
891 | 131 | if (__builtin_expect( (walk == end), 1)) { |
892 | | /* common case: no repeated '/' or "/." */ |
893 | 0 | *end = '\0'; /* overwrite extra '/' added to end of path */ |
894 | 0 | return; |
895 | 0 | } |
896 | 131 | out = walk-1; |
897 | 131 | } |
898 | 234 | else { |
899 | 234 | if (walk[0] == '.' && walk[1] == '/') |
900 | 67 | *out = *++walk; |
901 | 167 | else if (walk[0] == '.' && walk[1] == '.' && walk[2] == '/') |
902 | 2 | *out = *(walk += 2); |
903 | 165 | else { |
904 | 8.29M | while (*++walk != '/') ; |
905 | 165 | out = walk; |
906 | 165 | } |
907 | 234 | ++walk; |
908 | 234 | } |
909 | | |
910 | 8.61k | while (walk <= end) { |
911 | | /* previous char is '/' at this point (or start of string w/o '/') */ |
912 | 8.41k | if (__builtin_expect( (walk[0] == '/'), 0)) { |
913 | | /* skip repeated '/' (e.g. "///" -> "/") */ |
914 | 2.66k | if (++walk < end) |
915 | 2.59k | continue; |
916 | 69 | else { |
917 | 69 | ++out; |
918 | 69 | break; |
919 | 69 | } |
920 | 2.66k | } |
921 | 5.74k | else if (__builtin_expect( (walk[0] == '.'), 0)) { |
922 | | /* handle "./" and "../" */ |
923 | 3.33k | if (walk[1] == '.' && walk[2] == '/') { |
924 | | /* handle "../" */ |
925 | 3.30M | while (out > b->ptr && *--out != '/') ; |
926 | 1.28k | *out = '/'; /*(in case path had not started with '/')*/ |
927 | 1.28k | if ((walk += 3) >= end) { |
928 | 39 | ++out; |
929 | 39 | break; |
930 | 39 | } |
931 | 1.24k | else |
932 | 1.24k | continue; |
933 | 1.28k | } |
934 | 2.05k | else if (walk[1] == '/') { |
935 | | /* handle "./" */ |
936 | 644 | if ((walk += 2) >= end) { |
937 | 50 | ++out; |
938 | 50 | break; |
939 | 50 | } |
940 | 594 | continue; |
941 | 644 | } |
942 | 1.40k | else { |
943 | | /* accept "." if not part of "../" or "./" */ |
944 | 1.40k | *++out = '.'; |
945 | 1.40k | ++walk; |
946 | 1.40k | } |
947 | 3.33k | } |
948 | | |
949 | 15.6M | while ((*++out = *walk++) != '/') ; |
950 | 3.82k | } |
951 | 365 | *out = *end = '\0'; /* overwrite extra '/' added to end of path */ |
952 | 365 | b->used = (out - b->ptr) + 1; |
953 | | /*buffer_truncate(b, out - b->ptr);*/ |
954 | 365 | } |
955 | | |
956 | 0 | void buffer_to_lower(buffer * const b) { |
957 | 0 | unsigned char * const restrict s = (unsigned char *)b->ptr; |
958 | 0 | const uint_fast32_t used = b->used; |
959 | 0 | for (uint_fast32_t i = 0; i < used; ++i) { |
960 | 0 | if (light_isupper(s[i])) s[i] |= 0x20; |
961 | 0 | } |
962 | 0 | } |
963 | | |
964 | | |
965 | 0 | void buffer_to_upper(buffer * const b) { |
966 | 0 | unsigned char * const restrict s = (unsigned char *)b->ptr; |
967 | 0 | const uint_fast32_t used = b->used; |
968 | 0 | for (uint_fast32_t i = 0; i < used; ++i) { |
969 | 0 | if (light_islower(s[i])) s[i] &= 0xdf; |
970 | 0 | } |
971 | 0 | } |