/src/php-src/ext/uri/uriparser/src/UriSetPath.c
Line | Count | Source |
1 | | /* |
2 | | * uriparser - RFC 3986 URI parsing library |
3 | | * |
4 | | * Copyright (C) 2025, Sebastian Pipping <sebastian@pipping.org> |
5 | | * All rights reserved. |
6 | | * |
7 | | * Redistribution and use in source and binary forms, with or without |
8 | | * modification, are permitted provided that the following conditions |
9 | | * are met: |
10 | | * |
11 | | * 1. Redistributions of source code must retain the above |
12 | | * copyright notice, this list of conditions and the following |
13 | | * disclaimer. |
14 | | * |
15 | | * 2. Redistributions in binary form must reproduce the above |
16 | | * copyright notice, this list of conditions and the following |
17 | | * disclaimer in the documentation and/or other materials |
18 | | * provided with the distribution. |
19 | | * |
20 | | * 3. Neither the name of the copyright holder nor the names of |
21 | | * its contributors may be used to endorse or promote products |
22 | | * derived from this software without specific prior written |
23 | | * permission. |
24 | | * |
25 | | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
26 | | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
27 | | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS |
28 | | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL |
29 | | * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, |
30 | | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
31 | | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
32 | | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
33 | | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
34 | | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
35 | | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED |
36 | | * OF THE POSSIBILITY OF SUCH DAMAGE. |
37 | | */ |
38 | | |
39 | | /* What encodings are enabled? */ |
40 | | #include <uriparser/UriDefsConfig.h> |
41 | | #if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE)) |
42 | | /* Include SELF twice */ |
43 | | # ifdef URI_ENABLE_ANSI |
44 | | # define URI_PASS_ANSI 1 |
45 | | # include "UriSetPath.c" |
46 | | # undef URI_PASS_ANSI |
47 | | # endif |
48 | | # ifdef URI_ENABLE_UNICODE |
49 | | # define URI_PASS_UNICODE 1 |
50 | | # include "UriSetPath.c" |
51 | | # undef URI_PASS_UNICODE |
52 | | # endif |
53 | | #else |
54 | | # ifdef URI_PASS_ANSI |
55 | | # include <uriparser/UriDefsAnsi.h> |
56 | | # else |
57 | | # include <uriparser/UriDefsUnicode.h> |
58 | | # include <wchar.h> |
59 | | # endif |
60 | | |
61 | | |
62 | | |
63 | | #ifndef URI_DOXYGEN |
64 | | # include <uriparser/Uri.h> |
65 | | # include "UriCommon.h" |
66 | | # include "UriMemory.h" |
67 | | #endif |
68 | | |
69 | | |
70 | | |
71 | | #include <assert.h> |
72 | | |
73 | | |
74 | | |
/*
 * Character-class helpers for use as `switch` case labels.
 *
 * Every macro below expands to a chain of case labels that deliberately
 * lacks the leading `case` keyword and the trailing colon, so the call
 * site reads:
 *
 *     case URI_SET_DIGIT:
 *
 * Since the expansions are plain case labels, two sets that share a
 * character cannot be used inside the same `switch` statement.
 */

/* '0' .. '9' */
#define URI_SET_DIGIT \
         _UT('0'): case _UT('1'): case _UT('2'): case _UT('3'): case _UT('4'): \
    case _UT('5'): case _UT('6'): case _UT('7'): case _UT('8'): case _UT('9')

/* 'A' .. 'F' */
#define URI_SET_HEX_LETTER_UPPER \
         _UT('A'): case _UT('B'): case _UT('C'): \
    case _UT('D'): case _UT('E'): case _UT('F')

/* 'a' .. 'f' */
#define URI_SET_HEX_LETTER_LOWER \
         _UT('a'): case _UT('b'): case _UT('c'): \
    case _UT('d'): case _UT('e'): case _UT('f')

/* Hexadecimal digits of either case */
#define URI_SET_HEXDIG \
         URI_SET_DIGIT: \
    case URI_SET_HEX_LETTER_UPPER: \
    case URI_SET_HEX_LETTER_LOWER

/* 'A' .. 'Z' and 'a' .. 'z'; the A-F/a-f part is reused from the hex sets */
#define URI_SET_ALPHA \
         URI_SET_HEX_LETTER_UPPER: \
    case URI_SET_HEX_LETTER_LOWER: \
    case _UT('g'): case _UT('h'): case _UT('i'): case _UT('j'): case _UT('k'): \
    case _UT('l'): case _UT('m'): case _UT('n'): case _UT('o'): case _UT('p'): \
    case _UT('q'): case _UT('r'): case _UT('s'): case _UT('t'): case _UT('u'): \
    case _UT('v'): case _UT('w'): case _UT('x'): case _UT('y'): case _UT('z'): \
    case _UT('G'): case _UT('H'): case _UT('I'): case _UT('J'): case _UT('K'): \
    case _UT('L'): case _UT('M'): case _UT('N'): case _UT('O'): case _UT('P'): \
    case _UT('Q'): case _UT('R'): case _UT('S'): case _UT('T'): case _UT('U'): \
    case _UT('V'): case _UT('W'): case _UT('X'): case _UT('Y'): case _UT('Z')

/* sub-delims: ! $ & ' ( ) * + , ; = */
#define URI_SET_SUB_DELIMS \
         _UT('!'): case _UT('$'): case _UT('&'): case _UT('\''): \
    case _UT('('): case _UT(')'): case _UT('*'): case _UT('+'): \
    case _UT(','): case _UT(';'): case _UT('=')

/* unreserved: ALPHA, DIGIT, and - . _ ~ */
#define URI_SET_UNRESERVED \
         URI_SET_ALPHA: \
    case URI_SET_DIGIT: \
    case _UT('-'): case _UT('.'): case _UT('_'): case _UT('~')
184 | | |
185 | | |
186 | | |
187 | 0 | UriBool URI_FUNC(IsWellFormedPath)(const URI_CHAR * first, const URI_CHAR * afterLast, UriBool hasHost) { |
188 | 0 | if ((first == NULL) || (afterLast == NULL)) { |
189 | 0 | return URI_FALSE; |
190 | 0 | } |
191 | | |
192 | 0 | if ((hasHost == URI_TRUE) && ((first >= afterLast) || (first[0] != _UT('/')))) { |
193 | 0 | return URI_FALSE; |
194 | 0 | } |
195 | | |
196 | | /* The related part of the grammar in RFC 3986 (section 3.3) reads: |
197 | | * |
198 | | * path = path-abempty ; begins with "/" or is empty |
199 | | * / path-absolute ; begins with "/" but not "//" |
200 | | * / path-noscheme ; begins with a non-colon segment |
201 | | * / path-rootless ; begins with a segment |
202 | | * / path-empty ; zero characters |
203 | | * |
204 | | * path-abempty = *( "/" segment ) |
205 | | * path-absolute = "/" [ segment-nz *( "/" segment ) ] |
206 | | * path-noscheme = segment-nz-nc *( "/" segment ) |
207 | | * path-rootless = segment-nz *( "/" segment ) |
208 | | * path-empty = 0<pchar> |
209 | | * |
210 | | * segment = *pchar |
211 | | * segment-nz = 1*pchar |
212 | | * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) |
213 | | * ; non-zero-length segment without any colon ":" |
214 | | * |
215 | | * pchar = unreserved / pct-encoded / sub-delims / ":" / "@" |
216 | | * |
217 | | * The check below simplifies this to .. |
218 | | * |
219 | | * path = *( unreserved / pct-encoded / sub-delims / ":" / "@" / "/" ) |
220 | | * |
221 | | * .. and leaves the rest to pre-return removal of ambiguity |
222 | | * from cases like "path1:/path2" and "//path1/path2" inside SetPath. |
223 | | */ |
224 | 0 | while (first < afterLast) { |
225 | 0 | switch (first[0]) { |
226 | 0 | case URI_SET_UNRESERVED: |
227 | 0 | break; |
228 | | |
229 | | /* pct-encoded */ |
230 | 0 | case _UT('%'): |
231 | 0 | if (afterLast - first < 3) { |
232 | 0 | return URI_FALSE; |
233 | 0 | } |
234 | 0 | switch (first[1]) { |
235 | 0 | case URI_SET_HEXDIG: |
236 | 0 | break; |
237 | 0 | default: |
238 | 0 | return URI_FALSE; |
239 | 0 | } |
240 | 0 | switch (first[2]) { |
241 | 0 | case URI_SET_HEXDIG: |
242 | 0 | break; |
243 | 0 | default: |
244 | 0 | return URI_FALSE; |
245 | 0 | } |
246 | 0 | first += 2; |
247 | 0 | break; |
248 | | |
249 | 0 | case URI_SET_SUB_DELIMS: |
250 | 0 | break; |
251 | | |
252 | | /* ":" / "@" and "/" */ |
253 | 0 | case _UT(':'): |
254 | 0 | case _UT('@'): |
255 | 0 | case _UT('/'): |
256 | 0 | break; |
257 | | |
258 | 0 | default: |
259 | 0 | return URI_FALSE; |
260 | 0 | } |
261 | | |
262 | 0 | first++; |
263 | 0 | } |
264 | 0 | return URI_TRUE; |
265 | 0 | } Unexecuted instantiation: uriIsWellFormedPathA Unexecuted instantiation: uriIsWellFormedPathW |
266 | | |
267 | | |
268 | | |
269 | 0 | static void URI_FUNC(DropEmptyFirstPathSegment)(URI_TYPE(Uri) * uri, UriMemoryManager * memory) { |
270 | 0 | assert(uri != NULL); |
271 | 0 | assert(memory != NULL); |
272 | 0 | assert(uri->pathHead != NULL); |
273 | 0 | assert(uri->pathHead->text.first == uri->pathHead->text.afterLast); |
274 | |
|
275 | 0 | { |
276 | 0 | URI_TYPE(PathSegment) * const originalHead = uri->pathHead; |
277 | |
|
278 | 0 | uri->pathHead = uri->pathHead->next; |
279 | |
|
280 | 0 | originalHead->text.first = NULL; |
281 | 0 | originalHead->text.afterLast = NULL; |
282 | 0 | memory->free(memory, originalHead); |
283 | 0 | } |
284 | 0 | } Unexecuted instantiation: UriSetPath.c:uriDropEmptyFirstPathSegmentA Unexecuted instantiation: UriSetPath.c:uriDropEmptyFirstPathSegmentW |
285 | | |
286 | | |
287 | | |
288 | | /* URIs without a host encode a leading slash in the path as .absolutePath == URI_TRUE. |
289 | | * This function checks for a leading empty path segment (that would have the "visual effect" |
290 | | * of a leading slash during stringification) and transforms it into .absolutePath == URI_TRUE |
291 | | * instead, if present. */ |
292 | 0 | static void URI_FUNC(TransformEmptyLeadPathSegments)(URI_TYPE(Uri) * uri, UriMemoryManager * memory) { |
293 | 0 | assert(uri != NULL); |
294 | 0 | assert(memory != NULL); |
295 | |
|
296 | 0 | if ((URI_FUNC(HasHost)(uri) == URI_TRUE) |
297 | 0 | || (uri->pathHead == NULL) |
298 | 0 | || (uri->pathHead->text.first != uri->pathHead->text.afterLast)) { |
299 | 0 | return; /* i.e. nothing to do */ |
300 | 0 | } |
301 | | |
302 | 0 | assert(uri->absolutePath == URI_FALSE); |
303 | |
|
304 | 0 | URI_FUNC(DropEmptyFirstPathSegment)(uri, memory); |
305 | |
|
306 | 0 | uri->absolutePath = URI_TRUE; |
307 | 0 | } Unexecuted instantiation: UriSetPath.c:uriTransformEmptyLeadPathSegmentsA Unexecuted instantiation: UriSetPath.c:uriTransformEmptyLeadPathSegmentsW |
308 | | |
309 | | |
310 | | |
311 | | static int URI_FUNC(InternalSetPath)(URI_TYPE(Uri) * destUri, |
312 | | const URI_CHAR * first, |
313 | | const URI_CHAR * afterLast, |
314 | 0 | UriMemoryManager * memory) { |
315 | 0 | assert(destUri != NULL); |
316 | 0 | assert(first != NULL); |
317 | 0 | assert(afterLast != NULL); |
318 | 0 | assert(memory != NULL); |
319 | 0 | assert(destUri->pathHead == NULL); /* set by SetPathMm right before */ |
320 | 0 | assert(destUri->pathTail == NULL); /* set by SetPathMm right before */ |
321 | 0 | assert(destUri->absolutePath == URI_FALSE); /* set by SetPathMm right before */ |
322 | | |
323 | | /* Skip the leading slash from target URIs with a host (so that we can |
324 | | * transfer the path 1:1 further down) */ |
325 | 0 | if (URI_FUNC(HasHost)(destUri) == URI_TRUE) { |
326 | | /* NOTE: This is because SetPathMm called IsWellFormedPath earlier: */ |
327 | 0 | assert((afterLast - first >= 1) && (first[0] == _UT('/'))); |
328 | 0 | first++; |
329 | 0 | } else if (first == afterLast) { |
330 | | /* This avoids (1) all the expensive but unnecessary work below |
331 | | * and also (2) mis-encoding as single empty path segment |
332 | | * that would need (detection and) repair further down otherwise */ |
333 | 0 | return URI_SUCCESS; |
334 | 0 | } |
335 | | |
336 | | /* Assemble "///.." input wrap for upcoming parse as a URI */ |
337 | 0 | { |
338 | 0 | const size_t inputLenChars = (afterLast - first); |
339 | 0 | const size_t MAX_SIZE_T = (size_t)-1; |
340 | | |
341 | | /* Detect overflow */ |
342 | 0 | if (MAX_SIZE_T - inputLenChars < 3 + 1) { |
343 | 0 | return URI_ERROR_MALLOC; |
344 | 0 | } |
345 | | |
346 | 0 | { |
347 | 0 | const size_t candidateLenChars = 3 + inputLenChars; |
348 | | |
349 | | /* Detect overflow */ |
350 | 0 | if (MAX_SIZE_T / sizeof(URI_CHAR) < candidateLenChars + 1) { |
351 | 0 | return URI_ERROR_MALLOC; |
352 | 0 | } |
353 | | |
354 | 0 | { |
355 | 0 | URI_CHAR * const candidate = memory->malloc(memory, (candidateLenChars + 1) * sizeof(URI_CHAR)); |
356 | |
|
357 | 0 | if (candidate == NULL) { |
358 | 0 | return URI_ERROR_MALLOC; |
359 | 0 | } |
360 | | |
361 | 0 | memcpy(candidate, _UT("///"), 3 * sizeof(URI_CHAR)); |
362 | 0 | memcpy(candidate + 3, first, inputLenChars * sizeof(URI_CHAR)); |
363 | 0 | candidate[3 + inputLenChars] = _UT('\0'); |
364 | | |
365 | | /* Parse as an RFC 3986 URI */ |
366 | 0 | { |
367 | 0 | URI_TYPE(Uri) tempUri; |
368 | 0 | const int res = URI_FUNC(ParseSingleUriExMm)(&tempUri, |
369 | 0 | candidate, |
370 | 0 | candidate + candidateLenChars, |
371 | 0 | NULL, |
372 | 0 | memory); |
373 | 0 | assert((res == URI_SUCCESS) || (res == URI_ERROR_SYNTAX) || (res == URI_ERROR_MALLOC)); |
374 | 0 | if (res != URI_SUCCESS) { |
375 | 0 | memory->free(memory, candidate); |
376 | 0 | return res; |
377 | 0 | } |
378 | | |
379 | | /* Nothing but path and host is supposed to be set by the parse, in particular not: */ |
380 | 0 | assert(tempUri.query.first == NULL); |
381 | 0 | assert(tempUri.fragment.first == NULL); |
382 | | |
383 | | /* Ensure that the strings in the path segments are all owned by `tempUri` |
384 | | * because we want to (1) rip out and keep the full path list further down |
385 | | * and (2) be able to free the parsed string (`candidate`) also. */ |
386 | 0 | { |
387 | 0 | const int res = URI_FUNC(MakeOwnerMm)(&tempUri, memory); |
388 | 0 | assert((res == URI_SUCCESS) || (res == URI_ERROR_MALLOC)); |
389 | 0 | if (res != URI_SUCCESS) { |
390 | 0 | URI_FUNC(FreeUriMembersMm)(&tempUri, memory); |
391 | 0 | memory->free(memory, candidate); |
392 | 0 | return res; |
393 | 0 | } |
394 | 0 | assert(tempUri.owner == URI_TRUE); |
395 | 0 | } |
396 | | |
397 | | /* Move path to destination URI */ |
398 | 0 | assert(tempUri.absolutePath == URI_FALSE); /* always URI_FALSE for URIs with host */ |
399 | 0 | destUri->pathHead = tempUri.pathHead; |
400 | 0 | destUri->pathTail = tempUri.pathTail; |
401 | 0 | destUri->absolutePath = URI_FALSE; |
402 | |
|
403 | 0 | tempUri.pathHead = NULL; |
404 | 0 | tempUri.pathTail = NULL; |
405 | | |
406 | | /* Free the rest of the temp URI */ |
407 | 0 | URI_FUNC(FreeUriMembersMm)(&tempUri, memory); |
408 | 0 | memory->free(memory, candidate); |
409 | | |
410 | | /* Restore use of .absolutePath as needed */ |
411 | 0 | URI_FUNC(TransformEmptyLeadPathSegments)(destUri, memory); |
412 | | |
413 | | /* Disambiguate as needed */ |
414 | 0 | { |
415 | 0 | const UriBool success = URI_FUNC(FixPathNoScheme)(destUri, memory); |
416 | 0 | if (success == URI_FALSE) { |
417 | 0 | return URI_ERROR_MALLOC; |
418 | 0 | } |
419 | 0 | } |
420 | 0 | { |
421 | 0 | const UriBool success = URI_FUNC(EnsureThatPathIsNotMistakenForHost)(destUri, memory); |
422 | 0 | if (success == URI_FALSE) { |
423 | 0 | return URI_ERROR_MALLOC; |
424 | 0 | } |
425 | 0 | } |
426 | 0 | } |
427 | 0 | } |
428 | 0 | } |
429 | 0 | } |
430 | | |
431 | 0 | return URI_SUCCESS; |
432 | 0 | } Unexecuted instantiation: UriSetPath.c:uriInternalSetPathA Unexecuted instantiation: UriSetPath.c:uriInternalSetPathW |
433 | | |
434 | | |
435 | | |
436 | | int URI_FUNC(SetPathMm)(URI_TYPE(Uri) * uri, |
437 | | const URI_CHAR * first, |
438 | | const URI_CHAR * afterLast, |
439 | 0 | UriMemoryManager * memory) { |
440 | | /* Input validation (before making any changes) */ |
441 | 0 | if ((uri == NULL) || ((first == NULL) != (afterLast == NULL))) { |
442 | 0 | return URI_ERROR_NULL; |
443 | 0 | } |
444 | | |
445 | 0 | URI_CHECK_MEMORY_MANAGER(memory); /* may return */ |
446 | | |
447 | 0 | if ((first != NULL) && (URI_FUNC(IsWellFormedPath)(first, afterLast, URI_FUNC(HasHost)(uri)) == URI_FALSE)) { |
448 | 0 | return URI_ERROR_SYNTAX; |
449 | 0 | } |
450 | | |
451 | | /* Clear old value */ |
452 | 0 | { |
453 | 0 | const int res = URI_FUNC(FreeUriPath)(uri, memory); |
454 | 0 | if (res != URI_SUCCESS) { |
455 | 0 | return res; |
456 | 0 | } |
457 | 0 | uri->absolutePath = URI_FALSE; |
458 | 0 | } |
459 | | |
460 | | /* Already done? */ |
461 | 0 | if (first == NULL) { |
462 | 0 | return URI_SUCCESS; |
463 | 0 | } |
464 | | |
465 | 0 | assert(first != NULL); |
466 | | |
467 | | /* Ensure owned */ |
468 | 0 | if (uri->owner == URI_FALSE) { |
469 | 0 | const int res = URI_FUNC(MakeOwnerMm)(uri, memory); |
470 | 0 | if (res != URI_SUCCESS) { |
471 | 0 | return res; |
472 | 0 | } |
473 | 0 | } |
474 | | |
475 | 0 | assert(uri->owner == URI_TRUE); |
476 | | |
477 | | /* Apply new value */ |
478 | 0 | { |
479 | 0 | const int res = URI_FUNC(InternalSetPath)(uri, first, afterLast, memory); |
480 | 0 | assert((res == URI_SUCCESS) || (res == URI_ERROR_SYNTAX) || (res == URI_ERROR_MALLOC)); |
481 | 0 | return res; |
482 | 0 | } |
483 | 0 | } Unexecuted instantiation: uriSetPathMmA Unexecuted instantiation: uriSetPathMmW |
484 | | |
485 | | |
486 | | |
487 | | int URI_FUNC(SetPath)(URI_TYPE(Uri) * uri, |
488 | | const URI_CHAR * first, |
489 | 0 | const URI_CHAR * afterLast) { |
490 | 0 | return URI_FUNC(SetPathMm)(uri, first, afterLast, NULL); |
491 | 0 | } Unexecuted instantiation: uriSetPathA Unexecuted instantiation: uriSetPathW |
492 | | |
493 | | |
494 | | |
495 | | #endif |