/src/php-src/ext/uri/uriparser/src/UriSetPath.c
Line | Count | Source |
1 | | /* |
2 | | * uriparser - RFC 3986 URI parsing library |
3 | | * |
4 | | * Copyright (C) 2025, Sebastian Pipping <sebastian@pipping.org> |
5 | | * All rights reserved. |
6 | | * |
7 | | * Redistribution and use in source and binary forms, with or without |
8 | | * modification, are permitted provided that the following conditions |
9 | | * are met: |
10 | | * |
11 | | * 1. Redistributions of source code must retain the above |
12 | | * copyright notice, this list of conditions and the following |
13 | | * disclaimer. |
14 | | * |
15 | | * 2. Redistributions in binary form must reproduce the above |
16 | | * copyright notice, this list of conditions and the following |
17 | | * disclaimer in the documentation and/or other materials |
18 | | * provided with the distribution. |
19 | | * |
20 | | * 3. Neither the name of the copyright holder nor the names of |
21 | | * its contributors may be used to endorse or promote products |
22 | | * derived from this software without specific prior written |
23 | | * permission. |
24 | | * |
25 | | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
26 | | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
27 | | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS |
28 | | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL |
29 | | * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, |
30 | | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
31 | | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
32 | | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
33 | | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
34 | | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
35 | | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED |
36 | | * OF THE POSSIBILITY OF SUCH DAMAGE. |
37 | | */ |
38 | | |
39 | | /* What encodings are enabled? */ |
40 | | #include <uriparser/UriDefsConfig.h> |
41 | | #if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE)) |
42 | | /* Include SELF twice */ |
43 | | # ifdef URI_ENABLE_ANSI |
44 | | # define URI_PASS_ANSI 1 |
45 | | # include "UriSetPath.c" |
46 | | # undef URI_PASS_ANSI |
47 | | # endif |
48 | | # ifdef URI_ENABLE_UNICODE |
49 | | # define URI_PASS_UNICODE 1 |
50 | | # include "UriSetPath.c" |
51 | | # undef URI_PASS_UNICODE |
52 | | # endif |
53 | | #else |
54 | | # ifdef URI_PASS_ANSI |
55 | | # include <uriparser/UriDefsAnsi.h> |
56 | | # else |
57 | | # include <uriparser/UriDefsUnicode.h> |
58 | | # include <wchar.h> |
59 | | # endif |
60 | | |
61 | | # ifndef URI_DOXYGEN |
62 | | # include <uriparser/Uri.h> |
63 | | # include "UriCommon.h" |
64 | | # include "UriMemory.h" |
65 | | # include "UriSets.h" |
66 | | # endif |
67 | | |
68 | | # include <assert.h> |
69 | | |
70 | | UriBool URI_FUNC(IsWellFormedPath)(const URI_CHAR * first, const URI_CHAR * afterLast, |
71 | 0 | UriBool hasHost) { |
72 | 0 | if ((first == NULL) || (afterLast == NULL)) { |
73 | 0 | return URI_FALSE; |
74 | 0 | } |
75 | | |
76 | 0 | if ((hasHost == URI_TRUE) && ((first >= afterLast) || (first[0] != _UT('/')))) { |
77 | 0 | return URI_FALSE; |
78 | 0 | } |
79 | | |
80 | | /* The related part of the grammar in RFC 3986 (section 3.3) reads: |
81 | | * |
82 | | * path = path-abempty ; begins with "/" or is empty |
83 | | * / path-absolute ; begins with "/" but not "//" |
84 | | * / path-noscheme ; begins with a non-colon segment |
85 | | * / path-rootless ; begins with a segment |
86 | | * / path-empty ; zero characters |
87 | | * |
88 | | * path-abempty = *( "/" segment ) |
89 | | * path-absolute = "/" [ segment-nz *( "/" segment ) ] |
90 | | * path-noscheme = segment-nz-nc *( "/" segment ) |
91 | | * path-rootless = segment-nz *( "/" segment ) |
92 | | * path-empty = 0<pchar> |
93 | | * |
94 | | * segment = *pchar |
95 | | * segment-nz = 1*pchar |
96 | | * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) |
97 | | * ; non-zero-length segment without any colon ":" |
98 | | * |
99 | | * pchar = unreserved / pct-encoded / sub-delims / ":" / "@" |
100 | | * |
101 | | * The check below simplifies this to .. |
102 | | * |
103 | | * path = *( unreserved / pct-encoded / sub-delims / ":" / "@" / "/" ) |
104 | | * |
105 | | * .. and leaves the rest to pre-return removal of ambiguity |
106 | | * from cases like "path1:/path2" and "//path1/path2" inside SetPath. |
107 | | */ |
108 | 0 | while (first < afterLast) { |
109 | 0 | switch (first[0]) { |
110 | 0 | case URI_SET_PCHAR_WITHOUT_PERCENT(_UT): |
111 | 0 | break; |
112 | | |
113 | | /* pct-encoded */ |
114 | 0 | case _UT('%'): |
115 | 0 | if (afterLast - first < 3) { |
116 | 0 | return URI_FALSE; |
117 | 0 | } |
118 | 0 | switch (first[1]) { |
119 | 0 | case URI_SET_HEXDIG(_UT): |
120 | 0 | break; |
121 | 0 | default: |
122 | 0 | return URI_FALSE; |
123 | 0 | } |
124 | 0 | switch (first[2]) { |
125 | 0 | case URI_SET_HEXDIG(_UT): |
126 | 0 | break; |
127 | 0 | default: |
128 | 0 | return URI_FALSE; |
129 | 0 | } |
130 | 0 | first += 2; |
131 | 0 | break; |
132 | | |
133 | 0 | case _UT('/'): |
134 | 0 | break; |
135 | | |
136 | 0 | default: |
137 | 0 | return URI_FALSE; |
138 | 0 | } |
139 | | |
140 | 0 | first++; |
141 | 0 | } |
142 | 0 | return URI_TRUE; |
143 | 0 | } Unexecuted instantiation: uriIsWellFormedPathA Unexecuted instantiation: uriIsWellFormedPathW |
144 | | |
145 | | static void URI_FUNC(DropEmptyFirstPathSegment)(URI_TYPE(Uri) * uri, |
146 | 0 | UriMemoryManager * memory) { |
147 | 0 | assert(uri != NULL); |
148 | 0 | assert(memory != NULL); |
149 | 0 | assert(uri->pathHead != NULL); |
150 | 0 | assert(uri->pathHead->text.first == uri->pathHead->text.afterLast); |
151 | |
|
152 | 0 | URI_TYPE(PathSegment) * const originalHead = uri->pathHead; |
153 | |
|
154 | 0 | uri->pathHead = uri->pathHead->next; |
155 | |
|
156 | 0 | originalHead->text.first = NULL; |
157 | 0 | originalHead->text.afterLast = NULL; |
158 | 0 | memory->free(memory, originalHead); |
159 | 0 | } Unexecuted instantiation: UriSetPath.c:uriDropEmptyFirstPathSegmentA Unexecuted instantiation: UriSetPath.c:uriDropEmptyFirstPathSegmentW |
160 | | |
161 | | /* URIs without a host encode a leading slash in the path as .absolutePath == URI_TRUE. |
162 | | * This function checks for a leading empty path segment (that would have the "visual |
163 | | * effect" of a leading slash during stringification) and transforms it into .absolutePath |
164 | | * == URI_TRUE instead, if present. */ |
165 | | static void URI_FUNC(TransformEmptyLeadPathSegments)(URI_TYPE(Uri) * uri, |
166 | 0 | UriMemoryManager * memory) { |
167 | 0 | assert(uri != NULL); |
168 | 0 | assert(memory != NULL); |
169 | |
|
170 | 0 | if ((URI_FUNC(HasHost)(uri) == URI_TRUE) || (uri->pathHead == NULL) |
171 | 0 | || (uri->pathHead->text.first != uri->pathHead->text.afterLast)) { |
172 | 0 | return; /* i.e. nothing to do */ |
173 | 0 | } |
174 | | |
175 | 0 | assert(uri->absolutePath == URI_FALSE); |
176 | |
|
177 | 0 | URI_FUNC(DropEmptyFirstPathSegment)(uri, memory); |
178 | |
|
179 | 0 | uri->absolutePath = URI_TRUE; |
180 | 0 | } Unexecuted instantiation: UriSetPath.c:uriTransformEmptyLeadPathSegmentsA Unexecuted instantiation: UriSetPath.c:uriTransformEmptyLeadPathSegmentsW |
181 | | |
182 | | static int URI_FUNC(InternalSetPath)(URI_TYPE(Uri) * destUri, const URI_CHAR * first, |
183 | | const URI_CHAR * afterLast, |
184 | 0 | UriMemoryManager * memory) { |
185 | 0 | assert(destUri != NULL); |
186 | 0 | assert(first != NULL); |
187 | 0 | assert(afterLast != NULL); |
188 | 0 | assert(memory != NULL); |
189 | 0 | assert(destUri->pathHead == NULL); /* set by SetPathMm right before */ |
190 | 0 | assert(destUri->pathTail == NULL); /* set by SetPathMm right before */ |
191 | 0 | assert(destUri->absolutePath == URI_FALSE); /* set by SetPathMm right before */ |
192 | | |
193 | | /* Skip the leading slash from target URIs with a host (so that we can |
194 | | * transfer the path 1:1 further down) */ |
195 | 0 | if (URI_FUNC(HasHost)(destUri) == URI_TRUE) { |
196 | | /* NOTE: This is because SetPathMm called IsWellFormedPath earlier: */ |
197 | 0 | assert((afterLast - first >= 1) && (first[0] == _UT('/'))); |
198 | 0 | first++; |
199 | 0 | } else if (first == afterLast) { |
200 | | /* This avoids (1) all the expensive but unnecessary work below |
201 | | * and also (2) mis-encoding as single empty path segment |
202 | | * that would need (detection and) repair further down otherwise */ |
203 | 0 | return URI_SUCCESS; |
204 | 0 | } |
205 | | |
206 | | /* Assemble "///.." input wrap for upcoming parse as a URI */ |
207 | 0 | const size_t inputLenChars = (afterLast - first); |
208 | 0 | const size_t MAX_SIZE_T = (size_t)-1; |
209 | | |
210 | | /* Detect overflow */ |
211 | 0 | if (MAX_SIZE_T - inputLenChars < 3 + 1) { |
212 | 0 | return URI_ERROR_MALLOC; |
213 | 0 | } |
214 | | |
215 | 0 | const size_t candidateLenChars = 3 + inputLenChars; |
216 | | |
217 | | /* Detect overflow */ |
218 | 0 | if (MAX_SIZE_T / sizeof(URI_CHAR) < candidateLenChars + 1) { |
219 | 0 | return URI_ERROR_MALLOC; |
220 | 0 | } |
221 | | |
222 | 0 | URI_CHAR * const candidate = |
223 | 0 | memory->malloc(memory, (candidateLenChars + 1) * sizeof(URI_CHAR)); |
224 | |
|
225 | 0 | if (candidate == NULL) { |
226 | 0 | return URI_ERROR_MALLOC; |
227 | 0 | } |
228 | | |
229 | 0 | memcpy(candidate, _UT("///"), 3 * sizeof(URI_CHAR)); |
230 | 0 | memcpy(candidate + 3, first, inputLenChars * sizeof(URI_CHAR)); |
231 | 0 | candidate[3 + inputLenChars] = _UT('\0'); |
232 | | |
233 | | /* Parse as an RFC 3986 URI */ |
234 | 0 | URI_TYPE(Uri) tempUri; |
235 | 0 | int res = URI_FUNC(ParseSingleUriExMm)(&tempUri, candidate, |
236 | 0 | candidate + candidateLenChars, NULL, memory); |
237 | 0 | assert((res == URI_SUCCESS) || (res == URI_ERROR_SYNTAX) |
238 | 0 | || (res == URI_ERROR_MALLOC)); |
239 | 0 | if (res != URI_SUCCESS) { |
240 | 0 | memory->free(memory, candidate); |
241 | 0 | return res; |
242 | 0 | } |
243 | | |
244 | | /* Nothing but path and host is supposed to be set by the parse, in |
245 | | * particular not: */ |
246 | 0 | assert(tempUri.query.first == NULL); |
247 | 0 | assert(tempUri.fragment.first == NULL); |
248 | | |
249 | | /* Ensure that the strings in the path segments are all owned by |
250 | | * `tempUri` because we want to (1) rip out and keep the full path |
251 | | * list further down and (2) be able to free the parsed string |
252 | | * (`candidate`) also. */ |
253 | 0 | res = URI_FUNC(MakeOwnerMm)(&tempUri, memory); |
254 | 0 | assert((res == URI_SUCCESS) || (res == URI_ERROR_MALLOC)); |
255 | 0 | if (res != URI_SUCCESS) { |
256 | 0 | URI_FUNC(FreeUriMembersMm)(&tempUri, memory); |
257 | 0 | memory->free(memory, candidate); |
258 | 0 | return res; |
259 | 0 | } |
260 | 0 | assert(tempUri.owner == URI_TRUE); |
261 | | |
262 | | /* Move path to destination URI */ |
263 | 0 | assert(tempUri.absolutePath == URI_FALSE); /* always URI_FALSE for URIs with host */ |
264 | 0 | destUri->pathHead = tempUri.pathHead; |
265 | 0 | destUri->pathTail = tempUri.pathTail; |
266 | 0 | destUri->absolutePath = URI_FALSE; |
267 | |
|
268 | 0 | tempUri.pathHead = NULL; |
269 | 0 | tempUri.pathTail = NULL; |
270 | | |
271 | | /* Free the rest of the temp URI */ |
272 | 0 | URI_FUNC(FreeUriMembersMm)(&tempUri, memory); |
273 | 0 | memory->free(memory, candidate); |
274 | | |
275 | | /* Restore use of .absolutePath as needed */ |
276 | 0 | URI_FUNC(TransformEmptyLeadPathSegments)(destUri, memory); |
277 | | |
278 | | /* Disambiguate as needed */ |
279 | 0 | UriBool success = URI_FUNC(FixPathNoScheme)(destUri, memory); |
280 | 0 | if (success == URI_FALSE) { |
281 | 0 | return URI_ERROR_MALLOC; |
282 | 0 | } |
283 | | |
284 | 0 | success = URI_FUNC(EnsureThatPathIsNotMistakenForHost)(destUri, memory); |
285 | 0 | if (success == URI_FALSE) { |
286 | 0 | return URI_ERROR_MALLOC; |
287 | 0 | } |
288 | | |
289 | 0 | return URI_SUCCESS; |
290 | 0 | } Unexecuted instantiation: UriSetPath.c:uriInternalSetPathA Unexecuted instantiation: UriSetPath.c:uriInternalSetPathW |
291 | | |
292 | | int URI_FUNC(SetPathMm)(URI_TYPE(Uri) * uri, const URI_CHAR * first, |
293 | 0 | const URI_CHAR * afterLast, UriMemoryManager * memory) { |
294 | | /* Input validation (before making any changes) */ |
295 | 0 | if ((uri == NULL) || ((first == NULL) != (afterLast == NULL))) { |
296 | 0 | return URI_ERROR_NULL; |
297 | 0 | } |
298 | | |
299 | 0 | URI_CHECK_MEMORY_MANAGER(memory); /* may return */ |
300 | | |
301 | 0 | if ((first != NULL) |
302 | 0 | && (URI_FUNC(IsWellFormedPath)(first, afterLast, URI_FUNC(HasHost)(uri)) |
303 | 0 | == URI_FALSE)) { |
304 | 0 | return URI_ERROR_SYNTAX; |
305 | 0 | } |
306 | | |
307 | | /* Clear old value */ |
308 | 0 | int res = URI_FUNC(FreeUriPath)(uri, memory); |
309 | 0 | if (res != URI_SUCCESS) { |
310 | 0 | return res; |
311 | 0 | } |
312 | 0 | uri->absolutePath = URI_FALSE; |
313 | | |
314 | | /* Already done? */ |
315 | 0 | if (first == NULL) { |
316 | 0 | return URI_SUCCESS; |
317 | 0 | } |
318 | | |
319 | 0 | assert(first != NULL); |
320 | | |
321 | | /* Ensure owned */ |
322 | 0 | if (uri->owner == URI_FALSE) { |
323 | 0 | res = URI_FUNC(MakeOwnerMm)(uri, memory); |
324 | 0 | if (res != URI_SUCCESS) { |
325 | 0 | return res; |
326 | 0 | } |
327 | 0 | } |
328 | | |
329 | 0 | assert(uri->owner == URI_TRUE); |
330 | | |
331 | | /* Apply new value */ |
332 | 0 | res = URI_FUNC(InternalSetPath)(uri, first, afterLast, memory); |
333 | 0 | assert((res == URI_SUCCESS) || (res == URI_ERROR_SYNTAX) |
334 | 0 | || (res == URI_ERROR_MALLOC)); |
335 | 0 | return res; |
336 | 0 | } Unexecuted instantiation: uriSetPathMmA Unexecuted instantiation: uriSetPathMmW |
337 | | |
338 | | int URI_FUNC(SetPath)(URI_TYPE(Uri) * uri, const URI_CHAR * first, |
339 | 0 | const URI_CHAR * afterLast) { |
340 | 0 | return URI_FUNC(SetPathMm)(uri, first, afterLast, NULL); |
341 | 0 | } Unexecuted instantiation: uriSetPathA Unexecuted instantiation: uriSetPathW |
342 | | |
343 | | #endif |