Coverage Report

Created: 2025-09-27 06:26

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/php-src/ext/uri/uriparser/src/UriSetPath.c
Line
Count
Source
1
/*
2
 * uriparser - RFC 3986 URI parsing library
3
 *
4
 * Copyright (C) 2025, Sebastian Pipping <sebastian@pipping.org>
5
 * All rights reserved.
6
 *
7
 * Redistribution and use in source  and binary forms, with or without
8
 * modification, are permitted provided  that the following conditions
9
 * are met:
10
 *
11
 *     1. Redistributions  of  source  code   must  retain  the  above
12
 *        copyright notice, this list  of conditions and the following
13
 *        disclaimer.
14
 *
15
 *     2. Redistributions  in binary  form  must  reproduce the  above
16
 *        copyright notice, this list  of conditions and the following
17
 *        disclaimer  in  the  documentation  and/or  other  materials
18
 *        provided with the distribution.
19
 *
20
 *     3. Neither the  name of the  copyright holder nor the  names of
21
 *        its contributors may be used  to endorse or promote products
22
 *        derived from  this software  without specific  prior written
23
 *        permission.
24
 *
25
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26
 * "AS IS" AND  ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING, BUT NOT
27
 * LIMITED TO,  THE IMPLIED WARRANTIES OF  MERCHANTABILITY AND FITNESS
28
 * FOR  A  PARTICULAR  PURPOSE  ARE  DISCLAIMED.  IN  NO  EVENT  SHALL
29
 * THE  COPYRIGHT HOLDER  OR CONTRIBUTORS  BE LIABLE  FOR ANY  DIRECT,
30
 * INDIRECT, INCIDENTAL, SPECIAL,  EXEMPLARY, OR CONSEQUENTIAL DAMAGES
31
 * (INCLUDING, BUT NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE GOODS OR
32
 * SERVICES; LOSS OF USE, DATA,  OR PROFITS; OR BUSINESS INTERRUPTION)
33
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
34
 * STRICT  LIABILITY,  OR  TORT (INCLUDING  NEGLIGENCE  OR  OTHERWISE)
35
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
36
 * OF THE POSSIBILITY OF SUCH DAMAGE.
37
 */
38
39
/* What encodings are enabled? */
40
#include <uriparser/UriDefsConfig.h>
41
#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
42
/* Include SELF twice */
43
# ifdef URI_ENABLE_ANSI
44
#  define URI_PASS_ANSI 1
45
#  include "UriSetPath.c"
46
#  undef URI_PASS_ANSI
47
# endif
48
# ifdef URI_ENABLE_UNICODE
49
#  define URI_PASS_UNICODE 1
50
#  include "UriSetPath.c"
51
#  undef URI_PASS_UNICODE
52
# endif
53
#else
54
# ifdef URI_PASS_ANSI
55
#  include <uriparser/UriDefsAnsi.h>
56
# else
57
#  include <uriparser/UriDefsUnicode.h>
58
#  include <wchar.h>
59
# endif
60
61
62
63
#ifndef URI_DOXYGEN
64
# include <uriparser/Uri.h>
65
# include "UriCommon.h"
66
# include "UriMemory.h"
67
#endif
68
69
70
71
#include <assert.h>
72
73
74
75
/* Case label list matching the ten decimal digits '0' .. '9'.
 *
 * The expansion deliberately lacks the leading `case` keyword and the
 * trailing colon, so it is used as `case URI_SET_DIGIT:` inside a switch
 * or as one element of a larger label list. */
#define URI_SET_DIGIT \
       _UT('0'): case _UT('1'): case _UT('2'): case _UT('3'): case _UT('4'): \
  case _UT('5'): case _UT('6'): case _UT('7'): case _UT('8'): case _UT('9')
86
87
88
89
/* Case label list matching the upper-case hexadecimal letters 'A' .. 'F'.
 *
 * The expansion has no leading `case` and no trailing colon; write
 * `case URI_SET_HEX_LETTER_UPPER:` at the point of use. */
#define URI_SET_HEX_LETTER_UPPER \
       _UT('A'): case _UT('B'): case _UT('C'): \
  case _UT('D'): case _UT('E'): case _UT('F')
96
97
98
99
/* Case label list matching the lower-case hexadecimal letters 'a' .. 'f'.
 *
 * The expansion has no leading `case` and no trailing colon; write
 * `case URI_SET_HEX_LETTER_LOWER:` at the point of use. */
#define URI_SET_HEX_LETTER_LOWER \
       _UT('a'): case _UT('b'): case _UT('c'): \
  case _UT('d'): case _UT('e'): case _UT('f')
106
107
108
109
/* Case label list matching any hexadecimal digit: decimal digits followed
 * by the upper-case and the lower-case hex letters.
 *
 * The expansion has no leading `case` and no trailing colon; write
 * `case URI_SET_HEXDIG:` at the point of use. */
#define URI_SET_HEXDIG \
       URI_SET_DIGIT: \
  case URI_SET_HEX_LETTER_UPPER: case URI_SET_HEX_LETTER_LOWER
113
114
115
116
/* Case label list matching the ASCII letters 'a' .. 'z' and 'A' .. 'Z'.
 *
 * The letters a-f / A-F come from the two hex letter lists; the remaining
 * letters g-z / G-Z are spelled out here in lower/upper pairs.
 *
 * The expansion has no leading `case` and no trailing colon; write
 * `case URI_SET_ALPHA:` at the point of use. */
#define URI_SET_ALPHA \
       URI_SET_HEX_LETTER_UPPER: \
  case URI_SET_HEX_LETTER_LOWER: \
  case _UT('g'): case _UT('G'): case _UT('h'): case _UT('H'): \
  case _UT('i'): case _UT('I'): case _UT('j'): case _UT('J'): \
  case _UT('k'): case _UT('K'): case _UT('l'): case _UT('L'): \
  case _UT('m'): case _UT('M'): case _UT('n'): case _UT('N'): \
  case _UT('o'): case _UT('O'): case _UT('p'): case _UT('P'): \
  case _UT('q'): case _UT('Q'): case _UT('r'): case _UT('R'): \
  case _UT('s'): case _UT('S'): case _UT('t'): case _UT('T'): \
  case _UT('u'): case _UT('U'): case _UT('v'): case _UT('V'): \
  case _UT('w'): case _UT('W'): case _UT('x'): case _UT('X'): \
  case _UT('y'): case _UT('Y'): case _UT('z'): case _UT('Z')
159
160
161
162
/* Case label list matching the characters
 *   ! $ & ' ( ) * + , ; =
 * (handled as "sub-delims" by the path check in this file).
 *
 * The expansion has no leading `case` and no trailing colon; write
 * `case URI_SET_SUB_DELIMS:` at the point of use. */
#define URI_SET_SUB_DELIMS \
       _UT('!'): case _UT('$'): case _UT('&'): case _UT('\''): \
  case _UT('('): case _UT(')'): case _UT('*'): case _UT('+'): \
  case _UT(','): case _UT(';'): case _UT('=')
174
175
176
177
/* Case label list matching letters, decimal digits and the four
 * characters '-', '.', '_' and '~'.
 *
 * The expansion has no leading `case` and no trailing colon; write
 * `case URI_SET_UNRESERVED:` at the point of use. */
#define URI_SET_UNRESERVED \
       URI_SET_ALPHA: \
  case URI_SET_DIGIT: \
  case _UT('-'): case _UT('.'): case _UT('_'): case _UT('~')
184
185
186
187
0
/*
 * Checks whether the characters in [first, afterLast) are acceptable as the
 * path component of a URI.
 *
 * The related part of the grammar in RFC 3986 (section 3.3) reads:
 *
 *   path          = path-abempty    ; begins with "/" or is empty
 *                 / path-absolute   ; begins with "/" but not "//"
 *                 / path-noscheme   ; begins with a non-colon segment
 *                 / path-rootless   ; begins with a segment
 *                 / path-empty      ; zero characters
 *
 *   path-abempty  = *( "/" segment )
 *   path-absolute = "/" [ segment-nz *( "/" segment ) ]
 *   path-noscheme = segment-nz-nc *( "/" segment )
 *   path-rootless = segment-nz *( "/" segment )
 *   path-empty    = 0<pchar>
 *
 *   segment       = *pchar
 *   segment-nz    = 1*pchar
 *   segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 *                 ; non-zero-length segment without any colon ":"
 *
 *   pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 *
 * This function checks the simplified rule
 *
 *   path          = *( unreserved / pct-encoded / sub-delims / ":" / "@" / "/" )
 *
 * and leaves cases like "path1:/path2" and "//path1/path2" to the
 * pre-return removal of ambiguity inside SetPath.
 *
 * Parameters:
 *   first      - first character of the candidate path
 *   afterLast  - one past the last character of the candidate path
 *   hasHost    - URI_TRUE if the URI that would receive the path has a host
 *
 * Returns URI_FALSE if
 *   - either pointer is NULL,
 *   - hasHost is URI_TRUE and the range is empty or does not start with "/",
 *   - a "%" is not followed by two hex digits inside the range, or
 *   - any other character outside the simplified rule is found;
 * URI_TRUE otherwise.
 *
 * NOTE: A range with first > afterLast is not diagnosed here; without a host
 * the scan loop simply does not run and URI_TRUE is returned.
 */
UriBool URI_FUNC(IsWellFormedPath)(const URI_CHAR * first, const URI_CHAR * afterLast, UriBool hasHost) {
  const URI_CHAR * cursor = first;

  if ((first == NULL) || (afterLast == NULL)) {
    return URI_FALSE;
  }

  /* With a host, the path must be non-empty and begin with a slash */
  if (hasHost == URI_TRUE) {
    if ((first >= afterLast) || (first[0] != _UT('/'))) {
      return URI_FALSE;
    }
  }

  for (; cursor < afterLast; cursor++) {
    switch (cursor[0]) {
      /* Single characters that are fine as they are:
       * unreserved, sub-delims, ":" / "@" and "/" */
      case URI_SET_UNRESERVED:
      case URI_SET_SUB_DELIMS:
      case _UT(':'):
      case _UT('@'):
      case _UT('/'):
        break;

      /* pct-encoded, i.e. "%" followed by exactly two hex digits */
      case _UT('%'):
        {
          int offset;

          if (afterLast - cursor < 3) {
            return URI_FALSE;
          }

          for (offset = 1; offset <= 2; offset++) {
            switch (cursor[offset]) {
              case URI_SET_HEXDIG:
                break;
              default:
                return URI_FALSE;
            }
          }

          /* Skip both hex digits; the loop increment skips the "%" itself */
          cursor += 2;
        }
        break;

      default:
        return URI_FALSE;
    }
  }

  return URI_TRUE;
}
Unexecuted instantiation: uriIsWellFormedPathA
Unexecuted instantiation: uriIsWellFormedPathW
266
267
268
269
0
/*
 * Unlinks the first path segment of `uri` from the segment list and releases
 * the segment node through `memory`.
 *
 * Preconditions (asserted): `uri` and `memory` are non-NULL, the path list is
 * non-empty, and the first segment is empty (text.first == text.afterLast).
 *
 * Only the node itself is passed to memory->free; the text pointers of the
 * node are reset to NULL beforehand and are not freed here.
 *
 * If the dropped segment was the only one in the list, .pathTail is reset
 * to NULL together with .pathHead so that it cannot be left pointing at the
 * node that was just freed.
 */
static void URI_FUNC(DropEmptyFirstPathSegment)(URI_TYPE(Uri) * uri, UriMemoryManager * memory) {
  assert(uri != NULL);
  assert(memory != NULL);
  assert(uri->pathHead != NULL);
  assert(uri->pathHead->text.first == uri->pathHead->text.afterLast);

  {
    URI_TYPE(PathSegment) * const originalHead = uri->pathHead;

    uri->pathHead = originalHead->next;

    /* List became empty: keep head and tail consistent */
    if (uri->pathHead == NULL) {
      uri->pathTail = NULL;
    }

    originalHead->text.first = NULL;
    originalHead->text.afterLast = NULL;
    memory->free(memory, originalHead);
  }
}
Unexecuted instantiation: UriSetPath.c:uriDropEmptyFirstPathSegmentA
Unexecuted instantiation: UriSetPath.c:uriDropEmptyFirstPathSegmentW
285
286
287
288
/* URIs without a host encode a leading slash in the path as .absolutePath == URI_TRUE.
289
 * This function checks for a leading empty path segment (that would have the "visual effect"
290
 * of a leading slash during stringification) and transforms it into .absolutePath == URI_TRUE
291
 * instead, if present. */
292
0
static void URI_FUNC(TransformEmptyLeadPathSegments)(URI_TYPE(Uri) * uri, UriMemoryManager * memory) {
293
0
  assert(uri != NULL);
294
0
  assert(memory != NULL);
295
296
0
  if ((URI_FUNC(HasHost)(uri) == URI_TRUE)
297
0
      || (uri->pathHead == NULL)
298
0
      || (uri->pathHead->text.first != uri->pathHead->text.afterLast)) {
299
0
    return;  /* i.e. nothing to do */
300
0
  }
301
302
0
  assert(uri->absolutePath == URI_FALSE);
303
304
0
  URI_FUNC(DropEmptyFirstPathSegment)(uri, memory);
305
306
0
  uri->absolutePath = URI_TRUE;
307
0
}
Unexecuted instantiation: UriSetPath.c:uriTransformEmptyLeadPathSegmentsA
Unexecuted instantiation: UriSetPath.c:uriTransformEmptyLeadPathSegmentsW
308
309
310
311
static int URI_FUNC(InternalSetPath)(URI_TYPE(Uri) * destUri,
312
    const URI_CHAR * first,
313
    const URI_CHAR * afterLast,
314
0
    UriMemoryManager * memory) {
315
0
  assert(destUri != NULL);
316
0
  assert(first != NULL);
317
0
  assert(afterLast != NULL);
318
0
  assert(memory != NULL);
319
0
  assert(destUri->pathHead == NULL);  /* set by SetPathMm right before */
320
0
  assert(destUri->pathTail == NULL);  /* set by SetPathMm right before */
321
0
  assert(destUri->absolutePath == URI_FALSE);  /* set by SetPathMm right before */
322
323
  /* Skip the leading slash from target URIs with a host (so that we can
324
   * transfer the path 1:1 further down) */
325
0
  if (URI_FUNC(HasHost)(destUri) == URI_TRUE) {
326
    /* NOTE: This is because SetPathMm called IsWellFormedPath earlier: */
327
0
    assert((afterLast - first >= 1) && (first[0] == _UT('/')));
328
0
    first++;
329
0
  } else if (first == afterLast) {
330
      /* This avoids (1) all the expensive but unnecessary work below
331
       * and also (2) mis-encoding as single empty path segment
332
       * that would need (detection and) repair further down otherwise */
333
0
      return URI_SUCCESS;
334
0
  }
335
336
  /* Assemble "///.." input wrap for upcoming parse as a URI */
337
0
  {
338
0
    const size_t inputLenChars = (afterLast - first);
339
0
    const size_t MAX_SIZE_T = (size_t)-1;
340
341
    /* Detect overflow */
342
0
    if (MAX_SIZE_T - inputLenChars < 3 + 1) {
343
0
      return URI_ERROR_MALLOC;
344
0
    }
345
346
0
    {
347
0
      const size_t candidateLenChars = 3 + inputLenChars;
348
349
      /* Detect overflow */
350
0
      if (MAX_SIZE_T / sizeof(URI_CHAR) < candidateLenChars + 1) {
351
0
        return URI_ERROR_MALLOC;
352
0
      }
353
354
0
      {
355
0
        URI_CHAR * const candidate = memory->malloc(memory, (candidateLenChars + 1) * sizeof(URI_CHAR));
356
357
0
        if (candidate == NULL) {
358
0
          return URI_ERROR_MALLOC;
359
0
        }
360
361
0
        memcpy(candidate, _UT("///"), 3 * sizeof(URI_CHAR));
362
0
        memcpy(candidate + 3, first, inputLenChars * sizeof(URI_CHAR));
363
0
        candidate[3 + inputLenChars] = _UT('\0');
364
365
        /* Parse as an RFC 3986 URI */
366
0
        {
367
0
          URI_TYPE(Uri) tempUri;
368
0
          const int res = URI_FUNC(ParseSingleUriExMm)(&tempUri,
369
0
              candidate,
370
0
              candidate + candidateLenChars,
371
0
              NULL,
372
0
              memory);
373
0
          assert((res == URI_SUCCESS) || (res == URI_ERROR_SYNTAX) || (res == URI_ERROR_MALLOC));
374
0
          if (res != URI_SUCCESS) {
375
0
            memory->free(memory, candidate);
376
0
            return res;
377
0
          }
378
379
          /* Nothing but path and host is supposed to be set by the parse, in particular not: */
380
0
          assert(tempUri.query.first == NULL);
381
0
          assert(tempUri.fragment.first == NULL);
382
383
          /* Ensure that the strings in the path segments are all owned by `tempUri`
384
           * because we want to (1) rip out and keep the full path list further down
385
           * and (2) be able to free the parsed string (`candidate`) also. */
386
0
          {
387
0
            const int res = URI_FUNC(MakeOwnerMm)(&tempUri, memory);
388
0
            assert((res == URI_SUCCESS) || (res == URI_ERROR_MALLOC));
389
0
            if (res != URI_SUCCESS) {
390
0
              URI_FUNC(FreeUriMembersMm)(&tempUri, memory);
391
0
              memory->free(memory, candidate);
392
0
              return res;
393
0
            }
394
0
            assert(tempUri.owner == URI_TRUE);
395
0
          }
396
397
          /* Move path to destination URI */
398
0
          assert(tempUri.absolutePath == URI_FALSE);  /* always URI_FALSE for URIs with host */
399
0
          destUri->pathHead = tempUri.pathHead;
400
0
          destUri->pathTail = tempUri.pathTail;
401
0
          destUri->absolutePath = URI_FALSE;
402
403
0
          tempUri.pathHead = NULL;
404
0
          tempUri.pathTail = NULL;
405
406
          /* Free the rest of the temp URI */
407
0
          URI_FUNC(FreeUriMembersMm)(&tempUri, memory);
408
0
          memory->free(memory, candidate);
409
410
          /* Restore use of .absolutePath as needed */
411
0
          URI_FUNC(TransformEmptyLeadPathSegments)(destUri, memory);
412
413
          /* Disambiguate as needed */
414
0
          {
415
0
            const UriBool success = URI_FUNC(FixPathNoScheme)(destUri, memory);
416
0
            if (success == URI_FALSE) {
417
0
              return URI_ERROR_MALLOC;
418
0
            }
419
0
          }
420
0
          {
421
0
            const UriBool success = URI_FUNC(EnsureThatPathIsNotMistakenForHost)(destUri, memory);
422
0
            if (success == URI_FALSE) {
423
0
              return URI_ERROR_MALLOC;
424
0
            }
425
0
          }
426
0
        }
427
0
      }
428
0
    }
429
0
  }
430
431
0
  return URI_SUCCESS;
432
0
}
Unexecuted instantiation: UriSetPath.c:uriInternalSetPathA
Unexecuted instantiation: UriSetPath.c:uriInternalSetPathW
433
434
435
436
int URI_FUNC(SetPathMm)(URI_TYPE(Uri) * uri,
437
    const URI_CHAR * first,
438
    const URI_CHAR * afterLast,
439
0
    UriMemoryManager * memory) {
440
  /* Input validation (before making any changes) */
441
0
  if ((uri == NULL) || ((first == NULL) != (afterLast == NULL))) {
442
0
    return URI_ERROR_NULL;
443
0
  }
444
445
0
  URI_CHECK_MEMORY_MANAGER(memory);  /* may return */
446
447
0
  if ((first != NULL) && (URI_FUNC(IsWellFormedPath)(first, afterLast, URI_FUNC(HasHost)(uri)) == URI_FALSE)) {
448
0
    return URI_ERROR_SYNTAX;
449
0
  }
450
451
  /* Clear old value */
452
0
  {
453
0
    const int res = URI_FUNC(FreeUriPath)(uri, memory);
454
0
    if (res != URI_SUCCESS) {
455
0
      return res;
456
0
    }
457
0
    uri->absolutePath = URI_FALSE;
458
0
  }
459
460
  /* Already done? */
461
0
  if (first == NULL) {
462
0
    return URI_SUCCESS;
463
0
  }
464
465
0
  assert(first != NULL);
466
467
  /* Ensure owned */
468
0
  if (uri->owner == URI_FALSE) {
469
0
    const int res = URI_FUNC(MakeOwnerMm)(uri, memory);
470
0
    if (res != URI_SUCCESS) {
471
0
      return res;
472
0
    }
473
0
  }
474
475
0
  assert(uri->owner == URI_TRUE);
476
477
  /* Apply new value */
478
0
  {
479
0
    const int res = URI_FUNC(InternalSetPath)(uri, first, afterLast, memory);
480
0
    assert((res == URI_SUCCESS) || (res == URI_ERROR_SYNTAX) || (res == URI_ERROR_MALLOC));
481
0
    return res;
482
0
  }
483
0
}
Unexecuted instantiation: uriSetPathMmA
Unexecuted instantiation: uriSetPathMmW
484
485
486
487
int URI_FUNC(SetPath)(URI_TYPE(Uri) * uri,
488
    const URI_CHAR * first,
489
0
    const URI_CHAR * afterLast) {
490
0
  return URI_FUNC(SetPathMm)(uri, first, afterLast, NULL);
491
0
}
Unexecuted instantiation: uriSetPathA
Unexecuted instantiation: uriSetPathW
492
493
494
495
#endif