/src/php-src/ext/uri/uriparser/src/UriEscape.c

Source
/*
 * uriparser - RFC 3986 URI parsing library
 *
 * Copyright (C) 2007, Weijia Song <songweijia@gmail.com>
 * Copyright (C) 2007, Sebastian Pipping <sebastian@pipping.org>
 * All rights reserved.
 *
 * Redistribution and use in source  and binary forms, with or without
 * modification, are permitted provided  that the following conditions
 * are met:
 *
 *     1. Redistributions  of  source  code   must  retain  the  above
 *        copyright notice, this list  of conditions and the following
 *        disclaimer.
 *
 *     2. Redistributions  in binary  form  must  reproduce the  above
 *        copyright notice, this list  of conditions and the following
 *        disclaimer  in  the  documentation  and/or  other  materials
 *        provided with the distribution.
 *
 *     3. Neither the  name of the  copyright holder nor the  names of
 *        its contributors may be used  to endorse or promote products
 *        derived from  this software  without specific  prior written
 *        permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND  ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING, BUT NOT
 * LIMITED TO,  THE IMPLIED WARRANTIES OF  MERCHANTABILITY AND FITNESS
 * FOR  A  PARTICULAR  PURPOSE  ARE  DISCLAIMED.  IN  NO  EVENT  SHALL
 * THE  COPYRIGHT HOLDER  OR CONTRIBUTORS  BE LIABLE  FOR ANY  DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL,  EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA,  OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT  LIABILITY,  OR  TORT (INCLUDING  NEGLIGENCE  OR  OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* What encodings are enabled? */
#include <uriparser/UriDefsConfig.h>
#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
/* Include SELF twice */
#  ifdef URI_ENABLE_ANSI
#    define URI_PASS_ANSI 1
#    include "UriEscape.c"
#    undef URI_PASS_ANSI
#  endif
#  ifdef URI_ENABLE_UNICODE
#    define URI_PASS_UNICODE 1
#    include "UriEscape.c"
#    undef URI_PASS_UNICODE
#  endif
#else
#  ifdef URI_PASS_ANSI
#    include <uriparser/UriDefsAnsi.h>
#  else
#    include <uriparser/UriDefsUnicode.h>
#    include <wchar.h>
#  endif

#  ifndef URI_DOXYGEN
#    include <uriparser/Uri.h>
#    include "UriCommon.h"
#    include "UriSets.h"
#  endif

/*
 * Convenience wrapper around EscapeEx for zero-terminated input.
 *
 * in              - text to escape; handed to EscapeEx as inFirst
 * out             - destination buffer; handed to EscapeEx unchanged
 * spaceToPlus     - forwarded to EscapeEx
 * normalizeBreaks - forwarded to EscapeEx
 *
 * Returns whatever EscapeEx returns.
 */
URI_CHAR * URI_FUNC(Escape)(const URI_CHAR * in, URI_CHAR * out, UriBool spaceToPlus,
                            UriBool normalizeBreaks) {
    /* No explicit end pointer: EscapeEx then stops at the terminator only */
    const URI_CHAR * const inAfterLast = NULL;

    return URI_FUNC(EscapeEx)(in, inAfterLast, out, spaceToPlus, normalizeBreaks);
}

/*
 * Percent-encodes the text starting at inFirst and stores the result,
 * followed by a terminator, in out.
 *
 * inFirst         - first character to read; NULL produces an empty string
 * inAfterLast     - one past the last character to read, or NULL to read
 *                   up to the terminator (a terminator found earlier also
 *                   ends the input)
 * out             - destination buffer; no size is passed in, so the caller
 *                   has to provide enough room: one input character yields
 *                   at most 3 output characters, or at most 6 when
 *                   normalizeBreaks is set, plus the terminator
 * spaceToPlus     - write ' ' as '+' instead of "%20"
 * normalizeBreaks - write CR, LF and CR+LF uniformly as "%0D%0A"
 *
 * Returns the position of the terminator written to out, or NULL if out
 * is NULL or identical to inFirst.
 */
URI_CHAR * URI_FUNC(EscapeEx)(const URI_CHAR * inFirst, const URI_CHAR * inAfterLast,
                              URI_CHAR * out, UriBool spaceToPlus,
                              UriBool normalizeBreaks) {
    const URI_CHAR * source = inFirst;
    URI_CHAR * target = out;
    UriBool prevWasCr = URI_FALSE;

    /* Refuse a missing output buffer and escaping a buffer onto itself */
    if ((out == NULL) || (inFirst == out)) {
        return NULL;
    }

    /* No input at all yields the empty string */
    if (inFirst == NULL) {
        out[0] = _UT('\0');
        return out;
    }

    /* The range test comes first so that source[0] is never read at or *
     * beyond inAfterLast                                               */
    for (; ((inAfterLast == NULL) || (source < inAfterLast))
           && (source[0] != _UT('\0'));
         source++) {
        /* Remember the state of the previous character, then assume "no CR"; *
         * only the CR case below flips it back on                           */
        const UriBool followsCr = prevWasCr;
        prevWasCr = URI_FALSE;

        switch (source[0]) {
        case _UT(' '):
            if (spaceToPlus) {
                *target++ = _UT('+');
            } else {
                *target++ = _UT('%');
                *target++ = _UT('2');
                *target++ = _UT('0');
            }
            break;

        case URI_SET_UNRESERVED(_UT):
            /* Passed through as is */
            *target++ = source[0];
            break;

        case _UT('\x0a'):
            if (normalizeBreaks) {
                if (followsCr) {
                    /* Second half of CR+LF: the preceding CR has *
                     * already produced the full "%0D%0A"         */
                    break;
                }
                *target++ = _UT('%');
                *target++ = _UT('0');
                *target++ = _UT('D');
            }
            *target++ = _UT('%');
            *target++ = _UT('0');
            *target++ = _UT('A');
            break;

        case _UT('\x0d'):
            *target++ = _UT('%');
            *target++ = _UT('0');
            *target++ = _UT('D');
            if (normalizeBreaks) {
                *target++ = _UT('%');
                *target++ = _UT('0');
                *target++ = _UT('A');
            }
            prevWasCr = URI_TRUE;
            break;

        default: {
            /* Everything else is percent-encoded. Only the value left after *
             * conversion to unsigned char is encoded.                       *
             * NOTE(review): for the wide character build this presumably    *
             * drops the higher bits of the character -- confirm.            */
            const unsigned char code = (unsigned char)source[0];
            /* Uppercase recommended in (last sentence of) section 2.1   *
             * of RFC 3986:                                              *
             * https://datatracker.ietf.org/doc/html/rfc3986#section-2.1 */
            *target++ = _UT('%');
            *target++ = URI_FUNC(HexToLetterEx)(code >> 4, URI_TRUE);
            *target++ = URI_FUNC(HexToLetterEx)(code & 0x0f, URI_TRUE);
        } break;
        }
    }

    target[0] = _UT('\0');
    return target;
}

/*
 * Convenience wrapper around UnescapeInPlaceEx: '+' is left alone and
 * decoded line breaks are not converted.
 *
 * inout - zero-terminated text to decode in place
 *
 * Returns whatever UnescapeInPlaceEx returns.
 */
const URI_CHAR * URI_FUNC(UnescapeInPlace)(URI_CHAR * inout) {
    const UriBool plusToSpace = URI_FALSE;
    const UriBreakConversion breakConversion = URI_BR_DONT_TOUCH;

    return URI_FUNC(UnescapeInPlaceEx)(inout, plusToSpace, breakConversion);
}

/*
 * Decodes percent-encoded groups ("%" followed by two hex digits) of the
 * zero-terminated text inout, overwriting the text in place.
 *
 * inout           - text to decode; NULL is answered with NULL
 * plusToSpace     - turn every '+' into ' '
 * breakConversion - what to write for a decoded line break (%0D, %0A or
 *                   %0D%0A): a single LF, CR+LF or a single CR; with
 *                   URI_BR_DONT_TOUCH (or any other value) the decoded
 *                   characters are stored as they are
 *
 * A '%' that is not followed by two hex digits is kept literally.
 *
 * Returns the position of the terminator of the decoded text.
 */
const URI_CHAR * URI_FUNC(UnescapeInPlaceEx)(URI_CHAR * inout, UriBool plusToSpace,
                                             UriBreakConversion breakConversion) {
    URI_CHAR * src = inout;
    URI_CHAR * dst = inout;
    UriBool prevWasCr = URI_FALSE;

    if (inout == NULL) {
        return NULL;
    }

    while (src[0] != _UT('\0')) {
        /* Number of input characters to carry over unchanged at the end *
         * of this round; 0 means the input has been consumed already    */
        int verbatim = 1;
        UriBool nowCr = URI_FALSE;

        if (src[0] == _UT('%')) {
            switch (src[1]) {
            case URI_SET_HEXDIG(_UT):
                switch (src[2]) {
                case URI_SET_HEXDIG(_UT): {
                    /* Complete percent group */
                    const unsigned char high = URI_FUNC(HexdigToInt)(src[1]);
                    const unsigned char low = URI_FUNC(HexdigToInt)(src[2]);
                    const int code = 16 * high + low;

                    verbatim = 0;
                    if ((code == 10) || (code == 13)) {
                        const UriBool isCr = (code == 13) ? URI_TRUE : URI_FALSE;
                        /* When converting, an LF right behind a CR is   *
                         * dropped: the CR has produced the break already */
                        const UriBool emit =
                            (isCr || !prevWasCr) ? URI_TRUE : URI_FALSE;

                        switch (breakConversion) {
                        case URI_BR_TO_LF:
                            if (emit) {
                                *dst++ = (URI_CHAR)10;
                            }
                            break;

                        case URI_BR_TO_CRLF:
                            if (emit) {
                                *dst++ = (URI_CHAR)13;
                                *dst++ = (URI_CHAR)10;
                            }
                            break;

                        case URI_BR_TO_CR:
                            if (emit) {
                                *dst++ = (URI_CHAR)13;
                            }
                            break;

                        case URI_BR_DONT_TOUCH:
                        default:
                            /* Store the decoded character untouched */
                            *dst++ = (URI_CHAR)(code);
                            break;
                        }
                        nowCr = isCr;
                    } else {
                        *dst++ = (URI_CHAR)(code);
                    }
                    src += 3;
                } break;

                default:
                    /* '%' and one hex digit only: keep both characters, *
                     * the character behind them is examined next round  */
                    verbatim = 2;
                    break;
                }
                break;

            default:
                /* Lone '%': keep it, the character behind it is *
                 * examined next round                           */
                break;
            }
        } else if ((src[0] == _UT('+')) && plusToSpace) {
            *dst++ = _UT(' ');
            src++;
            verbatim = 0;
        }

        if (verbatim > 0) {
            /* While nothing has shrunk yet, source and destination are *
             * the same cell and there is nothing to move               */
            if (src > dst) {
                dst[0] = src[0];
                if (verbatim == 2) {
                    dst[1] = src[1];
                }
            }
            src += verbatim;
            dst += verbatim;
        }

        prevWasCr = nowCr;
    }

    /* If the text did not shrink, the original terminator is in place */
    if (src > dst) {
        dst[0] = _UT('\0');
    }
    return dst;
}

#endif

Coverage Report

Created: 2026-01-18 06:47

Line	Count	Source
1		/*
2		* uriparser - RFC 3986 URI parsing library
3		*
4		* Copyright (C) 2007, Weijia Song <songweijia@gmail.com>
5		* Copyright (C) 2007, Sebastian Pipping <sebastian@pipping.org>
6		* All rights reserved.
7		*
8		* Redistribution and use in source and binary forms, with or without
9		* modification, are permitted provided that the following conditions
10		* are met:
11		*
12		* 1. Redistributions of source code must retain the above
13		* copyright notice, this list of conditions and the following
14		* disclaimer.
15		*
16		* 2. Redistributions in binary form must reproduce the above
17		* copyright notice, this list of conditions and the following
18		* disclaimer in the documentation and/or other materials
19		* provided with the distribution.
20		*
21		* 3. Neither the name of the copyright holder nor the names of
22		* its contributors may be used to endorse or promote products
23		* derived from this software without specific prior written
24		* permission.
25		*
26		* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27		* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28		* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
29		* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
30		* THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
31		* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
32		* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
33		* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34		* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
35		* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36		* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
37		* OF THE POSSIBILITY OF SUCH DAMAGE.
38		*/
39
40		/* What encodings are enabled? */
41		#include <uriparser/UriDefsConfig.h>
42		#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
43		/* Include SELF twice */
44		# ifdef URI_ENABLE_ANSI
45		# define URI_PASS_ANSI 1
46		# include "UriEscape.c"
47		# undef URI_PASS_ANSI
48		# endif
49		# ifdef URI_ENABLE_UNICODE
50		# define URI_PASS_UNICODE 1
51		# include "UriEscape.c"
52		# undef URI_PASS_UNICODE
53		# endif
54		#else
55		# ifdef URI_PASS_ANSI
56		# include <uriparser/UriDefsAnsi.h>
57		# else
58		# include <uriparser/UriDefsUnicode.h>
59		# include <wchar.h>
60		# endif
61
62		# ifndef URI_DOXYGEN
63		# include <uriparser/Uri.h>
64		# include "UriCommon.h"
65		# include "UriSets.h"
66		# endif
67
68		URI_CHAR * URI_FUNC(Escape)(const URI_CHAR * in, URI_CHAR * out, UriBool spaceToPlus,
69	0	UriBool normalizeBreaks) {
70	0	return URI_FUNC(EscapeEx)(in, NULL, out, spaceToPlus, normalizeBreaks);
71	0	} Unexecuted instantiation: uriEscapeA Unexecuted instantiation: uriEscapeW
72
73		URI_CHAR * URI_FUNC(EscapeEx)(const URI_CHAR * inFirst, const URI_CHAR * inAfterLast,
74		URI_CHAR * out, UriBool spaceToPlus,
75	0	UriBool normalizeBreaks) {
76	0	const URI_CHAR * read = inFirst;
77	0	URI_CHAR * write = out;
78	0	UriBool prevWasCr = URI_FALSE;
79	0	if ((out == NULL) || (inFirst == out)) {
80	0	return NULL;
81	0	} else if (inFirst == NULL) {
82	0	if (out != NULL) {
83	0	out[0] = _UT('\0');
84	0	}
85	0	return out;
86	0	}
87
88	0	for (;;) {
89	0	if ((inAfterLast != NULL) && (read >= inAfterLast)) {
90	0	write[0] = _UT('\0');
91	0	return write;
92	0	}
93
94	0	switch (read[0]) {
95	0	case _UT('\0'):
96	0	write[0] = _UT('\0');
97	0	return write;
98
99	0	case _UT(' '):
100	0	if (spaceToPlus) {
101	0	write[0] = _UT('+');
102	0	write++;
103	0	} else {
104	0	write[0] = _UT('%');
105	0	write[1] = _UT('2');
106	0	write[2] = _UT('0');
107	0	write += 3;
108	0	}
109	0	prevWasCr = URI_FALSE;
110	0	break;
111
112	0	case URI_SET_UNRESERVED(_UT):
113		/* Copy unmodified */
114	0	write[0] = read[0];
115	0	write++;
116
117	0	prevWasCr = URI_FALSE;
118	0	break;
119
120	0	case _UT('\x0a'):
121	0	if (normalizeBreaks) {
122	0	if (!prevWasCr) {
123	0	write[0] = _UT('%');
124	0	write[1] = _UT('0');
125	0	write[2] = _UT('D');
126	0	write[3] = _UT('%');
127	0	write[4] = _UT('0');
128	0	write[5] = _UT('A');
129	0	write += 6;
130	0	}
131	0	} else {
132	0	write[0] = _UT('%');
133	0	write[1] = _UT('0');
134	0	write[2] = _UT('A');
135	0	write += 3;
136	0	}
137	0	prevWasCr = URI_FALSE;
138	0	break;
139
140	0	case _UT('\x0d'):
141	0	if (normalizeBreaks) {
142	0	write[0] = _UT('%');
143	0	write[1] = _UT('0');
144	0	write[2] = _UT('D');
145	0	write[3] = _UT('%');
146	0	write[4] = _UT('0');
147	0	write[5] = _UT('A');
148	0	write += 6;
149	0	} else {
150	0	write[0] = _UT('%');
151	0	write[1] = _UT('0');
152	0	write[2] = _UT('D');
153	0	write += 3;
154	0	}
155	0	prevWasCr = URI_TRUE;
156	0	break;
157
158	0	default:
159		/* Percent encode */
160	0	{
161	0	const unsigned char code = (unsigned char)read[0];
162		/* Uppercase recommended in (last sentence of) section 2.1 *
163		* of RFC 3986: *
164		* https://datatracker.ietf.org/doc/html/rfc3986#section-2.1 */
165	0	write[0] = _UT('%');
166	0	write[1] = URI_FUNC(HexToLetterEx)(code >> 4, URI_TRUE);
167	0	write[2] = URI_FUNC(HexToLetterEx)(code & 0x0f, URI_TRUE);
168	0	write += 3;
169	0	}
170	0	prevWasCr = URI_FALSE;
171	0	break;
172	0	}
173
174	0	read++;
175	0	}
176	0	} Unexecuted instantiation: uriEscapeExA Unexecuted instantiation: uriEscapeExW
177
178	0	const URI_CHAR * URI_FUNC(UnescapeInPlace)(URI_CHAR * inout) {
179	0	return URI_FUNC(UnescapeInPlaceEx)(inout, URI_FALSE, URI_BR_DONT_TOUCH);
180	0	} Unexecuted instantiation: uriUnescapeInPlaceA Unexecuted instantiation: uriUnescapeInPlaceW
181
182		const URI_CHAR * URI_FUNC(UnescapeInPlaceEx)(URI_CHAR * inout, UriBool plusToSpace,
183	0	UriBreakConversion breakConversion) {
184	0	URI_CHAR * read = inout;
185	0	URI_CHAR * write = inout;
186	0	UriBool prevWasCr = URI_FALSE;
187
188	0	if (inout == NULL) {
189	0	return NULL;
190	0	}
191
192	0	for (;;) {
193	0	switch (read[0]) {
194	0	case _UT('\0'):
195	0	if (read > write) {
196	0	write[0] = _UT('\0');
197	0	}
198	0	return write;
199
200	0	case _UT('%'):
201	0	switch (read[1]) {
202	0	case URI_SET_HEXDIG(_UT):
203	0	switch (read[2]) {
204	0	case URI_SET_HEXDIG(_UT): {
205		/* Percent group found */
206	0	const unsigned char left = URI_FUNC(HexdigToInt)(read[1]);
207	0	const unsigned char right = URI_FUNC(HexdigToInt)(read[2]);
208	0	const int code = 16 * left + right;
209	0	switch (code) {
210	0	case 10:
211	0	switch (breakConversion) {
212	0	case URI_BR_TO_LF:
213	0	if (!prevWasCr) {
214	0	write[0] = (URI_CHAR)10;
215	0	write++;
216	0	}
217	0	break;
218
219	0	case URI_BR_TO_CRLF:
220	0	if (!prevWasCr) {
221	0	write[0] = (URI_CHAR)13;
222	0	write[1] = (URI_CHAR)10;
223	0	write += 2;
224	0	}
225	0	break;
226
227	0	case URI_BR_TO_CR:
228	0	if (!prevWasCr) {
229	0	write[0] = (URI_CHAR)13;
230	0	write++;
231	0	}
232	0	break;
233
234	0	case URI_BR_DONT_TOUCH:
235	0	default:
236	0	write[0] = (URI_CHAR)10;
237	0	write++;
238	0	}
239	0	prevWasCr = URI_FALSE;
240	0	break;
241
242	0	case 13:
243	0	switch (breakConversion) {
244	0	case URI_BR_TO_LF:
245	0	write[0] = (URI_CHAR)10;
246	0	write++;
247	0	break;
248
249	0	case URI_BR_TO_CRLF:
250	0	write[0] = (URI_CHAR)13;
251	0	write[1] = (URI_CHAR)10;
252	0	write += 2;
253	0	break;
254
255	0	case URI_BR_TO_CR:
256	0	write[0] = (URI_CHAR)13;
257	0	write++;
258	0	break;
259
260	0	case URI_BR_DONT_TOUCH:
261	0	default:
262	0	write[0] = (URI_CHAR)13;
263	0	write++;
264	0	}
265	0	prevWasCr = URI_TRUE;
266	0	break;
267
268	0	default:
269	0	write[0] = (URI_CHAR)(code);
270	0	write++;
271
272	0	prevWasCr = URI_FALSE;
273	0	}
274	0	read += 3;
275	0	} break;
276
277	0	default:
278		/* Copy two chars unmodified and */
279		/* look at this char again */
280	0	if (read > write) {
281	0	write[0] = read[0];
282	0	write[1] = read[1];
283	0	}
284	0	read += 2;
285	0	write += 2;
286
287	0	prevWasCr = URI_FALSE;
288	0	}
289	0	break;
290
291	0	default:
292		/* Copy one char unmodified and */
293		/* look at this char again */
294	0	if (read > write) {
295	0	write[0] = read[0];
296	0	}
297	0	read++;
298	0	write++;
299
300	0	prevWasCr = URI_FALSE;
301	0	}
302	0	break;
303
304	0	case _UT('+'):
305	0	if (plusToSpace) {
306		/* Convert '+' to ' ' */
307	0	write[0] = _UT(' ');
308	0	} else {
309		/* Copy one char unmodified */
310	0	if (read > write) {
311	0	write[0] = read[0];
312	0	}
313	0	}
314	0	read++;
315	0	write++;
316
317	0	prevWasCr = URI_FALSE;
318	0	break;
319
320	0	default:
321		/* Copy one char unmodified */
322	0	if (read > write) {
323	0	write[0] = read[0];
324	0	}
325	0	read++;
326	0	write++;
327
328	0	prevWasCr = URI_FALSE;
329	0	}
330	0	}
331	0	} Unexecuted instantiation: uriUnescapeInPlaceExA Unexecuted instantiation: uriUnescapeInPlaceExW
332
333		#endif