/src/samba/lib/util/rfc1738.c
Line | Count | Source |
1 | | /* |
2 | | * Functions for RFC 3986 percent-encoding. |
3 | | * |
4 | | * NOTE: |
5 | | * |
6 | | * This file was originally imported from the Squid project but has been |
7 | | * significantly altered. The licence below is reproduced intact, but refers |
8 | | * to files in Squid's repository, not in Samba. See COPYING for the GPLv3 |
9 | | * notice (being the later version mentioned below). |
10 | | */ |
11 | | |
12 | | /* |
13 | | * $Id$ |
14 | | * |
15 | | * DEBUG: |
16 | | * AUTHOR: Harvest Derived |
17 | | * |
18 | | * SQUID Web Proxy Cache http://www.squid-cache.org/ |
19 | | * ---------------------------------------------------------- |
20 | | * |
21 | | * Squid is the result of efforts by numerous individuals from |
22 | | * the Internet community; see the CONTRIBUTORS file for full |
23 | | * details. Many organizations have provided support for Squid's |
24 | | * development; see the SPONSORS file for full details. Squid is |
25 | | * Copyrighted (C) 2001 by the Regents of the University of |
26 | | * California; see the COPYRIGHT file for full details. Squid |
27 | | * incorporates software developed and/or copyrighted by other |
28 | | * sources; see the CREDITS file for full details. |
29 | | * |
30 | | * This program is free software; you can redistribute it and/or modify |
31 | | * it under the terms of the GNU General Public License as published by |
32 | | * the Free Software Foundation; either version 2 of the License, or |
33 | | * (at your option) any later version. |
34 | | * |
35 | | * This program is distributed in the hope that it will be useful, |
36 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
37 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
38 | | * GNU General Public License for more details. |
39 | | * |
40 | | * You should have received a copy of the GNU General Public License |
41 | | * along with this program; if not, write to the Free Software |
42 | | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA. |
43 | | * |
44 | | */ |
45 | | |
46 | | #include "replace.h" |
47 | | #include <talloc.h> |
48 | | #include "lib/util/samba_util.h" |
49 | | |
/* Flag bits stored in the escapees[] table below. */
#define RFC1738_ENCODE   1	/* "unsafe": escaped in canonical form only */
#define RFC1738_RESERVED 2	/* RFC 3986 reserved character */

/*
 * RFC 1738 treated "$-_.+!*'()," as neither reserved nor unsafe, but that
 * document has been obsolete since 2004, so we aim for RFC 3986 instead:
 *
 *   reserved   = : / ? # [ ] @ ! $ & ' ( ) * + , ; =
 *   unreserved = ALPHA DIGIT - . _ ~
 *
 * Any printable ASCII character in neither set is what RFC 1738 called
 * "unsafe", meaning that it is canonically, but not mandatorily, escaped.
 *
 * Characters below 0x20 or above 0x7E are always encoded; they are never
 * looked up here, which is why the table has only 127 slots (0x00..0x7E).
 *
 * Entries are listed in ASCII order; anything not listed is zero, i.e. it
 * is never escaped on account of this table.
 */
static const unsigned char escapees[127] = {
	[' ']  = RFC1738_ENCODE,	/* 0x20 */
	['!']  = RFC1738_RESERVED,	/* 0x21 */
	['"']  = RFC1738_ENCODE,	/* 0x22 */
	['#']  = RFC1738_RESERVED,	/* 0x23 */
	['$']  = RFC1738_RESERVED,	/* 0x24 */
	['%']  = RFC1738_ENCODE,	/* 0x25 */
	['&']  = RFC1738_RESERVED,	/* 0x26 */
	['\''] = RFC1738_RESERVED,	/* 0x27 */
	['(']  = RFC1738_RESERVED,	/* 0x28 */
	[')']  = RFC1738_RESERVED,	/* 0x29 */
	['*']  = RFC1738_RESERVED,	/* 0x2A */
	['+']  = RFC1738_RESERVED,	/* 0x2B */
	[',']  = RFC1738_RESERVED,	/* 0x2C */
	['/']  = RFC1738_RESERVED,	/* 0x2F */
	[':']  = RFC1738_RESERVED,	/* 0x3A */
	[';']  = RFC1738_RESERVED,	/* 0x3B */
	['<']  = RFC1738_ENCODE,	/* 0x3C */
	['=']  = RFC1738_RESERVED,	/* 0x3D */
	['>']  = RFC1738_ENCODE,	/* 0x3E */
	['?']  = RFC1738_RESERVED,	/* 0x3F */
	['@']  = RFC1738_RESERVED,	/* 0x40 */
	['[']  = RFC1738_RESERVED,	/* 0x5B */
	['\\'] = RFC1738_ENCODE,	/* 0x5C */
	[']']  = RFC1738_RESERVED,	/* 0x5D */
	['^']  = RFC1738_ENCODE,	/* 0x5E */
	['`']  = RFC1738_ENCODE,	/* 0x60 */
	['{']  = RFC1738_ENCODE,	/* 0x7B */
	['|']  = RFC1738_ENCODE,	/* 0x7C */
	['}']  = RFC1738_ENCODE,	/* 0x7D */
};
98 | | |
99 | | /* |
100 | | * rfc1738_do_escape - fills a preallocated buffer with an escaped version of |
101 | | * the given string. |
102 | | * |
103 | | * For canonical escaping, mask should be RFC1738_ENCODE | RFC1738_RESERVED. |
104 | | * For mandatory escaping, mask should be RFC1738_RESERVED. |
105 | | */ |
106 | | static char * |
107 | | rfc1738_do_escape(char *buf, size_t bufsize, |
108 | | const char *url, size_t len, unsigned char mask) |
109 | 0 | { |
110 | 0 | size_t i; |
111 | 0 | size_t j = 0; |
112 | 0 | for (i = 0; i < len; i++) { |
113 | 0 | unsigned int c = (unsigned char) url[i]; |
114 | 0 | if (c > 126 || c < 32 || (escapees[c] & mask)) { |
115 | 0 | if (j + 3 >= bufsize) { |
116 | 0 | return NULL; |
117 | 0 | } |
118 | 0 | (void) snprintf(&buf[j], 4, "%%%02X", c); |
119 | 0 | j += 3; |
120 | 0 | } else { |
121 | 0 | if (j + 1 >= bufsize) { |
122 | 0 | return NULL; |
123 | 0 | } |
124 | 0 | buf[j] = c; |
125 | 0 | j++; |
126 | 0 | } |
127 | 0 | } |
128 | 0 | buf[j] = '\0'; |
129 | 0 | return buf; |
130 | 0 | } |
131 | | |
132 | | /* |
133 | | * rfc1738_escape_part - Returns a talloced buffer that contains the RFC 3986 |
134 | | * compliant, escaped version of the given url segment. |
135 | | */ |
136 | | char * |
137 | | rfc1738_escape_part(TALLOC_CTX *mem_ctx, const char *url) |
138 | 0 | { |
139 | 0 | size_t bufsize = 0; |
140 | 0 | char *buf = NULL; |
141 | |
|
142 | 0 | size_t len = strlen(url); |
143 | 0 | if (len >= SIZE_MAX / 3) { |
144 | 0 | return NULL; |
145 | 0 | } |
146 | | |
147 | 0 | bufsize = len * 3 + 1; |
148 | 0 | buf = talloc_array(mem_ctx, char, bufsize); |
149 | 0 | if (buf == NULL) { |
150 | 0 | return NULL; |
151 | 0 | } |
152 | | |
153 | 0 | talloc_set_name_const(buf, buf); |
154 | |
|
155 | 0 | return rfc1738_do_escape(buf, bufsize, url, len, |
156 | 0 | RFC1738_ENCODE | RFC1738_RESERVED); |
157 | 0 | } |
158 | | |
159 | | /* |
160 | | * rfc1738_unescape() - Converts url-escaped characters in the string. |
161 | | * |
162 | | * The two characters following a '%' in a string should be hex digits that |
163 | | * describe an encoded byte. For example, "%25" is hex 0x25 or '%' in ASCII; |
164 | | * this is the only way to include a % in the unescaped string. Any character |
165 | | * can be escaped, including plain letters (e.g. "%61" for "a"). Anything |
166 | | * other than 2 hex characters following the % is an error. |
167 | | * |
168 | | * The conversion is done in-place, which is always safe as unescapes can only |
169 | | * shorten the string. |
170 | | * |
171 | | * Returns a pointer to the end of the string (that is, the '\0' byte), or |
172 | | * NULL on error, at which point s is in an undefined state. |
173 | | * |
174 | | * Note that after `char *e = rfc_unescape(s)`, `strlen(s)` will not equal |
175 | | * `e - s` if s originally contained "%00". You might want to check for this. |
176 | | */ |
177 | | |
178 | | _PUBLIC_ char *rfc1738_unescape(char *s) |
179 | 0 | { |
180 | 0 | size_t i, j; /* i is write, j is read */ |
181 | 0 | for (i = 0, j = 0; s[j] != '\0'; i++, j++) { |
182 | 0 | if (s[j] == '%') { |
183 | 0 | uint8_t v; |
184 | 0 | bool ok; |
185 | |
|
186 | 0 | ok = hex_byte(&s[j+1], &v); |
187 | 0 | if (!ok) { |
188 | 0 | return NULL; |
189 | 0 | } |
190 | 0 | j += 2; /* OK; hex_byte() has checked ahead */ |
191 | 0 | s[i] = (unsigned char)v; |
192 | 0 | } else { |
193 | 0 | s[i] = s[j]; |
194 | 0 | } |
195 | 0 | } |
196 | 0 | s[i] = '\0'; |
197 | 0 | return s + i; |
198 | 0 | } |