/src/c-ares/src/lib/ares_expand_name.c
Line | Count | Source (jump to first uncovered line) |
1 | | |
2 | | /* Copyright 1998, 2011 by the Massachusetts Institute of Technology. |
3 | | * |
4 | | * Permission to use, copy, modify, and distribute this |
5 | | * software and its documentation for any purpose and without |
6 | | * fee is hereby granted, provided that the above copyright |
7 | | * notice appear in all copies and that both that copyright |
8 | | * notice and this permission notice appear in supporting |
9 | | * documentation, and that the name of M.I.T. not be used in |
10 | | * advertising or publicity pertaining to distribution of the |
11 | | * software without specific, written prior permission. |
12 | | * M.I.T. makes no representations about the suitability of |
13 | | * this software for any purpose. It is provided "as is" |
14 | | * without express or implied warranty. |
15 | | * |
16 | | * SPDX-License-Identifier: MIT |
17 | | */ |
18 | | |
19 | | #include "ares_setup.h" |
20 | | |
21 | | #ifdef HAVE_NETINET_IN_H |
22 | | # include <netinet/in.h> |
23 | | #endif |
24 | | |
25 | | #include "ares_nameser.h" |
26 | | |
27 | | #include "ares.h" |
28 | | #include "ares_nowarn.h" |
29 | | #include "ares_private.h" /* for the memdebug */ |
30 | | |
/* Upper bound on the number of compression-pointer indirections that
 * may be followed while measuring a single encoded name. */
#define MAX_INDIRS 50

static int name_length(const unsigned char *encoded, const unsigned char *abuf,
                       int alen, int is_hostname);
36 | | |
/* Returns 1 when ch is one of the characters that must be backslash-escaped
 * in an expanded name ( " . ; \ ( ) @ $ ), and 0 for anything else. */
static int is_reservedch(int ch)
{
  return (ch == '"'  || ch == '.' || ch == ';' || ch == '\\' ||
          ch == '('  || ch == ')' || ch == '@' || ch == '$')
           ? 1
           : 0;
}
56 | | |
/* Locale-independent printable test: returns 1 for the ASCII range
 * 0x20 (space) through 0x7E ('~') inclusive, 0 otherwise. */
static int ares__isprint(int ch)
{
  return (ch < 0x20 || ch > 0x7E) ? 0 : 1;
}
63 | | |
/* Character set accepted in hostnames: [A-Za-z0-9-*._/].
 *
 * Beyond the usual domain-name letters, digits, '-' and '.', this admits:
 *  - '_' as used in SRV records,
 *  - '/' as used for classless in-addr.arpa delegation (CNAMEs),
 *  - '*' as seen in real-world wildcard CNAMEs.
 *
 * RFC 2181 section 11 says not to validate, but that is aimed at servers,
 * not clients.  Vulnerabilities have been reported when this validation is
 * skipped, so security is preferred over edge-case compatibility.
 *
 * isalnum() is deliberately avoided because it is locale-specific.
 *
 * Returns 1 if ch is allowed, 0 otherwise.
 */
static int is_hostnamech(int ch)
{
  const int upper = (ch >= 'A' && ch <= 'Z');
  const int lower = (ch >= 'a' && ch <= 'z');
  const int digit = (ch >= '0' && ch <= '9');

  switch (ch) {
    case '-':
    case '.':
    case '_':
    case '/':
    case '*':
      return 1;
    default:
      break;
  }

  return (upper || lower || digit) ? 1 : 0;
}
92 | | |
93 | | /* Expand an RFC1035-encoded domain name given by encoded. The |
94 | | * containing message is given by abuf and alen. The result given by |
95 | | * *s, which is set to a NUL-terminated allocated buffer. *enclen is |
96 | | * set to the length of the encoded name (not the length of the |
97 | | * expanded name; the goal is to tell the caller how many bytes to |
98 | | * move forward to get past the encoded name). |
99 | | * |
100 | | * In the simple case, an encoded name is a series of labels, each |
101 | | * composed of a one-byte length (limited to values between 0 and 63 |
102 | | * inclusive) followed by the label contents. The name is terminated |
103 | | * by a zero-length label. |
104 | | * |
105 | | * In the more complicated case, a label may be terminated by an |
106 | | * indirection pointer, specified by two bytes with the high bits of |
107 | | * the first byte (corresponding to INDIR_MASK) set to 11. With the |
108 | | * two high bits of the first byte stripped off, the indirection |
109 | | * pointer gives an offset from the beginning of the containing |
110 | | * message with more labels to decode. Indirection can happen an |
111 | | * arbitrary number of times, so we have to detect loops. |
112 | | * |
113 | | * Since the expanded name uses '.' as a label separator, we use |
114 | | * backslashes to escape periods or backslashes in the expanded name. |
115 | | * |
116 | | * If the result is expected to be a hostname, then no escaped data is allowed |
117 | | * and will return error. |
118 | | */ |
119 | | |
120 | | int ares__expand_name_validated(const unsigned char *encoded, |
121 | | const unsigned char *abuf, |
122 | | int alen, char **s, long *enclen, |
123 | | int is_hostname) |
124 | 2.92M | { |
125 | 2.92M | int len, indir = 0; |
126 | 2.92M | char *q; |
127 | 2.92M | const unsigned char *p; |
128 | 2.92M | union { |
129 | 2.92M | ares_ssize_t sig; |
130 | 2.92M | size_t uns; |
131 | 2.92M | } nlen; |
132 | | |
133 | 2.92M | nlen.sig = name_length(encoded, abuf, alen, is_hostname); |
134 | 2.92M | if (nlen.sig < 0) |
135 | 10.6k | return ARES_EBADNAME; |
136 | | |
137 | 2.91M | *s = ares_malloc(nlen.uns + 1); |
138 | 2.91M | if (!*s) |
139 | 0 | return ARES_ENOMEM; |
140 | 2.91M | q = *s; |
141 | | |
142 | 2.91M | if (nlen.uns == 0) { |
143 | | /* RFC2181 says this should be ".": the root of the DNS tree. |
144 | | * Since this function strips trailing dots though, it becomes "" |
145 | | */ |
146 | 2.88M | q[0] = '\0'; |
147 | | |
148 | | /* indirect root label (like 0xc0 0x0c) is 2 bytes long (stupid, but |
149 | | valid) */ |
150 | 2.88M | if ((*encoded & INDIR_MASK) == INDIR_MASK) |
151 | 443 | *enclen = 2L; |
152 | 2.88M | else |
153 | 2.88M | *enclen = 1L; /* the caller should move one byte to get past this */ |
154 | | |
155 | 2.88M | return ARES_SUCCESS; |
156 | 2.88M | } |
157 | | |
158 | | /* No error-checking necessary; it was all done by name_length(). */ |
159 | 36.9k | p = encoded; |
160 | 12.9M | while (*p) |
161 | 12.9M | { |
162 | 12.9M | if ((*p & INDIR_MASK) == INDIR_MASK) |
163 | 19.1k | { |
164 | 19.1k | if (!indir) |
165 | 11.5k | { |
166 | 11.5k | *enclen = aresx_uztosl(p + 2U - encoded); |
167 | 11.5k | indir = 1; |
168 | 11.5k | } |
169 | 19.1k | p = abuf + ((*p & ~INDIR_MASK) << 8 | *(p + 1)); |
170 | 19.1k | } |
171 | 12.8M | else |
172 | 12.8M | { |
173 | 12.8M | int name_len = *p; |
174 | 12.8M | len = name_len; |
175 | 12.8M | p++; |
176 | | |
177 | 238M | while (len--) |
178 | 225M | { |
179 | | /* Output as \DDD for consistency with RFC1035 5.1, except |
180 | | * for the special case of a root name response */ |
181 | 225M | if (!ares__isprint(*p) && !(name_len == 1 && *p == 0)) |
182 | 224M | { |
183 | 224M | *q++ = '\\'; |
184 | 224M | *q++ = (char)('0' + *p / 100); |
185 | 224M | *q++ = (char)('0' + (*p % 100) / 10); |
186 | 224M | *q++ = (char)('0' + (*p % 10)); |
187 | 224M | } |
188 | 1.24M | else if (is_reservedch(*p)) |
189 | 1.11M | { |
190 | 1.11M | *q++ = '\\'; |
191 | 1.11M | *q++ = *p; |
192 | 1.11M | } |
193 | 132k | else |
194 | 132k | { |
195 | 132k | *q++ = *p; |
196 | 132k | } |
197 | 225M | p++; |
198 | 225M | } |
199 | 12.8M | *q++ = '.'; |
200 | 12.8M | } |
201 | 12.9M | } |
202 | | |
203 | 36.9k | if (!indir) |
204 | 25.4k | *enclen = aresx_uztosl(p + 1U - encoded); |
205 | | |
206 | | /* Nuke the trailing period if we wrote one. */ |
207 | 36.9k | if (q > *s) |
208 | 36.9k | *(q - 1) = 0; |
209 | 0 | else |
210 | 0 | *q = 0; /* zero terminate; LCOV_EXCL_LINE: empty names exit above */ |
211 | | |
212 | 36.9k | return ARES_SUCCESS; |
213 | 2.91M | } |
214 | | |
215 | | |
/* Convenience wrapper around ares__expand_name_validated() that expands a
 * name with hostname validation switched off (is_hostname passed as 0). */
int ares_expand_name(const unsigned char *encoded, const unsigned char *abuf,
                     int alen, char **s, long *enclen)
{
  const int hostname_only = 0;

  return ares__expand_name_validated(encoded, abuf, alen, s, enclen,
                                     hostname_only);
}
221 | | |
222 | | /* Return the length of the expansion of an encoded domain name, or |
223 | | * -1 if the encoding is invalid. |
224 | | */ |
225 | | static int name_length(const unsigned char *encoded, const unsigned char *abuf, |
226 | | int alen, int is_hostname) |
227 | 2.92M | { |
228 | 2.92M | int n = 0, offset, indir = 0, top; |
229 | | |
230 | | /* Allow the caller to pass us abuf + alen and have us check for it. */ |
231 | 2.92M | if (encoded >= abuf + alen) |
232 | 9.04k | return -1; |
233 | | |
234 | 16.7M | while (*encoded) |
235 | 13.7M | { |
236 | 13.7M | top = (*encoded & INDIR_MASK); |
237 | 13.7M | if (top == INDIR_MASK) |
238 | 38.8k | { |
239 | | /* Check the offset and go there. */ |
240 | 38.8k | if (encoded + 1 >= abuf + alen) |
241 | 62 | return -1; |
242 | 38.7k | offset = (*encoded & ~INDIR_MASK) << 8 | *(encoded + 1); |
243 | 38.7k | if (offset >= alen) |
244 | 244 | return -1; |
245 | 38.5k | encoded = abuf + offset; |
246 | | |
247 | | /* If we've seen more indirects than the message length, |
248 | | * then there's a loop. |
249 | | */ |
250 | 38.5k | ++indir; |
251 | 38.5k | if (indir > alen || indir > MAX_INDIRS) |
252 | 634 | return -1; |
253 | 38.5k | } |
254 | 13.7M | else if (top == 0x00) |
255 | 13.7M | { |
256 | 13.7M | int name_len = *encoded; |
257 | 13.7M | offset = name_len; |
258 | 13.7M | if (encoded + offset + 1 >= abuf + alen) |
259 | 404 | return -1; |
260 | 13.7M | encoded++; |
261 | | |
262 | 261M | while (offset--) |
263 | 247M | { |
264 | 247M | if (!ares__isprint(*encoded) && !(name_len == 1 && *encoded == 0)) |
265 | 241M | { |
266 | 241M | if (is_hostname) |
267 | 44 | return -1; |
268 | 241M | n += 4; |
269 | 241M | } |
270 | 6.17M | else if (is_reservedch(*encoded)) |
271 | 5.99M | { |
272 | 5.99M | if (is_hostname) |
273 | 11 | return -1; |
274 | 5.99M | n += 2; |
275 | 5.99M | } |
276 | 181k | else |
277 | 181k | { |
278 | 181k | if (is_hostname && !is_hostnamech(*encoded)) |
279 | 39 | return -1; |
280 | 181k | n += 1; |
281 | 181k | } |
282 | 247M | encoded++; |
283 | 247M | } |
284 | | |
285 | 13.7M | n++; |
286 | 13.7M | } |
287 | 137 | else |
288 | 137 | { |
289 | | /* RFC 1035 4.1.4 says other options (01, 10) for top 2 |
290 | | * bits are reserved. |
291 | | */ |
292 | 137 | return -1; |
293 | 137 | } |
294 | 13.7M | } |
295 | | |
296 | | /* If there were any labels at all, then the number of dots is one |
297 | | * less than the number of labels, so subtract one. |
298 | | */ |
299 | 2.91M | return (n) ? n - 1 : n; |
300 | 2.92M | } |
301 | | |
302 | | /* Like ares_expand_name_validated but returns EBADRESP in case of invalid |
303 | | * input. */ |
304 | | int ares__expand_name_for_response(const unsigned char *encoded, |
305 | | const unsigned char *abuf, int alen, |
306 | | char **s, long *enclen, int is_hostname) |
307 | 1.39M | { |
308 | 1.39M | int status = ares__expand_name_validated(encoded, abuf, alen, s, enclen, |
309 | 1.39M | is_hostname); |
310 | 1.39M | if (status == ARES_EBADNAME) |
311 | 4.92k | status = ARES_EBADRESP; |
312 | 1.39M | return status; |
313 | 1.39M | } |