/src/bind9/lib/dns/compress.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (C) Internet Systems Consortium, Inc. ("ISC") |
3 | | * |
4 | | * SPDX-License-Identifier: MPL-2.0 |
5 | | * |
6 | | * This Source Code Form is subject to the terms of the Mozilla Public |
7 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
8 | | * file, you can obtain one at https://mozilla.org/MPL/2.0/. |
9 | | * |
10 | | * See the COPYRIGHT file distributed with this work for additional |
11 | | * information regarding copyright ownership. |
12 | | */ |
13 | | |
14 | | #include <stdbool.h> |
15 | | #include <stdint.h> |
16 | | #include <string.h> |
17 | | |
18 | | #include <isc/ascii.h> |
19 | | #include <isc/buffer.h> |
20 | | #include <isc/hash.h> |
21 | | #include <isc/mem.h> |
22 | | #include <isc/util.h> |
23 | | |
24 | | #include <dns/compress.h> |
25 | | #include <dns/name.h> |
26 | | |
27 | 0 | #define HASH_INIT_DJB2 5381 |
28 | | |
29 | 0 | #define CCTX_MAGIC ISC_MAGIC('C', 'C', 'T', 'X') |
30 | | #define CCTX_VALID(x) ISC_MAGIC_VALID(x, CCTX_MAGIC) |
31 | | |
32 | | void |
33 | | dns_compress_init(dns_compress_t *cctx, isc_mem_t *mctx, |
34 | 0 | dns_compress_flags_t flags) { |
35 | 0 | dns_compress_slot_t *set = NULL; |
36 | 0 | uint16_t mask; |
37 | |
|
38 | 0 | REQUIRE(cctx != NULL); |
39 | 0 | REQUIRE(mctx != NULL); |
40 | |
|
41 | 0 | if ((flags & DNS_COMPRESS_LARGE) != 0) { |
42 | 0 | size_t count = (1 << DNS_COMPRESS_LARGEBITS); |
43 | 0 | mask = count - 1; |
44 | 0 | set = isc_mem_callocate(mctx, count, sizeof(*set)); |
45 | 0 | } else { |
46 | 0 | mask = ARRAY_SIZE(cctx->smallset) - 1; |
47 | 0 | set = cctx->smallset; |
48 | 0 | } |
49 | | |
50 | | /* |
51 | | * The lifetime of this object is limited to the stack frame of the |
52 | | * caller, so we don't need to attach to the memory context. |
53 | | */ |
54 | 0 | *cctx = (dns_compress_t){ |
55 | 0 | .magic = CCTX_MAGIC, |
56 | 0 | .flags = flags | DNS_COMPRESS_PERMITTED, |
57 | 0 | .mctx = mctx, |
58 | 0 | .mask = mask, |
59 | 0 | .set = set, |
60 | 0 | .coff = 0xffff, |
61 | 0 | }; |
62 | 0 | } |
63 | | |
64 | | void |
65 | 0 | dns_compress_invalidate(dns_compress_t *cctx) { |
66 | 0 | REQUIRE(CCTX_VALID(cctx)); |
67 | 0 | if (cctx->set != cctx->smallset) { |
68 | 0 | isc_mem_free(cctx->mctx, cctx->set); |
69 | 0 | } |
70 | 0 | *cctx = (dns_compress_t){ 0 }; |
71 | 0 | } |
72 | | |
73 | | void |
74 | 0 | dns_compress_setmultiuse(dns_compress_t *cctx, bool multi) { |
75 | 0 | REQUIRE(CCTX_VALID(cctx)); |
76 | 0 | if (multi) { |
77 | 0 | cctx->flags |= DNS_COMPRESS_MULTIUSE; |
78 | 0 | } else { |
79 | 0 | cctx->flags &= ~DNS_COMPRESS_MULTIUSE; |
80 | 0 | } |
81 | 0 | cctx->coff = 0xffff; |
82 | 0 | } |
83 | | |
84 | | bool |
85 | 0 | dns_compress_getmultiuse(dns_compress_t *cctx) { |
86 | 0 | REQUIRE(CCTX_VALID(cctx)); |
87 | 0 | return (cctx->flags & DNS_COMPRESS_MULTIUSE) != 0; |
88 | 0 | } |
89 | | |
90 | | void |
91 | 0 | dns_compress_setpermitted(dns_compress_t *cctx, bool permitted) { |
92 | 0 | REQUIRE(CCTX_VALID(cctx)); |
93 | 0 | if (permitted) { |
94 | 0 | cctx->flags |= DNS_COMPRESS_PERMITTED; |
95 | 0 | } else { |
96 | 0 | cctx->flags &= ~DNS_COMPRESS_PERMITTED; |
97 | 0 | } |
98 | 0 | dns_compress_setmultiuse(cctx, false); |
99 | 0 | } |
100 | | |
101 | | bool |
102 | 0 | dns_compress_getpermitted(dns_compress_t *cctx) { |
103 | 0 | REQUIRE(CCTX_VALID(cctx)); |
104 | 0 | return (cctx->flags & DNS_COMPRESS_PERMITTED) != 0; |
105 | 0 | } |
106 | | |
107 | | /* |
108 | | * Our hash value needs to cover the entire suffix of a name, and we need |
109 | | * to calculate it one label at a time. So this function mixes a label into |
110 | | * an existing hash. (We don't use isc_hash32() because the djb2 hash is a |
111 | | * lot faster, and we limit the impact of collision attacks by restricting |
112 | | * the size and occupancy of the hash set.) The accumulator is 32 bits to |
113 | | * keep more of the fun mixing that happens in the upper bits. |
114 | | */ |
115 | | static uint16_t |
116 | 0 | hash_label(uint16_t init, uint8_t *ptr, bool sensitive) { |
117 | 0 | unsigned int len = ptr[0] + 1; |
118 | 0 | uint32_t hash = init; |
119 | |
|
120 | 0 | if (sensitive) { |
121 | 0 | while (len-- > 0) { |
122 | 0 | hash = hash * 33 + *ptr++; |
123 | 0 | } |
124 | 0 | } else { |
125 | | /* using the autovectorize-friendly tolower() */ |
126 | 0 | while (len-- > 0) { |
127 | 0 | hash = hash * 33 + isc__ascii_tolower1(*ptr++); |
128 | 0 | } |
129 | 0 | } |
130 | |
|
131 | 0 | return isc_hash_bits32(hash, 16); |
132 | 0 | } |
133 | | |
134 | | static bool |
135 | 0 | match_wirename(uint8_t *a, uint8_t *b, unsigned int len, bool sensitive) { |
136 | 0 | if (sensitive) { |
137 | 0 | return memcmp(a, b, len) == 0; |
138 | 0 | } else { |
139 | | /* label lengths are < 'A' so unaffected by tolower() */ |
140 | 0 | return isc_ascii_lowerequal(a, b, len); |
141 | 0 | } |
142 | 0 | } |
143 | | |
144 | | /* |
145 | | * We have found a hash set entry whose hash value matches the current |
146 | | * suffix of our name, which is passed to this function via `sptr` and |
147 | | * `slen`. We need to verify that the suffix in the message (referred to |
148 | | * by `new_coff`) actually matches, in case of hash collisions. |
149 | | * |
150 | | * We know that the previous suffix of this name (after the first label) |
151 | | * occurs in the message at `old_coff`, and all the compression offsets in |
152 | | * the hash set and in the message refer to the first occurrence of a |
153 | | * particular name or suffix. |
154 | | * |
155 | | * First, we need to match the label that was just added to our suffix, |
156 | | * and second, verify that it is followed by the previous suffix. |
157 | | * |
158 | | * There are a few ways to match the previous suffix: |
159 | | * |
160 | | * When the first occurrence of this suffix is also the first occurrence |
161 | | * of the previous suffix, `old_coff` points just after the new label. |
162 | | * |
163 | | * Otherwise, if this suffix occurs in a compressed name, it will be |
164 | | * followed by a compression pointer that refers to the previous suffix, |
165 | | * which must be equal to `old_coff`. |
166 | | * |
167 | | * The final possibility is that this suffix occurs in an uncompressed |
168 | | * name, so we have to compare the rest of the suffix in full. |
169 | | * |
170 | | * A special case is when this suffix is a TLD. That can be handled by |
171 | | * the case for uncompressed names, but it is common enough that it is |
172 | | * worth taking a short cut. (In the TLD case, the `old_coff` will be |
173 | | * zero, and the quick checks for the previous suffix will fail.) |
174 | | */ |
175 | | static bool |
176 | | match_suffix(isc_buffer_t *buffer, unsigned int new_coff, uint8_t *sptr, |
177 | 0 | unsigned int slen, unsigned int old_coff, bool sensitive) { |
178 | 0 | uint8_t pptr[] = { 0xC0 | (old_coff >> 8), old_coff & 0xff }; |
179 | 0 | uint8_t *bptr = isc_buffer_base(buffer); |
180 | 0 | unsigned int blen = isc_buffer_usedlength(buffer); |
181 | 0 | unsigned int llen = sptr[0] + 1; |
182 | |
|
183 | 0 | INSIST(llen <= 64 && llen < slen); |
184 | |
|
185 | 0 | if (blen < new_coff + llen) { |
186 | 0 | return false; |
187 | 0 | } |
188 | | |
189 | 0 | blen -= new_coff; |
190 | 0 | bptr += new_coff; |
191 | | |
192 | | /* does the first label of the suffix appear here? */ |
193 | 0 | if (!match_wirename(bptr, sptr, llen, sensitive)) { |
194 | 0 | return false; |
195 | 0 | } |
196 | | |
197 | | /* is this label followed by the previously matched suffix? */ |
198 | 0 | if (old_coff == new_coff + llen) { |
199 | 0 | return true; |
200 | 0 | } |
201 | | |
202 | 0 | blen -= llen; |
203 | 0 | bptr += llen; |
204 | 0 | slen -= llen; |
205 | 0 | sptr += llen; |
206 | | |
207 | | /* are both labels followed by the root label? */ |
208 | 0 | if (blen >= 1 && slen == 1 && bptr[0] == 0 && sptr[0] == 0) { |
209 | 0 | return true; |
210 | 0 | } |
211 | | |
212 | | /* is this label followed by a pointer to the previous match? */ |
213 | 0 | if (blen >= 2 && bptr[0] == pptr[0] && bptr[1] == pptr[1]) { |
214 | 0 | return true; |
215 | 0 | } |
216 | | |
217 | | /* is this label followed by a copy of the rest of the suffix? */ |
218 | 0 | return blen >= slen && match_wirename(bptr, sptr, slen, sensitive); |
219 | 0 | } |
220 | | |
221 | | /* |
222 | | * Robin Hood hashing aims to minimize probe distance when inserting a |
223 | | * new element by ensuring that the new element does not have a worse |
224 | | * probe distance than any other element in its probe sequence. During |
225 | | * insertion, if an existing element is encountered with a shorter |
226 | | * probe distance, it is swapped with the new element, and insertion |
227 | | * continues with the displaced element. |
228 | | */ |
229 | | static unsigned int |
230 | 0 | probe_distance(dns_compress_t *cctx, unsigned int slot) { |
231 | 0 | return (slot - cctx->set[slot].hash) & cctx->mask; |
232 | 0 | } |
233 | | |
234 | | static unsigned int |
235 | 0 | slot_index(dns_compress_t *cctx, unsigned int hash, unsigned int probe) { |
236 | 0 | return (hash + probe) & cctx->mask; |
237 | 0 | } |
238 | | |
239 | | static bool |
240 | | insert_label(dns_compress_t *cctx, isc_buffer_t *buffer, |
241 | | const dns_offsets_t offsets, unsigned int label, uint16_t hash, |
242 | 0 | unsigned int probe) { |
243 | | /* |
244 | | * hash set entries must have valid compression offsets |
245 | | * and the hash set must not get too full (75% load) |
246 | | */ |
247 | 0 | unsigned int prefix_len = offsets[label]; |
248 | 0 | unsigned int coff = isc_buffer_usedlength(buffer) + prefix_len; |
249 | 0 | if (coff >= 0x4000 || cctx->count > cctx->mask * 3 / 4) { |
250 | 0 | return false; |
251 | 0 | } |
252 | 0 | for (;;) { |
253 | 0 | unsigned int slot = slot_index(cctx, hash, probe); |
254 | | /* we can stop when we find an empty slot */ |
255 | 0 | if (cctx->set[slot].coff == 0) { |
256 | 0 | cctx->set[slot].hash = hash; |
257 | 0 | cctx->set[slot].coff = coff; |
258 | 0 | cctx->count++; |
259 | 0 | return true; |
260 | 0 | } |
261 | | /* he steals from the rich and gives to the poor */ |
262 | 0 | if (probe > probe_distance(cctx, slot)) { |
263 | 0 | probe = probe_distance(cctx, slot); |
264 | 0 | ISC_SWAP(cctx->set[slot].hash, hash); |
265 | 0 | ISC_SWAP(cctx->set[slot].coff, coff); |
266 | 0 | } |
267 | 0 | probe++; |
268 | 0 | } |
269 | 0 | } |
270 | | |
271 | | /* |
272 | | * Add the unmatched prefix of the name to the hash set. |
273 | | */ |
274 | | static void |
275 | | insert(dns_compress_t *cctx, isc_buffer_t *buffer, const dns_name_t *name, |
276 | | const dns_offsets_t offsets, unsigned int label, uint16_t hash, |
277 | 0 | unsigned int probe) { |
278 | 0 | bool sensitive = (cctx->flags & DNS_COMPRESS_CASE) != 0; |
279 | | /* |
280 | | * this insertion loop continues from the search loop inside |
281 | | * dns_compress_name() below, iterating over the remaining labels |
282 | | * of the name and accumulating the hash in the same manner |
283 | | */ |
284 | 0 | while (insert_label(cctx, buffer, offsets, label, hash, probe) && |
285 | 0 | label-- > 0) |
286 | 0 | { |
287 | 0 | unsigned int prefix_len = offsets[label]; |
288 | 0 | uint8_t *suffix_ptr = name->ndata + prefix_len; |
289 | 0 | hash = hash_label(hash, suffix_ptr, sensitive); |
290 | 0 | probe = 0; |
291 | 0 | } |
292 | 0 | } |
293 | | |
294 | | void |
295 | | dns_compress_name(dns_compress_t *cctx, isc_buffer_t *buffer, |
296 | | const dns_name_t *name, unsigned int *return_prefix, |
297 | 0 | unsigned int *return_coff) { |
298 | 0 | REQUIRE(CCTX_VALID(cctx)); |
299 | 0 | REQUIRE(ISC_BUFFER_VALID(buffer)); |
300 | 0 | REQUIRE(dns_name_isabsolute(name)); |
301 | 0 | REQUIRE(return_prefix != NULL); |
302 | 0 | REQUIRE(return_coff != NULL); |
303 | 0 | REQUIRE(*return_coff == 0); |
304 | |
|
305 | 0 | if ((cctx->flags & DNS_COMPRESS_DISABLED) != 0) { |
306 | 0 | return; |
307 | 0 | } |
308 | | |
309 | 0 | dns_offsets_t offsets; |
310 | 0 | size_t labels = dns_name_offsets(name, offsets); |
311 | 0 | INSIST(labels > 0); |
312 | |
|
313 | 0 | bool sensitive = (cctx->flags & DNS_COMPRESS_CASE) != 0; |
314 | |
|
315 | 0 | uint16_t hash = HASH_INIT_DJB2; |
316 | 0 | size_t label = labels - 1; /* skip the root label */ |
317 | | |
318 | | /* |
319 | | * find out how much of the name's suffix is in the hash set, |
320 | | * stepping backwards from the end one label at a time |
321 | | */ |
322 | 0 | while (label-- > 0) { |
323 | 0 | unsigned int prefix_len = offsets[label]; |
324 | 0 | unsigned int suffix_len = name->length - prefix_len; |
325 | 0 | uint8_t *suffix_ptr = name->ndata + prefix_len; |
326 | 0 | hash = hash_label(hash, suffix_ptr, sensitive); |
327 | |
|
328 | 0 | for (unsigned int probe = 0; true; probe++) { |
329 | 0 | unsigned int slot = slot_index(cctx, hash, probe); |
330 | 0 | unsigned int coff = cctx->set[slot].coff; |
331 | | |
332 | | /* |
333 | | * if we would have inserted this entry here (as in |
334 | | * insert_label() above), our suffix cannot be in the |
335 | | * hash set, so stop searching and switch to inserting |
336 | | * the rest of the name (its prefix) into the set |
337 | | */ |
338 | 0 | if (coff == 0 || probe > probe_distance(cctx, slot)) { |
339 | 0 | insert(cctx, buffer, name, offsets, label, hash, |
340 | 0 | probe); |
341 | 0 | return; |
342 | 0 | } |
343 | | |
344 | | /* |
345 | | * this slot matches, so provisionally set the |
346 | | * return values and continue with the next label |
347 | | */ |
348 | 0 | if (hash == cctx->set[slot].hash && |
349 | 0 | match_suffix(buffer, coff, suffix_ptr, suffix_len, |
350 | 0 | *return_coff, sensitive)) |
351 | 0 | { |
352 | 0 | *return_coff = coff; |
353 | 0 | *return_prefix = prefix_len; |
354 | 0 | break; |
355 | 0 | } |
356 | 0 | } |
357 | 0 | } |
358 | 0 | } |
359 | | |
360 | | void |
361 | 0 | dns_compress_rollback(dns_compress_t *cctx, unsigned int coff) { |
362 | 0 | REQUIRE(CCTX_VALID(cctx)); |
363 | |
|
364 | 0 | for (unsigned int slot = 0; slot <= cctx->mask; slot++) { |
365 | 0 | if (cctx->set[slot].coff < coff) { |
366 | 0 | continue; |
367 | 0 | } |
368 | | /* |
369 | | * The next few elements might be part of the deleted element's |
370 | | * probe sequence, so we slide them down to overwrite the entry |
371 | | * we are deleting and preserve the probe sequence. Moving an |
372 | | * element to the previous slot reduces its probe distance, so |
373 | | * we stop when we find an element whose probe distance is zero. |
374 | | */ |
375 | 0 | unsigned int prev = slot; |
376 | 0 | unsigned int next = slot_index(cctx, prev, 1); |
377 | 0 | while (cctx->set[next].coff != 0 && |
378 | 0 | probe_distance(cctx, next) != 0) |
379 | 0 | { |
380 | 0 | cctx->set[prev] = cctx->set[next]; |
381 | 0 | prev = next; |
382 | 0 | next = slot_index(cctx, prev, 1); |
383 | 0 | } |
384 | 0 | cctx->set[prev].coff = 0; |
385 | 0 | cctx->set[prev].hash = 0; |
386 | 0 | cctx->count--; |
387 | 0 | } |
388 | 0 | } |