Home | History | Annotate | Line # | Download | only in dns
      1 /*	$NetBSD: compress.c,v 1.10 2025/01/26 16:25:22 christos Exp $	*/
      2 
      3 /*
      4  * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
      5  *
      6  * SPDX-License-Identifier: MPL-2.0
      7  *
      8  * This Source Code Form is subject to the terms of the Mozilla Public
      9  * License, v. 2.0. If a copy of the MPL was not distributed with this
     10  * file, you can obtain one at https://mozilla.org/MPL/2.0/.
     11  *
     12  * See the COPYRIGHT file distributed with this work for additional
     13  * information regarding copyright ownership.
     14  */
     15 
     16 #include <stdbool.h>
     17 #include <stdint.h>
     18 #include <string.h>
     19 
     20 #include <isc/ascii.h>
     21 #include <isc/buffer.h>
     22 #include <isc/hash.h>
     23 #include <isc/mem.h>
     24 #include <isc/util.h>
     25 
     26 #include <dns/compress.h>
     27 #include <dns/name.h>
     28 
     29 #define HASH_INIT_DJB2 5381
     30 
     31 #define CCTX_MAGIC    ISC_MAGIC('C', 'C', 'T', 'X')
     32 #define CCTX_VALID(x) ISC_MAGIC_VALID(x, CCTX_MAGIC)
     33 
     34 void
     35 dns_compress_init(dns_compress_t *cctx, isc_mem_t *mctx,
     36 		  dns_compress_flags_t flags) {
     37 	dns_compress_slot_t *set = NULL;
     38 	uint16_t mask;
     39 
     40 	REQUIRE(cctx != NULL);
     41 	REQUIRE(mctx != NULL);
     42 
     43 	if ((flags & DNS_COMPRESS_LARGE) != 0) {
     44 		size_t count = (1 << DNS_COMPRESS_LARGEBITS);
     45 		mask = count - 1;
     46 		set = isc_mem_callocate(mctx, count, sizeof(*set));
     47 	} else {
     48 		mask = ARRAY_SIZE(cctx->smallset) - 1;
     49 		set = cctx->smallset;
     50 	}
     51 
     52 	/*
     53 	 * The lifetime of this object is limited to the stack frame of the
     54 	 * caller, so we don't need to attach to the memory context.
     55 	 */
     56 	*cctx = (dns_compress_t){
     57 		.magic = CCTX_MAGIC,
     58 		.flags = flags | DNS_COMPRESS_PERMITTED,
     59 		.mctx = mctx,
     60 		.mask = mask,
     61 		.set = set,
     62 	};
     63 }
     64 
     65 void
     66 dns_compress_invalidate(dns_compress_t *cctx) {
     67 	REQUIRE(CCTX_VALID(cctx));
     68 	if (cctx->set != cctx->smallset) {
     69 		isc_mem_free(cctx->mctx, cctx->set);
     70 	}
     71 	*cctx = (dns_compress_t){ 0 };
     72 }
     73 
     74 void
     75 dns_compress_setpermitted(dns_compress_t *cctx, bool permitted) {
     76 	REQUIRE(CCTX_VALID(cctx));
     77 	if (permitted) {
     78 		cctx->flags |= DNS_COMPRESS_PERMITTED;
     79 	} else {
     80 		cctx->flags &= ~DNS_COMPRESS_PERMITTED;
     81 	}
     82 }
     83 
     84 bool
     85 dns_compress_getpermitted(dns_compress_t *cctx) {
     86 	REQUIRE(CCTX_VALID(cctx));
     87 	return (cctx->flags & DNS_COMPRESS_PERMITTED) != 0;
     88 }
     89 
     90 /*
     91  * Our hash value needs to cover the entire suffix of a name, and we need
     92  * to calculate it one label at a time. So this function mixes a label into
     93  * an existing hash. (We don't use isc_hash32() because the djb2 hash is a
     94  * lot faster, and we limit the impact of collision attacks by restricting
     95  * the size and occupancy of the hash set.) The accumulator is 32 bits to
     96  * keep more of the fun mixing that happens in the upper bits.
     97  */
     98 static uint16_t
     99 hash_label(uint16_t init, uint8_t *ptr, bool sensitive) {
    100 	unsigned int len = ptr[0] + 1;
    101 	uint32_t hash = init;
    102 
    103 	if (sensitive) {
    104 		while (len-- > 0) {
    105 			hash = hash * 33 + *ptr++;
    106 		}
    107 	} else {
    108 		/* using the autovectorize-friendly tolower() */
    109 		while (len-- > 0) {
    110 			hash = hash * 33 + isc__ascii_tolower1(*ptr++);
    111 		}
    112 	}
    113 
    114 	return isc_hash_bits32(hash, 16);
    115 }
    116 
    117 static bool
    118 match_wirename(uint8_t *a, uint8_t *b, unsigned int len, bool sensitive) {
    119 	if (sensitive) {
    120 		return memcmp(a, b, len) == 0;
    121 	} else {
    122 		/* label lengths are < 'A' so unaffected by tolower() */
    123 		return isc_ascii_lowerequal(a, b, len);
    124 	}
    125 }
    126 
    127 /*
    128  * We have found a hash set entry whose hash value matches the current
    129  * suffix of our name, which is passed to this function via `sptr` and
    130  * `slen`. We need to verify that the suffix in the message (referred to
    131  * by `new_coff`) actually matches, in case of hash collisions.
    132  *
    133  * We know that the previous suffix of this name (after the first label)
    134  * occurs in the message at `old_coff`, and all the compression offsets in
    135  * the hash set and in the message refer to the first occurrence of a
    136  * particular name or suffix.
    137  *
    138  * First, we need to match the label that was just added to our suffix,
    139  * and second, verify that it is followed by the previous suffix.
    140  *
    141  * There are a few ways to match the previous suffix:
    142  *
    143  * When the first occurrence of this suffix is also the first occurrence
    144  * of the previous suffix, `old_coff` points just after the new label.
    145  *
    146  * Otherwise, if this suffix occurs in a compressed name, it will be
    147  * followed by a compression pointer that refers to the previous suffix,
    148  * which must be equal to `old_coff`.
    149  *
    150  * The final possibility is that this suffix occurs in an uncompressed
    151  * name, so we have to compare the rest of the suffix in full.
    152  *
    153  * A special case is when this suffix is a TLD. That can be handled by
    154  * the case for uncompressed names, but it is common enough that it is
    155  * worth taking a short cut. (In the TLD case, the `old_coff` will be
    156  * zero, and the quick checks for the previous suffix will fail.)
    157  */
    158 static bool
    159 match_suffix(isc_buffer_t *buffer, unsigned int new_coff, uint8_t *sptr,
    160 	     unsigned int slen, unsigned int old_coff, bool sensitive) {
    161 	uint8_t pptr[] = { 0xC0 | (old_coff >> 8), old_coff & 0xff };
    162 	uint8_t *bptr = isc_buffer_base(buffer);
    163 	unsigned int blen = isc_buffer_usedlength(buffer);
    164 	unsigned int llen = sptr[0] + 1;
    165 
    166 	INSIST(llen <= 64 && llen < slen);
    167 
    168 	if (blen < new_coff + llen) {
    169 		return false;
    170 	}
    171 
    172 	blen -= new_coff;
    173 	bptr += new_coff;
    174 
    175 	/* does the first label of the suffix appear here? */
    176 	if (!match_wirename(bptr, sptr, llen, sensitive)) {
    177 		return false;
    178 	}
    179 
    180 	/* is this label followed by the previously matched suffix? */
    181 	if (old_coff == new_coff + llen) {
    182 		return true;
    183 	}
    184 
    185 	blen -= llen;
    186 	bptr += llen;
    187 	slen -= llen;
    188 	sptr += llen;
    189 
    190 	/* are both labels followed by the root label? */
    191 	if (blen >= 1 && slen == 1 && bptr[0] == 0 && sptr[0] == 0) {
    192 		return true;
    193 	}
    194 
    195 	/* is this label followed by a pointer to the previous match? */
    196 	if (blen >= 2 && bptr[0] == pptr[0] && bptr[1] == pptr[1]) {
    197 		return true;
    198 	}
    199 
    200 	/* is this label followed by a copy of the rest of the suffix? */
    201 	return blen >= slen && match_wirename(bptr, sptr, slen, sensitive);
    202 }
    203 
    204 /*
    205  * Robin Hood hashing aims to minimize probe distance when inserting a
    206  * new element by ensuring that the new element does not have a worse
    207  * probe distance than any other element in its probe sequence. During
    208  * insertion, if an existing element is encountered with a shorter
    209  * probe distance, it is swapped with the new element, and insertion
    210  * continues with the displaced element.
    211  */
    212 static unsigned int
    213 probe_distance(dns_compress_t *cctx, unsigned int slot) {
    214 	return (slot - cctx->set[slot].hash) & cctx->mask;
    215 }
    216 
    217 static unsigned int
    218 slot_index(dns_compress_t *cctx, unsigned int hash, unsigned int probe) {
    219 	return (hash + probe) & cctx->mask;
    220 }
    221 
    222 static bool
    223 insert_label(dns_compress_t *cctx, isc_buffer_t *buffer, const dns_name_t *name,
    224 	     unsigned int label, uint16_t hash, unsigned int probe) {
    225 	/*
    226 	 * hash set entries must have valid compression offsets
    227 	 * and the hash set must not get too full (75% load)
    228 	 */
    229 	unsigned int prefix_len = name->offsets[label];
    230 	unsigned int coff = isc_buffer_usedlength(buffer) + prefix_len;
    231 	if (coff >= 0x4000 || cctx->count > cctx->mask * 3 / 4) {
    232 		return false;
    233 	}
    234 	for (;;) {
    235 		unsigned int slot = slot_index(cctx, hash, probe);
    236 		/* we can stop when we find an empty slot */
    237 		if (cctx->set[slot].coff == 0) {
    238 			cctx->set[slot].hash = hash;
    239 			cctx->set[slot].coff = coff;
    240 			cctx->count++;
    241 			return true;
    242 		}
    243 		/* he steals from the rich and gives to the poor */
    244 		if (probe > probe_distance(cctx, slot)) {
    245 			probe = probe_distance(cctx, slot);
    246 			ISC_SWAP(cctx->set[slot].hash, hash);
    247 			ISC_SWAP(cctx->set[slot].coff, coff);
    248 		}
    249 		probe++;
    250 	}
    251 }
    252 
    253 /*
    254  * Add the unmatched prefix of the name to the hash set.
    255  */
    256 static void
    257 insert(dns_compress_t *cctx, isc_buffer_t *buffer, const dns_name_t *name,
    258        unsigned int label, uint16_t hash, unsigned int probe) {
    259 	bool sensitive = (cctx->flags & DNS_COMPRESS_CASE) != 0;
    260 	/*
    261 	 * this insertion loop continues from the search loop inside
    262 	 * dns_compress_name() below, iterating over the remaining labels
    263 	 * of the name and accumulating the hash in the same manner
    264 	 */
    265 	while (insert_label(cctx, buffer, name, label, hash, probe) &&
    266 	       label-- > 0)
    267 	{
    268 		unsigned int prefix_len = name->offsets[label];
    269 		uint8_t *suffix_ptr = name->ndata + prefix_len;
    270 		hash = hash_label(hash, suffix_ptr, sensitive);
    271 		probe = 0;
    272 	}
    273 }
    274 
    275 void
    276 dns_compress_name(dns_compress_t *cctx, isc_buffer_t *buffer,
    277 		  const dns_name_t *name, unsigned int *return_prefix,
    278 		  unsigned int *return_coff) {
    279 	REQUIRE(CCTX_VALID(cctx));
    280 	REQUIRE(ISC_BUFFER_VALID(buffer));
    281 	REQUIRE(dns_name_isabsolute(name));
    282 	REQUIRE(name->labels > 0);
    283 	REQUIRE(name->offsets != NULL);
    284 	REQUIRE(return_prefix != NULL);
    285 	REQUIRE(return_coff != NULL);
    286 	REQUIRE(*return_coff == 0);
    287 
    288 	if ((cctx->flags & DNS_COMPRESS_DISABLED) != 0) {
    289 		return;
    290 	}
    291 
    292 	bool sensitive = (cctx->flags & DNS_COMPRESS_CASE) != 0;
    293 
    294 	uint16_t hash = HASH_INIT_DJB2;
    295 	unsigned int label = name->labels - 1; /* skip the root label */
    296 
    297 	/*
    298 	 * find out how much of the name's suffix is in the hash set,
    299 	 * stepping backwards from the end one label at a time
    300 	 */
    301 	while (label-- > 0) {
    302 		unsigned int prefix_len = name->offsets[label];
    303 		unsigned int suffix_len = name->length - prefix_len;
    304 		uint8_t *suffix_ptr = name->ndata + prefix_len;
    305 		hash = hash_label(hash, suffix_ptr, sensitive);
    306 
    307 		for (unsigned int probe = 0; true; probe++) {
    308 			unsigned int slot = slot_index(cctx, hash, probe);
    309 			unsigned int coff = cctx->set[slot].coff;
    310 
    311 			/*
    312 			 * if we would have inserted this entry here (as in
    313 			 * insert_label() above), our suffix cannot be in the
    314 			 * hash set, so stop searching and switch to inserting
    315 			 * the rest of the name (its prefix) into the set
    316 			 */
    317 			if (coff == 0 || probe > probe_distance(cctx, slot)) {
    318 				insert(cctx, buffer, name, label, hash, probe);
    319 				return;
    320 			}
    321 
    322 			/*
    323 			 * this slot matches, so provisionally set the
    324 			 * return values and continue with the next label
    325 			 */
    326 			if (hash == cctx->set[slot].hash &&
    327 			    match_suffix(buffer, coff, suffix_ptr, suffix_len,
    328 					 *return_coff, sensitive))
    329 			{
    330 				*return_coff = coff;
    331 				*return_prefix = prefix_len;
    332 				break;
    333 			}
    334 		}
    335 	}
    336 }
    337 
    338 void
    339 dns_compress_rollback(dns_compress_t *cctx, unsigned int coff) {
    340 	REQUIRE(CCTX_VALID(cctx));
    341 
    342 	for (unsigned int slot = 0; slot <= cctx->mask; slot++) {
    343 		if (cctx->set[slot].coff < coff) {
    344 			continue;
    345 		}
    346 		/*
    347 		 * The next few elements might be part of the deleted element's
    348 		 * probe sequence, so we slide them down to overwrite the entry
    349 		 * we are deleting and preserve the probe sequence. Moving an
    350 		 * element to the previous slot reduces its probe distance, so
    351 		 * we stop when we find an element whose probe distance is zero.
    352 		 */
    353 		unsigned int prev = slot;
    354 		unsigned int next = slot_index(cctx, prev, 1);
    355 		while (cctx->set[next].coff != 0 &&
    356 		       probe_distance(cctx, next) != 0)
    357 		{
    358 			cctx->set[prev] = cctx->set[next];
    359 			prev = next;
    360 			next = slot_index(cctx, prev, 1);
    361 		}
    362 		cctx->set[prev].coff = 0;
    363 		cctx->set[prev].hash = 0;
    364 		cctx->count--;
    365 	}
    366 }
    367