Home | History | Annotate | Line # | Download | only in dns
      1  1.10  christos /*	$NetBSD: compress.c,v 1.10 2025/01/26 16:25:22 christos Exp $	*/
      2   1.1  christos 
      3   1.1  christos /*
      4   1.1  christos  * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
      5   1.1  christos  *
      6   1.7  christos  * SPDX-License-Identifier: MPL-2.0
      7   1.7  christos  *
      8   1.1  christos  * This Source Code Form is subject to the terms of the Mozilla Public
      9   1.1  christos  * License, v. 2.0. If a copy of the MPL was not distributed with this
     10   1.6  christos  * file, you can obtain one at https://mozilla.org/MPL/2.0/.
     11   1.1  christos  *
     12   1.1  christos  * See the COPYRIGHT file distributed with this work for additional
     13   1.1  christos  * information regarding copyright ownership.
     14   1.1  christos  */
     15   1.1  christos 
     16   1.3  christos #include <stdbool.h>
     17  1.10  christos #include <stdint.h>
     18  1.10  christos #include <string.h>
     19   1.1  christos 
     20  1.10  christos #include <isc/ascii.h>
     21  1.10  christos #include <isc/buffer.h>
     22  1.10  christos #include <isc/hash.h>
     23   1.1  christos #include <isc/mem.h>
     24   1.1  christos #include <isc/util.h>
     25   1.1  christos 
     26   1.1  christos #include <dns/compress.h>
     27  1.10  christos #include <dns/name.h>
     28  1.10  christos 
     29  1.10  christos #define HASH_INIT_DJB2 5381
     30   1.1  christos 
     31   1.4  christos #define CCTX_MAGIC    ISC_MAGIC('C', 'C', 'T', 'X')
     32  1.10  christos #define CCTX_VALID(x) ISC_MAGIC_VALID(x, CCTX_MAGIC)
     33   1.1  christos 
     34  1.10  christos void
     35  1.10  christos dns_compress_init(dns_compress_t *cctx, isc_mem_t *mctx,
     36  1.10  christos 		  dns_compress_flags_t flags) {
     37  1.10  christos 	dns_compress_slot_t *set = NULL;
     38  1.10  christos 	uint16_t mask;
     39   1.1  christos 
     40   1.1  christos 	REQUIRE(cctx != NULL);
     41  1.10  christos 	REQUIRE(mctx != NULL);
     42   1.1  christos 
     43  1.10  christos 	if ((flags & DNS_COMPRESS_LARGE) != 0) {
     44  1.10  christos 		size_t count = (1 << DNS_COMPRESS_LARGEBITS);
     45  1.10  christos 		mask = count - 1;
     46  1.10  christos 		set = isc_mem_callocate(mctx, count, sizeof(*set));
     47  1.10  christos 	} else {
     48  1.10  christos 		mask = ARRAY_SIZE(cctx->smallset) - 1;
     49  1.10  christos 		set = cctx->smallset;
     50  1.10  christos 	}
     51   1.1  christos 
     52  1.10  christos 	/*
     53  1.10  christos 	 * The lifetime of this object is limited to the stack frame of the
     54  1.10  christos 	 * caller, so we don't need to attach to the memory context.
     55  1.10  christos 	 */
     56  1.10  christos 	*cctx = (dns_compress_t){
     57  1.10  christos 		.magic = CCTX_MAGIC,
     58  1.10  christos 		.flags = flags | DNS_COMPRESS_PERMITTED,
     59  1.10  christos 		.mctx = mctx,
     60  1.10  christos 		.mask = mask,
     61  1.10  christos 		.set = set,
     62  1.10  christos 	};
     63   1.1  christos }
     64   1.1  christos 
     65   1.1  christos void
     66   1.1  christos dns_compress_invalidate(dns_compress_t *cctx) {
     67  1.10  christos 	REQUIRE(CCTX_VALID(cctx));
     68  1.10  christos 	if (cctx->set != cctx->smallset) {
     69  1.10  christos 		isc_mem_free(cctx->mctx, cctx->set);
     70   1.1  christos 	}
     71  1.10  christos 	*cctx = (dns_compress_t){ 0 };
     72   1.1  christos }
     73   1.1  christos 
     74   1.1  christos void
     75  1.10  christos dns_compress_setpermitted(dns_compress_t *cctx, bool permitted) {
     76  1.10  christos 	REQUIRE(CCTX_VALID(cctx));
     77  1.10  christos 	if (permitted) {
     78  1.10  christos 		cctx->flags |= DNS_COMPRESS_PERMITTED;
     79  1.10  christos 	} else {
     80  1.10  christos 		cctx->flags &= ~DNS_COMPRESS_PERMITTED;
     81  1.10  christos 	}
     82   1.1  christos }
     83   1.1  christos 
     84  1.10  christos bool
     85  1.10  christos dns_compress_getpermitted(dns_compress_t *cctx) {
     86  1.10  christos 	REQUIRE(CCTX_VALID(cctx));
     87  1.10  christos 	return (cctx->flags & DNS_COMPRESS_PERMITTED) != 0;
     88   1.1  christos }
     89   1.1  christos 
     90  1.10  christos /*
     91  1.10  christos  * Our hash value needs to cover the entire suffix of a name, and we need
     92  1.10  christos  * to calculate it one label at a time. So this function mixes a label into
     93  1.10  christos  * an existing hash. (We don't use isc_hash32() because the djb2 hash is a
     94  1.10  christos  * lot faster, and we limit the impact of collision attacks by restricting
     95  1.10  christos  * the size and occupancy of the hash set.) The accumulator is 32 bits to
     96  1.10  christos  * keep more of the fun mixing that happens in the upper bits.
     97  1.10  christos  */
     98  1.10  christos static uint16_t
     99  1.10  christos hash_label(uint16_t init, uint8_t *ptr, bool sensitive) {
    100  1.10  christos 	unsigned int len = ptr[0] + 1;
    101  1.10  christos 	uint32_t hash = init;
    102   1.1  christos 
    103   1.4  christos 	if (sensitive) {
    104  1.10  christos 		while (len-- > 0) {
    105  1.10  christos 			hash = hash * 33 + *ptr++;
    106  1.10  christos 		}
    107   1.4  christos 	} else {
    108  1.10  christos 		/* using the autovectorize-friendly tolower() */
    109  1.10  christos 		while (len-- > 0) {
    110  1.10  christos 			hash = hash * 33 + isc__ascii_tolower1(*ptr++);
    111  1.10  christos 		}
    112   1.4  christos 	}
    113   1.1  christos 
    114  1.10  christos 	return isc_hash_bits32(hash, 16);
    115   1.1  christos }
    116   1.1  christos 
    117  1.10  christos static bool
    118  1.10  christos match_wirename(uint8_t *a, uint8_t *b, unsigned int len, bool sensitive) {
    119  1.10  christos 	if (sensitive) {
    120  1.10  christos 		return memcmp(a, b, len) == 0;
    121  1.10  christos 	} else {
    122  1.10  christos 		/* label lengths are < 'A' so unaffected by tolower() */
    123  1.10  christos 		return isc_ascii_lowerequal(a, b, len);
    124  1.10  christos 	}
    125   1.1  christos }
    126   1.1  christos 
    127   1.1  christos /*
    128  1.10  christos  * We have found a hash set entry whose hash value matches the current
    129  1.10  christos  * suffix of our name, which is passed to this function via `sptr` and
    130  1.10  christos  * `slen`. We need to verify that the suffix in the message (referred to
    131  1.10  christos  * by `new_coff`) actually matches, in case of hash collisions.
    132  1.10  christos  *
    133  1.10  christos  * We know that the previous suffix of this name (after the first label)
    134  1.10  christos  * occurs in the message at `old_coff`, and all the compression offsets in
    135  1.10  christos  * the hash set and in the message refer to the first occurrence of a
    136  1.10  christos  * particular name or suffix.
    137  1.10  christos  *
    138  1.10  christos  * First, we need to match the label that was just added to our suffix,
    139  1.10  christos  * and second, verify that it is followed by the previous suffix.
    140  1.10  christos  *
    141  1.10  christos  * There are a few ways to match the previous suffix:
    142  1.10  christos  *
    143  1.10  christos  * When the first occurrence of this suffix is also the first occurrence
    144  1.10  christos  * of the previous suffix, `old_coff` points just after the new label.
    145  1.10  christos  *
    146  1.10  christos  * Otherwise, if this suffix occurs in a compressed name, it will be
    147  1.10  christos  * followed by a compression pointer that refers to the previous suffix,
    148  1.10  christos  * which must be equal to `old_coff`.
    149  1.10  christos  *
    150  1.10  christos  * The final possibility is that this suffix occurs in an uncompressed
    151  1.10  christos  * name, so we have to compare the rest of the suffix in full.
    152  1.10  christos  *
    153  1.10  christos  * A special case is when this suffix is a TLD. That can be handled by
    154  1.10  christos  * the case for uncompressed names, but it is common enough that it is
    155  1.10  christos  * worth taking a short cut. (In the TLD case, the `old_coff` will be
    156  1.10  christos  * zero, and the quick checks for the previous suffix will fail.)
    157   1.1  christos  */
    158  1.10  christos static bool
    159  1.10  christos match_suffix(isc_buffer_t *buffer, unsigned int new_coff, uint8_t *sptr,
    160  1.10  christos 	     unsigned int slen, unsigned int old_coff, bool sensitive) {
    161  1.10  christos 	uint8_t pptr[] = { 0xC0 | (old_coff >> 8), old_coff & 0xff };
    162  1.10  christos 	uint8_t *bptr = isc_buffer_base(buffer);
    163  1.10  christos 	unsigned int blen = isc_buffer_usedlength(buffer);
    164  1.10  christos 	unsigned int llen = sptr[0] + 1;
    165  1.10  christos 
    166  1.10  christos 	INSIST(llen <= 64 && llen < slen);
    167  1.10  christos 
    168  1.10  christos 	if (blen < new_coff + llen) {
    169  1.10  christos 		return false;
    170  1.10  christos 	}
    171   1.1  christos 
    172  1.10  christos 	blen -= new_coff;
    173  1.10  christos 	bptr += new_coff;
    174   1.1  christos 
    175  1.10  christos 	/* does the first label of the suffix appear here? */
    176  1.10  christos 	if (!match_wirename(bptr, sptr, llen, sensitive)) {
    177  1.10  christos 		return false;
    178   1.4  christos 	}
    179   1.1  christos 
    180  1.10  christos 	/* is this label followed by the previously matched suffix? */
    181  1.10  christos 	if (old_coff == new_coff + llen) {
    182  1.10  christos 		return true;
    183   1.4  christos 	}
    184   1.1  christos 
    185  1.10  christos 	blen -= llen;
    186  1.10  christos 	bptr += llen;
    187  1.10  christos 	slen -= llen;
    188  1.10  christos 	sptr += llen;
    189   1.1  christos 
    190  1.10  christos 	/* are both labels followed by the root label? */
    191  1.10  christos 	if (blen >= 1 && slen == 1 && bptr[0] == 0 && sptr[0] == 0) {
    192  1.10  christos 		return true;
    193  1.10  christos 	}
    194   1.1  christos 
    195  1.10  christos 	/* is this label followed by a pointer to the previous match? */
    196  1.10  christos 	if (blen >= 2 && bptr[0] == pptr[0] && bptr[1] == pptr[1]) {
    197  1.10  christos 		return true;
    198  1.10  christos 	}
    199   1.1  christos 
    200  1.10  christos 	/* is this label followed by a copy of the rest of the suffix? */
    201  1.10  christos 	return blen >= slen && match_wirename(bptr, sptr, slen, sensitive);
    202  1.10  christos }
    203   1.1  christos 
    204  1.10  christos /*
    205  1.10  christos  * Robin Hood hashing aims to minimize probe distance when inserting a
    206  1.10  christos  * new element by ensuring that the new element does not have a worse
    207  1.10  christos  * probe distance than any other element in its probe sequence. During
    208  1.10  christos  * insertion, if an existing element is encountered with a shorter
    209  1.10  christos  * probe distance, it is swapped with the new element, and insertion
    210  1.10  christos  * continues with the displaced element.
    211  1.10  christos  */
    212  1.10  christos static unsigned int
    213  1.10  christos probe_distance(dns_compress_t *cctx, unsigned int slot) {
    214  1.10  christos 	return (slot - cctx->set[slot].hash) & cctx->mask;
    215  1.10  christos }
    216   1.1  christos 
    217  1.10  christos static unsigned int
    218  1.10  christos slot_index(dns_compress_t *cctx, unsigned int hash, unsigned int probe) {
    219  1.10  christos 	return (hash + probe) & cctx->mask;
    220  1.10  christos }
    221  1.10  christos 
    222  1.10  christos static bool
    223  1.10  christos insert_label(dns_compress_t *cctx, isc_buffer_t *buffer, const dns_name_t *name,
    224  1.10  christos 	     unsigned int label, uint16_t hash, unsigned int probe) {
    225  1.10  christos 	/*
    226  1.10  christos 	 * hash set entries must have valid compression offsets
    227  1.10  christos 	 * and the hash set must not get too full (75% load)
    228  1.10  christos 	 */
    229  1.10  christos 	unsigned int prefix_len = name->offsets[label];
    230  1.10  christos 	unsigned int coff = isc_buffer_usedlength(buffer) + prefix_len;
    231  1.10  christos 	if (coff >= 0x4000 || cctx->count > cctx->mask * 3 / 4) {
    232  1.10  christos 		return false;
    233  1.10  christos 	}
    234  1.10  christos 	for (;;) {
    235  1.10  christos 		unsigned int slot = slot_index(cctx, hash, probe);
    236  1.10  christos 		/* we can stop when we find an empty slot */
    237  1.10  christos 		if (cctx->set[slot].coff == 0) {
    238  1.10  christos 			cctx->set[slot].hash = hash;
    239  1.10  christos 			cctx->set[slot].coff = coff;
    240  1.10  christos 			cctx->count++;
    241  1.10  christos 			return true;
    242   1.1  christos 		}
    243  1.10  christos 		/* he steals from the rich and gives to the poor */
    244  1.10  christos 		if (probe > probe_distance(cctx, slot)) {
    245  1.10  christos 			probe = probe_distance(cctx, slot);
    246  1.10  christos 			ISC_SWAP(cctx->set[slot].hash, hash);
    247  1.10  christos 			ISC_SWAP(cctx->set[slot].coff, coff);
    248   1.4  christos 		}
    249  1.10  christos 		probe++;
    250   1.1  christos 	}
    251  1.10  christos }
    252   1.1  christos 
    253  1.10  christos /*
    254  1.10  christos  * Add the unmatched prefix of the name to the hash set.
    255  1.10  christos  */
    256  1.10  christos static void
    257  1.10  christos insert(dns_compress_t *cctx, isc_buffer_t *buffer, const dns_name_t *name,
    258  1.10  christos        unsigned int label, uint16_t hash, unsigned int probe) {
    259  1.10  christos 	bool sensitive = (cctx->flags & DNS_COMPRESS_CASE) != 0;
    260   1.1  christos 	/*
    261  1.10  christos 	 * this insertion loop continues from the search loop inside
    262  1.10  christos 	 * dns_compress_name() below, iterating over the remaining labels
    263  1.10  christos 	 * of the name and accumulating the hash in the same manner
    264   1.1  christos 	 */
    265  1.10  christos 	while (insert_label(cctx, buffer, name, label, hash, probe) &&
    266  1.10  christos 	       label-- > 0)
    267  1.10  christos 	{
    268  1.10  christos 		unsigned int prefix_len = name->offsets[label];
    269  1.10  christos 		uint8_t *suffix_ptr = name->ndata + prefix_len;
    270  1.10  christos 		hash = hash_label(hash, suffix_ptr, sensitive);
    271  1.10  christos 		probe = 0;
    272   1.4  christos 	}
    273   1.1  christos }
    274   1.1  christos 
    275   1.1  christos void
    276  1.10  christos dns_compress_name(dns_compress_t *cctx, isc_buffer_t *buffer,
    277  1.10  christos 		  const dns_name_t *name, unsigned int *return_prefix,
    278  1.10  christos 		  unsigned int *return_coff) {
    279  1.10  christos 	REQUIRE(CCTX_VALID(cctx));
    280  1.10  christos 	REQUIRE(ISC_BUFFER_VALID(buffer));
    281   1.1  christos 	REQUIRE(dns_name_isabsolute(name));
    282  1.10  christos 	REQUIRE(name->labels > 0);
    283  1.10  christos 	REQUIRE(name->offsets != NULL);
    284  1.10  christos 	REQUIRE(return_prefix != NULL);
    285  1.10  christos 	REQUIRE(return_coff != NULL);
    286  1.10  christos 	REQUIRE(*return_coff == 0);
    287   1.1  christos 
    288  1.10  christos 	if ((cctx->flags & DNS_COMPRESS_DISABLED) != 0) {
    289   1.1  christos 		return;
    290   1.4  christos 	}
    291   1.1  christos 
    292  1.10  christos 	bool sensitive = (cctx->flags & DNS_COMPRESS_CASE) != 0;
    293  1.10  christos 
    294  1.10  christos 	uint16_t hash = HASH_INIT_DJB2;
    295  1.10  christos 	unsigned int label = name->labels - 1; /* skip the root label */
    296   1.1  christos 
    297   1.1  christos 	/*
    298  1.10  christos 	 * find out how much of the name's suffix is in the hash set,
    299  1.10  christos 	 * stepping backwards from the end one label at a time
    300   1.1  christos 	 */
    301  1.10  christos 	while (label-- > 0) {
    302  1.10  christos 		unsigned int prefix_len = name->offsets[label];
    303  1.10  christos 		unsigned int suffix_len = name->length - prefix_len;
    304  1.10  christos 		uint8_t *suffix_ptr = name->ndata + prefix_len;
    305  1.10  christos 		hash = hash_label(hash, suffix_ptr, sensitive);
    306  1.10  christos 
    307  1.10  christos 		for (unsigned int probe = 0; true; probe++) {
    308  1.10  christos 			unsigned int slot = slot_index(cctx, hash, probe);
    309  1.10  christos 			unsigned int coff = cctx->set[slot].coff;
    310  1.10  christos 
    311  1.10  christos 			/*
    312  1.10  christos 			 * if we would have inserted this entry here (as in
    313  1.10  christos 			 * insert_label() above), our suffix cannot be in the
    314  1.10  christos 			 * hash set, so stop searching and switch to inserting
    315  1.10  christos 			 * the rest of the name (its prefix) into the set
    316  1.10  christos 			 */
    317  1.10  christos 			if (coff == 0 || probe > probe_distance(cctx, slot)) {
    318  1.10  christos 				insert(cctx, buffer, name, label, hash, probe);
    319  1.10  christos 				return;
    320  1.10  christos 			}
    321   1.1  christos 
    322  1.10  christos 			/*
    323  1.10  christos 			 * this slot matches, so provisionally set the
    324  1.10  christos 			 * return values and continue with the next label
    325  1.10  christos 			 */
    326  1.10  christos 			if (hash == cctx->set[slot].hash &&
    327  1.10  christos 			    match_suffix(buffer, coff, suffix_ptr, suffix_len,
    328  1.10  christos 					 *return_coff, sensitive))
    329  1.10  christos 			{
    330  1.10  christos 				*return_coff = coff;
    331  1.10  christos 				*return_prefix = prefix_len;
    332  1.10  christos 				break;
    333  1.10  christos 			}
    334   1.4  christos 		}
    335   1.4  christos 	}
    336   1.1  christos }
    337   1.1  christos 
    338   1.1  christos void
    339  1.10  christos dns_compress_rollback(dns_compress_t *cctx, unsigned int coff) {
    340  1.10  christos 	REQUIRE(CCTX_VALID(cctx));
    341   1.1  christos 
    342  1.10  christos 	for (unsigned int slot = 0; slot <= cctx->mask; slot++) {
    343  1.10  christos 		if (cctx->set[slot].coff < coff) {
    344  1.10  christos 			continue;
    345  1.10  christos 		}
    346   1.1  christos 		/*
    347  1.10  christos 		 * The next few elements might be part of the deleted element's
    348  1.10  christos 		 * probe sequence, so we slide them down to overwrite the entry
    349  1.10  christos 		 * we are deleting and preserve the probe sequence. Moving an
    350  1.10  christos 		 * element to the previous slot reduces its probe distance, so
    351  1.10  christos 		 * we stop when we find an element whose probe distance is zero.
    352   1.1  christos 		 */
    353  1.10  christos 		unsigned int prev = slot;
    354  1.10  christos 		unsigned int next = slot_index(cctx, prev, 1);
    355  1.10  christos 		while (cctx->set[next].coff != 0 &&
    356  1.10  christos 		       probe_distance(cctx, next) != 0)
    357  1.10  christos 		{
    358  1.10  christos 			cctx->set[prev] = cctx->set[next];
    359  1.10  christos 			prev = next;
    360  1.10  christos 			next = slot_index(cctx, prev, 1);
    361   1.1  christos 		}
    362  1.10  christos 		cctx->set[prev].coff = 0;
    363  1.10  christos 		cctx->set[prev].hash = 0;
    364  1.10  christos 		cctx->count--;
    365   1.1  christos 	}
    366   1.1  christos }
    367