1 1.10 christos /* $NetBSD: compress.c,v 1.10 2025/01/26 16:25:22 christos Exp $ */ 2 1.1 christos 3 1.1 christos /* 4 1.1 christos * Copyright (C) Internet Systems Consortium, Inc. ("ISC") 5 1.1 christos * 6 1.7 christos * SPDX-License-Identifier: MPL-2.0 7 1.7 christos * 8 1.1 christos * This Source Code Form is subject to the terms of the Mozilla Public 9 1.1 christos * License, v. 2.0. If a copy of the MPL was not distributed with this 10 1.6 christos * file, you can obtain one at https://mozilla.org/MPL/2.0/. 11 1.1 christos * 12 1.1 christos * See the COPYRIGHT file distributed with this work for additional 13 1.1 christos * information regarding copyright ownership. 14 1.1 christos */ 15 1.1 christos 16 1.3 christos #include <stdbool.h> 17 1.10 christos #include <stdint.h> 18 1.10 christos #include <string.h> 19 1.1 christos 20 1.10 christos #include <isc/ascii.h> 21 1.10 christos #include <isc/buffer.h> 22 1.10 christos #include <isc/hash.h> 23 1.1 christos #include <isc/mem.h> 24 1.1 christos #include <isc/util.h> 25 1.1 christos 26 1.1 christos #include <dns/compress.h> 27 1.10 christos #include <dns/name.h> 28 1.10 christos 29 1.10 christos #define HASH_INIT_DJB2 5381 30 1.1 christos 31 1.4 christos #define CCTX_MAGIC ISC_MAGIC('C', 'C', 'T', 'X') 32 1.10 christos #define CCTX_VALID(x) ISC_MAGIC_VALID(x, CCTX_MAGIC) 33 1.1 christos 34 1.10 christos void 35 1.10 christos dns_compress_init(dns_compress_t *cctx, isc_mem_t *mctx, 36 1.10 christos dns_compress_flags_t flags) { 37 1.10 christos dns_compress_slot_t *set = NULL; 38 1.10 christos uint16_t mask; 39 1.1 christos 40 1.1 christos REQUIRE(cctx != NULL); 41 1.10 christos REQUIRE(mctx != NULL); 42 1.1 christos 43 1.10 christos if ((flags & DNS_COMPRESS_LARGE) != 0) { 44 1.10 christos size_t count = (1 << DNS_COMPRESS_LARGEBITS); 45 1.10 christos mask = count - 1; 46 1.10 christos set = isc_mem_callocate(mctx, count, sizeof(*set)); 47 1.10 christos } else { 48 1.10 christos mask = ARRAY_SIZE(cctx->smallset) - 1; 49 1.10 christos set = cctx->smallset; 50 1.10 christos } 51 1.1 christos 52 1.10 christos /* 53 1.10 christos * The lifetime of this object is limited to the stack frame of the 54 1.10 christos * caller, so we don't need to attach to the memory context. 55 1.10 christos */ 56 1.10 christos *cctx = (dns_compress_t){ 57 1.10 christos .magic = CCTX_MAGIC, 58 1.10 christos .flags = flags | DNS_COMPRESS_PERMITTED, 59 1.10 christos .mctx = mctx, 60 1.10 christos .mask = mask, 61 1.10 christos .set = set, 62 1.10 christos }; 63 1.1 christos } 64 1.1 christos 65 1.1 christos void 66 1.1 christos dns_compress_invalidate(dns_compress_t *cctx) { 67 1.10 christos REQUIRE(CCTX_VALID(cctx)); 68 1.10 christos if (cctx->set != cctx->smallset) { 69 1.10 christos isc_mem_free(cctx->mctx, cctx->set); 70 1.1 christos } 71 1.10 christos *cctx = (dns_compress_t){ 0 }; 72 1.1 christos } 73 1.1 christos 74 1.1 christos void 75 1.10 christos dns_compress_setpermitted(dns_compress_t *cctx, bool permitted) { 76 1.10 christos REQUIRE(CCTX_VALID(cctx)); 77 1.10 christos if (permitted) { 78 1.10 christos cctx->flags |= DNS_COMPRESS_PERMITTED; 79 1.10 christos } else { 80 1.10 christos cctx->flags &= ~DNS_COMPRESS_PERMITTED; 81 1.10 christos } 82 1.1 christos } 83 1.1 christos 84 1.10 christos bool 85 1.10 christos dns_compress_getpermitted(dns_compress_t *cctx) { 86 1.10 christos REQUIRE(CCTX_VALID(cctx)); 87 1.10 christos return (cctx->flags & DNS_COMPRESS_PERMITTED) != 0; 88 1.1 christos } 89 1.1 christos 90 1.10 christos /* 91 1.10 christos * Our hash value needs to cover the entire suffix of a name, and we need 92 1.10 christos * to calculate it one label at a time. So this function mixes a label into 93 1.10 christos * an existing hash. (We don't use isc_hash32() because the djb2 hash is a 94 1.10 christos * lot faster, and we limit the impact of collision attacks by restricting 95 1.10 christos * the size and occupancy of the hash set.) The accumulator is 32 bits to 96 1.10 christos * keep more of the fun mixing that happens in the upper bits. 97 1.10 christos */ 98 1.10 christos static uint16_t 99 1.10 christos hash_label(uint16_t init, uint8_t *ptr, bool sensitive) { 100 1.10 christos unsigned int len = ptr[0] + 1; 101 1.10 christos uint32_t hash = init; 102 1.1 christos 103 1.4 christos if (sensitive) { 104 1.10 christos while (len-- > 0) { 105 1.10 christos hash = hash * 33 + *ptr++; 106 1.10 christos } 107 1.4 christos } else { 108 1.10 christos /* using the autovectorize-friendly tolower() */ 109 1.10 christos while (len-- > 0) { 110 1.10 christos hash = hash * 33 + isc__ascii_tolower1(*ptr++); 111 1.10 christos } 112 1.4 christos } 113 1.1 christos 114 1.10 christos return isc_hash_bits32(hash, 16); 115 1.1 christos } 116 1.1 christos 117 1.10 christos static bool 118 1.10 christos match_wirename(uint8_t *a, uint8_t *b, unsigned int len, bool sensitive) { 119 1.10 christos if (sensitive) { 120 1.10 christos return memcmp(a, b, len) == 0; 121 1.10 christos } else { 122 1.10 christos /* label lengths are < 'A' so unaffected by tolower() */ 123 1.10 christos return isc_ascii_lowerequal(a, b, len); 124 1.10 christos } 125 1.1 christos } 126 1.1 christos 127 1.1 christos /* 128 1.10 christos * We have found a hash set entry whose hash value matches the current 129 1.10 christos * suffix of our name, which is passed to this function via `sptr` and 130 1.10 christos * `slen`. We need to verify that the suffix in the message (referred to 131 1.10 christos * by `new_coff`) actually matches, in case of hash collisions. 132 1.10 christos * 133 1.10 christos * We know that the previous suffix of this name (after the first label) 134 1.10 christos * occurs in the message at `old_coff`, and all the compression offsets in 135 1.10 christos * the hash set and in the message refer to the first occurrence of a 136 1.10 christos * particular name or suffix. 137 1.10 christos * 138 1.10 christos * First, we need to match the label that was just added to our suffix, 139 1.10 christos * and second, verify that it is followed by the previous suffix. 140 1.10 christos * 141 1.10 christos * There are a few ways to match the previous suffix: 142 1.10 christos * 143 1.10 christos * When the first occurrence of this suffix is also the first occurrence 144 1.10 christos * of the previous suffix, `old_coff` points just after the new label. 145 1.10 christos * 146 1.10 christos * Otherwise, if this suffix occurs in a compressed name, it will be 147 1.10 christos * followed by a compression pointer that refers to the previous suffix, 148 1.10 christos * which must be equal to `old_coff`. 149 1.10 christos * 150 1.10 christos * The final possibility is that this suffix occurs in an uncompressed 151 1.10 christos * name, so we have to compare the rest of the suffix in full. 152 1.10 christos * 153 1.10 christos * A special case is when this suffix is a TLD. That can be handled by 154 1.10 christos * the case for uncompressed names, but it is common enough that it is 155 1.10 christos * worth taking a short cut. (In the TLD case, the `old_coff` will be 156 1.10 christos * zero, and the quick checks for the previous suffix will fail.) 157 1.1 christos */ 158 1.10 christos static bool 159 1.10 christos match_suffix(isc_buffer_t *buffer, unsigned int new_coff, uint8_t *sptr, 160 1.10 christos unsigned int slen, unsigned int old_coff, bool sensitive) { 161 1.10 christos uint8_t pptr[] = { 0xC0 | (old_coff >> 8), old_coff & 0xff }; 162 1.10 christos uint8_t *bptr = isc_buffer_base(buffer); 163 1.10 christos unsigned int blen = isc_buffer_usedlength(buffer); 164 1.10 christos unsigned int llen = sptr[0] + 1; 165 1.10 christos 166 1.10 christos INSIST(llen <= 64 && llen < slen); 167 1.10 christos 168 1.10 christos if (blen < new_coff + llen) { 169 1.10 christos return false; 170 1.10 christos } 171 1.1 christos 172 1.10 christos blen -= new_coff; 173 1.10 christos bptr += new_coff; 174 1.1 christos 175 1.10 christos /* does the first label of the suffix appear here? */ 176 1.10 christos if (!match_wirename(bptr, sptr, llen, sensitive)) { 177 1.10 christos return false; 178 1.4 christos } 179 1.1 christos 180 1.10 christos /* is this label followed by the previously matched suffix? */ 181 1.10 christos if (old_coff == new_coff + llen) { 182 1.10 christos return true; 183 1.4 christos } 184 1.1 christos 185 1.10 christos blen -= llen; 186 1.10 christos bptr += llen; 187 1.10 christos slen -= llen; 188 1.10 christos sptr += llen; 189 1.1 christos 190 1.10 christos /* are both labels followed by the root label? */ 191 1.10 christos if (blen >= 1 && slen == 1 && bptr[0] == 0 && sptr[0] == 0) { 192 1.10 christos return true; 193 1.10 christos } 194 1.1 christos 195 1.10 christos /* is this label followed by a pointer to the previous match? */ 196 1.10 christos if (blen >= 2 && bptr[0] == pptr[0] && bptr[1] == pptr[1]) { 197 1.10 christos return true; 198 1.10 christos } 199 1.1 christos 200 1.10 christos /* is this label followed by a copy of the rest of the suffix? */ 201 1.10 christos return blen >= slen && match_wirename(bptr, sptr, slen, sensitive); 202 1.10 christos } 203 1.1 christos 204 1.10 christos /* 205 1.10 christos * Robin Hood hashing aims to minimize probe distance when inserting a 206 1.10 christos * new element by ensuring that the new element does not have a worse 207 1.10 christos * probe distance than any other element in its probe sequence. During 208 1.10 christos * insertion, if an existing element is encountered with a shorter 209 1.10 christos * probe distance, it is swapped with the new element, and insertion 210 1.10 christos * continues with the displaced element. 211 1.10 christos */ 212 1.10 christos static unsigned int 213 1.10 christos probe_distance(dns_compress_t *cctx, unsigned int slot) { 214 1.10 christos return (slot - cctx->set[slot].hash) & cctx->mask; 215 1.10 christos } 216 1.1 christos 217 1.10 christos static unsigned int 218 1.10 christos slot_index(dns_compress_t *cctx, unsigned int hash, unsigned int probe) { 219 1.10 christos return (hash + probe) & cctx->mask; 220 1.10 christos } 221 1.10 christos 222 1.10 christos static bool 223 1.10 christos insert_label(dns_compress_t *cctx, isc_buffer_t *buffer, const dns_name_t *name, 224 1.10 christos unsigned int label, uint16_t hash, unsigned int probe) { 225 1.10 christos /* 226 1.10 christos * hash set entries must have valid compression offsets 227 1.10 christos * and the hash set must not get too full (75% load) 228 1.10 christos */ 229 1.10 christos unsigned int prefix_len = name->offsets[label]; 230 1.10 christos unsigned int coff = isc_buffer_usedlength(buffer) + prefix_len; 231 1.10 christos if (coff >= 0x4000 || cctx->count > cctx->mask * 3 / 4) { 232 1.10 christos return false; 233 1.10 christos } 234 1.10 christos for (;;) { 235 1.10 christos unsigned int slot = slot_index(cctx, hash, probe); 236 1.10 christos /* we can stop when we find an empty slot */ 237 1.10 christos if (cctx->set[slot].coff == 0) { 238 1.10 christos cctx->set[slot].hash = hash; 239 1.10 christos cctx->set[slot].coff = coff; 240 1.10 christos cctx->count++; 241 1.10 christos return true; 242 1.1 christos } 243 1.10 christos /* he steals from the rich and gives to the poor */ 244 1.10 christos if (probe > probe_distance(cctx, slot)) { 245 1.10 christos probe = probe_distance(cctx, slot); 246 1.10 christos ISC_SWAP(cctx->set[slot].hash, hash); 247 1.10 christos ISC_SWAP(cctx->set[slot].coff, coff); 248 1.4 christos } 249 1.10 christos probe++; 250 1.1 christos } 251 1.10 christos } 252 1.1 christos 253 1.10 christos /* 254 1.10 christos * Add the unmatched prefix of the name to the hash set. 255 1.10 christos */ 256 1.10 christos static void 257 1.10 christos insert(dns_compress_t *cctx, isc_buffer_t *buffer, const dns_name_t *name, 258 1.10 christos unsigned int label, uint16_t hash, unsigned int probe) { 259 1.10 christos bool sensitive = (cctx->flags & DNS_COMPRESS_CASE) != 0; 260 1.1 christos /* 261 1.10 christos * this insertion loop continues from the search loop inside 262 1.10 christos * dns_compress_name() below, iterating over the remaining labels 263 1.10 christos * of the name and accumulating the hash in the same manner 264 1.1 christos */ 265 1.10 christos while (insert_label(cctx, buffer, name, label, hash, probe) && 266 1.10 christos label-- > 0) 267 1.10 christos { 268 1.10 christos unsigned int prefix_len = name->offsets[label]; 269 1.10 christos uint8_t *suffix_ptr = name->ndata + prefix_len; 270 1.10 christos hash = hash_label(hash, suffix_ptr, sensitive); 271 1.10 christos probe = 0; 272 1.4 christos } 273 1.1 christos } 274 1.1 christos 275 1.1 christos void 276 1.10 christos dns_compress_name(dns_compress_t *cctx, isc_buffer_t *buffer, 277 1.10 christos const dns_name_t *name, unsigned int *return_prefix, 278 1.10 christos unsigned int *return_coff) { 279 1.10 christos REQUIRE(CCTX_VALID(cctx)); 280 1.10 christos REQUIRE(ISC_BUFFER_VALID(buffer)); 281 1.1 christos REQUIRE(dns_name_isabsolute(name)); 282 1.10 christos REQUIRE(name->labels > 0); 283 1.10 christos REQUIRE(name->offsets != NULL); 284 1.10 christos REQUIRE(return_prefix != NULL); 285 1.10 christos REQUIRE(return_coff != NULL); 286 1.10 christos REQUIRE(*return_coff == 0); 287 1.1 christos 288 1.10 christos if ((cctx->flags & DNS_COMPRESS_DISABLED) != 0) { 289 1.1 christos return; 290 1.4 christos } 291 1.1 christos 292 1.10 christos bool sensitive = (cctx->flags & DNS_COMPRESS_CASE) != 0; 293 1.10 christos 294 1.10 christos uint16_t hash = HASH_INIT_DJB2; 295 1.10 christos unsigned int label = name->labels - 1; /* skip the root label */ 296 1.1 christos 297 1.1 christos /* 298 1.10 christos * find out how much of the name's suffix is in the hash set, 299 1.10 christos * stepping backwards from the end one label at a time 300 1.1 christos */ 301 1.10 christos while (label-- > 0) { 302 1.10 christos unsigned int prefix_len = name->offsets[label]; 303 1.10 christos unsigned int suffix_len = name->length - prefix_len; 304 1.10 christos uint8_t *suffix_ptr = name->ndata + prefix_len; 305 1.10 christos hash = hash_label(hash, suffix_ptr, sensitive); 306 1.10 christos 307 1.10 christos for (unsigned int probe = 0; true; probe++) { 308 1.10 christos unsigned int slot = slot_index(cctx, hash, probe); 309 1.10 christos unsigned int coff = cctx->set[slot].coff; 310 1.10 christos 311 1.10 christos /* 312 1.10 christos * if we would have inserted this entry here (as in 313 1.10 christos * insert_label() above), our suffix cannot be in the 314 1.10 christos * hash set, so stop searching and switch to inserting 315 1.10 christos * the rest of the name (its prefix) into the set 316 1.10 christos */ 317 1.10 christos if (coff == 0 || probe > probe_distance(cctx, slot)) { 318 1.10 christos insert(cctx, buffer, name, label, hash, probe); 319 1.10 christos return; 320 1.10 christos } 321 1.1 christos 322 1.10 christos /* 323 1.10 christos * this slot matches, so provisionally set the 324 1.10 christos * return values and continue with the next label 325 1.10 christos */ 326 1.10 christos if (hash == cctx->set[slot].hash && 327 1.10 christos match_suffix(buffer, coff, suffix_ptr, suffix_len, 328 1.10 christos *return_coff, sensitive)) 329 1.10 christos { 330 1.10 christos *return_coff = coff; 331 1.10 christos *return_prefix = prefix_len; 332 1.10 christos break; 333 1.10 christos } 334 1.4 christos } 335 1.4 christos } 336 1.1 christos } 337 1.1 christos 338 1.1 christos void 339 1.10 christos dns_compress_rollback(dns_compress_t *cctx, unsigned int coff) { 340 1.10 christos REQUIRE(CCTX_VALID(cctx)); 341 1.1 christos 342 1.10 christos for (unsigned int slot = 0; slot <= cctx->mask; slot++) { 343 1.10 christos if (cctx->set[slot].coff < coff) { 344 1.10 christos continue; 345 1.10 christos } 346 1.1 christos /* 347 1.10 christos * The next few elements might be part of the deleted element's 348 1.10 christos * probe sequence, so we slide them down to overwrite the entry 349 1.10 christos * we are deleting and preserve the probe sequence. Moving an 350 1.10 christos * element to the previous slot reduces its probe distance, so 351 1.10 christos * we stop when we find an element whose probe distance is zero. 352 1.1 christos */ 353 1.10 christos unsigned int prev = slot; 354 1.10 christos unsigned int next = slot_index(cctx, prev, 1); 355 1.10 christos while (cctx->set[next].coff != 0 && 356 1.10 christos probe_distance(cctx, next) != 0) 357 1.10 christos { 358 1.10 christos cctx->set[prev] = cctx->set[next]; 359 1.10 christos prev = next; 360 1.10 christos next = slot_index(cctx, prev, 1); 361 1.1 christos } 362 1.10 christos cctx->set[prev].coff = 0; 363 1.10 christos cctx->set[prev].hash = 0; 364 1.10 christos cctx->count--; 365 1.1 christos } 366 1.1 christos } 367