1 1.1 christos /*- 2 1.1 christos * Copyright (c) 2016 Mindaugas Rasiukevicius <rmind at noxt eu> 3 1.1 christos * All rights reserved. 4 1.1 christos * 5 1.1 christos * Redistribution and use in source and binary forms, with or without 6 1.1 christos * modification, are permitted provided that the following conditions 7 1.1 christos * are met: 8 1.1 christos * 1. Redistributions of source code must retain the above copyright 9 1.1 christos * notice, this list of conditions and the following disclaimer. 10 1.1 christos * 2. Redistributions in binary form must reproduce the above copyright 11 1.1 christos * notice, this list of conditions and the following disclaimer in the 12 1.1 christos * documentation and/or other materials provided with the distribution. 13 1.1 christos * 14 1.1 christos * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 1.1 christos * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 1.1 christos * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 1.1 christos * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 1.1 christos * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 1.1 christos * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 1.1 christos * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 1.1 christos * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 1.1 christos * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 1.1 christos * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 1.1 christos * SUCH DAMAGE. 25 1.1 christos */ 26 1.1 christos 27 1.1 christos /* 28 1.5 rmind * Longest Prefix Match (LPM) library supporting IPv4 and IPv6. 29 1.5 rmind * 30 1.5 rmind * Algorithm: 31 1.5 rmind * 32 1.5 rmind * Each prefix gets its own hash map and all added prefixes are saved 33 1.5 rmind * in a bitmap. On a lookup, we perform a linear scan of hash maps, 34 1.5 rmind * iterating through the added prefixes only. Usually, there are only 35 1.5 rmind * a few unique prefixes used and such simple algorithm is very efficient. 36 1.5 rmind * With many IPv6 prefixes, the linear scan might become a bottleneck. 37 1.1 christos */ 38 1.1 christos 39 1.1 christos #if defined(_KERNEL) 40 1.1 christos #include <sys/cdefs.h> 41 1.6 christos __KERNEL_RCSID(0, "$NetBSD: lpm.c,v 1.6 2019/06/12 14:36:32 christos Exp $"); 42 1.1 christos 43 1.1 christos #include <sys/param.h> 44 1.1 christos #include <sys/types.h> 45 1.1 christos #include <sys/malloc.h> 46 1.1 christos #include <sys/kmem.h> 47 1.1 christos #else 48 1.1 christos #include <sys/socket.h> 49 1.1 christos #include <arpa/inet.h> 50 1.1 christos 51 1.1 christos #include <stdio.h> 52 1.1 christos #include <stdlib.h> 53 1.1 christos #include <stdbool.h> 54 1.1 christos #include <stddef.h> 55 1.1 christos #include <string.h> 56 1.1 christos #include <strings.h> 57 1.1 christos #include <errno.h> 58 1.1 christos #include <assert.h> 59 1.1 christos #define kmem_alloc(a, b) malloc(a) 60 1.1 christos #define kmem_free(a, b) free(a) 61 1.1 christos #define kmem_zalloc(a, b) calloc(a, 1) 62 1.1 christos #endif 63 1.1 christos 64 1.1 christos #include "lpm.h" 65 1.1 christos 66 1.1 christos #define LPM_MAX_PREFIX (128) 67 1.1 christos #define LPM_MAX_WORDS (LPM_MAX_PREFIX >> 5) 68 1.1 christos #define LPM_TO_WORDS(x) ((x) >> 2) 69 1.1 christos #define LPM_HASH_STEP (8) 70 1.5 rmind #define LPM_LEN_IDX(len) ((len) >> 4) 71 1.1 christos 72 1.1 christos #ifdef DEBUG 73 1.5 rmind #define ASSERT assert 74 1.1 christos #else 75 1.5 rmind #define ASSERT(x) 76 1.1 christos #endif 77 1.1 christos 78 1.1 christos typedef struct lpm_ent { 79 1.1 christos struct lpm_ent *next; 80 1.1 christos void * val; 81 1.1 christos unsigned len; 82 1.1 christos uint8_t key[]; 83 1.1 christos } lpm_ent_t; 84 1.1 christos 85 1.1 christos typedef struct { 86 1.5 rmind unsigned hashsize; 87 1.5 rmind unsigned nitems; 88 1.5 rmind lpm_ent_t ** bucket; 89 1.1 christos } lpm_hmap_t; 90 1.1 christos 91 1.1 christos struct lpm { 92 1.1 christos uint32_t bitmask[LPM_MAX_WORDS]; 93 1.6 christos int flags; 94 1.5 rmind void * defvals[2]; 95 1.1 christos lpm_hmap_t prefix[LPM_MAX_PREFIX + 1]; 96 1.1 christos }; 97 1.1 christos 98 1.5 rmind static const uint32_t zero_address[LPM_MAX_WORDS]; 99 1.5 rmind 100 1.1 christos lpm_t * 101 1.6 christos lpm_create(int flags) 102 1.1 christos { 103 1.6 christos lpm_t *lpm = kmem_zalloc(sizeof(*lpm), KM_SLEEP); 104 1.6 christos lpm->flags = flags; 105 1.6 christos return lpm; 106 1.1 christos } 107 1.1 christos 108 1.1 christos void 109 1.1 christos lpm_clear(lpm_t *lpm, lpm_dtor_t dtor, void *arg) 110 1.1 christos { 111 1.1 christos for (unsigned n = 0; n <= LPM_MAX_PREFIX; n++) { 112 1.1 christos lpm_hmap_t *hmap = &lpm->prefix[n]; 113 1.1 christos 114 1.1 christos if (!hmap->hashsize) { 115 1.1 christos KASSERT(!hmap->bucket); 116 1.1 christos continue; 117 1.1 christos } 118 1.1 christos for (unsigned i = 0; i < hmap->hashsize; i++) { 119 1.1 christos lpm_ent_t *entry = hmap->bucket[i]; 120 1.1 christos 121 1.1 christos while (entry) { 122 1.1 christos lpm_ent_t *next = entry->next; 123 1.1 christos 124 1.1 christos if (dtor) { 125 1.1 christos dtor(arg, entry->key, 126 1.1 christos entry->len, entry->val); 127 1.1 christos } 128 1.1 christos kmem_free(entry, 129 1.1 christos offsetof(lpm_ent_t, key[entry->len])); 130 1.1 christos entry = next; 131 1.1 christos } 132 1.1 christos } 133 1.2 rmind kmem_free(hmap->bucket, hmap->hashsize * sizeof(lpm_ent_t *)); 134 1.1 christos hmap->bucket = NULL; 135 1.1 christos hmap->hashsize = 0; 136 1.1 christos hmap->nitems = 0; 137 1.1 christos } 138 1.5 rmind if (dtor) { 139 1.5 rmind dtor(arg, zero_address, 4, lpm->defvals[0]); 140 1.5 rmind dtor(arg, zero_address, 16, lpm->defvals[1]); 141 1.5 rmind } 142 1.1 christos memset(lpm->bitmask, 0, sizeof(lpm->bitmask)); 143 1.5 rmind memset(lpm->defvals, 0, sizeof(lpm->defvals)); 144 1.1 christos } 145 1.1 christos 146 1.1 christos void 147 1.1 christos lpm_destroy(lpm_t *lpm) 148 1.1 christos { 149 1.1 christos lpm_clear(lpm, NULL, NULL); 150 1.1 christos kmem_free(lpm, sizeof(*lpm)); 151 1.1 christos } 152 1.1 christos 153 1.1 christos /* 154 1.1 christos * fnv1a_hash: Fowler-Noll-Vo hash function (FNV-1a variant). 155 1.1 christos */ 156 1.1 christos static uint32_t 157 1.1 christos fnv1a_hash(const void *buf, size_t len) 158 1.1 christos { 159 1.1 christos uint32_t hash = 2166136261UL; 160 1.1 christos const uint8_t *p = buf; 161 1.1 christos 162 1.1 christos while (len--) { 163 1.1 christos hash ^= *p++; 164 1.1 christos hash *= 16777619U; 165 1.1 christos } 166 1.1 christos return hash; 167 1.1 christos } 168 1.1 christos 169 1.1 christos static bool 170 1.6 christos hashmap_rehash(lpm_hmap_t *hmap, unsigned size, int flags) 171 1.1 christos { 172 1.1 christos lpm_ent_t **bucket; 173 1.5 rmind unsigned hashsize; 174 1.1 christos 175 1.1 christos for (hashsize = 1; hashsize < size; hashsize <<= 1) { 176 1.1 christos continue; 177 1.1 christos } 178 1.6 christos bucket = kmem_zalloc(hashsize * sizeof(lpm_ent_t *), flags); 179 1.6 christos if (bucket == NULL) 180 1.6 christos return false; 181 1.1 christos for (unsigned n = 0; n < hmap->hashsize; n++) { 182 1.1 christos lpm_ent_t *list = hmap->bucket[n]; 183 1.1 christos 184 1.1 christos while (list) { 185 1.1 christos lpm_ent_t *entry = list; 186 1.1 christos uint32_t hash = fnv1a_hash(entry->key, entry->len); 187 1.5 rmind const unsigned i = hash & (hashsize - 1); 188 1.1 christos 189 1.1 christos list = entry->next; 190 1.1 christos entry->next = bucket[i]; 191 1.1 christos bucket[i] = entry; 192 1.1 christos } 193 1.1 christos } 194 1.1 christos if (hmap->bucket) 195 1.2 rmind kmem_free(hmap->bucket, hmap->hashsize * sizeof(lpm_ent_t *)); 196 1.1 christos hmap->bucket = bucket; 197 1.1 christos hmap->hashsize = hashsize; 198 1.1 christos return true; 199 1.1 christos } 200 1.1 christos 201 1.1 christos static lpm_ent_t * 202 1.6 christos hashmap_insert(lpm_hmap_t *hmap, const void *key, size_t len, int flags) 203 1.1 christos { 204 1.5 rmind const unsigned target = hmap->nitems + LPM_HASH_STEP; 205 1.1 christos const size_t entlen = offsetof(lpm_ent_t, key[len]); 206 1.1 christos uint32_t hash, i; 207 1.1 christos lpm_ent_t *entry; 208 1.1 christos 209 1.6 christos if (hmap->hashsize < target && !hashmap_rehash(hmap, target, flags)) { 210 1.1 christos return NULL; 211 1.1 christos } 212 1.1 christos 213 1.1 christos hash = fnv1a_hash(key, len); 214 1.1 christos i = hash & (hmap->hashsize - 1); 215 1.1 christos entry = hmap->bucket[i]; 216 1.1 christos while (entry) { 217 1.1 christos if (entry->len == len && memcmp(entry->key, key, len) == 0) { 218 1.1 christos return entry; 219 1.1 christos } 220 1.1 christos entry = entry->next; 221 1.1 christos } 222 1.1 christos 223 1.6 christos if ((entry = kmem_alloc(entlen, flags)) != NULL) { 224 1.5 rmind memcpy(entry->key, key, len); 225 1.5 rmind entry->next = hmap->bucket[i]; 226 1.5 rmind entry->len = len; 227 1.1 christos 228 1.5 rmind hmap->bucket[i] = entry; 229 1.5 rmind hmap->nitems++; 230 1.5 rmind } 231 1.1 christos return entry; 232 1.1 christos } 233 1.1 christos 234 1.1 christos static lpm_ent_t * 235 1.1 christos hashmap_lookup(lpm_hmap_t *hmap, const void *key, size_t len) 236 1.1 christos { 237 1.1 christos const uint32_t hash = fnv1a_hash(key, len); 238 1.5 rmind const unsigned i = hash & (hmap->hashsize - 1); 239 1.5 rmind lpm_ent_t *entry; 240 1.5 rmind 241 1.5 rmind if (hmap->hashsize == 0) { 242 1.5 rmind return NULL; 243 1.5 rmind } 244 1.5 rmind entry = hmap->bucket[i]; 245 1.1 christos 246 1.1 christos while (entry) { 247 1.1 christos if (entry->len == len && memcmp(entry->key, key, len) == 0) { 248 1.1 christos return entry; 249 1.1 christos } 250 1.1 christos entry = entry->next; 251 1.1 christos } 252 1.1 christos return NULL; 253 1.1 christos } 254 1.1 christos 255 1.1 christos static int 256 1.1 christos hashmap_remove(lpm_hmap_t *hmap, const void *key, size_t len) 257 1.1 christos { 258 1.1 christos const uint32_t hash = fnv1a_hash(key, len); 259 1.5 rmind const unsigned i = hash & (hmap->hashsize - 1); 260 1.5 rmind lpm_ent_t *prev = NULL, *entry; 261 1.5 rmind 262 1.5 rmind if (hmap->hashsize == 0) { 263 1.5 rmind return -1; 264 1.5 rmind } 265 1.5 rmind entry = hmap->bucket[i]; 266 1.1 christos 267 1.1 christos while (entry) { 268 1.1 christos if (entry->len == len && memcmp(entry->key, key, len) == 0) { 269 1.1 christos if (prev) { 270 1.1 christos prev->next = entry->next; 271 1.1 christos } else { 272 1.1 christos hmap->bucket[i] = entry->next; 273 1.1 christos } 274 1.3 rmind kmem_free(entry, offsetof(lpm_ent_t, key[len])); 275 1.1 christos return 0; 276 1.1 christos } 277 1.1 christos prev = entry; 278 1.1 christos entry = entry->next; 279 1.1 christos } 280 1.1 christos return -1; 281 1.1 christos } 282 1.1 christos 283 1.1 christos /* 284 1.1 christos * compute_prefix: given the address and prefix length, compute and 285 1.1 christos * return the address prefix. 286 1.1 christos */ 287 1.1 christos static inline void 288 1.1 christos compute_prefix(const unsigned nwords, const uint32_t *addr, 289 1.1 christos unsigned preflen, uint32_t *prefix) 290 1.1 christos { 291 1.1 christos uint32_t addr2[4]; 292 1.1 christos 293 1.1 christos if ((uintptr_t)addr & 3) { 294 1.1 christos /* Unaligned address: just copy for now. */ 295 1.1 christos memcpy(addr2, addr, nwords * 4); 296 1.1 christos addr = addr2; 297 1.1 christos } 298 1.1 christos for (unsigned i = 0; i < nwords; i++) { 299 1.1 christos if (preflen == 0) { 300 1.1 christos prefix[i] = 0; 301 1.1 christos continue; 302 1.1 christos } 303 1.1 christos if (preflen < 32) { 304 1.1 christos uint32_t mask = htonl(0xffffffff << (32 - preflen)); 305 1.1 christos prefix[i] = addr[i] & mask; 306 1.1 christos preflen = 0; 307 1.1 christos } else { 308 1.1 christos prefix[i] = addr[i]; 309 1.1 christos preflen -= 32; 310 1.1 christos } 311 1.1 christos } 312 1.1 christos } 313 1.1 christos 314 1.1 christos /* 315 1.1 christos * lpm_insert: insert the CIDR into the LPM table. 316 1.1 christos * 317 1.1 christos * => Returns zero on success and -1 on failure. 318 1.1 christos */ 319 1.1 christos int 320 1.1 christos lpm_insert(lpm_t *lpm, const void *addr, 321 1.1 christos size_t len, unsigned preflen, void *val) 322 1.1 christos { 323 1.1 christos const unsigned nwords = LPM_TO_WORDS(len); 324 1.1 christos uint32_t prefix[LPM_MAX_WORDS]; 325 1.1 christos lpm_ent_t *entry; 326 1.5 rmind KASSERT(len == 4 || len == 16); 327 1.1 christos 328 1.1 christos if (preflen == 0) { 329 1.5 rmind /* 0-length prefix is a special case. */ 330 1.5 rmind lpm->defvals[LPM_LEN_IDX(len)] = val; 331 1.1 christos return 0; 332 1.1 christos } 333 1.1 christos compute_prefix(nwords, addr, preflen, prefix); 334 1.6 christos entry = hashmap_insert(&lpm->prefix[preflen], prefix, len, lpm->flags); 335 1.1 christos if (entry) { 336 1.1 christos const unsigned n = --preflen >> 5; 337 1.1 christos lpm->bitmask[n] |= 0x80000000U >> (preflen & 31); 338 1.1 christos entry->val = val; 339 1.1 christos return 0; 340 1.1 christos } 341 1.1 christos return -1; 342 1.1 christos } 343 1.1 christos 344 1.1 christos /* 345 1.1 christos * lpm_remove: remove the specified prefix. 346 1.1 christos */ 347 1.1 christos int 348 1.1 christos lpm_remove(lpm_t *lpm, const void *addr, size_t len, unsigned preflen) 349 1.1 christos { 350 1.1 christos const unsigned nwords = LPM_TO_WORDS(len); 351 1.1 christos uint32_t prefix[LPM_MAX_WORDS]; 352 1.5 rmind KASSERT(len == 4 || len == 16); 353 1.1 christos 354 1.1 christos if (preflen == 0) { 355 1.5 rmind lpm->defvals[LPM_LEN_IDX(len)] = NULL; 356 1.1 christos return 0; 357 1.1 christos } 358 1.1 christos compute_prefix(nwords, addr, preflen, prefix); 359 1.1 christos return hashmap_remove(&lpm->prefix[preflen], prefix, len); 360 1.1 christos } 361 1.1 christos 362 1.1 christos /* 363 1.1 christos * lpm_lookup: find the longest matching prefix given the IP address. 364 1.1 christos * 365 1.1 christos * => Returns the associated value on success or NULL on failure. 366 1.1 christos */ 367 1.1 christos void * 368 1.1 christos lpm_lookup(lpm_t *lpm, const void *addr, size_t len) 369 1.1 christos { 370 1.1 christos const unsigned nwords = LPM_TO_WORDS(len); 371 1.1 christos unsigned i, n = nwords; 372 1.1 christos uint32_t prefix[LPM_MAX_WORDS]; 373 1.1 christos 374 1.1 christos while (n--) { 375 1.1 christos uint32_t bitmask = lpm->bitmask[n]; 376 1.1 christos 377 1.1 christos while ((i = ffs(bitmask)) != 0) { 378 1.1 christos const unsigned preflen = (32 * n) + (32 - --i); 379 1.1 christos lpm_hmap_t *hmap = &lpm->prefix[preflen]; 380 1.1 christos lpm_ent_t *entry; 381 1.1 christos 382 1.1 christos compute_prefix(nwords, addr, preflen, prefix); 383 1.1 christos entry = hashmap_lookup(hmap, prefix, len); 384 1.1 christos if (entry) { 385 1.1 christos return entry->val; 386 1.1 christos } 387 1.1 christos bitmask &= ~(1U << i); 388 1.1 christos } 389 1.1 christos } 390 1.5 rmind return lpm->defvals[LPM_LEN_IDX(len)]; 391 1.5 rmind } 392 1.5 rmind 393 1.5 rmind /* 394 1.5 rmind * lpm_lookup_prefix: return the value associated with a prefix 395 1.5 rmind * 396 1.5 rmind * => Returns the associated value on success or NULL on failure. 397 1.5 rmind */ 398 1.5 rmind void * 399 1.5 rmind lpm_lookup_prefix(lpm_t *lpm, const void *addr, size_t len, unsigned preflen) 400 1.5 rmind { 401 1.5 rmind const unsigned nwords = LPM_TO_WORDS(len); 402 1.5 rmind uint32_t prefix[LPM_MAX_WORDS]; 403 1.5 rmind lpm_ent_t *entry; 404 1.5 rmind KASSERT(len == 4 || len == 16); 405 1.5 rmind 406 1.5 rmind if (preflen == 0) { 407 1.5 rmind return lpm->defvals[LPM_LEN_IDX(len)]; 408 1.5 rmind } 409 1.5 rmind compute_prefix(nwords, addr, preflen, prefix); 410 1.5 rmind entry = hashmap_lookup(&lpm->prefix[preflen], prefix, len); 411 1.5 rmind if (entry) { 412 1.5 rmind return entry->val; 413 1.5 rmind } 414 1.5 rmind return NULL; 415 1.1 christos } 416 1.1 christos 417 1.1 christos #if !defined(_KERNEL) 418 1.1 christos /* 419 1.1 christos * lpm_strtobin: convert CIDR string to the binary IP address and mask. 420 1.1 christos * 421 1.1 christos * => The address will be in the network byte order. 422 1.1 christos * => Returns 0 on success or -1 on failure. 423 1.1 christos */ 424 1.1 christos int 425 1.1 christos lpm_strtobin(const char *cidr, void *addr, size_t *len, unsigned *preflen) 426 1.1 christos { 427 1.1 christos char *p, buf[INET6_ADDRSTRLEN]; 428 1.1 christos 429 1.1 christos strncpy(buf, cidr, sizeof(buf)); 430 1.1 christos buf[sizeof(buf) - 1] = '\0'; 431 1.1 christos 432 1.1 christos if ((p = strchr(buf, '/')) != NULL) { 433 1.1 christos const ptrdiff_t off = p - buf; 434 1.1 christos *preflen = atoi(&buf[off + 1]); 435 1.1 christos buf[off] = '\0'; 436 1.1 christos } else { 437 1.1 christos *preflen = LPM_MAX_PREFIX; 438 1.1 christos } 439 1.1 christos 440 1.1 christos if (inet_pton(AF_INET6, buf, addr) == 1) { 441 1.1 christos *len = 16; 442 1.1 christos return 0; 443 1.1 christos } 444 1.1 christos if (inet_pton(AF_INET, buf, addr) == 1) { 445 1.1 christos if (*preflen == LPM_MAX_PREFIX) { 446 1.1 christos *preflen = 32; 447 1.1 christos } 448 1.1 christos *len = 4; 449 1.1 christos return 0; 450 1.1 christos } 451 1.1 christos return -1; 452 1.1 christos } 453 1.1 christos #endif 454