1 1.1 dyoung /* 2 1.1 dyoung * Copyright (c) 2011 The NetBSD Foundation, Inc. 3 1.1 dyoung * All rights reserved. 4 1.1 dyoung * 5 1.1 dyoung * This code is derived from software contributed to The NetBSD Foundation 6 1.1 dyoung * by Coyote Point Systems, Inc. 7 1.1 dyoung * 8 1.1 dyoung * Redistribution and use in source and binary forms, with or without 9 1.1 dyoung * modification, are permitted provided that the following conditions 10 1.1 dyoung * are met: 11 1.1 dyoung * 1. Redistributions of source code must retain the above copyright 12 1.1 dyoung * notice, this list of conditions and the following disclaimer. 13 1.1 dyoung * 2. Redistributions in binary form must reproduce the above copyright 14 1.1 dyoung * notice, this list of conditions and the following disclaimer in the 15 1.1 dyoung * documentation and/or other materials provided with the distribution. 16 1.1 dyoung * 17 1.1 dyoung * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 18 1.1 dyoung * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 19 1.1 dyoung * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 20 1.1 dyoung * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 21 1.1 dyoung * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 1.1 dyoung * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 1.1 dyoung * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 1.1 dyoung * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 1.1 dyoung * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 1.1 dyoung * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 1.1 dyoung * POSSIBILITY OF SUCH DAMAGE. 
28 1.1 dyoung */ 29 1.9 yamt 30 1.9 yamt /* 31 1.9 yamt * Reduces the resources demanded by TCP sessions in TIME_WAIT-state using 32 1.9 yamt * methods called Vestigial Time-Wait (VTW) and Maximum Segment Lifetime 33 1.9 yamt * Truncation (MSLT). 34 1.9 yamt * 35 1.9 yamt * MSLT and VTW were contributed by Coyote Point Systems, Inc. 36 1.9 yamt * 37 1.9 yamt * Even after a TCP session enters the TIME_WAIT state, its corresponding 38 1.9 yamt * socket and protocol control blocks (PCBs) stick around until the TCP 39 1.9 yamt * Maximum Segment Lifetime (MSL) expires. On a host whose workload 40 1.9 yamt * necessarily creates and closes down many TCP sockets, the sockets & PCBs 41 1.9 yamt * for TCP sessions in TIME_WAIT state amount to many megabytes of dead 42 1.9 yamt * weight in RAM. 43 1.9 yamt * 44 1.9 yamt * Maximum Segment Lifetime Truncation (MSLT) assigns each TCP session to 45 1.9 yamt * a class based on the nearness of the peer. Corresponding to each class 46 1.9 yamt * is an MSL, and a session uses the MSL of its class. The classes are 47 1.9 yamt * loopback (local host equals remote host), local (local host and remote 48 1.9 yamt * host are on the same link/subnet), and remote (local host and remote 49 1.9 yamt * host communicate via one or more gateways). Classes corresponding to 50 1.9 yamt * nearer peers have lower MSLs by default: 2 seconds for loopback, 10 51 1.9 yamt * seconds for local, 60 seconds for remote. Loopback and local sessions 52 1.9 yamt * expire more quickly when MSLT is used. 53 1.9 yamt * 54 1.9 yamt * Vestigial Time-Wait (VTW) replaces a TIME_WAIT session's PCB/socket 55 1.9 yamt * dead weight with a compact representation of the session, called a 56 1.9 yamt * "vestigial PCB". 
VTW data structures are designed to be very fast and 57 1.9 yamt * memory-efficient: for fast insertion and lookup of vestigial PCBs, 58 1.9 yamt * the PCBs are stored in a hash table that is designed to minimize the 59 1.9 yamt * number of cacheline visits per lookup/insertion. The memory both 60 1.9 yamt * for vestigial PCBs and for elements of the PCB hashtable comes from 61 1.9 yamt * fixed-size pools, and linked data structures exploit this to conserve 62 1.9 yamt * memory by representing references with a narrow index/offset from the 63 1.9 yamt * start of a pool instead of a pointer. When space for new vestigial PCBs 64 1.9 yamt * runs out, VTW makes room by discarding old vestigial PCBs, oldest first. 65 1.9 yamt * VTW cooperates with MSLT. 66 1.9 yamt * 67 1.9 yamt * It may help to think of VTW as a "FIN cache" by analogy to the SYN 68 1.9 yamt * cache. 69 1.9 yamt * 70 1.9 yamt * A 2.8-GHz Pentium 4 running a test workload that creates TIME_WAIT 71 1.9 yamt * sessions as fast as it can is approximately 17% idle when VTW is active 72 1.9 yamt * versus 0% idle when VTW is inactive. It has 103 megabytes more free RAM 73 1.9 yamt * when VTW is active (approximately 64k vestigial PCBs are created) than 74 1.9 yamt * when it is inactive. 
75 1.9 yamt */ 76 1.9 yamt 77 1.1 dyoung #include <sys/cdefs.h> 78 1.1 dyoung 79 1.14 pooka #ifdef _KERNEL_OPT 80 1.1 dyoung #include "opt_ddb.h" 81 1.1 dyoung #include "opt_inet.h" 82 1.1 dyoung #include "opt_inet_csum.h" 83 1.1 dyoung #include "opt_tcp_debug.h" 84 1.14 pooka #endif 85 1.1 dyoung 86 1.1 dyoung #include <sys/param.h> 87 1.1 dyoung #include <sys/systm.h> 88 1.1 dyoung #include <sys/kmem.h> 89 1.1 dyoung #include <sys/mbuf.h> 90 1.1 dyoung #include <sys/protosw.h> 91 1.1 dyoung #include <sys/socket.h> 92 1.1 dyoung #include <sys/socketvar.h> 93 1.1 dyoung #include <sys/errno.h> 94 1.1 dyoung #include <sys/syslog.h> 95 1.1 dyoung #include <sys/pool.h> 96 1.1 dyoung #include <sys/domain.h> 97 1.1 dyoung #include <sys/kernel.h> 98 1.1 dyoung #include <net/if.h> 99 1.1 dyoung #include <net/if_types.h> 100 1.1 dyoung 101 1.1 dyoung #include <netinet/in.h> 102 1.1 dyoung #include <netinet/in_systm.h> 103 1.1 dyoung #include <netinet/ip.h> 104 1.1 dyoung #include <netinet/in_pcb.h> 105 1.1 dyoung #include <netinet/in_var.h> 106 1.1 dyoung #include <netinet/ip_var.h> 107 1.1 dyoung #include <netinet/in_offload.h> 108 1.1 dyoung #include <netinet/ip6.h> 109 1.1 dyoung #include <netinet6/ip6_var.h> 110 1.1 dyoung #include <netinet6/in6_pcb.h> 111 1.1 dyoung #include <netinet6/ip6_var.h> 112 1.1 dyoung #include <netinet6/in6_var.h> 113 1.1 dyoung #include <netinet/icmp6.h> 114 1.1 dyoung 115 1.1 dyoung #include <netinet/tcp.h> 116 1.1 dyoung #include <netinet/tcp_fsm.h> 117 1.1 dyoung #include <netinet/tcp_seq.h> 118 1.1 dyoung #include <netinet/tcp_timer.h> 119 1.1 dyoung #include <netinet/tcp_var.h> 120 1.1 dyoung #include <netinet/tcp_private.h> 121 1.1 dyoung 122 1.1 dyoung #include <netinet/tcp_vtw.h> 123 1.1 dyoung 124 1.25 jakllsch __KERNEL_RCSID(0, "$NetBSD: tcp_vtw.c,v 1.25 2024/10/07 23:17:00 jakllsch Exp $"); 125 1.1 dyoung 126 1.1 dyoung #define db_trace(__a, __b) do { } while (/*CONSTCOND*/0) 127 1.1 dyoung 128 1.1 dyoung static void 
vtw_debug_init(void); 129 1.1 dyoung 130 1.1 dyoung fatp_ctl_t fat_tcpv4; 131 1.1 dyoung fatp_ctl_t fat_tcpv6; 132 1.1 dyoung vtw_ctl_t vtw_tcpv4[VTW_NCLASS]; 133 1.1 dyoung vtw_ctl_t vtw_tcpv6[VTW_NCLASS]; 134 1.1 dyoung vtw_stats_t vtw_stats; 135 1.1 dyoung 136 1.1 dyoung /* We provide state for the lookup_ports iterator. 137 1.1 dyoung * As currently we are netlock-protected, there is one. 138 1.1 dyoung * If we were finer-grain, we would have one per CPU. 139 1.1 dyoung * I do not want to be in the business of alloc/free. 140 1.1 dyoung * The best alternate would be allocate on the caller's 141 1.1 dyoung * stack, but that would require them to know the struct, 142 1.1 dyoung * or at least the size. 143 1.1 dyoung * See how she goes. 144 1.1 dyoung */ 145 1.1 dyoung struct tcp_ports_iterator { 146 1.1 dyoung union { 147 1.1 dyoung struct in_addr v4; 148 1.1 dyoung struct in6_addr v6; 149 1.1 dyoung } addr; 150 1.1 dyoung u_int port; 151 1.1 dyoung 152 1.1 dyoung uint32_t wild : 1; 153 1.1 dyoung 154 1.1 dyoung vtw_ctl_t *ctl; 155 1.1 dyoung fatp_t *fp; 156 1.1 dyoung 157 1.1 dyoung uint16_t slot_idx; 158 1.1 dyoung uint16_t ctl_idx; 159 1.1 dyoung }; 160 1.1 dyoung 161 1.1 dyoung static struct tcp_ports_iterator tcp_ports_iterator_v4; 162 1.1 dyoung static struct tcp_ports_iterator tcp_ports_iterator_v6; 163 1.1 dyoung 164 1.1 dyoung static int vtw_age(vtw_ctl_t *, struct timeval *); 165 1.1 dyoung 166 1.1 dyoung /*!\brief allocate a fat pointer from a collection. 
167 1.1 dyoung */ 168 1.1 dyoung static fatp_t * 169 1.1 dyoung fatp_alloc(fatp_ctl_t *fat) 170 1.1 dyoung { 171 1.1 dyoung fatp_t *fp = 0; 172 1.1 dyoung 173 1.1 dyoung if (fat->nfree) { 174 1.1 dyoung fp = fat->free; 175 1.1 dyoung if (fp) { 176 1.1 dyoung fat->free = fatp_next(fat, fp); 177 1.1 dyoung --fat->nfree; 178 1.1 dyoung ++fat->nalloc; 179 1.1 dyoung fp->nxt = 0; 180 1.1 dyoung 181 1.1 dyoung KASSERT(!fp->inuse); 182 1.1 dyoung } 183 1.1 dyoung } 184 1.1 dyoung 185 1.1 dyoung return fp; 186 1.1 dyoung } 187 1.1 dyoung 188 1.1 dyoung /*!\brief free a fat pointer. 189 1.1 dyoung */ 190 1.1 dyoung static void 191 1.1 dyoung fatp_free(fatp_ctl_t *fat, fatp_t *fp) 192 1.1 dyoung { 193 1.1 dyoung if (fp) { 194 1.1 dyoung KASSERT(!fp->inuse); 195 1.1 dyoung KASSERT(!fp->nxt); 196 1.1 dyoung 197 1.1 dyoung fp->nxt = fatp_index(fat, fat->free); 198 1.1 dyoung fat->free = fp; 199 1.1 dyoung 200 1.1 dyoung ++fat->nfree; 201 1.1 dyoung --fat->nalloc; 202 1.1 dyoung } 203 1.1 dyoung } 204 1.1 dyoung 205 1.1 dyoung /*!\brief initialise a collection of fat pointers. 206 1.1 dyoung * 207 1.1 dyoung *\param n # hash buckets 208 1.1 dyoung *\param m total # fat pointers to allocate 209 1.1 dyoung * 210 1.1 dyoung * We allocate 2x as much, as we have two hashes: full and lport only. 211 1.1 dyoung */ 212 1.1 dyoung static void 213 1.6 dyoung fatp_init(fatp_ctl_t *fat, uint32_t n, uint32_t m, 214 1.6 dyoung fatp_t *fat_base, fatp_t **fat_hash) 215 1.1 dyoung { 216 1.1 dyoung fatp_t *fp; 217 1.1 dyoung 218 1.1 dyoung KASSERT(n <= FATP_MAX / 2); 219 1.1 dyoung 220 1.6 dyoung fat->hash = fat_hash; 221 1.6 dyoung fat->base = fat_base; 222 1.1 dyoung 223 1.1 dyoung fat->port = &fat->hash[m]; 224 1.1 dyoung 225 1.1 dyoung fat->mask = m - 1; // ASSERT is power of 2 (m) 226 1.1 dyoung fat->lim = fat->base + 2*n - 1; 227 1.1 dyoung fat->nfree = 0; 228 1.1 dyoung fat->nalloc = 2*n; 229 1.1 dyoung 230 1.1 dyoung /* Initialise the free list. 
231 1.1 dyoung */ 232 1.1 dyoung for (fp = fat->lim; fp >= fat->base; --fp) { 233 1.1 dyoung fatp_free(fat, fp); 234 1.1 dyoung } 235 1.1 dyoung } 236 1.1 dyoung 237 1.1 dyoung /* 238 1.1 dyoung * The `xtra' is XORed into the tag stored. 239 1.1 dyoung */ 240 1.1 dyoung static uint32_t fatp_xtra[] = { 241 1.1 dyoung 0x11111111,0x22222222,0x33333333,0x44444444, 242 1.1 dyoung 0x55555555,0x66666666,0x77777777,0x88888888, 243 1.1 dyoung 0x12121212,0x21212121,0x34343434,0x43434343, 244 1.1 dyoung 0x56565656,0x65656565,0x78787878,0x87878787, 245 1.1 dyoung 0x11221122,0x22112211,0x33443344,0x44334433, 246 1.1 dyoung 0x55665566,0x66556655,0x77887788,0x88778877, 247 1.1 dyoung 0x11112222,0x22221111,0x33334444,0x44443333, 248 1.1 dyoung 0x55556666,0x66665555,0x77778888,0x88887777, 249 1.1 dyoung }; 250 1.1 dyoung 251 1.1 dyoung /*!\brief turn a {fatp_t*,slot} into an integral key. 252 1.1 dyoung * 253 1.1 dyoung * The key can be used to obtain the fatp_t, and the slot, 254 1.1 dyoung * as it directly encodes them. 
255 1.1 dyoung */ 256 1.1 dyoung static inline uint32_t 257 1.1 dyoung fatp_key(fatp_ctl_t *fat, fatp_t *fp, uint32_t slot) 258 1.1 dyoung { 259 1.1 dyoung CTASSERT(CACHE_LINE_SIZE == 32 || 260 1.1 dyoung CACHE_LINE_SIZE == 64 || 261 1.25 jakllsch CACHE_LINE_SIZE == 128 || 262 1.25 jakllsch CACHE_LINE_SIZE == 256); 263 1.1 dyoung 264 1.1 dyoung switch (fatp_ntags()) { 265 1.1 dyoung case 7: 266 1.1 dyoung return (fatp_index(fat, fp) << 3) | slot; 267 1.1 dyoung case 15: 268 1.1 dyoung return (fatp_index(fat, fp) << 4) | slot; 269 1.1 dyoung case 31: 270 1.1 dyoung return (fatp_index(fat, fp) << 5) | slot; 271 1.1 dyoung default: 272 1.1 dyoung KASSERT(0 && "no support, for no good reason"); 273 1.1 dyoung return ~0; 274 1.1 dyoung } 275 1.1 dyoung } 276 1.1 dyoung 277 1.1 dyoung static inline uint32_t 278 1.1 dyoung fatp_slot_from_key(fatp_ctl_t *fat, uint32_t key) 279 1.1 dyoung { 280 1.1 dyoung CTASSERT(CACHE_LINE_SIZE == 32 || 281 1.1 dyoung CACHE_LINE_SIZE == 64 || 282 1.25 jakllsch CACHE_LINE_SIZE == 128 || 283 1.25 jakllsch CACHE_LINE_SIZE == 256); 284 1.1 dyoung 285 1.1 dyoung switch (fatp_ntags()) { 286 1.1 dyoung case 7: 287 1.1 dyoung return key & 7; 288 1.1 dyoung case 15: 289 1.1 dyoung return key & 15; 290 1.1 dyoung case 31: 291 1.1 dyoung return key & 31; 292 1.1 dyoung default: 293 1.1 dyoung KASSERT(0 && "no support, for no good reason"); 294 1.1 dyoung return ~0; 295 1.1 dyoung } 296 1.1 dyoung } 297 1.1 dyoung 298 1.1 dyoung static inline fatp_t * 299 1.1 dyoung fatp_from_key(fatp_ctl_t *fat, uint32_t key) 300 1.1 dyoung { 301 1.1 dyoung CTASSERT(CACHE_LINE_SIZE == 32 || 302 1.1 dyoung CACHE_LINE_SIZE == 64 || 303 1.25 jakllsch CACHE_LINE_SIZE == 128 || 304 1.25 jakllsch CACHE_LINE_SIZE == 256); 305 1.1 dyoung 306 1.1 dyoung switch (fatp_ntags()) { 307 1.1 dyoung case 7: 308 1.1 dyoung key >>= 3; 309 1.1 dyoung break; 310 1.1 dyoung case 15: 311 1.1 dyoung key >>= 4; 312 1.1 dyoung break; 313 1.1 dyoung case 31: 314 1.1 dyoung key >>= 5; 315 1.1 
dyoung break; 316 1.1 dyoung default: 317 1.1 dyoung KASSERT(0 && "no support, for no good reason"); 318 1.1 dyoung return 0; 319 1.1 dyoung } 320 1.1 dyoung 321 1.1 dyoung return key ? fat->base + key - 1 : 0; 322 1.1 dyoung } 323 1.1 dyoung 324 1.1 dyoung static inline uint32_t 325 1.1 dyoung idx_encode(vtw_ctl_t *ctl, uint32_t idx) 326 1.1 dyoung { 327 1.1 dyoung return (idx << ctl->idx_bits) | idx; 328 1.1 dyoung } 329 1.1 dyoung 330 1.1 dyoung static inline uint32_t 331 1.1 dyoung idx_decode(vtw_ctl_t *ctl, uint32_t bits) 332 1.1 dyoung { 333 1.1 dyoung uint32_t idx = bits & ctl->idx_mask; 334 1.1 dyoung 335 1.1 dyoung if (idx_encode(ctl, idx) == bits) 336 1.1 dyoung return idx; 337 1.1 dyoung else 338 1.1 dyoung return ~0; 339 1.1 dyoung } 340 1.1 dyoung 341 1.1 dyoung /*!\brief insert index into fatp hash 342 1.1 dyoung * 343 1.1 dyoung *\param idx - index of element being placed in hash chain 344 1.1 dyoung *\param tag - 32-bit tag identifier 345 1.1 dyoung * 346 1.1 dyoung *\returns 347 1.1 dyoung * value which can be used to locate entry. 348 1.1 dyoung * 349 1.1 dyoung *\note 350 1.1 dyoung * we rely on the fact that there are unused high bits in the index 351 1.1 dyoung * for verification purposes on lookup. 352 1.1 dyoung */ 353 1.1 dyoung 354 1.1 dyoung static inline uint32_t 355 1.1 dyoung fatp_vtw_inshash(fatp_ctl_t *fat, uint32_t idx, uint32_t tag, int which, 356 1.1 dyoung void *dbg) 357 1.1 dyoung { 358 1.1 dyoung fatp_t *fp; 359 1.1 dyoung fatp_t **hash = (which ? fat->port : fat->hash); 360 1.1 dyoung int i; 361 1.1 dyoung 362 1.1 dyoung fp = hash[tag & fat->mask]; 363 1.1 dyoung 364 1.1 dyoung while (!fp || fatp_full(fp)) { 365 1.1 dyoung fatp_t *fq; 366 1.1 dyoung 367 1.1 dyoung /* All entries are inuse at the top level. 368 1.1 dyoung * We allocate a spare, and push the top level 369 1.1 dyoung * down one. 
All entries in the fp we push down 370 1.1 dyoung * (think of a tape worm here) will be expelled sooner than 371 1.1 dyoung * any entries added subsequently to this hash bucket. 372 1.1 dyoung * This is a property of the time waits we are exploiting. 373 1.1 dyoung */ 374 1.1 dyoung 375 1.1 dyoung fq = fatp_alloc(fat); 376 1.1 dyoung if (!fq) { 377 1.1 dyoung vtw_age(fat->vtw, 0); 378 1.1 dyoung fp = hash[tag & fat->mask]; 379 1.1 dyoung continue; 380 1.1 dyoung } 381 1.1 dyoung 382 1.1 dyoung fq->inuse = 0; 383 1.1 dyoung fq->nxt = fatp_index(fat, fp); 384 1.1 dyoung 385 1.1 dyoung hash[tag & fat->mask] = fq; 386 1.1 dyoung 387 1.1 dyoung fp = fq; 388 1.1 dyoung } 389 1.1 dyoung 390 1.1 dyoung KASSERT(!fatp_full(fp)); 391 1.1 dyoung 392 1.1 dyoung /* Fill highest index first. Lookup is lowest first. 393 1.1 dyoung */ 394 1.1 dyoung for (i = fatp_ntags(); --i >= 0; ) { 395 1.1 dyoung if (!((1 << i) & fp->inuse)) { 396 1.1 dyoung break; 397 1.1 dyoung } 398 1.1 dyoung } 399 1.1 dyoung 400 1.1 dyoung fp->inuse |= 1 << i; 401 1.1 dyoung fp->tag[i] = tag ^ idx_encode(fat->vtw, idx) ^ fatp_xtra[i]; 402 1.1 dyoung 403 1.1 dyoung db_trace(KTR_VTW 404 1.1 dyoung , (fp, "fat: inuse %5.5x tag[%x] %8.8x" 405 1.1 dyoung , fp->inuse 406 1.1 dyoung , i, fp->tag[i])); 407 1.1 dyoung 408 1.1 dyoung return fatp_key(fat, fp, i); 409 1.1 dyoung } 410 1.1 dyoung 411 1.1 dyoung static inline int 412 1.1 dyoung vtw_alive(const vtw_t *vtw) 413 1.1 dyoung { 414 1.1 dyoung return vtw->hashed && vtw->expire.tv_sec; 415 1.1 dyoung } 416 1.1 dyoung 417 1.1 dyoung static inline uint32_t 418 1.1 dyoung vtw_index_v4(vtw_ctl_t *ctl, vtw_v4_t *v4) 419 1.1 dyoung { 420 1.1 dyoung if (ctl->base.v4 <= v4 && v4 <= ctl->lim.v4) 421 1.1 dyoung return v4 - ctl->base.v4; 422 1.1 dyoung 423 1.1 dyoung KASSERT(0 && "vtw out of bounds"); 424 1.1 dyoung 425 1.1 dyoung return ~0; 426 1.1 dyoung } 427 1.1 dyoung 428 1.1 dyoung static inline uint32_t 429 1.1 dyoung vtw_index_v6(vtw_ctl_t *ctl, vtw_v6_t *v6) 430 
1.1 dyoung { 431 1.1 dyoung if (ctl->base.v6 <= v6 && v6 <= ctl->lim.v6) 432 1.1 dyoung return v6 - ctl->base.v6; 433 1.1 dyoung 434 1.1 dyoung KASSERT(0 && "vtw out of bounds"); 435 1.1 dyoung 436 1.1 dyoung return ~0; 437 1.1 dyoung } 438 1.1 dyoung 439 1.1 dyoung static inline uint32_t 440 1.1 dyoung vtw_index(vtw_ctl_t *ctl, vtw_t *vtw) 441 1.1 dyoung { 442 1.1 dyoung if (ctl->clidx) 443 1.1 dyoung ctl = ctl->ctl; 444 1.1 dyoung 445 1.1 dyoung if (ctl->is_v4) 446 1.1 dyoung return vtw_index_v4(ctl, (vtw_v4_t *)vtw); 447 1.1 dyoung 448 1.1 dyoung if (ctl->is_v6) 449 1.1 dyoung return vtw_index_v6(ctl, (vtw_v6_t *)vtw); 450 1.1 dyoung 451 1.1 dyoung KASSERT(0 && "neither 4 nor 6. most curious."); 452 1.1 dyoung 453 1.1 dyoung return ~0; 454 1.1 dyoung } 455 1.1 dyoung 456 1.1 dyoung static inline vtw_t * 457 1.1 dyoung vtw_from_index(vtw_ctl_t *ctl, uint32_t idx) 458 1.1 dyoung { 459 1.1 dyoung if (ctl->clidx) 460 1.1 dyoung ctl = ctl->ctl; 461 1.1 dyoung 462 1.1 dyoung /* See if the index looks like it might be an index. 463 1.1 dyoung * Bits on outside of the valid index bits is a give away. 464 1.1 dyoung */ 465 1.1 dyoung idx = idx_decode(ctl, idx); 466 1.1 dyoung 467 1.1 dyoung if (idx == ~0) { 468 1.1 dyoung return 0; 469 1.1 dyoung } else if (ctl->is_v4) { 470 1.1 dyoung vtw_v4_t *vtw = ctl->base.v4 + idx; 471 1.1 dyoung 472 1.1 dyoung return (ctl->base.v4 <= vtw && vtw <= ctl->lim.v4) 473 1.1 dyoung ? &vtw->common : 0; 474 1.1 dyoung } else if (ctl->is_v6) { 475 1.1 dyoung vtw_v6_t *vtw = ctl->base.v6 + idx; 476 1.1 dyoung 477 1.1 dyoung return (ctl->base.v6 <= vtw && vtw <= ctl->lim.v6) 478 1.1 dyoung ? &vtw->common : 0; 479 1.1 dyoung } else { 480 1.1 dyoung KASSERT(0 && "badness"); 481 1.1 dyoung return 0; 482 1.1 dyoung } 483 1.1 dyoung } 484 1.1 dyoung 485 1.1 dyoung /*!\brief return the next vtw after this one. 
486 1.1 dyoung * 487 1.1 dyoung * Due to the differing sizes of the entries in differing 488 1.1 dyoung * arenas, we have to ensure we ++ the correct pointer type. 489 1.1 dyoung * 490 1.1 dyoung * Also handles wrap. 491 1.1 dyoung */ 492 1.1 dyoung static inline vtw_t * 493 1.1 dyoung vtw_next(vtw_ctl_t *ctl, vtw_t *vtw) 494 1.1 dyoung { 495 1.1 dyoung if (ctl->is_v4) { 496 1.1 dyoung vtw_v4_t *v4 = (void*)vtw; 497 1.1 dyoung 498 1.1 dyoung vtw = &(++v4)->common; 499 1.1 dyoung } else { 500 1.1 dyoung vtw_v6_t *v6 = (void*)vtw; 501 1.1 dyoung 502 1.1 dyoung vtw = &(++v6)->common; 503 1.1 dyoung } 504 1.1 dyoung 505 1.1 dyoung if (vtw > ctl->lim.v) 506 1.1 dyoung vtw = ctl->base.v; 507 1.1 dyoung 508 1.1 dyoung return vtw; 509 1.1 dyoung } 510 1.1 dyoung 511 1.1 dyoung /*!\brief remove entry from FATP hash chains 512 1.1 dyoung */ 513 1.1 dyoung static inline void 514 1.1 dyoung vtw_unhash(vtw_ctl_t *ctl, vtw_t *vtw) 515 1.1 dyoung { 516 1.1 dyoung fatp_ctl_t *fat = ctl->fat; 517 1.1 dyoung fatp_t *fp; 518 1.1 dyoung uint32_t key = vtw->key; 519 1.1 dyoung uint32_t tag, slot, idx; 520 1.1 dyoung vtw_v4_t *v4 = (void*)vtw; 521 1.1 dyoung vtw_v6_t *v6 = (void*)vtw; 522 1.1 dyoung 523 1.1 dyoung if (!vtw->hashed) { 524 1.1 dyoung KASSERT(0 && "unhashed"); 525 1.1 dyoung return; 526 1.1 dyoung } 527 1.1 dyoung 528 1.1 dyoung if (fat->vtw->is_v4) { 529 1.1 dyoung tag = v4_tag(v4->faddr, v4->fport, v4->laddr, v4->lport); 530 1.1 dyoung } else if (fat->vtw->is_v6) { 531 1.1 dyoung tag = v6_tag(&v6->faddr, v6->fport, &v6->laddr, v6->lport); 532 1.1 dyoung } else { 533 1.1 dyoung tag = 0; 534 1.1 dyoung KASSERT(0 && "not reached"); 535 1.1 dyoung } 536 1.1 dyoung 537 1.1 dyoung /* Remove from fat->hash[] 538 1.1 dyoung */ 539 1.1 dyoung slot = fatp_slot_from_key(fat, key); 540 1.1 dyoung fp = fatp_from_key(fat, key); 541 1.1 dyoung idx = vtw_index(ctl, vtw); 542 1.1 dyoung 543 1.1 dyoung db_trace(KTR_VTW 544 1.1 dyoung , (fp, "fat: del inuse %5.5x slot %x idx %x key %x tag 
%x" 545 1.1 dyoung , fp->inuse, slot, idx, key, tag)); 546 1.1 dyoung 547 1.1 dyoung KASSERT(fp->inuse & (1 << slot)); 548 1.1 dyoung KASSERT(fp->tag[slot] == (tag ^ idx_encode(ctl, idx) 549 1.1 dyoung ^ fatp_xtra[slot])); 550 1.1 dyoung 551 1.1 dyoung if ((fp->inuse & (1 << slot)) 552 1.1 dyoung && fp->tag[slot] == (tag ^ idx_encode(ctl, idx) 553 1.1 dyoung ^ fatp_xtra[slot])) { 554 1.1 dyoung fp->inuse ^= 1 << slot; 555 1.1 dyoung fp->tag[slot] = 0; 556 1.1 dyoung 557 1.1 dyoung /* When we delete entries, we do not compact. This is 558 1.1 dyoung * due to temporality. We add entries, and they 559 1.1 dyoung * (eventually) expire. Older entries will be further 560 1.1 dyoung * down the chain. 561 1.1 dyoung */ 562 1.1 dyoung if (!fp->inuse) { 563 1.1 dyoung uint32_t hi = tag & fat->mask; 564 1.1 dyoung fatp_t *fq = 0; 565 1.1 dyoung fatp_t *fr = fat->hash[hi]; 566 1.1 dyoung 567 1.1 dyoung while (fr && fr != fp) { 568 1.1 dyoung fr = fatp_next(fat, fq = fr); 569 1.1 dyoung } 570 1.1 dyoung 571 1.1 dyoung if (fr == fp) { 572 1.1 dyoung if (fq) { 573 1.1 dyoung fq->nxt = fp->nxt; 574 1.1 dyoung fp->nxt = 0; 575 1.1 dyoung fatp_free(fat, fp); 576 1.1 dyoung } else { 577 1.1 dyoung KASSERT(fat->hash[hi] == fp); 578 1.1 dyoung 579 1.1 dyoung if (fp->nxt) { 580 1.1 dyoung fat->hash[hi] 581 1.1 dyoung = fatp_next(fat, fp); 582 1.1 dyoung fp->nxt = 0; 583 1.1 dyoung fatp_free(fat, fp); 584 1.1 dyoung } else { 585 1.1 dyoung /* retain for next use. 
586 1.1 dyoung */ 587 1.1 dyoung ; 588 1.1 dyoung } 589 1.1 dyoung } 590 1.1 dyoung } else { 591 1.1 dyoung fr = fat->hash[hi]; 592 1.1 dyoung 593 1.1 dyoung do { 594 1.1 dyoung db_trace(KTR_VTW 595 1.1 dyoung , (fr 596 1.1 dyoung , "fat:*del inuse %5.5x" 597 1.1 dyoung " nxt %x" 598 1.1 dyoung , fr->inuse, fr->nxt)); 599 1.1 dyoung 600 1.1 dyoung fr = fatp_next(fat, fq = fr); 601 1.1 dyoung } while (fr && fr != fp); 602 1.1 dyoung 603 1.1 dyoung KASSERT(0 && "oops"); 604 1.1 dyoung } 605 1.1 dyoung } 606 1.1 dyoung vtw->key ^= ~0; 607 1.1 dyoung } 608 1.1 dyoung 609 1.1 dyoung if (fat->vtw->is_v4) { 610 1.1 dyoung tag = v4_port_tag(v4->lport); 611 1.1 dyoung } else if (fat->vtw->is_v6) { 612 1.1 dyoung tag = v6_port_tag(v6->lport); 613 1.1 dyoung } 614 1.1 dyoung 615 1.1 dyoung /* Remove from fat->port[] 616 1.1 dyoung */ 617 1.1 dyoung key = vtw->port_key; 618 1.1 dyoung slot = fatp_slot_from_key(fat, key); 619 1.1 dyoung fp = fatp_from_key(fat, key); 620 1.1 dyoung idx = vtw_index(ctl, vtw); 621 1.1 dyoung 622 1.1 dyoung db_trace(KTR_VTW 623 1.1 dyoung , (fp, "fatport: del inuse %5.5x" 624 1.1 dyoung " slot %x idx %x key %x tag %x" 625 1.1 dyoung , fp->inuse, slot, idx, key, tag)); 626 1.1 dyoung 627 1.1 dyoung KASSERT(fp->inuse & (1 << slot)); 628 1.1 dyoung KASSERT(fp->tag[slot] == (tag ^ idx_encode(ctl, idx) 629 1.1 dyoung ^ fatp_xtra[slot])); 630 1.1 dyoung 631 1.1 dyoung if ((fp->inuse & (1 << slot)) 632 1.1 dyoung && fp->tag[slot] == (tag ^ idx_encode(ctl, idx) 633 1.1 dyoung ^ fatp_xtra[slot])) { 634 1.1 dyoung fp->inuse ^= 1 << slot; 635 1.1 dyoung fp->tag[slot] = 0; 636 1.1 dyoung 637 1.1 dyoung if (!fp->inuse) { 638 1.1 dyoung uint32_t hi = tag & fat->mask; 639 1.1 dyoung fatp_t *fq = 0; 640 1.1 dyoung fatp_t *fr = fat->port[hi]; 641 1.1 dyoung 642 1.1 dyoung while (fr && fr != fp) { 643 1.1 dyoung fr = fatp_next(fat, fq = fr); 644 1.1 dyoung } 645 1.1 dyoung 646 1.1 dyoung if (fr == fp) { 647 1.1 dyoung if (fq) { 648 1.1 dyoung fq->nxt = fp->nxt; 649 
1.1 dyoung fp->nxt = 0; 650 1.1 dyoung fatp_free(fat, fp); 651 1.1 dyoung } else { 652 1.1 dyoung KASSERT(fat->port[hi] == fp); 653 1.1 dyoung 654 1.1 dyoung if (fp->nxt) { 655 1.1 dyoung fat->port[hi] 656 1.1 dyoung = fatp_next(fat, fp); 657 1.1 dyoung fp->nxt = 0; 658 1.1 dyoung fatp_free(fat, fp); 659 1.1 dyoung } else { 660 1.1 dyoung /* retain for next use. 661 1.1 dyoung */ 662 1.1 dyoung ; 663 1.1 dyoung } 664 1.1 dyoung } 665 1.1 dyoung } 666 1.1 dyoung } 667 1.1 dyoung vtw->port_key ^= ~0; 668 1.1 dyoung } 669 1.1 dyoung 670 1.1 dyoung vtw->hashed = 0; 671 1.1 dyoung } 672 1.1 dyoung 673 1.1 dyoung /*!\brief remove entry from hash, possibly free. 674 1.1 dyoung */ 675 1.1 dyoung void 676 1.1 dyoung vtw_del(vtw_ctl_t *ctl, vtw_t *vtw) 677 1.1 dyoung { 678 1.1 dyoung KASSERT(mutex_owned(softnet_lock)); 679 1.1 dyoung 680 1.1 dyoung if (vtw->hashed) { 681 1.1 dyoung ++vtw_stats.del; 682 1.1 dyoung vtw_unhash(ctl, vtw); 683 1.1 dyoung } 684 1.1 dyoung 685 1.1 dyoung /* We only delete the oldest entry. 
686 1.1 dyoung */ 687 1.1 dyoung if (vtw != ctl->oldest.v) 688 1.1 dyoung return; 689 1.1 dyoung 690 1.1 dyoung --ctl->nalloc; 691 1.1 dyoung ++ctl->nfree; 692 1.1 dyoung 693 1.1 dyoung vtw->expire.tv_sec = 0; 694 1.1 dyoung vtw->expire.tv_usec = ~0; 695 1.1 dyoung 696 1.1 dyoung if (!ctl->nalloc) 697 1.1 dyoung ctl->oldest.v = 0; 698 1.1 dyoung 699 1.1 dyoung ctl->oldest.v = vtw_next(ctl, vtw); 700 1.1 dyoung } 701 1.1 dyoung 702 1.4 dholland /*!\brief insert vestigial timewait in hash chain 703 1.1 dyoung */ 704 1.1 dyoung static void 705 1.1 dyoung vtw_inshash_v4(vtw_ctl_t *ctl, vtw_t *vtw) 706 1.1 dyoung { 707 1.1 dyoung uint32_t idx = vtw_index(ctl, vtw); 708 1.1 dyoung uint32_t tag; 709 1.1 dyoung vtw_v4_t *v4 = (void*)vtw; 710 1.1 dyoung 711 1.1 dyoung KASSERT(mutex_owned(softnet_lock)); 712 1.1 dyoung KASSERT(!vtw->hashed); 713 1.1 dyoung KASSERT(ctl->clidx == vtw->msl_class); 714 1.1 dyoung 715 1.1 dyoung ++vtw_stats.ins; 716 1.1 dyoung 717 1.1 dyoung tag = v4_tag(v4->faddr, v4->fport, 718 1.1 dyoung v4->laddr, v4->lport); 719 1.1 dyoung 720 1.1 dyoung vtw->key = fatp_vtw_inshash(ctl->fat, idx, tag, 0, vtw); 721 1.1 dyoung 722 1.1 dyoung db_trace(KTR_VTW, (ctl 723 1.1 dyoung , "vtw: ins %8.8x:%4.4x %8.8x:%4.4x" 724 1.1 dyoung " tag %8.8x key %8.8x" 725 1.1 dyoung , v4->faddr, v4->fport 726 1.1 dyoung , v4->laddr, v4->lport 727 1.1 dyoung , tag 728 1.1 dyoung , vtw->key)); 729 1.1 dyoung 730 1.1 dyoung tag = v4_port_tag(v4->lport); 731 1.1 dyoung vtw->port_key = fatp_vtw_inshash(ctl->fat, idx, tag, 1, vtw); 732 1.1 dyoung 733 1.1 dyoung db_trace(KTR_VTW, (ctl, "vtw: ins %P - %4.4x tag %8.8x key %8.8x" 734 1.1 dyoung , v4->lport, v4->lport 735 1.1 dyoung , tag 736 1.1 dyoung , vtw->key)); 737 1.1 dyoung 738 1.1 dyoung vtw->hashed = 1; 739 1.1 dyoung } 740 1.1 dyoung 741 1.4 dholland /*!\brief insert vestigial timewait in hash chain 742 1.1 dyoung */ 743 1.1 dyoung static void 744 1.1 dyoung vtw_inshash_v6(vtw_ctl_t *ctl, vtw_t *vtw) 745 1.1 dyoung { 746 1.1 
dyoung uint32_t idx = vtw_index(ctl, vtw); 747 1.1 dyoung uint32_t tag; 748 1.1 dyoung vtw_v6_t *v6 = (void*)vtw; 749 1.1 dyoung 750 1.1 dyoung KASSERT(mutex_owned(softnet_lock)); 751 1.1 dyoung KASSERT(!vtw->hashed); 752 1.1 dyoung KASSERT(ctl->clidx == vtw->msl_class); 753 1.1 dyoung 754 1.1 dyoung ++vtw_stats.ins; 755 1.1 dyoung 756 1.1 dyoung tag = v6_tag(&v6->faddr, v6->fport, 757 1.1 dyoung &v6->laddr, v6->lport); 758 1.1 dyoung 759 1.1 dyoung vtw->key = fatp_vtw_inshash(ctl->fat, idx, tag, 0, vtw); 760 1.1 dyoung 761 1.1 dyoung tag = v6_port_tag(v6->lport); 762 1.1 dyoung vtw->port_key = fatp_vtw_inshash(ctl->fat, idx, tag, 1, vtw); 763 1.1 dyoung 764 1.1 dyoung db_trace(KTR_VTW, (ctl, "vtw: ins %P - %4.4x tag %8.8x key %8.8x" 765 1.1 dyoung , v6->lport, v6->lport 766 1.1 dyoung , tag 767 1.1 dyoung , vtw->key)); 768 1.1 dyoung 769 1.1 dyoung vtw->hashed = 1; 770 1.1 dyoung } 771 1.1 dyoung 772 1.1 dyoung static vtw_t * 773 1.1 dyoung vtw_lookup_hash_v4(vtw_ctl_t *ctl, uint32_t faddr, uint16_t fport 774 1.1 dyoung , uint32_t laddr, uint16_t lport 775 1.1 dyoung , int which) 776 1.1 dyoung { 777 1.1 dyoung vtw_v4_t *v4; 778 1.1 dyoung vtw_t *vtw; 779 1.1 dyoung uint32_t tag; 780 1.1 dyoung fatp_t *fp; 781 1.1 dyoung int i; 782 1.1 dyoung uint32_t fatps = 0, probes = 0, losings = 0; 783 1.1 dyoung 784 1.1 dyoung if (!ctl || !ctl->fat) 785 1.1 dyoung return 0; 786 1.1 dyoung 787 1.1 dyoung ++vtw_stats.look[which]; 788 1.1 dyoung 789 1.1 dyoung if (which) { 790 1.1 dyoung tag = v4_port_tag(lport); 791 1.1 dyoung fp = ctl->fat->port[tag & ctl->fat->mask]; 792 1.1 dyoung } else { 793 1.1 dyoung tag = v4_tag(faddr, fport, laddr, lport); 794 1.1 dyoung fp = ctl->fat->hash[tag & ctl->fat->mask]; 795 1.1 dyoung } 796 1.1 dyoung 797 1.1 dyoung while (fp && fp->inuse) { 798 1.1 dyoung uint32_t inuse = fp->inuse; 799 1.1 dyoung 800 1.1 dyoung ++fatps; 801 1.1 dyoung 802 1.1 dyoung for (i = 0; inuse && i < fatp_ntags(); ++i) { 803 1.1 dyoung uint32_t idx; 804 1.1 dyoung 
805 1.1 dyoung if (!(inuse & (1 << i))) 806 1.1 dyoung continue; 807 1.1 dyoung 808 1.1 dyoung inuse ^= 1 << i; 809 1.1 dyoung 810 1.1 dyoung ++probes; 811 1.1 dyoung ++vtw_stats.probe[which]; 812 1.1 dyoung 813 1.1 dyoung idx = fp->tag[i] ^ tag ^ fatp_xtra[i]; 814 1.1 dyoung vtw = vtw_from_index(ctl, idx); 815 1.1 dyoung 816 1.1 dyoung if (!vtw) { 817 1.1 dyoung /* Hopefully fast path. 818 1.1 dyoung */ 819 1.1 dyoung db_trace(KTR_VTW 820 1.1 dyoung , (fp, "vtw: fast %A:%P %A:%P" 821 1.1 dyoung " idx %x tag %x" 822 1.1 dyoung , faddr, fport 823 1.1 dyoung , laddr, lport 824 1.1 dyoung , idx, tag)); 825 1.1 dyoung continue; 826 1.1 dyoung } 827 1.1 dyoung 828 1.1 dyoung v4 = (void*)vtw; 829 1.1 dyoung 830 1.1 dyoung /* The de-referencing of vtw is what we want to avoid. 831 1.1 dyoung * Losing. 832 1.1 dyoung */ 833 1.1 dyoung if (vtw_alive(vtw) 834 1.1 dyoung && ((which ? vtw->port_key : vtw->key) 835 1.1 dyoung == fatp_key(ctl->fat, fp, i)) 836 1.1 dyoung && (which 837 1.1 dyoung || (v4->faddr == faddr && v4->laddr == laddr 838 1.1 dyoung && v4->fport == fport)) 839 1.1 dyoung && v4->lport == lport) { 840 1.1 dyoung ++vtw_stats.hit[which]; 841 1.1 dyoung 842 1.1 dyoung db_trace(KTR_VTW 843 1.1 dyoung , (fp, "vtw: hit %8.8x:%4.4x" 844 1.1 dyoung " %8.8x:%4.4x idx %x key %x" 845 1.1 dyoung , faddr, fport 846 1.1 dyoung , laddr, lport 847 1.1 dyoung , idx_decode(ctl, idx), vtw->key)); 848 1.1 dyoung 849 1.1 dyoung KASSERT(vtw->hashed); 850 1.1 dyoung 851 1.1 dyoung goto out; 852 1.1 dyoung } 853 1.1 dyoung ++vtw_stats.losing[which]; 854 1.1 dyoung ++losings; 855 1.1 dyoung 856 1.1 dyoung if (vtw_alive(vtw)) { 857 1.1 dyoung db_trace(KTR_VTW 858 1.1 dyoung , (fp, "vtw:!mis %8.8x:%4.4x" 859 1.1 dyoung " %8.8x:%4.4x key %x tag %x" 860 1.1 dyoung , faddr, fport 861 1.1 dyoung , laddr, lport 862 1.1 dyoung , fatp_key(ctl->fat, fp, i) 863 1.1 dyoung , v4_tag(faddr, fport 864 1.1 dyoung , laddr, lport))); 865 1.1 dyoung db_trace(KTR_VTW 866 1.1 dyoung , (vtw, "vtw:!mis 
%8.8x:%4.4x" 867 1.1 dyoung " %8.8x:%4.4x key %x tag %x" 868 1.1 dyoung , v4->faddr, v4->fport 869 1.1 dyoung , v4->laddr, v4->lport 870 1.1 dyoung , vtw->key 871 1.1 dyoung , v4_tag(v4->faddr, v4->fport 872 1.1 dyoung , v4->laddr, v4->lport))); 873 1.1 dyoung 874 1.1 dyoung if (vtw->key == fatp_key(ctl->fat, fp, i)) { 875 1.1 dyoung db_trace(KTR_VTW 876 1.1 dyoung , (vtw, "vtw:!mis %8.8x:%4.4x" 877 1.1 dyoung " %8.8x:%4.4x key %x" 878 1.1 dyoung " which %x" 879 1.1 dyoung , v4->faddr, v4->fport 880 1.1 dyoung , v4->laddr, v4->lport 881 1.1 dyoung , vtw->key 882 1.1 dyoung , which)); 883 1.1 dyoung 884 1.1 dyoung } else { 885 1.1 dyoung db_trace(KTR_VTW 886 1.1 dyoung , (vtw 887 1.1 dyoung , "vtw:!mis" 888 1.1 dyoung " key %8.8x != %8.8x" 889 1.1 dyoung " idx %x i %x which %x" 890 1.1 dyoung , vtw->key 891 1.1 dyoung , fatp_key(ctl->fat, fp, i) 892 1.1 dyoung , idx_decode(ctl, idx) 893 1.1 dyoung , i 894 1.1 dyoung , which)); 895 1.1 dyoung } 896 1.1 dyoung } else { 897 1.1 dyoung db_trace(KTR_VTW 898 1.1 dyoung , (fp 899 1.1 dyoung , "vtw:!mis free entry" 900 1.1 dyoung " idx %x vtw %p which %x" 901 1.1 dyoung , idx_decode(ctl, idx) 902 1.1 dyoung , vtw, which)); 903 1.1 dyoung } 904 1.1 dyoung } 905 1.1 dyoung 906 1.1 dyoung if (fp->nxt) { 907 1.1 dyoung fp = fatp_next(ctl->fat, fp); 908 1.1 dyoung } else { 909 1.1 dyoung break; 910 1.1 dyoung } 911 1.1 dyoung } 912 1.1 dyoung ++vtw_stats.miss[which]; 913 1.1 dyoung vtw = 0; 914 1.1 dyoung out: 915 1.1 dyoung if (fatps > vtw_stats.max_chain[which]) 916 1.1 dyoung vtw_stats.max_chain[which] = fatps; 917 1.1 dyoung if (probes > vtw_stats.max_probe[which]) 918 1.1 dyoung vtw_stats.max_probe[which] = probes; 919 1.1 dyoung if (losings > vtw_stats.max_loss[which]) 920 1.1 dyoung vtw_stats.max_loss[which] = losings; 921 1.1 dyoung 922 1.1 dyoung return vtw; 923 1.1 dyoung } 924 1.1 dyoung 925 1.1 dyoung static vtw_t * 926 1.1 dyoung vtw_lookup_hash_v6(vtw_ctl_t *ctl, const struct in6_addr *faddr, uint16_t fport 927 1.1 
dyoung , const struct in6_addr *laddr, uint16_t lport 928 1.1 dyoung , int which) 929 1.1 dyoung { 930 1.1 dyoung vtw_v6_t *v6; 931 1.1 dyoung vtw_t *vtw; 932 1.1 dyoung uint32_t tag; 933 1.1 dyoung fatp_t *fp; 934 1.1 dyoung int i; 935 1.1 dyoung uint32_t fatps = 0, probes = 0, losings = 0; 936 1.1 dyoung 937 1.1 dyoung ++vtw_stats.look[which]; 938 1.1 dyoung 939 1.1 dyoung if (!ctl || !ctl->fat) 940 1.1 dyoung return 0; 941 1.1 dyoung 942 1.1 dyoung if (which) { 943 1.1 dyoung tag = v6_port_tag(lport); 944 1.1 dyoung fp = ctl->fat->port[tag & ctl->fat->mask]; 945 1.1 dyoung } else { 946 1.1 dyoung tag = v6_tag(faddr, fport, laddr, lport); 947 1.1 dyoung fp = ctl->fat->hash[tag & ctl->fat->mask]; 948 1.1 dyoung } 949 1.1 dyoung 950 1.1 dyoung while (fp && fp->inuse) { 951 1.1 dyoung uint32_t inuse = fp->inuse; 952 1.1 dyoung 953 1.1 dyoung ++fatps; 954 1.1 dyoung 955 1.1 dyoung for (i = 0; inuse && i < fatp_ntags(); ++i) { 956 1.1 dyoung uint32_t idx; 957 1.1 dyoung 958 1.1 dyoung if (!(inuse & (1 << i))) 959 1.1 dyoung continue; 960 1.1 dyoung 961 1.1 dyoung inuse ^= 1 << i; 962 1.1 dyoung 963 1.1 dyoung ++probes; 964 1.1 dyoung ++vtw_stats.probe[which]; 965 1.1 dyoung 966 1.1 dyoung idx = fp->tag[i] ^ tag ^ fatp_xtra[i]; 967 1.1 dyoung vtw = vtw_from_index(ctl, idx); 968 1.1 dyoung 969 1.1 dyoung db_trace(KTR_VTW 970 1.1 dyoung , (fp, "probe: %2d %6A:%4.4x %6A:%4.4x idx %x" 971 1.1 dyoung , i 972 1.1 dyoung , db_store(faddr, sizeof (*faddr)), fport 973 1.1 dyoung , db_store(laddr, sizeof (*laddr)), lport 974 1.1 dyoung , idx_decode(ctl, idx))); 975 1.1 dyoung 976 1.1 dyoung if (!vtw) { 977 1.1 dyoung /* Hopefully fast path. 978 1.1 dyoung */ 979 1.1 dyoung continue; 980 1.1 dyoung } 981 1.1 dyoung 982 1.1 dyoung v6 = (void*)vtw; 983 1.1 dyoung 984 1.1 dyoung if (vtw_alive(vtw) 985 1.1 dyoung && ((which ? 
vtw->port_key : vtw->key) 986 1.1 dyoung == fatp_key(ctl->fat, fp, i)) 987 1.1 dyoung && v6->lport == lport 988 1.1 dyoung && (which 989 1.1 dyoung || (v6->fport == fport 990 1.1 dyoung && !bcmp(&v6->faddr, faddr, sizeof (*faddr)) 991 1.1 dyoung && !bcmp(&v6->laddr, laddr 992 1.1 dyoung , sizeof (*laddr))))) { 993 1.1 dyoung ++vtw_stats.hit[which]; 994 1.1 dyoung 995 1.1 dyoung KASSERT(vtw->hashed); 996 1.1 dyoung goto out; 997 1.1 dyoung } else { 998 1.1 dyoung ++vtw_stats.losing[which]; 999 1.1 dyoung ++losings; 1000 1.1 dyoung } 1001 1.1 dyoung } 1002 1.1 dyoung 1003 1.1 dyoung if (fp->nxt) { 1004 1.1 dyoung fp = fatp_next(ctl->fat, fp); 1005 1.1 dyoung } else { 1006 1.1 dyoung break; 1007 1.1 dyoung } 1008 1.1 dyoung } 1009 1.1 dyoung ++vtw_stats.miss[which]; 1010 1.1 dyoung vtw = 0; 1011 1.1 dyoung out: 1012 1.1 dyoung if (fatps > vtw_stats.max_chain[which]) 1013 1.1 dyoung vtw_stats.max_chain[which] = fatps; 1014 1.1 dyoung if (probes > vtw_stats.max_probe[which]) 1015 1.1 dyoung vtw_stats.max_probe[which] = probes; 1016 1.1 dyoung if (losings > vtw_stats.max_loss[which]) 1017 1.1 dyoung vtw_stats.max_loss[which] = losings; 1018 1.1 dyoung 1019 1.1 dyoung return vtw; 1020 1.1 dyoung } 1021 1.1 dyoung 1022 1.1 dyoung /*!\brief port iterator 1023 1.1 dyoung */ 1024 1.1 dyoung static vtw_t * 1025 1.1 dyoung vtw_next_port_v4(struct tcp_ports_iterator *it) 1026 1.1 dyoung { 1027 1.1 dyoung vtw_ctl_t *ctl = it->ctl; 1028 1.1 dyoung vtw_v4_t *v4; 1029 1.1 dyoung vtw_t *vtw; 1030 1.1 dyoung uint32_t tag; 1031 1.1 dyoung uint16_t lport = it->port; 1032 1.1 dyoung fatp_t *fp; 1033 1.1 dyoung int i; 1034 1.1 dyoung uint32_t fatps = 0, probes = 0, losings = 0; 1035 1.1 dyoung 1036 1.1 dyoung tag = v4_port_tag(lport); 1037 1.1 dyoung if (!it->fp) { 1038 1.1 dyoung it->fp = ctl->fat->port[tag & ctl->fat->mask]; 1039 1.1 dyoung it->slot_idx = 0; 1040 1.1 dyoung } 1041 1.1 dyoung fp = it->fp; 1042 1.1 dyoung 1043 1.1 dyoung while (fp) { 1044 1.1 dyoung uint32_t inuse = 
fp->inuse; 1045 1.1 dyoung 1046 1.1 dyoung ++fatps; 1047 1.1 dyoung 1048 1.1 dyoung for (i = it->slot_idx; inuse && i < fatp_ntags(); ++i) { 1049 1.1 dyoung uint32_t idx; 1050 1.1 dyoung 1051 1.1 dyoung if (!(inuse & (1 << i))) 1052 1.1 dyoung continue; 1053 1.1 dyoung 1054 1.16 martin inuse &= ~0U << i; 1055 1.1 dyoung 1056 1.1 dyoung if (i < it->slot_idx) 1057 1.1 dyoung continue; 1058 1.1 dyoung 1059 1.1 dyoung ++vtw_stats.probe[1]; 1060 1.1 dyoung ++probes; 1061 1.1 dyoung 1062 1.1 dyoung idx = fp->tag[i] ^ tag ^ fatp_xtra[i]; 1063 1.1 dyoung vtw = vtw_from_index(ctl, idx); 1064 1.1 dyoung 1065 1.1 dyoung if (!vtw) { 1066 1.1 dyoung /* Hopefully fast path. 1067 1.1 dyoung */ 1068 1.1 dyoung continue; 1069 1.1 dyoung } 1070 1.1 dyoung 1071 1.1 dyoung v4 = (void*)vtw; 1072 1.1 dyoung 1073 1.1 dyoung if (vtw_alive(vtw) 1074 1.1 dyoung && vtw->port_key == fatp_key(ctl->fat, fp, i) 1075 1.1 dyoung && v4->lport == lport) { 1076 1.1 dyoung ++vtw_stats.hit[1]; 1077 1.1 dyoung 1078 1.1 dyoung it->slot_idx = i + 1; 1079 1.1 dyoung 1080 1.1 dyoung goto out; 1081 1.1 dyoung } else if (vtw_alive(vtw)) { 1082 1.1 dyoung ++vtw_stats.losing[1]; 1083 1.1 dyoung ++losings; 1084 1.1 dyoung 1085 1.1 dyoung db_trace(KTR_VTW 1086 1.1 dyoung , (vtw, "vtw:!mis" 1087 1.1 dyoung " port %8.8x:%4.4x %8.8x:%4.4x" 1088 1.1 dyoung " key %x port %x" 1089 1.1 dyoung , v4->faddr, v4->fport 1090 1.1 dyoung , v4->laddr, v4->lport 1091 1.1 dyoung , vtw->key 1092 1.1 dyoung , lport)); 1093 1.1 dyoung } else { 1094 1.1 dyoung /* Really losing here. We are coming 1095 1.1 dyoung * up with references to free entries. 1096 1.1 dyoung * Might find it better to use 1097 1.1 dyoung * traditional, or need another 1098 1.1 dyoung * add-hockery. The other add-hockery 1099 1.1 dyoung * would be to pul more into into the 1100 1.1 dyoung * cache line to reject the false 1101 1.1 dyoung * hits. 
1102 1.1 dyoung */ 1103 1.1 dyoung ++vtw_stats.losing[1]; 1104 1.1 dyoung ++losings; 1105 1.1 dyoung db_trace(KTR_VTW 1106 1.1 dyoung , (fp, "vtw:!mis port %x" 1107 1.1 dyoung " - free entry idx %x vtw %p" 1108 1.1 dyoung , lport 1109 1.1 dyoung , idx_decode(ctl, idx) 1110 1.1 dyoung , vtw)); 1111 1.1 dyoung } 1112 1.1 dyoung } 1113 1.1 dyoung 1114 1.1 dyoung if (fp->nxt) { 1115 1.1 dyoung it->fp = fp = fatp_next(ctl->fat, fp); 1116 1.1 dyoung it->slot_idx = 0; 1117 1.1 dyoung } else { 1118 1.1 dyoung it->fp = 0; 1119 1.1 dyoung break; 1120 1.1 dyoung } 1121 1.1 dyoung } 1122 1.1 dyoung ++vtw_stats.miss[1]; 1123 1.1 dyoung 1124 1.1 dyoung vtw = 0; 1125 1.1 dyoung out: 1126 1.1 dyoung if (fatps > vtw_stats.max_chain[1]) 1127 1.1 dyoung vtw_stats.max_chain[1] = fatps; 1128 1.1 dyoung if (probes > vtw_stats.max_probe[1]) 1129 1.1 dyoung vtw_stats.max_probe[1] = probes; 1130 1.1 dyoung if (losings > vtw_stats.max_loss[1]) 1131 1.1 dyoung vtw_stats.max_loss[1] = losings; 1132 1.1 dyoung 1133 1.1 dyoung return vtw; 1134 1.1 dyoung } 1135 1.1 dyoung 1136 1.1 dyoung /*!\brief port iterator 1137 1.1 dyoung */ 1138 1.1 dyoung static vtw_t * 1139 1.1 dyoung vtw_next_port_v6(struct tcp_ports_iterator *it) 1140 1.1 dyoung { 1141 1.1 dyoung vtw_ctl_t *ctl = it->ctl; 1142 1.1 dyoung vtw_v6_t *v6; 1143 1.1 dyoung vtw_t *vtw; 1144 1.1 dyoung uint32_t tag; 1145 1.1 dyoung uint16_t lport = it->port; 1146 1.1 dyoung fatp_t *fp; 1147 1.1 dyoung int i; 1148 1.1 dyoung uint32_t fatps = 0, probes = 0, losings = 0; 1149 1.1 dyoung 1150 1.1 dyoung tag = v6_port_tag(lport); 1151 1.1 dyoung if (!it->fp) { 1152 1.1 dyoung it->fp = ctl->fat->port[tag & ctl->fat->mask]; 1153 1.1 dyoung it->slot_idx = 0; 1154 1.1 dyoung } 1155 1.1 dyoung fp = it->fp; 1156 1.1 dyoung 1157 1.1 dyoung while (fp) { 1158 1.1 dyoung uint32_t inuse = fp->inuse; 1159 1.1 dyoung 1160 1.1 dyoung ++fatps; 1161 1.1 dyoung 1162 1.1 dyoung for (i = it->slot_idx; inuse && i < fatp_ntags(); ++i) { 1163 1.1 dyoung uint32_t idx; 
1164 1.1 dyoung 1165 1.1 dyoung if (!(inuse & (1 << i))) 1166 1.1 dyoung continue; 1167 1.1 dyoung 1168 1.16 martin inuse &= ~0U << i; 1169 1.1 dyoung 1170 1.1 dyoung if (i < it->slot_idx) 1171 1.1 dyoung continue; 1172 1.1 dyoung 1173 1.1 dyoung ++vtw_stats.probe[1]; 1174 1.1 dyoung ++probes; 1175 1.1 dyoung 1176 1.1 dyoung idx = fp->tag[i] ^ tag ^ fatp_xtra[i]; 1177 1.1 dyoung vtw = vtw_from_index(ctl, idx); 1178 1.1 dyoung 1179 1.1 dyoung if (!vtw) { 1180 1.1 dyoung /* Hopefully fast path. 1181 1.1 dyoung */ 1182 1.1 dyoung continue; 1183 1.1 dyoung } 1184 1.1 dyoung 1185 1.1 dyoung v6 = (void*)vtw; 1186 1.1 dyoung 1187 1.1 dyoung db_trace(KTR_VTW 1188 1.1 dyoung , (vtw, "vtw: i %x idx %x fp->tag %x" 1189 1.1 dyoung " tag %x xtra %x" 1190 1.1 dyoung , i, idx_decode(ctl, idx) 1191 1.1 dyoung , fp->tag[i], tag, fatp_xtra[i])); 1192 1.1 dyoung 1193 1.1 dyoung if (vtw_alive(vtw) 1194 1.1 dyoung && vtw->port_key == fatp_key(ctl->fat, fp, i) 1195 1.1 dyoung && v6->lport == lport) { 1196 1.1 dyoung ++vtw_stats.hit[1]; 1197 1.1 dyoung 1198 1.1 dyoung db_trace(KTR_VTW 1199 1.1 dyoung , (fp, "vtw: nxt port %P - %4.4x" 1200 1.1 dyoung " idx %x key %x" 1201 1.1 dyoung , lport, lport 1202 1.1 dyoung , idx_decode(ctl, idx), vtw->key)); 1203 1.1 dyoung 1204 1.1 dyoung it->slot_idx = i + 1; 1205 1.1 dyoung goto out; 1206 1.1 dyoung } else if (vtw_alive(vtw)) { 1207 1.1 dyoung ++vtw_stats.losing[1]; 1208 1.1 dyoung 1209 1.1 dyoung db_trace(KTR_VTW 1210 1.1 dyoung , (vtw, "vtw:!mis port %6A:%4.4x" 1211 1.1 dyoung " %6A:%4.4x key %x port %x" 1212 1.1 dyoung , db_store(&v6->faddr 1213 1.1 dyoung , sizeof (v6->faddr)) 1214 1.1 dyoung , v6->fport 1215 1.1 dyoung , db_store(&v6->laddr 1216 1.1 dyoung , sizeof (v6->faddr)) 1217 1.1 dyoung , v6->lport 1218 1.1 dyoung , vtw->key 1219 1.1 dyoung , lport)); 1220 1.1 dyoung } else { 1221 1.1 dyoung /* Really losing here. We are coming 1222 1.1 dyoung * up with references to free entries. 
1223 1.1 dyoung * Might find it better to use 1224 1.1 dyoung * traditional, or need another 1225 1.1 dyoung * add-hockery. The other add-hockery 1226 1.1 dyoung * would be to pul more into into the 1227 1.1 dyoung * cache line to reject the false 1228 1.1 dyoung * hits. 1229 1.1 dyoung */ 1230 1.1 dyoung ++vtw_stats.losing[1]; 1231 1.1 dyoung ++losings; 1232 1.1 dyoung 1233 1.1 dyoung db_trace(KTR_VTW 1234 1.1 dyoung , (fp 1235 1.1 dyoung , "vtw:!mis port %x" 1236 1.1 dyoung " - free entry idx %x vtw %p" 1237 1.1 dyoung , lport, idx_decode(ctl, idx) 1238 1.1 dyoung , vtw)); 1239 1.1 dyoung } 1240 1.1 dyoung } 1241 1.1 dyoung 1242 1.1 dyoung if (fp->nxt) { 1243 1.1 dyoung it->fp = fp = fatp_next(ctl->fat, fp); 1244 1.1 dyoung it->slot_idx = 0; 1245 1.1 dyoung } else { 1246 1.1 dyoung it->fp = 0; 1247 1.1 dyoung break; 1248 1.1 dyoung } 1249 1.1 dyoung } 1250 1.1 dyoung ++vtw_stats.miss[1]; 1251 1.1 dyoung 1252 1.1 dyoung vtw = 0; 1253 1.1 dyoung out: 1254 1.1 dyoung if (fatps > vtw_stats.max_chain[1]) 1255 1.1 dyoung vtw_stats.max_chain[1] = fatps; 1256 1.1 dyoung if (probes > vtw_stats.max_probe[1]) 1257 1.1 dyoung vtw_stats.max_probe[1] = probes; 1258 1.1 dyoung if (losings > vtw_stats.max_loss[1]) 1259 1.1 dyoung vtw_stats.max_loss[1] = losings; 1260 1.1 dyoung 1261 1.1 dyoung return vtw; 1262 1.1 dyoung } 1263 1.1 dyoung 1264 1.1 dyoung /*!\brief initialise the VTW allocation arena 1265 1.1 dyoung * 1266 1.1 dyoung * There are 1+3 allocation classes: 1267 1.1 dyoung * 0 classless 1268 1.1 dyoung * {1,2,3} MSL-class based allocation 1269 1.1 dyoung * 1270 1.1 dyoung * The allocation arenas are all initialised. Classless gets all the 1271 1.1 dyoung * space. MSL-class based divides the arena, so that allocation 1272 1.1 dyoung * within a class can proceed without having to consider entries 1273 1.1 dyoung * (aka: cache lines) from different classes. 
1274 1.1 dyoung * 1275 1.1 dyoung * Usually, we are completely classless or class-based, but there can be 1276 1.1 dyoung * transition periods, corresponding to dynamic adjustments in the config 1277 1.1 dyoung * by the operator. 1278 1.1 dyoung */ 1279 1.1 dyoung static void 1280 1.6 dyoung vtw_init(fatp_ctl_t *fat, vtw_ctl_t *ctl, const uint32_t n, vtw_t *ctl_base_v) 1281 1.1 dyoung { 1282 1.6 dyoung int class_n, i; 1283 1.6 dyoung vtw_t *base; 1284 1.1 dyoung 1285 1.6 dyoung ctl->base.v = ctl_base_v; 1286 1.1 dyoung 1287 1.6 dyoung if (ctl->is_v4) { 1288 1.6 dyoung ctl->lim.v4 = ctl->base.v4 + n - 1; 1289 1.6 dyoung ctl->alloc.v4 = ctl->base.v4; 1290 1.6 dyoung } else { 1291 1.6 dyoung ctl->lim.v6 = ctl->base.v6 + n - 1; 1292 1.6 dyoung ctl->alloc.v6 = ctl->base.v6; 1293 1.6 dyoung } 1294 1.1 dyoung 1295 1.6 dyoung ctl->nfree = n; 1296 1.6 dyoung ctl->ctl = ctl; 1297 1.1 dyoung 1298 1.6 dyoung ctl->idx_bits = 32; 1299 1.6 dyoung for (ctl->idx_mask = ~0; (ctl->idx_mask & (n-1)) == n-1; ) { 1300 1.6 dyoung ctl->idx_mask >>= 1; 1301 1.6 dyoung ctl->idx_bits -= 1; 1302 1.6 dyoung } 1303 1.1 dyoung 1304 1.6 dyoung ctl->idx_mask <<= 1; 1305 1.6 dyoung ctl->idx_mask |= 1; 1306 1.6 dyoung ctl->idx_bits += 1; 1307 1.1 dyoung 1308 1.6 dyoung ctl->fat = fat; 1309 1.6 dyoung fat->vtw = ctl; 1310 1.1 dyoung 1311 1.6 dyoung /* Divide the resources equally amongst the classes. 1312 1.6 dyoung * This is not optimal, as the different classes 1313 1.6 dyoung * arrive and leave at different rates, but it is 1314 1.6 dyoung * the best I can do for now. 
1315 1.6 dyoung */ 1316 1.6 dyoung class_n = n / (VTW_NCLASS-1); 1317 1.6 dyoung base = ctl->base.v; 1318 1.1 dyoung 1319 1.6 dyoung for (i = 1; i < VTW_NCLASS; ++i) { 1320 1.6 dyoung int j; 1321 1.1 dyoung 1322 1.6 dyoung ctl[i] = ctl[0]; 1323 1.6 dyoung ctl[i].clidx = i; 1324 1.1 dyoung 1325 1.6 dyoung ctl[i].base.v = base; 1326 1.6 dyoung ctl[i].alloc = ctl[i].base; 1327 1.1 dyoung 1328 1.6 dyoung for (j = 0; j < class_n - 1; ++j) { 1329 1.6 dyoung if (tcp_msl_enable) 1330 1.6 dyoung base->msl_class = i; 1331 1.1 dyoung base = vtw_next(ctl, base); 1332 1.1 dyoung } 1333 1.6 dyoung 1334 1.6 dyoung ctl[i].lim.v = base; 1335 1.6 dyoung base = vtw_next(ctl, base); 1336 1.6 dyoung ctl[i].nfree = class_n; 1337 1.1 dyoung } 1338 1.1 dyoung 1339 1.1 dyoung vtw_debug_init(); 1340 1.1 dyoung } 1341 1.1 dyoung 1342 1.1 dyoung /*!\brief map class to TCP MSL 1343 1.1 dyoung */ 1344 1.1 dyoung static inline uint32_t 1345 1.11 matt class_to_msl(int msl_class) 1346 1.1 dyoung { 1347 1.11 matt switch (msl_class) { 1348 1.1 dyoung case 0: 1349 1.1 dyoung case 1: 1350 1.1 dyoung return tcp_msl_remote ? tcp_msl_remote : (TCPTV_MSL >> 0); 1351 1.1 dyoung case 2: 1352 1.1 dyoung return tcp_msl_local ? tcp_msl_local : (TCPTV_MSL >> 1); 1353 1.1 dyoung default: 1354 1.1 dyoung return tcp_msl_loop ? tcp_msl_loop : (TCPTV_MSL >> 2); 1355 1.1 dyoung } 1356 1.1 dyoung } 1357 1.1 dyoung 1358 1.1 dyoung /*!\brief map TCP MSL to class 1359 1.1 dyoung */ 1360 1.1 dyoung static inline uint32_t 1361 1.1 dyoung msl_to_class(int msl) 1362 1.1 dyoung { 1363 1.1 dyoung if (tcp_msl_enable) { 1364 1.1 dyoung if (msl <= (tcp_msl_loop ? tcp_msl_loop : (TCPTV_MSL >> 2))) 1365 1.1 dyoung return 1+2; 1366 1.1 dyoung if (msl <= (tcp_msl_local ? 
tcp_msl_local : (TCPTV_MSL >> 1))) 1367 1.1 dyoung return 1+1; 1368 1.1 dyoung return 1; 1369 1.1 dyoung } 1370 1.1 dyoung return 0; 1371 1.1 dyoung } 1372 1.1 dyoung 1373 1.1 dyoung /*!\brief allocate a vtw entry 1374 1.1 dyoung */ 1375 1.1 dyoung static inline vtw_t * 1376 1.1 dyoung vtw_alloc(vtw_ctl_t *ctl) 1377 1.1 dyoung { 1378 1.1 dyoung vtw_t *vtw = 0; 1379 1.1 dyoung int stuck = 0; 1380 1.1 dyoung int avail = ctl ? (ctl->nalloc + ctl->nfree) : 0; 1381 1.1 dyoung int msl; 1382 1.1 dyoung 1383 1.1 dyoung KASSERT(mutex_owned(softnet_lock)); 1384 1.1 dyoung 1385 1.1 dyoung /* If no resources, we will not get far. 1386 1.1 dyoung */ 1387 1.1 dyoung if (!ctl || !ctl->base.v4 || avail <= 0) 1388 1.1 dyoung return 0; 1389 1.1 dyoung 1390 1.1 dyoung /* Obtain a free one. 1391 1.1 dyoung */ 1392 1.1 dyoung while (!ctl->nfree) { 1393 1.1 dyoung vtw_age(ctl, 0); 1394 1.1 dyoung 1395 1.1 dyoung if (++stuck > avail) { 1396 1.1 dyoung /* When in transition between 1397 1.1 dyoung * schemes (classless, classed) we 1398 1.1 dyoung * can be stuck having to await the 1399 1.1 dyoung * expiration of cross-allocated entries. 1400 1.1 dyoung * 1401 1.1 dyoung * Returning zero means we will fall back to the 1402 1.1 dyoung * traditional TIME_WAIT handling, except in the 1403 1.1 dyoung * case of a re-shed, in which case we cannot 1404 1.1 dyoung * perform the reshecd, but will retain the extant 1405 1.1 dyoung * entry. 
1406 1.1 dyoung */ 1407 1.1 dyoung db_trace(KTR_VTW 1408 1.1 dyoung , (ctl, "vtw:!none free in class %x %x/%x" 1409 1.1 dyoung , ctl->clidx 1410 1.1 dyoung , ctl->nalloc, ctl->nfree)); 1411 1.1 dyoung 1412 1.1 dyoung return 0; 1413 1.1 dyoung } 1414 1.1 dyoung } 1415 1.1 dyoung 1416 1.1 dyoung vtw = ctl->alloc.v; 1417 1.1 dyoung 1418 1.1 dyoung if (vtw->msl_class != ctl->clidx) { 1419 1.1 dyoung /* Usurping rules: 1420 1.1 dyoung * 0 -> {1,2,3} or {1,2,3} -> 0 1421 1.1 dyoung */ 1422 1.1 dyoung KASSERT(!vtw->msl_class || !ctl->clidx); 1423 1.1 dyoung 1424 1.1 dyoung if (vtw->hashed || vtw->expire.tv_sec) { 1425 1.1 dyoung /* As this is owned by some other class, 1426 1.1 dyoung * we must wait for it to expire it. 1427 1.1 dyoung * This will only happen on class/classless 1428 1.1 dyoung * transitions, which are guaranteed to progress 1429 1.1 dyoung * to completion in small finite time, barring bugs. 1430 1.1 dyoung */ 1431 1.1 dyoung db_trace(KTR_VTW 1432 1.1 dyoung , (ctl, "vtw:!%p class %x!=%x %x:%x%s" 1433 1.1 dyoung , vtw, vtw->msl_class, ctl->clidx 1434 1.1 dyoung , vtw->expire.tv_sec 1435 1.1 dyoung , vtw->expire.tv_usec 1436 1.1 dyoung , vtw->hashed ? " hashed" : "")); 1437 1.1 dyoung 1438 1.1 dyoung return 0; 1439 1.1 dyoung } 1440 1.1 dyoung 1441 1.1 dyoung db_trace(KTR_VTW 1442 1.1 dyoung , (ctl, "vtw:!%p usurped from %x to %x" 1443 1.1 dyoung , vtw, vtw->msl_class, ctl->clidx)); 1444 1.1 dyoung 1445 1.1 dyoung vtw->msl_class = ctl->clidx; 1446 1.1 dyoung } 1447 1.1 dyoung 1448 1.1 dyoung if (vtw_alive(vtw)) { 1449 1.1 dyoung KASSERT(0 && "next free not free"); 1450 1.1 dyoung return 0; 1451 1.1 dyoung } 1452 1.1 dyoung 1453 1.21 andvar /* Advance allocation pointer. 
1454 1.1 dyoung */ 1455 1.1 dyoung ctl->alloc.v = vtw_next(ctl, vtw); 1456 1.1 dyoung 1457 1.1 dyoung --ctl->nfree; 1458 1.1 dyoung ++ctl->nalloc; 1459 1.1 dyoung 1460 1.1 dyoung msl = (2 * class_to_msl(ctl->clidx) * 1000) / PR_SLOWHZ; // msec 1461 1.1 dyoung 1462 1.1 dyoung /* mark expiration 1463 1.1 dyoung */ 1464 1.3 drochner getmicrouptime(&vtw->expire); 1465 1.1 dyoung 1466 1.1 dyoung /* Move expiration into the future. 1467 1.1 dyoung */ 1468 1.1 dyoung vtw->expire.tv_sec += msl / 1000; 1469 1.1 dyoung vtw->expire.tv_usec += 1000 * (msl % 1000); 1470 1.1 dyoung 1471 1.1 dyoung while (vtw->expire.tv_usec >= 1000*1000) { 1472 1.1 dyoung vtw->expire.tv_usec -= 1000*1000; 1473 1.1 dyoung vtw->expire.tv_sec += 1; 1474 1.1 dyoung } 1475 1.1 dyoung 1476 1.1 dyoung if (!ctl->oldest.v) 1477 1.1 dyoung ctl->oldest.v = vtw; 1478 1.1 dyoung 1479 1.1 dyoung return vtw; 1480 1.1 dyoung } 1481 1.1 dyoung 1482 1.1 dyoung /*!\brief expiration 1483 1.1 dyoung */ 1484 1.1 dyoung static int 1485 1.1 dyoung vtw_age(vtw_ctl_t *ctl, struct timeval *_when) 1486 1.1 dyoung { 1487 1.1 dyoung vtw_t *vtw; 1488 1.1 dyoung struct timeval then, *when = _when; 1489 1.1 dyoung int maxtries = 0; 1490 1.1 dyoung 1491 1.1 dyoung if (!ctl->oldest.v) { 1492 1.1 dyoung KASSERT(!ctl->nalloc); 1493 1.1 dyoung return 0; 1494 1.1 dyoung } 1495 1.1 dyoung 1496 1.1 dyoung for (vtw = ctl->oldest.v; vtw && ctl->nalloc; ) { 1497 1.1 dyoung if (++maxtries > ctl->nalloc) 1498 1.1 dyoung break; 1499 1.1 dyoung 1500 1.1 dyoung if (vtw->msl_class != ctl->clidx) { 1501 1.1 dyoung db_trace(KTR_VTW 1502 1.1 dyoung , (vtw, "vtw:!age class mismatch %x != %x" 1503 1.1 dyoung , vtw->msl_class, ctl->clidx)); 1504 1.1 dyoung /* XXXX 1505 1.1 dyoung * See if the appropriate action is to skip to the next. 
1506 1.1 dyoung * XXXX 1507 1.1 dyoung */ 1508 1.1 dyoung ctl->oldest.v = vtw = vtw_next(ctl, vtw); 1509 1.1 dyoung continue; 1510 1.1 dyoung } 1511 1.1 dyoung if (!when) { 1512 1.1 dyoung /* Latch oldest timeval if none specified. 1513 1.1 dyoung */ 1514 1.1 dyoung then = vtw->expire; 1515 1.1 dyoung when = &then; 1516 1.1 dyoung } 1517 1.1 dyoung 1518 1.1 dyoung if (!timercmp(&vtw->expire, when, <=)) 1519 1.1 dyoung break; 1520 1.1 dyoung 1521 1.1 dyoung db_trace(KTR_VTW 1522 1.1 dyoung , (vtw, "vtw: expire %x %8.8x:%8.8x %x/%x" 1523 1.1 dyoung , ctl->clidx 1524 1.1 dyoung , vtw->expire.tv_sec 1525 1.1 dyoung , vtw->expire.tv_usec 1526 1.1 dyoung , ctl->nalloc 1527 1.1 dyoung , ctl->nfree)); 1528 1.1 dyoung 1529 1.1 dyoung if (!_when) 1530 1.1 dyoung ++vtw_stats.kill; 1531 1.1 dyoung 1532 1.1 dyoung vtw_del(ctl, vtw); 1533 1.1 dyoung vtw = ctl->oldest.v; 1534 1.1 dyoung } 1535 1.1 dyoung 1536 1.1 dyoung return ctl->nalloc; // # remaining allocated 1537 1.1 dyoung } 1538 1.1 dyoung 1539 1.1 dyoung static callout_t vtw_cs; 1540 1.1 dyoung 1541 1.1 dyoung /*!\brief notice the passage of time. 1542 1.1 dyoung * It seems to be getting faster. What happened to the year? 1543 1.1 dyoung */ 1544 1.1 dyoung static void 1545 1.1 dyoung vtw_tick(void *arg) 1546 1.1 dyoung { 1547 1.1 dyoung struct timeval now; 1548 1.1 dyoung int i, cnt = 0; 1549 1.1 dyoung 1550 1.3 drochner getmicrouptime(&now); 1551 1.1 dyoung 1552 1.1 dyoung db_trace(KTR_VTW, (arg, "vtk: tick - now %8.8x:%8.8x" 1553 1.1 dyoung , now.tv_sec, now.tv_usec)); 1554 1.1 dyoung 1555 1.1 dyoung mutex_enter(softnet_lock); 1556 1.1 dyoung 1557 1.1 dyoung for (i = 0; i < VTW_NCLASS; ++i) { 1558 1.1 dyoung cnt += vtw_age(&vtw_tcpv4[i], &now); 1559 1.1 dyoung cnt += vtw_age(&vtw_tcpv6[i], &now); 1560 1.1 dyoung } 1561 1.1 dyoung 1562 1.1 dyoung /* Keep ticks coming while we need them. 
1563 1.1 dyoung */ 1564 1.1 dyoung if (cnt) 1565 1.1 dyoung callout_schedule(&vtw_cs, hz / 5); 1566 1.1 dyoung else { 1567 1.1 dyoung tcp_vtw_was_enabled = 0; 1568 1.1 dyoung tcbtable.vestige = 0; 1569 1.1 dyoung } 1570 1.1 dyoung mutex_exit(softnet_lock); 1571 1.1 dyoung } 1572 1.1 dyoung 1573 1.24 ozaki /* inpcb_lookup_locals assist for handling vestigial entries. 1574 1.1 dyoung */ 1575 1.1 dyoung static void * 1576 1.1 dyoung tcp_init_ports_v4(struct in_addr addr, u_int port, int wild) 1577 1.1 dyoung { 1578 1.1 dyoung struct tcp_ports_iterator *it = &tcp_ports_iterator_v4; 1579 1.1 dyoung 1580 1.1 dyoung bzero(it, sizeof (*it)); 1581 1.1 dyoung 1582 1.1 dyoung /* Note: the reference to vtw_tcpv4[0] is fine. 1583 1.1 dyoung * We do not need per-class iteration. We just 1584 1.1 dyoung * need to get to the fat, and there is one 1585 1.1 dyoung * shared fat. 1586 1.1 dyoung */ 1587 1.1 dyoung if (vtw_tcpv4[0].fat) { 1588 1.1 dyoung it->addr.v4 = addr; 1589 1.1 dyoung it->port = port; 1590 1.1 dyoung it->wild = !!wild; 1591 1.1 dyoung it->ctl = &vtw_tcpv4[0]; 1592 1.1 dyoung 1593 1.1 dyoung ++vtw_stats.look[1]; 1594 1.1 dyoung } 1595 1.1 dyoung 1596 1.1 dyoung return it; 1597 1.1 dyoung } 1598 1.1 dyoung 1599 1.1 dyoung /*!\brief export an IPv4 vtw. 
1600 1.1 dyoung */ 1601 1.1 dyoung static int 1602 1.1 dyoung vtw_export_v4(vtw_ctl_t *ctl, vtw_t *vtw, vestigial_inpcb_t *res) 1603 1.1 dyoung { 1604 1.1 dyoung vtw_v4_t *v4 = (void*)vtw; 1605 1.1 dyoung 1606 1.1 dyoung bzero(res, sizeof (*res)); 1607 1.1 dyoung 1608 1.1 dyoung if (ctl && vtw) { 1609 1.1 dyoung if (!ctl->clidx && vtw->msl_class) 1610 1.1 dyoung ctl += vtw->msl_class; 1611 1.1 dyoung else 1612 1.1 dyoung KASSERT(ctl->clidx == vtw->msl_class); 1613 1.1 dyoung 1614 1.1 dyoung res->valid = 1; 1615 1.1 dyoung res->v4 = 1; 1616 1.1 dyoung 1617 1.1 dyoung res->faddr.v4.s_addr = v4->faddr; 1618 1.1 dyoung res->laddr.v4.s_addr = v4->laddr; 1619 1.1 dyoung res->fport = v4->fport; 1620 1.1 dyoung res->lport = v4->lport; 1621 1.1 dyoung res->vtw = vtw; // netlock held over call(s) 1622 1.1 dyoung res->ctl = ctl; 1623 1.1 dyoung res->reuse_addr = vtw->reuse_addr; 1624 1.1 dyoung res->reuse_port = vtw->reuse_port; 1625 1.1 dyoung res->snd_nxt = vtw->snd_nxt; 1626 1.1 dyoung res->rcv_nxt = vtw->rcv_nxt; 1627 1.1 dyoung res->rcv_wnd = vtw->rcv_wnd; 1628 1.1 dyoung res->uid = vtw->uid; 1629 1.1 dyoung } 1630 1.1 dyoung 1631 1.1 dyoung return res->valid; 1632 1.1 dyoung } 1633 1.1 dyoung 1634 1.1 dyoung /*!\brief return next port in the port iterator. yowza. 
1635 1.1 dyoung */ 1636 1.1 dyoung static int 1637 1.1 dyoung tcp_next_port_v4(void *arg, struct vestigial_inpcb *res) 1638 1.1 dyoung { 1639 1.1 dyoung struct tcp_ports_iterator *it = arg; 1640 1.1 dyoung vtw_t *vtw = 0; 1641 1.1 dyoung 1642 1.1 dyoung if (it->ctl) 1643 1.1 dyoung vtw = vtw_next_port_v4(it); 1644 1.1 dyoung 1645 1.1 dyoung if (!vtw) 1646 1.1 dyoung it->ctl = 0; 1647 1.1 dyoung 1648 1.1 dyoung return vtw_export_v4(it->ctl, vtw, res); 1649 1.1 dyoung } 1650 1.1 dyoung 1651 1.1 dyoung static int 1652 1.1 dyoung tcp_lookup_v4(struct in_addr faddr, uint16_t fport, 1653 1.1 dyoung struct in_addr laddr, uint16_t lport, 1654 1.1 dyoung struct vestigial_inpcb *res) 1655 1.1 dyoung { 1656 1.1 dyoung vtw_t *vtw; 1657 1.1 dyoung vtw_ctl_t *ctl; 1658 1.1 dyoung 1659 1.1 dyoung 1660 1.1 dyoung db_trace(KTR_VTW 1661 1.1 dyoung , (res, "vtw: lookup %A:%P %A:%P" 1662 1.1 dyoung , faddr, fport 1663 1.1 dyoung , laddr, lport)); 1664 1.1 dyoung 1665 1.1 dyoung vtw = vtw_lookup_hash_v4((ctl = &vtw_tcpv4[0]) 1666 1.1 dyoung , faddr.s_addr, fport 1667 1.1 dyoung , laddr.s_addr, lport, 0); 1668 1.1 dyoung 1669 1.1 dyoung return vtw_export_v4(ctl, vtw, res); 1670 1.1 dyoung } 1671 1.1 dyoung 1672 1.24 ozaki /* inpcb_lookup_locals assist for handling vestigial entries. 1673 1.1 dyoung */ 1674 1.1 dyoung static void * 1675 1.1 dyoung tcp_init_ports_v6(const struct in6_addr *addr, u_int port, int wild) 1676 1.1 dyoung { 1677 1.1 dyoung struct tcp_ports_iterator *it = &tcp_ports_iterator_v6; 1678 1.1 dyoung 1679 1.1 dyoung bzero(it, sizeof (*it)); 1680 1.1 dyoung 1681 1.1 dyoung /* Note: the reference to vtw_tcpv6[0] is fine. 1682 1.1 dyoung * We do not need per-class iteration. We just 1683 1.1 dyoung * need to get to the fat, and there is one 1684 1.1 dyoung * shared fat. 
1685 1.1 dyoung */ 1686 1.1 dyoung if (vtw_tcpv6[0].fat) { 1687 1.1 dyoung it->addr.v6 = *addr; 1688 1.1 dyoung it->port = port; 1689 1.1 dyoung it->wild = !!wild; 1690 1.1 dyoung it->ctl = &vtw_tcpv6[0]; 1691 1.1 dyoung 1692 1.1 dyoung ++vtw_stats.look[1]; 1693 1.1 dyoung } 1694 1.1 dyoung 1695 1.1 dyoung return it; 1696 1.1 dyoung } 1697 1.1 dyoung 1698 1.1 dyoung /*!\brief export an IPv6 vtw. 1699 1.1 dyoung */ 1700 1.1 dyoung static int 1701 1.1 dyoung vtw_export_v6(vtw_ctl_t *ctl, vtw_t *vtw, vestigial_inpcb_t *res) 1702 1.1 dyoung { 1703 1.1 dyoung vtw_v6_t *v6 = (void*)vtw; 1704 1.1 dyoung 1705 1.1 dyoung bzero(res, sizeof (*res)); 1706 1.1 dyoung 1707 1.1 dyoung if (ctl && vtw) { 1708 1.1 dyoung if (!ctl->clidx && vtw->msl_class) 1709 1.1 dyoung ctl += vtw->msl_class; 1710 1.1 dyoung else 1711 1.1 dyoung KASSERT(ctl->clidx == vtw->msl_class); 1712 1.1 dyoung 1713 1.1 dyoung res->valid = 1; 1714 1.1 dyoung res->v4 = 0; 1715 1.1 dyoung 1716 1.1 dyoung res->faddr.v6 = v6->faddr; 1717 1.1 dyoung res->laddr.v6 = v6->laddr; 1718 1.1 dyoung res->fport = v6->fport; 1719 1.1 dyoung res->lport = v6->lport; 1720 1.1 dyoung res->vtw = vtw; // netlock held over call(s) 1721 1.1 dyoung res->ctl = ctl; 1722 1.1 dyoung 1723 1.1 dyoung res->v6only = vtw->v6only; 1724 1.1 dyoung res->reuse_addr = vtw->reuse_addr; 1725 1.1 dyoung res->reuse_port = vtw->reuse_port; 1726 1.1 dyoung 1727 1.1 dyoung res->snd_nxt = vtw->snd_nxt; 1728 1.1 dyoung res->rcv_nxt = vtw->rcv_nxt; 1729 1.1 dyoung res->rcv_wnd = vtw->rcv_wnd; 1730 1.1 dyoung res->uid = vtw->uid; 1731 1.1 dyoung } 1732 1.1 dyoung 1733 1.1 dyoung return res->valid; 1734 1.1 dyoung } 1735 1.1 dyoung 1736 1.1 dyoung static int 1737 1.1 dyoung tcp_next_port_v6(void *arg, struct vestigial_inpcb *res) 1738 1.1 dyoung { 1739 1.1 dyoung struct tcp_ports_iterator *it = arg; 1740 1.1 dyoung vtw_t *vtw = 0; 1741 1.1 dyoung 1742 1.1 dyoung if (it->ctl) 1743 1.1 dyoung vtw = vtw_next_port_v6(it); 1744 1.1 dyoung 1745 1.1 dyoung if 
(!vtw) 1746 1.1 dyoung it->ctl = 0; 1747 1.1 dyoung 1748 1.1 dyoung return vtw_export_v6(it->ctl, vtw, res); 1749 1.1 dyoung } 1750 1.1 dyoung 1751 1.1 dyoung static int 1752 1.1 dyoung tcp_lookup_v6(const struct in6_addr *faddr, uint16_t fport, 1753 1.1 dyoung const struct in6_addr *laddr, uint16_t lport, 1754 1.1 dyoung struct vestigial_inpcb *res) 1755 1.1 dyoung { 1756 1.1 dyoung vtw_ctl_t *ctl; 1757 1.1 dyoung vtw_t *vtw; 1758 1.1 dyoung 1759 1.1 dyoung db_trace(KTR_VTW 1760 1.1 dyoung , (res, "vtw: lookup %6A:%P %6A:%P" 1761 1.1 dyoung , db_store(faddr, sizeof (*faddr)), fport 1762 1.1 dyoung , db_store(laddr, sizeof (*laddr)), lport)); 1763 1.1 dyoung 1764 1.1 dyoung vtw = vtw_lookup_hash_v6((ctl = &vtw_tcpv6[0]) 1765 1.1 dyoung , faddr, fport 1766 1.1 dyoung , laddr, lport, 0); 1767 1.1 dyoung 1768 1.1 dyoung return vtw_export_v6(ctl, vtw, res); 1769 1.1 dyoung } 1770 1.1 dyoung 1771 1.1 dyoung static vestigial_hooks_t tcp_hooks = { 1772 1.1 dyoung .init_ports4 = tcp_init_ports_v4, 1773 1.1 dyoung .next_port4 = tcp_next_port_v4, 1774 1.1 dyoung .lookup4 = tcp_lookup_v4, 1775 1.1 dyoung .init_ports6 = tcp_init_ports_v6, 1776 1.1 dyoung .next_port6 = tcp_next_port_v6, 1777 1.1 dyoung .lookup6 = tcp_lookup_v6, 1778 1.1 dyoung }; 1779 1.1 dyoung 1780 1.1 dyoung static bool 1781 1.1 dyoung vtw_select(int af, fatp_ctl_t **fatp, vtw_ctl_t **ctlp) 1782 1.1 dyoung { 1783 1.1 dyoung fatp_ctl_t *fat; 1784 1.1 dyoung vtw_ctl_t *ctl; 1785 1.1 dyoung 1786 1.1 dyoung switch (af) { 1787 1.1 dyoung case AF_INET: 1788 1.1 dyoung fat = &fat_tcpv4; 1789 1.1 dyoung ctl = &vtw_tcpv4[0]; 1790 1.1 dyoung break; 1791 1.1 dyoung case AF_INET6: 1792 1.1 dyoung fat = &fat_tcpv6; 1793 1.1 dyoung ctl = &vtw_tcpv6[0]; 1794 1.1 dyoung break; 1795 1.1 dyoung default: 1796 1.1 dyoung return false; 1797 1.1 dyoung } 1798 1.1 dyoung if (fatp != NULL) 1799 1.1 dyoung *fatp = fat; 1800 1.1 dyoung if (ctlp != NULL) 1801 1.1 dyoung *ctlp = ctl; 1802 1.1 dyoung return true; 1803 1.1 dyoung } 1804 
1.1 dyoung 1805 1.1 dyoung /*!\brief initialize controlling instance 1806 1.1 dyoung */ 1807 1.1 dyoung static int 1808 1.1 dyoung vtw_control_init(int af) 1809 1.1 dyoung { 1810 1.1 dyoung fatp_ctl_t *fat; 1811 1.1 dyoung vtw_ctl_t *ctl; 1812 1.6 dyoung fatp_t *fat_base; 1813 1.6 dyoung fatp_t **fat_hash; 1814 1.6 dyoung vtw_t *ctl_base_v; 1815 1.6 dyoung uint32_t n, m; 1816 1.6 dyoung size_t sz; 1817 1.6 dyoung 1818 1.6 dyoung KASSERT(powerof2(tcp_vtw_entries)); 1819 1.1 dyoung 1820 1.1 dyoung if (!vtw_select(af, &fat, &ctl)) 1821 1.1 dyoung return EAFNOSUPPORT; 1822 1.1 dyoung 1823 1.6 dyoung if (fat->hash != NULL) { 1824 1.6 dyoung KASSERT(fat->base != NULL && ctl->base.v != NULL); 1825 1.6 dyoung return 0; 1826 1.6 dyoung } 1827 1.6 dyoung 1828 1.6 dyoung /* Allocate 10% more capacity in the fat pointers. 1829 1.6 dyoung * We should only need ~#hash additional based on 1830 1.6 dyoung * how they age, but TIME_WAIT assassination could cause 1831 1.6 dyoung * sparse fat pointer utilisation. 1832 1.6 dyoung */ 1833 1.6 dyoung m = 512; 1834 1.6 dyoung n = 2*m + (11 * (tcp_vtw_entries / fatp_ntags())) / 10; 1835 1.6 dyoung sz = (ctl->is_v4 ? 
sizeof(vtw_v4_t) : sizeof(vtw_v6_t)); 1836 1.6 dyoung 1837 1.20 chs fat_hash = kmem_zalloc(2*m * sizeof(fatp_t *), KM_SLEEP); 1838 1.20 chs fat_base = kmem_zalloc(2*n * sizeof(fatp_t), KM_SLEEP); 1839 1.20 chs ctl_base_v = kmem_zalloc(tcp_vtw_entries * sz, KM_SLEEP); 1840 1.6 dyoung fatp_init(fat, n, m, fat_base, fat_hash); 1841 1.6 dyoung vtw_init(fat, ctl, tcp_vtw_entries, ctl_base_v); 1842 1.1 dyoung 1843 1.1 dyoung return 0; 1844 1.1 dyoung } 1845 1.1 dyoung 1846 1.1 dyoung /*!\brief select controlling instance 1847 1.1 dyoung */ 1848 1.1 dyoung static vtw_ctl_t * 1849 1.1 dyoung vtw_control(int af, uint32_t msl) 1850 1.1 dyoung { 1851 1.1 dyoung fatp_ctl_t *fat; 1852 1.1 dyoung vtw_ctl_t *ctl; 1853 1.11 matt int msl_class = msl_to_class(msl); 1854 1.1 dyoung 1855 1.1 dyoung if (!vtw_select(af, &fat, &ctl)) 1856 1.1 dyoung return NULL; 1857 1.1 dyoung 1858 1.1 dyoung if (!fat->base || !ctl->base.v) 1859 1.1 dyoung return NULL; 1860 1.1 dyoung 1861 1.5 dyoung if (!tcp_vtw_was_enabled) { 1862 1.5 dyoung /* This guarantees is timer ticks until we no longer need them. 1863 1.5 dyoung */ 1864 1.5 dyoung tcp_vtw_was_enabled = 1; 1865 1.5 dyoung 1866 1.5 dyoung callout_schedule(&vtw_cs, hz / 5); 1867 1.5 dyoung 1868 1.5 dyoung tcbtable.vestige = &tcp_hooks; 1869 1.5 dyoung } 1870 1.5 dyoung 1871 1.11 matt return ctl + msl_class; 1872 1.1 dyoung } 1873 1.1 dyoung 1874 1.1 dyoung /*!\brief add TCP pcb to vestigial timewait 1875 1.1 dyoung */ 1876 1.1 dyoung int 1877 1.1 dyoung vtw_add(int af, struct tcpcb *tp) 1878 1.1 dyoung { 1879 1.10 martin #ifdef VTW_DEBUG 1880 1.1 dyoung int enable; 1881 1.10 martin #endif 1882 1.1 dyoung vtw_ctl_t *ctl; 1883 1.1 dyoung vtw_t *vtw; 1884 1.1 dyoung 1885 1.1 dyoung KASSERT(mutex_owned(softnet_lock)); 1886 1.1 dyoung 1887 1.1 dyoung ctl = vtw_control(af, tp->t_msl); 1888 1.1 dyoung if (!ctl) 1889 1.1 dyoung return 0; 1890 1.1 dyoung 1891 1.10 martin #ifdef VTW_DEBUG 1892 1.1 dyoung enable = (af == AF_INET) ? 
tcp4_vtw_enable : tcp6_vtw_enable; 1893 1.10 martin #endif 1894 1.1 dyoung 1895 1.1 dyoung vtw = vtw_alloc(ctl); 1896 1.1 dyoung 1897 1.1 dyoung if (vtw) { 1898 1.1 dyoung vtw->snd_nxt = tp->snd_nxt; 1899 1.1 dyoung vtw->rcv_nxt = tp->rcv_nxt; 1900 1.1 dyoung 1901 1.1 dyoung switch (af) { 1902 1.1 dyoung case AF_INET: { 1903 1.1 dyoung struct inpcb *inp = tp->t_inpcb; 1904 1.1 dyoung vtw_v4_t *v4 = (void*)vtw; 1905 1.1 dyoung 1906 1.23 ozaki v4->faddr = in4p_faddr(inp).s_addr; 1907 1.23 ozaki v4->laddr = in4p_laddr(inp).s_addr; 1908 1.1 dyoung v4->fport = inp->inp_fport; 1909 1.1 dyoung v4->lport = inp->inp_lport; 1910 1.1 dyoung 1911 1.1 dyoung vtw->reuse_port = !!(inp->inp_socket->so_options 1912 1.1 dyoung & SO_REUSEPORT); 1913 1.1 dyoung vtw->reuse_addr = !!(inp->inp_socket->so_options 1914 1.1 dyoung & SO_REUSEADDR); 1915 1.1 dyoung vtw->v6only = 0; 1916 1.1 dyoung vtw->uid = inp->inp_socket->so_uidinfo->ui_uid; 1917 1.1 dyoung 1918 1.1 dyoung vtw_inshash_v4(ctl, vtw); 1919 1.1 dyoung 1920 1.1 dyoung 1921 1.1 dyoung #ifdef VTW_DEBUG 1922 1.1 dyoung /* Immediate lookup (connected and port) to 1923 1.1 dyoung * ensure at least that works! 
1924 1.1 dyoung */ 1925 1.1 dyoung if (enable & 4) { 1926 1.1 dyoung KASSERT(vtw_lookup_hash_v4 1927 1.1 dyoung (ctl 1928 1.23 ozaki , in4p_faddr(inp).s_addr, inp->inp_fport 1929 1.23 ozaki , in4p_laddr(inp).s_addr, inp->inp_lport 1930 1.1 dyoung , 0) 1931 1.1 dyoung == vtw); 1932 1.1 dyoung KASSERT(vtw_lookup_hash_v4 1933 1.1 dyoung (ctl 1934 1.23 ozaki , in4p_faddr(inp).s_addr, inp->inp_fport 1935 1.23 ozaki , in4p_laddr(inp).s_addr, inp->inp_lport 1936 1.1 dyoung , 1)); 1937 1.1 dyoung } 1938 1.1 dyoung /* Immediate port iterator functionality check: not wild 1939 1.1 dyoung */ 1940 1.1 dyoung if (enable & 8) { 1941 1.1 dyoung struct tcp_ports_iterator *it; 1942 1.1 dyoung struct vestigial_inpcb res; 1943 1.1 dyoung int cnt = 0; 1944 1.1 dyoung 1945 1.23 ozaki it = tcp_init_ports_v4(in4p_laddr(inp) 1946 1.1 dyoung , inp->inp_lport, 0); 1947 1.1 dyoung 1948 1.1 dyoung while (tcp_next_port_v4(it, &res)) { 1949 1.1 dyoung ++cnt; 1950 1.1 dyoung } 1951 1.1 dyoung KASSERT(cnt); 1952 1.1 dyoung } 1953 1.1 dyoung /* Immediate port iterator functionality check: wild 1954 1.1 dyoung */ 1955 1.1 dyoung if (enable & 16) { 1956 1.1 dyoung struct tcp_ports_iterator *it; 1957 1.1 dyoung struct vestigial_inpcb res; 1958 1.1 dyoung struct in_addr any; 1959 1.1 dyoung int cnt = 0; 1960 1.1 dyoung 1961 1.1 dyoung any.s_addr = htonl(INADDR_ANY); 1962 1.1 dyoung 1963 1.1 dyoung it = tcp_init_ports_v4(any, inp->inp_lport, 1); 1964 1.1 dyoung 1965 1.1 dyoung while (tcp_next_port_v4(it, &res)) { 1966 1.1 dyoung ++cnt; 1967 1.1 dyoung } 1968 1.1 dyoung KASSERT(cnt); 1969 1.1 dyoung } 1970 1.1 dyoung #endif /* VTW_DEBUG */ 1971 1.1 dyoung break; 1972 1.1 dyoung } 1973 1.1 dyoung 1974 1.1 dyoung case AF_INET6: { 1975 1.22 ozaki struct inpcb *inp = tp->t_inpcb; 1976 1.1 dyoung vtw_v6_t *v6 = (void*)vtw; 1977 1.1 dyoung 1978 1.23 ozaki v6->faddr = in6p_faddr(inp); 1979 1.23 ozaki v6->laddr = in6p_laddr(inp); 1980 1.22 ozaki v6->fport = inp->inp_fport; 1981 1.22 ozaki v6->lport = 
inp->inp_lport; 1982 1.1 dyoung 1983 1.22 ozaki vtw->reuse_port = !!(inp->inp_socket->so_options 1984 1.1 dyoung & SO_REUSEPORT); 1985 1.22 ozaki vtw->reuse_addr = !!(inp->inp_socket->so_options 1986 1.1 dyoung & SO_REUSEADDR); 1987 1.22 ozaki vtw->v6only = !!(inp->inp_flags 1988 1.1 dyoung & IN6P_IPV6_V6ONLY); 1989 1.22 ozaki vtw->uid = inp->inp_socket->so_uidinfo->ui_uid; 1990 1.1 dyoung 1991 1.1 dyoung vtw_inshash_v6(ctl, vtw); 1992 1.1 dyoung #ifdef VTW_DEBUG 1993 1.1 dyoung /* Immediate lookup (connected and port) to 1994 1.1 dyoung * ensure at least that works! 1995 1.1 dyoung */ 1996 1.1 dyoung if (enable & 4) { 1997 1.1 dyoung KASSERT(vtw_lookup_hash_v6(ctl 1998 1.23 ozaki , &in6p_faddr(inp), inp->inp_fport 1999 1.23 ozaki , &in6p_laddr(inp), inp->inp_lport 2000 1.1 dyoung , 0) 2001 1.1 dyoung == vtw); 2002 1.1 dyoung KASSERT(vtw_lookup_hash_v6 2003 1.1 dyoung (ctl 2004 1.23 ozaki , &in6p_faddr(inp), inp->inp_fport 2005 1.23 ozaki , &in6p_laddr(inp), inp->inp_lport 2006 1.1 dyoung , 1)); 2007 1.1 dyoung } 2008 1.1 dyoung /* Immediate port iterator functionality check: not wild 2009 1.1 dyoung */ 2010 1.1 dyoung if (enable & 8) { 2011 1.1 dyoung struct tcp_ports_iterator *it; 2012 1.1 dyoung struct vestigial_inpcb res; 2013 1.1 dyoung int cnt = 0; 2014 1.1 dyoung 2015 1.23 ozaki it = tcp_init_ports_v6(&in6p_laddr(inp) 2016 1.22 ozaki , inp->inp_lport, 0); 2017 1.1 dyoung 2018 1.1 dyoung while (tcp_next_port_v6(it, &res)) { 2019 1.1 dyoung ++cnt; 2020 1.1 dyoung } 2021 1.1 dyoung KASSERT(cnt); 2022 1.1 dyoung } 2023 1.1 dyoung /* Immediate port iterator functionality check: wild 2024 1.1 dyoung */ 2025 1.1 dyoung if (enable & 16) { 2026 1.1 dyoung struct tcp_ports_iterator *it; 2027 1.1 dyoung struct vestigial_inpcb res; 2028 1.1 dyoung static struct in6_addr any = IN6ADDR_ANY_INIT; 2029 1.1 dyoung int cnt = 0; 2030 1.1 dyoung 2031 1.1 dyoung it = tcp_init_ports_v6(&any 2032 1.22 ozaki , inp->inp_lport, 1); 2033 1.1 dyoung 2034 1.1 dyoung while 
(tcp_next_port_v6(it, &res)) { 2035 1.1 dyoung ++cnt; 2036 1.1 dyoung } 2037 1.1 dyoung KASSERT(cnt); 2038 1.1 dyoung } 2039 1.1 dyoung #endif /* VTW_DEBUG */ 2040 1.1 dyoung break; 2041 1.1 dyoung } 2042 1.1 dyoung } 2043 1.1 dyoung 2044 1.1 dyoung tcp_canceltimers(tp); 2045 1.1 dyoung tp = tcp_close(tp); 2046 1.1 dyoung KASSERT(!tp); 2047 1.1 dyoung 2048 1.1 dyoung return 1; 2049 1.1 dyoung } 2050 1.1 dyoung 2051 1.1 dyoung return 0; 2052 1.1 dyoung } 2053 1.1 dyoung 2054 1.1 dyoung /*!\brief restart timer for vestigial time-wait entry 2055 1.1 dyoung */ 2056 1.1 dyoung static void 2057 1.1 dyoung vtw_restart_v4(vestigial_inpcb_t *vp) 2058 1.1 dyoung { 2059 1.1 dyoung vtw_v4_t copy = *(vtw_v4_t*)vp->vtw; 2060 1.1 dyoung vtw_t *vtw; 2061 1.1 dyoung vtw_t *cp = ©.common; 2062 1.1 dyoung vtw_ctl_t *ctl; 2063 1.1 dyoung 2064 1.1 dyoung KASSERT(mutex_owned(softnet_lock)); 2065 1.1 dyoung 2066 1.1 dyoung db_trace(KTR_VTW 2067 1.1 dyoung , (vp->vtw, "vtw: restart %A:%P %A:%P" 2068 1.1 dyoung , vp->faddr.v4.s_addr, vp->fport 2069 1.1 dyoung , vp->laddr.v4.s_addr, vp->lport)); 2070 1.1 dyoung 2071 1.1 dyoung /* Class might have changed, so have a squiz. 
2072 1.1 dyoung */ 2073 1.1 dyoung ctl = vtw_control(AF_INET, class_to_msl(cp->msl_class)); 2074 1.1 dyoung vtw = vtw_alloc(ctl); 2075 1.1 dyoung 2076 1.1 dyoung if (vtw) { 2077 1.1 dyoung vtw_v4_t *v4 = (void*)vtw; 2078 1.1 dyoung 2079 1.1 dyoung /* Safe now to unhash the old entry 2080 1.1 dyoung */ 2081 1.1 dyoung vtw_del(vp->ctl, vp->vtw); 2082 1.1 dyoung 2083 1.1 dyoung vtw->snd_nxt = cp->snd_nxt; 2084 1.1 dyoung vtw->rcv_nxt = cp->rcv_nxt; 2085 1.1 dyoung 2086 1.1 dyoung v4->faddr = copy.faddr; 2087 1.1 dyoung v4->laddr = copy.laddr; 2088 1.1 dyoung v4->fport = copy.fport; 2089 1.1 dyoung v4->lport = copy.lport; 2090 1.1 dyoung 2091 1.1 dyoung vtw->reuse_port = cp->reuse_port; 2092 1.1 dyoung vtw->reuse_addr = cp->reuse_addr; 2093 1.1 dyoung vtw->v6only = 0; 2094 1.1 dyoung vtw->uid = cp->uid; 2095 1.1 dyoung 2096 1.1 dyoung vtw_inshash_v4(ctl, vtw); 2097 1.1 dyoung } 2098 1.1 dyoung 2099 1.1 dyoung vp->valid = 0; 2100 1.1 dyoung } 2101 1.1 dyoung 2102 1.1 dyoung /*!\brief restart timer for vestigial time-wait entry 2103 1.1 dyoung */ 2104 1.1 dyoung static void 2105 1.1 dyoung vtw_restart_v6(vestigial_inpcb_t *vp) 2106 1.1 dyoung { 2107 1.1 dyoung vtw_v6_t copy = *(vtw_v6_t*)vp->vtw; 2108 1.1 dyoung vtw_t *vtw; 2109 1.1 dyoung vtw_t *cp = ©.common; 2110 1.1 dyoung vtw_ctl_t *ctl; 2111 1.1 dyoung 2112 1.1 dyoung KASSERT(mutex_owned(softnet_lock)); 2113 1.1 dyoung 2114 1.1 dyoung db_trace(KTR_VTW 2115 1.1 dyoung , (vp->vtw, "vtw: restart %6A:%P %6A:%P" 2116 1.1 dyoung , db_store(&vp->faddr.v6, sizeof (vp->faddr.v6)) 2117 1.1 dyoung , vp->fport 2118 1.1 dyoung , db_store(&vp->laddr.v6, sizeof (vp->laddr.v6)) 2119 1.1 dyoung , vp->lport)); 2120 1.1 dyoung 2121 1.1 dyoung /* Class might have changed, so have a squiz. 
2122 1.1 dyoung */ 2123 1.1 dyoung ctl = vtw_control(AF_INET6, class_to_msl(cp->msl_class)); 2124 1.1 dyoung vtw = vtw_alloc(ctl); 2125 1.1 dyoung 2126 1.1 dyoung if (vtw) { 2127 1.1 dyoung vtw_v6_t *v6 = (void*)vtw; 2128 1.1 dyoung 2129 1.1 dyoung /* Safe now to unhash the old entry 2130 1.1 dyoung */ 2131 1.1 dyoung vtw_del(vp->ctl, vp->vtw); 2132 1.1 dyoung 2133 1.1 dyoung vtw->snd_nxt = cp->snd_nxt; 2134 1.1 dyoung vtw->rcv_nxt = cp->rcv_nxt; 2135 1.1 dyoung 2136 1.1 dyoung v6->faddr = copy.faddr; 2137 1.1 dyoung v6->laddr = copy.laddr; 2138 1.1 dyoung v6->fport = copy.fport; 2139 1.1 dyoung v6->lport = copy.lport; 2140 1.1 dyoung 2141 1.1 dyoung vtw->reuse_port = cp->reuse_port; 2142 1.1 dyoung vtw->reuse_addr = cp->reuse_addr; 2143 1.1 dyoung vtw->v6only = cp->v6only; 2144 1.1 dyoung vtw->uid = cp->uid; 2145 1.1 dyoung 2146 1.1 dyoung vtw_inshash_v6(ctl, vtw); 2147 1.1 dyoung } 2148 1.1 dyoung 2149 1.1 dyoung vp->valid = 0; 2150 1.1 dyoung } 2151 1.1 dyoung 2152 1.1 dyoung /*!\brief restart timer for vestigial time-wait entry 2153 1.1 dyoung */ 2154 1.1 dyoung void 2155 1.1 dyoung vtw_restart(vestigial_inpcb_t *vp) 2156 1.1 dyoung { 2157 1.1 dyoung if (!vp || !vp->valid) 2158 1.1 dyoung return; 2159 1.1 dyoung 2160 1.1 dyoung if (vp->v4) 2161 1.1 dyoung vtw_restart_v4(vp); 2162 1.1 dyoung else 2163 1.1 dyoung vtw_restart_v6(vp); 2164 1.1 dyoung } 2165 1.1 dyoung 2166 1.1 dyoung int 2167 1.7 dyoung sysctl_tcp_vtw_enable(SYSCTLFN_ARGS) 2168 1.7 dyoung { 2169 1.7 dyoung int en, rc; 2170 1.7 dyoung struct sysctlnode node; 2171 1.7 dyoung 2172 1.7 dyoung node = *rnode; 2173 1.7 dyoung en = *(int *)rnode->sysctl_data; 2174 1.7 dyoung node.sysctl_data = &en; 2175 1.7 dyoung 2176 1.7 dyoung rc = sysctl_lookup(SYSCTLFN_CALL(&node)); 2177 1.7 dyoung if (rc != 0 || newp == NULL) 2178 1.7 dyoung return rc; 2179 1.7 dyoung 2180 1.7 dyoung if (rnode->sysctl_data != &tcp4_vtw_enable && 2181 1.7 dyoung rnode->sysctl_data != &tcp6_vtw_enable) 2182 1.7 dyoung rc = ENOENT; 2183 
1.7 dyoung else if ((en & 1) == 0) 2184 1.7 dyoung rc = 0; 2185 1.7 dyoung else if (rnode->sysctl_data == &tcp4_vtw_enable) 2186 1.7 dyoung rc = vtw_control_init(AF_INET); 2187 1.7 dyoung else /* rnode->sysctl_data == &tcp6_vtw_enable */ 2188 1.7 dyoung rc = vtw_control_init(AF_INET6); 2189 1.7 dyoung 2190 1.7 dyoung if (rc == 0) 2191 1.7 dyoung *(int *)rnode->sysctl_data = en; 2192 1.7 dyoung 2193 1.7 dyoung return rc; 2194 1.7 dyoung } 2195 1.7 dyoung 2196 1.7 dyoung int 2197 1.1 dyoung vtw_earlyinit(void) 2198 1.1 dyoung { 2199 1.5 dyoung int i, rc; 2200 1.1 dyoung 2201 1.5 dyoung callout_init(&vtw_cs, 0); 2202 1.5 dyoung callout_setfunc(&vtw_cs, vtw_tick, 0); 2203 1.1 dyoung 2204 1.5 dyoung for (i = 0; i < VTW_NCLASS; ++i) { 2205 1.5 dyoung vtw_tcpv4[i].is_v4 = 1; 2206 1.5 dyoung vtw_tcpv6[i].is_v6 = 1; 2207 1.1 dyoung } 2208 1.1 dyoung 2209 1.7 dyoung if ((tcp4_vtw_enable & 1) != 0 && 2210 1.7 dyoung (rc = vtw_control_init(AF_INET)) != 0) 2211 1.7 dyoung return rc; 2212 1.7 dyoung 2213 1.7 dyoung if ((tcp6_vtw_enable & 1) != 0 && 2214 1.1 dyoung (rc = vtw_control_init(AF_INET6)) != 0) 2215 1.1 dyoung return rc; 2216 1.1 dyoung 2217 1.1 dyoung return 0; 2218 1.1 dyoung } 2219 1.1 dyoung 2220 1.1 dyoung #ifdef VTW_DEBUG 2221 1.1 dyoung #include <sys/syscallargs.h> 2222 1.1 dyoung #include <sys/sysctl.h> 2223 1.1 dyoung 2224 1.1 dyoung /*!\brief add lalp, fafp entries for debug 2225 1.1 dyoung */ 2226 1.1 dyoung int 2227 1.11 matt vtw_debug_add(int af, sin_either_t *la, sin_either_t *fa, int msl, int msl_class) 2228 1.1 dyoung { 2229 1.1 dyoung vtw_ctl_t *ctl; 2230 1.1 dyoung vtw_t *vtw; 2231 1.1 dyoung 2232 1.11 matt ctl = vtw_control(af, msl ? 
msl : class_to_msl(msl_class)); 2233 1.1 dyoung if (!ctl) 2234 1.1 dyoung return 0; 2235 1.1 dyoung 2236 1.1 dyoung vtw = vtw_alloc(ctl); 2237 1.1 dyoung 2238 1.1 dyoung if (vtw) { 2239 1.1 dyoung vtw->snd_nxt = 0; 2240 1.1 dyoung vtw->rcv_nxt = 0; 2241 1.1 dyoung 2242 1.1 dyoung switch (af) { 2243 1.1 dyoung case AF_INET: { 2244 1.1 dyoung vtw_v4_t *v4 = (void*)vtw; 2245 1.1 dyoung 2246 1.1 dyoung v4->faddr = fa->sin_addr.v4.s_addr; 2247 1.1 dyoung v4->laddr = la->sin_addr.v4.s_addr; 2248 1.1 dyoung v4->fport = fa->sin_port; 2249 1.1 dyoung v4->lport = la->sin_port; 2250 1.1 dyoung 2251 1.1 dyoung vtw->reuse_port = 1; 2252 1.1 dyoung vtw->reuse_addr = 1; 2253 1.1 dyoung vtw->v6only = 0; 2254 1.1 dyoung vtw->uid = 0; 2255 1.1 dyoung 2256 1.1 dyoung vtw_inshash_v4(ctl, vtw); 2257 1.1 dyoung break; 2258 1.1 dyoung } 2259 1.1 dyoung 2260 1.1 dyoung case AF_INET6: { 2261 1.1 dyoung vtw_v6_t *v6 = (void*)vtw; 2262 1.1 dyoung 2263 1.1 dyoung v6->faddr = fa->sin_addr.v6; 2264 1.1 dyoung v6->laddr = la->sin_addr.v6; 2265 1.1 dyoung 2266 1.1 dyoung v6->fport = fa->sin_port; 2267 1.1 dyoung v6->lport = la->sin_port; 2268 1.1 dyoung 2269 1.1 dyoung vtw->reuse_port = 1; 2270 1.1 dyoung vtw->reuse_addr = 1; 2271 1.1 dyoung vtw->v6only = 0; 2272 1.1 dyoung vtw->uid = 0; 2273 1.1 dyoung 2274 1.1 dyoung vtw_inshash_v6(ctl, vtw); 2275 1.1 dyoung break; 2276 1.1 dyoung } 2277 1.1 dyoung 2278 1.1 dyoung default: 2279 1.1 dyoung break; 2280 1.1 dyoung } 2281 1.1 dyoung 2282 1.1 dyoung return 1; 2283 1.1 dyoung } 2284 1.1 dyoung 2285 1.1 dyoung return 0; 2286 1.1 dyoung } 2287 1.1 dyoung 2288 1.1 dyoung static int vtw_syscall = 0; 2289 1.1 dyoung 2290 1.1 dyoung static int 2291 1.1 dyoung vtw_debug_process(vtw_sysargs_t *ap) 2292 1.1 dyoung { 2293 1.1 dyoung struct vestigial_inpcb vestige; 2294 1.1 dyoung int rc = 0; 2295 1.1 dyoung 2296 1.1 dyoung mutex_enter(softnet_lock); 2297 1.1 dyoung 2298 1.1 dyoung switch (ap->op) { 2299 1.1 dyoung case 0: // insert 2300 1.1 dyoung 
vtw_debug_add(ap->la.sin_family 2301 1.1 dyoung , &ap->la 2302 1.1 dyoung , &ap->fa 2303 1.1 dyoung , TCPTV_MSL 2304 1.1 dyoung , 0); 2305 1.1 dyoung break; 2306 1.1 dyoung 2307 1.1 dyoung case 1: // lookup 2308 1.1 dyoung case 2: // restart 2309 1.1 dyoung switch (ap->la.sin_family) { 2310 1.1 dyoung case AF_INET: 2311 1.1 dyoung if (tcp_lookup_v4(ap->fa.sin_addr.v4, ap->fa.sin_port, 2312 1.1 dyoung ap->la.sin_addr.v4, ap->la.sin_port, 2313 1.1 dyoung &vestige)) { 2314 1.1 dyoung if (ap->op == 2) { 2315 1.1 dyoung vtw_restart(&vestige); 2316 1.1 dyoung } 2317 1.1 dyoung rc = 0; 2318 1.1 dyoung } else 2319 1.1 dyoung rc = ESRCH; 2320 1.1 dyoung break; 2321 1.1 dyoung 2322 1.1 dyoung case AF_INET6: 2323 1.1 dyoung if (tcp_lookup_v6(&ap->fa.sin_addr.v6, ap->fa.sin_port, 2324 1.1 dyoung &ap->la.sin_addr.v6, ap->la.sin_port, 2325 1.1 dyoung &vestige)) { 2326 1.1 dyoung if (ap->op == 2) { 2327 1.1 dyoung vtw_restart(&vestige); 2328 1.1 dyoung } 2329 1.1 dyoung rc = 0; 2330 1.1 dyoung } else 2331 1.1 dyoung rc = ESRCH; 2332 1.1 dyoung break; 2333 1.1 dyoung default: 2334 1.1 dyoung rc = EINVAL; 2335 1.1 dyoung } 2336 1.1 dyoung break; 2337 1.1 dyoung 2338 1.1 dyoung default: 2339 1.1 dyoung rc = EINVAL; 2340 1.1 dyoung } 2341 1.1 dyoung 2342 1.1 dyoung mutex_exit(softnet_lock); 2343 1.1 dyoung return rc; 2344 1.1 dyoung } 2345 1.1 dyoung 2346 1.1 dyoung struct sys_vtw_args { 2347 1.1 dyoung syscallarg(const vtw_sysargs_t *) req; 2348 1.1 dyoung syscallarg(size_t) len; 2349 1.1 dyoung }; 2350 1.1 dyoung 2351 1.1 dyoung static int 2352 1.1 dyoung vtw_sys(struct lwp *l, const void *_, register_t *retval) 2353 1.1 dyoung { 2354 1.1 dyoung const struct sys_vtw_args *uap = _; 2355 1.1 dyoung void *buf; 2356 1.1 dyoung int rc; 2357 1.1 dyoung size_t len = SCARG(uap, len); 2358 1.1 dyoung 2359 1.1 dyoung if (len != sizeof (vtw_sysargs_t)) 2360 1.1 dyoung return EINVAL; 2361 1.1 dyoung 2362 1.1 dyoung buf = kmem_alloc(len, KM_SLEEP); 2363 1.1 dyoung rc = copyin(SCARG(uap, req), 
buf, len); 2364 1.1 dyoung if (!rc) { 2365 1.1 dyoung rc = vtw_debug_process(buf); 2366 1.1 dyoung } 2367 1.1 dyoung kmem_free(buf, len); 2368 1.1 dyoung 2369 1.1 dyoung return rc; 2370 1.1 dyoung } 2371 1.1 dyoung 2372 1.1 dyoung static void 2373 1.1 dyoung vtw_sanity_check(void) 2374 1.1 dyoung { 2375 1.1 dyoung vtw_ctl_t *ctl; 2376 1.1 dyoung vtw_t *vtw; 2377 1.1 dyoung int i; 2378 1.1 dyoung int n; 2379 1.1 dyoung 2380 1.1 dyoung for (i = 0; i < VTW_NCLASS; ++i) { 2381 1.1 dyoung ctl = &vtw_tcpv4[i]; 2382 1.1 dyoung 2383 1.1 dyoung if (!ctl->base.v || ctl->nalloc) 2384 1.1 dyoung continue; 2385 1.1 dyoung 2386 1.1 dyoung for (n = 0, vtw = ctl->base.v; ; ) { 2387 1.1 dyoung ++n; 2388 1.1 dyoung vtw = vtw_next(ctl, vtw); 2389 1.1 dyoung if (vtw == ctl->base.v) 2390 1.1 dyoung break; 2391 1.1 dyoung } 2392 1.1 dyoung db_trace(KTR_VTW 2393 1.1 dyoung , (ctl, "sanity: class %x n %x nfree %x" 2394 1.1 dyoung , i, n, ctl->nfree)); 2395 1.1 dyoung 2396 1.1 dyoung KASSERT(n == ctl->nfree); 2397 1.1 dyoung } 2398 1.1 dyoung 2399 1.1 dyoung for (i = 0; i < VTW_NCLASS; ++i) { 2400 1.1 dyoung ctl = &vtw_tcpv6[i]; 2401 1.1 dyoung 2402 1.1 dyoung if (!ctl->base.v || ctl->nalloc) 2403 1.1 dyoung continue; 2404 1.1 dyoung 2405 1.1 dyoung for (n = 0, vtw = ctl->base.v; ; ) { 2406 1.1 dyoung ++n; 2407 1.1 dyoung vtw = vtw_next(ctl, vtw); 2408 1.1 dyoung if (vtw == ctl->base.v) 2409 1.1 dyoung break; 2410 1.1 dyoung } 2411 1.1 dyoung db_trace(KTR_VTW 2412 1.1 dyoung , (ctl, "sanity: class %x n %x nfree %x" 2413 1.1 dyoung , i, n, ctl->nfree)); 2414 1.1 dyoung KASSERT(n == ctl->nfree); 2415 1.1 dyoung } 2416 1.1 dyoung } 2417 1.1 dyoung 2418 1.1 dyoung /*!\brief Initialise debug support. 
2419 1.1 dyoung */ 2420 1.1 dyoung static void 2421 1.1 dyoung vtw_debug_init(void) 2422 1.1 dyoung { 2423 1.1 dyoung int i; 2424 1.1 dyoung 2425 1.1 dyoung vtw_sanity_check(); 2426 1.1 dyoung 2427 1.1 dyoung if (vtw_syscall) 2428 1.1 dyoung return; 2429 1.1 dyoung 2430 1.1 dyoung for (i = 511; i; --i) { 2431 1.1 dyoung if (sysent[i].sy_call == sys_nosys) { 2432 1.1 dyoung sysent[i].sy_call = vtw_sys; 2433 1.1 dyoung sysent[i].sy_narg = 2; 2434 1.1 dyoung sysent[i].sy_argsize = sizeof (struct sys_vtw_args); 2435 1.1 dyoung sysent[i].sy_flags = 0; 2436 1.1 dyoung 2437 1.1 dyoung vtw_syscall = i; 2438 1.1 dyoung break; 2439 1.1 dyoung } 2440 1.1 dyoung } 2441 1.1 dyoung if (i) { 2442 1.1 dyoung const struct sysctlnode *node; 2443 1.1 dyoung uint32_t flags; 2444 1.1 dyoung 2445 1.1 dyoung flags = sysctl_root.sysctl_flags; 2446 1.1 dyoung 2447 1.1 dyoung sysctl_root.sysctl_flags |= CTLFLAG_READWRITE; 2448 1.1 dyoung sysctl_root.sysctl_flags &= ~CTLFLAG_PERMANENT; 2449 1.1 dyoung 2450 1.1 dyoung sysctl_createv(0, 0, 0, &node, 2451 1.1 dyoung CTLFLAG_PERMANENT, CTLTYPE_NODE, 2452 1.1 dyoung "koff", 2453 1.1 dyoung SYSCTL_DESCR("Kernel Obscure Feature Finder"), 2454 1.1 dyoung 0, 0, 0, 0, CTL_CREATE, CTL_EOL); 2455 1.1 dyoung 2456 1.1 dyoung if (!node) { 2457 1.1 dyoung sysctl_createv(0, 0, 0, &node, 2458 1.1 dyoung CTLFLAG_PERMANENT, CTLTYPE_NODE, 2459 1.1 dyoung "koffka", 2460 1.1 dyoung SYSCTL_DESCR("The Real(tm) Kernel" 2461 1.1 dyoung " Obscure Feature Finder"), 2462 1.1 dyoung 0, 0, 0, 0, CTL_CREATE, CTL_EOL); 2463 1.1 dyoung } 2464 1.1 dyoung if (node) { 2465 1.1 dyoung sysctl_createv(0, 0, 0, 0, 2466 1.1 dyoung CTLFLAG_PERMANENT|CTLFLAG_READONLY, 2467 1.1 dyoung CTLTYPE_INT, "vtw_debug_syscall", 2468 1.1 dyoung SYSCTL_DESCR("vtw debug" 2469 1.1 dyoung " system call number"), 2470 1.1 dyoung 0, 0, &vtw_syscall, 0, node->sysctl_num, 2471 1.1 dyoung CTL_CREATE, CTL_EOL); 2472 1.1 dyoung } 2473 1.1 dyoung sysctl_root.sysctl_flags = flags; 2474 1.1 dyoung } 2475 1.1 
dyoung } 2476 1.1 dyoung #else /* !VTW_DEBUG */ 2477 1.1 dyoung static void 2478 1.1 dyoung vtw_debug_init(void) 2479 1.1 dyoung { 2480 1.1 dyoung return; 2481 1.1 dyoung } 2482 1.1 dyoung #endif /* !VTW_DEBUG */ 2483