Home | History | Annotate | Line # | Download | only in netinet
tcp_vtw.c revision 1.8
      1  1.1    dyoung /*
      2  1.1    dyoung  * Copyright (c) 2011 The NetBSD Foundation, Inc.
      3  1.1    dyoung  * All rights reserved.
      4  1.1    dyoung  *
      5  1.1    dyoung  * This code is derived from software contributed to The NetBSD Foundation
      6  1.1    dyoung  * by Coyote Point Systems, Inc.
      7  1.1    dyoung  *
      8  1.1    dyoung  * Redistribution and use in source and binary forms, with or without
      9  1.1    dyoung  * modification, are permitted provided that the following conditions
     10  1.1    dyoung  * are met:
     11  1.1    dyoung  * 1. Redistributions of source code must retain the above copyright
     12  1.1    dyoung  *    notice, this list of conditions and the following disclaimer.
     13  1.1    dyoung  * 2. Redistributions in binary form must reproduce the above copyright
     14  1.1    dyoung  *    notice, this list of conditions and the following disclaimer in the
     15  1.1    dyoung  *    documentation and/or other materials provided with the distribution.
     16  1.1    dyoung  *
     17  1.1    dyoung  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     18  1.1    dyoung  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     19  1.1    dyoung  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     20  1.1    dyoung  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     21  1.1    dyoung  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     22  1.1    dyoung  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     23  1.1    dyoung  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     24  1.1    dyoung  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     25  1.1    dyoung  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     26  1.1    dyoung  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     27  1.1    dyoung  * POSSIBILITY OF SUCH DAMAGE.
     28  1.1    dyoung  */
     29  1.1    dyoung #include <sys/cdefs.h>
     30  1.1    dyoung 
     31  1.1    dyoung #include "opt_ddb.h"
     32  1.1    dyoung #include "opt_inet.h"
     33  1.1    dyoung #include "opt_ipsec.h"
     34  1.1    dyoung #include "opt_inet_csum.h"
     35  1.1    dyoung #include "opt_tcp_debug.h"
     36  1.1    dyoung 
     37  1.1    dyoung #include <sys/param.h>
     38  1.1    dyoung #include <sys/systm.h>
     39  1.1    dyoung #include <sys/malloc.h>
     40  1.1    dyoung #include <sys/kmem.h>
     41  1.1    dyoung #include <sys/mbuf.h>
     42  1.1    dyoung #include <sys/protosw.h>
     43  1.1    dyoung #include <sys/socket.h>
     44  1.1    dyoung #include <sys/socketvar.h>
     45  1.1    dyoung #include <sys/errno.h>
     46  1.1    dyoung #include <sys/syslog.h>
     47  1.1    dyoung #include <sys/pool.h>
     48  1.1    dyoung #include <sys/domain.h>
     49  1.1    dyoung #include <sys/kernel.h>
     50  1.1    dyoung #include <net/if.h>
     51  1.1    dyoung #include <net/route.h>
     52  1.1    dyoung #include <net/if_types.h>
     53  1.1    dyoung 
     54  1.1    dyoung #include <netinet/in.h>
     55  1.1    dyoung #include <netinet/in_systm.h>
     56  1.1    dyoung #include <netinet/ip.h>
     57  1.1    dyoung #include <netinet/in_pcb.h>
     58  1.1    dyoung #include <netinet/in_var.h>
     59  1.1    dyoung #include <netinet/ip_var.h>
     60  1.1    dyoung #include <netinet/in_offload.h>
     61  1.1    dyoung #include <netinet/ip6.h>
     62  1.1    dyoung #include <netinet6/ip6_var.h>
     63  1.1    dyoung #include <netinet6/in6_pcb.h>
     64  1.1    dyoung #include <netinet6/ip6_var.h>
     65  1.1    dyoung #include <netinet6/in6_var.h>
     66  1.1    dyoung #include <netinet/icmp6.h>
     67  1.1    dyoung #include <netinet6/nd6.h>
     68  1.1    dyoung 
     69  1.1    dyoung #include <netinet/tcp.h>
     70  1.1    dyoung #include <netinet/tcp_fsm.h>
     71  1.1    dyoung #include <netinet/tcp_seq.h>
     72  1.1    dyoung #include <netinet/tcp_timer.h>
     73  1.1    dyoung #include <netinet/tcp_var.h>
     74  1.1    dyoung #include <netinet/tcp_private.h>
     75  1.1    dyoung #include <netinet/tcpip.h>
     76  1.1    dyoung 
     77  1.1    dyoung #include <netinet/tcp_vtw.h>
     78  1.1    dyoung 
     79  1.8     joerg __KERNEL_RCSID(0, "$NetBSD: tcp_vtw.c,v 1.8 2011/07/17 20:54:53 joerg Exp $");
     80  1.1    dyoung 
     81  1.1    dyoung #define db_trace(__a, __b)	do { } while (/*CONSTCOND*/0)
     82  1.1    dyoung 
     83  1.1    dyoung static void vtw_debug_init(void);
     84  1.1    dyoung 
     85  1.1    dyoung fatp_ctl_t fat_tcpv4;
     86  1.1    dyoung fatp_ctl_t fat_tcpv6;
     87  1.1    dyoung vtw_ctl_t  vtw_tcpv4[VTW_NCLASS];
     88  1.1    dyoung vtw_ctl_t  vtw_tcpv6[VTW_NCLASS];
     89  1.1    dyoung vtw_stats_t vtw_stats;
     90  1.1    dyoung 
     91  1.1    dyoung /* We provide state for the lookup_ports iterator.
     92  1.1    dyoung  * As currently we are netlock-protected, there is one.
     93  1.1    dyoung  * If we were finer-grain, we would have one per CPU.
     94  1.1    dyoung  * I do not want to be in the business of alloc/free.
     95  1.1    dyoung  * The best alternate would be allocate on the caller's
     96  1.1    dyoung  * stack, but that would require them to know the struct,
     97  1.1    dyoung  * or at least the size.
     98  1.1    dyoung  * See how she goes.
     99  1.1    dyoung  */
    100  1.1    dyoung struct tcp_ports_iterator {
    101  1.1    dyoung 	union {
    102  1.1    dyoung 		struct in_addr	v4;
    103  1.1    dyoung 		struct in6_addr	v6;
    104  1.1    dyoung 	}		addr;
    105  1.1    dyoung 	u_int		port;
    106  1.1    dyoung 
    107  1.1    dyoung 	uint32_t	wild	: 1;
    108  1.1    dyoung 
    109  1.1    dyoung 	vtw_ctl_t	*ctl;
    110  1.1    dyoung 	fatp_t		*fp;
    111  1.1    dyoung 
    112  1.1    dyoung 	uint16_t	slot_idx;
    113  1.1    dyoung 	uint16_t	ctl_idx;
    114  1.1    dyoung };
    115  1.1    dyoung 
    116  1.1    dyoung static struct tcp_ports_iterator tcp_ports_iterator_v4;
    117  1.1    dyoung static struct tcp_ports_iterator tcp_ports_iterator_v6;
    118  1.1    dyoung 
    119  1.1    dyoung static int vtw_age(vtw_ctl_t *, struct timeval *);
    120  1.1    dyoung 
    121  1.1    dyoung /*!\brief allocate a fat pointer from a collection.
    122  1.1    dyoung  */
    123  1.1    dyoung static fatp_t *
    124  1.1    dyoung fatp_alloc(fatp_ctl_t *fat)
    125  1.1    dyoung {
    126  1.1    dyoung 	fatp_t	*fp	= 0;
    127  1.1    dyoung 
    128  1.1    dyoung 	if (fat->nfree) {
    129  1.1    dyoung 		fp = fat->free;
    130  1.1    dyoung 		if (fp) {
    131  1.1    dyoung 			fat->free = fatp_next(fat, fp);
    132  1.1    dyoung 			--fat->nfree;
    133  1.1    dyoung 			++fat->nalloc;
    134  1.1    dyoung 			fp->nxt = 0;
    135  1.1    dyoung 
    136  1.1    dyoung 			KASSERT(!fp->inuse);
    137  1.1    dyoung 		}
    138  1.1    dyoung 	}
    139  1.1    dyoung 
    140  1.1    dyoung 	return fp;
    141  1.1    dyoung }
    142  1.1    dyoung 
    143  1.1    dyoung /*!\brief free a fat pointer.
    144  1.1    dyoung  */
    145  1.1    dyoung static void
    146  1.1    dyoung fatp_free(fatp_ctl_t *fat, fatp_t *fp)
    147  1.1    dyoung {
    148  1.1    dyoung 	if (fp) {
    149  1.1    dyoung 		KASSERT(!fp->inuse);
    150  1.1    dyoung 		KASSERT(!fp->nxt);
    151  1.1    dyoung 
    152  1.1    dyoung 		fp->nxt = fatp_index(fat, fat->free);
    153  1.1    dyoung 		fat->free = fp;
    154  1.1    dyoung 
    155  1.1    dyoung 		++fat->nfree;
    156  1.1    dyoung 		--fat->nalloc;
    157  1.1    dyoung 	}
    158  1.1    dyoung }
    159  1.1    dyoung 
    160  1.1    dyoung /*!\brief initialise a collection of fat pointers.
    161  1.1    dyoung  *
    162  1.1    dyoung  *\param n	# hash buckets
    163  1.1    dyoung  *\param m	total # fat pointers to allocate
    164  1.1    dyoung  *
    165  1.1    dyoung  * We allocate 2x as much, as we have two hashes: full and lport only.
    166  1.1    dyoung  */
    167  1.1    dyoung static void
    168  1.6    dyoung fatp_init(fatp_ctl_t *fat, uint32_t n, uint32_t m,
    169  1.6    dyoung     fatp_t *fat_base, fatp_t **fat_hash)
    170  1.1    dyoung {
    171  1.1    dyoung 	fatp_t	*fp;
    172  1.1    dyoung 
    173  1.1    dyoung 	KASSERT(n <= FATP_MAX / 2);
    174  1.1    dyoung 
    175  1.6    dyoung 	fat->hash = fat_hash;
    176  1.6    dyoung 	fat->base = fat_base;
    177  1.1    dyoung 
    178  1.1    dyoung 	fat->port = &fat->hash[m];
    179  1.1    dyoung 
    180  1.1    dyoung 	fat->mask   = m - 1;	// ASSERT is power of 2 (m)
    181  1.1    dyoung 	fat->lim    = fat->base + 2*n - 1;
    182  1.1    dyoung 	fat->nfree  = 0;
    183  1.1    dyoung 	fat->nalloc = 2*n;
    184  1.1    dyoung 
    185  1.1    dyoung 	/* Initialise the free list.
    186  1.1    dyoung 	 */
    187  1.1    dyoung 	for (fp = fat->lim; fp >= fat->base; --fp) {
    188  1.1    dyoung 		fatp_free(fat, fp);
    189  1.1    dyoung 	}
    190  1.1    dyoung }
    191  1.1    dyoung 
/*
 * The `xtra' is XORed into the tag stored.
 * The table is indexed by slot number within a fat pointer.
 */
static uint32_t fatp_xtra[] = {
	0x11111111, 0x22222222, 0x33333333, 0x44444444,
	0x55555555, 0x66666666, 0x77777777, 0x88888888,

	0x12121212, 0x21212121, 0x34343434, 0x43434343,
	0x56565656, 0x65656565, 0x78787878, 0x87878787,

	0x11221122, 0x22112211, 0x33443344, 0x44334433,
	0x55665566, 0x66556655, 0x77887788, 0x88778877,

	0x11112222, 0x22221111, 0x33334444, 0x44443333,
	0x55556666, 0x66665555, 0x77778888, 0x88887777,
};
    205  1.1    dyoung 
    206  1.1    dyoung /*!\brief turn a {fatp_t*,slot} into an integral key.
    207  1.1    dyoung  *
    208  1.1    dyoung  * The key can be used to obtain the fatp_t, and the slot,
    209  1.1    dyoung  * as it directly encodes them.
    210  1.1    dyoung  */
    211  1.1    dyoung static inline uint32_t
    212  1.1    dyoung fatp_key(fatp_ctl_t *fat, fatp_t *fp, uint32_t slot)
    213  1.1    dyoung {
    214  1.1    dyoung 	CTASSERT(CACHE_LINE_SIZE == 32 ||
    215  1.1    dyoung 	         CACHE_LINE_SIZE == 64 ||
    216  1.1    dyoung 		 CACHE_LINE_SIZE == 128);
    217  1.1    dyoung 
    218  1.1    dyoung 	switch (fatp_ntags()) {
    219  1.1    dyoung 	case 7:
    220  1.1    dyoung 		return (fatp_index(fat, fp) << 3) | slot;
    221  1.1    dyoung 	case 15:
    222  1.1    dyoung 		return (fatp_index(fat, fp) << 4) | slot;
    223  1.1    dyoung 	case 31:
    224  1.1    dyoung 		return (fatp_index(fat, fp) << 5) | slot;
    225  1.1    dyoung 	default:
    226  1.1    dyoung 		KASSERT(0 && "no support, for no good reason");
    227  1.1    dyoung 		return ~0;
    228  1.1    dyoung 	}
    229  1.1    dyoung }
    230  1.1    dyoung 
    231  1.1    dyoung static inline uint32_t
    232  1.1    dyoung fatp_slot_from_key(fatp_ctl_t *fat, uint32_t key)
    233  1.1    dyoung {
    234  1.1    dyoung 	CTASSERT(CACHE_LINE_SIZE == 32 ||
    235  1.1    dyoung 	         CACHE_LINE_SIZE == 64 ||
    236  1.1    dyoung 		 CACHE_LINE_SIZE == 128);
    237  1.1    dyoung 
    238  1.1    dyoung 	switch (fatp_ntags()) {
    239  1.1    dyoung 	case 7:
    240  1.1    dyoung 		return key & 7;
    241  1.1    dyoung 	case 15:
    242  1.1    dyoung 		return key & 15;
    243  1.1    dyoung 	case 31:
    244  1.1    dyoung 		return key & 31;
    245  1.1    dyoung 	default:
    246  1.1    dyoung 		KASSERT(0 && "no support, for no good reason");
    247  1.1    dyoung 		return ~0;
    248  1.1    dyoung 	}
    249  1.1    dyoung }
    250  1.1    dyoung 
    251  1.1    dyoung static inline fatp_t *
    252  1.1    dyoung fatp_from_key(fatp_ctl_t *fat, uint32_t key)
    253  1.1    dyoung {
    254  1.1    dyoung 	CTASSERT(CACHE_LINE_SIZE == 32 ||
    255  1.1    dyoung 	         CACHE_LINE_SIZE == 64 ||
    256  1.1    dyoung 		 CACHE_LINE_SIZE == 128);
    257  1.1    dyoung 
    258  1.1    dyoung 	switch (fatp_ntags()) {
    259  1.1    dyoung 	case 7:
    260  1.1    dyoung 		key >>= 3;
    261  1.1    dyoung 		break;
    262  1.1    dyoung 	case 15:
    263  1.1    dyoung 		key >>= 4;
    264  1.1    dyoung 		break;
    265  1.1    dyoung 	case 31:
    266  1.1    dyoung 		key >>= 5;
    267  1.1    dyoung 		break;
    268  1.1    dyoung 	default:
    269  1.1    dyoung 		KASSERT(0 && "no support, for no good reason");
    270  1.1    dyoung 		return 0;
    271  1.1    dyoung 	}
    272  1.1    dyoung 
    273  1.1    dyoung 	return key ? fat->base + key - 1 : 0;
    274  1.1    dyoung }
    275  1.1    dyoung 
    276  1.1    dyoung static inline uint32_t
    277  1.1    dyoung idx_encode(vtw_ctl_t *ctl, uint32_t idx)
    278  1.1    dyoung {
    279  1.1    dyoung 	return (idx << ctl->idx_bits) | idx;
    280  1.1    dyoung }
    281  1.1    dyoung 
    282  1.1    dyoung static inline uint32_t
    283  1.1    dyoung idx_decode(vtw_ctl_t *ctl, uint32_t bits)
    284  1.1    dyoung {
    285  1.1    dyoung 	uint32_t	idx	= bits & ctl->idx_mask;
    286  1.1    dyoung 
    287  1.1    dyoung 	if (idx_encode(ctl, idx) == bits)
    288  1.1    dyoung 		return idx;
    289  1.1    dyoung 	else
    290  1.1    dyoung 		return ~0;
    291  1.1    dyoung }
    292  1.1    dyoung 
    293  1.1    dyoung /*!\brief	insert index into fatp hash
    294  1.1    dyoung  *
    295  1.1    dyoung  *\param	idx	-	index of element being placed in hash chain
    296  1.1    dyoung  *\param	tag	-	32-bit tag identifier
    297  1.1    dyoung  *
    298  1.1    dyoung  *\returns
    299  1.1    dyoung  *	value which can be used to locate entry.
    300  1.1    dyoung  *
    301  1.1    dyoung  *\note
    302  1.1    dyoung  *	we rely on the fact that there are unused high bits in the index
    303  1.1    dyoung  *	for verification purposes on lookup.
    304  1.1    dyoung  */
    305  1.1    dyoung 
    306  1.1    dyoung static inline uint32_t
    307  1.1    dyoung fatp_vtw_inshash(fatp_ctl_t *fat, uint32_t idx, uint32_t tag, int which,
    308  1.1    dyoung     void *dbg)
    309  1.1    dyoung {
    310  1.1    dyoung 	fatp_t	*fp;
    311  1.1    dyoung 	fatp_t	**hash = (which ? fat->port : fat->hash);
    312  1.1    dyoung 	int	i;
    313  1.1    dyoung 
    314  1.1    dyoung 	fp = hash[tag & fat->mask];
    315  1.1    dyoung 
    316  1.1    dyoung 	while (!fp || fatp_full(fp)) {
    317  1.1    dyoung 		fatp_t	*fq;
    318  1.1    dyoung 
    319  1.1    dyoung 		/* All entries are inuse at the top level.
    320  1.1    dyoung 		 * We allocate a spare, and push the top level
    321  1.1    dyoung 		 * down one.  All entries in the fp we push down
    322  1.1    dyoung 		 * (think of a tape worm here) will be expelled sooner than
    323  1.1    dyoung 		 * any entries added subsequently to this hash bucket.
    324  1.1    dyoung 		 * This is a property of the time waits we are exploiting.
    325  1.1    dyoung 		 */
    326  1.1    dyoung 
    327  1.1    dyoung 		fq = fatp_alloc(fat);
    328  1.1    dyoung 		if (!fq) {
    329  1.1    dyoung 			vtw_age(fat->vtw, 0);
    330  1.1    dyoung 			fp = hash[tag & fat->mask];
    331  1.1    dyoung 			continue;
    332  1.1    dyoung 		}
    333  1.1    dyoung 
    334  1.1    dyoung 		fq->inuse = 0;
    335  1.1    dyoung 		fq->nxt   = fatp_index(fat, fp);
    336  1.1    dyoung 
    337  1.1    dyoung 		hash[tag & fat->mask] = fq;
    338  1.1    dyoung 
    339  1.1    dyoung 		fp = fq;
    340  1.1    dyoung 	}
    341  1.1    dyoung 
    342  1.1    dyoung 	KASSERT(!fatp_full(fp));
    343  1.1    dyoung 
    344  1.1    dyoung 	/* Fill highest index first.  Lookup is lowest first.
    345  1.1    dyoung 	 */
    346  1.1    dyoung 	for (i = fatp_ntags(); --i >= 0; ) {
    347  1.1    dyoung 		if (!((1 << i) & fp->inuse)) {
    348  1.1    dyoung 			break;
    349  1.1    dyoung 		}
    350  1.1    dyoung 	}
    351  1.1    dyoung 
    352  1.1    dyoung 	fp->inuse |= 1 << i;
    353  1.1    dyoung 	fp->tag[i] = tag ^ idx_encode(fat->vtw, idx) ^ fatp_xtra[i];
    354  1.1    dyoung 
    355  1.1    dyoung 	db_trace(KTR_VTW
    356  1.1    dyoung 		 , (fp, "fat: inuse %5.5x tag[%x] %8.8x"
    357  1.1    dyoung 		    , fp->inuse
    358  1.1    dyoung 		    , i, fp->tag[i]));
    359  1.1    dyoung 
    360  1.1    dyoung 	return fatp_key(fat, fp, i);
    361  1.1    dyoung }
    362  1.1    dyoung 
    363  1.1    dyoung static inline int
    364  1.1    dyoung vtw_alive(const vtw_t *vtw)
    365  1.1    dyoung {
    366  1.1    dyoung 	return vtw->hashed && vtw->expire.tv_sec;
    367  1.1    dyoung }
    368  1.1    dyoung 
    369  1.1    dyoung static inline uint32_t
    370  1.1    dyoung vtw_index_v4(vtw_ctl_t *ctl, vtw_v4_t *v4)
    371  1.1    dyoung {
    372  1.1    dyoung 	if (ctl->base.v4 <= v4 && v4 <= ctl->lim.v4)
    373  1.1    dyoung 		return v4 - ctl->base.v4;
    374  1.1    dyoung 
    375  1.1    dyoung 	KASSERT(0 && "vtw out of bounds");
    376  1.1    dyoung 
    377  1.1    dyoung 	return ~0;
    378  1.1    dyoung }
    379  1.1    dyoung 
    380  1.1    dyoung static inline uint32_t
    381  1.1    dyoung vtw_index_v6(vtw_ctl_t *ctl, vtw_v6_t *v6)
    382  1.1    dyoung {
    383  1.1    dyoung 	if (ctl->base.v6 <= v6 && v6 <= ctl->lim.v6)
    384  1.1    dyoung 		return v6 - ctl->base.v6;
    385  1.1    dyoung 
    386  1.1    dyoung 	KASSERT(0 && "vtw out of bounds");
    387  1.1    dyoung 
    388  1.1    dyoung 	return ~0;
    389  1.1    dyoung }
    390  1.1    dyoung 
    391  1.1    dyoung static inline uint32_t
    392  1.1    dyoung vtw_index(vtw_ctl_t *ctl, vtw_t *vtw)
    393  1.1    dyoung {
    394  1.1    dyoung 	if (ctl->clidx)
    395  1.1    dyoung 		ctl = ctl->ctl;
    396  1.1    dyoung 
    397  1.1    dyoung 	if (ctl->is_v4)
    398  1.1    dyoung 		return vtw_index_v4(ctl, (vtw_v4_t *)vtw);
    399  1.1    dyoung 
    400  1.1    dyoung 	if (ctl->is_v6)
    401  1.1    dyoung 		return vtw_index_v6(ctl, (vtw_v6_t *)vtw);
    402  1.1    dyoung 
    403  1.1    dyoung 	KASSERT(0 && "neither 4 nor 6.  most curious.");
    404  1.1    dyoung 
    405  1.1    dyoung 	return ~0;
    406  1.1    dyoung }
    407  1.1    dyoung 
    408  1.1    dyoung static inline vtw_t *
    409  1.1    dyoung vtw_from_index(vtw_ctl_t *ctl, uint32_t idx)
    410  1.1    dyoung {
    411  1.1    dyoung 	if (ctl->clidx)
    412  1.1    dyoung 		ctl = ctl->ctl;
    413  1.1    dyoung 
    414  1.1    dyoung 	/* See if the index looks like it might be an index.
    415  1.1    dyoung 	 * Bits on outside of the valid index bits is a give away.
    416  1.1    dyoung 	 */
    417  1.1    dyoung 	idx = idx_decode(ctl, idx);
    418  1.1    dyoung 
    419  1.1    dyoung 	if (idx == ~0) {
    420  1.1    dyoung 		return 0;
    421  1.1    dyoung 	} else if (ctl->is_v4) {
    422  1.1    dyoung 		vtw_v4_t	*vtw = ctl->base.v4 + idx;
    423  1.1    dyoung 
    424  1.1    dyoung 		return (ctl->base.v4 <= vtw && vtw <= ctl->lim.v4)
    425  1.1    dyoung 			? &vtw->common : 0;
    426  1.1    dyoung 	} else if (ctl->is_v6) {
    427  1.1    dyoung 		vtw_v6_t	*vtw = ctl->base.v6 + idx;
    428  1.1    dyoung 
    429  1.1    dyoung 		return (ctl->base.v6 <= vtw && vtw <= ctl->lim.v6)
    430  1.1    dyoung 			? &vtw->common : 0;
    431  1.1    dyoung 	} else {
    432  1.1    dyoung 		KASSERT(0 && "badness");
    433  1.1    dyoung 		return 0;
    434  1.1    dyoung 	}
    435  1.1    dyoung }
    436  1.1    dyoung 
    437  1.1    dyoung /*!\brief return the next vtw after this one.
    438  1.1    dyoung  *
    439  1.1    dyoung  * Due to the differing sizes of the entries in differing
    440  1.1    dyoung  * arenas, we have to ensure we ++ the correct pointer type.
    441  1.1    dyoung  *
    442  1.1    dyoung  * Also handles wrap.
    443  1.1    dyoung  */
    444  1.1    dyoung static inline vtw_t *
    445  1.1    dyoung vtw_next(vtw_ctl_t *ctl, vtw_t *vtw)
    446  1.1    dyoung {
    447  1.1    dyoung 	if (ctl->is_v4) {
    448  1.1    dyoung 		vtw_v4_t	*v4 = (void*)vtw;
    449  1.1    dyoung 
    450  1.1    dyoung 		vtw = &(++v4)->common;
    451  1.1    dyoung 	} else {
    452  1.1    dyoung 		vtw_v6_t	*v6 = (void*)vtw;
    453  1.1    dyoung 
    454  1.1    dyoung 		vtw = &(++v6)->common;
    455  1.1    dyoung 	}
    456  1.1    dyoung 
    457  1.1    dyoung 	if (vtw > ctl->lim.v)
    458  1.1    dyoung 		vtw = ctl->base.v;
    459  1.1    dyoung 
    460  1.1    dyoung 	return vtw;
    461  1.1    dyoung }
    462  1.1    dyoung 
    463  1.1    dyoung /*!\brief	remove entry from FATP hash chains
    464  1.1    dyoung  */
    465  1.1    dyoung static inline void
    466  1.1    dyoung vtw_unhash(vtw_ctl_t *ctl, vtw_t *vtw)
    467  1.1    dyoung {
    468  1.1    dyoung 	fatp_ctl_t	*fat	= ctl->fat;
    469  1.1    dyoung 	fatp_t		*fp;
    470  1.1    dyoung 	uint32_t	key = vtw->key;
    471  1.1    dyoung 	uint32_t	tag, slot, idx;
    472  1.1    dyoung 	vtw_v4_t	*v4 = (void*)vtw;
    473  1.1    dyoung 	vtw_v6_t	*v6 = (void*)vtw;
    474  1.1    dyoung 
    475  1.1    dyoung 	if (!vtw->hashed) {
    476  1.1    dyoung 		KASSERT(0 && "unhashed");
    477  1.1    dyoung 		return;
    478  1.1    dyoung 	}
    479  1.1    dyoung 
    480  1.1    dyoung 	if (fat->vtw->is_v4) {
    481  1.1    dyoung 		tag = v4_tag(v4->faddr, v4->fport, v4->laddr, v4->lport);
    482  1.1    dyoung 	} else if (fat->vtw->is_v6) {
    483  1.1    dyoung 		tag = v6_tag(&v6->faddr, v6->fport, &v6->laddr, v6->lport);
    484  1.1    dyoung 	} else {
    485  1.1    dyoung 		tag = 0;
    486  1.1    dyoung 		KASSERT(0 && "not reached");
    487  1.1    dyoung 	}
    488  1.1    dyoung 
    489  1.1    dyoung 	/* Remove from fat->hash[]
    490  1.1    dyoung 	 */
    491  1.1    dyoung 	slot = fatp_slot_from_key(fat, key);
    492  1.1    dyoung 	fp   = fatp_from_key(fat, key);
    493  1.1    dyoung 	idx  = vtw_index(ctl, vtw);
    494  1.1    dyoung 
    495  1.1    dyoung 	db_trace(KTR_VTW
    496  1.1    dyoung 		 , (fp, "fat: del inuse %5.5x slot %x idx %x key %x tag %x"
    497  1.1    dyoung 		    , fp->inuse, slot, idx, key, tag));
    498  1.1    dyoung 
    499  1.1    dyoung 	KASSERT(fp->inuse & (1 << slot));
    500  1.1    dyoung 	KASSERT(fp->tag[slot] == (tag ^ idx_encode(ctl, idx)
    501  1.1    dyoung 				  ^ fatp_xtra[slot]));
    502  1.1    dyoung 
    503  1.1    dyoung 	if ((fp->inuse & (1 << slot))
    504  1.1    dyoung 	    && fp->tag[slot] == (tag ^ idx_encode(ctl, idx)
    505  1.1    dyoung 				 ^ fatp_xtra[slot])) {
    506  1.1    dyoung 		fp->inuse ^= 1 << slot;
    507  1.1    dyoung 		fp->tag[slot] = 0;
    508  1.1    dyoung 
    509  1.1    dyoung 		/* When we delete entries, we do not compact.  This is
    510  1.1    dyoung 		 * due to temporality.  We add entries, and they
    511  1.1    dyoung 		 * (eventually) expire. Older entries will be further
    512  1.1    dyoung 		 * down the chain.
    513  1.1    dyoung 		 */
    514  1.1    dyoung 		if (!fp->inuse) {
    515  1.1    dyoung 			uint32_t hi = tag & fat->mask;
    516  1.1    dyoung 			fatp_t	*fq = 0;
    517  1.1    dyoung 			fatp_t	*fr = fat->hash[hi];
    518  1.1    dyoung 
    519  1.1    dyoung 			while (fr && fr != fp) {
    520  1.1    dyoung 				fr = fatp_next(fat, fq = fr);
    521  1.1    dyoung 			}
    522  1.1    dyoung 
    523  1.1    dyoung 			if (fr == fp) {
    524  1.1    dyoung 				if (fq) {
    525  1.1    dyoung 					fq->nxt = fp->nxt;
    526  1.1    dyoung 					fp->nxt = 0;
    527  1.1    dyoung 					fatp_free(fat, fp);
    528  1.1    dyoung 				} else {
    529  1.1    dyoung 					KASSERT(fat->hash[hi] == fp);
    530  1.1    dyoung 
    531  1.1    dyoung 					if (fp->nxt) {
    532  1.1    dyoung 						fat->hash[hi]
    533  1.1    dyoung 							= fatp_next(fat, fp);
    534  1.1    dyoung 						fp->nxt = 0;
    535  1.1    dyoung 						fatp_free(fat, fp);
    536  1.1    dyoung 					} else {
    537  1.1    dyoung 						/* retain for next use.
    538  1.1    dyoung 						 */
    539  1.1    dyoung 						;
    540  1.1    dyoung 					}
    541  1.1    dyoung 				}
    542  1.1    dyoung 			} else {
    543  1.1    dyoung 				fr = fat->hash[hi];
    544  1.1    dyoung 
    545  1.1    dyoung 				do {
    546  1.1    dyoung 					db_trace(KTR_VTW
    547  1.1    dyoung 						 , (fr
    548  1.1    dyoung 						    , "fat:*del inuse %5.5x"
    549  1.1    dyoung 						    " nxt %x"
    550  1.1    dyoung 						    , fr->inuse, fr->nxt));
    551  1.1    dyoung 
    552  1.1    dyoung 					fr = fatp_next(fat, fq = fr);
    553  1.1    dyoung 				} while (fr && fr != fp);
    554  1.1    dyoung 
    555  1.1    dyoung 				KASSERT(0 && "oops");
    556  1.1    dyoung 			}
    557  1.1    dyoung 		}
    558  1.1    dyoung 		vtw->key ^= ~0;
    559  1.1    dyoung 	}
    560  1.1    dyoung 
    561  1.1    dyoung 	if (fat->vtw->is_v4) {
    562  1.1    dyoung 		tag = v4_port_tag(v4->lport);
    563  1.1    dyoung 	} else if (fat->vtw->is_v6) {
    564  1.1    dyoung 		tag = v6_port_tag(v6->lport);
    565  1.1    dyoung 	}
    566  1.1    dyoung 
    567  1.1    dyoung 	/* Remove from fat->port[]
    568  1.1    dyoung 	 */
    569  1.1    dyoung 	key  = vtw->port_key;
    570  1.1    dyoung 	slot = fatp_slot_from_key(fat, key);
    571  1.1    dyoung 	fp   = fatp_from_key(fat, key);
    572  1.1    dyoung 	idx  = vtw_index(ctl, vtw);
    573  1.1    dyoung 
    574  1.1    dyoung 	db_trace(KTR_VTW
    575  1.1    dyoung 		 , (fp, "fatport: del inuse %5.5x"
    576  1.1    dyoung 		    " slot %x idx %x key %x tag %x"
    577  1.1    dyoung 		    , fp->inuse, slot, idx, key, tag));
    578  1.1    dyoung 
    579  1.1    dyoung 	KASSERT(fp->inuse & (1 << slot));
    580  1.1    dyoung 	KASSERT(fp->tag[slot] == (tag ^ idx_encode(ctl, idx)
    581  1.1    dyoung 				  ^ fatp_xtra[slot]));
    582  1.1    dyoung 
    583  1.1    dyoung 	if ((fp->inuse & (1 << slot))
    584  1.1    dyoung 	    && fp->tag[slot] == (tag ^ idx_encode(ctl, idx)
    585  1.1    dyoung 				 ^ fatp_xtra[slot])) {
    586  1.1    dyoung 		fp->inuse ^= 1 << slot;
    587  1.1    dyoung 		fp->tag[slot] = 0;
    588  1.1    dyoung 
    589  1.1    dyoung 		if (!fp->inuse) {
    590  1.1    dyoung 			uint32_t hi = tag & fat->mask;
    591  1.1    dyoung 			fatp_t	*fq = 0;
    592  1.1    dyoung 			fatp_t	*fr = fat->port[hi];
    593  1.1    dyoung 
    594  1.1    dyoung 			while (fr && fr != fp) {
    595  1.1    dyoung 				fr = fatp_next(fat, fq = fr);
    596  1.1    dyoung 			}
    597  1.1    dyoung 
    598  1.1    dyoung 			if (fr == fp) {
    599  1.1    dyoung 				if (fq) {
    600  1.1    dyoung 					fq->nxt = fp->nxt;
    601  1.1    dyoung 					fp->nxt = 0;
    602  1.1    dyoung 					fatp_free(fat, fp);
    603  1.1    dyoung 				} else {
    604  1.1    dyoung 					KASSERT(fat->port[hi] == fp);
    605  1.1    dyoung 
    606  1.1    dyoung 					if (fp->nxt) {
    607  1.1    dyoung 						fat->port[hi]
    608  1.1    dyoung 							= fatp_next(fat, fp);
    609  1.1    dyoung 						fp->nxt = 0;
    610  1.1    dyoung 						fatp_free(fat, fp);
    611  1.1    dyoung 					} else {
    612  1.1    dyoung 						/* retain for next use.
    613  1.1    dyoung 						 */
    614  1.1    dyoung 						;
    615  1.1    dyoung 					}
    616  1.1    dyoung 				}
    617  1.1    dyoung 			}
    618  1.1    dyoung 		}
    619  1.1    dyoung 		vtw->port_key ^= ~0;
    620  1.1    dyoung 	}
    621  1.1    dyoung 
    622  1.1    dyoung 	vtw->hashed = 0;
    623  1.1    dyoung }
    624  1.1    dyoung 
    625  1.1    dyoung /*!\brief	remove entry from hash, possibly free.
    626  1.1    dyoung  */
    627  1.1    dyoung void
    628  1.1    dyoung vtw_del(vtw_ctl_t *ctl, vtw_t *vtw)
    629  1.1    dyoung {
    630  1.1    dyoung 	KASSERT(mutex_owned(softnet_lock));
    631  1.1    dyoung 
    632  1.1    dyoung 	if (vtw->hashed) {
    633  1.1    dyoung 		++vtw_stats.del;
    634  1.1    dyoung 		vtw_unhash(ctl, vtw);
    635  1.1    dyoung 	}
    636  1.1    dyoung 
    637  1.1    dyoung 	/* We only delete the oldest entry.
    638  1.1    dyoung 	 */
    639  1.1    dyoung 	if (vtw != ctl->oldest.v)
    640  1.1    dyoung 		return;
    641  1.1    dyoung 
    642  1.1    dyoung 	--ctl->nalloc;
    643  1.1    dyoung 	++ctl->nfree;
    644  1.1    dyoung 
    645  1.1    dyoung 	vtw->expire.tv_sec  = 0;
    646  1.1    dyoung 	vtw->expire.tv_usec = ~0;
    647  1.1    dyoung 
    648  1.1    dyoung 	if (!ctl->nalloc)
    649  1.1    dyoung 		ctl->oldest.v = 0;
    650  1.1    dyoung 
    651  1.1    dyoung 	ctl->oldest.v = vtw_next(ctl, vtw);
    652  1.1    dyoung }
    653  1.1    dyoung 
    654  1.4  dholland /*!\brief	insert vestigial timewait in hash chain
    655  1.1    dyoung  */
    656  1.1    dyoung static void
    657  1.1    dyoung vtw_inshash_v4(vtw_ctl_t *ctl, vtw_t *vtw)
    658  1.1    dyoung {
    659  1.1    dyoung 	uint32_t	idx	= vtw_index(ctl, vtw);
    660  1.1    dyoung 	uint32_t	tag;
    661  1.1    dyoung 	vtw_v4_t	*v4 = (void*)vtw;
    662  1.1    dyoung 
    663  1.1    dyoung 	KASSERT(mutex_owned(softnet_lock));
    664  1.1    dyoung 	KASSERT(!vtw->hashed);
    665  1.1    dyoung 	KASSERT(ctl->clidx == vtw->msl_class);
    666  1.1    dyoung 
    667  1.1    dyoung 	++vtw_stats.ins;
    668  1.1    dyoung 
    669  1.1    dyoung 	tag = v4_tag(v4->faddr, v4->fport,
    670  1.1    dyoung 		     v4->laddr, v4->lport);
    671  1.1    dyoung 
    672  1.1    dyoung 	vtw->key = fatp_vtw_inshash(ctl->fat, idx, tag, 0, vtw);
    673  1.1    dyoung 
    674  1.1    dyoung 	db_trace(KTR_VTW, (ctl
    675  1.1    dyoung 			   , "vtw: ins %8.8x:%4.4x %8.8x:%4.4x"
    676  1.1    dyoung 			   " tag %8.8x key %8.8x"
    677  1.1    dyoung 			   , v4->faddr, v4->fport
    678  1.1    dyoung 			   , v4->laddr, v4->lport
    679  1.1    dyoung 			   , tag
    680  1.1    dyoung 			   , vtw->key));
    681  1.1    dyoung 
    682  1.1    dyoung 	tag = v4_port_tag(v4->lport);
    683  1.1    dyoung 	vtw->port_key = fatp_vtw_inshash(ctl->fat, idx, tag, 1, vtw);
    684  1.1    dyoung 
    685  1.1    dyoung 	db_trace(KTR_VTW, (ctl, "vtw: ins %P - %4.4x tag %8.8x key %8.8x"
    686  1.1    dyoung 			   , v4->lport, v4->lport
    687  1.1    dyoung 			   , tag
    688  1.1    dyoung 			   , vtw->key));
    689  1.1    dyoung 
    690  1.1    dyoung 	vtw->hashed = 1;
    691  1.1    dyoung }
    692  1.1    dyoung 
    693  1.4  dholland /*!\brief	insert vestigial timewait in hash chain
    694  1.1    dyoung  */
    695  1.1    dyoung static void
    696  1.1    dyoung vtw_inshash_v6(vtw_ctl_t *ctl, vtw_t *vtw)
    697  1.1    dyoung {
    698  1.1    dyoung 	uint32_t	idx	= vtw_index(ctl, vtw);
    699  1.1    dyoung 	uint32_t	tag;
    700  1.1    dyoung 	vtw_v6_t	*v6	= (void*)vtw;
    701  1.1    dyoung 
    702  1.1    dyoung 	KASSERT(mutex_owned(softnet_lock));
    703  1.1    dyoung 	KASSERT(!vtw->hashed);
    704  1.1    dyoung 	KASSERT(ctl->clidx == vtw->msl_class);
    705  1.1    dyoung 
    706  1.1    dyoung 	++vtw_stats.ins;
    707  1.1    dyoung 
    708  1.1    dyoung 	tag = v6_tag(&v6->faddr, v6->fport,
    709  1.1    dyoung 		     &v6->laddr, v6->lport);
    710  1.1    dyoung 
    711  1.1    dyoung 	vtw->key = fatp_vtw_inshash(ctl->fat, idx, tag, 0, vtw);
    712  1.1    dyoung 
    713  1.1    dyoung 	tag = v6_port_tag(v6->lport);
    714  1.1    dyoung 	vtw->port_key = fatp_vtw_inshash(ctl->fat, idx, tag, 1, vtw);
    715  1.1    dyoung 
    716  1.1    dyoung 	db_trace(KTR_VTW, (ctl, "vtw: ins %P - %4.4x tag %8.8x key %8.8x"
    717  1.1    dyoung 			   , v6->lport, v6->lport
    718  1.1    dyoung 			   , tag
    719  1.1    dyoung 			   , vtw->key));
    720  1.1    dyoung 
    721  1.1    dyoung 	vtw->hashed = 1;
    722  1.1    dyoung }
    723  1.1    dyoung 
                       /*!\brief	look up an IPv4 vestigial timewait entry
                        *
                        * \param ctl	control block to search; if it or ctl->fat is null the
                        *		function returns 0 without touching any counter
                        * \param faddr	foreign address (compared only when \a which is 0)
                        * \param fport	foreign port (compared only when \a which is 0)
                        * \param laddr	local address (compared only when \a which is 0)
                        * \param lport	local port (always compared)
                        * \param which	0: probe ctl->fat->hash using the full connection tag;
                        *		non-zero: probe ctl->fat->port using the local-port tag.
                        *		Also used as the index into the vtw_stats arrays, so
                        *		presumably only 0 and 1 are valid -- confirm with callers.
                        * \return	the matching live entry, or 0 on a miss
                        */
    724  1.1    dyoung static vtw_t *
    725  1.1    dyoung vtw_lookup_hash_v4(vtw_ctl_t *ctl, uint32_t faddr, uint16_t fport
    726  1.1    dyoung 				 , uint32_t laddr, uint16_t lport
    727  1.1    dyoung 				 , int which)
    728  1.1    dyoung {
    729  1.1    dyoung 	vtw_v4_t	*v4;
    730  1.1    dyoung 	vtw_t		*vtw;
    731  1.1    dyoung 	uint32_t	tag;
    732  1.1    dyoung 	fatp_t		*fp;
    733  1.1    dyoung 	int		i;
    734  1.1    dyoung 	uint32_t	fatps = 0, probes = 0, losings = 0;	/* per-call tallies for the max_* stats */
    735  1.1    dyoung 
    736  1.1    dyoung 	if (!ctl || !ctl->fat)
    737  1.1    dyoung 		return 0;
    738  1.1    dyoung 
    739  1.1    dyoung 	++vtw_stats.look[which];
    740  1.1    dyoung 
                       	/* Pick the tag and the chain head for the requested hash.
                       	 */
    741  1.1    dyoung 	if (which) {
    742  1.1    dyoung 		tag = v4_port_tag(lport);
    743  1.1    dyoung 		fp  = ctl->fat->port[tag & ctl->fat->mask];
    744  1.1    dyoung 	} else {
    745  1.1    dyoung 		tag = v4_tag(faddr, fport, laddr, lport);
    746  1.1    dyoung 		fp  = ctl->fat->hash[tag & ctl->fat->mask];
    747  1.1    dyoung 	}
    748  1.1    dyoung 
                       	/* Walk the chain of fat pointers; a fat pointer with no slot
                       	 * in use ends the walk.
                       	 */
    749  1.1    dyoung 	while (fp && fp->inuse) {
    750  1.1    dyoung 		uint32_t	inuse = fp->inuse;	/* local copy; bits are cleared as slots are visited */
    751  1.1    dyoung 
    752  1.1    dyoung 		++fatps;
    753  1.1    dyoung 
    754  1.1    dyoung 		for (i = 0; inuse && i < fatp_ntags(); ++i) {
    755  1.1    dyoung 			uint32_t	idx;
    756  1.1    dyoung 
    757  1.1    dyoung 			if (!(inuse & (1 << i)))
    758  1.1    dyoung 				continue;
    759  1.1    dyoung 
    760  1.1    dyoung 			inuse ^= 1 << i;	/* visited: lets the loop stop once no in-use slot remains */
    761  1.1    dyoung 
    762  1.1    dyoung 			++probes;
    763  1.1    dyoung 			++vtw_stats.probe[which];
    764  1.1    dyoung 
                       			/* Recover the candidate entry index from the stored
                       			 * slot tag, the lookup tag and the per-slot xtra value.
                       			 */
    765  1.1    dyoung 			idx = fp->tag[i] ^ tag ^ fatp_xtra[i];
    766  1.1    dyoung 			vtw = vtw_from_index(ctl, idx);
    767  1.1    dyoung 
    768  1.1    dyoung 			if (!vtw) {
    769  1.1    dyoung 				/* Hopefully fast path.
                       				 * No entry for this index: the slot is rejected
                       				 * without dereferencing any vtw.
    770  1.1    dyoung 				 */
    771  1.1    dyoung 				db_trace(KTR_VTW
    772  1.1    dyoung 					 , (fp, "vtw: fast %A:%P %A:%P"
    773  1.1    dyoung 					    " idx %x tag %x"
    774  1.1    dyoung 					    , faddr, fport
    775  1.1    dyoung 					    , laddr, lport
    776  1.1    dyoung 					    , idx, tag));
    777  1.1    dyoung 				continue;
    778  1.1    dyoung 			}
    779  1.1    dyoung 
    780  1.1    dyoung 			v4 = (void*)vtw;
    781  1.1    dyoung 
    782  1.1    dyoung 			/* The de-referencing of vtw is what we want to avoid.
    783  1.1    dyoung 			 * Losing.
                       			 *
                       			 * A hit requires a live entry whose stored key for
                       			 * the selected hash equals this slot's key, and a
                       			 * matching local port; the connection hash (which == 0)
                       			 * additionally requires the foreign address/port and
                       			 * the local address to match.
    784  1.1    dyoung 			 */
    785  1.1    dyoung 			if (vtw_alive(vtw)
    786  1.1    dyoung 			    && ((which ? vtw->port_key : vtw->key)
    787  1.1    dyoung 				== fatp_key(ctl->fat, fp, i))
    788  1.1    dyoung 			    && (which
    789  1.1    dyoung 				|| (v4->faddr == faddr && v4->laddr == laddr
    790  1.1    dyoung 				    && v4->fport == fport))
    791  1.1    dyoung 			    && v4->lport == lport) {
    792  1.1    dyoung 				++vtw_stats.hit[which];
    793  1.1    dyoung 
    794  1.1    dyoung 				db_trace(KTR_VTW
    795  1.1    dyoung 					 , (fp, "vtw: hit %8.8x:%4.4x"
    796  1.1    dyoung 					    " %8.8x:%4.4x idx %x key %x"
    797  1.1    dyoung 					    , faddr, fport
    798  1.1    dyoung 					    , laddr, lport
    799  1.1    dyoung 					    , idx_decode(ctl, idx), vtw->key));
    800  1.1    dyoung 
    801  1.1    dyoung 				KASSERT(vtw->hashed);
    802  1.1    dyoung 
    803  1.1    dyoung 				goto out;
    804  1.1    dyoung 			}
                       			/* False candidate: count it, then trace why it lost.
                       			 * The remainder of the loop body is tracing only.
                       			 */
    805  1.1    dyoung 			++vtw_stats.losing[which];
    806  1.1    dyoung 			++losings;
    807  1.1    dyoung 
    808  1.1    dyoung 			if (vtw_alive(vtw)) {
    809  1.1    dyoung 				db_trace(KTR_VTW
    810  1.1    dyoung 					 , (fp, "vtw:!mis %8.8x:%4.4x"
    811  1.1    dyoung 					    " %8.8x:%4.4x key %x tag %x"
    812  1.1    dyoung 					    , faddr, fport
    813  1.1    dyoung 					    , laddr, lport
    814  1.1    dyoung 					    , fatp_key(ctl->fat, fp, i)
    815  1.1    dyoung 					    , v4_tag(faddr, fport
    816  1.1    dyoung 						     , laddr, lport)));
    817  1.1    dyoung 				db_trace(KTR_VTW
    818  1.1    dyoung 					 , (vtw, "vtw:!mis %8.8x:%4.4x"
    819  1.1    dyoung 					    " %8.8x:%4.4x key %x tag %x"
    820  1.1    dyoung 					    , v4->faddr, v4->fport
    821  1.1    dyoung 					    , v4->laddr, v4->lport
    822  1.1    dyoung 					    , vtw->key
    823  1.1    dyoung 					    , v4_tag(v4->faddr, v4->fport
    824  1.1    dyoung 						     , v4->laddr, v4->lport)));
    825  1.1    dyoung 
    826  1.1    dyoung 				if (vtw->key == fatp_key(ctl->fat, fp, i)) {
    827  1.1    dyoung 					db_trace(KTR_VTW
    828  1.1    dyoung 						 , (vtw, "vtw:!mis %8.8x:%4.4x"
    829  1.1    dyoung 						    " %8.8x:%4.4x key %x"
    830  1.1    dyoung 						    " which %x"
    831  1.1    dyoung 						    , v4->faddr, v4->fport
    832  1.1    dyoung 						    , v4->laddr, v4->lport
    833  1.1    dyoung 						    , vtw->key
    834  1.1    dyoung 						    , which));
    835  1.1    dyoung 
    836  1.1    dyoung 				} else {
    837  1.1    dyoung 					db_trace(KTR_VTW
    838  1.1    dyoung 						 , (vtw
    839  1.1    dyoung 						    , "vtw:!mis"
    840  1.1    dyoung 						    " key %8.8x != %8.8x"
    841  1.1    dyoung 						    " idx %x i %x which %x"
    842  1.1    dyoung 						    , vtw->key
    843  1.1    dyoung 						    , fatp_key(ctl->fat, fp, i)
    844  1.1    dyoung 						    , idx_decode(ctl, idx)
    845  1.1    dyoung 						    , i
    846  1.1    dyoung 						    , which));
    847  1.1    dyoung 				}
    848  1.1    dyoung 			} else {
    849  1.1    dyoung 				db_trace(KTR_VTW
    850  1.1    dyoung 					 , (fp
    851  1.1    dyoung 					    , "vtw:!mis free entry"
    852  1.1    dyoung 					    " idx %x vtw %p which %x"
    853  1.1    dyoung 					    , idx_decode(ctl, idx)
    854  1.1    dyoung 					    , vtw, which));
    855  1.1    dyoung 			}
    856  1.1    dyoung 		}
    857  1.1    dyoung 
    858  1.1    dyoung 		if (fp->nxt) {
    859  1.1    dyoung 			fp = fatp_next(ctl->fat, fp);
    860  1.1    dyoung 		} else {
    861  1.1    dyoung 			break;
    862  1.1    dyoung 		}
    863  1.1    dyoung 	}
                       	/* Reached only when the chain was exhausted without a hit.
                       	 */
    864  1.1    dyoung 	++vtw_stats.miss[which];
    865  1.1    dyoung 	vtw = 0;
    866  1.1    dyoung out:
                       	/* Record the per-lookup high-water marks (hit or miss).
                       	 */
    867  1.1    dyoung 	if (fatps > vtw_stats.max_chain[which])
    868  1.1    dyoung 		vtw_stats.max_chain[which] = fatps;
    869  1.1    dyoung 	if (probes > vtw_stats.max_probe[which])
    870  1.1    dyoung 		vtw_stats.max_probe[which] = probes;
    871  1.1    dyoung 	if (losings > vtw_stats.max_loss[which])
    872  1.1    dyoung 		vtw_stats.max_loss[which] = losings;
    873  1.1    dyoung 
    874  1.1    dyoung 	return vtw;
    875  1.1    dyoung }
    876  1.1    dyoung 
                       /*!\brief	look up an IPv6 vestigial timewait entry
                        *
                        * \param ctl	control block to search; if it or ctl->fat is null the
                        *		function returns 0
                        * \param faddr	foreign address (compared only when \a which is 0)
                        * \param fport	foreign port (compared only when \a which is 0)
                        * \param laddr	local address (compared only when \a which is 0)
                        * \param lport	local port (always compared)
                        * \param which	0: probe ctl->fat->hash using the full connection tag;
                        *		non-zero: probe ctl->fat->port using the local-port tag.
                        *		Also used as the index into the vtw_stats arrays, so
                        *		presumably only 0 and 1 are valid -- confirm with callers.
                        * \return	the matching live entry, or 0 on a miss
                        */
    877  1.1    dyoung static vtw_t *
    878  1.1    dyoung vtw_lookup_hash_v6(vtw_ctl_t *ctl, const struct in6_addr *faddr, uint16_t fport
    879  1.1    dyoung 				 , const struct in6_addr *laddr, uint16_t lport
    880  1.1    dyoung 				 , int which)
    881  1.1    dyoung {
    882  1.1    dyoung 	vtw_v6_t	*v6;
    883  1.1    dyoung 	vtw_t		*vtw;
    884  1.1    dyoung 	uint32_t	tag;
    885  1.1    dyoung 	fatp_t		*fp;
    886  1.1    dyoung 	int		i;
    887  1.1    dyoung 	uint32_t	fatps = 0, probes = 0, losings = 0;	/* per-call tallies for the max_* stats */
    888  1.1    dyoung 
                       	/* NOTE(review): the lookup is counted before the null check
                       	 * here, whereas vtw_lookup_hash_v4() counts it after; a call
                       	 * with no table is therefore counted as a look but neither as
                       	 * a hit nor as a miss.  Confirm which ordering is intended.
                       	 */
    889  1.1    dyoung 	++vtw_stats.look[which];
    890  1.1    dyoung 
    891  1.1    dyoung 	if (!ctl || !ctl->fat)
    892  1.1    dyoung 		return 0;
    893  1.1    dyoung 
                       	/* Pick the tag and the chain head for the requested hash.
                       	 */
    894  1.1    dyoung 	if (which) {
    895  1.1    dyoung 		tag = v6_port_tag(lport);
    896  1.1    dyoung 		fp  = ctl->fat->port[tag & ctl->fat->mask];
    897  1.1    dyoung 	} else {
    898  1.1    dyoung 		tag = v6_tag(faddr, fport, laddr, lport);
    899  1.1    dyoung 		fp  = ctl->fat->hash[tag & ctl->fat->mask];
    900  1.1    dyoung 	}
    901  1.1    dyoung 
                       	/* Walk the chain of fat pointers; a fat pointer with no slot
                       	 * in use ends the walk.
                       	 */
    902  1.1    dyoung 	while (fp && fp->inuse) {
    903  1.1    dyoung 		uint32_t	inuse = fp->inuse;	/* local copy; bits are cleared as slots are visited */
    904  1.1    dyoung 
    905  1.1    dyoung 		++fatps;
    906  1.1    dyoung 
    907  1.1    dyoung 		for (i = 0; inuse && i < fatp_ntags(); ++i) {
    908  1.1    dyoung 			uint32_t	idx;
    909  1.1    dyoung 
    910  1.1    dyoung 			if (!(inuse & (1 << i)))
    911  1.1    dyoung 				continue;
    912  1.1    dyoung 
    913  1.1    dyoung 			inuse ^= 1 << i;	/* visited: lets the loop stop once no in-use slot remains */
    914  1.1    dyoung 
    915  1.1    dyoung 			++probes;
    916  1.1    dyoung 			++vtw_stats.probe[which];
    917  1.1    dyoung 
                       			/* Recover the candidate entry index from the stored
                       			 * slot tag, the lookup tag and the per-slot xtra value.
                       			 */
    918  1.1    dyoung 			idx = fp->tag[i] ^ tag ^ fatp_xtra[i];
    919  1.1    dyoung 			vtw = vtw_from_index(ctl, idx);
    920  1.1    dyoung 
    921  1.1    dyoung 			db_trace(KTR_VTW
    922  1.1    dyoung 				 , (fp, "probe: %2d %6A:%4.4x %6A:%4.4x idx %x"
    923  1.1    dyoung 				    , i
    924  1.1    dyoung 				    , db_store(faddr, sizeof (*faddr)), fport
    925  1.1    dyoung 				    , db_store(laddr, sizeof (*laddr)), lport
    926  1.1    dyoung 				    , idx_decode(ctl, idx)));
    927  1.1    dyoung 
    928  1.1    dyoung 			if (!vtw) {
    929  1.1    dyoung 				/* Hopefully fast path.
                       				 * No entry for this index: the slot is rejected
                       				 * without dereferencing any vtw.
    930  1.1    dyoung 				 */
    931  1.1    dyoung 				continue;
    932  1.1    dyoung 			}
    933  1.1    dyoung 
    934  1.1    dyoung 			v6 = (void*)vtw;
    935  1.1    dyoung 
                       			/* A hit requires a live entry whose stored key for
                       			 * the selected hash equals this slot's key, and a
                       			 * matching local port; the connection hash (which == 0)
                       			 * additionally requires the foreign port and both
                       			 * addresses to match.
                       			 */
    936  1.1    dyoung 			if (vtw_alive(vtw)
    937  1.1    dyoung 			    && ((which ? vtw->port_key : vtw->key)
    938  1.1    dyoung 				== fatp_key(ctl->fat, fp, i))
    939  1.1    dyoung 			    && v6->lport == lport
    940  1.1    dyoung 			    && (which
    941  1.1    dyoung 				|| (v6->fport == fport
    942  1.1    dyoung 				    && !bcmp(&v6->faddr, faddr, sizeof (*faddr))
    943  1.1    dyoung 				    && !bcmp(&v6->laddr, laddr
    944  1.1    dyoung 					     , sizeof (*laddr))))) {
    945  1.1    dyoung 				++vtw_stats.hit[which];
    946  1.1    dyoung 
    947  1.1    dyoung 				KASSERT(vtw->hashed);
    948  1.1    dyoung 				goto out;
    949  1.1    dyoung 			} else {
                       				/* False candidate (dead entry or mismatch). */
    950  1.1    dyoung 				++vtw_stats.losing[which];
    951  1.1    dyoung 				++losings;
    952  1.1    dyoung 			}
    953  1.1    dyoung 		}
    954  1.1    dyoung 
    955  1.1    dyoung 		if (fp->nxt) {
    956  1.1    dyoung 			fp = fatp_next(ctl->fat, fp);
    957  1.1    dyoung 		} else {
    958  1.1    dyoung 			break;
    959  1.1    dyoung 		}
    960  1.1    dyoung 	}
                       	/* Reached only when the chain was exhausted without a hit.
                       	 */
    961  1.1    dyoung 	++vtw_stats.miss[which];
    962  1.1    dyoung 	vtw = 0;
    963  1.1    dyoung out:
                       	/* Record the per-lookup high-water marks (hit or miss).
                       	 */
    964  1.1    dyoung 	if (fatps > vtw_stats.max_chain[which])
    965  1.1    dyoung 		vtw_stats.max_chain[which] = fatps;
    966  1.1    dyoung 	if (probes > vtw_stats.max_probe[which])
    967  1.1    dyoung 		vtw_stats.max_probe[which] = probes;
    968  1.1    dyoung 	if (losings > vtw_stats.max_loss[which])
    969  1.1    dyoung 		vtw_stats.max_loss[which] = losings;
    970  1.1    dyoung 
    971  1.1    dyoung 	return vtw;
    972  1.1    dyoung }
    973  1.1    dyoung 
    974  1.1    dyoung /*!\brief port iterator
    975  1.1    dyoung  */
    976  1.1    dyoung static vtw_t *
    977  1.1    dyoung vtw_next_port_v4(struct tcp_ports_iterator *it)
    978  1.1    dyoung {
    979  1.1    dyoung 	vtw_ctl_t	*ctl = it->ctl;
    980  1.1    dyoung 	vtw_v4_t	*v4;
    981  1.1    dyoung 	vtw_t		*vtw;
    982  1.1    dyoung 	uint32_t	tag;
    983  1.1    dyoung 	uint16_t	lport = it->port;
    984  1.1    dyoung 	fatp_t		*fp;
    985  1.1    dyoung 	int		i;
    986  1.1    dyoung 	uint32_t	fatps = 0, probes = 0, losings = 0;
    987  1.1    dyoung 
    988  1.1    dyoung 	tag = v4_port_tag(lport);
    989  1.1    dyoung 	if (!it->fp) {
    990  1.1    dyoung 		it->fp = ctl->fat->port[tag & ctl->fat->mask];
    991  1.1    dyoung 		it->slot_idx = 0;
    992  1.1    dyoung 	}
    993  1.1    dyoung 	fp  = it->fp;
    994  1.1    dyoung 
    995  1.1    dyoung 	while (fp) {
    996  1.1    dyoung 		uint32_t	inuse = fp->inuse;
    997  1.1    dyoung 
    998  1.1    dyoung 		++fatps;
    999  1.1    dyoung 
   1000  1.1    dyoung 		for (i = it->slot_idx; inuse && i < fatp_ntags(); ++i) {
   1001  1.1    dyoung 			uint32_t	idx;
   1002  1.1    dyoung 
   1003  1.1    dyoung 			if (!(inuse & (1 << i)))
   1004  1.1    dyoung 				continue;
   1005  1.1    dyoung 
   1006  1.1    dyoung 			inuse &= ~0 << i;
   1007  1.1    dyoung 
   1008  1.1    dyoung 			if (i < it->slot_idx)
   1009  1.1    dyoung 				continue;
   1010  1.1    dyoung 
   1011  1.1    dyoung 			++vtw_stats.probe[1];
   1012  1.1    dyoung 			++probes;
   1013  1.1    dyoung 
   1014  1.1    dyoung 			idx = fp->tag[i] ^ tag ^ fatp_xtra[i];
   1015  1.1    dyoung 			vtw = vtw_from_index(ctl, idx);
   1016  1.1    dyoung 
   1017  1.1    dyoung 			if (!vtw) {
   1018  1.1    dyoung 				/* Hopefully fast path.
   1019  1.1    dyoung 				 */
   1020  1.1    dyoung 				continue;
   1021  1.1    dyoung 			}
   1022  1.1    dyoung 
   1023  1.1    dyoung 			v4 = (void*)vtw;
   1024  1.1    dyoung 
   1025  1.1    dyoung 			if (vtw_alive(vtw)
   1026  1.1    dyoung 			    && vtw->port_key == fatp_key(ctl->fat, fp, i)
   1027  1.1    dyoung 			    && v4->lport == lport) {
   1028  1.1    dyoung 				++vtw_stats.hit[1];
   1029  1.1    dyoung 
   1030  1.1    dyoung 				it->slot_idx = i + 1;
   1031  1.1    dyoung 
   1032  1.1    dyoung 				goto out;
   1033  1.1    dyoung 			} else if (vtw_alive(vtw)) {
   1034  1.1    dyoung 				++vtw_stats.losing[1];
   1035  1.1    dyoung 				++losings;
   1036  1.1    dyoung 
   1037  1.1    dyoung 				db_trace(KTR_VTW
   1038  1.1    dyoung 					 , (vtw, "vtw:!mis"
   1039  1.1    dyoung 					    " port %8.8x:%4.4x %8.8x:%4.4x"
   1040  1.1    dyoung 					    " key %x port %x"
   1041  1.1    dyoung 					    , v4->faddr, v4->fport
   1042  1.1    dyoung 					    , v4->laddr, v4->lport
   1043  1.1    dyoung 					    , vtw->key
   1044  1.1    dyoung 					    , lport));
   1045  1.1    dyoung 			} else {
   1046  1.1    dyoung 				/* Really losing here.  We are coming
   1047  1.1    dyoung 				 * up with references to free entries.
   1048  1.1    dyoung 				 * Might find it better to use
   1049  1.1    dyoung 				 * traditional, or need another
   1050  1.1    dyoung 				 * add-hockery.  The other add-hockery
   1051  1.1    dyoung 				 * would be to pul more into into the
   1052  1.1    dyoung 				 * cache line to reject the false
   1053  1.1    dyoung 				 * hits.
   1054  1.1    dyoung 				 */
   1055  1.1    dyoung 				++vtw_stats.losing[1];
   1056  1.1    dyoung 				++losings;
   1057  1.1    dyoung 				db_trace(KTR_VTW
   1058  1.1    dyoung 					 , (fp, "vtw:!mis port %x"
   1059  1.1    dyoung 					    " - free entry idx %x vtw %p"
   1060  1.1    dyoung 					    , lport
   1061  1.1    dyoung 					    , idx_decode(ctl, idx)
   1062  1.1    dyoung 					    , vtw));
   1063  1.1    dyoung 			}
   1064  1.1    dyoung 		}
   1065  1.1    dyoung 
   1066  1.1    dyoung 		if (fp->nxt) {
   1067  1.1    dyoung 			it->fp = fp = fatp_next(ctl->fat, fp);
   1068  1.1    dyoung 			it->slot_idx = 0;
   1069  1.1    dyoung 		} else {
   1070  1.1    dyoung 			it->fp = 0;
   1071  1.1    dyoung 			break;
   1072  1.1    dyoung 		}
   1073  1.1    dyoung 	}
   1074  1.1    dyoung 	++vtw_stats.miss[1];
   1075  1.1    dyoung 
   1076  1.1    dyoung 	vtw = 0;
   1077  1.1    dyoung out:
   1078  1.1    dyoung 	if (fatps > vtw_stats.max_chain[1])
   1079  1.1    dyoung 		vtw_stats.max_chain[1] = fatps;
   1080  1.1    dyoung 	if (probes > vtw_stats.max_probe[1])
   1081  1.1    dyoung 		vtw_stats.max_probe[1] = probes;
   1082  1.1    dyoung 	if (losings > vtw_stats.max_loss[1])
   1083  1.1    dyoung 		vtw_stats.max_loss[1] = losings;
   1084  1.1    dyoung 
   1085  1.1    dyoung 	return vtw;
   1086  1.1    dyoung }
   1087  1.1    dyoung 
   1088  1.1    dyoung /*!\brief port iterator
   1089  1.1    dyoung  */
   1090  1.1    dyoung static vtw_t *
   1091  1.1    dyoung vtw_next_port_v6(struct tcp_ports_iterator *it)
   1092  1.1    dyoung {
   1093  1.1    dyoung 	vtw_ctl_t	*ctl = it->ctl;
   1094  1.1    dyoung 	vtw_v6_t	*v6;
   1095  1.1    dyoung 	vtw_t		*vtw;
   1096  1.1    dyoung 	uint32_t	tag;
   1097  1.1    dyoung 	uint16_t	lport = it->port;
   1098  1.1    dyoung 	fatp_t		*fp;
   1099  1.1    dyoung 	int		i;
   1100  1.1    dyoung 	uint32_t	fatps = 0, probes = 0, losings = 0;
   1101  1.1    dyoung 
   1102  1.1    dyoung 	tag = v6_port_tag(lport);
   1103  1.1    dyoung 	if (!it->fp) {
   1104  1.1    dyoung 		it->fp = ctl->fat->port[tag & ctl->fat->mask];
   1105  1.1    dyoung 		it->slot_idx = 0;
   1106  1.1    dyoung 	}
   1107  1.1    dyoung 	fp  = it->fp;
   1108  1.1    dyoung 
   1109  1.1    dyoung 	while (fp) {
   1110  1.1    dyoung 		uint32_t	inuse = fp->inuse;
   1111  1.1    dyoung 
   1112  1.1    dyoung 		++fatps;
   1113  1.1    dyoung 
   1114  1.1    dyoung 		for (i = it->slot_idx; inuse && i < fatp_ntags(); ++i) {
   1115  1.1    dyoung 			uint32_t	idx;
   1116  1.1    dyoung 
   1117  1.1    dyoung 			if (!(inuse & (1 << i)))
   1118  1.1    dyoung 				continue;
   1119  1.1    dyoung 
   1120  1.1    dyoung 			inuse &= ~0 << i;
   1121  1.1    dyoung 
   1122  1.1    dyoung 			if (i < it->slot_idx)
   1123  1.1    dyoung 				continue;
   1124  1.1    dyoung 
   1125  1.1    dyoung 			++vtw_stats.probe[1];
   1126  1.1    dyoung 			++probes;
   1127  1.1    dyoung 
   1128  1.1    dyoung 			idx = fp->tag[i] ^ tag ^ fatp_xtra[i];
   1129  1.1    dyoung 			vtw = vtw_from_index(ctl, idx);
   1130  1.1    dyoung 
   1131  1.1    dyoung 			if (!vtw) {
   1132  1.1    dyoung 				/* Hopefully fast path.
   1133  1.1    dyoung 				 */
   1134  1.1    dyoung 				continue;
   1135  1.1    dyoung 			}
   1136  1.1    dyoung 
   1137  1.1    dyoung 			v6 = (void*)vtw;
   1138  1.1    dyoung 
   1139  1.1    dyoung 			db_trace(KTR_VTW
   1140  1.1    dyoung 				 , (vtw, "vtw: i %x idx %x fp->tag %x"
   1141  1.1    dyoung 				    " tag %x xtra %x"
   1142  1.1    dyoung 				    , i, idx_decode(ctl, idx)
   1143  1.1    dyoung 				    , fp->tag[i], tag, fatp_xtra[i]));
   1144  1.1    dyoung 
   1145  1.1    dyoung 			if (vtw_alive(vtw)
   1146  1.1    dyoung 			    && vtw->port_key == fatp_key(ctl->fat, fp, i)
   1147  1.1    dyoung 			    && v6->lport == lport) {
   1148  1.1    dyoung 				++vtw_stats.hit[1];
   1149  1.1    dyoung 
   1150  1.1    dyoung 				db_trace(KTR_VTW
   1151  1.1    dyoung 					 , (fp, "vtw: nxt port %P - %4.4x"
   1152  1.1    dyoung 					    " idx %x key %x"
   1153  1.1    dyoung 					    , lport, lport
   1154  1.1    dyoung 					    , idx_decode(ctl, idx), vtw->key));
   1155  1.1    dyoung 
   1156  1.1    dyoung 				it->slot_idx = i + 1;
   1157  1.1    dyoung 				goto out;
   1158  1.1    dyoung 			} else if (vtw_alive(vtw)) {
   1159  1.1    dyoung 				++vtw_stats.losing[1];
   1160  1.1    dyoung 
   1161  1.1    dyoung 				db_trace(KTR_VTW
   1162  1.1    dyoung 					 , (vtw, "vtw:!mis port %6A:%4.4x"
   1163  1.1    dyoung 					    " %6A:%4.4x key %x port %x"
   1164  1.1    dyoung 					    , db_store(&v6->faddr
   1165  1.1    dyoung 						       , sizeof (v6->faddr))
   1166  1.1    dyoung 					    , v6->fport
   1167  1.1    dyoung 					    , db_store(&v6->laddr
   1168  1.1    dyoung 						       , sizeof (v6->faddr))
   1169  1.1    dyoung 					    , v6->lport
   1170  1.1    dyoung 					    , vtw->key
   1171  1.1    dyoung 					    , lport));
   1172  1.1    dyoung 			} else {
   1173  1.1    dyoung 				/* Really losing here.  We are coming
   1174  1.1    dyoung 				 * up with references to free entries.
   1175  1.1    dyoung 				 * Might find it better to use
   1176  1.1    dyoung 				 * traditional, or need another
   1177  1.1    dyoung 				 * add-hockery.  The other add-hockery
   1178  1.1    dyoung 				 * would be to pul more into into the
   1179  1.1    dyoung 				 * cache line to reject the false
   1180  1.1    dyoung 				 * hits.
   1181  1.1    dyoung 				 */
   1182  1.1    dyoung 				++vtw_stats.losing[1];
   1183  1.1    dyoung 				++losings;
   1184  1.1    dyoung 
   1185  1.1    dyoung 				db_trace(KTR_VTW
   1186  1.1    dyoung 					 , (fp
   1187  1.1    dyoung 					    , "vtw:!mis port %x"
   1188  1.1    dyoung 					    " - free entry idx %x vtw %p"
   1189  1.1    dyoung 					    , lport, idx_decode(ctl, idx)
   1190  1.1    dyoung 					    , vtw));
   1191  1.1    dyoung 			}
   1192  1.1    dyoung 		}
   1193  1.1    dyoung 
   1194  1.1    dyoung 		if (fp->nxt) {
   1195  1.1    dyoung 			it->fp = fp = fatp_next(ctl->fat, fp);
   1196  1.1    dyoung 			it->slot_idx = 0;
   1197  1.1    dyoung 		} else {
   1198  1.1    dyoung 			it->fp = 0;
   1199  1.1    dyoung 			break;
   1200  1.1    dyoung 		}
   1201  1.1    dyoung 	}
   1202  1.1    dyoung 	++vtw_stats.miss[1];
   1203  1.1    dyoung 
   1204  1.1    dyoung 	vtw = 0;
   1205  1.1    dyoung out:
   1206  1.1    dyoung 	if (fatps > vtw_stats.max_chain[1])
   1207  1.1    dyoung 		vtw_stats.max_chain[1] = fatps;
   1208  1.1    dyoung 	if (probes > vtw_stats.max_probe[1])
   1209  1.1    dyoung 		vtw_stats.max_probe[1] = probes;
   1210  1.1    dyoung 	if (losings > vtw_stats.max_loss[1])
   1211  1.1    dyoung 		vtw_stats.max_loss[1] = losings;
   1212  1.1    dyoung 
   1213  1.1    dyoung 	return vtw;
   1214  1.1    dyoung }
   1215  1.1    dyoung 
   1216  1.1    dyoung /*!\brief initialise the VTW allocation arena
   1217  1.1    dyoung  *
   1218  1.1    dyoung  * There are 1+3 allocation classes:
   1219  1.1    dyoung  *	0	classless
   1220  1.1    dyoung  *	{1,2,3}	MSL-class based allocation
   1221  1.1    dyoung  *
   1222  1.1    dyoung  * The allocation arenas are all initialised.  Classless gets all the
   1223  1.1    dyoung  * space.  MSL-class based divides the arena, so that allocation
   1224  1.1    dyoung  * within a class can proceed without having to consider entries
   1225  1.1    dyoung  * (aka: cache lines) from different classes.
   1226  1.1    dyoung  *
   1227  1.1    dyoung  * Usually, we are completely classless or class-based, but there can be
   1228  1.1    dyoung  * transition periods, corresponding to dynamic adjustments in the config
   1229  1.1    dyoung  * by the operator.
                        *
                        * \param fat		fat-pointer table to bind to the arena (fat->vtw is
                        *			pointed back at \a ctl)
                        * \param ctl		array of control blocks, indexed by class; entries
                        *			0 .. VTW_NCLASS-1 are written.  ctl->is_v4 is read
                        *			here, so it must be set by the caller beforehand.
                        * \param n		number of entries in the arena
                        * \param ctl_base_v	first entry of the arena
   1230  1.1    dyoung  */
   1231  1.1    dyoung static void
   1232  1.6    dyoung vtw_init(fatp_ctl_t *fat, vtw_ctl_t *ctl, const uint32_t n, vtw_t *ctl_base_v)
   1233  1.1    dyoung {
   1234  1.6    dyoung 	int class_n, i;
   1235  1.6    dyoung 	vtw_t	*base;
   1236  1.1    dyoung 
   1237  1.6    dyoung 	ctl->base.v = ctl_base_v;
   1238  1.1    dyoung 
                       	/* The classless arena spans all n entries; lim is the last
                       	 * entry, computed with the element size of the address family.
                       	 */
   1239  1.6    dyoung 	if (ctl->is_v4) {
   1240  1.6    dyoung 		ctl->lim.v4    = ctl->base.v4 + n - 1;
   1241  1.6    dyoung 		ctl->alloc.v4  = ctl->base.v4;
   1242  1.6    dyoung 	} else {
   1243  1.6    dyoung 		ctl->lim.v6    = ctl->base.v6 + n - 1;
   1244  1.6    dyoung 		ctl->alloc.v6  = ctl->base.v6;
   1245  1.6    dyoung 	}
   1246  1.1    dyoung 
   1247  1.6    dyoung 	ctl->nfree  = n;
   1248  1.6    dyoung 	ctl->ctl    = ctl;
   1249  1.1    dyoung 
                       	/* Shrink an all-ones mask one bit at a time until it no longer
                       	 * covers n-1, then widen it again by one bit: the result is the
                       	 * narrowest all-ones mask covering n-1, with idx_bits its width
                       	 * (assuming idx_mask is a 32-bit unsigned field -- confirm).
                       	 * NOTE(review): for n == 1 the loop condition is always true,
                       	 * so the loop would not terminate; callers presumably never
                       	 * pass such a small arena -- confirm.
                       	 */
   1250  1.6    dyoung 	ctl->idx_bits = 32;
   1251  1.6    dyoung 	for (ctl->idx_mask = ~0; (ctl->idx_mask & (n-1)) == n-1; ) {
   1252  1.6    dyoung 		ctl->idx_mask >>= 1;
   1253  1.6    dyoung 		ctl->idx_bits  -= 1;
   1254  1.6    dyoung 	}
   1255  1.1    dyoung 
   1256  1.6    dyoung 	ctl->idx_mask <<= 1;
   1257  1.6    dyoung 	ctl->idx_mask  |= 1;
   1258  1.6    dyoung 	ctl->idx_bits  += 1;
   1259  1.1    dyoung 
   1260  1.6    dyoung 	ctl->fat = fat;
   1261  1.6    dyoung 	fat->vtw = ctl;
   1262  1.1    dyoung 
   1263  1.6    dyoung 	/* Divide the resources equally amongst the classes.
   1264  1.6    dyoung 	 * This is not optimal, as the different classes
   1265  1.6    dyoung 	 * arrive and leave at different rates, but it is
   1266  1.6    dyoung 	 * the best I can do for now.
   1267  1.6    dyoung 	 */
   1268  1.6    dyoung 	class_n = n / (VTW_NCLASS-1);
   1269  1.6    dyoung 	base    = ctl->base.v;
   1270  1.1    dyoung 
   1271  1.6    dyoung 	for (i = 1; i < VTW_NCLASS; ++i) {
   1272  1.6    dyoung 		int j;
   1273  1.1    dyoung 
   1274  1.6    dyoung 		ctl[i] = ctl[0];	/* start from a copy of the classless block */
   1275  1.6    dyoung 		ctl[i].clidx = i;
   1276  1.1    dyoung 
   1277  1.6    dyoung 		ctl[i].base.v = base;
   1278  1.6    dyoung 		ctl[i].alloc  = ctl[i].base;
   1279  1.1    dyoung 
                       		/* Step over the first class_n - 1 entries of this class,
                       		 * tagging them with the class only when MSL classes are
                       		 * enabled; base ends on the class's last entry.
                       		 */
   1280  1.6    dyoung 		for (j = 0; j < class_n - 1; ++j) {
   1281  1.6    dyoung 			if (tcp_msl_enable)
   1282  1.6    dyoung 				base->msl_class = i;
   1283  1.1    dyoung 			base = vtw_next(ctl, base);
   1284  1.1    dyoung 		}
   1285  1.6    dyoung 
   1286  1.6    dyoung 		ctl[i].lim.v = base;
   1287  1.6    dyoung 		base = vtw_next(ctl, base);	/* first entry of the next class */
   1288  1.6    dyoung 		ctl[i].nfree = class_n;
   1289  1.1    dyoung 	}
   1290  1.1    dyoung 
   1291  1.1    dyoung 	vtw_debug_init();
   1292  1.1    dyoung }
   1293  1.1    dyoung 
   1294  1.1    dyoung /*!\brief	map class to TCP MSL
                        *
                        * Classes 0 and 1 use tcp_msl_remote, class 2 uses tcp_msl_local and
                        * every other class uses tcp_msl_loop.  When the selected tunable is
                        * zero the fallback is TCPTV_MSL shifted right by 0, 1 or 2 bits
                        * respectively.  The value is presumably in PR_SLOWHZ ticks, as
                        * vtw_alloc() divides it by PR_SLOWHZ -- confirm.
   1295  1.1    dyoung  */
   1296  1.1    dyoung static inline uint32_t
   1297  1.1    dyoung class_to_msl(int class)
   1298  1.1    dyoung {
   1299  1.1    dyoung 	switch (class) {
   1300  1.1    dyoung 	case 0:
   1301  1.1    dyoung 	case 1:
   1302  1.1    dyoung 		return tcp_msl_remote ? tcp_msl_remote : (TCPTV_MSL >> 0);
   1303  1.1    dyoung 	case 2:
   1304  1.1    dyoung 		return tcp_msl_local ? tcp_msl_local : (TCPTV_MSL >> 1);
   1305  1.1    dyoung 	default:
   1306  1.1    dyoung 		return tcp_msl_loop ? tcp_msl_loop : (TCPTV_MSL >> 2);
   1307  1.1    dyoung 	}
   1308  1.1    dyoung }
   1309  1.1    dyoung 
   1310  1.1    dyoung /*!\brief	map TCP MSL to class
                        *
                        * Returns 0 (classless) when tcp_msl_enable is off.  Otherwise the
                        * MSL is compared against the loop threshold and then the local
                        * threshold (each the tunable, or its TCPTV_MSL-derived fallback
                        * when the tunable is zero): at or below the loop threshold gives
                        * class 3, at or below the local threshold gives class 2, anything
                        * larger gives class 1.
   1311  1.1    dyoung  */
   1312  1.1    dyoung static inline uint32_t
   1313  1.1    dyoung msl_to_class(int msl)
   1314  1.1    dyoung {
   1315  1.1    dyoung 	if (tcp_msl_enable) {
   1316  1.1    dyoung 		if (msl <= (tcp_msl_loop ? tcp_msl_loop : (TCPTV_MSL >> 2)))
   1317  1.1    dyoung 			return 1+2;
   1318  1.1    dyoung 		if (msl <= (tcp_msl_local ? tcp_msl_local : (TCPTV_MSL >> 1)))
   1319  1.1    dyoung 			return 1+1;
   1320  1.1    dyoung 		return 1;
   1321  1.1    dyoung 	}
   1322  1.1    dyoung 	return 0;
   1323  1.1    dyoung }
   1324  1.1    dyoung 
   1325  1.1    dyoung /*!\brief allocate a vtw entry
                        *
                        * Takes the entry at the allocation pointer of \a ctl, advances the
                        * pointer, moves one unit from nfree to nalloc and stamps the entry
                        * with an expiry of "uptime now + 2 * MSL of the class".
                        *
                        * \param ctl	control block of the class to allocate from (may be null)
                        * \return	the allocated entry, or 0 when there is no arena, no entry
                        *		could be freed by ageing, or the next entry still belongs
                        *		to another class
                        *
                        * The caller must hold softnet_lock (asserted).
   1326  1.1    dyoung  */
   1327  1.1    dyoung static inline vtw_t *
   1328  1.1    dyoung vtw_alloc(vtw_ctl_t *ctl)
   1329  1.1    dyoung {
   1330  1.1    dyoung 	vtw_t	*vtw	= 0;
   1331  1.1    dyoung 	int	stuck	= 0;
   1332  1.1    dyoung 	int	avail	= ctl ? (ctl->nalloc + ctl->nfree) : 0;	/* total entries of this class */
   1333  1.1    dyoung 	int	msl;
   1334  1.1    dyoung 
   1335  1.1    dyoung 	KASSERT(mutex_owned(softnet_lock));
   1336  1.1    dyoung 
   1337  1.1    dyoung 	/* If no resources, we will not get far.
   1338  1.1    dyoung 	 */
   1339  1.1    dyoung 	if (!ctl || !ctl->base.v4 || avail <= 0)
   1340  1.1    dyoung 		return 0;
   1341  1.1    dyoung 
   1342  1.1    dyoung 	/* Obtain a free one.
                       	 * Each pass asks vtw_age() to expire entries (null timeval);
                       	 * give up after more than avail passes without a free entry.
   1343  1.1    dyoung 	 */
   1344  1.1    dyoung 	while (!ctl->nfree) {
   1345  1.1    dyoung 		vtw_age(ctl, 0);
   1346  1.1    dyoung 
   1347  1.1    dyoung 		if (++stuck > avail) {
   1348  1.1    dyoung 			/* When in transition between
   1349  1.1    dyoung 			 * schemes (classless, classed) we
   1350  1.1    dyoung 			 * can be stuck having to await the
   1351  1.1    dyoung 			 * expiration of cross-allocated entries.
   1352  1.1    dyoung 			 *
   1353  1.1    dyoung 			 * Returning zero means we will fall back to the
   1354  1.1    dyoung 			 * traditional TIME_WAIT handling, except in the
   1355  1.1    dyoung 			 * case of a re-schedule, in which case we cannot
   1356  1.1    dyoung 			 * perform the re-schedule, but will retain the extant
   1357  1.1    dyoung 			 * entry.
   1358  1.1    dyoung 			 */
   1359  1.1    dyoung 			db_trace(KTR_VTW
   1360  1.1    dyoung 				 , (ctl, "vtw:!none free in class %x %x/%x"
   1361  1.1    dyoung 				    , ctl->clidx
   1362  1.1    dyoung 				    , ctl->nalloc, ctl->nfree));
   1363  1.1    dyoung 
   1364  1.1    dyoung 			return 0;
   1365  1.1    dyoung 		}
   1366  1.1    dyoung 	}
   1367  1.1    dyoung 
   1368  1.1    dyoung 	vtw = ctl->alloc.v;
   1369  1.1    dyoung 
   1370  1.1    dyoung 	if (vtw->msl_class != ctl->clidx) {
   1371  1.1    dyoung 		/* Usurping rules:
   1372  1.1    dyoung 		 * 	0 -> {1,2,3} or {1,2,3} -> 0
   1373  1.1    dyoung 		 */
   1374  1.1    dyoung 		KASSERT(!vtw->msl_class || !ctl->clidx);
   1375  1.1    dyoung 
   1376  1.1    dyoung 		if (vtw->hashed || vtw->expire.tv_sec) {
   1377  1.1    dyoung 		    /* As this is owned by some other class,
   1378  1.1    dyoung 		     * we must wait for it to expire it.
   1379  1.1    dyoung 		     * This will only happen on class/classless
   1380  1.1    dyoung 		     * transitions, which are guaranteed to progress
   1381  1.1    dyoung 		     * to completion in small finite time, barring bugs.
   1382  1.1    dyoung 		     */
   1383  1.1    dyoung 		    db_trace(KTR_VTW
   1384  1.1    dyoung 			     , (ctl, "vtw:!%p class %x!=%x %x:%x%s"
   1385  1.1    dyoung 				, vtw, vtw->msl_class, ctl->clidx
   1386  1.1    dyoung 				, vtw->expire.tv_sec
   1387  1.1    dyoung 				, vtw->expire.tv_usec
   1388  1.1    dyoung 				, vtw->hashed ? " hashed" : ""));
   1389  1.1    dyoung 
   1390  1.1    dyoung 		    return 0;
   1391  1.1    dyoung 		}
   1392  1.1    dyoung 
   1393  1.1    dyoung 		db_trace(KTR_VTW
   1394  1.1    dyoung 			 , (ctl, "vtw:!%p usurped from %x to %x"
   1395  1.1    dyoung 			    , vtw, vtw->msl_class, ctl->clidx));
   1396  1.1    dyoung 
   1397  1.1    dyoung 		vtw->msl_class = ctl->clidx;	/* idle foreign entry: take it over */
   1398  1.1    dyoung 	}
   1399  1.1    dyoung 
                       	/* The entry at the allocation pointer is expected to be free;
                       	 * assert in debug kernels, otherwise just refuse to allocate.
                       	 */
   1400  1.1    dyoung 	if (vtw_alive(vtw)) {
   1401  1.1    dyoung 		KASSERT(0 && "next free not free");
   1402  1.1    dyoung 		return 0;
   1403  1.1    dyoung 	}
   1404  1.1    dyoung 
   1405  1.1    dyoung 	/* Advance allocation pointer.
   1406  1.1    dyoung 	 */
   1407  1.1    dyoung 	ctl->alloc.v = vtw_next(ctl, vtw);
   1408  1.1    dyoung 
   1409  1.1    dyoung 	--ctl->nfree;
   1410  1.1    dyoung 	++ctl->nalloc;
   1411  1.1    dyoung 
                       	/* Lifetime is twice the class MSL, converted to milliseconds.
                       	 */
   1412  1.1    dyoung 	msl = (2 * class_to_msl(ctl->clidx) * 1000) / PR_SLOWHZ;	// msec
   1413  1.1    dyoung 
   1414  1.1    dyoung 	/* mark expiration
   1415  1.1    dyoung 	 */
   1416  1.3  drochner 	getmicrouptime(&vtw->expire);
   1417  1.1    dyoung 
   1418  1.1    dyoung 	/* Move expiration into the future.
   1419  1.1    dyoung 	 */
   1420  1.1    dyoung 	vtw->expire.tv_sec  += msl / 1000;
   1421  1.1    dyoung 	vtw->expire.tv_usec += 1000 * (msl % 1000);
   1422  1.1    dyoung 
                       	/* Carry any microsecond overflow into the seconds field.
                       	 */
   1423  1.1    dyoung 	while (vtw->expire.tv_usec >= 1000*1000) {
   1424  1.1    dyoung 		vtw->expire.tv_usec -= 1000*1000;
   1425  1.1    dyoung 		vtw->expire.tv_sec  += 1;
   1426  1.1    dyoung 	}
   1427  1.1    dyoung 
                       	/* First allocation of the class: this entry is also the oldest.
                       	 */
   1428  1.1    dyoung 	if (!ctl->oldest.v)
   1429  1.1    dyoung 		ctl->oldest.v = vtw;
   1430  1.1    dyoung 
   1431  1.1    dyoung 	return vtw;
   1432  1.1    dyoung }
   1433  1.1    dyoung 
   1434  1.1    dyoung /*!\brief expiration
   1435  1.1    dyoung  */
   1436  1.1    dyoung static int
   1437  1.1    dyoung vtw_age(vtw_ctl_t *ctl, struct timeval *_when)
   1438  1.1    dyoung {
   1439  1.1    dyoung 	vtw_t	*vtw;
   1440  1.1    dyoung 	struct timeval then, *when = _when;
   1441  1.1    dyoung 	int	maxtries = 0;
   1442  1.1    dyoung 
   1443  1.1    dyoung 	if (!ctl->oldest.v) {
   1444  1.1    dyoung 		KASSERT(!ctl->nalloc);
   1445  1.1    dyoung 		return 0;
   1446  1.1    dyoung 	}
   1447  1.1    dyoung 
   1448  1.1    dyoung 	for (vtw = ctl->oldest.v; vtw && ctl->nalloc; ) {
   1449  1.1    dyoung 		if (++maxtries > ctl->nalloc)
   1450  1.1    dyoung 			break;
   1451  1.1    dyoung 
   1452  1.1    dyoung 		if (vtw->msl_class != ctl->clidx) {
   1453  1.1    dyoung 			db_trace(KTR_VTW
   1454  1.1    dyoung 				 , (vtw, "vtw:!age class mismatch %x != %x"
   1455  1.1    dyoung 				    , vtw->msl_class, ctl->clidx));
   1456  1.1    dyoung 			/* XXXX
   1457  1.1    dyoung 			 * See if the appropriate action is to skip to the next.
   1458  1.1    dyoung 			 * XXXX
   1459  1.1    dyoung 			 */
   1460  1.1    dyoung 			ctl->oldest.v = vtw = vtw_next(ctl, vtw);
   1461  1.1    dyoung 			continue;
   1462  1.1    dyoung 		}
   1463  1.1    dyoung 		if (!when) {
   1464  1.1    dyoung 			/* Latch oldest timeval if none specified.
   1465  1.1    dyoung 			 */
   1466  1.1    dyoung 			then = vtw->expire;
   1467  1.1    dyoung 			when = &then;
   1468  1.1    dyoung 		}
   1469  1.1    dyoung 
   1470  1.1    dyoung 		if (!timercmp(&vtw->expire, when, <=))
   1471  1.1    dyoung 			break;
   1472  1.1    dyoung 
   1473  1.1    dyoung 		db_trace(KTR_VTW
   1474  1.1    dyoung 			 , (vtw, "vtw: expire %x %8.8x:%8.8x %x/%x"
   1475  1.1    dyoung 			    , ctl->clidx
   1476  1.1    dyoung 			    , vtw->expire.tv_sec
   1477  1.1    dyoung 			    , vtw->expire.tv_usec
   1478  1.1    dyoung 			    , ctl->nalloc
   1479  1.1    dyoung 			    , ctl->nfree));
   1480  1.1    dyoung 
   1481  1.1    dyoung 		if (!_when)
   1482  1.1    dyoung 			++vtw_stats.kill;
   1483  1.1    dyoung 
   1484  1.1    dyoung 		vtw_del(ctl, vtw);
   1485  1.1    dyoung 		vtw = ctl->oldest.v;
   1486  1.1    dyoung 	}
   1487  1.1    dyoung 
   1488  1.1    dyoung 	return ctl->nalloc;	// # remaining allocated
   1489  1.1    dyoung }
   1490  1.1    dyoung 
   1491  1.1    dyoung static callout_t vtw_cs;
   1492  1.1    dyoung 
   1493  1.1    dyoung /*!\brief notice the passage of time.
   1494  1.1    dyoung  * It seems to be getting faster.  What happened to the year?
   1495  1.1    dyoung  */
   1496  1.1    dyoung static void
   1497  1.1    dyoung vtw_tick(void *arg)
   1498  1.1    dyoung {
   1499  1.1    dyoung 	struct timeval now;
   1500  1.1    dyoung 	int i, cnt = 0;
   1501  1.1    dyoung 
   1502  1.3  drochner 	getmicrouptime(&now);
   1503  1.1    dyoung 
   1504  1.1    dyoung 	db_trace(KTR_VTW, (arg, "vtk: tick - now %8.8x:%8.8x"
   1505  1.1    dyoung 			   , now.tv_sec, now.tv_usec));
   1506  1.1    dyoung 
   1507  1.1    dyoung 	mutex_enter(softnet_lock);
   1508  1.1    dyoung 
   1509  1.1    dyoung 	for (i = 0; i < VTW_NCLASS; ++i) {
   1510  1.1    dyoung 		cnt += vtw_age(&vtw_tcpv4[i], &now);
   1511  1.1    dyoung 		cnt += vtw_age(&vtw_tcpv6[i], &now);
   1512  1.1    dyoung 	}
   1513  1.1    dyoung 
   1514  1.1    dyoung 	/* Keep ticks coming while we need them.
   1515  1.1    dyoung 	 */
   1516  1.1    dyoung 	if (cnt)
   1517  1.1    dyoung 		callout_schedule(&vtw_cs, hz / 5);
   1518  1.1    dyoung 	else {
   1519  1.1    dyoung 		tcp_vtw_was_enabled = 0;
   1520  1.1    dyoung 		tcbtable.vestige    = 0;
   1521  1.1    dyoung 	}
   1522  1.1    dyoung 	mutex_exit(softnet_lock);
   1523  1.1    dyoung }
   1524  1.1    dyoung 
   1525  1.1    dyoung /* in_pcblookup_ports assist for handling vestigial entries.
   1526  1.1    dyoung  */
   1527  1.1    dyoung static void *
   1528  1.1    dyoung tcp_init_ports_v4(struct in_addr addr, u_int port, int wild)
   1529  1.1    dyoung {
   1530  1.1    dyoung 	struct tcp_ports_iterator *it = &tcp_ports_iterator_v4;
   1531  1.1    dyoung 
   1532  1.1    dyoung 	bzero(it, sizeof (*it));
   1533  1.1    dyoung 
   1534  1.1    dyoung 	/* Note: the reference to vtw_tcpv4[0] is fine.
   1535  1.1    dyoung 	 * We do not need per-class iteration.  We just
   1536  1.1    dyoung 	 * need to get to the fat, and there is one
   1537  1.1    dyoung 	 * shared fat.
   1538  1.1    dyoung 	 */
   1539  1.1    dyoung 	if (vtw_tcpv4[0].fat) {
   1540  1.1    dyoung 		it->addr.v4 = addr;
   1541  1.1    dyoung 		it->port = port;
   1542  1.1    dyoung 		it->wild = !!wild;
   1543  1.1    dyoung 		it->ctl  = &vtw_tcpv4[0];
   1544  1.1    dyoung 
   1545  1.1    dyoung 		++vtw_stats.look[1];
   1546  1.1    dyoung 	}
   1547  1.1    dyoung 
   1548  1.1    dyoung 	return it;
   1549  1.1    dyoung }
   1550  1.1    dyoung 
   1551  1.1    dyoung /*!\brief export an IPv4 vtw.
   1552  1.1    dyoung  */
   1553  1.1    dyoung static int
   1554  1.1    dyoung vtw_export_v4(vtw_ctl_t *ctl, vtw_t *vtw, vestigial_inpcb_t *res)
   1555  1.1    dyoung {
   1556  1.1    dyoung 	vtw_v4_t	*v4 = (void*)vtw;
   1557  1.1    dyoung 
   1558  1.1    dyoung 	bzero(res, sizeof (*res));
   1559  1.1    dyoung 
   1560  1.1    dyoung 	if (ctl && vtw) {
   1561  1.1    dyoung 		if (!ctl->clidx && vtw->msl_class)
   1562  1.1    dyoung 			ctl += vtw->msl_class;
   1563  1.1    dyoung 		else
   1564  1.1    dyoung 			KASSERT(ctl->clidx == vtw->msl_class);
   1565  1.1    dyoung 
   1566  1.1    dyoung 		res->valid = 1;
   1567  1.1    dyoung 		res->v4    = 1;
   1568  1.1    dyoung 
   1569  1.1    dyoung 		res->faddr.v4.s_addr = v4->faddr;
   1570  1.1    dyoung 		res->laddr.v4.s_addr = v4->laddr;
   1571  1.1    dyoung 		res->fport	= v4->fport;
   1572  1.1    dyoung 		res->lport	= v4->lport;
   1573  1.1    dyoung 		res->vtw	= vtw;		// netlock held over call(s)
   1574  1.1    dyoung 		res->ctl	= ctl;
   1575  1.1    dyoung 		res->reuse_addr = vtw->reuse_addr;
   1576  1.1    dyoung 		res->reuse_port = vtw->reuse_port;
   1577  1.1    dyoung 		res->snd_nxt    = vtw->snd_nxt;
   1578  1.1    dyoung 		res->rcv_nxt	= vtw->rcv_nxt;
   1579  1.1    dyoung 		res->rcv_wnd	= vtw->rcv_wnd;
   1580  1.1    dyoung 		res->uid	= vtw->uid;
   1581  1.1    dyoung 	}
   1582  1.1    dyoung 
   1583  1.1    dyoung 	return res->valid;
   1584  1.1    dyoung }
   1585  1.1    dyoung 
   1586  1.1    dyoung /*!\brief return next port in the port iterator.  yowza.
   1587  1.1    dyoung  */
   1588  1.1    dyoung static int
   1589  1.1    dyoung tcp_next_port_v4(void *arg, struct vestigial_inpcb *res)
   1590  1.1    dyoung {
   1591  1.1    dyoung 	struct tcp_ports_iterator *it = arg;
   1592  1.1    dyoung 	vtw_t		*vtw = 0;
   1593  1.1    dyoung 
   1594  1.1    dyoung 	if (it->ctl)
   1595  1.1    dyoung 		vtw = vtw_next_port_v4(it);
   1596  1.1    dyoung 
   1597  1.1    dyoung 	if (!vtw)
   1598  1.1    dyoung 		it->ctl = 0;
   1599  1.1    dyoung 
   1600  1.1    dyoung 	return vtw_export_v4(it->ctl, vtw, res);
   1601  1.1    dyoung }
   1602  1.1    dyoung 
   1603  1.1    dyoung static int
   1604  1.1    dyoung tcp_lookup_v4(struct in_addr faddr, uint16_t fport,
   1605  1.1    dyoung               struct in_addr laddr, uint16_t lport,
   1606  1.1    dyoung 	      struct vestigial_inpcb *res)
   1607  1.1    dyoung {
   1608  1.1    dyoung 	vtw_t		*vtw;
   1609  1.1    dyoung 	vtw_ctl_t	*ctl;
   1610  1.1    dyoung 
   1611  1.1    dyoung 
   1612  1.1    dyoung 	db_trace(KTR_VTW
   1613  1.1    dyoung 		 , (res, "vtw: lookup %A:%P %A:%P"
   1614  1.1    dyoung 		    , faddr, fport
   1615  1.1    dyoung 		    , laddr, lport));
   1616  1.1    dyoung 
   1617  1.1    dyoung 	vtw = vtw_lookup_hash_v4((ctl = &vtw_tcpv4[0])
   1618  1.1    dyoung 				 , faddr.s_addr, fport
   1619  1.1    dyoung 				 , laddr.s_addr, lport, 0);
   1620  1.1    dyoung 
   1621  1.1    dyoung 	return vtw_export_v4(ctl, vtw, res);
   1622  1.1    dyoung }
   1623  1.1    dyoung 
   1624  1.1    dyoung /* in_pcblookup_ports assist for handling vestigial entries.
   1625  1.1    dyoung  */
   1626  1.1    dyoung static void *
   1627  1.1    dyoung tcp_init_ports_v6(const struct in6_addr *addr, u_int port, int wild)
   1628  1.1    dyoung {
   1629  1.1    dyoung 	struct tcp_ports_iterator *it = &tcp_ports_iterator_v6;
   1630  1.1    dyoung 
   1631  1.1    dyoung 	bzero(it, sizeof (*it));
   1632  1.1    dyoung 
   1633  1.1    dyoung 	/* Note: the reference to vtw_tcpv6[0] is fine.
   1634  1.1    dyoung 	 * We do not need per-class iteration.  We just
   1635  1.1    dyoung 	 * need to get to the fat, and there is one
   1636  1.1    dyoung 	 * shared fat.
   1637  1.1    dyoung 	 */
   1638  1.1    dyoung 	if (vtw_tcpv6[0].fat) {
   1639  1.1    dyoung 		it->addr.v6 = *addr;
   1640  1.1    dyoung 		it->port = port;
   1641  1.1    dyoung 		it->wild = !!wild;
   1642  1.1    dyoung 		it->ctl  = &vtw_tcpv6[0];
   1643  1.1    dyoung 
   1644  1.1    dyoung 		++vtw_stats.look[1];
   1645  1.1    dyoung 	}
   1646  1.1    dyoung 
   1647  1.1    dyoung 	return it;
   1648  1.1    dyoung }
   1649  1.1    dyoung 
   1650  1.1    dyoung /*!\brief export an IPv6 vtw.
   1651  1.1    dyoung  */
   1652  1.1    dyoung static int
   1653  1.1    dyoung vtw_export_v6(vtw_ctl_t *ctl, vtw_t *vtw, vestigial_inpcb_t *res)
   1654  1.1    dyoung {
   1655  1.1    dyoung 	vtw_v6_t	*v6 = (void*)vtw;
   1656  1.1    dyoung 
   1657  1.1    dyoung 	bzero(res, sizeof (*res));
   1658  1.1    dyoung 
   1659  1.1    dyoung 	if (ctl && vtw) {
   1660  1.1    dyoung 		if (!ctl->clidx && vtw->msl_class)
   1661  1.1    dyoung 			ctl += vtw->msl_class;
   1662  1.1    dyoung 		else
   1663  1.1    dyoung 			KASSERT(ctl->clidx == vtw->msl_class);
   1664  1.1    dyoung 
   1665  1.1    dyoung 		res->valid = 1;
   1666  1.1    dyoung 		res->v4    = 0;
   1667  1.1    dyoung 
   1668  1.1    dyoung 		res->faddr.v6	= v6->faddr;
   1669  1.1    dyoung 		res->laddr.v6	= v6->laddr;
   1670  1.1    dyoung 		res->fport	= v6->fport;
   1671  1.1    dyoung 		res->lport	= v6->lport;
   1672  1.1    dyoung 		res->vtw	= vtw;		// netlock held over call(s)
   1673  1.1    dyoung 		res->ctl	= ctl;
   1674  1.1    dyoung 
   1675  1.1    dyoung 		res->v6only	= vtw->v6only;
   1676  1.1    dyoung 		res->reuse_addr = vtw->reuse_addr;
   1677  1.1    dyoung 		res->reuse_port = vtw->reuse_port;
   1678  1.1    dyoung 
   1679  1.1    dyoung 		res->snd_nxt    = vtw->snd_nxt;
   1680  1.1    dyoung 		res->rcv_nxt	= vtw->rcv_nxt;
   1681  1.1    dyoung 		res->rcv_wnd	= vtw->rcv_wnd;
   1682  1.1    dyoung 		res->uid	= vtw->uid;
   1683  1.1    dyoung 	}
   1684  1.1    dyoung 
   1685  1.1    dyoung 	return res->valid;
   1686  1.1    dyoung }
   1687  1.1    dyoung 
   1688  1.1    dyoung static int
   1689  1.1    dyoung tcp_next_port_v6(void *arg, struct vestigial_inpcb *res)
   1690  1.1    dyoung {
   1691  1.1    dyoung 	struct tcp_ports_iterator *it = arg;
   1692  1.1    dyoung 	vtw_t		*vtw = 0;
   1693  1.1    dyoung 
   1694  1.1    dyoung 	if (it->ctl)
   1695  1.1    dyoung 		vtw = vtw_next_port_v6(it);
   1696  1.1    dyoung 
   1697  1.1    dyoung 	if (!vtw)
   1698  1.1    dyoung 		it->ctl = 0;
   1699  1.1    dyoung 
   1700  1.1    dyoung 	return vtw_export_v6(it->ctl, vtw, res);
   1701  1.1    dyoung }
   1702  1.1    dyoung 
   1703  1.1    dyoung static int
   1704  1.1    dyoung tcp_lookup_v6(const struct in6_addr *faddr, uint16_t fport,
   1705  1.1    dyoung               const struct in6_addr *laddr, uint16_t lport,
   1706  1.1    dyoung 	      struct vestigial_inpcb *res)
   1707  1.1    dyoung {
   1708  1.1    dyoung 	vtw_ctl_t	*ctl;
   1709  1.1    dyoung 	vtw_t		*vtw;
   1710  1.1    dyoung 
   1711  1.1    dyoung 	db_trace(KTR_VTW
   1712  1.1    dyoung 		 , (res, "vtw: lookup %6A:%P %6A:%P"
   1713  1.1    dyoung 		    , db_store(faddr, sizeof (*faddr)), fport
   1714  1.1    dyoung 		    , db_store(laddr, sizeof (*laddr)), lport));
   1715  1.1    dyoung 
   1716  1.1    dyoung 	vtw = vtw_lookup_hash_v6((ctl = &vtw_tcpv6[0])
   1717  1.1    dyoung 				 , faddr, fport
   1718  1.1    dyoung 				 , laddr, lport, 0);
   1719  1.1    dyoung 
   1720  1.1    dyoung 	return vtw_export_v6(ctl, vtw, res);
   1721  1.1    dyoung }
   1722  1.1    dyoung 
   1723  1.1    dyoung static vestigial_hooks_t tcp_hooks = {
   1724  1.1    dyoung 	.init_ports4	= tcp_init_ports_v4,
   1725  1.1    dyoung 	.next_port4	= tcp_next_port_v4,
   1726  1.1    dyoung 	.lookup4	= tcp_lookup_v4,
   1727  1.1    dyoung 	.init_ports6	= tcp_init_ports_v6,
   1728  1.1    dyoung 	.next_port6	= tcp_next_port_v6,
   1729  1.1    dyoung 	.lookup6	= tcp_lookup_v6,
   1730  1.1    dyoung };
   1731  1.1    dyoung 
   1732  1.1    dyoung static bool
   1733  1.1    dyoung vtw_select(int af, fatp_ctl_t **fatp, vtw_ctl_t **ctlp)
   1734  1.1    dyoung {
   1735  1.1    dyoung 	fatp_ctl_t	*fat;
   1736  1.1    dyoung 	vtw_ctl_t	*ctl;
   1737  1.1    dyoung 
   1738  1.1    dyoung 	switch (af) {
   1739  1.1    dyoung 	case AF_INET:
   1740  1.1    dyoung 		fat = &fat_tcpv4;
   1741  1.1    dyoung 		ctl = &vtw_tcpv4[0];
   1742  1.1    dyoung 		break;
   1743  1.1    dyoung 	case AF_INET6:
   1744  1.1    dyoung 		fat = &fat_tcpv6;
   1745  1.1    dyoung 		ctl = &vtw_tcpv6[0];
   1746  1.1    dyoung 		break;
   1747  1.1    dyoung 	default:
   1748  1.1    dyoung 		return false;
   1749  1.1    dyoung 	}
   1750  1.1    dyoung 	if (fatp != NULL)
   1751  1.1    dyoung 		*fatp = fat;
   1752  1.1    dyoung 	if (ctlp != NULL)
   1753  1.1    dyoung 		*ctlp = ctl;
   1754  1.1    dyoung 	return true;
   1755  1.1    dyoung }
   1756  1.1    dyoung 
   1757  1.1    dyoung /*!\brief	initialize controlling instance
   1758  1.1    dyoung  */
   1759  1.1    dyoung static int
   1760  1.1    dyoung vtw_control_init(int af)
   1761  1.1    dyoung {
   1762  1.1    dyoung 	fatp_ctl_t	*fat;
   1763  1.1    dyoung 	vtw_ctl_t	*ctl;
   1764  1.6    dyoung 	fatp_t		*fat_base;
   1765  1.6    dyoung 	fatp_t		**fat_hash;
   1766  1.6    dyoung 	vtw_t		*ctl_base_v;
   1767  1.6    dyoung 	uint32_t	n, m;
   1768  1.6    dyoung 	size_t sz;
   1769  1.6    dyoung 
   1770  1.6    dyoung 	KASSERT(powerof2(tcp_vtw_entries));
   1771  1.1    dyoung 
   1772  1.1    dyoung 	if (!vtw_select(af, &fat, &ctl))
   1773  1.1    dyoung 		return EAFNOSUPPORT;
   1774  1.1    dyoung 
   1775  1.6    dyoung 	if (fat->hash != NULL) {
   1776  1.6    dyoung 		KASSERT(fat->base != NULL && ctl->base.v != NULL);
   1777  1.6    dyoung 		return 0;
   1778  1.6    dyoung 	}
   1779  1.6    dyoung 
   1780  1.6    dyoung 	/* Allocate 10% more capacity in the fat pointers.
   1781  1.6    dyoung 	 * We should only need ~#hash additional based on
   1782  1.6    dyoung 	 * how they age, but TIME_WAIT assassination could cause
   1783  1.6    dyoung 	 * sparse fat pointer utilisation.
   1784  1.6    dyoung 	 */
   1785  1.6    dyoung 	m = 512;
   1786  1.6    dyoung 	n = 2*m + (11 * (tcp_vtw_entries / fatp_ntags())) / 10;
   1787  1.6    dyoung 	sz = (ctl->is_v4 ? sizeof(vtw_v4_t) : sizeof(vtw_v6_t));
   1788  1.6    dyoung 
   1789  1.6    dyoung 	fat_hash = kmem_zalloc(2*m * sizeof(fatp_t *), KM_NOSLEEP);
   1790  1.6    dyoung 
   1791  1.6    dyoung 	if (fat_hash == NULL) {
   1792  1.6    dyoung 		printf("%s: could not allocate %zu bytes for "
   1793  1.6    dyoung 		    "hash anchors", __func__, 2*m * sizeof(fatp_t *));
   1794  1.6    dyoung 		return ENOMEM;
   1795  1.6    dyoung 	}
   1796  1.1    dyoung 
   1797  1.6    dyoung 	fat_base = kmem_zalloc(2*n * sizeof(fatp_t), KM_NOSLEEP);
   1798  1.1    dyoung 
   1799  1.6    dyoung 	if (fat_base == NULL) {
   1800  1.6    dyoung 		kmem_free(fat_hash, 2*m * sizeof (fatp_t *));
   1801  1.6    dyoung 		printf("%s: could not allocate %zu bytes for "
   1802  1.6    dyoung 		    "fatp_t array", __func__, 2*n * sizeof(fatp_t));
   1803  1.6    dyoung 		return ENOMEM;
   1804  1.6    dyoung 	}
   1805  1.1    dyoung 
   1806  1.6    dyoung 	ctl_base_v = kmem_zalloc(tcp_vtw_entries * sz, KM_NOSLEEP);
   1807  1.1    dyoung 
   1808  1.6    dyoung 	if (ctl_base_v == NULL) {
   1809  1.6    dyoung 		kmem_free(fat_hash, 2*m * sizeof (fatp_t *));
   1810  1.6    dyoung 		kmem_free(fat_base, 2*n * sizeof(fatp_t));
   1811  1.6    dyoung 		printf("%s: could not allocate %zu bytes for "
   1812  1.6    dyoung 		    "vtw_t array", __func__, tcp_vtw_entries * sz);
   1813  1.6    dyoung 		return ENOMEM;
   1814  1.1    dyoung 	}
   1815  1.1    dyoung 
   1816  1.6    dyoung 	fatp_init(fat, n, m, fat_base, fat_hash);
   1817  1.1    dyoung 
   1818  1.6    dyoung 	vtw_init(fat, ctl, tcp_vtw_entries, ctl_base_v);
   1819  1.1    dyoung 
   1820  1.1    dyoung 	return 0;
   1821  1.1    dyoung }
   1822  1.1    dyoung 
   1823  1.1    dyoung /*!\brief	select controlling instance
   1824  1.1    dyoung  */
   1825  1.1    dyoung static vtw_ctl_t *
   1826  1.1    dyoung vtw_control(int af, uint32_t msl)
   1827  1.1    dyoung {
   1828  1.1    dyoung 	fatp_ctl_t	*fat;
   1829  1.1    dyoung 	vtw_ctl_t	*ctl;
   1830  1.1    dyoung 	int		class	= msl_to_class(msl);
   1831  1.1    dyoung 
   1832  1.1    dyoung 	if (!vtw_select(af, &fat, &ctl))
   1833  1.1    dyoung 		return NULL;
   1834  1.1    dyoung 
   1835  1.1    dyoung 	if (!fat->base || !ctl->base.v)
   1836  1.1    dyoung 		return NULL;
   1837  1.1    dyoung 
   1838  1.5    dyoung 	if (!tcp_vtw_was_enabled) {
   1839  1.5    dyoung 		/* This guarantees is timer ticks until we no longer need them.
   1840  1.5    dyoung 		 */
   1841  1.5    dyoung 		tcp_vtw_was_enabled = 1;
   1842  1.5    dyoung 
   1843  1.5    dyoung 		callout_schedule(&vtw_cs, hz / 5);
   1844  1.5    dyoung 
   1845  1.5    dyoung 		tcbtable.vestige = &tcp_hooks;
   1846  1.5    dyoung 	}
   1847  1.5    dyoung 
   1848  1.1    dyoung 	return ctl + class;
   1849  1.1    dyoung }
   1850  1.1    dyoung 
   1851  1.1    dyoung /*!\brief	add TCP pcb to vestigial timewait
   1852  1.1    dyoung  */
   1853  1.1    dyoung int
   1854  1.1    dyoung vtw_add(int af, struct tcpcb *tp)
   1855  1.1    dyoung {
   1856  1.1    dyoung 	int		enable;
   1857  1.1    dyoung 	vtw_ctl_t	*ctl;
   1858  1.1    dyoung 	vtw_t		*vtw;
   1859  1.1    dyoung 
   1860  1.1    dyoung 	KASSERT(mutex_owned(softnet_lock));
   1861  1.1    dyoung 
   1862  1.1    dyoung 	ctl = vtw_control(af, tp->t_msl);
   1863  1.1    dyoung 	if (!ctl)
   1864  1.1    dyoung 		return 0;
   1865  1.1    dyoung 
   1866  1.1    dyoung 	enable = (af == AF_INET) ? tcp4_vtw_enable : tcp6_vtw_enable;
   1867  1.1    dyoung 
   1868  1.1    dyoung 	vtw = vtw_alloc(ctl);
   1869  1.1    dyoung 
   1870  1.1    dyoung 	if (vtw) {
   1871  1.1    dyoung 		vtw->snd_nxt = tp->snd_nxt;
   1872  1.1    dyoung 		vtw->rcv_nxt = tp->rcv_nxt;
   1873  1.1    dyoung 
   1874  1.1    dyoung 		switch (af) {
   1875  1.1    dyoung 		case AF_INET: {
   1876  1.1    dyoung 			struct inpcb	*inp = tp->t_inpcb;
   1877  1.1    dyoung 			vtw_v4_t	*v4  = (void*)vtw;
   1878  1.1    dyoung 
   1879  1.1    dyoung 			v4->faddr = inp->inp_faddr.s_addr;
   1880  1.1    dyoung 			v4->laddr = inp->inp_laddr.s_addr;
   1881  1.1    dyoung 			v4->fport = inp->inp_fport;
   1882  1.1    dyoung 			v4->lport = inp->inp_lport;
   1883  1.1    dyoung 
   1884  1.1    dyoung 			vtw->reuse_port = !!(inp->inp_socket->so_options
   1885  1.1    dyoung 					     & SO_REUSEPORT);
   1886  1.1    dyoung 			vtw->reuse_addr = !!(inp->inp_socket->so_options
   1887  1.1    dyoung 					     & SO_REUSEADDR);
   1888  1.1    dyoung 			vtw->v6only	= 0;
   1889  1.1    dyoung 			vtw->uid	= inp->inp_socket->so_uidinfo->ui_uid;
   1890  1.1    dyoung 
   1891  1.1    dyoung 			vtw_inshash_v4(ctl, vtw);
   1892  1.1    dyoung 
   1893  1.1    dyoung 
   1894  1.1    dyoung #ifdef VTW_DEBUG
   1895  1.1    dyoung 			/* Immediate lookup (connected and port) to
   1896  1.1    dyoung 			 * ensure at least that works!
   1897  1.1    dyoung 			 */
   1898  1.1    dyoung 			if (enable & 4) {
   1899  1.1    dyoung 				KASSERT(vtw_lookup_hash_v4
   1900  1.1    dyoung 					(ctl
   1901  1.1    dyoung 					 , inp->inp_faddr.s_addr, inp->inp_fport
   1902  1.1    dyoung 					 , inp->inp_laddr.s_addr, inp->inp_lport
   1903  1.1    dyoung 					 , 0)
   1904  1.1    dyoung 					== vtw);
   1905  1.1    dyoung 				KASSERT(vtw_lookup_hash_v4
   1906  1.1    dyoung 					(ctl
   1907  1.1    dyoung 					 , inp->inp_faddr.s_addr, inp->inp_fport
   1908  1.1    dyoung 					 , inp->inp_laddr.s_addr, inp->inp_lport
   1909  1.1    dyoung 					 , 1));
   1910  1.1    dyoung 			}
   1911  1.1    dyoung 			/* Immediate port iterator functionality check: not wild
   1912  1.1    dyoung 			 */
   1913  1.1    dyoung 			if (enable & 8) {
   1914  1.1    dyoung 				struct tcp_ports_iterator *it;
   1915  1.1    dyoung 				struct vestigial_inpcb res;
   1916  1.1    dyoung 				int cnt = 0;
   1917  1.1    dyoung 
   1918  1.1    dyoung 				it = tcp_init_ports_v4(inp->inp_laddr
   1919  1.1    dyoung 						       , inp->inp_lport, 0);
   1920  1.1    dyoung 
   1921  1.1    dyoung 				while (tcp_next_port_v4(it, &res)) {
   1922  1.1    dyoung 					++cnt;
   1923  1.1    dyoung 				}
   1924  1.1    dyoung 				KASSERT(cnt);
   1925  1.1    dyoung 			}
   1926  1.1    dyoung 			/* Immediate port iterator functionality check: wild
   1927  1.1    dyoung 			 */
   1928  1.1    dyoung 			if (enable & 16) {
   1929  1.1    dyoung 				struct tcp_ports_iterator *it;
   1930  1.1    dyoung 				struct vestigial_inpcb res;
   1931  1.1    dyoung 				struct in_addr any;
   1932  1.1    dyoung 				int cnt = 0;
   1933  1.1    dyoung 
   1934  1.1    dyoung 				any.s_addr = htonl(INADDR_ANY);
   1935  1.1    dyoung 
   1936  1.1    dyoung 				it = tcp_init_ports_v4(any, inp->inp_lport, 1);
   1937  1.1    dyoung 
   1938  1.1    dyoung 				while (tcp_next_port_v4(it, &res)) {
   1939  1.1    dyoung 					++cnt;
   1940  1.1    dyoung 				}
   1941  1.1    dyoung 				KASSERT(cnt);
   1942  1.1    dyoung 			}
   1943  1.1    dyoung #endif /* VTW_DEBUG */
   1944  1.1    dyoung 			break;
   1945  1.1    dyoung 		}
   1946  1.1    dyoung 
   1947  1.1    dyoung 		case AF_INET6: {
   1948  1.1    dyoung 			struct in6pcb	*inp = tp->t_in6pcb;
   1949  1.1    dyoung 			vtw_v6_t	*v6  = (void*)vtw;
   1950  1.1    dyoung 
   1951  1.1    dyoung 			v6->faddr = inp->in6p_faddr;
   1952  1.1    dyoung 			v6->laddr = inp->in6p_laddr;
   1953  1.1    dyoung 			v6->fport = inp->in6p_fport;
   1954  1.1    dyoung 			v6->lport = inp->in6p_lport;
   1955  1.1    dyoung 
   1956  1.1    dyoung 			vtw->reuse_port = !!(inp->in6p_socket->so_options
   1957  1.1    dyoung 					     & SO_REUSEPORT);
   1958  1.1    dyoung 			vtw->reuse_addr = !!(inp->in6p_socket->so_options
   1959  1.1    dyoung 					     & SO_REUSEADDR);
   1960  1.1    dyoung 			vtw->v6only	= !!(inp->in6p_flags
   1961  1.1    dyoung 					     & IN6P_IPV6_V6ONLY);
   1962  1.1    dyoung 			vtw->uid	= inp->in6p_socket->so_uidinfo->ui_uid;
   1963  1.1    dyoung 
   1964  1.1    dyoung 			vtw_inshash_v6(ctl, vtw);
   1965  1.1    dyoung #ifdef VTW_DEBUG
   1966  1.1    dyoung 			/* Immediate lookup (connected and port) to
   1967  1.1    dyoung 			 * ensure at least that works!
   1968  1.1    dyoung 			 */
   1969  1.1    dyoung 			if (enable & 4) {
   1970  1.1    dyoung 				KASSERT(vtw_lookup_hash_v6(ctl
   1971  1.1    dyoung 					 , &inp->in6p_faddr, inp->in6p_fport
   1972  1.1    dyoung 					 , &inp->in6p_laddr, inp->in6p_lport
   1973  1.1    dyoung 					 , 0)
   1974  1.1    dyoung 					== vtw);
   1975  1.1    dyoung 				KASSERT(vtw_lookup_hash_v6
   1976  1.1    dyoung 					(ctl
   1977  1.1    dyoung 					 , &inp->in6p_faddr, inp->in6p_fport
   1978  1.1    dyoung 					 , &inp->in6p_laddr, inp->in6p_lport
   1979  1.1    dyoung 					 , 1));
   1980  1.1    dyoung 			}
   1981  1.1    dyoung 			/* Immediate port iterator functionality check: not wild
   1982  1.1    dyoung 			 */
   1983  1.1    dyoung 			if (enable & 8) {
   1984  1.1    dyoung 				struct tcp_ports_iterator *it;
   1985  1.1    dyoung 				struct vestigial_inpcb res;
   1986  1.1    dyoung 				int cnt = 0;
   1987  1.1    dyoung 
   1988  1.1    dyoung 				it = tcp_init_ports_v6(&inp->in6p_laddr
   1989  1.1    dyoung 						       , inp->in6p_lport, 0);
   1990  1.1    dyoung 
   1991  1.1    dyoung 				while (tcp_next_port_v6(it, &res)) {
   1992  1.1    dyoung 					++cnt;
   1993  1.1    dyoung 				}
   1994  1.1    dyoung 				KASSERT(cnt);
   1995  1.1    dyoung 			}
   1996  1.1    dyoung 			/* Immediate port iterator functionality check: wild
   1997  1.1    dyoung 			 */
   1998  1.1    dyoung 			if (enable & 16) {
   1999  1.1    dyoung 				struct tcp_ports_iterator *it;
   2000  1.1    dyoung 				struct vestigial_inpcb res;
   2001  1.1    dyoung 				static struct in6_addr any = IN6ADDR_ANY_INIT;
   2002  1.1    dyoung 				int cnt = 0;
   2003  1.1    dyoung 
   2004  1.1    dyoung 				it = tcp_init_ports_v6(&any
   2005  1.1    dyoung 						       , inp->in6p_lport, 1);
   2006  1.1    dyoung 
   2007  1.1    dyoung 				while (tcp_next_port_v6(it, &res)) {
   2008  1.1    dyoung 					++cnt;
   2009  1.1    dyoung 				}
   2010  1.1    dyoung 				KASSERT(cnt);
   2011  1.1    dyoung 			}
   2012  1.1    dyoung #endif /* VTW_DEBUG */
   2013  1.1    dyoung 			break;
   2014  1.1    dyoung 		}
   2015  1.1    dyoung 		}
   2016  1.1    dyoung 
   2017  1.1    dyoung 		tcp_canceltimers(tp);
   2018  1.1    dyoung 		tp = tcp_close(tp);
   2019  1.1    dyoung 		KASSERT(!tp);
   2020  1.1    dyoung 
   2021  1.1    dyoung 		return 1;
   2022  1.1    dyoung 	}
   2023  1.1    dyoung 
   2024  1.1    dyoung 	return 0;
   2025  1.1    dyoung }
   2026  1.1    dyoung 
   2027  1.1    dyoung /*!\brief	restart timer for vestigial time-wait entry
   2028  1.1    dyoung  */
   2029  1.1    dyoung static void
   2030  1.1    dyoung vtw_restart_v4(vestigial_inpcb_t *vp)
   2031  1.1    dyoung {
   2032  1.1    dyoung 	vtw_v4_t	copy = *(vtw_v4_t*)vp->vtw;
   2033  1.1    dyoung 	vtw_t		*vtw;
   2034  1.1    dyoung 	vtw_t		*cp  = &copy.common;
   2035  1.1    dyoung 	vtw_ctl_t	*ctl;
   2036  1.1    dyoung 
   2037  1.1    dyoung 	KASSERT(mutex_owned(softnet_lock));
   2038  1.1    dyoung 
   2039  1.1    dyoung 	db_trace(KTR_VTW
   2040  1.1    dyoung 		 , (vp->vtw, "vtw: restart %A:%P %A:%P"
   2041  1.1    dyoung 		    , vp->faddr.v4.s_addr, vp->fport
   2042  1.1    dyoung 		    , vp->laddr.v4.s_addr, vp->lport));
   2043  1.1    dyoung 
   2044  1.1    dyoung 	/* Class might have changed, so have a squiz.
   2045  1.1    dyoung 	 */
   2046  1.1    dyoung 	ctl = vtw_control(AF_INET, class_to_msl(cp->msl_class));
   2047  1.1    dyoung 	vtw = vtw_alloc(ctl);
   2048  1.1    dyoung 
   2049  1.1    dyoung 	if (vtw) {
   2050  1.1    dyoung 		vtw_v4_t	*v4  = (void*)vtw;
   2051  1.1    dyoung 
   2052  1.1    dyoung 		/* Safe now to unhash the old entry
   2053  1.1    dyoung 		 */
   2054  1.1    dyoung 		vtw_del(vp->ctl, vp->vtw);
   2055  1.1    dyoung 
   2056  1.1    dyoung 		vtw->snd_nxt = cp->snd_nxt;
   2057  1.1    dyoung 		vtw->rcv_nxt = cp->rcv_nxt;
   2058  1.1    dyoung 
   2059  1.1    dyoung 		v4->faddr = copy.faddr;
   2060  1.1    dyoung 		v4->laddr = copy.laddr;
   2061  1.1    dyoung 		v4->fport = copy.fport;
   2062  1.1    dyoung 		v4->lport = copy.lport;
   2063  1.1    dyoung 
   2064  1.1    dyoung 		vtw->reuse_port = cp->reuse_port;
   2065  1.1    dyoung 		vtw->reuse_addr = cp->reuse_addr;
   2066  1.1    dyoung 		vtw->v6only	= 0;
   2067  1.1    dyoung 		vtw->uid	= cp->uid;
   2068  1.1    dyoung 
   2069  1.1    dyoung 		vtw_inshash_v4(ctl, vtw);
   2070  1.1    dyoung 	}
   2071  1.1    dyoung 
   2072  1.1    dyoung 	vp->valid = 0;
   2073  1.1    dyoung }
   2074  1.1    dyoung 
   2075  1.1    dyoung /*!\brief	restart timer for vestigial time-wait entry
   2076  1.1    dyoung  */
   2077  1.1    dyoung static void
   2078  1.1    dyoung vtw_restart_v6(vestigial_inpcb_t *vp)
   2079  1.1    dyoung {
   2080  1.1    dyoung 	vtw_v6_t	copy = *(vtw_v6_t*)vp->vtw;
   2081  1.1    dyoung 	vtw_t		*vtw;
   2082  1.1    dyoung 	vtw_t		*cp  = &copy.common;
   2083  1.1    dyoung 	vtw_ctl_t	*ctl;
   2084  1.1    dyoung 
   2085  1.1    dyoung 	KASSERT(mutex_owned(softnet_lock));
   2086  1.1    dyoung 
   2087  1.1    dyoung 	db_trace(KTR_VTW
   2088  1.1    dyoung 		 , (vp->vtw, "vtw: restart %6A:%P %6A:%P"
   2089  1.1    dyoung 		    , db_store(&vp->faddr.v6, sizeof (vp->faddr.v6))
   2090  1.1    dyoung 		    , vp->fport
   2091  1.1    dyoung 		    , db_store(&vp->laddr.v6, sizeof (vp->laddr.v6))
   2092  1.1    dyoung 		    , vp->lport));
   2093  1.1    dyoung 
   2094  1.1    dyoung 	/* Class might have changed, so have a squiz.
   2095  1.1    dyoung 	 */
   2096  1.1    dyoung 	ctl = vtw_control(AF_INET6, class_to_msl(cp->msl_class));
   2097  1.1    dyoung 	vtw = vtw_alloc(ctl);
   2098  1.1    dyoung 
   2099  1.1    dyoung 	if (vtw) {
   2100  1.1    dyoung 		vtw_v6_t	*v6  = (void*)vtw;
   2101  1.1    dyoung 
   2102  1.1    dyoung 		/* Safe now to unhash the old entry
   2103  1.1    dyoung 		 */
   2104  1.1    dyoung 		vtw_del(vp->ctl, vp->vtw);
   2105  1.1    dyoung 
   2106  1.1    dyoung 		vtw->snd_nxt = cp->snd_nxt;
   2107  1.1    dyoung 		vtw->rcv_nxt = cp->rcv_nxt;
   2108  1.1    dyoung 
   2109  1.1    dyoung 		v6->faddr = copy.faddr;
   2110  1.1    dyoung 		v6->laddr = copy.laddr;
   2111  1.1    dyoung 		v6->fport = copy.fport;
   2112  1.1    dyoung 		v6->lport = copy.lport;
   2113  1.1    dyoung 
   2114  1.1    dyoung 		vtw->reuse_port = cp->reuse_port;
   2115  1.1    dyoung 		vtw->reuse_addr = cp->reuse_addr;
   2116  1.1    dyoung 		vtw->v6only	= cp->v6only;
   2117  1.1    dyoung 		vtw->uid	= cp->uid;
   2118  1.1    dyoung 
   2119  1.1    dyoung 		vtw_inshash_v6(ctl, vtw);
   2120  1.1    dyoung 	}
   2121  1.1    dyoung 
   2122  1.1    dyoung 	vp->valid = 0;
   2123  1.1    dyoung }
   2124  1.1    dyoung 
   2125  1.1    dyoung /*!\brief	restart timer for vestigial time-wait entry
   2126  1.1    dyoung  */
   2127  1.1    dyoung void
   2128  1.1    dyoung vtw_restart(vestigial_inpcb_t *vp)
   2129  1.1    dyoung {
   2130  1.1    dyoung 	if (!vp || !vp->valid)
   2131  1.1    dyoung 		return;
   2132  1.1    dyoung 
   2133  1.1    dyoung 	if (vp->v4)
   2134  1.1    dyoung 		vtw_restart_v4(vp);
   2135  1.1    dyoung 	else
   2136  1.1    dyoung 		vtw_restart_v6(vp);
   2137  1.1    dyoung }
   2138  1.1    dyoung 
   2139  1.1    dyoung int
   2140  1.7    dyoung sysctl_tcp_vtw_enable(SYSCTLFN_ARGS)
   2141  1.7    dyoung {
   2142  1.7    dyoung 	int en, rc;
   2143  1.7    dyoung 	struct sysctlnode node;
   2144  1.7    dyoung 
   2145  1.7    dyoung 	node = *rnode;
   2146  1.7    dyoung 	en = *(int *)rnode->sysctl_data;
   2147  1.7    dyoung 	node.sysctl_data = &en;
   2148  1.7    dyoung 
   2149  1.7    dyoung 	rc = sysctl_lookup(SYSCTLFN_CALL(&node));
   2150  1.7    dyoung 	if (rc != 0 || newp == NULL)
   2151  1.7    dyoung 		return rc;
   2152  1.7    dyoung 
   2153  1.7    dyoung 	if (rnode->sysctl_data != &tcp4_vtw_enable &&
   2154  1.7    dyoung 	    rnode->sysctl_data != &tcp6_vtw_enable)
   2155  1.7    dyoung 		rc = ENOENT;
   2156  1.7    dyoung 	else if ((en & 1) == 0)
   2157  1.7    dyoung 		rc = 0;
   2158  1.7    dyoung 	else if (rnode->sysctl_data == &tcp4_vtw_enable)
   2159  1.7    dyoung 		rc = vtw_control_init(AF_INET);
   2160  1.7    dyoung 	else /* rnode->sysctl_data == &tcp6_vtw_enable */
   2161  1.7    dyoung 		rc = vtw_control_init(AF_INET6);
   2162  1.7    dyoung 
   2163  1.7    dyoung 	if (rc == 0)
   2164  1.7    dyoung 		*(int *)rnode->sysctl_data = en;
   2165  1.7    dyoung 
   2166  1.7    dyoung 	return rc;
   2167  1.7    dyoung }
   2168  1.7    dyoung 
   2169  1.7    dyoung int
   2170  1.1    dyoung vtw_earlyinit(void)
   2171  1.1    dyoung {
   2172  1.5    dyoung 	int i, rc;
   2173  1.1    dyoung 
   2174  1.5    dyoung 	callout_init(&vtw_cs, 0);
   2175  1.5    dyoung 	callout_setfunc(&vtw_cs, vtw_tick, 0);
   2176  1.1    dyoung 
   2177  1.5    dyoung 	for (i = 0; i < VTW_NCLASS; ++i) {
   2178  1.5    dyoung 		vtw_tcpv4[i].is_v4 = 1;
   2179  1.5    dyoung 		vtw_tcpv6[i].is_v6 = 1;
   2180  1.1    dyoung 	}
   2181  1.1    dyoung 
   2182  1.7    dyoung 	if ((tcp4_vtw_enable & 1) != 0 &&
   2183  1.7    dyoung 	    (rc = vtw_control_init(AF_INET)) != 0)
   2184  1.7    dyoung 		return rc;
   2185  1.7    dyoung 
   2186  1.7    dyoung 	if ((tcp6_vtw_enable & 1) != 0 &&
   2187  1.1    dyoung 	    (rc = vtw_control_init(AF_INET6)) != 0)
   2188  1.1    dyoung 		return rc;
   2189  1.1    dyoung 
   2190  1.1    dyoung 	return 0;
   2191  1.1    dyoung }
   2192  1.1    dyoung 
   2193  1.1    dyoung #ifdef VTW_DEBUG
   2194  1.1    dyoung #include <sys/syscallargs.h>
   2195  1.1    dyoung #include <sys/sysctl.h>
   2196  1.1    dyoung 
   2197  1.1    dyoung /*!\brief	add lalp, fafp entries for debug
   2198  1.1    dyoung  */
   2199  1.1    dyoung int
   2200  1.1    dyoung vtw_debug_add(int af, sin_either_t *la, sin_either_t *fa, int msl, int class)
   2201  1.1    dyoung {
   2202  1.1    dyoung 	vtw_ctl_t	*ctl;
   2203  1.1    dyoung 	vtw_t		*vtw;
   2204  1.1    dyoung 
   2205  1.1    dyoung 	ctl = vtw_control(af, msl ? msl : class_to_msl(class));
   2206  1.1    dyoung 	if (!ctl)
   2207  1.1    dyoung 		return 0;
   2208  1.1    dyoung 
   2209  1.1    dyoung 	vtw = vtw_alloc(ctl);
   2210  1.1    dyoung 
   2211  1.1    dyoung 	if (vtw) {
   2212  1.1    dyoung 		vtw->snd_nxt = 0;
   2213  1.1    dyoung 		vtw->rcv_nxt = 0;
   2214  1.1    dyoung 
   2215  1.1    dyoung 		switch (af) {
   2216  1.1    dyoung 		case AF_INET: {
   2217  1.1    dyoung 			vtw_v4_t	*v4  = (void*)vtw;
   2218  1.1    dyoung 
   2219  1.1    dyoung 			v4->faddr = fa->sin_addr.v4.s_addr;
   2220  1.1    dyoung 			v4->laddr = la->sin_addr.v4.s_addr;
   2221  1.1    dyoung 			v4->fport = fa->sin_port;
   2222  1.1    dyoung 			v4->lport = la->sin_port;
   2223  1.1    dyoung 
   2224  1.1    dyoung 			vtw->reuse_port = 1;
   2225  1.1    dyoung 			vtw->reuse_addr = 1;
   2226  1.1    dyoung 			vtw->v6only	= 0;
   2227  1.1    dyoung 			vtw->uid	= 0;
   2228  1.1    dyoung 
   2229  1.1    dyoung 			vtw_inshash_v4(ctl, vtw);
   2230  1.1    dyoung 			break;
   2231  1.1    dyoung 		}
   2232  1.1    dyoung 
   2233  1.1    dyoung 		case AF_INET6: {
   2234  1.1    dyoung 			vtw_v6_t	*v6  = (void*)vtw;
   2235  1.1    dyoung 
   2236  1.1    dyoung 			v6->faddr = fa->sin_addr.v6;
   2237  1.1    dyoung 			v6->laddr = la->sin_addr.v6;
   2238  1.1    dyoung 
   2239  1.1    dyoung 			v6->fport = fa->sin_port;
   2240  1.1    dyoung 			v6->lport = la->sin_port;
   2241  1.1    dyoung 
   2242  1.1    dyoung 			vtw->reuse_port = 1;
   2243  1.1    dyoung 			vtw->reuse_addr = 1;
   2244  1.1    dyoung 			vtw->v6only	= 0;
   2245  1.1    dyoung 			vtw->uid	= 0;
   2246  1.1    dyoung 
   2247  1.1    dyoung 			vtw_inshash_v6(ctl, vtw);
   2248  1.1    dyoung 			break;
   2249  1.1    dyoung 		}
   2250  1.1    dyoung 
   2251  1.1    dyoung 		default:
   2252  1.1    dyoung 			break;
   2253  1.1    dyoung 		}
   2254  1.1    dyoung 
   2255  1.1    dyoung 		return 1;
   2256  1.1    dyoung 	}
   2257  1.1    dyoung 
   2258  1.1    dyoung 	return 0;
   2259  1.1    dyoung }
   2260  1.1    dyoung 
/* sysent[] slot that vtw_debug_init() gave to vtw_sys; 0 until one is claimed. */
static int vtw_syscall = 0;
   2262  1.1    dyoung 
   2263  1.1    dyoung static int
   2264  1.1    dyoung vtw_debug_process(vtw_sysargs_t *ap)
   2265  1.1    dyoung {
   2266  1.1    dyoung 	struct vestigial_inpcb vestige;
   2267  1.1    dyoung 	int	rc = 0;
   2268  1.1    dyoung 
   2269  1.1    dyoung 	mutex_enter(softnet_lock);
   2270  1.1    dyoung 
   2271  1.1    dyoung 	switch (ap->op) {
   2272  1.1    dyoung 	case 0:		// insert
   2273  1.1    dyoung 		vtw_debug_add(ap->la.sin_family
   2274  1.1    dyoung 			      , &ap->la
   2275  1.1    dyoung 			      , &ap->fa
   2276  1.1    dyoung 			      , TCPTV_MSL
   2277  1.1    dyoung 			      , 0);
   2278  1.1    dyoung 		break;
   2279  1.1    dyoung 
   2280  1.1    dyoung 	case 1:		// lookup
   2281  1.1    dyoung 	case 2:		// restart
   2282  1.1    dyoung 		switch (ap->la.sin_family) {
   2283  1.1    dyoung 		case AF_INET:
   2284  1.1    dyoung 			if (tcp_lookup_v4(ap->fa.sin_addr.v4, ap->fa.sin_port,
   2285  1.1    dyoung 					  ap->la.sin_addr.v4, ap->la.sin_port,
   2286  1.1    dyoung 					  &vestige)) {
   2287  1.1    dyoung 				if (ap->op == 2) {
   2288  1.1    dyoung 					vtw_restart(&vestige);
   2289  1.1    dyoung 				}
   2290  1.1    dyoung 				rc = 0;
   2291  1.1    dyoung 			} else
   2292  1.1    dyoung 				rc = ESRCH;
   2293  1.1    dyoung 			break;
   2294  1.1    dyoung 
   2295  1.1    dyoung 		case AF_INET6:
   2296  1.1    dyoung 			if (tcp_lookup_v6(&ap->fa.sin_addr.v6, ap->fa.sin_port,
   2297  1.1    dyoung 					  &ap->la.sin_addr.v6, ap->la.sin_port,
   2298  1.1    dyoung 					  &vestige)) {
   2299  1.1    dyoung 				if (ap->op == 2) {
   2300  1.1    dyoung 					vtw_restart(&vestige);
   2301  1.1    dyoung 				}
   2302  1.1    dyoung 				rc = 0;
   2303  1.1    dyoung 			} else
   2304  1.1    dyoung 				rc = ESRCH;
   2305  1.1    dyoung 			break;
   2306  1.1    dyoung 		default:
   2307  1.1    dyoung 			rc = EINVAL;
   2308  1.1    dyoung 		}
   2309  1.1    dyoung 		break;
   2310  1.1    dyoung 
   2311  1.1    dyoung 	default:
   2312  1.1    dyoung 		rc = EINVAL;
   2313  1.1    dyoung 	}
   2314  1.1    dyoung 
   2315  1.1    dyoung 	mutex_exit(softnet_lock);
   2316  1.1    dyoung 	return rc;
   2317  1.1    dyoung }
   2318  1.1    dyoung 
/*
 * Argument block of the vtw debug system call, as read by vtw_sys().
 */
struct sys_vtw_args {
	/* user-space address of the request; vtw_sys() copies it in */
	syscallarg(const vtw_sysargs_t *) req;
	/* size of the request; vtw_sys() requires sizeof (vtw_sysargs_t) */
	syscallarg(size_t) len;
};
   2323  1.1    dyoung 
   2324  1.1    dyoung static int
   2325  1.1    dyoung vtw_sys(struct lwp *l, const void *_, register_t *retval)
   2326  1.1    dyoung {
   2327  1.1    dyoung 	const struct sys_vtw_args *uap = _;
   2328  1.1    dyoung 	void	*buf;
   2329  1.1    dyoung 	int	rc;
   2330  1.1    dyoung 	size_t	len	= SCARG(uap, len);
   2331  1.1    dyoung 
   2332  1.1    dyoung 	if (len != sizeof (vtw_sysargs_t))
   2333  1.1    dyoung 		return EINVAL;
   2334  1.1    dyoung 
   2335  1.1    dyoung 	buf = kmem_alloc(len, KM_SLEEP);
   2336  1.1    dyoung 	if (!buf)
   2337  1.1    dyoung 		return ENOMEM;
   2338  1.1    dyoung 
   2339  1.1    dyoung 	rc = copyin(SCARG(uap, req), buf, len);
   2340  1.1    dyoung 	if (!rc) {
   2341  1.1    dyoung 		rc = vtw_debug_process(buf);
   2342  1.1    dyoung 	}
   2343  1.1    dyoung 	kmem_free(buf, len);
   2344  1.1    dyoung 
   2345  1.1    dyoung 	return rc;
   2346  1.1    dyoung }
   2347  1.1    dyoung 
   2348  1.1    dyoung static void
   2349  1.1    dyoung vtw_sanity_check(void)
   2350  1.1    dyoung {
   2351  1.1    dyoung 	vtw_ctl_t	*ctl;
   2352  1.1    dyoung 	vtw_t		*vtw;
   2353  1.1    dyoung 	int		i;
   2354  1.1    dyoung 	int		n;
   2355  1.1    dyoung 
   2356  1.1    dyoung 	for (i = 0; i < VTW_NCLASS; ++i) {
   2357  1.1    dyoung 		ctl = &vtw_tcpv4[i];
   2358  1.1    dyoung 
   2359  1.1    dyoung 		if (!ctl->base.v || ctl->nalloc)
   2360  1.1    dyoung 			continue;
   2361  1.1    dyoung 
   2362  1.1    dyoung 		for (n = 0, vtw = ctl->base.v; ; ) {
   2363  1.1    dyoung 			++n;
   2364  1.1    dyoung 			vtw = vtw_next(ctl, vtw);
   2365  1.1    dyoung 			if (vtw == ctl->base.v)
   2366  1.1    dyoung 				break;
   2367  1.1    dyoung 		}
   2368  1.1    dyoung 		db_trace(KTR_VTW
   2369  1.1    dyoung 			 , (ctl, "sanity: class %x n %x nfree %x"
   2370  1.1    dyoung 			    , i, n, ctl->nfree));
   2371  1.1    dyoung 
   2372  1.1    dyoung 		KASSERT(n == ctl->nfree);
   2373  1.1    dyoung 	}
   2374  1.1    dyoung 
   2375  1.1    dyoung 	for (i = 0; i < VTW_NCLASS; ++i) {
   2376  1.1    dyoung 		ctl = &vtw_tcpv6[i];
   2377  1.1    dyoung 
   2378  1.1    dyoung 		if (!ctl->base.v || ctl->nalloc)
   2379  1.1    dyoung 			continue;
   2380  1.1    dyoung 
   2381  1.1    dyoung 		for (n = 0, vtw = ctl->base.v; ; ) {
   2382  1.1    dyoung 			++n;
   2383  1.1    dyoung 			vtw = vtw_next(ctl, vtw);
   2384  1.1    dyoung 			if (vtw == ctl->base.v)
   2385  1.1    dyoung 				break;
   2386  1.1    dyoung 		}
   2387  1.1    dyoung 		db_trace(KTR_VTW
   2388  1.1    dyoung 			 , (ctl, "sanity: class %x n %x nfree %x"
   2389  1.1    dyoung 			    , i, n, ctl->nfree));
   2390  1.1    dyoung 		KASSERT(n == ctl->nfree);
   2391  1.1    dyoung 	}
   2392  1.1    dyoung }
   2393  1.1    dyoung 
   2394  1.1    dyoung /*!\brief	Initialise debug support.
   2395  1.1    dyoung  */
   2396  1.1    dyoung static void
   2397  1.1    dyoung vtw_debug_init(void)
   2398  1.1    dyoung {
   2399  1.1    dyoung 	int	i;
   2400  1.1    dyoung 
   2401  1.1    dyoung 	vtw_sanity_check();
   2402  1.1    dyoung 
   2403  1.1    dyoung 	if (vtw_syscall)
   2404  1.1    dyoung 		return;
   2405  1.1    dyoung 
   2406  1.1    dyoung 	for (i = 511; i; --i) {
   2407  1.1    dyoung 		if (sysent[i].sy_call == sys_nosys) {
   2408  1.1    dyoung 			sysent[i].sy_call    = vtw_sys;
   2409  1.1    dyoung 			sysent[i].sy_narg    = 2;
   2410  1.1    dyoung 			sysent[i].sy_argsize = sizeof (struct sys_vtw_args);
   2411  1.1    dyoung 			sysent[i].sy_flags   = 0;
   2412  1.1    dyoung 
   2413  1.1    dyoung 			vtw_syscall = i;
   2414  1.1    dyoung 			break;
   2415  1.1    dyoung 		}
   2416  1.1    dyoung 	}
   2417  1.1    dyoung 	if (i) {
   2418  1.1    dyoung 		const struct sysctlnode *node;
   2419  1.1    dyoung 		uint32_t	flags;
   2420  1.1    dyoung 
   2421  1.1    dyoung 		flags = sysctl_root.sysctl_flags;
   2422  1.1    dyoung 
   2423  1.1    dyoung 		sysctl_root.sysctl_flags |= CTLFLAG_READWRITE;
   2424  1.1    dyoung 		sysctl_root.sysctl_flags &= ~CTLFLAG_PERMANENT;
   2425  1.1    dyoung 
   2426  1.1    dyoung 		sysctl_createv(0, 0, 0, &node,
   2427  1.1    dyoung 			       CTLFLAG_PERMANENT, CTLTYPE_NODE,
   2428  1.1    dyoung 			       "koff",
   2429  1.1    dyoung 			       SYSCTL_DESCR("Kernel Obscure Feature Finder"),
   2430  1.1    dyoung 			       0, 0, 0, 0, CTL_CREATE, CTL_EOL);
   2431  1.1    dyoung 
   2432  1.1    dyoung 		if (!node) {
   2433  1.1    dyoung 			sysctl_createv(0, 0, 0, &node,
   2434  1.1    dyoung 				       CTLFLAG_PERMANENT, CTLTYPE_NODE,
   2435  1.1    dyoung 				       "koffka",
   2436  1.1    dyoung 				       SYSCTL_DESCR("The Real(tm) Kernel"
   2437  1.1    dyoung 						    " Obscure Feature Finder"),
   2438  1.1    dyoung 				       0, 0, 0, 0, CTL_CREATE, CTL_EOL);
   2439  1.1    dyoung 		}
   2440  1.1    dyoung 		if (node) {
   2441  1.1    dyoung 			sysctl_createv(0, 0, 0, 0,
   2442  1.1    dyoung 				       CTLFLAG_PERMANENT|CTLFLAG_READONLY,
   2443  1.1    dyoung 				       CTLTYPE_INT, "vtw_debug_syscall",
   2444  1.1    dyoung 				       SYSCTL_DESCR("vtw debug"
   2445  1.1    dyoung 						    " system call number"),
   2446  1.1    dyoung 				       0, 0, &vtw_syscall, 0, node->sysctl_num,
   2447  1.1    dyoung 				       CTL_CREATE, CTL_EOL);
   2448  1.1    dyoung 		}
   2449  1.1    dyoung 		sysctl_root.sysctl_flags = flags;
   2450  1.1    dyoung 	}
   2451  1.1    dyoung }
   2452  1.1    dyoung #else /* !VTW_DEBUG */
/*!\brief	Debug support is not compiled in; there is nothing to set up.
 */
static void
vtw_debug_init(void)
{
}
   2458  1.1    dyoung #endif /* !VTW_DEBUG */
   2459