Home | History | Annotate | Line # | Download | only in netinet
tcp_vtw.c revision 1.1
      1  1.1  dyoung /*
      2  1.1  dyoung  * Copyright (c) 2011 The NetBSD Foundation, Inc.
      3  1.1  dyoung  * All rights reserved.
      4  1.1  dyoung  *
      5  1.1  dyoung  * This code is derived from software contributed to The NetBSD Foundation
      6  1.1  dyoung  * by Coyote Point Systems, Inc.
      7  1.1  dyoung  *
      8  1.1  dyoung  * Redistribution and use in source and binary forms, with or without
      9  1.1  dyoung  * modification, are permitted provided that the following conditions
     10  1.1  dyoung  * are met:
     11  1.1  dyoung  * 1. Redistributions of source code must retain the above copyright
     12  1.1  dyoung  *    notice, this list of conditions and the following disclaimer.
     13  1.1  dyoung  * 2. Redistributions in binary form must reproduce the above copyright
     14  1.1  dyoung  *    notice, this list of conditions and the following disclaimer in the
     15  1.1  dyoung  *    documentation and/or other materials provided with the distribution.
     16  1.1  dyoung  *
     17  1.1  dyoung  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     18  1.1  dyoung  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     19  1.1  dyoung  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     20  1.1  dyoung  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     21  1.1  dyoung  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     22  1.1  dyoung  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     23  1.1  dyoung  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     24  1.1  dyoung  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     25  1.1  dyoung  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     26  1.1  dyoung  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     27  1.1  dyoung  * POSSIBILITY OF SUCH DAMAGE.
     28  1.1  dyoung  */
     29  1.1  dyoung #include <sys/cdefs.h>
     30  1.1  dyoung 
     31  1.1  dyoung #include "opt_ddb.h"
     32  1.1  dyoung #include "opt_inet.h"
     33  1.1  dyoung #include "opt_ipsec.h"
     34  1.1  dyoung #include "opt_inet_csum.h"
     35  1.1  dyoung #include "opt_tcp_debug.h"
     36  1.1  dyoung 
     37  1.1  dyoung #include <sys/param.h>
     38  1.1  dyoung #include <sys/systm.h>
     39  1.1  dyoung #include <sys/malloc.h>
     40  1.1  dyoung #include <sys/kmem.h>
     41  1.1  dyoung #include <sys/mbuf.h>
     42  1.1  dyoung #include <sys/protosw.h>
     43  1.1  dyoung #include <sys/socket.h>
     44  1.1  dyoung #include <sys/socketvar.h>
     45  1.1  dyoung #include <sys/errno.h>
     46  1.1  dyoung #include <sys/syslog.h>
     47  1.1  dyoung #include <sys/pool.h>
     48  1.1  dyoung #include <sys/domain.h>
     49  1.1  dyoung #include <sys/kernel.h>
     50  1.1  dyoung #include <net/if.h>
     51  1.1  dyoung #include <net/route.h>
     52  1.1  dyoung #include <net/if_types.h>
     53  1.1  dyoung 
     54  1.1  dyoung #include <netinet/in.h>
     55  1.1  dyoung #include <netinet/in_systm.h>
     56  1.1  dyoung #include <netinet/ip.h>
     57  1.1  dyoung #include <netinet/in_pcb.h>
     58  1.1  dyoung #include <netinet/in_var.h>
     59  1.1  dyoung #include <netinet/ip_var.h>
     60  1.1  dyoung #include <netinet/in_offload.h>
     61  1.1  dyoung #include <netinet/ip6.h>
     62  1.1  dyoung #include <netinet6/ip6_var.h>
     63  1.1  dyoung #include <netinet6/in6_pcb.h>
     64  1.1  dyoung #include <netinet6/ip6_var.h>
     65  1.1  dyoung #include <netinet6/in6_var.h>
     66  1.1  dyoung #include <netinet/icmp6.h>
     67  1.1  dyoung #include <netinet6/nd6.h>
     68  1.1  dyoung 
     69  1.1  dyoung #include <netinet/tcp.h>
     70  1.1  dyoung #include <netinet/tcp_fsm.h>
     71  1.1  dyoung #include <netinet/tcp_seq.h>
     72  1.1  dyoung #include <netinet/tcp_timer.h>
     73  1.1  dyoung #include <netinet/tcp_var.h>
     74  1.1  dyoung #include <netinet/tcp_private.h>
     75  1.1  dyoung #include <netinet/tcpip.h>
     76  1.1  dyoung 
     77  1.1  dyoung #include <machine/stdarg.h>
     78  1.1  dyoung #include <netinet/tcp_vtw.h>
     79  1.1  dyoung 
     80  1.1  dyoung __KERNEL_RCSID(0, "$NetBSD: tcp_vtw.c,v 1.1 2011/05/03 18:28:45 dyoung Exp $");
     81  1.1  dyoung 
     82  1.1  dyoung #define db_trace(__a, __b)	do { } while (/*CONSTCOND*/0)
     83  1.1  dyoung 
     84  1.1  dyoung static void k_vtw(int c, char **v);
     85  1.1  dyoung static void vtw_debug_init(void);
     86  1.1  dyoung 
     87  1.1  dyoung fatp_ctl_t fat_tcpv4;
     88  1.1  dyoung fatp_ctl_t fat_tcpv6;
     89  1.1  dyoung vtw_ctl_t  vtw_tcpv4[VTW_NCLASS];
     90  1.1  dyoung vtw_ctl_t  vtw_tcpv6[VTW_NCLASS];
     91  1.1  dyoung vtw_stats_t vtw_stats;
     92  1.1  dyoung 
     93  1.1  dyoung /* We provide state for the lookup_ports iterator.
     94  1.1  dyoung  * As currently we are netlock-protected, there is one.
     95  1.1  dyoung  * If we were finer-grain, we would have one per CPU.
     96  1.1  dyoung  * I do not want to be in the business of alloc/free.
     97  1.1  dyoung  * The best alternate would be allocate on the caller's
     98  1.1  dyoung  * stack, but that would require them to know the struct,
     99  1.1  dyoung  * or at least the size.
    100  1.1  dyoung  * See how she goes.
    101  1.1  dyoung  */
    102  1.1  dyoung struct tcp_ports_iterator {
    103  1.1  dyoung 	union {
    104  1.1  dyoung 		struct in_addr	v4;
    105  1.1  dyoung 		struct in6_addr	v6;
    106  1.1  dyoung 	}		addr;
    107  1.1  dyoung 	u_int		port;
    108  1.1  dyoung 
    109  1.1  dyoung 	uint32_t	wild	: 1;
    110  1.1  dyoung 
    111  1.1  dyoung 	vtw_ctl_t	*ctl;
    112  1.1  dyoung 	fatp_t		*fp;
    113  1.1  dyoung 
    114  1.1  dyoung 	uint16_t	slot_idx;
    115  1.1  dyoung 	uint16_t	ctl_idx;
    116  1.1  dyoung };
    117  1.1  dyoung 
    118  1.1  dyoung static struct tcp_ports_iterator tcp_ports_iterator_v4;
    119  1.1  dyoung static struct tcp_ports_iterator tcp_ports_iterator_v6;
    120  1.1  dyoung 
    121  1.1  dyoung static int vtw_age(vtw_ctl_t *, struct timeval *);
    122  1.1  dyoung 
    123  1.1  dyoung /*!\brief allocate a fat pointer from a collection.
    124  1.1  dyoung  */
    125  1.1  dyoung static fatp_t *
    126  1.1  dyoung fatp_alloc(fatp_ctl_t *fat)
    127  1.1  dyoung {
    128  1.1  dyoung 	fatp_t	*fp	= 0;
    129  1.1  dyoung 
    130  1.1  dyoung 	if (fat->nfree) {
    131  1.1  dyoung 		fp = fat->free;
    132  1.1  dyoung 		if (fp) {
    133  1.1  dyoung 			fat->free = fatp_next(fat, fp);
    134  1.1  dyoung 			--fat->nfree;
    135  1.1  dyoung 			++fat->nalloc;
    136  1.1  dyoung 			fp->nxt = 0;
    137  1.1  dyoung 
    138  1.1  dyoung 			KASSERT(!fp->inuse);
    139  1.1  dyoung 		}
    140  1.1  dyoung 	}
    141  1.1  dyoung 
    142  1.1  dyoung 	return fp;
    143  1.1  dyoung }
    144  1.1  dyoung 
    145  1.1  dyoung /*!\brief free a fat pointer.
    146  1.1  dyoung  */
    147  1.1  dyoung static void
    148  1.1  dyoung fatp_free(fatp_ctl_t *fat, fatp_t *fp)
    149  1.1  dyoung {
    150  1.1  dyoung 	if (fp) {
    151  1.1  dyoung 		KASSERT(!fp->inuse);
    152  1.1  dyoung 		KASSERT(!fp->nxt);
    153  1.1  dyoung 
    154  1.1  dyoung 		fp->nxt = fatp_index(fat, fat->free);
    155  1.1  dyoung 		fat->free = fp;
    156  1.1  dyoung 
    157  1.1  dyoung 		++fat->nfree;
    158  1.1  dyoung 		--fat->nalloc;
    159  1.1  dyoung 	}
    160  1.1  dyoung }
    161  1.1  dyoung 
    162  1.1  dyoung /*!\brief initialise a collection of fat pointers.
    163  1.1  dyoung  *
    164  1.1  dyoung  *\param n	# hash buckets
    165  1.1  dyoung  *\param m	total # fat pointers to allocate
    166  1.1  dyoung  *
    167  1.1  dyoung  * We allocate 2x as much, as we have two hashes: full and lport only.
    168  1.1  dyoung  */
    169  1.1  dyoung static void
    170  1.1  dyoung fatp_init(fatp_ctl_t *fat, uint32_t n, uint32_t m)
    171  1.1  dyoung {
    172  1.1  dyoung 	fatp_t	*fp;
    173  1.1  dyoung 
    174  1.1  dyoung 	k_vtw(0,0);
    175  1.1  dyoung 
    176  1.1  dyoung 	KASSERT(n <= FATP_MAX / 2);
    177  1.1  dyoung 
    178  1.1  dyoung 	fat->hash   = kmem_alloc(2*m * sizeof (fatp_t *), KM_SLEEP);
    179  1.1  dyoung 	fat->base   = kmem_alloc(2*n * sizeof (fatp_t), KM_SLEEP);
    180  1.1  dyoung 
    181  1.1  dyoung 	if (!fat->base) {
    182  1.1  dyoung 		if (fat->hash)
    183  1.1  dyoung 			kmem_free(fat->hash, 2*m * sizeof (fatp_t *));
    184  1.1  dyoung 
    185  1.1  dyoung 		bzero(fat, sizeof (*fat));
    186  1.1  dyoung 		return;
    187  1.1  dyoung 	}
    188  1.1  dyoung 
    189  1.1  dyoung 	fat->port = &fat->hash[m];
    190  1.1  dyoung 
    191  1.1  dyoung 	fat->mask   = m - 1;	// ASSERT is power of 2 (m)
    192  1.1  dyoung 	fat->lim    = fat->base + 2*n - 1;
    193  1.1  dyoung 	fat->nfree  = 0;
    194  1.1  dyoung 	fat->nalloc = 2*n;
    195  1.1  dyoung 
    196  1.1  dyoung 	bzero(fat->hash, 2*m * sizeof (fatp_t *));
    197  1.1  dyoung 	bzero(fat->base, 2*n * sizeof (fatp_t));
    198  1.1  dyoung 
    199  1.1  dyoung 	/* Initialise the free list.
    200  1.1  dyoung 	 */
    201  1.1  dyoung 	for (fp = fat->lim; fp >= fat->base; --fp) {
    202  1.1  dyoung 		fatp_free(fat, fp);
    203  1.1  dyoung 	}
    204  1.1  dyoung }
    205  1.1  dyoung 
/*
 * Per-slot whitening constants.  The `xtra' for a slot is XORed into
 * the tag stored in that slot, and XORed out again on lookup/removal.
 */
static uint32_t fatp_xtra[] = {
	/* slots 0-7 */
	0x11111111U, 0x22222222U, 0x33333333U, 0x44444444U,
	0x55555555U, 0x66666666U, 0x77777777U, 0x88888888U,
	/* slots 8-15 */
	0x12121212U, 0x21212121U, 0x34343434U, 0x43434343U,
	0x56565656U, 0x65656565U, 0x78787878U, 0x87878787U,
	/* slots 16-23 */
	0x11221122U, 0x22112211U, 0x33443344U, 0x44334433U,
	0x55665566U, 0x66556655U, 0x77887788U, 0x88778877U,
	/* slots 24-31 */
	0x11112222U, 0x22221111U, 0x33334444U, 0x44443333U,
	0x55556666U, 0x66665555U, 0x77778888U, 0x88887777U,
};
    219  1.1  dyoung 
    220  1.1  dyoung /*!\brief turn a {fatp_t*,slot} into an integral key.
    221  1.1  dyoung  *
    222  1.1  dyoung  * The key can be used to obtain the fatp_t, and the slot,
    223  1.1  dyoung  * as it directly encodes them.
    224  1.1  dyoung  */
    225  1.1  dyoung static inline uint32_t
    226  1.1  dyoung fatp_key(fatp_ctl_t *fat, fatp_t *fp, uint32_t slot)
    227  1.1  dyoung {
    228  1.1  dyoung 	CTASSERT(CACHE_LINE_SIZE == 32 ||
    229  1.1  dyoung 	         CACHE_LINE_SIZE == 64 ||
    230  1.1  dyoung 		 CACHE_LINE_SIZE == 128);
    231  1.1  dyoung 
    232  1.1  dyoung 	switch (fatp_ntags()) {
    233  1.1  dyoung 	case 7:
    234  1.1  dyoung 		return (fatp_index(fat, fp) << 3) | slot;
    235  1.1  dyoung 	case 15:
    236  1.1  dyoung 		return (fatp_index(fat, fp) << 4) | slot;
    237  1.1  dyoung 	case 31:
    238  1.1  dyoung 		return (fatp_index(fat, fp) << 5) | slot;
    239  1.1  dyoung 	default:
    240  1.1  dyoung 		KASSERT(0 && "no support, for no good reason");
    241  1.1  dyoung 		return ~0;
    242  1.1  dyoung 	}
    243  1.1  dyoung }
    244  1.1  dyoung 
    245  1.1  dyoung static inline uint32_t
    246  1.1  dyoung fatp_slot_from_key(fatp_ctl_t *fat, uint32_t key)
    247  1.1  dyoung {
    248  1.1  dyoung 	CTASSERT(CACHE_LINE_SIZE == 32 ||
    249  1.1  dyoung 	         CACHE_LINE_SIZE == 64 ||
    250  1.1  dyoung 		 CACHE_LINE_SIZE == 128);
    251  1.1  dyoung 
    252  1.1  dyoung 	switch (fatp_ntags()) {
    253  1.1  dyoung 	case 7:
    254  1.1  dyoung 		return key & 7;
    255  1.1  dyoung 	case 15:
    256  1.1  dyoung 		return key & 15;
    257  1.1  dyoung 	case 31:
    258  1.1  dyoung 		return key & 31;
    259  1.1  dyoung 	default:
    260  1.1  dyoung 		KASSERT(0 && "no support, for no good reason");
    261  1.1  dyoung 		return ~0;
    262  1.1  dyoung 	}
    263  1.1  dyoung }
    264  1.1  dyoung 
    265  1.1  dyoung static inline fatp_t *
    266  1.1  dyoung fatp_from_key(fatp_ctl_t *fat, uint32_t key)
    267  1.1  dyoung {
    268  1.1  dyoung 	CTASSERT(CACHE_LINE_SIZE == 32 ||
    269  1.1  dyoung 	         CACHE_LINE_SIZE == 64 ||
    270  1.1  dyoung 		 CACHE_LINE_SIZE == 128);
    271  1.1  dyoung 
    272  1.1  dyoung 	switch (fatp_ntags()) {
    273  1.1  dyoung 	case 7:
    274  1.1  dyoung 		key >>= 3;
    275  1.1  dyoung 		break;
    276  1.1  dyoung 	case 15:
    277  1.1  dyoung 		key >>= 4;
    278  1.1  dyoung 		break;
    279  1.1  dyoung 	case 31:
    280  1.1  dyoung 		key >>= 5;
    281  1.1  dyoung 		break;
    282  1.1  dyoung 	default:
    283  1.1  dyoung 		KASSERT(0 && "no support, for no good reason");
    284  1.1  dyoung 		return 0;
    285  1.1  dyoung 	}
    286  1.1  dyoung 
    287  1.1  dyoung 	return key ? fat->base + key - 1 : 0;
    288  1.1  dyoung }
    289  1.1  dyoung 
    290  1.1  dyoung static inline uint32_t
    291  1.1  dyoung idx_encode(vtw_ctl_t *ctl, uint32_t idx)
    292  1.1  dyoung {
    293  1.1  dyoung 	return (idx << ctl->idx_bits) | idx;
    294  1.1  dyoung }
    295  1.1  dyoung 
    296  1.1  dyoung static inline uint32_t
    297  1.1  dyoung idx_decode(vtw_ctl_t *ctl, uint32_t bits)
    298  1.1  dyoung {
    299  1.1  dyoung 	uint32_t	idx	= bits & ctl->idx_mask;
    300  1.1  dyoung 
    301  1.1  dyoung 	if (idx_encode(ctl, idx) == bits)
    302  1.1  dyoung 		return idx;
    303  1.1  dyoung 	else
    304  1.1  dyoung 		return ~0;
    305  1.1  dyoung }
    306  1.1  dyoung 
    307  1.1  dyoung /*!\brief	insert index into fatp hash
    308  1.1  dyoung  *
    309  1.1  dyoung  *\param	idx	-	index of element being placed in hash chain
    310  1.1  dyoung  *\param	tag	-	32-bit tag identifier
    311  1.1  dyoung  *
    312  1.1  dyoung  *\returns
    313  1.1  dyoung  *	value which can be used to locate entry.
    314  1.1  dyoung  *
    315  1.1  dyoung  *\note
    316  1.1  dyoung  *	we rely on the fact that there are unused high bits in the index
    317  1.1  dyoung  *	for verification purposes on lookup.
    318  1.1  dyoung  */
    319  1.1  dyoung 
    320  1.1  dyoung static inline uint32_t
    321  1.1  dyoung fatp_vtw_inshash(fatp_ctl_t *fat, uint32_t idx, uint32_t tag, int which,
    322  1.1  dyoung     void *dbg)
    323  1.1  dyoung {
    324  1.1  dyoung 	fatp_t	*fp;
    325  1.1  dyoung 	fatp_t	**hash = (which ? fat->port : fat->hash);
    326  1.1  dyoung 	int	i;
    327  1.1  dyoung 
    328  1.1  dyoung 	fp = hash[tag & fat->mask];
    329  1.1  dyoung 
    330  1.1  dyoung 	while (!fp || fatp_full(fp)) {
    331  1.1  dyoung 		fatp_t	*fq;
    332  1.1  dyoung 
    333  1.1  dyoung 		/* All entries are inuse at the top level.
    334  1.1  dyoung 		 * We allocate a spare, and push the top level
    335  1.1  dyoung 		 * down one.  All entries in the fp we push down
    336  1.1  dyoung 		 * (think of a tape worm here) will be expelled sooner than
    337  1.1  dyoung 		 * any entries added subsequently to this hash bucket.
    338  1.1  dyoung 		 * This is a property of the time waits we are exploiting.
    339  1.1  dyoung 		 */
    340  1.1  dyoung 
    341  1.1  dyoung 		fq = fatp_alloc(fat);
    342  1.1  dyoung 		if (!fq) {
    343  1.1  dyoung 			vtw_age(fat->vtw, 0);
    344  1.1  dyoung 			fp = hash[tag & fat->mask];
    345  1.1  dyoung 			continue;
    346  1.1  dyoung 		}
    347  1.1  dyoung 
    348  1.1  dyoung 		fq->inuse = 0;
    349  1.1  dyoung 		fq->nxt   = fatp_index(fat, fp);
    350  1.1  dyoung 
    351  1.1  dyoung 		hash[tag & fat->mask] = fq;
    352  1.1  dyoung 
    353  1.1  dyoung 		fp = fq;
    354  1.1  dyoung 	}
    355  1.1  dyoung 
    356  1.1  dyoung 	KASSERT(!fatp_full(fp));
    357  1.1  dyoung 
    358  1.1  dyoung 	/* Fill highest index first.  Lookup is lowest first.
    359  1.1  dyoung 	 */
    360  1.1  dyoung 	for (i = fatp_ntags(); --i >= 0; ) {
    361  1.1  dyoung 		if (!((1 << i) & fp->inuse)) {
    362  1.1  dyoung 			break;
    363  1.1  dyoung 		}
    364  1.1  dyoung 	}
    365  1.1  dyoung 
    366  1.1  dyoung 	fp->inuse |= 1 << i;
    367  1.1  dyoung 	fp->tag[i] = tag ^ idx_encode(fat->vtw, idx) ^ fatp_xtra[i];
    368  1.1  dyoung 
    369  1.1  dyoung 	db_trace(KTR_VTW
    370  1.1  dyoung 		 , (fp, "fat: inuse %5.5x tag[%x] %8.8x"
    371  1.1  dyoung 		    , fp->inuse
    372  1.1  dyoung 		    , i, fp->tag[i]));
    373  1.1  dyoung 
    374  1.1  dyoung 	return fatp_key(fat, fp, i);
    375  1.1  dyoung }
    376  1.1  dyoung 
    377  1.1  dyoung static inline int
    378  1.1  dyoung vtw_alive(const vtw_t *vtw)
    379  1.1  dyoung {
    380  1.1  dyoung 	return vtw->hashed && vtw->expire.tv_sec;
    381  1.1  dyoung }
    382  1.1  dyoung 
    383  1.1  dyoung static inline uint32_t
    384  1.1  dyoung vtw_index_v4(vtw_ctl_t *ctl, vtw_v4_t *v4)
    385  1.1  dyoung {
    386  1.1  dyoung 	if (ctl->base.v4 <= v4 && v4 <= ctl->lim.v4)
    387  1.1  dyoung 		return v4 - ctl->base.v4;
    388  1.1  dyoung 
    389  1.1  dyoung 	KASSERT(0 && "vtw out of bounds");
    390  1.1  dyoung 
    391  1.1  dyoung 	return ~0;
    392  1.1  dyoung }
    393  1.1  dyoung 
    394  1.1  dyoung static inline uint32_t
    395  1.1  dyoung vtw_index_v6(vtw_ctl_t *ctl, vtw_v6_t *v6)
    396  1.1  dyoung {
    397  1.1  dyoung 	if (ctl->base.v6 <= v6 && v6 <= ctl->lim.v6)
    398  1.1  dyoung 		return v6 - ctl->base.v6;
    399  1.1  dyoung 
    400  1.1  dyoung 	KASSERT(0 && "vtw out of bounds");
    401  1.1  dyoung 
    402  1.1  dyoung 	return ~0;
    403  1.1  dyoung }
    404  1.1  dyoung 
    405  1.1  dyoung static inline uint32_t
    406  1.1  dyoung vtw_index(vtw_ctl_t *ctl, vtw_t *vtw)
    407  1.1  dyoung {
    408  1.1  dyoung 	if (ctl->clidx)
    409  1.1  dyoung 		ctl = ctl->ctl;
    410  1.1  dyoung 
    411  1.1  dyoung 	if (ctl->is_v4)
    412  1.1  dyoung 		return vtw_index_v4(ctl, (vtw_v4_t *)vtw);
    413  1.1  dyoung 
    414  1.1  dyoung 	if (ctl->is_v6)
    415  1.1  dyoung 		return vtw_index_v6(ctl, (vtw_v6_t *)vtw);
    416  1.1  dyoung 
    417  1.1  dyoung 	KASSERT(0 && "neither 4 nor 6.  most curious.");
    418  1.1  dyoung 
    419  1.1  dyoung 	return ~0;
    420  1.1  dyoung }
    421  1.1  dyoung 
    422  1.1  dyoung static inline vtw_t *
    423  1.1  dyoung vtw_from_index(vtw_ctl_t *ctl, uint32_t idx)
    424  1.1  dyoung {
    425  1.1  dyoung 	if (ctl->clidx)
    426  1.1  dyoung 		ctl = ctl->ctl;
    427  1.1  dyoung 
    428  1.1  dyoung 	/* See if the index looks like it might be an index.
    429  1.1  dyoung 	 * Bits on outside of the valid index bits is a give away.
    430  1.1  dyoung 	 */
    431  1.1  dyoung 	idx = idx_decode(ctl, idx);
    432  1.1  dyoung 
    433  1.1  dyoung 	if (idx == ~0) {
    434  1.1  dyoung 		return 0;
    435  1.1  dyoung 	} else if (ctl->is_v4) {
    436  1.1  dyoung 		vtw_v4_t	*vtw = ctl->base.v4 + idx;
    437  1.1  dyoung 
    438  1.1  dyoung 		return (ctl->base.v4 <= vtw && vtw <= ctl->lim.v4)
    439  1.1  dyoung 			? &vtw->common : 0;
    440  1.1  dyoung 	} else if (ctl->is_v6) {
    441  1.1  dyoung 		vtw_v6_t	*vtw = ctl->base.v6 + idx;
    442  1.1  dyoung 
    443  1.1  dyoung 		return (ctl->base.v6 <= vtw && vtw <= ctl->lim.v6)
    444  1.1  dyoung 			? &vtw->common : 0;
    445  1.1  dyoung 	} else {
    446  1.1  dyoung 		KASSERT(0 && "badness");
    447  1.1  dyoung 		return 0;
    448  1.1  dyoung 	}
    449  1.1  dyoung }
    450  1.1  dyoung 
    451  1.1  dyoung /*!\brief return the next vtw after this one.
    452  1.1  dyoung  *
    453  1.1  dyoung  * Due to the differing sizes of the entries in differing
    454  1.1  dyoung  * arenas, we have to ensure we ++ the correct pointer type.
    455  1.1  dyoung  *
    456  1.1  dyoung  * Also handles wrap.
    457  1.1  dyoung  */
    458  1.1  dyoung static inline vtw_t *
    459  1.1  dyoung vtw_next(vtw_ctl_t *ctl, vtw_t *vtw)
    460  1.1  dyoung {
    461  1.1  dyoung 	if (ctl->is_v4) {
    462  1.1  dyoung 		vtw_v4_t	*v4 = (void*)vtw;
    463  1.1  dyoung 
    464  1.1  dyoung 		vtw = &(++v4)->common;
    465  1.1  dyoung 	} else {
    466  1.1  dyoung 		vtw_v6_t	*v6 = (void*)vtw;
    467  1.1  dyoung 
    468  1.1  dyoung 		vtw = &(++v6)->common;
    469  1.1  dyoung 	}
    470  1.1  dyoung 
    471  1.1  dyoung 	if (vtw > ctl->lim.v)
    472  1.1  dyoung 		vtw = ctl->base.v;
    473  1.1  dyoung 
    474  1.1  dyoung 	return vtw;
    475  1.1  dyoung }
    476  1.1  dyoung 
    477  1.1  dyoung /*!\brief	remove entry from FATP hash chains
    478  1.1  dyoung  */
    479  1.1  dyoung static inline void
    480  1.1  dyoung vtw_unhash(vtw_ctl_t *ctl, vtw_t *vtw)
    481  1.1  dyoung {
    482  1.1  dyoung 	fatp_ctl_t	*fat	= ctl->fat;
    483  1.1  dyoung 	fatp_t		*fp;
    484  1.1  dyoung 	uint32_t	key = vtw->key;
    485  1.1  dyoung 	uint32_t	tag, slot, idx;
    486  1.1  dyoung 	vtw_v4_t	*v4 = (void*)vtw;
    487  1.1  dyoung 	vtw_v6_t	*v6 = (void*)vtw;
    488  1.1  dyoung 
    489  1.1  dyoung 	if (!vtw->hashed) {
    490  1.1  dyoung 		KASSERT(0 && "unhashed");
    491  1.1  dyoung 		return;
    492  1.1  dyoung 	}
    493  1.1  dyoung 
    494  1.1  dyoung 	if (fat->vtw->is_v4) {
    495  1.1  dyoung 		tag = v4_tag(v4->faddr, v4->fport, v4->laddr, v4->lport);
    496  1.1  dyoung 	} else if (fat->vtw->is_v6) {
    497  1.1  dyoung 		tag = v6_tag(&v6->faddr, v6->fport, &v6->laddr, v6->lport);
    498  1.1  dyoung 	} else {
    499  1.1  dyoung 		tag = 0;
    500  1.1  dyoung 		KASSERT(0 && "not reached");
    501  1.1  dyoung 	}
    502  1.1  dyoung 
    503  1.1  dyoung 	/* Remove from fat->hash[]
    504  1.1  dyoung 	 */
    505  1.1  dyoung 	slot = fatp_slot_from_key(fat, key);
    506  1.1  dyoung 	fp   = fatp_from_key(fat, key);
    507  1.1  dyoung 	idx  = vtw_index(ctl, vtw);
    508  1.1  dyoung 
    509  1.1  dyoung 	db_trace(KTR_VTW
    510  1.1  dyoung 		 , (fp, "fat: del inuse %5.5x slot %x idx %x key %x tag %x"
    511  1.1  dyoung 		    , fp->inuse, slot, idx, key, tag));
    512  1.1  dyoung 
    513  1.1  dyoung 	KASSERT(fp->inuse & (1 << slot));
    514  1.1  dyoung 	KASSERT(fp->tag[slot] == (tag ^ idx_encode(ctl, idx)
    515  1.1  dyoung 				  ^ fatp_xtra[slot]));
    516  1.1  dyoung 
    517  1.1  dyoung 	if ((fp->inuse & (1 << slot))
    518  1.1  dyoung 	    && fp->tag[slot] == (tag ^ idx_encode(ctl, idx)
    519  1.1  dyoung 				 ^ fatp_xtra[slot])) {
    520  1.1  dyoung 		fp->inuse ^= 1 << slot;
    521  1.1  dyoung 		fp->tag[slot] = 0;
    522  1.1  dyoung 
    523  1.1  dyoung 		/* When we delete entries, we do not compact.  This is
    524  1.1  dyoung 		 * due to temporality.  We add entries, and they
    525  1.1  dyoung 		 * (eventually) expire. Older entries will be further
    526  1.1  dyoung 		 * down the chain.
    527  1.1  dyoung 		 */
    528  1.1  dyoung 		if (!fp->inuse) {
    529  1.1  dyoung 			uint32_t hi = tag & fat->mask;
    530  1.1  dyoung 			fatp_t	*fq = 0;
    531  1.1  dyoung 			fatp_t	*fr = fat->hash[hi];
    532  1.1  dyoung 
    533  1.1  dyoung 			while (fr && fr != fp) {
    534  1.1  dyoung 				fr = fatp_next(fat, fq = fr);
    535  1.1  dyoung 			}
    536  1.1  dyoung 
    537  1.1  dyoung 			if (fr == fp) {
    538  1.1  dyoung 				if (fq) {
    539  1.1  dyoung 					fq->nxt = fp->nxt;
    540  1.1  dyoung 					fp->nxt = 0;
    541  1.1  dyoung 					fatp_free(fat, fp);
    542  1.1  dyoung 				} else {
    543  1.1  dyoung 					KASSERT(fat->hash[hi] == fp);
    544  1.1  dyoung 
    545  1.1  dyoung 					if (fp->nxt) {
    546  1.1  dyoung 						fat->hash[hi]
    547  1.1  dyoung 							= fatp_next(fat, fp);
    548  1.1  dyoung 						fp->nxt = 0;
    549  1.1  dyoung 						fatp_free(fat, fp);
    550  1.1  dyoung 					} else {
    551  1.1  dyoung 						/* retain for next use.
    552  1.1  dyoung 						 */
    553  1.1  dyoung 						;
    554  1.1  dyoung 					}
    555  1.1  dyoung 				}
    556  1.1  dyoung 			} else {
    557  1.1  dyoung 				fr = fat->hash[hi];
    558  1.1  dyoung 
    559  1.1  dyoung 				do {
    560  1.1  dyoung 					db_trace(KTR_VTW
    561  1.1  dyoung 						 , (fr
    562  1.1  dyoung 						    , "fat:*del inuse %5.5x"
    563  1.1  dyoung 						    " nxt %x"
    564  1.1  dyoung 						    , fr->inuse, fr->nxt));
    565  1.1  dyoung 
    566  1.1  dyoung 					fr = fatp_next(fat, fq = fr);
    567  1.1  dyoung 				} while (fr && fr != fp);
    568  1.1  dyoung 
    569  1.1  dyoung 				KASSERT(0 && "oops");
    570  1.1  dyoung 			}
    571  1.1  dyoung 		}
    572  1.1  dyoung 		vtw->key ^= ~0;
    573  1.1  dyoung 	}
    574  1.1  dyoung 
    575  1.1  dyoung 	if (fat->vtw->is_v4) {
    576  1.1  dyoung 		tag = v4_port_tag(v4->lport);
    577  1.1  dyoung 	} else if (fat->vtw->is_v6) {
    578  1.1  dyoung 		tag = v6_port_tag(v6->lport);
    579  1.1  dyoung 	}
    580  1.1  dyoung 
    581  1.1  dyoung 	/* Remove from fat->port[]
    582  1.1  dyoung 	 */
    583  1.1  dyoung 	key  = vtw->port_key;
    584  1.1  dyoung 	slot = fatp_slot_from_key(fat, key);
    585  1.1  dyoung 	fp   = fatp_from_key(fat, key);
    586  1.1  dyoung 	idx  = vtw_index(ctl, vtw);
    587  1.1  dyoung 
    588  1.1  dyoung 	db_trace(KTR_VTW
    589  1.1  dyoung 		 , (fp, "fatport: del inuse %5.5x"
    590  1.1  dyoung 		    " slot %x idx %x key %x tag %x"
    591  1.1  dyoung 		    , fp->inuse, slot, idx, key, tag));
    592  1.1  dyoung 
    593  1.1  dyoung 	KASSERT(fp->inuse & (1 << slot));
    594  1.1  dyoung 	KASSERT(fp->tag[slot] == (tag ^ idx_encode(ctl, idx)
    595  1.1  dyoung 				  ^ fatp_xtra[slot]));
    596  1.1  dyoung 
    597  1.1  dyoung 	if ((fp->inuse & (1 << slot))
    598  1.1  dyoung 	    && fp->tag[slot] == (tag ^ idx_encode(ctl, idx)
    599  1.1  dyoung 				 ^ fatp_xtra[slot])) {
    600  1.1  dyoung 		fp->inuse ^= 1 << slot;
    601  1.1  dyoung 		fp->tag[slot] = 0;
    602  1.1  dyoung 
    603  1.1  dyoung 		if (!fp->inuse) {
    604  1.1  dyoung 			uint32_t hi = tag & fat->mask;
    605  1.1  dyoung 			fatp_t	*fq = 0;
    606  1.1  dyoung 			fatp_t	*fr = fat->port[hi];
    607  1.1  dyoung 
    608  1.1  dyoung 			while (fr && fr != fp) {
    609  1.1  dyoung 				fr = fatp_next(fat, fq = fr);
    610  1.1  dyoung 			}
    611  1.1  dyoung 
    612  1.1  dyoung 			if (fr == fp) {
    613  1.1  dyoung 				if (fq) {
    614  1.1  dyoung 					fq->nxt = fp->nxt;
    615  1.1  dyoung 					fp->nxt = 0;
    616  1.1  dyoung 					fatp_free(fat, fp);
    617  1.1  dyoung 				} else {
    618  1.1  dyoung 					KASSERT(fat->port[hi] == fp);
    619  1.1  dyoung 
    620  1.1  dyoung 					if (fp->nxt) {
    621  1.1  dyoung 						fat->port[hi]
    622  1.1  dyoung 							= fatp_next(fat, fp);
    623  1.1  dyoung 						fp->nxt = 0;
    624  1.1  dyoung 						fatp_free(fat, fp);
    625  1.1  dyoung 					} else {
    626  1.1  dyoung 						/* retain for next use.
    627  1.1  dyoung 						 */
    628  1.1  dyoung 						;
    629  1.1  dyoung 					}
    630  1.1  dyoung 				}
    631  1.1  dyoung 			}
    632  1.1  dyoung 		}
    633  1.1  dyoung 		vtw->port_key ^= ~0;
    634  1.1  dyoung 	}
    635  1.1  dyoung 
    636  1.1  dyoung 	vtw->hashed = 0;
    637  1.1  dyoung }
    638  1.1  dyoung 
    639  1.1  dyoung /*!\brief	remove entry from hash, possibly free.
    640  1.1  dyoung  */
    641  1.1  dyoung void
    642  1.1  dyoung vtw_del(vtw_ctl_t *ctl, vtw_t *vtw)
    643  1.1  dyoung {
    644  1.1  dyoung 	KASSERT(mutex_owned(softnet_lock));
    645  1.1  dyoung 
    646  1.1  dyoung 	if (vtw->hashed) {
    647  1.1  dyoung 		++vtw_stats.del;
    648  1.1  dyoung 		vtw_unhash(ctl, vtw);
    649  1.1  dyoung 	}
    650  1.1  dyoung 
    651  1.1  dyoung 	/* We only delete the oldest entry.
    652  1.1  dyoung 	 */
    653  1.1  dyoung 	if (vtw != ctl->oldest.v)
    654  1.1  dyoung 		return;
    655  1.1  dyoung 
    656  1.1  dyoung 	--ctl->nalloc;
    657  1.1  dyoung 	++ctl->nfree;
    658  1.1  dyoung 
    659  1.1  dyoung 	vtw->expire.tv_sec  = 0;
    660  1.1  dyoung 	vtw->expire.tv_usec = ~0;
    661  1.1  dyoung 
    662  1.1  dyoung 	if (!ctl->nalloc)
    663  1.1  dyoung 		ctl->oldest.v = 0;
    664  1.1  dyoung 
    665  1.1  dyoung 	ctl->oldest.v = vtw_next(ctl, vtw);
    666  1.1  dyoung }
    667  1.1  dyoung 
    668  1.1  dyoung /*!\brief	insert vestigeal timewait in hash chain
    669  1.1  dyoung  */
    670  1.1  dyoung static void
    671  1.1  dyoung vtw_inshash_v4(vtw_ctl_t *ctl, vtw_t *vtw)
    672  1.1  dyoung {
    673  1.1  dyoung 	uint32_t	idx	= vtw_index(ctl, vtw);
    674  1.1  dyoung 	uint32_t	tag;
    675  1.1  dyoung 	vtw_v4_t	*v4 = (void*)vtw;
    676  1.1  dyoung 
    677  1.1  dyoung 	KASSERT(mutex_owned(softnet_lock));
    678  1.1  dyoung 	KASSERT(!vtw->hashed);
    679  1.1  dyoung 	KASSERT(ctl->clidx == vtw->msl_class);
    680  1.1  dyoung 
    681  1.1  dyoung 	++vtw_stats.ins;
    682  1.1  dyoung 
    683  1.1  dyoung 	tag = v4_tag(v4->faddr, v4->fport,
    684  1.1  dyoung 		     v4->laddr, v4->lport);
    685  1.1  dyoung 
    686  1.1  dyoung 	vtw->key = fatp_vtw_inshash(ctl->fat, idx, tag, 0, vtw);
    687  1.1  dyoung 
    688  1.1  dyoung 	db_trace(KTR_VTW, (ctl
    689  1.1  dyoung 			   , "vtw: ins %8.8x:%4.4x %8.8x:%4.4x"
    690  1.1  dyoung 			   " tag %8.8x key %8.8x"
    691  1.1  dyoung 			   , v4->faddr, v4->fport
    692  1.1  dyoung 			   , v4->laddr, v4->lport
    693  1.1  dyoung 			   , tag
    694  1.1  dyoung 			   , vtw->key));
    695  1.1  dyoung 
    696  1.1  dyoung 	tag = v4_port_tag(v4->lport);
    697  1.1  dyoung 	vtw->port_key = fatp_vtw_inshash(ctl->fat, idx, tag, 1, vtw);
    698  1.1  dyoung 
    699  1.1  dyoung 	db_trace(KTR_VTW, (ctl, "vtw: ins %P - %4.4x tag %8.8x key %8.8x"
    700  1.1  dyoung 			   , v4->lport, v4->lport
    701  1.1  dyoung 			   , tag
    702  1.1  dyoung 			   , vtw->key));
    703  1.1  dyoung 
    704  1.1  dyoung 	vtw->hashed = 1;
    705  1.1  dyoung }
    706  1.1  dyoung 
    707  1.1  dyoung /*!\brief	insert vestigeal timewait in hash chain
    708  1.1  dyoung  */
    709  1.1  dyoung static void
    710  1.1  dyoung vtw_inshash_v6(vtw_ctl_t *ctl, vtw_t *vtw)
    711  1.1  dyoung {
    712  1.1  dyoung 	uint32_t	idx	= vtw_index(ctl, vtw);
    713  1.1  dyoung 	uint32_t	tag;
    714  1.1  dyoung 	vtw_v6_t	*v6	= (void*)vtw;
    715  1.1  dyoung 
    716  1.1  dyoung 	KASSERT(mutex_owned(softnet_lock));
    717  1.1  dyoung 	KASSERT(!vtw->hashed);
    718  1.1  dyoung 	KASSERT(ctl->clidx == vtw->msl_class);
    719  1.1  dyoung 
    720  1.1  dyoung 	++vtw_stats.ins;
    721  1.1  dyoung 
    722  1.1  dyoung 	tag = v6_tag(&v6->faddr, v6->fport,
    723  1.1  dyoung 		     &v6->laddr, v6->lport);
    724  1.1  dyoung 
    725  1.1  dyoung 	vtw->key = fatp_vtw_inshash(ctl->fat, idx, tag, 0, vtw);
    726  1.1  dyoung 
    727  1.1  dyoung 	tag = v6_port_tag(v6->lport);
    728  1.1  dyoung 	vtw->port_key = fatp_vtw_inshash(ctl->fat, idx, tag, 1, vtw);
    729  1.1  dyoung 
    730  1.1  dyoung 	db_trace(KTR_VTW, (ctl, "vtw: ins %P - %4.4x tag %8.8x key %8.8x"
    731  1.1  dyoung 			   , v6->lport, v6->lport
    732  1.1  dyoung 			   , tag
    733  1.1  dyoung 			   , vtw->key));
    734  1.1  dyoung 
    735  1.1  dyoung 	vtw->hashed = 1;
    736  1.1  dyoung }
    737  1.1  dyoung /*!\brief	look up an IPv4 vestigial timewait entry
                      *
                      * \param ctl	control block to search; a null ctl or ctl->fat yields 0
                      * \param which	0: search ctl->fat->hash using the tag of the full
                      *		(faddr, fport, laddr, lport) tuple and compare all four;
                      *		non-zero: search ctl->fat->port using the tag of lport
                      *		and compare lport only.  Also used as an index into the
                      *		vtw_stats arrays, so presumably restricted to 0 or 1.
                      * \return	the matching live entry, or 0 when there is none
                      *
                      * Walks the fatp chain selected by the tag, probing every in-use slot.
                      * A probe that vtw_from_index() rejects (null) never touches the entry;
                      * otherwise the entry itself is examined.  The look/probe/hit/losing/miss
                      * counters and the max_chain/max_probe/max_loss high-water marks in
                      * vtw_stats are updated along the way.
                      */
    738  1.1  dyoung static vtw_t *
    739  1.1  dyoung vtw_lookup_hash_v4(vtw_ctl_t *ctl, uint32_t faddr, uint16_t fport
    740  1.1  dyoung 				 , uint32_t laddr, uint16_t lport
    741  1.1  dyoung 				 , int which)
    742  1.1  dyoung {
    743  1.1  dyoung 	vtw_v4_t	*v4;
    744  1.1  dyoung 	vtw_t		*vtw;
    745  1.1  dyoung 	uint32_t	tag;
    746  1.1  dyoung 	fatp_t		*fp;
    747  1.1  dyoung 	int		i;
    748  1.1  dyoung 	uint32_t	fatps = 0, probes = 0, losings = 0;	/* per-call tallies */
    749  1.1  dyoung 
    750  1.1  dyoung 	if (!ctl || !ctl->fat)
    751  1.1  dyoung 		return 0;
    752  1.1  dyoung 
    753  1.1  dyoung 	++vtw_stats.look[which];
    754  1.1  dyoung 
    755  1.1  dyoung 	if (which) {
    756  1.1  dyoung 		tag = v4_port_tag(lport);
    757  1.1  dyoung 		fp  = ctl->fat->port[tag & ctl->fat->mask];
    758  1.1  dyoung 	} else {
    759  1.1  dyoung 		tag = v4_tag(faddr, fport, laddr, lport);
    760  1.1  dyoung 		fp  = ctl->fat->hash[tag & ctl->fat->mask];
    761  1.1  dyoung 	}
    762  1.1  dyoung 
    763  1.1  dyoung 	while (fp && fp->inuse) {
    764  1.1  dyoung 		uint32_t	inuse = fp->inuse;	/* working copy of the slot bitmap */
    765  1.1  dyoung 
    766  1.1  dyoung 		++fatps;
    767  1.1  dyoung 
    768  1.1  dyoung 		for (i = 0; inuse && i < fatp_ntags(); ++i) {
    769  1.1  dyoung 			uint32_t	idx;
    770  1.1  dyoung 
    771  1.1  dyoung 			if (!(inuse & (1 << i)))
    772  1.1  dyoung 				continue;
    773  1.1  dyoung 
    774  1.1  dyoung 			inuse ^= 1 << i;	/* slot visited; loop stops once no bits remain */
    775  1.1  dyoung 
    776  1.1  dyoung 			++probes;
    777  1.1  dyoung 			++vtw_stats.probe[which];
    778  1.1  dyoung 
                     			/* Candidate entry index: stored tag xor search tag
                     			 * xor the per-slot constant.
                     			 */
    779  1.1  dyoung 			idx = fp->tag[i] ^ tag ^ fatp_xtra[i];
    780  1.1  dyoung 			vtw = vtw_from_index(ctl, idx);
    781  1.1  dyoung 
    782  1.1  dyoung 			if (!vtw) {
    783  1.1  dyoung 				/* Hopefully fast path.
    784  1.1  dyoung 				 */
    785  1.1  dyoung 				db_trace(KTR_VTW
    786  1.1  dyoung 					 , (fp, "vtw: fast %A:%P %A:%P"
    787  1.1  dyoung 					    " idx %x tag %x"
    788  1.1  dyoung 					    , faddr, fport
    789  1.1  dyoung 					    , laddr, lport
    790  1.1  dyoung 					    , idx, tag));
    791  1.1  dyoung 				continue;
    792  1.1  dyoung 			}
    793  1.1  dyoung 
    794  1.1  dyoung 			v4 = (void*)vtw;
    795  1.1  dyoung 
    796  1.1  dyoung 			/* The de-referencing of vtw is what we want to avoid.
    797  1.1  dyoung 			 * Losing.
    798  1.1  dyoung 			 */
    799  1.1  dyoung 			if (vtw_alive(vtw)
    800  1.1  dyoung 			    && ((which ? vtw->port_key : vtw->key)
    801  1.1  dyoung 				== fatp_key(ctl->fat, fp, i))
    802  1.1  dyoung 			    && (which
    803  1.1  dyoung 				|| (v4->faddr == faddr && v4->laddr == laddr
    804  1.1  dyoung 				    && v4->fport == fport))
    805  1.1  dyoung 			    && v4->lport == lport) {
    806  1.1  dyoung 				++vtw_stats.hit[which];
    807  1.1  dyoung 
    808  1.1  dyoung 				db_trace(KTR_VTW
    809  1.1  dyoung 					 , (fp, "vtw: hit %8.8x:%4.4x"
    810  1.1  dyoung 					    " %8.8x:%4.4x idx %x key %x"
    811  1.1  dyoung 					    , faddr, fport
    812  1.1  dyoung 					    , laddr, lport
    813  1.1  dyoung 					    , idx_decode(ctl, idx), vtw->key));
    814  1.1  dyoung 
    815  1.1  dyoung 				KASSERT(vtw->hashed);
    816  1.1  dyoung 
    817  1.1  dyoung 				goto out;
    818  1.1  dyoung 			}
    819  1.1  dyoung 			++vtw_stats.losing[which];
    820  1.1  dyoung 			++losings;
    821  1.1  dyoung 
                     			/* Everything below in this iteration is trace-only
                     			 * diagnostics about why the probe did not match.
                     			 */
    822  1.1  dyoung 			if (vtw_alive(vtw)) {
    823  1.1  dyoung 				db_trace(KTR_VTW
    824  1.1  dyoung 					 , (fp, "vtw:!mis %8.8x:%4.4x"
    825  1.1  dyoung 					    " %8.8x:%4.4x key %x tag %x"
    826  1.1  dyoung 					    , faddr, fport
    827  1.1  dyoung 					    , laddr, lport
    828  1.1  dyoung 					    , fatp_key(ctl->fat, fp, i)
    829  1.1  dyoung 					    , v4_tag(faddr, fport
    830  1.1  dyoung 						     , laddr, lport)));
    831  1.1  dyoung 				db_trace(KTR_VTW
    832  1.1  dyoung 					 , (vtw, "vtw:!mis %8.8x:%4.4x"
    833  1.1  dyoung 					    " %8.8x:%4.4x key %x tag %x"
    834  1.1  dyoung 					    , v4->faddr, v4->fport
    835  1.1  dyoung 					    , v4->laddr, v4->lport
    836  1.1  dyoung 					    , vtw->key
    837  1.1  dyoung 					    , v4_tag(v4->faddr, v4->fport
    838  1.1  dyoung 						     , v4->laddr, v4->lport)));
    839  1.1  dyoung 
    840  1.1  dyoung 				if (vtw->key == fatp_key(ctl->fat, fp, i)) {
    841  1.1  dyoung 					db_trace(KTR_VTW
    842  1.1  dyoung 						 , (vtw, "vtw:!mis %8.8x:%4.4x"
    843  1.1  dyoung 						    " %8.8x:%4.4x key %x"
    844  1.1  dyoung 						    " which %x"
    845  1.1  dyoung 						    , v4->faddr, v4->fport
    846  1.1  dyoung 						    , v4->laddr, v4->lport
    847  1.1  dyoung 						    , vtw->key
    848  1.1  dyoung 						    , which));
    849  1.1  dyoung 
    850  1.1  dyoung 				} else {
    851  1.1  dyoung 					db_trace(KTR_VTW
    852  1.1  dyoung 						 , (vtw
    853  1.1  dyoung 						    , "vtw:!mis"
    854  1.1  dyoung 						    " key %8.8x != %8.8x"
    855  1.1  dyoung 						    " idx %x i %x which %x"
    856  1.1  dyoung 						    , vtw->key
    857  1.1  dyoung 						    , fatp_key(ctl->fat, fp, i)
    858  1.1  dyoung 						    , idx_decode(ctl, idx)
    859  1.1  dyoung 						    , i
    860  1.1  dyoung 						    , which));
    861  1.1  dyoung 				}
    862  1.1  dyoung 			} else {
    863  1.1  dyoung 				db_trace(KTR_VTW
    864  1.1  dyoung 					 , (fp
    865  1.1  dyoung 					    , "vtw:!mis free entry"
    866  1.1  dyoung 					    " idx %x vtw %p which %x"
    867  1.1  dyoung 					    , idx_decode(ctl, idx)
    868  1.1  dyoung 					    , vtw, which));
    869  1.1  dyoung 			}
    870  1.1  dyoung 		}
    871  1.1  dyoung 
    872  1.1  dyoung 		if (fp->nxt) {
    873  1.1  dyoung 			fp = fatp_next(ctl->fat, fp);
    874  1.1  dyoung 		} else {
    875  1.1  dyoung 			break;
    876  1.1  dyoung 		}
    877  1.1  dyoung 	}
    878  1.1  dyoung 	++vtw_stats.miss[which];
    879  1.1  dyoung 	vtw = 0;
    880  1.1  dyoung out:
                     	/* Record high-water marks for this lookup (hit or miss). */
    881  1.1  dyoung 	if (fatps > vtw_stats.max_chain[which])
    882  1.1  dyoung 		vtw_stats.max_chain[which] = fatps;
    883  1.1  dyoung 	if (probes > vtw_stats.max_probe[which])
    884  1.1  dyoung 		vtw_stats.max_probe[which] = probes;
    885  1.1  dyoung 	if (losings > vtw_stats.max_loss[which])
    886  1.1  dyoung 		vtw_stats.max_loss[which] = losings;
    887  1.1  dyoung 
    888  1.1  dyoung 	return vtw;
    889  1.1  dyoung }
    890  1.1  dyoung /*!\brief	look up an IPv6 vestigial timewait entry
                      *
                      * \param ctl	control block to search; a null ctl or ctl->fat yields 0
                      * \param which	0: search ctl->fat->hash using the tag of the full
                      *		(faddr, fport, laddr, lport) tuple and compare all four;
                      *		non-zero: search ctl->fat->port using the tag of lport
                      *		and compare lport only.  Also used as an index into the
                      *		vtw_stats arrays, so presumably restricted to 0 or 1.
                      * \return	the matching live entry, or 0 when there is none
                      *
                      * Same chain walk as the IPv4 lookup, with addresses compared by
                      * bcmp().  Updates the look/probe/hit/losing/miss counters and the
                      * max_chain/max_probe/max_loss high-water marks in vtw_stats.
                      *
                      * NOTE(review): unlike vtw_lookup_hash_v4(), look[which] is bumped
                      * before the null check on ctl/ctl->fat, so lookups that bail out
                      * early are still counted here -- confirm which behaviour is intended.
                      */
    891  1.1  dyoung static vtw_t *
    892  1.1  dyoung vtw_lookup_hash_v6(vtw_ctl_t *ctl, const struct in6_addr *faddr, uint16_t fport
    893  1.1  dyoung 				 , const struct in6_addr *laddr, uint16_t lport
    894  1.1  dyoung 				 , int which)
    895  1.1  dyoung {
    896  1.1  dyoung 	vtw_v6_t	*v6;
    897  1.1  dyoung 	vtw_t		*vtw;
    898  1.1  dyoung 	uint32_t	tag;
    899  1.1  dyoung 	fatp_t		*fp;
    900  1.1  dyoung 	int		i;
    901  1.1  dyoung 	uint32_t	fatps = 0, probes = 0, losings = 0;	/* per-call tallies */
    902  1.1  dyoung 
    903  1.1  dyoung 	++vtw_stats.look[which];
    904  1.1  dyoung 
    905  1.1  dyoung 	if (!ctl || !ctl->fat)
    906  1.1  dyoung 		return 0;
    907  1.1  dyoung 
    908  1.1  dyoung 	if (which) {
    909  1.1  dyoung 		tag = v6_port_tag(lport);
    910  1.1  dyoung 		fp  = ctl->fat->port[tag & ctl->fat->mask];
    911  1.1  dyoung 	} else {
    912  1.1  dyoung 		tag = v6_tag(faddr, fport, laddr, lport);
    913  1.1  dyoung 		fp  = ctl->fat->hash[tag & ctl->fat->mask];
    914  1.1  dyoung 	}
    915  1.1  dyoung 
    916  1.1  dyoung 	while (fp && fp->inuse) {
    917  1.1  dyoung 		uint32_t	inuse = fp->inuse;	/* working copy of the slot bitmap */
    918  1.1  dyoung 
    919  1.1  dyoung 		++fatps;
    920  1.1  dyoung 
    921  1.1  dyoung 		for (i = 0; inuse && i < fatp_ntags(); ++i) {
    922  1.1  dyoung 			uint32_t	idx;
    923  1.1  dyoung 
    924  1.1  dyoung 			if (!(inuse & (1 << i)))
    925  1.1  dyoung 				continue;
    926  1.1  dyoung 
    927  1.1  dyoung 			inuse ^= 1 << i;	/* slot visited; loop stops once no bits remain */
    928  1.1  dyoung 
    929  1.1  dyoung 			++probes;
    930  1.1  dyoung 			++vtw_stats.probe[which];
    931  1.1  dyoung 
                     			/* Candidate entry index: stored tag xor search tag
                     			 * xor the per-slot constant.
                     			 */
    932  1.1  dyoung 			idx = fp->tag[i] ^ tag ^ fatp_xtra[i];
    933  1.1  dyoung 			vtw = vtw_from_index(ctl, idx);
    934  1.1  dyoung 
    935  1.1  dyoung 			db_trace(KTR_VTW
    936  1.1  dyoung 				 , (fp, "probe: %2d %6A:%4.4x %6A:%4.4x idx %x"
    937  1.1  dyoung 				    , i
    938  1.1  dyoung 				    , db_store(faddr, sizeof (*faddr)), fport
    939  1.1  dyoung 				    , db_store(laddr, sizeof (*laddr)), lport
    940  1.1  dyoung 				    , idx_decode(ctl, idx)));
    941  1.1  dyoung 
    942  1.1  dyoung 			if (!vtw) {
    943  1.1  dyoung 				/* Hopefully fast path.
    944  1.1  dyoung 				 */
    945  1.1  dyoung 				continue;
    946  1.1  dyoung 			}
    947  1.1  dyoung 
    948  1.1  dyoung 			v6 = (void*)vtw;
    949  1.1  dyoung 
                     			/* A hit needs a live entry whose stored key matches
                     			 * this slot and whose lport matches; the remaining
                     			 * tuple fields are compared only for which == 0.
                     			 */
    950  1.1  dyoung 			if (vtw_alive(vtw)
    951  1.1  dyoung 			    && ((which ? vtw->port_key : vtw->key)
    952  1.1  dyoung 				== fatp_key(ctl->fat, fp, i))
    953  1.1  dyoung 			    && v6->lport == lport
    954  1.1  dyoung 			    && (which
    955  1.1  dyoung 				|| (v6->fport == fport
    956  1.1  dyoung 				    && !bcmp(&v6->faddr, faddr, sizeof (*faddr))
    957  1.1  dyoung 				    && !bcmp(&v6->laddr, laddr
    958  1.1  dyoung 					     , sizeof (*laddr))))) {
    959  1.1  dyoung 				++vtw_stats.hit[which];
    960  1.1  dyoung 
    961  1.1  dyoung 				KASSERT(vtw->hashed);
    962  1.1  dyoung 				goto out;
    963  1.1  dyoung 			} else {
    964  1.1  dyoung 				++vtw_stats.losing[which];
    965  1.1  dyoung 				++losings;
    966  1.1  dyoung 			}
    967  1.1  dyoung 		}
    968  1.1  dyoung 
    969  1.1  dyoung 		if (fp->nxt) {
    970  1.1  dyoung 			fp = fatp_next(ctl->fat, fp);
    971  1.1  dyoung 		} else {
    972  1.1  dyoung 			break;
    973  1.1  dyoung 		}
    974  1.1  dyoung 	}
    975  1.1  dyoung 	++vtw_stats.miss[which];
    976  1.1  dyoung 	vtw = 0;
    977  1.1  dyoung out:
                     	/* Record high-water marks for this lookup (hit or miss). */
    978  1.1  dyoung 	if (fatps > vtw_stats.max_chain[which])
    979  1.1  dyoung 		vtw_stats.max_chain[which] = fatps;
    980  1.1  dyoung 	if (probes > vtw_stats.max_probe[which])
    981  1.1  dyoung 		vtw_stats.max_probe[which] = probes;
    982  1.1  dyoung 	if (losings > vtw_stats.max_loss[which])
    983  1.1  dyoung 		vtw_stats.max_loss[which] = losings;
    984  1.1  dyoung 
    985  1.1  dyoung 	return vtw;
    986  1.1  dyoung }
    987  1.1  dyoung 
    988  1.1  dyoung /*!\brief port iterator
                      *
                      * Returns the next live IPv4 entry whose local port equals it->port,
                      * or 0 when the chain is exhausted.
                      *
                      * Iteration state lives in \a it: it->fp is the current fatp (a null
                      * it->fp means "start from the head of the port chain for this port")
                      * and it->slot_idx is the first slot still to be examined in that
                      * fatp.  On a hit, slot_idx is advanced past the matching slot so the
                      * next call resumes after it.  When the chain ends, it->fp is reset to
                      * 0, so a further call would start over from the head.
                      *
                      * Uses the port-table ([1]) counters of vtw_stats.
    989  1.1  dyoung  */
    990  1.1  dyoung static vtw_t *
    991  1.1  dyoung vtw_next_port_v4(struct tcp_ports_iterator *it)
    992  1.1  dyoung {
    993  1.1  dyoung 	vtw_ctl_t	*ctl = it->ctl;
    994  1.1  dyoung 	vtw_v4_t	*v4;
    995  1.1  dyoung 	vtw_t		*vtw;
    996  1.1  dyoung 	uint32_t	tag;
    997  1.1  dyoung 	uint16_t	lport = it->port;
    998  1.1  dyoung 	fatp_t		*fp;
    999  1.1  dyoung 	int		i;
   1000  1.1  dyoung 	uint32_t	fatps = 0, probes = 0, losings = 0;	/* per-call tallies */
   1001  1.1  dyoung 
   1002  1.1  dyoung 	tag = v4_port_tag(lport);
   1003  1.1  dyoung 	if (!it->fp) {
   1004  1.1  dyoung 		it->fp = ctl->fat->port[tag & ctl->fat->mask];
   1005  1.1  dyoung 		it->slot_idx = 0;
   1006  1.1  dyoung 	}
   1007  1.1  dyoung 	fp  = it->fp;
   1008  1.1  dyoung 
   1009  1.1  dyoung 	while (fp) {
   1010  1.1  dyoung 		uint32_t	inuse = fp->inuse;	/* working copy of the slot bitmap */
   1011  1.1  dyoung 
   1012  1.1  dyoung 		++fatps;
   1013  1.1  dyoung 
   1014  1.1  dyoung 		for (i = it->slot_idx; inuse && i < fatp_ntags(); ++i) {
   1015  1.1  dyoung 			uint32_t	idx;
   1016  1.1  dyoung 
   1017  1.1  dyoung 			if (!(inuse & (1 << i)))
   1018  1.1  dyoung 				continue;
   1019  1.1  dyoung 
                     			/* Drop the bits below slot i (bit i itself is kept).
                     			 * NOTE(review): ~0 is a negative int, and left-shifting
                     			 * a negative value is undefined in ISO C; ~0u would
                     			 * avoid that.
                     			 */
   1020  1.1  dyoung 			inuse &= ~0 << i;
   1021  1.1  dyoung 
                     			/* NOTE(review): i starts at it->slot_idx and only grows,
                     			 * and slot_idx is not modified inside this loop before
                     			 * leaving it, so this test looks like it can never be
                     			 * true.
                     			 */
   1022  1.1  dyoung 			if (i < it->slot_idx)
   1023  1.1  dyoung 				continue;
   1024  1.1  dyoung 
   1025  1.1  dyoung 			++vtw_stats.probe[1];
   1026  1.1  dyoung 			++probes;
   1027  1.1  dyoung 
                     			/* Candidate entry index: stored tag xor search tag
                     			 * xor the per-slot constant.
                     			 */
   1028  1.1  dyoung 			idx = fp->tag[i] ^ tag ^ fatp_xtra[i];
   1029  1.1  dyoung 			vtw = vtw_from_index(ctl, idx);
   1030  1.1  dyoung 
   1031  1.1  dyoung 			if (!vtw) {
   1032  1.1  dyoung 				/* Hopefully fast path.
   1033  1.1  dyoung 				 */
   1034  1.1  dyoung 				continue;
   1035  1.1  dyoung 			}
   1036  1.1  dyoung 
   1037  1.1  dyoung 			v4 = (void*)vtw;
   1038  1.1  dyoung 
   1039  1.1  dyoung 			if (vtw_alive(vtw)
   1040  1.1  dyoung 			    && vtw->port_key == fatp_key(ctl->fat, fp, i)
   1041  1.1  dyoung 			    && v4->lport == lport) {
   1042  1.1  dyoung 				++vtw_stats.hit[1];
   1043  1.1  dyoung 
   1044  1.1  dyoung 				it->slot_idx = i + 1;	/* resume after this slot next time */
   1045  1.1  dyoung 
   1046  1.1  dyoung 				goto out;
   1047  1.1  dyoung 			} else if (vtw_alive(vtw)) {
   1048  1.1  dyoung 				++vtw_stats.losing[1];
   1049  1.1  dyoung 				++losings;
   1050  1.1  dyoung 
   1051  1.1  dyoung 				db_trace(KTR_VTW
   1052  1.1  dyoung 					 , (vtw, "vtw:!mis"
   1053  1.1  dyoung 					    " port %8.8x:%4.4x %8.8x:%4.4x"
   1054  1.1  dyoung 					    " key %x port %x"
   1055  1.1  dyoung 					    , v4->faddr, v4->fport
   1056  1.1  dyoung 					    , v4->laddr, v4->lport
   1057  1.1  dyoung 					    , vtw->key
   1058  1.1  dyoung 					    , lport));
   1059  1.1  dyoung 			} else {
   1060  1.1  dyoung 				/* Really losing here.  We are coming
   1061  1.1  dyoung 				 * up with references to free entries.
   1062  1.1  dyoung 				 * Might find it better to use
   1063  1.1  dyoung 				 * traditional, or need another
   1064  1.1  dyoung 				 * ad-hockery.  The other ad-hockery
   1065  1.1  dyoung 				 * would be to pull more info into the
   1066  1.1  dyoung 				 * cache line to reject the false
   1067  1.1  dyoung 				 * hits.
   1068  1.1  dyoung 				 */
   1069  1.1  dyoung 				++vtw_stats.losing[1];
   1070  1.1  dyoung 				++losings;
   1071  1.1  dyoung 				db_trace(KTR_VTW
   1072  1.1  dyoung 					 , (fp, "vtw:!mis port %x"
   1073  1.1  dyoung 					    " - free entry idx %x vtw %p"
   1074  1.1  dyoung 					    , lport
   1075  1.1  dyoung 					    , idx_decode(ctl, idx)
   1076  1.1  dyoung 					    , vtw));
   1077  1.1  dyoung 			}
   1078  1.1  dyoung 		}
   1079  1.1  dyoung 
   1080  1.1  dyoung 		if (fp->nxt) {
   1081  1.1  dyoung 			it->fp = fp = fatp_next(ctl->fat, fp);
   1082  1.1  dyoung 			it->slot_idx = 0;
   1083  1.1  dyoung 		} else {
   1084  1.1  dyoung 			it->fp = 0;	/* end of chain: iterator is reset */
   1085  1.1  dyoung 			break;
   1086  1.1  dyoung 		}
   1087  1.1  dyoung 	}
   1088  1.1  dyoung 	++vtw_stats.miss[1];
   1089  1.1  dyoung 
   1090  1.1  dyoung 	vtw = 0;
   1091  1.1  dyoung out:
                     	/* Record high-water marks for this call (hit or miss). */
   1092  1.1  dyoung 	if (fatps > vtw_stats.max_chain[1])
   1093  1.1  dyoung 		vtw_stats.max_chain[1] = fatps;
   1094  1.1  dyoung 	if (probes > vtw_stats.max_probe[1])
   1095  1.1  dyoung 		vtw_stats.max_probe[1] = probes;
   1096  1.1  dyoung 	if (losings > vtw_stats.max_loss[1])
   1097  1.1  dyoung 		vtw_stats.max_loss[1] = losings;
   1098  1.1  dyoung 
   1099  1.1  dyoung 	return vtw;
   1100  1.1  dyoung }
   1101  1.1  dyoung 
   1102  1.1  dyoung /*!\brief port iterator
   1103  1.1  dyoung  */
   1104  1.1  dyoung static vtw_t *
   1105  1.1  dyoung vtw_next_port_v6(struct tcp_ports_iterator *it)
   1106  1.1  dyoung {
   1107  1.1  dyoung 	vtw_ctl_t	*ctl = it->ctl;
   1108  1.1  dyoung 	vtw_v6_t	*v6;
   1109  1.1  dyoung 	vtw_t		*vtw;
   1110  1.1  dyoung 	uint32_t	tag;
   1111  1.1  dyoung 	uint16_t	lport = it->port;
   1112  1.1  dyoung 	fatp_t		*fp;
   1113  1.1  dyoung 	int		i;
   1114  1.1  dyoung 	uint32_t	fatps = 0, probes = 0, losings = 0;
   1115  1.1  dyoung 
   1116  1.1  dyoung 	tag = v6_port_tag(lport);
   1117  1.1  dyoung 	if (!it->fp) {
   1118  1.1  dyoung 		it->fp = ctl->fat->port[tag & ctl->fat->mask];
   1119  1.1  dyoung 		it->slot_idx = 0;
   1120  1.1  dyoung 	}
   1121  1.1  dyoung 	fp  = it->fp;
   1122  1.1  dyoung 
   1123  1.1  dyoung 	while (fp) {
   1124  1.1  dyoung 		uint32_t	inuse = fp->inuse;
   1125  1.1  dyoung 
   1126  1.1  dyoung 		++fatps;
   1127  1.1  dyoung 
   1128  1.1  dyoung 		for (i = it->slot_idx; inuse && i < fatp_ntags(); ++i) {
   1129  1.1  dyoung 			uint32_t	idx;
   1130  1.1  dyoung 
   1131  1.1  dyoung 			if (!(inuse & (1 << i)))
   1132  1.1  dyoung 				continue;
   1133  1.1  dyoung 
   1134  1.1  dyoung 			inuse &= ~0 << i;
   1135  1.1  dyoung 
   1136  1.1  dyoung 			if (i < it->slot_idx)
   1137  1.1  dyoung 				continue;
   1138  1.1  dyoung 
   1139  1.1  dyoung 			++vtw_stats.probe[1];
   1140  1.1  dyoung 			++probes;
   1141  1.1  dyoung 
   1142  1.1  dyoung 			idx = fp->tag[i] ^ tag ^ fatp_xtra[i];
   1143  1.1  dyoung 			vtw = vtw_from_index(ctl, idx);
   1144  1.1  dyoung 
   1145  1.1  dyoung 			if (!vtw) {
   1146  1.1  dyoung 				/* Hopefully fast path.
   1147  1.1  dyoung 				 */
   1148  1.1  dyoung 				continue;
   1149  1.1  dyoung 			}
   1150  1.1  dyoung 
   1151  1.1  dyoung 			v6 = (void*)vtw;
   1152  1.1  dyoung 
   1153  1.1  dyoung 			db_trace(KTR_VTW
   1154  1.1  dyoung 				 , (vtw, "vtw: i %x idx %x fp->tag %x"
   1155  1.1  dyoung 				    " tag %x xtra %x"
   1156  1.1  dyoung 				    , i, idx_decode(ctl, idx)
   1157  1.1  dyoung 				    , fp->tag[i], tag, fatp_xtra[i]));
   1158  1.1  dyoung 
   1159  1.1  dyoung 			if (vtw_alive(vtw)
   1160  1.1  dyoung 			    && vtw->port_key == fatp_key(ctl->fat, fp, i)
   1161  1.1  dyoung 			    && v6->lport == lport) {
   1162  1.1  dyoung 				++vtw_stats.hit[1];
   1163  1.1  dyoung 
   1164  1.1  dyoung 				db_trace(KTR_VTW
   1165  1.1  dyoung 					 , (fp, "vtw: nxt port %P - %4.4x"
   1166  1.1  dyoung 					    " idx %x key %x"
   1167  1.1  dyoung 					    , lport, lport
   1168  1.1  dyoung 					    , idx_decode(ctl, idx), vtw->key));
   1169  1.1  dyoung 
   1170  1.1  dyoung 				it->slot_idx = i + 1;
   1171  1.1  dyoung 				goto out;
   1172  1.1  dyoung 			} else if (vtw_alive(vtw)) {
   1173  1.1  dyoung 				++vtw_stats.losing[1];
   1174  1.1  dyoung 
   1175  1.1  dyoung 				db_trace(KTR_VTW
   1176  1.1  dyoung 					 , (vtw, "vtw:!mis port %6A:%4.4x"
   1177  1.1  dyoung 					    " %6A:%4.4x key %x port %x"
   1178  1.1  dyoung 					    , db_store(&v6->faddr
   1179  1.1  dyoung 						       , sizeof (v6->faddr))
   1180  1.1  dyoung 					    , v6->fport
   1181  1.1  dyoung 					    , db_store(&v6->laddr
   1182  1.1  dyoung 						       , sizeof (v6->faddr))
   1183  1.1  dyoung 					    , v6->lport
   1184  1.1  dyoung 					    , vtw->key
   1185  1.1  dyoung 					    , lport));
   1186  1.1  dyoung 			} else {
   1187  1.1  dyoung 				/* Really losing here.  We are coming
   1188  1.1  dyoung 				 * up with references to free entries.
   1189  1.1  dyoung 				 * Might find it better to use
   1190  1.1  dyoung 				 * traditional, or need another
   1191  1.1  dyoung 				 * add-hockery.  The other add-hockery
   1192  1.1  dyoung 				 * would be to pul more into into the
   1193  1.1  dyoung 				 * cache line to reject the false
   1194  1.1  dyoung 				 * hits.
   1195  1.1  dyoung 				 */
   1196  1.1  dyoung 				++vtw_stats.losing[1];
   1197  1.1  dyoung 				++losings;
   1198  1.1  dyoung 
   1199  1.1  dyoung 				db_trace(KTR_VTW
   1200  1.1  dyoung 					 , (fp
   1201  1.1  dyoung 					    , "vtw:!mis port %x"
   1202  1.1  dyoung 					    " - free entry idx %x vtw %p"
   1203  1.1  dyoung 					    , lport, idx_decode(ctl, idx)
   1204  1.1  dyoung 					    , vtw));
   1205  1.1  dyoung 			}
   1206  1.1  dyoung 		}
   1207  1.1  dyoung 
   1208  1.1  dyoung 		if (fp->nxt) {
   1209  1.1  dyoung 			it->fp = fp = fatp_next(ctl->fat, fp);
   1210  1.1  dyoung 			it->slot_idx = 0;
   1211  1.1  dyoung 		} else {
   1212  1.1  dyoung 			it->fp = 0;
   1213  1.1  dyoung 			break;
   1214  1.1  dyoung 		}
   1215  1.1  dyoung 	}
   1216  1.1  dyoung 	++vtw_stats.miss[1];
   1217  1.1  dyoung 
   1218  1.1  dyoung 	vtw = 0;
   1219  1.1  dyoung out:
   1220  1.1  dyoung 	if (fatps > vtw_stats.max_chain[1])
   1221  1.1  dyoung 		vtw_stats.max_chain[1] = fatps;
   1222  1.1  dyoung 	if (probes > vtw_stats.max_probe[1])
   1223  1.1  dyoung 		vtw_stats.max_probe[1] = probes;
   1224  1.1  dyoung 	if (losings > vtw_stats.max_loss[1])
   1225  1.1  dyoung 		vtw_stats.max_loss[1] = losings;
   1226  1.1  dyoung 
   1227  1.1  dyoung 	return vtw;
   1228  1.1  dyoung }
   1229  1.1  dyoung 
   1230  1.1  dyoung /*!\brief initialise the VTW allocation arena
   1231  1.1  dyoung  *
   1232  1.1  dyoung  * There are 1+3 allocation classes:
   1233  1.1  dyoung  *	0	classless
   1234  1.1  dyoung  *	{1,2,3}	MSL-class based allocation
   1235  1.1  dyoung  *
   1236  1.1  dyoung  * The allocation arenas are all initialised.  Classless gets all the
   1237  1.1  dyoung  * space.  MSL-class based divides the arena, so that allocation
   1238  1.1  dyoung  * within a class can proceed without having to consider entries
   1239  1.1  dyoung  * (aka: cache lines) from different classes.
   1240  1.1  dyoung  *
   1241  1.1  dyoung  * Usually, we are completely classless or class-based, but there can be
   1242  1.1  dyoung  * transition periods, corresponding to dynamic adjustments in the config
   1243  1.1  dyoung  * by the operator.
                      *
                      * \param fat	fatp control block to cross-link with \a ctl
                      * \param ctl	first element of an array of at least VTW_NCLASS control
                      *		blocks (ctl[1] .. ctl[VTW_NCLASS-1] are written here);
                      *		ctl->is_v4 selects the entry size
                      * \param n	number of entries to allocate
                      *
                      * NOTE(review): with n == 1 the mask loop below tests (mask & 0) == 0,
                      * which stays true after the mask reaches 0, so it would not terminate
                      * (assuming idx_mask is an unsigned 32-bit field) -- callers presumably
                      * never pass such a small n; confirm.
   1244  1.1  dyoung  */
   1245  1.1  dyoung static void
   1246  1.1  dyoung vtw_init(fatp_ctl_t *fat, vtw_ctl_t *ctl, uint32_t n)
   1247  1.1  dyoung {
   1248  1.1  dyoung 	int i;
   1249  1.1  dyoung 	int sz = (ctl->is_v4 ? sizeof (vtw_v4_t) : sizeof (vtw_v6_t));
   1250  1.1  dyoung 
   1251  1.1  dyoung 	ctl->base.v4 = kmem_alloc(n * sz, KM_SLEEP);
   1252  1.1  dyoung 	if (ctl->base.v4) {
   1253  1.1  dyoung 		vtw_t	*base;
   1254  1.1  dyoung 		int	class_n;
   1255  1.1  dyoung 
   1256  1.1  dyoung 		bzero(ctl->base.v4, n * sz);
   1257  1.1  dyoung 
                     		/* lim is the last entry of the arena (inclusive), alloc
                     		 * the first one to hand out.
                     		 */
   1258  1.1  dyoung 		if (ctl->is_v4) {
   1259  1.1  dyoung 			ctl->lim.v4    = ctl->base.v4 + n - 1;
   1260  1.1  dyoung 			ctl->alloc.v4  = ctl->base.v4;
   1261  1.1  dyoung 		} else {
   1262  1.1  dyoung 			ctl->lim.v6    = ctl->base.v6 + n - 1;
   1263  1.1  dyoung 			ctl->alloc.v6  = ctl->base.v6;
   1264  1.1  dyoung 		}
   1265  1.1  dyoung 
   1266  1.1  dyoung 		ctl->nfree  = n;
   1267  1.1  dyoung 		ctl->ctl    = ctl;
   1268  1.1  dyoung 
                     		/* Shrink an all-ones mask until it no longer covers n-1,
                     		 * then step back one bit: idx_mask ends up as the
                     		 * narrowest all-ones mask covering n-1 and idx_bits as
                     		 * its width.
                     		 */
   1269  1.1  dyoung 		ctl->idx_bits = 32;
   1270  1.1  dyoung 		for (ctl->idx_mask = ~0; (ctl->idx_mask & (n-1)) == n-1; ) {
   1271  1.1  dyoung 			ctl->idx_mask >>= 1;
   1272  1.1  dyoung 			ctl->idx_bits  -= 1;
   1273  1.1  dyoung 		}
   1274  1.1  dyoung 
   1275  1.1  dyoung 		ctl->idx_mask <<= 1;
   1276  1.1  dyoung 		ctl->idx_mask  |= 1;
   1277  1.1  dyoung 		ctl->idx_bits  += 1;
   1278  1.1  dyoung 
   1279  1.1  dyoung 		ctl->fat = fat;
   1280  1.1  dyoung 		fat->vtw = ctl;
   1281  1.1  dyoung 
   1282  1.1  dyoung 		/* Divide the resources equally amongst the classes.
   1283  1.1  dyoung 		 * This is not optimal, as the different classes
   1284  1.1  dyoung 		 * arrive and leave at different rates, but it is
   1285  1.1  dyoung 		 * the best I can do for now.
                     		 *
                     		 * Integer division: any remainder of n is not assigned
                     		 * to a class.
   1286  1.1  dyoung 		 */
   1287  1.1  dyoung 		class_n = n / (VTW_NCLASS-1);
   1288  1.1  dyoung 		base    = ctl->base.v;
   1289  1.1  dyoung 
   1290  1.1  dyoung 		for (i = 1; i < VTW_NCLASS; ++i) {
   1291  1.1  dyoung 			int j;
   1292  1.1  dyoung 
   1293  1.1  dyoung 			ctl[i] = ctl[0];	/* inherit the settings made above */
   1294  1.1  dyoung 			ctl[i].clidx = i;
   1295  1.1  dyoung 
   1296  1.1  dyoung 			ctl[i].base.v = base;
   1297  1.1  dyoung 			ctl[i].alloc  = ctl[i].base;
   1298  1.1  dyoung 
                     			/* NOTE(review): only the first class_n - 1 entries
                     			 * get msl_class set; the entry that becomes lim
                     			 * keeps the zeroed class -- confirm this is
                     			 * intended.
                     			 */
   1299  1.1  dyoung 			for (j = 0; j < class_n - 1; ++j) {
   1300  1.1  dyoung 				if (tcp_msl_enable)
   1301  1.1  dyoung 					base->msl_class = i;
   1302  1.1  dyoung 				base = vtw_next(ctl, base);
   1303  1.1  dyoung 			}
   1304  1.1  dyoung 
   1305  1.1  dyoung 			ctl[i].lim.v = base;
   1306  1.1  dyoung 			base = vtw_next(ctl, base);	/* first entry of the next class */
   1307  1.1  dyoung 			ctl[i].nfree = class_n;
   1308  1.1  dyoung 		}
   1309  1.1  dyoung 	}
   1310  1.1  dyoung 
   1311  1.1  dyoung 	vtw_debug_init();
   1312  1.1  dyoung }
   1313  1.1  dyoung 
   1314  1.1  dyoung /*!\brief	map class to TCP MSL
                      *
                      * Classes 0 and 1 use tcp_msl_remote, class 2 uses tcp_msl_local and
                      * every other class uses tcp_msl_loop.  When the selected tunable is
                      * zero, the fallback is TCPTV_MSL, TCPTV_MSL/2 and TCPTV_MSL/4
                      * respectively.  The value is in PR_SLOWHZ ticks (vtw_alloc() converts
                      * it to milliseconds by scaling with 1000 / PR_SLOWHZ).
   1315  1.1  dyoung  */
   1316  1.1  dyoung static inline uint32_t
   1317  1.1  dyoung class_to_msl(int class)
   1318  1.1  dyoung {
   1319  1.1  dyoung 	switch (class) {
   1320  1.1  dyoung 	case 0:
   1321  1.1  dyoung 	case 1:
   1322  1.1  dyoung 		return tcp_msl_remote ? tcp_msl_remote : (TCPTV_MSL >> 0);
   1323  1.1  dyoung 	case 2:
   1324  1.1  dyoung 		return tcp_msl_local ? tcp_msl_local : (TCPTV_MSL >> 1);
   1325  1.1  dyoung 	default:
   1326  1.1  dyoung 		return tcp_msl_loop ? tcp_msl_loop : (TCPTV_MSL >> 2);
   1327  1.1  dyoung 	}
   1328  1.1  dyoung }
   1329  1.1  dyoung 
   1330  1.1  dyoung /*!\brief	map TCP MSL to class
                      *
                      * With tcp_msl_enable off, everything is class 0 (classless).
                      * Otherwise the smallest matching threshold wins: an msl no greater
                      * than the loopback MSL gives class 3, no greater than the local MSL
                      * gives class 2, and anything larger gives class 1.  The thresholds
                      * use the same tunable-or-TCPTV_MSL-fraction values as class_to_msl().
   1331  1.1  dyoung  */
   1332  1.1  dyoung static inline uint32_t
   1333  1.1  dyoung msl_to_class(int msl)
   1334  1.1  dyoung {
   1335  1.1  dyoung 	if (tcp_msl_enable) {
   1336  1.1  dyoung 		if (msl <= (tcp_msl_loop ? tcp_msl_loop : (TCPTV_MSL >> 2)))
   1337  1.1  dyoung 			return 1+2;
   1338  1.1  dyoung 		if (msl <= (tcp_msl_local ? tcp_msl_local : (TCPTV_MSL >> 1)))
   1339  1.1  dyoung 			return 1+1;
   1340  1.1  dyoung 		return 1;
   1341  1.1  dyoung 	}
   1342  1.1  dyoung 	return 0;
   1343  1.1  dyoung }
   1344  1.1  dyoung 
   1345  1.1  dyoung /*!\brief allocate a vtw entry
                      *
                      * Takes the entry at ctl->alloc.v, stamps it with an expiry time of
                      * now + 2 * MSL (for ctl's class) and advances the allocation pointer.
                      * If the class has no free entry, vtw_age() is called repeatedly to
                      * force out the oldest ones, giving up after more attempts than the
                      * class has entries.
                      *
                      * \return the allocated entry, or 0 when ctl is null or has no arena,
                      *	   when no entry could be freed, or when the next entry still
                      *	   belongs to another class.  Must be called with softnet_lock
                      *	   held (asserted).
   1346  1.1  dyoung  */
   1347  1.1  dyoung static inline vtw_t *
   1348  1.1  dyoung vtw_alloc(vtw_ctl_t *ctl)
   1349  1.1  dyoung {
   1350  1.1  dyoung 	vtw_t	*vtw	= 0;
   1351  1.1  dyoung 	int	stuck	= 0;
   1352  1.1  dyoung 	int	avail	= ctl ? (ctl->nalloc + ctl->nfree) : 0;	/* entries in this class */
   1353  1.1  dyoung 	int	msl;
   1354  1.1  dyoung 
   1355  1.1  dyoung 	KASSERT(mutex_owned(softnet_lock));
   1356  1.1  dyoung 
   1357  1.1  dyoung 	/* If no resources, we will not get far.
   1358  1.1  dyoung 	 */
   1359  1.1  dyoung 	if (!ctl || !ctl->base.v4 || avail <= 0)
   1360  1.1  dyoung 		return 0;
   1361  1.1  dyoung 
   1362  1.1  dyoung 	/* Obtain a free one.
   1363  1.1  dyoung 	 */
   1364  1.1  dyoung 	while (!ctl->nfree) {
   1365  1.1  dyoung 		vtw_age(ctl, 0);	/* null time: expire the oldest entry regardless of the clock */
   1366  1.1  dyoung 
   1367  1.1  dyoung 		if (++stuck > avail) {
   1368  1.1  dyoung 			/* When in transition between
   1369  1.1  dyoung 			 * schemes (classless, classed) we
   1370  1.1  dyoung 			 * can be stuck having to await the
   1371  1.1  dyoung 			 * expiration of cross-allocated entries.
   1372  1.1  dyoung 			 *
   1373  1.1  dyoung 			 * Returning zero means we will fall back to the
   1374  1.1  dyoung 			 * traditional TIME_WAIT handling, except in the
   1375  1.1  dyoung 			 * case of a re-sched, in which case we cannot
   1376  1.1  dyoung 			 * perform the re-sched, but will retain the extant
   1377  1.1  dyoung 			 * entry.
   1378  1.1  dyoung 			 */
   1379  1.1  dyoung 			db_trace(KTR_VTW
   1380  1.1  dyoung 				 , (ctl, "vtw:!none free in class %x %x/%x"
   1381  1.1  dyoung 				    , ctl->clidx
   1382  1.1  dyoung 				    , ctl->nalloc, ctl->nfree));
   1383  1.1  dyoung 
   1384  1.1  dyoung 			return 0;
   1385  1.1  dyoung 		}
   1386  1.1  dyoung 	}
   1387  1.1  dyoung 
   1388  1.1  dyoung 	vtw = ctl->alloc.v;
   1389  1.1  dyoung 
   1390  1.1  dyoung 	if (vtw->msl_class != ctl->clidx) {
   1391  1.1  dyoung 		/* Usurping rules:
   1392  1.1  dyoung 		 * 	0 -> {1,2,3} or {1,2,3} -> 0
   1393  1.1  dyoung 		 */
   1394  1.1  dyoung 		KASSERT(!vtw->msl_class || !ctl->clidx);
   1395  1.1  dyoung 
   1396  1.1  dyoung 		if (vtw->hashed || vtw->expire.tv_sec) {
   1397  1.1  dyoung 		    /* As this is owned by some other class,
   1398  1.1  dyoung 		     * we must wait for it to expire it.
   1399  1.1  dyoung 		     * This will only happen on class/classless
   1400  1.1  dyoung 		     * transitions, which are guaranteed to progress
   1401  1.1  dyoung 		     * to completion in small finite time, barring bugs.
   1402  1.1  dyoung 		     */
   1403  1.1  dyoung 		    db_trace(KTR_VTW
   1404  1.1  dyoung 			     , (ctl, "vtw:!%p class %x!=%x %x:%x%s"
   1405  1.1  dyoung 				, vtw, vtw->msl_class, ctl->clidx
   1406  1.1  dyoung 				, vtw->expire.tv_sec
   1407  1.1  dyoung 				, vtw->expire.tv_usec
   1408  1.1  dyoung 				, vtw->hashed ? " hashed" : ""));
   1409  1.1  dyoung 
   1410  1.1  dyoung 		    return 0;
   1411  1.1  dyoung 		}
   1412  1.1  dyoung 
   1413  1.1  dyoung 		db_trace(KTR_VTW
   1414  1.1  dyoung 			 , (ctl, "vtw:!%p usurped from %x to %x"
   1415  1.1  dyoung 			    , vtw, vtw->msl_class, ctl->clidx));
   1416  1.1  dyoung 
   1417  1.1  dyoung 		vtw->msl_class = ctl->clidx;
   1418  1.1  dyoung 	}
   1419  1.1  dyoung 
   1420  1.1  dyoung 	if (vtw_alive(vtw)) {
   1421  1.1  dyoung 		KASSERT(0 && "next free not free");
   1422  1.1  dyoung 		return 0;
   1423  1.1  dyoung 	}
   1424  1.1  dyoung 
   1425  1.1  dyoung 	/* Advance allocation pointer.
   1426  1.1  dyoung 	 */
   1427  1.1  dyoung 	ctl->alloc.v = vtw_next(ctl, vtw);
   1428  1.1  dyoung 
   1429  1.1  dyoung 	--ctl->nfree;
   1430  1.1  dyoung 	++ctl->nalloc;
   1431  1.1  dyoung 
                     	/* Lifetime is 2 * MSL, converted from PR_SLOWHZ ticks. */
   1432  1.1  dyoung 	msl = (2 * class_to_msl(ctl->clidx) * 1000) / PR_SLOWHZ;	// msec
   1433  1.1  dyoung 
   1434  1.1  dyoung 	/* mark expiration
   1435  1.1  dyoung 	 */
   1436  1.1  dyoung 	microtime(&vtw->expire);
   1437  1.1  dyoung 
   1438  1.1  dyoung 	/* Move expiration into the future.
   1439  1.1  dyoung 	 */
   1440  1.1  dyoung 	vtw->expire.tv_sec  += msl / 1000;
   1441  1.1  dyoung 	vtw->expire.tv_usec += 1000 * (msl % 1000);
   1442  1.1  dyoung 
                     	/* Normalise: carry whole seconds out of tv_usec. */
   1443  1.1  dyoung 	while (vtw->expire.tv_usec >= 1000*1000) {
   1444  1.1  dyoung 		vtw->expire.tv_usec -= 1000*1000;
   1445  1.1  dyoung 		vtw->expire.tv_sec  += 1;
   1446  1.1  dyoung 	}
   1447  1.1  dyoung 
                     	/* First live entry of the class is also its oldest. */
   1448  1.1  dyoung 	if (!ctl->oldest.v)
   1449  1.1  dyoung 		ctl->oldest.v = vtw;
   1450  1.1  dyoung 
   1451  1.1  dyoung 	return vtw;
   1452  1.1  dyoung }
   1453  1.1  dyoung 
   1454  1.1  dyoung /*!\brief expiration
                      *
                      * Deletes entries of \a ctl, starting at ctl->oldest.v, whose expiry
                      * time is not later than \a _when, and stops at the first entry that
                      * has not expired yet.
                      *
                      * \param _when	cut-off time.  When null, the expiry time of the first
                      *		entry examined is latched as the cut-off, so that entry
                      *		is deleted regardless of the clock; such forced
                      *		expirations are counted in vtw_stats.kill.
                      * \return	number of entries still allocated in \a ctl
                      *
                      * Entries of a different class found at the oldest position are
                      * skipped by advancing ctl->oldest.v.  The loop re-reads ctl->oldest.v
                      * after each vtw_del(), so it presumably relies on vtw_del() advancing
                      * that pointer -- confirm against vtw_del().
   1455  1.1  dyoung  */
   1456  1.1  dyoung static int
   1457  1.1  dyoung vtw_age(vtw_ctl_t *ctl, struct timeval *_when)
   1458  1.1  dyoung {
   1459  1.1  dyoung 	vtw_t	*vtw;
   1460  1.1  dyoung 	struct timeval then, *when = _when;
   1461  1.1  dyoung 	int	maxtries = 0;	/* iteration guard, compared with ctl->nalloc */
   1462  1.1  dyoung 
   1463  1.1  dyoung 	if (!ctl->oldest.v) {
   1464  1.1  dyoung 		KASSERT(!ctl->nalloc);
   1465  1.1  dyoung 		return 0;
   1466  1.1  dyoung 	}
   1467  1.1  dyoung 
   1468  1.1  dyoung 	for (vtw = ctl->oldest.v; vtw && ctl->nalloc; ) {
   1469  1.1  dyoung 		if (++maxtries > ctl->nalloc)
   1470  1.1  dyoung 			break;
   1471  1.1  dyoung 
   1472  1.1  dyoung 		if (vtw->msl_class != ctl->clidx) {
   1473  1.1  dyoung 			db_trace(KTR_VTW
   1474  1.1  dyoung 				 , (vtw, "vtw:!age class mismatch %x != %x"
   1475  1.1  dyoung 				    , vtw->msl_class, ctl->clidx));
   1476  1.1  dyoung 			/* XXXX
   1477  1.1  dyoung 			 * See if the appropriate action is to skip to the next.
   1478  1.1  dyoung 			 * XXXX
   1479  1.1  dyoung 			 */
   1480  1.1  dyoung 			ctl->oldest.v = vtw = vtw_next(ctl, vtw);
   1481  1.1  dyoung 			continue;
   1482  1.1  dyoung 		}
   1483  1.1  dyoung 		if (!when) {
   1484  1.1  dyoung 			/* Latch oldest timeval if none specified.
   1485  1.1  dyoung 			 */
   1486  1.1  dyoung 			then = vtw->expire;
   1487  1.1  dyoung 			when = &then;
   1488  1.1  dyoung 		}
   1489  1.1  dyoung 
                     		/* Stop at the first entry that expires after the cut-off. */
   1490  1.1  dyoung 		if (!timercmp(&vtw->expire, when, <=))
   1491  1.1  dyoung 			break;
   1492  1.1  dyoung 
   1493  1.1  dyoung 		db_trace(KTR_VTW
   1494  1.1  dyoung 			 , (vtw, "vtw: expire %x %8.8x:%8.8x %x/%x"
   1495  1.1  dyoung 			    , ctl->clidx
   1496  1.1  dyoung 			    , vtw->expire.tv_sec
   1497  1.1  dyoung 			    , vtw->expire.tv_usec
   1498  1.1  dyoung 			    , ctl->nalloc
   1499  1.1  dyoung 			    , ctl->nfree));
   1500  1.1  dyoung 
   1501  1.1  dyoung 		if (!_when)	/* forced expiration, not a timed one */
   1502  1.1  dyoung 			++vtw_stats.kill;
   1503  1.1  dyoung 
   1504  1.1  dyoung 		vtw_del(ctl, vtw);
   1505  1.1  dyoung 		vtw = ctl->oldest.v;
   1506  1.1  dyoung 	}
   1507  1.1  dyoung 
   1508  1.1  dyoung 	return ctl->nalloc;	// # remaining allocated
   1509  1.1  dyoung }
   1510  1.1  dyoung 
   1511  1.1  dyoung static callout_t vtw_cs;
   1512  1.1  dyoung 
   1513  1.1  dyoung /*!\brief notice the passage of time.
   1514  1.1  dyoung  * It seems to be getting faster.  What happened to the year?
   1515  1.1  dyoung  */
   1516  1.1  dyoung static void
   1517  1.1  dyoung vtw_tick(void *arg)
   1518  1.1  dyoung {
   1519  1.1  dyoung 	struct timeval now;
   1520  1.1  dyoung 	int i, cnt = 0;
   1521  1.1  dyoung 
   1522  1.1  dyoung 	microtime(&now);
   1523  1.1  dyoung 
   1524  1.1  dyoung 	db_trace(KTR_VTW, (arg, "vtk: tick - now %8.8x:%8.8x"
   1525  1.1  dyoung 			   , now.tv_sec, now.tv_usec));
   1526  1.1  dyoung 
   1527  1.1  dyoung 	mutex_enter(softnet_lock);
   1528  1.1  dyoung 
   1529  1.1  dyoung 	for (i = 0; i < VTW_NCLASS; ++i) {
   1530  1.1  dyoung 		cnt += vtw_age(&vtw_tcpv4[i], &now);
   1531  1.1  dyoung 		cnt += vtw_age(&vtw_tcpv6[i], &now);
   1532  1.1  dyoung 	}
   1533  1.1  dyoung 
   1534  1.1  dyoung 	/* Keep ticks coming while we need them.
   1535  1.1  dyoung 	 */
   1536  1.1  dyoung 	if (cnt)
   1537  1.1  dyoung 		callout_schedule(&vtw_cs, hz / 5);
   1538  1.1  dyoung 	else {
   1539  1.1  dyoung 		tcp_vtw_was_enabled = 0;
   1540  1.1  dyoung 		tcbtable.vestige    = 0;
   1541  1.1  dyoung 	}
   1542  1.1  dyoung 	mutex_exit(softnet_lock);
   1543  1.1  dyoung }
   1544  1.1  dyoung 
   1545  1.1  dyoung /* in_pcblookup_ports assist for handling vestigial entries.
   1546  1.1  dyoung  */
   1547  1.1  dyoung static void *
   1548  1.1  dyoung tcp_init_ports_v4(struct in_addr addr, u_int port, int wild)
   1549  1.1  dyoung {
   1550  1.1  dyoung 	struct tcp_ports_iterator *it = &tcp_ports_iterator_v4;
   1551  1.1  dyoung 
   1552  1.1  dyoung 	bzero(it, sizeof (*it));
   1553  1.1  dyoung 
   1554  1.1  dyoung 	/* Note: the reference to vtw_tcpv4[0] is fine.
   1555  1.1  dyoung 	 * We do not need per-class iteration.  We just
   1556  1.1  dyoung 	 * need to get to the fat, and there is one
   1557  1.1  dyoung 	 * shared fat.
   1558  1.1  dyoung 	 */
   1559  1.1  dyoung 	if (vtw_tcpv4[0].fat) {
   1560  1.1  dyoung 		it->addr.v4 = addr;
   1561  1.1  dyoung 		it->port = port;
   1562  1.1  dyoung 		it->wild = !!wild;
   1563  1.1  dyoung 		it->ctl  = &vtw_tcpv4[0];
   1564  1.1  dyoung 
   1565  1.1  dyoung 		++vtw_stats.look[1];
   1566  1.1  dyoung 	}
   1567  1.1  dyoung 
   1568  1.1  dyoung 	return it;
   1569  1.1  dyoung }
   1570  1.1  dyoung 
   1571  1.1  dyoung /*!\brief export an IPv4 vtw.
   1572  1.1  dyoung  */
   1573  1.1  dyoung static int
   1574  1.1  dyoung vtw_export_v4(vtw_ctl_t *ctl, vtw_t *vtw, vestigial_inpcb_t *res)
   1575  1.1  dyoung {
   1576  1.1  dyoung 	vtw_v4_t	*v4 = (void*)vtw;
   1577  1.1  dyoung 
   1578  1.1  dyoung 	bzero(res, sizeof (*res));
   1579  1.1  dyoung 
   1580  1.1  dyoung 	if (ctl && vtw) {
   1581  1.1  dyoung 		if (!ctl->clidx && vtw->msl_class)
   1582  1.1  dyoung 			ctl += vtw->msl_class;
   1583  1.1  dyoung 		else
   1584  1.1  dyoung 			KASSERT(ctl->clidx == vtw->msl_class);
   1585  1.1  dyoung 
   1586  1.1  dyoung 		res->valid = 1;
   1587  1.1  dyoung 		res->v4    = 1;
   1588  1.1  dyoung 
   1589  1.1  dyoung 		res->faddr.v4.s_addr = v4->faddr;
   1590  1.1  dyoung 		res->laddr.v4.s_addr = v4->laddr;
   1591  1.1  dyoung 		res->fport	= v4->fport;
   1592  1.1  dyoung 		res->lport	= v4->lport;
   1593  1.1  dyoung 		res->vtw	= vtw;		// netlock held over call(s)
   1594  1.1  dyoung 		res->ctl	= ctl;
   1595  1.1  dyoung 		res->reuse_addr = vtw->reuse_addr;
   1596  1.1  dyoung 		res->reuse_port = vtw->reuse_port;
   1597  1.1  dyoung 		res->snd_nxt    = vtw->snd_nxt;
   1598  1.1  dyoung 		res->rcv_nxt	= vtw->rcv_nxt;
   1599  1.1  dyoung 		res->rcv_wnd	= vtw->rcv_wnd;
   1600  1.1  dyoung 		res->uid	= vtw->uid;
   1601  1.1  dyoung 	}
   1602  1.1  dyoung 
   1603  1.1  dyoung 	return res->valid;
   1604  1.1  dyoung }
   1605  1.1  dyoung 
   1606  1.1  dyoung /*!\brief return next port in the port iterator.  yowza.
   1607  1.1  dyoung  */
   1608  1.1  dyoung static int
   1609  1.1  dyoung tcp_next_port_v4(void *arg, struct vestigial_inpcb *res)
   1610  1.1  dyoung {
   1611  1.1  dyoung 	struct tcp_ports_iterator *it = arg;
   1612  1.1  dyoung 	vtw_t		*vtw = 0;
   1613  1.1  dyoung 
   1614  1.1  dyoung 	if (it->ctl)
   1615  1.1  dyoung 		vtw = vtw_next_port_v4(it);
   1616  1.1  dyoung 
   1617  1.1  dyoung 	if (!vtw)
   1618  1.1  dyoung 		it->ctl = 0;
   1619  1.1  dyoung 
   1620  1.1  dyoung 	return vtw_export_v4(it->ctl, vtw, res);
   1621  1.1  dyoung }
   1622  1.1  dyoung 
   1623  1.1  dyoung static int
   1624  1.1  dyoung tcp_lookup_v4(struct in_addr faddr, uint16_t fport,
   1625  1.1  dyoung               struct in_addr laddr, uint16_t lport,
   1626  1.1  dyoung 	      struct vestigial_inpcb *res)
   1627  1.1  dyoung {
   1628  1.1  dyoung 	vtw_t		*vtw;
   1629  1.1  dyoung 	vtw_ctl_t	*ctl;
   1630  1.1  dyoung 
   1631  1.1  dyoung 
   1632  1.1  dyoung 	db_trace(KTR_VTW
   1633  1.1  dyoung 		 , (res, "vtw: lookup %A:%P %A:%P"
   1634  1.1  dyoung 		    , faddr, fport
   1635  1.1  dyoung 		    , laddr, lport));
   1636  1.1  dyoung 
   1637  1.1  dyoung 	vtw = vtw_lookup_hash_v4((ctl = &vtw_tcpv4[0])
   1638  1.1  dyoung 				 , faddr.s_addr, fport
   1639  1.1  dyoung 				 , laddr.s_addr, lport, 0);
   1640  1.1  dyoung 
   1641  1.1  dyoung 	return vtw_export_v4(ctl, vtw, res);
   1642  1.1  dyoung }
   1643  1.1  dyoung 
   1644  1.1  dyoung /* in_pcblookup_ports assist for handling vestigial entries.
   1645  1.1  dyoung  */
   1646  1.1  dyoung static void *
   1647  1.1  dyoung tcp_init_ports_v6(const struct in6_addr *addr, u_int port, int wild)
   1648  1.1  dyoung {
   1649  1.1  dyoung 	struct tcp_ports_iterator *it = &tcp_ports_iterator_v6;
   1650  1.1  dyoung 
   1651  1.1  dyoung 	bzero(it, sizeof (*it));
   1652  1.1  dyoung 
   1653  1.1  dyoung 	/* Note: the reference to vtw_tcpv6[0] is fine.
   1654  1.1  dyoung 	 * We do not need per-class iteration.  We just
   1655  1.1  dyoung 	 * need to get to the fat, and there is one
   1656  1.1  dyoung 	 * shared fat.
   1657  1.1  dyoung 	 */
   1658  1.1  dyoung 	if (vtw_tcpv6[0].fat) {
   1659  1.1  dyoung 		it->addr.v6 = *addr;
   1660  1.1  dyoung 		it->port = port;
   1661  1.1  dyoung 		it->wild = !!wild;
   1662  1.1  dyoung 		it->ctl  = &vtw_tcpv6[0];
   1663  1.1  dyoung 
   1664  1.1  dyoung 		++vtw_stats.look[1];
   1665  1.1  dyoung 	}
   1666  1.1  dyoung 
   1667  1.1  dyoung 	return it;
   1668  1.1  dyoung }
   1669  1.1  dyoung 
   1670  1.1  dyoung /*!\brief export an IPv6 vtw.
   1671  1.1  dyoung  */
   1672  1.1  dyoung static int
   1673  1.1  dyoung vtw_export_v6(vtw_ctl_t *ctl, vtw_t *vtw, vestigial_inpcb_t *res)
   1674  1.1  dyoung {
   1675  1.1  dyoung 	vtw_v6_t	*v6 = (void*)vtw;
   1676  1.1  dyoung 
   1677  1.1  dyoung 	bzero(res, sizeof (*res));
   1678  1.1  dyoung 
   1679  1.1  dyoung 	if (ctl && vtw) {
   1680  1.1  dyoung 		if (!ctl->clidx && vtw->msl_class)
   1681  1.1  dyoung 			ctl += vtw->msl_class;
   1682  1.1  dyoung 		else
   1683  1.1  dyoung 			KASSERT(ctl->clidx == vtw->msl_class);
   1684  1.1  dyoung 
   1685  1.1  dyoung 		res->valid = 1;
   1686  1.1  dyoung 		res->v4    = 0;
   1687  1.1  dyoung 
   1688  1.1  dyoung 		res->faddr.v6	= v6->faddr;
   1689  1.1  dyoung 		res->laddr.v6	= v6->laddr;
   1690  1.1  dyoung 		res->fport	= v6->fport;
   1691  1.1  dyoung 		res->lport	= v6->lport;
   1692  1.1  dyoung 		res->vtw	= vtw;		// netlock held over call(s)
   1693  1.1  dyoung 		res->ctl	= ctl;
   1694  1.1  dyoung 
   1695  1.1  dyoung 		res->v6only	= vtw->v6only;
   1696  1.1  dyoung 		res->reuse_addr = vtw->reuse_addr;
   1697  1.1  dyoung 		res->reuse_port = vtw->reuse_port;
   1698  1.1  dyoung 
   1699  1.1  dyoung 		res->snd_nxt    = vtw->snd_nxt;
   1700  1.1  dyoung 		res->rcv_nxt	= vtw->rcv_nxt;
   1701  1.1  dyoung 		res->rcv_wnd	= vtw->rcv_wnd;
   1702  1.1  dyoung 		res->uid	= vtw->uid;
   1703  1.1  dyoung 	}
   1704  1.1  dyoung 
   1705  1.1  dyoung 	return res->valid;
   1706  1.1  dyoung }
   1707  1.1  dyoung 
   1708  1.1  dyoung static int
   1709  1.1  dyoung tcp_next_port_v6(void *arg, struct vestigial_inpcb *res)
   1710  1.1  dyoung {
   1711  1.1  dyoung 	struct tcp_ports_iterator *it = arg;
   1712  1.1  dyoung 	vtw_t		*vtw = 0;
   1713  1.1  dyoung 
   1714  1.1  dyoung 	if (it->ctl)
   1715  1.1  dyoung 		vtw = vtw_next_port_v6(it);
   1716  1.1  dyoung 
   1717  1.1  dyoung 	if (!vtw)
   1718  1.1  dyoung 		it->ctl = 0;
   1719  1.1  dyoung 
   1720  1.1  dyoung 	return vtw_export_v6(it->ctl, vtw, res);
   1721  1.1  dyoung }
   1722  1.1  dyoung 
   1723  1.1  dyoung static int
   1724  1.1  dyoung tcp_lookup_v6(const struct in6_addr *faddr, uint16_t fport,
   1725  1.1  dyoung               const struct in6_addr *laddr, uint16_t lport,
   1726  1.1  dyoung 	      struct vestigial_inpcb *res)
   1727  1.1  dyoung {
   1728  1.1  dyoung 	vtw_ctl_t	*ctl;
   1729  1.1  dyoung 	vtw_t		*vtw;
   1730  1.1  dyoung 
   1731  1.1  dyoung 	db_trace(KTR_VTW
   1732  1.1  dyoung 		 , (res, "vtw: lookup %6A:%P %6A:%P"
   1733  1.1  dyoung 		    , db_store(faddr, sizeof (*faddr)), fport
   1734  1.1  dyoung 		    , db_store(laddr, sizeof (*laddr)), lport));
   1735  1.1  dyoung 
   1736  1.1  dyoung 	vtw = vtw_lookup_hash_v6((ctl = &vtw_tcpv6[0])
   1737  1.1  dyoung 				 , faddr, fport
   1738  1.1  dyoung 				 , laddr, lport, 0);
   1739  1.1  dyoung 
   1740  1.1  dyoung 	return vtw_export_v6(ctl, vtw, res);
   1741  1.1  dyoung }
   1742  1.1  dyoung 
   1743  1.1  dyoung static vestigial_hooks_t tcp_hooks = {
   1744  1.1  dyoung 	.init_ports4	= tcp_init_ports_v4,
   1745  1.1  dyoung 	.next_port4	= tcp_next_port_v4,
   1746  1.1  dyoung 	.lookup4	= tcp_lookup_v4,
   1747  1.1  dyoung 	.init_ports6	= tcp_init_ports_v6,
   1748  1.1  dyoung 	.next_port6	= tcp_next_port_v6,
   1749  1.1  dyoung 	.lookup6	= tcp_lookup_v6,
   1750  1.1  dyoung };
   1751  1.1  dyoung 
   1752  1.1  dyoung static bool
   1753  1.1  dyoung vtw_select(int af, fatp_ctl_t **fatp, vtw_ctl_t **ctlp)
   1754  1.1  dyoung {
   1755  1.1  dyoung 	fatp_ctl_t	*fat;
   1756  1.1  dyoung 	vtw_ctl_t	*ctl;
   1757  1.1  dyoung 
   1758  1.1  dyoung 	switch (af) {
   1759  1.1  dyoung 	case AF_INET:
   1760  1.1  dyoung 		fat = &fat_tcpv4;
   1761  1.1  dyoung 		ctl = &vtw_tcpv4[0];
   1762  1.1  dyoung 		break;
   1763  1.1  dyoung 	case AF_INET6:
   1764  1.1  dyoung 		fat = &fat_tcpv6;
   1765  1.1  dyoung 		ctl = &vtw_tcpv6[0];
   1766  1.1  dyoung 		break;
   1767  1.1  dyoung 	default:
   1768  1.1  dyoung 		return false;
   1769  1.1  dyoung 	}
   1770  1.1  dyoung 	if (fatp != NULL)
   1771  1.1  dyoung 		*fatp = fat;
   1772  1.1  dyoung 	if (ctlp != NULL)
   1773  1.1  dyoung 		*ctlp = ctl;
   1774  1.1  dyoung 	return true;
   1775  1.1  dyoung }
   1776  1.1  dyoung 
   1777  1.1  dyoung /*!\brief	initialize controlling instance
   1778  1.1  dyoung  */
   1779  1.1  dyoung static int
   1780  1.1  dyoung vtw_control_init(int af)
   1781  1.1  dyoung {
   1782  1.1  dyoung 	fatp_ctl_t	*fat;
   1783  1.1  dyoung 	vtw_ctl_t	*ctl;
   1784  1.1  dyoung 
   1785  1.1  dyoung 	if (!vtw_select(af, &fat, &ctl))
   1786  1.1  dyoung 		return EAFNOSUPPORT;
   1787  1.1  dyoung 
   1788  1.1  dyoung 	if (!fat->base) {
   1789  1.1  dyoung 		uint32_t	n, m;
   1790  1.1  dyoung 
   1791  1.1  dyoung 		KASSERT(powerof2(tcp_vtw_entries));
   1792  1.1  dyoung 
   1793  1.1  dyoung 		/* Allocate 10% more capacity in the fat pointers.
   1794  1.1  dyoung 		 * We should only need ~#hash additional based on
   1795  1.1  dyoung 		 * how they age, but TIME_WAIT assassination could cause
   1796  1.1  dyoung 		 * sparse fat pointer utilisation.
   1797  1.1  dyoung 		 */
   1798  1.1  dyoung 		m = 512;
   1799  1.1  dyoung 		n = 2*m + (11 * (tcp_vtw_entries / fatp_ntags())) / 10;
   1800  1.1  dyoung 
   1801  1.1  dyoung 		fatp_init(fat, n, m);
   1802  1.1  dyoung 
   1803  1.1  dyoung 		if (!fat->base)
   1804  1.1  dyoung 			return ENOMEM;
   1805  1.1  dyoung 	}
   1806  1.1  dyoung 
   1807  1.1  dyoung 	if (!ctl->base.v) {
   1808  1.1  dyoung 
   1809  1.1  dyoung 		vtw_init(fat, ctl, tcp_vtw_entries);
   1810  1.1  dyoung 		if (!ctl->base.v)
   1811  1.1  dyoung 			return ENOMEM;
   1812  1.1  dyoung 	}
   1813  1.1  dyoung 
   1814  1.1  dyoung 	return 0;
   1815  1.1  dyoung }
   1816  1.1  dyoung 
   1817  1.1  dyoung /*!\brief	select controlling instance
   1818  1.1  dyoung  */
   1819  1.1  dyoung static vtw_ctl_t *
   1820  1.1  dyoung vtw_control(int af, uint32_t msl)
   1821  1.1  dyoung {
   1822  1.1  dyoung 	fatp_ctl_t	*fat;
   1823  1.1  dyoung 	vtw_ctl_t	*ctl;
   1824  1.1  dyoung 	int		class	= msl_to_class(msl);
   1825  1.1  dyoung 
   1826  1.1  dyoung 	if (!vtw_select(af, &fat, &ctl))
   1827  1.1  dyoung 		return NULL;
   1828  1.1  dyoung 
   1829  1.1  dyoung 	if (!fat->base || !ctl->base.v)
   1830  1.1  dyoung 		return NULL;
   1831  1.1  dyoung 
   1832  1.1  dyoung 	return ctl + class;
   1833  1.1  dyoung }
   1834  1.1  dyoung 
   1835  1.1  dyoung /*!\brief	add TCP pcb to vestigial timewait
   1836  1.1  dyoung  */
   1837  1.1  dyoung int
   1838  1.1  dyoung vtw_add(int af, struct tcpcb *tp)
   1839  1.1  dyoung {
   1840  1.1  dyoung 	int		enable;
   1841  1.1  dyoung 	vtw_ctl_t	*ctl;
   1842  1.1  dyoung 	vtw_t		*vtw;
   1843  1.1  dyoung 
   1844  1.1  dyoung 	KASSERT(mutex_owned(softnet_lock));
   1845  1.1  dyoung 
   1846  1.1  dyoung 	ctl = vtw_control(af, tp->t_msl);
   1847  1.1  dyoung 	if (!ctl)
   1848  1.1  dyoung 		return 0;
   1849  1.1  dyoung 
   1850  1.1  dyoung 	enable = (af == AF_INET) ? tcp4_vtw_enable : tcp6_vtw_enable;
   1851  1.1  dyoung 
   1852  1.1  dyoung 	vtw = vtw_alloc(ctl);
   1853  1.1  dyoung 
   1854  1.1  dyoung 	if (vtw) {
   1855  1.1  dyoung 		vtw->snd_nxt = tp->snd_nxt;
   1856  1.1  dyoung 		vtw->rcv_nxt = tp->rcv_nxt;
   1857  1.1  dyoung 
   1858  1.1  dyoung 		switch (af) {
   1859  1.1  dyoung 		case AF_INET: {
   1860  1.1  dyoung 			struct inpcb	*inp = tp->t_inpcb;
   1861  1.1  dyoung 			vtw_v4_t	*v4  = (void*)vtw;
   1862  1.1  dyoung 
   1863  1.1  dyoung 			v4->faddr = inp->inp_faddr.s_addr;
   1864  1.1  dyoung 			v4->laddr = inp->inp_laddr.s_addr;
   1865  1.1  dyoung 			v4->fport = inp->inp_fport;
   1866  1.1  dyoung 			v4->lport = inp->inp_lport;
   1867  1.1  dyoung 
   1868  1.1  dyoung 			vtw->reuse_port = !!(inp->inp_socket->so_options
   1869  1.1  dyoung 					     & SO_REUSEPORT);
   1870  1.1  dyoung 			vtw->reuse_addr = !!(inp->inp_socket->so_options
   1871  1.1  dyoung 					     & SO_REUSEADDR);
   1872  1.1  dyoung 			vtw->v6only	= 0;
   1873  1.1  dyoung 			vtw->uid	= inp->inp_socket->so_uidinfo->ui_uid;
   1874  1.1  dyoung 
   1875  1.1  dyoung 			vtw_inshash_v4(ctl, vtw);
   1876  1.1  dyoung 
   1877  1.1  dyoung 
   1878  1.1  dyoung #ifdef VTW_DEBUG
   1879  1.1  dyoung 			/* Immediate lookup (connected and port) to
   1880  1.1  dyoung 			 * ensure at least that works!
   1881  1.1  dyoung 			 */
   1882  1.1  dyoung 			if (enable & 4) {
   1883  1.1  dyoung 				KASSERT(vtw_lookup_hash_v4
   1884  1.1  dyoung 					(ctl
   1885  1.1  dyoung 					 , inp->inp_faddr.s_addr, inp->inp_fport
   1886  1.1  dyoung 					 , inp->inp_laddr.s_addr, inp->inp_lport
   1887  1.1  dyoung 					 , 0)
   1888  1.1  dyoung 					== vtw);
   1889  1.1  dyoung 				KASSERT(vtw_lookup_hash_v4
   1890  1.1  dyoung 					(ctl
   1891  1.1  dyoung 					 , inp->inp_faddr.s_addr, inp->inp_fport
   1892  1.1  dyoung 					 , inp->inp_laddr.s_addr, inp->inp_lport
   1893  1.1  dyoung 					 , 1));
   1894  1.1  dyoung 			}
   1895  1.1  dyoung 			/* Immediate port iterator functionality check: not wild
   1896  1.1  dyoung 			 */
   1897  1.1  dyoung 			if (enable & 8) {
   1898  1.1  dyoung 				struct tcp_ports_iterator *it;
   1899  1.1  dyoung 				struct vestigial_inpcb res;
   1900  1.1  dyoung 				int cnt = 0;
   1901  1.1  dyoung 
   1902  1.1  dyoung 				it = tcp_init_ports_v4(inp->inp_laddr
   1903  1.1  dyoung 						       , inp->inp_lport, 0);
   1904  1.1  dyoung 
   1905  1.1  dyoung 				while (tcp_next_port_v4(it, &res)) {
   1906  1.1  dyoung 					++cnt;
   1907  1.1  dyoung 				}
   1908  1.1  dyoung 				KASSERT(cnt);
   1909  1.1  dyoung 			}
   1910  1.1  dyoung 			/* Immediate port iterator functionality check: wild
   1911  1.1  dyoung 			 */
   1912  1.1  dyoung 			if (enable & 16) {
   1913  1.1  dyoung 				struct tcp_ports_iterator *it;
   1914  1.1  dyoung 				struct vestigial_inpcb res;
   1915  1.1  dyoung 				struct in_addr any;
   1916  1.1  dyoung 				int cnt = 0;
   1917  1.1  dyoung 
   1918  1.1  dyoung 				any.s_addr = htonl(INADDR_ANY);
   1919  1.1  dyoung 
   1920  1.1  dyoung 				it = tcp_init_ports_v4(any, inp->inp_lport, 1);
   1921  1.1  dyoung 
   1922  1.1  dyoung 				while (tcp_next_port_v4(it, &res)) {
   1923  1.1  dyoung 					++cnt;
   1924  1.1  dyoung 				}
   1925  1.1  dyoung 				KASSERT(cnt);
   1926  1.1  dyoung 			}
   1927  1.1  dyoung #endif /* VTW_DEBUG */
   1928  1.1  dyoung 			break;
   1929  1.1  dyoung 		}
   1930  1.1  dyoung 
   1931  1.1  dyoung 		case AF_INET6: {
   1932  1.1  dyoung 			struct in6pcb	*inp = tp->t_in6pcb;
   1933  1.1  dyoung 			vtw_v6_t	*v6  = (void*)vtw;
   1934  1.1  dyoung 
   1935  1.1  dyoung 			v6->faddr = inp->in6p_faddr;
   1936  1.1  dyoung 			v6->laddr = inp->in6p_laddr;
   1937  1.1  dyoung 			v6->fport = inp->in6p_fport;
   1938  1.1  dyoung 			v6->lport = inp->in6p_lport;
   1939  1.1  dyoung 
   1940  1.1  dyoung 			vtw->reuse_port = !!(inp->in6p_socket->so_options
   1941  1.1  dyoung 					     & SO_REUSEPORT);
   1942  1.1  dyoung 			vtw->reuse_addr = !!(inp->in6p_socket->so_options
   1943  1.1  dyoung 					     & SO_REUSEADDR);
   1944  1.1  dyoung 			vtw->v6only	= !!(inp->in6p_flags
   1945  1.1  dyoung 					     & IN6P_IPV6_V6ONLY);
   1946  1.1  dyoung 			vtw->uid	= inp->in6p_socket->so_uidinfo->ui_uid;
   1947  1.1  dyoung 
   1948  1.1  dyoung 			vtw_inshash_v6(ctl, vtw);
   1949  1.1  dyoung #ifdef VTW_DEBUG
   1950  1.1  dyoung 			/* Immediate lookup (connected and port) to
   1951  1.1  dyoung 			 * ensure at least that works!
   1952  1.1  dyoung 			 */
   1953  1.1  dyoung 			if (enable & 4) {
   1954  1.1  dyoung 				KASSERT(vtw_lookup_hash_v6(ctl
   1955  1.1  dyoung 					 , &inp->in6p_faddr, inp->in6p_fport
   1956  1.1  dyoung 					 , &inp->in6p_laddr, inp->in6p_lport
   1957  1.1  dyoung 					 , 0)
   1958  1.1  dyoung 					== vtw);
   1959  1.1  dyoung 				KASSERT(vtw_lookup_hash_v6
   1960  1.1  dyoung 					(ctl
   1961  1.1  dyoung 					 , &inp->in6p_faddr, inp->in6p_fport
   1962  1.1  dyoung 					 , &inp->in6p_laddr, inp->in6p_lport
   1963  1.1  dyoung 					 , 1));
   1964  1.1  dyoung 			}
   1965  1.1  dyoung 			/* Immediate port iterator functionality check: not wild
   1966  1.1  dyoung 			 */
   1967  1.1  dyoung 			if (enable & 8) {
   1968  1.1  dyoung 				struct tcp_ports_iterator *it;
   1969  1.1  dyoung 				struct vestigial_inpcb res;
   1970  1.1  dyoung 				int cnt = 0;
   1971  1.1  dyoung 
   1972  1.1  dyoung 				it = tcp_init_ports_v6(&inp->in6p_laddr
   1973  1.1  dyoung 						       , inp->in6p_lport, 0);
   1974  1.1  dyoung 
   1975  1.1  dyoung 				while (tcp_next_port_v6(it, &res)) {
   1976  1.1  dyoung 					++cnt;
   1977  1.1  dyoung 				}
   1978  1.1  dyoung 				KASSERT(cnt);
   1979  1.1  dyoung 			}
   1980  1.1  dyoung 			/* Immediate port iterator functionality check: wild
   1981  1.1  dyoung 			 */
   1982  1.1  dyoung 			if (enable & 16) {
   1983  1.1  dyoung 				struct tcp_ports_iterator *it;
   1984  1.1  dyoung 				struct vestigial_inpcb res;
   1985  1.1  dyoung 				static struct in6_addr any = IN6ADDR_ANY_INIT;
   1986  1.1  dyoung 				int cnt = 0;
   1987  1.1  dyoung 
   1988  1.1  dyoung 				it = tcp_init_ports_v6(&any
   1989  1.1  dyoung 						       , inp->in6p_lport, 1);
   1990  1.1  dyoung 
   1991  1.1  dyoung 				while (tcp_next_port_v6(it, &res)) {
   1992  1.1  dyoung 					++cnt;
   1993  1.1  dyoung 				}
   1994  1.1  dyoung 				KASSERT(cnt);
   1995  1.1  dyoung 			}
   1996  1.1  dyoung #endif /* VTW_DEBUG */
   1997  1.1  dyoung 			break;
   1998  1.1  dyoung 		}
   1999  1.1  dyoung 		}
   2000  1.1  dyoung 
   2001  1.1  dyoung 		tcp_canceltimers(tp);
   2002  1.1  dyoung 		tp = tcp_close(tp);
   2003  1.1  dyoung 		KASSERT(!tp);
   2004  1.1  dyoung 
   2005  1.1  dyoung 		return 1;
   2006  1.1  dyoung 	}
   2007  1.1  dyoung 
   2008  1.1  dyoung 	return 0;
   2009  1.1  dyoung }
   2010  1.1  dyoung 
   2011  1.1  dyoung /*!\brief	restart timer for vestigial time-wait entry
   2012  1.1  dyoung  */
   2013  1.1  dyoung static void
   2014  1.1  dyoung vtw_restart_v4(vestigial_inpcb_t *vp)
   2015  1.1  dyoung {
   2016  1.1  dyoung 	vtw_v4_t	copy = *(vtw_v4_t*)vp->vtw;
   2017  1.1  dyoung 	vtw_t		*vtw;
   2018  1.1  dyoung 	vtw_t		*cp  = &copy.common;
   2019  1.1  dyoung 	vtw_ctl_t	*ctl;
   2020  1.1  dyoung 
   2021  1.1  dyoung 	KASSERT(mutex_owned(softnet_lock));
   2022  1.1  dyoung 
   2023  1.1  dyoung 	db_trace(KTR_VTW
   2024  1.1  dyoung 		 , (vp->vtw, "vtw: restart %A:%P %A:%P"
   2025  1.1  dyoung 		    , vp->faddr.v4.s_addr, vp->fport
   2026  1.1  dyoung 		    , vp->laddr.v4.s_addr, vp->lport));
   2027  1.1  dyoung 
   2028  1.1  dyoung 	/* Class might have changed, so have a squiz.
   2029  1.1  dyoung 	 */
   2030  1.1  dyoung 	ctl = vtw_control(AF_INET, class_to_msl(cp->msl_class));
   2031  1.1  dyoung 	vtw = vtw_alloc(ctl);
   2032  1.1  dyoung 
   2033  1.1  dyoung 	if (vtw) {
   2034  1.1  dyoung 		vtw_v4_t	*v4  = (void*)vtw;
   2035  1.1  dyoung 
   2036  1.1  dyoung 		/* Safe now to unhash the old entry
   2037  1.1  dyoung 		 */
   2038  1.1  dyoung 		vtw_del(vp->ctl, vp->vtw);
   2039  1.1  dyoung 
   2040  1.1  dyoung 		vtw->snd_nxt = cp->snd_nxt;
   2041  1.1  dyoung 		vtw->rcv_nxt = cp->rcv_nxt;
   2042  1.1  dyoung 
   2043  1.1  dyoung 		v4->faddr = copy.faddr;
   2044  1.1  dyoung 		v4->laddr = copy.laddr;
   2045  1.1  dyoung 		v4->fport = copy.fport;
   2046  1.1  dyoung 		v4->lport = copy.lport;
   2047  1.1  dyoung 
   2048  1.1  dyoung 		vtw->reuse_port = cp->reuse_port;
   2049  1.1  dyoung 		vtw->reuse_addr = cp->reuse_addr;
   2050  1.1  dyoung 		vtw->v6only	= 0;
   2051  1.1  dyoung 		vtw->uid	= cp->uid;
   2052  1.1  dyoung 
   2053  1.1  dyoung 		vtw_inshash_v4(ctl, vtw);
   2054  1.1  dyoung 	}
   2055  1.1  dyoung 
   2056  1.1  dyoung 	vp->valid = 0;
   2057  1.1  dyoung }
   2058  1.1  dyoung 
   2059  1.1  dyoung /*!\brief	restart timer for vestigial time-wait entry
   2060  1.1  dyoung  */
   2061  1.1  dyoung static void
   2062  1.1  dyoung vtw_restart_v6(vestigial_inpcb_t *vp)
   2063  1.1  dyoung {
   2064  1.1  dyoung 	vtw_v6_t	copy = *(vtw_v6_t*)vp->vtw;
   2065  1.1  dyoung 	vtw_t		*vtw;
   2066  1.1  dyoung 	vtw_t		*cp  = &copy.common;
   2067  1.1  dyoung 	vtw_ctl_t	*ctl;
   2068  1.1  dyoung 
   2069  1.1  dyoung 	KASSERT(mutex_owned(softnet_lock));
   2070  1.1  dyoung 
   2071  1.1  dyoung 	db_trace(KTR_VTW
   2072  1.1  dyoung 		 , (vp->vtw, "vtw: restart %6A:%P %6A:%P"
   2073  1.1  dyoung 		    , db_store(&vp->faddr.v6, sizeof (vp->faddr.v6))
   2074  1.1  dyoung 		    , vp->fport
   2075  1.1  dyoung 		    , db_store(&vp->laddr.v6, sizeof (vp->laddr.v6))
   2076  1.1  dyoung 		    , vp->lport));
   2077  1.1  dyoung 
   2078  1.1  dyoung 	/* Class might have changed, so have a squiz.
   2079  1.1  dyoung 	 */
   2080  1.1  dyoung 	ctl = vtw_control(AF_INET6, class_to_msl(cp->msl_class));
   2081  1.1  dyoung 	vtw = vtw_alloc(ctl);
   2082  1.1  dyoung 
   2083  1.1  dyoung 	if (vtw) {
   2084  1.1  dyoung 		vtw_v6_t	*v6  = (void*)vtw;
   2085  1.1  dyoung 
   2086  1.1  dyoung 		/* Safe now to unhash the old entry
   2087  1.1  dyoung 		 */
   2088  1.1  dyoung 		vtw_del(vp->ctl, vp->vtw);
   2089  1.1  dyoung 
   2090  1.1  dyoung 		vtw->snd_nxt = cp->snd_nxt;
   2091  1.1  dyoung 		vtw->rcv_nxt = cp->rcv_nxt;
   2092  1.1  dyoung 
   2093  1.1  dyoung 		v6->faddr = copy.faddr;
   2094  1.1  dyoung 		v6->laddr = copy.laddr;
   2095  1.1  dyoung 		v6->fport = copy.fport;
   2096  1.1  dyoung 		v6->lport = copy.lport;
   2097  1.1  dyoung 
   2098  1.1  dyoung 		vtw->reuse_port = cp->reuse_port;
   2099  1.1  dyoung 		vtw->reuse_addr = cp->reuse_addr;
   2100  1.1  dyoung 		vtw->v6only	= cp->v6only;
   2101  1.1  dyoung 		vtw->uid	= cp->uid;
   2102  1.1  dyoung 
   2103  1.1  dyoung 		vtw_inshash_v6(ctl, vtw);
   2104  1.1  dyoung 	}
   2105  1.1  dyoung 
   2106  1.1  dyoung 	vp->valid = 0;
   2107  1.1  dyoung }
   2108  1.1  dyoung 
   2109  1.1  dyoung /*!\brief	restart timer for vestigial time-wait entry
   2110  1.1  dyoung  */
   2111  1.1  dyoung void
   2112  1.1  dyoung vtw_restart(vestigial_inpcb_t *vp)
   2113  1.1  dyoung {
   2114  1.1  dyoung 	if (!vp || !vp->valid)
   2115  1.1  dyoung 		return;
   2116  1.1  dyoung 
   2117  1.1  dyoung 	if (vp->v4)
   2118  1.1  dyoung 		vtw_restart_v4(vp);
   2119  1.1  dyoung 	else
   2120  1.1  dyoung 		vtw_restart_v6(vp);
   2121  1.1  dyoung }
   2122  1.1  dyoung 
   2123  1.1  dyoung int
   2124  1.1  dyoung vtw_earlyinit(void)
   2125  1.1  dyoung {
   2126  1.1  dyoung 	int rc;
   2127  1.1  dyoung 
   2128  1.1  dyoung 	if (!tcp_vtw_was_enabled) {
   2129  1.1  dyoung 		int i;
   2130  1.1  dyoung 
   2131  1.1  dyoung 		/* This guarantees is timer ticks until we no longer need them.
   2132  1.1  dyoung 		 */
   2133  1.1  dyoung 		tcp_vtw_was_enabled = 1;
   2134  1.1  dyoung 
   2135  1.1  dyoung 		callout_init(&vtw_cs, 0);
   2136  1.1  dyoung 		callout_setfunc(&vtw_cs, vtw_tick, 0);
   2137  1.1  dyoung 		callout_schedule(&vtw_cs, hz / 5);
   2138  1.1  dyoung 
   2139  1.1  dyoung 		for (i = 0; i < VTW_NCLASS; ++i) {
   2140  1.1  dyoung 			vtw_tcpv4[i].is_v4 = 1;
   2141  1.1  dyoung 			vtw_tcpv6[i].is_v6 = 1;
   2142  1.1  dyoung 		}
   2143  1.1  dyoung 
   2144  1.1  dyoung 		tcbtable.vestige = &tcp_hooks;
   2145  1.1  dyoung 	}
   2146  1.1  dyoung 
   2147  1.1  dyoung 	if ((rc = vtw_control_init(AF_INET)) != 0 ||
   2148  1.1  dyoung 	    (rc = vtw_control_init(AF_INET6)) != 0)
   2149  1.1  dyoung 		return rc;
   2150  1.1  dyoung 
   2151  1.1  dyoung 	return 0;
   2152  1.1  dyoung }
   2153  1.1  dyoung 
   2154  1.1  dyoung #ifdef VTW_DEBUG
   2155  1.1  dyoung #include <sys/syscallargs.h>
   2156  1.1  dyoung #include <sys/sysctl.h>
   2157  1.1  dyoung 
   2158  1.1  dyoung /*!\brief	add lalp, fafp entries for debug
   2159  1.1  dyoung  */
   2160  1.1  dyoung int
   2161  1.1  dyoung vtw_debug_add(int af, sin_either_t *la, sin_either_t *fa, int msl, int class)
   2162  1.1  dyoung {
   2163  1.1  dyoung 	vtw_ctl_t	*ctl;
   2164  1.1  dyoung 	vtw_t		*vtw;
   2165  1.1  dyoung 
   2166  1.1  dyoung 	ctl = vtw_control(af, msl ? msl : class_to_msl(class));
   2167  1.1  dyoung 	if (!ctl)
   2168  1.1  dyoung 		return 0;
   2169  1.1  dyoung 
   2170  1.1  dyoung 	vtw = vtw_alloc(ctl);
   2171  1.1  dyoung 
   2172  1.1  dyoung 	if (vtw) {
   2173  1.1  dyoung 		vtw->snd_nxt = 0;
   2174  1.1  dyoung 		vtw->rcv_nxt = 0;
   2175  1.1  dyoung 
   2176  1.1  dyoung 		switch (af) {
   2177  1.1  dyoung 		case AF_INET: {
   2178  1.1  dyoung 			vtw_v4_t	*v4  = (void*)vtw;
   2179  1.1  dyoung 
   2180  1.1  dyoung 			v4->faddr = fa->sin_addr.v4.s_addr;
   2181  1.1  dyoung 			v4->laddr = la->sin_addr.v4.s_addr;
   2182  1.1  dyoung 			v4->fport = fa->sin_port;
   2183  1.1  dyoung 			v4->lport = la->sin_port;
   2184  1.1  dyoung 
   2185  1.1  dyoung 			vtw->reuse_port = 1;
   2186  1.1  dyoung 			vtw->reuse_addr = 1;
   2187  1.1  dyoung 			vtw->v6only	= 0;
   2188  1.1  dyoung 			vtw->uid	= 0;
   2189  1.1  dyoung 
   2190  1.1  dyoung 			vtw_inshash_v4(ctl, vtw);
   2191  1.1  dyoung 			break;
   2192  1.1  dyoung 		}
   2193  1.1  dyoung 
   2194  1.1  dyoung 		case AF_INET6: {
   2195  1.1  dyoung 			vtw_v6_t	*v6  = (void*)vtw;
   2196  1.1  dyoung 
   2197  1.1  dyoung 			v6->faddr = fa->sin_addr.v6;
   2198  1.1  dyoung 			v6->laddr = la->sin_addr.v6;
   2199  1.1  dyoung 
   2200  1.1  dyoung 			v6->fport = fa->sin_port;
   2201  1.1  dyoung 			v6->lport = la->sin_port;
   2202  1.1  dyoung 
   2203  1.1  dyoung 			vtw->reuse_port = 1;
   2204  1.1  dyoung 			vtw->reuse_addr = 1;
   2205  1.1  dyoung 			vtw->v6only	= 0;
   2206  1.1  dyoung 			vtw->uid	= 0;
   2207  1.1  dyoung 
   2208  1.1  dyoung 			vtw_inshash_v6(ctl, vtw);
   2209  1.1  dyoung 			break;
   2210  1.1  dyoung 		}
   2211  1.1  dyoung 
   2212  1.1  dyoung 		default:
   2213  1.1  dyoung 			break;
   2214  1.1  dyoung 		}
   2215  1.1  dyoung 
   2216  1.1  dyoung 		return 1;
   2217  1.1  dyoung 	}
   2218  1.1  dyoung 
   2219  1.1  dyoung 	return 0;
   2220  1.1  dyoung }
   2221  1.1  dyoung 
   2222  1.1  dyoung static int vtw_syscall = 0;
   2223  1.1  dyoung 
   2224  1.1  dyoung static int
   2225  1.1  dyoung vtw_debug_process(vtw_sysargs_t *ap)
   2226  1.1  dyoung {
   2227  1.1  dyoung 	struct vestigial_inpcb vestige;
   2228  1.1  dyoung 	int	rc = 0;
   2229  1.1  dyoung 
   2230  1.1  dyoung 	mutex_enter(softnet_lock);
   2231  1.1  dyoung 
   2232  1.1  dyoung 	switch (ap->op) {
   2233  1.1  dyoung 	case 0:		// insert
   2234  1.1  dyoung 		vtw_debug_add(ap->la.sin_family
   2235  1.1  dyoung 			      , &ap->la
   2236  1.1  dyoung 			      , &ap->fa
   2237  1.1  dyoung 			      , TCPTV_MSL
   2238  1.1  dyoung 			      , 0);
   2239  1.1  dyoung 		break;
   2240  1.1  dyoung 
   2241  1.1  dyoung 	case 1:		// lookup
   2242  1.1  dyoung 	case 2:		// restart
   2243  1.1  dyoung 		switch (ap->la.sin_family) {
   2244  1.1  dyoung 		case AF_INET:
   2245  1.1  dyoung 			if (tcp_lookup_v4(ap->fa.sin_addr.v4, ap->fa.sin_port,
   2246  1.1  dyoung 					  ap->la.sin_addr.v4, ap->la.sin_port,
   2247  1.1  dyoung 					  &vestige)) {
   2248  1.1  dyoung 				if (ap->op == 2) {
   2249  1.1  dyoung 					vtw_restart(&vestige);
   2250  1.1  dyoung 				}
   2251  1.1  dyoung 				rc = 0;
   2252  1.1  dyoung 			} else
   2253  1.1  dyoung 				rc = ESRCH;
   2254  1.1  dyoung 			break;
   2255  1.1  dyoung 
   2256  1.1  dyoung 		case AF_INET6:
   2257  1.1  dyoung 			if (tcp_lookup_v6(&ap->fa.sin_addr.v6, ap->fa.sin_port,
   2258  1.1  dyoung 					  &ap->la.sin_addr.v6, ap->la.sin_port,
   2259  1.1  dyoung 					  &vestige)) {
   2260  1.1  dyoung 				if (ap->op == 2) {
   2261  1.1  dyoung 					vtw_restart(&vestige);
   2262  1.1  dyoung 				}
   2263  1.1  dyoung 				rc = 0;
   2264  1.1  dyoung 			} else
   2265  1.1  dyoung 				rc = ESRCH;
   2266  1.1  dyoung 			break;
   2267  1.1  dyoung 		default:
   2268  1.1  dyoung 			rc = EINVAL;
   2269  1.1  dyoung 		}
   2270  1.1  dyoung 		break;
   2271  1.1  dyoung 
   2272  1.1  dyoung 	default:
   2273  1.1  dyoung 		rc = EINVAL;
   2274  1.1  dyoung 	}
   2275  1.1  dyoung 
   2276  1.1  dyoung 	mutex_exit(softnet_lock);
   2277  1.1  dyoung 	return rc;
   2278  1.1  dyoung }
   2279  1.1  dyoung 
   2280  1.1  dyoung struct sys_vtw_args {
   2281  1.1  dyoung 	syscallarg(const vtw_sysargs_t *) req;
   2282  1.1  dyoung 	syscallarg(size_t) len;
   2283  1.1  dyoung };
   2284  1.1  dyoung 
   2285  1.1  dyoung static int
   2286  1.1  dyoung vtw_sys(struct lwp *l, const void *_, register_t *retval)
   2287  1.1  dyoung {
   2288  1.1  dyoung 	const struct sys_vtw_args *uap = _;
   2289  1.1  dyoung 	void	*buf;
   2290  1.1  dyoung 	int	rc;
   2291  1.1  dyoung 	size_t	len	= SCARG(uap, len);
   2292  1.1  dyoung 
   2293  1.1  dyoung 	if (len != sizeof (vtw_sysargs_t))
   2294  1.1  dyoung 		return EINVAL;
   2295  1.1  dyoung 
   2296  1.1  dyoung 	buf = kmem_alloc(len, KM_SLEEP);
   2297  1.1  dyoung 	if (!buf)
   2298  1.1  dyoung 		return ENOMEM;
   2299  1.1  dyoung 
   2300  1.1  dyoung 	rc = copyin(SCARG(uap, req), buf, len);
   2301  1.1  dyoung 	if (!rc) {
   2302  1.1  dyoung 		rc = vtw_debug_process(buf);
   2303  1.1  dyoung 	}
   2304  1.1  dyoung 	kmem_free(buf, len);
   2305  1.1  dyoung 
   2306  1.1  dyoung 	return rc;
   2307  1.1  dyoung }
   2308  1.1  dyoung 
   2309  1.1  dyoung static void
   2310  1.1  dyoung vtw_sanity_check(void)
   2311  1.1  dyoung {
   2312  1.1  dyoung 	vtw_ctl_t	*ctl;
   2313  1.1  dyoung 	vtw_t		*vtw;
   2314  1.1  dyoung 	int		i;
   2315  1.1  dyoung 	int		n;
   2316  1.1  dyoung 
   2317  1.1  dyoung 	for (i = 0; i < VTW_NCLASS; ++i) {
   2318  1.1  dyoung 		ctl = &vtw_tcpv4[i];
   2319  1.1  dyoung 
   2320  1.1  dyoung 		if (!ctl->base.v || ctl->nalloc)
   2321  1.1  dyoung 			continue;
   2322  1.1  dyoung 
   2323  1.1  dyoung 		for (n = 0, vtw = ctl->base.v; ; ) {
   2324  1.1  dyoung 			++n;
   2325  1.1  dyoung 			vtw = vtw_next(ctl, vtw);
   2326  1.1  dyoung 			if (vtw == ctl->base.v)
   2327  1.1  dyoung 				break;
   2328  1.1  dyoung 		}
   2329  1.1  dyoung 		db_trace(KTR_VTW
   2330  1.1  dyoung 			 , (ctl, "sanity: class %x n %x nfree %x"
   2331  1.1  dyoung 			    , i, n, ctl->nfree));
   2332  1.1  dyoung 
   2333  1.1  dyoung 		KASSERT(n == ctl->nfree);
   2334  1.1  dyoung 	}
   2335  1.1  dyoung 
   2336  1.1  dyoung 	for (i = 0; i < VTW_NCLASS; ++i) {
   2337  1.1  dyoung 		ctl = &vtw_tcpv6[i];
   2338  1.1  dyoung 
   2339  1.1  dyoung 		if (!ctl->base.v || ctl->nalloc)
   2340  1.1  dyoung 			continue;
   2341  1.1  dyoung 
   2342  1.1  dyoung 		for (n = 0, vtw = ctl->base.v; ; ) {
   2343  1.1  dyoung 			++n;
   2344  1.1  dyoung 			vtw = vtw_next(ctl, vtw);
   2345  1.1  dyoung 			if (vtw == ctl->base.v)
   2346  1.1  dyoung 				break;
   2347  1.1  dyoung 		}
   2348  1.1  dyoung 		db_trace(KTR_VTW
   2349  1.1  dyoung 			 , (ctl, "sanity: class %x n %x nfree %x"
   2350  1.1  dyoung 			    , i, n, ctl->nfree));
   2351  1.1  dyoung 		KASSERT(n == ctl->nfree);
   2352  1.1  dyoung 	}
   2353  1.1  dyoung }
   2354  1.1  dyoung 
   2355  1.1  dyoung /*!\brief	Initialise debug support.
   2356  1.1  dyoung  */
   2357  1.1  dyoung static void
   2358  1.1  dyoung vtw_debug_init(void)
   2359  1.1  dyoung {
   2360  1.1  dyoung 	int	i;
   2361  1.1  dyoung 
   2362  1.1  dyoung 	vtw_sanity_check();
   2363  1.1  dyoung 
   2364  1.1  dyoung 	if (vtw_syscall)
   2365  1.1  dyoung 		return;
   2366  1.1  dyoung 
   2367  1.1  dyoung 	for (i = 511; i; --i) {
   2368  1.1  dyoung 		if (sysent[i].sy_call == sys_nosys) {
   2369  1.1  dyoung 			sysent[i].sy_call    = vtw_sys;
   2370  1.1  dyoung 			sysent[i].sy_narg    = 2;
   2371  1.1  dyoung 			sysent[i].sy_argsize = sizeof (struct sys_vtw_args);
   2372  1.1  dyoung 			sysent[i].sy_flags   = 0;
   2373  1.1  dyoung 
   2374  1.1  dyoung 			vtw_syscall = i;
   2375  1.1  dyoung 			break;
   2376  1.1  dyoung 		}
   2377  1.1  dyoung 	}
   2378  1.1  dyoung 	if (i) {
   2379  1.1  dyoung 		const struct sysctlnode *node;
   2380  1.1  dyoung 		uint32_t	flags;
   2381  1.1  dyoung 
   2382  1.1  dyoung 		flags = sysctl_root.sysctl_flags;
   2383  1.1  dyoung 
   2384  1.1  dyoung 		sysctl_root.sysctl_flags |= CTLFLAG_READWRITE;
   2385  1.1  dyoung 		sysctl_root.sysctl_flags &= ~CTLFLAG_PERMANENT;
   2386  1.1  dyoung 
   2387  1.1  dyoung 		sysctl_createv(0, 0, 0, &node,
   2388  1.1  dyoung 			       CTLFLAG_PERMANENT, CTLTYPE_NODE,
   2389  1.1  dyoung 			       "koff",
   2390  1.1  dyoung 			       SYSCTL_DESCR("Kernel Obscure Feature Finder"),
   2391  1.1  dyoung 			       0, 0, 0, 0, CTL_CREATE, CTL_EOL);
   2392  1.1  dyoung 
   2393  1.1  dyoung 		if (!node) {
   2394  1.1  dyoung 			sysctl_createv(0, 0, 0, &node,
   2395  1.1  dyoung 				       CTLFLAG_PERMANENT, CTLTYPE_NODE,
   2396  1.1  dyoung 				       "koffka",
   2397  1.1  dyoung 				       SYSCTL_DESCR("The Real(tm) Kernel"
   2398  1.1  dyoung 						    " Obscure Feature Finder"),
   2399  1.1  dyoung 				       0, 0, 0, 0, CTL_CREATE, CTL_EOL);
   2400  1.1  dyoung 		}
   2401  1.1  dyoung 		if (node) {
   2402  1.1  dyoung 			sysctl_createv(0, 0, 0, 0,
   2403  1.1  dyoung 				       CTLFLAG_PERMANENT|CTLFLAG_READONLY,
   2404  1.1  dyoung 				       CTLTYPE_INT, "vtw_debug_syscall",
   2405  1.1  dyoung 				       SYSCTL_DESCR("vtw debug"
   2406  1.1  dyoung 						    " system call number"),
   2407  1.1  dyoung 				       0, 0, &vtw_syscall, 0, node->sysctl_num,
   2408  1.1  dyoung 				       CTL_CREATE, CTL_EOL);
   2409  1.1  dyoung 		}
   2410  1.1  dyoung 		sysctl_root.sysctl_flags = flags;
   2411  1.1  dyoung 	}
   2412  1.1  dyoung }
   2413  1.1  dyoung #else /* !VTW_DEBUG */
/*
 * Stub for the !VTW_DEBUG side of the conditional: there is no debug
 * support to initialise, so this does nothing.
 */
static void
vtw_debug_init(void)
{
}
   2419  1.1  dyoung #endif /* !VTW_DEBUG */
   2420  1.1  dyoung 
/*
 * Placeholder: ignores both arguments and does nothing.
 */
static void
k_vtw(int c, char **v)
{
	(void)c;
	(void)v;
}
   2426