Home | History | Annotate | Line # | Download | only in netinet
tcp_vtw.c revision 1.7
      1  1.1    dyoung /*
      2  1.1    dyoung  * Copyright (c) 2011 The NetBSD Foundation, Inc.
      3  1.1    dyoung  * All rights reserved.
      4  1.1    dyoung  *
      5  1.1    dyoung  * This code is derived from software contributed to The NetBSD Foundation
      6  1.1    dyoung  * by Coyote Point Systems, Inc.
      7  1.1    dyoung  *
      8  1.1    dyoung  * Redistribution and use in source and binary forms, with or without
      9  1.1    dyoung  * modification, are permitted provided that the following conditions
     10  1.1    dyoung  * are met:
     11  1.1    dyoung  * 1. Redistributions of source code must retain the above copyright
     12  1.1    dyoung  *    notice, this list of conditions and the following disclaimer.
     13  1.1    dyoung  * 2. Redistributions in binary form must reproduce the above copyright
     14  1.1    dyoung  *    notice, this list of conditions and the following disclaimer in the
     15  1.1    dyoung  *    documentation and/or other materials provided with the distribution.
     16  1.1    dyoung  *
     17  1.1    dyoung  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     18  1.1    dyoung  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     19  1.1    dyoung  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     20  1.1    dyoung  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     21  1.1    dyoung  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     22  1.1    dyoung  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     23  1.1    dyoung  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     24  1.1    dyoung  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     25  1.1    dyoung  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     26  1.1    dyoung  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     27  1.1    dyoung  * POSSIBILITY OF SUCH DAMAGE.
     28  1.1    dyoung  */
     29  1.1    dyoung #include <sys/cdefs.h>
     30  1.1    dyoung 
     31  1.1    dyoung #include "opt_ddb.h"
     32  1.1    dyoung #include "opt_inet.h"
     33  1.1    dyoung #include "opt_ipsec.h"
     34  1.1    dyoung #include "opt_inet_csum.h"
     35  1.1    dyoung #include "opt_tcp_debug.h"
     36  1.1    dyoung 
     37  1.1    dyoung #include <sys/param.h>
     38  1.1    dyoung #include <sys/systm.h>
     39  1.1    dyoung #include <sys/malloc.h>
     40  1.1    dyoung #include <sys/kmem.h>
     41  1.1    dyoung #include <sys/mbuf.h>
     42  1.1    dyoung #include <sys/protosw.h>
     43  1.1    dyoung #include <sys/socket.h>
     44  1.1    dyoung #include <sys/socketvar.h>
     45  1.1    dyoung #include <sys/errno.h>
     46  1.1    dyoung #include <sys/syslog.h>
     47  1.1    dyoung #include <sys/pool.h>
     48  1.1    dyoung #include <sys/domain.h>
     49  1.1    dyoung #include <sys/kernel.h>
     50  1.1    dyoung #include <net/if.h>
     51  1.1    dyoung #include <net/route.h>
     52  1.1    dyoung #include <net/if_types.h>
     53  1.1    dyoung 
     54  1.1    dyoung #include <netinet/in.h>
     55  1.1    dyoung #include <netinet/in_systm.h>
     56  1.1    dyoung #include <netinet/ip.h>
     57  1.1    dyoung #include <netinet/in_pcb.h>
     58  1.1    dyoung #include <netinet/in_var.h>
     59  1.1    dyoung #include <netinet/ip_var.h>
     60  1.1    dyoung #include <netinet/in_offload.h>
     61  1.1    dyoung #include <netinet/ip6.h>
     62  1.1    dyoung #include <netinet6/ip6_var.h>
     63  1.1    dyoung #include <netinet6/in6_pcb.h>
     64  1.1    dyoung #include <netinet6/ip6_var.h>
     65  1.1    dyoung #include <netinet6/in6_var.h>
     66  1.1    dyoung #include <netinet/icmp6.h>
     67  1.1    dyoung #include <netinet6/nd6.h>
     68  1.1    dyoung 
     69  1.1    dyoung #include <netinet/tcp.h>
     70  1.1    dyoung #include <netinet/tcp_fsm.h>
     71  1.1    dyoung #include <netinet/tcp_seq.h>
     72  1.1    dyoung #include <netinet/tcp_timer.h>
     73  1.1    dyoung #include <netinet/tcp_var.h>
     74  1.1    dyoung #include <netinet/tcp_private.h>
     75  1.1    dyoung #include <netinet/tcpip.h>
     76  1.1    dyoung 
     77  1.1    dyoung #include <machine/stdarg.h>
     78  1.1    dyoung #include <netinet/tcp_vtw.h>
     79  1.1    dyoung 
     80  1.7    dyoung __KERNEL_RCSID(0, "$NetBSD: tcp_vtw.c,v 1.7 2011/06/06 19:15:43 dyoung Exp $");
     81  1.1    dyoung 
/* Tracing hook, compiled out here: it expands to an empty statement, so
 * the arguments of every db_trace() call in this file are never evaluated.
 */
#define db_trace(__a, __b)	do { } while (/*CONSTCOND*/0)

static void vtw_debug_init(void);

/* Fat pointer and vestigial time-wait control state.
 * NOTE(review): the names suggest one set for TCP/IPv4 and one for
 * TCP/IPv6, with one vtw control per class (VTW_NCLASS) -- confirm
 * against the initialisation code.
 */
fatp_ctl_t fat_tcpv4;
fatp_ctl_t fat_tcpv6;
vtw_ctl_t  vtw_tcpv4[VTW_NCLASS];
vtw_ctl_t  vtw_tcpv6[VTW_NCLASS];
/* Event counters; this file bumps the del, ins and look[] members. */
vtw_stats_t vtw_stats;
     91  1.1    dyoung 
     92  1.1    dyoung /* We provide state for the lookup_ports iterator.
     93  1.1    dyoung  * As currently we are netlock-protected, there is one.
     94  1.1    dyoung  * If we were finer-grain, we would have one per CPU.
     95  1.1    dyoung  * I do not want to be in the business of alloc/free.
     96  1.1    dyoung  * The best alternate would be allocate on the caller's
     97  1.1    dyoung  * stack, but that would require them to know the struct,
     98  1.1    dyoung  * or at least the size.
     99  1.1    dyoung  * See how she goes.
    100  1.1    dyoung  */
/* NOTE(review): the code that walks this iterator is not visible from
 * here; the field notes below are read off the declarations only --
 * confirm against the lookup_ports implementation.
 */
struct tcp_ports_iterator {
	union {
		struct in_addr	v4;
		struct in6_addr	v6;
	}		addr;		/* one address, IPv4 or IPv6 */
	u_int		port;

	uint32_t	wild	: 1;	/* single-bit flag */

	vtw_ctl_t	*ctl;
	fatp_t		*fp;

	uint16_t	slot_idx;
	uint16_t	ctl_idx;
};

/* Statically allocated iterator state, one instance per address family
 * (judging by the _v4/_v6 suffixes).
 */
static struct tcp_ports_iterator tcp_ports_iterator_v4;
static struct tcp_ports_iterator tcp_ports_iterator_v6;
    119  1.1    dyoung 
    120  1.1    dyoung static int vtw_age(vtw_ctl_t *, struct timeval *);
    121  1.1    dyoung 
    122  1.1    dyoung /*!\brief allocate a fat pointer from a collection.
    123  1.1    dyoung  */
    124  1.1    dyoung static fatp_t *
    125  1.1    dyoung fatp_alloc(fatp_ctl_t *fat)
    126  1.1    dyoung {
    127  1.1    dyoung 	fatp_t	*fp	= 0;
    128  1.1    dyoung 
    129  1.1    dyoung 	if (fat->nfree) {
    130  1.1    dyoung 		fp = fat->free;
    131  1.1    dyoung 		if (fp) {
    132  1.1    dyoung 			fat->free = fatp_next(fat, fp);
    133  1.1    dyoung 			--fat->nfree;
    134  1.1    dyoung 			++fat->nalloc;
    135  1.1    dyoung 			fp->nxt = 0;
    136  1.1    dyoung 
    137  1.1    dyoung 			KASSERT(!fp->inuse);
    138  1.1    dyoung 		}
    139  1.1    dyoung 	}
    140  1.1    dyoung 
    141  1.1    dyoung 	return fp;
    142  1.1    dyoung }
    143  1.1    dyoung 
    144  1.1    dyoung /*!\brief free a fat pointer.
    145  1.1    dyoung  */
    146  1.1    dyoung static void
    147  1.1    dyoung fatp_free(fatp_ctl_t *fat, fatp_t *fp)
    148  1.1    dyoung {
    149  1.1    dyoung 	if (fp) {
    150  1.1    dyoung 		KASSERT(!fp->inuse);
    151  1.1    dyoung 		KASSERT(!fp->nxt);
    152  1.1    dyoung 
    153  1.1    dyoung 		fp->nxt = fatp_index(fat, fat->free);
    154  1.1    dyoung 		fat->free = fp;
    155  1.1    dyoung 
    156  1.1    dyoung 		++fat->nfree;
    157  1.1    dyoung 		--fat->nalloc;
    158  1.1    dyoung 	}
    159  1.1    dyoung }
    160  1.1    dyoung 
    161  1.1    dyoung /*!\brief initialise a collection of fat pointers.
    162  1.1    dyoung  *
    163  1.1    dyoung  *\param n	# hash buckets
    164  1.1    dyoung  *\param m	total # fat pointers to allocate
    165  1.1    dyoung  *
    166  1.1    dyoung  * We allocate 2x as much, as we have two hashes: full and lport only.
    167  1.1    dyoung  */
    168  1.1    dyoung static void
    169  1.6    dyoung fatp_init(fatp_ctl_t *fat, uint32_t n, uint32_t m,
    170  1.6    dyoung     fatp_t *fat_base, fatp_t **fat_hash)
    171  1.1    dyoung {
    172  1.1    dyoung 	fatp_t	*fp;
    173  1.1    dyoung 
    174  1.1    dyoung 	KASSERT(n <= FATP_MAX / 2);
    175  1.1    dyoung 
    176  1.6    dyoung 	fat->hash = fat_hash;
    177  1.6    dyoung 	fat->base = fat_base;
    178  1.1    dyoung 
    179  1.1    dyoung 	fat->port = &fat->hash[m];
    180  1.1    dyoung 
    181  1.1    dyoung 	fat->mask   = m - 1;	// ASSERT is power of 2 (m)
    182  1.1    dyoung 	fat->lim    = fat->base + 2*n - 1;
    183  1.1    dyoung 	fat->nfree  = 0;
    184  1.1    dyoung 	fat->nalloc = 2*n;
    185  1.1    dyoung 
    186  1.1    dyoung 	/* Initialise the free list.
    187  1.1    dyoung 	 */
    188  1.1    dyoung 	for (fp = fat->lim; fp >= fat->base; --fp) {
    189  1.1    dyoung 		fatp_free(fat, fp);
    190  1.1    dyoung 	}
    191  1.1    dyoung }
    192  1.1    dyoung 
/*
 * The `xtra' is XORed into the tag stored.
 *
 * The table is indexed by slot number within a fat pointer, so the
 * same {tag, index} pair is stored as a different word in each slot.
 * It has 32 entries.
 */
static uint32_t fatp_xtra[] = {
	0x11111111,0x22222222,0x33333333,0x44444444,
	0x55555555,0x66666666,0x77777777,0x88888888,
	0x12121212,0x21212121,0x34343434,0x43434343,
	0x56565656,0x65656565,0x78787878,0x87878787,
	0x11221122,0x22112211,0x33443344,0x44334433,
	0x55665566,0x66556655,0x77887788,0x88778877,
	0x11112222,0x22221111,0x33334444,0x44443333,
	0x55556666,0x66665555,0x77778888,0x88887777,
};
    206  1.1    dyoung 
    207  1.1    dyoung /*!\brief turn a {fatp_t*,slot} into an integral key.
    208  1.1    dyoung  *
    209  1.1    dyoung  * The key can be used to obtain the fatp_t, and the slot,
    210  1.1    dyoung  * as it directly encodes them.
    211  1.1    dyoung  */
    212  1.1    dyoung static inline uint32_t
    213  1.1    dyoung fatp_key(fatp_ctl_t *fat, fatp_t *fp, uint32_t slot)
    214  1.1    dyoung {
    215  1.1    dyoung 	CTASSERT(CACHE_LINE_SIZE == 32 ||
    216  1.1    dyoung 	         CACHE_LINE_SIZE == 64 ||
    217  1.1    dyoung 		 CACHE_LINE_SIZE == 128);
    218  1.1    dyoung 
    219  1.1    dyoung 	switch (fatp_ntags()) {
    220  1.1    dyoung 	case 7:
    221  1.1    dyoung 		return (fatp_index(fat, fp) << 3) | slot;
    222  1.1    dyoung 	case 15:
    223  1.1    dyoung 		return (fatp_index(fat, fp) << 4) | slot;
    224  1.1    dyoung 	case 31:
    225  1.1    dyoung 		return (fatp_index(fat, fp) << 5) | slot;
    226  1.1    dyoung 	default:
    227  1.1    dyoung 		KASSERT(0 && "no support, for no good reason");
    228  1.1    dyoung 		return ~0;
    229  1.1    dyoung 	}
    230  1.1    dyoung }
    231  1.1    dyoung 
    232  1.1    dyoung static inline uint32_t
    233  1.1    dyoung fatp_slot_from_key(fatp_ctl_t *fat, uint32_t key)
    234  1.1    dyoung {
    235  1.1    dyoung 	CTASSERT(CACHE_LINE_SIZE == 32 ||
    236  1.1    dyoung 	         CACHE_LINE_SIZE == 64 ||
    237  1.1    dyoung 		 CACHE_LINE_SIZE == 128);
    238  1.1    dyoung 
    239  1.1    dyoung 	switch (fatp_ntags()) {
    240  1.1    dyoung 	case 7:
    241  1.1    dyoung 		return key & 7;
    242  1.1    dyoung 	case 15:
    243  1.1    dyoung 		return key & 15;
    244  1.1    dyoung 	case 31:
    245  1.1    dyoung 		return key & 31;
    246  1.1    dyoung 	default:
    247  1.1    dyoung 		KASSERT(0 && "no support, for no good reason");
    248  1.1    dyoung 		return ~0;
    249  1.1    dyoung 	}
    250  1.1    dyoung }
    251  1.1    dyoung 
    252  1.1    dyoung static inline fatp_t *
    253  1.1    dyoung fatp_from_key(fatp_ctl_t *fat, uint32_t key)
    254  1.1    dyoung {
    255  1.1    dyoung 	CTASSERT(CACHE_LINE_SIZE == 32 ||
    256  1.1    dyoung 	         CACHE_LINE_SIZE == 64 ||
    257  1.1    dyoung 		 CACHE_LINE_SIZE == 128);
    258  1.1    dyoung 
    259  1.1    dyoung 	switch (fatp_ntags()) {
    260  1.1    dyoung 	case 7:
    261  1.1    dyoung 		key >>= 3;
    262  1.1    dyoung 		break;
    263  1.1    dyoung 	case 15:
    264  1.1    dyoung 		key >>= 4;
    265  1.1    dyoung 		break;
    266  1.1    dyoung 	case 31:
    267  1.1    dyoung 		key >>= 5;
    268  1.1    dyoung 		break;
    269  1.1    dyoung 	default:
    270  1.1    dyoung 		KASSERT(0 && "no support, for no good reason");
    271  1.1    dyoung 		return 0;
    272  1.1    dyoung 	}
    273  1.1    dyoung 
    274  1.1    dyoung 	return key ? fat->base + key - 1 : 0;
    275  1.1    dyoung }
    276  1.1    dyoung 
    277  1.1    dyoung static inline uint32_t
    278  1.1    dyoung idx_encode(vtw_ctl_t *ctl, uint32_t idx)
    279  1.1    dyoung {
    280  1.1    dyoung 	return (idx << ctl->idx_bits) | idx;
    281  1.1    dyoung }
    282  1.1    dyoung 
    283  1.1    dyoung static inline uint32_t
    284  1.1    dyoung idx_decode(vtw_ctl_t *ctl, uint32_t bits)
    285  1.1    dyoung {
    286  1.1    dyoung 	uint32_t	idx	= bits & ctl->idx_mask;
    287  1.1    dyoung 
    288  1.1    dyoung 	if (idx_encode(ctl, idx) == bits)
    289  1.1    dyoung 		return idx;
    290  1.1    dyoung 	else
    291  1.1    dyoung 		return ~0;
    292  1.1    dyoung }
    293  1.1    dyoung 
    294  1.1    dyoung /*!\brief	insert index into fatp hash
    295  1.1    dyoung  *
    296  1.1    dyoung  *\param	idx	-	index of element being placed in hash chain
    297  1.1    dyoung  *\param	tag	-	32-bit tag identifier
    298  1.1    dyoung  *
    299  1.1    dyoung  *\returns
    300  1.1    dyoung  *	value which can be used to locate entry.
    301  1.1    dyoung  *
    302  1.1    dyoung  *\note
    303  1.1    dyoung  *	we rely on the fact that there are unused high bits in the index
    304  1.1    dyoung  *	for verification purposes on lookup.
    305  1.1    dyoung  */
    306  1.1    dyoung 
    307  1.1    dyoung static inline uint32_t
    308  1.1    dyoung fatp_vtw_inshash(fatp_ctl_t *fat, uint32_t idx, uint32_t tag, int which,
    309  1.1    dyoung     void *dbg)
    310  1.1    dyoung {
    311  1.1    dyoung 	fatp_t	*fp;
    312  1.1    dyoung 	fatp_t	**hash = (which ? fat->port : fat->hash);
    313  1.1    dyoung 	int	i;
    314  1.1    dyoung 
    315  1.1    dyoung 	fp = hash[tag & fat->mask];
    316  1.1    dyoung 
    317  1.1    dyoung 	while (!fp || fatp_full(fp)) {
    318  1.1    dyoung 		fatp_t	*fq;
    319  1.1    dyoung 
    320  1.1    dyoung 		/* All entries are inuse at the top level.
    321  1.1    dyoung 		 * We allocate a spare, and push the top level
    322  1.1    dyoung 		 * down one.  All entries in the fp we push down
    323  1.1    dyoung 		 * (think of a tape worm here) will be expelled sooner than
    324  1.1    dyoung 		 * any entries added subsequently to this hash bucket.
    325  1.1    dyoung 		 * This is a property of the time waits we are exploiting.
    326  1.1    dyoung 		 */
    327  1.1    dyoung 
    328  1.1    dyoung 		fq = fatp_alloc(fat);
    329  1.1    dyoung 		if (!fq) {
    330  1.1    dyoung 			vtw_age(fat->vtw, 0);
    331  1.1    dyoung 			fp = hash[tag & fat->mask];
    332  1.1    dyoung 			continue;
    333  1.1    dyoung 		}
    334  1.1    dyoung 
    335  1.1    dyoung 		fq->inuse = 0;
    336  1.1    dyoung 		fq->nxt   = fatp_index(fat, fp);
    337  1.1    dyoung 
    338  1.1    dyoung 		hash[tag & fat->mask] = fq;
    339  1.1    dyoung 
    340  1.1    dyoung 		fp = fq;
    341  1.1    dyoung 	}
    342  1.1    dyoung 
    343  1.1    dyoung 	KASSERT(!fatp_full(fp));
    344  1.1    dyoung 
    345  1.1    dyoung 	/* Fill highest index first.  Lookup is lowest first.
    346  1.1    dyoung 	 */
    347  1.1    dyoung 	for (i = fatp_ntags(); --i >= 0; ) {
    348  1.1    dyoung 		if (!((1 << i) & fp->inuse)) {
    349  1.1    dyoung 			break;
    350  1.1    dyoung 		}
    351  1.1    dyoung 	}
    352  1.1    dyoung 
    353  1.1    dyoung 	fp->inuse |= 1 << i;
    354  1.1    dyoung 	fp->tag[i] = tag ^ idx_encode(fat->vtw, idx) ^ fatp_xtra[i];
    355  1.1    dyoung 
    356  1.1    dyoung 	db_trace(KTR_VTW
    357  1.1    dyoung 		 , (fp, "fat: inuse %5.5x tag[%x] %8.8x"
    358  1.1    dyoung 		    , fp->inuse
    359  1.1    dyoung 		    , i, fp->tag[i]));
    360  1.1    dyoung 
    361  1.1    dyoung 	return fatp_key(fat, fp, i);
    362  1.1    dyoung }
    363  1.1    dyoung 
    364  1.1    dyoung static inline int
    365  1.1    dyoung vtw_alive(const vtw_t *vtw)
    366  1.1    dyoung {
    367  1.1    dyoung 	return vtw->hashed && vtw->expire.tv_sec;
    368  1.1    dyoung }
    369  1.1    dyoung 
    370  1.1    dyoung static inline uint32_t
    371  1.1    dyoung vtw_index_v4(vtw_ctl_t *ctl, vtw_v4_t *v4)
    372  1.1    dyoung {
    373  1.1    dyoung 	if (ctl->base.v4 <= v4 && v4 <= ctl->lim.v4)
    374  1.1    dyoung 		return v4 - ctl->base.v4;
    375  1.1    dyoung 
    376  1.1    dyoung 	KASSERT(0 && "vtw out of bounds");
    377  1.1    dyoung 
    378  1.1    dyoung 	return ~0;
    379  1.1    dyoung }
    380  1.1    dyoung 
    381  1.1    dyoung static inline uint32_t
    382  1.1    dyoung vtw_index_v6(vtw_ctl_t *ctl, vtw_v6_t *v6)
    383  1.1    dyoung {
    384  1.1    dyoung 	if (ctl->base.v6 <= v6 && v6 <= ctl->lim.v6)
    385  1.1    dyoung 		return v6 - ctl->base.v6;
    386  1.1    dyoung 
    387  1.1    dyoung 	KASSERT(0 && "vtw out of bounds");
    388  1.1    dyoung 
    389  1.1    dyoung 	return ~0;
    390  1.1    dyoung }
    391  1.1    dyoung 
    392  1.1    dyoung static inline uint32_t
    393  1.1    dyoung vtw_index(vtw_ctl_t *ctl, vtw_t *vtw)
    394  1.1    dyoung {
    395  1.1    dyoung 	if (ctl->clidx)
    396  1.1    dyoung 		ctl = ctl->ctl;
    397  1.1    dyoung 
    398  1.1    dyoung 	if (ctl->is_v4)
    399  1.1    dyoung 		return vtw_index_v4(ctl, (vtw_v4_t *)vtw);
    400  1.1    dyoung 
    401  1.1    dyoung 	if (ctl->is_v6)
    402  1.1    dyoung 		return vtw_index_v6(ctl, (vtw_v6_t *)vtw);
    403  1.1    dyoung 
    404  1.1    dyoung 	KASSERT(0 && "neither 4 nor 6.  most curious.");
    405  1.1    dyoung 
    406  1.1    dyoung 	return ~0;
    407  1.1    dyoung }
    408  1.1    dyoung 
    409  1.1    dyoung static inline vtw_t *
    410  1.1    dyoung vtw_from_index(vtw_ctl_t *ctl, uint32_t idx)
    411  1.1    dyoung {
    412  1.1    dyoung 	if (ctl->clidx)
    413  1.1    dyoung 		ctl = ctl->ctl;
    414  1.1    dyoung 
    415  1.1    dyoung 	/* See if the index looks like it might be an index.
    416  1.1    dyoung 	 * Bits on outside of the valid index bits is a give away.
    417  1.1    dyoung 	 */
    418  1.1    dyoung 	idx = idx_decode(ctl, idx);
    419  1.1    dyoung 
    420  1.1    dyoung 	if (idx == ~0) {
    421  1.1    dyoung 		return 0;
    422  1.1    dyoung 	} else if (ctl->is_v4) {
    423  1.1    dyoung 		vtw_v4_t	*vtw = ctl->base.v4 + idx;
    424  1.1    dyoung 
    425  1.1    dyoung 		return (ctl->base.v4 <= vtw && vtw <= ctl->lim.v4)
    426  1.1    dyoung 			? &vtw->common : 0;
    427  1.1    dyoung 	} else if (ctl->is_v6) {
    428  1.1    dyoung 		vtw_v6_t	*vtw = ctl->base.v6 + idx;
    429  1.1    dyoung 
    430  1.1    dyoung 		return (ctl->base.v6 <= vtw && vtw <= ctl->lim.v6)
    431  1.1    dyoung 			? &vtw->common : 0;
    432  1.1    dyoung 	} else {
    433  1.1    dyoung 		KASSERT(0 && "badness");
    434  1.1    dyoung 		return 0;
    435  1.1    dyoung 	}
    436  1.1    dyoung }
    437  1.1    dyoung 
    438  1.1    dyoung /*!\brief return the next vtw after this one.
    439  1.1    dyoung  *
    440  1.1    dyoung  * Due to the differing sizes of the entries in differing
    441  1.1    dyoung  * arenas, we have to ensure we ++ the correct pointer type.
    442  1.1    dyoung  *
    443  1.1    dyoung  * Also handles wrap.
    444  1.1    dyoung  */
    445  1.1    dyoung static inline vtw_t *
    446  1.1    dyoung vtw_next(vtw_ctl_t *ctl, vtw_t *vtw)
    447  1.1    dyoung {
    448  1.1    dyoung 	if (ctl->is_v4) {
    449  1.1    dyoung 		vtw_v4_t	*v4 = (void*)vtw;
    450  1.1    dyoung 
    451  1.1    dyoung 		vtw = &(++v4)->common;
    452  1.1    dyoung 	} else {
    453  1.1    dyoung 		vtw_v6_t	*v6 = (void*)vtw;
    454  1.1    dyoung 
    455  1.1    dyoung 		vtw = &(++v6)->common;
    456  1.1    dyoung 	}
    457  1.1    dyoung 
    458  1.1    dyoung 	if (vtw > ctl->lim.v)
    459  1.1    dyoung 		vtw = ctl->base.v;
    460  1.1    dyoung 
    461  1.1    dyoung 	return vtw;
    462  1.1    dyoung }
    463  1.1    dyoung 
    464  1.1    dyoung /*!\brief	remove entry from FATP hash chains
    465  1.1    dyoung  */
    466  1.1    dyoung static inline void
    467  1.1    dyoung vtw_unhash(vtw_ctl_t *ctl, vtw_t *vtw)
    468  1.1    dyoung {
    469  1.1    dyoung 	fatp_ctl_t	*fat	= ctl->fat;
    470  1.1    dyoung 	fatp_t		*fp;
    471  1.1    dyoung 	uint32_t	key = vtw->key;
    472  1.1    dyoung 	uint32_t	tag, slot, idx;
    473  1.1    dyoung 	vtw_v4_t	*v4 = (void*)vtw;
    474  1.1    dyoung 	vtw_v6_t	*v6 = (void*)vtw;
    475  1.1    dyoung 
    476  1.1    dyoung 	if (!vtw->hashed) {
    477  1.1    dyoung 		KASSERT(0 && "unhashed");
    478  1.1    dyoung 		return;
    479  1.1    dyoung 	}
    480  1.1    dyoung 
    481  1.1    dyoung 	if (fat->vtw->is_v4) {
    482  1.1    dyoung 		tag = v4_tag(v4->faddr, v4->fport, v4->laddr, v4->lport);
    483  1.1    dyoung 	} else if (fat->vtw->is_v6) {
    484  1.1    dyoung 		tag = v6_tag(&v6->faddr, v6->fport, &v6->laddr, v6->lport);
    485  1.1    dyoung 	} else {
    486  1.1    dyoung 		tag = 0;
    487  1.1    dyoung 		KASSERT(0 && "not reached");
    488  1.1    dyoung 	}
    489  1.1    dyoung 
    490  1.1    dyoung 	/* Remove from fat->hash[]
    491  1.1    dyoung 	 */
    492  1.1    dyoung 	slot = fatp_slot_from_key(fat, key);
    493  1.1    dyoung 	fp   = fatp_from_key(fat, key);
    494  1.1    dyoung 	idx  = vtw_index(ctl, vtw);
    495  1.1    dyoung 
    496  1.1    dyoung 	db_trace(KTR_VTW
    497  1.1    dyoung 		 , (fp, "fat: del inuse %5.5x slot %x idx %x key %x tag %x"
    498  1.1    dyoung 		    , fp->inuse, slot, idx, key, tag));
    499  1.1    dyoung 
    500  1.1    dyoung 	KASSERT(fp->inuse & (1 << slot));
    501  1.1    dyoung 	KASSERT(fp->tag[slot] == (tag ^ idx_encode(ctl, idx)
    502  1.1    dyoung 				  ^ fatp_xtra[slot]));
    503  1.1    dyoung 
    504  1.1    dyoung 	if ((fp->inuse & (1 << slot))
    505  1.1    dyoung 	    && fp->tag[slot] == (tag ^ idx_encode(ctl, idx)
    506  1.1    dyoung 				 ^ fatp_xtra[slot])) {
    507  1.1    dyoung 		fp->inuse ^= 1 << slot;
    508  1.1    dyoung 		fp->tag[slot] = 0;
    509  1.1    dyoung 
    510  1.1    dyoung 		/* When we delete entries, we do not compact.  This is
    511  1.1    dyoung 		 * due to temporality.  We add entries, and they
    512  1.1    dyoung 		 * (eventually) expire. Older entries will be further
    513  1.1    dyoung 		 * down the chain.
    514  1.1    dyoung 		 */
    515  1.1    dyoung 		if (!fp->inuse) {
    516  1.1    dyoung 			uint32_t hi = tag & fat->mask;
    517  1.1    dyoung 			fatp_t	*fq = 0;
    518  1.1    dyoung 			fatp_t	*fr = fat->hash[hi];
    519  1.1    dyoung 
    520  1.1    dyoung 			while (fr && fr != fp) {
    521  1.1    dyoung 				fr = fatp_next(fat, fq = fr);
    522  1.1    dyoung 			}
    523  1.1    dyoung 
    524  1.1    dyoung 			if (fr == fp) {
    525  1.1    dyoung 				if (fq) {
    526  1.1    dyoung 					fq->nxt = fp->nxt;
    527  1.1    dyoung 					fp->nxt = 0;
    528  1.1    dyoung 					fatp_free(fat, fp);
    529  1.1    dyoung 				} else {
    530  1.1    dyoung 					KASSERT(fat->hash[hi] == fp);
    531  1.1    dyoung 
    532  1.1    dyoung 					if (fp->nxt) {
    533  1.1    dyoung 						fat->hash[hi]
    534  1.1    dyoung 							= fatp_next(fat, fp);
    535  1.1    dyoung 						fp->nxt = 0;
    536  1.1    dyoung 						fatp_free(fat, fp);
    537  1.1    dyoung 					} else {
    538  1.1    dyoung 						/* retain for next use.
    539  1.1    dyoung 						 */
    540  1.1    dyoung 						;
    541  1.1    dyoung 					}
    542  1.1    dyoung 				}
    543  1.1    dyoung 			} else {
    544  1.1    dyoung 				fr = fat->hash[hi];
    545  1.1    dyoung 
    546  1.1    dyoung 				do {
    547  1.1    dyoung 					db_trace(KTR_VTW
    548  1.1    dyoung 						 , (fr
    549  1.1    dyoung 						    , "fat:*del inuse %5.5x"
    550  1.1    dyoung 						    " nxt %x"
    551  1.1    dyoung 						    , fr->inuse, fr->nxt));
    552  1.1    dyoung 
    553  1.1    dyoung 					fr = fatp_next(fat, fq = fr);
    554  1.1    dyoung 				} while (fr && fr != fp);
    555  1.1    dyoung 
    556  1.1    dyoung 				KASSERT(0 && "oops");
    557  1.1    dyoung 			}
    558  1.1    dyoung 		}
    559  1.1    dyoung 		vtw->key ^= ~0;
    560  1.1    dyoung 	}
    561  1.1    dyoung 
    562  1.1    dyoung 	if (fat->vtw->is_v4) {
    563  1.1    dyoung 		tag = v4_port_tag(v4->lport);
    564  1.1    dyoung 	} else if (fat->vtw->is_v6) {
    565  1.1    dyoung 		tag = v6_port_tag(v6->lport);
    566  1.1    dyoung 	}
    567  1.1    dyoung 
    568  1.1    dyoung 	/* Remove from fat->port[]
    569  1.1    dyoung 	 */
    570  1.1    dyoung 	key  = vtw->port_key;
    571  1.1    dyoung 	slot = fatp_slot_from_key(fat, key);
    572  1.1    dyoung 	fp   = fatp_from_key(fat, key);
    573  1.1    dyoung 	idx  = vtw_index(ctl, vtw);
    574  1.1    dyoung 
    575  1.1    dyoung 	db_trace(KTR_VTW
    576  1.1    dyoung 		 , (fp, "fatport: del inuse %5.5x"
    577  1.1    dyoung 		    " slot %x idx %x key %x tag %x"
    578  1.1    dyoung 		    , fp->inuse, slot, idx, key, tag));
    579  1.1    dyoung 
    580  1.1    dyoung 	KASSERT(fp->inuse & (1 << slot));
    581  1.1    dyoung 	KASSERT(fp->tag[slot] == (tag ^ idx_encode(ctl, idx)
    582  1.1    dyoung 				  ^ fatp_xtra[slot]));
    583  1.1    dyoung 
    584  1.1    dyoung 	if ((fp->inuse & (1 << slot))
    585  1.1    dyoung 	    && fp->tag[slot] == (tag ^ idx_encode(ctl, idx)
    586  1.1    dyoung 				 ^ fatp_xtra[slot])) {
    587  1.1    dyoung 		fp->inuse ^= 1 << slot;
    588  1.1    dyoung 		fp->tag[slot] = 0;
    589  1.1    dyoung 
    590  1.1    dyoung 		if (!fp->inuse) {
    591  1.1    dyoung 			uint32_t hi = tag & fat->mask;
    592  1.1    dyoung 			fatp_t	*fq = 0;
    593  1.1    dyoung 			fatp_t	*fr = fat->port[hi];
    594  1.1    dyoung 
    595  1.1    dyoung 			while (fr && fr != fp) {
    596  1.1    dyoung 				fr = fatp_next(fat, fq = fr);
    597  1.1    dyoung 			}
    598  1.1    dyoung 
    599  1.1    dyoung 			if (fr == fp) {
    600  1.1    dyoung 				if (fq) {
    601  1.1    dyoung 					fq->nxt = fp->nxt;
    602  1.1    dyoung 					fp->nxt = 0;
    603  1.1    dyoung 					fatp_free(fat, fp);
    604  1.1    dyoung 				} else {
    605  1.1    dyoung 					KASSERT(fat->port[hi] == fp);
    606  1.1    dyoung 
    607  1.1    dyoung 					if (fp->nxt) {
    608  1.1    dyoung 						fat->port[hi]
    609  1.1    dyoung 							= fatp_next(fat, fp);
    610  1.1    dyoung 						fp->nxt = 0;
    611  1.1    dyoung 						fatp_free(fat, fp);
    612  1.1    dyoung 					} else {
    613  1.1    dyoung 						/* retain for next use.
    614  1.1    dyoung 						 */
    615  1.1    dyoung 						;
    616  1.1    dyoung 					}
    617  1.1    dyoung 				}
    618  1.1    dyoung 			}
    619  1.1    dyoung 		}
    620  1.1    dyoung 		vtw->port_key ^= ~0;
    621  1.1    dyoung 	}
    622  1.1    dyoung 
    623  1.1    dyoung 	vtw->hashed = 0;
    624  1.1    dyoung }
    625  1.1    dyoung 
    626  1.1    dyoung /*!\brief	remove entry from hash, possibly free.
    627  1.1    dyoung  */
    628  1.1    dyoung void
    629  1.1    dyoung vtw_del(vtw_ctl_t *ctl, vtw_t *vtw)
    630  1.1    dyoung {
    631  1.1    dyoung 	KASSERT(mutex_owned(softnet_lock));
    632  1.1    dyoung 
    633  1.1    dyoung 	if (vtw->hashed) {
    634  1.1    dyoung 		++vtw_stats.del;
    635  1.1    dyoung 		vtw_unhash(ctl, vtw);
    636  1.1    dyoung 	}
    637  1.1    dyoung 
    638  1.1    dyoung 	/* We only delete the oldest entry.
    639  1.1    dyoung 	 */
    640  1.1    dyoung 	if (vtw != ctl->oldest.v)
    641  1.1    dyoung 		return;
    642  1.1    dyoung 
    643  1.1    dyoung 	--ctl->nalloc;
    644  1.1    dyoung 	++ctl->nfree;
    645  1.1    dyoung 
    646  1.1    dyoung 	vtw->expire.tv_sec  = 0;
    647  1.1    dyoung 	vtw->expire.tv_usec = ~0;
    648  1.1    dyoung 
    649  1.1    dyoung 	if (!ctl->nalloc)
    650  1.1    dyoung 		ctl->oldest.v = 0;
    651  1.1    dyoung 
    652  1.1    dyoung 	ctl->oldest.v = vtw_next(ctl, vtw);
    653  1.1    dyoung }
    654  1.1    dyoung 
    655  1.4  dholland /*!\brief	insert vestigial timewait in hash chain
    656  1.1    dyoung  */
    657  1.1    dyoung static void
    658  1.1    dyoung vtw_inshash_v4(vtw_ctl_t *ctl, vtw_t *vtw)
    659  1.1    dyoung {
    660  1.1    dyoung 	uint32_t	idx	= vtw_index(ctl, vtw);
    661  1.1    dyoung 	uint32_t	tag;
    662  1.1    dyoung 	vtw_v4_t	*v4 = (void*)vtw;
    663  1.1    dyoung 
    664  1.1    dyoung 	KASSERT(mutex_owned(softnet_lock));
    665  1.1    dyoung 	KASSERT(!vtw->hashed);
    666  1.1    dyoung 	KASSERT(ctl->clidx == vtw->msl_class);
    667  1.1    dyoung 
    668  1.1    dyoung 	++vtw_stats.ins;
    669  1.1    dyoung 
    670  1.1    dyoung 	tag = v4_tag(v4->faddr, v4->fport,
    671  1.1    dyoung 		     v4->laddr, v4->lport);
    672  1.1    dyoung 
    673  1.1    dyoung 	vtw->key = fatp_vtw_inshash(ctl->fat, idx, tag, 0, vtw);
    674  1.1    dyoung 
    675  1.1    dyoung 	db_trace(KTR_VTW, (ctl
    676  1.1    dyoung 			   , "vtw: ins %8.8x:%4.4x %8.8x:%4.4x"
    677  1.1    dyoung 			   " tag %8.8x key %8.8x"
    678  1.1    dyoung 			   , v4->faddr, v4->fport
    679  1.1    dyoung 			   , v4->laddr, v4->lport
    680  1.1    dyoung 			   , tag
    681  1.1    dyoung 			   , vtw->key));
    682  1.1    dyoung 
    683  1.1    dyoung 	tag = v4_port_tag(v4->lport);
    684  1.1    dyoung 	vtw->port_key = fatp_vtw_inshash(ctl->fat, idx, tag, 1, vtw);
    685  1.1    dyoung 
    686  1.1    dyoung 	db_trace(KTR_VTW, (ctl, "vtw: ins %P - %4.4x tag %8.8x key %8.8x"
    687  1.1    dyoung 			   , v4->lport, v4->lport
    688  1.1    dyoung 			   , tag
    689  1.1    dyoung 			   , vtw->key));
    690  1.1    dyoung 
    691  1.1    dyoung 	vtw->hashed = 1;
    692  1.1    dyoung }
    693  1.1    dyoung 
    694  1.4  dholland /*!\brief	insert vestigial timewait in hash chain
    695  1.1    dyoung  */
    696  1.1    dyoung static void
    697  1.1    dyoung vtw_inshash_v6(vtw_ctl_t *ctl, vtw_t *vtw)
    698  1.1    dyoung {
    699  1.1    dyoung 	uint32_t	idx	= vtw_index(ctl, vtw);
    700  1.1    dyoung 	uint32_t	tag;
    701  1.1    dyoung 	vtw_v6_t	*v6	= (void*)vtw;
    702  1.1    dyoung 
    703  1.1    dyoung 	KASSERT(mutex_owned(softnet_lock));
    704  1.1    dyoung 	KASSERT(!vtw->hashed);
    705  1.1    dyoung 	KASSERT(ctl->clidx == vtw->msl_class);
    706  1.1    dyoung 
    707  1.1    dyoung 	++vtw_stats.ins;
    708  1.1    dyoung 
    709  1.1    dyoung 	tag = v6_tag(&v6->faddr, v6->fport,
    710  1.1    dyoung 		     &v6->laddr, v6->lport);
    711  1.1    dyoung 
    712  1.1    dyoung 	vtw->key = fatp_vtw_inshash(ctl->fat, idx, tag, 0, vtw);
    713  1.1    dyoung 
    714  1.1    dyoung 	tag = v6_port_tag(v6->lport);
    715  1.1    dyoung 	vtw->port_key = fatp_vtw_inshash(ctl->fat, idx, tag, 1, vtw);
    716  1.1    dyoung 
    717  1.1    dyoung 	db_trace(KTR_VTW, (ctl, "vtw: ins %P - %4.4x tag %8.8x key %8.8x"
    718  1.1    dyoung 			   , v6->lport, v6->lport
    719  1.1    dyoung 			   , tag
    720  1.1    dyoung 			   , vtw->key));
    721  1.1    dyoung 
    722  1.1    dyoung 	vtw->hashed = 1;
    723  1.1    dyoung }
    724  1.1    dyoung 
    725  1.1    dyoung static vtw_t *
    726  1.1    dyoung vtw_lookup_hash_v4(vtw_ctl_t *ctl, uint32_t faddr, uint16_t fport
    727  1.1    dyoung 				 , uint32_t laddr, uint16_t lport
    728  1.1    dyoung 				 , int which)
    729  1.1    dyoung {
    730  1.1    dyoung 	vtw_v4_t	*v4;
    731  1.1    dyoung 	vtw_t		*vtw;
    732  1.1    dyoung 	uint32_t	tag;
    733  1.1    dyoung 	fatp_t		*fp;
    734  1.1    dyoung 	int		i;
    735  1.1    dyoung 	uint32_t	fatps = 0, probes = 0, losings = 0;
    736  1.1    dyoung 
    737  1.1    dyoung 	if (!ctl || !ctl->fat)
    738  1.1    dyoung 		return 0;
    739  1.1    dyoung 
    740  1.1    dyoung 	++vtw_stats.look[which];
    741  1.1    dyoung 
    742  1.1    dyoung 	if (which) {
    743  1.1    dyoung 		tag = v4_port_tag(lport);
    744  1.1    dyoung 		fp  = ctl->fat->port[tag & ctl->fat->mask];
    745  1.1    dyoung 	} else {
    746  1.1    dyoung 		tag = v4_tag(faddr, fport, laddr, lport);
    747  1.1    dyoung 		fp  = ctl->fat->hash[tag & ctl->fat->mask];
    748  1.1    dyoung 	}
    749  1.1    dyoung 
    750  1.1    dyoung 	while (fp && fp->inuse) {
    751  1.1    dyoung 		uint32_t	inuse = fp->inuse;
    752  1.1    dyoung 
    753  1.1    dyoung 		++fatps;
    754  1.1    dyoung 
    755  1.1    dyoung 		for (i = 0; inuse && i < fatp_ntags(); ++i) {
    756  1.1    dyoung 			uint32_t	idx;
    757  1.1    dyoung 
    758  1.1    dyoung 			if (!(inuse & (1 << i)))
    759  1.1    dyoung 				continue;
    760  1.1    dyoung 
    761  1.1    dyoung 			inuse ^= 1 << i;
    762  1.1    dyoung 
    763  1.1    dyoung 			++probes;
    764  1.1    dyoung 			++vtw_stats.probe[which];
    765  1.1    dyoung 
    766  1.1    dyoung 			idx = fp->tag[i] ^ tag ^ fatp_xtra[i];
    767  1.1    dyoung 			vtw = vtw_from_index(ctl, idx);
    768  1.1    dyoung 
    769  1.1    dyoung 			if (!vtw) {
    770  1.1    dyoung 				/* Hopefully fast path.
    771  1.1    dyoung 				 */
    772  1.1    dyoung 				db_trace(KTR_VTW
    773  1.1    dyoung 					 , (fp, "vtw: fast %A:%P %A:%P"
    774  1.1    dyoung 					    " idx %x tag %x"
    775  1.1    dyoung 					    , faddr, fport
    776  1.1    dyoung 					    , laddr, lport
    777  1.1    dyoung 					    , idx, tag));
    778  1.1    dyoung 				continue;
    779  1.1    dyoung 			}
    780  1.1    dyoung 
    781  1.1    dyoung 			v4 = (void*)vtw;
    782  1.1    dyoung 
    783  1.1    dyoung 			/* The de-referencing of vtw is what we want to avoid.
    784  1.1    dyoung 			 * Losing.
    785  1.1    dyoung 			 */
    786  1.1    dyoung 			if (vtw_alive(vtw)
    787  1.1    dyoung 			    && ((which ? vtw->port_key : vtw->key)
    788  1.1    dyoung 				== fatp_key(ctl->fat, fp, i))
    789  1.1    dyoung 			    && (which
    790  1.1    dyoung 				|| (v4->faddr == faddr && v4->laddr == laddr
    791  1.1    dyoung 				    && v4->fport == fport))
    792  1.1    dyoung 			    && v4->lport == lport) {
    793  1.1    dyoung 				++vtw_stats.hit[which];
    794  1.1    dyoung 
    795  1.1    dyoung 				db_trace(KTR_VTW
    796  1.1    dyoung 					 , (fp, "vtw: hit %8.8x:%4.4x"
    797  1.1    dyoung 					    " %8.8x:%4.4x idx %x key %x"
    798  1.1    dyoung 					    , faddr, fport
    799  1.1    dyoung 					    , laddr, lport
    800  1.1    dyoung 					    , idx_decode(ctl, idx), vtw->key));
    801  1.1    dyoung 
    802  1.1    dyoung 				KASSERT(vtw->hashed);
    803  1.1    dyoung 
    804  1.1    dyoung 				goto out;
    805  1.1    dyoung 			}
    806  1.1    dyoung 			++vtw_stats.losing[which];
    807  1.1    dyoung 			++losings;
    808  1.1    dyoung 
    809  1.1    dyoung 			if (vtw_alive(vtw)) {
    810  1.1    dyoung 				db_trace(KTR_VTW
    811  1.1    dyoung 					 , (fp, "vtw:!mis %8.8x:%4.4x"
    812  1.1    dyoung 					    " %8.8x:%4.4x key %x tag %x"
    813  1.1    dyoung 					    , faddr, fport
    814  1.1    dyoung 					    , laddr, lport
    815  1.1    dyoung 					    , fatp_key(ctl->fat, fp, i)
    816  1.1    dyoung 					    , v4_tag(faddr, fport
    817  1.1    dyoung 						     , laddr, lport)));
    818  1.1    dyoung 				db_trace(KTR_VTW
    819  1.1    dyoung 					 , (vtw, "vtw:!mis %8.8x:%4.4x"
    820  1.1    dyoung 					    " %8.8x:%4.4x key %x tag %x"
    821  1.1    dyoung 					    , v4->faddr, v4->fport
    822  1.1    dyoung 					    , v4->laddr, v4->lport
    823  1.1    dyoung 					    , vtw->key
    824  1.1    dyoung 					    , v4_tag(v4->faddr, v4->fport
    825  1.1    dyoung 						     , v4->laddr, v4->lport)));
    826  1.1    dyoung 
    827  1.1    dyoung 				if (vtw->key == fatp_key(ctl->fat, fp, i)) {
    828  1.1    dyoung 					db_trace(KTR_VTW
    829  1.1    dyoung 						 , (vtw, "vtw:!mis %8.8x:%4.4x"
    830  1.1    dyoung 						    " %8.8x:%4.4x key %x"
    831  1.1    dyoung 						    " which %x"
    832  1.1    dyoung 						    , v4->faddr, v4->fport
    833  1.1    dyoung 						    , v4->laddr, v4->lport
    834  1.1    dyoung 						    , vtw->key
    835  1.1    dyoung 						    , which));
    836  1.1    dyoung 
    837  1.1    dyoung 				} else {
    838  1.1    dyoung 					db_trace(KTR_VTW
    839  1.1    dyoung 						 , (vtw
    840  1.1    dyoung 						    , "vtw:!mis"
    841  1.1    dyoung 						    " key %8.8x != %8.8x"
    842  1.1    dyoung 						    " idx %x i %x which %x"
    843  1.1    dyoung 						    , vtw->key
    844  1.1    dyoung 						    , fatp_key(ctl->fat, fp, i)
    845  1.1    dyoung 						    , idx_decode(ctl, idx)
    846  1.1    dyoung 						    , i
    847  1.1    dyoung 						    , which));
    848  1.1    dyoung 				}
    849  1.1    dyoung 			} else {
    850  1.1    dyoung 				db_trace(KTR_VTW
    851  1.1    dyoung 					 , (fp
    852  1.1    dyoung 					    , "vtw:!mis free entry"
    853  1.1    dyoung 					    " idx %x vtw %p which %x"
    854  1.1    dyoung 					    , idx_decode(ctl, idx)
    855  1.1    dyoung 					    , vtw, which));
    856  1.1    dyoung 			}
    857  1.1    dyoung 		}
    858  1.1    dyoung 
    859  1.1    dyoung 		if (fp->nxt) {
    860  1.1    dyoung 			fp = fatp_next(ctl->fat, fp);
    861  1.1    dyoung 		} else {
    862  1.1    dyoung 			break;
    863  1.1    dyoung 		}
    864  1.1    dyoung 	}
    865  1.1    dyoung 	++vtw_stats.miss[which];
    866  1.1    dyoung 	vtw = 0;
    867  1.1    dyoung out:
    868  1.1    dyoung 	if (fatps > vtw_stats.max_chain[which])
    869  1.1    dyoung 		vtw_stats.max_chain[which] = fatps;
    870  1.1    dyoung 	if (probes > vtw_stats.max_probe[which])
    871  1.1    dyoung 		vtw_stats.max_probe[which] = probes;
    872  1.1    dyoung 	if (losings > vtw_stats.max_loss[which])
    873  1.1    dyoung 		vtw_stats.max_loss[which] = losings;
    874  1.1    dyoung 
    875  1.1    dyoung 	return vtw;
    876  1.1    dyoung }
    877  1.1    dyoung 
    878  1.1    dyoung static vtw_t *
    879  1.1    dyoung vtw_lookup_hash_v6(vtw_ctl_t *ctl, const struct in6_addr *faddr, uint16_t fport
    880  1.1    dyoung 				 , const struct in6_addr *laddr, uint16_t lport
    881  1.1    dyoung 				 , int which)
    882  1.1    dyoung {
    883  1.1    dyoung 	vtw_v6_t	*v6;
    884  1.1    dyoung 	vtw_t		*vtw;
    885  1.1    dyoung 	uint32_t	tag;
    886  1.1    dyoung 	fatp_t		*fp;
    887  1.1    dyoung 	int		i;
    888  1.1    dyoung 	uint32_t	fatps = 0, probes = 0, losings = 0;
    889  1.1    dyoung 
    890  1.1    dyoung 	++vtw_stats.look[which];
    891  1.1    dyoung 
    892  1.1    dyoung 	if (!ctl || !ctl->fat)
    893  1.1    dyoung 		return 0;
    894  1.1    dyoung 
    895  1.1    dyoung 	if (which) {
    896  1.1    dyoung 		tag = v6_port_tag(lport);
    897  1.1    dyoung 		fp  = ctl->fat->port[tag & ctl->fat->mask];
    898  1.1    dyoung 	} else {
    899  1.1    dyoung 		tag = v6_tag(faddr, fport, laddr, lport);
    900  1.1    dyoung 		fp  = ctl->fat->hash[tag & ctl->fat->mask];
    901  1.1    dyoung 	}
    902  1.1    dyoung 
    903  1.1    dyoung 	while (fp && fp->inuse) {
    904  1.1    dyoung 		uint32_t	inuse = fp->inuse;
    905  1.1    dyoung 
    906  1.1    dyoung 		++fatps;
    907  1.1    dyoung 
    908  1.1    dyoung 		for (i = 0; inuse && i < fatp_ntags(); ++i) {
    909  1.1    dyoung 			uint32_t	idx;
    910  1.1    dyoung 
    911  1.1    dyoung 			if (!(inuse & (1 << i)))
    912  1.1    dyoung 				continue;
    913  1.1    dyoung 
    914  1.1    dyoung 			inuse ^= 1 << i;
    915  1.1    dyoung 
    916  1.1    dyoung 			++probes;
    917  1.1    dyoung 			++vtw_stats.probe[which];
    918  1.1    dyoung 
    919  1.1    dyoung 			idx = fp->tag[i] ^ tag ^ fatp_xtra[i];
    920  1.1    dyoung 			vtw = vtw_from_index(ctl, idx);
    921  1.1    dyoung 
    922  1.1    dyoung 			db_trace(KTR_VTW
    923  1.1    dyoung 				 , (fp, "probe: %2d %6A:%4.4x %6A:%4.4x idx %x"
    924  1.1    dyoung 				    , i
    925  1.1    dyoung 				    , db_store(faddr, sizeof (*faddr)), fport
    926  1.1    dyoung 				    , db_store(laddr, sizeof (*laddr)), lport
    927  1.1    dyoung 				    , idx_decode(ctl, idx)));
    928  1.1    dyoung 
    929  1.1    dyoung 			if (!vtw) {
    930  1.1    dyoung 				/* Hopefully fast path.
    931  1.1    dyoung 				 */
    932  1.1    dyoung 				continue;
    933  1.1    dyoung 			}
    934  1.1    dyoung 
    935  1.1    dyoung 			v6 = (void*)vtw;
    936  1.1    dyoung 
    937  1.1    dyoung 			if (vtw_alive(vtw)
    938  1.1    dyoung 			    && ((which ? vtw->port_key : vtw->key)
    939  1.1    dyoung 				== fatp_key(ctl->fat, fp, i))
    940  1.1    dyoung 			    && v6->lport == lport
    941  1.1    dyoung 			    && (which
    942  1.1    dyoung 				|| (v6->fport == fport
    943  1.1    dyoung 				    && !bcmp(&v6->faddr, faddr, sizeof (*faddr))
    944  1.1    dyoung 				    && !bcmp(&v6->laddr, laddr
    945  1.1    dyoung 					     , sizeof (*laddr))))) {
    946  1.1    dyoung 				++vtw_stats.hit[which];
    947  1.1    dyoung 
    948  1.1    dyoung 				KASSERT(vtw->hashed);
    949  1.1    dyoung 				goto out;
    950  1.1    dyoung 			} else {
    951  1.1    dyoung 				++vtw_stats.losing[which];
    952  1.1    dyoung 				++losings;
    953  1.1    dyoung 			}
    954  1.1    dyoung 		}
    955  1.1    dyoung 
    956  1.1    dyoung 		if (fp->nxt) {
    957  1.1    dyoung 			fp = fatp_next(ctl->fat, fp);
    958  1.1    dyoung 		} else {
    959  1.1    dyoung 			break;
    960  1.1    dyoung 		}
    961  1.1    dyoung 	}
    962  1.1    dyoung 	++vtw_stats.miss[which];
    963  1.1    dyoung 	vtw = 0;
    964  1.1    dyoung out:
    965  1.1    dyoung 	if (fatps > vtw_stats.max_chain[which])
    966  1.1    dyoung 		vtw_stats.max_chain[which] = fatps;
    967  1.1    dyoung 	if (probes > vtw_stats.max_probe[which])
    968  1.1    dyoung 		vtw_stats.max_probe[which] = probes;
    969  1.1    dyoung 	if (losings > vtw_stats.max_loss[which])
    970  1.1    dyoung 		vtw_stats.max_loss[which] = losings;
    971  1.1    dyoung 
    972  1.1    dyoung 	return vtw;
    973  1.1    dyoung }
    974  1.1    dyoung 
    975  1.1    dyoung /*!\brief port iterator
    976  1.1    dyoung  */
    977  1.1    dyoung static vtw_t *
    978  1.1    dyoung vtw_next_port_v4(struct tcp_ports_iterator *it)
    979  1.1    dyoung {
    980  1.1    dyoung 	vtw_ctl_t	*ctl = it->ctl;
    981  1.1    dyoung 	vtw_v4_t	*v4;
    982  1.1    dyoung 	vtw_t		*vtw;
    983  1.1    dyoung 	uint32_t	tag;
    984  1.1    dyoung 	uint16_t	lport = it->port;
    985  1.1    dyoung 	fatp_t		*fp;
    986  1.1    dyoung 	int		i;
    987  1.1    dyoung 	uint32_t	fatps = 0, probes = 0, losings = 0;
    988  1.1    dyoung 
    989  1.1    dyoung 	tag = v4_port_tag(lport);
    990  1.1    dyoung 	if (!it->fp) {
    991  1.1    dyoung 		it->fp = ctl->fat->port[tag & ctl->fat->mask];
    992  1.1    dyoung 		it->slot_idx = 0;
    993  1.1    dyoung 	}
    994  1.1    dyoung 	fp  = it->fp;
    995  1.1    dyoung 
    996  1.1    dyoung 	while (fp) {
    997  1.1    dyoung 		uint32_t	inuse = fp->inuse;
    998  1.1    dyoung 
    999  1.1    dyoung 		++fatps;
   1000  1.1    dyoung 
   1001  1.1    dyoung 		for (i = it->slot_idx; inuse && i < fatp_ntags(); ++i) {
   1002  1.1    dyoung 			uint32_t	idx;
   1003  1.1    dyoung 
   1004  1.1    dyoung 			if (!(inuse & (1 << i)))
   1005  1.1    dyoung 				continue;
   1006  1.1    dyoung 
   1007  1.1    dyoung 			inuse &= ~0 << i;
   1008  1.1    dyoung 
   1009  1.1    dyoung 			if (i < it->slot_idx)
   1010  1.1    dyoung 				continue;
   1011  1.1    dyoung 
   1012  1.1    dyoung 			++vtw_stats.probe[1];
   1013  1.1    dyoung 			++probes;
   1014  1.1    dyoung 
   1015  1.1    dyoung 			idx = fp->tag[i] ^ tag ^ fatp_xtra[i];
   1016  1.1    dyoung 			vtw = vtw_from_index(ctl, idx);
   1017  1.1    dyoung 
   1018  1.1    dyoung 			if (!vtw) {
   1019  1.1    dyoung 				/* Hopefully fast path.
   1020  1.1    dyoung 				 */
   1021  1.1    dyoung 				continue;
   1022  1.1    dyoung 			}
   1023  1.1    dyoung 
   1024  1.1    dyoung 			v4 = (void*)vtw;
   1025  1.1    dyoung 
   1026  1.1    dyoung 			if (vtw_alive(vtw)
   1027  1.1    dyoung 			    && vtw->port_key == fatp_key(ctl->fat, fp, i)
   1028  1.1    dyoung 			    && v4->lport == lport) {
   1029  1.1    dyoung 				++vtw_stats.hit[1];
   1030  1.1    dyoung 
   1031  1.1    dyoung 				it->slot_idx = i + 1;
   1032  1.1    dyoung 
   1033  1.1    dyoung 				goto out;
   1034  1.1    dyoung 			} else if (vtw_alive(vtw)) {
   1035  1.1    dyoung 				++vtw_stats.losing[1];
   1036  1.1    dyoung 				++losings;
   1037  1.1    dyoung 
   1038  1.1    dyoung 				db_trace(KTR_VTW
   1039  1.1    dyoung 					 , (vtw, "vtw:!mis"
   1040  1.1    dyoung 					    " port %8.8x:%4.4x %8.8x:%4.4x"
   1041  1.1    dyoung 					    " key %x port %x"
   1042  1.1    dyoung 					    , v4->faddr, v4->fport
   1043  1.1    dyoung 					    , v4->laddr, v4->lport
   1044  1.1    dyoung 					    , vtw->key
   1045  1.1    dyoung 					    , lport));
   1046  1.1    dyoung 			} else {
   1047  1.1    dyoung 				/* Really losing here.  We are coming
   1048  1.1    dyoung 				 * up with references to free entries.
   1049  1.1    dyoung 				 * Might find it better to use
   1050  1.1    dyoung 				 * traditional, or need another
   1051  1.1    dyoung 				 * add-hockery.  The other add-hockery
   1052  1.1    dyoung 				 * would be to pul more into into the
   1053  1.1    dyoung 				 * cache line to reject the false
   1054  1.1    dyoung 				 * hits.
   1055  1.1    dyoung 				 */
   1056  1.1    dyoung 				++vtw_stats.losing[1];
   1057  1.1    dyoung 				++losings;
   1058  1.1    dyoung 				db_trace(KTR_VTW
   1059  1.1    dyoung 					 , (fp, "vtw:!mis port %x"
   1060  1.1    dyoung 					    " - free entry idx %x vtw %p"
   1061  1.1    dyoung 					    , lport
   1062  1.1    dyoung 					    , idx_decode(ctl, idx)
   1063  1.1    dyoung 					    , vtw));
   1064  1.1    dyoung 			}
   1065  1.1    dyoung 		}
   1066  1.1    dyoung 
   1067  1.1    dyoung 		if (fp->nxt) {
   1068  1.1    dyoung 			it->fp = fp = fatp_next(ctl->fat, fp);
   1069  1.1    dyoung 			it->slot_idx = 0;
   1070  1.1    dyoung 		} else {
   1071  1.1    dyoung 			it->fp = 0;
   1072  1.1    dyoung 			break;
   1073  1.1    dyoung 		}
   1074  1.1    dyoung 	}
   1075  1.1    dyoung 	++vtw_stats.miss[1];
   1076  1.1    dyoung 
   1077  1.1    dyoung 	vtw = 0;
   1078  1.1    dyoung out:
   1079  1.1    dyoung 	if (fatps > vtw_stats.max_chain[1])
   1080  1.1    dyoung 		vtw_stats.max_chain[1] = fatps;
   1081  1.1    dyoung 	if (probes > vtw_stats.max_probe[1])
   1082  1.1    dyoung 		vtw_stats.max_probe[1] = probes;
   1083  1.1    dyoung 	if (losings > vtw_stats.max_loss[1])
   1084  1.1    dyoung 		vtw_stats.max_loss[1] = losings;
   1085  1.1    dyoung 
   1086  1.1    dyoung 	return vtw;
   1087  1.1    dyoung }
   1088  1.1    dyoung 
   1089  1.1    dyoung /*!\brief port iterator
   1090  1.1    dyoung  */
   1091  1.1    dyoung static vtw_t *
   1092  1.1    dyoung vtw_next_port_v6(struct tcp_ports_iterator *it)
   1093  1.1    dyoung {
   1094  1.1    dyoung 	vtw_ctl_t	*ctl = it->ctl;
   1095  1.1    dyoung 	vtw_v6_t	*v6;
   1096  1.1    dyoung 	vtw_t		*vtw;
   1097  1.1    dyoung 	uint32_t	tag;
   1098  1.1    dyoung 	uint16_t	lport = it->port;
   1099  1.1    dyoung 	fatp_t		*fp;
   1100  1.1    dyoung 	int		i;
   1101  1.1    dyoung 	uint32_t	fatps = 0, probes = 0, losings = 0;
   1102  1.1    dyoung 
   1103  1.1    dyoung 	tag = v6_port_tag(lport);
   1104  1.1    dyoung 	if (!it->fp) {
   1105  1.1    dyoung 		it->fp = ctl->fat->port[tag & ctl->fat->mask];
   1106  1.1    dyoung 		it->slot_idx = 0;
   1107  1.1    dyoung 	}
   1108  1.1    dyoung 	fp  = it->fp;
   1109  1.1    dyoung 
   1110  1.1    dyoung 	while (fp) {
   1111  1.1    dyoung 		uint32_t	inuse = fp->inuse;
   1112  1.1    dyoung 
   1113  1.1    dyoung 		++fatps;
   1114  1.1    dyoung 
   1115  1.1    dyoung 		for (i = it->slot_idx; inuse && i < fatp_ntags(); ++i) {
   1116  1.1    dyoung 			uint32_t	idx;
   1117  1.1    dyoung 
   1118  1.1    dyoung 			if (!(inuse & (1 << i)))
   1119  1.1    dyoung 				continue;
   1120  1.1    dyoung 
   1121  1.1    dyoung 			inuse &= ~0 << i;
   1122  1.1    dyoung 
   1123  1.1    dyoung 			if (i < it->slot_idx)
   1124  1.1    dyoung 				continue;
   1125  1.1    dyoung 
   1126  1.1    dyoung 			++vtw_stats.probe[1];
   1127  1.1    dyoung 			++probes;
   1128  1.1    dyoung 
   1129  1.1    dyoung 			idx = fp->tag[i] ^ tag ^ fatp_xtra[i];
   1130  1.1    dyoung 			vtw = vtw_from_index(ctl, idx);
   1131  1.1    dyoung 
   1132  1.1    dyoung 			if (!vtw) {
   1133  1.1    dyoung 				/* Hopefully fast path.
   1134  1.1    dyoung 				 */
   1135  1.1    dyoung 				continue;
   1136  1.1    dyoung 			}
   1137  1.1    dyoung 
   1138  1.1    dyoung 			v6 = (void*)vtw;
   1139  1.1    dyoung 
   1140  1.1    dyoung 			db_trace(KTR_VTW
   1141  1.1    dyoung 				 , (vtw, "vtw: i %x idx %x fp->tag %x"
   1142  1.1    dyoung 				    " tag %x xtra %x"
   1143  1.1    dyoung 				    , i, idx_decode(ctl, idx)
   1144  1.1    dyoung 				    , fp->tag[i], tag, fatp_xtra[i]));
   1145  1.1    dyoung 
   1146  1.1    dyoung 			if (vtw_alive(vtw)
   1147  1.1    dyoung 			    && vtw->port_key == fatp_key(ctl->fat, fp, i)
   1148  1.1    dyoung 			    && v6->lport == lport) {
   1149  1.1    dyoung 				++vtw_stats.hit[1];
   1150  1.1    dyoung 
   1151  1.1    dyoung 				db_trace(KTR_VTW
   1152  1.1    dyoung 					 , (fp, "vtw: nxt port %P - %4.4x"
   1153  1.1    dyoung 					    " idx %x key %x"
   1154  1.1    dyoung 					    , lport, lport
   1155  1.1    dyoung 					    , idx_decode(ctl, idx), vtw->key));
   1156  1.1    dyoung 
   1157  1.1    dyoung 				it->slot_idx = i + 1;
   1158  1.1    dyoung 				goto out;
   1159  1.1    dyoung 			} else if (vtw_alive(vtw)) {
   1160  1.1    dyoung 				++vtw_stats.losing[1];
   1161  1.1    dyoung 
   1162  1.1    dyoung 				db_trace(KTR_VTW
   1163  1.1    dyoung 					 , (vtw, "vtw:!mis port %6A:%4.4x"
   1164  1.1    dyoung 					    " %6A:%4.4x key %x port %x"
   1165  1.1    dyoung 					    , db_store(&v6->faddr
   1166  1.1    dyoung 						       , sizeof (v6->faddr))
   1167  1.1    dyoung 					    , v6->fport
   1168  1.1    dyoung 					    , db_store(&v6->laddr
   1169  1.1    dyoung 						       , sizeof (v6->faddr))
   1170  1.1    dyoung 					    , v6->lport
   1171  1.1    dyoung 					    , vtw->key
   1172  1.1    dyoung 					    , lport));
   1173  1.1    dyoung 			} else {
   1174  1.1    dyoung 				/* Really losing here.  We are coming
   1175  1.1    dyoung 				 * up with references to free entries.
   1176  1.1    dyoung 				 * Might find it better to use
   1177  1.1    dyoung 				 * traditional, or need another
   1178  1.1    dyoung 				 * add-hockery.  The other add-hockery
   1179  1.1    dyoung 				 * would be to pul more into into the
   1180  1.1    dyoung 				 * cache line to reject the false
   1181  1.1    dyoung 				 * hits.
   1182  1.1    dyoung 				 */
   1183  1.1    dyoung 				++vtw_stats.losing[1];
   1184  1.1    dyoung 				++losings;
   1185  1.1    dyoung 
   1186  1.1    dyoung 				db_trace(KTR_VTW
   1187  1.1    dyoung 					 , (fp
   1188  1.1    dyoung 					    , "vtw:!mis port %x"
   1189  1.1    dyoung 					    " - free entry idx %x vtw %p"
   1190  1.1    dyoung 					    , lport, idx_decode(ctl, idx)
   1191  1.1    dyoung 					    , vtw));
   1192  1.1    dyoung 			}
   1193  1.1    dyoung 		}
   1194  1.1    dyoung 
   1195  1.1    dyoung 		if (fp->nxt) {
   1196  1.1    dyoung 			it->fp = fp = fatp_next(ctl->fat, fp);
   1197  1.1    dyoung 			it->slot_idx = 0;
   1198  1.1    dyoung 		} else {
   1199  1.1    dyoung 			it->fp = 0;
   1200  1.1    dyoung 			break;
   1201  1.1    dyoung 		}
   1202  1.1    dyoung 	}
   1203  1.1    dyoung 	++vtw_stats.miss[1];
   1204  1.1    dyoung 
   1205  1.1    dyoung 	vtw = 0;
   1206  1.1    dyoung out:
   1207  1.1    dyoung 	if (fatps > vtw_stats.max_chain[1])
   1208  1.1    dyoung 		vtw_stats.max_chain[1] = fatps;
   1209  1.1    dyoung 	if (probes > vtw_stats.max_probe[1])
   1210  1.1    dyoung 		vtw_stats.max_probe[1] = probes;
   1211  1.1    dyoung 	if (losings > vtw_stats.max_loss[1])
   1212  1.1    dyoung 		vtw_stats.max_loss[1] = losings;
   1213  1.1    dyoung 
   1214  1.1    dyoung 	return vtw;
   1215  1.1    dyoung }
   1216  1.1    dyoung 
   1217  1.1    dyoung /*!\brief initialise the VTW allocation arena
   1218  1.1    dyoung  *
   1219  1.1    dyoung  * There are 1+3 allocation classes:
   1220  1.1    dyoung  *	0	classless
   1221  1.1    dyoung  *	{1,2,3}	MSL-class based allocation
   1222  1.1    dyoung  *
   1223  1.1    dyoung  * The allocation arenas are all initialised.  Classless gets all the
   1224  1.1    dyoung  * space.  MSL-class based divides the arena, so that allocation
   1225  1.1    dyoung  * within a class can proceed without having to consider entries
   1226  1.1    dyoung  * (aka: cache lines) from different classes.
   1227  1.1    dyoung  *
   1228  1.1    dyoung  * Usually, we are completely classless or class-based, but there can be
   1229  1.1    dyoung  * transition periods, corresponding to dynamic adjustments in the config
   1230  1.1    dyoung  * by the operator.
   1231  1.1    dyoung  */
   1232  1.1    dyoung static void
   1233  1.6    dyoung vtw_init(fatp_ctl_t *fat, vtw_ctl_t *ctl, const uint32_t n, vtw_t *ctl_base_v)
   1234  1.1    dyoung {
   1235  1.6    dyoung 	int class_n, i;
   1236  1.6    dyoung 	vtw_t	*base;
   1237  1.1    dyoung 
   1238  1.6    dyoung 	ctl->base.v = ctl_base_v;
   1239  1.1    dyoung 
   1240  1.6    dyoung 	if (ctl->is_v4) {
   1241  1.6    dyoung 		ctl->lim.v4    = ctl->base.v4 + n - 1;
   1242  1.6    dyoung 		ctl->alloc.v4  = ctl->base.v4;
   1243  1.6    dyoung 	} else {
   1244  1.6    dyoung 		ctl->lim.v6    = ctl->base.v6 + n - 1;
   1245  1.6    dyoung 		ctl->alloc.v6  = ctl->base.v6;
   1246  1.6    dyoung 	}
   1247  1.1    dyoung 
   1248  1.6    dyoung 	ctl->nfree  = n;
   1249  1.6    dyoung 	ctl->ctl    = ctl;
   1250  1.1    dyoung 
   1251  1.6    dyoung 	ctl->idx_bits = 32;
   1252  1.6    dyoung 	for (ctl->idx_mask = ~0; (ctl->idx_mask & (n-1)) == n-1; ) {
   1253  1.6    dyoung 		ctl->idx_mask >>= 1;
   1254  1.6    dyoung 		ctl->idx_bits  -= 1;
   1255  1.6    dyoung 	}
   1256  1.1    dyoung 
   1257  1.6    dyoung 	ctl->idx_mask <<= 1;
   1258  1.6    dyoung 	ctl->idx_mask  |= 1;
   1259  1.6    dyoung 	ctl->idx_bits  += 1;
   1260  1.1    dyoung 
   1261  1.6    dyoung 	ctl->fat = fat;
   1262  1.6    dyoung 	fat->vtw = ctl;
   1263  1.1    dyoung 
   1264  1.6    dyoung 	/* Divide the resources equally amongst the classes.
   1265  1.6    dyoung 	 * This is not optimal, as the different classes
   1266  1.6    dyoung 	 * arrive and leave at different rates, but it is
   1267  1.6    dyoung 	 * the best I can do for now.
   1268  1.6    dyoung 	 */
   1269  1.6    dyoung 	class_n = n / (VTW_NCLASS-1);
   1270  1.6    dyoung 	base    = ctl->base.v;
   1271  1.1    dyoung 
   1272  1.6    dyoung 	for (i = 1; i < VTW_NCLASS; ++i) {
   1273  1.6    dyoung 		int j;
   1274  1.1    dyoung 
   1275  1.6    dyoung 		ctl[i] = ctl[0];
   1276  1.6    dyoung 		ctl[i].clidx = i;
   1277  1.1    dyoung 
   1278  1.6    dyoung 		ctl[i].base.v = base;
   1279  1.6    dyoung 		ctl[i].alloc  = ctl[i].base;
   1280  1.1    dyoung 
   1281  1.6    dyoung 		for (j = 0; j < class_n - 1; ++j) {
   1282  1.6    dyoung 			if (tcp_msl_enable)
   1283  1.6    dyoung 				base->msl_class = i;
   1284  1.1    dyoung 			base = vtw_next(ctl, base);
   1285  1.1    dyoung 		}
   1286  1.6    dyoung 
   1287  1.6    dyoung 		ctl[i].lim.v = base;
   1288  1.6    dyoung 		base = vtw_next(ctl, base);
   1289  1.6    dyoung 		ctl[i].nfree = class_n;
   1290  1.1    dyoung 	}
   1291  1.1    dyoung 
   1292  1.1    dyoung 	vtw_debug_init();
   1293  1.1    dyoung }
   1294  1.1    dyoung 
   1295  1.1    dyoung /*!\brief	map class to TCP MSL
   1296  1.1    dyoung  */
   1297  1.1    dyoung static inline uint32_t
   1298  1.1    dyoung class_to_msl(int class)
   1299  1.1    dyoung {
   1300  1.1    dyoung 	switch (class) {
   1301  1.1    dyoung 	case 0:
   1302  1.1    dyoung 	case 1:
   1303  1.1    dyoung 		return tcp_msl_remote ? tcp_msl_remote : (TCPTV_MSL >> 0);
   1304  1.1    dyoung 	case 2:
   1305  1.1    dyoung 		return tcp_msl_local ? tcp_msl_local : (TCPTV_MSL >> 1);
   1306  1.1    dyoung 	default:
   1307  1.1    dyoung 		return tcp_msl_loop ? tcp_msl_loop : (TCPTV_MSL >> 2);
   1308  1.1    dyoung 	}
   1309  1.1    dyoung }
   1310  1.1    dyoung 
   1311  1.1    dyoung /*!\brief	map TCP MSL to class
   1312  1.1    dyoung  */
   1313  1.1    dyoung static inline uint32_t
   1314  1.1    dyoung msl_to_class(int msl)
   1315  1.1    dyoung {
   1316  1.1    dyoung 	if (tcp_msl_enable) {
   1317  1.1    dyoung 		if (msl <= (tcp_msl_loop ? tcp_msl_loop : (TCPTV_MSL >> 2)))
   1318  1.1    dyoung 			return 1+2;
   1319  1.1    dyoung 		if (msl <= (tcp_msl_local ? tcp_msl_local : (TCPTV_MSL >> 1)))
   1320  1.1    dyoung 			return 1+1;
   1321  1.1    dyoung 		return 1;
   1322  1.1    dyoung 	}
   1323  1.1    dyoung 	return 0;
   1324  1.1    dyoung }
   1325  1.1    dyoung 
   1326  1.1    dyoung /*!\brief allocate a vtw entry
   1327  1.1    dyoung  */
   1328  1.1    dyoung static inline vtw_t *
   1329  1.1    dyoung vtw_alloc(vtw_ctl_t *ctl)
   1330  1.1    dyoung {
   1331  1.1    dyoung 	vtw_t	*vtw	= 0;
   1332  1.1    dyoung 	int	stuck	= 0;
   1333  1.1    dyoung 	int	avail	= ctl ? (ctl->nalloc + ctl->nfree) : 0;
   1334  1.1    dyoung 	int	msl;
   1335  1.1    dyoung 
   1336  1.1    dyoung 	KASSERT(mutex_owned(softnet_lock));
   1337  1.1    dyoung 
   1338  1.1    dyoung 	/* If no resources, we will not get far.
   1339  1.1    dyoung 	 */
   1340  1.1    dyoung 	if (!ctl || !ctl->base.v4 || avail <= 0)
   1341  1.1    dyoung 		return 0;
   1342  1.1    dyoung 
   1343  1.1    dyoung 	/* Obtain a free one.
   1344  1.1    dyoung 	 */
   1345  1.1    dyoung 	while (!ctl->nfree) {
   1346  1.1    dyoung 		vtw_age(ctl, 0);
   1347  1.1    dyoung 
   1348  1.1    dyoung 		if (++stuck > avail) {
   1349  1.1    dyoung 			/* When in transition between
   1350  1.1    dyoung 			 * schemes (classless, classed) we
   1351  1.1    dyoung 			 * can be stuck having to await the
   1352  1.1    dyoung 			 * expiration of cross-allocated entries.
   1353  1.1    dyoung 			 *
   1354  1.1    dyoung 			 * Returning zero means we will fall back to the
   1355  1.1    dyoung 			 * traditional TIME_WAIT handling, except in the
   1356  1.1    dyoung 			 * case of a re-shed, in which case we cannot
   1357  1.1    dyoung 			 * perform the reshecd, but will retain the extant
   1358  1.1    dyoung 			 * entry.
   1359  1.1    dyoung 			 */
   1360  1.1    dyoung 			db_trace(KTR_VTW
   1361  1.1    dyoung 				 , (ctl, "vtw:!none free in class %x %x/%x"
   1362  1.1    dyoung 				    , ctl->clidx
   1363  1.1    dyoung 				    , ctl->nalloc, ctl->nfree));
   1364  1.1    dyoung 
   1365  1.1    dyoung 			return 0;
   1366  1.1    dyoung 		}
   1367  1.1    dyoung 	}
   1368  1.1    dyoung 
   1369  1.1    dyoung 	vtw = ctl->alloc.v;
   1370  1.1    dyoung 
   1371  1.1    dyoung 	if (vtw->msl_class != ctl->clidx) {
   1372  1.1    dyoung 		/* Usurping rules:
   1373  1.1    dyoung 		 * 	0 -> {1,2,3} or {1,2,3} -> 0
   1374  1.1    dyoung 		 */
   1375  1.1    dyoung 		KASSERT(!vtw->msl_class || !ctl->clidx);
   1376  1.1    dyoung 
   1377  1.1    dyoung 		if (vtw->hashed || vtw->expire.tv_sec) {
   1378  1.1    dyoung 		    /* As this is owned by some other class,
   1379  1.1    dyoung 		     * we must wait for it to expire it.
   1380  1.1    dyoung 		     * This will only happen on class/classless
   1381  1.1    dyoung 		     * transitions, which are guaranteed to progress
   1382  1.1    dyoung 		     * to completion in small finite time, barring bugs.
   1383  1.1    dyoung 		     */
   1384  1.1    dyoung 		    db_trace(KTR_VTW
   1385  1.1    dyoung 			     , (ctl, "vtw:!%p class %x!=%x %x:%x%s"
   1386  1.1    dyoung 				, vtw, vtw->msl_class, ctl->clidx
   1387  1.1    dyoung 				, vtw->expire.tv_sec
   1388  1.1    dyoung 				, vtw->expire.tv_usec
   1389  1.1    dyoung 				, vtw->hashed ? " hashed" : ""));
   1390  1.1    dyoung 
   1391  1.1    dyoung 		    return 0;
   1392  1.1    dyoung 		}
   1393  1.1    dyoung 
   1394  1.1    dyoung 		db_trace(KTR_VTW
   1395  1.1    dyoung 			 , (ctl, "vtw:!%p usurped from %x to %x"
   1396  1.1    dyoung 			    , vtw, vtw->msl_class, ctl->clidx));
   1397  1.1    dyoung 
   1398  1.1    dyoung 		vtw->msl_class = ctl->clidx;
   1399  1.1    dyoung 	}
   1400  1.1    dyoung 
   1401  1.1    dyoung 	if (vtw_alive(vtw)) {
   1402  1.1    dyoung 		KASSERT(0 && "next free not free");
   1403  1.1    dyoung 		return 0;
   1404  1.1    dyoung 	}
   1405  1.1    dyoung 
   1406  1.1    dyoung 	/* Advance allocation poiter.
   1407  1.1    dyoung 	 */
   1408  1.1    dyoung 	ctl->alloc.v = vtw_next(ctl, vtw);
   1409  1.1    dyoung 
   1410  1.1    dyoung 	--ctl->nfree;
   1411  1.1    dyoung 	++ctl->nalloc;
   1412  1.1    dyoung 
   1413  1.1    dyoung 	msl = (2 * class_to_msl(ctl->clidx) * 1000) / PR_SLOWHZ;	// msec
   1414  1.1    dyoung 
   1415  1.1    dyoung 	/* mark expiration
   1416  1.1    dyoung 	 */
   1417  1.3  drochner 	getmicrouptime(&vtw->expire);
   1418  1.1    dyoung 
   1419  1.1    dyoung 	/* Move expiration into the future.
   1420  1.1    dyoung 	 */
   1421  1.1    dyoung 	vtw->expire.tv_sec  += msl / 1000;
   1422  1.1    dyoung 	vtw->expire.tv_usec += 1000 * (msl % 1000);
   1423  1.1    dyoung 
   1424  1.1    dyoung 	while (vtw->expire.tv_usec >= 1000*1000) {
   1425  1.1    dyoung 		vtw->expire.tv_usec -= 1000*1000;
   1426  1.1    dyoung 		vtw->expire.tv_sec  += 1;
   1427  1.1    dyoung 	}
   1428  1.1    dyoung 
   1429  1.1    dyoung 	if (!ctl->oldest.v)
   1430  1.1    dyoung 		ctl->oldest.v = vtw;
   1431  1.1    dyoung 
   1432  1.1    dyoung 	return vtw;
   1433  1.1    dyoung }
   1434  1.1    dyoung 
   1435  1.1    dyoung /*!\brief expiration
   1436  1.1    dyoung  */
   1437  1.1    dyoung static int
   1438  1.1    dyoung vtw_age(vtw_ctl_t *ctl, struct timeval *_when)
   1439  1.1    dyoung {
   1440  1.1    dyoung 	vtw_t	*vtw;
   1441  1.1    dyoung 	struct timeval then, *when = _when;
   1442  1.1    dyoung 	int	maxtries = 0;
   1443  1.1    dyoung 
   1444  1.1    dyoung 	if (!ctl->oldest.v) {
   1445  1.1    dyoung 		KASSERT(!ctl->nalloc);
   1446  1.1    dyoung 		return 0;
   1447  1.1    dyoung 	}
   1448  1.1    dyoung 
   1449  1.1    dyoung 	for (vtw = ctl->oldest.v; vtw && ctl->nalloc; ) {
   1450  1.1    dyoung 		if (++maxtries > ctl->nalloc)
   1451  1.1    dyoung 			break;
   1452  1.1    dyoung 
   1453  1.1    dyoung 		if (vtw->msl_class != ctl->clidx) {
   1454  1.1    dyoung 			db_trace(KTR_VTW
   1455  1.1    dyoung 				 , (vtw, "vtw:!age class mismatch %x != %x"
   1456  1.1    dyoung 				    , vtw->msl_class, ctl->clidx));
   1457  1.1    dyoung 			/* XXXX
   1458  1.1    dyoung 			 * See if the appropriate action is to skip to the next.
   1459  1.1    dyoung 			 * XXXX
   1460  1.1    dyoung 			 */
   1461  1.1    dyoung 			ctl->oldest.v = vtw = vtw_next(ctl, vtw);
   1462  1.1    dyoung 			continue;
   1463  1.1    dyoung 		}
   1464  1.1    dyoung 		if (!when) {
   1465  1.1    dyoung 			/* Latch oldest timeval if none specified.
   1466  1.1    dyoung 			 */
   1467  1.1    dyoung 			then = vtw->expire;
   1468  1.1    dyoung 			when = &then;
   1469  1.1    dyoung 		}
   1470  1.1    dyoung 
   1471  1.1    dyoung 		if (!timercmp(&vtw->expire, when, <=))
   1472  1.1    dyoung 			break;
   1473  1.1    dyoung 
   1474  1.1    dyoung 		db_trace(KTR_VTW
   1475  1.1    dyoung 			 , (vtw, "vtw: expire %x %8.8x:%8.8x %x/%x"
   1476  1.1    dyoung 			    , ctl->clidx
   1477  1.1    dyoung 			    , vtw->expire.tv_sec
   1478  1.1    dyoung 			    , vtw->expire.tv_usec
   1479  1.1    dyoung 			    , ctl->nalloc
   1480  1.1    dyoung 			    , ctl->nfree));
   1481  1.1    dyoung 
   1482  1.1    dyoung 		if (!_when)
   1483  1.1    dyoung 			++vtw_stats.kill;
   1484  1.1    dyoung 
   1485  1.1    dyoung 		vtw_del(ctl, vtw);
   1486  1.1    dyoung 		vtw = ctl->oldest.v;
   1487  1.1    dyoung 	}
   1488  1.1    dyoung 
   1489  1.1    dyoung 	return ctl->nalloc;	// # remaining allocated
   1490  1.1    dyoung }
   1491  1.1    dyoung 
   1492  1.1    dyoung static callout_t vtw_cs;
   1493  1.1    dyoung 
   1494  1.1    dyoung /*!\brief notice the passage of time.
   1495  1.1    dyoung  * It seems to be getting faster.  What happened to the year?
   1496  1.1    dyoung  */
   1497  1.1    dyoung static void
   1498  1.1    dyoung vtw_tick(void *arg)
   1499  1.1    dyoung {
   1500  1.1    dyoung 	struct timeval now;
   1501  1.1    dyoung 	int i, cnt = 0;
   1502  1.1    dyoung 
   1503  1.3  drochner 	getmicrouptime(&now);
   1504  1.1    dyoung 
   1505  1.1    dyoung 	db_trace(KTR_VTW, (arg, "vtk: tick - now %8.8x:%8.8x"
   1506  1.1    dyoung 			   , now.tv_sec, now.tv_usec));
   1507  1.1    dyoung 
   1508  1.1    dyoung 	mutex_enter(softnet_lock);
   1509  1.1    dyoung 
   1510  1.1    dyoung 	for (i = 0; i < VTW_NCLASS; ++i) {
   1511  1.1    dyoung 		cnt += vtw_age(&vtw_tcpv4[i], &now);
   1512  1.1    dyoung 		cnt += vtw_age(&vtw_tcpv6[i], &now);
   1513  1.1    dyoung 	}
   1514  1.1    dyoung 
   1515  1.1    dyoung 	/* Keep ticks coming while we need them.
   1516  1.1    dyoung 	 */
   1517  1.1    dyoung 	if (cnt)
   1518  1.1    dyoung 		callout_schedule(&vtw_cs, hz / 5);
   1519  1.1    dyoung 	else {
   1520  1.1    dyoung 		tcp_vtw_was_enabled = 0;
   1521  1.1    dyoung 		tcbtable.vestige    = 0;
   1522  1.1    dyoung 	}
   1523  1.1    dyoung 	mutex_exit(softnet_lock);
   1524  1.1    dyoung }
   1525  1.1    dyoung 
   1526  1.1    dyoung /* in_pcblookup_ports assist for handling vestigial entries.
   1527  1.1    dyoung  */
   1528  1.1    dyoung static void *
   1529  1.1    dyoung tcp_init_ports_v4(struct in_addr addr, u_int port, int wild)
   1530  1.1    dyoung {
   1531  1.1    dyoung 	struct tcp_ports_iterator *it = &tcp_ports_iterator_v4;
   1532  1.1    dyoung 
   1533  1.1    dyoung 	bzero(it, sizeof (*it));
   1534  1.1    dyoung 
   1535  1.1    dyoung 	/* Note: the reference to vtw_tcpv4[0] is fine.
   1536  1.1    dyoung 	 * We do not need per-class iteration.  We just
   1537  1.1    dyoung 	 * need to get to the fat, and there is one
   1538  1.1    dyoung 	 * shared fat.
   1539  1.1    dyoung 	 */
   1540  1.1    dyoung 	if (vtw_tcpv4[0].fat) {
   1541  1.1    dyoung 		it->addr.v4 = addr;
   1542  1.1    dyoung 		it->port = port;
   1543  1.1    dyoung 		it->wild = !!wild;
   1544  1.1    dyoung 		it->ctl  = &vtw_tcpv4[0];
   1545  1.1    dyoung 
   1546  1.1    dyoung 		++vtw_stats.look[1];
   1547  1.1    dyoung 	}
   1548  1.1    dyoung 
   1549  1.1    dyoung 	return it;
   1550  1.1    dyoung }
   1551  1.1    dyoung 
   1552  1.1    dyoung /*!\brief export an IPv4 vtw.
   1553  1.1    dyoung  */
   1554  1.1    dyoung static int
   1555  1.1    dyoung vtw_export_v4(vtw_ctl_t *ctl, vtw_t *vtw, vestigial_inpcb_t *res)
   1556  1.1    dyoung {
   1557  1.1    dyoung 	vtw_v4_t	*v4 = (void*)vtw;
   1558  1.1    dyoung 
   1559  1.1    dyoung 	bzero(res, sizeof (*res));
   1560  1.1    dyoung 
   1561  1.1    dyoung 	if (ctl && vtw) {
   1562  1.1    dyoung 		if (!ctl->clidx && vtw->msl_class)
   1563  1.1    dyoung 			ctl += vtw->msl_class;
   1564  1.1    dyoung 		else
   1565  1.1    dyoung 			KASSERT(ctl->clidx == vtw->msl_class);
   1566  1.1    dyoung 
   1567  1.1    dyoung 		res->valid = 1;
   1568  1.1    dyoung 		res->v4    = 1;
   1569  1.1    dyoung 
   1570  1.1    dyoung 		res->faddr.v4.s_addr = v4->faddr;
   1571  1.1    dyoung 		res->laddr.v4.s_addr = v4->laddr;
   1572  1.1    dyoung 		res->fport	= v4->fport;
   1573  1.1    dyoung 		res->lport	= v4->lport;
   1574  1.1    dyoung 		res->vtw	= vtw;		// netlock held over call(s)
   1575  1.1    dyoung 		res->ctl	= ctl;
   1576  1.1    dyoung 		res->reuse_addr = vtw->reuse_addr;
   1577  1.1    dyoung 		res->reuse_port = vtw->reuse_port;
   1578  1.1    dyoung 		res->snd_nxt    = vtw->snd_nxt;
   1579  1.1    dyoung 		res->rcv_nxt	= vtw->rcv_nxt;
   1580  1.1    dyoung 		res->rcv_wnd	= vtw->rcv_wnd;
   1581  1.1    dyoung 		res->uid	= vtw->uid;
   1582  1.1    dyoung 	}
   1583  1.1    dyoung 
   1584  1.1    dyoung 	return res->valid;
   1585  1.1    dyoung }
   1586  1.1    dyoung 
   1587  1.1    dyoung /*!\brief return next port in the port iterator.  yowza.
   1588  1.1    dyoung  */
   1589  1.1    dyoung static int
   1590  1.1    dyoung tcp_next_port_v4(void *arg, struct vestigial_inpcb *res)
   1591  1.1    dyoung {
   1592  1.1    dyoung 	struct tcp_ports_iterator *it = arg;
   1593  1.1    dyoung 	vtw_t		*vtw = 0;
   1594  1.1    dyoung 
   1595  1.1    dyoung 	if (it->ctl)
   1596  1.1    dyoung 		vtw = vtw_next_port_v4(it);
   1597  1.1    dyoung 
   1598  1.1    dyoung 	if (!vtw)
   1599  1.1    dyoung 		it->ctl = 0;
   1600  1.1    dyoung 
   1601  1.1    dyoung 	return vtw_export_v4(it->ctl, vtw, res);
   1602  1.1    dyoung }
   1603  1.1    dyoung 
   1604  1.1    dyoung static int
   1605  1.1    dyoung tcp_lookup_v4(struct in_addr faddr, uint16_t fport,
   1606  1.1    dyoung               struct in_addr laddr, uint16_t lport,
   1607  1.1    dyoung 	      struct vestigial_inpcb *res)
   1608  1.1    dyoung {
   1609  1.1    dyoung 	vtw_t		*vtw;
   1610  1.1    dyoung 	vtw_ctl_t	*ctl;
   1611  1.1    dyoung 
   1612  1.1    dyoung 
   1613  1.1    dyoung 	db_trace(KTR_VTW
   1614  1.1    dyoung 		 , (res, "vtw: lookup %A:%P %A:%P"
   1615  1.1    dyoung 		    , faddr, fport
   1616  1.1    dyoung 		    , laddr, lport));
   1617  1.1    dyoung 
   1618  1.1    dyoung 	vtw = vtw_lookup_hash_v4((ctl = &vtw_tcpv4[0])
   1619  1.1    dyoung 				 , faddr.s_addr, fport
   1620  1.1    dyoung 				 , laddr.s_addr, lport, 0);
   1621  1.1    dyoung 
   1622  1.1    dyoung 	return vtw_export_v4(ctl, vtw, res);
   1623  1.1    dyoung }
   1624  1.1    dyoung 
   1625  1.1    dyoung /* in_pcblookup_ports assist for handling vestigial entries.
   1626  1.1    dyoung  */
   1627  1.1    dyoung static void *
   1628  1.1    dyoung tcp_init_ports_v6(const struct in6_addr *addr, u_int port, int wild)
   1629  1.1    dyoung {
   1630  1.1    dyoung 	struct tcp_ports_iterator *it = &tcp_ports_iterator_v6;
   1631  1.1    dyoung 
   1632  1.1    dyoung 	bzero(it, sizeof (*it));
   1633  1.1    dyoung 
   1634  1.1    dyoung 	/* Note: the reference to vtw_tcpv6[0] is fine.
   1635  1.1    dyoung 	 * We do not need per-class iteration.  We just
   1636  1.1    dyoung 	 * need to get to the fat, and there is one
   1637  1.1    dyoung 	 * shared fat.
   1638  1.1    dyoung 	 */
   1639  1.1    dyoung 	if (vtw_tcpv6[0].fat) {
   1640  1.1    dyoung 		it->addr.v6 = *addr;
   1641  1.1    dyoung 		it->port = port;
   1642  1.1    dyoung 		it->wild = !!wild;
   1643  1.1    dyoung 		it->ctl  = &vtw_tcpv6[0];
   1644  1.1    dyoung 
   1645  1.1    dyoung 		++vtw_stats.look[1];
   1646  1.1    dyoung 	}
   1647  1.1    dyoung 
   1648  1.1    dyoung 	return it;
   1649  1.1    dyoung }
   1650  1.1    dyoung 
   1651  1.1    dyoung /*!\brief export an IPv6 vtw.
   1652  1.1    dyoung  */
   1653  1.1    dyoung static int
   1654  1.1    dyoung vtw_export_v6(vtw_ctl_t *ctl, vtw_t *vtw, vestigial_inpcb_t *res)
   1655  1.1    dyoung {
   1656  1.1    dyoung 	vtw_v6_t	*v6 = (void*)vtw;
   1657  1.1    dyoung 
   1658  1.1    dyoung 	bzero(res, sizeof (*res));
   1659  1.1    dyoung 
   1660  1.1    dyoung 	if (ctl && vtw) {
   1661  1.1    dyoung 		if (!ctl->clidx && vtw->msl_class)
   1662  1.1    dyoung 			ctl += vtw->msl_class;
   1663  1.1    dyoung 		else
   1664  1.1    dyoung 			KASSERT(ctl->clidx == vtw->msl_class);
   1665  1.1    dyoung 
   1666  1.1    dyoung 		res->valid = 1;
   1667  1.1    dyoung 		res->v4    = 0;
   1668  1.1    dyoung 
   1669  1.1    dyoung 		res->faddr.v6	= v6->faddr;
   1670  1.1    dyoung 		res->laddr.v6	= v6->laddr;
   1671  1.1    dyoung 		res->fport	= v6->fport;
   1672  1.1    dyoung 		res->lport	= v6->lport;
   1673  1.1    dyoung 		res->vtw	= vtw;		// netlock held over call(s)
   1674  1.1    dyoung 		res->ctl	= ctl;
   1675  1.1    dyoung 
   1676  1.1    dyoung 		res->v6only	= vtw->v6only;
   1677  1.1    dyoung 		res->reuse_addr = vtw->reuse_addr;
   1678  1.1    dyoung 		res->reuse_port = vtw->reuse_port;
   1679  1.1    dyoung 
   1680  1.1    dyoung 		res->snd_nxt    = vtw->snd_nxt;
   1681  1.1    dyoung 		res->rcv_nxt	= vtw->rcv_nxt;
   1682  1.1    dyoung 		res->rcv_wnd	= vtw->rcv_wnd;
   1683  1.1    dyoung 		res->uid	= vtw->uid;
   1684  1.1    dyoung 	}
   1685  1.1    dyoung 
   1686  1.1    dyoung 	return res->valid;
   1687  1.1    dyoung }
   1688  1.1    dyoung 
   1689  1.1    dyoung static int
   1690  1.1    dyoung tcp_next_port_v6(void *arg, struct vestigial_inpcb *res)
   1691  1.1    dyoung {
   1692  1.1    dyoung 	struct tcp_ports_iterator *it = arg;
   1693  1.1    dyoung 	vtw_t		*vtw = 0;
   1694  1.1    dyoung 
   1695  1.1    dyoung 	if (it->ctl)
   1696  1.1    dyoung 		vtw = vtw_next_port_v6(it);
   1697  1.1    dyoung 
   1698  1.1    dyoung 	if (!vtw)
   1699  1.1    dyoung 		it->ctl = 0;
   1700  1.1    dyoung 
   1701  1.1    dyoung 	return vtw_export_v6(it->ctl, vtw, res);
   1702  1.1    dyoung }
   1703  1.1    dyoung 
   1704  1.1    dyoung static int
   1705  1.1    dyoung tcp_lookup_v6(const struct in6_addr *faddr, uint16_t fport,
   1706  1.1    dyoung               const struct in6_addr *laddr, uint16_t lport,
   1707  1.1    dyoung 	      struct vestigial_inpcb *res)
   1708  1.1    dyoung {
   1709  1.1    dyoung 	vtw_ctl_t	*ctl;
   1710  1.1    dyoung 	vtw_t		*vtw;
   1711  1.1    dyoung 
   1712  1.1    dyoung 	db_trace(KTR_VTW
   1713  1.1    dyoung 		 , (res, "vtw: lookup %6A:%P %6A:%P"
   1714  1.1    dyoung 		    , db_store(faddr, sizeof (*faddr)), fport
   1715  1.1    dyoung 		    , db_store(laddr, sizeof (*laddr)), lport));
   1716  1.1    dyoung 
   1717  1.1    dyoung 	vtw = vtw_lookup_hash_v6((ctl = &vtw_tcpv6[0])
   1718  1.1    dyoung 				 , faddr, fport
   1719  1.1    dyoung 				 , laddr, lport, 0);
   1720  1.1    dyoung 
   1721  1.1    dyoung 	return vtw_export_v6(ctl, vtw, res);
   1722  1.1    dyoung }
   1723  1.1    dyoung 
   1724  1.1    dyoung static vestigial_hooks_t tcp_hooks = {
   1725  1.1    dyoung 	.init_ports4	= tcp_init_ports_v4,
   1726  1.1    dyoung 	.next_port4	= tcp_next_port_v4,
   1727  1.1    dyoung 	.lookup4	= tcp_lookup_v4,
   1728  1.1    dyoung 	.init_ports6	= tcp_init_ports_v6,
   1729  1.1    dyoung 	.next_port6	= tcp_next_port_v6,
   1730  1.1    dyoung 	.lookup6	= tcp_lookup_v6,
   1731  1.1    dyoung };
   1732  1.1    dyoung 
   1733  1.1    dyoung static bool
   1734  1.1    dyoung vtw_select(int af, fatp_ctl_t **fatp, vtw_ctl_t **ctlp)
   1735  1.1    dyoung {
   1736  1.1    dyoung 	fatp_ctl_t	*fat;
   1737  1.1    dyoung 	vtw_ctl_t	*ctl;
   1738  1.1    dyoung 
   1739  1.1    dyoung 	switch (af) {
   1740  1.1    dyoung 	case AF_INET:
   1741  1.1    dyoung 		fat = &fat_tcpv4;
   1742  1.1    dyoung 		ctl = &vtw_tcpv4[0];
   1743  1.1    dyoung 		break;
   1744  1.1    dyoung 	case AF_INET6:
   1745  1.1    dyoung 		fat = &fat_tcpv6;
   1746  1.1    dyoung 		ctl = &vtw_tcpv6[0];
   1747  1.1    dyoung 		break;
   1748  1.1    dyoung 	default:
   1749  1.1    dyoung 		return false;
   1750  1.1    dyoung 	}
   1751  1.1    dyoung 	if (fatp != NULL)
   1752  1.1    dyoung 		*fatp = fat;
   1753  1.1    dyoung 	if (ctlp != NULL)
   1754  1.1    dyoung 		*ctlp = ctl;
   1755  1.1    dyoung 	return true;
   1756  1.1    dyoung }
   1757  1.1    dyoung 
   1758  1.1    dyoung /*!\brief	initialize controlling instance
   1759  1.1    dyoung  */
   1760  1.1    dyoung static int
   1761  1.1    dyoung vtw_control_init(int af)
   1762  1.1    dyoung {
   1763  1.1    dyoung 	fatp_ctl_t	*fat;
   1764  1.1    dyoung 	vtw_ctl_t	*ctl;
   1765  1.6    dyoung 	fatp_t		*fat_base;
   1766  1.6    dyoung 	fatp_t		**fat_hash;
   1767  1.6    dyoung 	vtw_t		*ctl_base_v;
   1768  1.6    dyoung 	uint32_t	n, m;
   1769  1.6    dyoung 	size_t sz;
   1770  1.6    dyoung 
   1771  1.6    dyoung 	KASSERT(powerof2(tcp_vtw_entries));
   1772  1.1    dyoung 
   1773  1.1    dyoung 	if (!vtw_select(af, &fat, &ctl))
   1774  1.1    dyoung 		return EAFNOSUPPORT;
   1775  1.1    dyoung 
   1776  1.6    dyoung 	if (fat->hash != NULL) {
   1777  1.6    dyoung 		KASSERT(fat->base != NULL && ctl->base.v != NULL);
   1778  1.6    dyoung 		return 0;
   1779  1.6    dyoung 	}
   1780  1.6    dyoung 
   1781  1.6    dyoung 	/* Allocate 10% more capacity in the fat pointers.
   1782  1.6    dyoung 	 * We should only need ~#hash additional based on
   1783  1.6    dyoung 	 * how they age, but TIME_WAIT assassination could cause
   1784  1.6    dyoung 	 * sparse fat pointer utilisation.
   1785  1.6    dyoung 	 */
   1786  1.6    dyoung 	m = 512;
   1787  1.6    dyoung 	n = 2*m + (11 * (tcp_vtw_entries / fatp_ntags())) / 10;
   1788  1.6    dyoung 	sz = (ctl->is_v4 ? sizeof(vtw_v4_t) : sizeof(vtw_v6_t));
   1789  1.6    dyoung 
   1790  1.6    dyoung 	fat_hash = kmem_zalloc(2*m * sizeof(fatp_t *), KM_NOSLEEP);
   1791  1.6    dyoung 
   1792  1.6    dyoung 	if (fat_hash == NULL) {
   1793  1.6    dyoung 		printf("%s: could not allocate %zu bytes for "
   1794  1.6    dyoung 		    "hash anchors", __func__, 2*m * sizeof(fatp_t *));
   1795  1.6    dyoung 		return ENOMEM;
   1796  1.6    dyoung 	}
   1797  1.1    dyoung 
   1798  1.6    dyoung 	fat_base = kmem_zalloc(2*n * sizeof(fatp_t), KM_NOSLEEP);
   1799  1.1    dyoung 
   1800  1.6    dyoung 	if (fat_base == NULL) {
   1801  1.6    dyoung 		kmem_free(fat_hash, 2*m * sizeof (fatp_t *));
   1802  1.6    dyoung 		printf("%s: could not allocate %zu bytes for "
   1803  1.6    dyoung 		    "fatp_t array", __func__, 2*n * sizeof(fatp_t));
   1804  1.6    dyoung 		return ENOMEM;
   1805  1.6    dyoung 	}
   1806  1.1    dyoung 
   1807  1.6    dyoung 	ctl_base_v = kmem_zalloc(tcp_vtw_entries * sz, KM_NOSLEEP);
   1808  1.1    dyoung 
   1809  1.6    dyoung 	if (ctl_base_v == NULL) {
   1810  1.6    dyoung 		kmem_free(fat_hash, 2*m * sizeof (fatp_t *));
   1811  1.6    dyoung 		kmem_free(fat_base, 2*n * sizeof(fatp_t));
   1812  1.6    dyoung 		printf("%s: could not allocate %zu bytes for "
   1813  1.6    dyoung 		    "vtw_t array", __func__, tcp_vtw_entries * sz);
   1814  1.6    dyoung 		return ENOMEM;
   1815  1.1    dyoung 	}
   1816  1.1    dyoung 
   1817  1.6    dyoung 	fatp_init(fat, n, m, fat_base, fat_hash);
   1818  1.1    dyoung 
   1819  1.6    dyoung 	vtw_init(fat, ctl, tcp_vtw_entries, ctl_base_v);
   1820  1.1    dyoung 
   1821  1.1    dyoung 	return 0;
   1822  1.1    dyoung }
   1823  1.1    dyoung 
   1824  1.1    dyoung /*!\brief	select controlling instance
   1825  1.1    dyoung  */
   1826  1.1    dyoung static vtw_ctl_t *
   1827  1.1    dyoung vtw_control(int af, uint32_t msl)
   1828  1.1    dyoung {
   1829  1.1    dyoung 	fatp_ctl_t	*fat;
   1830  1.1    dyoung 	vtw_ctl_t	*ctl;
   1831  1.1    dyoung 	int		class	= msl_to_class(msl);
   1832  1.1    dyoung 
   1833  1.1    dyoung 	if (!vtw_select(af, &fat, &ctl))
   1834  1.1    dyoung 		return NULL;
   1835  1.1    dyoung 
   1836  1.1    dyoung 	if (!fat->base || !ctl->base.v)
   1837  1.1    dyoung 		return NULL;
   1838  1.1    dyoung 
   1839  1.5    dyoung 	if (!tcp_vtw_was_enabled) {
   1840  1.5    dyoung 		/* This guarantees is timer ticks until we no longer need them.
   1841  1.5    dyoung 		 */
   1842  1.5    dyoung 		tcp_vtw_was_enabled = 1;
   1843  1.5    dyoung 
   1844  1.5    dyoung 		callout_schedule(&vtw_cs, hz / 5);
   1845  1.5    dyoung 
   1846  1.5    dyoung 		tcbtable.vestige = &tcp_hooks;
   1847  1.5    dyoung 	}
   1848  1.5    dyoung 
   1849  1.1    dyoung 	return ctl + class;
   1850  1.1    dyoung }
   1851  1.1    dyoung 
   1852  1.1    dyoung /*!\brief	add TCP pcb to vestigial timewait
   1853  1.1    dyoung  */
   1854  1.1    dyoung int
   1855  1.1    dyoung vtw_add(int af, struct tcpcb *tp)
   1856  1.1    dyoung {
   1857  1.1    dyoung 	int		enable;
   1858  1.1    dyoung 	vtw_ctl_t	*ctl;
   1859  1.1    dyoung 	vtw_t		*vtw;
   1860  1.1    dyoung 
   1861  1.1    dyoung 	KASSERT(mutex_owned(softnet_lock));
   1862  1.1    dyoung 
   1863  1.1    dyoung 	ctl = vtw_control(af, tp->t_msl);
   1864  1.1    dyoung 	if (!ctl)
   1865  1.1    dyoung 		return 0;
   1866  1.1    dyoung 
   1867  1.1    dyoung 	enable = (af == AF_INET) ? tcp4_vtw_enable : tcp6_vtw_enable;
   1868  1.1    dyoung 
   1869  1.1    dyoung 	vtw = vtw_alloc(ctl);
   1870  1.1    dyoung 
   1871  1.1    dyoung 	if (vtw) {
   1872  1.1    dyoung 		vtw->snd_nxt = tp->snd_nxt;
   1873  1.1    dyoung 		vtw->rcv_nxt = tp->rcv_nxt;
   1874  1.1    dyoung 
   1875  1.1    dyoung 		switch (af) {
   1876  1.1    dyoung 		case AF_INET: {
   1877  1.1    dyoung 			struct inpcb	*inp = tp->t_inpcb;
   1878  1.1    dyoung 			vtw_v4_t	*v4  = (void*)vtw;
   1879  1.1    dyoung 
   1880  1.1    dyoung 			v4->faddr = inp->inp_faddr.s_addr;
   1881  1.1    dyoung 			v4->laddr = inp->inp_laddr.s_addr;
   1882  1.1    dyoung 			v4->fport = inp->inp_fport;
   1883  1.1    dyoung 			v4->lport = inp->inp_lport;
   1884  1.1    dyoung 
   1885  1.1    dyoung 			vtw->reuse_port = !!(inp->inp_socket->so_options
   1886  1.1    dyoung 					     & SO_REUSEPORT);
   1887  1.1    dyoung 			vtw->reuse_addr = !!(inp->inp_socket->so_options
   1888  1.1    dyoung 					     & SO_REUSEADDR);
   1889  1.1    dyoung 			vtw->v6only	= 0;
   1890  1.1    dyoung 			vtw->uid	= inp->inp_socket->so_uidinfo->ui_uid;
   1891  1.1    dyoung 
   1892  1.1    dyoung 			vtw_inshash_v4(ctl, vtw);
   1893  1.1    dyoung 
   1894  1.1    dyoung 
   1895  1.1    dyoung #ifdef VTW_DEBUG
   1896  1.1    dyoung 			/* Immediate lookup (connected and port) to
   1897  1.1    dyoung 			 * ensure at least that works!
   1898  1.1    dyoung 			 */
   1899  1.1    dyoung 			if (enable & 4) {
   1900  1.1    dyoung 				KASSERT(vtw_lookup_hash_v4
   1901  1.1    dyoung 					(ctl
   1902  1.1    dyoung 					 , inp->inp_faddr.s_addr, inp->inp_fport
   1903  1.1    dyoung 					 , inp->inp_laddr.s_addr, inp->inp_lport
   1904  1.1    dyoung 					 , 0)
   1905  1.1    dyoung 					== vtw);
   1906  1.1    dyoung 				KASSERT(vtw_lookup_hash_v4
   1907  1.1    dyoung 					(ctl
   1908  1.1    dyoung 					 , inp->inp_faddr.s_addr, inp->inp_fport
   1909  1.1    dyoung 					 , inp->inp_laddr.s_addr, inp->inp_lport
   1910  1.1    dyoung 					 , 1));
   1911  1.1    dyoung 			}
   1912  1.1    dyoung 			/* Immediate port iterator functionality check: not wild
   1913  1.1    dyoung 			 */
   1914  1.1    dyoung 			if (enable & 8) {
   1915  1.1    dyoung 				struct tcp_ports_iterator *it;
   1916  1.1    dyoung 				struct vestigial_inpcb res;
   1917  1.1    dyoung 				int cnt = 0;
   1918  1.1    dyoung 
   1919  1.1    dyoung 				it = tcp_init_ports_v4(inp->inp_laddr
   1920  1.1    dyoung 						       , inp->inp_lport, 0);
   1921  1.1    dyoung 
   1922  1.1    dyoung 				while (tcp_next_port_v4(it, &res)) {
   1923  1.1    dyoung 					++cnt;
   1924  1.1    dyoung 				}
   1925  1.1    dyoung 				KASSERT(cnt);
   1926  1.1    dyoung 			}
   1927  1.1    dyoung 			/* Immediate port iterator functionality check: wild
   1928  1.1    dyoung 			 */
   1929  1.1    dyoung 			if (enable & 16) {
   1930  1.1    dyoung 				struct tcp_ports_iterator *it;
   1931  1.1    dyoung 				struct vestigial_inpcb res;
   1932  1.1    dyoung 				struct in_addr any;
   1933  1.1    dyoung 				int cnt = 0;
   1934  1.1    dyoung 
   1935  1.1    dyoung 				any.s_addr = htonl(INADDR_ANY);
   1936  1.1    dyoung 
   1937  1.1    dyoung 				it = tcp_init_ports_v4(any, inp->inp_lport, 1);
   1938  1.1    dyoung 
   1939  1.1    dyoung 				while (tcp_next_port_v4(it, &res)) {
   1940  1.1    dyoung 					++cnt;
   1941  1.1    dyoung 				}
   1942  1.1    dyoung 				KASSERT(cnt);
   1943  1.1    dyoung 			}
   1944  1.1    dyoung #endif /* VTW_DEBUG */
   1945  1.1    dyoung 			break;
   1946  1.1    dyoung 		}
   1947  1.1    dyoung 
   1948  1.1    dyoung 		case AF_INET6: {
   1949  1.1    dyoung 			struct in6pcb	*inp = tp->t_in6pcb;
   1950  1.1    dyoung 			vtw_v6_t	*v6  = (void*)vtw;
   1951  1.1    dyoung 
   1952  1.1    dyoung 			v6->faddr = inp->in6p_faddr;
   1953  1.1    dyoung 			v6->laddr = inp->in6p_laddr;
   1954  1.1    dyoung 			v6->fport = inp->in6p_fport;
   1955  1.1    dyoung 			v6->lport = inp->in6p_lport;
   1956  1.1    dyoung 
   1957  1.1    dyoung 			vtw->reuse_port = !!(inp->in6p_socket->so_options
   1958  1.1    dyoung 					     & SO_REUSEPORT);
   1959  1.1    dyoung 			vtw->reuse_addr = !!(inp->in6p_socket->so_options
   1960  1.1    dyoung 					     & SO_REUSEADDR);
   1961  1.1    dyoung 			vtw->v6only	= !!(inp->in6p_flags
   1962  1.1    dyoung 					     & IN6P_IPV6_V6ONLY);
   1963  1.1    dyoung 			vtw->uid	= inp->in6p_socket->so_uidinfo->ui_uid;
   1964  1.1    dyoung 
   1965  1.1    dyoung 			vtw_inshash_v6(ctl, vtw);
   1966  1.1    dyoung #ifdef VTW_DEBUG
   1967  1.1    dyoung 			/* Immediate lookup (connected and port) to
   1968  1.1    dyoung 			 * ensure at least that works!
   1969  1.1    dyoung 			 */
   1970  1.1    dyoung 			if (enable & 4) {
   1971  1.1    dyoung 				KASSERT(vtw_lookup_hash_v6(ctl
   1972  1.1    dyoung 					 , &inp->in6p_faddr, inp->in6p_fport
   1973  1.1    dyoung 					 , &inp->in6p_laddr, inp->in6p_lport
   1974  1.1    dyoung 					 , 0)
   1975  1.1    dyoung 					== vtw);
   1976  1.1    dyoung 				KASSERT(vtw_lookup_hash_v6
   1977  1.1    dyoung 					(ctl
   1978  1.1    dyoung 					 , &inp->in6p_faddr, inp->in6p_fport
   1979  1.1    dyoung 					 , &inp->in6p_laddr, inp->in6p_lport
   1980  1.1    dyoung 					 , 1));
   1981  1.1    dyoung 			}
   1982  1.1    dyoung 			/* Immediate port iterator functionality check: not wild
   1983  1.1    dyoung 			 */
   1984  1.1    dyoung 			if (enable & 8) {
   1985  1.1    dyoung 				struct tcp_ports_iterator *it;
   1986  1.1    dyoung 				struct vestigial_inpcb res;
   1987  1.1    dyoung 				int cnt = 0;
   1988  1.1    dyoung 
   1989  1.1    dyoung 				it = tcp_init_ports_v6(&inp->in6p_laddr
   1990  1.1    dyoung 						       , inp->in6p_lport, 0);
   1991  1.1    dyoung 
   1992  1.1    dyoung 				while (tcp_next_port_v6(it, &res)) {
   1993  1.1    dyoung 					++cnt;
   1994  1.1    dyoung 				}
   1995  1.1    dyoung 				KASSERT(cnt);
   1996  1.1    dyoung 			}
   1997  1.1    dyoung 			/* Immediate port iterator functionality check: wild
   1998  1.1    dyoung 			 */
   1999  1.1    dyoung 			if (enable & 16) {
   2000  1.1    dyoung 				struct tcp_ports_iterator *it;
   2001  1.1    dyoung 				struct vestigial_inpcb res;
   2002  1.1    dyoung 				static struct in6_addr any = IN6ADDR_ANY_INIT;
   2003  1.1    dyoung 				int cnt = 0;
   2004  1.1    dyoung 
   2005  1.1    dyoung 				it = tcp_init_ports_v6(&any
   2006  1.1    dyoung 						       , inp->in6p_lport, 1);
   2007  1.1    dyoung 
   2008  1.1    dyoung 				while (tcp_next_port_v6(it, &res)) {
   2009  1.1    dyoung 					++cnt;
   2010  1.1    dyoung 				}
   2011  1.1    dyoung 				KASSERT(cnt);
   2012  1.1    dyoung 			}
   2013  1.1    dyoung #endif /* VTW_DEBUG */
   2014  1.1    dyoung 			break;
   2015  1.1    dyoung 		}
   2016  1.1    dyoung 		}
   2017  1.1    dyoung 
   2018  1.1    dyoung 		tcp_canceltimers(tp);
   2019  1.1    dyoung 		tp = tcp_close(tp);
   2020  1.1    dyoung 		KASSERT(!tp);
   2021  1.1    dyoung 
   2022  1.1    dyoung 		return 1;
   2023  1.1    dyoung 	}
   2024  1.1    dyoung 
   2025  1.1    dyoung 	return 0;
   2026  1.1    dyoung }
   2027  1.1    dyoung 
   2028  1.1    dyoung /*!\brief	restart timer for vestigial time-wait entry
   2029  1.1    dyoung  */
   2030  1.1    dyoung static void
   2031  1.1    dyoung vtw_restart_v4(vestigial_inpcb_t *vp)
   2032  1.1    dyoung {
   2033  1.1    dyoung 	vtw_v4_t	copy = *(vtw_v4_t*)vp->vtw;
   2034  1.1    dyoung 	vtw_t		*vtw;
   2035  1.1    dyoung 	vtw_t		*cp  = &copy.common;
   2036  1.1    dyoung 	vtw_ctl_t	*ctl;
   2037  1.1    dyoung 
   2038  1.1    dyoung 	KASSERT(mutex_owned(softnet_lock));
   2039  1.1    dyoung 
   2040  1.1    dyoung 	db_trace(KTR_VTW
   2041  1.1    dyoung 		 , (vp->vtw, "vtw: restart %A:%P %A:%P"
   2042  1.1    dyoung 		    , vp->faddr.v4.s_addr, vp->fport
   2043  1.1    dyoung 		    , vp->laddr.v4.s_addr, vp->lport));
   2044  1.1    dyoung 
   2045  1.1    dyoung 	/* Class might have changed, so have a squiz.
   2046  1.1    dyoung 	 */
   2047  1.1    dyoung 	ctl = vtw_control(AF_INET, class_to_msl(cp->msl_class));
   2048  1.1    dyoung 	vtw = vtw_alloc(ctl);
   2049  1.1    dyoung 
   2050  1.1    dyoung 	if (vtw) {
   2051  1.1    dyoung 		vtw_v4_t	*v4  = (void*)vtw;
   2052  1.1    dyoung 
   2053  1.1    dyoung 		/* Safe now to unhash the old entry
   2054  1.1    dyoung 		 */
   2055  1.1    dyoung 		vtw_del(vp->ctl, vp->vtw);
   2056  1.1    dyoung 
   2057  1.1    dyoung 		vtw->snd_nxt = cp->snd_nxt;
   2058  1.1    dyoung 		vtw->rcv_nxt = cp->rcv_nxt;
   2059  1.1    dyoung 
   2060  1.1    dyoung 		v4->faddr = copy.faddr;
   2061  1.1    dyoung 		v4->laddr = copy.laddr;
   2062  1.1    dyoung 		v4->fport = copy.fport;
   2063  1.1    dyoung 		v4->lport = copy.lport;
   2064  1.1    dyoung 
   2065  1.1    dyoung 		vtw->reuse_port = cp->reuse_port;
   2066  1.1    dyoung 		vtw->reuse_addr = cp->reuse_addr;
   2067  1.1    dyoung 		vtw->v6only	= 0;
   2068  1.1    dyoung 		vtw->uid	= cp->uid;
   2069  1.1    dyoung 
   2070  1.1    dyoung 		vtw_inshash_v4(ctl, vtw);
   2071  1.1    dyoung 	}
   2072  1.1    dyoung 
   2073  1.1    dyoung 	vp->valid = 0;
   2074  1.1    dyoung }
   2075  1.1    dyoung 
   2076  1.1    dyoung /*!\brief	restart timer for vestigial time-wait entry
   2077  1.1    dyoung  */
   2078  1.1    dyoung static void
   2079  1.1    dyoung vtw_restart_v6(vestigial_inpcb_t *vp)
   2080  1.1    dyoung {
   2081  1.1    dyoung 	vtw_v6_t	copy = *(vtw_v6_t*)vp->vtw;
   2082  1.1    dyoung 	vtw_t		*vtw;
   2083  1.1    dyoung 	vtw_t		*cp  = &copy.common;
   2084  1.1    dyoung 	vtw_ctl_t	*ctl;
   2085  1.1    dyoung 
   2086  1.1    dyoung 	KASSERT(mutex_owned(softnet_lock));
   2087  1.1    dyoung 
   2088  1.1    dyoung 	db_trace(KTR_VTW
   2089  1.1    dyoung 		 , (vp->vtw, "vtw: restart %6A:%P %6A:%P"
   2090  1.1    dyoung 		    , db_store(&vp->faddr.v6, sizeof (vp->faddr.v6))
   2091  1.1    dyoung 		    , vp->fport
   2092  1.1    dyoung 		    , db_store(&vp->laddr.v6, sizeof (vp->laddr.v6))
   2093  1.1    dyoung 		    , vp->lport));
   2094  1.1    dyoung 
   2095  1.1    dyoung 	/* Class might have changed, so have a squiz.
   2096  1.1    dyoung 	 */
   2097  1.1    dyoung 	ctl = vtw_control(AF_INET6, class_to_msl(cp->msl_class));
   2098  1.1    dyoung 	vtw = vtw_alloc(ctl);
   2099  1.1    dyoung 
   2100  1.1    dyoung 	if (vtw) {
   2101  1.1    dyoung 		vtw_v6_t	*v6  = (void*)vtw;
   2102  1.1    dyoung 
   2103  1.1    dyoung 		/* Safe now to unhash the old entry
   2104  1.1    dyoung 		 */
   2105  1.1    dyoung 		vtw_del(vp->ctl, vp->vtw);
   2106  1.1    dyoung 
   2107  1.1    dyoung 		vtw->snd_nxt = cp->snd_nxt;
   2108  1.1    dyoung 		vtw->rcv_nxt = cp->rcv_nxt;
   2109  1.1    dyoung 
   2110  1.1    dyoung 		v6->faddr = copy.faddr;
   2111  1.1    dyoung 		v6->laddr = copy.laddr;
   2112  1.1    dyoung 		v6->fport = copy.fport;
   2113  1.1    dyoung 		v6->lport = copy.lport;
   2114  1.1    dyoung 
   2115  1.1    dyoung 		vtw->reuse_port = cp->reuse_port;
   2116  1.1    dyoung 		vtw->reuse_addr = cp->reuse_addr;
   2117  1.1    dyoung 		vtw->v6only	= cp->v6only;
   2118  1.1    dyoung 		vtw->uid	= cp->uid;
   2119  1.1    dyoung 
   2120  1.1    dyoung 		vtw_inshash_v6(ctl, vtw);
   2121  1.1    dyoung 	}
   2122  1.1    dyoung 
   2123  1.1    dyoung 	vp->valid = 0;
   2124  1.1    dyoung }
   2125  1.1    dyoung 
   2126  1.1    dyoung /*!\brief	restart timer for vestigial time-wait entry
   2127  1.1    dyoung  */
   2128  1.1    dyoung void
   2129  1.1    dyoung vtw_restart(vestigial_inpcb_t *vp)
   2130  1.1    dyoung {
   2131  1.1    dyoung 	if (!vp || !vp->valid)
   2132  1.1    dyoung 		return;
   2133  1.1    dyoung 
   2134  1.1    dyoung 	if (vp->v4)
   2135  1.1    dyoung 		vtw_restart_v4(vp);
   2136  1.1    dyoung 	else
   2137  1.1    dyoung 		vtw_restart_v6(vp);
   2138  1.1    dyoung }
   2139  1.1    dyoung 
   2140  1.1    dyoung int
   2141  1.7    dyoung sysctl_tcp_vtw_enable(SYSCTLFN_ARGS)
   2142  1.7    dyoung {
   2143  1.7    dyoung 	int en, rc;
   2144  1.7    dyoung 	struct sysctlnode node;
   2145  1.7    dyoung 
   2146  1.7    dyoung 	node = *rnode;
   2147  1.7    dyoung 	en = *(int *)rnode->sysctl_data;
   2148  1.7    dyoung 	node.sysctl_data = &en;
   2149  1.7    dyoung 
   2150  1.7    dyoung 	rc = sysctl_lookup(SYSCTLFN_CALL(&node));
   2151  1.7    dyoung 	if (rc != 0 || newp == NULL)
   2152  1.7    dyoung 		return rc;
   2153  1.7    dyoung 
   2154  1.7    dyoung 	if (rnode->sysctl_data != &tcp4_vtw_enable &&
   2155  1.7    dyoung 	    rnode->sysctl_data != &tcp6_vtw_enable)
   2156  1.7    dyoung 		rc = ENOENT;
   2157  1.7    dyoung 	else if ((en & 1) == 0)
   2158  1.7    dyoung 		rc = 0;
   2159  1.7    dyoung 	else if (rnode->sysctl_data == &tcp4_vtw_enable)
   2160  1.7    dyoung 		rc = vtw_control_init(AF_INET);
   2161  1.7    dyoung 	else /* rnode->sysctl_data == &tcp6_vtw_enable */
   2162  1.7    dyoung 		rc = vtw_control_init(AF_INET6);
   2163  1.7    dyoung 
   2164  1.7    dyoung 	if (rc == 0)
   2165  1.7    dyoung 		*(int *)rnode->sysctl_data = en;
   2166  1.7    dyoung 
   2167  1.7    dyoung 	return rc;
   2168  1.7    dyoung }
   2169  1.7    dyoung 
   2170  1.7    dyoung int
   2171  1.1    dyoung vtw_earlyinit(void)
   2172  1.1    dyoung {
   2173  1.5    dyoung 	int i, rc;
   2174  1.1    dyoung 
   2175  1.5    dyoung 	callout_init(&vtw_cs, 0);
   2176  1.5    dyoung 	callout_setfunc(&vtw_cs, vtw_tick, 0);
   2177  1.1    dyoung 
   2178  1.5    dyoung 	for (i = 0; i < VTW_NCLASS; ++i) {
   2179  1.5    dyoung 		vtw_tcpv4[i].is_v4 = 1;
   2180  1.5    dyoung 		vtw_tcpv6[i].is_v6 = 1;
   2181  1.1    dyoung 	}
   2182  1.1    dyoung 
   2183  1.7    dyoung 	if ((tcp4_vtw_enable & 1) != 0 &&
   2184  1.7    dyoung 	    (rc = vtw_control_init(AF_INET)) != 0)
   2185  1.7    dyoung 		return rc;
   2186  1.7    dyoung 
   2187  1.7    dyoung 	if ((tcp6_vtw_enable & 1) != 0 &&
   2188  1.1    dyoung 	    (rc = vtw_control_init(AF_INET6)) != 0)
   2189  1.1    dyoung 		return rc;
   2190  1.1    dyoung 
   2191  1.1    dyoung 	return 0;
   2192  1.1    dyoung }
   2193  1.1    dyoung 
   2194  1.1    dyoung #ifdef VTW_DEBUG
   2195  1.1    dyoung #include <sys/syscallargs.h>
   2196  1.1    dyoung #include <sys/sysctl.h>
   2197  1.1    dyoung 
   2198  1.1    dyoung /*!\brief	add lalp, fafp entries for debug
   2199  1.1    dyoung  */
   2200  1.1    dyoung int
   2201  1.1    dyoung vtw_debug_add(int af, sin_either_t *la, sin_either_t *fa, int msl, int class)
   2202  1.1    dyoung {
   2203  1.1    dyoung 	vtw_ctl_t	*ctl;
   2204  1.1    dyoung 	vtw_t		*vtw;
   2205  1.1    dyoung 
   2206  1.1    dyoung 	ctl = vtw_control(af, msl ? msl : class_to_msl(class));
   2207  1.1    dyoung 	if (!ctl)
   2208  1.1    dyoung 		return 0;
   2209  1.1    dyoung 
   2210  1.1    dyoung 	vtw = vtw_alloc(ctl);
   2211  1.1    dyoung 
   2212  1.1    dyoung 	if (vtw) {
   2213  1.1    dyoung 		vtw->snd_nxt = 0;
   2214  1.1    dyoung 		vtw->rcv_nxt = 0;
   2215  1.1    dyoung 
   2216  1.1    dyoung 		switch (af) {
   2217  1.1    dyoung 		case AF_INET: {
   2218  1.1    dyoung 			vtw_v4_t	*v4  = (void*)vtw;
   2219  1.1    dyoung 
   2220  1.1    dyoung 			v4->faddr = fa->sin_addr.v4.s_addr;
   2221  1.1    dyoung 			v4->laddr = la->sin_addr.v4.s_addr;
   2222  1.1    dyoung 			v4->fport = fa->sin_port;
   2223  1.1    dyoung 			v4->lport = la->sin_port;
   2224  1.1    dyoung 
   2225  1.1    dyoung 			vtw->reuse_port = 1;
   2226  1.1    dyoung 			vtw->reuse_addr = 1;
   2227  1.1    dyoung 			vtw->v6only	= 0;
   2228  1.1    dyoung 			vtw->uid	= 0;
   2229  1.1    dyoung 
   2230  1.1    dyoung 			vtw_inshash_v4(ctl, vtw);
   2231  1.1    dyoung 			break;
   2232  1.1    dyoung 		}
   2233  1.1    dyoung 
   2234  1.1    dyoung 		case AF_INET6: {
   2235  1.1    dyoung 			vtw_v6_t	*v6  = (void*)vtw;
   2236  1.1    dyoung 
   2237  1.1    dyoung 			v6->faddr = fa->sin_addr.v6;
   2238  1.1    dyoung 			v6->laddr = la->sin_addr.v6;
   2239  1.1    dyoung 
   2240  1.1    dyoung 			v6->fport = fa->sin_port;
   2241  1.1    dyoung 			v6->lport = la->sin_port;
   2242  1.1    dyoung 
   2243  1.1    dyoung 			vtw->reuse_port = 1;
   2244  1.1    dyoung 			vtw->reuse_addr = 1;
   2245  1.1    dyoung 			vtw->v6only	= 0;
   2246  1.1    dyoung 			vtw->uid	= 0;
   2247  1.1    dyoung 
   2248  1.1    dyoung 			vtw_inshash_v6(ctl, vtw);
   2249  1.1    dyoung 			break;
   2250  1.1    dyoung 		}
   2251  1.1    dyoung 
   2252  1.1    dyoung 		default:
   2253  1.1    dyoung 			break;
   2254  1.1    dyoung 		}
   2255  1.1    dyoung 
   2256  1.1    dyoung 		return 1;
   2257  1.1    dyoung 	}
   2258  1.1    dyoung 
   2259  1.1    dyoung 	return 0;
   2260  1.1    dyoung }
   2261  1.1    dyoung 
/* sysent[] slot taken by the debug system call; 0 until one is claimed. */
static int vtw_syscall;
   2263  1.1    dyoung 
   2264  1.1    dyoung static int
   2265  1.1    dyoung vtw_debug_process(vtw_sysargs_t *ap)
   2266  1.1    dyoung {
   2267  1.1    dyoung 	struct vestigial_inpcb vestige;
   2268  1.1    dyoung 	int	rc = 0;
   2269  1.1    dyoung 
   2270  1.1    dyoung 	mutex_enter(softnet_lock);
   2271  1.1    dyoung 
   2272  1.1    dyoung 	switch (ap->op) {
   2273  1.1    dyoung 	case 0:		// insert
   2274  1.1    dyoung 		vtw_debug_add(ap->la.sin_family
   2275  1.1    dyoung 			      , &ap->la
   2276  1.1    dyoung 			      , &ap->fa
   2277  1.1    dyoung 			      , TCPTV_MSL
   2278  1.1    dyoung 			      , 0);
   2279  1.1    dyoung 		break;
   2280  1.1    dyoung 
   2281  1.1    dyoung 	case 1:		// lookup
   2282  1.1    dyoung 	case 2:		// restart
   2283  1.1    dyoung 		switch (ap->la.sin_family) {
   2284  1.1    dyoung 		case AF_INET:
   2285  1.1    dyoung 			if (tcp_lookup_v4(ap->fa.sin_addr.v4, ap->fa.sin_port,
   2286  1.1    dyoung 					  ap->la.sin_addr.v4, ap->la.sin_port,
   2287  1.1    dyoung 					  &vestige)) {
   2288  1.1    dyoung 				if (ap->op == 2) {
   2289  1.1    dyoung 					vtw_restart(&vestige);
   2290  1.1    dyoung 				}
   2291  1.1    dyoung 				rc = 0;
   2292  1.1    dyoung 			} else
   2293  1.1    dyoung 				rc = ESRCH;
   2294  1.1    dyoung 			break;
   2295  1.1    dyoung 
   2296  1.1    dyoung 		case AF_INET6:
   2297  1.1    dyoung 			if (tcp_lookup_v6(&ap->fa.sin_addr.v6, ap->fa.sin_port,
   2298  1.1    dyoung 					  &ap->la.sin_addr.v6, ap->la.sin_port,
   2299  1.1    dyoung 					  &vestige)) {
   2300  1.1    dyoung 				if (ap->op == 2) {
   2301  1.1    dyoung 					vtw_restart(&vestige);
   2302  1.1    dyoung 				}
   2303  1.1    dyoung 				rc = 0;
   2304  1.1    dyoung 			} else
   2305  1.1    dyoung 				rc = ESRCH;
   2306  1.1    dyoung 			break;
   2307  1.1    dyoung 		default:
   2308  1.1    dyoung 			rc = EINVAL;
   2309  1.1    dyoung 		}
   2310  1.1    dyoung 		break;
   2311  1.1    dyoung 
   2312  1.1    dyoung 	default:
   2313  1.1    dyoung 		rc = EINVAL;
   2314  1.1    dyoung 	}
   2315  1.1    dyoung 
   2316  1.1    dyoung 	mutex_exit(softnet_lock);
   2317  1.1    dyoung 	return rc;
   2318  1.1    dyoung }
   2319  1.1    dyoung 
   2320  1.1    dyoung struct sys_vtw_args {
   2321  1.1    dyoung 	syscallarg(const vtw_sysargs_t *) req;
   2322  1.1    dyoung 	syscallarg(size_t) len;
   2323  1.1    dyoung };
   2324  1.1    dyoung 
   2325  1.1    dyoung static int
   2326  1.1    dyoung vtw_sys(struct lwp *l, const void *_, register_t *retval)
   2327  1.1    dyoung {
   2328  1.1    dyoung 	const struct sys_vtw_args *uap = _;
   2329  1.1    dyoung 	void	*buf;
   2330  1.1    dyoung 	int	rc;
   2331  1.1    dyoung 	size_t	len	= SCARG(uap, len);
   2332  1.1    dyoung 
   2333  1.1    dyoung 	if (len != sizeof (vtw_sysargs_t))
   2334  1.1    dyoung 		return EINVAL;
   2335  1.1    dyoung 
   2336  1.1    dyoung 	buf = kmem_alloc(len, KM_SLEEP);
   2337  1.1    dyoung 	if (!buf)
   2338  1.1    dyoung 		return ENOMEM;
   2339  1.1    dyoung 
   2340  1.1    dyoung 	rc = copyin(SCARG(uap, req), buf, len);
   2341  1.1    dyoung 	if (!rc) {
   2342  1.1    dyoung 		rc = vtw_debug_process(buf);
   2343  1.1    dyoung 	}
   2344  1.1    dyoung 	kmem_free(buf, len);
   2345  1.1    dyoung 
   2346  1.1    dyoung 	return rc;
   2347  1.1    dyoung }
   2348  1.1    dyoung 
   2349  1.1    dyoung static void
   2350  1.1    dyoung vtw_sanity_check(void)
   2351  1.1    dyoung {
   2352  1.1    dyoung 	vtw_ctl_t	*ctl;
   2353  1.1    dyoung 	vtw_t		*vtw;
   2354  1.1    dyoung 	int		i;
   2355  1.1    dyoung 	int		n;
   2356  1.1    dyoung 
   2357  1.1    dyoung 	for (i = 0; i < VTW_NCLASS; ++i) {
   2358  1.1    dyoung 		ctl = &vtw_tcpv4[i];
   2359  1.1    dyoung 
   2360  1.1    dyoung 		if (!ctl->base.v || ctl->nalloc)
   2361  1.1    dyoung 			continue;
   2362  1.1    dyoung 
   2363  1.1    dyoung 		for (n = 0, vtw = ctl->base.v; ; ) {
   2364  1.1    dyoung 			++n;
   2365  1.1    dyoung 			vtw = vtw_next(ctl, vtw);
   2366  1.1    dyoung 			if (vtw == ctl->base.v)
   2367  1.1    dyoung 				break;
   2368  1.1    dyoung 		}
   2369  1.1    dyoung 		db_trace(KTR_VTW
   2370  1.1    dyoung 			 , (ctl, "sanity: class %x n %x nfree %x"
   2371  1.1    dyoung 			    , i, n, ctl->nfree));
   2372  1.1    dyoung 
   2373  1.1    dyoung 		KASSERT(n == ctl->nfree);
   2374  1.1    dyoung 	}
   2375  1.1    dyoung 
   2376  1.1    dyoung 	for (i = 0; i < VTW_NCLASS; ++i) {
   2377  1.1    dyoung 		ctl = &vtw_tcpv6[i];
   2378  1.1    dyoung 
   2379  1.1    dyoung 		if (!ctl->base.v || ctl->nalloc)
   2380  1.1    dyoung 			continue;
   2381  1.1    dyoung 
   2382  1.1    dyoung 		for (n = 0, vtw = ctl->base.v; ; ) {
   2383  1.1    dyoung 			++n;
   2384  1.1    dyoung 			vtw = vtw_next(ctl, vtw);
   2385  1.1    dyoung 			if (vtw == ctl->base.v)
   2386  1.1    dyoung 				break;
   2387  1.1    dyoung 		}
   2388  1.1    dyoung 		db_trace(KTR_VTW
   2389  1.1    dyoung 			 , (ctl, "sanity: class %x n %x nfree %x"
   2390  1.1    dyoung 			    , i, n, ctl->nfree));
   2391  1.1    dyoung 		KASSERT(n == ctl->nfree);
   2392  1.1    dyoung 	}
   2393  1.1    dyoung }
   2394  1.1    dyoung 
   2395  1.1    dyoung /*!\brief	Initialise debug support.
   2396  1.1    dyoung  */
   2397  1.1    dyoung static void
   2398  1.1    dyoung vtw_debug_init(void)
   2399  1.1    dyoung {
   2400  1.1    dyoung 	int	i;
   2401  1.1    dyoung 
   2402  1.1    dyoung 	vtw_sanity_check();
   2403  1.1    dyoung 
   2404  1.1    dyoung 	if (vtw_syscall)
   2405  1.1    dyoung 		return;
   2406  1.1    dyoung 
   2407  1.1    dyoung 	for (i = 511; i; --i) {
   2408  1.1    dyoung 		if (sysent[i].sy_call == sys_nosys) {
   2409  1.1    dyoung 			sysent[i].sy_call    = vtw_sys;
   2410  1.1    dyoung 			sysent[i].sy_narg    = 2;
   2411  1.1    dyoung 			sysent[i].sy_argsize = sizeof (struct sys_vtw_args);
   2412  1.1    dyoung 			sysent[i].sy_flags   = 0;
   2413  1.1    dyoung 
   2414  1.1    dyoung 			vtw_syscall = i;
   2415  1.1    dyoung 			break;
   2416  1.1    dyoung 		}
   2417  1.1    dyoung 	}
   2418  1.1    dyoung 	if (i) {
   2419  1.1    dyoung 		const struct sysctlnode *node;
   2420  1.1    dyoung 		uint32_t	flags;
   2421  1.1    dyoung 
   2422  1.1    dyoung 		flags = sysctl_root.sysctl_flags;
   2423  1.1    dyoung 
   2424  1.1    dyoung 		sysctl_root.sysctl_flags |= CTLFLAG_READWRITE;
   2425  1.1    dyoung 		sysctl_root.sysctl_flags &= ~CTLFLAG_PERMANENT;
   2426  1.1    dyoung 
   2427  1.1    dyoung 		sysctl_createv(0, 0, 0, &node,
   2428  1.1    dyoung 			       CTLFLAG_PERMANENT, CTLTYPE_NODE,
   2429  1.1    dyoung 			       "koff",
   2430  1.1    dyoung 			       SYSCTL_DESCR("Kernel Obscure Feature Finder"),
   2431  1.1    dyoung 			       0, 0, 0, 0, CTL_CREATE, CTL_EOL);
   2432  1.1    dyoung 
   2433  1.1    dyoung 		if (!node) {
   2434  1.1    dyoung 			sysctl_createv(0, 0, 0, &node,
   2435  1.1    dyoung 				       CTLFLAG_PERMANENT, CTLTYPE_NODE,
   2436  1.1    dyoung 				       "koffka",
   2437  1.1    dyoung 				       SYSCTL_DESCR("The Real(tm) Kernel"
   2438  1.1    dyoung 						    " Obscure Feature Finder"),
   2439  1.1    dyoung 				       0, 0, 0, 0, CTL_CREATE, CTL_EOL);
   2440  1.1    dyoung 		}
   2441  1.1    dyoung 		if (node) {
   2442  1.1    dyoung 			sysctl_createv(0, 0, 0, 0,
   2443  1.1    dyoung 				       CTLFLAG_PERMANENT|CTLFLAG_READONLY,
   2444  1.1    dyoung 				       CTLTYPE_INT, "vtw_debug_syscall",
   2445  1.1    dyoung 				       SYSCTL_DESCR("vtw debug"
   2446  1.1    dyoung 						    " system call number"),
   2447  1.1    dyoung 				       0, 0, &vtw_syscall, 0, node->sysctl_num,
   2448  1.1    dyoung 				       CTL_CREATE, CTL_EOL);
   2449  1.1    dyoung 		}
   2450  1.1    dyoung 		sysctl_root.sysctl_flags = flags;
   2451  1.1    dyoung 	}
   2452  1.1    dyoung }
   2453  1.1    dyoung #else /* !VTW_DEBUG */
/* Debug support is compiled out without VTW_DEBUG: nothing to set up. */
static void
vtw_debug_init(void)
{
}
   2459  1.1    dyoung #endif /* !VTW_DEBUG */
   2460