Home | History | Annotate | Line # | Download | only in netinet
tcp_vtw.c revision 1.9.2.2
      1      1.1    dyoung /*
      2      1.1    dyoung  * Copyright (c) 2011 The NetBSD Foundation, Inc.
      3      1.1    dyoung  * All rights reserved.
      4      1.1    dyoung  *
      5      1.1    dyoung  * This code is derived from software contributed to The NetBSD Foundation
      6      1.1    dyoung  * by Coyote Point Systems, Inc.
      7      1.1    dyoung  *
      8      1.1    dyoung  * Redistribution and use in source and binary forms, with or without
      9      1.1    dyoung  * modification, are permitted provided that the following conditions
     10      1.1    dyoung  * are met:
     11      1.1    dyoung  * 1. Redistributions of source code must retain the above copyright
     12      1.1    dyoung  *    notice, this list of conditions and the following disclaimer.
     13      1.1    dyoung  * 2. Redistributions in binary form must reproduce the above copyright
     14      1.1    dyoung  *    notice, this list of conditions and the following disclaimer in the
     15      1.1    dyoung  *    documentation and/or other materials provided with the distribution.
     16      1.1    dyoung  *
     17      1.1    dyoung  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     18      1.1    dyoung  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     19      1.1    dyoung  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     20      1.1    dyoung  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     21      1.1    dyoung  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     22      1.1    dyoung  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     23      1.1    dyoung  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     24      1.1    dyoung  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     25      1.1    dyoung  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     26      1.1    dyoung  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     27      1.1    dyoung  * POSSIBILITY OF SUCH DAMAGE.
     28      1.1    dyoung  */
     29      1.9      yamt 
     30      1.9      yamt /*
     31      1.9      yamt  * Reduces the resources demanded by TCP sessions in TIME_WAIT-state using
     32      1.9      yamt  * methods called Vestigial Time-Wait (VTW) and Maximum Segment Lifetime
     33      1.9      yamt  * Truncation (MSLT).
     34      1.9      yamt  *
     35      1.9      yamt  * MSLT and VTW were contributed by Coyote Point Systems, Inc.
     36      1.9      yamt  *
     37      1.9      yamt  * Even after a TCP session enters the TIME_WAIT state, its corresponding
     38      1.9      yamt  * socket and protocol control blocks (PCBs) stick around until the TCP
     39      1.9      yamt  * Maximum Segment Lifetime (MSL) expires.  On a host whose workload
     40      1.9      yamt  * necessarily creates and closes down many TCP sockets, the sockets & PCBs
     41      1.9      yamt  * for TCP sessions in TIME_WAIT state amount to many megabytes of dead
     42      1.9      yamt  * weight in RAM.
     43      1.9      yamt  *
     44      1.9      yamt  * Maximum Segment Lifetime Truncation (MSLT) assigns each TCP session to
     45      1.9      yamt  * a class based on the nearness of the peer.  Corresponding to each class
     46      1.9      yamt  * is an MSL, and a session uses the MSL of its class.  The classes are
     47      1.9      yamt  * loopback (local host equals remote host), local (local host and remote
     48      1.9      yamt  * host are on the same link/subnet), and remote (local host and remote
     49      1.9      yamt  * host communicate via one or more gateways).  Classes corresponding to
     50      1.9      yamt  * nearer peers have lower MSLs by default: 2 seconds for loopback, 10
     51      1.9      yamt  * seconds for local, 60 seconds for remote.  Loopback and local sessions
     52      1.9      yamt  * expire more quickly when MSLT is used.
     53      1.9      yamt  *
     54      1.9      yamt  * Vestigial Time-Wait (VTW) replaces a TIME_WAIT session's PCB/socket
     55      1.9      yamt  * dead weight with a compact representation of the session, called a
     56      1.9      yamt  * "vestigial PCB".  VTW data structures are designed to be very fast and
     57      1.9      yamt  * memory-efficient: for fast insertion and lookup of vestigial PCBs,
     58      1.9      yamt  * the PCBs are stored in a hash table that is designed to minimize the
     59      1.9      yamt  * number of cacheline visits per lookup/insertion.  The memory both
     60      1.9      yamt  * for vestigial PCBs and for elements of the PCB hashtable come from
     61      1.9      yamt  * fixed-size pools, and linked data structures exploit this to conserve
     62      1.9      yamt  * memory by representing references with a narrow index/offset from the
     63      1.9      yamt  * start of a pool instead of a pointer.  When space for new vestigial PCBs
     64      1.9      yamt  * runs out, VTW makes room by discarding old vestigial PCBs, oldest first.
     65      1.9      yamt  * VTW cooperates with MSLT.
     66      1.9      yamt  *
     67      1.9      yamt  * It may help to think of VTW as a "FIN cache" by analogy to the SYN
     68      1.9      yamt  * cache.
     69      1.9      yamt  *
     70      1.9      yamt  * A 2.8-GHz Pentium 4 running a test workload that creates TIME_WAIT
     71      1.9      yamt  * sessions as fast as it can is approximately 17% idle when VTW is active
     72      1.9      yamt  * versus 0% idle when VTW is inactive.  It has 103 megabytes more free RAM
     73      1.9      yamt  * when VTW is active (approximately 64k vestigial PCBs are created) than
     74      1.9      yamt  * when it is inactive.
     75      1.9      yamt  */
     76      1.9      yamt 
     77      1.1    dyoung #include <sys/cdefs.h>
     78      1.1    dyoung 
     79  1.9.2.2  jdolecek #ifdef _KERNEL_OPT
     80      1.1    dyoung #include "opt_ddb.h"
     81      1.1    dyoung #include "opt_inet.h"
     82      1.1    dyoung #include "opt_inet_csum.h"
     83      1.1    dyoung #include "opt_tcp_debug.h"
     84  1.9.2.2  jdolecek #endif
     85      1.1    dyoung 
     86      1.1    dyoung #include <sys/param.h>
     87      1.1    dyoung #include <sys/systm.h>
     88      1.1    dyoung #include <sys/kmem.h>
     89      1.1    dyoung #include <sys/mbuf.h>
     90      1.1    dyoung #include <sys/protosw.h>
     91      1.1    dyoung #include <sys/socket.h>
     92      1.1    dyoung #include <sys/socketvar.h>
     93      1.1    dyoung #include <sys/errno.h>
     94      1.1    dyoung #include <sys/syslog.h>
     95      1.1    dyoung #include <sys/pool.h>
     96      1.1    dyoung #include <sys/domain.h>
     97      1.1    dyoung #include <sys/kernel.h>
     98      1.1    dyoung #include <net/if.h>
     99      1.1    dyoung #include <net/if_types.h>
    100      1.1    dyoung 
    101      1.1    dyoung #include <netinet/in.h>
    102      1.1    dyoung #include <netinet/in_systm.h>
    103      1.1    dyoung #include <netinet/ip.h>
    104      1.1    dyoung #include <netinet/in_pcb.h>
    105      1.1    dyoung #include <netinet/in_var.h>
    106      1.1    dyoung #include <netinet/ip_var.h>
    107      1.1    dyoung #include <netinet/in_offload.h>
    108      1.1    dyoung #include <netinet/ip6.h>
    109      1.1    dyoung #include <netinet6/ip6_var.h>
    110      1.1    dyoung #include <netinet6/in6_pcb.h>
    111      1.1    dyoung #include <netinet6/ip6_var.h>
    112      1.1    dyoung #include <netinet6/in6_var.h>
    113      1.1    dyoung #include <netinet/icmp6.h>
    114      1.1    dyoung 
    115      1.1    dyoung #include <netinet/tcp.h>
    116      1.1    dyoung #include <netinet/tcp_fsm.h>
    117      1.1    dyoung #include <netinet/tcp_seq.h>
    118      1.1    dyoung #include <netinet/tcp_timer.h>
    119      1.1    dyoung #include <netinet/tcp_var.h>
    120      1.1    dyoung #include <netinet/tcp_private.h>
    121      1.1    dyoung #include <netinet/tcpip.h>
    122      1.1    dyoung 
    123      1.1    dyoung #include <netinet/tcp_vtw.h>
    124      1.1    dyoung 
    125  1.9.2.1       tls __KERNEL_RCSID(0, "$NetBSD: tcp_vtw.c,v 1.9.2.2 2017/12/03 11:39:04 jdolecek Exp $");
    126      1.1    dyoung 
    127      1.1    dyoung #define db_trace(__a, __b)	do { } while (/*CONSTCOND*/0)
    128      1.1    dyoung 
    129      1.1    dyoung static void vtw_debug_init(void);
    130      1.1    dyoung 
    131      1.1    dyoung fatp_ctl_t fat_tcpv4;
    132      1.1    dyoung fatp_ctl_t fat_tcpv6;
    133      1.1    dyoung vtw_ctl_t  vtw_tcpv4[VTW_NCLASS];
    134      1.1    dyoung vtw_ctl_t  vtw_tcpv6[VTW_NCLASS];
    135      1.1    dyoung vtw_stats_t vtw_stats;
    136      1.1    dyoung 
    137      1.1    dyoung /* We provide state for the lookup_ports iterator.
    138      1.1    dyoung  * As currently we are netlock-protected, there is one.
    139      1.1    dyoung  * If we were finer-grain, we would have one per CPU.
    140      1.1    dyoung  * I do not want to be in the business of alloc/free.
    141      1.1    dyoung  * The best alternate would be allocate on the caller's
    142      1.1    dyoung  * stack, but that would require them to know the struct,
    143      1.1    dyoung  * or at least the size.
    144      1.1    dyoung  * See how she goes.
    145      1.1    dyoung  */
    146      1.1    dyoung struct tcp_ports_iterator {
    147      1.1    dyoung 	union {
    148      1.1    dyoung 		struct in_addr	v4;
    149      1.1    dyoung 		struct in6_addr	v6;
    150      1.1    dyoung 	}		addr;
    151      1.1    dyoung 	u_int		port;
    152      1.1    dyoung 
    153      1.1    dyoung 	uint32_t	wild	: 1;
    154      1.1    dyoung 
    155      1.1    dyoung 	vtw_ctl_t	*ctl;
    156      1.1    dyoung 	fatp_t		*fp;
    157      1.1    dyoung 
    158      1.1    dyoung 	uint16_t	slot_idx;
    159      1.1    dyoung 	uint16_t	ctl_idx;
    160      1.1    dyoung };
    161      1.1    dyoung 
    162      1.1    dyoung static struct tcp_ports_iterator tcp_ports_iterator_v4;
    163      1.1    dyoung static struct tcp_ports_iterator tcp_ports_iterator_v6;
    164      1.1    dyoung 
    165      1.1    dyoung static int vtw_age(vtw_ctl_t *, struct timeval *);
    166      1.1    dyoung 
    167      1.1    dyoung /*!\brief allocate a fat pointer from a collection.
    168      1.1    dyoung  */
    169      1.1    dyoung static fatp_t *
    170      1.1    dyoung fatp_alloc(fatp_ctl_t *fat)
    171      1.1    dyoung {
    172      1.1    dyoung 	fatp_t	*fp	= 0;
    173      1.1    dyoung 
    174      1.1    dyoung 	if (fat->nfree) {
    175      1.1    dyoung 		fp = fat->free;
    176      1.1    dyoung 		if (fp) {
    177      1.1    dyoung 			fat->free = fatp_next(fat, fp);
    178      1.1    dyoung 			--fat->nfree;
    179      1.1    dyoung 			++fat->nalloc;
    180      1.1    dyoung 			fp->nxt = 0;
    181      1.1    dyoung 
    182      1.1    dyoung 			KASSERT(!fp->inuse);
    183      1.1    dyoung 		}
    184      1.1    dyoung 	}
    185      1.1    dyoung 
    186      1.1    dyoung 	return fp;
    187      1.1    dyoung }
    188      1.1    dyoung 
    189      1.1    dyoung /*!\brief free a fat pointer.
    190      1.1    dyoung  */
    191      1.1    dyoung static void
    192      1.1    dyoung fatp_free(fatp_ctl_t *fat, fatp_t *fp)
    193      1.1    dyoung {
    194      1.1    dyoung 	if (fp) {
    195      1.1    dyoung 		KASSERT(!fp->inuse);
    196      1.1    dyoung 		KASSERT(!fp->nxt);
    197      1.1    dyoung 
    198      1.1    dyoung 		fp->nxt = fatp_index(fat, fat->free);
    199      1.1    dyoung 		fat->free = fp;
    200      1.1    dyoung 
    201      1.1    dyoung 		++fat->nfree;
    202      1.1    dyoung 		--fat->nalloc;
    203      1.1    dyoung 	}
    204      1.1    dyoung }
    205      1.1    dyoung 
    206      1.1    dyoung /*!\brief initialise a collection of fat pointers.
    207      1.1    dyoung  *
    208      1.1    dyoung  *\param n	# hash buckets
    209      1.1    dyoung  *\param m	total # fat pointers to allocate
    210      1.1    dyoung  *
    211      1.1    dyoung  * We allocate 2x as much, as we have two hashes: full and lport only.
    212      1.1    dyoung  */
    213      1.1    dyoung static void
    214      1.6    dyoung fatp_init(fatp_ctl_t *fat, uint32_t n, uint32_t m,
    215      1.6    dyoung     fatp_t *fat_base, fatp_t **fat_hash)
    216      1.1    dyoung {
    217      1.1    dyoung 	fatp_t	*fp;
    218      1.1    dyoung 
    219      1.1    dyoung 	KASSERT(n <= FATP_MAX / 2);
    220      1.1    dyoung 
    221      1.6    dyoung 	fat->hash = fat_hash;
    222      1.6    dyoung 	fat->base = fat_base;
    223      1.1    dyoung 
    224      1.1    dyoung 	fat->port = &fat->hash[m];
    225      1.1    dyoung 
    226      1.1    dyoung 	fat->mask   = m - 1;	// ASSERT is power of 2 (m)
    227      1.1    dyoung 	fat->lim    = fat->base + 2*n - 1;
    228      1.1    dyoung 	fat->nfree  = 0;
    229      1.1    dyoung 	fat->nalloc = 2*n;
    230      1.1    dyoung 
    231      1.1    dyoung 	/* Initialise the free list.
    232      1.1    dyoung 	 */
    233      1.1    dyoung 	for (fp = fat->lim; fp >= fat->base; --fp) {
    234      1.1    dyoung 		fatp_free(fat, fp);
    235      1.1    dyoung 	}
    236      1.1    dyoung }
    237      1.1    dyoung 
/*
 * The `xtra' is XORed into the tag stored.
 *
 * Indexed by slot number within a fat pointer.  The table holds 32
 * entries and is never modified, hence const.
 */
static const uint32_t fatp_xtra[] = {
	0x11111111,0x22222222,0x33333333,0x44444444,
	0x55555555,0x66666666,0x77777777,0x88888888,
	0x12121212,0x21212121,0x34343434,0x43434343,
	0x56565656,0x65656565,0x78787878,0x87878787,
	0x11221122,0x22112211,0x33443344,0x44334433,
	0x55665566,0x66556655,0x77887788,0x88778877,
	0x11112222,0x22221111,0x33334444,0x44443333,
	0x55556666,0x66665555,0x77778888,0x88887777,
};
    251      1.1    dyoung 
    252      1.1    dyoung /*!\brief turn a {fatp_t*,slot} into an integral key.
    253      1.1    dyoung  *
    254      1.1    dyoung  * The key can be used to obtain the fatp_t, and the slot,
    255      1.1    dyoung  * as it directly encodes them.
    256      1.1    dyoung  */
    257      1.1    dyoung static inline uint32_t
    258      1.1    dyoung fatp_key(fatp_ctl_t *fat, fatp_t *fp, uint32_t slot)
    259      1.1    dyoung {
    260      1.1    dyoung 	CTASSERT(CACHE_LINE_SIZE == 32 ||
    261      1.1    dyoung 	         CACHE_LINE_SIZE == 64 ||
    262      1.1    dyoung 		 CACHE_LINE_SIZE == 128);
    263      1.1    dyoung 
    264      1.1    dyoung 	switch (fatp_ntags()) {
    265      1.1    dyoung 	case 7:
    266      1.1    dyoung 		return (fatp_index(fat, fp) << 3) | slot;
    267      1.1    dyoung 	case 15:
    268      1.1    dyoung 		return (fatp_index(fat, fp) << 4) | slot;
    269      1.1    dyoung 	case 31:
    270      1.1    dyoung 		return (fatp_index(fat, fp) << 5) | slot;
    271      1.1    dyoung 	default:
    272      1.1    dyoung 		KASSERT(0 && "no support, for no good reason");
    273      1.1    dyoung 		return ~0;
    274      1.1    dyoung 	}
    275      1.1    dyoung }
    276      1.1    dyoung 
    277      1.1    dyoung static inline uint32_t
    278      1.1    dyoung fatp_slot_from_key(fatp_ctl_t *fat, uint32_t key)
    279      1.1    dyoung {
    280      1.1    dyoung 	CTASSERT(CACHE_LINE_SIZE == 32 ||
    281      1.1    dyoung 	         CACHE_LINE_SIZE == 64 ||
    282      1.1    dyoung 		 CACHE_LINE_SIZE == 128);
    283      1.1    dyoung 
    284      1.1    dyoung 	switch (fatp_ntags()) {
    285      1.1    dyoung 	case 7:
    286      1.1    dyoung 		return key & 7;
    287      1.1    dyoung 	case 15:
    288      1.1    dyoung 		return key & 15;
    289      1.1    dyoung 	case 31:
    290      1.1    dyoung 		return key & 31;
    291      1.1    dyoung 	default:
    292      1.1    dyoung 		KASSERT(0 && "no support, for no good reason");
    293      1.1    dyoung 		return ~0;
    294      1.1    dyoung 	}
    295      1.1    dyoung }
    296      1.1    dyoung 
    297      1.1    dyoung static inline fatp_t *
    298      1.1    dyoung fatp_from_key(fatp_ctl_t *fat, uint32_t key)
    299      1.1    dyoung {
    300      1.1    dyoung 	CTASSERT(CACHE_LINE_SIZE == 32 ||
    301      1.1    dyoung 	         CACHE_LINE_SIZE == 64 ||
    302      1.1    dyoung 		 CACHE_LINE_SIZE == 128);
    303      1.1    dyoung 
    304      1.1    dyoung 	switch (fatp_ntags()) {
    305      1.1    dyoung 	case 7:
    306      1.1    dyoung 		key >>= 3;
    307      1.1    dyoung 		break;
    308      1.1    dyoung 	case 15:
    309      1.1    dyoung 		key >>= 4;
    310      1.1    dyoung 		break;
    311      1.1    dyoung 	case 31:
    312      1.1    dyoung 		key >>= 5;
    313      1.1    dyoung 		break;
    314      1.1    dyoung 	default:
    315      1.1    dyoung 		KASSERT(0 && "no support, for no good reason");
    316      1.1    dyoung 		return 0;
    317      1.1    dyoung 	}
    318      1.1    dyoung 
    319      1.1    dyoung 	return key ? fat->base + key - 1 : 0;
    320      1.1    dyoung }
    321      1.1    dyoung 
    322      1.1    dyoung static inline uint32_t
    323      1.1    dyoung idx_encode(vtw_ctl_t *ctl, uint32_t idx)
    324      1.1    dyoung {
    325      1.1    dyoung 	return (idx << ctl->idx_bits) | idx;
    326      1.1    dyoung }
    327      1.1    dyoung 
    328      1.1    dyoung static inline uint32_t
    329      1.1    dyoung idx_decode(vtw_ctl_t *ctl, uint32_t bits)
    330      1.1    dyoung {
    331      1.1    dyoung 	uint32_t	idx	= bits & ctl->idx_mask;
    332      1.1    dyoung 
    333      1.1    dyoung 	if (idx_encode(ctl, idx) == bits)
    334      1.1    dyoung 		return idx;
    335      1.1    dyoung 	else
    336      1.1    dyoung 		return ~0;
    337      1.1    dyoung }
    338      1.1    dyoung 
    339      1.1    dyoung /*!\brief	insert index into fatp hash
    340      1.1    dyoung  *
    341      1.1    dyoung  *\param	idx	-	index of element being placed in hash chain
    342      1.1    dyoung  *\param	tag	-	32-bit tag identifier
    343      1.1    dyoung  *
    344      1.1    dyoung  *\returns
    345      1.1    dyoung  *	value which can be used to locate entry.
    346      1.1    dyoung  *
    347      1.1    dyoung  *\note
    348      1.1    dyoung  *	we rely on the fact that there are unused high bits in the index
    349      1.1    dyoung  *	for verification purposes on lookup.
    350      1.1    dyoung  */
    351      1.1    dyoung 
    352      1.1    dyoung static inline uint32_t
    353      1.1    dyoung fatp_vtw_inshash(fatp_ctl_t *fat, uint32_t idx, uint32_t tag, int which,
    354      1.1    dyoung     void *dbg)
    355      1.1    dyoung {
    356      1.1    dyoung 	fatp_t	*fp;
    357      1.1    dyoung 	fatp_t	**hash = (which ? fat->port : fat->hash);
    358      1.1    dyoung 	int	i;
    359      1.1    dyoung 
    360      1.1    dyoung 	fp = hash[tag & fat->mask];
    361      1.1    dyoung 
    362      1.1    dyoung 	while (!fp || fatp_full(fp)) {
    363      1.1    dyoung 		fatp_t	*fq;
    364      1.1    dyoung 
    365      1.1    dyoung 		/* All entries are inuse at the top level.
    366      1.1    dyoung 		 * We allocate a spare, and push the top level
    367      1.1    dyoung 		 * down one.  All entries in the fp we push down
    368      1.1    dyoung 		 * (think of a tape worm here) will be expelled sooner than
    369      1.1    dyoung 		 * any entries added subsequently to this hash bucket.
    370      1.1    dyoung 		 * This is a property of the time waits we are exploiting.
    371      1.1    dyoung 		 */
    372      1.1    dyoung 
    373      1.1    dyoung 		fq = fatp_alloc(fat);
    374      1.1    dyoung 		if (!fq) {
    375      1.1    dyoung 			vtw_age(fat->vtw, 0);
    376      1.1    dyoung 			fp = hash[tag & fat->mask];
    377      1.1    dyoung 			continue;
    378      1.1    dyoung 		}
    379      1.1    dyoung 
    380      1.1    dyoung 		fq->inuse = 0;
    381      1.1    dyoung 		fq->nxt   = fatp_index(fat, fp);
    382      1.1    dyoung 
    383      1.1    dyoung 		hash[tag & fat->mask] = fq;
    384      1.1    dyoung 
    385      1.1    dyoung 		fp = fq;
    386      1.1    dyoung 	}
    387      1.1    dyoung 
    388      1.1    dyoung 	KASSERT(!fatp_full(fp));
    389      1.1    dyoung 
    390      1.1    dyoung 	/* Fill highest index first.  Lookup is lowest first.
    391      1.1    dyoung 	 */
    392      1.1    dyoung 	for (i = fatp_ntags(); --i >= 0; ) {
    393      1.1    dyoung 		if (!((1 << i) & fp->inuse)) {
    394      1.1    dyoung 			break;
    395      1.1    dyoung 		}
    396      1.1    dyoung 	}
    397      1.1    dyoung 
    398      1.1    dyoung 	fp->inuse |= 1 << i;
    399      1.1    dyoung 	fp->tag[i] = tag ^ idx_encode(fat->vtw, idx) ^ fatp_xtra[i];
    400      1.1    dyoung 
    401      1.1    dyoung 	db_trace(KTR_VTW
    402      1.1    dyoung 		 , (fp, "fat: inuse %5.5x tag[%x] %8.8x"
    403      1.1    dyoung 		    , fp->inuse
    404      1.1    dyoung 		    , i, fp->tag[i]));
    405      1.1    dyoung 
    406      1.1    dyoung 	return fatp_key(fat, fp, i);
    407      1.1    dyoung }
    408      1.1    dyoung 
    409      1.1    dyoung static inline int
    410      1.1    dyoung vtw_alive(const vtw_t *vtw)
    411      1.1    dyoung {
    412      1.1    dyoung 	return vtw->hashed && vtw->expire.tv_sec;
    413      1.1    dyoung }
    414      1.1    dyoung 
    415      1.1    dyoung static inline uint32_t
    416      1.1    dyoung vtw_index_v4(vtw_ctl_t *ctl, vtw_v4_t *v4)
    417      1.1    dyoung {
    418      1.1    dyoung 	if (ctl->base.v4 <= v4 && v4 <= ctl->lim.v4)
    419      1.1    dyoung 		return v4 - ctl->base.v4;
    420      1.1    dyoung 
    421      1.1    dyoung 	KASSERT(0 && "vtw out of bounds");
    422      1.1    dyoung 
    423      1.1    dyoung 	return ~0;
    424      1.1    dyoung }
    425      1.1    dyoung 
    426      1.1    dyoung static inline uint32_t
    427      1.1    dyoung vtw_index_v6(vtw_ctl_t *ctl, vtw_v6_t *v6)
    428      1.1    dyoung {
    429      1.1    dyoung 	if (ctl->base.v6 <= v6 && v6 <= ctl->lim.v6)
    430      1.1    dyoung 		return v6 - ctl->base.v6;
    431      1.1    dyoung 
    432      1.1    dyoung 	KASSERT(0 && "vtw out of bounds");
    433      1.1    dyoung 
    434      1.1    dyoung 	return ~0;
    435      1.1    dyoung }
    436      1.1    dyoung 
    437      1.1    dyoung static inline uint32_t
    438      1.1    dyoung vtw_index(vtw_ctl_t *ctl, vtw_t *vtw)
    439      1.1    dyoung {
    440      1.1    dyoung 	if (ctl->clidx)
    441      1.1    dyoung 		ctl = ctl->ctl;
    442      1.1    dyoung 
    443      1.1    dyoung 	if (ctl->is_v4)
    444      1.1    dyoung 		return vtw_index_v4(ctl, (vtw_v4_t *)vtw);
    445      1.1    dyoung 
    446      1.1    dyoung 	if (ctl->is_v6)
    447      1.1    dyoung 		return vtw_index_v6(ctl, (vtw_v6_t *)vtw);
    448      1.1    dyoung 
    449      1.1    dyoung 	KASSERT(0 && "neither 4 nor 6.  most curious.");
    450      1.1    dyoung 
    451      1.1    dyoung 	return ~0;
    452      1.1    dyoung }
    453      1.1    dyoung 
    454      1.1    dyoung static inline vtw_t *
    455      1.1    dyoung vtw_from_index(vtw_ctl_t *ctl, uint32_t idx)
    456      1.1    dyoung {
    457      1.1    dyoung 	if (ctl->clidx)
    458      1.1    dyoung 		ctl = ctl->ctl;
    459      1.1    dyoung 
    460      1.1    dyoung 	/* See if the index looks like it might be an index.
    461      1.1    dyoung 	 * Bits on outside of the valid index bits is a give away.
    462      1.1    dyoung 	 */
    463      1.1    dyoung 	idx = idx_decode(ctl, idx);
    464      1.1    dyoung 
    465      1.1    dyoung 	if (idx == ~0) {
    466      1.1    dyoung 		return 0;
    467      1.1    dyoung 	} else if (ctl->is_v4) {
    468      1.1    dyoung 		vtw_v4_t	*vtw = ctl->base.v4 + idx;
    469      1.1    dyoung 
    470      1.1    dyoung 		return (ctl->base.v4 <= vtw && vtw <= ctl->lim.v4)
    471      1.1    dyoung 			? &vtw->common : 0;
    472      1.1    dyoung 	} else if (ctl->is_v6) {
    473      1.1    dyoung 		vtw_v6_t	*vtw = ctl->base.v6 + idx;
    474      1.1    dyoung 
    475      1.1    dyoung 		return (ctl->base.v6 <= vtw && vtw <= ctl->lim.v6)
    476      1.1    dyoung 			? &vtw->common : 0;
    477      1.1    dyoung 	} else {
    478      1.1    dyoung 		KASSERT(0 && "badness");
    479      1.1    dyoung 		return 0;
    480      1.1    dyoung 	}
    481      1.1    dyoung }
    482      1.1    dyoung 
    483      1.1    dyoung /*!\brief return the next vtw after this one.
    484      1.1    dyoung  *
    485      1.1    dyoung  * Due to the differing sizes of the entries in differing
    486      1.1    dyoung  * arenas, we have to ensure we ++ the correct pointer type.
    487      1.1    dyoung  *
    488      1.1    dyoung  * Also handles wrap.
    489      1.1    dyoung  */
    490      1.1    dyoung static inline vtw_t *
    491      1.1    dyoung vtw_next(vtw_ctl_t *ctl, vtw_t *vtw)
    492      1.1    dyoung {
    493      1.1    dyoung 	if (ctl->is_v4) {
    494      1.1    dyoung 		vtw_v4_t	*v4 = (void*)vtw;
    495      1.1    dyoung 
    496      1.1    dyoung 		vtw = &(++v4)->common;
    497      1.1    dyoung 	} else {
    498      1.1    dyoung 		vtw_v6_t	*v6 = (void*)vtw;
    499      1.1    dyoung 
    500      1.1    dyoung 		vtw = &(++v6)->common;
    501      1.1    dyoung 	}
    502      1.1    dyoung 
    503      1.1    dyoung 	if (vtw > ctl->lim.v)
    504      1.1    dyoung 		vtw = ctl->base.v;
    505      1.1    dyoung 
    506      1.1    dyoung 	return vtw;
    507      1.1    dyoung }
    508      1.1    dyoung 
    509      1.1    dyoung /*!\brief	remove entry from FATP hash chains
    510      1.1    dyoung  */
    511      1.1    dyoung static inline void
    512      1.1    dyoung vtw_unhash(vtw_ctl_t *ctl, vtw_t *vtw)
    513      1.1    dyoung {
    514      1.1    dyoung 	fatp_ctl_t	*fat	= ctl->fat;
    515      1.1    dyoung 	fatp_t		*fp;
    516      1.1    dyoung 	uint32_t	key = vtw->key;
    517      1.1    dyoung 	uint32_t	tag, slot, idx;
    518      1.1    dyoung 	vtw_v4_t	*v4 = (void*)vtw;
    519      1.1    dyoung 	vtw_v6_t	*v6 = (void*)vtw;
    520      1.1    dyoung 
    521      1.1    dyoung 	if (!vtw->hashed) {
    522      1.1    dyoung 		KASSERT(0 && "unhashed");
    523      1.1    dyoung 		return;
    524      1.1    dyoung 	}
    525      1.1    dyoung 
    526      1.1    dyoung 	if (fat->vtw->is_v4) {
    527      1.1    dyoung 		tag = v4_tag(v4->faddr, v4->fport, v4->laddr, v4->lport);
    528      1.1    dyoung 	} else if (fat->vtw->is_v6) {
    529      1.1    dyoung 		tag = v6_tag(&v6->faddr, v6->fport, &v6->laddr, v6->lport);
    530      1.1    dyoung 	} else {
    531      1.1    dyoung 		tag = 0;
    532      1.1    dyoung 		KASSERT(0 && "not reached");
    533      1.1    dyoung 	}
    534      1.1    dyoung 
    535      1.1    dyoung 	/* Remove from fat->hash[]
    536      1.1    dyoung 	 */
    537      1.1    dyoung 	slot = fatp_slot_from_key(fat, key);
    538      1.1    dyoung 	fp   = fatp_from_key(fat, key);
    539      1.1    dyoung 	idx  = vtw_index(ctl, vtw);
    540      1.1    dyoung 
    541      1.1    dyoung 	db_trace(KTR_VTW
    542      1.1    dyoung 		 , (fp, "fat: del inuse %5.5x slot %x idx %x key %x tag %x"
    543      1.1    dyoung 		    , fp->inuse, slot, idx, key, tag));
    544      1.1    dyoung 
    545      1.1    dyoung 	KASSERT(fp->inuse & (1 << slot));
    546      1.1    dyoung 	KASSERT(fp->tag[slot] == (tag ^ idx_encode(ctl, idx)
    547      1.1    dyoung 				  ^ fatp_xtra[slot]));
    548      1.1    dyoung 
    549      1.1    dyoung 	if ((fp->inuse & (1 << slot))
    550      1.1    dyoung 	    && fp->tag[slot] == (tag ^ idx_encode(ctl, idx)
    551      1.1    dyoung 				 ^ fatp_xtra[slot])) {
    552      1.1    dyoung 		fp->inuse ^= 1 << slot;
    553      1.1    dyoung 		fp->tag[slot] = 0;
    554      1.1    dyoung 
    555      1.1    dyoung 		/* When we delete entries, we do not compact.  This is
    556      1.1    dyoung 		 * due to temporality.  We add entries, and they
    557      1.1    dyoung 		 * (eventually) expire. Older entries will be further
    558      1.1    dyoung 		 * down the chain.
    559      1.1    dyoung 		 */
    560      1.1    dyoung 		if (!fp->inuse) {
    561      1.1    dyoung 			uint32_t hi = tag & fat->mask;
    562      1.1    dyoung 			fatp_t	*fq = 0;
    563      1.1    dyoung 			fatp_t	*fr = fat->hash[hi];
    564      1.1    dyoung 
    565      1.1    dyoung 			while (fr && fr != fp) {
    566      1.1    dyoung 				fr = fatp_next(fat, fq = fr);
    567      1.1    dyoung 			}
    568      1.1    dyoung 
    569      1.1    dyoung 			if (fr == fp) {
    570      1.1    dyoung 				if (fq) {
    571      1.1    dyoung 					fq->nxt = fp->nxt;
    572      1.1    dyoung 					fp->nxt = 0;
    573      1.1    dyoung 					fatp_free(fat, fp);
    574      1.1    dyoung 				} else {
    575      1.1    dyoung 					KASSERT(fat->hash[hi] == fp);
    576      1.1    dyoung 
    577      1.1    dyoung 					if (fp->nxt) {
    578      1.1    dyoung 						fat->hash[hi]
    579      1.1    dyoung 							= fatp_next(fat, fp);
    580      1.1    dyoung 						fp->nxt = 0;
    581      1.1    dyoung 						fatp_free(fat, fp);
    582      1.1    dyoung 					} else {
    583      1.1    dyoung 						/* retain for next use.
    584      1.1    dyoung 						 */
    585      1.1    dyoung 						;
    586      1.1    dyoung 					}
    587      1.1    dyoung 				}
    588      1.1    dyoung 			} else {
    589      1.1    dyoung 				fr = fat->hash[hi];
    590      1.1    dyoung 
    591      1.1    dyoung 				do {
    592      1.1    dyoung 					db_trace(KTR_VTW
    593      1.1    dyoung 						 , (fr
    594      1.1    dyoung 						    , "fat:*del inuse %5.5x"
    595      1.1    dyoung 						    " nxt %x"
    596      1.1    dyoung 						    , fr->inuse, fr->nxt));
    597      1.1    dyoung 
    598      1.1    dyoung 					fr = fatp_next(fat, fq = fr);
    599      1.1    dyoung 				} while (fr && fr != fp);
    600      1.1    dyoung 
    601      1.1    dyoung 				KASSERT(0 && "oops");
    602      1.1    dyoung 			}
    603      1.1    dyoung 		}
    604      1.1    dyoung 		vtw->key ^= ~0;
    605      1.1    dyoung 	}
    606      1.1    dyoung 
    607      1.1    dyoung 	if (fat->vtw->is_v4) {
    608      1.1    dyoung 		tag = v4_port_tag(v4->lport);
    609      1.1    dyoung 	} else if (fat->vtw->is_v6) {
    610      1.1    dyoung 		tag = v6_port_tag(v6->lport);
    611      1.1    dyoung 	}
    612      1.1    dyoung 
    613      1.1    dyoung 	/* Remove from fat->port[]
    614      1.1    dyoung 	 */
    615      1.1    dyoung 	key  = vtw->port_key;
    616      1.1    dyoung 	slot = fatp_slot_from_key(fat, key);
    617      1.1    dyoung 	fp   = fatp_from_key(fat, key);
    618      1.1    dyoung 	idx  = vtw_index(ctl, vtw);
    619      1.1    dyoung 
    620      1.1    dyoung 	db_trace(KTR_VTW
    621      1.1    dyoung 		 , (fp, "fatport: del inuse %5.5x"
    622      1.1    dyoung 		    " slot %x idx %x key %x tag %x"
    623      1.1    dyoung 		    , fp->inuse, slot, idx, key, tag));
    624      1.1    dyoung 
    625      1.1    dyoung 	KASSERT(fp->inuse & (1 << slot));
    626      1.1    dyoung 	KASSERT(fp->tag[slot] == (tag ^ idx_encode(ctl, idx)
    627      1.1    dyoung 				  ^ fatp_xtra[slot]));
    628      1.1    dyoung 
    629      1.1    dyoung 	if ((fp->inuse & (1 << slot))
    630      1.1    dyoung 	    && fp->tag[slot] == (tag ^ idx_encode(ctl, idx)
    631      1.1    dyoung 				 ^ fatp_xtra[slot])) {
    632      1.1    dyoung 		fp->inuse ^= 1 << slot;
    633      1.1    dyoung 		fp->tag[slot] = 0;
    634      1.1    dyoung 
    635      1.1    dyoung 		if (!fp->inuse) {
    636      1.1    dyoung 			uint32_t hi = tag & fat->mask;
    637      1.1    dyoung 			fatp_t	*fq = 0;
    638      1.1    dyoung 			fatp_t	*fr = fat->port[hi];
    639      1.1    dyoung 
    640      1.1    dyoung 			while (fr && fr != fp) {
    641      1.1    dyoung 				fr = fatp_next(fat, fq = fr);
    642      1.1    dyoung 			}
    643      1.1    dyoung 
    644      1.1    dyoung 			if (fr == fp) {
    645      1.1    dyoung 				if (fq) {
    646      1.1    dyoung 					fq->nxt = fp->nxt;
    647      1.1    dyoung 					fp->nxt = 0;
    648      1.1    dyoung 					fatp_free(fat, fp);
    649      1.1    dyoung 				} else {
    650      1.1    dyoung 					KASSERT(fat->port[hi] == fp);
    651      1.1    dyoung 
    652      1.1    dyoung 					if (fp->nxt) {
    653      1.1    dyoung 						fat->port[hi]
    654      1.1    dyoung 							= fatp_next(fat, fp);
    655      1.1    dyoung 						fp->nxt = 0;
    656      1.1    dyoung 						fatp_free(fat, fp);
    657      1.1    dyoung 					} else {
    658      1.1    dyoung 						/* retain for next use.
    659      1.1    dyoung 						 */
    660      1.1    dyoung 						;
    661      1.1    dyoung 					}
    662      1.1    dyoung 				}
    663      1.1    dyoung 			}
    664      1.1    dyoung 		}
    665      1.1    dyoung 		vtw->port_key ^= ~0;
    666      1.1    dyoung 	}
    667      1.1    dyoung 
    668      1.1    dyoung 	vtw->hashed = 0;
    669      1.1    dyoung }
    670      1.1    dyoung 
    671      1.1    dyoung /*!\brief	remove entry from hash, possibly free.
    672      1.1    dyoung  */
    673      1.1    dyoung void
    674      1.1    dyoung vtw_del(vtw_ctl_t *ctl, vtw_t *vtw)
    675      1.1    dyoung {
    676      1.1    dyoung 	KASSERT(mutex_owned(softnet_lock));
    677      1.1    dyoung 
    678      1.1    dyoung 	if (vtw->hashed) {
    679      1.1    dyoung 		++vtw_stats.del;
    680      1.1    dyoung 		vtw_unhash(ctl, vtw);
    681      1.1    dyoung 	}
    682      1.1    dyoung 
    683      1.1    dyoung 	/* We only delete the oldest entry.
    684      1.1    dyoung 	 */
    685      1.1    dyoung 	if (vtw != ctl->oldest.v)
    686      1.1    dyoung 		return;
    687      1.1    dyoung 
    688      1.1    dyoung 	--ctl->nalloc;
    689      1.1    dyoung 	++ctl->nfree;
    690      1.1    dyoung 
    691      1.1    dyoung 	vtw->expire.tv_sec  = 0;
    692      1.1    dyoung 	vtw->expire.tv_usec = ~0;
    693      1.1    dyoung 
    694      1.1    dyoung 	if (!ctl->nalloc)
    695      1.1    dyoung 		ctl->oldest.v = 0;
    696      1.1    dyoung 
    697      1.1    dyoung 	ctl->oldest.v = vtw_next(ctl, vtw);
    698      1.1    dyoung }
    699      1.1    dyoung 
    700      1.4  dholland /*!\brief	insert vestigial timewait in hash chain
    701      1.1    dyoung  */
    702      1.1    dyoung static void
    703      1.1    dyoung vtw_inshash_v4(vtw_ctl_t *ctl, vtw_t *vtw)
    704      1.1    dyoung {
    705      1.1    dyoung 	uint32_t	idx	= vtw_index(ctl, vtw);
    706      1.1    dyoung 	uint32_t	tag;
    707      1.1    dyoung 	vtw_v4_t	*v4 = (void*)vtw;
    708      1.1    dyoung 
    709      1.1    dyoung 	KASSERT(mutex_owned(softnet_lock));
    710      1.1    dyoung 	KASSERT(!vtw->hashed);
    711      1.1    dyoung 	KASSERT(ctl->clidx == vtw->msl_class);
    712      1.1    dyoung 
    713      1.1    dyoung 	++vtw_stats.ins;
    714      1.1    dyoung 
    715      1.1    dyoung 	tag = v4_tag(v4->faddr, v4->fport,
    716      1.1    dyoung 		     v4->laddr, v4->lport);
    717      1.1    dyoung 
    718      1.1    dyoung 	vtw->key = fatp_vtw_inshash(ctl->fat, idx, tag, 0, vtw);
    719      1.1    dyoung 
    720      1.1    dyoung 	db_trace(KTR_VTW, (ctl
    721      1.1    dyoung 			   , "vtw: ins %8.8x:%4.4x %8.8x:%4.4x"
    722      1.1    dyoung 			   " tag %8.8x key %8.8x"
    723      1.1    dyoung 			   , v4->faddr, v4->fport
    724      1.1    dyoung 			   , v4->laddr, v4->lport
    725      1.1    dyoung 			   , tag
    726      1.1    dyoung 			   , vtw->key));
    727      1.1    dyoung 
    728      1.1    dyoung 	tag = v4_port_tag(v4->lport);
    729      1.1    dyoung 	vtw->port_key = fatp_vtw_inshash(ctl->fat, idx, tag, 1, vtw);
    730      1.1    dyoung 
    731      1.1    dyoung 	db_trace(KTR_VTW, (ctl, "vtw: ins %P - %4.4x tag %8.8x key %8.8x"
    732      1.1    dyoung 			   , v4->lport, v4->lport
    733      1.1    dyoung 			   , tag
    734      1.1    dyoung 			   , vtw->key));
    735      1.1    dyoung 
    736      1.1    dyoung 	vtw->hashed = 1;
    737      1.1    dyoung }
    738      1.1    dyoung 
    739      1.4  dholland /*!\brief	insert vestigial timewait in hash chain
    740      1.1    dyoung  */
    741      1.1    dyoung static void
    742      1.1    dyoung vtw_inshash_v6(vtw_ctl_t *ctl, vtw_t *vtw)
    743      1.1    dyoung {
    744      1.1    dyoung 	uint32_t	idx	= vtw_index(ctl, vtw);
    745      1.1    dyoung 	uint32_t	tag;
    746      1.1    dyoung 	vtw_v6_t	*v6	= (void*)vtw;
    747      1.1    dyoung 
    748      1.1    dyoung 	KASSERT(mutex_owned(softnet_lock));
    749      1.1    dyoung 	KASSERT(!vtw->hashed);
    750      1.1    dyoung 	KASSERT(ctl->clidx == vtw->msl_class);
    751      1.1    dyoung 
    752      1.1    dyoung 	++vtw_stats.ins;
    753      1.1    dyoung 
    754      1.1    dyoung 	tag = v6_tag(&v6->faddr, v6->fport,
    755      1.1    dyoung 		     &v6->laddr, v6->lport);
    756      1.1    dyoung 
    757      1.1    dyoung 	vtw->key = fatp_vtw_inshash(ctl->fat, idx, tag, 0, vtw);
    758      1.1    dyoung 
    759      1.1    dyoung 	tag = v6_port_tag(v6->lport);
    760      1.1    dyoung 	vtw->port_key = fatp_vtw_inshash(ctl->fat, idx, tag, 1, vtw);
    761      1.1    dyoung 
    762      1.1    dyoung 	db_trace(KTR_VTW, (ctl, "vtw: ins %P - %4.4x tag %8.8x key %8.8x"
    763      1.1    dyoung 			   , v6->lport, v6->lport
    764      1.1    dyoung 			   , tag
    765      1.1    dyoung 			   , vtw->key));
    766      1.1    dyoung 
    767      1.1    dyoung 	vtw->hashed = 1;
    768      1.1    dyoung }
    769      1.1    dyoung 
    770      1.1    dyoung static vtw_t *
    771      1.1    dyoung vtw_lookup_hash_v4(vtw_ctl_t *ctl, uint32_t faddr, uint16_t fport
    772      1.1    dyoung 				 , uint32_t laddr, uint16_t lport
    773      1.1    dyoung 				 , int which)
    774      1.1    dyoung {
    775      1.1    dyoung 	vtw_v4_t	*v4;
    776      1.1    dyoung 	vtw_t		*vtw;
    777      1.1    dyoung 	uint32_t	tag;
    778      1.1    dyoung 	fatp_t		*fp;
    779      1.1    dyoung 	int		i;
    780      1.1    dyoung 	uint32_t	fatps = 0, probes = 0, losings = 0;
    781      1.1    dyoung 
    782      1.1    dyoung 	if (!ctl || !ctl->fat)
    783      1.1    dyoung 		return 0;
    784      1.1    dyoung 
    785      1.1    dyoung 	++vtw_stats.look[which];
    786      1.1    dyoung 
    787      1.1    dyoung 	if (which) {
    788      1.1    dyoung 		tag = v4_port_tag(lport);
    789      1.1    dyoung 		fp  = ctl->fat->port[tag & ctl->fat->mask];
    790      1.1    dyoung 	} else {
    791      1.1    dyoung 		tag = v4_tag(faddr, fport, laddr, lport);
    792      1.1    dyoung 		fp  = ctl->fat->hash[tag & ctl->fat->mask];
    793      1.1    dyoung 	}
    794      1.1    dyoung 
    795      1.1    dyoung 	while (fp && fp->inuse) {
    796      1.1    dyoung 		uint32_t	inuse = fp->inuse;
    797      1.1    dyoung 
    798      1.1    dyoung 		++fatps;
    799      1.1    dyoung 
    800      1.1    dyoung 		for (i = 0; inuse && i < fatp_ntags(); ++i) {
    801      1.1    dyoung 			uint32_t	idx;
    802      1.1    dyoung 
    803      1.1    dyoung 			if (!(inuse & (1 << i)))
    804      1.1    dyoung 				continue;
    805      1.1    dyoung 
    806      1.1    dyoung 			inuse ^= 1 << i;
    807      1.1    dyoung 
    808      1.1    dyoung 			++probes;
    809      1.1    dyoung 			++vtw_stats.probe[which];
    810      1.1    dyoung 
    811      1.1    dyoung 			idx = fp->tag[i] ^ tag ^ fatp_xtra[i];
    812      1.1    dyoung 			vtw = vtw_from_index(ctl, idx);
    813      1.1    dyoung 
    814      1.1    dyoung 			if (!vtw) {
    815      1.1    dyoung 				/* Hopefully fast path.
    816      1.1    dyoung 				 */
    817      1.1    dyoung 				db_trace(KTR_VTW
    818      1.1    dyoung 					 , (fp, "vtw: fast %A:%P %A:%P"
    819      1.1    dyoung 					    " idx %x tag %x"
    820      1.1    dyoung 					    , faddr, fport
    821      1.1    dyoung 					    , laddr, lport
    822      1.1    dyoung 					    , idx, tag));
    823      1.1    dyoung 				continue;
    824      1.1    dyoung 			}
    825      1.1    dyoung 
    826      1.1    dyoung 			v4 = (void*)vtw;
    827      1.1    dyoung 
    828      1.1    dyoung 			/* The de-referencing of vtw is what we want to avoid.
    829      1.1    dyoung 			 * Losing.
    830      1.1    dyoung 			 */
    831      1.1    dyoung 			if (vtw_alive(vtw)
    832      1.1    dyoung 			    && ((which ? vtw->port_key : vtw->key)
    833      1.1    dyoung 				== fatp_key(ctl->fat, fp, i))
    834      1.1    dyoung 			    && (which
    835      1.1    dyoung 				|| (v4->faddr == faddr && v4->laddr == laddr
    836      1.1    dyoung 				    && v4->fport == fport))
    837      1.1    dyoung 			    && v4->lport == lport) {
    838      1.1    dyoung 				++vtw_stats.hit[which];
    839      1.1    dyoung 
    840      1.1    dyoung 				db_trace(KTR_VTW
    841      1.1    dyoung 					 , (fp, "vtw: hit %8.8x:%4.4x"
    842      1.1    dyoung 					    " %8.8x:%4.4x idx %x key %x"
    843      1.1    dyoung 					    , faddr, fport
    844      1.1    dyoung 					    , laddr, lport
    845      1.1    dyoung 					    , idx_decode(ctl, idx), vtw->key));
    846      1.1    dyoung 
    847      1.1    dyoung 				KASSERT(vtw->hashed);
    848      1.1    dyoung 
    849      1.1    dyoung 				goto out;
    850      1.1    dyoung 			}
    851      1.1    dyoung 			++vtw_stats.losing[which];
    852      1.1    dyoung 			++losings;
    853      1.1    dyoung 
    854      1.1    dyoung 			if (vtw_alive(vtw)) {
    855      1.1    dyoung 				db_trace(KTR_VTW
    856      1.1    dyoung 					 , (fp, "vtw:!mis %8.8x:%4.4x"
    857      1.1    dyoung 					    " %8.8x:%4.4x key %x tag %x"
    858      1.1    dyoung 					    , faddr, fport
    859      1.1    dyoung 					    , laddr, lport
    860      1.1    dyoung 					    , fatp_key(ctl->fat, fp, i)
    861      1.1    dyoung 					    , v4_tag(faddr, fport
    862      1.1    dyoung 						     , laddr, lport)));
    863      1.1    dyoung 				db_trace(KTR_VTW
    864      1.1    dyoung 					 , (vtw, "vtw:!mis %8.8x:%4.4x"
    865      1.1    dyoung 					    " %8.8x:%4.4x key %x tag %x"
    866      1.1    dyoung 					    , v4->faddr, v4->fport
    867      1.1    dyoung 					    , v4->laddr, v4->lport
    868      1.1    dyoung 					    , vtw->key
    869      1.1    dyoung 					    , v4_tag(v4->faddr, v4->fport
    870      1.1    dyoung 						     , v4->laddr, v4->lport)));
    871      1.1    dyoung 
    872      1.1    dyoung 				if (vtw->key == fatp_key(ctl->fat, fp, i)) {
    873      1.1    dyoung 					db_trace(KTR_VTW
    874      1.1    dyoung 						 , (vtw, "vtw:!mis %8.8x:%4.4x"
    875      1.1    dyoung 						    " %8.8x:%4.4x key %x"
    876      1.1    dyoung 						    " which %x"
    877      1.1    dyoung 						    , v4->faddr, v4->fport
    878      1.1    dyoung 						    , v4->laddr, v4->lport
    879      1.1    dyoung 						    , vtw->key
    880      1.1    dyoung 						    , which));
    881      1.1    dyoung 
    882      1.1    dyoung 				} else {
    883      1.1    dyoung 					db_trace(KTR_VTW
    884      1.1    dyoung 						 , (vtw
    885      1.1    dyoung 						    , "vtw:!mis"
    886      1.1    dyoung 						    " key %8.8x != %8.8x"
    887      1.1    dyoung 						    " idx %x i %x which %x"
    888      1.1    dyoung 						    , vtw->key
    889      1.1    dyoung 						    , fatp_key(ctl->fat, fp, i)
    890      1.1    dyoung 						    , idx_decode(ctl, idx)
    891      1.1    dyoung 						    , i
    892      1.1    dyoung 						    , which));
    893      1.1    dyoung 				}
    894      1.1    dyoung 			} else {
    895      1.1    dyoung 				db_trace(KTR_VTW
    896      1.1    dyoung 					 , (fp
    897      1.1    dyoung 					    , "vtw:!mis free entry"
    898      1.1    dyoung 					    " idx %x vtw %p which %x"
    899      1.1    dyoung 					    , idx_decode(ctl, idx)
    900      1.1    dyoung 					    , vtw, which));
    901      1.1    dyoung 			}
    902      1.1    dyoung 		}
    903      1.1    dyoung 
    904      1.1    dyoung 		if (fp->nxt) {
    905      1.1    dyoung 			fp = fatp_next(ctl->fat, fp);
    906      1.1    dyoung 		} else {
    907      1.1    dyoung 			break;
    908      1.1    dyoung 		}
    909      1.1    dyoung 	}
    910      1.1    dyoung 	++vtw_stats.miss[which];
    911      1.1    dyoung 	vtw = 0;
    912      1.1    dyoung out:
    913      1.1    dyoung 	if (fatps > vtw_stats.max_chain[which])
    914      1.1    dyoung 		vtw_stats.max_chain[which] = fatps;
    915      1.1    dyoung 	if (probes > vtw_stats.max_probe[which])
    916      1.1    dyoung 		vtw_stats.max_probe[which] = probes;
    917      1.1    dyoung 	if (losings > vtw_stats.max_loss[which])
    918      1.1    dyoung 		vtw_stats.max_loss[which] = losings;
    919      1.1    dyoung 
    920      1.1    dyoung 	return vtw;
    921      1.1    dyoung }
    922      1.1    dyoung 
    923      1.1    dyoung static vtw_t *
    924      1.1    dyoung vtw_lookup_hash_v6(vtw_ctl_t *ctl, const struct in6_addr *faddr, uint16_t fport
    925      1.1    dyoung 				 , const struct in6_addr *laddr, uint16_t lport
    926      1.1    dyoung 				 , int which)
    927      1.1    dyoung {
    928      1.1    dyoung 	vtw_v6_t	*v6;
    929      1.1    dyoung 	vtw_t		*vtw;
    930      1.1    dyoung 	uint32_t	tag;
    931      1.1    dyoung 	fatp_t		*fp;
    932      1.1    dyoung 	int		i;
    933      1.1    dyoung 	uint32_t	fatps = 0, probes = 0, losings = 0;
    934      1.1    dyoung 
    935      1.1    dyoung 	++vtw_stats.look[which];
    936      1.1    dyoung 
    937      1.1    dyoung 	if (!ctl || !ctl->fat)
    938      1.1    dyoung 		return 0;
    939      1.1    dyoung 
    940      1.1    dyoung 	if (which) {
    941      1.1    dyoung 		tag = v6_port_tag(lport);
    942      1.1    dyoung 		fp  = ctl->fat->port[tag & ctl->fat->mask];
    943      1.1    dyoung 	} else {
    944      1.1    dyoung 		tag = v6_tag(faddr, fport, laddr, lport);
    945      1.1    dyoung 		fp  = ctl->fat->hash[tag & ctl->fat->mask];
    946      1.1    dyoung 	}
    947      1.1    dyoung 
    948      1.1    dyoung 	while (fp && fp->inuse) {
    949      1.1    dyoung 		uint32_t	inuse = fp->inuse;
    950      1.1    dyoung 
    951      1.1    dyoung 		++fatps;
    952      1.1    dyoung 
    953      1.1    dyoung 		for (i = 0; inuse && i < fatp_ntags(); ++i) {
    954      1.1    dyoung 			uint32_t	idx;
    955      1.1    dyoung 
    956      1.1    dyoung 			if (!(inuse & (1 << i)))
    957      1.1    dyoung 				continue;
    958      1.1    dyoung 
    959      1.1    dyoung 			inuse ^= 1 << i;
    960      1.1    dyoung 
    961      1.1    dyoung 			++probes;
    962      1.1    dyoung 			++vtw_stats.probe[which];
    963      1.1    dyoung 
    964      1.1    dyoung 			idx = fp->tag[i] ^ tag ^ fatp_xtra[i];
    965      1.1    dyoung 			vtw = vtw_from_index(ctl, idx);
    966      1.1    dyoung 
    967      1.1    dyoung 			db_trace(KTR_VTW
    968      1.1    dyoung 				 , (fp, "probe: %2d %6A:%4.4x %6A:%4.4x idx %x"
    969      1.1    dyoung 				    , i
    970      1.1    dyoung 				    , db_store(faddr, sizeof (*faddr)), fport
    971      1.1    dyoung 				    , db_store(laddr, sizeof (*laddr)), lport
    972      1.1    dyoung 				    , idx_decode(ctl, idx)));
    973      1.1    dyoung 
    974      1.1    dyoung 			if (!vtw) {
    975      1.1    dyoung 				/* Hopefully fast path.
    976      1.1    dyoung 				 */
    977      1.1    dyoung 				continue;
    978      1.1    dyoung 			}
    979      1.1    dyoung 
    980      1.1    dyoung 			v6 = (void*)vtw;
    981      1.1    dyoung 
    982      1.1    dyoung 			if (vtw_alive(vtw)
    983      1.1    dyoung 			    && ((which ? vtw->port_key : vtw->key)
    984      1.1    dyoung 				== fatp_key(ctl->fat, fp, i))
    985      1.1    dyoung 			    && v6->lport == lport
    986      1.1    dyoung 			    && (which
    987      1.1    dyoung 				|| (v6->fport == fport
    988      1.1    dyoung 				    && !bcmp(&v6->faddr, faddr, sizeof (*faddr))
    989      1.1    dyoung 				    && !bcmp(&v6->laddr, laddr
    990      1.1    dyoung 					     , sizeof (*laddr))))) {
    991      1.1    dyoung 				++vtw_stats.hit[which];
    992      1.1    dyoung 
    993      1.1    dyoung 				KASSERT(vtw->hashed);
    994      1.1    dyoung 				goto out;
    995      1.1    dyoung 			} else {
    996      1.1    dyoung 				++vtw_stats.losing[which];
    997      1.1    dyoung 				++losings;
    998      1.1    dyoung 			}
    999      1.1    dyoung 		}
   1000      1.1    dyoung 
   1001      1.1    dyoung 		if (fp->nxt) {
   1002      1.1    dyoung 			fp = fatp_next(ctl->fat, fp);
   1003      1.1    dyoung 		} else {
   1004      1.1    dyoung 			break;
   1005      1.1    dyoung 		}
   1006      1.1    dyoung 	}
   1007      1.1    dyoung 	++vtw_stats.miss[which];
   1008      1.1    dyoung 	vtw = 0;
   1009      1.1    dyoung out:
   1010      1.1    dyoung 	if (fatps > vtw_stats.max_chain[which])
   1011      1.1    dyoung 		vtw_stats.max_chain[which] = fatps;
   1012      1.1    dyoung 	if (probes > vtw_stats.max_probe[which])
   1013      1.1    dyoung 		vtw_stats.max_probe[which] = probes;
   1014      1.1    dyoung 	if (losings > vtw_stats.max_loss[which])
   1015      1.1    dyoung 		vtw_stats.max_loss[which] = losings;
   1016      1.1    dyoung 
   1017      1.1    dyoung 	return vtw;
   1018      1.1    dyoung }
   1019      1.1    dyoung 
   1020      1.1    dyoung /*!\brief port iterator
   1021      1.1    dyoung  */
   1022      1.1    dyoung static vtw_t *
   1023      1.1    dyoung vtw_next_port_v4(struct tcp_ports_iterator *it)
   1024      1.1    dyoung {
   1025      1.1    dyoung 	vtw_ctl_t	*ctl = it->ctl;
   1026      1.1    dyoung 	vtw_v4_t	*v4;
   1027      1.1    dyoung 	vtw_t		*vtw;
   1028      1.1    dyoung 	uint32_t	tag;
   1029      1.1    dyoung 	uint16_t	lport = it->port;
   1030      1.1    dyoung 	fatp_t		*fp;
   1031      1.1    dyoung 	int		i;
   1032      1.1    dyoung 	uint32_t	fatps = 0, probes = 0, losings = 0;
   1033      1.1    dyoung 
   1034      1.1    dyoung 	tag = v4_port_tag(lport);
   1035      1.1    dyoung 	if (!it->fp) {
   1036      1.1    dyoung 		it->fp = ctl->fat->port[tag & ctl->fat->mask];
   1037      1.1    dyoung 		it->slot_idx = 0;
   1038      1.1    dyoung 	}
   1039      1.1    dyoung 	fp  = it->fp;
   1040      1.1    dyoung 
   1041      1.1    dyoung 	while (fp) {
   1042      1.1    dyoung 		uint32_t	inuse = fp->inuse;
   1043      1.1    dyoung 
   1044      1.1    dyoung 		++fatps;
   1045      1.1    dyoung 
   1046      1.1    dyoung 		for (i = it->slot_idx; inuse && i < fatp_ntags(); ++i) {
   1047      1.1    dyoung 			uint32_t	idx;
   1048      1.1    dyoung 
   1049      1.1    dyoung 			if (!(inuse & (1 << i)))
   1050      1.1    dyoung 				continue;
   1051      1.1    dyoung 
   1052  1.9.2.2  jdolecek 			inuse &= ~0U << i;
   1053      1.1    dyoung 
   1054      1.1    dyoung 			if (i < it->slot_idx)
   1055      1.1    dyoung 				continue;
   1056      1.1    dyoung 
   1057      1.1    dyoung 			++vtw_stats.probe[1];
   1058      1.1    dyoung 			++probes;
   1059      1.1    dyoung 
   1060      1.1    dyoung 			idx = fp->tag[i] ^ tag ^ fatp_xtra[i];
   1061      1.1    dyoung 			vtw = vtw_from_index(ctl, idx);
   1062      1.1    dyoung 
   1063      1.1    dyoung 			if (!vtw) {
   1064      1.1    dyoung 				/* Hopefully fast path.
   1065      1.1    dyoung 				 */
   1066      1.1    dyoung 				continue;
   1067      1.1    dyoung 			}
   1068      1.1    dyoung 
   1069      1.1    dyoung 			v4 = (void*)vtw;
   1070      1.1    dyoung 
   1071      1.1    dyoung 			if (vtw_alive(vtw)
   1072      1.1    dyoung 			    && vtw->port_key == fatp_key(ctl->fat, fp, i)
   1073      1.1    dyoung 			    && v4->lport == lport) {
   1074      1.1    dyoung 				++vtw_stats.hit[1];
   1075      1.1    dyoung 
   1076      1.1    dyoung 				it->slot_idx = i + 1;
   1077      1.1    dyoung 
   1078      1.1    dyoung 				goto out;
   1079      1.1    dyoung 			} else if (vtw_alive(vtw)) {
   1080      1.1    dyoung 				++vtw_stats.losing[1];
   1081      1.1    dyoung 				++losings;
   1082      1.1    dyoung 
   1083      1.1    dyoung 				db_trace(KTR_VTW
   1084      1.1    dyoung 					 , (vtw, "vtw:!mis"
   1085      1.1    dyoung 					    " port %8.8x:%4.4x %8.8x:%4.4x"
   1086      1.1    dyoung 					    " key %x port %x"
   1087      1.1    dyoung 					    , v4->faddr, v4->fport
   1088      1.1    dyoung 					    , v4->laddr, v4->lport
   1089      1.1    dyoung 					    , vtw->key
   1090      1.1    dyoung 					    , lport));
   1091      1.1    dyoung 			} else {
   1092      1.1    dyoung 				/* Really losing here.  We are coming
   1093      1.1    dyoung 				 * up with references to free entries.
   1094      1.1    dyoung 				 * Might find it better to use
   1095      1.1    dyoung 				 * traditional, or need another
   1096      1.1    dyoung 				 * add-hockery.  The other add-hockery
   1097      1.1    dyoung 				 * would be to pul more into into the
   1098      1.1    dyoung 				 * cache line to reject the false
   1099      1.1    dyoung 				 * hits.
   1100      1.1    dyoung 				 */
   1101      1.1    dyoung 				++vtw_stats.losing[1];
   1102      1.1    dyoung 				++losings;
   1103      1.1    dyoung 				db_trace(KTR_VTW
   1104      1.1    dyoung 					 , (fp, "vtw:!mis port %x"
   1105      1.1    dyoung 					    " - free entry idx %x vtw %p"
   1106      1.1    dyoung 					    , lport
   1107      1.1    dyoung 					    , idx_decode(ctl, idx)
   1108      1.1    dyoung 					    , vtw));
   1109      1.1    dyoung 			}
   1110      1.1    dyoung 		}
   1111      1.1    dyoung 
   1112      1.1    dyoung 		if (fp->nxt) {
   1113      1.1    dyoung 			it->fp = fp = fatp_next(ctl->fat, fp);
   1114      1.1    dyoung 			it->slot_idx = 0;
   1115      1.1    dyoung 		} else {
   1116      1.1    dyoung 			it->fp = 0;
   1117      1.1    dyoung 			break;
   1118      1.1    dyoung 		}
   1119      1.1    dyoung 	}
   1120      1.1    dyoung 	++vtw_stats.miss[1];
   1121      1.1    dyoung 
   1122      1.1    dyoung 	vtw = 0;
   1123      1.1    dyoung out:
   1124      1.1    dyoung 	if (fatps > vtw_stats.max_chain[1])
   1125      1.1    dyoung 		vtw_stats.max_chain[1] = fatps;
   1126      1.1    dyoung 	if (probes > vtw_stats.max_probe[1])
   1127      1.1    dyoung 		vtw_stats.max_probe[1] = probes;
   1128      1.1    dyoung 	if (losings > vtw_stats.max_loss[1])
   1129      1.1    dyoung 		vtw_stats.max_loss[1] = losings;
   1130      1.1    dyoung 
   1131      1.1    dyoung 	return vtw;
   1132      1.1    dyoung }
   1133      1.1    dyoung 
   1134      1.1    dyoung /*!\brief port iterator
   1135      1.1    dyoung  */
   1136      1.1    dyoung static vtw_t *
   1137      1.1    dyoung vtw_next_port_v6(struct tcp_ports_iterator *it)
   1138      1.1    dyoung {
   1139      1.1    dyoung 	vtw_ctl_t	*ctl = it->ctl;
   1140      1.1    dyoung 	vtw_v6_t	*v6;
   1141      1.1    dyoung 	vtw_t		*vtw;
   1142      1.1    dyoung 	uint32_t	tag;
   1143      1.1    dyoung 	uint16_t	lport = it->port;
   1144      1.1    dyoung 	fatp_t		*fp;
   1145      1.1    dyoung 	int		i;
   1146      1.1    dyoung 	uint32_t	fatps = 0, probes = 0, losings = 0;
   1147      1.1    dyoung 
   1148      1.1    dyoung 	tag = v6_port_tag(lport);
   1149      1.1    dyoung 	if (!it->fp) {
   1150      1.1    dyoung 		it->fp = ctl->fat->port[tag & ctl->fat->mask];
   1151      1.1    dyoung 		it->slot_idx = 0;
   1152      1.1    dyoung 	}
   1153      1.1    dyoung 	fp  = it->fp;
   1154      1.1    dyoung 
   1155      1.1    dyoung 	while (fp) {
   1156      1.1    dyoung 		uint32_t	inuse = fp->inuse;
   1157      1.1    dyoung 
   1158      1.1    dyoung 		++fatps;
   1159      1.1    dyoung 
   1160      1.1    dyoung 		for (i = it->slot_idx; inuse && i < fatp_ntags(); ++i) {
   1161      1.1    dyoung 			uint32_t	idx;
   1162      1.1    dyoung 
   1163      1.1    dyoung 			if (!(inuse & (1 << i)))
   1164      1.1    dyoung 				continue;
   1165      1.1    dyoung 
   1166  1.9.2.2  jdolecek 			inuse &= ~0U << i;
   1167      1.1    dyoung 
   1168      1.1    dyoung 			if (i < it->slot_idx)
   1169      1.1    dyoung 				continue;
   1170      1.1    dyoung 
   1171      1.1    dyoung 			++vtw_stats.probe[1];
   1172      1.1    dyoung 			++probes;
   1173      1.1    dyoung 
   1174      1.1    dyoung 			idx = fp->tag[i] ^ tag ^ fatp_xtra[i];
   1175      1.1    dyoung 			vtw = vtw_from_index(ctl, idx);
   1176      1.1    dyoung 
   1177      1.1    dyoung 			if (!vtw) {
   1178      1.1    dyoung 				/* Hopefully fast path.
   1179      1.1    dyoung 				 */
   1180      1.1    dyoung 				continue;
   1181      1.1    dyoung 			}
   1182      1.1    dyoung 
   1183      1.1    dyoung 			v6 = (void*)vtw;
   1184      1.1    dyoung 
   1185      1.1    dyoung 			db_trace(KTR_VTW
   1186      1.1    dyoung 				 , (vtw, "vtw: i %x idx %x fp->tag %x"
   1187      1.1    dyoung 				    " tag %x xtra %x"
   1188      1.1    dyoung 				    , i, idx_decode(ctl, idx)
   1189      1.1    dyoung 				    , fp->tag[i], tag, fatp_xtra[i]));
   1190      1.1    dyoung 
   1191      1.1    dyoung 			if (vtw_alive(vtw)
   1192      1.1    dyoung 			    && vtw->port_key == fatp_key(ctl->fat, fp, i)
   1193      1.1    dyoung 			    && v6->lport == lport) {
   1194      1.1    dyoung 				++vtw_stats.hit[1];
   1195      1.1    dyoung 
   1196      1.1    dyoung 				db_trace(KTR_VTW
   1197      1.1    dyoung 					 , (fp, "vtw: nxt port %P - %4.4x"
   1198      1.1    dyoung 					    " idx %x key %x"
   1199      1.1    dyoung 					    , lport, lport
   1200      1.1    dyoung 					    , idx_decode(ctl, idx), vtw->key));
   1201      1.1    dyoung 
   1202      1.1    dyoung 				it->slot_idx = i + 1;
   1203      1.1    dyoung 				goto out;
   1204      1.1    dyoung 			} else if (vtw_alive(vtw)) {
   1205      1.1    dyoung 				++vtw_stats.losing[1];
   1206      1.1    dyoung 
   1207      1.1    dyoung 				db_trace(KTR_VTW
   1208      1.1    dyoung 					 , (vtw, "vtw:!mis port %6A:%4.4x"
   1209      1.1    dyoung 					    " %6A:%4.4x key %x port %x"
   1210      1.1    dyoung 					    , db_store(&v6->faddr
   1211      1.1    dyoung 						       , sizeof (v6->faddr))
   1212      1.1    dyoung 					    , v6->fport
   1213      1.1    dyoung 					    , db_store(&v6->laddr
   1214      1.1    dyoung 						       , sizeof (v6->faddr))
   1215      1.1    dyoung 					    , v6->lport
   1216      1.1    dyoung 					    , vtw->key
   1217      1.1    dyoung 					    , lport));
   1218      1.1    dyoung 			} else {
   1219      1.1    dyoung 				/* Really losing here.  We are coming
   1220      1.1    dyoung 				 * up with references to free entries.
   1221      1.1    dyoung 				 * Might find it better to use
   1222      1.1    dyoung 				 * traditional, or need another
   1223      1.1    dyoung 				 * add-hockery.  The other add-hockery
   1224      1.1    dyoung 				 * would be to pul more into into the
   1225      1.1    dyoung 				 * cache line to reject the false
   1226      1.1    dyoung 				 * hits.
   1227      1.1    dyoung 				 */
   1228      1.1    dyoung 				++vtw_stats.losing[1];
   1229      1.1    dyoung 				++losings;
   1230      1.1    dyoung 
   1231      1.1    dyoung 				db_trace(KTR_VTW
   1232      1.1    dyoung 					 , (fp
   1233      1.1    dyoung 					    , "vtw:!mis port %x"
   1234      1.1    dyoung 					    " - free entry idx %x vtw %p"
   1235      1.1    dyoung 					    , lport, idx_decode(ctl, idx)
   1236      1.1    dyoung 					    , vtw));
   1237      1.1    dyoung 			}
   1238      1.1    dyoung 		}
   1239      1.1    dyoung 
   1240      1.1    dyoung 		if (fp->nxt) {
   1241      1.1    dyoung 			it->fp = fp = fatp_next(ctl->fat, fp);
   1242      1.1    dyoung 			it->slot_idx = 0;
   1243      1.1    dyoung 		} else {
   1244      1.1    dyoung 			it->fp = 0;
   1245      1.1    dyoung 			break;
   1246      1.1    dyoung 		}
   1247      1.1    dyoung 	}
   1248      1.1    dyoung 	++vtw_stats.miss[1];
   1249      1.1    dyoung 
   1250      1.1    dyoung 	vtw = 0;
   1251      1.1    dyoung out:
   1252      1.1    dyoung 	if (fatps > vtw_stats.max_chain[1])
   1253      1.1    dyoung 		vtw_stats.max_chain[1] = fatps;
   1254      1.1    dyoung 	if (probes > vtw_stats.max_probe[1])
   1255      1.1    dyoung 		vtw_stats.max_probe[1] = probes;
   1256      1.1    dyoung 	if (losings > vtw_stats.max_loss[1])
   1257      1.1    dyoung 		vtw_stats.max_loss[1] = losings;
   1258      1.1    dyoung 
   1259      1.1    dyoung 	return vtw;
   1260      1.1    dyoung }
   1261      1.1    dyoung 
   1262      1.1    dyoung /*!\brief initialise the VTW allocation arena
   1263      1.1    dyoung  *
   1264      1.1    dyoung  * There are 1+3 allocation classes:
   1265      1.1    dyoung  *	0	classless
   1266      1.1    dyoung  *	{1,2,3}	MSL-class based allocation
   1267      1.1    dyoung  *
   1268      1.1    dyoung  * The allocation arenas are all initialised.  Classless gets all the
   1269      1.1    dyoung  * space.  MSL-class based divides the arena, so that allocation
   1270      1.1    dyoung  * within a class can proceed without having to consider entries
   1271      1.1    dyoung  * (aka: cache lines) from different classes.
   1272      1.1    dyoung  *
   1273      1.1    dyoung  * Usually, we are completely classless or class-based, but there can be
   1274      1.1    dyoung  * transition periods, corresponding to dynamic adjustments in the config
   1275      1.1    dyoung  * by the operator.
   1276      1.1    dyoung  */
   1277      1.1    dyoung static void
   1278      1.6    dyoung vtw_init(fatp_ctl_t *fat, vtw_ctl_t *ctl, const uint32_t n, vtw_t *ctl_base_v)
   1279      1.1    dyoung {
   1280      1.6    dyoung 	int class_n, i;
   1281      1.6    dyoung 	vtw_t	*base;
   1282      1.1    dyoung 
   1283      1.6    dyoung 	ctl->base.v = ctl_base_v;
   1284      1.1    dyoung 
   1285      1.6    dyoung 	if (ctl->is_v4) {
   1286      1.6    dyoung 		ctl->lim.v4    = ctl->base.v4 + n - 1;
   1287      1.6    dyoung 		ctl->alloc.v4  = ctl->base.v4;
   1288      1.6    dyoung 	} else {
   1289      1.6    dyoung 		ctl->lim.v6    = ctl->base.v6 + n - 1;
   1290      1.6    dyoung 		ctl->alloc.v6  = ctl->base.v6;
   1291      1.6    dyoung 	}
   1292      1.1    dyoung 
   1293      1.6    dyoung 	ctl->nfree  = n;
   1294      1.6    dyoung 	ctl->ctl    = ctl;
   1295      1.1    dyoung 
   1296      1.6    dyoung 	ctl->idx_bits = 32;
   1297      1.6    dyoung 	for (ctl->idx_mask = ~0; (ctl->idx_mask & (n-1)) == n-1; ) {
   1298      1.6    dyoung 		ctl->idx_mask >>= 1;
   1299      1.6    dyoung 		ctl->idx_bits  -= 1;
   1300      1.6    dyoung 	}
   1301      1.1    dyoung 
   1302      1.6    dyoung 	ctl->idx_mask <<= 1;
   1303      1.6    dyoung 	ctl->idx_mask  |= 1;
   1304      1.6    dyoung 	ctl->idx_bits  += 1;
   1305      1.1    dyoung 
   1306      1.6    dyoung 	ctl->fat = fat;
   1307      1.6    dyoung 	fat->vtw = ctl;
   1308      1.1    dyoung 
   1309      1.6    dyoung 	/* Divide the resources equally amongst the classes.
   1310      1.6    dyoung 	 * This is not optimal, as the different classes
   1311      1.6    dyoung 	 * arrive and leave at different rates, but it is
   1312      1.6    dyoung 	 * the best I can do for now.
   1313      1.6    dyoung 	 */
   1314      1.6    dyoung 	class_n = n / (VTW_NCLASS-1);
   1315      1.6    dyoung 	base    = ctl->base.v;
   1316      1.1    dyoung 
   1317      1.6    dyoung 	for (i = 1; i < VTW_NCLASS; ++i) {
   1318      1.6    dyoung 		int j;
   1319      1.1    dyoung 
   1320      1.6    dyoung 		ctl[i] = ctl[0];
   1321      1.6    dyoung 		ctl[i].clidx = i;
   1322      1.1    dyoung 
   1323      1.6    dyoung 		ctl[i].base.v = base;
   1324      1.6    dyoung 		ctl[i].alloc  = ctl[i].base;
   1325      1.1    dyoung 
   1326      1.6    dyoung 		for (j = 0; j < class_n - 1; ++j) {
   1327      1.6    dyoung 			if (tcp_msl_enable)
   1328      1.6    dyoung 				base->msl_class = i;
   1329      1.1    dyoung 			base = vtw_next(ctl, base);
   1330      1.1    dyoung 		}
   1331      1.6    dyoung 
   1332      1.6    dyoung 		ctl[i].lim.v = base;
   1333      1.6    dyoung 		base = vtw_next(ctl, base);
   1334      1.6    dyoung 		ctl[i].nfree = class_n;
   1335      1.1    dyoung 	}
   1336      1.1    dyoung 
   1337      1.1    dyoung 	vtw_debug_init();
   1338      1.1    dyoung }
   1339      1.1    dyoung 
   1340      1.1    dyoung /*!\brief	map class to TCP MSL
   1341      1.1    dyoung  */
   1342      1.1    dyoung static inline uint32_t
   1343  1.9.2.2  jdolecek class_to_msl(int msl_class)
   1344      1.1    dyoung {
   1345  1.9.2.2  jdolecek 	switch (msl_class) {
   1346      1.1    dyoung 	case 0:
   1347      1.1    dyoung 	case 1:
   1348      1.1    dyoung 		return tcp_msl_remote ? tcp_msl_remote : (TCPTV_MSL >> 0);
   1349      1.1    dyoung 	case 2:
   1350      1.1    dyoung 		return tcp_msl_local ? tcp_msl_local : (TCPTV_MSL >> 1);
   1351      1.1    dyoung 	default:
   1352      1.1    dyoung 		return tcp_msl_loop ? tcp_msl_loop : (TCPTV_MSL >> 2);
   1353      1.1    dyoung 	}
   1354      1.1    dyoung }
   1355      1.1    dyoung 
   1356      1.1    dyoung /*!\brief	map TCP MSL to class
   1357      1.1    dyoung  */
   1358      1.1    dyoung static inline uint32_t
   1359      1.1    dyoung msl_to_class(int msl)
   1360      1.1    dyoung {
   1361      1.1    dyoung 	if (tcp_msl_enable) {
   1362      1.1    dyoung 		if (msl <= (tcp_msl_loop ? tcp_msl_loop : (TCPTV_MSL >> 2)))
   1363      1.1    dyoung 			return 1+2;
   1364      1.1    dyoung 		if (msl <= (tcp_msl_local ? tcp_msl_local : (TCPTV_MSL >> 1)))
   1365      1.1    dyoung 			return 1+1;
   1366      1.1    dyoung 		return 1;
   1367      1.1    dyoung 	}
   1368      1.1    dyoung 	return 0;
   1369      1.1    dyoung }
   1370      1.1    dyoung 
   1371      1.1    dyoung /*!\brief allocate a vtw entry
   1372      1.1    dyoung  */
   1373      1.1    dyoung static inline vtw_t *
   1374      1.1    dyoung vtw_alloc(vtw_ctl_t *ctl)
   1375      1.1    dyoung {
   1376      1.1    dyoung 	vtw_t	*vtw	= 0;
   1377      1.1    dyoung 	int	stuck	= 0;
   1378      1.1    dyoung 	int	avail	= ctl ? (ctl->nalloc + ctl->nfree) : 0;
   1379      1.1    dyoung 	int	msl;
   1380      1.1    dyoung 
   1381      1.1    dyoung 	KASSERT(mutex_owned(softnet_lock));
   1382      1.1    dyoung 
   1383      1.1    dyoung 	/* If no resources, we will not get far.
   1384      1.1    dyoung 	 */
   1385      1.1    dyoung 	if (!ctl || !ctl->base.v4 || avail <= 0)
   1386      1.1    dyoung 		return 0;
   1387      1.1    dyoung 
   1388      1.1    dyoung 	/* Obtain a free one.
   1389      1.1    dyoung 	 */
   1390      1.1    dyoung 	while (!ctl->nfree) {
   1391      1.1    dyoung 		vtw_age(ctl, 0);
   1392      1.1    dyoung 
   1393      1.1    dyoung 		if (++stuck > avail) {
   1394      1.1    dyoung 			/* When in transition between
   1395      1.1    dyoung 			 * schemes (classless, classed) we
   1396      1.1    dyoung 			 * can be stuck having to await the
   1397      1.1    dyoung 			 * expiration of cross-allocated entries.
   1398      1.1    dyoung 			 *
   1399      1.1    dyoung 			 * Returning zero means we will fall back to the
   1400      1.1    dyoung 			 * traditional TIME_WAIT handling, except in the
   1401      1.1    dyoung 			 * case of a re-shed, in which case we cannot
   1402      1.1    dyoung 			 * perform the reshecd, but will retain the extant
   1403      1.1    dyoung 			 * entry.
   1404      1.1    dyoung 			 */
   1405      1.1    dyoung 			db_trace(KTR_VTW
   1406      1.1    dyoung 				 , (ctl, "vtw:!none free in class %x %x/%x"
   1407      1.1    dyoung 				    , ctl->clidx
   1408      1.1    dyoung 				    , ctl->nalloc, ctl->nfree));
   1409      1.1    dyoung 
   1410      1.1    dyoung 			return 0;
   1411      1.1    dyoung 		}
   1412      1.1    dyoung 	}
   1413      1.1    dyoung 
   1414      1.1    dyoung 	vtw = ctl->alloc.v;
   1415      1.1    dyoung 
   1416      1.1    dyoung 	if (vtw->msl_class != ctl->clidx) {
   1417      1.1    dyoung 		/* Usurping rules:
   1418      1.1    dyoung 		 * 	0 -> {1,2,3} or {1,2,3} -> 0
   1419      1.1    dyoung 		 */
   1420      1.1    dyoung 		KASSERT(!vtw->msl_class || !ctl->clidx);
   1421      1.1    dyoung 
   1422      1.1    dyoung 		if (vtw->hashed || vtw->expire.tv_sec) {
   1423      1.1    dyoung 		    /* As this is owned by some other class,
   1424      1.1    dyoung 		     * we must wait for it to expire it.
   1425      1.1    dyoung 		     * This will only happen on class/classless
   1426      1.1    dyoung 		     * transitions, which are guaranteed to progress
   1427      1.1    dyoung 		     * to completion in small finite time, barring bugs.
   1428      1.1    dyoung 		     */
   1429      1.1    dyoung 		    db_trace(KTR_VTW
   1430      1.1    dyoung 			     , (ctl, "vtw:!%p class %x!=%x %x:%x%s"
   1431      1.1    dyoung 				, vtw, vtw->msl_class, ctl->clidx
   1432      1.1    dyoung 				, vtw->expire.tv_sec
   1433      1.1    dyoung 				, vtw->expire.tv_usec
   1434      1.1    dyoung 				, vtw->hashed ? " hashed" : ""));
   1435      1.1    dyoung 
   1436      1.1    dyoung 		    return 0;
   1437      1.1    dyoung 		}
   1438      1.1    dyoung 
   1439      1.1    dyoung 		db_trace(KTR_VTW
   1440      1.1    dyoung 			 , (ctl, "vtw:!%p usurped from %x to %x"
   1441      1.1    dyoung 			    , vtw, vtw->msl_class, ctl->clidx));
   1442      1.1    dyoung 
   1443      1.1    dyoung 		vtw->msl_class = ctl->clidx;
   1444      1.1    dyoung 	}
   1445      1.1    dyoung 
   1446      1.1    dyoung 	if (vtw_alive(vtw)) {
   1447      1.1    dyoung 		KASSERT(0 && "next free not free");
   1448      1.1    dyoung 		return 0;
   1449      1.1    dyoung 	}
   1450      1.1    dyoung 
   1451      1.1    dyoung 	/* Advance allocation poiter.
   1452      1.1    dyoung 	 */
   1453      1.1    dyoung 	ctl->alloc.v = vtw_next(ctl, vtw);
   1454      1.1    dyoung 
   1455      1.1    dyoung 	--ctl->nfree;
   1456      1.1    dyoung 	++ctl->nalloc;
   1457      1.1    dyoung 
   1458      1.1    dyoung 	msl = (2 * class_to_msl(ctl->clidx) * 1000) / PR_SLOWHZ;	// msec
   1459      1.1    dyoung 
   1460      1.1    dyoung 	/* mark expiration
   1461      1.1    dyoung 	 */
   1462      1.3  drochner 	getmicrouptime(&vtw->expire);
   1463      1.1    dyoung 
   1464      1.1    dyoung 	/* Move expiration into the future.
   1465      1.1    dyoung 	 */
   1466      1.1    dyoung 	vtw->expire.tv_sec  += msl / 1000;
   1467      1.1    dyoung 	vtw->expire.tv_usec += 1000 * (msl % 1000);
   1468      1.1    dyoung 
   1469      1.1    dyoung 	while (vtw->expire.tv_usec >= 1000*1000) {
   1470      1.1    dyoung 		vtw->expire.tv_usec -= 1000*1000;
   1471      1.1    dyoung 		vtw->expire.tv_sec  += 1;
   1472      1.1    dyoung 	}
   1473      1.1    dyoung 
   1474      1.1    dyoung 	if (!ctl->oldest.v)
   1475      1.1    dyoung 		ctl->oldest.v = vtw;
   1476      1.1    dyoung 
   1477      1.1    dyoung 	return vtw;
   1478      1.1    dyoung }
   1479      1.1    dyoung 
   1480      1.1    dyoung /*!\brief expiration
   1481      1.1    dyoung  */
   1482      1.1    dyoung static int
   1483      1.1    dyoung vtw_age(vtw_ctl_t *ctl, struct timeval *_when)
   1484      1.1    dyoung {
   1485      1.1    dyoung 	vtw_t	*vtw;
   1486      1.1    dyoung 	struct timeval then, *when = _when;
   1487      1.1    dyoung 	int	maxtries = 0;
   1488      1.1    dyoung 
   1489      1.1    dyoung 	if (!ctl->oldest.v) {
   1490      1.1    dyoung 		KASSERT(!ctl->nalloc);
   1491      1.1    dyoung 		return 0;
   1492      1.1    dyoung 	}
   1493      1.1    dyoung 
   1494      1.1    dyoung 	for (vtw = ctl->oldest.v; vtw && ctl->nalloc; ) {
   1495      1.1    dyoung 		if (++maxtries > ctl->nalloc)
   1496      1.1    dyoung 			break;
   1497      1.1    dyoung 
   1498      1.1    dyoung 		if (vtw->msl_class != ctl->clidx) {
   1499      1.1    dyoung 			db_trace(KTR_VTW
   1500      1.1    dyoung 				 , (vtw, "vtw:!age class mismatch %x != %x"
   1501      1.1    dyoung 				    , vtw->msl_class, ctl->clidx));
   1502      1.1    dyoung 			/* XXXX
   1503      1.1    dyoung 			 * See if the appropriate action is to skip to the next.
   1504      1.1    dyoung 			 * XXXX
   1505      1.1    dyoung 			 */
   1506      1.1    dyoung 			ctl->oldest.v = vtw = vtw_next(ctl, vtw);
   1507      1.1    dyoung 			continue;
   1508      1.1    dyoung 		}
   1509      1.1    dyoung 		if (!when) {
   1510      1.1    dyoung 			/* Latch oldest timeval if none specified.
   1511      1.1    dyoung 			 */
   1512      1.1    dyoung 			then = vtw->expire;
   1513      1.1    dyoung 			when = &then;
   1514      1.1    dyoung 		}
   1515      1.1    dyoung 
   1516      1.1    dyoung 		if (!timercmp(&vtw->expire, when, <=))
   1517      1.1    dyoung 			break;
   1518      1.1    dyoung 
   1519      1.1    dyoung 		db_trace(KTR_VTW
   1520      1.1    dyoung 			 , (vtw, "vtw: expire %x %8.8x:%8.8x %x/%x"
   1521      1.1    dyoung 			    , ctl->clidx
   1522      1.1    dyoung 			    , vtw->expire.tv_sec
   1523      1.1    dyoung 			    , vtw->expire.tv_usec
   1524      1.1    dyoung 			    , ctl->nalloc
   1525      1.1    dyoung 			    , ctl->nfree));
   1526      1.1    dyoung 
   1527      1.1    dyoung 		if (!_when)
   1528      1.1    dyoung 			++vtw_stats.kill;
   1529      1.1    dyoung 
   1530      1.1    dyoung 		vtw_del(ctl, vtw);
   1531      1.1    dyoung 		vtw = ctl->oldest.v;
   1532      1.1    dyoung 	}
   1533      1.1    dyoung 
   1534      1.1    dyoung 	return ctl->nalloc;	// # remaining allocated
   1535      1.1    dyoung }
   1536      1.1    dyoung 
   1537      1.1    dyoung static callout_t vtw_cs;
   1538      1.1    dyoung 
   1539      1.1    dyoung /*!\brief notice the passage of time.
   1540      1.1    dyoung  * It seems to be getting faster.  What happened to the year?
   1541      1.1    dyoung  */
   1542      1.1    dyoung static void
   1543      1.1    dyoung vtw_tick(void *arg)
   1544      1.1    dyoung {
   1545      1.1    dyoung 	struct timeval now;
   1546      1.1    dyoung 	int i, cnt = 0;
   1547      1.1    dyoung 
   1548      1.3  drochner 	getmicrouptime(&now);
   1549      1.1    dyoung 
   1550      1.1    dyoung 	db_trace(KTR_VTW, (arg, "vtk: tick - now %8.8x:%8.8x"
   1551      1.1    dyoung 			   , now.tv_sec, now.tv_usec));
   1552      1.1    dyoung 
   1553      1.1    dyoung 	mutex_enter(softnet_lock);
   1554      1.1    dyoung 
   1555      1.1    dyoung 	for (i = 0; i < VTW_NCLASS; ++i) {
   1556      1.1    dyoung 		cnt += vtw_age(&vtw_tcpv4[i], &now);
   1557      1.1    dyoung 		cnt += vtw_age(&vtw_tcpv6[i], &now);
   1558      1.1    dyoung 	}
   1559      1.1    dyoung 
   1560      1.1    dyoung 	/* Keep ticks coming while we need them.
   1561      1.1    dyoung 	 */
   1562      1.1    dyoung 	if (cnt)
   1563      1.1    dyoung 		callout_schedule(&vtw_cs, hz / 5);
   1564      1.1    dyoung 	else {
   1565      1.1    dyoung 		tcp_vtw_was_enabled = 0;
   1566      1.1    dyoung 		tcbtable.vestige    = 0;
   1567      1.1    dyoung 	}
   1568      1.1    dyoung 	mutex_exit(softnet_lock);
   1569      1.1    dyoung }
   1570      1.1    dyoung 
   1571      1.1    dyoung /* in_pcblookup_ports assist for handling vestigial entries.
   1572      1.1    dyoung  */
   1573      1.1    dyoung static void *
   1574      1.1    dyoung tcp_init_ports_v4(struct in_addr addr, u_int port, int wild)
   1575      1.1    dyoung {
   1576      1.1    dyoung 	struct tcp_ports_iterator *it = &tcp_ports_iterator_v4;
   1577      1.1    dyoung 
   1578      1.1    dyoung 	bzero(it, sizeof (*it));
   1579      1.1    dyoung 
   1580      1.1    dyoung 	/* Note: the reference to vtw_tcpv4[0] is fine.
   1581      1.1    dyoung 	 * We do not need per-class iteration.  We just
   1582      1.1    dyoung 	 * need to get to the fat, and there is one
   1583      1.1    dyoung 	 * shared fat.
   1584      1.1    dyoung 	 */
   1585      1.1    dyoung 	if (vtw_tcpv4[0].fat) {
   1586      1.1    dyoung 		it->addr.v4 = addr;
   1587      1.1    dyoung 		it->port = port;
   1588      1.1    dyoung 		it->wild = !!wild;
   1589      1.1    dyoung 		it->ctl  = &vtw_tcpv4[0];
   1590      1.1    dyoung 
   1591      1.1    dyoung 		++vtw_stats.look[1];
   1592      1.1    dyoung 	}
   1593      1.1    dyoung 
   1594      1.1    dyoung 	return it;
   1595      1.1    dyoung }
   1596      1.1    dyoung 
   1597      1.1    dyoung /*!\brief export an IPv4 vtw.
   1598      1.1    dyoung  */
   1599      1.1    dyoung static int
   1600      1.1    dyoung vtw_export_v4(vtw_ctl_t *ctl, vtw_t *vtw, vestigial_inpcb_t *res)
   1601      1.1    dyoung {
   1602      1.1    dyoung 	vtw_v4_t	*v4 = (void*)vtw;
   1603      1.1    dyoung 
   1604      1.1    dyoung 	bzero(res, sizeof (*res));
   1605      1.1    dyoung 
   1606      1.1    dyoung 	if (ctl && vtw) {
   1607      1.1    dyoung 		if (!ctl->clidx && vtw->msl_class)
   1608      1.1    dyoung 			ctl += vtw->msl_class;
   1609      1.1    dyoung 		else
   1610      1.1    dyoung 			KASSERT(ctl->clidx == vtw->msl_class);
   1611      1.1    dyoung 
   1612      1.1    dyoung 		res->valid = 1;
   1613      1.1    dyoung 		res->v4    = 1;
   1614      1.1    dyoung 
   1615      1.1    dyoung 		res->faddr.v4.s_addr = v4->faddr;
   1616      1.1    dyoung 		res->laddr.v4.s_addr = v4->laddr;
   1617      1.1    dyoung 		res->fport	= v4->fport;
   1618      1.1    dyoung 		res->lport	= v4->lport;
   1619      1.1    dyoung 		res->vtw	= vtw;		// netlock held over call(s)
   1620      1.1    dyoung 		res->ctl	= ctl;
   1621      1.1    dyoung 		res->reuse_addr = vtw->reuse_addr;
   1622      1.1    dyoung 		res->reuse_port = vtw->reuse_port;
   1623      1.1    dyoung 		res->snd_nxt    = vtw->snd_nxt;
   1624      1.1    dyoung 		res->rcv_nxt	= vtw->rcv_nxt;
   1625      1.1    dyoung 		res->rcv_wnd	= vtw->rcv_wnd;
   1626      1.1    dyoung 		res->uid	= vtw->uid;
   1627      1.1    dyoung 	}
   1628      1.1    dyoung 
   1629      1.1    dyoung 	return res->valid;
   1630      1.1    dyoung }
   1631      1.1    dyoung 
   1632      1.1    dyoung /*!\brief return next port in the port iterator.  yowza.
   1633      1.1    dyoung  */
   1634      1.1    dyoung static int
   1635      1.1    dyoung tcp_next_port_v4(void *arg, struct vestigial_inpcb *res)
   1636      1.1    dyoung {
   1637      1.1    dyoung 	struct tcp_ports_iterator *it = arg;
   1638      1.1    dyoung 	vtw_t		*vtw = 0;
   1639      1.1    dyoung 
   1640      1.1    dyoung 	if (it->ctl)
   1641      1.1    dyoung 		vtw = vtw_next_port_v4(it);
   1642      1.1    dyoung 
   1643      1.1    dyoung 	if (!vtw)
   1644      1.1    dyoung 		it->ctl = 0;
   1645      1.1    dyoung 
   1646      1.1    dyoung 	return vtw_export_v4(it->ctl, vtw, res);
   1647      1.1    dyoung }
   1648      1.1    dyoung 
   1649      1.1    dyoung static int
   1650      1.1    dyoung tcp_lookup_v4(struct in_addr faddr, uint16_t fport,
   1651      1.1    dyoung               struct in_addr laddr, uint16_t lport,
   1652      1.1    dyoung 	      struct vestigial_inpcb *res)
   1653      1.1    dyoung {
   1654      1.1    dyoung 	vtw_t		*vtw;
   1655      1.1    dyoung 	vtw_ctl_t	*ctl;
   1656      1.1    dyoung 
   1657      1.1    dyoung 
   1658      1.1    dyoung 	db_trace(KTR_VTW
   1659      1.1    dyoung 		 , (res, "vtw: lookup %A:%P %A:%P"
   1660      1.1    dyoung 		    , faddr, fport
   1661      1.1    dyoung 		    , laddr, lport));
   1662      1.1    dyoung 
   1663      1.1    dyoung 	vtw = vtw_lookup_hash_v4((ctl = &vtw_tcpv4[0])
   1664      1.1    dyoung 				 , faddr.s_addr, fport
   1665      1.1    dyoung 				 , laddr.s_addr, lport, 0);
   1666      1.1    dyoung 
   1667      1.1    dyoung 	return vtw_export_v4(ctl, vtw, res);
   1668      1.1    dyoung }
   1669      1.1    dyoung 
   1670      1.1    dyoung /* in_pcblookup_ports assist for handling vestigial entries.
   1671      1.1    dyoung  */
   1672      1.1    dyoung static void *
   1673      1.1    dyoung tcp_init_ports_v6(const struct in6_addr *addr, u_int port, int wild)
   1674      1.1    dyoung {
   1675      1.1    dyoung 	struct tcp_ports_iterator *it = &tcp_ports_iterator_v6;
   1676      1.1    dyoung 
   1677      1.1    dyoung 	bzero(it, sizeof (*it));
   1678      1.1    dyoung 
   1679      1.1    dyoung 	/* Note: the reference to vtw_tcpv6[0] is fine.
   1680      1.1    dyoung 	 * We do not need per-class iteration.  We just
   1681      1.1    dyoung 	 * need to get to the fat, and there is one
   1682      1.1    dyoung 	 * shared fat.
   1683      1.1    dyoung 	 */
   1684      1.1    dyoung 	if (vtw_tcpv6[0].fat) {
   1685      1.1    dyoung 		it->addr.v6 = *addr;
   1686      1.1    dyoung 		it->port = port;
   1687      1.1    dyoung 		it->wild = !!wild;
   1688      1.1    dyoung 		it->ctl  = &vtw_tcpv6[0];
   1689      1.1    dyoung 
   1690      1.1    dyoung 		++vtw_stats.look[1];
   1691      1.1    dyoung 	}
   1692      1.1    dyoung 
   1693      1.1    dyoung 	return it;
   1694      1.1    dyoung }
   1695      1.1    dyoung 
   1696      1.1    dyoung /*!\brief export an IPv6 vtw.
   1697      1.1    dyoung  */
   1698      1.1    dyoung static int
   1699      1.1    dyoung vtw_export_v6(vtw_ctl_t *ctl, vtw_t *vtw, vestigial_inpcb_t *res)
   1700      1.1    dyoung {
   1701      1.1    dyoung 	vtw_v6_t	*v6 = (void*)vtw;
   1702      1.1    dyoung 
   1703      1.1    dyoung 	bzero(res, sizeof (*res));
   1704      1.1    dyoung 
   1705      1.1    dyoung 	if (ctl && vtw) {
   1706      1.1    dyoung 		if (!ctl->clidx && vtw->msl_class)
   1707      1.1    dyoung 			ctl += vtw->msl_class;
   1708      1.1    dyoung 		else
   1709      1.1    dyoung 			KASSERT(ctl->clidx == vtw->msl_class);
   1710      1.1    dyoung 
   1711      1.1    dyoung 		res->valid = 1;
   1712      1.1    dyoung 		res->v4    = 0;
   1713      1.1    dyoung 
   1714      1.1    dyoung 		res->faddr.v6	= v6->faddr;
   1715      1.1    dyoung 		res->laddr.v6	= v6->laddr;
   1716      1.1    dyoung 		res->fport	= v6->fport;
   1717      1.1    dyoung 		res->lport	= v6->lport;
   1718      1.1    dyoung 		res->vtw	= vtw;		// netlock held over call(s)
   1719      1.1    dyoung 		res->ctl	= ctl;
   1720      1.1    dyoung 
   1721      1.1    dyoung 		res->v6only	= vtw->v6only;
   1722      1.1    dyoung 		res->reuse_addr = vtw->reuse_addr;
   1723      1.1    dyoung 		res->reuse_port = vtw->reuse_port;
   1724      1.1    dyoung 
   1725      1.1    dyoung 		res->snd_nxt    = vtw->snd_nxt;
   1726      1.1    dyoung 		res->rcv_nxt	= vtw->rcv_nxt;
   1727      1.1    dyoung 		res->rcv_wnd	= vtw->rcv_wnd;
   1728      1.1    dyoung 		res->uid	= vtw->uid;
   1729      1.1    dyoung 	}
   1730      1.1    dyoung 
   1731      1.1    dyoung 	return res->valid;
   1732      1.1    dyoung }
   1733      1.1    dyoung 
   1734      1.1    dyoung static int
   1735      1.1    dyoung tcp_next_port_v6(void *arg, struct vestigial_inpcb *res)
   1736      1.1    dyoung {
   1737      1.1    dyoung 	struct tcp_ports_iterator *it = arg;
   1738      1.1    dyoung 	vtw_t		*vtw = 0;
   1739      1.1    dyoung 
   1740      1.1    dyoung 	if (it->ctl)
   1741      1.1    dyoung 		vtw = vtw_next_port_v6(it);
   1742      1.1    dyoung 
   1743      1.1    dyoung 	if (!vtw)
   1744      1.1    dyoung 		it->ctl = 0;
   1745      1.1    dyoung 
   1746      1.1    dyoung 	return vtw_export_v6(it->ctl, vtw, res);
   1747      1.1    dyoung }
   1748      1.1    dyoung 
   1749      1.1    dyoung static int
   1750      1.1    dyoung tcp_lookup_v6(const struct in6_addr *faddr, uint16_t fport,
   1751      1.1    dyoung               const struct in6_addr *laddr, uint16_t lport,
   1752      1.1    dyoung 	      struct vestigial_inpcb *res)
   1753      1.1    dyoung {
   1754      1.1    dyoung 	vtw_ctl_t	*ctl;
   1755      1.1    dyoung 	vtw_t		*vtw;
   1756      1.1    dyoung 
   1757      1.1    dyoung 	db_trace(KTR_VTW
   1758      1.1    dyoung 		 , (res, "vtw: lookup %6A:%P %6A:%P"
   1759      1.1    dyoung 		    , db_store(faddr, sizeof (*faddr)), fport
   1760      1.1    dyoung 		    , db_store(laddr, sizeof (*laddr)), lport));
   1761      1.1    dyoung 
   1762      1.1    dyoung 	vtw = vtw_lookup_hash_v6((ctl = &vtw_tcpv6[0])
   1763      1.1    dyoung 				 , faddr, fport
   1764      1.1    dyoung 				 , laddr, lport, 0);
   1765      1.1    dyoung 
   1766      1.1    dyoung 	return vtw_export_v6(ctl, vtw, res);
   1767      1.1    dyoung }
   1768      1.1    dyoung 
   1769      1.1    dyoung static vestigial_hooks_t tcp_hooks = {
   1770      1.1    dyoung 	.init_ports4	= tcp_init_ports_v4,
   1771      1.1    dyoung 	.next_port4	= tcp_next_port_v4,
   1772      1.1    dyoung 	.lookup4	= tcp_lookup_v4,
   1773      1.1    dyoung 	.init_ports6	= tcp_init_ports_v6,
   1774      1.1    dyoung 	.next_port6	= tcp_next_port_v6,
   1775      1.1    dyoung 	.lookup6	= tcp_lookup_v6,
   1776      1.1    dyoung };
   1777      1.1    dyoung 
   1778      1.1    dyoung static bool
   1779      1.1    dyoung vtw_select(int af, fatp_ctl_t **fatp, vtw_ctl_t **ctlp)
   1780      1.1    dyoung {
   1781      1.1    dyoung 	fatp_ctl_t	*fat;
   1782      1.1    dyoung 	vtw_ctl_t	*ctl;
   1783      1.1    dyoung 
   1784      1.1    dyoung 	switch (af) {
   1785      1.1    dyoung 	case AF_INET:
   1786      1.1    dyoung 		fat = &fat_tcpv4;
   1787      1.1    dyoung 		ctl = &vtw_tcpv4[0];
   1788      1.1    dyoung 		break;
   1789      1.1    dyoung 	case AF_INET6:
   1790      1.1    dyoung 		fat = &fat_tcpv6;
   1791      1.1    dyoung 		ctl = &vtw_tcpv6[0];
   1792      1.1    dyoung 		break;
   1793      1.1    dyoung 	default:
   1794      1.1    dyoung 		return false;
   1795      1.1    dyoung 	}
   1796      1.1    dyoung 	if (fatp != NULL)
   1797      1.1    dyoung 		*fatp = fat;
   1798      1.1    dyoung 	if (ctlp != NULL)
   1799      1.1    dyoung 		*ctlp = ctl;
   1800      1.1    dyoung 	return true;
   1801      1.1    dyoung }
   1802      1.1    dyoung 
   1803      1.1    dyoung /*!\brief	initialize controlling instance
   1804      1.1    dyoung  */
   1805      1.1    dyoung static int
   1806      1.1    dyoung vtw_control_init(int af)
   1807      1.1    dyoung {
   1808      1.1    dyoung 	fatp_ctl_t	*fat;
   1809      1.1    dyoung 	vtw_ctl_t	*ctl;
   1810      1.6    dyoung 	fatp_t		*fat_base;
   1811      1.6    dyoung 	fatp_t		**fat_hash;
   1812      1.6    dyoung 	vtw_t		*ctl_base_v;
   1813      1.6    dyoung 	uint32_t	n, m;
   1814      1.6    dyoung 	size_t sz;
   1815      1.6    dyoung 
   1816      1.6    dyoung 	KASSERT(powerof2(tcp_vtw_entries));
   1817      1.1    dyoung 
   1818      1.1    dyoung 	if (!vtw_select(af, &fat, &ctl))
   1819      1.1    dyoung 		return EAFNOSUPPORT;
   1820      1.1    dyoung 
   1821      1.6    dyoung 	if (fat->hash != NULL) {
   1822      1.6    dyoung 		KASSERT(fat->base != NULL && ctl->base.v != NULL);
   1823      1.6    dyoung 		return 0;
   1824      1.6    dyoung 	}
   1825      1.6    dyoung 
   1826      1.6    dyoung 	/* Allocate 10% more capacity in the fat pointers.
   1827      1.6    dyoung 	 * We should only need ~#hash additional based on
   1828      1.6    dyoung 	 * how they age, but TIME_WAIT assassination could cause
   1829      1.6    dyoung 	 * sparse fat pointer utilisation.
   1830      1.6    dyoung 	 */
   1831      1.6    dyoung 	m = 512;
   1832      1.6    dyoung 	n = 2*m + (11 * (tcp_vtw_entries / fatp_ntags())) / 10;
   1833      1.6    dyoung 	sz = (ctl->is_v4 ? sizeof(vtw_v4_t) : sizeof(vtw_v6_t));
   1834      1.6    dyoung 
   1835      1.6    dyoung 	fat_hash = kmem_zalloc(2*m * sizeof(fatp_t *), KM_NOSLEEP);
   1836      1.6    dyoung 
   1837      1.6    dyoung 	if (fat_hash == NULL) {
   1838      1.6    dyoung 		printf("%s: could not allocate %zu bytes for "
   1839      1.6    dyoung 		    "hash anchors", __func__, 2*m * sizeof(fatp_t *));
   1840      1.6    dyoung 		return ENOMEM;
   1841      1.6    dyoung 	}
   1842      1.1    dyoung 
   1843      1.6    dyoung 	fat_base = kmem_zalloc(2*n * sizeof(fatp_t), KM_NOSLEEP);
   1844      1.1    dyoung 
   1845      1.6    dyoung 	if (fat_base == NULL) {
   1846      1.6    dyoung 		kmem_free(fat_hash, 2*m * sizeof (fatp_t *));
   1847      1.6    dyoung 		printf("%s: could not allocate %zu bytes for "
   1848      1.6    dyoung 		    "fatp_t array", __func__, 2*n * sizeof(fatp_t));
   1849      1.6    dyoung 		return ENOMEM;
   1850      1.6    dyoung 	}
   1851      1.1    dyoung 
   1852      1.6    dyoung 	ctl_base_v = kmem_zalloc(tcp_vtw_entries * sz, KM_NOSLEEP);
   1853      1.1    dyoung 
   1854      1.6    dyoung 	if (ctl_base_v == NULL) {
   1855      1.6    dyoung 		kmem_free(fat_hash, 2*m * sizeof (fatp_t *));
   1856      1.6    dyoung 		kmem_free(fat_base, 2*n * sizeof(fatp_t));
   1857      1.6    dyoung 		printf("%s: could not allocate %zu bytes for "
   1858      1.6    dyoung 		    "vtw_t array", __func__, tcp_vtw_entries * sz);
   1859      1.6    dyoung 		return ENOMEM;
   1860      1.1    dyoung 	}
   1861      1.1    dyoung 
   1862      1.6    dyoung 	fatp_init(fat, n, m, fat_base, fat_hash);
   1863      1.1    dyoung 
   1864      1.6    dyoung 	vtw_init(fat, ctl, tcp_vtw_entries, ctl_base_v);
   1865      1.1    dyoung 
   1866      1.1    dyoung 	return 0;
   1867      1.1    dyoung }
   1868      1.1    dyoung 
   1869      1.1    dyoung /*!\brief	select controlling instance
   1870      1.1    dyoung  */
   1871      1.1    dyoung static vtw_ctl_t *
   1872      1.1    dyoung vtw_control(int af, uint32_t msl)
   1873      1.1    dyoung {
   1874      1.1    dyoung 	fatp_ctl_t	*fat;
   1875      1.1    dyoung 	vtw_ctl_t	*ctl;
   1876  1.9.2.2  jdolecek 	int		msl_class = msl_to_class(msl);
   1877      1.1    dyoung 
   1878      1.1    dyoung 	if (!vtw_select(af, &fat, &ctl))
   1879      1.1    dyoung 		return NULL;
   1880      1.1    dyoung 
   1881      1.1    dyoung 	if (!fat->base || !ctl->base.v)
   1882      1.1    dyoung 		return NULL;
   1883      1.1    dyoung 
   1884      1.5    dyoung 	if (!tcp_vtw_was_enabled) {
   1885      1.5    dyoung 		/* This guarantees is timer ticks until we no longer need them.
   1886      1.5    dyoung 		 */
   1887      1.5    dyoung 		tcp_vtw_was_enabled = 1;
   1888      1.5    dyoung 
   1889      1.5    dyoung 		callout_schedule(&vtw_cs, hz / 5);
   1890      1.5    dyoung 
   1891      1.5    dyoung 		tcbtable.vestige = &tcp_hooks;
   1892      1.5    dyoung 	}
   1893      1.5    dyoung 
   1894  1.9.2.2  jdolecek 	return ctl + msl_class;
   1895      1.1    dyoung }
   1896      1.1    dyoung 
   1897      1.1    dyoung /*!\brief	add TCP pcb to vestigial timewait
   1898      1.1    dyoung  */
   1899      1.1    dyoung int
   1900      1.1    dyoung vtw_add(int af, struct tcpcb *tp)
   1901      1.1    dyoung {
   1902  1.9.2.1       tls #ifdef VTW_DEBUG
   1903      1.1    dyoung 	int		enable;
   1904  1.9.2.1       tls #endif
   1905      1.1    dyoung 	vtw_ctl_t	*ctl;
   1906      1.1    dyoung 	vtw_t		*vtw;
   1907      1.1    dyoung 
   1908      1.1    dyoung 	KASSERT(mutex_owned(softnet_lock));
   1909      1.1    dyoung 
   1910      1.1    dyoung 	ctl = vtw_control(af, tp->t_msl);
   1911      1.1    dyoung 	if (!ctl)
   1912      1.1    dyoung 		return 0;
   1913      1.1    dyoung 
   1914  1.9.2.1       tls #ifdef VTW_DEBUG
   1915      1.1    dyoung 	enable = (af == AF_INET) ? tcp4_vtw_enable : tcp6_vtw_enable;
   1916  1.9.2.1       tls #endif
   1917      1.1    dyoung 
   1918      1.1    dyoung 	vtw = vtw_alloc(ctl);
   1919      1.1    dyoung 
   1920      1.1    dyoung 	if (vtw) {
   1921      1.1    dyoung 		vtw->snd_nxt = tp->snd_nxt;
   1922      1.1    dyoung 		vtw->rcv_nxt = tp->rcv_nxt;
   1923      1.1    dyoung 
   1924      1.1    dyoung 		switch (af) {
   1925      1.1    dyoung 		case AF_INET: {
   1926      1.1    dyoung 			struct inpcb	*inp = tp->t_inpcb;
   1927      1.1    dyoung 			vtw_v4_t	*v4  = (void*)vtw;
   1928      1.1    dyoung 
   1929      1.1    dyoung 			v4->faddr = inp->inp_faddr.s_addr;
   1930      1.1    dyoung 			v4->laddr = inp->inp_laddr.s_addr;
   1931      1.1    dyoung 			v4->fport = inp->inp_fport;
   1932      1.1    dyoung 			v4->lport = inp->inp_lport;
   1933      1.1    dyoung 
   1934      1.1    dyoung 			vtw->reuse_port = !!(inp->inp_socket->so_options
   1935      1.1    dyoung 					     & SO_REUSEPORT);
   1936      1.1    dyoung 			vtw->reuse_addr = !!(inp->inp_socket->so_options
   1937      1.1    dyoung 					     & SO_REUSEADDR);
   1938      1.1    dyoung 			vtw->v6only	= 0;
   1939      1.1    dyoung 			vtw->uid	= inp->inp_socket->so_uidinfo->ui_uid;
   1940      1.1    dyoung 
   1941      1.1    dyoung 			vtw_inshash_v4(ctl, vtw);
   1942      1.1    dyoung 
   1943      1.1    dyoung 
   1944      1.1    dyoung #ifdef VTW_DEBUG
   1945      1.1    dyoung 			/* Immediate lookup (connected and port) to
   1946      1.1    dyoung 			 * ensure at least that works!
   1947      1.1    dyoung 			 */
   1948      1.1    dyoung 			if (enable & 4) {
   1949      1.1    dyoung 				KASSERT(vtw_lookup_hash_v4
   1950      1.1    dyoung 					(ctl
   1951      1.1    dyoung 					 , inp->inp_faddr.s_addr, inp->inp_fport
   1952      1.1    dyoung 					 , inp->inp_laddr.s_addr, inp->inp_lport
   1953      1.1    dyoung 					 , 0)
   1954      1.1    dyoung 					== vtw);
   1955      1.1    dyoung 				KASSERT(vtw_lookup_hash_v4
   1956      1.1    dyoung 					(ctl
   1957      1.1    dyoung 					 , inp->inp_faddr.s_addr, inp->inp_fport
   1958      1.1    dyoung 					 , inp->inp_laddr.s_addr, inp->inp_lport
   1959      1.1    dyoung 					 , 1));
   1960      1.1    dyoung 			}
   1961      1.1    dyoung 			/* Immediate port iterator functionality check: not wild
   1962      1.1    dyoung 			 */
   1963      1.1    dyoung 			if (enable & 8) {
   1964      1.1    dyoung 				struct tcp_ports_iterator *it;
   1965      1.1    dyoung 				struct vestigial_inpcb res;
   1966      1.1    dyoung 				int cnt = 0;
   1967      1.1    dyoung 
   1968      1.1    dyoung 				it = tcp_init_ports_v4(inp->inp_laddr
   1969      1.1    dyoung 						       , inp->inp_lport, 0);
   1970      1.1    dyoung 
   1971      1.1    dyoung 				while (tcp_next_port_v4(it, &res)) {
   1972      1.1    dyoung 					++cnt;
   1973      1.1    dyoung 				}
   1974      1.1    dyoung 				KASSERT(cnt);
   1975      1.1    dyoung 			}
   1976      1.1    dyoung 			/* Immediate port iterator functionality check: wild
   1977      1.1    dyoung 			 */
   1978      1.1    dyoung 			if (enable & 16) {
   1979      1.1    dyoung 				struct tcp_ports_iterator *it;
   1980      1.1    dyoung 				struct vestigial_inpcb res;
   1981      1.1    dyoung 				struct in_addr any;
   1982      1.1    dyoung 				int cnt = 0;
   1983      1.1    dyoung 
   1984      1.1    dyoung 				any.s_addr = htonl(INADDR_ANY);
   1985      1.1    dyoung 
   1986      1.1    dyoung 				it = tcp_init_ports_v4(any, inp->inp_lport, 1);
   1987      1.1    dyoung 
   1988      1.1    dyoung 				while (tcp_next_port_v4(it, &res)) {
   1989      1.1    dyoung 					++cnt;
   1990      1.1    dyoung 				}
   1991      1.1    dyoung 				KASSERT(cnt);
   1992      1.1    dyoung 			}
   1993      1.1    dyoung #endif /* VTW_DEBUG */
   1994      1.1    dyoung 			break;
   1995      1.1    dyoung 		}
   1996      1.1    dyoung 
   1997      1.1    dyoung 		case AF_INET6: {
   1998      1.1    dyoung 			struct in6pcb	*inp = tp->t_in6pcb;
   1999      1.1    dyoung 			vtw_v6_t	*v6  = (void*)vtw;
   2000      1.1    dyoung 
   2001      1.1    dyoung 			v6->faddr = inp->in6p_faddr;
   2002      1.1    dyoung 			v6->laddr = inp->in6p_laddr;
   2003      1.1    dyoung 			v6->fport = inp->in6p_fport;
   2004      1.1    dyoung 			v6->lport = inp->in6p_lport;
   2005      1.1    dyoung 
   2006      1.1    dyoung 			vtw->reuse_port = !!(inp->in6p_socket->so_options
   2007      1.1    dyoung 					     & SO_REUSEPORT);
   2008      1.1    dyoung 			vtw->reuse_addr = !!(inp->in6p_socket->so_options
   2009      1.1    dyoung 					     & SO_REUSEADDR);
   2010      1.1    dyoung 			vtw->v6only	= !!(inp->in6p_flags
   2011      1.1    dyoung 					     & IN6P_IPV6_V6ONLY);
   2012      1.1    dyoung 			vtw->uid	= inp->in6p_socket->so_uidinfo->ui_uid;
   2013      1.1    dyoung 
   2014      1.1    dyoung 			vtw_inshash_v6(ctl, vtw);
   2015      1.1    dyoung #ifdef VTW_DEBUG
   2016      1.1    dyoung 			/* Immediate lookup (connected and port) to
   2017      1.1    dyoung 			 * ensure at least that works!
   2018      1.1    dyoung 			 */
   2019      1.1    dyoung 			if (enable & 4) {
   2020      1.1    dyoung 				KASSERT(vtw_lookup_hash_v6(ctl
   2021      1.1    dyoung 					 , &inp->in6p_faddr, inp->in6p_fport
   2022      1.1    dyoung 					 , &inp->in6p_laddr, inp->in6p_lport
   2023      1.1    dyoung 					 , 0)
   2024      1.1    dyoung 					== vtw);
   2025      1.1    dyoung 				KASSERT(vtw_lookup_hash_v6
   2026      1.1    dyoung 					(ctl
   2027      1.1    dyoung 					 , &inp->in6p_faddr, inp->in6p_fport
   2028      1.1    dyoung 					 , &inp->in6p_laddr, inp->in6p_lport
   2029      1.1    dyoung 					 , 1));
   2030      1.1    dyoung 			}
   2031      1.1    dyoung 			/* Immediate port iterator functionality check: not wild
   2032      1.1    dyoung 			 */
   2033      1.1    dyoung 			if (enable & 8) {
   2034      1.1    dyoung 				struct tcp_ports_iterator *it;
   2035      1.1    dyoung 				struct vestigial_inpcb res;
   2036      1.1    dyoung 				int cnt = 0;
   2037      1.1    dyoung 
   2038      1.1    dyoung 				it = tcp_init_ports_v6(&inp->in6p_laddr
   2039      1.1    dyoung 						       , inp->in6p_lport, 0);
   2040      1.1    dyoung 
   2041      1.1    dyoung 				while (tcp_next_port_v6(it, &res)) {
   2042      1.1    dyoung 					++cnt;
   2043      1.1    dyoung 				}
   2044      1.1    dyoung 				KASSERT(cnt);
   2045      1.1    dyoung 			}
   2046      1.1    dyoung 			/* Immediate port iterator functionality check: wild
   2047      1.1    dyoung 			 */
   2048      1.1    dyoung 			if (enable & 16) {
   2049      1.1    dyoung 				struct tcp_ports_iterator *it;
   2050      1.1    dyoung 				struct vestigial_inpcb res;
   2051      1.1    dyoung 				static struct in6_addr any = IN6ADDR_ANY_INIT;
   2052      1.1    dyoung 				int cnt = 0;
   2053      1.1    dyoung 
   2054      1.1    dyoung 				it = tcp_init_ports_v6(&any
   2055      1.1    dyoung 						       , inp->in6p_lport, 1);
   2056      1.1    dyoung 
   2057      1.1    dyoung 				while (tcp_next_port_v6(it, &res)) {
   2058      1.1    dyoung 					++cnt;
   2059      1.1    dyoung 				}
   2060      1.1    dyoung 				KASSERT(cnt);
   2061      1.1    dyoung 			}
   2062      1.1    dyoung #endif /* VTW_DEBUG */
   2063      1.1    dyoung 			break;
   2064      1.1    dyoung 		}
   2065      1.1    dyoung 		}
   2066      1.1    dyoung 
   2067      1.1    dyoung 		tcp_canceltimers(tp);
   2068      1.1    dyoung 		tp = tcp_close(tp);
   2069      1.1    dyoung 		KASSERT(!tp);
   2070      1.1    dyoung 
   2071      1.1    dyoung 		return 1;
   2072      1.1    dyoung 	}
   2073      1.1    dyoung 
   2074      1.1    dyoung 	return 0;
   2075      1.1    dyoung }
   2076      1.1    dyoung 
   2077      1.1    dyoung /*!\brief	restart timer for vestigial time-wait entry
   2078      1.1    dyoung  */
   2079      1.1    dyoung static void
   2080      1.1    dyoung vtw_restart_v4(vestigial_inpcb_t *vp)
   2081      1.1    dyoung {
   2082      1.1    dyoung 	vtw_v4_t	copy = *(vtw_v4_t*)vp->vtw;
   2083      1.1    dyoung 	vtw_t		*vtw;
   2084      1.1    dyoung 	vtw_t		*cp  = &copy.common;
   2085      1.1    dyoung 	vtw_ctl_t	*ctl;
   2086      1.1    dyoung 
   2087      1.1    dyoung 	KASSERT(mutex_owned(softnet_lock));
   2088      1.1    dyoung 
   2089      1.1    dyoung 	db_trace(KTR_VTW
   2090      1.1    dyoung 		 , (vp->vtw, "vtw: restart %A:%P %A:%P"
   2091      1.1    dyoung 		    , vp->faddr.v4.s_addr, vp->fport
   2092      1.1    dyoung 		    , vp->laddr.v4.s_addr, vp->lport));
   2093      1.1    dyoung 
   2094      1.1    dyoung 	/* Class might have changed, so have a squiz.
   2095      1.1    dyoung 	 */
   2096      1.1    dyoung 	ctl = vtw_control(AF_INET, class_to_msl(cp->msl_class));
   2097      1.1    dyoung 	vtw = vtw_alloc(ctl);
   2098      1.1    dyoung 
   2099      1.1    dyoung 	if (vtw) {
   2100      1.1    dyoung 		vtw_v4_t	*v4  = (void*)vtw;
   2101      1.1    dyoung 
   2102      1.1    dyoung 		/* Safe now to unhash the old entry
   2103      1.1    dyoung 		 */
   2104      1.1    dyoung 		vtw_del(vp->ctl, vp->vtw);
   2105      1.1    dyoung 
   2106      1.1    dyoung 		vtw->snd_nxt = cp->snd_nxt;
   2107      1.1    dyoung 		vtw->rcv_nxt = cp->rcv_nxt;
   2108      1.1    dyoung 
   2109      1.1    dyoung 		v4->faddr = copy.faddr;
   2110      1.1    dyoung 		v4->laddr = copy.laddr;
   2111      1.1    dyoung 		v4->fport = copy.fport;
   2112      1.1    dyoung 		v4->lport = copy.lport;
   2113      1.1    dyoung 
   2114      1.1    dyoung 		vtw->reuse_port = cp->reuse_port;
   2115      1.1    dyoung 		vtw->reuse_addr = cp->reuse_addr;
   2116      1.1    dyoung 		vtw->v6only	= 0;
   2117      1.1    dyoung 		vtw->uid	= cp->uid;
   2118      1.1    dyoung 
   2119      1.1    dyoung 		vtw_inshash_v4(ctl, vtw);
   2120      1.1    dyoung 	}
   2121      1.1    dyoung 
   2122      1.1    dyoung 	vp->valid = 0;
   2123      1.1    dyoung }
   2124      1.1    dyoung 
   2125      1.1    dyoung /*!\brief	restart timer for vestigial time-wait entry
   2126      1.1    dyoung  */
   2127      1.1    dyoung static void
   2128      1.1    dyoung vtw_restart_v6(vestigial_inpcb_t *vp)
   2129      1.1    dyoung {
   2130      1.1    dyoung 	vtw_v6_t	copy = *(vtw_v6_t*)vp->vtw;
   2131      1.1    dyoung 	vtw_t		*vtw;
   2132      1.1    dyoung 	vtw_t		*cp  = &copy.common;
   2133      1.1    dyoung 	vtw_ctl_t	*ctl;
   2134      1.1    dyoung 
   2135      1.1    dyoung 	KASSERT(mutex_owned(softnet_lock));
   2136      1.1    dyoung 
   2137      1.1    dyoung 	db_trace(KTR_VTW
   2138      1.1    dyoung 		 , (vp->vtw, "vtw: restart %6A:%P %6A:%P"
   2139      1.1    dyoung 		    , db_store(&vp->faddr.v6, sizeof (vp->faddr.v6))
   2140      1.1    dyoung 		    , vp->fport
   2141      1.1    dyoung 		    , db_store(&vp->laddr.v6, sizeof (vp->laddr.v6))
   2142      1.1    dyoung 		    , vp->lport));
   2143      1.1    dyoung 
   2144      1.1    dyoung 	/* Class might have changed, so have a squiz.
   2145      1.1    dyoung 	 */
   2146      1.1    dyoung 	ctl = vtw_control(AF_INET6, class_to_msl(cp->msl_class));
   2147      1.1    dyoung 	vtw = vtw_alloc(ctl);
   2148      1.1    dyoung 
   2149      1.1    dyoung 	if (vtw) {
   2150      1.1    dyoung 		vtw_v6_t	*v6  = (void*)vtw;
   2151      1.1    dyoung 
   2152      1.1    dyoung 		/* Safe now to unhash the old entry
   2153      1.1    dyoung 		 */
   2154      1.1    dyoung 		vtw_del(vp->ctl, vp->vtw);
   2155      1.1    dyoung 
   2156      1.1    dyoung 		vtw->snd_nxt = cp->snd_nxt;
   2157      1.1    dyoung 		vtw->rcv_nxt = cp->rcv_nxt;
   2158      1.1    dyoung 
   2159      1.1    dyoung 		v6->faddr = copy.faddr;
   2160      1.1    dyoung 		v6->laddr = copy.laddr;
   2161      1.1    dyoung 		v6->fport = copy.fport;
   2162      1.1    dyoung 		v6->lport = copy.lport;
   2163      1.1    dyoung 
   2164      1.1    dyoung 		vtw->reuse_port = cp->reuse_port;
   2165      1.1    dyoung 		vtw->reuse_addr = cp->reuse_addr;
   2166      1.1    dyoung 		vtw->v6only	= cp->v6only;
   2167      1.1    dyoung 		vtw->uid	= cp->uid;
   2168      1.1    dyoung 
   2169      1.1    dyoung 		vtw_inshash_v6(ctl, vtw);
   2170      1.1    dyoung 	}
   2171      1.1    dyoung 
   2172      1.1    dyoung 	vp->valid = 0;
   2173      1.1    dyoung }
   2174      1.1    dyoung 
   2175      1.1    dyoung /*!\brief	restart timer for vestigial time-wait entry
   2176      1.1    dyoung  */
   2177      1.1    dyoung void
   2178      1.1    dyoung vtw_restart(vestigial_inpcb_t *vp)
   2179      1.1    dyoung {
   2180      1.1    dyoung 	if (!vp || !vp->valid)
   2181      1.1    dyoung 		return;
   2182      1.1    dyoung 
   2183      1.1    dyoung 	if (vp->v4)
   2184      1.1    dyoung 		vtw_restart_v4(vp);
   2185      1.1    dyoung 	else
   2186      1.1    dyoung 		vtw_restart_v6(vp);
   2187      1.1    dyoung }
   2188      1.1    dyoung 
   2189      1.1    dyoung int
   2190      1.7    dyoung sysctl_tcp_vtw_enable(SYSCTLFN_ARGS)
   2191      1.7    dyoung {
   2192      1.7    dyoung 	int en, rc;
   2193      1.7    dyoung 	struct sysctlnode node;
   2194      1.7    dyoung 
   2195      1.7    dyoung 	node = *rnode;
   2196      1.7    dyoung 	en = *(int *)rnode->sysctl_data;
   2197      1.7    dyoung 	node.sysctl_data = &en;
   2198      1.7    dyoung 
   2199      1.7    dyoung 	rc = sysctl_lookup(SYSCTLFN_CALL(&node));
   2200      1.7    dyoung 	if (rc != 0 || newp == NULL)
   2201      1.7    dyoung 		return rc;
   2202      1.7    dyoung 
   2203      1.7    dyoung 	if (rnode->sysctl_data != &tcp4_vtw_enable &&
   2204      1.7    dyoung 	    rnode->sysctl_data != &tcp6_vtw_enable)
   2205      1.7    dyoung 		rc = ENOENT;
   2206      1.7    dyoung 	else if ((en & 1) == 0)
   2207      1.7    dyoung 		rc = 0;
   2208      1.7    dyoung 	else if (rnode->sysctl_data == &tcp4_vtw_enable)
   2209      1.7    dyoung 		rc = vtw_control_init(AF_INET);
   2210      1.7    dyoung 	else /* rnode->sysctl_data == &tcp6_vtw_enable */
   2211      1.7    dyoung 		rc = vtw_control_init(AF_INET6);
   2212      1.7    dyoung 
   2213      1.7    dyoung 	if (rc == 0)
   2214      1.7    dyoung 		*(int *)rnode->sysctl_data = en;
   2215      1.7    dyoung 
   2216      1.7    dyoung 	return rc;
   2217      1.7    dyoung }
   2218      1.7    dyoung 
   2219      1.7    dyoung int
   2220      1.1    dyoung vtw_earlyinit(void)
   2221      1.1    dyoung {
   2222      1.5    dyoung 	int i, rc;
   2223      1.1    dyoung 
   2224      1.5    dyoung 	callout_init(&vtw_cs, 0);
   2225      1.5    dyoung 	callout_setfunc(&vtw_cs, vtw_tick, 0);
   2226      1.1    dyoung 
   2227      1.5    dyoung 	for (i = 0; i < VTW_NCLASS; ++i) {
   2228      1.5    dyoung 		vtw_tcpv4[i].is_v4 = 1;
   2229      1.5    dyoung 		vtw_tcpv6[i].is_v6 = 1;
   2230      1.1    dyoung 	}
   2231      1.1    dyoung 
   2232      1.7    dyoung 	if ((tcp4_vtw_enable & 1) != 0 &&
   2233      1.7    dyoung 	    (rc = vtw_control_init(AF_INET)) != 0)
   2234      1.7    dyoung 		return rc;
   2235      1.7    dyoung 
   2236      1.7    dyoung 	if ((tcp6_vtw_enable & 1) != 0 &&
   2237      1.1    dyoung 	    (rc = vtw_control_init(AF_INET6)) != 0)
   2238      1.1    dyoung 		return rc;
   2239      1.1    dyoung 
   2240      1.1    dyoung 	return 0;
   2241      1.1    dyoung }
   2242      1.1    dyoung 
   2243      1.1    dyoung #ifdef VTW_DEBUG
   2244      1.1    dyoung #include <sys/syscallargs.h>
   2245      1.1    dyoung #include <sys/sysctl.h>
   2246      1.1    dyoung 
   2247      1.1    dyoung /*!\brief	add lalp, fafp entries for debug
   2248      1.1    dyoung  */
   2249      1.1    dyoung int
   2250  1.9.2.2  jdolecek vtw_debug_add(int af, sin_either_t *la, sin_either_t *fa, int msl, int msl_class)
   2251      1.1    dyoung {
   2252      1.1    dyoung 	vtw_ctl_t	*ctl;
   2253      1.1    dyoung 	vtw_t		*vtw;
   2254      1.1    dyoung 
   2255  1.9.2.2  jdolecek 	ctl = vtw_control(af, msl ? msl : class_to_msl(msl_class));
   2256      1.1    dyoung 	if (!ctl)
   2257      1.1    dyoung 		return 0;
   2258      1.1    dyoung 
   2259      1.1    dyoung 	vtw = vtw_alloc(ctl);
   2260      1.1    dyoung 
   2261      1.1    dyoung 	if (vtw) {
   2262      1.1    dyoung 		vtw->snd_nxt = 0;
   2263      1.1    dyoung 		vtw->rcv_nxt = 0;
   2264      1.1    dyoung 
   2265      1.1    dyoung 		switch (af) {
   2266      1.1    dyoung 		case AF_INET: {
   2267      1.1    dyoung 			vtw_v4_t	*v4  = (void*)vtw;
   2268      1.1    dyoung 
   2269      1.1    dyoung 			v4->faddr = fa->sin_addr.v4.s_addr;
   2270      1.1    dyoung 			v4->laddr = la->sin_addr.v4.s_addr;
   2271      1.1    dyoung 			v4->fport = fa->sin_port;
   2272      1.1    dyoung 			v4->lport = la->sin_port;
   2273      1.1    dyoung 
   2274      1.1    dyoung 			vtw->reuse_port = 1;
   2275      1.1    dyoung 			vtw->reuse_addr = 1;
   2276      1.1    dyoung 			vtw->v6only	= 0;
   2277      1.1    dyoung 			vtw->uid	= 0;
   2278      1.1    dyoung 
   2279      1.1    dyoung 			vtw_inshash_v4(ctl, vtw);
   2280      1.1    dyoung 			break;
   2281      1.1    dyoung 		}
   2282      1.1    dyoung 
   2283      1.1    dyoung 		case AF_INET6: {
   2284      1.1    dyoung 			vtw_v6_t	*v6  = (void*)vtw;
   2285      1.1    dyoung 
   2286      1.1    dyoung 			v6->faddr = fa->sin_addr.v6;
   2287      1.1    dyoung 			v6->laddr = la->sin_addr.v6;
   2288      1.1    dyoung 
   2289      1.1    dyoung 			v6->fport = fa->sin_port;
   2290      1.1    dyoung 			v6->lport = la->sin_port;
   2291      1.1    dyoung 
   2292      1.1    dyoung 			vtw->reuse_port = 1;
   2293      1.1    dyoung 			vtw->reuse_addr = 1;
   2294      1.1    dyoung 			vtw->v6only	= 0;
   2295      1.1    dyoung 			vtw->uid	= 0;
   2296      1.1    dyoung 
   2297      1.1    dyoung 			vtw_inshash_v6(ctl, vtw);
   2298      1.1    dyoung 			break;
   2299      1.1    dyoung 		}
   2300      1.1    dyoung 
   2301      1.1    dyoung 		default:
   2302      1.1    dyoung 			break;
   2303      1.1    dyoung 		}
   2304      1.1    dyoung 
   2305      1.1    dyoung 		return 1;
   2306      1.1    dyoung 	}
   2307      1.1    dyoung 
   2308      1.1    dyoung 	return 0;
   2309      1.1    dyoung }
   2310      1.1    dyoung 
   2311      1.1    dyoung static int vtw_syscall = 0;
   2312      1.1    dyoung 
   2313      1.1    dyoung static int
   2314      1.1    dyoung vtw_debug_process(vtw_sysargs_t *ap)
   2315      1.1    dyoung {
   2316      1.1    dyoung 	struct vestigial_inpcb vestige;
   2317      1.1    dyoung 	int	rc = 0;
   2318      1.1    dyoung 
   2319      1.1    dyoung 	mutex_enter(softnet_lock);
   2320      1.1    dyoung 
   2321      1.1    dyoung 	switch (ap->op) {
   2322      1.1    dyoung 	case 0:		// insert
   2323      1.1    dyoung 		vtw_debug_add(ap->la.sin_family
   2324      1.1    dyoung 			      , &ap->la
   2325      1.1    dyoung 			      , &ap->fa
   2326      1.1    dyoung 			      , TCPTV_MSL
   2327      1.1    dyoung 			      , 0);
   2328      1.1    dyoung 		break;
   2329      1.1    dyoung 
   2330      1.1    dyoung 	case 1:		// lookup
   2331      1.1    dyoung 	case 2:		// restart
   2332      1.1    dyoung 		switch (ap->la.sin_family) {
   2333      1.1    dyoung 		case AF_INET:
   2334      1.1    dyoung 			if (tcp_lookup_v4(ap->fa.sin_addr.v4, ap->fa.sin_port,
   2335      1.1    dyoung 					  ap->la.sin_addr.v4, ap->la.sin_port,
   2336      1.1    dyoung 					  &vestige)) {
   2337      1.1    dyoung 				if (ap->op == 2) {
   2338      1.1    dyoung 					vtw_restart(&vestige);
   2339      1.1    dyoung 				}
   2340      1.1    dyoung 				rc = 0;
   2341      1.1    dyoung 			} else
   2342      1.1    dyoung 				rc = ESRCH;
   2343      1.1    dyoung 			break;
   2344      1.1    dyoung 
   2345      1.1    dyoung 		case AF_INET6:
   2346      1.1    dyoung 			if (tcp_lookup_v6(&ap->fa.sin_addr.v6, ap->fa.sin_port,
   2347      1.1    dyoung 					  &ap->la.sin_addr.v6, ap->la.sin_port,
   2348      1.1    dyoung 					  &vestige)) {
   2349      1.1    dyoung 				if (ap->op == 2) {
   2350      1.1    dyoung 					vtw_restart(&vestige);
   2351      1.1    dyoung 				}
   2352      1.1    dyoung 				rc = 0;
   2353      1.1    dyoung 			} else
   2354      1.1    dyoung 				rc = ESRCH;
   2355      1.1    dyoung 			break;
   2356      1.1    dyoung 		default:
   2357      1.1    dyoung 			rc = EINVAL;
   2358      1.1    dyoung 		}
   2359      1.1    dyoung 		break;
   2360      1.1    dyoung 
   2361      1.1    dyoung 	default:
   2362      1.1    dyoung 		rc = EINVAL;
   2363      1.1    dyoung 	}
   2364      1.1    dyoung 
   2365      1.1    dyoung 	mutex_exit(softnet_lock);
   2366      1.1    dyoung 	return rc;
   2367      1.1    dyoung }
   2368      1.1    dyoung 
   2369      1.1    dyoung struct sys_vtw_args {
   2370      1.1    dyoung 	syscallarg(const vtw_sysargs_t *) req;
   2371      1.1    dyoung 	syscallarg(size_t) len;
   2372      1.1    dyoung };
   2373      1.1    dyoung 
   2374      1.1    dyoung static int
   2375      1.1    dyoung vtw_sys(struct lwp *l, const void *_, register_t *retval)
   2376      1.1    dyoung {
   2377      1.1    dyoung 	const struct sys_vtw_args *uap = _;
   2378      1.1    dyoung 	void	*buf;
   2379      1.1    dyoung 	int	rc;
   2380      1.1    dyoung 	size_t	len	= SCARG(uap, len);
   2381      1.1    dyoung 
   2382      1.1    dyoung 	if (len != sizeof (vtw_sysargs_t))
   2383      1.1    dyoung 		return EINVAL;
   2384      1.1    dyoung 
   2385      1.1    dyoung 	buf = kmem_alloc(len, KM_SLEEP);
   2386      1.1    dyoung 	rc = copyin(SCARG(uap, req), buf, len);
   2387      1.1    dyoung 	if (!rc) {
   2388      1.1    dyoung 		rc = vtw_debug_process(buf);
   2389      1.1    dyoung 	}
   2390      1.1    dyoung 	kmem_free(buf, len);
   2391      1.1    dyoung 
   2392      1.1    dyoung 	return rc;
   2393      1.1    dyoung }
   2394      1.1    dyoung 
   2395      1.1    dyoung static void
   2396      1.1    dyoung vtw_sanity_check(void)
   2397      1.1    dyoung {
   2398      1.1    dyoung 	vtw_ctl_t	*ctl;
   2399      1.1    dyoung 	vtw_t		*vtw;
   2400      1.1    dyoung 	int		i;
   2401      1.1    dyoung 	int		n;
   2402      1.1    dyoung 
   2403      1.1    dyoung 	for (i = 0; i < VTW_NCLASS; ++i) {
   2404      1.1    dyoung 		ctl = &vtw_tcpv4[i];
   2405      1.1    dyoung 
   2406      1.1    dyoung 		if (!ctl->base.v || ctl->nalloc)
   2407      1.1    dyoung 			continue;
   2408      1.1    dyoung 
   2409      1.1    dyoung 		for (n = 0, vtw = ctl->base.v; ; ) {
   2410      1.1    dyoung 			++n;
   2411      1.1    dyoung 			vtw = vtw_next(ctl, vtw);
   2412      1.1    dyoung 			if (vtw == ctl->base.v)
   2413      1.1    dyoung 				break;
   2414      1.1    dyoung 		}
   2415      1.1    dyoung 		db_trace(KTR_VTW
   2416      1.1    dyoung 			 , (ctl, "sanity: class %x n %x nfree %x"
   2417      1.1    dyoung 			    , i, n, ctl->nfree));
   2418      1.1    dyoung 
   2419      1.1    dyoung 		KASSERT(n == ctl->nfree);
   2420      1.1    dyoung 	}
   2421      1.1    dyoung 
   2422      1.1    dyoung 	for (i = 0; i < VTW_NCLASS; ++i) {
   2423      1.1    dyoung 		ctl = &vtw_tcpv6[i];
   2424      1.1    dyoung 
   2425      1.1    dyoung 		if (!ctl->base.v || ctl->nalloc)
   2426      1.1    dyoung 			continue;
   2427      1.1    dyoung 
   2428      1.1    dyoung 		for (n = 0, vtw = ctl->base.v; ; ) {
   2429      1.1    dyoung 			++n;
   2430      1.1    dyoung 			vtw = vtw_next(ctl, vtw);
   2431      1.1    dyoung 			if (vtw == ctl->base.v)
   2432      1.1    dyoung 				break;
   2433      1.1    dyoung 		}
   2434      1.1    dyoung 		db_trace(KTR_VTW
   2435      1.1    dyoung 			 , (ctl, "sanity: class %x n %x nfree %x"
   2436      1.1    dyoung 			    , i, n, ctl->nfree));
   2437      1.1    dyoung 		KASSERT(n == ctl->nfree);
   2438      1.1    dyoung 	}
   2439      1.1    dyoung }
   2440      1.1    dyoung 
   2441      1.1    dyoung /*!\brief	Initialise debug support.
   2442      1.1    dyoung  */
   2443      1.1    dyoung static void
   2444      1.1    dyoung vtw_debug_init(void)
   2445      1.1    dyoung {
   2446      1.1    dyoung 	int	i;
   2447      1.1    dyoung 
   2448      1.1    dyoung 	vtw_sanity_check();
   2449      1.1    dyoung 
   2450      1.1    dyoung 	if (vtw_syscall)
   2451      1.1    dyoung 		return;
   2452      1.1    dyoung 
   2453      1.1    dyoung 	for (i = 511; i; --i) {
   2454      1.1    dyoung 		if (sysent[i].sy_call == sys_nosys) {
   2455      1.1    dyoung 			sysent[i].sy_call    = vtw_sys;
   2456      1.1    dyoung 			sysent[i].sy_narg    = 2;
   2457      1.1    dyoung 			sysent[i].sy_argsize = sizeof (struct sys_vtw_args);
   2458      1.1    dyoung 			sysent[i].sy_flags   = 0;
   2459      1.1    dyoung 
   2460      1.1    dyoung 			vtw_syscall = i;
   2461      1.1    dyoung 			break;
   2462      1.1    dyoung 		}
   2463      1.1    dyoung 	}
   2464      1.1    dyoung 	if (i) {
   2465      1.1    dyoung 		const struct sysctlnode *node;
   2466      1.1    dyoung 		uint32_t	flags;
   2467      1.1    dyoung 
   2468      1.1    dyoung 		flags = sysctl_root.sysctl_flags;
   2469      1.1    dyoung 
   2470      1.1    dyoung 		sysctl_root.sysctl_flags |= CTLFLAG_READWRITE;
   2471      1.1    dyoung 		sysctl_root.sysctl_flags &= ~CTLFLAG_PERMANENT;
   2472      1.1    dyoung 
   2473      1.1    dyoung 		sysctl_createv(0, 0, 0, &node,
   2474      1.1    dyoung 			       CTLFLAG_PERMANENT, CTLTYPE_NODE,
   2475      1.1    dyoung 			       "koff",
   2476      1.1    dyoung 			       SYSCTL_DESCR("Kernel Obscure Feature Finder"),
   2477      1.1    dyoung 			       0, 0, 0, 0, CTL_CREATE, CTL_EOL);
   2478      1.1    dyoung 
   2479      1.1    dyoung 		if (!node) {
   2480      1.1    dyoung 			sysctl_createv(0, 0, 0, &node,
   2481      1.1    dyoung 				       CTLFLAG_PERMANENT, CTLTYPE_NODE,
   2482      1.1    dyoung 				       "koffka",
   2483      1.1    dyoung 				       SYSCTL_DESCR("The Real(tm) Kernel"
   2484      1.1    dyoung 						    " Obscure Feature Finder"),
   2485      1.1    dyoung 				       0, 0, 0, 0, CTL_CREATE, CTL_EOL);
   2486      1.1    dyoung 		}
   2487      1.1    dyoung 		if (node) {
   2488      1.1    dyoung 			sysctl_createv(0, 0, 0, 0,
   2489      1.1    dyoung 				       CTLFLAG_PERMANENT|CTLFLAG_READONLY,
   2490      1.1    dyoung 				       CTLTYPE_INT, "vtw_debug_syscall",
   2491      1.1    dyoung 				       SYSCTL_DESCR("vtw debug"
   2492      1.1    dyoung 						    " system call number"),
   2493      1.1    dyoung 				       0, 0, &vtw_syscall, 0, node->sysctl_num,
   2494      1.1    dyoung 				       CTL_CREATE, CTL_EOL);
   2495      1.1    dyoung 		}
   2496      1.1    dyoung 		sysctl_root.sysctl_flags = flags;
   2497      1.1    dyoung 	}
   2498      1.1    dyoung }
   2499      1.1    dyoung #else /* !VTW_DEBUG */
/* Debug support is compiled out: there is nothing to initialise. */
static void
vtw_debug_init(void)
{
}
   2505      1.1    dyoung #endif /* !VTW_DEBUG */
   2506