Home | History | Annotate | Line # | Download | only in netinet
tcp_vtw.c revision 1.16
      1   1.1    dyoung /*
      2   1.1    dyoung  * Copyright (c) 2011 The NetBSD Foundation, Inc.
      3   1.1    dyoung  * All rights reserved.
      4   1.1    dyoung  *
      5   1.1    dyoung  * This code is derived from software contributed to The NetBSD Foundation
      6   1.1    dyoung  * by Coyote Point Systems, Inc.
      7   1.1    dyoung  *
      8   1.1    dyoung  * Redistribution and use in source and binary forms, with or without
      9   1.1    dyoung  * modification, are permitted provided that the following conditions
     10   1.1    dyoung  * are met:
     11   1.1    dyoung  * 1. Redistributions of source code must retain the above copyright
     12   1.1    dyoung  *    notice, this list of conditions and the following disclaimer.
     13   1.1    dyoung  * 2. Redistributions in binary form must reproduce the above copyright
     14   1.1    dyoung  *    notice, this list of conditions and the following disclaimer in the
     15   1.1    dyoung  *    documentation and/or other materials provided with the distribution.
     16   1.1    dyoung  *
     17   1.1    dyoung  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     18   1.1    dyoung  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     19   1.1    dyoung  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     20   1.1    dyoung  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     21   1.1    dyoung  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     22   1.1    dyoung  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     23   1.1    dyoung  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     24   1.1    dyoung  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     25   1.1    dyoung  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     26   1.1    dyoung  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     27   1.1    dyoung  * POSSIBILITY OF SUCH DAMAGE.
     28   1.1    dyoung  */
     29   1.9      yamt 
     30   1.9      yamt /*
     31   1.9      yamt  * Reduces the resources demanded by TCP sessions in TIME_WAIT-state using
     32   1.9      yamt  * methods called Vestigial Time-Wait (VTW) and Maximum Segment Lifetime
     33   1.9      yamt  * Truncation (MSLT).
     34   1.9      yamt  *
     35   1.9      yamt  * MSLT and VTW were contributed by Coyote Point Systems, Inc.
     36   1.9      yamt  *
     37   1.9      yamt  * Even after a TCP session enters the TIME_WAIT state, its corresponding
     38   1.9      yamt  * socket and protocol control blocks (PCBs) stick around until the TCP
     39   1.9      yamt  * Maximum Segment Lifetime (MSL) expires.  On a host whose workload
     40   1.9      yamt  * necessarily creates and closes down many TCP sockets, the sockets & PCBs
     41   1.9      yamt  * for TCP sessions in TIME_WAIT state amount to many megabytes of dead
     42   1.9      yamt  * weight in RAM.
     43   1.9      yamt  *
     44   1.9      yamt  * Maximum Segment Lifetime Truncation (MSLT) assigns each TCP session to
     45   1.9      yamt  * a class based on the nearness of the peer.  Corresponding to each class
     46   1.9      yamt  * is an MSL, and a session uses the MSL of its class.  The classes are
     47   1.9      yamt  * loopback (local host equals remote host), local (local host and remote
     48   1.9      yamt  * host are on the same link/subnet), and remote (local host and remote
     49   1.9      yamt  * host communicate via one or more gateways).  Classes corresponding to
     50   1.9      yamt  * nearer peers have lower MSLs by default: 2 seconds for loopback, 10
     51   1.9      yamt  * seconds for local, 60 seconds for remote.  Loopback and local sessions
     52   1.9      yamt  * expire more quickly when MSLT is used.
     53   1.9      yamt  *
     54   1.9      yamt  * Vestigial Time-Wait (VTW) replaces a TIME_WAIT session's PCB/socket
     55   1.9      yamt  * dead weight with a compact representation of the session, called a
     56   1.9      yamt  * "vestigial PCB".  VTW data structures are designed to be very fast and
     57   1.9      yamt  * memory-efficient: for fast insertion and lookup of vestigial PCBs,
     58   1.9      yamt  * the PCBs are stored in a hash table that is designed to minimize the
     59   1.9      yamt  * number of cacheline visits per lookup/insertion.  The memory both
     60   1.9      yamt  * for vestigial PCBs and for elements of the PCB hashtable come from
     61   1.9      yamt  * fixed-size pools, and linked data structures exploit this to conserve
     62   1.9      yamt  * memory by representing references with a narrow index/offset from the
     63   1.9      yamt  * start of a pool instead of a pointer.  When space for new vestigial PCBs
     64   1.9      yamt  * runs out, VTW makes room by discarding old vestigial PCBs, oldest first.
     65   1.9      yamt  * VTW cooperates with MSLT.
     66   1.9      yamt  *
     67   1.9      yamt  * It may help to think of VTW as a "FIN cache" by analogy to the SYN
     68   1.9      yamt  * cache.
     69   1.9      yamt  *
     70   1.9      yamt  * A 2.8-GHz Pentium 4 running a test workload that creates TIME_WAIT
     71   1.9      yamt  * sessions as fast as it can is approximately 17% idle when VTW is active
     72   1.9      yamt  * versus 0% idle when VTW is inactive.  It has 103 megabytes more free RAM
     73   1.9      yamt  * when VTW is active (approximately 64k vestigial PCBs are created) than
     74   1.9      yamt  * when it is inactive.
     75   1.9      yamt  */
     76   1.9      yamt 
     77   1.1    dyoung #include <sys/cdefs.h>
     78   1.1    dyoung 
     79  1.14     pooka #ifdef _KERNEL_OPT
     80   1.1    dyoung #include "opt_ddb.h"
     81   1.1    dyoung #include "opt_inet.h"
     82   1.1    dyoung #include "opt_inet_csum.h"
     83   1.1    dyoung #include "opt_tcp_debug.h"
     84  1.14     pooka #endif
     85   1.1    dyoung 
     86   1.1    dyoung #include <sys/param.h>
     87   1.1    dyoung #include <sys/systm.h>
     88   1.1    dyoung #include <sys/kmem.h>
     89   1.1    dyoung #include <sys/mbuf.h>
     90   1.1    dyoung #include <sys/protosw.h>
     91   1.1    dyoung #include <sys/socket.h>
     92   1.1    dyoung #include <sys/socketvar.h>
     93   1.1    dyoung #include <sys/errno.h>
     94   1.1    dyoung #include <sys/syslog.h>
     95   1.1    dyoung #include <sys/pool.h>
     96   1.1    dyoung #include <sys/domain.h>
     97   1.1    dyoung #include <sys/kernel.h>
     98   1.1    dyoung #include <net/if.h>
     99   1.1    dyoung #include <net/if_types.h>
    100   1.1    dyoung 
    101   1.1    dyoung #include <netinet/in.h>
    102   1.1    dyoung #include <netinet/in_systm.h>
    103   1.1    dyoung #include <netinet/ip.h>
    104   1.1    dyoung #include <netinet/in_pcb.h>
    105   1.1    dyoung #include <netinet/in_var.h>
    106   1.1    dyoung #include <netinet/ip_var.h>
    107   1.1    dyoung #include <netinet/in_offload.h>
    108   1.1    dyoung #include <netinet/ip6.h>
    109   1.1    dyoung #include <netinet6/ip6_var.h>
    110   1.1    dyoung #include <netinet6/in6_pcb.h>
    111   1.1    dyoung #include <netinet6/ip6_var.h>
    112   1.1    dyoung #include <netinet6/in6_var.h>
    113   1.1    dyoung #include <netinet/icmp6.h>
    114   1.1    dyoung #include <netinet6/nd6.h>
    115   1.1    dyoung 
    116   1.1    dyoung #include <netinet/tcp.h>
    117   1.1    dyoung #include <netinet/tcp_fsm.h>
    118   1.1    dyoung #include <netinet/tcp_seq.h>
    119   1.1    dyoung #include <netinet/tcp_timer.h>
    120   1.1    dyoung #include <netinet/tcp_var.h>
    121   1.1    dyoung #include <netinet/tcp_private.h>
    122   1.1    dyoung #include <netinet/tcpip.h>
    123   1.1    dyoung 
    124   1.1    dyoung #include <netinet/tcp_vtw.h>
    125   1.1    dyoung 
    126  1.16    martin __KERNEL_RCSID(0, "$NetBSD: tcp_vtw.c,v 1.16 2016/07/28 07:54:31 martin Exp $");
    127   1.1    dyoung 
    128   1.1    dyoung #define db_trace(__a, __b)	do { } while (/*CONSTCOND*/0)
    129   1.1    dyoung 
    130   1.1    dyoung static void vtw_debug_init(void);
    131   1.1    dyoung 
    132   1.1    dyoung fatp_ctl_t fat_tcpv4;
    133   1.1    dyoung fatp_ctl_t fat_tcpv6;
    134   1.1    dyoung vtw_ctl_t  vtw_tcpv4[VTW_NCLASS];
    135   1.1    dyoung vtw_ctl_t  vtw_tcpv6[VTW_NCLASS];
    136   1.1    dyoung vtw_stats_t vtw_stats;
    137   1.1    dyoung 
    138   1.1    dyoung /* We provide state for the lookup_ports iterator.
    139   1.1    dyoung  * As currently we are netlock-protected, there is one.
    140   1.1    dyoung  * If we were finer-grain, we would have one per CPU.
    141   1.1    dyoung  * I do not want to be in the business of alloc/free.
    142   1.1    dyoung  * The best alternate would be allocate on the caller's
    143   1.1    dyoung  * stack, but that would require them to know the struct,
    144   1.1    dyoung  * or at least the size.
    145   1.1    dyoung  * See how she goes.
    146   1.1    dyoung  */
    147   1.1    dyoung struct tcp_ports_iterator {
    148   1.1    dyoung 	union {
    149   1.1    dyoung 		struct in_addr	v4;
    150   1.1    dyoung 		struct in6_addr	v6;
    151   1.1    dyoung 	}		addr;
    152   1.1    dyoung 	u_int		port;
    153   1.1    dyoung 
    154   1.1    dyoung 	uint32_t	wild	: 1;
    155   1.1    dyoung 
    156   1.1    dyoung 	vtw_ctl_t	*ctl;
    157   1.1    dyoung 	fatp_t		*fp;
    158   1.1    dyoung 
    159   1.1    dyoung 	uint16_t	slot_idx;
    160   1.1    dyoung 	uint16_t	ctl_idx;
    161   1.1    dyoung };
    162   1.1    dyoung 
    163   1.1    dyoung static struct tcp_ports_iterator tcp_ports_iterator_v4;
    164   1.1    dyoung static struct tcp_ports_iterator tcp_ports_iterator_v6;
    165   1.1    dyoung 
    166   1.1    dyoung static int vtw_age(vtw_ctl_t *, struct timeval *);
    167   1.1    dyoung 
    168   1.1    dyoung /*!\brief allocate a fat pointer from a collection.
    169   1.1    dyoung  */
    170   1.1    dyoung static fatp_t *
    171   1.1    dyoung fatp_alloc(fatp_ctl_t *fat)
    172   1.1    dyoung {
    173   1.1    dyoung 	fatp_t	*fp	= 0;
    174   1.1    dyoung 
    175   1.1    dyoung 	if (fat->nfree) {
    176   1.1    dyoung 		fp = fat->free;
    177   1.1    dyoung 		if (fp) {
    178   1.1    dyoung 			fat->free = fatp_next(fat, fp);
    179   1.1    dyoung 			--fat->nfree;
    180   1.1    dyoung 			++fat->nalloc;
    181   1.1    dyoung 			fp->nxt = 0;
    182   1.1    dyoung 
    183   1.1    dyoung 			KASSERT(!fp->inuse);
    184   1.1    dyoung 		}
    185   1.1    dyoung 	}
    186   1.1    dyoung 
    187   1.1    dyoung 	return fp;
    188   1.1    dyoung }
    189   1.1    dyoung 
    190   1.1    dyoung /*!\brief free a fat pointer.
    191   1.1    dyoung  */
    192   1.1    dyoung static void
    193   1.1    dyoung fatp_free(fatp_ctl_t *fat, fatp_t *fp)
    194   1.1    dyoung {
    195   1.1    dyoung 	if (fp) {
    196   1.1    dyoung 		KASSERT(!fp->inuse);
    197   1.1    dyoung 		KASSERT(!fp->nxt);
    198   1.1    dyoung 
    199   1.1    dyoung 		fp->nxt = fatp_index(fat, fat->free);
    200   1.1    dyoung 		fat->free = fp;
    201   1.1    dyoung 
    202   1.1    dyoung 		++fat->nfree;
    203   1.1    dyoung 		--fat->nalloc;
    204   1.1    dyoung 	}
    205   1.1    dyoung }
    206   1.1    dyoung 
    207   1.1    dyoung /*!\brief initialise a collection of fat pointers.
    208   1.1    dyoung  *
    209   1.1    dyoung  *\param n	# hash buckets
    210   1.1    dyoung  *\param m	total # fat pointers to allocate
    211   1.1    dyoung  *
    212   1.1    dyoung  * We allocate 2x as much, as we have two hashes: full and lport only.
    213   1.1    dyoung  */
    214   1.1    dyoung static void
    215   1.6    dyoung fatp_init(fatp_ctl_t *fat, uint32_t n, uint32_t m,
    216   1.6    dyoung     fatp_t *fat_base, fatp_t **fat_hash)
    217   1.1    dyoung {
    218   1.1    dyoung 	fatp_t	*fp;
    219   1.1    dyoung 
    220   1.1    dyoung 	KASSERT(n <= FATP_MAX / 2);
    221   1.1    dyoung 
    222   1.6    dyoung 	fat->hash = fat_hash;
    223   1.6    dyoung 	fat->base = fat_base;
    224   1.1    dyoung 
    225   1.1    dyoung 	fat->port = &fat->hash[m];
    226   1.1    dyoung 
    227   1.1    dyoung 	fat->mask   = m - 1;	// ASSERT is power of 2 (m)
    228   1.1    dyoung 	fat->lim    = fat->base + 2*n - 1;
    229   1.1    dyoung 	fat->nfree  = 0;
    230   1.1    dyoung 	fat->nalloc = 2*n;
    231   1.1    dyoung 
    232   1.1    dyoung 	/* Initialise the free list.
    233   1.1    dyoung 	 */
    234   1.1    dyoung 	for (fp = fat->lim; fp >= fat->base; --fp) {
    235   1.1    dyoung 		fatp_free(fat, fp);
    236   1.1    dyoung 	}
    237   1.1    dyoung }
    238   1.1    dyoung 
/*
 * Per-slot constants.  The `xtra' value for a slot is XORed into the
 * tag stored in that slot, and XORed again when the tag is checked.
 */
static uint32_t fatp_xtra[] = {
	0x11111111, 0x22222222, 0x33333333, 0x44444444,
	0x55555555, 0x66666666, 0x77777777, 0x88888888,

	0x12121212, 0x21212121, 0x34343434, 0x43434343,
	0x56565656, 0x65656565, 0x78787878, 0x87878787,

	0x11221122, 0x22112211, 0x33443344, 0x44334433,
	0x55665566, 0x66556655, 0x77887788, 0x88778877,

	0x11112222, 0x22221111, 0x33334444, 0x44443333,
	0x55556666, 0x66665555, 0x77778888, 0x88887777,
};
    252   1.1    dyoung 
    253   1.1    dyoung /*!\brief turn a {fatp_t*,slot} into an integral key.
    254   1.1    dyoung  *
    255   1.1    dyoung  * The key can be used to obtain the fatp_t, and the slot,
    256   1.1    dyoung  * as it directly encodes them.
    257   1.1    dyoung  */
    258   1.1    dyoung static inline uint32_t
    259   1.1    dyoung fatp_key(fatp_ctl_t *fat, fatp_t *fp, uint32_t slot)
    260   1.1    dyoung {
    261   1.1    dyoung 	CTASSERT(CACHE_LINE_SIZE == 32 ||
    262   1.1    dyoung 	         CACHE_LINE_SIZE == 64 ||
    263   1.1    dyoung 		 CACHE_LINE_SIZE == 128);
    264   1.1    dyoung 
    265   1.1    dyoung 	switch (fatp_ntags()) {
    266   1.1    dyoung 	case 7:
    267   1.1    dyoung 		return (fatp_index(fat, fp) << 3) | slot;
    268   1.1    dyoung 	case 15:
    269   1.1    dyoung 		return (fatp_index(fat, fp) << 4) | slot;
    270   1.1    dyoung 	case 31:
    271   1.1    dyoung 		return (fatp_index(fat, fp) << 5) | slot;
    272   1.1    dyoung 	default:
    273   1.1    dyoung 		KASSERT(0 && "no support, for no good reason");
    274   1.1    dyoung 		return ~0;
    275   1.1    dyoung 	}
    276   1.1    dyoung }
    277   1.1    dyoung 
    278   1.1    dyoung static inline uint32_t
    279   1.1    dyoung fatp_slot_from_key(fatp_ctl_t *fat, uint32_t key)
    280   1.1    dyoung {
    281   1.1    dyoung 	CTASSERT(CACHE_LINE_SIZE == 32 ||
    282   1.1    dyoung 	         CACHE_LINE_SIZE == 64 ||
    283   1.1    dyoung 		 CACHE_LINE_SIZE == 128);
    284   1.1    dyoung 
    285   1.1    dyoung 	switch (fatp_ntags()) {
    286   1.1    dyoung 	case 7:
    287   1.1    dyoung 		return key & 7;
    288   1.1    dyoung 	case 15:
    289   1.1    dyoung 		return key & 15;
    290   1.1    dyoung 	case 31:
    291   1.1    dyoung 		return key & 31;
    292   1.1    dyoung 	default:
    293   1.1    dyoung 		KASSERT(0 && "no support, for no good reason");
    294   1.1    dyoung 		return ~0;
    295   1.1    dyoung 	}
    296   1.1    dyoung }
    297   1.1    dyoung 
    298   1.1    dyoung static inline fatp_t *
    299   1.1    dyoung fatp_from_key(fatp_ctl_t *fat, uint32_t key)
    300   1.1    dyoung {
    301   1.1    dyoung 	CTASSERT(CACHE_LINE_SIZE == 32 ||
    302   1.1    dyoung 	         CACHE_LINE_SIZE == 64 ||
    303   1.1    dyoung 		 CACHE_LINE_SIZE == 128);
    304   1.1    dyoung 
    305   1.1    dyoung 	switch (fatp_ntags()) {
    306   1.1    dyoung 	case 7:
    307   1.1    dyoung 		key >>= 3;
    308   1.1    dyoung 		break;
    309   1.1    dyoung 	case 15:
    310   1.1    dyoung 		key >>= 4;
    311   1.1    dyoung 		break;
    312   1.1    dyoung 	case 31:
    313   1.1    dyoung 		key >>= 5;
    314   1.1    dyoung 		break;
    315   1.1    dyoung 	default:
    316   1.1    dyoung 		KASSERT(0 && "no support, for no good reason");
    317   1.1    dyoung 		return 0;
    318   1.1    dyoung 	}
    319   1.1    dyoung 
    320   1.1    dyoung 	return key ? fat->base + key - 1 : 0;
    321   1.1    dyoung }
    322   1.1    dyoung 
    323   1.1    dyoung static inline uint32_t
    324   1.1    dyoung idx_encode(vtw_ctl_t *ctl, uint32_t idx)
    325   1.1    dyoung {
    326   1.1    dyoung 	return (idx << ctl->idx_bits) | idx;
    327   1.1    dyoung }
    328   1.1    dyoung 
    329   1.1    dyoung static inline uint32_t
    330   1.1    dyoung idx_decode(vtw_ctl_t *ctl, uint32_t bits)
    331   1.1    dyoung {
    332   1.1    dyoung 	uint32_t	idx	= bits & ctl->idx_mask;
    333   1.1    dyoung 
    334   1.1    dyoung 	if (idx_encode(ctl, idx) == bits)
    335   1.1    dyoung 		return idx;
    336   1.1    dyoung 	else
    337   1.1    dyoung 		return ~0;
    338   1.1    dyoung }
    339   1.1    dyoung 
    340   1.1    dyoung /*!\brief	insert index into fatp hash
    341   1.1    dyoung  *
    342   1.1    dyoung  *\param	idx	-	index of element being placed in hash chain
    343   1.1    dyoung  *\param	tag	-	32-bit tag identifier
    344   1.1    dyoung  *
    345   1.1    dyoung  *\returns
    346   1.1    dyoung  *	value which can be used to locate entry.
    347   1.1    dyoung  *
    348   1.1    dyoung  *\note
    349   1.1    dyoung  *	we rely on the fact that there are unused high bits in the index
    350   1.1    dyoung  *	for verification purposes on lookup.
    351   1.1    dyoung  */
    352   1.1    dyoung 
    353   1.1    dyoung static inline uint32_t
    354   1.1    dyoung fatp_vtw_inshash(fatp_ctl_t *fat, uint32_t idx, uint32_t tag, int which,
    355   1.1    dyoung     void *dbg)
    356   1.1    dyoung {
    357   1.1    dyoung 	fatp_t	*fp;
    358   1.1    dyoung 	fatp_t	**hash = (which ? fat->port : fat->hash);
    359   1.1    dyoung 	int	i;
    360   1.1    dyoung 
    361   1.1    dyoung 	fp = hash[tag & fat->mask];
    362   1.1    dyoung 
    363   1.1    dyoung 	while (!fp || fatp_full(fp)) {
    364   1.1    dyoung 		fatp_t	*fq;
    365   1.1    dyoung 
    366   1.1    dyoung 		/* All entries are inuse at the top level.
    367   1.1    dyoung 		 * We allocate a spare, and push the top level
    368   1.1    dyoung 		 * down one.  All entries in the fp we push down
    369   1.1    dyoung 		 * (think of a tape worm here) will be expelled sooner than
    370   1.1    dyoung 		 * any entries added subsequently to this hash bucket.
    371   1.1    dyoung 		 * This is a property of the time waits we are exploiting.
    372   1.1    dyoung 		 */
    373   1.1    dyoung 
    374   1.1    dyoung 		fq = fatp_alloc(fat);
    375   1.1    dyoung 		if (!fq) {
    376   1.1    dyoung 			vtw_age(fat->vtw, 0);
    377   1.1    dyoung 			fp = hash[tag & fat->mask];
    378   1.1    dyoung 			continue;
    379   1.1    dyoung 		}
    380   1.1    dyoung 
    381   1.1    dyoung 		fq->inuse = 0;
    382   1.1    dyoung 		fq->nxt   = fatp_index(fat, fp);
    383   1.1    dyoung 
    384   1.1    dyoung 		hash[tag & fat->mask] = fq;
    385   1.1    dyoung 
    386   1.1    dyoung 		fp = fq;
    387   1.1    dyoung 	}
    388   1.1    dyoung 
    389   1.1    dyoung 	KASSERT(!fatp_full(fp));
    390   1.1    dyoung 
    391   1.1    dyoung 	/* Fill highest index first.  Lookup is lowest first.
    392   1.1    dyoung 	 */
    393   1.1    dyoung 	for (i = fatp_ntags(); --i >= 0; ) {
    394   1.1    dyoung 		if (!((1 << i) & fp->inuse)) {
    395   1.1    dyoung 			break;
    396   1.1    dyoung 		}
    397   1.1    dyoung 	}
    398   1.1    dyoung 
    399   1.1    dyoung 	fp->inuse |= 1 << i;
    400   1.1    dyoung 	fp->tag[i] = tag ^ idx_encode(fat->vtw, idx) ^ fatp_xtra[i];
    401   1.1    dyoung 
    402   1.1    dyoung 	db_trace(KTR_VTW
    403   1.1    dyoung 		 , (fp, "fat: inuse %5.5x tag[%x] %8.8x"
    404   1.1    dyoung 		    , fp->inuse
    405   1.1    dyoung 		    , i, fp->tag[i]));
    406   1.1    dyoung 
    407   1.1    dyoung 	return fatp_key(fat, fp, i);
    408   1.1    dyoung }
    409   1.1    dyoung 
    410   1.1    dyoung static inline int
    411   1.1    dyoung vtw_alive(const vtw_t *vtw)
    412   1.1    dyoung {
    413   1.1    dyoung 	return vtw->hashed && vtw->expire.tv_sec;
    414   1.1    dyoung }
    415   1.1    dyoung 
    416   1.1    dyoung static inline uint32_t
    417   1.1    dyoung vtw_index_v4(vtw_ctl_t *ctl, vtw_v4_t *v4)
    418   1.1    dyoung {
    419   1.1    dyoung 	if (ctl->base.v4 <= v4 && v4 <= ctl->lim.v4)
    420   1.1    dyoung 		return v4 - ctl->base.v4;
    421   1.1    dyoung 
    422   1.1    dyoung 	KASSERT(0 && "vtw out of bounds");
    423   1.1    dyoung 
    424   1.1    dyoung 	return ~0;
    425   1.1    dyoung }
    426   1.1    dyoung 
    427   1.1    dyoung static inline uint32_t
    428   1.1    dyoung vtw_index_v6(vtw_ctl_t *ctl, vtw_v6_t *v6)
    429   1.1    dyoung {
    430   1.1    dyoung 	if (ctl->base.v6 <= v6 && v6 <= ctl->lim.v6)
    431   1.1    dyoung 		return v6 - ctl->base.v6;
    432   1.1    dyoung 
    433   1.1    dyoung 	KASSERT(0 && "vtw out of bounds");
    434   1.1    dyoung 
    435   1.1    dyoung 	return ~0;
    436   1.1    dyoung }
    437   1.1    dyoung 
    438   1.1    dyoung static inline uint32_t
    439   1.1    dyoung vtw_index(vtw_ctl_t *ctl, vtw_t *vtw)
    440   1.1    dyoung {
    441   1.1    dyoung 	if (ctl->clidx)
    442   1.1    dyoung 		ctl = ctl->ctl;
    443   1.1    dyoung 
    444   1.1    dyoung 	if (ctl->is_v4)
    445   1.1    dyoung 		return vtw_index_v4(ctl, (vtw_v4_t *)vtw);
    446   1.1    dyoung 
    447   1.1    dyoung 	if (ctl->is_v6)
    448   1.1    dyoung 		return vtw_index_v6(ctl, (vtw_v6_t *)vtw);
    449   1.1    dyoung 
    450   1.1    dyoung 	KASSERT(0 && "neither 4 nor 6.  most curious.");
    451   1.1    dyoung 
    452   1.1    dyoung 	return ~0;
    453   1.1    dyoung }
    454   1.1    dyoung 
    455   1.1    dyoung static inline vtw_t *
    456   1.1    dyoung vtw_from_index(vtw_ctl_t *ctl, uint32_t idx)
    457   1.1    dyoung {
    458   1.1    dyoung 	if (ctl->clidx)
    459   1.1    dyoung 		ctl = ctl->ctl;
    460   1.1    dyoung 
    461   1.1    dyoung 	/* See if the index looks like it might be an index.
    462   1.1    dyoung 	 * Bits on outside of the valid index bits is a give away.
    463   1.1    dyoung 	 */
    464   1.1    dyoung 	idx = idx_decode(ctl, idx);
    465   1.1    dyoung 
    466   1.1    dyoung 	if (idx == ~0) {
    467   1.1    dyoung 		return 0;
    468   1.1    dyoung 	} else if (ctl->is_v4) {
    469   1.1    dyoung 		vtw_v4_t	*vtw = ctl->base.v4 + idx;
    470   1.1    dyoung 
    471   1.1    dyoung 		return (ctl->base.v4 <= vtw && vtw <= ctl->lim.v4)
    472   1.1    dyoung 			? &vtw->common : 0;
    473   1.1    dyoung 	} else if (ctl->is_v6) {
    474   1.1    dyoung 		vtw_v6_t	*vtw = ctl->base.v6 + idx;
    475   1.1    dyoung 
    476   1.1    dyoung 		return (ctl->base.v6 <= vtw && vtw <= ctl->lim.v6)
    477   1.1    dyoung 			? &vtw->common : 0;
    478   1.1    dyoung 	} else {
    479   1.1    dyoung 		KASSERT(0 && "badness");
    480   1.1    dyoung 		return 0;
    481   1.1    dyoung 	}
    482   1.1    dyoung }
    483   1.1    dyoung 
    484   1.1    dyoung /*!\brief return the next vtw after this one.
    485   1.1    dyoung  *
    486   1.1    dyoung  * Due to the differing sizes of the entries in differing
    487   1.1    dyoung  * arenas, we have to ensure we ++ the correct pointer type.
    488   1.1    dyoung  *
    489   1.1    dyoung  * Also handles wrap.
    490   1.1    dyoung  */
    491   1.1    dyoung static inline vtw_t *
    492   1.1    dyoung vtw_next(vtw_ctl_t *ctl, vtw_t *vtw)
    493   1.1    dyoung {
    494   1.1    dyoung 	if (ctl->is_v4) {
    495   1.1    dyoung 		vtw_v4_t	*v4 = (void*)vtw;
    496   1.1    dyoung 
    497   1.1    dyoung 		vtw = &(++v4)->common;
    498   1.1    dyoung 	} else {
    499   1.1    dyoung 		vtw_v6_t	*v6 = (void*)vtw;
    500   1.1    dyoung 
    501   1.1    dyoung 		vtw = &(++v6)->common;
    502   1.1    dyoung 	}
    503   1.1    dyoung 
    504   1.1    dyoung 	if (vtw > ctl->lim.v)
    505   1.1    dyoung 		vtw = ctl->base.v;
    506   1.1    dyoung 
    507   1.1    dyoung 	return vtw;
    508   1.1    dyoung }
    509   1.1    dyoung 
    510   1.1    dyoung /*!\brief	remove entry from FATP hash chains
    511   1.1    dyoung  */
    512   1.1    dyoung static inline void
    513   1.1    dyoung vtw_unhash(vtw_ctl_t *ctl, vtw_t *vtw)
    514   1.1    dyoung {
    515   1.1    dyoung 	fatp_ctl_t	*fat	= ctl->fat;
    516   1.1    dyoung 	fatp_t		*fp;
    517   1.1    dyoung 	uint32_t	key = vtw->key;
    518   1.1    dyoung 	uint32_t	tag, slot, idx;
    519   1.1    dyoung 	vtw_v4_t	*v4 = (void*)vtw;
    520   1.1    dyoung 	vtw_v6_t	*v6 = (void*)vtw;
    521   1.1    dyoung 
    522   1.1    dyoung 	if (!vtw->hashed) {
    523   1.1    dyoung 		KASSERT(0 && "unhashed");
    524   1.1    dyoung 		return;
    525   1.1    dyoung 	}
    526   1.1    dyoung 
    527   1.1    dyoung 	if (fat->vtw->is_v4) {
    528   1.1    dyoung 		tag = v4_tag(v4->faddr, v4->fport, v4->laddr, v4->lport);
    529   1.1    dyoung 	} else if (fat->vtw->is_v6) {
    530   1.1    dyoung 		tag = v6_tag(&v6->faddr, v6->fport, &v6->laddr, v6->lport);
    531   1.1    dyoung 	} else {
    532   1.1    dyoung 		tag = 0;
    533   1.1    dyoung 		KASSERT(0 && "not reached");
    534   1.1    dyoung 	}
    535   1.1    dyoung 
    536   1.1    dyoung 	/* Remove from fat->hash[]
    537   1.1    dyoung 	 */
    538   1.1    dyoung 	slot = fatp_slot_from_key(fat, key);
    539   1.1    dyoung 	fp   = fatp_from_key(fat, key);
    540   1.1    dyoung 	idx  = vtw_index(ctl, vtw);
    541   1.1    dyoung 
    542   1.1    dyoung 	db_trace(KTR_VTW
    543   1.1    dyoung 		 , (fp, "fat: del inuse %5.5x slot %x idx %x key %x tag %x"
    544   1.1    dyoung 		    , fp->inuse, slot, idx, key, tag));
    545   1.1    dyoung 
    546   1.1    dyoung 	KASSERT(fp->inuse & (1 << slot));
    547   1.1    dyoung 	KASSERT(fp->tag[slot] == (tag ^ idx_encode(ctl, idx)
    548   1.1    dyoung 				  ^ fatp_xtra[slot]));
    549   1.1    dyoung 
    550   1.1    dyoung 	if ((fp->inuse & (1 << slot))
    551   1.1    dyoung 	    && fp->tag[slot] == (tag ^ idx_encode(ctl, idx)
    552   1.1    dyoung 				 ^ fatp_xtra[slot])) {
    553   1.1    dyoung 		fp->inuse ^= 1 << slot;
    554   1.1    dyoung 		fp->tag[slot] = 0;
    555   1.1    dyoung 
    556   1.1    dyoung 		/* When we delete entries, we do not compact.  This is
    557   1.1    dyoung 		 * due to temporality.  We add entries, and they
    558   1.1    dyoung 		 * (eventually) expire. Older entries will be further
    559   1.1    dyoung 		 * down the chain.
    560   1.1    dyoung 		 */
    561   1.1    dyoung 		if (!fp->inuse) {
    562   1.1    dyoung 			uint32_t hi = tag & fat->mask;
    563   1.1    dyoung 			fatp_t	*fq = 0;
    564   1.1    dyoung 			fatp_t	*fr = fat->hash[hi];
    565   1.1    dyoung 
    566   1.1    dyoung 			while (fr && fr != fp) {
    567   1.1    dyoung 				fr = fatp_next(fat, fq = fr);
    568   1.1    dyoung 			}
    569   1.1    dyoung 
    570   1.1    dyoung 			if (fr == fp) {
    571   1.1    dyoung 				if (fq) {
    572   1.1    dyoung 					fq->nxt = fp->nxt;
    573   1.1    dyoung 					fp->nxt = 0;
    574   1.1    dyoung 					fatp_free(fat, fp);
    575   1.1    dyoung 				} else {
    576   1.1    dyoung 					KASSERT(fat->hash[hi] == fp);
    577   1.1    dyoung 
    578   1.1    dyoung 					if (fp->nxt) {
    579   1.1    dyoung 						fat->hash[hi]
    580   1.1    dyoung 							= fatp_next(fat, fp);
    581   1.1    dyoung 						fp->nxt = 0;
    582   1.1    dyoung 						fatp_free(fat, fp);
    583   1.1    dyoung 					} else {
    584   1.1    dyoung 						/* retain for next use.
    585   1.1    dyoung 						 */
    586   1.1    dyoung 						;
    587   1.1    dyoung 					}
    588   1.1    dyoung 				}
    589   1.1    dyoung 			} else {
    590   1.1    dyoung 				fr = fat->hash[hi];
    591   1.1    dyoung 
    592   1.1    dyoung 				do {
    593   1.1    dyoung 					db_trace(KTR_VTW
    594   1.1    dyoung 						 , (fr
    595   1.1    dyoung 						    , "fat:*del inuse %5.5x"
    596   1.1    dyoung 						    " nxt %x"
    597   1.1    dyoung 						    , fr->inuse, fr->nxt));
    598   1.1    dyoung 
    599   1.1    dyoung 					fr = fatp_next(fat, fq = fr);
    600   1.1    dyoung 				} while (fr && fr != fp);
    601   1.1    dyoung 
    602   1.1    dyoung 				KASSERT(0 && "oops");
    603   1.1    dyoung 			}
    604   1.1    dyoung 		}
    605   1.1    dyoung 		vtw->key ^= ~0;
    606   1.1    dyoung 	}
    607   1.1    dyoung 
    608   1.1    dyoung 	if (fat->vtw->is_v4) {
    609   1.1    dyoung 		tag = v4_port_tag(v4->lport);
    610   1.1    dyoung 	} else if (fat->vtw->is_v6) {
    611   1.1    dyoung 		tag = v6_port_tag(v6->lport);
    612   1.1    dyoung 	}
    613   1.1    dyoung 
    614   1.1    dyoung 	/* Remove from fat->port[]
    615   1.1    dyoung 	 */
    616   1.1    dyoung 	key  = vtw->port_key;
    617   1.1    dyoung 	slot = fatp_slot_from_key(fat, key);
    618   1.1    dyoung 	fp   = fatp_from_key(fat, key);
    619   1.1    dyoung 	idx  = vtw_index(ctl, vtw);
    620   1.1    dyoung 
    621   1.1    dyoung 	db_trace(KTR_VTW
    622   1.1    dyoung 		 , (fp, "fatport: del inuse %5.5x"
    623   1.1    dyoung 		    " slot %x idx %x key %x tag %x"
    624   1.1    dyoung 		    , fp->inuse, slot, idx, key, tag));
    625   1.1    dyoung 
    626   1.1    dyoung 	KASSERT(fp->inuse & (1 << slot));
    627   1.1    dyoung 	KASSERT(fp->tag[slot] == (tag ^ idx_encode(ctl, idx)
    628   1.1    dyoung 				  ^ fatp_xtra[slot]));
    629   1.1    dyoung 
    630   1.1    dyoung 	if ((fp->inuse & (1 << slot))
    631   1.1    dyoung 	    && fp->tag[slot] == (tag ^ idx_encode(ctl, idx)
    632   1.1    dyoung 				 ^ fatp_xtra[slot])) {
    633   1.1    dyoung 		fp->inuse ^= 1 << slot;
    634   1.1    dyoung 		fp->tag[slot] = 0;
    635   1.1    dyoung 
    636   1.1    dyoung 		if (!fp->inuse) {
    637   1.1    dyoung 			uint32_t hi = tag & fat->mask;
    638   1.1    dyoung 			fatp_t	*fq = 0;
    639   1.1    dyoung 			fatp_t	*fr = fat->port[hi];
    640   1.1    dyoung 
    641   1.1    dyoung 			while (fr && fr != fp) {
    642   1.1    dyoung 				fr = fatp_next(fat, fq = fr);
    643   1.1    dyoung 			}
    644   1.1    dyoung 
    645   1.1    dyoung 			if (fr == fp) {
    646   1.1    dyoung 				if (fq) {
    647   1.1    dyoung 					fq->nxt = fp->nxt;
    648   1.1    dyoung 					fp->nxt = 0;
    649   1.1    dyoung 					fatp_free(fat, fp);
    650   1.1    dyoung 				} else {
    651   1.1    dyoung 					KASSERT(fat->port[hi] == fp);
    652   1.1    dyoung 
    653   1.1    dyoung 					if (fp->nxt) {
    654   1.1    dyoung 						fat->port[hi]
    655   1.1    dyoung 							= fatp_next(fat, fp);
    656   1.1    dyoung 						fp->nxt = 0;
    657   1.1    dyoung 						fatp_free(fat, fp);
    658   1.1    dyoung 					} else {
    659   1.1    dyoung 						/* retain for next use.
    660   1.1    dyoung 						 */
    661   1.1    dyoung 						;
    662   1.1    dyoung 					}
    663   1.1    dyoung 				}
    664   1.1    dyoung 			}
    665   1.1    dyoung 		}
    666   1.1    dyoung 		vtw->port_key ^= ~0;
    667   1.1    dyoung 	}
    668   1.1    dyoung 
    669   1.1    dyoung 	vtw->hashed = 0;
    670   1.1    dyoung }
    671   1.1    dyoung 
    672   1.1    dyoung /*!\brief	remove entry from hash, possibly free.
    673   1.1    dyoung  */
    674   1.1    dyoung void
    675   1.1    dyoung vtw_del(vtw_ctl_t *ctl, vtw_t *vtw)
    676   1.1    dyoung {
    677   1.1    dyoung 	KASSERT(mutex_owned(softnet_lock));
    678   1.1    dyoung 
    679   1.1    dyoung 	if (vtw->hashed) {
    680   1.1    dyoung 		++vtw_stats.del;
    681   1.1    dyoung 		vtw_unhash(ctl, vtw);
    682   1.1    dyoung 	}
    683   1.1    dyoung 
    684   1.1    dyoung 	/* We only delete the oldest entry.
    685   1.1    dyoung 	 */
    686   1.1    dyoung 	if (vtw != ctl->oldest.v)
    687   1.1    dyoung 		return;
    688   1.1    dyoung 
    689   1.1    dyoung 	--ctl->nalloc;
    690   1.1    dyoung 	++ctl->nfree;
    691   1.1    dyoung 
    692   1.1    dyoung 	vtw->expire.tv_sec  = 0;
    693   1.1    dyoung 	vtw->expire.tv_usec = ~0;
    694   1.1    dyoung 
    695   1.1    dyoung 	if (!ctl->nalloc)
    696   1.1    dyoung 		ctl->oldest.v = 0;
    697   1.1    dyoung 
    698   1.1    dyoung 	ctl->oldest.v = vtw_next(ctl, vtw);
    699   1.1    dyoung }
    700   1.1    dyoung 
    701   1.4  dholland /*!\brief	insert vestigial timewait in hash chain
    702   1.1    dyoung  */
    703   1.1    dyoung static void
    704   1.1    dyoung vtw_inshash_v4(vtw_ctl_t *ctl, vtw_t *vtw)
    705   1.1    dyoung {
    706   1.1    dyoung 	uint32_t	idx	= vtw_index(ctl, vtw);
    707   1.1    dyoung 	uint32_t	tag;
    708   1.1    dyoung 	vtw_v4_t	*v4 = (void*)vtw;
    709   1.1    dyoung 
    710   1.1    dyoung 	KASSERT(mutex_owned(softnet_lock));
    711   1.1    dyoung 	KASSERT(!vtw->hashed);
    712   1.1    dyoung 	KASSERT(ctl->clidx == vtw->msl_class);
    713   1.1    dyoung 
    714   1.1    dyoung 	++vtw_stats.ins;
    715   1.1    dyoung 
    716   1.1    dyoung 	tag = v4_tag(v4->faddr, v4->fport,
    717   1.1    dyoung 		     v4->laddr, v4->lport);
    718   1.1    dyoung 
    719   1.1    dyoung 	vtw->key = fatp_vtw_inshash(ctl->fat, idx, tag, 0, vtw);
    720   1.1    dyoung 
    721   1.1    dyoung 	db_trace(KTR_VTW, (ctl
    722   1.1    dyoung 			   , "vtw: ins %8.8x:%4.4x %8.8x:%4.4x"
    723   1.1    dyoung 			   " tag %8.8x key %8.8x"
    724   1.1    dyoung 			   , v4->faddr, v4->fport
    725   1.1    dyoung 			   , v4->laddr, v4->lport
    726   1.1    dyoung 			   , tag
    727   1.1    dyoung 			   , vtw->key));
    728   1.1    dyoung 
    729   1.1    dyoung 	tag = v4_port_tag(v4->lport);
    730   1.1    dyoung 	vtw->port_key = fatp_vtw_inshash(ctl->fat, idx, tag, 1, vtw);
    731   1.1    dyoung 
    732   1.1    dyoung 	db_trace(KTR_VTW, (ctl, "vtw: ins %P - %4.4x tag %8.8x key %8.8x"
    733   1.1    dyoung 			   , v4->lport, v4->lport
    734   1.1    dyoung 			   , tag
    735   1.1    dyoung 			   , vtw->key));
    736   1.1    dyoung 
    737   1.1    dyoung 	vtw->hashed = 1;
    738   1.1    dyoung }
    739   1.1    dyoung 
    740   1.4  dholland /*!\brief	insert vestigial timewait in hash chain
    741   1.1    dyoung  */
    742   1.1    dyoung static void
    743   1.1    dyoung vtw_inshash_v6(vtw_ctl_t *ctl, vtw_t *vtw)
    744   1.1    dyoung {
    745   1.1    dyoung 	uint32_t	idx	= vtw_index(ctl, vtw);
    746   1.1    dyoung 	uint32_t	tag;
    747   1.1    dyoung 	vtw_v6_t	*v6	= (void*)vtw;
    748   1.1    dyoung 
    749   1.1    dyoung 	KASSERT(mutex_owned(softnet_lock));
    750   1.1    dyoung 	KASSERT(!vtw->hashed);
    751   1.1    dyoung 	KASSERT(ctl->clidx == vtw->msl_class);
    752   1.1    dyoung 
    753   1.1    dyoung 	++vtw_stats.ins;
    754   1.1    dyoung 
    755   1.1    dyoung 	tag = v6_tag(&v6->faddr, v6->fport,
    756   1.1    dyoung 		     &v6->laddr, v6->lport);
    757   1.1    dyoung 
    758   1.1    dyoung 	vtw->key = fatp_vtw_inshash(ctl->fat, idx, tag, 0, vtw);
    759   1.1    dyoung 
    760   1.1    dyoung 	tag = v6_port_tag(v6->lport);
    761   1.1    dyoung 	vtw->port_key = fatp_vtw_inshash(ctl->fat, idx, tag, 1, vtw);
    762   1.1    dyoung 
    763   1.1    dyoung 	db_trace(KTR_VTW, (ctl, "vtw: ins %P - %4.4x tag %8.8x key %8.8x"
    764   1.1    dyoung 			   , v6->lport, v6->lport
    765   1.1    dyoung 			   , tag
    766   1.1    dyoung 			   , vtw->key));
    767   1.1    dyoung 
    768   1.1    dyoung 	vtw->hashed = 1;
    769   1.1    dyoung }
    770   1.1    dyoung 
    771   1.1    dyoung static vtw_t *
    772   1.1    dyoung vtw_lookup_hash_v4(vtw_ctl_t *ctl, uint32_t faddr, uint16_t fport
    773   1.1    dyoung 				 , uint32_t laddr, uint16_t lport
    774   1.1    dyoung 				 , int which)
    775   1.1    dyoung {
    776   1.1    dyoung 	vtw_v4_t	*v4;
    777   1.1    dyoung 	vtw_t		*vtw;
    778   1.1    dyoung 	uint32_t	tag;
    779   1.1    dyoung 	fatp_t		*fp;
    780   1.1    dyoung 	int		i;
    781   1.1    dyoung 	uint32_t	fatps = 0, probes = 0, losings = 0;
    782   1.1    dyoung 
    783   1.1    dyoung 	if (!ctl || !ctl->fat)
    784   1.1    dyoung 		return 0;
    785   1.1    dyoung 
    786   1.1    dyoung 	++vtw_stats.look[which];
    787   1.1    dyoung 
    788   1.1    dyoung 	if (which) {
    789   1.1    dyoung 		tag = v4_port_tag(lport);
    790   1.1    dyoung 		fp  = ctl->fat->port[tag & ctl->fat->mask];
    791   1.1    dyoung 	} else {
    792   1.1    dyoung 		tag = v4_tag(faddr, fport, laddr, lport);
    793   1.1    dyoung 		fp  = ctl->fat->hash[tag & ctl->fat->mask];
    794   1.1    dyoung 	}
    795   1.1    dyoung 
    796   1.1    dyoung 	while (fp && fp->inuse) {
    797   1.1    dyoung 		uint32_t	inuse = fp->inuse;
    798   1.1    dyoung 
    799   1.1    dyoung 		++fatps;
    800   1.1    dyoung 
    801   1.1    dyoung 		for (i = 0; inuse && i < fatp_ntags(); ++i) {
    802   1.1    dyoung 			uint32_t	idx;
    803   1.1    dyoung 
    804   1.1    dyoung 			if (!(inuse & (1 << i)))
    805   1.1    dyoung 				continue;
    806   1.1    dyoung 
    807   1.1    dyoung 			inuse ^= 1 << i;
    808   1.1    dyoung 
    809   1.1    dyoung 			++probes;
    810   1.1    dyoung 			++vtw_stats.probe[which];
    811   1.1    dyoung 
    812   1.1    dyoung 			idx = fp->tag[i] ^ tag ^ fatp_xtra[i];
    813   1.1    dyoung 			vtw = vtw_from_index(ctl, idx);
    814   1.1    dyoung 
    815   1.1    dyoung 			if (!vtw) {
    816   1.1    dyoung 				/* Hopefully fast path.
    817   1.1    dyoung 				 */
    818   1.1    dyoung 				db_trace(KTR_VTW
    819   1.1    dyoung 					 , (fp, "vtw: fast %A:%P %A:%P"
    820   1.1    dyoung 					    " idx %x tag %x"
    821   1.1    dyoung 					    , faddr, fport
    822   1.1    dyoung 					    , laddr, lport
    823   1.1    dyoung 					    , idx, tag));
    824   1.1    dyoung 				continue;
    825   1.1    dyoung 			}
    826   1.1    dyoung 
    827   1.1    dyoung 			v4 = (void*)vtw;
    828   1.1    dyoung 
    829   1.1    dyoung 			/* The de-referencing of vtw is what we want to avoid.
    830   1.1    dyoung 			 * Losing.
    831   1.1    dyoung 			 */
    832   1.1    dyoung 			if (vtw_alive(vtw)
    833   1.1    dyoung 			    && ((which ? vtw->port_key : vtw->key)
    834   1.1    dyoung 				== fatp_key(ctl->fat, fp, i))
    835   1.1    dyoung 			    && (which
    836   1.1    dyoung 				|| (v4->faddr == faddr && v4->laddr == laddr
    837   1.1    dyoung 				    && v4->fport == fport))
    838   1.1    dyoung 			    && v4->lport == lport) {
    839   1.1    dyoung 				++vtw_stats.hit[which];
    840   1.1    dyoung 
    841   1.1    dyoung 				db_trace(KTR_VTW
    842   1.1    dyoung 					 , (fp, "vtw: hit %8.8x:%4.4x"
    843   1.1    dyoung 					    " %8.8x:%4.4x idx %x key %x"
    844   1.1    dyoung 					    , faddr, fport
    845   1.1    dyoung 					    , laddr, lport
    846   1.1    dyoung 					    , idx_decode(ctl, idx), vtw->key));
    847   1.1    dyoung 
    848   1.1    dyoung 				KASSERT(vtw->hashed);
    849   1.1    dyoung 
    850   1.1    dyoung 				goto out;
    851   1.1    dyoung 			}
    852   1.1    dyoung 			++vtw_stats.losing[which];
    853   1.1    dyoung 			++losings;
    854   1.1    dyoung 
    855   1.1    dyoung 			if (vtw_alive(vtw)) {
    856   1.1    dyoung 				db_trace(KTR_VTW
    857   1.1    dyoung 					 , (fp, "vtw:!mis %8.8x:%4.4x"
    858   1.1    dyoung 					    " %8.8x:%4.4x key %x tag %x"
    859   1.1    dyoung 					    , faddr, fport
    860   1.1    dyoung 					    , laddr, lport
    861   1.1    dyoung 					    , fatp_key(ctl->fat, fp, i)
    862   1.1    dyoung 					    , v4_tag(faddr, fport
    863   1.1    dyoung 						     , laddr, lport)));
    864   1.1    dyoung 				db_trace(KTR_VTW
    865   1.1    dyoung 					 , (vtw, "vtw:!mis %8.8x:%4.4x"
    866   1.1    dyoung 					    " %8.8x:%4.4x key %x tag %x"
    867   1.1    dyoung 					    , v4->faddr, v4->fport
    868   1.1    dyoung 					    , v4->laddr, v4->lport
    869   1.1    dyoung 					    , vtw->key
    870   1.1    dyoung 					    , v4_tag(v4->faddr, v4->fport
    871   1.1    dyoung 						     , v4->laddr, v4->lport)));
    872   1.1    dyoung 
    873   1.1    dyoung 				if (vtw->key == fatp_key(ctl->fat, fp, i)) {
    874   1.1    dyoung 					db_trace(KTR_VTW
    875   1.1    dyoung 						 , (vtw, "vtw:!mis %8.8x:%4.4x"
    876   1.1    dyoung 						    " %8.8x:%4.4x key %x"
    877   1.1    dyoung 						    " which %x"
    878   1.1    dyoung 						    , v4->faddr, v4->fport
    879   1.1    dyoung 						    , v4->laddr, v4->lport
    880   1.1    dyoung 						    , vtw->key
    881   1.1    dyoung 						    , which));
    882   1.1    dyoung 
    883   1.1    dyoung 				} else {
    884   1.1    dyoung 					db_trace(KTR_VTW
    885   1.1    dyoung 						 , (vtw
    886   1.1    dyoung 						    , "vtw:!mis"
    887   1.1    dyoung 						    " key %8.8x != %8.8x"
    888   1.1    dyoung 						    " idx %x i %x which %x"
    889   1.1    dyoung 						    , vtw->key
    890   1.1    dyoung 						    , fatp_key(ctl->fat, fp, i)
    891   1.1    dyoung 						    , idx_decode(ctl, idx)
    892   1.1    dyoung 						    , i
    893   1.1    dyoung 						    , which));
    894   1.1    dyoung 				}
    895   1.1    dyoung 			} else {
    896   1.1    dyoung 				db_trace(KTR_VTW
    897   1.1    dyoung 					 , (fp
    898   1.1    dyoung 					    , "vtw:!mis free entry"
    899   1.1    dyoung 					    " idx %x vtw %p which %x"
    900   1.1    dyoung 					    , idx_decode(ctl, idx)
    901   1.1    dyoung 					    , vtw, which));
    902   1.1    dyoung 			}
    903   1.1    dyoung 		}
    904   1.1    dyoung 
    905   1.1    dyoung 		if (fp->nxt) {
    906   1.1    dyoung 			fp = fatp_next(ctl->fat, fp);
    907   1.1    dyoung 		} else {
    908   1.1    dyoung 			break;
    909   1.1    dyoung 		}
    910   1.1    dyoung 	}
    911   1.1    dyoung 	++vtw_stats.miss[which];
    912   1.1    dyoung 	vtw = 0;
    913   1.1    dyoung out:
    914   1.1    dyoung 	if (fatps > vtw_stats.max_chain[which])
    915   1.1    dyoung 		vtw_stats.max_chain[which] = fatps;
    916   1.1    dyoung 	if (probes > vtw_stats.max_probe[which])
    917   1.1    dyoung 		vtw_stats.max_probe[which] = probes;
    918   1.1    dyoung 	if (losings > vtw_stats.max_loss[which])
    919   1.1    dyoung 		vtw_stats.max_loss[which] = losings;
    920   1.1    dyoung 
    921   1.1    dyoung 	return vtw;
    922   1.1    dyoung }
    923   1.1    dyoung 
    924   1.1    dyoung static vtw_t *
    925   1.1    dyoung vtw_lookup_hash_v6(vtw_ctl_t *ctl, const struct in6_addr *faddr, uint16_t fport
    926   1.1    dyoung 				 , const struct in6_addr *laddr, uint16_t lport
    927   1.1    dyoung 				 , int which)
    928   1.1    dyoung {
    929   1.1    dyoung 	vtw_v6_t	*v6;
    930   1.1    dyoung 	vtw_t		*vtw;
    931   1.1    dyoung 	uint32_t	tag;
    932   1.1    dyoung 	fatp_t		*fp;
    933   1.1    dyoung 	int		i;
    934   1.1    dyoung 	uint32_t	fatps = 0, probes = 0, losings = 0;
    935   1.1    dyoung 
    936   1.1    dyoung 	++vtw_stats.look[which];
    937   1.1    dyoung 
    938   1.1    dyoung 	if (!ctl || !ctl->fat)
    939   1.1    dyoung 		return 0;
    940   1.1    dyoung 
    941   1.1    dyoung 	if (which) {
    942   1.1    dyoung 		tag = v6_port_tag(lport);
    943   1.1    dyoung 		fp  = ctl->fat->port[tag & ctl->fat->mask];
    944   1.1    dyoung 	} else {
    945   1.1    dyoung 		tag = v6_tag(faddr, fport, laddr, lport);
    946   1.1    dyoung 		fp  = ctl->fat->hash[tag & ctl->fat->mask];
    947   1.1    dyoung 	}
    948   1.1    dyoung 
    949   1.1    dyoung 	while (fp && fp->inuse) {
    950   1.1    dyoung 		uint32_t	inuse = fp->inuse;
    951   1.1    dyoung 
    952   1.1    dyoung 		++fatps;
    953   1.1    dyoung 
    954   1.1    dyoung 		for (i = 0; inuse && i < fatp_ntags(); ++i) {
    955   1.1    dyoung 			uint32_t	idx;
    956   1.1    dyoung 
    957   1.1    dyoung 			if (!(inuse & (1 << i)))
    958   1.1    dyoung 				continue;
    959   1.1    dyoung 
    960   1.1    dyoung 			inuse ^= 1 << i;
    961   1.1    dyoung 
    962   1.1    dyoung 			++probes;
    963   1.1    dyoung 			++vtw_stats.probe[which];
    964   1.1    dyoung 
    965   1.1    dyoung 			idx = fp->tag[i] ^ tag ^ fatp_xtra[i];
    966   1.1    dyoung 			vtw = vtw_from_index(ctl, idx);
    967   1.1    dyoung 
    968   1.1    dyoung 			db_trace(KTR_VTW
    969   1.1    dyoung 				 , (fp, "probe: %2d %6A:%4.4x %6A:%4.4x idx %x"
    970   1.1    dyoung 				    , i
    971   1.1    dyoung 				    , db_store(faddr, sizeof (*faddr)), fport
    972   1.1    dyoung 				    , db_store(laddr, sizeof (*laddr)), lport
    973   1.1    dyoung 				    , idx_decode(ctl, idx)));
    974   1.1    dyoung 
    975   1.1    dyoung 			if (!vtw) {
    976   1.1    dyoung 				/* Hopefully fast path.
    977   1.1    dyoung 				 */
    978   1.1    dyoung 				continue;
    979   1.1    dyoung 			}
    980   1.1    dyoung 
    981   1.1    dyoung 			v6 = (void*)vtw;
    982   1.1    dyoung 
    983   1.1    dyoung 			if (vtw_alive(vtw)
    984   1.1    dyoung 			    && ((which ? vtw->port_key : vtw->key)
    985   1.1    dyoung 				== fatp_key(ctl->fat, fp, i))
    986   1.1    dyoung 			    && v6->lport == lport
    987   1.1    dyoung 			    && (which
    988   1.1    dyoung 				|| (v6->fport == fport
    989   1.1    dyoung 				    && !bcmp(&v6->faddr, faddr, sizeof (*faddr))
    990   1.1    dyoung 				    && !bcmp(&v6->laddr, laddr
    991   1.1    dyoung 					     , sizeof (*laddr))))) {
    992   1.1    dyoung 				++vtw_stats.hit[which];
    993   1.1    dyoung 
    994   1.1    dyoung 				KASSERT(vtw->hashed);
    995   1.1    dyoung 				goto out;
    996   1.1    dyoung 			} else {
    997   1.1    dyoung 				++vtw_stats.losing[which];
    998   1.1    dyoung 				++losings;
    999   1.1    dyoung 			}
   1000   1.1    dyoung 		}
   1001   1.1    dyoung 
   1002   1.1    dyoung 		if (fp->nxt) {
   1003   1.1    dyoung 			fp = fatp_next(ctl->fat, fp);
   1004   1.1    dyoung 		} else {
   1005   1.1    dyoung 			break;
   1006   1.1    dyoung 		}
   1007   1.1    dyoung 	}
   1008   1.1    dyoung 	++vtw_stats.miss[which];
   1009   1.1    dyoung 	vtw = 0;
   1010   1.1    dyoung out:
   1011   1.1    dyoung 	if (fatps > vtw_stats.max_chain[which])
   1012   1.1    dyoung 		vtw_stats.max_chain[which] = fatps;
   1013   1.1    dyoung 	if (probes > vtw_stats.max_probe[which])
   1014   1.1    dyoung 		vtw_stats.max_probe[which] = probes;
   1015   1.1    dyoung 	if (losings > vtw_stats.max_loss[which])
   1016   1.1    dyoung 		vtw_stats.max_loss[which] = losings;
   1017   1.1    dyoung 
   1018   1.1    dyoung 	return vtw;
   1019   1.1    dyoung }
   1020   1.1    dyoung 
   1021   1.1    dyoung /*!\brief port iterator
   1022   1.1    dyoung  */
   1023   1.1    dyoung static vtw_t *
   1024   1.1    dyoung vtw_next_port_v4(struct tcp_ports_iterator *it)
   1025   1.1    dyoung {
   1026   1.1    dyoung 	vtw_ctl_t	*ctl = it->ctl;
   1027   1.1    dyoung 	vtw_v4_t	*v4;
   1028   1.1    dyoung 	vtw_t		*vtw;
   1029   1.1    dyoung 	uint32_t	tag;
   1030   1.1    dyoung 	uint16_t	lport = it->port;
   1031   1.1    dyoung 	fatp_t		*fp;
   1032   1.1    dyoung 	int		i;
   1033   1.1    dyoung 	uint32_t	fatps = 0, probes = 0, losings = 0;
   1034   1.1    dyoung 
   1035   1.1    dyoung 	tag = v4_port_tag(lport);
   1036   1.1    dyoung 	if (!it->fp) {
   1037   1.1    dyoung 		it->fp = ctl->fat->port[tag & ctl->fat->mask];
   1038   1.1    dyoung 		it->slot_idx = 0;
   1039   1.1    dyoung 	}
   1040   1.1    dyoung 	fp  = it->fp;
   1041   1.1    dyoung 
   1042   1.1    dyoung 	while (fp) {
   1043   1.1    dyoung 		uint32_t	inuse = fp->inuse;
   1044   1.1    dyoung 
   1045   1.1    dyoung 		++fatps;
   1046   1.1    dyoung 
   1047   1.1    dyoung 		for (i = it->slot_idx; inuse && i < fatp_ntags(); ++i) {
   1048   1.1    dyoung 			uint32_t	idx;
   1049   1.1    dyoung 
   1050   1.1    dyoung 			if (!(inuse & (1 << i)))
   1051   1.1    dyoung 				continue;
   1052   1.1    dyoung 
   1053  1.16    martin 			inuse &= ~0U << i;
   1054   1.1    dyoung 
   1055   1.1    dyoung 			if (i < it->slot_idx)
   1056   1.1    dyoung 				continue;
   1057   1.1    dyoung 
   1058   1.1    dyoung 			++vtw_stats.probe[1];
   1059   1.1    dyoung 			++probes;
   1060   1.1    dyoung 
   1061   1.1    dyoung 			idx = fp->tag[i] ^ tag ^ fatp_xtra[i];
   1062   1.1    dyoung 			vtw = vtw_from_index(ctl, idx);
   1063   1.1    dyoung 
   1064   1.1    dyoung 			if (!vtw) {
   1065   1.1    dyoung 				/* Hopefully fast path.
   1066   1.1    dyoung 				 */
   1067   1.1    dyoung 				continue;
   1068   1.1    dyoung 			}
   1069   1.1    dyoung 
   1070   1.1    dyoung 			v4 = (void*)vtw;
   1071   1.1    dyoung 
   1072   1.1    dyoung 			if (vtw_alive(vtw)
   1073   1.1    dyoung 			    && vtw->port_key == fatp_key(ctl->fat, fp, i)
   1074   1.1    dyoung 			    && v4->lport == lport) {
   1075   1.1    dyoung 				++vtw_stats.hit[1];
   1076   1.1    dyoung 
   1077   1.1    dyoung 				it->slot_idx = i + 1;
   1078   1.1    dyoung 
   1079   1.1    dyoung 				goto out;
   1080   1.1    dyoung 			} else if (vtw_alive(vtw)) {
   1081   1.1    dyoung 				++vtw_stats.losing[1];
   1082   1.1    dyoung 				++losings;
   1083   1.1    dyoung 
   1084   1.1    dyoung 				db_trace(KTR_VTW
   1085   1.1    dyoung 					 , (vtw, "vtw:!mis"
   1086   1.1    dyoung 					    " port %8.8x:%4.4x %8.8x:%4.4x"
   1087   1.1    dyoung 					    " key %x port %x"
   1088   1.1    dyoung 					    , v4->faddr, v4->fport
   1089   1.1    dyoung 					    , v4->laddr, v4->lport
   1090   1.1    dyoung 					    , vtw->key
   1091   1.1    dyoung 					    , lport));
   1092   1.1    dyoung 			} else {
   1093   1.1    dyoung 				/* Really losing here.  We are coming
   1094   1.1    dyoung 				 * up with references to free entries.
   1095   1.1    dyoung 				 * Might find it better to use
   1096   1.1    dyoung 				 * traditional, or need another
   1097   1.1    dyoung 				 * add-hockery.  The other add-hockery
   1098   1.1    dyoung 				 * would be to pul more into into the
   1099   1.1    dyoung 				 * cache line to reject the false
   1100   1.1    dyoung 				 * hits.
   1101   1.1    dyoung 				 */
   1102   1.1    dyoung 				++vtw_stats.losing[1];
   1103   1.1    dyoung 				++losings;
   1104   1.1    dyoung 				db_trace(KTR_VTW
   1105   1.1    dyoung 					 , (fp, "vtw:!mis port %x"
   1106   1.1    dyoung 					    " - free entry idx %x vtw %p"
   1107   1.1    dyoung 					    , lport
   1108   1.1    dyoung 					    , idx_decode(ctl, idx)
   1109   1.1    dyoung 					    , vtw));
   1110   1.1    dyoung 			}
   1111   1.1    dyoung 		}
   1112   1.1    dyoung 
   1113   1.1    dyoung 		if (fp->nxt) {
   1114   1.1    dyoung 			it->fp = fp = fatp_next(ctl->fat, fp);
   1115   1.1    dyoung 			it->slot_idx = 0;
   1116   1.1    dyoung 		} else {
   1117   1.1    dyoung 			it->fp = 0;
   1118   1.1    dyoung 			break;
   1119   1.1    dyoung 		}
   1120   1.1    dyoung 	}
   1121   1.1    dyoung 	++vtw_stats.miss[1];
   1122   1.1    dyoung 
   1123   1.1    dyoung 	vtw = 0;
   1124   1.1    dyoung out:
   1125   1.1    dyoung 	if (fatps > vtw_stats.max_chain[1])
   1126   1.1    dyoung 		vtw_stats.max_chain[1] = fatps;
   1127   1.1    dyoung 	if (probes > vtw_stats.max_probe[1])
   1128   1.1    dyoung 		vtw_stats.max_probe[1] = probes;
   1129   1.1    dyoung 	if (losings > vtw_stats.max_loss[1])
   1130   1.1    dyoung 		vtw_stats.max_loss[1] = losings;
   1131   1.1    dyoung 
   1132   1.1    dyoung 	return vtw;
   1133   1.1    dyoung }
   1134   1.1    dyoung 
   1135   1.1    dyoung /*!\brief port iterator
   1136   1.1    dyoung  */
   1137   1.1    dyoung static vtw_t *
   1138   1.1    dyoung vtw_next_port_v6(struct tcp_ports_iterator *it)
   1139   1.1    dyoung {
   1140   1.1    dyoung 	vtw_ctl_t	*ctl = it->ctl;
   1141   1.1    dyoung 	vtw_v6_t	*v6;
   1142   1.1    dyoung 	vtw_t		*vtw;
   1143   1.1    dyoung 	uint32_t	tag;
   1144   1.1    dyoung 	uint16_t	lport = it->port;
   1145   1.1    dyoung 	fatp_t		*fp;
   1146   1.1    dyoung 	int		i;
   1147   1.1    dyoung 	uint32_t	fatps = 0, probes = 0, losings = 0;
   1148   1.1    dyoung 
   1149   1.1    dyoung 	tag = v6_port_tag(lport);
   1150   1.1    dyoung 	if (!it->fp) {
   1151   1.1    dyoung 		it->fp = ctl->fat->port[tag & ctl->fat->mask];
   1152   1.1    dyoung 		it->slot_idx = 0;
   1153   1.1    dyoung 	}
   1154   1.1    dyoung 	fp  = it->fp;
   1155   1.1    dyoung 
   1156   1.1    dyoung 	while (fp) {
   1157   1.1    dyoung 		uint32_t	inuse = fp->inuse;
   1158   1.1    dyoung 
   1159   1.1    dyoung 		++fatps;
   1160   1.1    dyoung 
   1161   1.1    dyoung 		for (i = it->slot_idx; inuse && i < fatp_ntags(); ++i) {
   1162   1.1    dyoung 			uint32_t	idx;
   1163   1.1    dyoung 
   1164   1.1    dyoung 			if (!(inuse & (1 << i)))
   1165   1.1    dyoung 				continue;
   1166   1.1    dyoung 
   1167  1.16    martin 			inuse &= ~0U << i;
   1168   1.1    dyoung 
   1169   1.1    dyoung 			if (i < it->slot_idx)
   1170   1.1    dyoung 				continue;
   1171   1.1    dyoung 
   1172   1.1    dyoung 			++vtw_stats.probe[1];
   1173   1.1    dyoung 			++probes;
   1174   1.1    dyoung 
   1175   1.1    dyoung 			idx = fp->tag[i] ^ tag ^ fatp_xtra[i];
   1176   1.1    dyoung 			vtw = vtw_from_index(ctl, idx);
   1177   1.1    dyoung 
   1178   1.1    dyoung 			if (!vtw) {
   1179   1.1    dyoung 				/* Hopefully fast path.
   1180   1.1    dyoung 				 */
   1181   1.1    dyoung 				continue;
   1182   1.1    dyoung 			}
   1183   1.1    dyoung 
   1184   1.1    dyoung 			v6 = (void*)vtw;
   1185   1.1    dyoung 
   1186   1.1    dyoung 			db_trace(KTR_VTW
   1187   1.1    dyoung 				 , (vtw, "vtw: i %x idx %x fp->tag %x"
   1188   1.1    dyoung 				    " tag %x xtra %x"
   1189   1.1    dyoung 				    , i, idx_decode(ctl, idx)
   1190   1.1    dyoung 				    , fp->tag[i], tag, fatp_xtra[i]));
   1191   1.1    dyoung 
   1192   1.1    dyoung 			if (vtw_alive(vtw)
   1193   1.1    dyoung 			    && vtw->port_key == fatp_key(ctl->fat, fp, i)
   1194   1.1    dyoung 			    && v6->lport == lport) {
   1195   1.1    dyoung 				++vtw_stats.hit[1];
   1196   1.1    dyoung 
   1197   1.1    dyoung 				db_trace(KTR_VTW
   1198   1.1    dyoung 					 , (fp, "vtw: nxt port %P - %4.4x"
   1199   1.1    dyoung 					    " idx %x key %x"
   1200   1.1    dyoung 					    , lport, lport
   1201   1.1    dyoung 					    , idx_decode(ctl, idx), vtw->key));
   1202   1.1    dyoung 
   1203   1.1    dyoung 				it->slot_idx = i + 1;
   1204   1.1    dyoung 				goto out;
   1205   1.1    dyoung 			} else if (vtw_alive(vtw)) {
   1206   1.1    dyoung 				++vtw_stats.losing[1];
   1207   1.1    dyoung 
   1208   1.1    dyoung 				db_trace(KTR_VTW
   1209   1.1    dyoung 					 , (vtw, "vtw:!mis port %6A:%4.4x"
   1210   1.1    dyoung 					    " %6A:%4.4x key %x port %x"
   1211   1.1    dyoung 					    , db_store(&v6->faddr
   1212   1.1    dyoung 						       , sizeof (v6->faddr))
   1213   1.1    dyoung 					    , v6->fport
   1214   1.1    dyoung 					    , db_store(&v6->laddr
   1215   1.1    dyoung 						       , sizeof (v6->faddr))
   1216   1.1    dyoung 					    , v6->lport
   1217   1.1    dyoung 					    , vtw->key
   1218   1.1    dyoung 					    , lport));
   1219   1.1    dyoung 			} else {
   1220   1.1    dyoung 				/* Really losing here.  We are coming
   1221   1.1    dyoung 				 * up with references to free entries.
   1222   1.1    dyoung 				 * Might find it better to use
   1223   1.1    dyoung 				 * traditional, or need another
   1224   1.1    dyoung 				 * add-hockery.  The other add-hockery
   1225   1.1    dyoung 				 * would be to pul more into into the
   1226   1.1    dyoung 				 * cache line to reject the false
   1227   1.1    dyoung 				 * hits.
   1228   1.1    dyoung 				 */
   1229   1.1    dyoung 				++vtw_stats.losing[1];
   1230   1.1    dyoung 				++losings;
   1231   1.1    dyoung 
   1232   1.1    dyoung 				db_trace(KTR_VTW
   1233   1.1    dyoung 					 , (fp
   1234   1.1    dyoung 					    , "vtw:!mis port %x"
   1235   1.1    dyoung 					    " - free entry idx %x vtw %p"
   1236   1.1    dyoung 					    , lport, idx_decode(ctl, idx)
   1237   1.1    dyoung 					    , vtw));
   1238   1.1    dyoung 			}
   1239   1.1    dyoung 		}
   1240   1.1    dyoung 
   1241   1.1    dyoung 		if (fp->nxt) {
   1242   1.1    dyoung 			it->fp = fp = fatp_next(ctl->fat, fp);
   1243   1.1    dyoung 			it->slot_idx = 0;
   1244   1.1    dyoung 		} else {
   1245   1.1    dyoung 			it->fp = 0;
   1246   1.1    dyoung 			break;
   1247   1.1    dyoung 		}
   1248   1.1    dyoung 	}
   1249   1.1    dyoung 	++vtw_stats.miss[1];
   1250   1.1    dyoung 
   1251   1.1    dyoung 	vtw = 0;
   1252   1.1    dyoung out:
   1253   1.1    dyoung 	if (fatps > vtw_stats.max_chain[1])
   1254   1.1    dyoung 		vtw_stats.max_chain[1] = fatps;
   1255   1.1    dyoung 	if (probes > vtw_stats.max_probe[1])
   1256   1.1    dyoung 		vtw_stats.max_probe[1] = probes;
   1257   1.1    dyoung 	if (losings > vtw_stats.max_loss[1])
   1258   1.1    dyoung 		vtw_stats.max_loss[1] = losings;
   1259   1.1    dyoung 
   1260   1.1    dyoung 	return vtw;
   1261   1.1    dyoung }
   1262   1.1    dyoung 
   1263   1.1    dyoung /*!\brief initialise the VTW allocation arena
   1264   1.1    dyoung  *
   1265   1.1    dyoung  * There are 1+3 allocation classes:
   1266   1.1    dyoung  *	0	classless
   1267   1.1    dyoung  *	{1,2,3}	MSL-class based allocation
   1268   1.1    dyoung  *
   1269   1.1    dyoung  * The allocation arenas are all initialised.  Classless gets all the
   1270   1.1    dyoung  * space.  MSL-class based divides the arena, so that allocation
   1271   1.1    dyoung  * within a class can proceed without having to consider entries
   1272   1.1    dyoung  * (aka: cache lines) from different classes.
   1273   1.1    dyoung  *
   1274   1.1    dyoung  * Usually, we are completely classless or class-based, but there can be
   1275   1.1    dyoung  * transition periods, corresponding to dynamic adjustments in the config
   1276   1.1    dyoung  * by the operator.
   1277   1.1    dyoung  */
   1278   1.1    dyoung static void
   1279   1.6    dyoung vtw_init(fatp_ctl_t *fat, vtw_ctl_t *ctl, const uint32_t n, vtw_t *ctl_base_v)
   1280   1.1    dyoung {
   1281   1.6    dyoung 	int class_n, i;
   1282   1.6    dyoung 	vtw_t	*base;
   1283   1.1    dyoung 
   1284   1.6    dyoung 	ctl->base.v = ctl_base_v;
   1285   1.1    dyoung 
   1286   1.6    dyoung 	if (ctl->is_v4) {
   1287   1.6    dyoung 		ctl->lim.v4    = ctl->base.v4 + n - 1;
   1288   1.6    dyoung 		ctl->alloc.v4  = ctl->base.v4;
   1289   1.6    dyoung 	} else {
   1290   1.6    dyoung 		ctl->lim.v6    = ctl->base.v6 + n - 1;
   1291   1.6    dyoung 		ctl->alloc.v6  = ctl->base.v6;
   1292   1.6    dyoung 	}
   1293   1.1    dyoung 
   1294   1.6    dyoung 	ctl->nfree  = n;
   1295   1.6    dyoung 	ctl->ctl    = ctl;
   1296   1.1    dyoung 
   1297   1.6    dyoung 	ctl->idx_bits = 32;
   1298   1.6    dyoung 	for (ctl->idx_mask = ~0; (ctl->idx_mask & (n-1)) == n-1; ) {
   1299   1.6    dyoung 		ctl->idx_mask >>= 1;
   1300   1.6    dyoung 		ctl->idx_bits  -= 1;
   1301   1.6    dyoung 	}
   1302   1.1    dyoung 
   1303   1.6    dyoung 	ctl->idx_mask <<= 1;
   1304   1.6    dyoung 	ctl->idx_mask  |= 1;
   1305   1.6    dyoung 	ctl->idx_bits  += 1;
   1306   1.1    dyoung 
   1307   1.6    dyoung 	ctl->fat = fat;
   1308   1.6    dyoung 	fat->vtw = ctl;
   1309   1.1    dyoung 
   1310   1.6    dyoung 	/* Divide the resources equally amongst the classes.
   1311   1.6    dyoung 	 * This is not optimal, as the different classes
   1312   1.6    dyoung 	 * arrive and leave at different rates, but it is
   1313   1.6    dyoung 	 * the best I can do for now.
   1314   1.6    dyoung 	 */
   1315   1.6    dyoung 	class_n = n / (VTW_NCLASS-1);
   1316   1.6    dyoung 	base    = ctl->base.v;
   1317   1.1    dyoung 
   1318   1.6    dyoung 	for (i = 1; i < VTW_NCLASS; ++i) {
   1319   1.6    dyoung 		int j;
   1320   1.1    dyoung 
   1321   1.6    dyoung 		ctl[i] = ctl[0];
   1322   1.6    dyoung 		ctl[i].clidx = i;
   1323   1.1    dyoung 
   1324   1.6    dyoung 		ctl[i].base.v = base;
   1325   1.6    dyoung 		ctl[i].alloc  = ctl[i].base;
   1326   1.1    dyoung 
   1327   1.6    dyoung 		for (j = 0; j < class_n - 1; ++j) {
   1328   1.6    dyoung 			if (tcp_msl_enable)
   1329   1.6    dyoung 				base->msl_class = i;
   1330   1.1    dyoung 			base = vtw_next(ctl, base);
   1331   1.1    dyoung 		}
   1332   1.6    dyoung 
   1333   1.6    dyoung 		ctl[i].lim.v = base;
   1334   1.6    dyoung 		base = vtw_next(ctl, base);
   1335   1.6    dyoung 		ctl[i].nfree = class_n;
   1336   1.1    dyoung 	}
   1337   1.1    dyoung 
   1338   1.1    dyoung 	vtw_debug_init();
   1339   1.1    dyoung }
   1340   1.1    dyoung 
   1341   1.1    dyoung /*!\brief	map class to TCP MSL
   1342   1.1    dyoung  */
   1343   1.1    dyoung static inline uint32_t
   1344  1.11      matt class_to_msl(int msl_class)
   1345   1.1    dyoung {
   1346  1.11      matt 	switch (msl_class) {
   1347   1.1    dyoung 	case 0:
   1348   1.1    dyoung 	case 1:
   1349   1.1    dyoung 		return tcp_msl_remote ? tcp_msl_remote : (TCPTV_MSL >> 0);
   1350   1.1    dyoung 	case 2:
   1351   1.1    dyoung 		return tcp_msl_local ? tcp_msl_local : (TCPTV_MSL >> 1);
   1352   1.1    dyoung 	default:
   1353   1.1    dyoung 		return tcp_msl_loop ? tcp_msl_loop : (TCPTV_MSL >> 2);
   1354   1.1    dyoung 	}
   1355   1.1    dyoung }
   1356   1.1    dyoung 
   1357   1.1    dyoung /*!\brief	map TCP MSL to class
   1358   1.1    dyoung  */
   1359   1.1    dyoung static inline uint32_t
   1360   1.1    dyoung msl_to_class(int msl)
   1361   1.1    dyoung {
   1362   1.1    dyoung 	if (tcp_msl_enable) {
   1363   1.1    dyoung 		if (msl <= (tcp_msl_loop ? tcp_msl_loop : (TCPTV_MSL >> 2)))
   1364   1.1    dyoung 			return 1+2;
   1365   1.1    dyoung 		if (msl <= (tcp_msl_local ? tcp_msl_local : (TCPTV_MSL >> 1)))
   1366   1.1    dyoung 			return 1+1;
   1367   1.1    dyoung 		return 1;
   1368   1.1    dyoung 	}
   1369   1.1    dyoung 	return 0;
   1370   1.1    dyoung }
   1371   1.1    dyoung 
   1372   1.1    dyoung /*!\brief allocate a vtw entry
   1373   1.1    dyoung  */
   1374   1.1    dyoung static inline vtw_t *
   1375   1.1    dyoung vtw_alloc(vtw_ctl_t *ctl)
   1376   1.1    dyoung {
   1377   1.1    dyoung 	vtw_t	*vtw	= 0;
   1378   1.1    dyoung 	int	stuck	= 0;
   1379   1.1    dyoung 	int	avail	= ctl ? (ctl->nalloc + ctl->nfree) : 0;
   1380   1.1    dyoung 	int	msl;
   1381   1.1    dyoung 
   1382   1.1    dyoung 	KASSERT(mutex_owned(softnet_lock));
   1383   1.1    dyoung 
   1384   1.1    dyoung 	/* If no resources, we will not get far.
   1385   1.1    dyoung 	 */
   1386   1.1    dyoung 	if (!ctl || !ctl->base.v4 || avail <= 0)
   1387   1.1    dyoung 		return 0;
   1388   1.1    dyoung 
   1389   1.1    dyoung 	/* Obtain a free one.
   1390   1.1    dyoung 	 */
   1391   1.1    dyoung 	while (!ctl->nfree) {
   1392   1.1    dyoung 		vtw_age(ctl, 0);
   1393   1.1    dyoung 
   1394   1.1    dyoung 		if (++stuck > avail) {
   1395   1.1    dyoung 			/* When in transition between
   1396   1.1    dyoung 			 * schemes (classless, classed) we
   1397   1.1    dyoung 			 * can be stuck having to await the
   1398   1.1    dyoung 			 * expiration of cross-allocated entries.
   1399   1.1    dyoung 			 *
   1400   1.1    dyoung 			 * Returning zero means we will fall back to the
   1401   1.1    dyoung 			 * traditional TIME_WAIT handling, except in the
   1402   1.1    dyoung 			 * case of a re-shed, in which case we cannot
   1403   1.1    dyoung 			 * perform the reshecd, but will retain the extant
   1404   1.1    dyoung 			 * entry.
   1405   1.1    dyoung 			 */
   1406   1.1    dyoung 			db_trace(KTR_VTW
   1407   1.1    dyoung 				 , (ctl, "vtw:!none free in class %x %x/%x"
   1408   1.1    dyoung 				    , ctl->clidx
   1409   1.1    dyoung 				    , ctl->nalloc, ctl->nfree));
   1410   1.1    dyoung 
   1411   1.1    dyoung 			return 0;
   1412   1.1    dyoung 		}
   1413   1.1    dyoung 	}
   1414   1.1    dyoung 
   1415   1.1    dyoung 	vtw = ctl->alloc.v;
   1416   1.1    dyoung 
   1417   1.1    dyoung 	if (vtw->msl_class != ctl->clidx) {
   1418   1.1    dyoung 		/* Usurping rules:
   1419   1.1    dyoung 		 * 	0 -> {1,2,3} or {1,2,3} -> 0
   1420   1.1    dyoung 		 */
   1421   1.1    dyoung 		KASSERT(!vtw->msl_class || !ctl->clidx);
   1422   1.1    dyoung 
   1423   1.1    dyoung 		if (vtw->hashed || vtw->expire.tv_sec) {
   1424   1.1    dyoung 		    /* As this is owned by some other class,
   1425   1.1    dyoung 		     * we must wait for it to expire it.
   1426   1.1    dyoung 		     * This will only happen on class/classless
   1427   1.1    dyoung 		     * transitions, which are guaranteed to progress
   1428   1.1    dyoung 		     * to completion in small finite time, barring bugs.
   1429   1.1    dyoung 		     */
   1430   1.1    dyoung 		    db_trace(KTR_VTW
   1431   1.1    dyoung 			     , (ctl, "vtw:!%p class %x!=%x %x:%x%s"
   1432   1.1    dyoung 				, vtw, vtw->msl_class, ctl->clidx
   1433   1.1    dyoung 				, vtw->expire.tv_sec
   1434   1.1    dyoung 				, vtw->expire.tv_usec
   1435   1.1    dyoung 				, vtw->hashed ? " hashed" : ""));
   1436   1.1    dyoung 
   1437   1.1    dyoung 		    return 0;
   1438   1.1    dyoung 		}
   1439   1.1    dyoung 
   1440   1.1    dyoung 		db_trace(KTR_VTW
   1441   1.1    dyoung 			 , (ctl, "vtw:!%p usurped from %x to %x"
   1442   1.1    dyoung 			    , vtw, vtw->msl_class, ctl->clidx));
   1443   1.1    dyoung 
   1444   1.1    dyoung 		vtw->msl_class = ctl->clidx;
   1445   1.1    dyoung 	}
   1446   1.1    dyoung 
   1447   1.1    dyoung 	if (vtw_alive(vtw)) {
   1448   1.1    dyoung 		KASSERT(0 && "next free not free");
   1449   1.1    dyoung 		return 0;
   1450   1.1    dyoung 	}
   1451   1.1    dyoung 
   1452   1.1    dyoung 	/* Advance allocation poiter.
   1453   1.1    dyoung 	 */
   1454   1.1    dyoung 	ctl->alloc.v = vtw_next(ctl, vtw);
   1455   1.1    dyoung 
   1456   1.1    dyoung 	--ctl->nfree;
   1457   1.1    dyoung 	++ctl->nalloc;
   1458   1.1    dyoung 
   1459   1.1    dyoung 	msl = (2 * class_to_msl(ctl->clidx) * 1000) / PR_SLOWHZ;	// msec
   1460   1.1    dyoung 
   1461   1.1    dyoung 	/* mark expiration
   1462   1.1    dyoung 	 */
   1463   1.3  drochner 	getmicrouptime(&vtw->expire);
   1464   1.1    dyoung 
   1465   1.1    dyoung 	/* Move expiration into the future.
   1466   1.1    dyoung 	 */
   1467   1.1    dyoung 	vtw->expire.tv_sec  += msl / 1000;
   1468   1.1    dyoung 	vtw->expire.tv_usec += 1000 * (msl % 1000);
   1469   1.1    dyoung 
   1470   1.1    dyoung 	while (vtw->expire.tv_usec >= 1000*1000) {
   1471   1.1    dyoung 		vtw->expire.tv_usec -= 1000*1000;
   1472   1.1    dyoung 		vtw->expire.tv_sec  += 1;
   1473   1.1    dyoung 	}
   1474   1.1    dyoung 
   1475   1.1    dyoung 	if (!ctl->oldest.v)
   1476   1.1    dyoung 		ctl->oldest.v = vtw;
   1477   1.1    dyoung 
   1478   1.1    dyoung 	return vtw;
   1479   1.1    dyoung }
   1480   1.1    dyoung 
   1481   1.1    dyoung /*!\brief expiration
   1482   1.1    dyoung  */
   1483   1.1    dyoung static int
   1484   1.1    dyoung vtw_age(vtw_ctl_t *ctl, struct timeval *_when)
   1485   1.1    dyoung {
   1486   1.1    dyoung 	vtw_t	*vtw;
   1487   1.1    dyoung 	struct timeval then, *when = _when;
   1488   1.1    dyoung 	int	maxtries = 0;
   1489   1.1    dyoung 
   1490   1.1    dyoung 	if (!ctl->oldest.v) {
   1491   1.1    dyoung 		KASSERT(!ctl->nalloc);
   1492   1.1    dyoung 		return 0;
   1493   1.1    dyoung 	}
   1494   1.1    dyoung 
   1495   1.1    dyoung 	for (vtw = ctl->oldest.v; vtw && ctl->nalloc; ) {
   1496   1.1    dyoung 		if (++maxtries > ctl->nalloc)
   1497   1.1    dyoung 			break;
   1498   1.1    dyoung 
   1499   1.1    dyoung 		if (vtw->msl_class != ctl->clidx) {
   1500   1.1    dyoung 			db_trace(KTR_VTW
   1501   1.1    dyoung 				 , (vtw, "vtw:!age class mismatch %x != %x"
   1502   1.1    dyoung 				    , vtw->msl_class, ctl->clidx));
   1503   1.1    dyoung 			/* XXXX
   1504   1.1    dyoung 			 * See if the appropriate action is to skip to the next.
   1505   1.1    dyoung 			 * XXXX
   1506   1.1    dyoung 			 */
   1507   1.1    dyoung 			ctl->oldest.v = vtw = vtw_next(ctl, vtw);
   1508   1.1    dyoung 			continue;
   1509   1.1    dyoung 		}
   1510   1.1    dyoung 		if (!when) {
   1511   1.1    dyoung 			/* Latch oldest timeval if none specified.
   1512   1.1    dyoung 			 */
   1513   1.1    dyoung 			then = vtw->expire;
   1514   1.1    dyoung 			when = &then;
   1515   1.1    dyoung 		}
   1516   1.1    dyoung 
   1517   1.1    dyoung 		if (!timercmp(&vtw->expire, when, <=))
   1518   1.1    dyoung 			break;
   1519   1.1    dyoung 
   1520   1.1    dyoung 		db_trace(KTR_VTW
   1521   1.1    dyoung 			 , (vtw, "vtw: expire %x %8.8x:%8.8x %x/%x"
   1522   1.1    dyoung 			    , ctl->clidx
   1523   1.1    dyoung 			    , vtw->expire.tv_sec
   1524   1.1    dyoung 			    , vtw->expire.tv_usec
   1525   1.1    dyoung 			    , ctl->nalloc
   1526   1.1    dyoung 			    , ctl->nfree));
   1527   1.1    dyoung 
   1528   1.1    dyoung 		if (!_when)
   1529   1.1    dyoung 			++vtw_stats.kill;
   1530   1.1    dyoung 
   1531   1.1    dyoung 		vtw_del(ctl, vtw);
   1532   1.1    dyoung 		vtw = ctl->oldest.v;
   1533   1.1    dyoung 	}
   1534   1.1    dyoung 
   1535   1.1    dyoung 	return ctl->nalloc;	// # remaining allocated
   1536   1.1    dyoung }
   1537   1.1    dyoung 
   1538   1.1    dyoung static callout_t vtw_cs;
   1539   1.1    dyoung 
   1540   1.1    dyoung /*!\brief notice the passage of time.
   1541   1.1    dyoung  * It seems to be getting faster.  What happened to the year?
   1542   1.1    dyoung  */
   1543   1.1    dyoung static void
   1544   1.1    dyoung vtw_tick(void *arg)
   1545   1.1    dyoung {
   1546   1.1    dyoung 	struct timeval now;
   1547   1.1    dyoung 	int i, cnt = 0;
   1548   1.1    dyoung 
   1549   1.3  drochner 	getmicrouptime(&now);
   1550   1.1    dyoung 
   1551   1.1    dyoung 	db_trace(KTR_VTW, (arg, "vtk: tick - now %8.8x:%8.8x"
   1552   1.1    dyoung 			   , now.tv_sec, now.tv_usec));
   1553   1.1    dyoung 
   1554   1.1    dyoung 	mutex_enter(softnet_lock);
   1555   1.1    dyoung 
   1556   1.1    dyoung 	for (i = 0; i < VTW_NCLASS; ++i) {
   1557   1.1    dyoung 		cnt += vtw_age(&vtw_tcpv4[i], &now);
   1558   1.1    dyoung 		cnt += vtw_age(&vtw_tcpv6[i], &now);
   1559   1.1    dyoung 	}
   1560   1.1    dyoung 
   1561   1.1    dyoung 	/* Keep ticks coming while we need them.
   1562   1.1    dyoung 	 */
   1563   1.1    dyoung 	if (cnt)
   1564   1.1    dyoung 		callout_schedule(&vtw_cs, hz / 5);
   1565   1.1    dyoung 	else {
   1566   1.1    dyoung 		tcp_vtw_was_enabled = 0;
   1567   1.1    dyoung 		tcbtable.vestige    = 0;
   1568   1.1    dyoung 	}
   1569   1.1    dyoung 	mutex_exit(softnet_lock);
   1570   1.1    dyoung }
   1571   1.1    dyoung 
   1572   1.1    dyoung /* in_pcblookup_ports assist for handling vestigial entries.
   1573   1.1    dyoung  */
   1574   1.1    dyoung static void *
   1575   1.1    dyoung tcp_init_ports_v4(struct in_addr addr, u_int port, int wild)
   1576   1.1    dyoung {
   1577   1.1    dyoung 	struct tcp_ports_iterator *it = &tcp_ports_iterator_v4;
   1578   1.1    dyoung 
   1579   1.1    dyoung 	bzero(it, sizeof (*it));
   1580   1.1    dyoung 
   1581   1.1    dyoung 	/* Note: the reference to vtw_tcpv4[0] is fine.
   1582   1.1    dyoung 	 * We do not need per-class iteration.  We just
   1583   1.1    dyoung 	 * need to get to the fat, and there is one
   1584   1.1    dyoung 	 * shared fat.
   1585   1.1    dyoung 	 */
   1586   1.1    dyoung 	if (vtw_tcpv4[0].fat) {
   1587   1.1    dyoung 		it->addr.v4 = addr;
   1588   1.1    dyoung 		it->port = port;
   1589   1.1    dyoung 		it->wild = !!wild;
   1590   1.1    dyoung 		it->ctl  = &vtw_tcpv4[0];
   1591   1.1    dyoung 
   1592   1.1    dyoung 		++vtw_stats.look[1];
   1593   1.1    dyoung 	}
   1594   1.1    dyoung 
   1595   1.1    dyoung 	return it;
   1596   1.1    dyoung }
   1597   1.1    dyoung 
   1598   1.1    dyoung /*!\brief export an IPv4 vtw.
   1599   1.1    dyoung  */
   1600   1.1    dyoung static int
   1601   1.1    dyoung vtw_export_v4(vtw_ctl_t *ctl, vtw_t *vtw, vestigial_inpcb_t *res)
   1602   1.1    dyoung {
   1603   1.1    dyoung 	vtw_v4_t	*v4 = (void*)vtw;
   1604   1.1    dyoung 
   1605   1.1    dyoung 	bzero(res, sizeof (*res));
   1606   1.1    dyoung 
   1607   1.1    dyoung 	if (ctl && vtw) {
   1608   1.1    dyoung 		if (!ctl->clidx && vtw->msl_class)
   1609   1.1    dyoung 			ctl += vtw->msl_class;
   1610   1.1    dyoung 		else
   1611   1.1    dyoung 			KASSERT(ctl->clidx == vtw->msl_class);
   1612   1.1    dyoung 
   1613   1.1    dyoung 		res->valid = 1;
   1614   1.1    dyoung 		res->v4    = 1;
   1615   1.1    dyoung 
   1616   1.1    dyoung 		res->faddr.v4.s_addr = v4->faddr;
   1617   1.1    dyoung 		res->laddr.v4.s_addr = v4->laddr;
   1618   1.1    dyoung 		res->fport	= v4->fport;
   1619   1.1    dyoung 		res->lport	= v4->lport;
   1620   1.1    dyoung 		res->vtw	= vtw;		// netlock held over call(s)
   1621   1.1    dyoung 		res->ctl	= ctl;
   1622   1.1    dyoung 		res->reuse_addr = vtw->reuse_addr;
   1623   1.1    dyoung 		res->reuse_port = vtw->reuse_port;
   1624   1.1    dyoung 		res->snd_nxt    = vtw->snd_nxt;
   1625   1.1    dyoung 		res->rcv_nxt	= vtw->rcv_nxt;
   1626   1.1    dyoung 		res->rcv_wnd	= vtw->rcv_wnd;
   1627   1.1    dyoung 		res->uid	= vtw->uid;
   1628   1.1    dyoung 	}
   1629   1.1    dyoung 
   1630   1.1    dyoung 	return res->valid;
   1631   1.1    dyoung }
   1632   1.1    dyoung 
   1633   1.1    dyoung /*!\brief return next port in the port iterator.  yowza.
   1634   1.1    dyoung  */
   1635   1.1    dyoung static int
   1636   1.1    dyoung tcp_next_port_v4(void *arg, struct vestigial_inpcb *res)
   1637   1.1    dyoung {
   1638   1.1    dyoung 	struct tcp_ports_iterator *it = arg;
   1639   1.1    dyoung 	vtw_t		*vtw = 0;
   1640   1.1    dyoung 
   1641   1.1    dyoung 	if (it->ctl)
   1642   1.1    dyoung 		vtw = vtw_next_port_v4(it);
   1643   1.1    dyoung 
   1644   1.1    dyoung 	if (!vtw)
   1645   1.1    dyoung 		it->ctl = 0;
   1646   1.1    dyoung 
   1647   1.1    dyoung 	return vtw_export_v4(it->ctl, vtw, res);
   1648   1.1    dyoung }
   1649   1.1    dyoung 
   1650   1.1    dyoung static int
   1651   1.1    dyoung tcp_lookup_v4(struct in_addr faddr, uint16_t fport,
   1652   1.1    dyoung               struct in_addr laddr, uint16_t lport,
   1653   1.1    dyoung 	      struct vestigial_inpcb *res)
   1654   1.1    dyoung {
   1655   1.1    dyoung 	vtw_t		*vtw;
   1656   1.1    dyoung 	vtw_ctl_t	*ctl;
   1657   1.1    dyoung 
   1658   1.1    dyoung 
   1659   1.1    dyoung 	db_trace(KTR_VTW
   1660   1.1    dyoung 		 , (res, "vtw: lookup %A:%P %A:%P"
   1661   1.1    dyoung 		    , faddr, fport
   1662   1.1    dyoung 		    , laddr, lport));
   1663   1.1    dyoung 
   1664   1.1    dyoung 	vtw = vtw_lookup_hash_v4((ctl = &vtw_tcpv4[0])
   1665   1.1    dyoung 				 , faddr.s_addr, fport
   1666   1.1    dyoung 				 , laddr.s_addr, lport, 0);
   1667   1.1    dyoung 
   1668   1.1    dyoung 	return vtw_export_v4(ctl, vtw, res);
   1669   1.1    dyoung }
   1670   1.1    dyoung 
   1671   1.1    dyoung /* in_pcblookup_ports assist for handling vestigial entries.
   1672   1.1    dyoung  */
   1673   1.1    dyoung static void *
   1674   1.1    dyoung tcp_init_ports_v6(const struct in6_addr *addr, u_int port, int wild)
   1675   1.1    dyoung {
   1676   1.1    dyoung 	struct tcp_ports_iterator *it = &tcp_ports_iterator_v6;
   1677   1.1    dyoung 
   1678   1.1    dyoung 	bzero(it, sizeof (*it));
   1679   1.1    dyoung 
   1680   1.1    dyoung 	/* Note: the reference to vtw_tcpv6[0] is fine.
   1681   1.1    dyoung 	 * We do not need per-class iteration.  We just
   1682   1.1    dyoung 	 * need to get to the fat, and there is one
   1683   1.1    dyoung 	 * shared fat.
   1684   1.1    dyoung 	 */
   1685   1.1    dyoung 	if (vtw_tcpv6[0].fat) {
   1686   1.1    dyoung 		it->addr.v6 = *addr;
   1687   1.1    dyoung 		it->port = port;
   1688   1.1    dyoung 		it->wild = !!wild;
   1689   1.1    dyoung 		it->ctl  = &vtw_tcpv6[0];
   1690   1.1    dyoung 
   1691   1.1    dyoung 		++vtw_stats.look[1];
   1692   1.1    dyoung 	}
   1693   1.1    dyoung 
   1694   1.1    dyoung 	return it;
   1695   1.1    dyoung }
   1696   1.1    dyoung 
   1697   1.1    dyoung /*!\brief export an IPv6 vtw.
   1698   1.1    dyoung  */
   1699   1.1    dyoung static int
   1700   1.1    dyoung vtw_export_v6(vtw_ctl_t *ctl, vtw_t *vtw, vestigial_inpcb_t *res)
   1701   1.1    dyoung {
   1702   1.1    dyoung 	vtw_v6_t	*v6 = (void*)vtw;
   1703   1.1    dyoung 
   1704   1.1    dyoung 	bzero(res, sizeof (*res));
   1705   1.1    dyoung 
   1706   1.1    dyoung 	if (ctl && vtw) {
   1707   1.1    dyoung 		if (!ctl->clidx && vtw->msl_class)
   1708   1.1    dyoung 			ctl += vtw->msl_class;
   1709   1.1    dyoung 		else
   1710   1.1    dyoung 			KASSERT(ctl->clidx == vtw->msl_class);
   1711   1.1    dyoung 
   1712   1.1    dyoung 		res->valid = 1;
   1713   1.1    dyoung 		res->v4    = 0;
   1714   1.1    dyoung 
   1715   1.1    dyoung 		res->faddr.v6	= v6->faddr;
   1716   1.1    dyoung 		res->laddr.v6	= v6->laddr;
   1717   1.1    dyoung 		res->fport	= v6->fport;
   1718   1.1    dyoung 		res->lport	= v6->lport;
   1719   1.1    dyoung 		res->vtw	= vtw;		// netlock held over call(s)
   1720   1.1    dyoung 		res->ctl	= ctl;
   1721   1.1    dyoung 
   1722   1.1    dyoung 		res->v6only	= vtw->v6only;
   1723   1.1    dyoung 		res->reuse_addr = vtw->reuse_addr;
   1724   1.1    dyoung 		res->reuse_port = vtw->reuse_port;
   1725   1.1    dyoung 
   1726   1.1    dyoung 		res->snd_nxt    = vtw->snd_nxt;
   1727   1.1    dyoung 		res->rcv_nxt	= vtw->rcv_nxt;
   1728   1.1    dyoung 		res->rcv_wnd	= vtw->rcv_wnd;
   1729   1.1    dyoung 		res->uid	= vtw->uid;
   1730   1.1    dyoung 	}
   1731   1.1    dyoung 
   1732   1.1    dyoung 	return res->valid;
   1733   1.1    dyoung }
   1734   1.1    dyoung 
   1735   1.1    dyoung static int
   1736   1.1    dyoung tcp_next_port_v6(void *arg, struct vestigial_inpcb *res)
   1737   1.1    dyoung {
   1738   1.1    dyoung 	struct tcp_ports_iterator *it = arg;
   1739   1.1    dyoung 	vtw_t		*vtw = 0;
   1740   1.1    dyoung 
   1741   1.1    dyoung 	if (it->ctl)
   1742   1.1    dyoung 		vtw = vtw_next_port_v6(it);
   1743   1.1    dyoung 
   1744   1.1    dyoung 	if (!vtw)
   1745   1.1    dyoung 		it->ctl = 0;
   1746   1.1    dyoung 
   1747   1.1    dyoung 	return vtw_export_v6(it->ctl, vtw, res);
   1748   1.1    dyoung }
   1749   1.1    dyoung 
   1750   1.1    dyoung static int
   1751   1.1    dyoung tcp_lookup_v6(const struct in6_addr *faddr, uint16_t fport,
   1752   1.1    dyoung               const struct in6_addr *laddr, uint16_t lport,
   1753   1.1    dyoung 	      struct vestigial_inpcb *res)
   1754   1.1    dyoung {
   1755   1.1    dyoung 	vtw_ctl_t	*ctl;
   1756   1.1    dyoung 	vtw_t		*vtw;
   1757   1.1    dyoung 
   1758   1.1    dyoung 	db_trace(KTR_VTW
   1759   1.1    dyoung 		 , (res, "vtw: lookup %6A:%P %6A:%P"
   1760   1.1    dyoung 		    , db_store(faddr, sizeof (*faddr)), fport
   1761   1.1    dyoung 		    , db_store(laddr, sizeof (*laddr)), lport));
   1762   1.1    dyoung 
   1763   1.1    dyoung 	vtw = vtw_lookup_hash_v6((ctl = &vtw_tcpv6[0])
   1764   1.1    dyoung 				 , faddr, fport
   1765   1.1    dyoung 				 , laddr, lport, 0);
   1766   1.1    dyoung 
   1767   1.1    dyoung 	return vtw_export_v6(ctl, vtw, res);
   1768   1.1    dyoung }
   1769   1.1    dyoung 
   1770   1.1    dyoung static vestigial_hooks_t tcp_hooks = {
   1771   1.1    dyoung 	.init_ports4	= tcp_init_ports_v4,
   1772   1.1    dyoung 	.next_port4	= tcp_next_port_v4,
   1773   1.1    dyoung 	.lookup4	= tcp_lookup_v4,
   1774   1.1    dyoung 	.init_ports6	= tcp_init_ports_v6,
   1775   1.1    dyoung 	.next_port6	= tcp_next_port_v6,
   1776   1.1    dyoung 	.lookup6	= tcp_lookup_v6,
   1777   1.1    dyoung };
   1778   1.1    dyoung 
   1779   1.1    dyoung static bool
   1780   1.1    dyoung vtw_select(int af, fatp_ctl_t **fatp, vtw_ctl_t **ctlp)
   1781   1.1    dyoung {
   1782   1.1    dyoung 	fatp_ctl_t	*fat;
   1783   1.1    dyoung 	vtw_ctl_t	*ctl;
   1784   1.1    dyoung 
   1785   1.1    dyoung 	switch (af) {
   1786   1.1    dyoung 	case AF_INET:
   1787   1.1    dyoung 		fat = &fat_tcpv4;
   1788   1.1    dyoung 		ctl = &vtw_tcpv4[0];
   1789   1.1    dyoung 		break;
   1790   1.1    dyoung 	case AF_INET6:
   1791   1.1    dyoung 		fat = &fat_tcpv6;
   1792   1.1    dyoung 		ctl = &vtw_tcpv6[0];
   1793   1.1    dyoung 		break;
   1794   1.1    dyoung 	default:
   1795   1.1    dyoung 		return false;
   1796   1.1    dyoung 	}
   1797   1.1    dyoung 	if (fatp != NULL)
   1798   1.1    dyoung 		*fatp = fat;
   1799   1.1    dyoung 	if (ctlp != NULL)
   1800   1.1    dyoung 		*ctlp = ctl;
   1801   1.1    dyoung 	return true;
   1802   1.1    dyoung }
   1803   1.1    dyoung 
   1804   1.1    dyoung /*!\brief	initialize controlling instance
   1805   1.1    dyoung  */
   1806   1.1    dyoung static int
   1807   1.1    dyoung vtw_control_init(int af)
   1808   1.1    dyoung {
   1809   1.1    dyoung 	fatp_ctl_t	*fat;
   1810   1.1    dyoung 	vtw_ctl_t	*ctl;
   1811   1.6    dyoung 	fatp_t		*fat_base;
   1812   1.6    dyoung 	fatp_t		**fat_hash;
   1813   1.6    dyoung 	vtw_t		*ctl_base_v;
   1814   1.6    dyoung 	uint32_t	n, m;
   1815   1.6    dyoung 	size_t sz;
   1816   1.6    dyoung 
   1817   1.6    dyoung 	KASSERT(powerof2(tcp_vtw_entries));
   1818   1.1    dyoung 
   1819   1.1    dyoung 	if (!vtw_select(af, &fat, &ctl))
   1820   1.1    dyoung 		return EAFNOSUPPORT;
   1821   1.1    dyoung 
   1822   1.6    dyoung 	if (fat->hash != NULL) {
   1823   1.6    dyoung 		KASSERT(fat->base != NULL && ctl->base.v != NULL);
   1824   1.6    dyoung 		return 0;
   1825   1.6    dyoung 	}
   1826   1.6    dyoung 
   1827   1.6    dyoung 	/* Allocate 10% more capacity in the fat pointers.
   1828   1.6    dyoung 	 * We should only need ~#hash additional based on
   1829   1.6    dyoung 	 * how they age, but TIME_WAIT assassination could cause
   1830   1.6    dyoung 	 * sparse fat pointer utilisation.
   1831   1.6    dyoung 	 */
   1832   1.6    dyoung 	m = 512;
   1833   1.6    dyoung 	n = 2*m + (11 * (tcp_vtw_entries / fatp_ntags())) / 10;
   1834   1.6    dyoung 	sz = (ctl->is_v4 ? sizeof(vtw_v4_t) : sizeof(vtw_v6_t));
   1835   1.6    dyoung 
   1836   1.6    dyoung 	fat_hash = kmem_zalloc(2*m * sizeof(fatp_t *), KM_NOSLEEP);
   1837   1.6    dyoung 
   1838   1.6    dyoung 	if (fat_hash == NULL) {
   1839   1.6    dyoung 		printf("%s: could not allocate %zu bytes for "
   1840   1.6    dyoung 		    "hash anchors", __func__, 2*m * sizeof(fatp_t *));
   1841   1.6    dyoung 		return ENOMEM;
   1842   1.6    dyoung 	}
   1843   1.1    dyoung 
   1844   1.6    dyoung 	fat_base = kmem_zalloc(2*n * sizeof(fatp_t), KM_NOSLEEP);
   1845   1.1    dyoung 
   1846   1.6    dyoung 	if (fat_base == NULL) {
   1847   1.6    dyoung 		kmem_free(fat_hash, 2*m * sizeof (fatp_t *));
   1848   1.6    dyoung 		printf("%s: could not allocate %zu bytes for "
   1849   1.6    dyoung 		    "fatp_t array", __func__, 2*n * sizeof(fatp_t));
   1850   1.6    dyoung 		return ENOMEM;
   1851   1.6    dyoung 	}
   1852   1.1    dyoung 
   1853   1.6    dyoung 	ctl_base_v = kmem_zalloc(tcp_vtw_entries * sz, KM_NOSLEEP);
   1854   1.1    dyoung 
   1855   1.6    dyoung 	if (ctl_base_v == NULL) {
   1856   1.6    dyoung 		kmem_free(fat_hash, 2*m * sizeof (fatp_t *));
   1857   1.6    dyoung 		kmem_free(fat_base, 2*n * sizeof(fatp_t));
   1858   1.6    dyoung 		printf("%s: could not allocate %zu bytes for "
   1859   1.6    dyoung 		    "vtw_t array", __func__, tcp_vtw_entries * sz);
   1860   1.6    dyoung 		return ENOMEM;
   1861   1.1    dyoung 	}
   1862   1.1    dyoung 
   1863   1.6    dyoung 	fatp_init(fat, n, m, fat_base, fat_hash);
   1864   1.1    dyoung 
   1865   1.6    dyoung 	vtw_init(fat, ctl, tcp_vtw_entries, ctl_base_v);
   1866   1.1    dyoung 
   1867   1.1    dyoung 	return 0;
   1868   1.1    dyoung }
   1869   1.1    dyoung 
   1870   1.1    dyoung /*!\brief	select controlling instance
   1871   1.1    dyoung  */
   1872   1.1    dyoung static vtw_ctl_t *
   1873   1.1    dyoung vtw_control(int af, uint32_t msl)
   1874   1.1    dyoung {
   1875   1.1    dyoung 	fatp_ctl_t	*fat;
   1876   1.1    dyoung 	vtw_ctl_t	*ctl;
   1877  1.11      matt 	int		msl_class = msl_to_class(msl);
   1878   1.1    dyoung 
   1879   1.1    dyoung 	if (!vtw_select(af, &fat, &ctl))
   1880   1.1    dyoung 		return NULL;
   1881   1.1    dyoung 
   1882   1.1    dyoung 	if (!fat->base || !ctl->base.v)
   1883   1.1    dyoung 		return NULL;
   1884   1.1    dyoung 
   1885   1.5    dyoung 	if (!tcp_vtw_was_enabled) {
   1886   1.5    dyoung 		/* This guarantees is timer ticks until we no longer need them.
   1887   1.5    dyoung 		 */
   1888   1.5    dyoung 		tcp_vtw_was_enabled = 1;
   1889   1.5    dyoung 
   1890   1.5    dyoung 		callout_schedule(&vtw_cs, hz / 5);
   1891   1.5    dyoung 
   1892   1.5    dyoung 		tcbtable.vestige = &tcp_hooks;
   1893   1.5    dyoung 	}
   1894   1.5    dyoung 
   1895  1.11      matt 	return ctl + msl_class;
   1896   1.1    dyoung }
   1897   1.1    dyoung 
   1898   1.1    dyoung /*!\brief	add TCP pcb to vestigial timewait
   1899   1.1    dyoung  */
   1900   1.1    dyoung int
   1901   1.1    dyoung vtw_add(int af, struct tcpcb *tp)
   1902   1.1    dyoung {
   1903  1.10    martin #ifdef VTW_DEBUG
   1904   1.1    dyoung 	int		enable;
   1905  1.10    martin #endif
   1906   1.1    dyoung 	vtw_ctl_t	*ctl;
   1907   1.1    dyoung 	vtw_t		*vtw;
   1908   1.1    dyoung 
   1909   1.1    dyoung 	KASSERT(mutex_owned(softnet_lock));
   1910   1.1    dyoung 
   1911   1.1    dyoung 	ctl = vtw_control(af, tp->t_msl);
   1912   1.1    dyoung 	if (!ctl)
   1913   1.1    dyoung 		return 0;
   1914   1.1    dyoung 
   1915  1.10    martin #ifdef VTW_DEBUG
   1916   1.1    dyoung 	enable = (af == AF_INET) ? tcp4_vtw_enable : tcp6_vtw_enable;
   1917  1.10    martin #endif
   1918   1.1    dyoung 
   1919   1.1    dyoung 	vtw = vtw_alloc(ctl);
   1920   1.1    dyoung 
   1921   1.1    dyoung 	if (vtw) {
   1922   1.1    dyoung 		vtw->snd_nxt = tp->snd_nxt;
   1923   1.1    dyoung 		vtw->rcv_nxt = tp->rcv_nxt;
   1924   1.1    dyoung 
   1925   1.1    dyoung 		switch (af) {
   1926   1.1    dyoung 		case AF_INET: {
   1927   1.1    dyoung 			struct inpcb	*inp = tp->t_inpcb;
   1928   1.1    dyoung 			vtw_v4_t	*v4  = (void*)vtw;
   1929   1.1    dyoung 
   1930   1.1    dyoung 			v4->faddr = inp->inp_faddr.s_addr;
   1931   1.1    dyoung 			v4->laddr = inp->inp_laddr.s_addr;
   1932   1.1    dyoung 			v4->fport = inp->inp_fport;
   1933   1.1    dyoung 			v4->lport = inp->inp_lport;
   1934   1.1    dyoung 
   1935   1.1    dyoung 			vtw->reuse_port = !!(inp->inp_socket->so_options
   1936   1.1    dyoung 					     & SO_REUSEPORT);
   1937   1.1    dyoung 			vtw->reuse_addr = !!(inp->inp_socket->so_options
   1938   1.1    dyoung 					     & SO_REUSEADDR);
   1939   1.1    dyoung 			vtw->v6only	= 0;
   1940   1.1    dyoung 			vtw->uid	= inp->inp_socket->so_uidinfo->ui_uid;
   1941   1.1    dyoung 
   1942   1.1    dyoung 			vtw_inshash_v4(ctl, vtw);
   1943   1.1    dyoung 
   1944   1.1    dyoung 
   1945   1.1    dyoung #ifdef VTW_DEBUG
   1946   1.1    dyoung 			/* Immediate lookup (connected and port) to
   1947   1.1    dyoung 			 * ensure at least that works!
   1948   1.1    dyoung 			 */
   1949   1.1    dyoung 			if (enable & 4) {
   1950   1.1    dyoung 				KASSERT(vtw_lookup_hash_v4
   1951   1.1    dyoung 					(ctl
   1952   1.1    dyoung 					 , inp->inp_faddr.s_addr, inp->inp_fport
   1953   1.1    dyoung 					 , inp->inp_laddr.s_addr, inp->inp_lport
   1954   1.1    dyoung 					 , 0)
   1955   1.1    dyoung 					== vtw);
   1956   1.1    dyoung 				KASSERT(vtw_lookup_hash_v4
   1957   1.1    dyoung 					(ctl
   1958   1.1    dyoung 					 , inp->inp_faddr.s_addr, inp->inp_fport
   1959   1.1    dyoung 					 , inp->inp_laddr.s_addr, inp->inp_lport
   1960   1.1    dyoung 					 , 1));
   1961   1.1    dyoung 			}
   1962   1.1    dyoung 			/* Immediate port iterator functionality check: not wild
   1963   1.1    dyoung 			 */
   1964   1.1    dyoung 			if (enable & 8) {
   1965   1.1    dyoung 				struct tcp_ports_iterator *it;
   1966   1.1    dyoung 				struct vestigial_inpcb res;
   1967   1.1    dyoung 				int cnt = 0;
   1968   1.1    dyoung 
   1969   1.1    dyoung 				it = tcp_init_ports_v4(inp->inp_laddr
   1970   1.1    dyoung 						       , inp->inp_lport, 0);
   1971   1.1    dyoung 
   1972   1.1    dyoung 				while (tcp_next_port_v4(it, &res)) {
   1973   1.1    dyoung 					++cnt;
   1974   1.1    dyoung 				}
   1975   1.1    dyoung 				KASSERT(cnt);
   1976   1.1    dyoung 			}
   1977   1.1    dyoung 			/* Immediate port iterator functionality check: wild
   1978   1.1    dyoung 			 */
   1979   1.1    dyoung 			if (enable & 16) {
   1980   1.1    dyoung 				struct tcp_ports_iterator *it;
   1981   1.1    dyoung 				struct vestigial_inpcb res;
   1982   1.1    dyoung 				struct in_addr any;
   1983   1.1    dyoung 				int cnt = 0;
   1984   1.1    dyoung 
   1985   1.1    dyoung 				any.s_addr = htonl(INADDR_ANY);
   1986   1.1    dyoung 
   1987   1.1    dyoung 				it = tcp_init_ports_v4(any, inp->inp_lport, 1);
   1988   1.1    dyoung 
   1989   1.1    dyoung 				while (tcp_next_port_v4(it, &res)) {
   1990   1.1    dyoung 					++cnt;
   1991   1.1    dyoung 				}
   1992   1.1    dyoung 				KASSERT(cnt);
   1993   1.1    dyoung 			}
   1994   1.1    dyoung #endif /* VTW_DEBUG */
   1995   1.1    dyoung 			break;
   1996   1.1    dyoung 		}
   1997   1.1    dyoung 
   1998   1.1    dyoung 		case AF_INET6: {
   1999   1.1    dyoung 			struct in6pcb	*inp = tp->t_in6pcb;
   2000   1.1    dyoung 			vtw_v6_t	*v6  = (void*)vtw;
   2001   1.1    dyoung 
   2002   1.1    dyoung 			v6->faddr = inp->in6p_faddr;
   2003   1.1    dyoung 			v6->laddr = inp->in6p_laddr;
   2004   1.1    dyoung 			v6->fport = inp->in6p_fport;
   2005   1.1    dyoung 			v6->lport = inp->in6p_lport;
   2006   1.1    dyoung 
   2007   1.1    dyoung 			vtw->reuse_port = !!(inp->in6p_socket->so_options
   2008   1.1    dyoung 					     & SO_REUSEPORT);
   2009   1.1    dyoung 			vtw->reuse_addr = !!(inp->in6p_socket->so_options
   2010   1.1    dyoung 					     & SO_REUSEADDR);
   2011   1.1    dyoung 			vtw->v6only	= !!(inp->in6p_flags
   2012   1.1    dyoung 					     & IN6P_IPV6_V6ONLY);
   2013   1.1    dyoung 			vtw->uid	= inp->in6p_socket->so_uidinfo->ui_uid;
   2014   1.1    dyoung 
   2015   1.1    dyoung 			vtw_inshash_v6(ctl, vtw);
   2016   1.1    dyoung #ifdef VTW_DEBUG
   2017   1.1    dyoung 			/* Immediate lookup (connected and port) to
   2018   1.1    dyoung 			 * ensure at least that works!
   2019   1.1    dyoung 			 */
   2020   1.1    dyoung 			if (enable & 4) {
   2021   1.1    dyoung 				KASSERT(vtw_lookup_hash_v6(ctl
   2022   1.1    dyoung 					 , &inp->in6p_faddr, inp->in6p_fport
   2023   1.1    dyoung 					 , &inp->in6p_laddr, inp->in6p_lport
   2024   1.1    dyoung 					 , 0)
   2025   1.1    dyoung 					== vtw);
   2026   1.1    dyoung 				KASSERT(vtw_lookup_hash_v6
   2027   1.1    dyoung 					(ctl
   2028   1.1    dyoung 					 , &inp->in6p_faddr, inp->in6p_fport
   2029   1.1    dyoung 					 , &inp->in6p_laddr, inp->in6p_lport
   2030   1.1    dyoung 					 , 1));
   2031   1.1    dyoung 			}
   2032   1.1    dyoung 			/* Immediate port iterator functionality check: not wild
   2033   1.1    dyoung 			 */
   2034   1.1    dyoung 			if (enable & 8) {
   2035   1.1    dyoung 				struct tcp_ports_iterator *it;
   2036   1.1    dyoung 				struct vestigial_inpcb res;
   2037   1.1    dyoung 				int cnt = 0;
   2038   1.1    dyoung 
   2039   1.1    dyoung 				it = tcp_init_ports_v6(&inp->in6p_laddr
   2040   1.1    dyoung 						       , inp->in6p_lport, 0);
   2041   1.1    dyoung 
   2042   1.1    dyoung 				while (tcp_next_port_v6(it, &res)) {
   2043   1.1    dyoung 					++cnt;
   2044   1.1    dyoung 				}
   2045   1.1    dyoung 				KASSERT(cnt);
   2046   1.1    dyoung 			}
   2047   1.1    dyoung 			/* Immediate port iterator functionality check: wild
   2048   1.1    dyoung 			 */
   2049   1.1    dyoung 			if (enable & 16) {
   2050   1.1    dyoung 				struct tcp_ports_iterator *it;
   2051   1.1    dyoung 				struct vestigial_inpcb res;
   2052   1.1    dyoung 				static struct in6_addr any = IN6ADDR_ANY_INIT;
   2053   1.1    dyoung 				int cnt = 0;
   2054   1.1    dyoung 
   2055   1.1    dyoung 				it = tcp_init_ports_v6(&any
   2056   1.1    dyoung 						       , inp->in6p_lport, 1);
   2057   1.1    dyoung 
   2058   1.1    dyoung 				while (tcp_next_port_v6(it, &res)) {
   2059   1.1    dyoung 					++cnt;
   2060   1.1    dyoung 				}
   2061   1.1    dyoung 				KASSERT(cnt);
   2062   1.1    dyoung 			}
   2063   1.1    dyoung #endif /* VTW_DEBUG */
   2064   1.1    dyoung 			break;
   2065   1.1    dyoung 		}
   2066   1.1    dyoung 		}
   2067   1.1    dyoung 
   2068   1.1    dyoung 		tcp_canceltimers(tp);
   2069   1.1    dyoung 		tp = tcp_close(tp);
   2070   1.1    dyoung 		KASSERT(!tp);
   2071   1.1    dyoung 
   2072   1.1    dyoung 		return 1;
   2073   1.1    dyoung 	}
   2074   1.1    dyoung 
   2075   1.1    dyoung 	return 0;
   2076   1.1    dyoung }
   2077   1.1    dyoung 
   2078   1.1    dyoung /*!\brief	restart timer for vestigial time-wait entry
   2079   1.1    dyoung  */
   2080   1.1    dyoung static void
   2081   1.1    dyoung vtw_restart_v4(vestigial_inpcb_t *vp)
   2082   1.1    dyoung {
   2083   1.1    dyoung 	vtw_v4_t	copy = *(vtw_v4_t*)vp->vtw;
   2084   1.1    dyoung 	vtw_t		*vtw;
   2085   1.1    dyoung 	vtw_t		*cp  = &copy.common;
   2086   1.1    dyoung 	vtw_ctl_t	*ctl;
   2087   1.1    dyoung 
   2088   1.1    dyoung 	KASSERT(mutex_owned(softnet_lock));
   2089   1.1    dyoung 
   2090   1.1    dyoung 	db_trace(KTR_VTW
   2091   1.1    dyoung 		 , (vp->vtw, "vtw: restart %A:%P %A:%P"
   2092   1.1    dyoung 		    , vp->faddr.v4.s_addr, vp->fport
   2093   1.1    dyoung 		    , vp->laddr.v4.s_addr, vp->lport));
   2094   1.1    dyoung 
   2095   1.1    dyoung 	/* Class might have changed, so have a squiz.
   2096   1.1    dyoung 	 */
   2097   1.1    dyoung 	ctl = vtw_control(AF_INET, class_to_msl(cp->msl_class));
   2098   1.1    dyoung 	vtw = vtw_alloc(ctl);
   2099   1.1    dyoung 
   2100   1.1    dyoung 	if (vtw) {
   2101   1.1    dyoung 		vtw_v4_t	*v4  = (void*)vtw;
   2102   1.1    dyoung 
   2103   1.1    dyoung 		/* Safe now to unhash the old entry
   2104   1.1    dyoung 		 */
   2105   1.1    dyoung 		vtw_del(vp->ctl, vp->vtw);
   2106   1.1    dyoung 
   2107   1.1    dyoung 		vtw->snd_nxt = cp->snd_nxt;
   2108   1.1    dyoung 		vtw->rcv_nxt = cp->rcv_nxt;
   2109   1.1    dyoung 
   2110   1.1    dyoung 		v4->faddr = copy.faddr;
   2111   1.1    dyoung 		v4->laddr = copy.laddr;
   2112   1.1    dyoung 		v4->fport = copy.fport;
   2113   1.1    dyoung 		v4->lport = copy.lport;
   2114   1.1    dyoung 
   2115   1.1    dyoung 		vtw->reuse_port = cp->reuse_port;
   2116   1.1    dyoung 		vtw->reuse_addr = cp->reuse_addr;
   2117   1.1    dyoung 		vtw->v6only	= 0;
   2118   1.1    dyoung 		vtw->uid	= cp->uid;
   2119   1.1    dyoung 
   2120   1.1    dyoung 		vtw_inshash_v4(ctl, vtw);
   2121   1.1    dyoung 	}
   2122   1.1    dyoung 
   2123   1.1    dyoung 	vp->valid = 0;
   2124   1.1    dyoung }
   2125   1.1    dyoung 
   2126   1.1    dyoung /*!\brief	restart timer for vestigial time-wait entry
   2127   1.1    dyoung  */
   2128   1.1    dyoung static void
   2129   1.1    dyoung vtw_restart_v6(vestigial_inpcb_t *vp)
   2130   1.1    dyoung {
   2131   1.1    dyoung 	vtw_v6_t	copy = *(vtw_v6_t*)vp->vtw;
   2132   1.1    dyoung 	vtw_t		*vtw;
   2133   1.1    dyoung 	vtw_t		*cp  = &copy.common;
   2134   1.1    dyoung 	vtw_ctl_t	*ctl;
   2135   1.1    dyoung 
   2136   1.1    dyoung 	KASSERT(mutex_owned(softnet_lock));
   2137   1.1    dyoung 
   2138   1.1    dyoung 	db_trace(KTR_VTW
   2139   1.1    dyoung 		 , (vp->vtw, "vtw: restart %6A:%P %6A:%P"
   2140   1.1    dyoung 		    , db_store(&vp->faddr.v6, sizeof (vp->faddr.v6))
   2141   1.1    dyoung 		    , vp->fport
   2142   1.1    dyoung 		    , db_store(&vp->laddr.v6, sizeof (vp->laddr.v6))
   2143   1.1    dyoung 		    , vp->lport));
   2144   1.1    dyoung 
   2145   1.1    dyoung 	/* Class might have changed, so have a squiz.
   2146   1.1    dyoung 	 */
   2147   1.1    dyoung 	ctl = vtw_control(AF_INET6, class_to_msl(cp->msl_class));
   2148   1.1    dyoung 	vtw = vtw_alloc(ctl);
   2149   1.1    dyoung 
   2150   1.1    dyoung 	if (vtw) {
   2151   1.1    dyoung 		vtw_v6_t	*v6  = (void*)vtw;
   2152   1.1    dyoung 
   2153   1.1    dyoung 		/* Safe now to unhash the old entry
   2154   1.1    dyoung 		 */
   2155   1.1    dyoung 		vtw_del(vp->ctl, vp->vtw);
   2156   1.1    dyoung 
   2157   1.1    dyoung 		vtw->snd_nxt = cp->snd_nxt;
   2158   1.1    dyoung 		vtw->rcv_nxt = cp->rcv_nxt;
   2159   1.1    dyoung 
   2160   1.1    dyoung 		v6->faddr = copy.faddr;
   2161   1.1    dyoung 		v6->laddr = copy.laddr;
   2162   1.1    dyoung 		v6->fport = copy.fport;
   2163   1.1    dyoung 		v6->lport = copy.lport;
   2164   1.1    dyoung 
   2165   1.1    dyoung 		vtw->reuse_port = cp->reuse_port;
   2166   1.1    dyoung 		vtw->reuse_addr = cp->reuse_addr;
   2167   1.1    dyoung 		vtw->v6only	= cp->v6only;
   2168   1.1    dyoung 		vtw->uid	= cp->uid;
   2169   1.1    dyoung 
   2170   1.1    dyoung 		vtw_inshash_v6(ctl, vtw);
   2171   1.1    dyoung 	}
   2172   1.1    dyoung 
   2173   1.1    dyoung 	vp->valid = 0;
   2174   1.1    dyoung }
   2175   1.1    dyoung 
   2176   1.1    dyoung /*!\brief	restart timer for vestigial time-wait entry
   2177   1.1    dyoung  */
   2178   1.1    dyoung void
   2179   1.1    dyoung vtw_restart(vestigial_inpcb_t *vp)
   2180   1.1    dyoung {
   2181   1.1    dyoung 	if (!vp || !vp->valid)
   2182   1.1    dyoung 		return;
   2183   1.1    dyoung 
   2184   1.1    dyoung 	if (vp->v4)
   2185   1.1    dyoung 		vtw_restart_v4(vp);
   2186   1.1    dyoung 	else
   2187   1.1    dyoung 		vtw_restart_v6(vp);
   2188   1.1    dyoung }
   2189   1.1    dyoung 
   2190   1.1    dyoung int
   2191   1.7    dyoung sysctl_tcp_vtw_enable(SYSCTLFN_ARGS)
   2192   1.7    dyoung {
   2193   1.7    dyoung 	int en, rc;
   2194   1.7    dyoung 	struct sysctlnode node;
   2195   1.7    dyoung 
   2196   1.7    dyoung 	node = *rnode;
   2197   1.7    dyoung 	en = *(int *)rnode->sysctl_data;
   2198   1.7    dyoung 	node.sysctl_data = &en;
   2199   1.7    dyoung 
   2200   1.7    dyoung 	rc = sysctl_lookup(SYSCTLFN_CALL(&node));
   2201   1.7    dyoung 	if (rc != 0 || newp == NULL)
   2202   1.7    dyoung 		return rc;
   2203   1.7    dyoung 
   2204   1.7    dyoung 	if (rnode->sysctl_data != &tcp4_vtw_enable &&
   2205   1.7    dyoung 	    rnode->sysctl_data != &tcp6_vtw_enable)
   2206   1.7    dyoung 		rc = ENOENT;
   2207   1.7    dyoung 	else if ((en & 1) == 0)
   2208   1.7    dyoung 		rc = 0;
   2209   1.7    dyoung 	else if (rnode->sysctl_data == &tcp4_vtw_enable)
   2210   1.7    dyoung 		rc = vtw_control_init(AF_INET);
   2211   1.7    dyoung 	else /* rnode->sysctl_data == &tcp6_vtw_enable */
   2212   1.7    dyoung 		rc = vtw_control_init(AF_INET6);
   2213   1.7    dyoung 
   2214   1.7    dyoung 	if (rc == 0)
   2215   1.7    dyoung 		*(int *)rnode->sysctl_data = en;
   2216   1.7    dyoung 
   2217   1.7    dyoung 	return rc;
   2218   1.7    dyoung }
   2219   1.7    dyoung 
   2220   1.7    dyoung int
   2221   1.1    dyoung vtw_earlyinit(void)
   2222   1.1    dyoung {
   2223   1.5    dyoung 	int i, rc;
   2224   1.1    dyoung 
   2225   1.5    dyoung 	callout_init(&vtw_cs, 0);
   2226   1.5    dyoung 	callout_setfunc(&vtw_cs, vtw_tick, 0);
   2227   1.1    dyoung 
   2228   1.5    dyoung 	for (i = 0; i < VTW_NCLASS; ++i) {
   2229   1.5    dyoung 		vtw_tcpv4[i].is_v4 = 1;
   2230   1.5    dyoung 		vtw_tcpv6[i].is_v6 = 1;
   2231   1.1    dyoung 	}
   2232   1.1    dyoung 
   2233   1.7    dyoung 	if ((tcp4_vtw_enable & 1) != 0 &&
   2234   1.7    dyoung 	    (rc = vtw_control_init(AF_INET)) != 0)
   2235   1.7    dyoung 		return rc;
   2236   1.7    dyoung 
   2237   1.7    dyoung 	if ((tcp6_vtw_enable & 1) != 0 &&
   2238   1.1    dyoung 	    (rc = vtw_control_init(AF_INET6)) != 0)
   2239   1.1    dyoung 		return rc;
   2240   1.1    dyoung 
   2241   1.1    dyoung 	return 0;
   2242   1.1    dyoung }
   2243   1.1    dyoung 
   2244   1.1    dyoung #ifdef VTW_DEBUG
   2245   1.1    dyoung #include <sys/syscallargs.h>
   2246   1.1    dyoung #include <sys/sysctl.h>
   2247   1.1    dyoung 
   2248   1.1    dyoung /*!\brief	add lalp, fafp entries for debug
   2249   1.1    dyoung  */
   2250   1.1    dyoung int
   2251  1.11      matt vtw_debug_add(int af, sin_either_t *la, sin_either_t *fa, int msl, int msl_class)
   2252   1.1    dyoung {
   2253   1.1    dyoung 	vtw_ctl_t	*ctl;
   2254   1.1    dyoung 	vtw_t		*vtw;
   2255   1.1    dyoung 
   2256  1.11      matt 	ctl = vtw_control(af, msl ? msl : class_to_msl(msl_class));
   2257   1.1    dyoung 	if (!ctl)
   2258   1.1    dyoung 		return 0;
   2259   1.1    dyoung 
   2260   1.1    dyoung 	vtw = vtw_alloc(ctl);
   2261   1.1    dyoung 
   2262   1.1    dyoung 	if (vtw) {
   2263   1.1    dyoung 		vtw->snd_nxt = 0;
   2264   1.1    dyoung 		vtw->rcv_nxt = 0;
   2265   1.1    dyoung 
   2266   1.1    dyoung 		switch (af) {
   2267   1.1    dyoung 		case AF_INET: {
   2268   1.1    dyoung 			vtw_v4_t	*v4  = (void*)vtw;
   2269   1.1    dyoung 
   2270   1.1    dyoung 			v4->faddr = fa->sin_addr.v4.s_addr;
   2271   1.1    dyoung 			v4->laddr = la->sin_addr.v4.s_addr;
   2272   1.1    dyoung 			v4->fport = fa->sin_port;
   2273   1.1    dyoung 			v4->lport = la->sin_port;
   2274   1.1    dyoung 
   2275   1.1    dyoung 			vtw->reuse_port = 1;
   2276   1.1    dyoung 			vtw->reuse_addr = 1;
   2277   1.1    dyoung 			vtw->v6only	= 0;
   2278   1.1    dyoung 			vtw->uid	= 0;
   2279   1.1    dyoung 
   2280   1.1    dyoung 			vtw_inshash_v4(ctl, vtw);
   2281   1.1    dyoung 			break;
   2282   1.1    dyoung 		}
   2283   1.1    dyoung 
   2284   1.1    dyoung 		case AF_INET6: {
   2285   1.1    dyoung 			vtw_v6_t	*v6  = (void*)vtw;
   2286   1.1    dyoung 
   2287   1.1    dyoung 			v6->faddr = fa->sin_addr.v6;
   2288   1.1    dyoung 			v6->laddr = la->sin_addr.v6;
   2289   1.1    dyoung 
   2290   1.1    dyoung 			v6->fport = fa->sin_port;
   2291   1.1    dyoung 			v6->lport = la->sin_port;
   2292   1.1    dyoung 
   2293   1.1    dyoung 			vtw->reuse_port = 1;
   2294   1.1    dyoung 			vtw->reuse_addr = 1;
   2295   1.1    dyoung 			vtw->v6only	= 0;
   2296   1.1    dyoung 			vtw->uid	= 0;
   2297   1.1    dyoung 
   2298   1.1    dyoung 			vtw_inshash_v6(ctl, vtw);
   2299   1.1    dyoung 			break;
   2300   1.1    dyoung 		}
   2301   1.1    dyoung 
   2302   1.1    dyoung 		default:
   2303   1.1    dyoung 			break;
   2304   1.1    dyoung 		}
   2305   1.1    dyoung 
   2306   1.1    dyoung 		return 1;
   2307   1.1    dyoung 	}
   2308   1.1    dyoung 
   2309   1.1    dyoung 	return 0;
   2310   1.1    dyoung }
   2311   1.1    dyoung 
/* Index of the syscall table slot taken over for the vtw debug system call
 * by vtw_debug_init(); zero (the implicit initial value of a static) means
 * no slot has been claimed yet.
 */
static int vtw_syscall;
   2313   1.1    dyoung 
   2314   1.1    dyoung static int
   2315   1.1    dyoung vtw_debug_process(vtw_sysargs_t *ap)
   2316   1.1    dyoung {
   2317   1.1    dyoung 	struct vestigial_inpcb vestige;
   2318   1.1    dyoung 	int	rc = 0;
   2319   1.1    dyoung 
   2320   1.1    dyoung 	mutex_enter(softnet_lock);
   2321   1.1    dyoung 
   2322   1.1    dyoung 	switch (ap->op) {
   2323   1.1    dyoung 	case 0:		// insert
   2324   1.1    dyoung 		vtw_debug_add(ap->la.sin_family
   2325   1.1    dyoung 			      , &ap->la
   2326   1.1    dyoung 			      , &ap->fa
   2327   1.1    dyoung 			      , TCPTV_MSL
   2328   1.1    dyoung 			      , 0);
   2329   1.1    dyoung 		break;
   2330   1.1    dyoung 
   2331   1.1    dyoung 	case 1:		// lookup
   2332   1.1    dyoung 	case 2:		// restart
   2333   1.1    dyoung 		switch (ap->la.sin_family) {
   2334   1.1    dyoung 		case AF_INET:
   2335   1.1    dyoung 			if (tcp_lookup_v4(ap->fa.sin_addr.v4, ap->fa.sin_port,
   2336   1.1    dyoung 					  ap->la.sin_addr.v4, ap->la.sin_port,
   2337   1.1    dyoung 					  &vestige)) {
   2338   1.1    dyoung 				if (ap->op == 2) {
   2339   1.1    dyoung 					vtw_restart(&vestige);
   2340   1.1    dyoung 				}
   2341   1.1    dyoung 				rc = 0;
   2342   1.1    dyoung 			} else
   2343   1.1    dyoung 				rc = ESRCH;
   2344   1.1    dyoung 			break;
   2345   1.1    dyoung 
   2346   1.1    dyoung 		case AF_INET6:
   2347   1.1    dyoung 			if (tcp_lookup_v6(&ap->fa.sin_addr.v6, ap->fa.sin_port,
   2348   1.1    dyoung 					  &ap->la.sin_addr.v6, ap->la.sin_port,
   2349   1.1    dyoung 					  &vestige)) {
   2350   1.1    dyoung 				if (ap->op == 2) {
   2351   1.1    dyoung 					vtw_restart(&vestige);
   2352   1.1    dyoung 				}
   2353   1.1    dyoung 				rc = 0;
   2354   1.1    dyoung 			} else
   2355   1.1    dyoung 				rc = ESRCH;
   2356   1.1    dyoung 			break;
   2357   1.1    dyoung 		default:
   2358   1.1    dyoung 			rc = EINVAL;
   2359   1.1    dyoung 		}
   2360   1.1    dyoung 		break;
   2361   1.1    dyoung 
   2362   1.1    dyoung 	default:
   2363   1.1    dyoung 		rc = EINVAL;
   2364   1.1    dyoung 	}
   2365   1.1    dyoung 
   2366   1.1    dyoung 	mutex_exit(softnet_lock);
   2367   1.1    dyoung 	return rc;
   2368   1.1    dyoung }
   2369   1.1    dyoung 
   2370   1.1    dyoung struct sys_vtw_args {
   2371   1.1    dyoung 	syscallarg(const vtw_sysargs_t *) req;
   2372   1.1    dyoung 	syscallarg(size_t) len;
   2373   1.1    dyoung };
   2374   1.1    dyoung 
   2375   1.1    dyoung static int
   2376   1.1    dyoung vtw_sys(struct lwp *l, const void *_, register_t *retval)
   2377   1.1    dyoung {
   2378   1.1    dyoung 	const struct sys_vtw_args *uap = _;
   2379   1.1    dyoung 	void	*buf;
   2380   1.1    dyoung 	int	rc;
   2381   1.1    dyoung 	size_t	len	= SCARG(uap, len);
   2382   1.1    dyoung 
   2383   1.1    dyoung 	if (len != sizeof (vtw_sysargs_t))
   2384   1.1    dyoung 		return EINVAL;
   2385   1.1    dyoung 
   2386   1.1    dyoung 	buf = kmem_alloc(len, KM_SLEEP);
   2387   1.1    dyoung 	if (!buf)
   2388   1.1    dyoung 		return ENOMEM;
   2389   1.1    dyoung 
   2390   1.1    dyoung 	rc = copyin(SCARG(uap, req), buf, len);
   2391   1.1    dyoung 	if (!rc) {
   2392   1.1    dyoung 		rc = vtw_debug_process(buf);
   2393   1.1    dyoung 	}
   2394   1.1    dyoung 	kmem_free(buf, len);
   2395   1.1    dyoung 
   2396   1.1    dyoung 	return rc;
   2397   1.1    dyoung }
   2398   1.1    dyoung 
   2399   1.1    dyoung static void
   2400   1.1    dyoung vtw_sanity_check(void)
   2401   1.1    dyoung {
   2402   1.1    dyoung 	vtw_ctl_t	*ctl;
   2403   1.1    dyoung 	vtw_t		*vtw;
   2404   1.1    dyoung 	int		i;
   2405   1.1    dyoung 	int		n;
   2406   1.1    dyoung 
   2407   1.1    dyoung 	for (i = 0; i < VTW_NCLASS; ++i) {
   2408   1.1    dyoung 		ctl = &vtw_tcpv4[i];
   2409   1.1    dyoung 
   2410   1.1    dyoung 		if (!ctl->base.v || ctl->nalloc)
   2411   1.1    dyoung 			continue;
   2412   1.1    dyoung 
   2413   1.1    dyoung 		for (n = 0, vtw = ctl->base.v; ; ) {
   2414   1.1    dyoung 			++n;
   2415   1.1    dyoung 			vtw = vtw_next(ctl, vtw);
   2416   1.1    dyoung 			if (vtw == ctl->base.v)
   2417   1.1    dyoung 				break;
   2418   1.1    dyoung 		}
   2419   1.1    dyoung 		db_trace(KTR_VTW
   2420   1.1    dyoung 			 , (ctl, "sanity: class %x n %x nfree %x"
   2421   1.1    dyoung 			    , i, n, ctl->nfree));
   2422   1.1    dyoung 
   2423   1.1    dyoung 		KASSERT(n == ctl->nfree);
   2424   1.1    dyoung 	}
   2425   1.1    dyoung 
   2426   1.1    dyoung 	for (i = 0; i < VTW_NCLASS; ++i) {
   2427   1.1    dyoung 		ctl = &vtw_tcpv6[i];
   2428   1.1    dyoung 
   2429   1.1    dyoung 		if (!ctl->base.v || ctl->nalloc)
   2430   1.1    dyoung 			continue;
   2431   1.1    dyoung 
   2432   1.1    dyoung 		for (n = 0, vtw = ctl->base.v; ; ) {
   2433   1.1    dyoung 			++n;
   2434   1.1    dyoung 			vtw = vtw_next(ctl, vtw);
   2435   1.1    dyoung 			if (vtw == ctl->base.v)
   2436   1.1    dyoung 				break;
   2437   1.1    dyoung 		}
   2438   1.1    dyoung 		db_trace(KTR_VTW
   2439   1.1    dyoung 			 , (ctl, "sanity: class %x n %x nfree %x"
   2440   1.1    dyoung 			    , i, n, ctl->nfree));
   2441   1.1    dyoung 		KASSERT(n == ctl->nfree);
   2442   1.1    dyoung 	}
   2443   1.1    dyoung }
   2444   1.1    dyoung 
   2445   1.1    dyoung /*!\brief	Initialise debug support.
   2446   1.1    dyoung  */
   2447   1.1    dyoung static void
   2448   1.1    dyoung vtw_debug_init(void)
   2449   1.1    dyoung {
   2450   1.1    dyoung 	int	i;
   2451   1.1    dyoung 
   2452   1.1    dyoung 	vtw_sanity_check();
   2453   1.1    dyoung 
   2454   1.1    dyoung 	if (vtw_syscall)
   2455   1.1    dyoung 		return;
   2456   1.1    dyoung 
   2457   1.1    dyoung 	for (i = 511; i; --i) {
   2458   1.1    dyoung 		if (sysent[i].sy_call == sys_nosys) {
   2459   1.1    dyoung 			sysent[i].sy_call    = vtw_sys;
   2460   1.1    dyoung 			sysent[i].sy_narg    = 2;
   2461   1.1    dyoung 			sysent[i].sy_argsize = sizeof (struct sys_vtw_args);
   2462   1.1    dyoung 			sysent[i].sy_flags   = 0;
   2463   1.1    dyoung 
   2464   1.1    dyoung 			vtw_syscall = i;
   2465   1.1    dyoung 			break;
   2466   1.1    dyoung 		}
   2467   1.1    dyoung 	}
   2468   1.1    dyoung 	if (i) {
   2469   1.1    dyoung 		const struct sysctlnode *node;
   2470   1.1    dyoung 		uint32_t	flags;
   2471   1.1    dyoung 
   2472   1.1    dyoung 		flags = sysctl_root.sysctl_flags;
   2473   1.1    dyoung 
   2474   1.1    dyoung 		sysctl_root.sysctl_flags |= CTLFLAG_READWRITE;
   2475   1.1    dyoung 		sysctl_root.sysctl_flags &= ~CTLFLAG_PERMANENT;
   2476   1.1    dyoung 
   2477   1.1    dyoung 		sysctl_createv(0, 0, 0, &node,
   2478   1.1    dyoung 			       CTLFLAG_PERMANENT, CTLTYPE_NODE,
   2479   1.1    dyoung 			       "koff",
   2480   1.1    dyoung 			       SYSCTL_DESCR("Kernel Obscure Feature Finder"),
   2481   1.1    dyoung 			       0, 0, 0, 0, CTL_CREATE, CTL_EOL);
   2482   1.1    dyoung 
   2483   1.1    dyoung 		if (!node) {
   2484   1.1    dyoung 			sysctl_createv(0, 0, 0, &node,
   2485   1.1    dyoung 				       CTLFLAG_PERMANENT, CTLTYPE_NODE,
   2486   1.1    dyoung 				       "koffka",
   2487   1.1    dyoung 				       SYSCTL_DESCR("The Real(tm) Kernel"
   2488   1.1    dyoung 						    " Obscure Feature Finder"),
   2489   1.1    dyoung 				       0, 0, 0, 0, CTL_CREATE, CTL_EOL);
   2490   1.1    dyoung 		}
   2491   1.1    dyoung 		if (node) {
   2492   1.1    dyoung 			sysctl_createv(0, 0, 0, 0,
   2493   1.1    dyoung 				       CTLFLAG_PERMANENT|CTLFLAG_READONLY,
   2494   1.1    dyoung 				       CTLTYPE_INT, "vtw_debug_syscall",
   2495   1.1    dyoung 				       SYSCTL_DESCR("vtw debug"
   2496   1.1    dyoung 						    " system call number"),
   2497   1.1    dyoung 				       0, 0, &vtw_syscall, 0, node->sysctl_num,
   2498   1.1    dyoung 				       CTL_CREATE, CTL_EOL);
   2499   1.1    dyoung 		}
   2500   1.1    dyoung 		sysctl_root.sysctl_flags = flags;
   2501   1.1    dyoung 	}
   2502   1.1    dyoung }
   2503   1.1    dyoung #else /* !VTW_DEBUG */
/*!\brief	Debug support is compiled out: nothing to initialise.
 */
static void
vtw_debug_init(void)
{
}
   2509   1.1    dyoung #endif /* !VTW_DEBUG */
   2510