Home | History | Annotate | Line # | Download | only in netinet
      1  1.7  riastrad /*	$NetBSD: tcp_syncache.c,v 1.7 2024/06/29 12:59:08 riastradh Exp $	*/
      2  1.1     ozaki 
      3  1.1     ozaki /*
      4  1.1     ozaki  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
      5  1.1     ozaki  * All rights reserved.
      6  1.1     ozaki  *
      7  1.1     ozaki  * Redistribution and use in source and binary forms, with or without
      8  1.1     ozaki  * modification, are permitted provided that the following conditions
      9  1.1     ozaki  * are met:
     10  1.1     ozaki  * 1. Redistributions of source code must retain the above copyright
     11  1.1     ozaki  *    notice, this list of conditions and the following disclaimer.
     12  1.1     ozaki  * 2. Redistributions in binary form must reproduce the above copyright
     13  1.1     ozaki  *    notice, this list of conditions and the following disclaimer in the
     14  1.1     ozaki  *    documentation and/or other materials provided with the distribution.
     15  1.1     ozaki  * 3. Neither the name of the project nor the names of its contributors
     16  1.1     ozaki  *    may be used to endorse or promote products derived from this software
     17  1.1     ozaki  *    without specific prior written permission.
     18  1.1     ozaki  *
     19  1.1     ozaki  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
     20  1.1     ozaki  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     21  1.1     ozaki  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     22  1.1     ozaki  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
     23  1.1     ozaki  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24  1.1     ozaki  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     25  1.1     ozaki  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     26  1.1     ozaki  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     27  1.1     ozaki  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     28  1.1     ozaki  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     29  1.1     ozaki  * SUCH DAMAGE.
     30  1.1     ozaki  */
     31  1.1     ozaki 
     32  1.1     ozaki /*
     33  1.1     ozaki  *      @(#)COPYRIGHT   1.1 (NRL) 17 January 1995
     34  1.1     ozaki  *
     35  1.1     ozaki  * NRL grants permission for redistribution and use in source and binary
     36  1.1     ozaki  * forms, with or without modification, of the software and documentation
     37  1.1     ozaki  * created at NRL provided that the following conditions are met:
     38  1.1     ozaki  *
     39  1.1     ozaki  * 1. Redistributions of source code must retain the above copyright
     40  1.1     ozaki  *    notice, this list of conditions and the following disclaimer.
     41  1.1     ozaki  * 2. Redistributions in binary form must reproduce the above copyright
     42  1.1     ozaki  *    notice, this list of conditions and the following disclaimer in the
     43  1.1     ozaki  *    documentation and/or other materials provided with the distribution.
     44  1.1     ozaki  * 3. All advertising materials mentioning features or use of this software
     45  1.1     ozaki  *    must display the following acknowledgements:
     46  1.1     ozaki  *      This product includes software developed by the University of
     47  1.1     ozaki  *      California, Berkeley and its contributors.
     48  1.1     ozaki  *      This product includes software developed at the Information
     49  1.1     ozaki  *      Technology Division, US Naval Research Laboratory.
     50  1.1     ozaki  * 4. Neither the name of the NRL nor the names of its contributors
     51  1.1     ozaki  *    may be used to endorse or promote products derived from this software
     52  1.1     ozaki  *    without specific prior written permission.
     53  1.1     ozaki  *
     54  1.1     ozaki  * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
     55  1.1     ozaki  * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     56  1.1     ozaki  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
     57  1.1     ozaki  * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
     58  1.1     ozaki  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     59  1.1     ozaki  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     60  1.1     ozaki  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     61  1.1     ozaki  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
     62  1.1     ozaki  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
     63  1.1     ozaki  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     64  1.1     ozaki  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     65  1.1     ozaki  *
     66  1.1     ozaki  * The views and conclusions contained in the software and documentation
     67  1.1     ozaki  * are those of the authors and should not be interpreted as representing
     68  1.1     ozaki  * official policies, either expressed or implied, of the US Naval
     69  1.1     ozaki  * Research Laboratory (NRL).
     70  1.1     ozaki  */
     71  1.1     ozaki 
     72  1.1     ozaki /*-
     73  1.1     ozaki  * Copyright (c) 1997, 1998, 1999, 2001, 2005, 2006,
     74  1.1     ozaki  * 2011 The NetBSD Foundation, Inc.
     75  1.1     ozaki  * All rights reserved.
     76  1.1     ozaki  *
     77  1.1     ozaki  * This code is derived from software contributed to The NetBSD Foundation
     78  1.1     ozaki  * by Coyote Point Systems, Inc.
     79  1.1     ozaki  * This code is derived from software contributed to The NetBSD Foundation
     80  1.1     ozaki  * by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation
     81  1.1     ozaki  * Facility, NASA Ames Research Center.
     82  1.1     ozaki  * This code is derived from software contributed to The NetBSD Foundation
     83  1.1     ozaki  * by Charles M. Hannum.
     84  1.1     ozaki  * This code is derived from software contributed to The NetBSD Foundation
     85  1.1     ozaki  * by Rui Paulo.
     86  1.1     ozaki  *
     87  1.1     ozaki  * Redistribution and use in source and binary forms, with or without
     88  1.1     ozaki  * modification, are permitted provided that the following conditions
     89  1.1     ozaki  * are met:
     90  1.1     ozaki  * 1. Redistributions of source code must retain the above copyright
     91  1.1     ozaki  *    notice, this list of conditions and the following disclaimer.
     92  1.1     ozaki  * 2. Redistributions in binary form must reproduce the above copyright
     93  1.1     ozaki  *    notice, this list of conditions and the following disclaimer in the
     94  1.1     ozaki  *    documentation and/or other materials provided with the distribution.
     95  1.1     ozaki  *
     96  1.1     ozaki  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     97  1.1     ozaki  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     98  1.1     ozaki  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     99  1.1     ozaki  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
    100  1.1     ozaki  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
    101  1.1     ozaki  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
    102  1.1     ozaki  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
    103  1.1     ozaki  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
    104  1.1     ozaki  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
    105  1.1     ozaki  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    106  1.1     ozaki  * POSSIBILITY OF SUCH DAMAGE.
    107  1.1     ozaki  */
    108  1.1     ozaki 
    109  1.1     ozaki /*
    110  1.1     ozaki  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
    111  1.1     ozaki  *	The Regents of the University of California.  All rights reserved.
    112  1.1     ozaki  *
    113  1.1     ozaki  * Redistribution and use in source and binary forms, with or without
    114  1.1     ozaki  * modification, are permitted provided that the following conditions
    115  1.1     ozaki  * are met:
    116  1.1     ozaki  * 1. Redistributions of source code must retain the above copyright
    117  1.1     ozaki  *    notice, this list of conditions and the following disclaimer.
    118  1.1     ozaki  * 2. Redistributions in binary form must reproduce the above copyright
    119  1.1     ozaki  *    notice, this list of conditions and the following disclaimer in the
    120  1.1     ozaki  *    documentation and/or other materials provided with the distribution.
    121  1.1     ozaki  * 3. Neither the name of the University nor the names of its contributors
    122  1.1     ozaki  *    may be used to endorse or promote products derived from this software
    123  1.1     ozaki  *    without specific prior written permission.
    124  1.1     ozaki  *
    125  1.1     ozaki  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
    126  1.1     ozaki  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    127  1.1     ozaki  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    128  1.1     ozaki  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
    129  1.1     ozaki  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
    130  1.1     ozaki  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
    131  1.1     ozaki  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
    132  1.1     ozaki  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
    133  1.1     ozaki  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
    134  1.1     ozaki  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
    135  1.1     ozaki  * SUCH DAMAGE.
    136  1.1     ozaki  *
    137  1.1     ozaki  *	@(#)tcp_input.c	8.12 (Berkeley) 5/24/95
    138  1.1     ozaki  */
    139  1.1     ozaki 
    140  1.1     ozaki /*
    141  1.1     ozaki  *	TODO list for SYN cache stuff:
    142  1.1     ozaki  *
    143  1.1     ozaki  *	Find room for a "state" field, which is needed to keep a
    144  1.1     ozaki  *	compressed state for TIME_WAIT TCBs.  It's been noted already
    145  1.1     ozaki  *	that this is fairly important for very high-volume web and
    146  1.1     ozaki  *	mail servers, which use a large number of short-lived
    147  1.1     ozaki  *	connections.
    148  1.1     ozaki  */
    149  1.1     ozaki 
    150  1.1     ozaki #include <sys/cdefs.h>
    151  1.7  riastrad __KERNEL_RCSID(0, "$NetBSD: tcp_syncache.c,v 1.7 2024/06/29 12:59:08 riastradh Exp $");
    152  1.1     ozaki 
    153  1.1     ozaki #ifdef _KERNEL_OPT
    154  1.1     ozaki #include "opt_inet.h"
    155  1.1     ozaki #include "opt_ipsec.h"
    156  1.1     ozaki #endif
    157  1.1     ozaki 
    158  1.1     ozaki #include <sys/param.h>
    159  1.1     ozaki #include <sys/systm.h>
    160  1.1     ozaki #include <sys/mbuf.h>
    161  1.1     ozaki #include <sys/protosw.h>
    162  1.1     ozaki #include <sys/socket.h>
    163  1.1     ozaki #include <sys/socketvar.h>
    164  1.1     ozaki #include <sys/errno.h>
    165  1.1     ozaki #include <sys/syslog.h>
    166  1.1     ozaki #include <sys/pool.h>
    167  1.1     ozaki #include <sys/domain.h>
    168  1.1     ozaki #include <sys/kernel.h>
    169  1.1     ozaki #include <sys/lwp.h> /* for lwp0 */
    170  1.1     ozaki #include <sys/cprng.h>
    171  1.1     ozaki 
    172  1.1     ozaki #include <netinet/in.h>
    173  1.1     ozaki #include <netinet/ip.h>
    174  1.1     ozaki #include <netinet/in_pcb.h>
    175  1.1     ozaki #include <netinet/in_var.h>
    176  1.1     ozaki #include <netinet/ip_var.h>
    177  1.1     ozaki 
    178  1.1     ozaki #include <netinet/ip6.h>
    179  1.1     ozaki #ifdef INET6
    180  1.1     ozaki #include <netinet6/ip6_var.h>
    181  1.1     ozaki #include <netinet6/in6_pcb.h>
    182  1.1     ozaki #include <netinet6/ip6_var.h>
    183  1.1     ozaki #include <netinet6/in6_var.h>
    184  1.1     ozaki #endif
    185  1.1     ozaki 
    186  1.1     ozaki #include <netinet/tcp.h>
    187  1.1     ozaki #include <netinet/tcp_fsm.h>
    188  1.1     ozaki #include <netinet/tcp_seq.h>
    189  1.1     ozaki #include <netinet/tcp_timer.h>
    190  1.1     ozaki #include <netinet/tcp_var.h>
    191  1.1     ozaki #include <netinet/tcp_private.h>
    192  1.1     ozaki #include <netinet/tcp_syncache.h>
    193  1.1     ozaki 
    194  1.1     ozaki #ifdef TCP_SIGNATURE
    195  1.1     ozaki #ifdef IPSEC
    196  1.1     ozaki #include <netipsec/ipsec.h>
    197  1.1     ozaki #include <netipsec/key.h>
    198  1.1     ozaki #ifdef INET6
    199  1.1     ozaki #include <netipsec/ipsec6.h>
    200  1.1     ozaki #endif
    201  1.1     ozaki #endif	/* IPSEC*/
    202  1.1     ozaki #endif
    203  1.1     ozaki 
/* Prototypes for the file-local (static) SYN cache helpers. */
static void syn_cache_timer(void *);
static struct syn_cache *syn_cache_lookup(const struct sockaddr *,
    const struct sockaddr *, struct syn_cache_head **);
static int syn_cache_respond(struct syn_cache *);
    209  1.1     ozaki 
    210  1.1     ozaki /* syn hash parameters */
    211  1.1     ozaki #define	TCP_SYN_HASH_SIZE	293
    212  1.1     ozaki #define	TCP_SYN_BUCKET_SIZE	35
    213  1.1     ozaki static int	tcp_syn_cache_size = TCP_SYN_HASH_SIZE;
    214  1.1     ozaki int		tcp_syn_cache_limit = TCP_SYN_HASH_SIZE*TCP_SYN_BUCKET_SIZE;
    215  1.1     ozaki int		tcp_syn_bucket_limit = 3*TCP_SYN_BUCKET_SIZE;
    216  1.1     ozaki static struct	syn_cache_head tcp_syn_cache[TCP_SYN_HASH_SIZE];
    217  1.1     ozaki 
    218  1.1     ozaki /*
    219  1.1     ozaki  * TCP compressed state engine.  Currently used to hold compressed
    220  1.1     ozaki  * state for SYN_RECEIVED.
    221  1.1     ozaki  */
    222  1.1     ozaki 
    223  1.1     ozaki u_long	syn_cache_count;
    224  1.1     ozaki static u_int32_t syn_hash1, syn_hash2;
    225  1.1     ozaki 
    226  1.1     ozaki #define SYN_HASH(sa, sp, dp) \
    227  1.1     ozaki 	((((sa)->s_addr^syn_hash1)*(((((u_int32_t)(dp))<<16) + \
    228  1.1     ozaki 				     ((u_int32_t)(sp)))^syn_hash2)))
    229  1.1     ozaki #ifndef INET6
    230  1.1     ozaki #define	SYN_HASHALL(hash, src, dst) \
    231  1.1     ozaki do {									\
    232  1.1     ozaki 	hash = SYN_HASH(&((const struct sockaddr_in *)(src))->sin_addr,	\
    233  1.1     ozaki 		((const struct sockaddr_in *)(src))->sin_port,		\
    234  1.1     ozaki 		((const struct sockaddr_in *)(dst))->sin_port);		\
    235  1.1     ozaki } while (/*CONSTCOND*/ 0)
    236  1.1     ozaki #else
    237  1.1     ozaki #define SYN_HASH6(sa, sp, dp) \
    238  1.1     ozaki 	((((sa)->s6_addr32[0] ^ (sa)->s6_addr32[3] ^ syn_hash1) * \
    239  1.1     ozaki 	  (((((u_int32_t)(dp))<<16) + ((u_int32_t)(sp)))^syn_hash2)) \
    240  1.1     ozaki 	 & 0x7fffffff)
    241  1.1     ozaki 
    242  1.1     ozaki #define SYN_HASHALL(hash, src, dst) \
    243  1.1     ozaki do {									\
    244  1.1     ozaki 	switch ((src)->sa_family) {					\
    245  1.1     ozaki 	case AF_INET:							\
    246  1.1     ozaki 		hash = SYN_HASH(&((const struct sockaddr_in *)(src))->sin_addr, \
    247  1.1     ozaki 			((const struct sockaddr_in *)(src))->sin_port,	\
    248  1.1     ozaki 			((const struct sockaddr_in *)(dst))->sin_port);	\
    249  1.1     ozaki 		break;							\
    250  1.1     ozaki 	case AF_INET6:							\
    251  1.1     ozaki 		hash = SYN_HASH6(&((const struct sockaddr_in6 *)(src))->sin6_addr, \
    252  1.1     ozaki 			((const struct sockaddr_in6 *)(src))->sin6_port,	\
    253  1.1     ozaki 			((const struct sockaddr_in6 *)(dst))->sin6_port);	\
    254  1.1     ozaki 		break;							\
    255  1.1     ozaki 	default:							\
    256  1.1     ozaki 		hash = 0;						\
    257  1.1     ozaki 	}								\
    258  1.1     ozaki } while (/*CONSTCOND*/0)
    259  1.1     ozaki #endif /* INET6 */
    260  1.1     ozaki 
    261  1.1     ozaki static struct pool syn_cache_pool;
    262  1.1     ozaki 
    263  1.1     ozaki /*
    264  1.1     ozaki  * We don't estimate RTT with SYNs, so each packet starts with the default
    265  1.1     ozaki  * RTT and each timer step has a fixed timeout value.
    266  1.1     ozaki  */
    267  1.1     ozaki static inline void
    268  1.1     ozaki syn_cache_timer_arm(struct syn_cache *sc)
    269  1.1     ozaki {
    270  1.1     ozaki 
    271  1.1     ozaki 	TCPT_RANGESET(sc->sc_rxtcur,
    272  1.1     ozaki 	    TCPTV_SRTTDFLT * tcp_backoff[sc->sc_rxtshift], TCPTV_MIN,
    273  1.1     ozaki 	    TCPTV_REXMTMAX);
    274  1.1     ozaki 	callout_reset(&sc->sc_timer,
    275  1.1     ozaki 	    sc->sc_rxtcur * (hz / PR_SLOWHZ), syn_cache_timer, sc);
    276  1.1     ozaki }
    277  1.1     ozaki 
/* Current tcp_now expressed relative to the entry's own sc_timebase. */
#define	SYN_CACHE_TIMESTAMP(sc) \
	(tcp_now - (sc)->sc_timebase)
    279  1.1     ozaki 
    280  1.1     ozaki static inline void
    281  1.1     ozaki syn_cache_rm(struct syn_cache *sc)
    282  1.1     ozaki {
    283  1.1     ozaki 	TAILQ_REMOVE(&tcp_syn_cache[sc->sc_bucketidx].sch_bucket,
    284  1.1     ozaki 	    sc, sc_bucketq);
    285  1.1     ozaki 	sc->sc_tp = NULL;
    286  1.1     ozaki 	LIST_REMOVE(sc, sc_tpq);
    287  1.1     ozaki 	tcp_syn_cache[sc->sc_bucketidx].sch_length--;
    288  1.1     ozaki 	callout_stop(&sc->sc_timer);
    289  1.1     ozaki 	syn_cache_count--;
    290  1.1     ozaki }
    291  1.1     ozaki 
    292  1.1     ozaki static inline void
    293  1.1     ozaki syn_cache_put(struct syn_cache *sc)
    294  1.1     ozaki {
    295  1.1     ozaki 	if (sc->sc_ipopts)
    296  1.1     ozaki 		(void) m_free(sc->sc_ipopts);
    297  1.1     ozaki 	rtcache_free(&sc->sc_route);
    298  1.1     ozaki 	sc->sc_flags |= SCF_DEAD;
    299  1.1     ozaki 	if (!callout_invoking(&sc->sc_timer))
    300  1.1     ozaki 		callout_schedule(&(sc)->sc_timer, 1);
    301  1.1     ozaki }
    302  1.1     ozaki 
    303  1.1     ozaki void
    304  1.1     ozaki syn_cache_init(void)
    305  1.1     ozaki {
    306  1.1     ozaki 	int i;
    307  1.1     ozaki 
    308  1.1     ozaki 	pool_init(&syn_cache_pool, sizeof(struct syn_cache), 0, 0, 0,
    309  1.1     ozaki 	    "synpl", NULL, IPL_SOFTNET);
    310  1.1     ozaki 
    311  1.1     ozaki 	/* Initialize the hash buckets. */
    312  1.1     ozaki 	for (i = 0; i < tcp_syn_cache_size; i++)
    313  1.1     ozaki 		TAILQ_INIT(&tcp_syn_cache[i].sch_bucket);
    314  1.1     ozaki }
    315  1.1     ozaki 
    316  1.1     ozaki void
    317  1.1     ozaki syn_cache_insert(struct syn_cache *sc, struct tcpcb *tp)
    318  1.1     ozaki {
    319  1.1     ozaki 	struct syn_cache_head *scp;
    320  1.1     ozaki 	struct syn_cache *sc2;
    321  1.1     ozaki 	int s;
    322  1.1     ozaki 
    323  1.1     ozaki 	/*
    324  1.1     ozaki 	 * If there are no entries in the hash table, reinitialize
    325  1.1     ozaki 	 * the hash secrets.
    326  1.1     ozaki 	 */
    327  1.1     ozaki 	if (syn_cache_count == 0) {
    328  1.1     ozaki 		syn_hash1 = cprng_fast32();
    329  1.1     ozaki 		syn_hash2 = cprng_fast32();
    330  1.1     ozaki 	}
    331  1.1     ozaki 
    332  1.1     ozaki 	SYN_HASHALL(sc->sc_hash, &sc->sc_src.sa, &sc->sc_dst.sa);
    333  1.1     ozaki 	sc->sc_bucketidx = sc->sc_hash % tcp_syn_cache_size;
    334  1.1     ozaki 	scp = &tcp_syn_cache[sc->sc_bucketidx];
    335  1.1     ozaki 
    336  1.1     ozaki 	/*
    337  1.1     ozaki 	 * Make sure that we don't overflow the per-bucket
    338  1.1     ozaki 	 * limit or the total cache size limit.
    339  1.1     ozaki 	 */
    340  1.1     ozaki 	s = splsoftnet();
    341  1.1     ozaki 	if (scp->sch_length >= tcp_syn_bucket_limit) {
    342  1.1     ozaki 		TCP_STATINC(TCP_STAT_SC_BUCKETOVERFLOW);
    343  1.1     ozaki 		/*
    344  1.1     ozaki 		 * The bucket is full.  Toss the oldest element in the
    345  1.1     ozaki 		 * bucket.  This will be the first entry in the bucket.
    346  1.1     ozaki 		 */
    347  1.1     ozaki 		sc2 = TAILQ_FIRST(&scp->sch_bucket);
    348  1.1     ozaki #ifdef DIAGNOSTIC
    349  1.1     ozaki 		/*
    350  1.1     ozaki 		 * This should never happen; we should always find an
    351  1.1     ozaki 		 * entry in our bucket.
    352  1.1     ozaki 		 */
    353  1.1     ozaki 		if (sc2 == NULL)
    354  1.1     ozaki 			panic("syn_cache_insert: bucketoverflow: impossible");
    355  1.1     ozaki #endif
    356  1.1     ozaki 		syn_cache_rm(sc2);
    357  1.1     ozaki 		syn_cache_put(sc2);	/* calls pool_put but see spl above */
    358  1.1     ozaki 	} else if (syn_cache_count >= tcp_syn_cache_limit) {
    359  1.1     ozaki 		struct syn_cache_head *scp2, *sce;
    360  1.1     ozaki 
    361  1.1     ozaki 		TCP_STATINC(TCP_STAT_SC_OVERFLOWED);
    362  1.1     ozaki 		/*
    363  1.1     ozaki 		 * The cache is full.  Toss the oldest entry in the
    364  1.1     ozaki 		 * first non-empty bucket we can find.
    365  1.1     ozaki 		 *
    366  1.1     ozaki 		 * XXX We would really like to toss the oldest
    367  1.1     ozaki 		 * entry in the cache, but we hope that this
    368  1.1     ozaki 		 * condition doesn't happen very often.
    369  1.1     ozaki 		 */
    370  1.1     ozaki 		scp2 = scp;
    371  1.1     ozaki 		if (TAILQ_EMPTY(&scp2->sch_bucket)) {
    372  1.1     ozaki 			sce = &tcp_syn_cache[tcp_syn_cache_size];
    373  1.1     ozaki 			for (++scp2; scp2 != scp; scp2++) {
    374  1.1     ozaki 				if (scp2 >= sce)
    375  1.1     ozaki 					scp2 = &tcp_syn_cache[0];
    376  1.1     ozaki 				if (! TAILQ_EMPTY(&scp2->sch_bucket))
    377  1.1     ozaki 					break;
    378  1.1     ozaki 			}
    379  1.1     ozaki #ifdef DIAGNOSTIC
    380  1.1     ozaki 			/*
    381  1.1     ozaki 			 * This should never happen; we should always find a
    382  1.1     ozaki 			 * non-empty bucket.
    383  1.1     ozaki 			 */
    384  1.1     ozaki 			if (scp2 == scp)
    385  1.1     ozaki 				panic("syn_cache_insert: cacheoverflow: "
    386  1.1     ozaki 				    "impossible");
    387  1.1     ozaki #endif
    388  1.1     ozaki 		}
    389  1.1     ozaki 		sc2 = TAILQ_FIRST(&scp2->sch_bucket);
    390  1.1     ozaki 		syn_cache_rm(sc2);
    391  1.1     ozaki 		syn_cache_put(sc2);	/* calls pool_put but see spl above */
    392  1.1     ozaki 	}
    393  1.1     ozaki 
    394  1.1     ozaki 	/*
    395  1.1     ozaki 	 * Initialize the entry's timer.
    396  1.1     ozaki 	 */
    397  1.1     ozaki 	sc->sc_rxttot = 0;
    398  1.1     ozaki 	sc->sc_rxtshift = 0;
    399  1.1     ozaki 	syn_cache_timer_arm(sc);
    400  1.1     ozaki 
    401  1.1     ozaki 	/* Link it from tcpcb entry */
    402  1.1     ozaki 	LIST_INSERT_HEAD(&tp->t_sc, sc, sc_tpq);
    403  1.1     ozaki 
    404  1.1     ozaki 	/* Put it into the bucket. */
    405  1.1     ozaki 	TAILQ_INSERT_TAIL(&scp->sch_bucket, sc, sc_bucketq);
    406  1.1     ozaki 	scp->sch_length++;
    407  1.1     ozaki 	syn_cache_count++;
    408  1.1     ozaki 
    409  1.1     ozaki 	TCP_STATINC(TCP_STAT_SC_ADDED);
    410  1.1     ozaki 	splx(s);
    411  1.1     ozaki }
    412  1.1     ozaki 
    413  1.1     ozaki /*
    414  1.1     ozaki  * Walk the timer queues, looking for SYN,ACKs that need to be retransmitted.
    415  1.1     ozaki  * If we have retransmitted an entry the maximum number of times, expire
    416  1.1     ozaki  * that entry.
    417  1.1     ozaki  */
    418  1.1     ozaki static void
    419  1.1     ozaki syn_cache_timer(void *arg)
    420  1.1     ozaki {
    421  1.1     ozaki 	struct syn_cache *sc = arg;
    422  1.1     ozaki 
    423  1.1     ozaki 	mutex_enter(softnet_lock);
    424  1.1     ozaki 	KERNEL_LOCK(1, NULL);
    425  1.1     ozaki 
    426  1.1     ozaki 	callout_ack(&sc->sc_timer);
    427  1.1     ozaki 
    428  1.1     ozaki 	if (__predict_false(sc->sc_flags & SCF_DEAD)) {
    429  1.1     ozaki 		TCP_STATINC(TCP_STAT_SC_DELAYED_FREE);
    430  1.1     ozaki 		goto free;
    431  1.1     ozaki 	}
    432  1.1     ozaki 
    433  1.1     ozaki 	if (__predict_false(sc->sc_rxtshift == TCP_MAXRXTSHIFT)) {
    434  1.1     ozaki 		/* Drop it -- too many retransmissions. */
    435  1.1     ozaki 		goto dropit;
    436  1.1     ozaki 	}
    437  1.1     ozaki 
    438  1.1     ozaki 	/*
    439  1.1     ozaki 	 * Compute the total amount of time this entry has
    440  1.1     ozaki 	 * been on a queue.  If this entry has been on longer
    441  1.1     ozaki 	 * than the keep alive timer would allow, expire it.
    442  1.1     ozaki 	 */
    443  1.1     ozaki 	sc->sc_rxttot += sc->sc_rxtcur;
    444  1.1     ozaki 	if (sc->sc_rxttot >= MIN(tcp_keepinit, TCP_TIMER_MAXTICKS))
    445  1.1     ozaki 		goto dropit;
    446  1.1     ozaki 
    447  1.1     ozaki 	TCP_STATINC(TCP_STAT_SC_RETRANSMITTED);
    448  1.1     ozaki 	(void)syn_cache_respond(sc);
    449  1.1     ozaki 
    450  1.1     ozaki 	/* Advance the timer back-off. */
    451  1.1     ozaki 	sc->sc_rxtshift++;
    452  1.1     ozaki 	syn_cache_timer_arm(sc);
    453  1.1     ozaki 
    454  1.1     ozaki 	goto out;
    455  1.1     ozaki 
    456  1.1     ozaki  dropit:
    457  1.1     ozaki 	TCP_STATINC(TCP_STAT_SC_TIMED_OUT);
    458  1.1     ozaki 	syn_cache_rm(sc);
    459  1.1     ozaki 	if (sc->sc_ipopts)
    460  1.1     ozaki 		(void) m_free(sc->sc_ipopts);
    461  1.1     ozaki 	rtcache_free(&sc->sc_route);
    462  1.1     ozaki 
    463  1.1     ozaki  free:
    464  1.1     ozaki 	callout_destroy(&sc->sc_timer);
    465  1.1     ozaki 	pool_put(&syn_cache_pool, sc);
    466  1.1     ozaki 
    467  1.1     ozaki  out:
    468  1.1     ozaki 	KERNEL_UNLOCK_ONE(NULL);
    469  1.1     ozaki 	mutex_exit(softnet_lock);
    470  1.1     ozaki }
    471  1.1     ozaki 
    472  1.1     ozaki /*
    473  1.1     ozaki  * Remove syn cache created by the specified tcb entry,
    474  1.1     ozaki  * because this does not make sense to keep them
    475  1.1     ozaki  * (if there's no tcb entry, syn cache entry will never be used)
    476  1.1     ozaki  */
    477  1.1     ozaki void
    478  1.1     ozaki syn_cache_cleanup(struct tcpcb *tp)
    479  1.1     ozaki {
    480  1.1     ozaki 	struct syn_cache *sc, *nsc;
    481  1.1     ozaki 	int s;
    482  1.1     ozaki 
    483  1.1     ozaki 	s = splsoftnet();
    484  1.1     ozaki 
    485  1.1     ozaki 	for (sc = LIST_FIRST(&tp->t_sc); sc != NULL; sc = nsc) {
    486  1.1     ozaki 		nsc = LIST_NEXT(sc, sc_tpq);
    487  1.1     ozaki 
    488  1.1     ozaki #ifdef DIAGNOSTIC
    489  1.1     ozaki 		if (sc->sc_tp != tp)
    490  1.1     ozaki 			panic("invalid sc_tp in syn_cache_cleanup");
    491  1.1     ozaki #endif
    492  1.1     ozaki 		syn_cache_rm(sc);
    493  1.1     ozaki 		syn_cache_put(sc);	/* calls pool_put but see spl above */
    494  1.1     ozaki 	}
    495  1.1     ozaki 	/* just for safety */
    496  1.1     ozaki 	LIST_INIT(&tp->t_sc);
    497  1.1     ozaki 
    498  1.1     ozaki 	splx(s);
    499  1.1     ozaki }
    500  1.1     ozaki 
    501  1.1     ozaki /*
    502  1.1     ozaki  * Find an entry in the syn cache.
    503  1.1     ozaki  */
    504  1.2     ozaki static struct syn_cache *
    505  1.1     ozaki syn_cache_lookup(const struct sockaddr *src, const struct sockaddr *dst,
    506  1.1     ozaki     struct syn_cache_head **headp)
    507  1.1     ozaki {
    508  1.1     ozaki 	struct syn_cache *sc;
    509  1.1     ozaki 	struct syn_cache_head *scp;
    510  1.1     ozaki 	u_int32_t hash;
    511  1.1     ozaki 	int s;
    512  1.1     ozaki 
    513  1.1     ozaki 	SYN_HASHALL(hash, src, dst);
    514  1.1     ozaki 
    515  1.1     ozaki 	scp = &tcp_syn_cache[hash % tcp_syn_cache_size];
    516  1.1     ozaki 	*headp = scp;
    517  1.1     ozaki 	s = splsoftnet();
    518  1.1     ozaki 	for (sc = TAILQ_FIRST(&scp->sch_bucket); sc != NULL;
    519  1.1     ozaki 	     sc = TAILQ_NEXT(sc, sc_bucketq)) {
    520  1.1     ozaki 		if (sc->sc_hash != hash)
    521  1.1     ozaki 			continue;
    522  1.1     ozaki 		if (!memcmp(&sc->sc_src, src, src->sa_len) &&
    523  1.1     ozaki 		    !memcmp(&sc->sc_dst, dst, dst->sa_len)) {
    524  1.1     ozaki 			splx(s);
    525  1.1     ozaki 			return (sc);
    526  1.1     ozaki 		}
    527  1.1     ozaki 	}
    528  1.1     ozaki 	splx(s);
    529  1.1     ozaki 	return (NULL);
    530  1.1     ozaki }
    531  1.1     ozaki 
    532  1.1     ozaki /*
    533  1.1     ozaki  * This function gets called when we receive an ACK for a socket in the
    534  1.1     ozaki  * LISTEN state. We look up the connection in the syn cache, and if it's
    535  1.1     ozaki  * there, we pull it out of the cache and turn it into a full-blown
    536  1.1     ozaki  * connection in the SYN-RECEIVED state.
    537  1.1     ozaki  *
    538  1.1     ozaki  * The return values may not be immediately obvious, and their effects
    539  1.1     ozaki  * can be subtle, so here they are:
    540  1.1     ozaki  *
    541  1.1     ozaki  *	NULL	SYN was not found in cache; caller should drop the
    542  1.1     ozaki  *		packet and send an RST.
    543  1.1     ozaki  *
    544  1.1     ozaki  *	-1	We were unable to create the new connection, and are
    545  1.1     ozaki  *		aborting it.  An ACK,RST is being sent to the peer
    546  1.1     ozaki  *		(unless we got screwy sequence numbers; see below),
    547  1.1     ozaki  *		because the 3-way handshake has been completed.  Caller
    548  1.1     ozaki  *		should not free the mbuf, since we may be using it.  If
    549  1.1     ozaki  *		we are not, we will free it.
    550  1.1     ozaki  *
    551  1.1     ozaki  *	Otherwise, the return value is a pointer to the new socket
    552  1.1     ozaki  *	associated with the connection.
    553  1.1     ozaki  */
    554  1.1     ozaki struct socket *
    555  1.1     ozaki syn_cache_get(struct sockaddr *src, struct sockaddr *dst,
    556  1.1     ozaki     struct tcphdr *th, struct socket *so, struct mbuf *m)
    557  1.1     ozaki {
    558  1.1     ozaki 	struct syn_cache *sc;
    559  1.1     ozaki 	struct syn_cache_head *scp;
    560  1.1     ozaki 	struct inpcb *inp = NULL;
    561  1.1     ozaki 	struct tcpcb *tp;
    562  1.1     ozaki 	int s;
    563  1.1     ozaki 	struct socket *oso;
    564  1.1     ozaki 
    565  1.1     ozaki 	s = splsoftnet();
    566  1.1     ozaki 	if ((sc = syn_cache_lookup(src, dst, &scp)) == NULL) {
    567  1.1     ozaki 		splx(s);
    568  1.1     ozaki 		return NULL;
    569  1.1     ozaki 	}
    570  1.1     ozaki 
    571  1.1     ozaki 	/*
    572  1.1     ozaki 	 * Verify the sequence and ack numbers.  Try getting the correct
    573  1.1     ozaki 	 * response again.
    574  1.1     ozaki 	 */
    575  1.1     ozaki 	if ((th->th_ack != sc->sc_iss + 1) ||
    576  1.1     ozaki 	    SEQ_LEQ(th->th_seq, sc->sc_irs) ||
    577  1.1     ozaki 	    SEQ_GT(th->th_seq, sc->sc_irs + 1 + sc->sc_win)) {
    578  1.1     ozaki 		m_freem(m);
    579  1.1     ozaki 		(void)syn_cache_respond(sc);
    580  1.1     ozaki 		splx(s);
    581  1.1     ozaki 		return ((struct socket *)(-1));
    582  1.1     ozaki 	}
    583  1.1     ozaki 
    584  1.1     ozaki 	/* Remove this cache entry */
    585  1.1     ozaki 	syn_cache_rm(sc);
    586  1.1     ozaki 	splx(s);
    587  1.1     ozaki 
    588  1.1     ozaki 	/*
    589  1.1     ozaki 	 * Ok, create the full blown connection, and set things up
    590  1.1     ozaki 	 * as they would have been set up if we had created the
    591  1.1     ozaki 	 * connection when the SYN arrived.  If we can't create
    592  1.1     ozaki 	 * the connection, abort it.
    593  1.1     ozaki 	 */
    594  1.1     ozaki 	/*
    595  1.1     ozaki 	 * inp still has the OLD in_pcb stuff, set the
    596  1.1     ozaki 	 * v6-related flags on the new guy, too.   This is
    597  1.1     ozaki 	 * done particularly for the case where an AF_INET6
    598  1.1     ozaki 	 * socket is bound only to a port, and a v4 connection
    599  1.1     ozaki 	 * comes in on that port.
    600  1.1     ozaki 	 * we also copy the flowinfo from the original pcb
    601  1.1     ozaki 	 * to the new one.
    602  1.1     ozaki 	 */
    603  1.1     ozaki 	oso = so;
    604  1.1     ozaki 	so = sonewconn(so, true);
    605  1.1     ozaki 	if (so == NULL)
    606  1.1     ozaki 		goto resetandabort;
    607  1.1     ozaki 
    608  1.3     ozaki 	inp = sotoinpcb(so);
    609  1.1     ozaki 
    610  1.1     ozaki 	switch (src->sa_family) {
    611  1.1     ozaki 	case AF_INET:
    612  1.3     ozaki 		if (inp->inp_af == AF_INET) {
    613  1.4     ozaki 			in4p_laddr(inp) = ((struct sockaddr_in *)dst)->sin_addr;
    614  1.1     ozaki 			inp->inp_lport = ((struct sockaddr_in *)dst)->sin_port;
    615  1.1     ozaki 			inp->inp_options = ip_srcroute(m);
    616  1.5     ozaki 			inpcb_set_state(inp, INP_BOUND);
    617  1.1     ozaki 			if (inp->inp_options == NULL) {
    618  1.1     ozaki 				inp->inp_options = sc->sc_ipopts;
    619  1.1     ozaki 				sc->sc_ipopts = NULL;
    620  1.1     ozaki 			}
    621  1.1     ozaki 		}
    622  1.1     ozaki #ifdef INET6
    623  1.3     ozaki 		else if (inp->inp_af == AF_INET6) {
    624  1.1     ozaki 			/* IPv4 packet to AF_INET6 socket */
    625  1.4     ozaki 			memset(&in6p_laddr(inp), 0, sizeof(in6p_laddr(inp)));
    626  1.4     ozaki 			in6p_laddr(inp).s6_addr16[5] = htons(0xffff);
    627  1.1     ozaki 			bcopy(&((struct sockaddr_in *)dst)->sin_addr,
    628  1.4     ozaki 				&in6p_laddr(inp).s6_addr32[3],
    629  1.1     ozaki 				sizeof(((struct sockaddr_in *)dst)->sin_addr));
    630  1.3     ozaki 			inp->inp_lport = ((struct sockaddr_in *)dst)->sin_port;
    631  1.3     ozaki 			intotcpcb(inp)->t_family = AF_INET;
    632  1.3     ozaki 			if (sotoinpcb(oso)->inp_flags & IN6P_IPV6_V6ONLY)
    633  1.3     ozaki 				inp->inp_flags |= IN6P_IPV6_V6ONLY;
    634  1.1     ozaki 			else
    635  1.3     ozaki 				inp->inp_flags &= ~IN6P_IPV6_V6ONLY;
    636  1.5     ozaki 			inpcb_set_state(inp, INP_BOUND);
    637  1.1     ozaki 		}
    638  1.1     ozaki #endif
    639  1.1     ozaki 		break;
    640  1.1     ozaki #ifdef INET6
    641  1.1     ozaki 	case AF_INET6:
    642  1.3     ozaki 		if (inp->inp_af == AF_INET6) {
    643  1.4     ozaki 			in6p_laddr(inp) = ((struct sockaddr_in6 *)dst)->sin6_addr;
    644  1.3     ozaki 			inp->inp_lport = ((struct sockaddr_in6 *)dst)->sin6_port;
    645  1.5     ozaki 			inpcb_set_state(inp, INP_BOUND);
    646  1.1     ozaki 		}
    647  1.1     ozaki 		break;
    648  1.1     ozaki #endif
    649  1.1     ozaki 	}
    650  1.1     ozaki 
    651  1.1     ozaki #ifdef INET6
    652  1.3     ozaki 	if (inp && intotcpcb(inp)->t_family == AF_INET6 && sotoinpcb(oso)) {
    653  1.3     ozaki 		struct inpcb *oinp = sotoinpcb(oso);
    654  1.1     ozaki 		/* inherit socket options from the listening socket */
    655  1.3     ozaki 		inp->inp_flags |= (oinp->inp_flags & IN6P_CONTROLOPTS);
    656  1.3     ozaki 		if (inp->inp_flags & IN6P_CONTROLOPTS) {
    657  1.3     ozaki 			m_freem(inp->inp_options);
    658  1.3     ozaki 			inp->inp_options = NULL;
    659  1.1     ozaki 		}
    660  1.3     ozaki 		ip6_savecontrol(inp, &inp->inp_options,
    661  1.1     ozaki 		    mtod(m, struct ip6_hdr *), m);
    662  1.1     ozaki 	}
    663  1.1     ozaki #endif
    664  1.1     ozaki 
    665  1.1     ozaki 	/*
    666  1.1     ozaki 	 * Give the new socket our cached route reference.
    667  1.1     ozaki 	 */
    668  1.3     ozaki 	rtcache_copy(&inp->inp_route, &sc->sc_route);
    669  1.3     ozaki 	rtcache_free(&sc->sc_route);
    670  1.1     ozaki 
    671  1.3     ozaki 	if (inp->inp_af == AF_INET) {
    672  1.1     ozaki 		struct sockaddr_in sin;
    673  1.1     ozaki 		memcpy(&sin, src, src->sa_len);
    674  1.5     ozaki 		if (inpcb_connect(inp, &sin, &lwp0)) {
    675  1.1     ozaki 			goto resetandabort;
    676  1.1     ozaki 		}
    677  1.1     ozaki 	}
    678  1.1     ozaki #ifdef INET6
    679  1.3     ozaki 	else if (inp->inp_af == AF_INET6) {
    680  1.1     ozaki 		struct sockaddr_in6 sin6;
    681  1.1     ozaki 		memcpy(&sin6, src, src->sa_len);
    682  1.1     ozaki 		if (src->sa_family == AF_INET) {
    683  1.1     ozaki 			/* IPv4 packet to AF_INET6 socket */
    684  1.1     ozaki 			in6_sin_2_v4mapsin6((struct sockaddr_in *)src, &sin6);
    685  1.1     ozaki 		}
    686  1.6     ozaki 		if (in6pcb_connect(inp, &sin6, NULL)) {
    687  1.1     ozaki 			goto resetandabort;
    688  1.1     ozaki 		}
    689  1.1     ozaki 	}
    690  1.1     ozaki #endif
    691  1.1     ozaki 	else {
    692  1.1     ozaki 		goto resetandabort;
    693  1.1     ozaki 	}
    694  1.1     ozaki 
    695  1.3     ozaki 	tp = intotcpcb(inp);
    696  1.1     ozaki 
    697  1.1     ozaki 	tp->t_flags = sototcpcb(oso)->t_flags & TF_NODELAY;
    698  1.1     ozaki 	if (sc->sc_request_r_scale != 15) {
    699  1.1     ozaki 		tp->requested_s_scale = sc->sc_requested_s_scale;
    700  1.1     ozaki 		tp->request_r_scale = sc->sc_request_r_scale;
    701  1.1     ozaki 		tp->snd_scale = sc->sc_requested_s_scale;
    702  1.1     ozaki 		tp->rcv_scale = sc->sc_request_r_scale;
    703  1.1     ozaki 		tp->t_flags |= TF_REQ_SCALE|TF_RCVD_SCALE;
    704  1.1     ozaki 	}
    705  1.1     ozaki 	if (sc->sc_flags & SCF_TIMESTAMP)
    706  1.1     ozaki 		tp->t_flags |= TF_REQ_TSTMP|TF_RCVD_TSTMP;
    707  1.1     ozaki 	tp->ts_timebase = sc->sc_timebase;
    708  1.1     ozaki 
    709  1.1     ozaki 	tp->t_template = tcp_template(tp);
    710  1.1     ozaki 	if (tp->t_template == 0) {
    711  1.1     ozaki 		tp = tcp_drop(tp, ENOBUFS);	/* destroys socket */
    712  1.1     ozaki 		so = NULL;
    713  1.1     ozaki 		m_freem(m);
    714  1.1     ozaki 		goto abort;
    715  1.1     ozaki 	}
    716  1.1     ozaki 
    717  1.1     ozaki 	tp->iss = sc->sc_iss;
    718  1.1     ozaki 	tp->irs = sc->sc_irs;
    719  1.1     ozaki 	tcp_sendseqinit(tp);
    720  1.1     ozaki 	tcp_rcvseqinit(tp);
    721  1.1     ozaki 	tp->t_state = TCPS_SYN_RECEIVED;
    722  1.1     ozaki 	TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepinit);
    723  1.1     ozaki 	TCP_STATINC(TCP_STAT_ACCEPTS);
    724  1.1     ozaki 
    725  1.1     ozaki 	if ((sc->sc_flags & SCF_SACK_PERMIT) && tcp_do_sack)
    726  1.1     ozaki 		tp->t_flags |= TF_WILL_SACK;
    727  1.1     ozaki 
    728  1.1     ozaki 	if ((sc->sc_flags & SCF_ECN_PERMIT) && tcp_do_ecn)
    729  1.1     ozaki 		tp->t_flags |= TF_ECN_PERMIT;
    730  1.1     ozaki 
    731  1.1     ozaki #ifdef TCP_SIGNATURE
    732  1.1     ozaki 	if (sc->sc_flags & SCF_SIGNATURE)
    733  1.1     ozaki 		tp->t_flags |= TF_SIGNATURE;
    734  1.1     ozaki #endif
    735  1.1     ozaki 
    736  1.1     ozaki 	/* Initialize tp->t_ourmss before we deal with the peer's! */
    737  1.1     ozaki 	tp->t_ourmss = sc->sc_ourmaxseg;
    738  1.1     ozaki 	tcp_mss_from_peer(tp, sc->sc_peermaxseg);
    739  1.1     ozaki 
    740  1.1     ozaki 	/*
    741  1.1     ozaki 	 * Initialize the initial congestion window.  If we
    742  1.1     ozaki 	 * had to retransmit the SYN,ACK, we must initialize cwnd
    743  1.1     ozaki 	 * to 1 segment (i.e. the Loss Window).
    744  1.1     ozaki 	 */
    745  1.1     ozaki 	if (sc->sc_rxtshift)
    746  1.1     ozaki 		tp->snd_cwnd = tp->t_peermss;
    747  1.1     ozaki 	else {
    748  1.1     ozaki 		int ss = tcp_init_win;
    749  1.4     ozaki 		if (inp->inp_af == AF_INET && in_localaddr(in4p_faddr(inp)))
    750  1.1     ozaki 			ss = tcp_init_win_local;
    751  1.1     ozaki #ifdef INET6
    752  1.4     ozaki 		else if (inp->inp_af == AF_INET6 && in6_localaddr(&in6p_faddr(inp)))
    753  1.1     ozaki 			ss = tcp_init_win_local;
    754  1.1     ozaki #endif
    755  1.1     ozaki 		tp->snd_cwnd = TCP_INITIAL_WINDOW(ss, tp->t_peermss);
    756  1.1     ozaki 	}
    757  1.1     ozaki 
    758  1.1     ozaki 	tcp_rmx_rtt(tp);
    759  1.1     ozaki 	tp->snd_wl1 = sc->sc_irs;
    760  1.1     ozaki 	tp->rcv_up = sc->sc_irs + 1;
    761  1.1     ozaki 
    762  1.1     ozaki 	/*
    763  1.1     ozaki 	 * This is what would have happened in tcp_output() when
    764  1.1     ozaki 	 * the SYN,ACK was sent.
    765  1.1     ozaki 	 */
    766  1.1     ozaki 	tp->snd_up = tp->snd_una;
    767  1.1     ozaki 	tp->snd_max = tp->snd_nxt = tp->iss+1;
    768  1.1     ozaki 	TCP_TIMER_ARM(tp, TCPT_REXMT, tp->t_rxtcur);
    769  1.1     ozaki 	if (sc->sc_win > 0 && SEQ_GT(tp->rcv_nxt + sc->sc_win, tp->rcv_adv))
    770  1.1     ozaki 		tp->rcv_adv = tp->rcv_nxt + sc->sc_win;
    771  1.1     ozaki 	tp->last_ack_sent = tp->rcv_nxt;
    772  1.1     ozaki 	tp->t_partialacks = -1;
    773  1.1     ozaki 	tp->t_dupacks = 0;
    774  1.1     ozaki 
    775  1.1     ozaki 	TCP_STATINC(TCP_STAT_SC_COMPLETED);
    776  1.1     ozaki 	s = splsoftnet();
    777  1.1     ozaki 	syn_cache_put(sc);
    778  1.1     ozaki 	splx(s);
    779  1.1     ozaki 	return so;
    780  1.1     ozaki 
    781  1.1     ozaki resetandabort:
    782  1.1     ozaki 	(void)tcp_respond(NULL, m, m, th, (tcp_seq)0, th->th_ack, TH_RST);
    783  1.1     ozaki abort:
    784  1.1     ozaki 	if (so != NULL) {
    785  1.1     ozaki 		(void) soqremque(so, 1);
    786  1.1     ozaki 		(void) soabort(so);
    787  1.1     ozaki 		mutex_enter(softnet_lock);
    788  1.1     ozaki 	}
    789  1.1     ozaki 	s = splsoftnet();
    790  1.1     ozaki 	syn_cache_put(sc);
    791  1.1     ozaki 	splx(s);
    792  1.1     ozaki 	TCP_STATINC(TCP_STAT_SC_ABORTED);
    793  1.1     ozaki 	return ((struct socket *)(-1));
    794  1.1     ozaki }
    795  1.1     ozaki 
    796  1.1     ozaki /*
    797  1.1     ozaki  * This function is called when we get a RST for a
    798  1.1     ozaki  * non-existent connection, so that we can see if the
    799  1.1     ozaki  * connection is in the syn cache.  If it is, zap it.
    800  1.1     ozaki  */
    801  1.1     ozaki 
    802  1.1     ozaki void
    803  1.1     ozaki syn_cache_reset(struct sockaddr *src, struct sockaddr *dst, struct tcphdr *th)
    804  1.1     ozaki {
    805  1.1     ozaki 	struct syn_cache *sc;
    806  1.1     ozaki 	struct syn_cache_head *scp;
    807  1.1     ozaki 	int s = splsoftnet();
    808  1.1     ozaki 
    809  1.1     ozaki 	if ((sc = syn_cache_lookup(src, dst, &scp)) == NULL) {
    810  1.1     ozaki 		splx(s);
    811  1.1     ozaki 		return;
    812  1.1     ozaki 	}
    813  1.1     ozaki 	if (SEQ_LT(th->th_seq, sc->sc_irs) ||
    814  1.1     ozaki 	    SEQ_GT(th->th_seq, sc->sc_irs+1)) {
    815  1.1     ozaki 		splx(s);
    816  1.1     ozaki 		return;
    817  1.1     ozaki 	}
    818  1.1     ozaki 	syn_cache_rm(sc);
    819  1.1     ozaki 	TCP_STATINC(TCP_STAT_SC_RESET);
    820  1.1     ozaki 	syn_cache_put(sc);	/* calls pool_put but see spl above */
    821  1.1     ozaki 	splx(s);
    822  1.1     ozaki }
    823  1.1     ozaki 
    824  1.1     ozaki void
    825  1.1     ozaki syn_cache_unreach(const struct sockaddr *src, const struct sockaddr *dst,
    826  1.1     ozaki     struct tcphdr *th)
    827  1.1     ozaki {
    828  1.1     ozaki 	struct syn_cache *sc;
    829  1.1     ozaki 	struct syn_cache_head *scp;
    830  1.1     ozaki 	int s;
    831  1.1     ozaki 
    832  1.1     ozaki 	s = splsoftnet();
    833  1.1     ozaki 	if ((sc = syn_cache_lookup(src, dst, &scp)) == NULL) {
    834  1.1     ozaki 		splx(s);
    835  1.1     ozaki 		return;
    836  1.1     ozaki 	}
    837  1.1     ozaki 	/* If the sequence number != sc_iss, then it's a bogus ICMP msg */
    838  1.1     ozaki 	if (ntohl(th->th_seq) != sc->sc_iss) {
    839  1.1     ozaki 		splx(s);
    840  1.1     ozaki 		return;
    841  1.1     ozaki 	}
    842  1.1     ozaki 
    843  1.1     ozaki 	/*
    844  1.1     ozaki 	 * If we've retransmitted 3 times and this is our second error,
    845  1.1     ozaki 	 * we remove the entry.  Otherwise, we allow it to continue on.
    846  1.1     ozaki 	 * This prevents us from incorrectly nuking an entry during a
    847  1.1     ozaki 	 * spurious network outage.
    848  1.1     ozaki 	 *
    849  1.1     ozaki 	 * See tcp_notify().
    850  1.1     ozaki 	 */
    851  1.1     ozaki 	if ((sc->sc_flags & SCF_UNREACH) == 0 || sc->sc_rxtshift < 3) {
    852  1.1     ozaki 		sc->sc_flags |= SCF_UNREACH;
    853  1.1     ozaki 		splx(s);
    854  1.1     ozaki 		return;
    855  1.1     ozaki 	}
    856  1.1     ozaki 
    857  1.1     ozaki 	syn_cache_rm(sc);
    858  1.1     ozaki 	TCP_STATINC(TCP_STAT_SC_UNREACH);
    859  1.1     ozaki 	syn_cache_put(sc);	/* calls pool_put but see spl above */
    860  1.1     ozaki 	splx(s);
    861  1.1     ozaki }
    862  1.1     ozaki 
    863  1.1     ozaki /*
    864  1.1     ozaki  * Given a LISTEN socket and an inbound SYN request, add this to the syn
    865  1.1     ozaki  * cache, and send back a segment:
    866  1.1     ozaki  *	<SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK>
    867  1.1     ozaki  * to the source.
    868  1.1     ozaki  *
    869  1.1     ozaki  * IMPORTANT NOTE: We do _NOT_ ACK data that might accompany the SYN.
    870  1.1     ozaki  * Doing so would require that we hold onto the data and deliver it
    871  1.1     ozaki  * to the application.  However, if we are the target of a SYN-flood
    872  1.1     ozaki  * DoS attack, an attacker could send data which would eventually
    873  1.1     ozaki  * consume all available buffer space if it were ACKed.  By not ACKing
    874  1.1     ozaki  * the data, we avoid this DoS scenario.
    875  1.1     ozaki  */
    876  1.1     ozaki int
    877  1.1     ozaki syn_cache_add(struct sockaddr *src, struct sockaddr *dst, struct tcphdr *th,
    878  1.1     ozaki     unsigned int toff, struct socket *so, struct mbuf *m, u_char *optp,
    879  1.1     ozaki     int optlen, struct tcp_opt_info *oi)
    880  1.1     ozaki {
    881  1.1     ozaki 	struct tcpcb tb, *tp;
    882  1.1     ozaki 	long win;
    883  1.1     ozaki 	struct syn_cache *sc;
    884  1.1     ozaki 	struct syn_cache_head *scp;
    885  1.1     ozaki 	struct mbuf *ipopts;
    886  1.1     ozaki 	int s;
    887  1.1     ozaki 
    888  1.1     ozaki 	tp = sototcpcb(so);
    889  1.1     ozaki 
    890  1.1     ozaki 	/*
    891  1.1     ozaki 	 * Initialize some local state.
    892  1.1     ozaki 	 */
    893  1.1     ozaki 	win = sbspace(&so->so_rcv);
    894  1.1     ozaki 	if (win > TCP_MAXWIN)
    895  1.1     ozaki 		win = TCP_MAXWIN;
    896  1.1     ozaki 
    897  1.1     ozaki #ifdef TCP_SIGNATURE
    898  1.1     ozaki 	if (optp || (tp->t_flags & TF_SIGNATURE))
    899  1.1     ozaki #else
    900  1.1     ozaki 	if (optp)
    901  1.1     ozaki #endif
    902  1.1     ozaki 	{
    903  1.1     ozaki 		tb.t_flags = tcp_do_rfc1323 ? (TF_REQ_SCALE|TF_REQ_TSTMP) : 0;
    904  1.1     ozaki #ifdef TCP_SIGNATURE
    905  1.1     ozaki 		tb.t_flags |= (tp->t_flags & TF_SIGNATURE);
    906  1.1     ozaki #endif
    907  1.1     ozaki 		tb.t_state = TCPS_LISTEN;
    908  1.1     ozaki 		if (tcp_dooptions(&tb, optp, optlen, th, m, toff, oi) < 0)
    909  1.1     ozaki 			return 0;
    910  1.1     ozaki 	} else
    911  1.1     ozaki 		tb.t_flags = 0;
    912  1.1     ozaki 
    913  1.1     ozaki 	switch (src->sa_family) {
    914  1.1     ozaki 	case AF_INET:
    915  1.1     ozaki 		/* Remember the IP options, if any. */
    916  1.1     ozaki 		ipopts = ip_srcroute(m);
    917  1.1     ozaki 		break;
    918  1.1     ozaki 	default:
    919  1.1     ozaki 		ipopts = NULL;
    920  1.1     ozaki 	}
    921  1.1     ozaki 
    922  1.1     ozaki 	/*
    923  1.1     ozaki 	 * See if we already have an entry for this connection.
    924  1.1     ozaki 	 * If we do, resend the SYN,ACK.  We do not count this
    925  1.1     ozaki 	 * as a retransmission (XXX though maybe we should).
    926  1.1     ozaki 	 */
    927  1.1     ozaki 	if ((sc = syn_cache_lookup(src, dst, &scp)) != NULL) {
    928  1.1     ozaki 		TCP_STATINC(TCP_STAT_SC_DUPESYN);
    929  1.1     ozaki 		if (ipopts) {
    930  1.1     ozaki 			/*
    931  1.1     ozaki 			 * If we were remembering a previous source route,
    932  1.1     ozaki 			 * forget it and use the new one we've been given.
    933  1.1     ozaki 			 */
    934  1.1     ozaki 			if (sc->sc_ipopts)
    935  1.1     ozaki 				(void)m_free(sc->sc_ipopts);
    936  1.1     ozaki 			sc->sc_ipopts = ipopts;
    937  1.1     ozaki 		}
    938  1.1     ozaki 		sc->sc_timestamp = tb.ts_recent;
    939  1.1     ozaki 		m_freem(m);
    940  1.1     ozaki 		if (syn_cache_respond(sc) == 0) {
    941  1.7  riastrad 			net_stat_ref_t tcps = TCP_STAT_GETREF();
    942  1.7  riastrad 			_NET_STATINC_REF(tcps, TCP_STAT_SNDACKS);
    943  1.7  riastrad 			_NET_STATINC_REF(tcps, TCP_STAT_SNDTOTAL);
    944  1.1     ozaki 			TCP_STAT_PUTREF();
    945  1.1     ozaki 		}
    946  1.1     ozaki 		return 1;
    947  1.1     ozaki 	}
    948  1.1     ozaki 
    949  1.1     ozaki 	s = splsoftnet();
    950  1.1     ozaki 	sc = pool_get(&syn_cache_pool, PR_NOWAIT);
    951  1.1     ozaki 	splx(s);
    952  1.1     ozaki 	if (sc == NULL) {
    953  1.1     ozaki 		if (ipopts)
    954  1.1     ozaki 			(void)m_free(ipopts);
    955  1.1     ozaki 		return 0;
    956  1.1     ozaki 	}
    957  1.1     ozaki 
    958  1.1     ozaki 	/*
    959  1.1     ozaki 	 * Fill in the cache, and put the necessary IP and TCP
    960  1.1     ozaki 	 * options into the reply.
    961  1.1     ozaki 	 */
    962  1.1     ozaki 	memset(sc, 0, sizeof(struct syn_cache));
    963  1.1     ozaki 	callout_init(&sc->sc_timer, CALLOUT_MPSAFE);
    964  1.1     ozaki 	memcpy(&sc->sc_src, src, src->sa_len);
    965  1.1     ozaki 	memcpy(&sc->sc_dst, dst, dst->sa_len);
    966  1.1     ozaki 	sc->sc_flags = 0;
    967  1.1     ozaki 	sc->sc_ipopts = ipopts;
    968  1.1     ozaki 	sc->sc_irs = th->th_seq;
    969  1.1     ozaki 	switch (src->sa_family) {
    970  1.1     ozaki 	case AF_INET:
    971  1.1     ozaki 	    {
    972  1.1     ozaki 		struct sockaddr_in *srcin = (void *)src;
    973  1.1     ozaki 		struct sockaddr_in *dstin = (void *)dst;
    974  1.1     ozaki 
    975  1.1     ozaki 		sc->sc_iss = tcp_new_iss1(&dstin->sin_addr,
    976  1.1     ozaki 		    &srcin->sin_addr, dstin->sin_port,
    977  1.1     ozaki 		    srcin->sin_port, sizeof(dstin->sin_addr));
    978  1.1     ozaki 		break;
    979  1.1     ozaki 	    }
    980  1.1     ozaki #ifdef INET6
    981  1.1     ozaki 	case AF_INET6:
    982  1.1     ozaki 	    {
    983  1.1     ozaki 		struct sockaddr_in6 *srcin6 = (void *)src;
    984  1.1     ozaki 		struct sockaddr_in6 *dstin6 = (void *)dst;
    985  1.1     ozaki 
    986  1.1     ozaki 		sc->sc_iss = tcp_new_iss1(&dstin6->sin6_addr,
    987  1.1     ozaki 		    &srcin6->sin6_addr, dstin6->sin6_port,
    988  1.1     ozaki 		    srcin6->sin6_port, sizeof(dstin6->sin6_addr));
    989  1.1     ozaki 		break;
    990  1.1     ozaki 	    }
    991  1.1     ozaki #endif
    992  1.1     ozaki 	}
    993  1.1     ozaki 	sc->sc_peermaxseg = oi->maxseg;
    994  1.1     ozaki 	sc->sc_ourmaxseg = tcp_mss_to_advertise(m->m_flags & M_PKTHDR ?
    995  1.1     ozaki 	    m_get_rcvif_NOMPSAFE(m) : NULL, sc->sc_src.sa.sa_family);
    996  1.1     ozaki 	sc->sc_win = win;
    997  1.1     ozaki 	sc->sc_timebase = tcp_now - 1;	/* see tcp_newtcpcb() */
    998  1.1     ozaki 	sc->sc_timestamp = tb.ts_recent;
    999  1.1     ozaki 	if ((tb.t_flags & (TF_REQ_TSTMP|TF_RCVD_TSTMP)) ==
   1000  1.1     ozaki 	    (TF_REQ_TSTMP|TF_RCVD_TSTMP))
   1001  1.1     ozaki 		sc->sc_flags |= SCF_TIMESTAMP;
   1002  1.1     ozaki 	if ((tb.t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
   1003  1.1     ozaki 	    (TF_RCVD_SCALE|TF_REQ_SCALE)) {
   1004  1.1     ozaki 		sc->sc_requested_s_scale = tb.requested_s_scale;
   1005  1.1     ozaki 		sc->sc_request_r_scale = 0;
   1006  1.1     ozaki 		/*
   1007  1.1     ozaki 		 * Pick the smallest possible scaling factor that
   1008  1.1     ozaki 		 * will still allow us to scale up to sb_max.
   1009  1.1     ozaki 		 *
   1010  1.1     ozaki 		 * We do this because there are broken firewalls that
   1011  1.1     ozaki 		 * will corrupt the window scale option, leading to
   1012  1.1     ozaki 		 * the other endpoint believing that our advertised
   1013  1.1     ozaki 		 * window is unscaled.  At scale factors larger than
   1014  1.1     ozaki 		 * 5 the unscaled window will drop below 1500 bytes,
   1015  1.1     ozaki 		 * leading to serious problems when traversing these
   1016  1.1     ozaki 		 * broken firewalls.
   1017  1.1     ozaki 		 *
   1018  1.1     ozaki 		 * With the default sbmax of 256K, a scale factor
   1019  1.1     ozaki 		 * of 3 will be chosen by this algorithm.  Those who
   1020  1.1     ozaki 		 * choose a larger sbmax should watch out
   1021  1.1     ozaki 		 * for the compatibility problems mentioned above.
   1022  1.1     ozaki 		 *
   1023  1.1     ozaki 		 * RFC1323: The Window field in a SYN (i.e., a <SYN>
   1024  1.1     ozaki 		 * or <SYN,ACK>) segment itself is never scaled.
   1025  1.1     ozaki 		 */
   1026  1.1     ozaki 		while (sc->sc_request_r_scale < TCP_MAX_WINSHIFT &&
   1027  1.1     ozaki 		    (TCP_MAXWIN << sc->sc_request_r_scale) < sb_max)
   1028  1.1     ozaki 			sc->sc_request_r_scale++;
   1029  1.1     ozaki 	} else {
   1030  1.1     ozaki 		sc->sc_requested_s_scale = 15;
   1031  1.1     ozaki 		sc->sc_request_r_scale = 15;
   1032  1.1     ozaki 	}
   1033  1.1     ozaki 	if ((tb.t_flags & TF_SACK_PERMIT) && tcp_do_sack)
   1034  1.1     ozaki 		sc->sc_flags |= SCF_SACK_PERMIT;
   1035  1.1     ozaki 
   1036  1.1     ozaki 	/*
   1037  1.1     ozaki 	 * ECN setup packet received.
   1038  1.1     ozaki 	 */
   1039  1.1     ozaki 	if ((th->th_flags & (TH_ECE|TH_CWR)) && tcp_do_ecn)
   1040  1.1     ozaki 		sc->sc_flags |= SCF_ECN_PERMIT;
   1041  1.1     ozaki 
   1042  1.1     ozaki #ifdef TCP_SIGNATURE
   1043  1.1     ozaki 	if (tb.t_flags & TF_SIGNATURE)
   1044  1.1     ozaki 		sc->sc_flags |= SCF_SIGNATURE;
   1045  1.1     ozaki #endif
   1046  1.1     ozaki 	sc->sc_tp = tp;
   1047  1.1     ozaki 	m_freem(m);
   1048  1.1     ozaki 	if (syn_cache_respond(sc) == 0) {
   1049  1.7  riastrad 		net_stat_ref_t tcps = TCP_STAT_GETREF();
   1050  1.7  riastrad 		_NET_STATINC_REF(tcps, TCP_STAT_SNDACKS);
   1051  1.7  riastrad 		_NET_STATINC_REF(tcps, TCP_STAT_SNDTOTAL);
   1052  1.1     ozaki 		TCP_STAT_PUTREF();
   1053  1.1     ozaki 		syn_cache_insert(sc, tp);
   1054  1.1     ozaki 	} else {
   1055  1.1     ozaki 		s = splsoftnet();
   1056  1.1     ozaki 		/*
   1057  1.1     ozaki 		 * syn_cache_put() will try to schedule the timer, so
   1058  1.1     ozaki 		 * we need to initialize it
   1059  1.1     ozaki 		 */
   1060  1.1     ozaki 		syn_cache_timer_arm(sc);
   1061  1.1     ozaki 		syn_cache_put(sc);
   1062  1.1     ozaki 		splx(s);
   1063  1.1     ozaki 		TCP_STATINC(TCP_STAT_SC_DROPPED);
   1064  1.1     ozaki 	}
   1065  1.1     ozaki 	return 1;
   1066  1.1     ozaki }
   1067  1.1     ozaki 
   1068  1.1     ozaki /*
   1069  1.1     ozaki  * syn_cache_respond: (re)send SYN+ACK.
   1070  1.1     ozaki  *
   1071  1.1     ozaki  * Returns 0 on success.
   1072  1.1     ozaki  */
   1073  1.1     ozaki 
   1074  1.2     ozaki static int
   1075  1.1     ozaki syn_cache_respond(struct syn_cache *sc)
   1076  1.1     ozaki {
   1077  1.1     ozaki #ifdef INET6
   1078  1.1     ozaki 	struct rtentry *rt = NULL;
   1079  1.1     ozaki #endif
   1080  1.1     ozaki 	struct route *ro;
   1081  1.1     ozaki 	u_int8_t *optp;
   1082  1.1     ozaki 	int optlen, error;
   1083  1.1     ozaki 	u_int16_t tlen;
   1084  1.1     ozaki 	struct ip *ip = NULL;
   1085  1.1     ozaki #ifdef INET6
   1086  1.1     ozaki 	struct ip6_hdr *ip6 = NULL;
   1087  1.1     ozaki #endif
   1088  1.1     ozaki 	struct tcpcb *tp;
   1089  1.1     ozaki 	struct tcphdr *th;
   1090  1.1     ozaki 	struct mbuf *m;
   1091  1.1     ozaki 	u_int hlen;
   1092  1.1     ozaki #ifdef TCP_SIGNATURE
   1093  1.1     ozaki 	struct secasvar *sav = NULL;
   1094  1.1     ozaki 	u_int8_t *sigp = NULL;
   1095  1.1     ozaki #endif
   1096  1.1     ozaki 
   1097  1.1     ozaki 	ro = &sc->sc_route;
   1098  1.1     ozaki 	switch (sc->sc_src.sa.sa_family) {
   1099  1.1     ozaki 	case AF_INET:
   1100  1.1     ozaki 		hlen = sizeof(struct ip);
   1101  1.1     ozaki 		break;
   1102  1.1     ozaki #ifdef INET6
   1103  1.1     ozaki 	case AF_INET6:
   1104  1.1     ozaki 		hlen = sizeof(struct ip6_hdr);
   1105  1.1     ozaki 		break;
   1106  1.1     ozaki #endif
   1107  1.1     ozaki 	default:
   1108  1.1     ozaki 		return EAFNOSUPPORT;
   1109  1.1     ozaki 	}
   1110  1.1     ozaki 
   1111  1.1     ozaki 	/* Worst case scenario, since we don't know the option size yet. */
   1112  1.1     ozaki 	tlen = hlen + sizeof(struct tcphdr) + MAX_TCPOPTLEN;
   1113  1.1     ozaki 	KASSERT(max_linkhdr + tlen <= MCLBYTES);
   1114  1.1     ozaki 
   1115  1.1     ozaki 	/*
   1116  1.1     ozaki 	 * Create the IP+TCP header from scratch.
   1117  1.1     ozaki 	 */
   1118  1.1     ozaki 	MGETHDR(m, M_DONTWAIT, MT_DATA);
   1119  1.1     ozaki 	if (m && (max_linkhdr + tlen) > MHLEN) {
   1120  1.1     ozaki 		MCLGET(m, M_DONTWAIT);
   1121  1.1     ozaki 		if ((m->m_flags & M_EXT) == 0) {
   1122  1.1     ozaki 			m_freem(m);
   1123  1.1     ozaki 			m = NULL;
   1124  1.1     ozaki 		}
   1125  1.1     ozaki 	}
   1126  1.1     ozaki 	if (m == NULL)
   1127  1.1     ozaki 		return ENOBUFS;
   1128  1.1     ozaki 	MCLAIM(m, &tcp_tx_mowner);
   1129  1.1     ozaki 
   1130  1.1     ozaki 	tp = sc->sc_tp;
   1131  1.1     ozaki 
   1132  1.1     ozaki 	/* Fixup the mbuf. */
   1133  1.1     ozaki 	m->m_data += max_linkhdr;
   1134  1.1     ozaki 	m_reset_rcvif(m);
   1135  1.1     ozaki 	memset(mtod(m, void *), 0, tlen);
   1136  1.1     ozaki 
   1137  1.1     ozaki 	switch (sc->sc_src.sa.sa_family) {
   1138  1.1     ozaki 	case AF_INET:
   1139  1.1     ozaki 		ip = mtod(m, struct ip *);
   1140  1.1     ozaki 		ip->ip_v = 4;
   1141  1.1     ozaki 		ip->ip_dst = sc->sc_src.sin.sin_addr;
   1142  1.1     ozaki 		ip->ip_src = sc->sc_dst.sin.sin_addr;
   1143  1.1     ozaki 		ip->ip_p = IPPROTO_TCP;
   1144  1.1     ozaki 		th = (struct tcphdr *)(ip + 1);
   1145  1.1     ozaki 		th->th_dport = sc->sc_src.sin.sin_port;
   1146  1.1     ozaki 		th->th_sport = sc->sc_dst.sin.sin_port;
   1147  1.1     ozaki 		break;
   1148  1.1     ozaki #ifdef INET6
   1149  1.1     ozaki 	case AF_INET6:
   1150  1.1     ozaki 		ip6 = mtod(m, struct ip6_hdr *);
   1151  1.1     ozaki 		ip6->ip6_vfc = IPV6_VERSION;
   1152  1.1     ozaki 		ip6->ip6_dst = sc->sc_src.sin6.sin6_addr;
   1153  1.1     ozaki 		ip6->ip6_src = sc->sc_dst.sin6.sin6_addr;
   1154  1.1     ozaki 		ip6->ip6_nxt = IPPROTO_TCP;
   1155  1.1     ozaki 		/* ip6_plen will be updated in ip6_output() */
   1156  1.1     ozaki 		th = (struct tcphdr *)(ip6 + 1);
   1157  1.1     ozaki 		th->th_dport = sc->sc_src.sin6.sin6_port;
   1158  1.1     ozaki 		th->th_sport = sc->sc_dst.sin6.sin6_port;
   1159  1.1     ozaki 		break;
   1160  1.1     ozaki #endif
   1161  1.1     ozaki 	default:
   1162  1.1     ozaki 		panic("%s: impossible (1)", __func__);
   1163  1.1     ozaki 	}
   1164  1.1     ozaki 
   1165  1.1     ozaki 	th->th_seq = htonl(sc->sc_iss);
   1166  1.1     ozaki 	th->th_ack = htonl(sc->sc_irs + 1);
   1167  1.1     ozaki 	th->th_flags = TH_SYN|TH_ACK;
   1168  1.1     ozaki 	th->th_win = htons(sc->sc_win);
   1169  1.1     ozaki 	/* th_x2, th_sum, th_urp already 0 from memset */
   1170  1.1     ozaki 
   1171  1.1     ozaki 	/* Tack on the TCP options. */
   1172  1.1     ozaki 	optp = (u_int8_t *)(th + 1);
   1173  1.1     ozaki 	optlen = 0;
   1174  1.1     ozaki 	*optp++ = TCPOPT_MAXSEG;
   1175  1.1     ozaki 	*optp++ = TCPOLEN_MAXSEG;
   1176  1.1     ozaki 	*optp++ = (sc->sc_ourmaxseg >> 8) & 0xff;
   1177  1.1     ozaki 	*optp++ = sc->sc_ourmaxseg & 0xff;
   1178  1.1     ozaki 	optlen += TCPOLEN_MAXSEG;
   1179  1.1     ozaki 
   1180  1.1     ozaki 	if (sc->sc_request_r_scale != 15) {
   1181  1.1     ozaki 		*((u_int32_t *)optp) = htonl(TCPOPT_NOP << 24 |
   1182  1.1     ozaki 		    TCPOPT_WINDOW << 16 | TCPOLEN_WINDOW << 8 |
   1183  1.1     ozaki 		    sc->sc_request_r_scale);
   1184  1.1     ozaki 		optp += TCPOLEN_WINDOW + TCPOLEN_NOP;
   1185  1.1     ozaki 		optlen += TCPOLEN_WINDOW + TCPOLEN_NOP;
   1186  1.1     ozaki 	}
   1187  1.1     ozaki 
   1188  1.1     ozaki 	if (sc->sc_flags & SCF_SACK_PERMIT) {
   1189  1.1     ozaki 		/* Let the peer know that we will SACK. */
   1190  1.1     ozaki 		*optp++ = TCPOPT_SACK_PERMITTED;
   1191  1.1     ozaki 		*optp++ = TCPOLEN_SACK_PERMITTED;
   1192  1.1     ozaki 		optlen += TCPOLEN_SACK_PERMITTED;
   1193  1.1     ozaki 	}
   1194  1.1     ozaki 
   1195  1.1     ozaki 	if (sc->sc_flags & SCF_TIMESTAMP) {
   1196  1.1     ozaki 		while (optlen % 4 != 2) {
   1197  1.1     ozaki 			optlen += TCPOLEN_NOP;
   1198  1.1     ozaki 			*optp++ = TCPOPT_NOP;
   1199  1.1     ozaki 		}
   1200  1.1     ozaki 		*optp++ = TCPOPT_TIMESTAMP;
   1201  1.1     ozaki 		*optp++ = TCPOLEN_TIMESTAMP;
   1202  1.1     ozaki 		u_int32_t *lp = (u_int32_t *)(optp);
   1203  1.1     ozaki 		/* Form timestamp option as shown in appendix A of RFC 1323. */
   1204  1.1     ozaki 		*lp++ = htonl(SYN_CACHE_TIMESTAMP(sc));
   1205  1.1     ozaki 		*lp   = htonl(sc->sc_timestamp);
   1206  1.1     ozaki 		optp += TCPOLEN_TIMESTAMP - 2;
   1207  1.1     ozaki 		optlen += TCPOLEN_TIMESTAMP;
   1208  1.1     ozaki 	}
   1209  1.1     ozaki 
   1210  1.1     ozaki #ifdef TCP_SIGNATURE
   1211  1.1     ozaki 	if (sc->sc_flags & SCF_SIGNATURE) {
   1212  1.1     ozaki 		sav = tcp_signature_getsav(m);
   1213  1.1     ozaki 		if (sav == NULL) {
   1214  1.1     ozaki 			m_freem(m);
   1215  1.1     ozaki 			return EPERM;
   1216  1.1     ozaki 		}
   1217  1.1     ozaki 
   1218  1.1     ozaki 		*optp++ = TCPOPT_SIGNATURE;
   1219  1.1     ozaki 		*optp++ = TCPOLEN_SIGNATURE;
   1220  1.1     ozaki 		sigp = optp;
   1221  1.1     ozaki 		memset(optp, 0, TCP_SIGLEN);
   1222  1.1     ozaki 		optp += TCP_SIGLEN;
   1223  1.1     ozaki 		optlen += TCPOLEN_SIGNATURE;
   1224  1.1     ozaki 	}
   1225  1.1     ozaki #endif
   1226  1.1     ozaki 
   1227  1.1     ozaki 	/*
   1228  1.1     ozaki 	 * Terminate and pad TCP options to a 4 byte boundary.
   1229  1.1     ozaki 	 *
   1230  1.1     ozaki 	 * According to RFC793: "The content of the header beyond the
   1231  1.1     ozaki 	 * End-of-Option option must be header padding (i.e., zero)."
   1232  1.1     ozaki 	 * And later: "The padding is composed of zeros."
   1233  1.1     ozaki 	 */
   1234  1.1     ozaki 	if (optlen % 4) {
   1235  1.1     ozaki 		optlen += TCPOLEN_EOL;
   1236  1.1     ozaki 		*optp++ = TCPOPT_EOL;
   1237  1.1     ozaki 	}
   1238  1.1     ozaki 	while (optlen % 4) {
   1239  1.1     ozaki 		optlen += TCPOLEN_PAD;
   1240  1.1     ozaki 		*optp++ = TCPOPT_PAD;
   1241  1.1     ozaki 	}
   1242  1.1     ozaki 
   1243  1.1     ozaki 	/* Compute the actual values now that we've added the options. */
   1244  1.1     ozaki 	tlen = hlen + sizeof(struct tcphdr) + optlen;
   1245  1.1     ozaki 	m->m_len = m->m_pkthdr.len = tlen;
   1246  1.1     ozaki 	th->th_off = (sizeof(struct tcphdr) + optlen) >> 2;
   1247  1.1     ozaki 
   1248  1.1     ozaki #ifdef TCP_SIGNATURE
   1249  1.1     ozaki 	if (sav) {
   1250  1.1     ozaki 		(void)tcp_signature(m, th, hlen, sav, sigp);
   1251  1.1     ozaki 		key_sa_recordxfer(sav, m);
   1252  1.1     ozaki 		KEY_SA_UNREF(&sav);
   1253  1.1     ozaki 	}
   1254  1.1     ozaki #endif
   1255  1.1     ozaki 
   1256  1.1     ozaki 	/*
   1257  1.1     ozaki 	 * Send ECN SYN-ACK setup packet.
   1258  1.1     ozaki 	 * Routes can be asymmetric, so, even if we receive a packet
   1259  1.1     ozaki 	 * with ECE and CWR set, we must not assume no one will block
   1260  1.1     ozaki 	 * the ECE packet we are about to send.
   1261  1.1     ozaki 	 */
   1262  1.1     ozaki 	if ((sc->sc_flags & SCF_ECN_PERMIT) && tp &&
   1263  1.1     ozaki 	    SEQ_GEQ(tp->snd_nxt, tp->snd_max)) {
   1264  1.1     ozaki 		th->th_flags |= TH_ECE;
   1265  1.1     ozaki 		TCP_STATINC(TCP_STAT_ECN_SHS);
   1266  1.1     ozaki 
   1267  1.1     ozaki 		/*
   1268  1.1     ozaki 		 * draft-ietf-tcpm-ecnsyn-00.txt
   1269  1.1     ozaki 		 *
   1270  1.1     ozaki 		 * "[...] a TCP node MAY respond to an ECN-setup
   1271  1.1     ozaki 		 * SYN packet by setting ECT in the responding
   1272  1.1     ozaki 		 * ECN-setup SYN/ACK packet, indicating to routers
   1273  1.1     ozaki 		 * that the SYN/ACK packet is ECN-Capable.
   1274  1.1     ozaki 		 * This allows a congested router along the path
   1275  1.1     ozaki 		 * to mark the packet instead of dropping the
   1276  1.1     ozaki 		 * packet as an indication of congestion."
   1277  1.1     ozaki 		 *
   1278  1.1     ozaki 		 * "[...] There can be a great benefit in setting
   1279  1.1     ozaki 		 * an ECN-capable codepoint in SYN/ACK packets [...]
   1280  1.1     ozaki 		 * Congestion is  most likely to occur in
   1281  1.1     ozaki 		 * the server-to-client direction.  As a result,
   1282  1.1     ozaki 		 * setting an ECN-capable codepoint in SYN/ACK
   1283  1.1     ozaki 		 * packets can reduce the occurrence of three-second
   1284  1.1     ozaki 		 * retransmit timeouts resulting from the drop
   1285  1.1     ozaki 		 * of SYN/ACK packets."
   1286  1.1     ozaki 		 *
   1287  1.1     ozaki 		 * Pages 4 and 6, January 2006.
   1288  1.1     ozaki 		 */
   1289  1.1     ozaki 
   1290  1.1     ozaki 		switch (sc->sc_src.sa.sa_family) {
   1291  1.1     ozaki 		case AF_INET:
   1292  1.1     ozaki 			ip->ip_tos |= IPTOS_ECN_ECT0;
   1293  1.1     ozaki 			break;
   1294  1.1     ozaki #ifdef INET6
   1295  1.1     ozaki 		case AF_INET6:
   1296  1.1     ozaki 			ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20);
   1297  1.1     ozaki 			break;
   1298  1.1     ozaki #endif
   1299  1.1     ozaki 		}
   1300  1.1     ozaki 		TCP_STATINC(TCP_STAT_ECN_ECT);
   1301  1.1     ozaki 	}
   1302  1.1     ozaki 
   1303  1.1     ozaki 
   1304  1.1     ozaki 	/*
   1305  1.1     ozaki 	 * Compute the packet's checksum.
   1306  1.1     ozaki 	 *
   1307  1.1     ozaki 	 * Fill in some straggling IP bits.  Note that ip_len is stored
   1308  1.1     ozaki 	 * in network byte order (see the htons() calls below).
   1309  1.1     ozaki 	 */
   1310  1.1     ozaki 	switch (sc->sc_src.sa.sa_family) {
   1311  1.1     ozaki 	case AF_INET:
   1312  1.1     ozaki 		ip->ip_len = htons(tlen - hlen);
   1313  1.1     ozaki 		th->th_sum = 0;
   1314  1.1     ozaki 		th->th_sum = in4_cksum(m, IPPROTO_TCP, hlen, tlen - hlen);
   1315  1.1     ozaki 		ip->ip_len = htons(tlen);
   1316  1.1     ozaki 		ip->ip_ttl = ip_defttl;
   1317  1.1     ozaki 		/* XXX tos? */
   1318  1.1     ozaki 		break;
   1319  1.1     ozaki #ifdef INET6
   1320  1.1     ozaki 	case AF_INET6:
   1321  1.1     ozaki 		ip6->ip6_plen = htons(tlen - hlen);
   1322  1.1     ozaki 		th->th_sum = 0;
   1323  1.1     ozaki 		th->th_sum = in6_cksum(m, IPPROTO_TCP, hlen, tlen - hlen);
   1324  1.1     ozaki 		ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
   1325  1.1     ozaki 		ip6->ip6_vfc |= IPV6_VERSION;
   1326  1.1     ozaki 		ip6->ip6_plen = htons(tlen - hlen);
   1327  1.1     ozaki 		/* ip6_hlim will be initialized afterwards */
   1328  1.1     ozaki 		/* XXX flowlabel? */
   1329  1.1     ozaki 		break;
   1330  1.1     ozaki #endif
   1331  1.1     ozaki 	}
   1332  1.1     ozaki 
   1333  1.1     ozaki 	/* XXX use IPsec policy on listening socket, on SYN ACK */
   1334  1.1     ozaki 	tp = sc->sc_tp;
   1335  1.1     ozaki 
   1336  1.1     ozaki 	switch (sc->sc_src.sa.sa_family) {
   1337  1.1     ozaki 	case AF_INET:
   1338  1.1     ozaki 		error = ip_output(m, sc->sc_ipopts, ro,
   1339  1.1     ozaki 		    (ip_mtudisc ? IP_MTUDISC : 0),
   1340  1.1     ozaki 		    NULL, tp ? tp->t_inpcb : NULL);
   1341  1.1     ozaki 		break;
   1342  1.1     ozaki #ifdef INET6
   1343  1.1     ozaki 	case AF_INET6:
   1344  1.6     ozaki 		ip6->ip6_hlim = in6pcb_selecthlim(NULL,
   1345  1.1     ozaki 		    (rt = rtcache_validate(ro)) != NULL ? rt->rt_ifp : NULL);
   1346  1.1     ozaki 		rtcache_unref(rt, ro);
   1347  1.1     ozaki 
   1348  1.1     ozaki 		error = ip6_output(m, NULL /*XXX*/, ro, 0, NULL,
   1349  1.3     ozaki 		    tp ? tp->t_inpcb : NULL, NULL);
   1350  1.1     ozaki 		break;
   1351  1.1     ozaki #endif
   1352  1.1     ozaki 	default:
   1353  1.1     ozaki 		panic("%s: impossible (2)", __func__);
   1354  1.1     ozaki 	}
   1355  1.1     ozaki 
   1356  1.1     ozaki 	return error;
   1357  1.1     ozaki }
   1358