1 1.7 riastrad /* $NetBSD: tcp_syncache.c,v 1.7 2024/06/29 12:59:08 riastradh Exp $ */ 2 1.1 ozaki 3 1.1 ozaki /* 4 1.1 ozaki * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 5 1.1 ozaki * All rights reserved. 6 1.1 ozaki * 7 1.1 ozaki * Redistribution and use in source and binary forms, with or without 8 1.1 ozaki * modification, are permitted provided that the following conditions 9 1.1 ozaki * are met: 10 1.1 ozaki * 1. Redistributions of source code must retain the above copyright 11 1.1 ozaki * notice, this list of conditions and the following disclaimer. 12 1.1 ozaki * 2. Redistributions in binary form must reproduce the above copyright 13 1.1 ozaki * notice, this list of conditions and the following disclaimer in the 14 1.1 ozaki * documentation and/or other materials provided with the distribution. 15 1.1 ozaki * 3. Neither the name of the project nor the names of its contributors 16 1.1 ozaki * may be used to endorse or promote products derived from this software 17 1.1 ozaki * without specific prior written permission. 18 1.1 ozaki * 19 1.1 ozaki * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 20 1.1 ozaki * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 1.1 ozaki * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 1.1 ozaki * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 23 1.1 ozaki * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 1.1 ozaki * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 1.1 ozaki * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 1.1 ozaki * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 1.1 ozaki * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 1.1 ozaki * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 1.1 ozaki * SUCH DAMAGE. 30 1.1 ozaki */ 31 1.1 ozaki 32 1.1 ozaki /* 33 1.1 ozaki * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995 34 1.1 ozaki * 35 1.1 ozaki * NRL grants permission for redistribution and use in source and binary 36 1.1 ozaki * forms, with or without modification, of the software and documentation 37 1.1 ozaki * created at NRL provided that the following conditions are met: 38 1.1 ozaki * 39 1.1 ozaki * 1. Redistributions of source code must retain the above copyright 40 1.1 ozaki * notice, this list of conditions and the following disclaimer. 41 1.1 ozaki * 2. Redistributions in binary form must reproduce the above copyright 42 1.1 ozaki * notice, this list of conditions and the following disclaimer in the 43 1.1 ozaki * documentation and/or other materials provided with the distribution. 44 1.1 ozaki * 3. All advertising materials mentioning features or use of this software 45 1.1 ozaki * must display the following acknowledgements: 46 1.1 ozaki * This product includes software developed by the University of 47 1.1 ozaki * California, Berkeley and its contributors. 48 1.1 ozaki * This product includes software developed at the Information 49 1.1 ozaki * Technology Division, US Naval Research Laboratory. 50 1.1 ozaki * 4. Neither the name of the NRL nor the names of its contributors 51 1.1 ozaki * may be used to endorse or promote products derived from this software 52 1.1 ozaki * without specific prior written permission. 53 1.1 ozaki * 54 1.1 ozaki * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS 55 1.1 ozaki * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 56 1.1 ozaki * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 57 1.1 ozaki * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR 58 1.1 ozaki * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 59 1.1 ozaki * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 60 1.1 ozaki * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 61 1.1 ozaki * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 62 1.1 ozaki * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 63 1.1 ozaki * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 64 1.1 ozaki * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 65 1.1 ozaki * 66 1.1 ozaki * The views and conclusions contained in the software and documentation 67 1.1 ozaki * are those of the authors and should not be interpreted as representing 68 1.1 ozaki * official policies, either expressed or implied, of the US Naval 69 1.1 ozaki * Research Laboratory (NRL). 70 1.1 ozaki */ 71 1.1 ozaki 72 1.1 ozaki /*- 73 1.1 ozaki * Copyright (c) 1997, 1998, 1999, 2001, 2005, 2006, 74 1.1 ozaki * 2011 The NetBSD Foundation, Inc. 75 1.1 ozaki * All rights reserved. 76 1.1 ozaki * 77 1.1 ozaki * This code is derived from software contributed to The NetBSD Foundation 78 1.1 ozaki * by Coyote Point Systems, Inc. 79 1.1 ozaki * This code is derived from software contributed to The NetBSD Foundation 80 1.1 ozaki * by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation 81 1.1 ozaki * Facility, NASA Ames Research Center. 82 1.1 ozaki * This code is derived from software contributed to The NetBSD Foundation 83 1.1 ozaki * by Charles M. Hannum. 84 1.1 ozaki * This code is derived from software contributed to The NetBSD Foundation 85 1.1 ozaki * by Rui Paulo. 86 1.1 ozaki * 87 1.1 ozaki * Redistribution and use in source and binary forms, with or without 88 1.1 ozaki * modification, are permitted provided that the following conditions 89 1.1 ozaki * are met: 90 1.1 ozaki * 1. Redistributions of source code must retain the above copyright 91 1.1 ozaki * notice, this list of conditions and the following disclaimer. 92 1.1 ozaki * 2. Redistributions in binary form must reproduce the above copyright 93 1.1 ozaki * notice, this list of conditions and the following disclaimer in the 94 1.1 ozaki * documentation and/or other materials provided with the distribution. 95 1.1 ozaki * 96 1.1 ozaki * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 97 1.1 ozaki * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 98 1.1 ozaki * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 99 1.1 ozaki * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 100 1.1 ozaki * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 101 1.1 ozaki * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 102 1.1 ozaki * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 103 1.1 ozaki * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 104 1.1 ozaki * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 105 1.1 ozaki * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 106 1.1 ozaki * POSSIBILITY OF SUCH DAMAGE. 107 1.1 ozaki */ 108 1.1 ozaki 109 1.1 ozaki /* 110 1.1 ozaki * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995 111 1.1 ozaki * The Regents of the University of California. All rights reserved. 112 1.1 ozaki * 113 1.1 ozaki * Redistribution and use in source and binary forms, with or without 114 1.1 ozaki * modification, are permitted provided that the following conditions 115 1.1 ozaki * are met: 116 1.1 ozaki * 1. Redistributions of source code must retain the above copyright 117 1.1 ozaki * notice, this list of conditions and the following disclaimer. 118 1.1 ozaki * 2. Redistributions in binary form must reproduce the above copyright 119 1.1 ozaki * notice, this list of conditions and the following disclaimer in the 120 1.1 ozaki * documentation and/or other materials provided with the distribution. 121 1.1 ozaki * 3. Neither the name of the University nor the names of its contributors 122 1.1 ozaki * may be used to endorse or promote products derived from this software 123 1.1 ozaki * without specific prior written permission. 124 1.1 ozaki * 125 1.1 ozaki * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 126 1.1 ozaki * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 127 1.1 ozaki * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 128 1.1 ozaki * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 129 1.1 ozaki * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 130 1.1 ozaki * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 131 1.1 ozaki * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 132 1.1 ozaki * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 133 1.1 ozaki * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 134 1.1 ozaki * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 135 1.1 ozaki * SUCH DAMAGE. 136 1.1 ozaki * 137 1.1 ozaki * @(#)tcp_input.c 8.12 (Berkeley) 5/24/95 138 1.1 ozaki */ 139 1.1 ozaki 140 1.1 ozaki /* 141 1.1 ozaki * TODO list for SYN cache stuff: 142 1.1 ozaki * 143 1.1 ozaki * Find room for a "state" field, which is needed to keep a 144 1.1 ozaki * compressed state for TIME_WAIT TCBs. It's been noted already 145 1.1 ozaki * that this is fairly important for very high-volume web and 146 1.1 ozaki * mail servers, which use a large number of short-lived 147 1.1 ozaki * connections. 148 1.1 ozaki */ 149 1.1 ozaki 150 1.1 ozaki #include <sys/cdefs.h> 151 1.7 riastrad __KERNEL_RCSID(0, "$NetBSD: tcp_syncache.c,v 1.7 2024/06/29 12:59:08 riastradh Exp $"); 152 1.1 ozaki 153 1.1 ozaki #ifdef _KERNEL_OPT 154 1.1 ozaki #include "opt_inet.h" 155 1.1 ozaki #include "opt_ipsec.h" 156 1.1 ozaki #endif 157 1.1 ozaki 158 1.1 ozaki #include <sys/param.h> 159 1.1 ozaki #include <sys/systm.h> 160 1.1 ozaki #include <sys/mbuf.h> 161 1.1 ozaki #include <sys/protosw.h> 162 1.1 ozaki #include <sys/socket.h> 163 1.1 ozaki #include <sys/socketvar.h> 164 1.1 ozaki #include <sys/errno.h> 165 1.1 ozaki #include <sys/syslog.h> 166 1.1 ozaki #include <sys/pool.h> 167 1.1 ozaki #include <sys/domain.h> 168 1.1 ozaki #include <sys/kernel.h> 169 1.1 ozaki #include <sys/lwp.h> /* for lwp0 */ 170 1.1 ozaki #include <sys/cprng.h> 171 1.1 ozaki 172 1.1 ozaki #include <netinet/in.h> 173 1.1 ozaki #include <netinet/ip.h> 174 1.1 ozaki #include <netinet/in_pcb.h> 175 1.1 ozaki #include <netinet/in_var.h> 176 1.1 ozaki #include <netinet/ip_var.h> 177 1.1 ozaki 178 1.1 ozaki #include <netinet/ip6.h> 179 1.1 ozaki #ifdef INET6 180 1.1 ozaki #include <netinet6/ip6_var.h> 181 1.1 ozaki #include <netinet6/in6_pcb.h> 182 1.1 ozaki #include <netinet6/ip6_var.h> 183 1.1 ozaki #include <netinet6/in6_var.h> 184 1.1 ozaki #endif 185 1.1 ozaki 186 1.1 ozaki #include <netinet/tcp.h> 187 1.1 ozaki #include <netinet/tcp_fsm.h> 188 1.1 ozaki #include <netinet/tcp_seq.h> 189 1.1 ozaki #include <netinet/tcp_timer.h> 190 1.1 ozaki #include <netinet/tcp_var.h> 191 1.1 ozaki #include <netinet/tcp_private.h> 192 1.1 ozaki #include <netinet/tcp_syncache.h> 193 1.1 ozaki 194 1.1 ozaki #ifdef TCP_SIGNATURE 195 1.1 ozaki #ifdef IPSEC 196 1.1 ozaki #include <netipsec/ipsec.h> 197 1.1 ozaki #include <netipsec/key.h> 198 1.1 ozaki #ifdef INET6 199 1.1 ozaki #include <netipsec/ipsec6.h> 200 1.1 ozaki #endif 201 1.1 ozaki #endif /* IPSEC*/ 202 1.1 ozaki #endif 203 1.1 ozaki 204 1.2 ozaki static void syn_cache_timer(void *); 205 1.2 ozaki static struct syn_cache * 206 1.2 ozaki syn_cache_lookup(const struct sockaddr *, const struct sockaddr *, 207 1.2 ozaki struct syn_cache_head **); 208 1.2 ozaki static int syn_cache_respond(struct syn_cache *); 209 1.1 ozaki 210 1.1 ozaki /* syn hash parameters */ 211 1.1 ozaki #define TCP_SYN_HASH_SIZE 293 212 1.1 ozaki #define TCP_SYN_BUCKET_SIZE 35 213 1.1 ozaki static int tcp_syn_cache_size = TCP_SYN_HASH_SIZE; 214 1.1 ozaki int tcp_syn_cache_limit = TCP_SYN_HASH_SIZE*TCP_SYN_BUCKET_SIZE; 215 1.1 ozaki int tcp_syn_bucket_limit = 3*TCP_SYN_BUCKET_SIZE; 216 1.1 ozaki static struct syn_cache_head tcp_syn_cache[TCP_SYN_HASH_SIZE]; 217 1.1 ozaki 218 1.1 ozaki /* 219 1.1 ozaki * TCP compressed state engine. Currently used to hold compressed 220 1.1 ozaki * state for SYN_RECEIVED. 221 1.1 ozaki */ 222 1.1 ozaki 223 1.1 ozaki u_long syn_cache_count; 224 1.1 ozaki static u_int32_t syn_hash1, syn_hash2; 225 1.1 ozaki 226 1.1 ozaki #define SYN_HASH(sa, sp, dp) \ 227 1.1 ozaki ((((sa)->s_addr^syn_hash1)*(((((u_int32_t)(dp))<<16) + \ 228 1.1 ozaki ((u_int32_t)(sp)))^syn_hash2))) 229 1.1 ozaki #ifndef INET6 230 1.1 ozaki #define SYN_HASHALL(hash, src, dst) \ 231 1.1 ozaki do { \ 232 1.1 ozaki hash = SYN_HASH(&((const struct sockaddr_in *)(src))->sin_addr, \ 233 1.1 ozaki ((const struct sockaddr_in *)(src))->sin_port, \ 234 1.1 ozaki ((const struct sockaddr_in *)(dst))->sin_port); \ 235 1.1 ozaki } while (/*CONSTCOND*/ 0) 236 1.1 ozaki #else 237 1.1 ozaki #define SYN_HASH6(sa, sp, dp) \ 238 1.1 ozaki ((((sa)->s6_addr32[0] ^ (sa)->s6_addr32[3] ^ syn_hash1) * \ 239 1.1 ozaki (((((u_int32_t)(dp))<<16) + ((u_int32_t)(sp)))^syn_hash2)) \ 240 1.1 ozaki & 0x7fffffff) 241 1.1 ozaki 242 1.1 ozaki #define SYN_HASHALL(hash, src, dst) \ 243 1.1 ozaki do { \ 244 1.1 ozaki switch ((src)->sa_family) { \ 245 1.1 ozaki case AF_INET: \ 246 1.1 ozaki hash = SYN_HASH(&((const struct sockaddr_in *)(src))->sin_addr, \ 247 1.1 ozaki ((const struct sockaddr_in *)(src))->sin_port, \ 248 1.1 ozaki ((const struct sockaddr_in *)(dst))->sin_port); \ 249 1.1 ozaki break; \ 250 1.1 ozaki case AF_INET6: \ 251 1.1 ozaki hash = SYN_HASH6(&((const struct sockaddr_in6 *)(src))->sin6_addr, \ 252 1.1 ozaki ((const struct sockaddr_in6 *)(src))->sin6_port, \ 253 1.1 ozaki ((const struct sockaddr_in6 *)(dst))->sin6_port); \ 254 1.1 ozaki break; \ 255 1.1 ozaki default: \ 256 1.1 ozaki hash = 0; \ 257 1.1 ozaki } \ 258 1.1 ozaki } while (/*CONSTCOND*/0) 259 1.1 ozaki #endif /* INET6 */ 260 1.1 ozaki 261 1.1 ozaki static struct pool syn_cache_pool; 262 1.1 ozaki 263 1.1 ozaki /* 264 1.1 ozaki * We don't estimate RTT with SYNs, so each packet starts with the default 265 1.1 ozaki * RTT and each timer step has a fixed timeout value. 266 1.1 ozaki */ 267 1.1 ozaki static inline void 268 1.1 ozaki syn_cache_timer_arm(struct syn_cache *sc) 269 1.1 ozaki { 270 1.1 ozaki 271 1.1 ozaki TCPT_RANGESET(sc->sc_rxtcur, 272 1.1 ozaki TCPTV_SRTTDFLT * tcp_backoff[sc->sc_rxtshift], TCPTV_MIN, 273 1.1 ozaki TCPTV_REXMTMAX); 274 1.1 ozaki callout_reset(&sc->sc_timer, 275 1.1 ozaki sc->sc_rxtcur * (hz / PR_SLOWHZ), syn_cache_timer, sc); 276 1.1 ozaki } 277 1.1 ozaki 278 1.1 ozaki #define SYN_CACHE_TIMESTAMP(sc) (tcp_now - (sc)->sc_timebase) 279 1.1 ozaki 280 1.1 ozaki static inline void 281 1.1 ozaki syn_cache_rm(struct syn_cache *sc) 282 1.1 ozaki { 283 1.1 ozaki TAILQ_REMOVE(&tcp_syn_cache[sc->sc_bucketidx].sch_bucket, 284 1.1 ozaki sc, sc_bucketq); 285 1.1 ozaki sc->sc_tp = NULL; 286 1.1 ozaki LIST_REMOVE(sc, sc_tpq); 287 1.1 ozaki tcp_syn_cache[sc->sc_bucketidx].sch_length--; 288 1.1 ozaki callout_stop(&sc->sc_timer); 289 1.1 ozaki syn_cache_count--; 290 1.1 ozaki } 291 1.1 ozaki 292 1.1 ozaki static inline void 293 1.1 ozaki syn_cache_put(struct syn_cache *sc) 294 1.1 ozaki { 295 1.1 ozaki if (sc->sc_ipopts) 296 1.1 ozaki (void) m_free(sc->sc_ipopts); 297 1.1 ozaki rtcache_free(&sc->sc_route); 298 1.1 ozaki sc->sc_flags |= SCF_DEAD; 299 1.1 ozaki if (!callout_invoking(&sc->sc_timer)) 300 1.1 ozaki callout_schedule(&(sc)->sc_timer, 1); 301 1.1 ozaki } 302 1.1 ozaki 303 1.1 ozaki void 304 1.1 ozaki syn_cache_init(void) 305 1.1 ozaki { 306 1.1 ozaki int i; 307 1.1 ozaki 308 1.1 ozaki pool_init(&syn_cache_pool, sizeof(struct syn_cache), 0, 0, 0, 309 1.1 ozaki "synpl", NULL, IPL_SOFTNET); 310 1.1 ozaki 311 1.1 ozaki /* Initialize the hash buckets. */ 312 1.1 ozaki for (i = 0; i < tcp_syn_cache_size; i++) 313 1.1 ozaki TAILQ_INIT(&tcp_syn_cache[i].sch_bucket); 314 1.1 ozaki } 315 1.1 ozaki 316 1.1 ozaki void 317 1.1 ozaki syn_cache_insert(struct syn_cache *sc, struct tcpcb *tp) 318 1.1 ozaki { 319 1.1 ozaki struct syn_cache_head *scp; 320 1.1 ozaki struct syn_cache *sc2; 321 1.1 ozaki int s; 322 1.1 ozaki 323 1.1 ozaki /* 324 1.1 ozaki * If there are no entries in the hash table, reinitialize 325 1.1 ozaki * the hash secrets. 326 1.1 ozaki */ 327 1.1 ozaki if (syn_cache_count == 0) { 328 1.1 ozaki syn_hash1 = cprng_fast32(); 329 1.1 ozaki syn_hash2 = cprng_fast32(); 330 1.1 ozaki } 331 1.1 ozaki 332 1.1 ozaki SYN_HASHALL(sc->sc_hash, &sc->sc_src.sa, &sc->sc_dst.sa); 333 1.1 ozaki sc->sc_bucketidx = sc->sc_hash % tcp_syn_cache_size; 334 1.1 ozaki scp = &tcp_syn_cache[sc->sc_bucketidx]; 335 1.1 ozaki 336 1.1 ozaki /* 337 1.1 ozaki * Make sure that we don't overflow the per-bucket 338 1.1 ozaki * limit or the total cache size limit. 339 1.1 ozaki */ 340 1.1 ozaki s = splsoftnet(); 341 1.1 ozaki if (scp->sch_length >= tcp_syn_bucket_limit) { 342 1.1 ozaki TCP_STATINC(TCP_STAT_SC_BUCKETOVERFLOW); 343 1.1 ozaki /* 344 1.1 ozaki * The bucket is full. Toss the oldest element in the 345 1.1 ozaki * bucket. This will be the first entry in the bucket. 346 1.1 ozaki */ 347 1.1 ozaki sc2 = TAILQ_FIRST(&scp->sch_bucket); 348 1.1 ozaki #ifdef DIAGNOSTIC 349 1.1 ozaki /* 350 1.1 ozaki * This should never happen; we should always find an 351 1.1 ozaki * entry in our bucket. 352 1.1 ozaki */ 353 1.1 ozaki if (sc2 == NULL) 354 1.1 ozaki panic("syn_cache_insert: bucketoverflow: impossible"); 355 1.1 ozaki #endif 356 1.1 ozaki syn_cache_rm(sc2); 357 1.1 ozaki syn_cache_put(sc2); /* calls pool_put but see spl above */ 358 1.1 ozaki } else if (syn_cache_count >= tcp_syn_cache_limit) { 359 1.1 ozaki struct syn_cache_head *scp2, *sce; 360 1.1 ozaki 361 1.1 ozaki TCP_STATINC(TCP_STAT_SC_OVERFLOWED); 362 1.1 ozaki /* 363 1.1 ozaki * The cache is full. Toss the oldest entry in the 364 1.1 ozaki * first non-empty bucket we can find. 365 1.1 ozaki * 366 1.1 ozaki * XXX We would really like to toss the oldest 367 1.1 ozaki * entry in the cache, but we hope that this 368 1.1 ozaki * condition doesn't happen very often. 369 1.1 ozaki */ 370 1.1 ozaki scp2 = scp; 371 1.1 ozaki if (TAILQ_EMPTY(&scp2->sch_bucket)) { 372 1.1 ozaki sce = &tcp_syn_cache[tcp_syn_cache_size]; 373 1.1 ozaki for (++scp2; scp2 != scp; scp2++) { 374 1.1 ozaki if (scp2 >= sce) 375 1.1 ozaki scp2 = &tcp_syn_cache[0]; 376 1.1 ozaki if (! TAILQ_EMPTY(&scp2->sch_bucket)) 377 1.1 ozaki break; 378 1.1 ozaki } 379 1.1 ozaki #ifdef DIAGNOSTIC 380 1.1 ozaki /* 381 1.1 ozaki * This should never happen; we should always find a 382 1.1 ozaki * non-empty bucket. 383 1.1 ozaki */ 384 1.1 ozaki if (scp2 == scp) 385 1.1 ozaki panic("syn_cache_insert: cacheoverflow: " 386 1.1 ozaki "impossible"); 387 1.1 ozaki #endif 388 1.1 ozaki } 389 1.1 ozaki sc2 = TAILQ_FIRST(&scp2->sch_bucket); 390 1.1 ozaki syn_cache_rm(sc2); 391 1.1 ozaki syn_cache_put(sc2); /* calls pool_put but see spl above */ 392 1.1 ozaki } 393 1.1 ozaki 394 1.1 ozaki /* 395 1.1 ozaki * Initialize the entry's timer. 396 1.1 ozaki */ 397 1.1 ozaki sc->sc_rxttot = 0; 398 1.1 ozaki sc->sc_rxtshift = 0; 399 1.1 ozaki syn_cache_timer_arm(sc); 400 1.1 ozaki 401 1.1 ozaki /* Link it from tcpcb entry */ 402 1.1 ozaki LIST_INSERT_HEAD(&tp->t_sc, sc, sc_tpq); 403 1.1 ozaki 404 1.1 ozaki /* Put it into the bucket. */ 405 1.1 ozaki TAILQ_INSERT_TAIL(&scp->sch_bucket, sc, sc_bucketq); 406 1.1 ozaki scp->sch_length++; 407 1.1 ozaki syn_cache_count++; 408 1.1 ozaki 409 1.1 ozaki TCP_STATINC(TCP_STAT_SC_ADDED); 410 1.1 ozaki splx(s); 411 1.1 ozaki } 412 1.1 ozaki 413 1.1 ozaki /* 414 1.1 ozaki * Walk the timer queues, looking for SYN,ACKs that need to be retransmitted. 415 1.1 ozaki * If we have retransmitted an entry the maximum number of times, expire 416 1.1 ozaki * that entry. 417 1.1 ozaki */ 418 1.1 ozaki static void 419 1.1 ozaki syn_cache_timer(void *arg) 420 1.1 ozaki { 421 1.1 ozaki struct syn_cache *sc = arg; 422 1.1 ozaki 423 1.1 ozaki mutex_enter(softnet_lock); 424 1.1 ozaki KERNEL_LOCK(1, NULL); 425 1.1 ozaki 426 1.1 ozaki callout_ack(&sc->sc_timer); 427 1.1 ozaki 428 1.1 ozaki if (__predict_false(sc->sc_flags & SCF_DEAD)) { 429 1.1 ozaki TCP_STATINC(TCP_STAT_SC_DELAYED_FREE); 430 1.1 ozaki goto free; 431 1.1 ozaki } 432 1.1 ozaki 433 1.1 ozaki if (__predict_false(sc->sc_rxtshift == TCP_MAXRXTSHIFT)) { 434 1.1 ozaki /* Drop it -- too many retransmissions. */ 435 1.1 ozaki goto dropit; 436 1.1 ozaki } 437 1.1 ozaki 438 1.1 ozaki /* 439 1.1 ozaki * Compute the total amount of time this entry has 440 1.1 ozaki * been on a queue. If this entry has been on longer 441 1.1 ozaki * than the keep alive timer would allow, expire it. 442 1.1 ozaki */ 443 1.1 ozaki sc->sc_rxttot += sc->sc_rxtcur; 444 1.1 ozaki if (sc->sc_rxttot >= MIN(tcp_keepinit, TCP_TIMER_MAXTICKS)) 445 1.1 ozaki goto dropit; 446 1.1 ozaki 447 1.1 ozaki TCP_STATINC(TCP_STAT_SC_RETRANSMITTED); 448 1.1 ozaki (void)syn_cache_respond(sc); 449 1.1 ozaki 450 1.1 ozaki /* Advance the timer back-off. */ 451 1.1 ozaki sc->sc_rxtshift++; 452 1.1 ozaki syn_cache_timer_arm(sc); 453 1.1 ozaki 454 1.1 ozaki goto out; 455 1.1 ozaki 456 1.1 ozaki dropit: 457 1.1 ozaki TCP_STATINC(TCP_STAT_SC_TIMED_OUT); 458 1.1 ozaki syn_cache_rm(sc); 459 1.1 ozaki if (sc->sc_ipopts) 460 1.1 ozaki (void) m_free(sc->sc_ipopts); 461 1.1 ozaki rtcache_free(&sc->sc_route); 462 1.1 ozaki 463 1.1 ozaki free: 464 1.1 ozaki callout_destroy(&sc->sc_timer); 465 1.1 ozaki pool_put(&syn_cache_pool, sc); 466 1.1 ozaki 467 1.1 ozaki out: 468 1.1 ozaki KERNEL_UNLOCK_ONE(NULL); 469 1.1 ozaki mutex_exit(softnet_lock); 470 1.1 ozaki } 471 1.1 ozaki 472 1.1 ozaki /* 473 1.1 ozaki * Remove syn cache created by the specified tcb entry, 474 1.1 ozaki * because this does not make sense to keep them 475 1.1 ozaki * (if there's no tcb entry, syn cache entry will never be used) 476 1.1 ozaki */ 477 1.1 ozaki void 478 1.1 ozaki syn_cache_cleanup(struct tcpcb *tp) 479 1.1 ozaki { 480 1.1 ozaki struct syn_cache *sc, *nsc; 481 1.1 ozaki int s; 482 1.1 ozaki 483 1.1 ozaki s = splsoftnet(); 484 1.1 ozaki 485 1.1 ozaki for (sc = LIST_FIRST(&tp->t_sc); sc != NULL; sc = nsc) { 486 1.1 ozaki nsc = LIST_NEXT(sc, sc_tpq); 487 1.1 ozaki 488 1.1 ozaki #ifdef DIAGNOSTIC 489 1.1 ozaki if (sc->sc_tp != tp) 490 1.1 ozaki panic("invalid sc_tp in syn_cache_cleanup"); 491 1.1 ozaki #endif 492 1.1 ozaki syn_cache_rm(sc); 493 1.1 ozaki syn_cache_put(sc); /* calls pool_put but see spl above */ 494 1.1 ozaki } 495 1.1 ozaki /* just for safety */ 496 1.1 ozaki LIST_INIT(&tp->t_sc); 497 1.1 ozaki 498 1.1 ozaki splx(s); 499 1.1 ozaki } 500 1.1 ozaki 501 1.1 ozaki /* 502 1.1 ozaki * Find an entry in the syn cache. 503 1.1 ozaki */ 504 1.2 ozaki static struct syn_cache * 505 1.1 ozaki syn_cache_lookup(const struct sockaddr *src, const struct sockaddr *dst, 506 1.1 ozaki struct syn_cache_head **headp) 507 1.1 ozaki { 508 1.1 ozaki struct syn_cache *sc; 509 1.1 ozaki struct syn_cache_head *scp; 510 1.1 ozaki u_int32_t hash; 511 1.1 ozaki int s; 512 1.1 ozaki 513 1.1 ozaki SYN_HASHALL(hash, src, dst); 514 1.1 ozaki 515 1.1 ozaki scp = &tcp_syn_cache[hash % tcp_syn_cache_size]; 516 1.1 ozaki *headp = scp; 517 1.1 ozaki s = splsoftnet(); 518 1.1 ozaki for (sc = TAILQ_FIRST(&scp->sch_bucket); sc != NULL; 519 1.1 ozaki sc = TAILQ_NEXT(sc, sc_bucketq)) { 520 1.1 ozaki if (sc->sc_hash != hash) 521 1.1 ozaki continue; 522 1.1 ozaki if (!memcmp(&sc->sc_src, src, src->sa_len) && 523 1.1 ozaki !memcmp(&sc->sc_dst, dst, dst->sa_len)) { 524 1.1 ozaki splx(s); 525 1.1 ozaki return (sc); 526 1.1 ozaki } 527 1.1 ozaki } 528 1.1 ozaki splx(s); 529 1.1 ozaki return (NULL); 530 1.1 ozaki } 531 1.1 ozaki 532 1.1 ozaki /* 533 1.1 ozaki * This function gets called when we receive an ACK for a socket in the 534 1.1 ozaki * LISTEN state. We look up the connection in the syn cache, and if it's 535 1.1 ozaki * there, we pull it out of the cache and turn it into a full-blown 536 1.1 ozaki * connection in the SYN-RECEIVED state. 537 1.1 ozaki * 538 1.1 ozaki * The return values may not be immediately obvious, and their effects 539 1.1 ozaki * can be subtle, so here they are: 540 1.1 ozaki * 541 1.1 ozaki * NULL SYN was not found in cache; caller should drop the 542 1.1 ozaki * packet and send an RST. 543 1.1 ozaki * 544 1.1 ozaki * -1 We were unable to create the new connection, and are 545 1.1 ozaki * aborting it. An ACK,RST is being sent to the peer 546 1.1 ozaki * (unless we got screwey sequence numbers; see below), 547 1.1 ozaki * because the 3-way handshake has been completed. Caller 548 1.1 ozaki * should not free the mbuf, since we may be using it. If 549 1.1 ozaki * we are not, we will free it. 550 1.1 ozaki * 551 1.1 ozaki * Otherwise, the return value is a pointer to the new socket 552 1.1 ozaki * associated with the connection. 553 1.1 ozaki */ 554 1.1 ozaki struct socket * 555 1.1 ozaki syn_cache_get(struct sockaddr *src, struct sockaddr *dst, 556 1.1 ozaki struct tcphdr *th, struct socket *so, struct mbuf *m) 557 1.1 ozaki { 558 1.1 ozaki struct syn_cache *sc; 559 1.1 ozaki struct syn_cache_head *scp; 560 1.1 ozaki struct inpcb *inp = NULL; 561 1.1 ozaki struct tcpcb *tp; 562 1.1 ozaki int s; 563 1.1 ozaki struct socket *oso; 564 1.1 ozaki 565 1.1 ozaki s = splsoftnet(); 566 1.1 ozaki if ((sc = syn_cache_lookup(src, dst, &scp)) == NULL) { 567 1.1 ozaki splx(s); 568 1.1 ozaki return NULL; 569 1.1 ozaki } 570 1.1 ozaki 571 1.1 ozaki /* 572 1.1 ozaki * Verify the sequence and ack numbers. Try getting the correct 573 1.1 ozaki * response again. 574 1.1 ozaki */ 575 1.1 ozaki if ((th->th_ack != sc->sc_iss + 1) || 576 1.1 ozaki SEQ_LEQ(th->th_seq, sc->sc_irs) || 577 1.1 ozaki SEQ_GT(th->th_seq, sc->sc_irs + 1 + sc->sc_win)) { 578 1.1 ozaki m_freem(m); 579 1.1 ozaki (void)syn_cache_respond(sc); 580 1.1 ozaki splx(s); 581 1.1 ozaki return ((struct socket *)(-1)); 582 1.1 ozaki } 583 1.1 ozaki 584 1.1 ozaki /* Remove this cache entry */ 585 1.1 ozaki syn_cache_rm(sc); 586 1.1 ozaki splx(s); 587 1.1 ozaki 588 1.1 ozaki /* 589 1.1 ozaki * Ok, create the full blown connection, and set things up 590 1.1 ozaki * as they would have been set up if we had created the 591 1.1 ozaki * connection when the SYN arrived. If we can't create 592 1.1 ozaki * the connection, abort it. 593 1.1 ozaki */ 594 1.1 ozaki /* 595 1.1 ozaki * inp still has the OLD in_pcb stuff, set the 596 1.1 ozaki * v6-related flags on the new guy, too. This is 597 1.1 ozaki * done particularly for the case where an AF_INET6 598 1.1 ozaki * socket is bound only to a port, and a v4 connection 599 1.1 ozaki * comes in on that port. 600 1.1 ozaki * we also copy the flowinfo from the original pcb 601 1.1 ozaki * to the new one. 602 1.1 ozaki */ 603 1.1 ozaki oso = so; 604 1.1 ozaki so = sonewconn(so, true); 605 1.1 ozaki if (so == NULL) 606 1.1 ozaki goto resetandabort; 607 1.1 ozaki 608 1.3 ozaki inp = sotoinpcb(so); 609 1.1 ozaki 610 1.1 ozaki switch (src->sa_family) { 611 1.1 ozaki case AF_INET: 612 1.3 ozaki if (inp->inp_af == AF_INET) { 613 1.4 ozaki in4p_laddr(inp) = ((struct sockaddr_in *)dst)->sin_addr; 614 1.1 ozaki inp->inp_lport = ((struct sockaddr_in *)dst)->sin_port; 615 1.1 ozaki inp->inp_options = ip_srcroute(m); 616 1.5 ozaki inpcb_set_state(inp, INP_BOUND); 617 1.1 ozaki if (inp->inp_options == NULL) { 618 1.1 ozaki inp->inp_options = sc->sc_ipopts; 619 1.1 ozaki sc->sc_ipopts = NULL; 620 1.1 ozaki } 621 1.1 ozaki } 622 1.1 ozaki #ifdef INET6 623 1.3 ozaki else if (inp->inp_af == AF_INET6) { 624 1.1 ozaki /* IPv4 packet to AF_INET6 socket */ 625 1.4 ozaki memset(&in6p_laddr(inp), 0, sizeof(in6p_laddr(inp))); 626 1.4 ozaki in6p_laddr(inp).s6_addr16[5] = htons(0xffff); 627 1.1 ozaki bcopy(&((struct sockaddr_in *)dst)->sin_addr, 628 1.4 ozaki &in6p_laddr(inp).s6_addr32[3], 629 1.1 ozaki sizeof(((struct sockaddr_in *)dst)->sin_addr)); 630 1.3 ozaki inp->inp_lport = ((struct sockaddr_in *)dst)->sin_port; 631 1.3 ozaki intotcpcb(inp)->t_family = AF_INET; 632 1.3 ozaki if (sotoinpcb(oso)->inp_flags & IN6P_IPV6_V6ONLY) 633 1.3 ozaki inp->inp_flags |= IN6P_IPV6_V6ONLY; 634 1.1 ozaki else 635 1.3 ozaki inp->inp_flags &= ~IN6P_IPV6_V6ONLY; 636 1.5 ozaki inpcb_set_state(inp, INP_BOUND); 637 1.1 ozaki } 638 1.1 ozaki #endif 639 1.1 ozaki break; 640 1.1 ozaki #ifdef INET6 641 1.1 ozaki case AF_INET6: 642 1.3 ozaki if (inp->inp_af == AF_INET6) { 643 1.4 ozaki in6p_laddr(inp) = ((struct sockaddr_in6 *)dst)->sin6_addr; 644 1.3 ozaki inp->inp_lport = ((struct sockaddr_in6 *)dst)->sin6_port; 645 1.5 ozaki inpcb_set_state(inp, INP_BOUND); 646 1.1 ozaki } 647 1.1 ozaki break; 648 1.1 ozaki #endif 649 1.1 ozaki } 650 1.1 ozaki 651 1.1 ozaki #ifdef INET6 652 1.3 ozaki if (inp && intotcpcb(inp)->t_family == AF_INET6 && sotoinpcb(oso)) { 653 1.3 ozaki struct inpcb *oinp = sotoinpcb(oso); 654 1.1 ozaki /* inherit socket options from the listening socket */ 655 1.3 ozaki inp->inp_flags |= (oinp->inp_flags & IN6P_CONTROLOPTS); 656 1.3 ozaki if (inp->inp_flags & IN6P_CONTROLOPTS) { 657 1.3 ozaki m_freem(inp->inp_options); 658 1.3 ozaki inp->inp_options = NULL; 659 1.1 ozaki } 660 1.3 ozaki ip6_savecontrol(inp, &inp->inp_options, 661 1.1 ozaki mtod(m, struct ip6_hdr *), m); 662 1.1 ozaki } 663 1.1 ozaki #endif 664 1.1 ozaki 665 1.1 ozaki /* 666 1.1 ozaki * Give the new socket our cached route reference. 667 1.1 ozaki */ 668 1.3 ozaki rtcache_copy(&inp->inp_route, &sc->sc_route); 669 1.3 ozaki rtcache_free(&sc->sc_route); 670 1.1 ozaki 671 1.3 ozaki if (inp->inp_af == AF_INET) { 672 1.1 ozaki struct sockaddr_in sin; 673 1.1 ozaki memcpy(&sin, src, src->sa_len); 674 1.5 ozaki if (inpcb_connect(inp, &sin, &lwp0)) { 675 1.1 ozaki goto resetandabort; 676 1.1 ozaki } 677 1.1 ozaki } 678 1.1 ozaki #ifdef INET6 679 1.3 ozaki else if (inp->inp_af == AF_INET6) { 680 1.1 ozaki struct sockaddr_in6 sin6; 681 1.1 ozaki memcpy(&sin6, src, src->sa_len); 682 1.1 ozaki if (src->sa_family == AF_INET) { 683 1.1 ozaki /* IPv4 packet to AF_INET6 socket */ 684 1.1 ozaki in6_sin_2_v4mapsin6((struct sockaddr_in *)src, &sin6); 685 1.1 ozaki } 686 1.6 ozaki if (in6pcb_connect(inp, &sin6, NULL)) { 687 1.1 ozaki goto resetandabort; 688 1.1 ozaki } 689 1.1 ozaki } 690 1.1 ozaki #endif 691 1.1 ozaki else { 692 1.1 ozaki goto resetandabort; 693 1.1 ozaki } 694 1.1 ozaki 695 1.3 ozaki tp = intotcpcb(inp); 696 1.1 ozaki 697 1.1 ozaki tp->t_flags = sototcpcb(oso)->t_flags & TF_NODELAY; 698 1.1 ozaki if (sc->sc_request_r_scale != 15) { 699 1.1 ozaki tp->requested_s_scale = sc->sc_requested_s_scale; 700 1.1 ozaki tp->request_r_scale = sc->sc_request_r_scale; 701 1.1 ozaki tp->snd_scale = sc->sc_requested_s_scale; 702 1.1 ozaki tp->rcv_scale = sc->sc_request_r_scale; 703 1.1 ozaki tp->t_flags |= TF_REQ_SCALE|TF_RCVD_SCALE; 704 1.1 ozaki } 705 1.1 ozaki if (sc->sc_flags & SCF_TIMESTAMP) 706 1.1 ozaki tp->t_flags |= TF_REQ_TSTMP|TF_RCVD_TSTMP; 707 1.1 ozaki tp->ts_timebase = sc->sc_timebase; 708 1.1 ozaki 709 1.1 ozaki tp->t_template = tcp_template(tp); 710 1.1 ozaki if (tp->t_template == 0) { 711 1.1 ozaki tp = tcp_drop(tp, ENOBUFS); /* destroys socket */ 712 1.1 ozaki so = NULL; 713 1.1 ozaki m_freem(m); 714 1.1 ozaki goto abort; 715 1.1 ozaki } 716 1.1 ozaki 717 1.1 ozaki tp->iss = sc->sc_iss; 718 1.1 ozaki tp->irs = sc->sc_irs; 719 1.1 ozaki tcp_sendseqinit(tp); 720 1.1 ozaki tcp_rcvseqinit(tp); 721 1.1 ozaki tp->t_state = TCPS_SYN_RECEIVED; 722 1.1 ozaki TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepinit); 723 1.1 ozaki TCP_STATINC(TCP_STAT_ACCEPTS); 724 1.1 ozaki 725 1.1 ozaki if ((sc->sc_flags & SCF_SACK_PERMIT) && tcp_do_sack) 726 1.1 ozaki tp->t_flags |= TF_WILL_SACK; 727 1.1 ozaki 728 1.1 ozaki if ((sc->sc_flags & SCF_ECN_PERMIT) && tcp_do_ecn) 729 1.1 ozaki tp->t_flags |= TF_ECN_PERMIT; 730 1.1 ozaki 731 1.1 ozaki #ifdef TCP_SIGNATURE 732 1.1 ozaki if (sc->sc_flags & SCF_SIGNATURE) 733 1.1 ozaki tp->t_flags |= TF_SIGNATURE; 734 1.1 ozaki #endif 735 1.1 ozaki 736 1.1 ozaki /* Initialize tp->t_ourmss before we deal with the peer's! */ 737 1.1 ozaki tp->t_ourmss = sc->sc_ourmaxseg; 738 1.1 ozaki tcp_mss_from_peer(tp, sc->sc_peermaxseg); 739 1.1 ozaki 740 1.1 ozaki /* 741 1.1 ozaki * Initialize the initial congestion window. If we 742 1.1 ozaki * had to retransmit the SYN,ACK, we must initialize cwnd 743 1.1 ozaki * to 1 segment (i.e. the Loss Window). 744 1.1 ozaki */ 745 1.1 ozaki if (sc->sc_rxtshift) 746 1.1 ozaki tp->snd_cwnd = tp->t_peermss; 747 1.1 ozaki else { 748 1.1 ozaki int ss = tcp_init_win; 749 1.4 ozaki if (inp->inp_af == AF_INET && in_localaddr(in4p_faddr(inp))) 750 1.1 ozaki ss = tcp_init_win_local; 751 1.1 ozaki #ifdef INET6 752 1.4 ozaki else if (inp->inp_af == AF_INET6 && in6_localaddr(&in6p_faddr(inp))) 753 1.1 ozaki ss = tcp_init_win_local; 754 1.1 ozaki #endif 755 1.1 ozaki tp->snd_cwnd = TCP_INITIAL_WINDOW(ss, tp->t_peermss); 756 1.1 ozaki } 757 1.1 ozaki 758 1.1 ozaki tcp_rmx_rtt(tp); 759 1.1 ozaki tp->snd_wl1 = sc->sc_irs; 760 1.1 ozaki tp->rcv_up = sc->sc_irs + 1; 761 1.1 ozaki 762 1.1 ozaki /* 763 1.1 ozaki * This is what would have happened in tcp_output() when 764 1.1 ozaki * the SYN,ACK was sent. 765 1.1 ozaki */ 766 1.1 ozaki tp->snd_up = tp->snd_una; 767 1.1 ozaki tp->snd_max = tp->snd_nxt = tp->iss+1; 768 1.1 ozaki TCP_TIMER_ARM(tp, TCPT_REXMT, tp->t_rxtcur); 769 1.1 ozaki if (sc->sc_win > 0 && SEQ_GT(tp->rcv_nxt + sc->sc_win, tp->rcv_adv)) 770 1.1 ozaki tp->rcv_adv = tp->rcv_nxt + sc->sc_win; 771 1.1 ozaki tp->last_ack_sent = tp->rcv_nxt; 772 1.1 ozaki tp->t_partialacks = -1; 773 1.1 ozaki tp->t_dupacks = 0; 774 1.1 ozaki 775 1.1 ozaki TCP_STATINC(TCP_STAT_SC_COMPLETED); 776 1.1 ozaki s = splsoftnet(); 777 1.1 ozaki syn_cache_put(sc); 778 1.1 ozaki splx(s); 779 1.1 ozaki return so; 780 1.1 ozaki 781 1.1 ozaki resetandabort: 782 1.1 ozaki (void)tcp_respond(NULL, m, m, th, (tcp_seq)0, th->th_ack, TH_RST); 783 1.1 ozaki abort: 784 1.1 ozaki if (so != NULL) { 785 1.1 ozaki (void) soqremque(so, 1); 786 1.1 ozaki (void) soabort(so); 787 1.1 ozaki mutex_enter(softnet_lock); 788 1.1 ozaki } 789 1.1 ozaki s = splsoftnet(); 790 1.1 ozaki syn_cache_put(sc); 791 1.1 ozaki splx(s); 792 1.1 ozaki TCP_STATINC(TCP_STAT_SC_ABORTED); 793 1.1 ozaki return ((struct socket *)(-1)); 794 1.1 ozaki } 795 1.1 ozaki 796 1.1 ozaki /* 797 1.1 ozaki * This function is called when we get a RST for a 798 1.1 ozaki * non-existent connection, so that we can see if the 799 1.1 ozaki * connection is in the syn cache. If it is, zap it. 800 1.1 ozaki */ 801 1.1 ozaki 802 1.1 ozaki void 803 1.1 ozaki syn_cache_reset(struct sockaddr *src, struct sockaddr *dst, struct tcphdr *th) 804 1.1 ozaki { 805 1.1 ozaki struct syn_cache *sc; 806 1.1 ozaki struct syn_cache_head *scp; 807 1.1 ozaki int s = splsoftnet(); 808 1.1 ozaki 809 1.1 ozaki if ((sc = syn_cache_lookup(src, dst, &scp)) == NULL) { 810 1.1 ozaki splx(s); 811 1.1 ozaki return; 812 1.1 ozaki } 813 1.1 ozaki if (SEQ_LT(th->th_seq, sc->sc_irs) || 814 1.1 ozaki SEQ_GT(th->th_seq, sc->sc_irs+1)) { 815 1.1 ozaki splx(s); 816 1.1 ozaki return; 817 1.1 ozaki } 818 1.1 ozaki syn_cache_rm(sc); 819 1.1 ozaki TCP_STATINC(TCP_STAT_SC_RESET); 820 1.1 ozaki syn_cache_put(sc); /* calls pool_put but see spl above */ 821 1.1 ozaki splx(s); 822 1.1 ozaki } 823 1.1 ozaki 824 1.1 ozaki void 825 1.1 ozaki syn_cache_unreach(const struct sockaddr *src, const struct sockaddr *dst, 826 1.1 ozaki struct tcphdr *th) 827 1.1 ozaki { 828 1.1 ozaki struct syn_cache *sc; 829 1.1 ozaki struct syn_cache_head *scp; 830 1.1 ozaki int s; 831 1.1 ozaki 832 1.1 ozaki s = splsoftnet(); 833 1.1 ozaki if ((sc = syn_cache_lookup(src, dst, &scp)) == NULL) { 834 1.1 ozaki splx(s); 835 1.1 ozaki return; 836 1.1 ozaki } 837 1.1 ozaki /* If the sequence number != sc_iss, then it's a bogus ICMP msg */ 838 1.1 ozaki if (ntohl(th->th_seq) != sc->sc_iss) { 839 1.1 ozaki splx(s); 840 1.1 ozaki return; 841 1.1 ozaki } 842 1.1 ozaki 843 1.1 ozaki /* 844 1.1 ozaki * If we've retransmitted 3 times and this is our second error, 845 1.1 ozaki * we remove the entry. Otherwise, we allow it to continue on. 846 1.1 ozaki * This prevents us from incorrectly nuking an entry during a 847 1.1 ozaki * spurious network outage. 848 1.1 ozaki * 849 1.1 ozaki * See tcp_notify(). 850 1.1 ozaki */ 851 1.1 ozaki if ((sc->sc_flags & SCF_UNREACH) == 0 || sc->sc_rxtshift < 3) { 852 1.1 ozaki sc->sc_flags |= SCF_UNREACH; 853 1.1 ozaki splx(s); 854 1.1 ozaki return; 855 1.1 ozaki } 856 1.1 ozaki 857 1.1 ozaki syn_cache_rm(sc); 858 1.1 ozaki TCP_STATINC(TCP_STAT_SC_UNREACH); 859 1.1 ozaki syn_cache_put(sc); /* calls pool_put but see spl above */ 860 1.1 ozaki splx(s); 861 1.1 ozaki } 862 1.1 ozaki 863 1.1 ozaki /* 864 1.1 ozaki * Given a LISTEN socket and an inbound SYN request, add this to the syn 865 1.1 ozaki * cache, and send back a segment: 866 1.1 ozaki * <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK> 867 1.1 ozaki * to the source. 868 1.1 ozaki * 869 1.1 ozaki * IMPORTANT NOTE: We do _NOT_ ACK data that might accompany the SYN. 870 1.1 ozaki * Doing so would require that we hold onto the data and deliver it 871 1.1 ozaki * to the application. However, if we are the target of a SYN-flood 872 1.1 ozaki * DoS attack, an attacker could send data which would eventually 873 1.1 ozaki * consume all available buffer space if it were ACKed. By not ACKing 874 1.1 ozaki * the data, we avoid this DoS scenario. 875 1.1 ozaki */ 876 1.1 ozaki int 877 1.1 ozaki syn_cache_add(struct sockaddr *src, struct sockaddr *dst, struct tcphdr *th, 878 1.1 ozaki unsigned int toff, struct socket *so, struct mbuf *m, u_char *optp, 879 1.1 ozaki int optlen, struct tcp_opt_info *oi) 880 1.1 ozaki { 881 1.1 ozaki struct tcpcb tb, *tp; 882 1.1 ozaki long win; 883 1.1 ozaki struct syn_cache *sc; 884 1.1 ozaki struct syn_cache_head *scp; 885 1.1 ozaki struct mbuf *ipopts; 886 1.1 ozaki int s; 887 1.1 ozaki 888 1.1 ozaki tp = sototcpcb(so); 889 1.1 ozaki 890 1.1 ozaki /* 891 1.1 ozaki * Initialize some local state. 892 1.1 ozaki */ 893 1.1 ozaki win = sbspace(&so->so_rcv); 894 1.1 ozaki if (win > TCP_MAXWIN) 895 1.1 ozaki win = TCP_MAXWIN; 896 1.1 ozaki 897 1.1 ozaki #ifdef TCP_SIGNATURE 898 1.1 ozaki if (optp || (tp->t_flags & TF_SIGNATURE)) 899 1.1 ozaki #else 900 1.1 ozaki if (optp) 901 1.1 ozaki #endif 902 1.1 ozaki { 903 1.1 ozaki tb.t_flags = tcp_do_rfc1323 ? (TF_REQ_SCALE|TF_REQ_TSTMP) : 0; 904 1.1 ozaki #ifdef TCP_SIGNATURE 905 1.1 ozaki tb.t_flags |= (tp->t_flags & TF_SIGNATURE); 906 1.1 ozaki #endif 907 1.1 ozaki tb.t_state = TCPS_LISTEN; 908 1.1 ozaki if (tcp_dooptions(&tb, optp, optlen, th, m, toff, oi) < 0) 909 1.1 ozaki return 0; 910 1.1 ozaki } else 911 1.1 ozaki tb.t_flags = 0; 912 1.1 ozaki 913 1.1 ozaki switch (src->sa_family) { 914 1.1 ozaki case AF_INET: 915 1.1 ozaki /* Remember the IP options, if any. */ 916 1.1 ozaki ipopts = ip_srcroute(m); 917 1.1 ozaki break; 918 1.1 ozaki default: 919 1.1 ozaki ipopts = NULL; 920 1.1 ozaki } 921 1.1 ozaki 922 1.1 ozaki /* 923 1.1 ozaki * See if we already have an entry for this connection. 924 1.1 ozaki * If we do, resend the SYN,ACK. We do not count this 925 1.1 ozaki * as a retransmission (XXX though maybe we should). 926 1.1 ozaki */ 927 1.1 ozaki if ((sc = syn_cache_lookup(src, dst, &scp)) != NULL) { 928 1.1 ozaki TCP_STATINC(TCP_STAT_SC_DUPESYN); 929 1.1 ozaki if (ipopts) { 930 1.1 ozaki /* 931 1.1 ozaki * If we were remembering a previous source route, 932 1.1 ozaki * forget it and use the new one we've been given. 933 1.1 ozaki */ 934 1.1 ozaki if (sc->sc_ipopts) 935 1.1 ozaki (void)m_free(sc->sc_ipopts); 936 1.1 ozaki sc->sc_ipopts = ipopts; 937 1.1 ozaki } 938 1.1 ozaki sc->sc_timestamp = tb.ts_recent; 939 1.1 ozaki m_freem(m); 940 1.1 ozaki if (syn_cache_respond(sc) == 0) { 941 1.7 riastrad net_stat_ref_t tcps = TCP_STAT_GETREF(); 942 1.7 riastrad _NET_STATINC_REF(tcps, TCP_STAT_SNDACKS); 943 1.7 riastrad _NET_STATINC_REF(tcps, TCP_STAT_SNDTOTAL); 944 1.1 ozaki TCP_STAT_PUTREF(); 945 1.1 ozaki } 946 1.1 ozaki return 1; 947 1.1 ozaki } 948 1.1 ozaki 949 1.1 ozaki s = splsoftnet(); 950 1.1 ozaki sc = pool_get(&syn_cache_pool, PR_NOWAIT); 951 1.1 ozaki splx(s); 952 1.1 ozaki if (sc == NULL) { 953 1.1 ozaki if (ipopts) 954 1.1 ozaki (void)m_free(ipopts); 955 1.1 ozaki return 0; 956 1.1 ozaki } 957 1.1 ozaki 958 1.1 ozaki /* 959 1.1 ozaki * Fill in the cache, and put the necessary IP and TCP 960 1.1 ozaki * options into the reply. 961 1.1 ozaki */ 962 1.1 ozaki memset(sc, 0, sizeof(struct syn_cache)); 963 1.1 ozaki callout_init(&sc->sc_timer, CALLOUT_MPSAFE); 964 1.1 ozaki memcpy(&sc->sc_src, src, src->sa_len); 965 1.1 ozaki memcpy(&sc->sc_dst, dst, dst->sa_len); 966 1.1 ozaki sc->sc_flags = 0; 967 1.1 ozaki sc->sc_ipopts = ipopts; 968 1.1 ozaki sc->sc_irs = th->th_seq; 969 1.1 ozaki switch (src->sa_family) { 970 1.1 ozaki case AF_INET: 971 1.1 ozaki { 972 1.1 ozaki struct sockaddr_in *srcin = (void *)src; 973 1.1 ozaki struct sockaddr_in *dstin = (void *)dst; 974 1.1 ozaki 975 1.1 ozaki sc->sc_iss = tcp_new_iss1(&dstin->sin_addr, 976 1.1 ozaki &srcin->sin_addr, dstin->sin_port, 977 1.1 ozaki srcin->sin_port, sizeof(dstin->sin_addr)); 978 1.1 ozaki break; 979 1.1 ozaki } 980 1.1 ozaki #ifdef INET6 981 1.1 ozaki case AF_INET6: 982 1.1 ozaki { 983 1.1 ozaki struct sockaddr_in6 *srcin6 = (void *)src; 984 1.1 ozaki struct sockaddr_in6 *dstin6 = (void *)dst; 985 1.1 ozaki 986 1.1 ozaki sc->sc_iss = tcp_new_iss1(&dstin6->sin6_addr, 987 1.1 ozaki &srcin6->sin6_addr, dstin6->sin6_port, 988 1.1 ozaki srcin6->sin6_port, sizeof(dstin6->sin6_addr)); 989 1.1 ozaki break; 990 1.1 ozaki } 991 1.1 ozaki #endif 992 1.1 ozaki } 993 1.1 ozaki sc->sc_peermaxseg = oi->maxseg; 994 1.1 ozaki sc->sc_ourmaxseg = tcp_mss_to_advertise(m->m_flags & M_PKTHDR ? 995 1.1 ozaki m_get_rcvif_NOMPSAFE(m) : NULL, sc->sc_src.sa.sa_family); 996 1.1 ozaki sc->sc_win = win; 997 1.1 ozaki sc->sc_timebase = tcp_now - 1; /* see tcp_newtcpcb() */ 998 1.1 ozaki sc->sc_timestamp = tb.ts_recent; 999 1.1 ozaki if ((tb.t_flags & (TF_REQ_TSTMP|TF_RCVD_TSTMP)) == 1000 1.1 ozaki (TF_REQ_TSTMP|TF_RCVD_TSTMP)) 1001 1.1 ozaki sc->sc_flags |= SCF_TIMESTAMP; 1002 1.1 ozaki if ((tb.t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == 1003 1.1 ozaki (TF_RCVD_SCALE|TF_REQ_SCALE)) { 1004 1.1 ozaki sc->sc_requested_s_scale = tb.requested_s_scale; 1005 1.1 ozaki sc->sc_request_r_scale = 0; 1006 1.1 ozaki /* 1007 1.1 ozaki * Pick the smallest possible scaling factor that 1008 1.1 ozaki * will still allow us to scale up to sb_max. 1009 1.1 ozaki * 1010 1.1 ozaki * We do this because there are broken firewalls that 1011 1.1 ozaki * will corrupt the window scale option, leading to 1012 1.1 ozaki * the other endpoint believing that our advertised 1013 1.1 ozaki * window is unscaled. At scale factors larger than 1014 1.1 ozaki * 5 the unscaled window will drop below 1500 bytes, 1015 1.1 ozaki * leading to serious problems when traversing these 1016 1.1 ozaki * broken firewalls. 1017 1.1 ozaki * 1018 1.1 ozaki * With the default sbmax of 256K, a scale factor 1019 1.1 ozaki * of 3 will be chosen by this algorithm. Those who 1020 1.1 ozaki * choose a larger sbmax should watch out 1021 1.1 ozaki * for the compatibility problems mentioned above. 1022 1.1 ozaki * 1023 1.1 ozaki * RFC1323: The Window field in a SYN (i.e., a <SYN> 1024 1.1 ozaki * or <SYN,ACK>) segment itself is never scaled. 1025 1.1 ozaki */ 1026 1.1 ozaki while (sc->sc_request_r_scale < TCP_MAX_WINSHIFT && 1027 1.1 ozaki (TCP_MAXWIN << sc->sc_request_r_scale) < sb_max) 1028 1.1 ozaki sc->sc_request_r_scale++; 1029 1.1 ozaki } else { 1030 1.1 ozaki sc->sc_requested_s_scale = 15; 1031 1.1 ozaki sc->sc_request_r_scale = 15; 1032 1.1 ozaki } 1033 1.1 ozaki if ((tb.t_flags & TF_SACK_PERMIT) && tcp_do_sack) 1034 1.1 ozaki sc->sc_flags |= SCF_SACK_PERMIT; 1035 1.1 ozaki 1036 1.1 ozaki /* 1037 1.1 ozaki * ECN setup packet received. 1038 1.1 ozaki */ 1039 1.1 ozaki if ((th->th_flags & (TH_ECE|TH_CWR)) && tcp_do_ecn) 1040 1.1 ozaki sc->sc_flags |= SCF_ECN_PERMIT; 1041 1.1 ozaki 1042 1.1 ozaki #ifdef TCP_SIGNATURE 1043 1.1 ozaki if (tb.t_flags & TF_SIGNATURE) 1044 1.1 ozaki sc->sc_flags |= SCF_SIGNATURE; 1045 1.1 ozaki #endif 1046 1.1 ozaki sc->sc_tp = tp; 1047 1.1 ozaki m_freem(m); 1048 1.1 ozaki if (syn_cache_respond(sc) == 0) { 1049 1.7 riastrad net_stat_ref_t tcps = TCP_STAT_GETREF(); 1050 1.7 riastrad _NET_STATINC_REF(tcps, TCP_STAT_SNDACKS); 1051 1.7 riastrad _NET_STATINC_REF(tcps, TCP_STAT_SNDTOTAL); 1052 1.1 ozaki TCP_STAT_PUTREF(); 1053 1.1 ozaki syn_cache_insert(sc, tp); 1054 1.1 ozaki } else { 1055 1.1 ozaki s = splsoftnet(); 1056 1.1 ozaki /* 1057 1.1 ozaki * syn_cache_put() will try to schedule the timer, so 1058 1.1 ozaki * we need to initialize it 1059 1.1 ozaki */ 1060 1.1 ozaki syn_cache_timer_arm(sc); 1061 1.1 ozaki syn_cache_put(sc); 1062 1.1 ozaki splx(s); 1063 1.1 ozaki TCP_STATINC(TCP_STAT_SC_DROPPED); 1064 1.1 ozaki } 1065 1.1 ozaki return 1; 1066 1.1 ozaki } 1067 1.1 ozaki 1068 1.1 ozaki /* 1069 1.1 ozaki * syn_cache_respond: (re)send SYN+ACK. 1070 1.1 ozaki * 1071 1.1 ozaki * Returns 0 on success. 1072 1.1 ozaki */ 1073 1.1 ozaki 1074 1.2 ozaki static int 1075 1.1 ozaki syn_cache_respond(struct syn_cache *sc) 1076 1.1 ozaki { 1077 1.1 ozaki #ifdef INET6 1078 1.1 ozaki struct rtentry *rt = NULL; 1079 1.1 ozaki #endif 1080 1.1 ozaki struct route *ro; 1081 1.1 ozaki u_int8_t *optp; 1082 1.1 ozaki int optlen, error; 1083 1.1 ozaki u_int16_t tlen; 1084 1.1 ozaki struct ip *ip = NULL; 1085 1.1 ozaki #ifdef INET6 1086 1.1 ozaki struct ip6_hdr *ip6 = NULL; 1087 1.1 ozaki #endif 1088 1.1 ozaki struct tcpcb *tp; 1089 1.1 ozaki struct tcphdr *th; 1090 1.1 ozaki struct mbuf *m; 1091 1.1 ozaki u_int hlen; 1092 1.1 ozaki #ifdef TCP_SIGNATURE 1093 1.1 ozaki struct secasvar *sav = NULL; 1094 1.1 ozaki u_int8_t *sigp = NULL; 1095 1.1 ozaki #endif 1096 1.1 ozaki 1097 1.1 ozaki ro = &sc->sc_route; 1098 1.1 ozaki switch (sc->sc_src.sa.sa_family) { 1099 1.1 ozaki case AF_INET: 1100 1.1 ozaki hlen = sizeof(struct ip); 1101 1.1 ozaki break; 1102 1.1 ozaki #ifdef INET6 1103 1.1 ozaki case AF_INET6: 1104 1.1 ozaki hlen = sizeof(struct ip6_hdr); 1105 1.1 ozaki break; 1106 1.1 ozaki #endif 1107 1.1 ozaki default: 1108 1.1 ozaki return EAFNOSUPPORT; 1109 1.1 ozaki } 1110 1.1 ozaki 1111 1.1 ozaki /* Worst case scenario, since we don't know the option size yet. */ 1112 1.1 ozaki tlen = hlen + sizeof(struct tcphdr) + MAX_TCPOPTLEN; 1113 1.1 ozaki KASSERT(max_linkhdr + tlen <= MCLBYTES); 1114 1.1 ozaki 1115 1.1 ozaki /* 1116 1.1 ozaki * Create the IP+TCP header from scratch. 1117 1.1 ozaki */ 1118 1.1 ozaki MGETHDR(m, M_DONTWAIT, MT_DATA); 1119 1.1 ozaki if (m && (max_linkhdr + tlen) > MHLEN) { 1120 1.1 ozaki MCLGET(m, M_DONTWAIT); 1121 1.1 ozaki if ((m->m_flags & M_EXT) == 0) { 1122 1.1 ozaki m_freem(m); 1123 1.1 ozaki m = NULL; 1124 1.1 ozaki } 1125 1.1 ozaki } 1126 1.1 ozaki if (m == NULL) 1127 1.1 ozaki return ENOBUFS; 1128 1.1 ozaki MCLAIM(m, &tcp_tx_mowner); 1129 1.1 ozaki 1130 1.1 ozaki tp = sc->sc_tp; 1131 1.1 ozaki 1132 1.1 ozaki /* Fixup the mbuf. */ 1133 1.1 ozaki m->m_data += max_linkhdr; 1134 1.1 ozaki m_reset_rcvif(m); 1135 1.1 ozaki memset(mtod(m, void *), 0, tlen); 1136 1.1 ozaki 1137 1.1 ozaki switch (sc->sc_src.sa.sa_family) { 1138 1.1 ozaki case AF_INET: 1139 1.1 ozaki ip = mtod(m, struct ip *); 1140 1.1 ozaki ip->ip_v = 4; 1141 1.1 ozaki ip->ip_dst = sc->sc_src.sin.sin_addr; 1142 1.1 ozaki ip->ip_src = sc->sc_dst.sin.sin_addr; 1143 1.1 ozaki ip->ip_p = IPPROTO_TCP; 1144 1.1 ozaki th = (struct tcphdr *)(ip + 1); 1145 1.1 ozaki th->th_dport = sc->sc_src.sin.sin_port; 1146 1.1 ozaki th->th_sport = sc->sc_dst.sin.sin_port; 1147 1.1 ozaki break; 1148 1.1 ozaki #ifdef INET6 1149 1.1 ozaki case AF_INET6: 1150 1.1 ozaki ip6 = mtod(m, struct ip6_hdr *); 1151 1.1 ozaki ip6->ip6_vfc = IPV6_VERSION; 1152 1.1 ozaki ip6->ip6_dst = sc->sc_src.sin6.sin6_addr; 1153 1.1 ozaki ip6->ip6_src = sc->sc_dst.sin6.sin6_addr; 1154 1.1 ozaki ip6->ip6_nxt = IPPROTO_TCP; 1155 1.1 ozaki /* ip6_plen will be updated in ip6_output() */ 1156 1.1 ozaki th = (struct tcphdr *)(ip6 + 1); 1157 1.1 ozaki th->th_dport = sc->sc_src.sin6.sin6_port; 1158 1.1 ozaki th->th_sport = sc->sc_dst.sin6.sin6_port; 1159 1.1 ozaki break; 1160 1.1 ozaki #endif 1161 1.1 ozaki default: 1162 1.1 ozaki panic("%s: impossible (1)", __func__); 1163 1.1 ozaki } 1164 1.1 ozaki 1165 1.1 ozaki th->th_seq = htonl(sc->sc_iss); 1166 1.1 ozaki th->th_ack = htonl(sc->sc_irs + 1); 1167 1.1 ozaki th->th_flags = TH_SYN|TH_ACK; 1168 1.1 ozaki th->th_win = htons(sc->sc_win); 1169 1.1 ozaki /* th_x2, th_sum, th_urp already 0 from memset */ 1170 1.1 ozaki 1171 1.1 ozaki /* Tack on the TCP options. */ 1172 1.1 ozaki optp = (u_int8_t *)(th + 1); 1173 1.1 ozaki optlen = 0; 1174 1.1 ozaki *optp++ = TCPOPT_MAXSEG; 1175 1.1 ozaki *optp++ = TCPOLEN_MAXSEG; 1176 1.1 ozaki *optp++ = (sc->sc_ourmaxseg >> 8) & 0xff; 1177 1.1 ozaki *optp++ = sc->sc_ourmaxseg & 0xff; 1178 1.1 ozaki optlen += TCPOLEN_MAXSEG; 1179 1.1 ozaki 1180 1.1 ozaki if (sc->sc_request_r_scale != 15) { 1181 1.1 ozaki *((u_int32_t *)optp) = htonl(TCPOPT_NOP << 24 | 1182 1.1 ozaki TCPOPT_WINDOW << 16 | TCPOLEN_WINDOW << 8 | 1183 1.1 ozaki sc->sc_request_r_scale); 1184 1.1 ozaki optp += TCPOLEN_WINDOW + TCPOLEN_NOP; 1185 1.1 ozaki optlen += TCPOLEN_WINDOW + TCPOLEN_NOP; 1186 1.1 ozaki } 1187 1.1 ozaki 1188 1.1 ozaki if (sc->sc_flags & SCF_SACK_PERMIT) { 1189 1.1 ozaki /* Let the peer know that we will SACK. */ 1190 1.1 ozaki *optp++ = TCPOPT_SACK_PERMITTED; 1191 1.1 ozaki *optp++ = TCPOLEN_SACK_PERMITTED; 1192 1.1 ozaki optlen += TCPOLEN_SACK_PERMITTED; 1193 1.1 ozaki } 1194 1.1 ozaki 1195 1.1 ozaki if (sc->sc_flags & SCF_TIMESTAMP) { 1196 1.1 ozaki while (optlen % 4 != 2) { 1197 1.1 ozaki optlen += TCPOLEN_NOP; 1198 1.1 ozaki *optp++ = TCPOPT_NOP; 1199 1.1 ozaki } 1200 1.1 ozaki *optp++ = TCPOPT_TIMESTAMP; 1201 1.1 ozaki *optp++ = TCPOLEN_TIMESTAMP; 1202 1.1 ozaki u_int32_t *lp = (u_int32_t *)(optp); 1203 1.1 ozaki /* Form timestamp option as shown in appendix A of RFC 1323. */ 1204 1.1 ozaki *lp++ = htonl(SYN_CACHE_TIMESTAMP(sc)); 1205 1.1 ozaki *lp = htonl(sc->sc_timestamp); 1206 1.1 ozaki optp += TCPOLEN_TIMESTAMP - 2; 1207 1.1 ozaki optlen += TCPOLEN_TIMESTAMP; 1208 1.1 ozaki } 1209 1.1 ozaki 1210 1.1 ozaki #ifdef TCP_SIGNATURE 1211 1.1 ozaki if (sc->sc_flags & SCF_SIGNATURE) { 1212 1.1 ozaki sav = tcp_signature_getsav(m); 1213 1.1 ozaki if (sav == NULL) { 1214 1.1 ozaki m_freem(m); 1215 1.1 ozaki return EPERM; 1216 1.1 ozaki } 1217 1.1 ozaki 1218 1.1 ozaki *optp++ = TCPOPT_SIGNATURE; 1219 1.1 ozaki *optp++ = TCPOLEN_SIGNATURE; 1220 1.1 ozaki sigp = optp; 1221 1.1 ozaki memset(optp, 0, TCP_SIGLEN); 1222 1.1 ozaki optp += TCP_SIGLEN; 1223 1.1 ozaki optlen += TCPOLEN_SIGNATURE; 1224 1.1 ozaki } 1225 1.1 ozaki #endif 1226 1.1 ozaki 1227 1.1 ozaki /* 1228 1.1 ozaki * Terminate and pad TCP options to a 4 byte boundary. 1229 1.1 ozaki * 1230 1.1 ozaki * According to RFC793: "The content of the header beyond the 1231 1.1 ozaki * End-of-Option option must be header padding (i.e., zero)." 1232 1.1 ozaki * And later: "The padding is composed of zeros." 1233 1.1 ozaki */ 1234 1.1 ozaki if (optlen % 4) { 1235 1.1 ozaki optlen += TCPOLEN_EOL; 1236 1.1 ozaki *optp++ = TCPOPT_EOL; 1237 1.1 ozaki } 1238 1.1 ozaki while (optlen % 4) { 1239 1.1 ozaki optlen += TCPOLEN_PAD; 1240 1.1 ozaki *optp++ = TCPOPT_PAD; 1241 1.1 ozaki } 1242 1.1 ozaki 1243 1.1 ozaki /* Compute the actual values now that we've added the options. */ 1244 1.1 ozaki tlen = hlen + sizeof(struct tcphdr) + optlen; 1245 1.1 ozaki m->m_len = m->m_pkthdr.len = tlen; 1246 1.1 ozaki th->th_off = (sizeof(struct tcphdr) + optlen) >> 2; 1247 1.1 ozaki 1248 1.1 ozaki #ifdef TCP_SIGNATURE 1249 1.1 ozaki if (sav) { 1250 1.1 ozaki (void)tcp_signature(m, th, hlen, sav, sigp); 1251 1.1 ozaki key_sa_recordxfer(sav, m); 1252 1.1 ozaki KEY_SA_UNREF(&sav); 1253 1.1 ozaki } 1254 1.1 ozaki #endif 1255 1.1 ozaki 1256 1.1 ozaki /* 1257 1.1 ozaki * Send ECN SYN-ACK setup packet. 1258 1.1 ozaki * Routes can be asymmetric, so, even if we receive a packet 1259 1.1 ozaki * with ECE and CWR set, we must not assume no one will block 1260 1.1 ozaki * the ECE packet we are about to send. 1261 1.1 ozaki */ 1262 1.1 ozaki if ((sc->sc_flags & SCF_ECN_PERMIT) && tp && 1263 1.1 ozaki SEQ_GEQ(tp->snd_nxt, tp->snd_max)) { 1264 1.1 ozaki th->th_flags |= TH_ECE; 1265 1.1 ozaki TCP_STATINC(TCP_STAT_ECN_SHS); 1266 1.1 ozaki 1267 1.1 ozaki /* 1268 1.1 ozaki * draft-ietf-tcpm-ecnsyn-00.txt 1269 1.1 ozaki * 1270 1.1 ozaki * "[...] a TCP node MAY respond to an ECN-setup 1271 1.1 ozaki * SYN packet by setting ECT in the responding 1272 1.1 ozaki * ECN-setup SYN/ACK packet, indicating to routers 1273 1.1 ozaki * that the SYN/ACK packet is ECN-Capable. 1274 1.1 ozaki * This allows a congested router along the path 1275 1.1 ozaki * to mark the packet instead of dropping the 1276 1.1 ozaki * packet as an indication of congestion." 1277 1.1 ozaki * 1278 1.1 ozaki * "[...] There can be a great benefit in setting 1279 1.1 ozaki * an ECN-capable codepoint in SYN/ACK packets [...] 1280 1.1 ozaki * Congestion is most likely to occur in 1281 1.1 ozaki * the server-to-client direction. As a result, 1282 1.1 ozaki * setting an ECN-capable codepoint in SYN/ACK 1283 1.1 ozaki * packets can reduce the occurrence of three-second 1284 1.1 ozaki * retransmit timeouts resulting from the drop 1285 1.1 ozaki * of SYN/ACK packets." 1286 1.1 ozaki * 1287 1.1 ozaki * Page 4 and 6, January 2006. 1288 1.1 ozaki */ 1289 1.1 ozaki 1290 1.1 ozaki switch (sc->sc_src.sa.sa_family) { 1291 1.1 ozaki case AF_INET: 1292 1.1 ozaki ip->ip_tos |= IPTOS_ECN_ECT0; 1293 1.1 ozaki break; 1294 1.1 ozaki #ifdef INET6 1295 1.1 ozaki case AF_INET6: 1296 1.1 ozaki ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20); 1297 1.1 ozaki break; 1298 1.1 ozaki #endif 1299 1.1 ozaki } 1300 1.1 ozaki TCP_STATINC(TCP_STAT_ECN_ECT); 1301 1.1 ozaki } 1302 1.1 ozaki 1303 1.1 ozaki 1304 1.1 ozaki /* 1305 1.1 ozaki * Compute the packet's checksum. 1306 1.1 ozaki * 1307 1.1 ozaki * Fill in some straggling IP bits. Note the stack expects 1308 1.1 ozaki * ip_len to be in host order, for convenience. 1309 1.1 ozaki */ 1310 1.1 ozaki switch (sc->sc_src.sa.sa_family) { 1311 1.1 ozaki case AF_INET: 1312 1.1 ozaki ip->ip_len = htons(tlen - hlen); 1313 1.1 ozaki th->th_sum = 0; 1314 1.1 ozaki th->th_sum = in4_cksum(m, IPPROTO_TCP, hlen, tlen - hlen); 1315 1.1 ozaki ip->ip_len = htons(tlen); 1316 1.1 ozaki ip->ip_ttl = ip_defttl; 1317 1.1 ozaki /* XXX tos? */ 1318 1.1 ozaki break; 1319 1.1 ozaki #ifdef INET6 1320 1.1 ozaki case AF_INET6: 1321 1.1 ozaki ip6->ip6_plen = htons(tlen - hlen); 1322 1.1 ozaki th->th_sum = 0; 1323 1.1 ozaki th->th_sum = in6_cksum(m, IPPROTO_TCP, hlen, tlen - hlen); 1324 1.1 ozaki ip6->ip6_vfc &= ~IPV6_VERSION_MASK; 1325 1.1 ozaki ip6->ip6_vfc |= IPV6_VERSION; 1326 1.1 ozaki ip6->ip6_plen = htons(tlen - hlen); 1327 1.1 ozaki /* ip6_hlim will be initialized afterwards */ 1328 1.1 ozaki /* XXX flowlabel? */ 1329 1.1 ozaki break; 1330 1.1 ozaki #endif 1331 1.1 ozaki } 1332 1.1 ozaki 1333 1.1 ozaki /* XXX use IPsec policy on listening socket, on SYN ACK */ 1334 1.1 ozaki tp = sc->sc_tp; 1335 1.1 ozaki 1336 1.1 ozaki switch (sc->sc_src.sa.sa_family) { 1337 1.1 ozaki case AF_INET: 1338 1.1 ozaki error = ip_output(m, sc->sc_ipopts, ro, 1339 1.1 ozaki (ip_mtudisc ? IP_MTUDISC : 0), 1340 1.1 ozaki NULL, tp ? tp->t_inpcb : NULL); 1341 1.1 ozaki break; 1342 1.1 ozaki #ifdef INET6 1343 1.1 ozaki case AF_INET6: 1344 1.6 ozaki ip6->ip6_hlim = in6pcb_selecthlim(NULL, 1345 1.1 ozaki (rt = rtcache_validate(ro)) != NULL ? rt->rt_ifp : NULL); 1346 1.1 ozaki rtcache_unref(rt, ro); 1347 1.1 ozaki 1348 1.1 ozaki error = ip6_output(m, NULL /*XXX*/, ro, 0, NULL, 1349 1.3 ozaki tp ? tp->t_inpcb : NULL, NULL); 1350 1.1 ozaki break; 1351 1.1 ozaki #endif 1352 1.1 ozaki default: 1353 1.1 ozaki panic("%s: impossible (2)", __func__); 1354 1.1 ozaki } 1355 1.1 ozaki 1356 1.1 ozaki return error; 1357 1.1 ozaki } 1358