tcp_syncache.c revision 1.1 1 1.1 ozaki /* $NetBSD: tcp_syncache.c,v 1.1 2022/09/20 07:19:14 ozaki-r Exp $ */
2 1.1 ozaki
3 1.1 ozaki /*
4 1.1 ozaki * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 1.1 ozaki * All rights reserved.
6 1.1 ozaki *
7 1.1 ozaki * Redistribution and use in source and binary forms, with or without
8 1.1 ozaki * modification, are permitted provided that the following conditions
9 1.1 ozaki * are met:
10 1.1 ozaki * 1. Redistributions of source code must retain the above copyright
11 1.1 ozaki * notice, this list of conditions and the following disclaimer.
12 1.1 ozaki * 2. Redistributions in binary form must reproduce the above copyright
13 1.1 ozaki * notice, this list of conditions and the following disclaimer in the
14 1.1 ozaki * documentation and/or other materials provided with the distribution.
15 1.1 ozaki * 3. Neither the name of the project nor the names of its contributors
16 1.1 ozaki * may be used to endorse or promote products derived from this software
17 1.1 ozaki * without specific prior written permission.
18 1.1 ozaki *
19 1.1 ozaki * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 1.1 ozaki * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 1.1 ozaki * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 1.1 ozaki * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 1.1 ozaki * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 1.1 ozaki * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 1.1 ozaki * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 1.1 ozaki * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 1.1 ozaki * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 1.1 ozaki * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 1.1 ozaki * SUCH DAMAGE.
30 1.1 ozaki */
31 1.1 ozaki
32 1.1 ozaki /*
33 1.1 ozaki * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995
34 1.1 ozaki *
35 1.1 ozaki * NRL grants permission for redistribution and use in source and binary
36 1.1 ozaki * forms, with or without modification, of the software and documentation
37 1.1 ozaki * created at NRL provided that the following conditions are met:
38 1.1 ozaki *
39 1.1 ozaki * 1. Redistributions of source code must retain the above copyright
40 1.1 ozaki * notice, this list of conditions and the following disclaimer.
41 1.1 ozaki * 2. Redistributions in binary form must reproduce the above copyright
42 1.1 ozaki * notice, this list of conditions and the following disclaimer in the
43 1.1 ozaki * documentation and/or other materials provided with the distribution.
44 1.1 ozaki * 3. All advertising materials mentioning features or use of this software
45 1.1 ozaki * must display the following acknowledgements:
46 1.1 ozaki * This product includes software developed by the University of
47 1.1 ozaki * California, Berkeley and its contributors.
48 1.1 ozaki * This product includes software developed at the Information
49 1.1 ozaki * Technology Division, US Naval Research Laboratory.
50 1.1 ozaki * 4. Neither the name of the NRL nor the names of its contributors
51 1.1 ozaki * may be used to endorse or promote products derived from this software
52 1.1 ozaki * without specific prior written permission.
53 1.1 ozaki *
54 1.1 ozaki * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
55 1.1 ozaki * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
56 1.1 ozaki * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
57 1.1 ozaki * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR
58 1.1 ozaki * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
59 1.1 ozaki * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
60 1.1 ozaki * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
61 1.1 ozaki * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
62 1.1 ozaki * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
63 1.1 ozaki * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
64 1.1 ozaki * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
65 1.1 ozaki *
66 1.1 ozaki * The views and conclusions contained in the software and documentation
67 1.1 ozaki * are those of the authors and should not be interpreted as representing
68 1.1 ozaki * official policies, either expressed or implied, of the US Naval
69 1.1 ozaki * Research Laboratory (NRL).
70 1.1 ozaki */
71 1.1 ozaki
72 1.1 ozaki /*-
73 1.1 ozaki * Copyright (c) 1997, 1998, 1999, 2001, 2005, 2006,
74 1.1 ozaki * 2011 The NetBSD Foundation, Inc.
75 1.1 ozaki * All rights reserved.
76 1.1 ozaki *
77 1.1 ozaki * This code is derived from software contributed to The NetBSD Foundation
78 1.1 ozaki * by Coyote Point Systems, Inc.
79 1.1 ozaki * This code is derived from software contributed to The NetBSD Foundation
80 1.1 ozaki * by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation
81 1.1 ozaki * Facility, NASA Ames Research Center.
82 1.1 ozaki * This code is derived from software contributed to The NetBSD Foundation
83 1.1 ozaki * by Charles M. Hannum.
84 1.1 ozaki * This code is derived from software contributed to The NetBSD Foundation
85 1.1 ozaki * by Rui Paulo.
86 1.1 ozaki *
87 1.1 ozaki * Redistribution and use in source and binary forms, with or without
88 1.1 ozaki * modification, are permitted provided that the following conditions
89 1.1 ozaki * are met:
90 1.1 ozaki * 1. Redistributions of source code must retain the above copyright
91 1.1 ozaki * notice, this list of conditions and the following disclaimer.
92 1.1 ozaki * 2. Redistributions in binary form must reproduce the above copyright
93 1.1 ozaki * notice, this list of conditions and the following disclaimer in the
94 1.1 ozaki * documentation and/or other materials provided with the distribution.
95 1.1 ozaki *
96 1.1 ozaki * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
97 1.1 ozaki * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
98 1.1 ozaki * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
99 1.1 ozaki * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
100 1.1 ozaki * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
101 1.1 ozaki * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
102 1.1 ozaki * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
103 1.1 ozaki * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
104 1.1 ozaki * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
105 1.1 ozaki * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
106 1.1 ozaki * POSSIBILITY OF SUCH DAMAGE.
107 1.1 ozaki */
108 1.1 ozaki
109 1.1 ozaki /*
110 1.1 ozaki * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
111 1.1 ozaki * The Regents of the University of California. All rights reserved.
112 1.1 ozaki *
113 1.1 ozaki * Redistribution and use in source and binary forms, with or without
114 1.1 ozaki * modification, are permitted provided that the following conditions
115 1.1 ozaki * are met:
116 1.1 ozaki * 1. Redistributions of source code must retain the above copyright
117 1.1 ozaki * notice, this list of conditions and the following disclaimer.
118 1.1 ozaki * 2. Redistributions in binary form must reproduce the above copyright
119 1.1 ozaki * notice, this list of conditions and the following disclaimer in the
120 1.1 ozaki * documentation and/or other materials provided with the distribution.
121 1.1 ozaki * 3. Neither the name of the University nor the names of its contributors
122 1.1 ozaki * may be used to endorse or promote products derived from this software
123 1.1 ozaki * without specific prior written permission.
124 1.1 ozaki *
125 1.1 ozaki * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
126 1.1 ozaki * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
127 1.1 ozaki * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
128 1.1 ozaki * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
129 1.1 ozaki * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
130 1.1 ozaki * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
131 1.1 ozaki * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
132 1.1 ozaki * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
133 1.1 ozaki * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
134 1.1 ozaki * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
135 1.1 ozaki * SUCH DAMAGE.
136 1.1 ozaki *
137 1.1 ozaki * @(#)tcp_input.c 8.12 (Berkeley) 5/24/95
138 1.1 ozaki */
139 1.1 ozaki
140 1.1 ozaki /*
141 1.1 ozaki * TODO list for SYN cache stuff:
142 1.1 ozaki *
143 1.1 ozaki * Find room for a "state" field, which is needed to keep a
144 1.1 ozaki * compressed state for TIME_WAIT TCBs. It's been noted already
145 1.1 ozaki * that this is fairly important for very high-volume web and
146 1.1 ozaki * mail servers, which use a large number of short-lived
147 1.1 ozaki * connections.
148 1.1 ozaki */
149 1.1 ozaki
150 1.1 ozaki #include <sys/cdefs.h>
151 1.1 ozaki __KERNEL_RCSID(0, "$NetBSD: tcp_syncache.c,v 1.1 2022/09/20 07:19:14 ozaki-r Exp $");
152 1.1 ozaki
153 1.1 ozaki #ifdef _KERNEL_OPT
154 1.1 ozaki #include "opt_inet.h"
155 1.1 ozaki #include "opt_ipsec.h"
156 1.1 ozaki #endif
157 1.1 ozaki
158 1.1 ozaki #include <sys/param.h>
159 1.1 ozaki #include <sys/systm.h>
160 1.1 ozaki #include <sys/mbuf.h>
161 1.1 ozaki #include <sys/protosw.h>
162 1.1 ozaki #include <sys/socket.h>
163 1.1 ozaki #include <sys/socketvar.h>
164 1.1 ozaki #include <sys/errno.h>
165 1.1 ozaki #include <sys/syslog.h>
166 1.1 ozaki #include <sys/pool.h>
167 1.1 ozaki #include <sys/domain.h>
168 1.1 ozaki #include <sys/kernel.h>
169 1.1 ozaki #include <sys/lwp.h> /* for lwp0 */
170 1.1 ozaki #include <sys/cprng.h>
171 1.1 ozaki
172 1.1 ozaki #include <netinet/in.h>
173 1.1 ozaki #include <netinet/ip.h>
174 1.1 ozaki #include <netinet/in_pcb.h>
175 1.1 ozaki #include <netinet/in_var.h>
176 1.1 ozaki #include <netinet/ip_var.h>
177 1.1 ozaki
178 1.1 ozaki #include <netinet/ip6.h>
179 1.1 ozaki #ifdef INET6
180 1.1 ozaki #include <netinet6/ip6_var.h>
181 1.1 ozaki #include <netinet6/in6_pcb.h>
182 1.1 ozaki #include <netinet6/ip6_var.h>
183 1.1 ozaki #include <netinet6/in6_var.h>
184 1.1 ozaki #endif
185 1.1 ozaki
186 1.1 ozaki #include <netinet/tcp.h>
187 1.1 ozaki #include <netinet/tcp_fsm.h>
188 1.1 ozaki #include <netinet/tcp_seq.h>
189 1.1 ozaki #include <netinet/tcp_timer.h>
190 1.1 ozaki #include <netinet/tcp_var.h>
191 1.1 ozaki #include <netinet/tcp_private.h>
192 1.1 ozaki #include <netinet/tcp_syncache.h>
193 1.1 ozaki
194 1.1 ozaki #ifdef TCP_SIGNATURE
195 1.1 ozaki #ifdef IPSEC
196 1.1 ozaki #include <netipsec/ipsec.h>
197 1.1 ozaki #include <netipsec/key.h>
198 1.1 ozaki #ifdef INET6
199 1.1 ozaki #include <netipsec/ipsec6.h>
200 1.1 ozaki #endif
201 1.1 ozaki #endif /* IPSEC*/
202 1.1 ozaki #endif
203 1.1 ozaki
/* Forward declaration: callout handler armed by syn_cache_timer_arm(). */
204 1.1 ozaki static void syn_cache_timer(void *);
205 1.1 ozaki
206 1.1 ozaki /* syn hash parameters */
/* Number of buckets in the SYN cache hash table. */
207 1.1 ozaki #define TCP_SYN_HASH_SIZE 293
/* Nominal per-bucket size; only used to derive the default limits below. */
208 1.1 ozaki #define TCP_SYN_BUCKET_SIZE 35
/* Bucket count used as the hash modulus and as the table loop bound. */
209 1.1 ozaki static int tcp_syn_cache_size = TCP_SYN_HASH_SIZE;
/* Total-entry threshold at which syn_cache_insert() evicts an entry. */
210 1.1 ozaki int tcp_syn_cache_limit = TCP_SYN_HASH_SIZE*TCP_SYN_BUCKET_SIZE;
/* Per-bucket threshold at which syn_cache_insert() evicts the oldest entry. */
211 1.1 ozaki int tcp_syn_bucket_limit = 3*TCP_SYN_BUCKET_SIZE;
/* The hash table itself: one syn_cache_head per bucket. */
212 1.1 ozaki static struct syn_cache_head tcp_syn_cache[TCP_SYN_HASH_SIZE];
213 1.1 ozaki
214 1.1 ozaki /*
215 1.1 ozaki * TCP compressed state engine. Currently used to hold compressed
216 1.1 ozaki * state for SYN_RECEIVED.
217 1.1 ozaki */
218 1.1 ozaki
/*
 * Number of entries currently linked into the cache; incremented by
 * syn_cache_insert() and decremented by syn_cache_rm().
 */
219 1.1 ozaki u_long syn_cache_count;
/*
 * Hash secrets mixed into SYN_HASH()/SYN_HASH6().  syn_cache_insert()
 * re-randomizes both with cprng_fast32() whenever the cache is empty.
 */
220 1.1 ozaki static u_int32_t syn_hash1, syn_hash2;
221 1.1 ozaki
/*
 * IPv4 hash: (source address ^ syn_hash1) multiplied by
 * (((dp << 16) + sp) ^ syn_hash2), where sp/dp are the source and
 * destination ports.  The result is not masked.
 */
222 1.1 ozaki #define SYN_HASH(sa, sp, dp) \
223 1.1 ozaki ((((sa)->s_addr^syn_hash1)*(((((u_int32_t)(dp))<<16) + \
224 1.1 ozaki ((u_int32_t)(sp)))^syn_hash2)))
225 1.1 ozaki #ifndef INET6
/*
 * IPv4-only build: store into `hash' the SYN_HASH() of the source address,
 * source port and destination port.  Both sockaddrs are treated as
 * struct sockaddr_in; the destination address does not enter the hash.
 */
226 1.1 ozaki #define SYN_HASHALL(hash, src, dst) \
227 1.1 ozaki do { \
228 1.1 ozaki hash = SYN_HASH(&((const struct sockaddr_in *)(src))->sin_addr, \
229 1.1 ozaki ((const struct sockaddr_in *)(src))->sin_port, \
230 1.1 ozaki ((const struct sockaddr_in *)(dst))->sin_port); \
231 1.1 ozaki } while (/*CONSTCOND*/ 0)
232 1.1 ozaki #else
/*
 * IPv6 hash: the first and last 32-bit words of the source address are
 * XORed together with syn_hash1, multiplied by the same port mix as
 * SYN_HASH(), and the product is masked to 31 bits.
 */
233 1.1 ozaki #define SYN_HASH6(sa, sp, dp) \
234 1.1 ozaki ((((sa)->s6_addr32[0] ^ (sa)->s6_addr32[3] ^ syn_hash1) * \
235 1.1 ozaki (((((u_int32_t)(dp))<<16) + ((u_int32_t)(sp)))^syn_hash2)) \
236 1.1 ozaki & 0x7fffffff)
237 1.1 ozaki
/*
 * Dual-stack build: pick SYN_HASH() or SYN_HASH6() according to
 * (src)->sa_family and store the result into `hash'.  Any other address
 * family yields a hash of 0.
 */
238 1.1 ozaki #define SYN_HASHALL(hash, src, dst) \
239 1.1 ozaki do { \
240 1.1 ozaki switch ((src)->sa_family) { \
241 1.1 ozaki case AF_INET: \
242 1.1 ozaki hash = SYN_HASH(&((const struct sockaddr_in *)(src))->sin_addr, \
243 1.1 ozaki ((const struct sockaddr_in *)(src))->sin_port, \
244 1.1 ozaki ((const struct sockaddr_in *)(dst))->sin_port); \
245 1.1 ozaki break; \
246 1.1 ozaki case AF_INET6: \
247 1.1 ozaki hash = SYN_HASH6(&((const struct sockaddr_in6 *)(src))->sin6_addr, \
248 1.1 ozaki ((const struct sockaddr_in6 *)(src))->sin6_port, \
249 1.1 ozaki ((const struct sockaddr_in6 *)(dst))->sin6_port); \
250 1.1 ozaki break; \
251 1.1 ozaki default: \
252 1.1 ozaki hash = 0; \
253 1.1 ozaki } \
254 1.1 ozaki } while (/*CONSTCOND*/0)
255 1.1 ozaki #endif /* INET6 */
256 1.1 ozaki
/*
 * Pool for struct syn_cache entries: initialized in syn_cache_init(),
 * entries are returned to it by syn_cache_timer().
 */
257 1.1 ozaki static struct pool syn_cache_pool;
258 1.1 ozaki
259 1.1 ozaki /*
260 1.1 ozaki * We don't estimate RTT with SYNs, so each packet starts with the default
261 1.1 ozaki * RTT and each timer step has a fixed timeout value.
262 1.1 ozaki */
263 1.1 ozaki static inline void
264 1.1 ozaki syn_cache_timer_arm(struct syn_cache *sc)
265 1.1 ozaki {
266 1.1 ozaki
267 1.1 ozaki TCPT_RANGESET(sc->sc_rxtcur,
268 1.1 ozaki TCPTV_SRTTDFLT * tcp_backoff[sc->sc_rxtshift], TCPTV_MIN,
269 1.1 ozaki TCPTV_REXMTMAX);
270 1.1 ozaki callout_reset(&sc->sc_timer,
271 1.1 ozaki sc->sc_rxtcur * (hz / PR_SLOWHZ), syn_cache_timer, sc);
272 1.1 ozaki }
273 1.1 ozaki
/* Timestamp for an entry: tcp_now relative to the entry's sc_timebase. */
274 1.1 ozaki #define SYN_CACHE_TIMESTAMP(sc) (tcp_now - (sc)->sc_timebase)
275 1.1 ozaki
276 1.1 ozaki static inline void
277 1.1 ozaki syn_cache_rm(struct syn_cache *sc)
278 1.1 ozaki {
279 1.1 ozaki TAILQ_REMOVE(&tcp_syn_cache[sc->sc_bucketidx].sch_bucket,
280 1.1 ozaki sc, sc_bucketq);
281 1.1 ozaki sc->sc_tp = NULL;
282 1.1 ozaki LIST_REMOVE(sc, sc_tpq);
283 1.1 ozaki tcp_syn_cache[sc->sc_bucketidx].sch_length--;
284 1.1 ozaki callout_stop(&sc->sc_timer);
285 1.1 ozaki syn_cache_count--;
286 1.1 ozaki }
287 1.1 ozaki
288 1.1 ozaki static inline void
289 1.1 ozaki syn_cache_put(struct syn_cache *sc)
290 1.1 ozaki {
291 1.1 ozaki if (sc->sc_ipopts)
292 1.1 ozaki (void) m_free(sc->sc_ipopts);
293 1.1 ozaki rtcache_free(&sc->sc_route);
294 1.1 ozaki sc->sc_flags |= SCF_DEAD;
295 1.1 ozaki if (!callout_invoking(&sc->sc_timer))
296 1.1 ozaki callout_schedule(&(sc)->sc_timer, 1);
297 1.1 ozaki }
298 1.1 ozaki
299 1.1 ozaki void
300 1.1 ozaki syn_cache_init(void)
301 1.1 ozaki {
302 1.1 ozaki int i;
303 1.1 ozaki
304 1.1 ozaki pool_init(&syn_cache_pool, sizeof(struct syn_cache), 0, 0, 0,
305 1.1 ozaki "synpl", NULL, IPL_SOFTNET);
306 1.1 ozaki
307 1.1 ozaki /* Initialize the hash buckets. */
308 1.1 ozaki for (i = 0; i < tcp_syn_cache_size; i++)
309 1.1 ozaki TAILQ_INIT(&tcp_syn_cache[i].sch_bucket);
310 1.1 ozaki }
311 1.1 ozaki
312 1.1 ozaki void
313 1.1 ozaki syn_cache_insert(struct syn_cache *sc, struct tcpcb *tp)
314 1.1 ozaki {
315 1.1 ozaki struct syn_cache_head *scp;
316 1.1 ozaki struct syn_cache *sc2;
317 1.1 ozaki int s;
318 1.1 ozaki
319 1.1 ozaki /*
320 1.1 ozaki * If there are no entries in the hash table, reinitialize
321 1.1 ozaki * the hash secrets.
322 1.1 ozaki */
323 1.1 ozaki if (syn_cache_count == 0) {
324 1.1 ozaki syn_hash1 = cprng_fast32();
325 1.1 ozaki syn_hash2 = cprng_fast32();
326 1.1 ozaki }
327 1.1 ozaki
328 1.1 ozaki SYN_HASHALL(sc->sc_hash, &sc->sc_src.sa, &sc->sc_dst.sa);
329 1.1 ozaki sc->sc_bucketidx = sc->sc_hash % tcp_syn_cache_size;
330 1.1 ozaki scp = &tcp_syn_cache[sc->sc_bucketidx];
331 1.1 ozaki
332 1.1 ozaki /*
333 1.1 ozaki * Make sure that we don't overflow the per-bucket
334 1.1 ozaki * limit or the total cache size limit.
335 1.1 ozaki */
336 1.1 ozaki s = splsoftnet();
337 1.1 ozaki if (scp->sch_length >= tcp_syn_bucket_limit) {
338 1.1 ozaki TCP_STATINC(TCP_STAT_SC_BUCKETOVERFLOW);
339 1.1 ozaki /*
340 1.1 ozaki * The bucket is full. Toss the oldest element in the
341 1.1 ozaki * bucket. This will be the first entry in the bucket.
342 1.1 ozaki */
343 1.1 ozaki sc2 = TAILQ_FIRST(&scp->sch_bucket);
344 1.1 ozaki #ifdef DIAGNOSTIC
345 1.1 ozaki /*
346 1.1 ozaki * This should never happen; we should always find an
347 1.1 ozaki * entry in our bucket.
348 1.1 ozaki */
349 1.1 ozaki if (sc2 == NULL)
350 1.1 ozaki panic("syn_cache_insert: bucketoverflow: impossible");
351 1.1 ozaki #endif
352 1.1 ozaki syn_cache_rm(sc2);
353 1.1 ozaki syn_cache_put(sc2); /* calls pool_put but see spl above */
354 1.1 ozaki } else if (syn_cache_count >= tcp_syn_cache_limit) {
355 1.1 ozaki struct syn_cache_head *scp2, *sce;
356 1.1 ozaki
357 1.1 ozaki TCP_STATINC(TCP_STAT_SC_OVERFLOWED);
358 1.1 ozaki /*
359 1.1 ozaki * The cache is full. Toss the oldest entry in the
360 1.1 ozaki * first non-empty bucket we can find.
361 1.1 ozaki *
362 1.1 ozaki * XXX We would really like to toss the oldest
363 1.1 ozaki * entry in the cache, but we hope that this
364 1.1 ozaki * condition doesn't happen very often.
365 1.1 ozaki */
366 1.1 ozaki scp2 = scp;
367 1.1 ozaki if (TAILQ_EMPTY(&scp2->sch_bucket)) {
368 1.1 ozaki sce = &tcp_syn_cache[tcp_syn_cache_size];
369 1.1 ozaki for (++scp2; scp2 != scp; scp2++) {
370 1.1 ozaki if (scp2 >= sce)
371 1.1 ozaki scp2 = &tcp_syn_cache[0];
372 1.1 ozaki if (! TAILQ_EMPTY(&scp2->sch_bucket))
373 1.1 ozaki break;
374 1.1 ozaki }
375 1.1 ozaki #ifdef DIAGNOSTIC
376 1.1 ozaki /*
377 1.1 ozaki * This should never happen; we should always find a
378 1.1 ozaki * non-empty bucket.
379 1.1 ozaki */
380 1.1 ozaki if (scp2 == scp)
381 1.1 ozaki panic("syn_cache_insert: cacheoverflow: "
382 1.1 ozaki "impossible");
383 1.1 ozaki #endif
384 1.1 ozaki }
385 1.1 ozaki sc2 = TAILQ_FIRST(&scp2->sch_bucket);
386 1.1 ozaki syn_cache_rm(sc2);
387 1.1 ozaki syn_cache_put(sc2); /* calls pool_put but see spl above */
388 1.1 ozaki }
389 1.1 ozaki
390 1.1 ozaki /*
391 1.1 ozaki * Initialize the entry's timer.
392 1.1 ozaki */
393 1.1 ozaki sc->sc_rxttot = 0;
394 1.1 ozaki sc->sc_rxtshift = 0;
395 1.1 ozaki syn_cache_timer_arm(sc);
396 1.1 ozaki
397 1.1 ozaki /* Link it from tcpcb entry */
398 1.1 ozaki LIST_INSERT_HEAD(&tp->t_sc, sc, sc_tpq);
399 1.1 ozaki
400 1.1 ozaki /* Put it into the bucket. */
401 1.1 ozaki TAILQ_INSERT_TAIL(&scp->sch_bucket, sc, sc_bucketq);
402 1.1 ozaki scp->sch_length++;
403 1.1 ozaki syn_cache_count++;
404 1.1 ozaki
405 1.1 ozaki TCP_STATINC(TCP_STAT_SC_ADDED);
406 1.1 ozaki splx(s);
407 1.1 ozaki }
408 1.1 ozaki
409 1.1 ozaki /*
410 1.1 ozaki * Walk the timer queues, looking for SYN,ACKs that need to be retransmitted.
411 1.1 ozaki * If we have retransmitted an entry the maximum number of times, expire
412 1.1 ozaki * that entry.
413 1.1 ozaki */
414 1.1 ozaki static void
415 1.1 ozaki syn_cache_timer(void *arg)
416 1.1 ozaki {
417 1.1 ozaki struct syn_cache *sc = arg;
418 1.1 ozaki
419 1.1 ozaki mutex_enter(softnet_lock);
420 1.1 ozaki KERNEL_LOCK(1, NULL);
421 1.1 ozaki
422 1.1 ozaki callout_ack(&sc->sc_timer);
423 1.1 ozaki
424 1.1 ozaki if (__predict_false(sc->sc_flags & SCF_DEAD)) {
425 1.1 ozaki TCP_STATINC(TCP_STAT_SC_DELAYED_FREE);
426 1.1 ozaki goto free;
427 1.1 ozaki }
428 1.1 ozaki
429 1.1 ozaki if (__predict_false(sc->sc_rxtshift == TCP_MAXRXTSHIFT)) {
430 1.1 ozaki /* Drop it -- too many retransmissions. */
431 1.1 ozaki goto dropit;
432 1.1 ozaki }
433 1.1 ozaki
434 1.1 ozaki /*
435 1.1 ozaki * Compute the total amount of time this entry has
436 1.1 ozaki * been on a queue. If this entry has been on longer
437 1.1 ozaki * than the keep alive timer would allow, expire it.
438 1.1 ozaki */
439 1.1 ozaki sc->sc_rxttot += sc->sc_rxtcur;
440 1.1 ozaki if (sc->sc_rxttot >= MIN(tcp_keepinit, TCP_TIMER_MAXTICKS))
441 1.1 ozaki goto dropit;
442 1.1 ozaki
443 1.1 ozaki TCP_STATINC(TCP_STAT_SC_RETRANSMITTED);
444 1.1 ozaki (void)syn_cache_respond(sc);
445 1.1 ozaki
446 1.1 ozaki /* Advance the timer back-off. */
447 1.1 ozaki sc->sc_rxtshift++;
448 1.1 ozaki syn_cache_timer_arm(sc);
449 1.1 ozaki
450 1.1 ozaki goto out;
451 1.1 ozaki
452 1.1 ozaki dropit:
453 1.1 ozaki TCP_STATINC(TCP_STAT_SC_TIMED_OUT);
454 1.1 ozaki syn_cache_rm(sc);
455 1.1 ozaki if (sc->sc_ipopts)
456 1.1 ozaki (void) m_free(sc->sc_ipopts);
457 1.1 ozaki rtcache_free(&sc->sc_route);
458 1.1 ozaki
459 1.1 ozaki free:
460 1.1 ozaki callout_destroy(&sc->sc_timer);
461 1.1 ozaki pool_put(&syn_cache_pool, sc);
462 1.1 ozaki
463 1.1 ozaki out:
464 1.1 ozaki KERNEL_UNLOCK_ONE(NULL);
465 1.1 ozaki mutex_exit(softnet_lock);
466 1.1 ozaki }
467 1.1 ozaki
468 1.1 ozaki /*
469 1.1 ozaki * Remove syn cache created by the specified tcb entry,
470 1.1 ozaki * because this does not make sense to keep them
471 1.1 ozaki * (if there's no tcb entry, syn cache entry will never be used)
472 1.1 ozaki */
473 1.1 ozaki void
474 1.1 ozaki syn_cache_cleanup(struct tcpcb *tp)
475 1.1 ozaki {
476 1.1 ozaki struct syn_cache *sc, *nsc;
477 1.1 ozaki int s;
478 1.1 ozaki
479 1.1 ozaki s = splsoftnet();
480 1.1 ozaki
481 1.1 ozaki for (sc = LIST_FIRST(&tp->t_sc); sc != NULL; sc = nsc) {
482 1.1 ozaki nsc = LIST_NEXT(sc, sc_tpq);
483 1.1 ozaki
484 1.1 ozaki #ifdef DIAGNOSTIC
485 1.1 ozaki if (sc->sc_tp != tp)
486 1.1 ozaki panic("invalid sc_tp in syn_cache_cleanup");
487 1.1 ozaki #endif
488 1.1 ozaki syn_cache_rm(sc);
489 1.1 ozaki syn_cache_put(sc); /* calls pool_put but see spl above */
490 1.1 ozaki }
491 1.1 ozaki /* just for safety */
492 1.1 ozaki LIST_INIT(&tp->t_sc);
493 1.1 ozaki
494 1.1 ozaki splx(s);
495 1.1 ozaki }
496 1.1 ozaki
497 1.1 ozaki /*
498 1.1 ozaki * Find an entry in the syn cache.
499 1.1 ozaki */
500 1.1 ozaki struct syn_cache *
501 1.1 ozaki syn_cache_lookup(const struct sockaddr *src, const struct sockaddr *dst,
502 1.1 ozaki struct syn_cache_head **headp)
503 1.1 ozaki {
504 1.1 ozaki struct syn_cache *sc;
505 1.1 ozaki struct syn_cache_head *scp;
506 1.1 ozaki u_int32_t hash;
507 1.1 ozaki int s;
508 1.1 ozaki
509 1.1 ozaki SYN_HASHALL(hash, src, dst);
510 1.1 ozaki
511 1.1 ozaki scp = &tcp_syn_cache[hash % tcp_syn_cache_size];
512 1.1 ozaki *headp = scp;
513 1.1 ozaki s = splsoftnet();
514 1.1 ozaki for (sc = TAILQ_FIRST(&scp->sch_bucket); sc != NULL;
515 1.1 ozaki sc = TAILQ_NEXT(sc, sc_bucketq)) {
516 1.1 ozaki if (sc->sc_hash != hash)
517 1.1 ozaki continue;
518 1.1 ozaki if (!memcmp(&sc->sc_src, src, src->sa_len) &&
519 1.1 ozaki !memcmp(&sc->sc_dst, dst, dst->sa_len)) {
520 1.1 ozaki splx(s);
521 1.1 ozaki return (sc);
522 1.1 ozaki }
523 1.1 ozaki }
524 1.1 ozaki splx(s);
525 1.1 ozaki return (NULL);
526 1.1 ozaki }
527 1.1 ozaki
528 1.1 ozaki /*
529 1.1 ozaki * This function gets called when we receive an ACK for a socket in the
530 1.1 ozaki * LISTEN state. We look up the connection in the syn cache, and if it's
531 1.1 ozaki * there, we pull it out of the cache and turn it into a full-blown
532 1.1 ozaki * connection in the SYN-RECEIVED state.
533 1.1 ozaki *
534 1.1 ozaki * The return values may not be immediately obvious, and their effects
535 1.1 ozaki * can be subtle, so here they are:
536 1.1 ozaki *
537 1.1 ozaki * NULL SYN was not found in cache; caller should drop the
538 1.1 ozaki * packet and send an RST.
539 1.1 ozaki *
540 1.1 ozaki * -1 We were unable to create the new connection, and are
541 1.1 ozaki * aborting it. An ACK,RST is being sent to the peer
542 1.1 ozaki * (unless we got screwy sequence numbers; see below),
543 1.1 ozaki * because the 3-way handshake has been completed. Caller
544 1.1 ozaki * should not free the mbuf, since we may be using it. If
545 1.1 ozaki * we are not, we will free it.
546 1.1 ozaki *
547 1.1 ozaki * Otherwise, the return value is a pointer to the new socket
548 1.1 ozaki * associated with the connection.
549 1.1 ozaki */
550 1.1 ozaki struct socket *
551 1.1 ozaki syn_cache_get(struct sockaddr *src, struct sockaddr *dst,
552 1.1 ozaki struct tcphdr *th, struct socket *so, struct mbuf *m)
553 1.1 ozaki {
554 1.1 ozaki struct syn_cache *sc;
555 1.1 ozaki struct syn_cache_head *scp;
556 1.1 ozaki struct inpcb *inp = NULL;
557 1.1 ozaki #ifdef INET6
558 1.1 ozaki struct in6pcb *in6p = NULL;
559 1.1 ozaki #endif
560 1.1 ozaki struct tcpcb *tp;
561 1.1 ozaki int s;
562 1.1 ozaki struct socket *oso;
563 1.1 ozaki
564 1.1 ozaki s = splsoftnet();
565 1.1 ozaki if ((sc = syn_cache_lookup(src, dst, &scp)) == NULL) {
566 1.1 ozaki splx(s);
567 1.1 ozaki return NULL;
568 1.1 ozaki }
569 1.1 ozaki
570 1.1 ozaki /*
571 1.1 ozaki * Verify the sequence and ack numbers. Try getting the correct
572 1.1 ozaki * response again.
573 1.1 ozaki */
574 1.1 ozaki if ((th->th_ack != sc->sc_iss + 1) ||
575 1.1 ozaki SEQ_LEQ(th->th_seq, sc->sc_irs) ||
576 1.1 ozaki SEQ_GT(th->th_seq, sc->sc_irs + 1 + sc->sc_win)) {
577 1.1 ozaki m_freem(m);
578 1.1 ozaki (void)syn_cache_respond(sc);
579 1.1 ozaki splx(s);
580 1.1 ozaki return ((struct socket *)(-1));
581 1.1 ozaki }
582 1.1 ozaki
583 1.1 ozaki /* Remove this cache entry */
584 1.1 ozaki syn_cache_rm(sc);
585 1.1 ozaki splx(s);
586 1.1 ozaki
587 1.1 ozaki /*
588 1.1 ozaki * Ok, create the full blown connection, and set things up
589 1.1 ozaki * as they would have been set up if we had created the
590 1.1 ozaki * connection when the SYN arrived. If we can't create
591 1.1 ozaki * the connection, abort it.
592 1.1 ozaki */
593 1.1 ozaki /*
594 1.1 ozaki * inp still has the OLD in_pcb stuff, set the
595 1.1 ozaki * v6-related flags on the new guy, too. This is
596 1.1 ozaki * done particularly for the case where an AF_INET6
597 1.1 ozaki * socket is bound only to a port, and a v4 connection
598 1.1 ozaki * comes in on that port.
599 1.1 ozaki * we also copy the flowinfo from the original pcb
600 1.1 ozaki * to the new one.
601 1.1 ozaki */
602 1.1 ozaki oso = so;
603 1.1 ozaki so = sonewconn(so, true);
604 1.1 ozaki if (so == NULL)
605 1.1 ozaki goto resetandabort;
606 1.1 ozaki
607 1.1 ozaki switch (so->so_proto->pr_domain->dom_family) {
608 1.1 ozaki case AF_INET:
609 1.1 ozaki inp = sotoinpcb(so);
610 1.1 ozaki break;
611 1.1 ozaki #ifdef INET6
612 1.1 ozaki case AF_INET6:
613 1.1 ozaki in6p = sotoin6pcb(so);
614 1.1 ozaki break;
615 1.1 ozaki #endif
616 1.1 ozaki }
617 1.1 ozaki
618 1.1 ozaki switch (src->sa_family) {
619 1.1 ozaki case AF_INET:
620 1.1 ozaki if (inp) {
621 1.1 ozaki inp->inp_laddr = ((struct sockaddr_in *)dst)->sin_addr;
622 1.1 ozaki inp->inp_lport = ((struct sockaddr_in *)dst)->sin_port;
623 1.1 ozaki inp->inp_options = ip_srcroute(m);
624 1.1 ozaki in_pcbstate(inp, INP_BOUND);
625 1.1 ozaki if (inp->inp_options == NULL) {
626 1.1 ozaki inp->inp_options = sc->sc_ipopts;
627 1.1 ozaki sc->sc_ipopts = NULL;
628 1.1 ozaki }
629 1.1 ozaki }
630 1.1 ozaki #ifdef INET6
631 1.1 ozaki else if (in6p) {
632 1.1 ozaki /* IPv4 packet to AF_INET6 socket */
633 1.1 ozaki memset(&in6p->in6p_laddr, 0, sizeof(in6p->in6p_laddr));
634 1.1 ozaki in6p->in6p_laddr.s6_addr16[5] = htons(0xffff);
635 1.1 ozaki bcopy(&((struct sockaddr_in *)dst)->sin_addr,
636 1.1 ozaki &in6p->in6p_laddr.s6_addr32[3],
637 1.1 ozaki sizeof(((struct sockaddr_in *)dst)->sin_addr));
638 1.1 ozaki in6p->in6p_lport = ((struct sockaddr_in *)dst)->sin_port;
639 1.1 ozaki in6totcpcb(in6p)->t_family = AF_INET;
640 1.1 ozaki if (sotoin6pcb(oso)->in6p_flags & IN6P_IPV6_V6ONLY)
641 1.1 ozaki in6p->in6p_flags |= IN6P_IPV6_V6ONLY;
642 1.1 ozaki else
643 1.1 ozaki in6p->in6p_flags &= ~IN6P_IPV6_V6ONLY;
644 1.1 ozaki in6_pcbstate(in6p, IN6P_BOUND);
645 1.1 ozaki }
646 1.1 ozaki #endif
647 1.1 ozaki break;
648 1.1 ozaki #ifdef INET6
649 1.1 ozaki case AF_INET6:
650 1.1 ozaki if (in6p) {
651 1.1 ozaki in6p->in6p_laddr = ((struct sockaddr_in6 *)dst)->sin6_addr;
652 1.1 ozaki in6p->in6p_lport = ((struct sockaddr_in6 *)dst)->sin6_port;
653 1.1 ozaki in6_pcbstate(in6p, IN6P_BOUND);
654 1.1 ozaki }
655 1.1 ozaki break;
656 1.1 ozaki #endif
657 1.1 ozaki }
658 1.1 ozaki
659 1.1 ozaki #ifdef INET6
660 1.1 ozaki if (in6p && in6totcpcb(in6p)->t_family == AF_INET6 && sotoinpcb(oso)) {
661 1.1 ozaki struct in6pcb *oin6p = sotoin6pcb(oso);
662 1.1 ozaki /* inherit socket options from the listening socket */
663 1.1 ozaki in6p->in6p_flags |= (oin6p->in6p_flags & IN6P_CONTROLOPTS);
664 1.1 ozaki if (in6p->in6p_flags & IN6P_CONTROLOPTS) {
665 1.1 ozaki m_freem(in6p->in6p_options);
666 1.1 ozaki in6p->in6p_options = NULL;
667 1.1 ozaki }
668 1.1 ozaki ip6_savecontrol(in6p, &in6p->in6p_options,
669 1.1 ozaki mtod(m, struct ip6_hdr *), m);
670 1.1 ozaki }
671 1.1 ozaki #endif
672 1.1 ozaki
673 1.1 ozaki /*
674 1.1 ozaki * Give the new socket our cached route reference.
675 1.1 ozaki */
676 1.1 ozaki if (inp) {
677 1.1 ozaki rtcache_copy(&inp->inp_route, &sc->sc_route);
678 1.1 ozaki rtcache_free(&sc->sc_route);
679 1.1 ozaki }
680 1.1 ozaki #ifdef INET6
681 1.1 ozaki else {
682 1.1 ozaki rtcache_copy(&in6p->in6p_route, &sc->sc_route);
683 1.1 ozaki rtcache_free(&sc->sc_route);
684 1.1 ozaki }
685 1.1 ozaki #endif
686 1.1 ozaki
687 1.1 ozaki if (inp) {
688 1.1 ozaki struct sockaddr_in sin;
689 1.1 ozaki memcpy(&sin, src, src->sa_len);
690 1.1 ozaki if (in_pcbconnect(inp, &sin, &lwp0)) {
691 1.1 ozaki goto resetandabort;
692 1.1 ozaki }
693 1.1 ozaki }
694 1.1 ozaki #ifdef INET6
695 1.1 ozaki else if (in6p) {
696 1.1 ozaki struct sockaddr_in6 sin6;
697 1.1 ozaki memcpy(&sin6, src, src->sa_len);
698 1.1 ozaki if (src->sa_family == AF_INET) {
699 1.1 ozaki /* IPv4 packet to AF_INET6 socket */
700 1.1 ozaki in6_sin_2_v4mapsin6((struct sockaddr_in *)src, &sin6);
701 1.1 ozaki }
702 1.1 ozaki if (in6_pcbconnect(in6p, &sin6, NULL)) {
703 1.1 ozaki goto resetandabort;
704 1.1 ozaki }
705 1.1 ozaki }
706 1.1 ozaki #endif
707 1.1 ozaki else {
708 1.1 ozaki goto resetandabort;
709 1.1 ozaki }
710 1.1 ozaki
711 1.1 ozaki if (inp)
712 1.1 ozaki tp = intotcpcb(inp);
713 1.1 ozaki #ifdef INET6
714 1.1 ozaki else if (in6p)
715 1.1 ozaki tp = in6totcpcb(in6p);
716 1.1 ozaki #endif
717 1.1 ozaki else
718 1.1 ozaki tp = NULL;
719 1.1 ozaki
720 1.1 ozaki tp->t_flags = sototcpcb(oso)->t_flags & TF_NODELAY;
721 1.1 ozaki if (sc->sc_request_r_scale != 15) {
722 1.1 ozaki tp->requested_s_scale = sc->sc_requested_s_scale;
723 1.1 ozaki tp->request_r_scale = sc->sc_request_r_scale;
724 1.1 ozaki tp->snd_scale = sc->sc_requested_s_scale;
725 1.1 ozaki tp->rcv_scale = sc->sc_request_r_scale;
726 1.1 ozaki tp->t_flags |= TF_REQ_SCALE|TF_RCVD_SCALE;
727 1.1 ozaki }
728 1.1 ozaki if (sc->sc_flags & SCF_TIMESTAMP)
729 1.1 ozaki tp->t_flags |= TF_REQ_TSTMP|TF_RCVD_TSTMP;
730 1.1 ozaki tp->ts_timebase = sc->sc_timebase;
731 1.1 ozaki
732 1.1 ozaki tp->t_template = tcp_template(tp);
733 1.1 ozaki if (tp->t_template == 0) {
734 1.1 ozaki tp = tcp_drop(tp, ENOBUFS); /* destroys socket */
735 1.1 ozaki so = NULL;
736 1.1 ozaki m_freem(m);
737 1.1 ozaki goto abort;
738 1.1 ozaki }
739 1.1 ozaki
740 1.1 ozaki tp->iss = sc->sc_iss;
741 1.1 ozaki tp->irs = sc->sc_irs;
742 1.1 ozaki tcp_sendseqinit(tp);
743 1.1 ozaki tcp_rcvseqinit(tp);
744 1.1 ozaki tp->t_state = TCPS_SYN_RECEIVED;
745 1.1 ozaki TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepinit);
746 1.1 ozaki TCP_STATINC(TCP_STAT_ACCEPTS);
747 1.1 ozaki
748 1.1 ozaki if ((sc->sc_flags & SCF_SACK_PERMIT) && tcp_do_sack)
749 1.1 ozaki tp->t_flags |= TF_WILL_SACK;
750 1.1 ozaki
751 1.1 ozaki if ((sc->sc_flags & SCF_ECN_PERMIT) && tcp_do_ecn)
752 1.1 ozaki tp->t_flags |= TF_ECN_PERMIT;
753 1.1 ozaki
754 1.1 ozaki #ifdef TCP_SIGNATURE
755 1.1 ozaki if (sc->sc_flags & SCF_SIGNATURE)
756 1.1 ozaki tp->t_flags |= TF_SIGNATURE;
757 1.1 ozaki #endif
758 1.1 ozaki
759 1.1 ozaki /* Initialize tp->t_ourmss before we deal with the peer's! */
760 1.1 ozaki tp->t_ourmss = sc->sc_ourmaxseg;
761 1.1 ozaki tcp_mss_from_peer(tp, sc->sc_peermaxseg);
762 1.1 ozaki
763 1.1 ozaki /*
764 1.1 ozaki * Initialize the initial congestion window. If we
765 1.1 ozaki * had to retransmit the SYN,ACK, we must initialize cwnd
766 1.1 ozaki * to 1 segment (i.e. the Loss Window).
767 1.1 ozaki */
768 1.1 ozaki if (sc->sc_rxtshift)
769 1.1 ozaki tp->snd_cwnd = tp->t_peermss;
770 1.1 ozaki else {
771 1.1 ozaki int ss = tcp_init_win;
772 1.1 ozaki if (inp != NULL && in_localaddr(inp->inp_faddr))
773 1.1 ozaki ss = tcp_init_win_local;
774 1.1 ozaki #ifdef INET6
775 1.1 ozaki if (in6p != NULL && in6_localaddr(&in6p->in6p_faddr))
776 1.1 ozaki ss = tcp_init_win_local;
777 1.1 ozaki #endif
778 1.1 ozaki tp->snd_cwnd = TCP_INITIAL_WINDOW(ss, tp->t_peermss);
779 1.1 ozaki }
780 1.1 ozaki
781 1.1 ozaki tcp_rmx_rtt(tp);
782 1.1 ozaki tp->snd_wl1 = sc->sc_irs;
783 1.1 ozaki tp->rcv_up = sc->sc_irs + 1;
784 1.1 ozaki
785 1.1 ozaki /*
786 1.1 ozaki * This is what would have happened in tcp_output() when
787 1.1 ozaki * the SYN,ACK was sent.
788 1.1 ozaki */
789 1.1 ozaki tp->snd_up = tp->snd_una;
790 1.1 ozaki tp->snd_max = tp->snd_nxt = tp->iss+1;
791 1.1 ozaki TCP_TIMER_ARM(tp, TCPT_REXMT, tp->t_rxtcur);
792 1.1 ozaki if (sc->sc_win > 0 && SEQ_GT(tp->rcv_nxt + sc->sc_win, tp->rcv_adv))
793 1.1 ozaki tp->rcv_adv = tp->rcv_nxt + sc->sc_win;
794 1.1 ozaki tp->last_ack_sent = tp->rcv_nxt;
795 1.1 ozaki tp->t_partialacks = -1;
796 1.1 ozaki tp->t_dupacks = 0;
797 1.1 ozaki
798 1.1 ozaki TCP_STATINC(TCP_STAT_SC_COMPLETED);
799 1.1 ozaki s = splsoftnet();
800 1.1 ozaki syn_cache_put(sc);
801 1.1 ozaki splx(s);
802 1.1 ozaki return so;
803 1.1 ozaki
804 1.1 ozaki resetandabort:
805 1.1 ozaki (void)tcp_respond(NULL, m, m, th, (tcp_seq)0, th->th_ack, TH_RST);
806 1.1 ozaki abort:
807 1.1 ozaki if (so != NULL) {
808 1.1 ozaki (void) soqremque(so, 1);
809 1.1 ozaki (void) soabort(so);
810 1.1 ozaki mutex_enter(softnet_lock);
811 1.1 ozaki }
812 1.1 ozaki s = splsoftnet();
813 1.1 ozaki syn_cache_put(sc);
814 1.1 ozaki splx(s);
815 1.1 ozaki TCP_STATINC(TCP_STAT_SC_ABORTED);
816 1.1 ozaki return ((struct socket *)(-1));
817 1.1 ozaki }
818 1.1 ozaki
819 1.1 ozaki /*
820 1.1 ozaki * This function is called when we get a RST for a
821 1.1 ozaki * non-existent connection, so that we can see if the
822 1.1 ozaki * connection is in the syn cache. If it is, zap it.
823 1.1 ozaki */
824 1.1 ozaki
825 1.1 ozaki void
826 1.1 ozaki syn_cache_reset(struct sockaddr *src, struct sockaddr *dst, struct tcphdr *th)
827 1.1 ozaki {
828 1.1 ozaki struct syn_cache *sc;
829 1.1 ozaki struct syn_cache_head *scp;
830 1.1 ozaki int s = splsoftnet();
831 1.1 ozaki
832 1.1 ozaki if ((sc = syn_cache_lookup(src, dst, &scp)) == NULL) {
833 1.1 ozaki splx(s);
834 1.1 ozaki return;
835 1.1 ozaki }
836 1.1 ozaki if (SEQ_LT(th->th_seq, sc->sc_irs) ||
837 1.1 ozaki SEQ_GT(th->th_seq, sc->sc_irs+1)) {
838 1.1 ozaki splx(s);
839 1.1 ozaki return;
840 1.1 ozaki }
841 1.1 ozaki syn_cache_rm(sc);
842 1.1 ozaki TCP_STATINC(TCP_STAT_SC_RESET);
843 1.1 ozaki syn_cache_put(sc); /* calls pool_put but see spl above */
844 1.1 ozaki splx(s);
845 1.1 ozaki }
846 1.1 ozaki
847 1.1 ozaki void
848 1.1 ozaki syn_cache_unreach(const struct sockaddr *src, const struct sockaddr *dst,
849 1.1 ozaki struct tcphdr *th)
850 1.1 ozaki {
851 1.1 ozaki struct syn_cache *sc;
852 1.1 ozaki struct syn_cache_head *scp;
853 1.1 ozaki int s;
854 1.1 ozaki
855 1.1 ozaki s = splsoftnet();
856 1.1 ozaki if ((sc = syn_cache_lookup(src, dst, &scp)) == NULL) {
857 1.1 ozaki splx(s);
858 1.1 ozaki return;
859 1.1 ozaki }
860 1.1 ozaki /* If the sequence number != sc_iss, then it's a bogus ICMP msg */
861 1.1 ozaki if (ntohl(th->th_seq) != sc->sc_iss) {
862 1.1 ozaki splx(s);
863 1.1 ozaki return;
864 1.1 ozaki }
865 1.1 ozaki
866 1.1 ozaki /*
867 1.1 ozaki * If we've retransmitted 3 times and this is our second error,
868 1.1 ozaki * we remove the entry. Otherwise, we allow it to continue on.
869 1.1 ozaki * This prevents us from incorrectly nuking an entry during a
870 1.1 ozaki * spurious network outage.
871 1.1 ozaki *
872 1.1 ozaki * See tcp_notify().
873 1.1 ozaki */
874 1.1 ozaki if ((sc->sc_flags & SCF_UNREACH) == 0 || sc->sc_rxtshift < 3) {
875 1.1 ozaki sc->sc_flags |= SCF_UNREACH;
876 1.1 ozaki splx(s);
877 1.1 ozaki return;
878 1.1 ozaki }
879 1.1 ozaki
880 1.1 ozaki syn_cache_rm(sc);
881 1.1 ozaki TCP_STATINC(TCP_STAT_SC_UNREACH);
882 1.1 ozaki syn_cache_put(sc); /* calls pool_put but see spl above */
883 1.1 ozaki splx(s);
884 1.1 ozaki }
885 1.1 ozaki
886 1.1 ozaki /*
887 1.1 ozaki * Given a LISTEN socket and an inbound SYN request, add this to the syn
888 1.1 ozaki * cache, and send back a segment:
889 1.1 ozaki * <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK>
890 1.1 ozaki * to the source.
891 1.1 ozaki *
892 1.1 ozaki * IMPORTANT NOTE: We do _NOT_ ACK data that might accompany the SYN.
893 1.1 ozaki * Doing so would require that we hold onto the data and deliver it
894 1.1 ozaki * to the application. However, if we are the target of a SYN-flood
895 1.1 ozaki * DoS attack, an attacker could send data which would eventually
896 1.1 ozaki * consume all available buffer space if it were ACKed. By not ACKing
897 1.1 ozaki * the data, we avoid this DoS scenario.
898 1.1 ozaki */
899 1.1 ozaki int
900 1.1 ozaki syn_cache_add(struct sockaddr *src, struct sockaddr *dst, struct tcphdr *th,
901 1.1 ozaki unsigned int toff, struct socket *so, struct mbuf *m, u_char *optp,
902 1.1 ozaki int optlen, struct tcp_opt_info *oi)
903 1.1 ozaki {
904 1.1 ozaki struct tcpcb tb, *tp;
905 1.1 ozaki long win;
906 1.1 ozaki struct syn_cache *sc;
907 1.1 ozaki struct syn_cache_head *scp;
908 1.1 ozaki struct mbuf *ipopts;
909 1.1 ozaki int s;
910 1.1 ozaki
911 1.1 ozaki tp = sototcpcb(so);
912 1.1 ozaki
913 1.1 ozaki /*
914 1.1 ozaki * Initialize some local state.
915 1.1 ozaki */
916 1.1 ozaki win = sbspace(&so->so_rcv);
917 1.1 ozaki if (win > TCP_MAXWIN)
918 1.1 ozaki win = TCP_MAXWIN;
919 1.1 ozaki
920 1.1 ozaki #ifdef TCP_SIGNATURE
921 1.1 ozaki if (optp || (tp->t_flags & TF_SIGNATURE))
922 1.1 ozaki #else
923 1.1 ozaki if (optp)
924 1.1 ozaki #endif
925 1.1 ozaki {
926 1.1 ozaki tb.t_flags = tcp_do_rfc1323 ? (TF_REQ_SCALE|TF_REQ_TSTMP) : 0;
927 1.1 ozaki #ifdef TCP_SIGNATURE
928 1.1 ozaki tb.t_flags |= (tp->t_flags & TF_SIGNATURE);
929 1.1 ozaki #endif
930 1.1 ozaki tb.t_state = TCPS_LISTEN;
931 1.1 ozaki if (tcp_dooptions(&tb, optp, optlen, th, m, toff, oi) < 0)
932 1.1 ozaki return 0;
933 1.1 ozaki } else
934 1.1 ozaki tb.t_flags = 0;
935 1.1 ozaki
936 1.1 ozaki switch (src->sa_family) {
937 1.1 ozaki case AF_INET:
938 1.1 ozaki /* Remember the IP options, if any. */
939 1.1 ozaki ipopts = ip_srcroute(m);
940 1.1 ozaki break;
941 1.1 ozaki default:
942 1.1 ozaki ipopts = NULL;
943 1.1 ozaki }
944 1.1 ozaki
945 1.1 ozaki /*
946 1.1 ozaki * See if we already have an entry for this connection.
947 1.1 ozaki * If we do, resend the SYN,ACK. We do not count this
948 1.1 ozaki * as a retransmission (XXX though maybe we should).
949 1.1 ozaki */
950 1.1 ozaki if ((sc = syn_cache_lookup(src, dst, &scp)) != NULL) {
951 1.1 ozaki TCP_STATINC(TCP_STAT_SC_DUPESYN);
952 1.1 ozaki if (ipopts) {
953 1.1 ozaki /*
954 1.1 ozaki * If we were remembering a previous source route,
955 1.1 ozaki * forget it and use the new one we've been given.
956 1.1 ozaki */
957 1.1 ozaki if (sc->sc_ipopts)
958 1.1 ozaki (void)m_free(sc->sc_ipopts);
959 1.1 ozaki sc->sc_ipopts = ipopts;
960 1.1 ozaki }
961 1.1 ozaki sc->sc_timestamp = tb.ts_recent;
962 1.1 ozaki m_freem(m);
963 1.1 ozaki if (syn_cache_respond(sc) == 0) {
964 1.1 ozaki uint64_t *tcps = TCP_STAT_GETREF();
965 1.1 ozaki tcps[TCP_STAT_SNDACKS]++;
966 1.1 ozaki tcps[TCP_STAT_SNDTOTAL]++;
967 1.1 ozaki TCP_STAT_PUTREF();
968 1.1 ozaki }
969 1.1 ozaki return 1;
970 1.1 ozaki }
971 1.1 ozaki
972 1.1 ozaki s = splsoftnet();
973 1.1 ozaki sc = pool_get(&syn_cache_pool, PR_NOWAIT);
974 1.1 ozaki splx(s);
975 1.1 ozaki if (sc == NULL) {
976 1.1 ozaki if (ipopts)
977 1.1 ozaki (void)m_free(ipopts);
978 1.1 ozaki return 0;
979 1.1 ozaki }
980 1.1 ozaki
981 1.1 ozaki /*
982 1.1 ozaki * Fill in the cache, and put the necessary IP and TCP
983 1.1 ozaki * options into the reply.
984 1.1 ozaki */
985 1.1 ozaki memset(sc, 0, sizeof(struct syn_cache));
986 1.1 ozaki callout_init(&sc->sc_timer, CALLOUT_MPSAFE);
987 1.1 ozaki memcpy(&sc->sc_src, src, src->sa_len);
988 1.1 ozaki memcpy(&sc->sc_dst, dst, dst->sa_len);
989 1.1 ozaki sc->sc_flags = 0;
990 1.1 ozaki sc->sc_ipopts = ipopts;
991 1.1 ozaki sc->sc_irs = th->th_seq;
992 1.1 ozaki switch (src->sa_family) {
993 1.1 ozaki case AF_INET:
994 1.1 ozaki {
995 1.1 ozaki struct sockaddr_in *srcin = (void *)src;
996 1.1 ozaki struct sockaddr_in *dstin = (void *)dst;
997 1.1 ozaki
998 1.1 ozaki sc->sc_iss = tcp_new_iss1(&dstin->sin_addr,
999 1.1 ozaki &srcin->sin_addr, dstin->sin_port,
1000 1.1 ozaki srcin->sin_port, sizeof(dstin->sin_addr));
1001 1.1 ozaki break;
1002 1.1 ozaki }
1003 1.1 ozaki #ifdef INET6
1004 1.1 ozaki case AF_INET6:
1005 1.1 ozaki {
1006 1.1 ozaki struct sockaddr_in6 *srcin6 = (void *)src;
1007 1.1 ozaki struct sockaddr_in6 *dstin6 = (void *)dst;
1008 1.1 ozaki
1009 1.1 ozaki sc->sc_iss = tcp_new_iss1(&dstin6->sin6_addr,
1010 1.1 ozaki &srcin6->sin6_addr, dstin6->sin6_port,
1011 1.1 ozaki srcin6->sin6_port, sizeof(dstin6->sin6_addr));
1012 1.1 ozaki break;
1013 1.1 ozaki }
1014 1.1 ozaki #endif
1015 1.1 ozaki }
1016 1.1 ozaki sc->sc_peermaxseg = oi->maxseg;
1017 1.1 ozaki sc->sc_ourmaxseg = tcp_mss_to_advertise(m->m_flags & M_PKTHDR ?
1018 1.1 ozaki m_get_rcvif_NOMPSAFE(m) : NULL, sc->sc_src.sa.sa_family);
1019 1.1 ozaki sc->sc_win = win;
1020 1.1 ozaki sc->sc_timebase = tcp_now - 1; /* see tcp_newtcpcb() */
1021 1.1 ozaki sc->sc_timestamp = tb.ts_recent;
1022 1.1 ozaki if ((tb.t_flags & (TF_REQ_TSTMP|TF_RCVD_TSTMP)) ==
1023 1.1 ozaki (TF_REQ_TSTMP|TF_RCVD_TSTMP))
1024 1.1 ozaki sc->sc_flags |= SCF_TIMESTAMP;
1025 1.1 ozaki if ((tb.t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
1026 1.1 ozaki (TF_RCVD_SCALE|TF_REQ_SCALE)) {
1027 1.1 ozaki sc->sc_requested_s_scale = tb.requested_s_scale;
1028 1.1 ozaki sc->sc_request_r_scale = 0;
1029 1.1 ozaki /*
1030 1.1 ozaki * Pick the smallest possible scaling factor that
1031 1.1 ozaki * will still allow us to scale up to sb_max.
1032 1.1 ozaki *
1033 1.1 ozaki * We do this because there are broken firewalls that
1034 1.1 ozaki * will corrupt the window scale option, leading to
1035 1.1 ozaki * the other endpoint believing that our advertised
1036 1.1 ozaki * window is unscaled. At scale factors larger than
1037 1.1 ozaki * 5 the unscaled window will drop below 1500 bytes,
1038 1.1 ozaki * leading to serious problems when traversing these
1039 1.1 ozaki * broken firewalls.
1040 1.1 ozaki *
1041 1.1 ozaki * With the default sbmax of 256K, a scale factor
1042 1.1 ozaki * of 3 will be chosen by this algorithm. Those who
1043 1.1 ozaki * choose a larger sbmax should watch out
1044 1.1 ozaki * for the compatibility problems mentioned above.
1045 1.1 ozaki *
1046 1.1 ozaki * RFC1323: The Window field in a SYN (i.e., a <SYN>
1047 1.1 ozaki * or <SYN,ACK>) segment itself is never scaled.
1048 1.1 ozaki */
1049 1.1 ozaki while (sc->sc_request_r_scale < TCP_MAX_WINSHIFT &&
1050 1.1 ozaki (TCP_MAXWIN << sc->sc_request_r_scale) < sb_max)
1051 1.1 ozaki sc->sc_request_r_scale++;
1052 1.1 ozaki } else {
1053 1.1 ozaki sc->sc_requested_s_scale = 15;
1054 1.1 ozaki sc->sc_request_r_scale = 15;
1055 1.1 ozaki }
1056 1.1 ozaki if ((tb.t_flags & TF_SACK_PERMIT) && tcp_do_sack)
1057 1.1 ozaki sc->sc_flags |= SCF_SACK_PERMIT;
1058 1.1 ozaki
1059 1.1 ozaki /*
1060 1.1 ozaki * ECN setup packet received.
1061 1.1 ozaki */
1062 1.1 ozaki if ((th->th_flags & (TH_ECE|TH_CWR)) && tcp_do_ecn)
1063 1.1 ozaki sc->sc_flags |= SCF_ECN_PERMIT;
1064 1.1 ozaki
1065 1.1 ozaki #ifdef TCP_SIGNATURE
1066 1.1 ozaki if (tb.t_flags & TF_SIGNATURE)
1067 1.1 ozaki sc->sc_flags |= SCF_SIGNATURE;
1068 1.1 ozaki #endif
1069 1.1 ozaki sc->sc_tp = tp;
1070 1.1 ozaki m_freem(m);
1071 1.1 ozaki if (syn_cache_respond(sc) == 0) {
1072 1.1 ozaki uint64_t *tcps = TCP_STAT_GETREF();
1073 1.1 ozaki tcps[TCP_STAT_SNDACKS]++;
1074 1.1 ozaki tcps[TCP_STAT_SNDTOTAL]++;
1075 1.1 ozaki TCP_STAT_PUTREF();
1076 1.1 ozaki syn_cache_insert(sc, tp);
1077 1.1 ozaki } else {
1078 1.1 ozaki s = splsoftnet();
1079 1.1 ozaki /*
1080 1.1 ozaki * syn_cache_put() will try to schedule the timer, so
1081 1.1 ozaki * we need to initialize it
1082 1.1 ozaki */
1083 1.1 ozaki syn_cache_timer_arm(sc);
1084 1.1 ozaki syn_cache_put(sc);
1085 1.1 ozaki splx(s);
1086 1.1 ozaki TCP_STATINC(TCP_STAT_SC_DROPPED);
1087 1.1 ozaki }
1088 1.1 ozaki return 1;
1089 1.1 ozaki }
1090 1.1 ozaki
1091 1.1 ozaki /*
1092 1.1 ozaki * syn_cache_respond: (re)send SYN+ACK.
1093 1.1 ozaki *
1094 1.1 ozaki * Returns 0 on success.
1095 1.1 ozaki */
1096 1.1 ozaki
1097 1.1 ozaki int
1098 1.1 ozaki syn_cache_respond(struct syn_cache *sc)
1099 1.1 ozaki {
1100 1.1 ozaki #ifdef INET6
1101 1.1 ozaki struct rtentry *rt = NULL;
1102 1.1 ozaki #endif
1103 1.1 ozaki struct route *ro;
1104 1.1 ozaki u_int8_t *optp;
1105 1.1 ozaki int optlen, error;
1106 1.1 ozaki u_int16_t tlen;
1107 1.1 ozaki struct ip *ip = NULL;
1108 1.1 ozaki #ifdef INET6
1109 1.1 ozaki struct ip6_hdr *ip6 = NULL;
1110 1.1 ozaki #endif
1111 1.1 ozaki struct tcpcb *tp;
1112 1.1 ozaki struct tcphdr *th;
1113 1.1 ozaki struct mbuf *m;
1114 1.1 ozaki u_int hlen;
1115 1.1 ozaki #ifdef TCP_SIGNATURE
1116 1.1 ozaki struct secasvar *sav = NULL;
1117 1.1 ozaki u_int8_t *sigp = NULL;
1118 1.1 ozaki #endif
1119 1.1 ozaki
1120 1.1 ozaki ro = &sc->sc_route;
1121 1.1 ozaki switch (sc->sc_src.sa.sa_family) {
1122 1.1 ozaki case AF_INET:
1123 1.1 ozaki hlen = sizeof(struct ip);
1124 1.1 ozaki break;
1125 1.1 ozaki #ifdef INET6
1126 1.1 ozaki case AF_INET6:
1127 1.1 ozaki hlen = sizeof(struct ip6_hdr);
1128 1.1 ozaki break;
1129 1.1 ozaki #endif
1130 1.1 ozaki default:
1131 1.1 ozaki return EAFNOSUPPORT;
1132 1.1 ozaki }
1133 1.1 ozaki
1134 1.1 ozaki /* Worst case scenario, since we don't know the option size yet. */
1135 1.1 ozaki tlen = hlen + sizeof(struct tcphdr) + MAX_TCPOPTLEN;
1136 1.1 ozaki KASSERT(max_linkhdr + tlen <= MCLBYTES);
1137 1.1 ozaki
1138 1.1 ozaki /*
1139 1.1 ozaki * Create the IP+TCP header from scratch.
1140 1.1 ozaki */
1141 1.1 ozaki MGETHDR(m, M_DONTWAIT, MT_DATA);
1142 1.1 ozaki if (m && (max_linkhdr + tlen) > MHLEN) {
1143 1.1 ozaki MCLGET(m, M_DONTWAIT);
1144 1.1 ozaki if ((m->m_flags & M_EXT) == 0) {
1145 1.1 ozaki m_freem(m);
1146 1.1 ozaki m = NULL;
1147 1.1 ozaki }
1148 1.1 ozaki }
1149 1.1 ozaki if (m == NULL)
1150 1.1 ozaki return ENOBUFS;
1151 1.1 ozaki MCLAIM(m, &tcp_tx_mowner);
1152 1.1 ozaki
1153 1.1 ozaki tp = sc->sc_tp;
1154 1.1 ozaki
1155 1.1 ozaki /* Fixup the mbuf. */
1156 1.1 ozaki m->m_data += max_linkhdr;
1157 1.1 ozaki m_reset_rcvif(m);
1158 1.1 ozaki memset(mtod(m, void *), 0, tlen);
1159 1.1 ozaki
1160 1.1 ozaki switch (sc->sc_src.sa.sa_family) {
1161 1.1 ozaki case AF_INET:
1162 1.1 ozaki ip = mtod(m, struct ip *);
1163 1.1 ozaki ip->ip_v = 4;
1164 1.1 ozaki ip->ip_dst = sc->sc_src.sin.sin_addr;
1165 1.1 ozaki ip->ip_src = sc->sc_dst.sin.sin_addr;
1166 1.1 ozaki ip->ip_p = IPPROTO_TCP;
1167 1.1 ozaki th = (struct tcphdr *)(ip + 1);
1168 1.1 ozaki th->th_dport = sc->sc_src.sin.sin_port;
1169 1.1 ozaki th->th_sport = sc->sc_dst.sin.sin_port;
1170 1.1 ozaki break;
1171 1.1 ozaki #ifdef INET6
1172 1.1 ozaki case AF_INET6:
1173 1.1 ozaki ip6 = mtod(m, struct ip6_hdr *);
1174 1.1 ozaki ip6->ip6_vfc = IPV6_VERSION;
1175 1.1 ozaki ip6->ip6_dst = sc->sc_src.sin6.sin6_addr;
1176 1.1 ozaki ip6->ip6_src = sc->sc_dst.sin6.sin6_addr;
1177 1.1 ozaki ip6->ip6_nxt = IPPROTO_TCP;
1178 1.1 ozaki /* ip6_plen will be updated in ip6_output() */
1179 1.1 ozaki th = (struct tcphdr *)(ip6 + 1);
1180 1.1 ozaki th->th_dport = sc->sc_src.sin6.sin6_port;
1181 1.1 ozaki th->th_sport = sc->sc_dst.sin6.sin6_port;
1182 1.1 ozaki break;
1183 1.1 ozaki #endif
1184 1.1 ozaki default:
1185 1.1 ozaki panic("%s: impossible (1)", __func__);
1186 1.1 ozaki }
1187 1.1 ozaki
1188 1.1 ozaki th->th_seq = htonl(sc->sc_iss);
1189 1.1 ozaki th->th_ack = htonl(sc->sc_irs + 1);
1190 1.1 ozaki th->th_flags = TH_SYN|TH_ACK;
1191 1.1 ozaki th->th_win = htons(sc->sc_win);
1192 1.1 ozaki /* th_x2, th_sum, th_urp already 0 from memset */
1193 1.1 ozaki
1194 1.1 ozaki /* Tack on the TCP options. */
1195 1.1 ozaki optp = (u_int8_t *)(th + 1);
1196 1.1 ozaki optlen = 0;
1197 1.1 ozaki *optp++ = TCPOPT_MAXSEG;
1198 1.1 ozaki *optp++ = TCPOLEN_MAXSEG;
1199 1.1 ozaki *optp++ = (sc->sc_ourmaxseg >> 8) & 0xff;
1200 1.1 ozaki *optp++ = sc->sc_ourmaxseg & 0xff;
1201 1.1 ozaki optlen += TCPOLEN_MAXSEG;
1202 1.1 ozaki
1203 1.1 ozaki if (sc->sc_request_r_scale != 15) {
1204 1.1 ozaki *((u_int32_t *)optp) = htonl(TCPOPT_NOP << 24 |
1205 1.1 ozaki TCPOPT_WINDOW << 16 | TCPOLEN_WINDOW << 8 |
1206 1.1 ozaki sc->sc_request_r_scale);
1207 1.1 ozaki optp += TCPOLEN_WINDOW + TCPOLEN_NOP;
1208 1.1 ozaki optlen += TCPOLEN_WINDOW + TCPOLEN_NOP;
1209 1.1 ozaki }
1210 1.1 ozaki
1211 1.1 ozaki if (sc->sc_flags & SCF_SACK_PERMIT) {
1212 1.1 ozaki /* Let the peer know that we will SACK. */
1213 1.1 ozaki *optp++ = TCPOPT_SACK_PERMITTED;
1214 1.1 ozaki *optp++ = TCPOLEN_SACK_PERMITTED;
1215 1.1 ozaki optlen += TCPOLEN_SACK_PERMITTED;
1216 1.1 ozaki }
1217 1.1 ozaki
1218 1.1 ozaki if (sc->sc_flags & SCF_TIMESTAMP) {
1219 1.1 ozaki while (optlen % 4 != 2) {
1220 1.1 ozaki optlen += TCPOLEN_NOP;
1221 1.1 ozaki *optp++ = TCPOPT_NOP;
1222 1.1 ozaki }
1223 1.1 ozaki *optp++ = TCPOPT_TIMESTAMP;
1224 1.1 ozaki *optp++ = TCPOLEN_TIMESTAMP;
1225 1.1 ozaki u_int32_t *lp = (u_int32_t *)(optp);
1226 1.1 ozaki /* Form timestamp option as shown in appendix A of RFC 1323. */
1227 1.1 ozaki *lp++ = htonl(SYN_CACHE_TIMESTAMP(sc));
1228 1.1 ozaki *lp = htonl(sc->sc_timestamp);
1229 1.1 ozaki optp += TCPOLEN_TIMESTAMP - 2;
1230 1.1 ozaki optlen += TCPOLEN_TIMESTAMP;
1231 1.1 ozaki }
1232 1.1 ozaki
1233 1.1 ozaki #ifdef TCP_SIGNATURE
1234 1.1 ozaki if (sc->sc_flags & SCF_SIGNATURE) {
1235 1.1 ozaki sav = tcp_signature_getsav(m);
1236 1.1 ozaki if (sav == NULL) {
1237 1.1 ozaki m_freem(m);
1238 1.1 ozaki return EPERM;
1239 1.1 ozaki }
1240 1.1 ozaki
1241 1.1 ozaki *optp++ = TCPOPT_SIGNATURE;
1242 1.1 ozaki *optp++ = TCPOLEN_SIGNATURE;
1243 1.1 ozaki sigp = optp;
1244 1.1 ozaki memset(optp, 0, TCP_SIGLEN);
1245 1.1 ozaki optp += TCP_SIGLEN;
1246 1.1 ozaki optlen += TCPOLEN_SIGNATURE;
1247 1.1 ozaki }
1248 1.1 ozaki #endif
1249 1.1 ozaki
1250 1.1 ozaki /*
1251 1.1 ozaki * Terminate and pad TCP options to a 4 byte boundary.
1252 1.1 ozaki *
1253 1.1 ozaki * According to RFC793: "The content of the header beyond the
1254 1.1 ozaki * End-of-Option option must be header padding (i.e., zero)."
1255 1.1 ozaki * And later: "The padding is composed of zeros."
1256 1.1 ozaki */
1257 1.1 ozaki if (optlen % 4) {
1258 1.1 ozaki optlen += TCPOLEN_EOL;
1259 1.1 ozaki *optp++ = TCPOPT_EOL;
1260 1.1 ozaki }
1261 1.1 ozaki while (optlen % 4) {
1262 1.1 ozaki optlen += TCPOLEN_PAD;
1263 1.1 ozaki *optp++ = TCPOPT_PAD;
1264 1.1 ozaki }
1265 1.1 ozaki
1266 1.1 ozaki /* Compute the actual values now that we've added the options. */
1267 1.1 ozaki tlen = hlen + sizeof(struct tcphdr) + optlen;
1268 1.1 ozaki m->m_len = m->m_pkthdr.len = tlen;
1269 1.1 ozaki th->th_off = (sizeof(struct tcphdr) + optlen) >> 2;
1270 1.1 ozaki
1271 1.1 ozaki #ifdef TCP_SIGNATURE
1272 1.1 ozaki if (sav) {
1273 1.1 ozaki (void)tcp_signature(m, th, hlen, sav, sigp);
1274 1.1 ozaki key_sa_recordxfer(sav, m);
1275 1.1 ozaki KEY_SA_UNREF(&sav);
1276 1.1 ozaki }
1277 1.1 ozaki #endif
1278 1.1 ozaki
1279 1.1 ozaki /*
1280 1.1 ozaki * Send ECN SYN-ACK setup packet.
1281 1.1 ozaki * Routes can be asymmetric, so, even if we receive a packet
1282 1.1 ozaki * with ECE and CWR set, we must not assume no one will block
1283 1.1 ozaki * the ECE packet we are about to send.
1284 1.1 ozaki */
1285 1.1 ozaki if ((sc->sc_flags & SCF_ECN_PERMIT) && tp &&
1286 1.1 ozaki SEQ_GEQ(tp->snd_nxt, tp->snd_max)) {
1287 1.1 ozaki th->th_flags |= TH_ECE;
1288 1.1 ozaki TCP_STATINC(TCP_STAT_ECN_SHS);
1289 1.1 ozaki
1290 1.1 ozaki /*
1291 1.1 ozaki * draft-ietf-tcpm-ecnsyn-00.txt
1292 1.1 ozaki *
1293 1.1 ozaki * "[...] a TCP node MAY respond to an ECN-setup
1294 1.1 ozaki * SYN packet by setting ECT in the responding
1295 1.1 ozaki * ECN-setup SYN/ACK packet, indicating to routers
1296 1.1 ozaki * that the SYN/ACK packet is ECN-Capable.
1297 1.1 ozaki * This allows a congested router along the path
1298 1.1 ozaki * to mark the packet instead of dropping the
1299 1.1 ozaki * packet as an indication of congestion."
1300 1.1 ozaki *
1301 1.1 ozaki * "[...] There can be a great benefit in setting
1302 1.1 ozaki * an ECN-capable codepoint in SYN/ACK packets [...]
1303 1.1 ozaki * Congestion is most likely to occur in
1304 1.1 ozaki * the server-to-client direction. As a result,
1305 1.1 ozaki * setting an ECN-capable codepoint in SYN/ACK
1306 1.1 ozaki * packets can reduce the occurrence of three-second
1307 1.1 ozaki * retransmit timeouts resulting from the drop
1308 1.1 ozaki * of SYN/ACK packets."
1309 1.1 ozaki *
1310 1.1 ozaki * Page 4 and 6, January 2006.
1311 1.1 ozaki */
1312 1.1 ozaki
1313 1.1 ozaki switch (sc->sc_src.sa.sa_family) {
1314 1.1 ozaki case AF_INET:
1315 1.1 ozaki ip->ip_tos |= IPTOS_ECN_ECT0;
1316 1.1 ozaki break;
1317 1.1 ozaki #ifdef INET6
1318 1.1 ozaki case AF_INET6:
1319 1.1 ozaki ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20);
1320 1.1 ozaki break;
1321 1.1 ozaki #endif
1322 1.1 ozaki }
1323 1.1 ozaki TCP_STATINC(TCP_STAT_ECN_ECT);
1324 1.1 ozaki }
1325 1.1 ozaki
1326 1.1 ozaki
1327 1.1 ozaki /*
1328 1.1 ozaki * Compute the packet's checksum.
1329 1.1 ozaki *
1330 1.1 ozaki * Fill in some straggling IP bits. Note the stack expects
1331 1.1 ozaki * ip_len to be in host order, for convenience.
1332 1.1 ozaki */
1333 1.1 ozaki switch (sc->sc_src.sa.sa_family) {
1334 1.1 ozaki case AF_INET:
1335 1.1 ozaki ip->ip_len = htons(tlen - hlen);
1336 1.1 ozaki th->th_sum = 0;
1337 1.1 ozaki th->th_sum = in4_cksum(m, IPPROTO_TCP, hlen, tlen - hlen);
1338 1.1 ozaki ip->ip_len = htons(tlen);
1339 1.1 ozaki ip->ip_ttl = ip_defttl;
1340 1.1 ozaki /* XXX tos? */
1341 1.1 ozaki break;
1342 1.1 ozaki #ifdef INET6
1343 1.1 ozaki case AF_INET6:
1344 1.1 ozaki ip6->ip6_plen = htons(tlen - hlen);
1345 1.1 ozaki th->th_sum = 0;
1346 1.1 ozaki th->th_sum = in6_cksum(m, IPPROTO_TCP, hlen, tlen - hlen);
1347 1.1 ozaki ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
1348 1.1 ozaki ip6->ip6_vfc |= IPV6_VERSION;
1349 1.1 ozaki ip6->ip6_plen = htons(tlen - hlen);
1350 1.1 ozaki /* ip6_hlim will be initialized afterwards */
1351 1.1 ozaki /* XXX flowlabel? */
1352 1.1 ozaki break;
1353 1.1 ozaki #endif
1354 1.1 ozaki }
1355 1.1 ozaki
1356 1.1 ozaki /* XXX use IPsec policy on listening socket, on SYN ACK */
1357 1.1 ozaki tp = sc->sc_tp;
1358 1.1 ozaki
1359 1.1 ozaki switch (sc->sc_src.sa.sa_family) {
1360 1.1 ozaki case AF_INET:
1361 1.1 ozaki error = ip_output(m, sc->sc_ipopts, ro,
1362 1.1 ozaki (ip_mtudisc ? IP_MTUDISC : 0),
1363 1.1 ozaki NULL, tp ? tp->t_inpcb : NULL);
1364 1.1 ozaki break;
1365 1.1 ozaki #ifdef INET6
1366 1.1 ozaki case AF_INET6:
1367 1.1 ozaki ip6->ip6_hlim = in6_selecthlim(NULL,
1368 1.1 ozaki (rt = rtcache_validate(ro)) != NULL ? rt->rt_ifp : NULL);
1369 1.1 ozaki rtcache_unref(rt, ro);
1370 1.1 ozaki
1371 1.1 ozaki error = ip6_output(m, NULL /*XXX*/, ro, 0, NULL,
1372 1.1 ozaki tp ? tp->t_in6pcb : NULL, NULL);
1373 1.1 ozaki break;
1374 1.1 ozaki #endif
1375 1.1 ozaki default:
1376 1.1 ozaki panic("%s: impossible (2)", __func__);
1377 1.1 ozaki }
1378 1.1 ozaki
1379 1.1 ozaki return error;
1380 1.1 ozaki }
1381