tcp_usrreq.c revision 1.185 1 /* $NetBSD: tcp_usrreq.c,v 1.185 2014/07/09 04:54:04 rtr Exp $ */
2
3 /*
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 /*-
33 * Copyright (c) 1997, 1998, 2005, 2006 The NetBSD Foundation, Inc.
34 * All rights reserved.
35 *
36 * This code is derived from software contributed to The NetBSD Foundation
37 * by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation
38 * Facility, NASA Ames Research Center.
39 * This code is derived from software contributed to The NetBSD Foundation
40 * by Charles M. Hannum.
41 * This code is derived from software contributed to The NetBSD Foundation
42 * by Rui Paulo.
43 *
44 * Redistribution and use in source and binary forms, with or without
45 * modification, are permitted provided that the following conditions
46 * are met:
47 * 1. Redistributions of source code must retain the above copyright
48 * notice, this list of conditions and the following disclaimer.
49 * 2. Redistributions in binary form must reproduce the above copyright
50 * notice, this list of conditions and the following disclaimer in the
51 * documentation and/or other materials provided with the distribution.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
54 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
56 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
57 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
58 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
59 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
60 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
61 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
62 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
63 * POSSIBILITY OF SUCH DAMAGE.
64 */
65
66 /*
67 * Copyright (c) 1982, 1986, 1988, 1993, 1995
68 * The Regents of the University of California. All rights reserved.
69 *
70 * Redistribution and use in source and binary forms, with or without
71 * modification, are permitted provided that the following conditions
72 * are met:
73 * 1. Redistributions of source code must retain the above copyright
74 * notice, this list of conditions and the following disclaimer.
75 * 2. Redistributions in binary form must reproduce the above copyright
76 * notice, this list of conditions and the following disclaimer in the
77 * documentation and/or other materials provided with the distribution.
78 * 3. Neither the name of the University nor the names of its contributors
79 * may be used to endorse or promote products derived from this software
80 * without specific prior written permission.
81 *
82 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
83 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
84 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
85 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
86 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
87 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
88 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
89 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
90 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
91 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
92 * SUCH DAMAGE.
93 *
94 * @(#)tcp_usrreq.c 8.5 (Berkeley) 6/21/95
95 */
96
97 /*
98 * TCP protocol interface to socket abstraction.
99 */
100
101 #include <sys/cdefs.h>
102 __KERNEL_RCSID(0, "$NetBSD: tcp_usrreq.c,v 1.185 2014/07/09 04:54:04 rtr Exp $");
103
104 #include "opt_inet.h"
105 #include "opt_ipsec.h"
106 #include "opt_tcp_debug.h"
107 #include "opt_mbuftrace.h"
108
109 #include <sys/param.h>
110 #include <sys/systm.h>
111 #include <sys/kernel.h>
112 #include <sys/malloc.h>
113 #include <sys/mbuf.h>
114 #include <sys/socket.h>
115 #include <sys/socketvar.h>
116 #include <sys/protosw.h>
117 #include <sys/errno.h>
118 #include <sys/stat.h>
119 #include <sys/proc.h>
120 #include <sys/domain.h>
121 #include <sys/sysctl.h>
122 #include <sys/kauth.h>
123 #include <sys/uidinfo.h>
124
125 #include <net/if.h>
126 #include <net/route.h>
127
128 #include <netinet/in.h>
129 #include <netinet/in_systm.h>
130 #include <netinet/in_var.h>
131 #include <netinet/ip.h>
132 #include <netinet/in_pcb.h>
133 #include <netinet/ip_var.h>
134 #include <netinet/in_offload.h>
135
136 #ifdef INET6
137 #ifndef INET
138 #include <netinet/in.h>
139 #endif
140 #include <netinet/ip6.h>
141 #include <netinet6/in6_pcb.h>
142 #include <netinet6/ip6_var.h>
143 #include <netinet6/scope6_var.h>
144 #endif
145
146 #include <netinet/tcp.h>
147 #include <netinet/tcp_fsm.h>
148 #include <netinet/tcp_seq.h>
149 #include <netinet/tcp_timer.h>
150 #include <netinet/tcp_var.h>
151 #include <netinet/tcp_private.h>
152 #include <netinet/tcp_congctl.h>
153 #include <netinet/tcpip.h>
154 #include <netinet/tcp_debug.h>
155 #include <netinet/tcp_vtw.h>
156
157 #include "opt_tcp_space.h"
158
/*
 * Capture the state of a connection before a user request is processed.
 *
 * With KPROF, the per-(state, request) counter in tcp_acounts is bumped.
 * With TCP_DEBUG, the current tp->t_state is returned so that the caller
 * can later pass it to tcp_debug_trace(); in every other configuration
 * the function returns 0.
 *
 * tp may be NULL (the call sites in this file look it up from the pcb
 * and do not verify it first); in that case nothing is counted and 0 is
 * returned instead of dereferencing the pointer.
 */
static int
tcp_debug_capture(struct tcpcb *tp, int req)
{
	if (tp == NULL)
		return 0;
#ifdef KPROF
	tcp_acounts[tp->t_state][req]++;
#endif
#ifdef TCP_DEBUG
	return tp->t_state;
#else
	return 0;
#endif
}
170
/*
 * Log a TA_USER trace record for a completed user request.
 *
 * This is a no-op unless the kernel is built with TCP_DEBUG.  When it
 * is, a record is written only if tp is non-NULL and the socket has
 * SO_DEBUG set; ostate is the state captured before the request ran.
 */
static inline void
tcp_debug_trace(struct socket *so, struct tcpcb *tp, int ostate, int req)
{
#ifdef TCP_DEBUG
	if (tp == NULL)
		return;
	if ((so->so_options & SO_DEBUG) == 0)
		return;
	tcp_trace(TA_USER, ostate, tp, NULL, req);
#endif
}
179
180 /*
181 * Process a TCP user request for TCP tb. If this is a send request
182 * then m is the mbuf chain of send data. If this is a timer expiration
183 * (called from the software clock routine), then timertype tells which timer.
184 */
185 static int
186 tcp_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
187 struct mbuf *control, struct lwp *l)
188 {
189 struct inpcb *inp;
190 #ifdef INET6
191 struct in6pcb *in6p;
192 #endif
193 struct tcpcb *tp = NULL;
194 int s;
195 int error = 0;
196 int ostate = 0;
197 int family; /* family of the socket */
198
199 KASSERT(req != PRU_ATTACH);
200 KASSERT(req != PRU_DETACH);
201 KASSERT(req != PRU_CONTROL);
202 KASSERT(req != PRU_SENSE);
203 KASSERT(req != PRU_PEERADDR);
204 KASSERT(req != PRU_SOCKADDR);
205
206 family = so->so_proto->pr_domain->dom_family;
207
208 s = splsoftnet();
209
210 if (req == PRU_PURGEIF) {
211 mutex_enter(softnet_lock);
212 switch (family) {
213 #ifdef INET
214 case PF_INET:
215 in_pcbpurgeif0(&tcbtable, (struct ifnet *)control);
216 in_purgeif((struct ifnet *)control);
217 in_pcbpurgeif(&tcbtable, (struct ifnet *)control);
218 break;
219 #endif
220 #ifdef INET6
221 case PF_INET6:
222 in6_pcbpurgeif0(&tcbtable, (struct ifnet *)control);
223 in6_purgeif((struct ifnet *)control);
224 in6_pcbpurgeif(&tcbtable, (struct ifnet *)control);
225 break;
226 #endif
227 default:
228 mutex_exit(softnet_lock);
229 splx(s);
230 return (EAFNOSUPPORT);
231 }
232 mutex_exit(softnet_lock);
233 splx(s);
234 return (0);
235 }
236
237 KASSERT(solocked(so));
238
239 switch (family) {
240 #ifdef INET
241 case PF_INET:
242 inp = sotoinpcb(so);
243 #ifdef INET6
244 in6p = NULL;
245 #endif
246 break;
247 #endif
248 #ifdef INET6
249 case PF_INET6:
250 inp = NULL;
251 in6p = sotoin6pcb(so);
252 break;
253 #endif
254 default:
255 splx(s);
256 return EAFNOSUPPORT;
257 }
258 KASSERT(!control || (req == PRU_SEND || req == PRU_SENDOOB));
259 #ifdef INET6
260 /* XXX: KASSERT((inp != NULL) ^ (in6p != NULL)); */
261 #endif
262 /*
263 * When a TCP is attached to a socket, then there will be
264 * a (struct inpcb) pointed at by the socket, and this
265 * structure will point at a subsidary (struct tcpcb).
266 */
267 if (inp == NULL
268 #ifdef INET6
269 && in6p == NULL
270 #endif
271 )
272 {
273 error = EINVAL;
274 goto release;
275 }
276 #ifdef INET
277 if (inp) {
278 tp = intotcpcb(inp);
279 /* WHAT IF TP IS 0? */
280 ostate = tcp_debug_capture(tp, req);
281 }
282 #endif
283 #ifdef INET6
284 if (in6p) {
285 tp = in6totcpcb(in6p);
286 /* WHAT IF TP IS 0? */
287 ostate = tcp_debug_capture(tp, req);
288 }
289 #endif
290
291 switch (req) {
292
293 /*
294 * Give the socket an address.
295 */
296 case PRU_BIND:
297 switch (family) {
298 #ifdef INET
299 case PF_INET:
300 error = in_pcbbind(inp, nam, l);
301 break;
302 #endif
303 #ifdef INET6
304 case PF_INET6:
305 error = in6_pcbbind(in6p, nam, l);
306 if (!error) {
307 /* mapped addr case */
308 if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_laddr))
309 tp->t_family = AF_INET;
310 else
311 tp->t_family = AF_INET6;
312 }
313 break;
314 #endif
315 }
316 break;
317
318 /*
319 * Prepare to accept connections.
320 */
321 case PRU_LISTEN:
322 #ifdef INET
323 if (inp && inp->inp_lport == 0) {
324 error = in_pcbbind(inp, NULL, l);
325 if (error)
326 break;
327 }
328 #endif
329 #ifdef INET6
330 if (in6p && in6p->in6p_lport == 0) {
331 error = in6_pcbbind(in6p, NULL, l);
332 if (error)
333 break;
334 }
335 #endif
336 tp->t_state = TCPS_LISTEN;
337 break;
338
339 /*
340 * Initiate connection to peer.
341 * Create a template for use in transmissions on this connection.
342 * Enter SYN_SENT state, and mark socket as connecting.
343 * Start keep-alive timer, and seed output sequence space.
344 * Send initial segment on connection.
345 */
346 case PRU_CONNECT:
347 #ifdef INET
348 if (inp) {
349 if (inp->inp_lport == 0) {
350 error = in_pcbbind(inp, NULL, l);
351 if (error)
352 break;
353 }
354 error = in_pcbconnect(inp, nam, l);
355 }
356 #endif
357 #ifdef INET6
358 if (in6p) {
359 if (in6p->in6p_lport == 0) {
360 error = in6_pcbbind(in6p, NULL, l);
361 if (error)
362 break;
363 }
364 error = in6_pcbconnect(in6p, nam, l);
365 if (!error) {
366 /* mapped addr case */
367 if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_faddr))
368 tp->t_family = AF_INET;
369 else
370 tp->t_family = AF_INET6;
371 }
372 }
373 #endif
374 if (error)
375 break;
376 tp->t_template = tcp_template(tp);
377 if (tp->t_template == 0) {
378 #ifdef INET
379 if (inp)
380 in_pcbdisconnect(inp);
381 #endif
382 #ifdef INET6
383 if (in6p)
384 in6_pcbdisconnect(in6p);
385 #endif
386 error = ENOBUFS;
387 break;
388 }
389 /*
390 * Compute window scaling to request.
391 * XXX: This should be moved to tcp_output().
392 */
393 while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
394 (TCP_MAXWIN << tp->request_r_scale) < sb_max)
395 tp->request_r_scale++;
396 soisconnecting(so);
397 TCP_STATINC(TCP_STAT_CONNATTEMPT);
398 tp->t_state = TCPS_SYN_SENT;
399 TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepinit);
400 tp->iss = tcp_new_iss(tp, 0);
401 tcp_sendseqinit(tp);
402 error = tcp_output(tp);
403 break;
404
405 /*
406 * Create a TCP connection between two sockets.
407 */
408 case PRU_CONNECT2:
409 error = EOPNOTSUPP;
410 break;
411
412 /*
413 * Initiate disconnect from peer.
414 * If connection never passed embryonic stage, just drop;
415 * else if don't need to let data drain, then can just drop anyways,
416 * else have to begin TCP shutdown process: mark socket disconnecting,
417 * drain unread data, state switch to reflect user close, and
418 * send segment (e.g. FIN) to peer. Socket will be really disconnected
419 * when peer sends FIN and acks ours.
420 *
421 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
422 */
423 case PRU_DISCONNECT:
424 tp = tcp_disconnect(tp);
425 break;
426
427 /*
428 * Accept a connection. Essentially all the work is
429 * done at higher levels; just return the address
430 * of the peer, storing through addr.
431 */
432 case PRU_ACCEPT:
433 #ifdef INET
434 if (inp)
435 in_setpeeraddr(inp, nam);
436 #endif
437 #ifdef INET6
438 if (in6p)
439 in6_setpeeraddr(in6p, nam);
440 #endif
441 break;
442
443 /*
444 * Mark the connection as being incapable of further output.
445 */
446 case PRU_SHUTDOWN:
447 socantsendmore(so);
448 tp = tcp_usrclosed(tp);
449 if (tp)
450 error = tcp_output(tp);
451 break;
452
453 /*
454 * After a receive, possibly send window update to peer.
455 */
456 case PRU_RCVD:
457 /*
458 * soreceive() calls this function when a user receives
459 * ancillary data on a listening socket. We don't call
460 * tcp_output in such a case, since there is no header
461 * template for a listening socket and hence the kernel
462 * will panic.
463 */
464 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 0)
465 (void) tcp_output(tp);
466 break;
467
468 /*
469 * Do a send by putting data in output queue and updating urgent
470 * marker if URG set. Possibly send more data.
471 */
472 case PRU_SEND:
473 if (control && control->m_len) {
474 m_freem(control);
475 m_freem(m);
476 error = EINVAL;
477 break;
478 }
479 sbappendstream(&so->so_snd, m);
480 error = tcp_output(tp);
481 break;
482
483 /*
484 * Abort the TCP.
485 */
486 case PRU_ABORT:
487 tp = tcp_drop(tp, ECONNABORTED);
488 break;
489
490 case PRU_RCVOOB:
491 if (control && control->m_len) {
492 m_freem(control);
493 m_freem(m);
494 error = EINVAL;
495 break;
496 }
497 if ((so->so_oobmark == 0 &&
498 (so->so_state & SS_RCVATMARK) == 0) ||
499 so->so_options & SO_OOBINLINE ||
500 tp->t_oobflags & TCPOOB_HADDATA) {
501 error = EINVAL;
502 break;
503 }
504 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
505 error = EWOULDBLOCK;
506 break;
507 }
508 m->m_len = 1;
509 *mtod(m, char *) = tp->t_iobc;
510 if (((long)nam & MSG_PEEK) == 0)
511 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
512 break;
513
514 case PRU_SENDOOB:
515 if (sbspace(&so->so_snd) < -512) {
516 m_freem(m);
517 error = ENOBUFS;
518 break;
519 }
520 /*
521 * According to RFC961 (Assigned Protocols),
522 * the urgent pointer points to the last octet
523 * of urgent data. We continue, however,
524 * to consider it to indicate the first octet
525 * of data past the urgent section.
526 * Otherwise, snd_up should be one lower.
527 */
528 sbappendstream(&so->so_snd, m);
529 tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
530 tp->t_force = 1;
531 error = tcp_output(tp);
532 tp->t_force = 0;
533 break;
534
535 default:
536 panic("tcp_usrreq");
537 }
538
539 tcp_debug_trace(so, tp, ostate, req);
540
541 release:
542 splx(s);
543 return (error);
544 }
545
546 static void
547 change_keepalive(struct socket *so, struct tcpcb *tp)
548 {
549 tp->t_maxidle = tp->t_keepcnt * tp->t_keepintvl;
550 TCP_TIMER_DISARM(tp, TCPT_KEEP);
551 TCP_TIMER_DISARM(tp, TCPT_2MSL);
552
553 if (tp->t_state == TCPS_SYN_RECEIVED ||
554 tp->t_state == TCPS_SYN_SENT) {
555 TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepinit);
556 } else if (so->so_options & SO_KEEPALIVE &&
557 tp->t_state <= TCPS_CLOSE_WAIT) {
558 TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepintvl);
559 } else {
560 TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepidle);
561 }
562
563 if ((tp->t_state == TCPS_FIN_WAIT_2) && (tp->t_maxidle > 0))
564 TCP_TIMER_ARM(tp, TCPT_2MSL, tp->t_maxidle);
565 }
566
567
568 int
569 tcp_ctloutput(int op, struct socket *so, struct sockopt *sopt)
570 {
571 int error = 0, s;
572 struct inpcb *inp;
573 #ifdef INET6
574 struct in6pcb *in6p;
575 #endif
576 struct tcpcb *tp;
577 u_int ui;
578 int family; /* family of the socket */
579 int level, optname, optval;
580
581 level = sopt->sopt_level;
582 optname = sopt->sopt_name;
583
584 family = so->so_proto->pr_domain->dom_family;
585
586 s = splsoftnet();
587 switch (family) {
588 #ifdef INET
589 case PF_INET:
590 inp = sotoinpcb(so);
591 #ifdef INET6
592 in6p = NULL;
593 #endif
594 break;
595 #endif
596 #ifdef INET6
597 case PF_INET6:
598 inp = NULL;
599 in6p = sotoin6pcb(so);
600 break;
601 #endif
602 default:
603 splx(s);
604 panic("%s: af %d", __func__, family);
605 }
606 #ifndef INET6
607 if (inp == NULL)
608 #else
609 if (inp == NULL && in6p == NULL)
610 #endif
611 {
612 splx(s);
613 return (ECONNRESET);
614 }
615 if (level != IPPROTO_TCP) {
616 switch (family) {
617 #ifdef INET
618 case PF_INET:
619 error = ip_ctloutput(op, so, sopt);
620 break;
621 #endif
622 #ifdef INET6
623 case PF_INET6:
624 error = ip6_ctloutput(op, so, sopt);
625 break;
626 #endif
627 }
628 splx(s);
629 return (error);
630 }
631 if (inp)
632 tp = intotcpcb(inp);
633 #ifdef INET6
634 else if (in6p)
635 tp = in6totcpcb(in6p);
636 #endif
637 else
638 tp = NULL;
639
640 switch (op) {
641 case PRCO_SETOPT:
642 switch (optname) {
643 #ifdef TCP_SIGNATURE
644 case TCP_MD5SIG:
645 error = sockopt_getint(sopt, &optval);
646 if (error)
647 break;
648 if (optval > 0)
649 tp->t_flags |= TF_SIGNATURE;
650 else
651 tp->t_flags &= ~TF_SIGNATURE;
652 break;
653 #endif /* TCP_SIGNATURE */
654
655 case TCP_NODELAY:
656 error = sockopt_getint(sopt, &optval);
657 if (error)
658 break;
659 if (optval)
660 tp->t_flags |= TF_NODELAY;
661 else
662 tp->t_flags &= ~TF_NODELAY;
663 break;
664
665 case TCP_MAXSEG:
666 error = sockopt_getint(sopt, &optval);
667 if (error)
668 break;
669 if (optval > 0 && optval <= tp->t_peermss)
670 tp->t_peermss = optval; /* limit on send size */
671 else
672 error = EINVAL;
673 break;
674 #ifdef notyet
675 case TCP_CONGCTL:
676 /* XXX string overflow XXX */
677 error = tcp_congctl_select(tp, sopt->sopt_data);
678 break;
679 #endif
680
681 case TCP_KEEPIDLE:
682 error = sockopt_get(sopt, &ui, sizeof(ui));
683 if (error)
684 break;
685 if (ui > 0) {
686 tp->t_keepidle = ui;
687 change_keepalive(so, tp);
688 } else
689 error = EINVAL;
690 break;
691
692 case TCP_KEEPINTVL:
693 error = sockopt_get(sopt, &ui, sizeof(ui));
694 if (error)
695 break;
696 if (ui > 0) {
697 tp->t_keepintvl = ui;
698 change_keepalive(so, tp);
699 } else
700 error = EINVAL;
701 break;
702
703 case TCP_KEEPCNT:
704 error = sockopt_get(sopt, &ui, sizeof(ui));
705 if (error)
706 break;
707 if (ui > 0) {
708 tp->t_keepcnt = ui;
709 change_keepalive(so, tp);
710 } else
711 error = EINVAL;
712 break;
713
714 case TCP_KEEPINIT:
715 error = sockopt_get(sopt, &ui, sizeof(ui));
716 if (error)
717 break;
718 if (ui > 0) {
719 tp->t_keepinit = ui;
720 change_keepalive(so, tp);
721 } else
722 error = EINVAL;
723 break;
724
725 default:
726 error = ENOPROTOOPT;
727 break;
728 }
729 break;
730
731 case PRCO_GETOPT:
732 switch (optname) {
733 #ifdef TCP_SIGNATURE
734 case TCP_MD5SIG:
735 optval = (tp->t_flags & TF_SIGNATURE) ? 1 : 0;
736 error = sockopt_set(sopt, &optval, sizeof(optval));
737 break;
738 #endif
739 case TCP_NODELAY:
740 optval = tp->t_flags & TF_NODELAY;
741 error = sockopt_set(sopt, &optval, sizeof(optval));
742 break;
743 case TCP_MAXSEG:
744 optval = tp->t_peermss;
745 error = sockopt_set(sopt, &optval, sizeof(optval));
746 break;
747 #ifdef notyet
748 case TCP_CONGCTL:
749 break;
750 #endif
751 default:
752 error = ENOPROTOOPT;
753 break;
754 }
755 break;
756 }
757 splx(s);
758 return (error);
759 }
760
/*
 * Default send and receive buffer reservations, in bytes, handed to
 * soreserve() by tcp_attach().  Either default can be overridden by
 * defining the macro before this point.  The fallback values are
 * parenthesized so that they expand safely inside larger expressions.
 */
#ifndef TCP_SENDSPACE
#define	TCP_SENDSPACE	(1024 * 32)
#endif
int	tcp_sendspace = TCP_SENDSPACE;
#ifndef TCP_RECVSPACE
#define	TCP_RECVSPACE	(1024 * 32)
#endif
int	tcp_recvspace = TCP_RECVSPACE;
769
770 /*
771 * tcp_attach: attach TCP protocol to socket, allocating internet protocol
772 * control block, TCP control block, buffer space and entering LISTEN state
773 * if to accept connections.
774 */
775 static int
776 tcp_attach(struct socket *so, int proto)
777 {
778 struct tcpcb *tp;
779 struct inpcb *inp;
780 #ifdef INET6
781 struct in6pcb *in6p;
782 #endif
783 int s, error, family;
784
785 /* Assign the lock (must happen even if we will error out). */
786 s = splsoftnet();
787 sosetlock(so);
788 KASSERT(solocked(so));
789
790 family = so->so_proto->pr_domain->dom_family;
791 switch (family) {
792 #ifdef INET
793 case PF_INET:
794 inp = sotoinpcb(so);
795 #ifdef INET6
796 in6p = NULL;
797 #endif
798 break;
799 #endif
800 #ifdef INET6
801 case PF_INET6:
802 inp = NULL;
803 in6p = sotoin6pcb(so);
804 break;
805 #endif
806 default:
807 error = EAFNOSUPPORT;
808 goto out;
809 }
810
811 KASSERT(inp == NULL);
812 #ifdef INET6
813 KASSERT(in6p == NULL);
814 #endif
815
816 #ifdef MBUFTRACE
817 so->so_mowner = &tcp_sock_mowner;
818 so->so_rcv.sb_mowner = &tcp_sock_rx_mowner;
819 so->so_snd.sb_mowner = &tcp_sock_tx_mowner;
820 #endif
821 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
822 error = soreserve(so, tcp_sendspace, tcp_recvspace);
823 if (error)
824 goto out;
825 }
826
827 so->so_rcv.sb_flags |= SB_AUTOSIZE;
828 so->so_snd.sb_flags |= SB_AUTOSIZE;
829
830 switch (family) {
831 #ifdef INET
832 case PF_INET:
833 error = in_pcballoc(so, &tcbtable);
834 if (error)
835 goto out;
836 inp = sotoinpcb(so);
837 #ifdef INET6
838 in6p = NULL;
839 #endif
840 break;
841 #endif
842 #ifdef INET6
843 case PF_INET6:
844 error = in6_pcballoc(so, &tcbtable);
845 if (error)
846 goto out;
847 inp = NULL;
848 in6p = sotoin6pcb(so);
849 break;
850 #endif
851 default:
852 error = EAFNOSUPPORT;
853 goto out;
854 }
855 if (inp)
856 tp = tcp_newtcpcb(family, (void *)inp);
857 #ifdef INET6
858 else if (in6p)
859 tp = tcp_newtcpcb(family, (void *)in6p);
860 #endif
861 else
862 tp = NULL;
863
864 if (tp == NULL) {
865 int nofd = so->so_state & SS_NOFDREF; /* XXX */
866
867 so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */
868 #ifdef INET
869 if (inp)
870 in_pcbdetach(inp);
871 #endif
872 #ifdef INET6
873 if (in6p)
874 in6_pcbdetach(in6p);
875 #endif
876 so->so_state |= nofd;
877 error = ENOBUFS;
878 goto out;
879 }
880 tp->t_state = TCPS_CLOSED;
881 if ((so->so_options & SO_LINGER) && so->so_linger == 0) {
882 so->so_linger = TCP_LINGERTIME;
883 }
884 out:
885 KASSERT(solocked(so));
886 splx(s);
887 return error;
888 }
889
890 static void
891 tcp_detach(struct socket *so)
892 {
893 struct inpcb *inp;
894 #ifdef INET6
895 struct in6pcb *in6p;
896 #endif
897 struct tcpcb *tp = NULL;
898 int s, family;
899
900 KASSERT(solocked(so));
901
902 s = splsoftnet();
903 family = so->so_proto->pr_domain->dom_family;
904 switch (family) {
905 #ifdef INET
906 case PF_INET:
907 inp = sotoinpcb(so);
908 tp = intotcpcb(inp);
909 break;
910 #endif
911 #ifdef INET6
912 case PF_INET6:
913 in6p = sotoin6pcb(so);
914 tp = in6totcpcb(in6p);
915 break;
916 #endif
917 default:
918 splx(s);
919 return;
920 }
921 KASSERT(tp != NULL);
922 (void)tcp_disconnect(tp);
923 splx(s);
924 }
925
926 static int
927 tcp_ioctl(struct socket *so, u_long cmd, void *nam, struct ifnet *ifp)
928 {
929 switch (so->so_proto->pr_domain->dom_family) {
930 #ifdef INET
931 case PF_INET:
932 return in_control(so, cmd, nam, ifp);
933 #endif
934 #ifdef INET6
935 case PF_INET6:
936 return in6_control(so, cmd, nam, ifp);
937 #endif
938 default:
939 return EAFNOSUPPORT;
940 }
941 }
942
/*
 * tcp_stat: stat(2) hook for TCP sockets.  There is nothing to report,
 * not even a block size, so ub is left untouched and 0 is returned.
 * The socket must be locked by the caller.
 */
static int
tcp_stat(struct socket *so, struct stat *ub)
{
	const int error = 0;

	KASSERT(solocked(so));

	return error;
}
951
952 static int
953 tcp_peeraddr(struct socket *so, struct mbuf *nam)
954 {
955 struct inpcb *inp = NULL;
956 #ifdef INET6
957 struct in6pcb *in6p = NULL;
958 #endif
959 struct tcpcb *tp = NULL;
960 int ostate = 0;
961
962 switch (so->so_proto->pr_domain->dom_family) {
963 #ifdef INET
964 case PF_INET:
965 inp = sotoinpcb(so);
966 break;
967 #endif
968 #ifdef INET6
969 case PF_INET6:
970 in6p = sotoin6pcb(so);
971 break;
972 #endif
973 default:
974 return EAFNOSUPPORT;
975 }
976
977 if (inp == NULL
978 #ifdef INET6
979 && in6p == NULL
980 #endif
981 )
982 return EINVAL;
983
984 #ifdef INET
985 if (inp) {
986 tp = intotcpcb(inp);
987 ostate = tcp_debug_capture(tp, PRU_PEERADDR);
988 in_setpeeraddr(inp, nam);
989 }
990 #endif
991 #ifdef INET6
992 if (in6p) {
993 tp = in6totcpcb(in6p);
994 ostate = tcp_debug_capture(tp, PRU_PEERADDR);
995 in6_setpeeraddr(in6p, nam);
996 }
997 #endif
998
999 tcp_debug_trace(so, tp, ostate, PRU_PEERADDR);
1000
1001 return 0;
1002 }
1003
1004 static int
1005 tcp_sockaddr(struct socket *so, struct mbuf *nam)
1006 {
1007 struct inpcb *inp = NULL;
1008 #ifdef INET6
1009 struct in6pcb *in6p = NULL;
1010 #endif
1011 struct tcpcb *tp = NULL;
1012 int ostate = 0;
1013
1014 switch (so->so_proto->pr_domain->dom_family) {
1015 #ifdef INET
1016 case PF_INET:
1017 inp = sotoinpcb(so);
1018 break;
1019 #endif
1020 #ifdef INET6
1021 case PF_INET6:
1022 in6p = sotoin6pcb(so);
1023 break;
1024 #endif
1025 default:
1026 return EAFNOSUPPORT;
1027 }
1028
1029 if (inp == NULL
1030 #ifdef INET6
1031 && in6p == NULL
1032 #endif
1033 )
1034 return EINVAL;
1035
1036 #ifdef INET
1037 if (inp) {
1038 tp = intotcpcb(inp);
1039 ostate = tcp_debug_capture(tp, PRU_SOCKADDR);
1040 in_setsockaddr(inp, nam);
1041 }
1042 #endif
1043 #ifdef INET6
1044 if (in6p) {
1045 tp = in6totcpcb(in6p);
1046 ostate = tcp_debug_capture(tp, PRU_SOCKADDR);
1047 in6_setsockaddr(in6p, nam);
1048 }
1049 #endif
1050
1051 tcp_debug_trace(so, tp, ostate, PRU_SOCKADDR);
1052
1053 return 0;
1054 }
1055
1056 /*
1057 * Initiate (or continue) disconnect.
1058 * If embryonic state, just send reset (once).
1059 * If in ``let data drain'' option and linger null, just drop.
1060 * Otherwise (hard), mark socket disconnecting and drop
1061 * current input data; switch states based on user close, and
1062 * send segment to peer (with FIN).
1063 */
1064 struct tcpcb *
1065 tcp_disconnect(struct tcpcb *tp)
1066 {
1067 struct socket *so;
1068
1069 if (tp->t_inpcb)
1070 so = tp->t_inpcb->inp_socket;
1071 #ifdef INET6
1072 else if (tp->t_in6pcb)
1073 so = tp->t_in6pcb->in6p_socket;
1074 #endif
1075 else
1076 so = NULL;
1077
1078 if (TCPS_HAVEESTABLISHED(tp->t_state) == 0)
1079 tp = tcp_close(tp);
1080 else if ((so->so_options & SO_LINGER) && so->so_linger == 0)
1081 tp = tcp_drop(tp, 0);
1082 else {
1083 soisdisconnecting(so);
1084 sbflush(&so->so_rcv);
1085 tp = tcp_usrclosed(tp);
1086 if (tp)
1087 (void) tcp_output(tp);
1088 }
1089 return (tp);
1090 }
1091
1092 /*
1093 * User issued close, and wish to trail through shutdown states:
1094 * if never received SYN, just forget it. If got a SYN from peer,
1095 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
1096 * If already got a FIN from peer, then almost done; go to LAST_ACK
1097 * state. In all other cases, have already sent FIN to peer (e.g.
1098 * after PRU_SHUTDOWN), and just have to play tedious game waiting
1099 * for peer to send FIN or not respond to keep-alives, etc.
1100 * We can let the user exit from the close as soon as the FIN is acked.
1101 */
1102 struct tcpcb *
1103 tcp_usrclosed(struct tcpcb *tp)
1104 {
1105
1106 switch (tp->t_state) {
1107
1108 case TCPS_CLOSED:
1109 case TCPS_LISTEN:
1110 case TCPS_SYN_SENT:
1111 tp->t_state = TCPS_CLOSED;
1112 tp = tcp_close(tp);
1113 break;
1114
1115 case TCPS_SYN_RECEIVED:
1116 case TCPS_ESTABLISHED:
1117 tp->t_state = TCPS_FIN_WAIT_1;
1118 break;
1119
1120 case TCPS_CLOSE_WAIT:
1121 tp->t_state = TCPS_LAST_ACK;
1122 break;
1123 }
1124 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) {
1125 struct socket *so;
1126 if (tp->t_inpcb)
1127 so = tp->t_inpcb->inp_socket;
1128 #ifdef INET6
1129 else if (tp->t_in6pcb)
1130 so = tp->t_in6pcb->in6p_socket;
1131 #endif
1132 else
1133 so = NULL;
1134 if (so)
1135 soisdisconnected(so);
1136 /*
1137 * If we are in FIN_WAIT_2, we arrived here because the
1138 * application did a shutdown of the send side. Like the
1139 * case of a transition from FIN_WAIT_1 to FIN_WAIT_2 after
1140 * a full close, we start a timer to make sure sockets are
1141 * not left in FIN_WAIT_2 forever.
1142 */
1143 if ((tp->t_state == TCPS_FIN_WAIT_2) && (tp->t_maxidle > 0))
1144 TCP_TIMER_ARM(tp, TCPT_2MSL, tp->t_maxidle);
1145 else if (tp->t_state == TCPS_TIME_WAIT
1146 && ((tp->t_inpcb
1147 && (tcp4_vtw_enable & 1)
1148 && vtw_add(AF_INET, tp))
1149 ||
1150 (tp->t_in6pcb
1151 && (tcp6_vtw_enable & 1)
1152 && vtw_add(AF_INET6, tp)))) {
1153 tp = 0;
1154 }
1155 }
1156 return (tp);
1157 }
1158
1159 /*
1160 * sysctl helper routine for net.inet.ip.mssdflt. it can't be less
1161 * than 32.
1162 */
1163 static int
1164 sysctl_net_inet_tcp_mssdflt(SYSCTLFN_ARGS)
1165 {
1166 int error, mssdflt;
1167 struct sysctlnode node;
1168
1169 mssdflt = tcp_mssdflt;
1170 node = *rnode;
1171 node.sysctl_data = &mssdflt;
1172 error = sysctl_lookup(SYSCTLFN_CALL(&node));
1173 if (error || newp == NULL)
1174 return (error);
1175
1176 if (mssdflt < 32)
1177 return (EINVAL);
1178 tcp_mssdflt = mssdflt;
1179
1180 mutex_enter(softnet_lock);
1181 tcp_tcpcb_template();
1182 mutex_exit(softnet_lock);
1183
1184 return (0);
1185 }
1186
1187 /*
1188 * sysctl helper for TCP CB template update
1189 */
1190 static int
1191 sysctl_update_tcpcb_template(SYSCTLFN_ARGS)
1192 {
1193 int t, error;
1194 struct sysctlnode node;
1195
1196 /* follow procedures in sysctl(9) manpage */
1197 t = *(int *)rnode->sysctl_data;
1198 node = *rnode;
1199 node.sysctl_data = &t;
1200 error = sysctl_lookup(SYSCTLFN_CALL(&node));
1201 if (error || newp == NULL)
1202 return error;
1203
1204 if (t < 0)
1205 return EINVAL;
1206
1207 *(int *)rnode->sysctl_data = t;
1208
1209 mutex_enter(softnet_lock);
1210 tcp_tcpcb_template();
1211 mutex_exit(softnet_lock);
1212
1213 return 0;
1214 }
1215
1216 /*
1217 * sysctl helper routine for setting port related values under
1218 * net.inet.ip and net.inet6.ip6. does basic range checking and does
1219 * additional checks for each type. this code has placed in
1220 * tcp_input.c since INET and INET6 both use the same tcp code.
1221 *
1222 * this helper is not static so that both inet and inet6 can use it.
1223 */
1224 int
1225 sysctl_net_inet_ip_ports(SYSCTLFN_ARGS)
1226 {
1227 int error, tmp;
1228 int apmin, apmax;
1229 #ifndef IPNOPRIVPORTS
1230 int lpmin, lpmax;
1231 #endif /* IPNOPRIVPORTS */
1232 struct sysctlnode node;
1233
1234 if (namelen != 0)
1235 return (EINVAL);
1236
1237 switch (name[-3]) {
1238 #ifdef INET
1239 case PF_INET:
1240 apmin = anonportmin;
1241 apmax = anonportmax;
1242 #ifndef IPNOPRIVPORTS
1243 lpmin = lowportmin;
1244 lpmax = lowportmax;
1245 #endif /* IPNOPRIVPORTS */
1246 break;
1247 #endif /* INET */
1248 #ifdef INET6
1249 case PF_INET6:
1250 apmin = ip6_anonportmin;
1251 apmax = ip6_anonportmax;
1252 #ifndef IPNOPRIVPORTS
1253 lpmin = ip6_lowportmin;
1254 lpmax = ip6_lowportmax;
1255 #endif /* IPNOPRIVPORTS */
1256 break;
1257 #endif /* INET6 */
1258 default:
1259 return (EINVAL);
1260 }
1261
1262 /*
1263 * insert temporary copy into node, perform lookup on
1264 * temporary, then restore pointer
1265 */
1266 node = *rnode;
1267 tmp = *(int*)rnode->sysctl_data;
1268 node.sysctl_data = &tmp;
1269 error = sysctl_lookup(SYSCTLFN_CALL(&node));
1270 if (error || newp == NULL)
1271 return (error);
1272
1273 /*
1274 * simple port range check
1275 */
1276 if (tmp < 0 || tmp > 65535)
1277 return (EINVAL);
1278
1279 /*
1280 * per-node range checks
1281 */
1282 switch (rnode->sysctl_num) {
1283 case IPCTL_ANONPORTMIN:
1284 case IPV6CTL_ANONPORTMIN:
1285 if (tmp >= apmax)
1286 return (EINVAL);
1287 #ifndef IPNOPRIVPORTS
1288 if (tmp < IPPORT_RESERVED)
1289 return (EINVAL);
1290 #endif /* IPNOPRIVPORTS */
1291 break;
1292
1293 case IPCTL_ANONPORTMAX:
1294 case IPV6CTL_ANONPORTMAX:
1295 if (apmin >= tmp)
1296 return (EINVAL);
1297 #ifndef IPNOPRIVPORTS
1298 if (tmp < IPPORT_RESERVED)
1299 return (EINVAL);
1300 #endif /* IPNOPRIVPORTS */
1301 break;
1302
1303 #ifndef IPNOPRIVPORTS
1304 case IPCTL_LOWPORTMIN:
1305 case IPV6CTL_LOWPORTMIN:
1306 if (tmp >= lpmax ||
1307 tmp > IPPORT_RESERVEDMAX ||
1308 tmp < IPPORT_RESERVEDMIN)
1309 return (EINVAL);
1310 break;
1311
1312 case IPCTL_LOWPORTMAX:
1313 case IPV6CTL_LOWPORTMAX:
1314 if (lpmin >= tmp ||
1315 tmp > IPPORT_RESERVEDMAX ||
1316 tmp < IPPORT_RESERVEDMIN)
1317 return (EINVAL);
1318 break;
1319 #endif /* IPNOPRIVPORTS */
1320
1321 default:
1322 return (EINVAL);
1323 }
1324
1325 *(int*)rnode->sysctl_data = tmp;
1326
1327 return (0);
1328 }
1329
1330 static inline int
1331 copyout_uid(struct socket *sockp, void *oldp, size_t *oldlenp)
1332 {
1333 if (oldp) {
1334 size_t sz;
1335 uid_t uid;
1336 int error;
1337
1338 if (sockp->so_cred == NULL)
1339 return EPERM;
1340
1341 uid = kauth_cred_geteuid(sockp->so_cred);
1342 sz = MIN(sizeof(uid), *oldlenp);
1343 if ((error = copyout(&uid, oldp, sz)) != 0)
1344 return error;
1345 }
1346 *oldlenp = sizeof(uid_t);
1347 return 0;
1348 }
1349
1350 static inline int
1351 inet4_ident_core(struct in_addr raddr, u_int rport,
1352 struct in_addr laddr, u_int lport,
1353 void *oldp, size_t *oldlenp,
1354 struct lwp *l, int dodrop)
1355 {
1356 struct inpcb *inp;
1357 struct socket *sockp;
1358
1359 inp = in_pcblookup_connect(&tcbtable, raddr, rport, laddr, lport, 0);
1360
1361 if (inp == NULL || (sockp = inp->inp_socket) == NULL)
1362 return ESRCH;
1363
1364 if (dodrop) {
1365 struct tcpcb *tp;
1366 int error;
1367
1368 if (inp == NULL || (tp = intotcpcb(inp)) == NULL ||
1369 (inp->inp_socket->so_options & SO_ACCEPTCONN) != 0)
1370 return ESRCH;
1371
1372 error = kauth_authorize_network(l->l_cred, KAUTH_NETWORK_SOCKET,
1373 KAUTH_REQ_NETWORK_SOCKET_DROP, inp->inp_socket, tp, NULL);
1374 if (error)
1375 return (error);
1376
1377 (void)tcp_drop(tp, ECONNABORTED);
1378 return 0;
1379 }
1380 else
1381 return copyout_uid(sockp, oldp, oldlenp);
1382 }
1383
#ifdef INET6
/*
 * Common IPv6 back end for the ident and drop nodes; the IPv6 twin of
 * inet4_ident_core().
 *
 * Looks up the connection in tcbtable and returns ESRCH when there is
 * no such pcb or it has no socket.
 *  - dodrop == 0: hand the socket owner's uid to copyout_uid().
 *  - dodrop != 0: refuse (ESRCH) pcbs without a tcpcb and listening
 *    sockets, ask kauth whether lwp l may drop the socket, and on
 *    approval drop the connection with ECONNABORTED.
 */
static inline int
inet6_ident_core(struct in6_addr *raddr, u_int rport,
    struct in6_addr *laddr, u_int lport,
    void *oldp, size_t *oldlenp,
    struct lwp *l, int dodrop)
{
	struct in6pcb *in6p;
	struct socket *so;
	struct tcpcb *tp;
	int rv;

	in6p = in6_pcblookup_connect(&tcbtable, raddr, rport, laddr, lport,
	    0, 0);
	if (in6p == NULL)
		return ESRCH;
	so = in6p->in6p_socket;
	if (so == NULL)
		return ESRCH;

	if (!dodrop)
		return copyout_uid(so, oldp, oldlenp);

	tp = in6totcpcb(in6p);
	if (tp == NULL || (so->so_options & SO_ACCEPTCONN) != 0)
		return ESRCH;

	rv = kauth_authorize_network(l->l_cred, KAUTH_NETWORK_SOCKET,
	    KAUTH_REQ_NETWORK_SOCKET_DROP, so, tp, NULL);
	if (rv != 0)
		return rv;

	(void)tcp_drop(tp, ECONNABORTED);
	return 0;
}
#endif
1419
1420 /*
1421 * sysctl helper routine for the net.inet.tcp.drop and
1422 * net.inet6.tcp6.drop nodes.
1423 */
1424 #define sysctl_net_inet_tcp_drop sysctl_net_inet_tcp_ident
1425
1426 /*
1427 * sysctl helper routine for the net.inet.tcp.ident and
1428 * net.inet6.tcp6.ident nodes. contains backwards compat code for the
1429 * old way of looking up the ident information for ipv4 which involves
1430 * stuffing the port/addr pairs into the mib lookup.
1431 */
1432 static int
1433 sysctl_net_inet_tcp_ident(SYSCTLFN_ARGS)
1434 {
1435 #ifdef INET
1436 struct sockaddr_in *si4[2];
1437 #endif /* INET */
1438 #ifdef INET6
1439 struct sockaddr_in6 *si6[2];
1440 #endif /* INET6 */
1441 struct sockaddr_storage sa[2];
1442 int error, pf, dodrop;
1443
1444 dodrop = name[-1] == TCPCTL_DROP;
1445 if (dodrop) {
1446 if (oldp != NULL || *oldlenp != 0)
1447 return EINVAL;
1448 if (newp == NULL)
1449 return EPERM;
1450 if (newlen < sizeof(sa))
1451 return ENOMEM;
1452 }
1453 if (namelen != 4 && namelen != 0)
1454 return EINVAL;
1455 if (name[-2] != IPPROTO_TCP)
1456 return EINVAL;
1457 pf = name[-3];
1458
1459 /* old style lookup, ipv4 only */
1460 if (namelen == 4) {
1461 #ifdef INET
1462 struct in_addr laddr, raddr;
1463 u_int lport, rport;
1464
1465 if (pf != PF_INET)
1466 return EPROTONOSUPPORT;
1467 raddr.s_addr = (uint32_t)name[0];
1468 rport = (u_int)name[1];
1469 laddr.s_addr = (uint32_t)name[2];
1470 lport = (u_int)name[3];
1471
1472 mutex_enter(softnet_lock);
1473 error = inet4_ident_core(raddr, rport, laddr, lport,
1474 oldp, oldlenp, l, dodrop);
1475 mutex_exit(softnet_lock);
1476 return error;
1477 #else /* INET */
1478 return EINVAL;
1479 #endif /* INET */
1480 }
1481
1482 if (newp == NULL || newlen != sizeof(sa))
1483 return EINVAL;
1484 error = copyin(newp, &sa, newlen);
1485 if (error)
1486 return error;
1487
1488 /*
1489 * requested families must match
1490 */
1491 if (pf != sa[0].ss_family || sa[0].ss_family != sa[1].ss_family)
1492 return EINVAL;
1493
1494 switch (pf) {
1495 #ifdef INET6
1496 case PF_INET6:
1497 si6[0] = (struct sockaddr_in6*)&sa[0];
1498 si6[1] = (struct sockaddr_in6*)&sa[1];
1499 if (si6[0]->sin6_len != sizeof(*si6[0]) ||
1500 si6[1]->sin6_len != sizeof(*si6[1]))
1501 return EINVAL;
1502
1503 if (!IN6_IS_ADDR_V4MAPPED(&si6[0]->sin6_addr) &&
1504 !IN6_IS_ADDR_V4MAPPED(&si6[1]->sin6_addr)) {
1505 error = sa6_embedscope(si6[0], ip6_use_defzone);
1506 if (error)
1507 return error;
1508 error = sa6_embedscope(si6[1], ip6_use_defzone);
1509 if (error)
1510 return error;
1511
1512 mutex_enter(softnet_lock);
1513 error = inet6_ident_core(&si6[0]->sin6_addr,
1514 si6[0]->sin6_port, &si6[1]->sin6_addr,
1515 si6[1]->sin6_port, oldp, oldlenp, l, dodrop);
1516 mutex_exit(softnet_lock);
1517 return error;
1518 }
1519
1520 if (IN6_IS_ADDR_V4MAPPED(&si6[0]->sin6_addr) !=
1521 IN6_IS_ADDR_V4MAPPED(&si6[1]->sin6_addr))
1522 return EINVAL;
1523
1524 in6_sin6_2_sin_in_sock((struct sockaddr *)&sa[0]);
1525 in6_sin6_2_sin_in_sock((struct sockaddr *)&sa[1]);
1526 /*FALLTHROUGH*/
1527 #endif /* INET6 */
1528 #ifdef INET
1529 case PF_INET:
1530 si4[0] = (struct sockaddr_in*)&sa[0];
1531 si4[1] = (struct sockaddr_in*)&sa[1];
1532 if (si4[0]->sin_len != sizeof(*si4[0]) ||
1533 si4[0]->sin_len != sizeof(*si4[1]))
1534 return EINVAL;
1535
1536 mutex_enter(softnet_lock);
1537 error = inet4_ident_core(si4[0]->sin_addr, si4[0]->sin_port,
1538 si4[1]->sin_addr, si4[1]->sin_port,
1539 oldp, oldlenp, l, dodrop);
1540 mutex_exit(softnet_lock);
1541 return error;
1542 #endif /* INET */
1543 default:
1544 return EPROTONOSUPPORT;
1545 }
1546 }
1547
1548 /*
1549 * sysctl helper for the inet and inet6 pcblists. handles tcp/udp and
1550 * inet/inet6, as well as raw pcbs for each. specifically not
1551 * declared static so that raw sockets and udp/udp6 can use it as
1552 * well.
1553 */
1554 int
1555 sysctl_inpcblist(SYSCTLFN_ARGS)
1556 {
1557 #ifdef INET
1558 struct sockaddr_in *in;
1559 const struct inpcb *inp;
1560 #endif
1561 #ifdef INET6
1562 struct sockaddr_in6 *in6;
1563 const struct in6pcb *in6p;
1564 #endif
1565 struct inpcbtable *pcbtbl = __UNCONST(rnode->sysctl_data);
1566 const struct inpcb_hdr *inph;
1567 struct tcpcb *tp;
1568 struct kinfo_pcb pcb;
1569 char *dp;
1570 size_t len, needed, elem_size, out_size;
1571 int error, elem_count, pf, proto, pf2;
1572
1573 if (namelen != 4)
1574 return (EINVAL);
1575
1576 if (oldp != NULL) {
1577 len = *oldlenp;
1578 elem_size = name[2];
1579 elem_count = name[3];
1580 if (elem_size != sizeof(pcb))
1581 return EINVAL;
1582 } else {
1583 len = 0;
1584 elem_count = INT_MAX;
1585 elem_size = sizeof(pcb);
1586 }
1587 error = 0;
1588 dp = oldp;
1589 out_size = elem_size;
1590 needed = 0;
1591
1592 if (namelen == 1 && name[0] == CTL_QUERY)
1593 return (sysctl_query(SYSCTLFN_CALL(rnode)));
1594
1595 if (name - oname != 4)
1596 return (EINVAL);
1597
1598 pf = oname[1];
1599 proto = oname[2];
1600 pf2 = (oldp != NULL) ? pf : 0;
1601
1602 mutex_enter(softnet_lock);
1603
1604 TAILQ_FOREACH(inph, &pcbtbl->inpt_queue, inph_queue) {
1605 #ifdef INET
1606 inp = (const struct inpcb *)inph;
1607 #endif
1608 #ifdef INET6
1609 in6p = (const struct in6pcb *)inph;
1610 #endif
1611
1612 if (inph->inph_af != pf)
1613 continue;
1614
1615 if (kauth_authorize_network(l->l_cred, KAUTH_NETWORK_SOCKET,
1616 KAUTH_REQ_NETWORK_SOCKET_CANSEE, inph->inph_socket, NULL,
1617 NULL) != 0)
1618 continue;
1619
1620 memset(&pcb, 0, sizeof(pcb));
1621
1622 pcb.ki_family = pf;
1623 pcb.ki_type = proto;
1624
1625 switch (pf2) {
1626 case 0:
1627 /* just probing for size */
1628 break;
1629 #ifdef INET
1630 case PF_INET:
1631 pcb.ki_family = inp->inp_socket->so_proto->
1632 pr_domain->dom_family;
1633 pcb.ki_type = inp->inp_socket->so_proto->
1634 pr_type;
1635 pcb.ki_protocol = inp->inp_socket->so_proto->
1636 pr_protocol;
1637 pcb.ki_pflags = inp->inp_flags;
1638
1639 pcb.ki_sostate = inp->inp_socket->so_state;
1640 pcb.ki_prstate = inp->inp_state;
1641 if (proto == IPPROTO_TCP) {
1642 tp = intotcpcb(inp);
1643 pcb.ki_tstate = tp->t_state;
1644 pcb.ki_tflags = tp->t_flags;
1645 }
1646
1647 pcb.ki_pcbaddr = PTRTOUINT64(inp);
1648 pcb.ki_ppcbaddr = PTRTOUINT64(inp->inp_ppcb);
1649 pcb.ki_sockaddr = PTRTOUINT64(inp->inp_socket);
1650
1651 pcb.ki_rcvq = inp->inp_socket->so_rcv.sb_cc;
1652 pcb.ki_sndq = inp->inp_socket->so_snd.sb_cc;
1653
1654 in = satosin(&pcb.ki_src);
1655 in->sin_len = sizeof(*in);
1656 in->sin_family = pf;
1657 in->sin_port = inp->inp_lport;
1658 in->sin_addr = inp->inp_laddr;
1659 if (pcb.ki_prstate >= INP_CONNECTED) {
1660 in = satosin(&pcb.ki_dst);
1661 in->sin_len = sizeof(*in);
1662 in->sin_family = pf;
1663 in->sin_port = inp->inp_fport;
1664 in->sin_addr = inp->inp_faddr;
1665 }
1666 break;
1667 #endif
1668 #ifdef INET6
1669 case PF_INET6:
1670 pcb.ki_family = in6p->in6p_socket->so_proto->
1671 pr_domain->dom_family;
1672 pcb.ki_type = in6p->in6p_socket->so_proto->pr_type;
1673 pcb.ki_protocol = in6p->in6p_socket->so_proto->
1674 pr_protocol;
1675 pcb.ki_pflags = in6p->in6p_flags;
1676
1677 pcb.ki_sostate = in6p->in6p_socket->so_state;
1678 pcb.ki_prstate = in6p->in6p_state;
1679 if (proto == IPPROTO_TCP) {
1680 tp = in6totcpcb(in6p);
1681 pcb.ki_tstate = tp->t_state;
1682 pcb.ki_tflags = tp->t_flags;
1683 }
1684
1685 pcb.ki_pcbaddr = PTRTOUINT64(in6p);
1686 pcb.ki_ppcbaddr = PTRTOUINT64(in6p->in6p_ppcb);
1687 pcb.ki_sockaddr = PTRTOUINT64(in6p->in6p_socket);
1688
1689 pcb.ki_rcvq = in6p->in6p_socket->so_rcv.sb_cc;
1690 pcb.ki_sndq = in6p->in6p_socket->so_snd.sb_cc;
1691
1692 in6 = satosin6(&pcb.ki_src);
1693 in6->sin6_len = sizeof(*in6);
1694 in6->sin6_family = pf;
1695 in6->sin6_port = in6p->in6p_lport;
1696 in6->sin6_flowinfo = in6p->in6p_flowinfo;
1697 in6->sin6_addr = in6p->in6p_laddr;
1698 in6->sin6_scope_id = 0; /* XXX? */
1699
1700 if (pcb.ki_prstate >= IN6P_CONNECTED) {
1701 in6 = satosin6(&pcb.ki_dst);
1702 in6->sin6_len = sizeof(*in6);
1703 in6->sin6_family = pf;
1704 in6->sin6_port = in6p->in6p_fport;
1705 in6->sin6_flowinfo = in6p->in6p_flowinfo;
1706 in6->sin6_addr = in6p->in6p_faddr;
1707 in6->sin6_scope_id = 0; /* XXX? */
1708 }
1709 break;
1710 #endif
1711 }
1712
1713 if (len >= elem_size && elem_count > 0) {
1714 error = copyout(&pcb, dp, out_size);
1715 if (error) {
1716 mutex_exit(softnet_lock);
1717 return (error);
1718 }
1719 dp += elem_size;
1720 len -= elem_size;
1721 }
1722 needed += elem_size;
1723 if (elem_count > 0 && elem_count != INT_MAX)
1724 elem_count--;
1725 }
1726
1727 *oldlenp = needed;
1728 if (oldp == NULL)
1729 *oldlenp += PCB_SLOP * sizeof(struct kinfo_pcb);
1730
1731 mutex_exit(softnet_lock);
1732
1733 return (error);
1734 }
1735
1736 static int
1737 sysctl_tcp_congctl(SYSCTLFN_ARGS)
1738 {
1739 struct sysctlnode node;
1740 int error;
1741 char newname[TCPCC_MAXLEN];
1742
1743 strlcpy(newname, tcp_congctl_global_name, sizeof(newname) - 1);
1744
1745 node = *rnode;
1746 node.sysctl_data = newname;
1747 node.sysctl_size = sizeof(newname);
1748
1749 error = sysctl_lookup(SYSCTLFN_CALL(&node));
1750
1751 if (error ||
1752 newp == NULL ||
1753 strncmp(newname, tcp_congctl_global_name, sizeof(newname)) == 0)
1754 return error;
1755
1756 mutex_enter(softnet_lock);
1757 error = tcp_congctl_select(NULL, newname);
1758 mutex_exit(softnet_lock);
1759
1760 return error;
1761 }
1762
1763 static int
1764 sysctl_tcp_init_win(SYSCTLFN_ARGS)
1765 {
1766 int error;
1767 u_int iw;
1768 struct sysctlnode node;
1769
1770 iw = *(u_int *)rnode->sysctl_data;
1771 node = *rnode;
1772 node.sysctl_data = &iw;
1773 node.sysctl_size = sizeof(iw);
1774 error = sysctl_lookup(SYSCTLFN_CALL(&node));
1775 if (error || newp == NULL)
1776 return error;
1777
1778 if (iw >= __arraycount(tcp_init_win_max))
1779 return EINVAL;
1780 *(u_int *)rnode->sysctl_data = iw;
1781 return 0;
1782 }
1783
1784 static int
1785 sysctl_tcp_keep(SYSCTLFN_ARGS)
1786 {
1787 int error;
1788 u_int tmp;
1789 struct sysctlnode node;
1790
1791 node = *rnode;
1792 tmp = *(u_int *)rnode->sysctl_data;
1793 node.sysctl_data = &tmp;
1794
1795 error = sysctl_lookup(SYSCTLFN_CALL(&node));
1796 if (error || newp == NULL)
1797 return error;
1798
1799 mutex_enter(softnet_lock);
1800
1801 *(u_int *)rnode->sysctl_data = tmp;
1802 tcp_tcpcb_template(); /* update the template */
1803
1804 mutex_exit(softnet_lock);
1805 return 0;
1806 }
1807
1808 static int
1809 sysctl_net_inet_tcp_stats(SYSCTLFN_ARGS)
1810 {
1811
1812 return (NETSTAT_SYSCTL(tcpstat_percpu, TCP_NSTATS));
1813 }
1814
1815 /*
1816 * this (second stage) setup routine is a replacement for tcp_sysctl()
1817 * (which is currently used for ipv4 and ipv6)
1818 */
1819 static void
1820 sysctl_net_inet_tcp_setup2(struct sysctllog **clog, int pf, const char *pfname,
1821 const char *tcpname)
1822 {
1823 const struct sysctlnode *sack_node;
1824 const struct sysctlnode *abc_node;
1825 const struct sysctlnode *ecn_node;
1826 const struct sysctlnode *congctl_node;
1827 const struct sysctlnode *mslt_node;
1828 const struct sysctlnode *vtw_node;
1829 #ifdef TCP_DEBUG
1830 extern struct tcp_debug tcp_debug[TCP_NDEBUG];
1831 extern int tcp_debx;
1832 #endif
1833
1834 sysctl_createv(clog, 0, NULL, NULL,
1835 CTLFLAG_PERMANENT,
1836 CTLTYPE_NODE, pfname, NULL,
1837 NULL, 0, NULL, 0,
1838 CTL_NET, pf, CTL_EOL);
1839 sysctl_createv(clog, 0, NULL, NULL,
1840 CTLFLAG_PERMANENT,
1841 CTLTYPE_NODE, tcpname,
1842 SYSCTL_DESCR("TCP related settings"),
1843 NULL, 0, NULL, 0,
1844 CTL_NET, pf, IPPROTO_TCP, CTL_EOL);
1845
1846 sysctl_createv(clog, 0, NULL, NULL,
1847 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1848 CTLTYPE_INT, "rfc1323",
1849 SYSCTL_DESCR("Enable RFC1323 TCP extensions"),
1850 sysctl_update_tcpcb_template, 0, &tcp_do_rfc1323, 0,
1851 CTL_NET, pf, IPPROTO_TCP, TCPCTL_RFC1323, CTL_EOL);
1852 sysctl_createv(clog, 0, NULL, NULL,
1853 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1854 CTLTYPE_INT, "sendspace",
1855 SYSCTL_DESCR("Default TCP send buffer size"),
1856 NULL, 0, &tcp_sendspace, 0,
1857 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SENDSPACE, CTL_EOL);
1858 sysctl_createv(clog, 0, NULL, NULL,
1859 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1860 CTLTYPE_INT, "recvspace",
1861 SYSCTL_DESCR("Default TCP receive buffer size"),
1862 NULL, 0, &tcp_recvspace, 0,
1863 CTL_NET, pf, IPPROTO_TCP, TCPCTL_RECVSPACE, CTL_EOL);
1864 sysctl_createv(clog, 0, NULL, NULL,
1865 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1866 CTLTYPE_INT, "mssdflt",
1867 SYSCTL_DESCR("Default maximum segment size"),
1868 sysctl_net_inet_tcp_mssdflt, 0, &tcp_mssdflt, 0,
1869 CTL_NET, pf, IPPROTO_TCP, TCPCTL_MSSDFLT, CTL_EOL);
1870 sysctl_createv(clog, 0, NULL, NULL,
1871 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1872 CTLTYPE_INT, "minmss",
1873 SYSCTL_DESCR("Lower limit for TCP maximum segment size"),
1874 NULL, 0, &tcp_minmss, 0,
1875 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL);
1876 sysctl_createv(clog, 0, NULL, NULL,
1877 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1878 CTLTYPE_INT, "msl",
1879 SYSCTL_DESCR("Maximum Segment Life"),
1880 NULL, 0, &tcp_msl, 0,
1881 CTL_NET, pf, IPPROTO_TCP, TCPCTL_MSL, CTL_EOL);
1882 sysctl_createv(clog, 0, NULL, NULL,
1883 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1884 CTLTYPE_INT, "syn_cache_limit",
1885 SYSCTL_DESCR("Maximum number of entries in the TCP "
1886 "compressed state engine"),
1887 NULL, 0, &tcp_syn_cache_limit, 0,
1888 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SYN_CACHE_LIMIT,
1889 CTL_EOL);
1890 sysctl_createv(clog, 0, NULL, NULL,
1891 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1892 CTLTYPE_INT, "syn_bucket_limit",
1893 SYSCTL_DESCR("Maximum number of entries per hash "
1894 "bucket in the TCP compressed state "
1895 "engine"),
1896 NULL, 0, &tcp_syn_bucket_limit, 0,
1897 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SYN_BUCKET_LIMIT,
1898 CTL_EOL);
1899 #if 0 /* obsoleted */
1900 sysctl_createv(clog, 0, NULL, NULL,
1901 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1902 CTLTYPE_INT, "syn_cache_interval",
1903 SYSCTL_DESCR("TCP compressed state engine's timer interval"),
1904 NULL, 0, &tcp_syn_cache_interval, 0,
1905 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SYN_CACHE_INTER,
1906 CTL_EOL);
1907 #endif
1908 sysctl_createv(clog, 0, NULL, NULL,
1909 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1910 CTLTYPE_INT, "init_win",
1911 SYSCTL_DESCR("Initial TCP congestion window"),
1912 sysctl_tcp_init_win, 0, &tcp_init_win, 0,
1913 CTL_NET, pf, IPPROTO_TCP, TCPCTL_INIT_WIN, CTL_EOL);
1914 sysctl_createv(clog, 0, NULL, NULL,
1915 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1916 CTLTYPE_INT, "mss_ifmtu",
1917 SYSCTL_DESCR("Use interface MTU for calculating MSS"),
1918 NULL, 0, &tcp_mss_ifmtu, 0,
1919 CTL_NET, pf, IPPROTO_TCP, TCPCTL_MSS_IFMTU, CTL_EOL);
1920 sysctl_createv(clog, 0, NULL, &sack_node,
1921 CTLFLAG_PERMANENT,
1922 CTLTYPE_NODE, "sack",
1923 SYSCTL_DESCR("RFC2018 Selective ACKnowledgement tunables"),
1924 NULL, 0, NULL, 0,
1925 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_EOL);
1926
1927 /* Congctl subtree */
1928 sysctl_createv(clog, 0, NULL, &congctl_node,
1929 CTLFLAG_PERMANENT,
1930 CTLTYPE_NODE, "congctl",
1931 SYSCTL_DESCR("TCP Congestion Control"),
1932 NULL, 0, NULL, 0,
1933 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL);
1934 sysctl_createv(clog, 0, &congctl_node, NULL,
1935 CTLFLAG_PERMANENT,
1936 CTLTYPE_STRING, "available",
1937 SYSCTL_DESCR("Available Congestion Control Mechanisms"),
1938 NULL, 0, tcp_congctl_avail, 0, CTL_CREATE, CTL_EOL);
1939 sysctl_createv(clog, 0, &congctl_node, NULL,
1940 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1941 CTLTYPE_STRING, "selected",
1942 SYSCTL_DESCR("Selected Congestion Control Mechanism"),
1943 sysctl_tcp_congctl, 0, NULL, TCPCC_MAXLEN,
1944 CTL_CREATE, CTL_EOL);
1945
1946 sysctl_createv(clog, 0, NULL, NULL,
1947 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1948 CTLTYPE_INT, "win_scale",
1949 SYSCTL_DESCR("Use RFC1323 window scale options"),
1950 sysctl_update_tcpcb_template, 0, &tcp_do_win_scale, 0,
1951 CTL_NET, pf, IPPROTO_TCP, TCPCTL_WSCALE, CTL_EOL);
1952 sysctl_createv(clog, 0, NULL, NULL,
1953 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1954 CTLTYPE_INT, "timestamps",
1955 SYSCTL_DESCR("Use RFC1323 time stamp options"),
1956 sysctl_update_tcpcb_template, 0, &tcp_do_timestamps, 0,
1957 CTL_NET, pf, IPPROTO_TCP, TCPCTL_TSTAMP, CTL_EOL);
1958 sysctl_createv(clog, 0, NULL, NULL,
1959 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1960 CTLTYPE_INT, "compat_42",
1961 SYSCTL_DESCR("Enable workarounds for 4.2BSD TCP bugs"),
1962 NULL, 0, &tcp_compat_42, 0,
1963 CTL_NET, pf, IPPROTO_TCP, TCPCTL_COMPAT_42, CTL_EOL);
1964 sysctl_createv(clog, 0, NULL, NULL,
1965 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1966 CTLTYPE_INT, "cwm",
1967 SYSCTL_DESCR("Hughes/Touch/Heidemann Congestion Window "
1968 "Monitoring"),
1969 NULL, 0, &tcp_cwm, 0,
1970 CTL_NET, pf, IPPROTO_TCP, TCPCTL_CWM, CTL_EOL);
1971 sysctl_createv(clog, 0, NULL, NULL,
1972 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1973 CTLTYPE_INT, "cwm_burstsize",
1974 SYSCTL_DESCR("Congestion Window Monitoring allowed "
1975 "burst count in packets"),
1976 NULL, 0, &tcp_cwm_burstsize, 0,
1977 CTL_NET, pf, IPPROTO_TCP, TCPCTL_CWM_BURSTSIZE,
1978 CTL_EOL);
1979 sysctl_createv(clog, 0, NULL, NULL,
1980 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1981 CTLTYPE_INT, "ack_on_push",
1982 SYSCTL_DESCR("Immediately return ACK when PSH is "
1983 "received"),
1984 NULL, 0, &tcp_ack_on_push, 0,
1985 CTL_NET, pf, IPPROTO_TCP, TCPCTL_ACK_ON_PUSH, CTL_EOL);
1986 sysctl_createv(clog, 0, NULL, NULL,
1987 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1988 CTLTYPE_INT, "keepidle",
1989 SYSCTL_DESCR("Allowed connection idle ticks before a "
1990 "keepalive probe is sent"),
1991 sysctl_tcp_keep, 0, &tcp_keepidle, 0,
1992 CTL_NET, pf, IPPROTO_TCP, TCPCTL_KEEPIDLE, CTL_EOL);
1993 sysctl_createv(clog, 0, NULL, NULL,
1994 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1995 CTLTYPE_INT, "keepintvl",
1996 SYSCTL_DESCR("Ticks before next keepalive probe is sent"),
1997 sysctl_tcp_keep, 0, &tcp_keepintvl, 0,
1998 CTL_NET, pf, IPPROTO_TCP, TCPCTL_KEEPINTVL, CTL_EOL);
1999 sysctl_createv(clog, 0, NULL, NULL,
2000 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2001 CTLTYPE_INT, "keepcnt",
2002 SYSCTL_DESCR("Number of keepalive probes to send"),
2003 sysctl_tcp_keep, 0, &tcp_keepcnt, 0,
2004 CTL_NET, pf, IPPROTO_TCP, TCPCTL_KEEPCNT, CTL_EOL);
2005 sysctl_createv(clog, 0, NULL, NULL,
2006 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
2007 CTLTYPE_INT, "slowhz",
2008 SYSCTL_DESCR("Keepalive ticks per second"),
2009 NULL, PR_SLOWHZ, NULL, 0,
2010 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SLOWHZ, CTL_EOL);
2011 sysctl_createv(clog, 0, NULL, NULL,
2012 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2013 CTLTYPE_INT, "log_refused",
2014 SYSCTL_DESCR("Log refused TCP connections"),
2015 NULL, 0, &tcp_log_refused, 0,
2016 CTL_NET, pf, IPPROTO_TCP, TCPCTL_LOG_REFUSED, CTL_EOL);
2017 #if 0 /* obsoleted */
2018 sysctl_createv(clog, 0, NULL, NULL,
2019 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2020 CTLTYPE_INT, "rstratelimit", NULL,
2021 NULL, 0, &tcp_rst_ratelim, 0,
2022 CTL_NET, pf, IPPROTO_TCP, TCPCTL_RSTRATELIMIT, CTL_EOL);
2023 #endif
2024 sysctl_createv(clog, 0, NULL, NULL,
2025 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2026 CTLTYPE_INT, "rstppslimit",
2027 SYSCTL_DESCR("Maximum number of RST packets to send "
2028 "per second"),
2029 NULL, 0, &tcp_rst_ppslim, 0,
2030 CTL_NET, pf, IPPROTO_TCP, TCPCTL_RSTPPSLIMIT, CTL_EOL);
2031 sysctl_createv(clog, 0, NULL, NULL,
2032 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2033 CTLTYPE_INT, "delack_ticks",
2034 SYSCTL_DESCR("Number of ticks to delay sending an ACK"),
2035 NULL, 0, &tcp_delack_ticks, 0,
2036 CTL_NET, pf, IPPROTO_TCP, TCPCTL_DELACK_TICKS, CTL_EOL);
2037 sysctl_createv(clog, 0, NULL, NULL,
2038 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2039 CTLTYPE_INT, "init_win_local",
2040 SYSCTL_DESCR("Initial TCP window size (in segments)"),
2041 sysctl_tcp_init_win, 0, &tcp_init_win_local, 0,
2042 CTL_NET, pf, IPPROTO_TCP, TCPCTL_INIT_WIN_LOCAL,
2043 CTL_EOL);
2044 sysctl_createv(clog, 0, NULL, NULL,
2045 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2046 CTLTYPE_STRUCT, "ident",
2047 SYSCTL_DESCR("RFC1413 Identification Protocol lookups"),
2048 sysctl_net_inet_tcp_ident, 0, NULL, sizeof(uid_t),
2049 CTL_NET, pf, IPPROTO_TCP, TCPCTL_IDENT, CTL_EOL);
2050 sysctl_createv(clog, 0, NULL, NULL,
2051 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2052 CTLTYPE_INT, "do_loopback_cksum",
2053 SYSCTL_DESCR("Perform TCP checksum on loopback"),
2054 NULL, 0, &tcp_do_loopback_cksum, 0,
2055 CTL_NET, pf, IPPROTO_TCP, TCPCTL_LOOPBACKCKSUM,
2056 CTL_EOL);
2057 sysctl_createv(clog, 0, NULL, NULL,
2058 CTLFLAG_PERMANENT,
2059 CTLTYPE_STRUCT, "pcblist",
2060 SYSCTL_DESCR("TCP protocol control block list"),
2061 sysctl_inpcblist, 0, &tcbtable, 0,
2062 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE,
2063 CTL_EOL);
2064 sysctl_createv(clog, 0, NULL, NULL,
2065 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2066 CTLTYPE_INT, "keepinit",
2067 SYSCTL_DESCR("Ticks before initial tcp connection times out"),
2068 sysctl_tcp_keep, 0, &tcp_keepinit, 0,
2069 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL);
2070
2071 /* TCP socket buffers auto-sizing nodes */
2072 sysctl_createv(clog, 0, NULL, NULL,
2073 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2074 CTLTYPE_INT, "recvbuf_auto",
2075 SYSCTL_DESCR("Enable automatic receive "
2076 "buffer sizing (experimental)"),
2077 NULL, 0, &tcp_do_autorcvbuf, 0,
2078 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL);
2079 sysctl_createv(clog, 0, NULL, NULL,
2080 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2081 CTLTYPE_INT, "recvbuf_inc",
2082 SYSCTL_DESCR("Incrementor step size of "
2083 "automatic receive buffer"),
2084 NULL, 0, &tcp_autorcvbuf_inc, 0,
2085 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL);
2086 sysctl_createv(clog, 0, NULL, NULL,
2087 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2088 CTLTYPE_INT, "recvbuf_max",
2089 SYSCTL_DESCR("Max size of automatic receive buffer"),
2090 NULL, 0, &tcp_autorcvbuf_max, 0,
2091 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL);
2092
2093 sysctl_createv(clog, 0, NULL, NULL,
2094 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2095 CTLTYPE_INT, "sendbuf_auto",
2096 SYSCTL_DESCR("Enable automatic send "
2097 "buffer sizing (experimental)"),
2098 NULL, 0, &tcp_do_autosndbuf, 0,
2099 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL);
2100 sysctl_createv(clog, 0, NULL, NULL,
2101 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2102 CTLTYPE_INT, "sendbuf_inc",
2103 SYSCTL_DESCR("Incrementor step size of "
2104 "automatic send buffer"),
2105 NULL, 0, &tcp_autosndbuf_inc, 0,
2106 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL);
2107 sysctl_createv(clog, 0, NULL, NULL,
2108 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2109 CTLTYPE_INT, "sendbuf_max",
2110 SYSCTL_DESCR("Max size of automatic send buffer"),
2111 NULL, 0, &tcp_autosndbuf_max, 0,
2112 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL);
2113
2114 /* ECN subtree */
2115 sysctl_createv(clog, 0, NULL, &ecn_node,
2116 CTLFLAG_PERMANENT,
2117 CTLTYPE_NODE, "ecn",
2118 SYSCTL_DESCR("RFC3168 Explicit Congestion Notification"),
2119 NULL, 0, NULL, 0,
2120 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL);
2121 sysctl_createv(clog, 0, &ecn_node, NULL,
2122 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2123 CTLTYPE_INT, "enable",
2124 SYSCTL_DESCR("Enable TCP Explicit Congestion "
2125 "Notification"),
2126 NULL, 0, &tcp_do_ecn, 0, CTL_CREATE, CTL_EOL);
2127 sysctl_createv(clog, 0, &ecn_node, NULL,
2128 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2129 CTLTYPE_INT, "maxretries",
2130 SYSCTL_DESCR("Number of times to retry ECN setup "
2131 "before disabling ECN on the connection"),
2132 NULL, 0, &tcp_ecn_maxretries, 0, CTL_CREATE, CTL_EOL);
2133
2134 /* SACK gets it's own little subtree. */
2135 sysctl_createv(clog, 0, NULL, &sack_node,
2136 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2137 CTLTYPE_INT, "enable",
2138 SYSCTL_DESCR("Enable RFC2018 Selective ACKnowledgement"),
2139 NULL, 0, &tcp_do_sack, 0,
2140 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_CREATE, CTL_EOL);
2141 sysctl_createv(clog, 0, NULL, &sack_node,
2142 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2143 CTLTYPE_INT, "maxholes",
2144 SYSCTL_DESCR("Maximum number of TCP SACK holes allowed per connection"),
2145 NULL, 0, &tcp_sack_tp_maxholes, 0,
2146 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_CREATE, CTL_EOL);
2147 sysctl_createv(clog, 0, NULL, &sack_node,
2148 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2149 CTLTYPE_INT, "globalmaxholes",
2150 SYSCTL_DESCR("Global maximum number of TCP SACK holes"),
2151 NULL, 0, &tcp_sack_globalmaxholes, 0,
2152 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_CREATE, CTL_EOL);
2153 sysctl_createv(clog, 0, NULL, &sack_node,
2154 CTLFLAG_PERMANENT,
2155 CTLTYPE_INT, "globalholes",
2156 SYSCTL_DESCR("Global number of TCP SACK holes"),
2157 NULL, 0, &tcp_sack_globalholes, 0,
2158 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_CREATE, CTL_EOL);
2159
2160 sysctl_createv(clog, 0, NULL, NULL,
2161 CTLFLAG_PERMANENT,
2162 CTLTYPE_STRUCT, "stats",
2163 SYSCTL_DESCR("TCP statistics"),
2164 sysctl_net_inet_tcp_stats, 0, NULL, 0,
2165 CTL_NET, pf, IPPROTO_TCP, TCPCTL_STATS,
2166 CTL_EOL);
2167 sysctl_createv(clog, 0, NULL, NULL,
2168 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2169 CTLTYPE_INT, "local_by_rtt",
2170 SYSCTL_DESCR("Use RTT estimator to decide which hosts "
2171 "are local"),
2172 NULL, 0, &tcp_rttlocal, 0,
2173 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL);
2174 #ifdef TCP_DEBUG
2175 sysctl_createv(clog, 0, NULL, NULL,
2176 CTLFLAG_PERMANENT,
2177 CTLTYPE_STRUCT, "debug",
2178 SYSCTL_DESCR("TCP sockets debug information"),
2179 NULL, 0, &tcp_debug, sizeof(tcp_debug),
2180 CTL_NET, pf, IPPROTO_TCP, TCPCTL_DEBUG,
2181 CTL_EOL);
2182 sysctl_createv(clog, 0, NULL, NULL,
2183 CTLFLAG_PERMANENT,
2184 CTLTYPE_INT, "debx",
2185 SYSCTL_DESCR("Number of TCP debug sockets messages"),
2186 NULL, 0, &tcp_debx, sizeof(tcp_debx),
2187 CTL_NET, pf, IPPROTO_TCP, TCPCTL_DEBX,
2188 CTL_EOL);
2189 #endif
2190 sysctl_createv(clog, 0, NULL, NULL,
2191 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2192 CTLTYPE_STRUCT, "drop",
2193 SYSCTL_DESCR("TCP drop connection"),
2194 sysctl_net_inet_tcp_drop, 0, NULL, 0,
2195 CTL_NET, pf, IPPROTO_TCP, TCPCTL_DROP, CTL_EOL);
2196 sysctl_createv(clog, 0, NULL, NULL,
2197 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2198 CTLTYPE_INT, "iss_hash",
2199 SYSCTL_DESCR("Enable RFC 1948 ISS by cryptographic "
2200 "hash computation"),
2201 NULL, 0, &tcp_do_rfc1948, sizeof(tcp_do_rfc1948),
2202 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE,
2203 CTL_EOL);
2204
2205 /* ABC subtree */
2206
2207 sysctl_createv(clog, 0, NULL, &abc_node,
2208 CTLFLAG_PERMANENT, CTLTYPE_NODE, "abc",
2209 SYSCTL_DESCR("RFC3465 Appropriate Byte Counting (ABC)"),
2210 NULL, 0, NULL, 0,
2211 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL);
2212 sysctl_createv(clog, 0, &abc_node, NULL,
2213 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2214 CTLTYPE_INT, "enable",
2215 SYSCTL_DESCR("Enable RFC3465 Appropriate Byte Counting"),
2216 NULL, 0, &tcp_do_abc, 0, CTL_CREATE, CTL_EOL);
2217 sysctl_createv(clog, 0, &abc_node, NULL,
2218 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2219 CTLTYPE_INT, "aggressive",
2220 SYSCTL_DESCR("1: L=2*SMSS 0: L=1*SMSS"),
2221 NULL, 0, &tcp_abc_aggressive, 0, CTL_CREATE, CTL_EOL);
2222
2223 /* MSL tuning subtree */
2224
2225 sysctl_createv(clog, 0, NULL, &mslt_node,
2226 CTLFLAG_PERMANENT, CTLTYPE_NODE, "mslt",
2227 SYSCTL_DESCR("MSL Tuning for TIME_WAIT truncation"),
2228 NULL, 0, NULL, 0,
2229 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL);
2230 sysctl_createv(clog, 0, &mslt_node, NULL,
2231 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2232 CTLTYPE_INT, "enable",
2233 SYSCTL_DESCR("Enable TIME_WAIT truncation"),
2234 NULL, 0, &tcp_msl_enable, 0, CTL_CREATE, CTL_EOL);
2235 sysctl_createv(clog, 0, &mslt_node, NULL,
2236 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2237 CTLTYPE_INT, "loopback",
2238 SYSCTL_DESCR("MSL value to use for loopback connections"),
2239 NULL, 0, &tcp_msl_loop, 0, CTL_CREATE, CTL_EOL);
2240 sysctl_createv(clog, 0, &mslt_node, NULL,
2241 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2242 CTLTYPE_INT, "local",
2243 SYSCTL_DESCR("MSL value to use for local connections"),
2244 NULL, 0, &tcp_msl_local, 0, CTL_CREATE, CTL_EOL);
2245 sysctl_createv(clog, 0, &mslt_node, NULL,
2246 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2247 CTLTYPE_INT, "remote",
2248 SYSCTL_DESCR("MSL value to use for remote connections"),
2249 NULL, 0, &tcp_msl_remote, 0, CTL_CREATE, CTL_EOL);
2250 sysctl_createv(clog, 0, &mslt_node, NULL,
2251 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2252 CTLTYPE_INT, "remote_threshold",
2253 SYSCTL_DESCR("RTT estimate value to promote local to remote"),
2254 NULL, 0, &tcp_msl_remote_threshold, 0, CTL_CREATE, CTL_EOL);
2255
2256 /* vestigial TIME_WAIT tuning subtree */
2257
2258 sysctl_createv(clog, 0, NULL, &vtw_node,
2259 CTLFLAG_PERMANENT, CTLTYPE_NODE, "vtw",
2260 SYSCTL_DESCR("Tuning for Vestigial TIME_WAIT"),
2261 NULL, 0, NULL, 0,
2262 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL);
2263 sysctl_createv(clog, 0, &vtw_node, NULL,
2264 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2265 CTLTYPE_INT, "enable",
2266 SYSCTL_DESCR("Enable Vestigial TIME_WAIT"),
2267 sysctl_tcp_vtw_enable, 0,
2268 (pf == AF_INET) ? &tcp4_vtw_enable : &tcp6_vtw_enable,
2269 0, CTL_CREATE, CTL_EOL);
2270 sysctl_createv(clog, 0, &vtw_node, NULL,
2271 CTLFLAG_PERMANENT|CTLFLAG_READONLY,
2272 CTLTYPE_INT, "entries",
2273 SYSCTL_DESCR("Maximum number of vestigial TIME_WAIT entries"),
2274 NULL, 0, &tcp_vtw_entries, 0, CTL_CREATE, CTL_EOL);
2275 }
2276
/*
 * Create the TCP sysctl subtree for every address family compiled into
 * the kernel: net.inet.tcp under INET, net.inet6.tcp6 under INET6.
 */
void
tcp_usrreq_init(void)
{
#ifdef INET
	sysctl_net_inet_tcp_setup2(NULL, PF_INET, "inet", "tcp");
#endif
#ifdef INET6
	sysctl_net_inet_tcp_setup2(NULL, PF_INET6, "inet6", "tcp6");
#endif
}
2288
2289 PR_WRAP_USRREQS(tcp)
2290 #define tcp_attach tcp_attach_wrapper
2291 #define tcp_detach tcp_detach_wrapper
2292 #define tcp_ioctl tcp_ioctl_wrapper
2293 #define tcp_stat tcp_stat_wrapper
2294 #define tcp_peeraddr tcp_peeraddr_wrapper
2295 #define tcp_sockaddr tcp_sockaddr_wrapper
2296 #define tcp_usrreq tcp_usrreq_wrapper
2297
2298 const struct pr_usrreqs tcp_usrreqs = {
2299 .pr_attach = tcp_attach,
2300 .pr_detach = tcp_detach,
2301 .pr_ioctl = tcp_ioctl,
2302 .pr_stat = tcp_stat,
2303 .pr_peeraddr = tcp_peeraddr,
2304 .pr_sockaddr = tcp_sockaddr,
2305 .pr_generic = tcp_usrreq,
2306 };
2307