Home | History | Annotate | Line # | Download | only in netinet
tcp_usrreq.c revision 1.74
      1 /*	$NetBSD: tcp_usrreq.c,v 1.74 2002/10/22 03:14:16 simonb Exp $	*/
      2 
      3 /*
      4  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. Neither the name of the project nor the names of its contributors
     16  *    may be used to endorse or promote products derived from this software
     17  *    without specific prior written permission.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
     20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
     23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     29  * SUCH DAMAGE.
     30  */
     31 
     32 /*-
     33  * Copyright (c) 1997, 1998 The NetBSD Foundation, Inc.
     34  * All rights reserved.
     35  *
     36  * This code is derived from software contributed to The NetBSD Foundation
     37  * by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation
     38  * Facility, NASA Ames Research Center.
     39  *
     40  * Redistribution and use in source and binary forms, with or without
     41  * modification, are permitted provided that the following conditions
     42  * are met:
     43  * 1. Redistributions of source code must retain the above copyright
     44  *    notice, this list of conditions and the following disclaimer.
     45  * 2. Redistributions in binary form must reproduce the above copyright
     46  *    notice, this list of conditions and the following disclaimer in the
     47  *    documentation and/or other materials provided with the distribution.
     48  * 3. All advertising materials mentioning features or use of this software
     49  *    must display the following acknowledgement:
     50  *	This product includes software developed by the NetBSD
     51  *	Foundation, Inc. and its contributors.
     52  * 4. Neither the name of The NetBSD Foundation nor the names of its
     53  *    contributors may be used to endorse or promote products derived
     54  *    from this software without specific prior written permission.
     55  *
     56  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     57  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     58  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     59  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     60  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     61  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     62  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     63  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     64  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     65  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     66  * POSSIBILITY OF SUCH DAMAGE.
     67  */
     68 
     69 /*
     70  * Copyright (c) 1982, 1986, 1988, 1993, 1995
     71  *	The Regents of the University of California.  All rights reserved.
     72  *
     73  * Redistribution and use in source and binary forms, with or without
     74  * modification, are permitted provided that the following conditions
     75  * are met:
     76  * 1. Redistributions of source code must retain the above copyright
     77  *    notice, this list of conditions and the following disclaimer.
     78  * 2. Redistributions in binary form must reproduce the above copyright
     79  *    notice, this list of conditions and the following disclaimer in the
     80  *    documentation and/or other materials provided with the distribution.
     81  * 3. All advertising materials mentioning features or use of this software
     82  *    must display the following acknowledgement:
     83  *	This product includes software developed by the University of
     84  *	California, Berkeley and its contributors.
     85  * 4. Neither the name of the University nor the names of its contributors
     86  *    may be used to endorse or promote products derived from this software
     87  *    without specific prior written permission.
     88  *
     89  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     90  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     91  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     92  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     93  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     94  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     95  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     96  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     97  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     98  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     99  * SUCH DAMAGE.
    100  *
    101  *	@(#)tcp_usrreq.c	8.5 (Berkeley) 6/21/95
    102  */
    103 
    104 #include <sys/cdefs.h>
    105 __KERNEL_RCSID(0, "$NetBSD: tcp_usrreq.c,v 1.74 2002/10/22 03:14:16 simonb Exp $");
    106 
    107 #include "opt_inet.h"
    108 #include "opt_ipsec.h"
    109 #include "opt_tcp_debug.h"
    110 
    111 #include <sys/param.h>
    112 #include <sys/systm.h>
    113 #include <sys/kernel.h>
    114 #include <sys/malloc.h>
    115 #include <sys/mbuf.h>
    116 #include <sys/socket.h>
    117 #include <sys/socketvar.h>
    118 #include <sys/protosw.h>
    119 #include <sys/errno.h>
    120 #include <sys/stat.h>
    121 #include <sys/proc.h>
    122 #include <sys/domain.h>
    123 #include <sys/sysctl.h>
    124 
    125 #include <net/if.h>
    126 #include <net/route.h>
    127 
    128 #include <netinet/in.h>
    129 #include <netinet/in_systm.h>
    130 #include <netinet/in_var.h>
    131 #include <netinet/ip.h>
    132 #include <netinet/in_pcb.h>
    133 #include <netinet/ip_var.h>
    134 
    135 #ifdef INET6
    136 #ifndef INET
    137 #include <netinet/in.h>
    138 #endif
    139 #include <netinet/ip6.h>
    140 #include <netinet6/in6_pcb.h>
    141 #include <netinet6/ip6_var.h>
    142 #endif
    143 
    144 #include <netinet/tcp.h>
    145 #include <netinet/tcp_fsm.h>
    146 #include <netinet/tcp_seq.h>
    147 #include <netinet/tcp_timer.h>
    148 #include <netinet/tcp_var.h>
    149 #include <netinet/tcpip.h>
    150 #include <netinet/tcp_debug.h>
    151 
    152 #include "opt_tcp_space.h"
    153 
    154 #ifdef IPSEC
    155 #include <netinet6/ipsec.h>
    156 #endif /*IPSEC*/
    157 
    158 /*
    159  * TCP protocol interface to socket abstraction.
    160  */
    161 extern	char *tcpstates[];
    162 
    163 /*
    164  * Process a TCP user request for TCP tb.  If this is a send request
    165  * then m is the mbuf chain of send data.  If this is a timer expiration
    166  * (called from the software clock routine), then timertype tells which timer.
    167  */
    168 /*ARGSUSED*/
    169 int
    170 tcp_usrreq(so, req, m, nam, control, p)
    171 	struct socket *so;
    172 	int req;
    173 	struct mbuf *m, *nam, *control;
    174 	struct proc *p;
    175 {
    176 	struct inpcb *inp;
    177 #ifdef INET6
    178 	struct in6pcb *in6p;
    179 #endif
    180 	struct tcpcb *tp = NULL;
    181 	int s;
    182 	int error = 0;
    183 #ifdef TCP_DEBUG
    184 	int ostate = 0;
    185 #endif
    186 	int family;	/* family of the socket */
    187 
    188 	family = so->so_proto->pr_domain->dom_family;
    189 
    190 	if (req == PRU_CONTROL) {
    191 		switch (family) {
    192 #ifdef INET
    193 		case PF_INET:
    194 			return (in_control(so, (long)m, (caddr_t)nam,
    195 			    (struct ifnet *)control, p));
    196 #endif
    197 #ifdef INET6
    198 		case PF_INET6:
    199 			return (in6_control(so, (long)m, (caddr_t)nam,
    200 			    (struct ifnet *)control, p));
    201 #endif
    202 		default:
    203 			return EAFNOSUPPORT;
    204 		}
    205 	}
    206 
    207 	if (req == PRU_PURGEIF) {
    208 		switch (family) {
    209 #ifdef INET
    210 		case PF_INET:
    211 			in_pcbpurgeif0(&tcbtable, (struct ifnet *)control);
    212 			in_purgeif((struct ifnet *)control);
    213 			in_pcbpurgeif(&tcbtable, (struct ifnet *)control);
    214 			break;
    215 #endif
    216 #ifdef INET6
    217 		case PF_INET6:
    218 			in6_pcbpurgeif0(&tcb6, (struct ifnet *)control);
    219 			in6_purgeif((struct ifnet *)control);
    220 			in6_pcbpurgeif(&tcb6, (struct ifnet *)control);
    221 			break;
    222 #endif
    223 		default:
    224 			return (EAFNOSUPPORT);
    225 		}
    226 		return (0);
    227 	}
    228 
    229 	s = splsoftnet();
    230 	switch (family) {
    231 #ifdef INET
    232 	case PF_INET:
    233 		inp = sotoinpcb(so);
    234 #ifdef INET6
    235 		in6p = NULL;
    236 #endif
    237 		break;
    238 #endif
    239 #ifdef INET6
    240 	case PF_INET6:
    241 		inp = NULL;
    242 		in6p = sotoin6pcb(so);
    243 		break;
    244 #endif
    245 	default:
    246 		splx(s);
    247 		return EAFNOSUPPORT;
    248 	}
    249 
    250 #ifdef DIAGNOSTIC
    251 #ifdef INET6
    252 	if (inp && in6p)
    253 		panic("tcp_usrreq: both inp and in6p set to non-NULL");
    254 #endif
    255 	if (req != PRU_SEND && req != PRU_SENDOOB && control)
    256 		panic("tcp_usrreq: unexpected control mbuf");
    257 #endif
    258 	/*
    259 	 * When a TCP is attached to a socket, then there will be
    260 	 * a (struct inpcb) pointed at by the socket, and this
    261 	 * structure will point at a subsidary (struct tcpcb).
    262 	 */
    263 #ifndef INET6
    264 	if (inp == 0 && req != PRU_ATTACH)
    265 #else
    266 	if ((inp == 0 && in6p == 0) && req != PRU_ATTACH)
    267 #endif
    268 	{
    269 		error = EINVAL;
    270 		goto release;
    271 	}
    272 #ifdef INET
    273 	if (inp) {
    274 		tp = intotcpcb(inp);
    275 		/* WHAT IF TP IS 0? */
    276 #ifdef KPROF
    277 		tcp_acounts[tp->t_state][req]++;
    278 #endif
    279 #ifdef TCP_DEBUG
    280 		ostate = tp->t_state;
    281 #endif
    282 	}
    283 #endif
    284 #ifdef INET6
    285 	if (in6p) {
    286 		tp = in6totcpcb(in6p);
    287 		/* WHAT IF TP IS 0? */
    288 #ifdef KPROF
    289 		tcp_acounts[tp->t_state][req]++;
    290 #endif
    291 #ifdef TCP_DEBUG
    292 		ostate = tp->t_state;
    293 #endif
    294 	}
    295 #endif
    296 
    297 	switch (req) {
    298 
    299 	/*
    300 	 * TCP attaches to socket via PRU_ATTACH, reserving space,
    301 	 * and an internet control block.
    302 	 */
    303 	case PRU_ATTACH:
    304 #ifndef INET6
    305 		if (inp != 0)
    306 #else
    307 		if (inp != 0 || in6p != 0)
    308 #endif
    309 		{
    310 			error = EISCONN;
    311 			break;
    312 		}
    313 		error = tcp_attach(so);
    314 		if (error)
    315 			break;
    316 		if ((so->so_options & SO_LINGER) && so->so_linger == 0)
    317 			so->so_linger = TCP_LINGERTIME;
    318 		tp = sototcpcb(so);
    319 		break;
    320 
    321 	/*
    322 	 * PRU_DETACH detaches the TCP protocol from the socket.
    323 	 */
    324 	case PRU_DETACH:
    325 		tp = tcp_disconnect(tp);
    326 		break;
    327 
    328 	/*
    329 	 * Give the socket an address.
    330 	 */
    331 	case PRU_BIND:
    332 		switch (family) {
    333 #ifdef INET
    334 		case PF_INET:
    335 			error = in_pcbbind(inp, nam, p);
    336 			break;
    337 #endif
    338 #ifdef INET6
    339 		case PF_INET6:
    340 			error = in6_pcbbind(in6p, nam, p);
    341 			if (!error) {
    342 				/* mapped addr case */
    343 				if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_laddr))
    344 					tp->t_family = AF_INET;
    345 				else
    346 					tp->t_family = AF_INET6;
    347 			}
    348 			break;
    349 #endif
    350 		}
    351 		break;
    352 
    353 	/*
    354 	 * Prepare to accept connections.
    355 	 */
    356 	case PRU_LISTEN:
    357 #ifdef INET
    358 		if (inp && inp->inp_lport == 0) {
    359 			error = in_pcbbind(inp, (struct mbuf *)0,
    360 			    (struct proc *)0);
    361 			if (error)
    362 				break;
    363 		}
    364 #endif
    365 #ifdef INET6
    366 		if (in6p && in6p->in6p_lport == 0) {
    367 			error = in6_pcbbind(in6p, (struct mbuf *)0,
    368 			    (struct proc *)0);
    369 			if (error)
    370 				break;
    371 		}
    372 #endif
    373 		tp->t_state = TCPS_LISTEN;
    374 		break;
    375 
    376 	/*
    377 	 * Initiate connection to peer.
    378 	 * Create a template for use in transmissions on this connection.
    379 	 * Enter SYN_SENT state, and mark socket as connecting.
    380 	 * Start keep-alive timer, and seed output sequence space.
    381 	 * Send initial segment on connection.
    382 	 */
    383 	case PRU_CONNECT:
    384 #ifdef INET
    385 		if (inp) {
    386 			if (inp->inp_lport == 0) {
    387 				error = in_pcbbind(inp, (struct mbuf *)0,
    388 				    (struct proc *)0);
    389 				if (error)
    390 					break;
    391 			}
    392 			error = in_pcbconnect(inp, nam);
    393 		}
    394 #endif
    395 #ifdef INET6
    396 		if (in6p) {
    397 			if (in6p->in6p_lport == 0) {
    398 				error = in6_pcbbind(in6p, (struct mbuf *)0,
    399 				    (struct proc *)0);
    400 				if (error)
    401 					break;
    402 			}
    403 			error = in6_pcbconnect(in6p, nam);
    404 			if (!error) {
    405 				/* mapped addr case */
    406 				if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_faddr))
    407 					tp->t_family = AF_INET;
    408 				else
    409 					tp->t_family = AF_INET6;
    410 			}
    411 		}
    412 #endif
    413 		if (error)
    414 			break;
    415 		tp->t_template = tcp_template(tp);
    416 		if (tp->t_template == 0) {
    417 #ifdef INET
    418 			if (inp)
    419 				in_pcbdisconnect(inp);
    420 #endif
    421 #ifdef INET6
    422 			if (in6p)
    423 				in6_pcbdisconnect(in6p);
    424 #endif
    425 			error = ENOBUFS;
    426 			break;
    427 		}
    428 		/* Compute window scaling to request.  */
    429 		while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
    430 		    (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat)
    431 			tp->request_r_scale++;
    432 		soisconnecting(so);
    433 		tcpstat.tcps_connattempt++;
    434 		tp->t_state = TCPS_SYN_SENT;
    435 		TCP_TIMER_ARM(tp, TCPT_KEEP, TCPTV_KEEP_INIT);
    436 		tp->iss = tcp_new_iss(tp, 0);
    437 		tcp_sendseqinit(tp);
    438 		error = tcp_output(tp);
    439 		break;
    440 
    441 	/*
    442 	 * Create a TCP connection between two sockets.
    443 	 */
    444 	case PRU_CONNECT2:
    445 		error = EOPNOTSUPP;
    446 		break;
    447 
    448 	/*
    449 	 * Initiate disconnect from peer.
    450 	 * If connection never passed embryonic stage, just drop;
    451 	 * else if don't need to let data drain, then can just drop anyways,
    452 	 * else have to begin TCP shutdown process: mark socket disconnecting,
    453 	 * drain unread data, state switch to reflect user close, and
    454 	 * send segment (e.g. FIN) to peer.  Socket will be really disconnected
    455 	 * when peer sends FIN and acks ours.
    456 	 *
    457 	 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
    458 	 */
    459 	case PRU_DISCONNECT:
    460 		tp = tcp_disconnect(tp);
    461 		break;
    462 
    463 	/*
    464 	 * Accept a connection.  Essentially all the work is
    465 	 * done at higher levels; just return the address
    466 	 * of the peer, storing through addr.
    467 	 */
    468 	case PRU_ACCEPT:
    469 #ifdef INET
    470 		if (inp)
    471 			in_setpeeraddr(inp, nam);
    472 #endif
    473 #ifdef INET6
    474 		if (in6p)
    475 			in6_setpeeraddr(in6p, nam);
    476 #endif
    477 		break;
    478 
    479 	/*
    480 	 * Mark the connection as being incapable of further output.
    481 	 */
    482 	case PRU_SHUTDOWN:
    483 		socantsendmore(so);
    484 		tp = tcp_usrclosed(tp);
    485 		if (tp)
    486 			error = tcp_output(tp);
    487 		break;
    488 
    489 	/*
    490 	 * After a receive, possibly send window update to peer.
    491 	 */
    492 	case PRU_RCVD:
    493 		/*
    494 		 * soreceive() calls this function when a user receives
    495 		 * ancillary data on a listening socket. We don't call
    496 		 * tcp_output in such a case, since there is no header
    497 		 * template for a listening socket and hence the kernel
    498 		 * will panic.
    499 		 */
    500 		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 0)
    501 			(void) tcp_output(tp);
    502 		break;
    503 
    504 	/*
    505 	 * Do a send by putting data in output queue and updating urgent
    506 	 * marker if URG set.  Possibly send more data.
    507 	 */
    508 	case PRU_SEND:
    509 		if (control && control->m_len) {
    510 			m_freem(control);
    511 			m_freem(m);
    512 			error = EINVAL;
    513 			break;
    514 		}
    515 		sbappendstream(&so->so_snd, m);
    516 		error = tcp_output(tp);
    517 		break;
    518 
    519 	/*
    520 	 * Abort the TCP.
    521 	 */
    522 	case PRU_ABORT:
    523 		tp = tcp_drop(tp, ECONNABORTED);
    524 		break;
    525 
    526 	case PRU_SENSE:
    527 		/*
    528 		 * stat: don't bother with a blocksize.
    529 		 */
    530 		splx(s);
    531 		return (0);
    532 
    533 	case PRU_RCVOOB:
    534 		if (control && control->m_len) {
    535 			m_freem(control);
    536 			m_freem(m);
    537 			error = EINVAL;
    538 			break;
    539 		}
    540 		if ((so->so_oobmark == 0 &&
    541 		    (so->so_state & SS_RCVATMARK) == 0) ||
    542 		    so->so_options & SO_OOBINLINE ||
    543 		    tp->t_oobflags & TCPOOB_HADDATA) {
    544 			error = EINVAL;
    545 			break;
    546 		}
    547 		if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
    548 			error = EWOULDBLOCK;
    549 			break;
    550 		}
    551 		m->m_len = 1;
    552 		*mtod(m, caddr_t) = tp->t_iobc;
    553 		if (((long)nam & MSG_PEEK) == 0)
    554 			tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
    555 		break;
    556 
    557 	case PRU_SENDOOB:
    558 		if (sbspace(&so->so_snd) < -512) {
    559 			m_freem(m);
    560 			error = ENOBUFS;
    561 			break;
    562 		}
    563 		/*
    564 		 * According to RFC961 (Assigned Protocols),
    565 		 * the urgent pointer points to the last octet
    566 		 * of urgent data.  We continue, however,
    567 		 * to consider it to indicate the first octet
    568 		 * of data past the urgent section.
    569 		 * Otherwise, snd_up should be one lower.
    570 		 */
    571 		sbappendstream(&so->so_snd, m);
    572 		tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
    573 		tp->t_force = 1;
    574 		error = tcp_output(tp);
    575 		tp->t_force = 0;
    576 		break;
    577 
    578 	case PRU_SOCKADDR:
    579 #ifdef INET
    580 		if (inp)
    581 			in_setsockaddr(inp, nam);
    582 #endif
    583 #ifdef INET6
    584 		if (in6p)
    585 			in6_setsockaddr(in6p, nam);
    586 #endif
    587 		break;
    588 
    589 	case PRU_PEERADDR:
    590 #ifdef INET
    591 		if (inp)
    592 			in_setpeeraddr(inp, nam);
    593 #endif
    594 #ifdef INET6
    595 		if (in6p)
    596 			in6_setpeeraddr(in6p, nam);
    597 #endif
    598 		break;
    599 
    600 	default:
    601 		panic("tcp_usrreq");
    602 	}
    603 #ifdef TCP_DEBUG
    604 	if (tp && (so->so_options & SO_DEBUG))
    605 		tcp_trace(TA_USER, ostate, tp, NULL, req);
    606 #endif
    607 
    608 release:
    609 	splx(s);
    610 	return (error);
    611 }
    612 
    613 int
    614 tcp_ctloutput(op, so, level, optname, mp)
    615 	int op;
    616 	struct socket *so;
    617 	int level, optname;
    618 	struct mbuf **mp;
    619 {
    620 	int error = 0, s;
    621 	struct inpcb *inp;
    622 #ifdef INET6
    623 	struct in6pcb *in6p;
    624 #endif
    625 	struct tcpcb *tp;
    626 	struct mbuf *m;
    627 	int i;
    628 	int family;	/* family of the socket */
    629 
    630 	family = so->so_proto->pr_domain->dom_family;
    631 
    632 	s = splsoftnet();
    633 	switch (family) {
    634 #ifdef INET
    635 	case PF_INET:
    636 		inp = sotoinpcb(so);
    637 #ifdef INET6
    638 		in6p = NULL;
    639 #endif
    640 		break;
    641 #endif
    642 #ifdef INET6
    643 	case PF_INET6:
    644 		inp = NULL;
    645 		in6p = sotoin6pcb(so);
    646 		break;
    647 #endif
    648 	default:
    649 		splx(s);
    650 		return EAFNOSUPPORT;
    651 	}
    652 #ifndef INET6
    653 	if (inp == NULL)
    654 #else
    655 	if (inp == NULL && in6p == NULL)
    656 #endif
    657 	{
    658 		splx(s);
    659 		if (op == PRCO_SETOPT && *mp)
    660 			(void) m_free(*mp);
    661 		return (ECONNRESET);
    662 	}
    663 	if (level != IPPROTO_TCP) {
    664 		switch (family) {
    665 #ifdef INET
    666 		case PF_INET:
    667 			error = ip_ctloutput(op, so, level, optname, mp);
    668 			break;
    669 #endif
    670 #ifdef INET6
    671 		case PF_INET6:
    672 			error = ip6_ctloutput(op, so, level, optname, mp);
    673 			break;
    674 #endif
    675 		}
    676 		splx(s);
    677 		return (error);
    678 	}
    679 	if (inp)
    680 		tp = intotcpcb(inp);
    681 #ifdef INET6
    682 	else if (in6p)
    683 		tp = in6totcpcb(in6p);
    684 #endif
    685 	else
    686 		tp = NULL;
    687 
    688 	switch (op) {
    689 
    690 	case PRCO_SETOPT:
    691 		m = *mp;
    692 		switch (optname) {
    693 
    694 		case TCP_NODELAY:
    695 			if (m == NULL || m->m_len < sizeof (int))
    696 				error = EINVAL;
    697 			else if (*mtod(m, int *))
    698 				tp->t_flags |= TF_NODELAY;
    699 			else
    700 				tp->t_flags &= ~TF_NODELAY;
    701 			break;
    702 
    703 		case TCP_MAXSEG:
    704 			if (m && (i = *mtod(m, int *)) > 0 &&
    705 			    i <= tp->t_peermss)
    706 				tp->t_peermss = i;  /* limit on send size */
    707 			else
    708 				error = EINVAL;
    709 			break;
    710 
    711 		default:
    712 			error = ENOPROTOOPT;
    713 			break;
    714 		}
    715 		if (m)
    716 			(void) m_free(m);
    717 		break;
    718 
    719 	case PRCO_GETOPT:
    720 		*mp = m = m_get(M_WAIT, MT_SOOPTS);
    721 		m->m_len = sizeof(int);
    722 
    723 		switch (optname) {
    724 		case TCP_NODELAY:
    725 			*mtod(m, int *) = tp->t_flags & TF_NODELAY;
    726 			break;
    727 		case TCP_MAXSEG:
    728 			*mtod(m, int *) = tp->t_peermss;
    729 			break;
    730 		default:
    731 			error = ENOPROTOOPT;
    732 			break;
    733 		}
    734 		break;
    735 	}
    736 	splx(s);
    737 	return (error);
    738 }
    739 
/*
 * Default send and receive buffer reservations, used by tcp_attach()
 * when a socket has no high-water marks set.  Either macro may be
 * predefined before this point (presumably via the kernel option header
 * included above -- confirm).  The replacement lists are parenthesized
 * so the macros stay a single operand inside larger expressions.
 */
#ifndef TCP_SENDSPACE
#define	TCP_SENDSPACE	(1024*16)
#endif
int	tcp_sendspace = TCP_SENDSPACE;
#ifndef TCP_RECVSPACE
#define	TCP_RECVSPACE	(1024*16)
#endif
int	tcp_recvspace = TCP_RECVSPACE;
    748 
    749 /*
    750  * Attach TCP protocol to socket, allocating
    751  * internet protocol control block, tcp control block,
    752  * bufer space, and entering LISTEN state if to accept connections.
    753  */
    754 int
    755 tcp_attach(so)
    756 	struct socket *so;
    757 {
    758 	struct tcpcb *tp;
    759 	struct inpcb *inp;
    760 #ifdef INET6
    761 	struct in6pcb *in6p;
    762 #endif
    763 	int error;
    764 	int family;	/* family of the socket */
    765 
    766 	family = so->so_proto->pr_domain->dom_family;
    767 
    768 	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
    769 		error = soreserve(so, tcp_sendspace, tcp_recvspace);
    770 		if (error)
    771 			return (error);
    772 	}
    773 	switch (family) {
    774 #ifdef INET
    775 	case PF_INET:
    776 		error = in_pcballoc(so, &tcbtable);
    777 		if (error)
    778 			return (error);
    779 		inp = sotoinpcb(so);
    780 #ifdef INET6
    781 		in6p = NULL;
    782 #endif
    783 		break;
    784 #endif
    785 #ifdef INET6
    786 	case PF_INET6:
    787 		error = in6_pcballoc(so, &tcb6);
    788 		if (error)
    789 			return (error);
    790 		inp = NULL;
    791 		in6p = sotoin6pcb(so);
    792 		break;
    793 #endif
    794 	default:
    795 		return EAFNOSUPPORT;
    796 	}
    797 	if (inp)
    798 		tp = tcp_newtcpcb(family, (void *)inp);
    799 #ifdef INET6
    800 	else if (in6p)
    801 		tp = tcp_newtcpcb(family, (void *)in6p);
    802 #endif
    803 	else
    804 		tp = NULL;
    805 
    806 	if (tp == 0) {
    807 		int nofd = so->so_state & SS_NOFDREF;	/* XXX */
    808 
    809 		so->so_state &= ~SS_NOFDREF;	/* don't free the socket yet */
    810 #ifdef INET
    811 		if (inp)
    812 			in_pcbdetach(inp);
    813 #endif
    814 #ifdef INET6
    815 		if (in6p)
    816 			in6_pcbdetach(in6p);
    817 #endif
    818 		so->so_state |= nofd;
    819 		return (ENOBUFS);
    820 	}
    821 	tp->t_state = TCPS_CLOSED;
    822 	return (0);
    823 }
    824 
    825 /*
    826  * Initiate (or continue) disconnect.
    827  * If embryonic state, just send reset (once).
    828  * If in ``let data drain'' option and linger null, just drop.
    829  * Otherwise (hard), mark socket disconnecting and drop
    830  * current input data; switch states based on user close, and
    831  * send segment to peer (with FIN).
    832  */
    833 struct tcpcb *
    834 tcp_disconnect(tp)
    835 	struct tcpcb *tp;
    836 {
    837 	struct socket *so;
    838 
    839 	if (tp->t_inpcb)
    840 		so = tp->t_inpcb->inp_socket;
    841 #ifdef INET6
    842 	else if (tp->t_in6pcb)
    843 		so = tp->t_in6pcb->in6p_socket;
    844 #endif
    845 	else
    846 		so = NULL;
    847 
    848 	if (TCPS_HAVEESTABLISHED(tp->t_state) == 0)
    849 		tp = tcp_close(tp);
    850 	else if ((so->so_options & SO_LINGER) && so->so_linger == 0)
    851 		tp = tcp_drop(tp, 0);
    852 	else {
    853 		soisdisconnecting(so);
    854 		sbflush(&so->so_rcv);
    855 		tp = tcp_usrclosed(tp);
    856 		if (tp)
    857 			(void) tcp_output(tp);
    858 	}
    859 	return (tp);
    860 }
    861 
    862 /*
    863  * User issued close, and wish to trail through shutdown states:
    864  * if never received SYN, just forget it.  If got a SYN from peer,
    865  * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
    866  * If already got a FIN from peer, then almost done; go to LAST_ACK
    867  * state.  In all other cases, have already sent FIN to peer (e.g.
    868  * after PRU_SHUTDOWN), and just have to play tedious game waiting
    869  * for peer to send FIN or not respond to keep-alives, etc.
    870  * We can let the user exit from the close as soon as the FIN is acked.
    871  */
    872 struct tcpcb *
    873 tcp_usrclosed(tp)
    874 	struct tcpcb *tp;
    875 {
    876 
    877 	switch (tp->t_state) {
    878 
    879 	case TCPS_CLOSED:
    880 	case TCPS_LISTEN:
    881 	case TCPS_SYN_SENT:
    882 		tp->t_state = TCPS_CLOSED;
    883 		tp = tcp_close(tp);
    884 		break;
    885 
    886 	case TCPS_SYN_RECEIVED:
    887 	case TCPS_ESTABLISHED:
    888 		tp->t_state = TCPS_FIN_WAIT_1;
    889 		break;
    890 
    891 	case TCPS_CLOSE_WAIT:
    892 		tp->t_state = TCPS_LAST_ACK;
    893 		break;
    894 	}
    895 	if (tp && tp->t_state >= TCPS_FIN_WAIT_2) {
    896 		struct socket *so;
    897 		if (tp->t_inpcb)
    898 			so = tp->t_inpcb->inp_socket;
    899 #ifdef INET6
    900 		else if (tp->t_in6pcb)
    901 			so = tp->t_in6pcb->in6p_socket;
    902 #endif
    903 		else
    904 			so = NULL;
    905 		soisdisconnected(so);
    906 		/*
    907 		 * If we are in FIN_WAIT_2, we arrived here because the
    908 		 * application did a shutdown of the send side.  Like the
    909 		 * case of a transition from FIN_WAIT_1 to FIN_WAIT_2 after
    910 		 * a full close, we start a timer to make sure sockets are
    911 		 * not left in FIN_WAIT_2 forever.
    912 		 */
    913 		if ((tp->t_state == TCPS_FIN_WAIT_2) && (tcp_maxidle > 0))
    914 			TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_maxidle);
    915 	}
    916 	return (tp);
    917 }
    918 
    919 static const struct {
    920 	 unsigned int valid : 1;
    921 	 unsigned int rdonly : 1;
    922 	 int *var;
    923 	 int val;
    924 	 } tcp_ctlvars[] = TCPCTL_VARIABLES;
    925 
    926 /*
    927  * Sysctl for tcp variables.
    928  */
    929 int
    930 tcp_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
    931 	int *name;
    932 	u_int namelen;
    933 	void *oldp;
    934 	size_t *oldlenp;
    935 	void *newp;
    936 	size_t newlen;
    937 {
    938 	int error, saved_value = 0;
    939 
    940 	/* All sysctl names at this level are terminal. */
    941 	if (namelen != 1)
    942 		return (ENOTDIR);
    943 
    944 	if (name[0] < sizeof(tcp_ctlvars)/sizeof(tcp_ctlvars[0])
    945 	    && tcp_ctlvars[name[0]].valid) {
    946 		if (tcp_ctlvars[name[0]].rdonly) {
    947 			return (sysctl_rdint(oldp, oldlenp, newp,
    948 			    tcp_ctlvars[name[0]].val));
    949 		} else {
    950 			switch (name[0]) {
    951 			case TCPCTL_MSSDFLT:
    952 				saved_value = tcp_mssdflt;
    953 				break;
    954 			}
    955 			error = sysctl_int(oldp, oldlenp, newp, newlen,
    956 			    tcp_ctlvars[name[0]].var);
    957 			if (error)
    958 				return (error);
    959 			switch (name[0]) {
    960 			case TCPCTL_MSSDFLT:
    961 				if (tcp_mssdflt < 32) {
    962 					tcp_mssdflt = saved_value;
    963 					return (EINVAL);
    964 				}
    965 				break;
    966 			}
    967 			return (0);
    968 		}
    969 	}
    970 
    971 	return (ENOPROTOOPT);
    972 }
    973