Home | History | Annotate | Line # | Download | only in netinet
tcp_usrreq.c revision 1.54
      1 /*	$NetBSD: tcp_usrreq.c,v 1.54 2000/10/06 09:24:40 enami Exp $	*/
      2 
      3 /*
      4  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. Neither the name of the project nor the names of its contributors
     16  *    may be used to endorse or promote products derived from this software
     17  *    without specific prior written permission.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
     20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
     23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     29  * SUCH DAMAGE.
     30  */
     31 
     32 /*-
     33  * Copyright (c) 1997, 1998 The NetBSD Foundation, Inc.
     34  * All rights reserved.
     35  *
     36  * This code is derived from software contributed to The NetBSD Foundation
     37  * by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation
     38  * Facility, NASA Ames Research Center.
     39  *
     40  * Redistribution and use in source and binary forms, with or without
     41  * modification, are permitted provided that the following conditions
     42  * are met:
     43  * 1. Redistributions of source code must retain the above copyright
     44  *    notice, this list of conditions and the following disclaimer.
     45  * 2. Redistributions in binary form must reproduce the above copyright
     46  *    notice, this list of conditions and the following disclaimer in the
     47  *    documentation and/or other materials provided with the distribution.
     48  * 3. All advertising materials mentioning features or use of this software
     49  *    must display the following acknowledgement:
     50  *	This product includes software developed by the NetBSD
     51  *	Foundation, Inc. and its contributors.
     52  * 4. Neither the name of The NetBSD Foundation nor the names of its
     53  *    contributors may be used to endorse or promote products derived
     54  *    from this software without specific prior written permission.
     55  *
     56  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     57  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     58  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     59  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     60  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     61  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     62  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     63  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     64  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     65  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     66  * POSSIBILITY OF SUCH DAMAGE.
     67  */
     68 
     69 /*
     70  * Copyright (c) 1982, 1986, 1988, 1993, 1995
     71  *	The Regents of the University of California.  All rights reserved.
     72  *
     73  * Redistribution and use in source and binary forms, with or without
     74  * modification, are permitted provided that the following conditions
     75  * are met:
     76  * 1. Redistributions of source code must retain the above copyright
     77  *    notice, this list of conditions and the following disclaimer.
     78  * 2. Redistributions in binary form must reproduce the above copyright
     79  *    notice, this list of conditions and the following disclaimer in the
     80  *    documentation and/or other materials provided with the distribution.
     81  * 3. All advertising materials mentioning features or use of this software
     82  *    must display the following acknowledgement:
     83  *	This product includes software developed by the University of
     84  *	California, Berkeley and its contributors.
     85  * 4. Neither the name of the University nor the names of its contributors
     86  *    may be used to endorse or promote products derived from this software
     87  *    without specific prior written permission.
     88  *
     89  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     90  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     91  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     92  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     93  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     94  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     95  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     96  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     97  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     98  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     99  * SUCH DAMAGE.
    100  *
    101  *	@(#)tcp_usrreq.c	8.5 (Berkeley) 6/21/95
    102  */
    103 
    104 #include "opt_inet.h"
    105 #include "opt_ipsec.h"
    106 
    107 #include <sys/param.h>
    108 #include <sys/systm.h>
    109 #include <sys/kernel.h>
    110 #include <sys/malloc.h>
    111 #include <sys/mbuf.h>
    112 #include <sys/socket.h>
    113 #include <sys/socketvar.h>
    114 #include <sys/protosw.h>
    115 #include <sys/errno.h>
    116 #include <sys/stat.h>
    117 #include <sys/proc.h>
    118 #include <sys/ucred.h>
    119 #include <sys/domain.h>
    120 
    121 #include <uvm/uvm_extern.h>
    122 #include <sys/sysctl.h>
    123 
    124 #include <net/if.h>
    125 #include <net/route.h>
    126 
    127 #include <netinet/in.h>
    128 #include <netinet/in_systm.h>
    129 #include <netinet/in_var.h>
    130 #include <netinet/ip.h>
    131 #include <netinet/in_pcb.h>
    132 #include <netinet/ip_var.h>
    133 
    134 #ifdef INET6
    135 #ifndef INET
    136 #include <netinet/in.h>
    137 #endif
    138 #include <netinet/ip6.h>
    139 #include <netinet6/in6_pcb.h>
    140 #include <netinet6/ip6_var.h>
    141 #endif
    142 
    143 #include <netinet/tcp.h>
    144 #include <netinet/tcp_fsm.h>
    145 #include <netinet/tcp_seq.h>
    146 #include <netinet/tcp_timer.h>
    147 #include <netinet/tcp_var.h>
    148 #include <netinet/tcpip.h>
    149 #include <netinet/tcp_debug.h>
    150 
    151 #include "opt_tcp_recvspace.h"
    152 #include "opt_tcp_sendspace.h"
    153 
    154 #ifdef IPSEC
    155 #include <netinet6/ipsec.h>
    156 #endif /*IPSEC*/
    157 
    158 /*
    159  * TCP protocol interface to socket abstraction.
    160  */
    161 extern	char *tcpstates[];
    162 
    163 /*
    164  * Process a TCP user request for TCP tb.  If this is a send request
    165  * then m is the mbuf chain of send data.  If this is a timer expiration
    166  * (called from the software clock routine), then timertype tells which timer.
    167  */
    168 /*ARGSUSED*/
    169 int
    170 tcp_usrreq(so, req, m, nam, control, p)
    171 	struct socket *so;
    172 	int req;
    173 	struct mbuf *m, *nam, *control;
    174 	struct proc *p;
    175 {
    176 	struct inpcb *inp;
    177 #ifdef INET6
    178 	struct in6pcb *in6p;
    179 #endif
    180 	struct tcpcb *tp = NULL;
    181 	int s;
    182 	int error = 0;
    183 	int ostate;
    184 	int family;	/* family of the socket */
    185 
    186 	family = so->so_proto->pr_domain->dom_family;
    187 
    188 	if (req == PRU_CONTROL) {
    189 		switch (family) {
    190 		case PF_INET:
    191 			return (in_control(so, (long)m, (caddr_t)nam,
    192 			    (struct ifnet *)control, p));
    193 #ifdef INET6
    194 		case PF_INET6:
    195 			return (in6_control(so, (long)m, (caddr_t)nam,
    196 			    (struct ifnet *)control, p));
    197 #endif
    198 		default:
    199 			return EAFNOSUPPORT;
    200 		}
    201 	}
    202 
    203 	if (req == PRU_PURGEIF) {
    204 		switch (family) {
    205 		case PF_INET:
    206 			in_purgeif((struct ifnet *)control);
    207 			in_pcbpurgeif(&tcbtable, (struct ifnet *)control);
    208 		break;
    209 #ifdef INET6
    210 		case PF_INET6:
    211 			in6_purgeif((struct ifnet *)control);
    212 			in6_pcbpurgeif(&tcb6, (struct ifnet *)control);
    213 		break;
    214 #endif
    215 		default:
    216 			return (EAFNOSUPPORT);
    217 		}
    218 		return (0);
    219 	}
    220 
    221 	s = splsoftnet();
    222 	switch (family) {
    223 	case PF_INET:
    224 		inp = sotoinpcb(so);
    225 #ifdef INET6
    226 		in6p = NULL;
    227 #endif
    228 		break;
    229 #ifdef INET6
    230 	case PF_INET6:
    231 		inp = NULL;
    232 		in6p = sotoin6pcb(so);
    233 		break;
    234 #endif
    235 	default:
    236 		splx(s);
    237 		return EAFNOSUPPORT;
    238 	}
    239 
    240 #ifdef DIAGNOSTIC
    241 	if (req != PRU_SEND && req != PRU_SENDOOB && control)
    242 		panic("tcp_usrreq: unexpected control mbuf");
    243 #endif
    244 	/*
    245 	 * When a TCP is attached to a socket, then there will be
    246 	 * a (struct inpcb) pointed at by the socket, and this
    247 	 * structure will point at a subsidary (struct tcpcb).
    248 	 */
    249 #ifndef INET6
    250 	if (inp == 0 && req != PRU_ATTACH)
    251 #else
    252 	if ((inp == 0 && in6p == 0) && req != PRU_ATTACH)
    253 #endif
    254 	{
    255 		error = EINVAL;
    256 		goto release;
    257 	}
    258 	if (inp) {
    259 		tp = intotcpcb(inp);
    260 		/* WHAT IF TP IS 0? */
    261 #ifdef KPROF
    262 		tcp_acounts[tp->t_state][req]++;
    263 #endif
    264 		ostate = tp->t_state;
    265 	}
    266 #ifdef INET6
    267 	else if (in6p) {
    268 		tp = in6totcpcb(in6p);
    269 		/* WHAT IF TP IS 0? */
    270 #ifdef KPROF
    271 		tcp_acounts[tp->t_state][req]++;
    272 #endif
    273 		ostate = tp->t_state;
    274 	}
    275 #endif
    276 	else
    277 		ostate = 0;
    278 
    279 	switch (req) {
    280 
    281 	/*
    282 	 * TCP attaches to socket via PRU_ATTACH, reserving space,
    283 	 * and an internet control block.
    284 	 */
    285 	case PRU_ATTACH:
    286 #ifndef INET6
    287 		if (inp != 0)
    288 #else
    289 		if (inp != 0 || in6p != 0)
    290 #endif
    291 		{
    292 			error = EISCONN;
    293 			break;
    294 		}
    295 		error = tcp_attach(so);
    296 		if (error)
    297 			break;
    298 		if ((so->so_options & SO_LINGER) && so->so_linger == 0)
    299 			so->so_linger = TCP_LINGERTIME;
    300 		tp = sototcpcb(so);
    301 		break;
    302 
    303 	/*
    304 	 * PRU_DETACH detaches the TCP protocol from the socket.
    305 	 */
    306 	case PRU_DETACH:
    307 		tp = tcp_disconnect(tp);
    308 		break;
    309 
    310 	/*
    311 	 * Give the socket an address.
    312 	 */
    313 	case PRU_BIND:
    314 		switch (family) {
    315 		case PF_INET:
    316 			error = in_pcbbind(inp, nam, p);
    317 			break;
    318 #ifdef INET6
    319 		case PF_INET6:
    320 			error = in6_pcbbind(in6p, nam, p);
    321 			/* mapped addr case */
    322 			if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_laddr))
    323 				tp->t_family = AF_INET;
    324 			break;
    325 #endif
    326 		}
    327 		break;
    328 
    329 	/*
    330 	 * Prepare to accept connections.
    331 	 */
    332 	case PRU_LISTEN:
    333 		if (inp && inp->inp_lport == 0) {
    334 			error = in_pcbbind(inp, (struct mbuf *)0,
    335 			    (struct proc *)0);
    336 			if (error)
    337 				break;
    338 		}
    339 #ifdef INET6
    340 		else if (in6p && in6p->in6p_lport == 0) {
    341 			error = in6_pcbbind(in6p, (struct mbuf *)0,
    342 			    (struct proc *)0);
    343 			if (error)
    344 				break;
    345 		}
    346 #endif
    347 		tp->t_state = TCPS_LISTEN;
    348 		break;
    349 
    350 	/*
    351 	 * Initiate connection to peer.
    352 	 * Create a template for use in transmissions on this connection.
    353 	 * Enter SYN_SENT state, and mark socket as connecting.
    354 	 * Start keep-alive timer, and seed output sequence space.
    355 	 * Send initial segment on connection.
    356 	 */
    357 	case PRU_CONNECT:
    358 		if (inp) {
    359 			if (inp->inp_lport == 0) {
    360 				error = in_pcbbind(inp, (struct mbuf *)0,
    361 				    (struct proc *)0);
    362 				if (error)
    363 					break;
    364 			}
    365 			error = in_pcbconnect(inp, nam);
    366 		}
    367 #ifdef INET6
    368 		else if (in6p) {
    369 			if (in6p->in6p_lport == 0) {
    370 				error = in6_pcbbind(in6p, (struct mbuf *)0,
    371 				    (struct proc *)0);
    372 				if (error)
    373 					break;
    374 			}
    375 			error = in6_pcbconnect(in6p, nam);
    376 			/* mapped addr case */
    377 			if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_faddr))
    378 				tp->t_family = AF_INET;
    379 		}
    380 #endif
    381 		if (error)
    382 			break;
    383 		tp->t_template = tcp_template(tp);
    384 		if (tp->t_template == 0) {
    385 			if (inp)
    386 				in_pcbdisconnect(inp);
    387 #ifdef INET6
    388 			else if (in6p)
    389 				in6_pcbdisconnect(in6p);
    390 #endif
    391 			error = ENOBUFS;
    392 			break;
    393 		}
    394 		/* Compute window scaling to request.  */
    395 		while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
    396 		    (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat)
    397 			tp->request_r_scale++;
    398 		soisconnecting(so);
    399 		tcpstat.tcps_connattempt++;
    400 		tp->t_state = TCPS_SYN_SENT;
    401 		TCP_TIMER_ARM(tp, TCPT_KEEP, TCPTV_KEEP_INIT);
    402 		tp->iss = tcp_new_iss(tp, sizeof(struct tcpcb), 0);
    403 		tcp_sendseqinit(tp);
    404 		error = tcp_output(tp);
    405 		break;
    406 
    407 	/*
    408 	 * Create a TCP connection between two sockets.
    409 	 */
    410 	case PRU_CONNECT2:
    411 		error = EOPNOTSUPP;
    412 		break;
    413 
    414 	/*
    415 	 * Initiate disconnect from peer.
    416 	 * If connection never passed embryonic stage, just drop;
    417 	 * else if don't need to let data drain, then can just drop anyways,
    418 	 * else have to begin TCP shutdown process: mark socket disconnecting,
    419 	 * drain unread data, state switch to reflect user close, and
    420 	 * send segment (e.g. FIN) to peer.  Socket will be really disconnected
    421 	 * when peer sends FIN and acks ours.
    422 	 *
    423 	 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
    424 	 */
    425 	case PRU_DISCONNECT:
    426 		tp = tcp_disconnect(tp);
    427 		break;
    428 
    429 	/*
    430 	 * Accept a connection.  Essentially all the work is
    431 	 * done at higher levels; just return the address
    432 	 * of the peer, storing through addr.
    433 	 */
    434 	case PRU_ACCEPT:
    435 		if (inp)
    436 			in_setpeeraddr(inp, nam);
    437 #ifdef INET6
    438 		else if (in6p)
    439 			in6_setpeeraddr(in6p, nam);
    440 #endif
    441 		break;
    442 
    443 	/*
    444 	 * Mark the connection as being incapable of further output.
    445 	 */
    446 	case PRU_SHUTDOWN:
    447 		socantsendmore(so);
    448 		tp = tcp_usrclosed(tp);
    449 		if (tp)
    450 			error = tcp_output(tp);
    451 		break;
    452 
    453 	/*
    454 	 * After a receive, possibly send window update to peer.
    455 	 */
    456 	case PRU_RCVD:
    457 		(void) tcp_output(tp);
    458 		break;
    459 
    460 	/*
    461 	 * Do a send by putting data in output queue and updating urgent
    462 	 * marker if URG set.  Possibly send more data.
    463 	 */
    464 	case PRU_SEND:
    465 		if (control && control->m_len) {
    466 			m_freem(control);
    467 			m_freem(m);
    468 			error = EINVAL;
    469 			break;
    470 		}
    471 		sbappend(&so->so_snd, m);
    472 		error = tcp_output(tp);
    473 		break;
    474 
    475 	/*
    476 	 * Abort the TCP.
    477 	 */
    478 	case PRU_ABORT:
    479 		tp = tcp_drop(tp, ECONNABORTED);
    480 		break;
    481 
    482 	case PRU_SENSE:
    483 		/*
    484 		 * stat: don't bother with a blocksize.
    485 		 */
    486 		splx(s);
    487 		return (0);
    488 
    489 	case PRU_RCVOOB:
    490 		if (control && control->m_len) {
    491 			m_freem(control);
    492 			m_freem(m);
    493 			error = EINVAL;
    494 			break;
    495 		}
    496 		if ((so->so_oobmark == 0 &&
    497 		    (so->so_state & SS_RCVATMARK) == 0) ||
    498 		    so->so_options & SO_OOBINLINE ||
    499 		    tp->t_oobflags & TCPOOB_HADDATA) {
    500 			error = EINVAL;
    501 			break;
    502 		}
    503 		if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
    504 			error = EWOULDBLOCK;
    505 			break;
    506 		}
    507 		m->m_len = 1;
    508 		*mtod(m, caddr_t) = tp->t_iobc;
    509 		if (((long)nam & MSG_PEEK) == 0)
    510 			tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
    511 		break;
    512 
    513 	case PRU_SENDOOB:
    514 		if (sbspace(&so->so_snd) < -512) {
    515 			m_freem(m);
    516 			error = ENOBUFS;
    517 			break;
    518 		}
    519 		/*
    520 		 * According to RFC961 (Assigned Protocols),
    521 		 * the urgent pointer points to the last octet
    522 		 * of urgent data.  We continue, however,
    523 		 * to consider it to indicate the first octet
    524 		 * of data past the urgent section.
    525 		 * Otherwise, snd_up should be one lower.
    526 		 */
    527 		sbappend(&so->so_snd, m);
    528 		tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
    529 		tp->t_force = 1;
    530 		error = tcp_output(tp);
    531 		tp->t_force = 0;
    532 		break;
    533 
    534 	case PRU_SOCKADDR:
    535 		if (inp)
    536 			in_setsockaddr(inp, nam);
    537 #ifdef INET6
    538 		else if (in6p)
    539 			in6_setsockaddr(in6p, nam);
    540 #endif
    541 		break;
    542 
    543 	case PRU_PEERADDR:
    544 		if (inp)
    545 			in_setpeeraddr(inp, nam);
    546 #ifdef INET6
    547 		else if (in6p)
    548 			in6_setpeeraddr(in6p, nam);
    549 #endif
    550 		break;
    551 
    552 	/*
    553 	 * TCP slow timer went off; going through this
    554 	 * routine for tracing's sake.
    555 	 */
    556 	case PRU_SLOWTIMO:
    557 		tp = tcp_timers(tp, (long)nam);
    558 		req |= (long)nam << 8;		/* for debug's sake */
    559 		break;
    560 
    561 	default:
    562 		panic("tcp_usrreq");
    563 	}
    564 	if (tp && (so->so_options & SO_DEBUG))
    565 		tcp_trace(TA_USER, ostate, tp, NULL, req);
    566 
    567 release:
    568 	splx(s);
    569 	return (error);
    570 }
    571 
    572 int
    573 tcp_ctloutput(op, so, level, optname, mp)
    574 	int op;
    575 	struct socket *so;
    576 	int level, optname;
    577 	struct mbuf **mp;
    578 {
    579 	int error = 0, s;
    580 	struct inpcb *inp;
    581 #ifdef INET6
    582 	struct in6pcb *in6p;
    583 #endif
    584 	struct tcpcb *tp;
    585 	struct mbuf *m;
    586 	int i;
    587 	int family;	/* family of the socket */
    588 
    589 	family = so->so_proto->pr_domain->dom_family;
    590 
    591 	s = splsoftnet();
    592 	switch (family) {
    593 	case PF_INET:
    594 		inp = sotoinpcb(so);
    595 #ifdef INET6
    596 		in6p = NULL;
    597 #endif
    598 		break;
    599 #ifdef INET6
    600 	case PF_INET6:
    601 		inp = NULL;
    602 		in6p = sotoin6pcb(so);
    603 		break;
    604 #endif
    605 	default:
    606 		splx(s);
    607 		return EAFNOSUPPORT;
    608 	}
    609 #ifndef INET6
    610 	if (inp == NULL)
    611 #else
    612 	if (inp == NULL && in6p == NULL)
    613 #endif
    614 	{
    615 		splx(s);
    616 		if (op == PRCO_SETOPT && *mp)
    617 			(void) m_free(*mp);
    618 		return (ECONNRESET);
    619 	}
    620 	if (level != IPPROTO_TCP) {
    621 		switch (family) {
    622 		case PF_INET:
    623 			error = ip_ctloutput(op, so, level, optname, mp);
    624 			break;
    625 #ifdef INET6
    626 		case PF_INET6:
    627 			error = ip6_ctloutput(op, so, level, optname, mp);
    628 			break;
    629 #endif
    630 		}
    631 		splx(s);
    632 		return (error);
    633 	}
    634 	if (inp)
    635 		tp = intotcpcb(inp);
    636 #ifdef INET6
    637 	else if (in6p)
    638 		tp = in6totcpcb(in6p);
    639 #endif
    640 	else
    641 		tp = NULL;
    642 
    643 	switch (op) {
    644 
    645 	case PRCO_SETOPT:
    646 		m = *mp;
    647 		switch (optname) {
    648 
    649 		case TCP_NODELAY:
    650 			if (m == NULL || m->m_len < sizeof (int))
    651 				error = EINVAL;
    652 			else if (*mtod(m, int *))
    653 				tp->t_flags |= TF_NODELAY;
    654 			else
    655 				tp->t_flags &= ~TF_NODELAY;
    656 			break;
    657 
    658 		case TCP_MAXSEG:
    659 			if (m && (i = *mtod(m, int *)) > 0 &&
    660 			    i <= tp->t_peermss)
    661 				tp->t_peermss = i;  /* limit on send size */
    662 			else
    663 				error = EINVAL;
    664 			break;
    665 
    666 		default:
    667 			error = ENOPROTOOPT;
    668 			break;
    669 		}
    670 		if (m)
    671 			(void) m_free(m);
    672 		break;
    673 
    674 	case PRCO_GETOPT:
    675 		*mp = m = m_get(M_WAIT, MT_SOOPTS);
    676 		m->m_len = sizeof(int);
    677 
    678 		switch (optname) {
    679 		case TCP_NODELAY:
    680 			*mtod(m, int *) = tp->t_flags & TF_NODELAY;
    681 			break;
    682 		case TCP_MAXSEG:
    683 			*mtod(m, int *) = tp->t_peermss;
    684 			break;
    685 		default:
    686 			error = ENOPROTOOPT;
    687 			break;
    688 		}
    689 		break;
    690 	}
    691 	splx(s);
    692 	return (error);
    693 }
    694 
/*
 * Default send and receive buffer sizes, in bytes; tcp_attach() hands
 * them to soreserve().  A value defined before this point (e.g. by the
 * opt_tcp_sendspace.h / opt_tcp_recvspace.h headers) takes precedence.
 * The fallbacks are parenthesized and carry no trailing semicolon so
 * that the macros expand cleanly in any expression.
 */
#ifndef TCP_SENDSPACE
#define	TCP_SENDSPACE	(1024 * 16)
#endif
int	tcp_sendspace = TCP_SENDSPACE;
#ifndef TCP_RECVSPACE
#define	TCP_RECVSPACE	(1024 * 16)
#endif
int	tcp_recvspace = TCP_RECVSPACE;
    703 
    704 /*
    705  * Attach TCP protocol to socket, allocating
    706  * internet protocol control block, tcp control block,
    707  * bufer space, and entering LISTEN state if to accept connections.
    708  */
    709 int
    710 tcp_attach(so)
    711 	struct socket *so;
    712 {
    713 	struct tcpcb *tp;
    714 	struct inpcb *inp;
    715 #ifdef INET6
    716 	struct in6pcb *in6p;
    717 #endif
    718 	int error;
    719 	int family;	/* family of the socket */
    720 
    721 	family = so->so_proto->pr_domain->dom_family;
    722 
    723 	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
    724 		error = soreserve(so, tcp_sendspace, tcp_recvspace);
    725 		if (error)
    726 			return (error);
    727 	}
    728 	switch (family) {
    729 	case PF_INET:
    730 		error = in_pcballoc(so, &tcbtable);
    731 		if (error)
    732 			return (error);
    733 		inp = sotoinpcb(so);
    734 #ifdef INET6
    735 		in6p = NULL;
    736 #endif
    737 		break;
    738 #ifdef INET6
    739 	case PF_INET6:
    740 		error = in6_pcballoc(so, &tcb6);
    741 		if (error)
    742 			return (error);
    743 		inp = NULL;
    744 		in6p = sotoin6pcb(so);
    745 		break;
    746 #endif
    747 	default:
    748 		return EAFNOSUPPORT;
    749 	}
    750 #ifdef IPSEC
    751 	if (inp) {
    752 		error = ipsec_init_policy(so, &inp->inp_sp);
    753 		if (error != 0) {
    754 			in_pcbdetach(inp);
    755 			return (error);
    756 		}
    757 	}
    758 #ifdef INET6
    759 	else if (in6p) {
    760 		error = ipsec_init_policy(so, &in6p->in6p_sp);
    761 		if (error != 0) {
    762 			in6_pcbdetach(in6p);
    763 			return (error);
    764 		}
    765 	}
    766 #endif
    767 #endif /*IPSEC*/
    768 	if (inp)
    769 		tp = tcp_newtcpcb(family, (void *)inp);
    770 #ifdef INET6
    771 	else if (in6p)
    772 		tp = tcp_newtcpcb(family, (void *)in6p);
    773 #endif
    774 	else
    775 		tp = NULL;
    776 
    777 	if (tp == 0) {
    778 		int nofd = so->so_state & SS_NOFDREF;	/* XXX */
    779 
    780 		so->so_state &= ~SS_NOFDREF;	/* don't free the socket yet */
    781 		if (inp)
    782 			in_pcbdetach(inp);
    783 #ifdef INET6
    784 		else if (in6p)
    785 			in6_pcbdetach(in6p);
    786 #endif
    787 		so->so_state |= nofd;
    788 		return (ENOBUFS);
    789 	}
    790 	tp->t_state = TCPS_CLOSED;
    791 	return (0);
    792 }
    793 
    794 /*
    795  * Initiate (or continue) disconnect.
    796  * If embryonic state, just send reset (once).
    797  * If in ``let data drain'' option and linger null, just drop.
    798  * Otherwise (hard), mark socket disconnecting and drop
    799  * current input data; switch states based on user close, and
    800  * send segment to peer (with FIN).
    801  */
    802 struct tcpcb *
    803 tcp_disconnect(tp)
    804 	struct tcpcb *tp;
    805 {
    806 	struct socket *so;
    807 
    808 	if (tp->t_inpcb)
    809 		so = tp->t_inpcb->inp_socket;
    810 #ifdef INET6
    811 	else if (tp->t_in6pcb)
    812 		so = tp->t_in6pcb->in6p_socket;
    813 #endif
    814 	else
    815 		so = NULL;
    816 
    817 	if (TCPS_HAVEESTABLISHED(tp->t_state) == 0)
    818 		tp = tcp_close(tp);
    819 	else if ((so->so_options & SO_LINGER) && so->so_linger == 0)
    820 		tp = tcp_drop(tp, 0);
    821 	else {
    822 		soisdisconnecting(so);
    823 		sbflush(&so->so_rcv);
    824 		tp = tcp_usrclosed(tp);
    825 		if (tp)
    826 			(void) tcp_output(tp);
    827 	}
    828 	return (tp);
    829 }
    830 
    831 /*
    832  * User issued close, and wish to trail through shutdown states:
    833  * if never received SYN, just forget it.  If got a SYN from peer,
    834  * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
    835  * If already got a FIN from peer, then almost done; go to LAST_ACK
    836  * state.  In all other cases, have already sent FIN to peer (e.g.
    837  * after PRU_SHUTDOWN), and just have to play tedious game waiting
    838  * for peer to send FIN or not respond to keep-alives, etc.
    839  * We can let the user exit from the close as soon as the FIN is acked.
    840  */
    841 struct tcpcb *
    842 tcp_usrclosed(tp)
    843 	struct tcpcb *tp;
    844 {
    845 
    846 	switch (tp->t_state) {
    847 
    848 	case TCPS_CLOSED:
    849 	case TCPS_LISTEN:
    850 	case TCPS_SYN_SENT:
    851 		tp->t_state = TCPS_CLOSED;
    852 		tp = tcp_close(tp);
    853 		break;
    854 
    855 	case TCPS_SYN_RECEIVED:
    856 	case TCPS_ESTABLISHED:
    857 		tp->t_state = TCPS_FIN_WAIT_1;
    858 		break;
    859 
    860 	case TCPS_CLOSE_WAIT:
    861 		tp->t_state = TCPS_LAST_ACK;
    862 		break;
    863 	}
    864 	if (tp && tp->t_state >= TCPS_FIN_WAIT_2) {
    865 		struct socket *so;
    866 		if (tp->t_inpcb)
    867 			so = tp->t_inpcb->inp_socket;
    868 #ifdef INET6
    869 		else if (tp->t_in6pcb)
    870 			so = tp->t_in6pcb->in6p_socket;
    871 #endif
    872 		else
    873 			so = NULL;
    874 		soisdisconnected(so);
    875 		/*
    876 		 * If we are in FIN_WAIT_2, we arrived here because the
    877 		 * application did a shutdown of the send side.  Like the
    878 		 * case of a transition from FIN_WAIT_1 to FIN_WAIT_2 after
    879 		 * a full close, we start a timer to make sure sockets are
    880 		 * not left in FIN_WAIT_2 forever.
    881 		 */
    882 		if ((tp->t_state == TCPS_FIN_WAIT_2) && (tcp_maxidle > 0))
    883 			TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_maxidle);
    884 	}
    885 	return (tp);
    886 }
    887 
/*
 * Table of TCP sysctl variables, initialized from TCPCTL_VARIABLES and
 * indexed by the sysctl name component in tcp_sysctl().  The member
 * order must stay as is because the initializer is positional.
 */
static struct {
	unsigned valid : 1;	/* entry may be accessed through sysctl */
	unsigned rdonly : 1;	/* report val read-only instead of *var */
	int *var;		/* variable accessed when !rdonly */
	int val;		/* value reported when rdonly */
} tcp_ctlvars[] = TCPCTL_VARIABLES;
    894 
    895 /*
    896  * Sysctl for tcp variables.
    897  */
    898 int
    899 tcp_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
    900 	int *name;
    901 	u_int namelen;
    902 	void *oldp;
    903 	size_t *oldlenp;
    904 	void *newp;
    905 	size_t newlen;
    906 {
    907 
    908 	/* All sysctl names at this level are terminal. */
    909 	if (namelen != 1)
    910 		return (ENOTDIR);
    911 
    912 	if (name[0] < sizeof(tcp_ctlvars)/sizeof(tcp_ctlvars[0])
    913 	    && tcp_ctlvars[name[0]].valid) {
    914 		if (tcp_ctlvars[name[0]].rdonly)
    915 			return (sysctl_rdint(oldp, oldlenp, newp,
    916 			    tcp_ctlvars[name[0]].val));
    917 		else
    918 			return (sysctl_int(oldp, oldlenp, newp, newlen,
    919 			    tcp_ctlvars[name[0]].var));
    920 	}
    921 
    922 	return (ENOPROTOOPT);
    923 }
    924