Home | History | Annotate | Line # | Download | only in kern
uipc_socket.c revision 1.35
      1 /*	$NetBSD: uipc_socket.c,v 1.35 1998/06/25 23:41:20 thorpej Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 1982, 1986, 1988, 1990, 1993
      5  *	The Regents of the University of California.  All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. All advertising materials mentioning features or use of this software
     16  *    must display the following acknowledgement:
     17  *	This product includes software developed by the University of
     18  *	California, Berkeley and its contributors.
     19  * 4. Neither the name of the University nor the names of its contributors
     20  *    may be used to endorse or promote products derived from this software
     21  *    without specific prior written permission.
     22  *
     23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     33  * SUCH DAMAGE.
     34  *
     35  *	@(#)uipc_socket.c	8.6 (Berkeley) 5/2/95
     36  */
     37 
     38 #include "opt_compat_sunos.h"
     39 
     40 #include <sys/param.h>
     41 #include <sys/systm.h>
     42 #include <sys/proc.h>
     43 #include <sys/file.h>
     44 #include <sys/malloc.h>
     45 #include <sys/mbuf.h>
     46 #include <sys/domain.h>
     47 #include <sys/kernel.h>
     48 #include <sys/protosw.h>
     49 #include <sys/socket.h>
     50 #include <sys/socketvar.h>
     51 #include <sys/signalvar.h>
     52 #include <sys/resourcevar.h>
     53 
     54 /*
     55  * Socket operation routines.
     56  * These routines are called by the routines in
     57  * sys_socket.c or from a system process, and
     58  * implement the semantics of socket operations by
     59  * switching out to the protocol specific routines.
     60  */
     61 /*ARGSUSED*/
     62 int
     63 socreate(dom, aso, type, proto)
     64 	int dom;
     65 	struct socket **aso;
     66 	register int type;
     67 	int proto;
     68 {
     69 	struct proc *p = curproc;		/* XXX */
     70 	register struct protosw *prp;
     71 	register struct socket *so;
     72 	register int error;
     73 
     74 	if (proto)
     75 		prp = pffindproto(dom, proto, type);
     76 	else
     77 		prp = pffindtype(dom, type);
     78 	if (prp == 0 || prp->pr_usrreq == 0)
     79 		return (EPROTONOSUPPORT);
     80 	if (prp->pr_type != type)
     81 		return (EPROTOTYPE);
     82 	MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_WAIT);
     83 	bzero((caddr_t)so, sizeof(*so));
     84 	TAILQ_INIT(&so->so_q0);
     85 	TAILQ_INIT(&so->so_q);
     86 	so->so_type = type;
     87 	so->so_proto = prp;
     88 	so->so_send = sosend;
     89 	so->so_receive = soreceive;
     90 	error = (*prp->pr_usrreq)(so, PRU_ATTACH, (struct mbuf *)0,
     91 	    (struct mbuf *)(long)proto, (struct mbuf *)0, p);
     92 	if (error) {
     93 		so->so_state |= SS_NOFDREF;
     94 		sofree(so);
     95 		return (error);
     96 	}
     97 #ifdef COMPAT_SUNOS
     98 	{
     99 		extern struct emul emul_sunos;
    100 		if (p->p_emul == &emul_sunos && type == SOCK_DGRAM)
    101 			so->so_options |= SO_BROADCAST;
    102 	}
    103 #endif
    104 	*aso = so;
    105 	return (0);
    106 }
    107 
    108 int
    109 sobind(so, nam)
    110 	struct socket *so;
    111 	struct mbuf *nam;
    112 {
    113 	struct proc *p = curproc;		/* XXX */
    114 	int s = splsoftnet();
    115 	int error;
    116 
    117 	error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, (struct mbuf *)0,
    118 	    nam, (struct mbuf *)0, p);
    119 	splx(s);
    120 	return (error);
    121 }
    122 
    123 int
    124 solisten(so, backlog)
    125 	register struct socket *so;
    126 	int backlog;
    127 {
    128 	int s = splsoftnet(), error;
    129 
    130 	error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, (struct mbuf *)0,
    131 	    (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
    132 	if (error) {
    133 		splx(s);
    134 		return (error);
    135 	}
    136 	if (so->so_q.tqh_first == NULL)
    137 		so->so_options |= SO_ACCEPTCONN;
    138 	if (backlog < 0)
    139 		backlog = 0;
    140 	so->so_qlimit = min(backlog, SOMAXCONN);
    141 	splx(s);
    142 	return (0);
    143 }
    144 
    145 void
    146 sofree(so)
    147 	register struct socket *so;
    148 {
    149 
    150 	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
    151 		return;
    152 	if (so->so_head) {
    153 		if (!soqremque(so, 0) && !soqremque(so, 1))
    154 			panic("sofree dq");
    155 		so->so_head = 0;
    156 	}
    157 	sbrelease(&so->so_snd);
    158 	sorflush(so);
    159 	FREE(so, M_SOCKET);
    160 }
    161 
    162 /*
    163  * Close a socket on last file table reference removal.
    164  * Initiate disconnect if connected.
    165  * Free socket when disconnect complete.
    166  */
    167 int
    168 soclose(so)
    169 	register struct socket *so;
    170 {
    171 	int s = splsoftnet();		/* conservative */
    172 	int error = 0;
    173 
    174 	if (so->so_options & SO_ACCEPTCONN) {
    175 		while (so->so_q0.tqh_first)
    176 			(void) soabort(so->so_q0.tqh_first);
    177 		while (so->so_q.tqh_first)
    178 			(void) soabort(so->so_q.tqh_first);
    179 	}
    180 	if (so->so_pcb == 0)
    181 		goto discard;
    182 	if (so->so_state & SS_ISCONNECTED) {
    183 		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
    184 			error = sodisconnect(so);
    185 			if (error)
    186 				goto drop;
    187 		}
    188 		if (so->so_options & SO_LINGER) {
    189 			if ((so->so_state & SS_ISDISCONNECTING) &&
    190 			    (so->so_state & SS_NBIO))
    191 				goto drop;
    192 			while (so->so_state & SS_ISCONNECTED) {
    193 				error = tsleep((caddr_t)&so->so_timeo,
    194 					       PSOCK | PCATCH, netcls,
    195 					       so->so_linger * hz);
    196 				if (error)
    197 					break;
    198 			}
    199 		}
    200 	}
    201 drop:
    202 	if (so->so_pcb) {
    203 		int error2 = (*so->so_proto->pr_usrreq)(so, PRU_DETACH,
    204 		    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0,
    205 		    (struct proc *)0);
    206 		if (error == 0)
    207 			error = error2;
    208 	}
    209 discard:
    210 	if (so->so_state & SS_NOFDREF)
    211 		panic("soclose: NOFDREF");
    212 	so->so_state |= SS_NOFDREF;
    213 	sofree(so);
    214 	splx(s);
    215 	return (error);
    216 }
    217 
    218 /*
    219  * Must be called at splsoftnet...
    220  */
    221 int
    222 soabort(so)
    223 	struct socket *so;
    224 {
    225 
    226 	return (*so->so_proto->pr_usrreq)(so, PRU_ABORT, (struct mbuf *)0,
    227 	    (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
    228 }
    229 
    230 int
    231 soaccept(so, nam)
    232 	register struct socket *so;
    233 	struct mbuf *nam;
    234 {
    235 	int s = splsoftnet();
    236 	int error;
    237 
    238 	if ((so->so_state & SS_NOFDREF) == 0)
    239 		panic("soaccept: !NOFDREF");
    240 	so->so_state &= ~SS_NOFDREF;
    241 	error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT, (struct mbuf *)0,
    242 	    nam, (struct mbuf *)0, (struct proc *)0);
    243 	splx(s);
    244 	return (error);
    245 }
    246 
    247 int
    248 soconnect(so, nam)
    249 	register struct socket *so;
    250 	struct mbuf *nam;
    251 {
    252 	struct proc *p = curproc;		/* XXX */
    253 	int s;
    254 	int error;
    255 
    256 	if (so->so_options & SO_ACCEPTCONN)
    257 		return (EOPNOTSUPP);
    258 	s = splsoftnet();
    259 	/*
    260 	 * If protocol is connection-based, can only connect once.
    261 	 * Otherwise, if connected, try to disconnect first.
    262 	 * This allows user to disconnect by connecting to, e.g.,
    263 	 * a null address.
    264 	 */
    265 	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
    266 	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
    267 	    (error = sodisconnect(so))))
    268 		error = EISCONN;
    269 	else
    270 		error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
    271 		    (struct mbuf *)0, nam, (struct mbuf *)0, p);
    272 	splx(s);
    273 	return (error);
    274 }
    275 
    276 int
    277 soconnect2(so1, so2)
    278 	register struct socket *so1;
    279 	struct socket *so2;
    280 {
    281 	int s = splsoftnet();
    282 	int error;
    283 
    284 	error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2,
    285 	    (struct mbuf *)0, (struct mbuf *)so2, (struct mbuf *)0,
    286 	    (struct proc *)0);
    287 	splx(s);
    288 	return (error);
    289 }
    290 
    291 int
    292 sodisconnect(so)
    293 	register struct socket *so;
    294 {
    295 	int s = splsoftnet();
    296 	int error;
    297 
    298 	if ((so->so_state & SS_ISCONNECTED) == 0) {
    299 		error = ENOTCONN;
    300 		goto bad;
    301 	}
    302 	if (so->so_state & SS_ISDISCONNECTING) {
    303 		error = EALREADY;
    304 		goto bad;
    305 	}
    306 	error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT,
    307 	    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0,
    308 	    (struct proc *)0);
    309 bad:
    310 	splx(s);
    311 	return (error);
    312 }
    313 
    314 #define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
    315 /*
    316  * Send on a socket.
    317  * If send must go all at once and message is larger than
    318  * send buffering, then hard error.
    319  * Lock against other senders.
    320  * If must go all at once and not enough room now, then
    321  * inform user that this would block and do nothing.
    322  * Otherwise, if nonblocking, send as much as possible.
    323  * The data to be sent is described by "uio" if nonzero,
    324  * otherwise by the mbuf chain "top" (which must be null
    325  * if uio is not).  Data provided in mbuf chain must be small
    326  * enough to send all at once.
    327  *
    328  * Returns nonzero on error, timeout or signal; callers
    329  * must check for short counts if EINTR/ERESTART are returned.
    330  * Data and control buffers are freed on return.
    331  */
        /*
         * Arguments, as used by the code below:
         *	so	socket to send on.
         *	addr	destination address handed to the protocol; when the
         *		socket is not connected and the protocol is not
         *		connection-oriented, a null addr yields EDESTADDRREQ.
         *	uio	describes the data to copy into freshly allocated
         *		mbufs, or null.
         *	top	prepackaged mbuf chain, used when uio is null.
         *	control	optional control mbuf; its length counts against the
         *		send buffer and it is passed with the first protocol call.
         *	flags	MSG_* flags; MSG_DONTWAIT, MSG_OOB, MSG_EOR and
         *		MSG_DONTROUTE are examined here.
         */
    332 int
    333 sosend(so, addr, uio, top, control, flags)
    334 	register struct socket *so;
    335 	struct mbuf *addr;
    336 	struct uio *uio;
    337 	struct mbuf *top;
    338 	struct mbuf *control;
    339 	int flags;
    340 {
    341 	struct proc *p = curproc;		/* XXX */
    342 	struct mbuf **mp;
    343 	register struct mbuf *m;
    344 	register long space, len, resid;
    345 	int clen = 0, error, s, dontroute, mlen;
        	/*
        	 * "atomic": the whole message has to be handed to the protocol
        	 * in one request, either because the socket sends all at once
        	 * or because the caller supplied a prepackaged chain.
        	 */
    346 	int atomic = sosendallatonce(so) || top;
    347 
    348 	if (uio)
    349 		resid = uio->uio_resid;
    350 	else
    351 		resid = top->m_pkthdr.len;
    352 	/*
    353 	 * In theory resid should be unsigned.
    354 	 * However, space must be signed, as it might be less than 0
    355 	 * if we over-committed, and we must use a signed comparison
    356 	 * of space and resid.  On the other hand, a negative resid
    357 	 * causes us to loop sending 0-length segments to the protocol.
    358 	 */
    359 	if (resid < 0) {
    360 		error = EINVAL;
    361 		goto out;
    362 	}
        	/*
        	 * SO_DONTROUTE is switched on just around the protocol call
        	 * below when MSG_DONTROUTE was requested, the option is not
        	 * already set on the socket, and the protocol is PR_ATOMIC.
        	 */
    363 	dontroute =
    364 	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
    365 	    (so->so_proto->pr_flags & PR_ATOMIC);
    366 	p->p_stats->p_ru.ru_msgsnd++;
    367 	if (control)
    368 		clen = control->m_len;
        	/*
        	 * snderr: record the error, restore the interrupt priority
        	 * saved in "s", and leave through "release" so the send
        	 * buffer lock is dropped.
        	 */
    369 #define	snderr(errno)	{ error = errno; splx(s); goto release; }
    370 
        	/* Taken again from here after every sleep in sbwait(). */
    371 restart:
    372 	if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0)
    373 		goto out;
    374 	do {
    375 		s = splsoftnet();
    376 		if (so->so_state & SS_CANTSENDMORE)
    377 			snderr(EPIPE);
    378 		if (so->so_error)
    379 			snderr(so->so_error);
    380 		if ((so->so_state & SS_ISCONNECTED) == 0) {
    381 			if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
        				/*
        				 * Unconnected: only a socket that is still
        				 * confirming, or a control-only message
        				 * (no data, some control), gets through.
        				 */
    382 				if ((so->so_state & SS_ISCONFIRMING) == 0 &&
    383 				    !(resid == 0 && clen != 0))
    384 					snderr(ENOTCONN);
    385 			} else if (addr == 0)
    386 				snderr(EDESTADDRREQ);
    387 		}
    388 		space = sbspace(&so->so_snd);
        		/* Out-of-band data is granted 1024 bytes of extra room. */
    389 		if (flags & MSG_OOB)
    390 			space += 1024;
    391 		if ((atomic && resid > so->so_snd.sb_hiwat) ||
    392 		    clen > so->so_snd.sb_hiwat)
    393 			snderr(EMSGSIZE);
        		/*
        		 * Not everything fits right now.  For uio-described data,
        		 * wait for room (or fail with EWOULDBLOCK on a non-blocking
        		 * socket) when the send is atomic, when space is below the
        		 * low-water mark, or when even the control data does not fit.
        		 */
    394 		if (space < resid + clen && uio &&
    395 		    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
    396 			if (so->so_state & SS_NBIO)
    397 				snderr(EWOULDBLOCK);
    398 			sbunlock(&so->so_snd);
    399 			error = sbwait(&so->so_snd);
    400 			splx(s);
    401 			if (error)
    402 				goto out;
    403 			goto restart;
    404 		}
    405 		splx(s);
    406 		mp = &top;
    407 		space -= clen;
        		/*
        		 * Each pass of this loop builds one chain in "top" (unless
        		 * it was supplied by the caller) and passes it to the protocol.
        		 */
    408 		do {
    409 		    if (uio == NULL) {
    410 			/*
    411 			 * Data is prepackaged in "top".
    412 			 */
    413 			resid = 0;
    414 			if (flags & MSG_EOR)
    415 				top->m_flags |= M_EOR;
    416 		    } else do {
        			/* The first mbuf of a chain carries the packet header. */
    417 			if (top == 0) {
    418 				MGETHDR(m, M_WAIT, MT_DATA);
    419 				mlen = MHLEN;
    420 				m->m_pkthdr.len = 0;
    421 				m->m_pkthdr.rcvif = (struct ifnet *)0;
    422 			} else {
    423 				MGET(m, M_WAIT, MT_DATA);
    424 				mlen = MLEN;
    425 			}
        			/*
        			 * Use a cluster when enough data remains and the
        			 * buffer has room for one; fall back to the plain
        			 * mbuf if no cluster could be attached.
        			 */
    426 			if (resid >= MINCLSIZE && space >= MCLBYTES) {
    427 				MCLGET(m, M_WAIT);
    428 				if ((m->m_flags & M_EXT) == 0)
    429 					goto nopages;
    430 				mlen = MCLBYTES;
    431 #ifdef	MAPPED_MBUFS
    432 				len = min(MCLBYTES, resid);
    433 #else
        				/*
        				 * First cluster of an atomic send: skip max_hdr
        				 * bytes at the front of the data area.
        				 */
    434 				if (atomic && top == 0) {
    435 					len = min(MCLBYTES - max_hdr, resid);
    436 					m->m_data += max_hdr;
    437 				} else
    438 					len = min(MCLBYTES, resid);
    439 #endif
    440 				space -= len;
    441 			} else {
    442 nopages:
    443 				len = min(min(mlen, resid), space);
    444 				space -= len;
    445 				/*
    446 				 * For datagram protocols, leave room
    447 				 * for protocol headers in first mbuf.
    448 				 */
    449 				if (atomic && top == 0 && len < mlen)
    450 					MH_ALIGN(m, len);
    451 			}
        			/*
        			 * Copy the user data in and link the mbuf onto the
        			 * chain before checking the error, so that a failed
        			 * copy still leaves it on "top" to be freed at "out".
        			 */
    452 			error = uiomove(mtod(m, caddr_t), (int)len, uio);
    453 			resid = uio->uio_resid;
    454 			m->m_len = len;
    455 			*mp = m;
    456 			top->m_pkthdr.len += len;
    457 			if (error)
    458 				goto release;
    459 			mp = &m->m_next;
    460 			if (resid <= 0) {
    461 				if (flags & MSG_EOR)
    462 					top->m_flags |= M_EOR;
    463 				break;
    464 			}
        		    /* Non-atomic sends pass down one mbuf per request. */
    465 		    } while (space > 0 && atomic);
    466 		    if (dontroute)
    467 			    so->so_options |= SO_DONTROUTE;
    468 		    s = splsoftnet();				/* XXX */
    469 		    error = (*so->so_proto->pr_usrreq)(so,
    470 			(flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND,
    471 			top, addr, control, p);
    472 		    splx(s);
    473 		    if (dontroute)
    474 			    so->so_options &= ~SO_DONTROUTE;
        		    /*
        		     * The chain and the control mbuf have been handed to
        		     * the protocol; forget them so they are neither sent
        		     * again nor freed at "out".
        		     */
    475 		    clen = 0;
    476 		    control = 0;
    477 		    top = 0;
    478 		    mp = &top;
    479 		    if (error)
    480 			goto release;
    481 		} while (resid && space > 0);
    482 	} while (resid);
    483 
    484 release:
    485 	sbunlock(&so->so_snd);
    486 out:
        	/* Anything still referenced here was never passed to the protocol. */
    487 	if (top)
    488 		m_freem(top);
    489 	if (control)
    490 		m_freem(control);
    491 	return (error);
    492 }
    493 
    494 /*
    495  * Implement receive operations on a socket.
    496  * We depend on the way that records are added to the sockbuf
    497  * by sbappend*.  In particular, each record (mbufs linked through m_next)
    498  * must begin with an address if the protocol so specifies,
    499  * followed by an optional mbuf or mbufs containing ancillary data,
    500  * and then zero or more mbufs of data.
    501  * In order to avoid blocking network interrupts for the entire time here,
    502  * we splx() while doing the actual copy to user space.
    503  * Although the sockbuf is locked, new data may still be appended,
    504  * and thus we must maintain consistency of the sockbuf during that time.
    505  *
    506  * The caller may receive the data as a single mbuf chain by supplying
    507  * an mbuf **mp0 for use in returning the chain.  The uio is then used
    508  * only for the count in uio_resid.
    509  */
        /*
         * Arguments, as used by the code below:
         *	so	socket to receive from.
         *	paddr	if non-null, cleared on entry and set to the address
         *		mbuf of a PR_ADDR record (a copy when peeking).
         *	uio	destination of the data and source of the byte count.
         *	mp0	if non-null, receives the data as an mbuf chain instead
         *		of the data being copied out through uio.
         *	controlp if non-null, cleared on entry and set to the control
         *		mbufs found at the front of the record.
         *	flagsp	if non-null, supplies the MSG_* request flags (MSG_EOR
         *		is masked off on input) and has the resulting flags
         *		or'ed into it on the normal return path.
         */
    510 int
    511 soreceive(so, paddr, uio, mp0, controlp, flagsp)
    512 	register struct socket *so;
    513 	struct mbuf **paddr;
    514 	struct uio *uio;
    515 	struct mbuf **mp0;
    516 	struct mbuf **controlp;
    517 	int *flagsp;
    518 {
    519 	register struct mbuf *m, **mp;
    520 	register int flags, len, error, s, offset;
    521 	struct protosw *pr = so->so_proto;
    522 	struct mbuf *nextrecord;
    523 	int moff, type = 0;
        	/* Compared with uio_resid at the end to detect "nothing returned". */
    524 	int orig_resid = uio->uio_resid;
    525 
    526 	mp = mp0;
    527 	if (paddr)
    528 		*paddr = 0;
    529 	if (controlp)
    530 		*controlp = 0;
    531 	if (flagsp)
    532 		flags = *flagsp &~ MSG_EOR;
    533 	else
    534 		flags = 0;
        	/*
        	 * Out-of-band request: fetch the data from the protocol with
        	 * PRU_RCVOOB into a fresh mbuf and copy it out; the receive
        	 * buffer itself is not examined on this path.
        	 */
    535 	if (flags & MSG_OOB) {
    536 		m = m_get(M_WAIT, MT_DATA);
    537 		error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m,
    538 		    (struct mbuf *)(long)(flags & MSG_PEEK), (struct mbuf *)0,
    539 		    (struct proc *)0);
    540 		if (error)
    541 			goto bad;
    542 		do {
    543 			error = uiomove(mtod(m, caddr_t),
    544 			    (int) min(uio->uio_resid, m->m_len), uio);
    545 			m = m_free(m);
    546 		} while (uio->uio_resid && error == 0 && m);
    547 bad:
    548 		if (m)
    549 			m_freem(m);
    550 		return (error);
    551 	}
    552 	if (mp)
    553 		*mp = (struct mbuf *)0;
    554 	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
    555 		(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
    556 		    (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
    557 
        	/* Every path that sleeps or retries re-enters here to relock. */
    558 restart:
    559 	if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0)
    560 		return (error);
    561 	s = splsoftnet();
    562 
    563 	m = so->so_rcv.sb_mb;
    564 	/*
    565 	 * If we have less data than requested, block awaiting more
    566 	 * (subject to any timeout) if:
    567 	 *   1. the current count is less than the low water mark,
    568 	 *   2. MSG_WAITALL is set, and it is possible to do the entire
    569 	 *	receive operation at once if we block (resid <= hiwat), or
    570 	 *   3. MSG_DONTWAIT is not set.
    571 	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
    572 	 * we have to do the receive in sections, and thus risk returning
    573 	 * a short count if a timeout or signal occurs after we start.
    574 	 */
    575 	if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
    576 	    so->so_rcv.sb_cc < uio->uio_resid) &&
    577 	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
    578 	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
    579 	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
    580 #ifdef DIAGNOSTIC
    581 		if (m == 0 && so->so_rcv.sb_cc)
    582 			panic("receive 1");
    583 #endif
        		/*
        		 * A pending error or end-of-input does not discard data
        		 * that is already buffered: deliver that first.  The
        		 * error is only consumed here when not peeking.
        		 */
    584 		if (so->so_error) {
    585 			if (m)
    586 				goto dontblock;
    587 			error = so->so_error;
    588 			if ((flags & MSG_PEEK) == 0)
    589 				so->so_error = 0;
    590 			goto release;
    591 		}
    592 		if (so->so_state & SS_CANTRCVMORE) {
    593 			if (m)
    594 				goto dontblock;
    595 			else
    596 				goto release;
    597 		}
        		/*
        		 * Buffered data containing out-of-band data or an
        		 * end-of-record mark is returned now rather than
        		 * waiting for more to arrive.
        		 */
    598 		for (; m; m = m->m_next)
    599 			if (m->m_type == MT_OOBDATA  || (m->m_flags & M_EOR)) {
    600 				m = so->so_rcv.sb_mb;
    601 				goto dontblock;
    602 			}
    603 		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
    604 		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
    605 			error = ENOTCONN;
    606 			goto release;
    607 		}
    608 		if (uio->uio_resid == 0)
    609 			goto release;
    610 		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
    611 			error = EWOULDBLOCK;
    612 			goto release;
    613 		}
        		/* Drop the lock while sleeping, then start over. */
    614 		sbunlock(&so->so_rcv);
    615 		error = sbwait(&so->so_rcv);
    616 		splx(s);
    617 		if (error)
    618 			return (error);
    619 		goto restart;
    620 	}
        	/* Here m is the first mbuf of the record that will be returned. */
    621 dontblock:
    622 #ifdef notyet /* XXXX */
    623 	if (uio->uio_procp)
    624 		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
    625 #endif
    626 	nextrecord = m->m_nextpkt;
        	/*
        	 * Address part of the record: return it through paddr (a copy
        	 * when peeking, the mbuf itself otherwise) or free it.
        	 * orig_resid is zeroed so the "nothing returned" retry at the
        	 * end does not trigger.
        	 */
    627 	if (pr->pr_flags & PR_ADDR) {
    628 #ifdef DIAGNOSTIC
    629 		if (m->m_type != MT_SONAME)
    630 			panic("receive 1a");
    631 #endif
    632 		orig_resid = 0;
    633 		if (flags & MSG_PEEK) {
    634 			if (paddr)
    635 				*paddr = m_copy(m, 0, m->m_len);
    636 			m = m->m_next;
    637 		} else {
    638 			sbfree(&so->so_rcv, m);
    639 			if (paddr) {
    640 				*paddr = m;
    641 				so->so_rcv.sb_mb = m->m_next;
    642 				m->m_next = 0;
    643 				m = so->so_rcv.sb_mb;
    644 			} else {
    645 				MFREE(m, so->so_rcv.sb_mb);
    646 				m = so->so_rcv.sb_mb;
    647 			}
    648 		}
    649 	}
        	/*
        	 * Control part of the record: each MT_CONTROL mbuf is copied
        	 * (peek), unlinked and returned through controlp, or freed.
        	 * SCM_RIGHTS messages are passed to the domain's
        	 * dom_externalize hook, if any, before being returned.
        	 */
    650 	while (m && m->m_type == MT_CONTROL && error == 0) {
    651 		if (flags & MSG_PEEK) {
    652 			if (controlp)
    653 				*controlp = m_copy(m, 0, m->m_len);
    654 			m = m->m_next;
    655 		} else {
    656 			sbfree(&so->so_rcv, m);
    657 			if (controlp) {
    658 				if (pr->pr_domain->dom_externalize &&
    659 				    mtod(m, struct cmsghdr *)->cmsg_type ==
    660 				    SCM_RIGHTS)
    661 				   error = (*pr->pr_domain->dom_externalize)(m);
    662 				*controlp = m;
    663 				so->so_rcv.sb_mb = m->m_next;
    664 				m->m_next = 0;
    665 				m = so->so_rcv.sb_mb;
    666 			} else {
    667 				MFREE(m, so->so_rcv.sb_mb);
    668 				m = so->so_rcv.sb_mb;
    669 			}
    670 		}
        		/* Advance so the next control mbuf is chained after this one. */
    671 		if (controlp) {
    672 			orig_resid = 0;
    673 			controlp = &(*controlp)->m_next;
    674 		}
    675 	}
        	/*
        	 * m is now the first data mbuf (if any).  When not peeking it
        	 * has become the head of the record, so it must carry the
        	 * link to the next record.  "type" remembers whether this
        	 * run of data is out-of-band.
        	 */
    676 	if (m) {
    677 		if ((flags & MSG_PEEK) == 0)
    678 			m->m_nextpkt = nextrecord;
    679 		type = m->m_type;
    680 		if (type == MT_OOBDATA)
    681 			flags |= MSG_OOB;
    682 	}
        	/* moff/offset track the peek position in the mbuf / the stream. */
    683 	moff = 0;
    684 	offset = 0;
    685 	while (m && uio->uio_resid > 0 && error == 0) {
        		/* Stop where out-of-band and normal data meet. */
    686 		if (m->m_type == MT_OOBDATA) {
    687 			if (type != MT_OOBDATA)
    688 				break;
    689 		} else if (type == MT_OOBDATA)
    690 			break;
    691 #ifdef DIAGNOSTIC
    692 		else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
    693 			panic("receive 3");
    694 #endif
    695 		so->so_state &= ~SS_RCVATMARK;
        		/*
        		 * Transfer at most what the caller asked for, what
        		 * remains before the out-of-band mark, and what is
        		 * left in this mbuf.
        		 */
    696 		len = uio->uio_resid;
    697 		if (so->so_oobmark && len > so->so_oobmark - offset)
    698 			len = so->so_oobmark - offset;
    699 		if (len > m->m_len - moff)
    700 			len = m->m_len - moff;
    701 		/*
    702 		 * If mp is set, just pass back the mbufs.
    703 		 * Otherwise copy them out via the uio, then free.
    704 		 * Sockbuf must be consistent here (points to current mbuf,
    705 		 * it points to next record) when we drop priority;
    706 		 * we must note any additions to the sockbuf when we
    707 		 * block interrupts again.
    708 		 */
    709 		if (mp == 0) {
    710 			splx(s);
    711 			error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
    712 			s = splsoftnet();
    713 		} else
    714 			uio->uio_resid -= len;
        		/*
        		 * Whole mbuf consumed: step past it when peeking,
        		 * otherwise unlink it (handing it to the caller's
        		 * chain or freeing it) and move the next-record link
        		 * to the new head.
        		 */
    715 		if (len == m->m_len - moff) {
    716 			if (m->m_flags & M_EOR)
    717 				flags |= MSG_EOR;
    718 			if (flags & MSG_PEEK) {
    719 				m = m->m_next;
    720 				moff = 0;
    721 			} else {
    722 				nextrecord = m->m_nextpkt;
    723 				sbfree(&so->so_rcv, m);
    724 				if (mp) {
    725 					*mp = m;
    726 					mp = &m->m_next;
    727 					so->so_rcv.sb_mb = m = m->m_next;
    728 					*mp = (struct mbuf *)0;
    729 				} else {
    730 					MFREE(m, so->so_rcv.sb_mb);
    731 					m = so->so_rcv.sb_mb;
    732 				}
    733 				if (m)
    734 					m->m_nextpkt = nextrecord;
    735 			}
    736 		} else {
        			/*
        			 * Partial mbuf: remember the offset when peeking,
        			 * otherwise trim the consumed bytes off the front
        			 * (returning a copy of them when mp is set).
        			 */
    737 			if (flags & MSG_PEEK)
    738 				moff += len;
    739 			else {
    740 				if (mp)
    741 					*mp = m_copym(m, 0, len, M_WAIT);
    742 				m->m_data += len;
    743 				m->m_len -= len;
    744 				so->so_rcv.sb_cc -= len;
    745 			}
    746 		}
        		/*
        		 * Account for progress toward the out-of-band mark and
        		 * stop once it is reached; SS_RCVATMARK is only set
        		 * when the data was really consumed.
        		 */
    747 		if (so->so_oobmark) {
    748 			if ((flags & MSG_PEEK) == 0) {
    749 				so->so_oobmark -= len;
    750 				if (so->so_oobmark == 0) {
    751 					so->so_state |= SS_RCVATMARK;
    752 					break;
    753 				}
    754 			} else {
    755 				offset += len;
    756 				if (offset == so->so_oobmark)
    757 					break;
    758 			}
    759 		}
    760 		if (flags & MSG_EOR)
    761 			break;
    762 		/*
    763 		 * If the MSG_WAITALL flag is set (for non-atomic socket),
    764 		 * we must not quit until "uio->uio_resid == 0" or an error
    765 		 * termination.  If a signal/timeout occurs, return
    766 		 * with a short count but without error.
    767 		 * Keep sockbuf locked against other readers.
    768 		 */
    769 		while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
    770 		    !sosendallatonce(so) && !nextrecord) {
    771 			if (so->so_error || so->so_state & SS_CANTRCVMORE)
    772 				break;
    773 			error = sbwait(&so->so_rcv);
    774 			if (error) {
    775 				sbunlock(&so->so_rcv);
    776 				splx(s);
    777 				return (0);
    778 			}
    779 			if ((m = so->so_rcv.sb_mb) != NULL)
    780 				nextrecord = m->m_nextpkt;
    781 		}
    782 	}
    783 
        	/*
        	 * Data left over in a record of an atomic protocol: report
        	 * truncation and, unless peeking, drop the rest of the record.
        	 */
    784 	if (m && pr->pr_flags & PR_ATOMIC) {
    785 		flags |= MSG_TRUNC;
    786 		if ((flags & MSG_PEEK) == 0)
    787 			(void) sbdroprecord(&so->so_rcv);
    788 	}
        	/*
        	 * After a real (non-peek) read: if the record was used up,
        	 * make the next record the head of the buffer, and tell a
        	 * PR_WANTRCVD protocol that data was taken.
        	 */
    789 	if ((flags & MSG_PEEK) == 0) {
    790 		if (m == 0)
    791 			so->so_rcv.sb_mb = nextrecord;
    792 		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
    793 			(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
    794 			    (struct mbuf *)(long)flags, (struct mbuf *)0,
    795 			    (struct proc *)0);
    796 	}
        	/*
        	 * The caller asked for data but uio_resid is unchanged, no
        	 * address or control was returned (orig_resid still nonzero),
        	 * and neither end-of-record nor end-of-input was seen:
        	 * go back and try again.
        	 */
    797 	if (orig_resid == uio->uio_resid && orig_resid &&
    798 	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
    799 		sbunlock(&so->so_rcv);
    800 		splx(s);
    801 		goto restart;
    802 	}
    803 
    804 	if (flagsp)
    805 		*flagsp |= flags;
    806 release:
    807 	sbunlock(&so->so_rcv);
    808 	splx(s);
    809 	return (error);
    810 }
    811 
    812 int
    813 soshutdown(so, how)
    814 	struct socket *so;
    815 	int how;
    816 {
    817 	struct protosw *pr = so->so_proto;
    818 
    819 	if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR))
    820 		return (EINVAL);
    821 
    822 	if (how == SHUT_RD || how == SHUT_RDWR)
    823 		sorflush(so);
    824 	if (how == SHUT_WR || how == SHUT_RDWR)
    825 		return (*pr->pr_usrreq)(so, PRU_SHUTDOWN, (struct mbuf *)0,
    826 		    (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
    827 	return (0);
    828 }
    829 
    830 void
    831 sorflush(so)
    832 	register struct socket *so;
    833 {
    834 	register struct sockbuf *sb = &so->so_rcv;
    835 	register struct protosw *pr = so->so_proto;
    836 	register int s;
    837 	struct sockbuf asb;
    838 
    839 	sb->sb_flags |= SB_NOINTR;
    840 	(void) sblock(sb, M_WAITOK);
    841 	s = splimp();
    842 	socantrcvmore(so);
    843 	sbunlock(sb);
    844 	asb = *sb;
    845 	bzero((caddr_t)sb, sizeof (*sb));
    846 	splx(s);
    847 	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
    848 		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
    849 	sbrelease(&asb);
    850 }
    851 
    852 int
    853 sosetopt(so, level, optname, m0)
    854 	register struct socket *so;
    855 	int level, optname;
    856 	struct mbuf *m0;
    857 {
    858 	int error = 0;
    859 	register struct mbuf *m = m0;
    860 
    861 	if (level != SOL_SOCKET) {
    862 		if (so->so_proto && so->so_proto->pr_ctloutput)
    863 			return ((*so->so_proto->pr_ctloutput)
    864 				  (PRCO_SETOPT, so, level, optname, &m0));
    865 		error = ENOPROTOOPT;
    866 	} else {
    867 		switch (optname) {
    868 
    869 		case SO_LINGER:
    870 			if (m == NULL || m->m_len != sizeof (struct linger)) {
    871 				error = EINVAL;
    872 				goto bad;
    873 			}
    874 			so->so_linger = mtod(m, struct linger *)->l_linger;
    875 			/* fall thru... */
    876 
    877 		case SO_DEBUG:
    878 		case SO_KEEPALIVE:
    879 		case SO_DONTROUTE:
    880 		case SO_USELOOPBACK:
    881 		case SO_BROADCAST:
    882 		case SO_REUSEADDR:
    883 		case SO_REUSEPORT:
    884 		case SO_OOBINLINE:
    885 		case SO_TIMESTAMP:
    886 			if (m == NULL || m->m_len < sizeof (int)) {
    887 				error = EINVAL;
    888 				goto bad;
    889 			}
    890 			if (*mtod(m, int *))
    891 				so->so_options |= optname;
    892 			else
    893 				so->so_options &= ~optname;
    894 			break;
    895 
    896 		case SO_SNDBUF:
    897 		case SO_RCVBUF:
    898 		case SO_SNDLOWAT:
    899 		case SO_RCVLOWAT:
    900 		    {
    901 			int optval;
    902 
    903 			if (m == NULL || m->m_len < sizeof (int)) {
    904 				error = EINVAL;
    905 				goto bad;
    906 			}
    907 
    908 			/*
    909 			 * Values < 1 make no sense for any of these
    910 			 * options, so disallow them.
    911 			 */
    912 			optval = *mtod(m, int *);
    913 			if (optval < 1) {
    914 				error = EINVAL;
    915 				goto bad;
    916 			}
    917 
    918 			switch (optname) {
    919 
    920 			case SO_SNDBUF:
    921 			case SO_RCVBUF:
    922 				if (sbreserve(optname == SO_SNDBUF ?
    923 				    &so->so_snd : &so->so_rcv,
    924 				    (u_long) optval) == 0) {
    925 					error = ENOBUFS;
    926 					goto bad;
    927 				}
    928 				break;
    929 
    930 			/*
    931 			 * Make sure the low-water is never greater than
    932 			 * the high-water.
    933 			 */
    934 			case SO_SNDLOWAT:
    935 				so->so_snd.sb_lowat =
    936 				    (optval > so->so_snd.sb_hiwat) ?
    937 				    so->so_snd.sb_hiwat : optval;
    938 				break;
    939 			case SO_RCVLOWAT:
    940 				so->so_rcv.sb_lowat =
    941 				    (optval > so->so_rcv.sb_hiwat) ?
    942 				    so->so_rcv.sb_hiwat : optval;
    943 				break;
    944 			}
    945 			break;
    946 		    }
    947 
    948 		case SO_SNDTIMEO:
    949 		case SO_RCVTIMEO:
    950 		    {
    951 			struct timeval *tv;
    952 			short val;
    953 
    954 			if (m == NULL || m->m_len < sizeof (*tv)) {
    955 				error = EINVAL;
    956 				goto bad;
    957 			}
    958 			tv = mtod(m, struct timeval *);
    959 			if (tv->tv_sec * hz + tv->tv_usec / tick > SHRT_MAX) {
    960 				error = EDOM;
    961 				goto bad;
    962 			}
    963 			val = tv->tv_sec * hz + tv->tv_usec / tick;
    964 
    965 			switch (optname) {
    966 
    967 			case SO_SNDTIMEO:
    968 				so->so_snd.sb_timeo = val;
    969 				break;
    970 			case SO_RCVTIMEO:
    971 				so->so_rcv.sb_timeo = val;
    972 				break;
    973 			}
    974 			break;
    975 		    }
    976 
    977 		default:
    978 			error = ENOPROTOOPT;
    979 			break;
    980 		}
    981 		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
    982 			(void) ((*so->so_proto->pr_ctloutput)
    983 				  (PRCO_SETOPT, so, level, optname, &m0));
    984 			m = NULL;	/* freed by protocol */
    985 		}
    986 	}
    987 bad:
    988 	if (m)
    989 		(void) m_free(m);
    990 	return (error);
    991 }
    992 
    993 int
    994 sogetopt(so, level, optname, mp)
    995 	register struct socket *so;
    996 	int level, optname;
    997 	struct mbuf **mp;
    998 {
    999 	register struct mbuf *m;
   1000 
   1001 	if (level != SOL_SOCKET) {
   1002 		if (so->so_proto && so->so_proto->pr_ctloutput) {
   1003 			return ((*so->so_proto->pr_ctloutput)
   1004 				  (PRCO_GETOPT, so, level, optname, mp));
   1005 		} else
   1006 			return (ENOPROTOOPT);
   1007 	} else {
   1008 		m = m_get(M_WAIT, MT_SOOPTS);
   1009 		m->m_len = sizeof (int);
   1010 
   1011 		switch (optname) {
   1012 
   1013 		case SO_LINGER:
   1014 			m->m_len = sizeof (struct linger);
   1015 			mtod(m, struct linger *)->l_onoff =
   1016 				so->so_options & SO_LINGER;
   1017 			mtod(m, struct linger *)->l_linger = so->so_linger;
   1018 			break;
   1019 
   1020 		case SO_USELOOPBACK:
   1021 		case SO_DONTROUTE:
   1022 		case SO_DEBUG:
   1023 		case SO_KEEPALIVE:
   1024 		case SO_REUSEADDR:
   1025 		case SO_REUSEPORT:
   1026 		case SO_BROADCAST:
   1027 		case SO_OOBINLINE:
   1028 		case SO_TIMESTAMP:
   1029 			*mtod(m, int *) = so->so_options & optname;
   1030 			break;
   1031 
   1032 		case SO_TYPE:
   1033 			*mtod(m, int *) = so->so_type;
   1034 			break;
   1035 
   1036 		case SO_ERROR:
   1037 			*mtod(m, int *) = so->so_error;
   1038 			so->so_error = 0;
   1039 			break;
   1040 
   1041 		case SO_SNDBUF:
   1042 			*mtod(m, int *) = so->so_snd.sb_hiwat;
   1043 			break;
   1044 
   1045 		case SO_RCVBUF:
   1046 			*mtod(m, int *) = so->so_rcv.sb_hiwat;
   1047 			break;
   1048 
   1049 		case SO_SNDLOWAT:
   1050 			*mtod(m, int *) = so->so_snd.sb_lowat;
   1051 			break;
   1052 
   1053 		case SO_RCVLOWAT:
   1054 			*mtod(m, int *) = so->so_rcv.sb_lowat;
   1055 			break;
   1056 
   1057 		case SO_SNDTIMEO:
   1058 		case SO_RCVTIMEO:
   1059 		    {
   1060 			int val = (optname == SO_SNDTIMEO ?
   1061 			     so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
   1062 
   1063 			m->m_len = sizeof(struct timeval);
   1064 			mtod(m, struct timeval *)->tv_sec = val / hz;
   1065 			mtod(m, struct timeval *)->tv_usec =
   1066 			    (val % hz) * tick;
   1067 			break;
   1068 		    }
   1069 
   1070 		default:
   1071 			(void)m_free(m);
   1072 			return (ENOPROTOOPT);
   1073 		}
   1074 		*mp = m;
   1075 		return (0);
   1076 	}
   1077 }
   1078 
   1079 void
   1080 sohasoutofband(so)
   1081 	register struct socket *so;
   1082 {
   1083 	struct proc *p;
   1084 
   1085 	if (so->so_pgid < 0)
   1086 		gsignal(-so->so_pgid, SIGURG);
   1087 	else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
   1088 		psignal(p, SIGURG);
   1089 	selwakeup(&so->so_rcv.sb_sel);
   1090 }
   1091