      1 /*	$NetBSD: uipc_socket.c,v 1.38 1998/08/04 04:03:17 perry Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 1982, 1986, 1988, 1990, 1993
      5  *	The Regents of the University of California.  All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. All advertising materials mentioning features or use of this software
     16  *    must display the following acknowledgement:
     17  *	This product includes software developed by the University of
     18  *	California, Berkeley and its contributors.
     19  * 4. Neither the name of the University nor the names of its contributors
     20  *    may be used to endorse or promote products derived from this software
     21  *    without specific prior written permission.
     22  *
     23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     33  * SUCH DAMAGE.
     34  *
     35  *	@(#)uipc_socket.c	8.6 (Berkeley) 5/2/95
     36  */
     37 
     38 #include "opt_compat_sunos.h"
     39 
     40 #include <sys/param.h>
     41 #include <sys/systm.h>
     42 #include <sys/proc.h>
     43 #include <sys/file.h>
     44 #include <sys/malloc.h>
     45 #include <sys/mbuf.h>
     46 #include <sys/domain.h>
     47 #include <sys/kernel.h>
     48 #include <sys/protosw.h>
     49 #include <sys/socket.h>
     50 #include <sys/socketvar.h>
     51 #include <sys/signalvar.h>
     52 #include <sys/resourcevar.h>
     53 #include <sys/pool.h>
     54 
     55 struct pool socket_pool;
     56 
     57 void
     58 soinit()
     59 {
     60 
     61 	pool_init(&socket_pool, sizeof(struct socket), 0, 0, 0,
     62 	    "sockpl", 0, NULL, NULL, M_SOCKET);
     63 }
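
         /*
          * The pool initialized above backs the pool_get()/pool_put() calls
          * in socreate() and sofree() below: every socket structure is drawn
          * from socket_pool, sized to sizeof(struct socket) and charged to
          * the M_SOCKET malloc type.
          */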
     64 
     65 /*
     66  * Socket operation routines.
     67  * These routines are called by the routines in
     68  * sys_socket.c or from a system process, and
     69  * implement the semantics of socket operations by
     70  * switching out to the protocol specific routines.
     71  */
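
         /*
          * All of the routines below funnel into the protocol's pr_usrreq
          * entry point with the same argument convention, sketched here for
          * reference (the request names and argument roles follow the calls
          * made in this file; the protocol side is assumed, not shown):
          *
          *	error = (*so->so_proto->pr_usrreq)(so,
          *	    req,	-- PRU_ATTACH, PRU_BIND, PRU_SEND, ...
          *	    m,		-- data mbuf chain, or NULL
          *	    nam,	-- address or request-specific mbuf, or NULL
          *	    control,	-- ancillary-data mbuf, or NULL
          *	    p);		-- requesting process, or NULL
          *
          * Unused mbuf arguments are passed as (struct mbuf *)0, as in the
          * callers below.
          */
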
     72 /*ARGSUSED*/
     73 int
     74 socreate(dom, aso, type, proto)
     75 	int dom;
     76 	struct socket **aso;
     77 	register int type;
     78 	int proto;
     79 {
     80 	struct proc *p = curproc;		/* XXX */
     81 	register struct protosw *prp;
     82 	register struct socket *so;
     83 	register int error;
     84 
     85 	if (proto)
     86 		prp = pffindproto(dom, proto, type);
     87 	else
     88 		prp = pffindtype(dom, type);
     89 	if (prp == 0 || prp->pr_usrreq == 0)
     90 		return (EPROTONOSUPPORT);
     91 	if (prp->pr_type != type)
     92 		return (EPROTOTYPE);
     93 	so = pool_get(&socket_pool, PR_WAITOK);
     94 	memset((caddr_t)so, 0, sizeof(*so));
     95 	TAILQ_INIT(&so->so_q0);
     96 	TAILQ_INIT(&so->so_q);
     97 	so->so_type = type;
     98 	so->so_proto = prp;
     99 	so->so_send = sosend;
    100 	so->so_receive = soreceive;
    101 	error = (*prp->pr_usrreq)(so, PRU_ATTACH, (struct mbuf *)0,
    102 	    (struct mbuf *)(long)proto, (struct mbuf *)0, p);
    103 	if (error) {
    104 		so->so_state |= SS_NOFDREF;
    105 		sofree(so);
    106 		return (error);
    107 	}
    108 #ifdef COMPAT_SUNOS
    109 	{
    110 		extern struct emul emul_sunos;
    111 		if (p->p_emul == &emul_sunos && type == SOCK_DGRAM)
    112 			so->so_options |= SO_BROADCAST;
    113 	}
    114 #endif
    115 	*aso = so;
    116 	return (0);
    117 }
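
         /*
          * Illustrative only: a caller such as the socket(2) system call
          * path would typically create and later release a socket roughly as
          * follows (file-descriptor plumbing and error handling omitted; the
          * local names are hypothetical):
          *
          *	struct socket *so;
          *	int error;
          *
          *	error = socreate(AF_INET, &so, SOCK_STREAM, 0);
          *	if (error == 0) {
          *		... sobind()/soconnect()/sosend() etc. on so ...
          *		error = soclose(so);
          *	}
          */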
    118 
    119 int
    120 sobind(so, nam)
    121 	struct socket *so;
    122 	struct mbuf *nam;
    123 {
    124 	struct proc *p = curproc;		/* XXX */
    125 	int s = splsoftnet();
    126 	int error;
    127 
    128 	error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, (struct mbuf *)0,
    129 	    nam, (struct mbuf *)0, p);
    130 	splx(s);
    131 	return (error);
    132 }
    133 
    134 int
    135 solisten(so, backlog)
    136 	register struct socket *so;
    137 	int backlog;
    138 {
    139 	int s = splsoftnet(), error;
    140 
    141 	error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, (struct mbuf *)0,
    142 	    (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
    143 	if (error) {
    144 		splx(s);
    145 		return (error);
    146 	}
    147 	if (so->so_q.tqh_first == NULL)
    148 		so->so_options |= SO_ACCEPTCONN;
    149 	if (backlog < 0)
    150 		backlog = 0;
    151 	so->so_qlimit = min(backlog, SOMAXCONN);
    152 	splx(s);
    153 	return (0);
    154 }
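
         /*
          * Note that the accept queue limit is clamped to [0, SOMAXCONN]:
          * for example, solisten(so, 1000) leaves so_qlimit equal to
          * SOMAXCONN (as defined in <sys/socket.h>), while a negative
          * backlog is treated as 0.
          */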
    155 
    156 void
    157 sofree(so)
    158 	register struct socket *so;
    159 {
    160 
    161 	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
    162 		return;
    163 	if (so->so_head) {
    164 		if (!soqremque(so, 0) && !soqremque(so, 1))
    165 			panic("sofree dq");
    166 		so->so_head = 0;
    167 	}
    168 	sbrelease(&so->so_snd);
    169 	sorflush(so);
    170 	pool_put(&socket_pool, so);
    171 }
    172 
    173 /*
    174  * Close a socket on last file table reference removal.
    175  * Initiate disconnect if connected.
    176  * Free socket when disconnect complete.
    177  */
    178 int
    179 soclose(so)
    180 	register struct socket *so;
    181 {
    182 	int s = splsoftnet();		/* conservative */
    183 	int error = 0;
    184 
    185 	if (so->so_options & SO_ACCEPTCONN) {
    186 		while (so->so_q0.tqh_first)
    187 			(void) soabort(so->so_q0.tqh_first);
    188 		while (so->so_q.tqh_first)
    189 			(void) soabort(so->so_q.tqh_first);
    190 	}
    191 	if (so->so_pcb == 0)
    192 		goto discard;
    193 	if (so->so_state & SS_ISCONNECTED) {
    194 		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
    195 			error = sodisconnect(so);
    196 			if (error)
    197 				goto drop;
    198 		}
    199 		if (so->so_options & SO_LINGER) {
    200 			if ((so->so_state & SS_ISDISCONNECTING) &&
    201 			    (so->so_state & SS_NBIO))
    202 				goto drop;
    203 			while (so->so_state & SS_ISCONNECTED) {
    204 				error = tsleep((caddr_t)&so->so_timeo,
    205 					       PSOCK | PCATCH, netcls,
    206 					       so->so_linger * hz);
    207 				if (error)
    208 					break;
    209 			}
    210 		}
    211 	}
    212 drop:
    213 	if (so->so_pcb) {
    214 		int error2 = (*so->so_proto->pr_usrreq)(so, PRU_DETACH,
    215 		    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0,
    216 		    (struct proc *)0);
    217 		if (error == 0)
    218 			error = error2;
    219 	}
    220 discard:
    221 	if (so->so_state & SS_NOFDREF)
    222 		panic("soclose: NOFDREF");
    223 	so->so_state |= SS_NOFDREF;
    224 	sofree(so);
    225 	splx(s);
    226 	return (error);
    227 }
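
         /*
          * A worked example of the linger handling above: with SO_LINGER set
          * and so_linger == 5, a blocking close sleeps in tsleep() for up to
          * 5 * hz clock ticks (about five seconds) per wait while the
          * disconnect completes; a non-blocking socket (SS_NBIO) that is
          * still disconnecting skips the wait and goes straight to
          * PRU_DETACH.
          */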
    228 
    229 /*
    230  * Must be called at splsoftnet...
    231  */
    232 int
    233 soabort(so)
    234 	struct socket *so;
    235 {
    236 
    237 	return (*so->so_proto->pr_usrreq)(so, PRU_ABORT, (struct mbuf *)0,
    238 	    (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
    239 }
    240 
    241 int
    242 soaccept(so, nam)
    243 	register struct socket *so;
    244 	struct mbuf *nam;
    245 {
    246 	int s = splsoftnet();
    247 	int error;
    248 
    249 	if ((so->so_state & SS_NOFDREF) == 0)
    250 		panic("soaccept: !NOFDREF");
    251 	so->so_state &= ~SS_NOFDREF;
    252 	error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT, (struct mbuf *)0,
    253 	    nam, (struct mbuf *)0, (struct proc *)0);
    254 	splx(s);
    255 	return (error);
    256 }
    257 
    258 int
    259 soconnect(so, nam)
    260 	register struct socket *so;
    261 	struct mbuf *nam;
    262 {
    263 	struct proc *p = curproc;		/* XXX */
    264 	int s;
    265 	int error;
    266 
    267 	if (so->so_options & SO_ACCEPTCONN)
    268 		return (EOPNOTSUPP);
    269 	s = splsoftnet();
    270 	/*
    271 	 * If protocol is connection-based, can only connect once.
    272 	 * Otherwise, if connected, try to disconnect first.
    273 	 * This allows user to disconnect by connecting to, e.g.,
    274 	 * a null address.
    275 	 */
    276 	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
    277 	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
    278 	    (error = sodisconnect(so))))
    279 		error = EISCONN;
    280 	else
    281 		error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
    282 		    (struct mbuf *)0, nam, (struct mbuf *)0, p);
    283 	splx(s);
    284 	return (error);
    285 }
    286 
    287 int
    288 soconnect2(so1, so2)
    289 	register struct socket *so1;
    290 	struct socket *so2;
    291 {
    292 	int s = splsoftnet();
    293 	int error;
    294 
    295 	error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2,
    296 	    (struct mbuf *)0, (struct mbuf *)so2, (struct mbuf *)0,
    297 	    (struct proc *)0);
    298 	splx(s);
    299 	return (error);
    300 }
    301 
    302 int
    303 sodisconnect(so)
    304 	register struct socket *so;
    305 {
    306 	int s = splsoftnet();
    307 	int error;
    308 
    309 	if ((so->so_state & SS_ISCONNECTED) == 0) {
    310 		error = ENOTCONN;
    311 		goto bad;
    312 	}
    313 	if (so->so_state & SS_ISDISCONNECTING) {
    314 		error = EALREADY;
    315 		goto bad;
    316 	}
    317 	error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT,
    318 	    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0,
    319 	    (struct proc *)0);
    320 bad:
    321 	splx(s);
    322 	return (error);
    323 }
    324 
    325 #define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
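
         /*
          * SBLOCKWAIT() maps the caller's MSG_DONTWAIT flag onto the wait
          * argument of sblock(): a non-blocking request uses M_NOWAIT, so
          * acquiring the sockbuf lock fails immediately with EWOULDBLOCK
          * instead of sleeping.
          */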
    326 /*
    327  * Send on a socket.
    328  * If send must go all at once and message is larger than
    329  * send buffering, then hard error.
    330  * Lock against other senders.
    331  * If must go all at once and not enough room now, then
    332  * inform user that this would block and do nothing.
    333  * Otherwise, if nonblocking, send as much as possible.
    334  * The data to be sent is described by "uio" if nonzero,
    335  * otherwise by the mbuf chain "top" (which must be null
    336  * if uio is not).  Data provided in mbuf chain must be small
    337  * enough to send all at once.
    338  *
    339  * Returns nonzero on error, timeout or signal; callers
    340  * must check for short counts if EINTR/ERESTART are returned.
    341  * Data and control buffers are freed on return.
    342  */
    343 int
    344 sosend(so, addr, uio, top, control, flags)
    345 	register struct socket *so;
    346 	struct mbuf *addr;
    347 	struct uio *uio;
    348 	struct mbuf *top;
    349 	struct mbuf *control;
    350 	int flags;
    351 {
    352 	struct proc *p = curproc;		/* XXX */
    353 	struct mbuf **mp;
    354 	register struct mbuf *m;
    355 	register long space, len, resid;
    356 	int clen = 0, error, s, dontroute, mlen;
    357 	int atomic = sosendallatonce(so) || top;
    358 
    359 	if (uio)
    360 		resid = uio->uio_resid;
    361 	else
    362 		resid = top->m_pkthdr.len;
    363 	/*
    364 	 * In theory resid should be unsigned.
    365 	 * However, space must be signed, as it might be less than 0
    366 	 * if we over-committed, and we must use a signed comparison
    367 	 * of space and resid.  On the other hand, a negative resid
    368 	 * causes us to loop sending 0-length segments to the protocol.
    369 	 */
    370 	if (resid < 0) {
    371 		error = EINVAL;
    372 		goto out;
    373 	}
    374 	dontroute =
    375 	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
    376 	    (so->so_proto->pr_flags & PR_ATOMIC);
    377 	p->p_stats->p_ru.ru_msgsnd++;
    378 	if (control)
    379 		clen = control->m_len;
    380 #define	snderr(errno)	{ error = errno; splx(s); goto release; }
    381 
    382 restart:
    383 	if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0)
    384 		goto out;
    385 	do {
    386 		s = splsoftnet();
    387 		if (so->so_state & SS_CANTSENDMORE)
    388 			snderr(EPIPE);
    389 		if (so->so_error)
    390 			snderr(so->so_error);
    391 		if ((so->so_state & SS_ISCONNECTED) == 0) {
    392 			if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
    393 				if ((so->so_state & SS_ISCONFIRMING) == 0 &&
    394 				    !(resid == 0 && clen != 0))
    395 					snderr(ENOTCONN);
    396 			} else if (addr == 0)
    397 				snderr(EDESTADDRREQ);
    398 		}
    399 		space = sbspace(&so->so_snd);
    400 		if (flags & MSG_OOB)
    401 			space += 1024;
    402 		if ((atomic && resid > so->so_snd.sb_hiwat) ||
    403 		    clen > so->so_snd.sb_hiwat)
    404 			snderr(EMSGSIZE);
    405 		if (space < resid + clen && uio &&
    406 		    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
    407 			if (so->so_state & SS_NBIO)
    408 				snderr(EWOULDBLOCK);
    409 			sbunlock(&so->so_snd);
    410 			error = sbwait(&so->so_snd);
    411 			splx(s);
    412 			if (error)
    413 				goto out;
    414 			goto restart;
    415 		}
    416 		splx(s);
    417 		mp = &top;
    418 		space -= clen;
    419 		do {
    420 		    if (uio == NULL) {
    421 			/*
    422 			 * Data is prepackaged in "top".
    423 			 */
    424 			resid = 0;
    425 			if (flags & MSG_EOR)
    426 				top->m_flags |= M_EOR;
    427 		    } else do {
    428 			if (top == 0) {
    429 				MGETHDR(m, M_WAIT, MT_DATA);
    430 				mlen = MHLEN;
    431 				m->m_pkthdr.len = 0;
    432 				m->m_pkthdr.rcvif = (struct ifnet *)0;
    433 			} else {
    434 				MGET(m, M_WAIT, MT_DATA);
    435 				mlen = MLEN;
    436 			}
    437 			if (resid >= MINCLSIZE && space >= MCLBYTES) {
    438 				MCLGET(m, M_WAIT);
    439 				if ((m->m_flags & M_EXT) == 0)
    440 					goto nopages;
    441 				mlen = MCLBYTES;
    442 #ifdef	MAPPED_MBUFS
    443 				len = min(MCLBYTES, resid);
    444 #else
    445 				if (atomic && top == 0) {
    446 					len = min(MCLBYTES - max_hdr, resid);
    447 					m->m_data += max_hdr;
    448 				} else
    449 					len = min(MCLBYTES, resid);
    450 #endif
    451 				space -= len;
    452 			} else {
    453 nopages:
    454 				len = min(min(mlen, resid), space);
    455 				space -= len;
    456 				/*
    457 				 * For datagram protocols, leave room
    458 				 * for protocol headers in first mbuf.
    459 				 */
    460 				if (atomic && top == 0 && len < mlen)
    461 					MH_ALIGN(m, len);
    462 			}
    463 			error = uiomove(mtod(m, caddr_t), (int)len, uio);
    464 			resid = uio->uio_resid;
    465 			m->m_len = len;
    466 			*mp = m;
    467 			top->m_pkthdr.len += len;
    468 			if (error)
    469 				goto release;
    470 			mp = &m->m_next;
    471 			if (resid <= 0) {
    472 				if (flags & MSG_EOR)
    473 					top->m_flags |= M_EOR;
    474 				break;
    475 			}
    476 		    } while (space > 0 && atomic);
    477 		    if (dontroute)
    478 			    so->so_options |= SO_DONTROUTE;
    479 		    s = splsoftnet();				/* XXX */
    480 		    error = (*so->so_proto->pr_usrreq)(so,
    481 			(flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND,
    482 			top, addr, control, p);
    483 		    splx(s);
    484 		    if (dontroute)
    485 			    so->so_options &= ~SO_DONTROUTE;
    486 		    clen = 0;
    487 		    control = 0;
    488 		    top = 0;
    489 		    mp = &top;
    490 		    if (error)
    491 			goto release;
    492 		} while (resid && space > 0);
    493 	} while (resid);
    494 
    495 release:
    496 	sbunlock(&so->so_snd);
    497 out:
    498 	if (top)
    499 		m_freem(top);
    500 	if (control)
    501 		m_freem(control);
    502 	return (error);
    503 }
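
         /*
          * Illustrative only: upper layers normally reach sosend() through
          * the so_send hook installed by socreate() above.  A minimal sketch
          * of a send with no preformatted chain and no control data (auio,
          * nam and flags are hypothetical caller state):
          *
          *	error = (*so->so_send)(so, nam, &auio, (struct mbuf *)0,
          *	    (struct mbuf *)0, flags);
          *
          * For a PR_ATOMIC protocol the whole residual must fit below
          * sb_hiwat, or the EMSGSIZE check above fails the send outright.
          */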
    504 
    505 /*
    506  * Implement receive operations on a socket.
    507  * We depend on the way that records are added to the sockbuf
    508  * by sbappend*.  In particular, each record (mbufs linked through m_next)
    509  * must begin with an address if the protocol so specifies,
    510  * followed by an optional mbuf or mbufs containing ancillary data,
    511  * and then zero or more mbufs of data.
    512  * In order to avoid blocking network interrupts for the entire time here,
    513  * we splx() while doing the actual copy to user space.
    514  * Although the sockbuf is locked, new data may still be appended,
    515  * and thus we must maintain consistency of the sockbuf during that time.
    516  *
    517  * The caller may receive the data as a single mbuf chain by supplying
    518  * an mbuf **mp0 for use in returning the chain.  The uio is then used
    519  * only for the count in uio_resid.
    520  */
    521 int
    522 soreceive(so, paddr, uio, mp0, controlp, flagsp)
    523 	register struct socket *so;
    524 	struct mbuf **paddr;
    525 	struct uio *uio;
    526 	struct mbuf **mp0;
    527 	struct mbuf **controlp;
    528 	int *flagsp;
    529 {
    530 	register struct mbuf *m, **mp;
    531 	register int flags, len, error, s, offset;
    532 	struct protosw *pr = so->so_proto;
    533 	struct mbuf *nextrecord;
    534 	int moff, type = 0;
    535 	int orig_resid = uio->uio_resid;
    536 
    537 	mp = mp0;
    538 	if (paddr)
    539 		*paddr = 0;
    540 	if (controlp)
    541 		*controlp = 0;
    542 	if (flagsp)
    543 		flags = *flagsp &~ MSG_EOR;
    544 	else
    545 		flags = 0;
    546 	if (flags & MSG_OOB) {
    547 		m = m_get(M_WAIT, MT_DATA);
    548 		error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m,
    549 		    (struct mbuf *)(long)(flags & MSG_PEEK), (struct mbuf *)0,
    550 		    (struct proc *)0);
    551 		if (error)
    552 			goto bad;
    553 		do {
    554 			error = uiomove(mtod(m, caddr_t),
    555 			    (int) min(uio->uio_resid, m->m_len), uio);
    556 			m = m_free(m);
    557 		} while (uio->uio_resid && error == 0 && m);
    558 bad:
    559 		if (m)
    560 			m_freem(m);
    561 		return (error);
    562 	}
    563 	if (mp)
    564 		*mp = (struct mbuf *)0;
    565 	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
    566 		(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
    567 		    (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
    568 
    569 restart:
    570 	if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0)
    571 		return (error);
    572 	s = splsoftnet();
    573 
    574 	m = so->so_rcv.sb_mb;
    575 	/*
    576 	 * If we have less data than requested, block awaiting more
    577 	 * (subject to any timeout) if:
     578 	 *   1. the current count is less than the low water mark, or
     579 	 *   2. MSG_WAITALL is set, and it is possible to do the entire
     580 	 *	receive operation at once if we block (resid <= hiwat),
     581 	 * and in either case only when MSG_DONTWAIT is not set.
    582 	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
    583 	 * we have to do the receive in sections, and thus risk returning
    584 	 * a short count if a timeout or signal occurs after we start.
    585 	 */
    586 	if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
    587 	    so->so_rcv.sb_cc < uio->uio_resid) &&
    588 	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
    589 	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
    590 	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
    591 #ifdef DIAGNOSTIC
    592 		if (m == 0 && so->so_rcv.sb_cc)
    593 			panic("receive 1");
    594 #endif
    595 		if (so->so_error) {
    596 			if (m)
    597 				goto dontblock;
    598 			error = so->so_error;
    599 			if ((flags & MSG_PEEK) == 0)
    600 				so->so_error = 0;
    601 			goto release;
    602 		}
    603 		if (so->so_state & SS_CANTRCVMORE) {
    604 			if (m)
    605 				goto dontblock;
    606 			else
    607 				goto release;
    608 		}
    609 		for (; m; m = m->m_next)
    610 			if (m->m_type == MT_OOBDATA  || (m->m_flags & M_EOR)) {
    611 				m = so->so_rcv.sb_mb;
    612 				goto dontblock;
    613 			}
    614 		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
    615 		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
    616 			error = ENOTCONN;
    617 			goto release;
    618 		}
    619 		if (uio->uio_resid == 0)
    620 			goto release;
    621 		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
    622 			error = EWOULDBLOCK;
    623 			goto release;
    624 		}
    625 		sbunlock(&so->so_rcv);
    626 		error = sbwait(&so->so_rcv);
    627 		splx(s);
    628 		if (error)
    629 			return (error);
    630 		goto restart;
    631 	}
    632 dontblock:
    633 #ifdef notyet /* XXXX */
    634 	if (uio->uio_procp)
    635 		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
    636 #endif
    637 	nextrecord = m->m_nextpkt;
    638 	if (pr->pr_flags & PR_ADDR) {
    639 #ifdef DIAGNOSTIC
    640 		if (m->m_type != MT_SONAME)
    641 			panic("receive 1a");
    642 #endif
    643 		orig_resid = 0;
    644 		if (flags & MSG_PEEK) {
    645 			if (paddr)
    646 				*paddr = m_copy(m, 0, m->m_len);
    647 			m = m->m_next;
    648 		} else {
    649 			sbfree(&so->so_rcv, m);
    650 			if (paddr) {
    651 				*paddr = m;
    652 				so->so_rcv.sb_mb = m->m_next;
    653 				m->m_next = 0;
    654 				m = so->so_rcv.sb_mb;
    655 			} else {
    656 				MFREE(m, so->so_rcv.sb_mb);
    657 				m = so->so_rcv.sb_mb;
    658 			}
    659 		}
    660 	}
    661 	while (m && m->m_type == MT_CONTROL && error == 0) {
    662 		if (flags & MSG_PEEK) {
    663 			if (controlp)
    664 				*controlp = m_copy(m, 0, m->m_len);
    665 			m = m->m_next;
    666 		} else {
    667 			sbfree(&so->so_rcv, m);
    668 			if (controlp) {
    669 				if (pr->pr_domain->dom_externalize &&
    670 				    mtod(m, struct cmsghdr *)->cmsg_type ==
    671 				    SCM_RIGHTS)
    672 				   error = (*pr->pr_domain->dom_externalize)(m);
    673 				*controlp = m;
    674 				so->so_rcv.sb_mb = m->m_next;
    675 				m->m_next = 0;
    676 				m = so->so_rcv.sb_mb;
    677 			} else {
    678 				MFREE(m, so->so_rcv.sb_mb);
    679 				m = so->so_rcv.sb_mb;
    680 			}
    681 		}
    682 		if (controlp) {
    683 			orig_resid = 0;
    684 			controlp = &(*controlp)->m_next;
    685 		}
    686 	}
    687 	if (m) {
    688 		if ((flags & MSG_PEEK) == 0)
    689 			m->m_nextpkt = nextrecord;
    690 		type = m->m_type;
    691 		if (type == MT_OOBDATA)
    692 			flags |= MSG_OOB;
    693 	}
    694 	moff = 0;
    695 	offset = 0;
    696 	while (m && uio->uio_resid > 0 && error == 0) {
    697 		if (m->m_type == MT_OOBDATA) {
    698 			if (type != MT_OOBDATA)
    699 				break;
    700 		} else if (type == MT_OOBDATA)
    701 			break;
    702 #ifdef DIAGNOSTIC
    703 		else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
    704 			panic("receive 3");
    705 #endif
    706 		so->so_state &= ~SS_RCVATMARK;
    707 		len = uio->uio_resid;
    708 		if (so->so_oobmark && len > so->so_oobmark - offset)
    709 			len = so->so_oobmark - offset;
    710 		if (len > m->m_len - moff)
    711 			len = m->m_len - moff;
    712 		/*
    713 		 * If mp is set, just pass back the mbufs.
    714 		 * Otherwise copy them out via the uio, then free.
     715 		 * The sockbuf must stay consistent here (sb_mb pointing at the
     716 		 * current mbuf, nextrecord at the next record) when we drop priority;
    717 		 * we must note any additions to the sockbuf when we
    718 		 * block interrupts again.
    719 		 */
    720 		if (mp == 0) {
    721 			splx(s);
    722 			error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
    723 			s = splsoftnet();
    724 		} else
    725 			uio->uio_resid -= len;
    726 		if (len == m->m_len - moff) {
    727 			if (m->m_flags & M_EOR)
    728 				flags |= MSG_EOR;
    729 			if (flags & MSG_PEEK) {
    730 				m = m->m_next;
    731 				moff = 0;
    732 			} else {
    733 				nextrecord = m->m_nextpkt;
    734 				sbfree(&so->so_rcv, m);
    735 				if (mp) {
    736 					*mp = m;
    737 					mp = &m->m_next;
    738 					so->so_rcv.sb_mb = m = m->m_next;
    739 					*mp = (struct mbuf *)0;
    740 				} else {
    741 					MFREE(m, so->so_rcv.sb_mb);
    742 					m = so->so_rcv.sb_mb;
    743 				}
    744 				if (m)
    745 					m->m_nextpkt = nextrecord;
    746 			}
    747 		} else {
    748 			if (flags & MSG_PEEK)
    749 				moff += len;
    750 			else {
    751 				if (mp)
    752 					*mp = m_copym(m, 0, len, M_WAIT);
    753 				m->m_data += len;
    754 				m->m_len -= len;
    755 				so->so_rcv.sb_cc -= len;
    756 			}
    757 		}
    758 		if (so->so_oobmark) {
    759 			if ((flags & MSG_PEEK) == 0) {
    760 				so->so_oobmark -= len;
    761 				if (so->so_oobmark == 0) {
    762 					so->so_state |= SS_RCVATMARK;
    763 					break;
    764 				}
    765 			} else {
    766 				offset += len;
    767 				if (offset == so->so_oobmark)
    768 					break;
    769 			}
    770 		}
    771 		if (flags & MSG_EOR)
    772 			break;
    773 		/*
    774 		 * If the MSG_WAITALL flag is set (for non-atomic socket),
    775 		 * we must not quit until "uio->uio_resid == 0" or an error
    776 		 * termination.  If a signal/timeout occurs, return
    777 		 * with a short count but without error.
    778 		 * Keep sockbuf locked against other readers.
    779 		 */
    780 		while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
    781 		    !sosendallatonce(so) && !nextrecord) {
    782 			if (so->so_error || so->so_state & SS_CANTRCVMORE)
    783 				break;
    784 			error = sbwait(&so->so_rcv);
    785 			if (error) {
    786 				sbunlock(&so->so_rcv);
    787 				splx(s);
    788 				return (0);
    789 			}
    790 			if ((m = so->so_rcv.sb_mb) != NULL)
    791 				nextrecord = m->m_nextpkt;
    792 		}
    793 	}
    794 
    795 	if (m && pr->pr_flags & PR_ATOMIC) {
    796 		flags |= MSG_TRUNC;
    797 		if ((flags & MSG_PEEK) == 0)
    798 			(void) sbdroprecord(&so->so_rcv);
    799 	}
    800 	if ((flags & MSG_PEEK) == 0) {
    801 		if (m == 0)
    802 			so->so_rcv.sb_mb = nextrecord;
    803 		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
    804 			(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
    805 			    (struct mbuf *)(long)flags, (struct mbuf *)0,
    806 			    (struct proc *)0);
    807 	}
    808 	if (orig_resid == uio->uio_resid && orig_resid &&
    809 	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
    810 		sbunlock(&so->so_rcv);
    811 		splx(s);
    812 		goto restart;
    813 	}
    814 
    815 	if (flagsp)
    816 		*flagsp |= flags;
    817 release:
    818 	sbunlock(&so->so_rcv);
    819 	splx(s);
    820 	return (error);
    821 }
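
         /*
          * Illustrative only: readers normally come in through the
          * so_receive hook (set to soreceive() by socreate()).  A sketch of
          * a plain read that discards the source address and any ancillary
          * data (auio and flags are hypothetical caller state; flags carries
          * the MSG_* request bits in and the result bits out):
          *
          *	error = (*so->so_receive)(so, (struct mbuf **)0, &auio,
          *	    (struct mbuf **)0, (struct mbuf **)0, &flags);
          *
          * Passing a non-null mp0 instead returns the data as an mbuf chain
          * and uses the uio only for its residual count, as noted above.
          */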
    822 
    823 int
    824 soshutdown(so, how)
    825 	struct socket *so;
    826 	int how;
    827 {
    828 	struct protosw *pr = so->so_proto;
    829 
    830 	if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR))
    831 		return (EINVAL);
    832 
    833 	if (how == SHUT_RD || how == SHUT_RDWR)
    834 		sorflush(so);
    835 	if (how == SHUT_WR || how == SHUT_RDWR)
    836 		return (*pr->pr_usrreq)(so, PRU_SHUTDOWN, (struct mbuf *)0,
    837 		    (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
    838 	return (0);
    839 }
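
         /*
          * For example, shutdown(s, SHUT_WR) from user space arrives here
          * with how == SHUT_WR: the receive side is left untouched and the
          * protocol is asked, via PRU_SHUTDOWN, to signal that no more data
          * will be sent (for TCP this is what causes the FIN).
          */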
    840 
    841 void
    842 sorflush(so)
    843 	register struct socket *so;
    844 {
    845 	register struct sockbuf *sb = &so->so_rcv;
    846 	register struct protosw *pr = so->so_proto;
    847 	register int s;
    848 	struct sockbuf asb;
    849 
    850 	sb->sb_flags |= SB_NOINTR;
    851 	(void) sblock(sb, M_WAITOK);
    852 	s = splimp();
    853 	socantrcvmore(so);
    854 	sbunlock(sb);
    855 	asb = *sb;
    856 	memset((caddr_t)sb, 0, sizeof(*sb));
    857 	splx(s);
    858 	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
    859 		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
    860 	sbrelease(&asb);
    861 }
    862 
    863 int
    864 sosetopt(so, level, optname, m0)
    865 	register struct socket *so;
    866 	int level, optname;
    867 	struct mbuf *m0;
    868 {
    869 	int error = 0;
    870 	register struct mbuf *m = m0;
    871 
    872 	if (level != SOL_SOCKET) {
    873 		if (so->so_proto && so->so_proto->pr_ctloutput)
    874 			return ((*so->so_proto->pr_ctloutput)
    875 				  (PRCO_SETOPT, so, level, optname, &m0));
    876 		error = ENOPROTOOPT;
    877 	} else {
    878 		switch (optname) {
    879 
    880 		case SO_LINGER:
    881 			if (m == NULL || m->m_len != sizeof(struct linger)) {
    882 				error = EINVAL;
    883 				goto bad;
    884 			}
    885 			so->so_linger = mtod(m, struct linger *)->l_linger;
    886 			/* fall thru... */
    887 
    888 		case SO_DEBUG:
    889 		case SO_KEEPALIVE:
    890 		case SO_DONTROUTE:
    891 		case SO_USELOOPBACK:
    892 		case SO_BROADCAST:
    893 		case SO_REUSEADDR:
    894 		case SO_REUSEPORT:
    895 		case SO_OOBINLINE:
    896 		case SO_TIMESTAMP:
    897 			if (m == NULL || m->m_len < sizeof(int)) {
    898 				error = EINVAL;
    899 				goto bad;
    900 			}
    901 			if (*mtod(m, int *))
    902 				so->so_options |= optname;
    903 			else
    904 				so->so_options &= ~optname;
    905 			break;
    906 
    907 		case SO_SNDBUF:
    908 		case SO_RCVBUF:
    909 		case SO_SNDLOWAT:
    910 		case SO_RCVLOWAT:
    911 		    {
    912 			int optval;
    913 
    914 			if (m == NULL || m->m_len < sizeof(int)) {
    915 				error = EINVAL;
    916 				goto bad;
    917 			}
    918 
    919 			/*
    920 			 * Values < 1 make no sense for any of these
    921 			 * options, so disallow them.
    922 			 */
    923 			optval = *mtod(m, int *);
    924 			if (optval < 1) {
    925 				error = EINVAL;
    926 				goto bad;
    927 			}
    928 
    929 			switch (optname) {
    930 
    931 			case SO_SNDBUF:
    932 			case SO_RCVBUF:
    933 				if (sbreserve(optname == SO_SNDBUF ?
    934 				    &so->so_snd : &so->so_rcv,
    935 				    (u_long) optval) == 0) {
    936 					error = ENOBUFS;
    937 					goto bad;
    938 				}
    939 				break;
    940 
    941 			/*
    942 			 * Make sure the low-water is never greater than
    943 			 * the high-water.
    944 			 */
    945 			case SO_SNDLOWAT:
    946 				so->so_snd.sb_lowat =
    947 				    (optval > so->so_snd.sb_hiwat) ?
    948 				    so->so_snd.sb_hiwat : optval;
    949 				break;
    950 			case SO_RCVLOWAT:
    951 				so->so_rcv.sb_lowat =
    952 				    (optval > so->so_rcv.sb_hiwat) ?
    953 				    so->so_rcv.sb_hiwat : optval;
    954 				break;
    955 			}
    956 			break;
    957 		    }
    958 
    959 		case SO_SNDTIMEO:
    960 		case SO_RCVTIMEO:
    961 		    {
    962 			struct timeval *tv;
    963 			short val;
    964 
    965 			if (m == NULL || m->m_len < sizeof(*tv)) {
    966 				error = EINVAL;
    967 				goto bad;
    968 			}
    969 			tv = mtod(m, struct timeval *);
    970 			if (tv->tv_sec * hz + tv->tv_usec / tick > SHRT_MAX) {
    971 				error = EDOM;
    972 				goto bad;
    973 			}
    974 			val = tv->tv_sec * hz + tv->tv_usec / tick;
    975 
    976 			switch (optname) {
    977 
    978 			case SO_SNDTIMEO:
    979 				so->so_snd.sb_timeo = val;
    980 				break;
    981 			case SO_RCVTIMEO:
    982 				so->so_rcv.sb_timeo = val;
    983 				break;
    984 			}
    985 			break;
    986 		    }
    987 
    988 		default:
    989 			error = ENOPROTOOPT;
    990 			break;
    991 		}
    992 		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
    993 			(void) ((*so->so_proto->pr_ctloutput)
    994 				  (PRCO_SETOPT, so, level, optname, &m0));
    995 			m = NULL;	/* freed by protocol */
    996 		}
    997 	}
    998 bad:
    999 	if (m)
   1000 		(void) m_free(m);
   1001 	return (error);
   1002 }
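
         /*
          * A worked example of the SO_SNDTIMEO/SO_RCVTIMEO conversion above,
          * assuming hz == 100 and therefore tick == 10000 microseconds: a
          * caller-supplied timeout of { tv_sec = 2, tv_usec = 500000 }
          * becomes val = 2 * 100 + 500000 / 10000 = 250 ticks in sb_timeo;
          * sogetopt() below performs the inverse conversion.
          */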
   1003 
   1004 int
   1005 sogetopt(so, level, optname, mp)
   1006 	register struct socket *so;
   1007 	int level, optname;
   1008 	struct mbuf **mp;
   1009 {
   1010 	register struct mbuf *m;
   1011 
   1012 	if (level != SOL_SOCKET) {
   1013 		if (so->so_proto && so->so_proto->pr_ctloutput) {
   1014 			return ((*so->so_proto->pr_ctloutput)
   1015 				  (PRCO_GETOPT, so, level, optname, mp));
   1016 		} else
   1017 			return (ENOPROTOOPT);
   1018 	} else {
   1019 		m = m_get(M_WAIT, MT_SOOPTS);
   1020 		m->m_len = sizeof(int);
   1021 
   1022 		switch (optname) {
   1023 
   1024 		case SO_LINGER:
   1025 			m->m_len = sizeof(struct linger);
   1026 			mtod(m, struct linger *)->l_onoff =
   1027 				so->so_options & SO_LINGER;
   1028 			mtod(m, struct linger *)->l_linger = so->so_linger;
   1029 			break;
   1030 
   1031 		case SO_USELOOPBACK:
   1032 		case SO_DONTROUTE:
   1033 		case SO_DEBUG:
   1034 		case SO_KEEPALIVE:
   1035 		case SO_REUSEADDR:
   1036 		case SO_REUSEPORT:
   1037 		case SO_BROADCAST:
   1038 		case SO_OOBINLINE:
   1039 		case SO_TIMESTAMP:
   1040 			*mtod(m, int *) = so->so_options & optname;
   1041 			break;
   1042 
   1043 		case SO_TYPE:
   1044 			*mtod(m, int *) = so->so_type;
   1045 			break;
   1046 
   1047 		case SO_ERROR:
   1048 			*mtod(m, int *) = so->so_error;
   1049 			so->so_error = 0;
   1050 			break;
   1051 
   1052 		case SO_SNDBUF:
   1053 			*mtod(m, int *) = so->so_snd.sb_hiwat;
   1054 			break;
   1055 
   1056 		case SO_RCVBUF:
   1057 			*mtod(m, int *) = so->so_rcv.sb_hiwat;
   1058 			break;
   1059 
   1060 		case SO_SNDLOWAT:
   1061 			*mtod(m, int *) = so->so_snd.sb_lowat;
   1062 			break;
   1063 
   1064 		case SO_RCVLOWAT:
   1065 			*mtod(m, int *) = so->so_rcv.sb_lowat;
   1066 			break;
   1067 
   1068 		case SO_SNDTIMEO:
   1069 		case SO_RCVTIMEO:
   1070 		    {
   1071 			int val = (optname == SO_SNDTIMEO ?
   1072 			     so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
   1073 
   1074 			m->m_len = sizeof(struct timeval);
   1075 			mtod(m, struct timeval *)->tv_sec = val / hz;
   1076 			mtod(m, struct timeval *)->tv_usec =
   1077 			    (val % hz) * tick;
   1078 			break;
   1079 		    }
   1080 
   1081 		default:
   1082 			(void)m_free(m);
   1083 			return (ENOPROTOOPT);
   1084 		}
   1085 		*mp = m;
   1086 		return (0);
   1087 	}
   1088 }
   1089 
   1090 void
   1091 sohasoutofband(so)
   1092 	register struct socket *so;
   1093 {
   1094 	struct proc *p;
   1095 
   1096 	if (so->so_pgid < 0)
   1097 		gsignal(-so->so_pgid, SIGURG);
   1098 	else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
   1099 		psignal(p, SIGURG);
   1100 	selwakeup(&so->so_rcv.sb_sel);
   1101 }
   1102