Home | History | Annotate | Line # | Download | only in kern
uipc_usrreq.c revision 1.20
      1 /*	$NetBSD: uipc_usrreq.c,v 1.20 1996/05/23 16:03:45 mycroft Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 1982, 1986, 1989, 1991, 1993
      5  *	The Regents of the University of California.  All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. All advertising materials mentioning features or use of this software
     16  *    must display the following acknowledgement:
     17  *	This product includes software developed by the University of
     18  *	California, Berkeley and its contributors.
     19  * 4. Neither the name of the University nor the names of its contributors
     20  *    may be used to endorse or promote products derived from this software
     21  *    without specific prior written permission.
     22  *
     23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     33  * SUCH DAMAGE.
     34  *
     35  *	@(#)uipc_usrreq.c	8.3 (Berkeley) 1/4/94
     36  */
     37 
     38 #include <sys/param.h>
     39 #include <sys/systm.h>
     40 #include <sys/proc.h>
     41 #include <sys/filedesc.h>
     42 #include <sys/domain.h>
     43 #include <sys/protosw.h>
     44 #include <sys/socket.h>
     45 #include <sys/socketvar.h>
     46 #include <sys/unpcb.h>
     47 #include <sys/un.h>
     48 #include <sys/namei.h>
     49 #include <sys/vnode.h>
     50 #include <sys/file.h>
     51 #include <sys/stat.h>
     52 #include <sys/mbuf.h>
     53 
     54 /*
     55  * Unix communications domain.
     56  *
     57  * TODO:
     58  *	SEQPACKET, RDM
     59  *	rethink name space problems
     60  *	need a proper out-of-band
     61  */
/*
 * Address reported for sockets that have no bound name.  Only the first
 * two members (initialised from sizeof(sun_noname) and AF_UNIX) are set;
 * the rest of the structure is zero.
 */
struct	sockaddr_un sun_noname = { sizeof(sun_noname), AF_UNIX };
ino_t	unp_ino;			/* prototype for fake inode numbers */
     64 
     65 int
     66 unp_output(m, control, unp)
     67 	struct mbuf *m, *control;
     68 	struct unpcb *unp;
     69 {
     70 	struct socket *so2;
     71 	struct sockaddr_un *sun;
     72 
     73 	so2 = unp->unp_conn->unp_socket;
     74 	if (unp->unp_addr)
     75 		sun = unp->unp_addr;
     76 	else
     77 		sun = &sun_noname;
     78 	if (sbappendaddr(&so2->so_rcv, (struct sockaddr *)sun, m,
     79 	    control) == 0) {
     80 		m_freem(control);
     81 		m_freem(m);
     82 		return (EINVAL);
     83 	} else {
     84 		sorwakeup(so2);
     85 		return (0);
     86 	}
     87 }
     88 
     89 void
     90 unp_setsockaddr(unp, nam)
     91 	register struct unpcb *unp;
     92 	struct mbuf *nam;
     93 {
     94 	struct sockaddr_un *sun;
     95 
     96 	if (unp->unp_addr)
     97 		sun = unp->unp_addr;
     98 	else
     99 		sun = &sun_noname;
    100 	nam->m_len = sun->sun_len;
    101 	bcopy(sun, mtod(nam, caddr_t), (size_t)nam->m_len);
    102 }
    103 
    104 void
    105 unp_setpeeraddr(unp, nam)
    106 	register struct unpcb *unp;
    107 	struct mbuf *nam;
    108 {
    109 	struct sockaddr_un *sun;
    110 
    111 	if (unp->unp_conn && unp->unp_conn->unp_addr)
    112 		sun = unp->unp_conn->unp_addr;
    113 	else
    114 		sun = &sun_noname;
    115 	nam->m_len = sun->sun_len;
    116 	bcopy(sun, mtod(nam, caddr_t), (size_t)nam->m_len);
    117 }
    118 
    119 /*ARGSUSED*/
    120 int
    121 uipc_usrreq(so, req, m, nam, control, p)
    122 	struct socket *so;
    123 	int req;
    124 	struct mbuf *m, *nam, *control;
    125 	struct proc *p;
    126 {
    127 	struct unpcb *unp = sotounpcb(so);
    128 	register struct socket *so2;
    129 	register int error = 0;
    130 
    131 	if (req == PRU_CONTROL)
    132 		return (EOPNOTSUPP);
    133 
    134 	if (req != PRU_SEND && control && control->m_len) {
    135 		error = EOPNOTSUPP;
    136 		goto release;
    137 	}
    138 	if (unp == 0 && req != PRU_ATTACH) {
    139 		error = EINVAL;
    140 		goto release;
    141 	}
    142 
    143 	switch (req) {
    144 
    145 	case PRU_ATTACH:
    146 		if (unp != 0) {
    147 			error = EISCONN;
    148 			break;
    149 		}
    150 		error = unp_attach(so);
    151 		break;
    152 
    153 	case PRU_DETACH:
    154 		unp_detach(unp);
    155 		break;
    156 
    157 	case PRU_BIND:
    158 		error = unp_bind(unp, nam, p);
    159 		break;
    160 
    161 	case PRU_LISTEN:
    162 		if (unp->unp_vnode == 0)
    163 			error = EINVAL;
    164 		break;
    165 
    166 	case PRU_CONNECT:
    167 		error = unp_connect(so, nam, p);
    168 		break;
    169 
    170 	case PRU_CONNECT2:
    171 		error = unp_connect2(so, (struct socket *)nam);
    172 		break;
    173 
    174 	case PRU_DISCONNECT:
    175 		unp_disconnect(unp);
    176 		break;
    177 
    178 	case PRU_ACCEPT:
    179 		unp_setpeeraddr(unp, nam);
    180 		break;
    181 
    182 	case PRU_SHUTDOWN:
    183 		socantsendmore(so);
    184 		unp_shutdown(unp);
    185 		break;
    186 
    187 	case PRU_RCVD:
    188 		switch (so->so_type) {
    189 
    190 		case SOCK_DGRAM:
    191 			panic("uipc 1");
    192 			/*NOTREACHED*/
    193 
    194 		case SOCK_STREAM:
    195 #define	rcv (&so->so_rcv)
    196 #define snd (&so2->so_snd)
    197 			if (unp->unp_conn == 0)
    198 				break;
    199 			so2 = unp->unp_conn->unp_socket;
    200 			/*
    201 			 * Adjust backpressure on sender
    202 			 * and wakeup any waiting to write.
    203 			 */
    204 			snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
    205 			unp->unp_mbcnt = rcv->sb_mbcnt;
    206 			snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
    207 			unp->unp_cc = rcv->sb_cc;
    208 			sowwakeup(so2);
    209 #undef snd
    210 #undef rcv
    211 			break;
    212 
    213 		default:
    214 			panic("uipc 2");
    215 		}
    216 		break;
    217 
    218 	case PRU_SEND:
    219 		if (control && (error = unp_internalize(control, p)))
    220 			break;
    221 		switch (so->so_type) {
    222 
    223 		case SOCK_DGRAM: {
    224 			if (nam) {
    225 				if ((so->so_state & SS_ISCONNECTED) != 0) {
    226 					m_freem(m);
    227 					error = EISCONN;
    228 					break;
    229 				}
    230 				error = unp_connect(so, nam, p);
    231 				if (error) {
    232 					m_freem(m);
    233 					break;
    234 				}
    235 			} else {
    236 				if ((so->so_state & SS_ISCONNECTED) == 0) {
    237 					m_freem(m);
    238 					error = ENOTCONN;
    239 					break;
    240 				}
    241 			}
    242 			error = unp_output(m, control, unp);
    243 			if (nam)
    244 				unp_disconnect(unp);
    245 			break;
    246 		}
    247 
    248 		case SOCK_STREAM:
    249 #define	rcv (&so2->so_rcv)
    250 #define	snd (&so->so_snd)
    251 			if (so->so_state & SS_CANTSENDMORE) {
    252 				error = EPIPE;
    253 				break;
    254 			}
    255 			if (unp->unp_conn == 0)
    256 				panic("uipc 3");
    257 			so2 = unp->unp_conn->unp_socket;
    258 			/*
    259 			 * Send to paired receive port, and then reduce
    260 			 * send buffer hiwater marks to maintain backpressure.
    261 			 * Wake up readers.
    262 			 */
    263 			if (control) {
    264 				if (sbappendcontrol(rcv, m, control))
    265 					control = 0;
    266 			} else
    267 				sbappend(rcv, m);
    268 			snd->sb_mbmax -=
    269 			    rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
    270 			unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
    271 			snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
    272 			unp->unp_conn->unp_cc = rcv->sb_cc;
    273 			sorwakeup(so2);
    274 			m = 0;
    275 #undef snd
    276 #undef rcv
    277 			break;
    278 
    279 		default:
    280 			panic("uipc 4");
    281 		}
    282 		break;
    283 
    284 	case PRU_ABORT:
    285 		unp_drop(unp, ECONNABORTED);
    286 		break;
    287 
    288 	case PRU_SENSE:
    289 		((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
    290 		if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
    291 			so2 = unp->unp_conn->unp_socket;
    292 			((struct stat *) m)->st_blksize += so2->so_rcv.sb_cc;
    293 		}
    294 		((struct stat *) m)->st_dev = NODEV;
    295 		if (unp->unp_ino == 0)
    296 			unp->unp_ino = unp_ino++;
    297 		((struct stat *) m)->st_ino = unp->unp_ino;
    298 		return (0);
    299 
    300 	case PRU_RCVOOB:
    301 		error = EOPNOTSUPP;
    302 		break;
    303 
    304 	case PRU_SENDOOB:
    305 		m_freem(m);
    306 		error = EOPNOTSUPP;
    307 		break;
    308 
    309 	case PRU_SOCKADDR:
    310 		unp_setsockaddr(unp, nam);
    311 		break;
    312 
    313 	case PRU_PEERADDR:
    314 		unp_setpeeraddr(unp, nam);
    315 		break;
    316 
    317 	default:
    318 		panic("piusrreq");
    319 	}
    320 
    321 release:
    322 	return (error);
    323 }
    324 
/*
 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
 * for stream sockets, although the total for sender and receiver is
 * actually only PIPSIZ.
 * Datagram sockets really use the sendspace as the maximum datagram size,
 * and don't really want to reserve the sendspace.  Their recvspace should
 * be large enough for at least one max-size datagram plus address.
 *
 * These values are the defaults passed to soreserve() by unp_attach()
 * when the socket has no buffer space reserved yet.
 */
#define	PIPSIZ	4096
u_long	unpst_sendspace = PIPSIZ;	/* stream send buffer size */
u_long	unpst_recvspace = PIPSIZ;	/* stream receive buffer size */
u_long	unpdg_sendspace = 2*1024;	/* really max datagram size */
u_long	unpdg_recvspace = 4*1024;	/* datagram receive buffer size */

/*
 * Count of file references currently held inside control messages;
 * raised by unp_internalize(), lowered by unp_externalize() and
 * unp_discard().
 */
int	unp_rights;			/* file descriptors in flight */
    340 
    341 int
    342 unp_attach(so)
    343 	struct socket *so;
    344 {
    345 	register struct unpcb *unp;
    346 	int error;
    347 
    348 	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
    349 		switch (so->so_type) {
    350 
    351 		case SOCK_STREAM:
    352 			error = soreserve(so, unpst_sendspace, unpst_recvspace);
    353 			break;
    354 
    355 		case SOCK_DGRAM:
    356 			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
    357 			break;
    358 
    359 		default:
    360 			panic("unp_attach");
    361 		}
    362 		if (error)
    363 			return (error);
    364 	}
    365 	unp = malloc(sizeof(*unp), M_PCB, M_NOWAIT);
    366 	if (unp == NULL)
    367 		return (ENOBUFS);
    368 	bzero((caddr_t)unp, sizeof(*unp));
    369 	unp->unp_socket = so;
    370 	so->so_pcb = unp;
    371 	return (0);
    372 }
    373 
    374 void
    375 unp_detach(unp)
    376 	register struct unpcb *unp;
    377 {
    378 
    379 	if (unp->unp_vnode) {
    380 		unp->unp_vnode->v_socket = 0;
    381 		vrele(unp->unp_vnode);
    382 		unp->unp_vnode = 0;
    383 	}
    384 	if (unp->unp_conn)
    385 		unp_disconnect(unp);
    386 	while (unp->unp_refs)
    387 		unp_drop(unp->unp_refs, ECONNRESET);
    388 	soisdisconnected(unp->unp_socket);
    389 	unp->unp_socket->so_pcb = 0;
    390 	if (unp->unp_addr)
    391 		m_freem(dtom(unp->unp_addr));
    392 	if (unp_rights) {
    393 		/*
    394 		 * Normally the receive buffer is flushed later,
    395 		 * in sofree, but if our receive buffer holds references
    396 		 * to descriptors that are now garbage, we will dispose
    397 		 * of those descriptor references after the garbage collector
    398 		 * gets them (resulting in a "panic: closef: count < 0").
    399 		 */
    400 		sorflush(unp->unp_socket);
    401 		free(unp, M_PCB);
    402 		unp_gc();
    403 	} else
    404 		free(unp, M_PCB);
    405 }
    406 
    407 int
    408 unp_bind(unp, nam, p)
    409 	struct unpcb *unp;
    410 	struct mbuf *nam;
    411 	struct proc *p;
    412 {
    413 	struct sockaddr_un *sun = mtod(nam, struct sockaddr_un *);
    414 	register struct vnode *vp;
    415 	struct vattr vattr;
    416 	int error;
    417 	struct nameidata nd;
    418 
    419 	if (unp->unp_vnode != 0)
    420 		return (EINVAL);
    421 	NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT, UIO_SYSSPACE,
    422 	    sun->sun_path, p);
    423 	if (nam->m_data + nam->m_len == &nam->m_dat[MLEN]) {	/* XXX */
    424 		if (*(mtod(nam, caddr_t) + nam->m_len - 1) != 0)
    425 			return (EINVAL);
    426 	} else
    427 		*(mtod(nam, caddr_t) + nam->m_len) = 0;
    428 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
    429 	if ((error = namei(&nd)) != 0)
    430 		return (error);
    431 	vp = nd.ni_vp;
    432 	if (vp != NULL) {
    433 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
    434 		if (nd.ni_dvp == vp)
    435 			vrele(nd.ni_dvp);
    436 		else
    437 			vput(nd.ni_dvp);
    438 		vrele(vp);
    439 		return (EADDRINUSE);
    440 	}
    441 	VATTR_NULL(&vattr);
    442 	vattr.va_type = VSOCK;
    443 	vattr.va_mode = ACCESSPERMS;
    444 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
    445 	error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
    446 	if (error)
    447 		return (error);
    448 	vp = nd.ni_vp;
    449 	vp->v_socket = unp->unp_socket;
    450 	unp->unp_vnode = vp;
    451 	unp->unp_addr =
    452 	    mtod(m_copy(nam, 0, (int)M_COPYALL), struct sockaddr_un *);
    453 	VOP_UNLOCK(vp);
    454 	return (0);
    455 }
    456 
/*
 * Connect socket "so" to the Unix-domain socket bound to the path name
 * held in "nam".
 *
 * For protocols flagged PR_CONNREQUIRED a new socket is spawned from the
 * listening socket with sonewconn(), given a copy of the listener's
 * bound address, and connected to in place of the listener.
 *
 * Returns 0 on success; EINVAL for an unterminated path; ENOTSOCK if the
 * path is not a socket; ECONNREFUSED if nothing is bound there or the
 * target is not accepting connections; EPROTOTYPE on a socket type
 * mismatch; ENOBUFS if the listener's address cannot be copied;
 * otherwise the error from namei(), VOP_ACCESS() or unp_connect2().
 */
int
unp_connect(so, nam, p)
	struct socket *so;
	struct mbuf *nam;
	struct proc *p;
{
	register struct sockaddr_un *sun = mtod(nam, struct sockaddr_un *);
	register struct vnode *vp;
	register struct socket *so2, *so3;
	struct unpcb *unp2, *unp3;
	struct mbuf *addrm;
	int error;
	struct nameidata nd;

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, sun->sun_path, p);
	/*
	 * Make sure the path is NUL-terminated: if the address fills the
	 * mbuf exactly its last byte must already be zero, otherwise a
	 * terminator is written just past the end of the address.
	 */
	if (nam->m_data + nam->m_len == &nam->m_dat[MLEN]) {	/* XXX */
		if (*(mtod(nam, caddr_t) + nam->m_len - 1) != 0)
			return (EINVAL);
	} else
		*(mtod(nam, caddr_t) + nam->m_len) = 0;
	if ((error = namei(&nd)) != 0)
		return (error);
	vp = nd.ni_vp;
	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto bad;
	}
	/* Connecting requires write permission on the socket node. */
	if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) != 0)
		goto bad;
	so2 = vp->v_socket;
	if (so2 == 0) {
		error = ECONNREFUSED;
		goto bad;
	}
	if (so->so_type != so2->so_type) {
		error = EPROTOTYPE;
		goto bad;
	}
	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		if ((so2->so_options & SO_ACCEPTCONN) == 0) {
			error = ECONNREFUSED;
			goto bad;
		}
		/*
		 * Copy the listener's address before the new socket is
		 * created, so that an mbuf shortage is reported as ENOBUFS
		 * instead of a null mbuf being dereferenced, and so no
		 * half-initialised connection is left on the listener's
		 * queue.
		 */
		addrm = 0;
		unp2 = sotounpcb(so2);
		if (unp2->unp_addr) {
			addrm = m_copy(dtom(unp2->unp_addr), 0,
			    (int)M_COPYALL);
			if (addrm == 0) {
				error = ENOBUFS;
				goto bad;
			}
		}
		if ((so3 = sonewconn(so2, 0)) == 0) {
			if (addrm)
				m_freem(addrm);
			error = ECONNREFUSED;
			goto bad;
		}
		unp3 = sotounpcb(so3);
		if (addrm)
			unp3->unp_addr = mtod(addrm, struct sockaddr_un *);
		so2 = so3;
	}
	error = unp_connect2(so, so2);
bad:
	vput(vp);
	return (error);
}
    512 
    513 int
    514 unp_connect2(so, so2)
    515 	register struct socket *so;
    516 	register struct socket *so2;
    517 {
    518 	register struct unpcb *unp = sotounpcb(so);
    519 	register struct unpcb *unp2;
    520 
    521 	if (so2->so_type != so->so_type)
    522 		return (EPROTOTYPE);
    523 	unp2 = sotounpcb(so2);
    524 	unp->unp_conn = unp2;
    525 	switch (so->so_type) {
    526 
    527 	case SOCK_DGRAM:
    528 		unp->unp_nextref = unp2->unp_refs;
    529 		unp2->unp_refs = unp;
    530 		soisconnected(so);
    531 		break;
    532 
    533 	case SOCK_STREAM:
    534 		unp2->unp_conn = unp;
    535 		soisconnected(so);
    536 		soisconnected(so2);
    537 		break;
    538 
    539 	default:
    540 		panic("unp_connect2");
    541 	}
    542 	return (0);
    543 }
    544 
    545 void
    546 unp_disconnect(unp)
    547 	struct unpcb *unp;
    548 {
    549 	register struct unpcb *unp2 = unp->unp_conn;
    550 
    551 	if (unp2 == 0)
    552 		return;
    553 	unp->unp_conn = 0;
    554 	switch (unp->unp_socket->so_type) {
    555 
    556 	case SOCK_DGRAM:
    557 		if (unp2->unp_refs == unp)
    558 			unp2->unp_refs = unp->unp_nextref;
    559 		else {
    560 			unp2 = unp2->unp_refs;
    561 			for (;;) {
    562 				if (unp2 == 0)
    563 					panic("unp_disconnect");
    564 				if (unp2->unp_nextref == unp)
    565 					break;
    566 				unp2 = unp2->unp_nextref;
    567 			}
    568 			unp2->unp_nextref = unp->unp_nextref;
    569 		}
    570 		unp->unp_nextref = 0;
    571 		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
    572 		break;
    573 
    574 	case SOCK_STREAM:
    575 		soisdisconnected(unp->unp_socket);
    576 		unp2->unp_conn = 0;
    577 		soisdisconnected(unp2->unp_socket);
    578 		break;
    579 	}
    580 }
    581 
#ifdef notdef
/*
 * Compiled out (guarded by "notdef"): abort a socket by detaching its
 * control block.
 */
unp_abort(unp)
	struct unpcb *unp;
{

	unp_detach(unp);
}
#endif
    590 
    591 void
    592 unp_shutdown(unp)
    593 	struct unpcb *unp;
    594 {
    595 	struct socket *so;
    596 
    597 	if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn &&
    598 	    (so = unp->unp_conn->unp_socket))
    599 		socantrcvmore(so);
    600 }
    601 
    602 void
    603 unp_drop(unp, errno)
    604 	struct unpcb *unp;
    605 	int errno;
    606 {
    607 	struct socket *so = unp->unp_socket;
    608 
    609 	so->so_error = errno;
    610 	unp_disconnect(unp);
    611 	if (so->so_head) {
    612 		so->so_pcb = 0;
    613 		sofree(so);
    614 		if (unp->unp_addr)
    615 			m_freem(dtom(unp->unp_addr));
    616 		free(unp, M_PCB);
    617 	}
    618 }
    619 
#ifdef notdef
/* Compiled out (guarded by "notdef"): empty placeholder. */
unp_drain()
{

}
#endif
    626 
    627 int
    628 unp_externalize(rights)
    629 	struct mbuf *rights;
    630 {
    631 	struct proc *p = curproc;		/* XXX */
    632 	register int i;
    633 	register struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
    634 	register struct file **rp = (struct file **)(cm + 1);
    635 	register struct file *fp;
    636 	int newfds = (cm->cmsg_len - sizeof(*cm)) / sizeof (int);
    637 	int f;
    638 
    639 	if (!fdavail(p, newfds)) {
    640 		for (i = 0; i < newfds; i++) {
    641 			fp = *rp;
    642 			unp_discard(fp);
    643 			*rp++ = 0;
    644 		}
    645 		return (EMSGSIZE);
    646 	}
    647 	for (i = 0; i < newfds; i++) {
    648 		if (fdalloc(p, 0, &f))
    649 			panic("unp_externalize");
    650 		fp = *rp;
    651 		p->p_fd->fd_ofiles[f] = fp;
    652 		fp->f_msgcount--;
    653 		unp_rights--;
    654 		*(int *)rp++ = f;
    655 	}
    656 	return (0);
    657 }
    658 
    659 int
    660 unp_internalize(control, p)
    661 	struct mbuf *control;
    662 	struct proc *p;
    663 {
    664 	struct filedesc *fdp = p->p_fd;
    665 	register struct cmsghdr *cm = mtod(control, struct cmsghdr *);
    666 	register struct file **rp;
    667 	register struct file *fp;
    668 	register int i, fd;
    669 	int oldfds;
    670 
    671 	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
    672 	    cm->cmsg_len != control->m_len)
    673 		return (EINVAL);
    674 	oldfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int);
    675 	rp = (struct file **)(cm + 1);
    676 	for (i = 0; i < oldfds; i++) {
    677 		fd = *(int *)rp++;
    678 		if ((unsigned)fd >= fdp->fd_nfiles ||
    679 		    fdp->fd_ofiles[fd] == NULL)
    680 			return (EBADF);
    681 	}
    682 	rp = (struct file **)(cm + 1);
    683 	for (i = 0; i < oldfds; i++) {
    684 		fp = fdp->fd_ofiles[*(int *)rp];
    685 		*rp++ = fp;
    686 		fp->f_count++;
    687 		fp->f_msgcount++;
    688 		unp_rights++;
    689 	}
    690 	return (0);
    691 }
    692 
/*
 * Garbage collector state: unp_defer counts files flagged FDEFER that
 * still await a scan; unp_gcing is non-zero while unp_gc() is running
 * and makes nested calls return immediately.
 */
int	unp_defer, unp_gcing;
extern	struct domain unixdomain;
    695 
    696 void
    697 unp_gc()
    698 {
    699 	register struct file *fp, *nextfp;
    700 	register struct socket *so;
    701 	struct file **extra_ref, **fpp;
    702 	int nunref, i;
    703 
    704 	if (unp_gcing)
    705 		return;
    706 	unp_gcing = 1;
    707 	unp_defer = 0;
    708 	for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next)
    709 		fp->f_flag &= ~(FMARK|FDEFER);
    710 	do {
    711 		for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next) {
    712 			if (fp->f_count == 0)
    713 				continue;
    714 			if (fp->f_flag & FDEFER) {
    715 				fp->f_flag &= ~FDEFER;
    716 				unp_defer--;
    717 			} else {
    718 				if (fp->f_flag & FMARK)
    719 					continue;
    720 				if (fp->f_count == fp->f_msgcount)
    721 					continue;
    722 				fp->f_flag |= FMARK;
    723 			}
    724 			if (fp->f_type != DTYPE_SOCKET ||
    725 			    (so = (struct socket *)fp->f_data) == 0)
    726 				continue;
    727 			if (so->so_proto->pr_domain != &unixdomain ||
    728 			    (so->so_proto->pr_flags&PR_RIGHTS) == 0)
    729 				continue;
    730 #ifdef notdef
    731 			if (so->so_rcv.sb_flags & SB_LOCK) {
    732 				/*
    733 				 * This is problematical; it's not clear
    734 				 * we need to wait for the sockbuf to be
    735 				 * unlocked (on a uniprocessor, at least),
    736 				 * and it's also not clear what to do
    737 				 * if sbwait returns an error due to receipt
    738 				 * of a signal.  If sbwait does return
    739 				 * an error, we'll go into an infinite
    740 				 * loop.  Delete all of this for now.
    741 				 */
    742 				(void) sbwait(&so->so_rcv);
    743 				goto restart;
    744 			}
    745 #endif
    746 			unp_scan(so->so_rcv.sb_mb, unp_mark);
    747 		}
    748 	} while (unp_defer);
    749 	/*
    750 	 * We grab an extra reference to each of the file table entries
    751 	 * that are not otherwise accessible and then free the rights
    752 	 * that are stored in messages on them.
    753 	 *
    754 	 * The bug in the orginal code is a little tricky, so I'll describe
    755 	 * what's wrong with it here.
    756 	 *
    757 	 * It is incorrect to simply unp_discard each entry for f_msgcount
    758 	 * times -- consider the case of sockets A and B that contain
    759 	 * references to each other.  On a last close of some other socket,
    760 	 * we trigger a gc since the number of outstanding rights (unp_rights)
    761 	 * is non-zero.  If during the sweep phase the gc code un_discards,
    762 	 * we end up doing a (full) closef on the descriptor.  A closef on A
    763 	 * results in the following chain.  Closef calls soo_close, which
    764 	 * calls soclose.   Soclose calls first (through the switch
    765 	 * uipc_usrreq) unp_detach, which re-invokes unp_gc.  Unp_gc simply
    766 	 * returns because the previous instance had set unp_gcing, and
    767 	 * we return all the way back to soclose, which marks the socket
    768 	 * with SS_NOFDREF, and then calls sofree.  Sofree calls sorflush
    769 	 * to free up the rights that are queued in messages on the socket A,
    770 	 * i.e., the reference on B.  The sorflush calls via the dom_dispose
    771 	 * switch unp_dispose, which unp_scans with unp_discard.  This second
    772 	 * instance of unp_discard just calls closef on B.
    773 	 *
    774 	 * Well, a similar chain occurs on B, resulting in a sorflush on B,
    775 	 * which results in another closef on A.  Unfortunately, A is already
    776 	 * being closed, and the descriptor has already been marked with
    777 	 * SS_NOFDREF, and soclose panics at this point.
    778 	 *
    779 	 * Here, we first take an extra reference to each inaccessible
    780 	 * descriptor.  Then, we call sorflush ourself, since we know
    781 	 * it is a Unix domain socket anyhow.  After we destroy all the
    782 	 * rights carried in messages, we do a last closef to get rid
    783 	 * of our extra reference.  This is the last close, and the
    784 	 * unp_detach etc will shut down the socket.
    785 	 *
    786 	 * 91/09/19, bsy (at) cs.cmu.edu
    787 	 */
    788 	extra_ref = malloc(nfiles * sizeof(struct file *), M_FILE, M_WAITOK);
    789 	for (nunref = 0, fp = filehead.lh_first, fpp = extra_ref; fp != 0;
    790 	    fp = nextfp) {
    791 		nextfp = fp->f_list.le_next;
    792 		if (fp->f_count == 0)
    793 			continue;
    794 		if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) {
    795 			*fpp++ = fp;
    796 			nunref++;
    797 			fp->f_count++;
    798 		}
    799 	}
    800 	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
    801 		sorflush((struct socket *)(*fpp)->f_data);
    802 	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
    803 		(void) closef(*fpp, (struct proc *)0);
    804 	free((caddr_t)extra_ref, M_FILE);
    805 	unp_gcing = 0;
    806 }
    807 
    808 void
    809 unp_dispose(m)
    810 	struct mbuf *m;
    811 {
    812 
    813 	if (m)
    814 		unp_scan(m, unp_discard);
    815 }
    816 
    817 void
    818 unp_scan(m0, op)
    819 	register struct mbuf *m0;
    820 	void (*op) __P((struct file *));
    821 {
    822 	register struct mbuf *m;
    823 	register struct file **rp;
    824 	register struct cmsghdr *cm;
    825 	register int i;
    826 	int qfds;
    827 
    828 	while (m0) {
    829 		for (m = m0; m; m = m->m_next)
    830 			if (m->m_type == MT_CONTROL &&
    831 			    m->m_len >= sizeof(*cm)) {
    832 				cm = mtod(m, struct cmsghdr *);
    833 				if (cm->cmsg_level != SOL_SOCKET ||
    834 				    cm->cmsg_type != SCM_RIGHTS)
    835 					continue;
    836 				qfds = (cm->cmsg_len - sizeof *cm)
    837 						/ sizeof (struct file *);
    838 				rp = (struct file **)(cm + 1);
    839 				for (i = 0; i < qfds; i++)
    840 					(*op)(*rp++);
    841 				break;		/* XXX, but saves time */
    842 			}
    843 		m0 = m0->m_act;
    844 	}
    845 }
    846 
    847 void
    848 unp_mark(fp)
    849 	struct file *fp;
    850 {
    851 
    852 	if (fp->f_flag & FMARK)
    853 		return;
    854 	unp_defer++;
    855 	fp->f_flag |= (FMARK|FDEFER);
    856 }
    857 
    858 void
    859 unp_discard(fp)
    860 	struct file *fp;
    861 {
    862 
    863 	fp->f_msgcount--;
    864 	unp_rights--;
    865 	(void) closef(fp, (struct proc *)0);
    866 }
    867