Home | History | Annotate | Line # | Download | only in kern
uipc_usrreq.c revision 1.8
      1 /*
      2  * Copyright (c) 1982, 1986, 1989, 1991, 1993
      3  *	The Regents of the University of California.  All rights reserved.
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions
      7  * are met:
      8  * 1. Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  * 2. Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in the
     12  *    documentation and/or other materials provided with the distribution.
     13  * 3. All advertising materials mentioning features or use of this software
     14  *    must display the following acknowledgement:
     15  *	This product includes software developed by the University of
     16  *	California, Berkeley and its contributors.
     17  * 4. Neither the name of the University nor the names of its contributors
     18  *    may be used to endorse or promote products derived from this software
     19  *    without specific prior written permission.
     20  *
     21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     31  * SUCH DAMAGE.
     32  *
     33  *	from: @(#)uipc_usrreq.c	8.3 (Berkeley) 1/4/94
     34  *	$Id: uipc_usrreq.c,v 1.8 1994/05/04 09:50:11 mycroft Exp $
     35  */
     36 
     37 #include <sys/param.h>
     38 #include <sys/systm.h>
     39 #include <sys/proc.h>
     40 #include <sys/filedesc.h>
     41 #include <sys/domain.h>
     42 #include <sys/protosw.h>
     43 #include <sys/socket.h>
     44 #include <sys/socketvar.h>
     45 #include <sys/unpcb.h>
     46 #include <sys/un.h>
     47 #include <sys/namei.h>
     48 #include <sys/vnode.h>
     49 #include <sys/file.h>
     50 #include <sys/stat.h>
     51 #include <sys/mbuf.h>
     52 
     53 /*
     54  * Unix communications domain.
     55  *
     56  * TODO:
     57  *	SEQPACKET, RDM
     58  *	rethink name space problems
     59  *	need a proper out-of-band
     60  */
/*
 * Placeholder address handed out when a socket has no bound name
 * (see PRU_ACCEPT and the datagram send path): only the length and
 * family fields are initialized.
 */
struct	sockaddr sun_noname = { sizeof(sun_noname), AF_UNIX };
/*
 * Source of fake inode numbers: PRU_SENSE assigns the current value to a
 * control block whose unp_ino is still zero and then increments it.
 */
ino_t	unp_ino;			/* prototype for fake inode numbers */
     63 
     64 /*ARGSUSED*/
     65 int
     66 uipc_usrreq(so, req, m, nam, control)
     67 	struct socket *so;
     68 	int req;
     69 	struct mbuf *m, *nam, *control;
     70 {
     71 	struct unpcb *unp = sotounpcb(so);
     72 	register struct socket *so2;
     73 	register int error = 0;
     74 	struct proc *p = curproc;	/* XXX */
     75 
     76 	if (req == PRU_CONTROL)
     77 		return (EOPNOTSUPP);
     78 	if (req != PRU_SEND && control && control->m_len) {
     79 		error = EOPNOTSUPP;
     80 		goto release;
     81 	}
     82 	if (unp == 0 && req != PRU_ATTACH) {
     83 		error = EINVAL;
     84 		goto release;
     85 	}
     86 	switch (req) {
     87 
     88 	case PRU_ATTACH:
     89 		if (unp) {
     90 			error = EISCONN;
     91 			break;
     92 		}
     93 		error = unp_attach(so);
     94 		break;
     95 
     96 	case PRU_DETACH:
     97 		unp_detach(unp);
     98 		break;
     99 
    100 	case PRU_BIND:
    101 		error = unp_bind(unp, nam, p);
    102 		break;
    103 
    104 	case PRU_LISTEN:
    105 		if (unp->unp_vnode == 0)
    106 			error = EINVAL;
    107 		break;
    108 
    109 	case PRU_CONNECT:
    110 		error = unp_connect(so, nam, p);
    111 		break;
    112 
    113 	case PRU_CONNECT2:
    114 		error = unp_connect2(so, (struct socket *)nam);
    115 		break;
    116 
    117 	case PRU_DISCONNECT:
    118 		unp_disconnect(unp);
    119 		break;
    120 
    121 	case PRU_ACCEPT:
    122 		/*
    123 		 * Pass back name of connected socket,
    124 		 * if it was bound and we are still connected
    125 		 * (our peer may have closed already!).
    126 		 */
    127 		if (unp->unp_conn && unp->unp_conn->unp_addr) {
    128 			nam->m_len = unp->unp_conn->unp_addr->m_len;
    129 			bcopy(mtod(unp->unp_conn->unp_addr, caddr_t),
    130 			    mtod(nam, caddr_t), (unsigned)nam->m_len);
    131 		} else {
    132 			nam->m_len = sizeof(sun_noname);
    133 			*(mtod(nam, struct sockaddr *)) = sun_noname;
    134 		}
    135 		break;
    136 
    137 	case PRU_SHUTDOWN:
    138 		socantsendmore(so);
    139 		unp_shutdown(unp);
    140 		break;
    141 
    142 	case PRU_RCVD:
    143 		switch (so->so_type) {
    144 
    145 		case SOCK_DGRAM:
    146 			panic("uipc 1");
    147 			/*NOTREACHED*/
    148 
    149 		case SOCK_STREAM:
    150 #define	rcv (&so->so_rcv)
    151 #define snd (&so2->so_snd)
    152 			if (unp->unp_conn == 0)
    153 				break;
    154 			so2 = unp->unp_conn->unp_socket;
    155 			/*
    156 			 * Adjust backpressure on sender
    157 			 * and wakeup any waiting to write.
    158 			 */
    159 			snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
    160 			unp->unp_mbcnt = rcv->sb_mbcnt;
    161 			snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
    162 			unp->unp_cc = rcv->sb_cc;
    163 			sowwakeup(so2);
    164 #undef snd
    165 #undef rcv
    166 			break;
    167 
    168 		default:
    169 			panic("uipc 2");
    170 		}
    171 		break;
    172 
    173 	case PRU_SEND:
    174 		if (control && (error = unp_internalize(control, p)))
    175 			break;
    176 		switch (so->so_type) {
    177 
    178 		case SOCK_DGRAM: {
    179 			struct sockaddr *from;
    180 
    181 			if (nam) {
    182 				if (unp->unp_conn) {
    183 					error = EISCONN;
    184 					break;
    185 				}
    186 				error = unp_connect(so, nam, p);
    187 				if (error)
    188 					break;
    189 			} else {
    190 				if (unp->unp_conn == 0) {
    191 					error = ENOTCONN;
    192 					break;
    193 				}
    194 			}
    195 			so2 = unp->unp_conn->unp_socket;
    196 			if (unp->unp_addr)
    197 				from = mtod(unp->unp_addr, struct sockaddr *);
    198 			else
    199 				from = &sun_noname;
    200 			if (sbappendaddr(&so2->so_rcv, from, m, control)) {
    201 				sorwakeup(so2);
    202 				m = 0;
    203 				control = 0;
    204 			} else
    205 				error = ENOBUFS;
    206 			if (nam)
    207 				unp_disconnect(unp);
    208 			break;
    209 		}
    210 
    211 		case SOCK_STREAM:
    212 #define	rcv (&so2->so_rcv)
    213 #define	snd (&so->so_snd)
    214 			if (so->so_state & SS_CANTSENDMORE) {
    215 				error = EPIPE;
    216 				break;
    217 			}
    218 			if (unp->unp_conn == 0)
    219 				panic("uipc 3");
    220 			so2 = unp->unp_conn->unp_socket;
    221 			/*
    222 			 * Send to paired receive port, and then reduce
    223 			 * send buffer hiwater marks to maintain backpressure.
    224 			 * Wake up readers.
    225 			 */
    226 			if (control) {
    227 				if (sbappendcontrol(rcv, m, control))
    228 					control = 0;
    229 			} else
    230 				sbappend(rcv, m);
    231 			snd->sb_mbmax -=
    232 			    rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
    233 			unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
    234 			snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
    235 			unp->unp_conn->unp_cc = rcv->sb_cc;
    236 			sorwakeup(so2);
    237 			m = 0;
    238 #undef snd
    239 #undef rcv
    240 			break;
    241 
    242 		default:
    243 			panic("uipc 4");
    244 		}
    245 		break;
    246 
    247 	case PRU_ABORT:
    248 		unp_drop(unp, ECONNABORTED);
    249 		break;
    250 
    251 	case PRU_SENSE:
    252 		((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
    253 		if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
    254 			so2 = unp->unp_conn->unp_socket;
    255 			((struct stat *) m)->st_blksize += so2->so_rcv.sb_cc;
    256 		}
    257 		((struct stat *) m)->st_dev = NODEV;
    258 		if (unp->unp_ino == 0)
    259 			unp->unp_ino = unp_ino++;
    260 		((struct stat *) m)->st_ino = unp->unp_ino;
    261 		return (0);
    262 
    263 	case PRU_RCVOOB:
    264 		return (EOPNOTSUPP);
    265 
    266 	case PRU_SENDOOB:
    267 		error = EOPNOTSUPP;
    268 		break;
    269 
    270 	case PRU_SOCKADDR:
    271 		if (unp->unp_addr) {
    272 			nam->m_len = unp->unp_addr->m_len;
    273 			bcopy(mtod(unp->unp_addr, caddr_t),
    274 			    mtod(nam, caddr_t), (unsigned)nam->m_len);
    275 		} else
    276 			nam->m_len = 0;
    277 		break;
    278 
    279 	case PRU_PEERADDR:
    280 		if (unp->unp_conn && unp->unp_conn->unp_addr) {
    281 			nam->m_len = unp->unp_conn->unp_addr->m_len;
    282 			bcopy(mtod(unp->unp_conn->unp_addr, caddr_t),
    283 			    mtod(nam, caddr_t), (unsigned)nam->m_len);
    284 		} else
    285 			nam->m_len = 0;
    286 		break;
    287 
    288 	case PRU_SLOWTIMO:
    289 		break;
    290 
    291 	default:
    292 		panic("piusrreq");
    293 	}
    294 release:
    295 	if (control)
    296 		m_freem(control);
    297 	if (m)
    298 		m_freem(m);
    299 	return (error);
    300 }
    301 
/*
 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
 * for stream sockets, although the total for sender and receiver is
 * actually only PIPSIZ.
 * Datagram sockets really use the sendspace as the maximum datagram size,
 * and don't really want to reserve the sendspace.  Their recvspace should
 * be large enough for at least one max-size datagram plus address.
 *
 * These values are what unp_attach() passes to soreserve() for a socket
 * whose buffer high-water marks have not been set yet.
 */
#define	PIPSIZ	4096
u_long	unpst_sendspace = PIPSIZ;	/* stream send buffer reservation */
u_long	unpst_recvspace = PIPSIZ;	/* stream receive buffer reservation */
u_long	unpdg_sendspace = 2*1024;	/* really max datagram size */
u_long	unpdg_recvspace = 4*1024;	/* datagram receive buffer reservation */

/*
 * Incremented for each descriptor converted by unp_internalize() and
 * decremented by unp_externalize() and unp_discard(); unp_detach() runs
 * the garbage collector only while this is non-zero.
 */
int	unp_rights;			/* file descriptors in flight */
    317 
    318 int
    319 unp_attach(so)
    320 	struct socket *so;
    321 {
    322 	register struct mbuf *m;
    323 	register struct unpcb *unp;
    324 	int error;
    325 
    326 	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
    327 		switch (so->so_type) {
    328 
    329 		case SOCK_STREAM:
    330 			error = soreserve(so, unpst_sendspace, unpst_recvspace);
    331 			break;
    332 
    333 		case SOCK_DGRAM:
    334 			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
    335 			break;
    336 
    337 		default:
    338 			panic("unp_attach");
    339 		}
    340 		if (error)
    341 			return (error);
    342 	}
    343 	m = m_getclr(M_DONTWAIT, MT_PCB);
    344 	if (m == NULL)
    345 		return (ENOBUFS);
    346 	unp = mtod(m, struct unpcb *);
    347 	so->so_pcb = (caddr_t)unp;
    348 	unp->unp_socket = so;
    349 	return (0);
    350 }
    351 
    352 int
    353 unp_detach(unp)
    354 	register struct unpcb *unp;
    355 {
    356 
    357 	if (unp->unp_vnode) {
    358 		unp->unp_vnode->v_socket = 0;
    359 		vrele(unp->unp_vnode);
    360 		unp->unp_vnode = 0;
    361 	}
    362 	if (unp->unp_conn)
    363 		unp_disconnect(unp);
    364 	while (unp->unp_refs)
    365 		unp_drop(unp->unp_refs, ECONNRESET);
    366 	soisdisconnected(unp->unp_socket);
    367 	unp->unp_socket->so_pcb = 0;
    368 	m_freem(unp->unp_addr);
    369 	(void) m_free(dtom(unp));
    370 	if (unp_rights) {
    371 		/*
    372 		 * Normally the receive buffer is flushed later,
    373 		 * in sofree, but if our receive buffer holds references
    374 		 * to descriptors that are now garbage, we will dispose
    375 		 * of those descriptor references after the garbage collector
    376 		 * gets them (resulting in a "panic: closef: count < 0").
    377 		 */
    378 		sorflush(unp->unp_socket);
    379 		unp_gc();
    380 	}
    381 }
    382 
    383 int
    384 unp_bind(unp, nam, p)
    385 	struct unpcb *unp;
    386 	struct mbuf *nam;
    387 	struct proc *p;
    388 {
    389 	struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *);
    390 	register struct vnode *vp;
    391 	register struct nameidata *ndp;
    392 	struct vattr vattr;
    393 	int error;
    394 	struct nameidata nd;
    395 
    396 	ndp = &nd;
    397 	ndp->ni_dirp = soun->sun_path;
    398 	if (unp->unp_vnode != NULL)
    399 		return (EINVAL);
    400 	if (nam->m_len == MLEN) {
    401 		if (*(mtod(nam, caddr_t) + nam->m_len - 1) != 0)
    402 			return (EINVAL);
    403 	} else
    404 		*(mtod(nam, caddr_t) + nam->m_len) = 0;
    405 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
    406 	ndp->ni_nameiop = CREATE | FOLLOW | LOCKPARENT;
    407 	ndp->ni_segflg = UIO_SYSSPACE;
    408 	if (error = namei(ndp, p))
    409 		return (error);
    410 	vp = ndp->ni_vp;
    411 	if (vp != NULL) {
    412 		VOP_ABORTOP(ndp);
    413 		if (ndp->ni_dvp == vp)
    414 			vrele(ndp->ni_dvp);
    415 		else
    416 			vput(ndp->ni_dvp);
    417 		vrele(vp);
    418 		return (EADDRINUSE);
    419 	}
    420 	VATTR_NULL(&vattr);
    421 	vattr.va_type = VSOCK;
    422 	vattr.va_mode = 0777;
    423 	if (error = VOP_CREATE(ndp, &vattr, p))
    424 		return (error);
    425 	vp = ndp->ni_vp;
    426 	vp->v_socket = unp->unp_socket;
    427 	unp->unp_vnode = vp;
    428 	unp->unp_addr = m_copy(nam, 0, (int)M_COPYALL);
    429 	VOP_UNLOCK(vp);
    430 	return (0);
    431 }
    432 
/*
 * Connect socket `so' to the socket bound at the path name in `nam',
 * with the access rights of process `p'.
 *
 * Errors: EMSGSIZE when the name fills the mbuf without a terminating
 * NUL, the namei()/VOP_ACCESS() error, ENOTSOCK when the path is not a
 * socket vnode, ECONNREFUSED when nothing is bound there or a new
 * connection cannot be set up, EPROTOTYPE on a socket type mismatch.
 * For connection-oriented protocols a fresh socket is spawned from the
 * listening one (inheriting its address) and the connection is made to
 * that instead.
 */
int
unp_connect(so, nam, p)
	struct socket *so;
	struct mbuf *nam;
	struct proc *p;
{
	register struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *);
	caddr_t name = mtod(nam, caddr_t);
	register struct vnode *vp;
	register struct socket *peer, *spawned;
	struct unpcb *listenpcb, *spawnedpcb;
	struct nameidata nd;
	int error;

	/* Make sure the path is NUL terminated inside the mbuf. */
	if (nam->m_data + nam->m_len == &nam->m_dat[MLEN]) {	/* XXX */
		if (name[nam->m_len - 1] != 0)
			return (EMSGSIZE);
	} else
		name[nam->m_len] = 0;

	nd.ni_dirp = soun->sun_path;
	nd.ni_nameiop = LOOKUP | FOLLOW | LOCKLEAF;
	nd.ni_segflg = UIO_SYSSPACE;
	error = namei(&nd, p);
	if (error)
		return (error);

	/* From here on the vnode must be released through `out'. */
	vp = nd.ni_vp;
	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto out;
	}
	error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p);
	if (error)
		goto out;
	peer = vp->v_socket;
	if (peer == 0) {
		error = ECONNREFUSED;
		goto out;
	}
	if (so->so_type != peer->so_type) {
		error = EPROTOTYPE;
		goto out;
	}
	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		/* The target must be listening and able to spawn a socket. */
		if ((peer->so_options & SO_ACCEPTCONN) == 0) {
			error = ECONNREFUSED;
			goto out;
		}
		spawned = sonewconn(peer, 0);
		if (spawned == 0) {
			error = ECONNREFUSED;
			goto out;
		}
		listenpcb = sotounpcb(peer);
		spawnedpcb = sotounpcb(spawned);
		if (listenpcb->unp_addr)
			spawnedpcb->unp_addr =
			    m_copy(listenpcb->unp_addr, 0, (int)M_COPYALL);
		peer = spawned;
	}
	error = unp_connect2(so, peer);
out:
	vput(vp);
	return (error);
}
    492 
    493 int
    494 unp_connect2(so, so2)
    495 	register struct socket *so;
    496 	register struct socket *so2;
    497 {
    498 	register struct unpcb *unp = sotounpcb(so);
    499 	register struct unpcb *unp2;
    500 
    501 	if (so2->so_type != so->so_type)
    502 		return (EPROTOTYPE);
    503 	unp2 = sotounpcb(so2);
    504 	unp->unp_conn = unp2;
    505 	switch (so->so_type) {
    506 
    507 	case SOCK_DGRAM:
    508 		unp->unp_nextref = unp2->unp_refs;
    509 		unp2->unp_refs = unp;
    510 		soisconnected(so);
    511 		break;
    512 
    513 	case SOCK_STREAM:
    514 		unp2->unp_conn = unp;
    515 		soisconnected(so);
    516 		soisconnected(so2);
    517 		break;
    518 
    519 	default:
    520 		panic("unp_connect2");
    521 	}
    522 	return (0);
    523 }
    524 
    525 void
    526 unp_disconnect(unp)
    527 	struct unpcb *unp;
    528 {
    529 	register struct unpcb *unp2 = unp->unp_conn;
    530 
    531 	if (unp2 == 0)
    532 		return;
    533 	unp->unp_conn = 0;
    534 	switch (unp->unp_socket->so_type) {
    535 
    536 	case SOCK_DGRAM:
    537 		if (unp2->unp_refs == unp)
    538 			unp2->unp_refs = unp->unp_nextref;
    539 		else {
    540 			unp2 = unp2->unp_refs;
    541 			for (;;) {
    542 				if (unp2 == 0)
    543 					panic("unp_disconnect");
    544 				if (unp2->unp_nextref == unp)
    545 					break;
    546 				unp2 = unp2->unp_nextref;
    547 			}
    548 			unp2->unp_nextref = unp->unp_nextref;
    549 		}
    550 		unp->unp_nextref = 0;
    551 		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
    552 		break;
    553 
    554 	case SOCK_STREAM:
    555 		soisdisconnected(unp->unp_socket);
    556 		unp2->unp_conn = 0;
    557 		soisdisconnected(unp2->unp_socket);
    558 		break;
    559 	}
    560 }
    561 
#ifdef notdef
/*
 * Only compiled when `notdef' is defined: aborts by detaching the
 * control block.
 */
unp_abort(unp)
	struct unpcb *unp;
{

	unp_detach(unp);
}
#endif
    570 
    571 void
    572 unp_shutdown(unp)
    573 	struct unpcb *unp;
    574 {
    575 	struct socket *so;
    576 
    577 	if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn &&
    578 	    (so = unp->unp_conn->unp_socket))
    579 		socantrcvmore(so);
    580 }
    581 
    582 void
    583 unp_drop(unp, errno)
    584 	struct unpcb *unp;
    585 	int errno;
    586 {
    587 	struct socket *so = unp->unp_socket;
    588 
    589 	so->so_error = errno;
    590 	unp_disconnect(unp);
    591 	if (so->so_head) {
    592 		so->so_pcb = (caddr_t) 0;
    593 		m_freem(unp->unp_addr);
    594 		(void) m_free(dtom(unp));
    595 		sofree(so);
    596 	}
    597 }
    598 
#ifdef notdef
/*
 * Only compiled when `notdef' is defined: empty placeholder.
 */
unp_drain()
{

}
#endif
    605 
    606 int
    607 unp_externalize(rights)
    608 	struct mbuf *rights;
    609 {
    610 	struct proc *p = curproc;		/* XXX */
    611 	register int i;
    612 	register struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
    613 	register struct file **rp = (struct file **)(cm + 1);
    614 	register struct file *fp;
    615 	int newfds = (cm->cmsg_len - sizeof(*cm)) / sizeof (int);
    616 	int f;
    617 
    618 	if (!fdavail(p, newfds)) {
    619 		for (i = 0; i < newfds; i++) {
    620 			fp = *rp;
    621 			unp_discard(fp);
    622 			*rp++ = 0;
    623 		}
    624 		return (EMSGSIZE);
    625 	}
    626 	for (i = 0; i < newfds; i++) {
    627 		if (fdalloc(p, 0, &f))
    628 			panic("unp_externalize");
    629 		fp = *rp;
    630 		p->p_fd->fd_ofiles[f] = fp;
    631 		fp->f_msgcount--;
    632 		unp_rights--;
    633 		*(int *)rp++ = f;
    634 	}
    635 	return (0);
    636 }
    637 
    638 int
    639 unp_internalize(control, p)
    640 	struct mbuf *control;
    641 	struct proc *p;
    642 {
    643 	struct filedesc *fdp = p->p_fd;
    644 	register struct cmsghdr *cm = mtod(control, struct cmsghdr *);
    645 	register struct file **rp;
    646 	register struct file *fp;
    647 	register int i, fd;
    648 	int oldfds;
    649 
    650 	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
    651 	    cm->cmsg_len != control->m_len)
    652 		return (EINVAL);
    653 	oldfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int);
    654 	rp = (struct file **)(cm + 1);
    655 	for (i = 0; i < oldfds; i++) {
    656 		fd = *(int *)rp++;
    657 		if ((unsigned)fd >= fdp->fd_nfiles ||
    658 		    fdp->fd_ofiles[fd] == NULL)
    659 			return (EBADF);
    660 	}
    661 	rp = (struct file **)(cm + 1);
    662 	for (i = 0; i < oldfds; i++) {
    663 		fp = fdp->fd_ofiles[*(int *)rp];
    664 		*rp++ = fp;
    665 		fp->f_count++;
    666 		fp->f_msgcount++;
    667 		unp_rights++;
    668 	}
    669 	return (0);
    670 }
    671 
/*
 * Garbage collector state: unp_defer counts the files currently flagged
 * FDEFER (raised by unp_mark(), lowered by the mark loop in unp_gc());
 * unp_gcing is non-zero while unp_gc() is running, so that a nested call
 * returns immediately.
 */
int	unp_defer, unp_gcing;
/* Used by unp_gc() to recognize sockets of this domain. */
extern	struct domain unixdomain;
    674 
    675 void
    676 unp_gc()
    677 {
    678 	register struct file *fp, *nextfp;
    679 	register struct socket *so;
    680 	struct file **extra_ref, **fpp;
    681 	int nunref, i;
    682 
    683 	if (unp_gcing)
    684 		return;
    685 	unp_gcing = 1;
    686 	unp_defer = 0;
    687 	for (fp = filehead; fp; fp = fp->f_filef)
    688 		fp->f_flag &= ~(FMARK|FDEFER);
    689 	do {
    690 		for (fp = filehead; fp; fp = fp->f_filef) {
    691 			if (fp->f_count == 0)
    692 				continue;
    693 			if (fp->f_flag & FDEFER) {
    694 				fp->f_flag &= ~FDEFER;
    695 				unp_defer--;
    696 			} else {
    697 				if (fp->f_flag & FMARK)
    698 					continue;
    699 				if (fp->f_count == fp->f_msgcount)
    700 					continue;
    701 				fp->f_flag |= FMARK;
    702 			}
    703 			if (fp->f_type != DTYPE_SOCKET ||
    704 			    (so = (struct socket *)fp->f_data) == 0)
    705 				continue;
    706 			if (so->so_proto->pr_domain != &unixdomain ||
    707 			    (so->so_proto->pr_flags&PR_RIGHTS) == 0)
    708 				continue;
    709 #ifdef notdef
    710 			if (so->so_rcv.sb_flags & SB_LOCK) {
    711 				/*
    712 				 * This is problematical; it's not clear
    713 				 * we need to wait for the sockbuf to be
    714 				 * unlocked (on a uniprocessor, at least),
    715 				 * and it's also not clear what to do
    716 				 * if sbwait returns an error due to receipt
    717 				 * of a signal.  If sbwait does return
    718 				 * an error, we'll go into an infinite
    719 				 * loop.  Delete all of this for now.
    720 				 */
    721 				(void) sbwait(&so->so_rcv);
    722 				goto restart;
    723 			}
    724 #endif
    725 			unp_scan(so->so_rcv.sb_mb, unp_mark);
    726 		}
    727 	} while (unp_defer);
    728 	/*
    729 	 * We grab an extra reference to each of the file table entries
    730 	 * that are not otherwise accessible and then free the rights
    731 	 * that are stored in messages on them.
    732 	 *
    733 	 * The bug in the orginal code is a little tricky, so I'll describe
    734 	 * what's wrong with it here.
    735 	 *
    736 	 * It is incorrect to simply unp_discard each entry for f_msgcount
    737 	 * times -- consider the case of sockets A and B that contain
    738 	 * references to each other.  On a last close of some other socket,
    739 	 * we trigger a gc since the number of outstanding rights (unp_rights)
    740 	 * is non-zero.  If during the sweep phase the gc code un_discards,
    741 	 * we end up doing a (full) closef on the descriptor.  A closef on A
    742 	 * results in the following chain.  Closef calls soo_close, which
    743 	 * calls soclose.   Soclose calls first (through the switch
    744 	 * uipc_usrreq) unp_detach, which re-invokes unp_gc.  Unp_gc simply
    745 	 * returns because the previous instance had set unp_gcing, and
    746 	 * we return all the way back to soclose, which marks the socket
    747 	 * with SS_NOFDREF, and then calls sofree.  Sofree calls sorflush
    748 	 * to free up the rights that are queued in messages on the socket A,
    749 	 * i.e., the reference on B.  The sorflush calls via the dom_dispose
    750 	 * switch unp_dispose, which unp_scans with unp_discard.  This second
    751 	 * instance of unp_discard just calls closef on B.
    752 	 *
    753 	 * Well, a similar chain occurs on B, resulting in a sorflush on B,
    754 	 * which results in another closef on A.  Unfortunately, A is already
    755 	 * being closed, and the descriptor has already been marked with
    756 	 * SS_NOFDREF, and soclose panics at this point.
    757 	 *
    758 	 * Here, we first take an extra reference to each inaccessible
    759 	 * descriptor.  Then, we call sorflush ourself, since we know
    760 	 * it is a Unix domain socket anyhow.  After we destroy all the
    761 	 * rights carried in messages, we do a last closef to get rid
    762 	 * of our extra reference.  This is the last close, and the
    763 	 * unp_detach etc will shut down the socket.
    764 	 *
    765 	 * 91/09/19, bsy (at) cs.cmu.edu
    766 	 */
    767 	extra_ref = malloc(nfiles * sizeof(struct file *), M_FILE, M_WAITOK);
    768 	for (nunref = 0, fp = filehead, fpp = extra_ref; fp; fp = nextfp) {
    769 		nextfp = fp->f_filef;
    770 		if (fp->f_count == 0)
    771 			continue;
    772 		if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) {
    773 			*fpp++ = fp;
    774 			nunref++;
    775 			fp->f_count++;
    776 		}
    777 	}
    778 	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
    779 		sorflush((struct socket *)(*fpp)->f_data);
    780 	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
    781 		closef(*fpp);
    782 	free((caddr_t)extra_ref, M_FILE);
    783 	unp_gcing = 0;
    784 }
    785 
    786 void
    787 unp_dispose(m)
    788 	struct mbuf *m;
    789 {
    790 
    791 	if (m)
    792 		unp_scan(m, unp_discard);
    793 }
    794 
    795 void
    796 unp_scan(m0, op)
    797 	register struct mbuf *m0;
    798 	void (*op) __P((struct file *));
    799 {
    800 	register struct mbuf *m;
    801 	register struct file **rp;
    802 	register struct cmsghdr *cm;
    803 	register int i;
    804 	int qfds;
    805 
    806 	while (m0) {
    807 		for (m = m0; m; m = m->m_next)
    808 			if (m->m_type == MT_CONTROL &&
    809 			    m->m_len >= sizeof(*cm)) {
    810 				cm = mtod(m, struct cmsghdr *);
    811 				if (cm->cmsg_level != SOL_SOCKET ||
    812 				    cm->cmsg_type != SCM_RIGHTS)
    813 					continue;
    814 				qfds = (cm->cmsg_len - sizeof *cm)
    815 						/ sizeof (struct file *);
    816 				rp = (struct file **)(cm + 1);
    817 				for (i = 0; i < qfds; i++)
    818 					(*op)(*rp++);
    819 				break;		/* XXX, but saves time */
    820 			}
    821 		m0 = m0->m_act;
    822 	}
    823 }
    824 
    825 void
    826 unp_mark(fp)
    827 	struct file *fp;
    828 {
    829 
    830 	if (fp->f_flag & FMARK)
    831 		return;
    832 	unp_defer++;
    833 	fp->f_flag |= (FMARK|FDEFER);
    834 }
    835 
    836 void
    837 unp_discard(fp)
    838 	struct file *fp;
    839 {
    840 
    841 	fp->f_msgcount--;
    842 	unp_rights--;
    843 	(void) closef(fp, (struct proc *)NULL);
    844 }
    845