Home | History | Annotate | Line # | Download | only in kern
uipc_usrreq.c revision 1.16
      1 /*	$NetBSD: uipc_usrreq.c,v 1.16 1996/02/04 02:18:00 christos Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 1982, 1986, 1989, 1991, 1993
      5  *	The Regents of the University of California.  All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. All advertising materials mentioning features or use of this software
     16  *    must display the following acknowledgement:
     17  *	This product includes software developed by the University of
     18  *	California, Berkeley and its contributors.
     19  * 4. Neither the name of the University nor the names of its contributors
     20  *    may be used to endorse or promote products derived from this software
     21  *    without specific prior written permission.
     22  *
     23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     33  * SUCH DAMAGE.
     34  *
     35  *	@(#)uipc_usrreq.c	8.3 (Berkeley) 1/4/94
     36  */
     37 
     38 #include <sys/param.h>
     39 #include <sys/systm.h>
     40 #include <sys/proc.h>
     41 #include <sys/filedesc.h>
     42 #include <sys/domain.h>
     43 #include <sys/protosw.h>
     44 #include <sys/socket.h>
     45 #include <sys/socketvar.h>
     46 #include <sys/unpcb.h>
     47 #include <sys/un.h>
     48 #include <sys/namei.h>
     49 #include <sys/vnode.h>
     50 #include <sys/file.h>
     51 #include <sys/stat.h>
     52 #include <sys/mbuf.h>
     53 
     54 #include <kern/kern_extern.h>
     55 /*
     56  * Unix communications domain.
     57  *
     58  * TODO:
     59  *	SEQPACKET, RDM
     60  *	rethink name space problems
     61  *	need a proper out-of-band
     62  */
     63 struct	sockaddr sun_noname = { sizeof(sun_noname), AF_UNIX };
     64 ino_t	unp_ino;			/* prototype for fake inode numbers */
     65 
     66 /*ARGSUSED*/
     67 int
     68 uipc_usrreq(so, req, m, nam, control)
     69 	struct socket *so;
     70 	int req;
     71 	struct mbuf *m, *nam, *control;
     72 {
     73 	struct unpcb *unp = sotounpcb(so);
     74 	register struct socket *so2;
     75 	register int error = 0;
     76 	struct proc *p = curproc;	/* XXX */
     77 
     78 	if (req == PRU_CONTROL)
     79 		return (EOPNOTSUPP);
     80 	if (req != PRU_SEND && control && control->m_len) {
     81 		error = EOPNOTSUPP;
     82 		goto release;
     83 	}
     84 	if (unp == 0 && req != PRU_ATTACH) {
     85 		error = EINVAL;
     86 		goto release;
     87 	}
     88 	switch (req) {
     89 
     90 	case PRU_ATTACH:
     91 		if (unp) {
     92 			error = EISCONN;
     93 			break;
     94 		}
     95 		error = unp_attach(so);
     96 		break;
     97 
     98 	case PRU_DETACH:
     99 		unp_detach(unp);
    100 		break;
    101 
    102 	case PRU_BIND:
    103 		error = unp_bind(unp, nam, p);
    104 		break;
    105 
    106 	case PRU_LISTEN:
    107 		if (unp->unp_vnode == 0)
    108 			error = EINVAL;
    109 		break;
    110 
    111 	case PRU_CONNECT:
    112 		error = unp_connect(so, nam, p);
    113 		break;
    114 
    115 	case PRU_CONNECT2:
    116 		error = unp_connect2(so, (struct socket *)nam);
    117 		break;
    118 
    119 	case PRU_DISCONNECT:
    120 		unp_disconnect(unp);
    121 		break;
    122 
    123 	case PRU_ACCEPT:
    124 		/*
    125 		 * Pass back name of connected socket,
    126 		 * if it was bound and we are still connected
    127 		 * (our peer may have closed already!).
    128 		 */
    129 		if (unp->unp_conn && unp->unp_conn->unp_addr) {
    130 			nam->m_len = unp->unp_conn->unp_addr->m_len;
    131 			bcopy(mtod(unp->unp_conn->unp_addr, caddr_t),
    132 			    mtod(nam, caddr_t), (unsigned)nam->m_len);
    133 		} else {
    134 			nam->m_len = sizeof(sun_noname);
    135 			*(mtod(nam, struct sockaddr *)) = sun_noname;
    136 		}
    137 		break;
    138 
    139 	case PRU_SHUTDOWN:
    140 		socantsendmore(so);
    141 		unp_shutdown(unp);
    142 		break;
    143 
    144 	case PRU_RCVD:
    145 		switch (so->so_type) {
    146 
    147 		case SOCK_DGRAM:
    148 			panic("uipc 1");
    149 			/*NOTREACHED*/
    150 
    151 		case SOCK_STREAM:
    152 #define	rcv (&so->so_rcv)
    153 #define snd (&so2->so_snd)
    154 			if (unp->unp_conn == 0)
    155 				break;
    156 			so2 = unp->unp_conn->unp_socket;
    157 			/*
    158 			 * Adjust backpressure on sender
    159 			 * and wakeup any waiting to write.
    160 			 */
    161 			snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
    162 			unp->unp_mbcnt = rcv->sb_mbcnt;
    163 			snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
    164 			unp->unp_cc = rcv->sb_cc;
    165 			sowwakeup(so2);
    166 #undef snd
    167 #undef rcv
    168 			break;
    169 
    170 		default:
    171 			panic("uipc 2");
    172 		}
    173 		break;
    174 
    175 	case PRU_SEND:
    176 		if (control && (error = unp_internalize(control, p)))
    177 			break;
    178 		switch (so->so_type) {
    179 
    180 		case SOCK_DGRAM: {
    181 			struct sockaddr *from;
    182 
    183 			if (nam) {
    184 				if (unp->unp_conn) {
    185 					error = EISCONN;
    186 					break;
    187 				}
    188 				error = unp_connect(so, nam, p);
    189 				if (error)
    190 					break;
    191 			} else {
    192 				if (unp->unp_conn == 0) {
    193 					error = ENOTCONN;
    194 					break;
    195 				}
    196 			}
    197 			so2 = unp->unp_conn->unp_socket;
    198 			if (unp->unp_addr)
    199 				from = mtod(unp->unp_addr, struct sockaddr *);
    200 			else
    201 				from = &sun_noname;
    202 			if (sbappendaddr(&so2->so_rcv, from, m, control)) {
    203 				sorwakeup(so2);
    204 				m = 0;
    205 				control = 0;
    206 			} else
    207 				error = ENOBUFS;
    208 			if (nam)
    209 				unp_disconnect(unp);
    210 			break;
    211 		}
    212 
    213 		case SOCK_STREAM:
    214 #define	rcv (&so2->so_rcv)
    215 #define	snd (&so->so_snd)
    216 			if (so->so_state & SS_CANTSENDMORE) {
    217 				error = EPIPE;
    218 				break;
    219 			}
    220 			if (unp->unp_conn == 0)
    221 				panic("uipc 3");
    222 			so2 = unp->unp_conn->unp_socket;
    223 			/*
    224 			 * Send to paired receive port, and then reduce
    225 			 * send buffer hiwater marks to maintain backpressure.
    226 			 * Wake up readers.
    227 			 */
    228 			if (control) {
    229 				if (sbappendcontrol(rcv, m, control))
    230 					control = 0;
    231 			} else
    232 				sbappend(rcv, m);
    233 			snd->sb_mbmax -=
    234 			    rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
    235 			unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
    236 			snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
    237 			unp->unp_conn->unp_cc = rcv->sb_cc;
    238 			sorwakeup(so2);
    239 			m = 0;
    240 #undef snd
    241 #undef rcv
    242 			break;
    243 
    244 		default:
    245 			panic("uipc 4");
    246 		}
    247 		break;
    248 
    249 	case PRU_ABORT:
    250 		unp_drop(unp, ECONNABORTED);
    251 		break;
    252 
    253 	case PRU_SENSE:
    254 		((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
    255 		if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
    256 			so2 = unp->unp_conn->unp_socket;
    257 			((struct stat *) m)->st_blksize += so2->so_rcv.sb_cc;
    258 		}
    259 		((struct stat *) m)->st_dev = NODEV;
    260 		if (unp->unp_ino == 0)
    261 			unp->unp_ino = unp_ino++;
    262 		((struct stat *) m)->st_ino = unp->unp_ino;
    263 		return (0);
    264 
    265 	case PRU_RCVOOB:
    266 		return (EOPNOTSUPP);
    267 
    268 	case PRU_SENDOOB:
    269 		error = EOPNOTSUPP;
    270 		break;
    271 
    272 	case PRU_SOCKADDR:
    273 		if (unp->unp_addr) {
    274 			nam->m_len = unp->unp_addr->m_len;
    275 			bcopy(mtod(unp->unp_addr, caddr_t),
    276 			    mtod(nam, caddr_t), (unsigned)nam->m_len);
    277 		} else
    278 			nam->m_len = 0;
    279 		break;
    280 
    281 	case PRU_PEERADDR:
    282 		if (unp->unp_conn && unp->unp_conn->unp_addr) {
    283 			nam->m_len = unp->unp_conn->unp_addr->m_len;
    284 			bcopy(mtod(unp->unp_conn->unp_addr, caddr_t),
    285 			    mtod(nam, caddr_t), (unsigned)nam->m_len);
    286 		} else
    287 			nam->m_len = 0;
    288 		break;
    289 
    290 	case PRU_SLOWTIMO:
    291 		break;
    292 
    293 	default:
    294 		panic("piusrreq");
    295 	}
    296 release:
    297 	if (control)
    298 		m_freem(control);
    299 	if (m)
    300 		m_freem(m);
    301 	return (error);
    302 }
    303 
    304 /*
    305  * Both send and receive buffers are allocated PIPSIZ bytes of buffering
    306  * for stream sockets, although the total for sender and receiver is
    307  * actually only PIPSIZ.
    308  * Datagram sockets really use the sendspace as the maximum datagram size,
    309  * and don't really want to reserve the sendspace.  Their recvspace should
    310  * be large enough for at least one max-size datagram plus address.
    311  */
    312 #define	PIPSIZ	4096
    313 u_long	unpst_sendspace = PIPSIZ;
    314 u_long	unpst_recvspace = PIPSIZ;
    315 u_long	unpdg_sendspace = 2*1024;	/* really max datagram size */
    316 u_long	unpdg_recvspace = 4*1024;
    317 
    318 int	unp_rights;			/* file descriptors in flight */
    319 
    320 int
    321 unp_attach(so)
    322 	struct socket *so;
    323 {
    324 	register struct unpcb *unp;
    325 	int error;
    326 
    327 	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
    328 		switch (so->so_type) {
    329 
    330 		case SOCK_STREAM:
    331 			error = soreserve(so, unpst_sendspace, unpst_recvspace);
    332 			break;
    333 
    334 		case SOCK_DGRAM:
    335 			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
    336 			break;
    337 
    338 		default:
    339 			panic("unp_attach");
    340 		}
    341 		if (error)
    342 			return (error);
    343 	}
    344 	unp = malloc(sizeof(*unp), M_PCB, M_NOWAIT);
    345 	if (unp == NULL)
    346 		return (ENOBUFS);
    347 	bzero((caddr_t)unp, sizeof(*unp));
    348 	unp->unp_socket = so;
    349 	so->so_pcb = unp;
    350 	return (0);
    351 }
    352 
    353 int
    354 unp_detach(unp)
    355 	register struct unpcb *unp;
    356 {
    357 
    358 	if (unp->unp_vnode) {
    359 		unp->unp_vnode->v_socket = 0;
    360 		vrele(unp->unp_vnode);
    361 		unp->unp_vnode = 0;
    362 	}
    363 	if (unp->unp_conn)
    364 		unp_disconnect(unp);
    365 	while (unp->unp_refs)
    366 		unp_drop(unp->unp_refs, ECONNRESET);
    367 	soisdisconnected(unp->unp_socket);
    368 	unp->unp_socket->so_pcb = 0;
    369 	m_freem(unp->unp_addr);
    370 	if (unp_rights) {
    371 		/*
    372 		 * Normally the receive buffer is flushed later,
    373 		 * in sofree, but if our receive buffer holds references
    374 		 * to descriptors that are now garbage, we will dispose
    375 		 * of those descriptor references after the garbage collector
    376 		 * gets them (resulting in a "panic: closef: count < 0").
    377 		 */
    378 		sorflush(unp->unp_socket);
    379 		free(unp, M_PCB);
    380 		unp_gc();
    381 	} else
    382 		free(unp, M_PCB);
    383 }
    384 
    385 int
    386 unp_bind(unp, nam, p)
    387 	struct unpcb *unp;
    388 	struct mbuf *nam;
    389 	struct proc *p;
    390 {
    391 	struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *);
    392 	register struct vnode *vp;
    393 	struct vattr vattr;
    394 	int error;
    395 	struct nameidata nd;
    396 
    397 	NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT, UIO_SYSSPACE,
    398 	    soun->sun_path, p);
    399 	if (unp->unp_vnode != NULL)
    400 		return (EINVAL);
    401 	if (nam->m_len == MLEN) {
    402 		if (*(mtod(nam, caddr_t) + nam->m_len - 1) != 0)
    403 			return (EINVAL);
    404 	} else
    405 		*(mtod(nam, caddr_t) + nam->m_len) = 0;
    406 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
    407 	if ((error = namei(&nd)) != 0)
    408 		return (error);
    409 	vp = nd.ni_vp;
    410 	if (vp != NULL) {
    411 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
    412 		if (nd.ni_dvp == vp)
    413 			vrele(nd.ni_dvp);
    414 		else
    415 			vput(nd.ni_dvp);
    416 		vrele(vp);
    417 		return (EADDRINUSE);
    418 	}
    419 	VATTR_NULL(&vattr);
    420 	vattr.va_type = VSOCK;
    421 	vattr.va_mode = ACCESSPERMS;
    422 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
    423 	error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
    424 	if (error)
    425 		return (error);
    426 	vp = nd.ni_vp;
    427 	vp->v_socket = unp->unp_socket;
    428 	unp->unp_vnode = vp;
    429 	unp->unp_addr = m_copy(nam, 0, (int)M_COPYALL);
    430 	VOP_UNLOCK(vp);
    431 	return (0);
    432 }
    433 
/*
 * Connect `so' to the socket bound at the pathname carried in `nam'.
 *
 * The path is NUL-terminated in place when the mbuf has room; otherwise
 * its last byte must already be zero.  The path must name a VSOCK vnode
 * that the caller may write to and that still has a socket of the same
 * type attached.  For protocols with PR_CONNREQUIRED the target must be
 * accepting connections; a new socket is spawned from it with
 * sonewconn(), inherits a copy of the listener's address, and becomes
 * the actual peer.
 *
 * Returns 0 or an errno value.  The looked-up vnode is released with
 * vput() on every path after a successful namei().
 */
int
unp_connect(so, nam, p)
	struct socket *so;
	struct mbuf *nam;
	struct proc *p;
{
	struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *);
	caddr_t name = mtod(nam, caddr_t);
	struct vnode *vp;
	struct socket *target, *child;
	struct unpcb *lunp, *cunp;
	struct nameidata nd;
	int error;

	if (nam->m_data + nam->m_len != &nam->m_dat[MLEN])	/* XXX */
		name[nam->m_len] = 0;
	else if (name[nam->m_len - 1] != 0)
		return (EMSGSIZE);

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, soun->sun_path, p);
	error = namei(&nd);
	if (error != 0)
		return (error);
	vp = nd.ni_vp;

	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto out;
	}
	error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p);
	if (error != 0)
		goto out;

	target = vp->v_socket;
	if (target == 0) {
		error = ECONNREFUSED;
		goto out;
	}
	if (so->so_type != target->so_type) {
		error = EPROTOTYPE;
		goto out;
	}

	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		if ((target->so_options & SO_ACCEPTCONN) == 0) {
			error = ECONNREFUSED;
			goto out;
		}
		child = sonewconn(target, 0);
		if (child == 0) {
			error = ECONNREFUSED;
			goto out;
		}
		/* The spawned socket answers to the listener's name. */
		lunp = sotounpcb(target);
		cunp = sotounpcb(child);
		if (lunp->unp_addr)
			cunp->unp_addr =
			    m_copy(lunp->unp_addr, 0, (int)M_COPYALL);
		target = child;
	}
	error = unp_connect2(so, target);

out:
	vput(vp);
	return (error);
}
    489 
    490 int
    491 unp_connect2(so, so2)
    492 	register struct socket *so;
    493 	register struct socket *so2;
    494 {
    495 	register struct unpcb *unp = sotounpcb(so);
    496 	register struct unpcb *unp2;
    497 
    498 	if (so2->so_type != so->so_type)
    499 		return (EPROTOTYPE);
    500 	unp2 = sotounpcb(so2);
    501 	unp->unp_conn = unp2;
    502 	switch (so->so_type) {
    503 
    504 	case SOCK_DGRAM:
    505 		unp->unp_nextref = unp2->unp_refs;
    506 		unp2->unp_refs = unp;
    507 		soisconnected(so);
    508 		break;
    509 
    510 	case SOCK_STREAM:
    511 		unp2->unp_conn = unp;
    512 		soisconnected(so);
    513 		soisconnected(so2);
    514 		break;
    515 
    516 	default:
    517 		panic("unp_connect2");
    518 	}
    519 	return (0);
    520 }
    521 
    522 void
    523 unp_disconnect(unp)
    524 	struct unpcb *unp;
    525 {
    526 	register struct unpcb *unp2 = unp->unp_conn;
    527 
    528 	if (unp2 == 0)
    529 		return;
    530 	unp->unp_conn = 0;
    531 	switch (unp->unp_socket->so_type) {
    532 
    533 	case SOCK_DGRAM:
    534 		if (unp2->unp_refs == unp)
    535 			unp2->unp_refs = unp->unp_nextref;
    536 		else {
    537 			unp2 = unp2->unp_refs;
    538 			for (;;) {
    539 				if (unp2 == 0)
    540 					panic("unp_disconnect");
    541 				if (unp2->unp_nextref == unp)
    542 					break;
    543 				unp2 = unp2->unp_nextref;
    544 			}
    545 			unp2->unp_nextref = unp->unp_nextref;
    546 		}
    547 		unp->unp_nextref = 0;
    548 		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
    549 		break;
    550 
    551 	case SOCK_STREAM:
    552 		soisdisconnected(unp->unp_socket);
    553 		unp2->unp_conn = 0;
    554 		soisdisconnected(unp2->unp_socket);
    555 		break;
    556 	}
    557 }
    558 
#ifdef notdef
/*
 * Compiled out (notdef is not defined in this file): abort by simply
 * detaching the control block.
 */
unp_abort(unp)
	struct unpcb *unp;
{
	unp_detach(unp);
}
#endif
    567 
    568 void
    569 unp_shutdown(unp)
    570 	struct unpcb *unp;
    571 {
    572 	struct socket *so;
    573 
    574 	if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn &&
    575 	    (so = unp->unp_conn->unp_socket))
    576 		socantrcvmore(so);
    577 }
    578 
    579 void
    580 unp_drop(unp, errno)
    581 	struct unpcb *unp;
    582 	int errno;
    583 {
    584 	struct socket *so = unp->unp_socket;
    585 
    586 	so->so_error = errno;
    587 	unp_disconnect(unp);
    588 	if (so->so_head) {
    589 		so->so_pcb = 0;
    590 		sofree(so);
    591 		m_freem(unp->unp_addr);
    592 		free(unp, M_PCB);
    593 	}
    594 }
    595 
#ifdef notdef
/* Compiled out (notdef is not defined in this file): empty drain hook. */
unp_drain()
{
}
#endif
    602 
    603 int
    604 unp_externalize(rights)
    605 	struct mbuf *rights;
    606 {
    607 	struct proc *p = curproc;		/* XXX */
    608 	register int i;
    609 	register struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
    610 	register struct file **rp = (struct file **)(cm + 1);
    611 	register struct file *fp;
    612 	int newfds = (cm->cmsg_len - sizeof(*cm)) / sizeof (int);
    613 	int f;
    614 
    615 	if (!fdavail(p, newfds)) {
    616 		for (i = 0; i < newfds; i++) {
    617 			fp = *rp;
    618 			unp_discard(fp);
    619 			*rp++ = 0;
    620 		}
    621 		return (EMSGSIZE);
    622 	}
    623 	for (i = 0; i < newfds; i++) {
    624 		if (fdalloc(p, 0, &f))
    625 			panic("unp_externalize");
    626 		fp = *rp;
    627 		p->p_fd->fd_ofiles[f] = fp;
    628 		fp->f_msgcount--;
    629 		unp_rights--;
    630 		*(int *)rp++ = f;
    631 	}
    632 	return (0);
    633 }
    634 
    635 int
    636 unp_internalize(control, p)
    637 	struct mbuf *control;
    638 	struct proc *p;
    639 {
    640 	struct filedesc *fdp = p->p_fd;
    641 	register struct cmsghdr *cm = mtod(control, struct cmsghdr *);
    642 	register struct file **rp;
    643 	register struct file *fp;
    644 	register int i, fd;
    645 	int oldfds;
    646 
    647 	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
    648 	    cm->cmsg_len != control->m_len)
    649 		return (EINVAL);
    650 	oldfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int);
    651 	rp = (struct file **)(cm + 1);
    652 	for (i = 0; i < oldfds; i++) {
    653 		fd = *(int *)rp++;
    654 		if ((unsigned)fd >= fdp->fd_nfiles ||
    655 		    fdp->fd_ofiles[fd] == NULL)
    656 			return (EBADF);
    657 	}
    658 	rp = (struct file **)(cm + 1);
    659 	for (i = 0; i < oldfds; i++) {
    660 		fp = fdp->fd_ofiles[*(int *)rp];
    661 		*rp++ = fp;
    662 		fp->f_count++;
    663 		fp->f_msgcount++;
    664 		unp_rights++;
    665 	}
    666 	return (0);
    667 }
    668 
int	unp_defer;	/* files marked FDEFER that unp_gc must still rescan */
int	unp_gcing;	/* nonzero while unp_gc is running */
extern	struct domain unixdomain;
    671 
    672 void
    673 unp_gc()
    674 {
    675 	register struct file *fp, *nextfp;
    676 	register struct socket *so;
    677 	struct file **extra_ref, **fpp;
    678 	int nunref, i;
    679 
    680 	if (unp_gcing)
    681 		return;
    682 	unp_gcing = 1;
    683 	unp_defer = 0;
    684 	for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next)
    685 		fp->f_flag &= ~(FMARK|FDEFER);
    686 	do {
    687 		for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next) {
    688 			if (fp->f_count == 0)
    689 				continue;
    690 			if (fp->f_flag & FDEFER) {
    691 				fp->f_flag &= ~FDEFER;
    692 				unp_defer--;
    693 			} else {
    694 				if (fp->f_flag & FMARK)
    695 					continue;
    696 				if (fp->f_count == fp->f_msgcount)
    697 					continue;
    698 				fp->f_flag |= FMARK;
    699 			}
    700 			if (fp->f_type != DTYPE_SOCKET ||
    701 			    (so = (struct socket *)fp->f_data) == 0)
    702 				continue;
    703 			if (so->so_proto->pr_domain != &unixdomain ||
    704 			    (so->so_proto->pr_flags&PR_RIGHTS) == 0)
    705 				continue;
    706 #ifdef notdef
    707 			if (so->so_rcv.sb_flags & SB_LOCK) {
    708 				/*
    709 				 * This is problematical; it's not clear
    710 				 * we need to wait for the sockbuf to be
    711 				 * unlocked (on a uniprocessor, at least),
    712 				 * and it's also not clear what to do
    713 				 * if sbwait returns an error due to receipt
    714 				 * of a signal.  If sbwait does return
    715 				 * an error, we'll go into an infinite
    716 				 * loop.  Delete all of this for now.
    717 				 */
    718 				(void) sbwait(&so->so_rcv);
    719 				goto restart;
    720 			}
    721 #endif
    722 			unp_scan(so->so_rcv.sb_mb, unp_mark);
    723 		}
    724 	} while (unp_defer);
    725 	/*
    726 	 * We grab an extra reference to each of the file table entries
    727 	 * that are not otherwise accessible and then free the rights
    728 	 * that are stored in messages on them.
    729 	 *
    730 	 * The bug in the orginal code is a little tricky, so I'll describe
    731 	 * what's wrong with it here.
    732 	 *
    733 	 * It is incorrect to simply unp_discard each entry for f_msgcount
    734 	 * times -- consider the case of sockets A and B that contain
    735 	 * references to each other.  On a last close of some other socket,
    736 	 * we trigger a gc since the number of outstanding rights (unp_rights)
    737 	 * is non-zero.  If during the sweep phase the gc code un_discards,
    738 	 * we end up doing a (full) closef on the descriptor.  A closef on A
    739 	 * results in the following chain.  Closef calls soo_close, which
    740 	 * calls soclose.   Soclose calls first (through the switch
    741 	 * uipc_usrreq) unp_detach, which re-invokes unp_gc.  Unp_gc simply
    742 	 * returns because the previous instance had set unp_gcing, and
    743 	 * we return all the way back to soclose, which marks the socket
    744 	 * with SS_NOFDREF, and then calls sofree.  Sofree calls sorflush
    745 	 * to free up the rights that are queued in messages on the socket A,
    746 	 * i.e., the reference on B.  The sorflush calls via the dom_dispose
    747 	 * switch unp_dispose, which unp_scans with unp_discard.  This second
    748 	 * instance of unp_discard just calls closef on B.
    749 	 *
    750 	 * Well, a similar chain occurs on B, resulting in a sorflush on B,
    751 	 * which results in another closef on A.  Unfortunately, A is already
    752 	 * being closed, and the descriptor has already been marked with
    753 	 * SS_NOFDREF, and soclose panics at this point.
    754 	 *
    755 	 * Here, we first take an extra reference to each inaccessible
    756 	 * descriptor.  Then, we call sorflush ourself, since we know
    757 	 * it is a Unix domain socket anyhow.  After we destroy all the
    758 	 * rights carried in messages, we do a last closef to get rid
    759 	 * of our extra reference.  This is the last close, and the
    760 	 * unp_detach etc will shut down the socket.
    761 	 *
    762 	 * 91/09/19, bsy (at) cs.cmu.edu
    763 	 */
    764 	extra_ref = malloc(nfiles * sizeof(struct file *), M_FILE, M_WAITOK);
    765 	for (nunref = 0, fp = filehead.lh_first, fpp = extra_ref; fp != 0;
    766 	    fp = nextfp) {
    767 		nextfp = fp->f_list.le_next;
    768 		if (fp->f_count == 0)
    769 			continue;
    770 		if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) {
    771 			*fpp++ = fp;
    772 			nunref++;
    773 			fp->f_count++;
    774 		}
    775 	}
    776 	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
    777 		sorflush((struct socket *)(*fpp)->f_data);
    778 	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
    779 		(void) closef(*fpp, (struct proc *)0);
    780 	free((caddr_t)extra_ref, M_FILE);
    781 	unp_gcing = 0;
    782 }
    783 
    784 void
    785 unp_dispose(m)
    786 	struct mbuf *m;
    787 {
    788 
    789 	if (m)
    790 		unp_scan(m, unp_discard);
    791 }
    792 
    793 void
    794 unp_scan(m0, op)
    795 	register struct mbuf *m0;
    796 	void (*op) __P((struct file *));
    797 {
    798 	register struct mbuf *m;
    799 	register struct file **rp;
    800 	register struct cmsghdr *cm;
    801 	register int i;
    802 	int qfds;
    803 
    804 	while (m0) {
    805 		for (m = m0; m; m = m->m_next)
    806 			if (m->m_type == MT_CONTROL &&
    807 			    m->m_len >= sizeof(*cm)) {
    808 				cm = mtod(m, struct cmsghdr *);
    809 				if (cm->cmsg_level != SOL_SOCKET ||
    810 				    cm->cmsg_type != SCM_RIGHTS)
    811 					continue;
    812 				qfds = (cm->cmsg_len - sizeof *cm)
    813 						/ sizeof (struct file *);
    814 				rp = (struct file **)(cm + 1);
    815 				for (i = 0; i < qfds; i++)
    816 					(*op)(*rp++);
    817 				break;		/* XXX, but saves time */
    818 			}
    819 		m0 = m0->m_act;
    820 	}
    821 }
    822 
    823 void
    824 unp_mark(fp)
    825 	struct file *fp;
    826 {
    827 
    828 	if (fp->f_flag & FMARK)
    829 		return;
    830 	unp_defer++;
    831 	fp->f_flag |= (FMARK|FDEFER);
    832 }
    833 
    834 void
    835 unp_discard(fp)
    836 	struct file *fp;
    837 {
    838 
    839 	fp->f_msgcount--;
    840 	unp_rights--;
    841 	(void) closef(fp, (struct proc *)0);
    842 }
    843