Home | History | Annotate | Line # | Download | only in kern
uipc_usrreq.c revision 1.27
      1 /*	$NetBSD: uipc_usrreq.c,v 1.27 1997/06/26 06:06:40 thorpej Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 1997 Christopher G. Demetriou.  All rights reserved.
      5  * Copyright (c) 1982, 1986, 1989, 1991, 1993
      6  *	The Regents of the University of California.  All rights reserved.
      7  *
      8  * Redistribution and use in source and binary forms, with or without
      9  * modification, are permitted provided that the following conditions
     10  * are met:
     11  * 1. Redistributions of source code must retain the above copyright
     12  *    notice, this list of conditions and the following disclaimer.
     13  * 2. Redistributions in binary form must reproduce the above copyright
     14  *    notice, this list of conditions and the following disclaimer in the
     15  *    documentation and/or other materials provided with the distribution.
     16  * 3. All advertising materials mentioning features or use of this software
     17  *    must display the following acknowledgement:
     18  *	This product includes software developed by the University of
     19  *	California, Berkeley and its contributors.
     20  * 4. Neither the name of the University nor the names of its contributors
     21  *    may be used to endorse or promote products derived from this software
     22  *    without specific prior written permission.
     23  *
     24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     34  * SUCH DAMAGE.
     35  *
     36  *	@(#)uipc_usrreq.c	8.3 (Berkeley) 1/4/94
     37  */
     38 
     39 #include <sys/param.h>
     40 #include <sys/systm.h>
     41 #include <sys/proc.h>
     42 #include <sys/filedesc.h>
     43 #include <sys/domain.h>
     44 #include <sys/protosw.h>
     45 #include <sys/socket.h>
     46 #include <sys/socketvar.h>
     47 #include <sys/unpcb.h>
     48 #include <sys/un.h>
     49 #include <sys/namei.h>
     50 #include <sys/vnode.h>
     51 #include <sys/file.h>
     52 #include <sys/stat.h>
     53 #include <sys/mbuf.h>
     54 
     55 /*
     56  * Unix communications domain.
     57  *
     58  * TODO:
     59  *	SEQPACKET, RDM
     60  *	rethink name space problems
     61  *	need a proper out-of-band
     62  */
/*
 * Placeholder address used by this file whenever a PCB has no unp_addr:
 * only the length and family members are given explicit initializers.
 */
struct	sockaddr_un sun_noname = { sizeof(sun_noname), AF_UNIX };
/* Handed out (post-incremented) by PRU_SENSE to sockets whose unp_ino is 0. */
ino_t	unp_ino;			/* prototype for fake inode numbers */
     65 
     66 int
     67 unp_output(m, control, unp)
     68 	struct mbuf *m, *control;
     69 	struct unpcb *unp;
     70 {
     71 	struct socket *so2;
     72 	struct sockaddr_un *sun;
     73 
     74 	so2 = unp->unp_conn->unp_socket;
     75 	if (unp->unp_addr)
     76 		sun = unp->unp_addr;
     77 	else
     78 		sun = &sun_noname;
     79 	if (sbappendaddr(&so2->so_rcv, (struct sockaddr *)sun, m,
     80 	    control) == 0) {
     81 		m_freem(control);
     82 		m_freem(m);
     83 		return (EINVAL);
     84 	} else {
     85 		sorwakeup(so2);
     86 		return (0);
     87 	}
     88 }
     89 
     90 void
     91 unp_setsockaddr(unp, nam)
     92 	register struct unpcb *unp;
     93 	struct mbuf *nam;
     94 {
     95 	struct sockaddr_un *sun;
     96 
     97 	if (unp->unp_addr)
     98 		sun = unp->unp_addr;
     99 	else
    100 		sun = &sun_noname;
    101 	nam->m_len = sun->sun_len;
    102 	if (nam->m_len > MLEN)
    103 		MEXTMALLOC(nam, nam->m_len, M_WAITOK);
    104 	bcopy(sun, mtod(nam, caddr_t), (size_t)nam->m_len);
    105 }
    106 
    107 void
    108 unp_setpeeraddr(unp, nam)
    109 	register struct unpcb *unp;
    110 	struct mbuf *nam;
    111 {
    112 	struct sockaddr_un *sun;
    113 
    114 	if (unp->unp_conn && unp->unp_conn->unp_addr)
    115 		sun = unp->unp_conn->unp_addr;
    116 	else
    117 		sun = &sun_noname;
    118 	nam->m_len = sun->sun_len;
    119 	if (nam->m_len > MLEN)
    120 		MEXTMALLOC(nam, nam->m_len, M_WAITOK);
    121 	bcopy(sun, mtod(nam, caddr_t), (size_t)nam->m_len);
    122 }
    123 
    124 /*ARGSUSED*/
    125 int
    126 uipc_usrreq(so, req, m, nam, control, p)
    127 	struct socket *so;
    128 	int req;
    129 	struct mbuf *m, *nam, *control;
    130 	struct proc *p;
    131 {
    132 	struct unpcb *unp = sotounpcb(so);
    133 	register struct socket *so2;
    134 	register int error = 0;
    135 
    136 	if (req == PRU_CONTROL)
    137 		return (EOPNOTSUPP);
    138 
    139 #ifdef DIAGNOSTIC
    140 	if (req != PRU_SEND && req != PRU_SENDOOB && control)
    141 		panic("uipc_usrreq: unexpected control mbuf");
    142 #endif
    143 	if (unp == 0 && req != PRU_ATTACH) {
    144 		error = EINVAL;
    145 		goto release;
    146 	}
    147 
    148 	switch (req) {
    149 
    150 	case PRU_ATTACH:
    151 		if (unp != 0) {
    152 			error = EISCONN;
    153 			break;
    154 		}
    155 		error = unp_attach(so);
    156 		break;
    157 
    158 	case PRU_DETACH:
    159 		unp_detach(unp);
    160 		break;
    161 
    162 	case PRU_BIND:
    163 		error = unp_bind(unp, nam, p);
    164 		break;
    165 
    166 	case PRU_LISTEN:
    167 		if (unp->unp_vnode == 0)
    168 			error = EINVAL;
    169 		break;
    170 
    171 	case PRU_CONNECT:
    172 		error = unp_connect(so, nam, p);
    173 		break;
    174 
    175 	case PRU_CONNECT2:
    176 		error = unp_connect2(so, (struct socket *)nam);
    177 		break;
    178 
    179 	case PRU_DISCONNECT:
    180 		unp_disconnect(unp);
    181 		break;
    182 
    183 	case PRU_ACCEPT:
    184 		unp_setpeeraddr(unp, nam);
    185 		break;
    186 
    187 	case PRU_SHUTDOWN:
    188 		socantsendmore(so);
    189 		unp_shutdown(unp);
    190 		break;
    191 
    192 	case PRU_RCVD:
    193 		switch (so->so_type) {
    194 
    195 		case SOCK_DGRAM:
    196 			panic("uipc 1");
    197 			/*NOTREACHED*/
    198 
    199 		case SOCK_STREAM:
    200 #define	rcv (&so->so_rcv)
    201 #define snd (&so2->so_snd)
    202 			if (unp->unp_conn == 0)
    203 				break;
    204 			so2 = unp->unp_conn->unp_socket;
    205 			/*
    206 			 * Adjust backpressure on sender
    207 			 * and wakeup any waiting to write.
    208 			 */
    209 			snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
    210 			unp->unp_mbcnt = rcv->sb_mbcnt;
    211 			snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
    212 			unp->unp_cc = rcv->sb_cc;
    213 			sowwakeup(so2);
    214 #undef snd
    215 #undef rcv
    216 			break;
    217 
    218 		default:
    219 			panic("uipc 2");
    220 		}
    221 		break;
    222 
    223 	case PRU_SEND:
    224 		if (control && (error = unp_internalize(control, p)))
    225 			break;
    226 		switch (so->so_type) {
    227 
    228 		case SOCK_DGRAM: {
    229 			if (nam) {
    230 				if ((so->so_state & SS_ISCONNECTED) != 0) {
    231 					error = EISCONN;
    232 					goto die;
    233 				}
    234 				error = unp_connect(so, nam, p);
    235 				if (error) {
    236 				die:
    237 					m_freem(control);
    238 					m_freem(m);
    239 					break;
    240 				}
    241 			} else {
    242 				if ((so->so_state & SS_ISCONNECTED) == 0) {
    243 					error = ENOTCONN;
    244 					goto die;
    245 				}
    246 			}
    247 			error = unp_output(m, control, unp);
    248 			if (nam)
    249 				unp_disconnect(unp);
    250 			break;
    251 		}
    252 
    253 		case SOCK_STREAM:
    254 #define	rcv (&so2->so_rcv)
    255 #define	snd (&so->so_snd)
    256 			if (unp->unp_conn == 0)
    257 				panic("uipc 3");
    258 			so2 = unp->unp_conn->unp_socket;
    259 			/*
    260 			 * Send to paired receive port, and then reduce
    261 			 * send buffer hiwater marks to maintain backpressure.
    262 			 * Wake up readers.
    263 			 */
    264 			if (control) {
    265 				if (sbappendcontrol(rcv, m, control) == 0)
    266 					m_freem(control);
    267 			} else
    268 				sbappend(rcv, m);
    269 			snd->sb_mbmax -=
    270 			    rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
    271 			unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
    272 			snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
    273 			unp->unp_conn->unp_cc = rcv->sb_cc;
    274 			sorwakeup(so2);
    275 #undef snd
    276 #undef rcv
    277 			break;
    278 
    279 		default:
    280 			panic("uipc 4");
    281 		}
    282 		break;
    283 
    284 	case PRU_ABORT:
    285 		unp_drop(unp, ECONNABORTED);
    286 		break;
    287 
    288 	case PRU_SENSE:
    289 		((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
    290 		if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
    291 			so2 = unp->unp_conn->unp_socket;
    292 			((struct stat *) m)->st_blksize += so2->so_rcv.sb_cc;
    293 		}
    294 		((struct stat *) m)->st_dev = NODEV;
    295 		if (unp->unp_ino == 0)
    296 			unp->unp_ino = unp_ino++;
    297 		((struct stat *) m)->st_atimespec =
    298 		    ((struct stat *) m)->st_mtimespec =
    299 		    ((struct stat *) m)->st_ctimespec = unp->unp_ctime;
    300 		((struct stat *) m)->st_ino = unp->unp_ino;
    301 		return (0);
    302 
    303 	case PRU_RCVOOB:
    304 		error = EOPNOTSUPP;
    305 		break;
    306 
    307 	case PRU_SENDOOB:
    308 		m_freem(control);
    309 		m_freem(m);
    310 		error = EOPNOTSUPP;
    311 		break;
    312 
    313 	case PRU_SOCKADDR:
    314 		unp_setsockaddr(unp, nam);
    315 		break;
    316 
    317 	case PRU_PEERADDR:
    318 		unp_setpeeraddr(unp, nam);
    319 		break;
    320 
    321 	default:
    322 		panic("piusrreq");
    323 	}
    324 
    325 release:
    326 	return (error);
    327 }
    328 
    329 /*
    330  * Both send and receive buffers are allocated PIPSIZ bytes of buffering
    331  * for stream sockets, although the total for sender and receiver is
    332  * actually only PIPSIZ.
    333  * Datagram sockets really use the sendspace as the maximum datagram size,
    334  * and don't really want to reserve the sendspace.  Their recvspace should
    335  * be large enough for at least one max-size datagram plus address.
    336  */
#define	PIPSIZ	4096
/* Buffer reservations unp_attach() passes to soreserve() for stream sockets. */
u_long	unpst_sendspace = PIPSIZ;
u_long	unpst_recvspace = PIPSIZ;
/* Buffer reservations unp_attach() passes to soreserve() for datagram sockets. */
u_long	unpdg_sendspace = 2*1024;	/* really max datagram size */
u_long	unpdg_recvspace = 4*1024;

/*
 * Incremented once per descriptor by unp_internalize(), decremented by
 * unp_externalize() and unp_discard(); unp_detach() runs the garbage
 * collector only while this is non-zero.
 */
int	unp_rights;			/* file descriptors in flight */
    344 
    345 int
    346 unp_attach(so)
    347 	struct socket *so;
    348 {
    349 	register struct unpcb *unp;
    350 	struct timeval tv;
    351 	int error;
    352 
    353 	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
    354 		switch (so->so_type) {
    355 
    356 		case SOCK_STREAM:
    357 			error = soreserve(so, unpst_sendspace, unpst_recvspace);
    358 			break;
    359 
    360 		case SOCK_DGRAM:
    361 			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
    362 			break;
    363 
    364 		default:
    365 			panic("unp_attach");
    366 		}
    367 		if (error)
    368 			return (error);
    369 	}
    370 	unp = malloc(sizeof(*unp), M_PCB, M_NOWAIT);
    371 	if (unp == NULL)
    372 		return (ENOBUFS);
    373 	bzero((caddr_t)unp, sizeof(*unp));
    374 	unp->unp_socket = so;
    375 	so->so_pcb = unp;
    376 	microtime(&tv);
    377 	TIMEVAL_TO_TIMESPEC(&tv, &unp->unp_ctime);
    378 	return (0);
    379 }
    380 
    381 void
    382 unp_detach(unp)
    383 	register struct unpcb *unp;
    384 {
    385 
    386 	if (unp->unp_vnode) {
    387 		unp->unp_vnode->v_socket = 0;
    388 		vrele(unp->unp_vnode);
    389 		unp->unp_vnode = 0;
    390 	}
    391 	if (unp->unp_conn)
    392 		unp_disconnect(unp);
    393 	while (unp->unp_refs)
    394 		unp_drop(unp->unp_refs, ECONNRESET);
    395 	soisdisconnected(unp->unp_socket);
    396 	unp->unp_socket->so_pcb = 0;
    397 	if (unp->unp_addr)
    398 		free(unp->unp_addr, M_SONAME);
    399 	if (unp_rights) {
    400 		/*
    401 		 * Normally the receive buffer is flushed later,
    402 		 * in sofree, but if our receive buffer holds references
    403 		 * to descriptors that are now garbage, we will dispose
    404 		 * of those descriptor references after the garbage collector
    405 		 * gets them (resulting in a "panic: closef: count < 0").
    406 		 */
    407 		sorflush(unp->unp_socket);
    408 		free(unp, M_PCB);
    409 		unp_gc();
    410 	} else
    411 		free(unp, M_PCB);
    412 }
    413 
    414 int
    415 unp_bind(unp, nam, p)
    416 	struct unpcb *unp;
    417 	struct mbuf *nam;
    418 	struct proc *p;
    419 {
    420 	struct sockaddr_un *sun;
    421 	register struct vnode *vp;
    422 	struct vattr vattr;
    423 	size_t addrlen;
    424 	int error;
    425 	struct nameidata nd;
    426 
    427 	if (unp->unp_vnode != 0)
    428 		return (EINVAL);
    429 
    430 	/*
    431 	 * Allocate the new sockaddr.  We have to allocate one
    432 	 * extra byte so that we can ensure that the pathname
    433 	 * is nul-terminated.
    434 	 */
    435 	addrlen = nam->m_len + 1;
    436 	sun = malloc(addrlen, M_SONAME, M_WAITOK);
    437 	m_copydata(nam, 0, nam->m_len, (caddr_t)sun);
    438 	*(((char *)sun) + nam->m_len) = '\0';
    439 
    440 	NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT, UIO_SYSSPACE,
    441 	    sun->sun_path, p);
    442 
    443 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
    444 	if ((error = namei(&nd)) != 0)
    445 		goto bad;
    446 	vp = nd.ni_vp;
    447 	if (vp != NULL) {
    448 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
    449 		if (nd.ni_dvp == vp)
    450 			vrele(nd.ni_dvp);
    451 		else
    452 			vput(nd.ni_dvp);
    453 		vrele(vp);
    454 		error = EADDRINUSE;
    455 		goto bad;
    456 	}
    457 	VATTR_NULL(&vattr);
    458 	vattr.va_type = VSOCK;
    459 	vattr.va_mode = ACCESSPERMS;
    460 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
    461 	error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
    462 	if (error)
    463 		goto bad;
    464 	vp = nd.ni_vp;
    465 	vp->v_socket = unp->unp_socket;
    466 	unp->unp_vnode = vp;
    467 	unp->unp_addrlen = addrlen;
    468 	unp->unp_addr = sun;
    469 	VOP_UNLOCK(vp);
    470 	return (0);
    471 
    472  bad:
    473 	free(sun, M_SONAME);
    474 	return (error);
    475 }
    476 
/*
 * Connect `so' to the socket bound at the pathname carried in `nam'.
 *
 * Returns 0 on success; the namei() or VOP_ACCESS() error; ENOTSOCK if
 * the path is not a socket vnode; ECONNREFUSED if no socket is attached
 * to the vnode, or (for connection-required protocols) the target is not
 * accepting or no new connection socket could be created; EPROTOTYPE on
 * a socket type mismatch; otherwise the result of unp_connect2().
 */
int
unp_connect(so, nam, p)
	struct socket *so;
	struct mbuf *nam;
	struct proc *p;
{
	register struct sockaddr_un *addr;
	register struct vnode *vp;
	register struct socket *target, *child;
	struct unpcb *target_pcb, *child_pcb;
	size_t addrlen;
	int error;
	struct nameidata nd;

	/*
	 * Work on a temporary copy of the sockaddr that is one byte longer
	 * than the original, so that the pathname can be nul-terminated.
	 * The copy is always freed before returning; an accepted connection
	 * gets its own copy of the listener's address below.
	 */
	addrlen = nam->m_len + 1;
	addr = malloc(addrlen, M_SONAME, M_WAITOK);
	m_copydata(nam, 0, nam->m_len, (caddr_t)addr);
	((char *)addr)[nam->m_len] = '\0';

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, addr->sun_path, p);

	error = namei(&nd);
	if (error != 0)
		goto free_addr;

	vp = nd.ni_vp;
	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto put_vnode;
	}
	error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p);
	if (error != 0)
		goto put_vnode;

	target = vp->v_socket;
	if (target == 0) {
		error = ECONNREFUSED;
		goto put_vnode;
	}
	if (so->so_type != target->so_type) {
		error = EPROTOTYPE;
		goto put_vnode;
	}

	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		/* Connect to a freshly spawned socket, not the listener. */
		if ((target->so_options & SO_ACCEPTCONN) == 0) {
			error = ECONNREFUSED;
			goto put_vnode;
		}
		child = sonewconn(target, 0);
		if (child == 0) {
			error = ECONNREFUSED;
			goto put_vnode;
		}
		target_pcb = sotounpcb(target);
		child_pcb = sotounpcb(child);
		if (target_pcb->unp_addr) {
			/* The new socket inherits the listener's address. */
			child_pcb->unp_addr = malloc(target_pcb->unp_addrlen,
			    M_SONAME, M_WAITOK);
			bcopy(target_pcb->unp_addr, child_pcb->unp_addr,
			    target_pcb->unp_addrlen);
			child_pcb->unp_addrlen = target_pcb->unp_addrlen;
		}
		target = child;
	}
	error = unp_connect2(so, target);

 put_vnode:
	vput(vp);
 free_addr:
	free(addr, M_SONAME);
	return (error);
}
    546 
    547 int
    548 unp_connect2(so, so2)
    549 	register struct socket *so;
    550 	register struct socket *so2;
    551 {
    552 	register struct unpcb *unp = sotounpcb(so);
    553 	register struct unpcb *unp2;
    554 
    555 	if (so2->so_type != so->so_type)
    556 		return (EPROTOTYPE);
    557 	unp2 = sotounpcb(so2);
    558 	unp->unp_conn = unp2;
    559 	switch (so->so_type) {
    560 
    561 	case SOCK_DGRAM:
    562 		unp->unp_nextref = unp2->unp_refs;
    563 		unp2->unp_refs = unp;
    564 		soisconnected(so);
    565 		break;
    566 
    567 	case SOCK_STREAM:
    568 		unp2->unp_conn = unp;
    569 		soisconnected(so);
    570 		soisconnected(so2);
    571 		break;
    572 
    573 	default:
    574 		panic("unp_connect2");
    575 	}
    576 	return (0);
    577 }
    578 
    579 void
    580 unp_disconnect(unp)
    581 	struct unpcb *unp;
    582 {
    583 	register struct unpcb *unp2 = unp->unp_conn;
    584 
    585 	if (unp2 == 0)
    586 		return;
    587 	unp->unp_conn = 0;
    588 	switch (unp->unp_socket->so_type) {
    589 
    590 	case SOCK_DGRAM:
    591 		if (unp2->unp_refs == unp)
    592 			unp2->unp_refs = unp->unp_nextref;
    593 		else {
    594 			unp2 = unp2->unp_refs;
    595 			for (;;) {
    596 				if (unp2 == 0)
    597 					panic("unp_disconnect");
    598 				if (unp2->unp_nextref == unp)
    599 					break;
    600 				unp2 = unp2->unp_nextref;
    601 			}
    602 			unp2->unp_nextref = unp->unp_nextref;
    603 		}
    604 		unp->unp_nextref = 0;
    605 		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
    606 		break;
    607 
    608 	case SOCK_STREAM:
    609 		soisdisconnected(unp->unp_socket);
    610 		unp2->unp_conn = 0;
    611 		soisdisconnected(unp2->unp_socket);
    612 		break;
    613 	}
    614 }
    615 
#ifdef notdef
/*
 * Only compiled when "notdef" is defined: abort a connection by detaching
 * the PCB.  (PRU_ABORT in uipc_usrreq() calls unp_drop() instead.)
 */
unp_abort(unp)
	struct unpcb *unp;
{

	unp_detach(unp);
}
#endif
    624 
    625 void
    626 unp_shutdown(unp)
    627 	struct unpcb *unp;
    628 {
    629 	struct socket *so;
    630 
    631 	if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn &&
    632 	    (so = unp->unp_conn->unp_socket))
    633 		socantrcvmore(so);
    634 }
    635 
    636 void
    637 unp_drop(unp, errno)
    638 	struct unpcb *unp;
    639 	int errno;
    640 {
    641 	struct socket *so = unp->unp_socket;
    642 
    643 	so->so_error = errno;
    644 	unp_disconnect(unp);
    645 	if (so->so_head) {
    646 		so->so_pcb = 0;
    647 		sofree(so);
    648 		if (unp->unp_addr)
    649 			free(unp->unp_addr, M_SONAME);
    650 		free(unp, M_PCB);
    651 	}
    652 }
    653 
#ifdef notdef
/* Only compiled when "notdef" is defined: empty placeholder. */
unp_drain()
{

}
#endif
    660 
    661 int
    662 unp_externalize(rights)
    663 	struct mbuf *rights;
    664 {
    665 	struct proc *p = curproc;		/* XXX */
    666 	register struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
    667 	register int i, *fdp = (int *)(cm + 1);
    668 	register struct file **rp = (struct file **)ALIGN(cm + 1);
    669 	register struct file *fp;
    670 	int nfds = (cm->cmsg_len - ALIGN(sizeof(*cm))) / sizeof (struct file *);
    671 	int f;
    672 
    673 	/* Make sure that the recipient has space */
    674 	if (!fdavail(p, nfds)) {
    675 		for (i = 0; i < nfds; i++) {
    676 			fp = *rp;
    677 			unp_discard(fp);
    678 			*rp++ = 0;
    679 		}
    680 		return (EMSGSIZE);
    681 	}
    682 
    683 	/*
    684 	 * Add file to the recipient's open file table, converting them
    685 	 * to integer file descriptors as we go.  Done in forward order
    686 	 * because an integer will always come in the same place or before
    687 	 * its corresponding struct file pointer.
    688 	 */
    689 	for (i = 0; i < nfds; i++) {
    690 		if (fdalloc(p, 0, &f))
    691 			panic("unp_externalize");
    692 		fp = *rp;
    693 		p->p_fd->fd_ofiles[f] = fp;
    694 		fp->f_msgcount--;
    695 		unp_rights--;
    696 		*fdp++ = f;
    697 	}
    698 
    699 	/*
    700 	 * Adjust length, in case of transition from large struct file
    701 	 * pointers to ints.
    702 	 */
    703 	cm->cmsg_len = sizeof(*cm) + (nfds * sizeof(int));
    704 	rights->m_len = cm->cmsg_len;
    705 	return (0);
    706 }
    707 
    708 int
    709 unp_internalize(control, p)
    710 	struct mbuf *control;
    711 	struct proc *p;
    712 {
    713 	struct filedesc *fdescp = p->p_fd;
    714 	register struct cmsghdr *cm = mtod(control, struct cmsghdr *);
    715 	register struct file **rp;
    716 	register struct file *fp;
    717 	register int i, fd, *fdp;
    718 	int nfds;
    719 	u_int neededspace;
    720 
    721 	/* Sanity check the control message header */
    722 	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
    723 	    cm->cmsg_len != control->m_len)
    724 		return (EINVAL);
    725 
    726 	/* Verify that the file descriptors are valid */
    727 	nfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int);
    728 	fdp = (int *)(cm + 1);
    729 	for (i = 0; i < nfds; i++) {
    730 		fd = *fdp++;
    731 		if ((unsigned)fd >= fdescp->fd_nfiles ||
    732 		    fdescp->fd_ofiles[fd] == NULL)
    733 			return (EBADF);
    734 	}
    735 
    736 	/* Make sure we have room for the struct file pointers */
    737 morespace:
    738 	neededspace = (ALIGN(sizeof (*cm)) + nfds * sizeof (struct file *)) -
    739 		control->m_len;
    740 	if (neededspace > M_TRAILINGSPACE(control)) {
    741 
    742 		/* if we already have a cluster, the message is just too big */
    743 		if (control->m_flags & M_EXT)
    744 			return (E2BIG);
    745 
    746 		/* allocate a cluster and try again */
    747 		MCLGET(control, M_WAIT);
    748 		if ((control->m_flags & M_EXT) == 0)
    749 			return (ENOBUFS);	/* allocation failed */
    750 
    751 		/* copy the data to the cluster */
    752 		bcopy(cm, mtod(control, char *), cm->cmsg_len);
    753 		cm = mtod(control, struct cmsghdr *);
    754 		goto morespace;
    755 	}
    756 
    757 	/* adjust message & mbuf to note amount of space actually used. */
    758 	cm->cmsg_len += neededspace;
    759 	control->m_len = cm->cmsg_len;
    760 
    761 	/*
    762 	 * Transform the file descriptors into struct file pointers, in
    763 	 * reverse order so that if pointers are bigger than ints, the
    764 	 * int won't get until we're done.
    765 	 */
    766 	fdp = ((int *)(cm + 1)) + nfds - 1;
    767 	rp = ((struct file **)ALIGN(cm + 1)) + nfds - 1;
    768 	for (i = 0; i < nfds; i++) {
    769 		fp = fdescp->fd_ofiles[*fdp];
    770 		*rp-- = fp;
    771 		fp->f_count++;
    772 		fp->f_msgcount++;
    773 		unp_rights++;
    774 	}
    775 	return (0);
    776 }
    777 
/*
 * Garbage collector state.  unp_defer is incremented by unp_mark() for
 * each file it flags FDEFER and decremented by unp_gc() as it processes
 * them; unp_gcing is non-zero while unp_gc() is running, so that nested
 * calls return immediately.
 */
int	unp_defer, unp_gcing;
extern	struct domain unixdomain;
    780 
    781 void
    782 unp_gc()
    783 {
    784 	register struct file *fp, *nextfp;
    785 	register struct socket *so;
    786 	struct file **extra_ref, **fpp;
    787 	int nunref, i;
    788 
    789 	if (unp_gcing)
    790 		return;
    791 	unp_gcing = 1;
    792 	unp_defer = 0;
    793 	for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next)
    794 		fp->f_flag &= ~(FMARK|FDEFER);
    795 	do {
    796 		for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next) {
    797 			if (fp->f_count == 0)
    798 				continue;
    799 			if (fp->f_flag & FDEFER) {
    800 				fp->f_flag &= ~FDEFER;
    801 				unp_defer--;
    802 			} else {
    803 				if (fp->f_flag & FMARK)
    804 					continue;
    805 				if (fp->f_count == fp->f_msgcount)
    806 					continue;
    807 				fp->f_flag |= FMARK;
    808 			}
    809 			if (fp->f_type != DTYPE_SOCKET ||
    810 			    (so = (struct socket *)fp->f_data) == 0)
    811 				continue;
    812 			if (so->so_proto->pr_domain != &unixdomain ||
    813 			    (so->so_proto->pr_flags&PR_RIGHTS) == 0)
    814 				continue;
    815 #ifdef notdef
    816 			if (so->so_rcv.sb_flags & SB_LOCK) {
    817 				/*
    818 				 * This is problematical; it's not clear
    819 				 * we need to wait for the sockbuf to be
    820 				 * unlocked (on a uniprocessor, at least),
    821 				 * and it's also not clear what to do
    822 				 * if sbwait returns an error due to receipt
    823 				 * of a signal.  If sbwait does return
    824 				 * an error, we'll go into an infinite
    825 				 * loop.  Delete all of this for now.
    826 				 */
    827 				(void) sbwait(&so->so_rcv);
    828 				goto restart;
    829 			}
    830 #endif
    831 			unp_scan(so->so_rcv.sb_mb, unp_mark);
    832 		}
    833 	} while (unp_defer);
    834 	/*
    835 	 * We grab an extra reference to each of the file table entries
    836 	 * that are not otherwise accessible and then free the rights
    837 	 * that are stored in messages on them.
    838 	 *
    839 	 * The bug in the orginal code is a little tricky, so I'll describe
    840 	 * what's wrong with it here.
    841 	 *
    842 	 * It is incorrect to simply unp_discard each entry for f_msgcount
    843 	 * times -- consider the case of sockets A and B that contain
    844 	 * references to each other.  On a last close of some other socket,
    845 	 * we trigger a gc since the number of outstanding rights (unp_rights)
    846 	 * is non-zero.  If during the sweep phase the gc code un_discards,
    847 	 * we end up doing a (full) closef on the descriptor.  A closef on A
    848 	 * results in the following chain.  Closef calls soo_close, which
    849 	 * calls soclose.   Soclose calls first (through the switch
    850 	 * uipc_usrreq) unp_detach, which re-invokes unp_gc.  Unp_gc simply
    851 	 * returns because the previous instance had set unp_gcing, and
    852 	 * we return all the way back to soclose, which marks the socket
    853 	 * with SS_NOFDREF, and then calls sofree.  Sofree calls sorflush
    854 	 * to free up the rights that are queued in messages on the socket A,
    855 	 * i.e., the reference on B.  The sorflush calls via the dom_dispose
    856 	 * switch unp_dispose, which unp_scans with unp_discard.  This second
    857 	 * instance of unp_discard just calls closef on B.
    858 	 *
    859 	 * Well, a similar chain occurs on B, resulting in a sorflush on B,
    860 	 * which results in another closef on A.  Unfortunately, A is already
    861 	 * being closed, and the descriptor has already been marked with
    862 	 * SS_NOFDREF, and soclose panics at this point.
    863 	 *
    864 	 * Here, we first take an extra reference to each inaccessible
    865 	 * descriptor.  Then, we call sorflush ourself, since we know
    866 	 * it is a Unix domain socket anyhow.  After we destroy all the
    867 	 * rights carried in messages, we do a last closef to get rid
    868 	 * of our extra reference.  This is the last close, and the
    869 	 * unp_detach etc will shut down the socket.
    870 	 *
    871 	 * 91/09/19, bsy (at) cs.cmu.edu
    872 	 */
    873 	extra_ref = malloc(nfiles * sizeof(struct file *), M_FILE, M_WAITOK);
    874 	for (nunref = 0, fp = filehead.lh_first, fpp = extra_ref; fp != 0;
    875 	    fp = nextfp) {
    876 		nextfp = fp->f_list.le_next;
    877 		if (fp->f_count == 0)
    878 			continue;
    879 		if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) {
    880 			*fpp++ = fp;
    881 			nunref++;
    882 			fp->f_count++;
    883 		}
    884 	}
    885 	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
    886 		sorflush((struct socket *)(*fpp)->f_data);
    887 	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
    888 		(void) closef(*fpp, (struct proc *)0);
    889 	free((caddr_t)extra_ref, M_FILE);
    890 	unp_gcing = 0;
    891 }
    892 
    893 void
    894 unp_dispose(m)
    895 	struct mbuf *m;
    896 {
    897 
    898 	if (m)
    899 		unp_scan(m, unp_discard);
    900 }
    901 
    902 void
    903 unp_scan(m0, op)
    904 	register struct mbuf *m0;
    905 	void (*op) __P((struct file *));
    906 {
    907 	register struct mbuf *m;
    908 	register struct file **rp;
    909 	register struct cmsghdr *cm;
    910 	register int i;
    911 	int qfds;
    912 
    913 	while (m0) {
    914 		for (m = m0; m; m = m->m_next)
    915 			if (m->m_type == MT_CONTROL &&
    916 			    m->m_len >= sizeof(*cm)) {
    917 				cm = mtod(m, struct cmsghdr *);
    918 				if (cm->cmsg_level != SOL_SOCKET ||
    919 				    cm->cmsg_type != SCM_RIGHTS)
    920 					continue;
    921 				qfds = (cm->cmsg_len - sizeof *cm)
    922 						/ sizeof (struct file *);
    923 				rp = (struct file **)(cm + 1);
    924 				for (i = 0; i < qfds; i++)
    925 					(*op)(*rp++);
    926 				break;		/* XXX, but saves time */
    927 			}
    928 		m0 = m0->m_act;
    929 	}
    930 }
    931 
    932 void
    933 unp_mark(fp)
    934 	struct file *fp;
    935 {
    936 
    937 	if (fp->f_flag & FMARK)
    938 		return;
    939 	unp_defer++;
    940 	fp->f_flag |= (FMARK|FDEFER);
    941 }
    942 
    943 void
    944 unp_discard(fp)
    945 	struct file *fp;
    946 {
    947 
    948 	fp->f_msgcount--;
    949 	unp_rights--;
    950 	(void) closef(fp, (struct proc *)0);
    951 }
    952