Home | History | Annotate | Line # | Download | only in kern
uipc_usrreq.c revision 1.33
      1 /*	$NetBSD: uipc_usrreq.c,v 1.33 1998/07/16 00:46:50 thorpej Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1998 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
      9  * NASA Ames Research Center.
     10  *
     11  * Redistribution and use in source and binary forms, with or without
     12  * modification, are permitted provided that the following conditions
     13  * are met:
     14  * 1. Redistributions of source code must retain the above copyright
     15  *    notice, this list of conditions and the following disclaimer.
     16  * 2. Redistributions in binary form must reproduce the above copyright
     17  *    notice, this list of conditions and the following disclaimer in the
     18  *    documentation and/or other materials provided with the distribution.
     19  * 3. All advertising materials mentioning features or use of this software
     20  *    must display the following acknowledgement:
     21  *	This product includes software developed by the NetBSD
     22  *	Foundation, Inc. and its contributors.
     23  * 4. Neither the name of The NetBSD Foundation nor the names of its
     24  *    contributors may be used to endorse or promote products derived
     25  *    from this software without specific prior written permission.
     26  *
     27  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     28  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     29  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     30  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     31  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     32  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     33  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     34  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     35  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     36  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     37  * POSSIBILITY OF SUCH DAMAGE.
     38  */
     39 
     40 /*
     41  * Copyright (c) 1997 Christopher G. Demetriou.  All rights reserved.
     42  * Copyright (c) 1982, 1986, 1989, 1991, 1993
     43  *	The Regents of the University of California.  All rights reserved.
     44  *
     45  * Redistribution and use in source and binary forms, with or without
     46  * modification, are permitted provided that the following conditions
     47  * are met:
     48  * 1. Redistributions of source code must retain the above copyright
     49  *    notice, this list of conditions and the following disclaimer.
     50  * 2. Redistributions in binary form must reproduce the above copyright
     51  *    notice, this list of conditions and the following disclaimer in the
     52  *    documentation and/or other materials provided with the distribution.
     53  * 3. All advertising materials mentioning features or use of this software
     54  *    must display the following acknowledgement:
     55  *	This product includes software developed by the University of
     56  *	California, Berkeley and its contributors.
     57  * 4. Neither the name of the University nor the names of its contributors
     58  *    may be used to endorse or promote products derived from this software
     59  *    without specific prior written permission.
     60  *
     61  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     62  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     63  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     64  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     65  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     66  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     67  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     68  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     69  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     70  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     71  * SUCH DAMAGE.
     72  *
     73  *	@(#)uipc_usrreq.c	8.9 (Berkeley) 5/14/95
     74  */
     75 
     76 #include <sys/param.h>
     77 #include <sys/systm.h>
     78 #include <sys/proc.h>
     79 #include <sys/filedesc.h>
     80 #include <sys/domain.h>
     81 #include <sys/protosw.h>
     82 #include <sys/socket.h>
     83 #include <sys/socketvar.h>
     84 #include <sys/unpcb.h>
     85 #include <sys/un.h>
     86 #include <sys/namei.h>
     87 #include <sys/vnode.h>
     88 #include <sys/file.h>
     89 #include <sys/stat.h>
     90 #include <sys/mbuf.h>
     91 
     92 /*
     93  * Unix communications domain.
     94  *
     95  * TODO:
     96  *	SEQPACKET, RDM
     97  *	rethink name space problems
     98  *	need a proper out-of-band
     99  */
/*
 * Placeholder address used whenever a PCB has no address of its own
 * (see unp_output(), unp_setsockaddr() and unp_setpeeraddr()).  It carries
 * only its own size and the AF_UNIX family.
 */
struct	sockaddr_un sun_noname = { sizeof(sun_noname), AF_UNIX };
/*
 * Source of fake inode numbers: PRU_SENSE hands the current value to a
 * PCB that has none yet and then increments it.
 */
ino_t	unp_ino;			/* prototype for fake inode numbers */

/* Builds an SCM_CREDS control mbuf for `p'; defined later in this file. */
struct mbuf *unp_addsockcred __P((struct proc *, struct mbuf *));
    104 
    105 int
    106 unp_output(m, control, unp, p)
    107 	struct mbuf *m, *control;
    108 	struct unpcb *unp;
    109 	struct proc *p;
    110 {
    111 	struct socket *so2;
    112 	struct sockaddr_un *sun;
    113 
    114 	so2 = unp->unp_conn->unp_socket;
    115 	if (unp->unp_addr)
    116 		sun = unp->unp_addr;
    117 	else
    118 		sun = &sun_noname;
    119 	if (unp->unp_conn->unp_flags & UNP_WANTCRED)
    120 		control = unp_addsockcred(p, control);
    121 	if (sbappendaddr(&so2->so_rcv, (struct sockaddr *)sun, m,
    122 	    control) == 0) {
    123 		m_freem(control);
    124 		m_freem(m);
    125 		return (EINVAL);
    126 	} else {
    127 		sorwakeup(so2);
    128 		return (0);
    129 	}
    130 }
    131 
    132 void
    133 unp_setsockaddr(unp, nam)
    134 	register struct unpcb *unp;
    135 	struct mbuf *nam;
    136 {
    137 	struct sockaddr_un *sun;
    138 
    139 	if (unp->unp_addr)
    140 		sun = unp->unp_addr;
    141 	else
    142 		sun = &sun_noname;
    143 	nam->m_len = sun->sun_len;
    144 	if (nam->m_len > MLEN)
    145 		MEXTMALLOC(nam, nam->m_len, M_WAITOK);
    146 	bcopy(sun, mtod(nam, caddr_t), (size_t)nam->m_len);
    147 }
    148 
    149 void
    150 unp_setpeeraddr(unp, nam)
    151 	register struct unpcb *unp;
    152 	struct mbuf *nam;
    153 {
    154 	struct sockaddr_un *sun;
    155 
    156 	if (unp->unp_conn && unp->unp_conn->unp_addr)
    157 		sun = unp->unp_conn->unp_addr;
    158 	else
    159 		sun = &sun_noname;
    160 	nam->m_len = sun->sun_len;
    161 	if (nam->m_len > MLEN)
    162 		MEXTMALLOC(nam, nam->m_len, M_WAITOK);
    163 	bcopy(sun, mtod(nam, caddr_t), (size_t)nam->m_len);
    164 }
    165 
    166 /*ARGSUSED*/
    167 int
    168 uipc_usrreq(so, req, m, nam, control, p)
    169 	struct socket *so;
    170 	int req;
    171 	struct mbuf *m, *nam, *control;
    172 	struct proc *p;
    173 {
    174 	struct unpcb *unp = sotounpcb(so);
    175 	register struct socket *so2;
    176 	register int error = 0;
    177 
    178 	if (req == PRU_CONTROL)
    179 		return (EOPNOTSUPP);
    180 
    181 #ifdef DIAGNOSTIC
    182 	if (req != PRU_SEND && req != PRU_SENDOOB && control)
    183 		panic("uipc_usrreq: unexpected control mbuf");
    184 #endif
    185 	if (unp == 0 && req != PRU_ATTACH) {
    186 		error = EINVAL;
    187 		goto release;
    188 	}
    189 
    190 	switch (req) {
    191 
    192 	case PRU_ATTACH:
    193 		if (unp != 0) {
    194 			error = EISCONN;
    195 			break;
    196 		}
    197 		error = unp_attach(so);
    198 		break;
    199 
    200 	case PRU_DETACH:
    201 		unp_detach(unp);
    202 		break;
    203 
    204 	case PRU_BIND:
    205 		error = unp_bind(unp, nam, p);
    206 		break;
    207 
    208 	case PRU_LISTEN:
    209 		if (unp->unp_vnode == 0)
    210 			error = EINVAL;
    211 		break;
    212 
    213 	case PRU_CONNECT:
    214 		error = unp_connect(so, nam, p);
    215 		break;
    216 
    217 	case PRU_CONNECT2:
    218 		error = unp_connect2(so, (struct socket *)nam);
    219 		break;
    220 
    221 	case PRU_DISCONNECT:
    222 		unp_disconnect(unp);
    223 		break;
    224 
    225 	case PRU_ACCEPT:
    226 		unp_setpeeraddr(unp, nam);
    227 		break;
    228 
    229 	case PRU_SHUTDOWN:
    230 		socantsendmore(so);
    231 		unp_shutdown(unp);
    232 		break;
    233 
    234 	case PRU_RCVD:
    235 		switch (so->so_type) {
    236 
    237 		case SOCK_DGRAM:
    238 			panic("uipc 1");
    239 			/*NOTREACHED*/
    240 
    241 		case SOCK_STREAM:
    242 #define	rcv (&so->so_rcv)
    243 #define snd (&so2->so_snd)
    244 			if (unp->unp_conn == 0)
    245 				break;
    246 			so2 = unp->unp_conn->unp_socket;
    247 			/*
    248 			 * Adjust backpressure on sender
    249 			 * and wakeup any waiting to write.
    250 			 */
    251 			snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
    252 			unp->unp_mbcnt = rcv->sb_mbcnt;
    253 			snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
    254 			unp->unp_cc = rcv->sb_cc;
    255 			sowwakeup(so2);
    256 #undef snd
    257 #undef rcv
    258 			break;
    259 
    260 		default:
    261 			panic("uipc 2");
    262 		}
    263 		break;
    264 
    265 	case PRU_SEND:
    266 		/*
    267 		 * Note: unp_internalize() rejects any control message
    268 		 * other than SCM_RIGHTS, and only allows one.  This
    269 		 * has the side-effect of preventing a caller from
    270 		 * forging SCM_CREDS.
    271 		 */
    272 		if (control && (error = unp_internalize(control, p)))
    273 			break;
    274 		switch (so->so_type) {
    275 
    276 		case SOCK_DGRAM: {
    277 			if (nam) {
    278 				if ((so->so_state & SS_ISCONNECTED) != 0) {
    279 					error = EISCONN;
    280 					goto die;
    281 				}
    282 				error = unp_connect(so, nam, p);
    283 				if (error) {
    284 				die:
    285 					m_freem(control);
    286 					m_freem(m);
    287 					break;
    288 				}
    289 			} else {
    290 				if ((so->so_state & SS_ISCONNECTED) == 0) {
    291 					error = ENOTCONN;
    292 					goto die;
    293 				}
    294 			}
    295 			error = unp_output(m, control, unp, p);
    296 			if (nam)
    297 				unp_disconnect(unp);
    298 			break;
    299 		}
    300 
    301 		case SOCK_STREAM:
    302 #define	rcv (&so2->so_rcv)
    303 #define	snd (&so->so_snd)
    304 			if (unp->unp_conn == 0)
    305 				panic("uipc 3");
    306 			so2 = unp->unp_conn->unp_socket;
    307 			if (unp->unp_conn->unp_flags & UNP_WANTCRED) {
    308 				/*
    309 				 * Credentials are passed only once on
    310 				 * SOCK_STREAM.
    311 				 */
    312 				unp->unp_conn->unp_flags &= ~UNP_WANTCRED;
    313 				control = unp_addsockcred(p, control);
    314 			}
    315 			/*
    316 			 * Send to paired receive port, and then reduce
    317 			 * send buffer hiwater marks to maintain backpressure.
    318 			 * Wake up readers.
    319 			 */
    320 			if (control) {
    321 				if (sbappendcontrol(rcv, m, control) == 0)
    322 					m_freem(control);
    323 			} else
    324 				sbappend(rcv, m);
    325 			snd->sb_mbmax -=
    326 			    rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
    327 			unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
    328 			snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
    329 			unp->unp_conn->unp_cc = rcv->sb_cc;
    330 			sorwakeup(so2);
    331 #undef snd
    332 #undef rcv
    333 			break;
    334 
    335 		default:
    336 			panic("uipc 4");
    337 		}
    338 		break;
    339 
    340 	case PRU_ABORT:
    341 		unp_drop(unp, ECONNABORTED);
    342 		break;
    343 
    344 	case PRU_SENSE:
    345 		((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
    346 		if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
    347 			so2 = unp->unp_conn->unp_socket;
    348 			((struct stat *) m)->st_blksize += so2->so_rcv.sb_cc;
    349 		}
    350 		((struct stat *) m)->st_dev = NODEV;
    351 		if (unp->unp_ino == 0)
    352 			unp->unp_ino = unp_ino++;
    353 		((struct stat *) m)->st_atimespec =
    354 		    ((struct stat *) m)->st_mtimespec =
    355 		    ((struct stat *) m)->st_ctimespec = unp->unp_ctime;
    356 		((struct stat *) m)->st_ino = unp->unp_ino;
    357 		return (0);
    358 
    359 	case PRU_RCVOOB:
    360 		error = EOPNOTSUPP;
    361 		break;
    362 
    363 	case PRU_SENDOOB:
    364 		m_freem(control);
    365 		m_freem(m);
    366 		error = EOPNOTSUPP;
    367 		break;
    368 
    369 	case PRU_SOCKADDR:
    370 		unp_setsockaddr(unp, nam);
    371 		break;
    372 
    373 	case PRU_PEERADDR:
    374 		unp_setpeeraddr(unp, nam);
    375 		break;
    376 
    377 	default:
    378 		panic("piusrreq");
    379 	}
    380 
    381 release:
    382 	return (error);
    383 }
    384 
    385 /*
    386  * Unix domain socket option processing.
    387  */
    388 int
    389 uipc_ctloutput(op, so, level, optname, mp)
    390 	int op;
    391 	struct socket *so;
    392 	int level, optname;
    393 	struct mbuf **mp;
    394 {
    395 	struct unpcb *unp = sotounpcb(so);
    396 	struct mbuf *m = *mp;
    397 	int optval = 0, error = 0;
    398 
    399 	if (level != 0) {
    400 		error = EINVAL;
    401 		if (op == PRCO_SETOPT && m)
    402 			(void) m_free(m);
    403 	} else switch (op) {
    404 
    405 	case PRCO_SETOPT:
    406 		switch (optname) {
    407 		case LOCAL_CREDS:
    408 			if (m == NULL || m->m_len != sizeof(int))
    409 				error = EINVAL;
    410 			else {
    411 				optval = *mtod(m, int *);
    412 				switch (optname) {
    413 #define	OPTSET(bit) \
    414 	if (optval) \
    415 		unp->unp_flags |= (bit); \
    416 	else \
    417 		unp->unp_flags &= ~(bit);
    418 
    419 				case LOCAL_CREDS:
    420 					OPTSET(UNP_WANTCRED);
    421 					break;
    422 				}
    423 			}
    424 			break;
    425 #undef OPTSET
    426 
    427 		default:
    428 			error = ENOPROTOOPT;
    429 			break;
    430 		}
    431 		if (m)
    432 			(void) m_free(m);
    433 		break;
    434 
    435 	case PRCO_GETOPT:
    436 		switch (optname) {
    437 		case LOCAL_CREDS:
    438 			*mp = m = m_get(M_WAIT, MT_SOOPTS);
    439 			m->m_len = sizeof(int);
    440 			switch (optname) {
    441 
    442 #define	OPTBIT(bit)	(unp->unp_flags & (bit) ? 1 : 0)
    443 
    444 			case LOCAL_CREDS:
    445 				optval = OPTBIT(UNP_WANTCRED);
    446 				break;
    447 			}
    448 			*mtod(m, int *) = optval;
    449 			break;
    450 #undef OPTBIT
    451 
    452 		default:
    453 			error = ENOPROTOOPT;
    454 			break;
    455 		}
    456 		break;
    457 	}
    458 	return (error);
    459 }
    460 
    461 /*
    462  * Both send and receive buffers are allocated PIPSIZ bytes of buffering
    463  * for stream sockets, although the total for sender and receiver is
    464  * actually only PIPSIZ.
    465  * Datagram sockets really use the sendspace as the maximum datagram size,
    466  * and don't really want to reserve the sendspace.  Their recvspace should
    467  * be large enough for at least one max-size datagram plus address.
    468  */
/* Default buffer reservation, in bytes, for stream sockets. */
#define	PIPSIZ	4096
/* Passed to soreserve() by unp_attach() for SOCK_STREAM sockets. */
u_long	unpst_sendspace = PIPSIZ;
u_long	unpst_recvspace = PIPSIZ;
/* Passed to soreserve() by unp_attach() for SOCK_DGRAM sockets. */
u_long	unpdg_sendspace = 2*1024;	/* really max datagram size */
u_long	unpdg_recvspace = 4*1024;

/*
 * Count of file references currently held inside control messages:
 * incremented by unp_internalize(), decremented by unp_externalize().
 * A non-zero value makes unp_detach() run the garbage collector.
 */
int	unp_rights;			/* file descriptors in flight */
    476 
    477 int
    478 unp_attach(so)
    479 	struct socket *so;
    480 {
    481 	register struct unpcb *unp;
    482 	struct timeval tv;
    483 	int error;
    484 
    485 	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
    486 		switch (so->so_type) {
    487 
    488 		case SOCK_STREAM:
    489 			error = soreserve(so, unpst_sendspace, unpst_recvspace);
    490 			break;
    491 
    492 		case SOCK_DGRAM:
    493 			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
    494 			break;
    495 
    496 		default:
    497 			panic("unp_attach");
    498 		}
    499 		if (error)
    500 			return (error);
    501 	}
    502 	unp = malloc(sizeof(*unp), M_PCB, M_NOWAIT);
    503 	if (unp == NULL)
    504 		return (ENOBUFS);
    505 	bzero((caddr_t)unp, sizeof(*unp));
    506 	unp->unp_socket = so;
    507 	so->so_pcb = unp;
    508 	microtime(&tv);
    509 	TIMEVAL_TO_TIMESPEC(&tv, &unp->unp_ctime);
    510 	return (0);
    511 }
    512 
    513 void
    514 unp_detach(unp)
    515 	register struct unpcb *unp;
    516 {
    517 
    518 	if (unp->unp_vnode) {
    519 		unp->unp_vnode->v_socket = 0;
    520 		vrele(unp->unp_vnode);
    521 		unp->unp_vnode = 0;
    522 	}
    523 	if (unp->unp_conn)
    524 		unp_disconnect(unp);
    525 	while (unp->unp_refs)
    526 		unp_drop(unp->unp_refs, ECONNRESET);
    527 	soisdisconnected(unp->unp_socket);
    528 	unp->unp_socket->so_pcb = 0;
    529 	if (unp->unp_addr)
    530 		free(unp->unp_addr, M_SONAME);
    531 	if (unp_rights) {
    532 		/*
    533 		 * Normally the receive buffer is flushed later,
    534 		 * in sofree, but if our receive buffer holds references
    535 		 * to descriptors that are now garbage, we will dispose
    536 		 * of those descriptor references after the garbage collector
    537 		 * gets them (resulting in a "panic: closef: count < 0").
    538 		 */
    539 		sorflush(unp->unp_socket);
    540 		free(unp, M_PCB);
    541 		unp_gc();
    542 	} else
    543 		free(unp, M_PCB);
    544 }
    545 
    546 int
    547 unp_bind(unp, nam, p)
    548 	struct unpcb *unp;
    549 	struct mbuf *nam;
    550 	struct proc *p;
    551 {
    552 	struct sockaddr_un *sun;
    553 	register struct vnode *vp;
    554 	struct vattr vattr;
    555 	size_t addrlen;
    556 	int error;
    557 	struct nameidata nd;
    558 
    559 	if (unp->unp_vnode != 0)
    560 		return (EINVAL);
    561 
    562 	/*
    563 	 * Allocate the new sockaddr.  We have to allocate one
    564 	 * extra byte so that we can ensure that the pathname
    565 	 * is nul-terminated.
    566 	 */
    567 	addrlen = nam->m_len + 1;
    568 	sun = malloc(addrlen, M_SONAME, M_WAITOK);
    569 	m_copydata(nam, 0, nam->m_len, (caddr_t)sun);
    570 	*(((char *)sun) + nam->m_len) = '\0';
    571 
    572 	NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT, UIO_SYSSPACE,
    573 	    sun->sun_path, p);
    574 
    575 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
    576 	if ((error = namei(&nd)) != 0)
    577 		goto bad;
    578 	vp = nd.ni_vp;
    579 	if (vp != NULL) {
    580 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
    581 		if (nd.ni_dvp == vp)
    582 			vrele(nd.ni_dvp);
    583 		else
    584 			vput(nd.ni_dvp);
    585 		vrele(vp);
    586 		error = EADDRINUSE;
    587 		goto bad;
    588 	}
    589 	VATTR_NULL(&vattr);
    590 	vattr.va_type = VSOCK;
    591 	vattr.va_mode = ACCESSPERMS;
    592 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
    593 	error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
    594 	if (error)
    595 		goto bad;
    596 	vp = nd.ni_vp;
    597 	vp->v_socket = unp->unp_socket;
    598 	unp->unp_vnode = vp;
    599 	unp->unp_addrlen = addrlen;
    600 	unp->unp_addr = sun;
    601 	VOP_UNLOCK(vp, 0);
    602 	return (0);
    603 
    604  bad:
    605 	free(sun, M_SONAME);
    606 	return (error);
    607 }
    608 
/*
 * Connect `so' to the socket bound at the pathname carried in `nam'.
 *
 * The path is looked up, must name a VSOCK vnode writable by `p', and
 * must have a socket of the same type attached.  For protocols with
 * PR_CONNREQUIRED, the target must be accepting connections; a new
 * socket is spawned from it, inherits a copy of the listener's address
 * and its flags, and becomes the actual peer.
 *
 * Returns 0 or an errno: the namei()/VOP_ACCESS() error, ENOTSOCK,
 * ECONNREFUSED, EPROTOTYPE, or the result of unp_connect2().
 */
int
unp_connect(so, nam, p)
	struct socket *so;
	struct mbuf *nam;
	struct proc *p;
{
	struct nameidata nd;
	struct sockaddr_un *path;
	struct vnode *vp;
	struct socket *peer, *newso;
	struct unpcb *lpcb, *npcb;
	size_t pathlen;
	int error;

	/*
	 * Work on a temporary copy of the address, with one spare byte so
	 * that the pathname is guaranteed to be nul-terminated.  It is
	 * freed again before returning.
	 */
	pathlen = nam->m_len + 1;
	path = malloc(pathlen, M_SONAME, M_WAITOK);
	m_copydata(nam, 0, nam->m_len, (caddr_t)path);
	((char *)path)[nam->m_len] = '\0';

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, path->sun_path, p);

	error = namei(&nd);
	if (error != 0)
		goto freepath;
	vp = nd.ni_vp;

	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto putvnode;
	}
	error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p);
	if (error != 0)
		goto putvnode;

	peer = vp->v_socket;
	if (peer == 0) {
		error = ECONNREFUSED;
		goto putvnode;
	}
	if (so->so_type != peer->so_type) {
		error = EPROTOTYPE;
		goto putvnode;
	}

	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		if ((peer->so_options & SO_ACCEPTCONN) == 0) {
			error = ECONNREFUSED;
			goto putvnode;
		}
		newso = sonewconn(peer, 0);
		if (newso == 0) {
			error = ECONNREFUSED;
			goto putvnode;
		}

		/* Give the new socket the listener's address and flags. */
		lpcb = sotounpcb(peer);
		npcb = sotounpcb(newso);
		if (lpcb->unp_addr) {
			npcb->unp_addr = malloc(lpcb->unp_addrlen,
			    M_SONAME, M_WAITOK);
			bcopy(lpcb->unp_addr, npcb->unp_addr,
			    lpcb->unp_addrlen);
			npcb->unp_addrlen = lpcb->unp_addrlen;
		}
		npcb->unp_flags = lpcb->unp_flags;
		peer = newso;
	}

	error = unp_connect2(so, peer);

 putvnode:
	vput(vp);
 freepath:
	free(path, M_SONAME);
	return (error);
}
    679 
    680 int
    681 unp_connect2(so, so2)
    682 	register struct socket *so;
    683 	register struct socket *so2;
    684 {
    685 	register struct unpcb *unp = sotounpcb(so);
    686 	register struct unpcb *unp2;
    687 
    688 	if (so2->so_type != so->so_type)
    689 		return (EPROTOTYPE);
    690 	unp2 = sotounpcb(so2);
    691 	unp->unp_conn = unp2;
    692 	switch (so->so_type) {
    693 
    694 	case SOCK_DGRAM:
    695 		unp->unp_nextref = unp2->unp_refs;
    696 		unp2->unp_refs = unp;
    697 		soisconnected(so);
    698 		break;
    699 
    700 	case SOCK_STREAM:
    701 		unp2->unp_conn = unp;
    702 		soisconnected(so);
    703 		soisconnected(so2);
    704 		break;
    705 
    706 	default:
    707 		panic("unp_connect2");
    708 	}
    709 	return (0);
    710 }
    711 
    712 void
    713 unp_disconnect(unp)
    714 	struct unpcb *unp;
    715 {
    716 	register struct unpcb *unp2 = unp->unp_conn;
    717 
    718 	if (unp2 == 0)
    719 		return;
    720 	unp->unp_conn = 0;
    721 	switch (unp->unp_socket->so_type) {
    722 
    723 	case SOCK_DGRAM:
    724 		if (unp2->unp_refs == unp)
    725 			unp2->unp_refs = unp->unp_nextref;
    726 		else {
    727 			unp2 = unp2->unp_refs;
    728 			for (;;) {
    729 				if (unp2 == 0)
    730 					panic("unp_disconnect");
    731 				if (unp2->unp_nextref == unp)
    732 					break;
    733 				unp2 = unp2->unp_nextref;
    734 			}
    735 			unp2->unp_nextref = unp->unp_nextref;
    736 		}
    737 		unp->unp_nextref = 0;
    738 		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
    739 		break;
    740 
    741 	case SOCK_STREAM:
    742 		soisdisconnected(unp->unp_socket);
    743 		unp2->unp_conn = 0;
    744 		soisdisconnected(unp2->unp_socket);
    745 		break;
    746 	}
    747 }
    748 
/*
 * Disabled code (compiled only if `notdef' is defined): an abort routine
 * that would simply detach the PCB.
 */
#ifdef notdef
unp_abort(unp)
	struct unpcb *unp;
{

	unp_detach(unp);
}
#endif
    757 
    758 void
    759 unp_shutdown(unp)
    760 	struct unpcb *unp;
    761 {
    762 	struct socket *so;
    763 
    764 	if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn &&
    765 	    (so = unp->unp_conn->unp_socket))
    766 		socantrcvmore(so);
    767 }
    768 
    769 void
    770 unp_drop(unp, errno)
    771 	struct unpcb *unp;
    772 	int errno;
    773 {
    774 	struct socket *so = unp->unp_socket;
    775 
    776 	so->so_error = errno;
    777 	unp_disconnect(unp);
    778 	if (so->so_head) {
    779 		so->so_pcb = 0;
    780 		sofree(so);
    781 		if (unp->unp_addr)
    782 			free(unp->unp_addr, M_SONAME);
    783 		free(unp, M_PCB);
    784 	}
    785 }
    786 
/*
 * Disabled code (compiled only if `notdef' is defined): an empty
 * placeholder that does nothing.
 */
#ifdef notdef
unp_drain()
{

}
#endif
    793 
    794 int
    795 unp_externalize(rights)
    796 	struct mbuf *rights;
    797 {
    798 	struct proc *p = curproc;		/* XXX */
    799 	register struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
    800 	register int i, *fdp = (int *)(cm + 1);
    801 	register struct file **rp = (struct file **)ALIGN(cm + 1);
    802 	register struct file *fp;
    803 	int nfds = (cm->cmsg_len - ALIGN(sizeof(*cm))) / sizeof (struct file *);
    804 	int f;
    805 
    806 	/* Make sure that the recipient has space */
    807 	if (!fdavail(p, nfds)) {
    808 		for (i = 0; i < nfds; i++) {
    809 			fp = *rp;
    810 			unp_discard(fp);
    811 			*rp++ = 0;
    812 		}
    813 		return (EMSGSIZE);
    814 	}
    815 
    816 	/*
    817 	 * Add file to the recipient's open file table, converting them
    818 	 * to integer file descriptors as we go.  Done in forward order
    819 	 * because an integer will always come in the same place or before
    820 	 * its corresponding struct file pointer.
    821 	 */
    822 	for (i = 0; i < nfds; i++) {
    823 		if (fdalloc(p, 0, &f))
    824 			panic("unp_externalize");
    825 		fp = *rp++;
    826 		p->p_fd->fd_ofiles[f] = fp;
    827 		fp->f_msgcount--;
    828 		unp_rights--;
    829 		*fdp++ = f;
    830 	}
    831 
    832 	/*
    833 	 * Adjust length, in case of transition from large struct file
    834 	 * pointers to ints.
    835 	 */
    836 	cm->cmsg_len = sizeof(*cm) + (nfds * sizeof(int));
    837 	rights->m_len = cm->cmsg_len;
    838 	return (0);
    839 }
    840 
    841 int
    842 unp_internalize(control, p)
    843 	struct mbuf *control;
    844 	struct proc *p;
    845 {
    846 	struct filedesc *fdescp = p->p_fd;
    847 	register struct cmsghdr *cm = mtod(control, struct cmsghdr *);
    848 	register struct file **rp;
    849 	register struct file *fp;
    850 	register int i, fd, *fdp;
    851 	int nfds;
    852 	u_int neededspace;
    853 
    854 	/* Sanity check the control message header */
    855 	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
    856 	    cm->cmsg_len != control->m_len)
    857 		return (EINVAL);
    858 
    859 	/* Verify that the file descriptors are valid */
    860 	nfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int);
    861 	fdp = (int *)(cm + 1);
    862 	for (i = 0; i < nfds; i++) {
    863 		fd = *fdp++;
    864 		if ((unsigned)fd >= fdescp->fd_nfiles ||
    865 		    fdescp->fd_ofiles[fd] == NULL)
    866 			return (EBADF);
    867 	}
    868 
    869 	/* Make sure we have room for the struct file pointers */
    870 morespace:
    871 	neededspace = (ALIGN(sizeof (*cm)) + nfds * sizeof (struct file *)) -
    872 		control->m_len;
    873 	if (neededspace > M_TRAILINGSPACE(control)) {
    874 
    875 		/* if we already have a cluster, the message is just too big */
    876 		if (control->m_flags & M_EXT)
    877 			return (E2BIG);
    878 
    879 		/* allocate a cluster and try again */
    880 		MCLGET(control, M_WAIT);
    881 		if ((control->m_flags & M_EXT) == 0)
    882 			return (ENOBUFS);	/* allocation failed */
    883 
    884 		/* copy the data to the cluster */
    885 		bcopy(cm, mtod(control, char *), cm->cmsg_len);
    886 		cm = mtod(control, struct cmsghdr *);
    887 		goto morespace;
    888 	}
    889 
    890 	/* adjust message & mbuf to note amount of space actually used. */
    891 	cm->cmsg_len += neededspace;
    892 	control->m_len = cm->cmsg_len;
    893 
    894 	/*
    895 	 * Transform the file descriptors into struct file pointers, in
    896 	 * reverse order so that if pointers are bigger than ints, the
    897 	 * int won't get until we're done.
    898 	 */
    899 	fdp = ((int *)(cm + 1)) + nfds - 1;
    900 	rp = ((struct file **)ALIGN(cm + 1)) + nfds - 1;
    901 	for (i = 0; i < nfds; i++) {
    902 		fp = fdescp->fd_ofiles[*fdp--];
    903 		*rp-- = fp;
    904 		fp->f_count++;
    905 		fp->f_msgcount++;
    906 		unp_rights++;
    907 	}
    908 	return (0);
    909 }
    910 
    911 struct mbuf *
    912 unp_addsockcred(p, control)
    913 	struct proc *p;
    914 	struct mbuf *control;
    915 {
    916 	struct cmsghdr *cmp;
    917 	struct sockcred *sc;
    918 	struct mbuf *m, *n;
    919 	int len, i;
    920 
    921 	len = sizeof(struct cmsghdr) + SOCKCREDSIZE(p->p_ucred->cr_ngroups);
    922 
    923 	m = m_get(M_WAIT, MT_CONTROL);
    924 	if (len > MLEN) {
    925 		if (len > MCLBYTES)
    926 			MEXTMALLOC(m, len, M_WAITOK);
    927 		else
    928 			MCLGET(m, M_WAIT);
    929 		if ((m->m_flags & M_EXT) == 0) {
    930 			m_free(m);
    931 			return (control);
    932 		}
    933 	}
    934 
    935 	m->m_len = len;
    936 	m->m_next = NULL;
    937 	cmp = mtod(m, struct cmsghdr *);
    938 	sc = (struct sockcred *)CMSG_DATA(cmp);
    939 	cmp->cmsg_len = len;
    940 	cmp->cmsg_level = SOL_SOCKET;
    941 	cmp->cmsg_type = SCM_CREDS;
    942 	sc->sc_uid = p->p_cred->p_ruid;
    943 	sc->sc_euid = p->p_ucred->cr_uid;
    944 	sc->sc_gid = p->p_cred->p_rgid;
    945 	sc->sc_egid = p->p_ucred->cr_gid;
    946 	sc->sc_ngroups = p->p_ucred->cr_ngroups;
    947 	for (i = 0; i < sc->sc_ngroups; i++)
    948 		sc->sc_groups[i] = p->p_ucred->cr_groups[i];
    949 
    950 	/*
    951 	 * If a control message already exists, append us to the end.
    952 	 */
    953 	if (control != NULL) {
    954 		for (n = control; n->m_next != NULL; n = n->m_next)
    955 			;
    956 		n->m_next = m;
    957 	} else
    958 		control = m;
    959 
    960 	return (control);
    961 }
    962 
/*
 * Garbage collector state.  unp_gcing is set while unp_gc() runs so that
 * a nested call returns immediately; unp_defer counts files flagged
 * FDEFER that the marking loop still has to visit.
 */
int	unp_defer, unp_gcing;
/* The Unix domain; unp_gc() only scans sockets belonging to it. */
extern	struct domain unixdomain;
    965 
    966 void
    967 unp_gc()
    968 {
    969 	register struct file *fp, *nextfp;
    970 	register struct socket *so;
    971 	struct file **extra_ref, **fpp;
    972 	int nunref, i;
    973 
    974 	if (unp_gcing)
    975 		return;
    976 	unp_gcing = 1;
    977 	unp_defer = 0;
    978 	for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next)
    979 		fp->f_flag &= ~(FMARK|FDEFER);
    980 	do {
    981 		for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next) {
    982 			if (fp->f_count == 0)
    983 				continue;
    984 			if (fp->f_flag & FDEFER) {
    985 				fp->f_flag &= ~FDEFER;
    986 				unp_defer--;
    987 			} else {
    988 				if (fp->f_flag & FMARK)
    989 					continue;
    990 				if (fp->f_count == fp->f_msgcount)
    991 					continue;
    992 				fp->f_flag |= FMARK;
    993 			}
    994 			if (fp->f_type != DTYPE_SOCKET ||
    995 			    (so = (struct socket *)fp->f_data) == 0)
    996 				continue;
    997 			if (so->so_proto->pr_domain != &unixdomain ||
    998 			    (so->so_proto->pr_flags&PR_RIGHTS) == 0)
    999 				continue;
   1000 #ifdef notdef
   1001 			if (so->so_rcv.sb_flags & SB_LOCK) {
   1002 				/*
   1003 				 * This is problematical; it's not clear
   1004 				 * we need to wait for the sockbuf to be
   1005 				 * unlocked (on a uniprocessor, at least),
   1006 				 * and it's also not clear what to do
   1007 				 * if sbwait returns an error due to receipt
   1008 				 * of a signal.  If sbwait does return
   1009 				 * an error, we'll go into an infinite
   1010 				 * loop.  Delete all of this for now.
   1011 				 */
   1012 				(void) sbwait(&so->so_rcv);
   1013 				goto restart;
   1014 			}
   1015 #endif
   1016 			unp_scan(so->so_rcv.sb_mb, unp_mark);
   1017 		}
   1018 	} while (unp_defer);
   1019 	/*
   1020 	 * We grab an extra reference to each of the file table entries
   1021 	 * that are not otherwise accessible and then free the rights
   1022 	 * that are stored in messages on them.
   1023 	 *
   1024 	 * The bug in the orginal code is a little tricky, so I'll describe
   1025 	 * what's wrong with it here.
   1026 	 *
   1027 	 * It is incorrect to simply unp_discard each entry for f_msgcount
   1028 	 * times -- consider the case of sockets A and B that contain
   1029 	 * references to each other.  On a last close of some other socket,
   1030 	 * we trigger a gc since the number of outstanding rights (unp_rights)
   1031 	 * is non-zero.  If during the sweep phase the gc code un_discards,
   1032 	 * we end up doing a (full) closef on the descriptor.  A closef on A
   1033 	 * results in the following chain.  Closef calls soo_close, which
   1034 	 * calls soclose.   Soclose calls first (through the switch
   1035 	 * uipc_usrreq) unp_detach, which re-invokes unp_gc.  Unp_gc simply
   1036 	 * returns because the previous instance had set unp_gcing, and
   1037 	 * we return all the way back to soclose, which marks the socket
   1038 	 * with SS_NOFDREF, and then calls sofree.  Sofree calls sorflush
   1039 	 * to free up the rights that are queued in messages on the socket A,
   1040 	 * i.e., the reference on B.  The sorflush calls via the dom_dispose
   1041 	 * switch unp_dispose, which unp_scans with unp_discard.  This second
   1042 	 * instance of unp_discard just calls closef on B.
   1043 	 *
   1044 	 * Well, a similar chain occurs on B, resulting in a sorflush on B,
   1045 	 * which results in another closef on A.  Unfortunately, A is already
   1046 	 * being closed, and the descriptor has already been marked with
   1047 	 * SS_NOFDREF, and soclose panics at this point.
   1048 	 *
   1049 	 * Here, we first take an extra reference to each inaccessible
   1050 	 * descriptor.  Then, we call sorflush ourself, since we know
   1051 	 * it is a Unix domain socket anyhow.  After we destroy all the
   1052 	 * rights carried in messages, we do a last closef to get rid
   1053 	 * of our extra reference.  This is the last close, and the
   1054 	 * unp_detach etc will shut down the socket.
   1055 	 *
   1056 	 * 91/09/19, bsy (at) cs.cmu.edu
   1057 	 */
   1058 	extra_ref = malloc(nfiles * sizeof(struct file *), M_FILE, M_WAITOK);
   1059 	for (nunref = 0, fp = filehead.lh_first, fpp = extra_ref; fp != 0;
   1060 	    fp = nextfp) {
   1061 		nextfp = fp->f_list.le_next;
   1062 		if (fp->f_count == 0)
   1063 			continue;
   1064 		if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) {
   1065 			*fpp++ = fp;
   1066 			nunref++;
   1067 			fp->f_count++;
   1068 		}
   1069 	}
   1070 	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
   1071 		sorflush((struct socket *)(*fpp)->f_data);
   1072 	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
   1073 		(void) closef(*fpp, (struct proc *)0);
   1074 	free((caddr_t)extra_ref, M_FILE);
   1075 	unp_gcing = 0;
   1076 }
   1077 
   1078 void
   1079 unp_dispose(m)
   1080 	struct mbuf *m;
   1081 {
   1082 
   1083 	if (m)
   1084 		unp_scan(m, unp_discard);
   1085 }
   1086 
   1087 void
   1088 unp_scan(m0, op)
   1089 	register struct mbuf *m0;
   1090 	void (*op) __P((struct file *));
   1091 {
   1092 	register struct mbuf *m;
   1093 	register struct file **rp;
   1094 	register struct cmsghdr *cm;
   1095 	register int i;
   1096 	int qfds;
   1097 
   1098 	while (m0) {
   1099 		for (m = m0; m; m = m->m_next)
   1100 			if (m->m_type == MT_CONTROL &&
   1101 			    m->m_len >= sizeof(*cm)) {
   1102 				cm = mtod(m, struct cmsghdr *);
   1103 				if (cm->cmsg_level != SOL_SOCKET ||
   1104 				    cm->cmsg_type != SCM_RIGHTS)
   1105 					continue;
   1106 				qfds = (cm->cmsg_len - sizeof *cm)
   1107 						/ sizeof (struct file *);
   1108 				rp = (struct file **)(cm + 1);
   1109 				for (i = 0; i < qfds; i++)
   1110 					(*op)(*rp++);
   1111 				break;		/* XXX, but saves time */
   1112 			}
   1113 		m0 = m0->m_act;
   1114 	}
   1115 }
   1116 
   1117 void
   1118 unp_mark(fp)
   1119 	struct file *fp;
   1120 {
   1121 
   1122 	if (fp->f_flag & FMARK)
   1123 		return;
   1124 	unp_defer++;
   1125 	fp->f_flag |= (FMARK|FDEFER);
   1126 }
   1127 
   1128 void
   1129 unp_discard(fp)
   1130 	struct file *fp;
   1131 {
   1132 
   1133 	fp->f_msgcount--;
   1134 	unp_rights--;
   1135 	(void) closef(fp, (struct proc *)0);
   1136 }
   1137