Home | History | Annotate | Line # | Download | only in kern
uipc_syscalls.c revision 1.130
      1 /*	$NetBSD: uipc_syscalls.c,v 1.130 2008/04/24 15:35:30 ad Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2008 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. All advertising materials mentioning features or use of this software
     16  *    must display the following acknowledgement:
     17  *	This product includes software developed by the NetBSD
     18  *	Foundation, Inc. and its contributors.
     19  * 4. Neither the name of The NetBSD Foundation nor the names of its
     20  *    contributors may be used to endorse or promote products derived
     21  *    from this software without specific prior written permission.
     22  *
     23  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     24  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     25  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     26  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     27  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     28  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     29  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     30  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     31  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     32  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     33  * POSSIBILITY OF SUCH DAMAGE.
     34  */
     35 
     36 /*
     37  * Copyright (c) 1982, 1986, 1989, 1990, 1993
     38  *	The Regents of the University of California.  All rights reserved.
     39  *
     40  * Redistribution and use in source and binary forms, with or without
     41  * modification, are permitted provided that the following conditions
     42  * are met:
     43  * 1. Redistributions of source code must retain the above copyright
     44  *    notice, this list of conditions and the following disclaimer.
     45  * 2. Redistributions in binary form must reproduce the above copyright
     46  *    notice, this list of conditions and the following disclaimer in the
     47  *    documentation and/or other materials provided with the distribution.
     48  * 3. Neither the name of the University nor the names of its contributors
     49  *    may be used to endorse or promote products derived from this software
     50  *    without specific prior written permission.
     51  *
     52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     62  * SUCH DAMAGE.
     63  *
     64  *	@(#)uipc_syscalls.c	8.6 (Berkeley) 2/14/95
     65  */
     66 
     67 #include <sys/cdefs.h>
     68 __KERNEL_RCSID(0, "$NetBSD: uipc_syscalls.c,v 1.130 2008/04/24 15:35:30 ad Exp $");
     69 
     70 #include "opt_pipe.h"
     71 
     72 #include <sys/param.h>
     73 #include <sys/systm.h>
     74 #include <sys/filedesc.h>
     75 #include <sys/proc.h>
     76 #include <sys/file.h>
     77 #include <sys/buf.h>
     78 #include <sys/malloc.h>
     79 #include <sys/mbuf.h>
     80 #include <sys/protosw.h>
     81 #include <sys/socket.h>
     82 #include <sys/socketvar.h>
     83 #include <sys/signalvar.h>
     84 #include <sys/un.h>
     85 #include <sys/ktrace.h>
     86 #include <sys/event.h>
     87 
     88 #include <sys/mount.h>
     89 #include <sys/syscallargs.h>
     90 
     91 #include <uvm/uvm_extern.h>
     92 
     93 /*
     94  * System call interface to the socket abstraction.
     95  */
     96 extern const struct fileops socketops;
     97 
     98 int
     99 sys___socket30(struct lwp *l, const struct sys___socket30_args *uap, register_t *retval)
    100 {
    101 	/* {
    102 		syscallarg(int)	domain;
    103 		syscallarg(int)	type;
    104 		syscallarg(int)	protocol;
    105 	} */
    106 	int		fd, error;
    107 
    108 	error = fsocreate(SCARG(uap, domain), NULL, SCARG(uap, type),
    109 			 SCARG(uap, protocol), l, &fd);
    110 	if (error == 0)
    111 		*retval = fd;
    112 	return error;
    113 }
    114 
    115 /* ARGSUSED */
    116 int
    117 sys_bind(struct lwp *l, const struct sys_bind_args *uap, register_t *retval)
    118 {
    119 	/* {
    120 		syscallarg(int)				s;
    121 		syscallarg(const struct sockaddr *)	name;
    122 		syscallarg(unsigned int)		namelen;
    123 	} */
    124 	struct mbuf	*nam;
    125 	int		error;
    126 
    127 	error = sockargs(&nam, SCARG(uap, name), SCARG(uap, namelen),
    128 	    MT_SONAME);
    129 	if (error)
    130 		return error;
    131 
    132 	return do_sys_bind(l, SCARG(uap, s), nam);
    133 }
    134 
    135 int
    136 do_sys_bind(struct lwp *l, int fd, struct mbuf *nam)
    137 {
    138 	struct socket	*so;
    139 	int		error;
    140 
    141 	if ((error = fd_getsock(fd, &so)) != 0) {
    142 		m_freem(nam);
    143 		return (error);
    144 	}
    145 	MCLAIM(nam, so->so_mowner);
    146 	error = sobind(so, nam, l);
    147 	m_freem(nam);
    148 	fd_putfile(fd);
    149 	return error;
    150 }
    151 
    152 /* ARGSUSED */
    153 int
    154 sys_listen(struct lwp *l, const struct sys_listen_args *uap, register_t *retval)
    155 {
    156 	/* {
    157 		syscallarg(int)	s;
    158 		syscallarg(int)	backlog;
    159 	} */
    160 	struct socket	*so;
    161 	int		error;
    162 
    163 	if ((error = fd_getsock(SCARG(uap, s), &so)) != 0)
    164 		return (error);
    165 	error = solisten(so, SCARG(uap, backlog), l);
    166 	fd_putfile(SCARG(uap, s));
    167 	return error;
    168 }
    169 
    170 int
    171 do_sys_accept(struct lwp *l, int sock, struct mbuf **name, register_t *new_sock)
    172 {
    173 	file_t		*fp, *fp2;
    174 	struct mbuf	*nam;
    175 	int		error, fd;
    176 	struct socket	*so, *so2;
    177 
    178 	if ((fp = fd_getfile(sock)) == NULL)
    179 		return (EBADF);
    180 	if (fp->f_type != DTYPE_SOCKET)
    181 		return (ENOTSOCK);
    182 	if ((error = fd_allocfile(&fp2, &fd)) != 0)
    183 		return (error);
    184 	nam = m_get(M_WAIT, MT_SONAME);
    185 	*new_sock = fd;
    186 	so = fp->f_data;
    187 	solock(so);
    188 	if (!(so->so_proto->pr_flags & PR_LISTEN)) {
    189 		error = EOPNOTSUPP;
    190 		goto bad;
    191 	}
    192 	if ((so->so_options & SO_ACCEPTCONN) == 0) {
    193 		error = EINVAL;
    194 		goto bad;
    195 	}
    196 	if (so->so_nbio && so->so_qlen == 0) {
    197 		error = EWOULDBLOCK;
    198 		goto bad;
    199 	}
    200 	while (so->so_qlen == 0 && so->so_error == 0) {
    201 		if (so->so_state & SS_CANTRCVMORE) {
    202 			so->so_error = ECONNABORTED;
    203 			break;
    204 		}
    205 		error = sowait(so, 0);
    206 		if (error) {
    207 			goto bad;
    208 		}
    209 	}
    210 	if (so->so_error) {
    211 		error = so->so_error;
    212 		so->so_error = 0;
    213 		goto bad;
    214 	}
    215 	/* connection has been removed from the listen queue */
    216 	KNOTE(&so->so_rcv.sb_sel.sel_klist, NOTE_SUBMIT);
    217 	so2 = TAILQ_FIRST(&so->so_q);
    218 	if (soqremque(so2, 1) == 0)
    219 		panic("accept");
    220 	fp2->f_type = DTYPE_SOCKET;
    221 	fp2->f_flag = fp->f_flag;
    222 	fp2->f_ops = &socketops;
    223 	fp2->f_data = so2;
    224 	error = soaccept(so2, nam);
    225 	sounlock(so);
    226 	if (error) {
    227 		/* an error occurred, free the file descriptor and mbuf */
    228 		m_freem(nam);
    229 		mutex_enter(&fp2->f_lock);
    230 		fp2->f_count++;
    231 		mutex_exit(&fp2->f_lock);
    232 		closef(fp2);
    233 		fd_abort(curproc, NULL, fd);
    234 	} else {
    235 		fd_affix(curproc, fp2, fd);
    236 		*name = nam;
    237 	}
    238 	fd_putfile(sock);
    239 	return (error);
    240  bad:
    241  	sounlock(so);
    242  	m_freem(nam);
    243 	fd_putfile(sock);
    244  	fd_abort(curproc, fp2, fd);
    245  	return (error);
    246 }
    247 
    248 int
    249 sys_accept(struct lwp *l, const struct sys_accept_args *uap, register_t *retval)
    250 {
    251 	/* {
    252 		syscallarg(int)			s;
    253 		syscallarg(struct sockaddr *)	name;
    254 		syscallarg(unsigned int *)	anamelen;
    255 	} */
    256 	int error, fd;
    257 	struct mbuf *name;
    258 
    259 	error = do_sys_accept(l, SCARG(uap, s), &name, retval);
    260 	if (error != 0)
    261 		return error;
    262 	error = copyout_sockname(SCARG(uap, name), SCARG(uap, anamelen),
    263 	    MSG_LENUSRSPACE, name);
    264 	if (name != NULL)
    265 		m_free(name);
    266 	if (error != 0) {
    267 		fd = (int)*retval;
    268 		if (fd_getfile(fd) != NULL)
    269 			(void)fd_close(fd);
    270 	}
    271 	return error;
    272 }
    273 
    274 /* ARGSUSED */
    275 int
    276 sys_connect(struct lwp *l, const struct sys_connect_args *uap, register_t *retval)
    277 {
    278 	/* {
    279 		syscallarg(int)				s;
    280 		syscallarg(const struct sockaddr *)	name;
    281 		syscallarg(unsigned int)		namelen;
    282 	} */
    283 	int		error;
    284 	struct mbuf	*nam;
    285 
    286 	error = sockargs(&nam, SCARG(uap, name), SCARG(uap, namelen),
    287 	    MT_SONAME);
    288 	if (error)
    289 		return error;
    290 	return do_sys_connect(l,  SCARG(uap, s), nam);
    291 }
    292 
    293 int
    294 do_sys_connect(struct lwp *l, int fd, struct mbuf *nam)
    295 {
    296 	struct socket	*so;
    297 	int		error;
    298 	int		interrupted = 0;
    299 
    300 	if ((error = fd_getsock(fd, &so)) != 0) {
    301 		m_freem(nam);
    302 		return (error);
    303 	}
    304 	solock(so);
    305 	MCLAIM(nam, so->so_mowner);
    306 	if (so->so_state & SS_ISCONNECTING) {
    307 		error = EALREADY;
    308 		goto out;
    309 	}
    310 
    311 	error = soconnect(so, nam, l);
    312 	if (error)
    313 		goto bad;
    314 	if (so->so_nbio && (so->so_state & SS_ISCONNECTING)) {
    315 		error = EINPROGRESS;
    316 		goto out;
    317 	}
    318 	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
    319 		error = sowait(so, 0);
    320 		if (error) {
    321 			if (error == EINTR || error == ERESTART)
    322 				interrupted = 1;
    323 			break;
    324 		}
    325 	}
    326 	if (error == 0) {
    327 		error = so->so_error;
    328 		so->so_error = 0;
    329 	}
    330  bad:
    331 	if (!interrupted)
    332 		so->so_state &= ~SS_ISCONNECTING;
    333 	if (error == ERESTART)
    334 		error = EINTR;
    335  out:
    336  	sounlock(so);
    337  	fd_putfile(fd);
    338 	m_freem(nam);
    339 	return (error);
    340 }
    341 
    342 int
    343 sys_socketpair(struct lwp *l, const struct sys_socketpair_args *uap, register_t *retval)
    344 {
    345 	/* {
    346 		syscallarg(int)		domain;
    347 		syscallarg(int)		type;
    348 		syscallarg(int)		protocol;
    349 		syscallarg(int *)	rsv;
    350 	} */
    351 	file_t		*fp1, *fp2;
    352 	struct socket	*so1, *so2;
    353 	int		fd, error, sv[2];
    354 	proc_t		*p;
    355 
    356 	p = curproc;
    357 	error = socreate(SCARG(uap, domain), &so1, SCARG(uap, type),
    358 	    SCARG(uap, protocol), l, NULL);
    359 	if (error)
    360 		return (error);
    361 	error = socreate(SCARG(uap, domain), &so2, SCARG(uap, type),
    362 	    SCARG(uap, protocol), l, so1);
    363 	if (error)
    364 		goto free1;
    365 	if ((error = fd_allocfile(&fp1, &fd)) != 0)
    366 		goto free2;
    367 	sv[0] = fd;
    368 	fp1->f_flag = FREAD|FWRITE;
    369 	fp1->f_type = DTYPE_SOCKET;
    370 	fp1->f_ops = &socketops;
    371 	fp1->f_data = so1;
    372 	if ((error = fd_allocfile(&fp2, &fd)) != 0)
    373 		goto free3;
    374 	fp2->f_flag = FREAD|FWRITE;
    375 	fp2->f_type = DTYPE_SOCKET;
    376 	fp2->f_ops = &socketops;
    377 	fp2->f_data = so2;
    378 	sv[1] = fd;
    379 	solock(so1);
    380 	error = soconnect2(so1, so2);
    381 	if (error == 0 && SCARG(uap, type) == SOCK_DGRAM) {
    382 		/*
    383 		 * Datagram socket connection is asymmetric.
    384 		 */
    385 		error = soconnect2(so2, so1);
    386 	}
    387 	sounlock(so1);
    388 	if (error == 0)
    389 		error = copyout(sv, SCARG(uap, rsv), 2 * sizeof(int));
    390 	if (error == 0) {
    391 		fd_affix(p, fp2, sv[1]);
    392 		fd_affix(p, fp1, sv[0]);
    393 		return (0);
    394 	}
    395 	fd_abort(p, fp2, sv[1]);
    396  free3:
    397 	fd_abort(p, fp1, sv[0]);
    398  free2:
    399 	(void)soclose(so2);
    400  free1:
    401 	(void)soclose(so1);
    402 	return (error);
    403 }
    404 
    405 int
    406 sys_sendto(struct lwp *l, const struct sys_sendto_args *uap, register_t *retval)
    407 {
    408 	/* {
    409 		syscallarg(int)				s;
    410 		syscallarg(const void *)		buf;
    411 		syscallarg(size_t)			len;
    412 		syscallarg(int)				flags;
    413 		syscallarg(const struct sockaddr *)	to;
    414 		syscallarg(unsigned int)		tolen;
    415 	} */
    416 	struct msghdr	msg;
    417 	struct iovec	aiov;
    418 
    419 	msg.msg_name = __UNCONST(SCARG(uap, to)); /* XXXUNCONST kills const */
    420 	msg.msg_namelen = SCARG(uap, tolen);
    421 	msg.msg_iov = &aiov;
    422 	msg.msg_iovlen = 1;
    423 	msg.msg_control = NULL;
    424 	msg.msg_flags = 0;
    425 	aiov.iov_base = __UNCONST(SCARG(uap, buf)); /* XXXUNCONST kills const */
    426 	aiov.iov_len = SCARG(uap, len);
    427 	return do_sys_sendmsg(l, SCARG(uap, s), &msg, SCARG(uap, flags), retval);
    428 }
    429 
    430 int
    431 sys_sendmsg(struct lwp *l, const struct sys_sendmsg_args *uap, register_t *retval)
    432 {
    433 	/* {
    434 		syscallarg(int)				s;
    435 		syscallarg(const struct msghdr *)	msg;
    436 		syscallarg(int)				flags;
    437 	} */
    438 	struct msghdr	msg;
    439 	int		error;
    440 
    441 	error = copyin(SCARG(uap, msg), &msg, sizeof(msg));
    442 	if (error)
    443 		return (error);
    444 
    445 	msg.msg_flags = MSG_IOVUSRSPACE;
    446 	return do_sys_sendmsg(l, SCARG(uap, s), &msg, SCARG(uap, flags), retval);
    447 }
    448 
    449 int
    450 do_sys_sendmsg(struct lwp *l, int s, struct msghdr *mp, int flags,
    451 		register_t *retsize)
    452 {
    453 	struct uio	auio;
    454 	int		i, len, error, iovlen;
    455 	struct mbuf	*to, *control;
    456 	struct socket	*so;
    457 	struct iovec	*tiov;
    458 	struct iovec	aiov[UIO_SMALLIOV], *iov = aiov;
    459 	struct iovec	*ktriov = NULL;
    460 
    461 	ktrkuser("msghdr", mp, sizeof *mp);
    462 
    463 	/* If the caller passed us stuff in mbufs, we must free them */
    464 	if (mp->msg_flags & MSG_NAMEMBUF)
    465 		to = mp->msg_name;
    466 	else
    467 		to = NULL;
    468 
    469 	if (mp->msg_flags & MSG_CONTROLMBUF)
    470 		control = mp->msg_control;
    471 	else
    472 		control = NULL;
    473 
    474 	if (mp->msg_flags & MSG_IOVUSRSPACE) {
    475 		if ((unsigned int)mp->msg_iovlen > UIO_SMALLIOV) {
    476 			if ((unsigned int)mp->msg_iovlen > IOV_MAX) {
    477 				error = EMSGSIZE;
    478 				goto bad;
    479 			}
    480 			iov = malloc(sizeof(struct iovec) * mp->msg_iovlen,
    481 			    M_IOV, M_WAITOK);
    482 		}
    483 		if (mp->msg_iovlen != 0) {
    484 			error = copyin(mp->msg_iov, iov,
    485 			    (size_t)(mp->msg_iovlen * sizeof(struct iovec)));
    486 			if (error)
    487 				goto bad;
    488 		}
    489 		mp->msg_iov = iov;
    490 	}
    491 
    492 	auio.uio_iov = mp->msg_iov;
    493 	auio.uio_iovcnt = mp->msg_iovlen;
    494 	auio.uio_rw = UIO_WRITE;
    495 	auio.uio_offset = 0;			/* XXX */
    496 	auio.uio_resid = 0;
    497 	KASSERT(l == curlwp);
    498 	auio.uio_vmspace = l->l_proc->p_vmspace;
    499 
    500 	for (i = 0, tiov = mp->msg_iov; i < mp->msg_iovlen; i++, tiov++) {
    501 #if 0
    502 		/* cannot happen; iov_len is unsigned */
    503 		if (tiov->iov_len < 0) {
    504 			error = EINVAL;
    505 			goto bad;
    506 		}
    507 #endif
    508 		/*
    509 		 * Writes return ssize_t because -1 is returned on error.
    510 		 * Therefore, we must restrict the length to SSIZE_MAX to
    511 		 * avoid garbage return values.
    512 		 */
    513 		auio.uio_resid += tiov->iov_len;
    514 		if (tiov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
    515 			error = EINVAL;
    516 			goto bad;
    517 		}
    518 	}
    519 
    520 	if (mp->msg_name && to == NULL) {
    521 		error = sockargs(&to, mp->msg_name, mp->msg_namelen,
    522 		    MT_SONAME);
    523 		if (error)
    524 			goto bad;
    525 	}
    526 
    527 	if (mp->msg_control) {
    528 		if (mp->msg_controllen < CMSG_ALIGN(sizeof(struct cmsghdr))) {
    529 			error = EINVAL;
    530 			goto bad;
    531 		}
    532 		if (control == NULL) {
    533 			error = sockargs(&control, mp->msg_control,
    534 			    mp->msg_controllen, MT_CONTROL);
    535 			if (error)
    536 				goto bad;
    537 		}
    538 	}
    539 
    540 	if (ktrpoint(KTR_GENIO)) {
    541 		iovlen = auio.uio_iovcnt * sizeof(struct iovec);
    542 		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
    543 		memcpy(ktriov, auio.uio_iov, iovlen);
    544 	}
    545 
    546 	if ((error = fd_getsock(s, &so)) != 0)
    547 		goto bad;
    548 
    549 	if (mp->msg_name)
    550 		MCLAIM(to, so->so_mowner);
    551 	if (mp->msg_control)
    552 		MCLAIM(control, so->so_mowner);
    553 
    554 	len = auio.uio_resid;
    555 	error = (*so->so_send)(so, to, &auio, NULL, control, flags, l);
    556 	/* Protocol is responsible for freeing 'control' */
    557 	control = NULL;
    558 
    559 	fd_putfile(s);
    560 
    561 	if (error) {
    562 		if (auio.uio_resid != len && (error == ERESTART ||
    563 		    error == EINTR || error == EWOULDBLOCK))
    564 			error = 0;
    565 		if (error == EPIPE && (flags & MSG_NOSIGNAL) == 0) {
    566 			mutex_enter(proc_lock);
    567 			psignal(l->l_proc, SIGPIPE);
    568 			mutex_exit(proc_lock);
    569 		}
    570 	}
    571 	if (error == 0)
    572 		*retsize = len - auio.uio_resid;
    573 
    574 bad:
    575 	if (ktriov != NULL) {
    576 		ktrgeniov(s, UIO_WRITE, ktriov, *retsize, error);
    577 		free(ktriov, M_TEMP);
    578 	}
    579 
    580  	if (iov != aiov)
    581 		free(iov, M_IOV);
    582 	if (to)
    583 		m_freem(to);
    584 	if (control)
    585 		m_freem(control);
    586 
    587 	return (error);
    588 }
    589 
    590 int
    591 sys_recvfrom(struct lwp *l, const struct sys_recvfrom_args *uap, register_t *retval)
    592 {
    593 	/* {
    594 		syscallarg(int)			s;
    595 		syscallarg(void *)		buf;
    596 		syscallarg(size_t)		len;
    597 		syscallarg(int)			flags;
    598 		syscallarg(struct sockaddr *)	from;
    599 		syscallarg(unsigned int *)	fromlenaddr;
    600 	} */
    601 	struct msghdr	msg;
    602 	struct iovec	aiov;
    603 	int		error;
    604 	struct mbuf	*from;
    605 
    606 	msg.msg_name = NULL;
    607 	msg.msg_iov = &aiov;
    608 	msg.msg_iovlen = 1;
    609 	aiov.iov_base = SCARG(uap, buf);
    610 	aiov.iov_len = SCARG(uap, len);
    611 	msg.msg_control = NULL;
    612 	msg.msg_flags = SCARG(uap, flags) & MSG_USERFLAGS;
    613 
    614 	error = do_sys_recvmsg(l, SCARG(uap, s), &msg, &from, NULL, retval);
    615 	if (error != 0)
    616 		return error;
    617 
    618 	error = copyout_sockname(SCARG(uap, from), SCARG(uap, fromlenaddr),
    619 	    MSG_LENUSRSPACE, from);
    620 	if (from != NULL)
    621 		m_free(from);
    622 	return error;
    623 }
    624 
    625 int
    626 sys_recvmsg(struct lwp *l, const struct sys_recvmsg_args *uap, register_t *retval)
    627 {
    628 	/* {
    629 		syscallarg(int)			s;
    630 		syscallarg(struct msghdr *)	msg;
    631 		syscallarg(int)			flags;
    632 	} */
    633 	struct msghdr	msg;
    634 	int		error;
    635 	struct mbuf	*from, *control;
    636 
    637 	error = copyin(SCARG(uap, msg), &msg, sizeof(msg));
    638 	if (error)
    639 		return (error);
    640 
    641 	msg.msg_flags = (SCARG(uap, flags) & MSG_USERFLAGS) | MSG_IOVUSRSPACE;
    642 
    643 	error = do_sys_recvmsg(l, SCARG(uap, s), &msg, &from,
    644 	    msg.msg_control != NULL ? &control : NULL, retval);
    645 	if (error != 0)
    646 		return error;
    647 
    648 	if (msg.msg_control != NULL)
    649 		error = copyout_msg_control(l, &msg, control);
    650 
    651 	if (error == 0)
    652 		error = copyout_sockname(msg.msg_name, &msg.msg_namelen, 0,
    653 			from);
    654 	if (from != NULL)
    655 		m_free(from);
    656 	if (error == 0) {
    657 		ktrkuser("msghdr", &msg, sizeof msg);
    658 		error = copyout(&msg, SCARG(uap, msg), sizeof(msg));
    659 	}
    660 
    661 	return (error);
    662 }
    663 
    664 /*
    665  * Adjust for a truncated SCM_RIGHTS control message.
    666  *  This means closing any file descriptors that aren't present
    667  *  in the returned buffer.
    668  *  m is the mbuf holding the (already externalized) SCM_RIGHTS message.
    669  */
    670 static void
    671 free_rights(struct mbuf *m)
    672 {
    673 	int nfd;
    674 	int i;
    675 	int *fdv;
    676 
    677 	nfd = m->m_len < CMSG_SPACE(sizeof(int)) ? 0
    678 	    : (m->m_len - CMSG_SPACE(sizeof(int))) / sizeof(int) + 1;
    679 	fdv = (int *) CMSG_DATA(mtod(m,struct cmsghdr *));
    680 	for (i = 0; i < nfd; i++) {
    681 		if (fd_getfile(fdv[i]) != NULL)
    682 			(void)fd_close(fdv[i]);
    683 	}
    684 }
    685 
    686 void
    687 free_control_mbuf(struct lwp *l, struct mbuf *control, struct mbuf *uncopied)
    688 {
    689 	struct mbuf *next;
    690 	struct cmsghdr *cmsg;
    691 	bool do_free_rights = false;
    692 
    693 	while (control != NULL) {
    694 		cmsg = mtod(control, struct cmsghdr *);
    695 		if (control == uncopied)
    696 			do_free_rights = true;
    697 		if (do_free_rights && cmsg->cmsg_level == SOL_SOCKET
    698 		    && cmsg->cmsg_type == SCM_RIGHTS)
    699 			free_rights(control);
    700 		next = control->m_next;
    701 		m_free(control);
    702 		control = next;
    703 	}
    704 }
    705 
    706 /* Copy socket control/CMSG data to user buffer, frees the mbuf */
    707 int
    708 copyout_msg_control(struct lwp *l, struct msghdr *mp, struct mbuf *control)
    709 {
    710 	int i, len, error = 0;
    711 	struct cmsghdr *cmsg;
    712 	struct mbuf *m;
    713 	char *q;
    714 
    715 	len = mp->msg_controllen;
    716 	if (len <= 0 || control == 0) {
    717 		mp->msg_controllen = 0;
    718 		free_control_mbuf(l, control, control);
    719 		return 0;
    720 	}
    721 
    722 	q = (char *)mp->msg_control;
    723 
    724 	for (m = control; m != NULL; ) {
    725 		cmsg = mtod(m, struct cmsghdr *);
    726 		i = m->m_len;
    727 		if (len < i) {
    728 			mp->msg_flags |= MSG_CTRUNC;
    729 			if (cmsg->cmsg_level == SOL_SOCKET
    730 			    && cmsg->cmsg_type == SCM_RIGHTS)
    731 				/* Do not truncate me ... */
    732 				break;
    733 			i = len;
    734 		}
    735 		error = copyout(mtod(m, void *), q, i);
    736 		ktrkuser("msgcontrol", mtod(m, void *), i);
    737 		if (error != 0) {
    738 			/* We must free all the SCM_RIGHTS */
    739 			m = control;
    740 			break;
    741 		}
    742 		m = m->m_next;
    743 		if (m)
    744 			i = ALIGN(i);
    745 		q += i;
    746 		len -= i;
    747 		if (len <= 0)
    748 			break;
    749 	}
    750 
    751 	free_control_mbuf(l, control, m);
    752 
    753 	mp->msg_controllen = q - (char *)mp->msg_control;
    754 	return error;
    755 }
    756 
    757 int
    758 do_sys_recvmsg(struct lwp *l, int s, struct msghdr *mp, struct mbuf **from,
    759     struct mbuf **control, register_t *retsize)
    760 {
    761 	struct uio	auio;
    762 	struct iovec	aiov[UIO_SMALLIOV], *iov = aiov;
    763 	struct iovec	*tiov;
    764 	int		i, len, error, iovlen;
    765 	struct socket	*so;
    766 	struct iovec	*ktriov;
    767 
    768 	ktrkuser("msghdr", mp, sizeof *mp);
    769 
    770 	*from = NULL;
    771 	if (control != NULL)
    772 		*control = NULL;
    773 
    774 	if ((error = fd_getsock(s, &so)) != 0)
    775 		return (error);
    776 
    777 	if (mp->msg_flags & MSG_IOVUSRSPACE) {
    778 		if ((unsigned int)mp->msg_iovlen > UIO_SMALLIOV) {
    779 			if ((unsigned int)mp->msg_iovlen > IOV_MAX) {
    780 				error = EMSGSIZE;
    781 				goto out;
    782 			}
    783 			iov = malloc(sizeof(struct iovec) * mp->msg_iovlen,
    784 			    M_IOV, M_WAITOK);
    785 		}
    786 		if (mp->msg_iovlen != 0) {
    787 			error = copyin(mp->msg_iov, iov,
    788 			    (size_t)(mp->msg_iovlen * sizeof(struct iovec)));
    789 			if (error)
    790 				goto out;
    791 		}
    792 		auio.uio_iov = iov;
    793 	} else
    794 		auio.uio_iov = mp->msg_iov;
    795 	auio.uio_iovcnt = mp->msg_iovlen;
    796 	auio.uio_rw = UIO_READ;
    797 	auio.uio_offset = 0;			/* XXX */
    798 	auio.uio_resid = 0;
    799 	KASSERT(l == curlwp);
    800 	auio.uio_vmspace = l->l_proc->p_vmspace;
    801 
    802 	tiov = auio.uio_iov;
    803 	for (i = 0; i < mp->msg_iovlen; i++, tiov++) {
    804 #if 0
    805 		/* cannot happen iov_len is unsigned */
    806 		if (tiov->iov_len < 0) {
    807 			error = EINVAL;
    808 			goto out;
    809 		}
    810 #endif
    811 		/*
    812 		 * Reads return ssize_t because -1 is returned on error.
    813 		 * Therefore we must restrict the length to SSIZE_MAX to
    814 		 * avoid garbage return values.
    815 		 */
    816 		auio.uio_resid += tiov->iov_len;
    817 		if (tiov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
    818 			error = EINVAL;
    819 			goto out;
    820 		}
    821 	}
    822 
    823 	ktriov = NULL;
    824 	if (ktrpoint(KTR_GENIO)) {
    825 		iovlen = auio.uio_iovcnt * sizeof(struct iovec);
    826 		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
    827 		memcpy(ktriov, auio.uio_iov, iovlen);
    828 	}
    829 
    830 	len = auio.uio_resid;
    831 	mp->msg_flags &= MSG_USERFLAGS;
    832 	error = (*so->so_receive)(so, from, &auio, NULL, control,
    833 	    &mp->msg_flags);
    834 	len -= auio.uio_resid;
    835 	*retsize = len;
    836 	if (error != 0 && len != 0
    837 	    && (error == ERESTART || error == EINTR || error == EWOULDBLOCK))
    838 		/* Some data transferred */
    839 		error = 0;
    840 
    841 	if (ktriov != NULL) {
    842 		ktrgeniov(s, UIO_READ, ktriov, len, error);
    843 		free(ktriov, M_TEMP);
    844 	}
    845 
    846 	if (error != 0) {
    847 		m_freem(*from);
    848 		*from = NULL;
    849 		if (control != NULL) {
    850 			free_control_mbuf(l, *control, *control);
    851 			*control = NULL;
    852 		}
    853 	}
    854  out:
    855 	if (iov != aiov)
    856 		free(iov, M_TEMP);
    857 	fd_putfile(s);
    858 	return (error);
    859 }
    860 
    861 
    862 /* ARGSUSED */
    863 int
    864 sys_shutdown(struct lwp *l, const struct sys_shutdown_args *uap, register_t *retval)
    865 {
    866 	/* {
    867 		syscallarg(int)	s;
    868 		syscallarg(int)	how;
    869 	} */
    870 	struct socket	*so;
    871 	int		error;
    872 
    873 	if ((error = fd_getsock(SCARG(uap, s), &so)) != 0)
    874 		return (error);
    875 	solock(so);
    876 	error = soshutdown(so, SCARG(uap, how));
    877 	sounlock(so);
    878 	fd_putfile(SCARG(uap, s));
    879 	return (error);
    880 }
    881 
    882 /* ARGSUSED */
    883 int
    884 sys_setsockopt(struct lwp *l, const struct sys_setsockopt_args *uap, register_t *retval)
    885 {
    886 	/* {
    887 		syscallarg(int)			s;
    888 		syscallarg(int)			level;
    889 		syscallarg(int)			name;
    890 		syscallarg(const void *)	val;
    891 		syscallarg(unsigned int)	valsize;
    892 	} */
    893 	struct proc	*p;
    894 	struct mbuf	*m;
    895 	struct socket	*so;
    896 	int		error;
    897 	unsigned int	len;
    898 
    899 	p = l->l_proc;
    900 	m = NULL;
    901 	if ((error = fd_getsock(SCARG(uap, s), &so)) != 0)
    902 		return (error);
    903 	len = SCARG(uap, valsize);
    904 	if (len > MCLBYTES) {
    905 		error = EINVAL;
    906 		goto out;
    907 	}
    908 	if (SCARG(uap, val)) {
    909 		m = getsombuf(so, MT_SOOPTS);
    910 		if (len > MLEN)
    911 			m_clget(m, M_WAIT);
    912 		error = copyin(SCARG(uap, val), mtod(m, void *), len);
    913 		if (error) {
    914 			(void) m_free(m);
    915 			goto out;
    916 		}
    917 		m->m_len = SCARG(uap, valsize);
    918 	}
    919 	error = sosetopt(so, SCARG(uap, level), SCARG(uap, name), m);
    920  out:
    921  	fd_putfile(SCARG(uap, s));
    922 	return (error);
    923 }
    924 
    925 /* ARGSUSED */
    926 int
    927 sys_getsockopt(struct lwp *l, const struct sys_getsockopt_args *uap, register_t *retval)
    928 {
    929 	/* {
    930 		syscallarg(int)			s;
    931 		syscallarg(int)			level;
    932 		syscallarg(int)			name;
    933 		syscallarg(void *)		val;
    934 		syscallarg(unsigned int *)	avalsize;
    935 	} */
    936 	struct socket	*so;
    937 	struct mbuf	*m;
    938 	unsigned int	op, i, valsize;
    939 	int		error;
    940 	char *val = SCARG(uap, val);
    941 
    942 	m = NULL;
    943 	if ((error = fd_getsock(SCARG(uap, s), &so)) != 0)
    944 		return (error);
    945 	if (val != NULL) {
    946 		error = copyin(SCARG(uap, avalsize),
    947 			       &valsize, sizeof(valsize));
    948 		if (error)
    949 			goto out;
    950 	} else
    951 		valsize = 0;
    952 	error = sogetopt(so, SCARG(uap, level), SCARG(uap, name), &m);
    953 	if (error == 0 && val != NULL && valsize && m != NULL) {
    954 		op = 0;
    955 		while (m && !error && op < valsize) {
    956 			i = min(m->m_len, (valsize - op));
    957 			error = copyout(mtod(m, void *), val, i);
    958 			op += i;
    959 			val += i;
    960 			m = m_free(m);
    961 		}
    962 		valsize = op;
    963 		if (error == 0)
    964 			error = copyout(&valsize,
    965 					SCARG(uap, avalsize), sizeof(valsize));
    966 	}
    967 	if (m != NULL)
    968 		(void) m_freem(m);
    969  out:
    970  	fd_putfile(SCARG(uap, s));
    971 	return (error);
    972 }
    973 
#ifdef PIPE_SOCKETPAIR
/*
 * pipe system call implemented on top of a pair of AF_LOCAL stream
 * sockets (only built when PIPE_SOCKETPAIR is defined).
 *
 * Both sockets are tagged SS_ISAPIPE.  The read end gets an FREAD-only
 * file whose descriptor goes to retval[0]; the write end gets an
 * FWRITE-only file whose descriptor goes to retval[1].  The descriptors
 * are only made visible (fd_affix) once unp_connect2() has joined the two
 * sockets; on any failure the reserved descriptors are aborted and both
 * sockets closed.  v is not used.
 */
/* ARGSUSED */
int
sys_pipe(struct lwp *l, const void *v, register_t *retval)
{
	struct socket	*rso, *wso;
	file_t		*rfp, *wfp;
	proc_t		*proc = curproc;
	int		newfd, rv;

	rv = socreate(AF_LOCAL, &rso, SOCK_STREAM, 0, l, NULL);
	if (rv != 0)
		return rv;
	rv = socreate(AF_LOCAL, &wso, SOCK_STREAM, 0, l, rso);
	if (rv != 0)
		goto free1;

	/* mark both ends so the sockets are known to back a pipe */
	wso->so_state |= SS_ISAPIPE;
	rso->so_state |= SS_ISAPIPE;

	rv = fd_allocfile(&rfp, &newfd);
	if (rv != 0)
		goto free2;
	retval[0] = newfd;
	rfp->f_flag = FREAD;
	rfp->f_type = DTYPE_SOCKET;
	rfp->f_ops = &socketops;
	rfp->f_data = rso;

	rv = fd_allocfile(&wfp, &newfd);
	if (rv != 0)
		goto free3;
	wfp->f_flag = FWRITE;
	wfp->f_type = DTYPE_SOCKET;
	wfp->f_ops = &socketops;
	wfp->f_data = wso;
	retval[1] = newfd;

	solock(wso);
	rv = unp_connect2(wso, rso, PRU_CONNECT2);
	sounlock(wso);
	if (rv == 0) {
		fd_affix(proc, wfp, (int)retval[1]);
		fd_affix(proc, rfp, (int)retval[0]);
		return 0;
	}

	/* Unwind in reverse order of acquisition. */
	fd_abort(proc, wfp, (int)retval[1]);
 free3:
	fd_abort(proc, rfp, (int)retval[0]);
 free2:
	(void)soclose(wso);
 free1:
	(void)soclose(rso);
	return rv;
}
#endif /* PIPE_SOCKETPAIR */
   1025 
   1026 /*
   1027  * Get socket name.
   1028  */
   1029 /* ARGSUSED */
   1030 int
   1031 do_sys_getsockname(struct lwp *l, int fd, int which, struct mbuf **nam)
   1032 {
   1033 	struct socket	*so;
   1034 	struct mbuf	*m;
   1035 	int		error;
   1036 
   1037 	if ((error = fd_getsock(fd, &so)) != 0)
   1038 		return error;
   1039 
   1040 	m = m_getclr(M_WAIT, MT_SONAME);
   1041 	MCLAIM(m, so->so_mowner);
   1042 
   1043 	solock(so);
   1044 	if (which == PRU_PEERADDR
   1045 	    && (so->so_state & (SS_ISCONNECTED | SS_ISCONFIRMING)) == 0) {
   1046 		error = ENOTCONN;
   1047 	} else {
   1048 		*nam = m;
   1049 		error = (*so->so_proto->pr_usrreq)(so, which, NULL, m, NULL,
   1050 		    NULL);
   1051 	}
   1052  	sounlock(so);
   1053 	if (error != 0)
   1054 		m_free(m);
   1055  	fd_putfile(fd);
   1056 	return error;
   1057 }
   1058 
   1059 int
   1060 copyout_sockname(struct sockaddr *asa, unsigned int *alen, int flags,
   1061     struct mbuf *addr)
   1062 {
   1063 	int len;
   1064 	int error;
   1065 
   1066 	if (asa == NULL)
   1067 		/* Assume application not interested */
   1068 		return 0;
   1069 
   1070 	if (flags & MSG_LENUSRSPACE) {
   1071 		error = copyin(alen, &len, sizeof(len));
   1072 		if (error)
   1073 			return error;
   1074 	} else
   1075 		len = *alen;
   1076 	if (len < 0)
   1077 		return EINVAL;
   1078 
   1079 	if (addr == NULL) {
   1080 		len = 0;
   1081 		error = 0;
   1082 	} else {
   1083 		if (len > addr->m_len)
   1084 			len = addr->m_len;
   1085 		/* Maybe this ought to copy a chain ? */
   1086 		ktrkuser("sockname", mtod(addr, void *), len);
   1087 		error = copyout(mtod(addr, void *), asa, len);
   1088 	}
   1089 
   1090 	if (error == 0) {
   1091 		if (flags & MSG_LENUSRSPACE)
   1092 			error = copyout(&len, alen, sizeof(len));
   1093 		else
   1094 			*alen = len;
   1095 	}
   1096 
   1097 	return error;
   1098 }
   1099 
   1100 /*
   1101  * Get socket name.
   1102  */
   1103 /* ARGSUSED */
   1104 int
   1105 sys_getsockname(struct lwp *l, const struct sys_getsockname_args *uap, register_t *retval)
   1106 {
   1107 	/* {
   1108 		syscallarg(int)			fdes;
   1109 		syscallarg(struct sockaddr *)	asa;
   1110 		syscallarg(unsigned int *)	alen;
   1111 	} */
   1112 	struct mbuf	*m;
   1113 	int		error;
   1114 
   1115 	error = do_sys_getsockname(l, SCARG(uap, fdes), PRU_SOCKADDR, &m);
   1116 	if (error != 0)
   1117 		return error;
   1118 
   1119 	error = copyout_sockname(SCARG(uap, asa), SCARG(uap, alen),
   1120 	    MSG_LENUSRSPACE, m);
   1121 	if (m != NULL)
   1122 		m_free(m);
   1123 	return error;
   1124 }
   1125 
   1126 /*
   1127  * Get name of peer for connected socket.
   1128  */
   1129 /* ARGSUSED */
   1130 int
   1131 sys_getpeername(struct lwp *l, const struct sys_getpeername_args *uap, register_t *retval)
   1132 {
   1133 	/* {
   1134 		syscallarg(int)			fdes;
   1135 		syscallarg(struct sockaddr *)	asa;
   1136 		syscallarg(unsigned int *)	alen;
   1137 	} */
   1138 	struct mbuf	*m;
   1139 	int		error;
   1140 
   1141 	error = do_sys_getsockname(l, SCARG(uap, fdes), PRU_PEERADDR, &m);
   1142 	if (error != 0)
   1143 		return error;
   1144 
   1145 	error = copyout_sockname(SCARG(uap, asa), SCARG(uap, alen),
   1146 	    MSG_LENUSRSPACE, m);
   1147 	if (m != NULL)
   1148 		m_free(m);
   1149 	return error;
   1150 }
   1151 
   1152 /*
   1153  * XXX In a perfect world, we wouldn't pass around socket control
   1154  * XXX arguments in mbufs, and this could go away.
   1155  */
   1156 int
   1157 sockargs(struct mbuf **mp, const void *bf, size_t buflen, int type)
   1158 {
   1159 	struct sockaddr	*sa;
   1160 	struct mbuf	*m;
   1161 	int		error;
   1162 
   1163 	/*
   1164 	 * We can't allow socket names > UCHAR_MAX in length, since that
   1165 	 * will overflow sa_len.  Control data more than a page size in
   1166 	 * length is just too much.
   1167 	 */
   1168 	if (buflen > (type == MT_SONAME ? UCHAR_MAX : PAGE_SIZE))
   1169 		return (EINVAL);
   1170 
   1171 	/* Allocate an mbuf to hold the arguments. */
   1172 	m = m_get(M_WAIT, type);
   1173 	/* can't claim.  don't who to assign it to. */
   1174 	if (buflen > MLEN) {
   1175 		/*
   1176 		 * Won't fit into a regular mbuf, so we allocate just
   1177 		 * enough external storage to hold the argument.
   1178 		 */
   1179 		MEXTMALLOC(m, buflen, M_WAITOK);
   1180 	}
   1181 	m->m_len = buflen;
   1182 	error = copyin(bf, mtod(m, void *), buflen);
   1183 	if (error) {
   1184 		(void) m_free(m);
   1185 		return (error);
   1186 	}
   1187 	ktrkuser("sockargs", mtod(m, void *), buflen);
   1188 	*mp = m;
   1189 	if (type == MT_SONAME) {
   1190 		sa = mtod(m, struct sockaddr *);
   1191 #if BYTE_ORDER != BIG_ENDIAN
   1192 		/*
   1193 		 * 4.3BSD compat thing - need to stay, since bind(2),
   1194 		 * connect(2), sendto(2) were not versioned for COMPAT_43.
   1195 		 */
   1196 		if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
   1197 			sa->sa_family = sa->sa_len;
   1198 #endif
   1199 		sa->sa_len = buflen;
   1200 	}
   1201 	return (0);
   1202 }
   1203 
   1204 int
   1205 getsock(int fdes, struct file **fpp)
   1206 {
   1207 	file_t		*fp;
   1208 
   1209 	if ((fp = fd_getfile(fdes)) == NULL)
   1210 		return (EBADF);
   1211 
   1212 	if (fp->f_type != DTYPE_SOCKET) {
   1213 		fd_putfile(fdes);
   1214 		return (ENOTSOCK);
   1215 	}
   1216 	*fpp = fp;
   1217 	return (0);
   1218 }
   1219