uipc_syscalls.c revision 1.154.2.4 1 /* $NetBSD: uipc_syscalls.c,v 1.154.2.4 2013/02/18 22:00:49 riz Exp $ */
2
3 /*-
4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1982, 1986, 1989, 1990, 1993
34 * The Regents of the University of California. All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)uipc_syscalls.c 8.6 (Berkeley) 2/14/95
61 */
62
63 #include <sys/cdefs.h>
64 __KERNEL_RCSID(0, "$NetBSD: uipc_syscalls.c,v 1.154.2.4 2013/02/18 22:00:49 riz Exp $");
65
66 #include "opt_pipe.h"
67
68 #include <sys/param.h>
69 #include <sys/systm.h>
70 #include <sys/filedesc.h>
71 #include <sys/proc.h>
72 #include <sys/file.h>
73 #include <sys/buf.h>
74 #define MBUFTYPES
75 #include <sys/mbuf.h>
76 #include <sys/protosw.h>
77 #include <sys/socket.h>
78 #include <sys/socketvar.h>
79 #include <sys/signalvar.h>
80 #include <sys/un.h>
81 #include <sys/ktrace.h>
82 #include <sys/event.h>
83 #include <sys/atomic.h>
84 #include <sys/kauth.h>
85
86 #include <sys/mount.h>
87 #include <sys/syscallargs.h>
88
89 /*
90 * System call interface to the socket abstraction.
91 */
92 extern const struct fileops socketops;
93
94 int
95 sys___socket30(struct lwp *l, const struct sys___socket30_args *uap, register_t *retval)
96 {
97 /* {
98 syscallarg(int) domain;
99 syscallarg(int) type;
100 syscallarg(int) protocol;
101 } */
102 int fd, error;
103
104 error = fsocreate(SCARG(uap, domain), NULL, SCARG(uap, type),
105 SCARG(uap, protocol), l, &fd);
106 if (error == 0)
107 *retval = fd;
108 return error;
109 }
110
111 /* ARGSUSED */
112 int
113 sys_bind(struct lwp *l, const struct sys_bind_args *uap, register_t *retval)
114 {
115 /* {
116 syscallarg(int) s;
117 syscallarg(const struct sockaddr *) name;
118 syscallarg(unsigned int) namelen;
119 } */
120 struct mbuf *nam;
121 int error;
122
123 error = sockargs(&nam, SCARG(uap, name), SCARG(uap, namelen),
124 MT_SONAME);
125 if (error)
126 return error;
127
128 return do_sys_bind(l, SCARG(uap, s), nam);
129 }
130
131 int
132 do_sys_bind(struct lwp *l, int fd, struct mbuf *nam)
133 {
134 struct socket *so;
135 int error;
136
137 if ((error = fd_getsock(fd, &so)) != 0) {
138 m_freem(nam);
139 return (error);
140 }
141 MCLAIM(nam, so->so_mowner);
142 error = sobind(so, nam, l);
143 m_freem(nam);
144 fd_putfile(fd);
145 return error;
146 }
147
148 /* ARGSUSED */
149 int
150 sys_listen(struct lwp *l, const struct sys_listen_args *uap, register_t *retval)
151 {
152 /* {
153 syscallarg(int) s;
154 syscallarg(int) backlog;
155 } */
156 struct socket *so;
157 int error;
158
159 if ((error = fd_getsock(SCARG(uap, s), &so)) != 0)
160 return (error);
161 error = solisten(so, SCARG(uap, backlog), l);
162 fd_putfile(SCARG(uap, s));
163 return error;
164 }
165
166 int
167 do_sys_accept(struct lwp *l, int sock, struct mbuf **name, register_t *new_sock,
168 const sigset_t *mask, int flags, int clrflags)
169 {
170 file_t *fp, *fp2;
171 struct mbuf *nam;
172 int error, fd;
173 struct socket *so, *so2;
174 short wakeup_state = 0;
175
176 if ((fp = fd_getfile(sock)) == NULL)
177 return (EBADF);
178 if (fp->f_type != DTYPE_SOCKET) {
179 fd_putfile(sock);
180 return (ENOTSOCK);
181 }
182 if ((error = fd_allocfile(&fp2, &fd)) != 0) {
183 fd_putfile(sock);
184 return (error);
185 }
186 nam = m_get(M_WAIT, MT_SONAME);
187 *new_sock = fd;
188 so = fp->f_data;
189 solock(so);
190
191 if (__predict_false(mask))
192 sigsuspendsetup(l, mask);
193
194 if (!(so->so_proto->pr_flags & PR_LISTEN)) {
195 error = EOPNOTSUPP;
196 goto bad;
197 }
198 if ((so->so_options & SO_ACCEPTCONN) == 0) {
199 error = EINVAL;
200 goto bad;
201 }
202 if ((so->so_state & SS_NBIO) && so->so_qlen == 0) {
203 error = EWOULDBLOCK;
204 goto bad;
205 }
206 while (so->so_qlen == 0 && so->so_error == 0) {
207 if (so->so_state & SS_CANTRCVMORE) {
208 so->so_error = ECONNABORTED;
209 break;
210 }
211 if (wakeup_state & SS_RESTARTSYS) {
212 error = ERESTART;
213 goto bad;
214 }
215 error = sowait(so, true, 0);
216 if (error) {
217 goto bad;
218 }
219 wakeup_state = so->so_state;
220 }
221 if (so->so_error) {
222 error = so->so_error;
223 so->so_error = 0;
224 goto bad;
225 }
226 /* connection has been removed from the listen queue */
227 KNOTE(&so->so_rcv.sb_sel.sel_klist, NOTE_SUBMIT);
228 so2 = TAILQ_FIRST(&so->so_q);
229 if (soqremque(so2, 1) == 0)
230 panic("accept");
231 fp2->f_type = DTYPE_SOCKET;
232 fp2->f_flag = (fp->f_flag & ~clrflags) |
233 ((flags & SOCK_NONBLOCK) ? FNONBLOCK : 0)|
234 ((flags & SOCK_NOSIGPIPE) ? FNOSIGPIPE : 0);
235 fp2->f_ops = &socketops;
236 fp2->f_data = so2;
237 if (flags & SOCK_NONBLOCK)
238 so2->so_state |= SS_NBIO;
239 error = soaccept(so2, nam);
240 so2->so_cred = kauth_cred_dup(so->so_cred);
241 sounlock(so);
242 if (error) {
243 /* an error occurred, free the file descriptor and mbuf */
244 m_freem(nam);
245 mutex_enter(&fp2->f_lock);
246 fp2->f_count++;
247 mutex_exit(&fp2->f_lock);
248 closef(fp2);
249 fd_abort(curproc, NULL, fd);
250 } else {
251 fd_set_exclose(l, fd, (flags & SOCK_CLOEXEC) != 0);
252 fd_affix(curproc, fp2, fd);
253 *name = nam;
254 }
255 fd_putfile(sock);
256 if (__predict_false(mask))
257 sigsuspendteardown(l);
258 return (error);
259 bad:
260 sounlock(so);
261 m_freem(nam);
262 fd_putfile(sock);
263 fd_abort(curproc, fp2, fd);
264 if (__predict_false(mask))
265 sigsuspendteardown(l);
266 return (error);
267 }
268
269 int
270 sys_accept(struct lwp *l, const struct sys_accept_args *uap, register_t *retval)
271 {
272 /* {
273 syscallarg(int) s;
274 syscallarg(struct sockaddr *) name;
275 syscallarg(unsigned int *) anamelen;
276 } */
277 int error, fd;
278 struct mbuf *name;
279
280 error = do_sys_accept(l, SCARG(uap, s), &name, retval, NULL, 0, 0);
281 if (error != 0)
282 return error;
283 error = copyout_sockname(SCARG(uap, name), SCARG(uap, anamelen),
284 MSG_LENUSRSPACE, name);
285 if (name != NULL)
286 m_free(name);
287 if (error != 0) {
288 fd = (int)*retval;
289 if (fd_getfile(fd) != NULL)
290 (void)fd_close(fd);
291 }
292 return error;
293 }
294
295 int
296 sys_paccept(struct lwp *l, const struct sys_paccept_args *uap,
297 register_t *retval)
298 {
299 /* {
300 syscallarg(int) s;
301 syscallarg(struct sockaddr *) name;
302 syscallarg(unsigned int *) anamelen;
303 syscallarg(const sigset_t *) mask;
304 syscallarg(int) flags;
305 } */
306 int error, fd;
307 struct mbuf *name;
308 sigset_t *mask, amask;
309
310 if (SCARG(uap, mask) != NULL) {
311 error = copyin(SCARG(uap, mask), &amask, sizeof(amask));
312 if (error)
313 return error;
314 mask = &amask;
315 } else
316 mask = NULL;
317
318 error = do_sys_accept(l, SCARG(uap, s), &name, retval, mask,
319 SCARG(uap, flags), FNONBLOCK);
320 if (error != 0)
321 return error;
322 error = copyout_sockname(SCARG(uap, name), SCARG(uap, anamelen),
323 MSG_LENUSRSPACE, name);
324 if (name != NULL)
325 m_free(name);
326 if (error != 0) {
327 fd = (int)*retval;
328 if (fd_getfile(fd) != NULL)
329 (void)fd_close(fd);
330 }
331 return error;
332 }
333
334 /* ARGSUSED */
335 int
336 sys_connect(struct lwp *l, const struct sys_connect_args *uap, register_t *retval)
337 {
338 /* {
339 syscallarg(int) s;
340 syscallarg(const struct sockaddr *) name;
341 syscallarg(unsigned int) namelen;
342 } */
343 int error;
344 struct mbuf *nam;
345
346 error = sockargs(&nam, SCARG(uap, name), SCARG(uap, namelen),
347 MT_SONAME);
348 if (error)
349 return error;
350 return do_sys_connect(l, SCARG(uap, s), nam);
351 }
352
353 int
354 do_sys_connect(struct lwp *l, int fd, struct mbuf *nam)
355 {
356 struct socket *so;
357 int error;
358 int interrupted = 0;
359
360 if ((error = fd_getsock(fd, &so)) != 0) {
361 m_freem(nam);
362 return (error);
363 }
364 solock(so);
365 MCLAIM(nam, so->so_mowner);
366 if ((so->so_state & SS_ISCONNECTING) != 0) {
367 error = EALREADY;
368 goto out;
369 }
370
371 error = soconnect(so, nam, l);
372 if (error)
373 goto bad;
374 if ((so->so_state & (SS_NBIO|SS_ISCONNECTING)) ==
375 (SS_NBIO|SS_ISCONNECTING)) {
376 error = EINPROGRESS;
377 goto out;
378 }
379 while ((so->so_state & SS_ISCONNECTING) != 0 && so->so_error == 0) {
380 error = sowait(so, true, 0);
381 if (__predict_false((so->so_state & SS_ISABORTING) != 0)) {
382 error = EPIPE;
383 interrupted = 1;
384 break;
385 }
386 if (error) {
387 if (error == EINTR || error == ERESTART)
388 interrupted = 1;
389 break;
390 }
391 }
392 if (error == 0) {
393 error = so->so_error;
394 so->so_error = 0;
395 }
396 bad:
397 if (!interrupted)
398 so->so_state &= ~SS_ISCONNECTING;
399 if (error == ERESTART)
400 error = EINTR;
401 out:
402 sounlock(so);
403 fd_putfile(fd);
404 m_freem(nam);
405 return (error);
406 }
407
408 static int
409 makesocket(struct lwp *l, file_t **fp, int *fd, int flags, int type,
410 int domain, int proto, struct socket *soo)
411 {
412 int error;
413 struct socket *so;
414
415 if ((error = socreate(domain, &so, type, proto, l, soo)) != 0)
416 return error;
417
418 if ((error = fd_allocfile(fp, fd)) != 0) {
419 soclose(so);
420 return error;
421 }
422 fd_set_exclose(l, *fd, (flags & SOCK_CLOEXEC) != 0);
423 (*fp)->f_flag = FREAD|FWRITE|
424 ((flags & SOCK_NONBLOCK) ? FNONBLOCK : 0)|
425 ((flags & SOCK_NOSIGPIPE) ? FNOSIGPIPE : 0);
426 (*fp)->f_type = DTYPE_SOCKET;
427 (*fp)->f_ops = &socketops;
428 (*fp)->f_data = so;
429 if (flags & SOCK_NONBLOCK)
430 so->so_state |= SS_NBIO;
431 return 0;
432 }
433
434 int
435 sys_socketpair(struct lwp *l, const struct sys_socketpair_args *uap,
436 register_t *retval)
437 {
438 /* {
439 syscallarg(int) domain;
440 syscallarg(int) type;
441 syscallarg(int) protocol;
442 syscallarg(int *) rsv;
443 } */
444 file_t *fp1, *fp2;
445 struct socket *so1, *so2;
446 int fd, error, sv[2];
447 proc_t *p;
448 int flags = SCARG(uap, type) & SOCK_FLAGS_MASK;
449 int type = SCARG(uap, type) & ~SOCK_FLAGS_MASK;
450 int domain = SCARG(uap, domain);
451 int proto = SCARG(uap, protocol);
452
453 p = curproc;
454
455 error = makesocket(l, &fp1, &fd, flags, type, domain, proto, NULL);
456 if (error)
457 return error;
458 so1 = fp1->f_data;
459 sv[0] = fd;
460
461 error = makesocket(l, &fp2, &fd, flags, type, domain, proto, so1);
462 if (error)
463 goto out;
464 so2 = fp2->f_data;
465 sv[1] = fd;
466
467 solock(so1);
468 error = soconnect2(so1, so2);
469 if (error == 0 && type == SOCK_DGRAM) {
470 /*
471 * Datagram socket connection is asymmetric.
472 */
473 error = soconnect2(so2, so1);
474 }
475 sounlock(so1);
476
477 if (error == 0)
478 error = copyout(sv, SCARG(uap, rsv), sizeof(sv));
479 if (error == 0) {
480 fd_affix(p, fp2, sv[1]);
481 fd_affix(p, fp1, sv[0]);
482 return 0;
483 }
484 fd_abort(p, fp2, sv[1]);
485 (void)soclose(so2);
486 out:
487 fd_abort(p, fp1, sv[0]);
488 (void)soclose(so1);
489 return error;
490 }
491
492 int
493 sys_sendto(struct lwp *l, const struct sys_sendto_args *uap, register_t *retval)
494 {
495 /* {
496 syscallarg(int) s;
497 syscallarg(const void *) buf;
498 syscallarg(size_t) len;
499 syscallarg(int) flags;
500 syscallarg(const struct sockaddr *) to;
501 syscallarg(unsigned int) tolen;
502 } */
503 struct msghdr msg;
504 struct iovec aiov;
505
506 msg.msg_name = __UNCONST(SCARG(uap, to)); /* XXXUNCONST kills const */
507 msg.msg_namelen = SCARG(uap, tolen);
508 msg.msg_iov = &aiov;
509 msg.msg_iovlen = 1;
510 msg.msg_control = NULL;
511 msg.msg_flags = 0;
512 aiov.iov_base = __UNCONST(SCARG(uap, buf)); /* XXXUNCONST kills const */
513 aiov.iov_len = SCARG(uap, len);
514 return do_sys_sendmsg(l, SCARG(uap, s), &msg, SCARG(uap, flags), retval);
515 }
516
517 int
518 sys_sendmsg(struct lwp *l, const struct sys_sendmsg_args *uap, register_t *retval)
519 {
520 /* {
521 syscallarg(int) s;
522 syscallarg(const struct msghdr *) msg;
523 syscallarg(int) flags;
524 } */
525 struct msghdr msg;
526 int error;
527
528 error = copyin(SCARG(uap, msg), &msg, sizeof(msg));
529 if (error)
530 return (error);
531
532 msg.msg_flags = MSG_IOVUSRSPACE;
533 return do_sys_sendmsg(l, SCARG(uap, s), &msg, SCARG(uap, flags), retval);
534 }
535
536 int
537 do_sys_sendmsg(struct lwp *l, int s, struct msghdr *mp, int flags,
538 register_t *retsize)
539 {
540 struct iovec aiov[UIO_SMALLIOV], *iov = aiov, *tiov, *ktriov = NULL;
541 struct mbuf *to, *control;
542 struct socket *so;
543 file_t *fp;
544 struct uio auio;
545 size_t len, iovsz;
546 int i, error;
547
548 ktrkuser("msghdr", mp, sizeof *mp);
549
550 /* If the caller passed us stuff in mbufs, we must free them. */
551 to = (mp->msg_flags & MSG_NAMEMBUF) ? mp->msg_name : NULL;
552 control = (mp->msg_flags & MSG_CONTROLMBUF) ? mp->msg_control : NULL;
553 iovsz = mp->msg_iovlen * sizeof(struct iovec);
554
555 if (mp->msg_flags & MSG_IOVUSRSPACE) {
556 if ((unsigned int)mp->msg_iovlen > UIO_SMALLIOV) {
557 if ((unsigned int)mp->msg_iovlen > IOV_MAX) {
558 error = EMSGSIZE;
559 goto bad;
560 }
561 iov = kmem_alloc(iovsz, KM_SLEEP);
562 }
563 if (mp->msg_iovlen != 0) {
564 error = copyin(mp->msg_iov, iov, iovsz);
565 if (error)
566 goto bad;
567 }
568 mp->msg_iov = iov;
569 }
570
571 auio.uio_iov = mp->msg_iov;
572 auio.uio_iovcnt = mp->msg_iovlen;
573 auio.uio_rw = UIO_WRITE;
574 auio.uio_offset = 0; /* XXX */
575 auio.uio_resid = 0;
576 KASSERT(l == curlwp);
577 auio.uio_vmspace = l->l_proc->p_vmspace;
578
579 for (i = 0, tiov = mp->msg_iov; i < mp->msg_iovlen; i++, tiov++) {
580 /*
581 * Writes return ssize_t because -1 is returned on error.
582 * Therefore, we must restrict the length to SSIZE_MAX to
583 * avoid garbage return values.
584 */
585 auio.uio_resid += tiov->iov_len;
586 if (tiov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
587 error = EINVAL;
588 goto bad;
589 }
590 }
591
592 if (mp->msg_name && to == NULL) {
593 error = sockargs(&to, mp->msg_name, mp->msg_namelen,
594 MT_SONAME);
595 if (error)
596 goto bad;
597 }
598
599 if (mp->msg_control) {
600 if (mp->msg_controllen < CMSG_ALIGN(sizeof(struct cmsghdr))) {
601 error = EINVAL;
602 goto bad;
603 }
604 if (control == NULL) {
605 error = sockargs(&control, mp->msg_control,
606 mp->msg_controllen, MT_CONTROL);
607 if (error)
608 goto bad;
609 }
610 }
611
612 if (ktrpoint(KTR_GENIO) && iovsz > 0) {
613 ktriov = kmem_alloc(iovsz, KM_SLEEP);
614 memcpy(ktriov, auio.uio_iov, iovsz);
615 }
616
617 if ((error = fd_getsock1(s, &so, &fp)) != 0)
618 goto bad;
619
620 if (mp->msg_name)
621 MCLAIM(to, so->so_mowner);
622 if (mp->msg_control)
623 MCLAIM(control, so->so_mowner);
624
625 len = auio.uio_resid;
626 error = (*so->so_send)(so, to, &auio, NULL, control, flags, l);
627 /* Protocol is responsible for freeing 'control' */
628 control = NULL;
629
630 fd_putfile(s);
631
632 if (error) {
633 if (auio.uio_resid != len && (error == ERESTART ||
634 error == EINTR || error == EWOULDBLOCK))
635 error = 0;
636 if (error == EPIPE && (fp->f_flag & FNOSIGPIPE) == 0 &&
637 (flags & MSG_NOSIGNAL) == 0) {
638 mutex_enter(proc_lock);
639 psignal(l->l_proc, SIGPIPE);
640 mutex_exit(proc_lock);
641 }
642 }
643 if (error == 0)
644 *retsize = len - auio.uio_resid;
645
646 bad:
647 if (ktriov != NULL) {
648 ktrgeniov(s, UIO_WRITE, ktriov, *retsize, error);
649 kmem_free(ktriov, iovsz);
650 }
651
652 if (iov != aiov)
653 kmem_free(iov, iovsz);
654 if (to)
655 m_freem(to);
656 if (control)
657 m_freem(control);
658
659 return (error);
660 }
661
662 int
663 sys_recvfrom(struct lwp *l, const struct sys_recvfrom_args *uap, register_t *retval)
664 {
665 /* {
666 syscallarg(int) s;
667 syscallarg(void *) buf;
668 syscallarg(size_t) len;
669 syscallarg(int) flags;
670 syscallarg(struct sockaddr *) from;
671 syscallarg(unsigned int *) fromlenaddr;
672 } */
673 struct msghdr msg;
674 struct iovec aiov;
675 int error;
676 struct mbuf *from;
677
678 msg.msg_name = NULL;
679 msg.msg_iov = &aiov;
680 msg.msg_iovlen = 1;
681 aiov.iov_base = SCARG(uap, buf);
682 aiov.iov_len = SCARG(uap, len);
683 msg.msg_control = NULL;
684 msg.msg_flags = SCARG(uap, flags) & MSG_USERFLAGS;
685
686 error = do_sys_recvmsg(l, SCARG(uap, s), &msg, &from, NULL, retval);
687 if (error != 0)
688 return error;
689
690 error = copyout_sockname(SCARG(uap, from), SCARG(uap, fromlenaddr),
691 MSG_LENUSRSPACE, from);
692 if (from != NULL)
693 m_free(from);
694 return error;
695 }
696
697 int
698 sys_recvmsg(struct lwp *l, const struct sys_recvmsg_args *uap, register_t *retval)
699 {
700 /* {
701 syscallarg(int) s;
702 syscallarg(struct msghdr *) msg;
703 syscallarg(int) flags;
704 } */
705 struct msghdr msg;
706 int error;
707 struct mbuf *from, *control;
708
709 error = copyin(SCARG(uap, msg), &msg, sizeof(msg));
710 if (error)
711 return (error);
712
713 msg.msg_flags = (SCARG(uap, flags) & MSG_USERFLAGS) | MSG_IOVUSRSPACE;
714
715 error = do_sys_recvmsg(l, SCARG(uap, s), &msg, &from,
716 msg.msg_control != NULL ? &control : NULL, retval);
717 if (error != 0)
718 return error;
719
720 if (msg.msg_control != NULL)
721 error = copyout_msg_control(l, &msg, control);
722
723 if (error == 0)
724 error = copyout_sockname(msg.msg_name, &msg.msg_namelen, 0,
725 from);
726 if (from != NULL)
727 m_free(from);
728 if (error == 0) {
729 ktrkuser("msghdr", &msg, sizeof msg);
730 error = copyout(&msg, SCARG(uap, msg), sizeof(msg));
731 }
732
733 return (error);
734 }
735
736 /*
737 * Adjust for a truncated SCM_RIGHTS control message.
738 * This means closing any file descriptors that aren't present
739 * in the returned buffer.
740 * m is the mbuf holding the (already externalized) SCM_RIGHTS message.
741 */
742 static void
743 free_rights(struct mbuf *m)
744 {
745 struct cmsghdr *cm;
746 int *fdv;
747 unsigned int nfds, i;
748
749 KASSERT(sizeof(*cm) <= m->m_len);
750 cm = mtod(m, struct cmsghdr *);
751
752 KASSERT(CMSG_ALIGN(sizeof(*cm)) <= cm->cmsg_len);
753 KASSERT(cm->cmsg_len <= m->m_len);
754 nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof(int);
755 fdv = (int *)CMSG_DATA(cm);
756
757 for (i = 0; i < nfds; i++)
758 if (fd_getfile(fdv[i]) != NULL)
759 (void)fd_close(fdv[i]);
760 }
761
762 void
763 free_control_mbuf(struct lwp *l, struct mbuf *control, struct mbuf *uncopied)
764 {
765 struct mbuf *next;
766 struct cmsghdr *cmsg;
767 bool do_free_rights = false;
768
769 while (control != NULL) {
770 cmsg = mtod(control, struct cmsghdr *);
771 if (control == uncopied)
772 do_free_rights = true;
773 if (do_free_rights && cmsg->cmsg_level == SOL_SOCKET
774 && cmsg->cmsg_type == SCM_RIGHTS)
775 free_rights(control);
776 next = control->m_next;
777 m_free(control);
778 control = next;
779 }
780 }
781
782 /* Copy socket control/CMSG data to user buffer, frees the mbuf */
783 int
784 copyout_msg_control(struct lwp *l, struct msghdr *mp, struct mbuf *control)
785 {
786 int i, len, error = 0;
787 struct cmsghdr *cmsg;
788 struct mbuf *m;
789 char *q;
790
791 len = mp->msg_controllen;
792 if (len <= 0 || control == 0) {
793 mp->msg_controllen = 0;
794 free_control_mbuf(l, control, control);
795 return 0;
796 }
797
798 q = (char *)mp->msg_control;
799
800 for (m = control; m != NULL; ) {
801 cmsg = mtod(m, struct cmsghdr *);
802 i = m->m_len;
803 if (len < i) {
804 mp->msg_flags |= MSG_CTRUNC;
805 if (cmsg->cmsg_level == SOL_SOCKET
806 && cmsg->cmsg_type == SCM_RIGHTS)
807 /* Do not truncate me ... */
808 break;
809 i = len;
810 }
811 error = copyout(mtod(m, void *), q, i);
812 ktrkuser("msgcontrol", mtod(m, void *), i);
813 if (error != 0) {
814 /* We must free all the SCM_RIGHTS */
815 m = control;
816 break;
817 }
818 m = m->m_next;
819 if (m)
820 i = ALIGN(i);
821 q += i;
822 len -= i;
823 if (len <= 0)
824 break;
825 }
826
827 free_control_mbuf(l, control, m);
828
829 mp->msg_controllen = q - (char *)mp->msg_control;
830 return error;
831 }
832
833 int
834 do_sys_recvmsg(struct lwp *l, int s, struct msghdr *mp, struct mbuf **from,
835 struct mbuf **control, register_t *retsize)
836 {
837 struct iovec aiov[UIO_SMALLIOV], *iov = aiov, *tiov, *ktriov = NULL;
838 struct socket *so;
839 struct uio auio;
840 size_t len, iovsz;
841 int i, error;
842
843 ktrkuser("msghdr", mp, sizeof *mp);
844
845 *from = NULL;
846 if (control != NULL)
847 *control = NULL;
848
849 if ((error = fd_getsock(s, &so)) != 0)
850 return (error);
851
852 iovsz = mp->msg_iovlen * sizeof(struct iovec);
853
854 if (mp->msg_flags & MSG_IOVUSRSPACE) {
855 if ((unsigned int)mp->msg_iovlen > UIO_SMALLIOV) {
856 if ((unsigned int)mp->msg_iovlen > IOV_MAX) {
857 error = EMSGSIZE;
858 goto out;
859 }
860 iov = kmem_alloc(iovsz, KM_SLEEP);
861 }
862 if (mp->msg_iovlen != 0) {
863 error = copyin(mp->msg_iov, iov, iovsz);
864 if (error)
865 goto out;
866 }
867 auio.uio_iov = iov;
868 } else
869 auio.uio_iov = mp->msg_iov;
870 auio.uio_iovcnt = mp->msg_iovlen;
871 auio.uio_rw = UIO_READ;
872 auio.uio_offset = 0; /* XXX */
873 auio.uio_resid = 0;
874 KASSERT(l == curlwp);
875 auio.uio_vmspace = l->l_proc->p_vmspace;
876
877 tiov = auio.uio_iov;
878 for (i = 0; i < mp->msg_iovlen; i++, tiov++) {
879 /*
880 * Reads return ssize_t because -1 is returned on error.
881 * Therefore we must restrict the length to SSIZE_MAX to
882 * avoid garbage return values.
883 */
884 auio.uio_resid += tiov->iov_len;
885 if (tiov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
886 error = EINVAL;
887 goto out;
888 }
889 }
890
891 if (ktrpoint(KTR_GENIO) && iovsz > 0) {
892 ktriov = kmem_alloc(iovsz, KM_SLEEP);
893 memcpy(ktriov, auio.uio_iov, iovsz);
894 }
895
896 len = auio.uio_resid;
897 mp->msg_flags &= MSG_USERFLAGS;
898 error = (*so->so_receive)(so, from, &auio, NULL, control,
899 &mp->msg_flags);
900 len -= auio.uio_resid;
901 *retsize = len;
902 if (error != 0 && len != 0
903 && (error == ERESTART || error == EINTR || error == EWOULDBLOCK))
904 /* Some data transferred */
905 error = 0;
906
907 if (ktriov != NULL) {
908 ktrgeniov(s, UIO_READ, ktriov, len, error);
909 kmem_free(ktriov, iovsz);
910 }
911
912 if (error != 0) {
913 m_freem(*from);
914 *from = NULL;
915 if (control != NULL) {
916 free_control_mbuf(l, *control, *control);
917 *control = NULL;
918 }
919 }
920 out:
921 if (iov != aiov)
922 kmem_free(iov, iovsz);
923 fd_putfile(s);
924 return (error);
925 }
926
927
928 /* ARGSUSED */
929 int
930 sys_shutdown(struct lwp *l, const struct sys_shutdown_args *uap, register_t *retval)
931 {
932 /* {
933 syscallarg(int) s;
934 syscallarg(int) how;
935 } */
936 struct socket *so;
937 int error;
938
939 if ((error = fd_getsock(SCARG(uap, s), &so)) != 0)
940 return (error);
941 solock(so);
942 error = soshutdown(so, SCARG(uap, how));
943 sounlock(so);
944 fd_putfile(SCARG(uap, s));
945 return (error);
946 }
947
948 /* ARGSUSED */
949 int
950 sys_setsockopt(struct lwp *l, const struct sys_setsockopt_args *uap, register_t *retval)
951 {
952 /* {
953 syscallarg(int) s;
954 syscallarg(int) level;
955 syscallarg(int) name;
956 syscallarg(const void *) val;
957 syscallarg(unsigned int) valsize;
958 } */
959 struct sockopt sopt;
960 struct socket *so;
961 file_t *fp;
962 int error;
963 unsigned int len;
964
965 len = SCARG(uap, valsize);
966 if (len > 0 && SCARG(uap, val) == NULL)
967 return (EINVAL);
968
969 if (len > MCLBYTES)
970 return (EINVAL);
971
972 if ((error = fd_getsock1(SCARG(uap, s), &so, &fp)) != 0)
973 return (error);
974
975 sockopt_init(&sopt, SCARG(uap, level), SCARG(uap, name), len);
976
977 if (len > 0) {
978 error = copyin(SCARG(uap, val), sopt.sopt_data, len);
979 if (error)
980 goto out;
981 }
982
983 error = sosetopt(so, &sopt);
984 if (so->so_options & SO_NOSIGPIPE)
985 atomic_or_uint(&fp->f_flag, FNOSIGPIPE);
986 else
987 atomic_and_uint(&fp->f_flag, ~FNOSIGPIPE);
988
989 out:
990 sockopt_destroy(&sopt);
991 fd_putfile(SCARG(uap, s));
992 return (error);
993 }
994
995 /* ARGSUSED */
996 int
997 sys_getsockopt(struct lwp *l, const struct sys_getsockopt_args *uap, register_t *retval)
998 {
999 /* {
1000 syscallarg(int) s;
1001 syscallarg(int) level;
1002 syscallarg(int) name;
1003 syscallarg(void *) val;
1004 syscallarg(unsigned int *) avalsize;
1005 } */
1006 struct sockopt sopt;
1007 struct socket *so;
1008 file_t *fp;
1009 unsigned int valsize, len;
1010 int error;
1011
1012 if (SCARG(uap, val) != NULL) {
1013 error = copyin(SCARG(uap, avalsize), &valsize, sizeof(valsize));
1014 if (error)
1015 return (error);
1016 } else
1017 valsize = 0;
1018
1019 if ((error = fd_getsock1(SCARG(uap, s), &so, &fp)) != 0)
1020 return (error);
1021
1022 sockopt_init(&sopt, SCARG(uap, level), SCARG(uap, name), 0);
1023
1024 if (fp->f_flag & FNOSIGPIPE)
1025 so->so_options |= SO_NOSIGPIPE;
1026 else
1027 so->so_options &= ~SO_NOSIGPIPE;
1028 error = sogetopt(so, &sopt);
1029 if (error)
1030 goto out;
1031
1032 if (valsize > 0) {
1033 len = min(valsize, sopt.sopt_size);
1034 error = copyout(sopt.sopt_data, SCARG(uap, val), len);
1035 if (error)
1036 goto out;
1037
1038 error = copyout(&len, SCARG(uap, avalsize), sizeof(len));
1039 if (error)
1040 goto out;
1041 }
1042
1043 out:
1044 sockopt_destroy(&sopt);
1045 fd_putfile(SCARG(uap, s));
1046 return (error);
1047 }
1048
#ifdef PIPE_SOCKETPAIR
/*
 * Implement a pipe as a connected pair of AF_LOCAL stream sockets.
 *
 * The read end's descriptor is returned in retval[0] and the write end's
 * in retval[1].  `flags' may contain only O_CLOEXEC, O_NONBLOCK and
 * O_NOSIGPIPE; anything else yields EINVAL.  O_CLOEXEC is applied to
 * both descriptors with fd_set_exclose() and O_NONBLOCK also sets
 * SS_NBIO on both sockets, matching what makesocket() does for the
 * equivalent SOCK_* flags.
 *
 * Returns 0 or an errno value; on failure everything created so far is
 * released.
 */
/* ARGSUSED */
int
pipe1(struct lwp *l, register_t *retval, int flags)
{
	file_t		*rf, *wf;
	struct socket	*rso, *wso;
	int		fd, error;
	proc_t		*p;

	if (flags & ~(O_CLOEXEC|O_NONBLOCK|O_NOSIGPIPE))
		return EINVAL;
	p = curproc;
	if ((error = socreate(AF_LOCAL, &rso, SOCK_STREAM, 0, l, NULL)) != 0)
		return (error);
	if ((error = socreate(AF_LOCAL, &wso, SOCK_STREAM, 0, l, rso)) != 0)
		goto free1;
	/* remember this socket pair implements a pipe */
	wso->so_state |= SS_ISAPIPE;
	rso->so_state |= SS_ISAPIPE;
	/*
	 * The file flags below carry O_NONBLOCK; keep the sockets' own
	 * non-blocking state in step with them.
	 */
	if (flags & O_NONBLOCK) {
		wso->so_state |= SS_NBIO;
		rso->so_state |= SS_NBIO;
	}
	if ((error = fd_allocfile(&rf, &fd)) != 0)
		goto free2;
	retval[0] = fd;
	/* Honour O_CLOEXEC on the read end. */
	fd_set_exclose(l, fd, (flags & O_CLOEXEC) != 0);
	rf->f_flag = FREAD | flags;
	rf->f_type = DTYPE_SOCKET;
	rf->f_ops = &socketops;
	rf->f_data = rso;
	if ((error = fd_allocfile(&wf, &fd)) != 0)
		goto free3;
	/* Honour O_CLOEXEC on the write end. */
	fd_set_exclose(l, fd, (flags & O_CLOEXEC) != 0);
	wf->f_flag = FWRITE | flags;
	wf->f_type = DTYPE_SOCKET;
	wf->f_ops = &socketops;
	wf->f_data = wso;
	retval[1] = fd;
	solock(wso);
	error = unp_connect2(wso, rso, PRU_CONNECT2);
	sounlock(wso);
	if (error != 0)
		goto free4;
	fd_affix(p, wf, (int)retval[1]);
	fd_affix(p, rf, (int)retval[0]);
	return (0);
 free4:
	fd_abort(p, wf, (int)retval[1]);
 free3:
	fd_abort(p, rf, (int)retval[0]);
 free2:
	(void)soclose(wso);
 free1:
	(void)soclose(rso);
	return (error);
}
#endif /* PIPE_SOCKETPAIR */
1102
1103 /*
1104 * Get socket name.
1105 */
1106 /* ARGSUSED */
1107 int
1108 do_sys_getsockname(struct lwp *l, int fd, int which, struct mbuf **nam)
1109 {
1110 struct socket *so;
1111 struct mbuf *m;
1112 int error;
1113
1114 if ((error = fd_getsock(fd, &so)) != 0)
1115 return error;
1116
1117 m = m_getclr(M_WAIT, MT_SONAME);
1118 MCLAIM(m, so->so_mowner);
1119
1120 solock(so);
1121 if (which == PRU_PEERADDR
1122 && (so->so_state & (SS_ISCONNECTED | SS_ISCONFIRMING)) == 0) {
1123 error = ENOTCONN;
1124 } else {
1125 *nam = m;
1126 error = (*so->so_proto->pr_usrreq)(so, which, NULL, m, NULL,
1127 NULL);
1128 }
1129 sounlock(so);
1130 if (error != 0)
1131 m_free(m);
1132 fd_putfile(fd);
1133 return error;
1134 }
1135
1136 int
1137 copyout_sockname(struct sockaddr *asa, unsigned int *alen, int flags,
1138 struct mbuf *addr)
1139 {
1140 int len;
1141 int error;
1142
1143 if (asa == NULL)
1144 /* Assume application not interested */
1145 return 0;
1146
1147 if (flags & MSG_LENUSRSPACE) {
1148 error = copyin(alen, &len, sizeof(len));
1149 if (error)
1150 return error;
1151 } else
1152 len = *alen;
1153 if (len < 0)
1154 return EINVAL;
1155
1156 if (addr == NULL) {
1157 len = 0;
1158 error = 0;
1159 } else {
1160 if (len > addr->m_len)
1161 len = addr->m_len;
1162 /* Maybe this ought to copy a chain ? */
1163 ktrkuser("sockname", mtod(addr, void *), len);
1164 error = copyout(mtod(addr, void *), asa, len);
1165 }
1166
1167 if (error == 0) {
1168 if (flags & MSG_LENUSRSPACE)
1169 error = copyout(&len, alen, sizeof(len));
1170 else
1171 *alen = len;
1172 }
1173
1174 return error;
1175 }
1176
1177 /*
1178 * Get socket name.
1179 */
1180 /* ARGSUSED */
1181 int
1182 sys_getsockname(struct lwp *l, const struct sys_getsockname_args *uap, register_t *retval)
1183 {
1184 /* {
1185 syscallarg(int) fdes;
1186 syscallarg(struct sockaddr *) asa;
1187 syscallarg(unsigned int *) alen;
1188 } */
1189 struct mbuf *m;
1190 int error;
1191
1192 error = do_sys_getsockname(l, SCARG(uap, fdes), PRU_SOCKADDR, &m);
1193 if (error != 0)
1194 return error;
1195
1196 error = copyout_sockname(SCARG(uap, asa), SCARG(uap, alen),
1197 MSG_LENUSRSPACE, m);
1198 if (m != NULL)
1199 m_free(m);
1200 return error;
1201 }
1202
1203 /*
1204 * Get name of peer for connected socket.
1205 */
1206 /* ARGSUSED */
1207 int
1208 sys_getpeername(struct lwp *l, const struct sys_getpeername_args *uap, register_t *retval)
1209 {
1210 /* {
1211 syscallarg(int) fdes;
1212 syscallarg(struct sockaddr *) asa;
1213 syscallarg(unsigned int *) alen;
1214 } */
1215 struct mbuf *m;
1216 int error;
1217
1218 error = do_sys_getsockname(l, SCARG(uap, fdes), PRU_PEERADDR, &m);
1219 if (error != 0)
1220 return error;
1221
1222 error = copyout_sockname(SCARG(uap, asa), SCARG(uap, alen),
1223 MSG_LENUSRSPACE, m);
1224 if (m != NULL)
1225 m_free(m);
1226 return error;
1227 }
1228
1229 /*
1230 * XXX In a perfect world, we wouldn't pass around socket control
1231 * XXX arguments in mbufs, and this could go away.
1232 */
1233 int
1234 sockargs(struct mbuf **mp, const void *bf, size_t buflen, int type)
1235 {
1236 struct sockaddr *sa;
1237 struct mbuf *m;
1238 int error;
1239
1240 /*
1241 * We can't allow socket names > UCHAR_MAX in length, since that
1242 * will overflow sa_len. Control data more than a page size in
1243 * length is just too much.
1244 */
1245 if (buflen > (type == MT_SONAME ? UCHAR_MAX : PAGE_SIZE))
1246 return (EINVAL);
1247
1248 /* Allocate an mbuf to hold the arguments. */
1249 m = m_get(M_WAIT, type);
1250 /* can't claim. don't who to assign it to. */
1251 if (buflen > MLEN) {
1252 /*
1253 * Won't fit into a regular mbuf, so we allocate just
1254 * enough external storage to hold the argument.
1255 */
1256 MEXTMALLOC(m, buflen, M_WAITOK);
1257 }
1258 m->m_len = buflen;
1259 error = copyin(bf, mtod(m, void *), buflen);
1260 if (error) {
1261 (void) m_free(m);
1262 return (error);
1263 }
1264 ktrkuser(mbuftypes[type], mtod(m, void *), buflen);
1265 *mp = m;
1266 if (type == MT_SONAME) {
1267 sa = mtod(m, struct sockaddr *);
1268 #if BYTE_ORDER != BIG_ENDIAN
1269 /*
1270 * 4.3BSD compat thing - need to stay, since bind(2),
1271 * connect(2), sendto(2) were not versioned for COMPAT_43.
1272 */
1273 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1274 sa->sa_family = sa->sa_len;
1275 #endif
1276 sa->sa_len = buflen;
1277 }
1278 return (0);
1279 }
1280