uipc_syscalls.c revision 1.154.2.5 1 /* $NetBSD: uipc_syscalls.c,v 1.154.2.5 2013/12/14 19:36:33 bouyer Exp $ */
2
3 /*-
4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1982, 1986, 1989, 1990, 1993
34 * The Regents of the University of California. All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)uipc_syscalls.c 8.6 (Berkeley) 2/14/95
61 */
62
63 #include <sys/cdefs.h>
64 __KERNEL_RCSID(0, "$NetBSD: uipc_syscalls.c,v 1.154.2.5 2013/12/14 19:36:33 bouyer Exp $");
65
66 #include "opt_pipe.h"
67
68 #include <sys/param.h>
69 #include <sys/systm.h>
70 #include <sys/filedesc.h>
71 #include <sys/proc.h>
72 #include <sys/file.h>
73 #include <sys/buf.h>
74 #define MBUFTYPES
75 #include <sys/mbuf.h>
76 #include <sys/protosw.h>
77 #include <sys/socket.h>
78 #include <sys/socketvar.h>
79 #include <sys/signalvar.h>
80 #include <sys/un.h>
81 #include <sys/ktrace.h>
82 #include <sys/event.h>
83 #include <sys/atomic.h>
84 #include <sys/kauth.h>
85
86 #include <sys/mount.h>
87 #include <sys/syscallargs.h>
88
89 /*
90 * System call interface to the socket abstraction.
91 */
92 extern const struct fileops socketops;
93
94 int
95 sys___socket30(struct lwp *l, const struct sys___socket30_args *uap, register_t *retval)
96 {
97 /* {
98 syscallarg(int) domain;
99 syscallarg(int) type;
100 syscallarg(int) protocol;
101 } */
102 int fd, error;
103
104 error = fsocreate(SCARG(uap, domain), NULL, SCARG(uap, type),
105 SCARG(uap, protocol), l, &fd);
106 if (error == 0)
107 *retval = fd;
108 return error;
109 }
110
111 /* ARGSUSED */
112 int
113 sys_bind(struct lwp *l, const struct sys_bind_args *uap, register_t *retval)
114 {
115 /* {
116 syscallarg(int) s;
117 syscallarg(const struct sockaddr *) name;
118 syscallarg(unsigned int) namelen;
119 } */
120 struct mbuf *nam;
121 int error;
122
123 error = sockargs(&nam, SCARG(uap, name), SCARG(uap, namelen),
124 MT_SONAME);
125 if (error)
126 return error;
127
128 return do_sys_bind(l, SCARG(uap, s), nam);
129 }
130
131 int
132 do_sys_bind(struct lwp *l, int fd, struct mbuf *nam)
133 {
134 struct socket *so;
135 int error;
136
137 if ((error = fd_getsock(fd, &so)) != 0) {
138 m_freem(nam);
139 return (error);
140 }
141 MCLAIM(nam, so->so_mowner);
142 error = sobind(so, nam, l);
143 m_freem(nam);
144 fd_putfile(fd);
145 return error;
146 }
147
148 /* ARGSUSED */
149 int
150 sys_listen(struct lwp *l, const struct sys_listen_args *uap, register_t *retval)
151 {
152 /* {
153 syscallarg(int) s;
154 syscallarg(int) backlog;
155 } */
156 struct socket *so;
157 int error;
158
159 if ((error = fd_getsock(SCARG(uap, s), &so)) != 0)
160 return (error);
161 error = solisten(so, SCARG(uap, backlog), l);
162 fd_putfile(SCARG(uap, s));
163 return error;
164 }
165
166 int
167 do_sys_accept(struct lwp *l, int sock, struct mbuf **name, register_t *new_sock,
168 const sigset_t *mask, int flags, int clrflags)
169 {
170 file_t *fp, *fp2;
171 struct mbuf *nam;
172 int error, fd;
173 struct socket *so, *so2;
174 short wakeup_state = 0;
175
176 if ((fp = fd_getfile(sock)) == NULL)
177 return (EBADF);
178 if (fp->f_type != DTYPE_SOCKET) {
179 fd_putfile(sock);
180 return (ENOTSOCK);
181 }
182 if ((error = fd_allocfile(&fp2, &fd)) != 0) {
183 fd_putfile(sock);
184 return (error);
185 }
186 nam = m_get(M_WAIT, MT_SONAME);
187 nam->m_len = 0;
188 *new_sock = fd;
189 so = fp->f_data;
190 solock(so);
191
192 if (__predict_false(mask))
193 sigsuspendsetup(l, mask);
194
195 if (!(so->so_proto->pr_flags & PR_LISTEN)) {
196 error = EOPNOTSUPP;
197 goto bad;
198 }
199 if ((so->so_options & SO_ACCEPTCONN) == 0) {
200 error = EINVAL;
201 goto bad;
202 }
203 if ((so->so_state & SS_NBIO) && so->so_qlen == 0) {
204 error = EWOULDBLOCK;
205 goto bad;
206 }
207 while (so->so_qlen == 0 && so->so_error == 0) {
208 if (so->so_state & SS_CANTRCVMORE) {
209 so->so_error = ECONNABORTED;
210 break;
211 }
212 if (wakeup_state & SS_RESTARTSYS) {
213 error = ERESTART;
214 goto bad;
215 }
216 error = sowait(so, true, 0);
217 if (error) {
218 goto bad;
219 }
220 wakeup_state = so->so_state;
221 }
222 if (so->so_error) {
223 error = so->so_error;
224 so->so_error = 0;
225 goto bad;
226 }
227 /* connection has been removed from the listen queue */
228 KNOTE(&so->so_rcv.sb_sel.sel_klist, NOTE_SUBMIT);
229 so2 = TAILQ_FIRST(&so->so_q);
230 if (soqremque(so2, 1) == 0)
231 panic("accept");
232 fp2->f_type = DTYPE_SOCKET;
233 fp2->f_flag = (fp->f_flag & ~clrflags) |
234 ((flags & SOCK_NONBLOCK) ? FNONBLOCK : 0)|
235 ((flags & SOCK_NOSIGPIPE) ? FNOSIGPIPE : 0);
236 fp2->f_ops = &socketops;
237 fp2->f_data = so2;
238 if (flags & SOCK_NONBLOCK)
239 so2->so_state |= SS_NBIO;
240 error = soaccept(so2, nam);
241 so2->so_cred = kauth_cred_dup(so->so_cred);
242 sounlock(so);
243 if (error) {
244 /* an error occurred, free the file descriptor and mbuf */
245 m_freem(nam);
246 mutex_enter(&fp2->f_lock);
247 fp2->f_count++;
248 mutex_exit(&fp2->f_lock);
249 closef(fp2);
250 fd_abort(curproc, NULL, fd);
251 } else {
252 fd_set_exclose(l, fd, (flags & SOCK_CLOEXEC) != 0);
253 fd_affix(curproc, fp2, fd);
254 *name = nam;
255 }
256 fd_putfile(sock);
257 if (__predict_false(mask))
258 sigsuspendteardown(l);
259 return (error);
260 bad:
261 sounlock(so);
262 m_freem(nam);
263 fd_putfile(sock);
264 fd_abort(curproc, fp2, fd);
265 if (__predict_false(mask))
266 sigsuspendteardown(l);
267 return (error);
268 }
269
270 int
271 sys_accept(struct lwp *l, const struct sys_accept_args *uap, register_t *retval)
272 {
273 /* {
274 syscallarg(int) s;
275 syscallarg(struct sockaddr *) name;
276 syscallarg(unsigned int *) anamelen;
277 } */
278 int error, fd;
279 struct mbuf *name;
280
281 error = do_sys_accept(l, SCARG(uap, s), &name, retval, NULL, 0, 0);
282 if (error != 0)
283 return error;
284 error = copyout_sockname(SCARG(uap, name), SCARG(uap, anamelen),
285 MSG_LENUSRSPACE, name);
286 if (name != NULL)
287 m_free(name);
288 if (error != 0) {
289 fd = (int)*retval;
290 if (fd_getfile(fd) != NULL)
291 (void)fd_close(fd);
292 }
293 return error;
294 }
295
296 int
297 sys_paccept(struct lwp *l, const struct sys_paccept_args *uap,
298 register_t *retval)
299 {
300 /* {
301 syscallarg(int) s;
302 syscallarg(struct sockaddr *) name;
303 syscallarg(unsigned int *) anamelen;
304 syscallarg(const sigset_t *) mask;
305 syscallarg(int) flags;
306 } */
307 int error, fd;
308 struct mbuf *name;
309 sigset_t *mask, amask;
310
311 if (SCARG(uap, mask) != NULL) {
312 error = copyin(SCARG(uap, mask), &amask, sizeof(amask));
313 if (error)
314 return error;
315 mask = &amask;
316 } else
317 mask = NULL;
318
319 error = do_sys_accept(l, SCARG(uap, s), &name, retval, mask,
320 SCARG(uap, flags), FNONBLOCK);
321 if (error != 0)
322 return error;
323 error = copyout_sockname(SCARG(uap, name), SCARG(uap, anamelen),
324 MSG_LENUSRSPACE, name);
325 if (name != NULL)
326 m_free(name);
327 if (error != 0) {
328 fd = (int)*retval;
329 if (fd_getfile(fd) != NULL)
330 (void)fd_close(fd);
331 }
332 return error;
333 }
334
335 /* ARGSUSED */
336 int
337 sys_connect(struct lwp *l, const struct sys_connect_args *uap, register_t *retval)
338 {
339 /* {
340 syscallarg(int) s;
341 syscallarg(const struct sockaddr *) name;
342 syscallarg(unsigned int) namelen;
343 } */
344 int error;
345 struct mbuf *nam;
346
347 error = sockargs(&nam, SCARG(uap, name), SCARG(uap, namelen),
348 MT_SONAME);
349 if (error)
350 return error;
351 return do_sys_connect(l, SCARG(uap, s), nam);
352 }
353
354 int
355 do_sys_connect(struct lwp *l, int fd, struct mbuf *nam)
356 {
357 struct socket *so;
358 int error;
359 int interrupted = 0;
360
361 if ((error = fd_getsock(fd, &so)) != 0) {
362 m_freem(nam);
363 return (error);
364 }
365 solock(so);
366 MCLAIM(nam, so->so_mowner);
367 if ((so->so_state & SS_ISCONNECTING) != 0) {
368 error = EALREADY;
369 goto out;
370 }
371
372 error = soconnect(so, nam, l);
373 if (error)
374 goto bad;
375 if ((so->so_state & (SS_NBIO|SS_ISCONNECTING)) ==
376 (SS_NBIO|SS_ISCONNECTING)) {
377 error = EINPROGRESS;
378 goto out;
379 }
380 while ((so->so_state & SS_ISCONNECTING) != 0 && so->so_error == 0) {
381 error = sowait(so, true, 0);
382 if (__predict_false((so->so_state & SS_ISABORTING) != 0)) {
383 error = EPIPE;
384 interrupted = 1;
385 break;
386 }
387 if (error) {
388 if (error == EINTR || error == ERESTART)
389 interrupted = 1;
390 break;
391 }
392 }
393 if (error == 0) {
394 error = so->so_error;
395 so->so_error = 0;
396 }
397 bad:
398 if (!interrupted)
399 so->so_state &= ~SS_ISCONNECTING;
400 if (error == ERESTART)
401 error = EINTR;
402 out:
403 sounlock(so);
404 fd_putfile(fd);
405 m_freem(nam);
406 return (error);
407 }
408
409 static int
410 makesocket(struct lwp *l, file_t **fp, int *fd, int flags, int type,
411 int domain, int proto, struct socket *soo)
412 {
413 int error;
414 struct socket *so;
415
416 if ((error = socreate(domain, &so, type, proto, l, soo)) != 0)
417 return error;
418
419 if ((error = fd_allocfile(fp, fd)) != 0) {
420 soclose(so);
421 return error;
422 }
423 fd_set_exclose(l, *fd, (flags & SOCK_CLOEXEC) != 0);
424 (*fp)->f_flag = FREAD|FWRITE|
425 ((flags & SOCK_NONBLOCK) ? FNONBLOCK : 0)|
426 ((flags & SOCK_NOSIGPIPE) ? FNOSIGPIPE : 0);
427 (*fp)->f_type = DTYPE_SOCKET;
428 (*fp)->f_ops = &socketops;
429 (*fp)->f_data = so;
430 if (flags & SOCK_NONBLOCK)
431 so->so_state |= SS_NBIO;
432 return 0;
433 }
434
435 int
436 sys_socketpair(struct lwp *l, const struct sys_socketpair_args *uap,
437 register_t *retval)
438 {
439 /* {
440 syscallarg(int) domain;
441 syscallarg(int) type;
442 syscallarg(int) protocol;
443 syscallarg(int *) rsv;
444 } */
445 file_t *fp1, *fp2;
446 struct socket *so1, *so2;
447 int fd, error, sv[2];
448 proc_t *p;
449 int flags = SCARG(uap, type) & SOCK_FLAGS_MASK;
450 int type = SCARG(uap, type) & ~SOCK_FLAGS_MASK;
451 int domain = SCARG(uap, domain);
452 int proto = SCARG(uap, protocol);
453
454 p = curproc;
455
456 error = makesocket(l, &fp1, &fd, flags, type, domain, proto, NULL);
457 if (error)
458 return error;
459 so1 = fp1->f_data;
460 sv[0] = fd;
461
462 error = makesocket(l, &fp2, &fd, flags, type, domain, proto, so1);
463 if (error)
464 goto out;
465 so2 = fp2->f_data;
466 sv[1] = fd;
467
468 solock(so1);
469 error = soconnect2(so1, so2);
470 if (error == 0 && type == SOCK_DGRAM) {
471 /*
472 * Datagram socket connection is asymmetric.
473 */
474 error = soconnect2(so2, so1);
475 }
476 sounlock(so1);
477
478 if (error == 0)
479 error = copyout(sv, SCARG(uap, rsv), sizeof(sv));
480 if (error == 0) {
481 fd_affix(p, fp2, sv[1]);
482 fd_affix(p, fp1, sv[0]);
483 return 0;
484 }
485 fd_abort(p, fp2, sv[1]);
486 (void)soclose(so2);
487 out:
488 fd_abort(p, fp1, sv[0]);
489 (void)soclose(so1);
490 return error;
491 }
492
493 int
494 sys_sendto(struct lwp *l, const struct sys_sendto_args *uap, register_t *retval)
495 {
496 /* {
497 syscallarg(int) s;
498 syscallarg(const void *) buf;
499 syscallarg(size_t) len;
500 syscallarg(int) flags;
501 syscallarg(const struct sockaddr *) to;
502 syscallarg(unsigned int) tolen;
503 } */
504 struct msghdr msg;
505 struct iovec aiov;
506
507 msg.msg_name = __UNCONST(SCARG(uap, to)); /* XXXUNCONST kills const */
508 msg.msg_namelen = SCARG(uap, tolen);
509 msg.msg_iov = &aiov;
510 msg.msg_iovlen = 1;
511 msg.msg_control = NULL;
512 msg.msg_flags = 0;
513 aiov.iov_base = __UNCONST(SCARG(uap, buf)); /* XXXUNCONST kills const */
514 aiov.iov_len = SCARG(uap, len);
515 return do_sys_sendmsg(l, SCARG(uap, s), &msg, SCARG(uap, flags), retval);
516 }
517
518 int
519 sys_sendmsg(struct lwp *l, const struct sys_sendmsg_args *uap, register_t *retval)
520 {
521 /* {
522 syscallarg(int) s;
523 syscallarg(const struct msghdr *) msg;
524 syscallarg(int) flags;
525 } */
526 struct msghdr msg;
527 int error;
528
529 error = copyin(SCARG(uap, msg), &msg, sizeof(msg));
530 if (error)
531 return (error);
532
533 msg.msg_flags = MSG_IOVUSRSPACE;
534 return do_sys_sendmsg(l, SCARG(uap, s), &msg, SCARG(uap, flags), retval);
535 }
536
537 int
538 do_sys_sendmsg(struct lwp *l, int s, struct msghdr *mp, int flags,
539 register_t *retsize)
540 {
541 struct iovec aiov[UIO_SMALLIOV], *iov = aiov, *tiov, *ktriov = NULL;
542 struct mbuf *to, *control;
543 struct socket *so;
544 file_t *fp;
545 struct uio auio;
546 size_t len, iovsz;
547 int i, error;
548
549 ktrkuser("msghdr", mp, sizeof *mp);
550
551 /* If the caller passed us stuff in mbufs, we must free them. */
552 to = (mp->msg_flags & MSG_NAMEMBUF) ? mp->msg_name : NULL;
553 control = (mp->msg_flags & MSG_CONTROLMBUF) ? mp->msg_control : NULL;
554 iovsz = mp->msg_iovlen * sizeof(struct iovec);
555
556 if (mp->msg_flags & MSG_IOVUSRSPACE) {
557 if ((unsigned int)mp->msg_iovlen > UIO_SMALLIOV) {
558 if ((unsigned int)mp->msg_iovlen > IOV_MAX) {
559 error = EMSGSIZE;
560 goto bad;
561 }
562 iov = kmem_alloc(iovsz, KM_SLEEP);
563 }
564 if (mp->msg_iovlen != 0) {
565 error = copyin(mp->msg_iov, iov, iovsz);
566 if (error)
567 goto bad;
568 }
569 mp->msg_iov = iov;
570 }
571
572 auio.uio_iov = mp->msg_iov;
573 auio.uio_iovcnt = mp->msg_iovlen;
574 auio.uio_rw = UIO_WRITE;
575 auio.uio_offset = 0; /* XXX */
576 auio.uio_resid = 0;
577 KASSERT(l == curlwp);
578 auio.uio_vmspace = l->l_proc->p_vmspace;
579
580 for (i = 0, tiov = mp->msg_iov; i < mp->msg_iovlen; i++, tiov++) {
581 /*
582 * Writes return ssize_t because -1 is returned on error.
583 * Therefore, we must restrict the length to SSIZE_MAX to
584 * avoid garbage return values.
585 */
586 auio.uio_resid += tiov->iov_len;
587 if (tiov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
588 error = EINVAL;
589 goto bad;
590 }
591 }
592
593 if (mp->msg_name && to == NULL) {
594 error = sockargs(&to, mp->msg_name, mp->msg_namelen,
595 MT_SONAME);
596 if (error)
597 goto bad;
598 }
599
600 if (mp->msg_control) {
601 if (mp->msg_controllen < CMSG_ALIGN(sizeof(struct cmsghdr))) {
602 error = EINVAL;
603 goto bad;
604 }
605 if (control == NULL) {
606 error = sockargs(&control, mp->msg_control,
607 mp->msg_controllen, MT_CONTROL);
608 if (error)
609 goto bad;
610 }
611 }
612
613 if (ktrpoint(KTR_GENIO) && iovsz > 0) {
614 ktriov = kmem_alloc(iovsz, KM_SLEEP);
615 memcpy(ktriov, auio.uio_iov, iovsz);
616 }
617
618 if ((error = fd_getsock1(s, &so, &fp)) != 0)
619 goto bad;
620
621 if (mp->msg_name)
622 MCLAIM(to, so->so_mowner);
623 if (mp->msg_control)
624 MCLAIM(control, so->so_mowner);
625
626 len = auio.uio_resid;
627 error = (*so->so_send)(so, to, &auio, NULL, control, flags, l);
628 /* Protocol is responsible for freeing 'control' */
629 control = NULL;
630
631 fd_putfile(s);
632
633 if (error) {
634 if (auio.uio_resid != len && (error == ERESTART ||
635 error == EINTR || error == EWOULDBLOCK))
636 error = 0;
637 if (error == EPIPE && (fp->f_flag & FNOSIGPIPE) == 0 &&
638 (flags & MSG_NOSIGNAL) == 0) {
639 mutex_enter(proc_lock);
640 psignal(l->l_proc, SIGPIPE);
641 mutex_exit(proc_lock);
642 }
643 }
644 if (error == 0)
645 *retsize = len - auio.uio_resid;
646
647 bad:
648 if (ktriov != NULL) {
649 ktrgeniov(s, UIO_WRITE, ktriov, *retsize, error);
650 kmem_free(ktriov, iovsz);
651 }
652
653 if (iov != aiov)
654 kmem_free(iov, iovsz);
655 if (to)
656 m_freem(to);
657 if (control)
658 m_freem(control);
659
660 return (error);
661 }
662
663 int
664 sys_recvfrom(struct lwp *l, const struct sys_recvfrom_args *uap, register_t *retval)
665 {
666 /* {
667 syscallarg(int) s;
668 syscallarg(void *) buf;
669 syscallarg(size_t) len;
670 syscallarg(int) flags;
671 syscallarg(struct sockaddr *) from;
672 syscallarg(unsigned int *) fromlenaddr;
673 } */
674 struct msghdr msg;
675 struct iovec aiov;
676 int error;
677 struct mbuf *from;
678
679 msg.msg_name = NULL;
680 msg.msg_iov = &aiov;
681 msg.msg_iovlen = 1;
682 aiov.iov_base = SCARG(uap, buf);
683 aiov.iov_len = SCARG(uap, len);
684 msg.msg_control = NULL;
685 msg.msg_flags = SCARG(uap, flags) & MSG_USERFLAGS;
686
687 error = do_sys_recvmsg(l, SCARG(uap, s), &msg, &from, NULL, retval);
688 if (error != 0)
689 return error;
690
691 error = copyout_sockname(SCARG(uap, from), SCARG(uap, fromlenaddr),
692 MSG_LENUSRSPACE, from);
693 if (from != NULL)
694 m_free(from);
695 return error;
696 }
697
698 int
699 sys_recvmsg(struct lwp *l, const struct sys_recvmsg_args *uap, register_t *retval)
700 {
701 /* {
702 syscallarg(int) s;
703 syscallarg(struct msghdr *) msg;
704 syscallarg(int) flags;
705 } */
706 struct msghdr msg;
707 int error;
708 struct mbuf *from, *control;
709
710 error = copyin(SCARG(uap, msg), &msg, sizeof(msg));
711 if (error)
712 return (error);
713
714 msg.msg_flags = (SCARG(uap, flags) & MSG_USERFLAGS) | MSG_IOVUSRSPACE;
715
716 error = do_sys_recvmsg(l, SCARG(uap, s), &msg, &from,
717 msg.msg_control != NULL ? &control : NULL, retval);
718 if (error != 0)
719 return error;
720
721 if (msg.msg_control != NULL)
722 error = copyout_msg_control(l, &msg, control);
723
724 if (error == 0)
725 error = copyout_sockname(msg.msg_name, &msg.msg_namelen, 0,
726 from);
727 if (from != NULL)
728 m_free(from);
729 if (error == 0) {
730 ktrkuser("msghdr", &msg, sizeof msg);
731 error = copyout(&msg, SCARG(uap, msg), sizeof(msg));
732 }
733
734 return (error);
735 }
736
737 /*
738 * Adjust for a truncated SCM_RIGHTS control message.
739 * This means closing any file descriptors that aren't present
740 * in the returned buffer.
741 * m is the mbuf holding the (already externalized) SCM_RIGHTS message.
742 */
743 static void
744 free_rights(struct mbuf *m)
745 {
746 struct cmsghdr *cm;
747 int *fdv;
748 unsigned int nfds, i;
749
750 KASSERT(sizeof(*cm) <= m->m_len);
751 cm = mtod(m, struct cmsghdr *);
752
753 KASSERT(CMSG_ALIGN(sizeof(*cm)) <= cm->cmsg_len);
754 KASSERT(cm->cmsg_len <= m->m_len);
755 nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof(int);
756 fdv = (int *)CMSG_DATA(cm);
757
758 for (i = 0; i < nfds; i++)
759 if (fd_getfile(fdv[i]) != NULL)
760 (void)fd_close(fdv[i]);
761 }
762
763 void
764 free_control_mbuf(struct lwp *l, struct mbuf *control, struct mbuf *uncopied)
765 {
766 struct mbuf *next;
767 struct cmsghdr *cmsg;
768 bool do_free_rights = false;
769
770 while (control != NULL) {
771 cmsg = mtod(control, struct cmsghdr *);
772 if (control == uncopied)
773 do_free_rights = true;
774 if (do_free_rights && cmsg->cmsg_level == SOL_SOCKET
775 && cmsg->cmsg_type == SCM_RIGHTS)
776 free_rights(control);
777 next = control->m_next;
778 m_free(control);
779 control = next;
780 }
781 }
782
783 /* Copy socket control/CMSG data to user buffer, frees the mbuf */
784 int
785 copyout_msg_control(struct lwp *l, struct msghdr *mp, struct mbuf *control)
786 {
787 int i, len, error = 0;
788 struct cmsghdr *cmsg;
789 struct mbuf *m;
790 char *q;
791
792 len = mp->msg_controllen;
793 if (len <= 0 || control == 0) {
794 mp->msg_controllen = 0;
795 free_control_mbuf(l, control, control);
796 return 0;
797 }
798
799 q = (char *)mp->msg_control;
800
801 for (m = control; m != NULL; ) {
802 cmsg = mtod(m, struct cmsghdr *);
803 i = m->m_len;
804 if (len < i) {
805 mp->msg_flags |= MSG_CTRUNC;
806 if (cmsg->cmsg_level == SOL_SOCKET
807 && cmsg->cmsg_type == SCM_RIGHTS)
808 /* Do not truncate me ... */
809 break;
810 i = len;
811 }
812 error = copyout(mtod(m, void *), q, i);
813 ktrkuser("msgcontrol", mtod(m, void *), i);
814 if (error != 0) {
815 /* We must free all the SCM_RIGHTS */
816 m = control;
817 break;
818 }
819 m = m->m_next;
820 if (m)
821 i = ALIGN(i);
822 q += i;
823 len -= i;
824 if (len <= 0)
825 break;
826 }
827
828 free_control_mbuf(l, control, m);
829
830 mp->msg_controllen = q - (char *)mp->msg_control;
831 return error;
832 }
833
834 int
835 do_sys_recvmsg(struct lwp *l, int s, struct msghdr *mp, struct mbuf **from,
836 struct mbuf **control, register_t *retsize)
837 {
838 struct iovec aiov[UIO_SMALLIOV], *iov = aiov, *tiov, *ktriov = NULL;
839 struct socket *so;
840 struct uio auio;
841 size_t len, iovsz;
842 int i, error;
843
844 ktrkuser("msghdr", mp, sizeof *mp);
845
846 *from = NULL;
847 if (control != NULL)
848 *control = NULL;
849
850 if ((error = fd_getsock(s, &so)) != 0)
851 return (error);
852
853 iovsz = mp->msg_iovlen * sizeof(struct iovec);
854
855 if (mp->msg_flags & MSG_IOVUSRSPACE) {
856 if ((unsigned int)mp->msg_iovlen > UIO_SMALLIOV) {
857 if ((unsigned int)mp->msg_iovlen > IOV_MAX) {
858 error = EMSGSIZE;
859 goto out;
860 }
861 iov = kmem_alloc(iovsz, KM_SLEEP);
862 }
863 if (mp->msg_iovlen != 0) {
864 error = copyin(mp->msg_iov, iov, iovsz);
865 if (error)
866 goto out;
867 }
868 auio.uio_iov = iov;
869 } else
870 auio.uio_iov = mp->msg_iov;
871 auio.uio_iovcnt = mp->msg_iovlen;
872 auio.uio_rw = UIO_READ;
873 auio.uio_offset = 0; /* XXX */
874 auio.uio_resid = 0;
875 KASSERT(l == curlwp);
876 auio.uio_vmspace = l->l_proc->p_vmspace;
877
878 tiov = auio.uio_iov;
879 for (i = 0; i < mp->msg_iovlen; i++, tiov++) {
880 /*
881 * Reads return ssize_t because -1 is returned on error.
882 * Therefore we must restrict the length to SSIZE_MAX to
883 * avoid garbage return values.
884 */
885 auio.uio_resid += tiov->iov_len;
886 if (tiov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
887 error = EINVAL;
888 goto out;
889 }
890 }
891
892 if (ktrpoint(KTR_GENIO) && iovsz > 0) {
893 ktriov = kmem_alloc(iovsz, KM_SLEEP);
894 memcpy(ktriov, auio.uio_iov, iovsz);
895 }
896
897 len = auio.uio_resid;
898 mp->msg_flags &= MSG_USERFLAGS;
899 error = (*so->so_receive)(so, from, &auio, NULL, control,
900 &mp->msg_flags);
901 len -= auio.uio_resid;
902 *retsize = len;
903 if (error != 0 && len != 0
904 && (error == ERESTART || error == EINTR || error == EWOULDBLOCK))
905 /* Some data transferred */
906 error = 0;
907
908 if (ktriov != NULL) {
909 ktrgeniov(s, UIO_READ, ktriov, len, error);
910 kmem_free(ktriov, iovsz);
911 }
912
913 if (error != 0) {
914 m_freem(*from);
915 *from = NULL;
916 if (control != NULL) {
917 free_control_mbuf(l, *control, *control);
918 *control = NULL;
919 }
920 }
921 out:
922 if (iov != aiov)
923 kmem_free(iov, iovsz);
924 fd_putfile(s);
925 return (error);
926 }
927
928
929 /* ARGSUSED */
930 int
931 sys_shutdown(struct lwp *l, const struct sys_shutdown_args *uap, register_t *retval)
932 {
933 /* {
934 syscallarg(int) s;
935 syscallarg(int) how;
936 } */
937 struct socket *so;
938 int error;
939
940 if ((error = fd_getsock(SCARG(uap, s), &so)) != 0)
941 return (error);
942 solock(so);
943 error = soshutdown(so, SCARG(uap, how));
944 sounlock(so);
945 fd_putfile(SCARG(uap, s));
946 return (error);
947 }
948
949 /* ARGSUSED */
950 int
951 sys_setsockopt(struct lwp *l, const struct sys_setsockopt_args *uap, register_t *retval)
952 {
953 /* {
954 syscallarg(int) s;
955 syscallarg(int) level;
956 syscallarg(int) name;
957 syscallarg(const void *) val;
958 syscallarg(unsigned int) valsize;
959 } */
960 struct sockopt sopt;
961 struct socket *so;
962 file_t *fp;
963 int error;
964 unsigned int len;
965
966 len = SCARG(uap, valsize);
967 if (len > 0 && SCARG(uap, val) == NULL)
968 return (EINVAL);
969
970 if (len > MCLBYTES)
971 return (EINVAL);
972
973 if ((error = fd_getsock1(SCARG(uap, s), &so, &fp)) != 0)
974 return (error);
975
976 sockopt_init(&sopt, SCARG(uap, level), SCARG(uap, name), len);
977
978 if (len > 0) {
979 error = copyin(SCARG(uap, val), sopt.sopt_data, len);
980 if (error)
981 goto out;
982 }
983
984 error = sosetopt(so, &sopt);
985 if (so->so_options & SO_NOSIGPIPE)
986 atomic_or_uint(&fp->f_flag, FNOSIGPIPE);
987 else
988 atomic_and_uint(&fp->f_flag, ~FNOSIGPIPE);
989
990 out:
991 sockopt_destroy(&sopt);
992 fd_putfile(SCARG(uap, s));
993 return (error);
994 }
995
996 /* ARGSUSED */
997 int
998 sys_getsockopt(struct lwp *l, const struct sys_getsockopt_args *uap, register_t *retval)
999 {
1000 /* {
1001 syscallarg(int) s;
1002 syscallarg(int) level;
1003 syscallarg(int) name;
1004 syscallarg(void *) val;
1005 syscallarg(unsigned int *) avalsize;
1006 } */
1007 struct sockopt sopt;
1008 struct socket *so;
1009 file_t *fp;
1010 unsigned int valsize, len;
1011 int error;
1012
1013 if (SCARG(uap, val) != NULL) {
1014 error = copyin(SCARG(uap, avalsize), &valsize, sizeof(valsize));
1015 if (error)
1016 return (error);
1017 } else
1018 valsize = 0;
1019
1020 if ((error = fd_getsock1(SCARG(uap, s), &so, &fp)) != 0)
1021 return (error);
1022
1023 sockopt_init(&sopt, SCARG(uap, level), SCARG(uap, name), 0);
1024
1025 if (fp->f_flag & FNOSIGPIPE)
1026 so->so_options |= SO_NOSIGPIPE;
1027 else
1028 so->so_options &= ~SO_NOSIGPIPE;
1029 error = sogetopt(so, &sopt);
1030 if (error)
1031 goto out;
1032
1033 if (valsize > 0) {
1034 len = min(valsize, sopt.sopt_size);
1035 error = copyout(sopt.sopt_data, SCARG(uap, val), len);
1036 if (error)
1037 goto out;
1038
1039 error = copyout(&len, SCARG(uap, avalsize), sizeof(len));
1040 if (error)
1041 goto out;
1042 }
1043
1044 out:
1045 sockopt_destroy(&sopt);
1046 fd_putfile(SCARG(uap, s));
1047 return (error);
1048 }
1049
1050 #ifdef PIPE_SOCKETPAIR
1051 /* ARGSUSED */
1052 int
1053 pipe1(struct lwp *l, register_t *retval, int flags)
1054 {
1055 file_t *rf, *wf;
1056 struct socket *rso, *wso;
1057 int fd, error;
1058 proc_t *p;
1059
1060 if (flags & ~(O_CLOEXEC|O_NONBLOCK|O_NOSIGPIPE))
1061 return EINVAL;
1062 p = curproc;
1063 if ((error = socreate(AF_LOCAL, &rso, SOCK_STREAM, 0, l, NULL)) != 0)
1064 return (error);
1065 if ((error = socreate(AF_LOCAL, &wso, SOCK_STREAM, 0, l, rso)) != 0)
1066 goto free1;
1067 /* remember this socket pair implements a pipe */
1068 wso->so_state |= SS_ISAPIPE;
1069 rso->so_state |= SS_ISAPIPE;
1070 if ((error = fd_allocfile(&rf, &fd)) != 0)
1071 goto free2;
1072 retval[0] = fd;
1073 rf->f_flag = FREAD | flags;
1074 rf->f_type = DTYPE_SOCKET;
1075 rf->f_ops = &socketops;
1076 rf->f_data = rso;
1077 if ((error = fd_allocfile(&wf, &fd)) != 0)
1078 goto free3;
1079 wf->f_flag = FWRITE | flags;
1080 wf->f_type = DTYPE_SOCKET;
1081 wf->f_ops = &socketops;
1082 wf->f_data = wso;
1083 retval[1] = fd;
1084 solock(wso);
1085 error = unp_connect2(wso, rso, PRU_CONNECT2);
1086 sounlock(wso);
1087 if (error != 0)
1088 goto free4;
1089 fd_affix(p, wf, (int)retval[1]);
1090 fd_affix(p, rf, (int)retval[0]);
1091 return (0);
1092 free4:
1093 fd_abort(p, wf, (int)retval[1]);
1094 free3:
1095 fd_abort(p, rf, (int)retval[0]);
1096 free2:
1097 (void)soclose(wso);
1098 free1:
1099 (void)soclose(rso);
1100 return (error);
1101 }
1102 #endif /* PIPE_SOCKETPAIR */
1103
1104 /*
1105 * Get socket name.
1106 */
1107 /* ARGSUSED */
1108 int
1109 do_sys_getsockname(struct lwp *l, int fd, int which, struct mbuf **nam)
1110 {
1111 struct socket *so;
1112 struct mbuf *m;
1113 int error;
1114
1115 if ((error = fd_getsock(fd, &so)) != 0)
1116 return error;
1117
1118 m = m_getclr(M_WAIT, MT_SONAME);
1119 MCLAIM(m, so->so_mowner);
1120
1121 solock(so);
1122 if (which == PRU_PEERADDR
1123 && (so->so_state & (SS_ISCONNECTED | SS_ISCONFIRMING)) == 0) {
1124 error = ENOTCONN;
1125 } else {
1126 *nam = m;
1127 error = (*so->so_proto->pr_usrreq)(so, which, NULL, m, NULL,
1128 NULL);
1129 }
1130 sounlock(so);
1131 if (error != 0)
1132 m_free(m);
1133 fd_putfile(fd);
1134 return error;
1135 }
1136
1137 int
1138 copyout_sockname(struct sockaddr *asa, unsigned int *alen, int flags,
1139 struct mbuf *addr)
1140 {
1141 int len;
1142 int error;
1143
1144 if (asa == NULL)
1145 /* Assume application not interested */
1146 return 0;
1147
1148 if (flags & MSG_LENUSRSPACE) {
1149 error = copyin(alen, &len, sizeof(len));
1150 if (error)
1151 return error;
1152 } else
1153 len = *alen;
1154 if (len < 0)
1155 return EINVAL;
1156
1157 if (addr == NULL) {
1158 len = 0;
1159 error = 0;
1160 } else {
1161 if (len > addr->m_len)
1162 len = addr->m_len;
1163 /* Maybe this ought to copy a chain ? */
1164 ktrkuser("sockname", mtod(addr, void *), len);
1165 error = copyout(mtod(addr, void *), asa, len);
1166 }
1167
1168 if (error == 0) {
1169 if (flags & MSG_LENUSRSPACE)
1170 error = copyout(&len, alen, sizeof(len));
1171 else
1172 *alen = len;
1173 }
1174
1175 return error;
1176 }
1177
1178 /*
1179 * Get socket name.
1180 */
1181 /* ARGSUSED */
1182 int
1183 sys_getsockname(struct lwp *l, const struct sys_getsockname_args *uap, register_t *retval)
1184 {
1185 /* {
1186 syscallarg(int) fdes;
1187 syscallarg(struct sockaddr *) asa;
1188 syscallarg(unsigned int *) alen;
1189 } */
1190 struct mbuf *m;
1191 int error;
1192
1193 error = do_sys_getsockname(l, SCARG(uap, fdes), PRU_SOCKADDR, &m);
1194 if (error != 0)
1195 return error;
1196
1197 error = copyout_sockname(SCARG(uap, asa), SCARG(uap, alen),
1198 MSG_LENUSRSPACE, m);
1199 if (m != NULL)
1200 m_free(m);
1201 return error;
1202 }
1203
1204 /*
1205 * Get name of peer for connected socket.
1206 */
1207 /* ARGSUSED */
1208 int
1209 sys_getpeername(struct lwp *l, const struct sys_getpeername_args *uap, register_t *retval)
1210 {
1211 /* {
1212 syscallarg(int) fdes;
1213 syscallarg(struct sockaddr *) asa;
1214 syscallarg(unsigned int *) alen;
1215 } */
1216 struct mbuf *m;
1217 int error;
1218
1219 error = do_sys_getsockname(l, SCARG(uap, fdes), PRU_PEERADDR, &m);
1220 if (error != 0)
1221 return error;
1222
1223 error = copyout_sockname(SCARG(uap, asa), SCARG(uap, alen),
1224 MSG_LENUSRSPACE, m);
1225 if (m != NULL)
1226 m_free(m);
1227 return error;
1228 }
1229
1230 /*
1231 * XXX In a perfect world, we wouldn't pass around socket control
1232 * XXX arguments in mbufs, and this could go away.
1233 */
1234 int
1235 sockargs(struct mbuf **mp, const void *bf, size_t buflen, int type)
1236 {
1237 struct sockaddr *sa;
1238 struct mbuf *m;
1239 int error;
1240
1241 /*
1242 * We can't allow socket names > UCHAR_MAX in length, since that
1243 * will overflow sa_len. Control data more than a page size in
1244 * length is just too much.
1245 */
1246 if (buflen > (type == MT_SONAME ? UCHAR_MAX : PAGE_SIZE))
1247 return (EINVAL);
1248
1249 /* Allocate an mbuf to hold the arguments. */
1250 m = m_get(M_WAIT, type);
1251 /* can't claim. don't who to assign it to. */
1252 if (buflen > MLEN) {
1253 /*
1254 * Won't fit into a regular mbuf, so we allocate just
1255 * enough external storage to hold the argument.
1256 */
1257 MEXTMALLOC(m, buflen, M_WAITOK);
1258 }
1259 m->m_len = buflen;
1260 error = copyin(bf, mtod(m, void *), buflen);
1261 if (error) {
1262 (void) m_free(m);
1263 return (error);
1264 }
1265 ktrkuser(mbuftypes[type], mtod(m, void *), buflen);
1266 *mp = m;
1267 if (type == MT_SONAME) {
1268 sa = mtod(m, struct sockaddr *);
1269 #if BYTE_ORDER != BIG_ENDIAN
1270 /*
1271 * 4.3BSD compat thing - need to stay, since bind(2),
1272 * connect(2), sendto(2) were not versioned for COMPAT_43.
1273 */
1274 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1275 sa->sa_family = sa->sa_len;
1276 #endif
1277 sa->sa_len = buflen;
1278 }
1279 return (0);
1280 }
1281