uipc_syscalls.c revision 1.149 1 /* $NetBSD: uipc_syscalls.c,v 1.149 2011/12/20 23:56:28 christos Exp $ */
2
3 /*-
4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1982, 1986, 1989, 1990, 1993
34 * The Regents of the University of California. All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)uipc_syscalls.c 8.6 (Berkeley) 2/14/95
61 */
62
63 #include <sys/cdefs.h>
64 __KERNEL_RCSID(0, "$NetBSD: uipc_syscalls.c,v 1.149 2011/12/20 23:56:28 christos Exp $");
65
66 #include "opt_pipe.h"
67
68 #include <sys/param.h>
69 #include <sys/systm.h>
70 #include <sys/filedesc.h>
71 #include <sys/proc.h>
72 #include <sys/file.h>
73 #include <sys/buf.h>
74 #define MBUFTYPES
75 #include <sys/mbuf.h>
76 #include <sys/protosw.h>
77 #include <sys/socket.h>
78 #include <sys/socketvar.h>
79 #include <sys/signalvar.h>
80 #include <sys/un.h>
81 #include <sys/ktrace.h>
82 #include <sys/event.h>
83 #include <sys/kauth.h>
84
85 #include <sys/mount.h>
86 #include <sys/syscallargs.h>
87
88 /*
89 * System call interface to the socket abstraction.
90 */
91 extern const struct fileops socketops;
92
93 int
94 sys___socket30(struct lwp *l, const struct sys___socket30_args *uap, register_t *retval)
95 {
96 /* {
97 syscallarg(int) domain;
98 syscallarg(int) type;
99 syscallarg(int) protocol;
100 } */
101 int fd, error;
102
103 error = fsocreate(SCARG(uap, domain), NULL, SCARG(uap, type),
104 SCARG(uap, protocol), l, &fd);
105 if (error == 0)
106 *retval = fd;
107 return error;
108 }
109
110 /* ARGSUSED */
111 int
112 sys_bind(struct lwp *l, const struct sys_bind_args *uap, register_t *retval)
113 {
114 /* {
115 syscallarg(int) s;
116 syscallarg(const struct sockaddr *) name;
117 syscallarg(unsigned int) namelen;
118 } */
119 struct mbuf *nam;
120 int error;
121
122 error = sockargs(&nam, SCARG(uap, name), SCARG(uap, namelen),
123 MT_SONAME);
124 if (error)
125 return error;
126
127 return do_sys_bind(l, SCARG(uap, s), nam);
128 }
129
130 int
131 do_sys_bind(struct lwp *l, int fd, struct mbuf *nam)
132 {
133 struct socket *so;
134 int error;
135
136 if ((error = fd_getsock(fd, &so)) != 0) {
137 m_freem(nam);
138 return (error);
139 }
140 MCLAIM(nam, so->so_mowner);
141 error = sobind(so, nam, l);
142 m_freem(nam);
143 fd_putfile(fd);
144 return error;
145 }
146
147 /* ARGSUSED */
148 int
149 sys_listen(struct lwp *l, const struct sys_listen_args *uap, register_t *retval)
150 {
151 /* {
152 syscallarg(int) s;
153 syscallarg(int) backlog;
154 } */
155 struct socket *so;
156 int error;
157
158 if ((error = fd_getsock(SCARG(uap, s), &so)) != 0)
159 return (error);
160 error = solisten(so, SCARG(uap, backlog), l);
161 fd_putfile(SCARG(uap, s));
162 return error;
163 }
164
165 int
166 do_sys_accept(struct lwp *l, int sock, struct mbuf **name, register_t *new_sock,
167 const sigset_t *mask, int flags, int clrflags)
168 {
169 file_t *fp, *fp2;
170 struct mbuf *nam;
171 int error, fd;
172 struct socket *so, *so2;
173 short wakeup_state = 0;
174
175 if ((fp = fd_getfile(sock)) == NULL)
176 return (EBADF);
177 if (fp->f_type != DTYPE_SOCKET) {
178 fd_putfile(sock);
179 return (ENOTSOCK);
180 }
181 if ((error = fd_allocfile(&fp2, &fd)) != 0) {
182 fd_putfile(sock);
183 return (error);
184 }
185 nam = m_get(M_WAIT, MT_SONAME);
186 *new_sock = fd;
187 so = fp->f_data;
188 solock(so);
189
190 if (__predict_false(mask))
191 sigsuspendsetup(l, mask);
192
193 if (!(so->so_proto->pr_flags & PR_LISTEN)) {
194 error = EOPNOTSUPP;
195 goto bad;
196 }
197 if ((so->so_options & SO_ACCEPTCONN) == 0) {
198 error = EINVAL;
199 goto bad;
200 }
201 if ((so->so_state & SS_NBIO) && so->so_qlen == 0) {
202 error = EWOULDBLOCK;
203 goto bad;
204 }
205 while (so->so_qlen == 0 && so->so_error == 0) {
206 if (so->so_state & SS_CANTRCVMORE) {
207 so->so_error = ECONNABORTED;
208 break;
209 }
210 if (wakeup_state & SS_RESTARTSYS) {
211 error = ERESTART;
212 goto bad;
213 }
214 error = sowait(so, true, 0);
215 if (error) {
216 goto bad;
217 }
218 wakeup_state = so->so_state;
219 }
220 if (so->so_error) {
221 error = so->so_error;
222 so->so_error = 0;
223 goto bad;
224 }
225 /* connection has been removed from the listen queue */
226 KNOTE(&so->so_rcv.sb_sel.sel_klist, NOTE_SUBMIT);
227 so2 = TAILQ_FIRST(&so->so_q);
228 if (soqremque(so2, 1) == 0)
229 panic("accept");
230 fp2->f_type = DTYPE_SOCKET;
231 fp2->f_flag = (fp->f_flag & ~clrflags) |
232 ((flags & SOCK_NONBLOCK) ? FNONBLOCK : 0);
233 fp2->f_ops = &socketops;
234 fp2->f_data = so2;
235 error = soaccept(so2, nam);
236 so2->so_cred = kauth_cred_dup(so->so_cred);
237 sounlock(so);
238 if (error) {
239 /* an error occurred, free the file descriptor and mbuf */
240 m_freem(nam);
241 mutex_enter(&fp2->f_lock);
242 fp2->f_count++;
243 mutex_exit(&fp2->f_lock);
244 closef(fp2);
245 fd_abort(curproc, NULL, fd);
246 } else {
247 fd_set_exclose(l, fd, (flags & SOCK_CLOEXEC) != 0);
248 fd_affix(curproc, fp2, fd);
249 *name = nam;
250 }
251 fd_putfile(sock);
252 if (__predict_false(mask))
253 sigsuspendteardown(l);
254 return (error);
255 bad:
256 sounlock(so);
257 m_freem(nam);
258 fd_putfile(sock);
259 fd_abort(curproc, fp2, fd);
260 if (__predict_false(mask))
261 sigsuspendteardown(l);
262 return (error);
263 }
264
265 int
266 sys_accept(struct lwp *l, const struct sys_accept_args *uap, register_t *retval)
267 {
268 /* {
269 syscallarg(int) s;
270 syscallarg(struct sockaddr *) name;
271 syscallarg(unsigned int *) anamelen;
272 } */
273 int error, fd;
274 struct mbuf *name;
275
276 error = do_sys_accept(l, SCARG(uap, s), &name, retval, NULL, 0, 0);
277 if (error != 0)
278 return error;
279 error = copyout_sockname(SCARG(uap, name), SCARG(uap, anamelen),
280 MSG_LENUSRSPACE, name);
281 if (name != NULL)
282 m_free(name);
283 if (error != 0) {
284 fd = (int)*retval;
285 if (fd_getfile(fd) != NULL)
286 (void)fd_close(fd);
287 }
288 return error;
289 }
290
291 int
292 sys_paccept(struct lwp *l, const struct sys_paccept_args *uap,
293 register_t *retval)
294 {
295 /* {
296 syscallarg(int) s;
297 syscallarg(struct sockaddr *) name;
298 syscallarg(unsigned int *) anamelen;
299 syscallarg(const sigset_t *) mask;
300 syscallarg(int) flags;
301 } */
302 int error, fd;
303 struct mbuf *name;
304 sigset_t *mask, amask;
305
306 if (SCARG(uap, mask) != NULL) {
307 error = copyin(SCARG(uap, mask), &amask, sizeof(amask));
308 if (error)
309 return error;
310 mask = &amask;
311 } else
312 mask = NULL;
313
314 error = do_sys_accept(l, SCARG(uap, s), &name, retval, mask,
315 SCARG(uap, flags), FNONBLOCK);
316 if (error != 0)
317 return error;
318 error = copyout_sockname(SCARG(uap, name), SCARG(uap, anamelen),
319 MSG_LENUSRSPACE, name);
320 if (name != NULL)
321 m_free(name);
322 if (error != 0) {
323 fd = (int)*retval;
324 if (fd_getfile(fd) != NULL)
325 (void)fd_close(fd);
326 }
327 return error;
328 }
329
330 /* ARGSUSED */
331 int
332 sys_connect(struct lwp *l, const struct sys_connect_args *uap, register_t *retval)
333 {
334 /* {
335 syscallarg(int) s;
336 syscallarg(const struct sockaddr *) name;
337 syscallarg(unsigned int) namelen;
338 } */
339 int error;
340 struct mbuf *nam;
341
342 error = sockargs(&nam, SCARG(uap, name), SCARG(uap, namelen),
343 MT_SONAME);
344 if (error)
345 return error;
346 return do_sys_connect(l, SCARG(uap, s), nam);
347 }
348
349 int
350 do_sys_connect(struct lwp *l, int fd, struct mbuf *nam)
351 {
352 struct socket *so;
353 int error;
354 int interrupted = 0;
355
356 if ((error = fd_getsock(fd, &so)) != 0) {
357 m_freem(nam);
358 return (error);
359 }
360 solock(so);
361 MCLAIM(nam, so->so_mowner);
362 if ((so->so_state & SS_ISCONNECTING) != 0) {
363 error = EALREADY;
364 goto out;
365 }
366
367 error = soconnect(so, nam, l);
368 if (error)
369 goto bad;
370 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING) != 0) {
371 error = EINPROGRESS;
372 goto out;
373 }
374 while ((so->so_state & SS_ISCONNECTING) != 0 && so->so_error == 0) {
375 error = sowait(so, true, 0);
376 if (__predict_false((so->so_state & SS_ISABORTING) != 0)) {
377 error = EPIPE;
378 interrupted = 1;
379 break;
380 }
381 if (error) {
382 if (error == EINTR || error == ERESTART)
383 interrupted = 1;
384 break;
385 }
386 }
387 if (error == 0) {
388 error = so->so_error;
389 so->so_error = 0;
390 }
391 bad:
392 if (!interrupted)
393 so->so_state &= ~SS_ISCONNECTING;
394 if (error == ERESTART)
395 error = EINTR;
396 out:
397 sounlock(so);
398 fd_putfile(fd);
399 m_freem(nam);
400 return (error);
401 }
402
403 static int
404 makesocket(struct lwp *l, file_t **fp, int *fd, int flags, int type,
405 int domain, int proto, struct socket *soo)
406 {
407 int error;
408 struct socket *so;
409 int fnonblock = (flags & SOCK_NONBLOCK) ? FNONBLOCK : 0;
410
411 if ((error = socreate(domain, &so, type, proto, l, soo)) != 0)
412 return error;
413
414 if ((error = fd_allocfile(fp, fd)) != 0) {
415 soclose(so);
416 return error;
417 }
418 fd_set_exclose(l, *fd, (flags & SOCK_CLOEXEC) != 0);
419 (*fp)->f_flag = FREAD|FWRITE|fnonblock;
420 (*fp)->f_type = DTYPE_SOCKET;
421 (*fp)->f_ops = &socketops;
422 (*fp)->f_data = so;
423 return 0;
424 }
425
426 int
427 sys_socketpair(struct lwp *l, const struct sys_socketpair_args *uap,
428 register_t *retval)
429 {
430 /* {
431 syscallarg(int) domain;
432 syscallarg(int) type;
433 syscallarg(int) protocol;
434 syscallarg(int *) rsv;
435 } */
436 file_t *fp1, *fp2;
437 struct socket *so1, *so2;
438 int fd, error, sv[2];
439 proc_t *p;
440 int flags = SCARG(uap, type) & SOCK_FLAGS_MASK;
441 int type = SCARG(uap, type) & ~SOCK_FLAGS_MASK;
442 int domain = SCARG(uap, domain);
443 int proto = SCARG(uap, protocol);
444
445 p = curproc;
446
447 error = makesocket(l, &fp1, &fd, flags, type, domain, proto, NULL);
448 if (error)
449 return error;
450 so1 = fp1->f_data;
451 sv[0] = fd;
452
453 error = makesocket(l, &fp2, &fd, flags, type, domain, proto, so1);
454 if (error)
455 goto out;
456 so2 = fp2->f_data;
457 sv[1] = fd;
458
459 solock(so1);
460 error = soconnect2(so1, so2);
461 if (error == 0 && type == SOCK_DGRAM) {
462 /*
463 * Datagram socket connection is asymmetric.
464 */
465 error = soconnect2(so2, so1);
466 }
467 sounlock(so1);
468
469 if (error == 0)
470 error = copyout(sv, SCARG(uap, rsv), sizeof(sv));
471 if (error == 0) {
472 fd_affix(p, fp2, sv[1]);
473 fd_affix(p, fp1, sv[0]);
474 return 0;
475 }
476 fd_abort(p, fp2, sv[1]);
477 (void)soclose(so2);
478 out:
479 fd_abort(p, fp1, sv[0]);
480 (void)soclose(so1);
481 return error;
482 }
483
484 int
485 sys_sendto(struct lwp *l, const struct sys_sendto_args *uap, register_t *retval)
486 {
487 /* {
488 syscallarg(int) s;
489 syscallarg(const void *) buf;
490 syscallarg(size_t) len;
491 syscallarg(int) flags;
492 syscallarg(const struct sockaddr *) to;
493 syscallarg(unsigned int) tolen;
494 } */
495 struct msghdr msg;
496 struct iovec aiov;
497
498 msg.msg_name = __UNCONST(SCARG(uap, to)); /* XXXUNCONST kills const */
499 msg.msg_namelen = SCARG(uap, tolen);
500 msg.msg_iov = &aiov;
501 msg.msg_iovlen = 1;
502 msg.msg_control = NULL;
503 msg.msg_flags = 0;
504 aiov.iov_base = __UNCONST(SCARG(uap, buf)); /* XXXUNCONST kills const */
505 aiov.iov_len = SCARG(uap, len);
506 return do_sys_sendmsg(l, SCARG(uap, s), &msg, SCARG(uap, flags), retval);
507 }
508
509 int
510 sys_sendmsg(struct lwp *l, const struct sys_sendmsg_args *uap, register_t *retval)
511 {
512 /* {
513 syscallarg(int) s;
514 syscallarg(const struct msghdr *) msg;
515 syscallarg(int) flags;
516 } */
517 struct msghdr msg;
518 int error;
519
520 error = copyin(SCARG(uap, msg), &msg, sizeof(msg));
521 if (error)
522 return (error);
523
524 msg.msg_flags = MSG_IOVUSRSPACE;
525 return do_sys_sendmsg(l, SCARG(uap, s), &msg, SCARG(uap, flags), retval);
526 }
527
528 int
529 do_sys_sendmsg(struct lwp *l, int s, struct msghdr *mp, int flags,
530 register_t *retsize)
531 {
532 struct iovec aiov[UIO_SMALLIOV], *iov = aiov, *tiov, *ktriov = NULL;
533 struct mbuf *to, *control;
534 struct socket *so;
535 struct uio auio;
536 size_t len, iovsz;
537 int i, error;
538
539 ktrkuser("msghdr", mp, sizeof *mp);
540
541 /* If the caller passed us stuff in mbufs, we must free them. */
542 to = (mp->msg_flags & MSG_NAMEMBUF) ? mp->msg_name : NULL;
543 control = (mp->msg_flags & MSG_CONTROLMBUF) ? mp->msg_control : NULL;
544 iovsz = mp->msg_iovlen * sizeof(struct iovec);
545
546 if (mp->msg_flags & MSG_IOVUSRSPACE) {
547 if ((unsigned int)mp->msg_iovlen > UIO_SMALLIOV) {
548 if ((unsigned int)mp->msg_iovlen > IOV_MAX) {
549 error = EMSGSIZE;
550 goto bad;
551 }
552 iov = kmem_alloc(iovsz, KM_SLEEP);
553 }
554 if (mp->msg_iovlen != 0) {
555 error = copyin(mp->msg_iov, iov, iovsz);
556 if (error)
557 goto bad;
558 }
559 mp->msg_iov = iov;
560 }
561
562 auio.uio_iov = mp->msg_iov;
563 auio.uio_iovcnt = mp->msg_iovlen;
564 auio.uio_rw = UIO_WRITE;
565 auio.uio_offset = 0; /* XXX */
566 auio.uio_resid = 0;
567 KASSERT(l == curlwp);
568 auio.uio_vmspace = l->l_proc->p_vmspace;
569
570 for (i = 0, tiov = mp->msg_iov; i < mp->msg_iovlen; i++, tiov++) {
571 /*
572 * Writes return ssize_t because -1 is returned on error.
573 * Therefore, we must restrict the length to SSIZE_MAX to
574 * avoid garbage return values.
575 */
576 auio.uio_resid += tiov->iov_len;
577 if (tiov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
578 error = EINVAL;
579 goto bad;
580 }
581 }
582
583 if (mp->msg_name && to == NULL) {
584 error = sockargs(&to, mp->msg_name, mp->msg_namelen,
585 MT_SONAME);
586 if (error)
587 goto bad;
588 }
589
590 if (mp->msg_control) {
591 if (mp->msg_controllen < CMSG_ALIGN(sizeof(struct cmsghdr))) {
592 error = EINVAL;
593 goto bad;
594 }
595 if (control == NULL) {
596 error = sockargs(&control, mp->msg_control,
597 mp->msg_controllen, MT_CONTROL);
598 if (error)
599 goto bad;
600 }
601 }
602
603 if (ktrpoint(KTR_GENIO)) {
604 ktriov = kmem_alloc(iovsz, KM_SLEEP);
605 memcpy(ktriov, auio.uio_iov, iovsz);
606 }
607
608 if ((error = fd_getsock(s, &so)) != 0)
609 goto bad;
610
611 if (mp->msg_name)
612 MCLAIM(to, so->so_mowner);
613 if (mp->msg_control)
614 MCLAIM(control, so->so_mowner);
615
616 len = auio.uio_resid;
617 error = (*so->so_send)(so, to, &auio, NULL, control, flags, l);
618 /* Protocol is responsible for freeing 'control' */
619 control = NULL;
620
621 fd_putfile(s);
622
623 if (error) {
624 if (auio.uio_resid != len && (error == ERESTART ||
625 error == EINTR || error == EWOULDBLOCK))
626 error = 0;
627 if (error == EPIPE && (flags & MSG_NOSIGNAL) == 0) {
628 mutex_enter(proc_lock);
629 psignal(l->l_proc, SIGPIPE);
630 mutex_exit(proc_lock);
631 }
632 }
633 if (error == 0)
634 *retsize = len - auio.uio_resid;
635
636 bad:
637 if (ktriov != NULL) {
638 ktrgeniov(s, UIO_WRITE, ktriov, *retsize, error);
639 kmem_free(ktriov, iovsz);
640 }
641
642 if (iov != aiov)
643 kmem_free(iov, iovsz);
644 if (to)
645 m_freem(to);
646 if (control)
647 m_freem(control);
648
649 return (error);
650 }
651
652 int
653 sys_recvfrom(struct lwp *l, const struct sys_recvfrom_args *uap, register_t *retval)
654 {
655 /* {
656 syscallarg(int) s;
657 syscallarg(void *) buf;
658 syscallarg(size_t) len;
659 syscallarg(int) flags;
660 syscallarg(struct sockaddr *) from;
661 syscallarg(unsigned int *) fromlenaddr;
662 } */
663 struct msghdr msg;
664 struct iovec aiov;
665 int error;
666 struct mbuf *from;
667
668 msg.msg_name = NULL;
669 msg.msg_iov = &aiov;
670 msg.msg_iovlen = 1;
671 aiov.iov_base = SCARG(uap, buf);
672 aiov.iov_len = SCARG(uap, len);
673 msg.msg_control = NULL;
674 msg.msg_flags = SCARG(uap, flags) & MSG_USERFLAGS;
675
676 error = do_sys_recvmsg(l, SCARG(uap, s), &msg, &from, NULL, retval);
677 if (error != 0)
678 return error;
679
680 error = copyout_sockname(SCARG(uap, from), SCARG(uap, fromlenaddr),
681 MSG_LENUSRSPACE, from);
682 if (from != NULL)
683 m_free(from);
684 return error;
685 }
686
687 int
688 sys_recvmsg(struct lwp *l, const struct sys_recvmsg_args *uap, register_t *retval)
689 {
690 /* {
691 syscallarg(int) s;
692 syscallarg(struct msghdr *) msg;
693 syscallarg(int) flags;
694 } */
695 struct msghdr msg;
696 int error;
697 struct mbuf *from, *control;
698
699 error = copyin(SCARG(uap, msg), &msg, sizeof(msg));
700 if (error)
701 return (error);
702
703 msg.msg_flags = (SCARG(uap, flags) & MSG_USERFLAGS) | MSG_IOVUSRSPACE;
704
705 error = do_sys_recvmsg(l, SCARG(uap, s), &msg, &from,
706 msg.msg_control != NULL ? &control : NULL, retval);
707 if (error != 0)
708 return error;
709
710 if (msg.msg_control != NULL)
711 error = copyout_msg_control(l, &msg, control);
712
713 if (error == 0)
714 error = copyout_sockname(msg.msg_name, &msg.msg_namelen, 0,
715 from);
716 if (from != NULL)
717 m_free(from);
718 if (error == 0) {
719 ktrkuser("msghdr", &msg, sizeof msg);
720 error = copyout(&msg, SCARG(uap, msg), sizeof(msg));
721 }
722
723 return (error);
724 }
725
726 /*
727 * Adjust for a truncated SCM_RIGHTS control message.
728 * This means closing any file descriptors that aren't present
729 * in the returned buffer.
730 * m is the mbuf holding the (already externalized) SCM_RIGHTS message.
731 */
732 static void
733 free_rights(struct mbuf *m)
734 {
735 int nfd;
736 int i;
737 int *fdv;
738
739 nfd = m->m_len < CMSG_SPACE(sizeof(int)) ? 0
740 : (m->m_len - CMSG_SPACE(sizeof(int))) / sizeof(int) + 1;
741 fdv = (int *) CMSG_DATA(mtod(m,struct cmsghdr *));
742 for (i = 0; i < nfd; i++) {
743 if (fd_getfile(fdv[i]) != NULL)
744 (void)fd_close(fdv[i]);
745 }
746 }
747
748 void
749 free_control_mbuf(struct lwp *l, struct mbuf *control, struct mbuf *uncopied)
750 {
751 struct mbuf *next;
752 struct cmsghdr *cmsg;
753 bool do_free_rights = false;
754
755 while (control != NULL) {
756 cmsg = mtod(control, struct cmsghdr *);
757 if (control == uncopied)
758 do_free_rights = true;
759 if (do_free_rights && cmsg->cmsg_level == SOL_SOCKET
760 && cmsg->cmsg_type == SCM_RIGHTS)
761 free_rights(control);
762 next = control->m_next;
763 m_free(control);
764 control = next;
765 }
766 }
767
768 /* Copy socket control/CMSG data to user buffer, frees the mbuf */
769 int
770 copyout_msg_control(struct lwp *l, struct msghdr *mp, struct mbuf *control)
771 {
772 int i, len, error = 0;
773 struct cmsghdr *cmsg;
774 struct mbuf *m;
775 char *q;
776
777 len = mp->msg_controllen;
778 if (len <= 0 || control == 0) {
779 mp->msg_controllen = 0;
780 free_control_mbuf(l, control, control);
781 return 0;
782 }
783
784 q = (char *)mp->msg_control;
785
786 for (m = control; m != NULL; ) {
787 cmsg = mtod(m, struct cmsghdr *);
788 i = m->m_len;
789 if (len < i) {
790 mp->msg_flags |= MSG_CTRUNC;
791 if (cmsg->cmsg_level == SOL_SOCKET
792 && cmsg->cmsg_type == SCM_RIGHTS)
793 /* Do not truncate me ... */
794 break;
795 i = len;
796 }
797 error = copyout(mtod(m, void *), q, i);
798 ktrkuser("msgcontrol", mtod(m, void *), i);
799 if (error != 0) {
800 /* We must free all the SCM_RIGHTS */
801 m = control;
802 break;
803 }
804 m = m->m_next;
805 if (m)
806 i = ALIGN(i);
807 q += i;
808 len -= i;
809 if (len <= 0)
810 break;
811 }
812
813 free_control_mbuf(l, control, m);
814
815 mp->msg_controllen = q - (char *)mp->msg_control;
816 return error;
817 }
818
819 int
820 do_sys_recvmsg(struct lwp *l, int s, struct msghdr *mp, struct mbuf **from,
821 struct mbuf **control, register_t *retsize)
822 {
823 struct iovec aiov[UIO_SMALLIOV], *iov = aiov, *tiov, *ktriov;
824 struct socket *so;
825 struct uio auio;
826 size_t len, iovsz;
827 int i, error;
828
829 ktrkuser("msghdr", mp, sizeof *mp);
830
831 *from = NULL;
832 if (control != NULL)
833 *control = NULL;
834
835 if ((error = fd_getsock(s, &so)) != 0)
836 return (error);
837
838 iovsz = mp->msg_iovlen * sizeof(struct iovec);
839
840 if (mp->msg_flags & MSG_IOVUSRSPACE) {
841 if ((unsigned int)mp->msg_iovlen > UIO_SMALLIOV) {
842 if ((unsigned int)mp->msg_iovlen > IOV_MAX) {
843 error = EMSGSIZE;
844 goto out;
845 }
846 iov = kmem_alloc(iovsz, KM_SLEEP);
847 }
848 if (mp->msg_iovlen != 0) {
849 error = copyin(mp->msg_iov, iov, iovsz);
850 if (error)
851 goto out;
852 }
853 auio.uio_iov = iov;
854 } else
855 auio.uio_iov = mp->msg_iov;
856 auio.uio_iovcnt = mp->msg_iovlen;
857 auio.uio_rw = UIO_READ;
858 auio.uio_offset = 0; /* XXX */
859 auio.uio_resid = 0;
860 KASSERT(l == curlwp);
861 auio.uio_vmspace = l->l_proc->p_vmspace;
862
863 tiov = auio.uio_iov;
864 for (i = 0; i < mp->msg_iovlen; i++, tiov++) {
865 /*
866 * Reads return ssize_t because -1 is returned on error.
867 * Therefore we must restrict the length to SSIZE_MAX to
868 * avoid garbage return values.
869 */
870 auio.uio_resid += tiov->iov_len;
871 if (tiov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
872 error = EINVAL;
873 goto out;
874 }
875 }
876
877 ktriov = NULL;
878 if (ktrpoint(KTR_GENIO)) {
879 ktriov = kmem_alloc(iovsz, KM_SLEEP);
880 memcpy(ktriov, auio.uio_iov, iovsz);
881 }
882
883 len = auio.uio_resid;
884 mp->msg_flags &= MSG_USERFLAGS;
885 error = (*so->so_receive)(so, from, &auio, NULL, control,
886 &mp->msg_flags);
887 len -= auio.uio_resid;
888 *retsize = len;
889 if (error != 0 && len != 0
890 && (error == ERESTART || error == EINTR || error == EWOULDBLOCK))
891 /* Some data transferred */
892 error = 0;
893
894 if (ktriov != NULL) {
895 ktrgeniov(s, UIO_READ, ktriov, len, error);
896 kmem_free(ktriov, iovsz);
897 }
898
899 if (error != 0) {
900 m_freem(*from);
901 *from = NULL;
902 if (control != NULL) {
903 free_control_mbuf(l, *control, *control);
904 *control = NULL;
905 }
906 }
907 out:
908 if (iov != aiov)
909 kmem_free(iov, iovsz);
910 fd_putfile(s);
911 return (error);
912 }
913
914
915 /* ARGSUSED */
916 int
917 sys_shutdown(struct lwp *l, const struct sys_shutdown_args *uap, register_t *retval)
918 {
919 /* {
920 syscallarg(int) s;
921 syscallarg(int) how;
922 } */
923 struct socket *so;
924 int error;
925
926 if ((error = fd_getsock(SCARG(uap, s), &so)) != 0)
927 return (error);
928 solock(so);
929 error = soshutdown(so, SCARG(uap, how));
930 sounlock(so);
931 fd_putfile(SCARG(uap, s));
932 return (error);
933 }
934
935 /* ARGSUSED */
936 int
937 sys_setsockopt(struct lwp *l, const struct sys_setsockopt_args *uap, register_t *retval)
938 {
939 /* {
940 syscallarg(int) s;
941 syscallarg(int) level;
942 syscallarg(int) name;
943 syscallarg(const void *) val;
944 syscallarg(unsigned int) valsize;
945 } */
946 struct sockopt sopt;
947 struct socket *so;
948 int error;
949 unsigned int len;
950
951 len = SCARG(uap, valsize);
952 if (len > 0 && SCARG(uap, val) == NULL)
953 return (EINVAL);
954
955 if (len > MCLBYTES)
956 return (EINVAL);
957
958 if ((error = fd_getsock(SCARG(uap, s), &so)) != 0)
959 return (error);
960
961 sockopt_init(&sopt, SCARG(uap, level), SCARG(uap, name), len);
962
963 if (len > 0) {
964 error = copyin(SCARG(uap, val), sopt.sopt_data, len);
965 if (error)
966 goto out;
967 }
968
969 error = sosetopt(so, &sopt);
970
971 out:
972 sockopt_destroy(&sopt);
973 fd_putfile(SCARG(uap, s));
974 return (error);
975 }
976
977 /* ARGSUSED */
978 int
979 sys_getsockopt(struct lwp *l, const struct sys_getsockopt_args *uap, register_t *retval)
980 {
981 /* {
982 syscallarg(int) s;
983 syscallarg(int) level;
984 syscallarg(int) name;
985 syscallarg(void *) val;
986 syscallarg(unsigned int *) avalsize;
987 } */
988 struct sockopt sopt;
989 struct socket *so;
990 unsigned int valsize, len;
991 int error;
992
993 if (SCARG(uap, val) != NULL) {
994 error = copyin(SCARG(uap, avalsize), &valsize, sizeof(valsize));
995 if (error)
996 return (error);
997 } else
998 valsize = 0;
999
1000 if ((error = fd_getsock(SCARG(uap, s), &so)) != 0)
1001 return (error);
1002
1003 sockopt_init(&sopt, SCARG(uap, level), SCARG(uap, name), 0);
1004
1005 error = sogetopt(so, &sopt);
1006 if (error)
1007 goto out;
1008
1009 if (valsize > 0) {
1010 len = min(valsize, sopt.sopt_size);
1011 error = copyout(sopt.sopt_data, SCARG(uap, val), len);
1012 if (error)
1013 goto out;
1014
1015 error = copyout(&len, SCARG(uap, avalsize), sizeof(len));
1016 if (error)
1017 goto out;
1018 }
1019
1020 out:
1021 sockopt_destroy(&sopt);
1022 fd_putfile(SCARG(uap, s));
1023 return (error);
1024 }
1025
1026 #ifdef PIPE_SOCKETPAIR
1027 /* ARGSUSED */
1028 int
1029 pipe1(struct lwp *l, register_t *retval, int flags)
1030 {
1031 file_t *rf, *wf;
1032 struct socket *rso, *wso;
1033 int fd, error;
1034 proc_t *p;
1035
1036 if (flags & ~(O_CLOEXEC|O_NONBLOCK))
1037 return EINVAL;
1038 p = curproc;
1039 if ((error = socreate(AF_LOCAL, &rso, SOCK_STREAM, 0, l, NULL)) != 0)
1040 return (error);
1041 if ((error = socreate(AF_LOCAL, &wso, SOCK_STREAM, 0, l, rso)) != 0)
1042 goto free1;
1043 /* remember this socket pair implements a pipe */
1044 wso->so_state |= SS_ISAPIPE;
1045 rso->so_state |= SS_ISAPIPE;
1046 if ((error = fd_allocfile(&rf, &fd)) != 0)
1047 goto free2;
1048 retval[0] = fd;
1049 rf->f_flag = FREAD | flags;
1050 rf->f_type = DTYPE_SOCKET;
1051 rf->f_ops = &socketops;
1052 rf->f_data = rso;
1053 if ((error = fd_allocfile(&wf, &fd)) != 0)
1054 goto free3;
1055 wf->f_flag = FWRITE | flags;
1056 wf->f_type = DTYPE_SOCKET;
1057 wf->f_ops = &socketops;
1058 wf->f_data = wso;
1059 retval[1] = fd;
1060 solock(wso);
1061 error = unp_connect2(wso, rso, PRU_CONNECT2);
1062 sounlock(wso);
1063 if (error != 0)
1064 goto free4;
1065 fd_affix(p, wf, (int)retval[1]);
1066 fd_affix(p, rf, (int)retval[0]);
1067 return (0);
1068 free4:
1069 fd_abort(p, wf, (int)retval[1]);
1070 free3:
1071 fd_abort(p, rf, (int)retval[0]);
1072 free2:
1073 (void)soclose(wso);
1074 free1:
1075 (void)soclose(rso);
1076 return (error);
1077 }
1078 #endif /* PIPE_SOCKETPAIR */
1079
1080 /*
1081 * Get socket name.
1082 */
1083 /* ARGSUSED */
1084 int
1085 do_sys_getsockname(struct lwp *l, int fd, int which, struct mbuf **nam)
1086 {
1087 struct socket *so;
1088 struct mbuf *m;
1089 int error;
1090
1091 if ((error = fd_getsock(fd, &so)) != 0)
1092 return error;
1093
1094 m = m_getclr(M_WAIT, MT_SONAME);
1095 MCLAIM(m, so->so_mowner);
1096
1097 solock(so);
1098 if (which == PRU_PEERADDR
1099 && (so->so_state & (SS_ISCONNECTED | SS_ISCONFIRMING)) == 0) {
1100 error = ENOTCONN;
1101 } else {
1102 *nam = m;
1103 error = (*so->so_proto->pr_usrreq)(so, which, NULL, m, NULL,
1104 NULL);
1105 }
1106 sounlock(so);
1107 if (error != 0)
1108 m_free(m);
1109 fd_putfile(fd);
1110 return error;
1111 }
1112
1113 int
1114 copyout_sockname(struct sockaddr *asa, unsigned int *alen, int flags,
1115 struct mbuf *addr)
1116 {
1117 int len;
1118 int error;
1119
1120 if (asa == NULL)
1121 /* Assume application not interested */
1122 return 0;
1123
1124 if (flags & MSG_LENUSRSPACE) {
1125 error = copyin(alen, &len, sizeof(len));
1126 if (error)
1127 return error;
1128 } else
1129 len = *alen;
1130 if (len < 0)
1131 return EINVAL;
1132
1133 if (addr == NULL) {
1134 len = 0;
1135 error = 0;
1136 } else {
1137 if (len > addr->m_len)
1138 len = addr->m_len;
1139 /* Maybe this ought to copy a chain ? */
1140 ktrkuser("sockname", mtod(addr, void *), len);
1141 error = copyout(mtod(addr, void *), asa, len);
1142 }
1143
1144 if (error == 0) {
1145 if (flags & MSG_LENUSRSPACE)
1146 error = copyout(&len, alen, sizeof(len));
1147 else
1148 *alen = len;
1149 }
1150
1151 return error;
1152 }
1153
1154 /*
1155 * Get socket name.
1156 */
1157 /* ARGSUSED */
1158 int
1159 sys_getsockname(struct lwp *l, const struct sys_getsockname_args *uap, register_t *retval)
1160 {
1161 /* {
1162 syscallarg(int) fdes;
1163 syscallarg(struct sockaddr *) asa;
1164 syscallarg(unsigned int *) alen;
1165 } */
1166 struct mbuf *m;
1167 int error;
1168
1169 error = do_sys_getsockname(l, SCARG(uap, fdes), PRU_SOCKADDR, &m);
1170 if (error != 0)
1171 return error;
1172
1173 error = copyout_sockname(SCARG(uap, asa), SCARG(uap, alen),
1174 MSG_LENUSRSPACE, m);
1175 if (m != NULL)
1176 m_free(m);
1177 return error;
1178 }
1179
1180 /*
1181 * Get name of peer for connected socket.
1182 */
1183 /* ARGSUSED */
1184 int
1185 sys_getpeername(struct lwp *l, const struct sys_getpeername_args *uap, register_t *retval)
1186 {
1187 /* {
1188 syscallarg(int) fdes;
1189 syscallarg(struct sockaddr *) asa;
1190 syscallarg(unsigned int *) alen;
1191 } */
1192 struct mbuf *m;
1193 int error;
1194
1195 error = do_sys_getsockname(l, SCARG(uap, fdes), PRU_PEERADDR, &m);
1196 if (error != 0)
1197 return error;
1198
1199 error = copyout_sockname(SCARG(uap, asa), SCARG(uap, alen),
1200 MSG_LENUSRSPACE, m);
1201 if (m != NULL)
1202 m_free(m);
1203 return error;
1204 }
1205
1206 /*
1207 * XXX In a perfect world, we wouldn't pass around socket control
1208 * XXX arguments in mbufs, and this could go away.
1209 */
1210 int
1211 sockargs(struct mbuf **mp, const void *bf, size_t buflen, int type)
1212 {
1213 struct sockaddr *sa;
1214 struct mbuf *m;
1215 int error;
1216
1217 /*
1218 * We can't allow socket names > UCHAR_MAX in length, since that
1219 * will overflow sa_len. Control data more than a page size in
1220 * length is just too much.
1221 */
1222 if (buflen > (type == MT_SONAME ? UCHAR_MAX : PAGE_SIZE))
1223 return (EINVAL);
1224
1225 /* Allocate an mbuf to hold the arguments. */
1226 m = m_get(M_WAIT, type);
1227 /* can't claim. don't who to assign it to. */
1228 if (buflen > MLEN) {
1229 /*
1230 * Won't fit into a regular mbuf, so we allocate just
1231 * enough external storage to hold the argument.
1232 */
1233 MEXTMALLOC(m, buflen, M_WAITOK);
1234 }
1235 m->m_len = buflen;
1236 error = copyin(bf, mtod(m, void *), buflen);
1237 if (error) {
1238 (void) m_free(m);
1239 return (error);
1240 }
1241 ktrkuser(mbuftypes[type], mtod(m, void *), buflen);
1242 *mp = m;
1243 if (type == MT_SONAME) {
1244 sa = mtod(m, struct sockaddr *);
1245 #if BYTE_ORDER != BIG_ENDIAN
1246 /*
1247 * 4.3BSD compat thing - need to stay, since bind(2),
1248 * connect(2), sendto(2) were not versioned for COMPAT_43.
1249 */
1250 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1251 sa->sa_family = sa->sa_len;
1252 #endif
1253 sa->sa_len = buflen;
1254 }
1255 return (0);
1256 }
1257