uipc_syscalls.c revision 1.114 1 /* $NetBSD: uipc_syscalls.c,v 1.114 2007/07/01 18:38:11 dsl Exp $ */
2
3 /*
4 * Copyright (c) 1982, 1986, 1989, 1990, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 *
31 * @(#)uipc_syscalls.c 8.6 (Berkeley) 2/14/95
32 */
33
34 #include <sys/cdefs.h>
35 __KERNEL_RCSID(0, "$NetBSD: uipc_syscalls.c,v 1.114 2007/07/01 18:38:11 dsl Exp $");
36
37 #include "opt_ktrace.h"
38 #include "opt_pipe.h"
39
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/filedesc.h>
43 #include <sys/proc.h>
44 #include <sys/file.h>
45 #include <sys/buf.h>
46 #include <sys/malloc.h>
47 #include <sys/mbuf.h>
48 #include <sys/protosw.h>
49 #include <sys/socket.h>
50 #include <sys/socketvar.h>
51 #include <sys/signalvar.h>
52 #include <sys/un.h>
53 #ifdef KTRACE
54 #include <sys/ktrace.h>
55 #endif
56 #include <sys/event.h>
57
58 #include <sys/mount.h>
59 #include <sys/syscallargs.h>
60
61 #include <uvm/uvm_extern.h>
62
63 /*
64 * System call interface to the socket abstraction.
65 */
66 extern const struct fileops socketops;
67
68 int
69 sys___socket30(struct lwp *l, void *v, register_t *retval)
70 {
71 struct sys___socket30_args /* {
72 syscallarg(int) domain;
73 syscallarg(int) type;
74 syscallarg(int) protocol;
75 } */ *uap = v;
76
77 struct filedesc *fdp;
78 struct socket *so;
79 struct file *fp;
80 int fd, error;
81
82 fdp = l->l_proc->p_fd;
83 /* falloc() will use the desciptor for us */
84 if ((error = falloc(l, &fp, &fd)) != 0)
85 return (error);
86 fp->f_flag = FREAD|FWRITE;
87 fp->f_type = DTYPE_SOCKET;
88 fp->f_ops = &socketops;
89 error = socreate(SCARG(uap, domain), &so, SCARG(uap, type),
90 SCARG(uap, protocol), l);
91 if (error) {
92 FILE_UNUSE(fp, l);
93 fdremove(fdp, fd);
94 ffree(fp);
95 } else {
96 fp->f_data = so;
97 FILE_SET_MATURE(fp);
98 FILE_UNUSE(fp, l);
99 *retval = fd;
100 }
101 return (error);
102 }
103
104 /* ARGSUSED */
105 int
106 sys_bind(struct lwp *l, void *v, register_t *retval)
107 {
108 struct sys_bind_args /* {
109 syscallarg(int) s;
110 syscallarg(const struct sockaddr *) name;
111 syscallarg(unsigned int) namelen;
112 } */ *uap = v;
113 struct mbuf *nam;
114 int error;
115
116 error = sockargs(&nam, SCARG(uap, name), SCARG(uap, namelen),
117 MT_SONAME);
118 if (error)
119 return error;
120
121 return do_sys_bind(l, SCARG(uap, s), nam);
122 }
123
124 int
125 do_sys_bind(struct lwp *l, int s, struct mbuf *nam)
126 {
127 struct file *fp;
128 int error;
129
130 /* getsock() will use the descriptor for us */
131 if ((error = getsock(l->l_proc->p_fd, s, &fp)) != 0) {
132 m_freem(nam);
133 return (error);
134 }
135 MCLAIM(nam, ((struct socket *)fp->f_data)->so_mowner);
136 error = sobind(fp->f_data, nam, l);
137 m_freem(nam);
138 FILE_UNUSE(fp, l);
139 return error;
140 }
141
142 /* ARGSUSED */
143 int
144 sys_listen(struct lwp *l, void *v, register_t *retval)
145 {
146 struct sys_listen_args /* {
147 syscallarg(int) s;
148 syscallarg(int) backlog;
149 } */ *uap = v;
150 struct file *fp;
151 int error;
152
153 /* getsock() will use the descriptor for us */
154 if ((error = getsock(l->l_proc->p_fd, SCARG(uap, s), &fp)) != 0)
155 return (error);
156 error = solisten(fp->f_data, SCARG(uap, backlog));
157 FILE_UNUSE(fp, l);
158 return error;
159 }
160
161 int
162 do_sys_accept(struct lwp *l, int sock, struct mbuf **name, register_t *new_sock)
163 {
164 struct filedesc *fdp;
165 struct file *fp;
166 struct mbuf *nam;
167 int error, s, fd;
168 struct socket *so;
169 int fflag;
170
171 fdp = l->l_proc->p_fd;
172
173 /* getsock() will use the descriptor for us */
174 if ((error = getsock(fdp, sock, &fp)) != 0)
175 return (error);
176 s = splsoftnet();
177 so = (struct socket *)fp->f_data;
178 FILE_UNUSE(fp, l);
179 if (!(so->so_proto->pr_flags & PR_LISTEN)) {
180 splx(s);
181 return (EOPNOTSUPP);
182 }
183 if ((so->so_options & SO_ACCEPTCONN) == 0) {
184 splx(s);
185 return (EINVAL);
186 }
187 if ((so->so_state & SS_NBIO) && so->so_qlen == 0) {
188 splx(s);
189 return (EWOULDBLOCK);
190 }
191 while (so->so_qlen == 0 && so->so_error == 0) {
192 if (so->so_state & SS_CANTRCVMORE) {
193 so->so_error = ECONNABORTED;
194 break;
195 }
196 error = tsleep(&so->so_timeo, PSOCK | PCATCH,
197 netcon, 0);
198 if (error) {
199 splx(s);
200 return (error);
201 }
202 }
203 if (so->so_error) {
204 error = so->so_error;
205 so->so_error = 0;
206 splx(s);
207 return (error);
208 }
209 fflag = fp->f_flag;
210 /* falloc() will use the descriptor for us */
211 if ((error = falloc(l, &fp, &fd)) != 0) {
212 splx(s);
213 return (error);
214 }
215 *new_sock = fd;
216
217 /* connection has been removed from the listen queue */
218 KNOTE(&so->so_rcv.sb_sel.sel_klist, 0);
219
220 { struct socket *aso = TAILQ_FIRST(&so->so_q);
221 if (soqremque(aso, 1) == 0)
222 panic("accept");
223 so = aso;
224 }
225 fp->f_type = DTYPE_SOCKET;
226 fp->f_flag = fflag;
227 fp->f_ops = &socketops;
228 fp->f_data = so;
229 nam = m_get(M_WAIT, MT_SONAME);
230 error = soaccept(so, nam);
231
232 if (error) {
233 /* an error occurred, free the file descriptor and mbuf */
234 m_freem(nam);
235 fdremove(fdp, fd);
236 closef(fp, l);
237 } else {
238 FILE_SET_MATURE(fp);
239 FILE_UNUSE(fp, l);
240 *name = nam;
241 }
242 splx(s);
243 return (error);
244 }
245
246 int
247 sys_accept(struct lwp *l, void *v, register_t *retval)
248 {
249 struct sys_accept_args /* {
250 syscallarg(int) s;
251 syscallarg(struct sockaddr *) name;
252 syscallarg(unsigned int *) anamelen;
253 } */ *uap = v;
254 int error;
255 struct mbuf *name;
256
257 error = do_sys_accept(l, SCARG(uap, s), &name, retval);
258 if (error != 0)
259 return error;
260
261 error = copyout_sockname(SCARG(uap, name), SCARG(uap, anamelen),
262 MSG_LENUSRSPACE, name);
263 if (name != NULL)
264 m_free(name);
265 if (error != 0)
266 fdrelease(l, *retval);
267 return error;
268 }
269
270 /* ARGSUSED */
271 int
272 sys_connect(struct lwp *l, void *v, register_t *retval)
273 {
274 struct sys_connect_args /* {
275 syscallarg(int) s;
276 syscallarg(const struct sockaddr *) name;
277 syscallarg(unsigned int) namelen;
278 } */ *uap = v;
279 int error;
280 struct mbuf *nam;
281
282 error = sockargs(&nam, SCARG(uap, name), SCARG(uap, namelen),
283 MT_SONAME);
284 if (error)
285 return error;
286 return do_sys_connect(l, SCARG(uap, s), nam);
287 }
288
289 int
290 do_sys_connect(struct lwp *l, int s, struct mbuf *nam)
291 {
292 struct file *fp;
293 struct socket *so;
294 int error;
295 int interrupted = 0;
296
297 /* getsock() will use the descriptor for us */
298 if ((error = getsock(l->l_proc->p_fd, s, &fp)) != 0) {
299 m_freem(nam);
300 return (error);
301 }
302 so = fp->f_data;
303 MCLAIM(nam, so->so_mowner);
304 if (so->so_state & SS_ISCONNECTING) {
305 error = EALREADY;
306 goto out;
307 }
308
309 error = soconnect(so, nam, l);
310 if (error)
311 goto bad;
312 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
313 error = EINPROGRESS;
314 goto out;
315 }
316 s = splsoftnet();
317 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
318 error = tsleep(&so->so_timeo, PSOCK | PCATCH,
319 netcon, 0);
320 if (error) {
321 if (error == EINTR || error == ERESTART)
322 interrupted = 1;
323 break;
324 }
325 }
326 if (error == 0) {
327 error = so->so_error;
328 so->so_error = 0;
329 }
330 splx(s);
331 bad:
332 if (!interrupted)
333 so->so_state &= ~SS_ISCONNECTING;
334 if (error == ERESTART)
335 error = EINTR;
336 out:
337 FILE_UNUSE(fp, l);
338 m_freem(nam);
339 return (error);
340 }
341
342 int
343 sys_socketpair(struct lwp *l, void *v, register_t *retval)
344 {
345 struct sys_socketpair_args /* {
346 syscallarg(int) domain;
347 syscallarg(int) type;
348 syscallarg(int) protocol;
349 syscallarg(int *) rsv;
350 } */ *uap = v;
351 struct filedesc *fdp;
352 struct file *fp1, *fp2;
353 struct socket *so1, *so2;
354 int fd, error, sv[2];
355
356 fdp = l->l_proc->p_fd;
357 error = socreate(SCARG(uap, domain), &so1, SCARG(uap, type),
358 SCARG(uap, protocol), l);
359 if (error)
360 return (error);
361 error = socreate(SCARG(uap, domain), &so2, SCARG(uap, type),
362 SCARG(uap, protocol), l);
363 if (error)
364 goto free1;
365 /* falloc() will use the descriptor for us */
366 if ((error = falloc(l, &fp1, &fd)) != 0)
367 goto free2;
368 sv[0] = fd;
369 fp1->f_flag = FREAD|FWRITE;
370 fp1->f_type = DTYPE_SOCKET;
371 fp1->f_ops = &socketops;
372 fp1->f_data = so1;
373 if ((error = falloc(l, &fp2, &fd)) != 0)
374 goto free3;
375 fp2->f_flag = FREAD|FWRITE;
376 fp2->f_type = DTYPE_SOCKET;
377 fp2->f_ops = &socketops;
378 fp2->f_data = so2;
379 sv[1] = fd;
380 if ((error = soconnect2(so1, so2)) != 0)
381 goto free4;
382 if (SCARG(uap, type) == SOCK_DGRAM) {
383 /*
384 * Datagram socket connection is asymmetric.
385 */
386 if ((error = soconnect2(so2, so1)) != 0)
387 goto free4;
388 }
389 error = copyout(sv, SCARG(uap, rsv), 2 * sizeof(int));
390 FILE_SET_MATURE(fp1);
391 FILE_SET_MATURE(fp2);
392 FILE_UNUSE(fp1, l);
393 FILE_UNUSE(fp2, l);
394 return (error);
395 free4:
396 FILE_UNUSE(fp2, l);
397 ffree(fp2);
398 fdremove(fdp, sv[1]);
399 free3:
400 FILE_UNUSE(fp1, l);
401 ffree(fp1);
402 fdremove(fdp, sv[0]);
403 free2:
404 (void)soclose(so2);
405 free1:
406 (void)soclose(so1);
407 return (error);
408 }
409
410 int
411 sys_sendto(struct lwp *l, void *v, register_t *retval)
412 {
413 struct sys_sendto_args /* {
414 syscallarg(int) s;
415 syscallarg(const void *) buf;
416 syscallarg(size_t) len;
417 syscallarg(int) flags;
418 syscallarg(const struct sockaddr *) to;
419 syscallarg(unsigned int) tolen;
420 } */ *uap = v;
421 struct msghdr msg;
422 struct iovec aiov;
423
424 msg.msg_name = __UNCONST(SCARG(uap, to)); /* XXXUNCONST kills const */
425 msg.msg_namelen = SCARG(uap, tolen);
426 msg.msg_iov = &aiov;
427 msg.msg_iovlen = 1;
428 msg.msg_control = 0;
429 msg.msg_flags = 0;
430 aiov.iov_base = __UNCONST(SCARG(uap, buf)); /* XXXUNCONST kills const */
431 aiov.iov_len = SCARG(uap, len);
432 return do_sys_sendmsg(l, SCARG(uap, s), &msg, SCARG(uap, flags), retval);
433 }
434
435 int
436 sys_sendmsg(struct lwp *l, void *v, register_t *retval)
437 {
438 struct sys_sendmsg_args /* {
439 syscallarg(int) s;
440 syscallarg(const struct msghdr *) msg;
441 syscallarg(int) flags;
442 } */ *uap = v;
443 struct msghdr msg;
444 int error;
445
446 error = copyin(SCARG(uap, msg), &msg, sizeof(msg));
447 if (error)
448 return (error);
449
450 msg.msg_flags = MSG_IOVUSRSPACE;
451 return do_sys_sendmsg(l, SCARG(uap, s), &msg, SCARG(uap, flags), retval);
452 }
453
454 int
455 do_sys_sendmsg(struct lwp *l, int s, struct msghdr *mp, int flags,
456 register_t *retsize)
457 {
458 struct file *fp;
459 struct uio auio;
460 int i, len, error;
461 struct mbuf *to, *control;
462 struct socket *so;
463 struct iovec *tiov;
464 struct iovec aiov[UIO_SMALLIOV], *iov = aiov;
465 #ifdef KTRACE
466 struct iovec *ktriov;
467 #endif
468
469 /* If the caller passed us stuff in mbufs, we must free them */
470 if (mp->msg_flags & MSG_NAMEMBUF)
471 to = mp->msg_name;
472 else
473 to = NULL;
474
475 if (mp->msg_flags & MSG_CONTROLMBUF)
476 control = mp->msg_control;
477 else
478 control = NULL;
479
480 if (mp->msg_flags & MSG_IOVUSRSPACE) {
481 if ((unsigned int)mp->msg_iovlen > UIO_SMALLIOV) {
482 if ((unsigned int)mp->msg_iovlen > IOV_MAX) {
483 error = EMSGSIZE;
484 goto bad;
485 }
486 iov = malloc(sizeof(struct iovec) * mp->msg_iovlen,
487 M_IOV, M_WAITOK);
488 }
489 if (mp->msg_iovlen != 0) {
490 error = copyin(mp->msg_iov, iov,
491 (size_t)(mp->msg_iovlen * sizeof(struct iovec)));
492 if (error)
493 goto bad;
494 }
495 mp->msg_iov = iov;
496 }
497
498 #ifdef KTRACE
499 ktriov = NULL;
500 #endif
501
502 auio.uio_iov = mp->msg_iov;
503 auio.uio_iovcnt = mp->msg_iovlen;
504 auio.uio_rw = UIO_WRITE;
505 auio.uio_offset = 0; /* XXX */
506 auio.uio_resid = 0;
507 KASSERT(l == curlwp);
508 auio.uio_vmspace = l->l_proc->p_vmspace;
509
510 for (i = 0, tiov = mp->msg_iov; i < mp->msg_iovlen; i++, tiov++) {
511 #if 0
512 /* cannot happen; iov_len is unsigned */
513 if (tiov->iov_len < 0) {
514 error = EINVAL;
515 goto bad;
516 }
517 #endif
518 /*
519 * Writes return ssize_t because -1 is returned on error.
520 * Therefore, we must restrict the length to SSIZE_MAX to
521 * avoid garbage return values.
522 */
523 auio.uio_resid += tiov->iov_len;
524 if (tiov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
525 error = EINVAL;
526 goto bad;
527 }
528 }
529
530 if (mp->msg_name && to == NULL) {
531 error = sockargs(&to, mp->msg_name, mp->msg_namelen,
532 MT_SONAME);
533 if (error)
534 goto bad;
535 }
536
537 if (mp->msg_control) {
538 if (mp->msg_controllen < CMSG_ALIGN(sizeof(struct cmsghdr))) {
539 error = EINVAL;
540 goto bad;
541 }
542 if (control == NULL) {
543 error = sockargs(&control, mp->msg_control,
544 mp->msg_controllen, MT_CONTROL);
545 if (error)
546 goto bad;
547 }
548 }
549
550 #ifdef KTRACE
551 if (KTRPOINT(l->l_proc, KTR_GENIO)) {
552 int iovlen = auio.uio_iovcnt * sizeof(struct iovec);
553
554 ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
555 memcpy(ktriov, auio.uio_iov, iovlen);
556 }
557 #endif
558
559 /* getsock() will use the descriptor for us */
560 if ((error = getsock(l->l_proc->p_fd, s, &fp)) != 0)
561 goto bad;
562 so = (struct socket *)fp->f_data;
563
564 if (mp->msg_name)
565 MCLAIM(to, so->so_mowner);
566 if (mp->msg_control)
567 MCLAIM(control, so->so_mowner);
568
569 len = auio.uio_resid;
570 error = (*so->so_send)(so, to, &auio, NULL, control, flags, l);
571 /* Protocol is responsible for freeing 'control' */
572 control = NULL;
573
574 FILE_UNUSE(fp, l);
575
576 if (error) {
577 if (auio.uio_resid != len && (error == ERESTART ||
578 error == EINTR || error == EWOULDBLOCK))
579 error = 0;
580 if (error == EPIPE && (flags & MSG_NOSIGNAL) == 0) {
581 mutex_enter(&proclist_mutex);
582 psignal(l->l_proc, SIGPIPE);
583 mutex_exit(&proclist_mutex);
584 }
585 }
586 if (error == 0)
587 *retsize = len - auio.uio_resid;
588
589 #ifdef KTRACE
590 if (ktriov != NULL) {
591 if (error == 0)
592 ktrgenio(l, s, UIO_WRITE, ktriov, *retsize, error);
593 free(ktriov, M_TEMP);
594 }
595 #endif
596
597 bad:
598 if (iov != aiov)
599 free(iov, M_IOV);
600 if (to)
601 m_freem(to);
602 if (control != NULL)
603 m_freem(control);
604
605 return (error);
606 }
607
608 int
609 sys_recvfrom(struct lwp *l, void *v, register_t *retval)
610 {
611 struct sys_recvfrom_args /* {
612 syscallarg(int) s;
613 syscallarg(void *) buf;
614 syscallarg(size_t) len;
615 syscallarg(int) flags;
616 syscallarg(struct sockaddr *) from;
617 syscallarg(unsigned int *) fromlenaddr;
618 } */ *uap = v;
619 struct msghdr msg;
620 struct iovec aiov;
621 int error;
622 struct mbuf *from;
623
624 msg.msg_name = NULL;;
625 msg.msg_iov = &aiov;
626 msg.msg_iovlen = 1;
627 aiov.iov_base = SCARG(uap, buf);
628 aiov.iov_len = SCARG(uap, len);
629 msg.msg_control = NULL;
630 msg.msg_flags = SCARG(uap, flags) & MSG_USERFLAGS;
631
632 error = do_sys_recvmsg(l, SCARG(uap, s), &msg, &from, NULL, retval);
633 if (error != 0)
634 return error;
635
636 error = copyout_sockname(SCARG(uap, from), SCARG(uap, fromlenaddr),
637 MSG_LENUSRSPACE, from);
638 if (from != NULL)
639 m_free(from);
640 return error;
641 }
642
643 int
644 sys_recvmsg(struct lwp *l, void *v, register_t *retval)
645 {
646 struct sys_recvmsg_args /* {
647 syscallarg(int) s;
648 syscallarg(struct msghdr *) msg;
649 syscallarg(int) flags;
650 } */ *uap = v;
651 struct msghdr msg;
652 int error;
653 struct mbuf *from, *control;
654
655 error = copyin(SCARG(uap, msg), &msg, sizeof(msg));
656 if (error)
657 return (error);
658
659 msg.msg_flags = (SCARG(uap, flags) & MSG_USERFLAGS) | MSG_IOVUSRSPACE;
660
661 error = do_sys_recvmsg(l, SCARG(uap, s), &msg, &from,
662 msg.msg_control != NULL ? &control : NULL, retval);
663 if (error != 0)
664 return error;
665
666 if (msg.msg_control != NULL)
667 error = copyout_msg_control(l, &msg, control);
668
669 if (error == 0)
670 error = copyout_sockname(msg.msg_name, &msg.msg_namelen, 0,
671 from);
672 if (from != NULL)
673 m_free(from);
674 if (error == 0)
675 error = copyout(&msg, SCARG(uap, msg), sizeof(msg));
676
677 return (error);
678 }
679
680 /*
681 * Adjust for a truncated SCM_RIGHTS control message.
682 * This means closing any file descriptors that aren't present
683 * in the returned buffer.
684 * m is the mbuf holding the (already externalized) SCM_RIGHTS message.
685 */
686 static void
687 free_rights(struct mbuf *m, struct lwp *l)
688 {
689 int nfd;
690 int i;
691 int *fdv;
692
693 nfd = m->m_len < CMSG_SPACE(sizeof(int)) ? 0
694 : (m->m_len - CMSG_SPACE(sizeof(int))) / sizeof(int) + 1;
695 fdv = (int *) CMSG_DATA(mtod(m,struct cmsghdr *));
696 for (i = 0; i < nfd; i++)
697 fdrelease(l, fdv[i]);
698 }
699
700 void
701 free_control_mbuf(struct lwp *l, struct mbuf *control, struct mbuf *uncopied)
702 {
703 struct mbuf *next;
704 struct cmsghdr *cmsg;
705 bool do_free_rights = false;
706
707 while (control != NULL) {
708 cmsg = mtod(control, struct cmsghdr *);
709 if (control == uncopied)
710 do_free_rights = true;
711 if (do_free_rights && cmsg->cmsg_level == SOL_SOCKET
712 && cmsg->cmsg_type == SCM_RIGHTS)
713 free_rights(control, l);
714 next = control->m_next;
715 m_free(control);
716 control = next;
717 }
718 }
719
720 /* Copy socket control/CMSG data to user buffer, frees the mbuf */
721 int
722 copyout_msg_control(struct lwp *l, struct msghdr *mp, struct mbuf *control)
723 {
724 int i, len, error = 0;
725 struct cmsghdr *cmsg;
726 struct mbuf *m;
727 char *q;
728
729 len = mp->msg_controllen;
730 if (len <= 0 || control == 0) {
731 mp->msg_controllen = 0;
732 free_control_mbuf(l, control, control);
733 return 0;
734 }
735
736 q = (char *)mp->msg_control;
737
738 for (m = control; m != NULL; ) {
739 cmsg = mtod(m, struct cmsghdr *);
740 i = m->m_len;
741 if (len < i) {
742 mp->msg_flags |= MSG_CTRUNC;
743 if (cmsg->cmsg_level == SOL_SOCKET
744 && cmsg->cmsg_type == SCM_RIGHTS)
745 /* Do not truncate me ... */
746 break;
747 i = len;
748 }
749 error = copyout(mtod(m, void *), q, i);
750 if (error != 0) {
751 /* We must free all the SCM_RIGHTS */
752 m = control;
753 break;
754 }
755 m = m->m_next;
756 if (m)
757 i = ALIGN(i);
758 q += i;
759 len -= i;
760 if (len <= 0)
761 break;
762 }
763
764 free_control_mbuf(l, control, m);
765
766 mp->msg_controllen = q - (char *)mp->msg_control;
767 return error;
768 }
769
770 int
771 do_sys_recvmsg(struct lwp *l, int s, struct msghdr *mp, struct mbuf **from,
772 struct mbuf **control, register_t *retsize)
773 {
774 struct file *fp;
775 struct uio auio;
776 struct iovec aiov[UIO_SMALLIOV], *iov = aiov;
777 struct iovec *tiov;
778 int i, len, error;
779 struct socket *so;
780 #ifdef KTRACE
781 struct iovec *ktriov;
782 #endif
783
784 *from = NULL;
785 if (control != NULL)
786 *control = NULL;
787 #ifdef KTRACE
788 ktriov = NULL;
789 #endif
790
791 /* getsock() will use the descriptor for us */
792 if ((error = getsock(l->l_proc->p_fd, s, &fp)) != 0)
793 return (error);
794 so = (struct socket *)fp->f_data;
795
796 if (mp->msg_flags & MSG_IOVUSRSPACE) {
797 if ((unsigned int)mp->msg_iovlen > UIO_SMALLIOV) {
798 if ((unsigned int)mp->msg_iovlen > IOV_MAX) {
799 error = EMSGSIZE;
800 goto out;
801 }
802 iov = malloc(sizeof(struct iovec) * mp->msg_iovlen,
803 M_IOV, M_WAITOK);
804 }
805 if (mp->msg_iovlen != 0) {
806 error = copyin(mp->msg_iov, iov,
807 (size_t)(mp->msg_iovlen * sizeof(struct iovec)));
808 if (error)
809 goto out;
810 }
811 auio.uio_iov = iov;
812 } else
813 auio.uio_iov = mp->msg_iov;
814 auio.uio_iovcnt = mp->msg_iovlen;
815 auio.uio_rw = UIO_READ;
816 auio.uio_offset = 0; /* XXX */
817 auio.uio_resid = 0;
818 KASSERT(l == curlwp);
819 auio.uio_vmspace = l->l_proc->p_vmspace;
820
821 tiov = auio.uio_iov;
822 for (i = 0; i < mp->msg_iovlen; i++, tiov++) {
823 #if 0
824 /* cannot happen iov_len is unsigned */
825 if (tiov->iov_len < 0) {
826 error = EINVAL;
827 goto out;
828 }
829 #endif
830 /*
831 * Reads return ssize_t because -1 is returned on error.
832 * Therefore we must restrict the length to SSIZE_MAX to
833 * avoid garbage return values.
834 */
835 auio.uio_resid += tiov->iov_len;
836 if (tiov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
837 error = EINVAL;
838 goto out;
839 }
840 }
841 #ifdef KTRACE
842 if (KTRPOINT(l->l_proc, KTR_GENIO)) {
843 int iovlen = auio.uio_iovcnt * sizeof(struct iovec);
844
845 ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
846 memcpy(ktriov, auio.uio_iov, iovlen);
847 }
848 #endif
849
850 len = auio.uio_resid;
851 error = (*so->so_receive)(so, from, &auio, NULL, control,
852 &mp->msg_flags);
853 len -= auio.uio_resid;
854 *retsize = len;
855 if (error != 0 && len != 0
856 && (error == ERESTART || error == EINTR || error == EWOULDBLOCK))
857 /* Some data transferred */
858 error = 0;
859 #ifdef KTRACE
860 if (ktriov != NULL) {
861 if (error == 0)
862 ktrgenio(l, s, UIO_READ, ktriov, len, 0);
863 free(ktriov, M_TEMP);
864 }
865 #endif
866 if (error != 0) {
867 m_freem(*from);
868 *from = NULL;
869 if (control != NULL) {
870 free_control_mbuf(l, *control, *control);
871 *control = NULL;
872 }
873 }
874 out:
875 if (iov != aiov)
876 free(iov, M_TEMP);
877 FILE_UNUSE(fp, l);
878 return (error);
879 }
880
881
882 /* ARGSUSED */
883 int
884 sys_shutdown(struct lwp *l, void *v, register_t *retval)
885 {
886 struct sys_shutdown_args /* {
887 syscallarg(int) s;
888 syscallarg(int) how;
889 } */ *uap = v;
890 struct proc *p;
891 struct file *fp;
892 int error;
893
894 p = l->l_proc;
895 /* getsock() will use the descriptor for us */
896 if ((error = getsock(p->p_fd, SCARG(uap, s), &fp)) != 0)
897 return (error);
898 error = soshutdown((struct socket *)fp->f_data, SCARG(uap, how));
899 FILE_UNUSE(fp, l);
900 return (error);
901 }
902
903 /* ARGSUSED */
904 int
905 sys_setsockopt(struct lwp *l, void *v, register_t *retval)
906 {
907 struct sys_setsockopt_args /* {
908 syscallarg(int) s;
909 syscallarg(int) level;
910 syscallarg(int) name;
911 syscallarg(const void *) val;
912 syscallarg(unsigned int) valsize;
913 } */ *uap = v;
914 struct proc *p;
915 struct file *fp;
916 struct mbuf *m;
917 struct socket *so;
918 int error;
919 unsigned int len;
920
921 p = l->l_proc;
922 m = NULL;
923 /* getsock() will use the descriptor for us */
924 if ((error = getsock(p->p_fd, SCARG(uap, s), &fp)) != 0)
925 return (error);
926 so = (struct socket *)fp->f_data;
927 len = SCARG(uap, valsize);
928 if (len > MCLBYTES) {
929 error = EINVAL;
930 goto out;
931 }
932 if (SCARG(uap, val)) {
933 m = m_get(M_WAIT, MT_SOOPTS);
934 MCLAIM(m, so->so_mowner);
935 if (len > MLEN)
936 m_clget(m, M_WAIT);
937 error = copyin(SCARG(uap, val), mtod(m, void *), len);
938 if (error) {
939 (void) m_free(m);
940 goto out;
941 }
942 m->m_len = SCARG(uap, valsize);
943 }
944 error = sosetopt(so, SCARG(uap, level), SCARG(uap, name), m);
945 out:
946 FILE_UNUSE(fp, l);
947 return (error);
948 }
949
950 /* ARGSUSED */
951 int
952 sys_getsockopt(struct lwp *l, void *v, register_t *retval)
953 {
954 struct sys_getsockopt_args /* {
955 syscallarg(int) s;
956 syscallarg(int) level;
957 syscallarg(int) name;
958 syscallarg(void *) val;
959 syscallarg(unsigned int *) avalsize;
960 } */ *uap = v;
961 struct file *fp;
962 struct mbuf *m;
963 unsigned int op, i, valsize;
964 int error;
965
966 m = NULL;
967 /* getsock() will use the descriptor for us */
968 if ((error = getsock(l->l_proc->p_fd, SCARG(uap, s), &fp)) != 0)
969 return (error);
970 if (SCARG(uap, val)) {
971 error = copyin(SCARG(uap, avalsize),
972 &valsize, sizeof(valsize));
973 if (error)
974 goto out;
975 } else
976 valsize = 0;
977 if ((error = sogetopt((struct socket *)fp->f_data, SCARG(uap, level),
978 SCARG(uap, name), &m)) == 0 && SCARG(uap, val) && valsize &&
979 m != NULL) {
980 op = 0;
981 while (m && !error && op < valsize) {
982 i = min(m->m_len, (valsize - op));
983 error = copyout(mtod(m, void *), SCARG(uap, val), i);
984 op += i;
985 SCARG(uap, val) = ((uint8_t *)SCARG(uap, val)) + i;
986 m = m_free(m);
987 }
988 valsize = op;
989 if (error == 0)
990 error = copyout(&valsize,
991 SCARG(uap, avalsize), sizeof(valsize));
992 }
993 if (m != NULL)
994 (void) m_freem(m);
995 out:
996 FILE_UNUSE(fp, l);
997 return (error);
998 }
999
#ifdef PIPE_SOCKETPAIR
/*
 * pipe(2) implemented on top of a pair of connected AF_LOCAL stream
 * sockets.  retval[0] receives the read-only descriptor and retval[1]
 * the write-only descriptor.  Both sockets are tagged SS_ISAPIPE.
 */
/* ARGSUSED */
int
sys_pipe(struct lwp *l, void *v, register_t *retval)
{
	struct filedesc *fdp = l->l_proc->p_fd;
	struct socket *rso, *wso;
	struct file *rf, *wf;
	int error, fd;

	error = socreate(AF_LOCAL, &rso, SOCK_STREAM, 0, l);
	if (error != 0)
		return error;
	error = socreate(AF_LOCAL, &wso, SOCK_STREAM, 0, l);
	if (error != 0)
		goto close_rso;

	/* remember this socket pair implements a pipe */
	wso->so_state |= SS_ISAPIPE;
	rso->so_state |= SS_ISAPIPE;

	/* falloc() will use the descriptor for us */
	error = falloc(l, &rf, &fd);
	if (error != 0)
		goto close_wso;
	retval[0] = fd;
	rf->f_flag = FREAD;
	rf->f_type = DTYPE_SOCKET;
	rf->f_ops = &socketops;
	rf->f_data = rso;

	error = falloc(l, &wf, &fd);
	if (error != 0)
		goto drop_rf;
	wf->f_flag = FWRITE;
	wf->f_type = DTYPE_SOCKET;
	wf->f_ops = &socketops;
	wf->f_data = wso;
	retval[1] = fd;

	error = unp_connect2(wso, rso, PRU_CONNECT2);
	if (error != 0)
		goto drop_wf;

	FILE_SET_MATURE(rf);
	FILE_SET_MATURE(wf);
	FILE_UNUSE(rf, l);
	FILE_UNUSE(wf, l);
	return 0;

	/* Unwind in reverse order of construction. */
 drop_wf:
	FILE_UNUSE(wf, l);
	ffree(wf);
	fdremove(fdp, retval[1]);
 drop_rf:
	FILE_UNUSE(rf, l);
	ffree(rf);
	fdremove(fdp, retval[0]);
 close_wso:
	(void)soclose(wso);
 close_rso:
	(void)soclose(rso);
	return error;
}
#endif /* PIPE_SOCKETPAIR */
1055
1056 /*
1057 * Get socket name.
1058 */
1059 /* ARGSUSED */
1060 int
1061 do_sys_getsockname(struct lwp *l, int fd, int which, struct mbuf **nam)
1062 {
1063 struct file *fp;
1064 struct socket *so;
1065 struct mbuf *m;
1066 int error;
1067
1068 /* getsock() will use the descriptor for us */
1069 if ((error = getsock(l->l_proc->p_fd, fd, &fp)) != 0)
1070 return error;
1071 so = (struct socket *)fp->f_data;
1072
1073 if (which == PRU_PEERADDR
1074 && (so->so_state & (SS_ISCONNECTED | SS_ISCONFIRMING)) == 0) {
1075 error = ENOTCONN;
1076 goto bad;
1077 }
1078
1079 m = m_getclr(M_WAIT, MT_SONAME);
1080 *nam = m;
1081 MCLAIM(m, so->so_mowner);
1082 error = (*so->so_proto->pr_usrreq)(so, which, (struct mbuf *)0,
1083 m, (struct mbuf *)0, (struct lwp *)0);
1084 if (error != 0)
1085 m_free(m);
1086 bad:
1087 FILE_UNUSE(fp, l);
1088 return error;
1089 }
1090
1091 int
1092 copyout_sockname(struct sockaddr *asa, unsigned int *alen, int flags,
1093 struct mbuf *addr)
1094 {
1095 int len;
1096 int error;
1097
1098 if (asa == NULL)
1099 /* Assume application not interested */
1100 return 0;
1101
1102 if (flags & MSG_LENUSRSPACE) {
1103 error = copyin(alen, &len, sizeof(len));
1104 if (error)
1105 return error;
1106 } else
1107 len = *alen;
1108 if (len <= 0)
1109 return EINVAL;
1110
1111 if (addr == NULL) {
1112 len = 0;
1113 error = 0;
1114 } else {
1115 if (len > addr->m_len)
1116 len = addr->m_len;
1117 /* Maybe this ought to copy a chain ? */
1118 error = copyout(mtod(addr, void *), asa, len);
1119 }
1120
1121 if (error == 0) {
1122 if (flags & MSG_LENUSRSPACE)
1123 error = copyout(&len, alen, sizeof(len));
1124 else
1125 *alen = len;
1126 }
1127
1128 return error;
1129 }
1130
1131 /*
1132 * Get socket name.
1133 */
1134 /* ARGSUSED */
1135 int
1136 sys_getsockname(struct lwp *l, void *v, register_t *retval)
1137 {
1138 struct sys_getsockname_args /* {
1139 syscallarg(int) fdes;
1140 syscallarg(struct sockaddr *) asa;
1141 syscallarg(unsigned int *) alen;
1142 } */ *uap = v;
1143 struct mbuf *m;
1144 int error;
1145
1146 error = do_sys_getsockname(l, SCARG(uap, fdes), PRU_SOCKADDR, &m);
1147 if (error != 0)
1148 return error;
1149
1150 error = copyout_sockname(SCARG(uap, asa), SCARG(uap, alen),
1151 MSG_LENUSRSPACE, m);
1152 if (m != NULL)
1153 m_free(m);
1154 return error;
1155 }
1156
1157 /*
1158 * Get name of peer for connected socket.
1159 */
1160 /* ARGSUSED */
1161 int
1162 sys_getpeername(struct lwp *l, void *v, register_t *retval)
1163 {
1164 struct sys_getpeername_args /* {
1165 syscallarg(int) fdes;
1166 syscallarg(struct sockaddr *) asa;
1167 syscallarg(unsigned int *) alen;
1168 } */ *uap = v;
1169 struct mbuf *m;
1170 int error;
1171
1172 error = do_sys_getsockname(l, SCARG(uap, fdes), PRU_PEERADDR, &m);
1173 if (error != 0)
1174 return error;
1175
1176 error = copyout_sockname(SCARG(uap, asa), SCARG(uap, alen),
1177 MSG_LENUSRSPACE, m);
1178 if (m != NULL)
1179 m_free(m);
1180 return error;
1181 }
1182
1183 /*
1184 * XXX In a perfect world, we wouldn't pass around socket control
1185 * XXX arguments in mbufs, and this could go away.
1186 */
1187 int
1188 sockargs(struct mbuf **mp, const void *bf, size_t buflen, int type)
1189 {
1190 struct sockaddr *sa;
1191 struct mbuf *m;
1192 int error;
1193
1194 /*
1195 * We can't allow socket names > UCHAR_MAX in length, since that
1196 * will overflow sa_len. Control data more than a page size in
1197 * length is just too much.
1198 */
1199 if (buflen > (type == MT_SONAME ? UCHAR_MAX : PAGE_SIZE))
1200 return (EINVAL);
1201
1202 /* Allocate an mbuf to hold the arguments. */
1203 m = m_get(M_WAIT, type);
1204 /* can't claim. don't who to assign it to. */
1205 if (buflen > MLEN) {
1206 /*
1207 * Won't fit into a regular mbuf, so we allocate just
1208 * enough external storage to hold the argument.
1209 */
1210 MEXTMALLOC(m, buflen, M_WAITOK);
1211 }
1212 m->m_len = buflen;
1213 error = copyin(bf, mtod(m, void *), buflen);
1214 if (error) {
1215 (void) m_free(m);
1216 return (error);
1217 }
1218 *mp = m;
1219 if (type == MT_SONAME) {
1220 sa = mtod(m, struct sockaddr *);
1221 #if BYTE_ORDER != BIG_ENDIAN
1222 /*
1223 * 4.3BSD compat thing - need to stay, since bind(2),
1224 * connect(2), sendto(2) were not versioned for COMPAT_43.
1225 */
1226 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1227 sa->sa_family = sa->sa_len;
1228 #endif
1229 sa->sa_len = buflen;
1230 }
1231 return (0);
1232 }
1233
1234 int
1235 getsock(struct filedesc *fdp, int fdes, struct file **fpp)
1236 {
1237 struct file *fp;
1238
1239 if ((fp = fd_getfile(fdp, fdes)) == NULL)
1240 return (EBADF);
1241
1242 FILE_USE(fp);
1243
1244 if (fp->f_type != DTYPE_SOCKET) {
1245 FILE_UNUSE(fp, NULL);
1246 return (ENOTSOCK);
1247 }
1248 *fpp = fp;
1249 return (0);
1250 }
1251