sockin.c revision 1.66 1 /* $NetBSD: sockin.c,v 1.66 2018/06/26 06:48:03 msaitoh Exp $ */
2
3 /*
4 * Copyright (c) 2008, 2009 Antti Kantee. All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 #include <sys/cdefs.h>
29 __KERNEL_RCSID(0, "$NetBSD: sockin.c,v 1.66 2018/06/26 06:48:03 msaitoh Exp $");
30
31 #include <sys/param.h>
32 #include <sys/condvar.h>
33 #include <sys/domain.h>
34 #include <sys/kmem.h>
35 #include <sys/kthread.h>
36 #include <sys/mbuf.h>
37 #include <sys/mutex.h>
38 #include <sys/once.h>
39 #include <sys/poll.h>
40 #include <sys/protosw.h>
41 #include <sys/queue.h>
42 #include <sys/socket.h>
43 #include <sys/socketvar.h>
44 #include <sys/time.h>
45
46 #include <net/bpf.h>
47 #include <net/if.h>
48 #include <net/radix.h>
49
50 #include <netinet/in.h>
51 #include <netinet/in_systm.h>
52 #include <netinet/ip.h>
53
54 #include <rump-sys/kern.h>
55
56 #include <rump/rumpuser.h>
57
58 #include "sockin_user.h"
59
60 /*
61 * An inet communication domain which uses the socket interface.
62 * Supports IPv4 & IPv6 UDP/TCP.
63 */
64
65 DOMAIN_DEFINE(sockindomain);
66 DOMAIN_DEFINE(sockin6domain);
67
68 static int sockin_do_init(void);
69 static void sockin_init(void);
70 static int sockin_attach(struct socket *, int);
71 static void sockin_detach(struct socket *);
72 static int sockin_accept(struct socket *, struct sockaddr *);
73 static int sockin_connect2(struct socket *, struct socket *);
74 static int sockin_bind(struct socket *, struct sockaddr *, struct lwp *);
75 static int sockin_listen(struct socket *, struct lwp *);
76 static int sockin_connect(struct socket *, struct sockaddr *, struct lwp *);
77 static int sockin_disconnect(struct socket *);
78 static int sockin_shutdown(struct socket *);
79 static int sockin_abort(struct socket *);
80 static int sockin_ioctl(struct socket *, u_long, void *, struct ifnet *);
81 static int sockin_stat(struct socket *, struct stat *);
82 static int sockin_peeraddr(struct socket *, struct sockaddr *);
83 static int sockin_sockaddr(struct socket *, struct sockaddr *);
84 static int sockin_rcvd(struct socket *, int, struct lwp *);
85 static int sockin_recvoob(struct socket *, struct mbuf *, int);
86 static int sockin_send(struct socket *, struct mbuf *, struct sockaddr *,
87 struct mbuf *, struct lwp *);
88 static int sockin_sendoob(struct socket *, struct mbuf *, struct mbuf *);
89 static int sockin_purgeif(struct socket *, struct ifnet *);
90 static int sockin_ctloutput(int op, struct socket *, struct sockopt *);
91
92 static const struct pr_usrreqs sockin_usrreqs = {
93 .pr_attach = sockin_attach,
94 .pr_detach = sockin_detach,
95 .pr_accept = sockin_accept,
96 .pr_bind = sockin_bind,
97 .pr_listen = sockin_listen,
98 .pr_connect = sockin_connect,
99 .pr_connect2 = sockin_connect2,
100 .pr_disconnect = sockin_disconnect,
101 .pr_shutdown = sockin_shutdown,
102 .pr_abort = sockin_abort,
103 .pr_ioctl = sockin_ioctl,
104 .pr_stat = sockin_stat,
105 .pr_peeraddr = sockin_peeraddr,
106 .pr_sockaddr = sockin_sockaddr,
107 .pr_rcvd = sockin_rcvd,
108 .pr_recvoob = sockin_recvoob,
109 .pr_send = sockin_send,
110 .pr_sendoob = sockin_sendoob,
111 .pr_purgeif = sockin_purgeif,
112 };
113
114 const struct protosw sockinsw[] = {
115 {
116 .pr_type = SOCK_DGRAM,
117 .pr_domain = &sockindomain,
118 .pr_protocol = IPPROTO_UDP,
119 .pr_flags = PR_ATOMIC|PR_ADDR,
120 .pr_usrreqs = &sockin_usrreqs,
121 .pr_ctloutput = sockin_ctloutput,
122 },
123 {
124 .pr_type = SOCK_STREAM,
125 .pr_domain = &sockindomain,
126 .pr_protocol = IPPROTO_TCP,
127 .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN|PR_ABRTACPTDIS,
128 .pr_usrreqs = &sockin_usrreqs,
129 .pr_ctloutput = sockin_ctloutput,
130 }};
131 const struct protosw sockin6sw[] = {
132 {
133 .pr_type = SOCK_DGRAM,
134 .pr_domain = &sockin6domain,
135 .pr_protocol = IPPROTO_UDP,
136 .pr_flags = PR_ATOMIC|PR_ADDR,
137 .pr_usrreqs = &sockin_usrreqs,
138 .pr_ctloutput = sockin_ctloutput,
139 },
140 {
141 .pr_type = SOCK_STREAM,
142 .pr_domain = &sockin6domain,
143 .pr_protocol = IPPROTO_TCP,
144 .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN|PR_ABRTACPTDIS,
145 .pr_usrreqs = &sockin_usrreqs,
146 .pr_ctloutput = sockin_ctloutput,
147 }};
148
149 struct domain sockindomain = {
150 .dom_family = PF_INET,
151 .dom_name = "socket_inet",
152 .dom_init = sockin_init,
153 .dom_externalize = NULL,
154 .dom_dispose = NULL,
155 .dom_protosw = sockinsw,
156 .dom_protoswNPROTOSW = &sockinsw[__arraycount(sockinsw)],
157 .dom_rtattach = rt_inithead,
158 .dom_rtoffset = 32,
159 .dom_maxrtkey = sizeof(struct sockaddr_in),
160 .dom_ifattach = NULL,
161 .dom_ifdetach = NULL,
162 .dom_ifqueues = { NULL },
163 .dom_link = { NULL },
164 .dom_mowner = MOWNER_INIT("",""),
165 .dom_sockaddr_cmp = NULL
166 };
167 struct domain sockin6domain = {
168 .dom_family = PF_INET6,
169 .dom_name = "socket_inet6",
170 .dom_init = sockin_init,
171 .dom_externalize = NULL,
172 .dom_dispose = NULL,
173 .dom_protosw = sockin6sw,
174 .dom_protoswNPROTOSW = &sockin6sw[__arraycount(sockin6sw)],
175 .dom_rtattach = rt_inithead,
176 .dom_rtoffset = 32,
177 .dom_maxrtkey = sizeof(struct sockaddr_in6),
178 .dom_ifattach = NULL,
179 .dom_ifdetach = NULL,
180 .dom_ifqueues = { NULL },
181 .dom_link = { NULL },
182 .dom_mowner = MOWNER_INIT("",""),
183 .dom_sockaddr_cmp = NULL
184 };
185
/*
 * SO2S: fetch the descriptor stored in a socket's so_internal field
 * (registersock() puts it there).  The argument is parenthesized so that
 * any pointer expression, e.g. SO2S(p + 1), expands correctly.
 */
#define SO2S(so) ((intptr_t)((so)->so_internal))

/* Buffer size used for soreserve() and for the SO_SNDBUF/SO_RCVBUF options. */
#define SOCKIN_SBSIZE 65536
188
189 struct sockin_unit {
190 struct socket *su_so;
191
192 LIST_ENTRY(sockin_unit) su_entries;
193 };
194 static LIST_HEAD(, sockin_unit) su_ent = LIST_HEAD_INITIALIZER(su_ent);
195 static kmutex_t su_mtx;
196 static bool rebuild;
197 static int nsock;
198
199 /* XXX: for the bpf hack */
200 static struct ifnet sockin_if;
201 int ifpromisc(struct ifnet *ifp, int pswitch) { return 0; }
202
203 static int
204 registersock(struct socket *so, int news)
205 {
206 struct sockin_unit *su;
207
208 su = kmem_alloc(sizeof(*su), KM_NOSLEEP);
209 if (!su)
210 return ENOMEM;
211
212 so->so_internal = (void *)(intptr_t)news;
213 su->su_so = so;
214
215 mutex_enter(&su_mtx);
216 LIST_INSERT_HEAD(&su_ent, su, su_entries);
217 nsock++;
218 rebuild = true;
219 mutex_exit(&su_mtx);
220
221 return 0;
222 }
223
224 static void
225 removesock(struct socket *so)
226 {
227 struct sockin_unit *su_iter;
228
229 mutex_enter(&su_mtx);
230 LIST_FOREACH(su_iter, &su_ent, su_entries) {
231 if (su_iter->su_so == so)
232 break;
233 }
234 if (!su_iter)
235 panic("no such socket");
236
237 LIST_REMOVE(su_iter, su_entries);
238 nsock--;
239 rebuild = true;
240 mutex_exit(&su_mtx);
241
242 rumpuser_close(SO2S(su_iter->su_so));
243 kmem_free(su_iter, sizeof(*su_iter));
244 }
245
246 static void
247 sockin_process(struct socket *so)
248 {
249 struct sockaddr_in6 from;
250 struct iovec io;
251 struct msghdr rmsg;
252 struct mbuf *m;
253 size_t n, plen;
254 int error;
255
256 m = m_gethdr(M_WAIT, MT_DATA);
257 if (so->so_proto->pr_type == SOCK_DGRAM) {
258 plen = IP_MAXPACKET;
259 MEXTMALLOC(m, plen, M_DONTWAIT);
260 } else {
261 plen = MCLBYTES;
262 MCLGET(m, M_DONTWAIT);
263 }
264 if ((m->m_flags & M_EXT) == 0) {
265 m_freem(m);
266 return;
267 }
268
269 memset(&rmsg, 0, sizeof(rmsg));
270 io.iov_base = mtod(m, void *);
271 io.iov_len = plen;
272 rmsg.msg_iov = &io;
273 rmsg.msg_iovlen = 1;
274 rmsg.msg_name = (struct sockaddr *)&from;
275 rmsg.msg_namelen = sizeof(from);
276
277 error = rumpcomp_sockin_recvmsg(SO2S(so), &rmsg, 0, &n);
278 if (error || n == 0) {
279 m_freem(m);
280
281 /* Treat a TCP socket a goner */
282 if (error != EAGAIN && so->so_proto->pr_type == SOCK_STREAM) {
283 mutex_enter(softnet_lock);
284 soisdisconnected(so);
285 mutex_exit(softnet_lock);
286 removesock(so);
287 }
288 return;
289 }
290 m->m_len = m->m_pkthdr.len = n;
291
292 bpf_mtap_af(&sockin_if, AF_UNSPEC, m, BPF_D_IN);
293
294 mutex_enter(softnet_lock);
295 if (so->so_proto->pr_type == SOCK_DGRAM) {
296 if (!sbappendaddr(&so->so_rcv, rmsg.msg_name, m, NULL)) {
297 m_freem(m);
298 }
299 } else {
300 sbappendstream(&so->so_rcv, m);
301 }
302
303 sorwakeup(so);
304 mutex_exit(softnet_lock);
305 }
306
307 static void
308 sockin_waccept(struct socket *so)
309 {
310 struct socket *nso;
311 struct sockaddr_in6 sin;
312 int news, error, slen;
313
314 slen = sizeof(sin);
315 error = rumpcomp_sockin_accept(SO2S(so), (struct sockaddr *)&sin,
316 &slen, &news);
317 if (error)
318 return;
319
320 mutex_enter(softnet_lock);
321 nso = sonewconn(so, true);
322 if (nso == NULL)
323 goto errout;
324 if (registersock(nso, news) != 0)
325 goto errout;
326 mutex_exit(softnet_lock);
327 return;
328
329 errout:
330 rumpuser_close(news);
331 if (nso)
332 soclose(nso);
333 mutex_exit(softnet_lock);
334 }
335
336 #define POLLTIMEOUT 100 /* check for new entries every 100ms */
337
338 /* XXX: doesn't handle socket (kernel) locking properly? */
339 static void
340 sockinworker(void *arg)
341 {
342 struct pollfd *pfds = NULL, *npfds;
343 struct sockin_unit *su_iter;
344 struct socket *so;
345 int cursock = 0, i, rv, error;
346
347 /*
348 * Loop reading requests. Check for new sockets periodically
349 * (could be smarter, but I'm lazy).
350 */
351 for (;;) {
352 if (rebuild) {
353 npfds = NULL;
354 mutex_enter(&su_mtx);
355 if (nsock)
356 npfds = kmem_alloc(nsock * sizeof(*npfds),
357 KM_NOSLEEP);
358 if (npfds || nsock == 0) {
359 if (pfds)
360 kmem_free(pfds, cursock*sizeof(*pfds));
361 pfds = npfds;
362 cursock = nsock;
363 rebuild = false;
364
365 i = 0;
366 LIST_FOREACH(su_iter, &su_ent, su_entries) {
367 pfds[i].fd = SO2S(su_iter->su_so);
368 pfds[i].events = POLLIN;
369 pfds[i].revents = 0;
370 i++;
371 }
372 KASSERT(i == nsock);
373 }
374 mutex_exit(&su_mtx);
375 }
376
377 /* find affected sockets & process */
378 error = rumpcomp_sockin_poll(pfds, cursock, POLLTIMEOUT, &rv);
379 for (i = 0; i < cursock && rv > 0 && error == 0; i++) {
380 if (pfds[i].revents & POLLIN) {
381 mutex_enter(&su_mtx);
382 LIST_FOREACH(su_iter, &su_ent, su_entries) {
383 if (SO2S(su_iter->su_so)==pfds[i].fd) {
384 so = su_iter->su_so;
385 mutex_exit(&su_mtx);
386 if(so->so_options&SO_ACCEPTCONN)
387 sockin_waccept(so);
388 else
389 sockin_process(so);
390 mutex_enter(&su_mtx);
391 break;
392 }
393 }
394 /* if we can't find it, just wing it */
395 KASSERT(rebuild || su_iter);
396 mutex_exit(&su_mtx);
397 pfds[i].revents = 0;
398 rv--;
399 i = -1;
400 continue;
401 }
402
403 /* something else? ignore */
404 if (pfds[i].revents) {
405 pfds[i].revents = 0;
406 rv--;
407 }
408 }
409 KASSERT(rv <= 0);
410 }
411
412 }
413
414 static int
415 sockin_do_init(void)
416 {
417 int rv;
418
419 if (rump_threads) {
420 if ((rv = kthread_create(PRI_NONE, 0, NULL, sockinworker,
421 NULL, NULL, "sockwork")) != 0)
422 panic("sockin_init: could not create worker thread\n");
423 } else {
424 printf("sockin_init: no threads => no worker thread\n");
425 }
426 mutex_init(&su_mtx, MUTEX_DEFAULT, IPL_NONE);
427 strlcpy(sockin_if.if_xname, "sockin0", sizeof(sockin_if.if_xname));
428 bpf_attach(&sockin_if, DLT_NULL, 0);
429 return 0;
430 }
431
432 static void
433 sockin_init(void)
434 {
435 static ONCE_DECL(init);
436
437 RUN_ONCE(&init, sockin_do_init);
438 }
439
440 static int
441 sockin_attach(struct socket *so, int proto)
442 {
443 const int type = so->so_proto->pr_type;
444 int error, news, family;
445
446 sosetlock(so);
447 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
448 error = soreserve(so, SOCKIN_SBSIZE, SOCKIN_SBSIZE);
449 if (error)
450 return error;
451 }
452
453 family = so->so_proto->pr_domain->dom_family;
454 KASSERT(family == PF_INET || family == PF_INET6);
455 error = rumpcomp_sockin_socket(family, type, 0, &news);
456 if (error)
457 return error;
458
459 /* For UDP sockets, make sure we can send/recv maximum. */
460 if (type == SOCK_DGRAM) {
461 int sbsize = SOCKIN_SBSIZE;
462 error = rumpcomp_sockin_setsockopt(news,
463 SOL_SOCKET, SO_SNDBUF,
464 &sbsize, sizeof(sbsize));
465 sbsize = SOCKIN_SBSIZE;
466 error = rumpcomp_sockin_setsockopt(news,
467 SOL_SOCKET, SO_RCVBUF,
468 &sbsize, sizeof(sbsize));
469 }
470
471 if ((error = registersock(so, news)) != 0)
472 rumpuser_close(news);
473
474 return error;
475 }
476
/* pr_detach handler: not implemented; calling it panics. */
static void
sockin_detach(struct socket *so)
{

	panic("sockin_detach: IMPLEMENT ME\n");
}
482
/*
 * pr_accept handler.  Nothing to do here: the accept work is done by the
 * worker thread (sockin_waccept()).  Always returns 0; `nam' is not
 * filled in.
 */
static int
sockin_accept(struct socket *so, struct sockaddr *nam)
{

	KASSERT(solocked(so));

	/* we do all the work in the worker thread */
	return 0;
}
491
492 static int
493 sockin_bind(struct socket *so, struct sockaddr *nam, struct lwp *l)
494 {
495 KASSERT(solocked(so));
496 KASSERT(nam != NULL);
497
498 return rumpcomp_sockin_bind(SO2S(so), nam, nam->sa_len);
499 }
500
501 static int
502 sockin_listen(struct socket *so, struct lwp *l)
503 {
504 KASSERT(solocked(so));
505
506 return rumpcomp_sockin_listen(SO2S(so), so->so_qlimit);
507 }
508
509 static int
510 sockin_connect(struct socket *so, struct sockaddr *nam, struct lwp *l)
511 {
512 int error = 0;
513
514 KASSERT(solocked(so));
515 KASSERT(nam != NULL);
516
517 error = rumpcomp_sockin_connect(SO2S(so), nam, nam->sa_len);
518 if (error == 0)
519 soisconnected(so);
520
521 return error;
522 }
523
/* pr_connect2 handler: not supported; calling it panics. */
static int
sockin_connect2(struct socket *so, struct socket *so2)
{

	KASSERT(solocked(so));

	panic("sockin_connect2: IMPLEMENT ME, connect2 not supported");
}
531
/* pr_disconnect handler: not supported; calling it panics. */
static int
sockin_disconnect(struct socket *so)
{

	KASSERT(solocked(so));

	panic("sockin_disconnect: IMPLEMENT ME, disconnect not supported");
}
539
/*
 * pr_shutdown handler: drop the socket from the tracked set, which also
 * closes its descriptor (see removesock()).  Always returns 0.
 */
static int
sockin_shutdown(struct socket *so)
{

	KASSERT(solocked(so));

	removesock(so);

	return 0;
}
548
/* pr_abort handler: not supported; calling it panics. */
static int
sockin_abort(struct socket *so)
{

	KASSERT(solocked(so));

	panic("sockin_abort: IMPLEMENT ME, abort not supported");
}
556
557 static int
558 sockin_ioctl(struct socket *so, u_long cmd, void *nam, struct ifnet *ifp)
559 {
560 return ENOTTY;
561 }
562
/* pr_stat handler: leaves `ub' untouched and always returns 0. */
static int
sockin_stat(struct socket *so, struct stat *ub)
{

	KASSERT(solocked(so));

	return 0;
}
570
571 static int
572 sockin_peeraddr(struct socket *so, struct sockaddr *nam)
573 {
574 KASSERT(solocked(so));
575
576 int error = 0;
577 int slen = nam->sa_len;
578
579 error = rumpcomp_sockin_getname(SO2S(so),
580 nam, &slen, RUMPCOMP_SOCKIN_PEERNAME);
581 if (error == 0)
582 nam->sa_len = slen;
583 return error;
584 }
585
586 static int
587 sockin_sockaddr(struct socket *so, struct sockaddr *nam)
588 {
589 KASSERT(solocked(so));
590
591 int error = 0;
592 int slen = nam->sa_len;
593
594 error = rumpcomp_sockin_getname(SO2S(so),
595 nam, &slen, RUMPCOMP_SOCKIN_SOCKNAME);
596 if (error == 0)
597 nam->sa_len = slen;
598 return error;
599 }
600
/* pr_rcvd handler: not supported; calling it panics. */
static int
sockin_rcvd(struct socket *so, int flags, struct lwp *l)
{

	KASSERT(solocked(so));

	panic("sockin_rcvd: IMPLEMENT ME, rcvd not supported");
}
608
/* pr_recvoob handler: not supported; calling it panics. */
static int
sockin_recvoob(struct socket *so, struct mbuf *m, int flags)
{

	KASSERT(solocked(so));

	panic("sockin_recvoob: IMPLEMENT ME, recvoob not supported");
}
616
617 static int
618 sockin_send(struct socket *so, struct mbuf *m, struct sockaddr *saddr,
619 struct mbuf *control, struct lwp *l)
620 {
621 struct msghdr mhdr;
622 size_t iov_max, i;
623 struct iovec iov_buf[32], *iov;
624 struct mbuf *m2;
625 size_t tot, n;
626 int error = 0;
627 int s;
628
629 bpf_mtap_af(&sockin_if, AF_UNSPEC, m, BPF_D_OUT);
630
631 memset(&mhdr, 0, sizeof(mhdr));
632
633 iov_max = 0;
634 for (m2 = m; m2 != NULL; m2 = m2->m_next) {
635 iov_max++;
636 }
637
638 if (iov_max <= __arraycount(iov_buf)) {
639 iov = iov_buf;
640 } else {
641 iov = kmem_alloc(sizeof(struct iovec) * iov_max,
642 KM_SLEEP);
643 }
644
645 tot = 0;
646 for (i = 0, m2 = m; m2 != NULL; m2 = m2->m_next, i++) {
647 iov[i].iov_base = m2->m_data;
648 iov[i].iov_len = m2->m_len;
649 tot += m2->m_len;
650 }
651 mhdr.msg_iov = iov;
652 mhdr.msg_iovlen = i;
653 s = SO2S(so);
654
655 if (saddr != NULL) {
656 mhdr.msg_name = saddr;
657 mhdr.msg_namelen = saddr->sa_len;
658 }
659
660 rumpcomp_sockin_sendmsg(s, &mhdr, 0, &n);
661
662 if (iov != iov_buf)
663 kmem_free(iov, sizeof(struct iovec) * iov_max);
664
665 m_freem(m);
666 m_freem(control);
667
668 /* this assumes too many things to list.. buthey, testing */
669 if (!rump_threads)
670 sockin_process(so);
671
672 return error;
673 }
674
/* pr_sendoob handler: not supported; calling it panics. */
static int
sockin_sendoob(struct socket *so, struct mbuf *m, struct mbuf *control)
{

	KASSERT(solocked(so));

	panic("sockin_sendoob: IMPLEMENT ME, sendoob not supported");
}
682
/* pr_purgeif handler: not supported; calling it panics. */
static int
sockin_purgeif(struct socket *so, struct ifnet *ifp)
{

	panic("sockin_purgeif: IMPLEMENT ME, purgeif not supported");
}
689
690 static int
691 sockin_ctloutput(int op, struct socket *so, struct sockopt *sopt)
692 {
693
694 return rumpcomp_sockin_setsockopt(SO2S(so), sopt->sopt_level,
695 sopt->sopt_name, sopt->sopt_data, sopt->sopt_size);
696 }
697
/*
 * Catch-all for interfaces this component does not provide.  The symbols
 * aliased below all resolve to this function, so calling any of them
 * panics.
 */
int	sockin_unavailable(void);

int
sockin_unavailable(void)
{

	panic("interface not available in with sockin");
}

__strong_alias(rtrequest,sockin_unavailable);
__strong_alias(ifunit,sockin_unavailable);
__strong_alias(ifreq_setaddr,sockin_unavailable);
__strong_alias(rt_delete_matched_entries,sockin_unavailable);
709