/*	$NetBSD: sockin.c,v 1.62 2015/05/02 17:18:04 rtr Exp $	*/

/*
 * Copyright (c) 2008, 2009 Antti Kantee. All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
27
28 #include <sys/cdefs.h>
29 __KERNEL_RCSID(0, "$NetBSD: sockin.c,v 1.62 2015/05/02 17:18:04 rtr Exp $");
30
31 #include <sys/param.h>
32 #include <sys/condvar.h>
33 #include <sys/domain.h>
34 #include <sys/kmem.h>
35 #include <sys/kthread.h>
36 #include <sys/mbuf.h>
37 #include <sys/mutex.h>
38 #include <sys/once.h>
39 #include <sys/poll.h>
40 #include <sys/protosw.h>
41 #include <sys/queue.h>
42 #include <sys/socket.h>
43 #include <sys/socketvar.h>
44 #include <sys/time.h>
45
46 #include <net/bpf.h>
47 #include <net/if.h>
48 #include <net/radix.h>
49
50 #include <netinet/in.h>
51 #include <netinet/in_systm.h>
52 #include <netinet/ip.h>
53
54 #include <rump/rumpuser.h>
55
56 #include "rump_private.h"
57 #include "sockin_user.h"
58
/*
 * An inet communication domain which uses the socket interface.
 * Supports IPv4 & IPv6 UDP/TCP.
 */
63
64 DOMAIN_DEFINE(sockindomain);
65 DOMAIN_DEFINE(sockin6domain);
66
67 static int sockin_do_init(void);
68 static void sockin_init(void);
69 static int sockin_attach(struct socket *, int);
70 static void sockin_detach(struct socket *);
71 static int sockin_accept(struct socket *, struct sockaddr *);
72 static int sockin_connect2(struct socket *, struct socket *);
73 static int sockin_bind(struct socket *, struct sockaddr *, struct lwp *);
74 static int sockin_listen(struct socket *, struct lwp *);
75 static int sockin_connect(struct socket *, struct sockaddr *, struct lwp *);
76 static int sockin_disconnect(struct socket *);
77 static int sockin_shutdown(struct socket *);
78 static int sockin_abort(struct socket *);
79 static int sockin_ioctl(struct socket *, u_long, void *, struct ifnet *);
80 static int sockin_stat(struct socket *, struct stat *);
81 static int sockin_peeraddr(struct socket *, struct sockaddr *);
82 static int sockin_sockaddr(struct socket *, struct sockaddr *);
83 static int sockin_rcvd(struct socket *, int, struct lwp *);
84 static int sockin_recvoob(struct socket *, struct mbuf *, int);
85 static int sockin_send(struct socket *, struct mbuf *, struct sockaddr *,
86 struct mbuf *, struct lwp *);
87 static int sockin_sendoob(struct socket *, struct mbuf *, struct mbuf *);
88 static int sockin_purgeif(struct socket *, struct ifnet *);
89 static int sockin_ctloutput(int op, struct socket *, struct sockopt *);
90
91 static const struct pr_usrreqs sockin_usrreqs = {
92 .pr_attach = sockin_attach,
93 .pr_detach = sockin_detach,
94 .pr_accept = sockin_accept,
95 .pr_bind = sockin_bind,
96 .pr_listen = sockin_listen,
97 .pr_connect = sockin_connect,
98 .pr_connect2 = sockin_connect2,
99 .pr_disconnect = sockin_disconnect,
100 .pr_shutdown = sockin_shutdown,
101 .pr_abort = sockin_abort,
102 .pr_ioctl = sockin_ioctl,
103 .pr_stat = sockin_stat,
104 .pr_peeraddr = sockin_peeraddr,
105 .pr_sockaddr = sockin_sockaddr,
106 .pr_rcvd = sockin_rcvd,
107 .pr_recvoob = sockin_recvoob,
108 .pr_send = sockin_send,
109 .pr_sendoob = sockin_sendoob,
110 .pr_purgeif = sockin_purgeif,
111 };
112
113 const struct protosw sockinsw[] = {
114 {
115 .pr_type = SOCK_DGRAM,
116 .pr_domain = &sockindomain,
117 .pr_protocol = IPPROTO_UDP,
118 .pr_flags = PR_ATOMIC|PR_ADDR,
119 .pr_usrreqs = &sockin_usrreqs,
120 .pr_ctloutput = sockin_ctloutput,
121 },
122 {
123 .pr_type = SOCK_STREAM,
124 .pr_domain = &sockindomain,
125 .pr_protocol = IPPROTO_TCP,
126 .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN|PR_ABRTACPTDIS,
127 .pr_usrreqs = &sockin_usrreqs,
128 .pr_ctloutput = sockin_ctloutput,
129 }};
130 const struct protosw sockin6sw[] = {
131 {
132 .pr_type = SOCK_DGRAM,
133 .pr_domain = &sockin6domain,
134 .pr_protocol = IPPROTO_UDP,
135 .pr_flags = PR_ATOMIC|PR_ADDR,
136 .pr_usrreqs = &sockin_usrreqs,
137 .pr_ctloutput = sockin_ctloutput,
138 },
139 {
140 .pr_type = SOCK_STREAM,
141 .pr_domain = &sockin6domain,
142 .pr_protocol = IPPROTO_TCP,
143 .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN|PR_ABRTACPTDIS,
144 .pr_usrreqs = &sockin_usrreqs,
145 .pr_ctloutput = sockin_ctloutput,
146 }};
147
148 struct domain sockindomain = {
149 .dom_family = PF_INET,
150 .dom_name = "socket_inet",
151 .dom_init = sockin_init,
152 .dom_externalize = NULL,
153 .dom_dispose = NULL,
154 .dom_protosw = sockinsw,
155 .dom_protoswNPROTOSW = &sockinsw[__arraycount(sockinsw)],
156 .dom_rtattach = rt_inithead,
157 .dom_rtoffset = 32,
158 .dom_maxrtkey = sizeof(struct sockaddr_in),
159 .dom_ifattach = NULL,
160 .dom_ifdetach = NULL,
161 .dom_ifqueues = { NULL },
162 .dom_link = { NULL },
163 .dom_mowner = MOWNER_INIT("",""),
164 .dom_rtcache = { NULL },
165 .dom_sockaddr_cmp = NULL
166 };
167 struct domain sockin6domain = {
168 .dom_family = PF_INET6,
169 .dom_name = "socket_inet6",
170 .dom_init = sockin_init,
171 .dom_externalize = NULL,
172 .dom_dispose = NULL,
173 .dom_protosw = sockin6sw,
174 .dom_protoswNPROTOSW = &sockin6sw[__arraycount(sockin6sw)],
175 .dom_rtattach = rt_inithead,
176 .dom_rtoffset = 32,
177 .dom_maxrtkey = sizeof(struct sockaddr_in6),
178 .dom_ifattach = NULL,
179 .dom_ifdetach = NULL,
180 .dom_ifqueues = { NULL },
181 .dom_link = { NULL },
182 .dom_mowner = MOWNER_INIT("",""),
183 .dom_rtcache = { NULL },
184 .dom_sockaddr_cmp = NULL
185 };
186
/*
 * SO2S: fetch the host descriptor stored in a socket's so_internal field
 * (registersock() stores it there as an intptr_t cast to a pointer).
 * The argument is parenthesized so that any pointer expression, e.g.
 * SO2S(p + 1), expands correctly.
 */
#define SO2S(so)	((intptr_t)((so)->so_internal))
/* Buffer size used for soreserve() and the host SO_SNDBUF/SO_RCVBUF. */
#define SOCKIN_SBSIZE	65536
189
190 struct sockin_unit {
191 struct socket *su_so;
192
193 LIST_ENTRY(sockin_unit) su_entries;
194 };
195 static LIST_HEAD(, sockin_unit) su_ent = LIST_HEAD_INITIALIZER(su_ent);
196 static kmutex_t su_mtx;
197 static bool rebuild;
198 static int nsock;
199
200 /* XXX: for the bpf hack */
201 static struct ifnet sockin_if;
202 int ifpromisc(struct ifnet *ifp, int pswitch) { return 0; }
203
204 static int
205 registersock(struct socket *so, int news)
206 {
207 struct sockin_unit *su;
208
209 su = kmem_alloc(sizeof(*su), KM_NOSLEEP);
210 if (!su)
211 return ENOMEM;
212
213 so->so_internal = (void *)(intptr_t)news;
214 su->su_so = so;
215
216 mutex_enter(&su_mtx);
217 LIST_INSERT_HEAD(&su_ent, su, su_entries);
218 nsock++;
219 rebuild = true;
220 mutex_exit(&su_mtx);
221
222 return 0;
223 }
224
225 static void
226 removesock(struct socket *so)
227 {
228 struct sockin_unit *su_iter;
229
230 mutex_enter(&su_mtx);
231 LIST_FOREACH(su_iter, &su_ent, su_entries) {
232 if (su_iter->su_so == so)
233 break;
234 }
235 if (!su_iter)
236 panic("no such socket");
237
238 LIST_REMOVE(su_iter, su_entries);
239 nsock--;
240 rebuild = true;
241 mutex_exit(&su_mtx);
242
243 rumpuser_close(SO2S(su_iter->su_so));
244 kmem_free(su_iter, sizeof(*su_iter));
245 }
246
247 static void
248 sockin_process(struct socket *so)
249 {
250 struct sockaddr_in6 from;
251 struct iovec io;
252 struct msghdr rmsg;
253 struct mbuf *m;
254 size_t n, plen;
255 int error;
256
257 m = m_gethdr(M_WAIT, MT_DATA);
258 if (so->so_proto->pr_type == SOCK_DGRAM) {
259 plen = IP_MAXPACKET;
260 MEXTMALLOC(m, plen, M_DONTWAIT);
261 } else {
262 plen = MCLBYTES;
263 MCLGET(m, M_DONTWAIT);
264 }
265 if ((m->m_flags & M_EXT) == 0) {
266 m_freem(m);
267 return;
268 }
269
270 memset(&rmsg, 0, sizeof(rmsg));
271 io.iov_base = mtod(m, void *);
272 io.iov_len = plen;
273 rmsg.msg_iov = &io;
274 rmsg.msg_iovlen = 1;
275 rmsg.msg_name = (struct sockaddr *)&from;
276 rmsg.msg_namelen = sizeof(from);
277
278 error = rumpcomp_sockin_recvmsg(SO2S(so), &rmsg, 0, &n);
279 if (error || n == 0) {
280 m_freem(m);
281
282 /* Treat a TCP socket a goner */
283 if (error != EAGAIN && so->so_proto->pr_type == SOCK_STREAM) {
284 mutex_enter(softnet_lock);
285 soisdisconnected(so);
286 mutex_exit(softnet_lock);
287 removesock(so);
288 }
289 return;
290 }
291 m->m_len = m->m_pkthdr.len = n;
292
293 bpf_mtap_af(&sockin_if, AF_UNSPEC, m);
294
295 mutex_enter(softnet_lock);
296 if (so->so_proto->pr_type == SOCK_DGRAM) {
297 if (!sbappendaddr(&so->so_rcv, rmsg.msg_name, m, NULL)) {
298 m_freem(m);
299 }
300 } else {
301 sbappendstream(&so->so_rcv, m);
302 }
303
304 sorwakeup(so);
305 mutex_exit(softnet_lock);
306 }
307
308 static void
309 sockin_waccept(struct socket *so)
310 {
311 struct socket *nso;
312 struct sockaddr_in6 sin;
313 int news, error, slen;
314
315 slen = sizeof(sin);
316 error = rumpcomp_sockin_accept(SO2S(so), (struct sockaddr *)&sin,
317 &slen, &news);
318 if (error)
319 return;
320
321 mutex_enter(softnet_lock);
322 nso = sonewconn(so, true);
323 if (nso == NULL)
324 goto errout;
325 if (registersock(nso, news) != 0)
326 goto errout;
327 mutex_exit(softnet_lock);
328 return;
329
330 errout:
331 rumpuser_close(news);
332 if (nso)
333 soclose(nso);
334 mutex_exit(softnet_lock);
335 }
336
337 #define POLLTIMEOUT 100 /* check for new entries every 100ms */
338
339 /* XXX: doesn't handle socket (kernel) locking properly? */
340 static void
341 sockinworker(void *arg)
342 {
343 struct pollfd *pfds = NULL, *npfds;
344 struct sockin_unit *su_iter;
345 struct socket *so;
346 int cursock = 0, i, rv, error;
347
348 /*
349 * Loop reading requests. Check for new sockets periodically
350 * (could be smarter, but I'm lazy).
351 */
352 for (;;) {
353 if (rebuild) {
354 npfds = NULL;
355 mutex_enter(&su_mtx);
356 if (nsock)
357 npfds = kmem_alloc(nsock * sizeof(*npfds),
358 KM_NOSLEEP);
359 if (npfds || nsock == 0) {
360 if (pfds)
361 kmem_free(pfds, cursock*sizeof(*pfds));
362 pfds = npfds;
363 cursock = nsock;
364 rebuild = false;
365
366 i = 0;
367 LIST_FOREACH(su_iter, &su_ent, su_entries) {
368 pfds[i].fd = SO2S(su_iter->su_so);
369 pfds[i].events = POLLIN;
370 pfds[i].revents = 0;
371 i++;
372 }
373 KASSERT(i == nsock);
374 }
375 mutex_exit(&su_mtx);
376 }
377
378 /* find affected sockets & process */
379 error = rumpcomp_sockin_poll(pfds, cursock, POLLTIMEOUT, &rv);
380 for (i = 0; i < cursock && rv > 0 && error == 0; i++) {
381 if (pfds[i].revents & POLLIN) {
382 mutex_enter(&su_mtx);
383 LIST_FOREACH(su_iter, &su_ent, su_entries) {
384 if (SO2S(su_iter->su_so)==pfds[i].fd) {
385 so = su_iter->su_so;
386 mutex_exit(&su_mtx);
387 if(so->so_options&SO_ACCEPTCONN)
388 sockin_waccept(so);
389 else
390 sockin_process(so);
391 mutex_enter(&su_mtx);
392 break;
393 }
394 }
395 /* if we can't find it, just wing it */
396 KASSERT(rebuild || su_iter);
397 mutex_exit(&su_mtx);
398 pfds[i].revents = 0;
399 rv--;
400 i = -1;
401 continue;
402 }
403
404 /* something else? ignore */
405 if (pfds[i].revents) {
406 pfds[i].revents = 0;
407 rv--;
408 }
409 }
410 KASSERT(rv <= 0);
411 }
412
413 }
414
415 static int
416 sockin_do_init(void)
417 {
418 int rv;
419
420 if (rump_threads) {
421 if ((rv = kthread_create(PRI_NONE, 0, NULL, sockinworker,
422 NULL, NULL, "sockwork")) != 0)
423 panic("sockin_init: could not create worker thread\n");
424 } else {
425 printf("sockin_init: no threads => no worker thread\n");
426 }
427 mutex_init(&su_mtx, MUTEX_DEFAULT, IPL_NONE);
428 strlcpy(sockin_if.if_xname, "sockin0", sizeof(sockin_if.if_xname));
429 bpf_attach(&sockin_if, DLT_NULL, 0);
430 return 0;
431 }
432
433 static void
434 sockin_init(void)
435 {
436 static ONCE_DECL(init);
437
438 RUN_ONCE(&init, sockin_do_init);
439 }
440
441 static int
442 sockin_attach(struct socket *so, int proto)
443 {
444 const int type = so->so_proto->pr_type;
445 int error, news, family;
446
447 sosetlock(so);
448 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
449 error = soreserve(so, SOCKIN_SBSIZE, SOCKIN_SBSIZE);
450 if (error)
451 return error;
452 }
453
454 family = so->so_proto->pr_domain->dom_family;
455 KASSERT(family == PF_INET || family == PF_INET6);
456 error = rumpcomp_sockin_socket(family, type, 0, &news);
457 if (error)
458 return error;
459
460 /* For UDP sockets, make sure we can send/recv maximum. */
461 if (type == SOCK_DGRAM) {
462 int sbsize = SOCKIN_SBSIZE;
463 error = rumpcomp_sockin_setsockopt(news,
464 SOL_SOCKET, SO_SNDBUF,
465 &sbsize, sizeof(sbsize));
466 sbsize = SOCKIN_SBSIZE;
467 error = rumpcomp_sockin_setsockopt(news,
468 SOL_SOCKET, SO_RCVBUF,
469 &sbsize, sizeof(sbsize));
470 }
471
472 if ((error = registersock(so, news)) != 0)
473 rumpuser_close(news);
474
475 return error;
476 }
477
/* Detach is not implemented; any call panics. */
static void
sockin_detach(struct socket *so)
{

	panic("sockin_detach: IMPLEMENT ME\n");
}
483
/*
 * Accept handler.  Only checks that the socket is locked; `nam' is not
 * filled in.  Always returns 0.
 */
static int
sockin_accept(struct socket *so, struct sockaddr *nam)
{

	KASSERT(solocked(so));

	/* we do all the work in the worker thread */
	return 0;
}
492
493 static int
494 sockin_bind(struct socket *so, struct sockaddr *nam, struct lwp *l)
495 {
496 KASSERT(solocked(so));
497 KASSERT(nam != NULL);
498
499 return rumpcomp_sockin_bind(SO2S(so), nam, nam->sa_len);
500 }
501
502 static int
503 sockin_listen(struct socket *so, struct lwp *l)
504 {
505 KASSERT(solocked(so));
506
507 return rumpcomp_sockin_listen(SO2S(so), so->so_qlimit);
508 }
509
510 static int
511 sockin_connect(struct socket *so, struct sockaddr *nam, struct lwp *l)
512 {
513 int error = 0;
514
515 KASSERT(solocked(so));
516 KASSERT(nam != NULL);
517
518 error = rumpcomp_sockin_connect(SO2S(so), nam, nam->sa_len);
519 if (error == 0)
520 soisconnected(so);
521
522 return error;
523 }
524
/* connect2 is not supported; any call panics. */
static int
sockin_connect2(struct socket *so, struct socket *so2)
{

	KASSERT(solocked(so));

	panic("sockin_connect2: IMPLEMENT ME, connect2 not supported");
}
532
/* disconnect is not supported; any call panics. */
static int
sockin_disconnect(struct socket *so)
{

	KASSERT(solocked(so));

	panic("sockin_disconnect: IMPLEMENT ME, disconnect not supported");
}
540
/*
 * Shutdown: unregister the socket, which also closes its host
 * descriptor (see removesock()).  Always returns 0.
 */
static int
sockin_shutdown(struct socket *so)
{

	KASSERT(solocked(so));

	removesock(so);

	return 0;
}
549
/* abort is not supported; any call panics. */
static int
sockin_abort(struct socket *so)
{

	KASSERT(solocked(so));

	panic("sockin_abort: IMPLEMENT ME, abort not supported");
}
557
558 static int
559 sockin_ioctl(struct socket *so, u_long cmd, void *nam, struct ifnet *ifp)
560 {
561 return ENOTTY;
562 }
563
/* stat handler: leaves `ub' untouched and reports success. */
static int
sockin_stat(struct socket *so, struct stat *ub)
{

	KASSERT(solocked(so));

	return 0;
}
571
572 static int
573 sockin_peeraddr(struct socket *so, struct sockaddr *nam)
574 {
575 KASSERT(solocked(so));
576
577 int error = 0;
578 int slen = nam->sa_len;
579
580 error = rumpcomp_sockin_getname(SO2S(so),
581 nam, &slen, RUMPCOMP_SOCKIN_PEERNAME);
582 if (error == 0)
583 nam->sa_len = slen;
584 return error;
585 }
586
587 static int
588 sockin_sockaddr(struct socket *so, struct sockaddr *nam)
589 {
590 KASSERT(solocked(so));
591
592 int error = 0;
593 int slen = nam->sa_len;
594
595 error = rumpcomp_sockin_getname(SO2S(so),
596 nam, &slen, RUMPCOMP_SOCKIN_SOCKNAME);
597 if (error == 0)
598 nam->sa_len = slen;
599 return error;
600 }
601
/* rcvd is not supported; any call panics. */
static int
sockin_rcvd(struct socket *so, int flags, struct lwp *l)
{

	KASSERT(solocked(so));

	panic("sockin_rcvd: IMPLEMENT ME, rcvd not supported");
}
609
/* recvoob is not supported; any call panics. */
static int
sockin_recvoob(struct socket *so, struct mbuf *m, int flags)
{

	KASSERT(solocked(so));

	panic("sockin_recvoob: IMPLEMENT ME, recvoob not supported");
}
617
618 static int
619 sockin_send(struct socket *so, struct mbuf *m, struct sockaddr *saddr,
620 struct mbuf *control, struct lwp *l)
621 {
622 struct msghdr mhdr;
623 size_t iov_max, i;
624 struct iovec iov_buf[32], *iov;
625 struct mbuf *m2;
626 size_t tot, n;
627 int error = 0;
628 int s;
629
630 bpf_mtap_af(&sockin_if, AF_UNSPEC, m);
631
632 memset(&mhdr, 0, sizeof(mhdr));
633
634 iov_max = 0;
635 for (m2 = m; m2 != NULL; m2 = m2->m_next) {
636 iov_max++;
637 }
638
639 if (iov_max <= __arraycount(iov_buf)) {
640 iov = iov_buf;
641 } else {
642 iov = kmem_alloc(sizeof(struct iovec) * iov_max,
643 KM_SLEEP);
644 }
645
646 tot = 0;
647 for (i = 0, m2 = m; m2 != NULL; m2 = m2->m_next, i++) {
648 iov[i].iov_base = m2->m_data;
649 iov[i].iov_len = m2->m_len;
650 tot += m2->m_len;
651 }
652 mhdr.msg_iov = iov;
653 mhdr.msg_iovlen = i;
654 s = SO2S(so);
655
656 if (saddr != NULL) {
657 mhdr.msg_name = saddr;
658 mhdr.msg_namelen = saddr->sa_len;
659 }
660
661 rumpcomp_sockin_sendmsg(s, &mhdr, 0, &n);
662
663 if (iov != iov_buf)
664 kmem_free(iov, sizeof(struct iovec) * iov_max);
665
666 m_freem(m);
667 m_freem(control);
668
669 /* this assumes too many things to list.. buthey, testing */
670 if (!rump_threads)
671 sockin_process(so);
672
673 return error;
674 }
675
/* sendoob is not supported; any call panics. */
static int
sockin_sendoob(struct socket *so, struct mbuf *m, struct mbuf *control)
{

	KASSERT(solocked(so));

	panic("sockin_sendoob: IMPLEMENT ME, sendoob not supported");
}
683
/* purgeif is not supported; any call panics. */
static int
sockin_purgeif(struct socket *so, struct ifnet *ifp)
{

	panic("sockin_purgeif: IMPLEMENT ME, purgeif not supported");
}
690
691 static int
692 sockin_ctloutput(int op, struct socket *so, struct sockopt *sopt)
693 {
694
695 return rumpcomp_sockin_setsockopt(SO2S(so), sopt->sopt_level,
696 sopt->sopt_name, sopt->sopt_data, sopt->sopt_size);
697 }
698
/*
 * Common target for interfaces that sockin does not provide: rtrequest,
 * ifunit and ifreq_setaddr are strong aliases of this function, so
 * calling any of them panics.
 */
int	sockin_unavailable(void);

int
sockin_unavailable(void)
{

	panic("interface not available in with sockin");
}

__strong_alias(rtrequest,sockin_unavailable);
__strong_alias(ifunit,sockin_unavailable);
__strong_alias(ifreq_setaddr,sockin_unavailable);
709