sockin.c revision 1.47 1 /* $NetBSD: sockin.c,v 1.47 2014/07/09 04:54:04 rtr Exp $ */
2
3 /*
4 * Copyright (c) 2008, 2009 Antti Kantee. All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 #include <sys/cdefs.h>
29 __KERNEL_RCSID(0, "$NetBSD: sockin.c,v 1.47 2014/07/09 04:54:04 rtr Exp $");
30
31 #include <sys/param.h>
32 #include <sys/condvar.h>
33 #include <sys/domain.h>
34 #include <sys/kmem.h>
35 #include <sys/kthread.h>
36 #include <sys/mbuf.h>
37 #include <sys/mutex.h>
38 #include <sys/once.h>
39 #include <sys/poll.h>
40 #include <sys/protosw.h>
41 #include <sys/queue.h>
42 #include <sys/socket.h>
43 #include <sys/socketvar.h>
44 #include <sys/time.h>
45
46 #include <net/bpf.h>
47 #include <net/if.h>
48 #include <net/radix.h>
49
50 #include <netinet/in.h>
51 #include <netinet/in_systm.h>
52 #include <netinet/ip.h>
53
54 #include <rump/rumpuser.h>
55
56 #include "rump_private.h"
57 #include "sockin_user.h"
58
59 /*
60 * An inet communication domain which uses the socket interface.
61 * Supports IPv4 & IPv6 UDP/TCP.
62 */
63
64 DOMAIN_DEFINE(sockindomain);
65 DOMAIN_DEFINE(sockin6domain);
66
67 static int sockin_do_init(void);
68 static void sockin_init(void);
69 static int sockin_attach(struct socket *, int);
70 static void sockin_detach(struct socket *);
71 static int sockin_ioctl(struct socket *, u_long, void *, struct ifnet *);
72 static int sockin_stat(struct socket *, struct stat *);
73 static int sockin_peeraddr(struct socket *, struct mbuf *);
74 static int sockin_sockaddr(struct socket *, struct mbuf *);
75 static int sockin_usrreq(struct socket *, int, struct mbuf *,
76 struct mbuf *, struct mbuf *, struct lwp *);
77 static int sockin_ctloutput(int op, struct socket *, struct sockopt *);
78
79 static const struct pr_usrreqs sockin_usrreqs = {
80 .pr_attach = sockin_attach,
81 .pr_detach = sockin_detach,
82 .pr_ioctl = sockin_ioctl,
83 .pr_stat = sockin_stat,
84 .pr_peeraddr = sockin_peeraddr,
85 .pr_sockaddr = sockin_sockaddr,
86 .pr_generic = sockin_usrreq,
87 };
88
89 const struct protosw sockinsw[] = {
90 {
91 .pr_type = SOCK_DGRAM,
92 .pr_domain = &sockindomain,
93 .pr_protocol = IPPROTO_UDP,
94 .pr_flags = PR_ATOMIC|PR_ADDR,
95 .pr_usrreqs = &sockin_usrreqs,
96 .pr_ctloutput = sockin_ctloutput,
97 },
98 {
99 .pr_type = SOCK_STREAM,
100 .pr_domain = &sockindomain,
101 .pr_protocol = IPPROTO_TCP,
102 .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN|PR_ABRTACPTDIS,
103 .pr_usrreqs = &sockin_usrreqs,
104 .pr_ctloutput = sockin_ctloutput,
105 }};
106 const struct protosw sockin6sw[] = {
107 {
108 .pr_type = SOCK_DGRAM,
109 .pr_domain = &sockin6domain,
110 .pr_protocol = IPPROTO_UDP,
111 .pr_flags = PR_ATOMIC|PR_ADDR,
112 .pr_usrreqs = &sockin_usrreqs,
113 .pr_ctloutput = sockin_ctloutput,
114 },
115 {
116 .pr_type = SOCK_STREAM,
117 .pr_domain = &sockin6domain,
118 .pr_protocol = IPPROTO_TCP,
119 .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN|PR_ABRTACPTDIS,
120 .pr_usrreqs = &sockin_usrreqs,
121 .pr_ctloutput = sockin_ctloutput,
122 }};
123
124 struct domain sockindomain = {
125 .dom_family = PF_INET,
126 .dom_name = "socket_inet",
127 .dom_init = sockin_init,
128 .dom_externalize = NULL,
129 .dom_dispose = NULL,
130 .dom_protosw = sockinsw,
131 .dom_protoswNPROTOSW = &sockinsw[__arraycount(sockinsw)],
132 .dom_rtattach = rt_inithead,
133 .dom_rtoffset = 32,
134 .dom_maxrtkey = sizeof(struct sockaddr_in),
135 .dom_ifattach = NULL,
136 .dom_ifdetach = NULL,
137 .dom_ifqueues = { NULL },
138 .dom_link = { NULL },
139 .dom_mowner = MOWNER_INIT("",""),
140 .dom_rtcache = { NULL },
141 .dom_sockaddr_cmp = NULL
142 };
143 struct domain sockin6domain = {
144 .dom_family = PF_INET6,
145 .dom_name = "socket_inet6",
146 .dom_init = sockin_init,
147 .dom_externalize = NULL,
148 .dom_dispose = NULL,
149 .dom_protosw = sockin6sw,
150 .dom_protoswNPROTOSW = &sockin6sw[__arraycount(sockin6sw)],
151 .dom_rtattach = rt_inithead,
152 .dom_rtoffset = 32,
153 .dom_maxrtkey = sizeof(struct sockaddr_in6),
154 .dom_ifattach = NULL,
155 .dom_ifdetach = NULL,
156 .dom_ifqueues = { NULL },
157 .dom_link = { NULL },
158 .dom_mowner = MOWNER_INIT("",""),
159 .dom_rtcache = { NULL },
160 .dom_sockaddr_cmp = NULL
161 };
162
/*
 * SO2S: recover the host socket descriptor that registersock() stored
 * in so_internal.  The argument is parenthesized so that any pointer
 * expression (not just a plain identifier) expands correctly.
 */
#define SO2S(so) ((intptr_t)((so)->so_internal))

/* Buffer size used for soreserve() and for the host SO_SNDBUF/SO_RCVBUF. */
#define SOCKIN_SBSIZE 65536
165
166 struct sockin_unit {
167 struct socket *su_so;
168
169 LIST_ENTRY(sockin_unit) su_entries;
170 };
171 static LIST_HEAD(, sockin_unit) su_ent = LIST_HEAD_INITIALIZER(su_ent);
172 static kmutex_t su_mtx;
173 static bool rebuild;
174 static int nsock;
175
176 /* XXX: for the bpf hack */
177 static struct ifnet sockin_if;
178 int ifpromisc(struct ifnet *ifp, int pswitch) { return 0; }
179
180 static int
181 registersock(struct socket *so, int news)
182 {
183 struct sockin_unit *su;
184
185 su = kmem_alloc(sizeof(*su), KM_NOSLEEP);
186 if (!su)
187 return ENOMEM;
188
189 so->so_internal = (void *)(intptr_t)news;
190 su->su_so = so;
191
192 mutex_enter(&su_mtx);
193 LIST_INSERT_HEAD(&su_ent, su, su_entries);
194 nsock++;
195 rebuild = true;
196 mutex_exit(&su_mtx);
197
198 return 0;
199 }
200
201 static void
202 removesock(struct socket *so)
203 {
204 struct sockin_unit *su_iter;
205
206 mutex_enter(&su_mtx);
207 LIST_FOREACH(su_iter, &su_ent, su_entries) {
208 if (su_iter->su_so == so)
209 break;
210 }
211 if (!su_iter)
212 panic("no such socket");
213
214 LIST_REMOVE(su_iter, su_entries);
215 nsock--;
216 rebuild = true;
217 mutex_exit(&su_mtx);
218
219 rumpuser_close(SO2S(su_iter->su_so));
220 kmem_free(su_iter, sizeof(*su_iter));
221 }
222
223 static void
224 sockin_process(struct socket *so)
225 {
226 struct sockaddr_in6 from;
227 struct iovec io;
228 struct msghdr rmsg;
229 struct mbuf *m;
230 size_t n, plen;
231 int error;
232
233 m = m_gethdr(M_WAIT, MT_DATA);
234 if (so->so_proto->pr_type == SOCK_DGRAM) {
235 plen = IP_MAXPACKET;
236 MEXTMALLOC(m, plen, M_DONTWAIT);
237 } else {
238 plen = MCLBYTES;
239 MCLGET(m, M_DONTWAIT);
240 }
241 if ((m->m_flags & M_EXT) == 0) {
242 m_freem(m);
243 return;
244 }
245
246 memset(&rmsg, 0, sizeof(rmsg));
247 io.iov_base = mtod(m, void *);
248 io.iov_len = plen;
249 rmsg.msg_iov = &io;
250 rmsg.msg_iovlen = 1;
251 rmsg.msg_name = (struct sockaddr *)&from;
252 rmsg.msg_namelen = sizeof(from);
253
254 error = rumpcomp_sockin_recvmsg(SO2S(so), &rmsg, 0, &n);
255 if (error || n == 0) {
256 m_freem(m);
257
258 /* Treat a TCP socket a goner */
259 if (error != EAGAIN && so->so_proto->pr_type == SOCK_STREAM) {
260 mutex_enter(softnet_lock);
261 soisdisconnected(so);
262 mutex_exit(softnet_lock);
263 removesock(so);
264 }
265 return;
266 }
267 m->m_len = m->m_pkthdr.len = n;
268
269 bpf_mtap_af(&sockin_if, AF_UNSPEC, m);
270
271 mutex_enter(softnet_lock);
272 if (so->so_proto->pr_type == SOCK_DGRAM) {
273 if (!sbappendaddr(&so->so_rcv, rmsg.msg_name, m, NULL)) {
274 m_freem(m);
275 }
276 } else {
277 sbappendstream(&so->so_rcv, m);
278 }
279
280 sorwakeup(so);
281 mutex_exit(softnet_lock);
282 }
283
284 static void
285 sockin_accept(struct socket *so)
286 {
287 struct socket *nso;
288 struct sockaddr_in6 sin;
289 int news, error, slen;
290
291 slen = sizeof(sin);
292 error = rumpcomp_sockin_accept(SO2S(so), (struct sockaddr *)&sin,
293 &slen, &news);
294 if (error)
295 return;
296
297 mutex_enter(softnet_lock);
298 nso = sonewconn(so, true);
299 if (nso == NULL)
300 goto errout;
301 if (registersock(nso, news) != 0)
302 goto errout;
303 mutex_exit(softnet_lock);
304 return;
305
306 errout:
307 rumpuser_close(news);
308 if (nso)
309 soclose(nso);
310 mutex_exit(softnet_lock);
311 }
312
313 #define POLLTIMEOUT 100 /* check for new entries every 100ms */
314
315 /* XXX: doesn't handle socket (kernel) locking properly? */
316 static void
317 sockinworker(void *arg)
318 {
319 struct pollfd *pfds = NULL, *npfds;
320 struct sockin_unit *su_iter;
321 struct socket *so;
322 int cursock = 0, i, rv, error;
323
324 /*
325 * Loop reading requests. Check for new sockets periodically
326 * (could be smarter, but I'm lazy).
327 */
328 for (;;) {
329 if (rebuild) {
330 npfds = NULL;
331 mutex_enter(&su_mtx);
332 if (nsock)
333 npfds = kmem_alloc(nsock * sizeof(*npfds),
334 KM_NOSLEEP);
335 if (npfds || nsock == 0) {
336 if (pfds)
337 kmem_free(pfds, cursock*sizeof(*pfds));
338 pfds = npfds;
339 cursock = nsock;
340 rebuild = false;
341
342 i = 0;
343 LIST_FOREACH(su_iter, &su_ent, su_entries) {
344 pfds[i].fd = SO2S(su_iter->su_so);
345 pfds[i].events = POLLIN;
346 pfds[i].revents = 0;
347 i++;
348 }
349 KASSERT(i == nsock);
350 }
351 mutex_exit(&su_mtx);
352 }
353
354 /* find affected sockets & process */
355 error = rumpcomp_sockin_poll(pfds, cursock, POLLTIMEOUT, &rv);
356 for (i = 0; i < cursock && rv > 0 && error == 0; i++) {
357 if (pfds[i].revents & POLLIN) {
358 mutex_enter(&su_mtx);
359 LIST_FOREACH(su_iter, &su_ent, su_entries) {
360 if (SO2S(su_iter->su_so)==pfds[i].fd) {
361 so = su_iter->su_so;
362 mutex_exit(&su_mtx);
363 if(so->so_options&SO_ACCEPTCONN)
364 sockin_accept(so);
365 else
366 sockin_process(so);
367 mutex_enter(&su_mtx);
368 break;
369 }
370 }
371 /* if we can't find it, just wing it */
372 KASSERT(rebuild || su_iter);
373 mutex_exit(&su_mtx);
374 pfds[i].revents = 0;
375 rv--;
376 i = -1;
377 continue;
378 }
379
380 /* something else? ignore */
381 if (pfds[i].revents) {
382 pfds[i].revents = 0;
383 rv--;
384 }
385 }
386 KASSERT(rv <= 0);
387 }
388
389 }
390
391 static int
392 sockin_do_init(void)
393 {
394 int rv;
395
396 if (rump_threads) {
397 if ((rv = kthread_create(PRI_NONE, 0, NULL, sockinworker,
398 NULL, NULL, "sockwork")) != 0)
399 panic("sockin_init: could not create worker thread\n");
400 } else {
401 printf("sockin_init: no threads => no worker thread\n");
402 }
403 mutex_init(&su_mtx, MUTEX_DEFAULT, IPL_NONE);
404 strlcpy(sockin_if.if_xname, "sockin0", sizeof(sockin_if.if_xname));
405 bpf_attach(&sockin_if, DLT_NULL, 0);
406 return 0;
407 }
408
409 static void
410 sockin_init(void)
411 {
412 static ONCE_DECL(init);
413
414 RUN_ONCE(&init, sockin_do_init);
415 }
416
417 static int
418 sockin_attach(struct socket *so, int proto)
419 {
420 const int type = so->so_proto->pr_type;
421 int error, news, family;
422
423 sosetlock(so);
424 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
425 error = soreserve(so, SOCKIN_SBSIZE, SOCKIN_SBSIZE);
426 if (error)
427 return error;
428 }
429
430 family = so->so_proto->pr_domain->dom_family;
431 KASSERT(family == PF_INET || family == PF_INET6);
432 error = rumpcomp_sockin_socket(family, type, 0, &news);
433 if (error)
434 return error;
435
436 /* For UDP sockets, make sure we can send/recv maximum. */
437 if (type == SOCK_DGRAM) {
438 int sbsize = SOCKIN_SBSIZE;
439 error = rumpcomp_sockin_setsockopt(news,
440 SOL_SOCKET, SO_SNDBUF,
441 &sbsize, sizeof(sbsize));
442 sbsize = SOCKIN_SBSIZE;
443 error = rumpcomp_sockin_setsockopt(news,
444 SOL_SOCKET, SO_RCVBUF,
445 &sbsize, sizeof(sbsize));
446 }
447
448 if ((error = registersock(so, news)) != 0)
449 rumpuser_close(news);
450
451 return error;
452 }
453
/* pr_detach handler: not implemented; reaching it panics. */
static void
sockin_detach(struct socket *so)
{

	panic("sockin_detach: IMPLEMENT ME\n");
}
459
460 static int
461 sockin_ioctl(struct socket *so, u_long cmd, void *nam, struct ifnet *ifp)
462 {
463 return ENOTTY;
464 }
465
/*
 * pr_stat handler: asserts that the socket is locked and reports success
 * without writing anything into `ub'.
 */
static int
sockin_stat(struct socket *so, struct stat *ub)
{

	KASSERT(solocked(so));
	return 0;
}
473
474 static int
475 sockin_peeraddr(struct socket *so, struct mbuf *nam)
476 {
477 KASSERT(solocked(so));
478
479 int error = 0;
480 int slen = nam->m_len;
481
482 error = rumpcomp_sockin_getname(SO2S(so),
483 mtod(nam, struct sockaddr *), &slen, RUMPCOMP_SOCKIN_PEERNAME);
484 if (error == 0)
485 nam->m_len = slen;
486 return error;
487 }
488
489 static int
490 sockin_sockaddr(struct socket *so, struct mbuf *nam)
491 {
492 KASSERT(solocked(so));
493
494 int error = 0;
495 int slen = nam->m_len;
496
497 error = rumpcomp_sockin_getname(SO2S(so),
498 mtod(nam, struct sockaddr *), &slen, RUMPCOMP_SOCKIN_SOCKNAME);
499 if (error == 0)
500 nam->m_len = slen;
501 return error;
502 }
503
504 static int
505 sockin_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
506 struct mbuf *control, struct lwp *l)
507 {
508 int error = 0;
509
510 KASSERT(req != PRU_CONTROL);
511 KASSERT(req != PRU_SENSE);
512 KASSERT(req != PRU_PEERADDR);
513 KASSERT(req != PRU_SOCKADDR);
514
515 switch (req) {
516 case PRU_ACCEPT:
517 /* we do all the work in the worker thread */
518 break;
519
520 case PRU_BIND:
521 error = rumpcomp_sockin_bind(SO2S(so),
522 mtod(nam, const struct sockaddr *),
523 nam->m_len);
524 break;
525
526 case PRU_CONNECT:
527 error = rumpcomp_sockin_connect(SO2S(so),
528 mtod(nam, struct sockaddr *), nam->m_len);
529 if (error == 0)
530 soisconnected(so);
531 break;
532
533 case PRU_LISTEN:
534 error = rumpcomp_sockin_listen(SO2S(so), so->so_qlimit);
535 break;
536
537 case PRU_SEND:
538 {
539 struct sockaddr *saddr;
540 struct msghdr mhdr;
541 size_t iov_max, i;
542 struct iovec iov_buf[32], *iov;
543 struct mbuf *m2;
544 size_t tot, n;
545 int s;
546
547 bpf_mtap_af(&sockin_if, AF_UNSPEC, m);
548
549 memset(&mhdr, 0, sizeof(mhdr));
550
551 iov_max = 0;
552 for (m2 = m; m2 != NULL; m2 = m2->m_next) {
553 iov_max++;
554 }
555
556 if (iov_max <= __arraycount(iov_buf)) {
557 iov = iov_buf;
558 } else {
559 iov = kmem_alloc(sizeof(struct iovec) * iov_max,
560 KM_SLEEP);
561 }
562
563 tot = 0;
564 for (i = 0, m2 = m; m2 != NULL; m2 = m2->m_next, i++) {
565 iov[i].iov_base = m2->m_data;
566 iov[i].iov_len = m2->m_len;
567 tot += m2->m_len;
568 }
569 mhdr.msg_iov = iov;
570 mhdr.msg_iovlen = i;
571 s = SO2S(so);
572
573 if (nam != NULL) {
574 saddr = mtod(nam, struct sockaddr *);
575 mhdr.msg_name = saddr;
576 mhdr.msg_namelen = saddr->sa_len;
577 }
578
579 rumpcomp_sockin_sendmsg(s, &mhdr, 0, &n);
580
581 if (iov != iov_buf)
582 kmem_free(iov, sizeof(struct iovec) * iov_max);
583
584 m_freem(m);
585 m_freem(control);
586
587 /* this assumes too many things to list.. buthey, testing */
588 if (!rump_threads)
589 sockin_process(so);
590 }
591 break;
592
593 case PRU_SHUTDOWN:
594 removesock(so);
595 break;
596
597 default:
598 panic("sockin_usrreq: IMPLEMENT ME, req %d not supported", req);
599 }
600
601 return error;
602 }
603
604 static int
605 sockin_ctloutput(int op, struct socket *so, struct sockopt *sopt)
606 {
607
608 return rumpcomp_sockin_setsockopt(SO2S(so), sopt->sopt_level,
609 sopt->sopt_name, sopt->sopt_data, sopt->sopt_size);
610 }
611
/*
 * Catch-all for kernel interfaces this component does not provide:
 * rtrequest, ifunit and ifreq_setaddr are aliased to this function,
 * so calling any of them panics.
 */
int	sockin_unavailable(void);

int
sockin_unavailable(void)
{

	panic("interface not available in with sockin");
}

__strong_alias(rtrequest,sockin_unavailable);
__strong_alias(ifunit,sockin_unavailable);
__strong_alias(ifreq_setaddr,sockin_unavailable);
622