sockin.c revision 1.46 1 /* $NetBSD: sockin.c,v 1.46 2014/07/07 17:13:57 rtr Exp $ */
2
3 /*
4 * Copyright (c) 2008, 2009 Antti Kantee. All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 #include <sys/cdefs.h>
29 __KERNEL_RCSID(0, "$NetBSD: sockin.c,v 1.46 2014/07/07 17:13:57 rtr Exp $");
30
31 #include <sys/param.h>
32 #include <sys/condvar.h>
33 #include <sys/domain.h>
34 #include <sys/kmem.h>
35 #include <sys/kthread.h>
36 #include <sys/mbuf.h>
37 #include <sys/mutex.h>
38 #include <sys/once.h>
39 #include <sys/poll.h>
40 #include <sys/protosw.h>
41 #include <sys/queue.h>
42 #include <sys/socket.h>
43 #include <sys/socketvar.h>
44 #include <sys/time.h>
45
46 #include <net/bpf.h>
47 #include <net/if.h>
48 #include <net/radix.h>
49
50 #include <netinet/in.h>
51 #include <netinet/in_systm.h>
52 #include <netinet/ip.h>
53
54 #include <rump/rumpuser.h>
55
56 #include "rump_private.h"
57 #include "sockin_user.h"
58
59 /*
60 * An inet communication domain which uses the socket interface.
61 * Supports IPv4 & IPv6 UDP/TCP.
62 */
63
64 DOMAIN_DEFINE(sockindomain);
65 DOMAIN_DEFINE(sockin6domain);
66
67 static int sockin_do_init(void);
68 static void sockin_init(void);
69 static int sockin_attach(struct socket *, int);
70 static void sockin_detach(struct socket *);
71 static int sockin_ioctl(struct socket *, u_long, void *, struct ifnet *);
72 static int sockin_stat(struct socket *, struct stat *);
73 static int sockin_usrreq(struct socket *, int, struct mbuf *,
74 struct mbuf *, struct mbuf *, struct lwp *);
75 static int sockin_ctloutput(int op, struct socket *, struct sockopt *);
76
77 static const struct pr_usrreqs sockin_usrreqs = {
78 .pr_attach = sockin_attach,
79 .pr_detach = sockin_detach,
80 .pr_ioctl = sockin_ioctl,
81 .pr_stat = sockin_stat,
82 .pr_generic = sockin_usrreq,
83 };
84
85 const struct protosw sockinsw[] = {
86 {
87 .pr_type = SOCK_DGRAM,
88 .pr_domain = &sockindomain,
89 .pr_protocol = IPPROTO_UDP,
90 .pr_flags = PR_ATOMIC|PR_ADDR,
91 .pr_usrreqs = &sockin_usrreqs,
92 .pr_ctloutput = sockin_ctloutput,
93 },
94 {
95 .pr_type = SOCK_STREAM,
96 .pr_domain = &sockindomain,
97 .pr_protocol = IPPROTO_TCP,
98 .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN|PR_ABRTACPTDIS,
99 .pr_usrreqs = &sockin_usrreqs,
100 .pr_ctloutput = sockin_ctloutput,
101 }};
102 const struct protosw sockin6sw[] = {
103 {
104 .pr_type = SOCK_DGRAM,
105 .pr_domain = &sockin6domain,
106 .pr_protocol = IPPROTO_UDP,
107 .pr_flags = PR_ATOMIC|PR_ADDR,
108 .pr_usrreqs = &sockin_usrreqs,
109 .pr_ctloutput = sockin_ctloutput,
110 },
111 {
112 .pr_type = SOCK_STREAM,
113 .pr_domain = &sockin6domain,
114 .pr_protocol = IPPROTO_TCP,
115 .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN|PR_ABRTACPTDIS,
116 .pr_usrreqs = &sockin_usrreqs,
117 .pr_ctloutput = sockin_ctloutput,
118 }};
119
120 struct domain sockindomain = {
121 .dom_family = PF_INET,
122 .dom_name = "socket_inet",
123 .dom_init = sockin_init,
124 .dom_externalize = NULL,
125 .dom_dispose = NULL,
126 .dom_protosw = sockinsw,
127 .dom_protoswNPROTOSW = &sockinsw[__arraycount(sockinsw)],
128 .dom_rtattach = rt_inithead,
129 .dom_rtoffset = 32,
130 .dom_maxrtkey = sizeof(struct sockaddr_in),
131 .dom_ifattach = NULL,
132 .dom_ifdetach = NULL,
133 .dom_ifqueues = { NULL },
134 .dom_link = { NULL },
135 .dom_mowner = MOWNER_INIT("",""),
136 .dom_rtcache = { NULL },
137 .dom_sockaddr_cmp = NULL
138 };
139 struct domain sockin6domain = {
140 .dom_family = PF_INET6,
141 .dom_name = "socket_inet6",
142 .dom_init = sockin_init,
143 .dom_externalize = NULL,
144 .dom_dispose = NULL,
145 .dom_protosw = sockin6sw,
146 .dom_protoswNPROTOSW = &sockin6sw[__arraycount(sockin6sw)],
147 .dom_rtattach = rt_inithead,
148 .dom_rtoffset = 32,
149 .dom_maxrtkey = sizeof(struct sockaddr_in6),
150 .dom_ifattach = NULL,
151 .dom_ifdetach = NULL,
152 .dom_ifqueues = { NULL },
153 .dom_link = { NULL },
154 .dom_mowner = MOWNER_INIT("",""),
155 .dom_rtcache = { NULL },
156 .dom_sockaddr_cmp = NULL
157 };
158
/*
 * SO2S: fetch the integer that registersock() stored in so_internal
 * (the descriptor number handed to the rumpcomp/rumpuser calls).
 * The argument is parenthesized so that any pointer expression, not
 * just a plain identifier, expands correctly.
 */
#define SO2S(so) ((intptr_t)((so)->so_internal))

/* Byte count used for the send/receive buffer reservations. */
#define SOCKIN_SBSIZE 65536
161
162 struct sockin_unit {
163 struct socket *su_so;
164
165 LIST_ENTRY(sockin_unit) su_entries;
166 };
167 static LIST_HEAD(, sockin_unit) su_ent = LIST_HEAD_INITIALIZER(su_ent);
168 static kmutex_t su_mtx;
169 static bool rebuild;
170 static int nsock;
171
172 /* XXX: for the bpf hack */
173 static struct ifnet sockin_if;
174 int ifpromisc(struct ifnet *ifp, int pswitch) { return 0; }
175
176 static int
177 registersock(struct socket *so, int news)
178 {
179 struct sockin_unit *su;
180
181 su = kmem_alloc(sizeof(*su), KM_NOSLEEP);
182 if (!su)
183 return ENOMEM;
184
185 so->so_internal = (void *)(intptr_t)news;
186 su->su_so = so;
187
188 mutex_enter(&su_mtx);
189 LIST_INSERT_HEAD(&su_ent, su, su_entries);
190 nsock++;
191 rebuild = true;
192 mutex_exit(&su_mtx);
193
194 return 0;
195 }
196
197 static void
198 removesock(struct socket *so)
199 {
200 struct sockin_unit *su_iter;
201
202 mutex_enter(&su_mtx);
203 LIST_FOREACH(su_iter, &su_ent, su_entries) {
204 if (su_iter->su_so == so)
205 break;
206 }
207 if (!su_iter)
208 panic("no such socket");
209
210 LIST_REMOVE(su_iter, su_entries);
211 nsock--;
212 rebuild = true;
213 mutex_exit(&su_mtx);
214
215 rumpuser_close(SO2S(su_iter->su_so));
216 kmem_free(su_iter, sizeof(*su_iter));
217 }
218
219 static void
220 sockin_process(struct socket *so)
221 {
222 struct sockaddr_in6 from;
223 struct iovec io;
224 struct msghdr rmsg;
225 struct mbuf *m;
226 size_t n, plen;
227 int error;
228
229 m = m_gethdr(M_WAIT, MT_DATA);
230 if (so->so_proto->pr_type == SOCK_DGRAM) {
231 plen = IP_MAXPACKET;
232 MEXTMALLOC(m, plen, M_DONTWAIT);
233 } else {
234 plen = MCLBYTES;
235 MCLGET(m, M_DONTWAIT);
236 }
237 if ((m->m_flags & M_EXT) == 0) {
238 m_freem(m);
239 return;
240 }
241
242 memset(&rmsg, 0, sizeof(rmsg));
243 io.iov_base = mtod(m, void *);
244 io.iov_len = plen;
245 rmsg.msg_iov = &io;
246 rmsg.msg_iovlen = 1;
247 rmsg.msg_name = (struct sockaddr *)&from;
248 rmsg.msg_namelen = sizeof(from);
249
250 error = rumpcomp_sockin_recvmsg(SO2S(so), &rmsg, 0, &n);
251 if (error || n == 0) {
252 m_freem(m);
253
254 /* Treat a TCP socket a goner */
255 if (error != EAGAIN && so->so_proto->pr_type == SOCK_STREAM) {
256 mutex_enter(softnet_lock);
257 soisdisconnected(so);
258 mutex_exit(softnet_lock);
259 removesock(so);
260 }
261 return;
262 }
263 m->m_len = m->m_pkthdr.len = n;
264
265 bpf_mtap_af(&sockin_if, AF_UNSPEC, m);
266
267 mutex_enter(softnet_lock);
268 if (so->so_proto->pr_type == SOCK_DGRAM) {
269 if (!sbappendaddr(&so->so_rcv, rmsg.msg_name, m, NULL)) {
270 m_freem(m);
271 }
272 } else {
273 sbappendstream(&so->so_rcv, m);
274 }
275
276 sorwakeup(so);
277 mutex_exit(softnet_lock);
278 }
279
280 static void
281 sockin_accept(struct socket *so)
282 {
283 struct socket *nso;
284 struct sockaddr_in6 sin;
285 int news, error, slen;
286
287 slen = sizeof(sin);
288 error = rumpcomp_sockin_accept(SO2S(so), (struct sockaddr *)&sin,
289 &slen, &news);
290 if (error)
291 return;
292
293 mutex_enter(softnet_lock);
294 nso = sonewconn(so, true);
295 if (nso == NULL)
296 goto errout;
297 if (registersock(nso, news) != 0)
298 goto errout;
299 mutex_exit(softnet_lock);
300 return;
301
302 errout:
303 rumpuser_close(news);
304 if (nso)
305 soclose(nso);
306 mutex_exit(softnet_lock);
307 }
308
309 #define POLLTIMEOUT 100 /* check for new entries every 100ms */
310
311 /* XXX: doesn't handle socket (kernel) locking properly? */
312 static void
313 sockinworker(void *arg)
314 {
315 struct pollfd *pfds = NULL, *npfds;
316 struct sockin_unit *su_iter;
317 struct socket *so;
318 int cursock = 0, i, rv, error;
319
320 /*
321 * Loop reading requests. Check for new sockets periodically
322 * (could be smarter, but I'm lazy).
323 */
324 for (;;) {
325 if (rebuild) {
326 npfds = NULL;
327 mutex_enter(&su_mtx);
328 if (nsock)
329 npfds = kmem_alloc(nsock * sizeof(*npfds),
330 KM_NOSLEEP);
331 if (npfds || nsock == 0) {
332 if (pfds)
333 kmem_free(pfds, cursock*sizeof(*pfds));
334 pfds = npfds;
335 cursock = nsock;
336 rebuild = false;
337
338 i = 0;
339 LIST_FOREACH(su_iter, &su_ent, su_entries) {
340 pfds[i].fd = SO2S(su_iter->su_so);
341 pfds[i].events = POLLIN;
342 pfds[i].revents = 0;
343 i++;
344 }
345 KASSERT(i == nsock);
346 }
347 mutex_exit(&su_mtx);
348 }
349
350 /* find affected sockets & process */
351 error = rumpcomp_sockin_poll(pfds, cursock, POLLTIMEOUT, &rv);
352 for (i = 0; i < cursock && rv > 0 && error == 0; i++) {
353 if (pfds[i].revents & POLLIN) {
354 mutex_enter(&su_mtx);
355 LIST_FOREACH(su_iter, &su_ent, su_entries) {
356 if (SO2S(su_iter->su_so)==pfds[i].fd) {
357 so = su_iter->su_so;
358 mutex_exit(&su_mtx);
359 if(so->so_options&SO_ACCEPTCONN)
360 sockin_accept(so);
361 else
362 sockin_process(so);
363 mutex_enter(&su_mtx);
364 break;
365 }
366 }
367 /* if we can't find it, just wing it */
368 KASSERT(rebuild || su_iter);
369 mutex_exit(&su_mtx);
370 pfds[i].revents = 0;
371 rv--;
372 i = -1;
373 continue;
374 }
375
376 /* something else? ignore */
377 if (pfds[i].revents) {
378 pfds[i].revents = 0;
379 rv--;
380 }
381 }
382 KASSERT(rv <= 0);
383 }
384
385 }
386
387 static int
388 sockin_do_init(void)
389 {
390 int rv;
391
392 if (rump_threads) {
393 if ((rv = kthread_create(PRI_NONE, 0, NULL, sockinworker,
394 NULL, NULL, "sockwork")) != 0)
395 panic("sockin_init: could not create worker thread\n");
396 } else {
397 printf("sockin_init: no threads => no worker thread\n");
398 }
399 mutex_init(&su_mtx, MUTEX_DEFAULT, IPL_NONE);
400 strlcpy(sockin_if.if_xname, "sockin0", sizeof(sockin_if.if_xname));
401 bpf_attach(&sockin_if, DLT_NULL, 0);
402 return 0;
403 }
404
405 static void
406 sockin_init(void)
407 {
408 static ONCE_DECL(init);
409
410 RUN_ONCE(&init, sockin_do_init);
411 }
412
413 static int
414 sockin_attach(struct socket *so, int proto)
415 {
416 const int type = so->so_proto->pr_type;
417 int error, news, family;
418
419 sosetlock(so);
420 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
421 error = soreserve(so, SOCKIN_SBSIZE, SOCKIN_SBSIZE);
422 if (error)
423 return error;
424 }
425
426 family = so->so_proto->pr_domain->dom_family;
427 KASSERT(family == PF_INET || family == PF_INET6);
428 error = rumpcomp_sockin_socket(family, type, 0, &news);
429 if (error)
430 return error;
431
432 /* For UDP sockets, make sure we can send/recv maximum. */
433 if (type == SOCK_DGRAM) {
434 int sbsize = SOCKIN_SBSIZE;
435 error = rumpcomp_sockin_setsockopt(news,
436 SOL_SOCKET, SO_SNDBUF,
437 &sbsize, sizeof(sbsize));
438 sbsize = SOCKIN_SBSIZE;
439 error = rumpcomp_sockin_setsockopt(news,
440 SOL_SOCKET, SO_RCVBUF,
441 &sbsize, sizeof(sbsize));
442 }
443
444 if ((error = registersock(so, news)) != 0)
445 rumpuser_close(news);
446
447 return error;
448 }
449
/* pr_detach handler: not implemented, panics when called. */
static void
sockin_detach(struct socket *so)
{

	panic("sockin_detach: IMPLEMENT ME\n");
}
455
456 static int
457 sockin_ioctl(struct socket *so, u_long cmd, void *nam, struct ifnet *ifp)
458 {
459 return ENOTTY;
460 }
461
/*
 * pr_stat handler: asserts that the socket is locked and returns 0
 * without writing anything to `ub'.
 */
static int
sockin_stat(struct socket *so, struct stat *ub)
{

	KASSERT(solocked(so));
	return 0;
}
469
470 static int
471 sockin_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
472 struct mbuf *control, struct lwp *l)
473 {
474 int error = 0;
475
476 KASSERT(req != PRU_CONTROL);
477 KASSERT(req != PRU_SENSE);
478
479 switch (req) {
480 case PRU_ACCEPT:
481 /* we do all the work in the worker thread */
482 break;
483
484 case PRU_BIND:
485 error = rumpcomp_sockin_bind(SO2S(so),
486 mtod(nam, const struct sockaddr *),
487 nam->m_len);
488 break;
489
490 case PRU_CONNECT:
491 error = rumpcomp_sockin_connect(SO2S(so),
492 mtod(nam, struct sockaddr *), nam->m_len);
493 if (error == 0)
494 soisconnected(so);
495 break;
496
497 case PRU_LISTEN:
498 error = rumpcomp_sockin_listen(SO2S(so), so->so_qlimit);
499 break;
500
501 case PRU_SEND:
502 {
503 struct sockaddr *saddr;
504 struct msghdr mhdr;
505 size_t iov_max, i;
506 struct iovec iov_buf[32], *iov;
507 struct mbuf *m2;
508 size_t tot, n;
509 int s;
510
511 bpf_mtap_af(&sockin_if, AF_UNSPEC, m);
512
513 memset(&mhdr, 0, sizeof(mhdr));
514
515 iov_max = 0;
516 for (m2 = m; m2 != NULL; m2 = m2->m_next) {
517 iov_max++;
518 }
519
520 if (iov_max <= __arraycount(iov_buf)) {
521 iov = iov_buf;
522 } else {
523 iov = kmem_alloc(sizeof(struct iovec) * iov_max,
524 KM_SLEEP);
525 }
526
527 tot = 0;
528 for (i = 0, m2 = m; m2 != NULL; m2 = m2->m_next, i++) {
529 iov[i].iov_base = m2->m_data;
530 iov[i].iov_len = m2->m_len;
531 tot += m2->m_len;
532 }
533 mhdr.msg_iov = iov;
534 mhdr.msg_iovlen = i;
535 s = SO2S(so);
536
537 if (nam != NULL) {
538 saddr = mtod(nam, struct sockaddr *);
539 mhdr.msg_name = saddr;
540 mhdr.msg_namelen = saddr->sa_len;
541 }
542
543 rumpcomp_sockin_sendmsg(s, &mhdr, 0, &n);
544
545 if (iov != iov_buf)
546 kmem_free(iov, sizeof(struct iovec) * iov_max);
547
548 m_freem(m);
549 m_freem(control);
550
551 /* this assumes too many things to list.. buthey, testing */
552 if (!rump_threads)
553 sockin_process(so);
554 }
555 break;
556
557 case PRU_SHUTDOWN:
558 removesock(so);
559 break;
560
561 case PRU_SOCKADDR:
562 case PRU_PEERADDR:
563 {
564 int slen = nam->m_len;
565 enum rumpcomp_sockin_getnametype which;
566
567 if (req == PRU_SOCKADDR)
568 which = RUMPCOMP_SOCKIN_SOCKNAME;
569 else
570 which = RUMPCOMP_SOCKIN_PEERNAME;
571 error = rumpcomp_sockin_getname(SO2S(so),
572 mtod(nam, struct sockaddr *), &slen, which);
573 if (error == 0)
574 nam->m_len = slen;
575 break;
576 }
577
578 default:
579 panic("sockin_usrreq: IMPLEMENT ME, req %d not supported", req);
580 }
581
582 return error;
583 }
584
585 static int
586 sockin_ctloutput(int op, struct socket *so, struct sockopt *sopt)
587 {
588
589 return rumpcomp_sockin_setsockopt(SO2S(so), sopt->sopt_level,
590 sopt->sopt_name, sopt->sopt_data, sopt->sopt_size);
591 }
592
/*
 * Catch-all for interfaces that this module does not provide:
 * rtrequest, ifunit and ifreq_setaddr are aliased to this function,
 * which panics when called.
 */
int sockin_unavailable(void);

int
sockin_unavailable(void)
{

	panic("interface not available in with sockin");
}

__strong_alias(rtrequest,sockin_unavailable);
__strong_alias(ifunit,sockin_unavailable);
__strong_alias(ifreq_setaddr,sockin_unavailable);
603