/*	$NetBSD: sockin.c,v 1.39 2014/05/18 17:57:44 rmind Exp $	*/
2
3 /*
4 * Copyright (c) 2008, 2009 Antti Kantee. All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 #include <sys/cdefs.h>
29 __KERNEL_RCSID(0, "$NetBSD: sockin.c,v 1.39 2014/05/18 17:57:44 rmind Exp $");
30
31 #include <sys/param.h>
32 #include <sys/condvar.h>
33 #include <sys/domain.h>
34 #include <sys/kmem.h>
35 #include <sys/kthread.h>
36 #include <sys/mbuf.h>
37 #include <sys/mutex.h>
38 #include <sys/once.h>
39 #include <sys/poll.h>
40 #include <sys/protosw.h>
41 #include <sys/queue.h>
42 #include <sys/socket.h>
43 #include <sys/socketvar.h>
44 #include <sys/time.h>
45
46 #include <net/bpf.h>
47 #include <net/if.h>
48 #include <net/radix.h>
49
50 #include <netinet/in.h>
51 #include <netinet/in_systm.h>
52 #include <netinet/ip.h>
53
54 #include <rump/rumpuser.h>
55
56 #include "rump_private.h"
57 #include "sockin_user.h"
58
59 /*
60 * An inet communication domain which uses the socket interface.
61 * Supports IPv4 & IPv6 UDP/TCP.
62 */
63
64 DOMAIN_DEFINE(sockindomain);
65 DOMAIN_DEFINE(sockin6domain);
66
67 static int sockin_do_init(void);
68 static void sockin_init(void);
69 static int sockin_usrreq(struct socket *, int, struct mbuf *,
70 struct mbuf *, struct mbuf *, struct lwp *);
71 static int sockin_ctloutput(int op, struct socket *, struct sockopt *);
72
73 static const struct pr_usrreqs sockin_usrreqs = {
74 .pr_generic = sockin_usrreq,
75 };
76
77 const struct protosw sockinsw[] = {
78 {
79 .pr_type = SOCK_DGRAM,
80 .pr_domain = &sockindomain,
81 .pr_protocol = IPPROTO_UDP,
82 .pr_flags = PR_ATOMIC|PR_ADDR,
83 .pr_usrreqs = &sockin_usrreqs,
84 .pr_ctloutput = sockin_ctloutput,
85 },
86 {
87 .pr_type = SOCK_STREAM,
88 .pr_domain = &sockindomain,
89 .pr_protocol = IPPROTO_TCP,
90 .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN|PR_ABRTACPTDIS,
91 .pr_usrreqs = &sockin_usrreqs,
92 .pr_ctloutput = sockin_ctloutput,
93 }};
94 const struct protosw sockin6sw[] = {
95 {
96 .pr_type = SOCK_DGRAM,
97 .pr_domain = &sockin6domain,
98 .pr_protocol = IPPROTO_UDP,
99 .pr_flags = PR_ATOMIC|PR_ADDR,
100 .pr_usrreqs = &sockin_usrreqs,
101 .pr_ctloutput = sockin_ctloutput,
102 },
103 {
104 .pr_type = SOCK_STREAM,
105 .pr_domain = &sockin6domain,
106 .pr_protocol = IPPROTO_TCP,
107 .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN|PR_ABRTACPTDIS,
108 .pr_usrreqs = &sockin_usrreqs,
109 .pr_ctloutput = sockin_ctloutput,
110 }};
111
112 struct domain sockindomain = {
113 .dom_family = PF_INET,
114 .dom_name = "socket_inet",
115 .dom_init = sockin_init,
116 .dom_externalize = NULL,
117 .dom_dispose = NULL,
118 .dom_protosw = sockinsw,
119 .dom_protoswNPROTOSW = &sockinsw[__arraycount(sockinsw)],
120 .dom_rtattach = rt_inithead,
121 .dom_rtoffset = 32,
122 .dom_maxrtkey = sizeof(struct sockaddr_in),
123 .dom_ifattach = NULL,
124 .dom_ifdetach = NULL,
125 .dom_ifqueues = { NULL },
126 .dom_link = { NULL },
127 .dom_mowner = MOWNER_INIT("",""),
128 .dom_rtcache = { NULL },
129 .dom_sockaddr_cmp = NULL
130 };
131 struct domain sockin6domain = {
132 .dom_family = PF_INET6,
133 .dom_name = "socket_inet6",
134 .dom_init = sockin_init,
135 .dom_externalize = NULL,
136 .dom_dispose = NULL,
137 .dom_protosw = sockin6sw,
138 .dom_protoswNPROTOSW = &sockin6sw[__arraycount(sockin6sw)],
139 .dom_rtattach = rt_inithead,
140 .dom_rtoffset = 32,
141 .dom_maxrtkey = sizeof(struct sockaddr_in6),
142 .dom_ifattach = NULL,
143 .dom_ifdetach = NULL,
144 .dom_ifqueues = { NULL },
145 .dom_link = { NULL },
146 .dom_mowner = MOWNER_INIT("",""),
147 .dom_rtcache = { NULL },
148 .dom_sockaddr_cmp = NULL
149 };
150
/*
 * SO2S: recover the integer stored in a socket's so_internal pointer
 * (registersock() stores the host descriptor there).  The argument is
 * parenthesized so that any pointer-valued expression, not just a plain
 * identifier, expands correctly.
 */
#define SO2S(so)	((intptr_t)((so)->so_internal))

/* Send/receive buffer size requested for sockets, in bytes. */
#define SOCKIN_SBSIZE	65536
153
154 struct sockin_unit {
155 struct socket *su_so;
156
157 LIST_ENTRY(sockin_unit) su_entries;
158 };
159 static LIST_HEAD(, sockin_unit) su_ent = LIST_HEAD_INITIALIZER(su_ent);
160 static kmutex_t su_mtx;
161 static bool rebuild;
162 static int nsock;
163
164 /* XXX: for the bpf hack */
165 static struct ifnet sockin_if;
166 int ifpromisc(struct ifnet *ifp, int pswitch) { return 0; }
167
168 static int
169 registersock(struct socket *so, int news)
170 {
171 struct sockin_unit *su;
172
173 su = kmem_alloc(sizeof(*su), KM_NOSLEEP);
174 if (!su)
175 return ENOMEM;
176
177 so->so_internal = (void *)(intptr_t)news;
178 su->su_so = so;
179
180 mutex_enter(&su_mtx);
181 LIST_INSERT_HEAD(&su_ent, su, su_entries);
182 nsock++;
183 rebuild = true;
184 mutex_exit(&su_mtx);
185
186 return 0;
187 }
188
189 static void
190 removesock(struct socket *so)
191 {
192 struct sockin_unit *su_iter;
193
194 mutex_enter(&su_mtx);
195 LIST_FOREACH(su_iter, &su_ent, su_entries) {
196 if (su_iter->su_so == so)
197 break;
198 }
199 if (!su_iter)
200 panic("no such socket");
201
202 LIST_REMOVE(su_iter, su_entries);
203 nsock--;
204 rebuild = true;
205 mutex_exit(&su_mtx);
206
207 rumpuser_close(SO2S(su_iter->su_so));
208 kmem_free(su_iter, sizeof(*su_iter));
209 }
210
211 static void
212 sockin_process(struct socket *so)
213 {
214 struct sockaddr_in6 from;
215 struct iovec io;
216 struct msghdr rmsg;
217 struct mbuf *m;
218 size_t n, plen;
219 int error;
220
221 m = m_gethdr(M_WAIT, MT_DATA);
222 if (so->so_proto->pr_type == SOCK_DGRAM) {
223 plen = IP_MAXPACKET;
224 MEXTMALLOC(m, plen, M_DONTWAIT);
225 } else {
226 plen = MCLBYTES;
227 MCLGET(m, M_DONTWAIT);
228 }
229 if ((m->m_flags & M_EXT) == 0) {
230 m_freem(m);
231 return;
232 }
233
234 memset(&rmsg, 0, sizeof(rmsg));
235 io.iov_base = mtod(m, void *);
236 io.iov_len = plen;
237 rmsg.msg_iov = &io;
238 rmsg.msg_iovlen = 1;
239 rmsg.msg_name = (struct sockaddr *)&from;
240 rmsg.msg_namelen = sizeof(from);
241
242 error = rumpcomp_sockin_recvmsg(SO2S(so), &rmsg, 0, &n);
243 if (error || n == 0) {
244 m_freem(m);
245
246 /* Treat a TCP socket a goner */
247 if (error != EAGAIN && so->so_proto->pr_type == SOCK_STREAM) {
248 mutex_enter(softnet_lock);
249 soisdisconnected(so);
250 mutex_exit(softnet_lock);
251 removesock(so);
252 }
253 return;
254 }
255 m->m_len = m->m_pkthdr.len = n;
256
257 bpf_mtap_af(&sockin_if, AF_UNSPEC, m);
258
259 mutex_enter(softnet_lock);
260 if (so->so_proto->pr_type == SOCK_DGRAM) {
261 if (!sbappendaddr(&so->so_rcv, rmsg.msg_name, m, NULL)) {
262 m_freem(m);
263 }
264 } else {
265 sbappendstream(&so->so_rcv, m);
266 }
267
268 sorwakeup(so);
269 mutex_exit(softnet_lock);
270 }
271
272 static void
273 sockin_accept(struct socket *so)
274 {
275 struct socket *nso;
276 struct sockaddr_in6 sin;
277 int news, error, slen;
278
279 slen = sizeof(sin);
280 error = rumpcomp_sockin_accept(SO2S(so), (struct sockaddr *)&sin,
281 &slen, &news);
282 if (error)
283 return;
284
285 mutex_enter(softnet_lock);
286 nso = sonewconn(so, true);
287 if (nso == NULL)
288 goto errout;
289 if (registersock(nso, news) != 0)
290 goto errout;
291 mutex_exit(softnet_lock);
292 return;
293
294 errout:
295 rumpuser_close(news);
296 if (nso)
297 soclose(nso);
298 mutex_exit(softnet_lock);
299 }
300
301 #define POLLTIMEOUT 100 /* check for new entries every 100ms */
302
303 /* XXX: doesn't handle socket (kernel) locking properly? */
304 static void
305 sockinworker(void *arg)
306 {
307 struct pollfd *pfds = NULL, *npfds;
308 struct sockin_unit *su_iter;
309 struct socket *so;
310 int cursock = 0, i, rv, error;
311
312 /*
313 * Loop reading requests. Check for new sockets periodically
314 * (could be smarter, but I'm lazy).
315 */
316 for (;;) {
317 if (rebuild) {
318 npfds = NULL;
319 mutex_enter(&su_mtx);
320 if (nsock)
321 npfds = kmem_alloc(nsock * sizeof(*npfds),
322 KM_NOSLEEP);
323 if (npfds || nsock == 0) {
324 if (pfds)
325 kmem_free(pfds, cursock*sizeof(*pfds));
326 pfds = npfds;
327 cursock = nsock;
328 rebuild = false;
329
330 i = 0;
331 LIST_FOREACH(su_iter, &su_ent, su_entries) {
332 pfds[i].fd = SO2S(su_iter->su_so);
333 pfds[i].events = POLLIN;
334 pfds[i].revents = 0;
335 i++;
336 }
337 KASSERT(i == nsock);
338 }
339 mutex_exit(&su_mtx);
340 }
341
342 /* find affected sockets & process */
343 error = rumpcomp_sockin_poll(pfds, cursock, POLLTIMEOUT, &rv);
344 for (i = 0; i < cursock && rv > 0 && error == 0; i++) {
345 if (pfds[i].revents & POLLIN) {
346 mutex_enter(&su_mtx);
347 LIST_FOREACH(su_iter, &su_ent, su_entries) {
348 if (SO2S(su_iter->su_so)==pfds[i].fd) {
349 so = su_iter->su_so;
350 mutex_exit(&su_mtx);
351 if(so->so_options&SO_ACCEPTCONN)
352 sockin_accept(so);
353 else
354 sockin_process(so);
355 mutex_enter(&su_mtx);
356 break;
357 }
358 }
359 /* if we can't find it, just wing it */
360 KASSERT(rebuild || su_iter);
361 mutex_exit(&su_mtx);
362 pfds[i].revents = 0;
363 rv--;
364 i = -1;
365 continue;
366 }
367
368 /* something else? ignore */
369 if (pfds[i].revents) {
370 pfds[i].revents = 0;
371 rv--;
372 }
373 }
374 KASSERT(rv <= 0);
375 }
376
377 }
378
379 static int
380 sockin_do_init(void)
381 {
382 int rv;
383
384 if (rump_threads) {
385 if ((rv = kthread_create(PRI_NONE, 0, NULL, sockinworker,
386 NULL, NULL, "sockwork")) != 0)
387 panic("sockin_init: could not create worker thread\n");
388 } else {
389 printf("sockin_init: no threads => no worker thread\n");
390 }
391 mutex_init(&su_mtx, MUTEX_DEFAULT, IPL_NONE);
392 strlcpy(sockin_if.if_xname, "sockin0", sizeof(sockin_if.if_xname));
393 bpf_attach(&sockin_if, DLT_NULL, 0);
394 return 0;
395 }
396
397 static void
398 sockin_init(void)
399 {
400 static ONCE_DECL(init);
401
402 RUN_ONCE(&init, sockin_do_init);
403 }
404
405 static int
406 sockin_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
407 struct mbuf *control, struct lwp *l)
408 {
409 int error = 0;
410
411 switch (req) {
412 case PRU_ATTACH:
413 {
414 int news;
415 int sbsize;
416 int family;
417
418 sosetlock(so);
419 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
420 error = soreserve(so, SOCKIN_SBSIZE, SOCKIN_SBSIZE);
421 if (error)
422 break;
423 }
424
425 family = so->so_proto->pr_domain->dom_family;
426 KASSERT(family == PF_INET || family == PF_INET6);
427 error = rumpcomp_sockin_socket(family,
428 so->so_proto->pr_type, 0, &news);
429 if (error)
430 break;
431
432 /* for UDP sockets, make sure we can send&recv max */
433 if (so->so_proto->pr_type == SOCK_DGRAM) {
434 sbsize = SOCKIN_SBSIZE;
435 error = rumpcomp_sockin_setsockopt(news,
436 SOL_SOCKET, SO_SNDBUF,
437 &sbsize, sizeof(sbsize));
438 sbsize = SOCKIN_SBSIZE;
439 error = rumpcomp_sockin_setsockopt(news,
440 SOL_SOCKET, SO_RCVBUF,
441 &sbsize, sizeof(sbsize));
442 }
443
444 if ((error = registersock(so, news)) != 0)
445 rumpuser_close(news);
446
447 break;
448 }
449
450 case PRU_ACCEPT:
451 /* we do all the work in the worker thread */
452 break;
453
454 case PRU_BIND:
455 error = rumpcomp_sockin_bind(SO2S(so),
456 mtod(nam, const struct sockaddr *),
457 nam->m_len);
458 break;
459
460 case PRU_CONNECT:
461 error = rumpcomp_sockin_connect(SO2S(so),
462 mtod(nam, struct sockaddr *), nam->m_len);
463 if (error == 0)
464 soisconnected(so);
465 break;
466
467 case PRU_LISTEN:
468 error = rumpcomp_sockin_listen(SO2S(so), so->so_qlimit);
469 break;
470
471 case PRU_SEND:
472 {
473 struct sockaddr *saddr;
474 struct msghdr mhdr;
475 size_t iov_max, i;
476 struct iovec iov_buf[32], *iov;
477 struct mbuf *m2;
478 size_t tot, n;
479 int s;
480
481 bpf_mtap_af(&sockin_if, AF_UNSPEC, m);
482
483 memset(&mhdr, 0, sizeof(mhdr));
484
485 iov_max = 0;
486 for (m2 = m; m2 != NULL; m2 = m2->m_next) {
487 iov_max++;
488 }
489
490 if (iov_max <= __arraycount(iov_buf)) {
491 iov = iov_buf;
492 } else {
493 iov = kmem_alloc(sizeof(struct iovec) * iov_max,
494 KM_SLEEP);
495 }
496
497 tot = 0;
498 for (i = 0, m2 = m; m2 != NULL; m2 = m2->m_next, i++) {
499 iov[i].iov_base = m2->m_data;
500 iov[i].iov_len = m2->m_len;
501 tot += m2->m_len;
502 }
503 mhdr.msg_iov = iov;
504 mhdr.msg_iovlen = i;
505 s = SO2S(so);
506
507 if (nam != NULL) {
508 saddr = mtod(nam, struct sockaddr *);
509 mhdr.msg_name = saddr;
510 mhdr.msg_namelen = saddr->sa_len;
511 }
512
513 rumpcomp_sockin_sendmsg(s, &mhdr, 0, &n);
514
515 if (iov != iov_buf)
516 kmem_free(iov, sizeof(struct iovec) * iov_max);
517
518 m_freem(m);
519 m_freem(control);
520
521 /* this assumes too many things to list.. buthey, testing */
522 if (!rump_threads)
523 sockin_process(so);
524 }
525 break;
526
527 case PRU_SHUTDOWN:
528 removesock(so);
529 break;
530
531 case PRU_SOCKADDR:
532 case PRU_PEERADDR:
533 {
534 int slen = nam->m_len;
535 enum rumpcomp_sockin_getnametype which;
536
537 if (req == PRU_SOCKADDR)
538 which = RUMPCOMP_SOCKIN_SOCKNAME;
539 else
540 which = RUMPCOMP_SOCKIN_PEERNAME;
541 error = rumpcomp_sockin_getname(SO2S(so),
542 mtod(nam, struct sockaddr *), &slen, which);
543 if (error == 0)
544 nam->m_len = slen;
545 break;
546 }
547
548 case PRU_CONTROL:
549 error = ENOTTY;
550 break;
551
552 default:
553 panic("sockin_usrreq: IMPLEMENT ME, req %d not supported", req);
554 }
555
556 return error;
557 }
558
559 static int
560 sockin_ctloutput(int op, struct socket *so, struct sockopt *sopt)
561 {
562
563 return rumpcomp_sockin_setsockopt(SO2S(so), sopt->sopt_level,
564 sopt->sopt_name, sopt->sopt_data, sopt->sopt_size);
565 }
566
/*
 * Catch-all for kernel interfaces this component does not provide:
 * rtrequest, ifunit and ifreq_setaddr are strong aliases of this
 * function, so calling any of them panics.
 */
int	sockin_unavailable(void);

int
sockin_unavailable(void)
{

	panic("interface not available in with sockin");
}

__strong_alias(rtrequest,sockin_unavailable);
__strong_alias(ifunit,sockin_unavailable);
__strong_alias(ifreq_setaddr,sockin_unavailable);
577