sockin.c revision 1.34.2.1 1 /* $NetBSD: sockin.c,v 1.34.2.1 2014/05/18 17:46:20 rmind Exp $ */
2
3 /*
4 * Copyright (c) 2008, 2009 Antti Kantee. All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 #include <sys/cdefs.h>
29 __KERNEL_RCSID(0, "$NetBSD: sockin.c,v 1.34.2.1 2014/05/18 17:46:20 rmind Exp $");
30
31 #include <sys/param.h>
32 #include <sys/condvar.h>
33 #include <sys/domain.h>
34 #include <sys/kmem.h>
35 #include <sys/kthread.h>
36 #include <sys/mbuf.h>
37 #include <sys/mutex.h>
38 #include <sys/once.h>
39 #include <sys/poll.h>
40 #include <sys/protosw.h>
41 #include <sys/queue.h>
42 #include <sys/socket.h>
43 #include <sys/socketvar.h>
44 #include <sys/time.h>
45
46 #include <net/bpf.h>
47 #include <net/if.h>
48 #include <net/radix.h>
49
50 #include <netinet/in.h>
51 #include <netinet/in_systm.h>
52 #include <netinet/ip.h>
53
54 #include <rump/rumpuser.h>
55
56 #include "rump_private.h"
57 #include "sockin_user.h"
58
59 /*
60 * An inet communication domain which uses the socket interface.
61 * Supports IPv4 & IPv6 UDP/TCP.
62 */
63
/* The two communication domains implemented by this file. */
DOMAIN_DEFINE(sockindomain);
DOMAIN_DEFINE(sockin6domain);

/* Handlers referenced by the protocol switch and domain tables below. */
static int	sockin_do_init(void);
static void	sockin_init(void);
static int	sockin_usrreq(struct socket *so, int req, struct mbuf *m,
		    struct mbuf *nam, struct mbuf *control, struct lwp *l);
static int	sockin_ctloutput(int op, struct socket *so,
		    struct sockopt *sopt);
72
73 const struct protosw sockinsw[] = {
74 {
75 .pr_type = SOCK_DGRAM,
76 .pr_domain = &sockindomain,
77 .pr_protocol = IPPROTO_UDP,
78 .pr_flags = PR_ATOMIC|PR_ADDR,
79 .pr_usrreq = sockin_usrreq,
80 .pr_ctloutput = sockin_ctloutput,
81 },
82 {
83 .pr_type = SOCK_STREAM,
84 .pr_domain = &sockindomain,
85 .pr_protocol = IPPROTO_TCP,
86 .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN|PR_ABRTACPTDIS,
87 .pr_usrreq = sockin_usrreq,
88 .pr_ctloutput = sockin_ctloutput,
89 }};
90 const struct protosw sockin6sw[] = {
91 {
92 .pr_type = SOCK_DGRAM,
93 .pr_domain = &sockin6domain,
94 .pr_protocol = IPPROTO_UDP,
95 .pr_flags = PR_ATOMIC|PR_ADDR,
96 .pr_usrreq = sockin_usrreq,
97 .pr_ctloutput = sockin_ctloutput,
98 },
99 {
100 .pr_type = SOCK_STREAM,
101 .pr_domain = &sockin6domain,
102 .pr_protocol = IPPROTO_TCP,
103 .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN|PR_ABRTACPTDIS,
104 .pr_usrreq = sockin_usrreq,
105 .pr_ctloutput = sockin_ctloutput,
106 }};
107
108 struct domain sockindomain = {
109 .dom_family = PF_INET,
110 .dom_name = "socket_inet",
111 .dom_init = sockin_init,
112 .dom_externalize = NULL,
113 .dom_dispose = NULL,
114 .dom_protosw = sockinsw,
115 .dom_protoswNPROTOSW = &sockinsw[__arraycount(sockinsw)],
116 .dom_rtattach = rt_inithead,
117 .dom_rtoffset = 32,
118 .dom_maxrtkey = sizeof(struct sockaddr_in),
119 .dom_ifattach = NULL,
120 .dom_ifdetach = NULL,
121 .dom_ifqueues = { NULL },
122 .dom_link = { NULL },
123 .dom_mowner = MOWNER_INIT("",""),
124 .dom_rtcache = { NULL },
125 .dom_sockaddr_cmp = NULL
126 };
127 struct domain sockin6domain = {
128 .dom_family = PF_INET6,
129 .dom_name = "socket_inet6",
130 .dom_init = sockin_init,
131 .dom_externalize = NULL,
132 .dom_dispose = NULL,
133 .dom_protosw = sockin6sw,
134 .dom_protoswNPROTOSW = &sockin6sw[__arraycount(sockin6sw)],
135 .dom_rtattach = rt_inithead,
136 .dom_rtoffset = 32,
137 .dom_maxrtkey = sizeof(struct sockaddr_in6),
138 .dom_ifattach = NULL,
139 .dom_ifdetach = NULL,
140 .dom_ifqueues = { NULL },
141 .dom_link = { NULL },
142 .dom_mowner = MOWNER_INIT("",""),
143 .dom_rtcache = { NULL },
144 .dom_sockaddr_cmp = NULL
145 };
146
/*
 * SO2S: fetch the integer stored in a socket's so_internal field (the
 * value registersock() puts there).  The argument is parenthesised so
 * that any pointer expression, not just a plain identifier, expands
 * correctly.
 */
#define SO2S(so) ((intptr_t)((so)->so_internal))
/* Buffer size, in bytes, used for socket buffer reservations. */
#define SOCKIN_SBSIZE 65536
149
150 struct sockin_unit {
151 struct socket *su_so;
152
153 LIST_ENTRY(sockin_unit) su_entries;
154 };
155 static LIST_HEAD(, sockin_unit) su_ent = LIST_HEAD_INITIALIZER(su_ent);
156 static kmutex_t su_mtx;
157 static bool rebuild;
158 static int nsock;
159
160 /* XXX: for the bpf hack */
161 static struct ifnet sockin_if;
162 int ifpromisc(struct ifnet *ifp, int pswitch) { return 0; }
163
164 static int
165 registersock(struct socket *so, int news)
166 {
167 struct sockin_unit *su;
168
169 su = kmem_alloc(sizeof(*su), KM_NOSLEEP);
170 if (!su)
171 return ENOMEM;
172
173 so->so_internal = (void *)(intptr_t)news;
174 su->su_so = so;
175
176 mutex_enter(&su_mtx);
177 LIST_INSERT_HEAD(&su_ent, su, su_entries);
178 nsock++;
179 rebuild = true;
180 mutex_exit(&su_mtx);
181
182 return 0;
183 }
184
185 static void
186 removesock(struct socket *so)
187 {
188 struct sockin_unit *su_iter;
189
190 mutex_enter(&su_mtx);
191 LIST_FOREACH(su_iter, &su_ent, su_entries) {
192 if (su_iter->su_so == so)
193 break;
194 }
195 if (!su_iter)
196 panic("no such socket");
197
198 LIST_REMOVE(su_iter, su_entries);
199 nsock--;
200 rebuild = true;
201 mutex_exit(&su_mtx);
202
203 rumpuser_close(SO2S(su_iter->su_so));
204 kmem_free(su_iter, sizeof(*su_iter));
205 }
206
207 static void
208 sockin_process(struct socket *so)
209 {
210 struct sockaddr_in6 from;
211 struct iovec io;
212 struct msghdr rmsg;
213 struct mbuf *m;
214 size_t n, plen;
215 int error;
216
217 m = m_gethdr(M_WAIT, MT_DATA);
218 if (so->so_proto->pr_type == SOCK_DGRAM) {
219 plen = IP_MAXPACKET;
220 MEXTMALLOC(m, plen, M_DONTWAIT);
221 } else {
222 plen = MCLBYTES;
223 MCLGET(m, M_DONTWAIT);
224 }
225 if ((m->m_flags & M_EXT) == 0) {
226 m_freem(m);
227 return;
228 }
229
230 memset(&rmsg, 0, sizeof(rmsg));
231 io.iov_base = mtod(m, void *);
232 io.iov_len = plen;
233 rmsg.msg_iov = &io;
234 rmsg.msg_iovlen = 1;
235 rmsg.msg_name = (struct sockaddr *)&from;
236 rmsg.msg_namelen = sizeof(from);
237
238 error = rumpcomp_sockin_recvmsg(SO2S(so), &rmsg, 0, &n);
239 if (error || n == 0) {
240 m_freem(m);
241
242 /* Treat a TCP socket a goner */
243 if (error != EAGAIN && so->so_proto->pr_type == SOCK_STREAM) {
244 mutex_enter(softnet_lock);
245 soisdisconnected(so);
246 mutex_exit(softnet_lock);
247 removesock(so);
248 }
249 return;
250 }
251 m->m_len = m->m_pkthdr.len = n;
252
253 bpf_mtap_af(&sockin_if, AF_UNSPEC, m);
254
255 mutex_enter(softnet_lock);
256 if (so->so_proto->pr_type == SOCK_DGRAM) {
257 if (!sbappendaddr(&so->so_rcv, rmsg.msg_name, m, NULL)) {
258 m_freem(m);
259 }
260 } else {
261 sbappendstream(&so->so_rcv, m);
262 }
263
264 sorwakeup(so);
265 mutex_exit(softnet_lock);
266 }
267
268 static void
269 sockin_accept(struct socket *so)
270 {
271 struct socket *nso;
272 struct sockaddr_in6 sin;
273 int news, error, slen;
274
275 slen = sizeof(sin);
276 error = rumpcomp_sockin_accept(SO2S(so), (struct sockaddr *)&sin,
277 &slen, &news);
278 if (error)
279 return;
280
281 mutex_enter(softnet_lock);
282 nso = sonewconn(so, true);
283 if (nso == NULL)
284 goto errout;
285 if (registersock(nso, news) != 0)
286 goto errout;
287 mutex_exit(softnet_lock);
288 return;
289
290 errout:
291 rumpuser_close(news);
292 if (nso)
293 soclose(nso);
294 mutex_exit(softnet_lock);
295 }
296
297 #define POLLTIMEOUT 100 /* check for new entries every 100ms */
298
299 /* XXX: doesn't handle socket (kernel) locking properly? */
300 static void
301 sockinworker(void *arg)
302 {
303 struct pollfd *pfds = NULL, *npfds;
304 struct sockin_unit *su_iter;
305 struct socket *so;
306 int cursock = 0, i, rv, error;
307
308 /*
309 * Loop reading requests. Check for new sockets periodically
310 * (could be smarter, but I'm lazy).
311 */
312 for (;;) {
313 if (rebuild) {
314 npfds = NULL;
315 mutex_enter(&su_mtx);
316 if (nsock)
317 npfds = kmem_alloc(nsock * sizeof(*npfds),
318 KM_NOSLEEP);
319 if (npfds || nsock == 0) {
320 if (pfds)
321 kmem_free(pfds, cursock*sizeof(*pfds));
322 pfds = npfds;
323 cursock = nsock;
324 rebuild = false;
325
326 i = 0;
327 LIST_FOREACH(su_iter, &su_ent, su_entries) {
328 pfds[i].fd = SO2S(su_iter->su_so);
329 pfds[i].events = POLLIN;
330 pfds[i].revents = 0;
331 i++;
332 }
333 KASSERT(i == nsock);
334 }
335 mutex_exit(&su_mtx);
336 }
337
338 /* find affected sockets & process */
339 error = rumpcomp_sockin_poll(pfds, cursock, POLLTIMEOUT, &rv);
340 for (i = 0; i < cursock && rv > 0 && error == 0; i++) {
341 if (pfds[i].revents & POLLIN) {
342 mutex_enter(&su_mtx);
343 LIST_FOREACH(su_iter, &su_ent, su_entries) {
344 if (SO2S(su_iter->su_so)==pfds[i].fd) {
345 so = su_iter->su_so;
346 mutex_exit(&su_mtx);
347 if(so->so_options&SO_ACCEPTCONN)
348 sockin_accept(so);
349 else
350 sockin_process(so);
351 mutex_enter(&su_mtx);
352 break;
353 }
354 }
355 /* if we can't find it, just wing it */
356 KASSERT(rebuild || su_iter);
357 mutex_exit(&su_mtx);
358 pfds[i].revents = 0;
359 rv--;
360 i = -1;
361 continue;
362 }
363
364 /* something else? ignore */
365 if (pfds[i].revents) {
366 pfds[i].revents = 0;
367 rv--;
368 }
369 }
370 KASSERT(rv <= 0);
371 }
372
373 }
374
375 static int
376 sockin_do_init(void)
377 {
378 int rv;
379
380 if (rump_threads) {
381 if ((rv = kthread_create(PRI_NONE, 0, NULL, sockinworker,
382 NULL, NULL, "sockwork")) != 0)
383 panic("sockin_init: could not create worker thread\n");
384 } else {
385 printf("sockin_init: no threads => no worker thread\n");
386 }
387 mutex_init(&su_mtx, MUTEX_DEFAULT, IPL_NONE);
388 strlcpy(sockin_if.if_xname, "sockin0", sizeof(sockin_if.if_xname));
389 bpf_attach(&sockin_if, DLT_NULL, 0);
390 return 0;
391 }
392
393 static void
394 sockin_init(void)
395 {
396 static ONCE_DECL(init);
397
398 RUN_ONCE(&init, sockin_do_init);
399 }
400
401 static int
402 sockin_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
403 struct mbuf *control, struct lwp *l)
404 {
405 int error = 0;
406
407 switch (req) {
408 case PRU_ATTACH:
409 {
410 int news;
411 int sbsize;
412 int family;
413
414 sosetlock(so);
415 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
416 error = soreserve(so, SOCKIN_SBSIZE, SOCKIN_SBSIZE);
417 if (error)
418 break;
419 }
420
421 family = so->so_proto->pr_domain->dom_family;
422 KASSERT(family == PF_INET || family == PF_INET6);
423 error = rumpcomp_sockin_socket(family,
424 so->so_proto->pr_type, 0, &news);
425 if (error)
426 break;
427
428 /* for UDP sockets, make sure we can send&recv max */
429 if (so->so_proto->pr_type == SOCK_DGRAM) {
430 sbsize = SOCKIN_SBSIZE;
431 error = rumpcomp_sockin_setsockopt(news,
432 SOL_SOCKET, SO_SNDBUF,
433 &sbsize, sizeof(sbsize));
434 sbsize = SOCKIN_SBSIZE;
435 error = rumpcomp_sockin_setsockopt(news,
436 SOL_SOCKET, SO_RCVBUF,
437 &sbsize, sizeof(sbsize));
438 }
439
440 if ((error = registersock(so, news)) != 0)
441 rumpuser_close(news);
442
443 break;
444 }
445
446 case PRU_ACCEPT:
447 /* we do all the work in the worker thread */
448 break;
449
450 case PRU_BIND:
451 error = rumpcomp_sockin_bind(SO2S(so),
452 mtod(nam, const struct sockaddr *),
453 nam->m_len);
454 break;
455
456 case PRU_CONNECT:
457 error = rumpcomp_sockin_connect(SO2S(so),
458 mtod(nam, struct sockaddr *), nam->m_len);
459 if (error == 0)
460 soisconnected(so);
461 break;
462
463 case PRU_LISTEN:
464 error = rumpcomp_sockin_listen(SO2S(so), so->so_qlimit);
465 break;
466
467 case PRU_SEND:
468 {
469 struct sockaddr *saddr;
470 struct msghdr mhdr;
471 size_t iov_max, i;
472 struct iovec iov_buf[32], *iov;
473 struct mbuf *m2;
474 size_t tot, n;
475 int s;
476
477 bpf_mtap_af(&sockin_if, AF_UNSPEC, m);
478
479 memset(&mhdr, 0, sizeof(mhdr));
480
481 iov_max = 0;
482 for (m2 = m; m2 != NULL; m2 = m2->m_next) {
483 iov_max++;
484 }
485
486 if (iov_max <= __arraycount(iov_buf)) {
487 iov = iov_buf;
488 } else {
489 iov = kmem_alloc(sizeof(struct iovec) * iov_max,
490 KM_SLEEP);
491 }
492
493 tot = 0;
494 for (i = 0, m2 = m; m2 != NULL; m2 = m2->m_next, i++) {
495 iov[i].iov_base = m2->m_data;
496 iov[i].iov_len = m2->m_len;
497 tot += m2->m_len;
498 }
499 mhdr.msg_iov = iov;
500 mhdr.msg_iovlen = i;
501 s = SO2S(so);
502
503 if (nam != NULL) {
504 saddr = mtod(nam, struct sockaddr *);
505 mhdr.msg_name = saddr;
506 mhdr.msg_namelen = saddr->sa_len;
507 }
508
509 rumpcomp_sockin_sendmsg(s, &mhdr, 0, &n);
510
511 if (iov != iov_buf)
512 kmem_free(iov, sizeof(struct iovec) * iov_max);
513
514 m_freem(m);
515 m_freem(control);
516
517 /* this assumes too many things to list.. buthey, testing */
518 if (!rump_threads)
519 sockin_process(so);
520 }
521 break;
522
523 case PRU_SHUTDOWN:
524 removesock(so);
525 break;
526
527 case PRU_SOCKADDR:
528 case PRU_PEERADDR:
529 {
530 int slen = nam->m_len;
531 enum rumpcomp_sockin_getnametype which;
532
533 if (req == PRU_SOCKADDR)
534 which = RUMPCOMP_SOCKIN_SOCKNAME;
535 else
536 which = RUMPCOMP_SOCKIN_PEERNAME;
537 error = rumpcomp_sockin_getname(SO2S(so),
538 mtod(nam, struct sockaddr *), &slen, which);
539 if (error == 0)
540 nam->m_len = slen;
541 break;
542 }
543
544 case PRU_CONTROL:
545 error = ENOTTY;
546 break;
547
548 default:
549 panic("sockin_usrreq: IMPLEMENT ME, req %d not supported", req);
550 }
551
552 return error;
553 }
554
555 static int
556 sockin_ctloutput(int op, struct socket *so, struct sockopt *sopt)
557 {
558
559 return rumpcomp_sockin_setsockopt(SO2S(so), sopt->sopt_level,
560 sopt->sopt_name, sopt->sopt_data, sopt->sopt_size);
561 }
562
/*
 * Catch-all for interfaces this component does not provide: every
 * symbol aliased below resolves to this function, which panics when
 * called.
 */
int	sockin_unavailable(void);

int
sockin_unavailable(void)
{

	panic("interface not available in with sockin");
}

/* symbols routed to the catch-all above */
__strong_alias(rtrequest,sockin_unavailable);
__strong_alias(ifunit,sockin_unavailable);
__strong_alias(ifreq_setaddr,sockin_unavailable);
573