sockin.c revision 1.30 1 /* $NetBSD: sockin.c,v 1.30 2013/04/30 00:12:35 pooka Exp $ */
2
3 /*
4 * Copyright (c) 2008, 2009 Antti Kantee. All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 #include <sys/cdefs.h>
29 __KERNEL_RCSID(0, "$NetBSD: sockin.c,v 1.30 2013/04/30 00:12:35 pooka Exp $");
30
31 #include <sys/param.h>
32 #include <sys/condvar.h>
33 #include <sys/domain.h>
34 #include <sys/kmem.h>
35 #include <sys/kthread.h>
36 #include <sys/mbuf.h>
37 #include <sys/mutex.h>
38 #include <sys/poll.h>
39 #include <sys/protosw.h>
40 #include <sys/queue.h>
41 #include <sys/socket.h>
42 #include <sys/socketvar.h>
43 #include <sys/time.h>
44
45 #include <net/bpf.h>
46 #include <net/if.h>
47 #include <net/radix.h>
48
49 #include <netinet/in.h>
50 #include <netinet/in_systm.h>
51 #include <netinet/ip.h>
52
53 #include <rump/rumpuser.h>
54
55 #include "rump_private.h"
56 #include "rumpcomp_user.h"
57
/*
 * An inet communication domain which uses the socket interface.
 * Currently supports IPv4 UDP and TCP; it could be extended to
 * support IPv6 by adding more entries to the protosw.
 */
63
DOMAIN_DEFINE(sockindomain);

/* Hooks referenced by the protosw and domain tables defined below. */
static void	sockin_init(void);
static int	sockin_usrreq(struct socket *so, int req, struct mbuf *m,
		    struct mbuf *nam, struct mbuf *control, struct lwp *l);
static int	sockin_ctloutput(int op, struct socket *so,
		    struct sockopt *sopt);
71 const struct protosw sockinsw[] = {
72 {
73 .pr_type = SOCK_DGRAM,
74 .pr_domain = &sockindomain,
75 .pr_protocol = IPPROTO_UDP,
76 .pr_flags = PR_ATOMIC|PR_ADDR,
77 .pr_usrreq = sockin_usrreq,
78 .pr_ctloutput = sockin_ctloutput,
79 },
80 {
81 .pr_type = SOCK_STREAM,
82 .pr_domain = &sockindomain,
83 .pr_protocol = IPPROTO_TCP,
84 .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN|PR_ABRTACPTDIS,
85 .pr_usrreq = sockin_usrreq,
86 .pr_ctloutput = sockin_ctloutput,
87 }};
88
89 struct domain sockindomain = {
90 .dom_family = PF_INET,
91 .dom_name = "socket_inet",
92 .dom_init = sockin_init,
93 .dom_externalize = NULL,
94 .dom_dispose = NULL,
95 .dom_protosw = sockinsw,
96 .dom_protoswNPROTOSW = &sockinsw[__arraycount(sockinsw)],
97 .dom_rtattach = rt_inithead,
98 .dom_rtoffset = 32,
99 .dom_maxrtkey = sizeof(struct sockaddr_in),
100 .dom_ifattach = NULL,
101 .dom_ifdetach = NULL,
102 .dom_ifqueues = { NULL },
103 .dom_link = { NULL },
104 .dom_mowner = MOWNER_INIT("",""),
105 .dom_rtcache = { NULL },
106 .dom_sockaddr_cmp = NULL
107 };
108
/*
 * SO2S: fetch the integer stashed in a socket's so_internal field
 * (registersock() stores the host socket descriptor there).  The
 * argument is parenthesized so that any pointer expression, not just a
 * plain identifier, expands correctly.
 */
#define SO2S(so) ((intptr_t)((so)->so_internal))

/* Buffer size used for soreserve() and for the host SO_SNDBUF/SO_RCVBUF. */
#define SOCKIN_SBSIZE 65536
111
112 struct sockin_unit {
113 struct socket *su_so;
114
115 LIST_ENTRY(sockin_unit) su_entries;
116 };
117 static LIST_HEAD(, sockin_unit) su_ent = LIST_HEAD_INITIALIZER(su_ent);
118 static kmutex_t su_mtx;
119 static bool rebuild;
120 static int nsock;
121
122 /* XXX: for the bpf hack */
123 static struct ifnet sockin_if;
124 int ifpromisc(struct ifnet *ifp, int pswitch) { return 0; }
125
126 static int
127 registersock(struct socket *so, int news)
128 {
129 struct sockin_unit *su;
130
131 su = kmem_alloc(sizeof(*su), KM_NOSLEEP);
132 if (!su)
133 return ENOMEM;
134
135 so->so_internal = (void *)(intptr_t)news;
136 su->su_so = so;
137
138 mutex_enter(&su_mtx);
139 LIST_INSERT_HEAD(&su_ent, su, su_entries);
140 nsock++;
141 rebuild = true;
142 mutex_exit(&su_mtx);
143
144 return 0;
145 }
146
147 static void
148 removesock(struct socket *so)
149 {
150 struct sockin_unit *su_iter;
151
152 mutex_enter(&su_mtx);
153 LIST_FOREACH(su_iter, &su_ent, su_entries) {
154 if (su_iter->su_so == so)
155 break;
156 }
157 if (!su_iter)
158 panic("no such socket");
159
160 LIST_REMOVE(su_iter, su_entries);
161 nsock--;
162 rebuild = true;
163 mutex_exit(&su_mtx);
164
165 rumpuser_close(SO2S(su_iter->su_so));
166 kmem_free(su_iter, sizeof(*su_iter));
167 }
168
169 static void
170 sockin_process(struct socket *so)
171 {
172 struct sockaddr_in from;
173 struct iovec io;
174 struct msghdr rmsg;
175 struct mbuf *m;
176 size_t n, plen;
177 int error;
178
179 m = m_gethdr(M_WAIT, MT_DATA);
180 if (so->so_proto->pr_type == SOCK_DGRAM) {
181 plen = IP_MAXPACKET;
182 MEXTMALLOC(m, plen, M_DONTWAIT);
183 } else {
184 plen = MCLBYTES;
185 MCLGET(m, M_DONTWAIT);
186 }
187 if ((m->m_flags & M_EXT) == 0) {
188 m_freem(m);
189 return;
190 }
191
192 memset(&rmsg, 0, sizeof(rmsg));
193 io.iov_base = mtod(m, void *);
194 io.iov_len = plen;
195 rmsg.msg_iov = &io;
196 rmsg.msg_iovlen = 1;
197 rmsg.msg_name = (struct sockaddr *)&from;
198 rmsg.msg_namelen = sizeof(from);
199
200 error = rumpcomp_sockin_recvmsg(SO2S(so), &rmsg, 0, &n);
201 if (error) {
202 m_freem(m);
203
204 /* Treat a TCP socket a goner */
205 if (error != EAGAIN && so->so_proto->pr_type == SOCK_STREAM) {
206 mutex_enter(softnet_lock);
207 soisdisconnected(so);
208 mutex_exit(softnet_lock);
209 removesock(so);
210 }
211 return;
212 }
213 m->m_len = m->m_pkthdr.len = n;
214
215 bpf_mtap_af(&sockin_if, AF_UNSPEC, m);
216
217 mutex_enter(softnet_lock);
218 if (so->so_proto->pr_type == SOCK_DGRAM) {
219 if (!sbappendaddr(&so->so_rcv, rmsg.msg_name, m, NULL)) {
220 m_freem(m);
221 }
222 } else {
223 sbappendstream(&so->so_rcv, m);
224 }
225
226 sorwakeup(so);
227 mutex_exit(softnet_lock);
228 }
229
230 static void
231 sockin_accept(struct socket *so)
232 {
233 struct socket *nso;
234 struct sockaddr_in sin;
235 int news, error, slen;
236
237 slen = sizeof(sin);
238 error = rumpcomp_sockin_accept(SO2S(so), (struct sockaddr *)&sin,
239 &slen, &news);
240 if (error)
241 return;
242
243 mutex_enter(softnet_lock);
244 nso = sonewconn(so, SS_ISCONNECTED);
245 if (nso == NULL)
246 goto errout;
247 if (registersock(nso, news) != 0)
248 goto errout;
249 mutex_exit(softnet_lock);
250 return;
251
252 errout:
253 rumpuser_close(news);
254 if (nso)
255 soclose(nso);
256 mutex_exit(softnet_lock);
257 }
258
259 #define POLLTIMEOUT 100 /* check for new entries every 100ms */
260
261 /* XXX: doesn't handle socket (kernel) locking properly? */
262 static void
263 sockinworker(void *arg)
264 {
265 struct pollfd *pfds = NULL, *npfds;
266 struct sockin_unit *su_iter;
267 struct socket *so;
268 int cursock = 0, i, rv, error;
269
270 /*
271 * Loop reading requests. Check for new sockets periodically
272 * (could be smarter, but I'm lazy).
273 */
274 for (;;) {
275 if (rebuild) {
276 npfds = NULL;
277 mutex_enter(&su_mtx);
278 if (nsock)
279 npfds = kmem_alloc(nsock * sizeof(*npfds),
280 KM_NOSLEEP);
281 if (npfds || nsock == 0) {
282 if (pfds)
283 kmem_free(pfds, cursock*sizeof(*pfds));
284 pfds = npfds;
285 cursock = nsock;
286 rebuild = false;
287
288 i = 0;
289 LIST_FOREACH(su_iter, &su_ent, su_entries) {
290 pfds[i].fd = SO2S(su_iter->su_so);
291 pfds[i].events = POLLIN;
292 pfds[i].revents = 0;
293 i++;
294 }
295 KASSERT(i == nsock);
296 }
297 mutex_exit(&su_mtx);
298 }
299
300 /* find affected sockets & process */
301 error = rumpcomp_sockin_poll(pfds, cursock, POLLTIMEOUT, &rv);
302 for (i = 0; i < cursock && rv > 0 && error == 0; i++) {
303 if (pfds[i].revents & POLLIN) {
304 mutex_enter(&su_mtx);
305 LIST_FOREACH(su_iter, &su_ent, su_entries) {
306 if (SO2S(su_iter->su_so)==pfds[i].fd) {
307 so = su_iter->su_so;
308 mutex_exit(&su_mtx);
309 if(so->so_options&SO_ACCEPTCONN)
310 sockin_accept(so);
311 else
312 sockin_process(so);
313 mutex_enter(&su_mtx);
314 break;
315 }
316 }
317 /* if we can't find it, just wing it */
318 KASSERT(rebuild || su_iter);
319 mutex_exit(&su_mtx);
320 pfds[i].revents = 0;
321 rv--;
322 i = -1;
323 continue;
324 }
325
326 /* something else? ignore */
327 if (pfds[i].revents) {
328 pfds[i].revents = 0;
329 rv--;
330 }
331 }
332 KASSERT(rv <= 0);
333 }
334
335 }
336
337 static void
338 sockin_init(void)
339 {
340 int rv;
341
342 if (rump_threads) {
343 if ((rv = kthread_create(PRI_NONE, 0, NULL, sockinworker,
344 NULL, NULL, "sockwork")) != 0)
345 panic("sockin_init: could not create worker thread\n");
346 } else {
347 printf("sockin_init: no threads => no worker thread\n");
348 }
349 mutex_init(&su_mtx, MUTEX_DEFAULT, IPL_NONE);
350 strlcpy(sockin_if.if_xname, "sockin0", sizeof(sockin_if.if_xname));
351 bpf_attach(&sockin_if, DLT_NULL, 0);
352 }
353
354 static int
355 sockin_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
356 struct mbuf *control, struct lwp *l)
357 {
358 int error = 0;
359
360 switch (req) {
361 case PRU_ATTACH:
362 {
363 int news;
364 int sbsize;
365
366 sosetlock(so);
367 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
368 error = soreserve(so, SOCKIN_SBSIZE, SOCKIN_SBSIZE);
369 if (error)
370 break;
371 }
372
373 error = rumpcomp_sockin_socket(PF_INET, so->so_proto->pr_type,
374 0, &news);
375 if (error)
376 break;
377
378 /* for UDP sockets, make sure we can send&recv max */
379 if (so->so_proto->pr_type == SOCK_DGRAM) {
380 sbsize = SOCKIN_SBSIZE;
381 error = rumpcomp_sockin_setsockopt(news,
382 SOL_SOCKET, SO_SNDBUF,
383 &sbsize, sizeof(sbsize));
384 sbsize = SOCKIN_SBSIZE;
385 error = rumpcomp_sockin_setsockopt(news,
386 SOL_SOCKET, SO_RCVBUF,
387 &sbsize, sizeof(sbsize));
388 }
389
390 if ((error = registersock(so, news)) != 0)
391 rumpuser_close(news);
392
393 break;
394 }
395
396 case PRU_ACCEPT:
397 /* we do all the work in the worker thread */
398 break;
399
400 case PRU_BIND:
401 error = rumpcomp_sockin_bind(SO2S(so),
402 mtod(nam, const struct sockaddr *),
403 sizeof(struct sockaddr_in));
404 break;
405
406 case PRU_CONNECT:
407 error = rumpcomp_sockin_connect(SO2S(so),
408 mtod(nam, struct sockaddr *), sizeof(struct sockaddr_in));
409 if (error == 0)
410 soisconnected(so);
411 break;
412
413 case PRU_LISTEN:
414 error = rumpcomp_sockin_listen(SO2S(so), so->so_qlimit);
415 break;
416
417 case PRU_SEND:
418 {
419 struct sockaddr *saddr;
420 struct msghdr mhdr;
421 size_t iov_max, i;
422 struct iovec iov_buf[32], *iov;
423 struct mbuf *m2;
424 size_t tot, n;
425 int s;
426
427 bpf_mtap_af(&sockin_if, AF_UNSPEC, m);
428
429 memset(&mhdr, 0, sizeof(mhdr));
430
431 iov_max = 0;
432 for (m2 = m; m2 != NULL; m2 = m2->m_next) {
433 iov_max++;
434 }
435
436 if (iov_max <= __arraycount(iov_buf)) {
437 iov = iov_buf;
438 } else {
439 iov = kmem_alloc(sizeof(struct iovec) * iov_max,
440 KM_SLEEP);
441 }
442
443 tot = 0;
444 for (i = 0, m2 = m; m2 != NULL; m2 = m2->m_next, i++) {
445 iov[i].iov_base = m2->m_data;
446 iov[i].iov_len = m2->m_len;
447 tot += m2->m_len;
448 }
449 mhdr.msg_iov = iov;
450 mhdr.msg_iovlen = i;
451 s = SO2S(so);
452
453 if (nam != NULL) {
454 saddr = mtod(nam, struct sockaddr *);
455 mhdr.msg_name = saddr;
456 mhdr.msg_namelen = saddr->sa_len;
457 }
458
459 rumpcomp_sockin_sendmsg(s, &mhdr, 0, &n);
460
461 if (iov != iov_buf)
462 kmem_free(iov, sizeof(struct iovec) * iov_max);
463
464 m_freem(m);
465 m_freem(control);
466
467 /* this assumes too many things to list.. buthey, testing */
468 if (!rump_threads)
469 sockin_process(so);
470 }
471 break;
472
473 case PRU_SHUTDOWN:
474 removesock(so);
475 break;
476
477 case PRU_SOCKADDR:
478 case PRU_PEERADDR:
479 {
480 int slen = nam->m_len;
481 enum rumpcomp_sockin_getnametype which;
482
483 if (req == PRU_SOCKADDR)
484 which = RUMPCOMP_SOCKIN_SOCKNAME;
485 else
486 which = RUMPCOMP_SOCKIN_PEERNAME;
487 error = rumpcomp_sockin_getname(SO2S(so),
488 mtod(nam, struct sockaddr *), &slen, which);
489 if (error == 0)
490 nam->m_len = slen;
491 break;
492 }
493
494 case PRU_CONTROL:
495 error = ENOTTY;
496 break;
497
498 default:
499 panic("sockin_usrreq: IMPLEMENT ME, req %d not supported", req);
500 }
501
502 return error;
503 }
504
505 static int
506 sockin_ctloutput(int op, struct socket *so, struct sockopt *sopt)
507 {
508
509 return rumpcomp_sockin_setsockopt(SO2S(so), sopt->sopt_level,
510 sopt->sopt_name, sopt->sopt_data, sopt->sopt_size);
511 }
512