Home | History | Annotate | Line # | Download | only in libsockin
sockin.c revision 1.36
      1 /*	$NetBSD: sockin.c,v 1.36 2014/03/13 01:40:30 pooka Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2008, 2009 Antti Kantee.  All Rights Reserved.
      5  *
      6  * Redistribution and use in source and binary forms, with or without
      7  * modification, are permitted provided that the following conditions
      8  * are met:
      9  * 1. Redistributions of source code must retain the above copyright
     10  *    notice, this list of conditions and the following disclaimer.
     11  * 2. Redistributions in binary form must reproduce the above copyright
     12  *    notice, this list of conditions and the following disclaimer in the
     13  *    documentation and/or other materials provided with the distribution.
     14  *
     15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
     16  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     18  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     25  * SUCH DAMAGE.
     26  */
     27 
     28 #include <sys/cdefs.h>
     29 __KERNEL_RCSID(0, "$NetBSD: sockin.c,v 1.36 2014/03/13 01:40:30 pooka Exp $");
     30 
     31 #include <sys/param.h>
     32 #include <sys/condvar.h>
     33 #include <sys/domain.h>
     34 #include <sys/kmem.h>
     35 #include <sys/kthread.h>
     36 #include <sys/mbuf.h>
     37 #include <sys/mutex.h>
     38 #include <sys/once.h>
     39 #include <sys/poll.h>
     40 #include <sys/protosw.h>
     41 #include <sys/queue.h>
     42 #include <sys/socket.h>
     43 #include <sys/socketvar.h>
     44 #include <sys/time.h>
     45 
     46 #include <net/bpf.h>
     47 #include <net/if.h>
     48 #include <net/radix.h>
     49 
     50 #include <netinet/in.h>
     51 #include <netinet/in_systm.h>
     52 #include <netinet/ip.h>
     53 
     54 #include <rump/rumpuser.h>
     55 
     56 #include "rump_private.h"
     57 #include "sockin_user.h"
     58 
/*
 * An inet communication domain which uses the socket interface.
 * Provides UDP (SOCK_DGRAM) and TCP (SOCK_STREAM) for both PF_INET
 * and PF_INET6; more protocols can be supported by adding entries
 * to the protosw tables.
 */
     64 
/* DOMAIN_DEFINE() for the two domains this file provides. */
DOMAIN_DEFINE(sockindomain);
DOMAIN_DEFINE(sockin6domain);

/*
 * Forward declarations: these handlers are referenced by the protosw
 * and domain initializers before their definitions appear.
 */
static int	sockin_do_init(void);
static void	sockin_init(void);
static int	sockin_usrreq(struct socket *so, int req, struct mbuf *m,
			      struct mbuf *nam, struct mbuf *control,
			      struct lwp *l);
static int	sockin_ctloutput(int op, struct socket *so,
				 struct sockopt *sopt);
     73 
     74 const struct protosw sockinsw[] = {
     75 {
     76 	.pr_type = SOCK_DGRAM,
     77 	.pr_domain = &sockindomain,
     78 	.pr_protocol = IPPROTO_UDP,
     79 	.pr_flags = PR_ATOMIC|PR_ADDR,
     80 	.pr_usrreq = sockin_usrreq,
     81 	.pr_ctloutput = sockin_ctloutput,
     82 },
     83 {
     84 	.pr_type = SOCK_STREAM,
     85 	.pr_domain = &sockindomain,
     86 	.pr_protocol = IPPROTO_TCP,
     87 	.pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN|PR_ABRTACPTDIS,
     88 	.pr_usrreq = sockin_usrreq,
     89 	.pr_ctloutput = sockin_ctloutput,
     90 }};
     91 const struct protosw sockin6sw[] = {
     92 {
     93 	.pr_type = SOCK_DGRAM,
     94 	.pr_domain = &sockin6domain,
     95 	.pr_protocol = IPPROTO_UDP,
     96 	.pr_flags = PR_ATOMIC|PR_ADDR,
     97 	.pr_usrreq = sockin_usrreq,
     98 	.pr_ctloutput = sockin_ctloutput,
     99 },
    100 {
    101 	.pr_type = SOCK_STREAM,
    102 	.pr_domain = &sockin6domain,
    103 	.pr_protocol = IPPROTO_TCP,
    104 	.pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN|PR_ABRTACPTDIS,
    105 	.pr_usrreq = sockin_usrreq,
    106 	.pr_ctloutput = sockin_ctloutput,
    107 }};
    108 
    109 struct domain sockindomain = {
    110 	.dom_family = PF_INET,
    111 	.dom_name = "socket_inet",
    112 	.dom_init = sockin_init,
    113 	.dom_externalize = NULL,
    114 	.dom_dispose = NULL,
    115 	.dom_protosw = sockinsw,
    116 	.dom_protoswNPROTOSW = &sockinsw[__arraycount(sockinsw)],
    117 	.dom_rtattach = rt_inithead,
    118 	.dom_rtoffset = 32,
    119 	.dom_maxrtkey = sizeof(struct sockaddr_in),
    120 	.dom_ifattach = NULL,
    121 	.dom_ifdetach = NULL,
    122 	.dom_ifqueues = { NULL },
    123 	.dom_link = { NULL },
    124 	.dom_mowner = MOWNER_INIT("",""),
    125 	.dom_rtcache = { NULL },
    126 	.dom_sockaddr_cmp = NULL
    127 };
    128 struct domain sockin6domain = {
    129 	.dom_family = PF_INET6,
    130 	.dom_name = "socket_inet6",
    131 	.dom_init = sockin_init,
    132 	.dom_externalize = NULL,
    133 	.dom_dispose = NULL,
    134 	.dom_protosw = sockin6sw,
    135 	.dom_protoswNPROTOSW = &sockin6sw[__arraycount(sockin6sw)],
    136 	.dom_rtattach = rt_inithead,
    137 	.dom_rtoffset = 32,
    138 	.dom_maxrtkey = sizeof(struct sockaddr_in6),
    139 	.dom_ifattach = NULL,
    140 	.dom_ifdetach = NULL,
    141 	.dom_ifqueues = { NULL },
    142 	.dom_link = { NULL },
    143 	.dom_mowner = MOWNER_INIT("",""),
    144 	.dom_rtcache = { NULL },
    145 	.dom_sockaddr_cmp = NULL
    146 };
    147 
/*
 * SO2S: read back the descriptor number that registersock() stashed in
 * a socket's so_internal pointer.  The argument is parenthesized so any
 * pointer-valued expression (e.g. &sock or an array element address)
 * expands correctly.
 */
#define SO2S(so) ((intptr_t)((so)->so_internal))
/* Buffer size used for soreserve() and for SO_SNDBUF/SO_RCVBUF on attach. */
#define SOCKIN_SBSIZE 65536
    150 
    151 struct sockin_unit {
    152 	struct socket *su_so;
    153 
    154 	LIST_ENTRY(sockin_unit) su_entries;
    155 };
    156 static LIST_HEAD(, sockin_unit) su_ent = LIST_HEAD_INITIALIZER(su_ent);
    157 static kmutex_t su_mtx;
    158 static bool rebuild;
    159 static int nsock;
    160 
    161 /* XXX: for the bpf hack */
    162 static struct ifnet sockin_if;
/*
 * Stub ifpromisc(): ignores both arguments and always reports success (0).
 */
int
ifpromisc(struct ifnet *ifp, int pswitch)
{

	return 0;
}
    164 
    165 static int
    166 registersock(struct socket *so, int news)
    167 {
    168 	struct sockin_unit *su;
    169 
    170 	su = kmem_alloc(sizeof(*su), KM_NOSLEEP);
    171 	if (!su)
    172 		return ENOMEM;
    173 
    174 	so->so_internal = (void *)(intptr_t)news;
    175 	su->su_so = so;
    176 
    177 	mutex_enter(&su_mtx);
    178 	LIST_INSERT_HEAD(&su_ent, su, su_entries);
    179 	nsock++;
    180 	rebuild = true;
    181 	mutex_exit(&su_mtx);
    182 
    183 	return 0;
    184 }
    185 
    186 static void
    187 removesock(struct socket *so)
    188 {
    189 	struct sockin_unit *su_iter;
    190 
    191 	mutex_enter(&su_mtx);
    192 	LIST_FOREACH(su_iter, &su_ent, su_entries) {
    193 		if (su_iter->su_so == so)
    194 			break;
    195 	}
    196 	if (!su_iter)
    197 		panic("no such socket");
    198 
    199 	LIST_REMOVE(su_iter, su_entries);
    200 	nsock--;
    201 	rebuild = true;
    202 	mutex_exit(&su_mtx);
    203 
    204 	rumpuser_close(SO2S(su_iter->su_so));
    205 	kmem_free(su_iter, sizeof(*su_iter));
    206 }
    207 
    208 static void
    209 sockin_process(struct socket *so)
    210 {
    211 	struct sockaddr_in6 from;
    212 	struct iovec io;
    213 	struct msghdr rmsg;
    214 	struct mbuf *m;
    215 	size_t n, plen;
    216 	int error;
    217 
    218 	m = m_gethdr(M_WAIT, MT_DATA);
    219 	if (so->so_proto->pr_type == SOCK_DGRAM) {
    220 		plen = IP_MAXPACKET;
    221 		MEXTMALLOC(m, plen, M_DONTWAIT);
    222 	} else {
    223 		plen = MCLBYTES;
    224 		MCLGET(m, M_DONTWAIT);
    225 	}
    226 	if ((m->m_flags & M_EXT) == 0) {
    227 		m_freem(m);
    228 		return;
    229 	}
    230 
    231 	memset(&rmsg, 0, sizeof(rmsg));
    232 	io.iov_base = mtod(m, void *);
    233 	io.iov_len = plen;
    234 	rmsg.msg_iov = &io;
    235 	rmsg.msg_iovlen = 1;
    236 	rmsg.msg_name = (struct sockaddr *)&from;
    237 	rmsg.msg_namelen = sizeof(from);
    238 
    239 	error = rumpcomp_sockin_recvmsg(SO2S(so), &rmsg, 0, &n);
    240 	if (error || n == 0) {
    241 		m_freem(m);
    242 
    243 		/* Treat a TCP socket a goner */
    244 		if (error != EAGAIN && so->so_proto->pr_type == SOCK_STREAM) {
    245 			mutex_enter(softnet_lock);
    246 			soisdisconnected(so);
    247 			mutex_exit(softnet_lock);
    248 			removesock(so);
    249 		}
    250 		return;
    251 	}
    252 	m->m_len = m->m_pkthdr.len = n;
    253 
    254 	bpf_mtap_af(&sockin_if, AF_UNSPEC, m);
    255 
    256 	mutex_enter(softnet_lock);
    257 	if (so->so_proto->pr_type == SOCK_DGRAM) {
    258 		if (!sbappendaddr(&so->so_rcv, rmsg.msg_name, m, NULL)) {
    259 			m_freem(m);
    260 		}
    261 	} else {
    262 		sbappendstream(&so->so_rcv, m);
    263 	}
    264 
    265 	sorwakeup(so);
    266 	mutex_exit(softnet_lock);
    267 }
    268 
    269 static void
    270 sockin_accept(struct socket *so)
    271 {
    272 	struct socket *nso;
    273 	struct sockaddr_in6 sin;
    274 	int news, error, slen;
    275 
    276 	slen = sizeof(sin);
    277 	error = rumpcomp_sockin_accept(SO2S(so), (struct sockaddr *)&sin,
    278 	    &slen, &news);
    279 	if (error)
    280 		return;
    281 
    282 	mutex_enter(softnet_lock);
    283 	nso = sonewconn(so, true);
    284 	if (nso == NULL)
    285 		goto errout;
    286 	if (registersock(nso, news) != 0)
    287 		goto errout;
    288 	mutex_exit(softnet_lock);
    289 	return;
    290 
    291  errout:
    292 	rumpuser_close(news);
    293 	if (nso)
    294 		soclose(nso);
    295 	mutex_exit(softnet_lock);
    296 }
    297 
    298 #define POLLTIMEOUT 100	/* check for new entries every 100ms */
    299 
    300 /* XXX: doesn't handle socket (kernel) locking properly? */
    301 static void
    302 sockinworker(void *arg)
    303 {
    304 	struct pollfd *pfds = NULL, *npfds;
    305 	struct sockin_unit *su_iter;
    306 	struct socket *so;
    307 	int cursock = 0, i, rv, error;
    308 
    309 	/*
    310 	 * Loop reading requests.  Check for new sockets periodically
    311 	 * (could be smarter, but I'm lazy).
    312 	 */
    313 	for (;;) {
    314 		if (rebuild) {
    315 			npfds = NULL;
    316 			mutex_enter(&su_mtx);
    317 			if (nsock)
    318 				npfds = kmem_alloc(nsock * sizeof(*npfds),
    319 				    KM_NOSLEEP);
    320 			if (npfds || nsock == 0) {
    321 				if (pfds)
    322 					kmem_free(pfds, cursock*sizeof(*pfds));
    323 				pfds = npfds;
    324 				cursock = nsock;
    325 				rebuild = false;
    326 
    327 				i = 0;
    328 				LIST_FOREACH(su_iter, &su_ent, su_entries) {
    329 					pfds[i].fd = SO2S(su_iter->su_so);
    330 					pfds[i].events = POLLIN;
    331 					pfds[i].revents = 0;
    332 					i++;
    333 				}
    334 				KASSERT(i == nsock);
    335 			}
    336 			mutex_exit(&su_mtx);
    337 		}
    338 
    339 		/* find affected sockets & process */
    340 		error = rumpcomp_sockin_poll(pfds, cursock, POLLTIMEOUT, &rv);
    341 		for (i = 0; i < cursock && rv > 0 && error == 0; i++) {
    342 			if (pfds[i].revents & POLLIN) {
    343 				mutex_enter(&su_mtx);
    344 				LIST_FOREACH(su_iter, &su_ent, su_entries) {
    345 					if (SO2S(su_iter->su_so)==pfds[i].fd) {
    346 						so = su_iter->su_so;
    347 						mutex_exit(&su_mtx);
    348 						if(so->so_options&SO_ACCEPTCONN)
    349 							sockin_accept(so);
    350 						else
    351 							sockin_process(so);
    352 						mutex_enter(&su_mtx);
    353 						break;
    354 					}
    355 				}
    356 				/* if we can't find it, just wing it */
    357 				KASSERT(rebuild || su_iter);
    358 				mutex_exit(&su_mtx);
    359 				pfds[i].revents = 0;
    360 				rv--;
    361 				i = -1;
    362 				continue;
    363 			}
    364 
    365 			/* something else?  ignore */
    366 			if (pfds[i].revents) {
    367 				pfds[i].revents = 0;
    368 				rv--;
    369 			}
    370 		}
    371 		KASSERT(rv <= 0);
    372 	}
    373 
    374 }
    375 
    376 static int
    377 sockin_do_init(void)
    378 {
    379 	int rv;
    380 
    381 	if (rump_threads) {
    382 		if ((rv = kthread_create(PRI_NONE, 0, NULL, sockinworker,
    383 		    NULL, NULL, "sockwork")) != 0)
    384 			panic("sockin_init: could not create worker thread\n");
    385 	} else {
    386 		printf("sockin_init: no threads => no worker thread\n");
    387 	}
    388 	mutex_init(&su_mtx, MUTEX_DEFAULT, IPL_NONE);
    389 	strlcpy(sockin_if.if_xname, "sockin0", sizeof(sockin_if.if_xname));
    390 	bpf_attach(&sockin_if, DLT_NULL, 0);
    391 	return 0;
    392 }
    393 
    394 static void
    395 sockin_init(void)
    396 {
    397 	static ONCE_DECL(init);
    398 
    399 	RUN_ONCE(&init, sockin_do_init);
    400 }
    401 
    402 static int
    403 sockin_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
    404 	struct mbuf *control, struct lwp *l)
    405 {
    406 	int error = 0;
    407 
    408 	switch (req) {
    409 	case PRU_ATTACH:
    410 	{
    411 		int news;
    412 		int sbsize;
    413 		int family;
    414 
    415 		sosetlock(so);
    416 		if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
    417 			error = soreserve(so, SOCKIN_SBSIZE, SOCKIN_SBSIZE);
    418 			if (error)
    419 				break;
    420 		}
    421 
    422 		family = so->so_proto->pr_domain->dom_family;
    423 		KASSERT(family == PF_INET || family == PF_INET6);
    424 		error = rumpcomp_sockin_socket(family,
    425 		    so->so_proto->pr_type, 0, &news);
    426 		if (error)
    427 			break;
    428 
    429 		/* for UDP sockets, make sure we can send&recv max */
    430 		if (so->so_proto->pr_type == SOCK_DGRAM) {
    431 			sbsize = SOCKIN_SBSIZE;
    432 			error = rumpcomp_sockin_setsockopt(news,
    433 			    SOL_SOCKET, SO_SNDBUF,
    434 			    &sbsize, sizeof(sbsize));
    435 			sbsize = SOCKIN_SBSIZE;
    436 			error = rumpcomp_sockin_setsockopt(news,
    437 			    SOL_SOCKET, SO_RCVBUF,
    438 			    &sbsize, sizeof(sbsize));
    439 		}
    440 
    441 		if ((error = registersock(so, news)) != 0)
    442 			rumpuser_close(news);
    443 
    444 		break;
    445 	}
    446 
    447 	case PRU_ACCEPT:
    448 		/* we do all the work in the worker thread */
    449 		break;
    450 
    451 	case PRU_BIND:
    452 		error = rumpcomp_sockin_bind(SO2S(so),
    453 		    mtod(nam, const struct sockaddr *),
    454 		    nam->m_len);
    455 		break;
    456 
    457 	case PRU_CONNECT:
    458 		error = rumpcomp_sockin_connect(SO2S(so),
    459 		    mtod(nam, struct sockaddr *), nam->m_len);
    460 		if (error == 0)
    461 			soisconnected(so);
    462 		break;
    463 
    464 	case PRU_LISTEN:
    465 		error = rumpcomp_sockin_listen(SO2S(so), so->so_qlimit);
    466 		break;
    467 
    468 	case PRU_SEND:
    469 	{
    470 		struct sockaddr *saddr;
    471 		struct msghdr mhdr;
    472 		size_t iov_max, i;
    473 		struct iovec iov_buf[32], *iov;
    474 		struct mbuf *m2;
    475 		size_t tot, n;
    476 		int s;
    477 
    478 		bpf_mtap_af(&sockin_if, AF_UNSPEC, m);
    479 
    480 		memset(&mhdr, 0, sizeof(mhdr));
    481 
    482 		iov_max = 0;
    483 		for (m2 = m; m2 != NULL; m2 = m2->m_next) {
    484 			iov_max++;
    485 		}
    486 
    487 		if (iov_max <= __arraycount(iov_buf)) {
    488 			iov = iov_buf;
    489 		} else {
    490 			iov = kmem_alloc(sizeof(struct iovec) * iov_max,
    491 			    KM_SLEEP);
    492 		}
    493 
    494 		tot = 0;
    495 		for (i = 0, m2 = m; m2 != NULL; m2 = m2->m_next, i++) {
    496 			iov[i].iov_base = m2->m_data;
    497 			iov[i].iov_len = m2->m_len;
    498 			tot += m2->m_len;
    499 		}
    500 		mhdr.msg_iov = iov;
    501 		mhdr.msg_iovlen = i;
    502 		s = SO2S(so);
    503 
    504 		if (nam != NULL) {
    505 			saddr = mtod(nam, struct sockaddr *);
    506 			mhdr.msg_name = saddr;
    507 			mhdr.msg_namelen = saddr->sa_len;
    508 		}
    509 
    510 		rumpcomp_sockin_sendmsg(s, &mhdr, 0, &n);
    511 
    512 		if (iov != iov_buf)
    513 			kmem_free(iov, sizeof(struct iovec) * iov_max);
    514 
    515 		m_freem(m);
    516 		m_freem(control);
    517 
    518 		/* this assumes too many things to list.. buthey, testing */
    519 		if (!rump_threads)
    520 			sockin_process(so);
    521 	}
    522 		break;
    523 
    524 	case PRU_SHUTDOWN:
    525 		removesock(so);
    526 		break;
    527 
    528 	case PRU_SOCKADDR:
    529 	case PRU_PEERADDR:
    530 	{
    531 		int slen = nam->m_len;
    532 		enum rumpcomp_sockin_getnametype which;
    533 
    534 		if (req == PRU_SOCKADDR)
    535 			which = RUMPCOMP_SOCKIN_SOCKNAME;
    536 		else
    537 			which = RUMPCOMP_SOCKIN_PEERNAME;
    538 		error = rumpcomp_sockin_getname(SO2S(so),
    539 		    mtod(nam, struct sockaddr *), &slen, which);
    540 		if (error == 0)
    541 			nam->m_len = slen;
    542 		break;
    543 	}
    544 
    545 	case PRU_CONTROL:
    546 		error = ENOTTY;
    547 		break;
    548 
    549 	default:
    550 		panic("sockin_usrreq: IMPLEMENT ME, req %d not supported", req);
    551 	}
    552 
    553 	return error;
    554 }
    555 
    556 static int
    557 sockin_ctloutput(int op, struct socket *so, struct sockopt *sopt)
    558 {
    559 
    560 	return rumpcomp_sockin_setsockopt(SO2S(so), sopt->sopt_level,
    561 	    sopt->sopt_name, sopt->sopt_data, sopt->sopt_size);
    562 }
    563