Home | History | Annotate | Line # | Download | only in libvirtif
if_virt.c revision 1.21
      1 /*	$NetBSD: if_virt.c,v 1.21 2010/11/15 20:23:11 pooka Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2008 Antti Kantee.  All Rights Reserved.
      5  *
      6  * Redistribution and use in source and binary forms, with or without
      7  * modification, are permitted provided that the following conditions
      8  * are met:
      9  * 1. Redistributions of source code must retain the above copyright
     10  *    notice, this list of conditions and the following disclaimer.
     11  * 2. Redistributions in binary form must reproduce the above copyright
     12  *    notice, this list of conditions and the following disclaimer in the
     13  *    documentation and/or other materials provided with the distribution.
     14  *
     15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
     16  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     18  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     25  * SUCH DAMAGE.
     26  */
     27 
     28 #include <sys/cdefs.h>
     29 __KERNEL_RCSID(0, "$NetBSD: if_virt.c,v 1.21 2010/11/15 20:23:11 pooka Exp $");
     30 
     31 #include <sys/param.h>
     32 #include <sys/condvar.h>
     33 #include <sys/fcntl.h>
     34 #include <sys/kernel.h>
     35 #include <sys/kmem.h>
     36 #include <sys/kthread.h>
     37 #include <sys/mutex.h>
     38 #include <sys/poll.h>
     39 #include <sys/sockio.h>
     40 #include <sys/socketvar.h>
     41 
     42 #include <net/bpf.h>
     43 #include <net/if.h>
     44 #include <net/if_ether.h>
     45 #include <net/if_tap.h>
     46 
     47 #include <netinet/in.h>
     48 #include <netinet/in_var.h>
     49 
     50 #include <rump/rump.h>
     51 #include <rump/rumpuser.h>
     52 
     53 #include "rump_private.h"
     54 #include "rump_net_private.h"
     55 
     56 /*
     57  * Virtual interface for userspace purposes.  Uses tap(4) to
     58  * interface with the kernel and just simply shovels data
     59  * to/from /dev/tap.
     60  */
     61 
     62 #define VIRTIF_BASE "virt"
     63 
     64 static int	virtif_init(struct ifnet *);
     65 static int	virtif_ioctl(struct ifnet *, u_long, void *);
     66 static void	virtif_start(struct ifnet *);
     67 static void	virtif_stop(struct ifnet *, int);
     68 
     69 struct virtif_sc {
     70 	struct ethercom sc_ec;
     71 	int sc_tapfd;
     72 	bool sc_dying;
     73 	struct lwp *sc_l_snd, *sc_l_rcv;
     74 	kmutex_t sc_mtx;
     75 	kcondvar_t sc_cv;
     76 };
     77 
     78 static void virtif_receiver(void *);
     79 static void virtif_sender(void *);
     80 static int  virtif_clone(struct if_clone *, int);
     81 static int  virtif_unclone(struct ifnet *);
     82 
     83 struct if_clone virtif_cloner =
     84     IF_CLONE_INITIALIZER(VIRTIF_BASE, virtif_clone, virtif_unclone);
     85 
     86 int
     87 rump_virtif_create(int num)
     88 {
     89 	struct virtif_sc *sc;
     90 	struct ifnet *ifp;
     91 	uint8_t enaddr[ETHER_ADDR_LEN] = { 0xb2, 0x0a, 0x00, 0x0b, 0x0e, 0x01 };
     92 	char tapdev[16];
     93 	int fd, error = 0;
     94 
     95 	if (num >= 0x100)
     96 		return E2BIG;
     97 
     98 	snprintf(tapdev, sizeof(tapdev), "/dev/tap%d", num);
     99 	fd = rumpuser_open(tapdev, O_RDWR, &error);
    100 	if (fd == -1) {
    101 		printf("virtif_create: can't open /dev/tap%d: %d\n",
    102 		    num, error);
    103 		return error;
    104 	}
    105 	enaddr[2] = arc4random() & 0xff;
    106 	enaddr[5] = num;
    107 
    108 	sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
    109 	sc->sc_dying = false;
    110 	sc->sc_tapfd = fd;
    111 
    112 	mutex_init(&sc->sc_mtx, MUTEX_DEFAULT, IPL_NONE);
    113 	cv_init(&sc->sc_cv, "virtsnd");
    114 	ifp = &sc->sc_ec.ec_if;
    115 	sprintf(ifp->if_xname, "%s%d", VIRTIF_BASE, num);
    116 	ifp->if_softc = sc;
    117 
    118 	if (rump_threads) {
    119 		if ((error = kthread_create(PRI_NONE, KTHREAD_JOINABLE, NULL,
    120 		    virtif_receiver, ifp, &sc->sc_l_rcv, "virtifr")) != 0)
    121 			goto out;
    122 
    123 		if ((error = kthread_create(PRI_NONE,
    124 		    KTHREAD_JOINABLE | KTHREAD_MPSAFE, NULL,
    125 		    virtif_sender, ifp, &sc->sc_l_snd, "virtifs")) != 0)
    126 			goto out;
    127 	} else {
    128 		printf("WARNING: threads not enabled, receive NOT working\n");
    129 	}
    130 
    131 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
    132 	ifp->if_init = virtif_init;
    133 	ifp->if_ioctl = virtif_ioctl;
    134 	ifp->if_start = virtif_start;
    135 	ifp->if_stop = virtif_stop;
    136 	IFQ_SET_READY(&ifp->if_snd);
    137 
    138 	if_attach(ifp);
    139 	ether_ifattach(ifp, enaddr);
    140 
    141  out:
    142 	if (error) {
    143 		virtif_unclone(ifp);
    144 	}
    145 
    146 	return error;
    147 }
    148 
/*
 * Cloner create hook: forwards the requested unit number to
 * rump_virtif_create() and hands back its result.  ifc is not used.
 */
static int
virtif_clone(struct if_clone *ifc, int unit)
{
	int error;

	error = rump_virtif_create(unit);
	return error;
}
    155 
    156 static int
    157 virtif_unclone(struct ifnet *ifp)
    158 {
    159 	struct virtif_sc *sc = ifp->if_softc;
    160 
    161 	mutex_enter(&sc->sc_mtx);
    162 	if (sc->sc_dying) {
    163 		mutex_exit(&sc->sc_mtx);
    164 		return EINPROGRESS;
    165 	}
    166 	sc->sc_dying = true;
    167 	cv_broadcast(&sc->sc_cv);
    168 	mutex_exit(&sc->sc_mtx);
    169 
    170 	virtif_stop(ifp, 1);
    171 	if_down(ifp);
    172 
    173 	if (sc->sc_l_snd) {
    174 		kthread_join(sc->sc_l_snd);
    175 		sc->sc_l_snd = NULL;
    176 	}
    177 	if (sc->sc_l_rcv) {
    178 		kthread_join(sc->sc_l_rcv);
    179 		sc->sc_l_rcv = NULL;
    180 	}
    181 
    182 	rumpuser_close(sc->sc_tapfd, NULL);
    183 
    184 	mutex_destroy(&sc->sc_mtx);
    185 	cv_destroy(&sc->sc_cv);
    186 	kmem_free(sc, sizeof(*sc));
    187 
    188 	ether_ifdetach(ifp);
    189 	if_detach(ifp);
    190 
    191 	return 0;
    192 }
    193 
    194 static int
    195 virtif_init(struct ifnet *ifp)
    196 {
    197 	struct virtif_sc *sc = ifp->if_softc;
    198 
    199 	ifp->if_flags |= IFF_RUNNING;
    200 
    201 	mutex_enter(&sc->sc_mtx);
    202 	cv_broadcast(&sc->sc_cv);
    203 	mutex_exit(&sc->sc_mtx);
    204 
    205 	return 0;
    206 }
    207 
    208 static int
    209 virtif_ioctl(struct ifnet *ifp, u_long cmd, void *data)
    210 {
    211 	int s, rv;
    212 
    213 	s = splnet();
    214 	rv = ether_ioctl(ifp, cmd, data);
    215 	if (rv == ENETRESET)
    216 		rv = 0;
    217 	splx(s);
    218 
    219 	return rv;
    220 }
    221 
    222 /* just send everything in-context */
    223 static void
    224 virtif_start(struct ifnet *ifp)
    225 {
    226 	struct virtif_sc *sc = ifp->if_softc;
    227 
    228 	mutex_enter(&sc->sc_mtx);
    229 	ifp->if_flags |= IFF_OACTIVE;
    230 	cv_broadcast(&sc->sc_cv);
    231 	mutex_exit(&sc->sc_mtx);
    232 }
    233 
    234 static void
    235 virtif_stop(struct ifnet *ifp, int disable)
    236 {
    237 	struct virtif_sc *sc = ifp->if_softc;
    238 
    239 	ifp->if_flags &= ~IFF_RUNNING;
    240 
    241 	mutex_enter(&sc->sc_mtx);
    242 	cv_broadcast(&sc->sc_cv);
    243 	mutex_exit(&sc->sc_mtx);
    244 }
    245 
    246 #define POLLTIMO_MS 1
    247 static void
    248 virtif_receiver(void *arg)
    249 {
    250 	struct ifnet *ifp = arg;
    251 	struct virtif_sc *sc = ifp->if_softc;
    252 	struct mbuf *m;
    253 	size_t plen = ETHER_MAX_LEN_JUMBO+1;
    254 	struct pollfd pfd;
    255 	ssize_t n;
    256 	int error, rv;
    257 
    258 	pfd.fd = sc->sc_tapfd;
    259 	pfd.events = POLLIN;
    260 
    261 	KASSERT(rump_kernel_isbiglocked());
    262 
    263 	for (;;) {
    264 		m = m_gethdr(M_WAIT, MT_DATA);
    265 		MEXTMALLOC(m, plen, M_WAIT);
    266 
    267  again:
    268 		/* poll, but periodically check if we should die */
    269 		rv = rumpuser_poll(&pfd, 1, POLLTIMO_MS, &error);
    270 		if (sc->sc_dying) {
    271 			m_freem(m);
    272 			break;
    273 		}
    274 		if (rv == 0)
    275 			goto again;
    276 
    277 		n = rumpuser_read(sc->sc_tapfd, mtod(m, void *), plen, &error);
    278 		KASSERT(n < ETHER_MAX_LEN_JUMBO);
    279 		if (__predict_false(n < 0)) {
    280 			if (n == -1 && error == EAGAIN) {
    281 				goto again;
    282 			}
    283 
    284 			printf("%s: read from /dev/tap failed. host if down?\n",
    285 			    ifp->if_xname);
    286 			mutex_enter(&sc->sc_mtx);
    287 			/* could check if need go, done soon anyway */
    288 			cv_timedwait(&sc->sc_cv, &sc->sc_mtx, hz);
    289 			mutex_exit(&sc->sc_mtx);
    290 			goto again;
    291 		}
    292 
    293 		/* tap sometimes returns EOF.  don't sweat it and plow on */
    294 		if (__predict_false(n == 0))
    295 			goto again;
    296 
    297 		/* discard if we're not up */
    298 		if ((ifp->if_flags & IFF_RUNNING) == 0)
    299 			goto again;
    300 
    301 		m->m_len = m->m_pkthdr.len = n;
    302 		m->m_pkthdr.rcvif = ifp;
    303 		bpf_mtap(ifp, m);
    304 		ether_input(ifp, m);
    305 	}
    306 
    307 	kthread_exit(0);
    308 }
    309 
    310 /* lazy bum stetson-harrison magic value */
    311 #define LB_SH 32
    312 static void
    313 virtif_sender(void *arg)
    314 {
    315 	struct ifnet *ifp = arg;
    316 	struct virtif_sc *sc = ifp->if_softc;
    317 	struct mbuf *m, *m0;
    318 	struct rumpuser_iovec io[LB_SH];
    319 	int i, error;
    320 
    321 	mutex_enter(&sc->sc_mtx);
    322 	KERNEL_LOCK(1, NULL);
    323 	while (!sc->sc_dying) {
    324 		if (!ifp->if_flags & IFF_RUNNING) {
    325 			cv_wait(&sc->sc_cv, &sc->sc_mtx);
    326 			continue;
    327 		}
    328 		IF_DEQUEUE(&ifp->if_snd, m0);
    329 		if (!m0) {
    330 			ifp->if_flags &= ~IFF_OACTIVE;
    331 			cv_wait(&sc->sc_cv, &sc->sc_mtx);
    332 			continue;
    333 		}
    334 		mutex_exit(&sc->sc_mtx);
    335 
    336 		m = m0;
    337 		for (i = 0; i < LB_SH && m; i++) {
    338 			io[i].iov_base = mtod(m, void *);
    339 			io[i].iov_len = m->m_len;
    340 			m = m->m_next;
    341 		}
    342 		if (i == LB_SH)
    343 			panic("lazy bum");
    344 		bpf_mtap(ifp, m0);
    345 		KERNEL_UNLOCK_LAST(curlwp);
    346 
    347 		rumpuser_writev(sc->sc_tapfd, io, i, &error);
    348 
    349 		KERNEL_LOCK(1, NULL);
    350 		m_freem(m0);
    351 		mutex_enter(&sc->sc_mtx);
    352 	}
    353 	KERNEL_UNLOCK_LAST(curlwp);
    354 
    355 	mutex_exit(&sc->sc_mtx);
    356 
    357 	kthread_exit(0);
    358 }
    359 
    360 /*
    361  * dummyif is a nada-interface.
    362  * As it requires nothing external, it can be used for testing
    363  * interface configuration.
    364  */
/* ifnet callbacks installed by rump_dummyif_create(). */
static int	dummyif_init(struct ifnet *ifp);
static void	dummyif_start(struct ifnet *ifp);
    367 
    368 void
    369 rump_dummyif_create()
    370 {
    371 	struct ifnet *ifp;
    372 	struct ethercom *ec;
    373 	uint8_t enaddr[ETHER_ADDR_LEN] = { 0xb2, 0x0a, 0x00, 0x0b, 0x0e, 0x01 };
    374 
    375 	enaddr[2] = arc4random() & 0xff;
    376 	enaddr[5] = arc4random() & 0xff;
    377 
    378 	ec = kmem_zalloc(sizeof(*ec), KM_SLEEP);
    379 
    380 	ifp = &ec->ec_if;
    381 	strlcpy(ifp->if_xname, "dummy0", sizeof(ifp->if_xname));
    382 	ifp->if_softc = ifp;
    383 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
    384 	ifp->if_init = dummyif_init;
    385 	ifp->if_ioctl = virtif_ioctl;
    386 	ifp->if_start = dummyif_start;
    387 
    388 	if_attach(ifp);
    389 	ether_ifattach(ifp, enaddr);
    390 }
    391 
    392 static int
    393 dummyif_init(struct ifnet *ifp)
    394 {
    395 
    396 	ifp->if_flags |= IFF_RUNNING;
    397 	return 0;
    398 }
    399 
/*
 * if_start callback of the dummy interface: intentionally does nothing.
 */
static void
dummyif_start(struct ifnet *ifp)
{

	/* nothing to do */
	return;
}
    405