Home | History | Annotate | Line # | Download | only in libshmif
if_shmem.c revision 1.6.2.6
      1 /*	$NetBSD: if_shmem.c,v 1.6.2.6 2010/10/09 03:32:45 yamt Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2009 Antti Kantee.  All Rights Reserved.
      5  *
      6  * Development of this software was supported by The Nokia Foundation.
      7  *
      8  * Redistribution and use in source and binary forms, with or without
      9  * modification, are permitted provided that the following conditions
     10  * are met:
     11  * 1. Redistributions of source code must retain the above copyright
     12  *    notice, this list of conditions and the following disclaimer.
     13  * 2. Redistributions in binary form must reproduce the above copyright
     14  *    notice, this list of conditions and the following disclaimer in the
     15  *    documentation and/or other materials provided with the distribution.
     16  *
     17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
     18  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     20  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     23  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     27  * SUCH DAMAGE.
     28  */
     29 
     30 #include <sys/cdefs.h>
     31 __KERNEL_RCSID(0, "$NetBSD: if_shmem.c,v 1.6.2.6 2010/10/09 03:32:45 yamt Exp $");
     32 
     33 #include <sys/param.h>
     34 #include <sys/atomic.h>
     35 #include <sys/fcntl.h>
     36 #include <sys/kmem.h>
     37 #include <sys/kthread.h>
     38 #include <sys/lock.h>
     39 #include <sys/atomic.h>
     40 
     41 #include <net/if.h>
     42 #include <net/if_ether.h>
     43 
     44 #include <netinet/in.h>
     45 #include <netinet/in_var.h>
     46 
     47 #include <rump/rump.h>
     48 #include <rump/rumpuser.h>
     49 
     50 #include "rump_private.h"
     51 #include "rump_net_private.h"
     52 
     53 /*
     54  * Do r/w prefault for backend pages when attaching the interface.
     55  * This works around the most likely kernel/ffs/x86pmap bug described
     56  * in http://mail-index.netbsd.org/tech-kern/2010/08/17/msg008749.html
     57  *
     58  * NOTE: read prefaulting is not enough (that's done always)!
     59  */
     60 
     61 #define PREFAULT_RW
     62 
     63 /*
     64  * A virtual ethernet interface which uses shared memory from a
     65  * memory mapped file as the bus.
     66  */
     67 
     68 static int	shmif_init(struct ifnet *);
     69 static int	shmif_ioctl(struct ifnet *, u_long, void *);
     70 static void	shmif_start(struct ifnet *);
     71 static void	shmif_stop(struct ifnet *, int);
     72 
     73 #include "shmifvar.h"
     74 
     75 struct shmif_sc {
     76 	struct ethercom sc_ec;
     77 	uint8_t sc_myaddr[6];
     78 	struct shmif_mem *sc_busmem;
     79 	int sc_memfd;
     80 	int sc_kq;
     81 
     82 	uint64_t sc_devgen;
     83 	uint32_t sc_nextpacket;
     84 };
     85 
     86 static const uint32_t busversion = SHMIF_VERSION;
     87 
     88 static void shmif_rcv(void *);
     89 
     90 static uint32_t numif;
     91 
     92 #define LOCK_UNLOCKED	0
     93 #define LOCK_LOCKED	1
     94 #define LOCK_COOLDOWN	1001
     95 
     96 /*
     97  * This locking needs work and will misbehave severely if:
     98  * 1) the backing memory has to be paged in
     99  * 2) some lockholder exits while holding the lock
    100  */
    101 static void
    102 shmif_lockbus(struct shmif_mem *busmem)
    103 {
    104 	int i = 0;
    105 
    106 	while (__predict_false(atomic_cas_32(&busmem->shm_lock,
    107 	    LOCK_UNLOCKED, LOCK_LOCKED) == LOCK_LOCKED)) {
    108 		if (__predict_false(++i > LOCK_COOLDOWN)) {
    109 			uint64_t sec, nsec;
    110 			int error;
    111 
    112 			sec = 0;
    113 			nsec = 1000*1000; /* 1ms */
    114 			rumpuser_nanosleep(&sec, &nsec, &error);
    115 			i = 0;
    116 		}
    117 		continue;
    118 	}
    119 	membar_enter();
    120 }
    121 
    122 static void
    123 shmif_unlockbus(struct shmif_mem *busmem)
    124 {
    125 	unsigned int old;
    126 
    127 	membar_exit();
    128 	old = atomic_swap_32(&busmem->shm_lock, LOCK_UNLOCKED);
    129 	KASSERT(old == LOCK_LOCKED);
    130 }
    131 
    132 int
    133 rump_shmif_create(const char *path, int *ifnum)
    134 {
    135 	struct shmif_sc *sc;
    136 	struct ifnet *ifp;
    137 	uint8_t enaddr[ETHER_ADDR_LEN] = { 0xb2, 0xa0, 0x00, 0x00, 0x00, 0x00 };
    138 	uint32_t randnum;
    139 	unsigned mynum;
    140 	volatile uint8_t v;
    141 	volatile uint8_t *p;
    142 	int error;
    143 
    144 	randnum = arc4random();
    145 	memcpy(&enaddr[2], &randnum, sizeof(randnum));
    146 	mynum = atomic_inc_uint_nv(&numif)-1;
    147 
    148 	sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
    149 	ifp = &sc->sc_ec.ec_if;
    150 	memcpy(sc->sc_myaddr, enaddr, sizeof(enaddr));
    151 
    152 	sc->sc_memfd = rumpuser_open(path, O_RDWR | O_CREAT, &error);
    153 	if (sc->sc_memfd == -1)
    154 		goto fail;
    155 	sc->sc_busmem = rumpuser_filemmap(sc->sc_memfd, 0, BUSMEM_SIZE,
    156 	    RUMPUSER_FILEMMAP_TRUNCATE | RUMPUSER_FILEMMAP_SHARED
    157 	    | RUMPUSER_FILEMMAP_READ | RUMPUSER_FILEMMAP_WRITE, &error);
    158 	if (error)
    159 		goto fail;
    160 
    161 	if (sc->sc_busmem->shm_magic && sc->sc_busmem->shm_magic != SHMIF_MAGIC)
    162 		panic("bus is not magical");
    163 
    164 
    165 	/* Prefault in pages to minimize runtime penalty with buslock */
    166 	for (p = (uint8_t *)sc->sc_busmem;
    167 	    p < (uint8_t *)sc->sc_busmem + BUSMEM_SIZE;
    168 	    p += PAGE_SIZE)
    169 		v = *p;
    170 
    171 	shmif_lockbus(sc->sc_busmem);
    172 	/* we're first?  initialize bus */
    173 	if (sc->sc_busmem->shm_magic == 0) {
    174 		sc->sc_busmem->shm_magic = SHMIF_MAGIC;
    175 		sc->sc_busmem->shm_first = BUSMEM_DATASIZE;
    176 	}
    177 
    178 	sc->sc_nextpacket = sc->sc_busmem->shm_last;
    179 	sc->sc_devgen = sc->sc_busmem->shm_gen;
    180 
    181 #ifdef PREFAULT_RW
    182 	for (p = (uint8_t *)sc->sc_busmem;
    183 	    p < (uint8_t *)sc->sc_busmem + BUSMEM_SIZE;
    184 	    p += PAGE_SIZE) {
    185 		v = *p;
    186 		*p = v;
    187 	}
    188 #endif
    189 	shmif_unlockbus(sc->sc_busmem);
    190 
    191 	sc->sc_kq = rumpuser_writewatchfile_setup(-1, sc->sc_memfd, 0, &error);
    192 	if (sc->sc_kq == -1)
    193 		goto fail;
    194 
    195 	sprintf(ifp->if_xname, "shmif%d", mynum);
    196 	ifp->if_softc = sc;
    197 	ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST;
    198 	ifp->if_init = shmif_init;
    199 	ifp->if_ioctl = shmif_ioctl;
    200 	ifp->if_start = shmif_start;
    201 	ifp->if_stop = shmif_stop;
    202 	ifp->if_mtu = ETHERMTU;
    203 
    204 	if_attach(ifp);
    205 	ether_ifattach(ifp, enaddr);
    206 
    207 	aprint_verbose("shmif%d: bus %s\n", mynum, path);
    208 	aprint_verbose("shmif%d: Ethernet address %s\n",
    209 	    mynum, ether_sprintf(enaddr));
    210 
    211 	if (ifnum)
    212 		*ifnum = mynum;
    213 	return 0;
    214 
    215  fail:
    216 	panic("rump_shmemif_create: fixme");
    217 }
    218 
    219 static int
    220 shmif_init(struct ifnet *ifp)
    221 {
    222 	int error = 0;
    223 
    224 	if (rump_threads) {
    225 		error = kthread_create(PRI_NONE, KTHREAD_MPSAFE, NULL,
    226 		    shmif_rcv, ifp, NULL, "shmif");
    227 	} else {
    228 		printf("WARNING: threads not enabled, shmif NOT working\n");
    229 	}
    230 
    231 	ifp->if_flags |= IFF_RUNNING;
    232 	return error;
    233 }
    234 
    235 static int
    236 shmif_ioctl(struct ifnet *ifp, u_long cmd, void *data)
    237 {
    238 	int s, rv;
    239 
    240 	s = splnet();
    241 	rv = ether_ioctl(ifp, cmd, data);
    242 	if (rv == ENETRESET)
    243 		rv = 0;
    244 	splx(s);
    245 
    246 	return rv;
    247 }
    248 
    249 /* send everything in-context */
    250 static void
    251 shmif_start(struct ifnet *ifp)
    252 {
    253 	struct shmif_sc *sc = ifp->if_softc;
    254 	struct shmif_mem *busmem = sc->sc_busmem;
    255 	struct mbuf *m, *m0;
    256 	uint32_t dataoff;
    257 	uint32_t pktsize, pktwrote;
    258 	bool wrote = false;
    259 	bool wrap;
    260 	int error;
    261 
    262 	ifp->if_flags |= IFF_OACTIVE;
    263 
    264 	for (;;) {
    265 		struct shmif_pkthdr sp;
    266 		struct timeval tv;
    267 
    268 		IF_DEQUEUE(&ifp->if_snd, m0);
    269 		if (m0 == NULL) {
    270 			break;
    271 		}
    272 
    273 		pktsize = 0;
    274 		for (m = m0; m != NULL; m = m->m_next) {
    275 			pktsize += m->m_len;
    276 		}
    277 		KASSERT(pktsize <= ETHERMTU + ETHER_HDR_LEN);
    278 
    279 		getmicrouptime(&tv);
    280 		sp.sp_len = pktsize;
    281 		sp.sp_sec = tv.tv_sec;
    282 		sp.sp_usec = tv.tv_usec;
    283 
    284 		shmif_lockbus(busmem);
    285 		KASSERT(busmem->shm_magic == SHMIF_MAGIC);
    286 		busmem->shm_last = shmif_nextpktoff(busmem, busmem->shm_last);
    287 
    288 		wrap = false;
    289 		dataoff = shmif_buswrite(busmem,
    290 		    busmem->shm_last, &sp, sizeof(sp), &wrap);
    291 		pktwrote = 0;
    292 		for (m = m0; m != NULL; m = m->m_next) {
    293 			pktwrote += m->m_len;
    294 			dataoff = shmif_buswrite(busmem, dataoff,
    295 			    mtod(m, void *), m->m_len, &wrap);
    296 		}
    297 		KASSERT(pktwrote == pktsize);
    298 		if (wrap) {
    299 			busmem->shm_gen++;
    300 			DPRINTF(("bus generation now %d\n", busmem->shm_gen));
    301 		}
    302 		shmif_unlockbus(busmem);
    303 
    304 		m_freem(m0);
    305 		wrote = true;
    306 
    307 		DPRINTF(("shmif_start: send %d bytes at off %d\n",
    308 		    pktsize, busmem->shm_last));
    309 	}
    310 
    311 	ifp->if_flags &= ~IFF_OACTIVE;
    312 
    313 	/* wakeup */
    314 	if (wrote)
    315 		rumpuser_pwrite(sc->sc_memfd,
    316 		    &busversion, sizeof(busversion), IFMEM_WAKEUP, &error);
    317 }
    318 
/*
 * if_stop callback.  Stopping the interface is not implemented:
 * any call ends in a panic.
 */
static void
shmif_stop(struct ifnet *ifp, int disable)
{
	panic("%s: unimpl", __func__);
}
    325 
    326 
    327 /*
    328  * Check if we have been sleeping too long.  Basically,
    329  * our in-sc nextpkt must by first <= nextpkt <= last"+1".
    330  * We use the fact that first is guaranteed to never overlap
    331  * with the last frame in the ring.
    332  */
    333 static __inline bool
    334 stillvalid_p(struct shmif_sc *sc)
    335 {
    336 	struct shmif_mem *busmem = sc->sc_busmem;
    337 	unsigned gendiff = busmem->shm_gen - sc->sc_devgen;
    338 	uint32_t lastoff, devoff;
    339 
    340 	KASSERT(busmem->shm_first != busmem->shm_last);
    341 
    342 	/* normalize onto a 2x busmem chunk */
    343 	devoff = sc->sc_nextpacket;
    344 	lastoff = shmif_nextpktoff(busmem, busmem->shm_last);
    345 
    346 	/* trivial case */
    347 	if (gendiff > 1)
    348 		return false;
    349 	KASSERT(gendiff <= 1);
    350 
    351 	/* Normalize onto 2x busmem chunk */
    352 	if (busmem->shm_first >= lastoff) {
    353 		lastoff += BUSMEM_DATASIZE;
    354 		if (gendiff == 0)
    355 			devoff += BUSMEM_DATASIZE;
    356 	} else {
    357 		if (gendiff)
    358 			return false;
    359 	}
    360 
    361 	return devoff >= busmem->shm_first && devoff <= lastoff;
    362 }
    363 
    364 static void
    365 shmif_rcv(void *arg)
    366 {
    367 	struct ifnet *ifp = arg;
    368 	struct shmif_sc *sc = ifp->if_softc;
    369 	struct shmif_mem *busmem = sc->sc_busmem;
    370 	struct mbuf *m = NULL;
    371 	struct ether_header *eth;
    372 	uint32_t nextpkt;
    373 	bool wrap;
    374 	int error;
    375 
    376 	for (;;) {
    377 		struct shmif_pkthdr sp;
    378 
    379 		if (m == NULL) {
    380 			m = m_gethdr(M_WAIT, MT_DATA);
    381 			MCLGET(m, M_WAIT);
    382 		}
    383 
    384 		DPRINTF(("waiting %d/%d\n", sc->sc_nextpacket, sc->sc_devgen));
    385 		KASSERT(m->m_flags & M_EXT);
    386 
    387 		shmif_lockbus(busmem);
    388 		KASSERT(busmem->shm_magic == SHMIF_MAGIC);
    389 		KASSERT(busmem->shm_gen >= sc->sc_devgen);
    390 
    391 		/* need more data? */
    392 		if (sc->sc_devgen == busmem->shm_gen &&
    393 		    shmif_nextpktoff(busmem, busmem->shm_last)
    394 		     == sc->sc_nextpacket) {
    395 			shmif_unlockbus(busmem);
    396 			error = 0;
    397 			rumpuser_writewatchfile_wait(sc->sc_kq, NULL, &error);
    398 			if (__predict_false(error))
    399 				printf("shmif_rcv: wait failed %d\n", error);
    400 			continue;
    401 		}
    402 
    403 		if (stillvalid_p(sc)) {
    404 			nextpkt = sc->sc_nextpacket;
    405 		} else {
    406 			KASSERT(busmem->shm_gen > 0);
    407 			nextpkt = busmem->shm_first;
    408 			if (busmem->shm_first > busmem->shm_last)
    409 				sc->sc_devgen = busmem->shm_gen - 1;
    410 			else
    411 				sc->sc_devgen = busmem->shm_gen;
    412 			DPRINTF(("dev %p overrun, new data: %d/%d\n",
    413 			    sc, nextpkt, sc->sc_devgen));
    414 		}
    415 
    416 		/*
    417 		 * If our read pointer is ahead the bus last write, our
    418 		 * generation must be one behind.
    419 		 */
    420 		KASSERT(!(nextpkt > busmem->shm_last
    421 		    && sc->sc_devgen == busmem->shm_gen));
    422 
    423 		wrap = false;
    424 		nextpkt = shmif_busread(busmem, &sp,
    425 		    nextpkt, sizeof(sp), &wrap);
    426 		KASSERT(sp.sp_len <= ETHERMTU + ETHER_HDR_LEN);
    427 		nextpkt = shmif_busread(busmem, mtod(m, void *),
    428 		    nextpkt, sp.sp_len, &wrap);
    429 
    430 		DPRINTF(("shmif_rcv: read packet of length %d at %d\n",
    431 		    sp.sp_len, nextpkt));
    432 
    433 		sc->sc_nextpacket = nextpkt;
    434 		shmif_unlockbus(sc->sc_busmem);
    435 
    436 		if (wrap) {
    437 			sc->sc_devgen++;
    438 			DPRINTF(("dev %p generation now %d\n",
    439 			    sc, sc->sc_devgen));
    440 		}
    441 
    442 		m->m_len = m->m_pkthdr.len = sp.sp_len;
    443 		m->m_pkthdr.rcvif = ifp;
    444 
    445 		/* if it's from us, don't pass up and reuse storage space */
    446 		eth = mtod(m, struct ether_header *);
    447 		if (memcmp(eth->ether_shost, sc->sc_myaddr, 6) != 0) {
    448 			KERNEL_LOCK(1, NULL);
    449 			ifp->if_input(ifp, m);
    450 			KERNEL_UNLOCK_ONE(NULL);
    451 			m = NULL;
    452 		}
    453 	}
    454 
    455 	panic("shmif_worker is a lazy boy %d\n", error);
    456 }
    457