if_shmem.c revision 1.27 1 /* $NetBSD: if_shmem.c,v 1.27 2010/08/17 11:35:23 pooka Exp $ */
2
3 /*
4 * Copyright (c) 2009 Antti Kantee. All Rights Reserved.
5 *
6 * Development of this software was supported by The Nokia Foundation.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
18 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 #include <sys/cdefs.h>
31 __KERNEL_RCSID(0, "$NetBSD: if_shmem.c,v 1.27 2010/08/17 11:35:23 pooka Exp $");
32
33 #include <sys/param.h>
34 #include <sys/atomic.h>
35 #include <sys/fcntl.h>
36 #include <sys/kmem.h>
37 #include <sys/kthread.h>
38 #include <sys/lock.h>
39 #include <sys/atomic.h>
40
41 #include <net/if.h>
42 #include <net/if_ether.h>
43
44 #include <netinet/in.h>
45 #include <netinet/in_var.h>
46
47 #include <rump/rump.h>
48 #include <rump/rumpuser.h>
49
50 #include "rump_private.h"
51 #include "rump_net_private.h"
52
53 /*
54 * A virtual ethernet interface which uses shared memory from a
55 * memory mapped file as the bus.
56 */
57
58 static int shmif_init(struct ifnet *);
59 static int shmif_ioctl(struct ifnet *, u_long, void *);
60 static void shmif_start(struct ifnet *);
61 static void shmif_stop(struct ifnet *, int);
62
63 #include "shmifvar.h"
64
65 struct shmif_sc {
66 struct ethercom sc_ec;
67 uint8_t sc_myaddr[6];
68 struct shmif_mem *sc_busmem;
69 int sc_memfd;
70 int sc_kq;
71
72 uint64_t sc_devgen;
73 uint32_t sc_nextpacket;
74 };
75
76 static const uint32_t busversion = SHMIF_VERSION;
77
78 static void shmif_rcv(void *);
79
80 static uint32_t numif;
81
82 #define LOCK_UNLOCKED 0
83 #define LOCK_LOCKED 1
84 #define LOCK_COOLDOWN 1001
85
86 /*
87 * This locking needs work and will misbehave severely if:
88 * 1) the backing memory has to be paged in
89 * 2) some lockholder exits while holding the lock
90 */
91 static void
92 shmif_lockbus(struct shmif_mem *busmem)
93 {
94 int i = 0;
95
96 while (__predict_false(atomic_cas_32(&busmem->shm_lock,
97 LOCK_UNLOCKED, LOCK_LOCKED) == LOCK_LOCKED)) {
98 if (__predict_false(++i > LOCK_COOLDOWN)) {
99 uint64_t sec, nsec;
100 int error;
101
102 sec = 0;
103 nsec = 1000*1000; /* 1ms */
104 rumpuser_nanosleep(&sec, &nsec, &error);
105 i = 0;
106 }
107 continue;
108 }
109 membar_enter();
110 }
111
112 static void
113 shmif_unlockbus(struct shmif_mem *busmem)
114 {
115 unsigned int old;
116
117 membar_exit();
118 old = atomic_swap_32(&busmem->shm_lock, LOCK_UNLOCKED);
119 KASSERT(old == LOCK_LOCKED);
120 }
121
122 int
123 rump_shmif_create(const char *path, int *ifnum)
124 {
125 struct shmif_sc *sc;
126 struct ifnet *ifp;
127 uint8_t enaddr[ETHER_ADDR_LEN] = { 0xb2, 0xa0, 0x00, 0x00, 0x00, 0x00 };
128 uint32_t randnum;
129 unsigned mynum;
130 int error;
131
132 randnum = arc4random();
133 memcpy(&enaddr[2], &randnum, sizeof(randnum));
134 mynum = atomic_inc_uint_nv(&numif)-1;
135
136 sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
137 ifp = &sc->sc_ec.ec_if;
138 memcpy(sc->sc_myaddr, enaddr, sizeof(enaddr));
139
140 sc->sc_memfd = rumpuser_open(path, O_RDWR | O_CREAT, &error);
141 if (sc->sc_memfd == -1)
142 goto fail;
143 sc->sc_busmem = rumpuser_filemmap(sc->sc_memfd, 0, BUSMEM_SIZE,
144 RUMPUSER_FILEMMAP_TRUNCATE | RUMPUSER_FILEMMAP_SHARED
145 | RUMPUSER_FILEMMAP_READ | RUMPUSER_FILEMMAP_WRITE, &error);
146 if (error)
147 goto fail;
148
149 if (sc->sc_busmem->shm_magic && sc->sc_busmem->shm_magic != SHMIF_MAGIC)
150 panic("bus is not magical");
151
152 shmif_lockbus(sc->sc_busmem);
153 /* we're first? initialize bus */
154 if (sc->sc_busmem->shm_magic == 0) {
155 sc->sc_busmem->shm_magic = SHMIF_MAGIC;
156 sc->sc_busmem->shm_first = BUSMEM_DATASIZE;
157 }
158
159 sc->sc_nextpacket = sc->sc_busmem->shm_last;
160 sc->sc_devgen = sc->sc_busmem->shm_gen;
161 shmif_unlockbus(sc->sc_busmem);
162
163 sc->sc_kq = rumpuser_writewatchfile_setup(-1, sc->sc_memfd, 0, &error);
164 if (sc->sc_kq == -1)
165 goto fail;
166
167 sprintf(ifp->if_xname, "shmif%d", mynum);
168 ifp->if_softc = sc;
169 ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST;
170 ifp->if_init = shmif_init;
171 ifp->if_ioctl = shmif_ioctl;
172 ifp->if_start = shmif_start;
173 ifp->if_stop = shmif_stop;
174 ifp->if_mtu = ETHERMTU;
175
176 if_attach(ifp);
177 ether_ifattach(ifp, enaddr);
178
179 aprint_verbose("shmif%d: bus %s\n", mynum, path);
180 aprint_verbose("shmif%d: Ethernet address %s\n",
181 mynum, ether_sprintf(enaddr));
182
183 if (ifnum)
184 *ifnum = mynum;
185 return 0;
186
187 fail:
188 panic("rump_shmemif_create: fixme");
189 }
190
191 static int
192 shmif_init(struct ifnet *ifp)
193 {
194 int error = 0;
195
196 if (rump_threads) {
197 error = kthread_create(PRI_NONE, KTHREAD_MPSAFE, NULL,
198 shmif_rcv, ifp, NULL, "shmif");
199 } else {
200 printf("WARNING: threads not enabled, shmif NOT working\n");
201 }
202
203 ifp->if_flags |= IFF_RUNNING;
204 return error;
205 }
206
207 static int
208 shmif_ioctl(struct ifnet *ifp, u_long cmd, void *data)
209 {
210 int s, rv;
211
212 s = splnet();
213 rv = ether_ioctl(ifp, cmd, data);
214 if (rv == ENETRESET)
215 rv = 0;
216 splx(s);
217
218 return rv;
219 }
220
221 /* send everything in-context */
222 static void
223 shmif_start(struct ifnet *ifp)
224 {
225 struct shmif_sc *sc = ifp->if_softc;
226 struct shmif_mem *busmem = sc->sc_busmem;
227 struct mbuf *m, *m0;
228 uint32_t dataoff;
229 uint32_t pktsize, pktwrote;
230 bool wrote = false;
231 bool wrap;
232 int error;
233
234 ifp->if_flags |= IFF_OACTIVE;
235
236 for (;;) {
237 struct shmif_pkthdr sp;
238 struct timeval tv;
239
240 IF_DEQUEUE(&ifp->if_snd, m0);
241 if (m0 == NULL) {
242 break;
243 }
244
245 pktsize = 0;
246 for (m = m0; m != NULL; m = m->m_next) {
247 pktsize += m->m_len;
248 }
249 KASSERT(pktsize <= ETHERMTU + ETHER_HDR_LEN);
250
251 getmicrouptime(&tv);
252 sp.sp_len = pktsize;
253 sp.sp_sec = tv.tv_sec;
254 sp.sp_usec = tv.tv_usec;
255
256 shmif_lockbus(busmem);
257 KASSERT(busmem->shm_magic == SHMIF_MAGIC);
258 busmem->shm_last = shmif_nextpktoff(busmem, busmem->shm_last);
259
260 wrap = false;
261 dataoff = shmif_buswrite(busmem,
262 busmem->shm_last, &sp, sizeof(sp), &wrap);
263 pktwrote = 0;
264 for (m = m0; m != NULL; m = m->m_next) {
265 pktwrote += m->m_len;
266 dataoff = shmif_buswrite(busmem, dataoff,
267 mtod(m, void *), m->m_len, &wrap);
268 }
269 KASSERT(pktwrote == pktsize);
270 if (wrap) {
271 busmem->shm_gen++;
272 DPRINTF(("bus generation now %d\n", busmem->shm_gen));
273 }
274 shmif_unlockbus(busmem);
275
276 m_freem(m0);
277 wrote = true;
278
279 DPRINTF(("shmif_start: send %d bytes at off %d\n",
280 pktsize, busmem->shm_last));
281 }
282
283 ifp->if_flags &= ~IFF_OACTIVE;
284
285 /* wakeup */
286 if (wrote)
287 rumpuser_pwrite(sc->sc_memfd,
288 &busversion, sizeof(busversion), IFMEM_WAKEUP, &error);
289 }
290
/*
 * if_stop callback.  Stopping the interface is not implemented:
 * calling this panics with the name of the function.
 */
static void
shmif_stop(struct ifnet *ifp, int disable)
{

	panic("%s: unimpl", __func__);
}
297
298
299 /*
300 * Check if we have been sleeping too long. Basically,
301 * our in-sc nextpkt must by first <= nextpkt <= last"+1".
302 * We use the fact that first is guaranteed to never overlap
303 * with the last frame in the ring.
304 */
305 static __inline bool
306 stillvalid_p(struct shmif_sc *sc)
307 {
308 struct shmif_mem *busmem = sc->sc_busmem;
309 unsigned gendiff = busmem->shm_gen - sc->sc_devgen;
310 uint32_t lastoff, devoff;
311
312 KASSERT(busmem->shm_first != busmem->shm_last);
313
314 /* normalize onto a 2x busmem chunk */
315 devoff = sc->sc_nextpacket;
316 lastoff = shmif_nextpktoff(busmem, busmem->shm_last);
317
318 /* trivial case */
319 if (gendiff > 1)
320 return false;
321 KASSERT(gendiff <= 1);
322
323 /* Normalize onto 2x busmem chunk */
324 if (busmem->shm_first >= lastoff) {
325 lastoff += BUSMEM_DATASIZE;
326 if (gendiff == 0)
327 devoff += BUSMEM_DATASIZE;
328 } else {
329 if (gendiff)
330 return false;
331 }
332
333 return devoff >= busmem->shm_first && devoff <= lastoff;
334 }
335
336 static void
337 shmif_rcv(void *arg)
338 {
339 struct ifnet *ifp = arg;
340 struct shmif_sc *sc = ifp->if_softc;
341 struct shmif_mem *busmem = sc->sc_busmem;
342 struct mbuf *m = NULL;
343 struct ether_header *eth;
344 uint32_t nextpkt;
345 bool wrap;
346 int error;
347
348 for (;;) {
349 struct shmif_pkthdr sp;
350
351 if (m == NULL) {
352 m = m_gethdr(M_WAIT, MT_DATA);
353 MCLGET(m, M_WAIT);
354 }
355
356 DPRINTF(("waiting %d/%d\n", sc->sc_nextpacket, sc->sc_devgen));
357 KASSERT(m->m_flags & M_EXT);
358
359 shmif_lockbus(busmem);
360 KASSERT(busmem->shm_magic == SHMIF_MAGIC);
361 KASSERT(busmem->shm_gen >= sc->sc_devgen);
362
363 /* need more data? */
364 if (sc->sc_devgen == busmem->shm_gen &&
365 shmif_nextpktoff(busmem, busmem->shm_last)
366 == sc->sc_nextpacket) {
367 shmif_unlockbus(busmem);
368 error = 0;
369 rumpuser_writewatchfile_wait(sc->sc_kq, NULL, &error);
370 if (__predict_false(error))
371 printf("shmif_rcv: wait failed %d\n", error);
372 continue;
373 }
374
375 if (stillvalid_p(sc)) {
376 nextpkt = sc->sc_nextpacket;
377 } else {
378 KASSERT(busmem->shm_gen > 0);
379 nextpkt = busmem->shm_first;
380 if (busmem->shm_first > busmem->shm_last)
381 sc->sc_devgen = busmem->shm_gen - 1;
382 else
383 sc->sc_devgen = busmem->shm_gen;
384 DPRINTF(("dev %p overrun, new data: %d/%d\n",
385 sc, nextpkt, sc->sc_devgen));
386 }
387
388 /*
389 * If our read pointer is ahead the bus last write, our
390 * generation must be one behind.
391 */
392 KASSERT(!(nextpkt > busmem->shm_last
393 && sc->sc_devgen == busmem->shm_gen));
394
395 wrap = false;
396 nextpkt = shmif_busread(busmem, &sp,
397 nextpkt, sizeof(sp), &wrap);
398 KASSERT(sp.sp_len <= ETHERMTU + ETHER_HDR_LEN);
399 nextpkt = shmif_busread(busmem, mtod(m, void *),
400 nextpkt, sp.sp_len, &wrap);
401
402 DPRINTF(("shmif_rcv: read packet of length %d at %d\n",
403 sp.sp_len, nextpkt));
404
405 sc->sc_nextpacket = nextpkt;
406 shmif_unlockbus(sc->sc_busmem);
407
408 if (wrap) {
409 sc->sc_devgen++;
410 DPRINTF(("dev %p generation now %d\n",
411 sc, sc->sc_devgen));
412 }
413
414 m->m_len = m->m_pkthdr.len = sp.sp_len;
415 m->m_pkthdr.rcvif = ifp;
416
417 /* if it's from us, don't pass up and reuse storage space */
418 eth = mtod(m, struct ether_header *);
419 if (memcmp(eth->ether_shost, sc->sc_myaddr, 6) != 0) {
420 KERNEL_LOCK(1, NULL);
421 ifp->if_input(ifp, m);
422 KERNEL_UNLOCK_ONE(NULL);
423 m = NULL;
424 }
425 }
426
427 panic("shmif_worker is a lazy boy %d\n", error);
428 }
429