if_shmem.c revision 1.87 1 /* $NetBSD: if_shmem.c,v 1.87 2024/08/20 16:49:10 riastradh Exp $ */
2
3 /*
4 * Copyright (c) 2009, 2010 Antti Kantee. All Rights Reserved.
5 *
6 * Development of this software was supported by The Nokia Foundation.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
18 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 #include <sys/cdefs.h>
31 __KERNEL_RCSID(0, "$NetBSD: if_shmem.c,v 1.87 2024/08/20 16:49:10 riastradh Exp $");
32
33 #include <sys/param.h>
34 #include <sys/atomic.h>
35 #include <sys/fcntl.h>
36 #include <sys/kmem.h>
37 #include <sys/kthread.h>
38 #include <sys/lock.h>
39 #include <sys/vmem.h>
40 #include <sys/cprng.h>
41
42 #include <net/bpf.h>
43 #include <net/if.h>
44 #include <net/if_dl.h>
45 #include <net/if_ether.h>
46 #include <net/if_media.h>
47 #include <net/ether_sw_offload.h>
48
49 #include <netinet/in.h>
50 #include <netinet/in_var.h>
51
52 #include <rump-sys/kern.h>
53 #include <rump-sys/net.h>
54
55 #include <rump/rump.h>
56 #include <rump/rumpuser.h>
57
58 #include "shmif_user.h"
59
60 static int shmif_clone(struct if_clone *, int);
61 static int shmif_unclone(struct ifnet *);
62
63 static int shmif_mediachange(struct ifnet *);
64 static void shmif_mediastatus(struct ifnet *, struct ifmediareq *);
65
66 struct if_clone shmif_cloner =
67 IF_CLONE_INITIALIZER("shmif", shmif_clone, shmif_unclone);
68
69 /*
70 * Do r/w prefault for backend pages when attaching the interface.
71 * At least logically thinking improves performance (although no
72 * mlocking is done, so they might go away).
73 */
74 #define PREFAULT_RW
75
76 /*
77 * A virtual ethernet interface which uses shared memory from a
78 * memory mapped file as the bus.
79 */
80
81 static int shmif_init(struct ifnet *);
82 static int shmif_ioctl(struct ifnet *, u_long, void *);
83 static void shmif_start(struct ifnet *);
84 static void shmif_snd(struct ifnet *, struct mbuf *);
85 static void shmif_stop(struct ifnet *, int);
86
87 #include "shmifvar.h"
88
/*
 * Per-instance softc for a shmif interface.  The interface is backed
 * by a memory-mapped file ("the bus") shared with other processes.
 */
struct shmif_sc {
	struct ethercom sc_ec;		/* ethernet common state */
	struct ifmedia sc_im;		/* media: only AUTO/NONE are offered */
	struct shmif_mem *sc_busmem;	/* mmapped shared bus memory */
	int sc_memfd;			/* fd of backing file, -1 if unset */
	int sc_kq;			/* kqueue fd for bus activity wakeups */
	int sc_unit;			/* interface unit number */

	char *sc_backfile;		/* path of backing file (kmem alloc) */
	size_t sc_backfilelen;		/* allocated length of sc_backfile */

	uint64_t sc_devgen;		/* bus generation last seen by us */
	uint32_t sc_nextpacket;		/* bus offset of next packet to read */

	kmutex_t sc_mtx;		/* protects sc_cv wait conditions */
	kcondvar_t sc_cv;		/* kicked when RUNNING/dying changes */

	struct lwp *sc_rcvl;		/* receive thread, see shmif_rcv() */
	bool sc_dying;			/* set in unclone to stop the thread */

	uint64_t sc_uid;		/* random id; used to skip our own tx */
};
111
112 static void shmif_rcv(void *);
113
114 #define LOCK_UNLOCKED 0
115 #define LOCK_LOCKED 1
116 #define LOCK_COOLDOWN 1001
117
118 vmem_t *shmif_units;
119
120 static void
121 dowakeup(struct shmif_sc *sc)
122 {
123 struct rumpuser_iovec iov;
124 uint32_t ver = SHMIF_VERSION;
125 size_t n;
126
127 iov.iov_base = &ver;
128 iov.iov_len = sizeof(ver);
129 rumpuser_iovwrite(sc->sc_memfd, &iov, 1, IFMEM_WAKEUP, &n);
130 }
131
/*
 * This locking needs work and will misbehave severely if:
 * 1) the backing memory has to be paged in
 * 2) some lockholder exits while holding the lock
 */
static void
shmif_lockbus(struct shmif_mem *busmem)
{
	int i = 0;

	/*
	 * Spin on the shared lock word.  After LOCK_COOLDOWN failed
	 * attempts, sleep 1ms so we do not burn host CPU while another
	 * process holds the lock, then resume spinning.
	 */
	while (__predict_false(atomic_cas_32(&busmem->shm_lock,
	    LOCK_UNLOCKED, LOCK_LOCKED) == LOCK_LOCKED)) {
		if (__predict_false(++i > LOCK_COOLDOWN)) {
			/* wait 1ms */
			rumpuser_clock_sleep(RUMPUSER_CLOCK_RELWALL,
			    0, 1000*1000);
			i = 0;
		}
		continue;
	}
	/* pairs with membar_release() in shmif_unlockbus() */
	membar_acquire();
}
154
/*
 * Release the bus lock taken by shmif_lockbus().  The caller must
 * hold the lock; this is asserted via the swapped-out value.
 */
static void
shmif_unlockbus(struct shmif_mem *busmem)
{
	unsigned int old __diagused;

	/* pairs with membar_acquire() in shmif_lockbus() */
	membar_release();
	old = atomic_swap_32(&busmem->shm_lock, LOCK_UNLOCKED);
	KASSERT(old == LOCK_LOCKED);
}
164
165 static int
166 allocif(int unit, struct shmif_sc **scp)
167 {
168 uint8_t enaddr[ETHER_ADDR_LEN] = { 0xb2, 0xa0, 0x00, 0x00, 0x00, 0x00 };
169 struct shmif_sc *sc;
170 struct ifnet *ifp;
171 uint64_t randnum;
172 int error = 0;
173
174 randnum = cprng_strong64();
175 memcpy(&enaddr[2], &randnum, 4);
176
177 sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
178 sc->sc_memfd = -1;
179 sc->sc_unit = unit;
180 sc->sc_uid = randnum;
181
182 ifp = &sc->sc_ec.ec_if;
183
184 ifmedia_init(&sc->sc_im, 0, shmif_mediachange, shmif_mediastatus);
185 ifmedia_add(&sc->sc_im, IFM_ETHER|IFM_AUTO, 0, NULL);
186 ifmedia_add(&sc->sc_im, IFM_ETHER|IFM_NONE, 0, NULL);
187 ifmedia_set(&sc->sc_im, IFM_ETHER|IFM_AUTO);
188
189 snprintf(ifp->if_xname, sizeof(ifp->if_xname), "shmif%d", unit);
190 ifp->if_softc = sc;
191 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
192 ifp->if_init = shmif_init;
193 ifp->if_ioctl = shmif_ioctl;
194 ifp->if_start = shmif_start;
195 ifp->if_stop = shmif_stop;
196 ifp->if_mtu = ETHERMTU;
197 ifp->if_dlt = DLT_EN10MB;
198 ifp->if_capabilities = IFCAP_TSOv4 | IFCAP_TSOv6 |
199 IFCAP_CSUM_IPv4_Rx | IFCAP_CSUM_IPv4_Tx |
200 IFCAP_CSUM_TCPv4_Rx | IFCAP_CSUM_TCPv4_Tx |
201 IFCAP_CSUM_UDPv4_Rx | IFCAP_CSUM_UDPv4_Tx |
202 IFCAP_CSUM_TCPv6_Rx | IFCAP_CSUM_TCPv6_Tx |
203 IFCAP_CSUM_UDPv6_Rx | IFCAP_CSUM_UDPv6_Tx;
204 IFQ_SET_READY(&ifp->if_snd);
205
206 mutex_init(&sc->sc_mtx, MUTEX_DEFAULT, IPL_NONE);
207 cv_init(&sc->sc_cv, "shmifcv");
208
209 if_initialize(ifp);
210 #if 1
211 char buf[256];
212
213 if (rumpuser_getparam("RUMP_SHMIF_CAPENABLE", buf, sizeof(buf)) == 0) {
214 uint64_t capen = strtoul(buf, NULL, 0);
215
216 ifp->if_capenable = capen & ifp->if_capabilities;
217 }
218 #endif
219
220 if_deferred_start_init(ifp, NULL);
221 ether_ifattach(ifp, enaddr);
222 if_register(ifp);
223
224 aprint_verbose("shmif%d: Ethernet address %s\n",
225 unit, ether_sprintf(enaddr));
226
227 if (scp)
228 *scp = sc;
229
230 if (rump_threads) {
231 error = kthread_create(PRI_NONE,
232 KTHREAD_MPSAFE | KTHREAD_MUSTJOIN, NULL,
233 shmif_rcv, ifp, &sc->sc_rcvl, "shmif");
234 } else {
235 printf("WARNING: threads not enabled, shmif NOT working\n");
236 }
237
238 if (error) {
239 shmif_unclone(ifp);
240 }
241
242 return 0;
243 }
244
/*
 * Attach a backing file as the bus: map it, validate the bus magic,
 * initialize the bus if we are the first user, synchronize our read
 * pointers with the bus state, and set up the kqueue-based activity
 * watcher.  Returns 0 or an errno; on failure the mapping is undone.
 */
static int
initbackend(struct shmif_sc *sc, int memfd)
{
	volatile uint8_t v;
	volatile uint8_t *p;
	void *mem;
	int error;

	error = rumpcomp_shmif_mmap(memfd, BUSMEM_SIZE, &mem);
	if (error)
		return error;
	sc->sc_busmem = mem;

	/* a non-zero magic that is not ours means a foreign/corrupt bus */
	if (sc->sc_busmem->shm_magic
	    && sc->sc_busmem->shm_magic != SHMIF_MAGIC) {
		printf("bus is not magical");
		rumpuser_unmap(sc->sc_busmem, BUSMEM_SIZE);
		return ENOEXEC;
	}

	/*
	 * Prefault in pages to minimize runtime penalty with buslock.
	 * Use 512 instead of PAGE_SIZE to make sure we catch cases where
	 * rump kernel PAGE_SIZE > host page size.
	 */
	for (p = (uint8_t *)sc->sc_busmem;
	    p < (uint8_t *)sc->sc_busmem + BUSMEM_SIZE;
	    p += 512)
		v = *p;

	shmif_lockbus(sc->sc_busmem);
	/* we're first? initialize bus */
	if (sc->sc_busmem->shm_magic == 0) {
		sc->sc_busmem->shm_magic = SHMIF_MAGIC;
		sc->sc_busmem->shm_first = BUSMEM_DATASIZE;
	}

	/* start reading from the current bus position and generation */
	sc->sc_nextpacket = sc->sc_busmem->shm_last;
	sc->sc_devgen = sc->sc_busmem->shm_gen;

#ifdef PREFAULT_RW
	/* also dirty the pages so writes are prefaulted, under the lock */
	for (p = (uint8_t *)sc->sc_busmem;
	    p < (uint8_t *)sc->sc_busmem + BUSMEM_SIZE;
	    p += PAGE_SIZE) {
		v = *p;
		*p = v;
	}
#endif
	shmif_unlockbus(sc->sc_busmem);

	sc->sc_kq = -1;
	error = rumpcomp_shmif_watchsetup(&sc->sc_kq, memfd);
	if (error) {
		rumpuser_unmap(sc->sc_busmem, BUSMEM_SIZE);
		return error;
	}

	sc->sc_memfd = memfd;

	return error;
}
306
307 static void
308 finibackend(struct shmif_sc *sc)
309 {
310
311 if (sc->sc_backfile == NULL)
312 return;
313
314 if (sc->sc_backfile) {
315 kmem_free(sc->sc_backfile, sc->sc_backfilelen);
316 sc->sc_backfile = NULL;
317 sc->sc_backfilelen = 0;
318 }
319
320 rumpuser_unmap(sc->sc_busmem, BUSMEM_SIZE);
321 rumpuser_close(sc->sc_memfd);
322 rumpuser_close(sc->sc_kq);
323
324 sc->sc_memfd = -1;
325 }
326
/*
 * Programmatic interface creation.  Allocates the lowest free unit,
 * attaches a shmif instance and, if path is non-NULL, opens/creates
 * the backing file and attaches it as the bus.  On success the unit
 * number is returned via *ifnum (if non-NULL).  Returns 0 or errno.
 */
int
rump_shmif_create(const char *path, int *ifnum)
{
	struct shmif_sc *sc;
	vmem_addr_t t;
	int unit, error;
	int memfd = -1; /* XXXgcc */

	if (path) {
		error = rumpuser_open(path,
		    RUMPUSER_OPEN_RDWR | RUMPUSER_OPEN_CREATE, &memfd);
		if (error)
			return error;
	}

	/* grab the lowest free unit; the vmem arena is offset by +1 */
	error = vmem_xalloc(shmif_units, 1, 0, 0, 0,
	    VMEM_ADDR_MIN, VMEM_ADDR_MAX, VM_INSTANTFIT | VM_SLEEP, &t);

	if (error != 0) {
		if (path)
			rumpuser_close(memfd);
		return error;
	}

	unit = t - 1;

	if ((error = allocif(unit, &sc)) != 0) {
		if (path)
			rumpuser_close(memfd);
		return error;
	}

	if (!path)
		goto out;

	error = initbackend(sc, memfd);
	if (error) {
		/* unclone also releases the unit number and the softc */
		shmif_unclone(&sc->sc_ec.ec_if);
		return error;
	}

	/* remember the path so SIOCGLINKSTR can report it */
	sc->sc_backfilelen = strlen(path)+1;
	sc->sc_backfile = kmem_alloc(sc->sc_backfilelen, KM_SLEEP);
	strcpy(sc->sc_backfile, path);

 out:
	if (ifnum)
		*ifnum = unit;

	return 0;
}
378
379 static int
380 shmif_clone(struct if_clone *ifc, int unit)
381 {
382 int rc __diagused;
383 vmem_addr_t unit2;
384
385 /*
386 * Ok, we know the unit number, but we must still reserve it.
387 * Otherwise the wildcard-side of things might get the same one.
388 * This is slightly offset-happy due to vmem. First, we offset
389 * the range of unit numbers by +1 since vmem cannot deal with
390 * ranges starting from 0. Talk about uuuh.
391 */
392 rc = vmem_xalloc(shmif_units, 1, 0, 0, 0, unit+1, unit+1,
393 VM_SLEEP | VM_INSTANTFIT, &unit2);
394 KASSERT(rc == 0 && unit2-1 == unit);
395
396 return allocif(unit, NULL);
397 }
398
/*
 * Destroy an interface: stop it, terminate the receive thread,
 * release the backend, free the unit number and the softc.
 * The ordering of the steps below is significant.
 */
static int
shmif_unclone(struct ifnet *ifp)
{
	struct shmif_sc *sc = ifp->if_softc;

	shmif_stop(ifp, 1);
	if_down(ifp);

	/* tell the receive thread to exit and wake it from cv_wait */
	mutex_enter(&sc->sc_mtx);
	sc->sc_dying = true;
	cv_broadcast(&sc->sc_cv);
	mutex_exit(&sc->sc_mtx);

	if (sc->sc_rcvl)
		kthread_join(sc->sc_rcvl);
	sc->sc_rcvl = NULL;

	/*
	 * Need to be called after the kthread left, otherwise closing kqueue
	 * (sc_kq) hangs sometimes perhaps because of a race condition between
	 * close and kevent in the kthread on the kqueue.
	 */
	finibackend(sc);

	/* unit numbers live offset by +1 in the vmem arena */
	vmem_xfree(shmif_units, sc->sc_unit+1, 1);

	ether_ifdetach(ifp);
	if_detach(ifp);

	cv_destroy(&sc->sc_cv);
	mutex_destroy(&sc->sc_mtx);

	kmem_free(sc, sizeof(*sc));

	return 0;
}
435
436 static int
437 shmif_init(struct ifnet *ifp)
438 {
439 struct shmif_sc *sc = ifp->if_softc;
440 int error = 0;
441
442 if (sc->sc_memfd == -1)
443 return ENXIO;
444 KASSERT(sc->sc_busmem);
445
446 ifp->if_flags |= IFF_RUNNING;
447
448 mutex_enter(&sc->sc_mtx);
449 sc->sc_nextpacket = sc->sc_busmem->shm_last;
450 sc->sc_devgen = sc->sc_busmem->shm_gen;
451
452 cv_broadcast(&sc->sc_cv);
453 mutex_exit(&sc->sc_mtx);
454
455 return error;
456 }
457
458 static int
459 shmif_mediachange(struct ifnet *ifp)
460 {
461 struct shmif_sc *sc = ifp->if_softc;
462
463 if (IFM_SUBTYPE(sc->sc_im.ifm_cur->ifm_media) == IFM_NONE &&
464 ifp->if_link_state != LINK_STATE_DOWN) {
465 if_link_state_change(ifp, LINK_STATE_DOWN);
466 } else if (IFM_SUBTYPE(sc->sc_im.ifm_cur->ifm_media) == IFM_AUTO &&
467 ifp->if_link_state != LINK_STATE_UP) {
468 if_link_state_change(ifp, LINK_STATE_UP);
469 }
470 return 0;
471 }
472
473 static void
474 shmif_mediastatus(struct ifnet *ifp, struct ifmediareq *imr)
475 {
476 struct shmif_sc *sc = ifp->if_softc;
477 imr->ifm_active = sc->sc_im.ifm_cur->ifm_media;
478 }
479
/*
 * Interface ioctl handler.  Besides the standard ethernet and media
 * ioctls, SIOCGLINKSTR/SIOCSLINKSTR query and set the path of the
 * backing bus file.
 */
static int
shmif_ioctl(struct ifnet *ifp, u_long cmd, void *data)
{
	struct shmif_sc *sc = ifp->if_softc;
	struct ifdrv *ifd;
	char *path;
	int s, rv, memfd;

	s = splnet();
	switch (cmd) {
	case SIOCGLINKSTR:
		/* report the backing-file path to userland */
		ifd = data;

		if (sc->sc_backfilelen == 0) {
			rv = ENOENT;
			break;
		}

		ifd->ifd_len = sc->sc_backfilelen;
		if (ifd->ifd_cmd == IFLINKSTR_QUERYLEN) {
			rv = 0;
			break;
		}

		if (ifd->ifd_cmd != 0) {
			rv = EINVAL;
			break;
		}

		rv = copyoutstr(sc->sc_backfile, ifd->ifd_data,
		    MIN(sc->sc_backfilelen, ifd->ifd_len), NULL);
		break;
	case SIOCSLINKSTR:
		/* attach/detach a backing file; only while interface down */
		if (ifp->if_flags & IFF_UP) {
			rv = EBUSY;
			break;
		}

		ifd = data;
		if (ifd->ifd_cmd == IFLINKSTR_UNSET) {
			finibackend(sc);
			/* Back to the default just in case */
			ifp->if_link_state = LINK_STATE_UNKNOWN;
			rv = 0;
			break;
		} else if (ifd->ifd_cmd != 0) {
			rv = EINVAL;
			break;
		} else if (sc->sc_backfile) {
			rv = EBUSY;
			break;
		}

		if (ifd->ifd_len > MAXPATHLEN) {
			rv = E2BIG;
			break;
		} else if (ifd->ifd_len < 1) {
			rv = EINVAL;
			break;
		}

		/* copy the path in, open it, then hand it to the backend */
		path = kmem_alloc(ifd->ifd_len, KM_SLEEP);
		rv = copyinstr(ifd->ifd_data, path, ifd->ifd_len, NULL);
		if (rv) {
			kmem_free(path, ifd->ifd_len);
			break;
		}
		rv = rumpuser_open(path,
		    RUMPUSER_OPEN_RDWR | RUMPUSER_OPEN_CREATE, &memfd);
		if (rv) {
			kmem_free(path, ifd->ifd_len);
			break;
		}
		rv = initbackend(sc, memfd);
		if (rv) {
			kmem_free(path, ifd->ifd_len);
			rumpuser_close(memfd);
			break;
		}
		/* backend owns memfd now; remember the path for GLINKSTR */
		sc->sc_backfile = path;
		sc->sc_backfilelen = ifd->ifd_len;

		if_link_state_change(ifp, LINK_STATE_UP);
		break;

#ifdef OSIOCSIFMEDIA
	case OSIOCSIFMEDIA:
#endif
	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		rv = ifmedia_ioctl(ifp, data, &sc->sc_im, cmd);
		break;

	default:
		rv = ether_ioctl(ifp, cmd, data);
		if (rv == ENETRESET)
			rv = 0;
		break;
	}
	splx(s);

	return rv;
}
583
584 static void
585 shmif_start(struct ifnet *ifp)
586 {
587 struct shmif_sc *sc = ifp->if_softc;
588 struct mbuf *m, *n;
589 bool wrote = false;
590
591 ifp->if_flags |= IFF_OACTIVE;
592
593 for (;;) {
594 IFQ_DEQUEUE(&ifp->if_snd, m);
595 if (m == NULL)
596 break;
597
598 m = ether_sw_offload_tx(ifp, m);
599 if (m == NULL) {
600 if_statinc(ifp, if_oerrors);
601 break;
602 }
603
604 do {
605 n = m->m_nextpkt;
606 shmif_snd(ifp, m);
607 m = n;
608 } while (m != NULL);
609
610 wrote = true;
611 }
612
613 ifp->if_flags &= ~IFF_OACTIVE;
614
615 /* wakeup? */
616 if (wrote) {
617 dowakeup(sc);
618 }
619 }
620
/* send everything in-context since it's just a matter of mem-to-mem copy */
static void
shmif_snd(struct ifnet *ifp, struct mbuf *m0)
{
	struct shmif_sc *sc = ifp->if_softc;
	struct shmif_mem *busmem = sc->sc_busmem;
	struct shmif_pkthdr sp;
	struct timeval tv;
	struct mbuf *m;
	uint32_t dataoff;
	uint32_t pktsize, pktwrote;
	bool wrap;

	/* total packet length, summed over the mbuf chain */
	pktsize = 0;
	for (m = m0; m != NULL; m = m->m_next) {
		pktsize += m->m_len;
	}
	KASSERT(pktsize <= ETHERMTU + ETHER_HDR_LEN);

	/* build the bus packet header: length, timestamp, sender id */
	getmicrouptime(&tv);
	sp.sp_len = pktsize;
	sp.sp_sec = tv.tv_sec;
	sp.sp_usec = tv.tv_usec;
	sp.sp_sender = sc->sc_uid;	/* lets receivers skip their own tx */

	bpf_mtap(ifp, m0, BPF_D_OUT);

	/*
	 * Compare with DOWN to allow UNKNOWN (the default value),
	 * which is required by some ATF tests using rump servers
	 * written in C.
	 */
	if (ifp->if_link_state == LINK_STATE_DOWN)
		goto dontsend;

	shmif_lockbus(busmem);
	KASSERT(busmem->shm_magic == SHMIF_MAGIC);
	busmem->shm_last = shmif_nextpktoff(busmem, busmem->shm_last);

	/* write the header followed by the chain data into the ring */
	wrap = false;
	dataoff =
	    shmif_buswrite(busmem, busmem->shm_last, &sp, sizeof(sp), &wrap);
	pktwrote = 0;
	for (m = m0; m != NULL; m = m->m_next) {
		pktwrote += m->m_len;
		dataoff = shmif_buswrite(busmem, dataoff, mtod(m, void *),
		    m->m_len, &wrap);
	}
	KASSERT(pktwrote == pktsize);
	/* a generation bump tells readers the ring has wrapped */
	if (wrap) {
		busmem->shm_gen++;
		DPRINTF(("bus generation now %" PRIu64 "\n", busmem->shm_gen));
	}
	shmif_unlockbus(busmem);

 dontsend:
	m_freem(m0);
	if_statinc(ifp, if_opackets);

	DPRINTF(("shmif_start: send %d bytes at off %d\n", pktsize,
	    busmem->shm_last));
}
683
684 static void
685 shmif_stop(struct ifnet *ifp, int disable)
686 {
687 struct shmif_sc *sc = ifp->if_softc;
688
689 ifp->if_flags &= ~IFF_RUNNING;
690 membar_producer();
691
692 /*
693 * wakeup thread. this will of course wake up all bus
694 * listeners, but that's life.
695 */
696 if (sc->sc_memfd != -1) {
697 dowakeup(sc);
698 }
699 }
700
701
/*
 * Check if we have been sleeping too long.  Basically,
 * our in-sc nextpkt must by first <= nextpkt <= last"+1".
 * We use the fact that first is guaranteed to never overlap
 * with the last frame in the ring.
 */
static __inline bool
stillvalid_p(struct shmif_sc *sc)
{
	struct shmif_mem *busmem = sc->sc_busmem;
	unsigned gendiff = busmem->shm_gen - sc->sc_devgen;
	uint32_t lastoff, devoff;

	KASSERT(busmem->shm_first != busmem->shm_last);

	/* normalize onto a 2x busmem chunk */
	devoff = sc->sc_nextpacket;
	lastoff = shmif_nextpktoff(busmem, busmem->shm_last);

	/* trivial case: writer lapped us by more than one generation */
	if (gendiff > 1)
		return false;
	KASSERT(gendiff <= 1);

	/* Normalize onto 2x busmem chunk */
	if (busmem->shm_first >= lastoff) {
		/* valid region wraps: shift offsets past the wrap point */
		lastoff += BUSMEM_DATASIZE;
		if (gendiff == 0)
			devoff += BUSMEM_DATASIZE;
	} else {
		/* region does not wrap: any generation lag means overrun */
		if (gendiff)
			return false;
	}

	return devoff >= busmem->shm_first && devoff <= lastoff;
}
738
/*
 * Receive thread.  Waits for bus activity, copies new packets into
 * mbufs, filters them (link state, own tx, destination address,
 * promiscuous mode) and passes acceptable ones up the stack.
 * Runs until sc_dying is set by shmif_unclone().
 */
static void
shmif_rcv(void *arg)
{
	struct ifnet *ifp = arg;
	struct shmif_sc *sc = ifp->if_softc;
	struct shmif_mem *busmem;
	struct mbuf *m = NULL;
	struct ether_header *eth;
	uint32_t nextpkt;
	bool wrap, passup;
	int error;
	/* pad so the payload after the ethernet header is aligned */
	const int align
	    = ALIGN(sizeof(struct ether_header)) - sizeof(struct ether_header);

 reup:
	/* sleep until the interface is brought up (or we are dying) */
	mutex_enter(&sc->sc_mtx);
	while ((ifp->if_flags & IFF_RUNNING) == 0 && !sc->sc_dying)
		cv_wait(&sc->sc_cv, &sc->sc_mtx);
	mutex_exit(&sc->sc_mtx);

	busmem = sc->sc_busmem;

	while (ifp->if_flags & IFF_RUNNING) {
		struct shmif_pkthdr sp;

		/* the mbuf is reused if the previous packet was filtered */
		if (m == NULL) {
			m = m_gethdr(M_WAIT, MT_DATA);
			MCLGET(m, M_WAIT);
			m->m_data += align;
		}

		DPRINTF(("waiting %d/%" PRIu64 "\n",
		    sc->sc_nextpacket, sc->sc_devgen));
		KASSERT(m->m_flags & M_EXT);

		shmif_lockbus(busmem);
		KASSERT(busmem->shm_magic == SHMIF_MAGIC);
		KASSERT(busmem->shm_gen >= sc->sc_devgen);

		/* need more data? */
		if (sc->sc_devgen == busmem->shm_gen &&
		    shmif_nextpktoff(busmem, busmem->shm_last)
		     == sc->sc_nextpacket) {
			shmif_unlockbus(busmem);
			error = rumpcomp_shmif_watchwait(sc->sc_kq);
			if (__predict_false(error))
				printf("shmif_rcv: wait failed %d\n", error);
			membar_consumer();
			continue;
		}

		/* resync read position if the writer overran us */
		if (stillvalid_p(sc)) {
			nextpkt = sc->sc_nextpacket;
		} else {
			KASSERT(busmem->shm_gen > 0);
			nextpkt = busmem->shm_first;
			if (busmem->shm_first > busmem->shm_last)
				sc->sc_devgen = busmem->shm_gen - 1;
			else
				sc->sc_devgen = busmem->shm_gen;
			DPRINTF(("dev %p overrun, new data: %d/%" PRIu64 "\n",
			    sc, nextpkt, sc->sc_devgen));
		}

		/*
		 * If our read pointer is ahead the bus last write, our
		 * generation must be one behind.
		 */
		KASSERT(!(nextpkt > busmem->shm_last
		    && sc->sc_devgen == busmem->shm_gen));

		/* read the packet header, then the payload */
		wrap = false;
		nextpkt = shmif_busread(busmem, &sp,
		    nextpkt, sizeof(sp), &wrap);
		KASSERT(sp.sp_len <= ETHERMTU + ETHER_HDR_LEN);
		nextpkt = shmif_busread(busmem, mtod(m, void *),
		    nextpkt, sp.sp_len, &wrap);

		DPRINTF(("shmif_rcv: read packet of length %d at %d\n",
		    sp.sp_len, nextpkt));

		sc->sc_nextpacket = nextpkt;
		shmif_unlockbus(sc->sc_busmem);

		if (wrap) {
			sc->sc_devgen++;
			DPRINTF(("dev %p generation now %" PRIu64 "\n",
			    sc, sc->sc_devgen));
		}

		/*
		 * Ignore packets too short to possibly be valid.
		 * This is hit at least for the first frame on a new bus.
		 */
		if (__predict_false(sp.sp_len < ETHER_HDR_LEN)) {
			DPRINTF(("shmif read packet len %d < ETHER_HDR_LEN\n",
			    sp.sp_len));
			continue;
		}

		m->m_len = m->m_pkthdr.len = sp.sp_len;
		m_set_rcvif(m, ifp);

		/*
		 * Test if we want to pass the packet upwards
		 */
		eth = mtod(m, struct ether_header *);
		/*
		 * Compare with DOWN to allow UNKNOWN (the default value),
		 * which is required by some ATF tests using rump servers
		 * written in C.
		 */
		if (ifp->if_link_state == LINK_STATE_DOWN) {
			passup = false;
		} else if (sp.sp_sender == sc->sc_uid) {
			/* our own transmission looped back on the bus */
			passup = false;
		} else if (memcmp(eth->ether_dhost, CLLADDR(ifp->if_sadl),
		    ETHER_ADDR_LEN) == 0) {
			passup = true;
		} else if (ETHER_IS_MULTICAST(eth->ether_dhost)) {
			passup = true;
		} else if (ifp->if_flags & IFF_PROMISC) {
			m->m_flags |= M_PROMISC;
			passup = true;
		} else {
			passup = false;
		}

		if (passup) {
			int bound;

			m = ether_sw_offload_rx(ifp, m);

			KERNEL_LOCK(1, NULL);
			/* Prevent LWP migrations between CPUs for psref(9) */
			bound = curlwp_bind();
			if_input(ifp, m);
			curlwp_bindx(bound);
			KERNEL_UNLOCK_ONE(NULL);

			m = NULL;
		}
		/* else: reuse mbuf for a future packet */
	}
	m_freem(m);
	m = NULL;

	/* interface went down but we are not dying: wait for it to come up */
	if (!sc->sc_dying)
		goto reup;

	kthread_exit(0);
}
891