/*	$NetBSD: if_xennet_xenbus.c,v 1.132 2025/08/28 05:59:16 mrg Exp $	*/

/*
 * Copyright (c) 2006 Manuel Bouyer.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 2004 Christian Limpach.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * This file contains the xennet frontend code required for the network
 * communication between two Xen domains.
 * It resembles xbd, but is a little more complex as it must deal with two
 * rings:
 * - the TX ring, to transmit packets to the backend (inside => outside)
 * - the RX ring, to receive packets from the backend (outside => inside)
 *
 * The principles are as follows.
 *
 * For TX:
 * The purpose is to transmit packets to the outside. The start of day is in
 * xennet_start() (the output routine of xennet), scheduled via a softint.
 * xennet_start() generates the requests associated
 * with the queued TX mbufs (see altq(9)).
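 * Each queued mbuf is loaded with bus_dma(9); one TX ring slot (and one
 * txreq structure) is consumed per DMA segment, so a single packet may use
 * several slots when the backend advertises feature-sg.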
 * The backend's responses are processed by xennet_tx_complete(), called
 * from xennet_start().
 *
 * For RX:
 * The purpose is to process the packets received from the outside. RX buffers
 * are pre-allocated through xennet_alloc_rx_buffer(), during xennet autoconf
 * attach. During pre-allocation, the frontend pushes requests into the I/O
 * ring, in preparation for incoming packets from the backend.
 * When RX packets need to be processed, the backend takes the requests
 * previously offered by the frontend and pushes the associated responses
 * inside the I/O ring.
 * When done, it notifies the frontend through an event notification, which
 * will asynchronously call xennet_handler() in the frontend.
 * xennet_handler() processes the responses, generates the associated mbuf,
 * and passes it to the MI layer for further processing.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: if_xennet_xenbus.c,v 1.132 2025/08/28 05:59:16 mrg Exp $");

#include "opt_xen.h"
#include "opt_nfs_boot.h"
#include "opt_net_mpsafe.h"

#include <sys/param.h>
#include <sys/device.h>
#include <sys/conf.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/intr.h>
#include <sys/rndsource.h>

#include <net/if.h>
#include <net/if_dl.h>
#include <net/if_ether.h>
#include <net/bpf.h>

#if defined(NFS_BOOT_BOOTSTATIC)
#include <sys/fstypes.h>
#include <sys/mount.h>
#include <sys/statvfs.h>
#include <netinet/in.h>
#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/nfsmount.h>
#include <nfs/nfsdiskless.h>
#include <xen/if_xennetvar.h>
#endif /* defined(NFS_BOOT_BOOTSTATIC) */

#include <xen/xennet_checksum.h>

#include <uvm/uvm.h>

#include <xen/intr.h>
#include <xen/hypervisor.h>
#include <xen/evtchn.h>
#include <xen/granttables.h>
#include <xen/include/public/io/netif.h>
#include <xen/xenpmap.h>

#include <xen/xenbus.h>
#include "locators.h"

#undef XENNET_DEBUG_DUMP
#undef XENNET_DEBUG

#ifdef XENNET_DEBUG
#define XEDB_FOLLOW	0x01
#define XEDB_INIT	0x02
#define XEDB_EVENT	0x04
#define XEDB_MBUF	0x08
#define XEDB_MEM	0x10
int xennet_debug = 0xff;
#define DPRINTF(x) if (xennet_debug) printf x;
#define DPRINTFN(n,x) if (xennet_debug & (n)) printf x;
#else
#define DPRINTF(x)
#define DPRINTFN(n,x)
#endif

#define GRANT_INVALID_REF -1 /* entry is free */

#define NET_TX_RING_SIZE __CONST_RING_SIZE(netif_tx, PAGE_SIZE)
#define NET_RX_RING_SIZE __CONST_RING_SIZE(netif_rx, PAGE_SIZE)

struct xennet_txreq {
	SLIST_ENTRY(xennet_txreq) txreq_next;
	uint16_t txreq_id; /* ID passed to backend */
	grant_ref_t txreq_gntref; /* grant ref of this request */
	struct mbuf *txreq_m; /* mbuf being transmitted */
	bus_dmamap_t txreq_dmamap;
};

struct xennet_rxreq {
	SLIST_ENTRY(xennet_rxreq) rxreq_next;
	uint16_t rxreq_id; /* ID passed to backend */
	grant_ref_t rxreq_gntref; /* grant ref of this request */
	struct mbuf *rxreq_m;
	bus_dmamap_t rxreq_dmamap;
};

struct xennet_xenbus_softc {
	device_t sc_dev;
	struct ethercom sc_ethercom;
	uint8_t sc_enaddr[ETHER_ADDR_LEN];
	struct xenbus_device *sc_xbusd;

	netif_tx_front_ring_t sc_tx_ring;
	netif_rx_front_ring_t sc_rx_ring;

	unsigned int sc_evtchn;
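	/* interrupt handle bound to sc_evtchn (see xennet_xenbus_resume()) */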
	struct intrhand *sc_ih;

	grant_ref_t sc_tx_ring_gntref;
	grant_ref_t sc_rx_ring_gntref;

	kmutex_t sc_tx_lock; /* protects free TX list, TX ring */
	kmutex_t sc_rx_lock; /* protects free RX list, RX ring, rxreql */
	struct xennet_txreq sc_txreqs[NET_TX_RING_SIZE];
	struct xennet_rxreq sc_rxreqs[NET_RX_RING_SIZE];
	SLIST_HEAD(,xennet_txreq) sc_txreq_head; /* list of free TX requests */
	SLIST_HEAD(,xennet_rxreq) sc_rxreq_head; /* list of free RX requests */
	int sc_free_txreql; /* number of free transmit request structs */
	int sc_free_rxreql; /* number of free receive request structs */

	int sc_backend_status; /* our status with backend */
#define BEST_CLOSED		0
#define BEST_DISCONNECTED	1
#define BEST_CONNECTED		2
#define BEST_SUSPENDED		3
	int sc_features;
#define FEATURE_IPV6CSUM	0x01	/* IPv6 checksum offload */
#define FEATURE_SG		0x02	/* scatter-gather */
#define FEATURE_RX_COPY		0x04	/* RX-copy */
#define FEATURE_BITS		"\20\1IPV6-CSUM\2SG\3RX-COPY"
	krndsource_t sc_rnd_source;
	struct evcnt sc_cnt_tx_defrag;
	struct evcnt sc_cnt_tx_queue_full;
	struct evcnt sc_cnt_tx_drop;
	struct evcnt sc_cnt_tx_frag;
	struct evcnt sc_cnt_rx_frag;
	struct evcnt sc_cnt_rx_cksum_blank;
	struct evcnt sc_cnt_rx_cksum_undefer;
};


/*
 * infrastructure for the sysctl variable: hw.xennet.xnfrx_lowat
 */

static struct sysctllog *xennet_log;
static int xennet_xnfrx_nodenum;


#ifndef XENNET_XNFRX_LOWAT
#define XENNET_XNFRX_LOWAT 0	/* HOW FEW XNFRX BUFS SHOULD WE KEEP? */
#endif

#define IF_XNFRX_LOWAT_MAX 128	/* Maximum allowed xnfrx low-water mark */

static int if_xnfrx_lowat = XENNET_XNFRX_LOWAT;

static pool_cache_t if_xennetrxbuf_cache;
static int if_xennetrxbuf_cache_inited = 0;

static int xennet_xenbus_match(device_t, cfdata_t, void *);
static void xennet_xenbus_attach(device_t, device_t, void *);
static int xennet_xenbus_detach(device_t, int);
static void xennet_backend_changed(void *, XenbusState);

static void xennet_alloc_rx_buffer(struct xennet_xenbus_softc *);
static void xennet_free_rx_buffer(struct xennet_xenbus_softc *, bool);
static void xennet_tx_complete(struct xennet_xenbus_softc *);
static void xennet_rx_mbuf_free(struct mbuf *, void *, size_t, void *);
static int xennet_handler(void *);
static bool xennet_talk_to_backend(struct xennet_xenbus_softc *);
#ifdef XENNET_DEBUG_DUMP
static void xennet_hex_dump(const unsigned char *, size_t, const char *, int);
#endif

static void xennet_sysctl_init(struct xennet_xenbus_softc *);
static int xennet_sysctl_verify(SYSCTLFN_PROTO);
static int xennet_init(struct ifnet *);
static void xennet_stop(struct ifnet *, int);
static void xennet_start(struct ifnet *);
static int xennet_ioctl(struct ifnet *, u_long, void *);

static bool xennet_xenbus_suspend(device_t dev, const pmf_qual_t *);
static bool xennet_xenbus_resume(device_t dev, const pmf_qual_t *);

CFATTACH_DECL3_NEW(xennet, sizeof(struct xennet_xenbus_softc),
    xennet_xenbus_match, xennet_xenbus_attach, xennet_xenbus_detach, NULL,
    NULL, NULL, DVF_DETACH_SHUTDOWN);

static int
xennet_xenbus_match(device_t parent, cfdata_t match, void *aux)
{
	struct xenbusdev_attach_args *xa = aux;

	if (strcmp(xa->xa_type, "vif") != 0)
		return 0;

	if (match->cf_loc[XENBUSCF_ID] != XENBUSCF_ID_DEFAULT &&
	    match->cf_loc[XENBUSCF_ID] != xa->xa_id)
		return 0;

	return 1;
}

static void
xennet_xenbus_attach(device_t parent, device_t self, void *aux)
{
	struct xennet_xenbus_softc *sc = device_private(self);
	struct xenbusdev_attach_args *xa = aux;
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
	int err;
	netif_tx_sring_t *tx_ring;
	netif_rx_sring_t *rx_ring;
	RING_IDX i;
	char *e, *p;
	unsigned long uval;
	extern int ifqmaxlen; /* XXX */
	char mac[32];
	char buf[64];
	bus_size_t maxsz;
	int nsegs;

	aprint_normal(": Xen Virtual Network Interface\n");
	sc->sc_dev = self;

	sc->sc_xbusd = xa->xa_xbusd;
	sc->sc_xbusd->xbusd_otherend_changed = xennet_backend_changed;

	/* read feature support flags */
	err = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend,
	    "feature-rx-copy", &uval, 10);
	if (!err && uval == 1)
		sc->sc_features |= FEATURE_RX_COPY;
	err = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend,
	    "feature-ipv6-csum-offload", &uval, 10);
	if (!err && uval == 1)
		sc->sc_features |= FEATURE_IPV6CSUM;
	err = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend,
	    "feature-sg", &uval, 10);
	if (!err && uval == 1)
		sc->sc_features |= FEATURE_SG;
	snprintb(buf, sizeof(buf), FEATURE_BITS, sc->sc_features);
	aprint_normal_dev(sc->sc_dev, "backend features %s\n", buf);

	/* xenbus ensures two devices can't be probed at the same time */
	if (if_xennetrxbuf_cache_inited == 0) {
		if_xennetrxbuf_cache = pool_cache_init(PAGE_SIZE, 0, 0, 0,
		    "xnfrx", NULL, IPL_NET, NULL, NULL, NULL);
		if (if_xnfrx_lowat) {
			pool_cache_setlowat(if_xennetrxbuf_cache,
			    if_xnfrx_lowat);
		}
		xennet_sysctl_init(sc);
		if_xennetrxbuf_cache_inited = 1;
	}

	/* initialize free TX and RX request lists */
	if (sc->sc_features & FEATURE_SG) {
		maxsz = ETHER_MAX_LEN_JUMBO;
		/*
		 * Linux netback drops the packet if the request has more
		 * segments than XEN_NETIF_NR_SLOTS_MIN (== 18). With 2KB
		 * MCLBYTES this means a maximum packet size of 36KB, in
		 * reality less due to mbuf chain fragmentation.
		 */
		nsegs = XEN_NETIF_NR_SLOTS_MIN;
	} else {
		maxsz = PAGE_SIZE;
		nsegs = 1;
	}
	mutex_init(&sc->sc_tx_lock, MUTEX_DEFAULT, IPL_NET);
	SLIST_INIT(&sc->sc_txreq_head);
	for (i = 0; i < NET_TX_RING_SIZE; i++) {
		struct xennet_txreq *txreq = &sc->sc_txreqs[i];

		txreq->txreq_id = i;
		if (bus_dmamap_create(sc->sc_xbusd->xbusd_dmat, maxsz, nsegs,
		    PAGE_SIZE, PAGE_SIZE, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,
		    &txreq->txreq_dmamap) != 0)
			break;

		SLIST_INSERT_HEAD(&sc->sc_txreq_head, &sc->sc_txreqs[i],
		    txreq_next);
	}
	sc->sc_free_txreql = i;

	mutex_init(&sc->sc_rx_lock, MUTEX_DEFAULT, IPL_NET);
	SLIST_INIT(&sc->sc_rxreq_head);
	for (i = 0; i < NET_RX_RING_SIZE; i++) {
		struct xennet_rxreq *rxreq = &sc->sc_rxreqs[i];
		rxreq->rxreq_id = i;
		if (bus_dmamap_create(sc->sc_xbusd->xbusd_dmat, maxsz, nsegs,
		    PAGE_SIZE, PAGE_SIZE, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,
		    &rxreq->rxreq_dmamap) != 0)
			break;
		rxreq->rxreq_gntref = GRANT_INVALID_REF;
		SLIST_INSERT_HEAD(&sc->sc_rxreq_head, rxreq, rxreq_next);
	}
	sc->sc_free_rxreql = i;
	if (sc->sc_free_rxreql == 0) {
		aprint_error_dev(self, "failed to allocate rx memory\n");
		return;
	}

	/* read mac address */
	err = xenbus_read(NULL, sc->sc_xbusd->xbusd_path, "mac",
	    mac, sizeof(mac));
	if (err) {
		aprint_error_dev(self, "can't read mac address, err %d\n", err);
		return;
	}
	for (i = 0, p = mac; i < ETHER_ADDR_LEN; i++) {
		sc->sc_enaddr[i] = strtoul(p, &e, 16);
		if ((e[0] == '\0' && i != 5) && e[0] != ':') {
			aprint_error_dev(self,
			    "%s is not a valid mac address\n", mac);
			return;
		}
		p = &e[1];
	}
	aprint_normal_dev(self, "MAC address %s\n",
	    ether_sprintf(sc->sc_enaddr));

	/* Initialize ifnet structure and attach interface */
	strlcpy(ifp->if_xname, device_xname(self), IFNAMSIZ);
	sc->sc_ethercom.ec_capabilities |= ETHERCAP_VLAN_MTU;
	if (sc->sc_features & FEATURE_SG)
		sc->sc_ethercom.ec_capabilities |= ETHERCAP_JUMBO_MTU;
	ifp->if_softc = sc;
	ifp->if_start = xennet_start;
	ifp->if_ioctl = xennet_ioctl;
	ifp->if_init = xennet_init;
	ifp->if_stop = xennet_stop;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_extflags = IFEF_MPSAFE;
	ifp->if_snd.ifq_maxlen = uimax(ifqmaxlen, NET_TX_RING_SIZE * 2);
	ifp->if_capabilities =
	    IFCAP_CSUM_UDPv4_Rx | IFCAP_CSUM_UDPv4_Tx
	    | IFCAP_CSUM_TCPv4_Rx | IFCAP_CSUM_TCPv4_Tx
	    | IFCAP_CSUM_UDPv6_Rx
	    | IFCAP_CSUM_TCPv6_Rx;
#define XN_M_CSUM_SUPPORTED						\
	(M_CSUM_TCPv4 | M_CSUM_UDPv4 | M_CSUM_TCPv6 | M_CSUM_UDPv6)

	if (sc->sc_features & FEATURE_IPV6CSUM) {
		/*
		 * If the backend supports IPv6 csum offloading, we can skip
		 * IPv6 csum for Tx packets. Rx packet validation can
		 * be skipped regardless.
		 */
		ifp->if_capabilities |=
		    IFCAP_CSUM_UDPv6_Tx | IFCAP_CSUM_TCPv6_Tx;
	}

	IFQ_SET_MAXLEN(&ifp->if_snd, uimax(2 * NET_TX_RING_SIZE, IFQ_MAXLEN));
	IFQ_SET_READY(&ifp->if_snd);
	if_attach(ifp);
	if_deferred_start_init(ifp, NULL);
	ether_ifattach(ifp, sc->sc_enaddr);

	/* alloc shared rings */
	tx_ring = (void *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
	    UVM_KMF_WIRED);
	rx_ring = (void *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
	    UVM_KMF_WIRED);
	if (tx_ring == NULL || rx_ring == NULL)
		panic("%s: can't alloc rings", device_xname(self));

	sc->sc_tx_ring.sring = tx_ring;
	sc->sc_rx_ring.sring = rx_ring;

	rnd_attach_source(&sc->sc_rnd_source, device_xname(sc->sc_dev),
	    RND_TYPE_NET, RND_FLAG_DEFAULT);

	evcnt_attach_dynamic(&sc->sc_cnt_tx_defrag, EVCNT_TYPE_MISC,
	    NULL, device_xname(sc->sc_dev), "Tx packet defrag");
	evcnt_attach_dynamic(&sc->sc_cnt_tx_frag, EVCNT_TYPE_MISC,
	    NULL, device_xname(sc->sc_dev), "Tx multi-segment packet");
	evcnt_attach_dynamic(&sc->sc_cnt_tx_drop, EVCNT_TYPE_MISC,
	    NULL, device_xname(sc->sc_dev), "Tx packet dropped");
	evcnt_attach_dynamic(&sc->sc_cnt_tx_queue_full, EVCNT_TYPE_MISC,
	    NULL, device_xname(sc->sc_dev), "Tx queue full");
	evcnt_attach_dynamic(&sc->sc_cnt_rx_frag, EVCNT_TYPE_MISC,
	    NULL, device_xname(sc->sc_dev), "Rx multi-segment packet");
	evcnt_attach_dynamic(&sc->sc_cnt_rx_cksum_blank, EVCNT_TYPE_MISC,
	    NULL, device_xname(sc->sc_dev), "Rx csum blank");
	evcnt_attach_dynamic(&sc->sc_cnt_rx_cksum_undefer, EVCNT_TYPE_MISC,
	    NULL, device_xname(sc->sc_dev), "Rx csum undeferred");

	if (!pmf_device_register(self, xennet_xenbus_suspend,
	    xennet_xenbus_resume))
		aprint_error_dev(self, "couldn't establish power handler\n");
	else
		pmf_class_network_register(self, ifp);

	/* resume shared structures and tell backend that we are ready */
	if (xennet_xenbus_resume(self, PMF_Q_NONE) == false) {
		uvm_km_free(kernel_map, (vaddr_t)tx_ring, PAGE_SIZE,
		    UVM_KMF_WIRED);
		uvm_km_free(kernel_map, (vaddr_t)rx_ring, PAGE_SIZE,
		    UVM_KMF_WIRED);
		return;
	}
}

static int
xennet_xenbus_detach(device_t self, int flags)
{
	struct xennet_xenbus_softc *sc = device_private(self);
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;

	if ((flags & (DETACH_SHUTDOWN | DETACH_FORCE)) == DETACH_SHUTDOWN) {
		/* Trigger state transition with backend */
		xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateClosing);
		return EBUSY;
	}

	DPRINTF(("%s: xennet_xenbus_detach\n", device_xname(self)));

	/* stop interface */
	IFNET_LOCK(ifp);
	xennet_stop(ifp, 1);
	IFNET_UNLOCK(ifp);
	if (sc->sc_ih != NULL) {
		xen_intr_disestablish(sc->sc_ih);
		sc->sc_ih = NULL;
	}

	/* collect any outstanding TX responses */
	mutex_enter(&sc->sc_tx_lock);
	xennet_tx_complete(sc);
	while (sc->sc_tx_ring.sring->rsp_prod != sc->sc_tx_ring.rsp_cons) {
		kpause("xndetach", true, hz/2, &sc->sc_tx_lock);
		xennet_tx_complete(sc);
	}
	mutex_exit(&sc->sc_tx_lock);

	mutex_enter(&sc->sc_rx_lock);
	xennet_free_rx_buffer(sc, true);
	mutex_exit(&sc->sc_rx_lock);

	ether_ifdetach(ifp);
	if_detach(ifp);

	evcnt_detach(&sc->sc_cnt_tx_defrag);
	evcnt_detach(&sc->sc_cnt_tx_frag);
	evcnt_detach(&sc->sc_cnt_tx_drop);
	evcnt_detach(&sc->sc_cnt_tx_queue_full);
	evcnt_detach(&sc->sc_cnt_rx_frag);
	evcnt_detach(&sc->sc_cnt_rx_cksum_blank);
	evcnt_detach(&sc->sc_cnt_rx_cksum_undefer);

	/* Unhook the entropy source. */
	rnd_detach_source(&sc->sc_rnd_source);

	/* Wait until the tx/rx rings stop being used by the backend */
	mutex_enter(&sc->sc_tx_lock);
	while (xengnt_status(sc->sc_tx_ring_gntref))
		kpause("xntxref", true, hz/2, &sc->sc_tx_lock);
	xengnt_revoke_access(sc->sc_tx_ring_gntref);
	mutex_exit(&sc->sc_tx_lock);
	uvm_km_free(kernel_map, (vaddr_t)sc->sc_tx_ring.sring, PAGE_SIZE,
	    UVM_KMF_WIRED);
	mutex_enter(&sc->sc_rx_lock);
	while (xengnt_status(sc->sc_rx_ring_gntref))
		kpause("xnrxref", true, hz/2, &sc->sc_rx_lock);
	xengnt_revoke_access(sc->sc_rx_ring_gntref);
	mutex_exit(&sc->sc_rx_lock);
	uvm_km_free(kernel_map, (vaddr_t)sc->sc_rx_ring.sring, PAGE_SIZE,
	    UVM_KMF_WIRED);

	pmf_device_deregister(self);

	sc->sc_backend_status = BEST_DISCONNECTED;

	DPRINTF(("%s: xennet_xenbus_detach done\n", device_xname(self)));
	return 0;
}

static bool
xennet_xenbus_resume(device_t dev, const pmf_qual_t *qual)
{
	struct xennet_xenbus_softc *sc = device_private(dev);
	int error;
	netif_tx_sring_t *tx_ring;
	netif_rx_sring_t *rx_ring;
	paddr_t ma;

	/* All grants were removed during suspend */
	sc->sc_tx_ring_gntref = GRANT_INVALID_REF;
	sc->sc_rx_ring_gntref = GRANT_INVALID_REF;

	mutex_enter(&sc->sc_rx_lock);
	/* Free but don't revoke, the grant is gone */
	xennet_free_rx_buffer(sc, false);
	KASSERT(sc->sc_free_rxreql == NET_RX_RING_SIZE);
	mutex_exit(&sc->sc_rx_lock);

	tx_ring = sc->sc_tx_ring.sring;
	rx_ring = sc->sc_rx_ring.sring;

	/* Initialize rings */
	memset(tx_ring, 0, PAGE_SIZE);
	SHARED_RING_INIT(tx_ring);
	FRONT_RING_INIT(&sc->sc_tx_ring, tx_ring, PAGE_SIZE);

	memset(rx_ring, 0, PAGE_SIZE);
	SHARED_RING_INIT(rx_ring);
	FRONT_RING_INIT(&sc->sc_rx_ring, rx_ring, PAGE_SIZE);

	(void)pmap_extract_ma(pmap_kernel(), (vaddr_t)tx_ring, &ma);
	error = xenbus_grant_ring(sc->sc_xbusd, ma, &sc->sc_tx_ring_gntref);
	if (error)
		goto abort_resume;
	(void)pmap_extract_ma(pmap_kernel(), (vaddr_t)rx_ring, &ma);
	error = xenbus_grant_ring(sc->sc_xbusd, ma, &sc->sc_rx_ring_gntref);
	if (error)
		goto abort_resume;

	if (sc->sc_ih != NULL) {
		xen_intr_disestablish(sc->sc_ih);
		sc->sc_ih = NULL;
	}
	error = xenbus_alloc_evtchn(sc->sc_xbusd, &sc->sc_evtchn);
	if (error)
		goto abort_resume;
	aprint_verbose_dev(dev, "using event channel %d\n",
	    sc->sc_evtchn);
	sc->sc_ih = xen_intr_establish_xname(-1, &xen_pic, sc->sc_evtchn,
	    IST_LEVEL, IPL_NET, &xennet_handler, sc, true, device_xname(dev));
	KASSERT(sc->sc_ih != NULL);

	/* Re-fill Rx ring */
	mutex_enter(&sc->sc_rx_lock);
	xennet_alloc_rx_buffer(sc);
	KASSERT(sc->sc_free_rxreql == 0);
	mutex_exit(&sc->sc_rx_lock);

	xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateInitialised);

	if (sc->sc_backend_status == BEST_SUSPENDED) {
		if (xennet_talk_to_backend(sc)) {
			xenbus_device_resume(sc->sc_xbusd);
			hypervisor_unmask_event(sc->sc_evtchn);
			xenbus_switch_state(sc->sc_xbusd, NULL,
			    XenbusStateConnected);
		}
	}

	return true;

abort_resume:
	xenbus_dev_fatal(sc->sc_xbusd, error, "resuming device");
	return false;
}

static bool
xennet_talk_to_backend(struct xennet_xenbus_softc *sc)
{
	int error;
	struct xenbus_transaction *xbt;
	const char *errmsg;

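	/*
	 * Publish the ring references, the event channel and the supported
	 * features to the backend in a single xenbus transaction; the whole
	 * transaction is restarted from "again" if it ends with EAGAIN.
	 */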
again:
	xbt = xenbus_transaction_start();
	if (xbt == NULL)
		return false;
	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
	    "vifname", "%s", device_xname(sc->sc_dev));
	if (error) {
		errmsg = "vifname";
		goto abort_transaction;
	}
	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
	    "tx-ring-ref","%u", sc->sc_tx_ring_gntref);
	if (error) {
		errmsg = "writing tx ring-ref";
		goto abort_transaction;
	}
	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
	    "rx-ring-ref","%u", sc->sc_rx_ring_gntref);
	if (error) {
		errmsg = "writing rx ring-ref";
		goto abort_transaction;
	}
	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
	    "request-rx-copy", "%u", 1);
	if (error) {
		errmsg = "writing request-rx-copy";
		goto abort_transaction;
	}
	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
	    "feature-rx-notify", "%u", 1);
	if (error) {
		errmsg = "writing feature-rx-notify";
		goto abort_transaction;
	}
	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
	    "feature-ipv6-csum-offload", "%u", 1);
	if (error) {
		errmsg = "writing feature-ipv6-csum-offload";
		goto abort_transaction;
	}
	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
	    "feature-sg", "%u", 1);
	if (error) {
		errmsg = "writing feature-sg";
		goto abort_transaction;
	}
	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
	    "event-channel", "%u", sc->sc_evtchn);
	if (error) {
		errmsg = "writing event channel";
		goto abort_transaction;
	}
	error = xenbus_transaction_end(xbt, 0);
	if (error == EAGAIN)
		goto again;
	if (error) {
		xenbus_dev_fatal(sc->sc_xbusd, error, "completing transaction");
		return false;
	}
	mutex_enter(&sc->sc_rx_lock);
	xennet_alloc_rx_buffer(sc);
	mutex_exit(&sc->sc_rx_lock);

	sc->sc_backend_status = BEST_CONNECTED;

	return true;

abort_transaction:
	xenbus_transaction_end(xbt, 1);
	xenbus_dev_fatal(sc->sc_xbusd, error, "%s", errmsg);
	return false;
}

static bool
xennet_xenbus_suspend(device_t dev, const pmf_qual_t *qual)
{
	struct xennet_xenbus_softc *sc = device_private(dev);

	/*
	 * xennet_stop() is called by pmf(9) before xennet_xenbus_suspend(),
	 * so we do not mask the event channel here
	 */

	mutex_enter(&sc->sc_tx_lock);

	/* collect any outstanding TX responses */
	xennet_tx_complete(sc);
	while (sc->sc_tx_ring.sring->rsp_prod != sc->sc_tx_ring.rsp_cons) {
		kpause("xnsuspend", true, hz/2, &sc->sc_tx_lock);
		xennet_tx_complete(sc);
	}
	KASSERT(sc->sc_free_txreql == NET_TX_RING_SIZE);
	mutex_exit(&sc->sc_tx_lock);

	/*
	 * dom0 may still use references to the grants we gave away
	 * earlier during RX buffer allocation. So we do not free RX buffers
	 * here, as dom0 does not expect the guest domain to suddenly revoke
	 * access to these grants.
	 */
	sc->sc_backend_status = BEST_SUSPENDED;

	xenbus_device_suspend(sc->sc_xbusd);
	aprint_verbose_dev(dev, "removed event channel %d\n", sc->sc_evtchn);

	return true;
}

static void xennet_backend_changed(void *arg, XenbusState new_state)
{
	struct xennet_xenbus_softc *sc = device_private((device_t)arg);
	DPRINTF(("%s: new backend state %d\n",
	    device_xname(sc->sc_dev), new_state));

	switch (new_state) {
	case XenbusStateInitialising:
	case XenbusStateInitialised:
	case XenbusStateConnected:
		break;
	case XenbusStateClosing:
		sc->sc_backend_status = BEST_CLOSED;
		xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateClosed);
		break;
	case XenbusStateInitWait:
		if (sc->sc_backend_status == BEST_CONNECTED
		    || sc->sc_backend_status == BEST_SUSPENDED)
			break;

		if (xennet_talk_to_backend(sc))
			xenbus_switch_state(sc->sc_xbusd, NULL,
			    XenbusStateConnected);
		break;
	case XenbusStateUnknown:
	default:
		panic("bad backend state %d", new_state);
	}
}

/*
 * Allocate RX buffers and put the associated request structures
 * in the ring. This allows the backend to use them to communicate with
 * the frontend when some data is destined for the frontend.
 */
static void
xennet_alloc_rx_buffer(struct xennet_xenbus_softc *sc)
{
	RING_IDX req_prod = sc->sc_rx_ring.req_prod_pvt;
	RING_IDX i;
	netif_rx_request_t *rxreq;
	struct xennet_rxreq *req;
	int otherend_id, notify;
	struct mbuf *m;
	vaddr_t va;
	paddr_t pa, ma;
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;

	KASSERT(mutex_owned(&sc->sc_rx_lock));

	otherend_id = sc->sc_xbusd->xbusd_otherend_id;

	for (i = 0; sc->sc_free_rxreql != 0; i++) {
		req = SLIST_FIRST(&sc->sc_rxreq_head);
		KASSERT(req != NULL);
		KASSERT(req == &sc->sc_rxreqs[req->rxreq_id]);
		KASSERT(req->rxreq_m == NULL);
		KASSERT(req->rxreq_gntref == GRANT_INVALID_REF);

		MGETHDR(m, M_DONTWAIT, MT_DATA);
		if (__predict_false(m == NULL)) {
			printf("%s: rx no mbuf\n", ifp->if_xname);
			break;
		}

		va = (vaddr_t)pool_cache_get_paddr(
		    if_xennetrxbuf_cache, PR_NOWAIT, &pa);
		if (__predict_false(va == 0)) {
			printf("%s: rx no cluster\n", ifp->if_xname);
			m_freem(m);
			break;
		}

		MEXTADD(m, va, PAGE_SIZE,
		    M_DEVBUF, xennet_rx_mbuf_free, NULL);
		m->m_len = m->m_pkthdr.len = PAGE_SIZE;
		m->m_ext.ext_paddr = pa;
		m->m_flags |= M_EXT_RW; /* we own the buffer */

		/* Set M_EXT_CLUSTER so that load_mbuf uses m_ext.ext_paddr */
		m->m_flags |= M_EXT_CLUSTER;
		if (__predict_false(bus_dmamap_load_mbuf(
		    sc->sc_xbusd->xbusd_dmat,
		    req->rxreq_dmamap, m, BUS_DMA_NOWAIT) != 0)) {
			printf("%s: rx mbuf load failed", ifp->if_xname);
			m->m_flags &= ~M_EXT_CLUSTER;
			m_freem(m);
			break;
		}
		m->m_flags &= ~M_EXT_CLUSTER;

		KASSERT(req->rxreq_dmamap->dm_nsegs == 1);
		ma = req->rxreq_dmamap->dm_segs[0].ds_addr;

		if (xengnt_grant_access(otherend_id, trunc_page(ma),
		    0, &req->rxreq_gntref) != 0) {
			m_freem(m);
			break;
		}

		req->rxreq_m = m;

		rxreq = RING_GET_REQUEST(&sc->sc_rx_ring, req_prod + i);
		rxreq->id = req->rxreq_id;
		rxreq->gref = req->rxreq_gntref;

		SLIST_REMOVE_HEAD(&sc->sc_rxreq_head, rxreq_next);
		sc->sc_free_rxreql--;
	}

	/* Notify backend if more Rx is possible */
	if (i > 0) {
		sc->sc_rx_ring.req_prod_pvt = req_prod + i;
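		/*
		 * Push the new requests; the ring macro below only asks for
		 * a hypervisor notification when the backend may be asleep
		 * and could otherwise miss the update.
		 */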
		RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_rx_ring, notify);
		if (notify)
			hypervisor_notify_via_evtchn(sc->sc_evtchn);
	}
}

/*
 * Reclaim all RX buffers used by the I/O ring between frontend and backend
 */
static void
xennet_free_rx_buffer(struct xennet_xenbus_softc *sc, bool revoke)
{
	RING_IDX i;

	KASSERT(mutex_owned(&sc->sc_rx_lock));

	DPRINTF(("%s: xennet_free_rx_buffer\n", device_xname(sc->sc_dev)));
	/* get back memory from RX ring */
	for (i = 0; i < NET_RX_RING_SIZE; i++) {
		struct xennet_rxreq *rxreq = &sc->sc_rxreqs[i];

		if (rxreq->rxreq_gntref != GRANT_INVALID_REF) {
			/*
			 * this req is still granted. Get back the page or
			 * allocate a new one, and remap it.
			 */
			SLIST_INSERT_HEAD(&sc->sc_rxreq_head, rxreq,
			    rxreq_next);
			sc->sc_free_rxreql++;

			if (revoke)
				xengnt_revoke_access(rxreq->rxreq_gntref);
			rxreq->rxreq_gntref = GRANT_INVALID_REF;
		}

		if (rxreq->rxreq_m != NULL) {
			m_freem(rxreq->rxreq_m);
			rxreq->rxreq_m = NULL;
		}
	}
	DPRINTF(("%s: xennet_free_rx_buffer done\n", device_xname(sc->sc_dev)));
}

/*
 * Clears a used RX request when its associated mbuf has been processed
 */
static void
xennet_rx_mbuf_free(struct mbuf *m, void *buf, size_t size, void *arg)
{
	KASSERT(buf == m->m_ext.ext_buf);
	KASSERT(arg == NULL);
	KASSERT(m != NULL);
	vaddr_t va = (vaddr_t)(buf) & ~((vaddr_t)PAGE_MASK);
	pool_cache_put_paddr(if_xennetrxbuf_cache,
	    (void *)va, m->m_ext.ext_paddr);
	pool_cache_put(mb_cache, m);
};

static void
xennet_rx_free_req(struct xennet_xenbus_softc *sc, struct xennet_rxreq *req)
{
	KASSERT(mutex_owned(&sc->sc_rx_lock));

	/* put the RX request back in the list of free RX requests */
	SLIST_INSERT_HEAD(&sc->sc_rxreq_head, req, rxreq_next);
	sc->sc_free_rxreql++;

	/*
	 * ring needs more requests to be pushed in, allocate some
	 * RX buffers to catch up with the backend's consumption
	 */
	if (sc->sc_free_rxreql >= (NET_RX_RING_SIZE * 4 / 5) &&
	    __predict_true(sc->sc_backend_status == BEST_CONNECTED)) {
		xennet_alloc_rx_buffer(sc);
	}
}

/*
 * Process responses associated with the TX mbufs sent previously through
 * xennet_start().
 * Called at splsoftnet.
 */
static void
xennet_tx_complete(struct xennet_xenbus_softc *sc)
{
	struct xennet_txreq *req;
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
	RING_IDX resp_prod, i;

	DPRINTFN(XEDB_EVENT, ("xennet_tx_complete prod %d cons %d\n",
	    sc->sc_tx_ring.sring->rsp_prod, sc->sc_tx_ring.rsp_cons));

	KASSERT(mutex_owned(&sc->sc_tx_lock));
again:
	resp_prod = sc->sc_tx_ring.sring->rsp_prod;
	xen_rmb();
	for (i = sc->sc_tx_ring.rsp_cons; i != resp_prod; i++) {
		req = &sc->sc_txreqs[RING_GET_RESPONSE(&sc->sc_tx_ring, i)->id];
		KASSERT(req->txreq_id ==
		    RING_GET_RESPONSE(&sc->sc_tx_ring, i)->id);
		KASSERT(xengnt_status(req->txreq_gntref) == 0);
		xengnt_revoke_access(req->txreq_gntref);
		req->txreq_gntref = GRANT_INVALID_REF;

		/* Cleanup/statistics if this is the master req of a chain */
		if (req->txreq_m) {
			if (__predict_false(
			    RING_GET_RESPONSE(&sc->sc_tx_ring, i)->status !=
			    NETIF_RSP_OKAY))
				if_statinc(ifp, if_oerrors);
			else
				if_statinc(ifp, if_opackets);
			bus_dmamap_unload(sc->sc_xbusd->xbusd_dmat,
			    req->txreq_dmamap);
			m_freem(req->txreq_m);
			req->txreq_m = NULL;
		}

		SLIST_INSERT_HEAD(&sc->sc_txreq_head, req, txreq_next);
		sc->sc_free_txreql++;
	}
	sc->sc_tx_ring.rsp_cons = resp_prod;
	/* set new event and check for race with rsp_cons update */
	xen_wmb();
	sc->sc_tx_ring.sring->rsp_event =
	    resp_prod + ((sc->sc_tx_ring.sring->req_prod - resp_prod) >> 1) + 1;
	xen_mb();
	if (resp_prod != sc->sc_tx_ring.sring->rsp_prod)
		goto again;
}

/*
 * Xennet event handler.
 * Get outstanding responses of TX packets, then collect all responses of
 * pending RX packets.
 * Called at splnet.
 */
static int
xennet_handler(void *arg)
{
	struct xennet_xenbus_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
	RING_IDX resp_prod, i;
	struct xennet_rxreq *req;
	struct mbuf *m, *m0;
	int rxflags, m0_rxflags;
	int more_to_do;

	if (sc->sc_backend_status != BEST_CONNECTED)
		return 1;

	/* Poke Tx queue if we ran out of Tx buffers earlier */
	if_schedule_deferred_start(ifp);

	rnd_add_uint32(&sc->sc_rnd_source, sc->sc_tx_ring.req_prod_pvt);

again:
	DPRINTFN(XEDB_EVENT, ("xennet_handler prod %d cons %d\n",
	    sc->sc_rx_ring.sring->rsp_prod, sc->sc_rx_ring.rsp_cons));

	mutex_enter(&sc->sc_rx_lock);
	resp_prod = sc->sc_rx_ring.sring->rsp_prod;
	xen_rmb(); /* ensure we see replies up to resp_prod */

	m0 = NULL;
	for (i = sc->sc_rx_ring.rsp_cons; i != resp_prod; i++) {
		netif_rx_response_t *rx = RING_GET_RESPONSE(&sc->sc_rx_ring, i);
		req = &sc->sc_rxreqs[rx->id];
		KASSERT(req->rxreq_gntref != GRANT_INVALID_REF);
		KASSERT(req->rxreq_id == rx->id);

		xengnt_revoke_access(req->rxreq_gntref);
		req->rxreq_gntref = GRANT_INVALID_REF;

		m = req->rxreq_m;
		req->rxreq_m = NULL;

		m->m_len = m->m_pkthdr.len = rx->status;
		bus_dmamap_sync(sc->sc_xbusd->xbusd_dmat, req->rxreq_dmamap, 0,
		    m->m_pkthdr.len, BUS_DMASYNC_POSTREAD);

		if (m0 == NULL) {
			MCLAIM(m, &sc->sc_ethercom.ec_rx_mowner);
			m_set_rcvif(m, ifp);
		}

		rxflags = rx->flags;

		if (m0 || rxflags & NETRXF_more_data) {
			/*
			 * On Rx, every fragment (even the first one) contains
			 * just the length of the data in that fragment.
			 */
			if (m0 == NULL) {
				m0 = m;
				m0_rxflags = rxflags;
			} else {
				m_cat(m0, m);
				m0->m_pkthdr.len += m->m_len;
			}

			if (rxflags & NETRXF_more_data) {
				/* Still more fragments to receive */
				xennet_rx_free_req(sc, req);
				continue;
			}

			sc->sc_cnt_rx_frag.ev_count++;
			m = m0;
			m0 = NULL;
			rxflags = m0_rxflags;
		}

		if (rxflags & NETRXF_csum_blank) {
			xennet_checksum_fill(ifp, m, &sc->sc_cnt_rx_cksum_blank,
			    &sc->sc_cnt_rx_cksum_undefer);
		} else if (rxflags & NETRXF_data_validated)
			m->m_pkthdr.csum_flags = XN_M_CSUM_SUPPORTED;

		/* We're done with req */
		xennet_rx_free_req(sc, req);

		/* Pass the packet up. */
		if_percpuq_enqueue(ifp->if_percpuq, m);
	}
	/* If the queued Rx fragments did not finish the packet, drop it */
	if (m0) {
		if_statinc(ifp, if_iqdrops);
		m_freem(m0);
	}
	sc->sc_rx_ring.rsp_cons = i;
	xen_wmb();
	RING_FINAL_CHECK_FOR_RESPONSES(&sc->sc_rx_ring, more_to_do);
	mutex_exit(&sc->sc_rx_lock);

	if (more_to_do) {
		DPRINTF(("%s: %s more_to_do\n", ifp->if_xname, __func__));
		goto again;
	}

	return 1;
}

static bool
xennet_submit_tx_request(struct xennet_xenbus_softc *sc, struct mbuf *m,
    struct xennet_txreq *req0, int *req_prod)
{
	struct xennet_txreq *req = req0;
	netif_tx_request_t *txreq;
	int i, prod = *req_prod;
	const bool multiseg = (req0->txreq_dmamap->dm_nsegs > 1);
	const int lastseg = req0->txreq_dmamap->dm_nsegs - 1;
	bus_dma_segment_t *ds;
	SLIST_HEAD(, xennet_txreq) txchain;

	KASSERT(mutex_owned(&sc->sc_tx_lock));
	KASSERT(req0->txreq_dmamap->dm_nsegs > 0);

	bus_dmamap_sync(sc->sc_xbusd->xbusd_dmat, req->txreq_dmamap, 0,
	    m->m_pkthdr.len, BUS_DMASYNC_PREWRITE);
	MCLAIM(m, &sc->sc_ethercom.ec_tx_mowner);
	SLIST_INIT(&txchain);

	for (i = 0; i < req0->txreq_dmamap->dm_nsegs; i++) {
		KASSERT(req != NULL);

		ds = &req0->txreq_dmamap->dm_segs[i];

		if (__predict_false(xengnt_grant_access(
		    sc->sc_xbusd->xbusd_otherend_id,
		    trunc_page(ds->ds_addr),
		    GNTMAP_readonly, &req->txreq_gntref) != 0)) {
			goto grant_fail;
		}

		KASSERT(SLIST_FIRST(&sc->sc_txreq_head) == req);
		SLIST_REMOVE_HEAD(&sc->sc_txreq_head, txreq_next);
		SLIST_INSERT_HEAD(&txchain, req, txreq_next);
		sc->sc_free_txreql--;
		req->txreq_m = (req == req0) ? m : NULL;

		txreq = RING_GET_REQUEST(&sc->sc_tx_ring, prod + i);
		txreq->id = req->txreq_id;
		txreq->gref = req->txreq_gntref;
		txreq->offset = ds->ds_addr & PAGE_MASK;
		/* For Tx, first fragment size is always set to total size */
		txreq->size = (i == 0) ? m->m_pkthdr.len : ds->ds_len;
		txreq->flags = 0;
		if (i == 0) {
			if (m->m_pkthdr.csum_flags & XN_M_CSUM_SUPPORTED) {
				txreq->flags |= NETTXF_csum_blank;
			} else {
#if 0
				/*
				 * XXX Checksum optimization disabled
				 * to avoid port-xen/57743.
				 */
				txreq->flags |= NETTXF_data_validated;
#endif
			}
		}
		if (multiseg && i < lastseg)
			txreq->flags |= NETTXF_more_data;

		req = SLIST_FIRST(&sc->sc_txreq_head);
	}

	if (i > 1)
		sc->sc_cnt_tx_frag.ev_count++;

	/* All done */
	*req_prod += i;
	return true;

grant_fail:
	printf("%s: grant_access failed\n", device_xname(sc->sc_dev));
	while (!SLIST_EMPTY(&txchain)) {
		req = SLIST_FIRST(&txchain);
		SLIST_REMOVE_HEAD(&txchain, txreq_next);
		xengnt_revoke_access(req->txreq_gntref);
		req->txreq_gntref = GRANT_INVALID_REF;
		SLIST_INSERT_HEAD(&sc->sc_txreq_head, req, txreq_next);
		sc->sc_free_txreql++;
	}
	req0->txreq_m = NULL;
	return false;
}

/*
 * The output routine of a xennet interface. Prepares mbufs for TX,
 * and notifies the backend when finished.
 * Called at splsoftnet.
 */
void
xennet_start(struct ifnet *ifp)
{
	struct xennet_xenbus_softc *sc = ifp->if_softc;
	struct mbuf *m;
	RING_IDX req_prod;
	struct xennet_txreq *req;
	int notify;

	mutex_enter(&sc->sc_tx_lock);

	rnd_add_uint32(&sc->sc_rnd_source, sc->sc_tx_ring.req_prod_pvt);

	xennet_tx_complete(sc);

	req_prod = sc->sc_tx_ring.req_prod_pvt;
	while (/*CONSTCOND*/1) {
		req = SLIST_FIRST(&sc->sc_txreq_head);
		if (__predict_false(req == NULL)) {
			if (!IFQ_IS_EMPTY(&ifp->if_snd))
				sc->sc_cnt_tx_queue_full.ev_count++;
			break;
		}
		IFQ_DEQUEUE(&ifp->if_snd, m);
		if (m == NULL)
			break;

		/*
		 * For short packets it's always way faster to pass a
		 * single defragmented packet, even with feature-sg.
		 * Try to defragment first if the result is likely to fit
		 * into a single mbuf.
		 */
		if (m->m_pkthdr.len < MCLBYTES && m->m_next)
			(void)m_defrag(m, M_DONTWAIT);

		/* Try to load the mbuf as-is, if that fails defrag */
		if (__predict_false(bus_dmamap_load_mbuf(
		    sc->sc_xbusd->xbusd_dmat,
		    req->txreq_dmamap, m, BUS_DMA_NOWAIT) != 0)) {
			sc->sc_cnt_tx_defrag.ev_count++;
			if (__predict_false(m_defrag(m, M_DONTWAIT) == NULL)) {
				DPRINTF(("%s: defrag failed\n",
				    device_xname(sc->sc_dev)));
				m_freem(m);
				break;
			}

			if (__predict_false(bus_dmamap_load_mbuf(
			    sc->sc_xbusd->xbusd_dmat,
			    req->txreq_dmamap, m, BUS_DMA_NOWAIT) != 0)) {
				printf("%s: cannot load new mbuf len %d\n",
				    device_xname(sc->sc_dev),
				    m->m_pkthdr.len);
				m_freem(m);
				break;
			}
		}

		if (req->txreq_dmamap->dm_nsegs > sc->sc_free_txreql) {
			/* Not enough slots right now, postpone */
			sc->sc_cnt_tx_queue_full.ev_count++;
			sc->sc_cnt_tx_drop.ev_count++;
			bus_dmamap_unload(sc->sc_xbusd->xbusd_dmat,
			    req->txreq_dmamap);
			m_freem(m);
			break;
		}

		DPRINTFN(XEDB_MBUF, ("xennet_start id %d, "
		    "mbuf %p, buf %p, size %d\n",
		    req->txreq_id, m, mtod(m, void *), m->m_pkthdr.len));

#ifdef XENNET_DEBUG_DUMP
		xennet_hex_dump(mtod(m, u_char *), m->m_pkthdr.len, "s",
		    req->txreq_id);
#endif

		if (!xennet_submit_tx_request(sc, m, req, &req_prod)) {
			/* Grant failed, postpone */
			sc->sc_cnt_tx_drop.ev_count++;
			bus_dmamap_unload(sc->sc_xbusd->xbusd_dmat,
			    req->txreq_dmamap);
			m_freem(m);
			break;
		}

		/*
		 * Pass packet to bpf if there is a listener.
		 */
		bpf_mtap(ifp, m, BPF_D_OUT);
	}

	sc->sc_tx_ring.req_prod_pvt = req_prod;
	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_tx_ring, notify);
	if (notify)
		hypervisor_notify_via_evtchn(sc->sc_evtchn);

	mutex_exit(&sc->sc_tx_lock);

	DPRINTFN(XEDB_FOLLOW, ("%s: xennet_start() done\n",
	    device_xname(sc->sc_dev)));
}

int
xennet_ioctl(struct ifnet *ifp, u_long cmd, void *data)
{
#ifdef XENNET_DEBUG
	struct xennet_xenbus_softc *sc = ifp->if_softc;
#endif
	int error = 0;

#ifdef NET_MPSAFE
#ifdef notyet
	/* XXX IFNET_LOCK() is not taken in some cases e.g. multicast ioctls */
	KASSERT(IFNET_LOCKED(ifp));
#endif
#endif
	int s = splnet();

	DPRINTFN(XEDB_FOLLOW, ("%s: xennet_ioctl()\n",
	    device_xname(sc->sc_dev)));
	error = ether_ioctl(ifp, cmd, data);
	if (error == ENETRESET)
		error = 0;

	DPRINTFN(XEDB_FOLLOW, ("%s: xennet_ioctl() returning %d\n",
	    device_xname(sc->sc_dev), error));

	splx(s);

	return error;
}

int
xennet_init(struct ifnet *ifp)
{
	struct xennet_xenbus_softc *sc = ifp->if_softc;

	KASSERT(IFNET_LOCKED(ifp));

	DPRINTFN(XEDB_FOLLOW, ("%s: xennet_init()\n",
	    device_xname(sc->sc_dev)));

	if ((ifp->if_flags & IFF_RUNNING) == 0) {
		mutex_enter(&sc->sc_rx_lock);
		sc->sc_rx_ring.sring->rsp_event =
		    sc->sc_rx_ring.rsp_cons + 1;
		mutex_exit(&sc->sc_rx_lock);
		hypervisor_unmask_event(sc->sc_evtchn);
		hypervisor_notify_via_evtchn(sc->sc_evtchn);
	}
	ifp->if_flags |= IFF_RUNNING;

	return 0;
}

void
xennet_stop(struct ifnet *ifp, int disable)
{
	struct xennet_xenbus_softc *sc = ifp->if_softc;

	KASSERT(IFNET_LOCKED(ifp));

	ifp->if_flags &= ~IFF_RUNNING;
	hypervisor_mask_event(sc->sc_evtchn);
}

/*
 * Set up sysctl(3) MIB, hw.xennet.*.
 */
static void
xennet_sysctl_init(struct xennet_xenbus_softc *sc)
{
	int rc, xennet_root_num;
	const struct sysctlnode *node;

	if ((rc = sysctl_createv(&xennet_log, 0, NULL, &node,
	    0, CTLTYPE_NODE, "xennet",
	    SYSCTL_DESCR("XENNET interface controls"),
	    NULL, 0, NULL, 0, CTL_HW, CTL_CREATE, CTL_EOL)) != 0) {
		goto out;
	}

	xennet_root_num = node->sysctl_num;

	/* xnfrx_lowat setting */
	if ((rc = sysctl_createv(&xennet_log, 0, NULL, &node,
	    CTLFLAG_READWRITE,
	    CTLTYPE_INT, "xnfrx_lowat",
	    SYSCTL_DESCR("xnfrx low water threshold"),
	    xennet_sysctl_verify, 0,
	    &if_xnfrx_lowat,
	    0, CTL_HW, xennet_root_num, CTL_CREATE,
	    CTL_EOL)) != 0) {
		goto out;
	}

	xennet_xnfrx_nodenum = node->sysctl_num;

	return;

out:
	aprint_error("%s: sysctl_createv failed (rc = %d)\n", __func__, rc);
}

static int
xennet_sysctl_verify(SYSCTLFN_ARGS)
{
	int error, t;
	struct sysctlnode node;

	node = *rnode;
	t = *(int*)rnode->sysctl_data;
	node.sysctl_data = &t;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error || newp == NULL)
		return error;

#if 0
	DPRINTF2(("%s: t = %d, nodenum = %d, rnodenum = %d\n", __func__, t,
	    node.sysctl_num, rnode->sysctl_num));
#endif

	if (node.sysctl_num == xennet_xnfrx_nodenum) {
		if (t < 0 || t >= IF_XNFRX_LOWAT_MAX)
			return EINVAL;
		if_xnfrx_lowat = t;
		pool_cache_setlowat(if_xennetrxbuf_cache, if_xnfrx_lowat);
	} else
		return EINVAL;

	*(int*)rnode->sysctl_data = t;

	return 0;
}

#if defined(NFS_BOOT_BOOTSTATIC)
int
xennet_bootstatic_callback(struct nfs_diskless *nd)
{
#if 0
	struct ifnet *ifp = nd->nd_ifp;
	struct xennet_xenbus_softc *sc =
	    (struct xennet_xenbus_softc *)ifp->if_softc;
#endif
	int flags = 0;
	union xen_cmdline_parseinfo xcp;
	struct sockaddr_in *sin;

	memset(&xcp, 0, sizeof(xcp.xcp_netinfo));
	xcp.xcp_netinfo.xi_ifno = /* XXX sc->sc_ifno */ 0;
	xcp.xcp_netinfo.xi_root = nd->nd_root.ndm_host;
	xen_parse_cmdline(XEN_PARSE_NETINFO, &xcp);

	if (xcp.xcp_netinfo.xi_root[0] != '\0') {
		flags |= NFS_BOOT_HAS_SERVER;
		if (strchr(xcp.xcp_netinfo.xi_root, ':') != NULL)
			flags |= NFS_BOOT_HAS_ROOTPATH;
	}

	nd->nd_myip.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[0]);
	nd->nd_gwip.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[2]);
	nd->nd_mask.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[3]);

	sin = (struct sockaddr_in *) &nd->nd_root.ndm_saddr;
	memset((void *)sin, 0, sizeof(*sin));
	sin->sin_len = sizeof(*sin);
	sin->sin_family = AF_INET;
	sin->sin_addr.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[1]);

	if (nd->nd_myip.s_addr)
		flags |= NFS_BOOT_HAS_MYIP;
	if (nd->nd_gwip.s_addr)
		flags |= NFS_BOOT_HAS_GWIP;
	if (nd->nd_mask.s_addr)
		flags |= NFS_BOOT_HAS_MASK;
	if (sin->sin_addr.s_addr)
		flags |= NFS_BOOT_HAS_SERVADDR;

	return flags;
}
#endif /* defined(NFS_BOOT_BOOTSTATIC) */

#ifdef XENNET_DEBUG_DUMP
#define XCHR(x) hexdigits[(x) & 0xf]
static void
xennet_hex_dump(const unsigned char *pkt, size_t len, const char *type, int id)
{
	size_t i, j;

	printf("pkt %p len %zd/%zx type %s id %d\n", pkt, len, len, type, id);
	printf("00000000 ");
	for(i=0; i<len; i++) {
		printf("%c%c ",
		    XCHR(pkt[i]>>4), XCHR(pkt[i]));
		if ((i+1) % 16 == 8)
			printf(" ");
		if ((i+1) % 16 == 0) {
			printf(" %c", '|');
			for(j=0; j<16; j++)
				printf("%c", pkt[i-15+j]>=32 &&
				    pkt[i-15+j]<127?pkt[i-15+j]:'.');
			printf("%c\n%c%c%c%c%c%c%c%c ", '|',
			    XCHR((i+1)>>28), XCHR((i+1)>>24),
			    XCHR((i+1)>>20), XCHR((i+1)>>16),
			    XCHR((i+1)>>12), XCHR((i+1)>>8),
			    XCHR((i+1)>>4), XCHR(i+1));
		}
	}
	printf("\n");
}
#undef XCHR
#endif