      1 /*      $NetBSD: xennetback_xenbus.c,v 1.126 2024/07/05 04:31:50 rin Exp $      */
      2 
      3 /*
      4  * Copyright (c) 2006 Manuel Bouyer.
      5  *
      6  * Redistribution and use in source and binary forms, with or without
      7  * modification, are permitted provided that the following conditions
      8  * are met:
      9  * 1. Redistributions of source code must retain the above copyright
     10  *    notice, this list of conditions and the following disclaimer.
     11  * 2. Redistributions in binary form must reproduce the above copyright
     12  *    notice, this list of conditions and the following disclaimer in the
     13  *    documentation and/or other materials provided with the distribution.
     14  *
     15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     25  */
     26 
     27 #include <sys/cdefs.h>
     28 __KERNEL_RCSID(0, "$NetBSD: xennetback_xenbus.c,v 1.126 2024/07/05 04:31:50 rin Exp $");
     29 
     30 #include <sys/types.h>
     31 #include <sys/param.h>
     32 #include <sys/systm.h>
     33 #include <sys/kmem.h>
     34 #include <sys/queue.h>
     35 #include <sys/kernel.h>
     36 #include <sys/mbuf.h>
     37 #include <sys/protosw.h>
     38 #include <sys/socket.h>
     39 #include <sys/ioctl.h>
     40 #include <sys/errno.h>
     41 #include <sys/device.h>
     42 
     43 #include <net/if.h>
     44 #include <net/if_types.h>
     45 #include <net/if_dl.h>
     46 #include <net/route.h>
     47 #include <net/bpf.h>
     48 
     49 #include <net/if_ether.h>
     50 
     51 #include <xen/intr.h>
     52 #include <xen/hypervisor.h>
     53 #include <xen/xen.h>
     54 #include <xen/xen_shm.h>
     55 #include <xen/evtchn.h>
     56 #include <xen/xenbus.h>
     57 #include <xen/xennet_checksum.h>
     58 
     59 #include <uvm/uvm.h>
     60 
     61 /*
     62  * Backend network device driver for Xen.
     63  */
     64 
     65 #ifdef XENDEBUG_NET
     66 #define XENPRINTF(x) printf x
     67 #else
     68 #define XENPRINTF(x)
     69 #endif
     70 
     71 #define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
     72 #define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
     73 
     74 /*
     75  * Number of packets to transmit in one hypercall (= number of pages to
     76  * transmit at once).
     77  */
     78 #define NB_XMIT_PAGES_BATCH 64
     79 CTASSERT(NB_XMIT_PAGES_BATCH >= XEN_NETIF_NR_SLOTS_MIN);
     80 
     81 /* ratecheck(9) for pool allocation failures */
     82 static const struct timeval xni_pool_errintvl = { 30, 0 };  /* 30s, each */
     83 
     84 /* state of a xnetback instance */
     85 typedef enum {
     86 	CONNECTED,
     87 	DISCONNECTING,
     88 	DISCONNECTED
     89 } xnetback_state_t;
     90 
     91 struct xnetback_xstate {
     92 	bus_dmamap_t xs_dmamap;
     93 	bool xs_loaded;
     94 	struct mbuf *xs_m;
     95 	struct netif_tx_request xs_tx;
     96 	uint16_t xs_tx_size;		/* Size of data in this Tx fragment */
     97 };
     98 
     99 /* we keep the xnetback instances in a linked list */
    100 struct xnetback_instance {
    101 	SLIST_ENTRY(xnetback_instance) next;
    102 	struct xenbus_device *xni_xbusd; /* our xenstore entry */
    103 	domid_t xni_domid;		/* attached to this domain */
    104 	uint32_t xni_handle;	/* domain-specific handle */
    105 	xnetback_state_t xni_status;
    106 
    107 	/* network interface stuff */
    108 	struct ethercom xni_ec;
    109 	struct callout xni_restart;
    110 	uint8_t xni_enaddr[ETHER_ADDR_LEN];
    111 
    112 	/* remote domain communication stuff */
    113 	unsigned int xni_evtchn; /* our event channel */
    114 	struct intrhand *xni_ih;
    115 	netif_tx_back_ring_t xni_txring;
    116 	netif_rx_back_ring_t xni_rxring;
    117 	grant_handle_t xni_tx_ring_handle; /* to unmap the ring */
    118 	grant_handle_t xni_rx_ring_handle;
    119 	vaddr_t xni_tx_ring_va; /* to unmap the ring */
    120 	vaddr_t xni_rx_ring_va;
    121 
     122 	/* arrays used by both the Rx (xennetback_ifstart()) and Tx paths */
    123 	gnttab_copy_t     	xni_gop_copy[NB_XMIT_PAGES_BATCH];
    124 	struct xnetback_xstate	xni_xstate[NB_XMIT_PAGES_BATCH];
    125 
    126 	/* event counters */
    127 	struct evcnt xni_cnt_rx_cksum_blank;
    128 	struct evcnt xni_cnt_rx_cksum_undefer;
    129 };
    130 #define xni_if    xni_ec.ec_if
    131 #define xni_bpf   xni_if.if_bpf
    132 
    133        void xvifattach(int);
    134 static int  xennetback_ifioctl(struct ifnet *, u_long, void *);
    135 static void xennetback_ifstart(struct ifnet *);
    136 static void xennetback_ifsoftstart_copy(struct xnetback_instance *);
    137 static void xennetback_ifwatchdog(struct ifnet *);
    138 static int  xennetback_ifinit(struct ifnet *);
    139 static void xennetback_ifstop(struct ifnet *, int);
    140 
    141 static int  xennetback_xenbus_create(struct xenbus_device *);
    142 static int  xennetback_xenbus_destroy(void *);
    143 static void xennetback_frontend_changed(void *, XenbusState);
    144 
    145 static inline void xennetback_tx_response(struct xnetback_instance *,
    146     int, int);
    147 
    148 static SLIST_HEAD(, xnetback_instance) xnetback_instances;
    149 static kmutex_t xnetback_lock;
    150 
    151 static bool xnetif_lookup(domid_t, uint32_t);
    152 static int  xennetback_evthandler(void *);
    153 
    154 static struct xenbus_backend_driver xvif_backend_driver = {
    155 	.xbakd_create = xennetback_xenbus_create,
    156 	.xbakd_type = "vif"
    157 };
    158 
    159 void
    160 xvifattach(int n)
    161 {
    162 	XENPRINTF(("xennetback_init\n"));
    163 
    164 	SLIST_INIT(&xnetback_instances);
    165 	mutex_init(&xnetback_lock, MUTEX_DEFAULT, IPL_NONE);
    166 
    167 	xenbus_backend_register(&xvif_backend_driver);
    168 }
    169 
    170 static int
    171 xennetback_xenbus_create(struct xenbus_device *xbusd)
    172 {
    173 	struct xnetback_instance *xneti;
    174 	long domid, handle;
    175 	struct ifnet *ifp;
    176 	extern int ifqmaxlen; /* XXX */
    177 	char *e, *p;
    178 	char mac[32];
    179 	int i, err;
    180 	struct xenbus_transaction *xbt;
    181 
    182 	if ((err = xenbus_read_ul(NULL, xbusd->xbusd_path,
    183 	    "frontend-id", &domid, 10)) != 0) {
    184 		aprint_error("xvif: can't read %s/frontend-id: %d\n",
    185 		    xbusd->xbusd_path, err);
    186 		return err;
    187 	}
    188 	if ((err = xenbus_read_ul(NULL, xbusd->xbusd_path,
    189 	    "handle", &handle, 10)) != 0) {
    190 		aprint_error("xvif: can't read %s/handle: %d\n",
    191 		    xbusd->xbusd_path, err);
    192 		return err;
    193 	}
    194 
    195 	xneti = kmem_zalloc(sizeof(*xneti), KM_SLEEP);
    196 	xneti->xni_domid = domid;
    197 	xneti->xni_handle = handle;
    198 	xneti->xni_status = DISCONNECTED;
    199 
    200 	/* Need to keep the lock for lookup and the list update */
    201 	mutex_enter(&xnetback_lock);
    202 	if (xnetif_lookup(domid, handle)) {
    203 		mutex_exit(&xnetback_lock);
    204 		kmem_free(xneti, sizeof(*xneti));
    205 		return EEXIST;
    206 	}
    207 	SLIST_INSERT_HEAD(&xnetback_instances, xneti, next);
    208 	mutex_exit(&xnetback_lock);
    209 
    210 	xbusd->xbusd_u.b.b_cookie = xneti;
    211 	xbusd->xbusd_u.b.b_detach = xennetback_xenbus_destroy;
    212 	xneti->xni_xbusd = xbusd;
    213 
    214 	ifp = &xneti->xni_if;
    215 	ifp->if_softc = xneti;
    216 	snprintf(ifp->if_xname, IFNAMSIZ, "xvif%di%d",
    217 	    (int)domid, (int)handle);
    218 
    219 	/* read mac address */
    220 	err = xenbus_read(NULL, xbusd->xbusd_path, "mac", mac, sizeof(mac));
    221 	if (err) {
    222 		aprint_error_ifnet(ifp, "can't read %s/mac: %d\n",
    223 		    xbusd->xbusd_path, err);
    224 		goto fail;
    225 	}
    226 	for (i = 0, p = mac; i < ETHER_ADDR_LEN; i++) {
    227 		xneti->xni_enaddr[i] = strtoul(p, &e, 16);
    228 		if ((e[0] == '\0' && i != 5) && e[0] != ':') {
    229 			aprint_error_ifnet(ifp,
    230 			    "%s is not a valid mac address\n", mac);
    231 			err = EINVAL;
    232 			goto fail;
    233 		}
    234 		p = &e[1];
    235 	}
    236 
    237 	/* we can't use the same MAC addr as our guest */
    238 	xneti->xni_enaddr[3]++;
    239 
     240 	/* Initialize DMA maps, used only for loading PAs */
    241 	for (i = 0; i < __arraycount(xneti->xni_xstate); i++) {
    242 		if (bus_dmamap_create(xneti->xni_xbusd->xbusd_dmat,
    243 		    ETHER_MAX_LEN_JUMBO, XEN_NETIF_NR_SLOTS_MIN,
    244 		    PAGE_SIZE, PAGE_SIZE, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,
    245 		    &xneti->xni_xstate[i].xs_dmamap)
    246 		    != 0) {
    247 			aprint_error_ifnet(ifp,
    248 			    "failed to allocate dma map\n");
    249 			err = ENOMEM;
    250 			goto fail;
    251 		}
    252 	}
    253 
    254 	evcnt_attach_dynamic(&xneti->xni_cnt_rx_cksum_blank, EVCNT_TYPE_MISC,
    255 	    NULL, ifp->if_xname, "Rx csum blank");
    256 	evcnt_attach_dynamic(&xneti->xni_cnt_rx_cksum_undefer, EVCNT_TYPE_MISC,
    257 	    NULL, ifp->if_xname, "Rx csum undeferred");
    258 
    259 	/* create pseudo-interface */
    260 	aprint_verbose_ifnet(ifp, "Ethernet address %s\n",
    261 	    ether_sprintf(xneti->xni_enaddr));
    262 	xneti->xni_ec.ec_capabilities |= ETHERCAP_VLAN_MTU | ETHERCAP_JUMBO_MTU;
    263 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
    264 	ifp->if_snd.ifq_maxlen =
    265 	    uimax(ifqmaxlen, NET_TX_RING_SIZE * 2);
    266 	ifp->if_capabilities =
    267 		IFCAP_CSUM_UDPv4_Rx | IFCAP_CSUM_UDPv4_Tx
    268 		| IFCAP_CSUM_TCPv4_Rx | IFCAP_CSUM_TCPv4_Tx
    269 		| IFCAP_CSUM_UDPv6_Rx | IFCAP_CSUM_UDPv6_Tx
    270 		| IFCAP_CSUM_TCPv6_Rx | IFCAP_CSUM_TCPv6_Tx;
    271 #define XN_M_CSUM_SUPPORTED						\
    272 	(M_CSUM_TCPv4 | M_CSUM_UDPv4 | M_CSUM_TCPv6 | M_CSUM_UDPv6)
    273 
    274 	ifp->if_ioctl = xennetback_ifioctl;
    275 	ifp->if_start = xennetback_ifstart;
    276 	ifp->if_watchdog = xennetback_ifwatchdog;
    277 	ifp->if_init = xennetback_ifinit;
    278 	ifp->if_stop = xennetback_ifstop;
    279 	ifp->if_timer = 0;
    280 	IFQ_SET_MAXLEN(&ifp->if_snd, uimax(2 * NET_TX_RING_SIZE, IFQ_MAXLEN));
    281 	IFQ_SET_READY(&ifp->if_snd);
    282 	if_attach(ifp);
    283 	if_deferred_start_init(ifp, NULL);
    284 	ether_ifattach(&xneti->xni_if, xneti->xni_enaddr);
    285 
    286 	xbusd->xbusd_otherend_changed = xennetback_frontend_changed;
    287 
    288 	do {
    289 		xbt = xenbus_transaction_start();
    290 		if (xbt == NULL) {
    291 			aprint_error_ifnet(ifp,
    292 			    "%s: can't start transaction\n",
    293 			    xbusd->xbusd_path);
    294 			goto fail;
    295 		}
    296 		err = xenbus_printf(xbt, xbusd->xbusd_path,
    297 		    "vifname", "%s", ifp->if_xname);
    298 		if (err) {
    299 			aprint_error_ifnet(ifp,
    300 			    "failed to write %s/vifname: %d\n",
    301 			    xbusd->xbusd_path, err);
    302 			goto abort_xbt;
    303 		}
    304 		err = xenbus_printf(xbt, xbusd->xbusd_path,
    305 		    "feature-rx-copy", "%d", 1);
    306 		if (err) {
    307 			aprint_error_ifnet(ifp,
    308 			    "failed to write %s/feature-rx-copy: %d\n",
    309 			    xbusd->xbusd_path, err);
    310 			goto abort_xbt;
    311 		}
    312 		err = xenbus_printf(xbt, xbusd->xbusd_path,
    313 		    "feature-ipv6-csum-offload", "%d", 1);
    314 		if (err) {
    315 			aprint_error_ifnet(ifp,
    316 			    "failed to write %s/feature-ipv6-csum-offload: %d\n",
    317 			    xbusd->xbusd_path, err);
    318 			goto abort_xbt;
    319 		}
    320 		err = xenbus_printf(xbt, xbusd->xbusd_path,
    321 		    "feature-sg", "%d", 1);
    322 		if (err) {
    323 			aprint_error_ifnet(ifp,
    324 			    "failed to write %s/feature-sg: %d\n",
    325 			    xbusd->xbusd_path, err);
    326 			goto abort_xbt;
    327 		}
    328 	} while ((err = xenbus_transaction_end(xbt, 0)) == EAGAIN);
    329 	if (err) {
    330 		aprint_error_ifnet(ifp,
    331 		    "%s: can't end transaction: %d\n",
    332 		    xbusd->xbusd_path, err);
    333 	}
    334 
    335 	err = xenbus_switch_state(xbusd, NULL, XenbusStateInitWait);
    336 	if (err) {
    337 		aprint_error_ifnet(ifp,
    338 		    "failed to switch state on %s: %d\n",
    339 		    xbusd->xbusd_path, err);
    340 		goto fail;
    341 	}
    342 	return 0;
    343 
    344 abort_xbt:
    345 	xenbus_transaction_end(xbt, 1);
    346 fail:
    347 	kmem_free(xneti, sizeof(*xneti));
    348 	return err;
    349 }
    350 
    351 int
    352 xennetback_xenbus_destroy(void *arg)
    353 {
    354 	struct xnetback_instance *xneti = arg;
    355 
    356 	aprint_verbose_ifnet(&xneti->xni_if, "disconnecting\n");
    357 
    358 	if (xneti->xni_ih != NULL) {
    359 		hypervisor_mask_event(xneti->xni_evtchn);
    360 		xen_intr_disestablish(xneti->xni_ih);
    361 		xneti->xni_ih = NULL;
    362 	}
    363 
    364 	mutex_enter(&xnetback_lock);
    365 	SLIST_REMOVE(&xnetback_instances,
    366 	    xneti, xnetback_instance, next);
    367 	mutex_exit(&xnetback_lock);
    368 
    369 	ether_ifdetach(&xneti->xni_if);
    370 	if_detach(&xneti->xni_if);
    371 
    372 	evcnt_detach(&xneti->xni_cnt_rx_cksum_blank);
    373 	evcnt_detach(&xneti->xni_cnt_rx_cksum_undefer);
    374 
    375 	/* Destroy DMA maps */
    376 	for (int i = 0; i < __arraycount(xneti->xni_xstate); i++) {
    377 		if (xneti->xni_xstate[i].xs_dmamap != NULL) {
    378 			bus_dmamap_destroy(xneti->xni_xbusd->xbusd_dmat,
    379 			    xneti->xni_xstate[i].xs_dmamap);
    380 			xneti->xni_xstate[i].xs_dmamap = NULL;
    381 		}
    382 	}
    383 
    384 	if (xneti->xni_txring.sring) {
    385 		xen_shm_unmap(xneti->xni_tx_ring_va, 1,
    386 		    &xneti->xni_tx_ring_handle);
    387 	}
    388 	if (xneti->xni_rxring.sring) {
    389 		xen_shm_unmap(xneti->xni_rx_ring_va, 1,
    390 		    &xneti->xni_rx_ring_handle);
    391 	}
    392 	if (xneti->xni_tx_ring_va != 0) {
    393 		uvm_km_free(kernel_map, xneti->xni_tx_ring_va,
    394 		    PAGE_SIZE, UVM_KMF_VAONLY);
    395 		xneti->xni_tx_ring_va = 0;
    396 	}
    397 	if (xneti->xni_rx_ring_va != 0) {
    398 		uvm_km_free(kernel_map, xneti->xni_rx_ring_va,
    399 		    PAGE_SIZE, UVM_KMF_VAONLY);
    400 		xneti->xni_rx_ring_va = 0;
    401 	}
    402 	kmem_free(xneti, sizeof(*xneti));
    403 	return 0;
    404 }
    405 
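        /*
         * Connect to the frontend: read the ring references and event channel
         * from xenstore, map the shared Tx/Rx rings and bind the interdomain
         * event channel, then bring the interface up.  Returns 0 on success,
         * -1 on error.
         */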
    406 static int
    407 xennetback_connect(struct xnetback_instance *xneti)
    408 {
    409 	int err;
    410 	netif_tx_sring_t *tx_ring;
    411 	netif_rx_sring_t *rx_ring;
    412 	evtchn_op_t evop;
    413 	u_long tx_ring_ref, rx_ring_ref;
    414 	grant_ref_t gtx_ring_ref, grx_ring_ref;
    415 	u_long revtchn, rx_copy;
    416 	struct xenbus_device *xbusd = xneti->xni_xbusd;
    417 
    418 	/* read communication information */
    419 	err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
    420 	    "tx-ring-ref", &tx_ring_ref, 10);
    421 	if (err) {
    422 		xenbus_dev_fatal(xbusd, err, "reading %s/tx-ring-ref",
    423 		    xbusd->xbusd_otherend);
    424 		return -1;
    425 	}
    426 	err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
    427 	    "rx-ring-ref", &rx_ring_ref, 10);
    428 	if (err) {
    429 		xenbus_dev_fatal(xbusd, err, "reading %s/rx-ring-ref",
    430 		    xbusd->xbusd_otherend);
    431 		return -1;
    432 	}
    433 	err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
    434 	    "event-channel", &revtchn, 10);
    435 	if (err) {
    436 		xenbus_dev_fatal(xbusd, err, "reading %s/event-channel",
    437 		    xbusd->xbusd_otherend);
    438 		return -1;
    439 	}
    440 	err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
    441 	    "request-rx-copy", &rx_copy, 10);
    442 	if (err == ENOENT || !rx_copy) {
    443 		xenbus_dev_fatal(xbusd, err,
    444 		    "%s/request-rx-copy not supported by frontend",
    445 		    xbusd->xbusd_otherend);
    446 		return -1;
    447 	} else if (err) {
    448 		xenbus_dev_fatal(xbusd, err, "reading %s/request-rx-copy",
    449 		    xbusd->xbusd_otherend);
    450 		return -1;
    451 	}
    452 
    453 	/* allocate VA space and map rings */
    454 	xneti->xni_tx_ring_va = uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
    455 	    UVM_KMF_VAONLY);
    456 	if (xneti->xni_tx_ring_va == 0) {
    457 		xenbus_dev_fatal(xbusd, ENOMEM,
    458 		    "can't get VA for TX ring", xbusd->xbusd_otherend);
    459 		goto err1;
    460 	}
    461 	tx_ring = (void *)xneti->xni_tx_ring_va;
    462 
    463 	xneti->xni_rx_ring_va = uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
    464 	    UVM_KMF_VAONLY);
    465 	if (xneti->xni_rx_ring_va == 0) {
    466 		xenbus_dev_fatal(xbusd, ENOMEM,
    467 		    "can't get VA for RX ring", xbusd->xbusd_otherend);
    468 		goto err1;
    469 	}
    470 	rx_ring = (void *)xneti->xni_rx_ring_va;
    471 
    472 	gtx_ring_ref = tx_ring_ref;
     473 	if (xen_shm_map(1, xneti->xni_domid, &gtx_ring_ref,
    474 	    xneti->xni_tx_ring_va, &xneti->xni_tx_ring_handle, 0) != 0) {
    475 		aprint_error_ifnet(&xneti->xni_if,
    476 		    "can't map TX grant ref\n");
    477 		goto err2;
    478 	}
    479 	BACK_RING_INIT(&xneti->xni_txring, tx_ring, PAGE_SIZE);
    480 
    481 	grx_ring_ref = rx_ring_ref;
     482 	if (xen_shm_map(1, xneti->xni_domid, &grx_ring_ref,
    483 	    xneti->xni_rx_ring_va, &xneti->xni_rx_ring_handle, 0) != 0) {
    484 		aprint_error_ifnet(&xneti->xni_if,
    485 		    "can't map RX grant ref\n");
    486 		goto err2;
    487 	}
    488 	BACK_RING_INIT(&xneti->xni_rxring, rx_ring, PAGE_SIZE);
    489 
    490 	evop.cmd = EVTCHNOP_bind_interdomain;
    491 	evop.u.bind_interdomain.remote_dom = xneti->xni_domid;
    492 	evop.u.bind_interdomain.remote_port = revtchn;
    493 	err = HYPERVISOR_event_channel_op(&evop);
    494 	if (err) {
    495 		aprint_error_ifnet(&xneti->xni_if,
    496 		    "can't get event channel: %d\n", err);
    497 		goto err2;
    498 	}
    499 	xneti->xni_evtchn = evop.u.bind_interdomain.local_port;
    500 	xneti->xni_status = CONNECTED;
    501 
    502 	xneti->xni_ih = xen_intr_establish_xname(-1, &xen_pic,
    503 	    xneti->xni_evtchn, IST_LEVEL, IPL_NET, xennetback_evthandler,
    504 	    xneti, false, xneti->xni_if.if_xname);
    505 	KASSERT(xneti->xni_ih != NULL);
    506 	xennetback_ifinit(&xneti->xni_if);
    507 	hypervisor_unmask_event(xneti->xni_evtchn);
    508 	hypervisor_notify_via_evtchn(xneti->xni_evtchn);
    509 	return 0;
    510 
    511 err2:
    512 	/* unmap rings */
    513 	if (xneti->xni_tx_ring_handle != 0) {
    514 		xen_shm_unmap(xneti->xni_tx_ring_va, 1,
    515 		    &xneti->xni_tx_ring_handle);
    516 	}
    517 
    518 	if (xneti->xni_rx_ring_handle != 0) {
    519 		xen_shm_unmap(xneti->xni_rx_ring_va, 1,
    520 		    &xneti->xni_rx_ring_handle);
    521 	}
    522 err1:
    523 	/* free rings VA space */
    524 	if (xneti->xni_rx_ring_va != 0)
    525 		uvm_km_free(kernel_map, xneti->xni_rx_ring_va,
    526 		    PAGE_SIZE, UVM_KMF_VAONLY);
    527 
    528 	if (xneti->xni_tx_ring_va != 0)
    529 		uvm_km_free(kernel_map, xneti->xni_tx_ring_va,
    530 		    PAGE_SIZE, UVM_KMF_VAONLY);
    531 
    532 	return -1;
    533 
    534 }
    535 
    536 static void
    537 xennetback_frontend_changed(void *arg, XenbusState new_state)
    538 {
    539 	struct xnetback_instance *xneti = arg;
    540 	struct xenbus_device *xbusd = xneti->xni_xbusd;
    541 
    542 	XENPRINTF(("%s: new state %d\n", xneti->xni_if.if_xname, new_state));
    543 	switch(new_state) {
    544 	case XenbusStateInitialising:
    545 	case XenbusStateInitialised:
    546 		break;
    547 
    548 	case XenbusStateConnected:
    549 		if (xneti->xni_status == CONNECTED)
    550 			break;
    551 		if (xennetback_connect(xneti) == 0)
    552 			xenbus_switch_state(xbusd, NULL, XenbusStateConnected);
    553 		break;
    554 
    555 	case XenbusStateClosing:
    556 		xneti->xni_status = DISCONNECTING;
    557 		xneti->xni_if.if_flags &= ~IFF_RUNNING;
    558 		xneti->xni_if.if_timer = 0;
    559 		xenbus_switch_state(xbusd, NULL, XenbusStateClosing);
    560 		break;
    561 
    562 	case XenbusStateClosed:
    563 		/* otherend_changed() should handle it for us */
    564 		panic("xennetback_frontend_changed: closed\n");
    565 	case XenbusStateUnknown:
    566 	case XenbusStateInitWait:
    567 	default:
    568 		aprint_error("%s: invalid frontend state %d\n",
    569 		    xneti->xni_if.if_xname, new_state);
    570 		break;
    571 	}
    572 	return;
    573 
    574 }
    575 
    576 /* lookup a xneti based on domain id and interface handle */
    577 static bool
     578 xnetif_lookup(domid_t dom, uint32_t handle)
    579 {
    580 	struct xnetback_instance *xneti;
    581 	bool found = false;
    582 
    583 	KASSERT(mutex_owned(&xnetback_lock));
    584 
    585 	SLIST_FOREACH(xneti, &xnetback_instances, next) {
    586 		if (xneti->xni_domid == dom && xneti->xni_handle == handle) {
    587 			found = true;
    588 			break;
    589 		}
    590 	}
    591 
    592 	return found;
    593 }
    594 
    595 static inline void
    596 xennetback_tx_response(struct xnetback_instance *xneti, int id, int status)
    597 {
    598 	RING_IDX resp_prod;
    599 	netif_tx_response_t *txresp;
    600 	int do_event;
    601 
    602 	resp_prod = xneti->xni_txring.rsp_prod_pvt;
    603 	txresp = RING_GET_RESPONSE(&xneti->xni_txring, resp_prod);
    604 
    605 	txresp->id = id;
    606 	txresp->status = status;
    607 	xneti->xni_txring.rsp_prod_pvt++;
    608 	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xneti->xni_txring, do_event);
    609 	if (do_event) {
    610 		XENPRINTF(("%s send event\n", xneti->xni_if.if_xname));
    611 		hypervisor_notify_via_evtchn(xneti->xni_evtchn);
    612 	}
    613 }
    614 
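        /*
         * Sanity-check a single Tx request from the frontend.  Returns a short
         * description of the problem, or NULL if the request looks valid.
         */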
    615 static const char *
    616 xennetback_tx_check_packet(const netif_tx_request_t *txreq, bool first)
    617 {
    618 	if (__predict_false((txreq->flags & NETTXF_more_data) == 0 &&
    619 	    txreq->offset + txreq->size > PAGE_SIZE))
    620 		return "crossing page boundary";
    621 
    622 	if (__predict_false(txreq->size > ETHER_MAX_LEN_JUMBO))
     623 		return "bigger than jumbo";
    624 
    625 	if (first &&
    626 	    __predict_false(txreq->size < ETHER_HDR_LEN))
    627 		return "too short";
    628 
    629 	return NULL;
    630 }
    631 
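        /*
         * Issue a batch of queued grant-copy operations in a single hypercall
         * and check that every individual copy succeeded.  "dir" is only used
         * to label error messages ("Tx" or "Rx").
         */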
    632 static int
    633 xennetback_copy(struct ifnet *ifp, gnttab_copy_t *gop, int copycnt,
    634     const char *dir)
    635 {
    636 	/*
    637 	 * Copy the data and ack it. Delaying it until the mbuf is
    638 	 * freed will stall transmit.
    639 	 */
    640 	if (HYPERVISOR_grant_table_op(GNTTABOP_copy, gop, copycnt) != 0) {
     641 		printf("%s: GNTTABOP_copy %s failed\n", ifp->if_xname, dir);
    642 		return EINVAL;
    643 	}
    644 
    645 	for (int i = 0; i < copycnt; i++) {
     646 		if (gop[i].status != GNTST_okay) {
     647 			printf("%s GNTTABOP_copy[%d] %s %d\n",
     648 			    ifp->if_xname, i, dir, gop[i].status);
    649 			return EINVAL;
    650 		}
    651 	}
    652 
    653 	return 0;
    654 }
    655 
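        /*
         * Abort a partially queued Tx batch: unload any loaded DMA maps, free
         * the associated mbufs and return an error response to the frontend
         * for each pending request.
         */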
    656 static void
    657 xennetback_tx_copy_abort(struct ifnet *ifp, struct xnetback_instance *xneti,
    658 	int queued)
    659 {
    660 	struct xnetback_xstate *xst;
    661 
    662 	for (int i = 0; i < queued; i++) {
    663 		xst = &xneti->xni_xstate[i];
    664 
    665 		if (xst->xs_loaded) {
    666 			KASSERT(xst->xs_m != NULL);
    667 			bus_dmamap_unload(xneti->xni_xbusd->xbusd_dmat,
    668 			    xst->xs_dmamap);
    669 			xst->xs_loaded = false;
    670 			m_freem(xst->xs_m);
    671 		}
    672 
    673 		xennetback_tx_response(xneti, xst->xs_tx.id, NETIF_RSP_ERROR);
    674 		if_statinc(ifp, if_ierrors);
    675 	}
    676 }
    677 
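        /*
         * Process a batch of Tx requests: grant-copy the frontend's data into
         * the local mbufs, acknowledge each request and, on success, fix up
         * checksum flags and pass the packets to the local network stack.
         */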
    678 static void
    679 xennetback_tx_copy_process(struct ifnet *ifp, struct xnetback_instance *xneti,
    680 	int queued)
    681 {
    682 	gnttab_copy_t *gop;
    683 	struct xnetback_xstate *xst;
    684 	int copycnt = 0, seg = 0;
    685 	size_t goff = 0, segoff = 0, gsize, take;
    686 	bus_dmamap_t dm = NULL;
    687 	paddr_t ma;
    688 
    689 	for (int i = 0; i < queued; i++) {
    690 		xst = &xneti->xni_xstate[i];
    691 
    692 		if (xst->xs_m != NULL) {
    693 			KASSERT(xst->xs_m->m_pkthdr.len == xst->xs_tx.size);
    694 			if (__predict_false(bus_dmamap_load_mbuf(
    695 			    xneti->xni_xbusd->xbusd_dmat,
    696 			    xst->xs_dmamap, xst->xs_m, BUS_DMA_NOWAIT) != 0))
    697 				goto abort;
    698 			xst->xs_loaded = true;
    699 			dm = xst->xs_dmamap;
    700 			seg = 0;
    701 			goff = segoff = 0;
    702 		}
    703 
    704 		gsize = xst->xs_tx_size;
    705 		goff = 0;
    706 		for (; seg < dm->dm_nsegs && gsize > 0; seg++) {
    707 			bus_dma_segment_t *ds = &dm->dm_segs[seg];
    708 			ma = ds->ds_addr;
    709 			take = uimin(gsize, ds->ds_len);
    710 
    711 			KASSERT(copycnt <= NB_XMIT_PAGES_BATCH);
    712 			if (copycnt == NB_XMIT_PAGES_BATCH) {
    713 				if (xennetback_copy(ifp, xneti->xni_gop_copy,
    714 				    copycnt, "Tx") != 0)
    715 					goto abort;
    716 				copycnt = 0;
    717 			}
    718 
    719 			/* Queue for the copy */
    720 			gop = &xneti->xni_gop_copy[copycnt++];
    721 			memset(gop, 0, sizeof(*gop));
    722 			gop->flags = GNTCOPY_source_gref;
    723 			gop->len = take;
    724 
    725 			gop->source.u.ref = xst->xs_tx.gref;
    726 			gop->source.offset = xst->xs_tx.offset + goff;
    727 			gop->source.domid = xneti->xni_domid;
    728 
    729 			gop->dest.offset = (ma & PAGE_MASK) + segoff;
    730 			KASSERT(gop->dest.offset <= PAGE_SIZE);
    731 			gop->dest.domid = DOMID_SELF;
    732 			gop->dest.u.gmfn = ma >> PAGE_SHIFT;
    733 
    734 			goff += take;
    735 			gsize -= take;
    736 			if (take + segoff < ds->ds_len) {
    737 				segoff += take;
    738 				/* Segment not completely consumed yet */
    739 				break;
    740 			}
    741 			segoff = 0;
    742 		}
    743 		KASSERT(gsize == 0);
    744 		KASSERT(goff == xst->xs_tx_size);
    745 	}
    746 	if (copycnt > 0) {
    747 		if (xennetback_copy(ifp, xneti->xni_gop_copy, copycnt, "Tx"))
    748 			goto abort;
    749 		copycnt = 0;
    750 	}
    751 
    752 	/* If we got here, the whole copy was successful */
    753 	for (int i = 0; i < queued; i++) {
    754 		xst = &xneti->xni_xstate[i];
    755 
    756 		xennetback_tx_response(xneti, xst->xs_tx.id, NETIF_RSP_OKAY);
    757 
    758 		if (xst->xs_m != NULL) {
    759 			KASSERT(xst->xs_loaded);
    760 			bus_dmamap_unload(xneti->xni_xbusd->xbusd_dmat,
    761 			    xst->xs_dmamap);
    762 
    763 			if (xst->xs_tx.flags & NETTXF_csum_blank) {
    764 				xennet_checksum_fill(ifp, xst->xs_m,
    765 				    &xneti->xni_cnt_rx_cksum_blank,
    766 				    &xneti->xni_cnt_rx_cksum_undefer);
    767 			} else if (xst->xs_tx.flags & NETTXF_data_validated) {
    768 				xst->xs_m->m_pkthdr.csum_flags =
    769 				    XN_M_CSUM_SUPPORTED;
    770 			}
    771 			m_set_rcvif(xst->xs_m, ifp);
    772 
    773 			if_percpuq_enqueue(ifp->if_percpuq, xst->xs_m);
    774 		}
    775 	}
    776 
    777 	return;
    778 
    779 abort:
    780 	xennetback_tx_copy_abort(ifp, xneti, queued);
    781 }
    782 
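        /*
         * Compute the size of the first fragment of a multi-fragment Tx
         * request by subtracting the sizes of the following fragments from
         * the advertised total.  The number of requests in the chain is
         * returned via *cntp; -1 is returned if the chain is malformed or
         * too long.
         */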
    783 static int
    784 xennetback_tx_m0len_fragment(struct xnetback_instance *xneti,
    785     int m0_len, int req_cons, int *cntp)
    786 {
    787 	netif_tx_request_t *txreq;
    788 
    789 	/* This assumes all the requests are already pushed into the ring */
    790 	*cntp = 1;
    791 	do {
    792 		txreq = RING_GET_REQUEST(&xneti->xni_txring, req_cons);
    793 		if (m0_len <= txreq->size || *cntp > XEN_NETIF_NR_SLOTS_MIN)
    794 			return -1;
    795 		if (RING_REQUEST_CONS_OVERFLOW(&xneti->xni_txring, req_cons))
    796 			return -1;
    797 
    798 		m0_len -= txreq->size;
    799 		req_cons++;
    800 		(*cntp)++;
    801 	} while (txreq->flags & NETTXF_more_data);
    802 
    803 	return m0_len;
    804 }
    805 
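        /*
         * Event channel interrupt handler: consume the frontend's Tx
         * requests, assemble them into mbuf chains and queue grant copies of
         * the data, flushing the batch whenever it fills up.
         */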
    806 static int
    807 xennetback_evthandler(void *arg)
    808 {
    809 	struct xnetback_instance *xneti = arg;
    810 	struct ifnet *ifp = &xneti->xni_if;
    811 	netif_tx_request_t txreq;
    812 	struct mbuf *m, *m0 = NULL, *mlast = NULL;
    813 	int receive_pending;
    814 	int queued = 0, m0_len = 0;
    815 	struct xnetback_xstate *xst;
    816 	const bool nupnrun = ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) !=
    817 	    (IFF_UP | IFF_RUNNING));
     818 	bool discard = false;
    819 
    820 	XENPRINTF(("xennetback_evthandler "));
    821 again:
    822 	while (RING_HAS_UNCONSUMED_REQUESTS(&xneti->xni_txring)) {
    823 		/*
    824 		 * Ensure we have read the producer's queue index in
    825 		 * RING_FINAL_CHECK_FOR_REQUESTS before we read the
    826 		 * content of the producer's next request in
    827 		 * RING_COPY_REQUEST.
    828 		 */
    829 		xen_rmb();
    830 		RING_COPY_REQUEST(&xneti->xni_txring,
    831 		    xneti->xni_txring.req_cons,
    832 		    &txreq);
    833 		XENPRINTF(("%s pkt size %d\n", xneti->xni_if.if_xname,
    834 		    txreq.size));
    835 		xneti->xni_txring.req_cons++;
    836 		if (__predict_false(nupnrun || discard)) {
    837 			/* interface not up, drop all requests */
    838 			if_statinc(ifp, if_iqdrops);
    839 			discard = (txreq.flags & NETTXF_more_data) != 0;
    840 			xennetback_tx_response(xneti, txreq.id,
    841 			    NETIF_RSP_DROPPED);
    842 			continue;
    843 		}
    844 
    845 		/*
    846 		 * Do some sanity checks, and queue copy of the data.
    847 		 */
    848 		const char *msg = xennetback_tx_check_packet(&txreq,
    849 		    m0 == NULL);
    850 		if (__predict_false(msg != NULL)) {
    851 			printf("%s: packet with size %d is %s\n",
    852 			    ifp->if_xname, txreq.size, msg);
    853 			discard = (txreq.flags & NETTXF_more_data) != 0;
    854 			xennetback_tx_response(xneti, txreq.id,
    855 			    NETIF_RSP_ERROR);
    856 			if_statinc(ifp, if_ierrors);
    857 			continue;
    858 		}
    859 
    860 		/* get a mbuf for this fragment */
    861 		MGETHDR(m, M_DONTWAIT, MT_DATA);
    862 		if (__predict_false(m == NULL)) {
    863 			static struct timeval lasttime;
    864 mbuf_fail:
    865 			if (ratecheck(&lasttime, &xni_pool_errintvl))
    866 				printf("%s: mbuf alloc failed\n",
    867 				    ifp->if_xname);
    868 			xennetback_tx_copy_abort(ifp, xneti, queued);
    869 			queued = 0;
    870 			m0 = NULL;
    871 			discard = (txreq.flags & NETTXF_more_data) != 0;
    872 			xennetback_tx_response(xneti, txreq.id,
    873 			    NETIF_RSP_DROPPED);
    874 			if_statinc(ifp, if_ierrors);
    875 			continue;
    876 		}
    877 		m->m_len = m->m_pkthdr.len = txreq.size;
    878 
    879 		if (!m0 && (txreq.flags & NETTXF_more_data)) {
    880 			/*
    881 			 * The first fragment of multi-fragment Tx request
     882 			 * The first fragment of a multi-fragment Tx request
     883 			 * carries the total packet size; the whole chain must
     884 			 * be read to find the size of this first fragment.
     885 			 */
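        			/*
        			 * For example, a 3000-byte packet split into three
        			 * 1000-byte fragments arrives as a first request
        			 * advertising size 3000; subtracting the sizes of
        			 * the two following requests (1000 + 1000) leaves
        			 * 1000 bytes for this first fragment.
        			 */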
    886 			int cnt;
    887 			m0_len = xennetback_tx_m0len_fragment(xneti,
    888 			    txreq.size, xneti->xni_txring.req_cons, &cnt);
    889 			if (m0_len < 0) {
    890 				m_freem(m);
    891 				discard = 1;
    892 				xennetback_tx_response(xneti, txreq.id,
    893 				    NETIF_RSP_DROPPED);
    894 				if_statinc(ifp, if_ierrors);
    895 				continue;
    896 			}
    897 			m->m_len = m0_len;
    898 			KASSERT(cnt <= XEN_NETIF_NR_SLOTS_MIN);
    899 
    900 			if (queued + cnt >= NB_XMIT_PAGES_BATCH) {
    901 				/*
    902 				 * Flush queue if too full to fit this
    903 				 * new packet whole.
    904 				 */
    905 				xennetback_tx_copy_process(ifp, xneti, queued);
    906 				queued = 0;
    907 			}
    908 		}
    909 
    910 		if (m->m_len > MHLEN) {
    911 			MCLGET(m, M_DONTWAIT);
    912 			if (__predict_false((m->m_flags & M_EXT) == 0)) {
    913 				m_freem(m);
    914 				goto mbuf_fail;
    915 			}
    916 			if (__predict_false(m->m_len > MCLBYTES)) {
    917 				/* one more mbuf necessary */
    918 				struct mbuf *mn;
    919 				MGET(mn, M_DONTWAIT, MT_DATA);
    920 				if (__predict_false(mn == NULL)) {
    921 					m_freem(m);
    922 					goto mbuf_fail;
    923 				}
    924 				if (m->m_len - MCLBYTES > MLEN) {
    925 					MCLGET(mn, M_DONTWAIT);
    926 					if ((mn->m_flags & M_EXT) == 0) {
    927 						m_freem(mn);
    928 						m_freem(m);
    929 						goto mbuf_fail;
    930 					}
    931 				}
    932 				mn->m_len = m->m_len - MCLBYTES;
    933 				m->m_len = MCLBYTES;
    934 				m->m_next = mn;
    935 				KASSERT(mn->m_len <= MCLBYTES);
    936 			}
    937 			KASSERT(m->m_len <= MCLBYTES);
    938 		}
    939 
    940 		if (m0 || (txreq.flags & NETTXF_more_data)) {
    941 			if (m0 == NULL) {
    942 				m0 = m;
    943 				mlast = (m->m_next) ? m->m_next : m;
    944 				KASSERT(mlast->m_next == NULL);
    945 			} else {
    946 				/* Coalesce like m_cat(), but without copy */
    947 				KASSERT(mlast != NULL);
    948 				if (M_TRAILINGSPACE(mlast) >= m->m_pkthdr.len) {
    949 					mlast->m_len +=  m->m_pkthdr.len;
     950 					mlast->m_len += m->m_pkthdr.len;
    951 				} else {
    952 					mlast->m_next = m;
    953 					mlast = (m->m_next) ? m->m_next : m;
    954 					KASSERT(mlast->m_next == NULL);
    955 				}
    956 			}
    957 		}
    958 
    959 		XENPRINTF(("%s pkt offset %d size %d id %d req_cons %d\n",
    960 		    xneti->xni_if.if_xname, txreq.offset,
    961 		    txreq.size, txreq.id,
    962 		    xneti->xni_txring.req_cons & (RING_SIZE(&xneti->xni_txring) - 1)));
    963 
    964 		xst = &xneti->xni_xstate[queued];
    965 		xst->xs_m = (m0 == NULL || m == m0) ? m : NULL;
    966 		xst->xs_tx = txreq;
    967 		/* Fill the length of _this_ fragment */
    968 		xst->xs_tx_size = (m == m0) ? m0_len : m->m_pkthdr.len;
    969 		queued++;
    970 
    971 		KASSERT(queued <= NB_XMIT_PAGES_BATCH);
    972 		if (__predict_false(m0 &&
    973 		    (txreq.flags & NETTXF_more_data) == 0)) {
    974 			/* Last fragment, stop appending mbufs */
    975 			m0 = NULL;
    976 		}
    977 		if (queued == NB_XMIT_PAGES_BATCH) {
    978 			KASSERT(m0 == NULL);
    979 			xennetback_tx_copy_process(ifp, xneti, queued);
    980 			queued = 0;
    981 		}
    982 	}
    983 	RING_FINAL_CHECK_FOR_REQUESTS(&xneti->xni_txring, receive_pending);
    984 	if (receive_pending)
    985 		goto again;
    986 	if (m0) {
    987 		/* Queue empty, and still unfinished multi-fragment request */
    988 		printf("%s: dropped unfinished multi-fragment\n",
    989 		    ifp->if_xname);
    990 		xennetback_tx_copy_abort(ifp, xneti, queued);
    991 		queued = 0;
    992 		m0 = NULL;
    993 	}
    994 	if (queued > 0)
    995 		xennetback_tx_copy_process(ifp, xneti, queued);
    996 
    997 	/* check to see if we can transmit more packets */
    998 	if_schedule_deferred_start(ifp);
    999 
   1000 	return 1;
   1001 }
   1002 
   1003 static int
   1004 xennetback_ifioctl(struct ifnet *ifp, u_long cmd, void *data)
   1005 {
   1006 	//struct xnetback_instance *xneti = ifp->if_softc;
   1007 	//struct ifreq *ifr = (struct ifreq *)data;
   1008 	int s, error;
   1009 
   1010 	s = splnet();
   1011 	error = ether_ioctl(ifp, cmd, data);
   1012 	if (error == ENETRESET)
   1013 		error = 0;
   1014 	splx(s);
   1015 	return error;
   1016 }
   1017 
   1018 static void
   1019 xennetback_ifstart(struct ifnet *ifp)
   1020 {
   1021 	struct xnetback_instance *xneti = ifp->if_softc;
   1022 
   1023 	/*
    1024 	 * The Xen communication channel is much more efficient if we can
    1025 	 * schedule a batch of packets for the domain.  Deferred start by the
    1026 	 * network stack will enqueue all pending mbufs in the interface's
    1027 	 * send queue before it is processed by the soft interrupt handler.
   1028 	 */
   1029 	xennetback_ifsoftstart_copy(xneti);
   1030 }
   1031 
   1032 static void
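        /*
         * Issue the queued Rx grant copies, push the corresponding responses
         * to the frontend (notifying it if needed) and free the mbufs whose
         * data has been copied.
         */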
   1033 xennetback_rx_copy_process(struct ifnet *ifp, struct xnetback_instance *xneti,
   1034 	int queued, int copycnt)
   1035 {
   1036 	int notify;
   1037 	struct xnetback_xstate *xst;
   1038 
   1039 	if (xennetback_copy(ifp, xneti->xni_gop_copy, copycnt, "Rx") != 0) {
   1040 		/* message already displayed */
   1041 		goto free_mbufs;
   1042 	}
   1043 
   1044 	/* update pointer */
   1045 	xneti->xni_rxring.req_cons += queued;
   1046 	xneti->xni_rxring.rsp_prod_pvt += queued;
   1047 	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xneti->xni_rxring, notify);
   1048 
   1049 	/* send event */
   1050 	if (notify) {
   1051 		XENPRINTF(("%s receive event\n",
   1052 		    xneti->xni_if.if_xname));
   1053 		hypervisor_notify_via_evtchn(xneti->xni_evtchn);
   1054 	}
   1055 
   1056 free_mbufs:
   1057 	/* now that data was copied we can free the mbufs */
   1058 	for (int j = 0; j < queued; j++) {
   1059 		xst = &xneti->xni_xstate[j];
   1060 		if (xst->xs_loaded) {
   1061 			bus_dmamap_unload(xneti->xni_xbusd->xbusd_dmat,
   1062 			    xst->xs_dmamap);
   1063 			xst->xs_loaded = false;
   1064 		}
   1065 		m_freem(xst->xs_m);
   1066 		xst->xs_m = NULL;
   1067 	}
   1068 }
   1069 
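        /*
         * Queue the grant-copy operations and Rx responses needed to transfer
         * one local mbuf (already loaded into xst0->xs_dmamap) into the
         * frontend's receive buffers, consuming one Rx request per
         * destination page.
         */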
   1070 static void
   1071 xennetback_rx_copy_queue(struct xnetback_instance *xneti,
   1072     struct xnetback_xstate *xst0, int rsp_prod_pvt, int *queued, int *copycntp)
   1073 {
   1074 	struct xnetback_xstate *xst = xst0;
   1075 	gnttab_copy_t *gop;
   1076 	struct netif_rx_request rxreq;
   1077 	netif_rx_response_t *rxresp;
   1078 	paddr_t ma;
   1079 	size_t goff, segoff, segsize, take, totsize;
   1080 	int copycnt = *copycntp, reqcnt = *queued;
   1081 	const bus_dmamap_t dm = xst0->xs_dmamap;
   1082 	const bool multiseg = (dm->dm_nsegs > 1);
   1083 
   1084 	KASSERT(xst0 == &xneti->xni_xstate[reqcnt]);
   1085 
   1086 	RING_COPY_REQUEST(&xneti->xni_rxring,
   1087 	    xneti->xni_rxring.req_cons + reqcnt, &rxreq);
   1088 	goff = 0;
   1089 	rxresp = RING_GET_RESPONSE(&xneti->xni_rxring, rsp_prod_pvt + reqcnt);
   1090 	reqcnt++;
   1091 
   1092 	rxresp->id = rxreq.id;
   1093 	rxresp->offset = 0;
   1094 	if ((xst0->xs_m->m_pkthdr.csum_flags & XN_M_CSUM_SUPPORTED) != 0) {
   1095 		rxresp->flags = NETRXF_csum_blank;
   1096 	} else {
   1097 		rxresp->flags = NETRXF_data_validated;
   1098 	}
   1099 	if (multiseg)
   1100 		rxresp->flags |= NETRXF_more_data;
   1101 
   1102 	totsize = xst0->xs_m->m_pkthdr.len;
   1103 
   1104 	/*
   1105 	 * Arrange for the mbuf contents to be copied into one or more
   1106 	 * provided memory pages.
   1107 	 */
   1108 	for (int seg = 0; seg < dm->dm_nsegs; seg++) {
   1109 		ma = dm->dm_segs[seg].ds_addr;
   1110 		segsize = dm->dm_segs[seg].ds_len;
   1111 		segoff = 0;
   1112 
   1113 		while (segoff < segsize) {
   1114 			take = uimin(PAGE_SIZE - goff, segsize - segoff);
   1115 			KASSERT(take <= totsize);
   1116 
   1117 			/* add copy request */
   1118 			gop = &xneti->xni_gop_copy[copycnt++];
   1119 			gop->flags = GNTCOPY_dest_gref;
   1120 			gop->source.offset = (ma & PAGE_MASK) + segoff;
   1121 			gop->source.domid = DOMID_SELF;
   1122 			gop->source.u.gmfn = ma >> PAGE_SHIFT;
   1123 
   1124 			gop->dest.u.ref = rxreq.gref;
   1125 			gop->dest.offset = goff;
   1126 			gop->dest.domid = xneti->xni_domid;
   1127 
   1128 			gop->len = take;
   1129 
   1130 			segoff += take;
   1131 			goff += take;
   1132 			totsize -= take;
   1133 
   1134 			if (goff == PAGE_SIZE && totsize > 0) {
   1135 				rxresp->status = goff;
   1136 
   1137 				/* Take next grant */
   1138 				RING_COPY_REQUEST(&xneti->xni_rxring,
   1139 				    xneti->xni_rxring.req_cons + reqcnt,
   1140 				    &rxreq);
   1141 				goff = 0;
   1142 				rxresp = RING_GET_RESPONSE(&xneti->xni_rxring,
   1143 				    rsp_prod_pvt + reqcnt);
   1144 				reqcnt++;
   1145 
   1146 				rxresp->id = rxreq.id;
   1147 				rxresp->offset = 0;
   1148 				rxresp->flags = NETRXF_more_data;
   1149 
   1150 				xst++;
   1151 				xst->xs_m = NULL;
   1152 			}
   1153 		}
   1154 	}
   1155 	rxresp->flags &= ~NETRXF_more_data;
   1156 	rxresp->status = goff;
   1157 	KASSERT(totsize == 0);
   1158 
   1159 	KASSERT(copycnt > *copycntp);
   1160 	KASSERT(reqcnt > *queued);
   1161 	*copycntp = copycnt;
   1162 	*queued = reqcnt;
   1163 }
   1164 
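        /*
         * Drain the interface send queue: load each mbuf into a DMA map,
         * queue grant copies into the frontend's Rx buffers and push the
         * corresponding Rx responses, batching up to NB_XMIT_PAGES_BATCH
         * copy operations per hypercall.
         */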
   1165 static void
   1166 xennetback_ifsoftstart_copy(struct xnetback_instance *xneti)
   1167 {
   1168 	struct ifnet *ifp = &xneti->xni_if;
   1169 	struct mbuf *m;
   1170 	int queued = 0;
   1171 	RING_IDX req_prod, rsp_prod_pvt;
   1172 	struct xnetback_xstate *xst;
   1173 	int copycnt = 0;
   1174 	bool abort;
   1175 
   1176 	XENPRINTF(("xennetback_ifsoftstart_copy "));
   1177 	int s = splnet();
   1178 	if (__predict_false((ifp->if_flags & IFF_RUNNING) == 0)) {
   1179 		splx(s);
   1180 		return;
   1181 	}
   1182 
   1183 	while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
   1184 		XENPRINTF(("pkt\n"));
   1185 		req_prod = xneti->xni_rxring.sring->req_prod;
   1186 		rsp_prod_pvt = xneti->xni_rxring.rsp_prod_pvt;
   1187 		xen_rmb();
   1188 
   1189 		abort = false;
   1190 		KASSERT(queued == 0);
   1191 		KASSERT(copycnt == 0);
   1192 		while (copycnt < NB_XMIT_PAGES_BATCH) {
   1193 #define XN_RING_FULL(cnt)	\
   1194 			req_prod == xneti->xni_rxring.req_cons + (cnt) ||  \
   1195 			xneti->xni_rxring.req_cons - (rsp_prod_pvt + cnt) ==  \
   1196 			NET_RX_RING_SIZE
   1197 
   1198 			if (__predict_false(XN_RING_FULL(1))) {
   1199 				/* out of ring space */
   1200 				XENPRINTF(("xennetback_ifstart: ring full "
   1201 				    "req_prod 0x%x req_cons 0x%x rsp_prod_pvt "
   1202 				    "0x%x\n",
   1203 				    req_prod,
   1204 				    xneti->xni_rxring.req_cons + queued,
   1205 				    rsp_prod_pvt + queued));
   1206 				abort = true;
   1207 				break;
   1208 			}
   1209 
   1210 			IFQ_DEQUEUE(&ifp->if_snd, m);
   1211 			if (m == NULL)
   1212 				break;
   1213 
   1214 again:
   1215 			xst = &xneti->xni_xstate[queued];
   1216 
   1217 			/*
    1218 			 * For short packets it is always much faster to pass a
    1219 			 * single defragmented packet, even with feature-sg.
   1220 			 * Try to defragment first if the result is likely
   1221 			 * to fit into a single mbuf.
   1222 			 */
   1223 			if (m->m_pkthdr.len < MCLBYTES && m->m_next)
   1224 				(void)m_defrag(m, M_DONTWAIT);
   1225 
   1226 			if (bus_dmamap_load_mbuf(
   1227 			    xneti->xni_xbusd->xbusd_dmat,
   1228 			    xst->xs_dmamap, m, BUS_DMA_NOWAIT) != 0) {
   1229 				if (m_defrag(m, M_DONTWAIT) == NULL) {
   1230 					m_freem(m);
   1231 					static struct timeval lasttime;
   1232 					if (ratecheck(&lasttime, &xni_pool_errintvl))
   1233 						printf("%s: fail defrag mbuf\n",
   1234 						    ifp->if_xname);
   1235 					continue;
   1236 				}
   1237 
   1238 				if (__predict_false(bus_dmamap_load_mbuf(
   1239 				    xneti->xni_xbusd->xbusd_dmat,
   1240 				    xst->xs_dmamap, m, BUS_DMA_NOWAIT) != 0)) {
   1241 					printf("%s: cannot load mbuf\n",
   1242 					    ifp->if_xname);
   1243 					m_freem(m);
   1244 					continue;
   1245 				}
   1246 			}
   1247 			KASSERT(xst->xs_dmamap->dm_nsegs < NB_XMIT_PAGES_BATCH);
   1248 			KASSERTMSG(queued <= copycnt, "queued %d > copycnt %d",
   1249 			    queued, copycnt);
   1250 
   1251 			if (__predict_false(XN_RING_FULL(
   1252 			    xst->xs_dmamap->dm_nsegs))) {
   1253 				/* Ring too full to fit the packet */
   1254 				bus_dmamap_unload(xneti->xni_xbusd->xbusd_dmat,
   1255 				    xst->xs_dmamap);
   1256 				m_freem(m);
   1257 				abort = true;
   1258 				break;
   1259 			}
   1260 			if (__predict_false(copycnt + xst->xs_dmamap->dm_nsegs >
   1261 			    NB_XMIT_PAGES_BATCH)) {
   1262 				/* Batch already too full, flush and retry */
   1263 				bus_dmamap_unload(xneti->xni_xbusd->xbusd_dmat,
   1264 				    xst->xs_dmamap);
   1265 				xennetback_rx_copy_process(ifp, xneti, queued,
   1266 				    copycnt);
   1267 				queued = copycnt = 0;
   1268 				goto again;
   1269 			}
   1270 
   1271 			/* Now committed to send */
   1272 			xst->xs_loaded = true;
   1273 			xst->xs_m = m;
   1274 			xennetback_rx_copy_queue(xneti, xst,
   1275 			    rsp_prod_pvt, &queued, &copycnt);
   1276 
   1277 			if_statinc(ifp, if_opackets);
   1278 			bpf_mtap(ifp, m, BPF_D_OUT);
   1279 		}
   1280 		KASSERT(copycnt <= NB_XMIT_PAGES_BATCH);
   1281 		KASSERT(queued <= copycnt);
   1282 		if (copycnt > 0) {
   1283 			xennetback_rx_copy_process(ifp, xneti, queued, copycnt);
   1284 			queued = copycnt = 0;
   1285 		}
   1286 		/*
   1287 		 * note that we don't use RING_FINAL_CHECK_FOR_REQUESTS()
   1288 		 * here, as the frontend doesn't notify when adding
   1289 		 * requests anyway
   1290 		 */
   1291 		if (__predict_false(abort ||
   1292 		    !RING_HAS_UNCONSUMED_REQUESTS(&xneti->xni_rxring))) {
   1293 			/* ring full */
   1294 			ifp->if_timer = 1;
   1295 			break;
   1296 		}
   1297 	}
   1298 	splx(s);
   1299 }
   1300 
   1301 static void
   1302 xennetback_ifwatchdog(struct ifnet * ifp)
   1303 {
   1304 	/*
    1305 	 * We can get into the following condition: transmit stalls because
    1306 	 * the ring is full while the ifq is full too.
   1307 	 *
   1308 	 * In this case (as, unfortunately, we don't get an interrupt from xen
   1309 	 * on transmit) nothing will ever call xennetback_ifstart() again.
   1310 	 * Here we abuse the watchdog to get out of this condition.
   1311 	 */
   1312 	XENPRINTF(("xennetback_ifwatchdog\n"));
   1313 	xennetback_ifstart(ifp);
   1314 }
   1315 
   1316 static int
   1317 xennetback_ifinit(struct ifnet *ifp)
   1318 {
   1319 	struct xnetback_instance *xneti = ifp->if_softc;
   1320 	int s = splnet();
   1321 
   1322 	if ((ifp->if_flags & IFF_UP) == 0) {
   1323 		splx(s);
   1324 		return 0;
   1325 	}
   1326 	if (xneti->xni_status == CONNECTED)
   1327 		ifp->if_flags |= IFF_RUNNING;
   1328 	splx(s);
   1329 	return 0;
   1330 }
   1331 
   1332 static void
   1333 xennetback_ifstop(struct ifnet *ifp, int disable)
   1334 {
   1335 	struct xnetback_instance *xneti = ifp->if_softc;
   1336 	int s = splnet();
   1337 
   1338 	ifp->if_flags &= ~IFF_RUNNING;
   1339 	ifp->if_timer = 0;
   1340 	if (xneti->xni_status == CONNECTED) {
   1341 		xennetback_evthandler(ifp->if_softc); /* flush pending RX requests */
   1342 	}
   1343 	splx(s);
   1344 }
   1345