ix_txrx.c revision 1.9
      1 /******************************************************************************
      2 
      3   Copyright (c) 2001-2015, Intel Corporation
      4   All rights reserved.
      5 
      6   Redistribution and use in source and binary forms, with or without
      7   modification, are permitted provided that the following conditions are met:
      8 
      9    1. Redistributions of source code must retain the above copyright notice,
     10       this list of conditions and the following disclaimer.
     11 
     12    2. Redistributions in binary form must reproduce the above copyright
     13       notice, this list of conditions and the following disclaimer in the
     14       documentation and/or other materials provided with the distribution.
     15 
     16    3. Neither the name of the Intel Corporation nor the names of its
     17       contributors may be used to endorse or promote products derived from
     18       this software without specific prior written permission.
     19 
     20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     30   POSSIBILITY OF SUCH DAMAGE.
     31 
     32 ******************************************************************************/
     33 /*
     34  * Copyright (c) 2011 The NetBSD Foundation, Inc.
     35  * All rights reserved.
     36  *
     37  * This code is derived from software contributed to The NetBSD Foundation
     38  * by Coyote Point Systems, Inc.
     39  *
     40  * Redistribution and use in source and binary forms, with or without
     41  * modification, are permitted provided that the following conditions
     42  * are met:
     43  * 1. Redistributions of source code must retain the above copyright
     44  *    notice, this list of conditions and the following disclaimer.
     45  * 2. Redistributions in binary form must reproduce the above copyright
     46  *    notice, this list of conditions and the following disclaimer in the
     47  *    documentation and/or other materials provided with the distribution.
     48  *
     49  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     50  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     51  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     52  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     53  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     54  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     55  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     56  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     57  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     58  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     59  * POSSIBILITY OF SUCH DAMAGE.
     60  */
     61 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 292751 2015-12-26 17:27:48Z bz $*/
     62 /*$NetBSD: ix_txrx.c,v 1.9 2016/12/02 12:14:37 msaitoh Exp $*/
     63 
     64 #include "opt_inet.h"
     65 #include "opt_inet6.h"
     66 
     67 #include "ixgbe.h"
     68 
     69 #ifdef DEV_NETMAP
     70 #include <net/netmap.h>
     71 #include <sys/selinfo.h>
     72 #include <dev/netmap/netmap_kern.h>
     73 
     74 extern int ix_crcstrip;
     75 #endif
     76 
      77 /*
      78 ** HW RSC control:
      79 **  this feature only works with
      80 **  IPv4, and only on 82599 and later.
      81 **  It will also cause IP forwarding to
      82 **  fail, and that can't be controlled by
      83 **  the stack as it can with LRO. For all
      84 **  these reasons I've deemed it best to
      85 **  leave this off and not bother with a
      86 **  tuneable interface; enabling it means
      87 **  changing this value and recompiling.
      88 */
     89 static bool ixgbe_rsc_enable = FALSE;
     90 
     91 #ifdef IXGBE_FDIR
      92 /*
      93 ** For Flow Director: this is the
      94 ** rate at which TX packets are
      95 ** sampled for the filter pool; it
      96 ** means every 20th packet is probed.
      97 **
      98 ** This feature can be disabled by
      99 ** setting this to 0.
     100 */
    101 static int atr_sample_rate = 20;
    102 #endif
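
/*
 * Illustrative sketch only, not part of this driver: the 1-in-N
 * sampling scheme described above, which ixgbe_xmit() later applies
 * through txr->atr_count.  The names below (sample_count,
 * should_sample) are hypothetical.
 */
#if 0	/* example, never compiled */
#include <stdbool.h>

static unsigned int sample_count;

static bool
should_sample(int rate)
{
	/* A rate of 0 disables sampling entirely. */
	if (rate == 0)
		return false;
	if (++sample_count >= (unsigned int)rate) {
		sample_count = 0;	/* probe this packet, restart the count */
		return true;
	}
	return false;
}
#endif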
    103 
    104 /*********************************************************************
    105  *  Local Function prototypes
    106  *********************************************************************/
    107 static void	ixgbe_setup_transmit_ring(struct tx_ring *);
    108 static void     ixgbe_free_transmit_buffers(struct tx_ring *);
    109 static int	ixgbe_setup_receive_ring(struct rx_ring *);
    110 static void     ixgbe_free_receive_buffers(struct rx_ring *);
    111 
    112 static void	ixgbe_rx_checksum(u32, struct mbuf *, u32,
    113 		    struct ixgbe_hw_stats *);
    114 static void	ixgbe_refresh_mbufs(struct rx_ring *, int);
    115 static int      ixgbe_xmit(struct tx_ring *, struct mbuf *);
    116 static int	ixgbe_tx_ctx_setup(struct tx_ring *,
    117 		    struct mbuf *, u32 *, u32 *);
    118 static int	ixgbe_tso_setup(struct tx_ring *,
    119 		    struct mbuf *, u32 *, u32 *);
    120 #ifdef IXGBE_FDIR
    121 static void	ixgbe_atr(struct tx_ring *, struct mbuf *);
    122 #endif
    123 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
    124 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
    125 		    struct mbuf *, u32);
    126 
    127 static void	ixgbe_setup_hw_rsc(struct rx_ring *);
    128 
    129 #ifdef IXGBE_LEGACY_TX
    130 /*********************************************************************
    131  *  Transmit entry point
    132  *
    133  *  ixgbe_start is called by the stack to initiate a transmit.
    134  *  The driver will remain in this routine as long as there are
    135  *  packets to transmit and transmit resources are available.
     136  *  If resources are not available, the stack is notified and
    137  *  the packet is requeued.
    138  **********************************************************************/
    139 
    140 void
    141 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
    142 {
    143 	int rc;
    144 	struct mbuf    *m_head;
    145 	struct adapter *adapter = txr->adapter;
    146 
    147 	IXGBE_TX_LOCK_ASSERT(txr);
    148 
    149 	if ((ifp->if_flags & IFF_RUNNING) == 0)
    150 		return;
    151 	if (!adapter->link_active)
    152 		return;
    153 
    154 	while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
    155 		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
    156 			break;
    157 
    158 		IFQ_POLL(&ifp->if_snd, m_head);
    159 		if (m_head == NULL)
    160 			break;
    161 
    162 		if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
    163 			break;
    164 		}
    165 		IFQ_DEQUEUE(&ifp->if_snd, m_head);
    166 		if (rc == EFBIG) {
    167 			struct mbuf *mtmp;
    168 
    169 			if ((mtmp = m_defrag(m_head, M_NOWAIT)) != NULL) {
    170 				m_head = mtmp;
    171 				rc = ixgbe_xmit(txr, m_head);
    172 				if (rc != 0)
    173 					adapter->efbig2_tx_dma_setup.ev_count++;
    174 			} else
    175 				adapter->m_defrag_failed.ev_count++;
    176 		}
    177 		if (rc != 0) {
    178 			m_freem(m_head);
    179 			continue;
    180 		}
    181 
    182 		/* Send a copy of the frame to the BPF listener */
    183 		bpf_mtap(ifp, m_head);
    184 	}
    185 	return;
    186 }
    187 
    188 /*
    189  * Legacy TX start - called by the stack, this
    190  * always uses the first tx ring, and should
    191  * not be used with multiqueue tx enabled.
    192  */
    193 void
    194 ixgbe_start(struct ifnet *ifp)
    195 {
    196 	struct adapter *adapter = ifp->if_softc;
    197 	struct tx_ring	*txr = adapter->tx_rings;
    198 
    199 	if (ifp->if_flags & IFF_RUNNING) {
    200 		IXGBE_TX_LOCK(txr);
    201 		ixgbe_start_locked(txr, ifp);
    202 		IXGBE_TX_UNLOCK(txr);
    203 	}
    204 	return;
    205 }
    206 
    207 #else /* ! IXGBE_LEGACY_TX */
    208 
    209 /*
    210 ** Multiqueue Transmit Entry Point
    211 ** (if_transmit function)
    212 */
    213 int
    214 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
    215 {
    216 	struct adapter	*adapter = ifp->if_softc;
    217 	struct ix_queue	*que;
    218 	struct tx_ring	*txr;
    219 	int 		i, err = 0;
    220 #ifdef	RSS
    221 	uint32_t bucket_id;
    222 #endif
    223 
    224 	/*
    225 	 * When doing RSS, map it to the same outbound queue
    226 	 * as the incoming flow would be mapped to.
    227 	 *
     228 	 * If everything is set up correctly, it should be the
     229 	 * same bucket that the current CPU maps to.
    230 	 */
    231 #if __FreeBSD_version < 1100054
    232 	if (m->m_flags & M_FLOWID) {
    233 #else
    234 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
    235 #endif
    236 #ifdef	RSS
    237 		if (rss_hash2bucket(m->m_pkthdr.flowid,
    238 		    M_HASHTYPE_GET(m), &bucket_id) == 0) {
    239 			/* TODO: spit out something if bucket_id > num_queues? */
    240 			i = bucket_id % adapter->num_queues;
    241 #ifdef IXGBE_DEBUG
    242 			if (bucket_id > adapter->num_queues)
    243 				if_printf(ifp, "bucket_id (%d) > num_queues "
    244 				    "(%d)\n", bucket_id, adapter->num_queues);
    245 #endif
    246 		} else
    247 #endif
    248 			i = m->m_pkthdr.flowid % adapter->num_queues;
    249 	} else
    250 		i = curcpu % adapter->num_queues;
    251 
    252 	/* Check for a hung queue and pick alternative */
    253 	if (((1 << i) & adapter->active_queues) == 0)
    254 		i = ffsl(adapter->active_queues);
    255 
    256 	txr = &adapter->tx_rings[i];
    257 	que = &adapter->queues[i];
    258 
    259 	err = drbr_enqueue(ifp, txr->br, m);
    260 	if (err)
    261 		return (err);
    262 	if (IXGBE_TX_TRYLOCK(txr)) {
    263 		ixgbe_mq_start_locked(ifp, txr);
    264 		IXGBE_TX_UNLOCK(txr);
    265 	} else
    266 		softint_schedule(txr->txq_si);
    267 
    268 	return (0);
    269 }
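
/*
 * Illustrative sketch only, not part of this driver: mapping a flow
 * id to a transmit queue and falling back to an active queue via a
 * bitmask, as ixgbe_mq_start() does above.  Names are hypothetical;
 * note that ffs() returns a 1-based bit index.
 */
#if 0	/* example, never compiled */
#include <stdint.h>
#include <strings.h>

static int
pick_tx_queue(uint32_t flowid, uint32_t active_mask, int nqueues)
{
	int i = (int)(flowid % (uint32_t)nqueues);

	/*
	 * If the hashed queue is not marked active, fall back to the
	 * lowest active one.
	 */
	if (((1U << i) & active_mask) == 0 && active_mask != 0)
		i = ffs((int)active_mask) - 1;
	return i;
}
#endif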
    270 
    271 int
    272 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
    273 {
    274 	struct adapter  *adapter = txr->adapter;
    275 	struct mbuf     *next;
    276 	int             enqueued = 0, err = 0;
    277 
    278 	if (((ifp->if_flags & IFF_RUNNING) == 0) ||
    279 	    adapter->link_active == 0)
    280 		return (ENETDOWN);
    281 
    282 	/* Process the queue */
    283 #if __FreeBSD_version < 901504
    284 	next = drbr_dequeue(ifp, txr->br);
    285 	while (next != NULL) {
    286 		if ((err = ixgbe_xmit(txr, &next)) != 0) {
    287 			if (next != NULL)
    288 				err = drbr_enqueue(ifp, txr->br, next);
    289 #else
    290 	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
    291 		if ((err = ixgbe_xmit(txr, &next)) != 0) {
    292 			if (next == NULL) {
    293 				drbr_advance(ifp, txr->br);
    294 			} else {
    295 				drbr_putback(ifp, txr->br, next);
    296 			}
    297 #endif
    298 			break;
    299 		}
    300 #if __FreeBSD_version >= 901504
    301 		drbr_advance(ifp, txr->br);
    302 #endif
    303 		enqueued++;
    304 #if 0 // this is VF-only
    305 #if __FreeBSD_version >= 1100036
    306 		/*
    307 		 * Since we're looking at the tx ring, we can check
     308 		 * to see if we're a VF by examining our tail register
    309 		 * address.
    310 		 */
    311 		if (txr->tail < IXGBE_TDT(0) && next->m_flags & M_MCAST)
    312 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
    313 #endif
    314 #endif
    315 		/* Send a copy of the frame to the BPF listener */
    316 		bpf_mtap(ifp, next);
    317 		if ((ifp->if_flags & IFF_RUNNING) == 0)
    318 			break;
    319 #if __FreeBSD_version < 901504
    320 		next = drbr_dequeue(ifp, txr->br);
    321 #endif
    322 	}
    323 
    324 	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
    325 		ixgbe_txeof(txr);
    326 
    327 	return (err);
    328 }
    329 
    330 /*
    331  * Called from a taskqueue to drain queued transmit packets.
    332  */
    333 void
    334 ixgbe_deferred_mq_start(void *arg, int pending)
    335 {
    336 	struct tx_ring *txr = arg;
    337 	struct adapter *adapter = txr->adapter;
    338 	struct ifnet *ifp = adapter->ifp;
    339 
    340 	IXGBE_TX_LOCK(txr);
    341 	if (!drbr_empty(ifp, txr->br))
    342 		ixgbe_mq_start_locked(ifp, txr);
    343 	IXGBE_TX_UNLOCK(txr);
    344 }
    345 
    346 /*
    347  * Flush all ring buffers
    348  */
    349 void
    350 ixgbe_qflush(struct ifnet *ifp)
    351 {
    352 	struct adapter	*adapter = ifp->if_softc;
    353 	struct tx_ring	*txr = adapter->tx_rings;
    354 	struct mbuf	*m;
    355 
    356 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
    357 		IXGBE_TX_LOCK(txr);
    358 		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
    359 			m_freem(m);
    360 		IXGBE_TX_UNLOCK(txr);
    361 	}
    362 	if_qflush(ifp);
    363 }
    364 #endif /* IXGBE_LEGACY_TX */
    365 
    366 
    367 /*********************************************************************
    368  *
    369  *  This routine maps the mbufs to tx descriptors, allowing the
    370  *  TX engine to transmit the packets.
     371  *  	- returns 0 on success, a positive errno on failure
    372  *
    373  **********************************************************************/
    374 
    375 static int
    376 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
    377 {
    378 	struct m_tag *mtag;
    379 	struct adapter  *adapter = txr->adapter;
    380 	struct ethercom *ec = &adapter->osdep.ec;
    381 	u32		olinfo_status = 0, cmd_type_len;
    382 	int             i, j, error;
    383 	int		first;
    384 	bus_dmamap_t	map;
    385 	struct ixgbe_tx_buf *txbuf;
    386 	union ixgbe_adv_tx_desc *txd = NULL;
    387 
    388 	/* Basic descriptor defines */
    389         cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
    390 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
    391 
    392 	if ((mtag = VLAN_OUTPUT_TAG(ec, m_head)) != NULL)
    393         	cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
    394 
    395         /*
    396          * Important to capture the first descriptor
    397          * used because it will contain the index of
    398          * the one we tell the hardware to report back
    399          */
    400         first = txr->next_avail_desc;
    401 	txbuf = &txr->tx_buffers[first];
    402 	map = txbuf->map;
    403 
    404 	/*
    405 	 * Map the packet for DMA.
    406 	 */
    407 	error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map,
    408 	    m_head, BUS_DMA_NOWAIT);
    409 
    410 	if (__predict_false(error)) {
    411 
    412 		switch (error) {
    413 		case EAGAIN:
    414 			adapter->eagain_tx_dma_setup.ev_count++;
    415 			return EAGAIN;
    416 		case ENOMEM:
    417 			adapter->enomem_tx_dma_setup.ev_count++;
    418 			return EAGAIN;
    419 		case EFBIG:
    420 			/*
    421 			 * XXX Try it again?
    422 			 * do m_defrag() and retry bus_dmamap_load_mbuf().
    423 			 */
    424 			adapter->efbig_tx_dma_setup.ev_count++;
    425 			return error;
    426 		case EINVAL:
    427 			adapter->einval_tx_dma_setup.ev_count++;
    428 			return error;
    429 		default:
    430 			adapter->other_tx_dma_setup.ev_count++;
    431 			return error;
    432 		}
    433 	}
    434 
    435 	/* Make certain there are enough descriptors */
    436 	if (map->dm_nsegs > txr->tx_avail - 2) {
    437 		txr->no_desc_avail.ev_count++;
    438 		ixgbe_dmamap_unload(txr->txtag, txbuf->map);
    439 		return EAGAIN;
    440 	}
    441 
    442 	/*
     443 	 * Set up the appropriate offload context;
     444 	 * this will consume the first descriptor.
    445 	 */
    446 	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
    447 	if (__predict_false(error)) {
    448 		return (error);
    449 	}
    450 
    451 #ifdef IXGBE_FDIR
    452 	/* Do the flow director magic */
    453 	if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
    454 		++txr->atr_count;
    455 		if (txr->atr_count >= atr_sample_rate) {
    456 			ixgbe_atr(txr, m_head);
    457 			txr->atr_count = 0;
    458 		}
    459 	}
    460 #endif
    461 
    462 	olinfo_status |= IXGBE_ADVTXD_CC;
    463 	i = txr->next_avail_desc;
    464 	for (j = 0; j < map->dm_nsegs; j++) {
    465 		bus_size_t seglen;
    466 		bus_addr_t segaddr;
    467 
    468 		txbuf = &txr->tx_buffers[i];
    469 		txd = &txr->tx_base[i];
    470 		seglen = map->dm_segs[j].ds_len;
    471 		segaddr = htole64(map->dm_segs[j].ds_addr);
    472 
    473 		txd->read.buffer_addr = segaddr;
    474 		txd->read.cmd_type_len = htole32(txr->txd_cmd |
    475 		    cmd_type_len |seglen);
    476 		txd->read.olinfo_status = htole32(olinfo_status);
    477 
    478 		if (++i == txr->num_desc)
    479 			i = 0;
    480 	}
    481 
    482 	txd->read.cmd_type_len |=
    483 	    htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
    484 	txr->tx_avail -= map->dm_nsegs;
    485 	txr->next_avail_desc = i;
    486 
    487 	txbuf->m_head = m_head;
    488 	/*
    489 	 * Here we swap the map so the last descriptor,
     490 	 * which gets the completion interrupt, has the
    491 	 * real map, and the first descriptor gets the
    492 	 * unused map from this descriptor.
    493 	 */
    494 	txr->tx_buffers[first].map = txbuf->map;
    495 	txbuf->map = map;
    496 	bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
    497 	    BUS_DMASYNC_PREWRITE);
    498 
    499         /* Set the EOP descriptor that will be marked done */
    500         txbuf = &txr->tx_buffers[first];
    501 	txbuf->eop = txd;
    502 
    503         ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
    504 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
    505 	/*
     506 	 * Advance the Transmit Descriptor Tail (TDT); this tells the
    507 	 * hardware that this frame is available to transmit.
    508 	 */
    509 	++txr->total_packets.ev_count;
    510 	IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
    511 
    512 	/* Mark queue as having work */
    513 	if (txr->busy == 0)
    514 		txr->busy = 1;
    515 
    516 	return 0;
    517 }
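
/*
 * Illustrative sketch only, not part of this driver: the circular
 * ring bookkeeping ixgbe_xmit() performs above -- consume one slot
 * per DMA segment, wrap the producer index at the ring size, keep a
 * free-slot count, and remember which slot is the end of packet.
 * The structure and names below are hypothetical.
 */
#if 0	/* example, never compiled */
struct ring_state {
	int	num_desc;	/* total slots in the ring */
	int	next_avail;	/* producer index */
	int	avail;		/* free slots remaining */
};

/*
 * Returns the index of the last (EOP) slot used, or -1 when the ring
 * does not have room for nsegs segments plus some slack.
 */
static int
ring_post(struct ring_state *r, int nsegs)
{
	int i, last = -1;

	if (nsegs > r->avail - 2)
		return -1;		/* caller requeues and retries later */

	for (i = 0; i < nsegs; i++) {
		last = r->next_avail;
		if (++r->next_avail == r->num_desc)
			r->next_avail = 0;	/* wrap */
	}
	r->avail -= nsegs;
	return last;			/* this slot gets EOP | RS */
}
#endif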
    518 
    519 /*********************************************************************
    520  *
    521  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
    522  *  the information needed to transmit a packet on the wire. This is
     523  *  called only once at attach; setup is done on every reset.
    524  *
    525  **********************************************************************/
    526 int
    527 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
    528 {
    529 	struct adapter *adapter = txr->adapter;
    530 	device_t dev = adapter->dev;
    531 	struct ixgbe_tx_buf *txbuf;
    532 	int error, i;
    533 
    534 	/*
    535 	 * Setup DMA descriptor areas.
    536 	 */
    537 	if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat,	/* parent */
    538 			       1, 0,		/* alignment, bounds */
    539 			       IXGBE_TSO_SIZE,		/* maxsize */
    540 			       adapter->num_segs,	/* nsegments */
    541 			       PAGE_SIZE,		/* maxsegsize */
    542 			       0,			/* flags */
    543 			       &txr->txtag))) {
    544 		aprint_error_dev(dev,"Unable to allocate TX DMA tag\n");
    545 		goto fail;
    546 	}
    547 
    548 	if (!(txr->tx_buffers =
    549 	    (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
    550 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
    551 		aprint_error_dev(dev, "Unable to allocate tx_buffer memory\n");
    552 		error = ENOMEM;
    553 		goto fail;
    554 	}
    555 
    556         /* Create the descriptor buffer dma maps */
    557 	txbuf = txr->tx_buffers;
    558 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
    559 		error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
    560 		if (error != 0) {
    561 			aprint_error_dev(dev,
    562 			    "Unable to create TX DMA map (%d)\n", error);
    563 			goto fail;
    564 		}
    565 	}
    566 
    567 	return 0;
    568 fail:
     569 	/* Free everything; this handles the case where we failed partway through */
    570 	ixgbe_free_transmit_structures(adapter);
    571 	return (error);
    572 }
    573 
    574 /*********************************************************************
    575  *
    576  *  Initialize a transmit ring.
    577  *
    578  **********************************************************************/
    579 static void
    580 ixgbe_setup_transmit_ring(struct tx_ring *txr)
    581 {
    582 	struct adapter *adapter = txr->adapter;
    583 	struct ixgbe_tx_buf *txbuf;
    584 #ifdef DEV_NETMAP
    585 	struct netmap_adapter *na = NA(adapter->ifp);
    586 	struct netmap_slot *slot;
    587 #endif /* DEV_NETMAP */
    588 
    589 	/* Clear the old ring contents */
    590 	IXGBE_TX_LOCK(txr);
    591 #ifdef DEV_NETMAP
    592 	/*
    593 	 * (under lock): if in netmap mode, do some consistency
    594 	 * checks and set slot to entry 0 of the netmap ring.
    595 	 */
    596 	slot = netmap_reset(na, NR_TX, txr->me, 0);
    597 #endif /* DEV_NETMAP */
    598 	bzero((void *)txr->tx_base,
    599 	      (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
    600 	/* Reset indices */
    601 	txr->next_avail_desc = 0;
    602 	txr->next_to_clean = 0;
    603 
    604 	/* Free any existing tx buffers. */
    605         txbuf = txr->tx_buffers;
    606 	for (int i = 0; i < txr->num_desc; i++, txbuf++) {
    607 		if (txbuf->m_head != NULL) {
    608 			bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
    609 			    0, txbuf->m_head->m_pkthdr.len,
    610 			    BUS_DMASYNC_POSTWRITE);
    611 			ixgbe_dmamap_unload(txr->txtag, txbuf->map);
    612 			m_freem(txbuf->m_head);
    613 			txbuf->m_head = NULL;
    614 		}
    615 #ifdef DEV_NETMAP
    616 		/*
    617 		 * In netmap mode, set the map for the packet buffer.
    618 		 * NOTE: Some drivers (not this one) also need to set
    619 		 * the physical buffer address in the NIC ring.
    620 		 * Slots in the netmap ring (indexed by "si") are
    621 		 * kring->nkr_hwofs positions "ahead" wrt the
    622 		 * corresponding slot in the NIC ring. In some drivers
    623 		 * (not here) nkr_hwofs can be negative. Function
    624 		 * netmap_idx_n2k() handles wraparounds properly.
    625 		 */
    626 		if (slot) {
    627 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
    628 			netmap_load_map(na, txr->txtag,
    629 			    txbuf->map, NMB(na, slot + si));
    630 		}
    631 #endif /* DEV_NETMAP */
    632 		/* Clear the EOP descriptor pointer */
    633 		txbuf->eop = NULL;
    634         }
    635 
    636 #ifdef IXGBE_FDIR
    637 	/* Set the rate at which we sample packets */
    638 	if (adapter->hw.mac.type != ixgbe_mac_82598EB)
    639 		txr->atr_sample = atr_sample_rate;
    640 #endif
    641 
    642 	/* Set number of descriptors available */
    643 	txr->tx_avail = adapter->num_tx_desc;
    644 
    645 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
    646 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
    647 	IXGBE_TX_UNLOCK(txr);
    648 }
    649 
    650 /*********************************************************************
    651  *
    652  *  Initialize all transmit rings.
    653  *
    654  **********************************************************************/
    655 int
    656 ixgbe_setup_transmit_structures(struct adapter *adapter)
    657 {
    658 	struct tx_ring *txr = adapter->tx_rings;
    659 
    660 	for (int i = 0; i < adapter->num_queues; i++, txr++)
    661 		ixgbe_setup_transmit_ring(txr);
    662 
    663 	return (0);
    664 }
    665 
    666 /*********************************************************************
    667  *
    668  *  Free all transmit rings.
    669  *
    670  **********************************************************************/
    671 void
    672 ixgbe_free_transmit_structures(struct adapter *adapter)
    673 {
    674 	struct tx_ring *txr = adapter->tx_rings;
    675 
    676 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
    677 		ixgbe_free_transmit_buffers(txr);
    678 		ixgbe_dma_free(adapter, &txr->txdma);
    679 		IXGBE_TX_LOCK_DESTROY(txr);
    680 	}
    681 	free(adapter->tx_rings, M_DEVBUF);
    682 }
    683 
    684 /*********************************************************************
    685  *
    686  *  Free transmit ring related data structures.
    687  *
    688  **********************************************************************/
    689 static void
    690 ixgbe_free_transmit_buffers(struct tx_ring *txr)
    691 {
    692 	struct adapter *adapter = txr->adapter;
    693 	struct ixgbe_tx_buf *tx_buffer;
    694 	int             i;
    695 
    696 	INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
    697 
    698 	if (txr->tx_buffers == NULL)
    699 		return;
    700 
    701 	tx_buffer = txr->tx_buffers;
    702 	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
    703 		if (tx_buffer->m_head != NULL) {
    704 			bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
    705 			    0, tx_buffer->m_head->m_pkthdr.len,
    706 			    BUS_DMASYNC_POSTWRITE);
    707 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
    708 			m_freem(tx_buffer->m_head);
    709 			tx_buffer->m_head = NULL;
    710 			if (tx_buffer->map != NULL) {
    711 				ixgbe_dmamap_destroy(txr->txtag,
    712 				    tx_buffer->map);
    713 				tx_buffer->map = NULL;
    714 			}
    715 		} else if (tx_buffer->map != NULL) {
    716 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
    717 			ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
    718 			tx_buffer->map = NULL;
    719 		}
    720 	}
    721 #ifndef IXGBE_LEGACY_TX
    722 	if (txr->br != NULL)
    723 		buf_ring_free(txr->br, M_DEVBUF);
    724 #endif
    725 	if (txr->tx_buffers != NULL) {
    726 		free(txr->tx_buffers, M_DEVBUF);
    727 		txr->tx_buffers = NULL;
    728 	}
    729 	if (txr->txtag != NULL) {
    730 		ixgbe_dma_tag_destroy(txr->txtag);
    731 		txr->txtag = NULL;
    732 	}
    733 	return;
    734 }
    735 
    736 /*********************************************************************
    737  *
    738  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
    739  *
    740  **********************************************************************/
    741 
    742 static int
    743 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
    744     u32 *cmd_type_len, u32 *olinfo_status)
    745 {
    746 	struct adapter *adapter = txr->adapter;
    747 	struct ethercom *ec = &adapter->osdep.ec;
    748 	struct m_tag *mtag;
    749 	struct ixgbe_adv_tx_context_desc *TXD;
    750 	struct ether_vlan_header *eh;
    751 #ifdef INET
    752 	struct ip *ip;
    753 #endif
    754 #ifdef INET6
    755 	struct ip6_hdr *ip6;
    756 #endif
    757 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
    758 	int	ehdrlen, ip_hlen = 0;
    759 	u16	etype;
    760 	u8	ipproto = 0;
    761 	int	offload = TRUE;
    762 	int	ctxd = txr->next_avail_desc;
    763 	u16	vtag = 0;
    764 	char	*l3d;
    765 
    766 
    767 	/* First check if TSO is to be used */
    768 	if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4|M_CSUM_TSOv6))
    769 		return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
    770 
    771 	if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
    772 		offload = FALSE;
    773 
    774 	/* Indicate the whole packet as payload when not doing TSO */
    775        	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
    776 
    777 	/* Now ready a context descriptor */
    778 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
    779 
    780 	/*
    781 	** In advanced descriptors the vlan tag must
    782 	** be placed into the context descriptor. Hence
    783 	** we need to make one even if not doing offloads.
    784 	*/
    785 	if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
    786 		vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
    787 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
    788 	} else if (!IXGBE_IS_X550VF(adapter) && (offload == FALSE))
    789 		return (0);
    790 
    791 	/*
    792 	 * Determine where frame payload starts.
    793 	 * Jump over vlan headers if already present,
    794 	 * helpful for QinQ too.
    795 	 */
    796 	KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
    797 	eh = mtod(mp, struct ether_vlan_header *);
    798 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
    799 		KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
    800 		etype = ntohs(eh->evl_proto);
    801 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
    802 	} else {
    803 		etype = ntohs(eh->evl_encap_proto);
    804 		ehdrlen = ETHER_HDR_LEN;
    805 	}
    806 
    807 	/* Set the ether header length */
    808 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
    809 
    810 	if (offload == FALSE)
    811 		goto no_offloads;
    812 
    813 	/*
    814 	 * If the first mbuf only includes the ethernet header, jump to the next one
    815 	 * XXX: This assumes the stack splits mbufs containing headers on header boundaries
    816 	 * XXX: And assumes the entire IP header is contained in one mbuf
    817 	 */
    818 	if (mp->m_len == ehdrlen && mp->m_next)
    819 		l3d = mtod(mp->m_next, char *);
    820 	else
    821 		l3d = mtod(mp, char *) + ehdrlen;
    822 
    823 	switch (etype) {
    824 #ifdef INET
    825 	case ETHERTYPE_IP:
    826 		ip = (struct ip *)(l3d);
    827 		ip_hlen = ip->ip_hl << 2;
    828 		ipproto = ip->ip_p;
    829 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
    830 		KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
    831 		    ip->ip_sum == 0);
    832 		break;
    833 #endif
    834 #ifdef INET6
    835 	case ETHERTYPE_IPV6:
    836 		ip6 = (struct ip6_hdr *)(l3d);
    837 		ip_hlen = sizeof(struct ip6_hdr);
    838 		ipproto = ip6->ip6_nxt;
    839 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
    840 		break;
    841 #endif
    842 	default:
    843 		break;
    844 	}
    845 
    846 	if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
    847 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
    848 
    849 	vlan_macip_lens |= ip_hlen;
    850 
    851 	/* No support for offloads for non-L4 next headers */
     852 	switch (ipproto) {
     853 	case IPPROTO_TCP:
     854 		if (mp->m_pkthdr.csum_flags &
     855 		    (M_CSUM_TCPv4|M_CSUM_TCPv6))
     856 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
     857 		else
     858 			offload = false;
     859 		break;
     860 	case IPPROTO_UDP:
     861 		if (mp->m_pkthdr.csum_flags & (M_CSUM_UDPv4|M_CSUM_UDPv6))
     862 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
     863 		else
     864 			offload = false;
     865 		break;
     866 	}
    867 
    868 	if (offload) /* Insert L4 checksum into data descriptors */
    869 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
    870 
    871 no_offloads:
    872 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
    873 
    874 	/* Now copy bits into descriptor */
    875 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
    876 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
    877 	TXD->seqnum_seed = htole32(0);
    878 	TXD->mss_l4len_idx = htole32(0);
    879 
    880 	/* We've consumed the first desc, adjust counters */
    881 	if (++ctxd == txr->num_desc)
    882 		ctxd = 0;
    883 	txr->next_avail_desc = ctxd;
    884 	--txr->tx_avail;
    885 
    886         return 0;
    887 }
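
/*
 * Illustrative sketch only, not part of this driver: computing the
 * Ethernet header length (with an optional 802.1Q tag, as the
 * QinQ-aware code above does) and the IPv4 header length from a
 * contiguous buffer.  Constants and names are local to the sketch.
 */
#if 0	/* example, never compiled */
#include <stddef.h>
#include <stdint.h>

#define EX_ETHER_HDR_LEN	14	/* dst + src + ethertype */
#define EX_VLAN_ENCAP_LEN	4	/* 802.1Q tag */
#define EX_ETHERTYPE_VLAN	0x8100
#define EX_ETHERTYPE_IP		0x0800

/* Returns the offset of the L4 header, or 0 if not plain IPv4. */
static size_t
l4_offset(const uint8_t *frame, size_t len)
{
	size_t ehdrlen = EX_ETHER_HDR_LEN;
	uint16_t etype;

	if (len < EX_ETHER_HDR_LEN + EX_VLAN_ENCAP_LEN + 20)
		return 0;
	etype = (uint16_t)((frame[12] << 8) | frame[13]);
	if (etype == EX_ETHERTYPE_VLAN) {
		ehdrlen += EX_VLAN_ENCAP_LEN;
		etype = (uint16_t)((frame[16] << 8) | frame[17]);
	}
	if (etype != EX_ETHERTYPE_IP)
		return 0;
	/* The IPv4 IHL field is in 32-bit words, i.e. ip_hl << 2 above. */
	return ehdrlen + (size_t)((frame[ehdrlen] & 0x0f) << 2);
}
#endif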
    888 
    889 /**********************************************************************
    890  *
    891  *  Setup work for hardware segmentation offload (TSO) on
    892  *  adapters using advanced tx descriptors
    893  *
    894  **********************************************************************/
    895 static int
    896 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
    897     u32 *cmd_type_len, u32 *olinfo_status)
    898 {
    899 	struct m_tag *mtag;
    900 	struct adapter *adapter = txr->adapter;
    901 	struct ethercom *ec = &adapter->osdep.ec;
    902 	struct ixgbe_adv_tx_context_desc *TXD;
    903 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
    904 	u32 mss_l4len_idx = 0, paylen;
    905 	u16 vtag = 0, eh_type;
    906 	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
    907 	struct ether_vlan_header *eh;
    908 #ifdef INET6
    909 	struct ip6_hdr *ip6;
    910 #endif
    911 #ifdef INET
    912 	struct ip *ip;
    913 #endif
    914 	struct tcphdr *th;
    915 
    916 	/*
    917 	 * Determine where frame payload starts.
    918 	 * Jump over vlan headers if already present
    919 	 */
    920 	eh = mtod(mp, struct ether_vlan_header *);
    921 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
    922 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
    923 		eh_type = eh->evl_proto;
    924 	} else {
    925 		ehdrlen = ETHER_HDR_LEN;
    926 		eh_type = eh->evl_encap_proto;
    927 	}
    928 
    929 	switch (ntohs(eh_type)) {
    930 #ifdef INET6
    931 	case ETHERTYPE_IPV6:
    932 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
    933 		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
    934 		if (ip6->ip6_nxt != IPPROTO_TCP)
    935 			return (ENXIO);
    936 		ip_hlen = sizeof(struct ip6_hdr);
    937 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
    938 		th = (struct tcphdr *)((char *)ip6 + ip_hlen);
    939 		th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
    940 		    &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
    941 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
    942 		break;
    943 #endif
    944 #ifdef INET
    945 	case ETHERTYPE_IP:
    946 		ip = (struct ip *)(mp->m_data + ehdrlen);
    947 		if (ip->ip_p != IPPROTO_TCP)
    948 			return (ENXIO);
    949 		ip->ip_sum = 0;
    950 		ip_hlen = ip->ip_hl << 2;
    951 		th = (struct tcphdr *)((char *)ip + ip_hlen);
    952 		th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
    953 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
    954 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
    955 		/* Tell transmit desc to also do IPv4 checksum. */
    956 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
    957 		break;
    958 #endif
    959 	default:
    960 		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
    961 		    __func__, ntohs(eh_type));
    962 		break;
    963 	}
    964 
    965 	ctxd = txr->next_avail_desc;
    966 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
    967 
    968 	tcp_hlen = th->th_off << 2;
    969 
    970 	/* This is used in the transmit desc in encap */
    971 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
    972 
    973 	/* VLAN MACLEN IPLEN */
    974 	if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
    975 		vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
    976                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
    977 	}
    978 
    979 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
    980 	vlan_macip_lens |= ip_hlen;
    981 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
    982 
    983 	/* ADV DTYPE TUCMD */
    984 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
    985 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
    986 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
    987 
    988 	/* MSS L4LEN IDX */
    989 	mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
    990 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
    991 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
    992 
    993 	TXD->seqnum_seed = htole32(0);
    994 
    995 	if (++ctxd == txr->num_desc)
    996 		ctxd = 0;
    997 
    998 	txr->tx_avail--;
    999 	txr->next_avail_desc = ctxd;
   1000 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
   1001 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
   1002 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
   1003 	++txr->tso_tx.ev_count;
   1004 	return (0);
   1005 }
   1006 
   1007 
   1008 /**********************************************************************
   1009  *
   1010  *  Examine each tx_buffer in the used queue. If the hardware is done
   1011  *  processing the packet then free associated resources. The
   1012  *  tx_buffer is put back on the free queue.
   1013  *
   1014  **********************************************************************/
   1015 void
   1016 ixgbe_txeof(struct tx_ring *txr)
   1017 {
   1018 	struct adapter		*adapter = txr->adapter;
   1019 	struct ifnet		*ifp = adapter->ifp;
   1020 	u32			work, processed = 0;
   1021 	u32			limit = adapter->tx_process_limit;
   1022 	struct ixgbe_tx_buf	*buf;
   1023 	union ixgbe_adv_tx_desc *txd;
   1024 
   1025 	KASSERT(mutex_owned(&txr->tx_mtx));
   1026 
   1027 #ifdef DEV_NETMAP
   1028 	if (ifp->if_capenable & IFCAP_NETMAP) {
   1029 		struct netmap_adapter *na = NA(ifp);
   1030 		struct netmap_kring *kring = &na->tx_rings[txr->me];
   1031 		txd = txr->tx_base;
   1032 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1033 		    BUS_DMASYNC_POSTREAD);
   1034 		/*
   1035 		 * In netmap mode, all the work is done in the context
   1036 		 * of the client thread. Interrupt handlers only wake up
   1037 		 * clients, which may be sleeping on individual rings
   1038 		 * or on a global resource for all rings.
   1039 		 * To implement tx interrupt mitigation, we wake up the client
   1040 		 * thread roughly every half ring, even if the NIC interrupts
   1041 		 * more frequently. This is implemented as follows:
   1042 		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
   1043 		 *   the slot that should wake up the thread (nkr_num_slots
   1044 		 *   means the user thread should not be woken up);
   1045 		 * - the driver ignores tx interrupts unless netmap_mitigate=0
   1046 		 *   or the slot has the DD bit set.
   1047 		 */
   1048 		if (!netmap_mitigate ||
   1049 		    (kring->nr_kflags < kring->nkr_num_slots &&
   1050 		    txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
   1051 			netmap_tx_irq(ifp, txr->me);
   1052 		}
   1053 		return;
   1054 	}
   1055 #endif /* DEV_NETMAP */
   1056 
   1057 	if (txr->tx_avail == txr->num_desc) {
   1058 		txr->busy = 0;
   1059 		return;
   1060 	}
   1061 
   1062 	/* Get work starting point */
   1063 	work = txr->next_to_clean;
   1064 	buf = &txr->tx_buffers[work];
   1065 	txd = &txr->tx_base[work];
   1066 	work -= txr->num_desc; /* The distance to ring end */
   1067         ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1068 	    BUS_DMASYNC_POSTREAD);
   1069 
   1070 	do {
   1071 		union ixgbe_adv_tx_desc *eop = buf->eop;
   1072 		if (eop == NULL) /* No work */
   1073 			break;
   1074 
   1075 		if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
   1076 			break;	/* I/O not complete */
   1077 
   1078 		if (buf->m_head) {
   1079 			txr->bytes +=
   1080 			    buf->m_head->m_pkthdr.len;
   1081 			bus_dmamap_sync(txr->txtag->dt_dmat,
   1082 			    buf->map,
   1083 			    0, buf->m_head->m_pkthdr.len,
   1084 			    BUS_DMASYNC_POSTWRITE);
   1085 			ixgbe_dmamap_unload(txr->txtag,
   1086 			    buf->map);
   1087 			m_freem(buf->m_head);
   1088 			buf->m_head = NULL;
   1089 		}
   1090 		buf->eop = NULL;
   1091 		++txr->tx_avail;
   1092 
    1093 		/* Clean the remaining descriptors of a multi-segment packet */
   1094 		while (txd != eop) {
   1095 			++txd;
   1096 			++buf;
   1097 			++work;
   1098 			/* wrap the ring? */
   1099 			if (__predict_false(!work)) {
   1100 				work -= txr->num_desc;
   1101 				buf = txr->tx_buffers;
   1102 				txd = txr->tx_base;
   1103 			}
   1104 			if (buf->m_head) {
   1105 				txr->bytes +=
   1106 				    buf->m_head->m_pkthdr.len;
   1107 				bus_dmamap_sync(txr->txtag->dt_dmat,
   1108 				    buf->map,
   1109 				    0, buf->m_head->m_pkthdr.len,
   1110 				    BUS_DMASYNC_POSTWRITE);
   1111 				ixgbe_dmamap_unload(txr->txtag,
   1112 				    buf->map);
   1113 				m_freem(buf->m_head);
   1114 				buf->m_head = NULL;
   1115 			}
   1116 			++txr->tx_avail;
   1117 			buf->eop = NULL;
   1118 
   1119 		}
   1120 		++txr->packets;
   1121 		++processed;
   1122 		++ifp->if_opackets;
   1123 
   1124 		/* Try the next packet */
   1125 		++txd;
   1126 		++buf;
   1127 		++work;
   1128 		/* reset with a wrap */
   1129 		if (__predict_false(!work)) {
   1130 			work -= txr->num_desc;
   1131 			buf = txr->tx_buffers;
   1132 			txd = txr->tx_base;
   1133 		}
   1134 		prefetch(txd);
   1135 	} while (__predict_true(--limit));
   1136 
   1137 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1138 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   1139 
   1140 	work += txr->num_desc;
   1141 	txr->next_to_clean = work;
   1142 
    1143 	/*
    1144 	** Queue hang detection: we know there is
    1145 	** work outstanding or the first return
    1146 	** above would have been taken, so if
    1147 	** nothing was cleaned, increment busy;
    1148 	** local_timer will check it and mark
    1149 	** it HUNG if a MAX count is exceeded.
    1150 	*/
   1151 	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
   1152 		++txr->busy;
   1153 	/*
    1154 	** If anything was cleaned, reset the state to 1;
    1155 	** note this will clear HUNG if it is set.
   1156 	*/
   1157 	if (processed)
   1158 		txr->busy = 1;
   1159 
   1160 	if (txr->tx_avail == txr->num_desc)
   1161 		txr->busy = 0;
   1162 
   1163 	return;
   1164 }
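
/*
 * Illustrative sketch only, not part of this driver: the wrap trick
 * used by ixgbe_txeof() above, where the running index is biased by
 * -num_desc so it counts up toward zero and "!work" detects the wrap
 * point without a modulo on every step.  Names are hypothetical.
 */
#if 0	/* example, never compiled */
static int
walk_ring(int next_to_clean, int num_desc, int steps)
{
	int work = next_to_clean;

	work -= num_desc;	/* distance to the ring end, always negative */
	while (steps-- > 0) {
		/* ... process ring slot (work + num_desc) here ... */
		++work;
		if (work == 0)	/* reached the ring end: wrap */
			work -= num_desc;
	}
	return work + num_desc;	/* convert back to a real ring index */
}
#endif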
   1165 
   1166 
   1167 #ifdef IXGBE_FDIR
    1168 /*
    1169 ** This routine parses packet headers so that Flow
    1170 ** Director can make a hashed filter table entry
    1171 ** allowing traffic flows to be identified and kept
    1172 ** on the same CPU.  This could be a performance
    1173 ** hit, but we only do it for one in every
    1174 ** IXGBE_FDIR_RATE packets.
    1175 */
   1176 static void
   1177 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
   1178 {
   1179 	struct adapter			*adapter = txr->adapter;
   1180 	struct ix_queue			*que;
   1181 	struct ip			*ip;
   1182 	struct tcphdr			*th;
   1183 	struct udphdr			*uh;
   1184 	struct ether_vlan_header	*eh;
   1185 	union ixgbe_atr_hash_dword	input = {.dword = 0};
   1186 	union ixgbe_atr_hash_dword	common = {.dword = 0};
   1187 	int  				ehdrlen, ip_hlen;
   1188 	u16				etype;
   1189 
   1190 	eh = mtod(mp, struct ether_vlan_header *);
   1191 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
   1192 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
   1193 		etype = eh->evl_proto;
   1194 	} else {
   1195 		ehdrlen = ETHER_HDR_LEN;
   1196 		etype = eh->evl_encap_proto;
   1197 	}
   1198 
   1199 	/* Only handling IPv4 */
   1200 	if (etype != htons(ETHERTYPE_IP))
   1201 		return;
   1202 
   1203 	ip = (struct ip *)(mp->m_data + ehdrlen);
   1204 	ip_hlen = ip->ip_hl << 2;
   1205 
   1206 	/* check if we're UDP or TCP */
   1207 	switch (ip->ip_p) {
   1208 	case IPPROTO_TCP:
   1209 		th = (struct tcphdr *)((char *)ip + ip_hlen);
   1210 		/* src and dst are inverted */
   1211 		common.port.dst ^= th->th_sport;
   1212 		common.port.src ^= th->th_dport;
   1213 		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
   1214 		break;
   1215 	case IPPROTO_UDP:
   1216 		uh = (struct udphdr *)((char *)ip + ip_hlen);
   1217 		/* src and dst are inverted */
   1218 		common.port.dst ^= uh->uh_sport;
   1219 		common.port.src ^= uh->uh_dport;
   1220 		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
   1221 		break;
   1222 	default:
   1223 		return;
   1224 	}
   1225 
   1226 	input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
   1227 	if (mp->m_pkthdr.ether_vtag)
   1228 		common.flex_bytes ^= htons(ETHERTYPE_VLAN);
   1229 	else
   1230 		common.flex_bytes ^= etype;
   1231 	common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
   1232 
   1233 	que = &adapter->queues[txr->me];
   1234 	/*
   1235 	** This assumes the Rx queue and Tx
   1236 	** queue are bound to the same CPU
   1237 	*/
   1238 	ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
   1239 	    input, common, que->msix);
   1240 }
   1241 #endif /* IXGBE_FDIR */
   1242 
   1243 /*
   1244 ** Used to detect a descriptor that has
   1245 ** been merged by Hardware RSC.
   1246 */
   1247 static inline u32
   1248 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
   1249 {
   1250 	return (le32toh(rx->wb.lower.lo_dword.data) &
   1251 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
   1252 }
   1253 
   1254 /*********************************************************************
   1255  *
    1256  *  Initialize the Hardware RSC (LRO) feature on 82599
    1257  *  for an RX ring; it is toggled by the LRO capability
    1258  *  even though it is transparent to the stack.
    1259  *
    1260  *  NOTE: since this HW feature only works with IPv4, and
    1261  *        our testing has shown soft LRO to be as effective,
    1262  *        it is disabled by default.
   1263  *
   1264  **********************************************************************/
   1265 static void
   1266 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
   1267 {
   1268 	struct	adapter 	*adapter = rxr->adapter;
   1269 	struct	ixgbe_hw	*hw = &adapter->hw;
   1270 	u32			rscctrl, rdrxctl;
   1271 
    1272 	/* If turning LRO/RSC off we need to disable it */
    1273 	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
    1274 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
    1275 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
    1276 		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
    1277 		return;
    1278 	}
   1279 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
   1280 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
   1281 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
   1282 	if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
   1283 #endif /* DEV_NETMAP */
   1284 	rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
   1285 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
   1286 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
   1287 
   1288 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
   1289 	rscctrl |= IXGBE_RSCCTL_RSCEN;
   1290 	/*
   1291 	** Limit the total number of descriptors that
   1292 	** can be combined, so it does not exceed 64K
   1293 	*/
   1294 	if (rxr->mbuf_sz == MCLBYTES)
   1295 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
   1296 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
   1297 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
   1298 	else if (rxr->mbuf_sz == MJUM9BYTES)
   1299 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
   1300 	else  /* Using 16K cluster */
   1301 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
   1302 
   1303 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
   1304 
   1305 	/* Enable TCP header recognition */
   1306 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
   1307 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
   1308 	    IXGBE_PSRTYPE_TCPHDR));
   1309 
   1310 	/* Disable RSC for ACK packets */
   1311 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
   1312 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
   1313 
   1314 	rxr->hw_rsc = TRUE;
   1315 }
   1316 
   1317 /*********************************************************************
   1318  *
   1319  *  Refresh mbuf buffers for RX descriptor rings
    1320  *   - now keeps its own state, so discards due to resource
    1321  *     exhaustion are unnecessary; if an mbuf cannot be obtained
    1322  *     it just returns, keeping its placeholder, and can simply
    1323  *     be called again later to retry.
   1324  *
   1325  **********************************************************************/
   1326 static void
   1327 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
   1328 {
   1329 	struct adapter		*adapter = rxr->adapter;
   1330 	struct ixgbe_rx_buf	*rxbuf;
   1331 	struct mbuf		*mp;
   1332 	int			i, j, error;
   1333 	bool			refreshed = false;
   1334 
   1335 	i = j = rxr->next_to_refresh;
   1336 	/* Control the loop with one beyond */
   1337 	if (++j == rxr->num_desc)
   1338 		j = 0;
   1339 
   1340 	while (j != limit) {
   1341 		rxbuf = &rxr->rx_buffers[i];
   1342 		if (rxbuf->buf == NULL) {
   1343 			mp = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
   1344 			    MT_DATA, M_PKTHDR, rxr->mbuf_sz);
   1345 			if (mp == NULL) {
   1346 				rxr->no_jmbuf.ev_count++;
   1347 				goto update;
   1348 			}
   1349 			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
   1350 				m_adj(mp, ETHER_ALIGN);
   1351 		} else
   1352 			mp = rxbuf->buf;
   1353 
   1354 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   1355 
   1356 		/* If we're dealing with an mbuf that was copied rather
   1357 		 * than replaced, there's no need to go through busdma.
   1358 		 */
   1359 		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
   1360 			/* Get the memory mapping */
   1361 			ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   1362 			error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
   1363 			    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
   1364 			if (error != 0) {
   1365 				printf("Refresh mbufs: payload dmamap load"
   1366 				    " failure - %d\n", error);
   1367 				m_free(mp);
   1368 				rxbuf->buf = NULL;
   1369 				goto update;
   1370 			}
   1371 			rxbuf->buf = mp;
   1372 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1373 			    0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
   1374 			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
   1375 			    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1376 		} else {
   1377 			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
   1378 			rxbuf->flags &= ~IXGBE_RX_COPY;
   1379 		}
   1380 
   1381 		refreshed = true;
   1382 		/* Next is precalculated */
   1383 		i = j;
   1384 		rxr->next_to_refresh = i;
   1385 		if (++j == rxr->num_desc)
   1386 			j = 0;
   1387 	}
   1388 update:
   1389 	if (refreshed) /* Update hardware tail index */
   1390 		IXGBE_WRITE_REG(&adapter->hw,
   1391 		    rxr->tail, rxr->next_to_refresh);
   1392 	return;
   1393 }
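
/*
 * Illustrative sketch only, not part of this driver: the "one
 * beyond" loop control ixgbe_refresh_mbufs() uses above -- j always
 * runs one slot ahead of i, so the loop stops before overtaking the
 * caller-supplied limit.  Names are hypothetical.
 */
#if 0	/* example, never compiled */
static int
refresh_ring(int next_to_refresh, int num_desc, int limit)
{
	int i, j;

	i = j = next_to_refresh;
	if (++j == num_desc)	/* control the loop with one beyond */
		j = 0;

	while (j != limit) {
		/* ... replenish slot i here; on failure stop and keep i ... */
		i = j;		/* slot i has been refreshed */
		if (++j == num_desc)
			j = 0;
	}
	return i;		/* new next_to_refresh */
}
#endif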
   1394 
   1395 /*********************************************************************
   1396  *
   1397  *  Allocate memory for rx_buffer structures. Since we use one
    1398  *  rx_buffer per received packet, the maximum number of rx_buffers
   1399  *  that we'll need is equal to the number of receive descriptors
   1400  *  that we've allocated.
   1401  *
   1402  **********************************************************************/
   1403 int
   1404 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
   1405 {
   1406 	struct	adapter 	*adapter = rxr->adapter;
   1407 	device_t 		dev = adapter->dev;
   1408 	struct ixgbe_rx_buf 	*rxbuf;
   1409 	int             	bsize, error;
   1410 
   1411 	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
   1412 	if (!(rxr->rx_buffers =
   1413 	    (struct ixgbe_rx_buf *) malloc(bsize,
   1414 	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
   1415 		aprint_error_dev(dev, "Unable to allocate rx_buffer memory\n");
   1416 		error = ENOMEM;
   1417 		goto fail;
   1418 	}
   1419 
   1420 	if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat,	/* parent */
   1421 				   1, 0,	/* alignment, bounds */
   1422 				   MJUM16BYTES,		/* maxsize */
   1423 				   1,			/* nsegments */
   1424 				   MJUM16BYTES,		/* maxsegsize */
   1425 				   0,			/* flags */
   1426 				   &rxr->ptag))) {
   1427 		aprint_error_dev(dev, "Unable to create RX DMA tag\n");
   1428 		goto fail;
   1429 	}
   1430 
   1431 	for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
   1432 		rxbuf = &rxr->rx_buffers[i];
   1433 		error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
   1434 		if (error) {
   1435 			aprint_error_dev(dev, "Unable to create RX dma map\n");
   1436 			goto fail;
   1437 		}
   1438 	}
   1439 
   1440 	return (0);
   1441 
   1442 fail:
   1443 	/* Frees all, but can handle partial completion */
   1444 	ixgbe_free_receive_structures(adapter);
   1445 	return (error);
   1446 }
   1447 
   1448 static void
   1449 ixgbe_free_receive_ring(struct rx_ring *rxr)
   1450 {
   1451 	struct ixgbe_rx_buf       *rxbuf;
   1452 
   1453 	for (int i = 0; i < rxr->num_desc; i++) {
   1454 		rxbuf = &rxr->rx_buffers[i];
   1455 		if (rxbuf->buf != NULL) {
   1456 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1457 			    0, rxbuf->buf->m_pkthdr.len,
   1458 			    BUS_DMASYNC_POSTREAD);
   1459 			ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   1460 			rxbuf->buf->m_flags |= M_PKTHDR;
   1461 			m_freem(rxbuf->buf);
   1462 			rxbuf->buf = NULL;
   1463 			rxbuf->flags = 0;
   1464 		}
   1465 	}
   1466 }
   1467 
   1468 /*********************************************************************
   1469  *
   1470  *  Initialize a receive ring and its buffers.
   1471  *
   1472  **********************************************************************/
   1473 static int
   1474 ixgbe_setup_receive_ring(struct rx_ring *rxr)
   1475 {
   1476 	struct	adapter 	*adapter;
   1477 	struct ixgbe_rx_buf	*rxbuf;
   1478 #ifdef LRO
   1479 	struct ifnet		*ifp;
   1480 	struct lro_ctrl		*lro = &rxr->lro;
   1481 #endif /* LRO */
   1482 	int			rsize, error = 0;
   1483 #ifdef DEV_NETMAP
   1484 	struct netmap_adapter *na = NA(rxr->adapter->ifp);
   1485 	struct netmap_slot *slot;
   1486 #endif /* DEV_NETMAP */
   1487 
   1488 	adapter = rxr->adapter;
   1489 #ifdef LRO
   1490 	ifp = adapter->ifp;
   1491 #endif /* LRO */
   1492 
   1493 	/* Clear the ring contents */
   1494 	IXGBE_RX_LOCK(rxr);
   1495 #ifdef DEV_NETMAP
   1496 	/* same as in ixgbe_setup_transmit_ring() */
   1497 	slot = netmap_reset(na, NR_RX, rxr->me, 0);
   1498 #endif /* DEV_NETMAP */
   1499 	rsize = roundup2(adapter->num_rx_desc *
   1500 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
   1501 	bzero((void *)rxr->rx_base, rsize);
   1502 	/* Cache the size */
   1503 	rxr->mbuf_sz = adapter->rx_mbuf_sz;
   1504 
   1505 	/* Free current RX buffer structs and their mbufs */
   1506 	ixgbe_free_receive_ring(rxr);
   1507 
   1508 	IXGBE_RX_UNLOCK(rxr);
   1509 
   1510 	/* Now reinitialize our supply of jumbo mbufs.  The number
   1511 	 * or size of jumbo mbufs may have changed.
   1512 	 */
   1513 	ixgbe_jcl_reinit(&adapter->jcl_head, rxr->ptag->dt_dmat,
   1514 	    2 * adapter->num_rx_desc, adapter->rx_mbuf_sz);
   1515 
   1516 	IXGBE_RX_LOCK(rxr);
   1517 
   1518 	/* Now replenish the mbufs */
   1519 	for (int j = 0; j != rxr->num_desc; ++j) {
   1520 		struct mbuf	*mp;
   1521 
   1522 		rxbuf = &rxr->rx_buffers[j];
   1523 #ifdef DEV_NETMAP
   1524 		/*
   1525 		 * In netmap mode, fill the map and set the buffer
   1526 		 * address in the NIC ring, considering the offset
   1527 		 * between the netmap and NIC rings (see comment in
   1528 		 * ixgbe_setup_transmit_ring() ). No need to allocate
   1529 		 * an mbuf, so end the block with a continue;
   1530 		 */
   1531 		if (slot) {
   1532 			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
   1533 			uint64_t paddr;
   1534 			void *addr;
   1535 
   1536 			addr = PNMB(na, slot + sj, &paddr);
   1537 			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
   1538 			/* Update descriptor and the cached value */
   1539 			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
   1540 			rxbuf->addr = htole64(paddr);
   1541 			continue;
   1542 		}
   1543 #endif /* DEV_NETMAP */
   1544 		rxbuf->flags = 0;
   1545 		rxbuf->buf = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
   1546 		    MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
   1547 		if (rxbuf->buf == NULL) {
   1548 			error = ENOBUFS;
   1549                         goto fail;
   1550 		}
   1551 		mp = rxbuf->buf;
   1552 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   1553 		/* Get the memory mapping */
   1554 		error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
   1555 		    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
   1556 		if (error != 0)
   1557                         goto fail;
   1558 		bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1559 		    0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
   1560 		/* Update the descriptor and the cached value */
   1561 		rxr->rx_base[j].read.pkt_addr =
   1562 		    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1563 		rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1564 	}
   1565 
   1566 
   1567 	/* Setup our descriptor indices */
   1568 	rxr->next_to_check = 0;
   1569 	rxr->next_to_refresh = 0;
   1570 	rxr->lro_enabled = FALSE;
   1571 	rxr->rx_copies.ev_count = 0;
   1572 	rxr->rx_bytes.ev_count = 0;
   1573 	rxr->vtag_strip = FALSE;
   1574 
   1575 	ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   1576 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   1577 
   1578 	/*
   1579 	** Now set up the LRO interface:
   1580 	*/
   1581 	if (ixgbe_rsc_enable)
   1582 		ixgbe_setup_hw_rsc(rxr);
   1583 #ifdef LRO
   1584 	else if (ifp->if_capenable & IFCAP_LRO) {
   1585 		device_t dev = adapter->dev;
   1586 		int err = tcp_lro_init(lro);
   1587 		if (err) {
   1588 			device_printf(dev, "LRO Initialization failed!\n");
   1589 			goto fail;
   1590 		}
   1591 		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
   1592 		rxr->lro_enabled = TRUE;
   1593 		lro->ifp = adapter->ifp;
   1594 	}
   1595 #endif /* LRO */
   1596 
   1597 	IXGBE_RX_UNLOCK(rxr);
   1598 	return (0);
   1599 
   1600 fail:
   1601 	ixgbe_free_receive_ring(rxr);
   1602 	IXGBE_RX_UNLOCK(rxr);
   1603 	return (error);
   1604 }
   1605 
   1606 /*********************************************************************
   1607  *
   1608  *  Initialize all receive rings.
   1609  *
   1610  **********************************************************************/
   1611 int
   1612 ixgbe_setup_receive_structures(struct adapter *adapter)
   1613 {
   1614 	struct rx_ring *rxr = adapter->rx_rings;
   1615 	int j;
   1616 
   1617 	for (j = 0; j < adapter->num_queues; j++, rxr++)
   1618 		if (ixgbe_setup_receive_ring(rxr))
   1619 			goto fail;
   1620 
   1621 	return (0);
   1622 fail:
    1623 	/*
    1624 	 * Free the RX buffers allocated so far; we only handle the
    1625 	 * rings that completed, since the failing case has already
    1626 	 * cleaned up after itself.  'j' failed, so it is the terminus.
    1627 	 */
   1628 	for (int i = 0; i < j; ++i) {
   1629 		rxr = &adapter->rx_rings[i];
   1630 		ixgbe_free_receive_ring(rxr);
   1631 	}
   1632 
   1633 	return (ENOBUFS);
   1634 }
   1635 
   1636 
   1637 /*********************************************************************
   1638  *
   1639  *  Free all receive rings.
   1640  *
   1641  **********************************************************************/
   1642 void
   1643 ixgbe_free_receive_structures(struct adapter *adapter)
   1644 {
   1645 	struct rx_ring *rxr = adapter->rx_rings;
   1646 
   1647 	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
   1648 
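         	/*
         	 * For each ring: release the receive buffers and any LRO
         	 * state, then the descriptor DMA area, and finally the
         	 * ring lock.
         	 */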
   1649 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
   1650 #ifdef LRO
   1651 		struct lro_ctrl		*lro = &rxr->lro;
   1652 #endif /* LRO */
   1653 		ixgbe_free_receive_buffers(rxr);
   1654 #ifdef LRO
   1655 		/* Free LRO memory */
   1656 		tcp_lro_free(lro);
   1657 #endif /* LRO */
   1658 		/* Free the ring memory as well */
   1659 		ixgbe_dma_free(adapter, &rxr->rxdma);
   1660 		IXGBE_RX_LOCK_DESTROY(rxr);
   1661 	}
   1662 
   1663 	free(adapter->rx_rings, M_DEVBUF);
   1664 }
   1665 
   1666 
   1667 /*********************************************************************
   1668  *
   1669  *  Free receive ring data structures
   1670  *
   1671  **********************************************************************/
   1672 static void
   1673 ixgbe_free_receive_buffers(struct rx_ring *rxr)
   1674 {
   1675 	struct adapter		*adapter = rxr->adapter;
   1676 	struct ixgbe_rx_buf	*rxbuf;
   1677 
   1678 	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
   1679 
   1680 	/* Cleanup any existing buffers */
   1681 	if (rxr->rx_buffers != NULL) {
   1682 		for (int i = 0; i < adapter->num_rx_desc; i++) {
   1683 			rxbuf = &rxr->rx_buffers[i];
   1684 			if (rxbuf->buf != NULL) {
   1685 				bus_dmamap_sync(rxr->ptag->dt_dmat,
   1686 				    rxbuf->pmap, 0, rxbuf->buf->m_pkthdr.len,
   1687 				    BUS_DMASYNC_POSTREAD);
   1688 				ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   1689 				rxbuf->buf->m_flags |= M_PKTHDR;
   1690 				m_freem(rxbuf->buf);
   1691 			}
   1692 			rxbuf->buf = NULL;
   1693 			if (rxbuf->pmap != NULL) {
   1694 				ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
   1695 				rxbuf->pmap = NULL;
   1696 			}
   1697 		}
   1698 		if (rxr->rx_buffers != NULL) {
   1699 			free(rxr->rx_buffers, M_DEVBUF);
   1700 			rxr->rx_buffers = NULL;
   1701 		}
   1702 	}
   1703 
   1704 	if (rxr->ptag != NULL) {
   1705 		ixgbe_dma_tag_destroy(rxr->ptag);
   1706 		rxr->ptag = NULL;
   1707 	}
   1708 
   1709 	return;
   1710 }
   1711 
   1712 static __inline void
   1713 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
   1714 {
   1715 	int s;
   1716 
   1717 #ifdef LRO
   1718 	struct adapter	*adapter = ifp->if_softc;
   1719 	struct ethercom *ec = &adapter->osdep.ec;
   1720 
    1721 	/*
    1722 	 * At the moment LRO is only done for IP/TCP packets whose TCP
    1723 	 * checksum was validated by hardware, and the packet must not
    1724 	 * carry a VLAN tag.  For IPv6 we do not yet support ext. hdrs.
    1725 	 */
    1726 	if (rxr->lro_enabled &&
    1727 	    (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
    1728 	    (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
    1729 	    ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
    1730 	    (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
    1731 	    (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
    1732 	    (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
    1733 	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
    1734 	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
    1735 		/*
    1736 		 * Hand the packet to LRO.  It still goes straight to
    1737 		 * the stack if:
    1738 		 *  - there are no LRO resources, or
    1739 		 *  - the lro enqueue fails
    1740 		 */
    1741 		if (rxr->lro.lro_cnt != 0)
    1742 			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
    1743 				return;
    1744 	}
   1745 #endif /* LRO */
   1746 
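         	/*
         	 * Drop the RX lock across the hand-off to BPF and the stack;
         	 * it is re-taken before returning to the receive loop.
         	 */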
   1747 	IXGBE_RX_UNLOCK(rxr);
   1748 
   1749 	s = splnet();
   1750 	/* Pass this up to any BPF listeners. */
   1751 	bpf_mtap(ifp, m);
   1752 	if_input(ifp, m);
   1753 	splx(s);
   1754 
   1755 	IXGBE_RX_LOCK(rxr);
   1756 }
   1757 
   1758 static __inline void
   1759 ixgbe_rx_discard(struct rx_ring *rxr, int i)
   1760 {
   1761 	struct ixgbe_rx_buf	*rbuf;
   1762 
   1763 	rbuf = &rxr->rx_buffers[i];
   1764 
   1765 
    1766 	/*
    1767 	** With advanced descriptors the writeback
    1768 	** clobbers the buffer addresses, so it's
    1769 	** easier to just free the existing mbufs and
    1770 	** take the normal refresh path to get new
    1771 	** buffers and mappings.
    1772 	*/
   1773 
    1774 	if (rbuf->fmp != NULL) {/* Partial chain ? */
   1775 		rbuf->fmp->m_flags |= M_PKTHDR;
   1776 		m_freem(rbuf->fmp);
   1777 		rbuf->fmp = NULL;
   1778 		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
   1779 	} else if (rbuf->buf) {
   1780 		m_free(rbuf->buf);
   1781 		rbuf->buf = NULL;
   1782 	}
   1783 	ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
   1784 
   1785 	rbuf->flags = 0;
   1786 
   1787 	return;
   1788 }
   1789 
   1790 
   1791 /*********************************************************************
   1792  *
    1793  *  This routine executes in interrupt context. It replenishes
    1794  *  the mbufs in the descriptor ring and passes data that has
    1795  *  been DMA'ed into host memory up to the upper layers.
   1796  *
   1797  *  Return TRUE for more work, FALSE for all clean.
   1798  *********************************************************************/
   1799 bool
   1800 ixgbe_rxeof(struct ix_queue *que)
   1801 {
   1802 	struct adapter		*adapter = que->adapter;
   1803 	struct rx_ring		*rxr = que->rxr;
   1804 	struct ifnet		*ifp = adapter->ifp;
   1805 #ifdef LRO
   1806 	struct lro_ctrl		*lro = &rxr->lro;
   1807 	struct lro_entry	*queued;
   1808 #endif /* LRO */
   1809 	int			i, nextp, processed = 0;
   1810 	u32			staterr = 0;
   1811 	u32			count = adapter->rx_process_limit;
   1812 	union ixgbe_adv_rx_desc	*cur;
   1813 	struct ixgbe_rx_buf	*rbuf, *nbuf;
   1814 #ifdef RSS
   1815 	u16			pkt_info;
   1816 #endif
   1817 
   1818 	IXGBE_RX_LOCK(rxr);
   1819 
   1820 #ifdef DEV_NETMAP
   1821 	/* Same as the txeof routine: wakeup clients on intr. */
   1822 	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
   1823 		IXGBE_RX_UNLOCK(rxr);
   1824 		return (FALSE);
   1825 	}
   1826 #endif /* DEV_NETMAP */
   1827 
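         	/*
         	 * Main receive loop: walk the ring from next_to_check,
         	 * assembling frames that may span several descriptors,
         	 * until we hit a descriptor the hardware has not written
         	 * back (DD clear), the interface stops running, or the
         	 * process limit is exhausted.
         	 */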
   1828 	for (i = rxr->next_to_check; count != 0;) {
   1829 		struct mbuf	*sendmp, *mp;
   1830 		u32		rsc, ptype;
   1831 		u16		len;
   1832 		u16		vtag = 0;
   1833 		bool		eop;
   1834 
   1835 		/* Sync the ring. */
   1836 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   1837 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   1838 
   1839 		cur = &rxr->rx_base[i];
   1840 		staterr = le32toh(cur->wb.upper.status_error);
   1841 #ifdef RSS
   1842 		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
   1843 #endif
   1844 
   1845 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
   1846 			break;
   1847 		if ((ifp->if_flags & IFF_RUNNING) == 0)
   1848 			break;
   1849 
   1850 		count--;
   1851 		sendmp = NULL;
   1852 		nbuf = NULL;
   1853 		rsc = 0;
   1854 		cur->wb.upper.status_error = 0;
   1855 		rbuf = &rxr->rx_buffers[i];
   1856 		mp = rbuf->buf;
   1857 
   1858 		len = le16toh(cur->wb.upper.length);
   1859 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
   1860 		    IXGBE_RXDADV_PKTTYPE_MASK;
   1861 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
   1862 
   1863 		/* Make sure bad packets are discarded */
   1864 		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
   1865 #if __FreeBSD_version >= 1100036
   1866 			if (IXGBE_IS_VF(adapter))
   1867 				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
   1868 #endif
   1869 			rxr->rx_discarded.ev_count++;
   1870 			ixgbe_rx_discard(rxr, i);
   1871 			goto next_desc;
   1872 		}
   1873 
    1874 		/*
    1875 		** On the 82599, which supports a hardware
    1876 		** LRO (called HW RSC), packets need not be
    1877 		** fragmented across sequential descriptors;
    1878 		** rather, the next descriptor is indicated
    1879 		** in bits of this descriptor.  This also
    1880 		** means that we might process more than one
    1881 		** packet at a time, something that was never
    1882 		** true before; it required eliminating the
    1883 		** global chain pointers in favor of what we
    1884 		** are doing here.  -jfv
    1885 		*/
   1886 		if (!eop) {
   1887 			/*
   1888 			** Figure out the next descriptor
   1889 			** of this frame.
   1890 			*/
   1891 			if (rxr->hw_rsc == TRUE) {
   1892 				rsc = ixgbe_rsc_count(cur);
   1893 				rxr->rsc_num += (rsc - 1);
   1894 			}
   1895 			if (rsc) { /* Get hardware index */
   1896 				nextp = ((staterr &
   1897 				    IXGBE_RXDADV_NEXTP_MASK) >>
   1898 				    IXGBE_RXDADV_NEXTP_SHIFT);
   1899 			} else { /* Just sequential */
   1900 				nextp = i + 1;
   1901 				if (nextp == adapter->num_rx_desc)
   1902 					nextp = 0;
   1903 			}
   1904 			nbuf = &rxr->rx_buffers[nextp];
   1905 			prefetch(nbuf);
   1906 		}
   1907 		/*
   1908 		** Rather than using the fmp/lmp global pointers
   1909 		** we now keep the head of a packet chain in the
   1910 		** buffer struct and pass this along from one
   1911 		** descriptor to the next, until we get EOP.
   1912 		*/
   1913 		mp->m_len = len;
   1914 		/*
   1915 		** See if there is a stored head
   1916 		** that determines what we are
   1917 		*/
   1918 		sendmp = rbuf->fmp;
   1919 		if (sendmp != NULL) {  /* secondary frag */
   1920 			rbuf->buf = rbuf->fmp = NULL;
   1921 			mp->m_flags &= ~M_PKTHDR;
   1922 			sendmp->m_pkthdr.len += mp->m_len;
   1923 		} else {
   1924 			/*
   1925 			 * Optimize.  This might be a small packet,
   1926 			 * maybe just a TCP ACK.  Do a fast copy that
   1927 			 * is cache aligned into a new mbuf, and
   1928 			 * leave the old mbuf+cluster for re-use.
   1929 			 */
   1930 			if (eop && len <= IXGBE_RX_COPY_LEN) {
   1931 				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
   1932 				if (sendmp != NULL) {
   1933 					sendmp->m_data +=
   1934 					    IXGBE_RX_COPY_ALIGN;
   1935 					ixgbe_bcopy(mp->m_data,
   1936 					    sendmp->m_data, len);
   1937 					sendmp->m_len = len;
   1938 					rxr->rx_copies.ev_count++;
   1939 					rbuf->flags |= IXGBE_RX_COPY;
   1940 				}
   1941 			}
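         			/*
         			 * No copy was made (large frame, or no mbuf was
         			 * available): pass the receive cluster itself up
         			 * and let the refresh path replace it.
         			 */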
   1942 			if (sendmp == NULL) {
   1943 				rbuf->buf = rbuf->fmp = NULL;
   1944 				sendmp = mp;
   1945 			}
   1946 
   1947 			/* first desc of a non-ps chain */
   1948 			sendmp->m_flags |= M_PKTHDR;
   1949 			sendmp->m_pkthdr.len = mp->m_len;
   1950 		}
   1951 		++processed;
   1952 
   1953 		/* Pass the head pointer on */
   1954 		if (eop == 0) {
   1955 			nbuf->fmp = sendmp;
   1956 			sendmp = NULL;
   1957 			mp->m_next = nbuf->buf;
   1958 		} else { /* Sending this frame */
   1959 			m_set_rcvif(sendmp, ifp);
   1960 			ifp->if_ipackets++;
   1961 			rxr->rx_packets.ev_count++;
   1962 			/* capture data for AIM */
   1963 			rxr->bytes += sendmp->m_pkthdr.len;
   1964 			rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
   1965 			/* Process vlan info */
   1966 			if ((rxr->vtag_strip) &&
   1967 			    (staterr & IXGBE_RXD_STAT_VP))
   1968 				vtag = le16toh(cur->wb.upper.vlan);
   1969 			if (vtag) {
   1970 				VLAN_INPUT_TAG(ifp, sendmp, vtag,
   1971 				    printf("%s: could not apply VLAN "
   1972 					"tag", __func__));
   1973 			}
   1974 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
   1975 				ixgbe_rx_checksum(staterr, sendmp, ptype,
   1976 				   &adapter->stats.pf);
   1977 			}
   1978 
   1979 #if 0 /* FreeBSD */
   1980                         /*
   1981                          * In case of multiqueue, we have RXCSUM.PCSD bit set
   1982                          * and never cleared. This means we have RSS hash
   1983                          * available to be used.
   1984                          */
   1985                         if (adapter->num_queues > 1) {
   1986                                 sendmp->m_pkthdr.flowid =
   1987                                     le32toh(cur->wb.lower.hi_dword.rss);
   1988                                 switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
   1989                                     case IXGBE_RXDADV_RSSTYPE_IPV4:
   1990                                         M_HASHTYPE_SET(sendmp,
   1991                                             M_HASHTYPE_RSS_IPV4);
   1992                                         break;
   1993                                     case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
   1994                                         M_HASHTYPE_SET(sendmp,
   1995                                             M_HASHTYPE_RSS_TCP_IPV4);
   1996                                         break;
   1997                                     case IXGBE_RXDADV_RSSTYPE_IPV6:
   1998                                         M_HASHTYPE_SET(sendmp,
   1999                                             M_HASHTYPE_RSS_IPV6);
   2000                                         break;
   2001                                     case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
   2002                                         M_HASHTYPE_SET(sendmp,
   2003                                             M_HASHTYPE_RSS_TCP_IPV6);
   2004                                         break;
   2005                                     case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
   2006                                         M_HASHTYPE_SET(sendmp,
   2007                                             M_HASHTYPE_RSS_IPV6_EX);
   2008                                         break;
   2009                                     case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
   2010                                         M_HASHTYPE_SET(sendmp,
   2011                                             M_HASHTYPE_RSS_TCP_IPV6_EX);
   2012                                         break;
   2013 #if __FreeBSD_version > 1100000
   2014                                     case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
   2015                                         M_HASHTYPE_SET(sendmp,
   2016                                             M_HASHTYPE_RSS_UDP_IPV4);
   2017                                         break;
   2018                                     case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
   2019                                         M_HASHTYPE_SET(sendmp,
   2020                                             M_HASHTYPE_RSS_UDP_IPV6);
   2021                                         break;
   2022                                     case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
   2023                                         M_HASHTYPE_SET(sendmp,
   2024                                             M_HASHTYPE_RSS_UDP_IPV6_EX);
   2025                                         break;
   2026 #endif
   2027                                     default:
   2028                                         M_HASHTYPE_SET(sendmp,
   2029                                             M_HASHTYPE_OPAQUE);
   2030                                 }
   2031                         } else {
   2032                                 sendmp->m_pkthdr.flowid = que->msix;
   2033 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
   2034 			}
   2035 #endif
   2036 		}
   2037 next_desc:
   2038 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   2039 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   2040 
   2041 		/* Advance our pointers to the next descriptor. */
   2042 		if (++i == rxr->num_desc)
   2043 			i = 0;
   2044 
   2045 		/* Now send to the stack or do LRO */
   2046 		if (sendmp != NULL) {
   2047 			rxr->next_to_check = i;
   2048 			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
   2049 			i = rxr->next_to_check;
   2050 		}
   2051 
    2052 		/* Every 8 descriptors we go to refresh mbufs */
   2053 		if (processed == 8) {
   2054 			ixgbe_refresh_mbufs(rxr, i);
   2055 			processed = 0;
   2056 		}
   2057 	}
   2058 
   2059 	/* Refresh any remaining buf structs */
   2060 	if (ixgbe_rx_unrefreshed(rxr))
   2061 		ixgbe_refresh_mbufs(rxr, i);
   2062 
   2063 	rxr->next_to_check = i;
   2064 
   2065 #ifdef LRO
   2066 	/*
   2067 	 * Flush any outstanding LRO work
   2068 	 */
   2069 	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
   2070 		SLIST_REMOVE_HEAD(&lro->lro_active, next);
   2071 		tcp_lro_flush(lro, queued);
   2072 	}
   2073 #endif /* LRO */
   2074 
   2075 	IXGBE_RX_UNLOCK(rxr);
   2076 
   2077 	/*
   2078 	** Still have cleaning to do?
   2079 	*/
   2080 	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
   2081 		return true;
   2082 	else
   2083 		return false;
   2084 }
   2085 
   2086 
   2087 /*********************************************************************
   2088  *
   2089  *  Verify that the hardware indicated that the checksum is valid.
    2090  *  Inform the stack about the status of the checksum so that the
    2091  *  stack doesn't spend time verifying it again.
   2092  *
   2093  *********************************************************************/
   2094 static void
   2095 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
   2096     struct ixgbe_hw_stats *stats)
   2097 {
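         	/*
         	 * The status bits tested below live in the low word of
         	 * staterr, the error bits in its top byte.
         	 */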
   2098 	u16	status = (u16) staterr;
   2099 	u8	errors = (u8) (staterr >> 24);
   2100 #if 0
   2101 	bool	sctp = false;
   2102 
   2103 	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
   2104 	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
   2105 		sctp = true;
   2106 #endif
   2107 
   2108 	/* IPv4 checksum */
   2109 	if (status & IXGBE_RXD_STAT_IPCS) {
   2110 		stats->ipcs.ev_count++;
   2111 		if (!(errors & IXGBE_RXD_ERR_IPE)) {
   2112 			/* IP Checksum Good */
   2113 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
   2114 		} else {
   2115 			stats->ipcs_bad.ev_count++;
   2116 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
   2117 		}
   2118 	}
   2119 	/* TCP/UDP/SCTP checksum */
   2120 	if (status & IXGBE_RXD_STAT_L4CS) {
   2121 		stats->l4cs.ev_count++;
   2122 		int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
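         		/*
         		 * The L4 status bit does not tell us which protocol
         		 * was checked, so set the whole TCP/UDP set for both
         		 * IPv4 and IPv6 and let the upper layer use the one
         		 * that matches the packet.
         		 */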
   2123 		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
   2124 			mp->m_pkthdr.csum_flags |= type;
   2125 		} else {
   2126 			stats->l4cs_bad.ev_count++;
   2127 			mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
   2128 		}
   2129 	}
   2130 }
   2131 
   2132 
   2133 /********************************************************************
   2134  * Manage DMA'able memory.
   2135  *******************************************************************/
   2136 
   2137 int
   2138 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
   2139 		struct ixgbe_dma_alloc *dma, const int mapflags)
   2140 {
   2141 	device_t dev = adapter->dev;
   2142 	int             r, rsegs;
   2143 
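         	/*
         	 * Standard bus_dma(9) sequence: create a tag, allocate and
         	 * map the memory, create a map and load it.  Each step has
         	 * a matching unwind label below for the error paths.
         	 */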
   2144 	r = ixgbe_dma_tag_create(adapter->osdep.dmat,	/* parent */
   2145 			       DBA_ALIGN, 0,	/* alignment, bounds */
   2146 			       size,	/* maxsize */
   2147 			       1,	/* nsegments */
   2148 			       size,	/* maxsegsize */
   2149 			       BUS_DMA_ALLOCNOW,	/* flags */
   2150 			       &dma->dma_tag);
   2151 	if (r != 0) {
   2152 		aprint_error_dev(dev,
   2153 		    "%s: ixgbe_dma_tag_create failed; error %d\n", __func__, r);
   2154 		goto fail_0;
   2155 	}
   2156 
   2157 	r = bus_dmamem_alloc(dma->dma_tag->dt_dmat,
   2158 		size,
   2159 		dma->dma_tag->dt_alignment,
   2160 		dma->dma_tag->dt_boundary,
   2161 		&dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
   2162 	if (r != 0) {
   2163 		aprint_error_dev(dev,
   2164 		    "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
   2165 		goto fail_1;
   2166 	}
   2167 
   2168 	r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
   2169 	    size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
   2170 	if (r != 0) {
   2171 		aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
   2172 		    __func__, r);
   2173 		goto fail_2;
   2174 	}
   2175 
   2176 	r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
   2177 	if (r != 0) {
    2178 		aprint_error_dev(dev, "%s: ixgbe_dmamap_create failed; error %d\n",
   2179 		    __func__, r);
   2180 		goto fail_3;
   2181 	}
   2182 
   2183 	r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map, dma->dma_vaddr,
   2184 			    size,
   2185 			    NULL,
   2186 			    mapflags | BUS_DMA_NOWAIT);
   2187 	if (r != 0) {
   2188 		aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
   2189 		    __func__, r);
   2190 		goto fail_4;
   2191 	}
   2192 	dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
   2193 	dma->dma_size = size;
   2194 	return 0;
   2195 fail_4:
   2196 	ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
   2197 fail_3:
   2198 	bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
   2199 fail_2:
   2200 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
   2201 fail_1:
   2202 	ixgbe_dma_tag_destroy(dma->dma_tag);
   2203 fail_0:
   2204 	return r;
   2205 }
   2206 
   2207 void
   2208 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
   2209 {
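         	/*
         	 * Note that the KVA mapping from bus_dmamem_map() and the
         	 * DMA map created in ixgbe_dma_malloc() are left in place;
         	 * only the load, the backing memory and the tag are
         	 * released here.
         	 */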
   2210 	bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
   2211 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   2212 	ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
   2213 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
   2214 	ixgbe_dma_tag_destroy(dma->dma_tag);
   2215 }
   2216 
   2217 
   2218 /*********************************************************************
   2219  *
   2220  *  Allocate memory for the transmit and receive rings, and then
   2221  *  the descriptors associated with each, called only once at attach.
   2222  *
   2223  **********************************************************************/
   2224 int
   2225 ixgbe_allocate_queues(struct adapter *adapter)
   2226 {
   2227 	device_t	dev = adapter->dev;
   2228 	struct ix_queue	*que;
   2229 	struct tx_ring	*txr;
   2230 	struct rx_ring	*rxr;
   2231 	int rsize, tsize, error = IXGBE_SUCCESS;
   2232 	int txconf = 0, rxconf = 0;
   2233 #ifdef PCI_IOV
   2234 	enum ixgbe_iov_mode iov_mode;
   2235 #endif
   2236 
    2237 	/* First allocate the top level queue structs */
    2238 	if (!(adapter->queues =
    2239 	    (struct ix_queue *) malloc(sizeof(struct ix_queue) *
    2240 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
    2241 		aprint_error_dev(dev, "Unable to allocate queue memory\n");
    2242 		error = ENOMEM;
    2243 		goto fail;
    2244 	}
   2245 
    2246 	/* Next allocate the TX ring struct memory */
   2247 	if (!(adapter->tx_rings =
   2248 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
   2249 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
   2250 		aprint_error_dev(dev, "Unable to allocate TX ring memory\n");
   2251 		error = ENOMEM;
   2252 		goto tx_fail;
   2253 	}
   2254 
   2255 	/* Next allocate the RX */
   2256 	if (!(adapter->rx_rings =
   2257 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
   2258 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
   2259 		aprint_error_dev(dev, "Unable to allocate RX ring memory\n");
   2260 		error = ENOMEM;
   2261 		goto rx_fail;
   2262 	}
   2263 
   2264 	/* For the ring itself */
   2265 	tsize = roundup2(adapter->num_tx_desc *
   2266 	    sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
   2267 
   2268 #ifdef PCI_IOV
   2269 	iov_mode = ixgbe_get_iov_mode(adapter);
   2270 	adapter->pool = ixgbe_max_vfs(iov_mode);
   2271 #else
   2272 	adapter->pool = 0;
   2273 #endif
    2274 	/*
    2275 	 * Now set up the TX queues; txconf is needed to handle the
    2276 	 * possibility that things fail midcourse, in which case we
    2277 	 * need to undo the allocations gracefully.
    2278 	 */
   2279 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
   2280 		/* Set up some basics */
   2281 		txr = &adapter->tx_rings[i];
   2282 		txr->adapter = adapter;
   2283 #ifdef PCI_IOV
   2284 		txr->me = ixgbe_pf_que_index(iov_mode, i);
   2285 #else
   2286 		txr->me = i;
   2287 #endif
   2288 		txr->num_desc = adapter->num_tx_desc;
   2289 
   2290 		/* Initialize the TX side lock */
   2291 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
   2292 		    device_xname(dev), txr->me);
   2293 		mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
   2294 
   2295 		if (ixgbe_dma_malloc(adapter, tsize,
   2296 			&txr->txdma, BUS_DMA_NOWAIT)) {
   2297 			aprint_error_dev(dev,
   2298 			    "Unable to allocate TX Descriptor memory\n");
   2299 			error = ENOMEM;
   2300 			goto err_tx_desc;
   2301 		}
   2302 		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
   2303 		bzero((void *)txr->tx_base, tsize);
   2304 
    2305 		/* Now allocate transmit buffers for the ring */
    2306 		if (ixgbe_allocate_transmit_buffers(txr)) {
    2307 			aprint_error_dev(dev,
    2308 			    "Critical Failure setting up transmit buffers\n");
    2309 			error = ENOMEM;
    2310 			goto err_tx_desc;
    2311 		}
   2312 #ifndef IXGBE_LEGACY_TX
   2313 		/* Allocate a buf ring */
   2314 		txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
   2315 		    M_WAITOK, &txr->tx_mtx);
   2316 		if (txr->br == NULL) {
   2317 			aprint_error_dev(dev,
   2318 			    "Critical Failure setting up buf ring\n");
   2319 			error = ENOMEM;
   2320 			goto err_tx_desc;
    2321 		}
   2322 #endif
   2323 	}
   2324 
   2325 	/*
   2326 	 * Next the RX queues...
   2327 	 */
   2328 	rsize = roundup2(adapter->num_rx_desc *
   2329 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
   2330 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
   2331 		rxr = &adapter->rx_rings[i];
   2332 		/* Set up some basics */
   2333 		rxr->adapter = adapter;
   2334 #ifdef PCI_IOV
   2335 		rxr->me = ixgbe_pf_que_index(iov_mode, i);
   2336 #else
   2337 		rxr->me = i;
   2338 #endif
   2339 		rxr->num_desc = adapter->num_rx_desc;
   2340 
   2341 		/* Initialize the RX side lock */
   2342 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
   2343 		    device_xname(dev), rxr->me);
   2344 		mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
   2345 
   2346 		if (ixgbe_dma_malloc(adapter, rsize,
   2347 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
   2348 			aprint_error_dev(dev,
    2349 			    "Unable to allocate RX Descriptor memory\n");
   2350 			error = ENOMEM;
   2351 			goto err_rx_desc;
   2352 		}
   2353 		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
   2354 		bzero((void *)rxr->rx_base, rsize);
   2355 
    2356 		/* Allocate receive buffers for the ring */
   2357 		if (ixgbe_allocate_receive_buffers(rxr)) {
   2358 			aprint_error_dev(dev,
   2359 			    "Critical Failure setting up receive buffers\n");
   2360 			error = ENOMEM;
   2361 			goto err_rx_desc;
   2362 		}
   2363 	}
   2364 
   2365 	/*
   2366 	** Finally set up the queue holding structs
   2367 	*/
   2368 	for (int i = 0; i < adapter->num_queues; i++) {
   2369 		que = &adapter->queues[i];
   2370 		que->adapter = adapter;
   2371 		que->me = i;
   2372 		que->txr = &adapter->tx_rings[i];
   2373 		que->rxr = &adapter->rx_rings[i];
   2374 	}
   2375 
   2376 	return (0);
   2377 
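         /*
          * Error unwind: release the descriptor DMA areas counted by
          * rxconf/txconf, then the ring and queue arrays, in reverse
          * order of allocation.
          */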
   2378 err_rx_desc:
   2379 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
   2380 		ixgbe_dma_free(adapter, &rxr->rxdma);
   2381 err_tx_desc:
   2382 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
   2383 		ixgbe_dma_free(adapter, &txr->txdma);
   2384 	free(adapter->rx_rings, M_DEVBUF);
   2385 rx_fail:
   2386 	free(adapter->tx_rings, M_DEVBUF);
   2387 tx_fail:
   2388 	free(adapter->queues, M_DEVBUF);
   2389 fail:
   2390 	return (error);
   2391 }
   2392 
   2393