ix_txrx.c revision 1.22
      1 /******************************************************************************
      2 
      3   Copyright (c) 2001-2015, Intel Corporation
      4   All rights reserved.
      5 
      6   Redistribution and use in source and binary forms, with or without
      7   modification, are permitted provided that the following conditions are met:
      8 
      9    1. Redistributions of source code must retain the above copyright notice,
     10       this list of conditions and the following disclaimer.
     11 
     12    2. Redistributions in binary form must reproduce the above copyright
     13       notice, this list of conditions and the following disclaimer in the
     14       documentation and/or other materials provided with the distribution.
     15 
     16    3. Neither the name of the Intel Corporation nor the names of its
     17       contributors may be used to endorse or promote products derived from
     18       this software without specific prior written permission.
     19 
     20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     30   POSSIBILITY OF SUCH DAMAGE.
     31 
     32 ******************************************************************************/
     33 /*
     34  * Copyright (c) 2011 The NetBSD Foundation, Inc.
     35  * All rights reserved.
     36  *
     37  * This code is derived from software contributed to The NetBSD Foundation
     38  * by Coyote Point Systems, Inc.
     39  *
     40  * Redistribution and use in source and binary forms, with or without
     41  * modification, are permitted provided that the following conditions
     42  * are met:
     43  * 1. Redistributions of source code must retain the above copyright
     44  *    notice, this list of conditions and the following disclaimer.
     45  * 2. Redistributions in binary form must reproduce the above copyright
     46  *    notice, this list of conditions and the following disclaimer in the
     47  *    documentation and/or other materials provided with the distribution.
     48  *
     49  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     50  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     51  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     52  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     53  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     54  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     55  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     56  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     57  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     58  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     59  * POSSIBILITY OF SUCH DAMAGE.
     60  */
     61 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 301538 2016-06-07 04:51:50Z sephe $*/
     62 /*$NetBSD: ix_txrx.c,v 1.22 2017/03/02 05:35:01 msaitoh Exp $*/
     63 
     64 #include "opt_inet.h"
     65 #include "opt_inet6.h"
     66 
     67 #include "ixgbe.h"
     68 
     69 #ifdef DEV_NETMAP
     70 #include <net/netmap.h>
     71 #include <sys/selinfo.h>
     72 #include <dev/netmap/netmap_kern.h>
     73 
     74 extern int ix_crcstrip;
     75 #endif
     76 
      77 /*
      78 ** HW RSC control:
      79 **  This feature only works with
      80 **  IPv4, and only on 82599 and later.
      81 **  It also causes IP forwarding to
      82 **  fail, and unlike LRO this cannot
      83 **  be controlled by the stack. For all
      84 **  these reasons it is best left off;
      85 **  there is no tunable interface, so
      86 **  enabling it requires setting this
      87 **  flag and recompiling.
      88 */
     89 static bool ixgbe_rsc_enable = FALSE;
     90 
     91 #ifdef IXGBE_FDIR
      92 /*
      93 ** For Flow Director: this is the
      94 ** sampling interval for TX packets
      95 ** sent to the filter pool; with the
      96 ** default, every 20th packet is probed.
      97 **
      98 ** Setting this to 0 disables the
      99 ** feature.
     100 */
    101 static int atr_sample_rate = 20;
    102 #endif
    103 
    104 /*********************************************************************
    105  *  Local Function prototypes
    106  *********************************************************************/
    107 static void	ixgbe_setup_transmit_ring(struct tx_ring *);
    108 static void     ixgbe_free_transmit_buffers(struct tx_ring *);
    109 static int	ixgbe_setup_receive_ring(struct rx_ring *);
    110 static void     ixgbe_free_receive_buffers(struct rx_ring *);
    111 
    112 static void	ixgbe_rx_checksum(u32, struct mbuf *, u32,
    113 		    struct ixgbe_hw_stats *);
    114 static void	ixgbe_refresh_mbufs(struct rx_ring *, int);
    115 static int      ixgbe_xmit(struct tx_ring *, struct mbuf *);
    116 static int	ixgbe_tx_ctx_setup(struct tx_ring *,
    117 		    struct mbuf *, u32 *, u32 *);
    118 static int	ixgbe_tso_setup(struct tx_ring *,
    119 		    struct mbuf *, u32 *, u32 *);
    120 #ifdef IXGBE_FDIR
    121 static void	ixgbe_atr(struct tx_ring *, struct mbuf *);
    122 #endif
    123 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
    124 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
    125 		    struct mbuf *, u32);
    126 
    127 static void	ixgbe_setup_hw_rsc(struct rx_ring *);
    128 
    129 /*********************************************************************
    130  *  Transmit entry point
    131  *
    132  *  ixgbe_start is called by the stack to initiate a transmit.
    133  *  The driver will remain in this routine as long as there are
    134  *  packets to transmit and transmit resources are available.
     135  *  If resources are not available, the stack is notified
     136  *  and the packet is requeued.
    137  **********************************************************************/
    138 
    139 void
    140 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
    141 {
    142 	int rc;
    143 	struct mbuf    *m_head;
    144 	struct adapter *adapter = txr->adapter;
    145 
    146 	IXGBE_TX_LOCK_ASSERT(txr);
    147 
    148 	if ((ifp->if_flags & IFF_RUNNING) == 0)
    149 		return;
    150 	if (!adapter->link_active)
    151 		return;
    152 
    153 	while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
    154 		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
    155 			break;
    156 
    157 		IFQ_POLL(&ifp->if_snd, m_head);
    158 		if (m_head == NULL)
    159 			break;
    160 
    161 		if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
    162 			break;
    163 		}
    164 		IFQ_DEQUEUE(&ifp->if_snd, m_head);
    165 		if (rc != 0) {
    166 			m_freem(m_head);
    167 			continue;
    168 		}
    169 
    170 		/* Send a copy of the frame to the BPF listener */
    171 		bpf_mtap(ifp, m_head);
    172 	}
    173 	return;
    174 }
    175 
     176 /*
     177  * Legacy TX start - called by the stack. This
     178  * always uses the first tx ring and should
     179  * not be used with multiqueue tx enabled.
     180  */
    181 void
    182 ixgbe_start(struct ifnet *ifp)
    183 {
    184 	struct adapter *adapter = ifp->if_softc;
    185 	struct tx_ring	*txr = adapter->tx_rings;
    186 
    187 	if (ifp->if_flags & IFF_RUNNING) {
    188 		IXGBE_TX_LOCK(txr);
    189 		ixgbe_start_locked(txr, ifp);
    190 		IXGBE_TX_UNLOCK(txr);
    191 	}
    192 	return;
    193 }
    194 
    195 #ifndef IXGBE_LEGACY_TX
    196 
    197 /*
    198 ** Multiqueue Transmit Entry Point
    199 ** (if_transmit function)
    200 */
    201 int
    202 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
    203 {
    204 	struct adapter	*adapter = ifp->if_softc;
    205 	struct tx_ring	*txr;
    206 	int 		i, err = 0;
    207 #ifdef	RSS
    208 	uint32_t bucket_id;
    209 #endif
    210 
     211 	/*
     212 	 * When doing RSS, map the packet to the same outbound
     213 	 * queue that the incoming flow would be mapped to.
     214 	 *
     215 	 * If everything is set up correctly, it should be the
     216 	 * same bucket that the current CPU is in.
     217 	 */
    218 #if 0
    219 #if __FreeBSD_version < 1100054
    220 	if (m->m_flags & M_FLOWID) {
    221 #else
    222 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
    223 #endif
    224 #ifdef	RSS
    225 		if (rss_hash2bucket(m->m_pkthdr.flowid,
    226 		    M_HASHTYPE_GET(m), &bucket_id) == 0) {
    227 			/* TODO: spit out something if bucket_id > num_queues? */
    228 			i = bucket_id % adapter->num_queues;
    229 #ifdef IXGBE_DEBUG
    230 			if (bucket_id > adapter->num_queues)
    231 				if_printf(ifp, "bucket_id (%d) > num_queues "
    232 				    "(%d)\n", bucket_id, adapter->num_queues);
    233 #endif
    234 		} else
    235 #endif /* RSS */
    236 			i = m->m_pkthdr.flowid % adapter->num_queues;
    237 	} else
    238 #endif
    239 		i = cpu_index(curcpu()) % adapter->num_queues;
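         	/*
         	 * With the RSS/flowid mapping above compiled out (#if 0),
         	 * this port simply derives the queue from the current CPU
         	 * index, so each CPU tends to stick to one TX queue.
         	 */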
    240 
    241 	/* Check for a hung queue and pick alternative */
    242 	if (((1 << i) & adapter->active_queues) == 0)
    243 		i = ffs64(adapter->active_queues);
    244 
    245 	txr = &adapter->tx_rings[i];
    246 
    247 	err = pcq_put(txr->txr_interq, m);
    248 	if (err == false) {
    249 		m_freem(m);
    250 		txr->pcq_drops.ev_count++;
     251 		return (ENOBUFS);
    252 	}
    253 	if (IXGBE_TX_TRYLOCK(txr)) {
    254 		ixgbe_mq_start_locked(ifp, txr);
    255 		IXGBE_TX_UNLOCK(txr);
    256 	} else
    257 		softint_schedule(txr->txr_si);
    258 
    259 	return (0);
    260 }
    261 
    262 int
    263 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
    264 {
    265 	struct adapter  *adapter = txr->adapter;
    266 	struct mbuf     *next;
    267 	int             enqueued = 0, err = 0;
    268 
    269 	if (((ifp->if_flags & IFF_RUNNING) == 0) ||
    270 	    adapter->link_active == 0)
    271 		return (ENETDOWN);
    272 
    273 	/* Process the queue */
    274 	while ((next = pcq_get(txr->txr_interq)) != NULL) {
    275 		if ((err = ixgbe_xmit(txr, next)) != 0) {
    276 			m_freem(next);
    277 			/* All errors are counted in ixgbe_xmit() */
    278 			break;
    279 		}
    280 		enqueued++;
    281 #if 0 // this is VF-only
    282 #if __FreeBSD_version >= 1100036
    283 		/*
    284 		 * Since we're looking at the tx ring, we can check
    285 		 * to see if we're a VF by examing our tail register
    286 		 * address.
    287 		 */
    288 		if (txr->tail < IXGBE_TDT(0) && next->m_flags & M_MCAST)
    289 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
    290 #endif
    291 #endif /* 0 */
    292 		/* Send a copy of the frame to the BPF listener */
    293 		bpf_mtap(ifp, next);
    294 		if ((ifp->if_flags & IFF_RUNNING) == 0)
    295 			break;
    296 	}
    297 
    298 	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
    299 		ixgbe_txeof(txr);
    300 
    301 	return (err);
    302 }
    303 
    304 /*
     305  * Called from a softint (deferred context) to drain queued transmit packets.
    306  */
    307 void
    308 ixgbe_deferred_mq_start(void *arg)
    309 {
    310 	struct tx_ring *txr = arg;
    311 	struct adapter *adapter = txr->adapter;
    312 	struct ifnet *ifp = adapter->ifp;
    313 
    314 	IXGBE_TX_LOCK(txr);
    315 	if (pcq_peek(txr->txr_interq) != NULL)
    316 		ixgbe_mq_start_locked(ifp, txr);
    317 	IXGBE_TX_UNLOCK(txr);
    318 }
    319 
    320 #endif /* IXGBE_LEGACY_TX */
    321 
    322 
    323 /*********************************************************************
    324  *
    325  *  This routine maps the mbufs to tx descriptors, allowing the
    326  *  TX engine to transmit the packets.
    327  *  	- return 0 on success, positive on failure
    328  *
    329  **********************************************************************/
    330 
    331 static int
    332 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
    333 {
    334 	struct m_tag *mtag;
    335 	struct adapter  *adapter = txr->adapter;
    336 	struct ethercom *ec = &adapter->osdep.ec;
    337 	u32		olinfo_status = 0, cmd_type_len;
    338 	int             i, j, error;
    339 	int		first;
    340 	bool		remap = TRUE;
    341 	bus_dmamap_t	map;
    342 	struct ixgbe_tx_buf *txbuf;
    343 	union ixgbe_adv_tx_desc *txd = NULL;
    344 
    345 	/* Basic descriptor defines */
    346         cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
    347 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
    348 
    349 	if ((mtag = VLAN_OUTPUT_TAG(ec, m_head)) != NULL)
    350         	cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
    351 
    352         /*
    353          * Important to capture the first descriptor
    354          * used because it will contain the index of
    355          * the one we tell the hardware to report back
    356          */
    357         first = txr->next_avail_desc;
    358 	txbuf = &txr->tx_buffers[first];
    359 	map = txbuf->map;
    360 
    361 	/*
    362 	 * Map the packet for DMA.
    363 	 */
    364 retry:
    365 	error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map,
    366 	    m_head, BUS_DMA_NOWAIT);
    367 
    368 	if (__predict_false(error)) {
    369 		struct mbuf *m;
    370 
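         		/*
         		 * EAGAIN asks the caller to retry later: the legacy
         		 * start path leaves the mbuf on the send queue, while
         		 * the multiqueue path frees it and counts a drop.
         		 * Other errors are treated as permanent for this packet.
         		 */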
    371 		switch (error) {
    372 		case EAGAIN:
    373 			adapter->eagain_tx_dma_setup.ev_count++;
    374 			return EAGAIN;
    375 		case ENOMEM:
    376 			adapter->enomem_tx_dma_setup.ev_count++;
    377 			return EAGAIN;
    378 		case EFBIG:
    379 			/* Try it again? - one try */
    380 			if (remap == TRUE) {
    381 				remap = FALSE;
    382 				/*
    383 				 * XXX: m_defrag will choke on
    384 				 * non-MCLBYTES-sized clusters
    385 				 */
    386 				adapter->efbig_tx_dma_setup.ev_count++;
    387 				m = m_defrag(m_head, M_NOWAIT);
    388 				if (m == NULL) {
    389 					adapter->mbuf_defrag_failed.ev_count++;
    390 					return ENOBUFS;
    391 				}
    392 				m_head = m;
    393 				goto retry;
    394 			} else {
    395 				adapter->efbig2_tx_dma_setup.ev_count++;
    396 				return error;
    397 			}
    398 		case EINVAL:
    399 			adapter->einval_tx_dma_setup.ev_count++;
    400 			return error;
    401 		default:
    402 			adapter->other_tx_dma_setup.ev_count++;
    403 			return error;
    404 		}
    405 	}
    406 
    407 	/* Make certain there are enough descriptors */
    408 	if (txr->tx_avail < (map->dm_nsegs + 2)) {
    409 		txr->no_desc_avail.ev_count++;
    410 		ixgbe_dmamap_unload(txr->txtag, txbuf->map);
    411 		return EAGAIN;
    412 	}
    413 
     414 	/*
     415 	 * Set up the appropriate offload context;
     416 	 * this will consume the first descriptor.
     417 	 */
    418 	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
    419 	if (__predict_false(error)) {
    420 		return (error);
    421 	}
    422 
    423 #ifdef IXGBE_FDIR
    424 	/* Do the flow director magic */
    425 	if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
    426 		++txr->atr_count;
    427 		if (txr->atr_count >= atr_sample_rate) {
    428 			ixgbe_atr(txr, m_head);
    429 			txr->atr_count = 0;
    430 		}
    431 	}
    432 #endif
    433 
    434 	olinfo_status |= IXGBE_ADVTXD_CC;
    435 	i = txr->next_avail_desc;
    436 	for (j = 0; j < map->dm_nsegs; j++) {
    437 		bus_size_t seglen;
    438 		bus_addr_t segaddr;
    439 
    440 		txbuf = &txr->tx_buffers[i];
    441 		txd = &txr->tx_base[i];
    442 		seglen = map->dm_segs[j].ds_len;
    443 		segaddr = htole64(map->dm_segs[j].ds_addr);
    444 
    445 		txd->read.buffer_addr = segaddr;
    446 		txd->read.cmd_type_len = htole32(txr->txd_cmd |
    447 		    cmd_type_len |seglen);
    448 		txd->read.olinfo_status = htole32(olinfo_status);
    449 
    450 		if (++i == txr->num_desc)
    451 			i = 0;
    452 	}
    453 
    454 	txd->read.cmd_type_len |=
    455 	    htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
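         	/*
         	 * EOP marks this as the last descriptor of the frame; RS asks
         	 * the hardware to write back completion (DD) status for it,
         	 * which is what ixgbe_txeof() later polls via txbuf->eop.
         	 */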
    456 	txr->tx_avail -= map->dm_nsegs;
    457 	txr->next_avail_desc = i;
    458 
    459 	txbuf->m_head = m_head;
     460 	/*
     461 	 * Here we swap the map so the last descriptor,
     462 	 * which gets the completion interrupt, has the
     463 	 * real map, and the first descriptor gets the
     464 	 * unused map from this descriptor.
     465 	 */
    466 	txr->tx_buffers[first].map = txbuf->map;
    467 	txbuf->map = map;
    468 	bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
    469 	    BUS_DMASYNC_PREWRITE);
    470 
    471         /* Set the EOP descriptor that will be marked done */
    472         txbuf = &txr->tx_buffers[first];
    473 	txbuf->eop = txd;
    474 
    475         ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
    476 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
     477 	/*
     478 	 * Advance the Transmit Descriptor Tail (TDT); this tells the
     479 	 * hardware that this frame is available to transmit.
     480 	 */
    481 	++txr->total_packets.ev_count;
    482 	IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
    483 
    484 	/* Mark queue as having work */
    485 	if (txr->busy == 0)
    486 		txr->busy = 1;
    487 
    488 	return 0;
    489 }
    490 
    491 
    492 /*********************************************************************
    493  *
    494  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
    495  *  the information needed to transmit a packet on the wire. This is
     496  *  called only once at attach; setup is done on every reset.
    497  *
    498  **********************************************************************/
    499 int
    500 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
    501 {
    502 	struct adapter *adapter = txr->adapter;
    503 	device_t dev = adapter->dev;
    504 	struct ixgbe_tx_buf *txbuf;
    505 	int error, i;
    506 
    507 	/*
    508 	 * Setup DMA descriptor areas.
    509 	 */
    510 	if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat,	/* parent */
    511 			       1, 0,		/* alignment, bounds */
    512 			       IXGBE_TSO_SIZE,		/* maxsize */
    513 			       adapter->num_segs,	/* nsegments */
    514 			       PAGE_SIZE,		/* maxsegsize */
    515 			       0,			/* flags */
    516 			       &txr->txtag))) {
    517 		aprint_error_dev(dev,"Unable to allocate TX DMA tag\n");
    518 		goto fail;
    519 	}
    520 
    521 	if (!(txr->tx_buffers =
    522 	    (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
    523 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
    524 		aprint_error_dev(dev, "Unable to allocate tx_buffer memory\n");
    525 		error = ENOMEM;
    526 		goto fail;
    527 	}
    528 
    529         /* Create the descriptor buffer dma maps */
    530 	txbuf = txr->tx_buffers;
    531 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
    532 		error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
    533 		if (error != 0) {
    534 			aprint_error_dev(dev,
    535 			    "Unable to create TX DMA map (%d)\n", error);
    536 			goto fail;
    537 		}
    538 	}
    539 
    540 	return 0;
    541 fail:
     542 	/* Free everything; this handles the case where we failed partway through */
    543 #if 0 /* XXX was FreeBSD */
    544 	ixgbe_free_transmit_structures(adapter);
    545 #else
    546 	ixgbe_free_transmit_buffers(txr);
    547 #endif
    548 	return (error);
    549 }
    550 
    551 /*********************************************************************
    552  *
    553  *  Initialize a transmit ring.
    554  *
    555  **********************************************************************/
    556 static void
    557 ixgbe_setup_transmit_ring(struct tx_ring *txr)
    558 {
    559 	struct adapter *adapter = txr->adapter;
    560 	struct ixgbe_tx_buf *txbuf;
    561 #ifdef DEV_NETMAP
    562 	struct netmap_adapter *na = NA(adapter->ifp);
    563 	struct netmap_slot *slot;
    564 #endif /* DEV_NETMAP */
    565 
    566 	/* Clear the old ring contents */
    567 	IXGBE_TX_LOCK(txr);
    568 #ifdef DEV_NETMAP
    569 	/*
    570 	 * (under lock): if in netmap mode, do some consistency
    571 	 * checks and set slot to entry 0 of the netmap ring.
    572 	 */
    573 	slot = netmap_reset(na, NR_TX, txr->me, 0);
    574 #endif /* DEV_NETMAP */
    575 	bzero((void *)txr->tx_base,
    576 	      (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
    577 	/* Reset indices */
    578 	txr->next_avail_desc = 0;
    579 	txr->next_to_clean = 0;
    580 
    581 	/* Free any existing tx buffers. */
    582         txbuf = txr->tx_buffers;
    583 	for (int i = 0; i < txr->num_desc; i++, txbuf++) {
    584 		if (txbuf->m_head != NULL) {
    585 			bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
    586 			    0, txbuf->m_head->m_pkthdr.len,
    587 			    BUS_DMASYNC_POSTWRITE);
    588 			ixgbe_dmamap_unload(txr->txtag, txbuf->map);
    589 			m_freem(txbuf->m_head);
    590 			txbuf->m_head = NULL;
    591 		}
    592 #ifdef DEV_NETMAP
    593 		/*
    594 		 * In netmap mode, set the map for the packet buffer.
    595 		 * NOTE: Some drivers (not this one) also need to set
    596 		 * the physical buffer address in the NIC ring.
    597 		 * Slots in the netmap ring (indexed by "si") are
    598 		 * kring->nkr_hwofs positions "ahead" wrt the
    599 		 * corresponding slot in the NIC ring. In some drivers
    600 		 * (not here) nkr_hwofs can be negative. Function
    601 		 * netmap_idx_n2k() handles wraparounds properly.
    602 		 */
    603 		if (slot) {
    604 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
    605 			netmap_load_map(na, txr->txtag,
    606 			    txbuf->map, NMB(na, slot + si));
    607 		}
    608 #endif /* DEV_NETMAP */
    609 		/* Clear the EOP descriptor pointer */
    610 		txbuf->eop = NULL;
    611         }
    612 
    613 #ifdef IXGBE_FDIR
    614 	/* Set the rate at which we sample packets */
    615 	if (adapter->hw.mac.type != ixgbe_mac_82598EB)
    616 		txr->atr_sample = atr_sample_rate;
    617 #endif
    618 
    619 	/* Set number of descriptors available */
    620 	txr->tx_avail = adapter->num_tx_desc;
    621 
    622 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
    623 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
    624 	IXGBE_TX_UNLOCK(txr);
    625 }
    626 
    627 /*********************************************************************
    628  *
    629  *  Initialize all transmit rings.
    630  *
    631  **********************************************************************/
    632 int
    633 ixgbe_setup_transmit_structures(struct adapter *adapter)
    634 {
    635 	struct tx_ring *txr = adapter->tx_rings;
    636 
    637 	for (int i = 0; i < adapter->num_queues; i++, txr++)
    638 		ixgbe_setup_transmit_ring(txr);
    639 
    640 	return (0);
    641 }
    642 
    643 /*********************************************************************
    644  *
    645  *  Free all transmit rings.
    646  *
    647  **********************************************************************/
    648 void
    649 ixgbe_free_transmit_structures(struct adapter *adapter)
    650 {
    651 	struct tx_ring *txr = adapter->tx_rings;
    652 
    653 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
    654 		ixgbe_free_transmit_buffers(txr);
    655 		ixgbe_dma_free(adapter, &txr->txdma);
    656 		IXGBE_TX_LOCK_DESTROY(txr);
    657 	}
    658 	free(adapter->tx_rings, M_DEVBUF);
    659 }
    660 
    661 /*********************************************************************
    662  *
    663  *  Free transmit ring related data structures.
    664  *
    665  **********************************************************************/
    666 static void
    667 ixgbe_free_transmit_buffers(struct tx_ring *txr)
    668 {
    669 	struct adapter *adapter = txr->adapter;
    670 	struct ixgbe_tx_buf *tx_buffer;
    671 	int             i;
    672 
    673 	INIT_DEBUGOUT("ixgbe_free_transmit_buffers: begin");
    674 
    675 	if (txr->tx_buffers == NULL)
    676 		return;
    677 
    678 	tx_buffer = txr->tx_buffers;
    679 	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
    680 		if (tx_buffer->m_head != NULL) {
    681 			bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
    682 			    0, tx_buffer->m_head->m_pkthdr.len,
    683 			    BUS_DMASYNC_POSTWRITE);
    684 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
    685 			m_freem(tx_buffer->m_head);
    686 			tx_buffer->m_head = NULL;
    687 			if (tx_buffer->map != NULL) {
    688 				ixgbe_dmamap_destroy(txr->txtag,
    689 				    tx_buffer->map);
    690 				tx_buffer->map = NULL;
    691 			}
    692 		} else if (tx_buffer->map != NULL) {
    693 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
    694 			ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
    695 			tx_buffer->map = NULL;
    696 		}
    697 	}
    698 #ifndef IXGBE_LEGACY_TX
    699 	if (txr->txr_interq != NULL) {
    700 		struct mbuf *m;
    701 
    702 		while ((m = pcq_get(txr->txr_interq)) != NULL)
    703 			m_freem(m);
    704 		pcq_destroy(txr->txr_interq);
    705 	}
    706 #endif
    707 	if (txr->tx_buffers != NULL) {
    708 		free(txr->tx_buffers, M_DEVBUF);
    709 		txr->tx_buffers = NULL;
    710 	}
    711 	if (txr->txtag != NULL) {
    712 		ixgbe_dma_tag_destroy(txr->txtag);
    713 		txr->txtag = NULL;
    714 	}
    715 	return;
    716 }
    717 
    718 /*********************************************************************
    719  *
    720  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
    721  *
    722  **********************************************************************/
    723 
    724 static int
    725 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
    726     u32 *cmd_type_len, u32 *olinfo_status)
    727 {
    728 	struct adapter *adapter = txr->adapter;
    729 	struct ethercom *ec = &adapter->osdep.ec;
    730 	struct m_tag *mtag;
    731 	struct ixgbe_adv_tx_context_desc *TXD;
    732 	struct ether_vlan_header *eh;
    733 #ifdef INET
    734 	struct ip *ip;
    735 #endif
    736 #ifdef INET6
    737 	struct ip6_hdr *ip6;
    738 #endif
    739 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
    740 	int	ehdrlen, ip_hlen = 0;
    741 	u16	etype;
    742 	u8	ipproto = 0;
    743 	int	offload = TRUE;
    744 	int	ctxd = txr->next_avail_desc;
    745 	u16	vtag = 0;
    746 	char	*l3d;
    747 
    748 
    749 	/* First check if TSO is to be used */
    750 	if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4|M_CSUM_TSOv6)) {
    751 		int rv = ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status);
    752 
    753 		if (rv != 0)
    754 			++adapter->tso_err.ev_count;
    755 		return rv;
    756 	}
    757 
    758 	if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
    759 		offload = FALSE;
    760 
    761 	/* Indicate the whole packet as payload when not doing TSO */
    762        	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
    763 
    764 	/* Now ready a context descriptor */
    765 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
    766 
    767 	/*
    768 	** In advanced descriptors the vlan tag must
    769 	** be placed into the context descriptor. Hence
    770 	** we need to make one even if not doing offloads.
    771 	*/
    772 	if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
    773 		vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
    774 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
    775 	} else if (!IXGBE_IS_X550VF(adapter) && (offload == FALSE))
    776 		return (0);
    777 
    778 	/*
    779 	 * Determine where frame payload starts.
    780 	 * Jump over vlan headers if already present,
    781 	 * helpful for QinQ too.
    782 	 */
    783 	KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
    784 	eh = mtod(mp, struct ether_vlan_header *);
    785 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
    786 		KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
    787 		etype = ntohs(eh->evl_proto);
    788 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
    789 	} else {
    790 		etype = ntohs(eh->evl_encap_proto);
    791 		ehdrlen = ETHER_HDR_LEN;
    792 	}
    793 
    794 	/* Set the ether header length */
    795 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
    796 
    797 	if (offload == FALSE)
    798 		goto no_offloads;
    799 
    800 	/*
    801 	 * If the first mbuf only includes the ethernet header, jump to the next one
    802 	 * XXX: This assumes the stack splits mbufs containing headers on header boundaries
    803 	 * XXX: And assumes the entire IP header is contained in one mbuf
    804 	 */
    805 	if (mp->m_len == ehdrlen && mp->m_next)
    806 		l3d = mtod(mp->m_next, char *);
    807 	else
    808 		l3d = mtod(mp, char *) + ehdrlen;
    809 
    810 	switch (etype) {
    811 #ifdef INET
    812 	case ETHERTYPE_IP:
    813 		ip = (struct ip *)(l3d);
    814 		ip_hlen = ip->ip_hl << 2;
    815 		ipproto = ip->ip_p;
    816 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
    817 		KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
    818 		    ip->ip_sum == 0);
    819 		break;
    820 #endif
    821 #ifdef INET6
    822 	case ETHERTYPE_IPV6:
    823 		ip6 = (struct ip6_hdr *)(l3d);
    824 		ip_hlen = sizeof(struct ip6_hdr);
    825 		ipproto = ip6->ip6_nxt;
    826 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
    827 		break;
    828 #endif
    829 	default:
    830 		offload = false;
    831 		break;
    832 	}
    833 
    834 	if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
    835 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
    836 
    837 	vlan_macip_lens |= ip_hlen;
    838 
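         	/*
         	 * At this point vlan_macip_lens packs three fields for the
         	 * context descriptor: the IP header length in the low-order
         	 * bits, the MAC (L2) header length at IXGBE_ADVTXD_MACLEN_SHIFT,
         	 * and the VLAN tag, if any, at IXGBE_ADVTXD_VLAN_SHIFT.
         	 */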
    839 	/* No support for offloads for non-L4 next headers */
    840  	switch (ipproto) {
    841  		case IPPROTO_TCP:
    842 			if (mp->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_TCPv6))
    843 
    844 				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
    845 			else
    846 				offload = false;
    847 			break;
    848 		case IPPROTO_UDP:
    849 			if (mp->m_pkthdr.csum_flags & (M_CSUM_UDPv4|M_CSUM_UDPv6))
    850 				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
    851 			else
    852 				offload = false;
    853 			break;
    854 		default:
    855 			offload = false;
    856 			break;
    857 	}
    858 
    859 	if (offload) /* Insert L4 checksum into data descriptors */
    860 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
    861 
    862 no_offloads:
    863 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
    864 
    865 	/* Now copy bits into descriptor */
    866 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
    867 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
    868 	TXD->seqnum_seed = htole32(0);
    869 	TXD->mss_l4len_idx = htole32(0);
    870 
    871 	/* We've consumed the first desc, adjust counters */
    872 	if (++ctxd == txr->num_desc)
    873 		ctxd = 0;
    874 	txr->next_avail_desc = ctxd;
    875 	--txr->tx_avail;
    876 
    877         return 0;
    878 }
    879 
    880 /**********************************************************************
    881  *
    882  *  Setup work for hardware segmentation offload (TSO) on
    883  *  adapters using advanced tx descriptors
    884  *
    885  **********************************************************************/
    886 static int
    887 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
    888     u32 *cmd_type_len, u32 *olinfo_status)
    889 {
    890 	struct m_tag *mtag;
    891 	struct adapter *adapter = txr->adapter;
    892 	struct ethercom *ec = &adapter->osdep.ec;
    893 	struct ixgbe_adv_tx_context_desc *TXD;
    894 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
    895 	u32 mss_l4len_idx = 0, paylen;
    896 	u16 vtag = 0, eh_type;
    897 	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
    898 	struct ether_vlan_header *eh;
    899 #ifdef INET6
    900 	struct ip6_hdr *ip6;
    901 #endif
    902 #ifdef INET
    903 	struct ip *ip;
    904 #endif
    905 	struct tcphdr *th;
    906 
    907 	/*
    908 	 * Determine where frame payload starts.
    909 	 * Jump over vlan headers if already present
    910 	 */
    911 	eh = mtod(mp, struct ether_vlan_header *);
    912 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
    913 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
    914 		eh_type = eh->evl_proto;
    915 	} else {
    916 		ehdrlen = ETHER_HDR_LEN;
    917 		eh_type = eh->evl_encap_proto;
    918 	}
    919 
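         	/*
         	 * For TSO the TCP checksum field must be pre-seeded with the
         	 * pseudo-header checksum (addresses and protocol only, no
         	 * length); both cases below do this with in_cksum_phdr() /
         	 * in6_cksum_phdr() so the hardware can insert the final
         	 * checksum for every segment it generates.
         	 */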
    920 	switch (ntohs(eh_type)) {
    921 #ifdef INET6
    922 	case ETHERTYPE_IPV6:
    923 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
    924 		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
    925 		if (ip6->ip6_nxt != IPPROTO_TCP)
    926 			return (ENXIO);
    927 		ip_hlen = sizeof(struct ip6_hdr);
    928 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
    929 		th = (struct tcphdr *)((char *)ip6 + ip_hlen);
    930 		th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
    931 		    &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
    932 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
    933 		break;
    934 #endif
    935 #ifdef INET
    936 	case ETHERTYPE_IP:
    937 		ip = (struct ip *)(mp->m_data + ehdrlen);
    938 		if (ip->ip_p != IPPROTO_TCP)
    939 			return (ENXIO);
    940 		ip->ip_sum = 0;
    941 		ip_hlen = ip->ip_hl << 2;
    942 		th = (struct tcphdr *)((char *)ip + ip_hlen);
    943 		th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
    944 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
    945 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
    946 		/* Tell transmit desc to also do IPv4 checksum. */
    947 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
    948 		break;
    949 #endif
    950 	default:
    951 		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
    952 		    __func__, ntohs(eh_type));
    953 		break;
    954 	}
    955 
    956 	ctxd = txr->next_avail_desc;
    957 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
    958 
    959 	tcp_hlen = th->th_off << 2;
    960 
    961 	/* This is used in the transmit desc in encap */
    962 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
    963 
    964 	/* VLAN MACLEN IPLEN */
    965 	if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
    966 		vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
    967                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
    968 	}
    969 
    970 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
    971 	vlan_macip_lens |= ip_hlen;
    972 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
    973 
    974 	/* ADV DTYPE TUCMD */
    975 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
    976 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
    977 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
    978 
    979 	/* MSS L4LEN IDX */
    980 	mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
    981 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
    982 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
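         	/*
         	 * mss_l4len_idx carries the segment size at
         	 * IXGBE_ADVTXD_MSS_SHIFT and the TCP header length at
         	 * IXGBE_ADVTXD_L4LEN_SHIFT; the context index bits are
         	 * left at zero here.
         	 */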
    983 
    984 	TXD->seqnum_seed = htole32(0);
    985 
    986 	if (++ctxd == txr->num_desc)
    987 		ctxd = 0;
    988 
    989 	txr->tx_avail--;
    990 	txr->next_avail_desc = ctxd;
    991 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
    992 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
    993 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
    994 	++txr->tso_tx.ev_count;
    995 	return (0);
    996 }
    997 
    998 
    999 /**********************************************************************
   1000  *
   1001  *  Examine each tx_buffer in the used queue. If the hardware is done
   1002  *  processing the packet then free associated resources. The
   1003  *  tx_buffer is put back on the free queue.
   1004  *
   1005  **********************************************************************/
   1006 void
   1007 ixgbe_txeof(struct tx_ring *txr)
   1008 {
   1009 	struct adapter		*adapter = txr->adapter;
   1010 	struct ifnet		*ifp = adapter->ifp;
   1011 	u32			work, processed = 0;
   1012 	u32			limit = adapter->tx_process_limit;
   1013 	struct ixgbe_tx_buf	*buf;
   1014 	union ixgbe_adv_tx_desc *txd;
   1015 
   1016 	KASSERT(mutex_owned(&txr->tx_mtx));
   1017 
   1018 #ifdef DEV_NETMAP
   1019 	if (ifp->if_capenable & IFCAP_NETMAP) {
   1020 		struct netmap_adapter *na = NA(ifp);
   1021 		struct netmap_kring *kring = &na->tx_rings[txr->me];
   1022 		txd = txr->tx_base;
   1023 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1024 		    BUS_DMASYNC_POSTREAD);
   1025 		/*
   1026 		 * In netmap mode, all the work is done in the context
   1027 		 * of the client thread. Interrupt handlers only wake up
   1028 		 * clients, which may be sleeping on individual rings
   1029 		 * or on a global resource for all rings.
   1030 		 * To implement tx interrupt mitigation, we wake up the client
   1031 		 * thread roughly every half ring, even if the NIC interrupts
   1032 		 * more frequently. This is implemented as follows:
   1033 		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
   1034 		 *   the slot that should wake up the thread (nkr_num_slots
   1035 		 *   means the user thread should not be woken up);
   1036 		 * - the driver ignores tx interrupts unless netmap_mitigate=0
   1037 		 *   or the slot has the DD bit set.
   1038 		 */
   1039 		if (!netmap_mitigate ||
   1040 		    (kring->nr_kflags < kring->nkr_num_slots &&
   1041 		    txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
   1042 			netmap_tx_irq(ifp, txr->me);
   1043 		}
   1044 		return;
   1045 	}
   1046 #endif /* DEV_NETMAP */
   1047 
   1048 	if (txr->tx_avail == txr->num_desc) {
   1049 		txr->busy = 0;
   1050 		return;
   1051 	}
   1052 
   1053 	/* Get work starting point */
   1054 	work = txr->next_to_clean;
   1055 	buf = &txr->tx_buffers[work];
   1056 	txd = &txr->tx_base[work];
   1057 	work -= txr->num_desc; /* The distance to ring end */
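         	/*
         	 * 'work' is kept as a negative offset from the end of the
         	 * ring: it counts up as descriptors are cleaned and reaches
         	 * zero exactly at the wrap point, where it is rebased by
         	 * -num_desc.  For example (illustrative values): with
         	 * num_desc = 1024 and next_to_clean = 1000, work starts at
         	 * -24; after 24 cleaned descriptors it hits 0, buf/txd are
         	 * reset to the start of the ring, and work becomes -1024
         	 * again.  Adding num_desc back at the end recovers the
         	 * absolute next_to_clean index.
         	 */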
   1058         ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1059 	    BUS_DMASYNC_POSTREAD);
   1060 
   1061 	do {
   1062 		union ixgbe_adv_tx_desc *eop = buf->eop;
   1063 		if (eop == NULL) /* No work */
   1064 			break;
   1065 
   1066 		if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
   1067 			break;	/* I/O not complete */
   1068 
   1069 		if (buf->m_head) {
   1070 			txr->bytes +=
   1071 			    buf->m_head->m_pkthdr.len;
   1072 			bus_dmamap_sync(txr->txtag->dt_dmat,
   1073 			    buf->map,
   1074 			    0, buf->m_head->m_pkthdr.len,
   1075 			    BUS_DMASYNC_POSTWRITE);
   1076 			ixgbe_dmamap_unload(txr->txtag,
   1077 			    buf->map);
   1078 			m_freem(buf->m_head);
   1079 			buf->m_head = NULL;
   1080 		}
   1081 		buf->eop = NULL;
   1082 		++txr->tx_avail;
   1083 
    1084 		/* If the frame used multiple descriptors, clean the whole range */
   1085 		while (txd != eop) {
   1086 			++txd;
   1087 			++buf;
   1088 			++work;
   1089 			/* wrap the ring? */
   1090 			if (__predict_false(!work)) {
   1091 				work -= txr->num_desc;
   1092 				buf = txr->tx_buffers;
   1093 				txd = txr->tx_base;
   1094 			}
   1095 			if (buf->m_head) {
   1096 				txr->bytes +=
   1097 				    buf->m_head->m_pkthdr.len;
   1098 				bus_dmamap_sync(txr->txtag->dt_dmat,
   1099 				    buf->map,
   1100 				    0, buf->m_head->m_pkthdr.len,
   1101 				    BUS_DMASYNC_POSTWRITE);
   1102 				ixgbe_dmamap_unload(txr->txtag,
   1103 				    buf->map);
   1104 				m_freem(buf->m_head);
   1105 				buf->m_head = NULL;
   1106 			}
   1107 			++txr->tx_avail;
   1108 			buf->eop = NULL;
   1109 
   1110 		}
   1111 		++txr->packets;
   1112 		++processed;
   1113 		++ifp->if_opackets;
   1114 
   1115 		/* Try the next packet */
   1116 		++txd;
   1117 		++buf;
   1118 		++work;
   1119 		/* reset with a wrap */
   1120 		if (__predict_false(!work)) {
   1121 			work -= txr->num_desc;
   1122 			buf = txr->tx_buffers;
   1123 			txd = txr->tx_base;
   1124 		}
   1125 		prefetch(txd);
   1126 	} while (__predict_true(--limit));
   1127 
   1128 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1129 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   1130 
   1131 	work += txr->num_desc;
   1132 	txr->next_to_clean = work;
   1133 
    1134 	/*
    1135 	** Queue hang detection: we know there is
    1136 	** work outstanding or the early return above
    1137 	** would have been taken, so if nothing got
    1138 	** cleaned, increment busy.  The local timer
    1139 	** checks this count and marks the queue HUNG
    1140 	** once it exceeds the maximum attempts.
    1141 	*/
   1142 	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
   1143 		++txr->busy;
    1144 	/*
    1145 	** If anything was cleaned, reset the state to 1;
    1146 	** note this clears HUNG if it was set.
    1147 	*/
   1148 	if (processed)
   1149 		txr->busy = 1;
   1150 
   1151 	if (txr->tx_avail == txr->num_desc)
   1152 		txr->busy = 0;
   1153 
   1154 	return;
   1155 }
   1156 
   1157 
   1158 #ifdef IXGBE_FDIR
    1159 /*
    1160 ** This routine parses packet headers so that Flow
    1161 ** Director can make a hashed filter table entry,
    1162 ** allowing traffic flows to be identified and kept
    1163 ** on the same CPU.  Doing this for every packet
    1164 ** would hurt performance, so we only sample one
    1165 ** packet in every atr_sample_rate.
    1166 */
   1167 static void
   1168 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
   1169 {
   1170 	struct adapter			*adapter = txr->adapter;
   1171 	struct ix_queue			*que;
   1172 	struct ip			*ip;
   1173 	struct tcphdr			*th;
   1174 	struct udphdr			*uh;
   1175 	struct ether_vlan_header	*eh;
   1176 	union ixgbe_atr_hash_dword	input = {.dword = 0};
   1177 	union ixgbe_atr_hash_dword	common = {.dword = 0};
   1178 	int  				ehdrlen, ip_hlen;
   1179 	u16				etype;
   1180 
   1181 	eh = mtod(mp, struct ether_vlan_header *);
   1182 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
   1183 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
   1184 		etype = eh->evl_proto;
   1185 	} else {
   1186 		ehdrlen = ETHER_HDR_LEN;
   1187 		etype = eh->evl_encap_proto;
   1188 	}
   1189 
   1190 	/* Only handling IPv4 */
   1191 	if (etype != htons(ETHERTYPE_IP))
   1192 		return;
   1193 
   1194 	ip = (struct ip *)(mp->m_data + ehdrlen);
   1195 	ip_hlen = ip->ip_hl << 2;
   1196 
   1197 	/* check if we're UDP or TCP */
   1198 	switch (ip->ip_p) {
   1199 	case IPPROTO_TCP:
   1200 		th = (struct tcphdr *)((char *)ip + ip_hlen);
   1201 		/* src and dst are inverted */
   1202 		common.port.dst ^= th->th_sport;
   1203 		common.port.src ^= th->th_dport;
   1204 		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
   1205 		break;
   1206 	case IPPROTO_UDP:
   1207 		uh = (struct udphdr *)((char *)ip + ip_hlen);
   1208 		/* src and dst are inverted */
   1209 		common.port.dst ^= uh->uh_sport;
   1210 		common.port.src ^= uh->uh_dport;
   1211 		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
   1212 		break;
   1213 	default:
   1214 		return;
   1215 	}
   1216 
   1217 	input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
   1218 	if (mp->m_pkthdr.ether_vtag)
   1219 		common.flex_bytes ^= htons(ETHERTYPE_VLAN);
   1220 	else
   1221 		common.flex_bytes ^= etype;
   1222 	common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
   1223 
   1224 	que = &adapter->queues[txr->me];
   1225 	/*
   1226 	** This assumes the Rx queue and Tx
   1227 	** queue are bound to the same CPU
   1228 	*/
   1229 	ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
   1230 	    input, common, que->msix);
   1231 }
   1232 #endif /* IXGBE_FDIR */
   1233 
   1234 /*
   1235 ** Used to detect a descriptor that has
   1236 ** been merged by Hardware RSC.
   1237 */
   1238 static inline u32
   1239 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
   1240 {
   1241 	return (le32toh(rx->wb.lower.lo_dword.data) &
   1242 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
   1243 }
   1244 
    1245 /*********************************************************************
    1246  *
    1247  *  Initialize the Hardware RSC (LRO) feature on 82599
    1248  *  for an RX ring; it is toggled by the LRO capability
    1249  *  even though it is transparent to the stack.
    1250  *
    1251  *  NOTE: since this HW feature only works with IPv4 and
    1252  *        our testing has shown soft LRO to be as effective,
    1253  *        it is disabled by default.
    1254  *
    1255  **********************************************************************/
   1256 static void
   1257 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
   1258 {
   1259 	struct	adapter 	*adapter = rxr->adapter;
   1260 	struct	ixgbe_hw	*hw = &adapter->hw;
   1261 	u32			rscctrl, rdrxctl;
   1262 
    1263 	/* If turning LRO/RSC off we need to disable it */
    1264 	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
    1265 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
    1266 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
         		/* Write the cleared enable bit back, or the disable is a no-op */
         		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
    1267 		return;
    1268 	}
   1269 
   1270 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
   1271 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
   1272 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
   1273 	if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
   1274 #endif /* DEV_NETMAP */
   1275 	rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
   1276 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
   1277 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
   1278 
   1279 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
   1280 	rscctrl |= IXGBE_RSCCTL_RSCEN;
   1281 	/*
   1282 	** Limit the total number of descriptors that
   1283 	** can be combined, so it does not exceed 64K
   1284 	*/
   1285 	if (rxr->mbuf_sz == MCLBYTES)
   1286 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
   1287 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
   1288 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
   1289 	else if (rxr->mbuf_sz == MJUM9BYTES)
   1290 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
   1291 	else  /* Using 16K cluster */
   1292 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
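         	/*
         	 * Rough arithmetic behind the limits above (assuming 2KB
         	 * MCLBYTES, a 4KB page-sized MJUMPAGESIZE and 9KB/16KB jumbo
         	 * clusters): 16 x 2KB = 32KB, 8 x 4KB = 32KB, 4 x 9KB = 36KB,
         	 * 1 x 16KB = 16KB, all safely under the 64KB RSC cap.
         	 */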
   1293 
   1294 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
   1295 
   1296 	/* Enable TCP header recognition */
   1297 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
   1298 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
   1299 	    IXGBE_PSRTYPE_TCPHDR));
   1300 
   1301 	/* Disable RSC for ACK packets */
   1302 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
   1303 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
   1304 
   1305 	rxr->hw_rsc = TRUE;
   1306 }
   1307 
    1308 /*********************************************************************
    1309  *
    1310  *  Refresh mbuf buffers for RX descriptor rings
    1311  *   - now keeps its own state, so discards due to resource
    1312  *     exhaustion are unnecessary; if an mbuf cannot be obtained
    1313  *     it just returns, keeping its placeholder, and can simply
    1314  *     be called again later to retry.
    1315  *
    1316  **********************************************************************/
   1317 static void
   1318 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
   1319 {
   1320 	struct adapter		*adapter = rxr->adapter;
   1321 	struct ixgbe_rx_buf	*rxbuf;
   1322 	struct mbuf		*mp;
   1323 	int			i, j, error;
   1324 	bool			refreshed = false;
   1325 
   1326 	i = j = rxr->next_to_refresh;
   1327 	/* Control the loop with one beyond */
   1328 	if (++j == rxr->num_desc)
   1329 		j = 0;
   1330 
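         	/*
         	 * i is the slot being refreshed and j always runs one slot
         	 * ahead, so refreshing stops one slot short of 'limit' (the
         	 * caller's current position); next_to_refresh, and the tail
         	 * index written to hardware below, never catch up to where
         	 * the cleanup code is working.
         	 */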
   1331 	while (j != limit) {
   1332 		rxbuf = &rxr->rx_buffers[i];
   1333 		if (rxbuf->buf == NULL) {
   1334 			mp = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
   1335 			    MT_DATA, M_PKTHDR, rxr->mbuf_sz);
   1336 			if (mp == NULL) {
   1337 				rxr->no_jmbuf.ev_count++;
   1338 				goto update;
   1339 			}
   1340 			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
   1341 				m_adj(mp, ETHER_ALIGN);
   1342 		} else
   1343 			mp = rxbuf->buf;
   1344 
   1345 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   1346 
   1347 		/* If we're dealing with an mbuf that was copied rather
   1348 		 * than replaced, there's no need to go through busdma.
   1349 		 */
   1350 		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
   1351 			/* Get the memory mapping */
   1352 			ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   1353 			error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
   1354 			    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
   1355 			if (error != 0) {
   1356 				printf("Refresh mbufs: payload dmamap load"
   1357 				    " failure - %d\n", error);
   1358 				m_free(mp);
   1359 				rxbuf->buf = NULL;
   1360 				goto update;
   1361 			}
   1362 			rxbuf->buf = mp;
   1363 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1364 			    0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
   1365 			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
   1366 			    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1367 		} else {
   1368 			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
   1369 			rxbuf->flags &= ~IXGBE_RX_COPY;
   1370 		}
   1371 
   1372 		refreshed = true;
   1373 		/* Next is precalculated */
   1374 		i = j;
   1375 		rxr->next_to_refresh = i;
   1376 		if (++j == rxr->num_desc)
   1377 			j = 0;
   1378 	}
   1379 update:
   1380 	if (refreshed) /* Update hardware tail index */
   1381 		IXGBE_WRITE_REG(&adapter->hw,
   1382 		    rxr->tail, rxr->next_to_refresh);
   1383 	return;
   1384 }
   1385 
   1386 /*********************************************************************
   1387  *
   1388  *  Allocate memory for rx_buffer structures. Since we use one
   1389  *  rx_buffer per received packet, the maximum number of rx_buffer's
    1390  *  rx_buffer per received packet, the maximum number of rx_buffers
   1391  *  that we've allocated.
   1392  *
   1393  **********************************************************************/
   1394 int
   1395 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
   1396 {
   1397 	struct	adapter 	*adapter = rxr->adapter;
   1398 	device_t 		dev = adapter->dev;
   1399 	struct ixgbe_rx_buf 	*rxbuf;
   1400 	int             	bsize, error;
   1401 
   1402 	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
   1403 	if (!(rxr->rx_buffers =
   1404 	    (struct ixgbe_rx_buf *) malloc(bsize,
   1405 	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
   1406 		aprint_error_dev(dev, "Unable to allocate rx_buffer memory\n");
   1407 		error = ENOMEM;
   1408 		goto fail;
   1409 	}
   1410 
   1411 	if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat,	/* parent */
   1412 				   1, 0,	/* alignment, bounds */
   1413 				   MJUM16BYTES,		/* maxsize */
   1414 				   1,			/* nsegments */
   1415 				   MJUM16BYTES,		/* maxsegsize */
   1416 				   0,			/* flags */
   1417 				   &rxr->ptag))) {
   1418 		aprint_error_dev(dev, "Unable to create RX DMA tag\n");
   1419 		goto fail;
   1420 	}
   1421 
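         	/*
         	 * Note the tag above allows only a single segment, with both
         	 * maxsize and maxsegsize set to MJUM16BYTES: every receive
         	 * buffer must map as one contiguous region, up to a 16KB
         	 * jumbo cluster.
         	 */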
   1422 	for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
   1423 		rxbuf = &rxr->rx_buffers[i];
   1424 		error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
   1425 		if (error) {
   1426 			aprint_error_dev(dev, "Unable to create RX dma map\n");
   1427 			goto fail;
   1428 		}
   1429 	}
   1430 
   1431 	return (0);
   1432 
   1433 fail:
   1434 	/* Frees all, but can handle partial completion */
   1435 	ixgbe_free_receive_structures(adapter);
   1436 	return (error);
   1437 }
   1438 
   1439 static void
   1440 ixgbe_free_receive_ring(struct rx_ring *rxr)
   1441 {
   1442 	struct ixgbe_rx_buf       *rxbuf;
   1443 
   1444 	for (int i = 0; i < rxr->num_desc; i++) {
   1445 		rxbuf = &rxr->rx_buffers[i];
   1446 		if (rxbuf->buf != NULL) {
   1447 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1448 			    0, rxbuf->buf->m_pkthdr.len,
   1449 			    BUS_DMASYNC_POSTREAD);
   1450 			ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   1451 			rxbuf->buf->m_flags |= M_PKTHDR;
   1452 			m_freem(rxbuf->buf);
   1453 			rxbuf->buf = NULL;
   1454 			rxbuf->flags = 0;
   1455 		}
   1456 	}
   1457 }
   1458 
   1459 /*********************************************************************
   1460  *
   1461  *  Initialize a receive ring and its buffers.
   1462  *
   1463  **********************************************************************/
   1464 static int
   1465 ixgbe_setup_receive_ring(struct rx_ring *rxr)
   1466 {
   1467 	struct	adapter 	*adapter;
   1468 	struct ixgbe_rx_buf	*rxbuf;
   1469 #ifdef LRO
   1470 	struct ifnet		*ifp;
   1471 	struct lro_ctrl		*lro = &rxr->lro;
   1472 #endif /* LRO */
   1473 	int			rsize, error = 0;
   1474 #ifdef DEV_NETMAP
   1475 	struct netmap_adapter *na = NA(rxr->adapter->ifp);
   1476 	struct netmap_slot *slot;
   1477 #endif /* DEV_NETMAP */
   1478 
   1479 	adapter = rxr->adapter;
   1480 #ifdef LRO
   1481 	ifp = adapter->ifp;
   1482 #endif /* LRO */
   1483 
   1484 	/* Clear the ring contents */
   1485 	IXGBE_RX_LOCK(rxr);
   1486 #ifdef DEV_NETMAP
   1487 	/* same as in ixgbe_setup_transmit_ring() */
   1488 	slot = netmap_reset(na, NR_RX, rxr->me, 0);
   1489 #endif /* DEV_NETMAP */
   1490 	rsize = roundup2(adapter->num_rx_desc *
   1491 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
   1492 	bzero((void *)rxr->rx_base, rsize);
   1493 	/* Cache the size */
   1494 	rxr->mbuf_sz = adapter->rx_mbuf_sz;
   1495 
   1496 	/* Free current RX buffer structs and their mbufs */
   1497 	ixgbe_free_receive_ring(rxr);
   1498 
   1499 	IXGBE_RX_UNLOCK(rxr);
   1500 
   1501 	/* Now reinitialize our supply of jumbo mbufs.  The number
   1502 	 * or size of jumbo mbufs may have changed.
   1503 	 */
   1504 	ixgbe_jcl_reinit(&adapter->jcl_head, rxr->ptag->dt_dmat,
   1505 	    2 * adapter->num_rx_desc, adapter->rx_mbuf_sz);
   1506 
   1507 	IXGBE_RX_LOCK(rxr);
   1508 
   1509 	/* Now replenish the mbufs */
   1510 	for (int j = 0; j != rxr->num_desc; ++j) {
   1511 		struct mbuf	*mp;
   1512 
   1513 		rxbuf = &rxr->rx_buffers[j];
   1514 #ifdef DEV_NETMAP
   1515 		/*
   1516 		 * In netmap mode, fill the map and set the buffer
   1517 		 * address in the NIC ring, considering the offset
   1518 		 * between the netmap and NIC rings (see comment in
   1519 		 * ixgbe_setup_transmit_ring() ). No need to allocate
   1520 		 * an mbuf, so end the block with a continue;
   1521 		 */
   1522 		if (slot) {
   1523 			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
   1524 			uint64_t paddr;
   1525 			void *addr;
   1526 
   1527 			addr = PNMB(na, slot + sj, &paddr);
   1528 			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
   1529 			/* Update descriptor and the cached value */
   1530 			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
   1531 			rxbuf->addr = htole64(paddr);
   1532 			continue;
   1533 		}
   1534 #endif /* DEV_NETMAP */
   1535 		rxbuf->flags = 0;
   1536 		rxbuf->buf = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
   1537 		    MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
   1538 		if (rxbuf->buf == NULL) {
   1539 			error = ENOBUFS;
   1540                         goto fail;
   1541 		}
   1542 		mp = rxbuf->buf;
   1543 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   1544 		/* Get the memory mapping */
   1545 		error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
   1546 		    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
   1547 		if (error != 0)
   1548                         goto fail;
   1549 		bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1550 		    0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
   1551 		/* Update the descriptor and the cached value */
   1552 		rxr->rx_base[j].read.pkt_addr =
   1553 		    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1554 		rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1555 	}
   1556 
   1557 
   1558 	/* Setup our descriptor indices */
   1559 	rxr->next_to_check = 0;
   1560 	rxr->next_to_refresh = 0;
   1561 	rxr->lro_enabled = FALSE;
   1562 	rxr->rx_copies.ev_count = 0;
   1563 #if 0 /* NetBSD */
   1564 	rxr->rx_bytes.ev_count = 0;
   1565 #if 1	/* Fix inconsistency */
   1566 	rxr->rx_packets.ev_count = 0;
   1567 #endif
   1568 #endif
   1569 	rxr->vtag_strip = FALSE;
   1570 
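        	/* Sync the initialized descriptor ring before the hardware uses it. */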
   1571 	ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   1572 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   1573 
   1574 	/*
   1575 	** Now set up the LRO interface:
   1576 	*/
   1577 	if (ixgbe_rsc_enable)
   1578 		ixgbe_setup_hw_rsc(rxr);
   1579 #ifdef LRO
   1580 	else if (ifp->if_capenable & IFCAP_LRO) {
   1581 		device_t dev = adapter->dev;
   1582 		int err = tcp_lro_init(lro);
   1583 		if (err) {
   1584 			device_printf(dev, "LRO Initialization failed!\n");
   1585 			goto fail;
   1586 		}
   1587 		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
   1588 		rxr->lro_enabled = TRUE;
   1589 		lro->ifp = adapter->ifp;
   1590 	}
   1591 #endif /* LRO */
   1592 
   1593 	IXGBE_RX_UNLOCK(rxr);
   1594 	return (0);
   1595 
   1596 fail:
   1597 	ixgbe_free_receive_ring(rxr);
   1598 	IXGBE_RX_UNLOCK(rxr);
   1599 	return (error);
   1600 }
   1601 
   1602 /*********************************************************************
   1603  *
   1604  *  Initialize all receive rings.
   1605  *
   1606  **********************************************************************/
   1607 int
   1608 ixgbe_setup_receive_structures(struct adapter *adapter)
   1609 {
   1610 	struct rx_ring *rxr = adapter->rx_rings;
   1611 	int j;
   1612 
   1613 	for (j = 0; j < adapter->num_queues; j++, rxr++)
   1614 		if (ixgbe_setup_receive_ring(rxr))
   1615 			goto fail;
   1616 
   1617 	return (0);
   1618 fail:
   1619 	/*
   1620 	 * Free the RX buffers allocated so far; we only handle
   1621 	 * the rings that completed, as the failing ring has already
   1622 	 * cleaned up after itself.  Ring 'j' failed, so it is the terminus.
   1623 	 */
   1624 	for (int i = 0; i < j; ++i) {
   1625 		rxr = &adapter->rx_rings[i];
   1626 		ixgbe_free_receive_ring(rxr);
   1627 	}
   1628 
   1629 	return (ENOBUFS);
   1630 }
   1631 
   1632 
   1633 /*********************************************************************
   1634  *
   1635  *  Free all receive rings.
   1636  *
   1637  **********************************************************************/
   1638 void
   1639 ixgbe_free_receive_structures(struct adapter *adapter)
   1640 {
   1641 	struct rx_ring *rxr = adapter->rx_rings;
   1642 
   1643 	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
   1644 
   1645 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
   1646 #ifdef LRO
   1647 		struct lro_ctrl		*lro = &rxr->lro;
   1648 #endif /* LRO */
   1649 		ixgbe_free_receive_buffers(rxr);
   1650 #ifdef LRO
   1651 		/* Free LRO memory */
   1652 		tcp_lro_free(lro);
   1653 #endif /* LRO */
   1654 		/* Free the ring memory as well */
   1655 		ixgbe_dma_free(adapter, &rxr->rxdma);
   1656 		IXGBE_RX_LOCK_DESTROY(rxr);
   1657 	}
   1658 
   1659 	free(adapter->rx_rings, M_DEVBUF);
   1660 }
   1661 
   1662 
   1663 /*********************************************************************
   1664  *
   1665  *  Free receive ring data structures
   1666  *
   1667  **********************************************************************/
   1668 static void
   1669 ixgbe_free_receive_buffers(struct rx_ring *rxr)
   1670 {
   1671 	struct adapter		*adapter = rxr->adapter;
   1672 	struct ixgbe_rx_buf	*rxbuf;
   1673 
   1674 	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
   1675 
   1676 	/* Cleanup any existing buffers */
   1677 	if (rxr->rx_buffers != NULL) {
   1678 		for (int i = 0; i < adapter->num_rx_desc; i++) {
   1679 			rxbuf = &rxr->rx_buffers[i];
   1680 			if (rxbuf->buf != NULL) {
   1681 				bus_dmamap_sync(rxr->ptag->dt_dmat,
   1682 				    rxbuf->pmap, 0, rxbuf->buf->m_pkthdr.len,
   1683 				    BUS_DMASYNC_POSTREAD);
   1684 				ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   1685 				rxbuf->buf->m_flags |= M_PKTHDR;
   1686 				m_freem(rxbuf->buf);
   1687 			}
   1688 			rxbuf->buf = NULL;
   1689 			if (rxbuf->pmap != NULL) {
   1690 				ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
   1691 				rxbuf->pmap = NULL;
   1692 			}
   1693 		}
   1694 		if (rxr->rx_buffers != NULL) {
   1695 			free(rxr->rx_buffers, M_DEVBUF);
   1696 			rxr->rx_buffers = NULL;
   1697 		}
   1698 	}
   1699 
   1700 	if (rxr->ptag != NULL) {
   1701 		ixgbe_dma_tag_destroy(rxr->ptag);
   1702 		rxr->ptag = NULL;
   1703 	}
   1704 
   1705 	return;
   1706 }
   1707 
   1708 static __inline void
   1709 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
   1710 {
   1711 	struct adapter	*adapter = ifp->if_softc;
   1712 
   1713 #ifdef LRO
   1714 	struct ethercom *ec = &adapter->osdep.ec;
   1715 
   1716 	/*
   1717 	 * At the moment LRO is only done for IP/TCP packets whose TCP checksum
   1718 	 * was verified by hardware, and which carry no VLAN tag in the Ethernet
   1719 	 * header.  For IPv6 we do not yet support extension headers.
   1720 	 */
   1721 	if (rxr->lro_enabled &&
   1722 	    (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
   1723 	    (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
   1724 	    ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
   1725 	    (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
   1726 	    (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
   1727 	    (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
   1728 	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
   1729 	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
   1730 		/*
   1731 		 * Hand the packet to LRO.  If there are no LRO
   1732 		 * resources available, or the LRO enqueue fails,
   1733 		 * fall through and send the packet to the stack
   1734 		 * directly.
   1735 		 */
   1736 		if (rxr->lro.lro_cnt != 0)
   1737 			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
   1738 				return;
   1739 	}
   1740 #endif /* LRO */
   1741 
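        	/* Drop the RX lock while handing the packet to the network stack. */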
   1742 	IXGBE_RX_UNLOCK(rxr);
   1743 
   1744 	if_percpuq_enqueue(adapter->ipq, m);
   1745 
   1746 	IXGBE_RX_LOCK(rxr);
   1747 }
   1748 
   1749 static __inline void
   1750 ixgbe_rx_discard(struct rx_ring *rxr, int i)
   1751 {
   1752 	struct ixgbe_rx_buf	*rbuf;
   1753 
   1754 	rbuf = &rxr->rx_buffers[i];
   1755 
   1756 
   1757 	/*
   1758 	** With advanced descriptors the writeback
   1759 	** clobbers the buffer addresses, so it is easier
   1760 	** to just free the existing mbufs and take
   1761 	** the normal refresh path to get new buffers
   1762 	** and mapping.
   1763 	*/
   1764 
   1765 	if (rbuf->fmp != NULL) { /* Partial chain ? */
   1766 		rbuf->fmp->m_flags |= M_PKTHDR;
   1767 		m_freem(rbuf->fmp);
   1768 		rbuf->fmp = NULL;
   1769 		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
   1770 	} else if (rbuf->buf) {
   1771 		m_free(rbuf->buf);
   1772 		rbuf->buf = NULL;
   1773 	}
   1774 	ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
   1775 
   1776 	rbuf->flags = 0;
   1777 
   1778 	return;
   1779 }
   1780 
   1781 
   1782 /*********************************************************************
   1783  *
   1784  *  This routine executes in interrupt context.  It replenishes
   1785  *  the mbufs in the descriptor ring and sends data which has
   1786  *  been DMA'ed into host memory to the upper layer.
   1787  *
   1788  *  Return TRUE for more work, FALSE for all clean.
   1789  *********************************************************************/
   1790 bool
   1791 ixgbe_rxeof(struct ix_queue *que)
   1792 {
   1793 	struct adapter		*adapter = que->adapter;
   1794 	struct rx_ring		*rxr = que->rxr;
   1795 	struct ifnet		*ifp = adapter->ifp;
   1796 #ifdef LRO
   1797 	struct lro_ctrl		*lro = &rxr->lro;
   1798 #endif /* LRO */
   1799 	int			i, nextp, processed = 0;
   1800 	u32			staterr = 0;
   1801 	u32			count = adapter->rx_process_limit;
   1802 	union ixgbe_adv_rx_desc	*cur;
   1803 	struct ixgbe_rx_buf	*rbuf, *nbuf;
   1804 #ifdef RSS
   1805 	u16			pkt_info;
   1806 #endif
   1807 
   1808 	IXGBE_RX_LOCK(rxr);
   1809 
   1810 #ifdef DEV_NETMAP
   1811 	/* Same as the txeof routine: wakeup clients on intr. */
   1812 	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
   1813 		IXGBE_RX_UNLOCK(rxr);
   1814 		return (FALSE);
   1815 	}
   1816 #endif /* DEV_NETMAP */
   1817 
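        	/*
        	 * Walk the ring starting at next_to_check, processing at
        	 * most rx_process_limit descriptors in this call.
        	 */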
   1818 	for (i = rxr->next_to_check; count != 0;) {
   1819 		struct mbuf	*sendmp, *mp;
   1820 		u32		rsc, ptype;
   1821 		u16		len;
   1822 		u16		vtag = 0;
   1823 		bool		eop;
   1824 
   1825 		/* Sync the ring. */
   1826 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   1827 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   1828 
   1829 		cur = &rxr->rx_base[i];
   1830 		staterr = le32toh(cur->wb.upper.status_error);
   1831 #ifdef RSS
   1832 		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
   1833 #endif
   1834 
   1835 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
   1836 			break;
   1837 		if ((ifp->if_flags & IFF_RUNNING) == 0)
   1838 			break;
   1839 
   1840 		count--;
   1841 		sendmp = NULL;
   1842 		nbuf = NULL;
   1843 		rsc = 0;
   1844 		cur->wb.upper.status_error = 0;
   1845 		rbuf = &rxr->rx_buffers[i];
   1846 		mp = rbuf->buf;
   1847 
   1848 		len = le16toh(cur->wb.upper.length);
   1849 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
   1850 		    IXGBE_RXDADV_PKTTYPE_MASK;
   1851 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
   1852 
   1853 		/* Make sure bad packets are discarded */
   1854 		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
   1855 #if __FreeBSD_version >= 1100036
   1856 			if (IXGBE_IS_VF(adapter))
   1857 				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
   1858 #endif
   1859 			rxr->rx_discarded.ev_count++;
   1860 			ixgbe_rx_discard(rxr, i);
   1861 			goto next_desc;
   1862 		}
   1863 
   1864 		/*
   1865 		** On the 82599, which supports a hardware
   1866 		** LRO (called HW RSC), packets need not be
   1867 		** fragmented across sequential descriptors;
   1868 		** instead, the next descriptor is indicated
   1869 		** in bits of the current descriptor.  This
   1870 		** also means that we may process more than
   1871 		** one packet at a time, something that has
   1872 		** never been true before; it required
   1873 		** eliminating the global chain pointers in
   1874 		** favor of what we are doing here.  -jfv
   1875 		*/
   1876 		if (!eop) {
   1877 			/*
   1878 			** Figure out the next descriptor
   1879 			** of this frame.
   1880 			*/
   1881 			if (rxr->hw_rsc == TRUE) {
   1882 				rsc = ixgbe_rsc_count(cur);
   1883 				rxr->rsc_num += (rsc - 1);
   1884 			}
   1885 			if (rsc) { /* Get hardware index */
   1886 				nextp = ((staterr &
   1887 				    IXGBE_RXDADV_NEXTP_MASK) >>
   1888 				    IXGBE_RXDADV_NEXTP_SHIFT);
   1889 			} else { /* Just sequential */
   1890 				nextp = i + 1;
   1891 				if (nextp == adapter->num_rx_desc)
   1892 					nextp = 0;
   1893 			}
   1894 			nbuf = &rxr->rx_buffers[nextp];
   1895 			prefetch(nbuf);
   1896 		}
   1897 		/*
   1898 		** Rather than using the fmp/lmp global pointers
   1899 		** we now keep the head of a packet chain in the
   1900 		** buffer struct and pass this along from one
   1901 		** descriptor to the next, until we get EOP.
   1902 		*/
   1903 		mp->m_len = len;
   1904 		/*
   1905 		** See if a previous descriptor stored the
   1906 		** head of this packet's mbuf chain
   1907 		*/
   1908 		sendmp = rbuf->fmp;
   1909 		if (sendmp != NULL) {  /* secondary frag */
   1910 			rbuf->buf = rbuf->fmp = NULL;
   1911 			mp->m_flags &= ~M_PKTHDR;
   1912 			sendmp->m_pkthdr.len += mp->m_len;
   1913 		} else {
   1914 			/*
   1915 			 * Optimize.  This might be a small packet,
   1916 			 * maybe just a TCP ACK.  Do a fast copy that
   1917 			 * is cache aligned into a new mbuf, and
   1918 			 * leave the old mbuf+cluster for re-use.
   1919 			 */
   1920 			if (eop && len <= IXGBE_RX_COPY_LEN) {
   1921 				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
   1922 				if (sendmp != NULL) {
   1923 					sendmp->m_data +=
   1924 					    IXGBE_RX_COPY_ALIGN;
   1925 					ixgbe_bcopy(mp->m_data,
   1926 					    sendmp->m_data, len);
   1927 					sendmp->m_len = len;
   1928 					rxr->rx_copies.ev_count++;
   1929 					rbuf->flags |= IXGBE_RX_COPY;
   1930 				}
   1931 			}
   1932 			if (sendmp == NULL) {
   1933 				rbuf->buf = rbuf->fmp = NULL;
   1934 				sendmp = mp;
   1935 			}
   1936 
   1937 			/* first desc of a non-ps chain */
   1938 			sendmp->m_flags |= M_PKTHDR;
   1939 			sendmp->m_pkthdr.len = mp->m_len;
   1940 		}
   1941 		++processed;
   1942 
   1943 		/* Pass the head pointer on */
   1944 		if (eop == 0) {
   1945 			nbuf->fmp = sendmp;
   1946 			sendmp = NULL;
   1947 			mp->m_next = nbuf->buf;
   1948 		} else { /* Sending this frame */
   1949 			m_set_rcvif(sendmp, ifp);
   1950 			ifp->if_ipackets++;
   1951 			rxr->rx_packets.ev_count++;
   1952 			/* capture data for AIM */
   1953 			rxr->bytes += sendmp->m_pkthdr.len;
   1954 			rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
   1955 			/* Process vlan info */
   1956 			if ((rxr->vtag_strip) &&
   1957 			    (staterr & IXGBE_RXD_STAT_VP))
   1958 				vtag = le16toh(cur->wb.upper.vlan);
   1959 			if (vtag) {
   1960 				VLAN_INPUT_TAG(ifp, sendmp, vtag,
   1961 				    printf("%s: could not apply VLAN "
   1962 					"tag", __func__));
   1963 			}
   1964 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
   1965 				ixgbe_rx_checksum(staterr, sendmp, ptype,
   1966 				   &adapter->stats.pf);
   1967 			}
   1968 
   1969 #if 0 /* FreeBSD */
   1970                         /*
   1971                          * In case of multiqueue, we have RXCSUM.PCSD bit set
   1972                          * and never cleared. This means we have RSS hash
   1973                          * available to be used.
   1974                          */
   1975                         if (adapter->num_queues > 1) {
   1976                                 sendmp->m_pkthdr.flowid =
   1977                                     le32toh(cur->wb.lower.hi_dword.rss);
   1978                                 switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
   1979                                     case IXGBE_RXDADV_RSSTYPE_IPV4:
   1980                                         M_HASHTYPE_SET(sendmp,
   1981                                             M_HASHTYPE_RSS_IPV4);
   1982                                         break;
   1983                                     case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
   1984                                         M_HASHTYPE_SET(sendmp,
   1985                                             M_HASHTYPE_RSS_TCP_IPV4);
   1986                                         break;
   1987                                     case IXGBE_RXDADV_RSSTYPE_IPV6:
   1988                                         M_HASHTYPE_SET(sendmp,
   1989                                             M_HASHTYPE_RSS_IPV6);
   1990                                         break;
   1991                                     case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
   1992                                         M_HASHTYPE_SET(sendmp,
   1993                                             M_HASHTYPE_RSS_TCP_IPV6);
   1994                                         break;
   1995                                     case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
   1996                                         M_HASHTYPE_SET(sendmp,
   1997                                             M_HASHTYPE_RSS_IPV6_EX);
   1998                                         break;
   1999                                     case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
   2000                                         M_HASHTYPE_SET(sendmp,
   2001                                             M_HASHTYPE_RSS_TCP_IPV6_EX);
   2002                                         break;
   2003 #if __FreeBSD_version > 1100000
   2004                                     case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
   2005                                         M_HASHTYPE_SET(sendmp,
   2006                                             M_HASHTYPE_RSS_UDP_IPV4);
   2007                                         break;
   2008                                     case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
   2009                                         M_HASHTYPE_SET(sendmp,
   2010                                             M_HASHTYPE_RSS_UDP_IPV6);
   2011                                         break;
   2012                                     case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
   2013                                         M_HASHTYPE_SET(sendmp,
   2014                                             M_HASHTYPE_RSS_UDP_IPV6_EX);
   2015                                         break;
   2016 #endif
   2017                                     default:
   2018                                         M_HASHTYPE_SET(sendmp,
   2019                                             M_HASHTYPE_OPAQUE_HASH);
   2020                                 }
   2021                         } else {
   2022                                 sendmp->m_pkthdr.flowid = que->msix;
   2023 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
   2024 			}
   2025 #endif
   2026 		}
   2027 next_desc:
   2028 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   2029 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   2030 
   2031 		/* Advance our pointers to the next descriptor. */
   2032 		if (++i == rxr->num_desc)
   2033 			i = 0;
   2034 
   2035 		/* Now send to the stack or do LRO */
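        		/*
        		 * ixgbe_rx_input() temporarily releases the RX lock,
        		 * so keep next_to_check current across the call.
        		 */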
   2036 		if (sendmp != NULL) {
   2037 			rxr->next_to_check = i;
   2038 			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
   2039 			i = rxr->next_to_check;
   2040 		}
   2041 
   2042 		/* Every 8 descriptors, go refresh the supply of mbufs */
   2043 		if (processed == 8) {
   2044 			ixgbe_refresh_mbufs(rxr, i);
   2045 			processed = 0;
   2046 		}
   2047 	}
   2048 
   2049 	/* Refresh any remaining buf structs */
   2050 	if (ixgbe_rx_unrefreshed(rxr))
   2051 		ixgbe_refresh_mbufs(rxr, i);
   2052 
   2053 	rxr->next_to_check = i;
   2054 
   2055 #ifdef LRO
   2056 	/*
   2057 	 * Flush any outstanding LRO work
   2058 	 */
   2059 	tcp_lro_flush_all(lro);
   2060 #endif /* LRO */
   2061 
   2062 	IXGBE_RX_UNLOCK(rxr);
   2063 
   2064 	/*
   2065 	** Still have cleaning to do?
   2066 	*/
   2067 	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
   2068 		return true;
   2069 	else
   2070 		return false;
   2071 }
   2072 
   2073 
   2074 /*********************************************************************
   2075  *
   2076  *  Verify that the hardware indicated that the checksum is valid.
   2077  *  Inform the stack about the status of the checksum so that
   2078  *  the stack doesn't spend time verifying it again.
   2079  *
   2080  *********************************************************************/
   2081 static void
   2082 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
   2083     struct ixgbe_hw_stats *stats)
   2084 {
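        	/*
        	 * The low bits of the descriptor's status/error word carry
        	 * the IXGBE_RXD_STAT_* flags; the top byte carries the
        	 * IXGBE_RXD_ERR_* flags.
        	 */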
   2085 	u16	status = (u16) staterr;
   2086 	u8	errors = (u8) (staterr >> 24);
   2087 #if 0
   2088 	bool	sctp = false;
   2089 
   2090 	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
   2091 	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
   2092 		sctp = true;
   2093 #endif
   2094 
   2095 	/* IPv4 checksum */
   2096 	if (status & IXGBE_RXD_STAT_IPCS) {
   2097 		stats->ipcs.ev_count++;
   2098 		if (!(errors & IXGBE_RXD_ERR_IPE)) {
   2099 			/* IP Checksum Good */
   2100 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
   2101 		} else {
   2102 			stats->ipcs_bad.ev_count++;
   2103 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
   2104 		}
   2105 	}
   2106 	/* TCP/UDP/SCTP checksum */
   2107 	if (status & IXGBE_RXD_STAT_L4CS) {
   2108 		stats->l4cs.ev_count++;
   2109 		int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
   2110 		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
   2111 			mp->m_pkthdr.csum_flags |= type;
   2112 		} else {
   2113 			stats->l4cs_bad.ev_count++;
   2114 			mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
   2115 		}
   2116 	}
   2117 }
   2118 
   2119 
   2120 /********************************************************************
   2121  * Manage DMA'able memory.
   2122  *******************************************************************/
   2123 
   2124 int
   2125 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
   2126 		struct ixgbe_dma_alloc *dma, const int mapflags)
   2127 {
   2128 	device_t dev = adapter->dev;
   2129 	int             r, rsegs;
   2130 
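        	/*
        	 * Standard bus_dma(9) sequence: create a tag, allocate and
        	 * map DMA-safe memory, then create and load a map so that
        	 * dma_paddr holds the device-visible address of dma_vaddr.
        	 */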
   2131 	r = ixgbe_dma_tag_create(adapter->osdep.dmat,	/* parent */
   2132 			       DBA_ALIGN, 0,	/* alignment, bounds */
   2133 			       size,	/* maxsize */
   2134 			       1,	/* nsegments */
   2135 			       size,	/* maxsegsize */
   2136 			       BUS_DMA_ALLOCNOW,	/* flags */
   2137 			       &dma->dma_tag);
   2138 	if (r != 0) {
   2139 		aprint_error_dev(dev,
   2140 		    "%s: ixgbe_dma_tag_create failed; error %d\n", __func__, r);
   2141 		goto fail_0;
   2142 	}
   2143 
   2144 	r = bus_dmamem_alloc(dma->dma_tag->dt_dmat,
   2145 		size,
   2146 		dma->dma_tag->dt_alignment,
   2147 		dma->dma_tag->dt_boundary,
   2148 		&dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
   2149 	if (r != 0) {
   2150 		aprint_error_dev(dev,
   2151 		    "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
   2152 		goto fail_1;
   2153 	}
   2154 
   2155 	r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
   2156 	    size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
   2157 	if (r != 0) {
   2158 		aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
   2159 		    __func__, r);
   2160 		goto fail_2;
   2161 	}
   2162 
   2163 	r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
   2164 	if (r != 0) {
   2165 		aprint_error_dev(dev, "%s: ixgbe_dmamap_create failed; error %d\n",
   2166 		    __func__, r);
   2167 		goto fail_3;
   2168 	}
   2169 
   2170 	r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map, dma->dma_vaddr,
   2171 			    size,
   2172 			    NULL,
   2173 			    mapflags | BUS_DMA_NOWAIT);
   2174 	if (r != 0) {
   2175 		aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
   2176 		    __func__, r);
   2177 		goto fail_4;
   2178 	}
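        	/* A single segment was requested, so record its bus address. */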
   2179 	dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
   2180 	dma->dma_size = size;
   2181 	return 0;
   2182 fail_4:
   2183 	ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
   2184 fail_3:
   2185 	bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
   2186 fail_2:
   2187 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
   2188 fail_1:
   2189 	ixgbe_dma_tag_destroy(dma->dma_tag);
   2190 fail_0:
   2191 	return r;
   2192 }
   2193 
   2194 void
   2195 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
   2196 {
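        	/* Tear down the DMA map and memory set up by ixgbe_dma_malloc(). */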
   2197 	bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
   2198 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   2199 	ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
   2200 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
   2201 	ixgbe_dma_tag_destroy(dma->dma_tag);
   2202 }
   2203 
   2204 
   2205 /*********************************************************************
   2206  *
   2207  *  Allocate memory for the transmit and receive rings, and then
   2208  *  the descriptors associated with each, called only once at attach.
   2209  *
   2210  **********************************************************************/
   2211 int
   2212 ixgbe_allocate_queues(struct adapter *adapter)
   2213 {
   2214 	device_t	dev = adapter->dev;
   2215 	struct ix_queue	*que;
   2216 	struct tx_ring	*txr;
   2217 	struct rx_ring	*rxr;
   2218 	int rsize, tsize, error = IXGBE_SUCCESS;
   2219 	int txconf = 0, rxconf = 0;
   2220 #ifdef PCI_IOV
   2221 	enum ixgbe_iov_mode iov_mode;
   2222 #endif
   2223 
   2224 	/* First allocate the top level queue structs */
   2225 	if (!(adapter->queues =
   2226 	    (struct ix_queue *) malloc(sizeof(struct ix_queue) *
   2227 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
   2228 		aprint_error_dev(dev, "Unable to allocate queue memory\n");
   2229 		error = ENOMEM;
   2230 		goto fail;
   2231 	}
   2232 
   2233 	/* Then allocate the TX ring struct memory */
   2234 	if (!(adapter->tx_rings =
   2235 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
   2236 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
   2237 		aprint_error_dev(dev, "Unable to allocate TX ring memory\n");
   2238 		error = ENOMEM;
   2239 		goto tx_fail;
   2240 	}
   2241 
   2242 	/* Next allocate the RX */
   2243 	if (!(adapter->rx_rings =
   2244 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
   2245 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
   2246 		aprint_error_dev(dev, "Unable to allocate RX ring memory\n");
   2247 		error = ENOMEM;
   2248 		goto rx_fail;
   2249 	}
   2250 
   2251 	/* For the ring itself */
   2252 	tsize = roundup2(adapter->num_tx_desc *
   2253 	    sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
   2254 
   2255 #ifdef PCI_IOV
   2256 	iov_mode = ixgbe_get_iov_mode(adapter);
   2257 	adapter->pool = ixgbe_max_vfs(iov_mode);
   2258 #else
   2259 	adapter->pool = 0;
   2260 #endif
   2261 	/*
   2262 	 * Now set up the TX queues, txconf is needed to handle the
   2263 	 * possibility that things fail midcourse and we need to
   2264 	 * undo memory gracefully
   2265 	 */
   2266 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
   2267 		/* Set up some basics */
   2268 		txr = &adapter->tx_rings[i];
   2269 		txr->adapter = adapter;
   2270 #ifdef PCI_IOV
   2271 		txr->me = ixgbe_pf_que_index(iov_mode, i);
   2272 #else
   2273 		txr->me = i;
   2274 #endif
   2275 		txr->num_desc = adapter->num_tx_desc;
   2276 
   2277 		/* Initialize the TX side lock */
   2278 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
   2279 		    device_xname(dev), txr->me);
   2280 		mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
   2281 
   2282 		if (ixgbe_dma_malloc(adapter, tsize,
   2283 			&txr->txdma, BUS_DMA_NOWAIT)) {
   2284 			aprint_error_dev(dev,
   2285 			    "Unable to allocate TX Descriptor memory\n");
   2286 			error = ENOMEM;
   2287 			goto err_tx_desc;
   2288 		}
   2289 		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
   2290 		bzero((void *)txr->tx_base, tsize);
   2291 
   2292 		/* Now allocate transmit buffers for the ring */
   2293 		if (ixgbe_allocate_transmit_buffers(txr)) {
   2294 			aprint_error_dev(dev,
   2295 			    "Critical Failure setting up transmit buffers\n");
   2296 			error = ENOMEM;
   2297 			goto err_tx_desc;
   2298 		}
   2299 #ifndef IXGBE_LEGACY_TX
   2300 		/* Allocate a software packet queue (pcq) for this ring */
   2301 		txr->txr_interq = pcq_create(IXGBE_BR_SIZE, KM_SLEEP);
   2302 		if (txr->txr_interq == NULL) {
   2303 			aprint_error_dev(dev,
   2304 			    "Critical Failure setting up buf ring\n");
   2305 			error = ENOMEM;
   2306 			goto err_tx_desc;
   2307 		}
   2308 #endif
   2309 	}
   2310 
   2311 	/*
   2312 	 * Next the RX queues...
   2313 	 */
   2314 	rsize = roundup2(adapter->num_rx_desc *
   2315 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
   2316 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
   2317 		rxr = &adapter->rx_rings[i];
   2318 		/* Set up some basics */
   2319 		rxr->adapter = adapter;
   2320 #ifdef PCI_IOV
   2321 		rxr->me = ixgbe_pf_que_index(iov_mode, i);
   2322 #else
   2323 		rxr->me = i;
   2324 #endif
   2325 		rxr->num_desc = adapter->num_rx_desc;
   2326 
   2327 		/* Initialize the RX side lock */
   2328 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
   2329 		    device_xname(dev), rxr->me);
   2330 		mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
   2331 
   2332 		if (ixgbe_dma_malloc(adapter, rsize,
   2333 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
   2334 			aprint_error_dev(dev,
   2335 			    "Unable to allocate RX Descriptor memory\n");
   2336 			error = ENOMEM;
   2337 			goto err_rx_desc;
   2338 		}
   2339 		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
   2340 		bzero((void *)rxr->rx_base, rsize);
   2341 
   2342 		/* Allocate receive buffers for the ring */
   2343 		if (ixgbe_allocate_receive_buffers(rxr)) {
   2344 			aprint_error_dev(dev,
   2345 			    "Critical Failure setting up receive buffers\n");
   2346 			error = ENOMEM;
   2347 			goto err_rx_desc;
   2348 		}
   2349 	}
   2350 
   2351 	/*
   2352 	** Finally set up the queue holding structs
   2353 	*/
   2354 	for (int i = 0; i < adapter->num_queues; i++) {
   2355 		que = &adapter->queues[i];
   2356 		que->adapter = adapter;
   2357 		que->me = i;
   2358 		que->txr = &adapter->tx_rings[i];
   2359 		que->rxr = &adapter->rx_rings[i];
   2360 	}
   2361 
   2362 	return (0);
   2363 
   2364 err_rx_desc:
   2365 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
   2366 		ixgbe_dma_free(adapter, &rxr->rxdma);
   2367 err_tx_desc:
   2368 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
   2369 		ixgbe_dma_free(adapter, &txr->txdma);
   2370 	free(adapter->rx_rings, M_DEVBUF);
   2371 rx_fail:
   2372 	free(adapter->tx_rings, M_DEVBUF);
   2373 tx_fail:
   2374 	free(adapter->queues, M_DEVBUF);
   2375 fail:
   2376 	return (error);
   2377 }
   2378