ix_txrx.c revision 1.23
      1 /******************************************************************************
      2 
      3   Copyright (c) 2001-2015, Intel Corporation
      4   All rights reserved.
      5 
      6   Redistribution and use in source and binary forms, with or without
      7   modification, are permitted provided that the following conditions are met:
      8 
      9    1. Redistributions of source code must retain the above copyright notice,
     10       this list of conditions and the following disclaimer.
     11 
     12    2. Redistributions in binary form must reproduce the above copyright
     13       notice, this list of conditions and the following disclaimer in the
     14       documentation and/or other materials provided with the distribution.
     15 
     16    3. Neither the name of the Intel Corporation nor the names of its
     17       contributors may be used to endorse or promote products derived from
     18       this software without specific prior written permission.
     19 
     20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     30   POSSIBILITY OF SUCH DAMAGE.
     31 
     32 ******************************************************************************/
     33 /*
     34  * Copyright (c) 2011 The NetBSD Foundation, Inc.
     35  * All rights reserved.
     36  *
     37  * This code is derived from software contributed to The NetBSD Foundation
     38  * by Coyote Point Systems, Inc.
     39  *
     40  * Redistribution and use in source and binary forms, with or without
     41  * modification, are permitted provided that the following conditions
     42  * are met:
     43  * 1. Redistributions of source code must retain the above copyright
     44  *    notice, this list of conditions and the following disclaimer.
     45  * 2. Redistributions in binary form must reproduce the above copyright
     46  *    notice, this list of conditions and the following disclaimer in the
     47  *    documentation and/or other materials provided with the distribution.
     48  *
     49  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     50  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     51  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     52  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     53  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     54  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     55  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     56  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     57  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     58  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     59  * POSSIBILITY OF SUCH DAMAGE.
     60  */
     61 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 301538 2016-06-07 04:51:50Z sephe $*/
     62 /*$NetBSD: ix_txrx.c,v 1.23 2017/05/08 10:00:41 msaitoh Exp $*/
     63 
     64 #include "opt_inet.h"
     65 #include "opt_inet6.h"
     66 
     67 #include "ixgbe.h"
     68 
     69 #ifdef DEV_NETMAP
     70 #include <net/netmap.h>
     71 #include <sys/selinfo.h>
     72 #include <dev/netmap/netmap_kern.h>
     73 
     74 extern int ix_crcstrip;
     75 #endif
     76 
     77 /*
     78 ** HW RSC control:
      79 **  This feature only works with
      80 **  IPv4, and only on 82599 and later.
      81 **  It also causes IP forwarding to
      82 **  fail, and unlike LRO that cannot be
      83 **  controlled by the stack.  For these
      84 **  reasons it is left off by default
      85 **  with no tunable interface; enabling
      86 **  it requires setting this to TRUE
      87 **  and recompiling.
     88 */
     89 static bool ixgbe_rsc_enable = FALSE;
     90 
     91 #ifdef IXGBE_FDIR
     92 /*
      93 ** For Flow Director: this is the
      94 ** sampling interval for TX packets
      95 ** added to the filter pool; with the
      96 ** default, every 20th packet is probed.
      97 **
      98 ** Setting this to 0 disables the
      99 ** feature.
    100 */
    101 static int atr_sample_rate = 20;
    102 #endif
    103 
    104 /*********************************************************************
    105  *  Local Function prototypes
    106  *********************************************************************/
    107 static void	ixgbe_setup_transmit_ring(struct tx_ring *);
    108 static void     ixgbe_free_transmit_buffers(struct tx_ring *);
    109 static int	ixgbe_setup_receive_ring(struct rx_ring *);
    110 static void     ixgbe_free_receive_buffers(struct rx_ring *);
    111 
    112 static void	ixgbe_rx_checksum(u32, struct mbuf *, u32,
    113 		    struct ixgbe_hw_stats *);
    114 static void	ixgbe_refresh_mbufs(struct rx_ring *, int);
    115 static int      ixgbe_xmit(struct tx_ring *, struct mbuf *);
    116 static int	ixgbe_tx_ctx_setup(struct tx_ring *,
    117 		    struct mbuf *, u32 *, u32 *);
    118 static int	ixgbe_tso_setup(struct tx_ring *,
    119 		    struct mbuf *, u32 *, u32 *);
    120 #ifdef IXGBE_FDIR
    121 static void	ixgbe_atr(struct tx_ring *, struct mbuf *);
    122 #endif
    123 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
    124 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
    125 		    struct mbuf *, u32);
    126 
    127 static void	ixgbe_setup_hw_rsc(struct rx_ring *);
    128 
    129 /*********************************************************************
    130  *  Transmit entry point
    131  *
    132  *  ixgbe_start is called by the stack to initiate a transmit.
    133  *  The driver will remain in this routine as long as there are
    134  *  packets to transmit and transmit resources are available.
     135  *  If resources are not available, the stack is notified and
     136  *  the packet is requeued.
    137  **********************************************************************/
    138 
    139 void
    140 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
    141 {
    142 	int rc;
    143 	struct mbuf    *m_head;
    144 	struct adapter *adapter = txr->adapter;
    145 
    146 	IXGBE_TX_LOCK_ASSERT(txr);
    147 
    148 	if ((ifp->if_flags & IFF_RUNNING) == 0)
    149 		return;
    150 	if (!adapter->link_active)
    151 		return;
    152 
    153 	while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
    154 		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
    155 			break;
    156 
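		/*
		 * Peek at the head of the queue first; the packet is
		 * only dequeued once ixgbe_xmit() accepts it, so an
		 * EAGAIN (ring full) leaves it queued for a retry.
		 */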
    157 		IFQ_POLL(&ifp->if_snd, m_head);
    158 		if (m_head == NULL)
    159 			break;
    160 
    161 		if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
    162 			break;
    163 		}
    164 		IFQ_DEQUEUE(&ifp->if_snd, m_head);
    165 		if (rc != 0) {
    166 			m_freem(m_head);
    167 			continue;
    168 		}
    169 
    170 		/* Send a copy of the frame to the BPF listener */
    171 		bpf_mtap(ifp, m_head);
    172 	}
    173 	return;
    174 }
    175 
    176 /*
    177  * Legacy TX start - called by the stack, this
    178  * always uses the first tx ring, and should
    179  * not be used with multiqueue tx enabled.
    180  */
    181 void
    182 ixgbe_start(struct ifnet *ifp)
    183 {
    184 	struct adapter *adapter = ifp->if_softc;
    185 	struct tx_ring	*txr = adapter->tx_rings;
    186 
    187 	if (ifp->if_flags & IFF_RUNNING) {
    188 		IXGBE_TX_LOCK(txr);
    189 		ixgbe_start_locked(txr, ifp);
    190 		IXGBE_TX_UNLOCK(txr);
    191 	}
    192 	return;
    193 }
    194 
    195 #ifndef IXGBE_LEGACY_TX
    196 
    197 /*
    198 ** Multiqueue Transmit Entry Point
    199 ** (if_transmit function)
    200 */
    201 int
    202 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
    203 {
    204 	struct adapter	*adapter = ifp->if_softc;
    205 	struct tx_ring	*txr;
    206 	int 		i, err = 0;
    207 #ifdef	RSS
    208 	uint32_t bucket_id;
    209 #endif
    210 
    211 	/*
    212 	 * When doing RSS, map it to the same outbound queue
    213 	 * as the incoming flow would be mapped to.
    214 	 *
     215 	 * If everything is set up correctly, this should be the
     216 	 * same bucket as the CPU we are currently running on.
    217 	 */
    218 #if 0
    219 #if __FreeBSD_version < 1100054
    220 	if (m->m_flags & M_FLOWID) {
    221 #else
    222 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
    223 #endif
    224 #ifdef	RSS
    225 		if (rss_hash2bucket(m->m_pkthdr.flowid,
    226 		    M_HASHTYPE_GET(m), &bucket_id) == 0) {
    227 			/* TODO: spit out something if bucket_id > num_queues? */
    228 			i = bucket_id % adapter->num_queues;
    229 #ifdef IXGBE_DEBUG
    230 			if (bucket_id > adapter->num_queues)
    231 				if_printf(ifp, "bucket_id (%d) > num_queues "
    232 				    "(%d)\n", bucket_id, adapter->num_queues);
    233 #endif
    234 		} else
    235 #endif /* RSS */
    236 			i = m->m_pkthdr.flowid % adapter->num_queues;
    237 	} else
    238 #endif
    239 		i = cpu_index(curcpu()) % adapter->num_queues;
    240 
    241 	/* Check for a hung queue and pick alternative */
    242 	if (((1 << i) & adapter->active_queues) == 0)
    243 		i = ffs64(adapter->active_queues);
    244 
    245 	txr = &adapter->tx_rings[i];
    246 
    247 	err = pcq_put(txr->txr_interq, m);
    248 	if (err == false) {
    249 		m_freem(m);
    250 		txr->pcq_drops.ev_count++;
     251 		return (ENOBUFS);
    252 	}
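	/*
	 * Try to drain the ring now; if another context already
	 * holds the TX lock, defer to the per-ring softint instead.
	 */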
    253 	if (IXGBE_TX_TRYLOCK(txr)) {
    254 		ixgbe_mq_start_locked(ifp, txr);
    255 		IXGBE_TX_UNLOCK(txr);
    256 	} else
    257 		softint_schedule(txr->txr_si);
    258 
    259 	return (0);
    260 }
    261 
    262 int
    263 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
    264 {
    265 	struct adapter  *adapter = txr->adapter;
    266 	struct mbuf     *next;
    267 	int             enqueued = 0, err = 0;
    268 
    269 	if (((ifp->if_flags & IFF_RUNNING) == 0) ||
    270 	    adapter->link_active == 0)
    271 		return (ENETDOWN);
    272 
    273 	/* Process the queue */
    274 	while ((next = pcq_get(txr->txr_interq)) != NULL) {
    275 		if ((err = ixgbe_xmit(txr, next)) != 0) {
    276 			m_freem(next);
    277 			/* All errors are counted in ixgbe_xmit() */
    278 			break;
    279 		}
    280 		enqueued++;
    281 #if 0 // this is VF-only
    282 #if __FreeBSD_version >= 1100036
    283 		/*
    284 		 * Since we're looking at the tx ring, we can check
     285 		 * to see if we're a VF by examining our tail register
    286 		 * address.
    287 		 */
    288 		if (txr->tail < IXGBE_TDT(0) && next->m_flags & M_MCAST)
    289 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
    290 #endif
    291 #endif /* 0 */
    292 		/* Send a copy of the frame to the BPF listener */
    293 		bpf_mtap(ifp, next);
    294 		if ((ifp->if_flags & IFF_RUNNING) == 0)
    295 			break;
    296 	}
    297 
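	/* Opportunistically reclaim completed descriptors while the lock is held */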
    298 	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
    299 		ixgbe_txeof(txr);
    300 
    301 	return (err);
    302 }
    303 
    304 /*
    305  * Called from a taskqueue to drain queued transmit packets.
    306  */
    307 void
    308 ixgbe_deferred_mq_start(void *arg)
    309 {
    310 	struct tx_ring *txr = arg;
    311 	struct adapter *adapter = txr->adapter;
    312 	struct ifnet *ifp = adapter->ifp;
    313 
    314 	IXGBE_TX_LOCK(txr);
    315 	if (pcq_peek(txr->txr_interq) != NULL)
    316 		ixgbe_mq_start_locked(ifp, txr);
    317 	IXGBE_TX_UNLOCK(txr);
    318 }
    319 
    320 #endif /* IXGBE_LEGACY_TX */
    321 
    322 
    323 /*********************************************************************
    324  *
    325  *  This routine maps the mbufs to tx descriptors, allowing the
    326  *  TX engine to transmit the packets.
    327  *  	- return 0 on success, positive on failure
    328  *
    329  **********************************************************************/
    330 
    331 static int
    332 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
    333 {
    334 	struct m_tag *mtag;
    335 	struct adapter  *adapter = txr->adapter;
    336 	struct ifnet	*ifp = adapter->ifp;
    337 	struct ethercom *ec = &adapter->osdep.ec;
    338 	u32		olinfo_status = 0, cmd_type_len;
    339 	int             i, j, error;
    340 	int		first;
    341 	bool		remap = TRUE;
    342 	bus_dmamap_t	map;
    343 	struct ixgbe_tx_buf *txbuf;
    344 	union ixgbe_adv_tx_desc *txd = NULL;
    345 
    346 	/* Basic descriptor defines */
    347         cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
    348 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
    349 
    350 	if ((mtag = VLAN_OUTPUT_TAG(ec, m_head)) != NULL)
    351         	cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
    352 
    353         /*
     354          * It is important to capture the first descriptor
     355          * used, because it will contain the index of
     356          * the one we tell the hardware to report back on.
    357          */
    358         first = txr->next_avail_desc;
    359 	txbuf = &txr->tx_buffers[first];
    360 	map = txbuf->map;
    361 
    362 	/*
    363 	 * Map the packet for DMA.
    364 	 */
    365 retry:
    366 	error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map,
    367 	    m_head, BUS_DMA_NOWAIT);
    368 
    369 	if (__predict_false(error)) {
    370 		struct mbuf *m;
    371 
    372 		switch (error) {
    373 		case EAGAIN:
    374 			adapter->eagain_tx_dma_setup.ev_count++;
    375 			return EAGAIN;
    376 		case ENOMEM:
    377 			adapter->enomem_tx_dma_setup.ev_count++;
    378 			return EAGAIN;
    379 		case EFBIG:
    380 			/* Try it again? - one try */
    381 			if (remap == TRUE) {
    382 				remap = FALSE;
    383 				/*
    384 				 * XXX: m_defrag will choke on
    385 				 * non-MCLBYTES-sized clusters
    386 				 */
    387 				adapter->efbig_tx_dma_setup.ev_count++;
    388 				m = m_defrag(m_head, M_NOWAIT);
    389 				if (m == NULL) {
    390 					adapter->mbuf_defrag_failed.ev_count++;
    391 					return ENOBUFS;
    392 				}
    393 				m_head = m;
    394 				goto retry;
    395 			} else {
    396 				adapter->efbig2_tx_dma_setup.ev_count++;
    397 				return error;
    398 			}
    399 		case EINVAL:
    400 			adapter->einval_tx_dma_setup.ev_count++;
    401 			return error;
    402 		default:
    403 			adapter->other_tx_dma_setup.ev_count++;
    404 			return error;
    405 		}
    406 	}
    407 
    408 	/* Make certain there are enough descriptors */
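	/* (dm_nsegs data descriptors, plus the context descriptor and one spare) */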
    409 	if (txr->tx_avail < (map->dm_nsegs + 2)) {
    410 		txr->no_desc_avail.ev_count++;
    411 		ixgbe_dmamap_unload(txr->txtag, txbuf->map);
    412 		return EAGAIN;
    413 	}
    414 
    415 	/*
    416 	 * Set up the appropriate offload context
    417 	 * this will consume the first descriptor
    418 	 */
    419 	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
    420 	if (__predict_false(error)) {
    421 		return (error);
    422 	}
    423 
    424 #ifdef IXGBE_FDIR
    425 	/* Do the flow director magic */
    426 	if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
    427 		++txr->atr_count;
    428 		if (txr->atr_count >= atr_sample_rate) {
    429 			ixgbe_atr(txr, m_head);
    430 			txr->atr_count = 0;
    431 		}
    432 	}
    433 #endif
    434 
    435 	olinfo_status |= IXGBE_ADVTXD_CC;
    436 	i = txr->next_avail_desc;
    437 	for (j = 0; j < map->dm_nsegs; j++) {
    438 		bus_size_t seglen;
    439 		bus_addr_t segaddr;
    440 
    441 		txbuf = &txr->tx_buffers[i];
    442 		txd = &txr->tx_base[i];
    443 		seglen = map->dm_segs[j].ds_len;
    444 		segaddr = htole64(map->dm_segs[j].ds_addr);
    445 
    446 		txd->read.buffer_addr = segaddr;
    447 		txd->read.cmd_type_len = htole32(txr->txd_cmd |
    448 		    cmd_type_len |seglen);
    449 		txd->read.olinfo_status = htole32(olinfo_status);
    450 
    451 		if (++i == txr->num_desc)
    452 			i = 0;
    453 	}
    454 
    455 	txd->read.cmd_type_len |=
    456 	    htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
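	/*
	 * EOP marks the last descriptor of the frame; RS asks the
	 * hardware to report completion (DD) on it, which is what
	 * ixgbe_txeof() looks for.
	 */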
    457 	txr->tx_avail -= map->dm_nsegs;
    458 	txr->next_avail_desc = i;
    459 
    460 	txbuf->m_head = m_head;
    461 	/*
    462 	 * Here we swap the map so the last descriptor,
     463 	 * which gets the completion interrupt, has the
    464 	 * real map, and the first descriptor gets the
    465 	 * unused map from this descriptor.
    466 	 */
    467 	txr->tx_buffers[first].map = txbuf->map;
    468 	txbuf->map = map;
    469 	bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
    470 	    BUS_DMASYNC_PREWRITE);
    471 
    472         /* Set the EOP descriptor that will be marked done */
    473         txbuf = &txr->tx_buffers[first];
    474 	txbuf->eop = txd;
    475 
    476         ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
    477 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
    478 	/*
     479 	 * Advance the Transmit Descriptor Tail (TDT); this tells the
    480 	 * hardware that this frame is available to transmit.
    481 	 */
    482 	++txr->total_packets.ev_count;
    483 	IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
    484 
    485 	/*
    486 	 * XXXX NOMPSAFE: ifp->if_data should be percpu.
    487 	 */
    488 	ifp->if_obytes += m_head->m_pkthdr.len;
    489 	if (m_head->m_flags & M_MCAST)
    490 		ifp->if_omcasts++;
    491 
    492 	/* Mark queue as having work */
    493 	if (txr->busy == 0)
    494 		txr->busy = 1;
    495 
    496 	return 0;
    497 }
    498 
    499 
    500 /*********************************************************************
    501  *
    502  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
    503  *  the information needed to transmit a packet on the wire. This is
    504  *  called only once at attach, setup is done every reset.
    505  *
    506  **********************************************************************/
    507 int
    508 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
    509 {
    510 	struct adapter *adapter = txr->adapter;
    511 	device_t dev = adapter->dev;
    512 	struct ixgbe_tx_buf *txbuf;
    513 	int error, i;
    514 
    515 	/*
    516 	 * Setup DMA descriptor areas.
    517 	 */
    518 	if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat,	/* parent */
    519 			       1, 0,		/* alignment, bounds */
    520 			       IXGBE_TSO_SIZE,		/* maxsize */
    521 			       adapter->num_segs,	/* nsegments */
    522 			       PAGE_SIZE,		/* maxsegsize */
    523 			       0,			/* flags */
    524 			       &txr->txtag))) {
    525 		aprint_error_dev(dev,"Unable to allocate TX DMA tag\n");
    526 		goto fail;
    527 	}
    528 
    529 	if (!(txr->tx_buffers =
    530 	    (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
    531 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
    532 		aprint_error_dev(dev, "Unable to allocate tx_buffer memory\n");
    533 		error = ENOMEM;
    534 		goto fail;
    535 	}
    536 
    537         /* Create the descriptor buffer dma maps */
    538 	txbuf = txr->tx_buffers;
    539 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
    540 		error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
    541 		if (error != 0) {
    542 			aprint_error_dev(dev,
    543 			    "Unable to create TX DMA map (%d)\n", error);
    544 			goto fail;
    545 		}
    546 	}
    547 
    548 	return 0;
    549 fail:
     550 	/* Free everything; this handles the case where we failed partway through */
    551 #if 0 /* XXX was FreeBSD */
    552 	ixgbe_free_transmit_structures(adapter);
    553 #else
    554 	ixgbe_free_transmit_buffers(txr);
    555 #endif
    556 	return (error);
    557 }
    558 
    559 /*********************************************************************
    560  *
    561  *  Initialize a transmit ring.
    562  *
    563  **********************************************************************/
    564 static void
    565 ixgbe_setup_transmit_ring(struct tx_ring *txr)
    566 {
    567 	struct adapter *adapter = txr->adapter;
    568 	struct ixgbe_tx_buf *txbuf;
    569 #ifdef DEV_NETMAP
    570 	struct netmap_adapter *na = NA(adapter->ifp);
    571 	struct netmap_slot *slot;
    572 #endif /* DEV_NETMAP */
    573 
    574 	/* Clear the old ring contents */
    575 	IXGBE_TX_LOCK(txr);
    576 #ifdef DEV_NETMAP
    577 	/*
    578 	 * (under lock): if in netmap mode, do some consistency
    579 	 * checks and set slot to entry 0 of the netmap ring.
    580 	 */
    581 	slot = netmap_reset(na, NR_TX, txr->me, 0);
    582 #endif /* DEV_NETMAP */
    583 	bzero((void *)txr->tx_base,
    584 	      (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
    585 	/* Reset indices */
    586 	txr->next_avail_desc = 0;
    587 	txr->next_to_clean = 0;
    588 
    589 	/* Free any existing tx buffers. */
    590         txbuf = txr->tx_buffers;
    591 	for (int i = 0; i < txr->num_desc; i++, txbuf++) {
    592 		if (txbuf->m_head != NULL) {
    593 			bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
    594 			    0, txbuf->m_head->m_pkthdr.len,
    595 			    BUS_DMASYNC_POSTWRITE);
    596 			ixgbe_dmamap_unload(txr->txtag, txbuf->map);
    597 			m_freem(txbuf->m_head);
    598 			txbuf->m_head = NULL;
    599 		}
    600 #ifdef DEV_NETMAP
    601 		/*
    602 		 * In netmap mode, set the map for the packet buffer.
    603 		 * NOTE: Some drivers (not this one) also need to set
    604 		 * the physical buffer address in the NIC ring.
    605 		 * Slots in the netmap ring (indexed by "si") are
    606 		 * kring->nkr_hwofs positions "ahead" wrt the
    607 		 * corresponding slot in the NIC ring. In some drivers
    608 		 * (not here) nkr_hwofs can be negative. Function
    609 		 * netmap_idx_n2k() handles wraparounds properly.
    610 		 */
    611 		if (slot) {
    612 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
    613 			netmap_load_map(na, txr->txtag,
    614 			    txbuf->map, NMB(na, slot + si));
    615 		}
    616 #endif /* DEV_NETMAP */
    617 		/* Clear the EOP descriptor pointer */
    618 		txbuf->eop = NULL;
    619         }
    620 
    621 #ifdef IXGBE_FDIR
    622 	/* Set the rate at which we sample packets */
    623 	if (adapter->hw.mac.type != ixgbe_mac_82598EB)
    624 		txr->atr_sample = atr_sample_rate;
    625 #endif
    626 
    627 	/* Set number of descriptors available */
    628 	txr->tx_avail = adapter->num_tx_desc;
    629 
    630 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
    631 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
    632 	IXGBE_TX_UNLOCK(txr);
    633 }
    634 
    635 /*********************************************************************
    636  *
    637  *  Initialize all transmit rings.
    638  *
    639  **********************************************************************/
    640 int
    641 ixgbe_setup_transmit_structures(struct adapter *adapter)
    642 {
    643 	struct tx_ring *txr = adapter->tx_rings;
    644 
    645 	for (int i = 0; i < adapter->num_queues; i++, txr++)
    646 		ixgbe_setup_transmit_ring(txr);
    647 
    648 	return (0);
    649 }
    650 
    651 /*********************************************************************
    652  *
    653  *  Free all transmit rings.
    654  *
    655  **********************************************************************/
    656 void
    657 ixgbe_free_transmit_structures(struct adapter *adapter)
    658 {
    659 	struct tx_ring *txr = adapter->tx_rings;
    660 
    661 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
    662 		ixgbe_free_transmit_buffers(txr);
    663 		ixgbe_dma_free(adapter, &txr->txdma);
    664 		IXGBE_TX_LOCK_DESTROY(txr);
    665 	}
    666 	free(adapter->tx_rings, M_DEVBUF);
    667 }
    668 
    669 /*********************************************************************
    670  *
    671  *  Free transmit ring related data structures.
    672  *
    673  **********************************************************************/
    674 static void
    675 ixgbe_free_transmit_buffers(struct tx_ring *txr)
    676 {
    677 	struct adapter *adapter = txr->adapter;
    678 	struct ixgbe_tx_buf *tx_buffer;
    679 	int             i;
    680 
    681 	INIT_DEBUGOUT("ixgbe_free_transmit_buffers: begin");
    682 
    683 	if (txr->tx_buffers == NULL)
    684 		return;
    685 
    686 	tx_buffer = txr->tx_buffers;
    687 	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
    688 		if (tx_buffer->m_head != NULL) {
    689 			bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
    690 			    0, tx_buffer->m_head->m_pkthdr.len,
    691 			    BUS_DMASYNC_POSTWRITE);
    692 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
    693 			m_freem(tx_buffer->m_head);
    694 			tx_buffer->m_head = NULL;
    695 			if (tx_buffer->map != NULL) {
    696 				ixgbe_dmamap_destroy(txr->txtag,
    697 				    tx_buffer->map);
    698 				tx_buffer->map = NULL;
    699 			}
    700 		} else if (tx_buffer->map != NULL) {
    701 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
    702 			ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
    703 			tx_buffer->map = NULL;
    704 		}
    705 	}
    706 #ifndef IXGBE_LEGACY_TX
    707 	if (txr->txr_interq != NULL) {
    708 		struct mbuf *m;
    709 
    710 		while ((m = pcq_get(txr->txr_interq)) != NULL)
    711 			m_freem(m);
    712 		pcq_destroy(txr->txr_interq);
    713 	}
    714 #endif
    715 	if (txr->tx_buffers != NULL) {
    716 		free(txr->tx_buffers, M_DEVBUF);
    717 		txr->tx_buffers = NULL;
    718 	}
    719 	if (txr->txtag != NULL) {
    720 		ixgbe_dma_tag_destroy(txr->txtag);
    721 		txr->txtag = NULL;
    722 	}
    723 	return;
    724 }
    725 
    726 /*********************************************************************
    727  *
    728  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
    729  *
    730  **********************************************************************/
    731 
    732 static int
    733 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
    734     u32 *cmd_type_len, u32 *olinfo_status)
    735 {
    736 	struct adapter *adapter = txr->adapter;
    737 	struct ethercom *ec = &adapter->osdep.ec;
    738 	struct m_tag *mtag;
    739 	struct ixgbe_adv_tx_context_desc *TXD;
    740 	struct ether_vlan_header *eh;
    741 #ifdef INET
    742 	struct ip *ip;
    743 #endif
    744 #ifdef INET6
    745 	struct ip6_hdr *ip6;
    746 #endif
    747 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
    748 	int	ehdrlen, ip_hlen = 0;
    749 	u16	etype;
    750 	u8	ipproto = 0;
    751 	int	offload = TRUE;
    752 	int	ctxd = txr->next_avail_desc;
    753 	u16	vtag = 0;
    754 	char	*l3d;
    755 
    756 
    757 	/* First check if TSO is to be used */
    758 	if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4|M_CSUM_TSOv6)) {
    759 		int rv = ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status);
    760 
    761 		if (rv != 0)
    762 			++adapter->tso_err.ev_count;
    763 		return rv;
    764 	}
    765 
    766 	if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
    767 		offload = FALSE;
    768 
    769 	/* Indicate the whole packet as payload when not doing TSO */
    770        	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
    771 
    772 	/* Now ready a context descriptor */
    773 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
    774 
    775 	/*
    776 	** In advanced descriptors the vlan tag must
    777 	** be placed into the context descriptor. Hence
    778 	** we need to make one even if not doing offloads.
    779 	*/
    780 	if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
    781 		vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
    782 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
    783 	} else if (!IXGBE_IS_X550VF(adapter) && (offload == FALSE))
    784 		return (0);
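	/*
	 * (X550 VF devices never take the early return above; they
	 * always get a context descriptor, even with no VLAN tag or
	 * checksum offload to describe.)
	 */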
    785 
    786 	/*
    787 	 * Determine where frame payload starts.
    788 	 * Jump over vlan headers if already present,
    789 	 * helpful for QinQ too.
    790 	 */
    791 	KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
    792 	eh = mtod(mp, struct ether_vlan_header *);
    793 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
    794 		KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
    795 		etype = ntohs(eh->evl_proto);
    796 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
    797 	} else {
    798 		etype = ntohs(eh->evl_encap_proto);
    799 		ehdrlen = ETHER_HDR_LEN;
    800 	}
    801 
    802 	/* Set the ether header length */
    803 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
    804 
    805 	if (offload == FALSE)
    806 		goto no_offloads;
    807 
    808 	/*
    809 	 * If the first mbuf only includes the ethernet header, jump to the next one
    810 	 * XXX: This assumes the stack splits mbufs containing headers on header boundaries
    811 	 * XXX: And assumes the entire IP header is contained in one mbuf
    812 	 */
    813 	if (mp->m_len == ehdrlen && mp->m_next)
    814 		l3d = mtod(mp->m_next, char *);
    815 	else
    816 		l3d = mtod(mp, char *) + ehdrlen;
    817 
    818 	switch (etype) {
    819 #ifdef INET
    820 	case ETHERTYPE_IP:
    821 		ip = (struct ip *)(l3d);
    822 		ip_hlen = ip->ip_hl << 2;
    823 		ipproto = ip->ip_p;
    824 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
    825 		KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
    826 		    ip->ip_sum == 0);
    827 		break;
    828 #endif
    829 #ifdef INET6
    830 	case ETHERTYPE_IPV6:
    831 		ip6 = (struct ip6_hdr *)(l3d);
    832 		ip_hlen = sizeof(struct ip6_hdr);
    833 		ipproto = ip6->ip6_nxt;
    834 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
    835 		break;
    836 #endif
    837 	default:
    838 		offload = false;
    839 		break;
    840 	}
    841 
    842 	if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
    843 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
    844 
    845 	vlan_macip_lens |= ip_hlen;
    846 
    847 	/* No support for offloads for non-L4 next headers */
    848  	switch (ipproto) {
    849  		case IPPROTO_TCP:
    850 			if (mp->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_TCPv6))
    852 				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
    853 			else
    854 				offload = false;
    855 			break;
    856 		case IPPROTO_UDP:
    857 			if (mp->m_pkthdr.csum_flags & (M_CSUM_UDPv4|M_CSUM_UDPv6))
    858 				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
    859 			else
    860 				offload = false;
    861 			break;
    862 		default:
    863 			offload = false;
    864 			break;
    865 	}
    866 
    867 	if (offload) /* Insert L4 checksum into data descriptors */
    868 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
    869 
    870 no_offloads:
    871 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
    872 
    873 	/* Now copy bits into descriptor */
    874 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
    875 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
    876 	TXD->seqnum_seed = htole32(0);
    877 	TXD->mss_l4len_idx = htole32(0);
    878 
    879 	/* We've consumed the first desc, adjust counters */
    880 	if (++ctxd == txr->num_desc)
    881 		ctxd = 0;
    882 	txr->next_avail_desc = ctxd;
    883 	--txr->tx_avail;
    884 
    885         return 0;
    886 }
    887 
    888 /**********************************************************************
    889  *
    890  *  Setup work for hardware segmentation offload (TSO) on
    891  *  adapters using advanced tx descriptors
    892  *
    893  **********************************************************************/
    894 static int
    895 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
    896     u32 *cmd_type_len, u32 *olinfo_status)
    897 {
    898 	struct m_tag *mtag;
    899 	struct adapter *adapter = txr->adapter;
    900 	struct ethercom *ec = &adapter->osdep.ec;
    901 	struct ixgbe_adv_tx_context_desc *TXD;
    902 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
    903 	u32 mss_l4len_idx = 0, paylen;
    904 	u16 vtag = 0, eh_type;
    905 	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
    906 	struct ether_vlan_header *eh;
    907 #ifdef INET6
    908 	struct ip6_hdr *ip6;
    909 #endif
    910 #ifdef INET
    911 	struct ip *ip;
    912 #endif
    913 	struct tcphdr *th;
    914 
    915 	/*
    916 	 * Determine where frame payload starts.
    917 	 * Jump over vlan headers if already present
    918 	 */
    919 	eh = mtod(mp, struct ether_vlan_header *);
    920 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
    921 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
    922 		eh_type = eh->evl_proto;
    923 	} else {
    924 		ehdrlen = ETHER_HDR_LEN;
    925 		eh_type = eh->evl_encap_proto;
    926 	}
    927 
    928 	switch (ntohs(eh_type)) {
    929 #ifdef INET6
    930 	case ETHERTYPE_IPV6:
    931 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
    932 		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
    933 		if (ip6->ip6_nxt != IPPROTO_TCP)
    934 			return (ENXIO);
    935 		ip_hlen = sizeof(struct ip6_hdr);
    936 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
    937 		th = (struct tcphdr *)((char *)ip6 + ip_hlen);
    938 		th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
    939 		    &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
    940 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
    941 		break;
    942 #endif
    943 #ifdef INET
    944 	case ETHERTYPE_IP:
    945 		ip = (struct ip *)(mp->m_data + ehdrlen);
    946 		if (ip->ip_p != IPPROTO_TCP)
    947 			return (ENXIO);
    948 		ip->ip_sum = 0;
    949 		ip_hlen = ip->ip_hl << 2;
    950 		th = (struct tcphdr *)((char *)ip + ip_hlen);
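		/*
		 * Seed th_sum with the pseudo-header checksum; the
		 * hardware adds the TCP and payload portion for each
		 * segment it generates.
		 */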
    951 		th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
    952 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
    953 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
    954 		/* Tell transmit desc to also do IPv4 checksum. */
    955 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
    956 		break;
    957 #endif
    958 	default:
    959 		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
    960 		    __func__, ntohs(eh_type));
    961 		break;
    962 	}
    963 
    964 	ctxd = txr->next_avail_desc;
    965 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
    966 
    967 	tcp_hlen = th->th_off << 2;
    968 
    969 	/* This is used in the transmit desc in encap */
    970 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
    971 
    972 	/* VLAN MACLEN IPLEN */
    973 	if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
    974 		vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
    975                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
    976 	}
    977 
    978 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
    979 	vlan_macip_lens |= ip_hlen;
    980 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
    981 
    982 	/* ADV DTYPE TUCMD */
    983 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
    984 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
    985 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
    986 
    987 	/* MSS L4LEN IDX */
    988 	mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
    989 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
    990 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
    991 
    992 	TXD->seqnum_seed = htole32(0);
    993 
    994 	if (++ctxd == txr->num_desc)
    995 		ctxd = 0;
    996 
    997 	txr->tx_avail--;
    998 	txr->next_avail_desc = ctxd;
    999 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
   1000 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
   1001 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
   1002 	++txr->tso_tx.ev_count;
   1003 	return (0);
   1004 }
   1005 
   1006 
   1007 /**********************************************************************
   1008  *
   1009  *  Examine each tx_buffer in the used queue. If the hardware is done
   1010  *  processing the packet then free associated resources. The
   1011  *  tx_buffer is put back on the free queue.
   1012  *
   1013  **********************************************************************/
   1014 void
   1015 ixgbe_txeof(struct tx_ring *txr)
   1016 {
   1017 	struct adapter		*adapter = txr->adapter;
   1018 	struct ifnet		*ifp = adapter->ifp;
   1019 	u32			work, processed = 0;
   1020 	u32			limit = adapter->tx_process_limit;
   1021 	struct ixgbe_tx_buf	*buf;
   1022 	union ixgbe_adv_tx_desc *txd;
   1023 
   1024 	KASSERT(mutex_owned(&txr->tx_mtx));
   1025 
   1026 #ifdef DEV_NETMAP
   1027 	if (ifp->if_capenable & IFCAP_NETMAP) {
   1028 		struct netmap_adapter *na = NA(ifp);
   1029 		struct netmap_kring *kring = &na->tx_rings[txr->me];
   1030 		txd = txr->tx_base;
   1031 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1032 		    BUS_DMASYNC_POSTREAD);
   1033 		/*
   1034 		 * In netmap mode, all the work is done in the context
   1035 		 * of the client thread. Interrupt handlers only wake up
   1036 		 * clients, which may be sleeping on individual rings
   1037 		 * or on a global resource for all rings.
   1038 		 * To implement tx interrupt mitigation, we wake up the client
   1039 		 * thread roughly every half ring, even if the NIC interrupts
   1040 		 * more frequently. This is implemented as follows:
   1041 		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
   1042 		 *   the slot that should wake up the thread (nkr_num_slots
   1043 		 *   means the user thread should not be woken up);
   1044 		 * - the driver ignores tx interrupts unless netmap_mitigate=0
   1045 		 *   or the slot has the DD bit set.
   1046 		 */
   1047 		if (!netmap_mitigate ||
   1048 		    (kring->nr_kflags < kring->nkr_num_slots &&
   1049 		    txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
   1050 			netmap_tx_irq(ifp, txr->me);
   1051 		}
   1052 		return;
   1053 	}
   1054 #endif /* DEV_NETMAP */
   1055 
   1056 	if (txr->tx_avail == txr->num_desc) {
   1057 		txr->busy = 0;
   1058 		return;
   1059 	}
   1060 
   1061 	/* Get work starting point */
   1062 	work = txr->next_to_clean;
   1063 	buf = &txr->tx_buffers[work];
   1064 	txd = &txr->tx_base[work];
   1065 	work -= txr->num_desc; /* The distance to ring end */
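	/*
	 * From here on 'work' is a negative offset from the end of
	 * the ring; it reaches zero exactly when the index wraps,
	 * which is what the "!work" tests below detect.
	 */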
   1066         ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1067 	    BUS_DMASYNC_POSTREAD);
   1068 
   1069 	do {
   1070 		union ixgbe_adv_tx_desc *eop = buf->eop;
   1071 		if (eop == NULL) /* No work */
   1072 			break;
   1073 
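		/*
		 * DD is only written back on the EOP descriptor,
		 * since ixgbe_xmit() sets RS there alone.
		 */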
   1074 		if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
   1075 			break;	/* I/O not complete */
   1076 
   1077 		if (buf->m_head) {
   1078 			txr->bytes +=
   1079 			    buf->m_head->m_pkthdr.len;
   1080 			bus_dmamap_sync(txr->txtag->dt_dmat,
   1081 			    buf->map,
   1082 			    0, buf->m_head->m_pkthdr.len,
   1083 			    BUS_DMASYNC_POSTWRITE);
   1084 			ixgbe_dmamap_unload(txr->txtag,
   1085 			    buf->map);
   1086 			m_freem(buf->m_head);
   1087 			buf->m_head = NULL;
   1088 		}
   1089 		buf->eop = NULL;
   1090 		++txr->tx_avail;
   1091 
   1092 		/* We clean the range if multi segment */
   1093 		while (txd != eop) {
   1094 			++txd;
   1095 			++buf;
   1096 			++work;
   1097 			/* wrap the ring? */
   1098 			if (__predict_false(!work)) {
   1099 				work -= txr->num_desc;
   1100 				buf = txr->tx_buffers;
   1101 				txd = txr->tx_base;
   1102 			}
   1103 			if (buf->m_head) {
   1104 				txr->bytes +=
   1105 				    buf->m_head->m_pkthdr.len;
   1106 				bus_dmamap_sync(txr->txtag->dt_dmat,
   1107 				    buf->map,
   1108 				    0, buf->m_head->m_pkthdr.len,
   1109 				    BUS_DMASYNC_POSTWRITE);
   1110 				ixgbe_dmamap_unload(txr->txtag,
   1111 				    buf->map);
   1112 				m_freem(buf->m_head);
   1113 				buf->m_head = NULL;
   1114 			}
   1115 			++txr->tx_avail;
   1116 			buf->eop = NULL;
   1117 
   1118 		}
   1119 		++txr->packets;
   1120 		++processed;
   1121 		++ifp->if_opackets;
   1122 
   1123 		/* Try the next packet */
   1124 		++txd;
   1125 		++buf;
   1126 		++work;
   1127 		/* reset with a wrap */
   1128 		if (__predict_false(!work)) {
   1129 			work -= txr->num_desc;
   1130 			buf = txr->tx_buffers;
   1131 			txd = txr->tx_base;
   1132 		}
   1133 		prefetch(txd);
   1134 	} while (__predict_true(--limit));
   1135 
   1136 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1137 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   1138 
   1139 	work += txr->num_desc;
   1140 	txr->next_to_clean = work;
   1141 
   1142 	/*
    1143 	** Queue hang detection: we know there is work
    1144 	** outstanding or the early return above would
    1145 	** have been taken, so if nothing was cleaned,
    1146 	** increment busy.  local_timer checks this
    1147 	** counter and marks the queue HUNG once it
    1148 	** exceeds a maximum number of attempts.
   1149 	*/
   1150 	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
   1151 		++txr->busy;
   1152 	/*
    1153 	** If anything was cleaned we reset the state to 1;
    1154 	** note this turns off HUNG if it was set.
   1155 	*/
   1156 	if (processed)
   1157 		txr->busy = 1;
   1158 
   1159 	if (txr->tx_avail == txr->num_desc)
   1160 		txr->busy = 0;
   1161 
   1162 	return;
   1163 }
   1164 
   1165 
   1166 #ifdef IXGBE_FDIR
   1167 /*
    1168 ** This routine parses packet headers so that Flow
    1169 ** Director can make a hashed filter table entry,
    1170 ** allowing traffic flows to be identified and kept
    1171 ** on the same cpu.  Doing this for every packet
    1172 ** would be a performance hit, so we only do it
    1173 ** once every IXGBE_FDIR_RATE packets.
   1174 */
   1175 static void
   1176 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
   1177 {
   1178 	struct adapter			*adapter = txr->adapter;
   1179 	struct ix_queue			*que;
   1180 	struct ip			*ip;
   1181 	struct tcphdr			*th;
   1182 	struct udphdr			*uh;
   1183 	struct ether_vlan_header	*eh;
   1184 	union ixgbe_atr_hash_dword	input = {.dword = 0};
   1185 	union ixgbe_atr_hash_dword	common = {.dword = 0};
   1186 	int  				ehdrlen, ip_hlen;
   1187 	u16				etype;
   1188 
   1189 	eh = mtod(mp, struct ether_vlan_header *);
   1190 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
   1191 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
   1192 		etype = eh->evl_proto;
   1193 	} else {
   1194 		ehdrlen = ETHER_HDR_LEN;
   1195 		etype = eh->evl_encap_proto;
   1196 	}
   1197 
   1198 	/* Only handling IPv4 */
   1199 	if (etype != htons(ETHERTYPE_IP))
   1200 		return;
   1201 
   1202 	ip = (struct ip *)(mp->m_data + ehdrlen);
   1203 	ip_hlen = ip->ip_hl << 2;
   1204 
   1205 	/* check if we're UDP or TCP */
   1206 	switch (ip->ip_p) {
   1207 	case IPPROTO_TCP:
   1208 		th = (struct tcphdr *)((char *)ip + ip_hlen);
   1209 		/* src and dst are inverted */
   1210 		common.port.dst ^= th->th_sport;
   1211 		common.port.src ^= th->th_dport;
   1212 		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
   1213 		break;
   1214 	case IPPROTO_UDP:
   1215 		uh = (struct udphdr *)((char *)ip + ip_hlen);
   1216 		/* src and dst are inverted */
   1217 		common.port.dst ^= uh->uh_sport;
   1218 		common.port.src ^= uh->uh_dport;
   1219 		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
   1220 		break;
   1221 	default:
   1222 		return;
   1223 	}
   1224 
   1225 	input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
   1226 	if (mp->m_pkthdr.ether_vtag)
   1227 		common.flex_bytes ^= htons(ETHERTYPE_VLAN);
   1228 	else
   1229 		common.flex_bytes ^= etype;
   1230 	common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
   1231 
   1232 	que = &adapter->queues[txr->me];
   1233 	/*
   1234 	** This assumes the Rx queue and Tx
   1235 	** queue are bound to the same CPU
   1236 	*/
   1237 	ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
   1238 	    input, common, que->msix);
   1239 }
   1240 #endif /* IXGBE_FDIR */
   1241 
   1242 /*
   1243 ** Used to detect a descriptor that has
   1244 ** been merged by Hardware RSC.
   1245 */
   1246 static inline u32
   1247 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
   1248 {
   1249 	return (le32toh(rx->wb.lower.lo_dword.data) &
   1250 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
   1251 }
   1252 
   1253 /*********************************************************************
   1254  *
    1255  *  Initialize the Hardware RSC (LRO) feature on 82599
    1256  *  for an RX ring.  It is toggled by the LRO capability
    1257  *  even though it is transparent to the stack.
    1258  *
    1259  *  NOTE: since this HW feature only works with IPv4 and
    1260  *        our testing has shown soft LRO to be as effective,
    1261  *        it is disabled by default.
   1262  *
   1263  **********************************************************************/
   1264 static void
   1265 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
   1266 {
   1267 	struct	adapter 	*adapter = rxr->adapter;
   1268 	struct	ixgbe_hw	*hw = &adapter->hw;
   1269 	u32			rscctrl, rdrxctl;
   1270 
   1271 	/* If turning LRO/RSC off we need to disable it */
   1272 	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
   1273 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
    1274 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
         		/* Write the cleared enable bit back to the hardware */
         		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
   1275 		return;
   1276 	}
   1277 
   1278 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
   1279 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
   1280 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
   1281 	if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
   1282 #endif /* DEV_NETMAP */
   1283 	rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
   1284 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
   1285 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
   1286 
   1287 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
   1288 	rscctrl |= IXGBE_RSCCTL_RSCEN;
   1289 	/*
   1290 	** Limit the total number of descriptors that
   1291 	** can be combined, so it does not exceed 64K
   1292 	*/
   1293 	if (rxr->mbuf_sz == MCLBYTES)
   1294 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
   1295 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
   1296 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
   1297 	else if (rxr->mbuf_sz == MJUM9BYTES)
   1298 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
   1299 	else  /* Using 16K cluster */
   1300 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
   1301 
   1302 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
   1303 
   1304 	/* Enable TCP header recognition */
   1305 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
   1306 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
   1307 	    IXGBE_PSRTYPE_TCPHDR));
   1308 
   1309 	/* Disable RSC for ACK packets */
   1310 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
   1311 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
   1312 
   1313 	rxr->hw_rsc = TRUE;
   1314 }
   1315 
   1316 /*********************************************************************
   1317  *
    1318  *  Refresh mbuf buffers for RX descriptor rings
    1319  *   - now keeps its own state so discards due to resource
    1320  *     exhaustion are unnecessary.  If an mbuf cannot be obtained
    1321  *     it just returns, keeping its placeholder; it can simply
    1322  *     be called again later to retry.
   1323  *
   1324  **********************************************************************/
   1325 static void
   1326 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
   1327 {
   1328 	struct adapter		*adapter = rxr->adapter;
   1329 	struct ixgbe_rx_buf	*rxbuf;
   1330 	struct mbuf		*mp;
   1331 	int			i, j, error;
   1332 	bool			refreshed = false;
   1333 
   1334 	i = j = rxr->next_to_refresh;
    1335 	/* 'j' runs one slot ahead of 'i' and controls the loop */
   1336 	if (++j == rxr->num_desc)
   1337 		j = 0;
   1338 
   1339 	while (j != limit) {
   1340 		rxbuf = &rxr->rx_buffers[i];
   1341 		if (rxbuf->buf == NULL) {
   1342 			mp = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
   1343 			    MT_DATA, M_PKTHDR, rxr->mbuf_sz);
   1344 			if (mp == NULL) {
   1345 				rxr->no_jmbuf.ev_count++;
   1346 				goto update;
   1347 			}
   1348 			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
   1349 				m_adj(mp, ETHER_ALIGN);
   1350 		} else
   1351 			mp = rxbuf->buf;
   1352 
   1353 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   1354 
   1355 		/* If we're dealing with an mbuf that was copied rather
   1356 		 * than replaced, there's no need to go through busdma.
   1357 		 */
   1358 		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
   1359 			/* Get the memory mapping */
   1360 			ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   1361 			error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
   1362 			    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
   1363 			if (error != 0) {
   1364 				printf("Refresh mbufs: payload dmamap load"
   1365 				    " failure - %d\n", error);
   1366 				m_free(mp);
   1367 				rxbuf->buf = NULL;
   1368 				goto update;
   1369 			}
   1370 			rxbuf->buf = mp;
   1371 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1372 			    0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
   1373 			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
   1374 			    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1375 		} else {
   1376 			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
   1377 			rxbuf->flags &= ~IXGBE_RX_COPY;
   1378 		}
   1379 
   1380 		refreshed = true;
   1381 		/* Next is precalculated */
   1382 		i = j;
   1383 		rxr->next_to_refresh = i;
   1384 		if (++j == rxr->num_desc)
   1385 			j = 0;
   1386 	}
   1387 update:
   1388 	if (refreshed) /* Update hardware tail index */
   1389 		IXGBE_WRITE_REG(&adapter->hw,
   1390 		    rxr->tail, rxr->next_to_refresh);
   1391 	return;
   1392 }
   1393 
   1394 /*********************************************************************
   1395  *
   1396  *  Allocate memory for rx_buffer structures. Since we use one
    1397  *  rx_buffer per received packet, the maximum number of rx_buffers
   1398  *  that we'll need is equal to the number of receive descriptors
   1399  *  that we've allocated.
   1400  *
   1401  **********************************************************************/
   1402 int
   1403 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
   1404 {
   1405 	struct	adapter 	*adapter = rxr->adapter;
   1406 	device_t 		dev = adapter->dev;
   1407 	struct ixgbe_rx_buf 	*rxbuf;
   1408 	int             	bsize, error;
   1409 
   1410 	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
   1411 	if (!(rxr->rx_buffers =
   1412 	    (struct ixgbe_rx_buf *) malloc(bsize,
   1413 	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
   1414 		aprint_error_dev(dev, "Unable to allocate rx_buffer memory\n");
   1415 		error = ENOMEM;
   1416 		goto fail;
   1417 	}
   1418 
   1419 	if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat,	/* parent */
   1420 				   1, 0,	/* alignment, bounds */
   1421 				   MJUM16BYTES,		/* maxsize */
   1422 				   1,			/* nsegments */
   1423 				   MJUM16BYTES,		/* maxsegsize */
   1424 				   0,			/* flags */
   1425 				   &rxr->ptag))) {
   1426 		aprint_error_dev(dev, "Unable to create RX DMA tag\n");
   1427 		goto fail;
   1428 	}
   1429 
   1430 	for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
   1431 		rxbuf = &rxr->rx_buffers[i];
   1432 		error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
   1433 		if (error) {
   1434 			aprint_error_dev(dev, "Unable to create RX dma map\n");
   1435 			goto fail;
   1436 		}
   1437 	}
   1438 
   1439 	return (0);
   1440 
   1441 fail:
   1442 	/* Frees all, but can handle partial completion */
   1443 	ixgbe_free_receive_structures(adapter);
   1444 	return (error);
   1445 }
   1446 
   1447 static void
   1448 ixgbe_free_receive_ring(struct rx_ring *rxr)
   1449 {
   1450 	struct ixgbe_rx_buf       *rxbuf;
   1451 
   1452 	for (int i = 0; i < rxr->num_desc; i++) {
   1453 		rxbuf = &rxr->rx_buffers[i];
   1454 		if (rxbuf->buf != NULL) {
   1455 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1456 			    0, rxbuf->buf->m_pkthdr.len,
   1457 			    BUS_DMASYNC_POSTREAD);
   1458 			ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   1459 			rxbuf->buf->m_flags |= M_PKTHDR;
   1460 			m_freem(rxbuf->buf);
   1461 			rxbuf->buf = NULL;
   1462 			rxbuf->flags = 0;
   1463 		}
   1464 	}
   1465 }
   1466 
   1467 /*********************************************************************
   1468  *
   1469  *  Initialize a receive ring and its buffers.
   1470  *
   1471  **********************************************************************/
   1472 static int
   1473 ixgbe_setup_receive_ring(struct rx_ring *rxr)
   1474 {
   1475 	struct	adapter 	*adapter;
   1476 	struct ixgbe_rx_buf	*rxbuf;
   1477 #ifdef LRO
   1478 	struct ifnet		*ifp;
   1479 	struct lro_ctrl		*lro = &rxr->lro;
   1480 #endif /* LRO */
   1481 	int			rsize, error = 0;
   1482 #ifdef DEV_NETMAP
   1483 	struct netmap_adapter *na = NA(rxr->adapter->ifp);
   1484 	struct netmap_slot *slot;
   1485 #endif /* DEV_NETMAP */
   1486 
   1487 	adapter = rxr->adapter;
   1488 #ifdef LRO
   1489 	ifp = adapter->ifp;
   1490 #endif /* LRO */
   1491 
   1492 	/* Clear the ring contents */
   1493 	IXGBE_RX_LOCK(rxr);
   1494 #ifdef DEV_NETMAP
   1495 	/* same as in ixgbe_setup_transmit_ring() */
   1496 	slot = netmap_reset(na, NR_RX, rxr->me, 0);
   1497 #endif /* DEV_NETMAP */
   1498 	rsize = roundup2(adapter->num_rx_desc *
   1499 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
   1500 	bzero((void *)rxr->rx_base, rsize);
   1501 	/* Cache the size */
   1502 	rxr->mbuf_sz = adapter->rx_mbuf_sz;
   1503 
   1504 	/* Free current RX buffer structs and their mbufs */
   1505 	ixgbe_free_receive_ring(rxr);
   1506 
   1507 	IXGBE_RX_UNLOCK(rxr);
   1508 
   1509 	/* Now reinitialize our supply of jumbo mbufs.  The number
   1510 	 * or size of jumbo mbufs may have changed.
   1511 	 */
   1512 	ixgbe_jcl_reinit(&adapter->jcl_head, rxr->ptag->dt_dmat,
   1513 	    2 * adapter->num_rx_desc, adapter->rx_mbuf_sz);
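	/*
	 * Twice the ring size is requested, leaving headroom for
	 * clusters still held by the upper layers.
	 */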
   1514 
   1515 	IXGBE_RX_LOCK(rxr);
   1516 
   1517 	/* Now replenish the mbufs */
   1518 	for (int j = 0; j != rxr->num_desc; ++j) {
   1519 		struct mbuf	*mp;
   1520 
   1521 		rxbuf = &rxr->rx_buffers[j];
   1522 #ifdef DEV_NETMAP
   1523 		/*
   1524 		 * In netmap mode, fill the map and set the buffer
   1525 		 * address in the NIC ring, considering the offset
   1526 		 * between the netmap and NIC rings (see comment in
   1527 		 * ixgbe_setup_transmit_ring() ). No need to allocate
   1528 		 * an mbuf, so end the block with a continue;
   1529 		 */
   1530 		if (slot) {
   1531 			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
   1532 			uint64_t paddr;
   1533 			void *addr;
   1534 
   1535 			addr = PNMB(na, slot + sj, &paddr);
   1536 			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
   1537 			/* Update descriptor and the cached value */
   1538 			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
   1539 			rxbuf->addr = htole64(paddr);
   1540 			continue;
   1541 		}
   1542 #endif /* DEV_NETMAP */
   1543 		rxbuf->flags = 0;
   1544 		rxbuf->buf = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
   1545 		    MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
   1546 		if (rxbuf->buf == NULL) {
   1547 			error = ENOBUFS;
   1548                         goto fail;
   1549 		}
   1550 		mp = rxbuf->buf;
   1551 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   1552 		/* Get the memory mapping */
   1553 		error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
   1554 		    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
   1555 		if (error != 0)
   1556                         goto fail;
   1557 		bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1558 		    0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
   1559 		/* Update the descriptor and the cached value */
   1560 		rxr->rx_base[j].read.pkt_addr =
   1561 		    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1562 		rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1563 	}
   1564 
   1565 
   1566 	/* Setup our descriptor indices */
   1567 	rxr->next_to_check = 0;
   1568 	rxr->next_to_refresh = 0;
   1569 	rxr->lro_enabled = FALSE;
   1570 	rxr->rx_copies.ev_count = 0;
   1571 #if 0 /* NetBSD */
   1572 	rxr->rx_bytes.ev_count = 0;
   1573 #if 1	/* Fix inconsistency */
   1574 	rxr->rx_packets.ev_count = 0;
   1575 #endif
   1576 #endif
   1577 	rxr->vtag_strip = FALSE;
   1578 
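         	/* Make the freshly initialized descriptor ring visible to the device. */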
   1579 	ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   1580 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   1581 
   1582 	/*
   1583 	** Now set up the LRO interface:
   1584 	*/
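         	/*
         	 * Hardware RSC is used when the ixgbe_rsc_enable tunable is set;
         	 * otherwise fall back to software LRO if the interface enables it.
         	 */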
   1585 	if (ixgbe_rsc_enable)
   1586 		ixgbe_setup_hw_rsc(rxr);
   1587 #ifdef LRO
   1588 	else if (ifp->if_capenable & IFCAP_LRO) {
   1589 		device_t dev = adapter->dev;
   1590 		int err = tcp_lro_init(lro);
   1591 		if (err) {
   1592 			device_printf(dev, "LRO Initialization failed!\n");
   1593 			goto fail;
   1594 		}
   1595 		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
   1596 		rxr->lro_enabled = TRUE;
   1597 		lro->ifp = adapter->ifp;
   1598 	}
   1599 #endif /* LRO */
   1600 
   1601 	IXGBE_RX_UNLOCK(rxr);
   1602 	return (0);
   1603 
   1604 fail:
   1605 	ixgbe_free_receive_ring(rxr);
   1606 	IXGBE_RX_UNLOCK(rxr);
   1607 	return (error);
   1608 }
   1609 
   1610 /*********************************************************************
   1611  *
   1612  *  Initialize all receive rings.
   1613  *
   1614  **********************************************************************/
   1615 int
   1616 ixgbe_setup_receive_structures(struct adapter *adapter)
   1617 {
   1618 	struct rx_ring *rxr = adapter->rx_rings;
   1619 	int j;
   1620 
   1621 	for (j = 0; j < adapter->num_queues; j++, rxr++)
   1622 		if (ixgbe_setup_receive_ring(rxr))
   1623 			goto fail;
   1624 
   1625 	return (0);
   1626 fail:
    1627 	/*
    1628 	 * Free the RX buffers allocated so far.  We only handle the rings
    1629 	 * that completed; the failing ring will have cleaned up for
    1630 	 * itself.  Ring 'j' failed, so it is the terminus.
    1631 	 */
   1632 	for (int i = 0; i < j; ++i) {
   1633 		rxr = &adapter->rx_rings[i];
   1634 		ixgbe_free_receive_ring(rxr);
   1635 	}
   1636 
   1637 	return (ENOBUFS);
   1638 }
   1639 
   1640 
   1641 /*********************************************************************
   1642  *
   1643  *  Free all receive rings.
   1644  *
   1645  **********************************************************************/
   1646 void
   1647 ixgbe_free_receive_structures(struct adapter *adapter)
   1648 {
   1649 	struct rx_ring *rxr = adapter->rx_rings;
   1650 
   1651 	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
   1652 
   1653 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
   1654 #ifdef LRO
   1655 		struct lro_ctrl		*lro = &rxr->lro;
   1656 #endif /* LRO */
   1657 		ixgbe_free_receive_buffers(rxr);
   1658 #ifdef LRO
   1659 		/* Free LRO memory */
   1660 		tcp_lro_free(lro);
   1661 #endif /* LRO */
   1662 		/* Free the ring memory as well */
   1663 		ixgbe_dma_free(adapter, &rxr->rxdma);
   1664 		IXGBE_RX_LOCK_DESTROY(rxr);
   1665 	}
   1666 
   1667 	free(adapter->rx_rings, M_DEVBUF);
   1668 }
   1669 
   1670 
   1671 /*********************************************************************
   1672  *
   1673  *  Free receive ring data structures
   1674  *
   1675  **********************************************************************/
   1676 static void
   1677 ixgbe_free_receive_buffers(struct rx_ring *rxr)
   1678 {
   1679 	struct adapter		*adapter = rxr->adapter;
   1680 	struct ixgbe_rx_buf	*rxbuf;
   1681 
   1682 	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
   1683 
   1684 	/* Cleanup any existing buffers */
   1685 	if (rxr->rx_buffers != NULL) {
   1686 		for (int i = 0; i < adapter->num_rx_desc; i++) {
   1687 			rxbuf = &rxr->rx_buffers[i];
   1688 			if (rxbuf->buf != NULL) {
   1689 				bus_dmamap_sync(rxr->ptag->dt_dmat,
   1690 				    rxbuf->pmap, 0, rxbuf->buf->m_pkthdr.len,
   1691 				    BUS_DMASYNC_POSTREAD);
   1692 				ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   1693 				rxbuf->buf->m_flags |= M_PKTHDR;
   1694 				m_freem(rxbuf->buf);
   1695 			}
   1696 			rxbuf->buf = NULL;
   1697 			if (rxbuf->pmap != NULL) {
   1698 				ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
   1699 				rxbuf->pmap = NULL;
   1700 			}
   1701 		}
   1702 		if (rxr->rx_buffers != NULL) {
   1703 			free(rxr->rx_buffers, M_DEVBUF);
   1704 			rxr->rx_buffers = NULL;
   1705 		}
   1706 	}
   1707 
   1708 	if (rxr->ptag != NULL) {
   1709 		ixgbe_dma_tag_destroy(rxr->ptag);
   1710 		rxr->ptag = NULL;
   1711 	}
   1712 
   1713 	return;
   1714 }
   1715 
   1716 static __inline void
   1717 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
   1718 {
   1719 	struct adapter	*adapter = ifp->if_softc;
   1720 
   1721 #ifdef LRO
   1722 	struct ethercom *ec = &adapter->osdep.ec;
   1723 
    1724         /*
    1725          * At the moment LRO is only for IP/TCP packets whose TCP checksum was
    1726          * verified by hardware, and the frame must not carry a VLAN tag in the
    1727          * ethernet header.  For IPv6 we do not yet support extension headers.
    1728          */
   1729         if (rxr->lro_enabled &&
   1730             (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
   1731             (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
   1732             ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
   1733             (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
   1734             (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
   1735             (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
   1736             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
   1737             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
    1738                 /*
    1739                  * Send to the stack if:
    1740                  *  - LRO not enabled, or
    1741                  *  - no LRO resources, or
    1742                  *  - lro enqueue fails
    1743                  */
   1744                 if (rxr->lro.lro_cnt != 0)
   1745                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
   1746                                 return;
   1747         }
   1748 #endif /* LRO */
   1749 
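         	/*
         	 * Drop the RX lock while the frame is handed to the stack so ring
         	 * processing does not hold it across protocol input.
         	 */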
   1750 	IXGBE_RX_UNLOCK(rxr);
   1751 
   1752 	if_percpuq_enqueue(adapter->ipq, m);
   1753 
   1754 	IXGBE_RX_LOCK(rxr);
   1755 }
   1756 
   1757 static __inline void
   1758 ixgbe_rx_discard(struct rx_ring *rxr, int i)
   1759 {
   1760 	struct ixgbe_rx_buf	*rbuf;
   1761 
   1762 	rbuf = &rxr->rx_buffers[i];
   1763 
   1764 
    1765 	/*
    1766 	** With advanced descriptors the writeback
    1767 	** clobbers the buffer addrs, so it's easier
    1768 	** to just free the existing mbufs and take
    1769 	** the normal refresh path to get new buffers
    1770 	** and mapping.
    1771 	*/
    1772 
    1773 	if (rbuf->fmp != NULL) {/* Partial chain ? */
    1774 		rbuf->fmp->m_flags |= M_PKTHDR;
    1775 		m_freem(rbuf->fmp);
    1776 		rbuf->fmp = NULL;
    1777 		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
    1778 	} else if (rbuf->buf != NULL) {
    1779 		m_free(rbuf->buf);
    1780 		rbuf->buf = NULL;
    1781 	}
   1782 	ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
   1783 
   1784 	rbuf->flags = 0;
   1785 
   1786 	return;
   1787 }
   1788 
   1789 
   1790 /*********************************************************************
   1791  *
   1792  *  This routine executes in interrupt context. It replenishes
   1793  *  the mbufs in the descriptor and sends data which has been
   1794  *  dma'ed into host memory to upper layer.
   1795  *
   1796  *  Return TRUE for more work, FALSE for all clean.
   1797  *********************************************************************/
   1798 bool
   1799 ixgbe_rxeof(struct ix_queue *que)
   1800 {
   1801 	struct adapter		*adapter = que->adapter;
   1802 	struct rx_ring		*rxr = que->rxr;
   1803 	struct ifnet		*ifp = adapter->ifp;
   1804 #ifdef LRO
   1805 	struct lro_ctrl		*lro = &rxr->lro;
   1806 #endif /* LRO */
   1807 	int			i, nextp, processed = 0;
   1808 	u32			staterr = 0;
   1809 	u32			count = adapter->rx_process_limit;
   1810 	union ixgbe_adv_rx_desc	*cur;
   1811 	struct ixgbe_rx_buf	*rbuf, *nbuf;
   1812 #ifdef RSS
   1813 	u16			pkt_info;
   1814 #endif
   1815 
   1816 	IXGBE_RX_LOCK(rxr);
   1817 
   1818 #ifdef DEV_NETMAP
   1819 	/* Same as the txeof routine: wakeup clients on intr. */
   1820 	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
   1821 		IXGBE_RX_UNLOCK(rxr);
   1822 		return (FALSE);
   1823 	}
   1824 #endif /* DEV_NETMAP */
   1825 
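         	/*
         	 * Walk the ring from next_to_check, handling at most
         	 * rx_process_limit completed descriptors per call.
         	 */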
   1826 	for (i = rxr->next_to_check; count != 0;) {
   1827 		struct mbuf	*sendmp, *mp;
   1828 		u32		rsc, ptype;
   1829 		u16		len;
   1830 		u16		vtag = 0;
   1831 		bool		eop;
   1832 
   1833 		/* Sync the ring. */
   1834 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   1835 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   1836 
   1837 		cur = &rxr->rx_base[i];
   1838 		staterr = le32toh(cur->wb.upper.status_error);
   1839 #ifdef RSS
   1840 		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
   1841 #endif
   1842 
   1843 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
   1844 			break;
   1845 		if ((ifp->if_flags & IFF_RUNNING) == 0)
   1846 			break;
   1847 
   1848 		count--;
   1849 		sendmp = NULL;
   1850 		nbuf = NULL;
   1851 		rsc = 0;
   1852 		cur->wb.upper.status_error = 0;
   1853 		rbuf = &rxr->rx_buffers[i];
   1854 		mp = rbuf->buf;
   1855 
   1856 		len = le16toh(cur->wb.upper.length);
   1857 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
   1858 		    IXGBE_RXDADV_PKTTYPE_MASK;
   1859 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
   1860 
   1861 		/* Make sure bad packets are discarded */
   1862 		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
   1863 #if __FreeBSD_version >= 1100036
   1864 			if (IXGBE_IS_VF(adapter))
   1865 				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
   1866 #endif
   1867 			rxr->rx_discarded.ev_count++;
   1868 			ixgbe_rx_discard(rxr, i);
   1869 			goto next_desc;
   1870 		}
   1871 
    1872 		/*
    1873 		** On the 82599, which supports hardware
    1874 		** LRO (called HW RSC), packets need not
    1875 		** be fragmented across sequential
    1876 		** descriptors; instead the next descriptor
    1877 		** is indicated in bits of this descriptor.
    1878 		** This also means that we might process
    1879 		** more than one packet at a time, something
    1880 		** that was never true before.  It required
    1881 		** eliminating the global chain pointers
    1882 		** in favor of what we are doing here.  -jfv
    1883 		*/
   1884 		if (!eop) {
   1885 			/*
   1886 			** Figure out the next descriptor
   1887 			** of this frame.
   1888 			*/
   1889 			if (rxr->hw_rsc == TRUE) {
   1890 				rsc = ixgbe_rsc_count(cur);
   1891 				rxr->rsc_num += (rsc - 1);
   1892 			}
   1893 			if (rsc) { /* Get hardware index */
   1894 				nextp = ((staterr &
   1895 				    IXGBE_RXDADV_NEXTP_MASK) >>
   1896 				    IXGBE_RXDADV_NEXTP_SHIFT);
   1897 			} else { /* Just sequential */
   1898 				nextp = i + 1;
   1899 				if (nextp == adapter->num_rx_desc)
   1900 					nextp = 0;
   1901 			}
   1902 			nbuf = &rxr->rx_buffers[nextp];
   1903 			prefetch(nbuf);
   1904 		}
   1905 		/*
   1906 		** Rather than using the fmp/lmp global pointers
   1907 		** we now keep the head of a packet chain in the
   1908 		** buffer struct and pass this along from one
   1909 		** descriptor to the next, until we get EOP.
   1910 		*/
   1911 		mp->m_len = len;
    1912 		/*
    1913 		** See if there is a stored head that tells us
    1914 		** this buffer continues an earlier frame.
    1915 		*/
   1916 		sendmp = rbuf->fmp;
   1917 		if (sendmp != NULL) {  /* secondary frag */
   1918 			rbuf->buf = rbuf->fmp = NULL;
   1919 			mp->m_flags &= ~M_PKTHDR;
   1920 			sendmp->m_pkthdr.len += mp->m_len;
   1921 		} else {
   1922 			/*
   1923 			 * Optimize.  This might be a small packet,
   1924 			 * maybe just a TCP ACK.  Do a fast copy that
   1925 			 * is cache aligned into a new mbuf, and
   1926 			 * leave the old mbuf+cluster for re-use.
   1927 			 */
   1928 			if (eop && len <= IXGBE_RX_COPY_LEN) {
   1929 				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
   1930 				if (sendmp != NULL) {
   1931 					sendmp->m_data +=
   1932 					    IXGBE_RX_COPY_ALIGN;
   1933 					ixgbe_bcopy(mp->m_data,
   1934 					    sendmp->m_data, len);
   1935 					sendmp->m_len = len;
   1936 					rxr->rx_copies.ev_count++;
   1937 					rbuf->flags |= IXGBE_RX_COPY;
   1938 				}
   1939 			}
   1940 			if (sendmp == NULL) {
   1941 				rbuf->buf = rbuf->fmp = NULL;
   1942 				sendmp = mp;
   1943 			}
   1944 
   1945 			/* first desc of a non-ps chain */
   1946 			sendmp->m_flags |= M_PKTHDR;
   1947 			sendmp->m_pkthdr.len = mp->m_len;
   1948 		}
   1949 		++processed;
   1950 
   1951 		/* Pass the head pointer on */
   1952 		if (eop == 0) {
   1953 			nbuf->fmp = sendmp;
   1954 			sendmp = NULL;
   1955 			mp->m_next = nbuf->buf;
   1956 		} else { /* Sending this frame */
   1957 			m_set_rcvif(sendmp, ifp);
   1958 			ifp->if_ipackets++;
   1959 			rxr->rx_packets.ev_count++;
   1960 			/* capture data for AIM */
   1961 			rxr->bytes += sendmp->m_pkthdr.len;
   1962 			rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
   1963 			/* Process vlan info */
   1964 			if ((rxr->vtag_strip) &&
   1965 			    (staterr & IXGBE_RXD_STAT_VP))
   1966 				vtag = le16toh(cur->wb.upper.vlan);
   1967 			if (vtag) {
   1968 				VLAN_INPUT_TAG(ifp, sendmp, vtag,
   1969 				    printf("%s: could not apply VLAN "
   1970 					"tag", __func__));
   1971 			}
   1972 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
   1973 				ixgbe_rx_checksum(staterr, sendmp, ptype,
   1974 				   &adapter->stats.pf);
   1975 			}
   1976 
   1977 #if 0 /* FreeBSD */
   1978                         /*
   1979                          * In case of multiqueue, we have RXCSUM.PCSD bit set
   1980                          * and never cleared. This means we have RSS hash
   1981                          * available to be used.
   1982                          */
   1983                         if (adapter->num_queues > 1) {
   1984                                 sendmp->m_pkthdr.flowid =
   1985                                     le32toh(cur->wb.lower.hi_dword.rss);
   1986                                 switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
   1987                                     case IXGBE_RXDADV_RSSTYPE_IPV4:
   1988                                         M_HASHTYPE_SET(sendmp,
   1989                                             M_HASHTYPE_RSS_IPV4);
   1990                                         break;
   1991                                     case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
   1992                                         M_HASHTYPE_SET(sendmp,
   1993                                             M_HASHTYPE_RSS_TCP_IPV4);
   1994                                         break;
   1995                                     case IXGBE_RXDADV_RSSTYPE_IPV6:
   1996                                         M_HASHTYPE_SET(sendmp,
   1997                                             M_HASHTYPE_RSS_IPV6);
   1998                                         break;
   1999                                     case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
   2000                                         M_HASHTYPE_SET(sendmp,
   2001                                             M_HASHTYPE_RSS_TCP_IPV6);
   2002                                         break;
   2003                                     case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
   2004                                         M_HASHTYPE_SET(sendmp,
   2005                                             M_HASHTYPE_RSS_IPV6_EX);
   2006                                         break;
   2007                                     case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
   2008                                         M_HASHTYPE_SET(sendmp,
   2009                                             M_HASHTYPE_RSS_TCP_IPV6_EX);
   2010                                         break;
   2011 #if __FreeBSD_version > 1100000
   2012                                     case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
   2013                                         M_HASHTYPE_SET(sendmp,
   2014                                             M_HASHTYPE_RSS_UDP_IPV4);
   2015                                         break;
   2016                                     case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
   2017                                         M_HASHTYPE_SET(sendmp,
   2018                                             M_HASHTYPE_RSS_UDP_IPV6);
   2019                                         break;
   2020                                     case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
   2021                                         M_HASHTYPE_SET(sendmp,
   2022                                             M_HASHTYPE_RSS_UDP_IPV6_EX);
   2023                                         break;
   2024 #endif
   2025                                     default:
   2026                                         M_HASHTYPE_SET(sendmp,
   2027                                             M_HASHTYPE_OPAQUE_HASH);
   2028                                 }
   2029                         } else {
   2030                                 sendmp->m_pkthdr.flowid = que->msix;
   2031 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
   2032 			}
   2033 #endif
   2034 		}
   2035 next_desc:
   2036 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   2037 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   2038 
   2039 		/* Advance our pointers to the next descriptor. */
   2040 		if (++i == rxr->num_desc)
   2041 			i = 0;
   2042 
   2043 		/* Now send to the stack or do LRO */
   2044 		if (sendmp != NULL) {
   2045 			rxr->next_to_check = i;
   2046 			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
   2047 			i = rxr->next_to_check;
   2048 		}
   2049 
    2050 		/* Every 8 descriptors we go to refresh mbufs */
   2051 		if (processed == 8) {
   2052 			ixgbe_refresh_mbufs(rxr, i);
   2053 			processed = 0;
   2054 		}
   2055 	}
   2056 
   2057 	/* Refresh any remaining buf structs */
   2058 	if (ixgbe_rx_unrefreshed(rxr))
   2059 		ixgbe_refresh_mbufs(rxr, i);
   2060 
   2061 	rxr->next_to_check = i;
   2062 
   2063 #ifdef LRO
   2064 	/*
   2065 	 * Flush any outstanding LRO work
   2066 	 */
   2067 	tcp_lro_flush_all(lro);
   2068 #endif /* LRO */
   2069 
   2070 	IXGBE_RX_UNLOCK(rxr);
   2071 
   2072 	/*
   2073 	** Still have cleaning to do?
   2074 	*/
   2075 	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
   2076 		return true;
   2077 	else
   2078 		return false;
   2079 }
   2080 
   2081 
   2082 /*********************************************************************
   2083  *
   2084  *  Verify that the hardware indicated that the checksum is valid.
   2085  *  Inform the stack about the status of checksum so that stack
   2086  *  doesn't spend time verifying the checksum.
   2087  *
   2088  *********************************************************************/
   2089 static void
   2090 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
   2091     struct ixgbe_hw_stats *stats)
   2092 {
   2093 	u16	status = (u16) staterr;
   2094 	u8	errors = (u8) (staterr >> 24);
   2095 #if 0
   2096 	bool	sctp = false;
   2097 
   2098 	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
   2099 	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
   2100 		sctp = true;
   2101 #endif
   2102 
   2103 	/* IPv4 checksum */
   2104 	if (status & IXGBE_RXD_STAT_IPCS) {
   2105 		stats->ipcs.ev_count++;
   2106 		if (!(errors & IXGBE_RXD_ERR_IPE)) {
   2107 			/* IP Checksum Good */
   2108 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
   2109 		} else {
   2110 			stats->ipcs_bad.ev_count++;
   2111 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
   2112 		}
   2113 	}
   2114 	/* TCP/UDP/SCTP checksum */
   2115 	if (status & IXGBE_RXD_STAT_L4CS) {
   2116 		stats->l4cs.ev_count++;
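         		/*
         		 * The descriptor reports a single L4 checksum result, so set
         		 * every TCP/UDP flag; the stack only consults the flag for the
         		 * protocol it is actually processing.
         		 */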
   2117 		int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
   2118 		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
   2119 			mp->m_pkthdr.csum_flags |= type;
   2120 		} else {
   2121 			stats->l4cs_bad.ev_count++;
   2122 			mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
   2123 		}
   2124 	}
   2125 }
   2126 
   2127 
   2128 /********************************************************************
   2129  * Manage DMA'able memory.
   2130  *******************************************************************/
   2131 
   2132 int
   2133 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
   2134 		struct ixgbe_dma_alloc *dma, const int mapflags)
   2135 {
   2136 	device_t dev = adapter->dev;
   2137 	int             r, rsegs;
   2138 
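         	/*
         	 * Create a DMA tag constraining the descriptor area to a single
         	 * DBA_ALIGN-aligned segment.
         	 */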
   2139 	r = ixgbe_dma_tag_create(adapter->osdep.dmat,	/* parent */
   2140 			       DBA_ALIGN, 0,	/* alignment, bounds */
   2141 			       size,	/* maxsize */
   2142 			       1,	/* nsegments */
   2143 			       size,	/* maxsegsize */
   2144 			       BUS_DMA_ALLOCNOW,	/* flags */
   2145 			       &dma->dma_tag);
   2146 	if (r != 0) {
   2147 		aprint_error_dev(dev,
   2148 		    "%s: ixgbe_dma_tag_create failed; error %d\n", __func__, r);
   2149 		goto fail_0;
   2150 	}
   2151 
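         	/* Allocate the physical memory backing the descriptor area. */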
   2152 	r = bus_dmamem_alloc(dma->dma_tag->dt_dmat,
   2153 		size,
   2154 		dma->dma_tag->dt_alignment,
   2155 		dma->dma_tag->dt_boundary,
   2156 		&dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
   2157 	if (r != 0) {
   2158 		aprint_error_dev(dev,
   2159 		    "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
   2160 		goto fail_1;
   2161 	}
   2162 
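         	/* Map the allocated segment into kernel virtual address space. */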
   2163 	r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
   2164 	    size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
   2165 	if (r != 0) {
   2166 		aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
   2167 		    __func__, r);
   2168 		goto fail_2;
   2169 	}
   2170 
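         	/* Create the DMA map that will be loaded with this memory. */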
   2171 	r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
   2172 	if (r != 0) {
   2173 		aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
   2174 		    __func__, r);
   2175 		goto fail_3;
   2176 	}
   2177 
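         	/* Load the map to obtain the bus address used by the hardware. */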
   2178 	r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map, dma->dma_vaddr,
   2179 			    size,
   2180 			    NULL,
   2181 			    mapflags | BUS_DMA_NOWAIT);
   2182 	if (r != 0) {
   2183 		aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
   2184 		    __func__, r);
   2185 		goto fail_4;
   2186 	}
   2187 	dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
   2188 	dma->dma_size = size;
   2189 	return 0;
   2190 fail_4:
   2191 	ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
   2192 fail_3:
   2193 	bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
   2194 fail_2:
   2195 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
   2196 fail_1:
   2197 	ixgbe_dma_tag_destroy(dma->dma_tag);
   2198 fail_0:
   2199 	return r;
   2200 }
   2201 
   2202 void
   2203 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
   2204 {
   2205 	bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
   2206 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   2207 	ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
   2208 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
   2209 	ixgbe_dma_tag_destroy(dma->dma_tag);
   2210 }
   2211 
   2212 
   2213 /*********************************************************************
   2214  *
   2215  *  Allocate memory for the transmit and receive rings, and then
   2216  *  the descriptors associated with each, called only once at attach.
   2217  *
   2218  **********************************************************************/
   2219 int
   2220 ixgbe_allocate_queues(struct adapter *adapter)
   2221 {
   2222 	device_t	dev = adapter->dev;
   2223 	struct ix_queue	*que;
   2224 	struct tx_ring	*txr;
   2225 	struct rx_ring	*rxr;
   2226 	int rsize, tsize, error = IXGBE_SUCCESS;
   2227 	int txconf = 0, rxconf = 0;
   2228 #ifdef PCI_IOV
   2229 	enum ixgbe_iov_mode iov_mode;
   2230 #endif
   2231 
   2232         /* First allocate the top level queue structs */
   2233         if (!(adapter->queues =
   2234             (struct ix_queue *) malloc(sizeof(struct ix_queue) *
   2235             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
   2236                 aprint_error_dev(dev, "Unable to allocate queue memory\n");
   2237                 error = ENOMEM;
   2238                 goto fail;
   2239         }
   2240 
    2241 	/* Next allocate the TX ring struct memory */
   2242 	if (!(adapter->tx_rings =
   2243 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
   2244 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
   2245 		aprint_error_dev(dev, "Unable to allocate TX ring memory\n");
   2246 		error = ENOMEM;
   2247 		goto tx_fail;
   2248 	}
   2249 
   2250 	/* Next allocate the RX */
   2251 	if (!(adapter->rx_rings =
   2252 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
   2253 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
   2254 		aprint_error_dev(dev, "Unable to allocate RX ring memory\n");
   2255 		error = ENOMEM;
   2256 		goto rx_fail;
   2257 	}
   2258 
   2259 	/* For the ring itself */
   2260 	tsize = roundup2(adapter->num_tx_desc *
   2261 	    sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
   2262 
   2263 #ifdef PCI_IOV
   2264 	iov_mode = ixgbe_get_iov_mode(adapter);
   2265 	adapter->pool = ixgbe_max_vfs(iov_mode);
   2266 #else
   2267 	adapter->pool = 0;
   2268 #endif
   2269 	/*
   2270 	 * Now set up the TX queues, txconf is needed to handle the
   2271 	 * possibility that things fail midcourse and we need to
   2272 	 * undo memory gracefully
   2273 	 */
   2274 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
   2275 		/* Set up some basics */
   2276 		txr = &adapter->tx_rings[i];
   2277 		txr->adapter = adapter;
   2278 #ifdef PCI_IOV
   2279 		txr->me = ixgbe_pf_que_index(iov_mode, i);
   2280 #else
   2281 		txr->me = i;
   2282 #endif
   2283 		txr->num_desc = adapter->num_tx_desc;
   2284 
   2285 		/* Initialize the TX side lock */
   2286 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
   2287 		    device_xname(dev), txr->me);
   2288 		mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
   2289 
   2290 		if (ixgbe_dma_malloc(adapter, tsize,
   2291 			&txr->txdma, BUS_DMA_NOWAIT)) {
   2292 			aprint_error_dev(dev,
   2293 			    "Unable to allocate TX Descriptor memory\n");
   2294 			error = ENOMEM;
   2295 			goto err_tx_desc;
   2296 		}
   2297 		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
   2298 		bzero((void *)txr->tx_base, tsize);
   2299 
   2300         	/* Now allocate transmit buffers for the ring */
   2301         	if (ixgbe_allocate_transmit_buffers(txr)) {
   2302 			aprint_error_dev(dev,
   2303 			    "Critical Failure setting up transmit buffers\n");
   2304 			error = ENOMEM;
   2305 			goto err_tx_desc;
   2306         	}
   2307 #ifndef IXGBE_LEGACY_TX
   2308 		/* Allocate a buf ring */
   2309 		txr->txr_interq = pcq_create(IXGBE_BR_SIZE, KM_SLEEP);
   2310 		if (txr->txr_interq == NULL) {
   2311 			aprint_error_dev(dev,
   2312 			    "Critical Failure setting up buf ring\n");
   2313 			error = ENOMEM;
   2314 			goto err_tx_desc;
   2315         	}
   2316 #endif
   2317 	}
   2318 
   2319 	/*
   2320 	 * Next the RX queues...
   2321 	 */
   2322 	rsize = roundup2(adapter->num_rx_desc *
   2323 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
   2324 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
   2325 		rxr = &adapter->rx_rings[i];
   2326 		/* Set up some basics */
   2327 		rxr->adapter = adapter;
   2328 #ifdef PCI_IOV
   2329 		rxr->me = ixgbe_pf_que_index(iov_mode, i);
   2330 #else
   2331 		rxr->me = i;
   2332 #endif
   2333 		rxr->num_desc = adapter->num_rx_desc;
   2334 
   2335 		/* Initialize the RX side lock */
   2336 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
   2337 		    device_xname(dev), rxr->me);
   2338 		mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
   2339 
   2340 		if (ixgbe_dma_malloc(adapter, rsize,
   2341 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
   2342 			aprint_error_dev(dev,
   2343 			    "Unable to allocate RxDescriptor memory\n");
   2344 			error = ENOMEM;
   2345 			goto err_rx_desc;
   2346 		}
   2347 		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
   2348 		bzero((void *)rxr->rx_base, rsize);
   2349 
    2350 		/* Allocate receive buffers for the ring */
   2351 		if (ixgbe_allocate_receive_buffers(rxr)) {
   2352 			aprint_error_dev(dev,
   2353 			    "Critical Failure setting up receive buffers\n");
   2354 			error = ENOMEM;
   2355 			goto err_rx_desc;
   2356 		}
   2357 	}
   2358 
   2359 	/*
   2360 	** Finally set up the queue holding structs
   2361 	*/
   2362 	for (int i = 0; i < adapter->num_queues; i++) {
   2363 		que = &adapter->queues[i];
   2364 		que->adapter = adapter;
   2365 		que->me = i;
   2366 		que->txr = &adapter->tx_rings[i];
   2367 		que->rxr = &adapter->rx_rings[i];
   2368 	}
   2369 
   2370 	return (0);
   2371 
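         /*
          * Error unwind: free the descriptor DMA areas allocated so far, then
          * the ring and queue arrays themselves.
          */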
   2372 err_rx_desc:
   2373 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
   2374 		ixgbe_dma_free(adapter, &rxr->rxdma);
   2375 err_tx_desc:
   2376 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
   2377 		ixgbe_dma_free(adapter, &txr->txdma);
   2378 	free(adapter->rx_rings, M_DEVBUF);
   2379 rx_fail:
   2380 	free(adapter->tx_rings, M_DEVBUF);
   2381 tx_fail:
   2382 	free(adapter->queues, M_DEVBUF);
   2383 fail:
   2384 	return (error);
   2385 }
   2386