ix_txrx.c revision 1.13
      1 /******************************************************************************
      2 
      3   Copyright (c) 2001-2015, Intel Corporation
      4   All rights reserved.
      5 
      6   Redistribution and use in source and binary forms, with or without
      7   modification, are permitted provided that the following conditions are met:
      8 
      9    1. Redistributions of source code must retain the above copyright notice,
     10       this list of conditions and the following disclaimer.
     11 
     12    2. Redistributions in binary form must reproduce the above copyright
     13       notice, this list of conditions and the following disclaimer in the
     14       documentation and/or other materials provided with the distribution.
     15 
     16    3. Neither the name of the Intel Corporation nor the names of its
     17       contributors may be used to endorse or promote products derived from
     18       this software without specific prior written permission.
     19 
     20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     30   POSSIBILITY OF SUCH DAMAGE.
     31 
     32 ******************************************************************************/
     33 /*
     34  * Copyright (c) 2011 The NetBSD Foundation, Inc.
     35  * All rights reserved.
     36  *
     37  * This code is derived from software contributed to The NetBSD Foundation
     38  * by Coyote Point Systems, Inc.
     39  *
     40  * Redistribution and use in source and binary forms, with or without
     41  * modification, are permitted provided that the following conditions
     42  * are met:
     43  * 1. Redistributions of source code must retain the above copyright
     44  *    notice, this list of conditions and the following disclaimer.
     45  * 2. Redistributions in binary form must reproduce the above copyright
     46  *    notice, this list of conditions and the following disclaimer in the
     47  *    documentation and/or other materials provided with the distribution.
     48  *
     49  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     50  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     51  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     52  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     53  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     54  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     55  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     56  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     57  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     58  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     59  * POSSIBILITY OF SUCH DAMAGE.
     60  */
     61 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 301538 2016-06-07 04:51:50Z sephe $*/
     62 /*$NetBSD: ix_txrx.c,v 1.13 2016/12/16 08:24:40 msaitoh Exp $*/
     63 
     64 #include "opt_inet.h"
     65 #include "opt_inet6.h"
     66 
     67 #include "ixgbe.h"
     68 
     69 #ifdef DEV_NETMAP
     70 #include <net/netmap.h>
     71 #include <sys/selinfo.h>
     72 #include <dev/netmap/netmap_kern.h>
     73 
     74 extern int ix_crcstrip;
     75 #endif
     76 
      77 /*
      78 ** HW RSC control:
      79 **  This feature only works with
      80 **  IPv4, and only on 82599 and later.
      81 **  It also causes IP forwarding to fail,
      82 **  and unlike software LRO that cannot be
      83 **  controlled by the stack. For these
      84 **  reasons it is left off by default with
      85 **  no tunable interface; enabling it
      86 **  requires recompiling with this set
      87 **  to TRUE.
      88 */
     89 static bool ixgbe_rsc_enable = FALSE;
     90 
     91 #ifdef IXGBE_FDIR
      92 /*
      93 ** For Flow Director: this is the
      94 ** TX packet sampling interval for
      95 ** the filter pool; with the default
      96 ** of 20, every 20th packet is probed.
      97 **
      98 ** This feature can be disabled by
      99 ** setting this to 0.
     100 */
    101 static int atr_sample_rate = 20;
    102 #endif
    103 
    104 /*********************************************************************
    105  *  Local Function prototypes
    106  *********************************************************************/
    107 static void	ixgbe_setup_transmit_ring(struct tx_ring *);
    108 static void     ixgbe_free_transmit_buffers(struct tx_ring *);
    109 static int	ixgbe_setup_receive_ring(struct rx_ring *);
    110 static void     ixgbe_free_receive_buffers(struct rx_ring *);
    111 
    112 static void	ixgbe_rx_checksum(u32, struct mbuf *, u32,
    113 		    struct ixgbe_hw_stats *);
    114 static void	ixgbe_refresh_mbufs(struct rx_ring *, int);
    115 static int      ixgbe_xmit(struct tx_ring *, struct mbuf *);
    116 static int	ixgbe_tx_ctx_setup(struct tx_ring *,
    117 		    struct mbuf *, u32 *, u32 *);
    118 static int	ixgbe_tso_setup(struct tx_ring *,
    119 		    struct mbuf *, u32 *, u32 *);
    120 #ifdef IXGBE_FDIR
    121 static void	ixgbe_atr(struct tx_ring *, struct mbuf *);
    122 #endif
    123 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
    124 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
    125 		    struct mbuf *, u32);
    126 
    127 static void	ixgbe_setup_hw_rsc(struct rx_ring *);
    128 
    129 #ifdef IXGBE_LEGACY_TX
    130 /*********************************************************************
    131  *  Transmit entry point
    132  *
    133  *  ixgbe_start is called by the stack to initiate a transmit.
    134  *  The driver will remain in this routine as long as there are
    135  *  packets to transmit and transmit resources are available.
     136  *  If resources are not available, the packet is left on the
     137  *  interface send queue and the routine returns.
    138  **********************************************************************/
    139 
    140 void
    141 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
    142 {
    143 	int rc;
    144 	struct mbuf    *m_head;
    145 	struct adapter *adapter = txr->adapter;
    146 
    147 	IXGBE_TX_LOCK_ASSERT(txr);
    148 
    149 	if ((ifp->if_flags & IFF_RUNNING) == 0)
    150 		return;
    151 	if (!adapter->link_active)
    152 		return;
    153 
    154 	while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
    155 		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
    156 			break;
    157 
    158 		IFQ_POLL(&ifp->if_snd, m_head);
    159 		if (m_head == NULL)
    160 			break;
    161 
    162 		if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
    163 			break;
    164 		}
    165 		IFQ_DEQUEUE(&ifp->if_snd, m_head);
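         		/*
         		 * EFBIG means the chain needed more DMA segments than
         		 * the tag allows; defragment into fewer mbufs and
         		 * retry the transmit once before dropping the packet.
         		 */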
    166 		if (rc == EFBIG) {
    167 			struct mbuf *mtmp;
    168 
    169 			if ((mtmp = m_defrag(m_head, M_NOWAIT)) != NULL) {
    170 				m_head = mtmp;
    171 				rc = ixgbe_xmit(txr, m_head);
    172 				if (rc != 0)
    173 					adapter->efbig2_tx_dma_setup.ev_count++;
    174 			} else
    175 				adapter->m_defrag_failed.ev_count++;
    176 		}
    177 		if (rc != 0) {
    178 			m_freem(m_head);
    179 			continue;
    180 		}
    181 
    182 		/* Send a copy of the frame to the BPF listener */
    183 		bpf_mtap(ifp, m_head);
    184 	}
    185 	return;
    186 }
    187 
    188 /*
    189  * Legacy TX start - called by the stack, this
    190  * always uses the first tx ring, and should
    191  * not be used with multiqueue tx enabled.
    192  */
    193 void
    194 ixgbe_start(struct ifnet *ifp)
    195 {
    196 	struct adapter *adapter = ifp->if_softc;
    197 	struct tx_ring	*txr = adapter->tx_rings;
    198 
    199 	if (ifp->if_flags & IFF_RUNNING) {
    200 		IXGBE_TX_LOCK(txr);
    201 		ixgbe_start_locked(txr, ifp);
    202 		IXGBE_TX_UNLOCK(txr);
    203 	}
    204 	return;
    205 }
    206 
    207 #else /* ! IXGBE_LEGACY_TX */
    208 
    209 /*
    210 ** Multiqueue Transmit Entry Point
    211 ** (if_transmit function)
    212 */
    213 int
    214 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
    215 {
    216 	struct adapter	*adapter = ifp->if_softc;
    217 	struct ix_queue	*que;
    218 	struct tx_ring	*txr;
    219 	int 		i, err = 0;
    220 #ifdef	RSS
    221 	uint32_t bucket_id;
    222 #endif
    223 
    224 	/*
    225 	 * When doing RSS, map it to the same outbound queue
    226 	 * as the incoming flow would be mapped to.
    227 	 *
     228 	 * If everything is set up correctly, it should be the
     229 	 * same bucket that the current CPU is mapped to.
    230 	 */
    231 #if __FreeBSD_version < 1100054
    232 	if (m->m_flags & M_FLOWID) {
    233 #else
    234 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
    235 #endif
    236 #ifdef	RSS
    237 		if (rss_hash2bucket(m->m_pkthdr.flowid,
    238 		    M_HASHTYPE_GET(m), &bucket_id) == 0) {
    239 			/* TODO: spit out something if bucket_id > num_queues? */
    240 			i = bucket_id % adapter->num_queues;
    241 #ifdef IXGBE_DEBUG
    242 			if (bucket_id > adapter->num_queues)
    243 				if_printf(ifp, "bucket_id (%d) > num_queues "
    244 				    "(%d)\n", bucket_id, adapter->num_queues);
    245 #endif
    246 		} else
    247 #endif
    248 			i = m->m_pkthdr.flowid % adapter->num_queues;
    249 	} else
    250 		i = curcpu % adapter->num_queues;
    251 
    252 	/* Check for a hung queue and pick alternative */
    253 	if (((1 << i) & adapter->active_queues) == 0)
    254 		i = ffsl(adapter->active_queues);
    255 
    256 	txr = &adapter->tx_rings[i];
    257 	que = &adapter->queues[i];
    258 
    259 	err = drbr_enqueue(ifp, txr->br, m);
    260 	if (err)
    261 		return (err);
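         	/*
         	 * Try to drain the ring right away; if the TX lock is
         	 * already held, defer to the per-queue softint, which
         	 * will drain the buf_ring once the lock is free.
         	 */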
    262 	if (IXGBE_TX_TRYLOCK(txr)) {
    263 		ixgbe_mq_start_locked(ifp, txr);
    264 		IXGBE_TX_UNLOCK(txr);
    265 	} else
    266 		softint_schedule(txr->txq_si);
    267 
    268 	return (0);
    269 }
    270 
    271 int
    272 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
    273 {
    274 	struct adapter  *adapter = txr->adapter;
    275 	struct mbuf     *next;
    276 	int             enqueued = 0, err = 0;
    277 
    278 	if (((ifp->if_flags & IFF_RUNNING) == 0) ||
    279 	    adapter->link_active == 0)
    280 		return (ENETDOWN);
    281 
    282 	/* Process the queue */
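         	/*
         	 * The pre-901504 path dequeues each packet and re-enqueues
         	 * it at the tail on failure; the newer path uses drbr_peek()
         	 * with drbr_advance()/drbr_putback() so a failed packet can
         	 * be put back at the head of the buf_ring instead.
         	 */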
    283 #if __FreeBSD_version < 901504
    284 	next = drbr_dequeue(ifp, txr->br);
    285 	while (next != NULL) {
    286 		if ((err = ixgbe_xmit(txr, &next)) != 0) {
    287 			if (next != NULL)
    288 				err = drbr_enqueue(ifp, txr->br, next);
    289 #else
    290 	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
    291 		if ((err = ixgbe_xmit(txr, &next)) != 0) {
    292 			if (next == NULL) {
    293 				drbr_advance(ifp, txr->br);
    294 			} else {
    295 				drbr_putback(ifp, txr->br, next);
    296 			}
    297 #endif
    298 			break;
    299 		}
    300 #if __FreeBSD_version >= 901504
    301 		drbr_advance(ifp, txr->br);
    302 #endif
    303 		enqueued++;
    304 #if 0 // this is VF-only
    305 #if __FreeBSD_version >= 1100036
    306 		/*
    307 		 * Since we're looking at the tx ring, we can check
    308 		 * to see if we're a VF by examing our tail register
    309 		 * address.
    310 		 */
    311 		if (txr->tail < IXGBE_TDT(0) && next->m_flags & M_MCAST)
    312 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
    313 #endif
    314 #endif
    315 		/* Send a copy of the frame to the BPF listener */
    316 		bpf_mtap(ifp, next);
    317 		if ((ifp->if_flags & IFF_RUNNING) == 0)
    318 			break;
    319 #if __FreeBSD_version < 901504
    320 		next = drbr_dequeue(ifp, txr->br);
    321 #endif
    322 	}
    323 
    324 	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
    325 		ixgbe_txeof(txr);
    326 
    327 	return (err);
    328 }
    329 
    330 /*
    331  * Called from a taskqueue to drain queued transmit packets.
    332  */
    333 void
    334 ixgbe_deferred_mq_start(void *arg, int pending)
    335 {
    336 	struct tx_ring *txr = arg;
    337 	struct adapter *adapter = txr->adapter;
    338 	struct ifnet *ifp = adapter->ifp;
    339 
    340 	IXGBE_TX_LOCK(txr);
    341 	if (!drbr_empty(ifp, txr->br))
    342 		ixgbe_mq_start_locked(ifp, txr);
    343 	IXGBE_TX_UNLOCK(txr);
    344 }
    345 
    346 /*
    347  * Flush all ring buffers
    348  */
    349 void
    350 ixgbe_qflush(struct ifnet *ifp)
    351 {
    352 	struct adapter	*adapter = ifp->if_softc;
    353 	struct tx_ring	*txr = adapter->tx_rings;
    354 	struct mbuf	*m;
    355 
    356 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
    357 		IXGBE_TX_LOCK(txr);
    358 		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
    359 			m_freem(m);
    360 		IXGBE_TX_UNLOCK(txr);
    361 	}
    362 	if_qflush(ifp);
    363 }
    364 #endif /* IXGBE_LEGACY_TX */
    365 
    366 
    367 /*********************************************************************
    368  *
    369  *  This routine maps the mbufs to tx descriptors, allowing the
    370  *  TX engine to transmit the packets.
    371  *  	- return 0 on success, positive on failure
    372  *
    373  **********************************************************************/
    374 
    375 static int
    376 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
    377 {
    378 	struct m_tag *mtag;
    379 	struct adapter  *adapter = txr->adapter;
    380 	struct ethercom *ec = &adapter->osdep.ec;
    381 	u32		olinfo_status = 0, cmd_type_len;
    382 	int             i, j, error;
    383 	int		first;
    384 	bus_dmamap_t	map;
    385 	struct ixgbe_tx_buf *txbuf;
    386 	union ixgbe_adv_tx_desc *txd = NULL;
    387 
    388 	/* Basic descriptor defines */
    389         cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
    390 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
    391 
    392 	if ((mtag = VLAN_OUTPUT_TAG(ec, m_head)) != NULL)
    393         	cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
    394 
     395         /*
     396          * Remember the first descriptor used; its tx_buffer
     397          * entry records the EOP descriptor so that txeof can
     398          * tell when the whole packet has completed.
     399          */
    400         first = txr->next_avail_desc;
    401 	txbuf = &txr->tx_buffers[first];
    402 	map = txbuf->map;
    403 
    404 	/*
    405 	 * Map the packet for DMA.
    406 	 */
    407 	error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map,
    408 	    m_head, BUS_DMA_NOWAIT);
    409 
    410 	if (__predict_false(error)) {
    411 
    412 		switch (error) {
    413 		case EAGAIN:
    414 			adapter->eagain_tx_dma_setup.ev_count++;
    415 			return EAGAIN;
    416 		case ENOMEM:
    417 			adapter->enomem_tx_dma_setup.ev_count++;
    418 			return EAGAIN;
    419 		case EFBIG:
    420 			/*
    421 			 * XXX Try it again?
    422 			 * do m_defrag() and retry bus_dmamap_load_mbuf().
    423 			 */
    424 			adapter->efbig_tx_dma_setup.ev_count++;
    425 			return error;
    426 		case EINVAL:
    427 			adapter->einval_tx_dma_setup.ev_count++;
    428 			return error;
    429 		default:
    430 			adapter->other_tx_dma_setup.ev_count++;
    431 			return error;
    432 		}
    433 	}
    434 
    435 	/* Make certain there are enough descriptors */
    436 	if (txr->tx_avail < (map->dm_nsegs + 2)) {
    437 		txr->no_desc_avail.ev_count++;
    438 		ixgbe_dmamap_unload(txr->txtag, txbuf->map);
    439 		return EAGAIN;
    440 	}
    441 
    442 	/*
     443 	 * Set up the appropriate offload context;
     444 	 * this will consume the first descriptor.
    445 	 */
    446 	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
    447 	if (__predict_false(error)) {
    448 		return (error);
    449 	}
    450 
    451 #ifdef IXGBE_FDIR
    452 	/* Do the flow director magic */
    453 	if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
    454 		++txr->atr_count;
    455 		if (txr->atr_count >= atr_sample_rate) {
    456 			ixgbe_atr(txr, m_head);
    457 			txr->atr_count = 0;
    458 		}
    459 	}
    460 #endif
    461 
    462 	olinfo_status |= IXGBE_ADVTXD_CC;
    463 	i = txr->next_avail_desc;
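         	/*
         	 * Build one advanced data descriptor per DMA segment;
         	 * all of them share the same command and offload status
         	 * computed above.
         	 */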
    464 	for (j = 0; j < map->dm_nsegs; j++) {
    465 		bus_size_t seglen;
    466 		bus_addr_t segaddr;
    467 
    468 		txbuf = &txr->tx_buffers[i];
    469 		txd = &txr->tx_base[i];
    470 		seglen = map->dm_segs[j].ds_len;
    471 		segaddr = htole64(map->dm_segs[j].ds_addr);
    472 
    473 		txd->read.buffer_addr = segaddr;
    474 		txd->read.cmd_type_len = htole32(txr->txd_cmd |
    475 		    cmd_type_len |seglen);
    476 		txd->read.olinfo_status = htole32(olinfo_status);
    477 
    478 		if (++i == txr->num_desc)
    479 			i = 0;
    480 	}
    481 
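         	/*
         	 * Flag the last data descriptor with EOP (end of packet)
         	 * and RS (report status) so the hardware writes back the
         	 * DD bit for this frame, which ixgbe_txeof() looks for.
         	 */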
    482 	txd->read.cmd_type_len |=
    483 	    htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
    484 	txr->tx_avail -= map->dm_nsegs;
    485 	txr->next_avail_desc = i;
    486 
    487 	txbuf->m_head = m_head;
     488 	/*
     489 	 * Swap the maps so the last descriptor, which gets
     490 	 * the completion interrupt, holds the real (loaded)
     491 	 * map, while the first descriptor takes over the
     492 	 * unused map from the last buffer.
     493 	 */
    494 	txr->tx_buffers[first].map = txbuf->map;
    495 	txbuf->map = map;
    496 	bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
    497 	    BUS_DMASYNC_PREWRITE);
    498 
    499         /* Set the EOP descriptor that will be marked done */
    500         txbuf = &txr->tx_buffers[first];
    501 	txbuf->eop = txd;
    502 
    503         ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
    504 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
    505 	/*
    506 	 * Advance the Transmit Descriptor Tail (Tdt), this tells the
    507 	 * hardware that this frame is available to transmit.
    508 	 */
    509 	++txr->total_packets.ev_count;
    510 	IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
    511 
    512 	/* Mark queue as having work */
    513 	if (txr->busy == 0)
    514 		txr->busy = 1;
    515 
    516 	return 0;
    517 }
    518 
    519 /*********************************************************************
    520  *
    521  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
    522  *  the information needed to transmit a packet on the wire. This is
     523  *  called only once at attach; setup is done on every reset.
    524  *
    525  **********************************************************************/
    526 int
    527 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
    528 {
    529 	struct adapter *adapter = txr->adapter;
    530 	device_t dev = adapter->dev;
    531 	struct ixgbe_tx_buf *txbuf;
    532 	int error, i;
    533 
    534 	/*
    535 	 * Setup DMA descriptor areas.
    536 	 */
    537 	if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat,	/* parent */
    538 			       1, 0,		/* alignment, bounds */
    539 			       IXGBE_TSO_SIZE,		/* maxsize */
    540 			       adapter->num_segs,	/* nsegments */
    541 			       PAGE_SIZE,		/* maxsegsize */
    542 			       0,			/* flags */
    543 			       &txr->txtag))) {
    544 		aprint_error_dev(dev,"Unable to allocate TX DMA tag\n");
    545 		goto fail;
    546 	}
    547 
    548 	if (!(txr->tx_buffers =
    549 	    (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
    550 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
    551 		aprint_error_dev(dev, "Unable to allocate tx_buffer memory\n");
    552 		error = ENOMEM;
    553 		goto fail;
    554 	}
    555 
    556         /* Create the descriptor buffer dma maps */
    557 	txbuf = txr->tx_buffers;
    558 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
    559 		error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
    560 		if (error != 0) {
    561 			aprint_error_dev(dev,
    562 			    "Unable to create TX DMA map (%d)\n", error);
    563 			goto fail;
    564 		}
    565 	}
    566 
    567 	return 0;
    568 fail:
    569 	/* We free all, it handles case where we are in the middle */
    570 	ixgbe_free_transmit_structures(adapter);
    571 	return (error);
    572 }
    573 
    574 /*********************************************************************
    575  *
    576  *  Initialize a transmit ring.
    577  *
    578  **********************************************************************/
    579 static void
    580 ixgbe_setup_transmit_ring(struct tx_ring *txr)
    581 {
    582 	struct adapter *adapter = txr->adapter;
    583 	struct ixgbe_tx_buf *txbuf;
    584 #ifdef DEV_NETMAP
    585 	struct netmap_adapter *na = NA(adapter->ifp);
    586 	struct netmap_slot *slot;
    587 #endif /* DEV_NETMAP */
    588 
    589 	/* Clear the old ring contents */
    590 	IXGBE_TX_LOCK(txr);
    591 #ifdef DEV_NETMAP
    592 	/*
    593 	 * (under lock): if in netmap mode, do some consistency
    594 	 * checks and set slot to entry 0 of the netmap ring.
    595 	 */
    596 	slot = netmap_reset(na, NR_TX, txr->me, 0);
    597 #endif /* DEV_NETMAP */
    598 	bzero((void *)txr->tx_base,
    599 	      (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
    600 	/* Reset indices */
    601 	txr->next_avail_desc = 0;
    602 	txr->next_to_clean = 0;
    603 
    604 	/* Free any existing tx buffers. */
    605         txbuf = txr->tx_buffers;
    606 	for (int i = 0; i < txr->num_desc; i++, txbuf++) {
    607 		if (txbuf->m_head != NULL) {
    608 			bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
    609 			    0, txbuf->m_head->m_pkthdr.len,
    610 			    BUS_DMASYNC_POSTWRITE);
    611 			ixgbe_dmamap_unload(txr->txtag, txbuf->map);
    612 			m_freem(txbuf->m_head);
    613 			txbuf->m_head = NULL;
    614 		}
    615 #ifdef DEV_NETMAP
    616 		/*
    617 		 * In netmap mode, set the map for the packet buffer.
    618 		 * NOTE: Some drivers (not this one) also need to set
    619 		 * the physical buffer address in the NIC ring.
    620 		 * Slots in the netmap ring (indexed by "si") are
    621 		 * kring->nkr_hwofs positions "ahead" wrt the
    622 		 * corresponding slot in the NIC ring. In some drivers
    623 		 * (not here) nkr_hwofs can be negative. Function
    624 		 * netmap_idx_n2k() handles wraparounds properly.
    625 		 */
    626 		if (slot) {
    627 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
    628 			netmap_load_map(na, txr->txtag,
    629 			    txbuf->map, NMB(na, slot + si));
    630 		}
    631 #endif /* DEV_NETMAP */
    632 		/* Clear the EOP descriptor pointer */
    633 		txbuf->eop = NULL;
    634         }
    635 
    636 #ifdef IXGBE_FDIR
    637 	/* Set the rate at which we sample packets */
    638 	if (adapter->hw.mac.type != ixgbe_mac_82598EB)
    639 		txr->atr_sample = atr_sample_rate;
    640 #endif
    641 
    642 	/* Set number of descriptors available */
    643 	txr->tx_avail = adapter->num_tx_desc;
    644 
    645 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
    646 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
    647 	IXGBE_TX_UNLOCK(txr);
    648 }
    649 
    650 /*********************************************************************
    651  *
    652  *  Initialize all transmit rings.
    653  *
    654  **********************************************************************/
    655 int
    656 ixgbe_setup_transmit_structures(struct adapter *adapter)
    657 {
    658 	struct tx_ring *txr = adapter->tx_rings;
    659 
    660 	for (int i = 0; i < adapter->num_queues; i++, txr++)
    661 		ixgbe_setup_transmit_ring(txr);
    662 
    663 	return (0);
    664 }
    665 
    666 /*********************************************************************
    667  *
    668  *  Free all transmit rings.
    669  *
    670  **********************************************************************/
    671 void
    672 ixgbe_free_transmit_structures(struct adapter *adapter)
    673 {
    674 	struct tx_ring *txr = adapter->tx_rings;
    675 
    676 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
    677 		ixgbe_free_transmit_buffers(txr);
    678 		ixgbe_dma_free(adapter, &txr->txdma);
    679 		IXGBE_TX_LOCK_DESTROY(txr);
    680 	}
    681 	free(adapter->tx_rings, M_DEVBUF);
    682 }
    683 
    684 /*********************************************************************
    685  *
    686  *  Free transmit ring related data structures.
    687  *
    688  **********************************************************************/
    689 static void
    690 ixgbe_free_transmit_buffers(struct tx_ring *txr)
    691 {
    692 	struct adapter *adapter = txr->adapter;
    693 	struct ixgbe_tx_buf *tx_buffer;
    694 	int             i;
    695 
    696 	INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
    697 
    698 	if (txr->tx_buffers == NULL)
    699 		return;
    700 
    701 	tx_buffer = txr->tx_buffers;
    702 	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
    703 		if (tx_buffer->m_head != NULL) {
    704 			bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
    705 			    0, tx_buffer->m_head->m_pkthdr.len,
    706 			    BUS_DMASYNC_POSTWRITE);
    707 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
    708 			m_freem(tx_buffer->m_head);
    709 			tx_buffer->m_head = NULL;
    710 			if (tx_buffer->map != NULL) {
    711 				ixgbe_dmamap_destroy(txr->txtag,
    712 				    tx_buffer->map);
    713 				tx_buffer->map = NULL;
    714 			}
    715 		} else if (tx_buffer->map != NULL) {
    716 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
    717 			ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
    718 			tx_buffer->map = NULL;
    719 		}
    720 	}
    721 #ifndef IXGBE_LEGACY_TX
    722 	if (txr->br != NULL)
    723 		buf_ring_free(txr->br, M_DEVBUF);
    724 #endif
    725 	if (txr->tx_buffers != NULL) {
    726 		free(txr->tx_buffers, M_DEVBUF);
    727 		txr->tx_buffers = NULL;
    728 	}
    729 	if (txr->txtag != NULL) {
    730 		ixgbe_dma_tag_destroy(txr->txtag);
    731 		txr->txtag = NULL;
    732 	}
    733 	return;
    734 }
    735 
    736 /*********************************************************************
    737  *
    738  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
    739  *
    740  **********************************************************************/
    741 
    742 static int
    743 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
    744     u32 *cmd_type_len, u32 *olinfo_status)
    745 {
    746 	struct adapter *adapter = txr->adapter;
    747 	struct ethercom *ec = &adapter->osdep.ec;
    748 	struct m_tag *mtag;
    749 	struct ixgbe_adv_tx_context_desc *TXD;
    750 	struct ether_vlan_header *eh;
    751 #ifdef INET
    752 	struct ip *ip;
    753 #endif
    754 #ifdef INET6
    755 	struct ip6_hdr *ip6;
    756 #endif
    757 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
    758 	int	ehdrlen, ip_hlen = 0;
    759 	u16	etype;
    760 	u8	ipproto = 0;
    761 	int	offload = TRUE;
    762 	int	ctxd = txr->next_avail_desc;
    763 	u16	vtag = 0;
    764 	char	*l3d;
    765 
    766 
    767 	/* First check if TSO is to be used */
    768 	if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4|M_CSUM_TSOv6))
    769 		return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
    770 
    771 	if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
    772 		offload = FALSE;
    773 
    774 	/* Indicate the whole packet as payload when not doing TSO */
    775        	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
    776 
    777 	/* Now ready a context descriptor */
    778 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
    779 
    780 	/*
    781 	** In advanced descriptors the vlan tag must
    782 	** be placed into the context descriptor. Hence
    783 	** we need to make one even if not doing offloads.
    784 	*/
    785 	if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
    786 		vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
    787 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
    788 	} else if (!IXGBE_IS_X550VF(adapter) && (offload == FALSE))
    789 		return (0);
    790 
    791 	/*
    792 	 * Determine where frame payload starts.
    793 	 * Jump over vlan headers if already present,
    794 	 * helpful for QinQ too.
    795 	 */
    796 	KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
    797 	eh = mtod(mp, struct ether_vlan_header *);
    798 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
    799 		KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
    800 		etype = ntohs(eh->evl_proto);
    801 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
    802 	} else {
    803 		etype = ntohs(eh->evl_encap_proto);
    804 		ehdrlen = ETHER_HDR_LEN;
    805 	}
    806 
    807 	/* Set the ether header length */
    808 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
    809 
    810 	if (offload == FALSE)
    811 		goto no_offloads;
    812 
    813 	/*
    814 	 * If the first mbuf only includes the ethernet header, jump to the next one
    815 	 * XXX: This assumes the stack splits mbufs containing headers on header boundaries
    816 	 * XXX: And assumes the entire IP header is contained in one mbuf
    817 	 */
    818 	if (mp->m_len == ehdrlen && mp->m_next)
    819 		l3d = mtod(mp->m_next, char *);
    820 	else
    821 		l3d = mtod(mp, char *) + ehdrlen;
    822 
    823 	switch (etype) {
    824 #ifdef INET
    825 	case ETHERTYPE_IP:
    826 		ip = (struct ip *)(l3d);
    827 		ip_hlen = ip->ip_hl << 2;
    828 		ipproto = ip->ip_p;
    829 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
    830 		KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
    831 		    ip->ip_sum == 0);
    832 		break;
    833 #endif
    834 #ifdef INET6
    835 	case ETHERTYPE_IPV6:
    836 		ip6 = (struct ip6_hdr *)(l3d);
    837 		ip_hlen = sizeof(struct ip6_hdr);
    838 		ipproto = ip6->ip6_nxt;
    839 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
    840 		break;
    841 #endif
    842 	default:
    843 		offload = false;
    844 		break;
    845 	}
    846 
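         	/* Request IPv4 header checksum insertion by the hardware */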
    847 	if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
    848 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
    849 
    850 	vlan_macip_lens |= ip_hlen;
    851 
    852 	/* No support for offloads for non-L4 next headers */
     853 	switch (ipproto) {
     854 	case IPPROTO_TCP:
     855 		if (mp->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_TCPv6))
     856 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
     857 		else
     858 			offload = false;
     859 		break;
     860 	case IPPROTO_UDP:
     861 		if (mp->m_pkthdr.csum_flags & (M_CSUM_UDPv4|M_CSUM_UDPv6))
     862 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
     863 		else
     864 			offload = false;
     865 		break;
     866 	default:
     867 		offload = false;
     868 		break;
     869 	}
    871 
    872 	if (offload) /* Insert L4 checksum into data descriptors */
    873 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
    874 
    875 no_offloads:
    876 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
    877 
    878 	/* Now copy bits into descriptor */
    879 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
    880 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
    881 	TXD->seqnum_seed = htole32(0);
    882 	TXD->mss_l4len_idx = htole32(0);
    883 
    884 	/* We've consumed the first desc, adjust counters */
    885 	if (++ctxd == txr->num_desc)
    886 		ctxd = 0;
    887 	txr->next_avail_desc = ctxd;
    888 	--txr->tx_avail;
    889 
    890         return 0;
    891 }
    892 
    893 /**********************************************************************
    894  *
    895  *  Setup work for hardware segmentation offload (TSO) on
    896  *  adapters using advanced tx descriptors
    897  *
    898  **********************************************************************/
    899 static int
    900 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
    901     u32 *cmd_type_len, u32 *olinfo_status)
    902 {
    903 	struct m_tag *mtag;
    904 	struct adapter *adapter = txr->adapter;
    905 	struct ethercom *ec = &adapter->osdep.ec;
    906 	struct ixgbe_adv_tx_context_desc *TXD;
    907 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
    908 	u32 mss_l4len_idx = 0, paylen;
    909 	u16 vtag = 0, eh_type;
    910 	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
    911 	struct ether_vlan_header *eh;
    912 #ifdef INET6
    913 	struct ip6_hdr *ip6;
    914 #endif
    915 #ifdef INET
    916 	struct ip *ip;
    917 #endif
    918 	struct tcphdr *th;
    919 
    920 	/*
    921 	 * Determine where frame payload starts.
    922 	 * Jump over vlan headers if already present
    923 	 */
    924 	eh = mtod(mp, struct ether_vlan_header *);
    925 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
    926 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
    927 		eh_type = eh->evl_proto;
    928 	} else {
    929 		ehdrlen = ETHER_HDR_LEN;
    930 		eh_type = eh->evl_encap_proto;
    931 	}
    932 
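         	/*
         	 * Locate the L3 header, require a TCP payload, and seed
         	 * th_sum with the pseudo-header checksum (addresses and
         	 * protocol, no length); the hardware finishes the checksum
         	 * for each segment it generates.
         	 */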
    933 	switch (ntohs(eh_type)) {
    934 #ifdef INET6
    935 	case ETHERTYPE_IPV6:
    936 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
    937 		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
    938 		if (ip6->ip6_nxt != IPPROTO_TCP)
    939 			return (ENXIO);
    940 		ip_hlen = sizeof(struct ip6_hdr);
    941 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
    942 		th = (struct tcphdr *)((char *)ip6 + ip_hlen);
    943 		th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
    944 		    &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
    945 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
    946 		break;
    947 #endif
    948 #ifdef INET
    949 	case ETHERTYPE_IP:
    950 		ip = (struct ip *)(mp->m_data + ehdrlen);
    951 		if (ip->ip_p != IPPROTO_TCP)
    952 			return (ENXIO);
    953 		ip->ip_sum = 0;
    954 		ip_hlen = ip->ip_hl << 2;
    955 		th = (struct tcphdr *)((char *)ip + ip_hlen);
    956 		th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
    957 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
    958 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
    959 		/* Tell transmit desc to also do IPv4 checksum. */
    960 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
    961 		break;
    962 #endif
    963 	default:
    964 		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
    965 		    __func__, ntohs(eh_type));
    966 		break;
    967 	}
    968 
    969 	ctxd = txr->next_avail_desc;
    970 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
    971 
    972 	tcp_hlen = th->th_off << 2;
    973 
    974 	/* This is used in the transmit desc in encap */
    975 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
    976 
    977 	/* VLAN MACLEN IPLEN */
    978 	if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
    979 		vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
    980                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
    981 	}
    982 
    983 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
    984 	vlan_macip_lens |= ip_hlen;
    985 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
    986 
    987 	/* ADV DTYPE TUCMD */
    988 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
    989 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
    990 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
    991 
    992 	/* MSS L4LEN IDX */
    993 	mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
    994 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
    995 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
    996 
    997 	TXD->seqnum_seed = htole32(0);
    998 
    999 	if (++ctxd == txr->num_desc)
   1000 		ctxd = 0;
   1001 
   1002 	txr->tx_avail--;
   1003 	txr->next_avail_desc = ctxd;
   1004 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
   1005 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
   1006 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
   1007 	++txr->tso_tx.ev_count;
   1008 	return (0);
   1009 }
   1010 
   1011 
   1012 /**********************************************************************
   1013  *
   1014  *  Examine each tx_buffer in the used queue. If the hardware is done
   1015  *  processing the packet then free associated resources. The
   1016  *  tx_buffer is put back on the free queue.
   1017  *
   1018  **********************************************************************/
   1019 void
   1020 ixgbe_txeof(struct tx_ring *txr)
   1021 {
   1022 	struct adapter		*adapter = txr->adapter;
   1023 	struct ifnet		*ifp = adapter->ifp;
   1024 	u32			work, processed = 0;
   1025 	u32			limit = adapter->tx_process_limit;
   1026 	struct ixgbe_tx_buf	*buf;
   1027 	union ixgbe_adv_tx_desc *txd;
   1028 
   1029 	KASSERT(mutex_owned(&txr->tx_mtx));
   1030 
   1031 #ifdef DEV_NETMAP
   1032 	if (ifp->if_capenable & IFCAP_NETMAP) {
   1033 		struct netmap_adapter *na = NA(ifp);
   1034 		struct netmap_kring *kring = &na->tx_rings[txr->me];
   1035 		txd = txr->tx_base;
   1036 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1037 		    BUS_DMASYNC_POSTREAD);
   1038 		/*
   1039 		 * In netmap mode, all the work is done in the context
   1040 		 * of the client thread. Interrupt handlers only wake up
   1041 		 * clients, which may be sleeping on individual rings
   1042 		 * or on a global resource for all rings.
   1043 		 * To implement tx interrupt mitigation, we wake up the client
   1044 		 * thread roughly every half ring, even if the NIC interrupts
   1045 		 * more frequently. This is implemented as follows:
   1046 		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
   1047 		 *   the slot that should wake up the thread (nkr_num_slots
   1048 		 *   means the user thread should not be woken up);
   1049 		 * - the driver ignores tx interrupts unless netmap_mitigate=0
   1050 		 *   or the slot has the DD bit set.
   1051 		 */
   1052 		if (!netmap_mitigate ||
   1053 		    (kring->nr_kflags < kring->nkr_num_slots &&
   1054 		    txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
   1055 			netmap_tx_irq(ifp, txr->me);
   1056 		}
   1057 		return;
   1058 	}
   1059 #endif /* DEV_NETMAP */
   1060 
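         	/* All descriptors are free: no work, and the queue cannot be hung */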
   1061 	if (txr->tx_avail == txr->num_desc) {
   1062 		txr->busy = 0;
   1063 		return;
   1064 	}
   1065 
   1066 	/* Get work starting point */
   1067 	work = txr->next_to_clean;
   1068 	buf = &txr->tx_buffers[work];
   1069 	txd = &txr->tx_base[work];
   1070 	work -= txr->num_desc; /* The distance to ring end */
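         	/*
         	 * 'work' is kept as a negative offset from the end of the
         	 * ring, so it reaches zero exactly when the clean index
         	 * wraps; the "!work" tests below detect that and reset the
         	 * descriptor and buffer pointers to the start of the ring.
         	 */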
   1071         ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1072 	    BUS_DMASYNC_POSTREAD);
   1073 
   1074 	do {
   1075 		union ixgbe_adv_tx_desc *eop = buf->eop;
   1076 		if (eop == NULL) /* No work */
   1077 			break;
   1078 
   1079 		if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
   1080 			break;	/* I/O not complete */
   1081 
   1082 		if (buf->m_head) {
   1083 			txr->bytes +=
   1084 			    buf->m_head->m_pkthdr.len;
   1085 			bus_dmamap_sync(txr->txtag->dt_dmat,
   1086 			    buf->map,
   1087 			    0, buf->m_head->m_pkthdr.len,
   1088 			    BUS_DMASYNC_POSTWRITE);
   1089 			ixgbe_dmamap_unload(txr->txtag,
   1090 			    buf->map);
   1091 			m_freem(buf->m_head);
   1092 			buf->m_head = NULL;
   1093 		}
   1094 		buf->eop = NULL;
   1095 		++txr->tx_avail;
   1096 
   1097 		/* We clean the range if multi segment */
   1098 		while (txd != eop) {
   1099 			++txd;
   1100 			++buf;
   1101 			++work;
   1102 			/* wrap the ring? */
   1103 			if (__predict_false(!work)) {
   1104 				work -= txr->num_desc;
   1105 				buf = txr->tx_buffers;
   1106 				txd = txr->tx_base;
   1107 			}
   1108 			if (buf->m_head) {
   1109 				txr->bytes +=
   1110 				    buf->m_head->m_pkthdr.len;
   1111 				bus_dmamap_sync(txr->txtag->dt_dmat,
   1112 				    buf->map,
   1113 				    0, buf->m_head->m_pkthdr.len,
   1114 				    BUS_DMASYNC_POSTWRITE);
   1115 				ixgbe_dmamap_unload(txr->txtag,
   1116 				    buf->map);
   1117 				m_freem(buf->m_head);
   1118 				buf->m_head = NULL;
   1119 			}
   1120 			++txr->tx_avail;
   1121 			buf->eop = NULL;
   1122 
   1123 		}
   1124 		++txr->packets;
   1125 		++processed;
   1126 		++ifp->if_opackets;
   1127 
   1128 		/* Try the next packet */
   1129 		++txd;
   1130 		++buf;
   1131 		++work;
   1132 		/* reset with a wrap */
   1133 		if (__predict_false(!work)) {
   1134 			work -= txr->num_desc;
   1135 			buf = txr->tx_buffers;
   1136 			txd = txr->tx_base;
   1137 		}
   1138 		prefetch(txd);
   1139 	} while (__predict_true(--limit));
   1140 
   1141 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1142 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   1143 
   1144 	work += txr->num_desc;
   1145 	txr->next_to_clean = work;
   1146 
    1147 	/*
    1148 	** Queue hang detection: we know there is work
    1149 	** outstanding or the early return above would
    1150 	** have been taken, so if nothing was cleaned,
    1151 	** increment busy. The local timer checks this
    1152 	** count and marks the queue HUNG once it
    1153 	** exceeds the maximum number of attempts.
    1154 	*/
   1155 	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
   1156 		++txr->busy;
    1157 	/*
    1158 	** If anything was cleaned, reset the state to 1;
    1159 	** note this also clears HUNG if it was set.
    1160 	*/
   1161 	if (processed)
   1162 		txr->busy = 1;
   1163 
   1164 	if (txr->tx_avail == txr->num_desc)
   1165 		txr->busy = 0;
   1166 
   1167 	return;
   1168 }
   1169 
   1170 
   1171 #ifdef IXGBE_FDIR
    1172 /*
    1173 ** This routine parses packet headers so that Flow
    1174 ** Director can make a hashed filter table entry,
    1175 ** allowing traffic flows to be identified and kept
    1176 ** on the same CPU.  Doing this for every packet
    1177 ** would be a performance hit, so only one in every
    1178 ** atr_sample_rate packets is sampled.
    1179 */
   1180 static void
   1181 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
   1182 {
   1183 	struct adapter			*adapter = txr->adapter;
   1184 	struct ix_queue			*que;
   1185 	struct ip			*ip;
   1186 	struct tcphdr			*th;
   1187 	struct udphdr			*uh;
   1188 	struct ether_vlan_header	*eh;
   1189 	union ixgbe_atr_hash_dword	input = {.dword = 0};
   1190 	union ixgbe_atr_hash_dword	common = {.dword = 0};
   1191 	int  				ehdrlen, ip_hlen;
   1192 	u16				etype;
   1193 
   1194 	eh = mtod(mp, struct ether_vlan_header *);
   1195 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
   1196 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
   1197 		etype = eh->evl_proto;
   1198 	} else {
   1199 		ehdrlen = ETHER_HDR_LEN;
   1200 		etype = eh->evl_encap_proto;
   1201 	}
   1202 
   1203 	/* Only handling IPv4 */
   1204 	if (etype != htons(ETHERTYPE_IP))
   1205 		return;
   1206 
   1207 	ip = (struct ip *)(mp->m_data + ehdrlen);
   1208 	ip_hlen = ip->ip_hl << 2;
   1209 
   1210 	/* check if we're UDP or TCP */
   1211 	switch (ip->ip_p) {
   1212 	case IPPROTO_TCP:
   1213 		th = (struct tcphdr *)((char *)ip + ip_hlen);
   1214 		/* src and dst are inverted */
   1215 		common.port.dst ^= th->th_sport;
   1216 		common.port.src ^= th->th_dport;
   1217 		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
   1218 		break;
   1219 	case IPPROTO_UDP:
   1220 		uh = (struct udphdr *)((char *)ip + ip_hlen);
   1221 		/* src and dst are inverted */
   1222 		common.port.dst ^= uh->uh_sport;
   1223 		common.port.src ^= uh->uh_dport;
   1224 		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
   1225 		break;
   1226 	default:
   1227 		return;
   1228 	}
   1229 
   1230 	input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
   1231 	if (mp->m_pkthdr.ether_vtag)
   1232 		common.flex_bytes ^= htons(ETHERTYPE_VLAN);
   1233 	else
   1234 		common.flex_bytes ^= etype;
   1235 	common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
   1236 
   1237 	que = &adapter->queues[txr->me];
   1238 	/*
   1239 	** This assumes the Rx queue and Tx
   1240 	** queue are bound to the same CPU
   1241 	*/
   1242 	ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
   1243 	    input, common, que->msix);
   1244 }
   1245 #endif /* IXGBE_FDIR */
   1246 
   1247 /*
   1248 ** Used to detect a descriptor that has
   1249 ** been merged by Hardware RSC.
   1250 */
   1251 static inline u32
   1252 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
   1253 {
   1254 	return (le32toh(rx->wb.lower.lo_dword.data) &
   1255 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
   1256 }
   1257 
    1258 /*********************************************************************
    1259  *
    1260  *  Initialize the Hardware RSC (LRO) feature on 82599 for an
    1261  *  RX ring. It is toggled by the LRO capability even though
    1262  *  it is transparent to the stack.
    1263  *
    1264  *  NOTE: since this HW feature only works with IPv4 and our
    1265  *        testing has shown soft LRO to be just as effective,
    1266  *        it is disabled by default.
    1267  *
    1268  **********************************************************************/
   1269 static void
   1270 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
   1271 {
   1272 	struct	adapter 	*adapter = rxr->adapter;
   1273 	struct	ixgbe_hw	*hw = &adapter->hw;
   1274 	u32			rscctrl, rdrxctl;
   1275 
    1276 	/* If turning LRO/RSC off we need to disable it */
    1277 	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
    1278 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
    1279 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
         		/* Write the cleared enable bit back so RSC is really off */
         		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
    1280 		return;
    1281 	}
   1282 
   1283 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
   1284 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
   1285 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
   1286 	if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
   1287 #endif /* DEV_NETMAP */
   1288 	rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
   1289 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
   1290 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
   1291 
   1292 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
   1293 	rscctrl |= IXGBE_RSCCTL_RSCEN;
   1294 	/*
   1295 	** Limit the total number of descriptors that
   1296 	** can be combined, so it does not exceed 64K
   1297 	*/
   1298 	if (rxr->mbuf_sz == MCLBYTES)
   1299 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
   1300 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
   1301 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
   1302 	else if (rxr->mbuf_sz == MJUM9BYTES)
   1303 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
   1304 	else  /* Using 16K cluster */
   1305 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
   1306 
   1307 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
   1308 
   1309 	/* Enable TCP header recognition */
   1310 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
   1311 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
   1312 	    IXGBE_PSRTYPE_TCPHDR));
   1313 
   1314 	/* Disable RSC for ACK packets */
   1315 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
   1316 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
   1317 
   1318 	rxr->hw_rsc = TRUE;
   1319 }
   1320 
    1321 /*********************************************************************
    1322  *
    1323  *  Refresh mbuf buffers for RX descriptor rings
    1324  *   - keeps its own state, so discards due to resource
    1325  *     exhaustion are unnecessary; if an mbuf cannot be obtained
    1326  *     it just returns, keeping its placeholder, and can simply
    1327  *     be called again later to retry.
    1328  *
    1329  **********************************************************************/
   1330 static void
   1331 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
   1332 {
   1333 	struct adapter		*adapter = rxr->adapter;
   1334 	struct ixgbe_rx_buf	*rxbuf;
   1335 	struct mbuf		*mp;
   1336 	int			i, j, error;
   1337 	bool			refreshed = false;
   1338 
   1339 	i = j = rxr->next_to_refresh;
   1340 	/* Control the loop with one beyond */
   1341 	if (++j == rxr->num_desc)
   1342 		j = 0;
   1343 
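         	/*
         	 * 'i' is the slot being refreshed and 'j' runs one slot
         	 * ahead; the loop stops when the look-ahead reaches
         	 * 'limit' (the caller's current receive index), so a
         	 * descriptor is never recycled before it has been
         	 * processed.
         	 */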
   1344 	while (j != limit) {
   1345 		rxbuf = &rxr->rx_buffers[i];
   1346 		if (rxbuf->buf == NULL) {
   1347 			mp = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
   1348 			    MT_DATA, M_PKTHDR, rxr->mbuf_sz);
   1349 			if (mp == NULL) {
   1350 				rxr->no_jmbuf.ev_count++;
   1351 				goto update;
   1352 			}
   1353 			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
   1354 				m_adj(mp, ETHER_ALIGN);
   1355 		} else
   1356 			mp = rxbuf->buf;
   1357 
   1358 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   1359 
   1360 		/* If we're dealing with an mbuf that was copied rather
   1361 		 * than replaced, there's no need to go through busdma.
   1362 		 */
   1363 		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
   1364 			/* Get the memory mapping */
   1365 			ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   1366 			error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
   1367 			    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
   1368 			if (error != 0) {
   1369 				printf("Refresh mbufs: payload dmamap load"
   1370 				    " failure - %d\n", error);
   1371 				m_free(mp);
   1372 				rxbuf->buf = NULL;
   1373 				goto update;
   1374 			}
   1375 			rxbuf->buf = mp;
   1376 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1377 			    0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
   1378 			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
   1379 			    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1380 		} else {
   1381 			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
   1382 			rxbuf->flags &= ~IXGBE_RX_COPY;
   1383 		}
   1384 
   1385 		refreshed = true;
   1386 		/* Next is precalculated */
   1387 		i = j;
   1388 		rxr->next_to_refresh = i;
   1389 		if (++j == rxr->num_desc)
   1390 			j = 0;
   1391 	}
   1392 update:
   1393 	if (refreshed) /* Update hardware tail index */
   1394 		IXGBE_WRITE_REG(&adapter->hw,
   1395 		    rxr->tail, rxr->next_to_refresh);
   1396 	return;
   1397 }
   1398 
   1399 /*********************************************************************
   1400  *
   1401  *  Allocate memory for rx_buffer structures. Since we use one
    1402  *  rx_buffer per received packet, the maximum number of rx_buffers
   1403  *  that we'll need is equal to the number of receive descriptors
   1404  *  that we've allocated.
   1405  *
   1406  **********************************************************************/
   1407 int
   1408 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
   1409 {
   1410 	struct	adapter 	*adapter = rxr->adapter;
   1411 	device_t 		dev = adapter->dev;
   1412 	struct ixgbe_rx_buf 	*rxbuf;
   1413 	int             	bsize, error;
   1414 
   1415 	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
   1416 	if (!(rxr->rx_buffers =
   1417 	    (struct ixgbe_rx_buf *) malloc(bsize,
   1418 	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
   1419 		aprint_error_dev(dev, "Unable to allocate rx_buffer memory\n");
   1420 		error = ENOMEM;
   1421 		goto fail;
   1422 	}
   1423 
   1424 	if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat,	/* parent */
   1425 				   1, 0,	/* alignment, bounds */
   1426 				   MJUM16BYTES,		/* maxsize */
   1427 				   1,			/* nsegments */
   1428 				   MJUM16BYTES,		/* maxsegsize */
   1429 				   0,			/* flags */
   1430 				   &rxr->ptag))) {
   1431 		aprint_error_dev(dev, "Unable to create RX DMA tag\n");
   1432 		goto fail;
   1433 	}
   1434 
   1435 	for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
   1436 		rxbuf = &rxr->rx_buffers[i];
   1437 		error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
   1438 		if (error) {
   1439 			aprint_error_dev(dev, "Unable to create RX dma map\n");
   1440 			goto fail;
   1441 		}
   1442 	}
   1443 
   1444 	return (0);
   1445 
   1446 fail:
   1447 	/* Frees all, but can handle partial completion */
   1448 	ixgbe_free_receive_structures(adapter);
   1449 	return (error);
   1450 }
   1451 
   1452 static void
   1453 ixgbe_free_receive_ring(struct rx_ring *rxr)
   1454 {
   1455 	struct ixgbe_rx_buf       *rxbuf;
   1456 
   1457 	for (int i = 0; i < rxr->num_desc; i++) {
   1458 		rxbuf = &rxr->rx_buffers[i];
   1459 		if (rxbuf->buf != NULL) {
   1460 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1461 			    0, rxbuf->buf->m_pkthdr.len,
   1462 			    BUS_DMASYNC_POSTREAD);
   1463 			ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   1464 			rxbuf->buf->m_flags |= M_PKTHDR;
   1465 			m_freem(rxbuf->buf);
   1466 			rxbuf->buf = NULL;
   1467 			rxbuf->flags = 0;
   1468 		}
   1469 	}
   1470 }
   1471 
   1472 /*********************************************************************
   1473  *
   1474  *  Initialize a receive ring and its buffers.
   1475  *
   1476  **********************************************************************/
   1477 static int
   1478 ixgbe_setup_receive_ring(struct rx_ring *rxr)
   1479 {
   1480 	struct	adapter 	*adapter;
   1481 	struct ixgbe_rx_buf	*rxbuf;
   1482 #ifdef LRO
   1483 	struct ifnet		*ifp;
   1484 	struct lro_ctrl		*lro = &rxr->lro;
   1485 #endif /* LRO */
   1486 	int			rsize, error = 0;
   1487 #ifdef DEV_NETMAP
   1488 	struct netmap_adapter *na = NA(rxr->adapter->ifp);
   1489 	struct netmap_slot *slot;
   1490 #endif /* DEV_NETMAP */
   1491 
   1492 	adapter = rxr->adapter;
   1493 #ifdef LRO
   1494 	ifp = adapter->ifp;
   1495 #endif /* LRO */
   1496 
   1497 	/* Clear the ring contents */
   1498 	IXGBE_RX_LOCK(rxr);
   1499 #ifdef DEV_NETMAP
   1500 	/* same as in ixgbe_setup_transmit_ring() */
   1501 	slot = netmap_reset(na, NR_RX, rxr->me, 0);
   1502 #endif /* DEV_NETMAP */
   1503 	rsize = roundup2(adapter->num_rx_desc *
   1504 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
   1505 	bzero((void *)rxr->rx_base, rsize);
   1506 	/* Cache the size */
   1507 	rxr->mbuf_sz = adapter->rx_mbuf_sz;
   1508 
   1509 	/* Free current RX buffer structs and their mbufs */
   1510 	ixgbe_free_receive_ring(rxr);
   1511 
   1512 	IXGBE_RX_UNLOCK(rxr);
   1513 
   1514 	/* Now reinitialize our supply of jumbo mbufs.  The number
   1515 	 * or size of jumbo mbufs may have changed.
   1516 	 */
   1517 	ixgbe_jcl_reinit(&adapter->jcl_head, rxr->ptag->dt_dmat,
   1518 	    2 * adapter->num_rx_desc, adapter->rx_mbuf_sz);
   1519 
   1520 	IXGBE_RX_LOCK(rxr);
   1521 
   1522 	/* Now replenish the mbufs */
   1523 	for (int j = 0; j != rxr->num_desc; ++j) {
   1524 		struct mbuf	*mp;
   1525 
   1526 		rxbuf = &rxr->rx_buffers[j];
   1527 #ifdef DEV_NETMAP
   1528 		/*
   1529 		 * In netmap mode, fill the map and set the buffer
   1530 		 * address in the NIC ring, considering the offset
   1531 		 * between the netmap and NIC rings (see comment in
   1532 		 * ixgbe_setup_transmit_ring() ). No need to allocate
   1533 		 * an mbuf, so end the block with a continue;
   1534 		 */
   1535 		if (slot) {
   1536 			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
   1537 			uint64_t paddr;
   1538 			void *addr;
   1539 
   1540 			addr = PNMB(na, slot + sj, &paddr);
   1541 			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
   1542 			/* Update descriptor and the cached value */
   1543 			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
   1544 			rxbuf->addr = htole64(paddr);
   1545 			continue;
   1546 		}
   1547 #endif /* DEV_NETMAP */
   1548 		rxbuf->flags = 0;
   1549 		rxbuf->buf = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
   1550 		    MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
   1551 		if (rxbuf->buf == NULL) {
   1552 			error = ENOBUFS;
   1553                         goto fail;
   1554 		}
   1555 		mp = rxbuf->buf;
   1556 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   1557 		/* Get the memory mapping */
   1558 		error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
   1559 		    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
   1560 		if (error != 0)
   1561                         goto fail;
   1562 		bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1563 		    0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
   1564 		/* Update the descriptor and the cached value */
   1565 		rxr->rx_base[j].read.pkt_addr =
   1566 		    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1567 		rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1568 	}
   1569 
   1570 
   1571 	/* Setup our descriptor indices */
   1572 	rxr->next_to_check = 0;
   1573 	rxr->next_to_refresh = 0;
   1574 	rxr->lro_enabled = FALSE;
   1575 	rxr->rx_copies.ev_count = 0;
   1576 #if 0 /* NetBSD */
   1577 	rxr->rx_bytes.ev_count = 0;
   1578 #if 1	/* Fix inconsistency */
   1579 	rxr->rx_packets.ev_count = 0;
   1580 #endif
   1581 #endif
   1582 	rxr->vtag_strip = FALSE;
   1583 
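         	/*
         	 * Sync the freshly initialized descriptor ring so the hardware
         	 * sees it once the ring is enabled.
         	 */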
   1584 	ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   1585 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   1586 
   1587 	/*
   1588 	** Now set up the LRO interface:
   1589 	*/
   1590 	if (ixgbe_rsc_enable)
   1591 		ixgbe_setup_hw_rsc(rxr);
   1592 #ifdef LRO
   1593 	else if (ifp->if_capenable & IFCAP_LRO) {
   1594 		device_t dev = adapter->dev;
   1595 		int err = tcp_lro_init(lro);
   1596 		if (err) {
   1597 			device_printf(dev, "LRO Initialization failed!\n");
   1598 			goto fail;
   1599 		}
   1600 		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
   1601 		rxr->lro_enabled = TRUE;
   1602 		lro->ifp = adapter->ifp;
   1603 	}
   1604 #endif /* LRO */
   1605 
   1606 	IXGBE_RX_UNLOCK(rxr);
   1607 	return (0);
   1608 
   1609 fail:
   1610 	ixgbe_free_receive_ring(rxr);
   1611 	IXGBE_RX_UNLOCK(rxr);
   1612 	return (error);
   1613 }
   1614 
   1615 /*********************************************************************
   1616  *
   1617  *  Initialize all receive rings.
   1618  *
   1619  **********************************************************************/
   1620 int
   1621 ixgbe_setup_receive_structures(struct adapter *adapter)
   1622 {
   1623 	struct rx_ring *rxr = adapter->rx_rings;
   1624 	int j;
   1625 
   1626 	for (j = 0; j < adapter->num_queues; j++, rxr++)
   1627 		if (ixgbe_setup_receive_ring(rxr))
   1628 			goto fail;
   1629 
   1630 	return (0);
   1631 fail:
    1632 	/*
    1633 	 * Free the RX buffers allocated so far; we only handle the
    1634 	 * rings that completed, since the failing ring will have
    1635 	 * cleaned up after itself. 'j' failed, so it's the terminus.
    1636 	 */
   1637 	for (int i = 0; i < j; ++i) {
   1638 		rxr = &adapter->rx_rings[i];
   1639 		ixgbe_free_receive_ring(rxr);
   1640 	}
   1641 
   1642 	return (ENOBUFS);
   1643 }
   1644 
   1645 
   1646 /*********************************************************************
   1647  *
   1648  *  Free all receive rings.
   1649  *
   1650  **********************************************************************/
   1651 void
   1652 ixgbe_free_receive_structures(struct adapter *adapter)
   1653 {
   1654 	struct rx_ring *rxr = adapter->rx_rings;
   1655 
   1656 	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
   1657 
   1658 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
   1659 #ifdef LRO
   1660 		struct lro_ctrl		*lro = &rxr->lro;
   1661 #endif /* LRO */
   1662 		ixgbe_free_receive_buffers(rxr);
   1663 #ifdef LRO
   1664 		/* Free LRO memory */
   1665 		tcp_lro_free(lro);
   1666 #endif /* LRO */
   1667 		/* Free the ring memory as well */
   1668 		ixgbe_dma_free(adapter, &rxr->rxdma);
   1669 		IXGBE_RX_LOCK_DESTROY(rxr);
   1670 	}
   1671 
   1672 	free(adapter->rx_rings, M_DEVBUF);
   1673 }
   1674 
   1675 
   1676 /*********************************************************************
   1677  *
   1678  *  Free receive ring data structures
   1679  *
   1680  **********************************************************************/
   1681 static void
   1682 ixgbe_free_receive_buffers(struct rx_ring *rxr)
   1683 {
   1684 	struct adapter		*adapter = rxr->adapter;
   1685 	struct ixgbe_rx_buf	*rxbuf;
   1686 
   1687 	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
   1688 
   1689 	/* Cleanup any existing buffers */
   1690 	if (rxr->rx_buffers != NULL) {
   1691 		for (int i = 0; i < adapter->num_rx_desc; i++) {
   1692 			rxbuf = &rxr->rx_buffers[i];
   1693 			if (rxbuf->buf != NULL) {
   1694 				bus_dmamap_sync(rxr->ptag->dt_dmat,
   1695 				    rxbuf->pmap, 0, rxbuf->buf->m_pkthdr.len,
   1696 				    BUS_DMASYNC_POSTREAD);
   1697 				ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   1698 				rxbuf->buf->m_flags |= M_PKTHDR;
   1699 				m_freem(rxbuf->buf);
   1700 			}
   1701 			rxbuf->buf = NULL;
   1702 			if (rxbuf->pmap != NULL) {
   1703 				ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
   1704 				rxbuf->pmap = NULL;
   1705 			}
   1706 		}
   1707 		if (rxr->rx_buffers != NULL) {
   1708 			free(rxr->rx_buffers, M_DEVBUF);
   1709 			rxr->rx_buffers = NULL;
   1710 		}
   1711 	}
   1712 
   1713 	if (rxr->ptag != NULL) {
   1714 		ixgbe_dma_tag_destroy(rxr->ptag);
   1715 		rxr->ptag = NULL;
   1716 	}
   1717 
   1718 	return;
   1719 }
   1720 
   1721 static __inline void
   1722 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
   1723 {
   1724 	int s;
   1725 
   1726 #ifdef LRO
   1727 	struct adapter	*adapter = ifp->if_softc;
   1728 	struct ethercom *ec = &adapter->osdep.ec;
   1729 
   1730         /*
   1731          * ATM LRO is only for IP/TCP packets and TCP checksum of the packet
   1732          * should be computed by hardware. Also it should not have VLAN tag in
   1733          * ethernet header.  In case of IPv6 we do not yet support ext. hdrs.
   1734          */
   1735         if (rxr->lro_enabled &&
   1736             (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
   1737             (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
   1738             ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
   1739             (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
   1740             (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
   1741             (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
   1742             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
   1743             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
   1744                 /*
   1745                  * Send to the stack if:
   1746                  **  - LRO not enabled, or
   1747                  **  - no LRO resources, or
   1748                  **  - lro enqueue fails
   1749                  */
   1750                 if (rxr->lro.lro_cnt != 0)
   1751                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
   1752                                 return;
   1753         }
   1754 #endif /* LRO */
   1755 
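         	/*
         	 * The ring lock is not held across if_input(); the packet is
         	 * handed to the stack at splnet and the lock is retaken before
         	 * returning to the receive loop.
         	 */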
   1756 	IXGBE_RX_UNLOCK(rxr);
   1757 
   1758 	s = splnet();
   1759 	if_input(ifp, m);
   1760 	splx(s);
   1761 
   1762 	IXGBE_RX_LOCK(rxr);
   1763 }
   1764 
   1765 static __inline void
   1766 ixgbe_rx_discard(struct rx_ring *rxr, int i)
   1767 {
   1768 	struct ixgbe_rx_buf	*rbuf;
   1769 
   1770 	rbuf = &rxr->rx_buffers[i];
   1771 
   1772 
   1773 	/*
   1774 	** With advanced descriptors the writeback
   1775 	** clobbers the buffer addrs, so its easier
   1776 	** to just free the existing mbufs and take
   1777 	** the normal refresh path to get new buffers
   1778 	** and mapping.
   1779 	*/
   1780 
    1781 	if (rbuf->fmp != NULL) {	/* Partial chain ? */
   1782 		rbuf->fmp->m_flags |= M_PKTHDR;
   1783 		m_freem(rbuf->fmp);
   1784 		rbuf->fmp = NULL;
   1785 		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
   1786 	} else if (rbuf->buf) {
   1787 		m_free(rbuf->buf);
   1788 		rbuf->buf = NULL;
   1789 	}
   1790 	ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
   1791 
   1792 	rbuf->flags = 0;
   1793 
   1794 	return;
   1795 }
   1796 
   1797 
   1798 /*********************************************************************
   1799  *
    1800  *  This routine executes in interrupt context. It replenishes
    1801  *  the mbufs in the descriptor ring and sends data which has been
    1802  *  DMA'd into host memory to the upper layer.
   1803  *
   1804  *  Return TRUE for more work, FALSE for all clean.
   1805  *********************************************************************/
   1806 bool
   1807 ixgbe_rxeof(struct ix_queue *que)
   1808 {
   1809 	struct adapter		*adapter = que->adapter;
   1810 	struct rx_ring		*rxr = que->rxr;
   1811 	struct ifnet		*ifp = adapter->ifp;
   1812 #ifdef LRO
   1813 	struct lro_ctrl		*lro = &rxr->lro;
   1814 #endif /* LRO */
   1815 	int			i, nextp, processed = 0;
   1816 	u32			staterr = 0;
   1817 	u32			count = adapter->rx_process_limit;
   1818 	union ixgbe_adv_rx_desc	*cur;
   1819 	struct ixgbe_rx_buf	*rbuf, *nbuf;
   1820 #ifdef RSS
   1821 	u16			pkt_info;
   1822 #endif
   1823 
   1824 	IXGBE_RX_LOCK(rxr);
   1825 
   1826 #ifdef DEV_NETMAP
   1827 	/* Same as the txeof routine: wakeup clients on intr. */
   1828 	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
   1829 		IXGBE_RX_UNLOCK(rxr);
   1830 		return (FALSE);
   1831 	}
   1832 #endif /* DEV_NETMAP */
   1833 
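         	/*
         	 * Main cleanup loop: walk the ring from next_to_check until we
         	 * either exhaust the rx_process_limit budget or find a
         	 * descriptor whose DD (descriptor done) bit is not yet set.
         	 */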
   1834 	for (i = rxr->next_to_check; count != 0;) {
   1835 		struct mbuf	*sendmp, *mp;
   1836 		u32		rsc, ptype;
   1837 		u16		len;
   1838 		u16		vtag = 0;
   1839 		bool		eop;
   1840 
   1841 		/* Sync the ring. */
   1842 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   1843 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   1844 
   1845 		cur = &rxr->rx_base[i];
   1846 		staterr = le32toh(cur->wb.upper.status_error);
   1847 #ifdef RSS
   1848 		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
   1849 #endif
   1850 
   1851 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
   1852 			break;
   1853 		if ((ifp->if_flags & IFF_RUNNING) == 0)
   1854 			break;
   1855 
   1856 		count--;
   1857 		sendmp = NULL;
   1858 		nbuf = NULL;
   1859 		rsc = 0;
   1860 		cur->wb.upper.status_error = 0;
   1861 		rbuf = &rxr->rx_buffers[i];
   1862 		mp = rbuf->buf;
   1863 
   1864 		len = le16toh(cur->wb.upper.length);
   1865 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
   1866 		    IXGBE_RXDADV_PKTTYPE_MASK;
   1867 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
   1868 
   1869 		/* Make sure bad packets are discarded */
   1870 		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
   1871 #if __FreeBSD_version >= 1100036
   1872 			if (IXGBE_IS_VF(adapter))
   1873 				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
   1874 #endif
   1875 			rxr->rx_discarded.ev_count++;
   1876 			ixgbe_rx_discard(rxr, i);
   1877 			goto next_desc;
   1878 		}
   1879 
   1880 		/*
   1881 		** On 82599 which supports a hardware
   1882 		** LRO (called HW RSC), packets need
   1883 		** not be fragmented across sequential
    1884 		** descriptors; rather, the next descriptor
    1885 		** is indicated in bits of the descriptor.
    1886 		** This also means that we might process
    1887 		** more than one packet at a time, something
    1888 		** that has never been true before; it
   1889 		** required eliminating global chain pointers
   1890 		** in favor of what we are doing here.  -jfv
   1891 		*/
   1892 		if (!eop) {
   1893 			/*
   1894 			** Figure out the next descriptor
   1895 			** of this frame.
   1896 			*/
   1897 			if (rxr->hw_rsc == TRUE) {
   1898 				rsc = ixgbe_rsc_count(cur);
   1899 				rxr->rsc_num += (rsc - 1);
   1900 			}
   1901 			if (rsc) { /* Get hardware index */
   1902 				nextp = ((staterr &
   1903 				    IXGBE_RXDADV_NEXTP_MASK) >>
   1904 				    IXGBE_RXDADV_NEXTP_SHIFT);
   1905 			} else { /* Just sequential */
   1906 				nextp = i + 1;
   1907 				if (nextp == adapter->num_rx_desc)
   1908 					nextp = 0;
   1909 			}
   1910 			nbuf = &rxr->rx_buffers[nextp];
   1911 			prefetch(nbuf);
   1912 		}
   1913 		/*
   1914 		** Rather than using the fmp/lmp global pointers
   1915 		** we now keep the head of a packet chain in the
   1916 		** buffer struct and pass this along from one
   1917 		** descriptor to the next, until we get EOP.
   1918 		*/
   1919 		mp->m_len = len;
   1920 		/*
   1921 		** See if there is a stored head
   1922 		** that determines what we are
   1923 		*/
   1924 		sendmp = rbuf->fmp;
   1925 		if (sendmp != NULL) {  /* secondary frag */
   1926 			rbuf->buf = rbuf->fmp = NULL;
   1927 			mp->m_flags &= ~M_PKTHDR;
   1928 			sendmp->m_pkthdr.len += mp->m_len;
   1929 		} else {
   1930 			/*
   1931 			 * Optimize.  This might be a small packet,
   1932 			 * maybe just a TCP ACK.  Do a fast copy that
   1933 			 * is cache aligned into a new mbuf, and
   1934 			 * leave the old mbuf+cluster for re-use.
   1935 			 */
   1936 			if (eop && len <= IXGBE_RX_COPY_LEN) {
   1937 				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
   1938 				if (sendmp != NULL) {
   1939 					sendmp->m_data +=
   1940 					    IXGBE_RX_COPY_ALIGN;
   1941 					ixgbe_bcopy(mp->m_data,
   1942 					    sendmp->m_data, len);
   1943 					sendmp->m_len = len;
   1944 					rxr->rx_copies.ev_count++;
   1945 					rbuf->flags |= IXGBE_RX_COPY;
   1946 				}
   1947 			}
   1948 			if (sendmp == NULL) {
   1949 				rbuf->buf = rbuf->fmp = NULL;
   1950 				sendmp = mp;
   1951 			}
   1952 
   1953 			/* first desc of a non-ps chain */
   1954 			sendmp->m_flags |= M_PKTHDR;
   1955 			sendmp->m_pkthdr.len = mp->m_len;
   1956 		}
   1957 		++processed;
   1958 
   1959 		/* Pass the head pointer on */
   1960 		if (eop == 0) {
   1961 			nbuf->fmp = sendmp;
   1962 			sendmp = NULL;
   1963 			mp->m_next = nbuf->buf;
   1964 		} else { /* Sending this frame */
   1965 			m_set_rcvif(sendmp, ifp);
   1966 			ifp->if_ipackets++;
   1967 			rxr->rx_packets.ev_count++;
   1968 			/* capture data for AIM */
   1969 			rxr->bytes += sendmp->m_pkthdr.len;
   1970 			rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
   1971 			/* Process vlan info */
   1972 			if ((rxr->vtag_strip) &&
   1973 			    (staterr & IXGBE_RXD_STAT_VP))
   1974 				vtag = le16toh(cur->wb.upper.vlan);
   1975 			if (vtag) {
   1976 				VLAN_INPUT_TAG(ifp, sendmp, vtag,
   1977 				    printf("%s: could not apply VLAN "
   1978 					"tag", __func__));
   1979 			}
   1980 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
   1981 				ixgbe_rx_checksum(staterr, sendmp, ptype,
   1982 				   &adapter->stats.pf);
   1983 			}
   1984 
   1985 #if 0 /* FreeBSD */
   1986                         /*
   1987                          * In case of multiqueue, we have RXCSUM.PCSD bit set
   1988                          * and never cleared. This means we have RSS hash
   1989                          * available to be used.
   1990                          */
   1991                         if (adapter->num_queues > 1) {
   1992                                 sendmp->m_pkthdr.flowid =
   1993                                     le32toh(cur->wb.lower.hi_dword.rss);
   1994                                 switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
   1995                                     case IXGBE_RXDADV_RSSTYPE_IPV4:
   1996                                         M_HASHTYPE_SET(sendmp,
   1997                                             M_HASHTYPE_RSS_IPV4);
   1998                                         break;
   1999                                     case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
   2000                                         M_HASHTYPE_SET(sendmp,
   2001                                             M_HASHTYPE_RSS_TCP_IPV4);
   2002                                         break;
   2003                                     case IXGBE_RXDADV_RSSTYPE_IPV6:
   2004                                         M_HASHTYPE_SET(sendmp,
   2005                                             M_HASHTYPE_RSS_IPV6);
   2006                                         break;
   2007                                     case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
   2008                                         M_HASHTYPE_SET(sendmp,
   2009                                             M_HASHTYPE_RSS_TCP_IPV6);
   2010                                         break;
   2011                                     case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
   2012                                         M_HASHTYPE_SET(sendmp,
   2013                                             M_HASHTYPE_RSS_IPV6_EX);
   2014                                         break;
   2015                                     case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
   2016                                         M_HASHTYPE_SET(sendmp,
   2017                                             M_HASHTYPE_RSS_TCP_IPV6_EX);
   2018                                         break;
   2019 #if __FreeBSD_version > 1100000
   2020                                     case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
   2021                                         M_HASHTYPE_SET(sendmp,
   2022                                             M_HASHTYPE_RSS_UDP_IPV4);
   2023                                         break;
   2024                                     case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
   2025                                         M_HASHTYPE_SET(sendmp,
   2026                                             M_HASHTYPE_RSS_UDP_IPV6);
   2027                                         break;
   2028                                     case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
   2029                                         M_HASHTYPE_SET(sendmp,
   2030                                             M_HASHTYPE_RSS_UDP_IPV6_EX);
   2031                                         break;
   2032 #endif
   2033                                     default:
   2034                                         M_HASHTYPE_SET(sendmp,
   2035                                             M_HASHTYPE_OPAQUE_HASH);
   2036                                 }
   2037                         } else {
   2038                                 sendmp->m_pkthdr.flowid = que->msix;
   2039 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
   2040 			}
   2041 #endif
   2042 		}
   2043 next_desc:
   2044 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   2045 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   2046 
   2047 		/* Advance our pointers to the next descriptor. */
   2048 		if (++i == rxr->num_desc)
   2049 			i = 0;
   2050 
   2051 		/* Now send to the stack or do LRO */
   2052 		if (sendmp != NULL) {
   2053 			rxr->next_to_check = i;
   2054 			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
   2055 			i = rxr->next_to_check;
   2056 		}
   2057 
    2058 		/* Every 8 descriptors we go to refresh mbufs */
   2059 		if (processed == 8) {
   2060 			ixgbe_refresh_mbufs(rxr, i);
   2061 			processed = 0;
   2062 		}
   2063 	}
   2064 
   2065 	/* Refresh any remaining buf structs */
   2066 	if (ixgbe_rx_unrefreshed(rxr))
   2067 		ixgbe_refresh_mbufs(rxr, i);
   2068 
   2069 	rxr->next_to_check = i;
   2070 
   2071 #ifdef LRO
   2072 	/*
   2073 	 * Flush any outstanding LRO work
   2074 	 */
   2075 	tcp_lro_flush_all(lro);
   2076 #endif /* LRO */
   2077 
   2078 	IXGBE_RX_UNLOCK(rxr);
   2079 
   2080 	/*
   2081 	** Still have cleaning to do?
   2082 	*/
   2083 	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
   2084 		return true;
   2085 	else
   2086 		return false;
   2087 }
   2088 
   2089 
   2090 /*********************************************************************
   2091  *
   2092  *  Verify that the hardware indicated that the checksum is valid.
   2093  *  Inform the stack about the status of checksum so that stack
   2094  *  doesn't spend time verifying the checksum.
   2095  *
   2096  *********************************************************************/
   2097 static void
   2098 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
   2099     struct ixgbe_hw_stats *stats)
   2100 {
   2101 	u16	status = (u16) staterr;
   2102 	u8	errors = (u8) (staterr >> 24);
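         	/*
         	 * The low 16 bits of staterr carry the descriptor status bits;
         	 * the top byte carries the error bits tested below.
         	 */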
   2103 #if 0
   2104 	bool	sctp = false;
   2105 
   2106 	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
   2107 	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
   2108 		sctp = true;
   2109 #endif
   2110 
   2111 	/* IPv4 checksum */
   2112 	if (status & IXGBE_RXD_STAT_IPCS) {
   2113 		stats->ipcs.ev_count++;
   2114 		if (!(errors & IXGBE_RXD_ERR_IPE)) {
   2115 			/* IP Checksum Good */
   2116 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
   2117 		} else {
   2118 			stats->ipcs_bad.ev_count++;
   2119 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
   2120 		}
   2121 	}
   2122 	/* TCP/UDP/SCTP checksum */
   2123 	if (status & IXGBE_RXD_STAT_L4CS) {
   2124 		stats->l4cs.ev_count++;
   2125 		int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
   2126 		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
   2127 			mp->m_pkthdr.csum_flags |= type;
   2128 		} else {
   2129 			stats->l4cs_bad.ev_count++;
   2130 			mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
   2131 		}
   2132 	}
   2133 }
   2134 
   2135 
   2136 /********************************************************************
   2137  * Manage DMA'able memory.
   2138  *******************************************************************/
   2139 
   2140 int
   2141 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
   2142 		struct ixgbe_dma_alloc *dma, const int mapflags)
   2143 {
   2144 	device_t dev = adapter->dev;
   2145 	int             r, rsegs;
   2146 
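         	/*
         	 * Standard bus_dma(9) sequence: create a tag, allocate DMA-safe
         	 * memory, map it into kernel virtual space, create a map and
         	 * load it; the fail_* labels unwind in reverse order.
         	 */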
   2147 	r = ixgbe_dma_tag_create(adapter->osdep.dmat,	/* parent */
   2148 			       DBA_ALIGN, 0,	/* alignment, bounds */
   2149 			       size,	/* maxsize */
   2150 			       1,	/* nsegments */
   2151 			       size,	/* maxsegsize */
   2152 			       BUS_DMA_ALLOCNOW,	/* flags */
   2153 			       &dma->dma_tag);
   2154 	if (r != 0) {
   2155 		aprint_error_dev(dev,
   2156 		    "%s: ixgbe_dma_tag_create failed; error %d\n", __func__, r);
   2157 		goto fail_0;
   2158 	}
   2159 
   2160 	r = bus_dmamem_alloc(dma->dma_tag->dt_dmat,
   2161 		size,
   2162 		dma->dma_tag->dt_alignment,
   2163 		dma->dma_tag->dt_boundary,
   2164 		&dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
   2165 	if (r != 0) {
   2166 		aprint_error_dev(dev,
   2167 		    "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
   2168 		goto fail_1;
   2169 	}
   2170 
   2171 	r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
   2172 	    size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
   2173 	if (r != 0) {
   2174 		aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
   2175 		    __func__, r);
   2176 		goto fail_2;
   2177 	}
   2178 
   2179 	r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
   2180 	if (r != 0) {
    2181 		aprint_error_dev(dev, "%s: ixgbe_dmamap_create failed; error %d\n",
   2182 		    __func__, r);
   2183 		goto fail_3;
   2184 	}
   2185 
   2186 	r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map, dma->dma_vaddr,
   2187 			    size,
   2188 			    NULL,
   2189 			    mapflags | BUS_DMA_NOWAIT);
   2190 	if (r != 0) {
   2191 		aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
   2192 		    __func__, r);
   2193 		goto fail_4;
   2194 	}
   2195 	dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
   2196 	dma->dma_size = size;
   2197 	return 0;
   2198 fail_4:
   2199 	ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
   2200 fail_3:
   2201 	bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
   2202 fail_2:
   2203 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
   2204 fail_1:
   2205 	ixgbe_dma_tag_destroy(dma->dma_tag);
   2206 fail_0:
   2207 	return r;
   2208 }
   2209 
   2210 void
   2211 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
   2212 {
   2213 	bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
   2214 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   2215 	ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
   2216 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
   2217 	ixgbe_dma_tag_destroy(dma->dma_tag);
   2218 }
   2219 
   2220 
   2221 /*********************************************************************
   2222  *
   2223  *  Allocate memory for the transmit and receive rings, and then
   2224  *  the descriptors associated with each, called only once at attach.
   2225  *
   2226  **********************************************************************/
   2227 int
   2228 ixgbe_allocate_queues(struct adapter *adapter)
   2229 {
   2230 	device_t	dev = adapter->dev;
   2231 	struct ix_queue	*que;
   2232 	struct tx_ring	*txr;
   2233 	struct rx_ring	*rxr;
   2234 	int rsize, tsize, error = IXGBE_SUCCESS;
   2235 	int txconf = 0, rxconf = 0;
   2236 #ifdef PCI_IOV
   2237 	enum ixgbe_iov_mode iov_mode;
   2238 #endif
   2239 
   2240         /* First allocate the top level queue structs */
   2241         if (!(adapter->queues =
   2242             (struct ix_queue *) malloc(sizeof(struct ix_queue) *
   2243             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
   2244                 aprint_error_dev(dev, "Unable to allocate queue memory\n");
   2245                 error = ENOMEM;
   2246                 goto fail;
   2247         }
   2248 
    2249 	/* Next allocate the TX ring struct memory */
   2250 	if (!(adapter->tx_rings =
   2251 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
   2252 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
   2253 		aprint_error_dev(dev, "Unable to allocate TX ring memory\n");
   2254 		error = ENOMEM;
   2255 		goto tx_fail;
   2256 	}
   2257 
   2258 	/* Next allocate the RX */
   2259 	if (!(adapter->rx_rings =
   2260 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
   2261 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
   2262 		aprint_error_dev(dev, "Unable to allocate RX ring memory\n");
   2263 		error = ENOMEM;
   2264 		goto rx_fail;
   2265 	}
   2266 
   2267 	/* For the ring itself */
   2268 	tsize = roundup2(adapter->num_tx_desc *
   2269 	    sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
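         	/*
         	 * Descriptor ring sizes are rounded up to DBA_ALIGN, the same
         	 * alignment ixgbe_dma_malloc() requests for the descriptor
         	 * memory.
         	 */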
   2270 
   2271 #ifdef PCI_IOV
   2272 	iov_mode = ixgbe_get_iov_mode(adapter);
   2273 	adapter->pool = ixgbe_max_vfs(iov_mode);
   2274 #else
   2275 	adapter->pool = 0;
   2276 #endif
   2277 	/*
   2278 	 * Now set up the TX queues, txconf is needed to handle the
   2279 	 * possibility that things fail midcourse and we need to
   2280 	 * undo memory gracefully
   2281 	 */
   2282 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
   2283 		/* Set up some basics */
   2284 		txr = &adapter->tx_rings[i];
   2285 		txr->adapter = adapter;
   2286 #ifdef PCI_IOV
   2287 		txr->me = ixgbe_pf_que_index(iov_mode, i);
   2288 #else
   2289 		txr->me = i;
   2290 #endif
   2291 		txr->num_desc = adapter->num_tx_desc;
   2292 
   2293 		/* Initialize the TX side lock */
   2294 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
   2295 		    device_xname(dev), txr->me);
   2296 		mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
   2297 
   2298 		if (ixgbe_dma_malloc(adapter, tsize,
   2299 			&txr->txdma, BUS_DMA_NOWAIT)) {
   2300 			aprint_error_dev(dev,
   2301 			    "Unable to allocate TX Descriptor memory\n");
   2302 			error = ENOMEM;
   2303 			goto err_tx_desc;
   2304 		}
   2305 		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
   2306 		bzero((void *)txr->tx_base, tsize);
   2307 
    2308 		/* Now allocate transmit buffers for the ring */
    2309 		if (ixgbe_allocate_transmit_buffers(txr)) {
   2310 			aprint_error_dev(dev,
   2311 			    "Critical Failure setting up transmit buffers\n");
   2312 			error = ENOMEM;
   2313 			goto err_tx_desc;
    2314 		}
   2315 #ifndef IXGBE_LEGACY_TX
   2316 		/* Allocate a buf ring */
   2317 		txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
   2318 		    M_WAITOK, &txr->tx_mtx);
   2319 		if (txr->br == NULL) {
   2320 			aprint_error_dev(dev,
   2321 			    "Critical Failure setting up buf ring\n");
   2322 			error = ENOMEM;
   2323 			goto err_tx_desc;
    2324 		}
   2325 #endif
   2326 	}
   2327 
   2328 	/*
   2329 	 * Next the RX queues...
   2330 	 */
   2331 	rsize = roundup2(adapter->num_rx_desc *
   2332 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
   2333 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
   2334 		rxr = &adapter->rx_rings[i];
   2335 		/* Set up some basics */
   2336 		rxr->adapter = adapter;
   2337 #ifdef PCI_IOV
   2338 		rxr->me = ixgbe_pf_que_index(iov_mode, i);
   2339 #else
   2340 		rxr->me = i;
   2341 #endif
   2342 		rxr->num_desc = adapter->num_rx_desc;
   2343 
   2344 		/* Initialize the RX side lock */
   2345 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
   2346 		    device_xname(dev), rxr->me);
   2347 		mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
   2348 
   2349 		if (ixgbe_dma_malloc(adapter, rsize,
   2350 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
   2351 			aprint_error_dev(dev,
    2352 		    "Unable to allocate RX Descriptor memory\n");
   2353 			error = ENOMEM;
   2354 			goto err_rx_desc;
   2355 		}
   2356 		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
   2357 		bzero((void *)rxr->rx_base, rsize);
   2358 
    2359 		/* Allocate receive buffers for the ring */
   2360 		if (ixgbe_allocate_receive_buffers(rxr)) {
   2361 			aprint_error_dev(dev,
   2362 			    "Critical Failure setting up receive buffers\n");
   2363 			error = ENOMEM;
   2364 			goto err_rx_desc;
   2365 		}
   2366 	}
   2367 
   2368 	/*
   2369 	** Finally set up the queue holding structs
   2370 	*/
   2371 	for (int i = 0; i < adapter->num_queues; i++) {
   2372 		que = &adapter->queues[i];
   2373 		que->adapter = adapter;
   2374 		que->me = i;
   2375 		que->txr = &adapter->tx_rings[i];
   2376 		que->rxr = &adapter->rx_rings[i];
   2377 	}
   2378 
   2379 	return (0);
   2380 
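         /*
          * Error unwind: free the descriptor DMA areas of any rings that were
          * fully set up, then the ring arrays and the queue array.
          */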
   2381 err_rx_desc:
   2382 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
   2383 		ixgbe_dma_free(adapter, &rxr->rxdma);
   2384 err_tx_desc:
   2385 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
   2386 		ixgbe_dma_free(adapter, &txr->txdma);
   2387 	free(adapter->rx_rings, M_DEVBUF);
   2388 rx_fail:
   2389 	free(adapter->tx_rings, M_DEVBUF);
   2390 tx_fail:
   2391 	free(adapter->queues, M_DEVBUF);
   2392 fail:
   2393 	return (error);
   2394 }
   2395 
   2396