Home | History | Annotate | Line # | Download | only in ixgbe
ix_txrx.c revision 1.10
      1 /******************************************************************************
      2 
      3   Copyright (c) 2001-2015, Intel Corporation
      4   All rights reserved.
      5 
      6   Redistribution and use in source and binary forms, with or without
      7   modification, are permitted provided that the following conditions are met:
      8 
      9    1. Redistributions of source code must retain the above copyright notice,
     10       this list of conditions and the following disclaimer.
     11 
     12    2. Redistributions in binary form must reproduce the above copyright
     13       notice, this list of conditions and the following disclaimer in the
     14       documentation and/or other materials provided with the distribution.
     15 
     16    3. Neither the name of the Intel Corporation nor the names of its
     17       contributors may be used to endorse or promote products derived from
     18       this software without specific prior written permission.
     19 
     20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     30   POSSIBILITY OF SUCH DAMAGE.
     31 
     32 ******************************************************************************/
     33 /*
     34  * Copyright (c) 2011 The NetBSD Foundation, Inc.
     35  * All rights reserved.
     36  *
     37  * This code is derived from software contributed to The NetBSD Foundation
     38  * by Coyote Point Systems, Inc.
     39  *
     40  * Redistribution and use in source and binary forms, with or without
     41  * modification, are permitted provided that the following conditions
     42  * are met:
     43  * 1. Redistributions of source code must retain the above copyright
     44  *    notice, this list of conditions and the following disclaimer.
     45  * 2. Redistributions in binary form must reproduce the above copyright
     46  *    notice, this list of conditions and the following disclaimer in the
     47  *    documentation and/or other materials provided with the distribution.
     48  *
     49  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     50  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     51  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     52  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     53  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     54  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     55  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     56  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     57  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     58  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     59  * POSSIBILITY OF SUCH DAMAGE.
     60  */
     61 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 301538 2016-06-07 04:51:50Z sephe $*/
     62 /*$NetBSD: ix_txrx.c,v 1.10 2016/12/05 08:50:29 msaitoh Exp $*/
     63 
     64 #include "opt_inet.h"
     65 #include "opt_inet6.h"
     66 
     67 #include "ixgbe.h"
     68 
     69 #ifdef DEV_NETMAP
     70 #include <net/netmap.h>
     71 #include <sys/selinfo.h>
     72 #include <dev/netmap/netmap_kern.h>
     73 
     74 extern int ix_crcstrip;
     75 #endif
     76 
/*
** HW RSC control:
**  This feature only works with IPv4, and only on 82599 and later.
**  It also causes IP forwarding to fail, and unlike LRO it cannot be
**  controlled by the stack. For these reasons it is left off with no
**  tunable interface; enabling it requires recompiling the driver.
*/
     89 static bool ixgbe_rsc_enable = FALSE;
     90 
     91 #ifdef IXGBE_FDIR
/*
** For Flow Director: this is the TX packet sampling interval for
** the filter pool. With the default of 20, every 20th packet is
** probed.
**
** This feature can be disabled by setting this to 0.
*/
    101 static int atr_sample_rate = 20;
    102 #endif
    103 
    104 /*********************************************************************
    105  *  Local Function prototypes
    106  *********************************************************************/
    107 static void	ixgbe_setup_transmit_ring(struct tx_ring *);
    108 static void     ixgbe_free_transmit_buffers(struct tx_ring *);
    109 static int	ixgbe_setup_receive_ring(struct rx_ring *);
    110 static void     ixgbe_free_receive_buffers(struct rx_ring *);
    111 
    112 static void	ixgbe_rx_checksum(u32, struct mbuf *, u32,
    113 		    struct ixgbe_hw_stats *);
    114 static void	ixgbe_refresh_mbufs(struct rx_ring *, int);
    115 static int      ixgbe_xmit(struct tx_ring *, struct mbuf *);
    116 static int	ixgbe_tx_ctx_setup(struct tx_ring *,
    117 		    struct mbuf *, u32 *, u32 *);
    118 static int	ixgbe_tso_setup(struct tx_ring *,
    119 		    struct mbuf *, u32 *, u32 *);
    120 #ifdef IXGBE_FDIR
    121 static void	ixgbe_atr(struct tx_ring *, struct mbuf *);
    122 #endif
    123 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
    124 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
    125 		    struct mbuf *, u32);
    126 
    127 static void	ixgbe_setup_hw_rsc(struct rx_ring *);
    128 
    129 #ifdef IXGBE_LEGACY_TX
    130 /*********************************************************************
    131  *  Transmit entry point
    132  *
    133  *  ixgbe_start is called by the stack to initiate a transmit.
    134  *  The driver will remain in this routine as long as there are
    135  *  packets to transmit and transmit resources are available.
    136  *  In case resources are not available stack is notified and
    137  *  the packet is requeued.
    138  **********************************************************************/
    139 
    140 void
    141 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
    142 {
    143 	int rc;
    144 	struct mbuf    *m_head;
    145 	struct adapter *adapter = txr->adapter;
    146 
    147 	IXGBE_TX_LOCK_ASSERT(txr);
    148 
    149 	if ((ifp->if_flags & IFF_RUNNING) == 0)
    150 		return;
    151 	if (!adapter->link_active)
    152 		return;
    153 
    154 	while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
    155 		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
    156 			break;
    157 
    158 		IFQ_POLL(&ifp->if_snd, m_head);
    159 		if (m_head == NULL)
    160 			break;
    161 
    162 		if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
    163 			break;
    164 		}
    165 		IFQ_DEQUEUE(&ifp->if_snd, m_head);
    166 		if (rc == EFBIG) {
    167 			struct mbuf *mtmp;
    168 
    169 			if ((mtmp = m_defrag(m_head, M_NOWAIT)) != NULL) {
    170 				m_head = mtmp;
    171 				rc = ixgbe_xmit(txr, m_head);
    172 				if (rc != 0)
    173 					adapter->efbig2_tx_dma_setup.ev_count++;
    174 			} else
    175 				adapter->m_defrag_failed.ev_count++;
    176 		}
    177 		if (rc != 0) {
    178 			m_freem(m_head);
    179 			continue;
    180 		}
    181 
    182 		/* Send a copy of the frame to the BPF listener */
    183 		bpf_mtap(ifp, m_head);
    184 	}
    185 	return;
    186 }
    187 
    188 /*
    189  * Legacy TX start - called by the stack, this
    190  * always uses the first tx ring, and should
    191  * not be used with multiqueue tx enabled.
    192  */
    193 void
    194 ixgbe_start(struct ifnet *ifp)
    195 {
    196 	struct adapter *adapter = ifp->if_softc;
    197 	struct tx_ring	*txr = adapter->tx_rings;
    198 
    199 	if (ifp->if_flags & IFF_RUNNING) {
    200 		IXGBE_TX_LOCK(txr);
    201 		ixgbe_start_locked(txr, ifp);
    202 		IXGBE_TX_UNLOCK(txr);
    203 	}
    204 	return;
    205 }
    206 
    207 #else /* ! IXGBE_LEGACY_TX */
    208 
    209 /*
    210 ** Multiqueue Transmit Entry Point
    211 ** (if_transmit function)
    212 */
    213 int
    214 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
    215 {
    216 	struct adapter	*adapter = ifp->if_softc;
    217 	struct ix_queue	*que;
    218 	struct tx_ring	*txr;
    219 	int 		i, err = 0;
    220 #ifdef	RSS
    221 	uint32_t bucket_id;
    222 #endif
    223 
    224 	/*
    225 	 * When doing RSS, map it to the same outbound queue
    226 	 * as the incoming flow would be mapped to.
    227 	 *
    228 	 * If everything is setup correctly, it should be the
    229 	 * same bucket that the current CPU we're on is.
    230 	 */
    231 #if __FreeBSD_version < 1100054
    232 	if (m->m_flags & M_FLOWID) {
    233 #else
    234 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
    235 #endif
    236 #ifdef	RSS
    237 		if (rss_hash2bucket(m->m_pkthdr.flowid,
    238 		    M_HASHTYPE_GET(m), &bucket_id) == 0) {
    239 			/* TODO: spit out something if bucket_id > num_queues? */
    240 			i = bucket_id % adapter->num_queues;
    241 #ifdef IXGBE_DEBUG
    242 			if (bucket_id > adapter->num_queues)
    243 				if_printf(ifp, "bucket_id (%d) > num_queues "
    244 				    "(%d)\n", bucket_id, adapter->num_queues);
    245 #endif
    246 		} else
    247 #endif
    248 			i = m->m_pkthdr.flowid % adapter->num_queues;
    249 	} else
    250 		i = curcpu % adapter->num_queues;
    251 
    252 	/* Check for a hung queue and pick alternative */
    253 	if (((1 << i) & adapter->active_queues) == 0)
    254 		i = ffsl(adapter->active_queues);
    255 
    256 	txr = &adapter->tx_rings[i];
    257 	que = &adapter->queues[i];
    258 
    259 	err = drbr_enqueue(ifp, txr->br, m);
    260 	if (err)
    261 		return (err);
    262 	if (IXGBE_TX_TRYLOCK(txr)) {
    263 		ixgbe_mq_start_locked(ifp, txr);
    264 		IXGBE_TX_UNLOCK(txr);
    265 	} else
    266 		softint_schedule(txr->txq_si);
    267 
    268 	return (0);
    269 }
    270 
    271 int
    272 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
    273 {
    274 	struct adapter  *adapter = txr->adapter;
    275 	struct mbuf     *next;
    276 	int             enqueued = 0, err = 0;
    277 
    278 	if (((ifp->if_flags & IFF_RUNNING) == 0) ||
    279 	    adapter->link_active == 0)
    280 		return (ENETDOWN);
    281 
    282 	/* Process the queue */
    283 #if __FreeBSD_version < 901504
    284 	next = drbr_dequeue(ifp, txr->br);
    285 	while (next != NULL) {
    286 		if ((err = ixgbe_xmit(txr, &next)) != 0) {
    287 			if (next != NULL)
    288 				err = drbr_enqueue(ifp, txr->br, next);
    289 #else
    290 	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
    291 		if ((err = ixgbe_xmit(txr, &next)) != 0) {
    292 			if (next == NULL) {
    293 				drbr_advance(ifp, txr->br);
    294 			} else {
    295 				drbr_putback(ifp, txr->br, next);
    296 			}
    297 #endif
    298 			break;
    299 		}
    300 #if __FreeBSD_version >= 901504
    301 		drbr_advance(ifp, txr->br);
    302 #endif
    303 		enqueued++;
    304 #if 0 // this is VF-only
    305 #if __FreeBSD_version >= 1100036
    306 		/*
    307 		 * Since we're looking at the tx ring, we can check
    308 		 * to see if we're a VF by examing our tail register
    309 		 * address.
    310 		 */
    311 		if (txr->tail < IXGBE_TDT(0) && next->m_flags & M_MCAST)
    312 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
    313 #endif
    314 #endif
    315 		/* Send a copy of the frame to the BPF listener */
    316 		bpf_mtap(ifp, next);
    317 		if ((ifp->if_flags & IFF_RUNNING) == 0)
    318 			break;
    319 #if __FreeBSD_version < 901504
    320 		next = drbr_dequeue(ifp, txr->br);
    321 #endif
    322 	}
    323 
    324 	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
    325 		ixgbe_txeof(txr);
    326 
    327 	return (err);
    328 }
    329 
    330 /*
    331  * Called from a taskqueue to drain queued transmit packets.
    332  */
    333 void
    334 ixgbe_deferred_mq_start(void *arg, int pending)
    335 {
    336 	struct tx_ring *txr = arg;
    337 	struct adapter *adapter = txr->adapter;
    338 	struct ifnet *ifp = adapter->ifp;
    339 
    340 	IXGBE_TX_LOCK(txr);
    341 	if (!drbr_empty(ifp, txr->br))
    342 		ixgbe_mq_start_locked(ifp, txr);
    343 	IXGBE_TX_UNLOCK(txr);
    344 }
    345 
    346 /*
    347  * Flush all ring buffers
    348  */
    349 void
    350 ixgbe_qflush(struct ifnet *ifp)
    351 {
    352 	struct adapter	*adapter = ifp->if_softc;
    353 	struct tx_ring	*txr = adapter->tx_rings;
    354 	struct mbuf	*m;
    355 
    356 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
    357 		IXGBE_TX_LOCK(txr);
    358 		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
    359 			m_freem(m);
    360 		IXGBE_TX_UNLOCK(txr);
    361 	}
    362 	if_qflush(ifp);
    363 }
    364 #endif /* IXGBE_LEGACY_TX */
    365 
    366 
    367 /*********************************************************************
    368  *
    369  *  This routine maps the mbufs to tx descriptors, allowing the
    370  *  TX engine to transmit the packets.
    371  *  	- return 0 on success, positive on failure
    372  *
    373  **********************************************************************/
    374 
    375 static int
    376 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
    377 {
    378 	struct m_tag *mtag;
    379 	struct adapter  *adapter = txr->adapter;
    380 	struct ethercom *ec = &adapter->osdep.ec;
    381 	u32		olinfo_status = 0, cmd_type_len;
    382 	int             i, j, error;
    383 	int		first;
    384 	bus_dmamap_t	map;
    385 	struct ixgbe_tx_buf *txbuf;
    386 	union ixgbe_adv_tx_desc *txd = NULL;
    387 
    388 	/* Basic descriptor defines */
    389         cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
    390 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
    391 
    392 	if ((mtag = VLAN_OUTPUT_TAG(ec, m_head)) != NULL)
    393         	cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
    394 
    395         /*
    396          * Important to capture the first descriptor
    397          * used because it will contain the index of
    398          * the one we tell the hardware to report back
    399          */
    400         first = txr->next_avail_desc;
    401 	txbuf = &txr->tx_buffers[first];
    402 	map = txbuf->map;
    403 
    404 	/*
    405 	 * Map the packet for DMA.
    406 	 */
    407 	error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map,
    408 	    m_head, BUS_DMA_NOWAIT);
    409 
    410 	if (__predict_false(error)) {
    411 
    412 		switch (error) {
    413 		case EAGAIN:
    414 			adapter->eagain_tx_dma_setup.ev_count++;
    415 			return EAGAIN;
    416 		case ENOMEM:
    417 			adapter->enomem_tx_dma_setup.ev_count++;
    418 			return EAGAIN;
    419 		case EFBIG:
    420 			/*
    421 			 * XXX Try it again?
    422 			 * do m_defrag() and retry bus_dmamap_load_mbuf().
    423 			 */
    424 			adapter->efbig_tx_dma_setup.ev_count++;
    425 			return error;
    426 		case EINVAL:
    427 			adapter->einval_tx_dma_setup.ev_count++;
    428 			return error;
    429 		default:
    430 			adapter->other_tx_dma_setup.ev_count++;
    431 			return error;
    432 		}
    433 	}
    434 
    435 	/* Make certain there are enough descriptors */
    436 	if (txr->tx_avail < (map->dm_nsegs + 2)) {
    437 		txr->no_desc_avail.ev_count++;
    438 		ixgbe_dmamap_unload(txr->txtag, txbuf->map);
    439 		return EAGAIN;
    440 	}
    441 
    442 	/*
    443 	 * Set up the appropriate offload context
    444 	 * this will consume the first descriptor
    445 	 */
    446 	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
    447 	if (__predict_false(error)) {
    448 		return (error);
    449 	}
    450 
    451 #ifdef IXGBE_FDIR
    452 	/* Do the flow director magic */
    453 	if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
    454 		++txr->atr_count;
    455 		if (txr->atr_count >= atr_sample_rate) {
    456 			ixgbe_atr(txr, m_head);
    457 			txr->atr_count = 0;
    458 		}
    459 	}
    460 #endif
    461 
    462 	olinfo_status |= IXGBE_ADVTXD_CC;
    463 	i = txr->next_avail_desc;
    464 	for (j = 0; j < map->dm_nsegs; j++) {
    465 		bus_size_t seglen;
    466 		bus_addr_t segaddr;
    467 
    468 		txbuf = &txr->tx_buffers[i];
    469 		txd = &txr->tx_base[i];
    470 		seglen = map->dm_segs[j].ds_len;
    471 		segaddr = htole64(map->dm_segs[j].ds_addr);
    472 
    473 		txd->read.buffer_addr = segaddr;
    474 		txd->read.cmd_type_len = htole32(txr->txd_cmd |
    475 		    cmd_type_len |seglen);
    476 		txd->read.olinfo_status = htole32(olinfo_status);
    477 
    478 		if (++i == txr->num_desc)
    479 			i = 0;
    480 	}
    481 
    482 	txd->read.cmd_type_len |=
    483 	    htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
    484 	txr->tx_avail -= map->dm_nsegs;
    485 	txr->next_avail_desc = i;
    486 
    487 	txbuf->m_head = m_head;
    488 	/*
    489 	 * Here we swap the map so the last descriptor,
    490 	 * which gets the completion interrupt has the
    491 	 * real map, and the first descriptor gets the
    492 	 * unused map from this descriptor.
    493 	 */
    494 	txr->tx_buffers[first].map = txbuf->map;
    495 	txbuf->map = map;
    496 	bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
    497 	    BUS_DMASYNC_PREWRITE);
    498 
    499         /* Set the EOP descriptor that will be marked done */
    500         txbuf = &txr->tx_buffers[first];
    501 	txbuf->eop = txd;
    502 
    503         ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
    504 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
    505 	/*
    506 	 * Advance the Transmit Descriptor Tail (Tdt), this tells the
    507 	 * hardware that this frame is available to transmit.
    508 	 */
    509 	++txr->total_packets.ev_count;
    510 	IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
    511 
    512 	/* Mark queue as having work */
    513 	if (txr->busy == 0)
    514 		txr->busy = 1;
    515 
    516 	return 0;
    517 }
    518 
    519 /*********************************************************************
    520  *
    521  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
    522  *  the information needed to transmit a packet on the wire. This is
    523  *  called only once at attach, setup is done every reset.
    524  *
    525  **********************************************************************/
    526 int
    527 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
    528 {
    529 	struct adapter *adapter = txr->adapter;
    530 	device_t dev = adapter->dev;
    531 	struct ixgbe_tx_buf *txbuf;
    532 	int error, i;
    533 
    534 	/*
    535 	 * Setup DMA descriptor areas.
    536 	 */
    537 	if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat,	/* parent */
    538 			       1, 0,		/* alignment, bounds */
    539 			       IXGBE_TSO_SIZE,		/* maxsize */
    540 			       adapter->num_segs,	/* nsegments */
    541 			       PAGE_SIZE,		/* maxsegsize */
    542 			       0,			/* flags */
    543 			       &txr->txtag))) {
    544 		aprint_error_dev(dev,"Unable to allocate TX DMA tag\n");
    545 		goto fail;
    546 	}
    547 
    548 	if (!(txr->tx_buffers =
    549 	    (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
    550 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
    551 		aprint_error_dev(dev, "Unable to allocate tx_buffer memory\n");
    552 		error = ENOMEM;
    553 		goto fail;
    554 	}
    555 
    556         /* Create the descriptor buffer dma maps */
    557 	txbuf = txr->tx_buffers;
    558 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
    559 		error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
    560 		if (error != 0) {
    561 			aprint_error_dev(dev,
    562 			    "Unable to create TX DMA map (%d)\n", error);
    563 			goto fail;
    564 		}
    565 	}
    566 
    567 	return 0;
    568 fail:
    569 	/* We free all, it handles case where we are in the middle */
    570 	ixgbe_free_transmit_structures(adapter);
    571 	return (error);
    572 }
    573 
    574 /*********************************************************************
    575  *
    576  *  Initialize a transmit ring.
    577  *
    578  **********************************************************************/
    579 static void
    580 ixgbe_setup_transmit_ring(struct tx_ring *txr)
    581 {
    582 	struct adapter *adapter = txr->adapter;
    583 	struct ixgbe_tx_buf *txbuf;
    584 #ifdef DEV_NETMAP
    585 	struct netmap_adapter *na = NA(adapter->ifp);
    586 	struct netmap_slot *slot;
    587 #endif /* DEV_NETMAP */
    588 
    589 	/* Clear the old ring contents */
    590 	IXGBE_TX_LOCK(txr);
    591 #ifdef DEV_NETMAP
    592 	/*
    593 	 * (under lock): if in netmap mode, do some consistency
    594 	 * checks and set slot to entry 0 of the netmap ring.
    595 	 */
    596 	slot = netmap_reset(na, NR_TX, txr->me, 0);
    597 #endif /* DEV_NETMAP */
    598 	bzero((void *)txr->tx_base,
    599 	      (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
    600 	/* Reset indices */
    601 	txr->next_avail_desc = 0;
    602 	txr->next_to_clean = 0;
    603 
    604 	/* Free any existing tx buffers. */
    605         txbuf = txr->tx_buffers;
    606 	for (int i = 0; i < txr->num_desc; i++, txbuf++) {
    607 		if (txbuf->m_head != NULL) {
    608 			bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
    609 			    0, txbuf->m_head->m_pkthdr.len,
    610 			    BUS_DMASYNC_POSTWRITE);
    611 			ixgbe_dmamap_unload(txr->txtag, txbuf->map);
    612 			m_freem(txbuf->m_head);
    613 			txbuf->m_head = NULL;
    614 		}
    615 #ifdef DEV_NETMAP
    616 		/*
    617 		 * In netmap mode, set the map for the packet buffer.
    618 		 * NOTE: Some drivers (not this one) also need to set
    619 		 * the physical buffer address in the NIC ring.
    620 		 * Slots in the netmap ring (indexed by "si") are
    621 		 * kring->nkr_hwofs positions "ahead" wrt the
    622 		 * corresponding slot in the NIC ring. In some drivers
    623 		 * (not here) nkr_hwofs can be negative. Function
    624 		 * netmap_idx_n2k() handles wraparounds properly.
    625 		 */
    626 		if (slot) {
    627 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
    628 			netmap_load_map(na, txr->txtag,
    629 			    txbuf->map, NMB(na, slot + si));
    630 		}
    631 #endif /* DEV_NETMAP */
    632 		/* Clear the EOP descriptor pointer */
    633 		txbuf->eop = NULL;
    634         }
    635 
    636 #ifdef IXGBE_FDIR
    637 	/* Set the rate at which we sample packets */
    638 	if (adapter->hw.mac.type != ixgbe_mac_82598EB)
    639 		txr->atr_sample = atr_sample_rate;
    640 #endif
    641 
    642 	/* Set number of descriptors available */
    643 	txr->tx_avail = adapter->num_tx_desc;
    644 
    645 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
    646 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
    647 	IXGBE_TX_UNLOCK(txr);
    648 }
    649 
    650 /*********************************************************************
    651  *
    652  *  Initialize all transmit rings.
    653  *
    654  **********************************************************************/
    655 int
    656 ixgbe_setup_transmit_structures(struct adapter *adapter)
    657 {
    658 	struct tx_ring *txr = adapter->tx_rings;
    659 
    660 	for (int i = 0; i < adapter->num_queues; i++, txr++)
    661 		ixgbe_setup_transmit_ring(txr);
    662 
    663 	return (0);
    664 }
    665 
    666 /*********************************************************************
    667  *
    668  *  Free all transmit rings.
    669  *
    670  **********************************************************************/
    671 void
    672 ixgbe_free_transmit_structures(struct adapter *adapter)
    673 {
    674 	struct tx_ring *txr = adapter->tx_rings;
    675 
    676 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
    677 		ixgbe_free_transmit_buffers(txr);
    678 		ixgbe_dma_free(adapter, &txr->txdma);
    679 		IXGBE_TX_LOCK_DESTROY(txr);
    680 	}
    681 	free(adapter->tx_rings, M_DEVBUF);
    682 }
    683 
    684 /*********************************************************************
    685  *
    686  *  Free transmit ring related data structures.
    687  *
    688  **********************************************************************/
    689 static void
    690 ixgbe_free_transmit_buffers(struct tx_ring *txr)
    691 {
    692 	struct adapter *adapter = txr->adapter;
    693 	struct ixgbe_tx_buf *tx_buffer;
    694 	int             i;
    695 
    696 	INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
    697 
    698 	if (txr->tx_buffers == NULL)
    699 		return;
    700 
    701 	tx_buffer = txr->tx_buffers;
    702 	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
    703 		if (tx_buffer->m_head != NULL) {
    704 			bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
    705 			    0, tx_buffer->m_head->m_pkthdr.len,
    706 			    BUS_DMASYNC_POSTWRITE);
    707 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
    708 			m_freem(tx_buffer->m_head);
    709 			tx_buffer->m_head = NULL;
    710 			if (tx_buffer->map != NULL) {
    711 				ixgbe_dmamap_destroy(txr->txtag,
    712 				    tx_buffer->map);
    713 				tx_buffer->map = NULL;
    714 			}
    715 		} else if (tx_buffer->map != NULL) {
    716 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
    717 			ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
    718 			tx_buffer->map = NULL;
    719 		}
    720 	}
    721 #ifndef IXGBE_LEGACY_TX
    722 	if (txr->br != NULL)
    723 		buf_ring_free(txr->br, M_DEVBUF);
    724 #endif
    725 	if (txr->tx_buffers != NULL) {
    726 		free(txr->tx_buffers, M_DEVBUF);
    727 		txr->tx_buffers = NULL;
    728 	}
    729 	if (txr->txtag != NULL) {
    730 		ixgbe_dma_tag_destroy(txr->txtag);
    731 		txr->txtag = NULL;
    732 	}
    733 	return;
    734 }
    735 
    736 /*********************************************************************
    737  *
    738  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
    739  *
    740  **********************************************************************/
    741 
    742 static int
    743 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
    744     u32 *cmd_type_len, u32 *olinfo_status)
    745 {
    746 	struct adapter *adapter = txr->adapter;
    747 	struct ethercom *ec = &adapter->osdep.ec;
    748 	struct m_tag *mtag;
    749 	struct ixgbe_adv_tx_context_desc *TXD;
    750 	struct ether_vlan_header *eh;
    751 #ifdef INET
    752 	struct ip *ip;
    753 #endif
    754 #ifdef INET6
    755 	struct ip6_hdr *ip6;
    756 #endif
    757 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
    758 	int	ehdrlen, ip_hlen = 0;
    759 	u16	etype;
    760 	u8	ipproto = 0;
    761 	int	offload = TRUE;
    762 	int	ctxd = txr->next_avail_desc;
    763 	u16	vtag = 0;
    764 	char	*l3d;
    765 
    766 
    767 	/* First check if TSO is to be used */
    768 	if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4|M_CSUM_TSOv6))
    769 		return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
    770 
    771 	if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
    772 		offload = FALSE;
    773 
    774 	/* Indicate the whole packet as payload when not doing TSO */
    775        	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
    776 
    777 	/* Now ready a context descriptor */
    778 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
    779 
    780 	/*
    781 	** In advanced descriptors the vlan tag must
    782 	** be placed into the context descriptor. Hence
    783 	** we need to make one even if not doing offloads.
    784 	*/
    785 	if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
    786 		vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
    787 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
    788 	} else if (!IXGBE_IS_X550VF(adapter) && (offload == FALSE))
    789 		return (0);
    790 
    791 	/*
    792 	 * Determine where frame payload starts.
    793 	 * Jump over vlan headers if already present,
    794 	 * helpful for QinQ too.
    795 	 */
    796 	KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
    797 	eh = mtod(mp, struct ether_vlan_header *);
    798 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
    799 		KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
    800 		etype = ntohs(eh->evl_proto);
    801 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
    802 	} else {
    803 		etype = ntohs(eh->evl_encap_proto);
    804 		ehdrlen = ETHER_HDR_LEN;
    805 	}
    806 
    807 	/* Set the ether header length */
    808 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
    809 
    810 	if (offload == FALSE)
    811 		goto no_offloads;
    812 
    813 	/*
    814 	 * If the first mbuf only includes the ethernet header, jump to the next one
    815 	 * XXX: This assumes the stack splits mbufs containing headers on header boundaries
    816 	 * XXX: And assumes the entire IP header is contained in one mbuf
    817 	 */
    818 	if (mp->m_len == ehdrlen && mp->m_next)
    819 		l3d = mtod(mp->m_next, char *);
    820 	else
    821 		l3d = mtod(mp, char *) + ehdrlen;
    822 
    823 	switch (etype) {
    824 #ifdef INET
    825 	case ETHERTYPE_IP:
    826 		ip = (struct ip *)(l3d);
    827 		ip_hlen = ip->ip_hl << 2;
    828 		ipproto = ip->ip_p;
    829 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
    830 		KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
    831 		    ip->ip_sum == 0);
    832 		break;
    833 #endif
    834 #ifdef INET6
    835 	case ETHERTYPE_IPV6:
    836 		ip6 = (struct ip6_hdr *)(l3d);
    837 		ip_hlen = sizeof(struct ip6_hdr);
    838 		ipproto = ip6->ip6_nxt;
    839 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
    840 		break;
    841 #endif
    842 	default:
    843 		break;
    844 	}
    845 
    846 	if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
    847 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
    848 
    849 	vlan_macip_lens |= ip_hlen;
    850 
    851 	/* No support for offloads for non-L4 next headers */
    852  	switch (ipproto) {
    853  		case IPPROTO_TCP:
    854 			if (mp->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_TCPv6))
    855 
    856 				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
    857 			else
    858 				offload = false;
    859 			break;
    860 		case IPPROTO_UDP:
    861 			if (mp->m_pkthdr.csum_flags & (M_CSUM_UDPv4|M_CSUM_UDPv6))
    862 				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
    863 			else
    864 				offload = false;
    865 			break;
    866 	}
    867 
    868 	if (offload) /* Insert L4 checksum into data descriptors */
    869 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
    870 
    871 no_offloads:
    872 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
    873 
    874 	/* Now copy bits into descriptor */
    875 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
    876 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
    877 	TXD->seqnum_seed = htole32(0);
    878 	TXD->mss_l4len_idx = htole32(0);
    879 
    880 	/* We've consumed the first desc, adjust counters */
    881 	if (++ctxd == txr->num_desc)
    882 		ctxd = 0;
    883 	txr->next_avail_desc = ctxd;
    884 	--txr->tx_avail;
    885 
    886         return 0;
    887 }
    888 
    889 /**********************************************************************
    890  *
    891  *  Setup work for hardware segmentation offload (TSO) on
    892  *  adapters using advanced tx descriptors
    893  *
    894  **********************************************************************/
    895 static int
    896 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
    897     u32 *cmd_type_len, u32 *olinfo_status)
    898 {
    899 	struct m_tag *mtag;
    900 	struct adapter *adapter = txr->adapter;
    901 	struct ethercom *ec = &adapter->osdep.ec;
    902 	struct ixgbe_adv_tx_context_desc *TXD;
    903 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
    904 	u32 mss_l4len_idx = 0, paylen;
    905 	u16 vtag = 0, eh_type;
    906 	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
    907 	struct ether_vlan_header *eh;
    908 #ifdef INET6
    909 	struct ip6_hdr *ip6;
    910 #endif
    911 #ifdef INET
    912 	struct ip *ip;
    913 #endif
    914 	struct tcphdr *th;
    915 
    916 	/*
    917 	 * Determine where frame payload starts.
    918 	 * Jump over vlan headers if already present
    919 	 */
    920 	eh = mtod(mp, struct ether_vlan_header *);
    921 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
    922 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
    923 		eh_type = eh->evl_proto;
    924 	} else {
    925 		ehdrlen = ETHER_HDR_LEN;
    926 		eh_type = eh->evl_encap_proto;
    927 	}
    928 
    929 	switch (ntohs(eh_type)) {
    930 #ifdef INET6
    931 	case ETHERTYPE_IPV6:
    932 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
    933 		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
    934 		if (ip6->ip6_nxt != IPPROTO_TCP)
    935 			return (ENXIO);
    936 		ip_hlen = sizeof(struct ip6_hdr);
    937 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
    938 		th = (struct tcphdr *)((char *)ip6 + ip_hlen);
    939 		th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
    940 		    &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
    941 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
    942 		break;
    943 #endif
    944 #ifdef INET
    945 	case ETHERTYPE_IP:
    946 		ip = (struct ip *)(mp->m_data + ehdrlen);
    947 		if (ip->ip_p != IPPROTO_TCP)
    948 			return (ENXIO);
    949 		ip->ip_sum = 0;
    950 		ip_hlen = ip->ip_hl << 2;
    951 		th = (struct tcphdr *)((char *)ip + ip_hlen);
    952 		th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
    953 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
    954 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
    955 		/* Tell transmit desc to also do IPv4 checksum. */
    956 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
    957 		break;
    958 #endif
    959 	default:
    960 		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
    961 		    __func__, ntohs(eh_type));
    962 		break;
    963 	}
    964 
    965 	ctxd = txr->next_avail_desc;
    966 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
    967 
    968 	tcp_hlen = th->th_off << 2;
    969 
    970 	/* This is used in the transmit desc in encap */
    971 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
    972 
    973 	/* VLAN MACLEN IPLEN */
    974 	if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
    975 		vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
    976                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
    977 	}
    978 
    979 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
    980 	vlan_macip_lens |= ip_hlen;
    981 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
    982 
    983 	/* ADV DTYPE TUCMD */
    984 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
    985 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
    986 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
    987 
    988 	/* MSS L4LEN IDX */
    989 	mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
    990 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
    991 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
    992 
    993 	TXD->seqnum_seed = htole32(0);
    994 
    995 	if (++ctxd == txr->num_desc)
    996 		ctxd = 0;
    997 
    998 	txr->tx_avail--;
    999 	txr->next_avail_desc = ctxd;
   1000 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
   1001 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
   1002 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
   1003 	++txr->tso_tx.ev_count;
   1004 	return (0);
   1005 }
   1006 
   1007 
   1008 /**********************************************************************
   1009  *
   1010  *  Examine each tx_buffer in the used queue. If the hardware is done
   1011  *  processing the packet then free associated resources. The
   1012  *  tx_buffer is put back on the free queue.
   1013  *
   1014  **********************************************************************/
   1015 void
   1016 ixgbe_txeof(struct tx_ring *txr)
   1017 {
   1018 	struct adapter		*adapter = txr->adapter;
   1019 	struct ifnet		*ifp = adapter->ifp;
   1020 	u32			work, processed = 0;
   1021 	u32			limit = adapter->tx_process_limit;
   1022 	struct ixgbe_tx_buf	*buf;
   1023 	union ixgbe_adv_tx_desc *txd;
   1024 
   1025 	KASSERT(mutex_owned(&txr->tx_mtx));
   1026 
   1027 #ifdef DEV_NETMAP
   1028 	if (ifp->if_capenable & IFCAP_NETMAP) {
   1029 		struct netmap_adapter *na = NA(ifp);
   1030 		struct netmap_kring *kring = &na->tx_rings[txr->me];
   1031 		txd = txr->tx_base;
   1032 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1033 		    BUS_DMASYNC_POSTREAD);
   1034 		/*
   1035 		 * In netmap mode, all the work is done in the context
   1036 		 * of the client thread. Interrupt handlers only wake up
   1037 		 * clients, which may be sleeping on individual rings
   1038 		 * or on a global resource for all rings.
   1039 		 * To implement tx interrupt mitigation, we wake up the client
   1040 		 * thread roughly every half ring, even if the NIC interrupts
   1041 		 * more frequently. This is implemented as follows:
   1042 		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
   1043 		 *   the slot that should wake up the thread (nkr_num_slots
   1044 		 *   means the user thread should not be woken up);
   1045 		 * - the driver ignores tx interrupts unless netmap_mitigate=0
   1046 		 *   or the slot has the DD bit set.
   1047 		 */
   1048 		if (!netmap_mitigate ||
   1049 		    (kring->nr_kflags < kring->nkr_num_slots &&
   1050 		    txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
   1051 			netmap_tx_irq(ifp, txr->me);
   1052 		}
   1053 		return;
   1054 	}
   1055 #endif /* DEV_NETMAP */
   1056 
   1057 	if (txr->tx_avail == txr->num_desc) {
   1058 		txr->busy = 0;
   1059 		return;
   1060 	}
   1061 
   1062 	/* Get work starting point */
   1063 	work = txr->next_to_clean;
   1064 	buf = &txr->tx_buffers[work];
   1065 	txd = &txr->tx_base[work];
   1066 	work -= txr->num_desc; /* The distance to ring end */
   1067         ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1068 	    BUS_DMASYNC_POSTREAD);
   1069 
   1070 	do {
   1071 		union ixgbe_adv_tx_desc *eop = buf->eop;
   1072 		if (eop == NULL) /* No work */
   1073 			break;
   1074 
   1075 		if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
   1076 			break;	/* I/O not complete */
   1077 
   1078 		if (buf->m_head) {
   1079 			txr->bytes +=
   1080 			    buf->m_head->m_pkthdr.len;
   1081 			bus_dmamap_sync(txr->txtag->dt_dmat,
   1082 			    buf->map,
   1083 			    0, buf->m_head->m_pkthdr.len,
   1084 			    BUS_DMASYNC_POSTWRITE);
   1085 			ixgbe_dmamap_unload(txr->txtag,
   1086 			    buf->map);
   1087 			m_freem(buf->m_head);
   1088 			buf->m_head = NULL;
   1089 		}
   1090 		buf->eop = NULL;
   1091 		++txr->tx_avail;
   1092 
   1093 		/* We clean the range if multi segment */
   1094 		while (txd != eop) {
   1095 			++txd;
   1096 			++buf;
   1097 			++work;
   1098 			/* wrap the ring? */
   1099 			if (__predict_false(!work)) {
   1100 				work -= txr->num_desc;
   1101 				buf = txr->tx_buffers;
   1102 				txd = txr->tx_base;
   1103 			}
   1104 			if (buf->m_head) {
   1105 				txr->bytes +=
   1106 				    buf->m_head->m_pkthdr.len;
   1107 				bus_dmamap_sync(txr->txtag->dt_dmat,
   1108 				    buf->map,
   1109 				    0, buf->m_head->m_pkthdr.len,
   1110 				    BUS_DMASYNC_POSTWRITE);
   1111 				ixgbe_dmamap_unload(txr->txtag,
   1112 				    buf->map);
   1113 				m_freem(buf->m_head);
   1114 				buf->m_head = NULL;
   1115 			}
   1116 			++txr->tx_avail;
   1117 			buf->eop = NULL;
   1118 
   1119 		}
   1120 		++txr->packets;
   1121 		++processed;
   1122 		++ifp->if_opackets;
   1123 
   1124 		/* Try the next packet */
   1125 		++txd;
   1126 		++buf;
   1127 		++work;
   1128 		/* reset with a wrap */
   1129 		if (__predict_false(!work)) {
   1130 			work -= txr->num_desc;
   1131 			buf = txr->tx_buffers;
   1132 			txd = txr->tx_base;
   1133 		}
   1134 		prefetch(txd);
   1135 	} while (__predict_true(--limit));
   1136 
   1137 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1138 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   1139 
   1140 	work += txr->num_desc;
   1141 	txr->next_to_clean = work;
   1142 
   1143 	/*
   1144 	** Queue Hang detection, we know there's
   1145 	** work outstanding or the first return
   1146 	** would have been taken, so increment busy
   1147 	** if nothing managed to get cleaned, then
   1148 	** in local_timer it will be checked and
   1149 	** marked as HUNG if it exceeds a MAX attempt.
   1150 	*/
   1151 	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
   1152 		++txr->busy;
   1153 	/*
   1154 	** If anything gets cleaned we reset state to 1,
   1155 	** note this will turn off HUNG if its set.
   1156 	*/
   1157 	if (processed)
   1158 		txr->busy = 1;
   1159 
   1160 	if (txr->tx_avail == txr->num_desc)
   1161 		txr->busy = 0;
   1162 
   1163 	return;
   1164 }
   1165 
   1166 
   1167 #ifdef IXGBE_FDIR
   1168 /*
   1169 ** This routine parses packet headers so that Flow
   1170 ** Director can make a hashed filter table entry
   1171 ** allowing traffic flows to be identified and kept
   1172 ** on the same cpu.  This would be a performance
   1173 ** hit, but we only do it at IXGBE_FDIR_RATE of
   1174 ** packets.
   1175 */
   1176 static void
   1177 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
   1178 {
   1179 	struct adapter			*adapter = txr->adapter;
   1180 	struct ix_queue			*que;
   1181 	struct ip			*ip;
   1182 	struct tcphdr			*th;
   1183 	struct udphdr			*uh;
   1184 	struct ether_vlan_header	*eh;
   1185 	union ixgbe_atr_hash_dword	input = {.dword = 0};
   1186 	union ixgbe_atr_hash_dword	common = {.dword = 0};
   1187 	int  				ehdrlen, ip_hlen;
   1188 	u16				etype;
   1189 
   1190 	eh = mtod(mp, struct ether_vlan_header *);
   1191 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
   1192 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
   1193 		etype = eh->evl_proto;
   1194 	} else {
   1195 		ehdrlen = ETHER_HDR_LEN;
   1196 		etype = eh->evl_encap_proto;
   1197 	}
   1198 
   1199 	/* Only handling IPv4 */
   1200 	if (etype != htons(ETHERTYPE_IP))
   1201 		return;
   1202 
   1203 	ip = (struct ip *)(mp->m_data + ehdrlen);
   1204 	ip_hlen = ip->ip_hl << 2;
   1205 
   1206 	/* check if we're UDP or TCP */
   1207 	switch (ip->ip_p) {
   1208 	case IPPROTO_TCP:
   1209 		th = (struct tcphdr *)((char *)ip + ip_hlen);
   1210 		/* src and dst are inverted */
   1211 		common.port.dst ^= th->th_sport;
   1212 		common.port.src ^= th->th_dport;
   1213 		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
   1214 		break;
   1215 	case IPPROTO_UDP:
   1216 		uh = (struct udphdr *)((char *)ip + ip_hlen);
   1217 		/* src and dst are inverted */
   1218 		common.port.dst ^= uh->uh_sport;
   1219 		common.port.src ^= uh->uh_dport;
   1220 		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
   1221 		break;
   1222 	default:
   1223 		return;
   1224 	}
   1225 
   1226 	input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
   1227 	if (mp->m_pkthdr.ether_vtag)
   1228 		common.flex_bytes ^= htons(ETHERTYPE_VLAN);
   1229 	else
   1230 		common.flex_bytes ^= etype;
   1231 	common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
   1232 
   1233 	que = &adapter->queues[txr->me];
   1234 	/*
   1235 	** This assumes the Rx queue and Tx
   1236 	** queue are bound to the same CPU
   1237 	*/
   1238 	ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
   1239 	    input, common, que->msix);
   1240 }
   1241 #endif /* IXGBE_FDIR */
   1242 
   1243 /*
   1244 ** Used to detect a descriptor that has
   1245 ** been merged by Hardware RSC.
   1246 */
   1247 static inline u32
   1248 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
   1249 {
   1250 	return (le32toh(rx->wb.lower.lo_dword.data) &
   1251 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
   1252 }
   1253 
   1254 /*********************************************************************
   1255  *
   1256  *  Initialize Hardware RSC (LRO) feature on 82599
   1257  *  for an RX ring, this is toggled by the LRO capability
   1258  *  even though it is transparent to the stack.
   1259  *
   1260  *  NOTE: since this HW feature only works with IPV4 and
   1261  *        our testing has shown soft LRO to be as effective
   1262  *        I have decided to disable this by default.
   1263  *
   1264  **********************************************************************/
   1265 static void
   1266 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
   1267 {
   1268 	struct	adapter 	*adapter = rxr->adapter;
   1269 	struct	ixgbe_hw	*hw = &adapter->hw;
   1270 	u32			rscctrl, rdrxctl;
   1271 
   1272 	/* If turning LRO/RSC off we need to disable it */
   1273 	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
   1274 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
   1275 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
   1276 		return;
   1277 	}
   1278 
   1279 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
   1280 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
   1281 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
   1282 	if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
   1283 #endif /* DEV_NETMAP */
   1284 	rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
   1285 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
   1286 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
   1287 
   1288 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
   1289 	rscctrl |= IXGBE_RSCCTL_RSCEN;
   1290 	/*
   1291 	** Limit the total number of descriptors that
   1292 	** can be combined, so it does not exceed 64K
   1293 	*/
   1294 	if (rxr->mbuf_sz == MCLBYTES)
   1295 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
   1296 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
   1297 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
   1298 	else if (rxr->mbuf_sz == MJUM9BYTES)
   1299 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
   1300 	else  /* Using 16K cluster */
   1301 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
   1302 
   1303 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
   1304 
   1305 	/* Enable TCP header recognition */
   1306 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
   1307 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
   1308 	    IXGBE_PSRTYPE_TCPHDR));
   1309 
   1310 	/* Disable RSC for ACK packets */
   1311 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
   1312 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
   1313 
   1314 	rxr->hw_rsc = TRUE;
   1315 }
   1316 
   1317 /*********************************************************************
   1318  *
   1319  *  Refresh mbuf buffers for RX descriptor rings
   1320  *   - now keeps its own state so discards due to resource
   1321  *     exhaustion are unnecessary, if an mbuf cannot be obtained
   1322  *     it just returns, keeping its placeholder, thus it can simply
   1323  *     be recalled to try again.
   1324  *
   1325  **********************************************************************/
   1326 static void
   1327 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
   1328 {
   1329 	struct adapter		*adapter = rxr->adapter;
   1330 	struct ixgbe_rx_buf	*rxbuf;
   1331 	struct mbuf		*mp;
   1332 	int			i, j, error;
   1333 	bool			refreshed = false;
   1334 
   1335 	i = j = rxr->next_to_refresh;
   1336 	/* Control the loop with one beyond */
   1337 	if (++j == rxr->num_desc)
   1338 		j = 0;
   1339 
   1340 	while (j != limit) {
   1341 		rxbuf = &rxr->rx_buffers[i];
   1342 		if (rxbuf->buf == NULL) {
   1343 			mp = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
   1344 			    MT_DATA, M_PKTHDR, rxr->mbuf_sz);
   1345 			if (mp == NULL) {
   1346 				rxr->no_jmbuf.ev_count++;
   1347 				goto update;
   1348 			}
   1349 			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
   1350 				m_adj(mp, ETHER_ALIGN);
   1351 		} else
   1352 			mp = rxbuf->buf;
   1353 
   1354 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   1355 
   1356 		/* If we're dealing with an mbuf that was copied rather
   1357 		 * than replaced, there's no need to go through busdma.
   1358 		 */
   1359 		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
   1360 			/* Get the memory mapping */
   1361 			ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   1362 			error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
   1363 			    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
   1364 			if (error != 0) {
   1365 				printf("Refresh mbufs: payload dmamap load"
   1366 				    " failure - %d\n", error);
   1367 				m_free(mp);
   1368 				rxbuf->buf = NULL;
   1369 				goto update;
   1370 			}
   1371 			rxbuf->buf = mp;
   1372 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1373 			    0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
   1374 			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
   1375 			    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1376 		} else {
   1377 			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
   1378 			rxbuf->flags &= ~IXGBE_RX_COPY;
   1379 		}
   1380 
   1381 		refreshed = true;
   1382 		/* Next is precalculated */
   1383 		i = j;
   1384 		rxr->next_to_refresh = i;
   1385 		if (++j == rxr->num_desc)
   1386 			j = 0;
   1387 	}
   1388 update:
   1389 	if (refreshed) /* Update hardware tail index */
   1390 		IXGBE_WRITE_REG(&adapter->hw,
   1391 		    rxr->tail, rxr->next_to_refresh);
   1392 	return;
   1393 }
   1394 
   1395 /*********************************************************************
   1396  *
   1397  *  Allocate memory for rx_buffer structures. Since we use one
   1398  *  rx_buffer per received packet, the maximum number of rx_buffer's
   1399  *  that we'll need is equal to the number of receive descriptors
   1400  *  that we've allocated.
   1401  *
   1402  **********************************************************************/
   1403 int
   1404 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
   1405 {
   1406 	struct	adapter 	*adapter = rxr->adapter;
   1407 	device_t 		dev = adapter->dev;
   1408 	struct ixgbe_rx_buf 	*rxbuf;
   1409 	int             	bsize, error;
   1410 
   1411 	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
   1412 	if (!(rxr->rx_buffers =
   1413 	    (struct ixgbe_rx_buf *) malloc(bsize,
   1414 	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
   1415 		aprint_error_dev(dev, "Unable to allocate rx_buffer memory\n");
   1416 		error = ENOMEM;
   1417 		goto fail;
   1418 	}
   1419 
   1420 	if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat,	/* parent */
   1421 				   1, 0,	/* alignment, bounds */
   1422 				   MJUM16BYTES,		/* maxsize */
   1423 				   1,			/* nsegments */
   1424 				   MJUM16BYTES,		/* maxsegsize */
   1425 				   0,			/* flags */
   1426 				   &rxr->ptag))) {
   1427 		aprint_error_dev(dev, "Unable to create RX DMA tag\n");
   1428 		goto fail;
   1429 	}
   1430 
   1431 	for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
   1432 		rxbuf = &rxr->rx_buffers[i];
   1433 		error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
   1434 		if (error) {
   1435 			aprint_error_dev(dev, "Unable to create RX dma map\n");
   1436 			goto fail;
   1437 		}
   1438 	}
   1439 
   1440 	return (0);
   1441 
   1442 fail:
   1443 	/* Frees all, but can handle partial completion */
   1444 	ixgbe_free_receive_structures(adapter);
   1445 	return (error);
   1446 }
   1447 
   1448 static void
   1449 ixgbe_free_receive_ring(struct rx_ring *rxr)
   1450 {
   1451 	struct ixgbe_rx_buf       *rxbuf;
   1452 
   1453 	for (int i = 0; i < rxr->num_desc; i++) {
   1454 		rxbuf = &rxr->rx_buffers[i];
   1455 		if (rxbuf->buf != NULL) {
   1456 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1457 			    0, rxbuf->buf->m_pkthdr.len,
   1458 			    BUS_DMASYNC_POSTREAD);
   1459 			ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   1460 			rxbuf->buf->m_flags |= M_PKTHDR;
   1461 			m_freem(rxbuf->buf);
   1462 			rxbuf->buf = NULL;
   1463 			rxbuf->flags = 0;
   1464 		}
   1465 	}
   1466 }
   1467 
   1468 /*********************************************************************
   1469  *
   1470  *  Initialize a receive ring and its buffers.
   1471  *
   1472  **********************************************************************/
   1473 static int
   1474 ixgbe_setup_receive_ring(struct rx_ring *rxr)
   1475 {
   1476 	struct	adapter 	*adapter;
   1477 	struct ixgbe_rx_buf	*rxbuf;
   1478 #ifdef LRO
   1479 	struct ifnet		*ifp;
   1480 	struct lro_ctrl		*lro = &rxr->lro;
   1481 #endif /* LRO */
   1482 	int			rsize, error = 0;
   1483 #ifdef DEV_NETMAP
   1484 	struct netmap_adapter *na = NA(rxr->adapter->ifp);
   1485 	struct netmap_slot *slot;
   1486 #endif /* DEV_NETMAP */
   1487 
   1488 	adapter = rxr->adapter;
   1489 #ifdef LRO
   1490 	ifp = adapter->ifp;
   1491 #endif /* LRO */
   1492 
   1493 	/* Clear the ring contents */
   1494 	IXGBE_RX_LOCK(rxr);
   1495 #ifdef DEV_NETMAP
   1496 	/* same as in ixgbe_setup_transmit_ring() */
   1497 	slot = netmap_reset(na, NR_RX, rxr->me, 0);
   1498 #endif /* DEV_NETMAP */
   1499 	rsize = roundup2(adapter->num_rx_desc *
   1500 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
   1501 	bzero((void *)rxr->rx_base, rsize);
   1502 	/* Cache the size */
   1503 	rxr->mbuf_sz = adapter->rx_mbuf_sz;
   1504 
   1505 	/* Free current RX buffer structs and their mbufs */
   1506 	ixgbe_free_receive_ring(rxr);
   1507 
   1508 	IXGBE_RX_UNLOCK(rxr);
   1509 
   1510 	/* Now reinitialize our supply of jumbo mbufs.  The number
   1511 	 * or size of jumbo mbufs may have changed.
   1512 	 */
   1513 	ixgbe_jcl_reinit(&adapter->jcl_head, rxr->ptag->dt_dmat,
   1514 	    2 * adapter->num_rx_desc, adapter->rx_mbuf_sz);
   1515 
   1516 	IXGBE_RX_LOCK(rxr);
   1517 
   1518 	/* Now replenish the mbufs */
   1519 	for (int j = 0; j != rxr->num_desc; ++j) {
   1520 		struct mbuf	*mp;
   1521 
   1522 		rxbuf = &rxr->rx_buffers[j];
   1523 #ifdef DEV_NETMAP
   1524 		/*
   1525 		 * In netmap mode, fill the map and set the buffer
   1526 		 * address in the NIC ring, considering the offset
   1527 		 * between the netmap and NIC rings (see comment in
   1528 		 * ixgbe_setup_transmit_ring() ). No need to allocate
   1529 		 * an mbuf, so end the block with a continue;
   1530 		 */
   1531 		if (slot) {
   1532 			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
   1533 			uint64_t paddr;
   1534 			void *addr;
   1535 
   1536 			addr = PNMB(na, slot + sj, &paddr);
   1537 			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
   1538 			/* Update descriptor and the cached value */
   1539 			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
   1540 			rxbuf->addr = htole64(paddr);
   1541 			continue;
   1542 		}
   1543 #endif /* DEV_NETMAP */
   1544 		rxbuf->flags = 0;
   1545 		rxbuf->buf = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
   1546 		    MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
   1547 		if (rxbuf->buf == NULL) {
   1548 			error = ENOBUFS;
   1549                         goto fail;
   1550 		}
   1551 		mp = rxbuf->buf;
   1552 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   1553 		/* Get the memory mapping */
   1554 		error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
   1555 		    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
   1556 		if (error != 0)
   1557                         goto fail;
   1558 		bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1559 		    0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
   1560 		/* Update the descriptor and the cached value */
   1561 		rxr->rx_base[j].read.pkt_addr =
   1562 		    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1563 		rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1564 	}
   1565 
   1566 
   1567 	/* Setup our descriptor indices */
   1568 	rxr->next_to_check = 0;
   1569 	rxr->next_to_refresh = 0;
   1570 	rxr->lro_enabled = FALSE;
   1571 	rxr->rx_copies.ev_count = 0;
   1572 	rxr->rx_bytes.ev_count = 0;
   1573 	rxr->vtag_strip = FALSE;
   1574 
   1575 	ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   1576 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   1577 
   1578 	/*
   1579 	** Now set up the LRO interface:
   1580 	*/
   1581 	if (ixgbe_rsc_enable)
   1582 		ixgbe_setup_hw_rsc(rxr);
   1583 #ifdef LRO
   1584 	else if (ifp->if_capenable & IFCAP_LRO) {
   1585 		device_t dev = adapter->dev;
   1586 		int err = tcp_lro_init(lro);
   1587 		if (err) {
   1588 			device_printf(dev, "LRO Initialization failed!\n");
   1589 			goto fail;
   1590 		}
   1591 		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
   1592 		rxr->lro_enabled = TRUE;
   1593 		lro->ifp = adapter->ifp;
   1594 	}
   1595 #endif /* LRO */
   1596 
   1597 	IXGBE_RX_UNLOCK(rxr);
   1598 	return (0);
   1599 
   1600 fail:
   1601 	ixgbe_free_receive_ring(rxr);
   1602 	IXGBE_RX_UNLOCK(rxr);
   1603 	return (error);
   1604 }
   1605 
   1606 /*********************************************************************
   1607  *
   1608  *  Initialize all receive rings.
   1609  *
   1610  **********************************************************************/
   1611 int
   1612 ixgbe_setup_receive_structures(struct adapter *adapter)
   1613 {
   1614 	struct rx_ring *rxr = adapter->rx_rings;
   1615 	int j;
   1616 
   1617 	for (j = 0; j < adapter->num_queues; j++, rxr++)
   1618 		if (ixgbe_setup_receive_ring(rxr))
   1619 			goto fail;
   1620 
   1621 	return (0);
   1622 fail:
   1623 	/*
   1624 	 * Free RX buffers allocated so far, we will only handle
   1625 	 * the rings that completed, the failing case will have
   1626 	 * cleaned up for itself. 'j' failed, so its the terminus.
   1627 	 */
   1628 	for (int i = 0; i < j; ++i) {
   1629 		rxr = &adapter->rx_rings[i];
   1630 		ixgbe_free_receive_ring(rxr);
   1631 	}
   1632 
   1633 	return (ENOBUFS);
   1634 }
   1635 
   1636 
   1637 /*********************************************************************
   1638  *
   1639  *  Free all receive rings.
   1640  *
   1641  **********************************************************************/
   1642 void
   1643 ixgbe_free_receive_structures(struct adapter *adapter)
   1644 {
   1645 	struct rx_ring *rxr = adapter->rx_rings;
   1646 
   1647 	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
   1648 
   1649 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
   1650 #ifdef LRO
   1651 		struct lro_ctrl		*lro = &rxr->lro;
   1652 #endif /* LRO */
   1653 		ixgbe_free_receive_buffers(rxr);
   1654 #ifdef LRO
   1655 		/* Free LRO memory */
   1656 		tcp_lro_free(lro);
   1657 #endif /* LRO */
   1658 		/* Free the ring memory as well */
   1659 		ixgbe_dma_free(adapter, &rxr->rxdma);
   1660 		IXGBE_RX_LOCK_DESTROY(rxr);
   1661 	}
   1662 
   1663 	free(adapter->rx_rings, M_DEVBUF);
   1664 }
   1665 
   1666 
   1667 /*********************************************************************
   1668  *
   1669  *  Free receive ring data structures
   1670  *
   1671  **********************************************************************/
   1672 static void
   1673 ixgbe_free_receive_buffers(struct rx_ring *rxr)
   1674 {
   1675 	struct adapter		*adapter = rxr->adapter;
   1676 	struct ixgbe_rx_buf	*rxbuf;
   1677 
   1678 	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
   1679 
   1680 	/* Cleanup any existing buffers */
   1681 	if (rxr->rx_buffers != NULL) {
   1682 		for (int i = 0; i < adapter->num_rx_desc; i++) {
   1683 			rxbuf = &rxr->rx_buffers[i];
   1684 			if (rxbuf->buf != NULL) {
   1685 				bus_dmamap_sync(rxr->ptag->dt_dmat,
   1686 				    rxbuf->pmap, 0, rxbuf->buf->m_pkthdr.len,
   1687 				    BUS_DMASYNC_POSTREAD);
   1688 				ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   1689 				rxbuf->buf->m_flags |= M_PKTHDR;
   1690 				m_freem(rxbuf->buf);
   1691 			}
   1692 			rxbuf->buf = NULL;
   1693 			if (rxbuf->pmap != NULL) {
   1694 				ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
   1695 				rxbuf->pmap = NULL;
   1696 			}
   1697 		}
   1698 		if (rxr->rx_buffers != NULL) {
   1699 			free(rxr->rx_buffers, M_DEVBUF);
   1700 			rxr->rx_buffers = NULL;
   1701 		}
   1702 	}
   1703 
   1704 	if (rxr->ptag != NULL) {
   1705 		ixgbe_dma_tag_destroy(rxr->ptag);
   1706 		rxr->ptag = NULL;
   1707 	}
   1708 
   1709 	return;
   1710 }
   1711 
   1712 static __inline void
   1713 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
   1714 {
   1715 	int s;
   1716 
   1717 #ifdef LRO
   1718 	struct adapter	*adapter = ifp->if_softc;
   1719 	struct ethercom *ec = &adapter->osdep.ec;
   1720 
   1721         /*
   1722          * ATM LRO is only for IP/TCP packets and TCP checksum of the packet
   1723          * should be computed by hardware. Also it should not have VLAN tag in
   1724          * ethernet header.  In case of IPv6 we do not yet support ext. hdrs.
   1725          */
   1726         if (rxr->lro_enabled &&
   1727             (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
   1728             (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
   1729             ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
   1730             (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
   1731             (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
   1732             (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
   1733             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
   1734             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
   1735                 /*
   1736                  * Send to the stack if:
   1737                  **  - LRO not enabled, or
   1738                  **  - no LRO resources, or
   1739                  **  - lro enqueue fails
   1740                  */
   1741                 if (rxr->lro.lro_cnt != 0)
   1742                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
   1743                                 return;
   1744         }
   1745 #endif /* LRO */
   1746 
   1747 	IXGBE_RX_UNLOCK(rxr);
   1748 
   1749 	s = splnet();
   1750 	/* Pass this up to any BPF listeners. */
   1751 	bpf_mtap(ifp, m);
   1752 	if_input(ifp, m);
   1753 	splx(s);
   1754 
   1755 	IXGBE_RX_LOCK(rxr);
   1756 }
   1757 
   1758 static __inline void
   1759 ixgbe_rx_discard(struct rx_ring *rxr, int i)
   1760 {
   1761 	struct ixgbe_rx_buf	*rbuf;
   1762 
   1763 	rbuf = &rxr->rx_buffers[i];
   1764 
   1765 
   1766 	/*
   1767 	** With advanced descriptors the writeback
   1768 	** clobbers the buffer addrs, so its easier
   1769 	** to just free the existing mbufs and take
   1770 	** the normal refresh path to get new buffers
   1771 	** and mapping.
   1772 	*/
   1773 
   1774 	if (rbuf->buf != NULL) {/* Partial chain ? */
   1775 		rbuf->fmp->m_flags |= M_PKTHDR;
   1776 		m_freem(rbuf->fmp);
   1777 		rbuf->fmp = NULL;
   1778 		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
   1779 	} else if (rbuf->buf) {
   1780 		m_free(rbuf->buf);
   1781 		rbuf->buf = NULL;
   1782 	}
   1783 	ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
   1784 
   1785 	rbuf->flags = 0;
   1786 
   1787 	return;
   1788 }
   1789 
   1790 
/*********************************************************************
 *
 *  ixgbe_rxeof - service completed receive descriptors of one queue.
 *
 *  This routine executes in interrupt context. It replenishes
 *  the mbufs in the descriptor and sends data which has been
 *  dma'ed into host memory to upper layer.
 *
 *  que: queue whose receive ring (que->rxr) is processed.  At most
 *  adapter->rx_process_limit descriptors are consumed per call.
 *
 *  The RX lock is taken on entry and released before returning.
 *
 *  Returns true when the status word of the last descriptor examined
 *  had the DD bit set (more work may be pending), false otherwise.
 *  With DEV_NETMAP, returns FALSE immediately when netmap_rx_irq()
 *  returns non-zero.
 *********************************************************************/
bool
ixgbe_rxeof(struct ix_queue *que)
{
	struct adapter		*adapter = que->adapter;
	struct rx_ring		*rxr = que->rxr;
	struct ifnet		*ifp = adapter->ifp;
#ifdef LRO
	struct lro_ctrl		*lro = &rxr->lro;
#endif /* LRO */
	int			i, nextp, processed = 0;
	u32			staterr = 0;
	u32			count = adapter->rx_process_limit;
	union ixgbe_adv_rx_desc	*cur;
	struct ixgbe_rx_buf	*rbuf, *nbuf;
#ifdef RSS
	u16			pkt_info;
#endif

	IXGBE_RX_LOCK(rxr);

#ifdef DEV_NETMAP
	/* Same as the txeof routine: wakeup clients on intr. */
	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
		IXGBE_RX_UNLOCK(rxr);
		return (FALSE);
	}
#endif /* DEV_NETMAP */

	/*
	 * Walk the ring starting at next_to_check.  `count` bounds the
	 * number of descriptors handled in this call; `i` is advanced
	 * (with wrap-around) at the next_desc label below.
	 */
	for (i = rxr->next_to_check; count != 0;) {
		struct mbuf	*sendmp, *mp;
		u32		rsc, ptype;
		u16		len;
		u16		vtag = 0;
		bool		eop;

		/* Sync the ring. */
		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

		cur = &rxr->rx_base[i];
		staterr = le32toh(cur->wb.upper.status_error);
#ifdef RSS
		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
#endif

		/* Stop at the first descriptor that does not have DD set. */
		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
			break;
		/* Stop as well once the interface is no longer running. */
		if ((ifp->if_flags & IFF_RUNNING) == 0)
			break;

		count--;
		sendmp = NULL;
		nbuf = NULL;
		rsc = 0;
		/*
		 * Clear the status word in the descriptor itself (the local
		 * copy in staterr is what is used from here on) —
		 * presumably so a stale DD bit is not seen again before the
		 * slot is refreshed; TODO confirm.
		 */
		cur->wb.upper.status_error = 0;
		rbuf = &rxr->rx_buffers[i];
		mp = rbuf->buf;

		len = le16toh(cur->wb.upper.length);
		ptype = le32toh(cur->wb.lower.lo_dword.data) &
		    IXGBE_RXDADV_PKTTYPE_MASK;
		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);

		/* Make sure bad packets are discarded */
		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
#if __FreeBSD_version >= 1100036
			if (IXGBE_IS_VF(adapter))
				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
#endif
			rxr->rx_discarded.ev_count++;
			ixgbe_rx_discard(rxr, i);
			/* sendmp is still NULL here, so nothing is passed up. */
			goto next_desc;
		}

		/*
		** On 82599 which supports a hardware
		** LRO (called HW RSC), packets need
		** not be fragmented across sequential
		** descriptors, rather the next descriptor
		** is indicated in bits of the descriptor.
		** This also means that we might process
		** more than one packet at a time, something
		** that has never been true before, it
		** required eliminating global chain pointers
		** in favor of what we are doing here.  -jfv
		*/
		if (!eop) {
			/*
			** Figure out the next descriptor
			** of this frame.
			*/
			if (rxr->hw_rsc == TRUE) {
				rsc = ixgbe_rsc_count(cur);
				rxr->rsc_num += (rsc - 1);
			}
			if (rsc) { /* Get hardware index */
				nextp = ((staterr &
				    IXGBE_RXDADV_NEXTP_MASK) >>
				    IXGBE_RXDADV_NEXTP_SHIFT);
			} else { /* Just sequential */
				nextp = i + 1;
				if (nextp == adapter->num_rx_desc)
					nextp = 0;
			}
			nbuf = &rxr->rx_buffers[nextp];
			prefetch(nbuf);
		}
		/*
		** Rather than using the fmp/lmp global pointers
		** we now keep the head of a packet chain in the
		** buffer struct and pass this along from one
		** descriptor to the next, until we get EOP.
		*/
		/*
		 * NOTE(review): mp (rbuf->buf) is dereferenced without a NULL
		 * check — assumes a completed descriptor always has a buffer
		 * attached; confirm against the refresh path.
		 */
		mp->m_len = len;
		/*
		** See if there is a stored head
		** that determines what we are
		*/
		sendmp = rbuf->fmp;
		if (sendmp != NULL) {  /* secondary frag */
			/* The slot gives up its mbuf; it now belongs to the chain. */
			rbuf->buf = rbuf->fmp = NULL;
			mp->m_flags &= ~M_PKTHDR;
			sendmp->m_pkthdr.len += mp->m_len;
		} else {
			/*
			 * Optimize.  This might be a small packet,
			 * maybe just a TCP ACK.  Do a fast copy that
			 * is cache aligned into a new mbuf, and
			 * leave the old mbuf+cluster for re-use.
			 */
			if (eop && len <= IXGBE_RX_COPY_LEN) {
				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
				if (sendmp != NULL) {
					sendmp->m_data +=
					    IXGBE_RX_COPY_ALIGN;
					ixgbe_bcopy(mp->m_data,
					    sendmp->m_data, len);
					sendmp->m_len = len;
					rxr->rx_copies.ev_count++;
					/* rbuf->buf stays attached to the slot. */
					rbuf->flags |= IXGBE_RX_COPY;
				}
			}
			/*
			 * No copy was made (packet too large, not EOP, or
			 * m_gethdr failed): pass the original mbuf up and
			 * detach it from the slot.
			 */
			if (sendmp == NULL) {
				rbuf->buf = rbuf->fmp = NULL;
				sendmp = mp;
			}

			/* first desc of a non-ps chain */
			sendmp->m_flags |= M_PKTHDR;
			sendmp->m_pkthdr.len = mp->m_len;
		}
		/* Counts descriptors consumed since the last mbuf refresh. */
		++processed;

		/* Pass the head pointer on */
		if (eop == 0) {
			/*
			 * Store the chain head in the slot that will receive
			 * the next fragment and link this mbuf to that slot's
			 * buffer.  sendmp is cleared so nothing is sent yet.
			 */
			nbuf->fmp = sendmp;
			sendmp = NULL;
			mp->m_next = nbuf->buf;
		} else { /* Sending this frame */
			m_set_rcvif(sendmp, ifp);
			ifp->if_ipackets++;
			rxr->rx_packets.ev_count++;
			/* capture data for AIM */
			rxr->bytes += sendmp->m_pkthdr.len;
			rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
			/* Process vlan info */
			if ((rxr->vtag_strip) &&
			    (staterr & IXGBE_RXD_STAT_VP))
				vtag = le16toh(cur->wb.upper.vlan);
			/* A tag value of 0 is treated the same as "no tag". */
			if (vtag) {
				VLAN_INPUT_TAG(ifp, sendmp, vtag,
				    printf("%s: could not apply VLAN "
					"tag", __func__));
			}
			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
				ixgbe_rx_checksum(staterr, sendmp, ptype,
				   &adapter->stats.pf);
			}

			/* Flow-id / RSS hash tagging below is compiled out. */
#if 0 /* FreeBSD */
                        /*
                         * In case of multiqueue, we have RXCSUM.PCSD bit set
                         * and never cleared. This means we have RSS hash
                         * available to be used.
                         */
                        if (adapter->num_queues > 1) {
                                sendmp->m_pkthdr.flowid =
                                    le32toh(cur->wb.lower.hi_dword.rss);
                                switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
                                    case IXGBE_RXDADV_RSSTYPE_IPV4:
                                        M_HASHTYPE_SET(sendmp,
                                            M_HASHTYPE_RSS_IPV4);
                                        break;
                                    case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
                                        M_HASHTYPE_SET(sendmp,
                                            M_HASHTYPE_RSS_TCP_IPV4);
                                        break;
                                    case IXGBE_RXDADV_RSSTYPE_IPV6:
                                        M_HASHTYPE_SET(sendmp,
                                            M_HASHTYPE_RSS_IPV6);
                                        break;
                                    case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
                                        M_HASHTYPE_SET(sendmp,
                                            M_HASHTYPE_RSS_TCP_IPV6);
                                        break;
                                    case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
                                        M_HASHTYPE_SET(sendmp,
                                            M_HASHTYPE_RSS_IPV6_EX);
                                        break;
                                    case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
                                        M_HASHTYPE_SET(sendmp,
                                            M_HASHTYPE_RSS_TCP_IPV6_EX);
                                        break;
#if __FreeBSD_version > 1100000
                                    case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
                                        M_HASHTYPE_SET(sendmp,
                                            M_HASHTYPE_RSS_UDP_IPV4);
                                        break;
                                    case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
                                        M_HASHTYPE_SET(sendmp,
                                            M_HASHTYPE_RSS_UDP_IPV6);
                                        break;
                                    case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
                                        M_HASHTYPE_SET(sendmp,
                                            M_HASHTYPE_RSS_UDP_IPV6_EX);
                                        break;
#endif
                                    default:
                                        M_HASHTYPE_SET(sendmp,
                                            M_HASHTYPE_OPAQUE_HASH);
                                }
                        } else {
                                sendmp->m_pkthdr.flowid = que->msix;
				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
			}
#endif
		}
next_desc:
		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

		/* Advance our pointers to the next descriptor. */
		if (++i == rxr->num_desc)
			i = 0;

		/* Now send to the stack or do LRO */
		if (sendmp != NULL) {
			/*
			 * ixgbe_rx_input() releases and re-takes the RX lock
			 * around the hand-off to the stack, so the ring
			 * position is stored before the call and re-read
			 * afterwards.
			 */
			rxr->next_to_check = i;
			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
			i = rxr->next_to_check;
		}

               /* Every 8 descriptors we go to refresh mbufs */
		if (processed == 8) {
			ixgbe_refresh_mbufs(rxr, i);
			processed = 0;
		}
	}

	/* Refresh any remaining buf structs */
	if (ixgbe_rx_unrefreshed(rxr))
		ixgbe_refresh_mbufs(rxr, i);

	rxr->next_to_check = i;

#ifdef LRO
	/*
	 * Flush any outstanding LRO work
	 */
	tcp_lro_flush_all(lro);
#endif /* LRO */

	IXGBE_RX_UNLOCK(rxr);

	/*
	** Still have cleaning to do?
	**
	** staterr holds the status of the last descriptor read in the
	** loop (or 0 if the loop body never ran).
	*/
	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
		return true;
	else
		return false;
}
   2081 
   2082 
   2083 /*********************************************************************
   2084  *
   2085  *  Verify that the hardware indicated that the checksum is valid.
   2086  *  Inform the stack about the status of checksum so that stack
   2087  *  doesn't spend time verifying the checksum.
   2088  *
   2089  *********************************************************************/
   2090 static void
   2091 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
   2092     struct ixgbe_hw_stats *stats)
   2093 {
   2094 	u16	status = (u16) staterr;
   2095 	u8	errors = (u8) (staterr >> 24);
   2096 #if 0
   2097 	bool	sctp = false;
   2098 
   2099 	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
   2100 	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
   2101 		sctp = true;
   2102 #endif
   2103 
   2104 	/* IPv4 checksum */
   2105 	if (status & IXGBE_RXD_STAT_IPCS) {
   2106 		stats->ipcs.ev_count++;
   2107 		if (!(errors & IXGBE_RXD_ERR_IPE)) {
   2108 			/* IP Checksum Good */
   2109 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
   2110 		} else {
   2111 			stats->ipcs_bad.ev_count++;
   2112 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
   2113 		}
   2114 	}
   2115 	/* TCP/UDP/SCTP checksum */
   2116 	if (status & IXGBE_RXD_STAT_L4CS) {
   2117 		stats->l4cs.ev_count++;
   2118 		int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
   2119 		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
   2120 			mp->m_pkthdr.csum_flags |= type;
   2121 		} else {
   2122 			stats->l4cs_bad.ev_count++;
   2123 			mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
   2124 		}
   2125 	}
   2126 }
   2127 
   2128 
   2129 /********************************************************************
   2130  * Manage DMA'able memory.
   2131  *******************************************************************/
   2132 
   2133 int
   2134 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
   2135 		struct ixgbe_dma_alloc *dma, const int mapflags)
   2136 {
   2137 	device_t dev = adapter->dev;
   2138 	int             r, rsegs;
   2139 
   2140 	r = ixgbe_dma_tag_create(adapter->osdep.dmat,	/* parent */
   2141 			       DBA_ALIGN, 0,	/* alignment, bounds */
   2142 			       size,	/* maxsize */
   2143 			       1,	/* nsegments */
   2144 			       size,	/* maxsegsize */
   2145 			       BUS_DMA_ALLOCNOW,	/* flags */
   2146 			       &dma->dma_tag);
   2147 	if (r != 0) {
   2148 		aprint_error_dev(dev,
   2149 		    "%s: ixgbe_dma_tag_create failed; error %d\n", __func__, r);
   2150 		goto fail_0;
   2151 	}
   2152 
   2153 	r = bus_dmamem_alloc(dma->dma_tag->dt_dmat,
   2154 		size,
   2155 		dma->dma_tag->dt_alignment,
   2156 		dma->dma_tag->dt_boundary,
   2157 		&dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
   2158 	if (r != 0) {
   2159 		aprint_error_dev(dev,
   2160 		    "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
   2161 		goto fail_1;
   2162 	}
   2163 
   2164 	r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
   2165 	    size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
   2166 	if (r != 0) {
   2167 		aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
   2168 		    __func__, r);
   2169 		goto fail_2;
   2170 	}
   2171 
   2172 	r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
   2173 	if (r != 0) {
   2174 		aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
   2175 		    __func__, r);
   2176 		goto fail_3;
   2177 	}
   2178 
   2179 	r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map, dma->dma_vaddr,
   2180 			    size,
   2181 			    NULL,
   2182 			    mapflags | BUS_DMA_NOWAIT);
   2183 	if (r != 0) {
   2184 		aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
   2185 		    __func__, r);
   2186 		goto fail_4;
   2187 	}
   2188 	dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
   2189 	dma->dma_size = size;
   2190 	return 0;
   2191 fail_4:
   2192 	ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
   2193 fail_3:
   2194 	bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
   2195 fail_2:
   2196 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
   2197 fail_1:
   2198 	ixgbe_dma_tag_destroy(dma->dma_tag);
   2199 fail_0:
   2200 	return r;
   2201 }
   2202 
   2203 void
   2204 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
   2205 {
   2206 	bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
   2207 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   2208 	ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
   2209 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
   2210 	ixgbe_dma_tag_destroy(dma->dma_tag);
   2211 }
   2212 
   2213 
   2214 /*********************************************************************
   2215  *
   2216  *  Allocate memory for the transmit and receive rings, and then
   2217  *  the descriptors associated with each, called only once at attach.
   2218  *
   2219  **********************************************************************/
   2220 int
   2221 ixgbe_allocate_queues(struct adapter *adapter)
   2222 {
   2223 	device_t	dev = adapter->dev;
   2224 	struct ix_queue	*que;
   2225 	struct tx_ring	*txr;
   2226 	struct rx_ring	*rxr;
   2227 	int rsize, tsize, error = IXGBE_SUCCESS;
   2228 	int txconf = 0, rxconf = 0;
   2229 #ifdef PCI_IOV
   2230 	enum ixgbe_iov_mode iov_mode;
   2231 #endif
   2232 
   2233         /* First allocate the top level queue structs */
   2234         if (!(adapter->queues =
   2235             (struct ix_queue *) malloc(sizeof(struct ix_queue) *
   2236             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
   2237                 aprint_error_dev(dev, "Unable to allocate queue memory\n");
   2238                 error = ENOMEM;
   2239                 goto fail;
   2240         }
   2241 
   2242 	/* First allocate the TX ring struct memory */
   2243 	if (!(adapter->tx_rings =
   2244 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
   2245 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
   2246 		aprint_error_dev(dev, "Unable to allocate TX ring memory\n");
   2247 		error = ENOMEM;
   2248 		goto tx_fail;
   2249 	}
   2250 
   2251 	/* Next allocate the RX */
   2252 	if (!(adapter->rx_rings =
   2253 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
   2254 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
   2255 		aprint_error_dev(dev, "Unable to allocate RX ring memory\n");
   2256 		error = ENOMEM;
   2257 		goto rx_fail;
   2258 	}
   2259 
   2260 	/* For the ring itself */
   2261 	tsize = roundup2(adapter->num_tx_desc *
   2262 	    sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
   2263 
   2264 #ifdef PCI_IOV
   2265 	iov_mode = ixgbe_get_iov_mode(adapter);
   2266 	adapter->pool = ixgbe_max_vfs(iov_mode);
   2267 #else
   2268 	adapter->pool = 0;
   2269 #endif
   2270 	/*
   2271 	 * Now set up the TX queues, txconf is needed to handle the
   2272 	 * possibility that things fail midcourse and we need to
   2273 	 * undo memory gracefully
   2274 	 */
   2275 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
   2276 		/* Set up some basics */
   2277 		txr = &adapter->tx_rings[i];
   2278 		txr->adapter = adapter;
   2279 #ifdef PCI_IOV
   2280 		txr->me = ixgbe_pf_que_index(iov_mode, i);
   2281 #else
   2282 		txr->me = i;
   2283 #endif
   2284 		txr->num_desc = adapter->num_tx_desc;
   2285 
   2286 		/* Initialize the TX side lock */
   2287 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
   2288 		    device_xname(dev), txr->me);
   2289 		mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
   2290 
   2291 		if (ixgbe_dma_malloc(adapter, tsize,
   2292 			&txr->txdma, BUS_DMA_NOWAIT)) {
   2293 			aprint_error_dev(dev,
   2294 			    "Unable to allocate TX Descriptor memory\n");
   2295 			error = ENOMEM;
   2296 			goto err_tx_desc;
   2297 		}
   2298 		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
   2299 		bzero((void *)txr->tx_base, tsize);
   2300 
   2301         	/* Now allocate transmit buffers for the ring */
   2302         	if (ixgbe_allocate_transmit_buffers(txr)) {
   2303 			aprint_error_dev(dev,
   2304 			    "Critical Failure setting up transmit buffers\n");
   2305 			error = ENOMEM;
   2306 			goto err_tx_desc;
   2307         	}
   2308 #ifndef IXGBE_LEGACY_TX
   2309 		/* Allocate a buf ring */
   2310 		txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
   2311 		    M_WAITOK, &txr->tx_mtx);
   2312 		if (txr->br == NULL) {
   2313 			aprint_error_dev(dev,
   2314 			    "Critical Failure setting up buf ring\n");
   2315 			error = ENOMEM;
   2316 			goto err_tx_desc;
   2317         	}
   2318 #endif
   2319 	}
   2320 
   2321 	/*
   2322 	 * Next the RX queues...
   2323 	 */
   2324 	rsize = roundup2(adapter->num_rx_desc *
   2325 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
   2326 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
   2327 		rxr = &adapter->rx_rings[i];
   2328 		/* Set up some basics */
   2329 		rxr->adapter = adapter;
   2330 #ifdef PCI_IOV
   2331 		rxr->me = ixgbe_pf_que_index(iov_mode, i);
   2332 #else
   2333 		rxr->me = i;
   2334 #endif
   2335 		rxr->num_desc = adapter->num_rx_desc;
   2336 
   2337 		/* Initialize the RX side lock */
   2338 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
   2339 		    device_xname(dev), rxr->me);
   2340 		mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
   2341 
   2342 		if (ixgbe_dma_malloc(adapter, rsize,
   2343 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
   2344 			aprint_error_dev(dev,
   2345 			    "Unable to allocate RxDescriptor memory\n");
   2346 			error = ENOMEM;
   2347 			goto err_rx_desc;
   2348 		}
   2349 		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
   2350 		bzero((void *)rxr->rx_base, rsize);
   2351 
   2352         	/* Allocate receive buffers for the ring*/
   2353 		if (ixgbe_allocate_receive_buffers(rxr)) {
   2354 			aprint_error_dev(dev,
   2355 			    "Critical Failure setting up receive buffers\n");
   2356 			error = ENOMEM;
   2357 			goto err_rx_desc;
   2358 		}
   2359 	}
   2360 
   2361 	/*
   2362 	** Finally set up the queue holding structs
   2363 	*/
   2364 	for (int i = 0; i < adapter->num_queues; i++) {
   2365 		que = &adapter->queues[i];
   2366 		que->adapter = adapter;
   2367 		que->me = i;
   2368 		que->txr = &adapter->tx_rings[i];
   2369 		que->rxr = &adapter->rx_rings[i];
   2370 	}
   2371 
   2372 	return (0);
   2373 
   2374 err_rx_desc:
   2375 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
   2376 		ixgbe_dma_free(adapter, &rxr->rxdma);
   2377 err_tx_desc:
   2378 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
   2379 		ixgbe_dma_free(adapter, &txr->txdma);
   2380 	free(adapter->rx_rings, M_DEVBUF);
   2381 rx_fail:
   2382 	free(adapter->tx_rings, M_DEVBUF);
   2383 tx_fail:
   2384 	free(adapter->queues, M_DEVBUF);
   2385 fail:
   2386 	return (error);
   2387 }
   2388 
   2389