ix_txrx.c revision 1.7
      1 /******************************************************************************
      2 
      3   Copyright (c) 2001-2015, Intel Corporation
      4   All rights reserved.
      5 
      6   Redistribution and use in source and binary forms, with or without
      7   modification, are permitted provided that the following conditions are met:
      8 
      9    1. Redistributions of source code must retain the above copyright notice,
     10       this list of conditions and the following disclaimer.
     11 
     12    2. Redistributions in binary form must reproduce the above copyright
     13       notice, this list of conditions and the following disclaimer in the
     14       documentation and/or other materials provided with the distribution.
     15 
     16    3. Neither the name of the Intel Corporation nor the names of its
     17       contributors may be used to endorse or promote products derived from
     18       this software without specific prior written permission.
     19 
     20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     30   POSSIBILITY OF SUCH DAMAGE.
     31 
     32 ******************************************************************************/
     33 /*
     34  * Copyright (c) 2011 The NetBSD Foundation, Inc.
     35  * All rights reserved.
     36  *
     37  * This code is derived from software contributed to The NetBSD Foundation
     38  * by Coyote Point Systems, Inc.
     39  *
     40  * Redistribution and use in source and binary forms, with or without
     41  * modification, are permitted provided that the following conditions
     42  * are met:
     43  * 1. Redistributions of source code must retain the above copyright
     44  *    notice, this list of conditions and the following disclaimer.
     45  * 2. Redistributions in binary form must reproduce the above copyright
     46  *    notice, this list of conditions and the following disclaimer in the
     47  *    documentation and/or other materials provided with the distribution.
     48  *
     49  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     50  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     51  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     52  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     53  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     54  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     55  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     56  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     57  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     58  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     59  * POSSIBILITY OF SUCH DAMAGE.
     60  */
     61 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 289238 2015-10-13 17:34:18Z sbruno $*/
     62 /*$NetBSD: ix_txrx.c,v 1.7 2016/12/02 10:34:23 msaitoh Exp $*/
     63 
     64 #include "ixgbe.h"
     65 
     66 #ifdef DEV_NETMAP
     67 #include <net/netmap.h>
     68 #include <sys/selinfo.h>
     69 #include <dev/netmap/netmap_kern.h>
     70 
     71 extern int ix_crcstrip;
     72 #endif
     73 
      74 /*
      75 ** HW RSC control:
      76 **  This feature only works with
      77 **  IPv4, and only on 82599 and later.
      78 **  It also breaks IP forwarding, and
      79 **  unlike software LRO that cannot be
      80 **  controlled by the stack.  For these
      81 **  reasons it is left off by default,
      82 **  and there is no tunable interface;
      83 **  enabling it requires changing this
      84 **  flag and recompiling.
      85 */
     86 static bool ixgbe_rsc_enable = FALSE;
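         /*
          * When set to TRUE (a compile-time change), ixgbe_setup_receive_ring()
          * calls ixgbe_setup_hw_rsc() below, which programs the RSCCTL and
          * RDRXCTL registers for each RX ring.
          */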
     87 
     88 #ifdef IXGBE_FDIR
      89 /*
      90 ** For Flow Director: this is the
      91 ** TX packet sampling interval for
      92 ** the filter pool; with the default
      93 ** of 20, every 20th packet is probed.
      94 **
      95 ** This feature can be disabled by
      96 ** setting this to 0.
      97 */
     98 static int atr_sample_rate = 20;
     99 #endif
    100 
    101 /* Shared PCI config read/write */
    102 u16
    103 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg)
    104 {
    105 	switch (reg % 4) {
    106 	case 0:
    107 		return pci_conf_read(hw->back->pc, hw->back->tag, reg) &
    108 		    __BITS(15, 0);
    109 	case 2:
    110 		return __SHIFTOUT(pci_conf_read(hw->back->pc, hw->back->tag,
    111 		    reg - 2), __BITS(31, 16));
    112 	default:
     113 		panic("%s: invalid register (%" PRIx32 ")", __func__, reg);
    114 		break;
    115 	}
    116 }
    117 
    118 void
    119 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value)
    120 {
    121 	pcireg_t old;
    122 
    123 	switch (reg % 4) {
    124 	case 0:
    125 		old = pci_conf_read(hw->back->pc, hw->back->tag, reg) &
    126 		    __BITS(31, 16);
    127 		pci_conf_write(hw->back->pc, hw->back->tag, reg, value | old);
    128 		break;
    129 	case 2:
    130 		old = pci_conf_read(hw->back->pc, hw->back->tag, reg - 2) &
    131 		    __BITS(15, 0);
    132 		pci_conf_write(hw->back->pc, hw->back->tag, reg - 2,
    133 		    __SHIFTIN(value, __BITS(31, 16)) | old);
    134 		break;
    135 	default:
     136 		panic("%s: invalid register (%" PRIx32 ")", __func__, reg);
    137 		break;
    138 	}
    139 
    140 	return;
    141 }
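         /*
          * Illustrative only: these helpers emulate 16-bit PCI config cycles
          * on top of the 32-bit pci_conf_read()/pci_conf_write() interface.
          * For example, a read of the 16-bit word at offset 0x06 (not dword
          * aligned) takes "case 2" above and is equivalent to
          * __SHIFTOUT(pci_conf_read(hw->back->pc, hw->back->tag, 0x04),
          *     __BITS(31, 16)).
          */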
    142 
    143 /*********************************************************************
    144  *  Local Function prototypes
    145  *********************************************************************/
    146 static void	ixgbe_setup_transmit_ring(struct tx_ring *);
    147 static void     ixgbe_free_transmit_buffers(struct tx_ring *);
    148 static int	ixgbe_setup_receive_ring(struct rx_ring *);
    149 static void     ixgbe_free_receive_buffers(struct rx_ring *);
    150 
    151 static void	ixgbe_rx_checksum(u32, struct mbuf *, u32,
    152 		    struct ixgbe_hw_stats *);
    153 static void	ixgbe_refresh_mbufs(struct rx_ring *, int);
    154 static int      ixgbe_xmit(struct tx_ring *, struct mbuf *);
    155 static int	ixgbe_tx_ctx_setup(struct tx_ring *,
    156 		    struct mbuf *, u32 *, u32 *);
    157 static int	ixgbe_tso_setup(struct tx_ring *,
    158 		    struct mbuf *, u32 *, u32 *);
    159 #ifdef IXGBE_FDIR
    160 static void	ixgbe_atr(struct tx_ring *, struct mbuf *);
    161 #endif
    162 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
    163 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
    164 		    struct mbuf *, u32);
    165 
    166 static void	ixgbe_setup_hw_rsc(struct rx_ring *);
    167 
    168 #ifdef IXGBE_LEGACY_TX
    169 /*********************************************************************
    170  *  Transmit entry point
    171  *
    172  *  ixgbe_start is called by the stack to initiate a transmit.
    173  *  The driver will remain in this routine as long as there are
    174  *  packets to transmit and transmit resources are available.
     175  *  If resources are not available, the stack is notified and
    176  *  the packet is requeued.
    177  **********************************************************************/
    178 
    179 void
    180 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
    181 {
    182 	int rc;
    183 	struct mbuf    *m_head;
    184 	struct adapter *adapter = txr->adapter;
    185 
    186 	IXGBE_TX_LOCK_ASSERT(txr);
    187 
    188 	if ((ifp->if_flags & IFF_RUNNING) == 0)
    189 		return;
    190 	if (!adapter->link_active)
    191 		return;
    192 
    193 	while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
    194 		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
    195 			break;
    196 
    197 		IFQ_POLL(&ifp->if_snd, m_head);
    198 		if (m_head == NULL)
    199 			break;
    200 
    201 		if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
    202 			break;
    203 		}
    204 		IFQ_DEQUEUE(&ifp->if_snd, m_head);
    205 		if (rc == EFBIG) {
    206 			struct mbuf *mtmp;
    207 
    208 			if ((mtmp = m_defrag(m_head, M_NOWAIT)) != NULL) {
    209 				m_head = mtmp;
    210 				rc = ixgbe_xmit(txr, m_head);
    211 				if (rc != 0)
    212 					adapter->efbig2_tx_dma_setup.ev_count++;
    213 			} else
    214 				adapter->m_defrag_failed.ev_count++;
    215 		}
    216 		if (rc != 0) {
    217 			m_freem(m_head);
    218 			continue;
    219 		}
    220 
    221 		/* Send a copy of the frame to the BPF listener */
    222 		bpf_mtap(ifp, m_head);
    223 	}
    224 	return;
    225 }
    226 
    227 /*
    228  * Legacy TX start - called by the stack, this
    229  * always uses the first tx ring, and should
    230  * not be used with multiqueue tx enabled.
    231  */
    232 void
    233 ixgbe_start(struct ifnet *ifp)
    234 {
    235 	struct adapter *adapter = ifp->if_softc;
    236 	struct tx_ring	*txr = adapter->tx_rings;
    237 
    238 	if (ifp->if_flags & IFF_RUNNING) {
    239 		IXGBE_TX_LOCK(txr);
    240 		ixgbe_start_locked(txr, ifp);
    241 		IXGBE_TX_UNLOCK(txr);
    242 	}
    243 	return;
    244 }
    245 
    246 #else /* ! IXGBE_LEGACY_TX */
    247 
    248 /*
    249 ** Multiqueue Transmit driver
    250 **
    251 */
    252 int
    253 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
    254 {
    255 	struct adapter	*adapter = ifp->if_softc;
    256 	struct ix_queue	*que;
    257 	struct tx_ring	*txr;
    258 	int 		i, err = 0;
    259 #ifdef	RSS
    260 	uint32_t bucket_id;
    261 #endif
    262 
    263 	/*
    264 	 * When doing RSS, map it to the same outbound queue
    265 	 * as the incoming flow would be mapped to.
    266 	 *
     267 	 * If everything is set up correctly, it should be the
     268 	 * same bucket as the one the current CPU belongs to.
    269 	 */
    270 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
    271 #ifdef	RSS
    272 		if (rss_hash2bucket(m->m_pkthdr.flowid,
    273 		    M_HASHTYPE_GET(m), &bucket_id) == 0)
    274 			/* TODO: spit out something if bucket_id > num_queues? */
    275 			i = bucket_id % adapter->num_queues;
    276 		else
    277 #endif
    278 			i = m->m_pkthdr.flowid % adapter->num_queues;
    279 	} else
    280 		i = curcpu % adapter->num_queues;
    281 
    282 	/* Check for a hung queue and pick alternative */
    283 	if (((1 << i) & adapter->active_queues) == 0)
    284 		i = ffsl(adapter->active_queues);
    285 
    286 	txr = &adapter->tx_rings[i];
    287 	que = &adapter->queues[i];
    288 
    289 	err = drbr_enqueue(ifp, txr->br, m);
    290 	if (err)
    291 		return (err);
    292 	if (IXGBE_TX_TRYLOCK(txr)) {
    293 		ixgbe_mq_start_locked(ifp, txr);
    294 		IXGBE_TX_UNLOCK(txr);
    295 	} else
    296 		softint_schedule(txr->txq_si);
    297 
    298 	return (0);
    299 }
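         /*
          * Worked example (illustrative only): with num_queues == 8 and an
          * mbuf carrying flowid 0x12345 (74565), and without RSS compiled in,
          * the frame is enqueued on TX ring 74565 % 8 == 5, which is intended
          * to match the RX queue the incoming side of the flow was hashed to.
          */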
    300 
    301 int
    302 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
    303 {
    304 	struct adapter  *adapter = txr->adapter;
    305 	struct mbuf     *next;
    306 	int             enqueued = 0, err = 0;
    307 
    308 	if (((ifp->if_flags & IFF_RUNNING) == 0) ||
    309 	    adapter->link_active == 0)
    310 		return (ENETDOWN);
    311 
    312 	/* Process the queue */
    313 #if __FreeBSD_version < 901504
    314 	next = drbr_dequeue(ifp, txr->br);
    315 	while (next != NULL) {
    316 		if ((err = ixgbe_xmit(txr, &next)) != 0) {
    317 			if (next != NULL)
    318 				err = drbr_enqueue(ifp, txr->br, next);
    319 #else
    320 	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
    321 		if ((err = ixgbe_xmit(txr, &next)) != 0) {
    322 			if (next == NULL) {
    323 				drbr_advance(ifp, txr->br);
    324 			} else {
    325 				drbr_putback(ifp, txr->br, next);
    326 			}
    327 #endif
    328 			break;
    329 		}
    330 #if __FreeBSD_version >= 901504
    331 		drbr_advance(ifp, txr->br);
    332 #endif
    333 		enqueued++;
    334 #if 0 // this is VF-only
    335 #if __FreeBSD_version >= 1100036
    336 		/*
    337 		 * Since we're looking at the tx ring, we can check
    338 		 * to see if we're a VF by examing our tail register
    339 		 * address.
    340 		 */
    341 		if (txr->tail < IXGBE_TDT(0) && next->m_flags & M_MCAST)
    342 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
    343 #endif
    344 #endif
    345 		/* Send a copy of the frame to the BPF listener */
    346 		bpf_mtap(ifp, next);
    347 		if ((ifp->if_flags & IFF_RUNNING) == 0)
    348 			break;
    349 #if __FreeBSD_version < 901504
    350 		next = drbr_dequeue(ifp, txr->br);
    351 #endif
    352 	}
    353 
    354 	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
    355 		ixgbe_txeof(txr);
    356 
    357 	return (err);
    358 }
    359 
    360 /*
    361  * Called from a taskqueue to drain queued transmit packets.
    362  */
    363 void
    364 ixgbe_deferred_mq_start(void *arg, int pending)
    365 {
    366 	struct tx_ring *txr = arg;
    367 	struct adapter *adapter = txr->adapter;
    368 	struct ifnet *ifp = adapter->ifp;
    369 
    370 	IXGBE_TX_LOCK(txr);
    371 	if (!drbr_empty(ifp, txr->br))
    372 		ixgbe_mq_start_locked(ifp, txr);
    373 	IXGBE_TX_UNLOCK(txr);
    374 }
    375 
    376 /*
    377  * Flush all ring buffers
    378  */
    379 void
    380 ixgbe_qflush(struct ifnet *ifp)
    381 {
    382 	struct adapter	*adapter = ifp->if_softc;
    383 	struct tx_ring	*txr = adapter->tx_rings;
    384 	struct mbuf	*m;
    385 
    386 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
    387 		IXGBE_TX_LOCK(txr);
    388 		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
    389 			m_freem(m);
    390 		IXGBE_TX_UNLOCK(txr);
    391 	}
    392 	if_qflush(ifp);
    393 }
    394 #endif /* IXGBE_LEGACY_TX */
    395 
    396 
    397 /*********************************************************************
    398  *
    399  *  This routine maps the mbufs to tx descriptors, allowing the
    400  *  TX engine to transmit the packets.
    401  *  	- return 0 on success, positive on failure
    402  *
    403  **********************************************************************/
    404 
    405 static int
    406 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
    407 {
    408 	struct m_tag *mtag;
    409 	struct adapter  *adapter = txr->adapter;
    410 	struct ethercom *ec = &adapter->osdep.ec;
    411 	u32		olinfo_status = 0, cmd_type_len;
    412 	int             i, j, error;
    413 	int		first;
    414 	bus_dmamap_t	map;
    415 	struct ixgbe_tx_buf *txbuf;
    416 	union ixgbe_adv_tx_desc *txd = NULL;
    417 
    418 	/* Basic descriptor defines */
    419         cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
    420 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
    421 
    422 	if ((mtag = VLAN_OUTPUT_TAG(ec, m_head)) != NULL)
    423         	cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
    424 
    425         /*
    426          * Important to capture the first descriptor
    427          * used because it will contain the index of
    428          * the one we tell the hardware to report back
    429          */
    430         first = txr->next_avail_desc;
    431 	txbuf = &txr->tx_buffers[first];
    432 	map = txbuf->map;
    433 
    434 	/*
    435 	 * Map the packet for DMA.
    436 	 */
    437 	error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map,
    438 	    m_head, BUS_DMA_NOWAIT);
    439 
    440 	if (__predict_false(error)) {
    441 
    442 		switch (error) {
    443 		case EAGAIN:
    444 			adapter->eagain_tx_dma_setup.ev_count++;
    445 			return EAGAIN;
    446 		case ENOMEM:
    447 			adapter->enomem_tx_dma_setup.ev_count++;
    448 			return EAGAIN;
    449 		case EFBIG:
    450 			/*
    451 			 * XXX Try it again?
    452 			 * do m_defrag() and retry bus_dmamap_load_mbuf().
    453 			 */
    454 			adapter->efbig_tx_dma_setup.ev_count++;
    455 			return error;
    456 		case EINVAL:
    457 			adapter->einval_tx_dma_setup.ev_count++;
    458 			return error;
    459 		default:
    460 			adapter->other_tx_dma_setup.ev_count++;
    461 			return error;
    462 		}
    463 	}
    464 
    465 	/* Make certain there are enough descriptors */
    466 	if (map->dm_nsegs > txr->tx_avail - 2) {
    467 		txr->no_desc_avail.ev_count++;
    468 		ixgbe_dmamap_unload(txr->txtag, txbuf->map);
    469 		return EAGAIN;
    470 	}
    471 
    472 	/*
     473 	 * Set up the appropriate offload context;
     474 	 * this will consume the first descriptor.
    475 	 */
    476 	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
    477 	if (__predict_false(error)) {
    478 		return (error);
    479 	}
    480 
    481 #ifdef IXGBE_FDIR
    482 	/* Do the flow director magic */
    483 	if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
    484 		++txr->atr_count;
    485 		if (txr->atr_count >= atr_sample_rate) {
    486 			ixgbe_atr(txr, m_head);
    487 			txr->atr_count = 0;
    488 		}
    489 	}
    490 #endif
    491 
    492 	i = txr->next_avail_desc;
    493 	for (j = 0; j < map->dm_nsegs; j++) {
    494 		bus_size_t seglen;
    495 		bus_addr_t segaddr;
    496 
    497 		txbuf = &txr->tx_buffers[i];
    498 		txd = &txr->tx_base[i];
    499 		seglen = map->dm_segs[j].ds_len;
    500 		segaddr = htole64(map->dm_segs[j].ds_addr);
    501 
    502 		txd->read.buffer_addr = segaddr;
    503 		txd->read.cmd_type_len = htole32(txr->txd_cmd |
     504 		    cmd_type_len | seglen);
    505 		txd->read.olinfo_status = htole32(olinfo_status);
    506 
    507 		if (++i == txr->num_desc)
    508 			i = 0;
    509 	}
    510 
    511 	txd->read.cmd_type_len |=
    512 	    htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
    513 	txr->tx_avail -= map->dm_nsegs;
    514 	txr->next_avail_desc = i;
    515 
    516 	txbuf->m_head = m_head;
    517 	/*
     518 	 * Here we swap the maps so that the last descriptor,
     519 	 * which gets the completion interrupt, holds the
     520 	 * loaded map, and the first descriptor gets the
     521 	 * unused map from this last buffer.
    522 	 */
    523 	txr->tx_buffers[first].map = txbuf->map;
    524 	txbuf->map = map;
    525 	bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
    526 	    BUS_DMASYNC_PREWRITE);
    527 
    528         /* Set the EOP descriptor that will be marked done */
    529         txbuf = &txr->tx_buffers[first];
    530 	txbuf->eop = txd;
    531 
    532         ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
    533 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
    534 	/*
     535 	 * Advance the Transmit Descriptor Tail (TDT); this tells the
    536 	 * hardware that this frame is available to transmit.
    537 	 */
    538 	++txr->total_packets.ev_count;
    539 	IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
    540 
    541 	/* Mark queue as having work */
    542 	if (txr->busy == 0)
    543 		txr->busy = 1;
    544 
    545 	return 0;
    546 }
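         /*
          * Descriptor accounting for the routine above: a frame consumes at
          * most one context descriptor, set up by ixgbe_tx_ctx_setup() (or
          * ixgbe_tso_setup() for TSO), plus one data descriptor per DMA
          * segment.  The last data descriptor carries EOP|RS so the hardware
          * writes back completion status, which ixgbe_txeof() later finds
          * through txbuf->eop.
          */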
    547 
    548 /*********************************************************************
    549  *
    550  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
    551  *  the information needed to transmit a packet on the wire. This is
     552  *  called only once at attach; setup is done on every reset.
    553  *
    554  **********************************************************************/
    555 int
    556 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
    557 {
    558 	struct adapter *adapter = txr->adapter;
    559 	device_t dev = adapter->dev;
    560 	struct ixgbe_tx_buf *txbuf;
    561 	int error, i;
    562 
    563 	/*
    564 	 * Setup DMA descriptor areas.
    565 	 */
    566 	if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat,	/* parent */
    567 			       1, 0,		/* alignment, bounds */
    568 			       IXGBE_TSO_SIZE,		/* maxsize */
    569 			       adapter->num_segs,	/* nsegments */
    570 			       PAGE_SIZE,		/* maxsegsize */
    571 			       0,			/* flags */
    572 			       &txr->txtag))) {
    573 		aprint_error_dev(dev,"Unable to allocate TX DMA tag\n");
    574 		goto fail;
    575 	}
    576 
    577 	if (!(txr->tx_buffers =
    578 	    (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
    579 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
    580 		aprint_error_dev(dev, "Unable to allocate tx_buffer memory\n");
    581 		error = ENOMEM;
    582 		goto fail;
    583 	}
    584 
    585         /* Create the descriptor buffer dma maps */
    586 	txbuf = txr->tx_buffers;
    587 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
    588 		error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
    589 		if (error != 0) {
    590 			aprint_error_dev(dev,
    591 			    "Unable to create TX DMA map (%d)\n", error);
    592 			goto fail;
    593 		}
    594 	}
    595 
    596 	return 0;
    597 fail:
     598 	/* Free everything; this handles the case where we failed partway */
    599 	ixgbe_free_transmit_structures(adapter);
    600 	return (error);
    601 }
    602 
    603 /*********************************************************************
    604  *
    605  *  Initialize a transmit ring.
    606  *
    607  **********************************************************************/
    608 static void
    609 ixgbe_setup_transmit_ring(struct tx_ring *txr)
    610 {
    611 	struct adapter *adapter = txr->adapter;
    612 	struct ixgbe_tx_buf *txbuf;
    613 #ifdef DEV_NETMAP
    614 	struct netmap_adapter *na = NA(adapter->ifp);
    615 	struct netmap_slot *slot;
    616 #endif /* DEV_NETMAP */
    617 
    618 	/* Clear the old ring contents */
    619 	IXGBE_TX_LOCK(txr);
    620 #ifdef DEV_NETMAP
    621 	/*
    622 	 * (under lock): if in netmap mode, do some consistency
    623 	 * checks and set slot to entry 0 of the netmap ring.
    624 	 */
    625 	slot = netmap_reset(na, NR_TX, txr->me, 0);
    626 #endif /* DEV_NETMAP */
    627 	bzero((void *)txr->tx_base,
    628 	      (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
    629 	/* Reset indices */
    630 	txr->next_avail_desc = 0;
    631 	txr->next_to_clean = 0;
    632 
    633 	/* Free any existing tx buffers. */
    634         txbuf = txr->tx_buffers;
    635 	for (int i = 0; i < txr->num_desc; i++, txbuf++) {
    636 		if (txbuf->m_head != NULL) {
    637 			bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
    638 			    0, txbuf->m_head->m_pkthdr.len,
    639 			    BUS_DMASYNC_POSTWRITE);
    640 			ixgbe_dmamap_unload(txr->txtag, txbuf->map);
    641 			m_freem(txbuf->m_head);
    642 			txbuf->m_head = NULL;
    643 		}
    644 #ifdef DEV_NETMAP
    645 		/*
    646 		 * In netmap mode, set the map for the packet buffer.
    647 		 * NOTE: Some drivers (not this one) also need to set
    648 		 * the physical buffer address in the NIC ring.
    649 		 * Slots in the netmap ring (indexed by "si") are
    650 		 * kring->nkr_hwofs positions "ahead" wrt the
    651 		 * corresponding slot in the NIC ring. In some drivers
    652 		 * (not here) nkr_hwofs can be negative. Function
    653 		 * netmap_idx_n2k() handles wraparounds properly.
    654 		 */
    655 		if (slot) {
    656 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
    657 			netmap_load_map(na, txr->txtag,
    658 			    txbuf->map, NMB(na, slot + si));
    659 		}
    660 #endif /* DEV_NETMAP */
    661 		/* Clear the EOP descriptor pointer */
    662 		txbuf->eop = NULL;
    663         }
    664 
    665 #ifdef IXGBE_FDIR
    666 	/* Set the rate at which we sample packets */
    667 	if (adapter->hw.mac.type != ixgbe_mac_82598EB)
    668 		txr->atr_sample = atr_sample_rate;
    669 #endif
    670 
    671 	/* Set number of descriptors available */
    672 	txr->tx_avail = adapter->num_tx_desc;
    673 
    674 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
    675 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
    676 	IXGBE_TX_UNLOCK(txr);
    677 }
    678 
    679 /*********************************************************************
    680  *
    681  *  Initialize all transmit rings.
    682  *
    683  **********************************************************************/
    684 int
    685 ixgbe_setup_transmit_structures(struct adapter *adapter)
    686 {
    687 	struct tx_ring *txr = adapter->tx_rings;
    688 
    689 	for (int i = 0; i < adapter->num_queues; i++, txr++)
    690 		ixgbe_setup_transmit_ring(txr);
    691 
    692 	return (0);
    693 }
    694 
    695 /*********************************************************************
    696  *
    697  *  Free all transmit rings.
    698  *
    699  **********************************************************************/
    700 void
    701 ixgbe_free_transmit_structures(struct adapter *adapter)
    702 {
    703 	struct tx_ring *txr = adapter->tx_rings;
    704 
    705 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
    706 		ixgbe_free_transmit_buffers(txr);
    707 		ixgbe_dma_free(adapter, &txr->txdma);
    708 		IXGBE_TX_LOCK_DESTROY(txr);
    709 	}
    710 	free(adapter->tx_rings, M_DEVBUF);
    711 }
    712 
    713 /*********************************************************************
    714  *
    715  *  Free transmit ring related data structures.
    716  *
    717  **********************************************************************/
    718 static void
    719 ixgbe_free_transmit_buffers(struct tx_ring *txr)
    720 {
    721 	struct adapter *adapter = txr->adapter;
    722 	struct ixgbe_tx_buf *tx_buffer;
    723 	int             i;
    724 
    725 	INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
    726 
    727 	if (txr->tx_buffers == NULL)
    728 		return;
    729 
    730 	tx_buffer = txr->tx_buffers;
    731 	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
    732 		if (tx_buffer->m_head != NULL) {
    733 			bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
    734 			    0, tx_buffer->m_head->m_pkthdr.len,
    735 			    BUS_DMASYNC_POSTWRITE);
    736 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
    737 			m_freem(tx_buffer->m_head);
    738 			tx_buffer->m_head = NULL;
    739 			if (tx_buffer->map != NULL) {
    740 				ixgbe_dmamap_destroy(txr->txtag,
    741 				    tx_buffer->map);
    742 				tx_buffer->map = NULL;
    743 			}
    744 		} else if (tx_buffer->map != NULL) {
    745 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
    746 			ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
    747 			tx_buffer->map = NULL;
    748 		}
    749 	}
    750 #ifndef IXGBE_LEGACY_TX
    751 	if (txr->br != NULL)
    752 		buf_ring_free(txr->br, M_DEVBUF);
    753 #endif
    754 	if (txr->tx_buffers != NULL) {
    755 		free(txr->tx_buffers, M_DEVBUF);
    756 		txr->tx_buffers = NULL;
    757 	}
    758 	if (txr->txtag != NULL) {
    759 		ixgbe_dma_tag_destroy(txr->txtag);
    760 		txr->txtag = NULL;
    761 	}
    762 	return;
    763 }
    764 
    765 /*********************************************************************
    766  *
    767  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
    768  *
    769  **********************************************************************/
    770 
    771 static int
    772 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
    773     u32 *cmd_type_len, u32 *olinfo_status)
    774 {
    775 	struct adapter *adapter = txr->adapter;
    776 	struct ethercom *ec = &adapter->osdep.ec;
    777 	struct m_tag *mtag;
    778 	struct ixgbe_adv_tx_context_desc *TXD;
    779 	struct ether_vlan_header *eh;
    780 	struct ip ip;
    781 	struct ip6_hdr ip6;
    782 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
    783 	int	ehdrlen, ip_hlen = 0;
    784 	u16	etype;
    785 	u8	ipproto __diagused = 0;
    786 	int	offload = TRUE;
    787 	int	ctxd = txr->next_avail_desc;
    788 	u16	vtag = 0;
    789 
    790 	/* First check if TSO is to be used */
    791 	if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4|M_CSUM_TSOv6))
    792 		return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
    793 
    794 	if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
    795 		offload = FALSE;
    796 
    797 	/* Indicate the whole packet as payload when not doing TSO */
    798        	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
    799 
    800 	/* Now ready a context descriptor */
    801 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
    802 
    803 	/*
    804 	** In advanced descriptors the vlan tag must
    805 	** be placed into the context descriptor. Hence
    806 	** we need to make one even if not doing offloads.
    807 	*/
    808 	if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
    809 		vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
    810 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
    811 	} else if (!IXGBE_IS_X550VF(adapter) && (offload == FALSE))
    812 		return (0);
    813 
    814 	/*
    815 	 * Determine where frame payload starts.
    816 	 * Jump over vlan headers if already present,
    817 	 * helpful for QinQ too.
    818 	 */
    819 	KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
    820 	eh = mtod(mp, struct ether_vlan_header *);
    821 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
    822 		KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
    823 		etype = ntohs(eh->evl_proto);
    824 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
    825 	} else {
    826 		etype = ntohs(eh->evl_encap_proto);
    827 		ehdrlen = ETHER_HDR_LEN;
    828 	}
    829 
    830 	/* Set the ether header length */
    831 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
    832 
    833 	if (offload == FALSE)
    834 		goto no_offloads;
    835 
    836 	switch (etype) {
    837 	case ETHERTYPE_IP:
    838 		m_copydata(mp, ehdrlen, sizeof(ip), &ip);
    839 		ip_hlen = ip.ip_hl << 2;
    840 		ipproto = ip.ip_p;
    841 #if 0
    842 		ip.ip_sum = 0;
    843 		m_copyback(mp, ehdrlen, sizeof(ip), &ip);
    844 #else
    845 		KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
    846 		    ip.ip_sum == 0);
    847 #endif
    848 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
    849 		break;
    850 	case ETHERTYPE_IPV6:
    851 		m_copydata(mp, ehdrlen, sizeof(ip6), &ip6);
    852 		ip_hlen = sizeof(ip6);
    853 		/* XXX-BZ this will go badly in case of ext hdrs. */
    854 		ipproto = ip6.ip6_nxt;
    855 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
    856 		break;
    857 	default:
    858 		break;
    859 	}
    860 
    861 	if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
    862 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
    863 
    864 	vlan_macip_lens |= ip_hlen;
    865 
    866 	if (mp->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_TCPv6)) {
    867 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
    868 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
    869 		KASSERT(ipproto == IPPROTO_TCP);
    870 	} else if (mp->m_pkthdr.csum_flags & (M_CSUM_UDPv4|M_CSUM_UDPv6)) {
    871 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
    872 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
    873 		KASSERT(ipproto == IPPROTO_UDP);
    874 	}
    875 
    876 no_offloads:
    877 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
    878 
    879 	/* Now copy bits into descriptor */
    880 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
    881 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
    882 	TXD->seqnum_seed = htole32(0);
    883 	TXD->mss_l4len_idx = htole32(0);
    884 
    885 	/* We've consumed the first desc, adjust counters */
    886 	if (++ctxd == txr->num_desc)
    887 		ctxd = 0;
    888 	txr->next_avail_desc = ctxd;
    889 	--txr->tx_avail;
    890 
    891         return 0;
    892 }
    893 
    894 /**********************************************************************
    895  *
    896  *  Setup work for hardware segmentation offload (TSO) on
    897  *  adapters using advanced tx descriptors
    898  *
    899  **********************************************************************/
    900 static int
    901 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
    902     u32 *cmd_type_len, u32 *olinfo_status)
    903 {
    904 	struct m_tag *mtag;
    905 	struct adapter *adapter = txr->adapter;
    906 	struct ethercom *ec = &adapter->osdep.ec;
    907 	struct ixgbe_adv_tx_context_desc *TXD;
    908 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
    909 	u32 mss_l4len_idx = 0, paylen;
    910 	u16 vtag = 0, eh_type;
    911 	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
    912 	struct ether_vlan_header *eh;
    913 #ifdef INET6
    914 	struct ip6_hdr *ip6;
    915 #endif
    916 #ifdef INET
    917 	struct ip *ip;
    918 #endif
    919 	struct tcphdr *th;
    920 
    921 
    922 	/*
    923 	 * Determine where frame payload starts.
    924 	 * Jump over vlan headers if already present
    925 	 */
    926 	eh = mtod(mp, struct ether_vlan_header *);
    927 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
    928 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
    929 		eh_type = eh->evl_proto;
    930 	} else {
    931 		ehdrlen = ETHER_HDR_LEN;
    932 		eh_type = eh->evl_encap_proto;
    933 	}
    934 
    935 	switch (ntohs(eh_type)) {
    936 #ifdef INET6
    937 	case ETHERTYPE_IPV6:
    938 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
    939 		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
    940 		if (ip6->ip6_nxt != IPPROTO_TCP)
    941 			return (ENXIO);
    942 		ip_hlen = sizeof(struct ip6_hdr);
    943 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
    944 		th = (struct tcphdr *)((char *)ip6 + ip_hlen);
    945 		th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
    946 		    &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
    947 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
    948 		break;
    949 #endif
    950 #ifdef INET
    951 	case ETHERTYPE_IP:
    952 		ip = (struct ip *)(mp->m_data + ehdrlen);
    953 		if (ip->ip_p != IPPROTO_TCP)
    954 			return (ENXIO);
    955 		ip->ip_sum = 0;
    956 		ip_hlen = ip->ip_hl << 2;
    957 		th = (struct tcphdr *)((char *)ip + ip_hlen);
    958 		th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
    959 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
    960 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
    961 		/* Tell transmit desc to also do IPv4 checksum. */
    962 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
    963 		break;
    964 #endif
    965 	default:
    966 		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
    967 		    __func__, ntohs(eh_type));
    968 		break;
    969 	}
    970 
    971 	ctxd = txr->next_avail_desc;
    972 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
    973 
    974 	tcp_hlen = th->th_off << 2;
    975 
    976 	/* This is used in the transmit desc in encap */
    977 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
    978 
    979 	/* VLAN MACLEN IPLEN */
    980 	if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
    981 		vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
    982                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
    983 	}
    984 
    985 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
    986 	vlan_macip_lens |= ip_hlen;
    987 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
    988 
    989 	/* ADV DTYPE TUCMD */
    990 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
    991 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
    992 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
    993 
    994 	/* MSS L4LEN IDX */
    995 	mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
    996 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
    997 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
    998 
    999 	TXD->seqnum_seed = htole32(0);
   1000 
   1001 	if (++ctxd == txr->num_desc)
   1002 		ctxd = 0;
   1003 
   1004 	txr->tx_avail--;
   1005 	txr->next_avail_desc = ctxd;
   1006 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
   1007 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
   1008 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
   1009 	++txr->tso_tx.ev_count;
   1010 	return (0);
   1011 }
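         /*
          * Note on the checksum seeding above: for TSO the TCP checksum field
          * is primed with a pseudo-header checksum computed over a zero
          * length (in_cksum_phdr()/in6_cksum_phdr()); the hardware then
          * completes the checksum for every segment it generates.  The
          * payload length placed in olinfo_status deliberately excludes the
          * L2, L3 and L4 headers.
          */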
   1012 
   1013 
   1014 /**********************************************************************
   1015  *
   1016  *  Examine each tx_buffer in the used queue. If the hardware is done
   1017  *  processing the packet then free associated resources. The
   1018  *  tx_buffer is put back on the free queue.
   1019  *
   1020  **********************************************************************/
   1021 void
   1022 ixgbe_txeof(struct tx_ring *txr)
   1023 {
   1024 	struct adapter		*adapter = txr->adapter;
   1025 	struct ifnet		*ifp = adapter->ifp;
   1026 	u32			work, processed = 0;
   1027 	u32			limit = adapter->tx_process_limit;
   1028 	struct ixgbe_tx_buf	*buf;
   1029 	union ixgbe_adv_tx_desc *txd;
   1030 
   1031 	KASSERT(mutex_owned(&txr->tx_mtx));
   1032 
   1033 #ifdef DEV_NETMAP
   1034 	if (ifp->if_capenable & IFCAP_NETMAP) {
   1035 		struct netmap_adapter *na = NA(ifp);
   1036 		struct netmap_kring *kring = &na->tx_rings[txr->me];
   1037 		txd = txr->tx_base;
   1038 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1039 		    BUS_DMASYNC_POSTREAD);
   1040 		/*
   1041 		 * In netmap mode, all the work is done in the context
   1042 		 * of the client thread. Interrupt handlers only wake up
   1043 		 * clients, which may be sleeping on individual rings
   1044 		 * or on a global resource for all rings.
   1045 		 * To implement tx interrupt mitigation, we wake up the client
   1046 		 * thread roughly every half ring, even if the NIC interrupts
   1047 		 * more frequently. This is implemented as follows:
   1048 		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
   1049 		 *   the slot that should wake up the thread (nkr_num_slots
   1050 		 *   means the user thread should not be woken up);
   1051 		 * - the driver ignores tx interrupts unless netmap_mitigate=0
   1052 		 *   or the slot has the DD bit set.
   1053 		 */
   1054 		if (!netmap_mitigate ||
   1055 		    (kring->nr_kflags < kring->nkr_num_slots &&
   1056 		    txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
   1057 			netmap_tx_irq(ifp, txr->me);
   1058 		}
   1059 		return;
   1060 	}
   1061 #endif /* DEV_NETMAP */
   1062 
   1063 	if (txr->tx_avail == txr->num_desc) {
   1064 		txr->busy = 0;
   1065 		return;
   1066 	}
   1067 
   1068 	/* Get work starting point */
   1069 	work = txr->next_to_clean;
   1070 	buf = &txr->tx_buffers[work];
   1071 	txd = &txr->tx_base[work];
   1072 	work -= txr->num_desc; /* The distance to ring end */
   1073         ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1074 	    BUS_DMASYNC_POSTREAD);
   1075 	do {
   1076 		union ixgbe_adv_tx_desc *eop= buf->eop;
   1077 		if (eop == NULL) /* No work */
   1078 			break;
   1079 
   1080 		if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
   1081 			break;	/* I/O not complete */
   1082 
   1083 		if (buf->m_head) {
   1084 			txr->bytes +=
   1085 			    buf->m_head->m_pkthdr.len;
   1086 			bus_dmamap_sync(txr->txtag->dt_dmat,
   1087 			    buf->map,
   1088 			    0, buf->m_head->m_pkthdr.len,
   1089 			    BUS_DMASYNC_POSTWRITE);
   1090 			ixgbe_dmamap_unload(txr->txtag,
   1091 			    buf->map);
   1092 			m_freem(buf->m_head);
   1093 			buf->m_head = NULL;
   1094 		}
   1095 		buf->eop = NULL;
   1096 		++txr->tx_avail;
   1097 
   1098 		/* We clean the range if multi segment */
   1099 		while (txd != eop) {
   1100 			++txd;
   1101 			++buf;
   1102 			++work;
   1103 			/* wrap the ring? */
   1104 			if (__predict_false(!work)) {
   1105 				work -= txr->num_desc;
   1106 				buf = txr->tx_buffers;
   1107 				txd = txr->tx_base;
   1108 			}
   1109 			if (buf->m_head) {
   1110 				txr->bytes +=
   1111 				    buf->m_head->m_pkthdr.len;
   1112 				bus_dmamap_sync(txr->txtag->dt_dmat,
   1113 				    buf->map,
   1114 				    0, buf->m_head->m_pkthdr.len,
   1115 				    BUS_DMASYNC_POSTWRITE);
   1116 				ixgbe_dmamap_unload(txr->txtag,
   1117 				    buf->map);
   1118 				m_freem(buf->m_head);
   1119 				buf->m_head = NULL;
   1120 			}
   1121 			++txr->tx_avail;
   1122 			buf->eop = NULL;
   1123 
   1124 		}
   1125 		++txr->packets;
   1126 		++processed;
   1127 		++ifp->if_opackets;
   1128 
   1129 		/* Try the next packet */
   1130 		++txd;
   1131 		++buf;
   1132 		++work;
   1133 		/* reset with a wrap */
   1134 		if (__predict_false(!work)) {
   1135 			work -= txr->num_desc;
   1136 			buf = txr->tx_buffers;
   1137 			txd = txr->tx_base;
   1138 		}
   1139 		prefetch(txd);
   1140 	} while (__predict_true(--limit));
   1141 
   1142 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1143 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   1144 
   1145 	work += txr->num_desc;
   1146 	txr->next_to_clean = work;
   1147 
   1148 	/*
    1149 	** Queue hang detection: we know there is
    1150 	** work outstanding or the first return
    1151 	** above would have been taken, so increment
    1152 	** busy if nothing was cleaned.  The local
    1153 	** timer then checks this count and marks
    1154 	** the queue HUNG if it exceeds a maximum.
   1155 	*/
   1156 	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
   1157 		++txr->busy;
   1158 	/*
    1159 	** If anything was cleaned, reset the state to 1;
    1160 	** note this clears HUNG if it was set.
   1161 	*/
   1162 	if (processed)
   1163 		txr->busy = 1;
   1164 
   1165 	if (txr->tx_avail == txr->num_desc)
   1166 		txr->busy = 0;
   1167 
   1168 	return;
   1169 }
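         /*
          * Note on the indexing used above: "work" starts at next_to_clean
          * minus num_desc, i.e. a negative offset that reaches zero exactly
          * when the scan wraps past the end of the ring; each wrap rebases
          * it by subtracting num_desc again, and the final
          * "work += txr->num_desc" converts it back into the new
          * next_to_clean index.
          */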
   1170 
   1171 
   1172 #ifdef IXGBE_FDIR
   1173 /*
   1174 ** This routine parses packet headers so that Flow
   1175 ** Director can make a hashed filter table entry
   1176 ** allowing traffic flows to be identified and kept
    1177 ** on the same CPU.  Doing this for every packet
    1178 ** would be a performance hit, so we only sample
    1179 ** one packet in every atr_sample_rate.
   1180 */
   1181 static void
   1182 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
   1183 {
   1184 	struct adapter			*adapter = txr->adapter;
   1185 	struct ix_queue			*que;
   1186 	struct ip			*ip;
   1187 	struct tcphdr			*th;
   1188 	struct udphdr			*uh;
   1189 	struct ether_vlan_header	*eh;
   1190 	union ixgbe_atr_hash_dword	input = {.dword = 0};
   1191 	union ixgbe_atr_hash_dword	common = {.dword = 0};
   1192 	int  				ehdrlen, ip_hlen;
   1193 	u16				etype;
   1194 
   1195 	eh = mtod(mp, struct ether_vlan_header *);
   1196 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
   1197 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
   1198 		etype = eh->evl_proto;
   1199 	} else {
   1200 		ehdrlen = ETHER_HDR_LEN;
   1201 		etype = eh->evl_encap_proto;
   1202 	}
   1203 
   1204 	/* Only handling IPv4 */
   1205 	if (etype != htons(ETHERTYPE_IP))
   1206 		return;
   1207 
   1208 	ip = (struct ip *)(mp->m_data + ehdrlen);
   1209 	ip_hlen = ip->ip_hl << 2;
   1210 
   1211 	/* check if we're UDP or TCP */
   1212 	switch (ip->ip_p) {
   1213 	case IPPROTO_TCP:
   1214 		th = (struct tcphdr *)((char *)ip + ip_hlen);
   1215 		/* src and dst are inverted */
   1216 		common.port.dst ^= th->th_sport;
   1217 		common.port.src ^= th->th_dport;
   1218 		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
   1219 		break;
   1220 	case IPPROTO_UDP:
   1221 		uh = (struct udphdr *)((char *)ip + ip_hlen);
   1222 		/* src and dst are inverted */
   1223 		common.port.dst ^= uh->uh_sport;
   1224 		common.port.src ^= uh->uh_dport;
   1225 		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
   1226 		break;
   1227 	default:
   1228 		return;
   1229 	}
   1230 
   1231 	input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
   1232 	if (mp->m_pkthdr.ether_vtag)
   1233 		common.flex_bytes ^= htons(ETHERTYPE_VLAN);
   1234 	else
   1235 		common.flex_bytes ^= etype;
   1236 	common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
   1237 
   1238 	que = &adapter->queues[txr->me];
   1239 	/*
   1240 	** This assumes the Rx queue and Tx
   1241 	** queue are bound to the same CPU
   1242 	*/
   1243 	ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
   1244 	    input, common, que->msix);
   1245 }
   1246 #endif /* IXGBE_FDIR */
   1247 
   1248 /*
   1249 ** Used to detect a descriptor that has
   1250 ** been merged by Hardware RSC.
   1251 */
   1252 static inline u32
   1253 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
   1254 {
   1255 	return (le32toh(rx->wb.lower.lo_dword.data) &
   1256 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
   1257 }
   1258 
   1259 /*********************************************************************
   1260  *
    1261  *  Initialize the Hardware RSC (LRO) feature on 82599
    1262  *  for an RX ring; this is toggled by the LRO capability
    1263  *  even though it is transparent to the stack.
    1264  *
    1265  *  NOTE: since this HW feature only works with IPv4, and
    1266  *        our testing has shown soft LRO to be as effective,
    1267  *        it is disabled by default.
   1268  *
   1269  **********************************************************************/
   1270 static void
   1271 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
   1272 {
   1273 	struct	adapter 	*adapter = rxr->adapter;
   1274 	struct	ixgbe_hw	*hw = &adapter->hw;
   1275 	u32			rscctrl, rdrxctl;
   1276 
   1277 	/* If turning LRO/RSC off we need to disable it */
   1278 	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
   1279 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
    1280 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
         		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
    1281 		return;
   1282 	}
   1283 
   1284 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
   1285 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
   1286 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
   1287 	if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
   1288 #endif /* DEV_NETMAP */
   1289 	rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
   1290 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
   1291 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
   1292 
   1293 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
   1294 	rscctrl |= IXGBE_RSCCTL_RSCEN;
   1295 	/*
   1296 	** Limit the total number of descriptors that
   1297 	** can be combined, so it does not exceed 64K
   1298 	*/
   1299 	if (rxr->mbuf_sz == MCLBYTES)
   1300 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
   1301 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
   1302 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
   1303 	else if (rxr->mbuf_sz == MJUM9BYTES)
   1304 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
   1305 	else  /* Using 16K cluster */
   1306 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
   1307 
   1308 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
   1309 
   1310 	/* Enable TCP header recognition */
   1311 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
   1312 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
   1313 	    IXGBE_PSRTYPE_TCPHDR));
   1314 
   1315 	/* Disable RSC for ACK packets */
   1316 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
   1317 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
   1318 
   1319 	rxr->hw_rsc = TRUE;
   1320 }
   1321 /*********************************************************************
   1322  *
   1323  *  Refresh mbuf buffers for RX descriptor rings
    1324  *   - now keeps its own state, so discards due to resource
    1325  *     exhaustion are unnecessary; if an mbuf cannot be obtained
    1326  *     it just returns, keeping its placeholder, so it can simply
    1327  *     be called again later to retry.
   1328  *
   1329  **********************************************************************/
   1330 static void
   1331 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
   1332 {
   1333 	struct adapter		*adapter = rxr->adapter;
   1334 	struct ixgbe_rx_buf	*rxbuf;
   1335 	struct mbuf		*mp;
   1336 	int			i, j, error;
   1337 	bool			refreshed = false;
   1338 
   1339 	i = j = rxr->next_to_refresh;
   1340 	/* Control the loop with one beyond */
   1341 	if (++j == rxr->num_desc)
   1342 		j = 0;
   1343 
   1344 	while (j != limit) {
   1345 		rxbuf = &rxr->rx_buffers[i];
   1346 		if (rxbuf->buf == NULL) {
   1347 			mp = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
   1348 			    MT_DATA, M_PKTHDR, rxr->mbuf_sz);
   1349 			if (mp == NULL) {
   1350 				rxr->no_jmbuf.ev_count++;
   1351 				goto update;
   1352 			}
   1353 			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
   1354 				m_adj(mp, ETHER_ALIGN);
   1355 		} else
   1356 			mp = rxbuf->buf;
   1357 
   1358 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   1359 
   1360 		/* If we're dealing with an mbuf that was copied rather
   1361 		 * than replaced, there's no need to go through busdma.
   1362 		 */
   1363 		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
   1364 			/* Get the memory mapping */
   1365 			ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   1366 			error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
   1367 			    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
   1368 			if (error != 0) {
   1369 				printf("Refresh mbufs: payload dmamap load"
   1370 				    " failure - %d\n", error);
   1371 				m_free(mp);
   1372 				rxbuf->buf = NULL;
   1373 				goto update;
   1374 			}
   1375 			rxbuf->buf = mp;
   1376 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1377 			    0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
   1378 			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
   1379 			    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1380 		} else {
   1381 			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
   1382 			rxbuf->flags &= ~IXGBE_RX_COPY;
   1383 		}
   1384 
   1385 		refreshed = true;
   1386 		/* Next is precalculated */
   1387 		i = j;
   1388 		rxr->next_to_refresh = i;
   1389 		if (++j == rxr->num_desc)
   1390 			j = 0;
   1391 	}
   1392 update:
   1393 	if (refreshed) /* Update hardware tail index */
   1394 		IXGBE_WRITE_REG(&adapter->hw,
   1395 		    rxr->tail, rxr->next_to_refresh);
   1396 	return;
   1397 }
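         /*
          * Note on the loop control above: "i" is the slot being refreshed
          * and "j" runs one position ahead of it; the loop stops once "j"
          * reaches the caller-supplied "limit", so the refresh never runs
          * onto descriptors the caller has not finished processing.
          */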
   1398 
   1399 /*********************************************************************
   1400  *
   1401  *  Allocate memory for rx_buffer structures. Since we use one
   1402  *  rx_buffer per received packet, the maximum number of rx_buffer's
    1403  *  rx_buffer per received packet, the maximum number of rx_buffers
   1404  *  that we've allocated.
   1405  *
   1406  **********************************************************************/
   1407 int
   1408 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
   1409 {
   1410 	struct	adapter 	*adapter = rxr->adapter;
   1411 	device_t 		dev = adapter->dev;
   1412 	struct ixgbe_rx_buf 	*rxbuf;
   1413 	int             	bsize, error;
   1414 
   1415 	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
   1416 	if (!(rxr->rx_buffers =
   1417 	    (struct ixgbe_rx_buf *) malloc(bsize,
   1418 	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
   1419 		aprint_error_dev(dev, "Unable to allocate rx_buffer memory\n");
   1420 		error = ENOMEM;
   1421 		goto fail;
   1422 	}
   1423 
   1424 	if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat,	/* parent */
   1425 				   1, 0,	/* alignment, bounds */
   1426 				   MJUM16BYTES,		/* maxsize */
   1427 				   1,			/* nsegments */
   1428 				   MJUM16BYTES,		/* maxsegsize */
   1429 				   0,			/* flags */
   1430 				   &rxr->ptag))) {
   1431 		aprint_error_dev(dev, "Unable to create RX DMA tag\n");
   1432 		goto fail;
   1433 	}
   1434 
   1435 	for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
   1436 		rxbuf = &rxr->rx_buffers[i];
   1437 		error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
   1438 		if (error) {
   1439 			aprint_error_dev(dev, "Unable to create RX dma map\n");
   1440 			goto fail;
   1441 		}
   1442 	}
   1443 
   1444 	return (0);
   1445 
   1446 fail:
   1447 	/* Frees all, but can handle partial completion */
   1448 	ixgbe_free_receive_structures(adapter);
   1449 	return (error);
   1450 }
   1451 
   1452 
   1453 static void
   1454 ixgbe_free_receive_ring(struct rx_ring *rxr)
   1455 {
   1456 	struct ixgbe_rx_buf       *rxbuf;
   1457 
   1458 	for (int i = 0; i < rxr->num_desc; i++) {
   1459 		rxbuf = &rxr->rx_buffers[i];
   1460 		if (rxbuf->buf != NULL) {
   1461 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1462 			    0, rxbuf->buf->m_pkthdr.len,
   1463 			    BUS_DMASYNC_POSTREAD);
   1464 			ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   1465 			rxbuf->buf->m_flags |= M_PKTHDR;
   1466 			m_freem(rxbuf->buf);
   1467 			rxbuf->buf = NULL;
   1468 			rxbuf->flags = 0;
   1469 		}
   1470 	}
   1471 }
   1472 
   1473 
   1474 /*********************************************************************
   1475  *
   1476  *  Initialize a receive ring and its buffers.
   1477  *
   1478  **********************************************************************/
   1479 static int
   1480 ixgbe_setup_receive_ring(struct rx_ring *rxr)
   1481 {
   1482 	struct	adapter 	*adapter;
   1483 	struct ixgbe_rx_buf	*rxbuf;
   1484 #ifdef LRO
   1485 	struct ifnet		*ifp;
   1486 	struct lro_ctrl		*lro = &rxr->lro;
   1487 #endif /* LRO */
   1488 	int			rsize, error = 0;
   1489 #ifdef DEV_NETMAP
   1490 	struct netmap_adapter *na = NA(rxr->adapter->ifp);
   1491 	struct netmap_slot *slot;
   1492 #endif /* DEV_NETMAP */
   1493 
   1494 	adapter = rxr->adapter;
   1495 #ifdef LRO
   1496 	ifp = adapter->ifp;
   1497 #endif /* LRO */
   1498 
   1499 	/* Clear the ring contents */
   1500 	IXGBE_RX_LOCK(rxr);
   1501 #ifdef DEV_NETMAP
   1502 	/* same as in ixgbe_setup_transmit_ring() */
   1503 	slot = netmap_reset(na, NR_RX, rxr->me, 0);
   1504 #endif /* DEV_NETMAP */
   1505 	rsize = roundup2(adapter->num_rx_desc *
   1506 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
   1507 	bzero((void *)rxr->rx_base, rsize);
   1508 	/* Cache the size */
   1509 	rxr->mbuf_sz = adapter->rx_mbuf_sz;
   1510 
   1511 	/* Free current RX buffer structs and their mbufs */
   1512 	ixgbe_free_receive_ring(rxr);
   1513 
   1514 	IXGBE_RX_UNLOCK(rxr);
   1515 
   1516 	/* Now reinitialize our supply of jumbo mbufs.  The number
   1517 	 * or size of jumbo mbufs may have changed.
   1518 	 */
   1519 	ixgbe_jcl_reinit(&adapter->jcl_head, rxr->ptag->dt_dmat,
   1520 	    2 * adapter->num_rx_desc, adapter->rx_mbuf_sz);
   1521 
   1522 	IXGBE_RX_LOCK(rxr);
   1523 
   1524 	/* Now replenish the mbufs */
   1525 	for (int j = 0; j != rxr->num_desc; ++j) {
   1526 		struct mbuf	*mp;
   1527 
   1528 		rxbuf = &rxr->rx_buffers[j];
   1529 #ifdef DEV_NETMAP
   1530 		/*
   1531 		 * In netmap mode, fill the map and set the buffer
   1532 		 * address in the NIC ring, considering the offset
   1533 		 * between the netmap and NIC rings (see comment in
   1534 		 * ixgbe_setup_transmit_ring() ). No need to allocate
   1535 		 * an mbuf, so end the block with a continue;
   1536 		 */
   1537 		if (slot) {
   1538 			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
   1539 			uint64_t paddr;
   1540 			void *addr;
   1541 
   1542 			addr = PNMB(na, slot + sj, &paddr);
   1543 			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
   1544 			/* Update descriptor and the cached value */
   1545 			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
   1546 			rxbuf->addr = htole64(paddr);
   1547 			continue;
   1548 		}
   1549 #endif /* DEV_NETMAP */
   1550 		rxbuf->flags = 0;
   1551 		rxbuf->buf = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
   1552 		    MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
   1553 		if (rxbuf->buf == NULL) {
   1554 			error = ENOBUFS;
   1555                         goto fail;
   1556 		}
   1557 		mp = rxbuf->buf;
   1558 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   1559 		/* Get the memory mapping */
   1560 		error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
   1561 		    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
   1562 		if (error != 0)
   1563 			goto fail;
   1564 		bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1565 		    0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
   1566 		/* Update the descriptor and the cached value */
   1567 		rxr->rx_base[j].read.pkt_addr =
   1568 		    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1569 		rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1570 	}
   1571 
   1572 
   1573 	/* Setup our descriptor indices */
   1574 	rxr->next_to_check = 0;
   1575 	rxr->next_to_refresh = 0;
   1576 	rxr->lro_enabled = FALSE;
   1577 	rxr->rx_copies.ev_count = 0;
   1578 	rxr->rx_bytes.ev_count = 0;
   1579 	rxr->vtag_strip = FALSE;
   1580 
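        	/*
        	 * Push the freshly initialized descriptor ring out to the
        	 * device before the queue is (re)enabled.
        	 */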
   1581 	ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   1582 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   1583 
   1584 	/*
   1585 	** Now set up the LRO interface:
   1586 	*/
   1587 	if (ixgbe_rsc_enable)
   1588 		ixgbe_setup_hw_rsc(rxr);
   1589 #ifdef LRO
   1590 	else if (ifp->if_capenable & IFCAP_LRO) {
   1591 		device_t dev = adapter->dev;
   1592 		int err = tcp_lro_init(lro);
   1593 		if (err) {
   1594 			device_printf(dev, "LRO Initialization failed!\n");
   1595 			goto fail;
   1596 		}
   1597 		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
   1598 		rxr->lro_enabled = TRUE;
   1599 		lro->ifp = adapter->ifp;
   1600 	}
   1601 #endif /* LRO */
   1602 
   1603 	IXGBE_RX_UNLOCK(rxr);
   1604 	return (0);
   1605 
   1606 fail:
   1607 	ixgbe_free_receive_ring(rxr);
   1608 	IXGBE_RX_UNLOCK(rxr);
   1609 	return (error);
   1610 }
   1611 
   1612 /*********************************************************************
   1613  *
   1614  *  Initialize all receive rings.
   1615  *
   1616  **********************************************************************/
   1617 int
   1618 ixgbe_setup_receive_structures(struct adapter *adapter)
   1619 {
   1620 	struct rx_ring *rxr = adapter->rx_rings;
   1621 	int j;
   1622 
   1623 	for (j = 0; j < adapter->num_queues; j++, rxr++)
   1624 		if (ixgbe_setup_receive_ring(rxr))
   1625 			goto fail;
   1626 
   1627 	return (0);
   1628 fail:
   1629 	/*
   1630 	 * Free the RX buffers allocated so far.  We only handle
   1631 	 * the rings that completed; the failing ring has already
   1632 	 * cleaned up after itself.  'j' failed, so it is the terminus.
   1633 	 */
   1634 	for (int i = 0; i < j; ++i) {
   1635 		rxr = &adapter->rx_rings[i];
   1636 		ixgbe_free_receive_ring(rxr);
   1637 	}
   1638 
   1639 	return (ENOBUFS);
   1640 }
   1641 
   1642 
   1643 /*********************************************************************
   1644  *
   1645  *  Free all receive rings.
   1646  *
   1647  **********************************************************************/
   1648 void
   1649 ixgbe_free_receive_structures(struct adapter *adapter)
   1650 {
   1651 	struct rx_ring *rxr = adapter->rx_rings;
   1652 
   1653 	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
   1654 
   1655 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
   1656 #ifdef LRO
   1657 		struct lro_ctrl		*lro = &rxr->lro;
   1658 #endif /* LRO */
   1659 		ixgbe_free_receive_buffers(rxr);
   1660 #ifdef LRO
   1661 		/* Free LRO memory */
   1662 		tcp_lro_free(lro);
   1663 #endif /* LRO */
   1664 		/* Free the ring memory as well */
   1665 		ixgbe_dma_free(adapter, &rxr->rxdma);
   1666 		IXGBE_RX_LOCK_DESTROY(rxr);
   1667 	}
   1668 
   1669 	free(adapter->rx_rings, M_DEVBUF);
   1670 }
   1671 
   1672 
   1673 /*********************************************************************
   1674  *
   1675  *  Free receive ring data structures
   1676  *
   1677  **********************************************************************/
   1678 static void
   1679 ixgbe_free_receive_buffers(struct rx_ring *rxr)
   1680 {
   1681 	struct adapter		*adapter = rxr->adapter;
   1682 	struct ixgbe_rx_buf	*rxbuf;
   1683 
   1684 	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
   1685 
   1686 	/* Cleanup any existing buffers */
   1687 	if (rxr->rx_buffers != NULL) {
   1688 		for (int i = 0; i < adapter->num_rx_desc; i++) {
   1689 			rxbuf = &rxr->rx_buffers[i];
   1690 			if (rxbuf->buf != NULL) {
   1691 				bus_dmamap_sync(rxr->ptag->dt_dmat,
   1692 				    rxbuf->pmap, 0, rxbuf->buf->m_pkthdr.len,
   1693 				    BUS_DMASYNC_POSTREAD);
   1694 				ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   1695 				rxbuf->buf->m_flags |= M_PKTHDR;
   1696 				m_freem(rxbuf->buf);
   1697 			}
   1698 			rxbuf->buf = NULL;
   1699 			if (rxbuf->pmap != NULL) {
   1700 				ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
   1701 				rxbuf->pmap = NULL;
   1702 			}
   1703 		}
   1704 		if (rxr->rx_buffers != NULL) {
   1705 			free(rxr->rx_buffers, M_DEVBUF);
   1706 			rxr->rx_buffers = NULL;
   1707 		}
   1708 	}
   1709 
   1710 	if (rxr->ptag != NULL) {
   1711 		ixgbe_dma_tag_destroy(rxr->ptag);
   1712 		rxr->ptag = NULL;
   1713 	}
   1714 
   1715 	return;
   1716 }
   1717 
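        /*
         * Hand a received packet to the network stack, first trying the
         * software LRO path when it is enabled and the packet qualifies.
         */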
   1718 static __inline void
   1719 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
   1720 {
   1721 	int s;
   1722 
   1723 #ifdef LRO
   1724 	struct adapter	*adapter = ifp->if_softc;
   1725 	struct ethercom *ec = &adapter->osdep.ec;
   1726 
   1727         /*
   1728          * At the moment LRO is only for IPv4/IPv6 TCP packets whose TCP checksum
   1729          * was verified by hardware, and only when the Ethernet header carries no
   1730          * VLAN tag.  For IPv6 we do not yet support extension headers.
   1731          */
   1732         if (rxr->lro_enabled &&
   1733             (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
   1734             (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
   1735             ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
   1736             (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
   1737             (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
   1738             (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
   1739             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
   1740             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
   1741                 /*
   1742                  * Send to the stack if:
   1743                  *  - LRO not enabled, or
   1744                  *  - no LRO resources, or
   1745                  *  - LRO enqueue fails
   1746                  */
   1747                 if (rxr->lro.lro_cnt != 0)
   1748                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
   1749                                 return;
   1750         }
   1751 #endif /* LRO */
   1752 
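	/*
	 * Drop the RX lock across the handoff so the stack is not
	 * entered with the ring lock held.
	 */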
   1753 	IXGBE_RX_UNLOCK(rxr);
   1754 
   1755 	s = splnet();
   1756 	/* Pass this up to any BPF listeners. */
   1757 	bpf_mtap(ifp, m);
   1758 	if_input(ifp, m);
   1759 	splx(s);
   1760 
   1761 	IXGBE_RX_LOCK(rxr);
   1762 }
   1763 
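        /*
         * Discard the mbuf (or partial chain) attached to a descriptor that
         * reported an error; the slot is refilled later by the normal
         * refresh path.
         */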
   1764 static __inline void
   1765 ixgbe_rx_discard(struct rx_ring *rxr, int i)
   1766 {
   1767 	struct ixgbe_rx_buf	*rbuf;
   1768 
   1769 	rbuf = &rxr->rx_buffers[i];
   1770 
   1771 
   1772 	/*
   1773 	** With advanced descriptors the writeback
   1774 	** clobbers the buffer addresses, so it is easier
   1775 	** to just free the existing mbufs and take
   1776 	** the normal refresh path to get new buffers
   1777 	** and mappings.
   1778 	*/
   1779 
   1780 	if (rbuf->fmp != NULL) { /* Partial chain? */
   1781 		rbuf->fmp->m_flags |= M_PKTHDR;
   1782 		m_freem(rbuf->fmp);
   1783 		rbuf->fmp = NULL;
   1784 		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
   1785 	} else if (rbuf->buf) {
   1786 		m_free(rbuf->buf);
   1787 		rbuf->buf = NULL;
   1788 	}
   1789 	ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
   1790 
   1791 	rbuf->flags = 0;
   1792 
   1793 	return;
   1794 }
   1795 
   1796 
   1797 /*********************************************************************
   1798  *
   1799  *  This routine executes in interrupt context. It replenishes
   1800  *  the mbufs in the descriptor ring and passes data which has been
   1801  *  DMA'd into host memory up to the upper layer.
   1802  *
   1803  *  Return TRUE for more work, FALSE for all clean.
   1804  *********************************************************************/
   1805 bool
   1806 ixgbe_rxeof(struct ix_queue *que)
   1807 {
   1808 	struct adapter		*adapter = que->adapter;
   1809 	struct rx_ring		*rxr = que->rxr;
   1810 	struct ifnet		*ifp = adapter->ifp;
   1811 #ifdef LRO
   1812 	struct lro_ctrl		*lro = &rxr->lro;
   1813 	struct lro_entry	*queued;
   1814 #endif /* LRO */
   1815 	int			i, nextp, processed = 0;
   1816 	u32			staterr = 0;
   1817 	u32			count = adapter->rx_process_limit;
   1818 	union ixgbe_adv_rx_desc	*cur;
   1819 	struct ixgbe_rx_buf	*rbuf, *nbuf;
   1820 #ifdef RSS
   1821 	u16			pkt_info;
   1822 #endif
   1823 
   1824 	IXGBE_RX_LOCK(rxr);
   1825 
   1826 #ifdef DEV_NETMAP
   1827 	/* Same as the txeof routine: wakeup clients on intr. */
   1828 	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
   1829 		IXGBE_RX_UNLOCK(rxr);
   1830 		return (FALSE);
   1831 	}
   1832 #endif /* DEV_NETMAP */
   1833 
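	/*
	 * Main cleanup loop: walk the ring from next_to_check until we
	 * reach a descriptor the hardware has not completed (DD clear)
	 * or the process limit is exhausted.
	 */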
   1834 	for (i = rxr->next_to_check; count != 0;) {
   1835 		struct mbuf	*sendmp, *mp;
   1836 		u32		rsc, ptype;
   1837 		u16		len;
   1838 		u16		vtag = 0;
   1839 		bool		eop;
   1840 
   1841 		/* Sync the ring. */
   1842 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   1843 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   1844 
   1845 		cur = &rxr->rx_base[i];
   1846 		staterr = le32toh(cur->wb.upper.status_error);
   1847 #ifdef RSS
   1848 		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
   1849 #endif
   1850 
   1851 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
   1852 			break;
   1853 		if ((ifp->if_flags & IFF_RUNNING) == 0)
   1854 			break;
   1855 
   1856 		count--;
   1857 		sendmp = NULL;
   1858 		nbuf = NULL;
   1859 		rsc = 0;
   1860 		cur->wb.upper.status_error = 0;
   1861 		rbuf = &rxr->rx_buffers[i];
   1862 		mp = rbuf->buf;
   1863 
   1864 		len = le16toh(cur->wb.upper.length);
   1865 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
   1866 		    IXGBE_RXDADV_PKTTYPE_MASK;
   1867 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
   1868 
   1869 		/* Make sure bad packets are discarded */
   1870 		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
   1871 #if __FreeBSD_version >= 1100036
   1872 			if (IXGBE_IS_VF(adapter))
   1873 				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
   1874 #endif
   1875 			rxr->rx_discarded.ev_count++;
   1876 			ixgbe_rx_discard(rxr, i);
   1877 			goto next_desc;
   1878 		}
   1879 
   1880 		/*
   1881 		** On the 82599, which supports a hardware
   1882 		** LRO (called HW RSC), packets need
   1883 		** not be fragmented across sequential
   1884 		** descriptors; rather, the next descriptor
   1885 		** is indicated in bits of this descriptor.
   1886 		** This also means that we might process
   1887 		** more than one packet at a time, something
   1888 		** that had never been true before; it
   1889 		** required eliminating global chain pointers
   1890 		** in favor of what we are doing here.  -jfv
   1891 		*/
   1892 		if (!eop) {
   1893 			/*
   1894 			** Figure out the next descriptor
   1895 			** of this frame.
   1896 			*/
   1897 			if (rxr->hw_rsc == TRUE) {
   1898 				rsc = ixgbe_rsc_count(cur);
   1899 				rxr->rsc_num += (rsc - 1);
   1900 			}
   1901 			if (rsc) { /* Get hardware index */
   1902 				nextp = ((staterr &
   1903 				    IXGBE_RXDADV_NEXTP_MASK) >>
   1904 				    IXGBE_RXDADV_NEXTP_SHIFT);
   1905 			} else { /* Just sequential */
   1906 				nextp = i + 1;
   1907 				if (nextp == adapter->num_rx_desc)
   1908 					nextp = 0;
   1909 			}
   1910 			nbuf = &rxr->rx_buffers[nextp];
   1911 			prefetch(nbuf);
   1912 		}
   1913 		/*
   1914 		** Rather than using the fmp/lmp global pointers
   1915 		** we now keep the head of a packet chain in the
   1916 		** buffer struct and pass this along from one
   1917 		** descriptor to the next, until we get EOP.
   1918 		*/
   1919 		mp->m_len = len;
   1920 		/*
   1921 		** See if there is a stored head
   1922 		** that determines what we are
   1923 		*/
   1924 		sendmp = rbuf->fmp;
   1925 		if (sendmp != NULL) {  /* secondary frag */
   1926 			rbuf->buf = rbuf->fmp = NULL;
   1927 			mp->m_flags &= ~M_PKTHDR;
   1928 			sendmp->m_pkthdr.len += mp->m_len;
   1929 		} else {
   1930 			/*
   1931 			 * Optimize.  This might be a small packet,
   1932 			 * maybe just a TCP ACK.  Do a fast copy that
   1933 			 * is cache aligned into a new mbuf, and
   1934 			 * leave the old mbuf+cluster for re-use.
   1935 			 */
   1936 			if (eop && len <= IXGBE_RX_COPY_LEN) {
   1937 				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
   1938 				if (sendmp != NULL) {
   1939 					sendmp->m_data +=
   1940 					    IXGBE_RX_COPY_ALIGN;
   1941 					ixgbe_bcopy(mp->m_data,
   1942 					    sendmp->m_data, len);
   1943 					sendmp->m_len = len;
   1944 					rxr->rx_copies.ev_count++;
   1945 					rbuf->flags |= IXGBE_RX_COPY;
   1946 				}
   1947 			}
   1948 			if (sendmp == NULL) {
   1949 				rbuf->buf = rbuf->fmp = NULL;
   1950 				sendmp = mp;
   1951 			}
   1952 
   1953 			/* first desc of a non-ps chain */
   1954 			sendmp->m_flags |= M_PKTHDR;
   1955 			sendmp->m_pkthdr.len = mp->m_len;
   1956 		}
   1957 		++processed;
   1958 
   1959 		/* Pass the head pointer on */
   1960 		if (eop == 0) {
   1961 			nbuf->fmp = sendmp;
   1962 			sendmp = NULL;
   1963 			mp->m_next = nbuf->buf;
   1964 		} else { /* Sending this frame */
   1965 			m_set_rcvif(sendmp, ifp);
   1966 			ifp->if_ipackets++;
   1967 			rxr->rx_packets.ev_count++;
   1968 			/* capture data for AIM */
   1969 			rxr->bytes += sendmp->m_pkthdr.len;
   1970 			rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
   1971 			/* Process vlan info */
   1972 			if ((rxr->vtag_strip) &&
   1973 			    (staterr & IXGBE_RXD_STAT_VP))
   1974 				vtag = le16toh(cur->wb.upper.vlan);
   1975 			if (vtag) {
   1976 				VLAN_INPUT_TAG(ifp, sendmp, vtag,
   1977 				    printf("%s: could not apply VLAN "
   1978 					"tag", __func__));
   1979 			}
   1980 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
   1981 				ixgbe_rx_checksum(staterr, sendmp, ptype,
   1982 				   &adapter->stats.pf);
   1983 			}
   1984 #if 0 /* FreeBSD */
   1985                         /*
   1986                          * In case of multiqueue, we have RXCSUM.PCSD bit set
   1987                          * and never cleared. This means we have RSS hash
   1988                          * available to be used.
   1989                          */
   1990                         if (adapter->num_queues > 1) {
   1991                                 sendmp->m_pkthdr.flowid =
   1992                                     le32toh(cur->wb.lower.hi_dword.rss);
   1993                                 switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
   1994                                     case IXGBE_RXDADV_RSSTYPE_IPV4:
   1995                                         M_HASHTYPE_SET(sendmp,
   1996                                             M_HASHTYPE_RSS_IPV4);
   1997                                         break;
   1998                                     case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
   1999                                         M_HASHTYPE_SET(sendmp,
   2000                                             M_HASHTYPE_RSS_TCP_IPV4);
   2001                                         break;
   2002                                     case IXGBE_RXDADV_RSSTYPE_IPV6:
   2003                                         M_HASHTYPE_SET(sendmp,
   2004                                             M_HASHTYPE_RSS_IPV6);
   2005                                         break;
   2006                                     case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
   2007                                         M_HASHTYPE_SET(sendmp,
   2008                                             M_HASHTYPE_RSS_TCP_IPV6);
   2009                                         break;
   2010                                     case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
   2011                                         M_HASHTYPE_SET(sendmp,
   2012                                             M_HASHTYPE_RSS_IPV6_EX);
   2013                                         break;
   2014                                     case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
   2015                                         M_HASHTYPE_SET(sendmp,
   2016                                             M_HASHTYPE_RSS_TCP_IPV6_EX);
   2017                                         break;
   2018 #if __FreeBSD_version > 1100000
   2019                                     case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
   2020                                         M_HASHTYPE_SET(sendmp,
   2021                                             M_HASHTYPE_RSS_UDP_IPV4);
   2022                                         break;
   2023                                     case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
   2024                                         M_HASHTYPE_SET(sendmp,
   2025                                             M_HASHTYPE_RSS_UDP_IPV6);
   2026                                         break;
   2027                                     case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
   2028                                         M_HASHTYPE_SET(sendmp,
   2029                                             M_HASHTYPE_RSS_UDP_IPV6_EX);
   2030                                         break;
   2031 #endif
   2032                                     default:
   2033                                         M_HASHTYPE_SET(sendmp,
   2034                                             M_HASHTYPE_OPAQUE);
   2035                                 }
   2036                         } else {
   2037                                 sendmp->m_pkthdr.flowid = que->msix;
   2038 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
   2039 			}
   2040 #endif /* FreeBSD_version */
   2041 		}
   2042 next_desc:
   2043 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   2044 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   2045 
   2046 		/* Advance our pointers to the next descriptor. */
   2047 		if (++i == rxr->num_desc)
   2048 			i = 0;
   2049 
   2050 		/* Now send to the stack or do LRO */
   2051 		if (sendmp != NULL) {
   2052 			rxr->next_to_check = i;
   2053 			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
   2054 			i = rxr->next_to_check;
   2055 		}
   2056 
   2057 		/* Every eight descriptors we refresh the mbuf supply */
   2058 		if (processed == 8) {
   2059 			ixgbe_refresh_mbufs(rxr, i);
   2060 			processed = 0;
   2061 		}
   2062 	}
   2063 
   2064 	/* Refresh any remaining buf structs */
   2065 	if (ixgbe_rx_unrefreshed(rxr))
   2066 		ixgbe_refresh_mbufs(rxr, i);
   2067 
   2068 	rxr->next_to_check = i;
   2069 
   2070 #ifdef LRO
   2071 	/*
   2072 	 * Flush any outstanding LRO work
   2073 	 */
   2074 	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
   2075 		SLIST_REMOVE_HEAD(&lro->lro_active, next);
   2076 		tcp_lro_flush(lro, queued);
   2077 	}
   2078 #endif /* LRO */
   2079 
   2080 	IXGBE_RX_UNLOCK(rxr);
   2081 
   2082 	/*
   2083 	** Still have cleaning to do?
   2084 	*/
   2085 	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
   2086 		return true;
   2087 	else
   2088 		return false;
   2089 }
   2090 
   2091 
   2092 /*********************************************************************
   2093  *
   2094  *  Verify that the hardware indicated that the checksum is valid.
   2095  *  Inform the stack about the status of checksum so that stack
   2096  *  doesn't spend time verifying the checksum.
   2097  *
   2098  *********************************************************************/
   2099 static void
   2100 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
   2101     struct ixgbe_hw_stats *stats)
   2102 {
   2103 	u16	status = (u16) staterr;
   2104 	u8	errors = (u8) (staterr >> 24);
   2105 #if 0
   2106 	bool	sctp = FALSE;
   2107 
   2108 	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
   2109 	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
   2110 		sctp = TRUE;
   2111 #endif
   2112 
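	/*
	 * Report the IPv4 header and L4 checksum results.  The descriptor
	 * carries a single L4 status, so all TCP/UDP csum_flags are set
	 * together, with the BAD bit added on error.
	 */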
   2113 	if (status & IXGBE_RXD_STAT_IPCS) {
   2114 		stats->ipcs.ev_count++;
   2115 		if (!(errors & IXGBE_RXD_ERR_IPE)) {
   2116 			/* IP Checksum Good */
   2117 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
   2118 
   2119 		} else {
   2120 			stats->ipcs_bad.ev_count++;
   2121 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
   2122 		}
   2123 	}
   2124 	if (status & IXGBE_RXD_STAT_L4CS) {
   2125 		stats->l4cs.ev_count++;
   2126 		int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
   2127 		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
   2128 			mp->m_pkthdr.csum_flags |= type;
   2129 		} else {
   2130 			stats->l4cs_bad.ev_count++;
   2131 			mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
   2132 		}
   2133 	}
   2134 	return;
   2135 }
   2136 
   2137 
   2138 /********************************************************************
   2139  * Manage DMA'able memory.
   2140  *******************************************************************/
   2141 
   2142 int
   2143 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
   2144 		struct ixgbe_dma_alloc *dma, const int mapflags)
   2145 {
   2146 	device_t dev = adapter->dev;
   2147 	int             r, rsegs;
   2148 
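	/*
	 * Standard bus_dma(9) sequence: create a tag, allocate and map
	 * DMA-safe memory, create a map and load it to obtain the physical
	 * address; each failure path unwinds in reverse order.
	 */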
   2149 	r = ixgbe_dma_tag_create(adapter->osdep.dmat,	/* parent */
   2150 			       DBA_ALIGN, 0,	/* alignment, bounds */
   2151 			       size,	/* maxsize */
   2152 			       1,	/* nsegments */
   2153 			       size,	/* maxsegsize */
   2154 			       BUS_DMA_ALLOCNOW,	/* flags */
   2155 			       &dma->dma_tag);
   2156 	if (r != 0) {
   2157 		aprint_error_dev(dev,
   2158 		    "%s: ixgbe_dma_tag_create failed; error %d\n", __func__, r);
   2159 		goto fail_0;
   2160 	}
   2161 
   2162 	r = bus_dmamem_alloc(dma->dma_tag->dt_dmat,
   2163 		size,
   2164 		dma->dma_tag->dt_alignment,
   2165 		dma->dma_tag->dt_boundary,
   2166 		&dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
   2167 	if (r != 0) {
   2168 		aprint_error_dev(dev,
   2169 		    "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
   2170 		goto fail_1;
   2171 	}
   2172 
   2173 	r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
   2174 	    size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
   2175 	if (r != 0) {
   2176 		aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
   2177 		    __func__, r);
   2178 		goto fail_2;
   2179 	}
   2180 
   2181 	r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
   2182 	if (r != 0) {
   2183 		aprint_error_dev(dev, "%s: ixgbe_dmamap_create failed; error %d\n",
   2184 		    __func__, r);
   2185 		goto fail_3;
   2186 	}
   2187 
   2188 	r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map, dma->dma_vaddr,
   2189 			    size,
   2190 			    NULL,
   2191 			    mapflags | BUS_DMA_NOWAIT);
   2192 	if (r != 0) {
   2193 		aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
   2194 		    __func__, r);
   2195 		goto fail_4;
   2196 	}
   2197 	dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
   2198 	dma->dma_size = size;
   2199 	return 0;
   2200 fail_4:
   2201 	ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
   2202 fail_3:
   2203 	bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
   2204 fail_2:
   2205 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
   2206 fail_1:
   2207 	ixgbe_dma_tag_destroy(dma->dma_tag);
   2208 fail_0:
   2209 	return r;
   2210 }
   2211 
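        /*
         * Tear down a DMA area set up by ixgbe_dma_malloc().
         */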
   2212 void
   2213 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
   2214 {
   2215 	bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
   2216 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   2217 	ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
   2218 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
   2219 	ixgbe_dma_tag_destroy(dma->dma_tag);
   2220 }
   2221 
   2222 
   2223 /*********************************************************************
   2224  *
   2225  *  Allocate memory for the transmit and receive rings, and then
   2226  *  the descriptors associated with each, called only once at attach.
   2227  *
   2228  **********************************************************************/
   2229 int
   2230 ixgbe_allocate_queues(struct adapter *adapter)
   2231 {
   2232 	device_t	dev = adapter->dev;
   2233 	struct ix_queue	*que;
   2234 	struct tx_ring	*txr;
   2235 	struct rx_ring	*rxr;
   2236 	int rsize, tsize, error = IXGBE_SUCCESS;
   2237 	int txconf = 0, rxconf = 0;
   2238 #ifdef PCI_IOV
   2239 	enum ixgbe_iov_mode iov_mode;
   2240 #endif
   2241 
   2242 	/* First allocate the top level queue structs */
   2243 	if (!(adapter->queues =
   2244 	    (struct ix_queue *) malloc(sizeof(struct ix_queue) *
   2245 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
   2246 		aprint_error_dev(dev, "Unable to allocate queue memory\n");
   2247 		error = ENOMEM;
   2248 		goto fail;
   2249 	}
   2250 
   2251 	/* Then allocate the TX ring struct memory */
   2252 	if (!(adapter->tx_rings =
   2253 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
   2254 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
   2255 		aprint_error_dev(dev, "Unable to allocate TX ring memory\n");
   2256 		error = ENOMEM;
   2257 		goto tx_fail;
   2258 	}
   2259 
   2260 	/* Next allocate the RX */
   2261 	if (!(adapter->rx_rings =
   2262 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
   2263 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
   2264 		aprint_error_dev(dev, "Unable to allocate RX ring memory\n");
   2265 		error = ENOMEM;
   2266 		goto rx_fail;
   2267 	}
   2268 
   2269 	/* For the ring itself */
   2270 	tsize = roundup2(adapter->num_tx_desc *
   2271 	    sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
   2272 
   2273 #ifdef PCI_IOV
   2274 	iov_mode = ixgbe_get_iov_mode(adapter);
   2275 	adapter->pool = ixgbe_max_vfs(iov_mode);
   2276 #else
   2277 	adapter->pool = 0;
   2278 #endif
   2279 	/*
   2280 	 * Now set up the TX queues.  txconf is needed to handle the
   2281 	 * possibility that things fail midcourse, in which case we
   2282 	 * need to undo the allocations gracefully.
   2283 	 */
   2284 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
   2285 		/* Set up some basics */
   2286 		txr = &adapter->tx_rings[i];
   2287 		txr->adapter = adapter;
   2288 #ifdef PCI_IOV
   2289 		txr->me = ixgbe_pf_que_index(iov_mode, i);
   2290 #else
   2291 		txr->me = i;
   2292 #endif
   2293 		txr->num_desc = adapter->num_tx_desc;
   2294 
   2295 		/* Initialize the TX side lock */
   2296 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
   2297 		    device_xname(dev), txr->me);
   2298 		mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
   2299 
   2300 		if (ixgbe_dma_malloc(adapter, tsize,
   2301 			&txr->txdma, BUS_DMA_NOWAIT)) {
   2302 			aprint_error_dev(dev,
   2303 			    "Unable to allocate TX Descriptor memory\n");
   2304 			error = ENOMEM;
   2305 			goto err_tx_desc;
   2306 		}
   2307 		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
   2308 		bzero((void *)txr->tx_base, tsize);
   2309 
   2310 		/* Now allocate transmit buffers for the ring */
   2311 		if (ixgbe_allocate_transmit_buffers(txr)) {
   2312 			aprint_error_dev(dev,
   2313 			    "Critical Failure setting up transmit buffers\n");
   2314 			error = ENOMEM;
   2315 			goto err_tx_desc;
   2316 		}
   2317 #ifndef IXGBE_LEGACY_TX
   2318 		/* Allocate a buf ring */
   2319 		txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
   2320 		    M_WAITOK, &txr->tx_mtx);
   2321 		if (txr->br == NULL) {
   2322 			aprint_error_dev(dev,
   2323 			    "Critical Failure setting up buf ring\n");
   2324 			error = ENOMEM;
   2325 			goto err_tx_desc;
   2326 		}
   2327 #endif
   2328 	}
   2329 
   2330 	/*
   2331 	 * Next the RX queues...
   2332 	 */
   2333 	rsize = roundup2(adapter->num_rx_desc *
   2334 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
   2335 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
   2336 		rxr = &adapter->rx_rings[i];
   2337 		/* Set up some basics */
   2338 		rxr->adapter = adapter;
   2339 #ifdef PCI_IOV
   2340 		rxr->me = ixgbe_pf_que_index(iov_mode, i);
   2341 #else
   2342 		rxr->me = i;
   2343 #endif
   2344 		rxr->num_desc = adapter->num_rx_desc;
   2345 
   2346 		/* Initialize the RX side lock */
   2347 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
   2348 		    device_xname(dev), rxr->me);
   2349 		mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
   2350 
   2351 		if (ixgbe_dma_malloc(adapter, rsize,
   2352 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
   2353 			aprint_error_dev(dev,
   2354 			    "Unable to allocate RxDescriptor memory\n");
   2355 			error = ENOMEM;
   2356 			goto err_rx_desc;
   2357 		}
   2358 		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
   2359 		bzero((void *)rxr->rx_base, rsize);
   2360 
   2361 		/* Allocate receive buffers for the ring */
   2362 		if (ixgbe_allocate_receive_buffers(rxr)) {
   2363 			aprint_error_dev(dev,
   2364 			    "Critical Failure setting up receive buffers\n");
   2365 			error = ENOMEM;
   2366 			goto err_rx_desc;
   2367 		}
   2368 	}
   2369 
   2370 	/*
   2371 	** Finally set up the queue holding structs
   2372 	*/
   2373 	for (int i = 0; i < adapter->num_queues; i++) {
   2374 		que = &adapter->queues[i];
   2375 		que->adapter = adapter;
   2376 		que->me = i;
   2377 		que->txr = &adapter->tx_rings[i];
   2378 		que->rxr = &adapter->rx_rings[i];
   2379 	}
   2380 
   2381 	return (0);
   2382 
   2383 err_rx_desc:
   2384 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
   2385 		ixgbe_dma_free(adapter, &rxr->rxdma);
   2386 err_tx_desc:
   2387 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
   2388 		ixgbe_dma_free(adapter, &txr->txdma);
   2389 	free(adapter->rx_rings, M_DEVBUF);
   2390 rx_fail:
   2391 	free(adapter->tx_rings, M_DEVBUF);
   2392 tx_fail:
   2393 	free(adapter->queues, M_DEVBUF);
   2394 fail:
   2395 	return (error);
   2396 }
   2397 
   2398