ix_txrx.c revision 1.6
      1 /******************************************************************************
      2 
      3   Copyright (c) 2001-2015, Intel Corporation
      4   All rights reserved.
      5 
      6   Redistribution and use in source and binary forms, with or without
      7   modification, are permitted provided that the following conditions are met:
      8 
      9    1. Redistributions of source code must retain the above copyright notice,
     10       this list of conditions and the following disclaimer.
     11 
     12    2. Redistributions in binary form must reproduce the above copyright
     13       notice, this list of conditions and the following disclaimer in the
     14       documentation and/or other materials provided with the distribution.
     15 
     16    3. Neither the name of the Intel Corporation nor the names of its
     17       contributors may be used to endorse or promote products derived from
     18       this software without specific prior written permission.
     19 
     20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     30   POSSIBILITY OF SUCH DAMAGE.
     31 
     32 ******************************************************************************/
     33 /*
     34  * Copyright (c) 2011 The NetBSD Foundation, Inc.
     35  * All rights reserved.
     36  *
     37  * This code is derived from software contributed to The NetBSD Foundation
     38  * by Coyote Point Systems, Inc.
     39  *
     40  * Redistribution and use in source and binary forms, with or without
     41  * modification, are permitted provided that the following conditions
     42  * are met:
     43  * 1. Redistributions of source code must retain the above copyright
     44  *    notice, this list of conditions and the following disclaimer.
     45  * 2. Redistributions in binary form must reproduce the above copyright
     46  *    notice, this list of conditions and the following disclaimer in the
     47  *    documentation and/or other materials provided with the distribution.
     48  *
     49  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     50  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     51  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     52  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     53  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     54  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     55  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     56  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     57  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     58  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     59  * POSSIBILITY OF SUCH DAMAGE.
     60  */
     61 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 285528 2015-07-14 09:13:18Z hiren $*/
     62 /*$NetBSD: ix_txrx.c,v 1.6 2016/12/02 10:24:31 msaitoh Exp $*/
     63 
     64 #include "ixgbe.h"
     65 
     66 #ifdef DEV_NETMAP
     67 #include <net/netmap.h>
     68 #include <sys/selinfo.h>
     69 #include <dev/netmap/netmap_kern.h>
     70 
     71 extern int ix_crcstrip;
     72 #endif
     73 
      74 /*
      75 ** HW RSC control:
      76 **  This feature only works with IPv4, and
      77 **  only on 82599 and later devices.  It also
      78 **  causes IP forwarding to fail, and unlike
      79 **  LRO that cannot be controlled by the
      80 **  network stack.  For these reasons it is
      81 **  left off by default and no tunable
      82 **  interface is provided; enabling it
      83 **  requires building the driver with this
      84 **  set to TRUE.
      85 */
     86 static bool ixgbe_rsc_enable = FALSE;
     87 
     88 #ifdef IXGBE_FDIR
      89 /*
      90 ** For Flow Director: this is the
      91 ** number of TX packets between the
      92 ** samples taken for the filter pool,
      93 ** so every 20th packet is probed.
      94 **
      95 ** This feature can be disabled by
      96 ** setting this to 0.
      97 */
     98 static int atr_sample_rate = 20;
     99 #endif
    100 
    101 /* Shared PCI config read/write */
    102 u16
    103 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg)
    104 {
    105 	switch (reg % 4) {
    106 	case 0:
    107 		return pci_conf_read(hw->back->pc, hw->back->tag, reg) &
    108 		    __BITS(15, 0);
    109 	case 2:
    110 		return __SHIFTOUT(pci_conf_read(hw->back->pc, hw->back->tag,
    111 		    reg - 2), __BITS(31, 16));
    112 	default:
     113 		panic("%s: invalid register (%" PRIx32 ")", __func__, reg);
    114 		break;
    115 	}
    116 }
    117 
    118 void
    119 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value)
    120 {
    121 	pcireg_t old;
    122 
    123 	switch (reg % 4) {
    124 	case 0:
    125 		old = pci_conf_read(hw->back->pc, hw->back->tag, reg) &
    126 		    __BITS(31, 16);
    127 		pci_conf_write(hw->back->pc, hw->back->tag, reg, value | old);
    128 		break;
    129 	case 2:
    130 		old = pci_conf_read(hw->back->pc, hw->back->tag, reg - 2) &
    131 		    __BITS(15, 0);
    132 		pci_conf_write(hw->back->pc, hw->back->tag, reg - 2,
    133 		    __SHIFTIN(value, __BITS(31, 16)) | old);
    134 		break;
    135 	default:
     136 		panic("%s: invalid register (%" PRIx32 ")", __func__, reg);
    137 		break;
    138 	}
    139 
    140 	return;
    141 }
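
         /*
          * Illustrative usage sketch for the two helpers above (the offset
          * 0xA2 is purely hypothetical): pci_conf_read()/pci_conf_write()
          * operate on whole 32-bit dwords, so the 16-bit accesses expected by
          * the shared Intel code are synthesized from the enclosing dword.
          *
          *	u16 v = ixgbe_read_pci_cfg(hw, 0xA2);
          *		reads the dword at 0xA0 and returns bits 31:16;
          *	ixgbe_write_pci_cfg(hw, 0xA2, v);
          *		reads the dword at 0xA0, keeps bits 15:0, and writes
          *		v back into bits 31:16.
          */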
    142 
    143 /*********************************************************************
    144  *  Local Function prototypes
    145  *********************************************************************/
    146 static void	ixgbe_setup_transmit_ring(struct tx_ring *);
    147 static void     ixgbe_free_transmit_buffers(struct tx_ring *);
    148 static int	ixgbe_setup_receive_ring(struct rx_ring *);
    149 static void     ixgbe_free_receive_buffers(struct rx_ring *);
    150 
    151 static void	ixgbe_rx_checksum(u32, struct mbuf *, u32,
    152 		    struct ixgbe_hw_stats *);
    153 static void	ixgbe_refresh_mbufs(struct rx_ring *, int);
    154 static int      ixgbe_xmit(struct tx_ring *, struct mbuf *);
    155 static int	ixgbe_tx_ctx_setup(struct tx_ring *,
    156 		    struct mbuf *, u32 *, u32 *);
    157 static int	ixgbe_tso_setup(struct tx_ring *,
    158 		    struct mbuf *, u32 *, u32 *);
    159 #ifdef IXGBE_FDIR
    160 static void	ixgbe_atr(struct tx_ring *, struct mbuf *);
    161 #endif
    162 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
    163 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
    164 		    struct mbuf *, u32);
    165 
    166 static void	ixgbe_setup_hw_rsc(struct rx_ring *);
    167 
    168 #ifdef IXGBE_LEGACY_TX
    169 /*********************************************************************
    170  *  Transmit entry point
    171  *
    172  *  ixgbe_start is called by the stack to initiate a transmit.
    173  *  The driver will remain in this routine as long as there are
    174  *  packets to transmit and transmit resources are available.
     175  *  If resources are not available, the stack is notified and
    176  *  the packet is requeued.
    177  **********************************************************************/
    178 
    179 void
    180 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
    181 {
    182 	int rc;
    183 	struct mbuf    *m_head;
    184 	struct adapter *adapter = txr->adapter;
    185 
    186 	IXGBE_TX_LOCK_ASSERT(txr);
    187 
    188 	if ((ifp->if_flags & IFF_RUNNING) == 0)
    189 		return;
    190 	if (!adapter->link_active)
    191 		return;
    192 
    193 	while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
    194 		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
    195 			break;
    196 
    197 		IFQ_POLL(&ifp->if_snd, m_head);
    198 		if (m_head == NULL)
    199 			break;
    200 
    201 		if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
    202 			break;
    203 		}
    204 		IFQ_DEQUEUE(&ifp->if_snd, m_head);
    205 		if (rc == EFBIG) {
    206 			struct mbuf *mtmp;
    207 
    208 			if ((mtmp = m_defrag(m_head, M_NOWAIT)) != NULL) {
    209 				m_head = mtmp;
    210 				rc = ixgbe_xmit(txr, m_head);
    211 				if (rc != 0)
    212 					adapter->efbig2_tx_dma_setup.ev_count++;
    213 			} else
    214 				adapter->m_defrag_failed.ev_count++;
    215 		}
    216 		if (rc != 0) {
    217 			m_freem(m_head);
    218 			continue;
    219 		}
    220 
    221 		/* Send a copy of the frame to the BPF listener */
    222 		bpf_mtap(ifp, m_head);
    223 	}
    224 	return;
    225 }
    226 
    227 /*
     228  * Legacy TX start - called by the stack; this
     229  * always uses the first tx ring and should
     230  * not be used with multiqueue tx enabled.
    231  */
    232 void
    233 ixgbe_start(struct ifnet *ifp)
    234 {
    235 	struct adapter *adapter = ifp->if_softc;
    236 	struct tx_ring	*txr = adapter->tx_rings;
    237 
    238 	if (ifp->if_flags & IFF_RUNNING) {
    239 		IXGBE_TX_LOCK(txr);
    240 		ixgbe_start_locked(txr, ifp);
    241 		IXGBE_TX_UNLOCK(txr);
    242 	}
    243 	return;
    244 }
    245 
    246 #else /* ! IXGBE_LEGACY_TX */
    247 
    248 /*
    249 ** Multiqueue Transmit driver
    250 **
    251 */
    252 int
    253 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
    254 {
    255 	struct adapter	*adapter = ifp->if_softc;
    256 	struct ix_queue	*que;
    257 	struct tx_ring	*txr;
    258 	int 		i, err = 0;
    259 #ifdef	RSS
    260 	uint32_t bucket_id;
    261 #endif
    262 
    263 	/*
    264 	 * When doing RSS, map it to the same outbound queue
    265 	 * as the incoming flow would be mapped to.
    266 	 *
     267 	 * If everything is set up correctly, it should be the
     268 	 * same bucket as the one the current CPU is in.
    269 	 */
    270 #if __FreeBSD_version < 1100054
    271 	if (m->m_flags & M_FLOWID) {
    272 #else
    273 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
    274 #endif
    275 #ifdef	RSS
    276 		if (rss_hash2bucket(m->m_pkthdr.flowid,
    277 		    M_HASHTYPE_GET(m), &bucket_id) == 0)
    278 			/* TODO: spit out something if bucket_id > num_queues? */
    279 			i = bucket_id % adapter->num_queues;
    280 		else
    281 #endif
    282 			i = m->m_pkthdr.flowid % adapter->num_queues;
    283 	} else
    284 		i = curcpu % adapter->num_queues;
    285 
    286 	/* Check for a hung queue and pick alternative */
    287 	if (((1 << i) & adapter->active_queues) == 0)
    288 		i = ffsl(adapter->active_queues);
    289 
    290 	txr = &adapter->tx_rings[i];
    291 	que = &adapter->queues[i];
    292 
    293 	err = drbr_enqueue(ifp, txr->br, m);
    294 	if (err)
    295 		return (err);
    296 	if (IXGBE_TX_TRYLOCK(txr)) {
    297 		ixgbe_mq_start_locked(ifp, txr);
    298 		IXGBE_TX_UNLOCK(txr);
    299 	} else
    300 		softint_schedule(txr->txq_si);
    301 
    302 	return (0);
    303 }
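
         /*
          * Worked example for the queue selection above (numbers are
          * hypothetical): with adapter->num_queues == 8, a flow whose flowid
          * is 0x2f maps to ring 0x2f % 8 == 7, while a packet with no flow id
          * sent from CPU 3 lands on ring 3 % 8 == 3.  If the chosen ring is
          * not set in active_queues, a ring derived from the first set bit of
          * active_queues is used instead.
          */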
    304 
    305 int
    306 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
    307 {
    308 	struct adapter  *adapter = txr->adapter;
    309 	struct mbuf     *next;
    310 	int             enqueued = 0, err = 0;
    311 
    312 	if (((ifp->if_flags & IFF_RUNNING) == 0) ||
    313 	    adapter->link_active == 0)
    314 		return (ENETDOWN);
    315 
    316 	/* Process the queue */
    317 #if __FreeBSD_version < 901504
    318 	next = drbr_dequeue(ifp, txr->br);
    319 	while (next != NULL) {
    320 		if ((err = ixgbe_xmit(txr, &next)) != 0) {
    321 			if (next != NULL)
    322 				err = drbr_enqueue(ifp, txr->br, next);
    323 #else
    324 	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
    325 		if ((err = ixgbe_xmit(txr, &next)) != 0) {
    326 			if (next == NULL) {
    327 				drbr_advance(ifp, txr->br);
    328 			} else {
    329 				drbr_putback(ifp, txr->br, next);
    330 			}
    331 #endif
    332 			break;
    333 		}
    334 #if __FreeBSD_version >= 901504
    335 		drbr_advance(ifp, txr->br);
    336 #endif
    337 		enqueued++;
    338 #if 0 // this is VF-only
    339 #if __FreeBSD_version >= 1100036
    340 		/*
    341 		 * Since we're looking at the tx ring, we can check
     342 		 * to see if we're a VF by examining our tail register
    343 		 * address.
    344 		 */
    345 		if (txr->tail < IXGBE_TDT(0) && next->m_flags & M_MCAST)
    346 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
    347 #endif
    348 #endif
    349 		/* Send a copy of the frame to the BPF listener */
    350 		bpf_mtap(ifp, next);
    351 		if ((ifp->if_flags & IFF_RUNNING) == 0)
    352 			break;
    353 #if __FreeBSD_version < 901504
    354 		next = drbr_dequeue(ifp, txr->br);
    355 #endif
    356 	}
    357 
    358 	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
    359 		ixgbe_txeof(txr);
    360 
    361 	return (err);
    362 }
    363 
    364 /*
    365  * Called from a taskqueue to drain queued transmit packets.
    366  */
    367 void
    368 ixgbe_deferred_mq_start(void *arg, int pending)
    369 {
    370 	struct tx_ring *txr = arg;
    371 	struct adapter *adapter = txr->adapter;
    372 	struct ifnet *ifp = adapter->ifp;
    373 
    374 	IXGBE_TX_LOCK(txr);
    375 	if (!drbr_empty(ifp, txr->br))
    376 		ixgbe_mq_start_locked(ifp, txr);
    377 	IXGBE_TX_UNLOCK(txr);
    378 }
    379 
    380 /*
    381  * Flush all ring buffers
    382  */
    383 void
    384 ixgbe_qflush(struct ifnet *ifp)
    385 {
    386 	struct adapter	*adapter = ifp->if_softc;
    387 	struct tx_ring	*txr = adapter->tx_rings;
    388 	struct mbuf	*m;
    389 
    390 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
    391 		IXGBE_TX_LOCK(txr);
    392 		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
    393 			m_freem(m);
    394 		IXGBE_TX_UNLOCK(txr);
    395 	}
    396 	if_qflush(ifp);
    397 }
    398 #endif /* IXGBE_LEGACY_TX */
    399 
    400 
    401 /*********************************************************************
    402  *
    403  *  This routine maps the mbufs to tx descriptors, allowing the
    404  *  TX engine to transmit the packets.
    405  *  	- return 0 on success, positive on failure
    406  *
    407  **********************************************************************/
    408 
    409 static int
    410 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
    411 {
    412 	struct m_tag *mtag;
    413 	struct adapter  *adapter = txr->adapter;
    414 	struct ethercom *ec = &adapter->osdep.ec;
    415 	u32		olinfo_status = 0, cmd_type_len;
    416 	int             i, j, error;
    417 	int		first;
    418 	bus_dmamap_t	map;
    419 	struct ixgbe_tx_buf *txbuf;
    420 	union ixgbe_adv_tx_desc *txd = NULL;
    421 
    422 	/* Basic descriptor defines */
    423         cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
    424 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
    425 
    426 	if ((mtag = VLAN_OUTPUT_TAG(ec, m_head)) != NULL)
    427         	cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
    428 
     429         /*
     430          * It is important to capture the first descriptor
     431          * used, because its tx_buffer will record the EOP
     432          * descriptor we tell the hardware to report back.
     433          */
    434         first = txr->next_avail_desc;
    435 	txbuf = &txr->tx_buffers[first];
    436 	map = txbuf->map;
    437 
    438 	/*
    439 	 * Map the packet for DMA.
    440 	 */
    441 	error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map,
    442 	    m_head, BUS_DMA_NOWAIT);
    443 
    444 	if (__predict_false(error)) {
    445 
    446 		switch (error) {
    447 		case EAGAIN:
    448 			adapter->eagain_tx_dma_setup.ev_count++;
    449 			return EAGAIN;
    450 		case ENOMEM:
    451 			adapter->enomem_tx_dma_setup.ev_count++;
    452 			return EAGAIN;
    453 		case EFBIG:
    454 			/*
    455 			 * XXX Try it again?
    456 			 * do m_defrag() and retry bus_dmamap_load_mbuf().
    457 			 */
    458 			adapter->efbig_tx_dma_setup.ev_count++;
    459 			return error;
    460 		case EINVAL:
    461 			adapter->einval_tx_dma_setup.ev_count++;
    462 			return error;
    463 		default:
    464 			adapter->other_tx_dma_setup.ev_count++;
    465 			return error;
    466 		}
    467 	}
    468 
    469 	/* Make certain there are enough descriptors */
    470 	if (map->dm_nsegs > txr->tx_avail - 2) {
    471 		txr->no_desc_avail.ev_count++;
    472 		ixgbe_dmamap_unload(txr->txtag, txbuf->map);
    473 		return EAGAIN;
    474 	}
    475 
    476 	/*
     477 	 * Set up the appropriate offload context;
     478 	 * this will consume the first descriptor.
    479 	 */
    480 	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
    481 	if (__predict_false(error)) {
    482 		return (error);
    483 	}
    484 
    485 #ifdef IXGBE_FDIR
    486 	/* Do the flow director magic */
    487 	if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
    488 		++txr->atr_count;
    489 		if (txr->atr_count >= atr_sample_rate) {
    490 			ixgbe_atr(txr, m_head);
    491 			txr->atr_count = 0;
    492 		}
    493 	}
    494 #endif
    495 
    496 	i = txr->next_avail_desc;
    497 	for (j = 0; j < map->dm_nsegs; j++) {
    498 		bus_size_t seglen;
    499 		bus_addr_t segaddr;
    500 
    501 		txbuf = &txr->tx_buffers[i];
    502 		txd = &txr->tx_base[i];
    503 		seglen = map->dm_segs[j].ds_len;
    504 		segaddr = htole64(map->dm_segs[j].ds_addr);
    505 
    506 		txd->read.buffer_addr = segaddr;
    507 		txd->read.cmd_type_len = htole32(txr->txd_cmd |
     508 		    cmd_type_len | seglen);
    509 		txd->read.olinfo_status = htole32(olinfo_status);
    510 
    511 		if (++i == txr->num_desc)
    512 			i = 0;
    513 	}
    514 
    515 	txd->read.cmd_type_len |=
    516 	    htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
    517 	txr->tx_avail -= map->dm_nsegs;
    518 	txr->next_avail_desc = i;
    519 
    520 	txbuf->m_head = m_head;
    521 	/*
     522 	 * Here we swap the maps so that the buffer for the
     523 	 * last descriptor, which holds the mbuf, also keeps
     524 	 * the DMA map that was actually loaded, while the
     525 	 * first descriptor's buffer gets the spare map.
    526 	 */
    527 	txr->tx_buffers[first].map = txbuf->map;
    528 	txbuf->map = map;
    529 	bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
    530 	    BUS_DMASYNC_PREWRITE);
    531 
    532         /* Set the EOP descriptor that will be marked done */
    533         txbuf = &txr->tx_buffers[first];
    534 	txbuf->eop = txd;
    535 
    536         ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
    537 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
    538 	/*
     539 	 * Advance the Transmit Descriptor Tail (TDT); this tells the
    540 	 * hardware that this frame is available to transmit.
    541 	 */
    542 	++txr->total_packets.ev_count;
    543 	IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
    544 
    545 	/* Mark queue as having work */
    546 	if (txr->busy == 0)
    547 		txr->busy = 1;
    548 
    549 	return 0;
    550 }
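
         /*
          * Illustrative bookkeeping after ixgbe_xmit() queues a 3-segment
          * frame starting at descriptor index 'first' (a sketch that ignores
          * ring wrap):
          *
          *	tx_buffers[first]     eop = &tx_base[first + 2], map = spare map
          *	tx_buffers[first + 1] eop = NULL,                map = untouched
          *	tx_buffers[first + 2] m_head = mbuf chain,       map = loaded map
          *
          * When the EOP descriptor reports DD, ixgbe_txeof() walks from
          * 'first' to 'first + 2', unloading the loaded map and freeing the
          * mbuf chain found at the last buffer.
          */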
    551 
    552 /*********************************************************************
    553  *
    554  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
    555  *  the information needed to transmit a packet on the wire. This is
     556  *  called only once at attach; setup is done on every reset.
    557  *
    558  **********************************************************************/
    559 int
    560 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
    561 {
    562 	struct adapter *adapter = txr->adapter;
    563 	device_t dev = adapter->dev;
    564 	struct ixgbe_tx_buf *txbuf;
    565 	int error, i;
    566 
    567 	/*
    568 	 * Setup DMA descriptor areas.
    569 	 */
    570 	if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat,	/* parent */
    571 			       1, 0,		/* alignment, bounds */
    572 			       IXGBE_TSO_SIZE,		/* maxsize */
    573 			       adapter->num_segs,	/* nsegments */
    574 			       PAGE_SIZE,		/* maxsegsize */
    575 			       0,			/* flags */
    576 			       &txr->txtag))) {
    577 		aprint_error_dev(dev,"Unable to allocate TX DMA tag\n");
    578 		goto fail;
    579 	}
    580 
    581 	if (!(txr->tx_buffers =
    582 	    (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
    583 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
    584 		aprint_error_dev(dev, "Unable to allocate tx_buffer memory\n");
    585 		error = ENOMEM;
    586 		goto fail;
    587 	}
    588 
    589         /* Create the descriptor buffer dma maps */
    590 	txbuf = txr->tx_buffers;
    591 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
    592 		error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
    593 		if (error != 0) {
    594 			aprint_error_dev(dev,
    595 			    "Unable to create TX DMA map (%d)\n", error);
    596 			goto fail;
    597 		}
    598 	}
    599 
    600 	return 0;
    601 fail:
     602 	/* Free everything; this handles the case where we failed partway */
    603 	ixgbe_free_transmit_structures(adapter);
    604 	return (error);
    605 }
    606 
    607 /*********************************************************************
    608  *
    609  *  Initialize a transmit ring.
    610  *
    611  **********************************************************************/
    612 static void
    613 ixgbe_setup_transmit_ring(struct tx_ring *txr)
    614 {
    615 	struct adapter *adapter = txr->adapter;
    616 	struct ixgbe_tx_buf *txbuf;
    617 #ifdef DEV_NETMAP
    618 	struct netmap_adapter *na = NA(adapter->ifp);
    619 	struct netmap_slot *slot;
    620 #endif /* DEV_NETMAP */
    621 
    622 	/* Clear the old ring contents */
    623 	IXGBE_TX_LOCK(txr);
    624 #ifdef DEV_NETMAP
    625 	/*
    626 	 * (under lock): if in netmap mode, do some consistency
    627 	 * checks and set slot to entry 0 of the netmap ring.
    628 	 */
    629 	slot = netmap_reset(na, NR_TX, txr->me, 0);
    630 #endif /* DEV_NETMAP */
    631 	bzero((void *)txr->tx_base,
    632 	      (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
    633 	/* Reset indices */
    634 	txr->next_avail_desc = 0;
    635 	txr->next_to_clean = 0;
    636 
    637 	/* Free any existing tx buffers. */
    638         txbuf = txr->tx_buffers;
    639 	for (int i = 0; i < txr->num_desc; i++, txbuf++) {
    640 		if (txbuf->m_head != NULL) {
    641 			bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
    642 			    0, txbuf->m_head->m_pkthdr.len,
    643 			    BUS_DMASYNC_POSTWRITE);
    644 			ixgbe_dmamap_unload(txr->txtag, txbuf->map);
    645 			m_freem(txbuf->m_head);
    646 			txbuf->m_head = NULL;
    647 		}
    648 #ifdef DEV_NETMAP
    649 		/*
    650 		 * In netmap mode, set the map for the packet buffer.
    651 		 * NOTE: Some drivers (not this one) also need to set
    652 		 * the physical buffer address in the NIC ring.
    653 		 * Slots in the netmap ring (indexed by "si") are
    654 		 * kring->nkr_hwofs positions "ahead" wrt the
    655 		 * corresponding slot in the NIC ring. In some drivers
    656 		 * (not here) nkr_hwofs can be negative. Function
    657 		 * netmap_idx_n2k() handles wraparounds properly.
    658 		 */
    659 		if (slot) {
    660 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
    661 			netmap_load_map(na, txr->txtag,
    662 			    txbuf->map, NMB(na, slot + si));
    663 		}
    664 #endif /* DEV_NETMAP */
    665 		/* Clear the EOP descriptor pointer */
    666 		txbuf->eop = NULL;
    667         }
    668 
    669 #ifdef IXGBE_FDIR
    670 	/* Set the rate at which we sample packets */
    671 	if (adapter->hw.mac.type != ixgbe_mac_82598EB)
    672 		txr->atr_sample = atr_sample_rate;
    673 #endif
    674 
    675 	/* Set number of descriptors available */
    676 	txr->tx_avail = adapter->num_tx_desc;
    677 
    678 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
    679 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
    680 	IXGBE_TX_UNLOCK(txr);
    681 }
    682 
    683 /*********************************************************************
    684  *
    685  *  Initialize all transmit rings.
    686  *
    687  **********************************************************************/
    688 int
    689 ixgbe_setup_transmit_structures(struct adapter *adapter)
    690 {
    691 	struct tx_ring *txr = adapter->tx_rings;
    692 
    693 	for (int i = 0; i < adapter->num_queues; i++, txr++)
    694 		ixgbe_setup_transmit_ring(txr);
    695 
    696 	return (0);
    697 }
    698 
    699 /*********************************************************************
    700  *
    701  *  Free all transmit rings.
    702  *
    703  **********************************************************************/
    704 void
    705 ixgbe_free_transmit_structures(struct adapter *adapter)
    706 {
    707 	struct tx_ring *txr = adapter->tx_rings;
    708 
    709 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
    710 		ixgbe_free_transmit_buffers(txr);
    711 		ixgbe_dma_free(adapter, &txr->txdma);
    712 		IXGBE_TX_LOCK_DESTROY(txr);
    713 	}
    714 	free(adapter->tx_rings, M_DEVBUF);
    715 }
    716 
    717 /*********************************************************************
    718  *
    719  *  Free transmit ring related data structures.
    720  *
    721  **********************************************************************/
    722 static void
    723 ixgbe_free_transmit_buffers(struct tx_ring *txr)
    724 {
    725 	struct adapter *adapter = txr->adapter;
    726 	struct ixgbe_tx_buf *tx_buffer;
    727 	int             i;
    728 
    729 	INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
    730 
    731 	if (txr->tx_buffers == NULL)
    732 		return;
    733 
    734 	tx_buffer = txr->tx_buffers;
    735 	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
    736 		if (tx_buffer->m_head != NULL) {
    737 			bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
    738 			    0, tx_buffer->m_head->m_pkthdr.len,
    739 			    BUS_DMASYNC_POSTWRITE);
    740 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
    741 			m_freem(tx_buffer->m_head);
    742 			tx_buffer->m_head = NULL;
    743 			if (tx_buffer->map != NULL) {
    744 				ixgbe_dmamap_destroy(txr->txtag,
    745 				    tx_buffer->map);
    746 				tx_buffer->map = NULL;
    747 			}
    748 		} else if (tx_buffer->map != NULL) {
    749 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
    750 			ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
    751 			tx_buffer->map = NULL;
    752 		}
    753 	}
    754 #ifndef IXGBE_LEGACY_TX
    755 	if (txr->br != NULL)
    756 		buf_ring_free(txr->br, M_DEVBUF);
    757 #endif
    758 	if (txr->tx_buffers != NULL) {
    759 		free(txr->tx_buffers, M_DEVBUF);
    760 		txr->tx_buffers = NULL;
    761 	}
    762 	if (txr->txtag != NULL) {
    763 		ixgbe_dma_tag_destroy(txr->txtag);
    764 		txr->txtag = NULL;
    765 	}
    766 	return;
    767 }
    768 
    769 /*********************************************************************
    770  *
    771  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
    772  *
    773  **********************************************************************/
    774 
    775 static int
    776 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
    777     u32 *cmd_type_len, u32 *olinfo_status)
    778 {
    779 	struct adapter *adapter = txr->adapter;
    780 	struct ethercom *ec = &adapter->osdep.ec;
    781 	struct m_tag *mtag;
    782 	struct ixgbe_adv_tx_context_desc *TXD;
    783 	struct ether_vlan_header *eh;
    784 	struct ip ip;
    785 	struct ip6_hdr ip6;
    786 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
    787 	int	ehdrlen, ip_hlen = 0;
    788 	u16	etype;
    789 	u8	ipproto __diagused = 0;
    790 	int	offload = TRUE;
    791 	int	ctxd = txr->next_avail_desc;
    792 	u16	vtag = 0;
    793 
    794 	/* First check if TSO is to be used */
    795 	if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4|M_CSUM_TSOv6))
    796 		return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
    797 
    798 	if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
    799 		offload = FALSE;
    800 
    801 	/* Indicate the whole packet as payload when not doing TSO */
    802        	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
    803 
    804 	/* Now ready a context descriptor */
    805 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
    806 
    807 	/*
    808 	** In advanced descriptors the vlan tag must
    809 	** be placed into the context descriptor. Hence
    810 	** we need to make one even if not doing offloads.
    811 	*/
    812 	if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
    813 		vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
    814 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
    815 	} else if (!IXGBE_IS_X550VF(adapter) && (offload == FALSE))
    816 		return (0);
    817 
    818 	/*
    819 	 * Determine where frame payload starts.
    820 	 * Jump over vlan headers if already present,
    821 	 * helpful for QinQ too.
    822 	 */
    823 	KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
    824 	eh = mtod(mp, struct ether_vlan_header *);
    825 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
    826 		KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
    827 		etype = ntohs(eh->evl_proto);
    828 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
    829 	} else {
    830 		etype = ntohs(eh->evl_encap_proto);
    831 		ehdrlen = ETHER_HDR_LEN;
    832 	}
    833 
    834 	/* Set the ether header length */
    835 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
    836 
    837 	if (offload == FALSE)
    838 		goto no_offloads;
    839 
    840 	switch (etype) {
    841 	case ETHERTYPE_IP:
    842 		m_copydata(mp, ehdrlen, sizeof(ip), &ip);
    843 		ip_hlen = ip.ip_hl << 2;
    844 		ipproto = ip.ip_p;
    845 #if 0
    846 		ip.ip_sum = 0;
    847 		m_copyback(mp, ehdrlen, sizeof(ip), &ip);
    848 #else
    849 		KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
    850 		    ip.ip_sum == 0);
    851 #endif
    852 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
    853 		break;
    854 	case ETHERTYPE_IPV6:
    855 		m_copydata(mp, ehdrlen, sizeof(ip6), &ip6);
    856 		ip_hlen = sizeof(ip6);
    857 		/* XXX-BZ this will go badly in case of ext hdrs. */
    858 		ipproto = ip6.ip6_nxt;
    859 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
    860 		break;
    861 	default:
    862 		break;
    863 	}
    864 
    865 	if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
    866 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
    867 
    868 	vlan_macip_lens |= ip_hlen;
    869 
    870 	if (mp->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_TCPv6)) {
    871 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
    872 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
    873 		KASSERT(ipproto == IPPROTO_TCP);
    874 	} else if (mp->m_pkthdr.csum_flags & (M_CSUM_UDPv4|M_CSUM_UDPv6)) {
    875 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
    876 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
    877 		KASSERT(ipproto == IPPROTO_UDP);
    878 	}
    879 
    880 no_offloads:
    881 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
    882 
    883 	/* Now copy bits into descriptor */
    884 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
    885 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
    886 	TXD->seqnum_seed = htole32(0);
    887 	TXD->mss_l4len_idx = htole32(0);
    888 
    889 	/* We've consumed the first desc, adjust counters */
    890 	if (++ctxd == txr->num_desc)
    891 		ctxd = 0;
    892 	txr->next_avail_desc = ctxd;
    893 	--txr->tx_avail;
    894 
    895         return 0;
    896 }
    897 
    898 /**********************************************************************
    899  *
    900  *  Setup work for hardware segmentation offload (TSO) on
    901  *  adapters using advanced tx descriptors
    902  *
    903  **********************************************************************/
    904 static int
    905 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
    906     u32 *cmd_type_len, u32 *olinfo_status)
    907 {
    908 	struct m_tag *mtag;
    909 	struct adapter *adapter = txr->adapter;
    910 	struct ethercom *ec = &adapter->osdep.ec;
    911 	struct ixgbe_adv_tx_context_desc *TXD;
    912 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
    913 	u32 mss_l4len_idx = 0, paylen;
    914 	u16 vtag = 0, eh_type;
    915 	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
    916 	struct ether_vlan_header *eh;
    917 #ifdef INET6
    918 	struct ip6_hdr *ip6;
    919 #endif
    920 #ifdef INET
    921 	struct ip *ip;
    922 #endif
    923 	struct tcphdr *th;
    924 
    925 
    926 	/*
    927 	 * Determine where frame payload starts.
    928 	 * Jump over vlan headers if already present
    929 	 */
    930 	eh = mtod(mp, struct ether_vlan_header *);
    931 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
    932 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
    933 		eh_type = eh->evl_proto;
    934 	} else {
    935 		ehdrlen = ETHER_HDR_LEN;
    936 		eh_type = eh->evl_encap_proto;
    937 	}
    938 
    939 	switch (ntohs(eh_type)) {
    940 #ifdef INET6
    941 	case ETHERTYPE_IPV6:
    942 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
    943 		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
    944 		if (ip6->ip6_nxt != IPPROTO_TCP)
    945 			return (ENXIO);
    946 		ip_hlen = sizeof(struct ip6_hdr);
    947 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
    948 		th = (struct tcphdr *)((char *)ip6 + ip_hlen);
    949 		th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
    950 		    &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
    951 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
    952 		break;
    953 #endif
    954 #ifdef INET
    955 	case ETHERTYPE_IP:
    956 		ip = (struct ip *)(mp->m_data + ehdrlen);
    957 		if (ip->ip_p != IPPROTO_TCP)
    958 			return (ENXIO);
    959 		ip->ip_sum = 0;
    960 		ip_hlen = ip->ip_hl << 2;
    961 		th = (struct tcphdr *)((char *)ip + ip_hlen);
    962 		th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
    963 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
    964 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
    965 		/* Tell transmit desc to also do IPv4 checksum. */
    966 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
    967 		break;
    968 #endif
    969 	default:
    970 		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
    971 		    __func__, ntohs(eh_type));
    972 		break;
    973 	}
    974 
    975 	ctxd = txr->next_avail_desc;
    976 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
    977 
    978 	tcp_hlen = th->th_off << 2;
    979 
    980 	/* This is used in the transmit desc in encap */
    981 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
    982 
    983 	/* VLAN MACLEN IPLEN */
    984 	if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
    985 		vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
    986                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
    987 	}
    988 
    989 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
    990 	vlan_macip_lens |= ip_hlen;
    991 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
    992 
    993 	/* ADV DTYPE TUCMD */
    994 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
    995 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
    996 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
    997 
    998 	/* MSS L4LEN IDX */
    999 	mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
   1000 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
   1001 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
   1002 
   1003 	TXD->seqnum_seed = htole32(0);
   1004 
   1005 	if (++ctxd == txr->num_desc)
   1006 		ctxd = 0;
   1007 
   1008 	txr->tx_avail--;
   1009 	txr->next_avail_desc = ctxd;
   1010 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
   1011 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
   1012 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
   1013 	++txr->tso_tx.ev_count;
   1014 	return (0);
   1015 }
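
         /*
          * Note on the checksum seeding above: for TSO the TCP checksum field
          * is primed with only the pseudo-header sum over the addresses and
          * protocol, computed with a zero length (in6_cksum_phdr() is passed
          * a length of 0 and in_cksum_phdr() just htons(IPPROTO_TCP)).  The
          * hardware then folds the payload bytes and per-segment length into
          * each frame it generates, which is why the full TCP length must not
          * be included here.
          */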
   1016 
   1017 
   1018 /**********************************************************************
   1019  *
   1020  *  Examine each tx_buffer in the used queue. If the hardware is done
   1021  *  processing the packet then free associated resources. The
   1022  *  tx_buffer is put back on the free queue.
   1023  *
   1024  **********************************************************************/
   1025 void
   1026 ixgbe_txeof(struct tx_ring *txr)
   1027 {
   1028 	struct adapter		*adapter = txr->adapter;
   1029 	struct ifnet		*ifp = adapter->ifp;
   1030 	u32			work, processed = 0;
   1031 	u16			limit = txr->process_limit;
   1032 	struct ixgbe_tx_buf	*buf;
   1033 	union ixgbe_adv_tx_desc *txd;
   1034 
   1035 	KASSERT(mutex_owned(&txr->tx_mtx));
   1036 
   1037 #ifdef DEV_NETMAP
   1038 	if (ifp->if_capenable & IFCAP_NETMAP) {
   1039 		struct netmap_adapter *na = NA(ifp);
   1040 		struct netmap_kring *kring = &na->tx_rings[txr->me];
   1041 		txd = txr->tx_base;
   1042 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1043 		    BUS_DMASYNC_POSTREAD);
   1044 		/*
   1045 		 * In netmap mode, all the work is done in the context
   1046 		 * of the client thread. Interrupt handlers only wake up
   1047 		 * clients, which may be sleeping on individual rings
   1048 		 * or on a global resource for all rings.
   1049 		 * To implement tx interrupt mitigation, we wake up the client
   1050 		 * thread roughly every half ring, even if the NIC interrupts
   1051 		 * more frequently. This is implemented as follows:
   1052 		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
   1053 		 *   the slot that should wake up the thread (nkr_num_slots
   1054 		 *   means the user thread should not be woken up);
   1055 		 * - the driver ignores tx interrupts unless netmap_mitigate=0
   1056 		 *   or the slot has the DD bit set.
   1057 		 */
   1058 		if (!netmap_mitigate ||
   1059 		    (kring->nr_kflags < kring->nkr_num_slots &&
   1060 		    txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
   1061 			netmap_tx_irq(ifp, txr->me);
   1062 		}
   1063 		return;
   1064 	}
   1065 #endif /* DEV_NETMAP */
   1066 
   1067 	if (txr->tx_avail == txr->num_desc) {
   1068 		txr->busy = 0;
   1069 		return;
   1070 	}
   1071 
   1072 	/* Get work starting point */
   1073 	work = txr->next_to_clean;
   1074 	buf = &txr->tx_buffers[work];
   1075 	txd = &txr->tx_base[work];
   1076 	work -= txr->num_desc; /* The distance to ring end */
   1077         ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1078 	    BUS_DMASYNC_POSTREAD);
   1079 	do {
   1080 		union ixgbe_adv_tx_desc *eop= buf->eop;
   1081 		if (eop == NULL) /* No work */
   1082 			break;
   1083 
   1084 		if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
   1085 			break;	/* I/O not complete */
   1086 
   1087 		if (buf->m_head) {
   1088 			txr->bytes +=
   1089 			    buf->m_head->m_pkthdr.len;
   1090 			bus_dmamap_sync(txr->txtag->dt_dmat,
   1091 			    buf->map,
   1092 			    0, buf->m_head->m_pkthdr.len,
   1093 			    BUS_DMASYNC_POSTWRITE);
   1094 			ixgbe_dmamap_unload(txr->txtag,
   1095 			    buf->map);
   1096 			m_freem(buf->m_head);
   1097 			buf->m_head = NULL;
   1098 		}
   1099 		buf->eop = NULL;
   1100 		++txr->tx_avail;
   1101 
    1102 		/* Clean the whole range if the frame used multiple descriptors */
   1103 		while (txd != eop) {
   1104 			++txd;
   1105 			++buf;
   1106 			++work;
   1107 			/* wrap the ring? */
   1108 			if (__predict_false(!work)) {
   1109 				work -= txr->num_desc;
   1110 				buf = txr->tx_buffers;
   1111 				txd = txr->tx_base;
   1112 			}
   1113 			if (buf->m_head) {
   1114 				txr->bytes +=
   1115 				    buf->m_head->m_pkthdr.len;
   1116 				bus_dmamap_sync(txr->txtag->dt_dmat,
   1117 				    buf->map,
   1118 				    0, buf->m_head->m_pkthdr.len,
   1119 				    BUS_DMASYNC_POSTWRITE);
   1120 				ixgbe_dmamap_unload(txr->txtag,
   1121 				    buf->map);
   1122 				m_freem(buf->m_head);
   1123 				buf->m_head = NULL;
   1124 			}
   1125 			++txr->tx_avail;
   1126 			buf->eop = NULL;
   1127 
   1128 		}
   1129 		++txr->packets;
   1130 		++processed;
   1131 		++ifp->if_opackets;
   1132 
   1133 		/* Try the next packet */
   1134 		++txd;
   1135 		++buf;
   1136 		++work;
   1137 		/* reset with a wrap */
   1138 		if (__predict_false(!work)) {
   1139 			work -= txr->num_desc;
   1140 			buf = txr->tx_buffers;
   1141 			txd = txr->tx_base;
   1142 		}
   1143 		prefetch(txd);
   1144 	} while (__predict_true(--limit));
   1145 
   1146 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1147 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   1148 
   1149 	work += txr->num_desc;
   1150 	txr->next_to_clean = work;
   1151 
   1152 	/*
    1153 	** Queue hang detection: we know there is work
    1154 	** outstanding, or the early return above would
    1155 	** have been taken, so if nothing was cleaned
    1156 	** this pass, increment busy.  The local timer
    1157 	** checks that count and marks the queue HUNG
    1158 	** once it exceeds the maximum number of attempts.
   1159 	*/
   1160 	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
   1161 		++txr->busy;
   1162 	/*
    1163 	** If anything was cleaned we reset the state to 1;
    1164 	** note this will clear HUNG if it is set.
   1165 	*/
   1166 	if (processed)
   1167 		txr->busy = 1;
   1168 
   1169 	if (txr->tx_avail == txr->num_desc)
   1170 		txr->busy = 0;
   1171 
   1172 	return;
   1173 }
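
         /*
          * Sketch of how the busy counter above is expected to be consumed by
          * the driver's periodic timer (the real check lives outside this
          * file, and MAX_BUSY_COUNT below is a placeholder name, not an
          * actual macro):
          *
          *	if (txr->busy > MAX_BUSY_COUNT)
          *		txr->busy = IXGBE_QUEUE_HUNG;
          *	if (txr->busy == IXGBE_QUEUE_HUNG)
          *		... no forward progress for too long: reinit the queue ...
          */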
   1174 
   1175 
   1176 #ifdef IXGBE_FDIR
   1177 /*
    1178 ** This routine parses packet headers so that Flow
    1179 ** Director can make a hashed filter table entry,
    1180 ** allowing traffic flows to be identified and kept
    1181 ** on the same CPU.  Doing this for every packet
    1182 ** would be a performance hit, so we only do it
    1183 ** for one in every IXGBE_FDIR_RATE packets.
   1184 */
   1185 static void
   1186 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
   1187 {
   1188 	struct adapter			*adapter = txr->adapter;
   1189 	struct ix_queue			*que;
   1190 	struct ip			*ip;
   1191 	struct tcphdr			*th;
   1192 	struct udphdr			*uh;
   1193 	struct ether_vlan_header	*eh;
   1194 	union ixgbe_atr_hash_dword	input = {.dword = 0};
   1195 	union ixgbe_atr_hash_dword	common = {.dword = 0};
   1196 	int  				ehdrlen, ip_hlen;
   1197 	u16				etype;
   1198 
   1199 	eh = mtod(mp, struct ether_vlan_header *);
   1200 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
   1201 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
   1202 		etype = eh->evl_proto;
   1203 	} else {
   1204 		ehdrlen = ETHER_HDR_LEN;
   1205 		etype = eh->evl_encap_proto;
   1206 	}
   1207 
   1208 	/* Only handling IPv4 */
   1209 	if (etype != htons(ETHERTYPE_IP))
   1210 		return;
   1211 
   1212 	ip = (struct ip *)(mp->m_data + ehdrlen);
   1213 	ip_hlen = ip->ip_hl << 2;
   1214 
   1215 	/* check if we're UDP or TCP */
   1216 	switch (ip->ip_p) {
   1217 	case IPPROTO_TCP:
   1218 		th = (struct tcphdr *)((char *)ip + ip_hlen);
   1219 		/* src and dst are inverted */
   1220 		common.port.dst ^= th->th_sport;
   1221 		common.port.src ^= th->th_dport;
   1222 		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
   1223 		break;
   1224 	case IPPROTO_UDP:
   1225 		uh = (struct udphdr *)((char *)ip + ip_hlen);
   1226 		/* src and dst are inverted */
   1227 		common.port.dst ^= uh->uh_sport;
   1228 		common.port.src ^= uh->uh_dport;
   1229 		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
   1230 		break;
   1231 	default:
   1232 		return;
   1233 	}
   1234 
   1235 	input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
   1236 	if (mp->m_pkthdr.ether_vtag)
   1237 		common.flex_bytes ^= htons(ETHERTYPE_VLAN);
   1238 	else
   1239 		common.flex_bytes ^= etype;
   1240 	common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
   1241 
   1242 	que = &adapter->queues[txr->me];
   1243 	/*
   1244 	** This assumes the Rx queue and Tx
   1245 	** queue are bound to the same CPU
   1246 	*/
   1247 	ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
   1248 	    input, common, que->msix);
   1249 }
   1250 #endif /* IXGBE_FDIR */
   1251 
   1252 /*
   1253 ** Used to detect a descriptor that has
   1254 ** been merged by Hardware RSC.
   1255 */
   1256 static inline u32
   1257 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
   1258 {
   1259 	return (le32toh(rx->wb.lower.lo_dword.data) &
   1260 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
   1261 }
   1262 
   1263 /*********************************************************************
   1264  *
    1265  *  Initialize the Hardware RSC (LRO) feature on 82599
    1266  *  for an RX ring; it is toggled by the LRO capability
    1267  *  even though it is transparent to the stack.
    1268  *
    1269  *  NOTE: since this HW feature only works with IPv4, and
    1270  *        our testing has shown soft LRO to be just as
    1271  *        effective, it is disabled by default.
   1272  *
   1273  **********************************************************************/
   1274 static void
   1275 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
   1276 {
   1277 	struct	adapter 	*adapter = rxr->adapter;
   1278 	struct	ixgbe_hw	*hw = &adapter->hw;
   1279 	u32			rscctrl, rdrxctl;
   1280 
    1281 	/* If turning LRO/RSC off we need to disable it in the hardware */
    1282 	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
    1283 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
    1284 		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl & ~IXGBE_RSCCTL_RSCEN);
    1285 		return;
    1286 	}
   1287 
   1288 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
   1289 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
   1290 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
   1291 	if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
   1292 #endif /* DEV_NETMAP */
   1293 	rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
   1294 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
   1295 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
   1296 
   1297 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
   1298 	rscctrl |= IXGBE_RSCCTL_RSCEN;
   1299 	/*
   1300 	** Limit the total number of descriptors that
   1301 	** can be combined, so it does not exceed 64K
   1302 	*/
   1303 	if (rxr->mbuf_sz == MCLBYTES)
   1304 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
   1305 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
   1306 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
   1307 	else if (rxr->mbuf_sz == MJUM9BYTES)
   1308 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
   1309 	else  /* Using 16K cluster */
   1310 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
   1311 
   1312 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
   1313 
   1314 	/* Enable TCP header recognition */
   1315 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
   1316 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
   1317 	    IXGBE_PSRTYPE_TCPHDR));
   1318 
   1319 	/* Disable RSC for ACK packets */
   1320 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
   1321 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
   1322 
   1323 	rxr->hw_rsc = TRUE;
   1324 }
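
         /*
          * The MAXDESC settings above bound how many buffers RSC may merge for
          * each cluster size so the aggregate stays within the 64KB limit.
          * Assuming the usual cluster sizes (MCLBYTES 2KB, MJUMPAGESIZE one
          * 4KB page on most ports, MJUM9BYTES 9KB, MJUM16BYTES 16KB):
          *
          *	16 x  2048 = 32768 bytes
          *	 8 x  4096 = 32768 bytes
          *	 4 x  9216 = 36864 bytes
          *	 1 x 16384 = 16384 bytes
          *
          * all of which fit within a 65536 byte aggregate.
          */
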
   1325 /*********************************************************************
   1326  *
   1327  *  Refresh mbuf buffers for RX descriptor rings
    1328  *   - now keeps its own state, so discards due to resource
    1329  *     exhaustion are unnecessary; if an mbuf cannot be obtained
    1330  *     the routine just returns, keeping its placeholder, and can
    1331  *     simply be called again to retry.
   1332  *
   1333  **********************************************************************/
   1334 static void
   1335 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
   1336 {
   1337 	struct adapter		*adapter = rxr->adapter;
   1338 	struct ixgbe_rx_buf	*rxbuf;
   1339 	struct mbuf		*mp;
   1340 	int			i, j, error;
   1341 	bool			refreshed = false;
   1342 
   1343 	i = j = rxr->next_to_refresh;
   1344 	/* Control the loop with one beyond */
   1345 	if (++j == rxr->num_desc)
   1346 		j = 0;
   1347 
   1348 	while (j != limit) {
   1349 		rxbuf = &rxr->rx_buffers[i];
   1350 		if (rxbuf->buf == NULL) {
   1351 			mp = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
   1352 			    MT_DATA, M_PKTHDR, rxr->mbuf_sz);
   1353 			if (mp == NULL) {
   1354 				rxr->no_jmbuf.ev_count++;
   1355 				goto update;
   1356 			}
   1357 			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
   1358 				m_adj(mp, ETHER_ALIGN);
   1359 		} else
   1360 			mp = rxbuf->buf;
   1361 
   1362 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   1363 
   1364 		/* If we're dealing with an mbuf that was copied rather
   1365 		 * than replaced, there's no need to go through busdma.
   1366 		 */
   1367 		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
   1368 			/* Get the memory mapping */
   1369 			ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   1370 			error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
   1371 			    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
   1372 			if (error != 0) {
   1373 				printf("Refresh mbufs: payload dmamap load"
   1374 				    " failure - %d\n", error);
   1375 				m_free(mp);
   1376 				rxbuf->buf = NULL;
   1377 				goto update;
   1378 			}
   1379 			rxbuf->buf = mp;
   1380 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1381 			    0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
   1382 			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
   1383 			    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1384 		} else {
   1385 			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
   1386 			rxbuf->flags &= ~IXGBE_RX_COPY;
   1387 		}
   1388 
   1389 		refreshed = true;
   1390 		/* Next is precalculated */
   1391 		i = j;
   1392 		rxr->next_to_refresh = i;
   1393 		if (++j == rxr->num_desc)
   1394 			j = 0;
   1395 	}
   1396 update:
   1397 	if (refreshed) /* Update hardware tail index */
   1398 		IXGBE_WRITE_REG(&adapter->hw,
   1399 		    rxr->tail, rxr->next_to_refresh);
   1400 	return;
   1401 }
   1402 
   1403 /*********************************************************************
   1404  *
   1405  *  Allocate memory for rx_buffer structures. Since we use one
    1406  *  rx_buffer per received packet, the maximum number of rx_buffers
   1407  *  that we'll need is equal to the number of receive descriptors
   1408  *  that we've allocated.
   1409  *
   1410  **********************************************************************/
   1411 int
   1412 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
   1413 {
   1414 	struct	adapter 	*adapter = rxr->adapter;
   1415 	device_t 		dev = adapter->dev;
   1416 	struct ixgbe_rx_buf 	*rxbuf;
   1417 	int             	bsize, error;
   1418 
   1419 	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
   1420 	if (!(rxr->rx_buffers =
   1421 	    (struct ixgbe_rx_buf *) malloc(bsize,
   1422 	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
   1423 		aprint_error_dev(dev, "Unable to allocate rx_buffer memory\n");
   1424 		error = ENOMEM;
   1425 		goto fail;
   1426 	}
   1427 
   1428 	if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat,	/* parent */
   1429 				   1, 0,	/* alignment, bounds */
   1430 				   MJUM16BYTES,		/* maxsize */
   1431 				   1,			/* nsegments */
   1432 				   MJUM16BYTES,		/* maxsegsize */
   1433 				   0,			/* flags */
   1434 				   &rxr->ptag))) {
   1435 		aprint_error_dev(dev, "Unable to create RX DMA tag\n");
   1436 		goto fail;
   1437 	}
   1438 
   1439 	for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
   1440 		rxbuf = &rxr->rx_buffers[i];
   1441 		error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
   1442 		if (error) {
   1443 			aprint_error_dev(dev, "Unable to create RX dma map\n");
   1444 			goto fail;
   1445 		}
   1446 	}
   1447 
   1448 	return (0);
   1449 
   1450 fail:
   1451 	/* Frees all, but can handle partial completion */
   1452 	ixgbe_free_receive_structures(adapter);
   1453 	return (error);
   1454 }
   1455 
   1456 
   1457 static void
   1458 ixgbe_free_receive_ring(struct rx_ring *rxr)
   1459 {
   1460 	struct ixgbe_rx_buf       *rxbuf;
   1461 
   1462 	for (int i = 0; i < rxr->num_desc; i++) {
   1463 		rxbuf = &rxr->rx_buffers[i];
   1464 		if (rxbuf->buf != NULL) {
   1465 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1466 			    0, rxbuf->buf->m_pkthdr.len,
   1467 			    BUS_DMASYNC_POSTREAD);
   1468 			ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   1469 			rxbuf->buf->m_flags |= M_PKTHDR;
   1470 			m_freem(rxbuf->buf);
   1471 			rxbuf->buf = NULL;
   1472 			rxbuf->flags = 0;
   1473 		}
   1474 	}
   1475 }
   1476 
   1477 
   1478 /*********************************************************************
   1479  *
   1480  *  Initialize a receive ring and its buffers.
   1481  *
   1482  **********************************************************************/
   1483 static int
   1484 ixgbe_setup_receive_ring(struct rx_ring *rxr)
   1485 {
   1486 	struct	adapter 	*adapter;
   1487 	struct ixgbe_rx_buf	*rxbuf;
   1488 #ifdef LRO
   1489 	struct ifnet		*ifp;
   1490 	struct lro_ctrl		*lro = &rxr->lro;
   1491 #endif /* LRO */
   1492 	int			rsize, error = 0;
   1493 #ifdef DEV_NETMAP
   1494 	struct netmap_adapter *na = NA(rxr->adapter->ifp);
   1495 	struct netmap_slot *slot;
   1496 #endif /* DEV_NETMAP */
   1497 
   1498 	adapter = rxr->adapter;
   1499 #ifdef LRO
   1500 	ifp = adapter->ifp;
   1501 #endif /* LRO */
   1502 
   1503 	/* Clear the ring contents */
   1504 	IXGBE_RX_LOCK(rxr);
   1505 #ifdef DEV_NETMAP
   1506 	/* same as in ixgbe_setup_transmit_ring() */
   1507 	slot = netmap_reset(na, NR_RX, rxr->me, 0);
   1508 #endif /* DEV_NETMAP */
   1509 	rsize = roundup2(adapter->num_rx_desc *
   1510 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
   1511 	bzero((void *)rxr->rx_base, rsize);
   1512 	/* Cache the size */
   1513 	rxr->mbuf_sz = adapter->rx_mbuf_sz;
   1514 
   1515 	/* Free current RX buffer structs and their mbufs */
   1516 	ixgbe_free_receive_ring(rxr);
   1517 
   1518 	IXGBE_RX_UNLOCK(rxr);
   1519 
   1520 	/* Now reinitialize our supply of jumbo mbufs.  The number
   1521 	 * or size of jumbo mbufs may have changed.
   1522 	 */
   1523 	ixgbe_jcl_reinit(&adapter->jcl_head, rxr->ptag->dt_dmat,
   1524 	    2 * adapter->num_rx_desc, adapter->rx_mbuf_sz);
   1525 
   1526 	IXGBE_RX_LOCK(rxr);
   1527 
   1528 	/* Now replenish the mbufs */
   1529 	for (int j = 0; j != rxr->num_desc; ++j) {
   1530 		struct mbuf	*mp;
   1531 
   1532 		rxbuf = &rxr->rx_buffers[j];
   1533 #ifdef DEV_NETMAP
   1534 		/*
   1535 		 * In netmap mode, fill the map and set the buffer
   1536 		 * address in the NIC ring, considering the offset
   1537 		 * between the netmap and NIC rings (see comment in
   1538 		 * ixgbe_setup_transmit_ring() ). No need to allocate
   1539 		 * an mbuf, so end the block with a continue;
   1540 		 */
   1541 		if (slot) {
   1542 			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
   1543 			uint64_t paddr;
   1544 			void *addr;
   1545 
   1546 			addr = PNMB(na, slot + sj, &paddr);
   1547 			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
   1548 			/* Update descriptor and the cached value */
   1549 			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
   1550 			rxbuf->addr = htole64(paddr);
   1551 			continue;
   1552 		}
   1553 #endif /* DEV_NETMAP */
   1554 		rxbuf->flags = 0;
   1555 		rxbuf->buf = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
   1556 		    MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
   1557 		if (rxbuf->buf == NULL) {
   1558 			error = ENOBUFS;
   1559                         goto fail;
   1560 		}
   1561 		mp = rxbuf->buf;
   1562 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   1563 		/* Get the memory mapping */
   1564 		error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
   1565 		    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
   1566 		if (error != 0)
   1567 			goto fail;
   1568 		bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1569 		    0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
   1570 		/* Update the descriptor and the cached value */
   1571 		rxr->rx_base[j].read.pkt_addr =
   1572 		    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1573 		rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1574 	}
   1575 
   1576 
   1577 	/* Setup our descriptor indices */
   1578 	rxr->next_to_check = 0;
   1579 	rxr->next_to_refresh = 0;
   1580 	rxr->lro_enabled = FALSE;
   1581 	rxr->rx_copies.ev_count = 0;
   1582 	rxr->rx_bytes.ev_count = 0;
   1583 	rxr->vtag_strip = FALSE;
   1584 
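	/*
	 * Flush the freshly initialized descriptor ring to memory so the
	 * device sees a consistent view before receive is (re)enabled.
	 */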
   1585 	ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   1586 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   1587 
   1588 	/*
   1589 	** Now set up the LRO interface:
   1590 	*/
   1591 	if (ixgbe_rsc_enable)
   1592 		ixgbe_setup_hw_rsc(rxr);
   1593 #ifdef LRO
   1594 	else if (ifp->if_capenable & IFCAP_LRO) {
   1595 		device_t dev = adapter->dev;
   1596 		int err = tcp_lro_init(lro);
   1597 		if (err) {
   1598 			device_printf(dev, "LRO Initialization failed!\n");
   1599 			goto fail;
   1600 		}
   1601 		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
   1602 		rxr->lro_enabled = TRUE;
   1603 		lro->ifp = adapter->ifp;
   1604 	}
   1605 #endif /* LRO */
   1606 
   1607 	IXGBE_RX_UNLOCK(rxr);
   1608 	return (0);
   1609 
   1610 fail:
   1611 	ixgbe_free_receive_ring(rxr);
   1612 	IXGBE_RX_UNLOCK(rxr);
   1613 	return (error);
   1614 }
   1615 
   1616 /*********************************************************************
   1617  *
   1618  *  Initialize all receive rings.
   1619  *
   1620  **********************************************************************/
   1621 int
   1622 ixgbe_setup_receive_structures(struct adapter *adapter)
   1623 {
   1624 	struct rx_ring *rxr = adapter->rx_rings;
   1625 	int j;
   1626 
   1627 	for (j = 0; j < adapter->num_queues; j++, rxr++)
   1628 		if (ixgbe_setup_receive_ring(rxr))
   1629 			goto fail;
   1630 
   1631 	return (0);
   1632 fail:
   1633 	/*
   1634 	 * Free the RX buffers allocated so far; we only handle the
   1635 	 * rings that completed, since the failing ring will have
   1636 	 * cleaned up after itself.  'j' failed, so it is the terminus.
   1637 	 */
   1638 	for (int i = 0; i < j; ++i) {
   1639 		rxr = &adapter->rx_rings[i];
   1640 		ixgbe_free_receive_ring(rxr);
   1641 	}
   1642 
   1643 	return (ENOBUFS);
   1644 }
   1645 
   1646 
   1647 /*********************************************************************
   1648  *
   1649  *  Free all receive rings.
   1650  *
   1651  **********************************************************************/
   1652 void
   1653 ixgbe_free_receive_structures(struct adapter *adapter)
   1654 {
   1655 	struct rx_ring *rxr = adapter->rx_rings;
   1656 
   1657 	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
   1658 
   1659 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
   1660 #ifdef LRO
   1661 		struct lro_ctrl		*lro = &rxr->lro;
   1662 #endif /* LRO */
   1663 		ixgbe_free_receive_buffers(rxr);
   1664 #ifdef LRO
   1665 		/* Free LRO memory */
   1666 		tcp_lro_free(lro);
   1667 #endif /* LRO */
   1668 		/* Free the ring memory as well */
   1669 		ixgbe_dma_free(adapter, &rxr->rxdma);
   1670 		IXGBE_RX_LOCK_DESTROY(rxr);
   1671 	}
   1672 
   1673 	free(adapter->rx_rings, M_DEVBUF);
   1674 }
   1675 
   1676 
   1677 /*********************************************************************
   1678  *
   1679  *  Free receive ring data structures
   1680  *
   1681  **********************************************************************/
   1682 static void
   1683 ixgbe_free_receive_buffers(struct rx_ring *rxr)
   1684 {
   1685 	struct adapter		*adapter = rxr->adapter;
   1686 	struct ixgbe_rx_buf	*rxbuf;
   1687 
   1688 	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
   1689 
   1690 	/* Cleanup any existing buffers */
   1691 	if (rxr->rx_buffers != NULL) {
   1692 		for (int i = 0; i < adapter->num_rx_desc; i++) {
   1693 			rxbuf = &rxr->rx_buffers[i];
   1694 			if (rxbuf->buf != NULL) {
   1695 				bus_dmamap_sync(rxr->ptag->dt_dmat,
   1696 				    rxbuf->pmap, 0, rxbuf->buf->m_pkthdr.len,
   1697 				    BUS_DMASYNC_POSTREAD);
   1698 				ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   1699 				rxbuf->buf->m_flags |= M_PKTHDR;
   1700 				m_freem(rxbuf->buf);
   1701 			}
   1702 			rxbuf->buf = NULL;
   1703 			if (rxbuf->pmap != NULL) {
   1704 				ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
   1705 				rxbuf->pmap = NULL;
   1706 			}
   1707 		}
   1708 		if (rxr->rx_buffers != NULL) {
   1709 			free(rxr->rx_buffers, M_DEVBUF);
   1710 			rxr->rx_buffers = NULL;
   1711 		}
   1712 	}
   1713 
   1714 	if (rxr->ptag != NULL) {
   1715 		ixgbe_dma_tag_destroy(rxr->ptag);
   1716 		rxr->ptag = NULL;
   1717 	}
   1718 
   1719 	return;
   1720 }
   1721 
   1722 static __inline void
   1723 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
   1724 {
   1725 	int s;
   1726 
   1727 #ifdef LRO
   1728 	struct adapter	*adapter = ifp->if_softc;
   1729 	struct ethercom *ec = &adapter->osdep.ec;
   1730 
   1731         /*
   1732          * At the moment LRO is only for IP/TCP packets whose TCP checksum
   1733          * was verified by the hardware, and which carry no VLAN tag in the
   1734          * ethernet header.  For IPv6 we do not yet support extension headers.
   1735          */
   1736         if (rxr->lro_enabled &&
   1737             (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
   1738             (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
   1739             ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
   1740             (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
   1741             (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
   1742             (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
   1743             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
   1744             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
   1745                 /*
   1746                  * Send to the stack if:
   1747                  *  - LRO not enabled, or
   1748                  *  - no LRO resources, or
   1749                  *  - the LRO enqueue fails
   1750                  */
   1751                 if (rxr->lro.lro_cnt != 0)
   1752                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
   1753                                 return;
   1754         }
   1755 #endif /* LRO */
   1756 
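	/*
	 * The RX ring lock is dropped across the hand-off to BPF and the
	 * stack, presumably because if_input() may block or re-enter the
	 * driver and should not be called with rx_mtx held.
	 */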
   1757 	IXGBE_RX_UNLOCK(rxr);
   1758 
   1759 	s = splnet();
   1760 	/* Pass this up to any BPF listeners. */
   1761 	bpf_mtap(ifp, m);
   1762 	if_input(ifp, m);
   1763 	splx(s);
   1764 
   1765 	IXGBE_RX_LOCK(rxr);
   1766 }
   1767 
   1768 static __inline void
   1769 ixgbe_rx_discard(struct rx_ring *rxr, int i)
   1770 {
   1771 	struct ixgbe_rx_buf	*rbuf;
   1772 
   1773 	rbuf = &rxr->rx_buffers[i];
   1774 
   1775 
   1776 	/*
   1777 	** With advanced descriptors the writeback
   1778 	** clobbers the buffer addrs, so it's easier
   1779 	** to just free the existing mbufs and take
   1780 	** the normal refresh path to get new buffers
   1781 	** and mapping.
   1782 	*/
   1783 
   1784 	if (rbuf->fmp != NULL) {	/* Partial chain? */
   1785 		rbuf->fmp->m_flags |= M_PKTHDR;
   1786 		m_freem(rbuf->fmp);
   1787 		rbuf->fmp = NULL;
   1788 		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
   1789 	} else if (rbuf->buf) {
   1790 		m_free(rbuf->buf);
   1791 		rbuf->buf = NULL;
   1792 	}
   1793 	ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
   1794 
   1795 	rbuf->flags = 0;
   1796 
   1797 	return;
   1798 }
   1799 
   1800 
   1801 /*********************************************************************
   1802  *
   1803  *  This routine executes in interrupt context. It replenishes
   1804  *  the mbufs in the descriptor ring and sends the data that has
   1805  *  been DMA'ed into host memory up to the network stack.
   1806  *
   1807  *  Return TRUE for more work, FALSE for all clean.
   1808  *********************************************************************/
   1809 bool
   1810 ixgbe_rxeof(struct ix_queue *que)
   1811 {
   1812 	struct adapter		*adapter = que->adapter;
   1813 	struct rx_ring		*rxr = que->rxr;
   1814 	struct ifnet		*ifp = adapter->ifp;
   1815 #ifdef LRO
   1816 	struct lro_ctrl		*lro = &rxr->lro;
   1817 	struct lro_entry	*queued;
   1818 #endif /* LRO */
   1819 	int			i, nextp, processed = 0;
   1820 	u32			staterr = 0;
   1821 	u16			count = rxr->process_limit;
   1822 	union ixgbe_adv_rx_desc	*cur;
   1823 	struct ixgbe_rx_buf	*rbuf, *nbuf;
   1824 #ifdef RSS
   1825 	u16			pkt_info;
   1826 #endif
   1827 
   1828 	IXGBE_RX_LOCK(rxr);
   1829 
   1830 #ifdef DEV_NETMAP
   1831 	/* Same as the txeof routine: wakeup clients on intr. */
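	/*
	 * When this ring is in netmap mode, netmap_rx_irq() wakes the
	 * netmap client and returns non-zero; the client consumes the
	 * packets itself, so the normal receive path below is skipped.
	 */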
   1832 	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
   1833 		IXGBE_RX_UNLOCK(rxr);
   1834 		return (FALSE);
   1835 	}
   1836 #endif /* DEV_NETMAP */
   1837 
   1838 	for (i = rxr->next_to_check; count != 0;) {
   1839 		struct mbuf	*sendmp, *mp;
   1840 		u32		rsc, ptype;
   1841 		u16		len;
   1842 		u16		vtag = 0;
   1843 		bool		eop;
   1844 
   1845 		/* Sync the ring. */
   1846 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   1847 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
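		/*
		 * The POSTREAD|POSTWRITE sync above makes the device's
		 * descriptor writeback (DD/EOP status, length, packet
		 * type) visible to the CPU before the fields are read.
		 */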
   1848 
   1849 		cur = &rxr->rx_base[i];
   1850 		staterr = le32toh(cur->wb.upper.status_error);
   1851 #ifdef RSS
   1852 		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
   1853 #endif
   1854 
   1855 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
   1856 			break;
   1857 		if ((ifp->if_flags & IFF_RUNNING) == 0)
   1858 			break;
   1859 
   1860 		count--;
   1861 		sendmp = NULL;
   1862 		nbuf = NULL;
   1863 		rsc = 0;
   1864 		cur->wb.upper.status_error = 0;
   1865 		rbuf = &rxr->rx_buffers[i];
   1866 		mp = rbuf->buf;
   1867 
   1868 		len = le16toh(cur->wb.upper.length);
   1869 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
   1870 		    IXGBE_RXDADV_PKTTYPE_MASK;
   1871 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
   1872 
   1873 		/* Make sure bad packets are discarded */
   1874 		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
   1875 #if __FreeBSD_version >= 1100036
   1876 			if (IXGBE_IS_VF(adapter))
   1877 				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
   1878 #endif
   1879 			rxr->rx_discarded.ev_count++;
   1880 			ixgbe_rx_discard(rxr, i);
   1881 			goto next_desc;
   1882 		}
   1883 
   1884 		/*
   1885 		** On the 82599, which supports a hardware
   1886 		** LRO (called HW RSC), packets need
   1887 		** not be fragmented across sequential
   1888 		** descriptors; rather, the next descriptor
   1889 		** is indicated in bits of the current one.
   1890 		** This also means that we might process
   1891 		** more than one packet at a time, something
   1892 		** that had never been true before; it
   1893 		** required eliminating global chain pointers
   1894 		** in favor of what we are doing here.  -jfv
   1895 		*/
   1896 		if (!eop) {
   1897 			/*
   1898 			** Figure out the next descriptor
   1899 			** of this frame.
   1900 			*/
   1901 			if (rxr->hw_rsc == TRUE) {
   1902 				rsc = ixgbe_rsc_count(cur);
   1903 				rxr->rsc_num += (rsc - 1);
   1904 			}
   1905 			if (rsc) { /* Get hardware index */
   1906 				nextp = ((staterr &
   1907 				    IXGBE_RXDADV_NEXTP_MASK) >>
   1908 				    IXGBE_RXDADV_NEXTP_SHIFT);
   1909 			} else { /* Just sequential */
   1910 				nextp = i + 1;
   1911 				if (nextp == adapter->num_rx_desc)
   1912 					nextp = 0;
   1913 			}
   1914 			nbuf = &rxr->rx_buffers[nextp];
   1915 			prefetch(nbuf);
   1916 		}
   1917 		/*
   1918 		** Rather than using the fmp/lmp global pointers
   1919 		** we now keep the head of a packet chain in the
   1920 		** buffer struct and pass this along from one
   1921 		** descriptor to the next, until we get EOP.
   1922 		*/
   1923 		mp->m_len = len;
   1924 		/*
   1925 		** See if there is a stored head; if so,
   1926 		** this buffer continues an existing chain.
   1927 		*/
   1928 		sendmp = rbuf->fmp;
   1929 		if (sendmp != NULL) {  /* secondary frag */
   1930 			rbuf->buf = rbuf->fmp = NULL;
   1931 			mp->m_flags &= ~M_PKTHDR;
   1932 			sendmp->m_pkthdr.len += mp->m_len;
   1933 		} else {
   1934 			/*
   1935 			 * Optimize.  This might be a small packet,
   1936 			 * maybe just a TCP ACK.  Do a fast copy that
   1937 			 * is cache aligned into a new mbuf, and
   1938 			 * leave the old mbuf+cluster for re-use.
   1939 			 */
   1940 			if (eop && len <= IXGBE_RX_COPY_LEN) {
   1941 				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
   1942 				if (sendmp != NULL) {
   1943 					sendmp->m_data +=
   1944 					    IXGBE_RX_COPY_ALIGN;
   1945 					ixgbe_bcopy(mp->m_data,
   1946 					    sendmp->m_data, len);
   1947 					sendmp->m_len = len;
   1948 					rxr->rx_copies.ev_count++;
   1949 					rbuf->flags |= IXGBE_RX_COPY;
   1950 				}
   1951 			}
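			/*
			 * If the copy was not attempted, or m_gethdr()
			 * failed, sendmp is still NULL and the cluster
			 * itself is handed up below.  When the copy did
			 * succeed, IXGBE_RX_COPY presumably lets the
			 * refresh path reuse the retained cluster instead
			 * of allocating a new one.
			 */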
   1952 			if (sendmp == NULL) {
   1953 				rbuf->buf = rbuf->fmp = NULL;
   1954 				sendmp = mp;
   1955 			}
   1956 
   1957 			/* first desc of a non-ps chain */
   1958 			sendmp->m_flags |= M_PKTHDR;
   1959 			sendmp->m_pkthdr.len = mp->m_len;
   1960 		}
   1961 		++processed;
   1962 
   1963 		/* Pass the head pointer on */
   1964 		if (eop == 0) {
   1965 			nbuf->fmp = sendmp;
   1966 			sendmp = NULL;
   1967 			mp->m_next = nbuf->buf;
   1968 		} else { /* Sending this frame */
   1969 			m_set_rcvif(sendmp, ifp);
   1970 			ifp->if_ipackets++;
   1971 			rxr->rx_packets.ev_count++;
   1972 			/* capture data for AIM */
   1973 			rxr->bytes += sendmp->m_pkthdr.len;
   1974 			rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
   1975 			/* Process vlan info */
   1976 			if ((rxr->vtag_strip) &&
   1977 			    (staterr & IXGBE_RXD_STAT_VP))
   1978 				vtag = le16toh(cur->wb.upper.vlan);
   1979 			if (vtag) {
   1980 				VLAN_INPUT_TAG(ifp, sendmp, vtag,
   1981 				    printf("%s: could not apply VLAN "
   1982 					"tag", __func__));
   1983 			}
   1984 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
   1985 				ixgbe_rx_checksum(staterr, sendmp, ptype,
   1986 				   &adapter->stats.pf);
   1987 			}
   1988 #if 0 /* FreeBSD */
   1989                         /*
   1990                          * In case of multiqueue, we have RXCSUM.PCSD bit set
   1991                          * and never cleared. This means we have RSS hash
   1992                          * available to be used.
   1993                          */
   1994                         if (adapter->num_queues > 1) {
   1995                                 sendmp->m_pkthdr.flowid =
   1996                                     le32toh(cur->wb.lower.hi_dword.rss);
   1997                                 switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
   1998                                     case IXGBE_RXDADV_RSSTYPE_IPV4:
   1999                                         M_HASHTYPE_SET(sendmp,
   2000                                             M_HASHTYPE_RSS_IPV4);
   2001                                         break;
   2002                                     case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
   2003                                         M_HASHTYPE_SET(sendmp,
   2004                                             M_HASHTYPE_RSS_TCP_IPV4);
   2005                                         break;
   2006                                     case IXGBE_RXDADV_RSSTYPE_IPV6:
   2007                                         M_HASHTYPE_SET(sendmp,
   2008                                             M_HASHTYPE_RSS_IPV6);
   2009                                         break;
   2010                                     case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
   2011                                         M_HASHTYPE_SET(sendmp,
   2012                                             M_HASHTYPE_RSS_TCP_IPV6);
   2013                                         break;
   2014                                     case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
   2015                                         M_HASHTYPE_SET(sendmp,
   2016                                             M_HASHTYPE_RSS_IPV6_EX);
   2017                                         break;
   2018                                     case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
   2019                                         M_HASHTYPE_SET(sendmp,
   2020                                             M_HASHTYPE_RSS_TCP_IPV6_EX);
   2021                                         break;
   2022 #if __FreeBSD_version > 1100000
   2023                                     case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
   2024                                         M_HASHTYPE_SET(sendmp,
   2025                                             M_HASHTYPE_RSS_UDP_IPV4);
   2026                                         break;
   2027                                     case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
   2028                                         M_HASHTYPE_SET(sendmp,
   2029                                             M_HASHTYPE_RSS_UDP_IPV6);
   2030                                         break;
   2031                                     case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
   2032                                         M_HASHTYPE_SET(sendmp,
   2033                                             M_HASHTYPE_RSS_UDP_IPV6_EX);
   2034                                         break;
   2035 #endif
   2036                                     default:
   2037                                         M_HASHTYPE_SET(sendmp,
   2038                                             M_HASHTYPE_OPAQUE);
   2039                                 }
   2040                         } else {
   2041                                 sendmp->m_pkthdr.flowid = que->msix;
   2042 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
   2043 			}
   2044 #endif /* FreeBSD_version */
   2045 		}
   2046 next_desc:
   2047 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   2048 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
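		/*
		 * The PREREAD|PREWRITE sync above flushes the cleared
		 * status word (and any rewritten fields) back to memory
		 * before this descriptor slot is eventually refreshed and
		 * handed back to the hardware.
		 */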
   2049 
   2050 		/* Advance our pointers to the next descriptor. */
   2051 		if (++i == rxr->num_desc)
   2052 			i = 0;
   2053 
   2054 		/* Now send to the stack or do LRO */
   2055 		if (sendmp != NULL) {
   2056 			rxr->next_to_check = i;
   2057 			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
   2058 			i = rxr->next_to_check;
   2059 		}
   2060 
   2061 		/* Every 8 descriptors we go to refresh mbufs */
   2062 		if (processed == 8) {
   2063 			ixgbe_refresh_mbufs(rxr, i);
   2064 			processed = 0;
   2065 		}
   2066 	}
   2067 
   2068 	/* Refresh any remaining buf structs */
   2069 	if (ixgbe_rx_unrefreshed(rxr))
   2070 		ixgbe_refresh_mbufs(rxr, i);
   2071 
   2072 	rxr->next_to_check = i;
   2073 
   2074 #ifdef LRO
   2075 	/*
   2076 	 * Flush any outstanding LRO work
   2077 	 */
   2078 	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
   2079 		SLIST_REMOVE_HEAD(&lro->lro_active, next);
   2080 		tcp_lro_flush(lro, queued);
   2081 	}
   2082 #endif /* LRO */
   2083 
   2084 	IXGBE_RX_UNLOCK(rxr);
   2085 
   2086 	/*
   2087 	** Still have cleaning to do?
   2088 	*/
   2089 	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
   2090 		return true;
   2091 	else
   2092 		return false;
   2093 }
   2094 
   2095 
   2096 /*********************************************************************
   2097  *
   2098  *  Verify that the hardware indicated that the checksum is valid.
   2099  *  Inform the stack about the status of checksum so that stack
   2100  *  doesn't spend time verifying the checksum.
   2101  *
   2102  *********************************************************************/
   2103 static void
   2104 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
   2105     struct ixgbe_hw_stats *stats)
   2106 {
   2107 	u16	status = (u16) staterr;
   2108 	u8	errors = (u8) (staterr >> 24);
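	/*
	 * The hardware packs the receive status in the low bits of
	 * staterr and the checksum error bits in the top byte, which is
	 * why the two fields are split apart above before testing the
	 * IXGBE_RXD_STAT_* and IXGBE_RXD_ERR_* bits below.
	 */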
   2109 #if 0
   2110 	bool	sctp = FALSE;
   2111 
   2112 	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
   2113 	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
   2114 		sctp = TRUE;
   2115 #endif
   2116 
   2117 	if (status & IXGBE_RXD_STAT_IPCS) {
   2118 		stats->ipcs.ev_count++;
   2119 		if (!(errors & IXGBE_RXD_ERR_IPE)) {
   2120 			/* IP Checksum Good */
   2121 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
   2122 
   2123 		} else {
   2124 			stats->ipcs_bad.ev_count++;
   2125 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
   2126 		}
   2127 	}
   2128 	if (status & IXGBE_RXD_STAT_L4CS) {
   2129 		int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
   2130 		stats->l4cs.ev_count++;
   2131 		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
   2132 			mp->m_pkthdr.csum_flags |= type;
   2133 		} else {
   2134 			stats->l4cs_bad.ev_count++;
   2135 			mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
   2136 		}
   2137 	}
   2138 	return;
   2139 }
   2140 
   2141 
   2142 /********************************************************************
   2143  * Manage DMA'able memory.
   2144  *******************************************************************/
   2145 
   2146 int
   2147 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
   2148 		struct ixgbe_dma_alloc *dma, const int mapflags)
   2149 {
   2150 	device_t dev = adapter->dev;
   2151 	int             r, rsegs;
   2152 
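	/*
	 * This follows the usual bus_dma(9) sequence: create a tag,
	 * allocate and map the memory, create a DMA map, then load it.
	 * Each failure unwinds the earlier steps via the fail_* labels.
	 */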
   2153 	r = ixgbe_dma_tag_create(adapter->osdep.dmat,	/* parent */
   2154 			       DBA_ALIGN, 0,	/* alignment, bounds */
   2155 			       size,	/* maxsize */
   2156 			       1,	/* nsegments */
   2157 			       size,	/* maxsegsize */
   2158 			       BUS_DMA_ALLOCNOW,	/* flags */
   2159 			       &dma->dma_tag);
   2160 	if (r != 0) {
   2161 		aprint_error_dev(dev,
   2162 		    "%s: ixgbe_dma_tag_create failed; error %d\n", __func__, r);
   2163 		goto fail_0;
   2164 	}
   2165 
   2166 	r = bus_dmamem_alloc(dma->dma_tag->dt_dmat,
   2167 		size,
   2168 		dma->dma_tag->dt_alignment,
   2169 		dma->dma_tag->dt_boundary,
   2170 		&dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
   2171 	if (r != 0) {
   2172 		aprint_error_dev(dev,
   2173 		    "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
   2174 		goto fail_1;
   2175 	}
   2176 
   2177 	r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
   2178 	    size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
   2179 	if (r != 0) {
   2180 		aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
   2181 		    __func__, r);
   2182 		goto fail_2;
   2183 	}
   2184 
   2185 	r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
   2186 	if (r != 0) {
   2187 		aprint_error_dev(dev, "%s: ixgbe_dmamap_create failed; error %d\n",
   2188 		    __func__, r);
   2189 		goto fail_3;
   2190 	}
   2191 
   2192 	r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map, dma->dma_vaddr,
   2193 			    size,
   2194 			    NULL,
   2195 			    mapflags | BUS_DMA_NOWAIT);
   2196 	if (r != 0) {
   2197 		aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
   2198 		    __func__, r);
   2199 		goto fail_4;
   2200 	}
   2201 	dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
   2202 	dma->dma_size = size;
   2203 	return 0;
   2204 fail_4:
   2205 	ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
   2206 fail_3:
   2207 	bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
   2208 fail_2:
   2209 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
   2210 fail_1:
   2211 	ixgbe_dma_tag_destroy(dma->dma_tag);
   2212 fail_0:
   2213 	return r;
   2214 }
   2215 
   2216 void
   2217 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
   2218 {
   2219 	bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
   2220 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   2221 	ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
   2222 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
   2223 	ixgbe_dma_tag_destroy(dma->dma_tag);
   2224 }
   2225 
   2226 
   2227 /*********************************************************************
   2228  *
   2229  *  Allocate memory for the transmit and receive rings, and then
   2230  *  the descriptors associated with each, called only once at attach.
   2231  *
   2232  **********************************************************************/
   2233 int
   2234 ixgbe_allocate_queues(struct adapter *adapter)
   2235 {
   2236 	device_t	dev = adapter->dev;
   2237 	struct ix_queue	*que;
   2238 	struct tx_ring	*txr;
   2239 	struct rx_ring	*rxr;
   2240 	int rsize, tsize, error = IXGBE_SUCCESS;
   2241 	int txconf = 0, rxconf = 0;
   2242 #ifdef PCI_IOV
   2243 	enum ixgbe_iov_mode iov_mode;
   2244 #endif
   2245 
   2246 	/* First allocate the top level queue structs */
   2247 	if (!(adapter->queues =
   2248 	    (struct ix_queue *) malloc(sizeof(struct ix_queue) *
   2249 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
   2250 		aprint_error_dev(dev, "Unable to allocate queue memory\n");
   2251 		error = ENOMEM;
   2252 		goto fail;
   2253 	}
   2254 
   2255 	/* Next allocate the TX ring struct memory */
   2256 	if (!(adapter->tx_rings =
   2257 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
   2258 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
   2259 		aprint_error_dev(dev, "Unable to allocate TX ring memory\n");
   2260 		error = ENOMEM;
   2261 		goto tx_fail;
   2262 	}
   2263 
   2264 	/* Then allocate the RX ring struct memory */
   2265 	if (!(adapter->rx_rings =
   2266 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
   2267 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
   2268 		aprint_error_dev(dev, "Unable to allocate RX ring memory\n");
   2269 		error = ENOMEM;
   2270 		goto rx_fail;
   2271 	}
   2272 
   2273 	/* For the ring itself */
   2274 	tsize = roundup2(adapter->num_tx_desc *
   2275 	    sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
   2276 
   2277 #ifdef PCI_IOV
   2278 	iov_mode = ixgbe_get_iov_mode(adapter);
   2279 	adapter->pool = ixgbe_max_vfs(iov_mode);
   2280 #else
   2281 	adapter->pool = 0;
   2282 #endif
   2283 	/*
   2284 	 * Now set up the TX queues, txconf is needed to handle the
   2285 	 * possibility that things fail midcourse and we need to
   2286 	 * undo memory gracefully
   2287 	 */
   2288 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
   2289 		/* Set up some basics */
   2290 		txr = &adapter->tx_rings[i];
   2291 		txr->adapter = adapter;
   2292 #ifdef PCI_IOV
   2293 		txr->me = ixgbe_pf_que_index(iov_mode, i);
   2294 #else
   2295 		txr->me = i;
   2296 #endif
   2297 		txr->num_desc = adapter->num_tx_desc;
   2298 
   2299 		/* Initialize the TX side lock */
   2300 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
   2301 		    device_xname(dev), txr->me);
   2302 		mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
   2303 
   2304 		if (ixgbe_dma_malloc(adapter, tsize,
   2305 			&txr->txdma, BUS_DMA_NOWAIT)) {
   2306 			aprint_error_dev(dev,
   2307 			    "Unable to allocate TX Descriptor memory\n");
   2308 			error = ENOMEM;
   2309 			goto err_tx_desc;
   2310 		}
   2311 		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
   2312 		bzero((void *)txr->tx_base, tsize);
   2313 
   2314         	/* Now allocate transmit buffers for the ring */
   2315         	if (ixgbe_allocate_transmit_buffers(txr)) {
   2316 			aprint_error_dev(dev,
   2317 			    "Critical Failure setting up transmit buffers\n");
   2318 			error = ENOMEM;
   2319 			goto err_tx_desc;
   2320         	}
   2321 #ifndef IXGBE_LEGACY_TX
   2322 		/* Allocate a buf ring */
   2323 		txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
   2324 		    M_WAITOK, &txr->tx_mtx);
   2325 		if (txr->br == NULL) {
   2326 			aprint_error_dev(dev,
   2327 			    "Critical Failure setting up buf ring\n");
   2328 			error = ENOMEM;
   2329 			goto err_tx_desc;
   2330         	}
   2331 #endif
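		/*
		 * With IXGBE_LEGACY_TX undefined each TX ring gets its own
		 * buf_ring, which the non-legacy (multiqueue) transmit path
		 * is expected to use as a per-queue software send queue
		 * instead of the single interface send queue.
		 */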
   2332 	}
   2333 
   2334 	/*
   2335 	 * Next the RX queues...
   2336 	 */
   2337 	rsize = roundup2(adapter->num_rx_desc *
   2338 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
   2339 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
   2340 		rxr = &adapter->rx_rings[i];
   2341 		/* Set up some basics */
   2342 		rxr->adapter = adapter;
   2343 #ifdef PCI_IOV
   2344 		rxr->me = ixgbe_pf_que_index(iov_mode, i);
   2345 #else
   2346 		rxr->me = i;
   2347 #endif
   2348 		rxr->num_desc = adapter->num_rx_desc;
   2349 
   2350 		/* Initialize the RX side lock */
   2351 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
   2352 		    device_xname(dev), rxr->me);
   2353 		mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
   2354 
   2355 		if (ixgbe_dma_malloc(adapter, rsize,
   2356 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
   2357 			aprint_error_dev(dev,
   2358 			    "Unable to allocate RX Descriptor memory\n");
   2359 			error = ENOMEM;
   2360 			goto err_rx_desc;
   2361 		}
   2362 		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
   2363 		bzero((void *)rxr->rx_base, rsize);
   2364 
   2365 		/* Allocate receive buffers for the ring */
   2366 		if (ixgbe_allocate_receive_buffers(rxr)) {
   2367 			aprint_error_dev(dev,
   2368 			    "Critical Failure setting up receive buffers\n");
   2369 			error = ENOMEM;
   2370 			goto err_rx_desc;
   2371 		}
   2372 	}
   2373 
   2374 	/*
   2375 	** Finally set up the queue holding structs
   2376 	*/
   2377 	for (int i = 0; i < adapter->num_queues; i++) {
   2378 		que = &adapter->queues[i];
   2379 		que->adapter = adapter;
   2380 		que->me = i;
   2381 		que->txr = &adapter->tx_rings[i];
   2382 		que->rxr = &adapter->rx_rings[i];
   2383 	}
   2384 
   2385 	return (0);
   2386 
   2387 err_rx_desc:
   2388 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
   2389 		ixgbe_dma_free(adapter, &rxr->rxdma);
   2390 err_tx_desc:
   2391 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
   2392 		ixgbe_dma_free(adapter, &txr->txdma);
   2393 	free(adapter->rx_rings, M_DEVBUF);
   2394 rx_fail:
   2395 	free(adapter->tx_rings, M_DEVBUF);
   2396 tx_fail:
   2397 	free(adapter->queues, M_DEVBUF);
   2398 fail:
   2399 	return (error);
   2400 }
   2401 
   2402