      1 /******************************************************************************
      2 
      3   Copyright (c) 2001-2015, Intel Corporation
      4   All rights reserved.
      5 
      6   Redistribution and use in source and binary forms, with or without
      7   modification, are permitted provided that the following conditions are met:
      8 
      9    1. Redistributions of source code must retain the above copyright notice,
     10       this list of conditions and the following disclaimer.
     11 
     12    2. Redistributions in binary form must reproduce the above copyright
     13       notice, this list of conditions and the following disclaimer in the
     14       documentation and/or other materials provided with the distribution.
     15 
     16    3. Neither the name of the Intel Corporation nor the names of its
     17       contributors may be used to endorse or promote products derived from
     18       this software without specific prior written permission.
     19 
     20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     30   POSSIBILITY OF SUCH DAMAGE.
     31 
     32 ******************************************************************************/
     33 /*
     34  * Copyright (c) 2011 The NetBSD Foundation, Inc.
     35  * All rights reserved.
     36  *
     37  * This code is derived from software contributed to The NetBSD Foundation
     38  * by Coyote Point Systems, Inc.
     39  *
     40  * Redistribution and use in source and binary forms, with or without
     41  * modification, are permitted provided that the following conditions
     42  * are met:
     43  * 1. Redistributions of source code must retain the above copyright
     44  *    notice, this list of conditions and the following disclaimer.
     45  * 2. Redistributions in binary form must reproduce the above copyright
     46  *    notice, this list of conditions and the following disclaimer in the
     47  *    documentation and/or other materials provided with the distribution.
     48  *
     49  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     50  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     51  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     52  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     53  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     54  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     55  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     56  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     57  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     58  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     59  * POSSIBILITY OF SUCH DAMAGE.
     60  */
     61 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 282289 2015-04-30 22:53:27Z erj $*/
     62 /*$NetBSD: ix_txrx.c,v 1.4 2016/12/01 06:56:28 msaitoh Exp $*/
     63 
     64 #include "ixgbe.h"
     65 
     66 #ifdef DEV_NETMAP
     67 #include <net/netmap.h>
     68 #include <sys/selinfo.h>
     69 #include <dev/netmap/netmap_kern.h>
     70 
     71 extern int ix_crcstrip;
     72 #endif
     73 
      74 /*
      75 ** HW RSC control:
      76 **  This feature only works with IPv4
      77 **  and only on 82599 and later hardware.
      78 **  It also causes IP forwarding to fail,
      79 **  and unlike software LRO that cannot be
      80 **  controlled by the stack.  For these
      81 **  reasons it is left off by default and
      82 **  there is no run-time tunable; enabling
      83 **  it requires recompiling with
      84 **  ixgbe_rsc_enable set to TRUE.
      85 */
     86 static bool ixgbe_rsc_enable = FALSE;
     87 
     88 #ifdef IXGBE_FDIR
      89 /*
      90 ** For Flow Director: this is the
      91 ** number of TX packets between samples
      92 ** sent to the filter pool; with the
      93 ** default of 20, every 20th packet
      94 ** is probed.
      95 **
      96 ** Setting this to 0 disables the feature.
      97 */
     98 static int atr_sample_rate = 20;
     99 #endif
    100 
    101 /* Shared PCI config read/write */
    102 u16
    103 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg)
    104 {
    105 	switch (reg % 4) {
    106 	case 0:
    107 		return pci_conf_read(hw->back->pc, hw->back->tag, reg) &
    108 		    __BITS(15, 0);
    109 	case 2:
    110 		return __SHIFTOUT(pci_conf_read(hw->back->pc, hw->back->tag,
    111 		    reg - 2), __BITS(31, 16));
    112 	default:
     113 		panic("%s: invalid register (%" PRIx32 ")", __func__, reg);
    114 		break;
    115 	}
    116 }
    117 
    118 void
    119 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value)
    120 {
    121 	pcireg_t old;
    122 
    123 	switch (reg % 4) {
    124 	case 0:
    125 		old = pci_conf_read(hw->back->pc, hw->back->tag, reg) &
    126 		    __BITS(31, 16);
    127 		pci_conf_write(hw->back->pc, hw->back->tag, reg, value | old);
    128 		break;
    129 	case 2:
    130 		old = pci_conf_read(hw->back->pc, hw->back->tag, reg - 2) &
    131 		    __BITS(15, 0);
    132 		pci_conf_write(hw->back->pc, hw->back->tag, reg - 2,
    133 		    __SHIFTIN(value, __BITS(31, 16)) | old);
    134 		break;
    135 	default:
     136 		panic("%s: invalid register (%" PRIx32 ")", __func__, reg);
    137 		break;
    138 	}
    139 
    140 	return;
    141 }
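
/*
 * Illustrative sketch (not compiled): the two helpers above carve a 16-bit
 * config word out of the 32-bit dword that pci_conf_read()/pci_conf_write()
 * operate on.  The __BITS/__SHIFTIN/__SHIFTOUT macros used above boil down
 * to the plain masks and shifts shown here; the helper names below are
 * hypothetical and exist only for this example.
 */
#if 0
static u16
example_extract_cfg_word(u32 dword, u32 reg)
{
	/* reg % 4 == 0: the word sits in the low half of the dword */
	if ((reg % 4) == 0)
		return dword & 0xffff;
	/* reg % 4 == 2: the word sits in the high half of the dword */
	return (dword >> 16) & 0xffff;
}

static u32
example_merge_cfg_word(u32 dword, u32 reg, u16 value)
{
	if ((reg % 4) == 0)
		return (dword & 0xffff0000) | value;		/* replace low half */
	return (dword & 0x0000ffff) | ((u32)value << 16);	/* replace high half */
}
#endif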
    142 
    143 /*********************************************************************
    144  *  Local Function prototypes
    145  *********************************************************************/
    146 static void	ixgbe_setup_transmit_ring(struct tx_ring *);
    147 static void     ixgbe_free_transmit_buffers(struct tx_ring *);
    148 static int	ixgbe_setup_receive_ring(struct rx_ring *);
    149 static void     ixgbe_free_receive_buffers(struct rx_ring *);
    150 
    151 static void	ixgbe_rx_checksum(u32, struct mbuf *, u32,
    152 		    struct ixgbe_hw_stats *);
    153 static void	ixgbe_refresh_mbufs(struct rx_ring *, int);
    154 static int      ixgbe_xmit(struct tx_ring *, struct mbuf *);
    155 static int	ixgbe_tx_ctx_setup(struct tx_ring *,
    156 		    struct mbuf *, u32 *, u32 *);
    157 static int	ixgbe_tso_setup(struct tx_ring *,
    158 		    struct mbuf *, u32 *, u32 *);
    159 #ifdef IXGBE_FDIR
    160 static void	ixgbe_atr(struct tx_ring *, struct mbuf *);
    161 #endif
    162 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
    163 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
    164 		    struct mbuf *, u32);
    165 
    166 static void	ixgbe_setup_hw_rsc(struct rx_ring *);
    167 
    168 #ifdef IXGBE_LEGACY_TX
    169 /*********************************************************************
    170  *  Transmit entry point
    171  *
    172  *  ixgbe_start is called by the stack to initiate a transmit.
    173  *  The driver will remain in this routine as long as there are
    174  *  packets to transmit and transmit resources are available.
     175  *  If resources are not available, the stack is notified
     176  *  and the packet is requeued.
    177  **********************************************************************/
    178 
    179 void
    180 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
    181 {
    182 	int rc;
    183 	struct mbuf    *m_head;
    184 	struct adapter *adapter = txr->adapter;
    185 
    186 	IXGBE_TX_LOCK_ASSERT(txr);
    187 
    188 	if ((ifp->if_flags & IFF_RUNNING) == 0)
    189 		return;
    190 	if (!adapter->link_active)
    191 		return;
    192 
    193 	while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
    194 		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
    195 			break;
    196 
    197 		IFQ_POLL(&ifp->if_snd, m_head);
    198 		if (m_head == NULL)
    199 			break;
    200 
    201 		if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
    202 			break;
    203 		}
    204 		IFQ_DEQUEUE(&ifp->if_snd, m_head);
    205 		if (rc == EFBIG) {
    206 			struct mbuf *mtmp;
    207 
    208 			if ((mtmp = m_defrag(m_head, M_NOWAIT)) != NULL) {
    209 				m_head = mtmp;
    210 				rc = ixgbe_xmit(txr, m_head);
    211 				if (rc != 0)
    212 					adapter->efbig2_tx_dma_setup.ev_count++;
    213 			} else
    214 				adapter->m_defrag_failed.ev_count++;
    215 		}
    216 		if (rc != 0) {
    217 			m_freem(m_head);
    218 			continue;
    219 		}
    220 
    221 		/* Send a copy of the frame to the BPF listener */
    222 		bpf_mtap(ifp, m_head);
    223 	}
    224 	return;
    225 }
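
/*
 * Illustrative sketch (not compiled): the EFBIG handling in the loop above.
 * When a frame has too many fragments for the DMA map, m_defrag() is tried
 * once to compact the chain before the frame is dropped; EAGAIN means no
 * descriptors were available and the caller should requeue.  The helper
 * name is hypothetical and the event counters are omitted.
 */
#if 0
static int
example_xmit_with_defrag(struct tx_ring *txr, struct mbuf **mp)
{
	struct mbuf *m2;
	int rc;

	rc = ixgbe_xmit(txr, *mp);
	if (rc == EFBIG && (m2 = m_defrag(*mp, M_NOWAIT)) != NULL) {
		*mp = m2;			/* caller sees the compacted chain */
		rc = ixgbe_xmit(txr, *mp);	/* retry once */
	}
	if (rc != 0 && rc != EAGAIN) {
		m_freem(*mp);			/* unrecoverable: drop the frame */
		*mp = NULL;
	}
	return rc;
}
#endif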
    226 
     227 /*
     228  * Legacy TX start - called by the stack.  This
     229  * always uses the first tx ring and should not
     230  * be used when multiqueue tx is enabled.
     231  */
    232 void
    233 ixgbe_start(struct ifnet *ifp)
    234 {
    235 	struct adapter *adapter = ifp->if_softc;
    236 	struct tx_ring	*txr = adapter->tx_rings;
    237 
    238 	if (ifp->if_flags & IFF_RUNNING) {
    239 		IXGBE_TX_LOCK(txr);
    240 		ixgbe_start_locked(txr, ifp);
    241 		IXGBE_TX_UNLOCK(txr);
    242 	}
    243 	return;
    244 }
    245 
    246 #else /* ! IXGBE_LEGACY_TX */
    247 
    248 /*
    249 ** Multiqueue Transmit driver
    250 **
    251 */
    252 int
    253 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
    254 {
    255 	struct adapter	*adapter = ifp->if_softc;
    256 	struct ix_queue	*que;
    257 	struct tx_ring	*txr;
    258 	int 		i, err = 0;
    259 #ifdef	RSS
    260 	uint32_t bucket_id;
    261 #endif
    262 
    263 	/*
    264 	 * When doing RSS, map it to the same outbound queue
    265 	 * as the incoming flow would be mapped to.
    266 	 *
     267 	 * If everything is set up correctly, it should be
     268 	 * the same bucket that the current CPU belongs to.
    269 	 */
    270 #if __FreeBSD_version < 1100054
    271 	if (m->m_flags & M_FLOWID) {
    272 #else
    273 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
    274 #endif
    275 #ifdef	RSS
    276 		if (rss_hash2bucket(m->m_pkthdr.flowid,
    277 		    M_HASHTYPE_GET(m), &bucket_id) == 0)
    278 			/* TODO: spit out something if bucket_id > num_queues? */
    279 			i = bucket_id % adapter->num_queues;
    280 		else
    281 #endif
    282 			i = m->m_pkthdr.flowid % adapter->num_queues;
    283 	} else
    284 		i = curcpu % adapter->num_queues;
    285 
    286 	/* Check for a hung queue and pick alternative */
    287 	if (((1 << i) & adapter->active_queues) == 0)
    288 		i = ffsl(adapter->active_queues);
    289 
    290 	txr = &adapter->tx_rings[i];
    291 	que = &adapter->queues[i];
    292 
    293 	err = drbr_enqueue(ifp, txr->br, m);
    294 	if (err)
    295 		return (err);
    296 	if (IXGBE_TX_TRYLOCK(txr)) {
    297 		ixgbe_mq_start_locked(ifp, txr);
    298 		IXGBE_TX_UNLOCK(txr);
    299 	} else
    300 		softint_schedule(txr->txq_si);
    301 
    302 	return (0);
    303 }
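
/*
 * Illustrative sketch (not compiled): the queue selection done above.  A
 * packet that carries a flow hash is steered to flowid % num_queues so its
 * TX queue matches the RX queue of the same flow; packets without a hash
 * fall back to the current CPU.  If the chosen queue is not in the
 * active_queues bitmask, the lowest-numbered active queue is used instead
 * (note that ffsl() returns a 1-based bit index).  The function and
 * parameter names here are hypothetical.
 */
#if 0
static int
example_select_tx_queue(u32 flowid, bool have_hash, int cpu,
    u64 active_queues, int num_queues)
{
	int i;

	if (have_hash)
		i = flowid % num_queues;
	else
		i = cpu % num_queues;

	/* Fall back to the first active queue if this one is quiesced */
	if ((((u64)1 << i) & active_queues) == 0) {
		for (i = 0; i < num_queues; i++)
			if (((u64)1 << i) & active_queues)
				break;
	}
	return i;
}
#endif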
    304 
    305 int
    306 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
    307 {
    308 	struct adapter  *adapter = txr->adapter;
    309 	struct mbuf     *next;
    310 	int             enqueued = 0, err = 0;
    311 
    312 	if (((ifp->if_flags & IFF_RUNNING) == 0) ||
    313 	    adapter->link_active == 0)
    314 		return (ENETDOWN);
    315 
    316 	/* Process the queue */
    317 #if __FreeBSD_version < 901504
    318 	next = drbr_dequeue(ifp, txr->br);
    319 	while (next != NULL) {
    320 		if ((err = ixgbe_xmit(txr, &next)) != 0) {
    321 			if (next != NULL)
    322 				err = drbr_enqueue(ifp, txr->br, next);
    323 #else
    324 	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
    325 		if ((err = ixgbe_xmit(txr, &next)) != 0) {
    326 			if (next == NULL) {
    327 				drbr_advance(ifp, txr->br);
    328 			} else {
    329 				drbr_putback(ifp, txr->br, next);
    330 			}
    331 #endif
    332 			break;
    333 		}
    334 #if __FreeBSD_version >= 901504
    335 		drbr_advance(ifp, txr->br);
    336 #endif
    337 		enqueued++;
    338 #if 0 // this is VF-only
    339 #if __FreeBSD_version >= 1100036
    340 		/*
    341 		 * Since we're looking at the tx ring, we can check
     342 		 * to see if we're a VF by examining our tail register
    343 		 * address.
    344 		 */
    345 		if (txr->tail < IXGBE_TDT(0) && next->m_flags & M_MCAST)
    346 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
    347 #endif
    348 #endif
    349 		/* Send a copy of the frame to the BPF listener */
    350 		bpf_mtap(ifp, next);
    351 		if ((ifp->if_flags & IFF_RUNNING) == 0)
    352 			break;
    353 #if __FreeBSD_version < 901504
    354 		next = drbr_dequeue(ifp, txr->br);
    355 #endif
    356 	}
    357 
    358 	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
    359 		ixgbe_txeof(txr);
    360 
    361 	return (err);
    362 }
    363 
    364 /*
    365  * Called from a taskqueue to drain queued transmit packets.
    366  */
    367 void
    368 ixgbe_deferred_mq_start(void *arg, int pending)
    369 {
    370 	struct tx_ring *txr = arg;
    371 	struct adapter *adapter = txr->adapter;
    372 	struct ifnet *ifp = adapter->ifp;
    373 
    374 	IXGBE_TX_LOCK(txr);
    375 	if (!drbr_empty(ifp, txr->br))
    376 		ixgbe_mq_start_locked(ifp, txr);
    377 	IXGBE_TX_UNLOCK(txr);
    378 }
    379 
    380 /*
    381  * Flush all ring buffers
    382  */
    383 void
    384 ixgbe_qflush(struct ifnet *ifp)
    385 {
    386 	struct adapter	*adapter = ifp->if_softc;
    387 	struct tx_ring	*txr = adapter->tx_rings;
    388 	struct mbuf	*m;
    389 
    390 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
    391 		IXGBE_TX_LOCK(txr);
    392 		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
    393 			m_freem(m);
    394 		IXGBE_TX_UNLOCK(txr);
    395 	}
    396 	if_qflush(ifp);
    397 }
    398 #endif /* IXGBE_LEGACY_TX */
    399 
    400 
    401 /*********************************************************************
    402  *
    403  *  This routine maps the mbufs to tx descriptors, allowing the
    404  *  TX engine to transmit the packets.
    405  *  	- return 0 on success, positive on failure
    406  *
    407  **********************************************************************/
    408 
    409 static int
    410 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
    411 {
    412 	struct m_tag *mtag;
    413 	struct adapter  *adapter = txr->adapter;
    414 	struct ethercom *ec = &adapter->osdep.ec;
    415 	u32		olinfo_status = 0, cmd_type_len;
    416 	int             i, j, error;
    417 	int		first;
    418 	bus_dmamap_t	map;
    419 	struct ixgbe_tx_buf *txbuf;
    420 	union ixgbe_adv_tx_desc *txd = NULL;
    421 
    422 	/* Basic descriptor defines */
    423         cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
    424 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
    425 
    426 	if ((mtag = VLAN_OUTPUT_TAG(ec, m_head)) != NULL)
    427         	cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
    428 
     429         /*
     430          * It is important to capture the first descriptor
     431          * used, because its tx_buffer will later record the
     432          * EOP descriptor the hardware reports back on.
     433          */
    434         first = txr->next_avail_desc;
    435 	txbuf = &txr->tx_buffers[first];
    436 	map = txbuf->map;
    437 
    438 	/*
    439 	 * Map the packet for DMA.
    440 	 */
    441 	error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map,
    442 	    m_head, BUS_DMA_NOWAIT);
    443 
    444 	if (__predict_false(error)) {
    445 
    446 		switch (error) {
    447 		case EAGAIN:
    448 			adapter->eagain_tx_dma_setup.ev_count++;
    449 			return EAGAIN;
    450 		case ENOMEM:
    451 			adapter->enomem_tx_dma_setup.ev_count++;
    452 			return EAGAIN;
    453 		case EFBIG:
    454 			/*
    455 			 * XXX Try it again?
    456 			 * do m_defrag() and retry bus_dmamap_load_mbuf().
    457 			 */
    458 			adapter->efbig_tx_dma_setup.ev_count++;
    459 			return error;
    460 		case EINVAL:
    461 			adapter->einval_tx_dma_setup.ev_count++;
    462 			return error;
    463 		default:
    464 			adapter->other_tx_dma_setup.ev_count++;
    465 			return error;
    466 		}
    467 	}
    468 
    469 	/* Make certain there are enough descriptors */
    470 	if (map->dm_nsegs > txr->tx_avail - 2) {
    471 		txr->no_desc_avail.ev_count++;
    472 		ixgbe_dmamap_unload(txr->txtag, txbuf->map);
    473 		return EAGAIN;
    474 	}
    475 
    476 	/*
    477 	 * Set up the appropriate offload context
    478 	 * this will consume the first descriptor
    479 	 */
    480 	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
    481 	if (__predict_false(error)) {
    482 		return (error);
    483 	}
    484 
    485 #ifdef IXGBE_FDIR
    486 	/* Do the flow director magic */
    487 	if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
    488 		++txr->atr_count;
    489 		if (txr->atr_count >= atr_sample_rate) {
    490 			ixgbe_atr(txr, m_head);
    491 			txr->atr_count = 0;
    492 		}
    493 	}
    494 #endif
    495 
    496 	i = txr->next_avail_desc;
    497 	for (j = 0; j < map->dm_nsegs; j++) {
    498 		bus_size_t seglen;
    499 		bus_addr_t segaddr;
    500 
    501 		txbuf = &txr->tx_buffers[i];
    502 		txd = &txr->tx_base[i];
    503 		seglen = map->dm_segs[j].ds_len;
    504 		segaddr = htole64(map->dm_segs[j].ds_addr);
    505 
    506 		txd->read.buffer_addr = segaddr;
    507 		txd->read.cmd_type_len = htole32(txr->txd_cmd |
    508 		    cmd_type_len |seglen);
    509 		txd->read.olinfo_status = htole32(olinfo_status);
    510 
    511 		if (++i == txr->num_desc)
    512 			i = 0;
    513 	}
    514 
    515 	txd->read.cmd_type_len |=
    516 	    htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
    517 	txr->tx_avail -= map->dm_nsegs;
    518 	txr->next_avail_desc = i;
    519 
    520 	txbuf->m_head = m_head;
     521 	/*
     522 	 * Swap the maps so the last descriptor, which
     523 	 * holds the mbuf and sees the completion, gets
     524 	 * the loaded map, while the first descriptor
     525 	 * takes over the last one's unused map.
     526 	 */
    527 	txr->tx_buffers[first].map = txbuf->map;
    528 	txbuf->map = map;
    529 	bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
    530 	    BUS_DMASYNC_PREWRITE);
    531 
    532         /* Set the EOP descriptor that will be marked done */
    533         txbuf = &txr->tx_buffers[first];
    534 	txbuf->eop = txd;
    535 
    536         ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
    537 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
    538 	/*
     539 	 * Advance the Transmit Descriptor Tail (TDT); this tells the
     540 	 * hardware that this frame is available to transmit.
    541 	 */
    542 	++txr->total_packets.ev_count;
    543 	IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
    544 
    545 	/* Mark queue as having work */
    546 	if (txr->busy == 0)
    547 		txr->busy = 1;
    548 
    549 	return 0;
    550 }
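
/*
 * Illustrative sketch (not compiled): the per-segment loop above writes one
 * advanced data descriptor per DMA segment, wrapping the producer index at
 * the ring size, and only the final descriptor of the frame carries EOP
 * (end of packet) and RS (report status).  The helper name is hypothetical
 * and 'desc_flags' stands in for the cmd_type_len built above.
 */
#if 0
static int
example_fill_segments(union ixgbe_adv_tx_desc *ring, int ring_size,
    int head, const bus_dma_segment_t *segs, int nsegs, u32 desc_flags)
{
	int j, last = head;

	for (j = 0; j < nsegs; j++) {
		last = head;
		ring[head].read.buffer_addr = htole64(segs[j].ds_addr);
		ring[head].read.cmd_type_len =
		    htole32(desc_flags | segs[j].ds_len);
		if (++head == ring_size)	/* wrap the producer index */
			head = 0;
	}
	/* Only the final descriptor of the frame gets EOP and RS */
	ring[last].read.cmd_type_len |=
	    htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);

	return head;	/* new producer index */
}
#endif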
    551 
    552 /*********************************************************************
    553  *
    554  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
    555  *  the information needed to transmit a packet on the wire. This is
     556  *  called only once at attach; setup is done on every reset.
    557  *
    558  **********************************************************************/
    559 int
    560 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
    561 {
    562 	struct adapter *adapter = txr->adapter;
    563 	device_t dev = adapter->dev;
    564 	struct ixgbe_tx_buf *txbuf;
    565 	int error, i;
    566 
    567 	/*
    568 	 * Setup DMA descriptor areas.
    569 	 */
    570 	if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat,	/* parent */
    571 			       1, 0,		/* alignment, bounds */
    572 			       IXGBE_TSO_SIZE,		/* maxsize */
    573 			       adapter->num_segs,	/* nsegments */
    574 			       PAGE_SIZE,		/* maxsegsize */
    575 			       0,			/* flags */
    576 			       &txr->txtag))) {
    577 		aprint_error_dev(dev,"Unable to allocate TX DMA tag\n");
    578 		goto fail;
    579 	}
    580 
    581 	if (!(txr->tx_buffers =
    582 	    (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
    583 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
    584 		aprint_error_dev(dev, "Unable to allocate tx_buffer memory\n");
    585 		error = ENOMEM;
    586 		goto fail;
    587 	}
    588 
    589         /* Create the descriptor buffer dma maps */
    590 	txbuf = txr->tx_buffers;
    591 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
    592 		error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
    593 		if (error != 0) {
    594 			aprint_error_dev(dev,
    595 			    "Unable to create TX DMA map (%d)\n", error);
    596 			goto fail;
    597 		}
    598 	}
    599 
    600 	return 0;
    601 fail:
     602 	/* Free everything; this handles the case where we failed partway through */
    603 	ixgbe_free_transmit_structures(adapter);
    604 	return (error);
    605 }
    606 
    607 /*********************************************************************
    608  *
    609  *  Initialize a transmit ring.
    610  *
    611  **********************************************************************/
    612 static void
    613 ixgbe_setup_transmit_ring(struct tx_ring *txr)
    614 {
    615 	struct adapter *adapter = txr->adapter;
    616 	struct ixgbe_tx_buf *txbuf;
    617 	int i;
    618 #ifdef DEV_NETMAP
    619 	struct netmap_adapter *na = NA(adapter->ifp);
    620 	struct netmap_slot *slot;
    621 #endif /* DEV_NETMAP */
    622 
    623 	/* Clear the old ring contents */
    624 	IXGBE_TX_LOCK(txr);
    625 #ifdef DEV_NETMAP
    626 	/*
    627 	 * (under lock): if in netmap mode, do some consistency
    628 	 * checks and set slot to entry 0 of the netmap ring.
    629 	 */
    630 	slot = netmap_reset(na, NR_TX, txr->me, 0);
    631 #endif /* DEV_NETMAP */
    632 	bzero((void *)txr->tx_base,
    633 	      (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
    634 	/* Reset indices */
    635 	txr->next_avail_desc = 0;
    636 	txr->next_to_clean = 0;
    637 
    638 	/* Free any existing tx buffers. */
    639         txbuf = txr->tx_buffers;
    640 	for (i = 0; i < txr->num_desc; i++, txbuf++) {
    641 		if (txbuf->m_head != NULL) {
    642 			bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
    643 			    0, txbuf->m_head->m_pkthdr.len,
    644 			    BUS_DMASYNC_POSTWRITE);
    645 			ixgbe_dmamap_unload(txr->txtag, txbuf->map);
    646 			m_freem(txbuf->m_head);
    647 			txbuf->m_head = NULL;
    648 		}
    649 #ifdef DEV_NETMAP
    650 		/*
    651 		 * In netmap mode, set the map for the packet buffer.
    652 		 * NOTE: Some drivers (not this one) also need to set
    653 		 * the physical buffer address in the NIC ring.
    654 		 * Slots in the netmap ring (indexed by "si") are
    655 		 * kring->nkr_hwofs positions "ahead" wrt the
    656 		 * corresponding slot in the NIC ring. In some drivers
    657 		 * (not here) nkr_hwofs can be negative. Function
    658 		 * netmap_idx_n2k() handles wraparounds properly.
    659 		 */
    660 		if (slot) {
    661 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
    662 			netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si));
    663 		}
    664 #endif /* DEV_NETMAP */
    665 		/* Clear the EOP descriptor pointer */
    666 		txbuf->eop = NULL;
    667         }
    668 
    669 #ifdef IXGBE_FDIR
    670 	/* Set the rate at which we sample packets */
    671 	if (adapter->hw.mac.type != ixgbe_mac_82598EB)
    672 		txr->atr_sample = atr_sample_rate;
    673 #endif
    674 
    675 	/* Set number of descriptors available */
    676 	txr->tx_avail = adapter->num_tx_desc;
    677 
    678 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
    679 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
    680 	IXGBE_TX_UNLOCK(txr);
    681 }
    682 
    683 /*********************************************************************
    684  *
    685  *  Initialize all transmit rings.
    686  *
    687  **********************************************************************/
    688 int
    689 ixgbe_setup_transmit_structures(struct adapter *adapter)
    690 {
    691 	struct tx_ring *txr = adapter->tx_rings;
    692 
    693 	for (int i = 0; i < adapter->num_queues; i++, txr++)
    694 		ixgbe_setup_transmit_ring(txr);
    695 
    696 	return (0);
    697 }
    698 
    699 /*********************************************************************
    700  *
    701  *  Free all transmit rings.
    702  *
    703  **********************************************************************/
    704 void
    705 ixgbe_free_transmit_structures(struct adapter *adapter)
    706 {
    707 	struct tx_ring *txr = adapter->tx_rings;
    708 
    709 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
    710 		ixgbe_free_transmit_buffers(txr);
    711 		ixgbe_dma_free(adapter, &txr->txdma);
    712 		IXGBE_TX_LOCK_DESTROY(txr);
    713 	}
    714 	free(adapter->tx_rings, M_DEVBUF);
    715 }
    716 
    717 /*********************************************************************
    718  *
    719  *  Free transmit ring related data structures.
    720  *
    721  **********************************************************************/
    722 static void
    723 ixgbe_free_transmit_buffers(struct tx_ring *txr)
    724 {
    725 	struct adapter *adapter = txr->adapter;
    726 	struct ixgbe_tx_buf *tx_buffer;
    727 	int             i;
    728 
    729 	INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
    730 
    731 	if (txr->tx_buffers == NULL)
    732 		return;
    733 
    734 	tx_buffer = txr->tx_buffers;
    735 	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
    736 		if (tx_buffer->m_head != NULL) {
    737 			bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
    738 			    0, tx_buffer->m_head->m_pkthdr.len,
    739 			    BUS_DMASYNC_POSTWRITE);
    740 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
    741 			m_freem(tx_buffer->m_head);
    742 			tx_buffer->m_head = NULL;
    743 			if (tx_buffer->map != NULL) {
    744 				ixgbe_dmamap_destroy(txr->txtag,
    745 				    tx_buffer->map);
    746 				tx_buffer->map = NULL;
    747 			}
    748 		} else if (tx_buffer->map != NULL) {
    749 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
    750 			ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
    751 			tx_buffer->map = NULL;
    752 		}
    753 	}
    754 #ifndef IXGBE_LEGACY_TX
    755 	if (txr->br != NULL)
    756 		buf_ring_free(txr->br, M_DEVBUF);
    757 #endif
    758 	if (txr->tx_buffers != NULL) {
    759 		free(txr->tx_buffers, M_DEVBUF);
    760 		txr->tx_buffers = NULL;
    761 	}
    762 	if (txr->txtag != NULL) {
    763 		ixgbe_dma_tag_destroy(txr->txtag);
    764 		txr->txtag = NULL;
    765 	}
    766 	return;
    767 }
    768 
    769 /*********************************************************************
    770  *
    771  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
    772  *
    773  **********************************************************************/
    774 
    775 static int
    776 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
    777     u32 *cmd_type_len, u32 *olinfo_status)
    778 {
    779 	struct adapter *adapter = txr->adapter;
    780 	struct ethercom *ec = &adapter->osdep.ec;
    781 	struct m_tag *mtag;
    782 	struct ixgbe_adv_tx_context_desc *TXD;
    783 	struct ether_vlan_header *eh;
    784 	struct ip ip;
    785 	struct ip6_hdr ip6;
    786 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
    787 	int	ehdrlen, ip_hlen = 0;
    788 	u16	etype;
    789 	u8	ipproto __diagused = 0;
    790 	int	offload = TRUE;
    791 	int	ctxd = txr->next_avail_desc;
    792 	u16	vtag = 0;
    793 
    794 	/* First check if TSO is to be used */
    795 	if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4|M_CSUM_TSOv6))
    796 		return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
    797 
    798 	if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
    799 		offload = FALSE;
    800 
    801 	/* Indicate the whole packet as payload when not doing TSO */
    802        	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
    803 
    804 	/* Now ready a context descriptor */
    805 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
    806 
    807 	/*
    808 	** In advanced descriptors the vlan tag must
    809 	** be placed into the context descriptor. Hence
    810 	** we need to make one even if not doing offloads.
    811 	*/
    812 	if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
    813 		vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
    814 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
    815 	}
    816 	else if (!IXGBE_IS_X550VF(adapter) && (offload == FALSE))
    817 		return (0);
    818 
    819 	/*
    820 	 * Determine where frame payload starts.
    821 	 * Jump over vlan headers if already present,
    822 	 * helpful for QinQ too.
    823 	 */
    824 	KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
    825 	eh = mtod(mp, struct ether_vlan_header *);
    826 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
    827 		KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
    828 		etype = ntohs(eh->evl_proto);
    829 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
    830 	} else {
    831 		etype = ntohs(eh->evl_encap_proto);
    832 		ehdrlen = ETHER_HDR_LEN;
    833 	}
    834 
    835 	/* Set the ether header length */
    836 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
    837 
    838 	if (offload == FALSE)
    839 		goto no_offloads;
    840 
    841 	switch (etype) {
    842 	case ETHERTYPE_IP:
    843 		m_copydata(mp, ehdrlen, sizeof(ip), &ip);
    844 		ip_hlen = ip.ip_hl << 2;
    845 		ipproto = ip.ip_p;
    846 #if 0
    847 		ip.ip_sum = 0;
    848 		m_copyback(mp, ehdrlen, sizeof(ip), &ip);
    849 #else
    850 		KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
    851 		    ip.ip_sum == 0);
    852 #endif
    853 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
    854 		break;
    855 	case ETHERTYPE_IPV6:
    856 		m_copydata(mp, ehdrlen, sizeof(ip6), &ip6);
    857 		ip_hlen = sizeof(ip6);
    858 		/* XXX-BZ this will go badly in case of ext hdrs. */
    859 		ipproto = ip6.ip6_nxt;
    860 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
    861 		break;
    862 	default:
    863 		break;
    864 	}
    865 
    866 	if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
    867 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
    868 
    869 	vlan_macip_lens |= ip_hlen;
    870 
    871 	if (mp->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_TCPv6)) {
    872 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
    873 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
    874 		KASSERT(ipproto == IPPROTO_TCP);
    875 	} else if (mp->m_pkthdr.csum_flags & (M_CSUM_UDPv4|M_CSUM_UDPv6)) {
    876 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
    877 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
    878 		KASSERT(ipproto == IPPROTO_UDP);
    879 	}
    880 
    881 no_offloads:
    882 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
    883 
    884 	/* Now copy bits into descriptor */
    885 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
    886 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
    887 	TXD->seqnum_seed = htole32(0);
    888 	TXD->mss_l4len_idx = htole32(0);
    889 
    890 	/* We've consumed the first desc, adjust counters */
    891 	if (++ctxd == txr->num_desc)
    892 		ctxd = 0;
    893 	txr->next_avail_desc = ctxd;
    894 	--txr->tx_avail;
    895 
    896         return 0;
    897 }
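
/*
 * Illustrative sketch (not compiled): layout of the vlan_macip_lens field
 * built above.  The IP header length goes in the low bits (below
 * IXGBE_ADVTXD_MACLEN_SHIFT), the MAC header length above it, and the VLAN
 * tag in the upper half at IXGBE_ADVTXD_VLAN_SHIFT.  The helper name is
 * hypothetical; the shift macros are the ones used in ixgbe_tx_ctx_setup().
 */
#if 0
static u32
example_pack_vlan_macip_lens(u16 vtag, int ehdrlen, int ip_hlen)
{
	u32 v = 0;

	v |= (u32)vtag << IXGBE_ADVTXD_VLAN_SHIFT;	/* VLAN tag */
	v |= (u32)ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;	/* MAC header length */
	v |= (u32)ip_hlen;				/* IP header length */
	return v;
}
#endif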
    898 
    899 /**********************************************************************
    900  *
    901  *  Setup work for hardware segmentation offload (TSO) on
    902  *  adapters using advanced tx descriptors
    903  *
    904  **********************************************************************/
    905 static int
    906 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
    907     u32 *cmd_type_len, u32 *olinfo_status)
    908 {
    909 	struct m_tag *mtag;
    910 	struct adapter *adapter = txr->adapter;
    911 	struct ethercom *ec = &adapter->osdep.ec;
    912 	struct ixgbe_adv_tx_context_desc *TXD;
    913 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
    914 	u32 mss_l4len_idx = 0, paylen;
    915 	u16 vtag = 0, eh_type;
    916 	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
    917 	struct ether_vlan_header *eh;
    918 #ifdef INET6
    919 	struct ip6_hdr *ip6;
    920 #endif
    921 #ifdef INET
    922 	struct ip *ip;
    923 #endif
    924 	struct tcphdr *th;
    925 
    926 
    927 	/*
    928 	 * Determine where frame payload starts.
    929 	 * Jump over vlan headers if already present
    930 	 */
    931 	eh = mtod(mp, struct ether_vlan_header *);
    932 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
    933 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
    934 		eh_type = eh->evl_proto;
    935 	} else {
    936 		ehdrlen = ETHER_HDR_LEN;
    937 		eh_type = eh->evl_encap_proto;
    938 	}
    939 
    940 	switch (ntohs(eh_type)) {
    941 #ifdef INET6
    942 	case ETHERTYPE_IPV6:
    943 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
    944 		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
    945 		if (ip6->ip6_nxt != IPPROTO_TCP)
    946 			return (ENXIO);
    947 		ip_hlen = sizeof(struct ip6_hdr);
    948 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
    949 		th = (struct tcphdr *)((char *)ip6 + ip_hlen);
    950 		th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
    951 		    &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
    952 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
    953 		break;
    954 #endif
    955 #ifdef INET
    956 	case ETHERTYPE_IP:
    957 		ip = (struct ip *)(mp->m_data + ehdrlen);
    958 		if (ip->ip_p != IPPROTO_TCP)
    959 			return (ENXIO);
    960 		ip->ip_sum = 0;
    961 		ip_hlen = ip->ip_hl << 2;
    962 		th = (struct tcphdr *)((char *)ip + ip_hlen);
    963 		th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
    964 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
    965 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
    966 		/* Tell transmit desc to also do IPv4 checksum. */
    967 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
    968 		break;
    969 #endif
    970 	default:
    971 		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
    972 		    __func__, ntohs(eh_type));
    973 		break;
    974 	}
    975 
    976 	ctxd = txr->next_avail_desc;
    977 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
    978 
    979 	tcp_hlen = th->th_off << 2;
    980 
    981 	/* This is used in the transmit desc in encap */
    982 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
    983 
    984 	/* VLAN MACLEN IPLEN */
    985 	if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
    986 		vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
    987                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
    988 	}
    989 
    990 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
    991 	vlan_macip_lens |= ip_hlen;
    992 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
    993 
    994 	/* ADV DTYPE TUCMD */
    995 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
    996 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
    997 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
    998 
    999 	/* MSS L4LEN IDX */
   1000 	mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
   1001 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
   1002 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
   1003 
   1004 	TXD->seqnum_seed = htole32(0);
   1005 
   1006 	if (++ctxd == txr->num_desc)
   1007 		ctxd = 0;
   1008 
   1009 	txr->tx_avail--;
   1010 	txr->next_avail_desc = ctxd;
   1011 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
   1012 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
   1013 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
   1014 	++txr->tso_tx.ev_count;
   1015 	return (0);
   1016 }
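
/*
 * Illustrative sketch (not compiled): the MSS/L4LEN packing used above and
 * the number of wire frames the hardware will cut the TSO payload into.
 * The helper names are hypothetical; the shifts are the IXGBE_ADVTXD_*
 * macros used in ixgbe_tso_setup().
 */
#if 0
static u32
example_pack_mss_l4len(u32 mss, u32 tcp_hlen)
{
	return (mss << IXGBE_ADVTXD_MSS_SHIFT) |
	    (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
}

static u32
example_tso_frame_count(u32 paylen, u32 mss)
{
	/* ceil(paylen / mss): how many segments the NIC will transmit */
	return (paylen + mss - 1) / mss;
}
#endif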
   1017 
   1018 
   1019 /**********************************************************************
   1020  *
   1021  *  Examine each tx_buffer in the used queue. If the hardware is done
   1022  *  processing the packet then free associated resources. The
   1023  *  tx_buffer is put back on the free queue.
   1024  *
   1025  **********************************************************************/
   1026 void
   1027 ixgbe_txeof(struct tx_ring *txr)
   1028 {
   1029 	struct adapter		*adapter = txr->adapter;
   1030 	struct ifnet		*ifp = adapter->ifp;
   1031 	u32			work, processed = 0;
   1032 	u16			limit = txr->process_limit;
   1033 	struct ixgbe_tx_buf	*buf;
   1034 	union ixgbe_adv_tx_desc *txd;
   1035 
   1036 	KASSERT(mutex_owned(&txr->tx_mtx));
   1037 
   1038 #ifdef DEV_NETMAP
   1039 	if (ifp->if_capenable & IFCAP_NETMAP) {
   1040 		struct netmap_adapter *na = NA(ifp);
   1041 		struct netmap_kring *kring = &na->tx_rings[txr->me];
   1042 		txd = txr->tx_base;
   1043 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1044 		    BUS_DMASYNC_POSTREAD);
   1045 		/*
   1046 		 * In netmap mode, all the work is done in the context
   1047 		 * of the client thread. Interrupt handlers only wake up
   1048 		 * clients, which may be sleeping on individual rings
   1049 		 * or on a global resource for all rings.
   1050 		 * To implement tx interrupt mitigation, we wake up the client
   1051 		 * thread roughly every half ring, even if the NIC interrupts
   1052 		 * more frequently. This is implemented as follows:
   1053 		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
   1054 		 *   the slot that should wake up the thread (nkr_num_slots
   1055 		 *   means the user thread should not be woken up);
   1056 		 * - the driver ignores tx interrupts unless netmap_mitigate=0
   1057 		 *   or the slot has the DD bit set.
   1058 		 */
   1059 		if (!netmap_mitigate ||
   1060 		    (kring->nr_kflags < kring->nkr_num_slots &&
   1061 		    txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
   1062 			netmap_tx_irq(ifp, txr->me);
   1063 		}
   1064 		return;
   1065 	}
   1066 #endif /* DEV_NETMAP */
   1067 
   1068 	if (txr->tx_avail == txr->num_desc) {
   1069 		txr->busy = 0;
   1070 		return;
   1071 	}
   1072 
   1073 	/* Get work starting point */
   1074 	work = txr->next_to_clean;
   1075 	buf = &txr->tx_buffers[work];
   1076 	txd = &txr->tx_base[work];
   1077 	work -= txr->num_desc; /* The distance to ring end */
   1078         ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1079 	    BUS_DMASYNC_POSTREAD);
   1080 	do {
    1081 		union ixgbe_adv_tx_desc *eop = buf->eop;
   1082 		if (eop == NULL) /* No work */
   1083 			break;
   1084 
   1085 		if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
   1086 			break;	/* I/O not complete */
   1087 
   1088 		if (buf->m_head) {
   1089 			txr->bytes +=
   1090 			    buf->m_head->m_pkthdr.len;
   1091 			bus_dmamap_sync(txr->txtag->dt_dmat,
   1092 			    buf->map,
   1093 			    0, buf->m_head->m_pkthdr.len,
   1094 			    BUS_DMASYNC_POSTWRITE);
   1095 			ixgbe_dmamap_unload(txr->txtag,
   1096 			    buf->map);
   1097 			m_freem(buf->m_head);
   1098 			buf->m_head = NULL;
   1099 		}
   1100 		buf->eop = NULL;
   1101 		++txr->tx_avail;
   1102 
   1103 		/* We clean the range if multi segment */
   1104 		while (txd != eop) {
   1105 			++txd;
   1106 			++buf;
   1107 			++work;
   1108 			/* wrap the ring? */
   1109 			if (__predict_false(!work)) {
   1110 				work -= txr->num_desc;
   1111 				buf = txr->tx_buffers;
   1112 				txd = txr->tx_base;
   1113 			}
   1114 			if (buf->m_head) {
   1115 				txr->bytes +=
   1116 				    buf->m_head->m_pkthdr.len;
   1117 				bus_dmamap_sync(txr->txtag->dt_dmat,
   1118 				    buf->map,
   1119 				    0, buf->m_head->m_pkthdr.len,
   1120 				    BUS_DMASYNC_POSTWRITE);
   1121 				ixgbe_dmamap_unload(txr->txtag,
   1122 				    buf->map);
   1123 				m_freem(buf->m_head);
   1124 				buf->m_head = NULL;
   1125 			}
   1126 			++txr->tx_avail;
   1127 			buf->eop = NULL;
   1128 
   1129 		}
   1130 		++txr->packets;
   1131 		++processed;
   1132 		++ifp->if_opackets;
   1133 
   1134 		/* Try the next packet */
   1135 		++txd;
   1136 		++buf;
   1137 		++work;
   1138 		/* reset with a wrap */
   1139 		if (__predict_false(!work)) {
   1140 			work -= txr->num_desc;
   1141 			buf = txr->tx_buffers;
   1142 			txd = txr->tx_base;
   1143 		}
   1144 		prefetch(txd);
   1145 	} while (__predict_true(--limit));
   1146 
   1147 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1148 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   1149 
   1150 	work += txr->num_desc;
   1151 	txr->next_to_clean = work;
   1152 
    1153 	/*
    1154 	** Queue hang detection: we know there is
    1155 	** work outstanding, or the early return
    1156 	** above would have been taken, so increment
    1157 	** busy if nothing was cleaned.  The local
    1158 	** timer checks this counter and marks the
    1159 	** queue HUNG if it exceeds the maximum.
    1160 	*/
   1161 	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
   1162 		++txr->busy;
    1163 	/*
    1164 	** If anything was cleaned, reset the state to 1;
    1165 	** note this clears HUNG if it was set.
    1166 	*/
   1167 	if (processed)
   1168 		txr->busy = 1;
   1169 
   1170 	if (txr->tx_avail == txr->num_desc)
   1171 		txr->busy = 0;
   1172 
   1173 	return;
   1174 }
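
/*
 * Illustrative sketch (not compiled): the cleanup loop above biases the
 * ring index by -num_desc so the wrap test is a cheap compare against
 * zero; the real index is recovered by adding num_desc back at the end.
 * The helper name is hypothetical.
 */
#if 0
static u32
example_advance_biased_index(u32 work, u32 num_desc)
{
	++work;				/* move to the next descriptor */
	if (__predict_false(!work))	/* hit the end of the ring */
		work -= num_desc;	/* wrap back to the start (still biased) */
	return work;			/* caller adds num_desc to get the real index */
}
#endif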
   1175 
   1176 
   1177 #ifdef IXGBE_FDIR
    1178 /*
    1179 ** This routine parses packet headers so that Flow
    1180 ** Director can make a hashed filter table entry,
    1181 ** allowing traffic flows to be identified and kept
    1182 ** on the same CPU.  Parsing every packet would be a
    1183 ** performance hit, so only one in every
    1184 ** atr_sample_rate packets is sampled.
    1185 */
   1186 static void
   1187 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
   1188 {
   1189 	struct adapter			*adapter = txr->adapter;
   1190 	struct ix_queue			*que;
   1191 	struct ip			*ip;
   1192 	struct tcphdr			*th;
   1193 	struct udphdr			*uh;
   1194 	struct ether_vlan_header	*eh;
   1195 	union ixgbe_atr_hash_dword	input = {.dword = 0};
   1196 	union ixgbe_atr_hash_dword	common = {.dword = 0};
   1197 	int  				ehdrlen, ip_hlen;
   1198 	u16				etype;
   1199 
   1200 	eh = mtod(mp, struct ether_vlan_header *);
   1201 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
   1202 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
   1203 		etype = eh->evl_proto;
   1204 	} else {
   1205 		ehdrlen = ETHER_HDR_LEN;
   1206 		etype = eh->evl_encap_proto;
   1207 	}
   1208 
   1209 	/* Only handling IPv4 */
   1210 	if (etype != htons(ETHERTYPE_IP))
   1211 		return;
   1212 
   1213 	ip = (struct ip *)(mp->m_data + ehdrlen);
   1214 	ip_hlen = ip->ip_hl << 2;
   1215 
   1216 	/* check if we're UDP or TCP */
   1217 	switch (ip->ip_p) {
   1218 	case IPPROTO_TCP:
   1219 		th = (struct tcphdr *)((char *)ip + ip_hlen);
   1220 		/* src and dst are inverted */
   1221 		common.port.dst ^= th->th_sport;
   1222 		common.port.src ^= th->th_dport;
   1223 		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
   1224 		break;
   1225 	case IPPROTO_UDP:
   1226 		uh = (struct udphdr *)((char *)ip + ip_hlen);
   1227 		/* src and dst are inverted */
   1228 		common.port.dst ^= uh->uh_sport;
   1229 		common.port.src ^= uh->uh_dport;
   1230 		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
   1231 		break;
   1232 	default:
   1233 		return;
   1234 	}
   1235 
   1236 	input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
   1237 	if (mp->m_pkthdr.ether_vtag)
   1238 		common.flex_bytes ^= htons(ETHERTYPE_VLAN);
   1239 	else
   1240 		common.flex_bytes ^= etype;
   1241 	common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
   1242 
   1243 	que = &adapter->queues[txr->me];
   1244 	/*
   1245 	** This assumes the Rx queue and Tx
   1246 	** queue are bound to the same CPU
   1247 	*/
   1248 	ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
   1249 	    input, common, que->msix);
   1250 }
   1251 #endif /* IXGBE_FDIR */
   1252 
   1253 /*
   1254 ** Used to detect a descriptor that has
   1255 ** been merged by Hardware RSC.
   1256 */
   1257 static inline u32
   1258 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
   1259 {
   1260 	return (le32toh(rx->wb.lower.lo_dword.data) &
   1261 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
   1262 }
   1263 
   1264 /*********************************************************************
   1265  *
    1266  *  Initialize the Hardware RSC (LRO) feature on 82599
    1267  *  for an RX ring; it is toggled by the LRO capability
    1268  *  even though it is transparent to the stack.
    1269  *
    1270  *  NOTE: since this HW feature only works with IPv4, and
    1271  *        our testing has shown software LRO to be just as
    1272  *        effective, it is disabled by default.
   1273  *
   1274  **********************************************************************/
   1275 static void
   1276 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
   1277 {
   1278 	struct	adapter 	*adapter = rxr->adapter;
   1279 	struct	ixgbe_hw	*hw = &adapter->hw;
   1280 	u32			rscctrl, rdrxctl;
   1281 
   1282 	/* If turning LRO/RSC off we need to disable it */
   1283 	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
   1284 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
   1285 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
    1286 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
         		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
    1287 		return;
   1288 
   1289 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
   1290 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
   1291 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
   1292 	if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
   1293 #endif /* DEV_NETMAP */
   1294 	rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
   1295 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
   1296 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
   1297 
   1298 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
   1299 	rscctrl |= IXGBE_RSCCTL_RSCEN;
   1300 	/*
   1301 	** Limit the total number of descriptors that
   1302 	** can be combined, so it does not exceed 64K
   1303 	*/
   1304 	if (rxr->mbuf_sz == MCLBYTES)
   1305 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
   1306 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
   1307 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
   1308 	else if (rxr->mbuf_sz == MJUM9BYTES)
   1309 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
   1310 	else  /* Using 16K cluster */
   1311 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
   1312 
   1313 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
   1314 
   1315 	/* Enable TCP header recognition */
   1316 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
   1317 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
   1318 	    IXGBE_PSRTYPE_TCPHDR));
   1319 
   1320 	/* Disable RSC for ACK packets */
   1321 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
   1322 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
   1323 
   1324 	rxr->hw_rsc = TRUE;
   1325 }
   1326 /*********************************************************************
   1327  *
    1328  *  Refresh mbuf buffers for RX descriptor rings
    1329  *   - keeps its own state, so discards due to resource
    1330  *     exhaustion are unnecessary; if an mbuf cannot be
    1331  *     obtained the routine simply returns, keeping its
    1332  *     placeholder, and can be called again later to retry.
   1333  *
   1334  **********************************************************************/
   1335 static void
   1336 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
   1337 {
   1338 	struct adapter		*adapter = rxr->adapter;
   1339 	struct ixgbe_rx_buf	*rxbuf;
   1340 	struct mbuf		*mp;
   1341 	int			i, j, error;
   1342 	bool			refreshed = false;
   1343 
   1344 	i = j = rxr->next_to_refresh;
   1345 	/* Control the loop with one beyond */
   1346 	if (++j == rxr->num_desc)
   1347 		j = 0;
   1348 
   1349 	while (j != limit) {
   1350 		rxbuf = &rxr->rx_buffers[i];
   1351 		if (rxbuf->buf == NULL) {
   1352 			mp = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
   1353 			    MT_DATA, M_PKTHDR, rxr->mbuf_sz);
   1354 			if (mp == NULL) {
   1355 				rxr->no_jmbuf.ev_count++;
   1356 				goto update;
   1357 			}
   1358 			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
   1359 				m_adj(mp, ETHER_ALIGN);
   1360 		} else
   1361 			mp = rxbuf->buf;
   1362 
   1363 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   1364 
   1365 		/* If we're dealing with an mbuf that was copied rather
   1366 		 * than replaced, there's no need to go through busdma.
   1367 		 */
   1368 		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
   1369 			/* Get the memory mapping */
   1370 			ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   1371 			error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
   1372 			    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
   1373 			if (error != 0) {
   1374 				printf("Refresh mbufs: payload dmamap load"
   1375 				    " failure - %d\n", error);
   1376 				m_free(mp);
   1377 				rxbuf->buf = NULL;
   1378 				goto update;
   1379 			}
   1380 			rxbuf->buf = mp;
   1381 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1382 			    0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
   1383 			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
   1384 			    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1385 		} else {
   1386 			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
   1387 			rxbuf->flags &= ~IXGBE_RX_COPY;
   1388 		}
   1389 
   1390 		refreshed = true;
   1391 		/* Next is precalculated */
   1392 		i = j;
   1393 		rxr->next_to_refresh = i;
   1394 		if (++j == rxr->num_desc)
   1395 			j = 0;
   1396 	}
   1397 update:
   1398 	if (refreshed) /* Update hardware tail index */
   1399 		IXGBE_WRITE_REG(&adapter->hw,
   1400 		    rxr->tail, rxr->next_to_refresh);
   1401 	return;
   1402 }
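
/*
 * Illustrative sketch (not compiled): the refresh loop above runs the
 * scratch index 'j' one slot ahead of the commit index 'i', so the ring is
 * never filled all the way up to 'limit' (typically the next descriptor
 * the receive cleanup will examine) and the producer cannot catch up with
 * the consumer.  The helper below, with a hypothetical name, counts how
 * many slots one call can refresh while keeping that one-slot gap.
 */
#if 0
static u32
example_slots_refreshable(u32 next_to_refresh, u32 limit, u32 num_desc)
{
	/* (limit - next_to_refresh - 1) modulo the ring size */
	return (limit + num_desc - next_to_refresh - 1) % num_desc;
}
#endif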
   1403 
   1404 /*********************************************************************
   1405  *
   1406  *  Allocate memory for rx_buffer structures. Since we use one
   1407  *  rx_buffer per received packet, the maximum number of rx_buffer's
    1408  *  rx_buffer per received packet, the maximum number of rx_buffers
   1409  *  that we've allocated.
   1410  *
   1411  **********************************************************************/
   1412 int
   1413 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
   1414 {
   1415 	struct	adapter 	*adapter = rxr->adapter;
   1416 	device_t 		dev = adapter->dev;
   1417 	struct ixgbe_rx_buf 	*rxbuf;
   1418 	int             	i, bsize, error;
   1419 
   1420 	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
   1421 	if (!(rxr->rx_buffers =
   1422 	    (struct ixgbe_rx_buf *) malloc(bsize,
   1423 	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
   1424 		aprint_error_dev(dev, "Unable to allocate rx_buffer memory\n");
   1425 		error = ENOMEM;
   1426 		goto fail;
   1427 	}
   1428 
   1429 	if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat,	/* parent */
   1430 				   1, 0,	/* alignment, bounds */
   1431 				   MJUM16BYTES,		/* maxsize */
   1432 				   1,			/* nsegments */
   1433 				   MJUM16BYTES,		/* maxsegsize */
   1434 				   0,			/* flags */
   1435 				   &rxr->ptag))) {
   1436 		aprint_error_dev(dev, "Unable to create RX DMA tag\n");
   1437 		goto fail;
   1438 	}
   1439 
    1440 	for (i = 0; i < rxr->num_desc; i++) {
   1441 		rxbuf = &rxr->rx_buffers[i];
   1442 		error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
   1443 		if (error) {
   1444 			aprint_error_dev(dev, "Unable to create RX dma map\n");
   1445 			goto fail;
   1446 		}
   1447 	}
   1448 
   1449 	return (0);
   1450 
   1451 fail:
   1452 	/* Frees all, but can handle partial completion */
   1453 	ixgbe_free_receive_structures(adapter);
   1454 	return (error);
   1455 }
   1456 
   1457 
   1458 static void
   1459 ixgbe_free_receive_ring(struct rx_ring *rxr)
   1460 {
   1461 	struct ixgbe_rx_buf       *rxbuf;
   1462 	int i;
   1463 
   1464 	for (i = 0; i < rxr->num_desc; i++) {
   1465 		rxbuf = &rxr->rx_buffers[i];
   1466 		if (rxbuf->buf != NULL) {
   1467 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1468 			    0, rxbuf->buf->m_pkthdr.len,
   1469 			    BUS_DMASYNC_POSTREAD);
   1470 			ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   1471 			rxbuf->buf->m_flags |= M_PKTHDR;
   1472 			m_freem(rxbuf->buf);
   1473 			rxbuf->buf = NULL;
   1474 			rxbuf->flags = 0;
   1475 		}
   1476 	}
   1477 }
   1478 
   1479 
   1480 /*********************************************************************
   1481  *
   1482  *  Initialize a receive ring and its buffers.
   1483  *
   1484  **********************************************************************/
   1485 static int
   1486 ixgbe_setup_receive_ring(struct rx_ring *rxr)
   1487 {
   1488 	struct	adapter 	*adapter;
   1489 	struct ixgbe_rx_buf	*rxbuf;
   1490 #ifdef LRO
   1491 	struct ifnet		*ifp;
   1492 	struct lro_ctrl		*lro = &rxr->lro;
   1493 #endif /* LRO */
   1494 	int			rsize, error = 0;
   1495 #ifdef DEV_NETMAP
   1496 	struct netmap_adapter *na = NA(rxr->adapter->ifp);
   1497 	struct netmap_slot *slot;
   1498 #endif /* DEV_NETMAP */
   1499 
   1500 	adapter = rxr->adapter;
   1501 #ifdef LRO
   1502 	ifp = adapter->ifp;
   1503 #endif /* LRO */
   1504 
   1505 	/* Clear the ring contents */
   1506 	IXGBE_RX_LOCK(rxr);
   1507 #ifdef DEV_NETMAP
   1508 	/* same as in ixgbe_setup_transmit_ring() */
   1509 	slot = netmap_reset(na, NR_RX, rxr->me, 0);
   1510 #endif /* DEV_NETMAP */
   1511 	rsize = roundup2(adapter->num_rx_desc *
   1512 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
   1513 	bzero((void *)rxr->rx_base, rsize);
   1514 	/* Cache the size */
   1515 	rxr->mbuf_sz = adapter->rx_mbuf_sz;
   1516 
   1517 	/* Free current RX buffer structs and their mbufs */
   1518 	ixgbe_free_receive_ring(rxr);
   1519 
   1520 	IXGBE_RX_UNLOCK(rxr);
   1521 
   1522 	/* Now reinitialize our supply of jumbo mbufs.  The number
   1523 	 * or size of jumbo mbufs may have changed.
   1524 	 */
   1525 	ixgbe_jcl_reinit(&adapter->jcl_head, rxr->ptag->dt_dmat,
   1526 	    2 * adapter->num_rx_desc, adapter->rx_mbuf_sz);
   1527 
   1528 	IXGBE_RX_LOCK(rxr);
   1529 
   1530 	/* Now replenish the mbufs */
   1531 	for (int j = 0; j != rxr->num_desc; ++j) {
   1532 		struct mbuf	*mp;
   1533 
   1534 		rxbuf = &rxr->rx_buffers[j];
   1535 #ifdef DEV_NETMAP
   1536 		/*
   1537 		 * In netmap mode, fill the map and set the buffer
   1538 		 * address in the NIC ring, considering the offset
   1539 		 * between the netmap and NIC rings (see comment in
   1540 		 * ixgbe_setup_transmit_ring() ). No need to allocate
   1541 		 * an mbuf, so end the block with a continue;
   1542 		 */
   1543 		if (slot) {
   1544 			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
   1545 			uint64_t paddr;
   1546 			void *addr;
   1547 
   1548 			addr = PNMB(na, slot + sj, &paddr);
   1549 			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
   1550 			/* Update descriptor and the cached value */
   1551 			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
   1552 			rxbuf->addr = htole64(paddr);
   1553 			continue;
   1554 		}
   1555 #endif /* DEV_NETMAP */
   1556 		rxbuf->flags = 0;
   1557 		rxbuf->buf = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
   1558 		    MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
   1559 		if (rxbuf->buf == NULL) {
   1560 			error = ENOBUFS;
   1561                         goto fail;
   1562 		}
   1563 		mp = rxbuf->buf;
   1564 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   1565 		/* Get the memory mapping */
   1566 		error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
   1567 		    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
   1568 		if (error != 0)
    1569 			goto fail;
   1570 		bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1571 		    0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
   1572 		/* Update the descriptor and the cached value */
   1573 		rxr->rx_base[j].read.pkt_addr =
   1574 		    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1575 		rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1576 	}
   1577 
   1578 
   1579 	/* Setup our descriptor indices */
   1580 	rxr->next_to_check = 0;
   1581 	rxr->next_to_refresh = 0;
   1582 	rxr->lro_enabled = FALSE;
   1583 	rxr->rx_copies.ev_count = 0;
   1584 	rxr->rx_bytes.ev_count = 0;
   1585 	rxr->vtag_strip = FALSE;
   1586 
   1587 	ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   1588 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   1589 
   1590 	/*
   1591 	** Now set up the LRO interface:
   1592 	*/
   1593 	if (ixgbe_rsc_enable)
   1594 		ixgbe_setup_hw_rsc(rxr);
   1595 #ifdef LRO
   1596 	else if (ifp->if_capenable & IFCAP_LRO) {
   1597 		device_t dev = adapter->dev;
   1598 		int err = tcp_lro_init(lro);
   1599 		if (err) {
   1600 			device_printf(dev, "LRO Initialization failed!\n");
   1601 			goto fail;
   1602 		}
   1603 		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
   1604 		rxr->lro_enabled = TRUE;
   1605 		lro->ifp = adapter->ifp;
   1606 	}
   1607 #endif /* LRO */
   1608 
   1609 	IXGBE_RX_UNLOCK(rxr);
   1610 	return (0);
   1611 
   1612 fail:
   1613 	ixgbe_free_receive_ring(rxr);
   1614 	IXGBE_RX_UNLOCK(rxr);
   1615 	return (error);
   1616 }
   1617 
   1618 /*********************************************************************
   1619  *
   1620  *  Initialize all receive rings.
   1621  *
   1622  **********************************************************************/
   1623 int
   1624 ixgbe_setup_receive_structures(struct adapter *adapter)
   1625 {
   1626 	struct rx_ring *rxr = adapter->rx_rings;
   1627 	int j;
   1628 
   1629 	for (j = 0; j < adapter->num_queues; j++, rxr++)
   1630 		if (ixgbe_setup_receive_ring(rxr))
   1631 			goto fail;
   1632 
   1633 	return (0);
   1634 fail:
    1635 	/*
    1636 	 * Free the RX buffers allocated so far; we only handle the rings
    1637 	 * that completed, since the failing ring has already cleaned up
    1638 	 * after itself. Ring 'j' failed, so it's the terminus.
    1639 	 */
   1640 	for (int i = 0; i < j; ++i) {
   1641 		rxr = &adapter->rx_rings[i];
   1642 		ixgbe_free_receive_ring(rxr);
   1643 	}
   1644 
   1645 	return (ENOBUFS);
   1646 }
   1647 
   1648 
   1649 /*********************************************************************
   1650  *
   1651  *  Free all receive rings.
   1652  *
   1653  **********************************************************************/
   1654 void
   1655 ixgbe_free_receive_structures(struct adapter *adapter)
   1656 {
   1657 	struct rx_ring *rxr = adapter->rx_rings;
   1658 
   1659 	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
   1660 
   1661 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
   1662 #ifdef LRO
   1663 		struct lro_ctrl		*lro = &rxr->lro;
   1664 #endif /* LRO */
   1665 		ixgbe_free_receive_buffers(rxr);
   1666 #ifdef LRO
   1667 		/* Free LRO memory */
   1668 		tcp_lro_free(lro);
   1669 #endif /* LRO */
   1670 		/* Free the ring memory as well */
   1671 		ixgbe_dma_free(adapter, &rxr->rxdma);
   1672 		IXGBE_RX_LOCK_DESTROY(rxr);
   1673 	}
   1674 
   1675 	free(adapter->rx_rings, M_DEVBUF);
   1676 }
   1677 
   1678 
   1679 /*********************************************************************
   1680  *
   1681  *  Free receive ring data structures
   1682  *
   1683  **********************************************************************/
   1684 static void
   1685 ixgbe_free_receive_buffers(struct rx_ring *rxr)
   1686 {
   1687 	struct adapter		*adapter = rxr->adapter;
   1688 	struct ixgbe_rx_buf	*rxbuf;
   1689 
   1690 	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
   1691 
   1692 	/* Cleanup any existing buffers */
   1693 	if (rxr->rx_buffers != NULL) {
   1694 		for (int i = 0; i < adapter->num_rx_desc; i++) {
   1695 			rxbuf = &rxr->rx_buffers[i];
   1696 			if (rxbuf->buf != NULL) {
   1697 				bus_dmamap_sync(rxr->ptag->dt_dmat,
   1698 				    rxbuf->pmap, 0, rxbuf->buf->m_pkthdr.len,
   1699 				    BUS_DMASYNC_POSTREAD);
   1700 				ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   1701 				rxbuf->buf->m_flags |= M_PKTHDR;
   1702 				m_freem(rxbuf->buf);
   1703 			}
   1704 			rxbuf->buf = NULL;
   1705 			if (rxbuf->pmap != NULL) {
   1706 				ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
   1707 				rxbuf->pmap = NULL;
   1708 			}
   1709 		}
   1710 		if (rxr->rx_buffers != NULL) {
   1711 			free(rxr->rx_buffers, M_DEVBUF);
   1712 			rxr->rx_buffers = NULL;
   1713 		}
   1714 	}
   1715 
   1716 	if (rxr->ptag != NULL) {
   1717 		ixgbe_dma_tag_destroy(rxr->ptag);
   1718 		rxr->ptag = NULL;
   1719 	}
   1720 
   1721 	return;
   1722 }
   1723 
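         /*
          * Hand a received frame to LRO (when eligible) or to the stack.
          * Note that the RX ring lock is dropped around bpf_mtap()/if_input()
          * and re-acquired afterwards, so the upper layers are never entered
          * with the ring lock held.
          */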
   1724 static __inline void
   1725 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
   1726 {
   1727 	int s;
   1728 
   1729 #ifdef LRO
   1730 	struct adapter	*adapter = ifp->if_softc;
   1731 	struct ethercom *ec = &adapter->osdep.ec;
   1732 
    1733         /*
    1734          * At the moment LRO is only for IP/TCP packets whose TCP checksum was
    1735          * computed by hardware, and which carry no VLAN tag in the ethernet
    1736          * header. For IPv6 we do not yet support extension headers.
    1737          */
   1738         if (rxr->lro_enabled &&
   1739             (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
   1740             (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
   1741             ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
   1742             (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
   1743             (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
   1744             (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
   1745             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
   1746             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
    1747                 /*
    1748                  * Send to the stack if:
    1749                  *  - LRO not enabled, or
    1750                  *  - no LRO resources, or
    1751                  *  - lro enqueue fails
    1752                  */
   1753                 if (rxr->lro.lro_cnt != 0)
   1754                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
   1755                                 return;
   1756         }
   1757 #endif /* LRO */
   1758 
   1759 	IXGBE_RX_UNLOCK(rxr);
   1760 
   1761 	s = splnet();
   1762 	/* Pass this up to any BPF listeners. */
   1763 	bpf_mtap(ifp, m);
   1764 	if_input(ifp, m);
   1765 	splx(s);
   1766 
   1767 	IXGBE_RX_LOCK(rxr);
   1768 }
   1769 
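         /*
          * Drop the frame in slot 'i': free any partially assembled chain
          * headed by rbuf->fmp (which already links rbuf->buf) or the lone
          * mbuf, unload the DMA map, and leave the slot for the normal
          * refresh path to re-arm.
          */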
   1770 static __inline void
   1771 ixgbe_rx_discard(struct rx_ring *rxr, int i)
   1772 {
   1773 	struct ixgbe_rx_buf	*rbuf;
   1774 
   1775 	rbuf = &rxr->rx_buffers[i];
   1776 
   1777 
   1778 	/*
   1779 	** With advanced descriptors the writeback
    1780 	** clobbers the buffer addrs, so it's easier
   1781 	** to just free the existing mbufs and take
   1782 	** the normal refresh path to get new buffers
   1783 	** and mapping.
   1784 	*/
   1785 
    1786 	if (rbuf->fmp != NULL) { /* Partial chain? */
   1787 		rbuf->fmp->m_flags |= M_PKTHDR;
   1788 		m_freem(rbuf->fmp);
   1789 		rbuf->fmp = NULL;
   1790 		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
   1791 	} else if (rbuf->buf) {
   1792 		m_free(rbuf->buf);
   1793 		rbuf->buf = NULL;
   1794 	}
   1795 	ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
   1796 
   1797 	rbuf->flags = 0;
   1798 
   1799 	return;
   1800 }
   1801 
   1802 
   1803 /*********************************************************************
   1804  *
    1805  *  This routine executes in interrupt context. It replenishes
    1806  *  the mbufs in the descriptor ring and passes data that has been
    1807  *  DMA'ed into host memory up to the upper layer.
   1808  *
   1809  *  Return TRUE for more work, FALSE for all clean.
   1810  *********************************************************************/
   1811 bool
   1812 ixgbe_rxeof(struct ix_queue *que)
   1813 {
   1814 	struct adapter		*adapter = que->adapter;
   1815 	struct rx_ring		*rxr = que->rxr;
   1816 	struct ifnet		*ifp = adapter->ifp;
   1817 #ifdef LRO
   1818 	struct lro_ctrl		*lro = &rxr->lro;
   1819 	struct lro_entry	*queued;
   1820 #endif /* LRO */
   1821 	int			i, nextp, processed = 0;
   1822 	u32			staterr = 0;
   1823 	u16			count = rxr->process_limit;
   1824 	union ixgbe_adv_rx_desc	*cur;
   1825 	struct ixgbe_rx_buf	*rbuf, *nbuf;
   1826 #ifdef RSS
   1827 	u16			pkt_info;
   1828 #endif
   1829 
   1830 	IXGBE_RX_LOCK(rxr);
   1831 
   1832 #ifdef DEV_NETMAP
   1833 	/* Same as the txeof routine: wakeup clients on intr. */
   1834 	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
   1835 		IXGBE_RX_UNLOCK(rxr);
   1836 		return (FALSE);
   1837 	}
   1838 #endif /* DEV_NETMAP */
   1839 
   1840 	for (i = rxr->next_to_check; count != 0;) {
   1841 		struct mbuf	*sendmp, *mp;
   1842 		u32		rsc, ptype;
   1843 		u16		len;
   1844 		u16		vtag = 0;
   1845 		bool		eop;
   1846 
   1847 		/* Sync the ring. */
   1848 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   1849 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   1850 
   1851 		cur = &rxr->rx_base[i];
   1852 		staterr = le32toh(cur->wb.upper.status_error);
   1853 #ifdef RSS
   1854 		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
   1855 #endif
   1856 
   1857 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
   1858 			break;
   1859 		if ((ifp->if_flags & IFF_RUNNING) == 0)
   1860 			break;
   1861 
   1862 		count--;
   1863 		sendmp = NULL;
   1864 		nbuf = NULL;
   1865 		rsc = 0;
   1866 		cur->wb.upper.status_error = 0;
   1867 		rbuf = &rxr->rx_buffers[i];
   1868 		mp = rbuf->buf;
   1869 
   1870 		len = le16toh(cur->wb.upper.length);
   1871 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
   1872 		    IXGBE_RXDADV_PKTTYPE_MASK;
   1873 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
   1874 
   1875 		/* Make sure bad packets are discarded */
   1876 		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
   1877 #if __FreeBSD_version >= 1100036
   1878 			if (IXGBE_IS_VF(adapter))
   1879 				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
   1880 #endif
   1881 			rxr->rx_discarded.ev_count++;
   1882 			ixgbe_rx_discard(rxr, i);
   1883 			goto next_desc;
   1884 		}
   1885 
   1886 		/*
    1887 		** On the 82599, which supports a hardware
    1888 		** LRO (called HW RSC), packets need
    1889 		** not be fragmented across sequential
    1890 		** descriptors; rather, the next descriptor
    1891 		** is indicated in bits of the descriptor.
    1892 		** This also means that we might process
    1893 		** more than one packet at a time, something
    1894 		** that was never true before; it
    1895 		** required eliminating global chain pointers
    1896 		** in favor of what we are doing here.  -jfv
   1897 		*/
   1898 		if (!eop) {
   1899 			/*
   1900 			** Figure out the next descriptor
   1901 			** of this frame.
   1902 			*/
   1903 			if (rxr->hw_rsc == TRUE) {
   1904 				rsc = ixgbe_rsc_count(cur);
   1905 				rxr->rsc_num += (rsc - 1);
   1906 			}
   1907 			if (rsc) { /* Get hardware index */
   1908 				nextp = ((staterr &
   1909 				    IXGBE_RXDADV_NEXTP_MASK) >>
   1910 				    IXGBE_RXDADV_NEXTP_SHIFT);
   1911 			} else { /* Just sequential */
   1912 				nextp = i + 1;
   1913 				if (nextp == adapter->num_rx_desc)
   1914 					nextp = 0;
   1915 			}
   1916 			nbuf = &rxr->rx_buffers[nextp];
   1917 			prefetch(nbuf);
   1918 		}
   1919 		/*
   1920 		** Rather than using the fmp/lmp global pointers
   1921 		** we now keep the head of a packet chain in the
   1922 		** buffer struct and pass this along from one
   1923 		** descriptor to the next, until we get EOP.
   1924 		*/
   1925 		mp->m_len = len;
    1926 		/*
    1927 		** See if there is a stored head; if so, this
    1928 		** buffer continues an earlier frame.
    1929 		*/
   1930 		sendmp = rbuf->fmp;
   1931 		if (sendmp != NULL) {  /* secondary frag */
   1932 			rbuf->buf = rbuf->fmp = NULL;
   1933 			mp->m_flags &= ~M_PKTHDR;
   1934 			sendmp->m_pkthdr.len += mp->m_len;
   1935 		} else {
   1936 			/*
   1937 			 * Optimize.  This might be a small packet,
   1938 			 * maybe just a TCP ACK.  Do a fast copy that
   1939 			 * is cache aligned into a new mbuf, and
   1940 			 * leave the old mbuf+cluster for re-use.
   1941 			 */
   1942 			if (eop && len <= IXGBE_RX_COPY_LEN) {
   1943 				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
   1944 				if (sendmp != NULL) {
   1945 					sendmp->m_data +=
   1946 					    IXGBE_RX_COPY_ALIGN;
   1947 					ixgbe_bcopy(mp->m_data,
   1948 					    sendmp->m_data, len);
   1949 					sendmp->m_len = len;
   1950 					rxr->rx_copies.ev_count++;
   1951 					rbuf->flags |= IXGBE_RX_COPY;
   1952 				}
   1953 			}
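         			/*
         			 * No copy was made (frame too large, not yet
         			 * EOP, or the header mbuf allocation failed),
         			 * so hand up the receive mbuf itself and let
         			 * the refresh path replace it.
         			 */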
   1954 			if (sendmp == NULL) {
   1955 				rbuf->buf = rbuf->fmp = NULL;
   1956 				sendmp = mp;
   1957 			}
   1958 
    1959 			/* First desc of a non-packet-split chain */
   1960 			sendmp->m_flags |= M_PKTHDR;
   1961 			sendmp->m_pkthdr.len = mp->m_len;
   1962 		}
   1963 		++processed;
   1964 
   1965 		/* Pass the head pointer on */
   1966 		if (eop == 0) {
   1967 			nbuf->fmp = sendmp;
   1968 			sendmp = NULL;
   1969 			mp->m_next = nbuf->buf;
   1970 		} else { /* Sending this frame */
   1971 			m_set_rcvif(sendmp, ifp);
   1972 			ifp->if_ipackets++;
   1973 			rxr->rx_packets.ev_count++;
   1974 			/* capture data for AIM */
   1975 			rxr->bytes += sendmp->m_pkthdr.len;
   1976 			rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
   1977 			/* Process vlan info */
   1978 			if ((rxr->vtag_strip) &&
   1979 			    (staterr & IXGBE_RXD_STAT_VP))
   1980 				vtag = le16toh(cur->wb.upper.vlan);
   1981 			if (vtag) {
   1982 				VLAN_INPUT_TAG(ifp, sendmp, vtag,
   1983 				    printf("%s: could not apply VLAN "
   1984 					"tag", __func__));
   1985 			}
   1986 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
   1987 				ixgbe_rx_checksum(staterr, sendmp, ptype,
   1988 				   &adapter->stats.pf);
   1989 			}
   1990 #if __FreeBSD_version >= 800000
   1991 #ifdef RSS
   1992 			sendmp->m_pkthdr.flowid =
   1993 			    le32toh(cur->wb.lower.hi_dword.rss);
   1994 #if __FreeBSD_version < 1100054
   1995 			sendmp->m_flags |= M_FLOWID;
   1996 #endif
   1997 			switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
   1998 			case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
   1999 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV4);
   2000 				break;
   2001 			case IXGBE_RXDADV_RSSTYPE_IPV4:
   2002 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV4);
   2003 				break;
   2004 			case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
   2005 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6);
   2006 				break;
   2007 			case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
   2008 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6_EX);
   2009 				break;
   2010 			case IXGBE_RXDADV_RSSTYPE_IPV6:
   2011 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6);
   2012 				break;
   2013 			case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
   2014 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6_EX);
   2015 				break;
   2016 			case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
   2017 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV4);
   2018 				break;
   2019 			case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
   2020 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV6);
   2021 				break;
   2022 			case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
   2023 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV6_EX);
   2024 				break;
   2025 			default:
   2026 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
   2027 			}
   2028 #else /* RSS */
   2029 			sendmp->m_pkthdr.flowid = que->msix;
   2030 #if __FreeBSD_version >= 1100054
   2031 			M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
   2032 #else
   2033 			sendmp->m_flags |= M_FLOWID;
   2034 #endif
   2035 #endif /* RSS */
   2036 #endif /* FreeBSD_version */
   2037 		}
   2038 next_desc:
   2039 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   2040 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   2041 
   2042 		/* Advance our pointers to the next descriptor. */
   2043 		if (++i == rxr->num_desc)
   2044 			i = 0;
   2045 
   2046 		/* Now send to the stack or do LRO */
   2047 		if (sendmp != NULL) {
   2048 			rxr->next_to_check = i;
   2049 			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
   2050 			i = rxr->next_to_check;
   2051 		}
   2052 
    2053 		/* Refresh the mbufs every 8 processed descriptors */
   2054 		if (processed == 8) {
   2055 			ixgbe_refresh_mbufs(rxr, i);
   2056 			processed = 0;
   2057 		}
   2058 	}
   2059 
   2060 	/* Refresh any remaining buf structs */
   2061 	if (ixgbe_rx_unrefreshed(rxr))
   2062 		ixgbe_refresh_mbufs(rxr, i);
   2063 
   2064 	rxr->next_to_check = i;
   2065 
   2066 #ifdef LRO
   2067 	/*
   2068 	 * Flush any outstanding LRO work
   2069 	 */
   2070 	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
   2071 		SLIST_REMOVE_HEAD(&lro->lro_active, next);
   2072 		tcp_lro_flush(lro, queued);
   2073 	}
   2074 #endif /* LRO */
   2075 
   2076 	IXGBE_RX_UNLOCK(rxr);
   2077 
   2078 	/*
   2079 	** Still have cleaning to do?
   2080 	*/
   2081 	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
   2082 		return true;
   2083 	else
   2084 		return false;
   2085 }
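         /*
          * Note: a TRUE return above means the last descriptor examined still
          * had its DD bit set, i.e. more completed descriptors may be waiting;
          * callers typically use this to decide whether to run another pass.
          */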
   2086 
   2087 
   2088 /*********************************************************************
   2089  *
   2090  *  Verify that the hardware indicated that the checksum is valid.
    2091  *  Inform the stack about the status of the checksum so that the
    2092  *  stack doesn't spend time verifying it.
   2093  *
   2094  *********************************************************************/
   2095 static void
   2096 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
   2097     struct ixgbe_hw_stats *stats)
   2098 {
   2099 	u16	status = (u16) staterr;
   2100 	u8	errors = (u8) (staterr >> 24);
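         	/*
         	 * The status bits live in the low 16 bits of staterr and the
         	 * error bits in its top byte, hence the two narrowing casts above.
         	 */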
   2101 #if 0
   2102 	bool	sctp = FALSE;
   2103 
   2104 	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
   2105 	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
   2106 		sctp = TRUE;
   2107 #endif
   2108 
   2109 	if (status & IXGBE_RXD_STAT_IPCS) {
   2110 		stats->ipcs.ev_count++;
   2111 		if (!(errors & IXGBE_RXD_ERR_IPE)) {
   2112 			/* IP Checksum Good */
   2113 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
   2114 
   2115 		} else {
   2116 			stats->ipcs_bad.ev_count++;
   2117 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
   2118 		}
   2119 	}
   2120 	if (status & IXGBE_RXD_STAT_L4CS) {
   2121 		stats->l4cs.ev_count++;
   2122 		int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
   2123 		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
   2124 			mp->m_pkthdr.csum_flags |= type;
   2125 		} else {
   2126 			stats->l4cs_bad.ev_count++;
   2127 			mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
   2128 		}
   2129 	}
   2130 	return;
   2131 }
   2132 
   2133 
   2134 /********************************************************************
   2135  * Manage DMA'able memory.
   2136  *******************************************************************/
   2137 
   2138 int
   2139 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
   2140 		struct ixgbe_dma_alloc *dma, const int mapflags)
   2141 {
   2142 	device_t dev = adapter->dev;
   2143 	int             r, rsegs;
   2144 
   2145 	r = ixgbe_dma_tag_create(adapter->osdep.dmat,	/* parent */
   2146 			       DBA_ALIGN, 0,	/* alignment, bounds */
   2147 			       size,	/* maxsize */
   2148 			       1,	/* nsegments */
   2149 			       size,	/* maxsegsize */
   2150 			       BUS_DMA_ALLOCNOW,	/* flags */
   2151 			       &dma->dma_tag);
   2152 	if (r != 0) {
   2153 		aprint_error_dev(dev,
   2154 		    "%s: ixgbe_dma_tag_create failed; error %d\n", __func__, r);
   2155 		goto fail_0;
   2156 	}
   2157 
   2158 	r = bus_dmamem_alloc(dma->dma_tag->dt_dmat,
   2159 		size,
   2160 		dma->dma_tag->dt_alignment,
   2161 		dma->dma_tag->dt_boundary,
   2162 		&dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
   2163 	if (r != 0) {
   2164 		aprint_error_dev(dev,
   2165 		    "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
   2166 		goto fail_1;
   2167 	}
   2168 
   2169 	r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
   2170 	    size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
   2171 	if (r != 0) {
   2172 		aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
   2173 		    __func__, r);
   2174 		goto fail_2;
   2175 	}
   2176 
   2177 	r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
   2178 	if (r != 0) {
    2179 		aprint_error_dev(dev, "%s: ixgbe_dmamap_create failed; error %d\n",
   2180 		    __func__, r);
   2181 		goto fail_3;
   2182 	}
   2183 
   2184 	r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map, dma->dma_vaddr,
   2185 			    size,
   2186 			    NULL,
   2187 			    mapflags | BUS_DMA_NOWAIT);
   2188 	if (r != 0) {
   2189 		aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
   2190 		    __func__, r);
   2191 		goto fail_4;
   2192 	}
   2193 	dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
   2194 	dma->dma_size = size;
   2195 	return 0;
   2196 fail_4:
   2197 	ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
   2198 fail_3:
   2199 	bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
   2200 fail_2:
   2201 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
   2202 fail_1:
   2203 	ixgbe_dma_tag_destroy(dma->dma_tag);
   2204 fail_0:
   2205 	return r;
   2206 }
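         /*
          * Typical use (see ixgbe_allocate_queues() below): descriptor rings
          * are allocated with, e.g.,
          *
          *	ixgbe_dma_malloc(adapter, tsize, &txr->txdma, BUS_DMA_NOWAIT);
          *
          * and released again with ixgbe_dma_free().
          */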
   2207 
   2208 void
   2209 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
   2210 {
   2211 	bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
   2212 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   2213 	ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
   2214 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
   2215 	ixgbe_dma_tag_destroy(dma->dma_tag);
   2216 }
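         /*
          * Note: unlike the unwind path in ixgbe_dma_malloc(), this routine
          * does not bus_dmamem_unmap() the mapping or ixgbe_dmamap_destroy()
          * the map before destroying the tag.
          */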
   2217 
   2218 
   2219 /*********************************************************************
   2220  *
   2221  *  Allocate memory for the transmit and receive rings, and then
   2222  *  the descriptors associated with each, called only once at attach.
   2223  *
   2224  **********************************************************************/
   2225 int
   2226 ixgbe_allocate_queues(struct adapter *adapter)
   2227 {
   2228 	device_t	dev = adapter->dev;
   2229 	struct ix_queue	*que;
   2230 	struct tx_ring	*txr;
   2231 	struct rx_ring	*rxr;
   2232 	int rsize, tsize, error = IXGBE_SUCCESS;
   2233 	int txconf = 0, rxconf = 0;
   2234 
    2235 	/* First allocate the top level queue structs */
    2236 	if (!(adapter->queues =
    2237 	    (struct ix_queue *) malloc(sizeof(struct ix_queue) *
    2238 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
    2239 		aprint_error_dev(dev, "Unable to allocate queue memory\n");
    2240 		error = ENOMEM;
    2241 		goto fail;
    2242 	}
   2243 
    2244 	/* Next allocate the TX ring struct memory */
   2245 	if (!(adapter->tx_rings =
   2246 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
   2247 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
   2248 		aprint_error_dev(dev, "Unable to allocate TX ring memory\n");
   2249 		error = ENOMEM;
   2250 		goto tx_fail;
   2251 	}
   2252 
    2253 	/* Then allocate the RX ring struct memory */
   2254 	if (!(adapter->rx_rings =
   2255 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
   2256 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
   2257 		aprint_error_dev(dev, "Unable to allocate RX ring memory\n");
   2258 		error = ENOMEM;
   2259 		goto rx_fail;
   2260 	}
   2261 
   2262 	/* For the ring itself */
   2263 	tsize = roundup2(adapter->num_tx_desc *
   2264 	    sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
   2265 
   2266 	/*
   2267 	 * Now set up the TX queues, txconf is needed to handle the
   2268 	 * possibility that things fail midcourse and we need to
   2269 	 * undo memory gracefully
   2270 	 */
   2271 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
   2272 		/* Set up some basics */
   2273 		txr = &adapter->tx_rings[i];
   2274 		txr->adapter = adapter;
   2275 		txr->me = i;
   2276 		txr->num_desc = adapter->num_tx_desc;
   2277 
   2278 		/* Initialize the TX side lock */
   2279 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
   2280 		    device_xname(dev), txr->me);
   2281 		mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
   2282 
   2283 		if (ixgbe_dma_malloc(adapter, tsize,
   2284 			&txr->txdma, BUS_DMA_NOWAIT)) {
   2285 			aprint_error_dev(dev,
   2286 			    "Unable to allocate TX Descriptor memory\n");
   2287 			error = ENOMEM;
   2288 			goto err_tx_desc;
   2289 		}
   2290 		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
   2291 		bzero((void *)txr->tx_base, tsize);
   2292 
    2293 		/* Now allocate transmit buffers for the ring */
    2294 		if (ixgbe_allocate_transmit_buffers(txr)) {
    2295 			aprint_error_dev(dev,
    2296 			    "Critical Failure setting up transmit buffers\n");
    2297 			error = ENOMEM;
    2298 			goto err_tx_desc;
    2299 		}
   2300 #ifndef IXGBE_LEGACY_TX
   2301 		/* Allocate a buf ring */
   2302 		txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
   2303 		    M_WAITOK, &txr->tx_mtx);
   2304 		if (txr->br == NULL) {
   2305 			aprint_error_dev(dev,
   2306 			    "Critical Failure setting up buf ring\n");
   2307 			error = ENOMEM;
   2308 			goto err_tx_desc;
    2309 		}
   2310 #endif
   2311 	}
   2312 
   2313 	/*
   2314 	 * Next the RX queues...
   2315 	 */
   2316 	rsize = roundup2(adapter->num_rx_desc *
   2317 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
   2318 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
   2319 		rxr = &adapter->rx_rings[i];
   2320 		/* Set up some basics */
   2321 		rxr->adapter = adapter;
   2322 		rxr->me = i;
   2323 		rxr->num_desc = adapter->num_rx_desc;
   2324 
   2325 		/* Initialize the RX side lock */
   2326 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
   2327 		    device_xname(dev), rxr->me);
   2328 		mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
   2329 
   2330 		if (ixgbe_dma_malloc(adapter, rsize,
   2331 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
   2332 			aprint_error_dev(dev,
    2333 			    "Unable to allocate RX Descriptor memory\n");
   2334 			error = ENOMEM;
   2335 			goto err_rx_desc;
   2336 		}
   2337 		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
   2338 		bzero((void *)rxr->rx_base, rsize);
   2339 
    2340 		/* Allocate receive buffers for the ring */
   2341 		if (ixgbe_allocate_receive_buffers(rxr)) {
   2342 			aprint_error_dev(dev,
   2343 			    "Critical Failure setting up receive buffers\n");
   2344 			error = ENOMEM;
   2345 			goto err_rx_desc;
   2346 		}
   2347 	}
   2348 
   2349 	/*
   2350 	** Finally set up the queue holding structs
   2351 	*/
   2352 	for (int i = 0; i < adapter->num_queues; i++) {
   2353 		que = &adapter->queues[i];
   2354 		que->adapter = adapter;
   2355 		que->me = i;
   2356 		que->txr = &adapter->tx_rings[i];
   2357 		que->rxr = &adapter->rx_rings[i];
   2358 	}
   2359 
   2360 	return (0);
   2361 
   2362 err_rx_desc:
   2363 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
   2364 		ixgbe_dma_free(adapter, &rxr->rxdma);
   2365 err_tx_desc:
   2366 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
   2367 		ixgbe_dma_free(adapter, &txr->txdma);
   2368 	free(adapter->rx_rings, M_DEVBUF);
   2369 rx_fail:
   2370 	free(adapter->tx_rings, M_DEVBUF);
   2371 tx_fail:
   2372 	free(adapter->queues, M_DEVBUF);
   2373 fail:
   2374 	return (error);
   2375 }
   2376 
   2377