      1 /* $NetBSD: ix_txrx.c,v 1.114 2023/12/28 10:13:51 msaitoh Exp $ */
      2 
      3 /******************************************************************************
      4 
      5   Copyright (c) 2001-2017, Intel Corporation
      6   All rights reserved.
      7 
      8   Redistribution and use in source and binary forms, with or without
      9   modification, are permitted provided that the following conditions are met:
     10 
     11    1. Redistributions of source code must retain the above copyright notice,
     12       this list of conditions and the following disclaimer.
     13 
     14    2. Redistributions in binary form must reproduce the above copyright
     15       notice, this list of conditions and the following disclaimer in the
     16       documentation and/or other materials provided with the distribution.
     17 
     18    3. Neither the name of the Intel Corporation nor the names of its
     19       contributors may be used to endorse or promote products derived from
     20       this software without specific prior written permission.
     21 
     22   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     23   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     24   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     25   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     26   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     27   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     28   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     29   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     30   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     31   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     32   POSSIBILITY OF SUCH DAMAGE.
     33 
     34 ******************************************************************************/
     35 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 327031 2017-12-20 18:15:06Z erj $*/
     36 
     37 /*
     38  * Copyright (c) 2011 The NetBSD Foundation, Inc.
     39  * All rights reserved.
     40  *
     41  * This code is derived from software contributed to The NetBSD Foundation
     42  * by Coyote Point Systems, Inc.
     43  *
     44  * Redistribution and use in source and binary forms, with or without
     45  * modification, are permitted provided that the following conditions
     46  * are met:
     47  * 1. Redistributions of source code must retain the above copyright
     48  *    notice, this list of conditions and the following disclaimer.
     49  * 2. Redistributions in binary form must reproduce the above copyright
     50  *    notice, this list of conditions and the following disclaimer in the
     51  *    documentation and/or other materials provided with the distribution.
     52  *
     53  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     54  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     55  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     56  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     57  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     58  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     59  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     60  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     61  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     62  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     63  * POSSIBILITY OF SUCH DAMAGE.
     64  */
     65 
     66 #include <sys/cdefs.h>
     67 __KERNEL_RCSID(0, "$NetBSD: ix_txrx.c,v 1.114 2023/12/28 10:13:51 msaitoh Exp $");
     68 
     69 #include "opt_inet.h"
     70 #include "opt_inet6.h"
     71 
     72 #include "ixgbe.h"
     73 
/*
 * HW RSC control:
 *  This feature only works with IPv4, and only on the 82599 and
 *  later.  It also causes IP forwarding to fail and, unlike LRO,
 *  it cannot be controlled by the stack.  For all these reasons it
 *  is left off by default and no tunable is provided; enabling it
 *  requires recompiling with this set to TRUE.
 */
     86 static bool ixgbe_rsc_enable = FALSE;
     87 
     88 #ifdef IXGBE_FDIR
/*
 * For Flow Director: this is the number of TX packets we sample
 * for the filter pool; with the default of 20, every 20th packet
 * is probed.
 *
 * Setting this to 0 disables the feature.
 */
     98 static int atr_sample_rate = 20;
     99 #endif
    100 
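/*
 * IXGBE_M_ADJ: shift the start of a freshly allocated RX mbuf by
 * ETHER_ALIGN (2 bytes) so the IP header ends up 32-bit aligned,
 * but only when the largest possible frame still fits in the
 * cluster after the shift.
 */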
    101 #define IXGBE_M_ADJ(sc, rxr, mp)					\
    102 	if (sc->max_frame_size <= (rxr->mbuf_sz - ETHER_ALIGN))	\
    103 		m_adj(mp, ETHER_ALIGN)
    104 
    105 /************************************************************************
    106  *  Local Function prototypes
    107  ************************************************************************/
    108 static void          ixgbe_setup_transmit_ring(struct tx_ring *);
    109 static void          ixgbe_free_transmit_buffers(struct tx_ring *);
    110 static int           ixgbe_setup_receive_ring(struct rx_ring *);
    111 static void          ixgbe_free_receive_buffers(struct rx_ring *);
    112 static void          ixgbe_rx_checksum(u32, struct mbuf *, u32,
    113                                        struct ixgbe_hw_stats *);
    114 static void          ixgbe_refresh_mbufs(struct rx_ring *, int);
    115 static void          ixgbe_drain(struct ifnet *, struct tx_ring *);
    116 static int           ixgbe_xmit(struct tx_ring *, struct mbuf *);
    117 static int           ixgbe_tx_ctx_setup(struct tx_ring *,
    118                                         struct mbuf *, u32 *, u32 *);
    119 static int           ixgbe_tso_setup(struct tx_ring *,
    120                                      struct mbuf *, u32 *, u32 *);
    121 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
    122 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
    123                                     struct mbuf *, u32);
    124 static int           ixgbe_dma_malloc(struct ixgbe_softc *, bus_size_t,
    125                                       struct ixgbe_dma_alloc *, int);
    126 static void          ixgbe_dma_free(struct ixgbe_softc *, struct ixgbe_dma_alloc *);
    127 
    128 static void	     ixgbe_setup_hw_rsc(struct rx_ring *);
    129 
    130 /************************************************************************
    131  * ixgbe_legacy_start_locked - Transmit entry point
    132  *
    133  *   Called by the stack to initiate a transmit.
    134  *   The driver will remain in this routine as long as there are
    135  *   packets to transmit and transmit resources are available.
    136  *   In case resources are not available, the stack is notified
    137  *   and the packet is requeued.
    138  ************************************************************************/
    139 int
    140 ixgbe_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
    141 {
    142 	int rc;
    143 	struct mbuf    *m_head;
    144 	struct ixgbe_softc *sc = txr->sc;
    145 
    146 	IXGBE_TX_LOCK_ASSERT(txr);
    147 
    148 	if (sc->link_active != LINK_STATE_UP) {
    149 		/*
		 * Discard all packets buffered in the IFQ to avoid
		 * sending stale packets when the link comes back up.
    152 		 */
    153 		ixgbe_drain(ifp, txr);
    154 		return (ENETDOWN);
    155 	}
    156 	if ((ifp->if_flags & IFF_RUNNING) == 0)
    157 		return (ENETDOWN);
    158 	if (txr->txr_no_space)
    159 		return (ENETDOWN);
    160 
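	/*
	 * The packet is only IFQ_POLL'ed here; it is dequeued after
	 * ixgbe_xmit() succeeds or fails permanently, so an EAGAIN
	 * leaves it at the head of if_snd for a later retry.
	 */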
    161 	while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
    162 		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
    163 			break;
    164 
    165 		IFQ_POLL(&ifp->if_snd, m_head);
    166 		if (m_head == NULL)
    167 			break;
    168 
    169 		if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
    170 			break;
    171 		}
    172 		IFQ_DEQUEUE(&ifp->if_snd, m_head);
    173 		if (rc != 0) {
    174 			m_freem(m_head);
    175 			continue;
    176 		}
    177 
    178 		/* Send a copy of the frame to the BPF listener */
    179 		bpf_mtap(ifp, m_head, BPF_D_OUT);
    180 	}
    181 
    182 	return IXGBE_SUCCESS;
    183 } /* ixgbe_legacy_start_locked */
    184 
    185 /************************************************************************
    186  * ixgbe_legacy_start
    187  *
    188  *   Called by the stack, this always uses the first tx ring,
    189  *   and should not be used with multiqueue tx enabled.
    190  ************************************************************************/
    191 void
    192 ixgbe_legacy_start(struct ifnet *ifp)
    193 {
    194 	struct ixgbe_softc *sc = ifp->if_softc;
    195 	struct tx_ring *txr = sc->tx_rings;
    196 
    197 	if (ifp->if_flags & IFF_RUNNING) {
    198 		IXGBE_TX_LOCK(txr);
    199 		ixgbe_legacy_start_locked(ifp, txr);
    200 		IXGBE_TX_UNLOCK(txr);
    201 	}
    202 } /* ixgbe_legacy_start */
    203 
    204 /************************************************************************
    205  * ixgbe_mq_start - Multiqueue Transmit Entry Point
    206  *
    207  *   (if_transmit function)
    208  ************************************************************************/
    209 int
    210 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
    211 {
    212 	struct ixgbe_softc *sc = ifp->if_softc;
    213 	struct tx_ring	*txr;
    214 	int		i;
    215 #ifdef RSS
    216 	uint32_t bucket_id;
    217 #endif
    218 
	/*
	 * When doing RSS, map the packet to the same outbound queue
	 * that its incoming flow would be mapped to.
	 *
	 * If everything is set up correctly, this should be the same
	 * bucket that the CPU we are currently running on belongs to.
	 */
    226 #ifdef RSS
    227 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
    228 		if ((sc->feat_en & IXGBE_FEATURE_RSS) &&
    229 		    (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
    230 		    &bucket_id) == 0)) {
    231 			i = bucket_id % sc->num_queues;
    232 #ifdef IXGBE_DEBUG
    233 			if (bucket_id > sc->num_queues)
    234 				if_printf(ifp,
    235 				    "bucket_id (%d) > num_queues (%d)\n",
    236 				    bucket_id, sc->num_queues);
    237 #endif
    238 		} else
    239 			i = m->m_pkthdr.flowid % sc->num_queues;
    240 	} else
#endif /* RSS */
    242 		i = (cpu_index(curcpu()) % ncpu) % sc->num_queues;
    243 
    244 	/* Check for a hung queue and pick alternative */
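	/*
	 * XXX ffs64() returns a 1-based bit position (0 if no bit is
	 * set), so this fallback looks as if it may pick one queue
	 * past the lowest active one.
	 */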
    245 	if (((1ULL << i) & sc->active_queues) == 0)
    246 		i = ffs64(sc->active_queues);
    247 
    248 	txr = &sc->tx_rings[i];
    249 
    250 	if (__predict_false(!pcq_put(txr->txr_interq, m))) {
    251 		m_freem(m);
    252 		IXGBE_EVC_ADD(&txr->pcq_drops, 1);
    253 		return ENOBUFS;
    254 	}
    255 #ifdef IXGBE_ALWAYS_TXDEFER
    256 	kpreempt_disable();
    257 	softint_schedule(txr->txr_si);
    258 	kpreempt_enable();
    259 #else
    260 	if (IXGBE_TX_TRYLOCK(txr)) {
    261 		ixgbe_mq_start_locked(ifp, txr);
    262 		IXGBE_TX_UNLOCK(txr);
    263 	} else {
    264 		if (sc->txrx_use_workqueue) {
    265 			u_int *enqueued;
    266 
			/*
			 * This function itself is not called in interrupt
			 * context, but it can be called in fast softint
			 * context right after receiving forwarded packets.
			 * The workqueue must therefore be protected against
			 * being enqueued twice when the machine handles both
			 * locally generated and forwarded packets.
			 */
    275 			enqueued = percpu_getref(sc->txr_wq_enqueued);
    276 			if (*enqueued == 0) {
    277 				*enqueued = 1;
    278 				percpu_putref(sc->txr_wq_enqueued);
    279 				workqueue_enqueue(sc->txr_wq,
    280 				    &txr->wq_cookie, curcpu());
    281 			} else
    282 				percpu_putref(sc->txr_wq_enqueued);
    283 		} else {
    284 			kpreempt_disable();
    285 			softint_schedule(txr->txr_si);
    286 			kpreempt_enable();
    287 		}
    288 	}
    289 #endif
    290 
    291 	return (0);
    292 } /* ixgbe_mq_start */
    293 
    294 /************************************************************************
    295  * ixgbe_mq_start_locked
    296  ************************************************************************/
    297 int
    298 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
    299 {
    300 	struct mbuf    *next;
    301 	int            enqueued = 0, err = 0;
    302 
    303 	if (txr->sc->link_active != LINK_STATE_UP) {
    304 		/*
		 * Discard all packets buffered in txr_interq to avoid
		 * sending stale packets when the link comes back up.
    307 		 */
    308 		ixgbe_drain(ifp, txr);
    309 		return (ENETDOWN);
    310 	}
    311 	if ((ifp->if_flags & IFF_RUNNING) == 0)
    312 		return (ENETDOWN);
    313 	if (txr->txr_no_space)
    314 		return (ENETDOWN);
    315 
    316 	/* Process the queue */
    317 	while ((next = pcq_get(txr->txr_interq)) != NULL) {
    318 		if ((err = ixgbe_xmit(txr, next)) != 0) {
    319 			m_freem(next);
    320 			/* All errors are counted in ixgbe_xmit() */
    321 			break;
    322 		}
    323 		enqueued++;
    324 #if __FreeBSD_version >= 1100036
		/*
		 * Since we're looking at the tx ring, we can check
		 * the feature flags to see if we're a VF and, if so,
		 * count outgoing multicasts.
		 */
    330 		if ((txr->sc->feat_en & IXGBE_FEATURE_VF) &&
    331 		    (next->m_flags & M_MCAST))
    332 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
    333 #endif
    334 		/* Send a copy of the frame to the BPF listener */
    335 		bpf_mtap(ifp, next, BPF_D_OUT);
    336 		if ((ifp->if_flags & IFF_RUNNING) == 0)
    337 			break;
    338 	}
    339 
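	/* Reclaim completed descriptors when the ring is running low */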
    340 	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->sc))
    341 		ixgbe_txeof(txr);
    342 
    343 	return (err);
    344 } /* ixgbe_mq_start_locked */
    345 
    346 /************************************************************************
    347  * ixgbe_deferred_mq_start
    348  *
    349  *   Called from a softint and workqueue (indirectly) to drain queued
    350  *   transmit packets.
    351  ************************************************************************/
    352 void
    353 ixgbe_deferred_mq_start(void *arg)
    354 {
    355 	struct tx_ring *txr = arg;
    356 	struct ixgbe_softc *sc = txr->sc;
    357 	struct ifnet   *ifp = sc->ifp;
    358 
    359 	IXGBE_TX_LOCK(txr);
    360 	if (pcq_peek(txr->txr_interq) != NULL)
    361 		ixgbe_mq_start_locked(ifp, txr);
    362 	IXGBE_TX_UNLOCK(txr);
    363 } /* ixgbe_deferred_mq_start */
    364 
    365 /************************************************************************
    366  * ixgbe_deferred_mq_start_work
    367  *
    368  *   Called from a workqueue to drain queued transmit packets.
    369  ************************************************************************/
    370 void
    371 ixgbe_deferred_mq_start_work(struct work *wk, void *arg)
    372 {
    373 	struct tx_ring *txr = container_of(wk, struct tx_ring, wq_cookie);
    374 	struct ixgbe_softc *sc = txr->sc;
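	/*
	 * Clear the per-CPU "already enqueued" flag set by
	 * ixgbe_mq_start() so that a new work item can be scheduled
	 * once we start draining.
	 */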
    375 	u_int *enqueued = percpu_getref(sc->txr_wq_enqueued);
    376 	*enqueued = 0;
    377 	percpu_putref(sc->txr_wq_enqueued);
    378 
    379 	ixgbe_deferred_mq_start(txr);
} /* ixgbe_deferred_mq_start_work */
    381 
    382 /************************************************************************
    383  * ixgbe_drain_all
    384  ************************************************************************/
    385 void
    386 ixgbe_drain_all(struct ixgbe_softc *sc)
    387 {
    388 	struct ifnet *ifp = sc->ifp;
    389 	struct ix_queue *que = sc->queues;
    390 
    391 	for (int i = 0; i < sc->num_queues; i++, que++) {
    392 		struct tx_ring  *txr = que->txr;
    393 
    394 		IXGBE_TX_LOCK(txr);
    395 		ixgbe_drain(ifp, txr);
    396 		IXGBE_TX_UNLOCK(txr);
    397 	}
    398 }
    399 
    400 /************************************************************************
    401  * ixgbe_xmit
    402  *
    403  *   Maps the mbufs to tx descriptors, allowing the
    404  *   TX engine to transmit the packets.
    405  *
    406  *   Return 0 on success, positive on failure
    407  ************************************************************************/
    408 static int
    409 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
    410 {
    411 	struct ixgbe_softc      *sc = txr->sc;
    412 	struct ixgbe_tx_buf     *txbuf;
    413 	union ixgbe_adv_tx_desc *txd = NULL;
    414 	struct ifnet	        *ifp = sc->ifp;
    415 	int                     i, j, error;
    416 	int                     first;
    417 	u32                     olinfo_status = 0, cmd_type_len;
    418 	bool                    remap = TRUE;
    419 	bus_dmamap_t            map;
    420 
    421 	/* Basic descriptor defines */
    422 	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
    423 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
    424 
    425 	if (vlan_has_tag(m_head))
    426 		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
    427 
	/*
	 * Capture the first descriptor used: its tx_buffer will later
	 * record the EOP descriptor that we ask the hardware to
	 * report completion on.
	 */
    433 	first = txr->next_avail_desc;
    434 	txbuf = &txr->tx_buffers[first];
    435 	map = txbuf->map;
    436 
    437 	/*
    438 	 * Map the packet for DMA.
    439 	 */
    440 retry:
    441 	error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map, m_head,
    442 	    BUS_DMA_NOWAIT);
    443 
    444 	if (__predict_false(error)) {
    445 		struct mbuf *m;
    446 
    447 		switch (error) {
    448 		case EAGAIN:
    449 			txr->q_eagain_tx_dma_setup++;
    450 			return EAGAIN;
    451 		case ENOMEM:
    452 			txr->q_enomem_tx_dma_setup++;
    453 			return EAGAIN;
    454 		case EFBIG:
    455 			/* Try it again? - one try */
    456 			if (remap == TRUE) {
    457 				remap = FALSE;
    458 				/*
    459 				 * XXX: m_defrag will choke on
    460 				 * non-MCLBYTES-sized clusters
    461 				 */
    462 				txr->q_efbig_tx_dma_setup++;
    463 				m = m_defrag(m_head, M_NOWAIT);
    464 				if (m == NULL) {
    465 					txr->q_mbuf_defrag_failed++;
    466 					return ENOBUFS;
    467 				}
    468 				m_head = m;
    469 				goto retry;
    470 			} else {
    471 				txr->q_efbig2_tx_dma_setup++;
    472 				return error;
    473 			}
    474 		case EINVAL:
    475 			txr->q_einval_tx_dma_setup++;
    476 			return error;
    477 		default:
    478 			txr->q_other_tx_dma_setup++;
    479 			return error;
    480 		}
    481 	}
    482 
    483 	/* Make certain there are enough descriptors */
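	/*
	 * The "+ 2" keeps headroom for the offload context descriptor
	 * that may be added below plus one spare slot (presumably so
	 * the tail never fully catches up with the head).
	 */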
    484 	if (txr->tx_avail < (map->dm_nsegs + 2)) {
    485 		txr->txr_no_space = true;
    486 		IXGBE_EVC_ADD(&txr->no_desc_avail, 1);
    487 		ixgbe_dmamap_unload(txr->txtag, txbuf->map);
    488 		return EAGAIN;
    489 	}
    490 
    491 	/*
	 * Set up the appropriate offload context if requested;
	 * this may consume one TX descriptor.
    494 	 */
    495 	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
    496 	if (__predict_false(error)) {
    497 		return (error);
    498 	}
    499 
    500 #ifdef IXGBE_FDIR
    501 	/* Do the flow director magic */
    502 	if ((sc->feat_en & IXGBE_FEATURE_FDIR) &&
    503 	    (txr->atr_sample) && (!sc->fdir_reinit)) {
    504 		++txr->atr_count;
    505 		if (txr->atr_count >= atr_sample_rate) {
    506 			ixgbe_atr(txr, m_head);
    507 			txr->atr_count = 0;
    508 		}
    509 	}
    510 #endif
    511 
    512 	olinfo_status |= IXGBE_ADVTXD_CC;
    513 	i = txr->next_avail_desc;
    514 	for (j = 0; j < map->dm_nsegs; j++) {
    515 		bus_size_t seglen;
    516 		uint64_t segaddr;
    517 
    518 		txbuf = &txr->tx_buffers[i];
    519 		txd = &txr->tx_base[i];
    520 		seglen = map->dm_segs[j].ds_len;
    521 		segaddr = htole64(map->dm_segs[j].ds_addr);
    522 
    523 		txd->read.buffer_addr = segaddr;
    524 		txd->read.cmd_type_len = htole32(cmd_type_len | seglen);
    525 		txd->read.olinfo_status = htole32(olinfo_status);
    526 
    527 		if (++i == txr->num_desc)
    528 			i = 0;
    529 	}
    530 
    531 	txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
    532 	txr->tx_avail -= map->dm_nsegs;
    533 	txr->next_avail_desc = i;
    534 
    535 	txbuf->m_head = m_head;
	/*
	 * Swap the maps: the last descriptor, which gets the
	 * completion interrupt, keeps the map that is actually loaded
	 * with the packet, while the first descriptor takes over the
	 * unused map from this slot.
	 */
    542 	txr->tx_buffers[first].map = txbuf->map;
    543 	txbuf->map = map;
    544 	bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
    545 	    BUS_DMASYNC_PREWRITE);
    546 
    547 	/* Set the EOP descriptor that will be marked done */
    548 	txbuf = &txr->tx_buffers[first];
    549 	txbuf->eop = txd;
    550 
    551 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
    552 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
    553 	/*
	 * Advance the Transmit Descriptor Tail (TDT); this tells the
	 * hardware that this frame is available to transmit.
    556 	 */
    557 	IXGBE_EVC_ADD(&txr->total_packets, 1);
    558 	IXGBE_WRITE_REG(&sc->hw, txr->tail, i);
    559 
    560 	net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
    561 	if_statadd_ref(nsr, if_obytes, m_head->m_pkthdr.len);
    562 	if (m_head->m_flags & M_MCAST)
    563 		if_statinc_ref(nsr, if_omcasts);
    564 	IF_STAT_PUTREF(ifp);
    565 
    566 	/* Mark queue as having work */
    567 	if (txr->busy == 0)
    568 		txr->busy = 1;
    569 
    570 	return (0);
    571 } /* ixgbe_xmit */
    572 
    573 /************************************************************************
    574  * ixgbe_drain
    575  ************************************************************************/
    576 static void
    577 ixgbe_drain(struct ifnet *ifp, struct tx_ring *txr)
    578 {
    579 	struct mbuf *m;
    580 
    581 	IXGBE_TX_LOCK_ASSERT(txr);
    582 
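	/*
	 * if_snd is only used by the legacy (single queue) transmit
	 * path, which always runs on the first ring, so drain it from
	 * ring 0 only.
	 */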
    583 	if (txr->me == 0) {
    584 		while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
    585 			IFQ_DEQUEUE(&ifp->if_snd, m);
    586 			m_freem(m);
    587 			IF_DROP(&ifp->if_snd);
    588 		}
    589 	}
    590 
    591 	while ((m = pcq_get(txr->txr_interq)) != NULL) {
    592 		m_freem(m);
    593 		IXGBE_EVC_ADD(&txr->pcq_drops, 1);
    594 	}
    595 }
    596 
    597 /************************************************************************
    598  * ixgbe_allocate_transmit_buffers
    599  *
    600  *   Allocate memory for tx_buffer structures. The tx_buffer stores all
    601  *   the information needed to transmit a packet on the wire. This is
 *   called only once at attach; setup is done on every reset.
    603  ************************************************************************/
    604 static int
    605 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
    606 {
    607 	struct ixgbe_softc  *sc = txr->sc;
    608 	device_t            dev = sc->dev;
    609 	struct ixgbe_tx_buf *txbuf;
    610 	int                 error, i;
    611 
    612 	/*
    613 	 * Setup DMA descriptor areas.
    614 	 */
    615 	error = ixgbe_dma_tag_create(
    616 	         /*      parent */ sc->osdep.dmat,
    617 	         /*   alignment */ 1,
    618 	         /*      bounds */ 0,
    619 	         /*     maxsize */ IXGBE_TSO_SIZE,
    620 	         /*   nsegments */ sc->num_segs,
    621 	         /*  maxsegsize */ PAGE_SIZE,
    622 	         /*       flags */ 0,
    623 	                           &txr->txtag);
    624 	if (error != 0) {
		aprint_error_dev(dev, "Unable to allocate TX DMA tag\n");
    626 		goto fail;
    627 	}
    628 
    629 	txr->tx_buffers = kmem_zalloc(sizeof(struct ixgbe_tx_buf) *
    630 	    sc->num_tx_desc, KM_SLEEP);
    631 
    632 	/* Create the descriptor buffer dma maps */
    633 	txbuf = txr->tx_buffers;
    634 	for (i = 0; i < sc->num_tx_desc; i++, txbuf++) {
    635 		error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
    636 		if (error != 0) {
    637 			aprint_error_dev(dev,
    638 			    "Unable to create TX DMA map (%d)\n", error);
    639 			goto fail;
    640 		}
    641 	}
    642 
    643 	return 0;
    644 fail:
	/* Free everything; this handles a failure partway through */
    646 #if 0 /* XXX was FreeBSD */
    647 	ixgbe_free_transmit_structures(sc);
    648 #else
    649 	ixgbe_free_transmit_buffers(txr);
    650 #endif
    651 	return (error);
    652 } /* ixgbe_allocate_transmit_buffers */
    653 
    654 /************************************************************************
    655  * ixgbe_setup_transmit_ring - Initialize a transmit ring.
    656  ************************************************************************/
    657 static void
    658 ixgbe_setup_transmit_ring(struct tx_ring *txr)
    659 {
    660 	struct ixgbe_softc    *sc = txr->sc;
    661 	struct ixgbe_tx_buf   *txbuf;
    662 #ifdef DEV_NETMAP
	struct netmap_adapter *na = NA(sc->ifp);
    664 	struct netmap_slot    *slot;
    665 #endif /* DEV_NETMAP */
    666 
    667 	/* Clear the old ring contents */
    668 	IXGBE_TX_LOCK(txr);
    669 
    670 #ifdef DEV_NETMAP
    671 	if (sc->feat_en & IXGBE_FEATURE_NETMAP) {
    672 		/*
    673 		 * (under lock): if in netmap mode, do some consistency
    674 		 * checks and set slot to entry 0 of the netmap ring.
    675 		 */
    676 		slot = netmap_reset(na, NR_TX, txr->me, 0);
    677 	}
    678 #endif /* DEV_NETMAP */
    679 
    680 	bzero((void *)txr->tx_base,
    681 	    (sizeof(union ixgbe_adv_tx_desc)) * sc->num_tx_desc);
    682 	/* Reset indices */
    683 	txr->next_avail_desc = 0;
    684 	txr->next_to_clean = 0;
    685 
    686 	/* Free any existing tx buffers. */
    687 	txbuf = txr->tx_buffers;
    688 	for (int i = 0; i < txr->num_desc; i++, txbuf++) {
    689 		if (txbuf->m_head != NULL) {
    690 			bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
    691 			    0, txbuf->m_head->m_pkthdr.len,
    692 			    BUS_DMASYNC_POSTWRITE);
    693 			ixgbe_dmamap_unload(txr->txtag, txbuf->map);
    694 			m_freem(txbuf->m_head);
    695 			txbuf->m_head = NULL;
    696 		}
    697 
    698 #ifdef DEV_NETMAP
    699 		/*
    700 		 * In netmap mode, set the map for the packet buffer.
    701 		 * NOTE: Some drivers (not this one) also need to set
    702 		 * the physical buffer address in the NIC ring.
    703 		 * Slots in the netmap ring (indexed by "si") are
    704 		 * kring->nkr_hwofs positions "ahead" wrt the
    705 		 * corresponding slot in the NIC ring. In some drivers
    706 		 * (not here) nkr_hwofs can be negative. Function
    707 		 * netmap_idx_n2k() handles wraparounds properly.
    708 		 */
    709 		if ((sc->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
    710 			int si = netmap_idx_n2k(na->tx_rings[txr->me], i);
    711 			netmap_load_map(na, txr->txtag,
    712 			    txbuf->map, NMB(na, slot + si));
    713 		}
    714 #endif /* DEV_NETMAP */
    715 
    716 		/* Clear the EOP descriptor pointer */
    717 		txbuf->eop = NULL;
    718 	}
    719 
    720 #ifdef IXGBE_FDIR
    721 	/* Set the rate at which we sample packets */
    722 	if (sc->feat_en & IXGBE_FEATURE_FDIR)
    723 		txr->atr_sample = atr_sample_rate;
    724 #endif
    725 
    726 	/* Set number of descriptors available */
    727 	txr->tx_avail = sc->num_tx_desc;
    728 
    729 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
    730 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
    731 	IXGBE_TX_UNLOCK(txr);
    732 } /* ixgbe_setup_transmit_ring */
    733 
    734 /************************************************************************
    735  * ixgbe_setup_transmit_structures - Initialize all transmit rings.
    736  ************************************************************************/
    737 int
    738 ixgbe_setup_transmit_structures(struct ixgbe_softc *sc)
    739 {
    740 	struct tx_ring *txr = sc->tx_rings;
    741 
    742 	for (int i = 0; i < sc->num_queues; i++, txr++)
    743 		ixgbe_setup_transmit_ring(txr);
    744 
    745 	return (0);
    746 } /* ixgbe_setup_transmit_structures */
    747 
    748 /************************************************************************
    749  * ixgbe_free_transmit_structures - Free all transmit rings.
    750  ************************************************************************/
    751 void
    752 ixgbe_free_transmit_structures(struct ixgbe_softc *sc)
    753 {
    754 	struct tx_ring *txr = sc->tx_rings;
    755 
    756 	for (int i = 0; i < sc->num_queues; i++, txr++) {
    757 		ixgbe_free_transmit_buffers(txr);
    758 		ixgbe_dma_free(sc, &txr->txdma);
    759 		IXGBE_TX_LOCK_DESTROY(txr);
    760 	}
    761 	kmem_free(sc->tx_rings, sizeof(struct tx_ring) * sc->num_queues);
    762 } /* ixgbe_free_transmit_structures */
    763 
    764 /************************************************************************
    765  * ixgbe_free_transmit_buffers
    766  *
    767  *   Free transmit ring related data structures.
    768  ************************************************************************/
    769 static void
    770 ixgbe_free_transmit_buffers(struct tx_ring *txr)
    771 {
    772 	struct ixgbe_softc  *sc = txr->sc;
    773 	struct ixgbe_tx_buf *tx_buffer;
    774 	int                 i;
    775 
    776 	INIT_DEBUGOUT("ixgbe_free_transmit_buffers: begin");
    777 
    778 	if (txr->tx_buffers == NULL)
    779 		return;
    780 
    781 	tx_buffer = txr->tx_buffers;
    782 	for (i = 0; i < sc->num_tx_desc; i++, tx_buffer++) {
    783 		if (tx_buffer->m_head != NULL) {
    784 			bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
    785 			    0, tx_buffer->m_head->m_pkthdr.len,
    786 			    BUS_DMASYNC_POSTWRITE);
    787 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
    788 			m_freem(tx_buffer->m_head);
    789 			tx_buffer->m_head = NULL;
    790 			if (tx_buffer->map != NULL) {
    791 				ixgbe_dmamap_destroy(txr->txtag,
    792 				    tx_buffer->map);
    793 				tx_buffer->map = NULL;
    794 			}
    795 		} else if (tx_buffer->map != NULL) {
    796 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
    797 			ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
    798 			tx_buffer->map = NULL;
    799 		}
    800 	}
    801 	if (txr->txr_interq != NULL) {
    802 		struct mbuf *m;
    803 
    804 		while ((m = pcq_get(txr->txr_interq)) != NULL)
    805 			m_freem(m);
    806 		pcq_destroy(txr->txr_interq);
    807 	}
    808 	if (txr->tx_buffers != NULL) {
    809 		kmem_free(txr->tx_buffers,
    810 		    sizeof(struct ixgbe_tx_buf) * sc->num_tx_desc);
    811 		txr->tx_buffers = NULL;
    812 	}
    813 	if (txr->txtag != NULL) {
    814 		ixgbe_dma_tag_destroy(txr->txtag);
    815 		txr->txtag = NULL;
    816 	}
    817 } /* ixgbe_free_transmit_buffers */
    818 
    819 /************************************************************************
    820  * ixgbe_tx_ctx_setup
    821  *
    822  *   Advanced Context Descriptor setup for VLAN, CSUM or TSO
    823  ************************************************************************/
    824 static int
    825 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
    826     u32 *cmd_type_len, u32 *olinfo_status)
    827 {
    828 	struct ixgbe_softc               *sc = txr->sc;
    829 	struct ixgbe_adv_tx_context_desc *TXD;
    830 	struct ether_vlan_header         *eh;
    831 #ifdef INET
    832 	struct ip                        *ip;
    833 #endif
    834 #ifdef INET6
    835 	struct ip6_hdr                   *ip6;
    836 #endif
    837 	int                              ehdrlen, ip_hlen = 0;
    838 	int                              offload = TRUE;
    839 	int                              ctxd = txr->next_avail_desc;
    840 	u32                              vlan_macip_lens = 0;
    841 	u32                              type_tucmd_mlhl = 0;
    842 	u16                              vtag = 0;
    843 	u16                              etype;
    844 	u8                               ipproto = 0;
    845 	char                             *l3d;
    846 
    847 	/* First check if TSO is to be used */
    848 	if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4 | M_CSUM_TSOv6)) {
    849 		int rv = ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status);
    850 
    851 		if (rv != 0)
    852 			IXGBE_EVC_ADD(&sc->tso_err, 1);
    853 		return rv;
    854 	}
    855 
    856 	if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
    857 		offload = FALSE;
    858 
    859 	/* Indicate the whole packet as payload when not doing TSO */
    860 	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
    861 
    862 	/*
    863 	 * In advanced descriptors the vlan tag must
    864 	 * be placed into the context descriptor. Hence
    865 	 * we need to make one even if not doing offloads.
    866 	 */
    867 	if (vlan_has_tag(mp)) {
    868 		vtag = htole16(vlan_get_tag(mp));
    869 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
    870 	} else if (!(txr->sc->feat_en & IXGBE_FEATURE_NEEDS_CTXD) &&
    871 	           (offload == FALSE))
    872 		return (0);
    873 
    874 	/*
    875 	 * Determine where frame payload starts.
    876 	 * Jump over vlan headers if already present,
    877 	 * helpful for QinQ too.
    878 	 */
    879 	KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
    880 	eh = mtod(mp, struct ether_vlan_header *);
    881 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
    882 		KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
    883 		etype = ntohs(eh->evl_proto);
    884 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
    885 	} else {
    886 		etype = ntohs(eh->evl_encap_proto);
    887 		ehdrlen = ETHER_HDR_LEN;
    888 	}
    889 
    890 	/* Set the ether header length */
    891 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
    892 
    893 	if (offload == FALSE)
    894 		goto no_offloads;
    895 
    896 	/*
    897 	 * If the first mbuf only includes the ethernet header,
    898 	 * jump to the next one
    899 	 * XXX: This assumes the stack splits mbufs containing headers
    900 	 *      on header boundaries
    901 	 * XXX: And assumes the entire IP header is contained in one mbuf
    902 	 */
    903 	if (mp->m_len == ehdrlen && mp->m_next)
    904 		l3d = mtod(mp->m_next, char *);
    905 	else
    906 		l3d = mtod(mp, char *) + ehdrlen;
    907 
    908 	switch (etype) {
    909 #ifdef INET
    910 	case ETHERTYPE_IP:
    911 		ip = (struct ip *)(l3d);
    912 		ip_hlen = ip->ip_hl << 2;
    913 		ipproto = ip->ip_p;
    914 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
    915 		KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
    916 		    ip->ip_sum == 0);
    917 		break;
    918 #endif
    919 #ifdef INET6
    920 	case ETHERTYPE_IPV6:
    921 		ip6 = (struct ip6_hdr *)(l3d);
    922 		ip_hlen = sizeof(struct ip6_hdr);
    923 		ipproto = ip6->ip6_nxt;
    924 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
    925 		break;
    926 #endif
    927 	default:
    928 		offload = false;
    929 		break;
    930 	}
    931 
    932 	if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
    933 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
    934 
    935 	vlan_macip_lens |= ip_hlen;
    936 
    937 	/* No support for offloads for non-L4 next headers */
    938 	switch (ipproto) {
    939 	case IPPROTO_TCP:
    940 		if (mp->m_pkthdr.csum_flags &
    941 		    (M_CSUM_TCPv4 | M_CSUM_TCPv6))
    942 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
    943 		else
    944 			offload = false;
    945 		break;
    946 	case IPPROTO_UDP:
    947 		if (mp->m_pkthdr.csum_flags &
    948 		    (M_CSUM_UDPv4 | M_CSUM_UDPv6))
    949 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
    950 		else
    951 			offload = false;
    952 		break;
    953 	default:
    954 		offload = false;
    955 		break;
    956 	}
    957 
    958 	if (offload) /* Insert L4 checksum into data descriptors */
    959 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
    960 
    961 no_offloads:
    962 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
    963 
    964 	/* Now ready a context descriptor */
    965 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
    966 
    967 	/* Now copy bits into descriptor */
    968 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
    969 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
    970 	TXD->seqnum_seed = htole32(0);
    971 	TXD->mss_l4len_idx = htole32(0);
    972 
    973 	/* We've consumed the first desc, adjust counters */
    974 	if (++ctxd == txr->num_desc)
    975 		ctxd = 0;
    976 	txr->next_avail_desc = ctxd;
    977 	--txr->tx_avail;
    978 
    979 	return (0);
    980 } /* ixgbe_tx_ctx_setup */
    981 
    982 /************************************************************************
    983  * ixgbe_tso_setup
    984  *
    985  *   Setup work for hardware segmentation offload (TSO) on
    986  *   adapters using advanced tx descriptors
    987  ************************************************************************/
    988 static int
    989 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len,
    990     u32 *olinfo_status)
    991 {
    992 	struct ixgbe_adv_tx_context_desc *TXD;
    993 	struct ether_vlan_header         *eh;
    994 #ifdef INET6
    995 	struct ip6_hdr                   *ip6;
    996 #endif
    997 #ifdef INET
    998 	struct ip                        *ip;
    999 #endif
   1000 	struct tcphdr                    *th;
   1001 	int                              ctxd, ehdrlen, ip_hlen, tcp_hlen;
   1002 	u32                              vlan_macip_lens = 0;
   1003 	u32                              type_tucmd_mlhl = 0;
   1004 	u32                              mss_l4len_idx = 0, paylen;
   1005 	u16                              vtag = 0, eh_type;
   1006 
   1007 	/*
   1008 	 * Determine where frame payload starts.
   1009 	 * Jump over vlan headers if already present
   1010 	 */
   1011 	eh = mtod(mp, struct ether_vlan_header *);
   1012 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
   1013 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
   1014 		eh_type = eh->evl_proto;
   1015 	} else {
   1016 		ehdrlen = ETHER_HDR_LEN;
   1017 		eh_type = eh->evl_encap_proto;
   1018 	}
   1019 
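	/*
	 * For TSO the TCP checksum field must be seeded with the
	 * pseudo-header checksum (computed with a zero length); the
	 * hardware recomputes the full checksum for every segment it
	 * emits.
	 */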
   1020 	switch (ntohs(eh_type)) {
   1021 #ifdef INET
   1022 	case ETHERTYPE_IP:
   1023 		ip = (struct ip *)(mp->m_data + ehdrlen);
   1024 		if (ip->ip_p != IPPROTO_TCP)
   1025 			return (ENXIO);
   1026 		ip->ip_sum = 0;
   1027 		ip_hlen = ip->ip_hl << 2;
   1028 		th = (struct tcphdr *)((char *)ip + ip_hlen);
   1029 		th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
   1030 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
   1031 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
   1032 		/* Tell transmit desc to also do IPv4 checksum. */
   1033 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
   1034 		break;
   1035 #endif
   1036 #ifdef INET6
   1037 	case ETHERTYPE_IPV6:
   1038 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
   1039 		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
   1040 		if (ip6->ip6_nxt != IPPROTO_TCP)
   1041 			return (ENXIO);
   1042 		ip_hlen = sizeof(struct ip6_hdr);
   1043 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
   1044 		th = (struct tcphdr *)((char *)ip6 + ip_hlen);
   1045 		th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
   1046 		    &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
   1047 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
   1048 		break;
   1049 #endif
   1050 	default:
   1051 		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
   1052 		    __func__, ntohs(eh_type));
   1053 		break;
   1054 	}
   1055 
   1056 	ctxd = txr->next_avail_desc;
   1057 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
   1058 
   1059 	tcp_hlen = th->th_off << 2;
   1060 
   1061 	/* This is used in the transmit desc in encap */
   1062 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
   1063 
   1064 	/* VLAN MACLEN IPLEN */
   1065 	if (vlan_has_tag(mp)) {
   1066 		vtag = htole16(vlan_get_tag(mp));
   1067 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
   1068 	}
   1069 
   1070 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
   1071 	vlan_macip_lens |= ip_hlen;
   1072 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
   1073 
   1074 	/* ADV DTYPE TUCMD */
   1075 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
   1076 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
   1077 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
   1078 
   1079 	/* MSS L4LEN IDX */
   1080 	mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
   1081 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
   1082 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
   1083 
   1084 	TXD->seqnum_seed = htole32(0);
   1085 
   1086 	if (++ctxd == txr->num_desc)
   1087 		ctxd = 0;
   1088 
   1089 	txr->tx_avail--;
   1090 	txr->next_avail_desc = ctxd;
   1091 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
   1092 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
   1093 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
   1094 	IXGBE_EVC_ADD(&txr->tso_tx, 1);
   1095 
   1096 	return (0);
   1097 } /* ixgbe_tso_setup */
   1098 
   1099 
   1100 /************************************************************************
   1101  * ixgbe_txeof
   1102  *
   1103  *   Examine each tx_buffer in the used queue. If the hardware is done
   1104  *   processing the packet then free associated resources. The
   1105  *   tx_buffer is put back on the free queue.
   1106  ************************************************************************/
   1107 bool
   1108 ixgbe_txeof(struct tx_ring *txr)
   1109 {
   1110 	struct ixgbe_softc	*sc = txr->sc;
   1111 	struct ifnet		*ifp = sc->ifp;
   1112 	struct ixgbe_tx_buf	*buf;
   1113 	union ixgbe_adv_tx_desc *txd;
   1114 	u32			work, processed = 0;
   1115 	u32			limit = sc->tx_process_limit;
   1116 	u16			avail;
   1117 
   1118 	KASSERT(mutex_owned(&txr->tx_mtx));
   1119 
   1120 #ifdef DEV_NETMAP
   1121 	if ((sc->feat_en & IXGBE_FEATURE_NETMAP) &&
   1122 	    (sc->ifp->if_capenable & IFCAP_NETMAP)) {
		struct netmap_adapter *na = NA(sc->ifp);
   1124 		struct netmap_kring *kring = na->tx_rings[txr->me];
   1125 		txd = txr->tx_base;
   1126 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1127 		    BUS_DMASYNC_POSTREAD);
   1128 		/*
   1129 		 * In netmap mode, all the work is done in the context
   1130 		 * of the client thread. Interrupt handlers only wake up
   1131 		 * clients, which may be sleeping on individual rings
   1132 		 * or on a global resource for all rings.
   1133 		 * To implement tx interrupt mitigation, we wake up the client
   1134 		 * thread roughly every half ring, even if the NIC interrupts
   1135 		 * more frequently. This is implemented as follows:
   1136 		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
   1137 		 *   the slot that should wake up the thread (nkr_num_slots
   1138 		 *   means the user thread should not be woken up);
   1139 		 * - the driver ignores tx interrupts unless netmap_mitigate=0
   1140 		 *   or the slot has the DD bit set.
   1141 		 */
   1142 		if (kring->nr_kflags < kring->nkr_num_slots &&
   1143 		    le32toh(txd[kring->nr_kflags].wb.status) & IXGBE_TXD_STAT_DD) {
   1144 			netmap_tx_irq(ifp, txr->me);
   1145 		}
   1146 		return false;
   1147 	}
   1148 #endif /* DEV_NETMAP */
   1149 
   1150 	if (txr->tx_avail == txr->num_desc) {
   1151 		txr->busy = 0;
   1152 		return false;
   1153 	}
   1154 
   1155 	/* Get work starting point */
   1156 	work = txr->next_to_clean;
   1157 	buf = &txr->tx_buffers[work];
   1158 	txd = &txr->tx_base[work];
   1159 	work -= txr->num_desc; /* The distance to ring end */
   1160 	avail = txr->tx_avail;
   1161 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1162 	    BUS_DMASYNC_POSTREAD);
   1163 
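	/*
	 * "work" is kept as a negative offset from the end of the
	 * ring: it reaches zero exactly when the index wraps, which
	 * is what the "!work" tests below detect.
	 */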
   1164 	do {
   1165 		union ixgbe_adv_tx_desc *eop = buf->eop;
   1166 		if (eop == NULL) /* No work */
   1167 			break;
   1168 
   1169 		if ((le32toh(eop->wb.status) & IXGBE_TXD_STAT_DD) == 0)
   1170 			break;	/* I/O not complete */
   1171 
   1172 		if (buf->m_head) {
   1173 			txr->bytes += buf->m_head->m_pkthdr.len;
   1174 			bus_dmamap_sync(txr->txtag->dt_dmat, buf->map,
   1175 			    0, buf->m_head->m_pkthdr.len,
   1176 			    BUS_DMASYNC_POSTWRITE);
   1177 			ixgbe_dmamap_unload(txr->txtag, buf->map);
   1178 			m_freem(buf->m_head);
   1179 			buf->m_head = NULL;
   1180 		}
   1181 		buf->eop = NULL;
   1182 		++avail;
   1183 
   1184 		/* We clean the range if multi segment */
   1185 		while (txd != eop) {
   1186 			++txd;
   1187 			++buf;
   1188 			++work;
   1189 			/* wrap the ring? */
   1190 			if (__predict_false(!work)) {
   1191 				work -= txr->num_desc;
   1192 				buf = txr->tx_buffers;
   1193 				txd = txr->tx_base;
   1194 			}
   1195 			if (buf->m_head) {
   1196 				txr->bytes +=
   1197 				    buf->m_head->m_pkthdr.len;
   1198 				bus_dmamap_sync(txr->txtag->dt_dmat,
   1199 				    buf->map,
   1200 				    0, buf->m_head->m_pkthdr.len,
   1201 				    BUS_DMASYNC_POSTWRITE);
   1202 				ixgbe_dmamap_unload(txr->txtag,
   1203 				    buf->map);
   1204 				m_freem(buf->m_head);
   1205 				buf->m_head = NULL;
   1206 			}
   1207 			++avail;
   1208 			buf->eop = NULL;
   1209 
   1210 		}
   1211 		++processed;
   1212 
   1213 		/* Try the next packet */
   1214 		++txd;
   1215 		++buf;
   1216 		++work;
   1217 		/* reset with a wrap */
   1218 		if (__predict_false(!work)) {
   1219 			work -= txr->num_desc;
   1220 			buf = txr->tx_buffers;
   1221 			txd = txr->tx_base;
   1222 		}
   1223 		prefetch(txd);
   1224 	} while (__predict_true(--limit));
   1225 
   1226 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1227 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   1228 
   1229 	work += txr->num_desc;
   1230 	txr->next_to_clean = work;
   1231 	if (processed) {
   1232 		txr->tx_avail = avail;
   1233 		txr->txr_no_space = false;
   1234 		txr->packets += processed;
   1235 		if_statadd(ifp, if_opackets, processed);
   1236 	}
   1237 
	/*
	 * Queue hang detection: we know there is work outstanding or
	 * the early return above would have been taken, so if nothing
	 * was cleaned on this pass, increment busy.  local_timer()
	 * checks the counter and marks the queue HUNG once it exceeds
	 * a maximum number of attempts.
	 */
   1246 	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
   1247 		++txr->busy;
	/*
	 * If anything was cleaned, reset the state to 1; note this
	 * clears HUNG if it was set.
	 */
   1252 	if (processed)
   1253 		txr->busy = 1;
   1254 
   1255 	if (txr->tx_avail == txr->num_desc)
   1256 		txr->busy = 0;
   1257 
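	/*
	 * Return true when the processing limit was exhausted, i.e.
	 * more completed descriptors may still be pending.
	 */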
   1258 	return ((limit > 0) ? false : true);
   1259 } /* ixgbe_txeof */
   1260 
   1261 /************************************************************************
   1262  * ixgbe_rsc_count
   1263  *
   1264  *   Used to detect a descriptor that has been merged by Hardware RSC.
   1265  ************************************************************************/
   1266 static inline u32
   1267 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
   1268 {
   1269 	return (le32toh(rx->wb.lower.lo_dword.data) &
   1270 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
   1271 } /* ixgbe_rsc_count */
   1272 
   1273 /************************************************************************
   1274  * ixgbe_setup_hw_rsc
   1275  *
 *   Initialize the Hardware RSC (LRO) feature on the 82599 and
 *   later for an RX ring; it is toggled by the LRO capability even
 *   though it is transparent to the stack.
   1279  *
   1280  *   NOTE: Since this HW feature only works with IPv4 and
   1281  *         testing has shown soft LRO to be as effective,
   1282  *         this feature will be disabled by default.
   1283  ************************************************************************/
   1284 static void
   1285 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
   1286 {
   1287 	struct ixgbe_softc *sc = rxr->sc;
   1288 	struct ixgbe_hw	*hw = &sc->hw;
   1289 	u32		rscctrl, rdrxctl;
   1290 
	/* If turning LRO/RSC off we need to disable it */
	if ((sc->ifp->if_capenable & IFCAP_LRO) == 0) {
		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
		/* Write back the cleared enable bit so the disable sticks */
		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
		return;
	}
   1297 
   1298 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
   1299 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
   1300 #ifdef DEV_NETMAP
   1301 	/* Always strip CRC unless Netmap disabled it */
   1302 	if (!(sc->feat_en & IXGBE_FEATURE_NETMAP) ||
   1303 	    !(sc->ifp->if_capenable & IFCAP_NETMAP) ||
   1304 	    ix_crcstrip)
   1305 #endif /* DEV_NETMAP */
   1306 		rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
   1307 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
   1308 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
   1309 
   1310 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
   1311 	rscctrl |= IXGBE_RSCCTL_RSCEN;
   1312 	/*
   1313 	 * Limit the total number of descriptors that
   1314 	 * can be combined, so it does not exceed 64K
   1315 	 */
   1316 	if (rxr->mbuf_sz == MCLBYTES)
   1317 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
   1318 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
   1319 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
   1320 	else if (rxr->mbuf_sz == MJUM9BYTES)
   1321 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
   1322 	else  /* Using 16K cluster */
   1323 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
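	/*
	 * e.g. with 2 KB MCLBYTES clusters 16 descriptors give 32 KB,
	 * and with 9 KB clusters 4 descriptors give 36 KB, both safely
	 * below the 64 KB cap.
	 */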
   1324 
   1325 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
   1326 
   1327 	/* Enable TCP header recognition */
   1328 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
   1329 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR));
   1330 
   1331 	/* Disable RSC for ACK packets */
   1332 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
   1333 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
   1334 
   1335 	rxr->hw_rsc = TRUE;
   1336 } /* ixgbe_setup_hw_rsc */
   1337 
   1338 /************************************************************************
   1339  * ixgbe_refresh_mbufs
   1340  *
 *   Refresh mbuf buffers for RX descriptor rings.
 *    - Keeps its own state, so discards due to resource exhaustion
 *      are unnecessary: if an mbuf cannot be obtained it simply
 *      returns, keeping its placeholder, and can be called again
 *      later to retry.
   1346  ************************************************************************/
   1347 static void
   1348 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
   1349 {
   1350 	struct ixgbe_softc  *sc = rxr->sc;
   1351 	struct ixgbe_rx_buf *rxbuf;
   1352 	struct mbuf         *mp;
   1353 	int                 i, error;
   1354 	bool                refreshed = false;
   1355 
   1356 	i = rxr->next_to_refresh;
   1357 	/* next_to_refresh points to the previous one */
   1358 	if (++i == rxr->num_desc)
   1359 		i = 0;
   1360 
   1361 	while (i != limit) {
   1362 		rxbuf = &rxr->rx_buffers[i];
   1363 		if (__predict_false(rxbuf->buf == NULL)) {
   1364 			mp = ixgbe_getcl();
   1365 			if (mp == NULL) {
   1366 				IXGBE_EVC_ADD(&rxr->no_mbuf, 1);
   1367 				goto update;
   1368 			}
   1369 			mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   1370 			IXGBE_M_ADJ(sc, rxr, mp);
   1371 		} else
   1372 			mp = rxbuf->buf;
   1373 
   1374 		/* If we're dealing with an mbuf that was copied rather
   1375 		 * than replaced, there's no need to go through busdma.
   1376 		 */
   1377 		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
   1378 			/* Get the memory mapping */
   1379 			ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   1380 			error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
   1381 			    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
   1382 			if (__predict_false(error != 0)) {
   1383 				device_printf(sc->dev, "Refresh mbufs: "
   1384 				    "payload dmamap load failure - %d\n",
   1385 				    error);
   1386 				m_free(mp);
   1387 				rxbuf->buf = NULL;
   1388 				goto update;
   1389 			}
   1390 			rxbuf->buf = mp;
   1391 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1392 			    0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
   1393 			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
   1394 			    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1395 		} else {
   1396 			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
   1397 			rxbuf->flags &= ~IXGBE_RX_COPY;
   1398 		}
   1399 
   1400 		refreshed = true;
   1401 		/* next_to_refresh points to the previous one */
   1402 		rxr->next_to_refresh = i;
   1403 		if (++i == rxr->num_desc)
   1404 			i = 0;
   1405 	}
   1406 
   1407 update:
   1408 	if (refreshed) /* Update hardware tail index */
   1409 		IXGBE_WRITE_REG(&sc->hw, rxr->tail, rxr->next_to_refresh);
   1410 
   1411 	return;
   1412 } /* ixgbe_refresh_mbufs */
   1413 
   1414 /************************************************************************
   1415  * ixgbe_allocate_receive_buffers
   1416  *
   1417  *   Allocate memory for rx_buffer structures. Since we use one
 *   rx_buffer per received packet, the maximum number of rx_buffers
   1419  *   that we'll need is equal to the number of receive descriptors
   1420  *   that we've allocated.
   1421  ************************************************************************/
   1422 static int
   1423 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
   1424 {
   1425 	struct ixgbe_softc  *sc = rxr->sc;
   1426 	device_t            dev = sc->dev;
   1427 	struct ixgbe_rx_buf *rxbuf;
   1428 	int                 bsize, error;
   1429 
   1430 	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
   1431 	rxr->rx_buffers = kmem_zalloc(bsize, KM_SLEEP);
   1432 
   1433 	error = ixgbe_dma_tag_create(
   1434 	         /*      parent */ sc->osdep.dmat,
   1435 	         /*   alignment */ 1,
   1436 	         /*      bounds */ 0,
   1437 	         /*     maxsize */ MJUM16BYTES,
   1438 	         /*   nsegments */ 1,
   1439 	         /*  maxsegsize */ MJUM16BYTES,
   1440 	         /*       flags */ 0,
   1441 	                           &rxr->ptag);
   1442 	if (error != 0) {
   1443 		aprint_error_dev(dev, "Unable to create RX DMA tag\n");
   1444 		goto fail;
   1445 	}
   1446 
   1447 	for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
   1448 		rxbuf = &rxr->rx_buffers[i];
   1449 		error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
   1450 		if (error) {
   1451 			aprint_error_dev(dev, "Unable to create RX dma map\n");
   1452 			goto fail;
   1453 		}
   1454 	}
   1455 
   1456 	return (0);
   1457 
   1458 fail:
   1459 	/* Frees all, but can handle partial completion */
   1460 	ixgbe_free_receive_structures(sc);
   1461 
   1462 	return (error);
   1463 } /* ixgbe_allocate_receive_buffers */
   1464 
   1465 /************************************************************************
   1466  * ixgbe_free_receive_ring
   1467  ************************************************************************/
   1468 static void
   1469 ixgbe_free_receive_ring(struct rx_ring *rxr)
   1470 {
   1471 	for (int i = 0; i < rxr->num_desc; i++) {
   1472 		ixgbe_rx_discard(rxr, i);
   1473 	}
   1474 } /* ixgbe_free_receive_ring */
   1475 
   1476 /************************************************************************
   1477  * ixgbe_setup_receive_ring
   1478  *
   1479  *   Initialize a receive ring and its buffers.
   1480  ************************************************************************/
   1481 static int
   1482 ixgbe_setup_receive_ring(struct rx_ring *rxr)
   1483 {
   1484 	struct ixgbe_softc    *sc;
   1485 	struct ixgbe_rx_buf   *rxbuf;
   1486 #ifdef LRO
   1487 	struct ifnet          *ifp;
   1488 	struct lro_ctrl       *lro = &rxr->lro;
   1489 #endif /* LRO */
   1490 #ifdef DEV_NETMAP
	struct netmap_adapter *na = NA(rxr->sc->ifp);
   1492 	struct netmap_slot    *slot;
   1493 #endif /* DEV_NETMAP */
   1494 	int                   rsize, error = 0;
   1495 
   1496 	sc = rxr->sc;
   1497 #ifdef LRO
   1498 	ifp = sc->ifp;
   1499 #endif /* LRO */
   1500 
   1501 	/* Clear the ring contents */
   1502 	IXGBE_RX_LOCK(rxr);
   1503 
   1504 #ifdef DEV_NETMAP
   1505 	if (sc->feat_en & IXGBE_FEATURE_NETMAP)
   1506 		slot = netmap_reset(na, NR_RX, rxr->me, 0);
   1507 #endif /* DEV_NETMAP */
   1508 
   1509 	rsize = sc->num_rx_desc * sizeof(union ixgbe_adv_rx_desc);
   1510 	KASSERT((rsize % DBA_ALIGN) == 0);
   1511 	bzero((void *)rxr->rx_base, rsize);
   1512 	/* Cache the size */
   1513 	rxr->mbuf_sz = sc->rx_mbuf_sz;
   1514 
   1515 	/* Free current RX buffer structs and their mbufs */
   1516 	ixgbe_free_receive_ring(rxr);
   1517 
   1518 	/* Now replenish the mbufs */
   1519 	for (int i = 0; i < rxr->num_desc; i++) {
   1520 		struct mbuf *mp;
   1521 
   1522 		rxbuf = &rxr->rx_buffers[i];
   1523 
   1524 #ifdef DEV_NETMAP
   1525 		/*
   1526 		 * In netmap mode, fill the map and set the buffer
   1527 		 * address in the NIC ring, considering the offset
   1528 		 * between the netmap and NIC rings (see comment in
   1529 		 * ixgbe_setup_transmit_ring() ). No need to allocate
   1530 		 * an mbuf, so end the block with a continue;
   1531 		 */
   1532 		if ((sc->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
   1533 			int sj = netmap_idx_n2k(na->rx_rings[rxr->me], i);
   1534 			uint64_t paddr;
   1535 			void *addr;
   1536 
   1537 			addr = PNMB(na, slot + sj, &paddr);
   1538 			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
   1539 			/* Update descriptor and the cached value */
   1540 			rxr->rx_base[i].read.pkt_addr = htole64(paddr);
   1541 			rxbuf->addr = htole64(paddr);
   1542 			continue;
   1543 		}
   1544 #endif /* DEV_NETMAP */
   1545 
   1546 		rxbuf->flags = 0;
   1547 		rxbuf->buf = ixgbe_getcl();
   1548 		if (rxbuf->buf == NULL) {
   1549 			IXGBE_EVC_ADD(&rxr->no_mbuf, 1);
   1550 			error = ENOBUFS;
   1551 			goto fail;
   1552 		}
   1553 		mp = rxbuf->buf;
   1554 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   1555 		IXGBE_M_ADJ(sc, rxr, mp);
   1556 		/* Get the memory mapping */
   1557 		error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat, rxbuf->pmap,
   1558 		    mp, BUS_DMA_NOWAIT);
   1559 		if (error != 0) {
   1560 			/*
   1561 			 * Clear this entry for later cleanup in
   1562 			 * ixgbe_rx_discard(), which is called via
   1563 			 * ixgbe_free_receive_ring().
   1564 			 */
   1565 			m_freem(mp);
   1566 			rxbuf->buf = NULL;
   1567 			goto fail;
   1568 		}
   1569 		bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1570 		    0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
   1571 		/* Update the descriptor and the cached value */
   1572 		rxr->rx_base[i].read.pkt_addr =
   1573 		    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1574 		rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1575 	}
   1576 
   1577 	/* Setup our descriptor indices */
   1578 	rxr->next_to_check = 0;
   1579 	rxr->next_to_refresh = sc->num_rx_desc - 1; /* Fully allocated */
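        	/*
        	 * next_to_check is where ixgbe_rxeof() resumes scanning for
        	 * completed descriptors; next_to_refresh records the last slot
        	 * whose buffer has been handed to the hardware (see
        	 * ixgbe_refresh_mbufs()).
        	 */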
   1580 	rxr->lro_enabled = FALSE;
   1581 	rxr->discard_multidesc = false;
   1582 	IXGBE_EVC_STORE(&rxr->rx_copies, 0);
   1583 #if 0 /* NetBSD */
   1584 	IXGBE_EVC_STORE(&rxr->rx_bytes, 0);
   1585 #if 1	/* Fix inconsistency */
   1586 	IXGBE_EVC_STORE(&rxr->rx_packets, 0);
   1587 #endif
   1588 #endif
   1589 	rxr->vtag_strip = FALSE;
   1590 
   1591 	ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   1592 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   1593 
   1594 	/*
   1595 	 * Now set up the LRO interface
   1596 	 */
   1597 	if (ixgbe_rsc_enable)
   1598 		ixgbe_setup_hw_rsc(rxr);
   1599 #ifdef LRO
   1600 	else if (ifp->if_capenable & IFCAP_LRO) {
   1601 		device_t dev = sc->dev;
   1602 		int err = tcp_lro_init(lro);
   1603 		if (err) {
   1604 			device_printf(dev, "LRO Initialization failed!\n");
   1605 			goto fail;
   1606 		}
   1607 		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
   1608 		rxr->lro_enabled = TRUE;
   1609 		lro->ifp = sc->ifp;
   1610 	}
   1611 #endif /* LRO */
   1612 
   1613 	IXGBE_RX_UNLOCK(rxr);
   1614 
   1615 	return (0);
   1616 
   1617 fail:
   1618 	ixgbe_free_receive_ring(rxr);
   1619 	IXGBE_RX_UNLOCK(rxr);
   1620 
   1621 	return (error);
   1622 } /* ixgbe_setup_receive_ring */
   1623 
   1624 /************************************************************************
   1625  * ixgbe_setup_receive_structures - Initialize all receive rings.
   1626  ************************************************************************/
   1627 int
   1628 ixgbe_setup_receive_structures(struct ixgbe_softc *sc)
   1629 {
   1630 	struct rx_ring *rxr = sc->rx_rings;
   1631 	int            j;
   1632 
   1633 	INIT_DEBUGOUT("ixgbe_setup_receive_structures");
   1634 	for (j = 0; j < sc->num_queues; j++, rxr++)
   1635 		if (ixgbe_setup_receive_ring(rxr))
   1636 			goto fail;
   1637 
   1638 	return (0);
   1639 fail:
   1640 	/*
   1641 	 * Free the RX buffers allocated so far. We only handle the
   1642 	 * rings that completed; the failing ring has already cleaned
   1643 	 * up after itself. Ring 'j' failed, so it is the terminus.
   1644 	 */
   1645 	for (int i = 0; i < j; ++i) {
   1646 		rxr = &sc->rx_rings[i];
   1647 		IXGBE_RX_LOCK(rxr);
   1648 		ixgbe_free_receive_ring(rxr);
   1649 		IXGBE_RX_UNLOCK(rxr);
   1650 	}
   1651 
   1652 	return (ENOBUFS);
   1653 } /* ixgbe_setup_receive_structures */
   1654 
   1655 
   1656 /************************************************************************
   1657  * ixgbe_free_receive_structures - Free all receive rings.
   1658  ************************************************************************/
   1659 void
   1660 ixgbe_free_receive_structures(struct ixgbe_softc *sc)
   1661 {
   1662 	struct rx_ring *rxr = sc->rx_rings;
   1663 
   1664 	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
   1665 
   1666 	for (int i = 0; i < sc->num_queues; i++, rxr++) {
   1667 		ixgbe_free_receive_buffers(rxr);
   1668 #ifdef LRO
   1669 		/* Free LRO memory */
   1670 		tcp_lro_free(&rxr->lro);
   1671 #endif /* LRO */
   1672 		/* Free the ring memory as well */
   1673 		ixgbe_dma_free(sc, &rxr->rxdma);
   1674 		IXGBE_RX_LOCK_DESTROY(rxr);
   1675 	}
   1676 
   1677 	kmem_free(sc->rx_rings, sizeof(struct rx_ring) * sc->num_queues);
   1678 } /* ixgbe_free_receive_structures */
   1679 
   1680 
   1681 /************************************************************************
   1682  * ixgbe_free_receive_buffers - Free receive ring data structures
   1683  ************************************************************************/
   1684 static void
   1685 ixgbe_free_receive_buffers(struct rx_ring *rxr)
   1686 {
   1687 	struct ixgbe_softc  *sc = rxr->sc;
   1688 	struct ixgbe_rx_buf *rxbuf;
   1689 
   1690 	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
   1691 
   1692 	/* Cleanup any existing buffers */
   1693 	if (rxr->rx_buffers != NULL) {
   1694 		for (int i = 0; i < sc->num_rx_desc; i++) {
   1695 			rxbuf = &rxr->rx_buffers[i];
   1696 			ixgbe_rx_discard(rxr, i);
   1697 			if (rxbuf->pmap != NULL) {
   1698 				ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
   1699 				rxbuf->pmap = NULL;
   1700 			}
   1701 		}
   1702 
   1703 		if (rxr->rx_buffers != NULL) {
   1704 			kmem_free(rxr->rx_buffers,
   1705 			    sizeof(struct ixgbe_rx_buf) * rxr->num_desc);
   1706 			rxr->rx_buffers = NULL;
   1707 		}
   1708 	}
   1709 
   1710 	if (rxr->ptag != NULL) {
   1711 		ixgbe_dma_tag_destroy(rxr->ptag);
   1712 		rxr->ptag = NULL;
   1713 	}
   1714 
   1715 	return;
   1716 } /* ixgbe_free_receive_buffers */
   1717 
   1718 /************************************************************************
   1719  * ixgbe_rx_input
   1720  ************************************************************************/
   1721 static __inline void
   1722 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m,
   1723     u32 ptype)
   1724 {
   1725 	struct ixgbe_softc *sc = ifp->if_softc;
   1726 
   1727 #ifdef LRO
   1728 	struct ethercom *ec = &sc->osdep.ec;
   1729 
   1730 	/*
   1731 	 * At the moment, LRO is only for IP/TCP packets whose TCP checksum
   1732 	 * must be computed by hardware, and there must be no VLAN tag in the
   1733 	 * Ethernet header. For IPv6 we do not yet support extension headers.
   1734 	 */
   1735 	if (rxr->lro_enabled &&
   1736 	    (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
   1737 	    (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
   1738 	    ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
   1739 	    (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
   1740 	    (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
   1741 	    (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
   1742 	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
   1743 	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
   1744 		/*
   1745 		 * Hand the packet to software LRO only if LRO
   1746 		 * resources are available and the enqueue succeeds;
   1747 		 * otherwise fall through and send it to the stack
   1748 		 * below via if_percpuq_enqueue().
   1749 		 */
   1750 		if (rxr->lro.lro_cnt != 0)
   1751 			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
   1752 				return;
   1753 	}
   1754 #endif /* LRO */
   1755 
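        	/* Hand the frame to the interface's per-CPU input queue. */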
   1756 	if_percpuq_enqueue(sc->ipq, m);
   1757 } /* ixgbe_rx_input */
   1758 
   1759 /************************************************************************
   1760  * ixgbe_rx_discard
   1761  ************************************************************************/
   1762 static __inline void
   1763 ixgbe_rx_discard(struct rx_ring *rxr, int i)
   1764 {
   1765 	struct ixgbe_rx_buf *rbuf;
   1766 
   1767 	rbuf = &rxr->rx_buffers[i];
   1768 
   1769 	/*
   1770 	 * With advanced descriptors the writeback clobbers the buffer addrs,
   1771 	 * so it's easier to just free the existing mbufs and take the normal
   1772 	 * refresh path to get new buffers and mapping.
   1773 	 */
   1774 
   1775 	if (rbuf->fmp != NULL) {/* Partial chain ? */
   1776 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
   1777 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
   1778 		ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
   1779 		m_freem(rbuf->fmp);
   1780 		rbuf->fmp = NULL;
   1781 		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
   1782 	} else if (rbuf->buf) {
   1783 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
   1784 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
   1785 		ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
   1786 		m_free(rbuf->buf);
   1787 		rbuf->buf = NULL;
   1788 	}
   1789 
   1790 	rbuf->flags = 0;
   1791 
   1792 	return;
   1793 } /* ixgbe_rx_discard */
   1794 
   1795 
   1796 /************************************************************************
   1797  * ixgbe_rxeof
   1798  *
   1799  *   Executes in interrupt context. It replenishes the
   1800  *   mbufs in the descriptor ring and passes data that has
   1801  *   been DMA'd into host memory up to the upper layer.
   1802  *
   1803  *   Return TRUE for more work, FALSE for all clean.
   1804  ************************************************************************/
   1805 bool
   1806 ixgbe_rxeof(struct ix_queue *que)
   1807 {
   1808 	struct ixgbe_softc	*sc = que->sc;
   1809 	struct rx_ring		*rxr = que->rxr;
   1810 	struct ifnet		*ifp = sc->ifp;
   1811 #ifdef LRO
   1812 	struct lro_ctrl		*lro = &rxr->lro;
   1813 #endif /* LRO */
   1814 	union ixgbe_adv_rx_desc	*cur;
   1815 	struct ixgbe_rx_buf	*rbuf, *nbuf;
   1816 	int			i, nextp, processed = 0;
   1817 	u32			staterr = 0;
   1818 	u32			loopcount = 0, numdesc;
   1819 	u32			limit = sc->rx_process_limit;
   1820 	u32			rx_copy_len = sc->rx_copy_len;
   1821 	bool			discard_multidesc = rxr->discard_multidesc;
   1822 	bool			wraparound = false;
   1823 	unsigned int		syncremain;
   1824 #ifdef RSS
   1825 	u16			pkt_info;
   1826 #endif
   1827 
   1828 	IXGBE_RX_LOCK(rxr);
   1829 
   1830 #ifdef DEV_NETMAP
   1831 	if (sc->feat_en & IXGBE_FEATURE_NETMAP) {
   1832 		/* Same as the txeof routine: wakeup clients on intr. */
   1833 		if (netmap_rx_irq(ifp, rxr->me, &processed)) {
   1834 			IXGBE_RX_UNLOCK(rxr);
   1835 			return (FALSE);
   1836 		}
   1837 	}
   1838 #endif /* DEV_NETMAP */
   1839 
   1840 	/* Sync the ring: rx_process_limit descriptors, or up to the ring end on wrap */
   1841 	if ((rxr->next_to_check + limit) <= rxr->num_desc) {
   1842 		/* Non-wraparound */
   1843 		numdesc = limit;
   1844 		syncremain = 0;
   1845 	} else {
   1846 		/* Wraparound. Sync up to the end of the ring first. */
   1847 		numdesc = rxr->num_desc - rxr->next_to_check;
   1848 
   1849 		/* Remember the size of the remaining part */
   1850 		syncremain = limit - numdesc;
   1851 	}
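        	/*
        	 * Illustrative example: with num_desc = 1024, next_to_check =
        	 * 1016 and limit = 32, the bus_dmamap_sync() below covers
        	 * descriptors 1016..1023 (numdesc = 8); the remaining 24
        	 * descriptors are synced inside the loop once the index wraps
        	 * around to 0.
        	 */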
   1852 	bus_dmamap_sync(rxr->rxdma.dma_tag->dt_dmat,
   1853 	    rxr->rxdma.dma_map,
   1854 	    sizeof(union ixgbe_adv_rx_desc) * rxr->next_to_check,
   1855 	    sizeof(union ixgbe_adv_rx_desc) * numdesc,
   1856 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   1857 
   1858 	/*
   1859 	 * The maximum number of iterations is rx_process_limit. If
   1860 	 * discard_multidesc is true, keep processing so that a broken packet
   1861 	 * is not handed to the upper layer.
   1862 	 */
   1863 	for (i = rxr->next_to_check;
   1864 	     (loopcount < limit) || (discard_multidesc == true);) {
   1865 
   1866 		struct mbuf *sendmp, *mp;
   1867 		struct mbuf *newmp;
   1868 		u32         rsc, ptype;
   1869 		u16         len;
   1870 		u16         vtag = 0;
   1871 		bool        eop;
   1872 		bool        discard = false;
   1873 
   1874 		if (wraparound) {
   1875 			/* Sync the remaining part after the wrap. */
   1876 			KASSERT(syncremain != 0);
   1877 			numdesc = syncremain;
   1878 			wraparound = false;
   1879 		} else if (__predict_false(loopcount >= limit)) {
   1880 			KASSERT(discard_multidesc == true);
   1881 			numdesc = 1;
   1882 		} else
   1883 			numdesc = 0;
   1884 
   1885 		if (numdesc != 0)
   1886 			bus_dmamap_sync(rxr->rxdma.dma_tag->dt_dmat,
   1887 			    rxr->rxdma.dma_map, 0,
   1888 			    sizeof(union ixgbe_adv_rx_desc) * numdesc,
   1889 			    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   1890 
   1891 		cur = &rxr->rx_base[i];
   1892 		staterr = le32toh(cur->wb.upper.status_error);
   1893 #ifdef RSS
   1894 		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
   1895 #endif
   1896 
   1897 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
   1898 			break;
   1899 
   1900 		loopcount++;
   1901 		sendmp = newmp = NULL;
   1902 		nbuf = NULL;
   1903 		rsc = 0;
   1904 		cur->wb.upper.status_error = 0;
   1905 		rbuf = &rxr->rx_buffers[i];
   1906 		mp = rbuf->buf;
   1907 
   1908 		len = le16toh(cur->wb.upper.length);
   1909 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
   1910 		    IXGBE_RXDADV_PKTTYPE_MASK;
   1911 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
   1912 
   1913 		/* Make sure bad packets are discarded */
   1914 		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
   1915 #if __FreeBSD_version >= 1100036
   1916 			if (sc->feat_en & IXGBE_FEATURE_VF)
   1917 				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
   1918 #endif
   1919 			IXGBE_EVC_ADD(&rxr->rx_discarded, 1);
   1920 			ixgbe_rx_discard(rxr, i);
   1921 			discard_multidesc = false;
   1922 			goto next_desc;
   1923 		}
   1924 
   1925 		if (__predict_false(discard_multidesc))
   1926 			discard = true;
   1927 		else {
   1928 			/* Pre-alloc new mbuf. */
   1929 
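        			/*
        			 * Two strategies: a short single-descriptor frame
        			 * (len <= rx_copy_len) is copied into a small header
        			 * mbuf so the current cluster can be reused, while a
        			 * long frame gets a fresh cluster pre-allocated here
        			 * so the received cluster can be handed up the stack
        			 * without copying.
        			 */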
   1930 			if ((rbuf->fmp == NULL) &&
   1931 			    eop && (len <= rx_copy_len)) {
   1932 				/* For short packet. See below. */
   1933 				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
   1934 				if (__predict_false(sendmp == NULL)) {
   1935 					IXGBE_EVC_ADD(&rxr->no_mbuf, 1);
   1936 					discard = true;
   1937 				}
   1938 			} else {
   1939 				/* For long packet. */
   1940 				newmp = ixgbe_getcl();
   1941 				if (__predict_false(newmp == NULL)) {
   1942 					IXGBE_EVC_ADD(&rxr->no_mbuf, 1);
   1943 					discard = true;
   1944 				}
   1945 			}
   1946 		}
   1947 
   1948 		if (__predict_false(discard)) {
   1949 			/*
   1950 			 * Descriptor initialization is already done by the
   1951 			 * above code (cur->wb.upper.status_error = 0).
   1952 			 * So, we can reuse current rbuf->buf for new packet.
   1953 			 *
   1954 			 * Rewrite the buffer addr, see comment in
   1955 			 * ixgbe_rx_discard().
   1956 			 */
   1957 			cur->read.pkt_addr = rbuf->addr;
   1958 			m_freem(rbuf->fmp);
   1959 			rbuf->fmp = NULL;
   1960 			if (!eop) {
   1961 				/* Discard the entire packet. */
   1962 				discard_multidesc = true;
   1963 			} else
   1964 				discard_multidesc = false;
   1965 			goto next_desc;
   1966 		}
   1967 		discard_multidesc = false;
   1968 
   1969 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
   1970 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
   1971 
   1972 		/*
   1973 		 * On the 82599, which supports a hardware
   1974 		 * LRO (called HW RSC), the fragments of a
   1975 		 * packet need not sit in sequential
   1976 		 * descriptors; instead, the next descriptor
   1977 		 * index is encoded in bits of the descriptor
   1978 		 * itself. This also means that we might
   1979 		 * process more than one packet at a time,
   1980 		 * something that had never been true before;
   1981 		 * it required eliminating the global chain
   1982 		 * pointers in favor of what we do here.  -jfv
   1983 		 */
   1984 		if (!eop) {
   1985 			/*
   1986 			 * Figure out the next descriptor
   1987 			 * of this frame.
   1988 			 */
   1989 			if (rxr->hw_rsc == TRUE) {
   1990 				rsc = ixgbe_rsc_count(cur);
   1991 				rxr->rsc_num += (rsc - 1);
   1992 			}
   1993 			if (rsc) { /* Get hardware index */
   1994 				nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
   1995 				    IXGBE_RXDADV_NEXTP_SHIFT);
   1996 			} else { /* Just sequential */
   1997 				nextp = i + 1;
   1998 				if (nextp == sc->num_rx_desc)
   1999 					nextp = 0;
   2000 			}
   2001 			nbuf = &rxr->rx_buffers[nextp];
   2002 			prefetch(nbuf);
   2003 		}
   2004 		/*
   2005 		 * Rather than using the fmp/lmp global pointers
   2006 		 * we now keep the head of a packet chain in the
   2007 		 * buffer struct and pass this along from one
   2008 		 * descriptor to the next, until we get EOP.
   2009 		 *
   2010 		 * See if there is a stored chain head that tells
   2011 		 * us whether this descriptor is a secondary
   2012 		 * fragment or the start of a new packet.
   2013 		 */
   2014 		if (rbuf->fmp != NULL) {
   2015 			/* Secondary frag */
   2016 			sendmp = rbuf->fmp;
   2017 
   2018 			/* Update new (used in future) mbuf */
   2019 			newmp->m_pkthdr.len = newmp->m_len = rxr->mbuf_sz;
   2020 			IXGBE_M_ADJ(sc, rxr, newmp);
   2021 			rbuf->buf = newmp;
   2022 			rbuf->fmp = NULL;
   2023 
   2024 			/* For secondary frag */
   2025 			mp->m_len = len;
   2026 			mp->m_flags &= ~M_PKTHDR;
   2027 
   2028 			/* For sendmp */
   2029 			sendmp->m_pkthdr.len += mp->m_len;
   2030 		} else {
   2031 			/*
   2032 			 * It's the first segment of a multi descriptor
   2033 			 * packet or a single segment which contains a full
   2034 			 * packet.
   2035 			 */
   2036 
   2037 			if (eop && (len <= rx_copy_len)) {
   2038 				/*
   2039 				 * Optimization: this is a small packet, maybe
   2040 				 * just a TCP ACK. Copy it into a new mbuf and
   2041 				 * leave the old mbuf+cluster for re-use.
   2042 				 */
   2043 				sendmp->m_data += ETHER_ALIGN;
   2044 				memcpy(mtod(sendmp, void *),
   2045 				    mtod(mp, void *), len);
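        				/*
        				 * Mark that the cluster was not consumed
        				 * so ixgbe_refresh_mbufs() can reuse it
        				 * and its cached DMA address instead of
        				 * mapping a new one.
        				 */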
   2046 				IXGBE_EVC_ADD(&rxr->rx_copies, 1);
   2047 				rbuf->flags |= IXGBE_RX_COPY;
   2048 			} else {
   2049 				/* For long packet */
   2050 
   2051 				/* Update new (used in future) mbuf */
   2052 				newmp->m_pkthdr.len = newmp->m_len
   2053 				    = rxr->mbuf_sz;
   2054 				IXGBE_M_ADJ(sc, rxr, newmp);
   2055 				rbuf->buf = newmp;
   2056 				rbuf->fmp = NULL;
   2057 
   2058 				/* For sendmp */
   2059 				sendmp = mp;
   2060 			}
   2061 
   2062 			/* first desc of a non-ps chain */
   2063 			sendmp->m_pkthdr.len = sendmp->m_len = len;
   2064 		}
   2065 		++processed;
   2066 
   2067 		/* Pass the head pointer on */
   2068 		if (eop == 0) {
   2069 			nbuf->fmp = sendmp;
   2070 			sendmp = NULL;
   2071 			mp->m_next = nbuf->buf;
   2072 		} else { /* Sending this frame */
   2073 			m_set_rcvif(sendmp, ifp);
   2074 			++rxr->packets;
   2075 			IXGBE_EVC_ADD(&rxr->rx_packets, 1);
   2076 			/* capture data for AIM */
   2077 			rxr->bytes += sendmp->m_pkthdr.len;
   2078 			IXGBE_EVC_ADD(&rxr->rx_bytes, sendmp->m_pkthdr.len);
   2079 			/* Process vlan info */
   2080 			if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
   2081 				vtag = le16toh(cur->wb.upper.vlan);
   2082 			if (vtag) {
   2083 				vlan_set_tag(sendmp, vtag);
   2084 			}
   2085 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
   2086 				ixgbe_rx_checksum(staterr, sendmp, ptype,
   2087 				   &sc->stats.pf);
   2088 			}
   2089 
   2090 #if 0 /* FreeBSD */
   2091 			/*
   2092 			 * In case of multiqueue, we have RXCSUM.PCSD bit set
   2093 			 * and never cleared. This means we have RSS hash
   2094 			 * available to be used.
   2095 			 */
   2096 			if (sc->num_queues > 1) {
   2097 				sendmp->m_pkthdr.flowid =
   2098 				    le32toh(cur->wb.lower.hi_dword.rss);
   2099 				switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
   2100 				case IXGBE_RXDADV_RSSTYPE_IPV4:
   2101 					M_HASHTYPE_SET(sendmp,
   2102 					    M_HASHTYPE_RSS_IPV4);
   2103 					break;
   2104 				case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
   2105 					M_HASHTYPE_SET(sendmp,
   2106 					    M_HASHTYPE_RSS_TCP_IPV4);
   2107 					break;
   2108 				case IXGBE_RXDADV_RSSTYPE_IPV6:
   2109 					M_HASHTYPE_SET(sendmp,
   2110 					    M_HASHTYPE_RSS_IPV6);
   2111 					break;
   2112 				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
   2113 					M_HASHTYPE_SET(sendmp,
   2114 					    M_HASHTYPE_RSS_TCP_IPV6);
   2115 					break;
   2116 				case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
   2117 					M_HASHTYPE_SET(sendmp,
   2118 					    M_HASHTYPE_RSS_IPV6_EX);
   2119 					break;
   2120 				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
   2121 					M_HASHTYPE_SET(sendmp,
   2122 					    M_HASHTYPE_RSS_TCP_IPV6_EX);
   2123 					break;
   2124 #if __FreeBSD_version > 1100000
   2125 				case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
   2126 					M_HASHTYPE_SET(sendmp,
   2127 					    M_HASHTYPE_RSS_UDP_IPV4);
   2128 					break;
   2129 				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
   2130 					M_HASHTYPE_SET(sendmp,
   2131 					    M_HASHTYPE_RSS_UDP_IPV6);
   2132 					break;
   2133 				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
   2134 					M_HASHTYPE_SET(sendmp,
   2135 					    M_HASHTYPE_RSS_UDP_IPV6_EX);
   2136 					break;
   2137 #endif
   2138 				default:
   2139 					M_HASHTYPE_SET(sendmp,
   2140 					    M_HASHTYPE_OPAQUE_HASH);
   2141 				}
   2142 			} else {
   2143 				sendmp->m_pkthdr.flowid = que->msix;
   2144 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
   2145 			}
   2146 #endif
   2147 		}
   2148 next_desc:
   2149 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   2150 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   2151 
   2152 		/* Advance our pointers to the next descriptor. */
   2153 		if (++i == rxr->num_desc) {
   2154 			wraparound = true;
   2155 			i = 0;
   2156 		}
   2157 		rxr->next_to_check = i;
   2158 
   2159 		/* Now send to the stack or do LRO */
   2160 		if (sendmp != NULL)
   2161 			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
   2162 
   2163 		/* Every 8 descriptors we go to refresh mbufs */
   2164 		if (processed == 8) {
   2165 			ixgbe_refresh_mbufs(rxr, i);
   2166 			processed = 0;
   2167 		}
   2168 	}
   2169 
   2170 	/* Save the current status */
   2171 	rxr->discard_multidesc = discard_multidesc;
   2172 
   2173 	/* Refresh any remaining buf structs */
   2174 	if (ixgbe_rx_unrefreshed(rxr))
   2175 		ixgbe_refresh_mbufs(rxr, i);
   2176 
   2177 	IXGBE_RX_UNLOCK(rxr);
   2178 
   2179 #ifdef LRO
   2180 	/*
   2181 	 * Flush any outstanding LRO work
   2182 	 */
   2183 	tcp_lro_flush_all(lro);
   2184 #endif /* LRO */
   2185 
   2186 	/*
   2187 	 * Still have cleaning to do?
   2188 	 */
   2189 	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
   2190 		return (TRUE);
   2191 
   2192 	return (FALSE);
   2193 } /* ixgbe_rxeof */
   2194 
   2195 
   2196 /************************************************************************
   2197  * ixgbe_rx_checksum
   2198  *
   2199  *   Verify that the hardware indicated that the checksum is valid.
   2200  *   Inform the stack about the status of the checksum so that the
   2201  *   stack doesn't spend time verifying it.
   2202  ************************************************************************/
   2203 static void
   2204 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
   2205     struct ixgbe_hw_stats *stats)
   2206 {
   2207 	u16  status = (u16)staterr;
   2208 	u8   errors = (u8)(staterr >> 24);
   2209 #if 0
   2210 	bool sctp = false;
   2211 
   2212 	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
   2213 	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
   2214 		sctp = true;
   2215 #endif
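        	/*
        	 * The status bits used here live in the low word of staterr
        	 * and the error bits in its top byte; the casts above extract
        	 * them.
        	 */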
   2216 
   2217 	/* IPv4 checksum */
   2218 	if (status & IXGBE_RXD_STAT_IPCS) {
   2219 		IXGBE_EVC_ADD(&stats->ipcs, 1);
   2220 		if (!(errors & IXGBE_RXD_ERR_IPE)) {
   2221 			/* IP Checksum Good */
   2222 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
   2223 		} else {
   2224 			IXGBE_EVC_ADD(&stats->ipcs_bad, 1);
   2225 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
   2226 		}
   2227 	}
   2228 	/* TCP/UDP/SCTP checksum */
   2229 	if (status & IXGBE_RXD_STAT_L4CS) {
   2230 		IXGBE_EVC_ADD(&stats->l4cs, 1);
   2231 		int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
   2232 		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
   2233 			mp->m_pkthdr.csum_flags |= type;
   2234 		} else {
   2235 			IXGBE_EVC_ADD(&stats->l4cs_bad, 1);
   2236 			mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
   2237 		}
   2238 	}
   2239 } /* ixgbe_rx_checksum */
   2240 
   2241 /************************************************************************
   2242  * ixgbe_dma_malloc
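         *
         *   Allocate, map, and load a contiguous DMA-safe region of 'size'
         *   bytes; e.g. ixgbe_allocate_queues() below uses it to obtain the
         *   descriptor rings.  Teardown is done by ixgbe_dma_free().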
   2243  ************************************************************************/
   2244 int
   2245 ixgbe_dma_malloc(struct ixgbe_softc *sc, const bus_size_t size,
   2246 		struct ixgbe_dma_alloc *dma, const int mapflags)
   2247 {
   2248 	device_t dev = sc->dev;
   2249 	int      r, rsegs;
   2250 
   2251 	r = ixgbe_dma_tag_create(
   2252 	     /*      parent */ sc->osdep.dmat,
   2253 	     /*   alignment */ DBA_ALIGN,
   2254 	     /*      bounds */ 0,
   2255 	     /*     maxsize */ size,
   2256 	     /*   nsegments */ 1,
   2257 	     /*  maxsegsize */ size,
   2258 	     /*       flags */ BUS_DMA_ALLOCNOW,
   2259 			       &dma->dma_tag);
   2260 	if (r != 0) {
   2261 		aprint_error_dev(dev,
   2262 		    "%s: ixgbe_dma_tag_create failed; error %d\n", __func__,
   2263 		    r);
   2264 		goto fail_0;
   2265 	}
   2266 
   2267 	r = bus_dmamem_alloc(dma->dma_tag->dt_dmat, size,
   2268 	    dma->dma_tag->dt_alignment, dma->dma_tag->dt_boundary,
   2269 	    &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
   2270 	if (r != 0) {
   2271 		aprint_error_dev(dev,
   2272 		    "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
   2273 		goto fail_1;
   2274 	}
   2275 
   2276 	r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
   2277 	    size, &dma->dma_vaddr, BUS_DMA_NOWAIT | BUS_DMA_COHERENT);
   2278 	if (r != 0) {
   2279 		aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
   2280 		    __func__, r);
   2281 		goto fail_2;
   2282 	}
   2283 
   2284 	r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
   2285 	if (r != 0) {
   2286 		aprint_error_dev(dev, "%s: ixgbe_dmamap_create failed; error %d\n",
   2287 		    __func__, r);
   2288 		goto fail_3;
   2289 	}
   2290 
   2291 	r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map,
   2292 	    dma->dma_vaddr, size, NULL, mapflags | BUS_DMA_NOWAIT);
   2293 	if (r != 0) {
   2294 		aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
   2295 		    __func__, r);
   2296 		goto fail_4;
   2297 	}
   2298 	dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
   2299 	dma->dma_size = size;
   2300 	return 0;
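        /*
         * Error unwinding: each fail_N label below releases the resources
         * acquired before step N, in reverse order of allocation.
         */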
   2301 fail_4:
   2302 	ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
   2303 fail_3:
   2304 	bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
   2305 fail_2:
   2306 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
   2307 fail_1:
   2308 	ixgbe_dma_tag_destroy(dma->dma_tag);
   2309 fail_0:
   2310 
   2311 	return (r);
   2312 } /* ixgbe_dma_malloc */
   2313 
   2314 /************************************************************************
   2315  * ixgbe_dma_free
   2316  ************************************************************************/
   2317 void
   2318 ixgbe_dma_free(struct ixgbe_softc *sc, struct ixgbe_dma_alloc *dma)
   2319 {
   2320 	bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
   2321 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   2322 	ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
   2323 	bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, dma->dma_size);
   2324 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
   2325 	ixgbe_dma_tag_destroy(dma->dma_tag);
   2326 } /* ixgbe_dma_free */
   2327 
   2328 
   2329 /************************************************************************
   2330  * ixgbe_allocate_queues
   2331  *
   2332  *   Allocate memory for the transmit and receive rings, and then
   2333  *   the descriptors associated with each; called only once at attach.
   2334  ************************************************************************/
   2335 int
   2336 ixgbe_allocate_queues(struct ixgbe_softc *sc)
   2337 {
   2338 	device_t	dev = sc->dev;
   2339 	struct ix_queue	*que;
   2340 	struct tx_ring	*txr;
   2341 	struct rx_ring	*rxr;
   2342 	int             rsize, tsize, error = IXGBE_SUCCESS;
   2343 	int             txconf = 0, rxconf = 0;
   2344 
   2345 	/* First, allocate the top level queue structs */
   2346 	sc->queues = kmem_zalloc(sizeof(struct ix_queue) * sc->num_queues,
   2347 	    KM_SLEEP);
   2348 
   2349 	/* Second, allocate the TX ring struct memory */
   2350 	sc->tx_rings = kmem_zalloc(sizeof(struct tx_ring) * sc->num_queues,
   2351 	    KM_SLEEP);
   2352 
   2353 	/* Third, allocate the RX ring */
   2354 	sc->rx_rings = kmem_zalloc(sizeof(struct rx_ring) * sc->num_queues,
   2355 	    KM_SLEEP);
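        	/*
        	 * The KM_SLEEP allocations above cannot fail, so no NULL
        	 * checks are needed.
        	 */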
   2356 
   2357 	/* For the ring itself */
   2358 	tsize = sc->num_tx_desc * sizeof(union ixgbe_adv_tx_desc);
   2359 	KASSERT((tsize % DBA_ALIGN) == 0);
   2360 
   2361 	/*
   2362 	 * Now set up the TX queues. txconf is needed to handle the
   2363 	 * possibility that things fail midcourse and we need to
   2364 	 * undo the allocated memory gracefully.
   2365 	 */
   2366 	for (int i = 0; i < sc->num_queues; i++, txconf++) {
   2367 		/* Set up some basics */
   2368 		txr = &sc->tx_rings[i];
   2369 		txr->sc = sc;
   2370 		txr->txr_interq = NULL;
   2371 		/* In case SR-IOV is enabled, align the index properly */
   2372 #ifdef PCI_IOV
   2373 		txr->me = ixgbe_vf_que_index(sc->iov_mode, sc->pool,
   2374 		    i);
   2375 #else
   2376 		txr->me = i;
   2377 #endif
   2378 		txr->num_desc = sc->num_tx_desc;
   2379 
   2380 		/* Initialize the TX side lock */
   2381 		mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
   2382 
   2383 		if (ixgbe_dma_malloc(sc, tsize, &txr->txdma,
   2384 		    BUS_DMA_NOWAIT)) {
   2385 			aprint_error_dev(dev,
   2386 			    "Unable to allocate TX Descriptor memory\n");
   2387 			error = ENOMEM;
   2388 			goto err_tx_desc;
   2389 		}
   2390 		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
   2391 		bzero((void *)txr->tx_base, tsize);
   2392 
   2393 		/* Now allocate transmit buffers for the ring */
   2394 		if (ixgbe_allocate_transmit_buffers(txr)) {
   2395 			aprint_error_dev(dev,
   2396 			    "Critical Failure setting up transmit buffers\n");
   2397 			error = ENOMEM;
   2398 			goto err_tx_desc;
   2399 		}
   2400 		if (!(sc->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
   2401 			/* Allocate a buf ring */
   2402 			txr->txr_interq = pcq_create(IXGBE_BR_SIZE, KM_SLEEP);
   2403 			if (txr->txr_interq == NULL) {
   2404 				aprint_error_dev(dev,
   2405 				    "Critical Failure setting up buf ring\n");
   2406 				error = ENOMEM;
   2407 				goto err_tx_desc;
   2408 			}
   2409 		}
   2410 	}
   2411 
   2412 	/*
   2413 	 * Next the RX queues...
   2414 	 */
   2415 	rsize = sc->num_rx_desc * sizeof(union ixgbe_adv_rx_desc);
   2416 	KASSERT((rsize % DBA_ALIGN) == 0);
   2417 	for (int i = 0; i < sc->num_queues; i++, rxconf++) {
   2418 		rxr = &sc->rx_rings[i];
   2419 		/* Set up some basics */
   2420 		rxr->sc = sc;
   2421 #ifdef PCI_IOV
   2422 		/* In case SR-IOV is enabled, align the index properly */
   2423 		rxr->me = ixgbe_vf_que_index(sc->iov_mode, sc->pool,
   2424 		    i);
   2425 #else
   2426 		rxr->me = i;
   2427 #endif
   2428 		rxr->num_desc = sc->num_rx_desc;
   2429 
   2430 		/* Initialize the RX side lock */
   2431 		mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
   2432 
   2433 		if (ixgbe_dma_malloc(sc, rsize, &rxr->rxdma,
   2434 		    BUS_DMA_NOWAIT)) {
   2435 			aprint_error_dev(dev,
   2436 			    "Unable to allocate RX Descriptor memory\n");
   2437 			error = ENOMEM;
   2438 			goto err_rx_desc;
   2439 		}
   2440 		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
   2441 		bzero((void *)rxr->rx_base, rsize);
   2442 
   2443 		/* Allocate receive buffers for the ring */
   2444 		if (ixgbe_allocate_receive_buffers(rxr)) {
   2445 			aprint_error_dev(dev,
   2446 			    "Critical Failure setting up receive buffers\n");
   2447 			error = ENOMEM;
   2448 			goto err_rx_desc;
   2449 		}
   2450 	}
   2451 
   2452 	/*
   2453 	 * Finally set up the queue holding structs
   2454 	 */
   2455 	for (int i = 0; i < sc->num_queues; i++) {
   2456 		que = &sc->queues[i];
   2457 		que->sc = sc;
   2458 		que->me = i;
   2459 		que->txr = &sc->tx_rings[i];
   2460 		que->rxr = &sc->rx_rings[i];
   2461 
   2462 		mutex_init(&que->dc_mtx, MUTEX_DEFAULT, IPL_NET);
   2463 		que->disabled_count = 0;
   2464 	}
   2465 
   2466 	return (0);
   2467 
   2468 err_rx_desc:
   2469 	for (rxr = sc->rx_rings; rxconf > 0; rxr++, rxconf--)
   2470 		ixgbe_dma_free(sc, &rxr->rxdma);
   2471 err_tx_desc:
   2472 	for (txr = sc->tx_rings; txconf > 0; txr++, txconf--)
   2473 		ixgbe_dma_free(sc, &txr->txdma);
   2474 	kmem_free(sc->rx_rings, sizeof(struct rx_ring) * sc->num_queues);
   2475 	kmem_free(sc->tx_rings, sizeof(struct tx_ring) * sc->num_queues);
   2476 	kmem_free(sc->queues, sizeof(struct ix_queue) * sc->num_queues);
   2477 	return (error);
   2478 } /* ixgbe_allocate_queues */
   2479 
   2480 /************************************************************************
   2481  * ixgbe_free_queues
   2482  *
   2483  *   Free descriptors for the transmit and receive rings, and then
   2484  *   the memory associated with each.
   2485  ************************************************************************/
   2486 void
   2487 ixgbe_free_queues(struct ixgbe_softc *sc)
   2488 {
   2489 	struct ix_queue *que;
   2490 	int i;
   2491 
   2492 	ixgbe_free_transmit_structures(sc);
   2493 	ixgbe_free_receive_structures(sc);
   2494 	for (i = 0; i < sc->num_queues; i++) {
   2495 		que = &sc->queues[i];
   2496 		mutex_destroy(&que->dc_mtx);
   2497 	}
   2498 	kmem_free(sc->queues, sizeof(struct ix_queue) * sc->num_queues);
   2499 } /* ixgbe_free_queues */
   2500