      1 /* $NetBSD: ix_txrx.c,v 1.117 2024/06/29 12:11:12 riastradh Exp $ */
      2 
      3 /******************************************************************************
      4 
      5   Copyright (c) 2001-2017, Intel Corporation
      6   All rights reserved.
      7 
      8   Redistribution and use in source and binary forms, with or without
      9   modification, are permitted provided that the following conditions are met:
     10 
     11    1. Redistributions of source code must retain the above copyright notice,
     12       this list of conditions and the following disclaimer.
     13 
     14    2. Redistributions in binary form must reproduce the above copyright
     15       notice, this list of conditions and the following disclaimer in the
     16       documentation and/or other materials provided with the distribution.
     17 
     18    3. Neither the name of the Intel Corporation nor the names of its
     19       contributors may be used to endorse or promote products derived from
     20       this software without specific prior written permission.
     21 
     22   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     23   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     24   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     25   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     26   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     27   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     28   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     29   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     30   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     31   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     32   POSSIBILITY OF SUCH DAMAGE.
     33 
     34 ******************************************************************************/
     35 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 327031 2017-12-20 18:15:06Z erj $*/
     36 
     37 /*
     38  * Copyright (c) 2011 The NetBSD Foundation, Inc.
     39  * All rights reserved.
     40  *
     41  * This code is derived from software contributed to The NetBSD Foundation
     42  * by Coyote Point Systems, Inc.
     43  *
     44  * Redistribution and use in source and binary forms, with or without
     45  * modification, are permitted provided that the following conditions
     46  * are met:
     47  * 1. Redistributions of source code must retain the above copyright
     48  *    notice, this list of conditions and the following disclaimer.
     49  * 2. Redistributions in binary form must reproduce the above copyright
     50  *    notice, this list of conditions and the following disclaimer in the
     51  *    documentation and/or other materials provided with the distribution.
     52  *
     53  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     54  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     55  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     56  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     57  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     58  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     59  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     60  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     61  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     62  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     63  * POSSIBILITY OF SUCH DAMAGE.
     64  */
     65 
     66 #include <sys/cdefs.h>
     67 __KERNEL_RCSID(0, "$NetBSD: ix_txrx.c,v 1.117 2024/06/29 12:11:12 riastradh Exp $");
     68 
     69 #include "opt_inet.h"
     70 #include "opt_inet6.h"
     71 
     72 #include "ixgbe.h"
     73 
     74 #ifdef RSC
      75 /*
      76  * HW RSC control:
      77  *  This feature works only with IPv4,
      78  *  and only on the 82599 and later.
      79  *  It also causes IP forwarding to
      80  *  fail, and unlike LRO the stack
      81  *  cannot compensate for that.  For
      82  *  these reasons it is left off by
      83  *  default with no run-time tuneable
      84  *  interface; enabling it requires
      85  *  compiling with RSC defined.
      86  */
     87 static bool ixgbe_rsc_enable = FALSE;
     88 #endif
     89 
     90 #ifdef IXGBE_FDIR
     91 /*
      92  * For Flow Director: this is the rate at
      93  * which we sample TX packets for the filter
      94  * pool; with the default of 20, every 20th
      95  * packet is probed.
     96  *
     97  * This feature can be disabled by
     98  * setting this to 0.
     99  */
    100 static int atr_sample_rate = 20;
    101 #endif
    102 
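         /*
          * When the received frame fits in the cluster with ETHER_ALIGN
          * bytes to spare, IXGBE_M_ADJ below trims ETHER_ALIGN (2) bytes
          * from the front of the mbuf so that the IP header following the
          * 14-byte Ethernet header lands on a 32-bit boundary.
          */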
    103 #define IXGBE_M_ADJ(sc, rxr, mp)					\
    104 	if (sc->max_frame_size <= (rxr->mbuf_sz - ETHER_ALIGN))	\
    105 		m_adj(mp, ETHER_ALIGN)
    106 
    107 /************************************************************************
    108  *  Local Function prototypes
    109  ************************************************************************/
    110 static void          ixgbe_setup_transmit_ring(struct tx_ring *);
    111 static void          ixgbe_free_transmit_buffers(struct tx_ring *);
    112 static int           ixgbe_setup_receive_ring(struct rx_ring *);
    113 static void          ixgbe_free_receive_buffers(struct rx_ring *);
    114 static void          ixgbe_rx_checksum(u32, struct mbuf *, u32,
    115                                        struct ixgbe_hw_stats *);
    116 static void          ixgbe_refresh_mbufs(struct rx_ring *, int);
    117 static void          ixgbe_drain(struct ifnet *, struct tx_ring *);
    118 static int           ixgbe_xmit(struct tx_ring *, struct mbuf *);
    119 static int           ixgbe_tx_ctx_setup(struct tx_ring *,
    120                                         struct mbuf *, u32 *, u32 *);
    121 static int           ixgbe_tso_setup(struct tx_ring *,
    122                                      struct mbuf *, u32 *, u32 *);
    123 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
    124 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
    125                                     struct mbuf *, u32);
    126 static int           ixgbe_dma_malloc(struct ixgbe_softc *, bus_size_t,
    127                                       struct ixgbe_dma_alloc *, int);
    128 static void          ixgbe_dma_free(struct ixgbe_softc *, struct ixgbe_dma_alloc *);
    129 #ifdef RSC
    130 static void	     ixgbe_setup_hw_rsc(struct rx_ring *);
    131 #endif
    132 
    133 /************************************************************************
    134  * ixgbe_legacy_start_locked - Transmit entry point
    135  *
    136  *   Called by the stack to initiate a transmit.
    137  *   The driver will remain in this routine as long as there are
    138  *   packets to transmit and transmit resources are available.
    139  *   In case resources are not available, the stack is notified
    140  *   and the packet is requeued.
    141  ************************************************************************/
    142 int
    143 ixgbe_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
    144 {
    145 	int rc;
    146 	struct mbuf    *m_head;
    147 	struct ixgbe_softc *sc = txr->sc;
    148 
    149 	IXGBE_TX_LOCK_ASSERT(txr);
    150 
    151 	if (sc->link_active != LINK_STATE_UP) {
    152 		/*
     153 		 * Discard all packets buffered in the IFQ to avoid
     154 		 * sending stale packets when the link next comes up.
    155 		 */
    156 		ixgbe_drain(ifp, txr);
    157 		return (ENETDOWN);
    158 	}
    159 	if ((ifp->if_flags & IFF_RUNNING) == 0)
    160 		return (ENETDOWN);
    161 	if (txr->txr_no_space)
    162 		return (ENETDOWN);
    163 
    164 	while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
    165 		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
    166 			break;
    167 
    168 		IFQ_POLL(&ifp->if_snd, m_head);
    169 		if (m_head == NULL)
    170 			break;
    171 
    172 		if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
    173 			break;
    174 		}
    175 		IFQ_DEQUEUE(&ifp->if_snd, m_head);
    176 		if (rc != 0) {
    177 			m_freem(m_head);
    178 			continue;
    179 		}
    180 
    181 		/* Send a copy of the frame to the BPF listener */
    182 		bpf_mtap(ifp, m_head, BPF_D_OUT);
    183 	}
    184 
    185 	return IXGBE_SUCCESS;
    186 } /* ixgbe_legacy_start_locked */
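
         /*
          * Explanatory note: the IFQ_POLL/IFQ_DEQUEUE split above
          * implements the "requeued" behaviour described in the header
          * comment.  A packet is only dequeued after ixgbe_xmit() accepts
          * it, so an EAGAIN (no descriptors or DMA resources) leaves it
          * at the head of if_snd to be retried on the next call.
          */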
    187 
    188 /************************************************************************
    189  * ixgbe_legacy_start
    190  *
     191  *   Called by the stack; this always uses the first tx ring
    192  *   and should not be used with multiqueue tx enabled.
    193  ************************************************************************/
    194 void
    195 ixgbe_legacy_start(struct ifnet *ifp)
    196 {
    197 	struct ixgbe_softc *sc = ifp->if_softc;
    198 	struct tx_ring *txr = sc->tx_rings;
    199 
    200 	if (ifp->if_flags & IFF_RUNNING) {
    201 		IXGBE_TX_LOCK(txr);
    202 		ixgbe_legacy_start_locked(ifp, txr);
    203 		IXGBE_TX_UNLOCK(txr);
    204 	}
    205 } /* ixgbe_legacy_start */
    206 
    207 /************************************************************************
    208  * ixgbe_mq_start - Multiqueue Transmit Entry Point
    209  *
    210  *   (if_transmit function)
    211  ************************************************************************/
    212 int
    213 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
    214 {
    215 	struct ixgbe_softc *sc = ifp->if_softc;
    216 	struct tx_ring	*txr;
    217 	int		i;
    218 #ifdef RSS
    219 	uint32_t bucket_id;
    220 #endif
    221 
    222 	/*
    223 	 * When doing RSS, map it to the same outbound queue
    224 	 * as the incoming flow would be mapped to.
    225 	 *
     226 	 * If everything is set up correctly, this should be the
     227 	 * same bucket as the one the current CPU is assigned to.
    228 	 */
    229 #ifdef RSS
    230 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
    231 		if ((sc->feat_en & IXGBE_FEATURE_RSS) &&
    232 		    (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
    233 		    &bucket_id) == 0)) {
    234 			i = bucket_id % sc->num_queues;
    235 #ifdef IXGBE_DEBUG
    236 			if (bucket_id > sc->num_queues)
    237 				if_printf(ifp,
    238 				    "bucket_id (%d) > num_queues (%d)\n",
    239 				    bucket_id, sc->num_queues);
    240 #endif
    241 		} else
    242 			i = m->m_pkthdr.flowid % sc->num_queues;
    243 	} else
     244 #endif /* RSS */
    245 		i = (cpu_index(curcpu()) % ncpu) % sc->num_queues;
    246 
    247 	/* Check for a hung queue and pick alternative */
    248 	if (((1ULL << i) & sc->active_queues) == 0)
    249 		i = ffs64(sc->active_queues);
    250 
    251 	txr = &sc->tx_rings[i];
    252 
    253 	if (__predict_false(!pcq_put(txr->txr_interq, m))) {
    254 		m_freem(m);
    255 		IXGBE_EVC_ADD(&txr->pcq_drops, 1);
    256 		return ENOBUFS;
    257 	}
    258 #ifdef IXGBE_ALWAYS_TXDEFER
    259 	kpreempt_disable();
    260 	softint_schedule(txr->txr_si);
    261 	kpreempt_enable();
    262 #else
    263 	if (IXGBE_TX_TRYLOCK(txr)) {
    264 		ixgbe_mq_start_locked(ifp, txr);
    265 		IXGBE_TX_UNLOCK(txr);
    266 	} else {
    267 		if (sc->txrx_use_workqueue) {
    268 			u_int *enqueued;
    269 
    270 			/*
     271 			 * This function itself is not called from interrupt
     272 			 * context, but it can run in fast softint context
     273 			 * right after receiving forwarded packets.  We must
     274 			 * therefore protect the workqueue against being
     275 			 * enqueued twice when the machine handles both
     276 			 * locally generated and forwarded packets.
    277 			 */
    278 			enqueued = percpu_getref(sc->txr_wq_enqueued);
    279 			if (*enqueued == 0) {
    280 				*enqueued = 1;
    281 				percpu_putref(sc->txr_wq_enqueued);
    282 				workqueue_enqueue(sc->txr_wq,
    283 				    &txr->wq_cookie, curcpu());
    284 			} else
    285 				percpu_putref(sc->txr_wq_enqueued);
    286 		} else {
    287 			kpreempt_disable();
    288 			softint_schedule(txr->txr_si);
    289 			kpreempt_enable();
    290 		}
    291 	}
    292 #endif
    293 
    294 	return (0);
    295 } /* ixgbe_mq_start */
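
         /*
          * Illustrative example of the queue selection above: without
          * RSS, a packet sent from the CPU at index 11 on an adapter with
          * num_queues = 8 maps to i = (11 % ncpu) % 8 = 3 (assuming
          * ncpu > 11), so it lands on tx_rings[3] unless that bit is
          * clear in active_queues.
          */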
    296 
    297 /************************************************************************
    298  * ixgbe_mq_start_locked
    299  ************************************************************************/
    300 int
    301 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
    302 {
    303 	struct mbuf    *next;
    304 	int            enqueued = 0, err = 0;
    305 
    306 	if (txr->sc->link_active != LINK_STATE_UP) {
    307 		/*
     308 		 * Discard all packets buffered in txr_interq to avoid
     309 		 * sending stale packets when the link next comes up.
    310 		 */
    311 		ixgbe_drain(ifp, txr);
    312 		return (ENETDOWN);
    313 	}
    314 	if ((ifp->if_flags & IFF_RUNNING) == 0)
    315 		return (ENETDOWN);
    316 	if (txr->txr_no_space)
    317 		return (ENETDOWN);
    318 
    319 	/* Process the queue */
    320 	while ((next = pcq_get(txr->txr_interq)) != NULL) {
    321 		if ((err = ixgbe_xmit(txr, next)) != 0) {
    322 			m_freem(next);
    323 			/* All errors are counted in ixgbe_xmit() */
    324 			break;
    325 		}
    326 		enqueued++;
    327 #if __FreeBSD_version >= 1100036
    328 		/*
     329 		 * If this adapter is a VF (IXGBE_FEATURE_VF) and the
     330 		 * frame is multicast, account for it in the outgoing
     331 		 * multicast counter.
    332 		 */
    333 		if ((txr->sc->feat_en & IXGBE_FEATURE_VF) &&
    334 		    (next->m_flags & M_MCAST))
    335 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
    336 #endif
    337 		/* Send a copy of the frame to the BPF listener */
    338 		bpf_mtap(ifp, next, BPF_D_OUT);
    339 		if ((ifp->if_flags & IFF_RUNNING) == 0)
    340 			break;
    341 	}
    342 
    343 	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->sc))
    344 		ixgbe_txeof(txr);
    345 
    346 	return (err);
    347 } /* ixgbe_mq_start_locked */
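
         /*
          * Explanatory note: the threshold check above lets the transmit
          * path itself reclaim completed descriptors via ixgbe_txeof()
          * whenever the free count drops below
          * IXGBE_TX_CLEANUP_THRESHOLD, instead of relying solely on later
          * cleanup.
          */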
    348 
    349 /************************************************************************
    350  * ixgbe_deferred_mq_start
    351  *
    352  *   Called from a softint and workqueue (indirectly) to drain queued
    353  *   transmit packets.
    354  ************************************************************************/
    355 void
    356 ixgbe_deferred_mq_start(void *arg)
    357 {
    358 	struct tx_ring *txr = arg;
    359 	struct ixgbe_softc *sc = txr->sc;
    360 	struct ifnet   *ifp = sc->ifp;
    361 
    362 	IXGBE_TX_LOCK(txr);
    363 	if (pcq_peek(txr->txr_interq) != NULL)
    364 		ixgbe_mq_start_locked(ifp, txr);
    365 	IXGBE_TX_UNLOCK(txr);
    366 } /* ixgbe_deferred_mq_start */
    367 
    368 /************************************************************************
    369  * ixgbe_deferred_mq_start_work
    370  *
    371  *   Called from a workqueue to drain queued transmit packets.
    372  ************************************************************************/
    373 void
    374 ixgbe_deferred_mq_start_work(struct work *wk, void *arg)
    375 {
    376 	struct tx_ring *txr = container_of(wk, struct tx_ring, wq_cookie);
    377 	struct ixgbe_softc *sc = txr->sc;
    378 	u_int *enqueued = percpu_getref(sc->txr_wq_enqueued);
    379 	*enqueued = 0;
    380 	percpu_putref(sc->txr_wq_enqueued);
    381 
    382 	ixgbe_deferred_mq_start(txr);
     383 } /* ixgbe_deferred_mq_start_work */
    384 
    385 /************************************************************************
    386  * ixgbe_drain_all
    387  ************************************************************************/
    388 void
    389 ixgbe_drain_all(struct ixgbe_softc *sc)
    390 {
    391 	struct ifnet *ifp = sc->ifp;
    392 	struct ix_queue *que = sc->queues;
    393 
    394 	for (int i = 0; i < sc->num_queues; i++, que++) {
    395 		struct tx_ring  *txr = que->txr;
    396 
    397 		IXGBE_TX_LOCK(txr);
    398 		ixgbe_drain(ifp, txr);
    399 		IXGBE_TX_UNLOCK(txr);
    400 	}
    401 }
    402 
    403 /************************************************************************
    404  * ixgbe_xmit
    405  *
    406  *   Maps the mbufs to tx descriptors, allowing the
    407  *   TX engine to transmit the packets.
    408  *
    409  *   Return 0 on success, positive on failure
    410  ************************************************************************/
    411 static int
    412 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
    413 {
    414 	struct ixgbe_softc      *sc = txr->sc;
    415 	struct ixgbe_tx_buf     *txbuf;
    416 	union ixgbe_adv_tx_desc *txd = NULL;
    417 	struct ifnet	        *ifp = sc->ifp;
    418 	int                     i, j, error;
    419 	int                     first;
    420 	u32                     olinfo_status = 0, cmd_type_len;
    421 	bool                    remap = TRUE;
    422 	bus_dmamap_t            map;
    423 
    424 	/* Basic descriptor defines */
    425 	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
    426 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
    427 
    428 	if (vlan_has_tag(m_head))
    429 		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
    430 
    431 	/*
     432 	 * It is important to capture the first descriptor
     433 	 * used, because its tx_buffer records the EOP
     434 	 * descriptor we ask the hardware to report back on.
    435 	 */
    436 	first = txr->next_avail_desc;
    437 	txbuf = &txr->tx_buffers[first];
    438 	map = txbuf->map;
    439 
    440 	/*
    441 	 * Map the packet for DMA.
    442 	 */
    443 retry:
    444 	error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map, m_head,
    445 	    BUS_DMA_NOWAIT);
    446 
    447 	if (__predict_false(error)) {
    448 		struct mbuf *m;
    449 
    450 		switch (error) {
    451 		case EAGAIN:
    452 			txr->q_eagain_tx_dma_setup++;
    453 			return EAGAIN;
    454 		case ENOMEM:
    455 			txr->q_enomem_tx_dma_setup++;
    456 			return EAGAIN;
    457 		case EFBIG:
    458 			/* Try it again? - one try */
    459 			if (remap == TRUE) {
    460 				remap = FALSE;
    461 				/*
    462 				 * XXX: m_defrag will choke on
    463 				 * non-MCLBYTES-sized clusters
    464 				 */
    465 				txr->q_efbig_tx_dma_setup++;
    466 				m = m_defrag(m_head, M_NOWAIT);
    467 				if (m == NULL) {
    468 					txr->q_mbuf_defrag_failed++;
    469 					return ENOBUFS;
    470 				}
    471 				m_head = m;
    472 				goto retry;
    473 			} else {
    474 				txr->q_efbig2_tx_dma_setup++;
    475 				return error;
    476 			}
    477 		case EINVAL:
    478 			txr->q_einval_tx_dma_setup++;
    479 			return error;
    480 		default:
    481 			txr->q_other_tx_dma_setup++;
    482 			return error;
    483 		}
    484 	}
    485 
    486 	/* Make certain there are enough descriptors */
    487 	if (txr->tx_avail < (map->dm_nsegs + 2)) {
    488 		txr->txr_no_space = true;
    489 		IXGBE_EVC_ADD(&txr->no_desc_avail, 1);
    490 		ixgbe_dmamap_unload(txr->txtag, txbuf->map);
    491 		return EAGAIN;
    492 	}
    493 
    494 	/*
    495 	 * Set up the appropriate offload context if requested,
    496 	 * this may consume one TX descriptor.
    497 	 */
    498 	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
    499 	if (__predict_false(error)) {
    500 		return (error);
    501 	}
    502 
    503 #ifdef IXGBE_FDIR
    504 	/* Do the flow director magic */
    505 	if ((sc->feat_en & IXGBE_FEATURE_FDIR) &&
    506 	    (txr->atr_sample) && (!sc->fdir_reinit)) {
    507 		++txr->atr_count;
    508 		if (txr->atr_count >= atr_sample_rate) {
    509 			ixgbe_atr(txr, m_head);
    510 			txr->atr_count = 0;
    511 		}
    512 	}
    513 #endif
    514 
    515 	olinfo_status |= IXGBE_ADVTXD_CC;
    516 	i = txr->next_avail_desc;
    517 	for (j = 0; j < map->dm_nsegs; j++) {
    518 		bus_size_t seglen;
    519 		uint64_t segaddr;
    520 
    521 		txbuf = &txr->tx_buffers[i];
    522 		txd = &txr->tx_base[i];
    523 		seglen = map->dm_segs[j].ds_len;
    524 		segaddr = htole64(map->dm_segs[j].ds_addr);
    525 
    526 		txd->read.buffer_addr = segaddr;
    527 		txd->read.cmd_type_len = htole32(cmd_type_len | seglen);
    528 		txd->read.olinfo_status = htole32(olinfo_status);
    529 
    530 		if (++i == txr->num_desc)
    531 			i = 0;
    532 	}
    533 
    534 	txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
    535 	txr->tx_avail -= map->dm_nsegs;
    536 	txr->next_avail_desc = i;
    537 
    538 	txbuf->m_head = m_head;
    539 	/*
     540 	 * Swap the maps so that the last descriptor, which gets
     541 	 * the completion interrupt, holds the real (loaded) map,
     542 	 * and the first descriptor takes over the unused map
     543 	 * from this buffer.
    544 	 */
    545 	txr->tx_buffers[first].map = txbuf->map;
    546 	txbuf->map = map;
    547 	bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
    548 	    BUS_DMASYNC_PREWRITE);
    549 
    550 	/* Set the EOP descriptor that will be marked done */
    551 	txbuf = &txr->tx_buffers[first];
    552 	txbuf->eop = txd;
    553 
    554 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
    555 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
    556 	/*
     557 	 * Advance the Transmit Descriptor Tail (TDT); this tells the
    558 	 * hardware that this frame is available to transmit.
    559 	 */
    560 	IXGBE_EVC_ADD(&txr->total_packets, 1);
    561 	IXGBE_WRITE_REG(&sc->hw, txr->tail, i);
    562 
    563 	net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
    564 	if_statadd_ref(ifp, nsr, if_obytes, m_head->m_pkthdr.len);
    565 	if (m_head->m_flags & M_MCAST)
    566 		if_statinc_ref(ifp, nsr, if_omcasts);
    567 	IF_STAT_PUTREF(ifp);
    568 
    569 	/* Mark queue as having work */
    570 	if (txr->busy == 0)
    571 		txr->busy = 1;
    572 
    573 	return (0);
    574 } /* ixgbe_xmit */
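
         /*
          * Note on descriptor accounting in ixgbe_xmit(): each packet
          * consumes one data descriptor per DMA segment (map->dm_nsegs)
          * plus, when ixgbe_tx_ctx_setup() emits one, a context
          * descriptor; the "dm_nsegs + 2" headroom check above leaves
          * room for that context descriptor before the offload setup
          * runs.
          */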
    575 
    576 /************************************************************************
    577  * ixgbe_drain
    578  ************************************************************************/
    579 static void
    580 ixgbe_drain(struct ifnet *ifp, struct tx_ring *txr)
    581 {
    582 	struct mbuf *m;
    583 
    584 	IXGBE_TX_LOCK_ASSERT(txr);
    585 
    586 	if (txr->me == 0) {
    587 		while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
    588 			IFQ_DEQUEUE(&ifp->if_snd, m);
    589 			m_freem(m);
    590 			IF_DROP(&ifp->if_snd);
    591 		}
    592 	}
    593 
    594 	while ((m = pcq_get(txr->txr_interq)) != NULL) {
    595 		m_freem(m);
    596 		IXGBE_EVC_ADD(&txr->pcq_drops, 1);
    597 	}
    598 }
    599 
    600 /************************************************************************
    601  * ixgbe_allocate_transmit_buffers
    602  *
    603  *   Allocate memory for tx_buffer structures. The tx_buffer stores all
    604  *   the information needed to transmit a packet on the wire. This is
     605  *   called only once at attach time; setup is done on every reset.
    606  ************************************************************************/
    607 static int
    608 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
    609 {
    610 	struct ixgbe_softc  *sc = txr->sc;
    611 	device_t            dev = sc->dev;
    612 	struct ixgbe_tx_buf *txbuf;
    613 	int                 error, i;
    614 
    615 	/*
    616 	 * Setup DMA descriptor areas.
    617 	 */
    618 	error = ixgbe_dma_tag_create(
    619 	         /*      parent */ sc->osdep.dmat,
    620 	         /*   alignment */ 1,
    621 	         /*      bounds */ 0,
    622 	         /*     maxsize */ IXGBE_TSO_SIZE,
    623 	         /*   nsegments */ sc->num_segs,
    624 	         /*  maxsegsize */ PAGE_SIZE,
    625 	         /*       flags */ 0,
    626 	                           &txr->txtag);
    627 	if (error != 0) {
     628 		aprint_error_dev(dev, "Unable to allocate TX DMA tag\n");
    629 		goto fail;
    630 	}
    631 
    632 	txr->tx_buffers = kmem_zalloc(sizeof(struct ixgbe_tx_buf) *
    633 	    sc->num_tx_desc, KM_SLEEP);
    634 
    635 	/* Create the descriptor buffer dma maps */
    636 	txbuf = txr->tx_buffers;
    637 	for (i = 0; i < sc->num_tx_desc; i++, txbuf++) {
    638 		error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
    639 		if (error != 0) {
    640 			aprint_error_dev(dev,
    641 			    "Unable to create TX DMA map (%d)\n", error);
    642 			goto fail;
    643 		}
    644 	}
    645 
    646 	return 0;
    647 fail:
     648 	/* We free everything; this handles the case where we failed partway through */
    649 #if 0 /* XXX was FreeBSD */
    650 	ixgbe_free_transmit_structures(sc);
    651 #else
    652 	ixgbe_free_transmit_buffers(txr);
    653 #endif
    654 	return (error);
    655 } /* ixgbe_allocate_transmit_buffers */
    656 
    657 /************************************************************************
    658  * ixgbe_setup_transmit_ring - Initialize a transmit ring.
    659  ************************************************************************/
    660 static void
    661 ixgbe_setup_transmit_ring(struct tx_ring *txr)
    662 {
    663 	struct ixgbe_softc    *sc = txr->sc;
    664 	struct ixgbe_tx_buf   *txbuf;
    665 #ifdef DEV_NETMAP
     666 	struct netmap_adapter *na = NA(sc->ifp);
    667 	struct netmap_slot    *slot;
    668 #endif /* DEV_NETMAP */
    669 
    670 	/* Clear the old ring contents */
    671 	IXGBE_TX_LOCK(txr);
    672 
    673 #ifdef DEV_NETMAP
    674 	if (sc->feat_en & IXGBE_FEATURE_NETMAP) {
    675 		/*
    676 		 * (under lock): if in netmap mode, do some consistency
    677 		 * checks and set slot to entry 0 of the netmap ring.
    678 		 */
    679 		slot = netmap_reset(na, NR_TX, txr->me, 0);
    680 	}
    681 #endif /* DEV_NETMAP */
    682 
    683 	bzero((void *)txr->tx_base,
    684 	    (sizeof(union ixgbe_adv_tx_desc)) * sc->num_tx_desc);
    685 	/* Reset indices */
    686 	txr->next_avail_desc = 0;
    687 	txr->next_to_clean = 0;
    688 
    689 	/* Free any existing tx buffers. */
    690 	txbuf = txr->tx_buffers;
    691 	for (int i = 0; i < txr->num_desc; i++, txbuf++) {
    692 		if (txbuf->m_head != NULL) {
    693 			bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
    694 			    0, txbuf->m_head->m_pkthdr.len,
    695 			    BUS_DMASYNC_POSTWRITE);
    696 			ixgbe_dmamap_unload(txr->txtag, txbuf->map);
    697 			m_freem(txbuf->m_head);
    698 			txbuf->m_head = NULL;
    699 		}
    700 
    701 #ifdef DEV_NETMAP
    702 		/*
    703 		 * In netmap mode, set the map for the packet buffer.
    704 		 * NOTE: Some drivers (not this one) also need to set
    705 		 * the physical buffer address in the NIC ring.
    706 		 * Slots in the netmap ring (indexed by "si") are
    707 		 * kring->nkr_hwofs positions "ahead" wrt the
    708 		 * corresponding slot in the NIC ring. In some drivers
    709 		 * (not here) nkr_hwofs can be negative. Function
    710 		 * netmap_idx_n2k() handles wraparounds properly.
    711 		 */
    712 		if ((sc->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
    713 			int si = netmap_idx_n2k(na->tx_rings[txr->me], i);
    714 			netmap_load_map(na, txr->txtag,
    715 			    txbuf->map, NMB(na, slot + si));
    716 		}
    717 #endif /* DEV_NETMAP */
    718 
    719 		/* Clear the EOP descriptor pointer */
    720 		txbuf->eop = NULL;
    721 	}
    722 
    723 #ifdef IXGBE_FDIR
    724 	/* Set the rate at which we sample packets */
    725 	if (sc->feat_en & IXGBE_FEATURE_FDIR)
    726 		txr->atr_sample = atr_sample_rate;
    727 #endif
    728 
    729 	/* Set number of descriptors available */
    730 	txr->tx_avail = sc->num_tx_desc;
    731 
    732 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
    733 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
    734 	IXGBE_TX_UNLOCK(txr);
    735 } /* ixgbe_setup_transmit_ring */
    736 
    737 /************************************************************************
    738  * ixgbe_setup_transmit_structures - Initialize all transmit rings.
    739  ************************************************************************/
    740 int
    741 ixgbe_setup_transmit_structures(struct ixgbe_softc *sc)
    742 {
    743 	struct tx_ring *txr = sc->tx_rings;
    744 
    745 	for (int i = 0; i < sc->num_queues; i++, txr++)
    746 		ixgbe_setup_transmit_ring(txr);
    747 
    748 	return (0);
    749 } /* ixgbe_setup_transmit_structures */
    750 
    751 /************************************************************************
    752  * ixgbe_free_transmit_structures - Free all transmit rings.
    753  ************************************************************************/
    754 void
    755 ixgbe_free_transmit_structures(struct ixgbe_softc *sc)
    756 {
    757 	struct tx_ring *txr = sc->tx_rings;
    758 
    759 	for (int i = 0; i < sc->num_queues; i++, txr++) {
    760 		ixgbe_free_transmit_buffers(txr);
    761 		ixgbe_dma_free(sc, &txr->txdma);
    762 		IXGBE_TX_LOCK_DESTROY(txr);
    763 	}
    764 	kmem_free(sc->tx_rings, sizeof(struct tx_ring) * sc->num_queues);
    765 } /* ixgbe_free_transmit_structures */
    766 
    767 /************************************************************************
    768  * ixgbe_free_transmit_buffers
    769  *
    770  *   Free transmit ring related data structures.
    771  ************************************************************************/
    772 static void
    773 ixgbe_free_transmit_buffers(struct tx_ring *txr)
    774 {
    775 	struct ixgbe_softc  *sc = txr->sc;
    776 	struct ixgbe_tx_buf *tx_buffer;
    777 	int                 i;
    778 
    779 	INIT_DEBUGOUT("ixgbe_free_transmit_buffers: begin");
    780 
    781 	if (txr->tx_buffers == NULL)
    782 		return;
    783 
    784 	tx_buffer = txr->tx_buffers;
    785 	for (i = 0; i < sc->num_tx_desc; i++, tx_buffer++) {
    786 		if (tx_buffer->m_head != NULL) {
    787 			bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
    788 			    0, tx_buffer->m_head->m_pkthdr.len,
    789 			    BUS_DMASYNC_POSTWRITE);
    790 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
    791 			m_freem(tx_buffer->m_head);
    792 			tx_buffer->m_head = NULL;
    793 			if (tx_buffer->map != NULL) {
    794 				ixgbe_dmamap_destroy(txr->txtag,
    795 				    tx_buffer->map);
    796 				tx_buffer->map = NULL;
    797 			}
    798 		} else if (tx_buffer->map != NULL) {
    799 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
    800 			ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
    801 			tx_buffer->map = NULL;
    802 		}
    803 	}
    804 	if (txr->txr_interq != NULL) {
    805 		struct mbuf *m;
    806 
    807 		while ((m = pcq_get(txr->txr_interq)) != NULL)
    808 			m_freem(m);
    809 		pcq_destroy(txr->txr_interq);
    810 	}
    811 	if (txr->tx_buffers != NULL) {
    812 		kmem_free(txr->tx_buffers,
    813 		    sizeof(struct ixgbe_tx_buf) * sc->num_tx_desc);
    814 		txr->tx_buffers = NULL;
    815 	}
    816 	if (txr->txtag != NULL) {
    817 		ixgbe_dma_tag_destroy(txr->txtag);
    818 		txr->txtag = NULL;
    819 	}
    820 } /* ixgbe_free_transmit_buffers */
    821 
    822 /************************************************************************
    823  * ixgbe_tx_ctx_setup
    824  *
    825  *   Advanced Context Descriptor setup for VLAN, CSUM or TSO
    826  ************************************************************************/
    827 static int
    828 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
    829     u32 *cmd_type_len, u32 *olinfo_status)
    830 {
    831 	struct ixgbe_softc               *sc = txr->sc;
    832 	struct ixgbe_adv_tx_context_desc *TXD;
    833 	struct ether_vlan_header         *eh;
    834 #ifdef INET
    835 	struct ip                        *ip;
    836 #endif
    837 #ifdef INET6
    838 	struct ip6_hdr                   *ip6;
    839 #endif
    840 	int                              ehdrlen, ip_hlen = 0;
    841 	int                              offload = TRUE;
    842 	int                              ctxd = txr->next_avail_desc;
    843 	u32                              vlan_macip_lens = 0;
    844 	u32                              type_tucmd_mlhl = 0;
    845 	u16                              vtag = 0;
    846 	u16                              etype;
    847 	u8                               ipproto = 0;
    848 	char                             *l3d;
    849 
    850 	/* First check if TSO is to be used */
    851 	if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4 | M_CSUM_TSOv6)) {
    852 		int rv = ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status);
    853 
    854 		if (rv != 0)
    855 			IXGBE_EVC_ADD(&sc->tso_err, 1);
    856 		return rv;
    857 	}
    858 
    859 	if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
    860 		offload = FALSE;
    861 
    862 	/* Indicate the whole packet as payload when not doing TSO */
    863 	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
    864 
    865 	/*
    866 	 * In advanced descriptors the vlan tag must
    867 	 * be placed into the context descriptor. Hence
    868 	 * we need to make one even if not doing offloads.
    869 	 */
    870 	if (vlan_has_tag(mp)) {
    871 		vtag = htole16(vlan_get_tag(mp));
    872 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
    873 	} else if (!(txr->sc->feat_en & IXGBE_FEATURE_NEEDS_CTXD) &&
    874 	           (offload == FALSE))
    875 		return (0);
    876 
    877 	/*
    878 	 * Determine where frame payload starts.
    879 	 * Jump over vlan headers if already present,
    880 	 * helpful for QinQ too.
    881 	 */
    882 	KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
    883 	eh = mtod(mp, struct ether_vlan_header *);
    884 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
    885 		KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
    886 		etype = ntohs(eh->evl_proto);
    887 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
    888 	} else {
    889 		etype = ntohs(eh->evl_encap_proto);
    890 		ehdrlen = ETHER_HDR_LEN;
    891 	}
    892 
    893 	/* Set the ether header length */
    894 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
    895 
    896 	if (offload == FALSE)
    897 		goto no_offloads;
    898 
    899 	/*
    900 	 * If the first mbuf only includes the ethernet header,
    901 	 * jump to the next one
    902 	 * XXX: This assumes the stack splits mbufs containing headers
    903 	 *      on header boundaries
    904 	 * XXX: And assumes the entire IP header is contained in one mbuf
    905 	 */
    906 	if (mp->m_len == ehdrlen && mp->m_next)
    907 		l3d = mtod(mp->m_next, char *);
    908 	else
    909 		l3d = mtod(mp, char *) + ehdrlen;
    910 
    911 	switch (etype) {
    912 #ifdef INET
    913 	case ETHERTYPE_IP:
    914 		ip = (struct ip *)(l3d);
    915 		ip_hlen = ip->ip_hl << 2;
    916 		ipproto = ip->ip_p;
    917 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
    918 		KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
    919 		    ip->ip_sum == 0);
    920 		break;
    921 #endif
    922 #ifdef INET6
    923 	case ETHERTYPE_IPV6:
    924 		ip6 = (struct ip6_hdr *)(l3d);
    925 		ip_hlen = sizeof(struct ip6_hdr);
    926 		ipproto = ip6->ip6_nxt;
    927 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
    928 		break;
    929 #endif
    930 	default:
    931 		offload = false;
    932 		break;
    933 	}
    934 
    935 	if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
    936 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
    937 
    938 	vlan_macip_lens |= ip_hlen;
    939 
    940 	/* No support for offloads for non-L4 next headers */
    941 	switch (ipproto) {
    942 	case IPPROTO_TCP:
    943 		if (mp->m_pkthdr.csum_flags &
    944 		    (M_CSUM_TCPv4 | M_CSUM_TCPv6))
    945 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
    946 		else
    947 			offload = false;
    948 		break;
    949 	case IPPROTO_UDP:
    950 		if (mp->m_pkthdr.csum_flags &
    951 		    (M_CSUM_UDPv4 | M_CSUM_UDPv6))
    952 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
    953 		else
    954 			offload = false;
    955 		break;
    956 	default:
    957 		offload = false;
    958 		break;
    959 	}
    960 
    961 	if (offload) /* Insert L4 checksum into data descriptors */
    962 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
    963 
    964 no_offloads:
    965 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
    966 
    967 	/* Now ready a context descriptor */
    968 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
    969 
    970 	/* Now copy bits into descriptor */
    971 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
    972 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
    973 	TXD->seqnum_seed = htole32(0);
    974 	TXD->mss_l4len_idx = htole32(0);
    975 
    976 	/* We've consumed the first desc, adjust counters */
    977 	if (++ctxd == txr->num_desc)
    978 		ctxd = 0;
    979 	txr->next_avail_desc = ctxd;
    980 	--txr->tx_avail;
    981 
    982 	return (0);
    983 } /* ixgbe_tx_ctx_setup */
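
         /*
          * Worked example for the context descriptor fields (illustrative
          * only): an untagged TCP/IPv4 frame with a 14-byte Ethernet
          * header and a 20-byte IP header yields
          * vlan_macip_lens = (14 << IXGBE_ADVTXD_MACLEN_SHIFT) | 20 and
          * type_tucmd_mlhl = DEXT | DTYP_CTXT | TUCMD_IPV4 | L4T_TCP,
          * while *olinfo_status gets IXGBE_TXD_POPTS_TXSM << 8 to request
          * L4 checksum insertion in the data descriptors.
          */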
    984 
    985 /************************************************************************
    986  * ixgbe_tso_setup
    987  *
    988  *   Setup work for hardware segmentation offload (TSO) on
    989  *   adapters using advanced tx descriptors
    990  ************************************************************************/
    991 static int
    992 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len,
    993     u32 *olinfo_status)
    994 {
    995 	struct ixgbe_adv_tx_context_desc *TXD;
    996 	struct ether_vlan_header         *eh;
    997 #ifdef INET6
    998 	struct ip6_hdr                   *ip6;
    999 #endif
   1000 #ifdef INET
   1001 	struct ip                        *ip;
   1002 #endif
   1003 	struct tcphdr                    *th;
   1004 	int                              ctxd, ehdrlen, ip_hlen, tcp_hlen;
   1005 	u32                              vlan_macip_lens = 0;
   1006 	u32                              type_tucmd_mlhl = 0;
   1007 	u32                              mss_l4len_idx = 0, paylen;
   1008 	u16                              vtag = 0, eh_type;
   1009 
   1010 	/*
   1011 	 * Determine where frame payload starts.
   1012 	 * Jump over vlan headers if already present
   1013 	 */
   1014 	eh = mtod(mp, struct ether_vlan_header *);
   1015 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
   1016 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
   1017 		eh_type = eh->evl_proto;
   1018 	} else {
   1019 		ehdrlen = ETHER_HDR_LEN;
   1020 		eh_type = eh->evl_encap_proto;
   1021 	}
   1022 
   1023 	switch (ntohs(eh_type)) {
   1024 #ifdef INET
   1025 	case ETHERTYPE_IP:
   1026 		ip = (struct ip *)(mp->m_data + ehdrlen);
   1027 		if (ip->ip_p != IPPROTO_TCP)
   1028 			return (ENXIO);
   1029 		ip->ip_sum = 0;
   1030 		ip_hlen = ip->ip_hl << 2;
   1031 		th = (struct tcphdr *)((char *)ip + ip_hlen);
   1032 		th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
   1033 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
   1034 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
   1035 		/* Tell transmit desc to also do IPv4 checksum. */
   1036 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
   1037 		break;
   1038 #endif
   1039 #ifdef INET6
   1040 	case ETHERTYPE_IPV6:
   1041 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
   1042 		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
   1043 		if (ip6->ip6_nxt != IPPROTO_TCP)
   1044 			return (ENXIO);
   1045 		ip_hlen = sizeof(struct ip6_hdr);
   1046 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
   1047 		th = (struct tcphdr *)((char *)ip6 + ip_hlen);
   1048 		th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
   1049 		    &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
   1050 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
   1051 		break;
   1052 #endif
   1053 	default:
   1054 		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
   1055 		    __func__, ntohs(eh_type));
   1056 		break;
   1057 	}
   1058 
   1059 	ctxd = txr->next_avail_desc;
   1060 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
   1061 
   1062 	tcp_hlen = th->th_off << 2;
   1063 
   1064 	/* This is used in the transmit desc in encap */
   1065 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
   1066 
   1067 	/* VLAN MACLEN IPLEN */
   1068 	if (vlan_has_tag(mp)) {
   1069 		vtag = htole16(vlan_get_tag(mp));
   1070 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
   1071 	}
   1072 
   1073 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
   1074 	vlan_macip_lens |= ip_hlen;
   1075 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
   1076 
   1077 	/* ADV DTYPE TUCMD */
   1078 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
   1079 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
   1080 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
   1081 
   1082 	/* MSS L4LEN IDX */
   1083 	mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
   1084 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
   1085 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
   1086 
   1087 	TXD->seqnum_seed = htole32(0);
   1088 
   1089 	if (++ctxd == txr->num_desc)
   1090 		ctxd = 0;
   1091 
   1092 	txr->tx_avail--;
   1093 	txr->next_avail_desc = ctxd;
   1094 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
   1095 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
   1096 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
   1097 	IXGBE_EVC_ADD(&txr->tso_tx, 1);
   1098 
   1099 	return (0);
   1100 } /* ixgbe_tso_setup */
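
         /*
          * Worked example for the TSO path (illustrative only): a
          * 9014-byte packet with a 14-byte Ethernet header, a 20-byte
          * IPv4 header and a 20-byte TCP header gives
          * paylen = 9014 - 14 - 20 - 20 = 8960, which is placed in
          * olinfo_status, while the MSS (m_pkthdr.segsz) and tcp_hlen go
          * into mss_l4len_idx for the hardware to segment the payload.
          */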
   1101 
   1102 
   1103 /************************************************************************
   1104  * ixgbe_txeof
   1105  *
   1106  *   Examine each tx_buffer in the used queue. If the hardware is done
   1107  *   processing the packet then free associated resources. The
   1108  *   tx_buffer is put back on the free queue.
   1109  ************************************************************************/
   1110 bool
   1111 ixgbe_txeof(struct tx_ring *txr)
   1112 {
   1113 	struct ixgbe_softc	*sc = txr->sc;
   1114 	struct ifnet		*ifp = sc->ifp;
   1115 	struct ixgbe_tx_buf	*buf;
   1116 	union ixgbe_adv_tx_desc *txd;
   1117 	u32			work, processed = 0;
   1118 	u32			limit = sc->tx_process_limit;
   1119 	u16			avail;
   1120 
   1121 	KASSERT(mutex_owned(&txr->tx_mtx));
   1122 
   1123 #ifdef DEV_NETMAP
   1124 	if ((sc->feat_en & IXGBE_FEATURE_NETMAP) &&
   1125 	    (sc->ifp->if_capenable & IFCAP_NETMAP)) {
    1126 		struct netmap_adapter *na = NA(sc->ifp);
   1127 		struct netmap_kring *kring = na->tx_rings[txr->me];
   1128 		txd = txr->tx_base;
   1129 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1130 		    BUS_DMASYNC_POSTREAD);
   1131 		/*
   1132 		 * In netmap mode, all the work is done in the context
   1133 		 * of the client thread. Interrupt handlers only wake up
   1134 		 * clients, which may be sleeping on individual rings
   1135 		 * or on a global resource for all rings.
   1136 		 * To implement tx interrupt mitigation, we wake up the client
   1137 		 * thread roughly every half ring, even if the NIC interrupts
   1138 		 * more frequently. This is implemented as follows:
   1139 		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
   1140 		 *   the slot that should wake up the thread (nkr_num_slots
   1141 		 *   means the user thread should not be woken up);
   1142 		 * - the driver ignores tx interrupts unless netmap_mitigate=0
   1143 		 *   or the slot has the DD bit set.
   1144 		 */
   1145 		if (kring->nr_kflags < kring->nkr_num_slots &&
   1146 		    le32toh(txd[kring->nr_kflags].wb.status) & IXGBE_TXD_STAT_DD) {
   1147 			netmap_tx_irq(ifp, txr->me);
   1148 		}
   1149 		return false;
   1150 	}
   1151 #endif /* DEV_NETMAP */
   1152 
   1153 	if (txr->tx_avail == txr->num_desc) {
   1154 		txr->busy = 0;
   1155 		return false;
   1156 	}
   1157 
   1158 	/* Get work starting point */
   1159 	work = txr->next_to_clean;
   1160 	buf = &txr->tx_buffers[work];
   1161 	txd = &txr->tx_base[work];
   1162 	work -= txr->num_desc; /* The distance to ring end */
   1163 	avail = txr->tx_avail;
   1164 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1165 	    BUS_DMASYNC_POSTREAD);
   1166 
   1167 	do {
   1168 		union ixgbe_adv_tx_desc *eop = buf->eop;
   1169 		if (eop == NULL) /* No work */
   1170 			break;
   1171 
   1172 		if ((le32toh(eop->wb.status) & IXGBE_TXD_STAT_DD) == 0)
   1173 			break;	/* I/O not complete */
   1174 
   1175 		if (buf->m_head) {
   1176 			txr->bytes += buf->m_head->m_pkthdr.len;
   1177 			bus_dmamap_sync(txr->txtag->dt_dmat, buf->map,
   1178 			    0, buf->m_head->m_pkthdr.len,
   1179 			    BUS_DMASYNC_POSTWRITE);
   1180 			ixgbe_dmamap_unload(txr->txtag, buf->map);
   1181 			m_freem(buf->m_head);
   1182 			buf->m_head = NULL;
   1183 		}
   1184 		buf->eop = NULL;
   1185 		++avail;
   1186 
   1187 		/* We clean the range if multi segment */
   1188 		while (txd != eop) {
   1189 			++txd;
   1190 			++buf;
   1191 			++work;
   1192 			/* wrap the ring? */
   1193 			if (__predict_false(!work)) {
   1194 				work -= txr->num_desc;
   1195 				buf = txr->tx_buffers;
   1196 				txd = txr->tx_base;
   1197 			}
   1198 			if (buf->m_head) {
   1199 				txr->bytes +=
   1200 				    buf->m_head->m_pkthdr.len;
   1201 				bus_dmamap_sync(txr->txtag->dt_dmat,
   1202 				    buf->map,
   1203 				    0, buf->m_head->m_pkthdr.len,
   1204 				    BUS_DMASYNC_POSTWRITE);
   1205 				ixgbe_dmamap_unload(txr->txtag,
   1206 				    buf->map);
   1207 				m_freem(buf->m_head);
   1208 				buf->m_head = NULL;
   1209 			}
   1210 			++avail;
   1211 			buf->eop = NULL;
   1212 
   1213 		}
   1214 		++processed;
   1215 
   1216 		/* Try the next packet */
   1217 		++txd;
   1218 		++buf;
   1219 		++work;
   1220 		/* reset with a wrap */
   1221 		if (__predict_false(!work)) {
   1222 			work -= txr->num_desc;
   1223 			buf = txr->tx_buffers;
   1224 			txd = txr->tx_base;
   1225 		}
   1226 		prefetch(txd);
   1227 	} while (__predict_true(--limit));
   1228 
   1229 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1230 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   1231 
   1232 	work += txr->num_desc;
   1233 	txr->next_to_clean = work;
   1234 	if (processed) {
   1235 		txr->tx_avail = avail;
   1236 		txr->txr_no_space = false;
   1237 		txr->packets += processed;
   1238 		if_statadd(ifp, if_opackets, processed);
   1239 	}
   1240 
   1241 	/*
    1242 	 * Queue hang detection: we know there is work
    1243 	 * outstanding, or the early return above would have
    1244 	 * been taken, so increment busy if nothing managed
    1245 	 * to get cleaned this pass.  The local timer then
    1246 	 * checks this counter and marks the queue as HUNG
    1247 	 * once it exceeds the maximum number of attempts.
   1248 	 */
   1249 	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
   1250 		++txr->busy;
   1251 	/*
    1252 	 * If anything got cleaned we reset the state to 1;
    1253 	 * note that this turns off HUNG if it was set.
   1254 	 */
   1255 	if (processed)
   1256 		txr->busy = 1;
   1257 
   1258 	if (txr->tx_avail == txr->num_desc)
   1259 		txr->busy = 0;
   1260 
   1261 	return ((limit > 0) ? false : true);
   1262 } /* ixgbe_txeof */
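
         /*
          * Note on the index arithmetic above: "work" is biased by
          * -num_desc right after it is loaded, so it counts up towards
          * zero as descriptors are cleaned; hitting zero means the scan
          * wrapped past the end of the ring, at which point the buffer
          * and descriptor pointers are reset to the start and num_desc
          * is subtracted again.
          */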
   1263 
   1264 #ifdef RSC
   1265 /************************************************************************
   1266  * ixgbe_rsc_count
   1267  *
   1268  *   Used to detect a descriptor that has been merged by Hardware RSC.
   1269  ************************************************************************/
   1270 static inline u32
   1271 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
   1272 {
   1273 	return (le32toh(rx->wb.lower.lo_dword.data) &
   1274 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
   1275 } /* ixgbe_rsc_count */
   1276 
   1277 /************************************************************************
   1278  * ixgbe_setup_hw_rsc
   1279  *
   1280  *   Initialize Hardware RSC (LRO) feature on 82599
    1281  *   for an RX ring; this is toggled by the LRO capability
   1282  *   even though it is transparent to the stack.
   1283  *
   1284  *   NOTE: Since this HW feature only works with IPv4 and
   1285  *         testing has shown soft LRO to be as effective,
   1286  *         this feature will be disabled by default.
   1287  ************************************************************************/
   1288 static void
   1289 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
   1290 {
   1291 	struct ixgbe_softc *sc = rxr->sc;
   1292 	struct ixgbe_hw	*hw = &sc->hw;
   1293 	u32		rscctrl, rdrxctl;
   1294 
   1295 	/* If turning LRO/RSC off we need to disable it */
   1296 	if ((sc->ifp->if_capenable & IFCAP_LRO) == 0) {
   1297 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
    1298 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
         		/* Write it back, or the disable has no effect */
         		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
    1299 		return;
   1300 	}
   1301 
   1302 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
   1303 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
   1304 #ifdef DEV_NETMAP
   1305 	/* Always strip CRC unless Netmap disabled it */
   1306 	if (!(sc->feat_en & IXGBE_FEATURE_NETMAP) ||
   1307 	    !(sc->ifp->if_capenable & IFCAP_NETMAP) ||
   1308 	    ix_crcstrip)
   1309 #endif /* DEV_NETMAP */
   1310 		rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
   1311 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
   1312 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
   1313 
   1314 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
   1315 	rscctrl |= IXGBE_RSCCTL_RSCEN;
   1316 	/*
   1317 	 * Limit the total number of descriptors that
   1318 	 * can be combined, so it does not exceed 64K
   1319 	 */
   1320 	if (rxr->mbuf_sz == MCLBYTES)
   1321 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
   1322 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
   1323 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
   1324 	else if (rxr->mbuf_sz == MJUM9BYTES)
   1325 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
   1326 	else  /* Using 16K cluster */
   1327 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
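
         	/*
         	 * Illustrative arithmetic (assuming the usual cluster sizes of
         	 * 2KB, 4KB, 9KB and 16KB): 16 x 2KB = 32KB, 8 x 4KB = 32KB,
         	 * 4 x 9KB = 36KB and 1 x 16KB = 16KB, all of which stay under
         	 * the 64KB limit mentioned above.
         	 */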
   1328 
   1329 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
   1330 
   1331 	/* Enable TCP header recognition */
   1332 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
   1333 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR));
   1334 
   1335 	/* Disable RSC for ACK packets */
   1336 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
   1337 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
   1338 
   1339 	rxr->hw_rsc = TRUE;
   1340 } /* ixgbe_setup_hw_rsc */
   1341 #endif
   1342 
   1343 /************************************************************************
   1344  * ixgbe_refresh_mbufs
   1345  *
   1346  *   Refresh mbuf buffers for RX descriptor rings
    1347  *    - now keeps its own state, so discards due to resource
    1348  *      exhaustion are unnecessary.  If an mbuf cannot be
    1349  *      obtained it just returns, keeping its placeholder, so
    1350  *      it can simply be called again later to retry.
   1351  ************************************************************************/
   1352 static void
   1353 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
   1354 {
   1355 	struct ixgbe_softc  *sc = rxr->sc;
   1356 	struct ixgbe_rx_buf *rxbuf;
   1357 	struct mbuf         *mp;
   1358 	int                 i, error;
   1359 	bool                refreshed = false;
   1360 
   1361 	i = rxr->next_to_refresh;
   1362 	/* next_to_refresh points to the previous one */
   1363 	if (++i == rxr->num_desc)
   1364 		i = 0;
   1365 
   1366 	while (i != limit) {
   1367 		rxbuf = &rxr->rx_buffers[i];
   1368 		if (__predict_false(rxbuf->buf == NULL)) {
   1369 			mp = ixgbe_getcl();
   1370 			if (mp == NULL) {
   1371 				IXGBE_EVC_ADD(&rxr->no_mbuf, 1);
   1372 				goto update;
   1373 			}
   1374 			mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   1375 			IXGBE_M_ADJ(sc, rxr, mp);
   1376 		} else
   1377 			mp = rxbuf->buf;
   1378 
   1379 		/* If we're dealing with an mbuf that was copied rather
   1380 		 * than replaced, there's no need to go through busdma.
   1381 		 */
   1382 		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
   1383 			/* Get the memory mapping */
   1384 			ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   1385 			error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
   1386 			    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
   1387 			if (__predict_false(error != 0)) {
   1388 				device_printf(sc->dev, "Refresh mbufs: "
   1389 				    "payload dmamap load failure - %d\n",
   1390 				    error);
   1391 				m_free(mp);
   1392 				rxbuf->buf = NULL;
   1393 				goto update;
   1394 			}
   1395 			rxbuf->buf = mp;
   1396 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1397 			    0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
   1398 			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
   1399 			    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1400 		} else {
   1401 			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
   1402 			rxbuf->flags &= ~IXGBE_RX_COPY;
   1403 		}
   1404 
   1405 		refreshed = true;
   1406 		/* next_to_refresh points to the previous one */
   1407 		rxr->next_to_refresh = i;
   1408 		if (++i == rxr->num_desc)
   1409 			i = 0;
   1410 	}
   1411 
   1412 update:
   1413 	if (refreshed) /* Update hardware tail index */
   1414 		IXGBE_WRITE_REG(&sc->hw, rxr->tail, rxr->next_to_refresh);
   1415 
   1416 	return;
   1417 } /* ixgbe_refresh_mbufs */
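
         /*
          * Explanatory note: next_to_refresh always names the last slot
          * that was successfully refilled, so the loop above starts one
          * past it and stops at "limit"; writing next_to_refresh to
          * rxr->tail then hands every refilled descriptor back to the
          * hardware in a single register write.
          */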
   1418 
   1419 /************************************************************************
   1420  * ixgbe_allocate_receive_buffers
   1421  *
   1422  *   Allocate memory for rx_buffer structures. Since we use one
   1423  *   rx_buffer per received packet, the maximum number of rx_buffer's
    1424  *   rx_buffer per received packet, the maximum number of rx_buffers
   1425  *   that we've allocated.
   1426  ************************************************************************/
   1427 static int
   1428 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
   1429 {
   1430 	struct ixgbe_softc  *sc = rxr->sc;
   1431 	device_t            dev = sc->dev;
   1432 	struct ixgbe_rx_buf *rxbuf;
   1433 	int                 bsize, error;
   1434 
   1435 	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
   1436 	rxr->rx_buffers = kmem_zalloc(bsize, KM_SLEEP);
   1437 
   1438 	error = ixgbe_dma_tag_create(
   1439 	         /*      parent */ sc->osdep.dmat,
   1440 	         /*   alignment */ 1,
   1441 	         /*      bounds */ 0,
   1442 	         /*     maxsize */ MJUM16BYTES,
   1443 	         /*   nsegments */ 1,
   1444 	         /*  maxsegsize */ MJUM16BYTES,
   1445 	         /*       flags */ 0,
   1446 	                           &rxr->ptag);
   1447 	if (error != 0) {
   1448 		aprint_error_dev(dev, "Unable to create RX DMA tag\n");
   1449 		goto fail;
   1450 	}
   1451 
   1452 	for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
   1453 		rxbuf = &rxr->rx_buffers[i];
   1454 		error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
   1455 		if (error) {
   1456 			aprint_error_dev(dev, "Unable to create RX dma map\n");
   1457 			goto fail;
   1458 		}
   1459 	}
   1460 
   1461 	return (0);
   1462 
   1463 fail:
   1464 	/* Frees all, but can handle partial completion */
   1465 	ixgbe_free_receive_structures(sc);
   1466 
   1467 	return (error);
   1468 } /* ixgbe_allocate_receive_buffers */
   1469 
   1470 /************************************************************************
   1471  * ixgbe_free_receive_ring
   1472  ************************************************************************/
   1473 static void
   1474 ixgbe_free_receive_ring(struct rx_ring *rxr)
   1475 {
   1476 	for (int i = 0; i < rxr->num_desc; i++) {
   1477 		ixgbe_rx_discard(rxr, i);
   1478 	}
   1479 } /* ixgbe_free_receive_ring */
   1480 
   1481 /************************************************************************
   1482  * ixgbe_setup_receive_ring
   1483  *
   1484  *   Initialize a receive ring and its buffers.
   1485  ************************************************************************/
   1486 static int
   1487 ixgbe_setup_receive_ring(struct rx_ring *rxr)
   1488 {
   1489 	struct ixgbe_softc    *sc;
   1490 	struct ixgbe_rx_buf   *rxbuf;
   1491 #ifdef LRO
   1492 	struct ifnet          *ifp;
   1493 	struct lro_ctrl       *lro = &rxr->lro;
   1494 #endif /* LRO */
   1495 #ifdef DEV_NETMAP
    1496 	struct netmap_adapter *na = NA(rxr->sc->ifp);
   1497 	struct netmap_slot    *slot;
   1498 #endif /* DEV_NETMAP */
   1499 	int                   rsize, error = 0;
   1500 
   1501 	sc = rxr->sc;
   1502 #ifdef LRO
   1503 	ifp = sc->ifp;
   1504 #endif /* LRO */
   1505 
   1506 	/* Clear the ring contents */
   1507 	IXGBE_RX_LOCK(rxr);
   1508 
   1509 #ifdef DEV_NETMAP
   1510 	if (sc->feat_en & IXGBE_FEATURE_NETMAP)
   1511 		slot = netmap_reset(na, NR_RX, rxr->me, 0);
   1512 #endif /* DEV_NETMAP */
   1513 
   1514 	rsize = sc->num_rx_desc * sizeof(union ixgbe_adv_rx_desc);
   1515 	KASSERT((rsize % DBA_ALIGN) == 0);
   1516 	bzero((void *)rxr->rx_base, rsize);
   1517 	/* Cache the size */
   1518 	rxr->mbuf_sz = sc->rx_mbuf_sz;
   1519 
   1520 	/* Free current RX buffer structs and their mbufs */
   1521 	ixgbe_free_receive_ring(rxr);
   1522 
   1523 	/* Now replenish the mbufs */
   1524 	for (int i = 0; i < rxr->num_desc; i++) {
   1525 		struct mbuf *mp;
   1526 
   1527 		rxbuf = &rxr->rx_buffers[i];
   1528 
   1529 #ifdef DEV_NETMAP
   1530 		/*
   1531 		 * In netmap mode, fill the map and set the buffer
   1532 		 * address in the NIC ring, considering the offset
   1533 		 * between the netmap and NIC rings (see comment in
    1534 		 * ixgbe_setup_transmit_ring()).  No need to allocate
   1535 		 * an mbuf, so end the block with a continue;
   1536 		 */
   1537 		if ((sc->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
   1538 			int sj = netmap_idx_n2k(na->rx_rings[rxr->me], i);
   1539 			uint64_t paddr;
   1540 			void *addr;
   1541 
   1542 			addr = PNMB(na, slot + sj, &paddr);
   1543 			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
   1544 			/* Update descriptor and the cached value */
   1545 			rxr->rx_base[i].read.pkt_addr = htole64(paddr);
   1546 			rxbuf->addr = htole64(paddr);
   1547 			continue;
   1548 		}
   1549 #endif /* DEV_NETMAP */
   1550 
   1551 		rxbuf->flags = 0;
   1552 		rxbuf->buf = ixgbe_getcl();
   1553 		if (rxbuf->buf == NULL) {
   1554 			IXGBE_EVC_ADD(&rxr->no_mbuf, 1);
   1555 			error = ENOBUFS;
   1556 			goto fail;
   1557 		}
   1558 		mp = rxbuf->buf;
   1559 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   1560 		IXGBE_M_ADJ(sc, rxr, mp);
   1561 		/* Get the memory mapping */
   1562 		error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat, rxbuf->pmap,
   1563 		    mp, BUS_DMA_NOWAIT);
   1564 		if (error != 0) {
   1565 			/*
   1566 			 * Clear this entry for later cleanup in
    1567 			 * ixgbe_rx_discard(), which is called via
   1568 			 * ixgbe_free_receive_ring().
   1569 			 */
   1570 			m_freem(mp);
   1571 			rxbuf->buf = NULL;
   1572 			goto fail;
   1573 		}
   1574 		bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1575 		    0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
   1576 		/* Update the descriptor and the cached value */
   1577 		rxr->rx_base[i].read.pkt_addr =
   1578 		    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1579 		rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1580 	}
   1581 
   1582 	/* Setup our descriptor indices */
   1583 	rxr->next_to_check = 0;
   1584 	rxr->next_to_refresh = sc->num_rx_desc - 1; /* Fully allocated */
   1585 #ifdef LRO
   1586 	rxr->lro_enabled = FALSE;
   1587 #endif
   1588 	rxr->discard_multidesc = false;
   1589 	IXGBE_EVC_STORE(&rxr->rx_copies, 0);
   1590 #if 0 /* NetBSD */
   1591 	IXGBE_EVC_STORE(&rxr->rx_bytes, 0);
   1592 #if 1	/* Fix inconsistency */
   1593 	IXGBE_EVC_STORE(&rxr->rx_packets, 0);
   1594 #endif
   1595 #endif
   1596 	rxr->vtag_strip = FALSE;
   1597 
   1598 	ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   1599 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   1600 
   1601 	/*
   1602 	 * Now set up the LRO interface
   1603 	 */
   1604 #ifdef RSC
   1605 	if (ixgbe_rsc_enable)
   1606 		ixgbe_setup_hw_rsc(rxr);
   1607 #endif
   1608 #ifdef LRO
   1609 #ifdef RSC
   1610 	else
   1611 #endif
   1612 	if (ifp->if_capenable & IFCAP_LRO) {
   1613 		device_t dev = sc->dev;
   1614 		int err = tcp_lro_init(lro);
   1615 		if (err) {
   1616 			device_printf(dev, "LRO Initialization failed!\n");
   1617 			goto fail;
   1618 		}
   1619 		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
   1620 		rxr->lro_enabled = TRUE;
   1621 		lro->ifp = sc->ifp;
   1622 	}
   1623 #endif /* LRO */
   1624 
   1625 	IXGBE_RX_UNLOCK(rxr);
   1626 
   1627 	return (0);
   1628 
   1629 fail:
   1630 	ixgbe_free_receive_ring(rxr);
   1631 	IXGBE_RX_UNLOCK(rxr);
   1632 
   1633 	return (error);
   1634 } /* ixgbe_setup_receive_ring */
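
         /*
          * Illustration (not driver code): after the ring is set up,
          * next_to_check == 0 and next_to_refresh == num_desc - 1, i.e. every
          * descriptor owns a buffer.  One way a "descriptors still needing a
          * refresh" count can be derived from those two indices (a sketch only;
          * the driver's ixgbe_rx_unrefreshed() helper lives elsewhere and may
          * differ):
          */
         #if 0 /* example sketch, not compiled */
         static unsigned int
         unrefreshed(unsigned int next_to_check, unsigned int next_to_refresh,
             unsigned int num_desc)
         {
         	/*
         	 * Descriptors between next_to_refresh and next_to_check have
         	 * been handed to the stack but not yet given a fresh buffer.
         	 * With the just-initialized values above this is 0.
         	 */
         	return (next_to_check + num_desc - next_to_refresh - 1) % num_desc;
         }
         #endif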
   1635 
   1636 /************************************************************************
   1637  * ixgbe_setup_receive_structures - Initialize all receive rings.
   1638  ************************************************************************/
   1639 int
   1640 ixgbe_setup_receive_structures(struct ixgbe_softc *sc)
   1641 {
   1642 	struct rx_ring *rxr = sc->rx_rings;
   1643 	int            j;
   1644 
   1645 	INIT_DEBUGOUT("ixgbe_setup_receive_structures");
   1646 	for (j = 0; j < sc->num_queues; j++, rxr++)
   1647 		if (ixgbe_setup_receive_ring(rxr))
   1648 			goto fail;
   1649 
   1650 	return (0);
   1651 fail:
   1652 	/*
    1653 	 * Free the RX buffers allocated so far; we only handle the rings
    1654 	 * that completed, since the failing case has already cleaned up
    1655 	 * after itself.  Ring 'j' failed, so it is the terminus.
   1656 	 */
   1657 	for (int i = 0; i < j; ++i) {
   1658 		rxr = &sc->rx_rings[i];
   1659 		IXGBE_RX_LOCK(rxr);
   1660 		ixgbe_free_receive_ring(rxr);
   1661 		IXGBE_RX_UNLOCK(rxr);
   1662 	}
   1663 
   1664 	return (ENOBUFS);
   1665 } /* ixgbe_setup_receive_structures */
   1666 
   1667 
   1668 /************************************************************************
   1669  * ixgbe_free_receive_structures - Free all receive rings.
   1670  ************************************************************************/
   1671 void
   1672 ixgbe_free_receive_structures(struct ixgbe_softc *sc)
   1673 {
   1674 	struct rx_ring *rxr = sc->rx_rings;
   1675 
   1676 	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
   1677 
   1678 	for (int i = 0; i < sc->num_queues; i++, rxr++) {
   1679 		ixgbe_free_receive_buffers(rxr);
   1680 #ifdef LRO
   1681 		/* Free LRO memory */
   1682 		tcp_lro_free(&rxr->lro);
   1683 #endif /* LRO */
   1684 		/* Free the ring memory as well */
   1685 		ixgbe_dma_free(sc, &rxr->rxdma);
   1686 		IXGBE_RX_LOCK_DESTROY(rxr);
   1687 	}
   1688 
   1689 	kmem_free(sc->rx_rings, sizeof(struct rx_ring) * sc->num_queues);
   1690 } /* ixgbe_free_receive_structures */
   1691 
   1692 
   1693 /************************************************************************
   1694  * ixgbe_free_receive_buffers - Free receive ring data structures
   1695  ************************************************************************/
   1696 static void
   1697 ixgbe_free_receive_buffers(struct rx_ring *rxr)
   1698 {
   1699 	struct ixgbe_softc  *sc = rxr->sc;
   1700 	struct ixgbe_rx_buf *rxbuf;
   1701 
   1702 	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
   1703 
   1704 	/* Cleanup any existing buffers */
   1705 	if (rxr->rx_buffers != NULL) {
   1706 		for (int i = 0; i < sc->num_rx_desc; i++) {
   1707 			rxbuf = &rxr->rx_buffers[i];
   1708 			ixgbe_rx_discard(rxr, i);
   1709 			if (rxbuf->pmap != NULL) {
   1710 				ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
   1711 				rxbuf->pmap = NULL;
   1712 			}
   1713 		}
   1714 
   1715 		if (rxr->rx_buffers != NULL) {
   1716 			kmem_free(rxr->rx_buffers,
   1717 			    sizeof(struct ixgbe_rx_buf) * rxr->num_desc);
   1718 			rxr->rx_buffers = NULL;
   1719 		}
   1720 	}
   1721 
   1722 	if (rxr->ptag != NULL) {
   1723 		ixgbe_dma_tag_destroy(rxr->ptag);
   1724 		rxr->ptag = NULL;
   1725 	}
   1726 
   1727 	return;
   1728 } /* ixgbe_free_receive_buffers */
   1729 
   1730 /************************************************************************
   1731  * ixgbe_rx_input
   1732  ************************************************************************/
   1733 static __inline void
   1734 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m,
   1735     u32 ptype)
   1736 {
   1737 	struct ixgbe_softc *sc = ifp->if_softc;
   1738 
   1739 #ifdef LRO
   1740 	struct ethercom *ec = &sc->osdep.ec;
   1741 
   1742 	/*
    1743 	 * At the moment LRO is done only for IP/TCP packets whose TCP checksum
    1744 	 * has been verified by hardware and which carry no VLAN tag in the
    1745 	 * Ethernet header.  For IPv6 we do not yet support extension headers.
   1746 	 */
    1747 	if (rxr->lro_enabled &&
    1748 	    (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
    1749 	    (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
    1750 	    ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
    1751 	    (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
    1752 	    (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
    1753 	    (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
    1754 	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
    1755 	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
    1756 		/*
    1757 		 * Send to the stack if:
    1758 		 *  - LRO not enabled, or
    1759 		 *  - no LRO resources, or
    1760 		 *  - lro enqueue fails
    1761 		 */
    1762 		if (rxr->lro.lro_cnt != 0)
    1763 			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
    1764 				return;
    1765 	}
   1766 #endif /* LRO */
   1767 
   1768 	if_percpuq_enqueue(sc->ipq, m);
   1769 } /* ixgbe_rx_input */
   1770 
   1771 /************************************************************************
   1772  * ixgbe_rx_discard
   1773  ************************************************************************/
   1774 static __inline void
   1775 ixgbe_rx_discard(struct rx_ring *rxr, int i)
   1776 {
   1777 	struct ixgbe_rx_buf *rbuf;
   1778 
   1779 	rbuf = &rxr->rx_buffers[i];
   1780 
   1781 	/*
    1782 	 * With advanced descriptors the writeback clobbers the buffer
    1783 	 * addresses, so it's easier to just free the existing mbufs and take
    1784 	 * the normal refresh path to get new buffers and mappings.
   1785 	 */
   1786 
    1787 	if (rbuf->fmp != NULL) {	/* Partial chain? */
   1788 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
   1789 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
   1790 		ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
   1791 		m_freem(rbuf->fmp);
   1792 		rbuf->fmp = NULL;
   1793 		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
   1794 	} else if (rbuf->buf) {
   1795 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
   1796 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
   1797 		ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
   1798 		m_free(rbuf->buf);
   1799 		rbuf->buf = NULL;
   1800 	}
   1801 
   1802 	rbuf->flags = 0;
   1803 
   1804 	return;
   1805 } /* ixgbe_rx_discard */
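
         /*
          * Illustration (not driver code): because descriptor writeback
          * overwrites the buffer address field, the driver keeps a cached copy
          * (rbuf->addr) and rewrites it when a descriptor is reused without a
          * new mbuf.  A minimal sketch of that idea with made-up structures:
          */
         #if 0 /* example sketch, not compiled */
         #include <stdint.h>
         
         struct desc  { uint64_t pkt_addr; };	/* overwritten by "hardware" */
         struct track { uint64_t cached_addr; };	/* software copy survives */
         
         static void
         reuse_descriptor(struct desc *d, const struct track *t)
         {
         	/* Restore the DMA address clobbered by writeback. */
         	d->pkt_addr = t->cached_addr;
         }
         #endif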
   1806 
   1807 
   1808 /************************************************************************
   1809  * ixgbe_rxeof
   1810  *
    1811  *   Executes in interrupt context.  It replenishes the
    1812  *   mbufs in the descriptor ring and sends data which has
    1813  *   been DMA'ed into host memory to the upper layer.
   1814  *
   1815  *   Return TRUE for more work, FALSE for all clean.
   1816  ************************************************************************/
   1817 bool
   1818 ixgbe_rxeof(struct ix_queue *que)
   1819 {
   1820 	struct ixgbe_softc	*sc = que->sc;
   1821 	struct rx_ring		*rxr = que->rxr;
   1822 	struct ifnet		*ifp = sc->ifp;
   1823 #ifdef LRO
   1824 	struct lro_ctrl		*lro = &rxr->lro;
   1825 #endif /* LRO */
   1826 	union ixgbe_adv_rx_desc	*cur;
   1827 	struct ixgbe_rx_buf	*rbuf, *nbuf;
   1828 	int			i, nextp, processed = 0;
   1829 	u32			staterr = 0;
   1830 	u32			loopcount = 0, numdesc;
   1831 	u32			limit = sc->rx_process_limit;
   1832 	u32			rx_copy_len = sc->rx_copy_len;
   1833 	bool			discard_multidesc = rxr->discard_multidesc;
   1834 	bool			wraparound = false;
   1835 	unsigned int		syncremain;
   1836 #ifdef RSS
   1837 	u16			pkt_info;
   1838 #endif
   1839 
   1840 	IXGBE_RX_LOCK(rxr);
   1841 
   1842 #ifdef DEV_NETMAP
   1843 	if (sc->feat_en & IXGBE_FEATURE_NETMAP) {
   1844 		/* Same as the txeof routine: wakeup clients on intr. */
   1845 		if (netmap_rx_irq(ifp, rxr->me, &processed)) {
   1846 			IXGBE_RX_UNLOCK(rxr);
   1847 			return (FALSE);
   1848 		}
   1849 	}
   1850 #endif /* DEV_NETMAP */
   1851 
    1852 	/* Sync rx_process_limit descriptors, or up to the ring end on wrap */
   1853 	if ((rxr->next_to_check + limit) <= rxr->num_desc) {
   1854 		/* Non-wraparound */
   1855 		numdesc = limit;
   1856 		syncremain = 0;
   1857 	} else {
   1858 		/* Wraparound. Sync the first half. */
    1859 		/* Wraparound.  Sync the chunk up to the ring end first. */
   1860 
   1861 		/* Set the size of the last half */
    1862 		/* Set the size of the remaining chunk */
   1863 	}
   1864 	bus_dmamap_sync(rxr->rxdma.dma_tag->dt_dmat,
   1865 	    rxr->rxdma.dma_map,
   1866 	    sizeof(union ixgbe_adv_rx_desc) * rxr->next_to_check,
   1867 	    sizeof(union ixgbe_adv_rx_desc) * numdesc,
   1868 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
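    	/*
    	 * Worked example (hypothetical numbers): with num_desc = 1024,
    	 * next_to_check = 1000 and rx_process_limit = 64, the range wraps,
    	 * so numdesc = 1024 - 1000 = 24 descriptors are synced here and
    	 * syncremain = 64 - 24 = 40 descriptors at the start of the ring
    	 * are synced once the loop below wraps around to index 0.
    	 */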
   1869 
   1870 	/*
    1871 	 * The maximum number of loop iterations is rx_process_limit.  If
    1872 	 * discard_multidesc is true, keep processing so that a broken packet
    1873 	 * is not passed to the upper layer.
   1874 	 */
   1875 	for (i = rxr->next_to_check;
   1876 	     (loopcount < limit) || (discard_multidesc == true);) {
   1877 
   1878 		struct mbuf *sendmp, *mp;
   1879 		struct mbuf *newmp;
   1880 #ifdef RSC
   1881 		u32         rsc;
   1882 #endif
   1883 		u32         ptype;
   1884 		u16         len;
   1885 		u16         vtag = 0;
   1886 		bool        eop;
   1887 		bool        discard = false;
   1888 
   1889 		if (wraparound) {
   1890 			/* Sync the last half. */
    1891 			/* Sync the remaining chunk. */
   1892 			numdesc = syncremain;
   1893 			wraparound = false;
   1894 		} else if (__predict_false(loopcount >= limit)) {
   1895 			KASSERT(discard_multidesc == true);
   1896 			numdesc = 1;
   1897 		} else
   1898 			numdesc = 0;
   1899 
   1900 		if (numdesc != 0)
   1901 			bus_dmamap_sync(rxr->rxdma.dma_tag->dt_dmat,
   1902 			    rxr->rxdma.dma_map, 0,
   1903 			    sizeof(union ixgbe_adv_rx_desc) * numdesc,
   1904 			    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   1905 
   1906 		cur = &rxr->rx_base[i];
   1907 		staterr = le32toh(cur->wb.upper.status_error);
   1908 #ifdef RSS
   1909 		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
   1910 #endif
   1911 
   1912 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
   1913 			break;
   1914 
   1915 		loopcount++;
   1916 		sendmp = newmp = NULL;
   1917 		nbuf = NULL;
   1918 #ifdef RSC
   1919 		rsc = 0;
   1920 #endif
   1921 		cur->wb.upper.status_error = 0;
   1922 		rbuf = &rxr->rx_buffers[i];
   1923 		mp = rbuf->buf;
   1924 
   1925 		len = le16toh(cur->wb.upper.length);
   1926 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
   1927 		    IXGBE_RXDADV_PKTTYPE_MASK;
   1928 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
   1929 
   1930 		/* Make sure bad packets are discarded */
   1931 		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
   1932 #if __FreeBSD_version >= 1100036
   1933 			if (sc->feat_en & IXGBE_FEATURE_VF)
   1934 				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
   1935 #endif
   1936 			IXGBE_EVC_ADD(&rxr->rx_discarded, 1);
   1937 			ixgbe_rx_discard(rxr, i);
   1938 			discard_multidesc = false;
   1939 			goto next_desc;
   1940 		}
   1941 
   1942 		if (__predict_false(discard_multidesc))
   1943 			discard = true;
   1944 		else {
   1945 			/* Pre-alloc new mbuf. */
   1946 
   1947 			if ((rbuf->fmp == NULL) &&
   1948 			    eop && (len <= rx_copy_len)) {
   1949 				/* For short packet. See below. */
   1950 				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
   1951 				if (__predict_false(sendmp == NULL)) {
   1952 					IXGBE_EVC_ADD(&rxr->no_mbuf, 1);
   1953 					discard = true;
   1954 				}
   1955 			} else {
   1956 				/* For long packet. */
   1957 				newmp = ixgbe_getcl();
   1958 				if (__predict_false(newmp == NULL)) {
   1959 					IXGBE_EVC_ADD(&rxr->no_mbuf, 1);
   1960 					discard = true;
   1961 				}
   1962 			}
   1963 		}
   1964 
   1965 		if (__predict_false(discard)) {
   1966 			/*
   1967 			 * Descriptor initialization is already done by the
   1968 			 * above code (cur->wb.upper.status_error = 0).
   1969 			 * So, we can reuse current rbuf->buf for new packet.
   1970 			 *
   1971 			 * Rewrite the buffer addr, see comment in
   1972 			 * ixgbe_rx_discard().
   1973 			 */
   1974 			cur->read.pkt_addr = rbuf->addr;
   1975 			m_freem(rbuf->fmp);
   1976 			rbuf->fmp = NULL;
   1977 			if (!eop) {
   1978 				/* Discard the entire packet. */
   1979 				discard_multidesc = true;
   1980 			} else
   1981 				discard_multidesc = false;
   1982 			goto next_desc;
   1983 		}
   1984 		discard_multidesc = false;
   1985 
   1986 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
   1987 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
   1988 
   1989 		/*
    1990 		 * On the 82599, which supports a hardware
    1991 		 * LRO (called HW RSC), packets need not
    1992 		 * be fragmented across sequential
    1993 		 * descriptors; instead the next descriptor
    1994 		 * of a frame is indicated by bits in the
    1995 		 * current descriptor.  This also means we
    1996 		 * might process more than one packet at a
    1997 		 * time, something that was never true
    1998 		 * before and that required eliminating the
    1999 		 * global chain pointers in favor of what we do here.  -jfv
   2000 		 */
   2001 		if (!eop) {
   2002 			/*
   2003 			 * Figure out the next descriptor
   2004 			 * of this frame.
   2005 			 */
   2006 #ifdef RSC
   2007 			if (rxr->hw_rsc == TRUE) {
   2008 				rsc = ixgbe_rsc_count(cur);
   2009 				rxr->rsc_num += (rsc - 1);
   2010 			}
   2011 			if (rsc) { /* Get hardware index */
   2012 				nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
   2013 				    IXGBE_RXDADV_NEXTP_SHIFT);
   2014 			} else
   2015 #endif
   2016 			{ /* Just sequential */
   2017 				nextp = i + 1;
   2018 				if (nextp == sc->num_rx_desc)
   2019 					nextp = 0;
   2020 			}
   2021 			nbuf = &rxr->rx_buffers[nextp];
   2022 			prefetch(nbuf);
   2023 		}
   2024 		/*
   2025 		 * Rather than using the fmp/lmp global pointers
   2026 		 * we now keep the head of a packet chain in the
   2027 		 * buffer struct and pass this along from one
   2028 		 * descriptor to the next, until we get EOP.
   2029 		 */
   2030 		/*
    2031 		 * See if there is a stored head from a previous
    2032 		 * descriptor that determines what to do with this one.
   2033 		 */
   2034 		if (rbuf->fmp != NULL) {
   2035 			/* Secondary frag */
   2036 			sendmp = rbuf->fmp;
   2037 
   2038 			/* Update new (used in future) mbuf */
   2039 			newmp->m_pkthdr.len = newmp->m_len = rxr->mbuf_sz;
   2040 			IXGBE_M_ADJ(sc, rxr, newmp);
   2041 			rbuf->buf = newmp;
   2042 			rbuf->fmp = NULL;
   2043 
   2044 			/* For secondary frag */
   2045 			mp->m_len = len;
   2046 			mp->m_flags &= ~M_PKTHDR;
   2047 
   2048 			/* For sendmp */
   2049 			sendmp->m_pkthdr.len += mp->m_len;
   2050 		} else {
   2051 			/*
   2052 			 * It's the first segment of a multi descriptor
   2053 			 * packet or a single segment which contains a full
   2054 			 * packet.
   2055 			 */
   2056 
   2057 			if (eop && (len <= rx_copy_len)) {
   2058 				/*
    2059 				 * Optimize: this might be a small packet,
    2060 				 * maybe just a TCP ACK.  Copy it into a new
    2061 				 * mbuf; leave the old mbuf+cluster for re-use.
   2062 				 */
   2063 				sendmp->m_data += ETHER_ALIGN;
   2064 				memcpy(mtod(sendmp, void *),
   2065 				    mtod(mp, void *), len);
   2066 				IXGBE_EVC_ADD(&rxr->rx_copies, 1);
   2067 				rbuf->flags |= IXGBE_RX_COPY;
   2068 			} else {
   2069 				/* For long packet */
   2070 
   2071 				/* Update new (used in future) mbuf */
   2072 				newmp->m_pkthdr.len = newmp->m_len
   2073 				    = rxr->mbuf_sz;
   2074 				IXGBE_M_ADJ(sc, rxr, newmp);
   2075 				rbuf->buf = newmp;
   2076 				rbuf->fmp = NULL;
   2077 
   2078 				/* For sendmp */
   2079 				sendmp = mp;
   2080 			}
   2081 
   2082 			/* first desc of a non-ps chain */
   2083 			sendmp->m_pkthdr.len = sendmp->m_len = len;
   2084 		}
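    		/*
    		 * Worked example (hypothetical threshold): if rx_copy_len
    		 * were 160, a 60-byte TCP ACK arriving in a single EOP
    		 * descriptor would be copied into a small header mbuf and
    		 * the original cluster left on the ring, while a 1500-byte
    		 * frame would take the "long packet" path and hand its
    		 * cluster up the stack, replacing it with newmp.
    		 */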
   2085 		++processed;
   2086 
   2087 		/* Pass the head pointer on */
   2088 		if (eop == 0) {
   2089 			nbuf->fmp = sendmp;
   2090 			sendmp = NULL;
   2091 			mp->m_next = nbuf->buf;
   2092 		} else { /* Sending this frame */
   2093 			m_set_rcvif(sendmp, ifp);
   2094 			++rxr->packets;
   2095 			IXGBE_EVC_ADD(&rxr->rx_packets, 1);
   2096 			/* capture data for AIM */
   2097 			rxr->bytes += sendmp->m_pkthdr.len;
   2098 			IXGBE_EVC_ADD(&rxr->rx_bytes, sendmp->m_pkthdr.len);
   2099 			/* Process vlan info */
   2100 			if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
   2101 				vtag = le16toh(cur->wb.upper.vlan);
   2102 			if (vtag) {
   2103 				vlan_set_tag(sendmp, vtag);
   2104 			}
   2105 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
   2106 				ixgbe_rx_checksum(staterr, sendmp, ptype,
   2107 				   &sc->stats.pf);
   2108 			}
   2109 
   2110 #if 0 /* FreeBSD */
   2111 			/*
   2112 			 * In case of multiqueue, we have RXCSUM.PCSD bit set
   2113 			 * and never cleared. This means we have RSS hash
   2114 			 * available to be used.
   2115 			 */
   2116 			if (sc->num_queues > 1) {
   2117 				sendmp->m_pkthdr.flowid =
   2118 				    le32toh(cur->wb.lower.hi_dword.rss);
   2119 				switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
   2120 				case IXGBE_RXDADV_RSSTYPE_IPV4:
   2121 					M_HASHTYPE_SET(sendmp,
   2122 					    M_HASHTYPE_RSS_IPV4);
   2123 					break;
   2124 				case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
   2125 					M_HASHTYPE_SET(sendmp,
   2126 					    M_HASHTYPE_RSS_TCP_IPV4);
   2127 					break;
   2128 				case IXGBE_RXDADV_RSSTYPE_IPV6:
   2129 					M_HASHTYPE_SET(sendmp,
   2130 					    M_HASHTYPE_RSS_IPV6);
   2131 					break;
   2132 				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
   2133 					M_HASHTYPE_SET(sendmp,
   2134 					    M_HASHTYPE_RSS_TCP_IPV6);
   2135 					break;
   2136 				case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
   2137 					M_HASHTYPE_SET(sendmp,
   2138 					    M_HASHTYPE_RSS_IPV6_EX);
   2139 					break;
   2140 				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
   2141 					M_HASHTYPE_SET(sendmp,
   2142 					    M_HASHTYPE_RSS_TCP_IPV6_EX);
   2143 					break;
   2144 #if __FreeBSD_version > 1100000
   2145 				case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
   2146 					M_HASHTYPE_SET(sendmp,
   2147 					    M_HASHTYPE_RSS_UDP_IPV4);
   2148 					break;
   2149 				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
   2150 					M_HASHTYPE_SET(sendmp,
   2151 					    M_HASHTYPE_RSS_UDP_IPV6);
   2152 					break;
   2153 				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
   2154 					M_HASHTYPE_SET(sendmp,
   2155 					    M_HASHTYPE_RSS_UDP_IPV6_EX);
   2156 					break;
   2157 #endif
   2158 				default:
   2159 					M_HASHTYPE_SET(sendmp,
   2160 					    M_HASHTYPE_OPAQUE_HASH);
   2161 				}
   2162 			} else {
   2163 				sendmp->m_pkthdr.flowid = que->msix;
   2164 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
   2165 			}
   2166 #endif
   2167 		}
   2168 next_desc:
   2169 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   2170 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   2171 
   2172 		/* Advance our pointers to the next descriptor. */
   2173 		if (++i == rxr->num_desc) {
   2174 			wraparound = true;
   2175 			i = 0;
   2176 		}
   2177 		rxr->next_to_check = i;
   2178 
   2179 		/* Now send to the stack or do LRO */
   2180 		if (sendmp != NULL)
   2181 			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
   2182 
    2183 		/* Refresh mbufs every 8 processed descriptors */
   2184 		if (processed == 8) {
   2185 			ixgbe_refresh_mbufs(rxr, i);
   2186 			processed = 0;
   2187 		}
   2188 	}
   2189 
   2190 	/* Save the current status */
   2191 	rxr->discard_multidesc = discard_multidesc;
   2192 
   2193 	/* Refresh any remaining buf structs */
   2194 	if (ixgbe_rx_unrefreshed(rxr))
   2195 		ixgbe_refresh_mbufs(rxr, i);
   2196 
   2197 	IXGBE_RX_UNLOCK(rxr);
   2198 
   2199 #ifdef LRO
   2200 	/*
   2201 	 * Flush any outstanding LRO work
   2202 	 */
   2203 	tcp_lro_flush_all(lro);
   2204 #endif /* LRO */
   2205 
   2206 	/*
   2207 	 * Still have cleaning to do?
   2208 	 */
   2209 	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
   2210 		return (TRUE);
   2211 
   2212 	return (FALSE);
   2213 } /* ixgbe_rxeof */
   2214 
   2215 
   2216 /************************************************************************
   2217  * ixgbe_rx_checksum
   2218  *
   2219  *   Verify that the hardware indicated that the checksum is valid.
   2220  *   Inform the stack about the status of checksum so that stack
    2221  *   Inform the stack about the status of the checksum so that the
    2222  *   stack doesn't spend time verifying it again.
   2223 static void
   2224 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
   2225     struct ixgbe_hw_stats *stats)
   2226 {
   2227 	u16  status = (u16)staterr;
   2228 	u8   errors = (u8)(staterr >> 24);
   2229 #if 0
   2230 	bool sctp = false;
   2231 
   2232 	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
   2233 	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
   2234 		sctp = true;
   2235 #endif
   2236 
   2237 	/* IPv4 checksum */
   2238 	if (status & IXGBE_RXD_STAT_IPCS) {
   2239 		IXGBE_EVC_ADD(&stats->ipcs, 1);
   2240 		if (!(errors & IXGBE_RXD_ERR_IPE)) {
   2241 			/* IP Checksum Good */
   2242 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
   2243 		} else {
   2244 			IXGBE_EVC_ADD(&stats->ipcs_bad, 1);
   2245 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
   2246 		}
   2247 	}
   2248 	/* TCP/UDP/SCTP checksum */
   2249 	if (status & IXGBE_RXD_STAT_L4CS) {
   2250 		IXGBE_EVC_ADD(&stats->l4cs, 1);
   2251 		int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
   2252 		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
   2253 			mp->m_pkthdr.csum_flags |= type;
   2254 		} else {
   2255 			IXGBE_EVC_ADD(&stats->l4cs_bad, 1);
   2256 			mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
   2257 		}
   2258 	}
   2259 } /* ixgbe_rx_checksum */
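
         /*
          * Note (illustration only): with the flags set above, the stack can
          * skip software verification and need only test the *_BAD bits.  A
          * consumer might check them along these lines; this is a sketch, not a
          * specific kernel function, and example_csum_is_bad() is a made-up
          * name:
          */
         #if 0 /* example sketch, not compiled; requires <sys/mbuf.h> */
         static int
         example_csum_is_bad(const struct mbuf *m)
         {
         	/* Hardware flagged the IP or TCP/UDP checksum as bad. */
         	return (m->m_pkthdr.csum_flags &
         	    (M_CSUM_TCP_UDP_BAD | M_CSUM_IPv4_BAD)) != 0;
         }
         #endif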
   2260 
   2261 /************************************************************************
   2262  * ixgbe_dma_malloc
   2263  ************************************************************************/
   2264 int
   2265 ixgbe_dma_malloc(struct ixgbe_softc *sc, const bus_size_t size,
   2266 		struct ixgbe_dma_alloc *dma, const int mapflags)
   2267 {
   2268 	device_t dev = sc->dev;
   2269 	int      r, rsegs;
   2270 
   2271 	r = ixgbe_dma_tag_create(
   2272 	     /*      parent */ sc->osdep.dmat,
   2273 	     /*   alignment */ DBA_ALIGN,
   2274 	     /*      bounds */ 0,
   2275 	     /*     maxsize */ size,
   2276 	     /*   nsegments */ 1,
   2277 	     /*  maxsegsize */ size,
   2278 	     /*       flags */ BUS_DMA_ALLOCNOW,
   2279 			       &dma->dma_tag);
   2280 	if (r != 0) {
   2281 		aprint_error_dev(dev,
   2282 		    "%s: ixgbe_dma_tag_create failed; error %d\n", __func__,
   2283 		    r);
   2284 		goto fail_0;
   2285 	}
   2286 
   2287 	r = bus_dmamem_alloc(dma->dma_tag->dt_dmat, size,
   2288 	    dma->dma_tag->dt_alignment, dma->dma_tag->dt_boundary,
   2289 	    &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
   2290 	if (r != 0) {
   2291 		aprint_error_dev(dev,
   2292 		    "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
   2293 		goto fail_1;
   2294 	}
   2295 
   2296 	r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
   2297 	    size, &dma->dma_vaddr, BUS_DMA_NOWAIT | BUS_DMA_COHERENT);
   2298 	if (r != 0) {
   2299 		aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
   2300 		    __func__, r);
   2301 		goto fail_2;
   2302 	}
   2303 
   2304 	r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
   2305 	if (r != 0) {
    2306 		aprint_error_dev(dev, "%s: bus_dmamap_create failed; error %d\n",
   2307 		    __func__, r);
   2308 		goto fail_3;
   2309 	}
   2310 
   2311 	r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map,
   2312 	    dma->dma_vaddr, size, NULL, mapflags | BUS_DMA_NOWAIT);
   2313 	if (r != 0) {
   2314 		aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
   2315 		    __func__, r);
   2316 		goto fail_4;
   2317 	}
   2318 	dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
   2319 	dma->dma_size = size;
   2320 	return 0;
   2321 fail_4:
   2322 	ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
   2323 fail_3:
   2324 	bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
   2325 fail_2:
   2326 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
   2327 fail_1:
   2328 	ixgbe_dma_tag_destroy(dma->dma_tag);
   2329 fail_0:
   2330 
   2331 	return (r);
   2332 } /* ixgbe_dma_malloc */
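
         /*
          * Illustration (not driver code): ixgbe_dma_malloc() above uses the
          * classic staged-unwind idiom: each acquisition has a matching label,
          * and a failure jumps to the label that releases only what was already
          * obtained, in reverse order.  A self-contained sketch of the same
          * shape using ordinary allocations:
          */
         #if 0 /* example sketch, not compiled */
         #include <stdlib.h>
         
         static int
         acquire_three(void **a, void **b, void **c)
         {
         	if ((*a = malloc(16)) == NULL)
         		goto fail_0;
         	if ((*b = malloc(16)) == NULL)
         		goto fail_1;
         	if ((*c = malloc(16)) == NULL)
         		goto fail_2;
         	return 0;
         
         fail_2:
         	free(*b);
         fail_1:
         	free(*a);
         fail_0:
         	return -1;
         }
         #endif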
   2333 
   2334 /************************************************************************
   2335  * ixgbe_dma_free
   2336  ************************************************************************/
   2337 void
   2338 ixgbe_dma_free(struct ixgbe_softc *sc, struct ixgbe_dma_alloc *dma)
   2339 {
   2340 	bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
   2341 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   2342 	ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
   2343 	bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, dma->dma_size);
   2344 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
   2345 	ixgbe_dma_tag_destroy(dma->dma_tag);
   2346 } /* ixgbe_dma_free */
   2347 
   2348 
   2349 /************************************************************************
   2350  * ixgbe_allocate_queues
   2351  *
   2352  *   Allocate memory for the transmit and receive rings, and then
   2353  *   the descriptors associated with each, called only once at attach.
   2354  ************************************************************************/
   2355 int
   2356 ixgbe_allocate_queues(struct ixgbe_softc *sc)
   2357 {
   2358 	device_t	dev = sc->dev;
   2359 	struct ix_queue	*que;
   2360 	struct tx_ring	*txr;
   2361 	struct rx_ring	*rxr;
   2362 	int             rsize, tsize, error = IXGBE_SUCCESS;
   2363 	int             txconf = 0, rxconf = 0;
   2364 
   2365 	/* First, allocate the top level queue structs */
   2366 	sc->queues = kmem_zalloc(sizeof(struct ix_queue) * sc->num_queues,
   2367 	    KM_SLEEP);
   2368 
   2369 	/* Second, allocate the TX ring struct memory */
   2370 	sc->tx_rings = kmem_zalloc(sizeof(struct tx_ring) * sc->num_queues,
   2371 	    KM_SLEEP);
   2372 
   2373 	/* Third, allocate the RX ring */
   2374 	sc->rx_rings = kmem_zalloc(sizeof(struct rx_ring) * sc->num_queues,
   2375 	    KM_SLEEP);
   2376 
   2377 	/* For the ring itself */
   2378 	tsize = sc->num_tx_desc * sizeof(union ixgbe_adv_tx_desc);
   2379 	KASSERT((tsize % DBA_ALIGN) == 0);
   2380 
   2381 	/*
    2382 	 * Now set up the TX queues.  txconf is needed to handle the
    2383 	 * possibility that things fail midcourse and we need to
    2384 	 * undo memory gracefully.
   2385 	 */
   2386 	for (int i = 0; i < sc->num_queues; i++, txconf++) {
   2387 		/* Set up some basics */
   2388 		txr = &sc->tx_rings[i];
   2389 		txr->sc = sc;
   2390 		txr->txr_interq = NULL;
   2391 		/* In case SR-IOV is enabled, align the index properly */
   2392 #ifdef PCI_IOV
   2393 		txr->me = ixgbe_vf_que_index(sc->iov_mode, sc->pool,
   2394 		    i);
   2395 #else
   2396 		txr->me = i;
   2397 #endif
   2398 		txr->num_desc = sc->num_tx_desc;
   2399 
   2400 		/* Initialize the TX side lock */
   2401 		mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
   2402 
   2403 		if (ixgbe_dma_malloc(sc, tsize, &txr->txdma,
   2404 		    BUS_DMA_NOWAIT)) {
   2405 			aprint_error_dev(dev,
   2406 			    "Unable to allocate TX Descriptor memory\n");
   2407 			error = ENOMEM;
   2408 			goto err_tx_desc;
   2409 		}
   2410 		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
   2411 		bzero((void *)txr->tx_base, tsize);
   2412 
   2413 		/* Now allocate transmit buffers for the ring */
   2414 		if (ixgbe_allocate_transmit_buffers(txr)) {
   2415 			aprint_error_dev(dev,
   2416 			    "Critical Failure setting up transmit buffers\n");
   2417 			error = ENOMEM;
   2418 			goto err_tx_desc;
   2419 		}
   2420 		if (!(sc->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
   2421 			/* Allocate a buf ring */
   2422 			txr->txr_interq = pcq_create(IXGBE_BR_SIZE, KM_SLEEP);
   2423 			if (txr->txr_interq == NULL) {
   2424 				aprint_error_dev(dev,
   2425 				    "Critical Failure setting up buf ring\n");
   2426 				error = ENOMEM;
   2427 				goto err_tx_desc;
   2428 			}
   2429 		}
   2430 	}
   2431 
   2432 	/*
   2433 	 * Next the RX queues...
   2434 	 */
   2435 	rsize = sc->num_rx_desc * sizeof(union ixgbe_adv_rx_desc);
   2436 	KASSERT((rsize % DBA_ALIGN) == 0);
   2437 	for (int i = 0; i < sc->num_queues; i++, rxconf++) {
   2438 		rxr = &sc->rx_rings[i];
   2439 		/* Set up some basics */
   2440 		rxr->sc = sc;
   2441 #ifdef PCI_IOV
   2442 		/* In case SR-IOV is enabled, align the index properly */
   2443 		rxr->me = ixgbe_vf_que_index(sc->iov_mode, sc->pool,
   2444 		    i);
   2445 #else
   2446 		rxr->me = i;
   2447 #endif
   2448 		rxr->num_desc = sc->num_rx_desc;
   2449 
   2450 		/* Initialize the RX side lock */
   2451 		mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
   2452 
   2453 		if (ixgbe_dma_malloc(sc, rsize, &rxr->rxdma,
   2454 		    BUS_DMA_NOWAIT)) {
   2455 			aprint_error_dev(dev,
    2456 			    "Unable to allocate RX Descriptor memory\n");
   2457 			error = ENOMEM;
   2458 			goto err_rx_desc;
   2459 		}
   2460 		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
   2461 		bzero((void *)rxr->rx_base, rsize);
   2462 
   2463 		/* Allocate receive buffers for the ring */
   2464 		if (ixgbe_allocate_receive_buffers(rxr)) {
   2465 			aprint_error_dev(dev,
   2466 			    "Critical Failure setting up receive buffers\n");
   2467 			error = ENOMEM;
   2468 			goto err_rx_desc;
   2469 		}
   2470 	}
   2471 
   2472 	/*
   2473 	 * Finally set up the queue holding structs
   2474 	 */
   2475 	for (int i = 0; i < sc->num_queues; i++) {
   2476 		que = &sc->queues[i];
   2477 		que->sc = sc;
   2478 		que->me = i;
   2479 		que->txr = &sc->tx_rings[i];
   2480 		que->rxr = &sc->rx_rings[i];
   2481 
   2482 		mutex_init(&que->dc_mtx, MUTEX_DEFAULT, IPL_NET);
   2483 		que->disabled_count = 0;
   2484 	}
   2485 
   2486 	return (0);
   2487 
   2488 err_rx_desc:
   2489 	for (rxr = sc->rx_rings; rxconf > 0; rxr++, rxconf--)
   2490 		ixgbe_dma_free(sc, &rxr->rxdma);
   2491 err_tx_desc:
   2492 	for (txr = sc->tx_rings; txconf > 0; txr++, txconf--)
   2493 		ixgbe_dma_free(sc, &txr->txdma);
   2494 	kmem_free(sc->rx_rings, sizeof(struct rx_ring) * sc->num_queues);
   2495 	kmem_free(sc->tx_rings, sizeof(struct tx_ring) * sc->num_queues);
   2496 	kmem_free(sc->queues, sizeof(struct ix_queue) * sc->num_queues);
   2497 	return (error);
   2498 } /* ixgbe_allocate_queues */
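
         /*
          * Illustration (not driver code): the txconf/rxconf counters above
          * record how many rings were fully set up so that the error path frees
          * exactly that many.  A minimal sketch of the counting pattern, where
          * 'struct ring', init_ring() and fini_ring() are hypothetical
          * stand-ins:
          */
         #if 0 /* example sketch, not compiled */
         static int
         init_rings(struct ring *rings, int n)
         {
         	int conf = 0;
         
         	for (int i = 0; i < n; i++, conf++)
         		if (init_ring(&rings[i]) != 0)	/* hypothetical helper */
         			goto err;
         	return 0;
         err:
         	/* Undo only the 'conf' rings that were completed. */
         	for (int i = 0; i < conf; i++)
         		fini_ring(&rings[i]);		/* hypothetical helper */
         	return -1;
         }
         #endif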
   2499 
   2500 /************************************************************************
   2501  * ixgbe_free_queues
   2502  *
   2503  *   Free descriptors for the transmit and receive rings, and then
   2504  *   the memory associated with each.
   2505  ************************************************************************/
   2506 void
   2507 ixgbe_free_queues(struct ixgbe_softc *sc)
   2508 {
   2509 	struct ix_queue *que;
   2510 	int i;
   2511 
   2512 	ixgbe_free_transmit_structures(sc);
   2513 	ixgbe_free_receive_structures(sc);
   2514 	for (i = 0; i < sc->num_queues; i++) {
   2515 		que = &sc->queues[i];
   2516 		mutex_destroy(&que->dc_mtx);
   2517 	}
   2518 	kmem_free(sc->queues, sizeof(struct ix_queue) * sc->num_queues);
   2519 } /* ixgbe_free_queues */
   2520