      1 /* $NetBSD: ix_txrx.c,v 1.112 2023/12/28 10:02:14 msaitoh Exp $ */
      2 
      3 /******************************************************************************
      4 
      5   Copyright (c) 2001-2017, Intel Corporation
      6   All rights reserved.
      7 
      8   Redistribution and use in source and binary forms, with or without
      9   modification, are permitted provided that the following conditions are met:
     10 
     11    1. Redistributions of source code must retain the above copyright notice,
     12       this list of conditions and the following disclaimer.
     13 
     14    2. Redistributions in binary form must reproduce the above copyright
     15       notice, this list of conditions and the following disclaimer in the
     16       documentation and/or other materials provided with the distribution.
     17 
     18    3. Neither the name of the Intel Corporation nor the names of its
     19       contributors may be used to endorse or promote products derived from
     20       this software without specific prior written permission.
     21 
     22   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     23   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     24   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     25   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     26   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     27   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     28   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     29   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     30   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     31   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     32   POSSIBILITY OF SUCH DAMAGE.
     33 
     34 ******************************************************************************/
     35 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 327031 2017-12-20 18:15:06Z erj $*/
     36 
     37 /*
     38  * Copyright (c) 2011 The NetBSD Foundation, Inc.
     39  * All rights reserved.
     40  *
     41  * This code is derived from software contributed to The NetBSD Foundation
     42  * by Coyote Point Systems, Inc.
     43  *
     44  * Redistribution and use in source and binary forms, with or without
     45  * modification, are permitted provided that the following conditions
     46  * are met:
     47  * 1. Redistributions of source code must retain the above copyright
     48  *    notice, this list of conditions and the following disclaimer.
     49  * 2. Redistributions in binary form must reproduce the above copyright
     50  *    notice, this list of conditions and the following disclaimer in the
     51  *    documentation and/or other materials provided with the distribution.
     52  *
     53  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     54  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     55  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     56  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     57  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     58  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     59  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     60  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     61  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     62  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     63  * POSSIBILITY OF SUCH DAMAGE.
     64  */
     65 
     66 #include <sys/cdefs.h>
     67 __KERNEL_RCSID(0, "$NetBSD: ix_txrx.c,v 1.112 2023/12/28 10:02:14 msaitoh Exp $");
     68 
     69 #include "opt_inet.h"
     70 #include "opt_inet6.h"
     71 
     72 #include "ixgbe.h"
     73 
     74 /*
     75  * HW RSC control:
      76  *  This feature only works with
      77  *  IPv4 and only on 82599 and later.
      78  *  It also causes IP forwarding to
      79  *  fail, and unlike LRO that cannot
      80  *  be controlled by the stack. For
      81  *  these reasons it is left off by
      82  *  default with no tuneable
      83  *  interface; enabling it requires
      84  *  recompiling with this set to TRUE.
     85  */
     86 static bool ixgbe_rsc_enable = FALSE;
     87 
     88 #ifdef IXGBE_FDIR
     89 /*
     90  * For Flow Director: this is the
     91  * number of TX packets we sample
      92  * for the filter pool; at the default
      93  * rate of 20, every 20th packet is probed.
     94  *
     95  * This feature can be disabled by
     96  * setting this to 0.
     97  */
     98 static int atr_sample_rate = 20;
     99 #endif
    100 
    101 #define IXGBE_M_ADJ(sc, rxr, mp)					\
    102 	if (sc->max_frame_size <= (rxr->mbuf_sz - ETHER_ALIGN))	\
    103 		m_adj(mp, ETHER_ALIGN)
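
         /*
          * Usage sketch (illustrative; this is the pattern used by the RX
          * refill code below): the ETHER_ALIGN adjustment shifts the start
          * of the frame so the IP header that follows the 14-byte Ethernet
          * header lands on a 4-byte boundary, and is applied only when the
          * largest expected frame still fits in the cluster after the shift:
          *
          *	mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
          *	IXGBE_M_ADJ(sc, rxr, mp);
          */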
    104 
    105 /************************************************************************
    106  *  Local Function prototypes
    107  ************************************************************************/
    108 static void          ixgbe_setup_transmit_ring(struct tx_ring *);
    109 static void          ixgbe_free_transmit_buffers(struct tx_ring *);
    110 static int           ixgbe_setup_receive_ring(struct rx_ring *);
    111 static void          ixgbe_free_receive_buffers(struct rx_ring *);
    112 static void          ixgbe_rx_checksum(u32, struct mbuf *, u32,
    113                                        struct ixgbe_hw_stats *);
    114 static void          ixgbe_refresh_mbufs(struct rx_ring *, int);
    115 static void          ixgbe_drain(struct ifnet *, struct tx_ring *);
    116 static int           ixgbe_xmit(struct tx_ring *, struct mbuf *);
    117 static int           ixgbe_tx_ctx_setup(struct tx_ring *,
    118                                         struct mbuf *, u32 *, u32 *);
    119 static int           ixgbe_tso_setup(struct tx_ring *,
    120                                      struct mbuf *, u32 *, u32 *);
    121 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
    122 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
    123                                     struct mbuf *, u32);
    124 static int           ixgbe_dma_malloc(struct ixgbe_softc *, bus_size_t,
    125                                       struct ixgbe_dma_alloc *, int);
    126 static void          ixgbe_dma_free(struct ixgbe_softc *, struct ixgbe_dma_alloc *);
    127 
    128 static void	     ixgbe_setup_hw_rsc(struct rx_ring *);
    129 
    130 /************************************************************************
    131  * ixgbe_legacy_start_locked - Transmit entry point
    132  *
    133  *   Called by the stack to initiate a transmit.
    134  *   The driver will remain in this routine as long as there are
    135  *   packets to transmit and transmit resources are available.
    136  *   In case resources are not available, the stack is notified
    137  *   and the packet is requeued.
    138  ************************************************************************/
    139 int
    140 ixgbe_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
    141 {
    142 	int rc;
    143 	struct mbuf    *m_head;
    144 	struct ixgbe_softc *sc = txr->sc;
    145 
    146 	IXGBE_TX_LOCK_ASSERT(txr);
    147 
    148 	if (sc->link_active != LINK_STATE_UP) {
    149 		/*
     150 		 * Discard all packets buffered in the IFQ to avoid
     151 		 * sending stale packets when the link comes back up.
    152 		 */
    153 		ixgbe_drain(ifp, txr);
    154 		return (ENETDOWN);
    155 	}
    156 	if ((ifp->if_flags & IFF_RUNNING) == 0)
    157 		return (ENETDOWN);
    158 	if (txr->txr_no_space)
    159 		return (ENETDOWN);
    160 
    161 	while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
    162 		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
    163 			break;
    164 
    165 		IFQ_POLL(&ifp->if_snd, m_head);
    166 		if (m_head == NULL)
    167 			break;
    168 
    169 		if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
    170 			break;
    171 		}
    172 		IFQ_DEQUEUE(&ifp->if_snd, m_head);
    173 		if (rc != 0) {
    174 			m_freem(m_head);
    175 			continue;
    176 		}
    177 
    178 		/* Send a copy of the frame to the BPF listener */
    179 		bpf_mtap(ifp, m_head, BPF_D_OUT);
    180 	}
    181 
    182 	return IXGBE_SUCCESS;
    183 } /* ixgbe_legacy_start_locked */
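
         /*
          * A note on the pattern above (descriptive, not new behaviour):
          * IFQ_POLL peeks at the head of if_snd without removing it, and the
          * packet is only dequeued once ixgbe_xmit() has either accepted it
          * or failed with a hard error.  On EAGAIN (no free descriptors) the
          * loop breaks and the untouched packet stays on if_snd, which is how
          * "the packet is requeued" for the next call.
          */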
    184 
    185 /************************************************************************
    186  * ixgbe_legacy_start
    187  *
    188  *   Called by the stack, this always uses the first tx ring,
    189  *   and should not be used with multiqueue tx enabled.
    190  ************************************************************************/
    191 void
    192 ixgbe_legacy_start(struct ifnet *ifp)
    193 {
    194 	struct ixgbe_softc *sc = ifp->if_softc;
    195 	struct tx_ring *txr = sc->tx_rings;
    196 
    197 	if (ifp->if_flags & IFF_RUNNING) {
    198 		IXGBE_TX_LOCK(txr);
    199 		ixgbe_legacy_start_locked(ifp, txr);
    200 		IXGBE_TX_UNLOCK(txr);
    201 	}
    202 } /* ixgbe_legacy_start */
    203 
    204 /************************************************************************
    205  * ixgbe_mq_start - Multiqueue Transmit Entry Point
    206  *
    207  *   (if_transmit function)
    208  ************************************************************************/
    209 int
    210 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
    211 {
    212 	struct ixgbe_softc *sc = ifp->if_softc;
    213 	struct tx_ring	*txr;
    214 	int		i;
    215 #ifdef RSS
    216 	uint32_t bucket_id;
    217 #endif
    218 
    219 	/*
    220 	 * When doing RSS, map it to the same outbound queue
    221 	 * as the incoming flow would be mapped to.
    222 	 *
     223 	 * If everything is set up correctly, it should be the
     224 	 * same bucket that the current CPU is mapped to.
    225 	 */
    226 #ifdef RSS
    227 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
    228 		if ((sc->feat_en & IXGBE_FEATURE_RSS) &&
    229 		    (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
    230 		    &bucket_id) == 0)) {
    231 			i = bucket_id % sc->num_queues;
    232 #ifdef IXGBE_DEBUG
    233 			if (bucket_id > sc->num_queues)
    234 				if_printf(ifp,
    235 				    "bucket_id (%d) > num_queues (%d)\n",
    236 				    bucket_id, sc->num_queues);
    237 #endif
    238 		} else
    239 			i = m->m_pkthdr.flowid % sc->num_queues;
    240 	} else
     241 #endif /* RSS */
    242 		i = (cpu_index(curcpu()) % ncpu) % sc->num_queues;
    243 
    244 	/* Check for a hung queue and pick alternative */
    245 	if (((1ULL << i) & sc->active_queues) == 0)
    246 		i = ffs64(sc->active_queues);
    247 
    248 	txr = &sc->tx_rings[i];
    249 
    250 	if (__predict_false(!pcq_put(txr->txr_interq, m))) {
    251 		m_freem(m);
    252 		IXGBE_EVC_ADD(&txr->pcq_drops, 1);
    253 		return ENOBUFS;
    254 	}
    255 #ifdef IXGBE_ALWAYS_TXDEFER
    256 	kpreempt_disable();
    257 	softint_schedule(txr->txr_si);
    258 	kpreempt_enable();
    259 #else
    260 	if (IXGBE_TX_TRYLOCK(txr)) {
    261 		ixgbe_mq_start_locked(ifp, txr);
    262 		IXGBE_TX_UNLOCK(txr);
    263 	} else {
    264 		if (sc->txrx_use_workqueue) {
    265 			u_int *enqueued;
    266 
    267 			/*
     268 			 * This function itself is not called in interrupt
     269 			 * context; however, it can be called in fast softint
     270 			 * context right after receiving forwarded packets.
     271 			 * The workqueue must therefore be protected against
     272 			 * being enqueued twice when the machine handles both
     273 			 * locally generated and forwarded packets.
    274 			 */
    275 			enqueued = percpu_getref(sc->txr_wq_enqueued);
    276 			if (*enqueued == 0) {
    277 				*enqueued = 1;
    278 				percpu_putref(sc->txr_wq_enqueued);
    279 				workqueue_enqueue(sc->txr_wq,
    280 				    &txr->wq_cookie, curcpu());
    281 			} else
    282 				percpu_putref(sc->txr_wq_enqueued);
    283 		} else {
    284 			kpreempt_disable();
    285 			softint_schedule(txr->txr_si);
    286 			kpreempt_enable();
    287 		}
    288 	}
    289 #endif
    290 
    291 	return (0);
    292 } /* ixgbe_mq_start */
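
         /*
          * Queue selection sketch (illustrative numbers): without RSS the TX
          * queue is derived from the current CPU, so with e.g. 8 CPUs and
          * 4 queues, CPU 6 selects queue (6 % 8) % 4 == 2.  If that queue's
          * bit is clear in sc->active_queues, the code falls back to
          * ffs64(sc->active_queues) to pick an active queue instead.
          */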
    293 
    294 /************************************************************************
    295  * ixgbe_mq_start_locked
    296  ************************************************************************/
    297 int
    298 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
    299 {
    300 	struct mbuf    *next;
    301 	int            enqueued = 0, err = 0;
    302 
    303 	if (txr->sc->link_active != LINK_STATE_UP) {
    304 		/*
     305 		 * Discard all packets buffered in txr_interq to avoid
     306 		 * sending stale packets when the link comes back up.
    307 		 */
    308 		ixgbe_drain(ifp, txr);
    309 		return (ENETDOWN);
    310 	}
    311 	if ((ifp->if_flags & IFF_RUNNING) == 0)
    312 		return (ENETDOWN);
    313 	if (txr->txr_no_space)
    314 		return (ENETDOWN);
    315 
    316 	/* Process the queue */
    317 	while ((next = pcq_get(txr->txr_interq)) != NULL) {
    318 		if ((err = ixgbe_xmit(txr, next)) != 0) {
    319 			m_freem(next);
    320 			/* All errors are counted in ixgbe_xmit() */
    321 			break;
    322 		}
    323 		enqueued++;
    324 #if __FreeBSD_version >= 1100036
    325 		/*
    326 		 * Since we're looking at the tx ring, we can check
    327 		 * to see if we're a VF by examining our tail register
    328 		 * address.
    329 		 */
    330 		if ((txr->sc->feat_en & IXGBE_FEATURE_VF) &&
    331 		    (next->m_flags & M_MCAST))
    332 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
    333 #endif
    334 		/* Send a copy of the frame to the BPF listener */
    335 		bpf_mtap(ifp, next, BPF_D_OUT);
    336 		if ((ifp->if_flags & IFF_RUNNING) == 0)
    337 			break;
    338 	}
    339 
    340 	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->sc))
    341 		ixgbe_txeof(txr);
    342 
    343 	return (err);
    344 } /* ixgbe_mq_start_locked */
    345 
    346 /************************************************************************
    347  * ixgbe_deferred_mq_start
    348  *
    349  *   Called from a softint and workqueue (indirectly) to drain queued
    350  *   transmit packets.
    351  ************************************************************************/
    352 void
    353 ixgbe_deferred_mq_start(void *arg)
    354 {
    355 	struct tx_ring *txr = arg;
    356 	struct ixgbe_softc *sc = txr->sc;
    357 	struct ifnet   *ifp = sc->ifp;
    358 
    359 	IXGBE_TX_LOCK(txr);
    360 	if (pcq_peek(txr->txr_interq) != NULL)
    361 		ixgbe_mq_start_locked(ifp, txr);
    362 	IXGBE_TX_UNLOCK(txr);
    363 } /* ixgbe_deferred_mq_start */
    364 
    365 /************************************************************************
    366  * ixgbe_deferred_mq_start_work
    367  *
    368  *   Called from a workqueue to drain queued transmit packets.
    369  ************************************************************************/
    370 void
    371 ixgbe_deferred_mq_start_work(struct work *wk, void *arg)
    372 {
    373 	struct tx_ring *txr = container_of(wk, struct tx_ring, wq_cookie);
    374 	struct ixgbe_softc *sc = txr->sc;
    375 	u_int *enqueued = percpu_getref(sc->txr_wq_enqueued);
    376 	*enqueued = 0;
    377 	percpu_putref(sc->txr_wq_enqueued);
    378 
    379 	ixgbe_deferred_mq_start(txr);
     380 } /* ixgbe_deferred_mq_start_work */
    381 
    382 /************************************************************************
    383  * ixgbe_drain_all
    384  ************************************************************************/
    385 void
    386 ixgbe_drain_all(struct ixgbe_softc *sc)
    387 {
    388 	struct ifnet *ifp = sc->ifp;
    389 	struct ix_queue *que = sc->queues;
    390 
    391 	for (int i = 0; i < sc->num_queues; i++, que++) {
    392 		struct tx_ring  *txr = que->txr;
    393 
    394 		IXGBE_TX_LOCK(txr);
    395 		ixgbe_drain(ifp, txr);
    396 		IXGBE_TX_UNLOCK(txr);
    397 	}
    398 }
    399 
    400 /************************************************************************
    401  * ixgbe_xmit
    402  *
    403  *   Maps the mbufs to tx descriptors, allowing the
    404  *   TX engine to transmit the packets.
    405  *
    406  *   Return 0 on success, positive on failure
    407  ************************************************************************/
    408 static int
    409 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
    410 {
    411 	struct ixgbe_softc      *sc = txr->sc;
    412 	struct ixgbe_tx_buf     *txbuf;
    413 	union ixgbe_adv_tx_desc *txd = NULL;
    414 	struct ifnet	        *ifp = sc->ifp;
    415 	int                     i, j, error;
    416 	int                     first;
    417 	u32                     olinfo_status = 0, cmd_type_len;
    418 	bool                    remap = TRUE;
    419 	bus_dmamap_t            map;
    420 
    421 	/* Basic descriptor defines */
    422 	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
    423 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
    424 
    425 	if (vlan_has_tag(m_head))
    426 		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
    427 
    428 	/*
     429 	 * It is important to capture the first descriptor
     430 	 * used because it will contain the index of
     431 	 * the one we tell the hardware to report back.
    432 	 */
    433 	first = txr->next_avail_desc;
    434 	txbuf = &txr->tx_buffers[first];
    435 	map = txbuf->map;
    436 
    437 	/*
    438 	 * Map the packet for DMA.
    439 	 */
    440 retry:
    441 	error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map, m_head,
    442 	    BUS_DMA_NOWAIT);
    443 
    444 	if (__predict_false(error)) {
    445 		struct mbuf *m;
    446 
    447 		switch (error) {
    448 		case EAGAIN:
    449 			txr->q_eagain_tx_dma_setup++;
    450 			return EAGAIN;
    451 		case ENOMEM:
    452 			txr->q_enomem_tx_dma_setup++;
    453 			return EAGAIN;
    454 		case EFBIG:
    455 			/* Try it again? - one try */
    456 			if (remap == TRUE) {
    457 				remap = FALSE;
    458 				/*
    459 				 * XXX: m_defrag will choke on
    460 				 * non-MCLBYTES-sized clusters
    461 				 */
    462 				txr->q_efbig_tx_dma_setup++;
    463 				m = m_defrag(m_head, M_NOWAIT);
    464 				if (m == NULL) {
    465 					txr->q_mbuf_defrag_failed++;
    466 					return ENOBUFS;
    467 				}
    468 				m_head = m;
    469 				goto retry;
    470 			} else {
    471 				txr->q_efbig2_tx_dma_setup++;
    472 				return error;
    473 			}
    474 		case EINVAL:
    475 			txr->q_einval_tx_dma_setup++;
    476 			return error;
    477 		default:
    478 			txr->q_other_tx_dma_setup++;
    479 			return error;
    480 		}
    481 	}
    482 
    483 	/* Make certain there are enough descriptors */
    484 	if (txr->tx_avail < (map->dm_nsegs + 2)) {
    485 		txr->txr_no_space = true;
    486 		IXGBE_EVC_ADD(&txr->no_desc_avail, 1);
    487 		ixgbe_dmamap_unload(txr->txtag, txbuf->map);
    488 		return EAGAIN;
    489 	}
    490 
    491 	/*
    492 	 * Set up the appropriate offload context if requested,
    493 	 * this may consume one TX descriptor.
    494 	 */
    495 	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
    496 	if (__predict_false(error)) {
    497 		return (error);
    498 	}
    499 
    500 #ifdef IXGBE_FDIR
    501 	/* Do the flow director magic */
    502 	if ((sc->feat_en & IXGBE_FEATURE_FDIR) &&
    503 	    (txr->atr_sample) && (!sc->fdir_reinit)) {
    504 		++txr->atr_count;
    505 		if (txr->atr_count >= atr_sample_rate) {
    506 			ixgbe_atr(txr, m_head);
    507 			txr->atr_count = 0;
    508 		}
    509 	}
    510 #endif
    511 
    512 	olinfo_status |= IXGBE_ADVTXD_CC;
    513 	i = txr->next_avail_desc;
    514 	for (j = 0; j < map->dm_nsegs; j++) {
    515 		bus_size_t seglen;
    516 		uint64_t segaddr;
    517 
    518 		txbuf = &txr->tx_buffers[i];
    519 		txd = &txr->tx_base[i];
    520 		seglen = map->dm_segs[j].ds_len;
    521 		segaddr = htole64(map->dm_segs[j].ds_addr);
    522 
    523 		txd->read.buffer_addr = segaddr;
    524 		txd->read.cmd_type_len = htole32(cmd_type_len | seglen);
    525 		txd->read.olinfo_status = htole32(olinfo_status);
    526 
    527 		if (++i == txr->num_desc)
    528 			i = 0;
    529 	}
    530 
    531 	txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
    532 	txr->tx_avail -= map->dm_nsegs;
    533 	txr->next_avail_desc = i;
    534 
    535 	txbuf->m_head = m_head;
    536 	/*
     537 	 * Here we swap the maps so that the last descriptor,
     538 	 * which gets the completion interrupt, holds the
     539 	 * real map, and the first descriptor gets the
     540 	 * previously unused map from this descriptor.
    541 	 */
    542 	txr->tx_buffers[first].map = txbuf->map;
    543 	txbuf->map = map;
    544 	bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
    545 	    BUS_DMASYNC_PREWRITE);
    546 
    547 	/* Set the EOP descriptor that will be marked done */
    548 	txbuf = &txr->tx_buffers[first];
    549 	txbuf->eop = txd;
    550 
    551 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
    552 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
    553 	/*
     554 	 * Advance the Transmit Descriptor Tail (TDT); this tells the
    555 	 * hardware that this frame is available to transmit.
    556 	 */
    557 	IXGBE_EVC_ADD(&txr->total_packets, 1);
    558 	IXGBE_WRITE_REG(&sc->hw, txr->tail, i);
    559 
    560 	net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
    561 	if_statadd_ref(nsr, if_obytes, m_head->m_pkthdr.len);
    562 	if (m_head->m_flags & M_MCAST)
    563 		if_statinc_ref(nsr, if_omcasts);
    564 	IF_STAT_PUTREF(ifp);
    565 
    566 	/* Mark queue as having work */
    567 	if (txr->busy == 0)
    568 		txr->busy = 1;
    569 
    570 	return (0);
    571 } /* ixgbe_xmit */
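
         /*
          * Descriptor accounting sketch (illustrative): a frame that maps to
          * N DMA segments consumes N data descriptors, plus one context
          * descriptor when ixgbe_tx_ctx_setup() emits one, which is why the
          * code above requires dm_nsegs + 2 free slots before committing.
          * For example, a frame split across 3 clusters that also needs an
          * offload context uses 4 descriptors and requires tx_avail >= 5.
          */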
    572 
    573 /************************************************************************
    574  * ixgbe_drain
    575  ************************************************************************/
    576 static void
    577 ixgbe_drain(struct ifnet *ifp, struct tx_ring *txr)
    578 {
    579 	struct mbuf *m;
    580 
    581 	IXGBE_TX_LOCK_ASSERT(txr);
    582 
    583 	if (txr->me == 0) {
    584 		while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
    585 			IFQ_DEQUEUE(&ifp->if_snd, m);
    586 			m_freem(m);
    587 			IF_DROP(&ifp->if_snd);
    588 		}
    589 	}
    590 
    591 	while ((m = pcq_get(txr->txr_interq)) != NULL) {
    592 		m_freem(m);
    593 		IXGBE_EVC_ADD(&txr->pcq_drops, 1);
    594 	}
    595 }
    596 
    597 /************************************************************************
    598  * ixgbe_allocate_transmit_buffers
    599  *
    600  *   Allocate memory for tx_buffer structures. The tx_buffer stores all
    601  *   the information needed to transmit a packet on the wire. This is
     602  *   called only once at attach time; setup is done on every reset.
    603  ************************************************************************/
    604 static int
    605 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
    606 {
    607 	struct ixgbe_softc  *sc = txr->sc;
    608 	device_t            dev = sc->dev;
    609 	struct ixgbe_tx_buf *txbuf;
    610 	int                 error, i;
    611 
    612 	/*
    613 	 * Setup DMA descriptor areas.
    614 	 */
    615 	error = ixgbe_dma_tag_create(
    616 	         /*      parent */ sc->osdep.dmat,
    617 	         /*   alignment */ 1,
    618 	         /*      bounds */ 0,
    619 	         /*     maxsize */ IXGBE_TSO_SIZE,
    620 	         /*   nsegments */ sc->num_segs,
    621 	         /*  maxsegsize */ PAGE_SIZE,
    622 	         /*       flags */ 0,
    623 	                           &txr->txtag);
    624 	if (error != 0) {
     625 		aprint_error_dev(dev, "Unable to allocate TX DMA tag\n");
    626 		goto fail;
    627 	}
    628 
    629 	txr->tx_buffers = malloc(sizeof(struct ixgbe_tx_buf) *
    630 	    sc->num_tx_desc, M_DEVBUF, M_WAITOK | M_ZERO);
    631 
    632 	/* Create the descriptor buffer dma maps */
    633 	txbuf = txr->tx_buffers;
    634 	for (i = 0; i < sc->num_tx_desc; i++, txbuf++) {
    635 		error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
    636 		if (error != 0) {
    637 			aprint_error_dev(dev,
    638 			    "Unable to create TX DMA map (%d)\n", error);
    639 			goto fail;
    640 		}
    641 	}
    642 
    643 	return 0;
    644 fail:
     645 	/* Free everything; this handles the case where we failed partway through */
    646 #if 0 /* XXX was FreeBSD */
    647 	ixgbe_free_transmit_structures(sc);
    648 #else
    649 	ixgbe_free_transmit_buffers(txr);
    650 #endif
    651 	return (error);
    652 } /* ixgbe_allocate_transmit_buffers */
    653 
    654 /************************************************************************
    655  * ixgbe_setup_transmit_ring - Initialize a transmit ring.
    656  ************************************************************************/
    657 static void
    658 ixgbe_setup_transmit_ring(struct tx_ring *txr)
    659 {
    660 	struct ixgbe_softc    *sc = txr->sc;
    661 	struct ixgbe_tx_buf   *txbuf;
    662 #ifdef DEV_NETMAP
     663 	struct netmap_adapter *na = NA(sc->ifp);
    664 	struct netmap_slot    *slot;
    665 #endif /* DEV_NETMAP */
    666 
    667 	/* Clear the old ring contents */
    668 	IXGBE_TX_LOCK(txr);
    669 
    670 #ifdef DEV_NETMAP
    671 	if (sc->feat_en & IXGBE_FEATURE_NETMAP) {
    672 		/*
    673 		 * (under lock): if in netmap mode, do some consistency
    674 		 * checks and set slot to entry 0 of the netmap ring.
    675 		 */
    676 		slot = netmap_reset(na, NR_TX, txr->me, 0);
    677 	}
    678 #endif /* DEV_NETMAP */
    679 
    680 	bzero((void *)txr->tx_base,
    681 	    (sizeof(union ixgbe_adv_tx_desc)) * sc->num_tx_desc);
    682 	/* Reset indices */
    683 	txr->next_avail_desc = 0;
    684 	txr->next_to_clean = 0;
    685 
    686 	/* Free any existing tx buffers. */
    687 	txbuf = txr->tx_buffers;
    688 	for (int i = 0; i < txr->num_desc; i++, txbuf++) {
    689 		if (txbuf->m_head != NULL) {
    690 			bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
    691 			    0, txbuf->m_head->m_pkthdr.len,
    692 			    BUS_DMASYNC_POSTWRITE);
    693 			ixgbe_dmamap_unload(txr->txtag, txbuf->map);
    694 			m_freem(txbuf->m_head);
    695 			txbuf->m_head = NULL;
    696 		}
    697 
    698 #ifdef DEV_NETMAP
    699 		/*
    700 		 * In netmap mode, set the map for the packet buffer.
    701 		 * NOTE: Some drivers (not this one) also need to set
    702 		 * the physical buffer address in the NIC ring.
    703 		 * Slots in the netmap ring (indexed by "si") are
    704 		 * kring->nkr_hwofs positions "ahead" wrt the
    705 		 * corresponding slot in the NIC ring. In some drivers
    706 		 * (not here) nkr_hwofs can be negative. Function
    707 		 * netmap_idx_n2k() handles wraparounds properly.
    708 		 */
    709 		if ((sc->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
    710 			int si = netmap_idx_n2k(na->tx_rings[txr->me], i);
    711 			netmap_load_map(na, txr->txtag,
    712 			    txbuf->map, NMB(na, slot + si));
    713 		}
    714 #endif /* DEV_NETMAP */
    715 
    716 		/* Clear the EOP descriptor pointer */
    717 		txbuf->eop = NULL;
    718 	}
    719 
    720 #ifdef IXGBE_FDIR
    721 	/* Set the rate at which we sample packets */
    722 	if (sc->feat_en & IXGBE_FEATURE_FDIR)
    723 		txr->atr_sample = atr_sample_rate;
    724 #endif
    725 
    726 	/* Set number of descriptors available */
    727 	txr->tx_avail = sc->num_tx_desc;
    728 
    729 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
    730 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
    731 	IXGBE_TX_UNLOCK(txr);
    732 } /* ixgbe_setup_transmit_ring */
    733 
    734 /************************************************************************
    735  * ixgbe_setup_transmit_structures - Initialize all transmit rings.
    736  ************************************************************************/
    737 int
    738 ixgbe_setup_transmit_structures(struct ixgbe_softc *sc)
    739 {
    740 	struct tx_ring *txr = sc->tx_rings;
    741 
    742 	for (int i = 0; i < sc->num_queues; i++, txr++)
    743 		ixgbe_setup_transmit_ring(txr);
    744 
    745 	return (0);
    746 } /* ixgbe_setup_transmit_structures */
    747 
    748 /************************************************************************
    749  * ixgbe_free_transmit_structures - Free all transmit rings.
    750  ************************************************************************/
    751 void
    752 ixgbe_free_transmit_structures(struct ixgbe_softc *sc)
    753 {
    754 	struct tx_ring *txr = sc->tx_rings;
    755 
    756 	for (int i = 0; i < sc->num_queues; i++, txr++) {
    757 		ixgbe_free_transmit_buffers(txr);
    758 		ixgbe_dma_free(sc, &txr->txdma);
    759 		IXGBE_TX_LOCK_DESTROY(txr);
    760 	}
    761 	free(sc->tx_rings, M_DEVBUF);
    762 } /* ixgbe_free_transmit_structures */
    763 
    764 /************************************************************************
    765  * ixgbe_free_transmit_buffers
    766  *
    767  *   Free transmit ring related data structures.
    768  ************************************************************************/
    769 static void
    770 ixgbe_free_transmit_buffers(struct tx_ring *txr)
    771 {
    772 	struct ixgbe_softc  *sc = txr->sc;
    773 	struct ixgbe_tx_buf *tx_buffer;
    774 	int                 i;
    775 
    776 	INIT_DEBUGOUT("ixgbe_free_transmit_buffers: begin");
    777 
    778 	if (txr->tx_buffers == NULL)
    779 		return;
    780 
    781 	tx_buffer = txr->tx_buffers;
    782 	for (i = 0; i < sc->num_tx_desc; i++, tx_buffer++) {
    783 		if (tx_buffer->m_head != NULL) {
    784 			bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
    785 			    0, tx_buffer->m_head->m_pkthdr.len,
    786 			    BUS_DMASYNC_POSTWRITE);
    787 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
    788 			m_freem(tx_buffer->m_head);
    789 			tx_buffer->m_head = NULL;
    790 			if (tx_buffer->map != NULL) {
    791 				ixgbe_dmamap_destroy(txr->txtag,
    792 				    tx_buffer->map);
    793 				tx_buffer->map = NULL;
    794 			}
    795 		} else if (tx_buffer->map != NULL) {
    796 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
    797 			ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
    798 			tx_buffer->map = NULL;
    799 		}
    800 	}
    801 	if (txr->txr_interq != NULL) {
    802 		struct mbuf *m;
    803 
    804 		while ((m = pcq_get(txr->txr_interq)) != NULL)
    805 			m_freem(m);
    806 		pcq_destroy(txr->txr_interq);
    807 	}
    808 	if (txr->tx_buffers != NULL) {
    809 		free(txr->tx_buffers, M_DEVBUF);
    810 		txr->tx_buffers = NULL;
    811 	}
    812 	if (txr->txtag != NULL) {
    813 		ixgbe_dma_tag_destroy(txr->txtag);
    814 		txr->txtag = NULL;
    815 	}
    816 } /* ixgbe_free_transmit_buffers */
    817 
    818 /************************************************************************
    819  * ixgbe_tx_ctx_setup
    820  *
    821  *   Advanced Context Descriptor setup for VLAN, CSUM or TSO
    822  ************************************************************************/
    823 static int
    824 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
    825     u32 *cmd_type_len, u32 *olinfo_status)
    826 {
    827 	struct ixgbe_softc               *sc = txr->sc;
    828 	struct ixgbe_adv_tx_context_desc *TXD;
    829 	struct ether_vlan_header         *eh;
    830 #ifdef INET
    831 	struct ip                        *ip;
    832 #endif
    833 #ifdef INET6
    834 	struct ip6_hdr                   *ip6;
    835 #endif
    836 	int                              ehdrlen, ip_hlen = 0;
    837 	int                              offload = TRUE;
    838 	int                              ctxd = txr->next_avail_desc;
    839 	u32                              vlan_macip_lens = 0;
    840 	u32                              type_tucmd_mlhl = 0;
    841 	u16                              vtag = 0;
    842 	u16                              etype;
    843 	u8                               ipproto = 0;
    844 	char                             *l3d;
    845 
    846 	/* First check if TSO is to be used */
    847 	if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4 | M_CSUM_TSOv6)) {
    848 		int rv = ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status);
    849 
    850 		if (rv != 0)
    851 			IXGBE_EVC_ADD(&sc->tso_err, 1);
    852 		return rv;
    853 	}
    854 
    855 	if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
    856 		offload = FALSE;
    857 
    858 	/* Indicate the whole packet as payload when not doing TSO */
    859 	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
    860 
    861 	/*
    862 	 * In advanced descriptors the vlan tag must
    863 	 * be placed into the context descriptor. Hence
    864 	 * we need to make one even if not doing offloads.
    865 	 */
    866 	if (vlan_has_tag(mp)) {
    867 		vtag = htole16(vlan_get_tag(mp));
    868 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
    869 	} else if (!(txr->sc->feat_en & IXGBE_FEATURE_NEEDS_CTXD) &&
    870 	           (offload == FALSE))
    871 		return (0);
    872 
    873 	/*
    874 	 * Determine where frame payload starts.
    875 	 * Jump over vlan headers if already present,
    876 	 * helpful for QinQ too.
    877 	 */
    878 	KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
    879 	eh = mtod(mp, struct ether_vlan_header *);
    880 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
    881 		KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
    882 		etype = ntohs(eh->evl_proto);
    883 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
    884 	} else {
    885 		etype = ntohs(eh->evl_encap_proto);
    886 		ehdrlen = ETHER_HDR_LEN;
    887 	}
    888 
    889 	/* Set the ether header length */
    890 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
    891 
    892 	if (offload == FALSE)
    893 		goto no_offloads;
    894 
    895 	/*
    896 	 * If the first mbuf only includes the ethernet header,
    897 	 * jump to the next one
    898 	 * XXX: This assumes the stack splits mbufs containing headers
    899 	 *      on header boundaries
    900 	 * XXX: And assumes the entire IP header is contained in one mbuf
    901 	 */
    902 	if (mp->m_len == ehdrlen && mp->m_next)
    903 		l3d = mtod(mp->m_next, char *);
    904 	else
    905 		l3d = mtod(mp, char *) + ehdrlen;
    906 
    907 	switch (etype) {
    908 #ifdef INET
    909 	case ETHERTYPE_IP:
    910 		ip = (struct ip *)(l3d);
    911 		ip_hlen = ip->ip_hl << 2;
    912 		ipproto = ip->ip_p;
    913 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
    914 		KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
    915 		    ip->ip_sum == 0);
    916 		break;
    917 #endif
    918 #ifdef INET6
    919 	case ETHERTYPE_IPV6:
    920 		ip6 = (struct ip6_hdr *)(l3d);
    921 		ip_hlen = sizeof(struct ip6_hdr);
    922 		ipproto = ip6->ip6_nxt;
    923 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
    924 		break;
    925 #endif
    926 	default:
    927 		offload = false;
    928 		break;
    929 	}
    930 
    931 	if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
    932 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
    933 
    934 	vlan_macip_lens |= ip_hlen;
    935 
    936 	/* No support for offloads for non-L4 next headers */
    937 	switch (ipproto) {
    938 	case IPPROTO_TCP:
    939 		if (mp->m_pkthdr.csum_flags &
    940 		    (M_CSUM_TCPv4 | M_CSUM_TCPv6))
    941 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
    942 		else
    943 			offload = false;
    944 		break;
    945 	case IPPROTO_UDP:
    946 		if (mp->m_pkthdr.csum_flags &
    947 		    (M_CSUM_UDPv4 | M_CSUM_UDPv6))
    948 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
    949 		else
    950 			offload = false;
    951 		break;
    952 	default:
    953 		offload = false;
    954 		break;
    955 	}
    956 
    957 	if (offload) /* Insert L4 checksum into data descriptors */
    958 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
    959 
    960 no_offloads:
    961 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
    962 
    963 	/* Now ready a context descriptor */
    964 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
    965 
    966 	/* Now copy bits into descriptor */
    967 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
    968 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
    969 	TXD->seqnum_seed = htole32(0);
    970 	TXD->mss_l4len_idx = htole32(0);
    971 
    972 	/* We've consumed the first desc, adjust counters */
    973 	if (++ctxd == txr->num_desc)
    974 		ctxd = 0;
    975 	txr->next_avail_desc = ctxd;
    976 	--txr->tx_avail;
    977 
    978 	return (0);
    979 } /* ixgbe_tx_ctx_setup */
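
         /*
          * Context descriptor sketch (values for illustration only): an
          * untagged IPv4/TCP frame with a 14-byte Ethernet header and a
          * 20-byte IP header ends up with
          *
          *	vlan_macip_lens = (14 << IXGBE_ADVTXD_MACLEN_SHIFT) | 20;
          *	type_tucmd_mlhl = IXGBE_ADVTXD_DCMD_DEXT |
          *	    IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_TUCMD_IPV4 |
          *	    IXGBE_ADVTXD_TUCMD_L4T_TCP;
          *
          * i.e. the IP header length in the low bits, the MAC header length
          * above it, and the VLAN tag, when present, in the upper 16 bits.
          */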
    980 
    981 /************************************************************************
    982  * ixgbe_tso_setup
    983  *
    984  *   Setup work for hardware segmentation offload (TSO) on
    985  *   adapters using advanced tx descriptors
    986  ************************************************************************/
    987 static int
    988 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len,
    989     u32 *olinfo_status)
    990 {
    991 	struct ixgbe_adv_tx_context_desc *TXD;
    992 	struct ether_vlan_header         *eh;
    993 #ifdef INET6
    994 	struct ip6_hdr                   *ip6;
    995 #endif
    996 #ifdef INET
    997 	struct ip                        *ip;
    998 #endif
    999 	struct tcphdr                    *th;
   1000 	int                              ctxd, ehdrlen, ip_hlen, tcp_hlen;
   1001 	u32                              vlan_macip_lens = 0;
   1002 	u32                              type_tucmd_mlhl = 0;
   1003 	u32                              mss_l4len_idx = 0, paylen;
   1004 	u16                              vtag = 0, eh_type;
   1005 
   1006 	/*
   1007 	 * Determine where frame payload starts.
   1008 	 * Jump over vlan headers if already present
   1009 	 */
   1010 	eh = mtod(mp, struct ether_vlan_header *);
   1011 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
   1012 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
   1013 		eh_type = eh->evl_proto;
   1014 	} else {
   1015 		ehdrlen = ETHER_HDR_LEN;
   1016 		eh_type = eh->evl_encap_proto;
   1017 	}
   1018 
   1019 	switch (ntohs(eh_type)) {
   1020 #ifdef INET
   1021 	case ETHERTYPE_IP:
   1022 		ip = (struct ip *)(mp->m_data + ehdrlen);
   1023 		if (ip->ip_p != IPPROTO_TCP)
   1024 			return (ENXIO);
   1025 		ip->ip_sum = 0;
   1026 		ip_hlen = ip->ip_hl << 2;
   1027 		th = (struct tcphdr *)((char *)ip + ip_hlen);
   1028 		th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
   1029 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
   1030 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
   1031 		/* Tell transmit desc to also do IPv4 checksum. */
   1032 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
   1033 		break;
   1034 #endif
   1035 #ifdef INET6
   1036 	case ETHERTYPE_IPV6:
   1037 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
   1038 		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
   1039 		if (ip6->ip6_nxt != IPPROTO_TCP)
   1040 			return (ENXIO);
   1041 		ip_hlen = sizeof(struct ip6_hdr);
   1042 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
   1043 		th = (struct tcphdr *)((char *)ip6 + ip_hlen);
   1044 		th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
   1045 		    &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
   1046 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
   1047 		break;
   1048 #endif
   1049 	default:
   1050 		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
   1051 		    __func__, ntohs(eh_type));
   1052 		break;
   1053 	}
   1054 
   1055 	ctxd = txr->next_avail_desc;
   1056 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
   1057 
   1058 	tcp_hlen = th->th_off << 2;
   1059 
   1060 	/* This is used in the transmit desc in encap */
   1061 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
   1062 
   1063 	/* VLAN MACLEN IPLEN */
   1064 	if (vlan_has_tag(mp)) {
   1065 		vtag = htole16(vlan_get_tag(mp));
   1066 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
   1067 	}
   1068 
   1069 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
   1070 	vlan_macip_lens |= ip_hlen;
   1071 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
   1072 
   1073 	/* ADV DTYPE TUCMD */
   1074 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
   1075 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
   1076 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
   1077 
   1078 	/* MSS L4LEN IDX */
   1079 	mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
   1080 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
   1081 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
   1082 
   1083 	TXD->seqnum_seed = htole32(0);
   1084 
   1085 	if (++ctxd == txr->num_desc)
   1086 		ctxd = 0;
   1087 
   1088 	txr->tx_avail--;
   1089 	txr->next_avail_desc = ctxd;
   1090 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
   1091 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
   1092 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
   1093 	IXGBE_EVC_ADD(&txr->tso_tx, 1);
   1094 
   1095 	return (0);
   1096 } /* ixgbe_tso_setup */
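
         /*
          * TSO context sketch (values for illustration only): for a
          * 1500-byte MTU flow, m_pkthdr.segsz is typically 1460 and the TCP
          * header 20 bytes, so the context descriptor carries
          *
          *	mss_l4len_idx = (1460 << IXGBE_ADVTXD_MSS_SHIFT) |
          *	    (20 << IXGBE_ADVTXD_L4LEN_SHIFT);
          *
          * while the data descriptors get IXGBE_ADVTXD_DCMD_TSE and the TCP
          * payload length (paylen) instead of the full packet length.
          */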
   1097 
   1098 
   1099 /************************************************************************
   1100  * ixgbe_txeof
   1101  *
   1102  *   Examine each tx_buffer in the used queue. If the hardware is done
   1103  *   processing the packet then free associated resources. The
   1104  *   tx_buffer is put back on the free queue.
   1105  ************************************************************************/
   1106 bool
   1107 ixgbe_txeof(struct tx_ring *txr)
   1108 {
   1109 	struct ixgbe_softc	*sc = txr->sc;
   1110 	struct ifnet		*ifp = sc->ifp;
   1111 	struct ixgbe_tx_buf	*buf;
   1112 	union ixgbe_adv_tx_desc *txd;
   1113 	u32			work, processed = 0;
   1114 	u32			limit = sc->tx_process_limit;
   1115 	u16			avail;
   1116 
   1117 	KASSERT(mutex_owned(&txr->tx_mtx));
   1118 
   1119 #ifdef DEV_NETMAP
   1120 	if ((sc->feat_en & IXGBE_FEATURE_NETMAP) &&
   1121 	    (sc->ifp->if_capenable & IFCAP_NETMAP)) {
    1122 		struct netmap_adapter *na = NA(sc->ifp);
   1123 		struct netmap_kring *kring = na->tx_rings[txr->me];
   1124 		txd = txr->tx_base;
   1125 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1126 		    BUS_DMASYNC_POSTREAD);
   1127 		/*
   1128 		 * In netmap mode, all the work is done in the context
   1129 		 * of the client thread. Interrupt handlers only wake up
   1130 		 * clients, which may be sleeping on individual rings
   1131 		 * or on a global resource for all rings.
   1132 		 * To implement tx interrupt mitigation, we wake up the client
   1133 		 * thread roughly every half ring, even if the NIC interrupts
   1134 		 * more frequently. This is implemented as follows:
   1135 		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
   1136 		 *   the slot that should wake up the thread (nkr_num_slots
   1137 		 *   means the user thread should not be woken up);
   1138 		 * - the driver ignores tx interrupts unless netmap_mitigate=0
   1139 		 *   or the slot has the DD bit set.
   1140 		 */
   1141 		if (kring->nr_kflags < kring->nkr_num_slots &&
   1142 		    le32toh(txd[kring->nr_kflags].wb.status) & IXGBE_TXD_STAT_DD) {
   1143 			netmap_tx_irq(ifp, txr->me);
   1144 		}
   1145 		return false;
   1146 	}
   1147 #endif /* DEV_NETMAP */
   1148 
   1149 	if (txr->tx_avail == txr->num_desc) {
   1150 		txr->busy = 0;
   1151 		return false;
   1152 	}
   1153 
   1154 	/* Get work starting point */
   1155 	work = txr->next_to_clean;
   1156 	buf = &txr->tx_buffers[work];
   1157 	txd = &txr->tx_base[work];
   1158 	work -= txr->num_desc; /* The distance to ring end */
   1159 	avail = txr->tx_avail;
   1160 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1161 	    BUS_DMASYNC_POSTREAD);
   1162 
   1163 	do {
   1164 		union ixgbe_adv_tx_desc *eop = buf->eop;
   1165 		if (eop == NULL) /* No work */
   1166 			break;
   1167 
   1168 		if ((le32toh(eop->wb.status) & IXGBE_TXD_STAT_DD) == 0)
   1169 			break;	/* I/O not complete */
   1170 
   1171 		if (buf->m_head) {
   1172 			txr->bytes += buf->m_head->m_pkthdr.len;
   1173 			bus_dmamap_sync(txr->txtag->dt_dmat, buf->map,
   1174 			    0, buf->m_head->m_pkthdr.len,
   1175 			    BUS_DMASYNC_POSTWRITE);
   1176 			ixgbe_dmamap_unload(txr->txtag, buf->map);
   1177 			m_freem(buf->m_head);
   1178 			buf->m_head = NULL;
   1179 		}
   1180 		buf->eop = NULL;
   1181 		++avail;
   1182 
   1183 		/* We clean the range if multi segment */
   1184 		while (txd != eop) {
   1185 			++txd;
   1186 			++buf;
   1187 			++work;
   1188 			/* wrap the ring? */
   1189 			if (__predict_false(!work)) {
   1190 				work -= txr->num_desc;
   1191 				buf = txr->tx_buffers;
   1192 				txd = txr->tx_base;
   1193 			}
   1194 			if (buf->m_head) {
   1195 				txr->bytes +=
   1196 				    buf->m_head->m_pkthdr.len;
   1197 				bus_dmamap_sync(txr->txtag->dt_dmat,
   1198 				    buf->map,
   1199 				    0, buf->m_head->m_pkthdr.len,
   1200 				    BUS_DMASYNC_POSTWRITE);
   1201 				ixgbe_dmamap_unload(txr->txtag,
   1202 				    buf->map);
   1203 				m_freem(buf->m_head);
   1204 				buf->m_head = NULL;
   1205 			}
   1206 			++avail;
   1207 			buf->eop = NULL;
   1208 
   1209 		}
   1210 		++processed;
   1211 
   1212 		/* Try the next packet */
   1213 		++txd;
   1214 		++buf;
   1215 		++work;
   1216 		/* reset with a wrap */
   1217 		if (__predict_false(!work)) {
   1218 			work -= txr->num_desc;
   1219 			buf = txr->tx_buffers;
   1220 			txd = txr->tx_base;
   1221 		}
   1222 		prefetch(txd);
   1223 	} while (__predict_true(--limit));
   1224 
   1225 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1226 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   1227 
   1228 	work += txr->num_desc;
   1229 	txr->next_to_clean = work;
   1230 	if (processed) {
   1231 		txr->tx_avail = avail;
   1232 		txr->txr_no_space = false;
   1233 		txr->packets += processed;
   1234 		if_statadd(ifp, if_opackets, processed);
   1235 	}
   1236 
   1237 	/*
    1238 	 * Queue hang detection: we know there is
    1239 	 * work outstanding or the early return above
    1240 	 * would have been taken, so increment busy
    1241 	 * if nothing managed to get cleaned; the
    1242 	 * local timer will then check it and mark it
    1243 	 * HUNG if it exceeds a MAX number of attempts.
   1244 	 */
   1245 	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
   1246 		++txr->busy;
   1247 	/*
    1248 	 * If anything gets cleaned we reset the state to 1;
    1249 	 * note this will turn off HUNG if it is set.
   1250 	 */
   1251 	if (processed)
   1252 		txr->busy = 1;
   1253 
   1254 	if (txr->tx_avail == txr->num_desc)
   1255 		txr->busy = 0;
   1256 
   1257 	return ((limit > 0) ? false : true);
   1258 } /* ixgbe_txeof */
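
         /*
          * Index arithmetic sketch (illustrative): "work" is biased by
          * -num_desc so the ring-wrap test is simply "work == 0".  With,
          * say, num_desc = 512 and next_to_clean = 500, work starts at -12,
          * hits 0 after walking 12 descriptors (at which point buf/txd are
          * reset to the start of the ring and work drops back to -512), and
          * the final "work += num_desc" restores the real ring index that is
          * stored back into next_to_clean.
          */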
   1259 
   1260 /************************************************************************
   1261  * ixgbe_rsc_count
   1262  *
   1263  *   Used to detect a descriptor that has been merged by Hardware RSC.
   1264  ************************************************************************/
   1265 static inline u32
   1266 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
   1267 {
   1268 	return (le32toh(rx->wb.lower.lo_dword.data) &
   1269 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
   1270 } /* ixgbe_rsc_count */
   1271 
   1272 /************************************************************************
   1273  * ixgbe_setup_hw_rsc
   1274  *
   1275  *   Initialize Hardware RSC (LRO) feature on 82599
    1276  *   Initialize the Hardware RSC (LRO) feature on 82599
    1277  *   for an RX ring; it is toggled by the LRO capability
   1278  *
   1279  *   NOTE: Since this HW feature only works with IPv4 and
   1280  *         testing has shown soft LRO to be as effective,
   1281  *         this feature will be disabled by default.
   1282  ************************************************************************/
   1283 static void
   1284 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
   1285 {
   1286 	struct ixgbe_softc *sc = rxr->sc;
   1287 	struct ixgbe_hw	*hw = &sc->hw;
   1288 	u32		rscctrl, rdrxctl;
   1289 
   1290 	/* If turning LRO/RSC off we need to disable it */
   1291 	if ((sc->ifp->if_capenable & IFCAP_LRO) == 0) {
   1292 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
   1293 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
    1294 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
         		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
   1295 	}
   1296 
   1297 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
   1298 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
   1299 #ifdef DEV_NETMAP
   1300 	/* Always strip CRC unless Netmap disabled it */
   1301 	if (!(sc->feat_en & IXGBE_FEATURE_NETMAP) ||
   1302 	    !(sc->ifp->if_capenable & IFCAP_NETMAP) ||
   1303 	    ix_crcstrip)
   1304 #endif /* DEV_NETMAP */
   1305 		rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
   1306 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
   1307 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
   1308 
   1309 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
   1310 	rscctrl |= IXGBE_RSCCTL_RSCEN;
   1311 	/*
   1312 	 * Limit the total number of descriptors that
   1313 	 * can be combined, so it does not exceed 64K
   1314 	 */
   1315 	if (rxr->mbuf_sz == MCLBYTES)
   1316 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
   1317 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
   1318 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
   1319 	else if (rxr->mbuf_sz == MJUM9BYTES)
   1320 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
   1321 	else  /* Using 16K cluster */
   1322 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
   1323 
   1324 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
   1325 
   1326 	/* Enable TCP header recognition */
   1327 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
   1328 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR));
   1329 
   1330 	/* Disable RSC for ACK packets */
   1331 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
   1332 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
   1333 
   1334 	rxr->hw_rsc = TRUE;
   1335 } /* ixgbe_setup_hw_rsc */
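
         /*
          * Sizing sketch (illustrative): with the default 2KB MCLBYTES
          * clusters, MAXDESC_16 allows up to 16 merged buffers, i.e. about
          * 32KB per RSC context; the larger cluster sizes are given smaller
          * MAXDESC values so the merged total stays below the 64KB limit
          * noted above.
          */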
   1336 
   1337 /************************************************************************
   1338  * ixgbe_refresh_mbufs
   1339  *
   1340  *   Refresh mbuf buffers for RX descriptor rings
   1341  *    - now keeps its own state so discards due to resource
    1342  *      exhaustion are unnecessary; if an mbuf cannot be obtained
    1343  *      it just returns, keeping its placeholder, so it can simply
    1344  *      be called again later to retry.
   1345  ************************************************************************/
   1346 static void
   1347 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
   1348 {
   1349 	struct ixgbe_softc  *sc = rxr->sc;
   1350 	struct ixgbe_rx_buf *rxbuf;
   1351 	struct mbuf         *mp;
   1352 	int                 i, error;
   1353 	bool                refreshed = false;
   1354 
   1355 	i = rxr->next_to_refresh;
   1356 	/* next_to_refresh points to the previous one */
   1357 	if (++i == rxr->num_desc)
   1358 		i = 0;
   1359 
   1360 	while (i != limit) {
   1361 		rxbuf = &rxr->rx_buffers[i];
   1362 		if (__predict_false(rxbuf->buf == NULL)) {
   1363 			mp = ixgbe_getcl();
   1364 			if (mp == NULL) {
   1365 				IXGBE_EVC_ADD(&rxr->no_mbuf, 1);
   1366 				goto update;
   1367 			}
   1368 			mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   1369 			IXGBE_M_ADJ(sc, rxr, mp);
   1370 		} else
   1371 			mp = rxbuf->buf;
   1372 
   1373 		/* If we're dealing with an mbuf that was copied rather
   1374 		 * than replaced, there's no need to go through busdma.
   1375 		 */
   1376 		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
   1377 			/* Get the memory mapping */
   1378 			ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   1379 			error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
   1380 			    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
   1381 			if (__predict_false(error != 0)) {
   1382 				device_printf(sc->dev, "Refresh mbufs: "
   1383 				    "payload dmamap load failure - %d\n",
   1384 				    error);
   1385 				m_free(mp);
   1386 				rxbuf->buf = NULL;
   1387 				goto update;
   1388 			}
   1389 			rxbuf->buf = mp;
   1390 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1391 			    0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
   1392 			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
   1393 			    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1394 		} else {
   1395 			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
   1396 			rxbuf->flags &= ~IXGBE_RX_COPY;
   1397 		}
   1398 
   1399 		refreshed = true;
   1400 		/* next_to_refresh points to the previous one */
   1401 		rxr->next_to_refresh = i;
   1402 		if (++i == rxr->num_desc)
   1403 			i = 0;
   1404 	}
   1405 
   1406 update:
   1407 	if (refreshed) /* Update hardware tail index */
   1408 		IXGBE_WRITE_REG(&sc->hw, rxr->tail, rxr->next_to_refresh);
   1409 
   1410 	return;
   1411 } /* ixgbe_refresh_mbufs */
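
         /*
          * Refill walk sketch (illustrative): next_to_refresh names the last
          * slot already handed to the hardware, so the loop above starts at
          * the slot after it and stops just before "limit".  For example,
          * with next_to_refresh = 10 and limit = 14, slots 11, 12 and 13 are
          * refilled and the tail register is advanced to 13; a slot whose
          * mbuf allocation fails keeps its placeholder and is retried on the
          * next call.
          */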
   1412 
   1413 /************************************************************************
   1414  * ixgbe_allocate_receive_buffers
   1415  *
   1416  *   Allocate memory for rx_buffer structures. Since we use one
    1417  *   rx_buffer per received packet, the maximum number of rx_buffers
   1418  *   that we'll need is equal to the number of receive descriptors
   1419  *   that we've allocated.
   1420  ************************************************************************/
   1421 static int
   1422 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
   1423 {
   1424 	struct ixgbe_softc  *sc = rxr->sc;
   1425 	device_t            dev = sc->dev;
   1426 	struct ixgbe_rx_buf *rxbuf;
   1427 	int                 bsize, error;
   1428 
   1429 	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
   1430 	rxr->rx_buffers = malloc(bsize, M_DEVBUF, M_WAITOK | M_ZERO);
   1431 
   1432 	error = ixgbe_dma_tag_create(
   1433 	         /*      parent */ sc->osdep.dmat,
   1434 	         /*   alignment */ 1,
   1435 	         /*      bounds */ 0,
   1436 	         /*     maxsize */ MJUM16BYTES,
   1437 	         /*   nsegments */ 1,
   1438 	         /*  maxsegsize */ MJUM16BYTES,
   1439 	         /*       flags */ 0,
   1440 	                           &rxr->ptag);
   1441 	if (error != 0) {
   1442 		aprint_error_dev(dev, "Unable to create RX DMA tag\n");
   1443 		goto fail;
   1444 	}
   1445 
   1446 	for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
   1447 		rxbuf = &rxr->rx_buffers[i];
   1448 		error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
   1449 		if (error) {
   1450 			aprint_error_dev(dev, "Unable to create RX dma map\n");
   1451 			goto fail;
   1452 		}
   1453 	}
   1454 
   1455 	return (0);
   1456 
   1457 fail:
   1458 	/* Frees all, but can handle partial completion */
   1459 	ixgbe_free_receive_structures(sc);
   1460 
   1461 	return (error);
   1462 } /* ixgbe_allocate_receive_buffers */
   1463 
   1464 /************************************************************************
   1465  * ixgbe_free_receive_ring
   1466  ************************************************************************/
   1467 static void
   1468 ixgbe_free_receive_ring(struct rx_ring *rxr)
   1469 {
   1470 	for (int i = 0; i < rxr->num_desc; i++) {
   1471 		ixgbe_rx_discard(rxr, i);
   1472 	}
   1473 } /* ixgbe_free_receive_ring */
   1474 
   1475 /************************************************************************
   1476  * ixgbe_setup_receive_ring
   1477  *
   1478  *   Initialize a receive ring and its buffers.
   1479  ************************************************************************/
   1480 static int
   1481 ixgbe_setup_receive_ring(struct rx_ring *rxr)
   1482 {
   1483 	struct ixgbe_softc    *sc;
   1484 	struct ixgbe_rx_buf   *rxbuf;
   1485 #ifdef LRO
   1486 	struct ifnet          *ifp;
   1487 	struct lro_ctrl       *lro = &rxr->lro;
   1488 #endif /* LRO */
   1489 #ifdef DEV_NETMAP
    1490 	struct netmap_adapter *na = NA(rxr->sc->ifp);
   1491 	struct netmap_slot    *slot;
   1492 #endif /* DEV_NETMAP */
   1493 	int                   rsize, error = 0;
   1494 
   1495 	sc = rxr->sc;
   1496 #ifdef LRO
   1497 	ifp = sc->ifp;
   1498 #endif /* LRO */
   1499 
   1500 	/* Clear the ring contents */
   1501 	IXGBE_RX_LOCK(rxr);
   1502 
   1503 #ifdef DEV_NETMAP
   1504 	if (sc->feat_en & IXGBE_FEATURE_NETMAP)
   1505 		slot = netmap_reset(na, NR_RX, rxr->me, 0);
   1506 #endif /* DEV_NETMAP */
   1507 
   1508 	rsize = roundup2(sc->num_rx_desc *
   1509 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
   1510 	bzero((void *)rxr->rx_base, rsize);
   1511 	/* Cache the size */
   1512 	rxr->mbuf_sz = sc->rx_mbuf_sz;
   1513 
   1514 	/* Free current RX buffer structs and their mbufs */
   1515 	ixgbe_free_receive_ring(rxr);
   1516 
   1517 	/* Now replenish the mbufs */
   1518 	for (int i = 0; i < rxr->num_desc; i++) {
   1519 		struct mbuf *mp;
   1520 
   1521 		rxbuf = &rxr->rx_buffers[i];
   1522 
   1523 #ifdef DEV_NETMAP
   1524 		/*
   1525 		 * In netmap mode, fill the map and set the buffer
   1526 		 * address in the NIC ring, considering the offset
   1527 		 * between the netmap and NIC rings (see comment in
    1528 		 * ixgbe_setup_transmit_ring()). No need to allocate
   1529 		 * an mbuf, so end the block with a continue;
   1530 		 */
   1531 		if ((sc->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
   1532 			int sj = netmap_idx_n2k(na->rx_rings[rxr->me], i);
   1533 			uint64_t paddr;
   1534 			void *addr;
   1535 
   1536 			addr = PNMB(na, slot + sj, &paddr);
   1537 			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
   1538 			/* Update descriptor and the cached value */
   1539 			rxr->rx_base[i].read.pkt_addr = htole64(paddr);
   1540 			rxbuf->addr = htole64(paddr);
   1541 			continue;
   1542 		}
   1543 #endif /* DEV_NETMAP */
   1544 
   1545 		rxbuf->flags = 0;
   1546 		rxbuf->buf = ixgbe_getcl();
   1547 		if (rxbuf->buf == NULL) {
   1548 			IXGBE_EVC_ADD(&rxr->no_mbuf, 1);
   1549 			error = ENOBUFS;
   1550 			goto fail;
   1551 		}
   1552 		mp = rxbuf->buf;
   1553 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   1554 		IXGBE_M_ADJ(sc, rxr, mp);
   1555 		/* Get the memory mapping */
   1556 		error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat, rxbuf->pmap,
   1557 		    mp, BUS_DMA_NOWAIT);
   1558 		if (error != 0) {
   1559 			/*
   1560 			 * Clear this entry for later cleanup in
    1561 			 * ixgbe_rx_discard(), which is called via
   1562 			 * ixgbe_free_receive_ring().
   1563 			 */
   1564 			m_freem(mp);
   1565 			rxbuf->buf = NULL;
   1566 			goto fail;
   1567 		}
   1568 		bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1569 		    0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
   1570 		/* Update the descriptor and the cached value */
   1571 		rxr->rx_base[i].read.pkt_addr =
   1572 		    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1573 		rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1574 	}
   1575 
   1576 	/* Setup our descriptor indices */
   1577 	rxr->next_to_check = 0;
   1578 	rxr->next_to_refresh = sc->num_rx_desc - 1; /* Fully allocated */
   1579 	rxr->lro_enabled = FALSE;
   1580 	rxr->discard_multidesc = false;
   1581 	IXGBE_EVC_STORE(&rxr->rx_copies, 0);
   1582 #if 0 /* NetBSD */
   1583 	IXGBE_EVC_STORE(&rxr->rx_bytes, 0);
   1584 #if 1	/* Fix inconsistency */
   1585 	IXGBE_EVC_STORE(&rxr->rx_packets, 0);
   1586 #endif
   1587 #endif
   1588 	rxr->vtag_strip = FALSE;
   1589 
   1590 	ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   1591 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   1592 
   1593 	/*
   1594 	 * Now set up the LRO interface
   1595 	 */
   1596 	if (ixgbe_rsc_enable)
   1597 		ixgbe_setup_hw_rsc(rxr);
   1598 #ifdef LRO
   1599 	else if (ifp->if_capenable & IFCAP_LRO) {
   1600 		device_t dev = sc->dev;
   1601 		int err = tcp_lro_init(lro);
   1602 		if (err) {
   1603 			device_printf(dev, "LRO Initialization failed!\n");
   1604 			goto fail;
   1605 		}
   1606 		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
   1607 		rxr->lro_enabled = TRUE;
   1608 		lro->ifp = sc->ifp;
   1609 	}
   1610 #endif /* LRO */
   1611 
   1612 	IXGBE_RX_UNLOCK(rxr);
   1613 
   1614 	return (0);
   1615 
   1616 fail:
   1617 	ixgbe_free_receive_ring(rxr);
   1618 	IXGBE_RX_UNLOCK(rxr);
   1619 
   1620 	return (error);
   1621 } /* ixgbe_setup_receive_ring */
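
         /*
          * Illustrative example of the index setup above (hedged, derived from
          * the code rather than from hardware documentation): with
          * num_rx_desc = 8 the ring leaves ixgbe_setup_receive_ring() with
          *
          *	next_to_check   = 0   first descriptor ixgbe_rxeof() will examine
          *	next_to_refresh = 7   last descriptor stocked with an mbuf
          *
          * i.e. every slot already carries a cluster, so the first call to
          * ixgbe_refresh_mbufs() only happens after ixgbe_rxeof() has consumed
          * some descriptors.
          */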
   1622 
   1623 /************************************************************************
   1624  * ixgbe_setup_receive_structures - Initialize all receive rings.
   1625  ************************************************************************/
   1626 int
   1627 ixgbe_setup_receive_structures(struct ixgbe_softc *sc)
   1628 {
   1629 	struct rx_ring *rxr = sc->rx_rings;
   1630 	int            j;
   1631 
   1632 	INIT_DEBUGOUT("ixgbe_setup_receive_structures");
   1633 	for (j = 0; j < sc->num_queues; j++, rxr++)
   1634 		if (ixgbe_setup_receive_ring(rxr))
   1635 			goto fail;
   1636 
   1637 	return (0);
   1638 fail:
   1639 	/*
    1640 	 * Free the RX buffers allocated so far. We only handle the
    1641 	 * rings that completed; the failing ring has already cleaned
    1642 	 * up after itself. Ring 'j' failed, so it is the terminus.
   1643 	 */
   1644 	for (int i = 0; i < j; ++i) {
   1645 		rxr = &sc->rx_rings[i];
   1646 		IXGBE_RX_LOCK(rxr);
   1647 		ixgbe_free_receive_ring(rxr);
   1648 		IXGBE_RX_UNLOCK(rxr);
   1649 	}
   1650 
   1651 	return (ENOBUFS);
   1652 } /* ixgbe_setup_receive_structures */
   1653 
   1654 
   1655 /************************************************************************
   1656  * ixgbe_free_receive_structures - Free all receive rings.
   1657  ************************************************************************/
   1658 void
   1659 ixgbe_free_receive_structures(struct ixgbe_softc *sc)
   1660 {
   1661 	struct rx_ring *rxr = sc->rx_rings;
   1662 
   1663 	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
   1664 
   1665 	for (int i = 0; i < sc->num_queues; i++, rxr++) {
   1666 		ixgbe_free_receive_buffers(rxr);
   1667 #ifdef LRO
   1668 		/* Free LRO memory */
   1669 		tcp_lro_free(&rxr->lro);
   1670 #endif /* LRO */
   1671 		/* Free the ring memory as well */
   1672 		ixgbe_dma_free(sc, &rxr->rxdma);
   1673 		IXGBE_RX_LOCK_DESTROY(rxr);
   1674 	}
   1675 
   1676 	free(sc->rx_rings, M_DEVBUF);
   1677 } /* ixgbe_free_receive_structures */
   1678 
   1679 
   1680 /************************************************************************
   1681  * ixgbe_free_receive_buffers - Free receive ring data structures
   1682  ************************************************************************/
   1683 static void
   1684 ixgbe_free_receive_buffers(struct rx_ring *rxr)
   1685 {
   1686 	struct ixgbe_softc  *sc = rxr->sc;
   1687 	struct ixgbe_rx_buf *rxbuf;
   1688 
   1689 	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
   1690 
   1691 	/* Cleanup any existing buffers */
   1692 	if (rxr->rx_buffers != NULL) {
   1693 		for (int i = 0; i < sc->num_rx_desc; i++) {
   1694 			rxbuf = &rxr->rx_buffers[i];
   1695 			ixgbe_rx_discard(rxr, i);
   1696 			if (rxbuf->pmap != NULL) {
   1697 				ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
   1698 				rxbuf->pmap = NULL;
   1699 			}
   1700 		}
   1701 
   1702 		if (rxr->rx_buffers != NULL) {
   1703 			free(rxr->rx_buffers, M_DEVBUF);
   1704 			rxr->rx_buffers = NULL;
   1705 		}
   1706 	}
   1707 
   1708 	if (rxr->ptag != NULL) {
   1709 		ixgbe_dma_tag_destroy(rxr->ptag);
   1710 		rxr->ptag = NULL;
   1711 	}
   1712 
   1713 	return;
   1714 } /* ixgbe_free_receive_buffers */
   1715 
   1716 /************************************************************************
   1717  * ixgbe_rx_input
   1718  ************************************************************************/
   1719 static __inline void
   1720 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m,
   1721     u32 ptype)
   1722 {
   1723 	struct ixgbe_softc *sc = ifp->if_softc;
   1724 
   1725 #ifdef LRO
   1726 	struct ethercom *ec = &sc->osdep.ec;
   1727 
   1728 	/*
    1729 	 * At the moment, LRO is only for IP/TCP packets whose TCP checksum
    1730 	 * was validated by hardware and which carry no VLAN tag in the
    1731 	 * Ethernet header. For IPv6 we do not yet support extension headers.
   1732 	 */
    1733 	if (rxr->lro_enabled &&
    1734 	    (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
    1735 	    (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
    1736 	    ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
    1737 	    (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
    1738 	    (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
    1739 	    (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
    1740 	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
    1741 	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
    1742 		/*
    1743 		 * Send to the stack if:
    1744 		 *  - LRO not enabled, or
    1745 		 *  - no LRO resources, or
    1746 		 *  - lro enqueue fails
    1747 		 */
    1748 		if (rxr->lro.lro_cnt != 0)
    1749 			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
    1750 				return;
    1751 	}
   1752 #endif /* LRO */
   1753 
   1754 	if_percpuq_enqueue(sc->ipq, m);
   1755 } /* ixgbe_rx_input */
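
         /*
          * Note (hedged): when the kernel is built without the LRO option the
          * block above compiles away and every completed frame is handed
          * directly to the per-CPU input queue via if_percpuq_enqueue().  With
          * LRO, only hardware-checksummed IPv4/IPv6 TCP frames are offered to
          * tcp_lro_rx(); anything it does not accept falls through to the same
          * enqueue path.
          */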
   1756 
   1757 /************************************************************************
   1758  * ixgbe_rx_discard
   1759  ************************************************************************/
   1760 static __inline void
   1761 ixgbe_rx_discard(struct rx_ring *rxr, int i)
   1762 {
   1763 	struct ixgbe_rx_buf *rbuf;
   1764 
   1765 	rbuf = &rxr->rx_buffers[i];
   1766 
   1767 	/*
   1768 	 * With advanced descriptors the writeback clobbers the buffer addrs,
   1769 	 * so its easier to just free the existing mbufs and take the normal
   1770 	 * refresh path to get new buffers and mapping.
   1771 	 */
   1772 
   1773 	if (rbuf->fmp != NULL) {/* Partial chain ? */
   1774 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
   1775 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
   1776 		ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
   1777 		m_freem(rbuf->fmp);
   1778 		rbuf->fmp = NULL;
   1779 		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
   1780 	} else if (rbuf->buf) {
   1781 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
   1782 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
   1783 		ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
   1784 		m_free(rbuf->buf);
   1785 		rbuf->buf = NULL;
   1786 	}
   1787 
   1788 	rbuf->flags = 0;
   1789 
   1790 	return;
   1791 } /* ixgbe_rx_discard */
   1792 
   1793 
   1794 /************************************************************************
   1795  * ixgbe_rxeof
   1796  *
    1797  *   Executes in interrupt context. Replenishes the mbufs in the
    1798  *   descriptor ring and passes data that has been DMA'd into host
    1799  *   memory up to the upper layer.
   1800  *
   1801  *   Return TRUE for more work, FALSE for all clean.
   1802  ************************************************************************/
   1803 bool
   1804 ixgbe_rxeof(struct ix_queue *que)
   1805 {
   1806 	struct ixgbe_softc	*sc = que->sc;
   1807 	struct rx_ring		*rxr = que->rxr;
   1808 	struct ifnet		*ifp = sc->ifp;
   1809 #ifdef LRO
   1810 	struct lro_ctrl		*lro = &rxr->lro;
   1811 #endif /* LRO */
   1812 	union ixgbe_adv_rx_desc	*cur;
   1813 	struct ixgbe_rx_buf	*rbuf, *nbuf;
   1814 	int			i, nextp, processed = 0;
   1815 	u32			staterr = 0;
   1816 	u32			loopcount = 0, numdesc;
   1817 	u32			limit = sc->rx_process_limit;
   1818 	u32			rx_copy_len = sc->rx_copy_len;
   1819 	bool			discard_multidesc = rxr->discard_multidesc;
   1820 	bool			wraparound = false;
   1821 	unsigned int		syncremain;
   1822 #ifdef RSS
   1823 	u16			pkt_info;
   1824 #endif
   1825 
   1826 	IXGBE_RX_LOCK(rxr);
   1827 
   1828 #ifdef DEV_NETMAP
   1829 	if (sc->feat_en & IXGBE_FEATURE_NETMAP) {
   1830 		/* Same as the txeof routine: wakeup clients on intr. */
   1831 		if (netmap_rx_irq(ifp, rxr->me, &processed)) {
   1832 			IXGBE_RX_UNLOCK(rxr);
   1833 			return (FALSE);
   1834 		}
   1835 	}
   1836 #endif /* DEV_NETMAP */
   1837 
    1838 	/* Sync the ring: rx_process_limit descs, or fewer on wraparound */
   1839 	if ((rxr->next_to_check + limit) <= rxr->num_desc) {
   1840 		/* Non-wraparound */
   1841 		numdesc = limit;
   1842 		syncremain = 0;
   1843 	} else {
    1844 		/* Wraparound. Sync up to the end of the ring first. */
   1845 		numdesc = rxr->num_desc - rxr->next_to_check;
   1846 
    1847 		/* Record how much remains to sync after the wrap */
   1848 		syncremain = limit - numdesc;
   1849 	}
   1850 	bus_dmamap_sync(rxr->rxdma.dma_tag->dt_dmat,
   1851 	    rxr->rxdma.dma_map,
   1852 	    sizeof(union ixgbe_adv_rx_desc) * rxr->next_to_check,
   1853 	    sizeof(union ixgbe_adv_rx_desc) * numdesc,
   1854 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
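
         	/*
         	 * Worked example of the split sync (illustrative, made-up
         	 * numbers): with num_desc = 512, next_to_check = 500 and
         	 * limit = 32 the test above wraps, so numdesc = 512 - 500 = 12
         	 * descriptors are synced here (indices 500..511) and
         	 * syncremain = 32 - 12 = 20 descriptors starting at index 0 are
         	 * synced inside the loop once 'wraparound' is observed.
         	 */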
   1855 
   1856 	/*
    1857 	 * The maximum number of loop iterations is rx_process_limit. If
    1858 	 * discard_multidesc is true, keep processing so that a broken packet
    1859 	 * is not passed to the upper layer.
   1860 	 */
   1861 	for (i = rxr->next_to_check;
   1862 	     (loopcount < limit) || (discard_multidesc == true);) {
   1863 
   1864 		struct mbuf *sendmp, *mp;
   1865 		struct mbuf *newmp;
   1866 		u32         rsc, ptype;
   1867 		u16         len;
   1868 		u16         vtag = 0;
   1869 		bool        eop;
   1870 		bool        discard = false;
   1871 
   1872 		if (wraparound) {
    1873 			/* Sync the part remaining after the wrap. */
   1874 			KASSERT(syncremain != 0);
   1875 			numdesc = syncremain;
   1876 			wraparound = false;
   1877 		} else if (__predict_false(loopcount >= limit)) {
   1878 			KASSERT(discard_multidesc == true);
   1879 			numdesc = 1;
   1880 		} else
   1881 			numdesc = 0;
   1882 
   1883 		if (numdesc != 0)
   1884 			bus_dmamap_sync(rxr->rxdma.dma_tag->dt_dmat,
   1885 			    rxr->rxdma.dma_map, 0,
   1886 			    sizeof(union ixgbe_adv_rx_desc) * numdesc,
   1887 			    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   1888 
   1889 		cur = &rxr->rx_base[i];
   1890 		staterr = le32toh(cur->wb.upper.status_error);
   1891 #ifdef RSS
   1892 		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
   1893 #endif
   1894 
   1895 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
   1896 			break;
   1897 
   1898 		loopcount++;
   1899 		sendmp = newmp = NULL;
   1900 		nbuf = NULL;
   1901 		rsc = 0;
   1902 		cur->wb.upper.status_error = 0;
   1903 		rbuf = &rxr->rx_buffers[i];
   1904 		mp = rbuf->buf;
   1905 
   1906 		len = le16toh(cur->wb.upper.length);
   1907 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
   1908 		    IXGBE_RXDADV_PKTTYPE_MASK;
   1909 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
   1910 
   1911 		/* Make sure bad packets are discarded */
   1912 		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
   1913 #if __FreeBSD_version >= 1100036
   1914 			if (sc->feat_en & IXGBE_FEATURE_VF)
   1915 				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
   1916 #endif
   1917 			IXGBE_EVC_ADD(&rxr->rx_discarded, 1);
   1918 			ixgbe_rx_discard(rxr, i);
   1919 			discard_multidesc = false;
   1920 			goto next_desc;
   1921 		}
   1922 
   1923 		if (__predict_false(discard_multidesc))
   1924 			discard = true;
   1925 		else {
   1926 			/* Pre-alloc new mbuf. */
   1927 
   1928 			if ((rbuf->fmp == NULL) &&
   1929 			    eop && (len <= rx_copy_len)) {
   1930 				/* For short packet. See below. */
   1931 				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
   1932 				if (__predict_false(sendmp == NULL)) {
   1933 					IXGBE_EVC_ADD(&rxr->no_mbuf, 1);
   1934 					discard = true;
   1935 				}
   1936 			} else {
   1937 				/* For long packet. */
   1938 				newmp = ixgbe_getcl();
   1939 				if (__predict_false(newmp == NULL)) {
   1940 					IXGBE_EVC_ADD(&rxr->no_mbuf, 1);
   1941 					discard = true;
   1942 				}
   1943 			}
   1944 		}
   1945 
   1946 		if (__predict_false(discard)) {
   1947 			/*
   1948 			 * Descriptor initialization is already done by the
   1949 			 * above code (cur->wb.upper.status_error = 0).
   1950 			 * So, we can reuse current rbuf->buf for new packet.
   1951 			 *
   1952 			 * Rewrite the buffer addr, see comment in
   1953 			 * ixgbe_rx_discard().
   1954 			 */
   1955 			cur->read.pkt_addr = rbuf->addr;
   1956 			m_freem(rbuf->fmp);
   1957 			rbuf->fmp = NULL;
   1958 			if (!eop) {
   1959 				/* Discard the entire packet. */
   1960 				discard_multidesc = true;
   1961 			} else
   1962 				discard_multidesc = false;
   1963 			goto next_desc;
   1964 		}
   1965 		discard_multidesc = false;
   1966 
   1967 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
   1968 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
   1969 
   1970 		/*
    1971 		 * On the 82599, which supports hardware
    1972 		 * LRO (called HW RSC), packets need not
    1973 		 * be fragmented across sequential
    1974 		 * descriptors; instead the next descriptor
    1975 		 * is indicated in bits of this descriptor.
    1976 		 * This also means that we might process
    1977 		 * more than one packet at a time, something
    1978 		 * that had never been true before. That
    1979 		 * required eliminating the global chain
    1980 		 * pointers in favor of what we do here. -jfv
   1981 		 */
   1982 		if (!eop) {
   1983 			/*
   1984 			 * Figure out the next descriptor
   1985 			 * of this frame.
   1986 			 */
   1987 			if (rxr->hw_rsc == TRUE) {
   1988 				rsc = ixgbe_rsc_count(cur);
   1989 				rxr->rsc_num += (rsc - 1);
   1990 			}
   1991 			if (rsc) { /* Get hardware index */
   1992 				nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
   1993 				    IXGBE_RXDADV_NEXTP_SHIFT);
   1994 			} else { /* Just sequential */
   1995 				nextp = i + 1;
   1996 				if (nextp == sc->num_rx_desc)
   1997 					nextp = 0;
   1998 			}
   1999 			nbuf = &rxr->rx_buffers[nextp];
   2000 			prefetch(nbuf);
   2001 		}
   2002 		/*
   2003 		 * Rather than using the fmp/lmp global pointers
   2004 		 * we now keep the head of a packet chain in the
   2005 		 * buffer struct and pass this along from one
   2006 		 * descriptor to the next, until we get EOP.
   2007 		 */
   2008 		/*
    2009 		 * See if there is a stored head from a previous
    2010 		 * descriptor that tells us how to treat this one.
   2011 		 */
   2012 		if (rbuf->fmp != NULL) {
   2013 			/* Secondary frag */
   2014 			sendmp = rbuf->fmp;
   2015 
   2016 			/* Update new (used in future) mbuf */
   2017 			newmp->m_pkthdr.len = newmp->m_len = rxr->mbuf_sz;
   2018 			IXGBE_M_ADJ(sc, rxr, newmp);
   2019 			rbuf->buf = newmp;
   2020 			rbuf->fmp = NULL;
   2021 
   2022 			/* For secondary frag */
   2023 			mp->m_len = len;
   2024 			mp->m_flags &= ~M_PKTHDR;
   2025 
   2026 			/* For sendmp */
   2027 			sendmp->m_pkthdr.len += mp->m_len;
   2028 		} else {
   2029 			/*
   2030 			 * It's the first segment of a multi descriptor
   2031 			 * packet or a single segment which contains a full
   2032 			 * packet.
   2033 			 */
   2034 
   2035 			if (eop && (len <= rx_copy_len)) {
   2036 				/*
    2037 				 * Optimization: this is likely a small packet,
    2038 				 * perhaps just a TCP ACK. Copy it into the new
    2039 				 * mbuf, leaving the old mbuf+cluster for reuse.
   2040 				 */
   2041 				sendmp->m_data += ETHER_ALIGN;
   2042 				memcpy(mtod(sendmp, void *),
   2043 				    mtod(mp, void *), len);
   2044 				IXGBE_EVC_ADD(&rxr->rx_copies, 1);
   2045 				rbuf->flags |= IXGBE_RX_COPY;
   2046 			} else {
   2047 				/* For long packet */
   2048 
   2049 				/* Update new (used in future) mbuf */
   2050 				newmp->m_pkthdr.len = newmp->m_len
   2051 				    = rxr->mbuf_sz;
   2052 				IXGBE_M_ADJ(sc, rxr, newmp);
   2053 				rbuf->buf = newmp;
   2054 				rbuf->fmp = NULL;
   2055 
   2056 				/* For sendmp */
   2057 				sendmp = mp;
   2058 			}
   2059 
   2060 			/* first desc of a non-ps chain */
   2061 			sendmp->m_pkthdr.len = sendmp->m_len = len;
   2062 		}
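
         		/*
         		 * Recap of the copy/swap strategy above (descriptive):
         		 * frames of at most rx_copy_len bytes are copied into a
         		 * fresh small mbuf and the original cluster stays on the
         		 * descriptor (IXGBE_RX_COPY); larger frames hand their
         		 * cluster up the stack and the pre-allocated newmp takes
         		 * its place in rbuf->buf.
         		 */
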
   2063 		++processed;
   2064 
   2065 		/* Pass the head pointer on */
   2066 		if (eop == 0) {
   2067 			nbuf->fmp = sendmp;
   2068 			sendmp = NULL;
   2069 			mp->m_next = nbuf->buf;
   2070 		} else { /* Sending this frame */
   2071 			m_set_rcvif(sendmp, ifp);
   2072 			++rxr->packets;
   2073 			IXGBE_EVC_ADD(&rxr->rx_packets, 1);
   2074 			/* capture data for AIM */
   2075 			rxr->bytes += sendmp->m_pkthdr.len;
   2076 			IXGBE_EVC_ADD(&rxr->rx_bytes, sendmp->m_pkthdr.len);
   2077 			/* Process vlan info */
   2078 			if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
   2079 				vtag = le16toh(cur->wb.upper.vlan);
   2080 			if (vtag) {
   2081 				vlan_set_tag(sendmp, vtag);
   2082 			}
   2083 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
   2084 				ixgbe_rx_checksum(staterr, sendmp, ptype,
   2085 				   &sc->stats.pf);
   2086 			}
   2087 
   2088 #if 0 /* FreeBSD */
   2089 			/*
   2090 			 * In case of multiqueue, we have RXCSUM.PCSD bit set
   2091 			 * and never cleared. This means we have RSS hash
   2092 			 * available to be used.
   2093 			 */
   2094 			if (sc->num_queues > 1) {
   2095 				sendmp->m_pkthdr.flowid =
   2096 				    le32toh(cur->wb.lower.hi_dword.rss);
   2097 				switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
   2098 				case IXGBE_RXDADV_RSSTYPE_IPV4:
   2099 					M_HASHTYPE_SET(sendmp,
   2100 					    M_HASHTYPE_RSS_IPV4);
   2101 					break;
   2102 				case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
   2103 					M_HASHTYPE_SET(sendmp,
   2104 					    M_HASHTYPE_RSS_TCP_IPV4);
   2105 					break;
   2106 				case IXGBE_RXDADV_RSSTYPE_IPV6:
   2107 					M_HASHTYPE_SET(sendmp,
   2108 					    M_HASHTYPE_RSS_IPV6);
   2109 					break;
   2110 				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
   2111 					M_HASHTYPE_SET(sendmp,
   2112 					    M_HASHTYPE_RSS_TCP_IPV6);
   2113 					break;
   2114 				case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
   2115 					M_HASHTYPE_SET(sendmp,
   2116 					    M_HASHTYPE_RSS_IPV6_EX);
   2117 					break;
   2118 				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
   2119 					M_HASHTYPE_SET(sendmp,
   2120 					    M_HASHTYPE_RSS_TCP_IPV6_EX);
   2121 					break;
   2122 #if __FreeBSD_version > 1100000
   2123 				case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
   2124 					M_HASHTYPE_SET(sendmp,
   2125 					    M_HASHTYPE_RSS_UDP_IPV4);
   2126 					break;
   2127 				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
   2128 					M_HASHTYPE_SET(sendmp,
   2129 					    M_HASHTYPE_RSS_UDP_IPV6);
   2130 					break;
   2131 				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
   2132 					M_HASHTYPE_SET(sendmp,
   2133 					    M_HASHTYPE_RSS_UDP_IPV6_EX);
   2134 					break;
   2135 #endif
   2136 				default:
   2137 					M_HASHTYPE_SET(sendmp,
   2138 					    M_HASHTYPE_OPAQUE_HASH);
   2139 				}
   2140 			} else {
   2141 				sendmp->m_pkthdr.flowid = que->msix;
   2142 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
   2143 			}
   2144 #endif
   2145 		}
   2146 next_desc:
   2147 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   2148 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   2149 
   2150 		/* Advance our pointers to the next descriptor. */
   2151 		if (++i == rxr->num_desc) {
   2152 			wraparound = true;
   2153 			i = 0;
   2154 		}
   2155 		rxr->next_to_check = i;
   2156 
   2157 		/* Now send to the stack or do LRO */
   2158 		if (sendmp != NULL)
   2159 			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
   2160 
   2161 		/* Every 8 descriptors we go to refresh mbufs */
   2162 		if (processed == 8) {
   2163 			ixgbe_refresh_mbufs(rxr, i);
   2164 			processed = 0;
   2165 		}
   2166 	}
   2167 
   2168 	/* Save the current status */
   2169 	rxr->discard_multidesc = discard_multidesc;
   2170 
   2171 	/* Refresh any remaining buf structs */
   2172 	if (ixgbe_rx_unrefreshed(rxr))
   2173 		ixgbe_refresh_mbufs(rxr, i);
   2174 
   2175 	IXGBE_RX_UNLOCK(rxr);
   2176 
   2177 #ifdef LRO
   2178 	/*
   2179 	 * Flush any outstanding LRO work
   2180 	 */
   2181 	tcp_lro_flush_all(lro);
   2182 #endif /* LRO */
   2183 
   2184 	/*
   2185 	 * Still have cleaning to do?
   2186 	 */
   2187 	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
   2188 		return (TRUE);
   2189 
   2190 	return (FALSE);
   2191 } /* ixgbe_rxeof */
   2192 
   2193 
   2194 /************************************************************************
   2195  * ixgbe_rx_checksum
   2196  *
   2197  *   Verify that the hardware indicated that the checksum is valid.
   2198  *   Inform the stack about the status of checksum so that stack
   2199  *   doesn't spend time verifying the checksum.
   2200  ************************************************************************/
   2201 static void
   2202 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
   2203     struct ixgbe_hw_stats *stats)
   2204 {
   2205 	u16  status = (u16)staterr;
   2206 	u8   errors = (u8)(staterr >> 24);
   2207 #if 0
   2208 	bool sctp = false;
   2209 
   2210 	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
   2211 	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
   2212 		sctp = true;
   2213 #endif
   2214 
   2215 	/* IPv4 checksum */
   2216 	if (status & IXGBE_RXD_STAT_IPCS) {
   2217 		IXGBE_EVC_ADD(&stats->ipcs, 1);
   2218 		if (!(errors & IXGBE_RXD_ERR_IPE)) {
   2219 			/* IP Checksum Good */
   2220 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
   2221 		} else {
   2222 			IXGBE_EVC_ADD(&stats->ipcs_bad, 1);
   2223 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
   2224 		}
   2225 	}
   2226 	/* TCP/UDP/SCTP checksum */
   2227 	if (status & IXGBE_RXD_STAT_L4CS) {
   2228 		IXGBE_EVC_ADD(&stats->l4cs, 1);
   2229 		int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
   2230 		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
   2231 			mp->m_pkthdr.csum_flags |= type;
   2232 		} else {
   2233 			IXGBE_EVC_ADD(&stats->l4cs_bad, 1);
   2234 			mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
   2235 		}
   2236 	}
   2237 } /* ixgbe_rx_checksum */
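
         /*
          * Example outcome (a hedged reading of the code above): a frame with
          * both IXGBE_RXD_STAT_IPCS and IXGBE_RXD_STAT_L4CS set and neither
          * error bit raised ends up with
          *
          *	csum_flags = M_CSUM_IPv4 | M_CSUM_TCPv4 | M_CSUM_TCPv6 |
          *	    M_CSUM_UDPv4 | M_CSUM_UDPv6
          *
          * i.e. all L4 flavours are flagged good at once, the assumption being
          * that the stack only consults the flag matching the protocol it
          * actually parses.
          */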
   2238 
   2239 /************************************************************************
   2240  * ixgbe_dma_malloc
   2241  ************************************************************************/
   2242 int
   2243 ixgbe_dma_malloc(struct ixgbe_softc *sc, const bus_size_t size,
   2244 		struct ixgbe_dma_alloc *dma, const int mapflags)
   2245 {
   2246 	device_t dev = sc->dev;
   2247 	int      r, rsegs;
   2248 
   2249 	r = ixgbe_dma_tag_create(
   2250 	     /*      parent */ sc->osdep.dmat,
   2251 	     /*   alignment */ DBA_ALIGN,
   2252 	     /*      bounds */ 0,
   2253 	     /*     maxsize */ size,
   2254 	     /*   nsegments */ 1,
   2255 	     /*  maxsegsize */ size,
   2256 	     /*       flags */ BUS_DMA_ALLOCNOW,
   2257 			       &dma->dma_tag);
   2258 	if (r != 0) {
   2259 		aprint_error_dev(dev,
   2260 		    "%s: ixgbe_dma_tag_create failed; error %d\n", __func__,
   2261 		    r);
   2262 		goto fail_0;
   2263 	}
   2264 
   2265 	r = bus_dmamem_alloc(dma->dma_tag->dt_dmat, size,
   2266 	    dma->dma_tag->dt_alignment, dma->dma_tag->dt_boundary,
   2267 	    &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
   2268 	if (r != 0) {
   2269 		aprint_error_dev(dev,
   2270 		    "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
   2271 		goto fail_1;
   2272 	}
   2273 
   2274 	r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
   2275 	    size, &dma->dma_vaddr, BUS_DMA_NOWAIT | BUS_DMA_COHERENT);
   2276 	if (r != 0) {
   2277 		aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
   2278 		    __func__, r);
   2279 		goto fail_2;
   2280 	}
   2281 
   2282 	r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
   2283 	if (r != 0) {
    2284 		aprint_error_dev(dev, "%s: ixgbe_dmamap_create failed; error %d\n",
   2285 		    __func__, r);
   2286 		goto fail_3;
   2287 	}
   2288 
   2289 	r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map,
   2290 	    dma->dma_vaddr, size, NULL, mapflags | BUS_DMA_NOWAIT);
   2291 	if (r != 0) {
   2292 		aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
   2293 		    __func__, r);
   2294 		goto fail_4;
   2295 	}
   2296 	dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
   2297 	dma->dma_size = size;
   2298 	return 0;
   2299 fail_4:
   2300 	ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
   2301 fail_3:
   2302 	bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
   2303 fail_2:
   2304 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
   2305 fail_1:
   2306 	ixgbe_dma_tag_destroy(dma->dma_tag);
   2307 fail_0:
   2308 
   2309 	return (r);
   2310 } /* ixgbe_dma_malloc */
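
         /*
          * Note (descriptive, derived from the code above): the fail_* labels
          * unwind in strict reverse order of construction --
          *
          *	fail_4: destroy the DMA map     (undoes ixgbe_dmamap_create)
          *	fail_3: unmap the kernel VA     (undoes bus_dmamem_map)
          *	fail_2: free the DMA segment    (undoes bus_dmamem_alloc)
          *	fail_1: destroy the tag         (undoes ixgbe_dma_tag_create)
          *
          * A successful allocation is expected to be released with the
          * matching ixgbe_dma_free() below, which performs the same teardown
          * after unloading the map.
          */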
   2311 
   2312 /************************************************************************
   2313  * ixgbe_dma_free
   2314  ************************************************************************/
   2315 void
   2316 ixgbe_dma_free(struct ixgbe_softc *sc, struct ixgbe_dma_alloc *dma)
   2317 {
   2318 	bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
   2319 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   2320 	ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
   2321 	bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, dma->dma_size);
   2322 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
   2323 	ixgbe_dma_tag_destroy(dma->dma_tag);
   2324 } /* ixgbe_dma_free */
   2325 
   2326 
   2327 /************************************************************************
   2328  * ixgbe_allocate_queues
   2329  *
   2330  *   Allocate memory for the transmit and receive rings, and then
   2331  *   the descriptors associated with each, called only once at attach.
   2332  ************************************************************************/
   2333 int
   2334 ixgbe_allocate_queues(struct ixgbe_softc *sc)
   2335 {
   2336 	device_t	dev = sc->dev;
   2337 	struct ix_queue	*que;
   2338 	struct tx_ring	*txr;
   2339 	struct rx_ring	*rxr;
   2340 	int             rsize, tsize, error = IXGBE_SUCCESS;
   2341 	int             txconf = 0, rxconf = 0;
   2342 
   2343 	/* First, allocate the top level queue structs */
   2344 	sc->queues = (struct ix_queue *)malloc(sizeof(struct ix_queue) *
   2345 	    sc->num_queues, M_DEVBUF, M_WAITOK | M_ZERO);
   2346 
   2347 	/* Second, allocate the TX ring struct memory */
   2348 	sc->tx_rings = malloc(sizeof(struct tx_ring) *
   2349 	    sc->num_queues, M_DEVBUF, M_WAITOK | M_ZERO);
   2350 
   2351 	/* Third, allocate the RX ring */
   2352 	sc->rx_rings = (struct rx_ring *)malloc(sizeof(struct rx_ring) *
   2353 	    sc->num_queues, M_DEVBUF, M_WAITOK | M_ZERO);
   2354 
   2355 	/* For the ring itself */
   2356 	tsize = roundup2(sc->num_tx_desc * sizeof(union ixgbe_adv_tx_desc),
   2357 	    DBA_ALIGN);
   2358 
   2359 	/*
   2360 	 * Now set up the TX queues, txconf is needed to handle the
   2361 	 * possibility that things fail midcourse and we need to
   2362 	 * undo memory gracefully
   2363 	 */
   2364 	for (int i = 0; i < sc->num_queues; i++, txconf++) {
   2365 		/* Set up some basics */
   2366 		txr = &sc->tx_rings[i];
   2367 		txr->sc = sc;
   2368 		txr->txr_interq = NULL;
   2369 		/* In case SR-IOV is enabled, align the index properly */
   2370 #ifdef PCI_IOV
   2371 		txr->me = ixgbe_vf_que_index(sc->iov_mode, sc->pool,
   2372 		    i);
   2373 #else
   2374 		txr->me = i;
   2375 #endif
   2376 		txr->num_desc = sc->num_tx_desc;
   2377 
   2378 		/* Initialize the TX side lock */
   2379 		mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
   2380 
   2381 		if (ixgbe_dma_malloc(sc, tsize, &txr->txdma,
   2382 		    BUS_DMA_NOWAIT)) {
   2383 			aprint_error_dev(dev,
   2384 			    "Unable to allocate TX Descriptor memory\n");
   2385 			error = ENOMEM;
   2386 			goto err_tx_desc;
   2387 		}
   2388 		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
   2389 		bzero((void *)txr->tx_base, tsize);
   2390 
   2391 		/* Now allocate transmit buffers for the ring */
   2392 		if (ixgbe_allocate_transmit_buffers(txr)) {
   2393 			aprint_error_dev(dev,
   2394 			    "Critical Failure setting up transmit buffers\n");
   2395 			error = ENOMEM;
   2396 			goto err_tx_desc;
   2397 		}
   2398 		if (!(sc->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
   2399 			/* Allocate a buf ring */
   2400 			txr->txr_interq = pcq_create(IXGBE_BR_SIZE, KM_SLEEP);
   2401 			if (txr->txr_interq == NULL) {
   2402 				aprint_error_dev(dev,
   2403 				    "Critical Failure setting up buf ring\n");
   2404 				error = ENOMEM;
   2405 				goto err_tx_desc;
   2406 			}
   2407 		}
   2408 	}
   2409 
   2410 	/*
   2411 	 * Next the RX queues...
   2412 	 */
   2413 	rsize = roundup2(sc->num_rx_desc * sizeof(union ixgbe_adv_rx_desc),
   2414 	    DBA_ALIGN);
   2415 	for (int i = 0; i < sc->num_queues; i++, rxconf++) {
   2416 		rxr = &sc->rx_rings[i];
   2417 		/* Set up some basics */
   2418 		rxr->sc = sc;
   2419 #ifdef PCI_IOV
   2420 		/* In case SR-IOV is enabled, align the index properly */
   2421 		rxr->me = ixgbe_vf_que_index(sc->iov_mode, sc->pool,
   2422 		    i);
   2423 #else
   2424 		rxr->me = i;
   2425 #endif
   2426 		rxr->num_desc = sc->num_rx_desc;
   2427 
   2428 		/* Initialize the RX side lock */
   2429 		mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
   2430 
   2431 		if (ixgbe_dma_malloc(sc, rsize, &rxr->rxdma,
   2432 		    BUS_DMA_NOWAIT)) {
   2433 			aprint_error_dev(dev,
   2434 			    "Unable to allocate RxDescriptor memory\n");
   2435 			error = ENOMEM;
   2436 			goto err_rx_desc;
   2437 		}
   2438 		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
   2439 		bzero((void *)rxr->rx_base, rsize);
   2440 
   2441 		/* Allocate receive buffers for the ring */
   2442 		if (ixgbe_allocate_receive_buffers(rxr)) {
   2443 			aprint_error_dev(dev,
   2444 			    "Critical Failure setting up receive buffers\n");
   2445 			error = ENOMEM;
   2446 			goto err_rx_desc;
   2447 		}
   2448 	}
   2449 
   2450 	/*
   2451 	 * Finally set up the queue holding structs
   2452 	 */
   2453 	for (int i = 0; i < sc->num_queues; i++) {
   2454 		que = &sc->queues[i];
   2455 		que->sc = sc;
   2456 		que->me = i;
   2457 		que->txr = &sc->tx_rings[i];
   2458 		que->rxr = &sc->rx_rings[i];
   2459 
   2460 		mutex_init(&que->dc_mtx, MUTEX_DEFAULT, IPL_NET);
   2461 		que->disabled_count = 0;
   2462 	}
   2463 
   2464 	return (0);
   2465 
   2466 err_rx_desc:
   2467 	for (rxr = sc->rx_rings; rxconf > 0; rxr++, rxconf--)
   2468 		ixgbe_dma_free(sc, &rxr->rxdma);
   2469 err_tx_desc:
   2470 	for (txr = sc->tx_rings; txconf > 0; txr++, txconf--)
   2471 		ixgbe_dma_free(sc, &txr->txdma);
   2472 	free(sc->rx_rings, M_DEVBUF);
   2473 	free(sc->tx_rings, M_DEVBUF);
   2474 	free(sc->queues, M_DEVBUF);
   2475 	return (error);
   2476 } /* ixgbe_allocate_queues */
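
         /*
          * Note (hedged): txconf and rxconf above count how many rings had
          * their descriptor DMA memory allocated before a failure, so the
          * err_tx_desc/err_rx_desc loops free exactly those rings and nothing
          * more.  The top-level queues, tx_rings and rx_rings arrays are
          * always freed on error because their M_WAITOK allocations cannot
          * fail.
          */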
   2477 
   2478 /************************************************************************
   2479  * ixgbe_free_queues
   2480  *
   2481  *   Free descriptors for the transmit and receive rings, and then
   2482  *   the memory associated with each.
   2483  ************************************************************************/
   2484 void
   2485 ixgbe_free_queues(struct ixgbe_softc *sc)
   2486 {
   2487 	struct ix_queue *que;
   2488 	int i;
   2489 
   2490 	ixgbe_free_transmit_structures(sc);
   2491 	ixgbe_free_receive_structures(sc);
   2492 	for (i = 0; i < sc->num_queues; i++) {
   2493 		que = &sc->queues[i];
   2494 		mutex_destroy(&que->dc_mtx);
   2495 	}
   2496 	free(sc->queues, M_DEVBUF);
   2497 } /* ixgbe_free_queues */
   2498