      1 /* $NetBSD: ix_txrx.c,v 1.111 2023/12/13 08:25:54 msaitoh Exp $ */
      2 
      3 /******************************************************************************
      4 
      5   Copyright (c) 2001-2017, Intel Corporation
      6   All rights reserved.
      7 
      8   Redistribution and use in source and binary forms, with or without
      9   modification, are permitted provided that the following conditions are met:
     10 
     11    1. Redistributions of source code must retain the above copyright notice,
     12       this list of conditions and the following disclaimer.
     13 
     14    2. Redistributions in binary form must reproduce the above copyright
     15       notice, this list of conditions and the following disclaimer in the
     16       documentation and/or other materials provided with the distribution.
     17 
     18    3. Neither the name of the Intel Corporation nor the names of its
     19       contributors may be used to endorse or promote products derived from
     20       this software without specific prior written permission.
     21 
     22   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     23   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     24   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     25   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     26   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     27   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     28   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     29   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     30   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     31   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     32   POSSIBILITY OF SUCH DAMAGE.
     33 
     34 ******************************************************************************/
     35 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 327031 2017-12-20 18:15:06Z erj $*/
     36 
     37 /*
     38  * Copyright (c) 2011 The NetBSD Foundation, Inc.
     39  * All rights reserved.
     40  *
     41  * This code is derived from software contributed to The NetBSD Foundation
     42  * by Coyote Point Systems, Inc.
     43  *
     44  * Redistribution and use in source and binary forms, with or without
     45  * modification, are permitted provided that the following conditions
     46  * are met:
     47  * 1. Redistributions of source code must retain the above copyright
     48  *    notice, this list of conditions and the following disclaimer.
     49  * 2. Redistributions in binary form must reproduce the above copyright
     50  *    notice, this list of conditions and the following disclaimer in the
     51  *    documentation and/or other materials provided with the distribution.
     52  *
     53  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     54  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     55  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     56  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     57  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     58  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     59  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     60  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     61  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     62  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     63  * POSSIBILITY OF SUCH DAMAGE.
     64  */
     65 
     66 #include <sys/cdefs.h>
     67 __KERNEL_RCSID(0, "$NetBSD: ix_txrx.c,v 1.111 2023/12/13 08:25:54 msaitoh Exp $");
     68 
     69 #include "opt_inet.h"
     70 #include "opt_inet6.h"
     71 
     72 #include "ixgbe.h"
     73 
      74 /*
      75  * HW RSC control:
      76  *  This feature only works with
      77  *  IPv4 and only on 82599 and later.
      78  *  It also causes IP forwarding to
      79  *  fail, and unlike LRO that cannot
      80  *  be controlled by the stack. For
      81  *  all of these reasons it is best
      82  *  left off, with no tunable
      83  *  interface; enabling it requires
      84  *  editing this value and recompiling.
      85  */
     86 static bool ixgbe_rsc_enable = FALSE;
     87 
      88 /*
      89  * For Flow Director: this is the
      90  * number of TX packets between
      91  * samples taken for the filter pool,
      92  * so every 20th packet is probed.
      93  *
      94  * Setting this to 0 disables the
      95  * feature.
      96  */
     97 static int atr_sample_rate = 20;
     98 
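        /*
         * If the largest possible frame still fits in the receive buffer
         * after shifting by ETHER_ALIGN, shift the payload so the IP
         * header lands on a 32-bit boundary.
         */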
     99 #define IXGBE_M_ADJ(sc, rxr, mp)					\
    100 	if (sc->max_frame_size <= (rxr->mbuf_sz - ETHER_ALIGN))	\
    101 		m_adj(mp, ETHER_ALIGN)
    102 
    103 /************************************************************************
    104  *  Local Function prototypes
    105  ************************************************************************/
    106 static void          ixgbe_setup_transmit_ring(struct tx_ring *);
    107 static void          ixgbe_free_transmit_buffers(struct tx_ring *);
    108 static int           ixgbe_setup_receive_ring(struct rx_ring *);
    109 static void          ixgbe_free_receive_buffers(struct rx_ring *);
    110 static void          ixgbe_rx_checksum(u32, struct mbuf *, u32,
    111                                        struct ixgbe_hw_stats *);
    112 static void          ixgbe_refresh_mbufs(struct rx_ring *, int);
    113 static void          ixgbe_drain(struct ifnet *, struct tx_ring *);
    114 static int           ixgbe_xmit(struct tx_ring *, struct mbuf *);
    115 static int           ixgbe_tx_ctx_setup(struct tx_ring *,
    116                                         struct mbuf *, u32 *, u32 *);
    117 static int           ixgbe_tso_setup(struct tx_ring *,
    118                                      struct mbuf *, u32 *, u32 *);
    119 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
    120 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
    121                                     struct mbuf *, u32);
    122 static int           ixgbe_dma_malloc(struct ixgbe_softc *, bus_size_t,
    123                                       struct ixgbe_dma_alloc *, int);
    124 static void          ixgbe_dma_free(struct ixgbe_softc *, struct ixgbe_dma_alloc *);
    125 
    126 static void	     ixgbe_setup_hw_rsc(struct rx_ring *);
    127 
    128 /************************************************************************
    129  * ixgbe_legacy_start_locked - Transmit entry point
    130  *
    131  *   Called by the stack to initiate a transmit.
    132  *   The driver will remain in this routine as long as there are
    133  *   packets to transmit and transmit resources are available.
    134  *   In case resources are not available, the stack is notified
    135  *   and the packet is requeued.
    136  ************************************************************************/
    137 int
    138 ixgbe_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
    139 {
    140 	int rc;
    141 	struct mbuf    *m_head;
    142 	struct ixgbe_softc *sc = txr->sc;
    143 
    144 	IXGBE_TX_LOCK_ASSERT(txr);
    145 
    146 	if (sc->link_active != LINK_STATE_UP) {
    147 		/*
    148 		 * discard all packets buffered in IFQ to avoid
    149 		 * sending old packets at next link up timing.
    150 		 */
    151 		ixgbe_drain(ifp, txr);
    152 		return (ENETDOWN);
    153 	}
    154 	if ((ifp->if_flags & IFF_RUNNING) == 0)
    155 		return (ENETDOWN);
    156 	if (txr->txr_no_space)
    157 		return (ENETDOWN);
    158 
    159 	while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
    160 		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
    161 			break;
    162 
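        		/*
        		 * Peek at the head of the queue first; the packet is only
        		 * dequeued once we know it was handed to the ring or must
        		 * be dropped for a non-transient error.
        		 */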
    163 		IFQ_POLL(&ifp->if_snd, m_head);
    164 		if (m_head == NULL)
    165 			break;
    166 
    167 		if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
    168 			break;
    169 		}
    170 		IFQ_DEQUEUE(&ifp->if_snd, m_head);
    171 		if (rc != 0) {
    172 			m_freem(m_head);
    173 			continue;
    174 		}
    175 
    176 		/* Send a copy of the frame to the BPF listener */
    177 		bpf_mtap(ifp, m_head, BPF_D_OUT);
    178 	}
    179 
    180 	return IXGBE_SUCCESS;
    181 } /* ixgbe_legacy_start_locked */
    182 
    183 /************************************************************************
    184  * ixgbe_legacy_start
    185  *
    186  *   Called by the stack, this always uses the first tx ring,
    187  *   and should not be used with multiqueue tx enabled.
    188  ************************************************************************/
    189 void
    190 ixgbe_legacy_start(struct ifnet *ifp)
    191 {
    192 	struct ixgbe_softc *sc = ifp->if_softc;
    193 	struct tx_ring *txr = sc->tx_rings;
    194 
    195 	if (ifp->if_flags & IFF_RUNNING) {
    196 		IXGBE_TX_LOCK(txr);
    197 		ixgbe_legacy_start_locked(ifp, txr);
    198 		IXGBE_TX_UNLOCK(txr);
    199 	}
    200 } /* ixgbe_legacy_start */
    201 
    202 /************************************************************************
    203  * ixgbe_mq_start - Multiqueue Transmit Entry Point
    204  *
    205  *   (if_transmit function)
    206  ************************************************************************/
    207 int
    208 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
    209 {
    210 	struct ixgbe_softc *sc = ifp->if_softc;
    211 	struct tx_ring	*txr;
    212 	int		i;
    213 #ifdef RSS
    214 	uint32_t bucket_id;
    215 #endif
    216 
    217 	/*
    218 	 * When doing RSS, map it to the same outbound queue
    219 	 * as the incoming flow would be mapped to.
    220 	 *
     221 	 * If everything is set up correctly, it should be the
     222 	 * same bucket as the CPU we are currently running on.
    223 	 */
    224 #ifdef RSS
    225 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
    226 		if ((sc->feat_en & IXGBE_FEATURE_RSS) &&
    227 		    (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
    228 		    &bucket_id) == 0)) {
    229 			i = bucket_id % sc->num_queues;
    230 #ifdef IXGBE_DEBUG
    231 			if (bucket_id > sc->num_queues)
    232 				if_printf(ifp,
    233 				    "bucket_id (%d) > num_queues (%d)\n",
    234 				    bucket_id, sc->num_queues);
    235 #endif
    236 		} else
    237 			i = m->m_pkthdr.flowid % sc->num_queues;
    238 	} else
     239 #endif /* RSS */
    240 		i = (cpu_index(curcpu()) % ncpu) % sc->num_queues;
    241 
    242 	/* Check for a hung queue and pick alternative */
    243 	if (((1ULL << i) & sc->active_queues) == 0)
    244 		i = ffs64(sc->active_queues);
    245 
    246 	txr = &sc->tx_rings[i];
    247 
    248 	if (__predict_false(!pcq_put(txr->txr_interq, m))) {
    249 		m_freem(m);
    250 		IXGBE_EVC_ADD(&txr->pcq_drops, 1);
    251 		return ENOBUFS;
    252 	}
    253 #ifdef IXGBE_ALWAYS_TXDEFER
    254 	kpreempt_disable();
    255 	softint_schedule(txr->txr_si);
    256 	kpreempt_enable();
    257 #else
    258 	if (IXGBE_TX_TRYLOCK(txr)) {
    259 		ixgbe_mq_start_locked(ifp, txr);
    260 		IXGBE_TX_UNLOCK(txr);
    261 	} else {
    262 		if (sc->txrx_use_workqueue) {
    263 			u_int *enqueued;
    264 
     265 			/*
     266 			 * This function itself is not called in interrupt
     267 			 * context; however, it can be called in fast softint
     268 			 * context right after receiving forwarded packets.
     269 			 * The workqueue must therefore be protected against
     270 			 * being enqueued twice when the machine handles both
     271 			 * locally generated and forwarded packets.
     272 			 */
    273 			enqueued = percpu_getref(sc->txr_wq_enqueued);
    274 			if (*enqueued == 0) {
    275 				*enqueued = 1;
    276 				percpu_putref(sc->txr_wq_enqueued);
    277 				workqueue_enqueue(sc->txr_wq,
    278 				    &txr->wq_cookie, curcpu());
    279 			} else
    280 				percpu_putref(sc->txr_wq_enqueued);
    281 		} else {
    282 			kpreempt_disable();
    283 			softint_schedule(txr->txr_si);
    284 			kpreempt_enable();
    285 		}
    286 	}
    287 #endif
    288 
    289 	return (0);
    290 } /* ixgbe_mq_start */
    291 
    292 /************************************************************************
    293  * ixgbe_mq_start_locked
    294  ************************************************************************/
    295 int
    296 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
    297 {
    298 	struct mbuf    *next;
    299 	int            enqueued = 0, err = 0;
    300 
    301 	if (txr->sc->link_active != LINK_STATE_UP) {
    302 		/*
    303 		 * discard all packets buffered in txr_interq to avoid
    304 		 * sending old packets at next link up timing.
    305 		 */
    306 		ixgbe_drain(ifp, txr);
    307 		return (ENETDOWN);
    308 	}
    309 	if ((ifp->if_flags & IFF_RUNNING) == 0)
    310 		return (ENETDOWN);
    311 	if (txr->txr_no_space)
    312 		return (ENETDOWN);
    313 
    314 	/* Process the queue */
    315 	while ((next = pcq_get(txr->txr_interq)) != NULL) {
    316 		if ((err = ixgbe_xmit(txr, next)) != 0) {
    317 			m_freem(next);
    318 			/* All errors are counted in ixgbe_xmit() */
    319 			break;
    320 		}
    321 		enqueued++;
    322 #if __FreeBSD_version >= 1100036
    323 		/*
    324 		 * Since we're looking at the tx ring, we can check
    325 		 * to see if we're a VF by examining our tail register
    326 		 * address.
    327 		 */
    328 		if ((txr->sc->feat_en & IXGBE_FEATURE_VF) &&
    329 		    (next->m_flags & M_MCAST))
    330 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
    331 #endif
    332 		/* Send a copy of the frame to the BPF listener */
    333 		bpf_mtap(ifp, next, BPF_D_OUT);
    334 		if ((ifp->if_flags & IFF_RUNNING) == 0)
    335 			break;
    336 	}
    337 
    338 	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->sc))
    339 		ixgbe_txeof(txr);
    340 
    341 	return (err);
    342 } /* ixgbe_mq_start_locked */
    343 
    344 /************************************************************************
    345  * ixgbe_deferred_mq_start
    346  *
     347  *   Called from a softint (and, indirectly, a workqueue) to drain queued
    348  *   transmit packets.
    349  ************************************************************************/
    350 void
    351 ixgbe_deferred_mq_start(void *arg)
    352 {
    353 	struct tx_ring *txr = arg;
    354 	struct ixgbe_softc *sc = txr->sc;
    355 	struct ifnet   *ifp = sc->ifp;
    356 
    357 	IXGBE_TX_LOCK(txr);
    358 	if (pcq_peek(txr->txr_interq) != NULL)
    359 		ixgbe_mq_start_locked(ifp, txr);
    360 	IXGBE_TX_UNLOCK(txr);
    361 } /* ixgbe_deferred_mq_start */
    362 
    363 /************************************************************************
    364  * ixgbe_deferred_mq_start_work
    365  *
    366  *   Called from a workqueue to drain queued transmit packets.
    367  ************************************************************************/
    368 void
    369 ixgbe_deferred_mq_start_work(struct work *wk, void *arg)
    370 {
    371 	struct tx_ring *txr = container_of(wk, struct tx_ring, wq_cookie);
    372 	struct ixgbe_softc *sc = txr->sc;
    373 	u_int *enqueued = percpu_getref(sc->txr_wq_enqueued);
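        	/* Clear the per-CPU flag so ixgbe_mq_start() may enqueue again. */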
    374 	*enqueued = 0;
    375 	percpu_putref(sc->txr_wq_enqueued);
    376 
    377 	ixgbe_deferred_mq_start(txr);
     378 } /* ixgbe_deferred_mq_start_work */
    379 
    380 /************************************************************************
    381  * ixgbe_drain_all
    382  ************************************************************************/
    383 void
    384 ixgbe_drain_all(struct ixgbe_softc *sc)
    385 {
    386 	struct ifnet *ifp = sc->ifp;
    387 	struct ix_queue *que = sc->queues;
    388 
    389 	for (int i = 0; i < sc->num_queues; i++, que++) {
    390 		struct tx_ring  *txr = que->txr;
    391 
    392 		IXGBE_TX_LOCK(txr);
    393 		ixgbe_drain(ifp, txr);
    394 		IXGBE_TX_UNLOCK(txr);
    395 	}
    396 }
    397 
    398 /************************************************************************
    399  * ixgbe_xmit
    400  *
    401  *   Maps the mbufs to tx descriptors, allowing the
    402  *   TX engine to transmit the packets.
    403  *
    404  *   Return 0 on success, positive on failure
    405  ************************************************************************/
    406 static int
    407 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
    408 {
    409 	struct ixgbe_softc      *sc = txr->sc;
    410 	struct ixgbe_tx_buf     *txbuf;
    411 	union ixgbe_adv_tx_desc *txd = NULL;
    412 	struct ifnet	        *ifp = sc->ifp;
    413 	int                     i, j, error;
    414 	int                     first;
    415 	u32                     olinfo_status = 0, cmd_type_len;
    416 	bool                    remap = TRUE;
    417 	bus_dmamap_t            map;
    418 
    419 	/* Basic descriptor defines */
    420 	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
    421 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
    422 
    423 	if (vlan_has_tag(m_head))
    424 		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
    425 
    426 	/*
    427 	 * Important to capture the first descriptor
    428 	 * used because it will contain the index of
    429 	 * the one we tell the hardware to report back
    430 	 */
    431 	first = txr->next_avail_desc;
    432 	txbuf = &txr->tx_buffers[first];
    433 	map = txbuf->map;
    434 
    435 	/*
    436 	 * Map the packet for DMA.
    437 	 */
    438 retry:
    439 	error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map, m_head,
    440 	    BUS_DMA_NOWAIT);
    441 
    442 	if (__predict_false(error)) {
    443 		struct mbuf *m;
    444 
    445 		switch (error) {
    446 		case EAGAIN:
    447 			txr->q_eagain_tx_dma_setup++;
    448 			return EAGAIN;
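        		/* A transient memory shortage is also reported as EAGAIN. */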
    449 		case ENOMEM:
    450 			txr->q_enomem_tx_dma_setup++;
    451 			return EAGAIN;
    452 		case EFBIG:
    453 			/* Try it again? - one try */
    454 			if (remap == TRUE) {
    455 				remap = FALSE;
    456 				/*
    457 				 * XXX: m_defrag will choke on
    458 				 * non-MCLBYTES-sized clusters
    459 				 */
    460 				txr->q_efbig_tx_dma_setup++;
    461 				m = m_defrag(m_head, M_NOWAIT);
    462 				if (m == NULL) {
    463 					txr->q_mbuf_defrag_failed++;
    464 					return ENOBUFS;
    465 				}
    466 				m_head = m;
    467 				goto retry;
    468 			} else {
    469 				txr->q_efbig2_tx_dma_setup++;
    470 				return error;
    471 			}
    472 		case EINVAL:
    473 			txr->q_einval_tx_dma_setup++;
    474 			return error;
    475 		default:
    476 			txr->q_other_tx_dma_setup++;
    477 			return error;
    478 		}
    479 	}
    480 
    481 	/* Make certain there are enough descriptors */
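        	/* Need one per DMA segment, plus slack for a possible context descriptor. */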
    482 	if (txr->tx_avail < (map->dm_nsegs + 2)) {
    483 		txr->txr_no_space = true;
    484 		IXGBE_EVC_ADD(&txr->no_desc_avail, 1);
    485 		ixgbe_dmamap_unload(txr->txtag, txbuf->map);
    486 		return EAGAIN;
    487 	}
    488 
    489 	/*
     490 	 * Set up the appropriate offload context if requested;
    491 	 * this may consume one TX descriptor.
    492 	 */
    493 	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
    494 	if (__predict_false(error)) {
    495 		return (error);
    496 	}
    497 
    498 #ifdef IXGBE_FDIR
    499 	/* Do the flow director magic */
    500 	if ((sc->feat_en & IXGBE_FEATURE_FDIR) &&
    501 	    (txr->atr_sample) && (!sc->fdir_reinit)) {
    502 		++txr->atr_count;
    503 		if (txr->atr_count >= atr_sample_rate) {
    504 			ixgbe_atr(txr, m_head);
    505 			txr->atr_count = 0;
    506 		}
    507 	}
    508 #endif
    509 
    510 	olinfo_status |= IXGBE_ADVTXD_CC;
    511 	i = txr->next_avail_desc;
    512 	for (j = 0; j < map->dm_nsegs; j++) {
    513 		bus_size_t seglen;
    514 		uint64_t segaddr;
    515 
    516 		txbuf = &txr->tx_buffers[i];
    517 		txd = &txr->tx_base[i];
    518 		seglen = map->dm_segs[j].ds_len;
    519 		segaddr = htole64(map->dm_segs[j].ds_addr);
    520 
    521 		txd->read.buffer_addr = segaddr;
    522 		txd->read.cmd_type_len = htole32(cmd_type_len | seglen);
    523 		txd->read.olinfo_status = htole32(olinfo_status);
    524 
    525 		if (++i == txr->num_desc)
    526 			i = 0;
    527 	}
    528 
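        	/*
        	 * On the last data descriptor: EOP marks the end of the packet
        	 * and RS asks the hardware to write back (DD) completion status.
        	 */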
    529 	txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
    530 	txr->tx_avail -= map->dm_nsegs;
    531 	txr->next_avail_desc = i;
    532 
    533 	txbuf->m_head = m_head;
    534 	/*
    535 	 * Here we swap the map so the last descriptor,
     536 	 * which gets the completion interrupt, has the
    537 	 * real map, and the first descriptor gets the
    538 	 * unused map from this descriptor.
    539 	 */
    540 	txr->tx_buffers[first].map = txbuf->map;
    541 	txbuf->map = map;
    542 	bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
    543 	    BUS_DMASYNC_PREWRITE);
    544 
    545 	/* Set the EOP descriptor that will be marked done */
    546 	txbuf = &txr->tx_buffers[first];
    547 	txbuf->eop = txd;
    548 
    549 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
    550 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
    551 	/*
     552 	 * Advance the Transmit Descriptor Tail (TDT); this tells the
    553 	 * hardware that this frame is available to transmit.
    554 	 */
    555 	IXGBE_EVC_ADD(&txr->total_packets, 1);
    556 	IXGBE_WRITE_REG(&sc->hw, txr->tail, i);
    557 
    558 	net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
    559 	if_statadd_ref(nsr, if_obytes, m_head->m_pkthdr.len);
    560 	if (m_head->m_flags & M_MCAST)
    561 		if_statinc_ref(nsr, if_omcasts);
    562 	IF_STAT_PUTREF(ifp);
    563 
    564 	/* Mark queue as having work */
    565 	if (txr->busy == 0)
    566 		txr->busy = 1;
    567 
    568 	return (0);
    569 } /* ixgbe_xmit */
    570 
    571 /************************************************************************
    572  * ixgbe_drain
    573  ************************************************************************/
    574 static void
    575 ixgbe_drain(struct ifnet *ifp, struct tx_ring *txr)
    576 {
    577 	struct mbuf *m;
    578 
    579 	IXGBE_TX_LOCK_ASSERT(txr);
    580 
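        	/* Only the first ring services if_snd (the legacy transmit path). */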
    581 	if (txr->me == 0) {
    582 		while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
    583 			IFQ_DEQUEUE(&ifp->if_snd, m);
    584 			m_freem(m);
    585 			IF_DROP(&ifp->if_snd);
    586 		}
    587 	}
    588 
    589 	while ((m = pcq_get(txr->txr_interq)) != NULL) {
    590 		m_freem(m);
    591 		IXGBE_EVC_ADD(&txr->pcq_drops, 1);
    592 	}
    593 }
    594 
    595 /************************************************************************
    596  * ixgbe_allocate_transmit_buffers
    597  *
    598  *   Allocate memory for tx_buffer structures. The tx_buffer stores all
    599  *   the information needed to transmit a packet on the wire. This is
     600  *   called only once at attach; setup is done on every reset.
    601  ************************************************************************/
    602 static int
    603 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
    604 {
    605 	struct ixgbe_softc  *sc = txr->sc;
    606 	device_t            dev = sc->dev;
    607 	struct ixgbe_tx_buf *txbuf;
    608 	int                 error, i;
    609 
    610 	/*
    611 	 * Setup DMA descriptor areas.
    612 	 */
    613 	error = ixgbe_dma_tag_create(
    614 	         /*      parent */ sc->osdep.dmat,
    615 	         /*   alignment */ 1,
    616 	         /*      bounds */ 0,
    617 	         /*     maxsize */ IXGBE_TSO_SIZE,
    618 	         /*   nsegments */ sc->num_segs,
    619 	         /*  maxsegsize */ PAGE_SIZE,
    620 	         /*       flags */ 0,
    621 	                           &txr->txtag);
    622 	if (error != 0) {
     623 		aprint_error_dev(dev, "Unable to allocate TX DMA tag\n");
    624 		goto fail;
    625 	}
    626 
    627 	txr->tx_buffers = malloc(sizeof(struct ixgbe_tx_buf) *
    628 	    sc->num_tx_desc, M_DEVBUF, M_WAITOK | M_ZERO);
    629 
    630 	/* Create the descriptor buffer dma maps */
    631 	txbuf = txr->tx_buffers;
    632 	for (i = 0; i < sc->num_tx_desc; i++, txbuf++) {
    633 		error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
    634 		if (error != 0) {
    635 			aprint_error_dev(dev,
    636 			    "Unable to create TX DMA map (%d)\n", error);
    637 			goto fail;
    638 		}
    639 	}
    640 
    641 	return 0;
    642 fail:
     643 	/* We free everything; this handles the case where we stopped in the middle */
    644 #if 0 /* XXX was FreeBSD */
    645 	ixgbe_free_transmit_structures(sc);
    646 #else
    647 	ixgbe_free_transmit_buffers(txr);
    648 #endif
    649 	return (error);
    650 } /* ixgbe_allocate_transmit_buffers */
    651 
    652 /************************************************************************
    653  * ixgbe_setup_transmit_ring - Initialize a transmit ring.
    654  ************************************************************************/
    655 static void
    656 ixgbe_setup_transmit_ring(struct tx_ring *txr)
    657 {
    658 	struct ixgbe_softc    *sc = txr->sc;
    659 	struct ixgbe_tx_buf   *txbuf;
    660 #ifdef DEV_NETMAP
     661 	struct netmap_adapter *na = NA(sc->ifp);
    662 	struct netmap_slot    *slot;
    663 #endif /* DEV_NETMAP */
    664 
    665 	/* Clear the old ring contents */
    666 	IXGBE_TX_LOCK(txr);
    667 
    668 #ifdef DEV_NETMAP
    669 	if (sc->feat_en & IXGBE_FEATURE_NETMAP) {
    670 		/*
    671 		 * (under lock): if in netmap mode, do some consistency
    672 		 * checks and set slot to entry 0 of the netmap ring.
    673 		 */
    674 		slot = netmap_reset(na, NR_TX, txr->me, 0);
    675 	}
    676 #endif /* DEV_NETMAP */
    677 
    678 	bzero((void *)txr->tx_base,
    679 	    (sizeof(union ixgbe_adv_tx_desc)) * sc->num_tx_desc);
    680 	/* Reset indices */
    681 	txr->next_avail_desc = 0;
    682 	txr->next_to_clean = 0;
    683 
    684 	/* Free any existing tx buffers. */
    685 	txbuf = txr->tx_buffers;
    686 	for (int i = 0; i < txr->num_desc; i++, txbuf++) {
    687 		if (txbuf->m_head != NULL) {
    688 			bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
    689 			    0, txbuf->m_head->m_pkthdr.len,
    690 			    BUS_DMASYNC_POSTWRITE);
    691 			ixgbe_dmamap_unload(txr->txtag, txbuf->map);
    692 			m_freem(txbuf->m_head);
    693 			txbuf->m_head = NULL;
    694 		}
    695 
    696 #ifdef DEV_NETMAP
    697 		/*
    698 		 * In netmap mode, set the map for the packet buffer.
    699 		 * NOTE: Some drivers (not this one) also need to set
    700 		 * the physical buffer address in the NIC ring.
    701 		 * Slots in the netmap ring (indexed by "si") are
    702 		 * kring->nkr_hwofs positions "ahead" wrt the
    703 		 * corresponding slot in the NIC ring. In some drivers
    704 		 * (not here) nkr_hwofs can be negative. Function
    705 		 * netmap_idx_n2k() handles wraparounds properly.
    706 		 */
    707 		if ((sc->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
    708 			int si = netmap_idx_n2k(na->tx_rings[txr->me], i);
    709 			netmap_load_map(na, txr->txtag,
    710 			    txbuf->map, NMB(na, slot + si));
    711 		}
    712 #endif /* DEV_NETMAP */
    713 
    714 		/* Clear the EOP descriptor pointer */
    715 		txbuf->eop = NULL;
    716 	}
    717 
    718 	/* Set the rate at which we sample packets */
    719 	if (sc->feat_en & IXGBE_FEATURE_FDIR)
    720 		txr->atr_sample = atr_sample_rate;
    721 
    722 	/* Set number of descriptors available */
    723 	txr->tx_avail = sc->num_tx_desc;
    724 
    725 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
    726 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
    727 	IXGBE_TX_UNLOCK(txr);
    728 } /* ixgbe_setup_transmit_ring */
    729 
    730 /************************************************************************
    731  * ixgbe_setup_transmit_structures - Initialize all transmit rings.
    732  ************************************************************************/
    733 int
    734 ixgbe_setup_transmit_structures(struct ixgbe_softc *sc)
    735 {
    736 	struct tx_ring *txr = sc->tx_rings;
    737 
    738 	for (int i = 0; i < sc->num_queues; i++, txr++)
    739 		ixgbe_setup_transmit_ring(txr);
    740 
    741 	return (0);
    742 } /* ixgbe_setup_transmit_structures */
    743 
    744 /************************************************************************
    745  * ixgbe_free_transmit_structures - Free all transmit rings.
    746  ************************************************************************/
    747 void
    748 ixgbe_free_transmit_structures(struct ixgbe_softc *sc)
    749 {
    750 	struct tx_ring *txr = sc->tx_rings;
    751 
    752 	for (int i = 0; i < sc->num_queues; i++, txr++) {
    753 		ixgbe_free_transmit_buffers(txr);
    754 		ixgbe_dma_free(sc, &txr->txdma);
    755 		IXGBE_TX_LOCK_DESTROY(txr);
    756 	}
    757 	free(sc->tx_rings, M_DEVBUF);
    758 } /* ixgbe_free_transmit_structures */
    759 
    760 /************************************************************************
    761  * ixgbe_free_transmit_buffers
    762  *
    763  *   Free transmit ring related data structures.
    764  ************************************************************************/
    765 static void
    766 ixgbe_free_transmit_buffers(struct tx_ring *txr)
    767 {
    768 	struct ixgbe_softc  *sc = txr->sc;
    769 	struct ixgbe_tx_buf *tx_buffer;
    770 	int                 i;
    771 
    772 	INIT_DEBUGOUT("ixgbe_free_transmit_buffers: begin");
    773 
    774 	if (txr->tx_buffers == NULL)
    775 		return;
    776 
    777 	tx_buffer = txr->tx_buffers;
    778 	for (i = 0; i < sc->num_tx_desc; i++, tx_buffer++) {
    779 		if (tx_buffer->m_head != NULL) {
    780 			bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
    781 			    0, tx_buffer->m_head->m_pkthdr.len,
    782 			    BUS_DMASYNC_POSTWRITE);
    783 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
    784 			m_freem(tx_buffer->m_head);
    785 			tx_buffer->m_head = NULL;
    786 			if (tx_buffer->map != NULL) {
    787 				ixgbe_dmamap_destroy(txr->txtag,
    788 				    tx_buffer->map);
    789 				tx_buffer->map = NULL;
    790 			}
    791 		} else if (tx_buffer->map != NULL) {
    792 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
    793 			ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
    794 			tx_buffer->map = NULL;
    795 		}
    796 	}
    797 	if (txr->txr_interq != NULL) {
    798 		struct mbuf *m;
    799 
    800 		while ((m = pcq_get(txr->txr_interq)) != NULL)
    801 			m_freem(m);
    802 		pcq_destroy(txr->txr_interq);
    803 	}
    804 	if (txr->tx_buffers != NULL) {
    805 		free(txr->tx_buffers, M_DEVBUF);
    806 		txr->tx_buffers = NULL;
    807 	}
    808 	if (txr->txtag != NULL) {
    809 		ixgbe_dma_tag_destroy(txr->txtag);
    810 		txr->txtag = NULL;
    811 	}
    812 } /* ixgbe_free_transmit_buffers */
    813 
    814 /************************************************************************
    815  * ixgbe_tx_ctx_setup
    816  *
    817  *   Advanced Context Descriptor setup for VLAN, CSUM or TSO
    818  ************************************************************************/
    819 static int
    820 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
    821     u32 *cmd_type_len, u32 *olinfo_status)
    822 {
    823 	struct ixgbe_softc               *sc = txr->sc;
    824 	struct ixgbe_adv_tx_context_desc *TXD;
    825 	struct ether_vlan_header         *eh;
    826 #ifdef INET
    827 	struct ip                        *ip;
    828 #endif
    829 #ifdef INET6
    830 	struct ip6_hdr                   *ip6;
    831 #endif
    832 	int                              ehdrlen, ip_hlen = 0;
    833 	int                              offload = TRUE;
    834 	int                              ctxd = txr->next_avail_desc;
    835 	u32                              vlan_macip_lens = 0;
    836 	u32                              type_tucmd_mlhl = 0;
    837 	u16                              vtag = 0;
    838 	u16                              etype;
    839 	u8                               ipproto = 0;
    840 	char                             *l3d;
    841 
    842 	/* First check if TSO is to be used */
    843 	if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4 | M_CSUM_TSOv6)) {
    844 		int rv = ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status);
    845 
    846 		if (rv != 0)
    847 			IXGBE_EVC_ADD(&sc->tso_err, 1);
    848 		return rv;
    849 	}
    850 
    851 	if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
    852 		offload = FALSE;
    853 
    854 	/* Indicate the whole packet as payload when not doing TSO */
    855 	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
    856 
    857 	/*
    858 	 * In advanced descriptors the vlan tag must
    859 	 * be placed into the context descriptor. Hence
    860 	 * we need to make one even if not doing offloads.
    861 	 */
    862 	if (vlan_has_tag(mp)) {
    863 		vtag = htole16(vlan_get_tag(mp));
    864 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
    865 	} else if (!(txr->sc->feat_en & IXGBE_FEATURE_NEEDS_CTXD) &&
    866 	           (offload == FALSE))
    867 		return (0);
    868 
    869 	/*
    870 	 * Determine where frame payload starts.
    871 	 * Jump over vlan headers if already present,
    872 	 * helpful for QinQ too.
    873 	 */
    874 	KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
    875 	eh = mtod(mp, struct ether_vlan_header *);
    876 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
    877 		KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
    878 		etype = ntohs(eh->evl_proto);
    879 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
    880 	} else {
    881 		etype = ntohs(eh->evl_encap_proto);
    882 		ehdrlen = ETHER_HDR_LEN;
    883 	}
    884 
    885 	/* Set the ether header length */
    886 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
    887 
    888 	if (offload == FALSE)
    889 		goto no_offloads;
    890 
    891 	/*
    892 	 * If the first mbuf only includes the ethernet header,
    893 	 * jump to the next one
    894 	 * XXX: This assumes the stack splits mbufs containing headers
    895 	 *      on header boundaries
    896 	 * XXX: And assumes the entire IP header is contained in one mbuf
    897 	 */
    898 	if (mp->m_len == ehdrlen && mp->m_next)
    899 		l3d = mtod(mp->m_next, char *);
    900 	else
    901 		l3d = mtod(mp, char *) + ehdrlen;
    902 
    903 	switch (etype) {
    904 #ifdef INET
    905 	case ETHERTYPE_IP:
    906 		ip = (struct ip *)(l3d);
    907 		ip_hlen = ip->ip_hl << 2;
    908 		ipproto = ip->ip_p;
    909 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
    910 		KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
    911 		    ip->ip_sum == 0);
    912 		break;
    913 #endif
    914 #ifdef INET6
    915 	case ETHERTYPE_IPV6:
    916 		ip6 = (struct ip6_hdr *)(l3d);
    917 		ip_hlen = sizeof(struct ip6_hdr);
    918 		ipproto = ip6->ip6_nxt;
    919 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
    920 		break;
    921 #endif
    922 	default:
    923 		offload = false;
    924 		break;
    925 	}
    926 
    927 	if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
    928 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
    929 
    930 	vlan_macip_lens |= ip_hlen;
    931 
    932 	/* No support for offloads for non-L4 next headers */
    933 	switch (ipproto) {
    934 	case IPPROTO_TCP:
    935 		if (mp->m_pkthdr.csum_flags &
    936 		    (M_CSUM_TCPv4 | M_CSUM_TCPv6))
    937 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
    938 		else
    939 			offload = false;
    940 		break;
    941 	case IPPROTO_UDP:
    942 		if (mp->m_pkthdr.csum_flags &
    943 		    (M_CSUM_UDPv4 | M_CSUM_UDPv6))
    944 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
    945 		else
    946 			offload = false;
    947 		break;
    948 	default:
    949 		offload = false;
    950 		break;
    951 	}
    952 
    953 	if (offload) /* Insert L4 checksum into data descriptors */
    954 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
    955 
    956 no_offloads:
    957 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
    958 
    959 	/* Now ready a context descriptor */
    960 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
    961 
    962 	/* Now copy bits into descriptor */
    963 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
    964 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
    965 	TXD->seqnum_seed = htole32(0);
    966 	TXD->mss_l4len_idx = htole32(0);
    967 
    968 	/* We've consumed the first desc, adjust counters */
    969 	if (++ctxd == txr->num_desc)
    970 		ctxd = 0;
    971 	txr->next_avail_desc = ctxd;
    972 	--txr->tx_avail;
    973 
    974 	return (0);
    975 } /* ixgbe_tx_ctx_setup */
    976 
    977 /************************************************************************
    978  * ixgbe_tso_setup
    979  *
    980  *   Setup work for hardware segmentation offload (TSO) on
    981  *   adapters using advanced tx descriptors
    982  ************************************************************************/
    983 static int
    984 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len,
    985     u32 *olinfo_status)
    986 {
    987 	struct ixgbe_adv_tx_context_desc *TXD;
    988 	struct ether_vlan_header         *eh;
    989 #ifdef INET6
    990 	struct ip6_hdr                   *ip6;
    991 #endif
    992 #ifdef INET
    993 	struct ip                        *ip;
    994 #endif
    995 	struct tcphdr                    *th;
    996 	int                              ctxd, ehdrlen, ip_hlen, tcp_hlen;
    997 	u32                              vlan_macip_lens = 0;
    998 	u32                              type_tucmd_mlhl = 0;
    999 	u32                              mss_l4len_idx = 0, paylen;
   1000 	u16                              vtag = 0, eh_type;
   1001 
   1002 	/*
   1003 	 * Determine where frame payload starts.
   1004 	 * Jump over vlan headers if already present
   1005 	 */
   1006 	eh = mtod(mp, struct ether_vlan_header *);
   1007 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
   1008 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
   1009 		eh_type = eh->evl_proto;
   1010 	} else {
   1011 		ehdrlen = ETHER_HDR_LEN;
   1012 		eh_type = eh->evl_encap_proto;
   1013 	}
   1014 
   1015 	switch (ntohs(eh_type)) {
   1016 #ifdef INET
   1017 	case ETHERTYPE_IP:
   1018 		ip = (struct ip *)(mp->m_data + ehdrlen);
   1019 		if (ip->ip_p != IPPROTO_TCP)
   1020 			return (ENXIO);
   1021 		ip->ip_sum = 0;
   1022 		ip_hlen = ip->ip_hl << 2;
   1023 		th = (struct tcphdr *)((char *)ip + ip_hlen);
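        		/*
        		 * Seed th_sum with the pseudo-header checksum; the hardware
        		 * fills in the TCP checksum for each generated segment.
        		 */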
   1024 		th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
   1025 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
   1026 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
   1027 		/* Tell transmit desc to also do IPv4 checksum. */
   1028 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
   1029 		break;
   1030 #endif
   1031 #ifdef INET6
   1032 	case ETHERTYPE_IPV6:
   1033 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
   1034 		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
   1035 		if (ip6->ip6_nxt != IPPROTO_TCP)
   1036 			return (ENXIO);
    1037 		ip_hlen = sizeof(struct ip6_hdr);
    1039 		th = (struct tcphdr *)((char *)ip6 + ip_hlen);
   1040 		th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
   1041 		    &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
   1042 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
   1043 		break;
   1044 #endif
   1045 	default:
   1046 		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
   1047 		    __func__, ntohs(eh_type));
   1048 		break;
   1049 	}
   1050 
   1051 	ctxd = txr->next_avail_desc;
   1052 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
   1053 
   1054 	tcp_hlen = th->th_off << 2;
   1055 
   1056 	/* This is used in the transmit desc in encap */
   1057 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
   1058 
   1059 	/* VLAN MACLEN IPLEN */
   1060 	if (vlan_has_tag(mp)) {
   1061 		vtag = htole16(vlan_get_tag(mp));
   1062 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
   1063 	}
   1064 
   1065 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
   1066 	vlan_macip_lens |= ip_hlen;
   1067 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
   1068 
   1069 	/* ADV DTYPE TUCMD */
   1070 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
   1071 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
   1072 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
   1073 
   1074 	/* MSS L4LEN IDX */
   1075 	mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
   1076 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
   1077 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
   1078 
   1079 	TXD->seqnum_seed = htole32(0);
   1080 
   1081 	if (++ctxd == txr->num_desc)
   1082 		ctxd = 0;
   1083 
   1084 	txr->tx_avail--;
   1085 	txr->next_avail_desc = ctxd;
   1086 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
   1087 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
   1088 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
   1089 	IXGBE_EVC_ADD(&txr->tso_tx, 1);
   1090 
   1091 	return (0);
   1092 } /* ixgbe_tso_setup */
   1093 
   1094 
   1095 /************************************************************************
   1096  * ixgbe_txeof
   1097  *
   1098  *   Examine each tx_buffer in the used queue. If the hardware is done
   1099  *   processing the packet then free associated resources. The
   1100  *   tx_buffer is put back on the free queue.
   1101  ************************************************************************/
   1102 bool
   1103 ixgbe_txeof(struct tx_ring *txr)
   1104 {
   1105 	struct ixgbe_softc	*sc = txr->sc;
   1106 	struct ifnet		*ifp = sc->ifp;
   1107 	struct ixgbe_tx_buf	*buf;
   1108 	union ixgbe_adv_tx_desc *txd;
   1109 	u32			work, processed = 0;
   1110 	u32			limit = sc->tx_process_limit;
   1111 	u16			avail;
   1112 
   1113 	KASSERT(mutex_owned(&txr->tx_mtx));
   1114 
   1115 #ifdef DEV_NETMAP
   1116 	if ((sc->feat_en & IXGBE_FEATURE_NETMAP) &&
   1117 	    (sc->ifp->if_capenable & IFCAP_NETMAP)) {
    1118 		struct netmap_adapter *na = NA(sc->ifp);
   1119 		struct netmap_kring *kring = na->tx_rings[txr->me];
   1120 		txd = txr->tx_base;
   1121 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1122 		    BUS_DMASYNC_POSTREAD);
   1123 		/*
   1124 		 * In netmap mode, all the work is done in the context
   1125 		 * of the client thread. Interrupt handlers only wake up
   1126 		 * clients, which may be sleeping on individual rings
   1127 		 * or on a global resource for all rings.
   1128 		 * To implement tx interrupt mitigation, we wake up the client
   1129 		 * thread roughly every half ring, even if the NIC interrupts
   1130 		 * more frequently. This is implemented as follows:
   1131 		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
   1132 		 *   the slot that should wake up the thread (nkr_num_slots
   1133 		 *   means the user thread should not be woken up);
   1134 		 * - the driver ignores tx interrupts unless netmap_mitigate=0
   1135 		 *   or the slot has the DD bit set.
   1136 		 */
   1137 		if (kring->nr_kflags < kring->nkr_num_slots &&
   1138 		    le32toh(txd[kring->nr_kflags].wb.status) & IXGBE_TXD_STAT_DD) {
   1139 			netmap_tx_irq(ifp, txr->me);
   1140 		}
   1141 		return false;
   1142 	}
   1143 #endif /* DEV_NETMAP */
   1144 
   1145 	if (txr->tx_avail == txr->num_desc) {
   1146 		txr->busy = 0;
   1147 		return false;
   1148 	}
   1149 
   1150 	/* Get work starting point */
   1151 	work = txr->next_to_clean;
   1152 	buf = &txr->tx_buffers[work];
   1153 	txd = &txr->tx_base[work];
   1154 	work -= txr->num_desc; /* The distance to ring end */
   1155 	avail = txr->tx_avail;
   1156 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1157 	    BUS_DMASYNC_POSTREAD);
   1158 
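        	/*
        	 * 'work' is kept as a negative offset from the end of the ring,
        	 * so the wrap test inside the loop is simply "work reached zero".
        	 * 'limit' bounds how many packets are cleaned per call.
        	 */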
   1159 	do {
   1160 		union ixgbe_adv_tx_desc *eop = buf->eop;
   1161 		if (eop == NULL) /* No work */
   1162 			break;
   1163 
   1164 		if ((le32toh(eop->wb.status) & IXGBE_TXD_STAT_DD) == 0)
   1165 			break;	/* I/O not complete */
   1166 
   1167 		if (buf->m_head) {
   1168 			txr->bytes += buf->m_head->m_pkthdr.len;
   1169 			bus_dmamap_sync(txr->txtag->dt_dmat, buf->map,
   1170 			    0, buf->m_head->m_pkthdr.len,
   1171 			    BUS_DMASYNC_POSTWRITE);
   1172 			ixgbe_dmamap_unload(txr->txtag, buf->map);
   1173 			m_freem(buf->m_head);
   1174 			buf->m_head = NULL;
   1175 		}
   1176 		buf->eop = NULL;
   1177 		++avail;
   1178 
   1179 		/* We clean the range if multi segment */
   1180 		while (txd != eop) {
   1181 			++txd;
   1182 			++buf;
   1183 			++work;
   1184 			/* wrap the ring? */
   1185 			if (__predict_false(!work)) {
   1186 				work -= txr->num_desc;
   1187 				buf = txr->tx_buffers;
   1188 				txd = txr->tx_base;
   1189 			}
   1190 			if (buf->m_head) {
   1191 				txr->bytes +=
   1192 				    buf->m_head->m_pkthdr.len;
   1193 				bus_dmamap_sync(txr->txtag->dt_dmat,
   1194 				    buf->map,
   1195 				    0, buf->m_head->m_pkthdr.len,
   1196 				    BUS_DMASYNC_POSTWRITE);
   1197 				ixgbe_dmamap_unload(txr->txtag,
   1198 				    buf->map);
   1199 				m_freem(buf->m_head);
   1200 				buf->m_head = NULL;
   1201 			}
   1202 			++avail;
   1203 			buf->eop = NULL;
   1204 
   1205 		}
   1206 		++processed;
   1207 
   1208 		/* Try the next packet */
   1209 		++txd;
   1210 		++buf;
   1211 		++work;
   1212 		/* reset with a wrap */
   1213 		if (__predict_false(!work)) {
   1214 			work -= txr->num_desc;
   1215 			buf = txr->tx_buffers;
   1216 			txd = txr->tx_base;
   1217 		}
   1218 		prefetch(txd);
   1219 	} while (__predict_true(--limit));
   1220 
   1221 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1222 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   1223 
   1224 	work += txr->num_desc;
   1225 	txr->next_to_clean = work;
   1226 	if (processed) {
   1227 		txr->tx_avail = avail;
   1228 		txr->txr_no_space = false;
   1229 		txr->packets += processed;
   1230 		if_statadd(ifp, if_opackets, processed);
   1231 	}
   1232 
    1233 	/*
    1234 	 * Queue hang detection: we know there is work
    1235 	 * outstanding or the first return above would
    1236 	 * have been taken, so increment busy if nothing
    1237 	 * was cleaned. The local timer then checks this
    1238 	 * counter and marks the queue as HUNG once it
    1239 	 * exceeds the maximum number of attempts.
    1240 	 */
   1241 	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
   1242 		++txr->busy;
    1243 	/*
    1244 	 * If anything was cleaned we reset the state to 1;
    1245 	 * note that this clears HUNG if it is set.
    1246 	 */
   1247 	if (processed)
   1248 		txr->busy = 1;
   1249 
   1250 	if (txr->tx_avail == txr->num_desc)
   1251 		txr->busy = 0;
   1252 
   1253 	return ((limit > 0) ? false : true);
   1254 } /* ixgbe_txeof */
   1255 
   1256 /************************************************************************
   1257  * ixgbe_rsc_count
   1258  *
   1259  *   Used to detect a descriptor that has been merged by Hardware RSC.
   1260  ************************************************************************/
   1261 static inline u32
   1262 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
   1263 {
   1264 	return (le32toh(rx->wb.lower.lo_dword.data) &
   1265 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
   1266 } /* ixgbe_rsc_count */
   1267 
   1268 /************************************************************************
   1269  * ixgbe_setup_hw_rsc
   1270  *
    1271  *   Initialize the Hardware RSC (LRO) feature on 82599
    1272  *   for an RX ring; it is toggled by the LRO capability
    1273  *   even though it is transparent to the stack.
   1274  *
   1275  *   NOTE: Since this HW feature only works with IPv4 and
   1276  *         testing has shown soft LRO to be as effective,
   1277  *         this feature will be disabled by default.
   1278  ************************************************************************/
   1279 static void
   1280 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
   1281 {
   1282 	struct ixgbe_softc *sc = rxr->sc;
   1283 	struct ixgbe_hw	*hw = &sc->hw;
   1284 	u32		rscctrl, rdrxctl;
   1285 
   1286 	/* If turning LRO/RSC off we need to disable it */
   1287 	if ((sc->ifp->if_capenable & IFCAP_LRO) == 0) {
   1288 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
   1289 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
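        		/* XXX The cleared RSCEN bit is never written back to RSCCTL. */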
   1290 		return;
   1291 	}
   1292 
   1293 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
   1294 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
   1295 #ifdef DEV_NETMAP
   1296 	/* Always strip CRC unless Netmap disabled it */
   1297 	if (!(sc->feat_en & IXGBE_FEATURE_NETMAP) ||
   1298 	    !(sc->ifp->if_capenable & IFCAP_NETMAP) ||
   1299 	    ix_crcstrip)
   1300 #endif /* DEV_NETMAP */
   1301 		rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
   1302 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
   1303 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
   1304 
   1305 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
   1306 	rscctrl |= IXGBE_RSCCTL_RSCEN;
   1307 	/*
   1308 	 * Limit the total number of descriptors that
   1309 	 * can be combined, so it does not exceed 64K
   1310 	 */
   1311 	if (rxr->mbuf_sz == MCLBYTES)
   1312 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
   1313 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
   1314 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
   1315 	else if (rxr->mbuf_sz == MJUM9BYTES)
   1316 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
   1317 	else  /* Using 16K cluster */
   1318 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
   1319 
   1320 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
   1321 
   1322 	/* Enable TCP header recognition */
   1323 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
   1324 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR));
   1325 
   1326 	/* Disable RSC for ACK packets */
   1327 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
   1328 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
   1329 
   1330 	rxr->hw_rsc = TRUE;
   1331 } /* ixgbe_setup_hw_rsc */
   1332 
   1333 /************************************************************************
   1334  * ixgbe_refresh_mbufs
   1335  *
    1336  *   Refresh mbuf buffers for RX descriptor rings.
    1337  *    - Keeps its own state, so discards due to resource
    1338  *      exhaustion are unnecessary; if an mbuf cannot be
    1339  *      obtained it simply returns, keeping its placeholder,
    1340  *      so it can be called again later to retry.
   1341  ************************************************************************/
   1342 static void
   1343 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
   1344 {
   1345 	struct ixgbe_softc  *sc = rxr->sc;
   1346 	struct ixgbe_rx_buf *rxbuf;
   1347 	struct mbuf         *mp;
   1348 	int                 i, error;
   1349 	bool                refreshed = false;
   1350 
   1351 	i = rxr->next_to_refresh;
   1352 	/* next_to_refresh points to the previous one */
   1353 	if (++i == rxr->num_desc)
   1354 		i = 0;
   1355 
   1356 	while (i != limit) {
   1357 		rxbuf = &rxr->rx_buffers[i];
   1358 		if (__predict_false(rxbuf->buf == NULL)) {
   1359 			mp = ixgbe_getcl();
   1360 			if (mp == NULL) {
   1361 				IXGBE_EVC_ADD(&rxr->no_mbuf, 1);
   1362 				goto update;
   1363 			}
   1364 			mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   1365 			IXGBE_M_ADJ(sc, rxr, mp);
   1366 		} else
   1367 			mp = rxbuf->buf;
   1368 
   1369 		/* If we're dealing with an mbuf that was copied rather
   1370 		 * than replaced, there's no need to go through busdma.
   1371 		 */
   1372 		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
   1373 			/* Get the memory mapping */
   1374 			ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   1375 			error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
   1376 			    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
   1377 			if (__predict_false(error != 0)) {
   1378 				device_printf(sc->dev, "Refresh mbufs: "
   1379 				    "payload dmamap load failure - %d\n",
   1380 				    error);
   1381 				m_free(mp);
   1382 				rxbuf->buf = NULL;
   1383 				goto update;
   1384 			}
   1385 			rxbuf->buf = mp;
   1386 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1387 			    0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
   1388 			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
   1389 			    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1390 		} else {
   1391 			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
   1392 			rxbuf->flags &= ~IXGBE_RX_COPY;
   1393 		}
   1394 
   1395 		refreshed = true;
   1396 		/* next_to_refresh points to the previous one */
   1397 		rxr->next_to_refresh = i;
   1398 		if (++i == rxr->num_desc)
   1399 			i = 0;
   1400 	}
   1401 
   1402 update:
   1403 	if (refreshed) /* Update hardware tail index */
   1404 		IXGBE_WRITE_REG(&sc->hw, rxr->tail, rxr->next_to_refresh);
   1405 
   1406 	return;
   1407 } /* ixgbe_refresh_mbufs */
   1408 
   1409 /************************************************************************
   1410  * ixgbe_allocate_receive_buffers
   1411  *
    1412  *   rx_buffer per received packet, the maximum number of rx_buffers
   1413  *   rx_buffer per received packet, the maximum number of rx_buffer's
   1414  *   that we'll need is equal to the number of receive descriptors
   1415  *   that we've allocated.
   1416  ************************************************************************/
   1417 static int
   1418 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
   1419 {
   1420 	struct ixgbe_softc  *sc = rxr->sc;
   1421 	device_t            dev = sc->dev;
   1422 	struct ixgbe_rx_buf *rxbuf;
   1423 	int                 bsize, error;
   1424 
   1425 	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
   1426 	rxr->rx_buffers = malloc(bsize, M_DEVBUF, M_WAITOK | M_ZERO);
   1427 
   1428 	error = ixgbe_dma_tag_create(
   1429 	         /*      parent */ sc->osdep.dmat,
   1430 	         /*   alignment */ 1,
   1431 	         /*      bounds */ 0,
   1432 	         /*     maxsize */ MJUM16BYTES,
   1433 	         /*   nsegments */ 1,
   1434 	         /*  maxsegsize */ MJUM16BYTES,
   1435 	         /*       flags */ 0,
   1436 	                           &rxr->ptag);
   1437 	if (error != 0) {
   1438 		aprint_error_dev(dev, "Unable to create RX DMA tag\n");
   1439 		goto fail;
   1440 	}
   1441 
   1442 	for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
   1443 		rxbuf = &rxr->rx_buffers[i];
   1444 		error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
   1445 		if (error) {
   1446 			aprint_error_dev(dev, "Unable to create RX dma map\n");
   1447 			goto fail;
   1448 		}
   1449 	}
   1450 
   1451 	return (0);
   1452 
   1453 fail:
   1454 	/* Frees all, but can handle partial completion */
   1455 	ixgbe_free_receive_structures(sc);
   1456 
   1457 	return (error);
   1458 } /* ixgbe_allocate_receive_buffers */
   1459 
   1460 /************************************************************************
   1461  * ixgbe_free_receive_ring
   1462  ************************************************************************/
   1463 static void
   1464 ixgbe_free_receive_ring(struct rx_ring *rxr)
   1465 {
   1466 	for (int i = 0; i < rxr->num_desc; i++) {
   1467 		ixgbe_rx_discard(rxr, i);
   1468 	}
   1469 } /* ixgbe_free_receive_ring */
   1470 
   1471 /************************************************************************
   1472  * ixgbe_setup_receive_ring
   1473  *
   1474  *   Initialize a receive ring and its buffers.
   1475  ************************************************************************/
   1476 static int
   1477 ixgbe_setup_receive_ring(struct rx_ring *rxr)
   1478 {
   1479 	struct ixgbe_softc    *sc;
   1480 	struct ixgbe_rx_buf   *rxbuf;
   1481 #ifdef LRO
   1482 	struct ifnet          *ifp;
   1483 	struct lro_ctrl       *lro = &rxr->lro;
   1484 #endif /* LRO */
   1485 #ifdef DEV_NETMAP
    1486 	struct netmap_adapter *na = NA(rxr->sc->ifp);
   1487 	struct netmap_slot    *slot;
   1488 #endif /* DEV_NETMAP */
   1489 	int                   rsize, error = 0;
   1490 
   1491 	sc = rxr->sc;
   1492 #ifdef LRO
   1493 	ifp = sc->ifp;
   1494 #endif /* LRO */
   1495 
   1496 	/* Clear the ring contents */
   1497 	IXGBE_RX_LOCK(rxr);
   1498 
   1499 #ifdef DEV_NETMAP
   1500 	if (sc->feat_en & IXGBE_FEATURE_NETMAP)
   1501 		slot = netmap_reset(na, NR_RX, rxr->me, 0);
   1502 #endif /* DEV_NETMAP */
   1503 
   1504 	rsize = roundup2(sc->num_rx_desc *
   1505 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
   1506 	bzero((void *)rxr->rx_base, rsize);
   1507 	/* Cache the size */
   1508 	rxr->mbuf_sz = sc->rx_mbuf_sz;
   1509 
   1510 	/* Free current RX buffer structs and their mbufs */
   1511 	ixgbe_free_receive_ring(rxr);
   1512 
   1513 	/* Now replenish the mbufs */
   1514 	for (int i = 0; i < rxr->num_desc; i++) {
   1515 		struct mbuf *mp;
   1516 
   1517 		rxbuf = &rxr->rx_buffers[i];
   1518 
   1519 #ifdef DEV_NETMAP
   1520 		/*
   1521 		 * In netmap mode, fill the map and set the buffer
   1522 		 * address in the NIC ring, considering the offset
   1523 		 * between the netmap and NIC rings (see comment in
   1524 		 * ixgbe_setup_transmit_ring() ). No need to allocate
   1525 		 * an mbuf, so end the block with a continue;
   1526 		 */
   1527 		if ((sc->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
   1528 			int sj = netmap_idx_n2k(na->rx_rings[rxr->me], i);
   1529 			uint64_t paddr;
   1530 			void *addr;
   1531 
   1532 			addr = PNMB(na, slot + sj, &paddr);
   1533 			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
   1534 			/* Update descriptor and the cached value */
   1535 			rxr->rx_base[i].read.pkt_addr = htole64(paddr);
   1536 			rxbuf->addr = htole64(paddr);
   1537 			continue;
   1538 		}
   1539 #endif /* DEV_NETMAP */
   1540 
   1541 		rxbuf->flags = 0;
   1542 		rxbuf->buf = ixgbe_getcl();
   1543 		if (rxbuf->buf == NULL) {
   1544 			IXGBE_EVC_ADD(&rxr->no_mbuf, 1);
   1545 			error = ENOBUFS;
   1546 			goto fail;
   1547 		}
   1548 		mp = rxbuf->buf;
   1549 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   1550 		IXGBE_M_ADJ(sc, rxr, mp);
   1551 		/* Get the memory mapping */
   1552 		error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat, rxbuf->pmap,
   1553 		    mp, BUS_DMA_NOWAIT);
   1554 		if (error != 0) {
   1555 			/*
   1556 			 * Clear this entry for later cleanup in
   1557 			 * ixgbe_discard() which is called via
   1558 			 * ixgbe_free_receive_ring().
   1559 			 */
   1560 			m_freem(mp);
   1561 			rxbuf->buf = NULL;
   1562 			goto fail;
   1563 		}
   1564 		bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1565 		    0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
   1566 		/* Update the descriptor and the cached value */
   1567 		rxr->rx_base[i].read.pkt_addr =
   1568 		    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1569 		rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1570 	}
   1571 
   1572 	/* Setup our descriptor indices */
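         	/*
         	 * Leave next_to_refresh on the last slot: with every descriptor
         	 * populated, ixgbe_refresh_mbufs() treats it as the entry just
         	 * before the next one to refill.
         	 */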
   1573 	rxr->next_to_check = 0;
   1574 	rxr->next_to_refresh = sc->num_rx_desc - 1; /* Fully allocated */
   1575 	rxr->lro_enabled = FALSE;
   1576 	rxr->discard_multidesc = false;
   1577 	IXGBE_EVC_STORE(&rxr->rx_copies, 0);
   1578 #if 0 /* NetBSD */
   1579 	IXGBE_EVC_STORE(&rxr->rx_bytes, 0);
   1580 #if 1	/* Fix inconsistency */
   1581 	IXGBE_EVC_STORE(&rxr->rx_packets, 0);
   1582 #endif
   1583 #endif
   1584 	rxr->vtag_strip = FALSE;
   1585 
   1586 	ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   1587 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   1588 
   1589 	/*
   1590 	 * Now set up the LRO interface
   1591 	 */
   1592 	if (ixgbe_rsc_enable)
   1593 		ixgbe_setup_hw_rsc(rxr);
   1594 #ifdef LRO
   1595 	else if (ifp->if_capenable & IFCAP_LRO) {
   1596 		device_t dev = sc->dev;
   1597 		int err = tcp_lro_init(lro);
   1598 		if (err) {
   1599 			device_printf(dev, "LRO Initialization failed!\n");
   1600 			goto fail;
   1601 		}
   1602 		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
   1603 		rxr->lro_enabled = TRUE;
   1604 		lro->ifp = sc->ifp;
   1605 	}
   1606 #endif /* LRO */
   1607 
   1608 	IXGBE_RX_UNLOCK(rxr);
   1609 
   1610 	return (0);
   1611 
   1612 fail:
   1613 	ixgbe_free_receive_ring(rxr);
   1614 	IXGBE_RX_UNLOCK(rxr);
   1615 
   1616 	return (error);
   1617 } /* ixgbe_setup_receive_ring */
   1618 
   1619 /************************************************************************
   1620  * ixgbe_setup_receive_structures - Initialize all receive rings.
   1621  ************************************************************************/
   1622 int
   1623 ixgbe_setup_receive_structures(struct ixgbe_softc *sc)
   1624 {
   1625 	struct rx_ring *rxr = sc->rx_rings;
   1626 	int            j;
   1627 
   1628 	INIT_DEBUGOUT("ixgbe_setup_receive_structures");
   1629 	for (j = 0; j < sc->num_queues; j++, rxr++)
   1630 		if (ixgbe_setup_receive_ring(rxr))
   1631 			goto fail;
   1632 
   1633 	return (0);
   1634 fail:
   1635 	/*
    1636 	 * Free the RX buffers allocated so far. We only handle the
    1637 	 * rings that completed; the failing ring will have cleaned up
    1638 	 * after itself. Ring 'j' failed, so it is the terminus.
   1639 	 */
   1640 	for (int i = 0; i < j; ++i) {
   1641 		rxr = &sc->rx_rings[i];
   1642 		IXGBE_RX_LOCK(rxr);
   1643 		ixgbe_free_receive_ring(rxr);
   1644 		IXGBE_RX_UNLOCK(rxr);
   1645 	}
   1646 
   1647 	return (ENOBUFS);
   1648 } /* ixgbe_setup_receive_structures */
   1649 
   1650 
   1651 /************************************************************************
   1652  * ixgbe_free_receive_structures - Free all receive rings.
   1653  ************************************************************************/
   1654 void
   1655 ixgbe_free_receive_structures(struct ixgbe_softc *sc)
   1656 {
   1657 	struct rx_ring *rxr = sc->rx_rings;
   1658 
   1659 	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
   1660 
   1661 	for (int i = 0; i < sc->num_queues; i++, rxr++) {
   1662 		ixgbe_free_receive_buffers(rxr);
   1663 #ifdef LRO
   1664 		/* Free LRO memory */
   1665 		tcp_lro_free(&rxr->lro);
   1666 #endif /* LRO */
   1667 		/* Free the ring memory as well */
   1668 		ixgbe_dma_free(sc, &rxr->rxdma);
   1669 		IXGBE_RX_LOCK_DESTROY(rxr);
   1670 	}
   1671 
   1672 	free(sc->rx_rings, M_DEVBUF);
   1673 } /* ixgbe_free_receive_structures */
   1674 
   1675 
   1676 /************************************************************************
   1677  * ixgbe_free_receive_buffers - Free receive ring data structures
   1678  ************************************************************************/
   1679 static void
   1680 ixgbe_free_receive_buffers(struct rx_ring *rxr)
   1681 {
   1682 	struct ixgbe_softc  *sc = rxr->sc;
   1683 	struct ixgbe_rx_buf *rxbuf;
   1684 
   1685 	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
   1686 
   1687 	/* Cleanup any existing buffers */
   1688 	if (rxr->rx_buffers != NULL) {
   1689 		for (int i = 0; i < sc->num_rx_desc; i++) {
   1690 			rxbuf = &rxr->rx_buffers[i];
   1691 			ixgbe_rx_discard(rxr, i);
   1692 			if (rxbuf->pmap != NULL) {
   1693 				ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
   1694 				rxbuf->pmap = NULL;
   1695 			}
   1696 		}
   1697 
   1698 		if (rxr->rx_buffers != NULL) {
   1699 			free(rxr->rx_buffers, M_DEVBUF);
   1700 			rxr->rx_buffers = NULL;
   1701 		}
   1702 	}
   1703 
   1704 	if (rxr->ptag != NULL) {
   1705 		ixgbe_dma_tag_destroy(rxr->ptag);
   1706 		rxr->ptag = NULL;
   1707 	}
   1708 
   1709 	return;
   1710 } /* ixgbe_free_receive_buffers */
   1711 
   1712 /************************************************************************
   1713  * ixgbe_rx_input
   1714  ************************************************************************/
   1715 static __inline void
   1716 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m,
   1717     u32 ptype)
   1718 {
   1719 	struct ixgbe_softc *sc = ifp->if_softc;
   1720 
   1721 #ifdef LRO
   1722 	struct ethercom *ec = &sc->osdep.ec;
   1723 
   1724 	/*
    1725 	 * At the moment, LRO is only for IP/TCP packets whose TCP checksum is
    1726 	 * computed by hardware, and the packet must not have a VLAN tag in its
    1727 	 * Ethernet header. For IPv6 we do not yet support extension headers.
   1728 	 */
    1729 	if (rxr->lro_enabled &&
    1730 	    (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
    1731 	    (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
    1732 	    ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
    1733 	    (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
    1734 	    (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
    1735 	    (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
    1736 	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
    1737 	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
    1738 		/*
    1739 		 * Send to the stack if:
    1740 		 *  - LRO not enabled, or
    1741 		 *  - no LRO resources, or
    1742 		 *  - lro enqueue fails
    1743 		 */
    1744 		if (rxr->lro.lro_cnt != 0)
    1745 			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
    1746 				return;
    1747 	}
   1748 #endif /* LRO */
   1749 
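         	/* Hand the frame to the stack through the per-CPU input queue. */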
   1750 	if_percpuq_enqueue(sc->ipq, m);
   1751 } /* ixgbe_rx_input */
   1752 
   1753 /************************************************************************
   1754  * ixgbe_rx_discard
   1755  ************************************************************************/
   1756 static __inline void
   1757 ixgbe_rx_discard(struct rx_ring *rxr, int i)
   1758 {
   1759 	struct ixgbe_rx_buf *rbuf;
   1760 
   1761 	rbuf = &rxr->rx_buffers[i];
   1762 
   1763 	/*
   1764 	 * With advanced descriptors the writeback clobbers the buffer addrs,
    1765 	 * so it's easier to just free the existing mbufs and take the normal
   1766 	 * refresh path to get new buffers and mapping.
   1767 	 */
   1768 
   1769 	if (rbuf->fmp != NULL) {/* Partial chain ? */
   1770 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
   1771 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
   1772 		ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
   1773 		m_freem(rbuf->fmp);
   1774 		rbuf->fmp = NULL;
   1775 		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
   1776 	} else if (rbuf->buf) {
   1777 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
   1778 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
   1779 		ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
   1780 		m_free(rbuf->buf);
   1781 		rbuf->buf = NULL;
   1782 	}
   1783 
   1784 	rbuf->flags = 0;
   1785 
   1786 	return;
   1787 } /* ixgbe_rx_discard */
   1788 
   1789 
   1790 /************************************************************************
   1791  * ixgbe_rxeof
   1792  *
    1793  *   Executes in interrupt context. It replenishes the
    1794  *   mbufs in the descriptor ring and passes data that has
    1795  *   been DMA'd into host memory up to the upper layer.
   1796  *
   1797  *   Return TRUE for more work, FALSE for all clean.
   1798  ************************************************************************/
   1799 bool
   1800 ixgbe_rxeof(struct ix_queue *que)
   1801 {
   1802 	struct ixgbe_softc	*sc = que->sc;
   1803 	struct rx_ring		*rxr = que->rxr;
   1804 	struct ifnet		*ifp = sc->ifp;
   1805 #ifdef LRO
   1806 	struct lro_ctrl		*lro = &rxr->lro;
   1807 #endif /* LRO */
   1808 	union ixgbe_adv_rx_desc	*cur;
   1809 	struct ixgbe_rx_buf	*rbuf, *nbuf;
   1810 	int			i, nextp, processed = 0;
   1811 	u32			staterr = 0;
   1812 	u32			loopcount = 0, numdesc;
   1813 	u32			limit = sc->rx_process_limit;
   1814 	u32			rx_copy_len = sc->rx_copy_len;
   1815 	bool			discard_multidesc = rxr->discard_multidesc;
   1816 	bool			wraparound = false;
   1817 	unsigned int		syncremain;
   1818 #ifdef RSS
   1819 	u16			pkt_info;
   1820 #endif
   1821 
   1822 	IXGBE_RX_LOCK(rxr);
   1823 
   1824 #ifdef DEV_NETMAP
   1825 	if (sc->feat_en & IXGBE_FEATURE_NETMAP) {
   1826 		/* Same as the txeof routine: wakeup clients on intr. */
   1827 		if (netmap_rx_irq(ifp, rxr->me, &processed)) {
   1828 			IXGBE_RX_UNLOCK(rxr);
   1829 			return (FALSE);
   1830 		}
   1831 	}
   1832 #endif /* DEV_NETMAP */
   1833 
    1834 	/* Sync the descriptors to scan (up to the ring end if wrapping) */
   1835 	if ((rxr->next_to_check + limit) <= rxr->num_desc) {
   1836 		/* Non-wraparound */
   1837 		numdesc = limit;
   1838 		syncremain = 0;
   1839 	} else {
    1840 		/* Wraparound. Sync up to the end of the ring first. */
   1841 		numdesc = rxr->num_desc - rxr->next_to_check;
   1842 
    1843 		/* Remember how many descriptors remain after the wrap */
   1844 		syncremain = limit - numdesc;
   1845 	}
   1846 	bus_dmamap_sync(rxr->rxdma.dma_tag->dt_dmat,
   1847 	    rxr->rxdma.dma_map,
   1848 	    sizeof(union ixgbe_adv_rx_desc) * rxr->next_to_check,
   1849 	    sizeof(union ixgbe_adv_rx_desc) * numdesc,
   1850 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   1851 
   1852 	/*
    1853 	 * The maximum number of loop iterations is rx_process_limit. If
    1854 	 * discard_multidesc is true, keep processing so that a broken
    1855 	 * multi-descriptor packet is not passed to the upper layer.
   1856 	 */
   1857 	for (i = rxr->next_to_check;
   1858 	     (loopcount < limit) || (discard_multidesc == true);) {
   1859 
   1860 		struct mbuf *sendmp, *mp;
   1861 		struct mbuf *newmp;
   1862 		u32         rsc, ptype;
   1863 		u16         len;
   1864 		u16         vtag = 0;
   1865 		bool        eop;
   1866 		bool        discard = false;
   1867 
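         		/*
         		 * Finish any descriptor sync still owed: after a ring
         		 * wraparound the leading part of the ring has not been
         		 * synced yet, and past the loop limit (reached only while
         		 * discarding a multi-descriptor packet) descriptors are
         		 * synced one at a time.
         		 */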
   1868 		if (wraparound) {
    1869 			/* Sync the descriptors that follow the wraparound. */
   1870 			KASSERT(syncremain != 0);
   1871 			numdesc = syncremain;
   1872 			wraparound = false;
   1873 		} else if (__predict_false(loopcount >= limit)) {
   1874 			KASSERT(discard_multidesc == true);
   1875 			numdesc = 1;
   1876 		} else
   1877 			numdesc = 0;
   1878 
   1879 		if (numdesc != 0)
   1880 			bus_dmamap_sync(rxr->rxdma.dma_tag->dt_dmat,
   1881 			    rxr->rxdma.dma_map, 0,
   1882 			    sizeof(union ixgbe_adv_rx_desc) * numdesc,
   1883 			    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   1884 
   1885 		cur = &rxr->rx_base[i];
   1886 		staterr = le32toh(cur->wb.upper.status_error);
   1887 #ifdef RSS
   1888 		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
   1889 #endif
   1890 
   1891 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
   1892 			break;
   1893 
   1894 		loopcount++;
   1895 		sendmp = newmp = NULL;
   1896 		nbuf = NULL;
   1897 		rsc = 0;
   1898 		cur->wb.upper.status_error = 0;
   1899 		rbuf = &rxr->rx_buffers[i];
   1900 		mp = rbuf->buf;
   1901 
   1902 		len = le16toh(cur->wb.upper.length);
   1903 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
   1904 		    IXGBE_RXDADV_PKTTYPE_MASK;
   1905 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
   1906 
   1907 		/* Make sure bad packets are discarded */
   1908 		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
   1909 #if __FreeBSD_version >= 1100036
   1910 			if (sc->feat_en & IXGBE_FEATURE_VF)
   1911 				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
   1912 #endif
   1913 			IXGBE_EVC_ADD(&rxr->rx_discarded, 1);
   1914 			ixgbe_rx_discard(rxr, i);
   1915 			discard_multidesc = false;
   1916 			goto next_desc;
   1917 		}
   1918 
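         		/*
         		 * If an earlier descriptor of this frame was dropped, keep
         		 * dropping until EOP so no partial frame reaches the stack.
         		 */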
   1919 		if (__predict_false(discard_multidesc))
   1920 			discard = true;
   1921 		else {
   1922 			/* Pre-alloc new mbuf. */
   1923 
   1924 			if ((rbuf->fmp == NULL) &&
   1925 			    eop && (len <= rx_copy_len)) {
   1926 				/* For short packet. See below. */
   1927 				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
   1928 				if (__predict_false(sendmp == NULL)) {
   1929 					IXGBE_EVC_ADD(&rxr->no_mbuf, 1);
   1930 					discard = true;
   1931 				}
   1932 			} else {
   1933 				/* For long packet. */
   1934 				newmp = ixgbe_getcl();
   1935 				if (__predict_false(newmp == NULL)) {
   1936 					IXGBE_EVC_ADD(&rxr->no_mbuf, 1);
   1937 					discard = true;
   1938 				}
   1939 			}
   1940 		}
   1941 
   1942 		if (__predict_false(discard)) {
   1943 			/*
   1944 			 * Descriptor initialization is already done by the
   1945 			 * above code (cur->wb.upper.status_error = 0).
   1946 			 * So, we can reuse current rbuf->buf for new packet.
   1947 			 *
   1948 			 * Rewrite the buffer addr, see comment in
   1949 			 * ixgbe_rx_discard().
   1950 			 */
   1951 			cur->read.pkt_addr = rbuf->addr;
   1952 			m_freem(rbuf->fmp);
   1953 			rbuf->fmp = NULL;
   1954 			if (!eop) {
   1955 				/* Discard the entire packet. */
   1956 				discard_multidesc = true;
   1957 			} else
   1958 				discard_multidesc = false;
   1959 			goto next_desc;
   1960 		}
   1961 		discard_multidesc = false;
   1962 
   1963 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
   1964 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
   1965 
   1966 		/*
   1967 		 * On 82599 which supports a hardware
   1968 		 * LRO (called HW RSC), packets need
   1969 		 * not be fragmented across sequential
   1970 		 * descriptors, rather the next descriptor
   1971 		 * is indicated in bits of the descriptor.
   1972 		 * This also means that we might process
   1973 		 * more than one packet at a time, something
    1974 		 * that has never been true before; it
   1975 		 * required eliminating global chain pointers
   1976 		 * in favor of what we are doing here.  -jfv
   1977 		 */
   1978 		if (!eop) {
   1979 			/*
   1980 			 * Figure out the next descriptor
   1981 			 * of this frame.
   1982 			 */
   1983 			if (rxr->hw_rsc == TRUE) {
   1984 				rsc = ixgbe_rsc_count(cur);
   1985 				rxr->rsc_num += (rsc - 1);
   1986 			}
   1987 			if (rsc) { /* Get hardware index */
   1988 				nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
   1989 				    IXGBE_RXDADV_NEXTP_SHIFT);
   1990 			} else { /* Just sequential */
   1991 				nextp = i + 1;
   1992 				if (nextp == sc->num_rx_desc)
   1993 					nextp = 0;
   1994 			}
   1995 			nbuf = &rxr->rx_buffers[nextp];
   1996 			prefetch(nbuf);
   1997 		}
   1998 		/*
   1999 		 * Rather than using the fmp/lmp global pointers
   2000 		 * we now keep the head of a packet chain in the
   2001 		 * buffer struct and pass this along from one
   2002 		 * descriptor to the next, until we get EOP.
   2003 		 */
   2004 		/*
    2005 		 * See if there is a stored chain head carried over
    2006 		 * from the previous descriptor of this frame.
   2007 		 */
   2008 		if (rbuf->fmp != NULL) {
   2009 			/* Secondary frag */
   2010 			sendmp = rbuf->fmp;
   2011 
   2012 			/* Update new (used in future) mbuf */
   2013 			newmp->m_pkthdr.len = newmp->m_len = rxr->mbuf_sz;
   2014 			IXGBE_M_ADJ(sc, rxr, newmp);
   2015 			rbuf->buf = newmp;
   2016 			rbuf->fmp = NULL;
   2017 
   2018 			/* For secondary frag */
   2019 			mp->m_len = len;
   2020 			mp->m_flags &= ~M_PKTHDR;
   2021 
   2022 			/* For sendmp */
   2023 			sendmp->m_pkthdr.len += mp->m_len;
   2024 		} else {
   2025 			/*
   2026 			 * It's the first segment of a multi descriptor
   2027 			 * packet or a single segment which contains a full
   2028 			 * packet.
   2029 			 */
   2030 
   2031 			if (eop && (len <= rx_copy_len)) {
   2032 				/*
    2033 				 * Optimization: this might be a small packet,
    2034 				 * perhaps just a TCP ACK. Copy it into a new mbuf
    2035 				 * and leave the old mbuf+cluster for re-use.
   2036 				 */
   2037 				sendmp->m_data += ETHER_ALIGN;
   2038 				memcpy(mtod(sendmp, void *),
   2039 				    mtod(mp, void *), len);
   2040 				IXGBE_EVC_ADD(&rxr->rx_copies, 1);
   2041 				rbuf->flags |= IXGBE_RX_COPY;
   2042 			} else {
   2043 				/* For long packet */
   2044 
   2045 				/* Update new (used in future) mbuf */
   2046 				newmp->m_pkthdr.len = newmp->m_len
   2047 				    = rxr->mbuf_sz;
   2048 				IXGBE_M_ADJ(sc, rxr, newmp);
   2049 				rbuf->buf = newmp;
   2050 				rbuf->fmp = NULL;
   2051 
   2052 				/* For sendmp */
   2053 				sendmp = mp;
   2054 			}
   2055 
   2056 			/* first desc of a non-ps chain */
   2057 			sendmp->m_pkthdr.len = sendmp->m_len = len;
   2058 		}
   2059 		++processed;
   2060 
   2061 		/* Pass the head pointer on */
   2062 		if (eop == 0) {
   2063 			nbuf->fmp = sendmp;
   2064 			sendmp = NULL;
   2065 			mp->m_next = nbuf->buf;
   2066 		} else { /* Sending this frame */
   2067 			m_set_rcvif(sendmp, ifp);
   2068 			++rxr->packets;
   2069 			IXGBE_EVC_ADD(&rxr->rx_packets, 1);
   2070 			/* capture data for AIM */
   2071 			rxr->bytes += sendmp->m_pkthdr.len;
   2072 			IXGBE_EVC_ADD(&rxr->rx_bytes, sendmp->m_pkthdr.len);
   2073 			/* Process vlan info */
   2074 			if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
   2075 				vtag = le16toh(cur->wb.upper.vlan);
   2076 			if (vtag) {
   2077 				vlan_set_tag(sendmp, vtag);
   2078 			}
   2079 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
   2080 				ixgbe_rx_checksum(staterr, sendmp, ptype,
   2081 				   &sc->stats.pf);
   2082 			}
   2083 
   2084 #if 0 /* FreeBSD */
   2085 			/*
   2086 			 * In case of multiqueue, we have RXCSUM.PCSD bit set
   2087 			 * and never cleared. This means we have RSS hash
   2088 			 * available to be used.
   2089 			 */
   2090 			if (sc->num_queues > 1) {
   2091 				sendmp->m_pkthdr.flowid =
   2092 				    le32toh(cur->wb.lower.hi_dword.rss);
   2093 				switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
   2094 				case IXGBE_RXDADV_RSSTYPE_IPV4:
   2095 					M_HASHTYPE_SET(sendmp,
   2096 					    M_HASHTYPE_RSS_IPV4);
   2097 					break;
   2098 				case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
   2099 					M_HASHTYPE_SET(sendmp,
   2100 					    M_HASHTYPE_RSS_TCP_IPV4);
   2101 					break;
   2102 				case IXGBE_RXDADV_RSSTYPE_IPV6:
   2103 					M_HASHTYPE_SET(sendmp,
   2104 					    M_HASHTYPE_RSS_IPV6);
   2105 					break;
   2106 				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
   2107 					M_HASHTYPE_SET(sendmp,
   2108 					    M_HASHTYPE_RSS_TCP_IPV6);
   2109 					break;
   2110 				case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
   2111 					M_HASHTYPE_SET(sendmp,
   2112 					    M_HASHTYPE_RSS_IPV6_EX);
   2113 					break;
   2114 				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
   2115 					M_HASHTYPE_SET(sendmp,
   2116 					    M_HASHTYPE_RSS_TCP_IPV6_EX);
   2117 					break;
   2118 #if __FreeBSD_version > 1100000
   2119 				case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
   2120 					M_HASHTYPE_SET(sendmp,
   2121 					    M_HASHTYPE_RSS_UDP_IPV4);
   2122 					break;
   2123 				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
   2124 					M_HASHTYPE_SET(sendmp,
   2125 					    M_HASHTYPE_RSS_UDP_IPV6);
   2126 					break;
   2127 				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
   2128 					M_HASHTYPE_SET(sendmp,
   2129 					    M_HASHTYPE_RSS_UDP_IPV6_EX);
   2130 					break;
   2131 #endif
   2132 				default:
   2133 					M_HASHTYPE_SET(sendmp,
   2134 					    M_HASHTYPE_OPAQUE_HASH);
   2135 				}
   2136 			} else {
   2137 				sendmp->m_pkthdr.flowid = que->msix;
   2138 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
   2139 			}
   2140 #endif
   2141 		}
   2142 next_desc:
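         		/* Make our descriptor updates visible to the hardware again. */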
   2143 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   2144 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   2145 
   2146 		/* Advance our pointers to the next descriptor. */
   2147 		if (++i == rxr->num_desc) {
   2148 			wraparound = true;
   2149 			i = 0;
   2150 		}
   2151 		rxr->next_to_check = i;
   2152 
   2153 		/* Now send to the stack or do LRO */
   2154 		if (sendmp != NULL)
   2155 			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
   2156 
    2157 		/* Refresh mbufs for every 8 descriptors we process */
   2158 		if (processed == 8) {
   2159 			ixgbe_refresh_mbufs(rxr, i);
   2160 			processed = 0;
   2161 		}
   2162 	}
   2163 
   2164 	/* Save the current status */
   2165 	rxr->discard_multidesc = discard_multidesc;
   2166 
   2167 	/* Refresh any remaining buf structs */
   2168 	if (ixgbe_rx_unrefreshed(rxr))
   2169 		ixgbe_refresh_mbufs(rxr, i);
   2170 
   2171 	IXGBE_RX_UNLOCK(rxr);
   2172 
   2173 #ifdef LRO
   2174 	/*
   2175 	 * Flush any outstanding LRO work
   2176 	 */
   2177 	tcp_lro_flush_all(lro);
   2178 #endif /* LRO */
   2179 
   2180 	/*
   2181 	 * Still have cleaning to do?
   2182 	 */
   2183 	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
   2184 		return (TRUE);
   2185 
   2186 	return (FALSE);
   2187 } /* ixgbe_rxeof */
   2188 
   2189 
   2190 /************************************************************************
   2191  * ixgbe_rx_checksum
   2192  *
   2193  *   Verify that the hardware indicated that the checksum is valid.
    2194  *   Inform the stack about the status of the checksum so that the
    2195  *   stack doesn't spend time verifying it again.
   2196  ************************************************************************/
   2197 static void
   2198 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
   2199     struct ixgbe_hw_stats *stats)
   2200 {
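         	/*
         	 * Pick apart the write-back status dword: the status bits we
         	 * care about live in the low 16 bits, the error bits in the
         	 * top byte.
         	 */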
   2201 	u16  status = (u16)staterr;
   2202 	u8   errors = (u8)(staterr >> 24);
   2203 #if 0
   2204 	bool sctp = false;
   2205 
   2206 	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
   2207 	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
   2208 		sctp = true;
   2209 #endif
   2210 
   2211 	/* IPv4 checksum */
   2212 	if (status & IXGBE_RXD_STAT_IPCS) {
   2213 		IXGBE_EVC_ADD(&stats->ipcs, 1);
   2214 		if (!(errors & IXGBE_RXD_ERR_IPE)) {
   2215 			/* IP Checksum Good */
   2216 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
   2217 		} else {
   2218 			IXGBE_EVC_ADD(&stats->ipcs_bad, 1);
   2219 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
   2220 		}
   2221 	}
   2222 	/* TCP/UDP/SCTP checksum */
   2223 	if (status & IXGBE_RXD_STAT_L4CS) {
   2224 		IXGBE_EVC_ADD(&stats->l4cs, 1);
   2225 		int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
   2226 		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
   2227 			mp->m_pkthdr.csum_flags |= type;
   2228 		} else {
   2229 			IXGBE_EVC_ADD(&stats->l4cs_bad, 1);
   2230 			mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
   2231 		}
   2232 	}
   2233 } /* ixgbe_rx_checksum */
   2234 
   2235 /************************************************************************
   2236  * ixgbe_dma_malloc
   2237  ************************************************************************/
   2238 int
   2239 ixgbe_dma_malloc(struct ixgbe_softc *sc, const bus_size_t size,
   2240 		struct ixgbe_dma_alloc *dma, const int mapflags)
   2241 {
   2242 	device_t dev = sc->dev;
   2243 	int      r, rsegs;
   2244 
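         	/*
         	 * Standard bus_dma(9) sequence: create a tag, allocate and map
         	 * DMA-safe memory, create a map, then load the map to learn the
         	 * bus address of the descriptor area.
         	 */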
   2245 	r = ixgbe_dma_tag_create(
   2246 	     /*      parent */ sc->osdep.dmat,
   2247 	     /*   alignment */ DBA_ALIGN,
   2248 	     /*      bounds */ 0,
   2249 	     /*     maxsize */ size,
   2250 	     /*   nsegments */ 1,
   2251 	     /*  maxsegsize */ size,
   2252 	     /*       flags */ BUS_DMA_ALLOCNOW,
   2253 			       &dma->dma_tag);
   2254 	if (r != 0) {
   2255 		aprint_error_dev(dev,
   2256 		    "%s: ixgbe_dma_tag_create failed; error %d\n", __func__,
   2257 		    r);
   2258 		goto fail_0;
   2259 	}
   2260 
   2261 	r = bus_dmamem_alloc(dma->dma_tag->dt_dmat, size,
   2262 	    dma->dma_tag->dt_alignment, dma->dma_tag->dt_boundary,
   2263 	    &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
   2264 	if (r != 0) {
   2265 		aprint_error_dev(dev,
   2266 		    "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
   2267 		goto fail_1;
   2268 	}
   2269 
   2270 	r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
   2271 	    size, &dma->dma_vaddr, BUS_DMA_NOWAIT | BUS_DMA_COHERENT);
   2272 	if (r != 0) {
   2273 		aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
   2274 		    __func__, r);
   2275 		goto fail_2;
   2276 	}
   2277 
   2278 	r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
   2279 	if (r != 0) {
    2280 		aprint_error_dev(dev,
    2281 		    "%s: ixgbe_dmamap_create failed; error %d\n", __func__, r);
   2282 		goto fail_3;
   2283 	}
   2284 
   2285 	r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map,
   2286 	    dma->dma_vaddr, size, NULL, mapflags | BUS_DMA_NOWAIT);
   2287 	if (r != 0) {
   2288 		aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
   2289 		    __func__, r);
   2290 		goto fail_4;
   2291 	}
   2292 	dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
   2293 	dma->dma_size = size;
   2294 	return 0;
   2295 fail_4:
   2296 	ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
   2297 fail_3:
   2298 	bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
   2299 fail_2:
   2300 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
   2301 fail_1:
   2302 	ixgbe_dma_tag_destroy(dma->dma_tag);
   2303 fail_0:
   2304 
   2305 	return (r);
   2306 } /* ixgbe_dma_malloc */
   2307 
   2308 /************************************************************************
   2309  * ixgbe_dma_free
   2310  ************************************************************************/
   2311 void
   2312 ixgbe_dma_free(struct ixgbe_softc *sc, struct ixgbe_dma_alloc *dma)
   2313 {
   2314 	bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
   2315 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   2316 	ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
   2317 	bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, dma->dma_size);
   2318 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
   2319 	ixgbe_dma_tag_destroy(dma->dma_tag);
   2320 } /* ixgbe_dma_free */
   2321 
   2322 
   2323 /************************************************************************
   2324  * ixgbe_allocate_queues
   2325  *
   2326  *   Allocate memory for the transmit and receive rings, and then
    2327  *   the descriptors associated with each. Called only once at attach.
   2328  ************************************************************************/
   2329 int
   2330 ixgbe_allocate_queues(struct ixgbe_softc *sc)
   2331 {
   2332 	device_t	dev = sc->dev;
   2333 	struct ix_queue	*que;
   2334 	struct tx_ring	*txr;
   2335 	struct rx_ring	*rxr;
   2336 	int             rsize, tsize, error = IXGBE_SUCCESS;
   2337 	int             txconf = 0, rxconf = 0;
   2338 
   2339 	/* First, allocate the top level queue structs */
   2340 	sc->queues = (struct ix_queue *)malloc(sizeof(struct ix_queue) *
   2341 	    sc->num_queues, M_DEVBUF, M_WAITOK | M_ZERO);
   2342 
   2343 	/* Second, allocate the TX ring struct memory */
   2344 	sc->tx_rings = malloc(sizeof(struct tx_ring) *
   2345 	    sc->num_queues, M_DEVBUF, M_WAITOK | M_ZERO);
   2346 
   2347 	/* Third, allocate the RX ring */
   2348 	sc->rx_rings = (struct rx_ring *)malloc(sizeof(struct rx_ring) *
   2349 	    sc->num_queues, M_DEVBUF, M_WAITOK | M_ZERO);
   2350 
   2351 	/* For the ring itself */
   2352 	tsize = roundup2(sc->num_tx_desc * sizeof(union ixgbe_adv_tx_desc),
   2353 	    DBA_ALIGN);
   2354 
   2355 	/*
    2356 	 * Now set up the TX queues. txconf is needed to handle the
    2357 	 * possibility that things fail midcourse, in which case we need
    2358 	 * to undo the memory allocations gracefully.
   2359 	 */
   2360 	for (int i = 0; i < sc->num_queues; i++, txconf++) {
   2361 		/* Set up some basics */
   2362 		txr = &sc->tx_rings[i];
   2363 		txr->sc = sc;
   2364 		txr->txr_interq = NULL;
   2365 		/* In case SR-IOV is enabled, align the index properly */
   2366 #ifdef PCI_IOV
   2367 		txr->me = ixgbe_vf_que_index(sc->iov_mode, sc->pool,
   2368 		    i);
   2369 #else
   2370 		txr->me = i;
   2371 #endif
   2372 		txr->num_desc = sc->num_tx_desc;
   2373 
   2374 		/* Initialize the TX side lock */
   2375 		mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
   2376 
   2377 		if (ixgbe_dma_malloc(sc, tsize, &txr->txdma,
   2378 		    BUS_DMA_NOWAIT)) {
   2379 			aprint_error_dev(dev,
   2380 			    "Unable to allocate TX Descriptor memory\n");
   2381 			error = ENOMEM;
   2382 			goto err_tx_desc;
   2383 		}
   2384 		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
   2385 		bzero((void *)txr->tx_base, tsize);
   2386 
   2387 		/* Now allocate transmit buffers for the ring */
   2388 		if (ixgbe_allocate_transmit_buffers(txr)) {
   2389 			aprint_error_dev(dev,
   2390 			    "Critical Failure setting up transmit buffers\n");
   2391 			error = ENOMEM;
   2392 			goto err_tx_desc;
   2393 		}
   2394 		if (!(sc->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
    2395 			/* Allocate a software transmit queue (pcq) */
   2396 			txr->txr_interq = pcq_create(IXGBE_BR_SIZE, KM_SLEEP);
   2397 			if (txr->txr_interq == NULL) {
   2398 				aprint_error_dev(dev,
   2399 				    "Critical Failure setting up buf ring\n");
   2400 				error = ENOMEM;
   2401 				goto err_tx_desc;
   2402 			}
   2403 		}
   2404 	}
   2405 
   2406 	/*
   2407 	 * Next the RX queues...
   2408 	 */
   2409 	rsize = roundup2(sc->num_rx_desc * sizeof(union ixgbe_adv_rx_desc),
   2410 	    DBA_ALIGN);
   2411 	for (int i = 0; i < sc->num_queues; i++, rxconf++) {
   2412 		rxr = &sc->rx_rings[i];
   2413 		/* Set up some basics */
   2414 		rxr->sc = sc;
   2415 #ifdef PCI_IOV
   2416 		/* In case SR-IOV is enabled, align the index properly */
   2417 		rxr->me = ixgbe_vf_que_index(sc->iov_mode, sc->pool,
   2418 		    i);
   2419 #else
   2420 		rxr->me = i;
   2421 #endif
   2422 		rxr->num_desc = sc->num_rx_desc;
   2423 
   2424 		/* Initialize the RX side lock */
   2425 		mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
   2426 
   2427 		if (ixgbe_dma_malloc(sc, rsize, &rxr->rxdma,
   2428 		    BUS_DMA_NOWAIT)) {
   2429 			aprint_error_dev(dev,
    2430 			    "Unable to allocate RX Descriptor memory\n");
   2431 			error = ENOMEM;
   2432 			goto err_rx_desc;
   2433 		}
   2434 		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
   2435 		bzero((void *)rxr->rx_base, rsize);
   2436 
   2437 		/* Allocate receive buffers for the ring */
   2438 		if (ixgbe_allocate_receive_buffers(rxr)) {
   2439 			aprint_error_dev(dev,
   2440 			    "Critical Failure setting up receive buffers\n");
   2441 			error = ENOMEM;
   2442 			goto err_rx_desc;
   2443 		}
   2444 	}
   2445 
   2446 	/*
   2447 	 * Finally set up the queue holding structs
   2448 	 */
   2449 	for (int i = 0; i < sc->num_queues; i++) {
   2450 		que = &sc->queues[i];
   2451 		que->sc = sc;
   2452 		que->me = i;
   2453 		que->txr = &sc->tx_rings[i];
   2454 		que->rxr = &sc->rx_rings[i];
   2455 
   2456 		mutex_init(&que->dc_mtx, MUTEX_DEFAULT, IPL_NET);
   2457 		que->disabled_count = 0;
   2458 	}
   2459 
   2460 	return (0);
   2461 
   2462 err_rx_desc:
   2463 	for (rxr = sc->rx_rings; rxconf > 0; rxr++, rxconf--)
   2464 		ixgbe_dma_free(sc, &rxr->rxdma);
   2465 err_tx_desc:
   2466 	for (txr = sc->tx_rings; txconf > 0; txr++, txconf--)
   2467 		ixgbe_dma_free(sc, &txr->txdma);
   2468 	free(sc->rx_rings, M_DEVBUF);
   2469 	free(sc->tx_rings, M_DEVBUF);
   2470 	free(sc->queues, M_DEVBUF);
   2471 	return (error);
   2472 } /* ixgbe_allocate_queues */
   2473 
   2474 /************************************************************************
   2475  * ixgbe_free_queues
   2476  *
   2477  *   Free descriptors for the transmit and receive rings, and then
   2478  *   the memory associated with each.
   2479  ************************************************************************/
   2480 void
   2481 ixgbe_free_queues(struct ixgbe_softc *sc)
   2482 {
   2483 	struct ix_queue *que;
   2484 	int i;
   2485 
   2486 	ixgbe_free_transmit_structures(sc);
   2487 	ixgbe_free_receive_structures(sc);
   2488 	for (i = 0; i < sc->num_queues; i++) {
   2489 		que = &sc->queues[i];
   2490 		mutex_destroy(&que->dc_mtx);
   2491 	}
   2492 	free(sc->queues, M_DEVBUF);
   2493 } /* ixgbe_free_queues */
   2494