      1 /* $NetBSD: ix_txrx.c,v 1.106 2023/11/14 02:31:46 msaitoh Exp $ */
      2 
      3 /******************************************************************************
      4 
      5   Copyright (c) 2001-2017, Intel Corporation
      6   All rights reserved.
      7 
      8   Redistribution and use in source and binary forms, with or without
      9   modification, are permitted provided that the following conditions are met:
     10 
     11    1. Redistributions of source code must retain the above copyright notice,
     12       this list of conditions and the following disclaimer.
     13 
     14    2. Redistributions in binary form must reproduce the above copyright
     15       notice, this list of conditions and the following disclaimer in the
     16       documentation and/or other materials provided with the distribution.
     17 
     18    3. Neither the name of the Intel Corporation nor the names of its
     19       contributors may be used to endorse or promote products derived from
     20       this software without specific prior written permission.
     21 
     22   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     23   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     24   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     25   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     26   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     27   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     28   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     29   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     30   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     31   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     32   POSSIBILITY OF SUCH DAMAGE.
     33 
     34 ******************************************************************************/
     35 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 327031 2017-12-20 18:15:06Z erj $*/
     36 
     37 /*
     38  * Copyright (c) 2011 The NetBSD Foundation, Inc.
     39  * All rights reserved.
     40  *
     41  * This code is derived from software contributed to The NetBSD Foundation
     42  * by Coyote Point Systems, Inc.
     43  *
     44  * Redistribution and use in source and binary forms, with or without
     45  * modification, are permitted provided that the following conditions
     46  * are met:
     47  * 1. Redistributions of source code must retain the above copyright
     48  *    notice, this list of conditions and the following disclaimer.
     49  * 2. Redistributions in binary form must reproduce the above copyright
     50  *    notice, this list of conditions and the following disclaimer in the
     51  *    documentation and/or other materials provided with the distribution.
     52  *
     53  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     54  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     55  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     56  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     57  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     58  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     59  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     60  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     61  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     62  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     63  * POSSIBILITY OF SUCH DAMAGE.
     64  */
     65 
     66 #include <sys/cdefs.h>
     67 __KERNEL_RCSID(0, "$NetBSD: ix_txrx.c,v 1.106 2023/11/14 02:31:46 msaitoh Exp $");
     68 
     69 #include "opt_inet.h"
     70 #include "opt_inet6.h"
     71 
     72 #include "ixgbe.h"
     73 
      74 /*
      75  * HW RSC control:
      76  *  This feature only works with
      77  *  IPv4 and only on 82599 and later.
      78  *  It also causes IP forwarding to
      79  *  fail, and that cannot be controlled
      80  *  by the stack the way LRO can. For
      81  *  these reasons it is left off, with
      82  *  no tunable interface; enabling it
      83  *  requires setting the flag below and
      84  *  recompiling.
      85  */
     86 static bool ixgbe_rsc_enable = FALSE;
     87 
      88 /*
      89  * For Flow Director: this is the
      90  * number of TX packets we sample
      91  * for the filter pool; at the default
      92  * of 20, every 20th packet is probed.
      93  *
      94  * This feature can be disabled by
      95  * setting this to 0.
      96  */
     97 static int atr_sample_rate = 20;
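         /*
          * The per-ring counter compared against atr_sample_rate is
          * txr->atr_count; see the IXGBE_FDIR block in ixgbe_xmit() below,
          * where ixgbe_atr() is invoked and the counter reset once it
          * reaches this rate.
          */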
     98 
     99 #define IXGBE_M_ADJ(sc, rxr, mp)					\
    100 	if (sc->max_frame_size <= (rxr->mbuf_sz - ETHER_ALIGN))	\
    101 		m_adj(mp, ETHER_ALIGN)
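         /*
          * When the maximum frame fits in the receive cluster with
          * ETHER_ALIGN (2 bytes) to spare, the macro above trims those two
          * bytes from the front of a fresh cluster so that the IP header
          * following the 14-byte Ethernet header lands 32-bit aligned.
          * Illustrative expansion for a 2KB (MCLBYTES) cluster:
          *
          *	if (sc->max_frame_size <= (2048 - ETHER_ALIGN))
          *		m_adj(mp, ETHER_ALIGN)
          */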
    102 
    103 /************************************************************************
    104  *  Local Function prototypes
    105  ************************************************************************/
    106 static void          ixgbe_setup_transmit_ring(struct tx_ring *);
    107 static void          ixgbe_free_transmit_buffers(struct tx_ring *);
    108 static int           ixgbe_setup_receive_ring(struct rx_ring *);
    109 static void          ixgbe_free_receive_buffers(struct rx_ring *);
    110 static void          ixgbe_rx_checksum(u32, struct mbuf *, u32,
    111                                        struct ixgbe_hw_stats *);
    112 static void          ixgbe_refresh_mbufs(struct rx_ring *, int);
    113 static void          ixgbe_drain(struct ifnet *, struct tx_ring *);
    114 static int           ixgbe_xmit(struct tx_ring *, struct mbuf *);
    115 static int           ixgbe_tx_ctx_setup(struct tx_ring *,
    116                                         struct mbuf *, u32 *, u32 *);
    117 static int           ixgbe_tso_setup(struct tx_ring *,
    118                                      struct mbuf *, u32 *, u32 *);
    119 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
    120 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
    121                                     struct mbuf *, u32);
    122 static int           ixgbe_dma_malloc(struct ixgbe_softc *, bus_size_t,
    123                                       struct ixgbe_dma_alloc *, int);
    124 static void          ixgbe_dma_free(struct ixgbe_softc *, struct ixgbe_dma_alloc *);
    125 
    126 static void	     ixgbe_setup_hw_rsc(struct rx_ring *);
    127 
    128 /************************************************************************
    129  * ixgbe_legacy_start_locked - Transmit entry point
    130  *
    131  *   Called by the stack to initiate a transmit.
    132  *   The driver will remain in this routine as long as there are
    133  *   packets to transmit and transmit resources are available.
    134  *   In case resources are not available, the stack is notified
    135  *   and the packet is requeued.
    136  ************************************************************************/
    137 int
    138 ixgbe_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
    139 {
    140 	int rc;
    141 	struct mbuf    *m_head;
    142 	struct ixgbe_softc *sc = txr->sc;
    143 
    144 	IXGBE_TX_LOCK_ASSERT(txr);
    145 
    146 	if (sc->link_active != LINK_STATE_UP) {
    147 		/*
     148 		 * Discard all packets buffered in the IFQ to avoid
     149 		 * sending stale packets when the link comes back up.
    150 		 */
    151 		ixgbe_drain(ifp, txr);
    152 		return (ENETDOWN);
    153 	}
    154 	if ((ifp->if_flags & IFF_RUNNING) == 0)
    155 		return (ENETDOWN);
    156 	if (txr->txr_no_space)
    157 		return (ENETDOWN);
    158 
    159 	while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
    160 		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
    161 			break;
    162 
    163 		IFQ_POLL(&ifp->if_snd, m_head);
    164 		if (m_head == NULL)
    165 			break;
    166 
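         		/*
         		 * Poll first and dequeue only after ixgbe_xmit() has
         		 * accepted (or permanently rejected) the mbuf; on
         		 * EAGAIN it stays on if_snd and is retried later.
         		 */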
    167 		if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
    168 			break;
    169 		}
    170 		IFQ_DEQUEUE(&ifp->if_snd, m_head);
    171 		if (rc != 0) {
    172 			m_freem(m_head);
    173 			continue;
    174 		}
    175 
    176 		/* Send a copy of the frame to the BPF listener */
    177 		bpf_mtap(ifp, m_head, BPF_D_OUT);
    178 	}
    179 
    180 	return IXGBE_SUCCESS;
    181 } /* ixgbe_legacy_start_locked */
    182 
    183 /************************************************************************
    184  * ixgbe_legacy_start
    185  *
    186  *   Called by the stack, this always uses the first tx ring,
    187  *   and should not be used with multiqueue tx enabled.
    188  ************************************************************************/
    189 void
    190 ixgbe_legacy_start(struct ifnet *ifp)
    191 {
    192 	struct ixgbe_softc *sc = ifp->if_softc;
    193 	struct tx_ring *txr = sc->tx_rings;
    194 
    195 	if (ifp->if_flags & IFF_RUNNING) {
    196 		IXGBE_TX_LOCK(txr);
    197 		ixgbe_legacy_start_locked(ifp, txr);
    198 		IXGBE_TX_UNLOCK(txr);
    199 	}
    200 } /* ixgbe_legacy_start */
    201 
    202 /************************************************************************
    203  * ixgbe_mq_start - Multiqueue Transmit Entry Point
    204  *
    205  *   (if_transmit function)
    206  ************************************************************************/
    207 int
    208 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
    209 {
    210 	struct ixgbe_softc *sc = ifp->if_softc;
    211 	struct tx_ring	*txr;
    212 	int		i;
    213 #ifdef RSS
    214 	uint32_t bucket_id;
    215 #endif
    216 
    217 	/*
    218 	 * When doing RSS, map it to the same outbound queue
    219 	 * as the incoming flow would be mapped to.
    220 	 *
     221 	 * If everything is set up correctly, it should be the
     222 	 * same bucket as the one the current CPU maps to.
    223 	 */
    224 #ifdef RSS
    225 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
    226 		if ((sc->feat_en & IXGBE_FEATURE_RSS) &&
    227 		    (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
    228 		    &bucket_id) == 0)) {
    229 			i = bucket_id % sc->num_queues;
    230 #ifdef IXGBE_DEBUG
    231 			if (bucket_id > sc->num_queues)
    232 				if_printf(ifp,
    233 				    "bucket_id (%d) > num_queues (%d)\n",
    234 				    bucket_id, sc->num_queues);
    235 #endif
    236 		} else
    237 			i = m->m_pkthdr.flowid % sc->num_queues;
    238 	} else
     239 #endif /* RSS */
    240 		i = (cpu_index(curcpu()) % ncpu) % sc->num_queues;
    241 
    242 	/* Check for a hung queue and pick alternative */
    243 	if (((1ULL << i) & sc->active_queues) == 0)
    244 		i = ffs64(sc->active_queues);
    245 
    246 	txr = &sc->tx_rings[i];
    247 
    248 	if (__predict_false(!pcq_put(txr->txr_interq, m))) {
    249 		m_freem(m);
    250 		IXGBE_EVC_ADD(&txr->pcq_drops, 1);
    251 		return ENOBUFS;
    252 	}
    253 #ifdef IXGBE_ALWAYS_TXDEFER
    254 	kpreempt_disable();
    255 	softint_schedule(txr->txr_si);
    256 	kpreempt_enable();
    257 #else
    258 	if (IXGBE_TX_TRYLOCK(txr)) {
    259 		ixgbe_mq_start_locked(ifp, txr);
    260 		IXGBE_TX_UNLOCK(txr);
    261 	} else {
    262 		if (sc->txrx_use_workqueue) {
    263 			u_int *enqueued;
    264 
    265 			/*
     266 			 * This function itself is not called in interrupt
     267 			 * context; however, it can be called in fast softint
     268 			 * context right after receiving forwarded packets.
     269 			 * The workqueue must therefore be protected against
     270 			 * double enqueueing when the machine handles both
     271 			 * locally generated and forwarded packets.
    272 			 */
    273 			enqueued = percpu_getref(sc->txr_wq_enqueued);
    274 			if (*enqueued == 0) {
    275 				*enqueued = 1;
    276 				percpu_putref(sc->txr_wq_enqueued);
    277 				workqueue_enqueue(sc->txr_wq,
    278 				    &txr->wq_cookie, curcpu());
    279 			} else
    280 				percpu_putref(sc->txr_wq_enqueued);
    281 		} else {
    282 			kpreempt_disable();
    283 			softint_schedule(txr->txr_si);
    284 			kpreempt_enable();
    285 		}
    286 	}
    287 #endif
    288 
    289 	return (0);
    290 } /* ixgbe_mq_start */
    291 
    292 /************************************************************************
    293  * ixgbe_mq_start_locked
    294  ************************************************************************/
    295 int
    296 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
    297 {
    298 	struct mbuf    *next;
    299 	int            enqueued = 0, err = 0;
    300 
    301 	if (txr->sc->link_active != LINK_STATE_UP) {
    302 		/*
     303 		 * Discard all packets buffered in txr_interq to avoid
     304 		 * sending stale packets when the link comes back up.
    305 		 */
    306 		ixgbe_drain(ifp, txr);
    307 		return (ENETDOWN);
    308 	}
    309 	if ((ifp->if_flags & IFF_RUNNING) == 0)
    310 		return (ENETDOWN);
    311 	if (txr->txr_no_space)
    312 		return (ENETDOWN);
    313 
    314 	/* Process the queue */
    315 	while ((next = pcq_get(txr->txr_interq)) != NULL) {
    316 		if ((err = ixgbe_xmit(txr, next)) != 0) {
    317 			m_freem(next);
    318 			/* All errors are counted in ixgbe_xmit() */
    319 			break;
    320 		}
    321 		enqueued++;
    322 #if __FreeBSD_version >= 1100036
    323 		/*
    324 		 * Since we're looking at the tx ring, we can check
    325 		 * to see if we're a VF by examining our tail register
    326 		 * address.
    327 		 */
    328 		if ((txr->sc->feat_en & IXGBE_FEATURE_VF) &&
    329 		    (next->m_flags & M_MCAST))
    330 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
    331 #endif
    332 		/* Send a copy of the frame to the BPF listener */
    333 		bpf_mtap(ifp, next, BPF_D_OUT);
    334 		if ((ifp->if_flags & IFF_RUNNING) == 0)
    335 			break;
    336 	}
    337 
    338 	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->sc))
    339 		ixgbe_txeof(txr);
    340 
    341 	return (err);
    342 } /* ixgbe_mq_start_locked */
    343 
    344 /************************************************************************
    345  * ixgbe_deferred_mq_start
    346  *
    347  *   Called from a softint and workqueue (indirectly) to drain queued
    348  *   transmit packets.
    349  ************************************************************************/
    350 void
    351 ixgbe_deferred_mq_start(void *arg)
    352 {
    353 	struct tx_ring *txr = arg;
    354 	struct ixgbe_softc *sc = txr->sc;
    355 	struct ifnet   *ifp = sc->ifp;
    356 
    357 	IXGBE_TX_LOCK(txr);
    358 	if (pcq_peek(txr->txr_interq) != NULL)
    359 		ixgbe_mq_start_locked(ifp, txr);
    360 	IXGBE_TX_UNLOCK(txr);
    361 } /* ixgbe_deferred_mq_start */
    362 
    363 /************************************************************************
    364  * ixgbe_deferred_mq_start_work
    365  *
    366  *   Called from a workqueue to drain queued transmit packets.
    367  ************************************************************************/
    368 void
    369 ixgbe_deferred_mq_start_work(struct work *wk, void *arg)
    370 {
    371 	struct tx_ring *txr = container_of(wk, struct tx_ring, wq_cookie);
    372 	struct ixgbe_softc *sc = txr->sc;
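         	/*
         	 * Clear the per-CPU "enqueued" flag before draining so that
         	 * ixgbe_mq_start() may schedule this work again while we are
         	 * still running.
         	 */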
    373 	u_int *enqueued = percpu_getref(sc->txr_wq_enqueued);
    374 	*enqueued = 0;
    375 	percpu_putref(sc->txr_wq_enqueued);
    376 
    377 	ixgbe_deferred_mq_start(txr);
     378 } /* ixgbe_deferred_mq_start_work */
    379 
    380 /************************************************************************
    381  * ixgbe_drain_all
    382  ************************************************************************/
    383 void
    384 ixgbe_drain_all(struct ixgbe_softc *sc)
    385 {
    386 	struct ifnet *ifp = sc->ifp;
    387 	struct ix_queue *que = sc->queues;
    388 
    389 	for (int i = 0; i < sc->num_queues; i++, que++) {
    390 		struct tx_ring  *txr = que->txr;
    391 
    392 		IXGBE_TX_LOCK(txr);
    393 		ixgbe_drain(ifp, txr);
    394 		IXGBE_TX_UNLOCK(txr);
    395 	}
    396 }
    397 
    398 /************************************************************************
    399  * ixgbe_xmit
    400  *
    401  *   Maps the mbufs to tx descriptors, allowing the
    402  *   TX engine to transmit the packets.
    403  *
    404  *   Return 0 on success, positive on failure
    405  ************************************************************************/
    406 static int
    407 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
    408 {
    409 	struct ixgbe_softc      *sc = txr->sc;
    410 	struct ixgbe_tx_buf     *txbuf;
    411 	union ixgbe_adv_tx_desc *txd = NULL;
    412 	struct ifnet	        *ifp = sc->ifp;
    413 	int                     i, j, error;
    414 	int                     first;
    415 	u32                     olinfo_status = 0, cmd_type_len;
    416 	bool                    remap = TRUE;
    417 	bus_dmamap_t            map;
    418 
    419 	/* Basic descriptor defines */
    420 	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
    421 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
    422 
    423 	if (vlan_has_tag(m_head))
    424 		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
    425 
    426 	/*
     427 	 * It is important to capture the first descriptor
     428 	 * used, because its tx_buffer will later record (via
     429 	 * "eop") the descriptor the hardware reports back on.
    430 	 */
    431 	first = txr->next_avail_desc;
    432 	txbuf = &txr->tx_buffers[first];
    433 	map = txbuf->map;
    434 
    435 	/*
    436 	 * Map the packet for DMA.
    437 	 */
    438 retry:
    439 	error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map, m_head,
    440 	    BUS_DMA_NOWAIT);
    441 
    442 	if (__predict_false(error)) {
    443 		struct mbuf *m;
    444 
    445 		switch (error) {
    446 		case EAGAIN:
    447 			txr->q_eagain_tx_dma_setup++;
    448 			return EAGAIN;
    449 		case ENOMEM:
    450 			txr->q_enomem_tx_dma_setup++;
    451 			return EAGAIN;
    452 		case EFBIG:
    453 			/* Try it again? - one try */
    454 			if (remap == TRUE) {
    455 				remap = FALSE;
    456 				/*
    457 				 * XXX: m_defrag will choke on
    458 				 * non-MCLBYTES-sized clusters
    459 				 */
    460 				txr->q_efbig_tx_dma_setup++;
    461 				m = m_defrag(m_head, M_NOWAIT);
    462 				if (m == NULL) {
    463 					txr->q_mbuf_defrag_failed++;
    464 					return ENOBUFS;
    465 				}
    466 				m_head = m;
    467 				goto retry;
    468 			} else {
    469 				txr->q_efbig2_tx_dma_setup++;
    470 				return error;
    471 			}
    472 		case EINVAL:
    473 			txr->q_einval_tx_dma_setup++;
    474 			return error;
    475 		default:
    476 			txr->q_other_tx_dma_setup++;
    477 			return error;
    478 		}
    479 	}
    480 
     481 	/* Make certain there are enough descriptors (segments plus the context descriptor, with one spare) */
    482 	if (txr->tx_avail < (map->dm_nsegs + 2)) {
    483 		txr->txr_no_space = true;
    484 		IXGBE_EVC_ADD(&txr->no_desc_avail, 1);
    485 		ixgbe_dmamap_unload(txr->txtag, txbuf->map);
    486 		return EAGAIN;
    487 	}
    488 
    489 	/*
    490 	 * Set up the appropriate offload context
    491 	 * this will consume the first descriptor
    492 	 */
    493 	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
    494 	if (__predict_false(error)) {
    495 		return (error);
    496 	}
    497 
    498 #ifdef IXGBE_FDIR
    499 	/* Do the flow director magic */
    500 	if ((sc->feat_en & IXGBE_FEATURE_FDIR) &&
    501 	    (txr->atr_sample) && (!sc->fdir_reinit)) {
    502 		++txr->atr_count;
    503 		if (txr->atr_count >= atr_sample_rate) {
    504 			ixgbe_atr(txr, m_head);
    505 			txr->atr_count = 0;
    506 		}
    507 	}
    508 #endif
    509 
    510 	olinfo_status |= IXGBE_ADVTXD_CC;
    511 	i = txr->next_avail_desc;
    512 	for (j = 0; j < map->dm_nsegs; j++) {
    513 		bus_size_t seglen;
    514 		uint64_t segaddr;
    515 
    516 		txbuf = &txr->tx_buffers[i];
    517 		txd = &txr->tx_base[i];
    518 		seglen = map->dm_segs[j].ds_len;
    519 		segaddr = htole64(map->dm_segs[j].ds_addr);
    520 
    521 		txd->read.buffer_addr = segaddr;
    522 		txd->read.cmd_type_len = htole32(cmd_type_len | seglen);
    523 		txd->read.olinfo_status = htole32(olinfo_status);
    524 
    525 		if (++i == txr->num_desc)
    526 			i = 0;
    527 	}
    528 
    529 	txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
    530 	txr->tx_avail -= map->dm_nsegs;
    531 	txr->next_avail_desc = i;
    532 
    533 	txbuf->m_head = m_head;
    534 	/*
     535 	 * Here we swap the maps so the last descriptor,
     536 	 * which gets the completion interrupt, has the
     537 	 * real map, and the first descriptor gets the
     538 	 * unused map from this last descriptor.
    539 	 */
    540 	txr->tx_buffers[first].map = txbuf->map;
    541 	txbuf->map = map;
    542 	bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
    543 	    BUS_DMASYNC_PREWRITE);
    544 
    545 	/* Set the EOP descriptor that will be marked done */
    546 	txbuf = &txr->tx_buffers[first];
    547 	txbuf->eop = txd;
    548 
    549 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
    550 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
    551 	/*
     552 	 * Advance the Transmit Descriptor Tail (TDT); this tells the
    553 	 * hardware that this frame is available to transmit.
    554 	 */
    555 	IXGBE_EVC_ADD(&txr->total_packets, 1);
    556 	IXGBE_WRITE_REG(&sc->hw, txr->tail, i);
    557 
    558 	net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
    559 	if_statadd_ref(nsr, if_obytes, m_head->m_pkthdr.len);
    560 	if (m_head->m_flags & M_MCAST)
    561 		if_statinc_ref(nsr, if_omcasts);
    562 	IF_STAT_PUTREF(ifp);
    563 
    564 	/* Mark queue as having work */
    565 	if (txr->busy == 0)
    566 		txr->busy = 1;
    567 
    568 	return (0);
    569 } /* ixgbe_xmit */
    570 
    571 /************************************************************************
    572  * ixgbe_drain
    573  ************************************************************************/
    574 static void
    575 ixgbe_drain(struct ifnet *ifp, struct tx_ring *txr)
    576 {
    577 	struct mbuf *m;
    578 
    579 	IXGBE_TX_LOCK_ASSERT(txr);
    580 
    581 	if (txr->me == 0) {
    582 		while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
    583 			IFQ_DEQUEUE(&ifp->if_snd, m);
    584 			m_freem(m);
    585 			IF_DROP(&ifp->if_snd);
    586 		}
    587 	}
    588 
    589 	while ((m = pcq_get(txr->txr_interq)) != NULL) {
    590 		m_freem(m);
    591 		IXGBE_EVC_ADD(&txr->pcq_drops, 1);
    592 	}
    593 }
    594 
    595 /************************************************************************
    596  * ixgbe_allocate_transmit_buffers
    597  *
    598  *   Allocate memory for tx_buffer structures. The tx_buffer stores all
    599  *   the information needed to transmit a packet on the wire. This is
     600  *   called only once at attach; setup is done on every reset.
    601  ************************************************************************/
    602 static int
    603 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
    604 {
    605 	struct ixgbe_softc  *sc = txr->sc;
    606 	device_t            dev = sc->dev;
    607 	struct ixgbe_tx_buf *txbuf;
    608 	int                 error, i;
    609 
    610 	/*
    611 	 * Setup DMA descriptor areas.
    612 	 */
    613 	error = ixgbe_dma_tag_create(
    614 	         /*      parent */ sc->osdep.dmat,
    615 	         /*   alignment */ 1,
    616 	         /*      bounds */ 0,
    617 	         /*     maxsize */ IXGBE_TSO_SIZE,
    618 	         /*   nsegments */ sc->num_segs,
    619 	         /*  maxsegsize */ PAGE_SIZE,
    620 	         /*       flags */ 0,
    621 	                           &txr->txtag);
    622 	if (error != 0) {
     623 		aprint_error_dev(dev, "Unable to allocate TX DMA tag\n");
    624 		goto fail;
    625 	}
    626 
    627 	txr->tx_buffers = malloc(sizeof(struct ixgbe_tx_buf) *
    628 	    sc->num_tx_desc, M_DEVBUF, M_WAITOK | M_ZERO);
    629 
    630 	/* Create the descriptor buffer dma maps */
    631 	txbuf = txr->tx_buffers;
    632 	for (i = 0; i < sc->num_tx_desc; i++, txbuf++) {
    633 		error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
    634 		if (error != 0) {
    635 			aprint_error_dev(dev,
    636 			    "Unable to create TX DMA map (%d)\n", error);
    637 			goto fail;
    638 		}
    639 	}
    640 
    641 	return 0;
    642 fail:
    643 	/* We free all, it handles case where we are in the middle */
    644 #if 0 /* XXX was FreeBSD */
    645 	ixgbe_free_transmit_structures(sc);
    646 #else
    647 	ixgbe_free_transmit_buffers(txr);
    648 #endif
    649 	return (error);
    650 } /* ixgbe_allocate_transmit_buffers */
    651 
    652 /************************************************************************
    653  * ixgbe_setup_transmit_ring - Initialize a transmit ring.
    654  ************************************************************************/
    655 static void
    656 ixgbe_setup_transmit_ring(struct tx_ring *txr)
    657 {
    658 	struct ixgbe_softc    *sc = txr->sc;
    659 	struct ixgbe_tx_buf   *txbuf;
    660 #ifdef DEV_NETMAP
     661 	struct netmap_adapter *na = NA(sc->ifp);
    662 	struct netmap_slot    *slot;
    663 #endif /* DEV_NETMAP */
    664 
    665 	/* Clear the old ring contents */
    666 	IXGBE_TX_LOCK(txr);
    667 
    668 #ifdef DEV_NETMAP
    669 	if (sc->feat_en & IXGBE_FEATURE_NETMAP) {
    670 		/*
    671 		 * (under lock): if in netmap mode, do some consistency
    672 		 * checks and set slot to entry 0 of the netmap ring.
    673 		 */
    674 		slot = netmap_reset(na, NR_TX, txr->me, 0);
    675 	}
    676 #endif /* DEV_NETMAP */
    677 
    678 	bzero((void *)txr->tx_base,
    679 	    (sizeof(union ixgbe_adv_tx_desc)) * sc->num_tx_desc);
    680 	/* Reset indices */
    681 	txr->next_avail_desc = 0;
    682 	txr->next_to_clean = 0;
    683 
    684 	/* Free any existing tx buffers. */
    685 	txbuf = txr->tx_buffers;
    686 	for (int i = 0; i < txr->num_desc; i++, txbuf++) {
    687 		if (txbuf->m_head != NULL) {
    688 			bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
    689 			    0, txbuf->m_head->m_pkthdr.len,
    690 			    BUS_DMASYNC_POSTWRITE);
    691 			ixgbe_dmamap_unload(txr->txtag, txbuf->map);
    692 			m_freem(txbuf->m_head);
    693 			txbuf->m_head = NULL;
    694 		}
    695 
    696 #ifdef DEV_NETMAP
    697 		/*
    698 		 * In netmap mode, set the map for the packet buffer.
    699 		 * NOTE: Some drivers (not this one) also need to set
    700 		 * the physical buffer address in the NIC ring.
    701 		 * Slots in the netmap ring (indexed by "si") are
    702 		 * kring->nkr_hwofs positions "ahead" wrt the
    703 		 * corresponding slot in the NIC ring. In some drivers
    704 		 * (not here) nkr_hwofs can be negative. Function
    705 		 * netmap_idx_n2k() handles wraparounds properly.
    706 		 */
    707 		if ((sc->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
    708 			int si = netmap_idx_n2k(na->tx_rings[txr->me], i);
    709 			netmap_load_map(na, txr->txtag,
    710 			    txbuf->map, NMB(na, slot + si));
    711 		}
    712 #endif /* DEV_NETMAP */
    713 
    714 		/* Clear the EOP descriptor pointer */
    715 		txbuf->eop = NULL;
    716 	}
    717 
    718 	/* Set the rate at which we sample packets */
    719 	if (sc->feat_en & IXGBE_FEATURE_FDIR)
    720 		txr->atr_sample = atr_sample_rate;
    721 
    722 	/* Set number of descriptors available */
    723 	txr->tx_avail = sc->num_tx_desc;
    724 
    725 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
    726 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
    727 	IXGBE_TX_UNLOCK(txr);
    728 } /* ixgbe_setup_transmit_ring */
    729 
    730 /************************************************************************
    731  * ixgbe_setup_transmit_structures - Initialize all transmit rings.
    732  ************************************************************************/
    733 int
    734 ixgbe_setup_transmit_structures(struct ixgbe_softc *sc)
    735 {
    736 	struct tx_ring *txr = sc->tx_rings;
    737 
    738 	for (int i = 0; i < sc->num_queues; i++, txr++)
    739 		ixgbe_setup_transmit_ring(txr);
    740 
    741 	return (0);
    742 } /* ixgbe_setup_transmit_structures */
    743 
    744 /************************************************************************
    745  * ixgbe_free_transmit_structures - Free all transmit rings.
    746  ************************************************************************/
    747 void
    748 ixgbe_free_transmit_structures(struct ixgbe_softc *sc)
    749 {
    750 	struct tx_ring *txr = sc->tx_rings;
    751 
    752 	for (int i = 0; i < sc->num_queues; i++, txr++) {
    753 		ixgbe_free_transmit_buffers(txr);
    754 		ixgbe_dma_free(sc, &txr->txdma);
    755 		IXGBE_TX_LOCK_DESTROY(txr);
    756 	}
    757 	free(sc->tx_rings, M_DEVBUF);
    758 } /* ixgbe_free_transmit_structures */
    759 
    760 /************************************************************************
    761  * ixgbe_free_transmit_buffers
    762  *
    763  *   Free transmit ring related data structures.
    764  ************************************************************************/
    765 static void
    766 ixgbe_free_transmit_buffers(struct tx_ring *txr)
    767 {
    768 	struct ixgbe_softc  *sc = txr->sc;
    769 	struct ixgbe_tx_buf *tx_buffer;
    770 	int                 i;
    771 
    772 	INIT_DEBUGOUT("ixgbe_free_transmit_buffers: begin");
    773 
    774 	if (txr->tx_buffers == NULL)
    775 		return;
    776 
    777 	tx_buffer = txr->tx_buffers;
    778 	for (i = 0; i < sc->num_tx_desc; i++, tx_buffer++) {
    779 		if (tx_buffer->m_head != NULL) {
    780 			bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
    781 			    0, tx_buffer->m_head->m_pkthdr.len,
    782 			    BUS_DMASYNC_POSTWRITE);
    783 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
    784 			m_freem(tx_buffer->m_head);
    785 			tx_buffer->m_head = NULL;
    786 			if (tx_buffer->map != NULL) {
    787 				ixgbe_dmamap_destroy(txr->txtag,
    788 				    tx_buffer->map);
    789 				tx_buffer->map = NULL;
    790 			}
    791 		} else if (tx_buffer->map != NULL) {
    792 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
    793 			ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
    794 			tx_buffer->map = NULL;
    795 		}
    796 	}
    797 	if (txr->txr_interq != NULL) {
    798 		struct mbuf *m;
    799 
    800 		while ((m = pcq_get(txr->txr_interq)) != NULL)
    801 			m_freem(m);
    802 		pcq_destroy(txr->txr_interq);
    803 	}
    804 	if (txr->tx_buffers != NULL) {
    805 		free(txr->tx_buffers, M_DEVBUF);
    806 		txr->tx_buffers = NULL;
    807 	}
    808 	if (txr->txtag != NULL) {
    809 		ixgbe_dma_tag_destroy(txr->txtag);
    810 		txr->txtag = NULL;
    811 	}
    812 } /* ixgbe_free_transmit_buffers */
    813 
    814 /************************************************************************
    815  * ixgbe_tx_ctx_setup
    816  *
    817  *   Advanced Context Descriptor setup for VLAN, CSUM or TSO
    818  ************************************************************************/
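         /*
          * Field packing, as encoded by the *_SHIFT macros used below (see
          * the 82599 datasheet for the authoritative layout):
          *
          *	vlan_macip_lens = vtag    << IXGBE_ADVTXD_VLAN_SHIFT
          *	                | ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT
          *	                | ip_hlen
          */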
    819 static int
    820 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
    821     u32 *cmd_type_len, u32 *olinfo_status)
    822 {
    823 	struct ixgbe_softc               *sc = txr->sc;
    824 	struct ixgbe_adv_tx_context_desc *TXD;
    825 	struct ether_vlan_header         *eh;
    826 #ifdef INET
    827 	struct ip                        *ip;
    828 #endif
    829 #ifdef INET6
    830 	struct ip6_hdr                   *ip6;
    831 #endif
    832 	int                              ehdrlen, ip_hlen = 0;
    833 	int                              offload = TRUE;
    834 	int                              ctxd = txr->next_avail_desc;
    835 	u32                              vlan_macip_lens = 0;
    836 	u32                              type_tucmd_mlhl = 0;
    837 	u16                              vtag = 0;
    838 	u16                              etype;
    839 	u8                               ipproto = 0;
    840 	char                             *l3d;
    841 
    842 	/* First check if TSO is to be used */
    843 	if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4 | M_CSUM_TSOv6)) {
    844 		int rv = ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status);
    845 
    846 		if (rv != 0)
    847 			IXGBE_EVC_ADD(&sc->tso_err, 1);
    848 		return rv;
    849 	}
    850 
    851 	if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
    852 		offload = FALSE;
    853 
    854 	/* Indicate the whole packet as payload when not doing TSO */
    855 	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
    856 
    857 	/*
    858 	 * In advanced descriptors the vlan tag must
    859 	 * be placed into the context descriptor. Hence
    860 	 * we need to make one even if not doing offloads.
    861 	 */
    862 	if (vlan_has_tag(mp)) {
    863 		vtag = htole16(vlan_get_tag(mp));
    864 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
    865 	} else if (!(txr->sc->feat_en & IXGBE_FEATURE_NEEDS_CTXD) &&
    866 	           (offload == FALSE))
    867 		return (0);
    868 
    869 	/*
    870 	 * Determine where frame payload starts.
    871 	 * Jump over vlan headers if already present,
    872 	 * helpful for QinQ too.
    873 	 */
    874 	KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
    875 	eh = mtod(mp, struct ether_vlan_header *);
    876 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
    877 		KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
    878 		etype = ntohs(eh->evl_proto);
    879 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
    880 	} else {
    881 		etype = ntohs(eh->evl_encap_proto);
    882 		ehdrlen = ETHER_HDR_LEN;
    883 	}
    884 
    885 	/* Set the ether header length */
    886 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
    887 
    888 	if (offload == FALSE)
    889 		goto no_offloads;
    890 
    891 	/*
    892 	 * If the first mbuf only includes the ethernet header,
    893 	 * jump to the next one
    894 	 * XXX: This assumes the stack splits mbufs containing headers
    895 	 *      on header boundaries
    896 	 * XXX: And assumes the entire IP header is contained in one mbuf
    897 	 */
    898 	if (mp->m_len == ehdrlen && mp->m_next)
    899 		l3d = mtod(mp->m_next, char *);
    900 	else
    901 		l3d = mtod(mp, char *) + ehdrlen;
    902 
    903 	switch (etype) {
    904 #ifdef INET
    905 	case ETHERTYPE_IP:
    906 		ip = (struct ip *)(l3d);
    907 		ip_hlen = ip->ip_hl << 2;
    908 		ipproto = ip->ip_p;
    909 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
    910 		KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
    911 		    ip->ip_sum == 0);
    912 		break;
    913 #endif
    914 #ifdef INET6
    915 	case ETHERTYPE_IPV6:
    916 		ip6 = (struct ip6_hdr *)(l3d);
    917 		ip_hlen = sizeof(struct ip6_hdr);
    918 		ipproto = ip6->ip6_nxt;
    919 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
    920 		break;
    921 #endif
    922 	default:
    923 		offload = false;
    924 		break;
    925 	}
    926 
    927 	if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
    928 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
    929 
    930 	vlan_macip_lens |= ip_hlen;
    931 
    932 	/* No support for offloads for non-L4 next headers */
    933 	switch (ipproto) {
    934 	case IPPROTO_TCP:
    935 		if (mp->m_pkthdr.csum_flags &
    936 		    (M_CSUM_TCPv4 | M_CSUM_TCPv6))
    937 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
    938 		else
    939 			offload = false;
    940 		break;
    941 	case IPPROTO_UDP:
    942 		if (mp->m_pkthdr.csum_flags &
    943 		    (M_CSUM_UDPv4 | M_CSUM_UDPv6))
    944 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
    945 		else
    946 			offload = false;
    947 		break;
    948 	default:
    949 		offload = false;
    950 		break;
    951 	}
    952 
    953 	if (offload) /* Insert L4 checksum into data descriptors */
    954 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
    955 
    956 no_offloads:
    957 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
    958 
    959 	/* Now ready a context descriptor */
    960 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
    961 
    962 	/* Now copy bits into descriptor */
    963 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
    964 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
    965 	TXD->seqnum_seed = htole32(0);
    966 	TXD->mss_l4len_idx = htole32(0);
    967 
    968 	/* We've consumed the first desc, adjust counters */
    969 	if (++ctxd == txr->num_desc)
    970 		ctxd = 0;
    971 	txr->next_avail_desc = ctxd;
    972 	--txr->tx_avail;
    973 
    974 	return (0);
    975 } /* ixgbe_tx_ctx_setup */
    976 
    977 /************************************************************************
    978  * ixgbe_tso_setup
    979  *
    980  *   Setup work for hardware segmentation offload (TSO) on
    981  *   adapters using advanced tx descriptors
    982  ************************************************************************/
    983 static int
    984 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len,
    985     u32 *olinfo_status)
    986 {
    987 	struct ixgbe_adv_tx_context_desc *TXD;
    988 	struct ether_vlan_header         *eh;
    989 #ifdef INET6
    990 	struct ip6_hdr                   *ip6;
    991 #endif
    992 #ifdef INET
    993 	struct ip                        *ip;
    994 #endif
    995 	struct tcphdr                    *th;
    996 	int                              ctxd, ehdrlen, ip_hlen, tcp_hlen;
    997 	u32                              vlan_macip_lens = 0;
    998 	u32                              type_tucmd_mlhl = 0;
    999 	u32                              mss_l4len_idx = 0, paylen;
   1000 	u16                              vtag = 0, eh_type;
   1001 
   1002 	/*
   1003 	 * Determine where frame payload starts.
   1004 	 * Jump over vlan headers if already present
   1005 	 */
   1006 	eh = mtod(mp, struct ether_vlan_header *);
   1007 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
   1008 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
   1009 		eh_type = eh->evl_proto;
   1010 	} else {
   1011 		ehdrlen = ETHER_HDR_LEN;
   1012 		eh_type = eh->evl_encap_proto;
   1013 	}
   1014 
   1015 	switch (ntohs(eh_type)) {
   1016 #ifdef INET
   1017 	case ETHERTYPE_IP:
   1018 		ip = (struct ip *)(mp->m_data + ehdrlen);
   1019 		if (ip->ip_p != IPPROTO_TCP)
   1020 			return (ENXIO);
   1021 		ip->ip_sum = 0;
   1022 		ip_hlen = ip->ip_hl << 2;
   1023 		th = (struct tcphdr *)((char *)ip + ip_hlen);
   1024 		th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
   1025 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
   1026 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
   1027 		/* Tell transmit desc to also do IPv4 checksum. */
   1028 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
   1029 		break;
   1030 #endif
   1031 #ifdef INET6
   1032 	case ETHERTYPE_IPV6:
   1033 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
   1034 		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
   1035 		if (ip6->ip6_nxt != IPPROTO_TCP)
   1036 			return (ENXIO);
   1037 		ip_hlen = sizeof(struct ip6_hdr);
   1038 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
   1039 		th = (struct tcphdr *)((char *)ip6 + ip_hlen);
   1040 		th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
   1041 		    &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
   1042 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
   1043 		break;
   1044 #endif
   1045 	default:
   1046 		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
   1047 		    __func__, ntohs(eh_type));
   1048 		break;
   1049 	}
   1050 
   1051 	ctxd = txr->next_avail_desc;
   1052 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
   1053 
   1054 	tcp_hlen = th->th_off << 2;
   1055 
   1056 	/* This is used in the transmit desc in encap */
   1057 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
   1058 
   1059 	/* VLAN MACLEN IPLEN */
   1060 	if (vlan_has_tag(mp)) {
   1061 		vtag = htole16(vlan_get_tag(mp));
   1062 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
   1063 	}
   1064 
   1065 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
   1066 	vlan_macip_lens |= ip_hlen;
   1067 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
   1068 
   1069 	/* ADV DTYPE TUCMD */
   1070 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
   1071 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
   1072 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
   1073 
   1074 	/* MSS L4LEN IDX */
   1075 	mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
   1076 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
   1077 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
   1078 
   1079 	TXD->seqnum_seed = htole32(0);
   1080 
   1081 	if (++ctxd == txr->num_desc)
   1082 		ctxd = 0;
   1083 
   1084 	txr->tx_avail--;
   1085 	txr->next_avail_desc = ctxd;
   1086 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
   1087 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
   1088 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
   1089 	IXGBE_EVC_ADD(&txr->tso_tx, 1);
   1090 
   1091 	return (0);
   1092 } /* ixgbe_tso_setup */
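         /*
          * Worked example with illustrative numbers: for a chain with a
          * 14-byte Ethernet header, a 20-byte IPv4 header and a 20-byte TCP
          * header, ehdrlen + ip_hlen + tcp_hlen = 54, so a 64054-byte TSO
          * packet yields paylen = 64000, while mp->m_pkthdr.segsz (the MSS
          * chosen by the stack, e.g. 1460) fills the MSS field above.
          */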
   1093 
   1094 
   1095 /************************************************************************
   1096  * ixgbe_txeof
   1097  *
   1098  *   Examine each tx_buffer in the used queue. If the hardware is done
   1099  *   processing the packet then free associated resources. The
   1100  *   tx_buffer is put back on the free queue.
   1101  ************************************************************************/
   1102 bool
   1103 ixgbe_txeof(struct tx_ring *txr)
   1104 {
   1105 	struct ixgbe_softc	*sc = txr->sc;
   1106 	struct ifnet		*ifp = sc->ifp;
   1107 	struct ixgbe_tx_buf	*buf;
   1108 	union ixgbe_adv_tx_desc *txd;
   1109 	u32			work, processed = 0;
   1110 	u32			limit = sc->tx_process_limit;
   1111 
   1112 	KASSERT(mutex_owned(&txr->tx_mtx));
   1113 
   1114 #ifdef DEV_NETMAP
   1115 	if ((sc->feat_en & IXGBE_FEATURE_NETMAP) &&
   1116 	    (sc->ifp->if_capenable & IFCAP_NETMAP)) {
    1117 		struct netmap_adapter *na = NA(sc->ifp);
   1118 		struct netmap_kring *kring = na->tx_rings[txr->me];
   1119 		txd = txr->tx_base;
   1120 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1121 		    BUS_DMASYNC_POSTREAD);
   1122 		/*
   1123 		 * In netmap mode, all the work is done in the context
   1124 		 * of the client thread. Interrupt handlers only wake up
   1125 		 * clients, which may be sleeping on individual rings
   1126 		 * or on a global resource for all rings.
   1127 		 * To implement tx interrupt mitigation, we wake up the client
   1128 		 * thread roughly every half ring, even if the NIC interrupts
   1129 		 * more frequently. This is implemented as follows:
   1130 		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
   1131 		 *   the slot that should wake up the thread (nkr_num_slots
   1132 		 *   means the user thread should not be woken up);
   1133 		 * - the driver ignores tx interrupts unless netmap_mitigate=0
   1134 		 *   or the slot has the DD bit set.
   1135 		 */
   1136 		if (kring->nr_kflags < kring->nkr_num_slots &&
   1137 		    le32toh(txd[kring->nr_kflags].wb.status) & IXGBE_TXD_STAT_DD) {
   1138 			netmap_tx_irq(ifp, txr->me);
   1139 		}
   1140 		return false;
   1141 	}
   1142 #endif /* DEV_NETMAP */
   1143 
   1144 	if (txr->tx_avail == txr->num_desc) {
   1145 		txr->busy = 0;
   1146 		return false;
   1147 	}
   1148 
   1149 	/* Get work starting point */
   1150 	work = txr->next_to_clean;
   1151 	buf = &txr->tx_buffers[work];
   1152 	txd = &txr->tx_base[work];
   1153 	work -= txr->num_desc; /* The distance to ring end */
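         	/*
         	 * "work" now holds the (unsigned-wrapped) negative distance to
         	 * the end of the ring, so the wrap test below is simply "!work";
         	 * the final "work += txr->num_desc" turns it back into an index.
         	 */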
   1154 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1155 	    BUS_DMASYNC_POSTREAD);
   1156 
   1157 	do {
   1158 		union ixgbe_adv_tx_desc *eop = buf->eop;
   1159 		if (eop == NULL) /* No work */
   1160 			break;
   1161 
   1162 		if ((le32toh(eop->wb.status) & IXGBE_TXD_STAT_DD) == 0)
   1163 			break;	/* I/O not complete */
   1164 
   1165 		if (buf->m_head) {
   1166 			txr->bytes += buf->m_head->m_pkthdr.len;
   1167 			bus_dmamap_sync(txr->txtag->dt_dmat, buf->map,
   1168 			    0, buf->m_head->m_pkthdr.len,
   1169 			    BUS_DMASYNC_POSTWRITE);
   1170 			ixgbe_dmamap_unload(txr->txtag, buf->map);
   1171 			m_freem(buf->m_head);
   1172 			buf->m_head = NULL;
   1173 		}
   1174 		buf->eop = NULL;
   1175 		txr->txr_no_space = false;
   1176 		++txr->tx_avail;
   1177 
   1178 		/* We clean the range if multi segment */
   1179 		while (txd != eop) {
   1180 			++txd;
   1181 			++buf;
   1182 			++work;
   1183 			/* wrap the ring? */
   1184 			if (__predict_false(!work)) {
   1185 				work -= txr->num_desc;
   1186 				buf = txr->tx_buffers;
   1187 				txd = txr->tx_base;
   1188 			}
   1189 			if (buf->m_head) {
   1190 				txr->bytes +=
   1191 				    buf->m_head->m_pkthdr.len;
   1192 				bus_dmamap_sync(txr->txtag->dt_dmat,
   1193 				    buf->map,
   1194 				    0, buf->m_head->m_pkthdr.len,
   1195 				    BUS_DMASYNC_POSTWRITE);
   1196 				ixgbe_dmamap_unload(txr->txtag,
   1197 				    buf->map);
   1198 				m_freem(buf->m_head);
   1199 				buf->m_head = NULL;
   1200 			}
   1201 			++txr->tx_avail;
   1202 			buf->eop = NULL;
   1203 
   1204 		}
   1205 		++txr->packets;
   1206 		++processed;
   1207 		if_statinc(ifp, if_opackets);
   1208 
   1209 		/* Try the next packet */
   1210 		++txd;
   1211 		++buf;
   1212 		++work;
   1213 		/* reset with a wrap */
   1214 		if (__predict_false(!work)) {
   1215 			work -= txr->num_desc;
   1216 			buf = txr->tx_buffers;
   1217 			txd = txr->tx_base;
   1218 		}
   1219 		prefetch(txd);
   1220 	} while (__predict_true(--limit));
   1221 
   1222 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1223 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   1224 
   1225 	work += txr->num_desc;
   1226 	txr->next_to_clean = work;
   1227 
   1228 	/*
    1229 	 * Queue hang detection: we know there is
    1230 	 * work outstanding or the early return above
    1231 	 * would have been taken, so increment busy
    1232 	 * if nothing was cleaned; the local timer
    1233 	 * then checks this count and marks the queue
    1234 	 * HUNG once it exceeds the maximum attempts.
   1235 	 */
   1236 	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
   1237 		++txr->busy;
   1238 	/*
    1239 	 * If anything was cleaned we reset the state to 1;
    1240 	 * note this also clears HUNG if it was set.
   1241 	 */
   1242 	if (processed)
   1243 		txr->busy = 1;
   1244 
   1245 	if (txr->tx_avail == txr->num_desc)
   1246 		txr->busy = 0;
   1247 
   1248 	return ((limit > 0) ? false : true);
   1249 } /* ixgbe_txeof */
   1250 
   1251 /************************************************************************
   1252  * ixgbe_rsc_count
   1253  *
   1254  *   Used to detect a descriptor that has been merged by Hardware RSC.
   1255  ************************************************************************/
   1256 static inline u32
   1257 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
   1258 {
   1259 	return (le32toh(rx->wb.lower.lo_dword.data) &
   1260 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
   1261 } /* ixgbe_rsc_count */
   1262 
   1263 /************************************************************************
   1264  * ixgbe_setup_hw_rsc
   1265  *
    1266  *   Initialize the Hardware RSC (LRO) feature on 82599
    1267  *   for an RX ring; it is toggled by the LRO capability
    1268  *   even though it is transparent to the stack.
   1269  *
   1270  *   NOTE: Since this HW feature only works with IPv4 and
   1271  *         testing has shown soft LRO to be as effective,
   1272  *         this feature will be disabled by default.
   1273  ************************************************************************/
   1274 static void
   1275 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
   1276 {
   1277 	struct ixgbe_softc *sc = rxr->sc;
   1278 	struct ixgbe_hw	*hw = &sc->hw;
   1279 	u32		rscctrl, rdrxctl;
   1280 
   1281 	/* If turning LRO/RSC off we need to disable it */
   1282 	if ((sc->ifp->if_capenable & IFCAP_LRO) == 0) {
   1283 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
    1284 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
         		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
    1285 		return;
   1286 	}
   1287 
   1288 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
   1289 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
   1290 #ifdef DEV_NETMAP
   1291 	/* Always strip CRC unless Netmap disabled it */
   1292 	if (!(sc->feat_en & IXGBE_FEATURE_NETMAP) ||
   1293 	    !(sc->ifp->if_capenable & IFCAP_NETMAP) ||
   1294 	    ix_crcstrip)
   1295 #endif /* DEV_NETMAP */
   1296 		rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
   1297 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
   1298 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
   1299 
   1300 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
   1301 	rscctrl |= IXGBE_RSCCTL_RSCEN;
   1302 	/*
   1303 	 * Limit the total number of descriptors that
   1304 	 * can be combined, so it does not exceed 64K
   1305 	 */
   1306 	if (rxr->mbuf_sz == MCLBYTES)
   1307 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
   1308 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
   1309 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
   1310 	else if (rxr->mbuf_sz == MJUM9BYTES)
   1311 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
   1312 	else  /* Using 16K cluster */
   1313 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
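         	/*
         	 * With the cluster sizes above this caps a coalesced packet at
         	 * roughly 16 x 2KB, 8 x page size, 4 x 9KB or 1 x 16KB, i.e.
         	 * within the 64KB bound mentioned above.
         	 */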
   1314 
   1315 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
   1316 
   1317 	/* Enable TCP header recognition */
   1318 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
   1319 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR));
   1320 
   1321 	/* Disable RSC for ACK packets */
   1322 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
   1323 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
   1324 
   1325 	rxr->hw_rsc = TRUE;
   1326 } /* ixgbe_setup_hw_rsc */
   1327 
   1328 /************************************************************************
   1329  * ixgbe_refresh_mbufs
   1330  *
   1331  *   Refresh mbuf buffers for RX descriptor rings
    1332  *    - now keeps its own state, so discards due to resource
    1333  *      exhaustion are unnecessary: if an mbuf cannot be obtained
    1334  *      it just returns, keeping its placeholder, and can simply
    1335  *      be called again later to retry.
   1336  ************************************************************************/
   1337 static void
   1338 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
   1339 {
   1340 	struct ixgbe_softc  *sc = rxr->sc;
   1341 	struct ixgbe_rx_buf *rxbuf;
   1342 	struct mbuf         *mp;
   1343 	int                 i, error;
   1344 	bool                refreshed = false;
   1345 
   1346 	i = rxr->next_to_refresh;
   1347 	/* next_to_refresh points to the previous one */
   1348 	if (++i == rxr->num_desc)
   1349 		i = 0;
   1350 
   1351 	while (i != limit) {
   1352 		rxbuf = &rxr->rx_buffers[i];
   1353 		if (__predict_false(rxbuf->buf == NULL)) {
   1354 			mp = ixgbe_getcl();
   1355 			if (mp == NULL) {
   1356 				IXGBE_EVC_ADD(&rxr->no_mbuf, 1);
   1357 				goto update;
   1358 			}
   1359 			mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   1360 			IXGBE_M_ADJ(sc, rxr, mp);
   1361 		} else
   1362 			mp = rxbuf->buf;
   1363 
   1364 		/* If we're dealing with an mbuf that was copied rather
   1365 		 * than replaced, there's no need to go through busdma.
   1366 		 */
   1367 		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
   1368 			/* Get the memory mapping */
   1369 			ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   1370 			error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
   1371 			    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
   1372 			if (__predict_false(error != 0)) {
   1373 				device_printf(sc->dev, "Refresh mbufs: "
   1374 				    "payload dmamap load failure - %d\n",
   1375 				    error);
   1376 				m_free(mp);
   1377 				rxbuf->buf = NULL;
   1378 				goto update;
   1379 			}
   1380 			rxbuf->buf = mp;
   1381 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1382 			    0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
   1383 			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
   1384 			    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1385 		} else {
   1386 			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
   1387 			rxbuf->flags &= ~IXGBE_RX_COPY;
   1388 		}
   1389 
   1390 		refreshed = true;
   1391 		/* next_to_refresh points to the previous one */
   1392 		rxr->next_to_refresh = i;
   1393 		if (++i == rxr->num_desc)
   1394 			i = 0;
   1395 	}
   1396 
   1397 update:
   1398 	if (refreshed) /* Update hardware tail index */
   1399 		IXGBE_WRITE_REG(&sc->hw, rxr->tail, rxr->next_to_refresh);
   1400 
   1401 	return;
   1402 } /* ixgbe_refresh_mbufs */
   1403 
   1404 /************************************************************************
   1405  * ixgbe_allocate_receive_buffers
   1406  *
   1407  *   Allocate memory for rx_buffer structures. Since we use one
    1408  *   rx_buffer per received packet, the maximum number of rx_buffers
   1409  *   that we'll need is equal to the number of receive descriptors
   1410  *   that we've allocated.
   1411  ************************************************************************/
   1412 static int
   1413 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
   1414 {
   1415 	struct ixgbe_softc  *sc = rxr->sc;
   1416 	device_t            dev = sc->dev;
   1417 	struct ixgbe_rx_buf *rxbuf;
   1418 	int                 bsize, error;
   1419 
   1420 	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
   1421 	rxr->rx_buffers = malloc(bsize, M_DEVBUF, M_WAITOK | M_ZERO);
   1422 
   1423 	error = ixgbe_dma_tag_create(
   1424 	         /*      parent */ sc->osdep.dmat,
   1425 	         /*   alignment */ 1,
   1426 	         /*      bounds */ 0,
   1427 	         /*     maxsize */ MJUM16BYTES,
   1428 	         /*   nsegments */ 1,
   1429 	         /*  maxsegsize */ MJUM16BYTES,
   1430 	         /*       flags */ 0,
   1431 	                           &rxr->ptag);
   1432 	if (error != 0) {
   1433 		aprint_error_dev(dev, "Unable to create RX DMA tag\n");
   1434 		goto fail;
   1435 	}
   1436 
    1437 	for (int i = 0; i < rxr->num_desc; i++) {
   1438 		rxbuf = &rxr->rx_buffers[i];
   1439 		error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
   1440 		if (error) {
   1441 			aprint_error_dev(dev, "Unable to create RX dma map\n");
   1442 			goto fail;
   1443 		}
   1444 	}
   1445 
   1446 	return (0);
   1447 
   1448 fail:
   1449 	/* Frees all, but can handle partial completion */
   1450 	ixgbe_free_receive_structures(sc);
   1451 
   1452 	return (error);
   1453 } /* ixgbe_allocate_receive_buffers */
   1454 
   1455 /************************************************************************
   1456  * ixgbe_free_receive_ring
   1457  ************************************************************************/
   1458 static void
   1459 ixgbe_free_receive_ring(struct rx_ring *rxr)
   1460 {
   1461 	for (int i = 0; i < rxr->num_desc; i++) {
   1462 		ixgbe_rx_discard(rxr, i);
   1463 	}
   1464 } /* ixgbe_free_receive_ring */
   1465 
   1466 /************************************************************************
   1467  * ixgbe_setup_receive_ring
   1468  *
   1469  *   Initialize a receive ring and its buffers.
   1470  ************************************************************************/
   1471 static int
   1472 ixgbe_setup_receive_ring(struct rx_ring *rxr)
   1473 {
   1474 	struct ixgbe_softc    *sc;
   1475 	struct ixgbe_rx_buf   *rxbuf;
   1476 #ifdef LRO
   1477 	struct ifnet          *ifp;
   1478 	struct lro_ctrl       *lro = &rxr->lro;
   1479 #endif /* LRO */
   1480 #ifdef DEV_NETMAP
    1481 	struct netmap_adapter *na = NA(rxr->sc->ifp);
   1482 	struct netmap_slot    *slot;
   1483 #endif /* DEV_NETMAP */
   1484 	int                   rsize, error = 0;
   1485 
   1486 	sc = rxr->sc;
   1487 #ifdef LRO
   1488 	ifp = sc->ifp;
   1489 #endif /* LRO */
   1490 
   1491 	/* Clear the ring contents */
   1492 	IXGBE_RX_LOCK(rxr);
   1493 
   1494 #ifdef DEV_NETMAP
   1495 	if (sc->feat_en & IXGBE_FEATURE_NETMAP)
   1496 		slot = netmap_reset(na, NR_RX, rxr->me, 0);
   1497 #endif /* DEV_NETMAP */
   1498 
   1499 	rsize = roundup2(sc->num_rx_desc *
   1500 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
   1501 	bzero((void *)rxr->rx_base, rsize);
   1502 	/* Cache the size */
   1503 	rxr->mbuf_sz = sc->rx_mbuf_sz;
   1504 
   1505 	/* Free current RX buffer structs and their mbufs */
   1506 	ixgbe_free_receive_ring(rxr);
   1507 
   1508 	/* Now replenish the mbufs */
   1509 	for (int j = 0; j != rxr->num_desc; ++j) {
   1510 		struct mbuf *mp;
   1511 
   1512 		rxbuf = &rxr->rx_buffers[j];
   1513 
   1514 #ifdef DEV_NETMAP
   1515 		/*
   1516 		 * In netmap mode, fill the map and set the buffer
   1517 		 * address in the NIC ring, considering the offset
   1518 		 * between the netmap and NIC rings (see comment in
   1519 		 * ixgbe_setup_transmit_ring() ). No need to allocate
   1520 		 * an mbuf, so end the block with a continue;
   1521 		 */
   1522 		if ((sc->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
   1523 			int sj = netmap_idx_n2k(na->rx_rings[rxr->me], j);
   1524 			uint64_t paddr;
   1525 			void *addr;
   1526 
   1527 			addr = PNMB(na, slot + sj, &paddr);
   1528 			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
   1529 			/* Update descriptor and the cached value */
   1530 			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
   1531 			rxbuf->addr = htole64(paddr);
   1532 			continue;
   1533 		}
   1534 #endif /* DEV_NETMAP */
   1535 
   1536 		rxbuf->flags = 0;
   1537 		rxbuf->buf = ixgbe_getcl();
   1538 		if (rxbuf->buf == NULL) {
   1539 			IXGBE_EVC_ADD(&rxr->no_mbuf, 1);
   1540 			error = ENOBUFS;
   1541 			goto fail;
   1542 		}
   1543 		mp = rxbuf->buf;
   1544 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   1545 		IXGBE_M_ADJ(sc, rxr, mp);
   1546 		/* Get the memory mapping */
   1547 		error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat, rxbuf->pmap,
   1548 		    mp, BUS_DMA_NOWAIT);
   1549 		if (error != 0) {
   1550 			/*
   1551 			 * Clear this entry for later cleanup in
   1552 			 * ixgbe_rx_discard(), which is called via
   1553 			 * ixgbe_free_receive_ring().
   1554 			 */
   1555 			m_freem(mp);
   1556 			rxbuf->buf = NULL;
   1557 			goto fail;
   1558 		}
   1559 		bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1560 		    0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
   1561 		/* Update the descriptor and the cached value */
   1562 		rxr->rx_base[j].read.pkt_addr =
   1563 		    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1564 		rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1565 	}
   1566 
   1567 	/* Setup our descriptor indices */
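        	/*
        	 * next_to_refresh is the last descriptor made available to the
        	 * hardware (it is what gets written to the tail register), so
        	 * pointing it at the final slot marks the whole ring as
        	 * populated.
        	 */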
   1568 	rxr->next_to_check = 0;
   1569 	rxr->next_to_refresh = sc->num_rx_desc - 1; /* Fully allocated */
   1570 	rxr->lro_enabled = FALSE;
   1571 	rxr->discard_multidesc = false;
   1572 	IXGBE_EVC_STORE(&rxr->rx_copies, 0);
   1573 #if 0 /* NetBSD */
   1574 	IXGBE_EVC_STORE(&rxr->rx_bytes, 0);
   1575 #if 1	/* Fix inconsistency */
   1576 	IXGBE_EVC_STORE(&rxr->rx_packets, 0);
   1577 #endif
   1578 #endif
   1579 	rxr->vtag_strip = FALSE;
   1580 
   1581 	ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   1582 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   1583 
   1584 	/*
   1585 	 * Now set up the LRO interface
   1586 	 */
   1587 	if (ixgbe_rsc_enable)
   1588 		ixgbe_setup_hw_rsc(rxr);
   1589 #ifdef LRO
   1590 	else if (ifp->if_capenable & IFCAP_LRO) {
   1591 		device_t dev = sc->dev;
   1592 		int err = tcp_lro_init(lro);
   1593 		if (err) {
   1594 			device_printf(dev, "LRO Initialization failed!\n");
   1595 			goto fail;
   1596 		}
   1597 		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
   1598 		rxr->lro_enabled = TRUE;
   1599 		lro->ifp = sc->ifp;
   1600 	}
   1601 #endif /* LRO */
   1602 
   1603 	IXGBE_RX_UNLOCK(rxr);
   1604 
   1605 	return (0);
   1606 
   1607 fail:
   1608 	ixgbe_free_receive_ring(rxr);
   1609 	IXGBE_RX_UNLOCK(rxr);
   1610 
   1611 	return (error);
   1612 } /* ixgbe_setup_receive_ring */
   1613 
   1614 /************************************************************************
   1615  * ixgbe_setup_receive_structures - Initialize all receive rings.
   1616  ************************************************************************/
   1617 int
   1618 ixgbe_setup_receive_structures(struct ixgbe_softc *sc)
   1619 {
   1620 	struct rx_ring *rxr = sc->rx_rings;
   1621 	int            j;
   1622 
   1623 	INIT_DEBUGOUT("ixgbe_setup_receive_structures");
   1624 	for (j = 0; j < sc->num_queues; j++, rxr++)
   1625 		if (ixgbe_setup_receive_ring(rxr))
   1626 			goto fail;
   1627 
   1628 	return (0);
   1629 fail:
   1630 	/*
   1631 	 * Free the RX buffers allocated so far; we only handle the
   1632 	 * rings that completed, since the failing ring has already
   1633 	 * cleaned up after itself. Ring 'j' failed, so it is the terminus.
   1634 	 */
   1635 	for (int i = 0; i < j; ++i) {
   1636 		rxr = &sc->rx_rings[i];
   1637 		IXGBE_RX_LOCK(rxr);
   1638 		ixgbe_free_receive_ring(rxr);
   1639 		IXGBE_RX_UNLOCK(rxr);
   1640 	}
   1641 
   1642 	return (ENOBUFS);
   1643 } /* ixgbe_setup_receive_structures */
   1644 
   1645 
   1646 /************************************************************************
   1647  * ixgbe_free_receive_structures - Free all receive rings.
   1648  ************************************************************************/
   1649 void
   1650 ixgbe_free_receive_structures(struct ixgbe_softc *sc)
   1651 {
   1652 	struct rx_ring *rxr = sc->rx_rings;
   1653 
   1654 	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
   1655 
   1656 	for (int i = 0; i < sc->num_queues; i++, rxr++) {
   1657 		ixgbe_free_receive_buffers(rxr);
   1658 #ifdef LRO
   1659 		/* Free LRO memory */
   1660 		tcp_lro_free(&rxr->lro);
   1661 #endif /* LRO */
   1662 		/* Free the ring memory as well */
   1663 		ixgbe_dma_free(sc, &rxr->rxdma);
   1664 		IXGBE_RX_LOCK_DESTROY(rxr);
   1665 	}
   1666 
   1667 	free(sc->rx_rings, M_DEVBUF);
   1668 } /* ixgbe_free_receive_structures */
   1669 
   1670 
   1671 /************************************************************************
   1672  * ixgbe_free_receive_buffers - Free receive ring data structures
   1673  ************************************************************************/
   1674 static void
   1675 ixgbe_free_receive_buffers(struct rx_ring *rxr)
   1676 {
   1677 	struct ixgbe_softc  *sc = rxr->sc;
   1678 	struct ixgbe_rx_buf *rxbuf;
   1679 
   1680 	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
   1681 
   1682 	/* Cleanup any existing buffers */
   1683 	if (rxr->rx_buffers != NULL) {
   1684 		for (int i = 0; i < sc->num_rx_desc; i++) {
   1685 			rxbuf = &rxr->rx_buffers[i];
   1686 			ixgbe_rx_discard(rxr, i);
   1687 			if (rxbuf->pmap != NULL) {
   1688 				ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
   1689 				rxbuf->pmap = NULL;
   1690 			}
   1691 		}
   1692 
   1693 		if (rxr->rx_buffers != NULL) {
   1694 			free(rxr->rx_buffers, M_DEVBUF);
   1695 			rxr->rx_buffers = NULL;
   1696 		}
   1697 	}
   1698 
   1699 	if (rxr->ptag != NULL) {
   1700 		ixgbe_dma_tag_destroy(rxr->ptag);
   1701 		rxr->ptag = NULL;
   1702 	}
   1703 
   1704 	return;
   1705 } /* ixgbe_free_receive_buffers */
   1706 
   1707 /************************************************************************
   1708  * ixgbe_rx_input
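         *
         *   Hand a received packet to the stack, via software LRO when it
         *   is enabled and the packet qualifies, otherwise through the
         *   per-CPU input queue.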
   1709  ************************************************************************/
   1710 static __inline void
   1711 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m,
   1712     u32 ptype)
   1713 {
   1714 	struct ixgbe_softc *sc = ifp->if_softc;
   1715 
   1716 #ifdef LRO
   1717 	struct ethercom *ec = &sc->osdep.ec;
   1718 
   1719 	/*
   1720 	 * At the moment LRO is only for IP/TCP packets whose TCP checksum
   1721 	 * has been computed by hardware. The packet must not carry a VLAN tag
   1722 	 * in its Ethernet header, and for IPv6 we do not yet support ext. hdrs.
   1723 	 */
   1724 	if (rxr->lro_enabled &&
   1725 	    (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
   1726 	    (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
   1727 	    ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
   1728 	    (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
   1729 	    (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
   1730 	    (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
   1731 	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
   1732 	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
   1733 		/*
   1734 		 * Send to the stack if:
   1735 		 *  - LRO not enabled, or
   1736 		 *  - no LRO resources, or
   1737 		 *  - lro enqueue fails
   1738 		 */
   1739 		if (rxr->lro.lro_cnt != 0)
   1740 			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
   1741 				return;
   1742 	}
   1743 #endif /* LRO */
   1744 
   1745 	if_percpuq_enqueue(sc->ipq, m);
   1746 } /* ixgbe_rx_input */
   1747 
   1748 /************************************************************************
   1749  * ixgbe_rx_discard
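         *
         *   Drop the buffer(s) attached to a descriptor: sync and unload the
         *   DMA map and free any partially assembled mbuf chain, leaving the
         *   slot to be refilled by the normal refresh path.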
   1750  ************************************************************************/
   1751 static __inline void
   1752 ixgbe_rx_discard(struct rx_ring *rxr, int i)
   1753 {
   1754 	struct ixgbe_rx_buf *rbuf;
   1755 
   1756 	rbuf = &rxr->rx_buffers[i];
   1757 
   1758 	/*
   1759 	 * With advanced descriptors the writeback clobbers the buffer
   1760 	 * addresses, so it's easier to just free the existing mbufs and
   1761 	 * take the normal refresh path to get new buffers and mappings.
   1762 	 */
   1763 
   1764 	if (rbuf->fmp != NULL) { /* Partial chain? */
   1765 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
   1766 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
   1767 		ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
   1768 		m_freem(rbuf->fmp);
   1769 		rbuf->fmp = NULL;
   1770 		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
   1771 	} else if (rbuf->buf) {
   1772 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
   1773 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
   1774 		ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
   1775 		m_free(rbuf->buf);
   1776 		rbuf->buf = NULL;
   1777 	}
   1778 
   1779 	rbuf->flags = 0;
   1780 
   1781 	return;
   1782 } /* ixgbe_rx_discard */
   1783 
   1784 
   1785 /************************************************************************
   1786  * ixgbe_rxeof
   1787  *
   1788  *   Executes in interrupt context. It replenishes the
   1789  *   mbufs in the descriptor ring and passes data which has
   1790  *   been DMA'ed into host memory up to the upper layer.
   1791  *
   1792  *   Return TRUE for more work, FALSE for all clean.
   1793  ************************************************************************/
   1794 bool
   1795 ixgbe_rxeof(struct ix_queue *que)
   1796 {
   1797 	struct ixgbe_softc	*sc = que->sc;
   1798 	struct rx_ring		*rxr = que->rxr;
   1799 	struct ifnet		*ifp = sc->ifp;
   1800 #ifdef LRO
   1801 	struct lro_ctrl		*lro = &rxr->lro;
   1802 #endif /* LRO */
   1803 	union ixgbe_adv_rx_desc	*cur;
   1804 	struct ixgbe_rx_buf	*rbuf, *nbuf;
   1805 	int			i, nextp, processed = 0;
   1806 	u32			staterr = 0;
   1807 	u32			loopcount = 0, numdesc;
   1808 	u32			limit = sc->rx_process_limit;
   1809 	u32			rx_copy_len = sc->rx_copy_len;
   1810 	bool			discard_multidesc = rxr->discard_multidesc;
   1811 	bool			wraparound = false;
   1812 	unsigned int		syncremain;
   1813 #ifdef RSS
   1814 	u16			pkt_info;
   1815 #endif
   1816 
   1817 	IXGBE_RX_LOCK(rxr);
   1818 
   1819 #ifdef DEV_NETMAP
   1820 	if (sc->feat_en & IXGBE_FEATURE_NETMAP) {
   1821 		/* Same as the txeof routine: wakeup clients on intr. */
   1822 		if (netmap_rx_irq(ifp, rxr->me, &processed)) {
   1823 			IXGBE_RX_UNLOCK(rxr);
   1824 			return (FALSE);
   1825 		}
   1826 	}
   1827 #endif /* DEV_NETMAP */
   1828 
   1829 	/* Sync the ring. The size is rx_process_limit or, on wrap, the part up to the end of the ring */
   1830 	if ((rxr->next_to_check + limit) <= rxr->num_desc) {
   1831 		/* Non-wraparound */
   1832 		numdesc = limit;
   1833 		syncremain = 0;
   1834 	} else {
   1835 		/* Wraparound. Sync up to the end of the ring first. */
   1836 		numdesc = rxr->num_desc - rxr->next_to_check;
   1837 
   1838 		/* Size of the remainder, synced after the wrap */
   1839 		syncremain = limit - numdesc;
   1840 	}
   1841 	bus_dmamap_sync(rxr->rxdma.dma_tag->dt_dmat,
   1842 	    rxr->rxdma.dma_map,
   1843 	    sizeof(union ixgbe_adv_rx_desc) * rxr->next_to_check,
   1844 	    sizeof(union ixgbe_adv_rx_desc) * numdesc,
   1845 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   1846 
   1847 	/*
   1848 	 * The maximum number of loop iterations is rx_process_limit. If
   1849 	 * discard_multidesc is true, keep processing so that a broken packet
   1850 	 * is not passed to the upper layer.
   1851 	 */
   1852 	for (i = rxr->next_to_check;
   1853 	     (loopcount < limit) || (discard_multidesc == true);) {
   1854 
   1855 		struct mbuf *sendmp, *mp;
   1856 		struct mbuf *newmp;
   1857 		u32         rsc, ptype;
   1858 		u16         len;
   1859 		u16         vtag = 0;
   1860 		bool        eop;
   1861 		bool        discard = false;
   1862 
   1863 		if (wraparound) {
   1864 			/* Sync the remainder after the wrap. */
   1865 			KASSERT(syncremain != 0);
   1866 			numdesc = syncremain;
   1867 			wraparound = false;
   1868 		} else if (__predict_false(loopcount >= limit)) {
   1869 			KASSERT(discard_multidesc == true);
   1870 			numdesc = 1;
   1871 		} else
   1872 			numdesc = 0;
   1873 
   1874 		if (numdesc != 0)
   1875 			bus_dmamap_sync(rxr->rxdma.dma_tag->dt_dmat,
   1876 			    rxr->rxdma.dma_map, 0,
   1877 			    sizeof(union ixgbe_adv_rx_desc) * numdesc,
   1878 			    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   1879 
   1880 		cur = &rxr->rx_base[i];
   1881 		staterr = le32toh(cur->wb.upper.status_error);
   1882 #ifdef RSS
   1883 		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
   1884 #endif
   1885 
   1886 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
   1887 			break;
   1888 
   1889 		loopcount++;
   1890 		sendmp = newmp = NULL;
   1891 		nbuf = NULL;
   1892 		rsc = 0;
   1893 		cur->wb.upper.status_error = 0;
   1894 		rbuf = &rxr->rx_buffers[i];
   1895 		mp = rbuf->buf;
   1896 
   1897 		len = le16toh(cur->wb.upper.length);
   1898 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
   1899 		    IXGBE_RXDADV_PKTTYPE_MASK;
   1900 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
   1901 
   1902 		/* Make sure bad packets are discarded */
   1903 		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
   1904 #if __FreeBSD_version >= 1100036
   1905 			if (sc->feat_en & IXGBE_FEATURE_VF)
   1906 				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
   1907 #endif
   1908 			IXGBE_EVC_ADD(&rxr->rx_discarded, 1);
   1909 			ixgbe_rx_discard(rxr, i);
   1910 			discard_multidesc = false;
   1911 			goto next_desc;
   1912 		}
   1913 
   1914 		if (__predict_false(discard_multidesc))
   1915 			discard = true;
   1916 		else {
   1917 			/* Pre-alloc new mbuf. */
   1918 
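        			/*
        			 * Get the replacement buffer before the current one
        			 * is handed up the stack: if the allocation fails we
        			 * simply drop this packet and recycle the existing
        			 * cluster, so the ring is never left with an empty
        			 * slot.
        			 */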
   1919 			if ((rbuf->fmp == NULL) &&
   1920 			    eop && (len <= rx_copy_len)) {
   1921 				/* For short packet. See below. */
   1922 				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
   1923 				if (__predict_false(sendmp == NULL)) {
   1924 					IXGBE_EVC_ADD(&rxr->no_mbuf, 1);
   1925 					discard = true;
   1926 				}
   1927 			} else {
   1928 				/* For long packet. */
   1929 				newmp = ixgbe_getcl();
   1930 				if (__predict_false(newmp == NULL)) {
   1931 					IXGBE_EVC_ADD(&rxr->no_mbuf, 1);
   1932 					discard = true;
   1933 				}
   1934 			}
   1935 		}
   1936 
   1937 		if (__predict_false(discard)) {
   1938 			/*
   1939 			 * Descriptor initialization is already done by the
   1940 			 * above code (cur->wb.upper.status_error = 0).
   1941 			 * So, we can reuse current rbuf->buf for new packet.
   1942 			 *
   1943 			 * Rewrite the buffer addr, see comment in
   1944 			 * ixgbe_rx_discard().
   1945 			 */
   1946 			cur->read.pkt_addr = rbuf->addr;
   1947 			m_freem(rbuf->fmp);
   1948 			rbuf->fmp = NULL;
   1949 			if (!eop) {
   1950 				/* Discard the entire packet. */
   1951 				discard_multidesc = true;
   1952 			} else
   1953 				discard_multidesc = false;
   1954 			goto next_desc;
   1955 		}
   1956 		discard_multidesc = false;
   1957 
   1958 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
   1959 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
   1960 
   1961 		/*
   1962 		 * On the 82599, which supports a hardware
   1963 		 * LRO (called HW RSC), packets need not be
   1964 		 * fragmented across sequential descriptors;
   1965 		 * instead the index of the next descriptor
   1966 		 * is carried in bits of the current one.
   1967 		 * This also means we may be assembling more
   1968 		 * than one packet at a time, which required
   1969 		 * eliminating the global chain pointers in
   1970 		 * favor of keeping the chain head in the
   1971 		 * buffer struct, as done here.  -jfv
   1972 		 */
   1973 		if (!eop) {
   1974 			/*
   1975 			 * Figure out the next descriptor
   1976 			 * of this frame.
   1977 			 */
   1978 			if (rxr->hw_rsc == TRUE) {
   1979 				rsc = ixgbe_rsc_count(cur);
   1980 				rxr->rsc_num += (rsc - 1);
   1981 			}
   1982 			if (rsc) { /* Get hardware index */
   1983 				nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
   1984 				    IXGBE_RXDADV_NEXTP_SHIFT);
   1985 			} else { /* Just sequential */
   1986 				nextp = i + 1;
   1987 				if (nextp == sc->num_rx_desc)
   1988 					nextp = 0;
   1989 			}
   1990 			nbuf = &rxr->rx_buffers[nextp];
   1991 			prefetch(nbuf);
   1992 		}
   1993 		/*
   1994 		 * Rather than using the fmp/lmp global pointers
   1995 		 * we now keep the head of a packet chain in the
   1996 		 * buffer struct and pass this along from one
   1997 		 * descriptor to the next, until we get EOP.
   1998 		 */
   1999 		/*
   2000 		 * See if there is a stored chain head; that tells
   2001 		 * us whether this descriptor continues a frame.
   2002 		 */
   2003 		if (rbuf->fmp != NULL) {
   2004 			/* Secondary frag */
   2005 			sendmp = rbuf->fmp;
   2006 
   2007 			/* Update new (used in future) mbuf */
   2008 			newmp->m_pkthdr.len = newmp->m_len = rxr->mbuf_sz;
   2009 			IXGBE_M_ADJ(sc, rxr, newmp);
   2010 			rbuf->buf = newmp;
   2011 			rbuf->fmp = NULL;
   2012 
   2013 			/* For secondary frag */
   2014 			mp->m_len = len;
   2015 			mp->m_flags &= ~M_PKTHDR;
   2016 
   2017 			/* For sendmp */
   2018 			sendmp->m_pkthdr.len += mp->m_len;
   2019 		} else {
   2020 			/*
   2021 			 * It's the first segment of a multi descriptor
   2022 			 * packet or a single segment which contains a full
   2023 			 * packet.
   2024 			 */
   2025 
   2026 			if (eop && (len <= rx_copy_len)) {
   2027 				/*
   2028 				 * Optimization: this might be a small packet,
   2029 				 * perhaps just a TCP ACK. Copy it into a new
   2030 				 * mbuf and leave the old mbuf+cluster for re-use.
   2031 				 */
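        				/*
        				 * ETHER_ALIGN (2 bytes) keeps the IP header
        				 * 4-byte aligned behind the 14-byte Ethernet
        				 * header.
        				 */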
   2032 				sendmp->m_data += ETHER_ALIGN;
   2033 				memcpy(mtod(sendmp, void *),
   2034 				    mtod(mp, void *), len);
   2035 				IXGBE_EVC_ADD(&rxr->rx_copies, 1);
   2036 				rbuf->flags |= IXGBE_RX_COPY;
   2037 			} else {
   2038 				/* For long packet */
   2039 
   2040 				/* Update new (used in future) mbuf */
   2041 				newmp->m_pkthdr.len = newmp->m_len
   2042 				    = rxr->mbuf_sz;
   2043 				IXGBE_M_ADJ(sc, rxr, newmp);
   2044 				rbuf->buf = newmp;
   2045 				rbuf->fmp = NULL;
   2046 
   2047 				/* For sendmp */
   2048 				sendmp = mp;
   2049 			}
   2050 
   2051 			/* first desc of a non-ps chain */
   2052 			sendmp->m_pkthdr.len = sendmp->m_len = len;
   2053 		}
   2054 		++processed;
   2055 
   2056 		/* Pass the head pointer on */
   2057 		if (eop == 0) {
   2058 			nbuf->fmp = sendmp;
   2059 			sendmp = NULL;
   2060 			mp->m_next = nbuf->buf;
   2061 		} else { /* Sending this frame */
   2062 			m_set_rcvif(sendmp, ifp);
   2063 			++rxr->packets;
   2064 			IXGBE_EVC_ADD(&rxr->rx_packets, 1);
   2065 			/* capture data for AIM */
   2066 			rxr->bytes += sendmp->m_pkthdr.len;
   2067 			IXGBE_EVC_ADD(&rxr->rx_bytes, sendmp->m_pkthdr.len);
   2068 			/* Process vlan info */
   2069 			if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
   2070 				vtag = le16toh(cur->wb.upper.vlan);
   2071 			if (vtag) {
   2072 				vlan_set_tag(sendmp, vtag);
   2073 			}
   2074 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
   2075 				ixgbe_rx_checksum(staterr, sendmp, ptype,
   2076 				   &sc->stats.pf);
   2077 			}
   2078 
   2079 #if 0 /* FreeBSD */
   2080 			/*
   2081 			 * In case of multiqueue, we have RXCSUM.PCSD bit set
   2082 			 * and never cleared. This means we have RSS hash
   2083 			 * available to be used.
   2084 			 */
   2085 			if (sc->num_queues > 1) {
   2086 				sendmp->m_pkthdr.flowid =
   2087 				    le32toh(cur->wb.lower.hi_dword.rss);
   2088 				switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
   2089 				case IXGBE_RXDADV_RSSTYPE_IPV4:
   2090 					M_HASHTYPE_SET(sendmp,
   2091 					    M_HASHTYPE_RSS_IPV4);
   2092 					break;
   2093 				case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
   2094 					M_HASHTYPE_SET(sendmp,
   2095 					    M_HASHTYPE_RSS_TCP_IPV4);
   2096 					break;
   2097 				case IXGBE_RXDADV_RSSTYPE_IPV6:
   2098 					M_HASHTYPE_SET(sendmp,
   2099 					    M_HASHTYPE_RSS_IPV6);
   2100 					break;
   2101 				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
   2102 					M_HASHTYPE_SET(sendmp,
   2103 					    M_HASHTYPE_RSS_TCP_IPV6);
   2104 					break;
   2105 				case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
   2106 					M_HASHTYPE_SET(sendmp,
   2107 					    M_HASHTYPE_RSS_IPV6_EX);
   2108 					break;
   2109 				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
   2110 					M_HASHTYPE_SET(sendmp,
   2111 					    M_HASHTYPE_RSS_TCP_IPV6_EX);
   2112 					break;
   2113 #if __FreeBSD_version > 1100000
   2114 				case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
   2115 					M_HASHTYPE_SET(sendmp,
   2116 					    M_HASHTYPE_RSS_UDP_IPV4);
   2117 					break;
   2118 				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
   2119 					M_HASHTYPE_SET(sendmp,
   2120 					    M_HASHTYPE_RSS_UDP_IPV6);
   2121 					break;
   2122 				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
   2123 					M_HASHTYPE_SET(sendmp,
   2124 					    M_HASHTYPE_RSS_UDP_IPV6_EX);
   2125 					break;
   2126 #endif
   2127 				default:
   2128 					M_HASHTYPE_SET(sendmp,
   2129 					    M_HASHTYPE_OPAQUE_HASH);
   2130 				}
   2131 			} else {
   2132 				sendmp->m_pkthdr.flowid = que->msix;
   2133 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
   2134 			}
   2135 #endif
   2136 		}
   2137 next_desc:
   2138 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   2139 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   2140 
   2141 		/* Advance our pointers to the next descriptor. */
   2142 		if (++i == rxr->num_desc) {
   2143 			wraparound = true;
   2144 			i = 0;
   2145 		}
   2146 		rxr->next_to_check = i;
   2147 
   2148 		/* Now send to the stack or do LRO */
   2149 		if (sendmp != NULL)
   2150 			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
   2151 
   2152 		/* Every 8 descriptors we go to refresh mbufs */
   2153 		if (processed == 8) {
   2154 			ixgbe_refresh_mbufs(rxr, i);
   2155 			processed = 0;
   2156 		}
   2157 	}
   2158 
   2159 	/* Save the current status */
   2160 	rxr->discard_multidesc = discard_multidesc;
   2161 
   2162 	/* Refresh any remaining buf structs */
   2163 	if (ixgbe_rx_unrefreshed(rxr))
   2164 		ixgbe_refresh_mbufs(rxr, i);
   2165 
   2166 	IXGBE_RX_UNLOCK(rxr);
   2167 
   2168 #ifdef LRO
   2169 	/*
   2170 	 * Flush any outstanding LRO work
   2171 	 */
   2172 	tcp_lro_flush_all(lro);
   2173 #endif /* LRO */
   2174 
   2175 	/*
   2176 	 * Still have cleaning to do?
   2177 	 */
   2178 	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
   2179 		return (TRUE);
   2180 
   2181 	return (FALSE);
   2182 } /* ixgbe_rxeof */
   2183 
   2184 
   2185 /************************************************************************
   2186  * ixgbe_rx_checksum
   2187  *
   2188  *   Verify that the hardware indicated that the checksum is valid.
   2189  *   Inform the stack about the status of the checksum so that it
   2190  *   doesn't spend time verifying the checksum itself.
   2191  ************************************************************************/
   2192 static void
   2193 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
   2194     struct ixgbe_hw_stats *stats)
   2195 {
   2196 	u16  status = (u16)staterr;
   2197 	u8   errors = (u8)(staterr >> 24);
   2198 #if 0
   2199 	bool sctp = false;
   2200 
   2201 	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
   2202 	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
   2203 		sctp = true;
   2204 #endif
   2205 
   2206 	/* IPv4 checksum */
   2207 	if (status & IXGBE_RXD_STAT_IPCS) {
   2208 		IXGBE_EVC_ADD(&stats->ipcs, 1);
   2209 		if (!(errors & IXGBE_RXD_ERR_IPE)) {
   2210 			/* IP Checksum Good */
   2211 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
   2212 		} else {
   2213 			IXGBE_EVC_ADD(&stats->ipcs_bad, 1);
   2214 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
   2215 		}
   2216 	}
   2217 	/* TCP/UDP/SCTP checksum */
   2218 	if (status & IXGBE_RXD_STAT_L4CS) {
   2219 		IXGBE_EVC_ADD(&stats->l4cs, 1);
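        		/*
        		 * The descriptor does not say which L4 protocol or IP
        		 * version was checked, so claim all of them; the stack
        		 * only consults the flag matching the actual packet.
        		 */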
   2220 		int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
   2221 		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
   2222 			mp->m_pkthdr.csum_flags |= type;
   2223 		} else {
   2224 			IXGBE_EVC_ADD(&stats->l4cs_bad, 1);
   2225 			mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
   2226 		}
   2227 	}
   2228 } /* ixgbe_rx_checksum */
   2229 
   2230 /************************************************************************
   2231  * ixgbe_dma_malloc
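         *
         *   Allocate and map a contiguous, DMA-safe region for a descriptor
         *   ring using the usual bus_dma(9) sequence: create a tag, allocate
         *   and map the memory, create a map, and load it to obtain the bus
         *   address.
         *
         *   Typical use (sketch, mirroring ixgbe_allocate_queues() below):
         *
         *	if (ixgbe_dma_malloc(sc, tsize, &txr->txdma, BUS_DMA_NOWAIT))
         *		return (ENOMEM);
         *	txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
         *	...
         *	ixgbe_dma_free(sc, &txr->txdma);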
   2232  ************************************************************************/
   2233 int
   2234 ixgbe_dma_malloc(struct ixgbe_softc *sc, const bus_size_t size,
   2235 		struct ixgbe_dma_alloc *dma, const int mapflags)
   2236 {
   2237 	device_t dev = sc->dev;
   2238 	int      r, rsegs;
   2239 
   2240 	r = ixgbe_dma_tag_create(
   2241 	     /*      parent */ sc->osdep.dmat,
   2242 	     /*   alignment */ DBA_ALIGN,
   2243 	     /*      bounds */ 0,
   2244 	     /*     maxsize */ size,
   2245 	     /*   nsegments */ 1,
   2246 	     /*  maxsegsize */ size,
   2247 	     /*       flags */ BUS_DMA_ALLOCNOW,
   2248 			       &dma->dma_tag);
   2249 	if (r != 0) {
   2250 		aprint_error_dev(dev,
   2251 		    "%s: ixgbe_dma_tag_create failed; error %d\n", __func__,
   2252 		    r);
   2253 		goto fail_0;
   2254 	}
   2255 
   2256 	r = bus_dmamem_alloc(dma->dma_tag->dt_dmat, size,
   2257 	    dma->dma_tag->dt_alignment, dma->dma_tag->dt_boundary,
   2258 	    &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
   2259 	if (r != 0) {
   2260 		aprint_error_dev(dev,
   2261 		    "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
   2262 		goto fail_1;
   2263 	}
   2264 
   2265 	r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
   2266 	    size, &dma->dma_vaddr, BUS_DMA_NOWAIT | BUS_DMA_COHERENT);
   2267 	if (r != 0) {
   2268 		aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
   2269 		    __func__, r);
   2270 		goto fail_2;
   2271 	}
   2272 
   2273 	r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
   2274 	if (r != 0) {
   2275 		aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
   2276 		    __func__, r);
   2277 		goto fail_3;
   2278 	}
   2279 
   2280 	r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map,
   2281 	    dma->dma_vaddr, size, NULL, mapflags | BUS_DMA_NOWAIT);
   2282 	if (r != 0) {
   2283 		aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
   2284 		    __func__, r);
   2285 		goto fail_4;
   2286 	}
   2287 	dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
   2288 	dma->dma_size = size;
   2289 	return 0;
   2290 fail_4:
   2291 	ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
   2292 fail_3:
   2293 	bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
   2294 fail_2:
   2295 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
   2296 fail_1:
   2297 	ixgbe_dma_tag_destroy(dma->dma_tag);
   2298 fail_0:
   2299 
   2300 	return (r);
   2301 } /* ixgbe_dma_malloc */
   2302 
   2303 /************************************************************************
   2304  * ixgbe_dma_free
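         *
         *   Release everything ixgbe_dma_malloc() set up: sync and unload the
         *   map, unmap and free the memory, and destroy the tag.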
   2305  ************************************************************************/
   2306 void
   2307 ixgbe_dma_free(struct ixgbe_softc *sc, struct ixgbe_dma_alloc *dma)
   2308 {
   2309 	bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
   2310 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   2311 	ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
   2312 	bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, dma->dma_size);
   2313 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
   2314 	ixgbe_dma_tag_destroy(dma->dma_tag);
   2315 } /* ixgbe_dma_free */
   2316 
   2317 
   2318 /************************************************************************
   2319  * ixgbe_allocate_queues
   2320  *
   2321  *   Allocate memory for the transmit and receive rings, and then
   2322  *   the descriptors associated with each; called only once at attach.
   2323  ************************************************************************/
   2324 int
   2325 ixgbe_allocate_queues(struct ixgbe_softc *sc)
   2326 {
   2327 	device_t	dev = sc->dev;
   2328 	struct ix_queue	*que;
   2329 	struct tx_ring	*txr;
   2330 	struct rx_ring	*rxr;
   2331 	int             rsize, tsize, error = IXGBE_SUCCESS;
   2332 	int             txconf = 0, rxconf = 0;
   2333 
   2334 	/* First, allocate the top level queue structs */
   2335 	sc->queues = (struct ix_queue *)malloc(sizeof(struct ix_queue) *
   2336 	    sc->num_queues, M_DEVBUF, M_WAITOK | M_ZERO);
   2337 
   2338 	/* Second, allocate the TX ring struct memory */
   2339 	sc->tx_rings = malloc(sizeof(struct tx_ring) *
   2340 	    sc->num_queues, M_DEVBUF, M_WAITOK | M_ZERO);
   2341 
   2342 	/* Third, allocate the RX ring */
   2343 	sc->rx_rings = (struct rx_ring *)malloc(sizeof(struct rx_ring) *
   2344 	    sc->num_queues, M_DEVBUF, M_WAITOK | M_ZERO);
   2345 
   2346 	/* Size of each TX descriptor ring */
   2347 	tsize = roundup2(sc->num_tx_desc * sizeof(union ixgbe_adv_tx_desc),
   2348 	    DBA_ALIGN);
   2349 
   2350 	/*
   2351 	 * Now set up the TX queues. txconf is needed to handle the
   2352 	 * possibility that things fail midcourse and we need to unwind
   2353 	 * the memory allocations gracefully.
   2354 	 */
   2355 	for (int i = 0; i < sc->num_queues; i++, txconf++) {
   2356 		/* Set up some basics */
   2357 		txr = &sc->tx_rings[i];
   2358 		txr->sc = sc;
   2359 		txr->txr_interq = NULL;
   2360 		/* In case SR-IOV is enabled, align the index properly */
   2361 #ifdef PCI_IOV
   2362 		txr->me = ixgbe_vf_que_index(sc->iov_mode, sc->pool,
   2363 		    i);
   2364 #else
   2365 		txr->me = i;
   2366 #endif
   2367 		txr->num_desc = sc->num_tx_desc;
   2368 
   2369 		/* Initialize the TX side lock */
   2370 		mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
   2371 
   2372 		if (ixgbe_dma_malloc(sc, tsize, &txr->txdma,
   2373 		    BUS_DMA_NOWAIT)) {
   2374 			aprint_error_dev(dev,
   2375 			    "Unable to allocate TX Descriptor memory\n");
   2376 			error = ENOMEM;
   2377 			goto err_tx_desc;
   2378 		}
   2379 		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
   2380 		bzero((void *)txr->tx_base, tsize);
   2381 
   2382 		/* Now allocate transmit buffers for the ring */
   2383 		if (ixgbe_allocate_transmit_buffers(txr)) {
   2384 			aprint_error_dev(dev,
   2385 			    "Critical Failure setting up transmit buffers\n");
   2386 			error = ENOMEM;
   2387 			goto err_tx_desc;
   2388 		}
   2389 		if (!(sc->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
   2390 			/* Allocate a buf ring */
   2391 			txr->txr_interq = pcq_create(IXGBE_BR_SIZE, KM_SLEEP);
   2392 			if (txr->txr_interq == NULL) {
   2393 				aprint_error_dev(dev,
   2394 				    "Critical Failure setting up buf ring\n");
   2395 				error = ENOMEM;
   2396 				goto err_tx_desc;
   2397 			}
   2398 		}
   2399 	}
   2400 
   2401 	/*
   2402 	 * Next the RX queues...
   2403 	 */
   2404 	rsize = roundup2(sc->num_rx_desc * sizeof(union ixgbe_adv_rx_desc),
   2405 	    DBA_ALIGN);
   2406 	for (int i = 0; i < sc->num_queues; i++, rxconf++) {
   2407 		rxr = &sc->rx_rings[i];
   2408 		/* Set up some basics */
   2409 		rxr->sc = sc;
   2410 #ifdef PCI_IOV
   2411 		/* In case SR-IOV is enabled, align the index properly */
   2412 		rxr->me = ixgbe_vf_que_index(sc->iov_mode, sc->pool,
   2413 		    i);
   2414 #else
   2415 		rxr->me = i;
   2416 #endif
   2417 		rxr->num_desc = sc->num_rx_desc;
   2418 
   2419 		/* Initialize the RX side lock */
   2420 		mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
   2421 
   2422 		if (ixgbe_dma_malloc(sc, rsize, &rxr->rxdma,
   2423 		    BUS_DMA_NOWAIT)) {
   2424 			aprint_error_dev(dev,
   2425 			    "Unable to allocate RxDescriptor memory\n");
   2426 			error = ENOMEM;
   2427 			goto err_rx_desc;
   2428 		}
   2429 		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
   2430 		bzero((void *)rxr->rx_base, rsize);
   2431 
   2432 		/* Allocate receive buffers for the ring */
   2433 		if (ixgbe_allocate_receive_buffers(rxr)) {
   2434 			aprint_error_dev(dev,
   2435 			    "Critical Failure setting up receive buffers\n");
   2436 			error = ENOMEM;
   2437 			goto err_rx_desc;
   2438 		}
   2439 	}
   2440 
   2441 	/*
   2442 	 * Finally set up the queue holding structs
   2443 	 */
   2444 	for (int i = 0; i < sc->num_queues; i++) {
   2445 		que = &sc->queues[i];
   2446 		que->sc = sc;
   2447 		que->me = i;
   2448 		que->txr = &sc->tx_rings[i];
   2449 		que->rxr = &sc->rx_rings[i];
   2450 
   2451 		mutex_init(&que->dc_mtx, MUTEX_DEFAULT, IPL_NET);
   2452 		que->disabled_count = 0;
   2453 	}
   2454 
   2455 	return (0);
   2456 
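        	/*
        	 * Unwind in reverse: rxconf/txconf count the completed loop
        	 * iterations, so only the rings that were fully set up have
        	 * their descriptor memory freed here, followed by the arrays.
        	 */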
   2457 err_rx_desc:
   2458 	for (rxr = sc->rx_rings; rxconf > 0; rxr++, rxconf--)
   2459 		ixgbe_dma_free(sc, &rxr->rxdma);
   2460 err_tx_desc:
   2461 	for (txr = sc->tx_rings; txconf > 0; txr++, txconf--)
   2462 		ixgbe_dma_free(sc, &txr->txdma);
   2463 	free(sc->rx_rings, M_DEVBUF);
   2464 	free(sc->tx_rings, M_DEVBUF);
   2465 	free(sc->queues, M_DEVBUF);
   2466 	return (error);
   2467 } /* ixgbe_allocate_queues */
   2468 
   2469 /************************************************************************
   2470  * ixgbe_free_queues
   2471  *
   2472  *   Free descriptors for the transmit and receive rings, and then
   2473  *   the memory associated with each.
   2474  ************************************************************************/
   2475 void
   2476 ixgbe_free_queues(struct ixgbe_softc *sc)
   2477 {
   2478 	struct ix_queue *que;
   2479 	int i;
   2480 
   2481 	ixgbe_free_transmit_structures(sc);
   2482 	ixgbe_free_receive_structures(sc);
   2483 	for (i = 0; i < sc->num_queues; i++) {
   2484 		que = &sc->queues[i];
   2485 		mutex_destroy(&que->dc_mtx);
   2486 	}
   2487 	free(sc->queues, M_DEVBUF);
   2488 } /* ixgbe_free_queues */
   2489