      1 /* $NetBSD: ix_txrx.c,v 1.93 2021/09/08 08:46:28 msaitoh Exp $ */
      2 
      3 /******************************************************************************
      4 
      5   Copyright (c) 2001-2017, Intel Corporation
      6   All rights reserved.
      7 
      8   Redistribution and use in source and binary forms, with or without
      9   modification, are permitted provided that the following conditions are met:
     10 
     11    1. Redistributions of source code must retain the above copyright notice,
     12       this list of conditions and the following disclaimer.
     13 
     14    2. Redistributions in binary form must reproduce the above copyright
     15       notice, this list of conditions and the following disclaimer in the
     16       documentation and/or other materials provided with the distribution.
     17 
     18    3. Neither the name of the Intel Corporation nor the names of its
     19       contributors may be used to endorse or promote products derived from
     20       this software without specific prior written permission.
     21 
     22   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     23   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     24   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     25   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     26   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     27   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     28   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     29   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     30   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     31   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     32   POSSIBILITY OF SUCH DAMAGE.
     33 
     34 ******************************************************************************/
     35 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 327031 2017-12-20 18:15:06Z erj $*/
     36 
     37 /*
     38  * Copyright (c) 2011 The NetBSD Foundation, Inc.
     39  * All rights reserved.
     40  *
     41  * This code is derived from software contributed to The NetBSD Foundation
     42  * by Coyote Point Systems, Inc.
     43  *
     44  * Redistribution and use in source and binary forms, with or without
     45  * modification, are permitted provided that the following conditions
     46  * are met:
     47  * 1. Redistributions of source code must retain the above copyright
     48  *    notice, this list of conditions and the following disclaimer.
     49  * 2. Redistributions in binary form must reproduce the above copyright
     50  *    notice, this list of conditions and the following disclaimer in the
     51  *    documentation and/or other materials provided with the distribution.
     52  *
     53  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     54  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     55  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     56  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     57  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     58  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     59  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     60  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     61  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     62  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     63  * POSSIBILITY OF SUCH DAMAGE.
     64  */
     65 
     66 #include <sys/cdefs.h>
     67 __KERNEL_RCSID(0, "$NetBSD: ix_txrx.c,v 1.93 2021/09/08 08:46:28 msaitoh Exp $");
     68 
     69 #include "opt_inet.h"
     70 #include "opt_inet6.h"
     71 
     72 #include "ixgbe.h"
     73 
     74 /*
     75  * HW RSC control:
     76  *  This feature only works with
     77  *  IPv4, and only on 82599 and later.
     78  *  It also breaks IP forwarding and,
     79  *  unlike LRO, the stack cannot turn
     80  *  it off. For all these reasons it
     81  *  is left off by default, with no
     82  *  tunable to enable it at run time;
     83  *  turning it on requires recompiling
     84  *  with this set to TRUE.
     85  */
     86 static bool ixgbe_rsc_enable = FALSE;
     87 
     88 /*
     89  * For Flow Director: this is the
     90  * TX packet sampling interval for
     91  * the filter pool; with the default
     92  * of 20, every 20th packet is probed.
     93  *
     94  * This feature can be disabled by
     95  * setting this to 0.
     96  */
     97 static int atr_sample_rate = 20;
     98 
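        /*
         * IXGBE_M_ADJ below: when the configured maximum frame still fits in
         * the receive cluster, trim ETHER_ALIGN (2) bytes from the front of a
         * freshly allocated mbuf so that the IP header following the 14-byte
         * Ethernet header ends up 4-byte aligned.
         */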
     99 #define IXGBE_M_ADJ(adapter, rxr, mp)					\
    100 	if (adapter->max_frame_size <= (rxr->mbuf_sz - ETHER_ALIGN))	\
    101 		m_adj(mp, ETHER_ALIGN)
    102 
    103 /************************************************************************
    104  *  Local Function prototypes
    105  ************************************************************************/
    106 static void          ixgbe_setup_transmit_ring(struct tx_ring *);
    107 static void          ixgbe_free_transmit_buffers(struct tx_ring *);
    108 static int           ixgbe_setup_receive_ring(struct rx_ring *);
    109 static void          ixgbe_free_receive_buffers(struct rx_ring *);
    110 static void          ixgbe_rx_checksum(u32, struct mbuf *, u32,
    111                                        struct ixgbe_hw_stats *);
    112 static void          ixgbe_refresh_mbufs(struct rx_ring *, int);
    113 static void          ixgbe_drain(struct ifnet *, struct tx_ring *);
    114 static int           ixgbe_xmit(struct tx_ring *, struct mbuf *);
    115 static int           ixgbe_tx_ctx_setup(struct tx_ring *,
    116                                         struct mbuf *, u32 *, u32 *);
    117 static int           ixgbe_tso_setup(struct tx_ring *,
    118                                      struct mbuf *, u32 *, u32 *);
    119 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
    120 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
    121                                     struct mbuf *, u32);
    122 static int           ixgbe_dma_malloc(struct adapter *, bus_size_t,
    123                                       struct ixgbe_dma_alloc *, int);
    124 static void          ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
    125 
    126 static void	ixgbe_setup_hw_rsc(struct rx_ring *);
    127 
    128 /************************************************************************
    129  * ixgbe_legacy_start_locked - Transmit entry point
    130  *
    131  *   Called by the stack to initiate a transmit.
    132  *   The driver will remain in this routine as long as there are
    133  *   packets to transmit and transmit resources are available.
    134  *   In case resources are not available, the stack is notified
    135  *   and the packet is requeued.
    136  ************************************************************************/
    137 int
    138 ixgbe_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
    139 {
    140 	int rc;
    141 	struct mbuf    *m_head;
    142 	struct adapter *adapter = txr->adapter;
    143 
    144 	IXGBE_TX_LOCK_ASSERT(txr);
    145 
    146 	if (adapter->link_active != LINK_STATE_UP) {
    147 		/*
    148 		 * Discard all packets buffered in the IFQ to avoid
    149 		 * sending stale packets when the link next comes up.
    150 		 */
    151 		ixgbe_drain(ifp, txr);
    152 		return (ENETDOWN);
    153 	}
    154 	if ((ifp->if_flags & IFF_RUNNING) == 0)
    155 		return (ENETDOWN);
    156 	if (txr->txr_no_space)
    157 		return (ENETDOWN);
    158 
    159 	while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
    160 		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
    161 			break;
    162 
    163 		IFQ_POLL(&ifp->if_snd, m_head);
    164 		if (m_head == NULL)
    165 			break;
    166 
    167 		if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
    168 			break;
    169 		}
    170 		IFQ_DEQUEUE(&ifp->if_snd, m_head);
    171 		if (rc != 0) {
    172 			m_freem(m_head);
    173 			continue;
    174 		}
    175 
    176 		/* Send a copy of the frame to the BPF listener */
    177 		bpf_mtap(ifp, m_head, BPF_D_OUT);
    178 	}
    179 
    180 	return IXGBE_SUCCESS;
    181 } /* ixgbe_legacy_start_locked */
    182 
    183 /************************************************************************
    184  * ixgbe_legacy_start
    185  *
    186  *   Called by the stack, this always uses the first tx ring,
    187  *   and should not be used with multiqueue tx enabled.
    188  ************************************************************************/
    189 void
    190 ixgbe_legacy_start(struct ifnet *ifp)
    191 {
    192 	struct adapter *adapter = ifp->if_softc;
    193 	struct tx_ring *txr = adapter->tx_rings;
    194 
    195 	if (ifp->if_flags & IFF_RUNNING) {
    196 		IXGBE_TX_LOCK(txr);
    197 		ixgbe_legacy_start_locked(ifp, txr);
    198 		IXGBE_TX_UNLOCK(txr);
    199 	}
    200 } /* ixgbe_legacy_start */
    201 
    202 /************************************************************************
    203  * ixgbe_mq_start - Multiqueue Transmit Entry Point
    204  *
    205  *   (if_transmit function)
    206  ************************************************************************/
    207 int
    208 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
    209 {
    210 	struct adapter	*adapter = ifp->if_softc;
    211 	struct tx_ring	*txr;
    212 	int		i;
    213 #ifdef RSS
    214 	uint32_t bucket_id;
    215 #endif
    216 
    217 	/*
    218 	 * When doing RSS, map it to the same outbound queue
    219 	 * as the incoming flow would be mapped to.
    220 	 *
    221 	 * If everything is setup correctly, it should be the
    222 	 * same bucket that the current CPU we're on is.
    223 	 */
    224 #ifdef RSS
    225 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
    226 		if ((adapter->feat_en & IXGBE_FEATURE_RSS) &&
    227 		    (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
    228 		    &bucket_id) == 0)) {
    229 			i = bucket_id % adapter->num_queues;
    230 #ifdef IXGBE_DEBUG
    231 			if (bucket_id >= adapter->num_queues)
    232 				if_printf(ifp,
    233 				    "bucket_id (%d) >= num_queues (%d)\n",
    234 				    bucket_id, adapter->num_queues);
    235 #endif
    236 		} else
    237 			i = m->m_pkthdr.flowid % adapter->num_queues;
    238 	} else
    239 #endif /* RSS */
    240 		i = (cpu_index(curcpu()) % ncpu) % adapter->num_queues;
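        	/*
        	 * Example: with num_queues = 4 on an 8-CPU machine, a packet
        	 * sent from the CPU at index 6 is queued on
        	 * i = (6 % 8) % 4 = 2, i.e. tx_rings[2], unless that queue is
        	 * inactive and the fallback below picks another one.
        	 */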
    241 
    242 	/* Check for a hung queue and pick alternative */
    243 	if (((1ULL << i) & adapter->active_queues) == 0)
    244 		i = ffs64(adapter->active_queues);
    245 
    246 	txr = &adapter->tx_rings[i];
    247 
    248 	if (__predict_false(!pcq_put(txr->txr_interq, m))) {
    249 		m_freem(m);
    250 		txr->pcq_drops.ev_count++;
    251 		return ENOBUFS;
    252 	}
    253 	if (IXGBE_TX_TRYLOCK(txr)) {
    254 		ixgbe_mq_start_locked(ifp, txr);
    255 		IXGBE_TX_UNLOCK(txr);
    256 	} else {
    257 		if (adapter->txrx_use_workqueue) {
    258 			u_int *enqueued;
    259 
    260 			/*
    261 			 * This function itself is not called in interrupt
    262 			 * context, but it can run in fast softint context
    263 			 * right after a forwarded packet has been received.
    264 			 * The workqueue must therefore be protected against
    265 			 * being enqueued twice, which can happen when the
    266 			 * machine handles both local and forwarded packets.
    267 			 */
    268 			enqueued = percpu_getref(adapter->txr_wq_enqueued);
    269 			if (*enqueued == 0) {
    270 				*enqueued = 1;
    271 				percpu_putref(adapter->txr_wq_enqueued);
    272 				workqueue_enqueue(adapter->txr_wq,
    273 				    &txr->wq_cookie, curcpu());
    274 			} else
    275 				percpu_putref(adapter->txr_wq_enqueued);
    276 		} else {
    277 			kpreempt_disable();
    278 			softint_schedule(txr->txr_si);
    279 			kpreempt_enable();
    280 		}
    281 	}
    282 
    283 	return (0);
    284 } /* ixgbe_mq_start */
    285 
    286 /************************************************************************
    287  * ixgbe_mq_start_locked
    288  ************************************************************************/
    289 int
    290 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
    291 {
    292 	struct mbuf    *next;
    293 	int            enqueued = 0, err = 0;
    294 
    295 	if (txr->adapter->link_active != LINK_STATE_UP) {
    296 		/*
    297 		 * Discard all packets buffered in txr_interq to avoid
    298 		 * sending stale packets when the link next comes up.
    299 		 */
    300 		ixgbe_drain(ifp, txr);
    301 		return (ENETDOWN);
    302 	}
    303 	if ((ifp->if_flags & IFF_RUNNING) == 0)
    304 		return (ENETDOWN);
    305 	if (txr->txr_no_space)
    306 		return (ENETDOWN);
    307 
    308 	/* Process the queue */
    309 	while ((next = pcq_get(txr->txr_interq)) != NULL) {
    310 		if ((err = ixgbe_xmit(txr, next)) != 0) {
    311 			m_freem(next);
    312 			/* All errors are counted in ixgbe_xmit() */
    313 			break;
    314 		}
    315 		enqueued++;
    316 #if __FreeBSD_version >= 1100036
    317 		/*
    318 		 * Since we're looking at the tx ring, we can check
    319 		 * whether this is a VF by examining the adapter's
    320 		 * feature flags.
    321 		 */
    322 		if ((txr->adapter->feat_en & IXGBE_FEATURE_VF) &&
    323 		    (next->m_flags & M_MCAST))
    324 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
    325 #endif
    326 		/* Send a copy of the frame to the BPF listener */
    327 		bpf_mtap(ifp, next, BPF_D_OUT);
    328 		if ((ifp->if_flags & IFF_RUNNING) == 0)
    329 			break;
    330 	}
    331 
    332 	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->adapter))
    333 		ixgbe_txeof(txr);
    334 
    335 	return (err);
    336 } /* ixgbe_mq_start_locked */
    337 
    338 /************************************************************************
    339  * ixgbe_deferred_mq_start
    340  *
    341  *   Called from a softint and workqueue (indirectly) to drain queued
    342  *   transmit packets.
    343  ************************************************************************/
    344 void
    345 ixgbe_deferred_mq_start(void *arg)
    346 {
    347 	struct tx_ring *txr = arg;
    348 	struct adapter *adapter = txr->adapter;
    349 	struct ifnet   *ifp = adapter->ifp;
    350 
    351 	IXGBE_TX_LOCK(txr);
    352 	if (pcq_peek(txr->txr_interq) != NULL)
    353 		ixgbe_mq_start_locked(ifp, txr);
    354 	IXGBE_TX_UNLOCK(txr);
    355 } /* ixgbe_deferred_mq_start */
    356 
    357 /************************************************************************
    358  * ixgbe_deferred_mq_start_work
    359  *
    360  *   Called from a workqueue to drain queued transmit packets.
    361  ************************************************************************/
    362 void
    363 ixgbe_deferred_mq_start_work(struct work *wk, void *arg)
    364 {
    365 	struct tx_ring *txr = container_of(wk, struct tx_ring, wq_cookie);
    366 	struct adapter *adapter = txr->adapter;
    367 	u_int *enqueued = percpu_getref(adapter->txr_wq_enqueued);
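        	/* Re-arm the single-enqueue guard set in ixgbe_mq_start(). */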
    368 	*enqueued = 0;
    369 	percpu_putref(adapter->txr_wq_enqueued);
    370 
    371 	ixgbe_deferred_mq_start(txr);
    372 } /* ixgbe_deferred_mq_start_work */
    373 
    374 /************************************************************************
    375  * ixgbe_drain_all
    376  ************************************************************************/
    377 void
    378 ixgbe_drain_all(struct adapter *adapter)
    379 {
    380 	struct ifnet *ifp = adapter->ifp;
    381 	struct ix_queue *que = adapter->queues;
    382 
    383 	for (int i = 0; i < adapter->num_queues; i++, que++) {
    384 		struct tx_ring  *txr = que->txr;
    385 
    386 		IXGBE_TX_LOCK(txr);
    387 		ixgbe_drain(ifp, txr);
    388 		IXGBE_TX_UNLOCK(txr);
    389 	}
    390 }
    391 
    392 /************************************************************************
    393  * ixgbe_xmit
    394  *
    395  *   Maps the mbufs to tx descriptors, allowing the
    396  *   TX engine to transmit the packets.
    397  *
    398  *   Return 0 on success, positive on failure
    399  ************************************************************************/
    400 static int
    401 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
    402 {
    403 	struct adapter          *adapter = txr->adapter;
    404 	struct ixgbe_tx_buf     *txbuf;
    405 	union ixgbe_adv_tx_desc *txd = NULL;
    406 	struct ifnet	        *ifp = adapter->ifp;
    407 	int                     i, j, error;
    408 	int                     first;
    409 	u32                     olinfo_status = 0, cmd_type_len;
    410 	bool                    remap = TRUE;
    411 	bus_dmamap_t            map;
    412 
    413 	/* Basic descriptor defines */
    414 	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
    415 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
    416 
    417 	if (vlan_has_tag(m_head))
    418 		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
    419 
    420 	/*
    421 	 * Important to capture the first descriptor
    422 	 * used because it will contain the index of
    423 	 * the one we tell the hardware to report back
    424 	 */
    425 	first = txr->next_avail_desc;
    426 	txbuf = &txr->tx_buffers[first];
    427 	map = txbuf->map;
    428 
    429 	/*
    430 	 * Map the packet for DMA.
    431 	 */
    432 retry:
    433 	error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map, m_head,
    434 	    BUS_DMA_NOWAIT);
    435 
    436 	if (__predict_false(error)) {
    437 		struct mbuf *m;
    438 
    439 		switch (error) {
    440 		case EAGAIN:
    441 			txr->q_eagain_tx_dma_setup++;
    442 			return EAGAIN;
    443 		case ENOMEM:
    444 			txr->q_enomem_tx_dma_setup++;
    445 			return EAGAIN;
    446 		case EFBIG:
    447 			/* Try it again? - one try */
    448 			if (remap == TRUE) {
    449 				remap = FALSE;
    450 				/*
    451 				 * XXX: m_defrag will choke on
    452 				 * non-MCLBYTES-sized clusters
    453 				 */
    454 				txr->q_efbig_tx_dma_setup++;
    455 				m = m_defrag(m_head, M_NOWAIT);
    456 				if (m == NULL) {
    457 					txr->q_mbuf_defrag_failed++;
    458 					return ENOBUFS;
    459 				}
    460 				m_head = m;
    461 				goto retry;
    462 			} else {
    463 				txr->q_efbig2_tx_dma_setup++;
    464 				return error;
    465 			}
    466 		case EINVAL:
    467 			txr->q_einval_tx_dma_setup++;
    468 			return error;
    469 		default:
    470 			txr->q_other_tx_dma_setup++;
    471 			return error;
    472 		}
    473 	}
    474 
    475 	/* Make certain there are enough descriptors */
    476 	if (txr->tx_avail < (map->dm_nsegs + 2)) {
    477 		txr->txr_no_space = true;
    478 		txr->no_desc_avail.ev_count++;
    479 		ixgbe_dmamap_unload(txr->txtag, txbuf->map);
    480 		return EAGAIN;
    481 	}
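        	/*
        	 * Illustration: a packet mapped into 5 DMA segments consumes at
        	 * most 6 descriptors (5 data descriptors plus, when needed, one
        	 * context descriptor from ixgbe_tx_ctx_setup() below), so
        	 * requiring nsegs + 2 free slots always leaves at least one
        	 * descriptor spare.
        	 */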
    482 
    483 	/*
    484 	 * Set up the appropriate offload context
    485 	 * this will consume the first descriptor
    486 	 */
    487 	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
    488 	if (__predict_false(error)) {
    489 		return (error);
    490 	}
    491 
    492 #ifdef IXGBE_FDIR
    493 	/* Do the flow director magic */
    494 	if ((adapter->feat_en & IXGBE_FEATURE_FDIR) &&
    495 	    (txr->atr_sample) && (!adapter->fdir_reinit)) {
    496 		++txr->atr_count;
    497 		if (txr->atr_count >= atr_sample_rate) {
    498 			ixgbe_atr(txr, m_head);
    499 			txr->atr_count = 0;
    500 		}
    501 	}
    502 #endif
    503 
    504 	olinfo_status |= IXGBE_ADVTXD_CC;
    505 	i = txr->next_avail_desc;
    506 	for (j = 0; j < map->dm_nsegs; j++) {
    507 		bus_size_t seglen;
    508 		uint64_t segaddr;
    509 
    510 		txbuf = &txr->tx_buffers[i];
    511 		txd = &txr->tx_base[i];
    512 		seglen = map->dm_segs[j].ds_len;
    513 		segaddr = htole64(map->dm_segs[j].ds_addr);
    514 
    515 		txd->read.buffer_addr = segaddr;
    516 		txd->read.cmd_type_len = htole32(cmd_type_len | seglen);
    517 		txd->read.olinfo_status = htole32(olinfo_status);
    518 
    519 		if (++i == txr->num_desc)
    520 			i = 0;
    521 	}
    522 
    523 	txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
    524 	txr->tx_avail -= map->dm_nsegs;
    525 	txr->next_avail_desc = i;
    526 
    527 	txbuf->m_head = m_head;
    528 	/*
    529 	 * Swap the maps: the last descriptor (the one whose
    530 	 * completion is reported back) keeps the map that
    531 	 * actually holds the packet, while the first descriptor
    532 	 * gets the map that was left unused on this buffer.
    533 	 */
    534 	txr->tx_buffers[first].map = txbuf->map;
    535 	txbuf->map = map;
    536 	bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
    537 	    BUS_DMASYNC_PREWRITE);
    538 
    539 	/* Set the EOP descriptor that will be marked done */
    540 	txbuf = &txr->tx_buffers[first];
    541 	txbuf->eop = txd;
    542 
    543 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
    544 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
    545 	/*
    546 	 * Advance the Transmit Descriptor Tail (Tdt), this tells the
    547 	 * hardware that this frame is available to transmit.
    548 	 */
    549 	++txr->total_packets.ev_count;
    550 	IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
    551 
    552 	net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
    553 	if_statadd_ref(nsr, if_obytes, m_head->m_pkthdr.len);
    554 	if (m_head->m_flags & M_MCAST)
    555 		if_statinc_ref(nsr, if_omcasts);
    556 	IF_STAT_PUTREF(ifp);
    557 
    558 	/* Mark queue as having work */
    559 	if (txr->busy == 0)
    560 		txr->busy = 1;
    561 
    562 	return (0);
    563 } /* ixgbe_xmit */
    564 
    565 /************************************************************************
    566  * ixgbe_drain
    567  ************************************************************************/
    568 static void
    569 ixgbe_drain(struct ifnet *ifp, struct tx_ring *txr)
    570 {
    571 	struct mbuf *m;
    572 
    573 	IXGBE_TX_LOCK_ASSERT(txr);
    574 
    575 	if (txr->me == 0) {
    576 		while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
    577 			IFQ_DEQUEUE(&ifp->if_snd, m);
    578 			m_freem(m);
    579 			IF_DROP(&ifp->if_snd);
    580 		}
    581 	}
    582 
    583 	while ((m = pcq_get(txr->txr_interq)) != NULL) {
    584 		m_freem(m);
    585 		txr->pcq_drops.ev_count++;
    586 	}
    587 }
    588 
    589 /************************************************************************
    590  * ixgbe_allocate_transmit_buffers
    591  *
    592  *   Allocate memory for tx_buffer structures. The tx_buffer stores all
    593  *   the information needed to transmit a packet on the wire. This is
    594  *   called only once at attach, setup is done every reset.
    595  ************************************************************************/
    596 static int
    597 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
    598 {
    599 	struct adapter      *adapter = txr->adapter;
    600 	device_t            dev = adapter->dev;
    601 	struct ixgbe_tx_buf *txbuf;
    602 	int                 error, i;
    603 
    604 	/*
    605 	 * Setup DMA descriptor areas.
    606 	 */
    607 	error = ixgbe_dma_tag_create(
    608 	         /*      parent */ adapter->osdep.dmat,
    609 	         /*   alignment */ 1,
    610 	         /*      bounds */ 0,
    611 	         /*     maxsize */ IXGBE_TSO_SIZE,
    612 	         /*   nsegments */ adapter->num_segs,
    613 	         /*  maxsegsize */ PAGE_SIZE,
    614 	         /*       flags */ 0,
    615 	                           &txr->txtag);
    616 	if (error != 0) {
    617 		aprint_error_dev(dev, "Unable to allocate TX DMA tag\n");
    618 		goto fail;
    619 	}
    620 
    621 	txr->tx_buffers = malloc(sizeof(struct ixgbe_tx_buf) *
    622 	    adapter->num_tx_desc, M_DEVBUF, M_WAITOK | M_ZERO);
    623 
    624 	/* Create the descriptor buffer dma maps */
    625 	txbuf = txr->tx_buffers;
    626 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
    627 		error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
    628 		if (error != 0) {
    629 			aprint_error_dev(dev,
    630 			    "Unable to create TX DMA map (%d)\n", error);
    631 			goto fail;
    632 		}
    633 	}
    634 
    635 	return 0;
    636 fail:
    637 	/* Free everything; this handles the case where we failed partway */
    638 #if 0 /* XXX was FreeBSD */
    639 	ixgbe_free_transmit_structures(adapter);
    640 #else
    641 	ixgbe_free_transmit_buffers(txr);
    642 #endif
    643 	return (error);
    644 } /* ixgbe_allocate_transmit_buffers */
    645 
    646 /************************************************************************
    647  * ixgbe_setup_transmit_ring - Initialize a transmit ring.
    648  ************************************************************************/
    649 static void
    650 ixgbe_setup_transmit_ring(struct tx_ring *txr)
    651 {
    652 	struct adapter        *adapter = txr->adapter;
    653 	struct ixgbe_tx_buf   *txbuf;
    654 #ifdef DEV_NETMAP
    655 	struct netmap_adapter *na = NA(adapter->ifp);
    656 	struct netmap_slot    *slot;
    657 #endif /* DEV_NETMAP */
    658 
    659 	/* Clear the old ring contents */
    660 	IXGBE_TX_LOCK(txr);
    661 
    662 #ifdef DEV_NETMAP
    663 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
    664 		/*
    665 		 * (under lock): if in netmap mode, do some consistency
    666 		 * checks and set slot to entry 0 of the netmap ring.
    667 		 */
    668 		slot = netmap_reset(na, NR_TX, txr->me, 0);
    669 	}
    670 #endif /* DEV_NETMAP */
    671 
    672 	bzero((void *)txr->tx_base,
    673 	    (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
    674 	/* Reset indices */
    675 	txr->next_avail_desc = 0;
    676 	txr->next_to_clean = 0;
    677 
    678 	/* Free any existing tx buffers. */
    679 	txbuf = txr->tx_buffers;
    680 	for (int i = 0; i < txr->num_desc; i++, txbuf++) {
    681 		if (txbuf->m_head != NULL) {
    682 			bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
    683 			    0, txbuf->m_head->m_pkthdr.len,
    684 			    BUS_DMASYNC_POSTWRITE);
    685 			ixgbe_dmamap_unload(txr->txtag, txbuf->map);
    686 			m_freem(txbuf->m_head);
    687 			txbuf->m_head = NULL;
    688 		}
    689 
    690 #ifdef DEV_NETMAP
    691 		/*
    692 		 * In netmap mode, set the map for the packet buffer.
    693 		 * NOTE: Some drivers (not this one) also need to set
    694 		 * the physical buffer address in the NIC ring.
    695 		 * Slots in the netmap ring (indexed by "si") are
    696 		 * kring->nkr_hwofs positions "ahead" wrt the
    697 		 * corresponding slot in the NIC ring. In some drivers
    698 		 * (not here) nkr_hwofs can be negative. Function
    699 		 * netmap_idx_n2k() handles wraparounds properly.
    700 		 */
    701 		if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
    702 			int si = netmap_idx_n2k(na->tx_rings[txr->me], i);
    703 			netmap_load_map(na, txr->txtag,
    704 			    txbuf->map, NMB(na, slot + si));
    705 		}
    706 #endif /* DEV_NETMAP */
    707 
    708 		/* Clear the EOP descriptor pointer */
    709 		txbuf->eop = NULL;
    710 	}
    711 
    712 	/* Set the rate at which we sample packets */
    713 	if (adapter->feat_en & IXGBE_FEATURE_FDIR)
    714 		txr->atr_sample = atr_sample_rate;
    715 
    716 	/* Set number of descriptors available */
    717 	txr->tx_avail = adapter->num_tx_desc;
    718 
    719 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
    720 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
    721 	IXGBE_TX_UNLOCK(txr);
    722 } /* ixgbe_setup_transmit_ring */
    723 
    724 /************************************************************************
    725  * ixgbe_setup_transmit_structures - Initialize all transmit rings.
    726  ************************************************************************/
    727 int
    728 ixgbe_setup_transmit_structures(struct adapter *adapter)
    729 {
    730 	struct tx_ring *txr = adapter->tx_rings;
    731 
    732 	for (int i = 0; i < adapter->num_queues; i++, txr++)
    733 		ixgbe_setup_transmit_ring(txr);
    734 
    735 	return (0);
    736 } /* ixgbe_setup_transmit_structures */
    737 
    738 /************************************************************************
    739  * ixgbe_free_transmit_structures - Free all transmit rings.
    740  ************************************************************************/
    741 void
    742 ixgbe_free_transmit_structures(struct adapter *adapter)
    743 {
    744 	struct tx_ring *txr = adapter->tx_rings;
    745 
    746 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
    747 		ixgbe_free_transmit_buffers(txr);
    748 		ixgbe_dma_free(adapter, &txr->txdma);
    749 		IXGBE_TX_LOCK_DESTROY(txr);
    750 	}
    751 	free(adapter->tx_rings, M_DEVBUF);
    752 } /* ixgbe_free_transmit_structures */
    753 
    754 /************************************************************************
    755  * ixgbe_free_transmit_buffers
    756  *
    757  *   Free transmit ring related data structures.
    758  ************************************************************************/
    759 static void
    760 ixgbe_free_transmit_buffers(struct tx_ring *txr)
    761 {
    762 	struct adapter      *adapter = txr->adapter;
    763 	struct ixgbe_tx_buf *tx_buffer;
    764 	int                 i;
    765 
    766 	INIT_DEBUGOUT("ixgbe_free_transmit_buffers: begin");
    767 
    768 	if (txr->tx_buffers == NULL)
    769 		return;
    770 
    771 	tx_buffer = txr->tx_buffers;
    772 	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
    773 		if (tx_buffer->m_head != NULL) {
    774 			bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
    775 			    0, tx_buffer->m_head->m_pkthdr.len,
    776 			    BUS_DMASYNC_POSTWRITE);
    777 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
    778 			m_freem(tx_buffer->m_head);
    779 			tx_buffer->m_head = NULL;
    780 			if (tx_buffer->map != NULL) {
    781 				ixgbe_dmamap_destroy(txr->txtag,
    782 				    tx_buffer->map);
    783 				tx_buffer->map = NULL;
    784 			}
    785 		} else if (tx_buffer->map != NULL) {
    786 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
    787 			ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
    788 			tx_buffer->map = NULL;
    789 		}
    790 	}
    791 	if (txr->txr_interq != NULL) {
    792 		struct mbuf *m;
    793 
    794 		while ((m = pcq_get(txr->txr_interq)) != NULL)
    795 			m_freem(m);
    796 		pcq_destroy(txr->txr_interq);
    797 	}
    798 	if (txr->tx_buffers != NULL) {
    799 		free(txr->tx_buffers, M_DEVBUF);
    800 		txr->tx_buffers = NULL;
    801 	}
    802 	if (txr->txtag != NULL) {
    803 		ixgbe_dma_tag_destroy(txr->txtag);
    804 		txr->txtag = NULL;
    805 	}
    806 } /* ixgbe_free_transmit_buffers */
    807 
    808 /************************************************************************
    809  * ixgbe_tx_ctx_setup
    810  *
    811  *   Advanced Context Descriptor setup for VLAN, CSUM or TSO
    812  ************************************************************************/
    813 static int
    814 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
    815     u32 *cmd_type_len, u32 *olinfo_status)
    816 {
    817 	struct adapter                   *adapter = txr->adapter;
    818 	struct ixgbe_adv_tx_context_desc *TXD;
    819 	struct ether_vlan_header         *eh;
    820 #ifdef INET
    821 	struct ip                        *ip;
    822 #endif
    823 #ifdef INET6
    824 	struct ip6_hdr                   *ip6;
    825 #endif
    826 	int                              ehdrlen, ip_hlen = 0;
    827 	int                              offload = TRUE;
    828 	int                              ctxd = txr->next_avail_desc;
    829 	u32                              vlan_macip_lens = 0;
    830 	u32                              type_tucmd_mlhl = 0;
    831 	u16                              vtag = 0;
    832 	u16                              etype;
    833 	u8                               ipproto = 0;
    834 	char                             *l3d;
    835 
    836 
    837 	/* First check if TSO is to be used */
    838 	if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4 | M_CSUM_TSOv6)) {
    839 		int rv = ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status);
    840 
    841 		if (rv != 0)
    842 			++adapter->tso_err.ev_count;
    843 		return rv;
    844 	}
    845 
    846 	if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
    847 		offload = FALSE;
    848 
    849 	/* Indicate the whole packet as payload when not doing TSO */
    850 	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
    851 
    852 	/* Now ready a context descriptor */
    853 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
    854 
    855 	/*
    856 	 * In advanced descriptors the vlan tag must
    857 	 * be placed into the context descriptor. Hence
    858 	 * we need to make one even if not doing offloads.
    859 	 */
    860 	if (vlan_has_tag(mp)) {
    861 		vtag = htole16(vlan_get_tag(mp));
    862 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
    863 	} else if (!(txr->adapter->feat_en & IXGBE_FEATURE_NEEDS_CTXD) &&
    864 	           (offload == FALSE))
    865 		return (0);
    866 
    867 	/*
    868 	 * Determine where frame payload starts.
    869 	 * Jump over vlan headers if already present,
    870 	 * helpful for QinQ too.
    871 	 */
    872 	KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
    873 	eh = mtod(mp, struct ether_vlan_header *);
    874 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
    875 		KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
    876 		etype = ntohs(eh->evl_proto);
    877 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
    878 	} else {
    879 		etype = ntohs(eh->evl_encap_proto);
    880 		ehdrlen = ETHER_HDR_LEN;
    881 	}
    882 
    883 	/* Set the ether header length */
    884 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
    885 
    886 	if (offload == FALSE)
    887 		goto no_offloads;
    888 
    889 	/*
    890 	 * If the first mbuf only includes the ethernet header,
    891 	 * jump to the next one
    892 	 * XXX: This assumes the stack splits mbufs containing headers
    893 	 *      on header boundaries
    894 	 * XXX: And assumes the entire IP header is contained in one mbuf
    895 	 */
    896 	if (mp->m_len == ehdrlen && mp->m_next)
    897 		l3d = mtod(mp->m_next, char *);
    898 	else
    899 		l3d = mtod(mp, char *) + ehdrlen;
    900 
    901 	switch (etype) {
    902 #ifdef INET
    903 	case ETHERTYPE_IP:
    904 		ip = (struct ip *)(l3d);
    905 		ip_hlen = ip->ip_hl << 2;
    906 		ipproto = ip->ip_p;
    907 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
    908 		KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
    909 		    ip->ip_sum == 0);
    910 		break;
    911 #endif
    912 #ifdef INET6
    913 	case ETHERTYPE_IPV6:
    914 		ip6 = (struct ip6_hdr *)(l3d);
    915 		ip_hlen = sizeof(struct ip6_hdr);
    916 		ipproto = ip6->ip6_nxt;
    917 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
    918 		break;
    919 #endif
    920 	default:
    921 		offload = false;
    922 		break;
    923 	}
    924 
    925 	if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
    926 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
    927 
    928 	vlan_macip_lens |= ip_hlen;
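        	/*
        	 * vlan_macip_lens is now fully packed: the VLAN tag above
        	 * IXGBE_ADVTXD_VLAN_SHIFT, the MAC header length above
        	 * IXGBE_ADVTXD_MACLEN_SHIFT and the IP header length in the
        	 * low bits; e.g. an untagged IPv4/TCP frame (no IP options)
        	 * yields (ETHER_HDR_LEN << IXGBE_ADVTXD_MACLEN_SHIFT) | 20.
        	 */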
    929 
    930 	/* No support for offloads for non-L4 next headers */
    931 	switch (ipproto) {
    932 	case IPPROTO_TCP:
    933 		if (mp->m_pkthdr.csum_flags &
    934 		    (M_CSUM_TCPv4 | M_CSUM_TCPv6))
    935 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
    936 		else
    937 			offload = false;
    938 		break;
    939 	case IPPROTO_UDP:
    940 		if (mp->m_pkthdr.csum_flags &
    941 		    (M_CSUM_UDPv4 | M_CSUM_UDPv6))
    942 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
    943 		else
    944 			offload = false;
    945 		break;
    946 	default:
    947 		offload = false;
    948 		break;
    949 	}
    950 
    951 	if (offload) /* Insert L4 checksum into data descriptors */
    952 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
    953 
    954 no_offloads:
    955 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
    956 
    957 	/* Now copy bits into descriptor */
    958 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
    959 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
    960 	TXD->seqnum_seed = htole32(0);
    961 	TXD->mss_l4len_idx = htole32(0);
    962 
    963 	/* We've consumed the first desc, adjust counters */
    964 	if (++ctxd == txr->num_desc)
    965 		ctxd = 0;
    966 	txr->next_avail_desc = ctxd;
    967 	--txr->tx_avail;
    968 
    969 	return (0);
    970 } /* ixgbe_tx_ctx_setup */
    971 
    972 /************************************************************************
    973  * ixgbe_tso_setup
    974  *
    975  *   Setup work for hardware segmentation offload (TSO) on
    976  *   adapters using advanced tx descriptors
    977  ************************************************************************/
    978 static int
    979 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len,
    980     u32 *olinfo_status)
    981 {
    982 	struct ixgbe_adv_tx_context_desc *TXD;
    983 	struct ether_vlan_header         *eh;
    984 #ifdef INET6
    985 	struct ip6_hdr                   *ip6;
    986 #endif
    987 #ifdef INET
    988 	struct ip                        *ip;
    989 #endif
    990 	struct tcphdr                    *th;
    991 	int                              ctxd, ehdrlen, ip_hlen, tcp_hlen;
    992 	u32                              vlan_macip_lens = 0;
    993 	u32                              type_tucmd_mlhl = 0;
    994 	u32                              mss_l4len_idx = 0, paylen;
    995 	u16                              vtag = 0, eh_type;
    996 
    997 	/*
    998 	 * Determine where frame payload starts.
    999 	 * Jump over vlan headers if already present
   1000 	 */
   1001 	eh = mtod(mp, struct ether_vlan_header *);
   1002 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
   1003 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
   1004 		eh_type = eh->evl_proto;
   1005 	} else {
   1006 		ehdrlen = ETHER_HDR_LEN;
   1007 		eh_type = eh->evl_encap_proto;
   1008 	}
   1009 
   1010 	switch (ntohs(eh_type)) {
   1011 #ifdef INET
   1012 	case ETHERTYPE_IP:
   1013 		ip = (struct ip *)(mp->m_data + ehdrlen);
   1014 		if (ip->ip_p != IPPROTO_TCP)
   1015 			return (ENXIO);
   1016 		ip->ip_sum = 0;
   1017 		ip_hlen = ip->ip_hl << 2;
   1018 		th = (struct tcphdr *)((char *)ip + ip_hlen);
   1019 		th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
   1020 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
   1021 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
   1022 		/* Tell transmit desc to also do IPv4 checksum. */
   1023 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
   1024 		break;
   1025 #endif
   1026 #ifdef INET6
   1027 	case ETHERTYPE_IPV6:
   1028 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
   1029 		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
   1030 		if (ip6->ip6_nxt != IPPROTO_TCP)
   1031 			return (ENXIO);
   1032 		ip_hlen = sizeof(struct ip6_hdr);
   1033 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
   1034 		th = (struct tcphdr *)((char *)ip6 + ip_hlen);
   1035 		th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
   1036 		    &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
   1037 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
   1038 		break;
   1039 #endif
   1040 	default:
   1041 		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
   1042 		    __func__, ntohs(eh_type));
   1043 		break;
   1044 	}
   1045 
   1046 	ctxd = txr->next_avail_desc;
   1047 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
   1048 
   1049 	tcp_hlen = th->th_off << 2;
   1050 
   1051 	/* This is used in the transmit desc in encap */
   1052 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
   1053 
   1054 	/* VLAN MACLEN IPLEN */
   1055 	if (vlan_has_tag(mp)) {
   1056 		vtag = htole16(vlan_get_tag(mp));
   1057 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
   1058 	}
   1059 
   1060 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
   1061 	vlan_macip_lens |= ip_hlen;
   1062 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
   1063 
   1064 	/* ADV DTYPE TUCMD */
   1065 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
   1066 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
   1067 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
   1068 
   1069 	/* MSS L4LEN IDX */
   1070 	mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
   1071 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
   1072 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
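        	/*
        	 * e.g. a TSO burst with an MSS of 1448 and a 20-byte TCP header
        	 * packs to (1448 << IXGBE_ADVTXD_MSS_SHIFT) |
        	 * (20 << IXGBE_ADVTXD_L4LEN_SHIFT).
        	 */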
   1073 
   1074 	TXD->seqnum_seed = htole32(0);
   1075 
   1076 	if (++ctxd == txr->num_desc)
   1077 		ctxd = 0;
   1078 
   1079 	txr->tx_avail--;
   1080 	txr->next_avail_desc = ctxd;
   1081 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
   1082 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
   1083 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
   1084 	++txr->tso_tx.ev_count;
   1085 
   1086 	return (0);
   1087 } /* ixgbe_tso_setup */
   1088 
   1089 
   1090 /************************************************************************
   1091  * ixgbe_txeof
   1092  *
   1093  *   Examine each tx_buffer in the used queue. If the hardware is done
   1094  *   processing the packet then free associated resources. The
   1095  *   tx_buffer is put back on the free queue.
   1096  ************************************************************************/
   1097 bool
   1098 ixgbe_txeof(struct tx_ring *txr)
   1099 {
   1100 	struct adapter		*adapter = txr->adapter;
   1101 	struct ifnet		*ifp = adapter->ifp;
   1102 	struct ixgbe_tx_buf	*buf;
   1103 	union ixgbe_adv_tx_desc *txd;
   1104 	u32			work, processed = 0;
   1105 	u32			limit = adapter->tx_process_limit;
   1106 
   1107 	KASSERT(mutex_owned(&txr->tx_mtx));
   1108 
   1109 #ifdef DEV_NETMAP
   1110 	if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) &&
   1111 	    (adapter->ifp->if_capenable & IFCAP_NETMAP)) {
   1112 		struct netmap_adapter *na = NA(adapter->ifp);
   1113 		struct netmap_kring *kring = na->tx_rings[txr->me];
   1114 		txd = txr->tx_base;
   1115 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1116 		    BUS_DMASYNC_POSTREAD);
   1117 		/*
   1118 		 * In netmap mode, all the work is done in the context
   1119 		 * of the client thread. Interrupt handlers only wake up
   1120 		 * clients, which may be sleeping on individual rings
   1121 		 * or on a global resource for all rings.
   1122 		 * To implement tx interrupt mitigation, we wake up the client
   1123 		 * thread roughly every half ring, even if the NIC interrupts
   1124 		 * more frequently. This is implemented as follows:
   1125 		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
   1126 		 *   the slot that should wake up the thread (nkr_num_slots
   1127 		 *   means the user thread should not be woken up);
   1128 		 * - the driver ignores tx interrupts unless netmap_mitigate=0
   1129 		 *   or the slot has the DD bit set.
   1130 		 */
   1131 		if (kring->nr_kflags < kring->nkr_num_slots &&
   1132 		    le32toh(txd[kring->nr_kflags].wb.status) & IXGBE_TXD_STAT_DD) {
   1133 			netmap_tx_irq(ifp, txr->me);
   1134 		}
   1135 		return false;
   1136 	}
   1137 #endif /* DEV_NETMAP */
   1138 
   1139 	if (txr->tx_avail == txr->num_desc) {
   1140 		txr->busy = 0;
   1141 		return false;
   1142 	}
   1143 
   1144 	/* Get work starting point */
   1145 	work = txr->next_to_clean;
   1146 	buf = &txr->tx_buffers[work];
   1147 	txd = &txr->tx_base[work];
   1148 	work -= txr->num_desc; /* The distance to ring end */
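        	/*
        	 * "work" is kept biased by -num_desc (in unsigned arithmetic),
        	 * so it reaches 0 exactly when the index wraps past the end of
        	 * the ring; e.g. with num_desc = 1024 and next_to_clean = 1000,
        	 * 24 increments bring it to 0 and the scan restarts at the base.
        	 */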
   1149 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1150 	    BUS_DMASYNC_POSTREAD);
   1151 
   1152 	do {
   1153 		union ixgbe_adv_tx_desc *eop = buf->eop;
   1154 		if (eop == NULL) /* No work */
   1155 			break;
   1156 
   1157 		if ((le32toh(eop->wb.status) & IXGBE_TXD_STAT_DD) == 0)
   1158 			break;	/* I/O not complete */
   1159 
   1160 		if (buf->m_head) {
   1161 			txr->bytes += buf->m_head->m_pkthdr.len;
   1162 			bus_dmamap_sync(txr->txtag->dt_dmat, buf->map,
   1163 			    0, buf->m_head->m_pkthdr.len,
   1164 			    BUS_DMASYNC_POSTWRITE);
   1165 			ixgbe_dmamap_unload(txr->txtag, buf->map);
   1166 			m_freem(buf->m_head);
   1167 			buf->m_head = NULL;
   1168 		}
   1169 		buf->eop = NULL;
   1170 		txr->txr_no_space = false;
   1171 		++txr->tx_avail;
   1172 
   1173 		/* We clean the range if multi segment */
   1174 		while (txd != eop) {
   1175 			++txd;
   1176 			++buf;
   1177 			++work;
   1178 			/* wrap the ring? */
   1179 			if (__predict_false(!work)) {
   1180 				work -= txr->num_desc;
   1181 				buf = txr->tx_buffers;
   1182 				txd = txr->tx_base;
   1183 			}
   1184 			if (buf->m_head) {
   1185 				txr->bytes +=
   1186 				    buf->m_head->m_pkthdr.len;
   1187 				bus_dmamap_sync(txr->txtag->dt_dmat,
   1188 				    buf->map,
   1189 				    0, buf->m_head->m_pkthdr.len,
   1190 				    BUS_DMASYNC_POSTWRITE);
   1191 				ixgbe_dmamap_unload(txr->txtag,
   1192 				    buf->map);
   1193 				m_freem(buf->m_head);
   1194 				buf->m_head = NULL;
   1195 			}
   1196 			++txr->tx_avail;
   1197 			buf->eop = NULL;
   1198 
   1199 		}
   1200 		++txr->packets;
   1201 		++processed;
   1202 		if_statinc(ifp, if_opackets);
   1203 
   1204 		/* Try the next packet */
   1205 		++txd;
   1206 		++buf;
   1207 		++work;
   1208 		/* reset with a wrap */
   1209 		if (__predict_false(!work)) {
   1210 			work -= txr->num_desc;
   1211 			buf = txr->tx_buffers;
   1212 			txd = txr->tx_base;
   1213 		}
   1214 		prefetch(txd);
   1215 	} while (__predict_true(--limit));
   1216 
   1217 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1218 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   1219 
   1220 	work += txr->num_desc;
   1221 	txr->next_to_clean = work;
   1222 
   1223 	/*
   1224 	 * Queue hang detection: we know there is work
   1225 	 * outstanding, or the early return above would
   1226 	 * have been taken.  If nothing was cleaned,
   1227 	 * increment busy; the local timer checks this
   1228 	 * count and marks the queue HUNG once it
   1229 	 * exceeds the maximum number of attempts.
   1230 	 */
   1231 	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
   1232 		++txr->busy;
   1233 	/*
   1234 	 * If anything was cleaned, reset the state to 1;
   1235 	 * note that this also clears HUNG if it was set.
   1236 	 */
   1237 	if (processed)
   1238 		txr->busy = 1;
   1239 
   1240 	if (txr->tx_avail == txr->num_desc)
   1241 		txr->busy = 0;
   1242 
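        	/*
        	 * Return true when the cleanup limit was exhausted, i.e. there
        	 * may be more completed descriptors left for a later pass.
        	 */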
   1243 	return ((limit > 0) ? false : true);
   1244 } /* ixgbe_txeof */
   1245 
   1246 /************************************************************************
   1247  * ixgbe_rsc_count
   1248  *
   1249  *   Used to detect a descriptor that has been merged by Hardware RSC.
   1250  ************************************************************************/
   1251 static inline u32
   1252 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
   1253 {
   1254 	return (le32toh(rx->wb.lower.lo_dword.data) &
   1255 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
   1256 } /* ixgbe_rsc_count */
   1257 
   1258 /************************************************************************
   1259  * ixgbe_setup_hw_rsc
   1260  *
   1261  *   Initialize Hardware RSC (LRO) feature on 82599
   1262  *   for an RX ring, this is toggled by the LRO capability
   1263  *   even though it is transparent to the stack.
   1264  *
   1265  *   NOTE: Since this HW feature only works with IPv4 and
   1266  *         testing has shown soft LRO to be as effective,
   1267  *         this feature will be disabled by default.
   1268  ************************************************************************/
   1269 static void
   1270 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
   1271 {
   1272 	struct	adapter  *adapter = rxr->adapter;
   1273 	struct	ixgbe_hw *hw = &adapter->hw;
   1274 	u32              rscctrl, rdrxctl;
   1275 
   1276 	/* If turning LRO/RSC off we need to disable it */
   1277 	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
   1278 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
   1279 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
        		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
   1280 		return;
   1281 	}
   1282 
   1283 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
   1284 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
   1285 #ifdef DEV_NETMAP
   1286 	/* Always strip CRC unless Netmap disabled it */
   1287 	if (!(adapter->feat_en & IXGBE_FEATURE_NETMAP) ||
   1288 	    !(adapter->ifp->if_capenable & IFCAP_NETMAP) ||
   1289 	    ix_crcstrip)
   1290 #endif /* DEV_NETMAP */
   1291 		rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
   1292 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
   1293 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
   1294 
   1295 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
   1296 	rscctrl |= IXGBE_RSCCTL_RSCEN;
   1297 	/*
   1298 	 * Limit the total number of descriptors that
   1299 	 * can be combined, so it does not exceed 64K
   1300 	 */
   1301 	if (rxr->mbuf_sz == MCLBYTES)
   1302 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
   1303 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
   1304 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
   1305 	else if (rxr->mbuf_sz == MJUM9BYTES)
   1306 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
   1307 	else  /* Using 16K cluster */
   1308 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
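        	/*
        	 * e.g. with 2 KB (MCLBYTES) clusters, MAXDESC_16 allows at most
        	 * 16 * 2 KB = 32 KB per coalesced receive, keeping the merged
        	 * frame safely below the 64 KB limit mentioned above.
        	 */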
   1309 
   1310 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
   1311 
   1312 	/* Enable TCP header recognition */
   1313 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
   1314 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR));
   1315 
   1316 	/* Disable RSC for ACK packets */
   1317 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
   1318 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
   1319 
   1320 	rxr->hw_rsc = TRUE;
   1321 } /* ixgbe_setup_hw_rsc */
   1322 
   1323 /************************************************************************
   1324  * ixgbe_refresh_mbufs
   1325  *
   1326  *   Refresh mbuf buffers for RX descriptor rings
   1327  *    - now keeps its own state, so discards due to resource
   1328  *      exhaustion are unnecessary; if an mbuf cannot be obtained
   1329  *      the function simply returns, keeping its placeholder, and
   1330  *      can be called again later to retry.
   1331  *
   1332  *   XXX NetBSD TODO:
   1333  *    - The ixgbe_rxeof() function always preallocates an mbuf cluster,
   1334  *      so the ixgbe_refresh_mbufs() function can be simplified.
   1335  *
   1336  ************************************************************************/
   1337 static void
   1338 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
   1339 {
   1340 	struct adapter      *adapter = rxr->adapter;
   1341 	struct ixgbe_rx_buf *rxbuf;
   1342 	struct mbuf         *mp;
   1343 	int                 i, error;
   1344 	bool                refreshed = false;
   1345 
   1346 	i = rxr->next_to_refresh;
   1347 	/* next_to_refresh points to the previous one */
   1348 	if (++i == rxr->num_desc)
   1349 		i = 0;
   1350 
   1351 	while (i != limit) {
   1352 		rxbuf = &rxr->rx_buffers[i];
   1353 		if (__predict_false(rxbuf->buf == NULL)) {
   1354 			mp = ixgbe_getcl();
   1355 			if (mp == NULL) {
   1356 				rxr->no_mbuf.ev_count++;
   1357 				goto update;
   1358 			}
   1359 			mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   1360 			IXGBE_M_ADJ(adapter, rxr, mp);
   1361 		} else
   1362 			mp = rxbuf->buf;
   1363 
   1364 		/* If we're dealing with an mbuf that was copied rather
   1365 		 * than replaced, there's no need to go through busdma.
   1366 		 */
   1367 		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
   1368 			/* Get the memory mapping */
   1369 			ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   1370 			error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
   1371 			    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
   1372 			if (__predict_false(error != 0)) {
   1373 				device_printf(adapter->dev, "Refresh mbufs: "
   1374 				    "payload dmamap load failure - %d\n",
   1375 				    error);
   1376 				m_free(mp);
   1377 				rxbuf->buf = NULL;
   1378 				goto update;
   1379 			}
   1380 			rxbuf->buf = mp;
   1381 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1382 			    0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
   1383 			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
   1384 			    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1385 		} else {
   1386 			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
   1387 			rxbuf->flags &= ~IXGBE_RX_COPY;
   1388 		}
   1389 
   1390 		refreshed = true;
   1391 		/* next_to_refresh points to the previous one */
   1392 		rxr->next_to_refresh = i;
   1393 		if (++i == rxr->num_desc)
   1394 			i = 0;
   1395 	}
   1396 
   1397 update:
   1398 	if (refreshed) /* Update hardware tail index */
   1399 		IXGBE_WRITE_REG(&adapter->hw, rxr->tail, rxr->next_to_refresh);
   1400 
   1401 	return;
   1402 } /* ixgbe_refresh_mbufs */
   1403 
   1404 /************************************************************************
   1405  * ixgbe_allocate_receive_buffers
   1406  *
   1407  *   Allocate memory for rx_buffer structures. Since we use one
   1408  *   rx_buffer per received packet, the maximum number of rx_buffer's
   1409  *   that we'll need is equal to the number of receive descriptors
   1410  *   that we've allocated.
   1411  ************************************************************************/
   1412 static int
   1413 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
   1414 {
   1415 	struct adapter      *adapter = rxr->adapter;
   1416 	device_t            dev = adapter->dev;
   1417 	struct ixgbe_rx_buf *rxbuf;
   1418 	int                 bsize, error;
   1419 
   1420 	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
   1421 	rxr->rx_buffers = malloc(bsize, M_DEVBUF, M_WAITOK | M_ZERO);
   1422 
   1423 	error = ixgbe_dma_tag_create(
   1424 	         /*      parent */ adapter->osdep.dmat,
   1425 	         /*   alignment */ 1,
   1426 	         /*      bounds */ 0,
   1427 	         /*     maxsize */ MJUM16BYTES,
   1428 	         /*   nsegments */ 1,
   1429 	         /*  maxsegsize */ MJUM16BYTES,
   1430 	         /*       flags */ 0,
   1431 	                           &rxr->ptag);
   1432 	if (error != 0) {
   1433 		aprint_error_dev(dev, "Unable to create RX DMA tag\n");
   1434 		goto fail;
   1435 	}
   1436 
   1437 	for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
   1438 		rxbuf = &rxr->rx_buffers[i];
   1439 		error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
   1440 		if (error) {
   1441 			aprint_error_dev(dev, "Unable to create RX dma map\n");
   1442 			goto fail;
   1443 		}
   1444 	}
   1445 
   1446 	return (0);
   1447 
   1448 fail:
   1449 	/* Frees all, but can handle partial completion */
   1450 	ixgbe_free_receive_structures(adapter);
   1451 
   1452 	return (error);
   1453 } /* ixgbe_allocate_receive_buffers */
   1454 
   1455 /************************************************************************
   1456  * ixgbe_free_receive_ring
   1457  ************************************************************************/
   1458 static void
   1459 ixgbe_free_receive_ring(struct rx_ring *rxr)
   1460 {
   1461 	for (int i = 0; i < rxr->num_desc; i++) {
   1462 		ixgbe_rx_discard(rxr, i);
   1463 	}
   1464 } /* ixgbe_free_receive_ring */
   1465 
   1466 /************************************************************************
   1467  * ixgbe_setup_receive_ring
   1468  *
   1469  *   Initialize a receive ring and its buffers.
   1470  ************************************************************************/
   1471 static int
   1472 ixgbe_setup_receive_ring(struct rx_ring *rxr)
   1473 {
   1474 	struct adapter        *adapter;
   1475 	struct ixgbe_rx_buf   *rxbuf;
   1476 #ifdef LRO
   1477 	struct ifnet          *ifp;
   1478 	struct lro_ctrl       *lro = &rxr->lro;
   1479 #endif /* LRO */
   1480 #ifdef DEV_NETMAP
   1481 	struct netmap_adapter *na = NA(rxr->adapter->ifp);
   1482 	struct netmap_slot    *slot;
   1483 #endif /* DEV_NETMAP */
   1484 	int                   rsize, error = 0;
   1485 
   1486 	adapter = rxr->adapter;
   1487 #ifdef LRO
   1488 	ifp = adapter->ifp;
   1489 #endif /* LRO */
   1490 
   1491 	/* Clear the ring contents */
   1492 	IXGBE_RX_LOCK(rxr);
   1493 
   1494 #ifdef DEV_NETMAP
   1495 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP)
   1496 		slot = netmap_reset(na, NR_RX, rxr->me, 0);
   1497 #endif /* DEV_NETMAP */
   1498 
   1499 	rsize = roundup2(adapter->num_rx_desc *
   1500 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
   1501 	bzero((void *)rxr->rx_base, rsize);
   1502 	/* Cache the size */
   1503 	rxr->mbuf_sz = adapter->rx_mbuf_sz;
   1504 
   1505 	/* Free current RX buffer structs and their mbufs */
   1506 	ixgbe_free_receive_ring(rxr);
   1507 
   1508 	/* Now replenish the mbufs */
   1509 	for (int j = 0; j != rxr->num_desc; ++j) {
   1510 		struct mbuf *mp;
   1511 
   1512 		rxbuf = &rxr->rx_buffers[j];
   1513 
   1514 #ifdef DEV_NETMAP
   1515 		/*
   1516 		 * In netmap mode, fill the map and set the buffer
   1517 		 * address in the NIC ring, considering the offset
   1518 		 * between the netmap and NIC rings (see comment in
   1519 		 * ixgbe_setup_transmit_ring() ). No need to allocate
   1520 		 * an mbuf, so end the block with a continue;
   1521 		 */
   1522 		if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
   1523 			int sj = netmap_idx_n2k(na->rx_rings[rxr->me], j);
   1524 			uint64_t paddr;
   1525 			void *addr;
   1526 
   1527 			addr = PNMB(na, slot + sj, &paddr);
   1528 			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
   1529 			/* Update descriptor and the cached value */
   1530 			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
   1531 			rxbuf->addr = htole64(paddr);
   1532 			continue;
   1533 		}
   1534 #endif /* DEV_NETMAP */
   1535 
   1536 		rxbuf->flags = 0;
   1537 		rxbuf->buf = ixgbe_getcl();
   1538 		if (rxbuf->buf == NULL) {
   1539 			rxr->no_mbuf.ev_count++;
   1540 			error = ENOBUFS;
   1541 			goto fail;
   1542 		}
   1543 		mp = rxbuf->buf;
   1544 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   1545 		IXGBE_M_ADJ(adapter, rxr, mp);
   1546 		/* Get the memory mapping */
   1547 		error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat, rxbuf->pmap,
   1548 		    mp, BUS_DMA_NOWAIT);
   1549 		if (error != 0) {
   1550 			/*
   1551 			 * Clear this entry for later cleanup in
    1552 			 * ixgbe_rx_discard(), which is called via
   1553 			 * ixgbe_free_receive_ring().
   1554 			 */
   1555 			m_freem(mp);
   1556 			rxbuf->buf = NULL;
   1557 			goto fail;
   1558 		}
   1559 		bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1560 		    0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
   1561 		/* Update the descriptor and the cached value */
   1562 		rxr->rx_base[j].read.pkt_addr =
   1563 		    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1564 		rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1565 	}
   1566 
   1567 	/* Setup our descriptor indices */
   1568 	rxr->next_to_check = 0;
   1569 	rxr->next_to_refresh = adapter->num_rx_desc - 1; /* Fully allocated */
   1570 	rxr->lro_enabled = FALSE;
   1571 	rxr->discard_multidesc = false;
   1572 	rxr->rx_copies.ev_count = 0;
   1573 #if 0 /* NetBSD */
   1574 	rxr->rx_bytes.ev_count = 0;
   1575 #if 1	/* Fix inconsistency */
   1576 	rxr->rx_packets.ev_count = 0;
   1577 #endif
   1578 #endif
   1579 	rxr->vtag_strip = FALSE;
   1580 
   1581 	ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   1582 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   1583 
   1584 	/*
   1585 	 * Now set up the LRO interface
   1586 	 */
   1587 	if (ixgbe_rsc_enable)
   1588 		ixgbe_setup_hw_rsc(rxr);
   1589 #ifdef LRO
   1590 	else if (ifp->if_capenable & IFCAP_LRO) {
   1591 		device_t dev = adapter->dev;
   1592 		int err = tcp_lro_init(lro);
   1593 		if (err) {
   1594 			device_printf(dev, "LRO Initialization failed!\n");
   1595 			goto fail;
   1596 		}
   1597 		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
   1598 		rxr->lro_enabled = TRUE;
   1599 		lro->ifp = adapter->ifp;
   1600 	}
   1601 #endif /* LRO */
   1602 
   1603 	IXGBE_RX_UNLOCK(rxr);
   1604 
   1605 	return (0);
   1606 
   1607 fail:
   1608 	ixgbe_free_receive_ring(rxr);
   1609 	IXGBE_RX_UNLOCK(rxr);
   1610 
   1611 	return (error);
   1612 } /* ixgbe_setup_receive_ring */
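/*
 * Editor's note: a small worked example (illustration only, hypothetical
 * numbers) of the roundup2() padding applied to rsize above.  roundup2(x, m)
 * rounds x up to the next multiple of m, where m must be a power of two, so
 * the descriptor area always satisfies the controller's DBA_ALIGN
 * requirement.
 */
#if 0	/* illustration only */
#include <stdio.h>

#define TOY_ROUNDUP2(x, m)	(((x) + ((m) - 1)) & ~((m) - 1))

int
main(void)
{
	/* e.g. 100 descriptors of 16 bytes, padded to a 128-byte boundary */
	unsigned long raw = 100UL * 16;
	unsigned long padded = TOY_ROUNDUP2(raw, 128UL);

	printf("%lu bytes -> %lu bytes\n", raw, padded);	/* 1600 -> 1664 */
	return 0;
}
#endif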
   1613 
   1614 /************************************************************************
   1615  * ixgbe_setup_receive_structures - Initialize all receive rings.
   1616  ************************************************************************/
   1617 int
   1618 ixgbe_setup_receive_structures(struct adapter *adapter)
   1619 {
   1620 	struct rx_ring *rxr = adapter->rx_rings;
   1621 	int            j;
   1622 
   1623 	INIT_DEBUGOUT("ixgbe_setup_receive_structures");
   1624 	for (j = 0; j < adapter->num_queues; j++, rxr++)
   1625 		if (ixgbe_setup_receive_ring(rxr))
   1626 			goto fail;
   1627 
   1628 	return (0);
   1629 fail:
    1630 	/*
    1631 	 * Free the RX buffers allocated so far; we only handle the
    1632 	 * rings that completed, since the failing ring will have
    1633 	 * cleaned up after itself. 'j' failed, so it's the terminus.
    1634 	 */
   1635 	for (int i = 0; i < j; ++i) {
   1636 		rxr = &adapter->rx_rings[i];
   1637 		IXGBE_RX_LOCK(rxr);
   1638 		ixgbe_free_receive_ring(rxr);
   1639 		IXGBE_RX_UNLOCK(rxr);
   1640 	}
   1641 
   1642 	return (ENOBUFS);
   1643 } /* ixgbe_setup_receive_structures */
   1644 
   1645 
   1646 /************************************************************************
   1647  * ixgbe_free_receive_structures - Free all receive rings.
   1648  ************************************************************************/
   1649 void
   1650 ixgbe_free_receive_structures(struct adapter *adapter)
   1651 {
   1652 	struct rx_ring *rxr = adapter->rx_rings;
   1653 
   1654 	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
   1655 
   1656 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
   1657 		ixgbe_free_receive_buffers(rxr);
   1658 #ifdef LRO
   1659 		/* Free LRO memory */
   1660 		tcp_lro_free(&rxr->lro);
   1661 #endif /* LRO */
   1662 		/* Free the ring memory as well */
   1663 		ixgbe_dma_free(adapter, &rxr->rxdma);
   1664 		IXGBE_RX_LOCK_DESTROY(rxr);
   1665 	}
   1666 
   1667 	free(adapter->rx_rings, M_DEVBUF);
   1668 } /* ixgbe_free_receive_structures */
   1669 
   1670 
   1671 /************************************************************************
   1672  * ixgbe_free_receive_buffers - Free receive ring data structures
   1673  ************************************************************************/
   1674 static void
   1675 ixgbe_free_receive_buffers(struct rx_ring *rxr)
   1676 {
   1677 	struct adapter      *adapter = rxr->adapter;
   1678 	struct ixgbe_rx_buf *rxbuf;
   1679 
   1680 	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
   1681 
   1682 	/* Cleanup any existing buffers */
   1683 	if (rxr->rx_buffers != NULL) {
   1684 		for (int i = 0; i < adapter->num_rx_desc; i++) {
   1685 			rxbuf = &rxr->rx_buffers[i];
   1686 			ixgbe_rx_discard(rxr, i);
   1687 			if (rxbuf->pmap != NULL) {
   1688 				ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
   1689 				rxbuf->pmap = NULL;
   1690 			}
   1691 		}
   1692 
   1693 		if (rxr->rx_buffers != NULL) {
   1694 			free(rxr->rx_buffers, M_DEVBUF);
   1695 			rxr->rx_buffers = NULL;
   1696 		}
   1697 	}
   1698 
   1699 	if (rxr->ptag != NULL) {
   1700 		ixgbe_dma_tag_destroy(rxr->ptag);
   1701 		rxr->ptag = NULL;
   1702 	}
   1703 
   1704 	return;
   1705 } /* ixgbe_free_receive_buffers */
   1706 
   1707 /************************************************************************
   1708  * ixgbe_rx_input
   1709  ************************************************************************/
   1710 static __inline void
   1711 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m,
   1712     u32 ptype)
   1713 {
   1714 	struct adapter	*adapter = ifp->if_softc;
   1715 
   1716 #ifdef LRO
   1717 	struct ethercom *ec = &adapter->osdep.ec;
   1718 
   1719 	/*
    1720 	 * At the moment LRO is only for IP/TCP packets whose TCP checksum is
    1721 	 * computed by hardware, and which must not carry a VLAN tag in the
    1722 	 * ethernet header.  In case of IPv6 we do not yet support ext. hdrs.
   1723 	 */
   1724         if (rxr->lro_enabled &&
   1725             (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
   1726             (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
   1727             ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
   1728             (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
   1729             (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
   1730             (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
   1731             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
   1732             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
   1733                 /*
   1734                  * Send to the stack if:
    1735                  *  - LRO not enabled, or
    1736                  *  - no LRO resources, or
    1737                  *  - lro enqueue fails
   1738                  */
   1739                 if (rxr->lro.lro_cnt != 0)
   1740                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
   1741                                 return;
   1742         }
   1743 #endif /* LRO */
   1744 
   1745 	if_percpuq_enqueue(adapter->ipq, m);
   1746 } /* ixgbe_rx_input */
   1747 
   1748 /************************************************************************
   1749  * ixgbe_rx_discard
   1750  ************************************************************************/
   1751 static __inline void
   1752 ixgbe_rx_discard(struct rx_ring *rxr, int i)
   1753 {
   1754 	struct ixgbe_rx_buf *rbuf;
   1755 
   1756 	rbuf = &rxr->rx_buffers[i];
   1757 
   1758 	/*
   1759 	 * With advanced descriptors the writeback clobbers the buffer addrs,
    1760 	 * so it's easier to just free the existing mbufs and take the normal
    1761 	 * refresh path to get new buffers and a new mapping.
   1762 	 */
   1763 
    1764 	if (rbuf->fmp != NULL) { /* Partial chain? */
   1765 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
   1766 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
   1767 		ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
   1768 		m_freem(rbuf->fmp);
   1769 		rbuf->fmp = NULL;
   1770 		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
   1771 	} else if (rbuf->buf) {
   1772 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
   1773 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
   1774 		ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
   1775 		m_free(rbuf->buf);
   1776 		rbuf->buf = NULL;
   1777 	}
   1778 
   1779 	rbuf->flags = 0;
   1780 
   1781 	return;
   1782 } /* ixgbe_rx_discard */
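/*
 * Editor's note: the "writeback clobbers the buffer addrs" remark above is
 * because the advanced RX descriptor is a union of two layouts sharing the
 * same 16 bytes: the "read" format the driver fills in (buffer addresses) and
 * the "writeback" format the hardware overwrites on completion (status,
 * length, etc.).  The toy union below is a simplified, hypothetical stand-in
 * for that layout, not the real ixgbe definition.
 */
#if 0	/* illustration only */
#include <stdint.h>
#include <stdio.h>

union toy_rx_desc {
	struct {
		uint64_t pkt_addr;	/* what software hands to the NIC */
		uint64_t hdr_addr;
	} read;
	struct {
		uint64_t lower;		/* what the NIC writes back */
		uint64_t upper;		/* length/status/error bits live here */
	} wb;
};

int
main(void)
{
	union toy_rx_desc d;

	d.read.pkt_addr = 0x12345678abcdef00ULL;	/* pretend DMA address */
	d.wb.lower = 42;	/* "writeback" lands on top of pkt_addr... */
	d.wb.upper = 7;		/* ...and hdr_addr */
	printf("pkt_addr now reads %#llx\n",
	    (unsigned long long)d.read.pkt_addr);
	return 0;
}
#endif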
   1783 
   1784 
   1785 /************************************************************************
   1786  * ixgbe_rxeof
   1787  *
    1788  *   Executes in interrupt context. It replenishes the
    1789  *   mbufs in the descriptor ring and passes data that has
    1790  *   been DMA'ed into host memory up to the upper layer.
    1791  *
    1792  *   Returns TRUE for more work to do, FALSE if the ring is clean.
   1793  ************************************************************************/
   1794 bool
   1795 ixgbe_rxeof(struct ix_queue *que)
   1796 {
   1797 	struct adapter		*adapter = que->adapter;
   1798 	struct rx_ring		*rxr = que->rxr;
   1799 	struct ifnet		*ifp = adapter->ifp;
   1800 #ifdef LRO
   1801 	struct lro_ctrl		*lro = &rxr->lro;
   1802 #endif /* LRO */
   1803 	union ixgbe_adv_rx_desc	*cur;
   1804 	struct ixgbe_rx_buf	*rbuf, *nbuf;
   1805 	int			i, nextp, processed = 0;
   1806 	u32			staterr = 0;
   1807 	u32			loopcount = 0;
   1808 	u32			limit = adapter->rx_process_limit;
   1809 	bool			discard_multidesc = rxr->discard_multidesc;
   1810 #ifdef RSS
   1811 	u16			pkt_info;
   1812 #endif
   1813 
   1814 	IXGBE_RX_LOCK(rxr);
   1815 
   1816 #ifdef DEV_NETMAP
   1817 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
   1818 		/* Same as the txeof routine: wakeup clients on intr. */
   1819 		if (netmap_rx_irq(ifp, rxr->me, &processed)) {
   1820 			IXGBE_RX_UNLOCK(rxr);
   1821 			return (FALSE);
   1822 		}
   1823 	}
   1824 #endif /* DEV_NETMAP */
   1825 
   1826 	/*
    1827 	 * The maximum number of loop iterations is rx_process_limit. If
    1828 	 * discard_multidesc is true, keep processing so that a broken packet
    1829 	 * is not passed to the upper layer.
   1830 	 */
   1831 	for (i = rxr->next_to_check;
   1832 	     (loopcount < limit) || (discard_multidesc == true);) {
   1833 
   1834 		struct mbuf *sendmp, *mp;
   1835 		struct mbuf *newmp;
   1836 		u32         rsc, ptype;
   1837 		u16         len;
   1838 		u16         vtag = 0;
   1839 		bool        eop;
   1840 		bool        discard = false;
   1841 
   1842 		/* Sync the ring. */
   1843 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   1844 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   1845 
   1846 		cur = &rxr->rx_base[i];
   1847 		staterr = le32toh(cur->wb.upper.status_error);
   1848 #ifdef RSS
   1849 		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
   1850 #endif
   1851 
   1852 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
   1853 			break;
   1854 
   1855 		loopcount++;
   1856 		sendmp = newmp = NULL;
   1857 		nbuf = NULL;
   1858 		rsc = 0;
   1859 		cur->wb.upper.status_error = 0;
   1860 		rbuf = &rxr->rx_buffers[i];
   1861 		mp = rbuf->buf;
   1862 
   1863 		len = le16toh(cur->wb.upper.length);
   1864 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
   1865 		    IXGBE_RXDADV_PKTTYPE_MASK;
   1866 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
   1867 
   1868 		/* Make sure bad packets are discarded */
   1869 		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
   1870 #if __FreeBSD_version >= 1100036
   1871 			if (adapter->feat_en & IXGBE_FEATURE_VF)
   1872 				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
   1873 #endif
   1874 			rxr->rx_discarded.ev_count++;
   1875 			ixgbe_rx_discard(rxr, i);
   1876 			discard_multidesc = false;
   1877 			goto next_desc;
   1878 		}
   1879 
   1880 		if (__predict_false(discard_multidesc))
   1881 			discard = true;
   1882 		else {
   1883 			/* Pre-alloc new mbuf. */
   1884 
   1885 			if ((rbuf->fmp == NULL) &&
   1886 			    eop && (len <= adapter->rx_copy_len)) {
   1887 				/* For short packet. See below. */
   1888 				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
   1889 				if (__predict_false(sendmp == NULL)) {
   1890 					rxr->no_mbuf.ev_count++;
   1891 					discard = true;
   1892 				}
   1893 			} else {
   1894 				/* For long packet. */
   1895 				newmp = ixgbe_getcl();
   1896 				if (__predict_false(newmp == NULL)) {
   1897 					rxr->no_mbuf.ev_count++;
   1898 					discard = true;
   1899 				}
   1900 			}
   1901 		}
   1902 
   1903 		if (__predict_false(discard)) {
   1904 			/*
   1905 			 * Descriptor initialization is already done by the
   1906 			 * above code (cur->wb.upper.status_error = 0).
   1907 			 * So, we can reuse current rbuf->buf for new packet.
   1908 			 *
   1909 			 * Rewrite the buffer addr, see comment in
   1910 			 * ixgbe_rx_discard().
   1911 			 */
   1912 			cur->read.pkt_addr = rbuf->addr;
   1913 			m_freem(rbuf->fmp);
   1914 			rbuf->fmp = NULL;
   1915 			if (!eop) {
   1916 				/* Discard the entire packet. */
   1917 				discard_multidesc = true;
   1918 			} else
   1919 				discard_multidesc = false;
   1920 			goto next_desc;
   1921 		}
   1922 		discard_multidesc = false;
   1923 
   1924 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
   1925 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
   1926 
    1927 		/*
    1928 		 * On the 82599, which supports a hardware
    1929 		 * LRO (called HW RSC), packets need not
    1930 		 * be fragmented across sequential
    1931 		 * descriptors; instead the next descriptor
    1932 		 * is indicated in bits of this descriptor.
    1933 		 * This also means that we might process
    1934 		 * more than one packet at a time, something
    1935 		 * that had never been true before and that
    1936 		 * required eliminating the global chain
    1937 		 * pointers in favor of what we do here.  -jfv
    1938 		 */
   1939 		if (!eop) {
   1940 			/*
   1941 			 * Figure out the next descriptor
   1942 			 * of this frame.
   1943 			 */
   1944 			if (rxr->hw_rsc == TRUE) {
   1945 				rsc = ixgbe_rsc_count(cur);
   1946 				rxr->rsc_num += (rsc - 1);
   1947 			}
   1948 			if (rsc) { /* Get hardware index */
   1949 				nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
   1950 				    IXGBE_RXDADV_NEXTP_SHIFT);
   1951 			} else { /* Just sequential */
   1952 				nextp = i + 1;
   1953 				if (nextp == adapter->num_rx_desc)
   1954 					nextp = 0;
   1955 			}
   1956 			nbuf = &rxr->rx_buffers[nextp];
   1957 			prefetch(nbuf);
   1958 		}
   1959 		/*
   1960 		 * Rather than using the fmp/lmp global pointers
   1961 		 * we now keep the head of a packet chain in the
   1962 		 * buffer struct and pass this along from one
   1963 		 * descriptor to the next, until we get EOP.
   1964 		 */
   1965 		/*
    1966 		 * See if there is a stored head from a previous
    1967 		 * descriptor that this buffer continues.
   1968 		 */
   1969 		if (rbuf->fmp != NULL) {
   1970 			/* Secondary frag */
   1971 			sendmp = rbuf->fmp;
   1972 
   1973 			/* Update new (used in future) mbuf */
   1974 			newmp->m_pkthdr.len = newmp->m_len = rxr->mbuf_sz;
   1975 			IXGBE_M_ADJ(adapter, rxr, newmp);
   1976 			rbuf->buf = newmp;
   1977 			rbuf->fmp = NULL;
   1978 
   1979 			/* For secondary frag */
   1980 			mp->m_len = len;
   1981 			mp->m_flags &= ~M_PKTHDR;
   1982 
   1983 			/* For sendmp */
   1984 			sendmp->m_pkthdr.len += mp->m_len;
   1985 		} else {
   1986 			/*
   1987 			 * It's the first segment of a multi descriptor
   1988 			 * packet or a single segment which contains a full
   1989 			 * packet.
   1990 			 */
   1991 
   1992 			if (eop && (len <= adapter->rx_copy_len)) {
   1993 				/*
    1994 				 * Optimization: this might be a small packet,
    1995 				 * maybe just a TCP ACK. Copy it into a new
    1996 				 * mbuf and leave the old mbuf+cluster for re-use.
   1997 				 */
   1998 				sendmp->m_data += ETHER_ALIGN;
   1999 				memcpy(mtod(sendmp, void *),
   2000 				    mtod(mp, void *), len);
   2001 				rxr->rx_copies.ev_count++;
   2002 				rbuf->flags |= IXGBE_RX_COPY;
   2003 			} else {
    2004 				/* Not a short packet */
   2005 
   2006 				/* Update new (used in future) mbuf */
   2007 				newmp->m_pkthdr.len = newmp->m_len
   2008 				    = rxr->mbuf_sz;
   2009 				IXGBE_M_ADJ(adapter, rxr, newmp);
   2010 				rbuf->buf = newmp;
   2011 				rbuf->fmp = NULL;
   2012 
   2013 				/* For sendmp */
   2014 				sendmp = mp;
   2015 			}
   2016 
   2017 			/* first desc of a non-ps chain */
   2018 			sendmp->m_pkthdr.len = sendmp->m_len = len;
   2019 		}
   2020 		++processed;
   2021 
   2022 		/* Pass the head pointer on */
   2023 		if (eop == 0) {
   2024 			nbuf->fmp = sendmp;
   2025 			sendmp = NULL;
   2026 			mp->m_next = nbuf->buf;
   2027 		} else { /* Sending this frame */
   2028 			m_set_rcvif(sendmp, ifp);
   2029 			++rxr->packets;
   2030 			rxr->rx_packets.ev_count++;
   2031 			/* capture data for AIM */
   2032 			rxr->bytes += sendmp->m_pkthdr.len;
   2033 			rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
   2034 			/* Process vlan info */
   2035 			if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
   2036 				vtag = le16toh(cur->wb.upper.vlan);
   2037 			if (vtag) {
   2038 				vlan_set_tag(sendmp, vtag);
   2039 			}
   2040 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
   2041 				ixgbe_rx_checksum(staterr, sendmp, ptype,
   2042 				   &adapter->stats.pf);
   2043 			}
   2044 
   2045 #if 0 /* FreeBSD */
   2046 			/*
   2047 			 * In case of multiqueue, we have RXCSUM.PCSD bit set
   2048 			 * and never cleared. This means we have RSS hash
   2049 			 * available to be used.
   2050 			 */
   2051 			if (adapter->num_queues > 1) {
   2052 				sendmp->m_pkthdr.flowid =
   2053 				    le32toh(cur->wb.lower.hi_dword.rss);
   2054 				switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
   2055 				case IXGBE_RXDADV_RSSTYPE_IPV4:
   2056 					M_HASHTYPE_SET(sendmp,
   2057 					    M_HASHTYPE_RSS_IPV4);
   2058 					break;
   2059 				case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
   2060 					M_HASHTYPE_SET(sendmp,
   2061 					    M_HASHTYPE_RSS_TCP_IPV4);
   2062 					break;
   2063 				case IXGBE_RXDADV_RSSTYPE_IPV6:
   2064 					M_HASHTYPE_SET(sendmp,
   2065 					    M_HASHTYPE_RSS_IPV6);
   2066 					break;
   2067 				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
   2068 					M_HASHTYPE_SET(sendmp,
   2069 					    M_HASHTYPE_RSS_TCP_IPV6);
   2070 					break;
   2071 				case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
   2072 					M_HASHTYPE_SET(sendmp,
   2073 					    M_HASHTYPE_RSS_IPV6_EX);
   2074 					break;
   2075 				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
   2076 					M_HASHTYPE_SET(sendmp,
   2077 					    M_HASHTYPE_RSS_TCP_IPV6_EX);
   2078 					break;
   2079 #if __FreeBSD_version > 1100000
   2080 				case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
   2081 					M_HASHTYPE_SET(sendmp,
   2082 					    M_HASHTYPE_RSS_UDP_IPV4);
   2083 					break;
   2084 				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
   2085 					M_HASHTYPE_SET(sendmp,
   2086 					    M_HASHTYPE_RSS_UDP_IPV6);
   2087 					break;
   2088 				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
   2089 					M_HASHTYPE_SET(sendmp,
   2090 					    M_HASHTYPE_RSS_UDP_IPV6_EX);
   2091 					break;
   2092 #endif
   2093 				default:
   2094 					M_HASHTYPE_SET(sendmp,
   2095 					    M_HASHTYPE_OPAQUE_HASH);
   2096 				}
   2097 			} else {
   2098 				sendmp->m_pkthdr.flowid = que->msix;
   2099 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
   2100 			}
   2101 #endif
   2102 		}
   2103 next_desc:
   2104 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   2105 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   2106 
   2107 		/* Advance our pointers to the next descriptor. */
   2108 		if (++i == rxr->num_desc)
   2109 			i = 0;
   2110 		rxr->next_to_check = i;
   2111 
   2112 		/* Now send to the stack or do LRO */
   2113 		if (sendmp != NULL)
   2114 			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
   2115 
   2116 		/* Every 8 descriptors we go to refresh mbufs */
   2117 		if (processed == 8) {
   2118 			ixgbe_refresh_mbufs(rxr, i);
   2119 			processed = 0;
   2120 		}
   2121 	}
   2122 
   2123 	/* Save the current status */
   2124 	rxr->discard_multidesc = discard_multidesc;
   2125 
   2126 	/* Refresh any remaining buf structs */
   2127 	if (ixgbe_rx_unrefreshed(rxr))
   2128 		ixgbe_refresh_mbufs(rxr, i);
   2129 
   2130 	IXGBE_RX_UNLOCK(rxr);
   2131 
   2132 #ifdef LRO
   2133 	/*
   2134 	 * Flush any outstanding LRO work
   2135 	 */
   2136 	tcp_lro_flush_all(lro);
   2137 #endif /* LRO */
   2138 
   2139 	/*
   2140 	 * Still have cleaning to do?
   2141 	 */
   2142 	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
   2143 		return (TRUE);
   2144 
   2145 	return (FALSE);
   2146 } /* ixgbe_rxeof */
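/*
 * Editor's note: a minimal sketch (illustration only; all names and the
 * threshold are hypothetical) of the rx_copy_len trade-off made in the loop
 * above: a short frame is copied into a small freshly allocated buffer so the
 * large DMA cluster can stay on the ring, while a long frame hands the
 * cluster itself up the stack and forces a new cluster to be attached.
 */
#if 0	/* illustration only */
#include <stdbool.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>

#define TOY_COPY_LEN	256		/* hypothetical copy threshold */

static void *
toy_deliver(void *cluster, size_t len, bool *cluster_consumed)
{
	if (len <= TOY_COPY_LEN) {
		void *copy = malloc(len);

		if (copy != NULL)
			memcpy(copy, cluster, len);
		*cluster_consumed = false;	/* ring keeps its cluster */
		return copy;
	}
	*cluster_consumed = true;	/* caller must refill this slot */
	return cluster;
}
#endif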
   2147 
   2148 
   2149 /************************************************************************
   2150  * ixgbe_rx_checksum
   2151  *
   2152  *   Verify that the hardware indicated that the checksum is valid.
    2153  *   Inform the stack of the checksum status so that the stack
    2154  *   doesn't spend time verifying it again.
   2155  ************************************************************************/
   2156 static void
   2157 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
   2158     struct ixgbe_hw_stats *stats)
   2159 {
   2160 	u16  status = (u16)staterr;
   2161 	u8   errors = (u8)(staterr >> 24);
   2162 #if 0
   2163 	bool sctp = false;
   2164 
   2165 	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
   2166 	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
   2167 		sctp = true;
   2168 #endif
   2169 
   2170 	/* IPv4 checksum */
   2171 	if (status & IXGBE_RXD_STAT_IPCS) {
   2172 		stats->ipcs.ev_count++;
   2173 		if (!(errors & IXGBE_RXD_ERR_IPE)) {
   2174 			/* IP Checksum Good */
   2175 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
   2176 		} else {
   2177 			stats->ipcs_bad.ev_count++;
   2178 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
   2179 		}
   2180 	}
   2181 	/* TCP/UDP/SCTP checksum */
   2182 	if (status & IXGBE_RXD_STAT_L4CS) {
    2183 		int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
    2184 		stats->l4cs.ev_count++;
   2185 		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
   2186 			mp->m_pkthdr.csum_flags |= type;
   2187 		} else {
   2188 			stats->l4cs_bad.ev_count++;
   2189 			mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
   2190 		}
   2191 	}
   2192 } /* ixgbe_rx_checksum */
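/*
 * Editor's note: a worked example (illustration only) of the staterr split
 * performed above: the low 16 bits of the 32-bit status_error word are taken
 * as status flags and bits 24-31 as error flags, so a hypothetical writeback
 * value of 0x03000041 yields status 0x0041 and errors 0x03.  The individual
 * bits are then matched against the IXGBE_RXD_STAT_* and IXGBE_RXD_ERR_*
 * definitions used in the function.
 */
#if 0	/* illustration only */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint32_t staterr = 0x03000041;	/* hypothetical writeback value */
	uint16_t status = (uint16_t)staterr;
	uint8_t  errors = (uint8_t)(staterr >> 24);

	printf("status %#06x, errors %#04x\n", status, errors);
	return 0;
}
#endif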
   2193 
   2194 /************************************************************************
   2195  * ixgbe_dma_malloc
   2196  ************************************************************************/
   2197 int
   2198 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
   2199 		struct ixgbe_dma_alloc *dma, const int mapflags)
   2200 {
   2201 	device_t dev = adapter->dev;
   2202 	int      r, rsegs;
   2203 
   2204 	r = ixgbe_dma_tag_create(
   2205 	     /*      parent */ adapter->osdep.dmat,
   2206 	     /*   alignment */ DBA_ALIGN,
   2207 	     /*      bounds */ 0,
   2208 	     /*     maxsize */ size,
   2209 	     /*   nsegments */ 1,
   2210 	     /*  maxsegsize */ size,
   2211 	     /*       flags */ BUS_DMA_ALLOCNOW,
   2212 			       &dma->dma_tag);
   2213 	if (r != 0) {
   2214 		aprint_error_dev(dev,
   2215 		    "%s: ixgbe_dma_tag_create failed; error %d\n", __func__,
   2216 		    r);
   2217 		goto fail_0;
   2218 	}
   2219 
   2220 	r = bus_dmamem_alloc(dma->dma_tag->dt_dmat, size,
   2221 	    dma->dma_tag->dt_alignment, dma->dma_tag->dt_boundary,
   2222 	    &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
   2223 	if (r != 0) {
   2224 		aprint_error_dev(dev,
   2225 		    "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
   2226 		goto fail_1;
   2227 	}
   2228 
   2229 	r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
   2230 	    size, &dma->dma_vaddr, BUS_DMA_NOWAIT | BUS_DMA_COHERENT);
   2231 	if (r != 0) {
   2232 		aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
   2233 		    __func__, r);
   2234 		goto fail_2;
   2235 	}
   2236 
   2237 	r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
   2238 	if (r != 0) {
    2239 		aprint_error_dev(dev,
    2240 		    "%s: ixgbe_dmamap_create failed; error %d\n", __func__, r);
   2241 		goto fail_3;
   2242 	}
   2243 
   2244 	r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map,
   2245 	    dma->dma_vaddr, size, NULL, mapflags | BUS_DMA_NOWAIT);
   2246 	if (r != 0) {
   2247 		aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
   2248 		    __func__, r);
   2249 		goto fail_4;
   2250 	}
   2251 	dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
   2252 	dma->dma_size = size;
   2253 	return 0;
   2254 fail_4:
   2255 	ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
   2256 fail_3:
   2257 	bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
   2258 fail_2:
   2259 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
   2260 fail_1:
   2261 	ixgbe_dma_tag_destroy(dma->dma_tag);
   2262 fail_0:
   2263 
   2264 	return (r);
   2265 } /* ixgbe_dma_malloc */
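/*
 * Editor's note: ixgbe_dma_malloc() above goes through the driver's
 * ixgbe_dma_tag_* / ixgbe_dmamap_* wrappers; the sketch below (illustration
 * only; the function name and parameters are hypothetical, and it assumes an
 * already-created tag) shows the same allocate/map/create/load sequence with
 * the raw bus_dma(9) calls, and a teardown path that mirrors it in reverse.
 */
#if 0	/* illustration only */
static int
toy_dma_alloc(bus_dma_tag_t dmat, bus_size_t size, bus_dma_segment_t *seg,
    bus_dmamap_t *mapp, void **vaddrp)
{
	int rsegs, error;

	/* 1. allocate DMA-safe memory */
	error = bus_dmamem_alloc(dmat, size, PAGE_SIZE, 0, seg, 1, &rsegs,
	    BUS_DMA_NOWAIT);
	if (error)
		return error;
	/* 2. map it into kernel virtual address space */
	error = bus_dmamem_map(dmat, seg, rsegs, size, vaddrp,
	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT);
	if (error)
		goto free;
	/* 3. create a DMA map... */
	error = bus_dmamap_create(dmat, size, 1, size, 0, BUS_DMA_NOWAIT,
	    mapp);
	if (error)
		goto unmap;
	/* 4. ...and load the memory into it to learn the bus address */
	error = bus_dmamap_load(dmat, *mapp, *vaddrp, size, NULL,
	    BUS_DMA_NOWAIT);
	if (error)
		goto destroy;
	return 0;

destroy:
	bus_dmamap_destroy(dmat, *mapp);
unmap:
	bus_dmamem_unmap(dmat, *vaddrp, size);
free:
	bus_dmamem_free(dmat, seg, rsegs);
	return error;
}
#endif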
   2266 
   2267 /************************************************************************
   2268  * ixgbe_dma_free
   2269  ************************************************************************/
   2270 void
   2271 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
   2272 {
   2273 	bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
   2274 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   2275 	ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
   2276 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
   2277 	ixgbe_dma_tag_destroy(dma->dma_tag);
   2278 } /* ixgbe_dma_free */
   2279 
   2280 
   2281 /************************************************************************
   2282  * ixgbe_allocate_queues
   2283  *
   2284  *   Allocate memory for the transmit and receive rings, and then
    2285  *   the descriptors associated with each; called only once at attach.
   2286  ************************************************************************/
   2287 int
   2288 ixgbe_allocate_queues(struct adapter *adapter)
   2289 {
   2290 	device_t	dev = adapter->dev;
   2291 	struct ix_queue	*que;
   2292 	struct tx_ring	*txr;
   2293 	struct rx_ring	*rxr;
   2294 	int             rsize, tsize, error = IXGBE_SUCCESS;
   2295 	int             txconf = 0, rxconf = 0;
   2296 
   2297 	/* First, allocate the top level queue structs */
   2298 	adapter->queues = (struct ix_queue *)malloc(sizeof(struct ix_queue) *
   2299 	    adapter->num_queues, M_DEVBUF, M_WAITOK | M_ZERO);
   2300 
   2301 	/* Second, allocate the TX ring struct memory */
   2302 	adapter->tx_rings = malloc(sizeof(struct tx_ring) *
   2303 	    adapter->num_queues, M_DEVBUF, M_WAITOK | M_ZERO);
   2304 
   2305 	/* Third, allocate the RX ring */
   2306 	adapter->rx_rings = (struct rx_ring *)malloc(sizeof(struct rx_ring) *
   2307 	    adapter->num_queues, M_DEVBUF, M_WAITOK | M_ZERO);
   2308 
   2309 	/* For the ring itself */
   2310 	tsize = roundup2(adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc),
   2311 	    DBA_ALIGN);
   2312 
   2313 	/*
    2314 	 * Now set up the TX queues; txconf is needed to handle the
    2315 	 * possibility that things fail midcourse, in which case we
    2316 	 * need to undo the memory allocations gracefully.
   2317 	 */
   2318 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
   2319 		/* Set up some basics */
   2320 		txr = &adapter->tx_rings[i];
   2321 		txr->adapter = adapter;
   2322 		txr->txr_interq = NULL;
   2323 		/* In case SR-IOV is enabled, align the index properly */
   2324 #ifdef PCI_IOV
   2325 		txr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
   2326 		    i);
   2327 #else
   2328 		txr->me = i;
   2329 #endif
   2330 		txr->num_desc = adapter->num_tx_desc;
   2331 
   2332 		/* Initialize the TX side lock */
   2333 		mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
   2334 
   2335 		if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma,
   2336 		    BUS_DMA_NOWAIT)) {
   2337 			aprint_error_dev(dev,
   2338 			    "Unable to allocate TX Descriptor memory\n");
   2339 			error = ENOMEM;
   2340 			goto err_tx_desc;
   2341 		}
   2342 		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
   2343 		bzero((void *)txr->tx_base, tsize);
   2344 
   2345 		/* Now allocate transmit buffers for the ring */
   2346 		if (ixgbe_allocate_transmit_buffers(txr)) {
   2347 			aprint_error_dev(dev,
   2348 			    "Critical Failure setting up transmit buffers\n");
   2349 			error = ENOMEM;
   2350 			goto err_tx_desc;
   2351 		}
   2352 		if (!(adapter->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
   2353 			/* Allocate a buf ring */
   2354 			txr->txr_interq = pcq_create(IXGBE_BR_SIZE, KM_SLEEP);
   2355 			if (txr->txr_interq == NULL) {
   2356 				aprint_error_dev(dev,
   2357 				    "Critical Failure setting up buf ring\n");
   2358 				error = ENOMEM;
   2359 				goto err_tx_desc;
   2360 			}
   2361 		}
   2362 	}
   2363 
   2364 	/*
   2365 	 * Next the RX queues...
   2366 	 */
   2367 	rsize = roundup2(adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc),
   2368 	    DBA_ALIGN);
   2369 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
   2370 		rxr = &adapter->rx_rings[i];
   2371 		/* Set up some basics */
   2372 		rxr->adapter = adapter;
   2373 #ifdef PCI_IOV
   2374 		/* In case SR-IOV is enabled, align the index properly */
   2375 		rxr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
   2376 		    i);
   2377 #else
   2378 		rxr->me = i;
   2379 #endif
   2380 		rxr->num_desc = adapter->num_rx_desc;
   2381 
   2382 		/* Initialize the RX side lock */
   2383 		mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
   2384 
   2385 		if (ixgbe_dma_malloc(adapter, rsize, &rxr->rxdma,
   2386 		    BUS_DMA_NOWAIT)) {
   2387 			aprint_error_dev(dev,
   2388 			    "Unable to allocate RxDescriptor memory\n");
   2389 			error = ENOMEM;
   2390 			goto err_rx_desc;
   2391 		}
   2392 		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
   2393 		bzero((void *)rxr->rx_base, rsize);
   2394 
   2395 		/* Allocate receive buffers for the ring */
   2396 		if (ixgbe_allocate_receive_buffers(rxr)) {
   2397 			aprint_error_dev(dev,
   2398 			    "Critical Failure setting up receive buffers\n");
   2399 			error = ENOMEM;
   2400 			goto err_rx_desc;
   2401 		}
   2402 	}
   2403 
   2404 	/*
   2405 	 * Finally set up the queue holding structs
   2406 	 */
   2407 	for (int i = 0; i < adapter->num_queues; i++) {
   2408 		que = &adapter->queues[i];
   2409 		que->adapter = adapter;
   2410 		que->me = i;
   2411 		que->txr = &adapter->tx_rings[i];
   2412 		que->rxr = &adapter->rx_rings[i];
   2413 
   2414 		mutex_init(&que->dc_mtx, MUTEX_DEFAULT, IPL_NET);
   2415 		que->disabled_count = 0;
   2416 	}
   2417 
   2418 	return (0);
   2419 
   2420 err_rx_desc:
   2421 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
   2422 		ixgbe_dma_free(adapter, &rxr->rxdma);
   2423 err_tx_desc:
   2424 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
   2425 		ixgbe_dma_free(adapter, &txr->txdma);
   2426 	free(adapter->rx_rings, M_DEVBUF);
   2427 	free(adapter->tx_rings, M_DEVBUF);
   2428 	free(adapter->queues, M_DEVBUF);
   2429 	return (error);
   2430 } /* ixgbe_allocate_queues */
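/*
 * Editor's note: the txconf/rxconf counters above implement a common
 * "unwind exactly what was set up" pattern: each fully configured ring bumps
 * the counter, and the error path walks back that many entries and no more.
 * A minimal, self-contained sketch (hypothetical names, not driver code):
 */
#if 0	/* illustration only */
#include <stdlib.h>

struct toy_ring {
	void *mem;
};

static int
toy_alloc_rings(struct toy_ring *rings, int n)
{
	int conf;

	for (conf = 0; conf < n; conf++) {
		rings[conf].mem = malloc(4096);
		if (rings[conf].mem == NULL)
			goto fail;
	}
	return 0;

fail:
	/* free only the rings that were fully set up */
	while (conf-- > 0)
		free(rings[conf].mem);
	return -1;
}
#endif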
   2431 
   2432 /************************************************************************
   2433  * ixgbe_free_queues
   2434  *
   2435  *   Free descriptors for the transmit and receive rings, and then
   2436  *   the memory associated with each.
   2437  ************************************************************************/
   2438 void
   2439 ixgbe_free_queues(struct adapter *adapter)
   2440 {
   2441 	struct ix_queue *que;
   2442 	int i;
   2443 
   2444 	ixgbe_free_transmit_structures(adapter);
   2445 	ixgbe_free_receive_structures(adapter);
   2446 	for (i = 0; i < adapter->num_queues; i++) {
   2447 		que = &adapter->queues[i];
   2448 		mutex_destroy(&que->dc_mtx);
   2449 	}
   2450 	free(adapter->queues, M_DEVBUF);
   2451 } /* ixgbe_free_queues */
   2452