      1 /* $NetBSD: ix_txrx.c,v 1.96 2022/04/25 07:48:53 msaitoh Exp $ */
      2 
      3 /******************************************************************************
      4 
      5   Copyright (c) 2001-2017, Intel Corporation
      6   All rights reserved.
      7 
      8   Redistribution and use in source and binary forms, with or without
      9   modification, are permitted provided that the following conditions are met:
     10 
     11    1. Redistributions of source code must retain the above copyright notice,
     12       this list of conditions and the following disclaimer.
     13 
     14    2. Redistributions in binary form must reproduce the above copyright
     15       notice, this list of conditions and the following disclaimer in the
     16       documentation and/or other materials provided with the distribution.
     17 
     18    3. Neither the name of the Intel Corporation nor the names of its
     19       contributors may be used to endorse or promote products derived from
     20       this software without specific prior written permission.
     21 
     22   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     23   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     24   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     25   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     26   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     27   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     28   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     29   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     30   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     31   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     32   POSSIBILITY OF SUCH DAMAGE.
     33 
     34 ******************************************************************************/
     35 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 327031 2017-12-20 18:15:06Z erj $*/
     36 
     37 /*
     38  * Copyright (c) 2011 The NetBSD Foundation, Inc.
     39  * All rights reserved.
     40  *
     41  * This code is derived from software contributed to The NetBSD Foundation
     42  * by Coyote Point Systems, Inc.
     43  *
     44  * Redistribution and use in source and binary forms, with or without
     45  * modification, are permitted provided that the following conditions
     46  * are met:
     47  * 1. Redistributions of source code must retain the above copyright
     48  *    notice, this list of conditions and the following disclaimer.
     49  * 2. Redistributions in binary form must reproduce the above copyright
     50  *    notice, this list of conditions and the following disclaimer in the
     51  *    documentation and/or other materials provided with the distribution.
     52  *
     53  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     54  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     55  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     56  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     57  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     58  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     59  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     60  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     61  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     62  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     63  * POSSIBILITY OF SUCH DAMAGE.
     64  */
     65 
     66 #include <sys/cdefs.h>
     67 __KERNEL_RCSID(0, "$NetBSD: ix_txrx.c,v 1.96 2022/04/25 07:48:53 msaitoh Exp $");
     68 
     69 #include "opt_inet.h"
     70 #include "opt_inet6.h"
     71 
     72 #include "ixgbe.h"
     73 
     74 /*
     75  * HW RSC control:
     76  *  This feature only works with
     77  *  IPv4, and only on 82599 and later.
     78  *  It also breaks IP forwarding and,
     79  *  unlike LRO, cannot be disabled by
     80  *  the stack. For all these reasons
     81  *  it is best left off by default,
     82  *  with no tunable interface; enabling
     83  *  it requires recompiling with this
     84  *  set to TRUE.
     85  */
     86 static bool ixgbe_rsc_enable = FALSE;
     87 
     88 /*
     89  * For Flow Director: this is the
     90  * number of TX packets we sample
     91  * for the filter pool; with the default,
     92  * every 20th packet is probed.
     93  *
     94  * This feature can be disabled by
     95  * setting this to 0.
     96  */
     97 static int atr_sample_rate = 20;
     98 
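        /*
         * Strip ETHER_ALIGN (2) bytes from the front of a freshly allocated
         * RX mbuf cluster so the IP header ends up 32-bit aligned, but only
         * when the largest expected frame still fits in the shrunken buffer.
         */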
     99 #define IXGBE_M_ADJ(adapter, rxr, mp)					\
    100 	if (adapter->max_frame_size <= (rxr->mbuf_sz - ETHER_ALIGN))	\
    101 		m_adj(mp, ETHER_ALIGN)
    102 
    103 /************************************************************************
    104  *  Local Function prototypes
    105  ************************************************************************/
    106 static void          ixgbe_setup_transmit_ring(struct tx_ring *);
    107 static void          ixgbe_free_transmit_buffers(struct tx_ring *);
    108 static int           ixgbe_setup_receive_ring(struct rx_ring *);
    109 static void          ixgbe_free_receive_buffers(struct rx_ring *);
    110 static void          ixgbe_rx_checksum(u32, struct mbuf *, u32,
    111                                        struct ixgbe_hw_stats *);
    112 static void          ixgbe_refresh_mbufs(struct rx_ring *, int);
    113 static void          ixgbe_drain(struct ifnet *, struct tx_ring *);
    114 static int           ixgbe_xmit(struct tx_ring *, struct mbuf *);
    115 static int           ixgbe_tx_ctx_setup(struct tx_ring *,
    116                                         struct mbuf *, u32 *, u32 *);
    117 static int           ixgbe_tso_setup(struct tx_ring *,
    118                                      struct mbuf *, u32 *, u32 *);
    119 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
    120 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
    121                                     struct mbuf *, u32);
    122 static int           ixgbe_dma_malloc(struct adapter *, bus_size_t,
    123                                       struct ixgbe_dma_alloc *, int);
    124 static void          ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
    125 
    126 static void	ixgbe_setup_hw_rsc(struct rx_ring *);
    127 
    128 /************************************************************************
    129  * ixgbe_legacy_start_locked - Transmit entry point
    130  *
    131  *   Called by the stack to initiate a transmit.
    132  *   The driver will remain in this routine as long as there are
    133  *   packets to transmit and transmit resources are available.
    134  *   In case resources are not available, the stack is notified
    135  *   and the packet is requeued.
    136  ************************************************************************/
    137 int
    138 ixgbe_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
    139 {
    140 	int rc;
    141 	struct mbuf    *m_head;
    142 	struct adapter *adapter = txr->adapter;
    143 
    144 	IXGBE_TX_LOCK_ASSERT(txr);
    145 
    146 	if (adapter->link_active != LINK_STATE_UP) {
    147 		/*
    148 		 * Discard all packets buffered in the IFQ to avoid
    149 		 * sending stale packets when the link comes back up.
    150 		 */
    151 		ixgbe_drain(ifp, txr);
    152 		return (ENETDOWN);
    153 	}
    154 	if ((ifp->if_flags & IFF_RUNNING) == 0)
    155 		return (ENETDOWN);
    156 	if (txr->txr_no_space)
    157 		return (ENETDOWN);
    158 
    159 	while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
    160 		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
    161 			break;
    162 
    163 		IFQ_POLL(&ifp->if_snd, m_head);
    164 		if (m_head == NULL)
    165 			break;
    166 
    167 		if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
    168 			break;
    169 		}
    170 		IFQ_DEQUEUE(&ifp->if_snd, m_head);
    171 		if (rc != 0) {
    172 			m_freem(m_head);
    173 			continue;
    174 		}
    175 
    176 		/* Send a copy of the frame to the BPF listener */
    177 		bpf_mtap(ifp, m_head, BPF_D_OUT);
    178 	}
    179 
    180 	return IXGBE_SUCCESS;
    181 } /* ixgbe_legacy_start_locked */
    182 
    183 /************************************************************************
    184  * ixgbe_legacy_start
    185  *
    186  *   Called by the stack, this always uses the first tx ring,
    187  *   and should not be used with multiqueue tx enabled.
    188  ************************************************************************/
    189 void
    190 ixgbe_legacy_start(struct ifnet *ifp)
    191 {
    192 	struct adapter *adapter = ifp->if_softc;
    193 	struct tx_ring *txr = adapter->tx_rings;
    194 
    195 	if (ifp->if_flags & IFF_RUNNING) {
    196 		IXGBE_TX_LOCK(txr);
    197 		ixgbe_legacy_start_locked(ifp, txr);
    198 		IXGBE_TX_UNLOCK(txr);
    199 	}
    200 } /* ixgbe_legacy_start */
    201 
    202 /************************************************************************
    203  * ixgbe_mq_start - Multiqueue Transmit Entry Point
    204  *
    205  *   (if_transmit function)
    206  ************************************************************************/
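        /*
         * The packet is queued on the selected ring's pcq; the queue is then
         * drained inline if the TX lock can be taken, otherwise draining is
         * deferred to the per-ring softint or workqueue.
         */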
    207 int
    208 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
    209 {
    210 	struct adapter	*adapter = ifp->if_softc;
    211 	struct tx_ring	*txr;
    212 	int		i;
    213 #ifdef RSS
    214 	uint32_t bucket_id;
    215 #endif
    216 
    217 	/*
    218 	 * When doing RSS, map it to the same outbound queue
    219 	 * as the incoming flow would be mapped to.
    220 	 *
    221 	 * If everything is set up correctly, it should be the
    222 	 * same bucket as the one the current CPU maps to.
    223 	 */
    224 #ifdef RSS
    225 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
    226 		if ((adapter->feat_en & IXGBE_FEATURE_RSS) &&
    227 		    (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
    228 		    &bucket_id) == 0)) {
    229 			i = bucket_id % adapter->num_queues;
    230 #ifdef IXGBE_DEBUG
    231 			if (bucket_id > adapter->num_queues)
    232 				if_printf(ifp,
    233 				    "bucket_id (%d) > num_queues (%d)\n",
    234 				    bucket_id, adapter->num_queues);
    235 #endif
    236 		} else
    237 			i = m->m_pkthdr.flowid % adapter->num_queues;
    238 	} else
    239 #endif /* RSS */
    240 		i = (cpu_index(curcpu()) % ncpu) % adapter->num_queues;
    241 
    242 	/* Check for a hung queue and pick alternative */
    243 	if (((1ULL << i) & adapter->active_queues) == 0)
    244 		i = ffs64(adapter->active_queues);
    245 
    246 	txr = &adapter->tx_rings[i];
    247 
    248 	if (__predict_false(!pcq_put(txr->txr_interq, m))) {
    249 		m_freem(m);
    250 		IXGBE_EVC_ADD(&txr->pcq_drops, 1);
    251 		return ENOBUFS;
    252 	}
    253 	if (IXGBE_TX_TRYLOCK(txr)) {
    254 		ixgbe_mq_start_locked(ifp, txr);
    255 		IXGBE_TX_UNLOCK(txr);
    256 	} else {
    257 		if (adapter->txrx_use_workqueue) {
    258 			u_int *enqueued;
    259 
    260 			/*
    261 			 * This function itself is not called in interrupt
    262 			 * context, but it can run in fast softint context
    263 			 * right after receiving forwarded packets. The
    264 			 * workqueue must therefore be protected against
    265 			 * being enqueued twice when the machine handles
    266 			 * both locally generated and forwarded packets.
    267 			 */
    268 			enqueued = percpu_getref(adapter->txr_wq_enqueued);
    269 			if (*enqueued == 0) {
    270 				*enqueued = 1;
    271 				percpu_putref(adapter->txr_wq_enqueued);
    272 				workqueue_enqueue(adapter->txr_wq,
    273 				    &txr->wq_cookie, curcpu());
    274 			} else
    275 				percpu_putref(adapter->txr_wq_enqueued);
    276 		} else {
    277 			kpreempt_disable();
    278 			softint_schedule(txr->txr_si);
    279 			kpreempt_enable();
    280 		}
    281 	}
    282 
    283 	return (0);
    284 } /* ixgbe_mq_start */
    285 
    286 /************************************************************************
    287  * ixgbe_mq_start_locked
    288  ************************************************************************/
    289 int
    290 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
    291 {
    292 	struct mbuf    *next;
    293 	int            enqueued = 0, err = 0;
    294 
    295 	if (txr->adapter->link_active != LINK_STATE_UP) {
    296 		/*
    297 		 * Discard all packets buffered in txr_interq to avoid
    298 		 * sending stale packets when the link comes back up.
    299 		 */
    300 		ixgbe_drain(ifp, txr);
    301 		return (ENETDOWN);
    302 	}
    303 	if ((ifp->if_flags & IFF_RUNNING) == 0)
    304 		return (ENETDOWN);
    305 	if (txr->txr_no_space)
    306 		return (ENETDOWN);
    307 
    308 	/* Process the queue */
    309 	while ((next = pcq_get(txr->txr_interq)) != NULL) {
    310 		if ((err = ixgbe_xmit(txr, next)) != 0) {
    311 			m_freem(next);
    312 			/* All errors are counted in ixgbe_xmit() */
    313 			break;
    314 		}
    315 		enqueued++;
    316 #if __FreeBSD_version >= 1100036
    317 		/*
    318 		 * Since we're looking at the tx ring, we can check
    319 		 * to see if we're a VF by examining our tail register
    320 		 * address.
    321 		 */
    322 		if ((txr->adapter->feat_en & IXGBE_FEATURE_VF) &&
    323 		    (next->m_flags & M_MCAST))
    324 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
    325 #endif
    326 		/* Send a copy of the frame to the BPF listener */
    327 		bpf_mtap(ifp, next, BPF_D_OUT);
    328 		if ((ifp->if_flags & IFF_RUNNING) == 0)
    329 			break;
    330 	}
    331 
    332 	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->adapter))
    333 		ixgbe_txeof(txr);
    334 
    335 	return (err);
    336 } /* ixgbe_mq_start_locked */
    337 
    338 /************************************************************************
    339  * ixgbe_deferred_mq_start
    340  *
    341  *   Called from a softint, and indirectly from a workqueue, to drain queued
    342  *   transmit packets.
    343  ************************************************************************/
    344 void
    345 ixgbe_deferred_mq_start(void *arg)
    346 {
    347 	struct tx_ring *txr = arg;
    348 	struct adapter *adapter = txr->adapter;
    349 	struct ifnet   *ifp = adapter->ifp;
    350 
    351 	IXGBE_TX_LOCK(txr);
    352 	if (pcq_peek(txr->txr_interq) != NULL)
    353 		ixgbe_mq_start_locked(ifp, txr);
    354 	IXGBE_TX_UNLOCK(txr);
    355 } /* ixgbe_deferred_mq_start */
    356 
    357 /************************************************************************
    358  * ixgbe_deferred_mq_start_work
    359  *
    360  *   Called from a workqueue to drain queued transmit packets.
    361  ************************************************************************/
    362 void
    363 ixgbe_deferred_mq_start_work(struct work *wk, void *arg)
    364 {
    365 	struct tx_ring *txr = container_of(wk, struct tx_ring, wq_cookie);
    366 	struct adapter *adapter = txr->adapter;
    367 	u_int *enqueued = percpu_getref(adapter->txr_wq_enqueued);
    368 	*enqueued = 0;
    369 	percpu_putref(adapter->txr_wq_enqueued);
    370 
    371 	ixgbe_deferred_mq_start(txr);
    372 } /* ixgbe_deferred_mq_start_work */
    373 
    374 /************************************************************************
    375  * ixgbe_drain_all
    376  ************************************************************************/
    377 void
    378 ixgbe_drain_all(struct adapter *adapter)
    379 {
    380 	struct ifnet *ifp = adapter->ifp;
    381 	struct ix_queue *que = adapter->queues;
    382 
    383 	for (int i = 0; i < adapter->num_queues; i++, que++) {
    384 		struct tx_ring  *txr = que->txr;
    385 
    386 		IXGBE_TX_LOCK(txr);
    387 		ixgbe_drain(ifp, txr);
    388 		IXGBE_TX_UNLOCK(txr);
    389 	}
    390 }
    391 
    392 /************************************************************************
    393  * ixgbe_xmit
    394  *
    395  *   Maps the mbufs to tx descriptors, allowing the
    396  *   TX engine to transmit the packets.
    397  *
    398  *   Return 0 on success, positive on failure
    399  ************************************************************************/
    400 static int
    401 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
    402 {
    403 	struct adapter          *adapter = txr->adapter;
    404 	struct ixgbe_tx_buf     *txbuf;
    405 	union ixgbe_adv_tx_desc *txd = NULL;
    406 	struct ifnet	        *ifp = adapter->ifp;
    407 	int                     i, j, error;
    408 	int                     first;
    409 	u32                     olinfo_status = 0, cmd_type_len;
    410 	bool                    remap = TRUE;
    411 	bus_dmamap_t            map;
    412 
    413 	/* Basic descriptor defines */
    414 	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
    415 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
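        	/*
        	 * DTYP_DATA selects an advanced data descriptor, IFCS asks the
        	 * MAC to insert the Ethernet FCS, and DEXT marks the extended
        	 * (advanced) descriptor format.
        	 */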
    416 
    417 	if (vlan_has_tag(m_head))
    418 		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
    419 
    420 	/*
    421 	 * It is important to capture the first descriptor
    422 	 * used because it will contain the index of the
    423 	 * one we tell the hardware to report back.
    424 	 */
    425 	first = txr->next_avail_desc;
    426 	txbuf = &txr->tx_buffers[first];
    427 	map = txbuf->map;
    428 
    429 	/*
    430 	 * Map the packet for DMA.
    431 	 */
    432 retry:
    433 	error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map, m_head,
    434 	    BUS_DMA_NOWAIT);
    435 
    436 	if (__predict_false(error)) {
    437 		struct mbuf *m;
    438 
    439 		switch (error) {
    440 		case EAGAIN:
    441 			txr->q_eagain_tx_dma_setup++;
    442 			return EAGAIN;
    443 		case ENOMEM:
    444 			txr->q_enomem_tx_dma_setup++;
    445 			return EAGAIN;
    446 		case EFBIG:
    447 			/* Try it again? - one try */
    448 			if (remap == TRUE) {
    449 				remap = FALSE;
    450 				/*
    451 				 * XXX: m_defrag will choke on
    452 				 * non-MCLBYTES-sized clusters
    453 				 */
    454 				txr->q_efbig_tx_dma_setup++;
    455 				m = m_defrag(m_head, M_NOWAIT);
    456 				if (m == NULL) {
    457 					txr->q_mbuf_defrag_failed++;
    458 					return ENOBUFS;
    459 				}
    460 				m_head = m;
    461 				goto retry;
    462 			} else {
    463 				txr->q_efbig2_tx_dma_setup++;
    464 				return error;
    465 			}
    466 		case EINVAL:
    467 			txr->q_einval_tx_dma_setup++;
    468 			return error;
    469 		default:
    470 			txr->q_other_tx_dma_setup++;
    471 			return error;
    472 		}
    473 	}
    474 
    475 	/* Make certain there are enough descriptors */
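        	/* (one per DMA segment, plus the offload context descriptor and slack) */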
    476 	if (txr->tx_avail < (map->dm_nsegs + 2)) {
    477 		txr->txr_no_space = true;
    478 		IXGBE_EVC_ADD(&txr->no_desc_avail, 1);
    479 		ixgbe_dmamap_unload(txr->txtag, txbuf->map);
    480 		return EAGAIN;
    481 	}
    482 
    483 	/*
    484 	 * Set up the appropriate offload context
    485 	 * this will consume the first descriptor
    486 	 */
    487 	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
    488 	if (__predict_false(error)) {
    489 		return (error);
    490 	}
    491 
    492 #ifdef IXGBE_FDIR
    493 	/* Do the flow director magic */
    494 	if ((adapter->feat_en & IXGBE_FEATURE_FDIR) &&
    495 	    (txr->atr_sample) && (!adapter->fdir_reinit)) {
    496 		++txr->atr_count;
    497 		if (txr->atr_count >= atr_sample_rate) {
    498 			ixgbe_atr(txr, m_head);
    499 			txr->atr_count = 0;
    500 		}
    501 	}
    502 #endif
    503 
    504 	olinfo_status |= IXGBE_ADVTXD_CC;
    505 	i = txr->next_avail_desc;
    506 	for (j = 0; j < map->dm_nsegs; j++) {
    507 		bus_size_t seglen;
    508 		uint64_t segaddr;
    509 
    510 		txbuf = &txr->tx_buffers[i];
    511 		txd = &txr->tx_base[i];
    512 		seglen = map->dm_segs[j].ds_len;
    513 		segaddr = htole64(map->dm_segs[j].ds_addr);
    514 
    515 		txd->read.buffer_addr = segaddr;
    516 		txd->read.cmd_type_len = htole32(cmd_type_len | seglen);
    517 		txd->read.olinfo_status = htole32(olinfo_status);
    518 
    519 		if (++i == txr->num_desc)
    520 			i = 0;
    521 	}
    522 
    523 	txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
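        	/*
        	 * EOP marks the final descriptor of the frame; RS requests a
        	 * status writeback so ixgbe_txeof() can see the DD bit when
        	 * the transmit completes.
        	 */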
    524 	txr->tx_avail -= map->dm_nsegs;
    525 	txr->next_avail_desc = i;
    526 
    527 	txbuf->m_head = m_head;
    528 	/*
    529 	 * Here we swap the maps so the last descriptor,
    530 	 * which gets the completion interrupt, has the
    531 	 * real map, and the first descriptor gets the
    532 	 * unused map from this last descriptor.
    533 	 */
    534 	txr->tx_buffers[first].map = txbuf->map;
    535 	txbuf->map = map;
    536 	bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
    537 	    BUS_DMASYNC_PREWRITE);
    538 
    539 	/* Set the EOP descriptor that will be marked done */
    540 	txbuf = &txr->tx_buffers[first];
    541 	txbuf->eop = txd;
    542 
    543 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
    544 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
    545 	/*
    546 	 * Advance the Transmit Descriptor Tail (TDT); this tells the
    547 	 * hardware that this frame is available to transmit.
    548 	 */
    549 	IXGBE_EVC_ADD(&txr->total_packets, 1);
    550 	IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
    551 
    552 	net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
    553 	if_statadd_ref(nsr, if_obytes, m_head->m_pkthdr.len);
    554 	if (m_head->m_flags & M_MCAST)
    555 		if_statinc_ref(nsr, if_omcasts);
    556 	IF_STAT_PUTREF(ifp);
    557 
    558 	/* Mark queue as having work */
    559 	if (txr->busy == 0)
    560 		txr->busy = 1;
    561 
    562 	return (0);
    563 } /* ixgbe_xmit */
    564 
    565 /************************************************************************
    566  * ixgbe_drain
    567  ************************************************************************/
    568 static void
    569 ixgbe_drain(struct ifnet *ifp, struct tx_ring *txr)
    570 {
    571 	struct mbuf *m;
    572 
    573 	IXGBE_TX_LOCK_ASSERT(txr);
    574 
    575 	if (txr->me == 0) {
    576 		while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
    577 			IFQ_DEQUEUE(&ifp->if_snd, m);
    578 			m_freem(m);
    579 			IF_DROP(&ifp->if_snd);
    580 		}
    581 	}
    582 
    583 	while ((m = pcq_get(txr->txr_interq)) != NULL) {
    584 		m_freem(m);
    585 		IXGBE_EVC_ADD(&txr->pcq_drops, 1);
    586 	}
    587 }
    588 
    589 /************************************************************************
    590  * ixgbe_allocate_transmit_buffers
    591  *
    592  *   Allocate memory for tx_buffer structures. The tx_buffer stores all
    593  *   the information needed to transmit a packet on the wire. This is
    594  *   called only once at attach; setup is done on every reset.
    595  ************************************************************************/
    596 static int
    597 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
    598 {
    599 	struct adapter      *adapter = txr->adapter;
    600 	device_t            dev = adapter->dev;
    601 	struct ixgbe_tx_buf *txbuf;
    602 	int                 error, i;
    603 
    604 	/*
    605 	 * Setup DMA descriptor areas.
    606 	 */
    607 	error = ixgbe_dma_tag_create(
    608 	         /*      parent */ adapter->osdep.dmat,
    609 	         /*   alignment */ 1,
    610 	         /*      bounds */ 0,
    611 	         /*     maxsize */ IXGBE_TSO_SIZE,
    612 	         /*   nsegments */ adapter->num_segs,
    613 	         /*  maxsegsize */ PAGE_SIZE,
    614 	         /*       flags */ 0,
    615 	                           &txr->txtag);
    616 	if (error != 0) {
    617 		aprint_error_dev(dev, "Unable to allocate TX DMA tag\n");
    618 		goto fail;
    619 	}
    620 
    621 	txr->tx_buffers = malloc(sizeof(struct ixgbe_tx_buf) *
    622 	    adapter->num_tx_desc, M_DEVBUF, M_WAITOK | M_ZERO);
    623 
    624 	/* Create the descriptor buffer dma maps */
    625 	txbuf = txr->tx_buffers;
    626 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
    627 		error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
    628 		if (error != 0) {
    629 			aprint_error_dev(dev,
    630 			    "Unable to create TX DMA map (%d)\n", error);
    631 			goto fail;
    632 		}
    633 	}
    634 
    635 	return 0;
    636 fail:
    637 	/* Free everything; this handles the case where we fail partway through */
    638 #if 0 /* XXX was FreeBSD */
    639 	ixgbe_free_transmit_structures(adapter);
    640 #else
    641 	ixgbe_free_transmit_buffers(txr);
    642 #endif
    643 	return (error);
    644 } /* ixgbe_allocate_transmit_buffers */
    645 
    646 /************************************************************************
    647  * ixgbe_setup_transmit_ring - Initialize a transmit ring.
    648  ************************************************************************/
    649 static void
    650 ixgbe_setup_transmit_ring(struct tx_ring *txr)
    651 {
    652 	struct adapter        *adapter = txr->adapter;
    653 	struct ixgbe_tx_buf   *txbuf;
    654 #ifdef DEV_NETMAP
    655 	struct netmap_adapter *na = NA(adapter->ifp);
    656 	struct netmap_slot    *slot;
    657 #endif /* DEV_NETMAP */
    658 
    659 	/* Clear the old ring contents */
    660 	IXGBE_TX_LOCK(txr);
    661 
    662 #ifdef DEV_NETMAP
    663 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
    664 		/*
    665 		 * (under lock): if in netmap mode, do some consistency
    666 		 * checks and set slot to entry 0 of the netmap ring.
    667 		 */
    668 		slot = netmap_reset(na, NR_TX, txr->me, 0);
    669 	}
    670 #endif /* DEV_NETMAP */
    671 
    672 	bzero((void *)txr->tx_base,
    673 	    (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
    674 	/* Reset indices */
    675 	txr->next_avail_desc = 0;
    676 	txr->next_to_clean = 0;
    677 
    678 	/* Free any existing tx buffers. */
    679 	txbuf = txr->tx_buffers;
    680 	for (int i = 0; i < txr->num_desc; i++, txbuf++) {
    681 		if (txbuf->m_head != NULL) {
    682 			bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
    683 			    0, txbuf->m_head->m_pkthdr.len,
    684 			    BUS_DMASYNC_POSTWRITE);
    685 			ixgbe_dmamap_unload(txr->txtag, txbuf->map);
    686 			m_freem(txbuf->m_head);
    687 			txbuf->m_head = NULL;
    688 		}
    689 
    690 #ifdef DEV_NETMAP
    691 		/*
    692 		 * In netmap mode, set the map for the packet buffer.
    693 		 * NOTE: Some drivers (not this one) also need to set
    694 		 * the physical buffer address in the NIC ring.
    695 		 * Slots in the netmap ring (indexed by "si") are
    696 		 * kring->nkr_hwofs positions "ahead" wrt the
    697 		 * corresponding slot in the NIC ring. In some drivers
    698 		 * (not here) nkr_hwofs can be negative. Function
    699 		 * netmap_idx_n2k() handles wraparounds properly.
    700 		 */
    701 		if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
    702 			int si = netmap_idx_n2k(na->tx_rings[txr->me], i);
    703 			netmap_load_map(na, txr->txtag,
    704 			    txbuf->map, NMB(na, slot + si));
    705 		}
    706 #endif /* DEV_NETMAP */
    707 
    708 		/* Clear the EOP descriptor pointer */
    709 		txbuf->eop = NULL;
    710 	}
    711 
    712 	/* Set the rate at which we sample packets */
    713 	if (adapter->feat_en & IXGBE_FEATURE_FDIR)
    714 		txr->atr_sample = atr_sample_rate;
    715 
    716 	/* Set number of descriptors available */
    717 	txr->tx_avail = adapter->num_tx_desc;
    718 
    719 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
    720 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
    721 	IXGBE_TX_UNLOCK(txr);
    722 } /* ixgbe_setup_transmit_ring */
    723 
    724 /************************************************************************
    725  * ixgbe_setup_transmit_structures - Initialize all transmit rings.
    726  ************************************************************************/
    727 int
    728 ixgbe_setup_transmit_structures(struct adapter *adapter)
    729 {
    730 	struct tx_ring *txr = adapter->tx_rings;
    731 
    732 	for (int i = 0; i < adapter->num_queues; i++, txr++)
    733 		ixgbe_setup_transmit_ring(txr);
    734 
    735 	return (0);
    736 } /* ixgbe_setup_transmit_structures */
    737 
    738 /************************************************************************
    739  * ixgbe_free_transmit_structures - Free all transmit rings.
    740  ************************************************************************/
    741 void
    742 ixgbe_free_transmit_structures(struct adapter *adapter)
    743 {
    744 	struct tx_ring *txr = adapter->tx_rings;
    745 
    746 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
    747 		ixgbe_free_transmit_buffers(txr);
    748 		ixgbe_dma_free(adapter, &txr->txdma);
    749 		IXGBE_TX_LOCK_DESTROY(txr);
    750 	}
    751 	free(adapter->tx_rings, M_DEVBUF);
    752 } /* ixgbe_free_transmit_structures */
    753 
    754 /************************************************************************
    755  * ixgbe_free_transmit_buffers
    756  *
    757  *   Free transmit ring related data structures.
    758  ************************************************************************/
    759 static void
    760 ixgbe_free_transmit_buffers(struct tx_ring *txr)
    761 {
    762 	struct adapter      *adapter = txr->adapter;
    763 	struct ixgbe_tx_buf *tx_buffer;
    764 	int                 i;
    765 
    766 	INIT_DEBUGOUT("ixgbe_free_transmit_buffers: begin");
    767 
    768 	if (txr->tx_buffers == NULL)
    769 		return;
    770 
    771 	tx_buffer = txr->tx_buffers;
    772 	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
    773 		if (tx_buffer->m_head != NULL) {
    774 			bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
    775 			    0, tx_buffer->m_head->m_pkthdr.len,
    776 			    BUS_DMASYNC_POSTWRITE);
    777 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
    778 			m_freem(tx_buffer->m_head);
    779 			tx_buffer->m_head = NULL;
    780 			if (tx_buffer->map != NULL) {
    781 				ixgbe_dmamap_destroy(txr->txtag,
    782 				    tx_buffer->map);
    783 				tx_buffer->map = NULL;
    784 			}
    785 		} else if (tx_buffer->map != NULL) {
    786 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
    787 			ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
    788 			tx_buffer->map = NULL;
    789 		}
    790 	}
    791 	if (txr->txr_interq != NULL) {
    792 		struct mbuf *m;
    793 
    794 		while ((m = pcq_get(txr->txr_interq)) != NULL)
    795 			m_freem(m);
    796 		pcq_destroy(txr->txr_interq);
    797 	}
    798 	if (txr->tx_buffers != NULL) {
    799 		free(txr->tx_buffers, M_DEVBUF);
    800 		txr->tx_buffers = NULL;
    801 	}
    802 	if (txr->txtag != NULL) {
    803 		ixgbe_dma_tag_destroy(txr->txtag);
    804 		txr->txtag = NULL;
    805 	}
    806 } /* ixgbe_free_transmit_buffers */
    807 
    808 /************************************************************************
    809  * ixgbe_tx_ctx_setup
    810  *
    811  *   Advanced Context Descriptor setup for VLAN, CSUM or TSO
    812  ************************************************************************/
    813 static int
    814 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
    815     u32 *cmd_type_len, u32 *olinfo_status)
    816 {
    817 	struct adapter                   *adapter = txr->adapter;
    818 	struct ixgbe_adv_tx_context_desc *TXD;
    819 	struct ether_vlan_header         *eh;
    820 #ifdef INET
    821 	struct ip                        *ip;
    822 #endif
    823 #ifdef INET6
    824 	struct ip6_hdr                   *ip6;
    825 #endif
    826 	int                              ehdrlen, ip_hlen = 0;
    827 	int                              offload = TRUE;
    828 	int                              ctxd = txr->next_avail_desc;
    829 	u32                              vlan_macip_lens = 0;
    830 	u32                              type_tucmd_mlhl = 0;
    831 	u16                              vtag = 0;
    832 	u16                              etype;
    833 	u8                               ipproto = 0;
    834 	char                             *l3d;
    835 
    836 
    837 	/* First check if TSO is to be used */
    838 	if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4 | M_CSUM_TSOv6)) {
    839 		int rv = ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status);
    840 
    841 		if (rv != 0)
    842 			IXGBE_EVC_ADD(&adapter->tso_err, 1);
    843 		return rv;
    844 	}
    845 
    846 	if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
    847 		offload = FALSE;
    848 
    849 	/* Indicate the whole packet as payload when not doing TSO */
    850 	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
    851 
    852 	/* Now ready a context descriptor */
    853 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
    854 
    855 	/*
    856 	 * In advanced descriptors the vlan tag must
    857 	 * be placed into the context descriptor. Hence
    858 	 * we need to make one even if not doing offloads.
    859 	 */
    860 	if (vlan_has_tag(mp)) {
    861 		vtag = htole16(vlan_get_tag(mp));
    862 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
    863 	} else if (!(txr->adapter->feat_en & IXGBE_FEATURE_NEEDS_CTXD) &&
    864 	           (offload == FALSE))
    865 		return (0);
    866 
    867 	/*
    868 	 * Determine where frame payload starts.
    869 	 * Jump over vlan headers if already present,
    870 	 * helpful for QinQ too.
    871 	 */
    872 	KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
    873 	eh = mtod(mp, struct ether_vlan_header *);
    874 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
    875 		KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
    876 		etype = ntohs(eh->evl_proto);
    877 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
    878 	} else {
    879 		etype = ntohs(eh->evl_encap_proto);
    880 		ehdrlen = ETHER_HDR_LEN;
    881 	}
    882 
    883 	/* Set the ether header length */
    884 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
    885 
    886 	if (offload == FALSE)
    887 		goto no_offloads;
    888 
    889 	/*
    890 	 * If the first mbuf only includes the ethernet header,
    891 	 * jump to the next one
    892 	 * XXX: This assumes the stack splits mbufs containing headers
    893 	 *      on header boundaries
    894 	 * XXX: And assumes the entire IP header is contained in one mbuf
    895 	 */
    896 	if (mp->m_len == ehdrlen && mp->m_next)
    897 		l3d = mtod(mp->m_next, char *);
    898 	else
    899 		l3d = mtod(mp, char *) + ehdrlen;
    900 
    901 	switch (etype) {
    902 #ifdef INET
    903 	case ETHERTYPE_IP:
    904 		ip = (struct ip *)(l3d);
    905 		ip_hlen = ip->ip_hl << 2;
    906 		ipproto = ip->ip_p;
    907 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
    908 		KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
    909 		    ip->ip_sum == 0);
    910 		break;
    911 #endif
    912 #ifdef INET6
    913 	case ETHERTYPE_IPV6:
    914 		ip6 = (struct ip6_hdr *)(l3d);
    915 		ip_hlen = sizeof(struct ip6_hdr);
    916 		ipproto = ip6->ip6_nxt;
    917 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
    918 		break;
    919 #endif
    920 	default:
    921 		offload = false;
    922 		break;
    923 	}
    924 
    925 	if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
    926 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
    927 
    928 	vlan_macip_lens |= ip_hlen;
    929 
    930 	/* No support for offloads for non-L4 next headers */
    931 	switch (ipproto) {
    932 	case IPPROTO_TCP:
    933 		if (mp->m_pkthdr.csum_flags &
    934 		    (M_CSUM_TCPv4 | M_CSUM_TCPv6))
    935 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
    936 		else
    937 			offload = false;
    938 		break;
    939 	case IPPROTO_UDP:
    940 		if (mp->m_pkthdr.csum_flags &
    941 		    (M_CSUM_UDPv4 | M_CSUM_UDPv6))
    942 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
    943 		else
    944 			offload = false;
    945 		break;
    946 	default:
    947 		offload = false;
    948 		break;
    949 	}
    950 
    951 	if (offload) /* Insert L4 checksum into data descriptors */
    952 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
    953 
    954 no_offloads:
    955 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
    956 
    957 	/* Now copy bits into descriptor */
    958 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
    959 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
    960 	TXD->seqnum_seed = htole32(0);
    961 	TXD->mss_l4len_idx = htole32(0);
    962 
    963 	/* We've consumed the first desc, adjust counters */
    964 	if (++ctxd == txr->num_desc)
    965 		ctxd = 0;
    966 	txr->next_avail_desc = ctxd;
    967 	--txr->tx_avail;
    968 
    969 	return (0);
    970 } /* ixgbe_tx_ctx_setup */
    971 
    972 /************************************************************************
    973  * ixgbe_tso_setup
    974  *
    975  *   Setup work for hardware segmentation offload (TSO) on
    976  *   adapters using advanced tx descriptors
    977  ************************************************************************/
    978 static int
    979 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len,
    980     u32 *olinfo_status)
    981 {
    982 	struct ixgbe_adv_tx_context_desc *TXD;
    983 	struct ether_vlan_header         *eh;
    984 #ifdef INET6
    985 	struct ip6_hdr                   *ip6;
    986 #endif
    987 #ifdef INET
    988 	struct ip                        *ip;
    989 #endif
    990 	struct tcphdr                    *th;
    991 	int                              ctxd, ehdrlen, ip_hlen, tcp_hlen;
    992 	u32                              vlan_macip_lens = 0;
    993 	u32                              type_tucmd_mlhl = 0;
    994 	u32                              mss_l4len_idx = 0, paylen;
    995 	u16                              vtag = 0, eh_type;
    996 
    997 	/*
    998 	 * Determine where frame payload starts.
    999 	 * Jump over vlan headers if already present
   1000 	 */
   1001 	eh = mtod(mp, struct ether_vlan_header *);
   1002 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
   1003 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
   1004 		eh_type = eh->evl_proto;
   1005 	} else {
   1006 		ehdrlen = ETHER_HDR_LEN;
   1007 		eh_type = eh->evl_encap_proto;
   1008 	}
   1009 
   1010 	switch (ntohs(eh_type)) {
   1011 #ifdef INET
   1012 	case ETHERTYPE_IP:
   1013 		ip = (struct ip *)(mp->m_data + ehdrlen);
   1014 		if (ip->ip_p != IPPROTO_TCP)
   1015 			return (ENXIO);
   1016 		ip->ip_sum = 0;
   1017 		ip_hlen = ip->ip_hl << 2;
   1018 		th = (struct tcphdr *)((char *)ip + ip_hlen);
   1019 		th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
   1020 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
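        		/*
        		 * Seed th_sum with the pseudo-header checksum (excluding
        		 * the length); the hardware folds in the length and the
        		 * payload checksum for each segment it generates.
        		 */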
   1021 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
   1022 		/* Tell transmit desc to also do IPv4 checksum. */
   1023 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
   1024 		break;
   1025 #endif
   1026 #ifdef INET6
   1027 	case ETHERTYPE_IPV6:
   1028 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
   1029 		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
   1030 		if (ip6->ip6_nxt != IPPROTO_TCP)
   1031 			return (ENXIO);
   1032 		ip_hlen = sizeof(struct ip6_hdr);
   1033 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
   1034 		th = (struct tcphdr *)((char *)ip6 + ip_hlen);
   1035 		th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
   1036 		    &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
   1037 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
   1038 		break;
   1039 #endif
   1040 	default:
   1041 		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
   1042 		    __func__, ntohs(eh_type));
   1043 		break;
   1044 	}
   1045 
   1046 	ctxd = txr->next_avail_desc;
   1047 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
   1048 
   1049 	tcp_hlen = th->th_off << 2;
   1050 
   1051 	/* This is used in the transmit desc in encap */
   1052 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
   1053 
   1054 	/* VLAN MACLEN IPLEN */
   1055 	if (vlan_has_tag(mp)) {
   1056 		vtag = htole16(vlan_get_tag(mp));
   1057 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
   1058 	}
   1059 
   1060 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
   1061 	vlan_macip_lens |= ip_hlen;
   1062 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
   1063 
   1064 	/* ADV DTYPE TUCMD */
   1065 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
   1066 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
   1067 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
   1068 
   1069 	/* MSS L4LEN IDX */
   1070 	mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
   1071 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
   1072 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
   1073 
   1074 	TXD->seqnum_seed = htole32(0);
   1075 
   1076 	if (++ctxd == txr->num_desc)
   1077 		ctxd = 0;
   1078 
   1079 	txr->tx_avail--;
   1080 	txr->next_avail_desc = ctxd;
   1081 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
   1082 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
   1083 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
   1084 	IXGBE_EVC_ADD(&txr->tso_tx, 1);
   1085 
   1086 	return (0);
   1087 } /* ixgbe_tso_setup */
   1088 
   1089 
   1090 /************************************************************************
   1091  * ixgbe_txeof
   1092  *
   1093  *   Examine each tx_buffer in the used queue. If the hardware is done
   1094  *   processing the packet then free associated resources. The
   1095  *   tx_buffer is put back on the free queue.
   1096  ************************************************************************/
   1097 bool
   1098 ixgbe_txeof(struct tx_ring *txr)
   1099 {
   1100 	struct adapter		*adapter = txr->adapter;
   1101 	struct ifnet		*ifp = adapter->ifp;
   1102 	struct ixgbe_tx_buf	*buf;
   1103 	union ixgbe_adv_tx_desc *txd;
   1104 	u32			work, processed = 0;
   1105 	u32			limit = adapter->tx_process_limit;
   1106 
   1107 	KASSERT(mutex_owned(&txr->tx_mtx));
   1108 
   1109 #ifdef DEV_NETMAP
   1110 	if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) &&
   1111 	    (adapter->ifp->if_capenable & IFCAP_NETMAP)) {
   1112 		struct netmap_adapter *na = NA(adapter->ifp);
   1113 		struct netmap_kring *kring = na->tx_rings[txr->me];
   1114 		txd = txr->tx_base;
   1115 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1116 		    BUS_DMASYNC_POSTREAD);
   1117 		/*
   1118 		 * In netmap mode, all the work is done in the context
   1119 		 * of the client thread. Interrupt handlers only wake up
   1120 		 * clients, which may be sleeping on individual rings
   1121 		 * or on a global resource for all rings.
   1122 		 * To implement tx interrupt mitigation, we wake up the client
   1123 		 * thread roughly every half ring, even if the NIC interrupts
   1124 		 * more frequently. This is implemented as follows:
   1125 		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
   1126 		 *   the slot that should wake up the thread (nkr_num_slots
   1127 		 *   means the user thread should not be woken up);
   1128 		 * - the driver ignores tx interrupts unless netmap_mitigate=0
   1129 		 *   or the slot has the DD bit set.
   1130 		 */
   1131 		if (kring->nr_kflags < kring->nkr_num_slots &&
   1132 		    le32toh(txd[kring->nr_kflags].wb.status) & IXGBE_TXD_STAT_DD) {
   1133 			netmap_tx_irq(ifp, txr->me);
   1134 		}
   1135 		return false;
   1136 	}
   1137 #endif /* DEV_NETMAP */
   1138 
   1139 	if (txr->tx_avail == txr->num_desc) {
   1140 		txr->busy = 0;
   1141 		return false;
   1142 	}
   1143 
   1144 	/* Get work starting point */
   1145 	work = txr->next_to_clean;
   1146 	buf = &txr->tx_buffers[work];
   1147 	txd = &txr->tx_base[work];
   1148 	work -= txr->num_desc; /* The distance to ring end */
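        	/*
        	 * "work" is kept as a negative offset from the end of the ring;
        	 * it reaches zero exactly when the scan wraps, which keeps the
        	 * wrap check in the loop below cheap.
        	 */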
   1149 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1150 	    BUS_DMASYNC_POSTREAD);
   1151 
   1152 	do {
   1153 		union ixgbe_adv_tx_desc *eop = buf->eop;
   1154 		if (eop == NULL) /* No work */
   1155 			break;
   1156 
   1157 		if ((le32toh(eop->wb.status) & IXGBE_TXD_STAT_DD) == 0)
   1158 			break;	/* I/O not complete */
   1159 
   1160 		if (buf->m_head) {
   1161 			txr->bytes += buf->m_head->m_pkthdr.len;
   1162 			bus_dmamap_sync(txr->txtag->dt_dmat, buf->map,
   1163 			    0, buf->m_head->m_pkthdr.len,
   1164 			    BUS_DMASYNC_POSTWRITE);
   1165 			ixgbe_dmamap_unload(txr->txtag, buf->map);
   1166 			m_freem(buf->m_head);
   1167 			buf->m_head = NULL;
   1168 		}
   1169 		buf->eop = NULL;
   1170 		txr->txr_no_space = false;
   1171 		++txr->tx_avail;
   1172 
   1173 		/* We clean the range if multi segment */
   1174 		while (txd != eop) {
   1175 			++txd;
   1176 			++buf;
   1177 			++work;
   1178 			/* wrap the ring? */
   1179 			if (__predict_false(!work)) {
   1180 				work -= txr->num_desc;
   1181 				buf = txr->tx_buffers;
   1182 				txd = txr->tx_base;
   1183 			}
   1184 			if (buf->m_head) {
   1185 				txr->bytes +=
   1186 				    buf->m_head->m_pkthdr.len;
   1187 				bus_dmamap_sync(txr->txtag->dt_dmat,
   1188 				    buf->map,
   1189 				    0, buf->m_head->m_pkthdr.len,
   1190 				    BUS_DMASYNC_POSTWRITE);
   1191 				ixgbe_dmamap_unload(txr->txtag,
   1192 				    buf->map);
   1193 				m_freem(buf->m_head);
   1194 				buf->m_head = NULL;
   1195 			}
   1196 			++txr->tx_avail;
   1197 			buf->eop = NULL;
   1198 
   1199 		}
   1200 		++txr->packets;
   1201 		++processed;
   1202 		if_statinc(ifp, if_opackets);
   1203 
   1204 		/* Try the next packet */
   1205 		++txd;
   1206 		++buf;
   1207 		++work;
   1208 		/* reset with a wrap */
   1209 		if (__predict_false(!work)) {
   1210 			work -= txr->num_desc;
   1211 			buf = txr->tx_buffers;
   1212 			txd = txr->tx_base;
   1213 		}
   1214 		prefetch(txd);
   1215 	} while (__predict_true(--limit));
   1216 
   1217 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1218 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   1219 
   1220 	work += txr->num_desc;
   1221 	txr->next_to_clean = work;
   1222 
   1223 	/*
   1224 	 * Queue hang detection: we know there is work
   1225 	 * outstanding or the early return above would
   1226 	 * have been taken, so increment busy if nothing
   1227 	 * managed to get cleaned; local_timer will then
   1228 	 * check this and mark the queue HUNG if it
   1229 	 * exceeds the maximum number of attempts.
   1230 	 */
   1231 	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
   1232 		++txr->busy;
   1233 	/*
   1234 	 * If anything gets cleaned we reset the state to 1;
   1235 	 * note this will clear HUNG if it is set.
   1236 	 */
   1237 	if (processed)
   1238 		txr->busy = 1;
   1239 
   1240 	if (txr->tx_avail == txr->num_desc)
   1241 		txr->busy = 0;
   1242 
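        	/* Returning true means the limit was exhausted and work may remain. */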
   1243 	return ((limit > 0) ? false : true);
   1244 } /* ixgbe_txeof */
   1245 
   1246 /************************************************************************
   1247  * ixgbe_rsc_count
   1248  *
   1249  *   Used to detect a descriptor that has been merged by Hardware RSC.
   1250  ************************************************************************/
   1251 static inline u32
   1252 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
   1253 {
   1254 	return (le32toh(rx->wb.lower.lo_dword.data) &
   1255 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
   1256 } /* ixgbe_rsc_count */
   1257 
   1258 /************************************************************************
   1259  * ixgbe_setup_hw_rsc
   1260  *
   1261  *   Initialize Hardware RSC (LRO) feature on 82599
   1262  *   for an RX ring; this is toggled by the LRO capability
   1263  *   even though it is transparent to the stack.
   1264  *
   1265  *   NOTE: Since this HW feature only works with IPv4 and
   1266  *         testing has shown soft LRO to be as effective,
   1267  *         this feature will be disabled by default.
   1268  ************************************************************************/
   1269 static void
   1270 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
   1271 {
   1272 	struct	adapter  *adapter = rxr->adapter;
   1273 	struct	ixgbe_hw *hw = &adapter->hw;
   1274 	u32              rscctrl, rdrxctl;
   1275 
   1276 	/* If turning LRO/RSC off we need to disable it */
   1277 	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
   1278 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
   1279 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
        		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
   1280 		return;
   1281 	}
   1282 
   1283 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
   1284 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
   1285 #ifdef DEV_NETMAP
   1286 	/* Always strip CRC unless Netmap disabled it */
   1287 	if (!(adapter->feat_en & IXGBE_FEATURE_NETMAP) ||
   1288 	    !(adapter->ifp->if_capenable & IFCAP_NETMAP) ||
   1289 	    ix_crcstrip)
   1290 #endif /* DEV_NETMAP */
   1291 		rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
   1292 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
   1293 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
   1294 
   1295 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
   1296 	rscctrl |= IXGBE_RSCCTL_RSCEN;
   1297 	/*
   1298 	 * Limit the total number of descriptors that
   1299 	 * can be combined, so it does not exceed 64K
   1300 	 */
   1301 	if (rxr->mbuf_sz == MCLBYTES)
   1302 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
   1303 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
   1304 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
   1305 	else if (rxr->mbuf_sz == MJUM9BYTES)
   1306 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
   1307 	else  /* Using 16K cluster */
   1308 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
   1309 
   1310 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
   1311 
   1312 	/* Enable TCP header recognition */
   1313 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
   1314 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR));
   1315 
   1316 	/* Disable RSC for ACK packets */
   1317 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
   1318 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
   1319 
   1320 	rxr->hw_rsc = TRUE;
   1321 } /* ixgbe_setup_hw_rsc */
   1322 
   1323 /************************************************************************
   1324  * ixgbe_refresh_mbufs
   1325  *
   1326  *   Refresh mbuf buffers for RX descriptor rings
   1327  *    - now keeps its own state so discards due to resource
   1328  *      exhaustion are unnecessary, if an mbuf cannot be obtained
   1329  *      it just returns, keeping its placeholder, thus it can simply
   1330  *      be recalled to try again.
   1331  *
   1332  *   XXX NetBSD TODO:
   1333  *    - The ixgbe_rxeof() function always preallocates mbuf cluster,
   1334  *      so the ixgbe_refresh_mbufs() function can be simplified.
   1335  *
   1336  ************************************************************************/
   1337 static void
   1338 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
   1339 {
   1340 	struct adapter      *adapter = rxr->adapter;
   1341 	struct ixgbe_rx_buf *rxbuf;
   1342 	struct mbuf         *mp;
   1343 	int                 i, error;
   1344 	bool                refreshed = false;
   1345 
   1346 	i = rxr->next_to_refresh;
   1347 	/* next_to_refresh points to the previous one */
   1348 	if (++i == rxr->num_desc)
   1349 		i = 0;
   1350 
   1351 	while (i != limit) {
   1352 		rxbuf = &rxr->rx_buffers[i];
   1353 		if (__predict_false(rxbuf->buf == NULL)) {
   1354 			mp = ixgbe_getcl();
   1355 			if (mp == NULL) {
   1356 				IXGBE_EVC_ADD(&rxr->no_mbuf, 1);
   1357 				goto update;
   1358 			}
   1359 			mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   1360 			IXGBE_M_ADJ(adapter, rxr, mp);
   1361 		} else
   1362 			mp = rxbuf->buf;
   1363 
   1364 		/* If we're dealing with an mbuf that was copied rather
   1365 		 * than replaced, there's no need to go through busdma.
   1366 		 */
   1367 		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
   1368 			/* Get the memory mapping */
   1369 			ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   1370 			error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
   1371 			    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
   1372 			if (__predict_false(error != 0)) {
   1373 				device_printf(adapter->dev, "Refresh mbufs: "
   1374 				    "payload dmamap load failure - %d\n",
   1375 				    error);
   1376 				m_free(mp);
   1377 				rxbuf->buf = NULL;
   1378 				goto update;
   1379 			}
   1380 			rxbuf->buf = mp;
   1381 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1382 			    0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
   1383 			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
   1384 			    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1385 		} else {
   1386 			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
   1387 			rxbuf->flags &= ~IXGBE_RX_COPY;
   1388 		}
   1389 
   1390 		refreshed = true;
   1391 		/* next_to_refresh points to the previous one */
   1392 		rxr->next_to_refresh = i;
   1393 		if (++i == rxr->num_desc)
   1394 			i = 0;
   1395 	}
   1396 
   1397 update:
   1398 	if (refreshed) /* Update hardware tail index */
   1399 		IXGBE_WRITE_REG(&adapter->hw, rxr->tail, rxr->next_to_refresh);
   1400 
   1401 	return;
   1402 } /* ixgbe_refresh_mbufs */
   1403 
   1404 /************************************************************************
   1405  * ixgbe_allocate_receive_buffers
   1406  *
   1407  *   Allocate memory for rx_buffer structures. Since we use one
   1408  *   rx_buffer per received packet, the maximum number of rx_buffer's
   1409  *   rx_buffer per received packet, the maximum number of rx_buffers
   1410  *   that we've allocated.
   1411  ************************************************************************/
   1412 static int
   1413 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
   1414 {
   1415 	struct adapter      *adapter = rxr->adapter;
   1416 	device_t            dev = adapter->dev;
   1417 	struct ixgbe_rx_buf *rxbuf;
   1418 	int                 bsize, error;
   1419 
   1420 	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
   1421 	rxr->rx_buffers = malloc(bsize, M_DEVBUF, M_WAITOK | M_ZERO);
   1422 
   1423 	error = ixgbe_dma_tag_create(
   1424 	         /*      parent */ adapter->osdep.dmat,
   1425 	         /*   alignment */ 1,
   1426 	         /*      bounds */ 0,
   1427 	         /*     maxsize */ MJUM16BYTES,
   1428 	         /*   nsegments */ 1,
   1429 	         /*  maxsegsize */ MJUM16BYTES,
   1430 	         /*       flags */ 0,
   1431 	                           &rxr->ptag);
   1432 	if (error != 0) {
   1433 		aprint_error_dev(dev, "Unable to create RX DMA tag\n");
   1434 		goto fail;
   1435 	}
   1436 
   1437 	for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
   1438 		rxbuf = &rxr->rx_buffers[i];
   1439 		error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
   1440 		if (error) {
   1441 			aprint_error_dev(dev, "Unable to create RX dma map\n");
   1442 			goto fail;
   1443 		}
   1444 	}
   1445 
   1446 	return (0);
   1447 
   1448 fail:
   1449 	/* Frees all, but can handle partial completion */
   1450 	ixgbe_free_receive_structures(adapter);
   1451 
   1452 	return (error);
   1453 } /* ixgbe_allocate_receive_buffers */
   1454 
   1455 /************************************************************************
   1456  * ixgbe_free_receive_ring
   1457  ************************************************************************/
   1458 static void
   1459 ixgbe_free_receive_ring(struct rx_ring *rxr)
   1460 {
   1461 	for (int i = 0; i < rxr->num_desc; i++) {
   1462 		ixgbe_rx_discard(rxr, i);
   1463 	}
   1464 } /* ixgbe_free_receive_ring */
   1465 
   1466 /************************************************************************
   1467  * ixgbe_setup_receive_ring
   1468  *
   1469  *   Initialize a receive ring and its buffers.
   1470  ************************************************************************/
   1471 static int
   1472 ixgbe_setup_receive_ring(struct rx_ring *rxr)
   1473 {
   1474 	struct adapter        *adapter;
   1475 	struct ixgbe_rx_buf   *rxbuf;
   1476 #ifdef LRO
   1477 	struct ifnet          *ifp;
   1478 	struct lro_ctrl       *lro = &rxr->lro;
   1479 #endif /* LRO */
   1480 #ifdef DEV_NETMAP
   1481 	struct netmap_adapter *na = NA(rxr->adapter->ifp);
   1482 	struct netmap_slot    *slot;
   1483 #endif /* DEV_NETMAP */
   1484 	int                   rsize, error = 0;
   1485 
   1486 	adapter = rxr->adapter;
   1487 #ifdef LRO
   1488 	ifp = adapter->ifp;
   1489 #endif /* LRO */
   1490 
   1491 	/* Clear the ring contents */
   1492 	IXGBE_RX_LOCK(rxr);
   1493 
   1494 #ifdef DEV_NETMAP
   1495 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP)
   1496 		slot = netmap_reset(na, NR_RX, rxr->me, 0);
   1497 #endif /* DEV_NETMAP */
   1498 
   1499 	rsize = roundup2(adapter->num_rx_desc *
   1500 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
   1501 	bzero((void *)rxr->rx_base, rsize);
   1502 	/* Cache the size */
   1503 	rxr->mbuf_sz = adapter->rx_mbuf_sz;
   1504 
   1505 	/* Free current RX buffer structs and their mbufs */
   1506 	ixgbe_free_receive_ring(rxr);
   1507 
   1508 	/* Now replenish the mbufs */
   1509 	for (int j = 0; j != rxr->num_desc; ++j) {
   1510 		struct mbuf *mp;
   1511 
   1512 		rxbuf = &rxr->rx_buffers[j];
   1513 
   1514 #ifdef DEV_NETMAP
   1515 		/*
   1516 		 * In netmap mode, fill the map and set the buffer
   1517 		 * address in the NIC ring, considering the offset
   1518 		 * between the netmap and NIC rings (see comment in
   1519 		 * ixgbe_setup_transmit_ring() ). No need to allocate
   1520 		 * an mbuf, so end the block with a continue;
   1521 		 */
   1522 		if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
   1523 			int sj = netmap_idx_n2k(na->rx_rings[rxr->me], j);
   1524 			uint64_t paddr;
   1525 			void *addr;
   1526 
   1527 			addr = PNMB(na, slot + sj, &paddr);
   1528 			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
   1529 			/* Update descriptor and the cached value */
   1530 			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
   1531 			rxbuf->addr = htole64(paddr);
   1532 			continue;
   1533 		}
   1534 #endif /* DEV_NETMAP */
   1535 
   1536 		rxbuf->flags = 0;
   1537 		rxbuf->buf = ixgbe_getcl();
   1538 		if (rxbuf->buf == NULL) {
   1539 			IXGBE_EVC_ADD(&rxr->no_mbuf, 1);
   1540 			error = ENOBUFS;
   1541 			goto fail;
   1542 		}
   1543 		mp = rxbuf->buf;
   1544 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   1545 		IXGBE_M_ADJ(adapter, rxr, mp);
   1546 		/* Get the memory mapping */
   1547 		error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat, rxbuf->pmap,
   1548 		    mp, BUS_DMA_NOWAIT);
   1549 		if (error != 0) {
   1550 			/*
   1551 			 * Clear this entry for later cleanup in
    1552 			 * ixgbe_rx_discard() which is called via
   1553 			 * ixgbe_free_receive_ring().
   1554 			 */
   1555 			m_freem(mp);
   1556 			rxbuf->buf = NULL;
   1557 			goto fail;
   1558 		}
   1559 		bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1560 		    0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
   1561 		/* Update the descriptor and the cached value */
   1562 		rxr->rx_base[j].read.pkt_addr =
   1563 		    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1564 		rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1565 	}
   1566 
   1567 	/* Setup our descriptor indices */
   1568 	rxr->next_to_check = 0;
   1569 	rxr->next_to_refresh = adapter->num_rx_desc - 1; /* Fully allocated */
   1570 	rxr->lro_enabled = FALSE;
   1571 	rxr->discard_multidesc = false;
   1572 	IXGBE_EVC_STORE(&rxr->rx_copies, 0);
   1573 #if 0 /* NetBSD */
   1574 	IXGBE_EVC_STORE(&rxr->rx_bytes, 0);
   1575 #if 1	/* Fix inconsistency */
   1576 	IXGBE_EVC_STORE(&rxr->rx_packets, 0);
   1577 #endif
   1578 #endif
   1579 	rxr->vtag_strip = FALSE;
   1580 
   1581 	ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   1582 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   1583 
   1584 	/*
   1585 	 * Now set up the LRO interface
   1586 	 */
   1587 	if (ixgbe_rsc_enable)
   1588 		ixgbe_setup_hw_rsc(rxr);
   1589 #ifdef LRO
   1590 	else if (ifp->if_capenable & IFCAP_LRO) {
   1591 		device_t dev = adapter->dev;
   1592 		int err = tcp_lro_init(lro);
   1593 		if (err) {
   1594 			device_printf(dev, "LRO Initialization failed!\n");
   1595 			goto fail;
   1596 		}
   1597 		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
   1598 		rxr->lro_enabled = TRUE;
   1599 		lro->ifp = adapter->ifp;
   1600 	}
   1601 #endif /* LRO */
   1602 
   1603 	IXGBE_RX_UNLOCK(rxr);
   1604 
   1605 	return (0);
   1606 
   1607 fail:
   1608 	ixgbe_free_receive_ring(rxr);
   1609 	IXGBE_RX_UNLOCK(rxr);
   1610 
   1611 	return (error);
   1612 } /* ixgbe_setup_receive_ring */
   1613 
   1614 /************************************************************************
   1615  * ixgbe_setup_receive_structures - Initialize all receive rings.
   1616  ************************************************************************/
   1617 int
   1618 ixgbe_setup_receive_structures(struct adapter *adapter)
   1619 {
   1620 	struct rx_ring *rxr = adapter->rx_rings;
   1621 	int            j;
   1622 
   1623 	INIT_DEBUGOUT("ixgbe_setup_receive_structures");
   1624 	for (j = 0; j < adapter->num_queues; j++, rxr++)
   1625 		if (ixgbe_setup_receive_ring(rxr))
   1626 			goto fail;
   1627 
   1628 	return (0);
   1629 fail:
   1630 	/*
    1631 	 * Free the RX buffers allocated so far. We only handle the rings
    1632 	 * that completed; the failing ring has already cleaned up after
    1633 	 * itself. Ring 'j' failed, so it is the terminus.
   1634 	 */
   1635 	for (int i = 0; i < j; ++i) {
   1636 		rxr = &adapter->rx_rings[i];
   1637 		IXGBE_RX_LOCK(rxr);
   1638 		ixgbe_free_receive_ring(rxr);
   1639 		IXGBE_RX_UNLOCK(rxr);
   1640 	}
   1641 
   1642 	return (ENOBUFS);
   1643 } /* ixgbe_setup_receive_structures */
   1644 
   1645 
   1646 /************************************************************************
   1647  * ixgbe_free_receive_structures - Free all receive rings.
   1648  ************************************************************************/
   1649 void
   1650 ixgbe_free_receive_structures(struct adapter *adapter)
   1651 {
   1652 	struct rx_ring *rxr = adapter->rx_rings;
   1653 
   1654 	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
   1655 
   1656 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
   1657 		ixgbe_free_receive_buffers(rxr);
   1658 #ifdef LRO
   1659 		/* Free LRO memory */
   1660 		tcp_lro_free(&rxr->lro);
   1661 #endif /* LRO */
   1662 		/* Free the ring memory as well */
   1663 		ixgbe_dma_free(adapter, &rxr->rxdma);
   1664 		IXGBE_RX_LOCK_DESTROY(rxr);
   1665 	}
   1666 
   1667 	free(adapter->rx_rings, M_DEVBUF);
   1668 } /* ixgbe_free_receive_structures */
   1669 
   1670 
   1671 /************************************************************************
   1672  * ixgbe_free_receive_buffers - Free receive ring data structures
   1673  ************************************************************************/
   1674 static void
   1675 ixgbe_free_receive_buffers(struct rx_ring *rxr)
   1676 {
   1677 	struct adapter      *adapter = rxr->adapter;
   1678 	struct ixgbe_rx_buf *rxbuf;
   1679 
   1680 	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
   1681 
   1682 	/* Cleanup any existing buffers */
   1683 	if (rxr->rx_buffers != NULL) {
   1684 		for (int i = 0; i < adapter->num_rx_desc; i++) {
   1685 			rxbuf = &rxr->rx_buffers[i];
   1686 			ixgbe_rx_discard(rxr, i);
   1687 			if (rxbuf->pmap != NULL) {
   1688 				ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
   1689 				rxbuf->pmap = NULL;
   1690 			}
   1691 		}
   1692 
   1693 		if (rxr->rx_buffers != NULL) {
   1694 			free(rxr->rx_buffers, M_DEVBUF);
   1695 			rxr->rx_buffers = NULL;
   1696 		}
   1697 	}
   1698 
   1699 	if (rxr->ptag != NULL) {
   1700 		ixgbe_dma_tag_destroy(rxr->ptag);
   1701 		rxr->ptag = NULL;
   1702 	}
   1703 
   1704 	return;
   1705 } /* ixgbe_free_receive_buffers */
   1706 
   1707 /************************************************************************
   1708  * ixgbe_rx_input
   1709  ************************************************************************/
   1710 static __inline void
   1711 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m,
   1712     u32 ptype)
   1713 {
   1714 	struct adapter	*adapter = ifp->if_softc;
   1715 
   1716 #ifdef LRO
   1717 	struct ethercom *ec = &adapter->osdep.ec;
   1718 
   1719 	/*
    1720 	 * At the moment LRO is only for IP/TCP packets whose TCP checksum has
    1721 	 * been verified by hardware, with no VLAN tag in the Ethernet header.
    1722 	 * For IPv6 we do not yet support extension headers.
   1723 	 */
   1724         if (rxr->lro_enabled &&
   1725             (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
   1726             (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
   1727             ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
   1728             (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
   1729             (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
   1730             (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
   1731             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
   1732             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
   1733                 /*
   1734                  * Send to the stack if:
    1735                  *  - LRO not enabled, or
    1736                  *  - no LRO resources, or
    1737                  *  - lro enqueue fails
   1738                  */
   1739                 if (rxr->lro.lro_cnt != 0)
   1740                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
   1741                                 return;
   1742         }
   1743 #endif /* LRO */
   1744 
   1745 	if_percpuq_enqueue(adapter->ipq, m);
   1746 } /* ixgbe_rx_input */
   1747 
   1748 /************************************************************************
   1749  * ixgbe_rx_discard
   1750  ************************************************************************/
   1751 static __inline void
   1752 ixgbe_rx_discard(struct rx_ring *rxr, int i)
   1753 {
   1754 	struct ixgbe_rx_buf *rbuf;
   1755 
   1756 	rbuf = &rxr->rx_buffers[i];
   1757 
   1758 	/*
    1759 	 * With advanced descriptors the writeback clobbers the buffer
    1760 	 * addresses, so it's easier to just free the existing mbufs and take
    1761 	 * the normal refresh path to get new buffers and mappings.
   1762 	 */
   1763 
   1764 	if (rbuf->fmp != NULL) {/* Partial chain ? */
   1765 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
   1766 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
   1767 		ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
   1768 		m_freem(rbuf->fmp);
   1769 		rbuf->fmp = NULL;
   1770 		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
   1771 	} else if (rbuf->buf) {
   1772 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
   1773 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
   1774 		ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
   1775 		m_free(rbuf->buf);
   1776 		rbuf->buf = NULL;
   1777 	}
   1778 
   1779 	rbuf->flags = 0;
   1780 
   1781 	return;
   1782 } /* ixgbe_rx_discard */
   1783 
   1784 
   1785 /************************************************************************
   1786  * ixgbe_rxeof
   1787  *
    1788  *   Executes in interrupt context. It replenishes the
    1789  *   mbufs in the descriptor ring and sends data which has
    1790  *   been DMA'd into host memory to the upper layer.
   1791  *
   1792  *   Return TRUE for more work, FALSE for all clean.
   1793  ************************************************************************/
   1794 bool
   1795 ixgbe_rxeof(struct ix_queue *que)
   1796 {
   1797 	struct adapter		*adapter = que->adapter;
   1798 	struct rx_ring		*rxr = que->rxr;
   1799 	struct ifnet		*ifp = adapter->ifp;
   1800 #ifdef LRO
   1801 	struct lro_ctrl		*lro = &rxr->lro;
   1802 #endif /* LRO */
   1803 	union ixgbe_adv_rx_desc	*cur;
   1804 	struct ixgbe_rx_buf	*rbuf, *nbuf;
   1805 	int			i, nextp, processed = 0;
   1806 	u32			staterr = 0;
   1807 	u32			loopcount = 0, numdesc;
   1808 	u32			limit = adapter->rx_process_limit;
   1809 	bool			discard_multidesc = rxr->discard_multidesc;
   1810 	bool			wraparound = false;
   1811 	unsigned int		syncremain;
   1812 #ifdef RSS
   1813 	u16			pkt_info;
   1814 #endif
   1815 
   1816 	IXGBE_RX_LOCK(rxr);
   1817 
   1818 #ifdef DEV_NETMAP
   1819 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
   1820 		/* Same as the txeof routine: wakeup clients on intr. */
   1821 		if (netmap_rx_irq(ifp, rxr->me, &processed)) {
   1822 			IXGBE_RX_UNLOCK(rxr);
   1823 			return (FALSE);
   1824 		}
   1825 	}
   1826 #endif /* DEV_NETMAP */
   1827 
    1828 	/* Sync the ring. Sync size: rx_process_limit, or up to the ring end on wrap */
   1829 	if ((rxr->next_to_check + limit) <= rxr->num_desc) {
   1830 		/* Non-wraparound */
   1831 		numdesc = limit;
   1832 		syncremain = 0;
   1833 	} else {
    1834 		/* Wraparound. Sync up to the end of the ring first. */
   1835 		numdesc = rxr->num_desc - rxr->next_to_check;
   1836 
    1837 		/* Set the size of the remainder, synced after the wrap */
   1838 		syncremain = limit - numdesc;
   1839 	}
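         	/*
         	 * Illustrative example: with num_desc = 512, next_to_check = 500
         	 * and limit = 32, the sync below covers numdesc = 12 descriptors
         	 * (indices 500-511); the remaining syncremain = 20 descriptors
         	 * are synced from index 0 once the loop wraps around.
         	 */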
   1840 	bus_dmamap_sync(rxr->rxdma.dma_tag->dt_dmat,
   1841 	    rxr->rxdma.dma_map,
   1842 	    sizeof(union ixgbe_adv_rx_desc) * rxr->next_to_check,
   1843 	    sizeof(union ixgbe_adv_rx_desc) * numdesc,
   1844 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   1845 
   1846 	/*
    1847 	 * The maximum number of loop iterations is rx_process_limit. If
    1848 	 * discard_multidesc is true, keep processing so that a broken packet
    1849 	 * is not sent to the upper layer.
   1850 	 */
   1851 	for (i = rxr->next_to_check;
   1852 	     (loopcount < limit) || (discard_multidesc == true);) {
   1853 
   1854 		struct mbuf *sendmp, *mp;
   1855 		struct mbuf *newmp;
   1856 		u32         rsc, ptype;
   1857 		u16         len;
   1858 		u16         vtag = 0;
   1859 		bool        eop;
   1860 		bool        discard = false;
   1861 
   1862 		if (wraparound) {
    1863 			/* Sync the remaining descriptors after the wrap. */
   1864 			KASSERT(syncremain != 0);
   1865 			numdesc = syncremain;
   1866 			wraparound = false;
   1867 		} else if (__predict_false(loopcount >= limit)) {
   1868 			KASSERT(discard_multidesc == true);
   1869 			numdesc = 1;
   1870 		} else
   1871 			numdesc = 0;
   1872 
   1873 		if (numdesc != 0)
   1874 			bus_dmamap_sync(rxr->rxdma.dma_tag->dt_dmat,
   1875 			    rxr->rxdma.dma_map, 0,
   1876 			    sizeof(union ixgbe_adv_rx_desc) * numdesc,
   1877 			    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   1878 
   1879 		cur = &rxr->rx_base[i];
   1880 		staterr = le32toh(cur->wb.upper.status_error);
   1881 #ifdef RSS
   1882 		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
   1883 #endif
   1884 
   1885 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
   1886 			break;
   1887 
   1888 		loopcount++;
   1889 		sendmp = newmp = NULL;
   1890 		nbuf = NULL;
   1891 		rsc = 0;
   1892 		cur->wb.upper.status_error = 0;
   1893 		rbuf = &rxr->rx_buffers[i];
   1894 		mp = rbuf->buf;
   1895 
   1896 		len = le16toh(cur->wb.upper.length);
   1897 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
   1898 		    IXGBE_RXDADV_PKTTYPE_MASK;
   1899 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
   1900 
   1901 		/* Make sure bad packets are discarded */
   1902 		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
   1903 #if __FreeBSD_version >= 1100036
   1904 			if (adapter->feat_en & IXGBE_FEATURE_VF)
   1905 				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
   1906 #endif
   1907 			IXGBE_EVC_ADD(&rxr->rx_discarded, 1);
   1908 			ixgbe_rx_discard(rxr, i);
   1909 			discard_multidesc = false;
   1910 			goto next_desc;
   1911 		}
   1912 
   1913 		if (__predict_false(discard_multidesc))
   1914 			discard = true;
   1915 		else {
   1916 			/* Pre-alloc new mbuf. */
   1917 
   1918 			if ((rbuf->fmp == NULL) &&
   1919 			    eop && (len <= adapter->rx_copy_len)) {
   1920 				/* For short packet. See below. */
   1921 				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
   1922 				if (__predict_false(sendmp == NULL)) {
   1923 					IXGBE_EVC_ADD(&rxr->no_mbuf, 1);
   1924 					discard = true;
   1925 				}
   1926 			} else {
   1927 				/* For long packet. */
   1928 				newmp = ixgbe_getcl();
   1929 				if (__predict_false(newmp == NULL)) {
   1930 					IXGBE_EVC_ADD(&rxr->no_mbuf, 1);
   1931 					discard = true;
   1932 				}
   1933 			}
   1934 		}
   1935 
   1936 		if (__predict_false(discard)) {
   1937 			/*
   1938 			 * Descriptor initialization is already done by the
   1939 			 * above code (cur->wb.upper.status_error = 0).
   1940 			 * So, we can reuse current rbuf->buf for new packet.
   1941 			 *
   1942 			 * Rewrite the buffer addr, see comment in
   1943 			 * ixgbe_rx_discard().
   1944 			 */
   1945 			cur->read.pkt_addr = rbuf->addr;
   1946 			m_freem(rbuf->fmp);
   1947 			rbuf->fmp = NULL;
   1948 			if (!eop) {
   1949 				/* Discard the entire packet. */
   1950 				discard_multidesc = true;
   1951 			} else
   1952 				discard_multidesc = false;
   1953 			goto next_desc;
   1954 		}
   1955 		discard_multidesc = false;
   1956 
   1957 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
   1958 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
   1959 
   1960 		/*
    1961 		 * On the 82599, which supports a hardware
    1962 		 * LRO (called HW RSC), a packet's fragments
    1963 		 * need not sit in sequential descriptors;
    1964 		 * instead the next descriptor is indicated
    1965 		 * in bits of the current descriptor.  This
    1966 		 * also means that we might process more
    1967 		 * than one packet at a time, something that
    1968 		 * has never been true before, which required
    1969 		 * eliminating global chain pointers in favor
    1970 		 * of what we are doing here.  -jfv
   1971 		 */
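         		/*
         		 * For a multi-descriptor frame the index of the next
         		 * descriptor comes either from the hardware NEXTP field
         		 * (HW RSC) or is simply the next slot in the ring.
         		 */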
   1972 		if (!eop) {
   1973 			/*
   1974 			 * Figure out the next descriptor
   1975 			 * of this frame.
   1976 			 */
   1977 			if (rxr->hw_rsc == TRUE) {
   1978 				rsc = ixgbe_rsc_count(cur);
   1979 				rxr->rsc_num += (rsc - 1);
   1980 			}
   1981 			if (rsc) { /* Get hardware index */
   1982 				nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
   1983 				    IXGBE_RXDADV_NEXTP_SHIFT);
   1984 			} else { /* Just sequential */
   1985 				nextp = i + 1;
   1986 				if (nextp == adapter->num_rx_desc)
   1987 					nextp = 0;
   1988 			}
   1989 			nbuf = &rxr->rx_buffers[nextp];
   1990 			prefetch(nbuf);
   1991 		}
   1992 		/*
   1993 		 * Rather than using the fmp/lmp global pointers
   1994 		 * we now keep the head of a packet chain in the
   1995 		 * buffer struct and pass this along from one
   1996 		 * descriptor to the next, until we get EOP.
   1997 		 */
   1998 		/*
    1999 		 * See if there is a stored head from an earlier
    2000 		 * descriptor; if so, this is a continuation.
   2001 		 */
   2002 		if (rbuf->fmp != NULL) {
   2003 			/* Secondary frag */
   2004 			sendmp = rbuf->fmp;
   2005 
   2006 			/* Update new (used in future) mbuf */
   2007 			newmp->m_pkthdr.len = newmp->m_len = rxr->mbuf_sz;
   2008 			IXGBE_M_ADJ(adapter, rxr, newmp);
   2009 			rbuf->buf = newmp;
   2010 			rbuf->fmp = NULL;
   2011 
   2012 			/* For secondary frag */
   2013 			mp->m_len = len;
   2014 			mp->m_flags &= ~M_PKTHDR;
   2015 
   2016 			/* For sendmp */
   2017 			sendmp->m_pkthdr.len += mp->m_len;
   2018 		} else {
   2019 			/*
   2020 			 * It's the first segment of a multi descriptor
   2021 			 * packet or a single segment which contains a full
   2022 			 * packet.
   2023 			 */
   2024 
   2025 			if (eop && (len <= adapter->rx_copy_len)) {
   2026 				/*
    2027 				 * Optimize.  This might be a small packet,
    2028 				 * maybe just a TCP ACK. Copy into a new mbuf
    2029 				 * and leave the old mbuf+cluster for re-use.
   2030 				 */
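         				/*
         				 * ETHER_ALIGN (normally 2 bytes) keeps the
         				 * IP header 32-bit aligned behind the
         				 * 14-byte Ethernet header.
         				 */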
   2031 				sendmp->m_data += ETHER_ALIGN;
   2032 				memcpy(mtod(sendmp, void *),
   2033 				    mtod(mp, void *), len);
   2034 				IXGBE_EVC_ADD(&rxr->rx_copies, 1);
   2035 				rbuf->flags |= IXGBE_RX_COPY;
   2036 			} else {
   2037 				/* For long packet */
   2038 
   2039 				/* Update new (used in future) mbuf */
   2040 				newmp->m_pkthdr.len = newmp->m_len
   2041 				    = rxr->mbuf_sz;
   2042 				IXGBE_M_ADJ(adapter, rxr, newmp);
   2043 				rbuf->buf = newmp;
   2044 				rbuf->fmp = NULL;
   2045 
   2046 				/* For sendmp */
   2047 				sendmp = mp;
   2048 			}
   2049 
   2050 			/* first desc of a non-ps chain */
   2051 			sendmp->m_pkthdr.len = sendmp->m_len = len;
   2052 		}
   2053 		++processed;
   2054 
   2055 		/* Pass the head pointer on */
   2056 		if (eop == 0) {
   2057 			nbuf->fmp = sendmp;
   2058 			sendmp = NULL;
   2059 			mp->m_next = nbuf->buf;
   2060 		} else { /* Sending this frame */
   2061 			m_set_rcvif(sendmp, ifp);
   2062 			++rxr->packets;
   2063 			IXGBE_EVC_ADD(&rxr->rx_packets, 1);
   2064 			/* capture data for AIM */
   2065 			rxr->bytes += sendmp->m_pkthdr.len;
   2066 			IXGBE_EVC_ADD(&rxr->rx_bytes, sendmp->m_pkthdr.len);
   2067 			/* Process vlan info */
   2068 			if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
   2069 				vtag = le16toh(cur->wb.upper.vlan);
   2070 			if (vtag) {
   2071 				vlan_set_tag(sendmp, vtag);
   2072 			}
   2073 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
   2074 				ixgbe_rx_checksum(staterr, sendmp, ptype,
   2075 				   &adapter->stats.pf);
   2076 			}
   2077 
   2078 #if 0 /* FreeBSD */
   2079 			/*
   2080 			 * In case of multiqueue, we have RXCSUM.PCSD bit set
   2081 			 * and never cleared. This means we have RSS hash
   2082 			 * available to be used.
   2083 			 */
   2084 			if (adapter->num_queues > 1) {
   2085 				sendmp->m_pkthdr.flowid =
   2086 				    le32toh(cur->wb.lower.hi_dword.rss);
   2087 				switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
   2088 				case IXGBE_RXDADV_RSSTYPE_IPV4:
   2089 					M_HASHTYPE_SET(sendmp,
   2090 					    M_HASHTYPE_RSS_IPV4);
   2091 					break;
   2092 				case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
   2093 					M_HASHTYPE_SET(sendmp,
   2094 					    M_HASHTYPE_RSS_TCP_IPV4);
   2095 					break;
   2096 				case IXGBE_RXDADV_RSSTYPE_IPV6:
   2097 					M_HASHTYPE_SET(sendmp,
   2098 					    M_HASHTYPE_RSS_IPV6);
   2099 					break;
   2100 				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
   2101 					M_HASHTYPE_SET(sendmp,
   2102 					    M_HASHTYPE_RSS_TCP_IPV6);
   2103 					break;
   2104 				case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
   2105 					M_HASHTYPE_SET(sendmp,
   2106 					    M_HASHTYPE_RSS_IPV6_EX);
   2107 					break;
   2108 				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
   2109 					M_HASHTYPE_SET(sendmp,
   2110 					    M_HASHTYPE_RSS_TCP_IPV6_EX);
   2111 					break;
   2112 #if __FreeBSD_version > 1100000
   2113 				case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
   2114 					M_HASHTYPE_SET(sendmp,
   2115 					    M_HASHTYPE_RSS_UDP_IPV4);
   2116 					break;
   2117 				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
   2118 					M_HASHTYPE_SET(sendmp,
   2119 					    M_HASHTYPE_RSS_UDP_IPV6);
   2120 					break;
   2121 				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
   2122 					M_HASHTYPE_SET(sendmp,
   2123 					    M_HASHTYPE_RSS_UDP_IPV6_EX);
   2124 					break;
   2125 #endif
   2126 				default:
   2127 					M_HASHTYPE_SET(sendmp,
   2128 					    M_HASHTYPE_OPAQUE_HASH);
   2129 				}
   2130 			} else {
   2131 				sendmp->m_pkthdr.flowid = que->msix;
   2132 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
   2133 			}
   2134 #endif
   2135 		}
   2136 next_desc:
   2137 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   2138 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   2139 
   2140 		/* Advance our pointers to the next descriptor. */
   2141 		if (++i == rxr->num_desc) {
   2142 			wraparound = true;
   2143 			i = 0;
   2144 		}
   2145 		rxr->next_to_check = i;
   2146 
   2147 		/* Now send to the stack or do LRO */
   2148 		if (sendmp != NULL)
   2149 			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
   2150 
   2151 		/* Every 8 descriptors we go to refresh mbufs */
   2152 		if (processed == 8) {
   2153 			ixgbe_refresh_mbufs(rxr, i);
   2154 			processed = 0;
   2155 		}
   2156 	}
   2157 
   2158 	/* Save the current status */
   2159 	rxr->discard_multidesc = discard_multidesc;
   2160 
   2161 	/* Refresh any remaining buf structs */
   2162 	if (ixgbe_rx_unrefreshed(rxr))
   2163 		ixgbe_refresh_mbufs(rxr, i);
   2164 
   2165 	IXGBE_RX_UNLOCK(rxr);
   2166 
   2167 #ifdef LRO
   2168 	/*
   2169 	 * Flush any outstanding LRO work
   2170 	 */
   2171 	tcp_lro_flush_all(lro);
   2172 #endif /* LRO */
   2173 
   2174 	/*
   2175 	 * Still have cleaning to do?
   2176 	 */
   2177 	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
   2178 		return (TRUE);
   2179 
   2180 	return (FALSE);
   2181 } /* ixgbe_rxeof */
   2182 
   2183 
   2184 /************************************************************************
   2185  * ixgbe_rx_checksum
   2186  *
   2187  *   Verify that the hardware indicated that the checksum is valid.
    2188  *   Inform the stack about the status of the checksum so that the
    2189  *   stack doesn't spend time verifying it again.
   2190  ************************************************************************/
   2191 static void
   2192 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
   2193     struct ixgbe_hw_stats *stats)
   2194 {
   2195 	u16  status = (u16)staterr;
   2196 	u8   errors = (u8)(staterr >> 24);
   2197 #if 0
   2198 	bool sctp = false;
   2199 
   2200 	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
   2201 	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
   2202 		sctp = true;
   2203 #endif
   2204 
   2205 	/* IPv4 checksum */
   2206 	if (status & IXGBE_RXD_STAT_IPCS) {
   2207 		IXGBE_EVC_ADD(&stats->ipcs, 1);
   2208 		if (!(errors & IXGBE_RXD_ERR_IPE)) {
   2209 			/* IP Checksum Good */
   2210 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
   2211 		} else {
   2212 			IXGBE_EVC_ADD(&stats->ipcs_bad, 1);
   2213 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
   2214 		}
   2215 	}
   2216 	/* TCP/UDP/SCTP checksum */
   2217 	if (status & IXGBE_RXD_STAT_L4CS) {
   2218 		IXGBE_EVC_ADD(&stats->l4cs, 1);
   2219 		int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
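         		/*
         		 * The driver does not distinguish TCP from UDP here; it
         		 * sets all four flags and the stack consults only the one
         		 * matching the packet's actual protocol.
         		 */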
   2220 		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
   2221 			mp->m_pkthdr.csum_flags |= type;
   2222 		} else {
   2223 			IXGBE_EVC_ADD(&stats->l4cs_bad, 1);
   2224 			mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
   2225 		}
   2226 	}
   2227 } /* ixgbe_rx_checksum */
   2228 
   2229 /************************************************************************
   2230  * ixgbe_dma_malloc
   2231  ************************************************************************/
   2232 int
   2233 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
   2234 		struct ixgbe_dma_alloc *dma, const int mapflags)
   2235 {
   2236 	device_t dev = adapter->dev;
   2237 	int      r, rsegs;
   2238 
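         	/*
         	 * Allocation sequence: create a DMA tag, allocate DMA-safe
         	 * memory, map it into kernel VA, create a DMA map, then load
         	 * the memory into that map.  The fail_* labels below unwind
         	 * these steps in reverse order.
         	 */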
   2239 	r = ixgbe_dma_tag_create(
   2240 	     /*      parent */ adapter->osdep.dmat,
   2241 	     /*   alignment */ DBA_ALIGN,
   2242 	     /*      bounds */ 0,
   2243 	     /*     maxsize */ size,
   2244 	     /*   nsegments */ 1,
   2245 	     /*  maxsegsize */ size,
   2246 	     /*       flags */ BUS_DMA_ALLOCNOW,
   2247 			       &dma->dma_tag);
   2248 	if (r != 0) {
   2249 		aprint_error_dev(dev,
   2250 		    "%s: ixgbe_dma_tag_create failed; error %d\n", __func__,
   2251 		    r);
   2252 		goto fail_0;
   2253 	}
   2254 
   2255 	r = bus_dmamem_alloc(dma->dma_tag->dt_dmat, size,
   2256 	    dma->dma_tag->dt_alignment, dma->dma_tag->dt_boundary,
   2257 	    &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
   2258 	if (r != 0) {
   2259 		aprint_error_dev(dev,
   2260 		    "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
   2261 		goto fail_1;
   2262 	}
   2263 
   2264 	r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
   2265 	    size, &dma->dma_vaddr, BUS_DMA_NOWAIT | BUS_DMA_COHERENT);
   2266 	if (r != 0) {
   2267 		aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
   2268 		    __func__, r);
   2269 		goto fail_2;
   2270 	}
   2271 
   2272 	r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
   2273 	if (r != 0) {
    2274 		aprint_error_dev(dev, "%s: ixgbe_dmamap_create failed; error %d\n",
   2275 		    __func__, r);
   2276 		goto fail_3;
   2277 	}
   2278 
   2279 	r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map,
   2280 	    dma->dma_vaddr, size, NULL, mapflags | BUS_DMA_NOWAIT);
   2281 	if (r != 0) {
   2282 		aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
   2283 		    __func__, r);
   2284 		goto fail_4;
   2285 	}
   2286 	dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
   2287 	dma->dma_size = size;
   2288 	return 0;
   2289 fail_4:
   2290 	ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
   2291 fail_3:
   2292 	bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
   2293 fail_2:
   2294 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
   2295 fail_1:
   2296 	ixgbe_dma_tag_destroy(dma->dma_tag);
   2297 fail_0:
   2298 
   2299 	return (r);
   2300 } /* ixgbe_dma_malloc */
   2301 
   2302 /************************************************************************
   2303  * ixgbe_dma_free
   2304  ************************************************************************/
   2305 void
   2306 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
   2307 {
   2308 	bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
   2309 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   2310 	ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
   2311 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
   2312 	ixgbe_dma_tag_destroy(dma->dma_tag);
   2313 } /* ixgbe_dma_free */
   2314 
   2315 
   2316 /************************************************************************
   2317  * ixgbe_allocate_queues
   2318  *
   2319  *   Allocate memory for the transmit and receive rings, and then
   2320  *   the descriptors associated with each, called only once at attach.
   2321  ************************************************************************/
   2322 int
   2323 ixgbe_allocate_queues(struct adapter *adapter)
   2324 {
   2325 	device_t	dev = adapter->dev;
   2326 	struct ix_queue	*que;
   2327 	struct tx_ring	*txr;
   2328 	struct rx_ring	*rxr;
   2329 	int             rsize, tsize, error = IXGBE_SUCCESS;
   2330 	int             txconf = 0, rxconf = 0;
   2331 
   2332 	/* First, allocate the top level queue structs */
   2333 	adapter->queues = (struct ix_queue *)malloc(sizeof(struct ix_queue) *
   2334 	    adapter->num_queues, M_DEVBUF, M_WAITOK | M_ZERO);
   2335 
   2336 	/* Second, allocate the TX ring struct memory */
   2337 	adapter->tx_rings = malloc(sizeof(struct tx_ring) *
   2338 	    adapter->num_queues, M_DEVBUF, M_WAITOK | M_ZERO);
   2339 
   2340 	/* Third, allocate the RX ring */
   2341 	adapter->rx_rings = (struct rx_ring *)malloc(sizeof(struct rx_ring) *
   2342 	    adapter->num_queues, M_DEVBUF, M_WAITOK | M_ZERO);
   2343 
   2344 	/* For the ring itself */
   2345 	tsize = roundup2(adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc),
   2346 	    DBA_ALIGN);
   2347 
   2348 	/*
    2349 	 * Now set up the TX queues. txconf is needed to handle the
    2350 	 * possibility that things fail midcourse and we need to undo
    2351 	 * the memory allocations gracefully.
   2352 	 */
   2353 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
   2354 		/* Set up some basics */
   2355 		txr = &adapter->tx_rings[i];
   2356 		txr->adapter = adapter;
   2357 		txr->txr_interq = NULL;
   2358 		/* In case SR-IOV is enabled, align the index properly */
   2359 #ifdef PCI_IOV
   2360 		txr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
   2361 		    i);
   2362 #else
   2363 		txr->me = i;
   2364 #endif
   2365 		txr->num_desc = adapter->num_tx_desc;
   2366 
   2367 		/* Initialize the TX side lock */
   2368 		mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
   2369 
   2370 		if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma,
   2371 		    BUS_DMA_NOWAIT)) {
   2372 			aprint_error_dev(dev,
   2373 			    "Unable to allocate TX Descriptor memory\n");
   2374 			error = ENOMEM;
   2375 			goto err_tx_desc;
   2376 		}
   2377 		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
   2378 		bzero((void *)txr->tx_base, tsize);
   2379 
   2380 		/* Now allocate transmit buffers for the ring */
   2381 		if (ixgbe_allocate_transmit_buffers(txr)) {
   2382 			aprint_error_dev(dev,
   2383 			    "Critical Failure setting up transmit buffers\n");
   2384 			error = ENOMEM;
   2385 			goto err_tx_desc;
   2386 		}
   2387 		if (!(adapter->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
   2388 			/* Allocate a buf ring */
   2389 			txr->txr_interq = pcq_create(IXGBE_BR_SIZE, KM_SLEEP);
   2390 			if (txr->txr_interq == NULL) {
   2391 				aprint_error_dev(dev,
   2392 				    "Critical Failure setting up buf ring\n");
   2393 				error = ENOMEM;
   2394 				goto err_tx_desc;
   2395 			}
   2396 		}
   2397 	}
   2398 
   2399 	/*
   2400 	 * Next the RX queues...
   2401 	 */
   2402 	rsize = roundup2(adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc),
   2403 	    DBA_ALIGN);
   2404 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
   2405 		rxr = &adapter->rx_rings[i];
   2406 		/* Set up some basics */
   2407 		rxr->adapter = adapter;
   2408 #ifdef PCI_IOV
   2409 		/* In case SR-IOV is enabled, align the index properly */
   2410 		rxr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
   2411 		    i);
   2412 #else
   2413 		rxr->me = i;
   2414 #endif
   2415 		rxr->num_desc = adapter->num_rx_desc;
   2416 
   2417 		/* Initialize the RX side lock */
   2418 		mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
   2419 
   2420 		if (ixgbe_dma_malloc(adapter, rsize, &rxr->rxdma,
   2421 		    BUS_DMA_NOWAIT)) {
   2422 			aprint_error_dev(dev,
    2423 			    "Unable to allocate RX Descriptor memory\n");
   2424 			error = ENOMEM;
   2425 			goto err_rx_desc;
   2426 		}
   2427 		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
   2428 		bzero((void *)rxr->rx_base, rsize);
   2429 
   2430 		/* Allocate receive buffers for the ring */
   2431 		if (ixgbe_allocate_receive_buffers(rxr)) {
   2432 			aprint_error_dev(dev,
   2433 			    "Critical Failure setting up receive buffers\n");
   2434 			error = ENOMEM;
   2435 			goto err_rx_desc;
   2436 		}
   2437 	}
   2438 
   2439 	/*
   2440 	 * Finally set up the queue holding structs
   2441 	 */
   2442 	for (int i = 0; i < adapter->num_queues; i++) {
   2443 		que = &adapter->queues[i];
   2444 		que->adapter = adapter;
   2445 		que->me = i;
   2446 		que->txr = &adapter->tx_rings[i];
   2447 		que->rxr = &adapter->rx_rings[i];
   2448 
   2449 		mutex_init(&que->dc_mtx, MUTEX_DEFAULT, IPL_NET);
   2450 		que->disabled_count = 0;
   2451 	}
   2452 
   2453 	return (0);
   2454 
   2455 err_rx_desc:
   2456 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
   2457 		ixgbe_dma_free(adapter, &rxr->rxdma);
   2458 err_tx_desc:
   2459 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
   2460 		ixgbe_dma_free(adapter, &txr->txdma);
   2461 	free(adapter->rx_rings, M_DEVBUF);
   2462 	free(adapter->tx_rings, M_DEVBUF);
   2463 	free(adapter->queues, M_DEVBUF);
   2464 	return (error);
   2465 } /* ixgbe_allocate_queues */
   2466 
   2467 /************************************************************************
   2468  * ixgbe_free_queues
   2469  *
   2470  *   Free descriptors for the transmit and receive rings, and then
   2471  *   the memory associated with each.
   2472  ************************************************************************/
   2473 void
   2474 ixgbe_free_queues(struct adapter *adapter)
   2475 {
   2476 	struct ix_queue *que;
   2477 	int i;
   2478 
   2479 	ixgbe_free_transmit_structures(adapter);
   2480 	ixgbe_free_receive_structures(adapter);
   2481 	for (i = 0; i < adapter->num_queues; i++) {
   2482 		que = &adapter->queues[i];
   2483 		mutex_destroy(&que->dc_mtx);
   2484 	}
   2485 	free(adapter->queues, M_DEVBUF);
   2486 } /* ixgbe_free_queues */
   2487