ix_txrx.c revision 1.87
      1 /* $NetBSD: ix_txrx.c,v 1.87 2021/08/25 09:06:02 msaitoh Exp $ */
      2 
      3 /******************************************************************************
      4 
      5   Copyright (c) 2001-2017, Intel Corporation
      6   All rights reserved.
      7 
      8   Redistribution and use in source and binary forms, with or without
      9   modification, are permitted provided that the following conditions are met:
     10 
     11    1. Redistributions of source code must retain the above copyright notice,
     12       this list of conditions and the following disclaimer.
     13 
     14    2. Redistributions in binary form must reproduce the above copyright
     15       notice, this list of conditions and the following disclaimer in the
     16       documentation and/or other materials provided with the distribution.
     17 
     18    3. Neither the name of the Intel Corporation nor the names of its
     19       contributors may be used to endorse or promote products derived from
     20       this software without specific prior written permission.
     21 
     22   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     23   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     24   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     25   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     26   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     27   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     28   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     29   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     30   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     31   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     32   POSSIBILITY OF SUCH DAMAGE.
     33 
     34 ******************************************************************************/
     35 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 327031 2017-12-20 18:15:06Z erj $*/
     36 
     37 /*
     38  * Copyright (c) 2011 The NetBSD Foundation, Inc.
     39  * All rights reserved.
     40  *
     41  * This code is derived from software contributed to The NetBSD Foundation
     42  * by Coyote Point Systems, Inc.
     43  *
     44  * Redistribution and use in source and binary forms, with or without
     45  * modification, are permitted provided that the following conditions
     46  * are met:
     47  * 1. Redistributions of source code must retain the above copyright
     48  *    notice, this list of conditions and the following disclaimer.
     49  * 2. Redistributions in binary form must reproduce the above copyright
     50  *    notice, this list of conditions and the following disclaimer in the
     51  *    documentation and/or other materials provided with the distribution.
     52  *
     53  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     54  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     55  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     56  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     57  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     58  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     59  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     60  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     61  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     62  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     63  * POSSIBILITY OF SUCH DAMAGE.
     64  */
     65 
     66 #include <sys/cdefs.h>
     67 __KERNEL_RCSID(0, "$NetBSD: ix_txrx.c,v 1.87 2021/08/25 09:06:02 msaitoh Exp $");
     68 
     69 #include "opt_inet.h"
     70 #include "opt_inet6.h"
     71 
     72 #include "ixgbe.h"
     73 
     74 /*
     75  * HW RSC control:
     76  *  This feature only works with
     77  *  IPv4, and only on 82599 and later.
     78  *  It also causes IP forwarding to
     79  *  fail, and unlike LRO the stack
     80  *  cannot control it. For these
     81  *  reasons it is best left off; there
     82  *  is no tuneable interface, so enabling
     83  *  it requires recompiling the driver
     84  *  with this flag set to TRUE.
     85  */
     86 static bool ixgbe_rsc_enable = FALSE;
     87 
     88 /*
     89  * For Flow Director: this is the
     90  * number of TX packets we sample
     91  * for the filter pool, this means
     92  * for the filter pool; this means
     93  *
     94  * This feature can be disabled by
     95  * setting this to 0.
     96  */
     97 static int atr_sample_rate = 20;
     98 
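        /*
         * Shift the received payload by ETHER_ALIGN (2) bytes so the IP
         * header ends up 4-byte aligned, but only when the maximum frame
         * still fits in the receive cluster after the adjustment.
         */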
     99 #define IXGBE_M_ADJ(adapter, rxr, mp)					\
    100 	if (adapter->max_frame_size <= (rxr->mbuf_sz - ETHER_ALIGN))	\
    101 		m_adj(mp, ETHER_ALIGN)
    102 
    103 /************************************************************************
    104  *  Local Function prototypes
    105  ************************************************************************/
    106 static void          ixgbe_setup_transmit_ring(struct tx_ring *);
    107 static void          ixgbe_free_transmit_buffers(struct tx_ring *);
    108 static int           ixgbe_setup_receive_ring(struct rx_ring *);
    109 static void          ixgbe_free_receive_buffers(struct rx_ring *);
    110 static void          ixgbe_rx_checksum(u32, struct mbuf *, u32,
    111                                        struct ixgbe_hw_stats *);
    112 static void          ixgbe_refresh_mbufs(struct rx_ring *, int);
    113 static void          ixgbe_drain(struct ifnet *, struct tx_ring *);
    114 static int           ixgbe_xmit(struct tx_ring *, struct mbuf *);
    115 static int           ixgbe_tx_ctx_setup(struct tx_ring *,
    116                                         struct mbuf *, u32 *, u32 *);
    117 static int           ixgbe_tso_setup(struct tx_ring *,
    118                                      struct mbuf *, u32 *, u32 *);
    119 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
    120 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
    121                                     struct mbuf *, u32);
    122 static int           ixgbe_dma_malloc(struct adapter *, bus_size_t,
    123                                       struct ixgbe_dma_alloc *, int);
    124 static void          ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
    125 
    126 static void	ixgbe_setup_hw_rsc(struct rx_ring *);
    127 
    128 /************************************************************************
    129  * ixgbe_legacy_start_locked - Transmit entry point
    130  *
    131  *   Called by the stack to initiate a transmit.
    132  *   The driver will remain in this routine as long as there are
    133  *   packets to transmit and transmit resources are available.
    134  *   In case resources are not available, the stack is notified
    135  *   and the packet is requeued.
    136  ************************************************************************/
    137 int
    138 ixgbe_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
    139 {
    140 	int rc;
    141 	struct mbuf    *m_head;
    142 	struct adapter *adapter = txr->adapter;
    143 
    144 	IXGBE_TX_LOCK_ASSERT(txr);
    145 
    146 	if (adapter->link_active != LINK_STATE_UP) {
    147 		/*
    148 		 * Discard all packets buffered in IFQ to avoid
    149 		 * sending stale packets when the link comes back up.
    150 		 */
    151 		ixgbe_drain(ifp, txr);
    152 		return (ENETDOWN);
    153 	}
    154 	if ((ifp->if_flags & IFF_RUNNING) == 0)
    155 		return (ENETDOWN);
    156 	if (txr->txr_no_space)
    157 		return (ENETDOWN);
    158 
    159 	while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
    160 		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
    161 			break;
    162 
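        		/*
        		 * Peek at the packet first; it is only dequeued once
        		 * ixgbe_xmit() accepts it, so an EAGAIN below leaves
        		 * it on if_snd to be retried later.
        		 */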
    163 		IFQ_POLL(&ifp->if_snd, m_head);
    164 		if (m_head == NULL)
    165 			break;
    166 
    167 		if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
    168 			break;
    169 		}
    170 		IFQ_DEQUEUE(&ifp->if_snd, m_head);
    171 		if (rc != 0) {
    172 			m_freem(m_head);
    173 			continue;
    174 		}
    175 
    176 		/* Send a copy of the frame to the BPF listener */
    177 		bpf_mtap(ifp, m_head, BPF_D_OUT);
    178 	}
    179 
    180 	return IXGBE_SUCCESS;
    181 } /* ixgbe_legacy_start_locked */
    182 
    183 /************************************************************************
    184  * ixgbe_legacy_start
    185  *
    186  *   Called by the stack, this always uses the first tx ring,
    187  *   and should not be used with multiqueue tx enabled.
    188  ************************************************************************/
    189 void
    190 ixgbe_legacy_start(struct ifnet *ifp)
    191 {
    192 	struct adapter *adapter = ifp->if_softc;
    193 	struct tx_ring *txr = adapter->tx_rings;
    194 
    195 	if (ifp->if_flags & IFF_RUNNING) {
    196 		IXGBE_TX_LOCK(txr);
    197 		ixgbe_legacy_start_locked(ifp, txr);
    198 		IXGBE_TX_UNLOCK(txr);
    199 	}
    200 } /* ixgbe_legacy_start */
    201 
    202 /************************************************************************
    203  * ixgbe_mq_start - Multiqueue Transmit Entry Point
    204  *
    205  *   (if_transmit function)
    206  ************************************************************************/
    207 int
    208 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
    209 {
    210 	struct adapter	*adapter = ifp->if_softc;
    211 	struct tx_ring	*txr;
    212 	int		i;
    213 #ifdef RSS
    214 	uint32_t bucket_id;
    215 #endif
    216 
    217 	/*
    218 	 * When doing RSS, map it to the same outbound queue
    219 	 * as the incoming flow would be mapped to.
    220 	 *
    221 	 * If everything is set up correctly, it should be the
    222 	 * same bucket that the current CPU is mapped to.
    223 	 */
    224 #ifdef RSS
    225 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
    226 		if ((adapter->feat_en & IXGBE_FEATURE_RSS) &&
    227 		    (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
    228 		    &bucket_id) == 0)) {
    229 			i = bucket_id % adapter->num_queues;
    230 #ifdef IXGBE_DEBUG
    231 			if (bucket_id > adapter->num_queues)
    232 				if_printf(ifp,
    233 				    "bucket_id (%d) > num_queues (%d)\n",
    234 				    bucket_id, adapter->num_queues);
    235 #endif
    236 		} else
    237 			i = m->m_pkthdr.flowid % adapter->num_queues;
    238 	} else
    239 #endif /* RSS */
    240 		i = (cpu_index(curcpu()) % ncpu) % adapter->num_queues;
    241 
    242 	/* Check for a hung queue and pick alternative */
    243 	if (((1ULL << i) & adapter->active_queues) == 0)
    244 		i = ffs64(adapter->active_queues);
    245 
    246 	txr = &adapter->tx_rings[i];
    247 
    248 	if (__predict_false(!pcq_put(txr->txr_interq, m))) {
    249 		m_freem(m);
    250 		txr->pcq_drops.ev_count++;
    251 		return ENOBUFS;
    252 	}
    253 	if (IXGBE_TX_TRYLOCK(txr)) {
    254 		ixgbe_mq_start_locked(ifp, txr);
    255 		IXGBE_TX_UNLOCK(txr);
    256 	} else {
    257 		if (adapter->txrx_use_workqueue) {
    258 			u_int *enqueued;
    259 
    260 			/*
    261 			 * This function itself is not called in interrupt
    262 			 * context, but it can be called in fast softint
    263 			 * context right after receiving forwarded packets,
    264 			 * so the workqueue must be protected against being
    265 			 * enqueued twice when the machine handles both
    266 			 * locally generated and forwarded packets.
    267 			 */
    268 			enqueued = percpu_getref(adapter->txr_wq_enqueued);
    269 			if (*enqueued == 0) {
    270 				*enqueued = 1;
    271 				percpu_putref(adapter->txr_wq_enqueued);
    272 				workqueue_enqueue(adapter->txr_wq,
    273 				    &txr->wq_cookie, curcpu());
    274 			} else
    275 				percpu_putref(adapter->txr_wq_enqueued);
    276 		} else {
    277 			kpreempt_disable();
    278 			softint_schedule(txr->txr_si);
    279 			kpreempt_enable();
    280 		}
    281 	}
    282 
    283 	return (0);
    284 } /* ixgbe_mq_start */
    285 
    286 /************************************************************************
    287  * ixgbe_mq_start_locked
    288  ************************************************************************/
    289 int
    290 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
    291 {
    292 	struct mbuf    *next;
    293 	int            enqueued = 0, err = 0;
    294 
    295 	if (txr->adapter->link_active != LINK_STATE_UP) {
    296 		/*
    297 		 * Discard all packets buffered in txr_interq to avoid
    298 		 * sending stale packets when the link comes back up.
    299 		 */
    300 		ixgbe_drain(ifp, txr);
    301 		return (ENETDOWN);
    302 	}
    303 	if ((ifp->if_flags & IFF_RUNNING) == 0)
    304 		return (ENETDOWN);
    305 	if (txr->txr_no_space)
    306 		return (ENETDOWN);
    307 
    308 	/* Process the queue */
    309 	while ((next = pcq_get(txr->txr_interq)) != NULL) {
    310 		if ((err = ixgbe_xmit(txr, next)) != 0) {
    311 			m_freem(next);
    312 			/* All errors are counted in ixgbe_xmit() */
    313 			break;
    314 		}
    315 		enqueued++;
    316 #if __FreeBSD_version >= 1100036
    317 		/*
    318 		 * Since we're looking at the tx ring, we can check
    319 		 * to see if we're a VF by examining our tail register
    320 		 * address.
    321 		 */
    322 		if ((txr->adapter->feat_en & IXGBE_FEATURE_VF) &&
    323 		    (next->m_flags & M_MCAST))
    324 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
    325 #endif
    326 		/* Send a copy of the frame to the BPF listener */
    327 		bpf_mtap(ifp, next, BPF_D_OUT);
    328 		if ((ifp->if_flags & IFF_RUNNING) == 0)
    329 			break;
    330 	}
    331 
    332 	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->adapter))
    333 		ixgbe_txeof(txr);
    334 
    335 	return (err);
    336 } /* ixgbe_mq_start_locked */
    337 
    338 /************************************************************************
    339  * ixgbe_deferred_mq_start
    340  *
    341  *   Called from a softint and workqueue (indirectly) to drain queued
    342  *   transmit packets.
    343  ************************************************************************/
    344 void
    345 ixgbe_deferred_mq_start(void *arg)
    346 {
    347 	struct tx_ring *txr = arg;
    348 	struct adapter *adapter = txr->adapter;
    349 	struct ifnet   *ifp = adapter->ifp;
    350 
    351 	IXGBE_TX_LOCK(txr);
    352 	if (pcq_peek(txr->txr_interq) != NULL)
    353 		ixgbe_mq_start_locked(ifp, txr);
    354 	IXGBE_TX_UNLOCK(txr);
    355 } /* ixgbe_deferred_mq_start */
    356 
    357 /************************************************************************
    358  * ixgbe_deferred_mq_start_work
    359  *
    360  *   Called from a workqueue to drain queued transmit packets.
    361  ************************************************************************/
    362 void
    363 ixgbe_deferred_mq_start_work(struct work *wk, void *arg)
    364 {
    365 	struct tx_ring *txr = container_of(wk, struct tx_ring, wq_cookie);
    366 	struct adapter *adapter = txr->adapter;
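        	/* Clear the per-CPU flag so ixgbe_mq_start() can enqueue this work again. */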
    367 	u_int *enqueued = percpu_getref(adapter->txr_wq_enqueued);
    368 	*enqueued = 0;
    369 	percpu_putref(adapter->txr_wq_enqueued);
    370 
    371 	ixgbe_deferred_mq_start(txr);
    372 } /* ixgbe_deferred_mq_start_work */
    373 
    374 /************************************************************************
    375  * ixgbe_drain_all
    376  ************************************************************************/
    377 void
    378 ixgbe_drain_all(struct adapter *adapter)
    379 {
    380 	struct ifnet *ifp = adapter->ifp;
    381 	struct ix_queue *que = adapter->queues;
    382 
    383 	for (int i = 0; i < adapter->num_queues; i++, que++) {
    384 		struct tx_ring  *txr = que->txr;
    385 
    386 		IXGBE_TX_LOCK(txr);
    387 		ixgbe_drain(ifp, txr);
    388 		IXGBE_TX_UNLOCK(txr);
    389 	}
    390 }
    391 
    392 /************************************************************************
    393  * ixgbe_xmit
    394  *
    395  *   Maps the mbufs to tx descriptors, allowing the
    396  *   TX engine to transmit the packets.
    397  *
    398  *   Return 0 on success, positive on failure
    399  ************************************************************************/
    400 static int
    401 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
    402 {
    403 	struct adapter          *adapter = txr->adapter;
    404 	struct ixgbe_tx_buf     *txbuf;
    405 	union ixgbe_adv_tx_desc *txd = NULL;
    406 	struct ifnet	        *ifp = adapter->ifp;
    407 	int                     i, j, error;
    408 	int                     first;
    409 	u32                     olinfo_status = 0, cmd_type_len;
    410 	bool                    remap = TRUE;
    411 	bus_dmamap_t            map;
    412 
    413 	/* Basic descriptor defines */
    414 	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
    415 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
    416 
    417 	if (vlan_has_tag(m_head))
    418 		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
    419 
    420 	/*
    421 	 * Important to capture the first descriptor
    422 	 * used because it will contain the index of
    423 	 * the one we tell the hardware to report back
    424 	 */
    425 	first = txr->next_avail_desc;
    426 	txbuf = &txr->tx_buffers[first];
    427 	map = txbuf->map;
    428 
    429 	/*
    430 	 * Map the packet for DMA.
    431 	 */
    432 retry:
    433 	error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map, m_head,
    434 	    BUS_DMA_NOWAIT);
    435 
    436 	if (__predict_false(error)) {
    437 		struct mbuf *m;
    438 
    439 		switch (error) {
    440 		case EAGAIN:
    441 			txr->q_eagain_tx_dma_setup++;
    442 			return EAGAIN;
    443 		case ENOMEM:
    444 			txr->q_enomem_tx_dma_setup++;
    445 			return EAGAIN;
    446 		case EFBIG:
    447 			/* Try it again? - one try */
    448 			if (remap == TRUE) {
    449 				remap = FALSE;
    450 				/*
    451 				 * XXX: m_defrag will choke on
    452 				 * non-MCLBYTES-sized clusters
    453 				 */
    454 				txr->q_efbig_tx_dma_setup++;
    455 				m = m_defrag(m_head, M_NOWAIT);
    456 				if (m == NULL) {
    457 					txr->q_mbuf_defrag_failed++;
    458 					return ENOBUFS;
    459 				}
    460 				m_head = m;
    461 				goto retry;
    462 			} else {
    463 				txr->q_efbig2_tx_dma_setup++;
    464 				return error;
    465 			}
    466 		case EINVAL:
    467 			txr->q_einval_tx_dma_setup++;
    468 			return error;
    469 		default:
    470 			txr->q_other_tx_dma_setup++;
    471 			return error;
    472 		}
    473 	}
    474 
    475 	/* Make certain there are enough descriptors */
    476 	if (txr->tx_avail < (map->dm_nsegs + 2)) {
    477 		txr->txr_no_space = true;
    478 		txr->no_desc_avail.ev_count++;
    479 		ixgbe_dmamap_unload(txr->txtag, txbuf->map);
    480 		return EAGAIN;
    481 	}
    482 
    483 	/*
    484 	 * Set up the appropriate offload context
    485 	 * this will consume the first descriptor
    486 	 */
    487 	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
    488 	if (__predict_false(error)) {
    489 		return (error);
    490 	}
    491 
    492 #ifdef IXGBE_FDIR
    493 	/* Do the flow director magic */
    494 	if ((adapter->feat_en & IXGBE_FEATURE_FDIR) &&
    495 	    (txr->atr_sample) && (!adapter->fdir_reinit)) {
    496 		++txr->atr_count;
    497 		if (txr->atr_count >= atr_sample_rate) {
    498 			ixgbe_atr(txr, m_head);
    499 			txr->atr_count = 0;
    500 		}
    501 	}
    502 #endif
    503 
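        	/*
        	 * CC ("check context") has the hardware honor the context
        	 * descriptor written by ixgbe_tx_ctx_setup() above.
        	 */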
    504 	olinfo_status |= IXGBE_ADVTXD_CC;
    505 	i = txr->next_avail_desc;
    506 	for (j = 0; j < map->dm_nsegs; j++) {
    507 		bus_size_t seglen;
    508 		uint64_t segaddr;
    509 
    510 		txbuf = &txr->tx_buffers[i];
    511 		txd = &txr->tx_base[i];
    512 		seglen = map->dm_segs[j].ds_len;
    513 		segaddr = htole64(map->dm_segs[j].ds_addr);
    514 
    515 		txd->read.buffer_addr = segaddr;
    516 		txd->read.cmd_type_len = htole32(cmd_type_len | seglen);
    517 		txd->read.olinfo_status = htole32(olinfo_status);
    518 
    519 		if (++i == txr->num_desc)
    520 			i = 0;
    521 	}
    522 
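        	/*
        	 * Mark the last segment: EOP ends the frame and RS requests a
        	 * status write-back so ixgbe_txeof() can reclaim the descriptors.
        	 */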
    523 	txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
    524 	txr->tx_avail -= map->dm_nsegs;
    525 	txr->next_avail_desc = i;
    526 
    527 	txbuf->m_head = m_head;
    528 	/*
    529 	 * Here we swap the map so the last descriptor,
    530 	 * which gets the completion interrupt has the
    531 	 * real map, and the first descriptor gets the
    532 	 * unused map from this descriptor.
    533 	 */
    534 	txr->tx_buffers[first].map = txbuf->map;
    535 	txbuf->map = map;
    536 	bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
    537 	    BUS_DMASYNC_PREWRITE);
    538 
    539 	/* Set the EOP descriptor that will be marked done */
    540 	txbuf = &txr->tx_buffers[first];
    541 	txbuf->eop = txd;
    542 
    543 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
    544 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
    545 	/*
    546 	 * Advance the Transmit Descriptor Tail (TDT); this tells the
    547 	 * hardware that this frame is available to transmit.
    548 	 */
    549 	++txr->total_packets.ev_count;
    550 	IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
    551 
    552 	net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
    553 	if_statadd_ref(nsr, if_obytes, m_head->m_pkthdr.len);
    554 	if (m_head->m_flags & M_MCAST)
    555 		if_statinc_ref(nsr, if_omcasts);
    556 	IF_STAT_PUTREF(ifp);
    557 
    558 	/* Mark queue as having work */
    559 	if (txr->busy == 0)
    560 		txr->busy = 1;
    561 
    562 	return (0);
    563 } /* ixgbe_xmit */
    564 
    565 /************************************************************************
    566  * ixgbe_drain
    567  ************************************************************************/
    568 static void
    569 ixgbe_drain(struct ifnet *ifp, struct tx_ring *txr)
    570 {
    571 	struct mbuf *m;
    572 
    573 	IXGBE_TX_LOCK_ASSERT(txr);
    574 
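        	/*
        	 * Only the first ring services the legacy if_snd queue
        	 * (see ixgbe_legacy_start()), so drain it from ring 0 only.
        	 */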
    575 	if (txr->me == 0) {
    576 		while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
    577 			IFQ_DEQUEUE(&ifp->if_snd, m);
    578 			m_freem(m);
    579 			IF_DROP(&ifp->if_snd);
    580 		}
    581 	}
    582 
    583 	while ((m = pcq_get(txr->txr_interq)) != NULL) {
    584 		m_freem(m);
    585 		txr->pcq_drops.ev_count++;
    586 	}
    587 }
    588 
    589 /************************************************************************
    590  * ixgbe_allocate_transmit_buffers
    591  *
    592  *   Allocate memory for tx_buffer structures. The tx_buffer stores all
    593  *   the information needed to transmit a packet on the wire. This is
    594  *   called only once at attach, setup is done every reset.
    595  ************************************************************************/
    596 static int
    597 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
    598 {
    599 	struct adapter      *adapter = txr->adapter;
    600 	device_t            dev = adapter->dev;
    601 	struct ixgbe_tx_buf *txbuf;
    602 	int                 error, i;
    603 
    604 	/*
    605 	 * Setup DMA descriptor areas.
    606 	 */
    607 	error = ixgbe_dma_tag_create(
    608 	         /*      parent */ adapter->osdep.dmat,
    609 	         /*   alignment */ 1,
    610 	         /*      bounds */ 0,
    611 	         /*     maxsize */ IXGBE_TSO_SIZE,
    612 	         /*   nsegments */ adapter->num_segs,
    613 	         /*  maxsegsize */ PAGE_SIZE,
    614 	         /*       flags */ 0,
    615 	                           &txr->txtag);
    616 	if (error != 0) {
    617 		aprint_error_dev(dev, "Unable to allocate TX DMA tag\n");
    618 		goto fail;
    619 	}
    620 
    621 	txr->tx_buffers = malloc(sizeof(struct ixgbe_tx_buf) *
    622 	    adapter->num_tx_desc, M_DEVBUF, M_WAITOK | M_ZERO);
    623 
    624 	/* Create the descriptor buffer dma maps */
    625 	txbuf = txr->tx_buffers;
    626 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
    627 		error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
    628 		if (error != 0) {
    629 			aprint_error_dev(dev,
    630 			    "Unable to create TX DMA map (%d)\n", error);
    631 			goto fail;
    632 		}
    633 	}
    634 
    635 	return 0;
    636 fail:
    637 	/* We free all, it handles case where we are in the middle */
    638 #if 0 /* XXX was FreeBSD */
    639 	ixgbe_free_transmit_structures(adapter);
    640 #else
    641 	ixgbe_free_transmit_buffers(txr);
    642 #endif
    643 	return (error);
    644 } /* ixgbe_allocate_transmit_buffers */
    645 
    646 /************************************************************************
    647  * ixgbe_setup_transmit_ring - Initialize a transmit ring.
    648  ************************************************************************/
    649 static void
    650 ixgbe_setup_transmit_ring(struct tx_ring *txr)
    651 {
    652 	struct adapter        *adapter = txr->adapter;
    653 	struct ixgbe_tx_buf   *txbuf;
    654 #ifdef DEV_NETMAP
    655 	struct netmap_adapter *na = NA(adapter->ifp);
    656 	struct netmap_slot    *slot;
    657 #endif /* DEV_NETMAP */
    658 
    659 	/* Clear the old ring contents */
    660 	IXGBE_TX_LOCK(txr);
    661 
    662 #ifdef DEV_NETMAP
    663 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
    664 		/*
    665 		 * (under lock): if in netmap mode, do some consistency
    666 		 * checks and set slot to entry 0 of the netmap ring.
    667 		 */
    668 		slot = netmap_reset(na, NR_TX, txr->me, 0);
    669 	}
    670 #endif /* DEV_NETMAP */
    671 
    672 	bzero((void *)txr->tx_base,
    673 	    (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
    674 	/* Reset indices */
    675 	txr->next_avail_desc = 0;
    676 	txr->next_to_clean = 0;
    677 
    678 	/* Free any existing tx buffers. */
    679 	txbuf = txr->tx_buffers;
    680 	for (int i = 0; i < txr->num_desc; i++, txbuf++) {
    681 		if (txbuf->m_head != NULL) {
    682 			bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
    683 			    0, txbuf->m_head->m_pkthdr.len,
    684 			    BUS_DMASYNC_POSTWRITE);
    685 			ixgbe_dmamap_unload(txr->txtag, txbuf->map);
    686 			m_freem(txbuf->m_head);
    687 			txbuf->m_head = NULL;
    688 		}
    689 
    690 #ifdef DEV_NETMAP
    691 		/*
    692 		 * In netmap mode, set the map for the packet buffer.
    693 		 * NOTE: Some drivers (not this one) also need to set
    694 		 * the physical buffer address in the NIC ring.
    695 		 * Slots in the netmap ring (indexed by "si") are
    696 		 * kring->nkr_hwofs positions "ahead" wrt the
    697 		 * corresponding slot in the NIC ring. In some drivers
    698 		 * (not here) nkr_hwofs can be negative. Function
    699 		 * netmap_idx_n2k() handles wraparounds properly.
    700 		 */
    701 		if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
    702 			int si = netmap_idx_n2k(na->tx_rings[txr->me], i);
    703 			netmap_load_map(na, txr->txtag,
    704 			    txbuf->map, NMB(na, slot + si));
    705 		}
    706 #endif /* DEV_NETMAP */
    707 
    708 		/* Clear the EOP descriptor pointer */
    709 		txbuf->eop = NULL;
    710 	}
    711 
    712 	/* Set the rate at which we sample packets */
    713 	if (adapter->feat_en & IXGBE_FEATURE_FDIR)
    714 		txr->atr_sample = atr_sample_rate;
    715 
    716 	/* Set number of descriptors available */
    717 	txr->tx_avail = adapter->num_tx_desc;
    718 
    719 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
    720 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
    721 	IXGBE_TX_UNLOCK(txr);
    722 } /* ixgbe_setup_transmit_ring */
    723 
    724 /************************************************************************
    725  * ixgbe_setup_transmit_structures - Initialize all transmit rings.
    726  ************************************************************************/
    727 int
    728 ixgbe_setup_transmit_structures(struct adapter *adapter)
    729 {
    730 	struct tx_ring *txr = adapter->tx_rings;
    731 
    732 	for (int i = 0; i < adapter->num_queues; i++, txr++)
    733 		ixgbe_setup_transmit_ring(txr);
    734 
    735 	return (0);
    736 } /* ixgbe_setup_transmit_structures */
    737 
    738 /************************************************************************
    739  * ixgbe_free_transmit_structures - Free all transmit rings.
    740  ************************************************************************/
    741 void
    742 ixgbe_free_transmit_structures(struct adapter *adapter)
    743 {
    744 	struct tx_ring *txr = adapter->tx_rings;
    745 
    746 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
    747 		ixgbe_free_transmit_buffers(txr);
    748 		ixgbe_dma_free(adapter, &txr->txdma);
    749 		IXGBE_TX_LOCK_DESTROY(txr);
    750 	}
    751 	free(adapter->tx_rings, M_DEVBUF);
    752 } /* ixgbe_free_transmit_structures */
    753 
    754 /************************************************************************
    755  * ixgbe_free_transmit_buffers
    756  *
    757  *   Free transmit ring related data structures.
    758  ************************************************************************/
    759 static void
    760 ixgbe_free_transmit_buffers(struct tx_ring *txr)
    761 {
    762 	struct adapter      *adapter = txr->adapter;
    763 	struct ixgbe_tx_buf *tx_buffer;
    764 	int                 i;
    765 
    766 	INIT_DEBUGOUT("ixgbe_free_transmit_buffers: begin");
    767 
    768 	if (txr->tx_buffers == NULL)
    769 		return;
    770 
    771 	tx_buffer = txr->tx_buffers;
    772 	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
    773 		if (tx_buffer->m_head != NULL) {
    774 			bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
    775 			    0, tx_buffer->m_head->m_pkthdr.len,
    776 			    BUS_DMASYNC_POSTWRITE);
    777 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
    778 			m_freem(tx_buffer->m_head);
    779 			tx_buffer->m_head = NULL;
    780 			if (tx_buffer->map != NULL) {
    781 				ixgbe_dmamap_destroy(txr->txtag,
    782 				    tx_buffer->map);
    783 				tx_buffer->map = NULL;
    784 			}
    785 		} else if (tx_buffer->map != NULL) {
    786 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
    787 			ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
    788 			tx_buffer->map = NULL;
    789 		}
    790 	}
    791 	if (txr->txr_interq != NULL) {
    792 		struct mbuf *m;
    793 
    794 		while ((m = pcq_get(txr->txr_interq)) != NULL)
    795 			m_freem(m);
    796 		pcq_destroy(txr->txr_interq);
    797 	}
    798 	if (txr->tx_buffers != NULL) {
    799 		free(txr->tx_buffers, M_DEVBUF);
    800 		txr->tx_buffers = NULL;
    801 	}
    802 	if (txr->txtag != NULL) {
    803 		ixgbe_dma_tag_destroy(txr->txtag);
    804 		txr->txtag = NULL;
    805 	}
    806 } /* ixgbe_free_transmit_buffers */
    807 
    808 /************************************************************************
    809  * ixgbe_tx_ctx_setup
    810  *
    811  *   Advanced Context Descriptor setup for VLAN, CSUM or TSO
    812  ************************************************************************/
    813 static int
    814 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
    815     u32 *cmd_type_len, u32 *olinfo_status)
    816 {
    817 	struct adapter                   *adapter = txr->adapter;
    818 	struct ixgbe_adv_tx_context_desc *TXD;
    819 	struct ether_vlan_header         *eh;
    820 #ifdef INET
    821 	struct ip                        *ip;
    822 #endif
    823 #ifdef INET6
    824 	struct ip6_hdr                   *ip6;
    825 #endif
    826 	int                              ehdrlen, ip_hlen = 0;
    827 	int                              offload = TRUE;
    828 	int                              ctxd = txr->next_avail_desc;
    829 	u32                              vlan_macip_lens = 0;
    830 	u32                              type_tucmd_mlhl = 0;
    831 	u16                              vtag = 0;
    832 	u16                              etype;
    833 	u8                               ipproto = 0;
    834 	char                             *l3d;
    835 
    836 
    837 	/* First check if TSO is to be used */
    838 	if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4 | M_CSUM_TSOv6)) {
    839 		int rv = ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status);
    840 
    841 		if (rv != 0)
    842 			++adapter->tso_err.ev_count;
    843 		return rv;
    844 	}
    845 
    846 	if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
    847 		offload = FALSE;
    848 
    849 	/* Indicate the whole packet as payload when not doing TSO */
    850 	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
    851 
    852 	/* Now ready a context descriptor */
    853 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
    854 
    855 	/*
    856 	 * In advanced descriptors the vlan tag must
    857 	 * be placed into the context descriptor. Hence
    858 	 * we need to make one even if not doing offloads.
    859 	 */
    860 	if (vlan_has_tag(mp)) {
    861 		vtag = htole16(vlan_get_tag(mp));
    862 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
    863 	} else if (!(txr->adapter->feat_en & IXGBE_FEATURE_NEEDS_CTXD) &&
    864 	           (offload == FALSE))
    865 		return (0);
    866 
    867 	/*
    868 	 * Determine where frame payload starts.
    869 	 * Jump over vlan headers if already present,
    870 	 * helpful for QinQ too.
    871 	 */
    872 	KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
    873 	eh = mtod(mp, struct ether_vlan_header *);
    874 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
    875 		KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
    876 		etype = ntohs(eh->evl_proto);
    877 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
    878 	} else {
    879 		etype = ntohs(eh->evl_encap_proto);
    880 		ehdrlen = ETHER_HDR_LEN;
    881 	}
    882 
    883 	/* Set the ether header length */
    884 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
    885 
    886 	if (offload == FALSE)
    887 		goto no_offloads;
    888 
    889 	/*
    890 	 * If the first mbuf only includes the ethernet header,
    891 	 * jump to the next one
    892 	 * XXX: This assumes the stack splits mbufs containing headers
    893 	 *      on header boundaries
    894 	 * XXX: And assumes the entire IP header is contained in one mbuf
    895 	 */
    896 	if (mp->m_len == ehdrlen && mp->m_next)
    897 		l3d = mtod(mp->m_next, char *);
    898 	else
    899 		l3d = mtod(mp, char *) + ehdrlen;
    900 
    901 	switch (etype) {
    902 #ifdef INET
    903 	case ETHERTYPE_IP:
    904 		ip = (struct ip *)(l3d);
    905 		ip_hlen = ip->ip_hl << 2;
    906 		ipproto = ip->ip_p;
    907 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
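        		/*
        		 * With M_CSUM_IPv4 the stack leaves ip_sum zero for the
        		 * hardware to fill in (IXSM); the KASSERT below checks that.
        		 */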
    908 		KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
    909 		    ip->ip_sum == 0);
    910 		break;
    911 #endif
    912 #ifdef INET6
    913 	case ETHERTYPE_IPV6:
    914 		ip6 = (struct ip6_hdr *)(l3d);
    915 		ip_hlen = sizeof(struct ip6_hdr);
    916 		ipproto = ip6->ip6_nxt;
    917 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
    918 		break;
    919 #endif
    920 	default:
    921 		offload = false;
    922 		break;
    923 	}
    924 
    925 	if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
    926 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
    927 
    928 	vlan_macip_lens |= ip_hlen;
    929 
    930 	/* No support for offloads for non-L4 next headers */
    931 	switch (ipproto) {
    932 	case IPPROTO_TCP:
    933 		if (mp->m_pkthdr.csum_flags &
    934 		    (M_CSUM_TCPv4 | M_CSUM_TCPv6))
    935 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
    936 		else
    937 			offload = false;
    938 		break;
    939 	case IPPROTO_UDP:
    940 		if (mp->m_pkthdr.csum_flags &
    941 		    (M_CSUM_UDPv4 | M_CSUM_UDPv6))
    942 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
    943 		else
    944 			offload = false;
    945 		break;
    946 	default:
    947 		offload = false;
    948 		break;
    949 	}
    950 
    951 	if (offload) /* Insert L4 checksum into data descriptors */
    952 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
    953 
    954 no_offloads:
    955 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
    956 
    957 	/* Now copy bits into descriptor */
    958 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
    959 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
    960 	TXD->seqnum_seed = htole32(0);
    961 	TXD->mss_l4len_idx = htole32(0);
    962 
    963 	/* We've consumed the first desc, adjust counters */
    964 	if (++ctxd == txr->num_desc)
    965 		ctxd = 0;
    966 	txr->next_avail_desc = ctxd;
    967 	--txr->tx_avail;
    968 
    969 	return (0);
    970 } /* ixgbe_tx_ctx_setup */
    971 
    972 /************************************************************************
    973  * ixgbe_tso_setup
    974  *
    975  *   Setup work for hardware segmentation offload (TSO) on
    976  *   adapters using advanced tx descriptors
    977  ************************************************************************/
    978 static int
    979 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len,
    980     u32 *olinfo_status)
    981 {
    982 	struct ixgbe_adv_tx_context_desc *TXD;
    983 	struct ether_vlan_header         *eh;
    984 #ifdef INET6
    985 	struct ip6_hdr                   *ip6;
    986 #endif
    987 #ifdef INET
    988 	struct ip                        *ip;
    989 #endif
    990 	struct tcphdr                    *th;
    991 	int                              ctxd, ehdrlen, ip_hlen, tcp_hlen;
    992 	u32                              vlan_macip_lens = 0;
    993 	u32                              type_tucmd_mlhl = 0;
    994 	u32                              mss_l4len_idx = 0, paylen;
    995 	u16                              vtag = 0, eh_type;
    996 
    997 	/*
    998 	 * Determine where frame payload starts.
    999 	 * Jump over vlan headers if already present
   1000 	 */
   1001 	eh = mtod(mp, struct ether_vlan_header *);
   1002 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
   1003 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
   1004 		eh_type = eh->evl_proto;
   1005 	} else {
   1006 		ehdrlen = ETHER_HDR_LEN;
   1007 		eh_type = eh->evl_encap_proto;
   1008 	}
   1009 
   1010 	switch (ntohs(eh_type)) {
   1011 #ifdef INET
   1012 	case ETHERTYPE_IP:
   1013 		ip = (struct ip *)(mp->m_data + ehdrlen);
   1014 		if (ip->ip_p != IPPROTO_TCP)
   1015 			return (ENXIO);
   1016 		ip->ip_sum = 0;
   1017 		ip_hlen = ip->ip_hl << 2;
   1018 		th = (struct tcphdr *)((char *)ip + ip_hlen);
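        		/*
        		 * Seed th_sum with the pseudo-header checksum (without the
        		 * length); the hardware completes it for each TSO segment.
        		 */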
   1019 		th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
   1020 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
   1021 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
   1022 		/* Tell transmit desc to also do IPv4 checksum. */
   1023 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
   1024 		break;
   1025 #endif
   1026 #ifdef INET6
   1027 	case ETHERTYPE_IPV6:
   1028 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
   1029 		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
   1030 		if (ip6->ip6_nxt != IPPROTO_TCP)
   1031 			return (ENXIO);
   1032 		ip_hlen = sizeof(struct ip6_hdr);
   1033 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
   1034 		th = (struct tcphdr *)((char *)ip6 + ip_hlen);
   1035 		th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
   1036 		    &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
   1037 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
   1038 		break;
   1039 #endif
   1040 	default:
   1041 		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
   1042 		    __func__, ntohs(eh_type));
   1043 		break;
   1044 	}
   1045 
   1046 	ctxd = txr->next_avail_desc;
   1047 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
   1048 
   1049 	tcp_hlen = th->th_off << 2;
   1050 
   1051 	/* This is used in the transmit desc in encap */
   1052 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
   1053 
   1054 	/* VLAN MACLEN IPLEN */
   1055 	if (vlan_has_tag(mp)) {
   1056 		vtag = htole16(vlan_get_tag(mp));
   1057 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
   1058 	}
   1059 
   1060 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
   1061 	vlan_macip_lens |= ip_hlen;
   1062 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
   1063 
   1064 	/* ADV DTYPE TUCMD */
   1065 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
   1066 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
   1067 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
   1068 
   1069 	/* MSS L4LEN IDX */
   1070 	mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
   1071 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
   1072 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
   1073 
   1074 	TXD->seqnum_seed = htole32(0);
   1075 
   1076 	if (++ctxd == txr->num_desc)
   1077 		ctxd = 0;
   1078 
   1079 	txr->tx_avail--;
   1080 	txr->next_avail_desc = ctxd;
   1081 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
   1082 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
   1083 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
   1084 	++txr->tso_tx.ev_count;
   1085 
   1086 	return (0);
   1087 } /* ixgbe_tso_setup */
   1088 
   1089 
   1090 /************************************************************************
   1091  * ixgbe_txeof
   1092  *
   1093  *   Examine each tx_buffer in the used queue. If the hardware is done
   1094  *   processing the packet then free associated resources. The
   1095  *   tx_buffer is put back on the free queue.
   1096  ************************************************************************/
   1097 bool
   1098 ixgbe_txeof(struct tx_ring *txr)
   1099 {
   1100 	struct adapter		*adapter = txr->adapter;
   1101 	struct ifnet		*ifp = adapter->ifp;
   1102 	struct ixgbe_tx_buf	*buf;
   1103 	union ixgbe_adv_tx_desc *txd;
   1104 	u32			work, processed = 0;
   1105 	u32			limit = adapter->tx_process_limit;
   1106 
   1107 	KASSERT(mutex_owned(&txr->tx_mtx));
   1108 
   1109 #ifdef DEV_NETMAP
   1110 	if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) &&
   1111 	    (adapter->ifp->if_capenable & IFCAP_NETMAP)) {
   1112 		struct netmap_adapter *na = NA(adapter->ifp);
   1113 		struct netmap_kring *kring = na->tx_rings[txr->me];
   1114 		txd = txr->tx_base;
   1115 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1116 		    BUS_DMASYNC_POSTREAD);
   1117 		/*
   1118 		 * In netmap mode, all the work is done in the context
   1119 		 * of the client thread. Interrupt handlers only wake up
   1120 		 * clients, which may be sleeping on individual rings
   1121 		 * or on a global resource for all rings.
   1122 		 * To implement tx interrupt mitigation, we wake up the client
   1123 		 * thread roughly every half ring, even if the NIC interrupts
   1124 		 * more frequently. This is implemented as follows:
   1125 		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
   1126 		 *   the slot that should wake up the thread (nkr_num_slots
   1127 		 *   means the user thread should not be woken up);
   1128 		 * - the driver ignores tx interrupts unless netmap_mitigate=0
   1129 		 *   or the slot has the DD bit set.
   1130 		 */
   1131 		if (kring->nr_kflags < kring->nkr_num_slots &&
   1132 		    le32toh(txd[kring->nr_kflags].wb.status) & IXGBE_TXD_STAT_DD) {
   1133 			netmap_tx_irq(ifp, txr->me);
   1134 		}
   1135 		return false;
   1136 	}
   1137 #endif /* DEV_NETMAP */
   1138 
   1139 	if (txr->tx_avail == txr->num_desc) {
   1140 		txr->busy = 0;
   1141 		return false;
   1142 	}
   1143 
   1144 	/* Get work starting point */
   1145 	work = txr->next_to_clean;
   1146 	buf = &txr->tx_buffers[work];
   1147 	txd = &txr->tx_base[work];
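        	/*
        	 * "work" is kept as a negative offset from the end of the ring,
        	 * so it reaches zero exactly when the index must wrap.
        	 */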
   1148 	work -= txr->num_desc; /* The distance to ring end */
   1149 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1150 	    BUS_DMASYNC_POSTREAD);
   1151 
   1152 	do {
   1153 		union ixgbe_adv_tx_desc *eop = buf->eop;
   1154 		if (eop == NULL) /* No work */
   1155 			break;
   1156 
   1157 		if ((le32toh(eop->wb.status) & IXGBE_TXD_STAT_DD) == 0)
   1158 			break;	/* I/O not complete */
   1159 
   1160 		if (buf->m_head) {
   1161 			txr->bytes += buf->m_head->m_pkthdr.len;
   1162 			bus_dmamap_sync(txr->txtag->dt_dmat, buf->map,
   1163 			    0, buf->m_head->m_pkthdr.len,
   1164 			    BUS_DMASYNC_POSTWRITE);
   1165 			ixgbe_dmamap_unload(txr->txtag, buf->map);
   1166 			m_freem(buf->m_head);
   1167 			buf->m_head = NULL;
   1168 		}
   1169 		buf->eop = NULL;
   1170 		txr->txr_no_space = false;
   1171 		++txr->tx_avail;
   1172 
   1173 		/* We clean the range if multi segment */
   1174 		while (txd != eop) {
   1175 			++txd;
   1176 			++buf;
   1177 			++work;
   1178 			/* wrap the ring? */
   1179 			if (__predict_false(!work)) {
   1180 				work -= txr->num_desc;
   1181 				buf = txr->tx_buffers;
   1182 				txd = txr->tx_base;
   1183 			}
   1184 			if (buf->m_head) {
   1185 				txr->bytes +=
   1186 				    buf->m_head->m_pkthdr.len;
   1187 				bus_dmamap_sync(txr->txtag->dt_dmat,
   1188 				    buf->map,
   1189 				    0, buf->m_head->m_pkthdr.len,
   1190 				    BUS_DMASYNC_POSTWRITE);
   1191 				ixgbe_dmamap_unload(txr->txtag,
   1192 				    buf->map);
   1193 				m_freem(buf->m_head);
   1194 				buf->m_head = NULL;
   1195 			}
   1196 			++txr->tx_avail;
   1197 			buf->eop = NULL;
   1198 
   1199 		}
   1200 		++txr->packets;
   1201 		++processed;
   1202 		if_statinc(ifp, if_opackets);
   1203 
   1204 		/* Try the next packet */
   1205 		++txd;
   1206 		++buf;
   1207 		++work;
   1208 		/* reset with a wrap */
   1209 		if (__predict_false(!work)) {
   1210 			work -= txr->num_desc;
   1211 			buf = txr->tx_buffers;
   1212 			txd = txr->tx_base;
   1213 		}
   1214 		prefetch(txd);
   1215 	} while (__predict_true(--limit));
   1216 
   1217 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1218 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   1219 
   1220 	work += txr->num_desc;
   1221 	txr->next_to_clean = work;
   1222 
   1223 	/*
   1224 	 * Queue hang detection: we know there is work
   1225 	 * outstanding, or the first return above would
   1226 	 * have been taken, so increment busy if nothing
   1227 	 * was cleaned; the local timer then checks this
   1228 	 * count and marks the queue HUNG once it exceeds
   1229 	 * a maximum number of attempts.
   1230 	 */
   1231 	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
   1232 		++txr->busy;
   1233 	/*
   1234 	 * If anything gets cleaned we reset state to 1;
   1235 	 * note that this clears HUNG if it was set.
   1236 	 */
   1237 	if (processed)
   1238 		txr->busy = 1;
   1239 
   1240 	if (txr->tx_avail == txr->num_desc)
   1241 		txr->busy = 0;
   1242 
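        	/* Return true when the cleanup budget was exhausted (more work may remain). */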
   1243 	return ((limit > 0) ? false : true);
   1244 } /* ixgbe_txeof */
   1245 
   1246 /************************************************************************
   1247  * ixgbe_rsc_count
   1248  *
   1249  *   Used to detect a descriptor that has been merged by Hardware RSC.
   1250  ************************************************************************/
   1251 static inline u32
   1252 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
   1253 {
   1254 	return (le32toh(rx->wb.lower.lo_dword.data) &
   1255 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
   1256 } /* ixgbe_rsc_count */
   1257 
   1258 /************************************************************************
   1259  * ixgbe_setup_hw_rsc
   1260  *
   1261  *   Initialize Hardware RSC (LRO) feature on 82599
   1262  *   for an RX ring; this is toggled by the LRO capability
   1263  *   even though it is transparent to the stack.
   1264  *
   1265  *   NOTE: Since this HW feature only works with IPv4 and
   1266  *         testing has shown soft LRO to be as effective,
   1267  *         this feature will be disabled by default.
   1268  ************************************************************************/
   1269 static void
   1270 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
   1271 {
   1272 	struct	adapter  *adapter = rxr->adapter;
   1273 	struct	ixgbe_hw *hw = &adapter->hw;
   1274 	u32              rscctrl, rdrxctl;
   1275 
   1276 	/* If turning LRO/RSC off we need to disable it */
   1277 	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
   1278 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
   1279 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
        		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
   1280 		return;
   1281 	}
   1282 
   1283 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
   1284 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
   1285 #ifdef DEV_NETMAP
   1286 	/* Always strip CRC unless Netmap disabled it */
   1287 	if (!(adapter->feat_en & IXGBE_FEATURE_NETMAP) ||
   1288 	    !(adapter->ifp->if_capenable & IFCAP_NETMAP) ||
   1289 	    ix_crcstrip)
   1290 #endif /* DEV_NETMAP */
   1291 		rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
   1292 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
   1293 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
   1294 
   1295 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
   1296 	rscctrl |= IXGBE_RSCCTL_RSCEN;
   1297 	/*
   1298 	 * Limit the total number of descriptors that
   1299 	 * can be combined, so it does not exceed 64K
   1300 	 */
   1301 	if (rxr->mbuf_sz == MCLBYTES)
   1302 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
   1303 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
   1304 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
   1305 	else if (rxr->mbuf_sz == MJUM9BYTES)
   1306 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
   1307 	else  /* Using 16K cluster */
   1308 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
   1309 
   1310 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
   1311 
   1312 	/* Enable TCP header recognition */
   1313 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
   1314 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR));
   1315 
   1316 	/* Disable RSC for ACK packets */
   1317 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
   1318 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
   1319 
   1320 	rxr->hw_rsc = TRUE;
   1321 } /* ixgbe_setup_hw_rsc */
   1322 
   1323 /************************************************************************
   1324  * ixgbe_refresh_mbufs
   1325  *
   1326  *   Refresh mbuf buffers for RX descriptor rings
   1327  *    - now keeps its own state so discards due to resource
   1328  *      exhaustion are unnecessary; if an mbuf cannot be obtained
   1329  *      it just returns, keeping its placeholder, so it can simply
   1330  *      be called again later to retry.
   1331  *
   1332  *   XXX NetBSD TODO:
   1333  *    - The ixgbe_rxeof() function always preallocates an mbuf cluster,
   1334  *      so the ixgbe_refresh_mbufs() function can be simplified.
   1335  *
   1336  ************************************************************************/
   1337 static void
   1338 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
   1339 {
   1340 	struct adapter      *adapter = rxr->adapter;
   1341 	struct ixgbe_rx_buf *rxbuf;
   1342 	struct mbuf         *mp;
   1343 	int                 i, error;
   1344 	bool                refreshed = false;
   1345 
   1346 	i = rxr->next_to_refresh;
   1347 	/* next_to_refresh points to the previous one */
   1348 	if (++i == rxr->num_desc)
   1349 		i = 0;
   1350 
   1351 	while (i != limit) {
   1352 		rxbuf = &rxr->rx_buffers[i];
   1353 		if (rxbuf->buf == NULL) {
   1354 			mp = ixgbe_getcl();
   1355 			if (mp == NULL) {
   1356 				rxr->no_jmbuf.ev_count++;
   1357 				goto update;
   1358 			}
   1359 			mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   1360 			IXGBE_M_ADJ(adapter, rxr, mp);
   1361 		} else
   1362 			mp = rxbuf->buf;
   1363 
   1364 		/* If we're dealing with an mbuf that was copied rather
   1365 		 * than replaced, there's no need to go through busdma.
   1366 		 */
   1367 		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
   1368 			/* Get the memory mapping */
   1369 			ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   1370 			error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
   1371 			    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
   1372 			if (error != 0) {
   1373 				device_printf(adapter->dev, "Refresh mbufs: "
   1374 				    "payload dmamap load failure - %d\n",
   1375 				    error);
   1376 				m_free(mp);
   1377 				rxbuf->buf = NULL;
   1378 				goto update;
   1379 			}
   1380 			rxbuf->buf = mp;
   1381 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1382 			    0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
   1383 			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
   1384 			    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1385 		} else {
   1386 			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
   1387 			rxbuf->flags &= ~IXGBE_RX_COPY;
   1388 		}
   1389 
   1390 		refreshed = true;
   1391 		/* next_to_refresh points to the previous one */
   1392 		rxr->next_to_refresh = i;
   1393 		if (++i == rxr->num_desc)
   1394 			i = 0;
   1395 	}
   1396 
   1397 update:
   1398 	if (refreshed) /* Update hardware tail index */
   1399 		IXGBE_WRITE_REG(&adapter->hw, rxr->tail, rxr->next_to_refresh);
   1400 
   1401 	return;
   1402 } /* ixgbe_refresh_mbufs */
   1403 
   1404 /************************************************************************
   1405  * ixgbe_allocate_receive_buffers
   1406  *
   1407  *   Allocate memory for rx_buffer structures. Since we use one
   1408  *   rx_buffer per received packet, the maximum number of rx_buffer's
   1409  *   that we'll need is equal to the number of receive descriptors
   1410  *   that we've allocated.
   1411  ************************************************************************/
   1412 static int
   1413 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
   1414 {
   1415 	struct adapter      *adapter = rxr->adapter;
   1416 	device_t            dev = adapter->dev;
   1417 	struct ixgbe_rx_buf *rxbuf;
   1418 	int                 bsize, error;
   1419 
   1420 	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
   1421 	rxr->rx_buffers = malloc(bsize, M_DEVBUF, M_WAITOK | M_ZERO);
   1422 
   1423 	error = ixgbe_dma_tag_create(
   1424 	         /*      parent */ adapter->osdep.dmat,
   1425 	         /*   alignment */ 1,
   1426 	         /*      bounds */ 0,
   1427 	         /*     maxsize */ MJUM16BYTES,
   1428 	         /*   nsegments */ 1,
   1429 	         /*  maxsegsize */ MJUM16BYTES,
   1430 	         /*       flags */ 0,
   1431 	                           &rxr->ptag);
   1432 	if (error != 0) {
   1433 		aprint_error_dev(dev, "Unable to create RX DMA tag\n");
   1434 		goto fail;
   1435 	}
   1436 
   1437 	for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
   1438 		rxbuf = &rxr->rx_buffers[i];
   1439 		error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
   1440 		if (error) {
   1441 			aprint_error_dev(dev, "Unable to create RX dma map\n");
   1442 			goto fail;
   1443 		}
   1444 	}
   1445 
   1446 	return (0);
   1447 
   1448 fail:
   1449 	/* Frees all, but can handle partial completion */
   1450 	ixgbe_free_receive_structures(adapter);
   1451 
   1452 	return (error);
   1453 } /* ixgbe_allocate_receive_buffers */
   1454 
   1455 /************************************************************************
   1456  * ixgbe_free_receive_ring
   1457  ************************************************************************/
   1458 static void
   1459 ixgbe_free_receive_ring(struct rx_ring *rxr)
   1460 {
   1461 	for (int i = 0; i < rxr->num_desc; i++) {
   1462 		ixgbe_rx_discard(rxr, i);
   1463 	}
   1464 } /* ixgbe_free_receive_ring */
   1465 
   1466 /************************************************************************
   1467  * ixgbe_setup_receive_ring
   1468  *
   1469  *   Initialize a receive ring and its buffers.
   1470  ************************************************************************/
   1471 static int
   1472 ixgbe_setup_receive_ring(struct rx_ring *rxr)
   1473 {
   1474 	struct adapter        *adapter;
   1475 	struct ixgbe_rx_buf   *rxbuf;
   1476 #ifdef LRO
   1477 	struct ifnet          *ifp;
   1478 	struct lro_ctrl       *lro = &rxr->lro;
   1479 #endif /* LRO */
   1480 #ifdef DEV_NETMAP
   1481 	struct netmap_adapter *na = NA(rxr->adapter->ifp);
   1482 	struct netmap_slot    *slot;
   1483 #endif /* DEV_NETMAP */
   1484 	int                   rsize, error = 0;
   1485 
   1486 	adapter = rxr->adapter;
   1487 #ifdef LRO
   1488 	ifp = adapter->ifp;
   1489 #endif /* LRO */
   1490 
   1491 	/* Clear the ring contents */
   1492 	IXGBE_RX_LOCK(rxr);
   1493 
   1494 #ifdef DEV_NETMAP
   1495 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP)
   1496 		slot = netmap_reset(na, NR_RX, rxr->me, 0);
   1497 #endif /* DEV_NETMAP */
   1498 
   1499 	rsize = roundup2(adapter->num_rx_desc *
   1500 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
   1501 	bzero((void *)rxr->rx_base, rsize);
   1502 	/* Cache the size */
   1503 	rxr->mbuf_sz = adapter->rx_mbuf_sz;
   1504 
   1505 	/* Free current RX buffer structs and their mbufs */
   1506 	ixgbe_free_receive_ring(rxr);
   1507 
   1508 	/* Now replenish the mbufs */
   1509 	for (int j = 0; j != rxr->num_desc; ++j) {
   1510 		struct mbuf *mp;
   1511 
   1512 		rxbuf = &rxr->rx_buffers[j];
   1513 
   1514 #ifdef DEV_NETMAP
   1515 		/*
   1516 		 * In netmap mode, fill the map and set the buffer
   1517 		 * address in the NIC ring, considering the offset
   1518 		 * between the netmap and NIC rings (see comment in
   1519 		 * ixgbe_setup_transmit_ring() ). No need to allocate
   1520 		 * an mbuf, so end the block with a continue;
   1521 		 */
   1522 		if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
   1523 			int sj = netmap_idx_n2k(na->rx_rings[rxr->me], j);
   1524 			uint64_t paddr;
   1525 			void *addr;
   1526 
   1527 			addr = PNMB(na, slot + sj, &paddr);
   1528 			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
   1529 			/* Update descriptor and the cached value */
   1530 			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
   1531 			rxbuf->addr = htole64(paddr);
   1532 			continue;
   1533 		}
   1534 #endif /* DEV_NETMAP */
   1535 
   1536 		rxbuf->flags = 0;
   1537 		rxbuf->buf = ixgbe_getcl();
   1538 		if (rxbuf->buf == NULL) {
   1539 			rxr->no_jmbuf.ev_count++;
   1540 			error = ENOBUFS;
   1541 			goto fail;
   1542 		}
   1543 		mp = rxbuf->buf;
   1544 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   1545 		IXGBE_M_ADJ(adapter, rxr, mp);
   1546 		/* Get the memory mapping */
   1547 		error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat, rxbuf->pmap,
   1548 		    mp, BUS_DMA_NOWAIT);
   1549 		if (error != 0) {
   1550 			/*
   1551 			 * Clear this entry for later cleanup in
    1552 			 * ixgbe_rx_discard(), which is called via
   1553 			 * ixgbe_free_receive_ring().
   1554 			 */
   1555 			m_freem(mp);
   1556 			rxbuf->buf = NULL;
   1557 			goto fail;
   1558 		}
   1559 		bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1560 		    0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
   1561 		/* Update the descriptor and the cached value */
   1562 		rxr->rx_base[j].read.pkt_addr =
   1563 		    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1564 		rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1565 	}
   1566 
   1567 	/* Setup our descriptor indices */
   1568 	rxr->next_to_check = 0;
   1569 	rxr->next_to_refresh = adapter->num_rx_desc - 1; /* Fully allocated */
   1570 	rxr->lro_enabled = FALSE;
   1571 	rxr->rx_copies.ev_count = 0;
   1572 #if 0 /* NetBSD */
   1573 	rxr->rx_bytes.ev_count = 0;
   1574 #if 1	/* Fix inconsistency */
   1575 	rxr->rx_packets.ev_count = 0;
   1576 #endif
   1577 #endif
   1578 	rxr->vtag_strip = FALSE;
   1579 
   1580 	ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   1581 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   1582 
   1583 	/*
   1584 	 * Now set up the LRO interface
   1585 	 */
   1586 	if (ixgbe_rsc_enable)
   1587 		ixgbe_setup_hw_rsc(rxr);
   1588 #ifdef LRO
   1589 	else if (ifp->if_capenable & IFCAP_LRO) {
   1590 		device_t dev = adapter->dev;
   1591 		int err = tcp_lro_init(lro);
   1592 		if (err) {
   1593 			device_printf(dev, "LRO Initialization failed!\n");
   1594 			goto fail;
   1595 		}
   1596 		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
   1597 		rxr->lro_enabled = TRUE;
   1598 		lro->ifp = adapter->ifp;
   1599 	}
   1600 #endif /* LRO */
   1601 
   1602 	IXGBE_RX_UNLOCK(rxr);
   1603 
   1604 	return (0);
   1605 
   1606 fail:
   1607 	ixgbe_free_receive_ring(rxr);
   1608 	IXGBE_RX_UNLOCK(rxr);
   1609 
   1610 	return (error);
   1611 } /* ixgbe_setup_receive_ring */
   1612 
   1613 /************************************************************************
   1614  * ixgbe_setup_receive_structures - Initialize all receive rings.
   1615  ************************************************************************/
   1616 int
   1617 ixgbe_setup_receive_structures(struct adapter *adapter)
   1618 {
   1619 	struct rx_ring *rxr = adapter->rx_rings;
   1620 	int            j;
   1621 
   1622 	INIT_DEBUGOUT("ixgbe_setup_receive_structures");
   1623 	for (j = 0; j < adapter->num_queues; j++, rxr++)
   1624 		if (ixgbe_setup_receive_ring(rxr))
   1625 			goto fail;
   1626 
   1627 	return (0);
   1628 fail:
   1629 	/*
    1630 	 * Free the RX buffers allocated so far; we only handle the
    1631 	 * rings that completed, since the failing ring has cleaned
    1632 	 * up after itself. Ring 'j' failed, so it is the terminus.
   1633 	 */
   1634 	for (int i = 0; i < j; ++i) {
   1635 		rxr = &adapter->rx_rings[i];
   1636 		IXGBE_RX_LOCK(rxr);
   1637 		ixgbe_free_receive_ring(rxr);
   1638 		IXGBE_RX_UNLOCK(rxr);
   1639 	}
   1640 
   1641 	return (ENOBUFS);
   1642 } /* ixgbe_setup_receive_structures */
   1643 
   1644 
   1645 /************************************************************************
   1646  * ixgbe_free_receive_structures - Free all receive rings.
   1647  ************************************************************************/
   1648 void
   1649 ixgbe_free_receive_structures(struct adapter *adapter)
   1650 {
   1651 	struct rx_ring *rxr = adapter->rx_rings;
   1652 
   1653 	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
   1654 
   1655 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
   1656 		ixgbe_free_receive_buffers(rxr);
   1657 #ifdef LRO
   1658 		/* Free LRO memory */
   1659 		tcp_lro_free(&rxr->lro);
   1660 #endif /* LRO */
   1661 		/* Free the ring memory as well */
   1662 		ixgbe_dma_free(adapter, &rxr->rxdma);
   1663 		IXGBE_RX_LOCK_DESTROY(rxr);
   1664 	}
   1665 
   1666 	free(adapter->rx_rings, M_DEVBUF);
   1667 } /* ixgbe_free_receive_structures */
   1668 
   1669 
   1670 /************************************************************************
   1671  * ixgbe_free_receive_buffers - Free receive ring data structures
   1672  ************************************************************************/
   1673 static void
   1674 ixgbe_free_receive_buffers(struct rx_ring *rxr)
   1675 {
   1676 	struct adapter      *adapter = rxr->adapter;
   1677 	struct ixgbe_rx_buf *rxbuf;
   1678 
   1679 	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
   1680 
   1681 	/* Cleanup any existing buffers */
   1682 	if (rxr->rx_buffers != NULL) {
   1683 		for (int i = 0; i < adapter->num_rx_desc; i++) {
   1684 			rxbuf = &rxr->rx_buffers[i];
   1685 			ixgbe_rx_discard(rxr, i);
   1686 			if (rxbuf->pmap != NULL) {
   1687 				ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
   1688 				rxbuf->pmap = NULL;
   1689 			}
   1690 		}
   1691 
   1692 		if (rxr->rx_buffers != NULL) {
   1693 			free(rxr->rx_buffers, M_DEVBUF);
   1694 			rxr->rx_buffers = NULL;
   1695 		}
   1696 	}
   1697 
   1698 	if (rxr->ptag != NULL) {
   1699 		ixgbe_dma_tag_destroy(rxr->ptag);
   1700 		rxr->ptag = NULL;
   1701 	}
   1702 
   1703 	return;
   1704 } /* ixgbe_free_receive_buffers */
   1705 
   1706 /************************************************************************
   1707  * ixgbe_rx_input
   1708  ************************************************************************/
   1709 static __inline void
   1710 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m,
   1711     u32 ptype)
   1712 {
   1713 	struct adapter	*adapter = ifp->if_softc;
   1714 
   1715 #ifdef LRO
   1716 	struct ethercom *ec = &adapter->osdep.ec;
   1717 
   1718 	/*
    1719 	 * At the moment LRO is only for IP/TCP packets whose TCP checksum has
    1720 	 * been verified by hardware, and the packet must not carry a VLAN tag
    1721 	 * in its Ethernet header.  For IPv6 we do not yet support ext. hdrs.
   1722 	 */
    1723 	if (rxr->lro_enabled &&
    1724 	    (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
    1725 	    (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
    1726 	    ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
    1727 	    (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
    1728 	    (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
    1729 	    (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
    1730 	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
    1731 	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
    1732 		/*
    1733 		 * Only consume the mbuf here if LRO has
    1734 		 * resources and the enqueue succeeds;
    1735 		 * otherwise fall through and send the
    1736 		 * frame to the stack below.
    1737 		 */
    1738 		if (rxr->lro.lro_cnt != 0)
    1739 			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
    1740 				return;
    1741 	}
   1742 #endif /* LRO */
   1743 
   1744 	if_percpuq_enqueue(adapter->ipq, m);
   1745 } /* ixgbe_rx_input */
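
/*
 * Unless the LRO code above is compiled in and claims the frame, every
 * completed packet is handed to the stack through if_percpuq_enqueue(),
 * which defers further input processing to a per-CPU queue instead of
 * running it in the interrupt path.
 */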
   1746 
   1747 /************************************************************************
   1748  * ixgbe_rx_discard
   1749  ************************************************************************/
   1750 static __inline void
   1751 ixgbe_rx_discard(struct rx_ring *rxr, int i)
   1752 {
   1753 	struct ixgbe_rx_buf *rbuf;
   1754 
   1755 	rbuf = &rxr->rx_buffers[i];
   1756 
   1757 	/*
   1758 	 * With advanced descriptors the writeback clobbers the buffer addrs,
    1759 	 * so it's easier to just free the existing mbufs and take the normal
    1760 	 * refresh path to get new buffers and mappings.
   1761 	 */
   1762 
    1763 	if (rbuf->fmp != NULL) {	/* Partial chain? */
   1764 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
   1765 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
   1766 		ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
   1767 		m_freem(rbuf->fmp);
   1768 		rbuf->fmp = NULL;
   1769 		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
   1770 	} else if (rbuf->buf) {
   1771 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
   1772 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
   1773 		ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
   1774 		m_free(rbuf->buf);
   1775 		rbuf->buf = NULL;
   1776 	}
   1777 
   1778 	rbuf->flags = 0;
   1779 
   1780 	return;
   1781 } /* ixgbe_rx_discard */
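
/*
 * Besides the error path in ixgbe_rxeof(), this routine is also used by
 * ixgbe_free_receive_ring() and ixgbe_free_receive_buffers() to drop
 * any mbufs still attached to a ring, so it copes with buffers in any
 * state: a stored chain head, a standalone buffer, or an empty slot.
 */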
   1782 
   1783 
   1784 /************************************************************************
   1785  * ixgbe_rxeof
   1786  *
    1787  *   Executes in interrupt context. It replenishes the
    1788  *   mbufs in the descriptor ring and passes data that has
    1789  *   been DMA'd into host memory up to the upper layer.
   1790  *
   1791  *   Return TRUE for more work, FALSE for all clean.
   1792  ************************************************************************/
   1793 bool
   1794 ixgbe_rxeof(struct ix_queue *que)
   1795 {
   1796 	struct adapter		*adapter = que->adapter;
   1797 	struct rx_ring		*rxr = que->rxr;
   1798 	struct ifnet		*ifp = adapter->ifp;
   1799 #ifdef LRO
   1800 	struct lro_ctrl		*lro = &rxr->lro;
   1801 #endif /* LRO */
   1802 	union ixgbe_adv_rx_desc	*cur;
   1803 	struct ixgbe_rx_buf	*rbuf, *nbuf;
   1804 	int			i, nextp, processed = 0;
   1805 	u32			staterr = 0;
   1806 	u32			count = 0;
   1807 	u32			limit = adapter->rx_process_limit;
   1808 	bool			discard_multidesc = false;
   1809 #ifdef RSS
   1810 	u16			pkt_info;
   1811 #endif
   1812 
   1813 	IXGBE_RX_LOCK(rxr);
   1814 
   1815 #ifdef DEV_NETMAP
   1816 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
   1817 		/* Same as the txeof routine: wakeup clients on intr. */
   1818 		if (netmap_rx_irq(ifp, rxr->me, &processed)) {
   1819 			IXGBE_RX_UNLOCK(rxr);
   1820 			return (FALSE);
   1821 		}
   1822 	}
   1823 #endif /* DEV_NETMAP */
   1824 
   1825 	/*
    1826 	 * The maximum number of loop iterations is rx_process_limit. If
    1827 	 * discard_multidesc is true, keep processing so that a broken packet
    1828 	 * is not handed to the upper layer.
   1829 	 */
   1830 	for (i = rxr->next_to_check;
   1831 	     (count < limit) || (discard_multidesc == true);) {
   1832 
   1833 		struct mbuf *sendmp, *mp;
   1834 		struct mbuf *newmp;
   1835 		u32         rsc, ptype;
   1836 		u16         len;
   1837 		u16         vtag = 0;
   1838 		bool        eop;
   1839 
   1840 		/* Sync the ring. */
   1841 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   1842 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   1843 
   1844 		cur = &rxr->rx_base[i];
   1845 		staterr = le32toh(cur->wb.upper.status_error);
   1846 #ifdef RSS
   1847 		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
   1848 #endif
   1849 
   1850 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
   1851 			break;
   1852 
   1853 		count++;
   1854 		sendmp = NULL;
   1855 		nbuf = NULL;
   1856 		rsc = 0;
   1857 		cur->wb.upper.status_error = 0;
   1858 		rbuf = &rxr->rx_buffers[i];
   1859 		mp = rbuf->buf;
   1860 
   1861 		len = le16toh(cur->wb.upper.length);
   1862 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
   1863 		    IXGBE_RXDADV_PKTTYPE_MASK;
   1864 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
   1865 
   1866 		/* Make sure bad packets are discarded */
   1867 		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
   1868 #if __FreeBSD_version >= 1100036
   1869 			if (adapter->feat_en & IXGBE_FEATURE_VF)
   1870 				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
   1871 #endif
   1872 			rxr->rx_discarded.ev_count++;
   1873 			ixgbe_rx_discard(rxr, i);
   1874 			discard_multidesc = false;
   1875 			goto next_desc;
   1876 		}
   1877 
   1878 		/* pre-alloc new mbuf */
   1879 		if (!discard_multidesc)
   1880 			newmp = ixgbe_getcl();
   1881 		else
   1882 			newmp = NULL;
   1883 		if (newmp == NULL) {
   1884 			rxr->no_jmbuf.ev_count++;
   1885 			/*
   1886 			 * Descriptor initialization is already done by the
   1887 			 * above code (cur->wb.upper.status_error = 0).
    1888 			 * So we can reuse the current rbuf->buf for the next packet.
   1889 			 *
   1890 			 * Rewrite the buffer addr, see comment in
   1891 			 * ixgbe_rx_discard().
   1892 			 */
   1893 			cur->read.pkt_addr = rbuf->addr;
   1894 			m_freem(rbuf->fmp);
   1895 			rbuf->fmp = NULL;
   1896 			if (!eop) {
   1897 				/* Discard the entire packet. */
   1898 				discard_multidesc = true;
   1899 			} else
   1900 				discard_multidesc = false;
   1901 			goto next_desc;
   1902 		}
   1903 		discard_multidesc = false;
   1904 
   1905 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
   1906 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
   1907 
   1908 		/*
    1909 		 * On the 82599, which supports hardware
    1910 		 * LRO (called HW RSC), packets need not
    1911 		 * be fragmented across sequential
    1912 		 * descriptors; instead the next descriptor
    1913 		 * is indicated in bits of this descriptor.
    1914 		 * This also means that we might process
    1915 		 * more than one packet at a time, something
    1916 		 * that had never been true before; it
    1917 		 * required eliminating the global chain
    1918 		 * pointers in favor of what we do here.  -jfv
   1919 		 */
   1920 		if (!eop) {
   1921 			/*
   1922 			 * Figure out the next descriptor
   1923 			 * of this frame.
   1924 			 */
   1925 			if (rxr->hw_rsc == TRUE) {
   1926 				rsc = ixgbe_rsc_count(cur);
   1927 				rxr->rsc_num += (rsc - 1);
   1928 			}
   1929 			if (rsc) { /* Get hardware index */
   1930 				nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
   1931 				    IXGBE_RXDADV_NEXTP_SHIFT);
   1932 			} else { /* Just sequential */
   1933 				nextp = i + 1;
   1934 				if (nextp == adapter->num_rx_desc)
   1935 					nextp = 0;
   1936 			}
   1937 			nbuf = &rxr->rx_buffers[nextp];
   1938 			prefetch(nbuf);
   1939 		}
   1940 		/*
   1941 		 * Rather than using the fmp/lmp global pointers
   1942 		 * we now keep the head of a packet chain in the
   1943 		 * buffer struct and pass this along from one
   1944 		 * descriptor to the next, until we get EOP.
   1945 		 */
   1946 		/*
    1947 		 * See if there is a stored head; if so, this
    1948 		 * descriptor continues an earlier chain.
   1949 		 */
   1950 		sendmp = rbuf->fmp;
   1951 		if (sendmp != NULL) {  /* secondary frag */
   1952 			/* Update new (used in future) mbuf */
   1953 			newmp->m_pkthdr.len = newmp->m_len = rxr->mbuf_sz;
   1954 			IXGBE_M_ADJ(adapter, rxr, newmp);
   1955 			rbuf->buf = newmp;
   1956 			rbuf->fmp = NULL;
   1957 
   1958 			/* For secondary frag */
   1959 			mp->m_len = len;
   1960 			mp->m_flags &= ~M_PKTHDR;
   1961 
   1962 			/* For sendmp */
   1963 			sendmp->m_pkthdr.len += mp->m_len;
   1964 		} else {
   1965 			/*
   1966 			 * It's the first segment of a multi descriptor
   1967 			 * packet or a single segment which contains a full
   1968 			 * packet.
   1969 			 */
   1970 
   1971 			/*
   1972 			 * Optimize.  This might be a small packet, maybe just
    1973 			 * a TCP ACK. Copy it into a new mbuf and leave the old
   1974 			 * mbuf+cluster for re-use.
   1975 			 */
   1976 			if (eop && len <= adapter->rx_copy_len) {
   1977 				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
   1978 				if (sendmp != NULL) {
   1979 					sendmp->m_data += ETHER_ALIGN;
   1980 					memcpy(mtod(sendmp, void *),
   1981 					    mtod(mp, void *), len);
   1982 					rxr->rx_copies.ev_count++;
   1983 					rbuf->flags |= IXGBE_RX_COPY;
   1984 
   1985 					/*
    1986 					 * The pre-allocated mbuf is no
    1987 					 * longer needed because we recycle
    1988 					 * the current buffer, so free it.
   1989 					 */
   1990 					m_freem(newmp);
   1991 				}
   1992 			}
   1993 
   1994 			/*
   1995 			 * Two cases:
    1996 			 * a) a non-small packet (i.e. !IXGBE_RX_COPY), or
    1997 			 * b) a small packet for which the above m_gethdr() failed.
   1998 			 */
   1999 			if (sendmp == NULL) {
   2000 				/* Update new (used in future) mbuf */
   2001 				newmp->m_pkthdr.len = newmp->m_len
   2002 				    = rxr->mbuf_sz;
   2003 				IXGBE_M_ADJ(adapter, rxr, newmp);
   2004 				rbuf->buf = newmp;
   2005 				rbuf->fmp = NULL;
   2006 
   2007 				/* For sendmp */
   2008 				sendmp = mp;
   2009 			}
   2010 
    2011 		/* First desc of a non-packet-split chain */
   2012 			sendmp->m_pkthdr.len = sendmp->m_len = len;
   2013 		}
   2014 		++processed;
   2015 
   2016 		/* Pass the head pointer on */
   2017 		if (eop == 0) {
   2018 			nbuf->fmp = sendmp;
   2019 			sendmp = NULL;
   2020 			mp->m_next = nbuf->buf;
   2021 		} else { /* Sending this frame */
   2022 			m_set_rcvif(sendmp, ifp);
   2023 			++rxr->packets;
   2024 			rxr->rx_packets.ev_count++;
   2025 			/* capture data for AIM */
   2026 			rxr->bytes += sendmp->m_pkthdr.len;
   2027 			rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
   2028 			/* Process vlan info */
   2029 			if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
   2030 				vtag = le16toh(cur->wb.upper.vlan);
   2031 			if (vtag) {
   2032 				vlan_set_tag(sendmp, vtag);
   2033 			}
   2034 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
   2035 				ixgbe_rx_checksum(staterr, sendmp, ptype,
   2036 				   &adapter->stats.pf);
   2037 			}
   2038 
   2039 #if 0 /* FreeBSD */
   2040 			/*
   2041 			 * In case of multiqueue, we have RXCSUM.PCSD bit set
   2042 			 * and never cleared. This means we have RSS hash
   2043 			 * available to be used.
   2044 			 */
   2045 			if (adapter->num_queues > 1) {
   2046 				sendmp->m_pkthdr.flowid =
   2047 				    le32toh(cur->wb.lower.hi_dword.rss);
   2048 				switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
   2049 				case IXGBE_RXDADV_RSSTYPE_IPV4:
   2050 					M_HASHTYPE_SET(sendmp,
   2051 					    M_HASHTYPE_RSS_IPV4);
   2052 					break;
   2053 				case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
   2054 					M_HASHTYPE_SET(sendmp,
   2055 					    M_HASHTYPE_RSS_TCP_IPV4);
   2056 					break;
   2057 				case IXGBE_RXDADV_RSSTYPE_IPV6:
   2058 					M_HASHTYPE_SET(sendmp,
   2059 					    M_HASHTYPE_RSS_IPV6);
   2060 					break;
   2061 				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
   2062 					M_HASHTYPE_SET(sendmp,
   2063 					    M_HASHTYPE_RSS_TCP_IPV6);
   2064 					break;
   2065 				case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
   2066 					M_HASHTYPE_SET(sendmp,
   2067 					    M_HASHTYPE_RSS_IPV6_EX);
   2068 					break;
   2069 				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
   2070 					M_HASHTYPE_SET(sendmp,
   2071 					    M_HASHTYPE_RSS_TCP_IPV6_EX);
   2072 					break;
   2073 #if __FreeBSD_version > 1100000
   2074 				case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
   2075 					M_HASHTYPE_SET(sendmp,
   2076 					    M_HASHTYPE_RSS_UDP_IPV4);
   2077 					break;
   2078 				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
   2079 					M_HASHTYPE_SET(sendmp,
   2080 					    M_HASHTYPE_RSS_UDP_IPV6);
   2081 					break;
   2082 				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
   2083 					M_HASHTYPE_SET(sendmp,
   2084 					    M_HASHTYPE_RSS_UDP_IPV6_EX);
   2085 					break;
   2086 #endif
   2087 				default:
   2088 					M_HASHTYPE_SET(sendmp,
   2089 					    M_HASHTYPE_OPAQUE_HASH);
   2090 				}
   2091 			} else {
   2092 				sendmp->m_pkthdr.flowid = que->msix;
   2093 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
   2094 			}
   2095 #endif
   2096 		}
   2097 next_desc:
   2098 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   2099 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   2100 
   2101 		/* Advance our pointers to the next descriptor. */
   2102 		if (++i == rxr->num_desc)
   2103 			i = 0;
   2104 		rxr->next_to_check = i;
   2105 
   2106 		/* Now send to the stack or do LRO */
   2107 		if (sendmp != NULL)
   2108 			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
   2109 
    2110 		/* Refresh mbufs every 8 processed descriptors */
   2111 		if (processed == 8) {
   2112 			ixgbe_refresh_mbufs(rxr, i);
   2113 			processed = 0;
   2114 		}
   2115 	}
   2116 
   2117 	/* Refresh any remaining buf structs */
   2118 	if (ixgbe_rx_unrefreshed(rxr))
   2119 		ixgbe_refresh_mbufs(rxr, i);
   2120 
   2121 	IXGBE_RX_UNLOCK(rxr);
   2122 
   2123 #ifdef LRO
   2124 	/*
   2125 	 * Flush any outstanding LRO work
   2126 	 */
   2127 	tcp_lro_flush_all(lro);
   2128 #endif /* LRO */
   2129 
   2130 	/*
   2131 	 * Still have cleaning to do?
   2132 	 */
   2133 	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
   2134 		return (TRUE);
   2135 
   2136 	return (FALSE);
   2137 } /* ixgbe_rxeof */
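
/*
 * A minimal sketch of how a queue handler might drive ixgbe_rxeof();
 * it is illustrative only and not part of this file.  The handler
 * name, the que_si softint handle and the ixgbe_enable_queue() helper
 * are assumptions here; only que->adapter and que->msix are taken
 * from this file.
 */
#if 0	/* example only */
static void
example_handle_que(struct ix_queue *que)
{
	bool more;

	/* Reap completed RX descriptors; TRUE means work remains. */
	more = ixgbe_rxeof(que);

	if (more) {
		/* Come back later instead of spinning in the handler. */
		softint_schedule(que->que_si);
	} else {
		/* Re-enable this queue's interrupt. */
		ixgbe_enable_queue(que->adapter, que->msix);
	}
}
#endif	/* example only */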
   2138 
   2139 
   2140 /************************************************************************
   2141  * ixgbe_rx_checksum
   2142  *
   2143  *   Verify that the hardware indicated that the checksum is valid.
    2144  *   Inform the stack of the checksum status so that the stack
    2145  *   doesn't spend time verifying it.
   2146  ************************************************************************/
   2147 static void
   2148 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
   2149     struct ixgbe_hw_stats *stats)
   2150 {
   2151 	u16  status = (u16)staterr;
   2152 	u8   errors = (u8)(staterr >> 24);
   2153 #if 0
   2154 	bool sctp = false;
   2155 
   2156 	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
   2157 	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
   2158 		sctp = true;
   2159 #endif
   2160 
   2161 	/* IPv4 checksum */
   2162 	if (status & IXGBE_RXD_STAT_IPCS) {
   2163 		stats->ipcs.ev_count++;
   2164 		if (!(errors & IXGBE_RXD_ERR_IPE)) {
   2165 			/* IP Checksum Good */
   2166 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
   2167 		} else {
   2168 			stats->ipcs_bad.ev_count++;
   2169 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
   2170 		}
   2171 	}
   2172 	/* TCP/UDP/SCTP checksum */
   2173 	if (status & IXGBE_RXD_STAT_L4CS) {
   2174 		stats->l4cs.ev_count++;
   2175 		int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
   2176 		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
   2177 			mp->m_pkthdr.csum_flags |= type;
   2178 		} else {
   2179 			stats->l4cs_bad.ev_count++;
   2180 			mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
   2181 		}
   2182 	}
   2183 } /* ixgbe_rx_checksum */
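
/*
 * For example, following the logic above: a descriptor whose staterr
 * has IXGBE_RXD_STAT_IPCS and IXGBE_RXD_STAT_L4CS set and no error
 * bits leaves the mbuf with M_CSUM_IPv4 plus the TCP/UDP "valid"
 * flags, so the stack skips both checksum computations; if
 * IXGBE_RXD_ERR_TCPE is also set, M_CSUM_TCP_UDP_BAD is added so the
 * stack treats the L4 checksum as bad.
 */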
   2184 
   2185 /************************************************************************
   2186  * ixgbe_dma_malloc
   2187  ************************************************************************/
   2188 int
   2189 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
   2190 		struct ixgbe_dma_alloc *dma, const int mapflags)
   2191 {
   2192 	device_t dev = adapter->dev;
   2193 	int      r, rsegs;
   2194 
   2195 	r = ixgbe_dma_tag_create(
   2196 	     /*      parent */ adapter->osdep.dmat,
   2197 	     /*   alignment */ DBA_ALIGN,
   2198 	     /*      bounds */ 0,
   2199 	     /*     maxsize */ size,
   2200 	     /*   nsegments */ 1,
   2201 	     /*  maxsegsize */ size,
   2202 	     /*       flags */ BUS_DMA_ALLOCNOW,
   2203 			       &dma->dma_tag);
   2204 	if (r != 0) {
   2205 		aprint_error_dev(dev,
   2206 		    "%s: ixgbe_dma_tag_create failed; error %d\n", __func__,
   2207 		    r);
   2208 		goto fail_0;
   2209 	}
   2210 
   2211 	r = bus_dmamem_alloc(dma->dma_tag->dt_dmat, size,
   2212 	    dma->dma_tag->dt_alignment, dma->dma_tag->dt_boundary,
   2213 	    &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
   2214 	if (r != 0) {
   2215 		aprint_error_dev(dev,
   2216 		    "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
   2217 		goto fail_1;
   2218 	}
   2219 
   2220 	r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
   2221 	    size, &dma->dma_vaddr, BUS_DMA_NOWAIT | BUS_DMA_COHERENT);
   2222 	if (r != 0) {
   2223 		aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
   2224 		    __func__, r);
   2225 		goto fail_2;
   2226 	}
   2227 
   2228 	r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
   2229 	if (r != 0) {
    2230 		aprint_error_dev(dev, "%s: ixgbe_dmamap_create failed; error %d\n",
   2231 		    __func__, r);
   2232 		goto fail_3;
   2233 	}
   2234 
   2235 	r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map,
   2236 	    dma->dma_vaddr, size, NULL, mapflags | BUS_DMA_NOWAIT);
   2237 	if (r != 0) {
   2238 		aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
   2239 		    __func__, r);
   2240 		goto fail_4;
   2241 	}
   2242 	dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
   2243 	dma->dma_size = size;
   2244 	return 0;
   2245 fail_4:
   2246 	ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
   2247 fail_3:
   2248 	bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
   2249 fail_2:
   2250 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
   2251 fail_1:
   2252 	ixgbe_dma_tag_destroy(dma->dma_tag);
   2253 fail_0:
   2254 
   2255 	return (r);
   2256 } /* ixgbe_dma_malloc */
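
/*
 * A minimal usage sketch, mirroring how ixgbe_allocate_queues() below
 * uses this helper for a descriptor ring.  The function name, the
 * 256-descriptor ring size and the local variable names are
 * illustrative assumptions, not code used elsewhere in this file.
 */
#if 0	/* example only */
static void
example_ring_dma(struct adapter *adapter)
{
	struct ixgbe_dma_alloc  dma;
	union ixgbe_adv_rx_desc *ring;
	int                     rsize;

	rsize = roundup2(256 * sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
	if (ixgbe_dma_malloc(adapter, rsize, &dma, BUS_DMA_NOWAIT) != 0)
		return;
	/* dma_vaddr is the kernel mapping, dma_paddr the bus address. */
	ring = (union ixgbe_adv_rx_desc *)dma.dma_vaddr;
	bzero((void *)ring, rsize);
	/* ... hand dma.dma_paddr to the hardware and use the ring ... */
	ixgbe_dma_free(adapter, &dma);
}
#endif	/* example only */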
   2257 
   2258 /************************************************************************
   2259  * ixgbe_dma_free
   2260  ************************************************************************/
   2261 void
   2262 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
   2263 {
   2264 	bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
   2265 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   2266 	ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
   2267 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
   2268 	ixgbe_dma_tag_destroy(dma->dma_tag);
   2269 } /* ixgbe_dma_free */
   2270 
   2271 
   2272 /************************************************************************
   2273  * ixgbe_allocate_queues
   2274  *
   2275  *   Allocate memory for the transmit and receive rings, and then
   2276  *   the descriptors associated with each, called only once at attach.
   2277  ************************************************************************/
   2278 int
   2279 ixgbe_allocate_queues(struct adapter *adapter)
   2280 {
   2281 	device_t	dev = adapter->dev;
   2282 	struct ix_queue	*que;
   2283 	struct tx_ring	*txr;
   2284 	struct rx_ring	*rxr;
   2285 	int             rsize, tsize, error = IXGBE_SUCCESS;
   2286 	int             txconf = 0, rxconf = 0;
   2287 
   2288 	/* First, allocate the top level queue structs */
   2289 	adapter->queues = (struct ix_queue *)malloc(sizeof(struct ix_queue) *
   2290 	    adapter->num_queues, M_DEVBUF, M_WAITOK | M_ZERO);
   2291 
   2292 	/* Second, allocate the TX ring struct memory */
   2293 	adapter->tx_rings = malloc(sizeof(struct tx_ring) *
   2294 	    adapter->num_queues, M_DEVBUF, M_WAITOK | M_ZERO);
   2295 
   2296 	/* Third, allocate the RX ring */
   2297 	adapter->rx_rings = (struct rx_ring *)malloc(sizeof(struct rx_ring) *
   2298 	    adapter->num_queues, M_DEVBUF, M_WAITOK | M_ZERO);
   2299 
   2300 	/* For the ring itself */
   2301 	tsize = roundup2(adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc),
   2302 	    DBA_ALIGN);
   2303 
   2304 	/*
    2305 	 * Now set up the TX queues; txconf is needed to handle the
    2306 	 * possibility that things fail midcourse and we need to
    2307 	 * undo the memory allocations gracefully.
   2308 	 */
   2309 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
   2310 		/* Set up some basics */
   2311 		txr = &adapter->tx_rings[i];
   2312 		txr->adapter = adapter;
   2313 		txr->txr_interq = NULL;
   2314 		/* In case SR-IOV is enabled, align the index properly */
   2315 #ifdef PCI_IOV
   2316 		txr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
   2317 		    i);
   2318 #else
   2319 		txr->me = i;
   2320 #endif
   2321 		txr->num_desc = adapter->num_tx_desc;
   2322 
   2323 		/* Initialize the TX side lock */
   2324 		mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
   2325 
   2326 		if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma,
   2327 		    BUS_DMA_NOWAIT)) {
   2328 			aprint_error_dev(dev,
   2329 			    "Unable to allocate TX Descriptor memory\n");
   2330 			error = ENOMEM;
   2331 			goto err_tx_desc;
   2332 		}
   2333 		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
   2334 		bzero((void *)txr->tx_base, tsize);
   2335 
   2336 		/* Now allocate transmit buffers for the ring */
   2337 		if (ixgbe_allocate_transmit_buffers(txr)) {
   2338 			aprint_error_dev(dev,
   2339 			    "Critical Failure setting up transmit buffers\n");
   2340 			error = ENOMEM;
   2341 			goto err_tx_desc;
   2342 		}
   2343 		if (!(adapter->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
   2344 			/* Allocate a buf ring */
   2345 			txr->txr_interq = pcq_create(IXGBE_BR_SIZE, KM_SLEEP);
   2346 			if (txr->txr_interq == NULL) {
   2347 				aprint_error_dev(dev,
   2348 				    "Critical Failure setting up buf ring\n");
   2349 				error = ENOMEM;
   2350 				goto err_tx_desc;
   2351 			}
   2352 		}
   2353 	}
   2354 
   2355 	/*
   2356 	 * Next the RX queues...
   2357 	 */
   2358 	rsize = roundup2(adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc),
   2359 	    DBA_ALIGN);
   2360 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
   2361 		rxr = &adapter->rx_rings[i];
   2362 		/* Set up some basics */
   2363 		rxr->adapter = adapter;
   2364 #ifdef PCI_IOV
   2365 		/* In case SR-IOV is enabled, align the index properly */
   2366 		rxr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
   2367 		    i);
   2368 #else
   2369 		rxr->me = i;
   2370 #endif
   2371 		rxr->num_desc = adapter->num_rx_desc;
   2372 
   2373 		/* Initialize the RX side lock */
   2374 		mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
   2375 
   2376 		if (ixgbe_dma_malloc(adapter, rsize, &rxr->rxdma,
   2377 		    BUS_DMA_NOWAIT)) {
   2378 			aprint_error_dev(dev,
    2379 			    "Unable to allocate RX Descriptor memory\n");
   2380 			error = ENOMEM;
   2381 			goto err_rx_desc;
   2382 		}
   2383 		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
   2384 		bzero((void *)rxr->rx_base, rsize);
   2385 
   2386 		/* Allocate receive buffers for the ring */
   2387 		if (ixgbe_allocate_receive_buffers(rxr)) {
   2388 			aprint_error_dev(dev,
   2389 			    "Critical Failure setting up receive buffers\n");
   2390 			error = ENOMEM;
   2391 			goto err_rx_desc;
   2392 		}
   2393 	}
   2394 
   2395 	/*
   2396 	 * Finally set up the queue holding structs
   2397 	 */
   2398 	for (int i = 0; i < adapter->num_queues; i++) {
   2399 		que = &adapter->queues[i];
   2400 		que->adapter = adapter;
   2401 		que->me = i;
   2402 		que->txr = &adapter->tx_rings[i];
   2403 		que->rxr = &adapter->rx_rings[i];
   2404 
   2405 		mutex_init(&que->dc_mtx, MUTEX_DEFAULT, IPL_NET);
   2406 		que->disabled_count = 0;
   2407 	}
   2408 
   2409 	return (0);
   2410 
   2411 err_rx_desc:
   2412 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
   2413 		ixgbe_dma_free(adapter, &rxr->rxdma);
   2414 err_tx_desc:
   2415 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
   2416 		ixgbe_dma_free(adapter, &txr->txdma);
   2417 	free(adapter->rx_rings, M_DEVBUF);
   2418 	free(adapter->tx_rings, M_DEVBUF);
   2419 	free(adapter->queues, M_DEVBUF);
   2420 	return (error);
   2421 } /* ixgbe_allocate_queues */
   2422 
   2423 /************************************************************************
   2424  * ixgbe_free_queues
   2425  *
   2426  *   Free descriptors for the transmit and receive rings, and then
   2427  *   the memory associated with each.
   2428  ************************************************************************/
   2429 void
   2430 ixgbe_free_queues(struct adapter *adapter)
   2431 {
   2432 	struct ix_queue *que;
   2433 	int i;
   2434 
   2435 	ixgbe_free_transmit_structures(adapter);
   2436 	ixgbe_free_receive_structures(adapter);
   2437 	for (i = 0; i < adapter->num_queues; i++) {
   2438 		que = &adapter->queues[i];
   2439 		mutex_destroy(&que->dc_mtx);
   2440 	}
   2441 	free(adapter->queues, M_DEVBUF);
   2442 } /* ixgbe_free_queues */
   2443