      1 /* $NetBSD: ix_txrx.c,v 1.71 2021/04/30 06:55:32 msaitoh Exp $ */
      2 
      3 /******************************************************************************
      4 
      5   Copyright (c) 2001-2017, Intel Corporation
      6   All rights reserved.
      7 
      8   Redistribution and use in source and binary forms, with or without
      9   modification, are permitted provided that the following conditions are met:
     10 
     11    1. Redistributions of source code must retain the above copyright notice,
     12       this list of conditions and the following disclaimer.
     13 
     14    2. Redistributions in binary form must reproduce the above copyright
     15       notice, this list of conditions and the following disclaimer in the
     16       documentation and/or other materials provided with the distribution.
     17 
     18    3. Neither the name of the Intel Corporation nor the names of its
     19       contributors may be used to endorse or promote products derived from
     20       this software without specific prior written permission.
     21 
     22   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     23   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     24   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     25   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     26   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     27   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     28   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     29   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     30   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     31   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     32   POSSIBILITY OF SUCH DAMAGE.
     33 
     34 ******************************************************************************/
     35 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 327031 2017-12-20 18:15:06Z erj $*/
     36 
     37 /*
     38  * Copyright (c) 2011 The NetBSD Foundation, Inc.
     39  * All rights reserved.
     40  *
     41  * This code is derived from software contributed to The NetBSD Foundation
     42  * by Coyote Point Systems, Inc.
     43  *
     44  * Redistribution and use in source and binary forms, with or without
     45  * modification, are permitted provided that the following conditions
     46  * are met:
     47  * 1. Redistributions of source code must retain the above copyright
     48  *    notice, this list of conditions and the following disclaimer.
     49  * 2. Redistributions in binary form must reproduce the above copyright
     50  *    notice, this list of conditions and the following disclaimer in the
     51  *    documentation and/or other materials provided with the distribution.
     52  *
     53  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     54  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     55  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     56  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     57  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     58  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     59  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     60  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     61  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     62  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     63  * POSSIBILITY OF SUCH DAMAGE.
     64  */
     65 
     66 #include <sys/cdefs.h>
     67 __KERNEL_RCSID(0, "$NetBSD: ix_txrx.c,v 1.71 2021/04/30 06:55:32 msaitoh Exp $");
     68 
     69 #include "opt_inet.h"
     70 #include "opt_inet6.h"
     71 
     72 #include "ixgbe.h"
     73 
     74 /*
     75  * HW RSC control:
     76  *  This feature only works with IPv4
     77  *  and only on 82599 and later devices.
     78  *  It also causes IP forwarding to fail,
     79  *  and unlike LRO that cannot be controlled
     80  *  by the stack. For these reasons it is
     81  *  left off by default with no tunable
     82  *  interface; enabling it requires
     83  *  changing this value and recompiling
     84  *  the driver.
     85  */
     86 static bool ixgbe_rsc_enable = FALSE;
     87 
     88 /*
     89  * For Flow Director: this is the
     90  * number of TX packets we sample
     91  * for the filter pool; at the default
     92  * rate of 20, every 20th packet is probed.
     93  *
     94  * This feature can be disabled by
     95  * setting this to 0.
     96  */
     97 static int atr_sample_rate = 20;
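
/*
 * Illustrative sketch (simplified from ixgbe_xmit() below) of how this
 * value is consumed: each TX ring keeps a counter, and once it reaches
 * atr_sample_rate the packet is handed to ixgbe_atr() for Flow Director
 * filter programming and the counter is reset:
 *
 *	if ((adapter->feat_en & IXGBE_FEATURE_FDIR) && txr->atr_sample) {
 *		if (++txr->atr_count >= atr_sample_rate) {
 *			ixgbe_atr(txr, m_head);
 *			txr->atr_count = 0;
 *		}
 *	}
 */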
     98 
     99 /************************************************************************
    100  *  Local Function prototypes
    101  ************************************************************************/
    102 static void          ixgbe_setup_transmit_ring(struct tx_ring *);
    103 static void          ixgbe_free_transmit_buffers(struct tx_ring *);
    104 static int           ixgbe_setup_receive_ring(struct rx_ring *);
    105 static void          ixgbe_free_receive_buffers(struct rx_ring *);
    106 static void          ixgbe_rx_checksum(u32, struct mbuf *, u32,
    107                                        struct ixgbe_hw_stats *);
    108 static void          ixgbe_refresh_mbufs(struct rx_ring *, int);
    109 static void          ixgbe_drain(struct ifnet *, struct tx_ring *);
    110 static int           ixgbe_xmit(struct tx_ring *, struct mbuf *);
    111 static int           ixgbe_tx_ctx_setup(struct tx_ring *,
    112                                         struct mbuf *, u32 *, u32 *);
    113 static int           ixgbe_tso_setup(struct tx_ring *,
    114                                      struct mbuf *, u32 *, u32 *);
    115 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
    116 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
    117                                     struct mbuf *, u32);
    118 static int           ixgbe_dma_malloc(struct adapter *, bus_size_t,
    119                                       struct ixgbe_dma_alloc *, int);
    120 static void          ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
    121 
    122 static void	ixgbe_setup_hw_rsc(struct rx_ring *);
    123 
    124 /************************************************************************
    125  * ixgbe_legacy_start_locked - Transmit entry point
    126  *
    127  *   Called by the stack to initiate a transmit.
    128  *   The driver will remain in this routine as long as there are
    129  *   packets to transmit and transmit resources are available.
    130  *   In case resources are not available, the stack is notified
    131  *   and the packet is requeued.
    132  ************************************************************************/
    133 int
    134 ixgbe_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
    135 {
    136 	int rc;
    137 	struct mbuf    *m_head;
    138 	struct adapter *adapter = txr->adapter;
    139 
    140 	IXGBE_TX_LOCK_ASSERT(txr);
    141 
    142 	if (adapter->link_active != LINK_STATE_UP) {
    143 		/*
    144 		 * discard all packets buffered in IFQ to avoid
    145 		 * sending old packets at next link up timing.
    146 		 */
    147 		ixgbe_drain(ifp, txr);
    148 		return (ENETDOWN);
    149 	}
    150 	if ((ifp->if_flags & IFF_RUNNING) == 0)
    151 		return (ENETDOWN);
    152 	if (txr->txr_no_space)
    153 		return (ENETDOWN);
    154 
    155 	while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
    156 		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
    157 			break;
    158 
    159 		IFQ_POLL(&ifp->if_snd, m_head);
    160 		if (m_head == NULL)
    161 			break;
    162 
    163 		if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
    164 			break;
    165 		}
    166 		IFQ_DEQUEUE(&ifp->if_snd, m_head);
    167 		if (rc != 0) {
    168 			m_freem(m_head);
    169 			continue;
    170 		}
    171 
    172 		/* Send a copy of the frame to the BPF listener */
    173 		bpf_mtap(ifp, m_head, BPF_D_OUT);
    174 	}
    175 
    176 	return IXGBE_SUCCESS;
    177 } /* ixgbe_legacy_start_locked */
    178 
    179 /************************************************************************
    180  * ixgbe_legacy_start
    181  *
    182  *   Called by the stack, this always uses the first tx ring,
    183  *   and should not be used with multiqueue tx enabled.
    184  ************************************************************************/
    185 void
    186 ixgbe_legacy_start(struct ifnet *ifp)
    187 {
    188 	struct adapter *adapter = ifp->if_softc;
    189 	struct tx_ring *txr = adapter->tx_rings;
    190 
    191 	if (ifp->if_flags & IFF_RUNNING) {
    192 		IXGBE_TX_LOCK(txr);
    193 		ixgbe_legacy_start_locked(ifp, txr);
    194 		IXGBE_TX_UNLOCK(txr);
    195 	}
    196 } /* ixgbe_legacy_start */
    197 
    198 /************************************************************************
    199  * ixgbe_mq_start - Multiqueue Transmit Entry Point
    200  *
    201  *   (if_transmit function)
    202  ************************************************************************/
    203 int
    204 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
    205 {
    206 	struct adapter	*adapter = ifp->if_softc;
    207 	struct tx_ring	*txr;
    208 	int		i;
    209 #ifdef RSS
    210 	uint32_t bucket_id;
    211 #endif
    212 
    213 	/*
    214 	 * When doing RSS, map it to the same outbound queue
    215 	 * as the incoming flow would be mapped to.
    216 	 *
    217 	 * If everything is set up correctly, it should be the
    218 	 * same bucket as the one the current CPU is mapped to.
    219 	 */
    220 #ifdef RSS
    221 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
    222 		if ((adapter->feat_en & IXGBE_FEATURE_RSS) &&
    223 		    (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
    224 		    &bucket_id) == 0)) {
    225 			i = bucket_id % adapter->num_queues;
    226 #ifdef IXGBE_DEBUG
    227 			if (bucket_id > adapter->num_queues)
    228 				if_printf(ifp,
    229 				    "bucket_id (%d) > num_queues (%d)\n",
    230 				    bucket_id, adapter->num_queues);
    231 #endif
    232 		} else
    233 			i = m->m_pkthdr.flowid % adapter->num_queues;
    234 	} else
    235 #endif /* RSS */
    236 		i = (cpu_index(curcpu()) % ncpu) % adapter->num_queues;
    237 
    238 	/* Check for a hung queue and pick alternative */
    239 	if (((1ULL << i) & adapter->active_queues) == 0)
    240 		i = ffs64(adapter->active_queues);
    241 
    242 	txr = &adapter->tx_rings[i];
    243 
    244 	if (__predict_false(!pcq_put(txr->txr_interq, m))) {
    245 		m_freem(m);
    246 		txr->pcq_drops.ev_count++;
    247 		return ENOBUFS;
    248 	}
    249 	if (IXGBE_TX_TRYLOCK(txr)) {
    250 		ixgbe_mq_start_locked(ifp, txr);
    251 		IXGBE_TX_UNLOCK(txr);
    252 	} else {
    253 		if (adapter->txrx_use_workqueue) {
    254 			u_int *enqueued;
    255 
    256 			/*
    257 			 * This function itself is not called in interrupt
    258 			 * context, but it can be called in fast softint
    259 			 * context right after receiving forwarded packets.
    260 			 * We therefore have to protect the workqueue from
    261 			 * being enqueued twice when the machine handles both
    262 			 * locally generated and forwarded packets.
    263 			 */
    264 			enqueued = percpu_getref(adapter->txr_wq_enqueued);
    265 			if (*enqueued == 0) {
    266 				*enqueued = 1;
    267 				percpu_putref(adapter->txr_wq_enqueued);
    268 				workqueue_enqueue(adapter->txr_wq,
    269 				    &txr->wq_cookie, curcpu());
    270 			} else
    271 				percpu_putref(adapter->txr_wq_enqueued);
    272 		} else {
    273 			kpreempt_disable();
    274 			softint_schedule(txr->txr_si);
    275 			kpreempt_enable();
    276 		}
    277 	}
    278 
    279 	return (0);
    280 } /* ixgbe_mq_start */
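
/*
 * A minimal usage sketch, not code from this file: the attach code in
 * ixgbe.c is expected to select the multiqueue path by pointing the
 * ifnet transmit hooks at the entry points above, along the lines of
 *
 *	ifp->if_transmit = ixgbe_mq_start;
 *	ifp->if_start = ixgbe_legacy_start;
 *
 * (the exact hookup is an assumption here).  ixgbe_mq_start() only
 * enqueues into the per-ring pcq and defers the descriptor work to
 * ixgbe_mq_start_locked(), via softint or workqueue, when the TX lock
 * is contended.
 */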
    281 
    282 /************************************************************************
    283  * ixgbe_mq_start_locked
    284  ************************************************************************/
    285 int
    286 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
    287 {
    288 	struct mbuf    *next;
    289 	int            enqueued = 0, err = 0;
    290 
    291 	if (txr->adapter->link_active != LINK_STATE_UP) {
    292 		/*
    293 		 * discard all packets buffered in txr_interq to avoid
    294 		 * sending old packets at next link up timing.
    295 		 */
    296 		ixgbe_drain(ifp, txr);
    297 		return (ENETDOWN);
    298 	}
    299 	if ((ifp->if_flags & IFF_RUNNING) == 0)
    300 		return (ENETDOWN);
    301 	if (txr->txr_no_space)
    302 		return (ENETDOWN);
    303 
    304 	/* Process the queue */
    305 	while ((next = pcq_get(txr->txr_interq)) != NULL) {
    306 		if ((err = ixgbe_xmit(txr, next)) != 0) {
    307 			m_freem(next);
    308 			/* All errors are counted in ixgbe_xmit() */
    309 			break;
    310 		}
    311 		enqueued++;
    312 #if __FreeBSD_version >= 1100036
    313 		/*
    314 		 * Since we're looking at the tx ring, we can check
    315 		 * to see if we're a VF by examing our tail register
    316 		 * address.
    317 		 */
    318 		if ((txr->adapter->feat_en & IXGBE_FEATURE_VF) &&
    319 		    (next->m_flags & M_MCAST))
    320 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
    321 #endif
    322 		/* Send a copy of the frame to the BPF listener */
    323 		bpf_mtap(ifp, next, BPF_D_OUT);
    324 		if ((ifp->if_flags & IFF_RUNNING) == 0)
    325 			break;
    326 	}
    327 
    328 	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->adapter))
    329 		ixgbe_txeof(txr);
    330 
    331 	return (err);
    332 } /* ixgbe_mq_start_locked */
    333 
    334 /************************************************************************
    335  * ixgbe_deferred_mq_start
    336  *
    337  *   Called from a softint, or indirectly from a workqueue, to drain queued
    338  *   transmit packets.
    339  ************************************************************************/
    340 void
    341 ixgbe_deferred_mq_start(void *arg)
    342 {
    343 	struct tx_ring *txr = arg;
    344 	struct adapter *adapter = txr->adapter;
    345 	struct ifnet   *ifp = adapter->ifp;
    346 
    347 	IXGBE_TX_LOCK(txr);
    348 	if (pcq_peek(txr->txr_interq) != NULL)
    349 		ixgbe_mq_start_locked(ifp, txr);
    350 	IXGBE_TX_UNLOCK(txr);
    351 } /* ixgbe_deferred_mq_start */
    352 
    353 /************************************************************************
    354  * ixgbe_deferred_mq_start_work
    355  *
    356  *   Called from a workqueue to drain queued transmit packets.
    357  ************************************************************************/
    358 void
    359 ixgbe_deferred_mq_start_work(struct work *wk, void *arg)
    360 {
    361 	struct tx_ring *txr = container_of(wk, struct tx_ring, wq_cookie);
    362 	struct adapter *adapter = txr->adapter;
    363 	u_int *enqueued = percpu_getref(adapter->txr_wq_enqueued);
    364 	*enqueued = 0;
    365 	percpu_putref(adapter->txr_wq_enqueued);
    366 
    367 	ixgbe_deferred_mq_start(txr);
    368 } /* ixgbe_deferred_mq_start_work */
    369 
    370 /************************************************************************
    371  * ixgbe_drain_all
    372  ************************************************************************/
    373 void
    374 ixgbe_drain_all(struct adapter *adapter)
    375 {
    376 	struct ifnet *ifp = adapter->ifp;
    377 	struct ix_queue *que = adapter->queues;
    378 
    379 	for (int i = 0; i < adapter->num_queues; i++, que++) {
    380 		struct tx_ring  *txr = que->txr;
    381 
    382 		IXGBE_TX_LOCK(txr);
    383 		ixgbe_drain(ifp, txr);
    384 		IXGBE_TX_UNLOCK(txr);
    385 	}
    386 }
    387 
    388 /************************************************************************
    389  * ixgbe_xmit
    390  *
    391  *   Maps the mbufs to tx descriptors, allowing the
    392  *   TX engine to transmit the packets.
    393  *
    394  *   Return 0 on success, positive on failure
    395  ************************************************************************/
    396 static int
    397 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
    398 {
    399 	struct adapter          *adapter = txr->adapter;
    400 	struct ixgbe_tx_buf     *txbuf;
    401 	union ixgbe_adv_tx_desc *txd = NULL;
    402 	struct ifnet	        *ifp = adapter->ifp;
    403 	int                     i, j, error;
    404 	int                     first;
    405 	u32                     olinfo_status = 0, cmd_type_len;
    406 	bool                    remap = TRUE;
    407 	bus_dmamap_t            map;
    408 
    409 	/* Basic descriptor defines */
    410 	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
    411 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
    412 
    413 	if (vlan_has_tag(m_head))
    414 		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
    415 
    416 	/*
    417 	 * Important to capture the first descriptor
    418 	 * used because it will contain the index of
    419 	 * the one we tell the hardware to report back
    420 	 */
    421 	first = txr->next_avail_desc;
    422 	txbuf = &txr->tx_buffers[first];
    423 	map = txbuf->map;
    424 
    425 	/*
    426 	 * Map the packet for DMA.
    427 	 */
    428 retry:
    429 	error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map, m_head,
    430 	    BUS_DMA_NOWAIT);
    431 
    432 	if (__predict_false(error)) {
    433 		struct mbuf *m;
    434 
    435 		switch (error) {
    436 		case EAGAIN:
    437 			txr->q_eagain_tx_dma_setup++;
    438 			return EAGAIN;
    439 		case ENOMEM:
    440 			txr->q_enomem_tx_dma_setup++;
    441 			return EAGAIN;
    442 		case EFBIG:
    443 			/* Try it again? - one try */
    444 			if (remap == TRUE) {
    445 				remap = FALSE;
    446 				/*
    447 				 * XXX: m_defrag will choke on
    448 				 * non-MCLBYTES-sized clusters
    449 				 */
    450 				txr->q_efbig_tx_dma_setup++;
    451 				m = m_defrag(m_head, M_NOWAIT);
    452 				if (m == NULL) {
    453 					txr->q_mbuf_defrag_failed++;
    454 					return ENOBUFS;
    455 				}
    456 				m_head = m;
    457 				goto retry;
    458 			} else {
    459 				txr->q_efbig2_tx_dma_setup++;
    460 				return error;
    461 			}
    462 		case EINVAL:
    463 			txr->q_einval_tx_dma_setup++;
    464 			return error;
    465 		default:
    466 			txr->q_other_tx_dma_setup++;
    467 			return error;
    468 		}
    469 	}
    470 
    471 	/* Make certain there are enough descriptors */
    472 	if (txr->tx_avail < (map->dm_nsegs + 2)) {
    473 		txr->txr_no_space = true;
    474 		txr->no_desc_avail.ev_count++;
    475 		ixgbe_dmamap_unload(txr->txtag, txbuf->map);
    476 		return EAGAIN;
    477 	}
    478 
    479 	/*
    480 	 * Set up the appropriate offload context;
    481 	 * this will consume the first descriptor.
    482 	 */
    483 	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
    484 	if (__predict_false(error)) {
    485 		return (error);
    486 	}
    487 
    488 	/* Do the flow director magic */
    489 	if ((adapter->feat_en & IXGBE_FEATURE_FDIR) &&
    490 	    (txr->atr_sample) && (!adapter->fdir_reinit)) {
    491 		++txr->atr_count;
    492 		if (txr->atr_count >= atr_sample_rate) {
    493 			ixgbe_atr(txr, m_head);
    494 			txr->atr_count = 0;
    495 		}
    496 	}
    497 
    498 	olinfo_status |= IXGBE_ADVTXD_CC;
    499 	i = txr->next_avail_desc;
    500 	for (j = 0; j < map->dm_nsegs; j++) {
    501 		bus_size_t seglen;
    502 		bus_addr_t segaddr;
    503 
    504 		txbuf = &txr->tx_buffers[i];
    505 		txd = &txr->tx_base[i];
    506 		seglen = map->dm_segs[j].ds_len;
    507 		segaddr = htole64(map->dm_segs[j].ds_addr);
    508 
    509 		txd->read.buffer_addr = segaddr;
    510 		txd->read.cmd_type_len = htole32(cmd_type_len | seglen);
    511 		txd->read.olinfo_status = htole32(olinfo_status);
    512 
    513 		if (++i == txr->num_desc)
    514 			i = 0;
    515 	}
    516 
    517 	txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
    518 	txr->tx_avail -= map->dm_nsegs;
    519 	txr->next_avail_desc = i;
    520 
    521 	txbuf->m_head = m_head;
    522 	/*
    523 	 * Here we swap the map so the last descriptor,
    524 	 * which gets the completion interrupt, has the
    525 	 * real map, and the first descriptor gets the
    526 	 * unused map from this descriptor.
    527 	 */
    528 	txr->tx_buffers[first].map = txbuf->map;
    529 	txbuf->map = map;
    530 	bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
    531 	    BUS_DMASYNC_PREWRITE);
    532 
    533 	/* Set the EOP descriptor that will be marked done */
    534 	txbuf = &txr->tx_buffers[first];
    535 	txbuf->eop = txd;
    536 
    537 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
    538 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
    539 	/*
    540 	 * Advance the Transmit Descriptor Tail (TDT); this tells the
    541 	 * hardware that this frame is available to transmit.
    542 	 */
    543 	++txr->total_packets.ev_count;
    544 	IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
    545 
    546 	net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
    547 	if_statadd_ref(nsr, if_obytes, m_head->m_pkthdr.len);
    548 	if (m_head->m_flags & M_MCAST)
    549 		if_statinc_ref(nsr, if_omcasts);
    550 	IF_STAT_PUTREF(ifp);
    551 
    552 	/* Mark queue as having work */
    553 	if (txr->busy == 0)
    554 		txr->busy = 1;
    555 
    556 	return (0);
    557 } /* ixgbe_xmit */
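
/*
 * Descriptor accounting note (illustrative, based on the checks above):
 * a frame that maps to N DMA segments consumes one context descriptor
 * (set up in ixgbe_tx_ctx_setup()) plus N data descriptors.  The
 * "tx_avail < dm_nsegs + 2" test therefore leaves room for the context
 * descriptor and at least one spare slot; e.g. a 3-segment frame is
 * only accepted while tx_avail >= 5.
 */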
    558 
    559 /************************************************************************
    560  * ixgbe_drain
    561  ************************************************************************/
    562 static void
    563 ixgbe_drain(struct ifnet *ifp, struct tx_ring *txr)
    564 {
    565 	struct mbuf *m;
    566 
    567 	IXGBE_TX_LOCK_ASSERT(txr);
    568 
    569 	if (txr->me == 0) {
    570 		while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
    571 			IFQ_DEQUEUE(&ifp->if_snd, m);
    572 			m_freem(m);
    573 			IF_DROP(&ifp->if_snd);
    574 		}
    575 	}
    576 
    577 	while ((m = pcq_get(txr->txr_interq)) != NULL) {
    578 		m_freem(m);
    579 		txr->pcq_drops.ev_count++;
    580 	}
    581 }
    582 
    583 /************************************************************************
    584  * ixgbe_allocate_transmit_buffers
    585  *
    586  *   Allocate memory for tx_buffer structures. The tx_buffer stores all
    587  *   the information needed to transmit a packet on the wire. This is
    588  *   called only once at attach; setup is done on every reset.
    589  ************************************************************************/
    590 static int
    591 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
    592 {
    593 	struct adapter      *adapter = txr->adapter;
    594 	device_t            dev = adapter->dev;
    595 	struct ixgbe_tx_buf *txbuf;
    596 	int                 error, i;
    597 
    598 	/*
    599 	 * Setup DMA descriptor areas.
    600 	 */
    601 	error = ixgbe_dma_tag_create(
    602 	         /*      parent */ adapter->osdep.dmat,
    603 	         /*   alignment */ 1,
    604 	         /*      bounds */ 0,
    605 	         /*     maxsize */ IXGBE_TSO_SIZE,
    606 	         /*   nsegments */ adapter->num_segs,
    607 	         /*  maxsegsize */ PAGE_SIZE,
    608 	         /*       flags */ 0,
    609 	                           &txr->txtag);
    610 	if (error != 0) {
    611 		aprint_error_dev(dev, "Unable to allocate TX DMA tag\n");
    612 		goto fail;
    613 	}
    614 
    615 	txr->tx_buffers = malloc(sizeof(struct ixgbe_tx_buf) *
    616 	    adapter->num_tx_desc, M_DEVBUF, M_WAITOK | M_ZERO);
    617 
    618 	/* Create the descriptor buffer dma maps */
    619 	txbuf = txr->tx_buffers;
    620 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
    621 		error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
    622 		if (error != 0) {
    623 			aprint_error_dev(dev,
    624 			    "Unable to create TX DMA map (%d)\n", error);
    625 			goto fail;
    626 		}
    627 	}
    628 
    629 	return 0;
    630 fail:
    631 	/* We free everything; this handles the case where we failed part way */
    632 #if 0 /* XXX was FreeBSD */
    633 	ixgbe_free_transmit_structures(adapter);
    634 #else
    635 	ixgbe_free_transmit_buffers(txr);
    636 #endif
    637 	return (error);
    638 } /* ixgbe_allocate_transmit_buffers */
    639 
    640 /************************************************************************
    641  * ixgbe_setup_transmit_ring - Initialize a transmit ring.
    642  ************************************************************************/
    643 static void
    644 ixgbe_setup_transmit_ring(struct tx_ring *txr)
    645 {
    646 	struct adapter        *adapter = txr->adapter;
    647 	struct ixgbe_tx_buf   *txbuf;
    648 #ifdef DEV_NETMAP
    649 	struct netmap_adapter *na = NA(adapter->ifp);
    650 	struct netmap_slot    *slot;
    651 #endif /* DEV_NETMAP */
    652 
    653 	/* Clear the old ring contents */
    654 	IXGBE_TX_LOCK(txr);
    655 
    656 #ifdef DEV_NETMAP
    657 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
    658 		/*
    659 		 * (under lock): if in netmap mode, do some consistency
    660 		 * checks and set slot to entry 0 of the netmap ring.
    661 		 */
    662 		slot = netmap_reset(na, NR_TX, txr->me, 0);
    663 	}
    664 #endif /* DEV_NETMAP */
    665 
    666 	bzero((void *)txr->tx_base,
    667 	    (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
    668 	/* Reset indices */
    669 	txr->next_avail_desc = 0;
    670 	txr->next_to_clean = 0;
    671 
    672 	/* Free any existing tx buffers. */
    673 	txbuf = txr->tx_buffers;
    674 	for (int i = 0; i < txr->num_desc; i++, txbuf++) {
    675 		if (txbuf->m_head != NULL) {
    676 			bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
    677 			    0, txbuf->m_head->m_pkthdr.len,
    678 			    BUS_DMASYNC_POSTWRITE);
    679 			ixgbe_dmamap_unload(txr->txtag, txbuf->map);
    680 			m_freem(txbuf->m_head);
    681 			txbuf->m_head = NULL;
    682 		}
    683 
    684 #ifdef DEV_NETMAP
    685 		/*
    686 		 * In netmap mode, set the map for the packet buffer.
    687 		 * NOTE: Some drivers (not this one) also need to set
    688 		 * the physical buffer address in the NIC ring.
    689 		 * Slots in the netmap ring (indexed by "si") are
    690 		 * kring->nkr_hwofs positions "ahead" wrt the
    691 		 * corresponding slot in the NIC ring. In some drivers
    692 		 * (not here) nkr_hwofs can be negative. Function
    693 		 * netmap_idx_n2k() handles wraparounds properly.
    694 		 */
    695 		if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
    696 			int si = netmap_idx_n2k(na->tx_rings[txr->me], i);
    697 			netmap_load_map(na, txr->txtag,
    698 			    txbuf->map, NMB(na, slot + si));
    699 		}
    700 #endif /* DEV_NETMAP */
    701 
    702 		/* Clear the EOP descriptor pointer */
    703 		txbuf->eop = NULL;
    704 	}
    705 
    706 	/* Set the rate at which we sample packets */
    707 	if (adapter->feat_en & IXGBE_FEATURE_FDIR)
    708 		txr->atr_sample = atr_sample_rate;
    709 
    710 	/* Set number of descriptors available */
    711 	txr->tx_avail = adapter->num_tx_desc;
    712 
    713 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
    714 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
    715 	IXGBE_TX_UNLOCK(txr);
    716 } /* ixgbe_setup_transmit_ring */
    717 
    718 /************************************************************************
    719  * ixgbe_setup_transmit_structures - Initialize all transmit rings.
    720  ************************************************************************/
    721 int
    722 ixgbe_setup_transmit_structures(struct adapter *adapter)
    723 {
    724 	struct tx_ring *txr = adapter->tx_rings;
    725 
    726 	for (int i = 0; i < adapter->num_queues; i++, txr++)
    727 		ixgbe_setup_transmit_ring(txr);
    728 
    729 	return (0);
    730 } /* ixgbe_setup_transmit_structures */
    731 
    732 /************************************************************************
    733  * ixgbe_free_transmit_structures - Free all transmit rings.
    734  ************************************************************************/
    735 void
    736 ixgbe_free_transmit_structures(struct adapter *adapter)
    737 {
    738 	struct tx_ring *txr = adapter->tx_rings;
    739 
    740 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
    741 		ixgbe_free_transmit_buffers(txr);
    742 		ixgbe_dma_free(adapter, &txr->txdma);
    743 		IXGBE_TX_LOCK_DESTROY(txr);
    744 	}
    745 	free(adapter->tx_rings, M_DEVBUF);
    746 } /* ixgbe_free_transmit_structures */
    747 
    748 /************************************************************************
    749  * ixgbe_free_transmit_buffers
    750  *
    751  *   Free transmit ring related data structures.
    752  ************************************************************************/
    753 static void
    754 ixgbe_free_transmit_buffers(struct tx_ring *txr)
    755 {
    756 	struct adapter      *adapter = txr->adapter;
    757 	struct ixgbe_tx_buf *tx_buffer;
    758 	int                 i;
    759 
    760 	INIT_DEBUGOUT("ixgbe_free_transmit_buffers: begin");
    761 
    762 	if (txr->tx_buffers == NULL)
    763 		return;
    764 
    765 	tx_buffer = txr->tx_buffers;
    766 	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
    767 		if (tx_buffer->m_head != NULL) {
    768 			bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
    769 			    0, tx_buffer->m_head->m_pkthdr.len,
    770 			    BUS_DMASYNC_POSTWRITE);
    771 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
    772 			m_freem(tx_buffer->m_head);
    773 			tx_buffer->m_head = NULL;
    774 			if (tx_buffer->map != NULL) {
    775 				ixgbe_dmamap_destroy(txr->txtag,
    776 				    tx_buffer->map);
    777 				tx_buffer->map = NULL;
    778 			}
    779 		} else if (tx_buffer->map != NULL) {
    780 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
    781 			ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
    782 			tx_buffer->map = NULL;
    783 		}
    784 	}
    785 	if (txr->txr_interq != NULL) {
    786 		struct mbuf *m;
    787 
    788 		while ((m = pcq_get(txr->txr_interq)) != NULL)
    789 			m_freem(m);
    790 		pcq_destroy(txr->txr_interq);
    791 	}
    792 	if (txr->tx_buffers != NULL) {
    793 		free(txr->tx_buffers, M_DEVBUF);
    794 		txr->tx_buffers = NULL;
    795 	}
    796 	if (txr->txtag != NULL) {
    797 		ixgbe_dma_tag_destroy(txr->txtag);
    798 		txr->txtag = NULL;
    799 	}
    800 } /* ixgbe_free_transmit_buffers */
    801 
    802 /************************************************************************
    803  * ixgbe_tx_ctx_setup
    804  *
    805  *   Advanced Context Descriptor setup for VLAN, CSUM or TSO
    806  ************************************************************************/
    807 static int
    808 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
    809     u32 *cmd_type_len, u32 *olinfo_status)
    810 {
    811 	struct adapter                   *adapter = txr->adapter;
    812 	struct ixgbe_adv_tx_context_desc *TXD;
    813 	struct ether_vlan_header         *eh;
    814 #ifdef INET
    815 	struct ip                        *ip;
    816 #endif
    817 #ifdef INET6
    818 	struct ip6_hdr                   *ip6;
    819 #endif
    820 	int                              ehdrlen, ip_hlen = 0;
    821 	int                              offload = TRUE;
    822 	int                              ctxd = txr->next_avail_desc;
    823 	u32                              vlan_macip_lens = 0;
    824 	u32                              type_tucmd_mlhl = 0;
    825 	u16                              vtag = 0;
    826 	u16                              etype;
    827 	u8                               ipproto = 0;
    828 	char                             *l3d;
    829 
    830 
    831 	/* First check if TSO is to be used */
    832 	if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4 | M_CSUM_TSOv6)) {
    833 		int rv = ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status);
    834 
    835 		if (rv != 0)
    836 			++adapter->tso_err.ev_count;
    837 		return rv;
    838 	}
    839 
    840 	if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
    841 		offload = FALSE;
    842 
    843 	/* Indicate the whole packet as payload when not doing TSO */
    844 	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
    845 
    846 	/* Now ready a context descriptor */
    847 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
    848 
    849 	/*
    850 	 * In advanced descriptors the vlan tag must
    851 	 * be placed into the context descriptor. Hence
    852 	 * we need to make one even if not doing offloads.
    853 	 */
    854 	if (vlan_has_tag(mp)) {
    855 		vtag = htole16(vlan_get_tag(mp));
    856 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
    857 	} else if (!(txr->adapter->feat_en & IXGBE_FEATURE_NEEDS_CTXD) &&
    858 	           (offload == FALSE))
    859 		return (0);
    860 
    861 	/*
    862 	 * Determine where frame payload starts.
    863 	 * Jump over vlan headers if already present,
    864 	 * helpful for QinQ too.
    865 	 */
    866 	KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
    867 	eh = mtod(mp, struct ether_vlan_header *);
    868 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
    869 		KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
    870 		etype = ntohs(eh->evl_proto);
    871 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
    872 	} else {
    873 		etype = ntohs(eh->evl_encap_proto);
    874 		ehdrlen = ETHER_HDR_LEN;
    875 	}
    876 
    877 	/* Set the ether header length */
    878 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
    879 
    880 	if (offload == FALSE)
    881 		goto no_offloads;
    882 
    883 	/*
    884 	 * If the first mbuf only includes the ethernet header,
    885 	 * jump to the next one
    886 	 * XXX: This assumes the stack splits mbufs containing headers
    887 	 *      on header boundaries
    888 	 * XXX: And assumes the entire IP header is contained in one mbuf
    889 	 */
    890 	if (mp->m_len == ehdrlen && mp->m_next)
    891 		l3d = mtod(mp->m_next, char *);
    892 	else
    893 		l3d = mtod(mp, char *) + ehdrlen;
    894 
    895 	switch (etype) {
    896 #ifdef INET
    897 	case ETHERTYPE_IP:
    898 		ip = (struct ip *)(l3d);
    899 		ip_hlen = ip->ip_hl << 2;
    900 		ipproto = ip->ip_p;
    901 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
    902 		KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
    903 		    ip->ip_sum == 0);
    904 		break;
    905 #endif
    906 #ifdef INET6
    907 	case ETHERTYPE_IPV6:
    908 		ip6 = (struct ip6_hdr *)(l3d);
    909 		ip_hlen = sizeof(struct ip6_hdr);
    910 		ipproto = ip6->ip6_nxt;
    911 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
    912 		break;
    913 #endif
    914 	default:
    915 		offload = false;
    916 		break;
    917 	}
    918 
    919 	if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
    920 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
    921 
    922 	vlan_macip_lens |= ip_hlen;
    923 
    924 	/* No support for offloads for non-L4 next headers */
    925 	switch (ipproto) {
    926 	case IPPROTO_TCP:
    927 		if (mp->m_pkthdr.csum_flags &
    928 		    (M_CSUM_TCPv4 | M_CSUM_TCPv6))
    929 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
    930 		else
    931 			offload = false;
    932 		break;
    933 	case IPPROTO_UDP:
    934 		if (mp->m_pkthdr.csum_flags &
    935 		    (M_CSUM_UDPv4 | M_CSUM_UDPv6))
    936 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
    937 		else
    938 			offload = false;
    939 		break;
    940 	default:
    941 		offload = false;
    942 		break;
    943 	}
    944 
    945 	if (offload) /* Insert L4 checksum into data descriptors */
    946 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
    947 
    948 no_offloads:
    949 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
    950 
    951 	/* Now copy bits into descriptor */
    952 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
    953 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
    954 	TXD->seqnum_seed = htole32(0);
    955 	TXD->mss_l4len_idx = htole32(0);
    956 
    957 	/* We've consumed the first desc, adjust counters */
    958 	if (++ctxd == txr->num_desc)
    959 		ctxd = 0;
    960 	txr->next_avail_desc = ctxd;
    961 	--txr->tx_avail;
    962 
    963 	return (0);
    964 } /* ixgbe_tx_ctx_setup */
    965 
    966 /************************************************************************
    967  * ixgbe_tso_setup
    968  *
    969  *   Setup work for hardware segmentation offload (TSO) on
    970  *   adapters using advanced tx descriptors
    971  ************************************************************************/
    972 static int
    973 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len,
    974     u32 *olinfo_status)
    975 {
    976 	struct ixgbe_adv_tx_context_desc *TXD;
    977 	struct ether_vlan_header         *eh;
    978 #ifdef INET6
    979 	struct ip6_hdr                   *ip6;
    980 #endif
    981 #ifdef INET
    982 	struct ip                        *ip;
    983 #endif
    984 	struct tcphdr                    *th;
    985 	int                              ctxd, ehdrlen, ip_hlen, tcp_hlen;
    986 	u32                              vlan_macip_lens = 0;
    987 	u32                              type_tucmd_mlhl = 0;
    988 	u32                              mss_l4len_idx = 0, paylen;
    989 	u16                              vtag = 0, eh_type;
    990 
    991 	/*
    992 	 * Determine where frame payload starts.
    993 	 * Jump over vlan headers if already present
    994 	 */
    995 	eh = mtod(mp, struct ether_vlan_header *);
    996 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
    997 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
    998 		eh_type = eh->evl_proto;
    999 	} else {
   1000 		ehdrlen = ETHER_HDR_LEN;
   1001 		eh_type = eh->evl_encap_proto;
   1002 	}
   1003 
   1004 	switch (ntohs(eh_type)) {
   1005 #ifdef INET
   1006 	case ETHERTYPE_IP:
   1007 		ip = (struct ip *)(mp->m_data + ehdrlen);
   1008 		if (ip->ip_p != IPPROTO_TCP)
   1009 			return (ENXIO);
   1010 		ip->ip_sum = 0;
   1011 		ip_hlen = ip->ip_hl << 2;
   1012 		th = (struct tcphdr *)((char *)ip + ip_hlen);
   1013 		th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
   1014 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
   1015 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
   1016 		/* Tell transmit desc to also do IPv4 checksum. */
   1017 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
   1018 		break;
   1019 #endif
   1020 #ifdef INET6
   1021 	case ETHERTYPE_IPV6:
   1022 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
   1023 		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
   1024 		if (ip6->ip6_nxt != IPPROTO_TCP)
   1025 			return (ENXIO);
   1026 		ip_hlen = sizeof(struct ip6_hdr);
   1027 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
   1028 		th = (struct tcphdr *)((char *)ip6 + ip_hlen);
   1029 		th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
   1030 		    &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
   1031 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
   1032 		break;
   1033 #endif
   1034 	default:
   1035 		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
   1036 		    __func__, ntohs(eh_type));
   1037 		break;
   1038 	}
   1039 
   1040 	ctxd = txr->next_avail_desc;
   1041 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
   1042 
   1043 	tcp_hlen = th->th_off << 2;
   1044 
   1045 	/* This is used in the transmit desc in encap */
   1046 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
   1047 
   1048 	/* VLAN MACLEN IPLEN */
   1049 	if (vlan_has_tag(mp)) {
   1050 		vtag = htole16(vlan_get_tag(mp));
   1051 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
   1052 	}
   1053 
   1054 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
   1055 	vlan_macip_lens |= ip_hlen;
   1056 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
   1057 
   1058 	/* ADV DTYPE TUCMD */
   1059 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
   1060 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
   1061 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
   1062 
   1063 	/* MSS L4LEN IDX */
   1064 	mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
   1065 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
   1066 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
   1067 
   1068 	TXD->seqnum_seed = htole32(0);
   1069 
   1070 	if (++ctxd == txr->num_desc)
   1071 		ctxd = 0;
   1072 
   1073 	txr->tx_avail--;
   1074 	txr->next_avail_desc = ctxd;
   1075 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
   1076 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
   1077 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
   1078 	++txr->tso_tx.ev_count;
   1079 
   1080 	return (0);
   1081 } /* ixgbe_tso_setup */
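
/*
 * Worked example for the fields set above (illustrative numbers only):
 * a 9014-byte TSO chain with a 14-byte Ethernet header, a 20-byte IPv4
 * header, a 20-byte TCP header and m_pkthdr.segsz = 1460 yields
 *
 *	paylen = 9014 - 14 - 20 - 20 = 8960
 *	mss_l4len_idx = (1460 << IXGBE_ADVTXD_MSS_SHIFT) |
 *	    (20 << IXGBE_ADVTXD_L4LEN_SHIFT)
 *
 * so the hardware cuts the payload into ceil(8960 / 1460) = 7 segments,
 * replicating the Ethernet/IP/TCP headers and fixing up lengths and
 * checksums for each one.
 */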
   1082 
   1083 
   1084 /************************************************************************
   1085  * ixgbe_txeof
   1086  *
   1087  *   Examine each tx_buffer in the used queue. If the hardware is done
   1088  *   processing the packet then free associated resources. The
   1089  *   tx_buffer is put back on the free queue.
   1090  ************************************************************************/
   1091 bool
   1092 ixgbe_txeof(struct tx_ring *txr)
   1093 {
   1094 	struct adapter		*adapter = txr->adapter;
   1095 	struct ifnet		*ifp = adapter->ifp;
   1096 	struct ixgbe_tx_buf	*buf;
   1097 	union ixgbe_adv_tx_desc *txd;
   1098 	u32			work, processed = 0;
   1099 	u32			limit = adapter->tx_process_limit;
   1100 
   1101 	KASSERT(mutex_owned(&txr->tx_mtx));
   1102 
   1103 #ifdef DEV_NETMAP
   1104 	if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) &&
   1105 	    (adapter->ifp->if_capenable & IFCAP_NETMAP)) {
   1106 		struct netmap_adapter *na = NA(adapter->ifp);
   1107 		struct netmap_kring *kring = na->tx_rings[txr->me];
   1108 		txd = txr->tx_base;
   1109 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1110 		    BUS_DMASYNC_POSTREAD);
   1111 		/*
   1112 		 * In netmap mode, all the work is done in the context
   1113 		 * of the client thread. Interrupt handlers only wake up
   1114 		 * clients, which may be sleeping on individual rings
   1115 		 * or on a global resource for all rings.
   1116 		 * To implement tx interrupt mitigation, we wake up the client
   1117 		 * thread roughly every half ring, even if the NIC interrupts
   1118 		 * more frequently. This is implemented as follows:
   1119 		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
   1120 		 *   the slot that should wake up the thread (nkr_num_slots
   1121 		 *   means the user thread should not be woken up);
   1122 		 * - the driver ignores tx interrupts unless netmap_mitigate=0
   1123 		 *   or the slot has the DD bit set.
   1124 		 */
   1125 		if (kring->nr_kflags < kring->nkr_num_slots &&
   1126 		    txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD) {
   1127 			netmap_tx_irq(ifp, txr->me);
   1128 		}
   1129 		return false;
   1130 	}
   1131 #endif /* DEV_NETMAP */
   1132 
   1133 	if (txr->tx_avail == txr->num_desc) {
   1134 		txr->busy = 0;
   1135 		return false;
   1136 	}
   1137 
   1138 	/* Get work starting point */
   1139 	work = txr->next_to_clean;
   1140 	buf = &txr->tx_buffers[work];
   1141 	txd = &txr->tx_base[work];
   1142 	work -= txr->num_desc; /* The distance to ring end */
   1143 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1144 	    BUS_DMASYNC_POSTREAD);
   1145 
   1146 	do {
   1147 		union ixgbe_adv_tx_desc *eop = buf->eop;
   1148 		if (eop == NULL) /* No work */
   1149 			break;
   1150 
   1151 		if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
   1152 			break;	/* I/O not complete */
   1153 
   1154 		if (buf->m_head) {
   1155 			txr->bytes += buf->m_head->m_pkthdr.len;
   1156 			bus_dmamap_sync(txr->txtag->dt_dmat, buf->map,
   1157 			    0, buf->m_head->m_pkthdr.len,
   1158 			    BUS_DMASYNC_POSTWRITE);
   1159 			ixgbe_dmamap_unload(txr->txtag, buf->map);
   1160 			m_freem(buf->m_head);
   1161 			buf->m_head = NULL;
   1162 		}
   1163 		buf->eop = NULL;
   1164 		txr->txr_no_space = false;
   1165 		++txr->tx_avail;
   1166 
   1167 		/* We clean the range if multi segment */
   1168 		while (txd != eop) {
   1169 			++txd;
   1170 			++buf;
   1171 			++work;
   1172 			/* wrap the ring? */
   1173 			if (__predict_false(!work)) {
   1174 				work -= txr->num_desc;
   1175 				buf = txr->tx_buffers;
   1176 				txd = txr->tx_base;
   1177 			}
   1178 			if (buf->m_head) {
   1179 				txr->bytes +=
   1180 				    buf->m_head->m_pkthdr.len;
   1181 				bus_dmamap_sync(txr->txtag->dt_dmat,
   1182 				    buf->map,
   1183 				    0, buf->m_head->m_pkthdr.len,
   1184 				    BUS_DMASYNC_POSTWRITE);
   1185 				ixgbe_dmamap_unload(txr->txtag,
   1186 				    buf->map);
   1187 				m_freem(buf->m_head);
   1188 				buf->m_head = NULL;
   1189 			}
   1190 			++txr->tx_avail;
   1191 			buf->eop = NULL;
   1192 
   1193 		}
   1194 		++txr->packets;
   1195 		++processed;
   1196 		if_statinc(ifp, if_opackets);
   1197 
   1198 		/* Try the next packet */
   1199 		++txd;
   1200 		++buf;
   1201 		++work;
   1202 		/* reset with a wrap */
   1203 		if (__predict_false(!work)) {
   1204 			work -= txr->num_desc;
   1205 			buf = txr->tx_buffers;
   1206 			txd = txr->tx_base;
   1207 		}
   1208 		prefetch(txd);
   1209 	} while (__predict_true(--limit));
   1210 
   1211 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1212 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   1213 
   1214 	work += txr->num_desc;
   1215 	txr->next_to_clean = work;
   1216 
   1217 	/*
   1218 	 * Queue hang detection: we know there is work
   1219 	 * outstanding or the early return above would
   1220 	 * have been taken, so increment busy if nothing
   1221 	 * was cleaned. The local timer then checks this
   1222 	 * counter and marks the queue HUNG if it exceeds
   1223 	 * the maximum number of attempts.
   1224 	 */
   1225 	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
   1226 		++txr->busy;
   1227 	/*
   1228 	 * If anything was cleaned we reset the state to 1;
   1229 	 * note that this clears HUNG if it is set.
   1230 	 */
   1231 	if (processed)
   1232 		txr->busy = 1;
   1233 
   1234 	if (txr->tx_avail == txr->num_desc)
   1235 		txr->busy = 0;
   1236 
   1237 	return ((limit > 0) ? false : true);
   1238 } /* ixgbe_txeof */
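
/*
 * A plausible caller pattern, shown here as a sketch only (the real
 * queue handler lives in ixgbe.c and its names are assumptions): the
 * return value above reports whether the cleanup limit was hit and more
 * work may remain, so a handler can keep polling before re-enabling the
 * queue interrupt:
 *
 *	IXGBE_TX_LOCK(txr);
 *	more = ixgbe_txeof(txr);
 *	IXGBE_TX_UNLOCK(txr);
 *	if (more)
 *		softint_schedule(que->que_si);
 *	else
 *		ixgbe_enable_queue(adapter, que->msix);
 */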
   1239 
   1240 /************************************************************************
   1241  * ixgbe_rsc_count
   1242  *
   1243  *   Used to detect a descriptor that has been merged by Hardware RSC.
   1244  ************************************************************************/
   1245 static inline u32
   1246 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
   1247 {
   1248 	return (le32toh(rx->wb.lower.lo_dword.data) &
   1249 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
   1250 } /* ixgbe_rsc_count */
   1251 
   1252 /************************************************************************
   1253  * ixgbe_setup_hw_rsc
   1254  *
   1255  *   Initialize Hardware RSC (LRO) feature on 82599
   1256  *   for an RX ring; this is toggled by the LRO capability
   1257  *   even though it is transparent to the stack.
   1258  *
   1259  *   NOTE: Since this HW feature only works with IPv4 and
   1260  *         testing has shown soft LRO to be as effective,
   1261  *         this feature will be disabled by default.
   1262  ************************************************************************/
   1263 static void
   1264 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
   1265 {
   1266 	struct	adapter  *adapter = rxr->adapter;
   1267 	struct	ixgbe_hw *hw = &adapter->hw;
   1268 	u32              rscctrl, rdrxctl;
   1269 
   1270 	/* If turning LRO/RSC off we need to disable it */
   1271 	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
   1272 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
   1273 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
        		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
   1274 		return;
   1275 	}
   1276 
   1277 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
   1278 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
   1279 #ifdef DEV_NETMAP
   1280 	/* Always strip CRC unless Netmap disabled it */
   1281 	if (!(adapter->feat_en & IXGBE_FEATURE_NETMAP) ||
   1282 	    !(adapter->ifp->if_capenable & IFCAP_NETMAP) ||
   1283 	    ix_crcstrip)
   1284 #endif /* DEV_NETMAP */
   1285 		rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
   1286 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
   1287 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
   1288 
   1289 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
   1290 	rscctrl |= IXGBE_RSCCTL_RSCEN;
   1291 	/*
   1292 	 * Limit the total number of descriptors that
   1293 	 * can be combined, so it does not exceed 64K
   1294 	 */
   1295 	if (rxr->mbuf_sz == MCLBYTES)
   1296 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
   1297 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
   1298 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
   1299 	else if (rxr->mbuf_sz == MJUM9BYTES)
   1300 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
   1301 	else  /* Using 16K cluster */
   1302 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
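
	/*
	 * Worked example (illustrative): the completed RSC packet length
	 * is reported in a 16-bit field, so a merge may not exceed 64 KB.
	 * With MJUMPAGESIZE (4 KB) clusters, MAXDESC_8 caps a merge at
	 * 8 * 4 KB = 32 KB, comfortably below that limit, while
	 * 16 * 4 KB = 64 KB would sit right at it; hence larger clusters
	 * get smaller MAXDESC values.
	 */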
   1303 
   1304 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
   1305 
   1306 	/* Enable TCP header recognition */
   1307 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
   1308 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR));
   1309 
   1310 	/* Disable RSC for ACK packets */
   1311 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
   1312 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
   1313 
   1314 	rxr->hw_rsc = TRUE;
   1315 } /* ixgbe_setup_hw_rsc */
   1316 
   1317 /************************************************************************
   1318  * ixgbe_refresh_mbufs
   1319  *
   1320  *   Refresh mbuf buffers for RX descriptor rings
   1321  *    - now keeps its own state so discards due to resource
   1322  *      exhaustion are unnecessary, if an mbuf cannot be obtained
   1323  *      it just returns, keeping its placeholder, thus it can simply
   1324  *      be recalled to try again.
   1325  *
   1326  *   XXX NetBSD TODO:
   1327  *    - The ixgbe_rxeof() function always preallocates mbuf cluster (jcl),
   1328  *      so the ixgbe_refresh_mbufs() function can be simplified.
   1329  *
   1330  ************************************************************************/
   1331 static void
   1332 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
   1333 {
   1334 	struct adapter      *adapter = rxr->adapter;
   1335 	struct ixgbe_rx_buf *rxbuf;
   1336 	struct mbuf         *mp;
   1337 	int                 i, j, error;
   1338 	bool                refreshed = false;
   1339 
   1340 	i = j = rxr->next_to_refresh;
   1341 	/* Control the loop with one beyond */
   1342 	if (++j == rxr->num_desc)
   1343 		j = 0;
   1344 
   1345 	while (j != limit) {
   1346 		rxbuf = &rxr->rx_buffers[i];
   1347 		if (rxbuf->buf == NULL) {
   1348 			mp = ixgbe_getjcl(&rxr->jcl_head, M_NOWAIT,
   1349 			    MT_DATA, M_PKTHDR, rxr->mbuf_sz);
   1350 			if (mp == NULL) {
   1351 				rxr->no_jmbuf.ev_count++;
   1352 				goto update;
   1353 			}
   1354 			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
   1355 				m_adj(mp, ETHER_ALIGN);
   1356 		} else
   1357 			mp = rxbuf->buf;
   1358 
   1359 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   1360 
   1361 		/* If we're dealing with an mbuf that was copied rather
   1362 		 * than replaced, there's no need to go through busdma.
   1363 		 */
   1364 		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
   1365 			/* Get the memory mapping */
   1366 			ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   1367 			error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
   1368 			    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
   1369 			if (error != 0) {
   1370 				device_printf(adapter->dev, "Refresh mbufs: "
   1371 				    "payload dmamap load failure - %d\n",
   1372 				    error);
   1373 				m_free(mp);
   1374 				rxbuf->buf = NULL;
   1375 				goto update;
   1376 			}
   1377 			rxbuf->buf = mp;
   1378 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1379 			    0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
   1380 			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
   1381 			    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1382 		} else {
   1383 			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
   1384 			rxbuf->flags &= ~IXGBE_RX_COPY;
   1385 		}
   1386 
   1387 		refreshed = true;
   1388 		/* Next is precalculated */
   1389 		i = j;
   1390 		rxr->next_to_refresh = i;
   1391 		if (++j == rxr->num_desc)
   1392 			j = 0;
   1393 	}
   1394 
   1395 update:
   1396 	if (refreshed) /* Update hardware tail index */
   1397 		IXGBE_WRITE_REG(&adapter->hw, rxr->tail, rxr->next_to_refresh);
   1398 
   1399 	return;
   1400 } /* ixgbe_refresh_mbufs */
   1401 
   1402 /************************************************************************
   1403  * ixgbe_allocate_receive_buffers
   1404  *
   1405  *   Allocate memory for rx_buffer structures. Since we use one
   1406  *   rx_buffer per received packet, the maximum number of rx_buffers
   1407  *   that we'll need is equal to the number of receive descriptors
   1408  *   that we've allocated.
   1409  ************************************************************************/
   1410 static int
   1411 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
   1412 {
   1413 	struct adapter      *adapter = rxr->adapter;
   1414 	device_t            dev = adapter->dev;
   1415 	struct ixgbe_rx_buf *rxbuf;
   1416 	int                 bsize, error;
   1417 
   1418 	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
   1419 	rxr->rx_buffers = malloc(bsize, M_DEVBUF, M_WAITOK | M_ZERO);
   1420 
   1421 	error = ixgbe_dma_tag_create(
   1422 	         /*      parent */ adapter->osdep.dmat,
   1423 	         /*   alignment */ 1,
   1424 	         /*      bounds */ 0,
   1425 	         /*     maxsize */ MJUM16BYTES,
   1426 	         /*   nsegments */ 1,
   1427 	         /*  maxsegsize */ MJUM16BYTES,
   1428 	         /*       flags */ 0,
   1429 	                           &rxr->ptag);
   1430 	if (error != 0) {
   1431 		aprint_error_dev(dev, "Unable to create RX DMA tag\n");
   1432 		goto fail;
   1433 	}
   1434 
   1435 	for (int i = 0; i < rxr->num_desc; i++) {
   1436 		rxbuf = &rxr->rx_buffers[i];
   1437 		error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
   1438 		if (error) {
   1439 			aprint_error_dev(dev, "Unable to create RX dma map\n");
   1440 			goto fail;
   1441 		}
   1442 	}
   1443 
   1444 	return (0);
   1445 
   1446 fail:
   1447 	/* Frees all, but can handle partial completion */
   1448 	ixgbe_free_receive_structures(adapter);
   1449 
   1450 	return (error);
   1451 } /* ixgbe_allocate_receive_buffers */
   1452 
   1453 /************************************************************************
   1454  * ixgbe_free_receive_ring
   1455  ************************************************************************/
   1456 static void
   1457 ixgbe_free_receive_ring(struct rx_ring *rxr)
   1458 {
   1459 	for (int i = 0; i < rxr->num_desc; i++) {
   1460 		ixgbe_rx_discard(rxr, i);
   1461 	}
   1462 } /* ixgbe_free_receive_ring */
   1463 
   1464 /************************************************************************
   1465  * ixgbe_setup_receive_ring
   1466  *
   1467  *   Initialize a receive ring and its buffers.
   1468  ************************************************************************/
   1469 static int
   1470 ixgbe_setup_receive_ring(struct rx_ring *rxr)
   1471 {
   1472 	struct adapter        *adapter;
   1473 	struct ixgbe_rx_buf   *rxbuf;
   1474 #ifdef LRO
   1475 	struct ifnet          *ifp;
   1476 	struct lro_ctrl       *lro = &rxr->lro;
   1477 #endif /* LRO */
   1478 #ifdef DEV_NETMAP
   1479 	struct netmap_adapter *na = NA(rxr->adapter->ifp);
   1480 	struct netmap_slot    *slot;
   1481 #endif /* DEV_NETMAP */
   1482 	int                   rsize, error = 0;
   1483 
   1484 	adapter = rxr->adapter;
   1485 #ifdef LRO
   1486 	ifp = adapter->ifp;
   1487 #endif /* LRO */
   1488 
   1489 	/* Clear the ring contents */
   1490 	IXGBE_RX_LOCK(rxr);
   1491 
   1492 #ifdef DEV_NETMAP
   1493 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP)
   1494 		slot = netmap_reset(na, NR_RX, rxr->me, 0);
   1495 #endif /* DEV_NETMAP */
   1496 
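         	/*
         	 * Zero the entire descriptor area; its size is rounded up to a
         	 * multiple of DBA_ALIGN, matching the allocation done at attach.
         	 */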
   1497 	rsize = roundup2(adapter->num_rx_desc *
   1498 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
   1499 	bzero((void *)rxr->rx_base, rsize);
   1500 	/* Cache the size */
   1501 	rxr->mbuf_sz = adapter->rx_mbuf_sz;
   1502 
   1503 	/* Free current RX buffer structs and their mbufs */
   1504 	ixgbe_free_receive_ring(rxr);
   1505 
   1506 	IXGBE_RX_UNLOCK(rxr);
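         	/*
         	 * XXX The RX lock is dropped here on the assumption that
         	 * XXX ixgbe_jcl_reinit() may sleep while rebuilding the jumbo
         	 * XXX cluster pool.
         	 */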
   1507 	/*
   1508 	 * Now reinitialize our supply of jumbo mbufs.  The number
   1509 	 * or size of jumbo mbufs may have changed.
    1510 	 * We assume that rxr->ptag is the same for every ring.
   1511 	 */
   1512 	ixgbe_jcl_reinit(adapter, rxr->ptag->dt_dmat, rxr,
   1513 	    adapter->num_jcl, adapter->rx_mbuf_sz);
   1514 
   1515 	IXGBE_RX_LOCK(rxr);
   1516 
   1517 	/* Now replenish the mbufs */
   1518 	for (int j = 0; j != rxr->num_desc; ++j) {
   1519 		struct mbuf *mp;
   1520 
   1521 		rxbuf = &rxr->rx_buffers[j];
   1522 
   1523 #ifdef DEV_NETMAP
   1524 		/*
   1525 		 * In netmap mode, fill the map and set the buffer
   1526 		 * address in the NIC ring, considering the offset
   1527 		 * between the netmap and NIC rings (see comment in
    1528 		 * ixgbe_setup_transmit_ring()). No need to allocate
   1529 		 * an mbuf, so end the block with a continue;
   1530 		 */
   1531 		if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
   1532 			int sj = netmap_idx_n2k(na->rx_rings[rxr->me], j);
   1533 			uint64_t paddr;
   1534 			void *addr;
   1535 
   1536 			addr = PNMB(na, slot + sj, &paddr);
   1537 			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
   1538 			/* Update descriptor and the cached value */
   1539 			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
   1540 			rxbuf->addr = htole64(paddr);
   1541 			continue;
   1542 		}
   1543 #endif /* DEV_NETMAP */
   1544 
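         		/*
         		 * Normal (non-netmap) path: take a cluster from this ring's
         		 * jumbo cluster pool and load it into the per-buffer DMA map.
         		 */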
   1545 		rxbuf->flags = 0;
   1546 		rxbuf->buf = ixgbe_getjcl(&rxr->jcl_head, M_NOWAIT,
   1547 		    MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
   1548 		if (rxbuf->buf == NULL) {
   1549 			error = ENOBUFS;
   1550 			goto fail;
   1551 		}
   1552 		mp = rxbuf->buf;
   1553 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   1554 		/* Get the memory mapping */
   1555 		error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat, rxbuf->pmap,
   1556 		    mp, BUS_DMA_NOWAIT);
   1557 		if (error != 0)
    1558 			goto fail;
   1559 		bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1560 		    0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
   1561 		/* Update the descriptor and the cached value */
   1562 		rxr->rx_base[j].read.pkt_addr =
   1563 		    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1564 		rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1565 	}
   1566 
   1567 	/* Setup our descriptor indices */
   1568 	rxr->next_to_check = 0;
   1569 	rxr->next_to_refresh = 0;
   1570 	rxr->lro_enabled = FALSE;
   1571 	rxr->rx_copies.ev_count = 0;
   1572 #if 0 /* NetBSD */
   1573 	rxr->rx_bytes.ev_count = 0;
   1574 #if 1	/* Fix inconsistency */
   1575 	rxr->rx_packets.ev_count = 0;
   1576 #endif
   1577 #endif
   1578 	rxr->vtag_strip = FALSE;
   1579 
   1580 	ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   1581 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   1582 
   1583 	/*
   1584 	 * Now set up the LRO interface
   1585 	 */
   1586 	if (ixgbe_rsc_enable)
   1587 		ixgbe_setup_hw_rsc(rxr);
   1588 #ifdef LRO
   1589 	else if (ifp->if_capenable & IFCAP_LRO) {
   1590 		device_t dev = adapter->dev;
   1591 		int err = tcp_lro_init(lro);
   1592 		if (err) {
   1593 			device_printf(dev, "LRO Initialization failed!\n");
   1594 			goto fail;
   1595 		}
   1596 		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
   1597 		rxr->lro_enabled = TRUE;
   1598 		lro->ifp = adapter->ifp;
   1599 	}
   1600 #endif /* LRO */
   1601 
   1602 	IXGBE_RX_UNLOCK(rxr);
   1603 
   1604 	return (0);
   1605 
   1606 fail:
   1607 	ixgbe_free_receive_ring(rxr);
   1608 	IXGBE_RX_UNLOCK(rxr);
   1609 
   1610 	return (error);
   1611 } /* ixgbe_setup_receive_ring */
   1612 
   1613 /************************************************************************
   1614  * ixgbe_setup_receive_structures - Initialize all receive rings.
   1615  ************************************************************************/
   1616 int
   1617 ixgbe_setup_receive_structures(struct adapter *adapter)
   1618 {
   1619 	struct rx_ring *rxr = adapter->rx_rings;
   1620 	int            j;
   1621 
   1622 	INIT_DEBUGOUT("ixgbe_setup_receive_structures");
   1623 	for (j = 0; j < adapter->num_queues; j++, rxr++)
   1624 		if (ixgbe_setup_receive_ring(rxr))
   1625 			goto fail;
   1626 
   1627 	return (0);
   1628 fail:
   1629 	/*
    1630 	 * Free the RX buffers allocated so far; we only handle
    1631 	 * the rings that completed.  The failing ring has cleaned
    1632 	 * up for itself; 'j' failed, so it's the terminus.
   1633 	 */
   1634 	for (int i = 0; i < j; ++i) {
   1635 		rxr = &adapter->rx_rings[i];
   1636 		IXGBE_RX_LOCK(rxr);
   1637 		ixgbe_free_receive_ring(rxr);
   1638 		IXGBE_RX_UNLOCK(rxr);
   1639 	}
   1640 
   1641 	return (ENOBUFS);
   1642 } /* ixgbe_setup_receive_structures */
   1643 
   1644 
   1645 /************************************************************************
   1646  * ixgbe_free_receive_structures - Free all receive rings.
   1647  ************************************************************************/
   1648 void
   1649 ixgbe_free_receive_structures(struct adapter *adapter)
   1650 {
   1651 	struct rx_ring *rxr = adapter->rx_rings;
   1652 
   1653 	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
   1654 
   1655 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
   1656 		ixgbe_free_receive_buffers(rxr);
   1657 #ifdef LRO
   1658 		/* Free LRO memory */
   1659 		tcp_lro_free(&rxr->lro);
   1660 #endif /* LRO */
   1661 		/* Free the ring memory as well */
   1662 		ixgbe_dma_free(adapter, &rxr->rxdma);
   1663 		IXGBE_RX_LOCK_DESTROY(rxr);
   1664 	}
   1665 
   1666 	free(adapter->rx_rings, M_DEVBUF);
   1667 } /* ixgbe_free_receive_structures */
   1668 
   1669 
   1670 /************************************************************************
   1671  * ixgbe_free_receive_buffers - Free receive ring data structures
   1672  ************************************************************************/
   1673 static void
   1674 ixgbe_free_receive_buffers(struct rx_ring *rxr)
   1675 {
   1676 	struct adapter      *adapter = rxr->adapter;
   1677 	struct ixgbe_rx_buf *rxbuf;
   1678 
   1679 	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
   1680 
   1681 	/* Cleanup any existing buffers */
   1682 	if (rxr->rx_buffers != NULL) {
   1683 		for (int i = 0; i < adapter->num_rx_desc; i++) {
   1684 			rxbuf = &rxr->rx_buffers[i];
   1685 			ixgbe_rx_discard(rxr, i);
   1686 			if (rxbuf->pmap != NULL) {
   1687 				ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
   1688 				rxbuf->pmap = NULL;
   1689 			}
   1690 		}
   1691 
   1692 		/* NetBSD specific. See ixgbe_netbsd.c */
   1693 		ixgbe_jcl_destroy(adapter, rxr);
   1694 
    1695 		free(rxr->rx_buffers, M_DEVBUF);
    1696 		rxr->rx_buffers = NULL;
   1699 	}
   1700 
   1701 	if (rxr->ptag != NULL) {
   1702 		ixgbe_dma_tag_destroy(rxr->ptag);
   1703 		rxr->ptag = NULL;
   1704 	}
   1705 
   1706 	return;
   1707 } /* ixgbe_free_receive_buffers */
   1708 
   1709 /************************************************************************
   1710  * ixgbe_rx_input
   1711  ************************************************************************/
   1712 static __inline void
   1713 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m,
   1714     u32 ptype)
   1715 {
   1716 	struct adapter	*adapter = ifp->if_softc;
   1717 
   1718 #ifdef LRO
   1719 	struct ethercom *ec = &adapter->osdep.ec;
   1720 
   1721 	/*
    1722 	 * At the moment LRO is only for IP/TCP packets whose TCP checksum
    1723 	 * was computed by the hardware and that carry no VLAN tag in the
    1724 	 * ethernet header.  For IPv6 we do not yet support extension headers.
   1725 	 */
    1726 	if (rxr->lro_enabled &&
    1727 	    (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
    1728 	    (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
    1729 	    ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
    1730 	    (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
    1731 	    (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
    1732 	    (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
    1733 	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
    1734 	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
    1735 		/*
    1736 		 * Send to the stack if:
    1737 		 *  - LRO not enabled, or
    1738 		 *  - no LRO resources, or
    1739 		 *  - lro enqueue fails
    1740 		 */
    1741 		if (rxr->lro.lro_cnt != 0)
    1742 			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
    1743 				return;
    1744 	}
   1745 #endif /* LRO */
   1746 
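         	/*
         	 * Default path: hand the packet to the stack via the per-CPU
         	 * input queue.
         	 */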
   1747 	if_percpuq_enqueue(adapter->ipq, m);
   1748 } /* ixgbe_rx_input */
   1749 
   1750 /************************************************************************
   1751  * ixgbe_rx_discard
   1752  ************************************************************************/
   1753 static __inline void
   1754 ixgbe_rx_discard(struct rx_ring *rxr, int i)
   1755 {
   1756 	struct ixgbe_rx_buf *rbuf;
   1757 
   1758 	rbuf = &rxr->rx_buffers[i];
   1759 
   1760 	/*
   1761 	 * With advanced descriptors the writeback clobbers the buffer addrs,
    1762 	 * so it's easier to just free the existing mbufs and take the
    1763 	 * normal refresh path to get new buffers and mappings.
   1764 	 */
   1765 
    1766 	if (rbuf->fmp != NULL) {	/* Partial chain? */
   1767 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
   1768 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
   1769 		m_freem(rbuf->fmp);
   1770 		rbuf->fmp = NULL;
   1771 		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
   1772 	} else if (rbuf->buf) {
   1773 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
   1774 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
   1775 		m_free(rbuf->buf);
   1776 		rbuf->buf = NULL;
   1777 	}
   1778 	ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
   1779 
   1780 	rbuf->flags = 0;
   1781 
   1782 	return;
   1783 } /* ixgbe_rx_discard */
   1784 
   1785 
   1786 /************************************************************************
   1787  * ixgbe_rxeof
   1788  *
    1789  *   Executes in interrupt context. It replenishes the
    1790  *   mbufs in the descriptor ring and passes data that has
    1791  *   been DMA'd into host memory up to the upper layer.
   1792  *
   1793  *   Return TRUE for more work, FALSE for all clean.
   1794  ************************************************************************/
   1795 bool
   1796 ixgbe_rxeof(struct ix_queue *que)
   1797 {
   1798 	struct adapter		*adapter = que->adapter;
   1799 	struct rx_ring		*rxr = que->rxr;
   1800 	struct ifnet		*ifp = adapter->ifp;
   1801 #ifdef LRO
   1802 	struct lro_ctrl		*lro = &rxr->lro;
   1803 #endif /* LRO */
   1804 	union ixgbe_adv_rx_desc	*cur;
   1805 	struct ixgbe_rx_buf	*rbuf, *nbuf;
   1806 	int			i, nextp, processed = 0;
   1807 	u32			staterr = 0;
   1808 	u32			count = 0;
   1809 	u32			limit = adapter->rx_process_limit;
   1810 	bool			discard_multidesc = false;
   1811 #ifdef RSS
   1812 	u16			pkt_info;
   1813 #endif
   1814 
   1815 	IXGBE_RX_LOCK(rxr);
   1816 
   1817 #ifdef DEV_NETMAP
   1818 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
   1819 		/* Same as the txeof routine: wakeup clients on intr. */
   1820 		if (netmap_rx_irq(ifp, rxr->me, &processed)) {
   1821 			IXGBE_RX_UNLOCK(rxr);
   1822 			return (FALSE);
   1823 		}
   1824 	}
   1825 #endif /* DEV_NETMAP */
   1826 
   1827 	/*
    1828 	 * The maximum number of loop iterations is rx_process_limit. If
    1829 	 * discard_multidesc is true, continue processing so that a broken
    1830 	 * packet is not passed to the upper layer.
   1831 	 */
   1832 	for (i = rxr->next_to_check;
   1833 	     (count < limit) || (discard_multidesc == true);) {
   1834 
   1835 		struct mbuf *sendmp, *mp;
   1836 		struct mbuf *newmp;
   1837 		u32         rsc, ptype;
   1838 		u16         len;
   1839 		u16         vtag = 0;
   1840 		bool        eop;
   1841 
   1842 		/* Sync the ring. */
   1843 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   1844 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   1845 
   1846 		cur = &rxr->rx_base[i];
   1847 		staterr = le32toh(cur->wb.upper.status_error);
   1848 #ifdef RSS
   1849 		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
   1850 #endif
   1851 
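         		/*
         		 * DD (descriptor done) is set by the hardware once the
         		 * descriptor has been written back; stop at the first
         		 * descriptor that is not yet done.
         		 */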
   1852 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
   1853 			break;
   1854 
   1855 		count++;
   1856 		sendmp = NULL;
   1857 		nbuf = NULL;
   1858 		rsc = 0;
   1859 		cur->wb.upper.status_error = 0;
   1860 		rbuf = &rxr->rx_buffers[i];
   1861 		mp = rbuf->buf;
   1862 
   1863 		len = le16toh(cur->wb.upper.length);
   1864 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
   1865 		    IXGBE_RXDADV_PKTTYPE_MASK;
   1866 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
   1867 
   1868 		/* Make sure bad packets are discarded */
   1869 		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
   1870 #if __FreeBSD_version >= 1100036
   1871 			if (adapter->feat_en & IXGBE_FEATURE_VF)
   1872 				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
   1873 #endif
   1874 			rxr->rx_discarded.ev_count++;
   1875 			ixgbe_rx_discard(rxr, i);
   1876 			discard_multidesc = false;
   1877 			goto next_desc;
   1878 		}
   1879 
    1880 		/* Pre-allocate the replacement mbuf before touching this one */
   1881 		if (!discard_multidesc)
   1882 			newmp = ixgbe_getjcl(&rxr->jcl_head, M_NOWAIT, MT_DATA,
   1883 			    M_PKTHDR, rxr->mbuf_sz);
   1884 		else
   1885 			newmp = NULL;
   1886 		if (newmp == NULL) {
   1887 			rxr->no_jmbuf.ev_count++;
   1888 			/*
   1889 			 * Descriptor initialization is already done by the
   1890 			 * above code (cur->wb.upper.status_error = 0).
    1891 			 * So we can reuse the current rbuf->buf for a new packet.
   1892 			 *
   1893 			 * Rewrite the buffer addr, see comment in
   1894 			 * ixgbe_rx_discard().
   1895 			 */
   1896 			cur->read.pkt_addr = rbuf->addr;
   1897 			m_freem(rbuf->fmp);
   1898 			rbuf->fmp = NULL;
   1899 			if (!eop) {
   1900 				/* Discard the entire packet. */
   1901 				discard_multidesc = true;
   1902 			} else
   1903 				discard_multidesc = false;
   1904 			goto next_desc;
   1905 		}
   1906 		discard_multidesc = false;
   1907 
   1908 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
   1909 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
   1910 
   1911 		/*
    1912 		 * On the 82599, which supports hardware
    1913 		 * LRO (called HW RSC), packets need not
    1914 		 * be fragmented across sequential
    1915 		 * descriptors; rather, the next descriptor
    1916 		 * is indicated in bits of the descriptor.
    1917 		 * This also means that we might process
    1918 		 * more than one packet at a time, something
    1919 		 * that had never been true before; it
    1920 		 * required eliminating the global chain
    1921 		 * pointers in favor of what we do here.  -jfv
   1922 		 */
   1923 		if (!eop) {
   1924 			/*
   1925 			 * Figure out the next descriptor
   1926 			 * of this frame.
   1927 			 */
   1928 			if (rxr->hw_rsc == TRUE) {
   1929 				rsc = ixgbe_rsc_count(cur);
   1930 				rxr->rsc_num += (rsc - 1);
   1931 			}
   1932 			if (rsc) { /* Get hardware index */
   1933 				nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
   1934 				    IXGBE_RXDADV_NEXTP_SHIFT);
   1935 			} else { /* Just sequential */
   1936 				nextp = i + 1;
   1937 				if (nextp == adapter->num_rx_desc)
   1938 					nextp = 0;
   1939 			}
   1940 			nbuf = &rxr->rx_buffers[nextp];
   1941 			prefetch(nbuf);
   1942 		}
   1943 		/*
   1944 		 * Rather than using the fmp/lmp global pointers
   1945 		 * we now keep the head of a packet chain in the
   1946 		 * buffer struct and pass this along from one
   1947 		 * descriptor to the next, until we get EOP.
   1948 		 */
   1949 		mp->m_len = len;
   1950 		/*
    1951 		 * See if there is a stored head from a previous
    1952 		 * descriptor; if so, this is a secondary fragment.
   1953 		 */
   1954 		sendmp = rbuf->fmp;
   1955 		if (sendmp != NULL) {  /* secondary frag */
   1956 			rbuf->buf = newmp;
   1957 			rbuf->fmp = NULL;
   1958 			mp->m_flags &= ~M_PKTHDR;
   1959 			sendmp->m_pkthdr.len += mp->m_len;
   1960 		} else {
   1961 			/*
   1962 			 * Optimize.  This might be a small packet,
   1963 			 * maybe just a TCP ACK.  Do a fast copy that
   1964 			 * is cache aligned into a new mbuf, and
   1965 			 * leave the old mbuf+cluster for re-use.
   1966 			 */
   1967 			if (eop && len <= IXGBE_RX_COPY_LEN) {
   1968 				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
   1969 				if (sendmp != NULL) {
   1970 					sendmp->m_data += IXGBE_RX_COPY_ALIGN;
   1971 					ixgbe_bcopy(mp->m_data, sendmp->m_data,
   1972 					    len);
   1973 					sendmp->m_len = len;
   1974 					rxr->rx_copies.ev_count++;
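         					/*
         					 * IXGBE_RX_COPY tells the
         					 * refresh path that this
         					 * cluster is still loaded
         					 * and can be reposted as is.
         					 */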
   1975 					rbuf->flags |= IXGBE_RX_COPY;
   1976 
   1977 					m_freem(newmp);
   1978 				}
   1979 			}
   1980 			if (sendmp == NULL) {
   1981 				rbuf->buf = newmp;
   1982 				rbuf->fmp = NULL;
   1983 				sendmp = mp;
   1984 			}
   1985 
    1986 			/* first desc of a non-packet-split chain */
   1987 			sendmp->m_flags |= M_PKTHDR;
   1988 			sendmp->m_pkthdr.len = mp->m_len;
   1989 		}
   1990 		++processed;
   1991 
   1992 		/* Pass the head pointer on */
   1993 		if (eop == 0) {
   1994 			nbuf->fmp = sendmp;
   1995 			sendmp = NULL;
   1996 			mp->m_next = nbuf->buf;
   1997 		} else { /* Sending this frame */
   1998 			m_set_rcvif(sendmp, ifp);
   1999 			++rxr->packets;
   2000 			rxr->rx_packets.ev_count++;
    2001 			/* capture data for AIM (adaptive interrupt moderation) */
   2002 			rxr->bytes += sendmp->m_pkthdr.len;
   2003 			rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
   2004 			/* Process vlan info */
   2005 			if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
   2006 				vtag = le16toh(cur->wb.upper.vlan);
   2007 			if (vtag) {
   2008 				vlan_set_tag(sendmp, vtag);
   2009 			}
   2010 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
   2011 				ixgbe_rx_checksum(staterr, sendmp, ptype,
   2012 				   &adapter->stats.pf);
   2013 			}
   2014 
   2015 #if 0 /* FreeBSD */
   2016 			/*
   2017 			 * In case of multiqueue, we have RXCSUM.PCSD bit set
   2018 			 * and never cleared. This means we have RSS hash
   2019 			 * available to be used.
   2020 			 */
   2021 			if (adapter->num_queues > 1) {
   2022 				sendmp->m_pkthdr.flowid =
   2023 				    le32toh(cur->wb.lower.hi_dword.rss);
   2024 				switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
   2025 				case IXGBE_RXDADV_RSSTYPE_IPV4:
   2026 					M_HASHTYPE_SET(sendmp,
   2027 					    M_HASHTYPE_RSS_IPV4);
   2028 					break;
   2029 				case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
   2030 					M_HASHTYPE_SET(sendmp,
   2031 					    M_HASHTYPE_RSS_TCP_IPV4);
   2032 					break;
   2033 				case IXGBE_RXDADV_RSSTYPE_IPV6:
   2034 					M_HASHTYPE_SET(sendmp,
   2035 					    M_HASHTYPE_RSS_IPV6);
   2036 					break;
   2037 				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
   2038 					M_HASHTYPE_SET(sendmp,
   2039 					    M_HASHTYPE_RSS_TCP_IPV6);
   2040 					break;
   2041 				case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
   2042 					M_HASHTYPE_SET(sendmp,
   2043 					    M_HASHTYPE_RSS_IPV6_EX);
   2044 					break;
   2045 				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
   2046 					M_HASHTYPE_SET(sendmp,
   2047 					    M_HASHTYPE_RSS_TCP_IPV6_EX);
   2048 					break;
   2049 #if __FreeBSD_version > 1100000
   2050 				case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
   2051 					M_HASHTYPE_SET(sendmp,
   2052 					    M_HASHTYPE_RSS_UDP_IPV4);
   2053 					break;
   2054 				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
   2055 					M_HASHTYPE_SET(sendmp,
   2056 					    M_HASHTYPE_RSS_UDP_IPV6);
   2057 					break;
   2058 				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
   2059 					M_HASHTYPE_SET(sendmp,
   2060 					    M_HASHTYPE_RSS_UDP_IPV6_EX);
   2061 					break;
   2062 #endif
   2063 				default:
   2064 					M_HASHTYPE_SET(sendmp,
   2065 					    M_HASHTYPE_OPAQUE_HASH);
   2066 				}
   2067 			} else {
   2068 				sendmp->m_pkthdr.flowid = que->msix;
   2069 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
   2070 			}
   2071 #endif
   2072 		}
   2073 next_desc:
   2074 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   2075 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   2076 
   2077 		/* Advance our pointers to the next descriptor. */
   2078 		if (++i == rxr->num_desc)
   2079 			i = 0;
   2080 
   2081 		/* Now send to the stack or do LRO */
   2082 		if (sendmp != NULL) {
   2083 			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
   2084 		}
   2085 
   2086 		/* Every 8 descriptors we go to refresh mbufs */
   2087 		if (processed == 8) {
   2088 			ixgbe_refresh_mbufs(rxr, i);
   2089 			processed = 0;
   2090 		}
   2091 	}
   2092 
   2093 	/* Refresh any remaining buf structs */
   2094 	if (ixgbe_rx_unrefreshed(rxr))
   2095 		ixgbe_refresh_mbufs(rxr, i);
   2096 
   2097 	rxr->next_to_check = i;
   2098 
   2099 	IXGBE_RX_UNLOCK(rxr);
   2100 
   2101 #ifdef LRO
   2102 	/*
   2103 	 * Flush any outstanding LRO work
   2104 	 */
   2105 	tcp_lro_flush_all(lro);
   2106 #endif /* LRO */
   2107 
   2108 	/*
   2109 	 * Still have cleaning to do?
   2110 	 */
   2111 	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
   2112 		return (TRUE);
   2113 
   2114 	return (FALSE);
   2115 } /* ixgbe_rxeof */
   2116 
   2117 
   2118 /************************************************************************
   2119  * ixgbe_rx_checksum
   2120  *
   2121  *   Verify that the hardware indicated that the checksum is valid.
    2122  *   Inform the stack about the status of the checksum so that the
    2123  *   stack doesn't spend time verifying it again.
   2124  ************************************************************************/
   2125 static void
   2126 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
   2127     struct ixgbe_hw_stats *stats)
   2128 {
   2129 	u16  status = (u16)staterr;
   2130 	u8   errors = (u8)(staterr >> 24);
   2131 #if 0
   2132 	bool sctp = false;
   2133 
   2134 	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
   2135 	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
   2136 		sctp = true;
   2137 #endif
   2138 
   2139 	/* IPv4 checksum */
   2140 	if (status & IXGBE_RXD_STAT_IPCS) {
   2141 		stats->ipcs.ev_count++;
   2142 		if (!(errors & IXGBE_RXD_ERR_IPE)) {
   2143 			/* IP Checksum Good */
   2144 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
   2145 		} else {
   2146 			stats->ipcs_bad.ev_count++;
   2147 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
   2148 		}
   2149 	}
   2150 	/* TCP/UDP/SCTP checksum */
   2151 	if (status & IXGBE_RXD_STAT_L4CS) {
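         		/*
         		 * L4CS only indicates that the hardware checked an L4
         		 * (TCP/UDP) checksum; it does not say which protocol or
         		 * IP version, so all four M_CSUM_* type bits are set and
         		 * the stack is expected to consult only the one that
         		 * matches the packet.
         		 */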
   2152 		stats->l4cs.ev_count++;
   2153 		int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
   2154 		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
   2155 			mp->m_pkthdr.csum_flags |= type;
   2156 		} else {
   2157 			stats->l4cs_bad.ev_count++;
   2158 			mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
   2159 		}
   2160 	}
   2161 } /* ixgbe_rx_checksum */
   2162 
   2163 /************************************************************************
   2164  * ixgbe_dma_malloc
   2165  ************************************************************************/
   2166 int
   2167 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
   2168 		struct ixgbe_dma_alloc *dma, const int mapflags)
   2169 {
   2170 	device_t dev = adapter->dev;
   2171 	int      r, rsegs;
   2172 
   2173 	r = ixgbe_dma_tag_create(
   2174 	     /*      parent */ adapter->osdep.dmat,
   2175 	     /*   alignment */ DBA_ALIGN,
   2176 	     /*      bounds */ 0,
   2177 	     /*     maxsize */ size,
   2178 	     /*   nsegments */ 1,
   2179 	     /*  maxsegsize */ size,
   2180 	     /*       flags */ BUS_DMA_ALLOCNOW,
   2181 			       &dma->dma_tag);
   2182 	if (r != 0) {
   2183 		aprint_error_dev(dev,
   2184 		    "%s: ixgbe_dma_tag_create failed; error %d\n", __func__,
   2185 		    r);
   2186 		goto fail_0;
   2187 	}
   2188 
   2189 	r = bus_dmamem_alloc(dma->dma_tag->dt_dmat, size,
   2190 	    dma->dma_tag->dt_alignment, dma->dma_tag->dt_boundary,
   2191 	    &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
   2192 	if (r != 0) {
   2193 		aprint_error_dev(dev,
   2194 		    "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
   2195 		goto fail_1;
   2196 	}
   2197 
   2198 	r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
   2199 	    size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
   2200 	if (r != 0) {
   2201 		aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
   2202 		    __func__, r);
   2203 		goto fail_2;
   2204 	}
   2205 
   2206 	r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
   2207 	if (r != 0) {
    2208 		aprint_error_dev(dev, "%s: ixgbe_dmamap_create failed; error %d\n",
   2209 		    __func__, r);
   2210 		goto fail_3;
   2211 	}
   2212 
   2213 	r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map,
   2214 	    dma->dma_vaddr, size, NULL, mapflags | BUS_DMA_NOWAIT);
   2215 	if (r != 0) {
   2216 		aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
   2217 		    __func__, r);
   2218 		goto fail_4;
   2219 	}
   2220 	dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
   2221 	dma->dma_size = size;
   2222 	return 0;
   2223 fail_4:
   2224 	ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
   2225 fail_3:
   2226 	bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
   2227 fail_2:
   2228 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
   2229 fail_1:
   2230 	ixgbe_dma_tag_destroy(dma->dma_tag);
   2231 fail_0:
   2232 
   2233 	return (r);
   2234 } /* ixgbe_dma_malloc */
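         
         /*
          * Usage sketch (cf. ixgbe_allocate_queues() below): allocate a DMA
          * descriptor area, then release it with ixgbe_dma_free() on teardown:
          *
          *	if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma, BUS_DMA_NOWAIT))
          *		return (ENOMEM);
          *	...
          *	ixgbe_dma_free(adapter, &txr->txdma);
          */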
   2235 
   2236 /************************************************************************
   2237  * ixgbe_dma_free
   2238  ************************************************************************/
   2239 void
   2240 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
   2241 {
   2242 	bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
   2243 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   2244 	ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
   2245 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
   2246 	ixgbe_dma_tag_destroy(dma->dma_tag);
   2247 } /* ixgbe_dma_free */
   2248 
   2249 
   2250 /************************************************************************
   2251  * ixgbe_allocate_queues
   2252  *
   2253  *   Allocate memory for the transmit and receive rings, and then
    2254  *   the descriptors associated with each.  Called only once at attach.
   2255  ************************************************************************/
   2256 int
   2257 ixgbe_allocate_queues(struct adapter *adapter)
   2258 {
   2259 	device_t	dev = adapter->dev;
   2260 	struct ix_queue	*que;
   2261 	struct tx_ring	*txr;
   2262 	struct rx_ring	*rxr;
   2263 	int             rsize, tsize, error = IXGBE_SUCCESS;
   2264 	int             txconf = 0, rxconf = 0;
   2265 
   2266 	/* First, allocate the top level queue structs */
   2267 	adapter->queues = (struct ix_queue *)malloc(sizeof(struct ix_queue) *
   2268 	    adapter->num_queues, M_DEVBUF, M_WAITOK | M_ZERO);
   2269 
   2270 	/* Second, allocate the TX ring struct memory */
   2271 	adapter->tx_rings = malloc(sizeof(struct tx_ring) *
   2272 	    adapter->num_queues, M_DEVBUF, M_WAITOK | M_ZERO);
   2273 
   2274 	/* Third, allocate the RX ring */
   2275 	adapter->rx_rings = (struct rx_ring *)malloc(sizeof(struct rx_ring) *
   2276 	    adapter->num_queues, M_DEVBUF, M_WAITOK | M_ZERO);
   2277 
   2278 	/* For the ring itself */
   2279 	tsize = roundup2(adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc),
   2280 	    DBA_ALIGN);
   2281 
   2282 	/*
    2283 	 * Now set up the TX queues.  txconf is needed to handle the
    2284 	 * possibility that things fail midcourse and we need to
    2285 	 * undo the memory allocations gracefully.
   2286 	 */
   2287 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
   2288 		/* Set up some basics */
   2289 		txr = &adapter->tx_rings[i];
   2290 		txr->adapter = adapter;
   2291 		txr->txr_interq = NULL;
   2292 		/* In case SR-IOV is enabled, align the index properly */
   2293 #ifdef PCI_IOV
   2294 		txr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
   2295 		    i);
   2296 #else
   2297 		txr->me = i;
   2298 #endif
   2299 		txr->num_desc = adapter->num_tx_desc;
   2300 
   2301 		/* Initialize the TX side lock */
   2302 		mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
   2303 
   2304 		if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma,
   2305 		    BUS_DMA_NOWAIT)) {
   2306 			aprint_error_dev(dev,
   2307 			    "Unable to allocate TX Descriptor memory\n");
   2308 			error = ENOMEM;
   2309 			goto err_tx_desc;
   2310 		}
   2311 		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
   2312 		bzero((void *)txr->tx_base, tsize);
   2313 
   2314 		/* Now allocate transmit buffers for the ring */
   2315 		if (ixgbe_allocate_transmit_buffers(txr)) {
   2316 			aprint_error_dev(dev,
   2317 			    "Critical Failure setting up transmit buffers\n");
   2318 			error = ENOMEM;
   2319 			goto err_tx_desc;
   2320 		}
   2321 		if (!(adapter->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
    2322 			/* Allocate a buf ring (pcq(9)) */
   2323 			txr->txr_interq = pcq_create(IXGBE_BR_SIZE, KM_SLEEP);
   2324 			if (txr->txr_interq == NULL) {
   2325 				aprint_error_dev(dev,
   2326 				    "Critical Failure setting up buf ring\n");
   2327 				error = ENOMEM;
   2328 				goto err_tx_desc;
   2329 			}
   2330 		}
   2331 	}
   2332 
   2333 	/*
   2334 	 * Next the RX queues...
   2335 	 */
   2336 	rsize = roundup2(adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc),
   2337 	    DBA_ALIGN);
   2338 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
   2339 		rxr = &adapter->rx_rings[i];
   2340 		/* Set up some basics */
   2341 		rxr->adapter = adapter;
   2342 #ifdef PCI_IOV
   2343 		/* In case SR-IOV is enabled, align the index properly */
   2344 		rxr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
   2345 		    i);
   2346 #else
   2347 		rxr->me = i;
   2348 #endif
   2349 		rxr->num_desc = adapter->num_rx_desc;
   2350 
   2351 		/* Initialize the RX side lock */
   2352 		mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
   2353 
   2354 		if (ixgbe_dma_malloc(adapter, rsize, &rxr->rxdma,
   2355 		    BUS_DMA_NOWAIT)) {
   2356 			aprint_error_dev(dev,
    2357 			    "Unable to allocate RX Descriptor memory\n");
   2358 			error = ENOMEM;
   2359 			goto err_rx_desc;
   2360 		}
   2361 		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
   2362 		bzero((void *)rxr->rx_base, rsize);
   2363 
   2364 		/* Allocate receive buffers for the ring */
   2365 		if (ixgbe_allocate_receive_buffers(rxr)) {
   2366 			aprint_error_dev(dev,
   2367 			    "Critical Failure setting up receive buffers\n");
   2368 			error = ENOMEM;
   2369 			goto err_rx_desc;
   2370 		}
   2371 	}
   2372 
   2373 	/*
   2374 	 * Finally set up the queue holding structs
   2375 	 */
   2376 	for (int i = 0; i < adapter->num_queues; i++) {
   2377 		que = &adapter->queues[i];
   2378 		que->adapter = adapter;
   2379 		que->me = i;
   2380 		que->txr = &adapter->tx_rings[i];
   2381 		que->rxr = &adapter->rx_rings[i];
   2382 
   2383 		mutex_init(&que->dc_mtx, MUTEX_DEFAULT, IPL_NET);
   2384 		que->disabled_count = 0;
   2385 	}
   2386 
   2387 	return (0);
   2388 
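         /*
          * Error unwind: err_rx_desc falls through to err_tx_desc, so both the
          * RX and TX descriptor areas allocated so far are released before
          * returning.
          */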
   2389 err_rx_desc:
   2390 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
   2391 		ixgbe_dma_free(adapter, &rxr->rxdma);
   2392 err_tx_desc:
   2393 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
   2394 		ixgbe_dma_free(adapter, &txr->txdma);
   2395 	free(adapter->rx_rings, M_DEVBUF);
   2396 	free(adapter->tx_rings, M_DEVBUF);
   2397 	free(adapter->queues, M_DEVBUF);
   2398 	return (error);
   2399 } /* ixgbe_allocate_queues */
   2400 
   2401 /************************************************************************
   2402  * ixgbe_free_queues
   2403  *
   2404  *   Free descriptors for the transmit and receive rings, and then
   2405  *   the memory associated with each.
   2406  ************************************************************************/
   2407 void
   2408 ixgbe_free_queues(struct adapter *adapter)
   2409 {
   2410 	struct ix_queue *que;
   2411 	int i;
   2412 
   2413 	ixgbe_free_transmit_structures(adapter);
   2414 	ixgbe_free_receive_structures(adapter);
   2415 	for (i = 0; i < adapter->num_queues; i++) {
   2416 		que = &adapter->queues[i];
   2417 		mutex_destroy(&que->dc_mtx);
   2418 	}
   2419 	free(adapter->queues, M_DEVBUF);
   2420 } /* ixgbe_free_queues */
   2421