      1 /* $NetBSD: ix_txrx.c,v 1.78 2021/05/20 22:36:08 ryo Exp $ */
      2 
      3 /******************************************************************************
      4 
      5   Copyright (c) 2001-2017, Intel Corporation
      6   All rights reserved.
      7 
      8   Redistribution and use in source and binary forms, with or without
      9   modification, are permitted provided that the following conditions are met:
     10 
     11    1. Redistributions of source code must retain the above copyright notice,
     12       this list of conditions and the following disclaimer.
     13 
     14    2. Redistributions in binary form must reproduce the above copyright
     15       notice, this list of conditions and the following disclaimer in the
     16       documentation and/or other materials provided with the distribution.
     17 
     18    3. Neither the name of the Intel Corporation nor the names of its
     19       contributors may be used to endorse or promote products derived from
     20       this software without specific prior written permission.
     21 
     22   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     23   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     24   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     25   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     26   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     27   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     28   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     29   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     30   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     31   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     32   POSSIBILITY OF SUCH DAMAGE.
     33 
     34 ******************************************************************************/
     35 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 327031 2017-12-20 18:15:06Z erj $*/
     36 
     37 /*
     38  * Copyright (c) 2011 The NetBSD Foundation, Inc.
     39  * All rights reserved.
     40  *
     41  * This code is derived from software contributed to The NetBSD Foundation
     42  * by Coyote Point Systems, Inc.
     43  *
     44  * Redistribution and use in source and binary forms, with or without
     45  * modification, are permitted provided that the following conditions
     46  * are met:
     47  * 1. Redistributions of source code must retain the above copyright
     48  *    notice, this list of conditions and the following disclaimer.
     49  * 2. Redistributions in binary form must reproduce the above copyright
     50  *    notice, this list of conditions and the following disclaimer in the
     51  *    documentation and/or other materials provided with the distribution.
     52  *
     53  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     54  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     55  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     56  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     57  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     58  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     59  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     60  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     61  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     62  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     63  * POSSIBILITY OF SUCH DAMAGE.
     64  */
     65 
     66 #include <sys/cdefs.h>
     67 __KERNEL_RCSID(0, "$NetBSD: ix_txrx.c,v 1.78 2021/05/20 22:36:08 ryo Exp $");
     68 
     69 #include "opt_inet.h"
     70 #include "opt_inet6.h"
     71 
     72 #include "ixgbe.h"
     73 
      74 /*
      75  * HW RSC control:
      76  *  This feature only works with IPv4,
      77  *  and only on 82599 and later devices.
      78  *  It also causes IP forwarding to fail,
      79  *  and unlike LRO that cannot be
      80  *  controlled by the stack. For these
      81  *  reasons it is left off by default and
      82  *  there is no tunable interface; enabling
      83  *  it requires setting ixgbe_rsc_enable
      84  *  below and recompiling.
      85  */
     86 static bool ixgbe_rsc_enable = FALSE;
     87 
     88 /*
     89  * For Flow Director: this is the
     90  * number of TX packets we sample
      91  * for the filter pool; this means
     92  * every 20th packet will be probed.
     93  *
     94  * This feature can be disabled by
     95  * setting this to 0.
     96  */
     97 static int atr_sample_rate = 20;
     98 
     99 /************************************************************************
    100  *  Local Function prototypes
    101  ************************************************************************/
    102 static void          ixgbe_setup_transmit_ring(struct tx_ring *);
    103 static void          ixgbe_free_transmit_buffers(struct tx_ring *);
    104 static int           ixgbe_setup_receive_ring(struct rx_ring *);
    105 static void          ixgbe_free_receive_buffers(struct rx_ring *);
    106 static void          ixgbe_rx_checksum(u32, struct mbuf *, u32,
    107                                        struct ixgbe_hw_stats *);
    108 static void          ixgbe_refresh_mbufs(struct rx_ring *, int);
    109 static void          ixgbe_drain(struct ifnet *, struct tx_ring *);
    110 static int           ixgbe_xmit(struct tx_ring *, struct mbuf *);
    111 static int           ixgbe_tx_ctx_setup(struct tx_ring *,
    112                                         struct mbuf *, u32 *, u32 *);
    113 static int           ixgbe_tso_setup(struct tx_ring *,
    114                                      struct mbuf *, u32 *, u32 *);
    115 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
    116 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
    117                                     struct mbuf *, u32);
    118 static int           ixgbe_dma_malloc(struct adapter *, bus_size_t,
    119                                       struct ixgbe_dma_alloc *, int);
    120 static void          ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
    121 
    122 static void	ixgbe_setup_hw_rsc(struct rx_ring *);
    123 
    124 /************************************************************************
    125  * ixgbe_legacy_start_locked - Transmit entry point
    126  *
    127  *   Called by the stack to initiate a transmit.
    128  *   The driver will remain in this routine as long as there are
    129  *   packets to transmit and transmit resources are available.
    130  *   In case resources are not available, the stack is notified
    131  *   and the packet is requeued.
    132  ************************************************************************/
    133 int
    134 ixgbe_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
    135 {
    136 	int rc;
    137 	struct mbuf    *m_head;
    138 	struct adapter *adapter = txr->adapter;
    139 
    140 	IXGBE_TX_LOCK_ASSERT(txr);
    141 
    142 	if (adapter->link_active != LINK_STATE_UP) {
    143 		/*
    144 		 * discard all packets buffered in IFQ to avoid
    145 		 * sending old packets at next link up timing.
    146 		 */
    147 		ixgbe_drain(ifp, txr);
    148 		return (ENETDOWN);
    149 	}
    150 	if ((ifp->if_flags & IFF_RUNNING) == 0)
    151 		return (ENETDOWN);
    152 	if (txr->txr_no_space)
    153 		return (ENETDOWN);
    154 
    155 	while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
    156 		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
    157 			break;
    158 
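		/*
		 * Poll rather than dequeue, so the packet stays on if_snd
		 * when the ring has no room (EAGAIN); it is only dequeued
		 * once ixgbe_xmit() accepts it or fails permanently.
		 */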
    159 		IFQ_POLL(&ifp->if_snd, m_head);
    160 		if (m_head == NULL)
    161 			break;
    162 
    163 		if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
    164 			break;
    165 		}
    166 		IFQ_DEQUEUE(&ifp->if_snd, m_head);
    167 		if (rc != 0) {
    168 			m_freem(m_head);
    169 			continue;
    170 		}
    171 
    172 		/* Send a copy of the frame to the BPF listener */
    173 		bpf_mtap(ifp, m_head, BPF_D_OUT);
    174 	}
    175 
    176 	return IXGBE_SUCCESS;
    177 } /* ixgbe_legacy_start_locked */
    178 
    179 /************************************************************************
    180  * ixgbe_legacy_start
    181  *
    182  *   Called by the stack, this always uses the first tx ring,
    183  *   and should not be used with multiqueue tx enabled.
    184  ************************************************************************/
    185 void
    186 ixgbe_legacy_start(struct ifnet *ifp)
    187 {
    188 	struct adapter *adapter = ifp->if_softc;
    189 	struct tx_ring *txr = adapter->tx_rings;
    190 
    191 	if (ifp->if_flags & IFF_RUNNING) {
    192 		IXGBE_TX_LOCK(txr);
    193 		ixgbe_legacy_start_locked(ifp, txr);
    194 		IXGBE_TX_UNLOCK(txr);
    195 	}
    196 } /* ixgbe_legacy_start */
    197 
    198 /************************************************************************
    199  * ixgbe_mq_start - Multiqueue Transmit Entry Point
    200  *
    201  *   (if_transmit function)
    202  ************************************************************************/
    203 int
    204 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
    205 {
    206 	struct adapter	*adapter = ifp->if_softc;
    207 	struct tx_ring	*txr;
    208 	int		i;
    209 #ifdef RSS
    210 	uint32_t bucket_id;
    211 #endif
    212 
    213 	/*
    214 	 * When doing RSS, map it to the same outbound queue
    215 	 * as the incoming flow would be mapped to.
    216 	 *
     217 	 * If everything is set up correctly, it should be the
     218 	 * same bucket as the one assigned to the CPU we are on.
    219 	 */
    220 #ifdef RSS
    221 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
    222 		if ((adapter->feat_en & IXGBE_FEATURE_RSS) &&
    223 		    (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
    224 		    &bucket_id) == 0)) {
    225 			i = bucket_id % adapter->num_queues;
    226 #ifdef IXGBE_DEBUG
    227 			if (bucket_id > adapter->num_queues)
    228 				if_printf(ifp,
    229 				    "bucket_id (%d) > num_queues (%d)\n",
    230 				    bucket_id, adapter->num_queues);
    231 #endif
    232 		} else
    233 			i = m->m_pkthdr.flowid % adapter->num_queues;
    234 	} else
     235 #endif /* RSS */
    236 		i = (cpu_index(curcpu()) % ncpu) % adapter->num_queues;
    237 
    238 	/* Check for a hung queue and pick alternative */
    239 	if (((1ULL << i) & adapter->active_queues) == 0)
    240 		i = ffs64(adapter->active_queues);
    241 
    242 	txr = &adapter->tx_rings[i];
    243 
    244 	if (__predict_false(!pcq_put(txr->txr_interq, m))) {
    245 		m_freem(m);
    246 		txr->pcq_drops.ev_count++;
    247 		return ENOBUFS;
    248 	}
    249 	if (IXGBE_TX_TRYLOCK(txr)) {
    250 		ixgbe_mq_start_locked(ifp, txr);
    251 		IXGBE_TX_UNLOCK(txr);
    252 	} else {
    253 		if (adapter->txrx_use_workqueue) {
    254 			u_int *enqueued;
    255 
    256 			/*
     257 			 * This function itself is not called in interrupt
     258 			 * context, but it can be called in fast softint
     259 			 * context right after receiving forwarded packets,
     260 			 * so the workqueue must be protected against being
     261 			 * enqueued twice when the machine handles both
     262 			 * locally generated and forwarded packets.
    263 			 */
    264 			enqueued = percpu_getref(adapter->txr_wq_enqueued);
    265 			if (*enqueued == 0) {
    266 				*enqueued = 1;
    267 				percpu_putref(adapter->txr_wq_enqueued);
    268 				workqueue_enqueue(adapter->txr_wq,
    269 				    &txr->wq_cookie, curcpu());
    270 			} else
    271 				percpu_putref(adapter->txr_wq_enqueued);
    272 		} else {
    273 			kpreempt_disable();
    274 			softint_schedule(txr->txr_si);
    275 			kpreempt_enable();
    276 		}
    277 	}
    278 
    279 	return (0);
    280 } /* ixgbe_mq_start */
    281 
    282 /************************************************************************
    283  * ixgbe_mq_start_locked
    284  ************************************************************************/
    285 int
    286 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
    287 {
    288 	struct mbuf    *next;
    289 	int            enqueued = 0, err = 0;
    290 
    291 	if (txr->adapter->link_active != LINK_STATE_UP) {
    292 		/*
    293 		 * discard all packets buffered in txr_interq to avoid
    294 		 * sending old packets at next link up timing.
    295 		 */
    296 		ixgbe_drain(ifp, txr);
    297 		return (ENETDOWN);
    298 	}
    299 	if ((ifp->if_flags & IFF_RUNNING) == 0)
    300 		return (ENETDOWN);
    301 	if (txr->txr_no_space)
    302 		return (ENETDOWN);
    303 
    304 	/* Process the queue */
    305 	while ((next = pcq_get(txr->txr_interq)) != NULL) {
    306 		if ((err = ixgbe_xmit(txr, next)) != 0) {
    307 			m_freem(next);
    308 			/* All errors are counted in ixgbe_xmit() */
    309 			break;
    310 		}
    311 		enqueued++;
    312 #if __FreeBSD_version >= 1100036
    313 		/*
    314 		 * Since we're looking at the tx ring, we can check
     315 		 * to see if we're a VF by examining our tail register
    316 		 * address.
    317 		 */
    318 		if ((txr->adapter->feat_en & IXGBE_FEATURE_VF) &&
    319 		    (next->m_flags & M_MCAST))
    320 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
    321 #endif
    322 		/* Send a copy of the frame to the BPF listener */
    323 		bpf_mtap(ifp, next, BPF_D_OUT);
    324 		if ((ifp->if_flags & IFF_RUNNING) == 0)
    325 			break;
    326 	}
    327 
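	/* Reclaim completed descriptors when the ring is running low. */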
    328 	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->adapter))
    329 		ixgbe_txeof(txr);
    330 
    331 	return (err);
    332 } /* ixgbe_mq_start_locked */
    333 
    334 /************************************************************************
    335  * ixgbe_deferred_mq_start
    336  *
     337  *   Called from a softint, or indirectly from a workqueue (via
     338  *   ixgbe_deferred_mq_start_work()), to drain queued transmit packets.
    339  ************************************************************************/
    340 void
    341 ixgbe_deferred_mq_start(void *arg)
    342 {
    343 	struct tx_ring *txr = arg;
    344 	struct adapter *adapter = txr->adapter;
    345 	struct ifnet   *ifp = adapter->ifp;
    346 
    347 	IXGBE_TX_LOCK(txr);
    348 	if (pcq_peek(txr->txr_interq) != NULL)
    349 		ixgbe_mq_start_locked(ifp, txr);
    350 	IXGBE_TX_UNLOCK(txr);
    351 } /* ixgbe_deferred_mq_start */
    352 
    353 /************************************************************************
    354  * ixgbe_deferred_mq_start_work
    355  *
    356  *   Called from a workqueue to drain queued transmit packets.
    357  ************************************************************************/
    358 void
    359 ixgbe_deferred_mq_start_work(struct work *wk, void *arg)
    360 {
    361 	struct tx_ring *txr = container_of(wk, struct tx_ring, wq_cookie);
    362 	struct adapter *adapter = txr->adapter;
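	/*
	 * Clear the per-CPU flag set in ixgbe_mq_start() so the
	 * workqueue can be enqueued again from this CPU.
	 */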
    363 	u_int *enqueued = percpu_getref(adapter->txr_wq_enqueued);
    364 	*enqueued = 0;
    365 	percpu_putref(adapter->txr_wq_enqueued);
    366 
    367 	ixgbe_deferred_mq_start(txr);
     368 } /* ixgbe_deferred_mq_start_work */
    369 
    370 /************************************************************************
    371  * ixgbe_drain_all
    372  ************************************************************************/
    373 void
    374 ixgbe_drain_all(struct adapter *adapter)
    375 {
    376 	struct ifnet *ifp = adapter->ifp;
    377 	struct ix_queue *que = adapter->queues;
    378 
    379 	for (int i = 0; i < adapter->num_queues; i++, que++) {
    380 		struct tx_ring  *txr = que->txr;
    381 
    382 		IXGBE_TX_LOCK(txr);
    383 		ixgbe_drain(ifp, txr);
    384 		IXGBE_TX_UNLOCK(txr);
    385 	}
    386 }
    387 
    388 /************************************************************************
    389  * ixgbe_xmit
    390  *
    391  *   Maps the mbufs to tx descriptors, allowing the
    392  *   TX engine to transmit the packets.
    393  *
    394  *   Return 0 on success, positive on failure
    395  ************************************************************************/
    396 static int
    397 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
    398 {
    399 	struct adapter          *adapter = txr->adapter;
    400 	struct ixgbe_tx_buf     *txbuf;
    401 	union ixgbe_adv_tx_desc *txd = NULL;
    402 	struct ifnet	        *ifp = adapter->ifp;
    403 	int                     i, j, error;
    404 	int                     first;
    405 	u32                     olinfo_status = 0, cmd_type_len;
    406 	bool                    remap = TRUE;
    407 	bus_dmamap_t            map;
    408 
    409 	/* Basic descriptor defines */
    410 	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
    411 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
    412 
    413 	if (vlan_has_tag(m_head))
    414 		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
    415 
    416 	/*
    417 	 * Important to capture the first descriptor
    418 	 * used because it will contain the index of
    419 	 * the one we tell the hardware to report back
    420 	 */
    421 	first = txr->next_avail_desc;
    422 	txbuf = &txr->tx_buffers[first];
    423 	map = txbuf->map;
    424 
    425 	/*
    426 	 * Map the packet for DMA.
    427 	 */
    428 retry:
    429 	error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map, m_head,
    430 	    BUS_DMA_NOWAIT);
    431 
    432 	if (__predict_false(error)) {
    433 		struct mbuf *m;
    434 
    435 		switch (error) {
    436 		case EAGAIN:
    437 			txr->q_eagain_tx_dma_setup++;
    438 			return EAGAIN;
    439 		case ENOMEM:
    440 			txr->q_enomem_tx_dma_setup++;
    441 			return EAGAIN;
    442 		case EFBIG:
    443 			/* Try it again? - one try */
    444 			if (remap == TRUE) {
    445 				remap = FALSE;
    446 				/*
    447 				 * XXX: m_defrag will choke on
    448 				 * non-MCLBYTES-sized clusters
    449 				 */
    450 				txr->q_efbig_tx_dma_setup++;
    451 				m = m_defrag(m_head, M_NOWAIT);
    452 				if (m == NULL) {
    453 					txr->q_mbuf_defrag_failed++;
    454 					return ENOBUFS;
    455 				}
    456 				m_head = m;
    457 				goto retry;
    458 			} else {
    459 				txr->q_efbig2_tx_dma_setup++;
    460 				return error;
    461 			}
    462 		case EINVAL:
    463 			txr->q_einval_tx_dma_setup++;
    464 			return error;
    465 		default:
    466 			txr->q_other_tx_dma_setup++;
    467 			return error;
    468 		}
    469 	}
    470 
    471 	/* Make certain there are enough descriptors */
    472 	if (txr->tx_avail < (map->dm_nsegs + 2)) {
    473 		txr->txr_no_space = true;
    474 		txr->no_desc_avail.ev_count++;
    475 		ixgbe_dmamap_unload(txr->txtag, txbuf->map);
    476 		return EAGAIN;
    477 	}
    478 
    479 	/*
    480 	 * Set up the appropriate offload context
    481 	 * this will consume the first descriptor
    482 	 */
    483 	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
    484 	if (__predict_false(error)) {
    485 		return (error);
    486 	}
    487 
    488 #ifdef IXGBE_FDIR
    489 	/* Do the flow director magic */
    490 	if ((adapter->feat_en & IXGBE_FEATURE_FDIR) &&
    491 	    (txr->atr_sample) && (!adapter->fdir_reinit)) {
    492 		++txr->atr_count;
    493 		if (txr->atr_count >= atr_sample_rate) {
    494 			ixgbe_atr(txr, m_head);
    495 			txr->atr_count = 0;
    496 		}
    497 	}
    498 #endif
    499 
    500 	olinfo_status |= IXGBE_ADVTXD_CC;
    501 	i = txr->next_avail_desc;
    502 	for (j = 0; j < map->dm_nsegs; j++) {
    503 		bus_size_t seglen;
    504 		uint64_t segaddr;
    505 
    506 		txbuf = &txr->tx_buffers[i];
    507 		txd = &txr->tx_base[i];
    508 		seglen = map->dm_segs[j].ds_len;
    509 		segaddr = htole64(map->dm_segs[j].ds_addr);
    510 
    511 		txd->read.buffer_addr = segaddr;
    512 		txd->read.cmd_type_len = htole32(cmd_type_len | seglen);
    513 		txd->read.olinfo_status = htole32(olinfo_status);
    514 
    515 		if (++i == txr->num_desc)
    516 			i = 0;
    517 	}
    518 
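	/*
	 * The last descriptor of the frame carries EOP (end of packet) and
	 * RS (report status); the hardware sets the DD bit in it when the
	 * whole frame has been sent, which is what ixgbe_txeof() checks.
	 */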
    519 	txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
    520 	txr->tx_avail -= map->dm_nsegs;
    521 	txr->next_avail_desc = i;
    522 
    523 	txbuf->m_head = m_head;
    524 	/*
     525 	 * Here we swap the maps so the last descriptor,
     526 	 * which gets the completion interrupt, holds the
     527 	 * real map, and the first descriptor takes over the
     528 	 * unused map from this last descriptor.
    529 	 */
    530 	txr->tx_buffers[first].map = txbuf->map;
    531 	txbuf->map = map;
    532 	bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
    533 	    BUS_DMASYNC_PREWRITE);
    534 
    535 	/* Set the EOP descriptor that will be marked done */
    536 	txbuf = &txr->tx_buffers[first];
    537 	txbuf->eop = txd;
    538 
    539 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
    540 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
    541 	/*
     542 	 * Advance the Transmit Descriptor Tail (TDT); this tells the
    543 	 * hardware that this frame is available to transmit.
    544 	 */
    545 	++txr->total_packets.ev_count;
    546 	IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
    547 
    548 	net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
    549 	if_statadd_ref(nsr, if_obytes, m_head->m_pkthdr.len);
    550 	if (m_head->m_flags & M_MCAST)
    551 		if_statinc_ref(nsr, if_omcasts);
    552 	IF_STAT_PUTREF(ifp);
    553 
    554 	/* Mark queue as having work */
    555 	if (txr->busy == 0)
    556 		txr->busy = 1;
    557 
    558 	return (0);
    559 } /* ixgbe_xmit */
    560 
    561 /************************************************************************
    562  * ixgbe_drain
    563  ************************************************************************/
    564 static void
    565 ixgbe_drain(struct ifnet *ifp, struct tx_ring *txr)
    566 {
    567 	struct mbuf *m;
    568 
    569 	IXGBE_TX_LOCK_ASSERT(txr);
    570 
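	/*
	 * if_snd is shared by all queues, so drain it only once,
	 * from queue 0.
	 */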
    571 	if (txr->me == 0) {
    572 		while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
    573 			IFQ_DEQUEUE(&ifp->if_snd, m);
    574 			m_freem(m);
    575 			IF_DROP(&ifp->if_snd);
    576 		}
    577 	}
    578 
    579 	while ((m = pcq_get(txr->txr_interq)) != NULL) {
    580 		m_freem(m);
    581 		txr->pcq_drops.ev_count++;
    582 	}
    583 }
    584 
    585 /************************************************************************
    586  * ixgbe_allocate_transmit_buffers
    587  *
    588  *   Allocate memory for tx_buffer structures. The tx_buffer stores all
    589  *   the information needed to transmit a packet on the wire. This is
    590  *   called only once at attach, setup is done every reset.
    591  ************************************************************************/
    592 static int
    593 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
    594 {
    595 	struct adapter      *adapter = txr->adapter;
    596 	device_t            dev = adapter->dev;
    597 	struct ixgbe_tx_buf *txbuf;
    598 	int                 error, i;
    599 
    600 	/*
    601 	 * Setup DMA descriptor areas.
    602 	 */
    603 	error = ixgbe_dma_tag_create(
    604 	         /*      parent */ adapter->osdep.dmat,
    605 	         /*   alignment */ 1,
    606 	         /*      bounds */ 0,
    607 	         /*     maxsize */ IXGBE_TSO_SIZE,
    608 	         /*   nsegments */ adapter->num_segs,
    609 	         /*  maxsegsize */ PAGE_SIZE,
    610 	         /*       flags */ 0,
    611 	                           &txr->txtag);
    612 	if (error != 0) {
    613 		aprint_error_dev(dev, "Unable to allocate TX DMA tag\n");
    614 		goto fail;
    615 	}
    616 
    617 	txr->tx_buffers = malloc(sizeof(struct ixgbe_tx_buf) *
    618 	    adapter->num_tx_desc, M_DEVBUF, M_WAITOK | M_ZERO);
    619 
    620 	/* Create the descriptor buffer dma maps */
    621 	txbuf = txr->tx_buffers;
    622 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
    623 		error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
    624 		if (error != 0) {
    625 			aprint_error_dev(dev,
    626 			    "Unable to create TX DMA map (%d)\n", error);
    627 			goto fail;
    628 		}
    629 	}
    630 
    631 	return 0;
    632 fail:
    633 	/* We free all; this handles the case where we stopped in the middle */
    634 #if 0 /* XXX was FreeBSD */
    635 	ixgbe_free_transmit_structures(adapter);
    636 #else
    637 	ixgbe_free_transmit_buffers(txr);
    638 #endif
    639 	return (error);
    640 } /* ixgbe_allocate_transmit_buffers */
    641 
    642 /************************************************************************
    643  * ixgbe_setup_transmit_ring - Initialize a transmit ring.
    644  ************************************************************************/
    645 static void
    646 ixgbe_setup_transmit_ring(struct tx_ring *txr)
    647 {
    648 	struct adapter        *adapter = txr->adapter;
    649 	struct ixgbe_tx_buf   *txbuf;
    650 #ifdef DEV_NETMAP
    651 	struct netmap_adapter *na = NA(adapter->ifp);
    652 	struct netmap_slot    *slot;
    653 #endif /* DEV_NETMAP */
    654 
    655 	/* Clear the old ring contents */
    656 	IXGBE_TX_LOCK(txr);
    657 
    658 #ifdef DEV_NETMAP
    659 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
    660 		/*
    661 		 * (under lock): if in netmap mode, do some consistency
    662 		 * checks and set slot to entry 0 of the netmap ring.
    663 		 */
    664 		slot = netmap_reset(na, NR_TX, txr->me, 0);
    665 	}
    666 #endif /* DEV_NETMAP */
    667 
    668 	bzero((void *)txr->tx_base,
    669 	    (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
    670 	/* Reset indices */
    671 	txr->next_avail_desc = 0;
    672 	txr->next_to_clean = 0;
    673 
    674 	/* Free any existing tx buffers. */
    675 	txbuf = txr->tx_buffers;
    676 	for (int i = 0; i < txr->num_desc; i++, txbuf++) {
    677 		if (txbuf->m_head != NULL) {
    678 			bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
    679 			    0, txbuf->m_head->m_pkthdr.len,
    680 			    BUS_DMASYNC_POSTWRITE);
    681 			ixgbe_dmamap_unload(txr->txtag, txbuf->map);
    682 			m_freem(txbuf->m_head);
    683 			txbuf->m_head = NULL;
    684 		}
    685 
    686 #ifdef DEV_NETMAP
    687 		/*
    688 		 * In netmap mode, set the map for the packet buffer.
    689 		 * NOTE: Some drivers (not this one) also need to set
    690 		 * the physical buffer address in the NIC ring.
    691 		 * Slots in the netmap ring (indexed by "si") are
    692 		 * kring->nkr_hwofs positions "ahead" wrt the
    693 		 * corresponding slot in the NIC ring. In some drivers
    694 		 * (not here) nkr_hwofs can be negative. Function
    695 		 * netmap_idx_n2k() handles wraparounds properly.
    696 		 */
    697 		if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
    698 			int si = netmap_idx_n2k(na->tx_rings[txr->me], i);
    699 			netmap_load_map(na, txr->txtag,
    700 			    txbuf->map, NMB(na, slot + si));
    701 		}
    702 #endif /* DEV_NETMAP */
    703 
    704 		/* Clear the EOP descriptor pointer */
    705 		txbuf->eop = NULL;
    706 	}
    707 
    708 	/* Set the rate at which we sample packets */
    709 	if (adapter->feat_en & IXGBE_FEATURE_FDIR)
    710 		txr->atr_sample = atr_sample_rate;
    711 
    712 	/* Set number of descriptors available */
    713 	txr->tx_avail = adapter->num_tx_desc;
    714 
    715 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
    716 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
    717 	IXGBE_TX_UNLOCK(txr);
    718 } /* ixgbe_setup_transmit_ring */
    719 
    720 /************************************************************************
    721  * ixgbe_setup_transmit_structures - Initialize all transmit rings.
    722  ************************************************************************/
    723 int
    724 ixgbe_setup_transmit_structures(struct adapter *adapter)
    725 {
    726 	struct tx_ring *txr = adapter->tx_rings;
    727 
    728 	for (int i = 0; i < adapter->num_queues; i++, txr++)
    729 		ixgbe_setup_transmit_ring(txr);
    730 
    731 	return (0);
    732 } /* ixgbe_setup_transmit_structures */
    733 
    734 /************************************************************************
    735  * ixgbe_free_transmit_structures - Free all transmit rings.
    736  ************************************************************************/
    737 void
    738 ixgbe_free_transmit_structures(struct adapter *adapter)
    739 {
    740 	struct tx_ring *txr = adapter->tx_rings;
    741 
    742 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
    743 		ixgbe_free_transmit_buffers(txr);
    744 		ixgbe_dma_free(adapter, &txr->txdma);
    745 		IXGBE_TX_LOCK_DESTROY(txr);
    746 	}
    747 	free(adapter->tx_rings, M_DEVBUF);
    748 } /* ixgbe_free_transmit_structures */
    749 
    750 /************************************************************************
    751  * ixgbe_free_transmit_buffers
    752  *
    753  *   Free transmit ring related data structures.
    754  ************************************************************************/
    755 static void
    756 ixgbe_free_transmit_buffers(struct tx_ring *txr)
    757 {
    758 	struct adapter      *adapter = txr->adapter;
    759 	struct ixgbe_tx_buf *tx_buffer;
    760 	int                 i;
    761 
    762 	INIT_DEBUGOUT("ixgbe_free_transmit_buffers: begin");
    763 
    764 	if (txr->tx_buffers == NULL)
    765 		return;
    766 
    767 	tx_buffer = txr->tx_buffers;
    768 	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
    769 		if (tx_buffer->m_head != NULL) {
    770 			bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
    771 			    0, tx_buffer->m_head->m_pkthdr.len,
    772 			    BUS_DMASYNC_POSTWRITE);
    773 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
    774 			m_freem(tx_buffer->m_head);
    775 			tx_buffer->m_head = NULL;
    776 			if (tx_buffer->map != NULL) {
    777 				ixgbe_dmamap_destroy(txr->txtag,
    778 				    tx_buffer->map);
    779 				tx_buffer->map = NULL;
    780 			}
    781 		} else if (tx_buffer->map != NULL) {
    782 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
    783 			ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
    784 			tx_buffer->map = NULL;
    785 		}
    786 	}
    787 	if (txr->txr_interq != NULL) {
    788 		struct mbuf *m;
    789 
    790 		while ((m = pcq_get(txr->txr_interq)) != NULL)
    791 			m_freem(m);
    792 		pcq_destroy(txr->txr_interq);
    793 	}
    794 	if (txr->tx_buffers != NULL) {
    795 		free(txr->tx_buffers, M_DEVBUF);
    796 		txr->tx_buffers = NULL;
    797 	}
    798 	if (txr->txtag != NULL) {
    799 		ixgbe_dma_tag_destroy(txr->txtag);
    800 		txr->txtag = NULL;
    801 	}
    802 } /* ixgbe_free_transmit_buffers */
    803 
    804 /************************************************************************
    805  * ixgbe_tx_ctx_setup
    806  *
    807  *   Advanced Context Descriptor setup for VLAN, CSUM or TSO
    808  ************************************************************************/
    809 static int
    810 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
    811     u32 *cmd_type_len, u32 *olinfo_status)
    812 {
    813 	struct adapter                   *adapter = txr->adapter;
    814 	struct ixgbe_adv_tx_context_desc *TXD;
    815 	struct ether_vlan_header         *eh;
    816 #ifdef INET
    817 	struct ip                        *ip;
    818 #endif
    819 #ifdef INET6
    820 	struct ip6_hdr                   *ip6;
    821 #endif
    822 	int                              ehdrlen, ip_hlen = 0;
    823 	int                              offload = TRUE;
    824 	int                              ctxd = txr->next_avail_desc;
    825 	u32                              vlan_macip_lens = 0;
    826 	u32                              type_tucmd_mlhl = 0;
    827 	u16                              vtag = 0;
    828 	u16                              etype;
    829 	u8                               ipproto = 0;
    830 	char                             *l3d;
    831 
    832 
    833 	/* First check if TSO is to be used */
    834 	if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4 | M_CSUM_TSOv6)) {
    835 		int rv = ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status);
    836 
    837 		if (rv != 0)
    838 			++adapter->tso_err.ev_count;
    839 		return rv;
    840 	}
    841 
    842 	if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
    843 		offload = FALSE;
    844 
    845 	/* Indicate the whole packet as payload when not doing TSO */
    846 	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
    847 
    848 	/* Now ready a context descriptor */
    849 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
    850 
    851 	/*
    852 	 * In advanced descriptors the vlan tag must
    853 	 * be placed into the context descriptor. Hence
    854 	 * we need to make one even if not doing offloads.
    855 	 */
    856 	if (vlan_has_tag(mp)) {
    857 		vtag = htole16(vlan_get_tag(mp));
    858 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
    859 	} else if (!(txr->adapter->feat_en & IXGBE_FEATURE_NEEDS_CTXD) &&
    860 	           (offload == FALSE))
    861 		return (0);
    862 
    863 	/*
    864 	 * Determine where frame payload starts.
    865 	 * Jump over vlan headers if already present,
    866 	 * helpful for QinQ too.
    867 	 */
    868 	KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
    869 	eh = mtod(mp, struct ether_vlan_header *);
    870 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
    871 		KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
    872 		etype = ntohs(eh->evl_proto);
    873 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
    874 	} else {
    875 		etype = ntohs(eh->evl_encap_proto);
    876 		ehdrlen = ETHER_HDR_LEN;
    877 	}
    878 
    879 	/* Set the ether header length */
    880 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
    881 
    882 	if (offload == FALSE)
    883 		goto no_offloads;
    884 
    885 	/*
    886 	 * If the first mbuf only includes the ethernet header,
    887 	 * jump to the next one
    888 	 * XXX: This assumes the stack splits mbufs containing headers
    889 	 *      on header boundaries
    890 	 * XXX: And assumes the entire IP header is contained in one mbuf
    891 	 */
    892 	if (mp->m_len == ehdrlen && mp->m_next)
    893 		l3d = mtod(mp->m_next, char *);
    894 	else
    895 		l3d = mtod(mp, char *) + ehdrlen;
    896 
    897 	switch (etype) {
    898 #ifdef INET
    899 	case ETHERTYPE_IP:
    900 		ip = (struct ip *)(l3d);
    901 		ip_hlen = ip->ip_hl << 2;
    902 		ipproto = ip->ip_p;
    903 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
    904 		KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
    905 		    ip->ip_sum == 0);
    906 		break;
    907 #endif
    908 #ifdef INET6
    909 	case ETHERTYPE_IPV6:
    910 		ip6 = (struct ip6_hdr *)(l3d);
    911 		ip_hlen = sizeof(struct ip6_hdr);
    912 		ipproto = ip6->ip6_nxt;
    913 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
    914 		break;
    915 #endif
    916 	default:
    917 		offload = false;
    918 		break;
    919 	}
    920 
    921 	if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
    922 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
    923 
    924 	vlan_macip_lens |= ip_hlen;
    925 
    926 	/* No support for offloads for non-L4 next headers */
    927 	switch (ipproto) {
    928 	case IPPROTO_TCP:
    929 		if (mp->m_pkthdr.csum_flags &
    930 		    (M_CSUM_TCPv4 | M_CSUM_TCPv6))
    931 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
    932 		else
    933 			offload = false;
    934 		break;
    935 	case IPPROTO_UDP:
    936 		if (mp->m_pkthdr.csum_flags &
    937 		    (M_CSUM_UDPv4 | M_CSUM_UDPv6))
    938 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
    939 		else
    940 			offload = false;
    941 		break;
    942 	default:
    943 		offload = false;
    944 		break;
    945 	}
    946 
    947 	if (offload) /* Insert L4 checksum into data descriptors */
    948 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
    949 
    950 no_offloads:
    951 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
    952 
    953 	/* Now copy bits into descriptor */
    954 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
    955 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
    956 	TXD->seqnum_seed = htole32(0);
    957 	TXD->mss_l4len_idx = htole32(0);
    958 
    959 	/* We've consumed the first desc, adjust counters */
    960 	if (++ctxd == txr->num_desc)
    961 		ctxd = 0;
    962 	txr->next_avail_desc = ctxd;
    963 	--txr->tx_avail;
    964 
    965 	return (0);
    966 } /* ixgbe_tx_ctx_setup */
    967 
    968 /************************************************************************
    969  * ixgbe_tso_setup
    970  *
    971  *   Setup work for hardware segmentation offload (TSO) on
    972  *   adapters using advanced tx descriptors
    973  ************************************************************************/
    974 static int
    975 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len,
    976     u32 *olinfo_status)
    977 {
    978 	struct ixgbe_adv_tx_context_desc *TXD;
    979 	struct ether_vlan_header         *eh;
    980 #ifdef INET6
    981 	struct ip6_hdr                   *ip6;
    982 #endif
    983 #ifdef INET
    984 	struct ip                        *ip;
    985 #endif
    986 	struct tcphdr                    *th;
    987 	int                              ctxd, ehdrlen, ip_hlen, tcp_hlen;
    988 	u32                              vlan_macip_lens = 0;
    989 	u32                              type_tucmd_mlhl = 0;
    990 	u32                              mss_l4len_idx = 0, paylen;
    991 	u16                              vtag = 0, eh_type;
    992 
    993 	/*
    994 	 * Determine where frame payload starts.
    995 	 * Jump over vlan headers if already present
    996 	 */
    997 	eh = mtod(mp, struct ether_vlan_header *);
    998 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
    999 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
   1000 		eh_type = eh->evl_proto;
   1001 	} else {
   1002 		ehdrlen = ETHER_HDR_LEN;
   1003 		eh_type = eh->evl_encap_proto;
   1004 	}
   1005 
   1006 	switch (ntohs(eh_type)) {
   1007 #ifdef INET
   1008 	case ETHERTYPE_IP:
   1009 		ip = (struct ip *)(mp->m_data + ehdrlen);
   1010 		if (ip->ip_p != IPPROTO_TCP)
   1011 			return (ENXIO);
   1012 		ip->ip_sum = 0;
   1013 		ip_hlen = ip->ip_hl << 2;
   1014 		th = (struct tcphdr *)((char *)ip + ip_hlen);
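		/*
		 * Seed the TCP checksum with the pseudo-header checksum
		 * (addresses and protocol, no length), as the hardware
		 * expects when performing TSO.
		 */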
   1015 		th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
   1016 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
   1017 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
   1018 		/* Tell transmit desc to also do IPv4 checksum. */
   1019 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
   1020 		break;
   1021 #endif
   1022 #ifdef INET6
   1023 	case ETHERTYPE_IPV6:
   1024 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
   1025 		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
   1026 		if (ip6->ip6_nxt != IPPROTO_TCP)
   1027 			return (ENXIO);
   1028 		ip_hlen = sizeof(struct ip6_hdr);
   1029 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
   1030 		th = (struct tcphdr *)((char *)ip6 + ip_hlen);
   1031 		th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
   1032 		    &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
   1033 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
   1034 		break;
   1035 #endif
   1036 	default:
   1037 		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
   1038 		    __func__, ntohs(eh_type));
   1039 		break;
   1040 	}
   1041 
   1042 	ctxd = txr->next_avail_desc;
   1043 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
   1044 
   1045 	tcp_hlen = th->th_off << 2;
   1046 
   1047 	/* Payload length (less all headers); used in the transmit descriptors */
   1048 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
   1049 
   1050 	/* VLAN MACLEN IPLEN */
   1051 	if (vlan_has_tag(mp)) {
   1052 		vtag = htole16(vlan_get_tag(mp));
   1053 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
   1054 	}
   1055 
   1056 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
   1057 	vlan_macip_lens |= ip_hlen;
   1058 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
   1059 
   1060 	/* ADV DTYPE TUCMD */
   1061 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
   1062 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
   1063 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
   1064 
   1065 	/* MSS L4LEN IDX */
   1066 	mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
   1067 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
   1068 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
   1069 
   1070 	TXD->seqnum_seed = htole32(0);
   1071 
   1072 	if (++ctxd == txr->num_desc)
   1073 		ctxd = 0;
   1074 
   1075 	txr->tx_avail--;
   1076 	txr->next_avail_desc = ctxd;
   1077 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
   1078 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
   1079 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
   1080 	++txr->tso_tx.ev_count;
   1081 
   1082 	return (0);
   1083 } /* ixgbe_tso_setup */
   1084 
   1085 
   1086 /************************************************************************
   1087  * ixgbe_txeof
   1088  *
   1089  *   Examine each tx_buffer in the used queue. If the hardware is done
   1090  *   processing the packet then free associated resources. The
   1091  *   tx_buffer is put back on the free queue.
   1092  ************************************************************************/
   1093 bool
   1094 ixgbe_txeof(struct tx_ring *txr)
   1095 {
   1096 	struct adapter		*adapter = txr->adapter;
   1097 	struct ifnet		*ifp = adapter->ifp;
   1098 	struct ixgbe_tx_buf	*buf;
   1099 	union ixgbe_adv_tx_desc *txd;
   1100 	u32			work, processed = 0;
   1101 	u32			limit = adapter->tx_process_limit;
   1102 
   1103 	KASSERT(mutex_owned(&txr->tx_mtx));
   1104 
   1105 #ifdef DEV_NETMAP
   1106 	if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) &&
   1107 	    (adapter->ifp->if_capenable & IFCAP_NETMAP)) {
   1108 		struct netmap_adapter *na = NA(adapter->ifp);
   1109 		struct netmap_kring *kring = na->tx_rings[txr->me];
   1110 		txd = txr->tx_base;
   1111 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1112 		    BUS_DMASYNC_POSTREAD);
   1113 		/*
   1114 		 * In netmap mode, all the work is done in the context
   1115 		 * of the client thread. Interrupt handlers only wake up
   1116 		 * clients, which may be sleeping on individual rings
   1117 		 * or on a global resource for all rings.
   1118 		 * To implement tx interrupt mitigation, we wake up the client
   1119 		 * thread roughly every half ring, even if the NIC interrupts
   1120 		 * more frequently. This is implemented as follows:
   1121 		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
   1122 		 *   the slot that should wake up the thread (nkr_num_slots
   1123 		 *   means the user thread should not be woken up);
   1124 		 * - the driver ignores tx interrupts unless netmap_mitigate=0
   1125 		 *   or the slot has the DD bit set.
   1126 		 */
   1127 		if (kring->nr_kflags < kring->nkr_num_slots &&
   1128 		    le32toh(txd[kring->nr_kflags].wb.status) & IXGBE_TXD_STAT_DD) {
   1129 			netmap_tx_irq(ifp, txr->me);
   1130 		}
   1131 		return false;
   1132 	}
   1133 #endif /* DEV_NETMAP */
   1134 
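	/* Ring completely clean: nothing outstanding, clear the hang counter. */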
   1135 	if (txr->tx_avail == txr->num_desc) {
   1136 		txr->busy = 0;
   1137 		return false;
   1138 	}
   1139 
   1140 	/* Get work starting point */
   1141 	work = txr->next_to_clean;
   1142 	buf = &txr->tx_buffers[work];
   1143 	txd = &txr->tx_base[work];
   1144 	work -= txr->num_desc; /* The distance to ring end */
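	/*
	 * 'work' is now negative: it counts back up as descriptors are
	 * cleaned and reaches zero exactly at the wrap point, so the
	 * '!work' tests below detect the wrap; num_desc is added back at
	 * the end to recover the ring index.
	 */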
   1145 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1146 	    BUS_DMASYNC_POSTREAD);
   1147 
   1148 	do {
   1149 		union ixgbe_adv_tx_desc *eop = buf->eop;
   1150 		if (eop == NULL) /* No work */
   1151 			break;
   1152 
   1153 		if ((le32toh(eop->wb.status) & IXGBE_TXD_STAT_DD) == 0)
   1154 			break;	/* I/O not complete */
   1155 
   1156 		if (buf->m_head) {
   1157 			txr->bytes += buf->m_head->m_pkthdr.len;
   1158 			bus_dmamap_sync(txr->txtag->dt_dmat, buf->map,
   1159 			    0, buf->m_head->m_pkthdr.len,
   1160 			    BUS_DMASYNC_POSTWRITE);
   1161 			ixgbe_dmamap_unload(txr->txtag, buf->map);
   1162 			m_freem(buf->m_head);
   1163 			buf->m_head = NULL;
   1164 		}
   1165 		buf->eop = NULL;
   1166 		txr->txr_no_space = false;
   1167 		++txr->tx_avail;
   1168 
   1169 		/* We clean the range if multi segment */
   1170 		while (txd != eop) {
   1171 			++txd;
   1172 			++buf;
   1173 			++work;
   1174 			/* wrap the ring? */
   1175 			if (__predict_false(!work)) {
   1176 				work -= txr->num_desc;
   1177 				buf = txr->tx_buffers;
   1178 				txd = txr->tx_base;
   1179 			}
   1180 			if (buf->m_head) {
   1181 				txr->bytes +=
   1182 				    buf->m_head->m_pkthdr.len;
   1183 				bus_dmamap_sync(txr->txtag->dt_dmat,
   1184 				    buf->map,
   1185 				    0, buf->m_head->m_pkthdr.len,
   1186 				    BUS_DMASYNC_POSTWRITE);
   1187 				ixgbe_dmamap_unload(txr->txtag,
   1188 				    buf->map);
   1189 				m_freem(buf->m_head);
   1190 				buf->m_head = NULL;
   1191 			}
   1192 			++txr->tx_avail;
   1193 			buf->eop = NULL;
   1194 
   1195 		}
   1196 		++txr->packets;
   1197 		++processed;
   1198 		if_statinc(ifp, if_opackets);
   1199 
   1200 		/* Try the next packet */
   1201 		++txd;
   1202 		++buf;
   1203 		++work;
   1204 		/* reset with a wrap */
   1205 		if (__predict_false(!work)) {
   1206 			work -= txr->num_desc;
   1207 			buf = txr->tx_buffers;
   1208 			txd = txr->tx_base;
   1209 		}
   1210 		prefetch(txd);
   1211 	} while (__predict_true(--limit));
   1212 
   1213 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1214 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   1215 
   1216 	work += txr->num_desc;
   1217 	txr->next_to_clean = work;
   1218 
   1219 	/*
    1220 	 * Queue hang detection: we know there is work
    1221 	 * outstanding or the first return above would have
    1222 	 * been taken, so increment busy if nothing was
    1223 	 * cleaned; the watchdog in local_timer checks this
    1224 	 * and marks the queue HUNG once it exceeds the
    1225 	 * maximum number of attempts.
   1226 	 */
   1227 	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
   1228 		++txr->busy;
   1229 	/*
    1230 	 * If anything was cleaned we reset the state to 1;
    1231 	 * note this clears HUNG if it was set.
   1232 	 */
   1233 	if (processed)
   1234 		txr->busy = 1;
   1235 
   1236 	if (txr->tx_avail == txr->num_desc)
   1237 		txr->busy = 0;
   1238 
   1239 	return ((limit > 0) ? false : true);
   1240 } /* ixgbe_txeof */
   1241 
   1242 /************************************************************************
   1243  * ixgbe_rsc_count
   1244  *
   1245  *   Used to detect a descriptor that has been merged by Hardware RSC.
   1246  ************************************************************************/
   1247 static inline u32
   1248 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
   1249 {
   1250 	return (le32toh(rx->wb.lower.lo_dword.data) &
   1251 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
   1252 } /* ixgbe_rsc_count */
   1253 
   1254 /************************************************************************
   1255  * ixgbe_setup_hw_rsc
   1256  *
    1257  *   Initialize the Hardware RSC (LRO) feature on 82599
    1258  *   for an RX ring; it is toggled by the LRO capability
    1259  *   even though it is transparent to the stack.
   1260  *
   1261  *   NOTE: Since this HW feature only works with IPv4 and
   1262  *         testing has shown soft LRO to be as effective,
   1263  *         this feature will be disabled by default.
   1264  ************************************************************************/
   1265 static void
   1266 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
   1267 {
   1268 	struct	adapter  *adapter = rxr->adapter;
   1269 	struct	ixgbe_hw *hw = &adapter->hw;
   1270 	u32              rscctrl, rdrxctl;
   1271 
   1272 	/* If turning LRO/RSC off we need to disable it */
   1273 	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
   1274 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
   1275 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
   1276 		return;
   1277 	}
   1278 
   1279 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
   1280 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
   1281 #ifdef DEV_NETMAP
   1282 	/* Always strip CRC unless Netmap disabled it */
   1283 	if (!(adapter->feat_en & IXGBE_FEATURE_NETMAP) ||
   1284 	    !(adapter->ifp->if_capenable & IFCAP_NETMAP) ||
   1285 	    ix_crcstrip)
   1286 #endif /* DEV_NETMAP */
   1287 		rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
   1288 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
   1289 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
   1290 
   1291 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
   1292 	rscctrl |= IXGBE_RSCCTL_RSCEN;
   1293 	/*
   1294 	 * Limit the total number of descriptors that
   1295 	 * can be combined, so it does not exceed 64K
   1296 	 */
   1297 	if (rxr->mbuf_sz == MCLBYTES)
   1298 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
   1299 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
   1300 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
   1301 	else if (rxr->mbuf_sz == MJUM9BYTES)
   1302 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
   1303 	else  /* Using 16K cluster */
   1304 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
   1305 
   1306 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
   1307 
   1308 	/* Enable TCP header recognition */
   1309 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
   1310 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR));
   1311 
   1312 	/* Disable RSC for ACK packets */
   1313 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
   1314 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
   1315 
   1316 	rxr->hw_rsc = TRUE;
   1317 } /* ixgbe_setup_hw_rsc */
   1318 
   1319 /************************************************************************
   1320  * ixgbe_refresh_mbufs
   1321  *
   1322  *   Refresh mbuf buffers for RX descriptor rings
   1323  *    - now keeps its own state so discards due to resource
   1324  *      exhaustion are unnecessary, if an mbuf cannot be obtained
   1325  *      it just returns, keeping its placeholder, thus it can simply
   1326  *      be recalled to try again.
   1327  *
   1328  *   XXX NetBSD TODO:
   1329  *    - The ixgbe_rxeof() function always preallocates mbuf cluster (jcl),
   1330  *      so the ixgbe_refresh_mbufs() function can be simplified.
   1331  *
   1332  ************************************************************************/
   1333 static void
   1334 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
   1335 {
   1336 	struct adapter      *adapter = rxr->adapter;
   1337 	struct ixgbe_rx_buf *rxbuf;
   1338 	struct mbuf         *mp;
   1339 	int                 i, j, error;
   1340 	bool                refreshed = false;
   1341 
   1342 	i = j = rxr->next_to_refresh;
   1343 	/* Control the loop with one beyond */
   1344 	if (++j == rxr->num_desc)
   1345 		j = 0;
   1346 
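	/*
	 * 'i' is the slot being refreshed and 'j' runs one descriptor
	 * ahead, so the loop stops before next_to_refresh can catch up
	 * with 'limit' (the caller's current position in the ring).
	 */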
   1347 	while (j != limit) {
   1348 		rxbuf = &rxr->rx_buffers[i];
   1349 		if (rxbuf->buf == NULL) {
   1350 			mp = ixgbe_getjcl(&rxr->jcl_head, M_NOWAIT,
   1351 			    MT_DATA, M_PKTHDR, rxr->mbuf_sz);
   1352 			if (mp == NULL) {
   1353 				rxr->no_jmbuf.ev_count++;
   1354 				goto update;
   1355 			}
   1356 			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
   1357 				m_adj(mp, ETHER_ALIGN);
   1358 		} else
   1359 			mp = rxbuf->buf;
   1360 
   1361 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   1362 
   1363 		/* If we're dealing with an mbuf that was copied rather
   1364 		 * than replaced, there's no need to go through busdma.
   1365 		 */
   1366 		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
   1367 			/* Get the memory mapping */
   1368 			ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   1369 			error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
   1370 			    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
   1371 			if (error != 0) {
   1372 				device_printf(adapter->dev, "Refresh mbufs: "
   1373 				    "payload dmamap load failure - %d\n",
   1374 				    error);
   1375 				m_free(mp);
   1376 				rxbuf->buf = NULL;
   1377 				goto update;
   1378 			}
   1379 			rxbuf->buf = mp;
   1380 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1381 			    0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
   1382 			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
   1383 			    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1384 		} else {
   1385 			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
   1386 			rxbuf->flags &= ~IXGBE_RX_COPY;
   1387 		}
   1388 
   1389 		refreshed = true;
   1390 		/* Next is precalculated */
   1391 		i = j;
   1392 		rxr->next_to_refresh = i;
   1393 		if (++j == rxr->num_desc)
   1394 			j = 0;
   1395 	}
   1396 
   1397 update:
   1398 	if (refreshed) /* Update hardware tail index */
   1399 		IXGBE_WRITE_REG(&adapter->hw, rxr->tail, rxr->next_to_refresh);
   1400 
   1401 	return;
   1402 } /* ixgbe_refresh_mbufs */
   1403 
   1404 /************************************************************************
   1405  * ixgbe_allocate_receive_buffers
   1406  *
   1407  *   Allocate memory for rx_buffer structures. Since we use one
   1408  *   rx_buffer per received packet, the maximum number of rx_buffer's
   1409  *   that we'll need is equal to the number of receive descriptors
   1410  *   that we've allocated.
   1411  ************************************************************************/
   1412 static int
   1413 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
   1414 {
   1415 	struct adapter      *adapter = rxr->adapter;
   1416 	device_t            dev = adapter->dev;
   1417 	struct ixgbe_rx_buf *rxbuf;
   1418 	int                 bsize, error;
   1419 
   1420 	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
   1421 	rxr->rx_buffers = malloc(bsize, M_DEVBUF, M_WAITOK | M_ZERO);
   1422 
   1423 	error = ixgbe_dma_tag_create(
   1424 	         /*      parent */ adapter->osdep.dmat,
   1425 	         /*   alignment */ 1,
   1426 	         /*      bounds */ 0,
   1427 	         /*     maxsize */ MJUM16BYTES,
   1428 	         /*   nsegments */ 1,
   1429 	         /*  maxsegsize */ MJUM16BYTES,
   1430 	         /*       flags */ 0,
   1431 	                           &rxr->ptag);
   1432 	if (error != 0) {
   1433 		aprint_error_dev(dev, "Unable to create RX DMA tag\n");
   1434 		goto fail;
   1435 	}
   1436 
   1437 	for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
   1438 		rxbuf = &rxr->rx_buffers[i];
   1439 		error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
   1440 		if (error) {
   1441 			aprint_error_dev(dev, "Unable to create RX dma map\n");
   1442 			goto fail;
   1443 		}
   1444 	}
   1445 
   1446 	return (0);
   1447 
   1448 fail:
   1449 	/* Frees all, but can handle partial completion */
   1450 	ixgbe_free_receive_structures(adapter);
   1451 
   1452 	return (error);
   1453 } /* ixgbe_allocate_receive_buffers */
   1454 
   1455 /************************************************************************
   1456  * ixgbe_free_receive_ring
   1457  ************************************************************************/
   1458 static void
   1459 ixgbe_free_receive_ring(struct rx_ring *rxr)
   1460 {
   1461 	for (int i = 0; i < rxr->num_desc; i++) {
   1462 		ixgbe_rx_discard(rxr, i);
   1463 	}
   1464 } /* ixgbe_free_receive_ring */
   1465 
   1466 /************************************************************************
   1467  * ixgbe_setup_receive_ring
   1468  *
   1469  *   Initialize a receive ring and its buffers.
   1470  ************************************************************************/
   1471 static int
   1472 ixgbe_setup_receive_ring(struct rx_ring *rxr)
   1473 {
   1474 	struct adapter        *adapter;
   1475 	struct ixgbe_rx_buf   *rxbuf;
   1476 #ifdef LRO
   1477 	struct ifnet          *ifp;
   1478 	struct lro_ctrl       *lro = &rxr->lro;
   1479 #endif /* LRO */
   1480 #ifdef DEV_NETMAP
   1481 	struct netmap_adapter *na = NA(rxr->adapter->ifp);
   1482 	struct netmap_slot    *slot;
   1483 #endif /* DEV_NETMAP */
   1484 	int                   rsize, error = 0;
   1485 
   1486 	adapter = rxr->adapter;
   1487 #ifdef LRO
   1488 	ifp = adapter->ifp;
   1489 #endif /* LRO */
   1490 
   1491 	/* Clear the ring contents */
   1492 	IXGBE_RX_LOCK(rxr);
   1493 
   1494 #ifdef DEV_NETMAP
   1495 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP)
   1496 		slot = netmap_reset(na, NR_RX, rxr->me, 0);
   1497 #endif /* DEV_NETMAP */
   1498 
   1499 	rsize = roundup2(adapter->num_rx_desc *
   1500 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
   1501 	bzero((void *)rxr->rx_base, rsize);
   1502 	/* Cache the size */
   1503 	rxr->mbuf_sz = adapter->rx_mbuf_sz;
   1504 
   1505 	/* Free current RX buffer structs and their mbufs */
   1506 	ixgbe_free_receive_ring(rxr);
   1507 
   1508 	IXGBE_RX_UNLOCK(rxr);
   1509 	/*
   1510 	 * Now reinitialize our supply of jumbo mbufs.  The number
   1511 	 * or size of jumbo mbufs may have changed.
    1512 	 * Assume every ring's rxr->ptag is the same.
   1513 	 */
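         	/* ixgbe_jcl_reinit() is a NetBSD-specific helper; see ixgbe_netbsd.c. */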
   1514 	ixgbe_jcl_reinit(adapter, rxr->ptag->dt_dmat, rxr,
   1515 	    adapter->num_jcl, adapter->rx_mbuf_sz);
   1516 
   1517 	IXGBE_RX_LOCK(rxr);
   1518 
   1519 	/* Now replenish the mbufs */
   1520 	for (int j = 0; j != rxr->num_desc; ++j) {
   1521 		struct mbuf *mp;
   1522 
   1523 		rxbuf = &rxr->rx_buffers[j];
   1524 
   1525 #ifdef DEV_NETMAP
   1526 		/*
   1527 		 * In netmap mode, fill the map and set the buffer
   1528 		 * address in the NIC ring, considering the offset
   1529 		 * between the netmap and NIC rings (see comment in
   1530 		 * ixgbe_setup_transmit_ring() ). No need to allocate
   1531 		 * an mbuf, so end the block with a continue;
   1532 		 */
   1533 		if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
   1534 			int sj = netmap_idx_n2k(na->rx_rings[rxr->me], j);
   1535 			uint64_t paddr;
   1536 			void *addr;
   1537 
   1538 			addr = PNMB(na, slot + sj, &paddr);
   1539 			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
   1540 			/* Update descriptor and the cached value */
   1541 			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
   1542 			rxbuf->addr = htole64(paddr);
   1543 			continue;
   1544 		}
   1545 #endif /* DEV_NETMAP */
   1546 
   1547 		rxbuf->flags = 0;
   1548 		rxbuf->buf = ixgbe_getjcl(&rxr->jcl_head, M_NOWAIT,
   1549 		    MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
   1550 		if (rxbuf->buf == NULL) {
   1551 			error = ENOBUFS;
   1552 			goto fail;
   1553 		}
   1554 		mp = rxbuf->buf;
   1555 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   1556 		/* Get the memory mapping */
   1557 		error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat, rxbuf->pmap,
   1558 		    mp, BUS_DMA_NOWAIT);
   1559 		if (error != 0) {
   1560 			/*
   1561 			 * Clear this entry for later cleanup in
   1562 			 * ixgbe_discard() which is called via
   1563 			 * ixgbe_free_receive_ring().
   1564 			 */
   1565 			m_freem(mp);
   1566 			rxbuf->buf = NULL;
    1567 			goto fail;
   1568 		}
   1569 		bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1570 		    0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
   1571 		/* Update the descriptor and the cached value */
   1572 		rxr->rx_base[j].read.pkt_addr =
   1573 		    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1574 		rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1575 	}
   1576 
   1577 	/* Setup our descriptor indices */
   1578 	rxr->next_to_check = 0;
   1579 	rxr->next_to_refresh = 0;
   1580 	rxr->lro_enabled = FALSE;
   1581 	rxr->rx_copies.ev_count = 0;
   1582 #if 0 /* NetBSD */
   1583 	rxr->rx_bytes.ev_count = 0;
   1584 #if 1	/* Fix inconsistency */
   1585 	rxr->rx_packets.ev_count = 0;
   1586 #endif
   1587 #endif
   1588 	rxr->vtag_strip = FALSE;
   1589 
   1590 	ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   1591 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   1592 
   1593 	/*
   1594 	 * Now set up the LRO interface
   1595 	 */
   1596 	if (ixgbe_rsc_enable)
   1597 		ixgbe_setup_hw_rsc(rxr);
   1598 #ifdef LRO
   1599 	else if (ifp->if_capenable & IFCAP_LRO) {
   1600 		device_t dev = adapter->dev;
   1601 		int err = tcp_lro_init(lro);
   1602 		if (err) {
   1603 			device_printf(dev, "LRO Initialization failed!\n");
   1604 			goto fail;
   1605 		}
   1606 		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
   1607 		rxr->lro_enabled = TRUE;
   1608 		lro->ifp = adapter->ifp;
   1609 	}
   1610 #endif /* LRO */
   1611 
   1612 	IXGBE_RX_UNLOCK(rxr);
   1613 
   1614 	return (0);
   1615 
   1616 fail:
   1617 	ixgbe_free_receive_ring(rxr);
   1618 	IXGBE_RX_UNLOCK(rxr);
   1619 
   1620 	return (error);
   1621 } /* ixgbe_setup_receive_ring */
   1622 
   1623 /************************************************************************
   1624  * ixgbe_setup_receive_structures - Initialize all receive rings.
   1625  ************************************************************************/
   1626 int
   1627 ixgbe_setup_receive_structures(struct adapter *adapter)
   1628 {
   1629 	struct rx_ring *rxr = adapter->rx_rings;
   1630 	int            j;
   1631 
   1632 	INIT_DEBUGOUT("ixgbe_setup_receive_structures");
   1633 	for (j = 0; j < adapter->num_queues; j++, rxr++)
   1634 		if (ixgbe_setup_receive_ring(rxr))
   1635 			goto fail;
   1636 
   1637 	return (0);
   1638 fail:
   1639 	/*
    1640 	 * Free RX buffers allocated so far; we will only handle
    1641 	 * the rings that completed, since the failing case will have
    1642 	 * cleaned up after itself. 'j' failed, so it's the terminus.
   1643 	 */
   1644 	for (int i = 0; i < j; ++i) {
   1645 		rxr = &adapter->rx_rings[i];
   1646 		IXGBE_RX_LOCK(rxr);
   1647 		ixgbe_free_receive_ring(rxr);
   1648 		IXGBE_RX_UNLOCK(rxr);
   1649 	}
   1650 
   1651 	return (ENOBUFS);
   1652 } /* ixgbe_setup_receive_structures */
   1653 
   1654 
   1655 /************************************************************************
   1656  * ixgbe_free_receive_structures - Free all receive rings.
   1657  ************************************************************************/
   1658 void
   1659 ixgbe_free_receive_structures(struct adapter *adapter)
   1660 {
   1661 	struct rx_ring *rxr = adapter->rx_rings;
   1662 
   1663 	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
   1664 
   1665 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
   1666 		ixgbe_free_receive_buffers(rxr);
   1667 #ifdef LRO
   1668 		/* Free LRO memory */
   1669 		tcp_lro_free(&rxr->lro);
   1670 #endif /* LRO */
   1671 		/* Free the ring memory as well */
   1672 		ixgbe_dma_free(adapter, &rxr->rxdma);
   1673 		IXGBE_RX_LOCK_DESTROY(rxr);
   1674 	}
   1675 
   1676 	free(adapter->rx_rings, M_DEVBUF);
   1677 } /* ixgbe_free_receive_structures */
   1678 
   1679 
   1680 /************************************************************************
   1681  * ixgbe_free_receive_buffers - Free receive ring data structures
   1682  ************************************************************************/
   1683 static void
   1684 ixgbe_free_receive_buffers(struct rx_ring *rxr)
   1685 {
   1686 	struct adapter      *adapter = rxr->adapter;
   1687 	struct ixgbe_rx_buf *rxbuf;
   1688 
   1689 	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
   1690 
   1691 	/* Cleanup any existing buffers */
   1692 	if (rxr->rx_buffers != NULL) {
   1693 		for (int i = 0; i < adapter->num_rx_desc; i++) {
   1694 			rxbuf = &rxr->rx_buffers[i];
   1695 			ixgbe_rx_discard(rxr, i);
   1696 			if (rxbuf->pmap != NULL) {
   1697 				ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
   1698 				rxbuf->pmap = NULL;
   1699 			}
   1700 		}
   1701 
   1702 		/* NetBSD specific. See ixgbe_netbsd.c */
   1703 		ixgbe_jcl_destroy(adapter, rxr);
   1704 
   1705 		if (rxr->rx_buffers != NULL) {
   1706 			free(rxr->rx_buffers, M_DEVBUF);
   1707 			rxr->rx_buffers = NULL;
   1708 		}
   1709 	}
   1710 
   1711 	if (rxr->ptag != NULL) {
   1712 		ixgbe_dma_tag_destroy(rxr->ptag);
   1713 		rxr->ptag = NULL;
   1714 	}
   1715 
   1716 	return;
   1717 } /* ixgbe_free_receive_buffers */
   1718 
   1719 /************************************************************************
   1720  * ixgbe_rx_input
   1721  ************************************************************************/
   1722 static __inline void
   1723 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m,
   1724     u32 ptype)
   1725 {
   1726 	struct adapter	*adapter = ifp->if_softc;
   1727 
   1728 #ifdef LRO
   1729 	struct ethercom *ec = &adapter->osdep.ec;
   1730 
   1731 	/*
    1732 	 * At the moment LRO is only for IP/TCP packets whose TCP checksum was
    1733 	 * computed by hardware and which carry no VLAN tag in the Ethernet
    1734 	 * header.  For IPv6 we do not yet support extension headers.
   1735 	 */
    1736 	if (rxr->lro_enabled &&
    1737 	    (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
    1738 	    (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
    1739 	    ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
    1740 	    (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
    1741 	    (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
    1742 	    (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
    1743 	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
    1744 	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
    1745 		/*
    1746 		 * Send to the stack if:
    1747 		 *  - LRO not enabled, or
    1748 		 *  - no LRO resources, or
    1749 		 *  - lro enqueue fails
    1750 		 */
    1751 		if (rxr->lro.lro_cnt != 0)
    1752 			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
    1753 				return;
    1754 	}
   1755 #endif /* LRO */
   1756 
   1757 	if_percpuq_enqueue(adapter->ipq, m);
   1758 } /* ixgbe_rx_input */
   1759 
   1760 /************************************************************************
   1761  * ixgbe_rx_discard
   1762  ************************************************************************/
   1763 static __inline void
   1764 ixgbe_rx_discard(struct rx_ring *rxr, int i)
   1765 {
   1766 	struct ixgbe_rx_buf *rbuf;
   1767 
   1768 	rbuf = &rxr->rx_buffers[i];
   1769 
   1770 	/*
   1771 	 * With advanced descriptors the writeback clobbers the buffer addrs,
   1772 	 * so its easier to just free the existing mbufs and take the normal
    1773 	 * so it's easier to just free the existing mbufs and take the normal
   1774 	 */
   1775 
   1776 	if (rbuf->fmp != NULL) {/* Partial chain ? */
   1777 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
   1778 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
   1779 		ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
   1780 		m_freem(rbuf->fmp);
   1781 		rbuf->fmp = NULL;
   1782 		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
   1783 	} else if (rbuf->buf) {
   1784 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
   1785 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
   1786 		ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
   1787 		m_free(rbuf->buf);
   1788 		rbuf->buf = NULL;
   1789 	}
   1790 
   1791 	rbuf->flags = 0;
   1792 
   1793 	return;
   1794 } /* ixgbe_rx_discard */
   1795 
   1796 
   1797 /************************************************************************
   1798  * ixgbe_rxeof
   1799  *
    1800  *   Executes in interrupt context. It replenishes the
    1801  *   mbufs in the descriptor ring and sends data which has
    1802  *   been DMA'ed into host memory to the upper layer.
   1803  *
   1804  *   Return TRUE for more work, FALSE for all clean.
   1805  ************************************************************************/
   1806 bool
   1807 ixgbe_rxeof(struct ix_queue *que)
   1808 {
   1809 	struct adapter		*adapter = que->adapter;
   1810 	struct rx_ring		*rxr = que->rxr;
   1811 	struct ifnet		*ifp = adapter->ifp;
   1812 #ifdef LRO
   1813 	struct lro_ctrl		*lro = &rxr->lro;
   1814 #endif /* LRO */
   1815 	union ixgbe_adv_rx_desc	*cur;
   1816 	struct ixgbe_rx_buf	*rbuf, *nbuf;
   1817 	int			i, nextp, processed = 0;
   1818 	u32			staterr = 0;
   1819 	u32			count = 0;
   1820 	u32			limit = adapter->rx_process_limit;
   1821 	bool			discard_multidesc = false;
   1822 #ifdef RSS
   1823 	u16			pkt_info;
   1824 #endif
   1825 
   1826 	IXGBE_RX_LOCK(rxr);
   1827 
   1828 #ifdef DEV_NETMAP
   1829 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
   1830 		/* Same as the txeof routine: wakeup clients on intr. */
   1831 		if (netmap_rx_irq(ifp, rxr->me, &processed)) {
   1832 			IXGBE_RX_UNLOCK(rxr);
   1833 			return (FALSE);
   1834 		}
   1835 	}
   1836 #endif /* DEV_NETMAP */
   1837 
   1838 	/*
    1839 	 * The maximum number of loop iterations is rx_process_limit. If
    1840 	 * discard_multidesc is true, keep processing so that a broken packet
    1841 	 * is not handed to the upper layer.
   1842 	 */
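         	/*
         	 * discard_multidesc is set below when a replacement cluster
         	 * cannot be allocated in the middle of a multi-descriptor
         	 * frame; the rest of that frame is then consumed and dropped
         	 * (until EOP) so the ring state stays consistent.
         	 */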
   1843 	for (i = rxr->next_to_check;
   1844 	     (count < limit) || (discard_multidesc == true);) {
   1845 
   1846 		struct mbuf *sendmp, *mp;
   1847 		struct mbuf *newmp;
   1848 		u32         rsc, ptype;
   1849 		u16         len;
   1850 		u16         vtag = 0;
   1851 		bool        eop;
   1852 
   1853 		/* Sync the ring. */
   1854 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   1855 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   1856 
   1857 		cur = &rxr->rx_base[i];
   1858 		staterr = le32toh(cur->wb.upper.status_error);
   1859 #ifdef RSS
   1860 		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
   1861 #endif
   1862 
   1863 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
   1864 			break;
   1865 
   1866 		count++;
   1867 		sendmp = NULL;
   1868 		nbuf = NULL;
   1869 		rsc = 0;
   1870 		cur->wb.upper.status_error = 0;
   1871 		rbuf = &rxr->rx_buffers[i];
   1872 		mp = rbuf->buf;
   1873 
   1874 		len = le16toh(cur->wb.upper.length);
   1875 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
   1876 		    IXGBE_RXDADV_PKTTYPE_MASK;
   1877 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
   1878 
   1879 		/* Make sure bad packets are discarded */
   1880 		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
   1881 #if __FreeBSD_version >= 1100036
   1882 			if (adapter->feat_en & IXGBE_FEATURE_VF)
   1883 				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
   1884 #endif
   1885 			rxr->rx_discarded.ev_count++;
   1886 			ixgbe_rx_discard(rxr, i);
   1887 			discard_multidesc = false;
   1888 			goto next_desc;
   1889 		}
   1890 
   1891 		/* pre-alloc new mbuf */
   1892 		if (!discard_multidesc)
   1893 			newmp = ixgbe_getjcl(&rxr->jcl_head, M_NOWAIT, MT_DATA,
   1894 			    M_PKTHDR, rxr->mbuf_sz);
   1895 		else
   1896 			newmp = NULL;
   1897 		if (newmp == NULL) {
   1898 			rxr->no_jmbuf.ev_count++;
   1899 			/*
   1900 			 * Descriptor initialization is already done by the
   1901 			 * above code (cur->wb.upper.status_error = 0).
   1902 			 * So, we can reuse current rbuf->buf for new packet.
   1903 			 *
   1904 			 * Rewrite the buffer addr, see comment in
   1905 			 * ixgbe_rx_discard().
   1906 			 */
   1907 			cur->read.pkt_addr = rbuf->addr;
   1908 			m_freem(rbuf->fmp);
   1909 			rbuf->fmp = NULL;
   1910 			if (!eop) {
   1911 				/* Discard the entire packet. */
   1912 				discard_multidesc = true;
   1913 			} else
   1914 				discard_multidesc = false;
   1915 			goto next_desc;
   1916 		}
   1917 		discard_multidesc = false;
   1918 
   1919 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
   1920 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
   1921 
   1922 		/*
    1923 		 * On the 82599, which supports hardware
    1924 		 * LRO (called HW RSC), packets need not
    1925 		 * be fragmented across sequential
    1926 		 * descriptors; instead the next descriptor
    1927 		 * is indicated in bits of this descriptor.
    1928 		 * This also means that we may process
    1929 		 * more than one packet at a time, something
    1930 		 * that was never true before; it required
    1931 		 * eliminating global chain pointers in
    1932 		 * favor of what we are doing here.  -jfv
   1933 		 */
   1934 		if (!eop) {
   1935 			/*
   1936 			 * Figure out the next descriptor
   1937 			 * of this frame.
   1938 			 */
   1939 			if (rxr->hw_rsc == TRUE) {
   1940 				rsc = ixgbe_rsc_count(cur);
   1941 				rxr->rsc_num += (rsc - 1);
   1942 			}
   1943 			if (rsc) { /* Get hardware index */
   1944 				nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
   1945 				    IXGBE_RXDADV_NEXTP_SHIFT);
   1946 			} else { /* Just sequential */
   1947 				nextp = i + 1;
   1948 				if (nextp == adapter->num_rx_desc)
   1949 					nextp = 0;
   1950 			}
   1951 			nbuf = &rxr->rx_buffers[nextp];
   1952 			prefetch(nbuf);
   1953 		}
   1954 		/*
   1955 		 * Rather than using the fmp/lmp global pointers
   1956 		 * we now keep the head of a packet chain in the
   1957 		 * buffer struct and pass this along from one
   1958 		 * descriptor to the next, until we get EOP.
   1959 		 */
   1960 		/*
    1961 		 * See if there is a stored head; if so, this
    1962 		 * descriptor continues an existing packet chain.
   1963 		 */
   1964 		sendmp = rbuf->fmp;
   1965 		if (sendmp != NULL) {  /* secondary frag */
   1966 			rbuf->buf = newmp;
   1967 			rbuf->fmp = NULL;
   1968 			mp->m_len = len;
   1969 			mp->m_flags &= ~M_PKTHDR;
   1970 			sendmp->m_pkthdr.len += mp->m_len;
   1971 		} else {
   1972 			/*
   1973 			 * Optimize.  This might be a small packet,
   1974 			 * maybe just a TCP ACK.  Do a fast copy that
   1975 			 * is cache aligned into a new mbuf, and
   1976 			 * leave the old mbuf+cluster for re-use.
   1977 			 */
   1978 			if (eop && len <= IXGBE_RX_COPY_LEN) {
   1979 				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
   1980 				if (sendmp != NULL) {
   1981 					sendmp->m_data += IXGBE_RX_COPY_ALIGN;
   1982 					ixgbe_bcopy(mp->m_data, sendmp->m_data,
   1983 					    len);
   1984 					sendmp->m_len = len;
   1985 					rxr->rx_copies.ev_count++;
   1986 					rbuf->flags |= IXGBE_RX_COPY;
   1987 
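         					/*
         					 * The payload was copied into sendmp; keep the
         					 * original mbuf + cluster in place and drop the
         					 * pre-allocated replacement.  IXGBE_RX_COPY tells
         					 * ixgbe_refresh_mbufs() to repost the cached buffer
         					 * address (rxbuf->addr) instead of loading a new
         					 * cluster.
         					 */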
   1988 					m_freem(newmp);
   1989 				}
   1990 			}
   1991 			if (sendmp == NULL) {
   1992 				rbuf->buf = newmp;
   1993 				rbuf->fmp = NULL;
   1994 				mp->m_len = len;
   1995 				sendmp = mp;
   1996 			}
   1997 
   1998 			/* first desc of a non-ps chain */
   1999 			sendmp->m_flags |= M_PKTHDR;
   2000 			sendmp->m_pkthdr.len = len;
   2001 		}
   2002 		++processed;
   2003 
   2004 		/* Pass the head pointer on */
   2005 		if (eop == 0) {
   2006 			nbuf->fmp = sendmp;
   2007 			sendmp = NULL;
   2008 			mp->m_next = nbuf->buf;
   2009 		} else { /* Sending this frame */
   2010 			m_set_rcvif(sendmp, ifp);
   2011 			++rxr->packets;
   2012 			rxr->rx_packets.ev_count++;
   2013 			/* capture data for AIM */
   2014 			rxr->bytes += sendmp->m_pkthdr.len;
   2015 			rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
   2016 			/* Process vlan info */
   2017 			if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
   2018 				vtag = le16toh(cur->wb.upper.vlan);
   2019 			if (vtag) {
   2020 				vlan_set_tag(sendmp, vtag);
   2021 			}
   2022 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
   2023 				ixgbe_rx_checksum(staterr, sendmp, ptype,
   2024 				   &adapter->stats.pf);
   2025 			}
   2026 
   2027 #if 0 /* FreeBSD */
   2028 			/*
   2029 			 * In case of multiqueue, we have RXCSUM.PCSD bit set
   2030 			 * and never cleared. This means we have RSS hash
   2031 			 * available to be used.
   2032 			 */
   2033 			if (adapter->num_queues > 1) {
   2034 				sendmp->m_pkthdr.flowid =
   2035 				    le32toh(cur->wb.lower.hi_dword.rss);
   2036 				switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
   2037 				case IXGBE_RXDADV_RSSTYPE_IPV4:
   2038 					M_HASHTYPE_SET(sendmp,
   2039 					    M_HASHTYPE_RSS_IPV4);
   2040 					break;
   2041 				case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
   2042 					M_HASHTYPE_SET(sendmp,
   2043 					    M_HASHTYPE_RSS_TCP_IPV4);
   2044 					break;
   2045 				case IXGBE_RXDADV_RSSTYPE_IPV6:
   2046 					M_HASHTYPE_SET(sendmp,
   2047 					    M_HASHTYPE_RSS_IPV6);
   2048 					break;
   2049 				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
   2050 					M_HASHTYPE_SET(sendmp,
   2051 					    M_HASHTYPE_RSS_TCP_IPV6);
   2052 					break;
   2053 				case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
   2054 					M_HASHTYPE_SET(sendmp,
   2055 					    M_HASHTYPE_RSS_IPV6_EX);
   2056 					break;
   2057 				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
   2058 					M_HASHTYPE_SET(sendmp,
   2059 					    M_HASHTYPE_RSS_TCP_IPV6_EX);
   2060 					break;
   2061 #if __FreeBSD_version > 1100000
   2062 				case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
   2063 					M_HASHTYPE_SET(sendmp,
   2064 					    M_HASHTYPE_RSS_UDP_IPV4);
   2065 					break;
   2066 				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
   2067 					M_HASHTYPE_SET(sendmp,
   2068 					    M_HASHTYPE_RSS_UDP_IPV6);
   2069 					break;
   2070 				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
   2071 					M_HASHTYPE_SET(sendmp,
   2072 					    M_HASHTYPE_RSS_UDP_IPV6_EX);
   2073 					break;
   2074 #endif
   2075 				default:
   2076 					M_HASHTYPE_SET(sendmp,
   2077 					    M_HASHTYPE_OPAQUE_HASH);
   2078 				}
   2079 			} else {
   2080 				sendmp->m_pkthdr.flowid = que->msix;
   2081 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
   2082 			}
   2083 #endif
   2084 		}
   2085 next_desc:
   2086 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   2087 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   2088 
   2089 		/* Advance our pointers to the next descriptor. */
   2090 		if (++i == rxr->num_desc)
   2091 			i = 0;
   2092 
   2093 		/* Now send to the stack or do LRO */
   2094 		if (sendmp != NULL) {
   2095 			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
   2096 		}
   2097 
   2098 		/* Every 8 descriptors we go to refresh mbufs */
   2099 		if (processed == 8) {
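         		/*
         		 * Batching the refresh amortizes the hardware tail-register
         		 * write done in ixgbe_refresh_mbufs().
         		 */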
   2100 			ixgbe_refresh_mbufs(rxr, i);
   2101 			processed = 0;
   2102 		}
   2103 	}
   2104 
   2105 	/* Refresh any remaining buf structs */
   2106 	if (ixgbe_rx_unrefreshed(rxr))
   2107 		ixgbe_refresh_mbufs(rxr, i);
   2108 
   2109 	rxr->next_to_check = i;
   2110 
   2111 	IXGBE_RX_UNLOCK(rxr);
   2112 
   2113 #ifdef LRO
   2114 	/*
   2115 	 * Flush any outstanding LRO work
   2116 	 */
   2117 	tcp_lro_flush_all(lro);
   2118 #endif /* LRO */
   2119 
   2120 	/*
   2121 	 * Still have cleaning to do?
   2122 	 */
   2123 	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
   2124 		return (TRUE);
   2125 
   2126 	return (FALSE);
   2127 } /* ixgbe_rxeof */
   2128 
   2129 
   2130 /************************************************************************
   2131  * ixgbe_rx_checksum
   2132  *
   2133  *   Verify that the hardware indicated that the checksum is valid.
    2134  *   Inform the stack about the checksum status so that the stack
    2135  *   doesn't spend time verifying it.
   2136  ************************************************************************/
   2137 static void
   2138 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
   2139     struct ixgbe_hw_stats *stats)
   2140 {
   2141 	u16  status = (u16)staterr;
   2142 	u8   errors = (u8)(staterr >> 24);
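         	/*
         	 * The descriptor's status bits occupy the low 16 bits of
         	 * staterr; the error bits are reported in bits 24-31.
         	 */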
   2143 #if 0
   2144 	bool sctp = false;
   2145 
   2146 	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
   2147 	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
   2148 		sctp = true;
   2149 #endif
   2150 
   2151 	/* IPv4 checksum */
   2152 	if (status & IXGBE_RXD_STAT_IPCS) {
   2153 		stats->ipcs.ev_count++;
   2154 		if (!(errors & IXGBE_RXD_ERR_IPE)) {
   2155 			/* IP Checksum Good */
   2156 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
   2157 		} else {
   2158 			stats->ipcs_bad.ev_count++;
   2159 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
   2160 		}
   2161 	}
   2162 	/* TCP/UDP/SCTP checksum */
   2163 	if (status & IXGBE_RXD_STAT_L4CS) {
   2164 		stats->l4cs.ev_count++;
   2165 		int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
   2166 		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
   2167 			mp->m_pkthdr.csum_flags |= type;
   2168 		} else {
   2169 			stats->l4cs_bad.ev_count++;
   2170 			mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
   2171 		}
   2172 	}
   2173 } /* ixgbe_rx_checksum */
   2174 
   2175 /************************************************************************
   2176  * ixgbe_dma_malloc
   2177  ************************************************************************/
   2178 int
   2179 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
   2180 		struct ixgbe_dma_alloc *dma, const int mapflags)
   2181 {
   2182 	device_t dev = adapter->dev;
   2183 	int      r, rsegs;
   2184 
   2185 	r = ixgbe_dma_tag_create(
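         	/*
         	 * Five steps, each undone by the matching fail_* label below:
         	 * create a DMA tag, allocate the raw memory, map it into KVA,
         	 * create a DMA map, and finally load the map to obtain the
         	 * bus address of the descriptor area.
         	 */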
   2186 	     /*      parent */ adapter->osdep.dmat,
   2187 	     /*   alignment */ DBA_ALIGN,
   2188 	     /*      bounds */ 0,
   2189 	     /*     maxsize */ size,
   2190 	     /*   nsegments */ 1,
   2191 	     /*  maxsegsize */ size,
   2192 	     /*       flags */ BUS_DMA_ALLOCNOW,
   2193 			       &dma->dma_tag);
   2194 	if (r != 0) {
   2195 		aprint_error_dev(dev,
   2196 		    "%s: ixgbe_dma_tag_create failed; error %d\n", __func__,
   2197 		    r);
   2198 		goto fail_0;
   2199 	}
   2200 
   2201 	r = bus_dmamem_alloc(dma->dma_tag->dt_dmat, size,
   2202 	    dma->dma_tag->dt_alignment, dma->dma_tag->dt_boundary,
   2203 	    &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
   2204 	if (r != 0) {
   2205 		aprint_error_dev(dev,
   2206 		    "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
   2207 		goto fail_1;
   2208 	}
   2209 
   2210 	r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
   2211 	    size, &dma->dma_vaddr, BUS_DMA_NOWAIT | BUS_DMA_COHERENT);
   2212 	if (r != 0) {
   2213 		aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
   2214 		    __func__, r);
   2215 		goto fail_2;
   2216 	}
   2217 
   2218 	r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
   2219 	if (r != 0) {
    2220 		aprint_error_dev(dev, "%s: ixgbe_dmamap_create failed; error %d\n",
   2221 		    __func__, r);
   2222 		goto fail_3;
   2223 	}
   2224 
   2225 	r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map,
   2226 	    dma->dma_vaddr, size, NULL, mapflags | BUS_DMA_NOWAIT);
   2227 	if (r != 0) {
   2228 		aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
   2229 		    __func__, r);
   2230 		goto fail_4;
   2231 	}
   2232 	dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
   2233 	dma->dma_size = size;
   2234 	return 0;
   2235 fail_4:
   2236 	ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
   2237 fail_3:
   2238 	bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
   2239 fail_2:
   2240 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
   2241 fail_1:
   2242 	ixgbe_dma_tag_destroy(dma->dma_tag);
   2243 fail_0:
   2244 
   2245 	return (r);
   2246 } /* ixgbe_dma_malloc */
   2247 
   2248 /************************************************************************
   2249  * ixgbe_dma_free
   2250  ************************************************************************/
   2251 void
   2252 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
   2253 {
   2254 	bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
   2255 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   2256 	ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
   2257 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
   2258 	ixgbe_dma_tag_destroy(dma->dma_tag);
   2259 } /* ixgbe_dma_free */
   2260 
   2261 
   2262 /************************************************************************
   2263  * ixgbe_allocate_queues
   2264  *
   2265  *   Allocate memory for the transmit and receive rings, and then
   2266  *   the descriptors associated with each, called only once at attach.
   2267  ************************************************************************/
   2268 int
   2269 ixgbe_allocate_queues(struct adapter *adapter)
   2270 {
   2271 	device_t	dev = adapter->dev;
   2272 	struct ix_queue	*que;
   2273 	struct tx_ring	*txr;
   2274 	struct rx_ring	*rxr;
   2275 	int             rsize, tsize, error = IXGBE_SUCCESS;
   2276 	int             txconf = 0, rxconf = 0;
   2277 
   2278 	/* First, allocate the top level queue structs */
   2279 	adapter->queues = (struct ix_queue *)malloc(sizeof(struct ix_queue) *
   2280 	    adapter->num_queues, M_DEVBUF, M_WAITOK | M_ZERO);
   2281 
   2282 	/* Second, allocate the TX ring struct memory */
   2283 	adapter->tx_rings = malloc(sizeof(struct tx_ring) *
   2284 	    adapter->num_queues, M_DEVBUF, M_WAITOK | M_ZERO);
   2285 
   2286 	/* Third, allocate the RX ring */
   2287 	adapter->rx_rings = (struct rx_ring *)malloc(sizeof(struct rx_ring) *
   2288 	    adapter->num_queues, M_DEVBUF, M_WAITOK | M_ZERO);
   2289 
   2290 	/* For the ring itself */
   2291 	tsize = roundup2(adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc),
   2292 	    DBA_ALIGN);
   2293 
   2294 	/*
    2295 	 * Now set up the TX queues; txconf is needed to handle the
    2296 	 * possibility that things fail midcourse and we need to
    2297 	 * unwind the allocations gracefully
   2298 	 */
   2299 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
   2300 		/* Set up some basics */
   2301 		txr = &adapter->tx_rings[i];
   2302 		txr->adapter = adapter;
   2303 		txr->txr_interq = NULL;
   2304 		/* In case SR-IOV is enabled, align the index properly */
   2305 #ifdef PCI_IOV
   2306 		txr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
   2307 		    i);
   2308 #else
   2309 		txr->me = i;
   2310 #endif
   2311 		txr->num_desc = adapter->num_tx_desc;
   2312 
   2313 		/* Initialize the TX side lock */
   2314 		mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
   2315 
   2316 		if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma,
   2317 		    BUS_DMA_NOWAIT)) {
   2318 			aprint_error_dev(dev,
   2319 			    "Unable to allocate TX Descriptor memory\n");
   2320 			error = ENOMEM;
   2321 			goto err_tx_desc;
   2322 		}
   2323 		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
   2324 		bzero((void *)txr->tx_base, tsize);
   2325 
   2326 		/* Now allocate transmit buffers for the ring */
   2327 		if (ixgbe_allocate_transmit_buffers(txr)) {
   2328 			aprint_error_dev(dev,
   2329 			    "Critical Failure setting up transmit buffers\n");
   2330 			error = ENOMEM;
   2331 			goto err_tx_desc;
   2332 		}
   2333 		if (!(adapter->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
   2334 			/* Allocate a buf ring */
   2335 			txr->txr_interq = pcq_create(IXGBE_BR_SIZE, KM_SLEEP);
   2336 			if (txr->txr_interq == NULL) {
   2337 				aprint_error_dev(dev,
   2338 				    "Critical Failure setting up buf ring\n");
   2339 				error = ENOMEM;
   2340 				goto err_tx_desc;
   2341 			}
   2342 		}
   2343 	}
   2344 
   2345 	/*
   2346 	 * Next the RX queues...
   2347 	 */
   2348 	rsize = roundup2(adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc),
   2349 	    DBA_ALIGN);
   2350 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
   2351 		rxr = &adapter->rx_rings[i];
   2352 		/* Set up some basics */
   2353 		rxr->adapter = adapter;
   2354 #ifdef PCI_IOV
   2355 		/* In case SR-IOV is enabled, align the index properly */
   2356 		rxr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
   2357 		    i);
   2358 #else
   2359 		rxr->me = i;
   2360 #endif
   2361 		rxr->num_desc = adapter->num_rx_desc;
   2362 
   2363 		/* Initialize the RX side lock */
   2364 		mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
   2365 
   2366 		if (ixgbe_dma_malloc(adapter, rsize, &rxr->rxdma,
   2367 		    BUS_DMA_NOWAIT)) {
   2368 			aprint_error_dev(dev,
    2369 			    "Unable to allocate RX Descriptor memory\n");
   2370 			error = ENOMEM;
   2371 			goto err_rx_desc;
   2372 		}
   2373 		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
   2374 		bzero((void *)rxr->rx_base, rsize);
   2375 
   2376 		/* Allocate receive buffers for the ring */
   2377 		if (ixgbe_allocate_receive_buffers(rxr)) {
   2378 			aprint_error_dev(dev,
   2379 			    "Critical Failure setting up receive buffers\n");
   2380 			error = ENOMEM;
   2381 			goto err_rx_desc;
   2382 		}
   2383 	}
   2384 
   2385 	/*
   2386 	 * Finally set up the queue holding structs
   2387 	 */
   2388 	for (int i = 0; i < adapter->num_queues; i++) {
   2389 		que = &adapter->queues[i];
   2390 		que->adapter = adapter;
   2391 		que->me = i;
   2392 		que->txr = &adapter->tx_rings[i];
   2393 		que->rxr = &adapter->rx_rings[i];
   2394 
   2395 		mutex_init(&que->dc_mtx, MUTEX_DEFAULT, IPL_NET);
   2396 		que->disabled_count = 0;
   2397 	}
   2398 
   2399 	return (0);
   2400 
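         	/*
         	 * Unwind in reverse order: rxconf and txconf count how many
         	 * rings had their descriptor DMA areas allocated, so only
         	 * those areas are freed here before the ring arrays and the
         	 * queue structs are released.
         	 */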
   2401 err_rx_desc:
   2402 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
   2403 		ixgbe_dma_free(adapter, &rxr->rxdma);
   2404 err_tx_desc:
   2405 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
   2406 		ixgbe_dma_free(adapter, &txr->txdma);
   2407 	free(adapter->rx_rings, M_DEVBUF);
   2408 	free(adapter->tx_rings, M_DEVBUF);
   2409 	free(adapter->queues, M_DEVBUF);
   2410 	return (error);
   2411 } /* ixgbe_allocate_queues */
   2412 
   2413 /************************************************************************
   2414  * ixgbe_free_queues
   2415  *
   2416  *   Free descriptors for the transmit and receive rings, and then
   2417  *   the memory associated with each.
   2418  ************************************************************************/
   2419 void
   2420 ixgbe_free_queues(struct adapter *adapter)
   2421 {
   2422 	struct ix_queue *que;
   2423 	int i;
   2424 
   2425 	ixgbe_free_transmit_structures(adapter);
   2426 	ixgbe_free_receive_structures(adapter);
   2427 	for (i = 0; i < adapter->num_queues; i++) {
   2428 		que = &adapter->queues[i];
   2429 		mutex_destroy(&que->dc_mtx);
   2430 	}
   2431 	free(adapter->queues, M_DEVBUF);
   2432 } /* ixgbe_free_queues */
   2433