ix_txrx.c revision 1.56
      1 /* $NetBSD: ix_txrx.c,v 1.56 2019/10/16 06:36:00 knakahara Exp $ */
      2 
      3 /******************************************************************************
      4 
      5   Copyright (c) 2001-2017, Intel Corporation
      6   All rights reserved.
      7 
      8   Redistribution and use in source and binary forms, with or without
      9   modification, are permitted provided that the following conditions are met:
     10 
     11    1. Redistributions of source code must retain the above copyright notice,
     12       this list of conditions and the following disclaimer.
     13 
     14    2. Redistributions in binary form must reproduce the above copyright
     15       notice, this list of conditions and the following disclaimer in the
     16       documentation and/or other materials provided with the distribution.
     17 
     18    3. Neither the name of the Intel Corporation nor the names of its
     19       contributors may be used to endorse or promote products derived from
     20       this software without specific prior written permission.
     21 
     22   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     23   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     24   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     25   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     26   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     27   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     28   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     29   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     30   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     31   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     32   POSSIBILITY OF SUCH DAMAGE.
     33 
     34 ******************************************************************************/
     35 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 327031 2017-12-20 18:15:06Z erj $*/
     36 
     37 /*
     38  * Copyright (c) 2011 The NetBSD Foundation, Inc.
     39  * All rights reserved.
     40  *
     41  * This code is derived from software contributed to The NetBSD Foundation
     42  * by Coyote Point Systems, Inc.
     43  *
     44  * Redistribution and use in source and binary forms, with or without
     45  * modification, are permitted provided that the following conditions
     46  * are met:
     47  * 1. Redistributions of source code must retain the above copyright
     48  *    notice, this list of conditions and the following disclaimer.
     49  * 2. Redistributions in binary form must reproduce the above copyright
     50  *    notice, this list of conditions and the following disclaimer in the
     51  *    documentation and/or other materials provided with the distribution.
     52  *
     53  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     54  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     55  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     56  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     57  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     58  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     59  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     60  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     61  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     62  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     63  * POSSIBILITY OF SUCH DAMAGE.
     64  */
     65 
     66 #include "opt_inet.h"
     67 #include "opt_inet6.h"
     68 
     69 #include "ixgbe.h"
     70 
     71 /*
     72  * HW RSC control:
     73  *  This feature only works with
     74  *  IPv4, and only on 82599 and later.
     75  *  It also causes IP forwarding to
     76  *  fail, and unlike LRO this cannot be
     77  *  controlled by the stack. For these
     78  *  reasons it is left off by default
     79  *  with no tunable interface; enabling
     80  *  it requires recompiling with this
     81  *  set to TRUE.
     82  */
     83 static bool ixgbe_rsc_enable = FALSE;
     84 
     85 /*
     86  * For Flow Director: this is the
     87  * number of TX packets we sample
     88  * for the filter pool; every 20th
     89  * packet will be probed.
     90  *
     91  * This feature can be disabled by
     92  * setting this to 0.
     93  */
     94 static int atr_sample_rate = 20;
     95 
     96 /************************************************************************
     97  *  Local Function prototypes
     98  ************************************************************************/
     99 static void          ixgbe_setup_transmit_ring(struct tx_ring *);
    100 static void          ixgbe_free_transmit_buffers(struct tx_ring *);
    101 static int           ixgbe_setup_receive_ring(struct rx_ring *);
    102 static void          ixgbe_free_receive_buffers(struct rx_ring *);
    103 static void          ixgbe_rx_checksum(u32, struct mbuf *, u32,
    104                                        struct ixgbe_hw_stats *);
    105 static void          ixgbe_refresh_mbufs(struct rx_ring *, int);
    106 static void          ixgbe_drain(struct ifnet *, struct tx_ring *);
    107 static int           ixgbe_xmit(struct tx_ring *, struct mbuf *);
    108 static int           ixgbe_tx_ctx_setup(struct tx_ring *,
    109                                         struct mbuf *, u32 *, u32 *);
    110 static int           ixgbe_tso_setup(struct tx_ring *,
    111                                      struct mbuf *, u32 *, u32 *);
    112 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
    113 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
    114                                     struct mbuf *, u32);
    115 static int           ixgbe_dma_malloc(struct adapter *, bus_size_t,
    116                                       struct ixgbe_dma_alloc *, int);
    117 static void          ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
    118 
    119 static void          ixgbe_setup_hw_rsc(struct rx_ring *);
    120 
    121 /************************************************************************
    122  * ixgbe_legacy_start_locked - Transmit entry point
    123  *
    124  *   Called by the stack to initiate a transmit.
    125  *   The driver will remain in this routine as long as there are
    126  *   packets to transmit and transmit resources are available.
    127  *   In case resources are not available, the stack is notified
    128  *   and the packet is requeued.
    129  ************************************************************************/
    130 int
    131 ixgbe_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
    132 {
    133 	int rc;
    134 	struct mbuf    *m_head;
    135 	struct adapter *adapter = txr->adapter;
    136 
    137 	IXGBE_TX_LOCK_ASSERT(txr);
    138 
    139 	if (adapter->link_active != LINK_STATE_UP) {
    140 		/*
    141 		 * Discard all packets buffered in IFQ to avoid
    142 		 * sending stale packets when the link next comes up.
    143 		 */
    144 		ixgbe_drain(ifp, txr);
    145 		return (ENETDOWN);
    146 	}
    147 	if ((ifp->if_flags & IFF_RUNNING) == 0)
    148 		return (ENETDOWN);
    149 	if (txr->txr_no_space)
    150 		return (ENETDOWN);
    151 
    152 	while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
    153 		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
    154 			break;
    155 
    156 		IFQ_POLL(&ifp->if_snd, m_head);
    157 		if (m_head == NULL)
    158 			break;
    159 
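        		/*
        		 * Poll first so the packet stays on if_snd if
        		 * ixgbe_xmit() asks us to retry (EAGAIN); only
        		 * dequeue once the transmit attempt is final.
        		 */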
    160 		if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
    161 			break;
    162 		}
    163 		IFQ_DEQUEUE(&ifp->if_snd, m_head);
    164 		if (rc != 0) {
    165 			m_freem(m_head);
    166 			continue;
    167 		}
    168 
    169 		/* Send a copy of the frame to the BPF listener */
    170 		bpf_mtap(ifp, m_head, BPF_D_OUT);
    171 	}
    172 
    173 	return IXGBE_SUCCESS;
    174 } /* ixgbe_legacy_start_locked */
    175 
    176 /************************************************************************
    177  * ixgbe_legacy_start
    178  *
    179  *   Called by the stack, this always uses the first tx ring,
    180  *   and should not be used with multiqueue tx enabled.
    181  ************************************************************************/
    182 void
    183 ixgbe_legacy_start(struct ifnet *ifp)
    184 {
    185 	struct adapter *adapter = ifp->if_softc;
    186 	struct tx_ring *txr = adapter->tx_rings;
    187 
    188 	if (ifp->if_flags & IFF_RUNNING) {
    189 		IXGBE_TX_LOCK(txr);
    190 		ixgbe_legacy_start_locked(ifp, txr);
    191 		IXGBE_TX_UNLOCK(txr);
    192 	}
    193 } /* ixgbe_legacy_start */
    194 
    195 /************************************************************************
    196  * ixgbe_mq_start - Multiqueue Transmit Entry Point
    197  *
    198  *   (if_transmit function)
    199  ************************************************************************/
    200 int
    201 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
    202 {
    203 	struct adapter	*adapter = ifp->if_softc;
    204 	struct tx_ring	*txr;
    205 	int 		i;
    206 #ifdef RSS
    207 	uint32_t bucket_id;
    208 #endif
    209 
    210 	/*
    211 	 * When doing RSS, map it to the same outbound queue
    212 	 * as the incoming flow would be mapped to.
    213 	 *
    214 	 * If everything is set up correctly, it should be the
    215 	 * same bucket as the one the current CPU maps to.
    216 	 */
    217 #ifdef RSS
    218 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
    219 		if ((adapter->feat_en & IXGBE_FEATURE_RSS) &&
    220 		    (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
    221 		    &bucket_id) == 0)) {
    222 			i = bucket_id % adapter->num_queues;
    223 #ifdef IXGBE_DEBUG
    224 			if (bucket_id > adapter->num_queues)
    225 				if_printf(ifp,
    226 				    "bucket_id (%d) > num_queues (%d)\n",
    227 				    bucket_id, adapter->num_queues);
    228 #endif
    229 		} else
    230 			i = m->m_pkthdr.flowid % adapter->num_queues;
    231 	} else
    232 #endif /* RSS */
    233 		i = (cpu_index(curcpu()) % ncpu) % adapter->num_queues;
    234 
    235 	/* Check for a hung queue and pick alternative */
    236 	if (((1ULL << i) & adapter->active_queues) == 0)
    237 		i = ffs64(adapter->active_queues);
    238 
    239 	txr = &adapter->tx_rings[i];
    240 
    241 	if (__predict_false(!pcq_put(txr->txr_interq, m))) {
    242 		m_freem(m);
    243 		txr->pcq_drops.ev_count++;
    244 		return ENOBUFS;
    245 	}
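        	/*
        	 * If the TX lock is free, transmit the queued packets now;
        	 * otherwise defer the drain to the per-ring softint or, if
        	 * enabled, the workqueue.
        	 */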
    246 	if (IXGBE_TX_TRYLOCK(txr)) {
    247 		ixgbe_mq_start_locked(ifp, txr);
    248 		IXGBE_TX_UNLOCK(txr);
    249 	} else {
    250 		if (adapter->txrx_use_workqueue) {
    251 			u_int *enqueued;
    252 
    253 			/*
    254 			 * This function itself is not called in interrupt
    255 			 * context; however, it can be called from fast softint
    256 			 * context right after receiving forwarded packets.
    257 			 * The workqueue must therefore be protected against
    258 			 * double enqueueing when the machine handles both
    259 			 * locally generated and forwarded packets.
    260 			 */
    261 			enqueued = percpu_getref(adapter->txr_wq_enqueued);
    262 			if (*enqueued == 0) {
    263 				*enqueued = 1;
    264 				percpu_putref(adapter->txr_wq_enqueued);
    265 				workqueue_enqueue(adapter->txr_wq,
    266 				    &txr->wq_cookie, curcpu());
    267 			} else
    268 				percpu_putref(adapter->txr_wq_enqueued);
    269 		} else {
    270 			kpreempt_disable();
    271 			softint_schedule(txr->txr_si);
    272 			kpreempt_enable();
    273 		}
    274 	}
    275 
    276 	return (0);
    277 } /* ixgbe_mq_start */
    278 
    279 /************************************************************************
    280  * ixgbe_mq_start_locked
    281  ************************************************************************/
    282 int
    283 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
    284 {
    285 	struct mbuf    *next;
    286 	int            enqueued = 0, err = 0;
    287 
    288 	if (txr->adapter->link_active != LINK_STATE_UP) {
    289 		/*
    290 		 * Discard all packets buffered in txr_interq to avoid
    291 		 * sending stale packets when the link next comes up.
    292 		 */
    293 		ixgbe_drain(ifp, txr);
    294 		return (ENETDOWN);
    295 	}
    296 	if ((ifp->if_flags & IFF_RUNNING) == 0)
    297 		return (ENETDOWN);
    298 	if (txr->txr_no_space)
    299 		return (ENETDOWN);
    300 
    301 	/* Process the queue */
    302 	while ((next = pcq_get(txr->txr_interq)) != NULL) {
    303 		if ((err = ixgbe_xmit(txr, next)) != 0) {
    304 			m_freem(next);
    305 			/* All errors are counted in ixgbe_xmit() */
    306 			break;
    307 		}
    308 		enqueued++;
    309 #if __FreeBSD_version >= 1100036
    310 		/*
    311 		 * Since we're looking at the tx ring, we can check
    312 		 * to see if we're a VF by examining our tail register
    313 		 * address.
    314 		 */
    315 		if ((txr->adapter->feat_en & IXGBE_FEATURE_VF) &&
    316 		    (next->m_flags & M_MCAST))
    317 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
    318 #endif
    319 		/* Send a copy of the frame to the BPF listener */
    320 		bpf_mtap(ifp, next, BPF_D_OUT);
    321 		if ((ifp->if_flags & IFF_RUNNING) == 0)
    322 			break;
    323 	}
    324 
    325 	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->adapter))
    326 		ixgbe_txeof(txr);
    327 
    328 	return (err);
    329 } /* ixgbe_mq_start_locked */
    330 
    331 /************************************************************************
    332  * ixgbe_deferred_mq_start
    333  *
    334  *   Called from a softint, and indirectly from a workqueue, to drain
    335  *   queued transmit packets.
    336  ************************************************************************/
    337 void
    338 ixgbe_deferred_mq_start(void *arg)
    339 {
    340 	struct tx_ring *txr = arg;
    341 	struct adapter *adapter = txr->adapter;
    342 	struct ifnet   *ifp = adapter->ifp;
    343 
    344 	IXGBE_TX_LOCK(txr);
    345 	if (pcq_peek(txr->txr_interq) != NULL)
    346 		ixgbe_mq_start_locked(ifp, txr);
    347 	IXGBE_TX_UNLOCK(txr);
    348 } /* ixgbe_deferred_mq_start */
    349 
    350 /************************************************************************
    351  * ixgbe_deferred_mq_start_work
    352  *
    353  *   Called from a workqueue to drain queued transmit packets.
    354  ************************************************************************/
    355 void
    356 ixgbe_deferred_mq_start_work(struct work *wk, void *arg)
    357 {
    358 	struct tx_ring *txr = container_of(wk, struct tx_ring, wq_cookie);
    359 	struct adapter *adapter = txr->adapter;
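        	/*
        	 * Clear the per-CPU "enqueued" flag before draining so that
        	 * ixgbe_mq_start() can schedule this work again for packets
        	 * queued while we run.
        	 */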
    360 	u_int *enqueued = percpu_getref(adapter->txr_wq_enqueued);
    361 	*enqueued = 0;
    362 	percpu_putref(adapter->txr_wq_enqueued);
    363 
    364 	ixgbe_deferred_mq_start(txr);
    365 } /* ixgbe_deferred_mq_start_work */
    366 
    367 /************************************************************************
    368  * ixgbe_drain_all
    369  ************************************************************************/
    370 void
    371 ixgbe_drain_all(struct adapter *adapter)
    372 {
    373 	struct ifnet *ifp = adapter->ifp;
    374 	struct ix_queue *que = adapter->queues;
    375 
    376 	for (int i = 0; i < adapter->num_queues; i++, que++) {
    377 		struct tx_ring  *txr = que->txr;
    378 
    379 		IXGBE_TX_LOCK(txr);
    380 		ixgbe_drain(ifp, txr);
    381 		IXGBE_TX_UNLOCK(txr);
    382 	}
    383 }
    384 
    385 /************************************************************************
    386  * ixgbe_xmit
    387  *
    388  *   Maps the mbufs to tx descriptors, allowing the
    389  *   TX engine to transmit the packets.
    390  *
    391  *   Return 0 on success, positive on failure
    392  ************************************************************************/
    393 static int
    394 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
    395 {
    396 	struct adapter          *adapter = txr->adapter;
    397 	struct ixgbe_tx_buf     *txbuf;
    398 	union ixgbe_adv_tx_desc *txd = NULL;
    399 	struct ifnet	        *ifp = adapter->ifp;
    400 	int                     i, j, error;
    401 	int                     first;
    402 	u32                     olinfo_status = 0, cmd_type_len;
    403 	bool                    remap = TRUE;
    404 	bus_dmamap_t            map;
    405 
    406 	/* Basic descriptor defines */
    407 	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
    408 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
    409 
    410 	if (vlan_has_tag(m_head))
    411 		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
    412 
    413 	/*
    414 	 * Important to capture the first descriptor
    415 	 * used because it will contain the index of
    416 	 * the one we tell the hardware to report back
    417 	 */
    418 	first = txr->next_avail_desc;
    419 	txbuf = &txr->tx_buffers[first];
    420 	map = txbuf->map;
    421 
    422 	/*
    423 	 * Map the packet for DMA.
    424 	 */
    425 retry:
    426 	error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map, m_head,
    427 	    BUS_DMA_NOWAIT);
    428 
    429 	if (__predict_false(error)) {
    430 		struct mbuf *m;
    431 
    432 		switch (error) {
    433 		case EAGAIN:
    434 			txr->q_eagain_tx_dma_setup++;
    435 			return EAGAIN;
    436 		case ENOMEM:
    437 			txr->q_enomem_tx_dma_setup++;
    438 			return EAGAIN;
    439 		case EFBIG:
    440 			/* Try it again? - one try */
    441 			if (remap == TRUE) {
    442 				remap = FALSE;
    443 				/*
    444 				 * XXX: m_defrag will choke on
    445 				 * non-MCLBYTES-sized clusters
    446 				 */
    447 				txr->q_efbig_tx_dma_setup++;
    448 				m = m_defrag(m_head, M_NOWAIT);
    449 				if (m == NULL) {
    450 					txr->q_mbuf_defrag_failed++;
    451 					return ENOBUFS;
    452 				}
    453 				m_head = m;
    454 				goto retry;
    455 			} else {
    456 				txr->q_efbig2_tx_dma_setup++;
    457 				return error;
    458 			}
    459 		case EINVAL:
    460 			txr->q_einval_tx_dma_setup++;
    461 			return error;
    462 		default:
    463 			txr->q_other_tx_dma_setup++;
    464 			return error;
    465 		}
    466 	}
    467 
    468 	/* Make certain there are enough descriptors */
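        	/* (room for the data segments, the context descriptor set up below, and one spare slot) */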
    469 	if (txr->tx_avail < (map->dm_nsegs + 2)) {
    470 		txr->txr_no_space = true;
    471 		txr->no_desc_avail.ev_count++;
    472 		ixgbe_dmamap_unload(txr->txtag, txbuf->map);
    473 		return EAGAIN;
    474 	}
    475 
    476 	/*
    477 	 * Set up the appropriate offload context
    478 	 * this will consume the first descriptor
    479 	 */
    480 	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
    481 	if (__predict_false(error)) {
    482 		return (error);
    483 	}
    484 
    485 	/* Do the flow director magic */
    486 	if ((adapter->feat_en & IXGBE_FEATURE_FDIR) &&
    487 	    (txr->atr_sample) && (!adapter->fdir_reinit)) {
    488 		++txr->atr_count;
    489 		if (txr->atr_count >= atr_sample_rate) {
    490 			ixgbe_atr(txr, m_head);
    491 			txr->atr_count = 0;
    492 		}
    493 	}
    494 
    495 	olinfo_status |= IXGBE_ADVTXD_CC;
    496 	i = txr->next_avail_desc;
    497 	for (j = 0; j < map->dm_nsegs; j++) {
    498 		bus_size_t seglen;
    499 		bus_addr_t segaddr;
    500 
    501 		txbuf = &txr->tx_buffers[i];
    502 		txd = &txr->tx_base[i];
    503 		seglen = map->dm_segs[j].ds_len;
    504 		segaddr = htole64(map->dm_segs[j].ds_addr);
    505 
    506 		txd->read.buffer_addr = segaddr;
    507 		txd->read.cmd_type_len = htole32(cmd_type_len | seglen);
    508 		txd->read.olinfo_status = htole32(olinfo_status);
    509 
    510 		if (++i == txr->num_desc)
    511 			i = 0;
    512 	}
    513 
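        	/*
        	 * The final data descriptor carries EOP (end of packet) and
        	 * RS (report status), so the hardware writes back completion
        	 * status once the whole frame has been sent.
        	 */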
    514 	txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
    515 	txr->tx_avail -= map->dm_nsegs;
    516 	txr->next_avail_desc = i;
    517 
    518 	txbuf->m_head = m_head;
    519 	/*
    520 	 * Here we swap the map so the last descriptor,
    521 	 * which gets the completion interrupt, has the
    522 	 * real map, and the first descriptor gets the
    523 	 * unused map from this descriptor.
    524 	 */
    525 	txr->tx_buffers[first].map = txbuf->map;
    526 	txbuf->map = map;
    527 	bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
    528 	    BUS_DMASYNC_PREWRITE);
    529 
    530 	/* Set the EOP descriptor that will be marked done */
    531 	txbuf = &txr->tx_buffers[first];
    532 	txbuf->eop = txd;
    533 
    534 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
    535 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
    536 	/*
    537 	 * Advance the Transmit Descriptor Tail (Tdt), this tells the
    538 	 * hardware that this frame is available to transmit.
    539 	 */
    540 	++txr->total_packets.ev_count;
    541 	IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
    542 
    543 	/*
    544 	 * XXXX NOMPSAFE: ifp->if_data should be percpu.
    545 	 */
    546 	ifp->if_obytes += m_head->m_pkthdr.len;
    547 	if (m_head->m_flags & M_MCAST)
    548 		ifp->if_omcasts++;
    549 
    550 	/* Mark queue as having work */
    551 	if (txr->busy == 0)
    552 		txr->busy = 1;
    553 
    554 	return (0);
    555 } /* ixgbe_xmit */
    556 
    557 /************************************************************************
    558  * ixgbe_drain
    559  ************************************************************************/
    560 static void
    561 ixgbe_drain(struct ifnet *ifp, struct tx_ring *txr)
    562 {
    563 	struct mbuf *m;
    564 
    565 	IXGBE_TX_LOCK_ASSERT(txr);
    566 
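        	/*
        	 * if_snd only feeds ring 0 (the legacy transmit path), so
        	 * drain it there; every ring drains its own pcq below.
        	 */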
    567 	if (txr->me == 0) {
    568 		while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
    569 			IFQ_DEQUEUE(&ifp->if_snd, m);
    570 			m_freem(m);
    571 			IF_DROP(&ifp->if_snd);
    572 		}
    573 	}
    574 
    575 	while ((m = pcq_get(txr->txr_interq)) != NULL) {
    576 		m_freem(m);
    577 		txr->pcq_drops.ev_count++;
    578 	}
    579 }
    580 
    581 /************************************************************************
    582  * ixgbe_allocate_transmit_buffers
    583  *
    584  *   Allocate memory for tx_buffer structures. The tx_buffer stores all
    585  *   the information needed to transmit a packet on the wire. This is
    586  *   called only once at attach; setup is done on every reset.
    587  ************************************************************************/
    588 static int
    589 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
    590 {
    591 	struct adapter      *adapter = txr->adapter;
    592 	device_t            dev = adapter->dev;
    593 	struct ixgbe_tx_buf *txbuf;
    594 	int                 error, i;
    595 
    596 	/*
    597 	 * Setup DMA descriptor areas.
    598 	 */
    599 	error = ixgbe_dma_tag_create(
    600 	         /*      parent */ adapter->osdep.dmat,
    601 	         /*   alignment */ 1,
    602 	         /*      bounds */ 0,
    603 	         /*     maxsize */ IXGBE_TSO_SIZE,
    604 	         /*   nsegments */ adapter->num_segs,
    605 	         /*  maxsegsize */ PAGE_SIZE,
    606 	         /*       flags */ 0,
    607 	                           &txr->txtag);
    608 	if (error != 0) {
    609 		aprint_error_dev(dev, "Unable to allocate TX DMA tag\n");
    610 		goto fail;
    611 	}
    612 
    613 	txr->tx_buffers =
    614 	    (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
    615 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
    616 	if (txr->tx_buffers == NULL) {
    617 		aprint_error_dev(dev, "Unable to allocate tx_buffer memory\n");
    618 		error = ENOMEM;
    619 		goto fail;
    620 	}
    621 
    622 	/* Create the descriptor buffer dma maps */
    623 	txbuf = txr->tx_buffers;
    624 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
    625 		error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
    626 		if (error != 0) {
    627 			aprint_error_dev(dev,
    628 			    "Unable to create TX DMA map (%d)\n", error);
    629 			goto fail;
    630 		}
    631 	}
    632 
    633 	return 0;
    634 fail:
    635 	/* We free all, it handles case where we are in the middle */
    636 #if 0 /* XXX was FreeBSD */
    637 	ixgbe_free_transmit_structures(adapter);
    638 #else
    639 	ixgbe_free_transmit_buffers(txr);
    640 #endif
    641 	return (error);
    642 } /* ixgbe_allocate_transmit_buffers */
    643 
    644 /************************************************************************
    645  * ixgbe_setup_transmit_ring - Initialize a transmit ring.
    646  ************************************************************************/
    647 static void
    648 ixgbe_setup_transmit_ring(struct tx_ring *txr)
    649 {
    650 	struct adapter        *adapter = txr->adapter;
    651 	struct ixgbe_tx_buf   *txbuf;
    652 #ifdef DEV_NETMAP
    653 	struct netmap_adapter *na = NA(adapter->ifp);
    654 	struct netmap_slot    *slot;
    655 #endif /* DEV_NETMAP */
    656 
    657 	/* Clear the old ring contents */
    658 	IXGBE_TX_LOCK(txr);
    659 
    660 #ifdef DEV_NETMAP
    661 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
    662 		/*
    663 		 * (under lock): if in netmap mode, do some consistency
    664 		 * checks and set slot to entry 0 of the netmap ring.
    665 		 */
    666 		slot = netmap_reset(na, NR_TX, txr->me, 0);
    667 	}
    668 #endif /* DEV_NETMAP */
    669 
    670 	bzero((void *)txr->tx_base,
    671 	    (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
    672 	/* Reset indices */
    673 	txr->next_avail_desc = 0;
    674 	txr->next_to_clean = 0;
    675 
    676 	/* Free any existing tx buffers. */
    677 	txbuf = txr->tx_buffers;
    678 	for (int i = 0; i < txr->num_desc; i++, txbuf++) {
    679 		if (txbuf->m_head != NULL) {
    680 			bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
    681 			    0, txbuf->m_head->m_pkthdr.len,
    682 			    BUS_DMASYNC_POSTWRITE);
    683 			ixgbe_dmamap_unload(txr->txtag, txbuf->map);
    684 			m_freem(txbuf->m_head);
    685 			txbuf->m_head = NULL;
    686 		}
    687 
    688 #ifdef DEV_NETMAP
    689 		/*
    690 		 * In netmap mode, set the map for the packet buffer.
    691 		 * NOTE: Some drivers (not this one) also need to set
    692 		 * the physical buffer address in the NIC ring.
    693 		 * Slots in the netmap ring (indexed by "si") are
    694 		 * kring->nkr_hwofs positions "ahead" wrt the
    695 		 * corresponding slot in the NIC ring. In some drivers
    696 		 * (not here) nkr_hwofs can be negative. Function
    697 		 * netmap_idx_n2k() handles wraparounds properly.
    698 		 */
    699 		if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
    700 			int si = netmap_idx_n2k(na->tx_rings[txr->me], i);
    701 			netmap_load_map(na, txr->txtag,
    702 			    txbuf->map, NMB(na, slot + si));
    703 		}
    704 #endif /* DEV_NETMAP */
    705 
    706 		/* Clear the EOP descriptor pointer */
    707 		txbuf->eop = NULL;
    708 	}
    709 
    710 	/* Set the rate at which we sample packets */
    711 	if (adapter->feat_en & IXGBE_FEATURE_FDIR)
    712 		txr->atr_sample = atr_sample_rate;
    713 
    714 	/* Set number of descriptors available */
    715 	txr->tx_avail = adapter->num_tx_desc;
    716 
    717 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
    718 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
    719 	IXGBE_TX_UNLOCK(txr);
    720 } /* ixgbe_setup_transmit_ring */
    721 
    722 /************************************************************************
    723  * ixgbe_setup_transmit_structures - Initialize all transmit rings.
    724  ************************************************************************/
    725 int
    726 ixgbe_setup_transmit_structures(struct adapter *adapter)
    727 {
    728 	struct tx_ring *txr = adapter->tx_rings;
    729 
    730 	for (int i = 0; i < adapter->num_queues; i++, txr++)
    731 		ixgbe_setup_transmit_ring(txr);
    732 
    733 	return (0);
    734 } /* ixgbe_setup_transmit_structures */
    735 
    736 /************************************************************************
    737  * ixgbe_free_transmit_structures - Free all transmit rings.
    738  ************************************************************************/
    739 void
    740 ixgbe_free_transmit_structures(struct adapter *adapter)
    741 {
    742 	struct tx_ring *txr = adapter->tx_rings;
    743 
    744 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
    745 		ixgbe_free_transmit_buffers(txr);
    746 		ixgbe_dma_free(adapter, &txr->txdma);
    747 		IXGBE_TX_LOCK_DESTROY(txr);
    748 	}
    749 	free(adapter->tx_rings, M_DEVBUF);
    750 } /* ixgbe_free_transmit_structures */
    751 
    752 /************************************************************************
    753  * ixgbe_free_transmit_buffers
    754  *
    755  *   Free transmit ring related data structures.
    756  ************************************************************************/
    757 static void
    758 ixgbe_free_transmit_buffers(struct tx_ring *txr)
    759 {
    760 	struct adapter      *adapter = txr->adapter;
    761 	struct ixgbe_tx_buf *tx_buffer;
    762 	int                 i;
    763 
    764 	INIT_DEBUGOUT("ixgbe_free_transmit_buffers: begin");
    765 
    766 	if (txr->tx_buffers == NULL)
    767 		return;
    768 
    769 	tx_buffer = txr->tx_buffers;
    770 	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
    771 		if (tx_buffer->m_head != NULL) {
    772 			bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
    773 			    0, tx_buffer->m_head->m_pkthdr.len,
    774 			    BUS_DMASYNC_POSTWRITE);
    775 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
    776 			m_freem(tx_buffer->m_head);
    777 			tx_buffer->m_head = NULL;
    778 			if (tx_buffer->map != NULL) {
    779 				ixgbe_dmamap_destroy(txr->txtag,
    780 				    tx_buffer->map);
    781 				tx_buffer->map = NULL;
    782 			}
    783 		} else if (tx_buffer->map != NULL) {
    784 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
    785 			ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
    786 			tx_buffer->map = NULL;
    787 		}
    788 	}
    789 	if (txr->txr_interq != NULL) {
    790 		struct mbuf *m;
    791 
    792 		while ((m = pcq_get(txr->txr_interq)) != NULL)
    793 			m_freem(m);
    794 		pcq_destroy(txr->txr_interq);
    795 	}
    796 	if (txr->tx_buffers != NULL) {
    797 		free(txr->tx_buffers, M_DEVBUF);
    798 		txr->tx_buffers = NULL;
    799 	}
    800 	if (txr->txtag != NULL) {
    801 		ixgbe_dma_tag_destroy(txr->txtag);
    802 		txr->txtag = NULL;
    803 	}
    804 } /* ixgbe_free_transmit_buffers */
    805 
    806 /************************************************************************
    807  * ixgbe_tx_ctx_setup
    808  *
    809  *   Advanced Context Descriptor setup for VLAN, CSUM or TSO
    810  ************************************************************************/
    811 static int
    812 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
    813     u32 *cmd_type_len, u32 *olinfo_status)
    814 {
    815 	struct adapter                   *adapter = txr->adapter;
    816 	struct ixgbe_adv_tx_context_desc *TXD;
    817 	struct ether_vlan_header         *eh;
    818 #ifdef INET
    819 	struct ip                        *ip;
    820 #endif
    821 #ifdef INET6
    822 	struct ip6_hdr                   *ip6;
    823 #endif
    824 	int                              ehdrlen, ip_hlen = 0;
    825 	int                              offload = TRUE;
    826 	int                              ctxd = txr->next_avail_desc;
    827 	u32                              vlan_macip_lens = 0;
    828 	u32                              type_tucmd_mlhl = 0;
    829 	u16                              vtag = 0;
    830 	u16                              etype;
    831 	u8                               ipproto = 0;
    832 	char                             *l3d;
    833 
    834 
    835 	/* First check if TSO is to be used */
    836 	if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4 | M_CSUM_TSOv6)) {
    837 		int rv = ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status);
    838 
    839 		if (rv != 0)
    840 			++adapter->tso_err.ev_count;
    841 		return rv;
    842 	}
    843 
    844 	if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
    845 		offload = FALSE;
    846 
    847 	/* Indicate the whole packet as payload when not doing TSO */
    848 	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
    849 
    850 	/* Now ready a context descriptor */
    851 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
    852 
    853 	/*
    854 	 * In advanced descriptors the vlan tag must
    855 	 * be placed into the context descriptor. Hence
    856 	 * we need to make one even if not doing offloads.
    857 	 */
    858 	if (vlan_has_tag(mp)) {
    859 		vtag = htole16(vlan_get_tag(mp));
    860 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
    861 	} else if (!(txr->adapter->feat_en & IXGBE_FEATURE_NEEDS_CTXD) &&
    862 	           (offload == FALSE))
    863 		return (0);
    864 
    865 	/*
    866 	 * Determine where frame payload starts.
    867 	 * Jump over vlan headers if already present,
    868 	 * helpful for QinQ too.
    869 	 */
    870 	KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
    871 	eh = mtod(mp, struct ether_vlan_header *);
    872 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
    873 		KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
    874 		etype = ntohs(eh->evl_proto);
    875 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
    876 	} else {
    877 		etype = ntohs(eh->evl_encap_proto);
    878 		ehdrlen = ETHER_HDR_LEN;
    879 	}
    880 
    881 	/* Set the ether header length */
    882 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
    883 
    884 	if (offload == FALSE)
    885 		goto no_offloads;
    886 
    887 	/*
    888 	 * If the first mbuf only includes the ethernet header,
    889 	 * jump to the next one
    890 	 * XXX: This assumes the stack splits mbufs containing headers
    891 	 *      on header boundaries
    892 	 * XXX: And assumes the entire IP header is contained in one mbuf
    893 	 */
    894 	if (mp->m_len == ehdrlen && mp->m_next)
    895 		l3d = mtod(mp->m_next, char *);
    896 	else
    897 		l3d = mtod(mp, char *) + ehdrlen;
    898 
    899 	switch (etype) {
    900 #ifdef INET
    901 	case ETHERTYPE_IP:
    902 		ip = (struct ip *)(l3d);
    903 		ip_hlen = ip->ip_hl << 2;
    904 		ipproto = ip->ip_p;
    905 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
    906 		KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
    907 		    ip->ip_sum == 0);
    908 		break;
    909 #endif
    910 #ifdef INET6
    911 	case ETHERTYPE_IPV6:
    912 		ip6 = (struct ip6_hdr *)(l3d);
    913 		ip_hlen = sizeof(struct ip6_hdr);
    914 		ipproto = ip6->ip6_nxt;
    915 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
    916 		break;
    917 #endif
    918 	default:
    919 		offload = false;
    920 		break;
    921 	}
    922 
    923 	if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
    924 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
    925 
    926 	vlan_macip_lens |= ip_hlen;
    927 
    928 	/* No support for offloads for non-L4 next headers */
    929 	switch (ipproto) {
    930 	case IPPROTO_TCP:
    931 		if (mp->m_pkthdr.csum_flags &
    932 		    (M_CSUM_TCPv4 | M_CSUM_TCPv6))
    933 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
    934 		else
    935 			offload = false;
    936 		break;
    937 	case IPPROTO_UDP:
    938 		if (mp->m_pkthdr.csum_flags &
    939 		    (M_CSUM_UDPv4 | M_CSUM_UDPv6))
    940 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
    941 		else
    942 			offload = false;
    943 		break;
    944 	default:
    945 		offload = false;
    946 		break;
    947 	}
    948 
    949 	if (offload) /* Insert L4 checksum into data descriptors */
    950 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
    951 
    952 no_offloads:
    953 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
    954 
    955 	/* Now copy bits into descriptor */
    956 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
    957 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
    958 	TXD->seqnum_seed = htole32(0);
    959 	TXD->mss_l4len_idx = htole32(0);
    960 
    961 	/* We've consumed the first desc, adjust counters */
    962 	if (++ctxd == txr->num_desc)
    963 		ctxd = 0;
    964 	txr->next_avail_desc = ctxd;
    965 	--txr->tx_avail;
    966 
    967 	return (0);
    968 } /* ixgbe_tx_ctx_setup */
    969 
    970 /************************************************************************
    971  * ixgbe_tso_setup
    972  *
    973  *   Setup work for hardware segmentation offload (TSO) on
    974  *   adapters using advanced tx descriptors
    975  ************************************************************************/
    976 static int
    977 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len,
    978     u32 *olinfo_status)
    979 {
    980 	struct ixgbe_adv_tx_context_desc *TXD;
    981 	struct ether_vlan_header         *eh;
    982 #ifdef INET6
    983 	struct ip6_hdr                   *ip6;
    984 #endif
    985 #ifdef INET
    986 	struct ip                        *ip;
    987 #endif
    988 	struct tcphdr                    *th;
    989 	int                              ctxd, ehdrlen, ip_hlen, tcp_hlen;
    990 	u32                              vlan_macip_lens = 0;
    991 	u32                              type_tucmd_mlhl = 0;
    992 	u32                              mss_l4len_idx = 0, paylen;
    993 	u16                              vtag = 0, eh_type;
    994 
    995 	/*
    996 	 * Determine where frame payload starts.
    997 	 * Jump over vlan headers if already present
    998 	 */
    999 	eh = mtod(mp, struct ether_vlan_header *);
   1000 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
   1001 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
   1002 		eh_type = eh->evl_proto;
   1003 	} else {
   1004 		ehdrlen = ETHER_HDR_LEN;
   1005 		eh_type = eh->evl_encap_proto;
   1006 	}
   1007 
   1008 	switch (ntohs(eh_type)) {
   1009 #ifdef INET
   1010 	case ETHERTYPE_IP:
   1011 		ip = (struct ip *)(mp->m_data + ehdrlen);
   1012 		if (ip->ip_p != IPPROTO_TCP)
   1013 			return (ENXIO);
   1014 		ip->ip_sum = 0;
   1015 		ip_hlen = ip->ip_hl << 2;
   1016 		th = (struct tcphdr *)((char *)ip + ip_hlen);
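        		/*
        		 * Seed th_sum with the pseudo-header checksum (without
        		 * the TCP length), as the hardware expects for TSO.
        		 */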
   1017 		th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
   1018 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
   1019 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
   1020 		/* Tell transmit desc to also do IPv4 checksum. */
   1021 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
   1022 		break;
   1023 #endif
   1024 #ifdef INET6
   1025 	case ETHERTYPE_IPV6:
   1026 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
   1027 		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
   1028 		if (ip6->ip6_nxt != IPPROTO_TCP)
   1029 			return (ENXIO);
   1030 		ip_hlen = sizeof(struct ip6_hdr);
   1031 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
   1032 		th = (struct tcphdr *)((char *)ip6 + ip_hlen);
   1033 		th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
   1034 		    &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
   1035 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
   1036 		break;
   1037 #endif
   1038 	default:
   1039 		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
   1040 		    __func__, ntohs(eh_type));
   1041 		break;
   1042 	}
   1043 
   1044 	ctxd = txr->next_avail_desc;
   1045 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
   1046 
   1047 	tcp_hlen = th->th_off << 2;
   1048 
   1049 	/* This is used in the transmit desc in encap */
   1050 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
   1051 
   1052 	/* VLAN MACLEN IPLEN */
   1053 	if (vlan_has_tag(mp)) {
   1054 		vtag = htole16(vlan_get_tag(mp));
   1055 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
   1056 	}
   1057 
   1058 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
   1059 	vlan_macip_lens |= ip_hlen;
   1060 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
   1061 
   1062 	/* ADV DTYPE TUCMD */
   1063 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
   1064 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
   1065 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
   1066 
   1067 	/* MSS L4LEN IDX */
   1068 	mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
   1069 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
   1070 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
   1071 
   1072 	TXD->seqnum_seed = htole32(0);
   1073 
   1074 	if (++ctxd == txr->num_desc)
   1075 		ctxd = 0;
   1076 
   1077 	txr->tx_avail--;
   1078 	txr->next_avail_desc = ctxd;
   1079 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
   1080 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
   1081 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
   1082 	++txr->tso_tx.ev_count;
   1083 
   1084 	return (0);
   1085 } /* ixgbe_tso_setup */
   1086 
   1087 
   1088 /************************************************************************
   1089  * ixgbe_txeof
   1090  *
   1091  *   Examine each tx_buffer in the used queue. If the hardware is done
   1092  *   processing the packet then free associated resources. The
   1093  *   tx_buffer is put back on the free queue.
   1094  ************************************************************************/
   1095 bool
   1096 ixgbe_txeof(struct tx_ring *txr)
   1097 {
   1098 	struct adapter		*adapter = txr->adapter;
   1099 	struct ifnet		*ifp = adapter->ifp;
   1100 	struct ixgbe_tx_buf	*buf;
   1101 	union ixgbe_adv_tx_desc *txd;
   1102 	u32			work, processed = 0;
   1103 	u32			limit = adapter->tx_process_limit;
   1104 
   1105 	KASSERT(mutex_owned(&txr->tx_mtx));
   1106 
   1107 #ifdef DEV_NETMAP
   1108 	if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) &&
   1109 	    (adapter->ifp->if_capenable & IFCAP_NETMAP)) {
   1110 		struct netmap_adapter *na = NA(adapter->ifp);
   1111 		struct netmap_kring *kring = na->tx_rings[txr->me];
   1112 		txd = txr->tx_base;
   1113 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1114 		    BUS_DMASYNC_POSTREAD);
   1115 		/*
   1116 		 * In netmap mode, all the work is done in the context
   1117 		 * of the client thread. Interrupt handlers only wake up
   1118 		 * clients, which may be sleeping on individual rings
   1119 		 * or on a global resource for all rings.
   1120 		 * To implement tx interrupt mitigation, we wake up the client
   1121 		 * thread roughly every half ring, even if the NIC interrupts
   1122 		 * more frequently. This is implemented as follows:
   1123 		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
   1124 		 *   the slot that should wake up the thread (nkr_num_slots
   1125 		 *   means the user thread should not be woken up);
   1126 		 * - the driver ignores tx interrupts unless netmap_mitigate=0
   1127 		 *   or the slot has the DD bit set.
   1128 		 */
   1129 		if (kring->nr_kflags < kring->nkr_num_slots &&
   1130 		    txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD) {
   1131 			netmap_tx_irq(ifp, txr->me);
   1132 		}
   1133 		return false;
   1134 	}
   1135 #endif /* DEV_NETMAP */
   1136 
   1137 	if (txr->tx_avail == txr->num_desc) {
   1138 		txr->busy = 0;
   1139 		return false;
   1140 	}
   1141 
   1142 	/* Get work starting point */
   1143 	work = txr->next_to_clean;
   1144 	buf = &txr->tx_buffers[work];
   1145 	txd = &txr->tx_base[work];
   1146 	work -= txr->num_desc; /* The distance to ring end */
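        	/*
        	 * 'work' is kept as an offset relative to the end of the ring
        	 * (it is unsigned, so it wraps modulo 2^32); it reaches zero
        	 * exactly when the scan passes the last descriptor, and
        	 * num_desc is added back below to recover the real index.
        	 */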
   1147 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1148 	    BUS_DMASYNC_POSTREAD);
   1149 
   1150 	do {
   1151 		union ixgbe_adv_tx_desc *eop = buf->eop;
   1152 		if (eop == NULL) /* No work */
   1153 			break;
   1154 
   1155 		if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
   1156 			break;	/* I/O not complete */
   1157 
   1158 		if (buf->m_head) {
   1159 			txr->bytes += buf->m_head->m_pkthdr.len;
   1160 			bus_dmamap_sync(txr->txtag->dt_dmat, buf->map,
   1161 			    0, buf->m_head->m_pkthdr.len,
   1162 			    BUS_DMASYNC_POSTWRITE);
   1163 			ixgbe_dmamap_unload(txr->txtag, buf->map);
   1164 			m_freem(buf->m_head);
   1165 			buf->m_head = NULL;
   1166 		}
   1167 		buf->eop = NULL;
   1168 		txr->txr_no_space = false;
   1169 		++txr->tx_avail;
   1170 
   1171 		/* We clean the range if multi segment */
   1172 		while (txd != eop) {
   1173 			++txd;
   1174 			++buf;
   1175 			++work;
   1176 			/* wrap the ring? */
   1177 			if (__predict_false(!work)) {
   1178 				work -= txr->num_desc;
   1179 				buf = txr->tx_buffers;
   1180 				txd = txr->tx_base;
   1181 			}
   1182 			if (buf->m_head) {
   1183 				txr->bytes +=
   1184 				    buf->m_head->m_pkthdr.len;
   1185 				bus_dmamap_sync(txr->txtag->dt_dmat,
   1186 				    buf->map,
   1187 				    0, buf->m_head->m_pkthdr.len,
   1188 				    BUS_DMASYNC_POSTWRITE);
   1189 				ixgbe_dmamap_unload(txr->txtag,
   1190 				    buf->map);
   1191 				m_freem(buf->m_head);
   1192 				buf->m_head = NULL;
   1193 			}
   1194 			++txr->tx_avail;
   1195 			buf->eop = NULL;
   1196 
   1197 		}
   1198 		++txr->packets;
   1199 		++processed;
   1200 		++ifp->if_opackets;
   1201 
   1202 		/* Try the next packet */
   1203 		++txd;
   1204 		++buf;
   1205 		++work;
   1206 		/* reset with a wrap */
   1207 		if (__predict_false(!work)) {
   1208 			work -= txr->num_desc;
   1209 			buf = txr->tx_buffers;
   1210 			txd = txr->tx_base;
   1211 		}
   1212 		prefetch(txd);
   1213 	} while (__predict_true(--limit));
   1214 
   1215 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1216 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   1217 
   1218 	work += txr->num_desc;
   1219 	txr->next_to_clean = work;
   1220 
   1221 	/*
   1222 	 * Queue hang detection: we know there is
   1223 	 * work outstanding or the first return
   1224 	 * above would have been taken, so increment
   1225 	 * busy if nothing got cleaned; local_timer
   1226 	 * will then check it and mark the queue as
   1227 	 * HUNG once it exceeds the maximum attempts.
   1228 	 */
   1229 	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
   1230 		++txr->busy;
   1231 	/*
   1232 	 * If anything was cleaned we reset the state to 1;
   1233 	 * note this clears HUNG if it was set.
   1234 	 */
   1235 	if (processed)
   1236 		txr->busy = 1;
   1237 
   1238 	if (txr->tx_avail == txr->num_desc)
   1239 		txr->busy = 0;
   1240 
   1241 	return ((limit > 0) ? false : true);
   1242 } /* ixgbe_txeof */
   1243 
   1244 /************************************************************************
   1245  * ixgbe_rsc_count
   1246  *
   1247  *   Used to detect a descriptor that has been merged by Hardware RSC.
   1248  ************************************************************************/
   1249 static inline u32
   1250 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
   1251 {
   1252 	return (le32toh(rx->wb.lower.lo_dword.data) &
   1253 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
   1254 } /* ixgbe_rsc_count */
   1255 
   1256 /************************************************************************
   1257  * ixgbe_setup_hw_rsc
   1258  *
   1259  *   Initialize Hardware RSC (LRO) feature on 82599
   1260  *   for an RX ring; this is toggled by the LRO capability
   1261  *   even though it is transparent to the stack.
   1262  *
   1263  *   NOTE: Since this HW feature only works with IPv4 and
   1264  *         testing has shown soft LRO to be as effective,
   1265  *         this feature will be disabled by default.
   1266  ************************************************************************/
   1267 static void
   1268 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
   1269 {
   1270 	struct	adapter  *adapter = rxr->adapter;
   1271 	struct	ixgbe_hw *hw = &adapter->hw;
   1272 	u32              rscctrl, rdrxctl;
   1273 
   1274 	/* If turning LRO/RSC off we need to disable it */
   1275 	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
   1276 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
   1277 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
        		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
   1278 		return;
   1279 	}
   1280 
   1281 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
   1282 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
   1283 #ifdef DEV_NETMAP
   1284 	/* Always strip CRC unless Netmap disabled it */
   1285 	if (!(adapter->feat_en & IXGBE_FEATURE_NETMAP) ||
   1286 	    !(adapter->ifp->if_capenable & IFCAP_NETMAP) ||
   1287 	    ix_crcstrip)
   1288 #endif /* DEV_NETMAP */
   1289 		rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
   1290 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
   1291 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
   1292 
   1293 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
   1294 	rscctrl |= IXGBE_RSCCTL_RSCEN;
   1295 	/*
   1296 	 * Limit the total number of descriptors that
   1297 	 * can be combined, so it does not exceed 64K
   1298 	 */
   1299 	if (rxr->mbuf_sz == MCLBYTES)
   1300 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
   1301 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
   1302 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
   1303 	else if (rxr->mbuf_sz == MJUM9BYTES)
   1304 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
   1305 	else  /* Using 16K cluster */
   1306 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
   1307 
   1308 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
   1309 
   1310 	/* Enable TCP header recognition */
   1311 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
   1312 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR));
   1313 
   1314 	/* Disable RSC for ACK packets */
   1315 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
   1316 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
   1317 
   1318 	rxr->hw_rsc = TRUE;
   1319 } /* ixgbe_setup_hw_rsc */
   1320 
   1321 /************************************************************************
   1322  * ixgbe_refresh_mbufs
   1323  *
   1324  *   Refresh mbuf buffers for RX descriptor rings
   1325  *    - now keeps its own state, so discards due to resource
   1326  *      exhaustion are unnecessary; if an mbuf cannot be obtained
   1327  *      it just returns, keeping its placeholder, and can simply
   1328  *      be called again to retry.
   1329  ************************************************************************/
   1330 static void
   1331 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
   1332 {
   1333 	struct adapter      *adapter = rxr->adapter;
   1334 	struct ixgbe_rx_buf *rxbuf;
   1335 	struct mbuf         *mp;
   1336 	int                 i, j, error;
   1337 	bool                refreshed = false;
   1338 
   1339 	i = j = rxr->next_to_refresh;
   1340 	/* Control the loop with one beyond */
   1341 	if (++j == rxr->num_desc)
   1342 		j = 0;
   1343 
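        	/*
        	 * 'i' indexes the slot being refreshed while 'j' runs one slot
        	 * ahead; the loop stops before 'j' reaches 'limit', so the
        	 * refresh never overruns the descriptors not yet processed.
        	 */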
   1344 	while (j != limit) {
   1345 		rxbuf = &rxr->rx_buffers[i];
   1346 		if (rxbuf->buf == NULL) {
   1347 			mp = ixgbe_getjcl(&rxr->jcl_head, M_NOWAIT,
   1348 			    MT_DATA, M_PKTHDR, rxr->mbuf_sz);
   1349 			if (mp == NULL) {
   1350 				rxr->no_jmbuf.ev_count++;
   1351 				goto update;
   1352 			}
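        			/*
        			 * For standard-size frames, shift the payload by
        			 * ETHER_ALIGN so the IP header ends up 32-bit aligned.
        			 */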
   1353 			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
   1354 				m_adj(mp, ETHER_ALIGN);
   1355 		} else
   1356 			mp = rxbuf->buf;
   1357 
   1358 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   1359 
   1360 		/* If we're dealing with an mbuf that was copied rather
   1361 		 * than replaced, there's no need to go through busdma.
   1362 		 */
   1363 		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
   1364 			/* Get the memory mapping */
   1365 			ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   1366 			error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
   1367 			    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
   1368 			if (error != 0) {
   1369 				device_printf(adapter->dev, "Refresh mbufs: "
   1370 				    "payload dmamap load failure - %d\n",
   1371 				    error);
   1372 				m_free(mp);
   1373 				rxbuf->buf = NULL;
   1374 				goto update;
   1375 			}
   1376 			rxbuf->buf = mp;
   1377 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1378 			    0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
   1379 			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
   1380 			    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1381 		} else {
   1382 			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
   1383 			rxbuf->flags &= ~IXGBE_RX_COPY;
   1384 		}
   1385 
   1386 		refreshed = true;
   1387 		/* Next is precalculated */
   1388 		i = j;
   1389 		rxr->next_to_refresh = i;
   1390 		if (++j == rxr->num_desc)
   1391 			j = 0;
   1392 	}
   1393 
   1394 update:
   1395 	if (refreshed) /* Update hardware tail index */
   1396 		IXGBE_WRITE_REG(&adapter->hw, rxr->tail, rxr->next_to_refresh);
   1397 
   1398 	return;
   1399 } /* ixgbe_refresh_mbufs */
   1400 
   1401 /************************************************************************
   1402  * ixgbe_allocate_receive_buffers
   1403  *
   1404  *   Allocate memory for rx_buffer structures. Since we use one
   1405  *   rx_buffer per received packet, the maximum number of rx_buffer's
   1406  *   that we'll need is equal to the number of receive descriptors
   1407  *   that we've allocated.
   1408  ************************************************************************/
   1409 static int
   1410 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
   1411 {
   1412 	struct adapter      *adapter = rxr->adapter;
   1413 	device_t            dev = adapter->dev;
   1414 	struct ixgbe_rx_buf *rxbuf;
   1415 	int                 bsize, error;
   1416 
   1417 	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
   1418 	rxr->rx_buffers = (struct ixgbe_rx_buf *)malloc(bsize, M_DEVBUF,
   1419 	    M_NOWAIT | M_ZERO);
   1420 	if (rxr->rx_buffers == NULL) {
   1421 		aprint_error_dev(dev, "Unable to allocate rx_buffer memory\n");
   1422 		error = ENOMEM;
   1423 		goto fail;
   1424 	}
   1425 
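        	/*
        	 * Each RX buffer is mapped as a single DMA segment of at most
        	 * MJUM16BYTES, large enough for the biggest supported cluster.
        	 */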
   1426 	error = ixgbe_dma_tag_create(
   1427 	         /*      parent */ adapter->osdep.dmat,
   1428 	         /*   alignment */ 1,
   1429 	         /*      bounds */ 0,
   1430 	         /*     maxsize */ MJUM16BYTES,
   1431 	         /*   nsegments */ 1,
   1432 	         /*  maxsegsize */ MJUM16BYTES,
   1433 	         /*       flags */ 0,
   1434 	                           &rxr->ptag);
   1435 	if (error != 0) {
   1436 		aprint_error_dev(dev, "Unable to create RX DMA tag\n");
   1437 		goto fail;
   1438 	}
   1439 
   1440 	for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
   1441 		rxbuf = &rxr->rx_buffers[i];
   1442 		error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
   1443 		if (error) {
   1444 			aprint_error_dev(dev, "Unable to create RX dma map\n");
   1445 			goto fail;
   1446 		}
   1447 	}
   1448 
   1449 	return (0);
   1450 
   1451 fail:
   1452 	/* Frees all, but can handle partial completion */
   1453 	ixgbe_free_receive_structures(adapter);
   1454 
   1455 	return (error);
   1456 } /* ixgbe_allocate_receive_buffers */
   1457 
   1458 /************************************************************************
   1459  * ixgbe_free_receive_ring
   1460  ************************************************************************/
   1461 static void
   1462 ixgbe_free_receive_ring(struct rx_ring *rxr)
   1463 {
   1464 	for (int i = 0; i < rxr->num_desc; i++) {
   1465 		ixgbe_rx_discard(rxr, i);
   1466 	}
   1467 } /* ixgbe_free_receive_ring */
   1468 
   1469 /************************************************************************
   1470  * ixgbe_setup_receive_ring
   1471  *
   1472  *   Initialize a receive ring and its buffers.
   1473  ************************************************************************/
   1474 static int
   1475 ixgbe_setup_receive_ring(struct rx_ring *rxr)
   1476 {
   1477 	struct adapter        *adapter;
   1478 	struct ixgbe_rx_buf   *rxbuf;
   1479 #ifdef LRO
   1480 	struct ifnet          *ifp;
   1481 	struct lro_ctrl       *lro = &rxr->lro;
   1482 #endif /* LRO */
   1483 #ifdef DEV_NETMAP
   1484 	struct netmap_adapter *na = NA(rxr->adapter->ifp);
   1485 	struct netmap_slot    *slot;
   1486 #endif /* DEV_NETMAP */
   1487 	int                   rsize, error = 0;
   1488 
   1489 	adapter = rxr->adapter;
   1490 #ifdef LRO
   1491 	ifp = adapter->ifp;
   1492 #endif /* LRO */
   1493 
   1494 	/* Clear the ring contents */
   1495 	IXGBE_RX_LOCK(rxr);
   1496 
   1497 #ifdef DEV_NETMAP
   1498 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP)
   1499 		slot = netmap_reset(na, NR_RX, rxr->me, 0);
   1500 #endif /* DEV_NETMAP */
   1501 
   1502 	rsize = roundup2(adapter->num_rx_desc *
   1503 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
   1504 	bzero((void *)rxr->rx_base, rsize);
   1505 	/* Cache the size */
   1506 	rxr->mbuf_sz = adapter->rx_mbuf_sz;
   1507 
   1508 	/* Free current RX buffer structs and their mbufs */
   1509 	ixgbe_free_receive_ring(rxr);
   1510 
   1511 	IXGBE_RX_UNLOCK(rxr);
   1512 	/*
   1513 	 * Now reinitialize our supply of jumbo mbufs.  The number
   1514 	 * or size of jumbo mbufs may have changed.
    1515 	 * All rings' rxr->ptag DMA tags are assumed to be identical.
   1516 	 */
   1517 	ixgbe_jcl_reinit(adapter, rxr->ptag->dt_dmat, rxr,
   1518 	    (2 * adapter->num_rx_desc), adapter->rx_mbuf_sz);
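         	/*
         	 * Note: the pool is sized at two clusters per descriptor,
         	 * which presumably leaves headroom for clusters still loaned
         	 * out to the stack while the ring is being refreshed.
         	 */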
   1519 
   1520 	IXGBE_RX_LOCK(rxr);
   1521 
   1522 	/* Now replenish the mbufs */
   1523 	for (int j = 0; j != rxr->num_desc; ++j) {
   1524 		struct mbuf *mp;
   1525 
   1526 		rxbuf = &rxr->rx_buffers[j];
   1527 
   1528 #ifdef DEV_NETMAP
   1529 		/*
   1530 		 * In netmap mode, fill the map and set the buffer
   1531 		 * address in the NIC ring, considering the offset
   1532 		 * between the netmap and NIC rings (see comment in
   1533 		 * ixgbe_setup_transmit_ring() ). No need to allocate
   1534 		 * an mbuf, so end the block with a continue;
   1535 		 */
   1536 		if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
   1537 			int sj = netmap_idx_n2k(na->rx_rings[rxr->me], j);
   1538 			uint64_t paddr;
   1539 			void *addr;
   1540 
   1541 			addr = PNMB(na, slot + sj, &paddr);
   1542 			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
   1543 			/* Update descriptor and the cached value */
   1544 			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
   1545 			rxbuf->addr = htole64(paddr);
   1546 			continue;
   1547 		}
   1548 #endif /* DEV_NETMAP */
   1549 
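         		/*
         		 * Grab a cluster from the per-ring jumbo cluster list
         		 * (rxr->jcl_head) that ixgbe_jcl_reinit() repopulated
         		 * above.
         		 */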
   1550 		rxbuf->flags = 0;
   1551 		rxbuf->buf = ixgbe_getjcl(&rxr->jcl_head, M_NOWAIT,
   1552 		    MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
   1553 		if (rxbuf->buf == NULL) {
   1554 			error = ENOBUFS;
   1555 			goto fail;
   1556 		}
   1557 		mp = rxbuf->buf;
   1558 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   1559 		/* Get the memory mapping */
   1560 		error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat, rxbuf->pmap,
   1561 		    mp, BUS_DMA_NOWAIT);
   1562 		if (error != 0)
    1563 			goto fail;
   1564 		bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1565 		    0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
   1566 		/* Update the descriptor and the cached value */
   1567 		rxr->rx_base[j].read.pkt_addr =
   1568 		    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1569 		rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1570 	}
   1571 
   1572 
   1573 	/* Setup our descriptor indices */
   1574 	rxr->next_to_check = 0;
   1575 	rxr->next_to_refresh = 0;
   1576 	rxr->lro_enabled = FALSE;
   1577 	rxr->rx_copies.ev_count = 0;
   1578 #if 0 /* NetBSD */
   1579 	rxr->rx_bytes.ev_count = 0;
   1580 #if 1	/* Fix inconsistency */
   1581 	rxr->rx_packets.ev_count = 0;
   1582 #endif
   1583 #endif
   1584 	rxr->vtag_strip = FALSE;
   1585 
   1586 	ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   1587 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   1588 
   1589 	/*
   1590 	 * Now set up the LRO interface
   1591 	 */
   1592 	if (ixgbe_rsc_enable)
   1593 		ixgbe_setup_hw_rsc(rxr);
   1594 #ifdef LRO
   1595 	else if (ifp->if_capenable & IFCAP_LRO) {
   1596 		device_t dev = adapter->dev;
   1597 		int err = tcp_lro_init(lro);
   1598 		if (err) {
   1599 			device_printf(dev, "LRO Initialization failed!\n");
   1600 			goto fail;
   1601 		}
   1602 		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
   1603 		rxr->lro_enabled = TRUE;
   1604 		lro->ifp = adapter->ifp;
   1605 	}
   1606 #endif /* LRO */
   1607 
   1608 	IXGBE_RX_UNLOCK(rxr);
   1609 
   1610 	return (0);
   1611 
   1612 fail:
   1613 	ixgbe_free_receive_ring(rxr);
   1614 	IXGBE_RX_UNLOCK(rxr);
   1615 
   1616 	return (error);
   1617 } /* ixgbe_setup_receive_ring */
   1618 
   1619 /************************************************************************
   1620  * ixgbe_setup_receive_structures - Initialize all receive rings.
   1621  ************************************************************************/
   1622 int
   1623 ixgbe_setup_receive_structures(struct adapter *adapter)
   1624 {
   1625 	struct rx_ring *rxr = adapter->rx_rings;
   1626 	int            j;
   1627 
   1628 	for (j = 0; j < adapter->num_queues; j++, rxr++)
   1629 		if (ixgbe_setup_receive_ring(rxr))
   1630 			goto fail;
   1631 
   1632 	return (0);
   1633 fail:
   1634 	/*
    1635 	 * Free RX buffers allocated so far; we will only handle
    1636 	 * the rings that completed, since the failing case will have
    1637 	 * cleaned up for itself.  'j' failed, so it's the terminus.
   1638 	 */
   1639 	for (int i = 0; i < j; ++i) {
   1640 		rxr = &adapter->rx_rings[i];
   1641 		IXGBE_RX_LOCK(rxr);
   1642 		ixgbe_free_receive_ring(rxr);
   1643 		IXGBE_RX_UNLOCK(rxr);
   1644 	}
   1645 
   1646 	return (ENOBUFS);
   1647 } /* ixgbe_setup_receive_structures */
   1648 
   1649 
   1650 /************************************************************************
   1651  * ixgbe_free_receive_structures - Free all receive rings.
   1652  ************************************************************************/
   1653 void
   1654 ixgbe_free_receive_structures(struct adapter *adapter)
   1655 {
   1656 	struct rx_ring *rxr = adapter->rx_rings;
   1657 
   1658 	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
   1659 
   1660 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
   1661 		ixgbe_free_receive_buffers(rxr);
   1662 #ifdef LRO
   1663 		/* Free LRO memory */
   1664 		tcp_lro_free(&rxr->lro);
   1665 #endif /* LRO */
   1666 		/* Free the ring memory as well */
   1667 		ixgbe_dma_free(adapter, &rxr->rxdma);
   1668 		IXGBE_RX_LOCK_DESTROY(rxr);
   1669 	}
   1670 
   1671 	free(adapter->rx_rings, M_DEVBUF);
   1672 } /* ixgbe_free_receive_structures */
   1673 
   1674 
   1675 /************************************************************************
   1676  * ixgbe_free_receive_buffers - Free receive ring data structures
   1677  ************************************************************************/
   1678 static void
   1679 ixgbe_free_receive_buffers(struct rx_ring *rxr)
   1680 {
   1681 	struct adapter      *adapter = rxr->adapter;
   1682 	struct ixgbe_rx_buf *rxbuf;
   1683 
   1684 	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
   1685 
   1686 	/* Cleanup any existing buffers */
   1687 	if (rxr->rx_buffers != NULL) {
   1688 		for (int i = 0; i < adapter->num_rx_desc; i++) {
   1689 			rxbuf = &rxr->rx_buffers[i];
   1690 			ixgbe_rx_discard(rxr, i);
   1691 			if (rxbuf->pmap != NULL) {
   1692 				ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
   1693 				rxbuf->pmap = NULL;
   1694 			}
   1695 		}
   1696 		if (rxr->rx_buffers != NULL) {
   1697 			free(rxr->rx_buffers, M_DEVBUF);
   1698 			rxr->rx_buffers = NULL;
   1699 		}
   1700 	}
   1701 
   1702 	if (rxr->ptag != NULL) {
   1703 		ixgbe_dma_tag_destroy(rxr->ptag);
   1704 		rxr->ptag = NULL;
   1705 	}
   1706 
   1707 	return;
   1708 } /* ixgbe_free_receive_buffers */
   1709 
   1710 /************************************************************************
   1711  * ixgbe_rx_input
   1712  ************************************************************************/
   1713 static __inline void
   1714 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m,
   1715     u32 ptype)
   1716 {
   1717 	struct adapter	*adapter = ifp->if_softc;
   1718 
   1719 #ifdef LRO
   1720 	struct ethercom *ec = &adapter->osdep.ec;
   1721 
   1722 	/*
    1723 	 * At the moment LRO is only for IP/TCP packets whose TCP checksum has
    1724 	 * been computed by hardware and which carry no VLAN tag in the
    1725 	 * ethernet header.  For IPv6 we do not yet support extension headers.
   1726 	 */
    1727 	if (rxr->lro_enabled &&
    1728 	    (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
    1729 	    (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
    1730 	    ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
    1731 	    (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
    1732 	    (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
    1733 	    (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
    1734 	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
    1735 	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
    1736 		/*
    1737 		 * Send to the stack if:
    1738 		 *  - LRO not enabled, or
    1739 		 *  - no LRO resources, or
    1740 		 *  - lro enqueue fails
    1741 		 */
    1742 		if (rxr->lro.lro_cnt != 0)
    1743 			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
    1744 				return;
    1745 	}
   1746 #endif /* LRO */
   1747 
   1748 	if_percpuq_enqueue(adapter->ipq, m);
   1749 } /* ixgbe_rx_input */
   1750 
   1751 /************************************************************************
   1752  * ixgbe_rx_discard
   1753  ************************************************************************/
   1754 static __inline void
   1755 ixgbe_rx_discard(struct rx_ring *rxr, int i)
   1756 {
   1757 	struct ixgbe_rx_buf *rbuf;
   1758 
   1759 	rbuf = &rxr->rx_buffers[i];
   1760 
   1761 	/*
   1762 	 * With advanced descriptors the writeback
    1763 	 * clobbers the buffer addrs, so it's easier
   1764 	 * to just free the existing mbufs and take
   1765 	 * the normal refresh path to get new buffers
   1766 	 * and mapping.
   1767 	 */
   1768 
   1769 	if (rbuf->fmp != NULL) {/* Partial chain ? */
   1770 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
   1771 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
   1772 		m_freem(rbuf->fmp);
   1773 		rbuf->fmp = NULL;
   1774 		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
   1775 	} else if (rbuf->buf) {
   1776 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
   1777 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
   1778 		m_free(rbuf->buf);
   1779 		rbuf->buf = NULL;
   1780 	}
   1781 	ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
   1782 
   1783 	rbuf->flags = 0;
   1784 
   1785 	return;
   1786 } /* ixgbe_rx_discard */
   1787 
   1788 
   1789 /************************************************************************
   1790  * ixgbe_rxeof
   1791  *
   1792  *   Executes in interrupt context. It replenishes the
    1793  *   mbufs in the descriptors and sends data which has
    1794  *   been DMA'd into host memory to the upper layer.
   1795  *
   1796  *   Return TRUE for more work, FALSE for all clean.
   1797  ************************************************************************/
   1798 bool
   1799 ixgbe_rxeof(struct ix_queue *que)
   1800 {
   1801 	struct adapter		*adapter = que->adapter;
   1802 	struct rx_ring		*rxr = que->rxr;
   1803 	struct ifnet		*ifp = adapter->ifp;
   1804 #ifdef LRO
   1805 	struct lro_ctrl		*lro = &rxr->lro;
   1806 #endif /* LRO */
   1807 	union ixgbe_adv_rx_desc	*cur;
   1808 	struct ixgbe_rx_buf	*rbuf, *nbuf;
   1809 	int			i, nextp, processed = 0;
   1810 	u32			staterr = 0;
   1811 	u32			count = adapter->rx_process_limit;
   1812 #ifdef RSS
   1813 	u16			pkt_info;
   1814 #endif
   1815 
   1816 	IXGBE_RX_LOCK(rxr);
   1817 
   1818 #ifdef DEV_NETMAP
   1819 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
   1820 		/* Same as the txeof routine: wakeup clients on intr. */
   1821 		if (netmap_rx_irq(ifp, rxr->me, &processed)) {
   1822 			IXGBE_RX_UNLOCK(rxr);
   1823 			return (FALSE);
   1824 		}
   1825 	}
   1826 #endif /* DEV_NETMAP */
   1827 
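         	/*
         	 * Main receive loop: walk the ring from next_to_check until
         	 * either the DD (descriptor done) bit is clear or the per-call
         	 * budget (adapter->rx_process_limit) is exhausted.
         	 */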
   1828 	for (i = rxr->next_to_check; count != 0;) {
   1829 		struct mbuf *sendmp, *mp;
   1830 		u32         rsc, ptype;
   1831 		u16         len;
   1832 		u16         vtag = 0;
   1833 		bool        eop;
   1834 
   1835 		/* Sync the ring. */
   1836 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   1837 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   1838 
   1839 		cur = &rxr->rx_base[i];
   1840 		staterr = le32toh(cur->wb.upper.status_error);
   1841 #ifdef RSS
   1842 		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
   1843 #endif
   1844 
   1845 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
   1846 			break;
   1847 
   1848 		count--;
   1849 		sendmp = NULL;
   1850 		nbuf = NULL;
   1851 		rsc = 0;
   1852 		cur->wb.upper.status_error = 0;
   1853 		rbuf = &rxr->rx_buffers[i];
   1854 		mp = rbuf->buf;
   1855 
   1856 		len = le16toh(cur->wb.upper.length);
   1857 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
   1858 		    IXGBE_RXDADV_PKTTYPE_MASK;
   1859 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
   1860 
   1861 		/* Make sure bad packets are discarded */
   1862 		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
   1863 #if __FreeBSD_version >= 1100036
   1864 			if (adapter->feat_en & IXGBE_FEATURE_VF)
   1865 				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
   1866 #endif
   1867 			rxr->rx_discarded.ev_count++;
   1868 			ixgbe_rx_discard(rxr, i);
   1869 			goto next_desc;
   1870 		}
   1871 
   1872 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
   1873 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
   1874 
   1875 		/*
    1876 		 * On the 82599, which supports a hardware
    1877 		 * LRO (called HW RSC), packets need
    1878 		 * not be fragmented across sequential
    1879 		 * descriptors; rather, the next descriptor
    1880 		 * is indicated in bits of the descriptor.
    1881 		 * This also means that we might process
    1882 		 * more than one packet at a time, something
    1883 		 * that had never been true before; it
   1884 		 * required eliminating global chain pointers
   1885 		 * in favor of what we are doing here.  -jfv
   1886 		 */
   1887 		if (!eop) {
   1888 			/*
   1889 			 * Figure out the next descriptor
   1890 			 * of this frame.
   1891 			 */
   1892 			if (rxr->hw_rsc == TRUE) {
   1893 				rsc = ixgbe_rsc_count(cur);
   1894 				rxr->rsc_num += (rsc - 1);
   1895 			}
   1896 			if (rsc) { /* Get hardware index */
   1897 				nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
   1898 				    IXGBE_RXDADV_NEXTP_SHIFT);
   1899 			} else { /* Just sequential */
   1900 				nextp = i + 1;
   1901 				if (nextp == adapter->num_rx_desc)
   1902 					nextp = 0;
   1903 			}
   1904 			nbuf = &rxr->rx_buffers[nextp];
   1905 			prefetch(nbuf);
   1906 		}
   1907 		/*
   1908 		 * Rather than using the fmp/lmp global pointers
   1909 		 * we now keep the head of a packet chain in the
   1910 		 * buffer struct and pass this along from one
   1911 		 * descriptor to the next, until we get EOP.
   1912 		 */
   1913 		mp->m_len = len;
   1914 		/*
   1915 		 * See if there is a stored head
   1916 		 * that determines what we are
   1917 		 */
   1918 		sendmp = rbuf->fmp;
   1919 		if (sendmp != NULL) {  /* secondary frag */
   1920 			rbuf->buf = rbuf->fmp = NULL;
   1921 			mp->m_flags &= ~M_PKTHDR;
   1922 			sendmp->m_pkthdr.len += mp->m_len;
   1923 		} else {
   1924 			/*
   1925 			 * Optimize.  This might be a small packet,
   1926 			 * maybe just a TCP ACK.  Do a fast copy that
   1927 			 * is cache aligned into a new mbuf, and
   1928 			 * leave the old mbuf+cluster for re-use.
   1929 			 */
   1930 			if (eop && len <= IXGBE_RX_COPY_LEN) {
   1931 				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
   1932 				if (sendmp != NULL) {
   1933 					sendmp->m_data += IXGBE_RX_COPY_ALIGN;
   1934 					ixgbe_bcopy(mp->m_data, sendmp->m_data,
   1935 					    len);
   1936 					sendmp->m_len = len;
   1937 					rxr->rx_copies.ev_count++;
   1938 					rbuf->flags |= IXGBE_RX_COPY;
   1939 				}
   1940 			}
   1941 			if (sendmp == NULL) {
   1942 				rbuf->buf = rbuf->fmp = NULL;
   1943 				sendmp = mp;
   1944 			}
   1945 
   1946 			/* first desc of a non-ps chain */
   1947 			sendmp->m_flags |= M_PKTHDR;
   1948 			sendmp->m_pkthdr.len = mp->m_len;
   1949 		}
   1950 		++processed;
   1951 
   1952 		/* Pass the head pointer on */
   1953 		if (eop == 0) {
   1954 			nbuf->fmp = sendmp;
   1955 			sendmp = NULL;
   1956 			mp->m_next = nbuf->buf;
   1957 		} else { /* Sending this frame */
   1958 			m_set_rcvif(sendmp, ifp);
   1959 			++rxr->packets;
   1960 			rxr->rx_packets.ev_count++;
   1961 			/* capture data for AIM */
   1962 			rxr->bytes += sendmp->m_pkthdr.len;
   1963 			rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
   1964 			/* Process vlan info */
   1965 			if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
   1966 				vtag = le16toh(cur->wb.upper.vlan);
   1967 			if (vtag) {
   1968 				vlan_set_tag(sendmp, vtag);
   1969 			}
   1970 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
   1971 				ixgbe_rx_checksum(staterr, sendmp, ptype,
   1972 				   &adapter->stats.pf);
   1973 			}
   1974 
   1975 #if 0 /* FreeBSD */
   1976 			/*
   1977 			 * In case of multiqueue, we have RXCSUM.PCSD bit set
   1978 			 * and never cleared. This means we have RSS hash
   1979 			 * available to be used.
   1980 			 */
   1981 			if (adapter->num_queues > 1) {
   1982 				sendmp->m_pkthdr.flowid =
   1983 				    le32toh(cur->wb.lower.hi_dword.rss);
   1984 				switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
   1985 				case IXGBE_RXDADV_RSSTYPE_IPV4:
   1986 					M_HASHTYPE_SET(sendmp,
   1987 					    M_HASHTYPE_RSS_IPV4);
   1988 					break;
   1989 				case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
   1990 					M_HASHTYPE_SET(sendmp,
   1991 					    M_HASHTYPE_RSS_TCP_IPV4);
   1992 					break;
   1993 				case IXGBE_RXDADV_RSSTYPE_IPV6:
   1994 					M_HASHTYPE_SET(sendmp,
   1995 					    M_HASHTYPE_RSS_IPV6);
   1996 					break;
   1997 				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
   1998 					M_HASHTYPE_SET(sendmp,
   1999 					    M_HASHTYPE_RSS_TCP_IPV6);
   2000 					break;
   2001 				case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
   2002 					M_HASHTYPE_SET(sendmp,
   2003 					    M_HASHTYPE_RSS_IPV6_EX);
   2004 					break;
   2005 				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
   2006 					M_HASHTYPE_SET(sendmp,
   2007 					    M_HASHTYPE_RSS_TCP_IPV6_EX);
   2008 					break;
   2009 #if __FreeBSD_version > 1100000
   2010 				case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
   2011 					M_HASHTYPE_SET(sendmp,
   2012 					    M_HASHTYPE_RSS_UDP_IPV4);
   2013 					break;
   2014 				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
   2015 					M_HASHTYPE_SET(sendmp,
   2016 					    M_HASHTYPE_RSS_UDP_IPV6);
   2017 					break;
   2018 				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
   2019 					M_HASHTYPE_SET(sendmp,
   2020 					    M_HASHTYPE_RSS_UDP_IPV6_EX);
   2021 					break;
   2022 #endif
   2023 				default:
   2024 					M_HASHTYPE_SET(sendmp,
   2025 					    M_HASHTYPE_OPAQUE_HASH);
   2026 				}
   2027 			} else {
   2028 				sendmp->m_pkthdr.flowid = que->msix;
   2029 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
   2030 			}
   2031 #endif
   2032 		}
   2033 next_desc:
   2034 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   2035 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   2036 
   2037 		/* Advance our pointers to the next descriptor. */
   2038 		if (++i == rxr->num_desc)
   2039 			i = 0;
   2040 
   2041 		/* Now send to the stack or do LRO */
   2042 		if (sendmp != NULL) {
   2043 			rxr->next_to_check = i;
   2044 			IXGBE_RX_UNLOCK(rxr);
   2045 			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
   2046 			IXGBE_RX_LOCK(rxr);
   2047 			i = rxr->next_to_check;
   2048 		}
   2049 
   2050 		/* Every 8 descriptors we go to refresh mbufs */
   2051 		if (processed == 8) {
   2052 			ixgbe_refresh_mbufs(rxr, i);
   2053 			processed = 0;
   2054 		}
   2055 	}
   2056 
   2057 	/* Refresh any remaining buf structs */
   2058 	if (ixgbe_rx_unrefreshed(rxr))
   2059 		ixgbe_refresh_mbufs(rxr, i);
   2060 
   2061 	rxr->next_to_check = i;
   2062 
   2063 	IXGBE_RX_UNLOCK(rxr);
   2064 
   2065 #ifdef LRO
   2066 	/*
   2067 	 * Flush any outstanding LRO work
   2068 	 */
   2069 	tcp_lro_flush_all(lro);
   2070 #endif /* LRO */
   2071 
   2072 	/*
   2073 	 * Still have cleaning to do?
   2074 	 */
   2075 	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
   2076 		return (TRUE);
   2077 
   2078 	return (FALSE);
   2079 } /* ixgbe_rxeof */
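         /*
          * Usage sketch (assumed, simplified): a queue interrupt or softint
          * handler uses the return value to decide whether to poll again
          * before re-enabling the queue's interrupt, roughly
          *
          *	more = ixgbe_rxeof(que);
          *	if (more)
          *		reschedule the handler;
          *	else
          *		re-enable this queue's interrupt;
          */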
   2080 
   2081 
   2082 /************************************************************************
   2083  * ixgbe_rx_checksum
   2084  *
   2085  *   Verify that the hardware indicated that the checksum is valid.
   2086  *   Inform the stack about the status of checksum so that stack
   2087  *   doesn't spend time verifying the checksum.
   2088  ************************************************************************/
   2089 static void
   2090 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
   2091     struct ixgbe_hw_stats *stats)
   2092 {
   2093 	u16  status = (u16)staterr;
   2094 	u8   errors = (u8)(staterr >> 24);
   2095 #if 0
   2096 	bool sctp = false;
   2097 
   2098 	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
   2099 	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
   2100 		sctp = true;
   2101 #endif
   2102 
   2103 	/* IPv4 checksum */
   2104 	if (status & IXGBE_RXD_STAT_IPCS) {
   2105 		stats->ipcs.ev_count++;
   2106 		if (!(errors & IXGBE_RXD_ERR_IPE)) {
   2107 			/* IP Checksum Good */
   2108 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
   2109 		} else {
   2110 			stats->ipcs_bad.ev_count++;
   2111 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
   2112 		}
   2113 	}
   2114 	/* TCP/UDP/SCTP checksum */
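         	/*
         	 * The L4CS status bit does not indicate which L4 protocol was
         	 * checked, so all candidate M_CSUM_* flags are set below; the
         	 * stack is expected to mask them against the protocol it is
         	 * actually parsing (the usual convention for receive csum
         	 * flags).
         	 */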
   2115 	if (status & IXGBE_RXD_STAT_L4CS) {
   2116 		stats->l4cs.ev_count++;
   2117 		int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
   2118 		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
   2119 			mp->m_pkthdr.csum_flags |= type;
   2120 		} else {
   2121 			stats->l4cs_bad.ev_count++;
   2122 			mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
   2123 		}
   2124 	}
   2125 } /* ixgbe_rx_checksum */
   2126 
   2127 /************************************************************************
   2128  * ixgbe_dma_malloc
   2129  ************************************************************************/
   2130 int
   2131 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
   2132 		struct ixgbe_dma_alloc *dma, const int mapflags)
   2133 {
   2134 	device_t dev = adapter->dev;
   2135 	int      r, rsegs;
   2136 
   2137 	r = ixgbe_dma_tag_create(
   2138 	     /*      parent */ adapter->osdep.dmat,
   2139 	     /*   alignment */ DBA_ALIGN,
   2140 	     /*      bounds */ 0,
   2141 	     /*     maxsize */ size,
   2142 	     /*   nsegments */ 1,
   2143 	     /*  maxsegsize */ size,
   2144 	     /*       flags */ BUS_DMA_ALLOCNOW,
   2145 			       &dma->dma_tag);
   2146 	if (r != 0) {
   2147 		aprint_error_dev(dev,
   2148 		    "%s: ixgbe_dma_tag_create failed; error %d\n", __func__,
   2149 		    r);
   2150 		goto fail_0;
   2151 	}
   2152 
   2153 	r = bus_dmamem_alloc(dma->dma_tag->dt_dmat, size,
   2154 	    dma->dma_tag->dt_alignment, dma->dma_tag->dt_boundary,
   2155 	    &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
   2156 	if (r != 0) {
   2157 		aprint_error_dev(dev,
   2158 		    "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
   2159 		goto fail_1;
   2160 	}
   2161 
   2162 	r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
   2163 	    size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
   2164 	if (r != 0) {
   2165 		aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
   2166 		    __func__, r);
   2167 		goto fail_2;
   2168 	}
   2169 
   2170 	r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
   2171 	if (r != 0) {
    2172 		aprint_error_dev(dev, "%s: ixgbe_dmamap_create failed; error %d\n",
   2173 		    __func__, r);
   2174 		goto fail_3;
   2175 	}
   2176 
   2177 	r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map,
   2178 	    dma->dma_vaddr, size, NULL, mapflags | BUS_DMA_NOWAIT);
   2179 	if (r != 0) {
   2180 		aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
   2181 		    __func__, r);
   2182 		goto fail_4;
   2183 	}
   2184 	dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
   2185 	dma->dma_size = size;
   2186 	return 0;
   2187 fail_4:
   2188 	ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
   2189 fail_3:
   2190 	bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
   2191 fail_2:
   2192 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
   2193 fail_1:
   2194 	ixgbe_dma_tag_destroy(dma->dma_tag);
   2195 fail_0:
   2196 
   2197 	return (r);
   2198 } /* ixgbe_dma_malloc */
   2199 
   2200 /************************************************************************
   2201  * ixgbe_dma_free
   2202  ************************************************************************/
   2203 void
   2204 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
   2205 {
   2206 	bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
   2207 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   2208 	ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
   2209 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
   2210 	ixgbe_dma_tag_destroy(dma->dma_tag);
   2211 } /* ixgbe_dma_free */
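         /*
          * Usage sketch: descriptor rings in this file pair the two helpers
          * above, as ixgbe_allocate_queues() below does for each TX ring:
          *
          *	if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma, BUS_DMA_NOWAIT))
          *		goto err_tx_desc;
          *	...
          *	ixgbe_dma_free(adapter, &txr->txdma);	(on the error unwind path)
          */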
   2212 
   2213 
   2214 /************************************************************************
   2215  * ixgbe_allocate_queues
   2216  *
   2217  *   Allocate memory for the transmit and receive rings, and then
   2218  *   the descriptors associated with each, called only once at attach.
   2219  ************************************************************************/
   2220 int
   2221 ixgbe_allocate_queues(struct adapter *adapter)
   2222 {
   2223 	device_t	dev = adapter->dev;
   2224 	struct ix_queue	*que;
   2225 	struct tx_ring	*txr;
   2226 	struct rx_ring	*rxr;
   2227 	int             rsize, tsize, error = IXGBE_SUCCESS;
   2228 	int             txconf = 0, rxconf = 0;
   2229 
   2230 	/* First, allocate the top level queue structs */
   2231 	adapter->queues = (struct ix_queue *)malloc(sizeof(struct ix_queue) *
    2232 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
    2233 	if (adapter->queues == NULL) {
    2234 		aprint_error_dev(dev, "Unable to allocate queue memory\n");
    2235 		error = ENOMEM;
    2236 		goto fail;
    2237 	}
   2238 
   2239 	/* Second, allocate the TX ring struct memory */
   2240 	adapter->tx_rings = (struct tx_ring *)malloc(sizeof(struct tx_ring) *
   2241 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
   2242 	if (adapter->tx_rings == NULL) {
   2243 		aprint_error_dev(dev, "Unable to allocate TX ring memory\n");
   2244 		error = ENOMEM;
   2245 		goto tx_fail;
   2246 	}
   2247 
   2248 	/* Third, allocate the RX ring */
   2249 	adapter->rx_rings = (struct rx_ring *)malloc(sizeof(struct rx_ring) *
   2250 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
   2251 	if (adapter->rx_rings == NULL) {
   2252 		aprint_error_dev(dev, "Unable to allocate RX ring memory\n");
   2253 		error = ENOMEM;
   2254 		goto rx_fail;
   2255 	}
   2256 
   2257 	/* For the ring itself */
   2258 	tsize = roundup2(adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc),
   2259 	    DBA_ALIGN);
   2260 
   2261 	/*
    2262 	 * Now set up the TX queues; txconf is needed to handle the
   2263 	 * possibility that things fail midcourse and we need to
   2264 	 * undo memory gracefully
   2265 	 */
   2266 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
   2267 		/* Set up some basics */
   2268 		txr = &adapter->tx_rings[i];
   2269 		txr->adapter = adapter;
   2270 		txr->txr_interq = NULL;
   2271 		/* In case SR-IOV is enabled, align the index properly */
   2272 #ifdef PCI_IOV
   2273 		txr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
   2274 		    i);
   2275 #else
   2276 		txr->me = i;
   2277 #endif
   2278 		txr->num_desc = adapter->num_tx_desc;
   2279 
   2280 		/* Initialize the TX side lock */
   2281 		mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
   2282 
   2283 		if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma,
   2284 		    BUS_DMA_NOWAIT)) {
   2285 			aprint_error_dev(dev,
   2286 			    "Unable to allocate TX Descriptor memory\n");
   2287 			error = ENOMEM;
   2288 			goto err_tx_desc;
   2289 		}
   2290 		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
   2291 		bzero((void *)txr->tx_base, tsize);
   2292 
   2293 		/* Now allocate transmit buffers for the ring */
   2294 		if (ixgbe_allocate_transmit_buffers(txr)) {
   2295 			aprint_error_dev(dev,
   2296 			    "Critical Failure setting up transmit buffers\n");
   2297 			error = ENOMEM;
   2298 			goto err_tx_desc;
    2299 		}
   2300 		if (!(adapter->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
   2301 			/* Allocate a buf ring */
   2302 			txr->txr_interq = pcq_create(IXGBE_BR_SIZE, KM_SLEEP);
   2303 			if (txr->txr_interq == NULL) {
   2304 				aprint_error_dev(dev,
   2305 				    "Critical Failure setting up buf ring\n");
   2306 				error = ENOMEM;
   2307 				goto err_tx_desc;
   2308 			}
   2309 		}
   2310 	}
   2311 
   2312 	/*
   2313 	 * Next the RX queues...
   2314 	 */
   2315 	rsize = roundup2(adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc),
   2316 	    DBA_ALIGN);
   2317 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
   2318 		rxr = &adapter->rx_rings[i];
   2319 		/* Set up some basics */
   2320 		rxr->adapter = adapter;
   2321 #ifdef PCI_IOV
   2322 		/* In case SR-IOV is enabled, align the index properly */
   2323 		rxr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
   2324 		    i);
   2325 #else
   2326 		rxr->me = i;
   2327 #endif
   2328 		rxr->num_desc = adapter->num_rx_desc;
   2329 
   2330 		/* Initialize the RX side lock */
   2331 		mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
   2332 
   2333 		if (ixgbe_dma_malloc(adapter, rsize, &rxr->rxdma,
   2334 		    BUS_DMA_NOWAIT)) {
   2335 			aprint_error_dev(dev,
    2336 			    "Unable to allocate RX Descriptor memory\n");
   2337 			error = ENOMEM;
   2338 			goto err_rx_desc;
   2339 		}
   2340 		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
   2341 		bzero((void *)rxr->rx_base, rsize);
   2342 
   2343 		/* Allocate receive buffers for the ring */
   2344 		if (ixgbe_allocate_receive_buffers(rxr)) {
   2345 			aprint_error_dev(dev,
   2346 			    "Critical Failure setting up receive buffers\n");
   2347 			error = ENOMEM;
   2348 			goto err_rx_desc;
   2349 		}
   2350 	}
   2351 
   2352 	/*
   2353 	 * Finally set up the queue holding structs
   2354 	 */
   2355 	for (int i = 0; i < adapter->num_queues; i++) {
   2356 		que = &adapter->queues[i];
   2357 		que->adapter = adapter;
   2358 		que->me = i;
   2359 		que->txr = &adapter->tx_rings[i];
   2360 		que->rxr = &adapter->rx_rings[i];
   2361 
   2362 		mutex_init(&que->dc_mtx, MUTEX_DEFAULT, IPL_NET);
   2363 		que->disabled_count = 0;
   2364 	}
   2365 
   2366 	return (0);
   2367 
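         /*
          * Error unwind: free the descriptor DMA areas of the rings that were
          * fully set up (rxconf/txconf count them), then the ring arrays and
          * the queue array, in the reverse order of allocation.
          */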
   2368 err_rx_desc:
   2369 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
   2370 		ixgbe_dma_free(adapter, &rxr->rxdma);
   2371 err_tx_desc:
   2372 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
   2373 		ixgbe_dma_free(adapter, &txr->txdma);
   2374 	free(adapter->rx_rings, M_DEVBUF);
   2375 rx_fail:
   2376 	free(adapter->tx_rings, M_DEVBUF);
   2377 tx_fail:
   2378 	free(adapter->queues, M_DEVBUF);
   2379 fail:
   2380 	return (error);
   2381 } /* ixgbe_allocate_queues */
   2382