      1 /* $NetBSD: ix_txrx.c,v 1.60 2020/01/21 14:55:55 msaitoh Exp $ */
      2 
      3 /******************************************************************************
      4 
      5   Copyright (c) 2001-2017, Intel Corporation
      6   All rights reserved.
      7 
      8   Redistribution and use in source and binary forms, with or without
      9   modification, are permitted provided that the following conditions are met:
     10 
     11    1. Redistributions of source code must retain the above copyright notice,
     12       this list of conditions and the following disclaimer.
     13 
     14    2. Redistributions in binary form must reproduce the above copyright
     15       notice, this list of conditions and the following disclaimer in the
     16       documentation and/or other materials provided with the distribution.
     17 
     18    3. Neither the name of the Intel Corporation nor the names of its
     19       contributors may be used to endorse or promote products derived from
     20       this software without specific prior written permission.
     21 
     22   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     23   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     24   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     25   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     26   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     27   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     28   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     29   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     30   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     31   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     32   POSSIBILITY OF SUCH DAMAGE.
     33 
     34 ******************************************************************************/
     35 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 327031 2017-12-20 18:15:06Z erj $*/
     36 
     37 /*
     38  * Copyright (c) 2011 The NetBSD Foundation, Inc.
     39  * All rights reserved.
     40  *
     41  * This code is derived from software contributed to The NetBSD Foundation
     42  * by Coyote Point Systems, Inc.
     43  *
     44  * Redistribution and use in source and binary forms, with or without
     45  * modification, are permitted provided that the following conditions
     46  * are met:
     47  * 1. Redistributions of source code must retain the above copyright
     48  *    notice, this list of conditions and the following disclaimer.
     49  * 2. Redistributions in binary form must reproduce the above copyright
     50  *    notice, this list of conditions and the following disclaimer in the
     51  *    documentation and/or other materials provided with the distribution.
     52  *
     53  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     54  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     55  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     56  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     57  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     58  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     59  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     60  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     61  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     62  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     63  * POSSIBILITY OF SUCH DAMAGE.
     64  */
     65 
     66 #include "opt_inet.h"
     67 #include "opt_inet6.h"
     68 
     69 #include "ixgbe.h"
     70 
     71 /*
     72  * HW RSC control:
     73  *  This feature only works with
     74  *  IPv4, and only on 82599 and later.
     75  *  It also causes IP forwarding to
     76  *  fail and, unlike LRO, cannot be
     77  *  controlled by the stack. For these
     78  *  reasons it is left disabled and no
     79  *  tunable interface is provided;
     80  *  enabling it requires setting this
     81  *  to TRUE and recompiling.
     82  */
     83 static bool ixgbe_rsc_enable = FALSE;
     84 
     85 /*
     86  * For Flow Director: this is the
     87  * number of TX packets we sample for
     88  * the filter pool; with the default
     89  * of 20, every 20th packet is probed.
     90  *
     91  * This feature can be disabled by
     92  * setting this to 0.
     93  */
     94 static int atr_sample_rate = 20;
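
/*
 * The sampling itself happens in ixgbe_xmit(): when Flow Director is
 * enabled, each TX ring counts packets in txr->atr_count and hands a
 * frame to ixgbe_atr() once the count reaches atr_sample_rate, then
 * resets the counter.
 */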
     95 
     96 /************************************************************************
     97  *  Local Function prototypes
     98  ************************************************************************/
     99 static void          ixgbe_setup_transmit_ring(struct tx_ring *);
    100 static void          ixgbe_free_transmit_buffers(struct tx_ring *);
    101 static int           ixgbe_setup_receive_ring(struct rx_ring *);
    102 static void          ixgbe_free_receive_buffers(struct rx_ring *);
    103 static void          ixgbe_rx_checksum(u32, struct mbuf *, u32,
    104                                        struct ixgbe_hw_stats *);
    105 static void          ixgbe_refresh_mbufs(struct rx_ring *, int);
    106 static void          ixgbe_drain(struct ifnet *, struct tx_ring *);
    107 static int           ixgbe_xmit(struct tx_ring *, struct mbuf *);
    108 static int           ixgbe_tx_ctx_setup(struct tx_ring *,
    109                                         struct mbuf *, u32 *, u32 *);
    110 static int           ixgbe_tso_setup(struct tx_ring *,
    111                                      struct mbuf *, u32 *, u32 *);
    112 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
    113 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
    114                                     struct mbuf *, u32);
    115 static int           ixgbe_dma_malloc(struct adapter *, bus_size_t,
    116                                       struct ixgbe_dma_alloc *, int);
    117 static void          ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
    118 
    119 static void	ixgbe_setup_hw_rsc(struct rx_ring *);
    120 
    121 /************************************************************************
    122  * ixgbe_legacy_start_locked - Transmit entry point
    123  *
    124  *   Called by the stack to initiate a transmit.
    125  *   The driver will remain in this routine as long as there are
    126  *   packets to transmit and transmit resources are available.
    127  *   In case resources are not available, the stack is notified
    128  *   and the packet is requeued.
    129  ************************************************************************/
    130 int
    131 ixgbe_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
    132 {
    133 	int rc;
    134 	struct mbuf    *m_head;
    135 	struct adapter *adapter = txr->adapter;
    136 
    137 	IXGBE_TX_LOCK_ASSERT(txr);
    138 
    139 	if (adapter->link_active != LINK_STATE_UP) {
    140 		/*
    141 		 * Discard all packets buffered in the IFQ to avoid
    142 		 * sending stale packets when the link comes back up.
    143 		 */
    144 		ixgbe_drain(ifp, txr);
    145 		return (ENETDOWN);
    146 	}
    147 	if ((ifp->if_flags & IFF_RUNNING) == 0)
    148 		return (ENETDOWN);
    149 	if (txr->txr_no_space)
    150 		return (ENETDOWN);
    151 
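	/*
	 * Note that the head packet is only peeked at (IFQ_POLL)
	 * before ixgbe_xmit() is attempted: on EAGAIN (descriptors or
	 * DMA maps temporarily exhausted) it stays at the head of
	 * if_snd to be retried later, and it is dequeued only once
	 * ixgbe_xmit() has either accepted it or failed fatally.
	 */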
    152 	while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
    153 		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
    154 			break;
    155 
    156 		IFQ_POLL(&ifp->if_snd, m_head);
    157 		if (m_head == NULL)
    158 			break;
    159 
    160 		if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
    161 			break;
    162 		}
    163 		IFQ_DEQUEUE(&ifp->if_snd, m_head);
    164 		if (rc != 0) {
    165 			m_freem(m_head);
    166 			continue;
    167 		}
    168 
    169 		/* Send a copy of the frame to the BPF listener */
    170 		bpf_mtap(ifp, m_head, BPF_D_OUT);
    171 	}
    172 
    173 	return IXGBE_SUCCESS;
    174 } /* ixgbe_legacy_start_locked */
    175 
    176 /************************************************************************
    177  * ixgbe_legacy_start
    178  *
    179  *   Called by the stack, this always uses the first tx ring,
    180  *   and should not be used with multiqueue tx enabled.
    181  ************************************************************************/
    182 void
    183 ixgbe_legacy_start(struct ifnet *ifp)
    184 {
    185 	struct adapter *adapter = ifp->if_softc;
    186 	struct tx_ring *txr = adapter->tx_rings;
    187 
    188 	if (ifp->if_flags & IFF_RUNNING) {
    189 		IXGBE_TX_LOCK(txr);
    190 		ixgbe_legacy_start_locked(ifp, txr);
    191 		IXGBE_TX_UNLOCK(txr);
    192 	}
    193 } /* ixgbe_legacy_start */
    194 
    195 /************************************************************************
    196  * ixgbe_mq_start - Multiqueue Transmit Entry Point
    197  *
    198  *   (if_transmit function)
    199  ************************************************************************/
    200 int
    201 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
    202 {
    203 	struct adapter	*adapter = ifp->if_softc;
    204 	struct tx_ring	*txr;
    205 	int 		i;
    206 #ifdef RSS
    207 	uint32_t bucket_id;
    208 #endif
    209 
    210 	/*
    211 	 * When doing RSS, map the packet to the same outbound
    212 	 * queue as the incoming flow would be mapped to.
    213 	 *
    214 	 * If everything is set up correctly, it should be the
    215 	 * same bucket that the current CPU belongs to.
    216 	 */
    217 #ifdef RSS
    218 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
    219 		if ((adapter->feat_en & IXGBE_FEATURE_RSS) &&
    220 		    (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
    221 		    &bucket_id) == 0)) {
    222 			i = bucket_id % adapter->num_queues;
    223 #ifdef IXGBE_DEBUG
    224 			if (bucket_id > adapter->num_queues)
    225 				if_printf(ifp,
    226 				    "bucket_id (%d) > num_queues (%d)\n",
    227 				    bucket_id, adapter->num_queues);
    228 #endif
    229 		} else
    230 			i = m->m_pkthdr.flowid % adapter->num_queues;
    231 	} else
    232 #endif /* RSS */
    233 		i = (cpu_index(curcpu()) % ncpu) % adapter->num_queues;
    234 
    235 	/* Check for a hung queue and pick alternative */
    236 	if (((1ULL << i) & adapter->active_queues) == 0)
    237 		i = ffs64(adapter->active_queues);
    238 
    239 	txr = &adapter->tx_rings[i];
    240 
    241 	if (__predict_false(!pcq_put(txr->txr_interq, m))) {
    242 		m_freem(m);
    243 		txr->pcq_drops.ev_count++;
    244 		return ENOBUFS;
    245 	}
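	/*
	 * The packet is now queued on txr_interq. Try to drain the
	 * queue inline while holding the TX lock; if the lock is
	 * contended, defer the drain to either the per-ring workqueue
	 * or the per-ring softint, both of which eventually run
	 * ixgbe_deferred_mq_start().
	 */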
    246 	if (IXGBE_TX_TRYLOCK(txr)) {
    247 		ixgbe_mq_start_locked(ifp, txr);
    248 		IXGBE_TX_UNLOCK(txr);
    249 	} else {
    250 		if (adapter->txrx_use_workqueue) {
    251 			u_int *enqueued;
    252 
    253 			/*
    254 			 * This function itself is not called in interrupt
    255 			 * context, but it can be called from fast softint
    256 			 * context right after receiving forwarded packets.
    257 			 * The workqueue must therefore be protected against
    258 			 * double enqueueing when the machine handles both
    259 			 * locally generated and forwarded packets.
    260 			 */
    261 			enqueued = percpu_getref(adapter->txr_wq_enqueued);
    262 			if (*enqueued == 0) {
    263 				*enqueued = 1;
    264 				percpu_putref(adapter->txr_wq_enqueued);
    265 				workqueue_enqueue(adapter->txr_wq,
    266 				    &txr->wq_cookie, curcpu());
    267 			} else
    268 				percpu_putref(adapter->txr_wq_enqueued);
    269 		} else {
    270 			kpreempt_disable();
    271 			softint_schedule(txr->txr_si);
    272 			kpreempt_enable();
    273 		}
    274 	}
    275 
    276 	return (0);
    277 } /* ixgbe_mq_start */
    278 
    279 /************************************************************************
    280  * ixgbe_mq_start_locked
    281  ************************************************************************/
    282 int
    283 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
    284 {
    285 	struct mbuf    *next;
    286 	int            enqueued = 0, err = 0;
    287 
    288 	if (txr->adapter->link_active != LINK_STATE_UP) {
    289 		/*
    290 		 * Discard all packets buffered in txr_interq to avoid
    291 		 * sending stale packets when the link comes back up.
    292 		 */
    293 		ixgbe_drain(ifp, txr);
    294 		return (ENETDOWN);
    295 	}
    296 	if ((ifp->if_flags & IFF_RUNNING) == 0)
    297 		return (ENETDOWN);
    298 	if (txr->txr_no_space)
    299 		return (ENETDOWN);
    300 
    301 	/* Process the queue */
    302 	while ((next = pcq_get(txr->txr_interq)) != NULL) {
    303 		if ((err = ixgbe_xmit(txr, next)) != 0) {
    304 			m_freem(next);
    305 			/* All errors are counted in ixgbe_xmit() */
    306 			break;
    307 		}
    308 		enqueued++;
    309 #if __FreeBSD_version >= 1100036
    310 		/*
    311 		 * Since we're looking at the tx ring, we can check
    312 		 * to see if we're a VF by examining our tail register
    313 		 * address.
    314 		 */
    315 		if ((txr->adapter->feat_en & IXGBE_FEATURE_VF) &&
    316 		    (next->m_flags & M_MCAST))
    317 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
    318 #endif
    319 		/* Send a copy of the frame to the BPF listener */
    320 		bpf_mtap(ifp, next, BPF_D_OUT);
    321 		if ((ifp->if_flags & IFF_RUNNING) == 0)
    322 			break;
    323 	}
    324 
    325 	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->adapter))
    326 		ixgbe_txeof(txr);
    327 
    328 	return (err);
    329 } /* ixgbe_mq_start_locked */
    330 
    331 /************************************************************************
    332  * ixgbe_deferred_mq_start
    333  *
    334  *   Called from a softint and workqueue (indirectly) to drain queued
    335  *   transmit packets.
    336  ************************************************************************/
    337 void
    338 ixgbe_deferred_mq_start(void *arg)
    339 {
    340 	struct tx_ring *txr = arg;
    341 	struct adapter *adapter = txr->adapter;
    342 	struct ifnet   *ifp = adapter->ifp;
    343 
    344 	IXGBE_TX_LOCK(txr);
    345 	if (pcq_peek(txr->txr_interq) != NULL)
    346 		ixgbe_mq_start_locked(ifp, txr);
    347 	IXGBE_TX_UNLOCK(txr);
    348 } /* ixgbe_deferred_mq_start */
    349 
    350 /************************************************************************
    351  * ixgbe_deferred_mq_start_work
    352  *
    353  *   Called from a workqueue to drain queued transmit packets.
    354  ************************************************************************/
    355 void
    356 ixgbe_deferred_mq_start_work(struct work *wk, void *arg)
    357 {
    358 	struct tx_ring *txr = container_of(wk, struct tx_ring, wq_cookie);
    359 	struct adapter *adapter = txr->adapter;
    360 	u_int *enqueued = percpu_getref(adapter->txr_wq_enqueued);
    361 	*enqueued = 0;
    362 	percpu_putref(adapter->txr_wq_enqueued);
    363 
    364 	ixgbe_deferred_mq_start(txr);
    365 } /* ixgbe_deferred_mq_start_work */
    366 
    367 /************************************************************************
    368  * ixgbe_drain_all
    369  ************************************************************************/
    370 void
    371 ixgbe_drain_all(struct adapter *adapter)
    372 {
    373 	struct ifnet *ifp = adapter->ifp;
    374 	struct ix_queue *que = adapter->queues;
    375 
    376 	for (int i = 0; i < adapter->num_queues; i++, que++) {
    377 		struct tx_ring  *txr = que->txr;
    378 
    379 		IXGBE_TX_LOCK(txr);
    380 		ixgbe_drain(ifp, txr);
    381 		IXGBE_TX_UNLOCK(txr);
    382 	}
    383 }
    384 
    385 /************************************************************************
    386  * ixgbe_xmit
    387  *
    388  *   Maps the mbufs to tx descriptors, allowing the
    389  *   TX engine to transmit the packets.
    390  *
    391  *   Return 0 on success, positive on failure
    392  ************************************************************************/
    393 static int
    394 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
    395 {
    396 	struct adapter          *adapter = txr->adapter;
    397 	struct ixgbe_tx_buf     *txbuf;
    398 	union ixgbe_adv_tx_desc *txd = NULL;
    399 	struct ifnet	        *ifp = adapter->ifp;
    400 	int                     i, j, error;
    401 	int                     first;
    402 	u32                     olinfo_status = 0, cmd_type_len;
    403 	bool                    remap = TRUE;
    404 	bus_dmamap_t            map;
    405 
    406 	/* Basic descriptor defines */
    407 	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
    408 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
    409 
    410 	if (vlan_has_tag(m_head))
    411 		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
    412 
    413 	/*
    414 	 * It is important to capture the first descriptor
    415 	 * used because it will contain the index of the one
    416 	 * we tell the hardware to report back on completion.
    417 	 */
    418 	first = txr->next_avail_desc;
    419 	txbuf = &txr->tx_buffers[first];
    420 	map = txbuf->map;
    421 
    422 	/*
    423 	 * Map the packet for DMA.
    424 	 */
    425 retry:
    426 	error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map, m_head,
    427 	    BUS_DMA_NOWAIT);
    428 
    429 	if (__predict_false(error)) {
    430 		struct mbuf *m;
    431 
    432 		switch (error) {
    433 		case EAGAIN:
    434 			txr->q_eagain_tx_dma_setup++;
    435 			return EAGAIN;
    436 		case ENOMEM:
    437 			txr->q_enomem_tx_dma_setup++;
    438 			return EAGAIN;
    439 		case EFBIG:
    440 			/* Try it again? - one try */
    441 			if (remap == TRUE) {
    442 				remap = FALSE;
    443 				/*
    444 				 * XXX: m_defrag will choke on
    445 				 * non-MCLBYTES-sized clusters
    446 				 */
    447 				txr->q_efbig_tx_dma_setup++;
    448 				m = m_defrag(m_head, M_NOWAIT);
    449 				if (m == NULL) {
    450 					txr->q_mbuf_defrag_failed++;
    451 					return ENOBUFS;
    452 				}
    453 				m_head = m;
    454 				goto retry;
    455 			} else {
    456 				txr->q_efbig2_tx_dma_setup++;
    457 				return error;
    458 			}
    459 		case EINVAL:
    460 			txr->q_einval_tx_dma_setup++;
    461 			return error;
    462 		default:
    463 			txr->q_other_tx_dma_setup++;
    464 			return error;
    465 		}
    466 	}
    467 
    468 	/* Make certain there are enough descriptors */
    469 	if (txr->tx_avail < (map->dm_nsegs + 2)) {
    470 		txr->txr_no_space = true;
    471 		txr->no_desc_avail.ev_count++;
    472 		ixgbe_dmamap_unload(txr->txtag, txbuf->map);
    473 		return EAGAIN;
    474 	}
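	/*
	 * The "+ 2" above leaves room for the offload context
	 * descriptor that ixgbe_tx_ctx_setup() may write below,
	 * plus a small amount of headroom.
	 */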
    475 
    476 	/*
    477 	 * Set up the appropriate offload context;
    478 	 * this will consume the first descriptor.
    479 	 */
    480 	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
    481 	if (__predict_false(error)) {
    482 		return (error);
    483 	}
    484 
    485 	/* Do the flow director magic */
    486 	if ((adapter->feat_en & IXGBE_FEATURE_FDIR) &&
    487 	    (txr->atr_sample) && (!adapter->fdir_reinit)) {
    488 		++txr->atr_count;
    489 		if (txr->atr_count >= atr_sample_rate) {
    490 			ixgbe_atr(txr, m_head);
    491 			txr->atr_count = 0;
    492 		}
    493 	}
    494 
    495 	olinfo_status |= IXGBE_ADVTXD_CC;
    496 	i = txr->next_avail_desc;
    497 	for (j = 0; j < map->dm_nsegs; j++) {
    498 		bus_size_t seglen;
    499 		bus_addr_t segaddr;
    500 
    501 		txbuf = &txr->tx_buffers[i];
    502 		txd = &txr->tx_base[i];
    503 		seglen = map->dm_segs[j].ds_len;
    504 		segaddr = htole64(map->dm_segs[j].ds_addr);
    505 
    506 		txd->read.buffer_addr = segaddr;
    507 		txd->read.cmd_type_len = htole32(cmd_type_len | seglen);
    508 		txd->read.olinfo_status = htole32(olinfo_status);
    509 
    510 		if (++i == txr->num_desc)
    511 			i = 0;
    512 	}
    513 
    514 	txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
    515 	txr->tx_avail -= map->dm_nsegs;
    516 	txr->next_avail_desc = i;
    517 
    518 	txbuf->m_head = m_head;
    519 	/*
    520 	 * Here we swap the maps so the last descriptor,
    521 	 * which gets the completion interrupt, has the
    522 	 * real map, and the first descriptor gets the
    523 	 * unused map from this descriptor.
    524 	 */
    525 	txr->tx_buffers[first].map = txbuf->map;
    526 	txbuf->map = map;
    527 	bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
    528 	    BUS_DMASYNC_PREWRITE);
    529 
    530 	/* Set the EOP descriptor that will be marked done */
    531 	txbuf = &txr->tx_buffers[first];
    532 	txbuf->eop = txd;
    533 
    534 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
    535 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
    536 	/*
    537 	 * Advance the Transmit Descriptor Tail (TDT); this tells the
    538 	 * hardware that this frame is available to transmit.
    539 	 */
    540 	++txr->total_packets.ev_count;
    541 	IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
    542 
    543 	/*
    544 	 * XXXX NOMPSAFE: ifp->if_data should be percpu.
    545 	 */
    546 	ifp->if_obytes += m_head->m_pkthdr.len;
    547 	if (m_head->m_flags & M_MCAST)
    548 		ifp->if_omcasts++;
    549 
    550 	/* Mark queue as having work */
    551 	if (txr->busy == 0)
    552 		txr->busy = 1;
    553 
    554 	return (0);
    555 } /* ixgbe_xmit */
    556 
    557 /************************************************************************
    558  * ixgbe_drain
    559  ************************************************************************/
    560 static void
    561 ixgbe_drain(struct ifnet *ifp, struct tx_ring *txr)
    562 {
    563 	struct mbuf *m;
    564 
    565 	IXGBE_TX_LOCK_ASSERT(txr);
    566 
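	/*
	 * The legacy if_snd queue is shared by all rings and is
	 * therefore drained only from ring 0; the per-ring
	 * txr_interq is always drained.
	 */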
    567 	if (txr->me == 0) {
    568 		while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
    569 			IFQ_DEQUEUE(&ifp->if_snd, m);
    570 			m_freem(m);
    571 			IF_DROP(&ifp->if_snd);
    572 		}
    573 	}
    574 
    575 	while ((m = pcq_get(txr->txr_interq)) != NULL) {
    576 		m_freem(m);
    577 		txr->pcq_drops.ev_count++;
    578 	}
    579 }
    580 
    581 /************************************************************************
    582  * ixgbe_allocate_transmit_buffers
    583  *
    584  *   Allocate memory for tx_buffer structures. The tx_buffer stores all
    585  *   the information needed to transmit a packet on the wire. This is
    586  *   called only once at attach; setup is done on every reset.
    587  ************************************************************************/
    588 static int
    589 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
    590 {
    591 	struct adapter      *adapter = txr->adapter;
    592 	device_t            dev = adapter->dev;
    593 	struct ixgbe_tx_buf *txbuf;
    594 	int                 error, i;
    595 
    596 	/*
    597 	 * Setup DMA descriptor areas.
    598 	 */
    599 	error = ixgbe_dma_tag_create(
    600 	         /*      parent */ adapter->osdep.dmat,
    601 	         /*   alignment */ 1,
    602 	         /*      bounds */ 0,
    603 	         /*     maxsize */ IXGBE_TSO_SIZE,
    604 	         /*   nsegments */ adapter->num_segs,
    605 	         /*  maxsegsize */ PAGE_SIZE,
    606 	         /*       flags */ 0,
    607 	                           &txr->txtag);
    608 	if (error != 0) {
    609 		aprint_error_dev(dev, "Unable to allocate TX DMA tag\n");
    610 		goto fail;
    611 	}
    612 
    613 	txr->tx_buffers = malloc(sizeof(struct ixgbe_tx_buf) *
    614 	    adapter->num_tx_desc, M_DEVBUF, M_WAITOK | M_ZERO);
    615 
    616 	/* Create the descriptor buffer dma maps */
    617 	txbuf = txr->tx_buffers;
    618 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
    619 		error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
    620 		if (error != 0) {
    621 			aprint_error_dev(dev,
    622 			    "Unable to create TX DMA map (%d)\n", error);
    623 			goto fail;
    624 		}
    625 	}
    626 
    627 	return 0;
    628 fail:
    629 	/* We free all, it handles case where we are in the middle */
    630 #if 0 /* XXX was FreeBSD */
    631 	ixgbe_free_transmit_structures(adapter);
    632 #else
    633 	ixgbe_free_transmit_buffers(txr);
    634 #endif
    635 	return (error);
    636 } /* ixgbe_allocate_transmit_buffers */
    637 
    638 /************************************************************************
    639  * ixgbe_setup_transmit_ring - Initialize a transmit ring.
    640  ************************************************************************/
    641 static void
    642 ixgbe_setup_transmit_ring(struct tx_ring *txr)
    643 {
    644 	struct adapter        *adapter = txr->adapter;
    645 	struct ixgbe_tx_buf   *txbuf;
    646 #ifdef DEV_NETMAP
    647 	struct netmap_adapter *na = NA(adapter->ifp);
    648 	struct netmap_slot    *slot;
    649 #endif /* DEV_NETMAP */
    650 
    651 	/* Clear the old ring contents */
    652 	IXGBE_TX_LOCK(txr);
    653 
    654 #ifdef DEV_NETMAP
    655 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
    656 		/*
    657 		 * (under lock): if in netmap mode, do some consistency
    658 		 * checks and set slot to entry 0 of the netmap ring.
    659 		 */
    660 		slot = netmap_reset(na, NR_TX, txr->me, 0);
    661 	}
    662 #endif /* DEV_NETMAP */
    663 
    664 	bzero((void *)txr->tx_base,
    665 	    (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
    666 	/* Reset indices */
    667 	txr->next_avail_desc = 0;
    668 	txr->next_to_clean = 0;
    669 
    670 	/* Free any existing tx buffers. */
    671 	txbuf = txr->tx_buffers;
    672 	for (int i = 0; i < txr->num_desc; i++, txbuf++) {
    673 		if (txbuf->m_head != NULL) {
    674 			bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
    675 			    0, txbuf->m_head->m_pkthdr.len,
    676 			    BUS_DMASYNC_POSTWRITE);
    677 			ixgbe_dmamap_unload(txr->txtag, txbuf->map);
    678 			m_freem(txbuf->m_head);
    679 			txbuf->m_head = NULL;
    680 		}
    681 
    682 #ifdef DEV_NETMAP
    683 		/*
    684 		 * In netmap mode, set the map for the packet buffer.
    685 		 * NOTE: Some drivers (not this one) also need to set
    686 		 * the physical buffer address in the NIC ring.
    687 		 * Slots in the netmap ring (indexed by "si") are
    688 		 * kring->nkr_hwofs positions "ahead" wrt the
    689 		 * corresponding slot in the NIC ring. In some drivers
    690 		 * (not here) nkr_hwofs can be negative. Function
    691 		 * netmap_idx_n2k() handles wraparounds properly.
    692 		 */
    693 		if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
    694 			int si = netmap_idx_n2k(na->tx_rings[txr->me], i);
    695 			netmap_load_map(na, txr->txtag,
    696 			    txbuf->map, NMB(na, slot + si));
    697 		}
    698 #endif /* DEV_NETMAP */
    699 
    700 		/* Clear the EOP descriptor pointer */
    701 		txbuf->eop = NULL;
    702 	}
    703 
    704 	/* Set the rate at which we sample packets */
    705 	if (adapter->feat_en & IXGBE_FEATURE_FDIR)
    706 		txr->atr_sample = atr_sample_rate;
    707 
    708 	/* Set number of descriptors available */
    709 	txr->tx_avail = adapter->num_tx_desc;
    710 
    711 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
    712 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
    713 	IXGBE_TX_UNLOCK(txr);
    714 } /* ixgbe_setup_transmit_ring */
    715 
    716 /************************************************************************
    717  * ixgbe_setup_transmit_structures - Initialize all transmit rings.
    718  ************************************************************************/
    719 int
    720 ixgbe_setup_transmit_structures(struct adapter *adapter)
    721 {
    722 	struct tx_ring *txr = adapter->tx_rings;
    723 
    724 	for (int i = 0; i < adapter->num_queues; i++, txr++)
    725 		ixgbe_setup_transmit_ring(txr);
    726 
    727 	return (0);
    728 } /* ixgbe_setup_transmit_structures */
    729 
    730 /************************************************************************
    731  * ixgbe_free_transmit_structures - Free all transmit rings.
    732  ************************************************************************/
    733 void
    734 ixgbe_free_transmit_structures(struct adapter *adapter)
    735 {
    736 	struct tx_ring *txr = adapter->tx_rings;
    737 
    738 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
    739 		ixgbe_free_transmit_buffers(txr);
    740 		ixgbe_dma_free(adapter, &txr->txdma);
    741 		IXGBE_TX_LOCK_DESTROY(txr);
    742 	}
    743 	free(adapter->tx_rings, M_DEVBUF);
    744 } /* ixgbe_free_transmit_structures */
    745 
    746 /************************************************************************
    747  * ixgbe_free_transmit_buffers
    748  *
    749  *   Free transmit ring related data structures.
    750  ************************************************************************/
    751 static void
    752 ixgbe_free_transmit_buffers(struct tx_ring *txr)
    753 {
    754 	struct adapter      *adapter = txr->adapter;
    755 	struct ixgbe_tx_buf *tx_buffer;
    756 	int                 i;
    757 
    758 	INIT_DEBUGOUT("ixgbe_free_transmit_buffers: begin");
    759 
    760 	if (txr->tx_buffers == NULL)
    761 		return;
    762 
    763 	tx_buffer = txr->tx_buffers;
    764 	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
    765 		if (tx_buffer->m_head != NULL) {
    766 			bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
    767 			    0, tx_buffer->m_head->m_pkthdr.len,
    768 			    BUS_DMASYNC_POSTWRITE);
    769 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
    770 			m_freem(tx_buffer->m_head);
    771 			tx_buffer->m_head = NULL;
    772 			if (tx_buffer->map != NULL) {
    773 				ixgbe_dmamap_destroy(txr->txtag,
    774 				    tx_buffer->map);
    775 				tx_buffer->map = NULL;
    776 			}
    777 		} else if (tx_buffer->map != NULL) {
    778 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
    779 			ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
    780 			tx_buffer->map = NULL;
    781 		}
    782 	}
    783 	if (txr->txr_interq != NULL) {
    784 		struct mbuf *m;
    785 
    786 		while ((m = pcq_get(txr->txr_interq)) != NULL)
    787 			m_freem(m);
    788 		pcq_destroy(txr->txr_interq);
    789 	}
    790 	if (txr->tx_buffers != NULL) {
    791 		free(txr->tx_buffers, M_DEVBUF);
    792 		txr->tx_buffers = NULL;
    793 	}
    794 	if (txr->txtag != NULL) {
    795 		ixgbe_dma_tag_destroy(txr->txtag);
    796 		txr->txtag = NULL;
    797 	}
    798 } /* ixgbe_free_transmit_buffers */
    799 
    800 /************************************************************************
    801  * ixgbe_tx_ctx_setup
    802  *
    803  *   Advanced Context Descriptor setup for VLAN, CSUM or TSO
    804  ************************************************************************/
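/*
 * Roughly, the context descriptor built here packs:
 *   vlan_macip_lens  - VLAN tag, MAC header length, IP header length
 *   type_tucmd_mlhl  - descriptor type (CTXT) plus IPv4/IPv6 and
 *                      TCP/UDP checksum selection
 *   mss_l4len_idx    - only used by the TSO path (ixgbe_tso_setup())
 * One such descriptor precedes the data descriptors of an offloaded
 * frame and consumes one slot from tx_avail.
 */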
    805 static int
    806 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
    807     u32 *cmd_type_len, u32 *olinfo_status)
    808 {
    809 	struct adapter                   *adapter = txr->adapter;
    810 	struct ixgbe_adv_tx_context_desc *TXD;
    811 	struct ether_vlan_header         *eh;
    812 #ifdef INET
    813 	struct ip                        *ip;
    814 #endif
    815 #ifdef INET6
    816 	struct ip6_hdr                   *ip6;
    817 #endif
    818 	int                              ehdrlen, ip_hlen = 0;
    819 	int                              offload = TRUE;
    820 	int                              ctxd = txr->next_avail_desc;
    821 	u32                              vlan_macip_lens = 0;
    822 	u32                              type_tucmd_mlhl = 0;
    823 	u16                              vtag = 0;
    824 	u16                              etype;
    825 	u8                               ipproto = 0;
    826 	char                             *l3d;
    827 
    828 
    829 	/* First check if TSO is to be used */
    830 	if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4 | M_CSUM_TSOv6)) {
    831 		int rv = ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status);
    832 
    833 		if (rv != 0)
    834 			++adapter->tso_err.ev_count;
    835 		return rv;
    836 	}
    837 
    838 	if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
    839 		offload = FALSE;
    840 
    841 	/* Indicate the whole packet as payload when not doing TSO */
    842 	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
    843 
    844 	/* Now ready a context descriptor */
    845 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
    846 
    847 	/*
    848 	 * In advanced descriptors the vlan tag must
    849 	 * be placed into the context descriptor. Hence
    850 	 * we need to make one even if not doing offloads.
    851 	 */
    852 	if (vlan_has_tag(mp)) {
    853 		vtag = htole16(vlan_get_tag(mp));
    854 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
    855 	} else if (!(txr->adapter->feat_en & IXGBE_FEATURE_NEEDS_CTXD) &&
    856 	           (offload == FALSE))
    857 		return (0);
    858 
    859 	/*
    860 	 * Determine where frame payload starts.
    861 	 * Jump over vlan headers if already present,
    862 	 * helpful for QinQ too.
    863 	 */
    864 	KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
    865 	eh = mtod(mp, struct ether_vlan_header *);
    866 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
    867 		KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
    868 		etype = ntohs(eh->evl_proto);
    869 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
    870 	} else {
    871 		etype = ntohs(eh->evl_encap_proto);
    872 		ehdrlen = ETHER_HDR_LEN;
    873 	}
    874 
    875 	/* Set the ether header length */
    876 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
    877 
    878 	if (offload == FALSE)
    879 		goto no_offloads;
    880 
    881 	/*
    882 	 * If the first mbuf only includes the ethernet header,
    883 	 * jump to the next one
    884 	 * XXX: This assumes the stack splits mbufs containing headers
    885 	 *      on header boundaries
    886 	 * XXX: And assumes the entire IP header is contained in one mbuf
    887 	 */
    888 	if (mp->m_len == ehdrlen && mp->m_next)
    889 		l3d = mtod(mp->m_next, char *);
    890 	else
    891 		l3d = mtod(mp, char *) + ehdrlen;
    892 
    893 	switch (etype) {
    894 #ifdef INET
    895 	case ETHERTYPE_IP:
    896 		ip = (struct ip *)(l3d);
    897 		ip_hlen = ip->ip_hl << 2;
    898 		ipproto = ip->ip_p;
    899 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
    900 		KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
    901 		    ip->ip_sum == 0);
    902 		break;
    903 #endif
    904 #ifdef INET6
    905 	case ETHERTYPE_IPV6:
    906 		ip6 = (struct ip6_hdr *)(l3d);
    907 		ip_hlen = sizeof(struct ip6_hdr);
    908 		ipproto = ip6->ip6_nxt;
    909 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
    910 		break;
    911 #endif
    912 	default:
    913 		offload = false;
    914 		break;
    915 	}
    916 
    917 	if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
    918 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
    919 
    920 	vlan_macip_lens |= ip_hlen;
    921 
    922 	/* No support for offloads for non-L4 next headers */
    923 	switch (ipproto) {
    924 	case IPPROTO_TCP:
    925 		if (mp->m_pkthdr.csum_flags &
    926 		    (M_CSUM_TCPv4 | M_CSUM_TCPv6))
    927 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
    928 		else
    929 			offload = false;
    930 		break;
    931 	case IPPROTO_UDP:
    932 		if (mp->m_pkthdr.csum_flags &
    933 		    (M_CSUM_UDPv4 | M_CSUM_UDPv6))
    934 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
    935 		else
    936 			offload = false;
    937 		break;
    938 	default:
    939 		offload = false;
    940 		break;
    941 	}
    942 
    943 	if (offload) /* Insert L4 checksum into data descriptors */
    944 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
    945 
    946 no_offloads:
    947 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
    948 
    949 	/* Now copy bits into descriptor */
    950 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
    951 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
    952 	TXD->seqnum_seed = htole32(0);
    953 	TXD->mss_l4len_idx = htole32(0);
    954 
    955 	/* We've consumed the first desc, adjust counters */
    956 	if (++ctxd == txr->num_desc)
    957 		ctxd = 0;
    958 	txr->next_avail_desc = ctxd;
    959 	--txr->tx_avail;
    960 
    961 	return (0);
    962 } /* ixgbe_tx_ctx_setup */
    963 
    964 /************************************************************************
    965  * ixgbe_tso_setup
    966  *
    967  *   Setup work for hardware segmentation offload (TSO) on
    968  *   adapters using advanced tx descriptors
    969  ************************************************************************/
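/*
 * In outline: the L3/L4 headers are located, the TCP checksum is
 * re-seeded with the pseudo-header checksum (and ip_sum cleared for
 * IPv4) so the hardware can complete it per segment, and the context
 * descriptor carries the MSS, TCP header length and payload length
 * that the hardware needs to slice the frame into MSS-sized segments.
 */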
    970 static int
    971 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len,
    972     u32 *olinfo_status)
    973 {
    974 	struct ixgbe_adv_tx_context_desc *TXD;
    975 	struct ether_vlan_header         *eh;
    976 #ifdef INET6
    977 	struct ip6_hdr                   *ip6;
    978 #endif
    979 #ifdef INET
    980 	struct ip                        *ip;
    981 #endif
    982 	struct tcphdr                    *th;
    983 	int                              ctxd, ehdrlen, ip_hlen, tcp_hlen;
    984 	u32                              vlan_macip_lens = 0;
    985 	u32                              type_tucmd_mlhl = 0;
    986 	u32                              mss_l4len_idx = 0, paylen;
    987 	u16                              vtag = 0, eh_type;
    988 
    989 	/*
    990 	 * Determine where frame payload starts.
    991 	 * Jump over vlan headers if already present
    992 	 */
    993 	eh = mtod(mp, struct ether_vlan_header *);
    994 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
    995 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
    996 		eh_type = eh->evl_proto;
    997 	} else {
    998 		ehdrlen = ETHER_HDR_LEN;
    999 		eh_type = eh->evl_encap_proto;
   1000 	}
   1001 
   1002 	switch (ntohs(eh_type)) {
   1003 #ifdef INET
   1004 	case ETHERTYPE_IP:
   1005 		ip = (struct ip *)(mp->m_data + ehdrlen);
   1006 		if (ip->ip_p != IPPROTO_TCP)
   1007 			return (ENXIO);
   1008 		ip->ip_sum = 0;
   1009 		ip_hlen = ip->ip_hl << 2;
   1010 		th = (struct tcphdr *)((char *)ip + ip_hlen);
   1011 		th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
   1012 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
   1013 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
   1014 		/* Tell transmit desc to also do IPv4 checksum. */
   1015 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
   1016 		break;
   1017 #endif
   1018 #ifdef INET6
   1019 	case ETHERTYPE_IPV6:
   1020 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
   1021 		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
   1022 		if (ip6->ip6_nxt != IPPROTO_TCP)
   1023 			return (ENXIO);
   1024 		ip_hlen = sizeof(struct ip6_hdr);
   1025 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
   1026 		th = (struct tcphdr *)((char *)ip6 + ip_hlen);
   1027 		th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
   1028 		    &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
   1029 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
   1030 		break;
   1031 #endif
   1032 	default:
   1033 		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
   1034 		    __func__, ntohs(eh_type));
   1035 		break;
   1036 	}
   1037 
   1038 	ctxd = txr->next_avail_desc;
   1039 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
   1040 
   1041 	tcp_hlen = th->th_off << 2;
   1042 
   1043 	/* This is used in the transmit desc in encap */
   1044 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
   1045 
   1046 	/* VLAN MACLEN IPLEN */
   1047 	if (vlan_has_tag(mp)) {
   1048 		vtag = htole16(vlan_get_tag(mp));
   1049 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
   1050 	}
   1051 
   1052 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
   1053 	vlan_macip_lens |= ip_hlen;
   1054 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
   1055 
   1056 	/* ADV DTYPE TUCMD */
   1057 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
   1058 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
   1059 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
   1060 
   1061 	/* MSS L4LEN IDX */
   1062 	mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
   1063 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
   1064 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
   1065 
   1066 	TXD->seqnum_seed = htole32(0);
   1067 
   1068 	if (++ctxd == txr->num_desc)
   1069 		ctxd = 0;
   1070 
   1071 	txr->tx_avail--;
   1072 	txr->next_avail_desc = ctxd;
   1073 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
   1074 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
   1075 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
   1076 	++txr->tso_tx.ev_count;
   1077 
   1078 	return (0);
   1079 } /* ixgbe_tso_setup */
   1080 
   1081 
   1082 /************************************************************************
   1083  * ixgbe_txeof
   1084  *
   1085  *   Examine each tx_buffer in the used queue. If the hardware is done
   1086  *   processing the packet then free associated resources. The
   1087  *   tx_buffer is put back on the free queue.
   1088  ************************************************************************/
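/*
 * Returns true when the cleanup limit (adapter->tx_process_limit)
 * was exhausted, meaning more completed descriptors may remain, so
 * the caller can decide whether another pass is needed; returns
 * false otherwise.
 */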
   1089 bool
   1090 ixgbe_txeof(struct tx_ring *txr)
   1091 {
   1092 	struct adapter		*adapter = txr->adapter;
   1093 	struct ifnet		*ifp = adapter->ifp;
   1094 	struct ixgbe_tx_buf	*buf;
   1095 	union ixgbe_adv_tx_desc *txd;
   1096 	u32			work, processed = 0;
   1097 	u32			limit = adapter->tx_process_limit;
   1098 
   1099 	KASSERT(mutex_owned(&txr->tx_mtx));
   1100 
   1101 #ifdef DEV_NETMAP
   1102 	if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) &&
   1103 	    (adapter->ifp->if_capenable & IFCAP_NETMAP)) {
   1104 		struct netmap_adapter *na = NA(adapter->ifp);
   1105 		struct netmap_kring *kring = na->tx_rings[txr->me];
   1106 		txd = txr->tx_base;
   1107 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1108 		    BUS_DMASYNC_POSTREAD);
   1109 		/*
   1110 		 * In netmap mode, all the work is done in the context
   1111 		 * of the client thread. Interrupt handlers only wake up
   1112 		 * clients, which may be sleeping on individual rings
   1113 		 * or on a global resource for all rings.
   1114 		 * To implement tx interrupt mitigation, we wake up the client
   1115 		 * thread roughly every half ring, even if the NIC interrupts
   1116 		 * more frequently. This is implemented as follows:
   1117 		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
   1118 		 *   the slot that should wake up the thread (nkr_num_slots
   1119 		 *   means the user thread should not be woken up);
   1120 		 * - the driver ignores tx interrupts unless netmap_mitigate=0
   1121 		 *   or the slot has the DD bit set.
   1122 		 */
   1123 		if (kring->nr_kflags < kring->nkr_num_slots &&
   1124 		    txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD) {
   1125 			netmap_tx_irq(ifp, txr->me);
   1126 		}
   1127 		return false;
   1128 	}
   1129 #endif /* DEV_NETMAP */
   1130 
   1131 	if (txr->tx_avail == txr->num_desc) {
   1132 		txr->busy = 0;
   1133 		return false;
   1134 	}
   1135 
   1136 	/* Get work starting point */
   1137 	work = txr->next_to_clean;
   1138 	buf = &txr->tx_buffers[work];
   1139 	txd = &txr->tx_base[work];
   1140 	work -= txr->num_desc; /* The distance to ring end */
   1141 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1142 	    BUS_DMASYNC_POSTREAD);
   1143 
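	/*
	 * Note: "work" is kept as a negative offset from the end of
	 * the ring ("work -= txr->num_desc" above), so the wrap test
	 * inside the loop is simply "work == 0" and the real index is
	 * recovered afterwards with "work += txr->num_desc".
	 */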
   1144 	do {
   1145 		union ixgbe_adv_tx_desc *eop = buf->eop;
   1146 		if (eop == NULL) /* No work */
   1147 			break;
   1148 
   1149 		if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
   1150 			break;	/* I/O not complete */
   1151 
   1152 		if (buf->m_head) {
   1153 			txr->bytes += buf->m_head->m_pkthdr.len;
   1154 			bus_dmamap_sync(txr->txtag->dt_dmat, buf->map,
   1155 			    0, buf->m_head->m_pkthdr.len,
   1156 			    BUS_DMASYNC_POSTWRITE);
   1157 			ixgbe_dmamap_unload(txr->txtag, buf->map);
   1158 			m_freem(buf->m_head);
   1159 			buf->m_head = NULL;
   1160 		}
   1161 		buf->eop = NULL;
   1162 		txr->txr_no_space = false;
   1163 		++txr->tx_avail;
   1164 
   1165 		/* We clean the range if multi segment */
   1166 		while (txd != eop) {
   1167 			++txd;
   1168 			++buf;
   1169 			++work;
   1170 			/* wrap the ring? */
   1171 			if (__predict_false(!work)) {
   1172 				work -= txr->num_desc;
   1173 				buf = txr->tx_buffers;
   1174 				txd = txr->tx_base;
   1175 			}
   1176 			if (buf->m_head) {
   1177 				txr->bytes +=
   1178 				    buf->m_head->m_pkthdr.len;
   1179 				bus_dmamap_sync(txr->txtag->dt_dmat,
   1180 				    buf->map,
   1181 				    0, buf->m_head->m_pkthdr.len,
   1182 				    BUS_DMASYNC_POSTWRITE);
   1183 				ixgbe_dmamap_unload(txr->txtag,
   1184 				    buf->map);
   1185 				m_freem(buf->m_head);
   1186 				buf->m_head = NULL;
   1187 			}
   1188 			++txr->tx_avail;
   1189 			buf->eop = NULL;
   1190 
   1191 		}
   1192 		++txr->packets;
   1193 		++processed;
   1194 		++ifp->if_opackets;
   1195 
   1196 		/* Try the next packet */
   1197 		++txd;
   1198 		++buf;
   1199 		++work;
   1200 		/* reset with a wrap */
   1201 		if (__predict_false(!work)) {
   1202 			work -= txr->num_desc;
   1203 			buf = txr->tx_buffers;
   1204 			txd = txr->tx_base;
   1205 		}
   1206 		prefetch(txd);
   1207 	} while (__predict_true(--limit));
   1208 
   1209 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1210 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   1211 
   1212 	work += txr->num_desc;
   1213 	txr->next_to_clean = work;
   1214 
   1215 	/*
   1216 	 * Queue hang detection: we know there is work
   1217 	 * outstanding or the first return above would
   1218 	 * have been taken, so increment busy if nothing
   1219 	 * managed to get cleaned; local_timer will then
   1220 	 * check it and mark the queue HUNG if it exceeds
   1221 	 * the maximum number of attempts.
   1222 	 */
   1223 	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
   1224 		++txr->busy;
   1225 	/*
   1226 	 * If anything gets cleaned we reset the state to 1;
   1227 	 * note this will clear HUNG if it is set.
   1228 	 */
   1229 	if (processed)
   1230 		txr->busy = 1;
   1231 
   1232 	if (txr->tx_avail == txr->num_desc)
   1233 		txr->busy = 0;
   1234 
   1235 	return ((limit > 0) ? false : true);
   1236 } /* ixgbe_txeof */
   1237 
   1238 /************************************************************************
   1239  * ixgbe_rsc_count
   1240  *
   1241  *   Used to detect a descriptor that has been merged by Hardware RSC.
   1242  ************************************************************************/
   1243 static inline u32
   1244 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
   1245 {
   1246 	return (le32toh(rx->wb.lower.lo_dword.data) &
   1247 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
   1248 } /* ixgbe_rsc_count */
   1249 
   1250 /************************************************************************
   1251  * ixgbe_setup_hw_rsc
   1252  *
   1253  *   Initialize the Hardware RSC (LRO) feature on 82599
   1254  *   for an RX ring; this is toggled by the LRO capability
   1255  *   even though it is transparent to the stack.
   1256  *
   1257  *   NOTE: Since this HW feature only works with IPv4 and
   1258  *         testing has shown soft LRO to be as effective,
   1259  *         this feature will be disabled by default.
   1260  ************************************************************************/
   1261 static void
   1262 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
   1263 {
   1264 	struct	adapter  *adapter = rxr->adapter;
   1265 	struct	ixgbe_hw *hw = &adapter->hw;
   1266 	u32              rscctrl, rdrxctl;
   1267 
   1268 	/* If turning LRO/RSC off we need to disable it */
   1269 	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
   1270 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
   1271 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
        		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
   1272 		return;
   1273 	}
   1274 
   1275 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
   1276 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
   1277 #ifdef DEV_NETMAP
   1278 	/* Always strip CRC unless Netmap disabled it */
   1279 	if (!(adapter->feat_en & IXGBE_FEATURE_NETMAP) ||
   1280 	    !(adapter->ifp->if_capenable & IFCAP_NETMAP) ||
   1281 	    ix_crcstrip)
   1282 #endif /* DEV_NETMAP */
   1283 		rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
   1284 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
   1285 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
   1286 
   1287 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
   1288 	rscctrl |= IXGBE_RSCCTL_RSCEN;
   1289 	/*
   1290 	 * Limit the total number of descriptors that
   1291 	 * can be combined, so it does not exceed 64K
   1292 	 */
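	/*
	 * For example, 16 descriptors of 2 KB clusters (MCLBYTES) or
	 * a single 16 KB cluster both stay well under that 64 KB cap.
	 */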
   1293 	if (rxr->mbuf_sz == MCLBYTES)
   1294 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
   1295 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
   1296 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
   1297 	else if (rxr->mbuf_sz == MJUM9BYTES)
   1298 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
   1299 	else  /* Using 16K cluster */
   1300 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
   1301 
   1302 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
   1303 
   1304 	/* Enable TCP header recognition */
   1305 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
   1306 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR));
   1307 
   1308 	/* Disable RSC for ACK packets */
   1309 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
   1310 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
   1311 
   1312 	rxr->hw_rsc = TRUE;
   1313 } /* ixgbe_setup_hw_rsc */
   1314 
   1315 /************************************************************************
   1316  * ixgbe_refresh_mbufs
   1317  *
   1318  *   Refresh mbuf buffers for RX descriptor rings.
   1319  *    - The ring keeps its own state, so discards due to resource
   1320  *      exhaustion are unnecessary; if an mbuf cannot be obtained,
   1321  *      the routine just returns, keeping its placeholder, and can
   1322  *      simply be called again to retry.
   1323  ************************************************************************/
   1324 static void
   1325 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
   1326 {
   1327 	struct adapter      *adapter = rxr->adapter;
   1328 	struct ixgbe_rx_buf *rxbuf;
   1329 	struct mbuf         *mp;
   1330 	int                 i, j, error;
   1331 	bool                refreshed = false;
   1332 
   1333 	i = j = rxr->next_to_refresh;
   1334 	/* Control the loop with one beyond */
   1335 	if (++j == rxr->num_desc)
   1336 		j = 0;
   1337 
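	/*
	 * "i" is the slot being refreshed while "j" runs one ahead of
	 * it; the loop stops once "j" reaches the caller-supplied
	 * limit, and next_to_refresh is only advanced once the slot
	 * at "i" has successfully been given a mapped buffer.
	 */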
   1338 	while (j != limit) {
   1339 		rxbuf = &rxr->rx_buffers[i];
   1340 		if (rxbuf->buf == NULL) {
   1341 			mp = ixgbe_getjcl(&rxr->jcl_head, M_NOWAIT,
   1342 			    MT_DATA, M_PKTHDR, rxr->mbuf_sz);
   1343 			if (mp == NULL) {
   1344 				rxr->no_jmbuf.ev_count++;
   1345 				goto update;
   1346 			}
   1347 			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
   1348 				m_adj(mp, ETHER_ALIGN);
   1349 		} else
   1350 			mp = rxbuf->buf;
   1351 
   1352 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   1353 
   1354 		/* If we're dealing with an mbuf that was copied rather
   1355 		 * than replaced, there's no need to go through busdma.
   1356 		 */
   1357 		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
   1358 			/* Get the memory mapping */
   1359 			ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   1360 			error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
   1361 			    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
   1362 			if (error != 0) {
   1363 				device_printf(adapter->dev, "Refresh mbufs: "
   1364 				    "payload dmamap load failure - %d\n",
   1365 				    error);
   1366 				m_free(mp);
   1367 				rxbuf->buf = NULL;
   1368 				goto update;
   1369 			}
   1370 			rxbuf->buf = mp;
   1371 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1372 			    0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
   1373 			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
   1374 			    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1375 		} else {
   1376 			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
   1377 			rxbuf->flags &= ~IXGBE_RX_COPY;
   1378 		}
   1379 
   1380 		refreshed = true;
   1381 		/* Next is precalculated */
   1382 		i = j;
   1383 		rxr->next_to_refresh = i;
   1384 		if (++j == rxr->num_desc)
   1385 			j = 0;
   1386 	}
   1387 
   1388 update:
   1389 	if (refreshed) /* Update hardware tail index */
   1390 		IXGBE_WRITE_REG(&adapter->hw, rxr->tail, rxr->next_to_refresh);
   1391 
   1392 	return;
   1393 } /* ixgbe_refresh_mbufs */
   1394 
   1395 /************************************************************************
   1396  * ixgbe_allocate_receive_buffers
   1397  *
   1398  *   Allocate memory for rx_buffer structures. Since we use one
   1399  *   rx_buffer per received packet, the maximum number of rx_buffers
   1400  *   that we'll need is equal to the number of receive descriptors
   1401  *   that we've allocated.
   1402  ************************************************************************/
   1403 static int
   1404 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
   1405 {
   1406 	struct adapter      *adapter = rxr->adapter;
   1407 	device_t            dev = adapter->dev;
   1408 	struct ixgbe_rx_buf *rxbuf;
   1409 	int                 bsize, error;
   1410 
   1411 	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
   1412 	rxr->rx_buffers = malloc(bsize, M_DEVBUF, M_WAITOK | M_ZERO);
   1413 
   1414 	error = ixgbe_dma_tag_create(
   1415 	         /*      parent */ adapter->osdep.dmat,
   1416 	         /*   alignment */ 1,
   1417 	         /*      bounds */ 0,
   1418 	         /*     maxsize */ MJUM16BYTES,
   1419 	         /*   nsegments */ 1,
   1420 	         /*  maxsegsize */ MJUM16BYTES,
   1421 	         /*       flags */ 0,
   1422 	                           &rxr->ptag);
   1423 	if (error != 0) {
   1424 		aprint_error_dev(dev, "Unable to create RX DMA tag\n");
   1425 		goto fail;
   1426 	}
   1427 
   1428 	for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
   1429 		rxbuf = &rxr->rx_buffers[i];
   1430 		error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
   1431 		if (error) {
   1432 			aprint_error_dev(dev, "Unable to create RX dma map\n");
   1433 			goto fail;
   1434 		}
   1435 	}
   1436 
   1437 	return (0);
   1438 
   1439 fail:
   1440 	/* Frees all, but can handle partial completion */
   1441 	ixgbe_free_receive_structures(adapter);
   1442 
   1443 	return (error);
   1444 } /* ixgbe_allocate_receive_buffers */
   1445 
   1446 /************************************************************************
   1447  * ixgbe_free_receive_ring
   1448  ************************************************************************/
   1449 static void
   1450 ixgbe_free_receive_ring(struct rx_ring *rxr)
   1451 {
   1452 	for (int i = 0; i < rxr->num_desc; i++) {
   1453 		ixgbe_rx_discard(rxr, i);
   1454 	}
   1455 } /* ixgbe_free_receive_ring */
   1456 
   1457 /************************************************************************
   1458  * ixgbe_setup_receive_ring
   1459  *
   1460  *   Initialize a receive ring and its buffers.
   1461  ************************************************************************/
   1462 static int
   1463 ixgbe_setup_receive_ring(struct rx_ring *rxr)
   1464 {
   1465 	struct adapter        *adapter;
   1466 	struct ixgbe_rx_buf   *rxbuf;
   1467 #ifdef LRO
   1468 	struct ifnet          *ifp;
   1469 	struct lro_ctrl       *lro = &rxr->lro;
   1470 #endif /* LRO */
   1471 #ifdef DEV_NETMAP
   1472 	struct netmap_adapter *na = NA(rxr->adapter->ifp);
   1473 	struct netmap_slot    *slot;
   1474 #endif /* DEV_NETMAP */
   1475 	int                   rsize, error = 0;
   1476 
   1477 	adapter = rxr->adapter;
   1478 #ifdef LRO
   1479 	ifp = adapter->ifp;
   1480 #endif /* LRO */
   1481 
   1482 	/* Clear the ring contents */
   1483 	IXGBE_RX_LOCK(rxr);
   1484 
   1485 #ifdef DEV_NETMAP
   1486 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP)
   1487 		slot = netmap_reset(na, NR_RX, rxr->me, 0);
   1488 #endif /* DEV_NETMAP */
   1489 
   1490 	rsize = roundup2(adapter->num_rx_desc *
   1491 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
   1492 	bzero((void *)rxr->rx_base, rsize);
   1493 	/* Cache the size */
   1494 	rxr->mbuf_sz = adapter->rx_mbuf_sz;
   1495 
   1496 	/* Free current RX buffer structs and their mbufs */
   1497 	ixgbe_free_receive_ring(rxr);
   1498 
   1499 	IXGBE_RX_UNLOCK(rxr);
   1500 	/*
   1501 	 * Now reinitialize our supply of jumbo mbufs.  The number
   1502 	 * or size of jumbo mbufs may have changed.
   1503 	 * Assume all of rxr->ptag are the same.
   1504 	 */
   1505 	ixgbe_jcl_reinit(adapter, rxr->ptag->dt_dmat, rxr,
   1506 	    (2 * adapter->num_rx_desc), adapter->rx_mbuf_sz);
   1507 
   1508 	IXGBE_RX_LOCK(rxr);
   1509 
   1510 	/* Now replenish the mbufs */
   1511 	for (int j = 0; j != rxr->num_desc; ++j) {
   1512 		struct mbuf *mp;
   1513 
   1514 		rxbuf = &rxr->rx_buffers[j];
   1515 
   1516 #ifdef DEV_NETMAP
   1517 		/*
   1518 		 * In netmap mode, fill the map and set the buffer
   1519 		 * address in the NIC ring, considering the offset
   1520 		 * between the netmap and NIC rings (see comment in
   1521 		 * ixgbe_setup_transmit_ring() ). No need to allocate
   1522 		 * an mbuf, so end the block with a continue;
   1523 		 */
   1524 		if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
   1525 			int sj = netmap_idx_n2k(na->rx_rings[rxr->me], j);
   1526 			uint64_t paddr;
   1527 			void *addr;
   1528 
   1529 			addr = PNMB(na, slot + sj, &paddr);
   1530 			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
   1531 			/* Update descriptor and the cached value */
   1532 			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
   1533 			rxbuf->addr = htole64(paddr);
   1534 			continue;
   1535 		}
   1536 #endif /* DEV_NETMAP */
   1537 
   1538 		rxbuf->flags = 0;
   1539 		rxbuf->buf = ixgbe_getjcl(&rxr->jcl_head, M_NOWAIT,
   1540 		    MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
   1541 		if (rxbuf->buf == NULL) {
   1542 			error = ENOBUFS;
   1543 			goto fail;
   1544 		}
   1545 		mp = rxbuf->buf;
   1546 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   1547 		/* Get the memory mapping */
   1548 		error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat, rxbuf->pmap,
   1549 		    mp, BUS_DMA_NOWAIT);
   1550 		if (error != 0)
   1551 			goto fail;
   1552 		bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1553 		    0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
   1554 		/* Update the descriptor and the cached value */
   1555 		rxr->rx_base[j].read.pkt_addr =
   1556 		    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1557 		rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1558 	}
   1559 
   1560 
   1561 	/* Setup our descriptor indices */
   1562 	rxr->next_to_check = 0;
   1563 	rxr->next_to_refresh = 0;
   1564 	rxr->lro_enabled = FALSE;
   1565 	rxr->rx_copies.ev_count = 0;
   1566 #if 0 /* NetBSD */
   1567 	rxr->rx_bytes.ev_count = 0;
   1568 #if 1	/* Fix inconsistency */
   1569 	rxr->rx_packets.ev_count = 0;
   1570 #endif
   1571 #endif
   1572 	rxr->vtag_strip = FALSE;
   1573 
   1574 	ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   1575 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   1576 
   1577 	/*
   1578 	 * Now set up the LRO interface
   1579 	 */
   1580 	if (ixgbe_rsc_enable)
   1581 		ixgbe_setup_hw_rsc(rxr);
   1582 #ifdef LRO
   1583 	else if (ifp->if_capenable & IFCAP_LRO) {
   1584 		device_t dev = adapter->dev;
   1585 		int err = tcp_lro_init(lro);
   1586 		if (err) {
   1587 			device_printf(dev, "LRO Initialization failed!\n");
   1588 			goto fail;
   1589 		}
   1590 		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
   1591 		rxr->lro_enabled = TRUE;
   1592 		lro->ifp = adapter->ifp;
   1593 	}
   1594 #endif /* LRO */
   1595 
   1596 	IXGBE_RX_UNLOCK(rxr);
   1597 
   1598 	return (0);
   1599 
   1600 fail:
   1601 	ixgbe_free_receive_ring(rxr);
   1602 	IXGBE_RX_UNLOCK(rxr);
   1603 
   1604 	return (error);
   1605 } /* ixgbe_setup_receive_ring */
   1606 
   1607 /************************************************************************
   1608  * ixgbe_setup_receive_structures - Initialize all receive rings.
   1609  ************************************************************************/
   1610 int
   1611 ixgbe_setup_receive_structures(struct adapter *adapter)
   1612 {
   1613 	struct rx_ring *rxr = adapter->rx_rings;
   1614 	int            j;
   1615 
   1616 	for (j = 0; j < adapter->num_queues; j++, rxr++)
   1617 		if (ixgbe_setup_receive_ring(rxr))
   1618 			goto fail;
   1619 
   1620 	return (0);
   1621 fail:
   1622 	/*
   1623 	 * Free RX buffers allocated so far; we only handle the
   1624 	 * rings that completed, since the failing ring will have
   1625 	 * cleaned up after itself. 'j' failed, so it's the terminus.
   1626 	 */
   1627 	for (int i = 0; i < j; ++i) {
   1628 		rxr = &adapter->rx_rings[i];
   1629 		IXGBE_RX_LOCK(rxr);
   1630 		ixgbe_free_receive_ring(rxr);
   1631 		IXGBE_RX_UNLOCK(rxr);
   1632 	}
   1633 
   1634 	return (ENOBUFS);
   1635 } /* ixgbe_setup_receive_structures */
   1636 
   1637 
   1638 /************************************************************************
   1639  * ixgbe_free_receive_structures - Free all receive rings.
   1640  ************************************************************************/
   1641 void
   1642 ixgbe_free_receive_structures(struct adapter *adapter)
   1643 {
   1644 	struct rx_ring *rxr = adapter->rx_rings;
   1645 
   1646 	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
   1647 
   1648 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
   1649 		ixgbe_free_receive_buffers(rxr);
   1650 #ifdef LRO
   1651 		/* Free LRO memory */
   1652 		tcp_lro_free(&rxr->lro);
   1653 #endif /* LRO */
   1654 		/* Free the ring memory as well */
   1655 		ixgbe_dma_free(adapter, &rxr->rxdma);
   1656 		IXGBE_RX_LOCK_DESTROY(rxr);
   1657 	}
   1658 
   1659 	free(adapter->rx_rings, M_DEVBUF);
   1660 } /* ixgbe_free_receive_structures */
   1661 
   1662 
   1663 /************************************************************************
   1664  * ixgbe_free_receive_buffers - Free receive ring data structures
   1665  ************************************************************************/
   1666 static void
   1667 ixgbe_free_receive_buffers(struct rx_ring *rxr)
   1668 {
   1669 	struct adapter      *adapter = rxr->adapter;
   1670 	struct ixgbe_rx_buf *rxbuf;
   1671 
   1672 	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
   1673 
   1674 	/* Cleanup any existing buffers */
   1675 	if (rxr->rx_buffers != NULL) {
   1676 		for (int i = 0; i < adapter->num_rx_desc; i++) {
   1677 			rxbuf = &rxr->rx_buffers[i];
   1678 			ixgbe_rx_discard(rxr, i);
   1679 			if (rxbuf->pmap != NULL) {
   1680 				ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
   1681 				rxbuf->pmap = NULL;
   1682 			}
   1683 		}
   1684 
   1685 		/* NetBSD specific. See ixgbe_netbsd.c */
   1686 		ixgbe_jcl_destroy(adapter, rxr);
   1687 
   1688 		if (rxr->rx_buffers != NULL) {
   1689 			free(rxr->rx_buffers, M_DEVBUF);
   1690 			rxr->rx_buffers = NULL;
   1691 		}
   1692 	}
   1693 
   1694 	if (rxr->ptag != NULL) {
   1695 		ixgbe_dma_tag_destroy(rxr->ptag);
   1696 		rxr->ptag = NULL;
   1697 	}
   1698 
   1699 	return;
   1700 } /* ixgbe_free_receive_buffers */
   1701 
   1702 /************************************************************************
   1703  * ixgbe_rx_input
   1704  ************************************************************************/
   1705 static __inline void
   1706 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m,
   1707     u32 ptype)
   1708 {
   1709 	struct adapter	*adapter = ifp->if_softc;
   1710 
   1711 #ifdef LRO
   1712 	struct ethercom *ec = &adapter->osdep.ec;
   1713 
   1714 	/*
   1715 	 * At the moment LRO is only for IP/TCP packets whose TCP checksum
   1716 	 * has been verified by hardware, and the ethernet header must not
   1717 	 * carry a VLAN tag.  For IPv6 we do not yet support extension headers.
   1718 	 */
   1719 	if (rxr->lro_enabled &&
   1720 	    (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
   1721 	    (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
   1722 	    ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
   1723 	    (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
   1724 	    (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
   1725 	    (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
   1726 	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
   1727 	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
   1728 		/*
   1729 		 * Hand the packet to LRO.  Fall through and send it
   1730 		 * to the stack only if:
   1731 		 *  - there are no LRO resources, or
   1732 		 *  - the LRO enqueue fails.
   1733 		 */
   1734 		if (rxr->lro.lro_cnt != 0)
   1735 			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
   1736 				return;
   1737 	}
   1738 #endif /* LRO */
   1739 
   1740 	if_percpuq_enqueue(adapter->ipq, m);
   1741 } /* ixgbe_rx_input */
   1742 
   1743 /************************************************************************
   1744  * ixgbe_rx_discard
   1745  ************************************************************************/
   1746 static __inline void
   1747 ixgbe_rx_discard(struct rx_ring *rxr, int i)
   1748 {
   1749 	struct ixgbe_rx_buf *rbuf;
   1750 
   1751 	rbuf = &rxr->rx_buffers[i];
   1752 
   1753 	/*
   1754 	 * With advanced descriptors the writeback
   1755 	 * clobbers the buffer addresses, so it's easier
   1756 	 * to just free the existing mbufs and take
   1757 	 * the normal refresh path to get new buffers
   1758 	 * and mapping.
   1759 	 */
   1760 
   1761 	if (rbuf->fmp != NULL) { /* Partial chain? */
   1762 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
   1763 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
   1764 		m_freem(rbuf->fmp);
   1765 		rbuf->fmp = NULL;
   1766 		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
   1767 	} else if (rbuf->buf) {
   1768 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
   1769 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
   1770 		m_free(rbuf->buf);
   1771 		rbuf->buf = NULL;
   1772 	}
   1773 	ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
   1774 
   1775 	rbuf->flags = 0;
   1776 
   1777 	return;
   1778 } /* ixgbe_rx_discard */
   1779 
   1780 
   1781 /************************************************************************
   1782  * ixgbe_rxeof
   1783  *
   1784  *   Executes in interrupt context. It replenishes the
   1785  *   mbufs in the descriptor ring and sends data which has
   1786  *   been DMA'd into host memory to the upper layer.
   1787  *
   1788  *   Return TRUE for more work, FALSE for all clean.
   1789  ************************************************************************/
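        /*
         * Note: each call processes at most adapter->rx_process_limit
         * descriptors.  A TRUE return means that budget was exhausted
         * before an unready descriptor was seen, which the caller may
         * treat as "more work remains".
         */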
   1790 bool
   1791 ixgbe_rxeof(struct ix_queue *que)
   1792 {
   1793 	struct adapter		*adapter = que->adapter;
   1794 	struct rx_ring		*rxr = que->rxr;
   1795 	struct ifnet		*ifp = adapter->ifp;
   1796 #ifdef LRO
   1797 	struct lro_ctrl		*lro = &rxr->lro;
   1798 #endif /* LRO */
   1799 	union ixgbe_adv_rx_desc	*cur;
   1800 	struct ixgbe_rx_buf	*rbuf, *nbuf;
   1801 	int			i, nextp, processed = 0;
   1802 	u32			staterr = 0;
   1803 	u32			count = adapter->rx_process_limit;
   1804 #ifdef RSS
   1805 	u16			pkt_info;
   1806 #endif
   1807 
   1808 	IXGBE_RX_LOCK(rxr);
   1809 
   1810 #ifdef DEV_NETMAP
   1811 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
   1812 		/* Same as the txeof routine: wakeup clients on intr. */
   1813 		if (netmap_rx_irq(ifp, rxr->me, &processed)) {
   1814 			IXGBE_RX_UNLOCK(rxr);
   1815 			return (FALSE);
   1816 		}
   1817 	}
   1818 #endif /* DEV_NETMAP */
   1819 
   1820 	for (i = rxr->next_to_check; count != 0;) {
   1821 		struct mbuf *sendmp, *mp;
   1822 		u32         rsc, ptype;
   1823 		u16         len;
   1824 		u16         vtag = 0;
   1825 		bool        eop;
   1826 
   1827 		/* Sync the ring. */
   1828 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   1829 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   1830 
   1831 		cur = &rxr->rx_base[i];
   1832 		staterr = le32toh(cur->wb.upper.status_error);
   1833 #ifdef RSS
   1834 		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
   1835 #endif
   1836 
   1837 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
   1838 			break;
   1839 
   1840 		count--;
   1841 		sendmp = NULL;
   1842 		nbuf = NULL;
   1843 		rsc = 0;
   1844 		cur->wb.upper.status_error = 0;
   1845 		rbuf = &rxr->rx_buffers[i];
   1846 		mp = rbuf->buf;
   1847 
   1848 		len = le16toh(cur->wb.upper.length);
   1849 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
   1850 		    IXGBE_RXDADV_PKTTYPE_MASK;
   1851 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
   1852 
   1853 		/* Make sure bad packets are discarded */
   1854 		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
   1855 #if __FreeBSD_version >= 1100036
   1856 			if (adapter->feat_en & IXGBE_FEATURE_VF)
   1857 				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
   1858 #endif
   1859 			rxr->rx_discarded.ev_count++;
   1860 			ixgbe_rx_discard(rxr, i);
   1861 			goto next_desc;
   1862 		}
   1863 
   1864 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
   1865 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
   1866 
   1867 		/*
   1868 		 * On the 82599, which supports hardware
   1869 		 * LRO (called HW RSC), the buffers of a
   1870 		 * packet need not occupy sequential
   1871 		 * descriptors; instead the next descriptor
   1872 		 * is indicated in bits of the current one.
   1873 		 * This also means that we might process
   1874 		 * more than one packet at a time, something
   1875 		 * that was never true before; it required
   1876 		 * eliminating global chain pointers in
   1877 		 * favor of what we are doing here.  -jfv
   1878 		 */
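        		/*
        		 * In short: with RSC the index of the next buffer of
        		 * this frame comes from the NEXTP field of the current
        		 * descriptor; otherwise it is simply the next slot,
        		 * (i + 1) % num_desc.
        		 */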
   1879 		if (!eop) {
   1880 			/*
   1881 			 * Figure out the next descriptor
   1882 			 * of this frame.
   1883 			 */
   1884 			if (rxr->hw_rsc == TRUE) {
   1885 				rsc = ixgbe_rsc_count(cur);
   1886 				rxr->rsc_num += (rsc - 1);
   1887 			}
   1888 			if (rsc) { /* Get hardware index */
   1889 				nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
   1890 				    IXGBE_RXDADV_NEXTP_SHIFT);
   1891 			} else { /* Just sequential */
   1892 				nextp = i + 1;
   1893 				if (nextp == adapter->num_rx_desc)
   1894 					nextp = 0;
   1895 			}
   1896 			nbuf = &rxr->rx_buffers[nextp];
   1897 			prefetch(nbuf);
   1898 		}
   1899 		/*
   1900 		 * Rather than using the fmp/lmp global pointers
   1901 		 * we now keep the head of a packet chain in the
   1902 		 * buffer struct and pass this along from one
   1903 		 * descriptor to the next, until we get EOP.
   1904 		 */
   1905 		mp->m_len = len;
   1906 		/*
   1907 		 * See if there is a stored head; if so, this
   1908 		 * buffer continues an existing packet chain.
   1909 		 */
   1910 		sendmp = rbuf->fmp;
   1911 		if (sendmp != NULL) {  /* secondary frag */
   1912 			rbuf->buf = rbuf->fmp = NULL;
   1913 			mp->m_flags &= ~M_PKTHDR;
   1914 			sendmp->m_pkthdr.len += mp->m_len;
   1915 		} else {
   1916 			/*
   1917 			 * Optimize.  This might be a small packet,
   1918 			 * maybe just a TCP ACK.  Do a fast copy that
   1919 			 * is cache aligned into a new mbuf, and
   1920 			 * leave the old mbuf+cluster for re-use.
   1921 			 */
   1922 			if (eop && len <= IXGBE_RX_COPY_LEN) {
   1923 				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
   1924 				if (sendmp != NULL) {
   1925 					sendmp->m_data += IXGBE_RX_COPY_ALIGN;
   1926 					ixgbe_bcopy(mp->m_data, sendmp->m_data,
   1927 					    len);
   1928 					sendmp->m_len = len;
   1929 					rxr->rx_copies.ev_count++;
   1930 					rbuf->flags |= IXGBE_RX_COPY;
   1931 				}
   1932 			}
   1933 			if (sendmp == NULL) {
   1934 				rbuf->buf = rbuf->fmp = NULL;
   1935 				sendmp = mp;
   1936 			}
   1937 
   1938 			/* first descriptor of a non-packet-split chain */
   1939 			sendmp->m_flags |= M_PKTHDR;
   1940 			sendmp->m_pkthdr.len = mp->m_len;
   1941 		}
   1942 		++processed;
   1943 
   1944 		/* Pass the head pointer on */
   1945 		if (eop == 0) {
   1946 			nbuf->fmp = sendmp;
   1947 			sendmp = NULL;
   1948 			mp->m_next = nbuf->buf;
   1949 		} else { /* Sending this frame */
   1950 			m_set_rcvif(sendmp, ifp);
   1951 			++rxr->packets;
   1952 			rxr->rx_packets.ev_count++;
   1953 			/* capture data for AIM */
   1954 			rxr->bytes += sendmp->m_pkthdr.len;
   1955 			rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
   1956 			/* Process vlan info */
   1957 			if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
   1958 				vtag = le16toh(cur->wb.upper.vlan);
   1959 			if (vtag) {
   1960 				vlan_set_tag(sendmp, vtag);
   1961 			}
   1962 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
   1963 				ixgbe_rx_checksum(staterr, sendmp, ptype,
   1964 				   &adapter->stats.pf);
   1965 			}
   1966 
   1967 #if 0 /* FreeBSD */
   1968 			/*
   1969 			 * In case of multiqueue, we have RXCSUM.PCSD bit set
   1970 			 * and never cleared. This means we have RSS hash
   1971 			 * available to be used.
   1972 			 */
   1973 			if (adapter->num_queues > 1) {
   1974 				sendmp->m_pkthdr.flowid =
   1975 				    le32toh(cur->wb.lower.hi_dword.rss);
   1976 				switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
   1977 				case IXGBE_RXDADV_RSSTYPE_IPV4:
   1978 					M_HASHTYPE_SET(sendmp,
   1979 					    M_HASHTYPE_RSS_IPV4);
   1980 					break;
   1981 				case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
   1982 					M_HASHTYPE_SET(sendmp,
   1983 					    M_HASHTYPE_RSS_TCP_IPV4);
   1984 					break;
   1985 				case IXGBE_RXDADV_RSSTYPE_IPV6:
   1986 					M_HASHTYPE_SET(sendmp,
   1987 					    M_HASHTYPE_RSS_IPV6);
   1988 					break;
   1989 				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
   1990 					M_HASHTYPE_SET(sendmp,
   1991 					    M_HASHTYPE_RSS_TCP_IPV6);
   1992 					break;
   1993 				case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
   1994 					M_HASHTYPE_SET(sendmp,
   1995 					    M_HASHTYPE_RSS_IPV6_EX);
   1996 					break;
   1997 				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
   1998 					M_HASHTYPE_SET(sendmp,
   1999 					    M_HASHTYPE_RSS_TCP_IPV6_EX);
   2000 					break;
   2001 #if __FreeBSD_version > 1100000
   2002 				case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
   2003 					M_HASHTYPE_SET(sendmp,
   2004 					    M_HASHTYPE_RSS_UDP_IPV4);
   2005 					break;
   2006 				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
   2007 					M_HASHTYPE_SET(sendmp,
   2008 					    M_HASHTYPE_RSS_UDP_IPV6);
   2009 					break;
   2010 				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
   2011 					M_HASHTYPE_SET(sendmp,
   2012 					    M_HASHTYPE_RSS_UDP_IPV6_EX);
   2013 					break;
   2014 #endif
   2015 				default:
   2016 					M_HASHTYPE_SET(sendmp,
   2017 					    M_HASHTYPE_OPAQUE_HASH);
   2018 				}
   2019 			} else {
   2020 				sendmp->m_pkthdr.flowid = que->msix;
   2021 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
   2022 			}
   2023 #endif
   2024 		}
   2025 next_desc:
   2026 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   2027 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   2028 
   2029 		/* Advance our pointers to the next descriptor. */
   2030 		if (++i == rxr->num_desc)
   2031 			i = 0;
   2032 
   2033 		/* Now send to the stack or do LRO */
   2034 		if (sendmp != NULL) {
   2035 			rxr->next_to_check = i;
   2036 			IXGBE_RX_UNLOCK(rxr);
   2037 			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
   2038 			IXGBE_RX_LOCK(rxr);
   2039 			i = rxr->next_to_check;
   2040 		}
   2041 
   2042 		/* Every 8 descriptors we go to refresh mbufs */
   2043 		if (processed == 8) {
   2044 			ixgbe_refresh_mbufs(rxr, i);
   2045 			processed = 0;
   2046 		}
   2047 	}
   2048 
   2049 	/* Refresh any remaining buf structs */
   2050 	if (ixgbe_rx_unrefreshed(rxr))
   2051 		ixgbe_refresh_mbufs(rxr, i);
   2052 
   2053 	rxr->next_to_check = i;
   2054 
   2055 	IXGBE_RX_UNLOCK(rxr);
   2056 
   2057 #ifdef LRO
   2058 	/*
   2059 	 * Flush any outstanding LRO work
   2060 	 */
   2061 	tcp_lro_flush_all(lro);
   2062 #endif /* LRO */
   2063 
   2064 	/*
   2065 	 * Still have cleaning to do?
   2066 	 */
   2067 	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
   2068 		return (TRUE);
   2069 
   2070 	return (FALSE);
   2071 } /* ixgbe_rxeof */
   2072 
   2073 
   2074 /************************************************************************
   2075  * ixgbe_rx_checksum
   2076  *
   2077  *   Verify that the hardware indicated that the checksum is valid.
   2078  *   Inform the stack about the status of the checksum so that the
   2079  *   stack doesn't spend time verifying it.
   2080  ************************************************************************/
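        /*
         * For example, a frame whose IPv4 and TCP checksums were both
         * verified by the hardware typically leaves here with M_CSUM_IPv4
         * and the M_CSUM_TCPv4/M_CSUM_TCPv6/M_CSUM_UDPv4/M_CSUM_UDPv6 bits
         * set and none of the *_BAD bits, so the stack can skip software
         * verification.
         */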
   2081 static void
   2082 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
   2083     struct ixgbe_hw_stats *stats)
   2084 {
   2085 	u16  status = (u16)staterr;
   2086 	u8   errors = (u8)(staterr >> 24);
   2087 #if 0
   2088 	bool sctp = false;
   2089 
   2090 	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
   2091 	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
   2092 		sctp = true;
   2093 #endif
   2094 
   2095 	/* IPv4 checksum */
   2096 	if (status & IXGBE_RXD_STAT_IPCS) {
   2097 		stats->ipcs.ev_count++;
   2098 		if (!(errors & IXGBE_RXD_ERR_IPE)) {
   2099 			/* IP Checksum Good */
   2100 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
   2101 		} else {
   2102 			stats->ipcs_bad.ev_count++;
   2103 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
   2104 		}
   2105 	}
   2106 	/* TCP/UDP/SCTP checksum */
   2107 	if (status & IXGBE_RXD_STAT_L4CS) {
   2108 		stats->l4cs.ev_count++;
   2109 		int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
   2110 		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
   2111 			mp->m_pkthdr.csum_flags |= type;
   2112 		} else {
   2113 			stats->l4cs_bad.ev_count++;
   2114 			mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
   2115 		}
   2116 	}
   2117 } /* ixgbe_rx_checksum */
   2118 
   2119 /************************************************************************
   2120  * ixgbe_dma_malloc
   2121  ************************************************************************/
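        /*
         * Wraps the usual bus_dma(9) sequence: create a tag, allocate and
         * map the memory, create a map, and load it.  The fail_* labels
         * below unwind those steps in reverse order.
         */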
   2122 int
   2123 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
   2124 		struct ixgbe_dma_alloc *dma, const int mapflags)
   2125 {
   2126 	device_t dev = adapter->dev;
   2127 	int      r, rsegs;
   2128 
   2129 	r = ixgbe_dma_tag_create(
   2130 	     /*      parent */ adapter->osdep.dmat,
   2131 	     /*   alignment */ DBA_ALIGN,
   2132 	     /*      bounds */ 0,
   2133 	     /*     maxsize */ size,
   2134 	     /*   nsegments */ 1,
   2135 	     /*  maxsegsize */ size,
   2136 	     /*       flags */ BUS_DMA_ALLOCNOW,
   2137 			       &dma->dma_tag);
   2138 	if (r != 0) {
   2139 		aprint_error_dev(dev,
   2140 		    "%s: ixgbe_dma_tag_create failed; error %d\n", __func__,
   2141 		    r);
   2142 		goto fail_0;
   2143 	}
   2144 
   2145 	r = bus_dmamem_alloc(dma->dma_tag->dt_dmat, size,
   2146 	    dma->dma_tag->dt_alignment, dma->dma_tag->dt_boundary,
   2147 	    &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
   2148 	if (r != 0) {
   2149 		aprint_error_dev(dev,
   2150 		    "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
   2151 		goto fail_1;
   2152 	}
   2153 
   2154 	r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
   2155 	    size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
   2156 	if (r != 0) {
   2157 		aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
   2158 		    __func__, r);
   2159 		goto fail_2;
   2160 	}
   2161 
   2162 	r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
   2163 	if (r != 0) {
   2164 		aprint_error_dev(dev, "%s: ixgbe_dmamap_create failed; error %d\n",
   2165 		    __func__, r);
   2166 		goto fail_3;
   2167 	}
   2168 
   2169 	r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map,
   2170 	    dma->dma_vaddr, size, NULL, mapflags | BUS_DMA_NOWAIT);
   2171 	if (r != 0) {
   2172 		aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
   2173 		    __func__, r);
   2174 		goto fail_4;
   2175 	}
   2176 	dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
   2177 	dma->dma_size = size;
   2178 	return 0;
   2179 fail_4:
   2180 	ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
   2181 fail_3:
   2182 	bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
   2183 fail_2:
   2184 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
   2185 fail_1:
   2186 	ixgbe_dma_tag_destroy(dma->dma_tag);
   2187 fail_0:
   2188 
   2189 	return (r);
   2190 } /* ixgbe_dma_malloc */
   2191 
   2192 /************************************************************************
   2193  * ixgbe_dma_free
   2194  ************************************************************************/
   2195 void
   2196 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
   2197 {
   2198 	bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
   2199 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   2200 	ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
   2201 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
   2202 	ixgbe_dma_tag_destroy(dma->dma_tag);
   2203 } /* ixgbe_dma_free */
   2204 
   2205 
   2206 /************************************************************************
   2207  * ixgbe_allocate_queues
   2208  *
   2209  *   Allocate memory for the transmit and receive rings, and then
   2210  *   the descriptors associated with each, called only once at attach.
   2211  ************************************************************************/
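        /*
         * The txconf/rxconf counters below track how many rings have had
         * descriptor memory allocated, so that the err_tx_desc/err_rx_desc
         * paths free exactly what was set up before a failure.
         */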
   2212 int
   2213 ixgbe_allocate_queues(struct adapter *adapter)
   2214 {
   2215 	device_t	dev = adapter->dev;
   2216 	struct ix_queue	*que;
   2217 	struct tx_ring	*txr;
   2218 	struct rx_ring	*rxr;
   2219 	int             rsize, tsize, error = IXGBE_SUCCESS;
   2220 	int             txconf = 0, rxconf = 0;
   2221 
   2222 	/* First, allocate the top level queue structs */
   2223 	adapter->queues = (struct ix_queue *)malloc(sizeof(struct ix_queue) *
   2224             adapter->num_queues, M_DEVBUF, M_WAITOK | M_ZERO);
   2225 
   2226 	/* Second, allocate the TX ring struct memory */
   2227 	adapter->tx_rings = malloc(sizeof(struct tx_ring) *
   2228 	    adapter->num_queues, M_DEVBUF, M_WAITOK | M_ZERO);
   2229 
   2230 	/* Third, allocate the RX ring */
   2231 	adapter->rx_rings = (struct rx_ring *)malloc(sizeof(struct rx_ring) *
   2232 	    adapter->num_queues, M_DEVBUF, M_WAITOK | M_ZERO);
   2233 
   2234 	/* For the ring itself */
   2235 	tsize = roundup2(adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc),
   2236 	    DBA_ALIGN);
   2237 
   2238 	/*
   2239 	 * Now set up the TX queues; txconf is needed to handle the
   2240 	 * possibility that things fail midcourse and we need to
   2241 	 * undo the allocations gracefully.
   2242 	 */
   2243 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
   2244 		/* Set up some basics */
   2245 		txr = &adapter->tx_rings[i];
   2246 		txr->adapter = adapter;
   2247 		txr->txr_interq = NULL;
   2248 		/* In case SR-IOV is enabled, align the index properly */
   2249 #ifdef PCI_IOV
   2250 		txr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
   2251 		    i);
   2252 #else
   2253 		txr->me = i;
   2254 #endif
   2255 		txr->num_desc = adapter->num_tx_desc;
   2256 
   2257 		/* Initialize the TX side lock */
   2258 		mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
   2259 
   2260 		if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma,
   2261 		    BUS_DMA_NOWAIT)) {
   2262 			aprint_error_dev(dev,
   2263 			    "Unable to allocate TX Descriptor memory\n");
   2264 			error = ENOMEM;
   2265 			goto err_tx_desc;
   2266 		}
   2267 		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
   2268 		bzero((void *)txr->tx_base, tsize);
   2269 
   2270 		/* Now allocate transmit buffers for the ring */
   2271 		if (ixgbe_allocate_transmit_buffers(txr)) {
   2272 			aprint_error_dev(dev,
   2273 			    "Critical Failure setting up transmit buffers\n");
   2274 			error = ENOMEM;
   2275 			goto err_tx_desc;
   2276 		}
   2277 		if (!(adapter->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
   2278 			/* Allocate a buf ring */
   2279 			txr->txr_interq = pcq_create(IXGBE_BR_SIZE, KM_SLEEP);
   2280 			if (txr->txr_interq == NULL) {
   2281 				aprint_error_dev(dev,
   2282 				    "Critical Failure setting up buf ring\n");
   2283 				error = ENOMEM;
   2284 				goto err_tx_desc;
   2285 			}
   2286 		}
   2287 	}
   2288 
   2289 	/*
   2290 	 * Next the RX queues...
   2291 	 */
   2292 	rsize = roundup2(adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc),
   2293 	    DBA_ALIGN);
   2294 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
   2295 		rxr = &adapter->rx_rings[i];
   2296 		/* Set up some basics */
   2297 		rxr->adapter = adapter;
   2298 #ifdef PCI_IOV
   2299 		/* In case SR-IOV is enabled, align the index properly */
   2300 		rxr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
   2301 		    i);
   2302 #else
   2303 		rxr->me = i;
   2304 #endif
   2305 		rxr->num_desc = adapter->num_rx_desc;
   2306 
   2307 		/* Initialize the RX side lock */
   2308 		mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
   2309 
   2310 		if (ixgbe_dma_malloc(adapter, rsize, &rxr->rxdma,
   2311 		    BUS_DMA_NOWAIT)) {
   2312 			aprint_error_dev(dev,
   2313 			    "Unable to allocate RX Descriptor memory\n");
   2314 			error = ENOMEM;
   2315 			goto err_rx_desc;
   2316 		}
   2317 		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
   2318 		bzero((void *)rxr->rx_base, rsize);
   2319 
   2320 		/* Allocate receive buffers for the ring */
   2321 		if (ixgbe_allocate_receive_buffers(rxr)) {
   2322 			aprint_error_dev(dev,
   2323 			    "Critical Failure setting up receive buffers\n");
   2324 			error = ENOMEM;
   2325 			goto err_rx_desc;
   2326 		}
   2327 	}
   2328 
   2329 	/*
   2330 	 * Finally set up the queue holding structs
   2331 	 */
   2332 	for (int i = 0; i < adapter->num_queues; i++) {
   2333 		que = &adapter->queues[i];
   2334 		que->adapter = adapter;
   2335 		que->me = i;
   2336 		que->txr = &adapter->tx_rings[i];
   2337 		que->rxr = &adapter->rx_rings[i];
   2338 
   2339 		mutex_init(&que->dc_mtx, MUTEX_DEFAULT, IPL_NET);
   2340 		que->disabled_count = 0;
   2341 	}
   2342 
   2343 	return (0);
   2344 
   2345 err_rx_desc:
   2346 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
   2347 		ixgbe_dma_free(adapter, &rxr->rxdma);
   2348 err_tx_desc:
   2349 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
   2350 		ixgbe_dma_free(adapter, &txr->txdma);
   2351 	free(adapter->rx_rings, M_DEVBUF);
   2352 	free(adapter->tx_rings, M_DEVBUF);
   2353 	free(adapter->queues, M_DEVBUF);
   2354 	return (error);
   2355 } /* ixgbe_allocate_queues */
   2356 
   2357 /************************************************************************
   2358  * ixgbe_free_queues
   2359  *
   2360  *   Free descriptors for the transmit and receive rings, and then
   2361  *   the memory associated with each.
   2362  ************************************************************************/
   2363 void
   2364 ixgbe_free_queues(struct adapter *adapter)
   2365 {
   2366 	struct ix_queue *que;
   2367 	int i;
   2368 
   2369 	ixgbe_free_transmit_structures(adapter);
   2370 	ixgbe_free_receive_structures(adapter);
   2371 	for (i = 0; i < adapter->num_queues; i++) {
   2372 		que = &adapter->queues[i];
   2373 		mutex_destroy(&que->dc_mtx);
   2374 	}
   2375 	free(adapter->queues, M_DEVBUF);
   2376 } /* ixgbe_free_queues */
   2377