      1 /* $NetBSD: ix_txrx.c,v 1.44 2018/05/16 08:08:24 msaitoh Exp $ */
      2 
      3 /******************************************************************************
      4 
      5   Copyright (c) 2001-2017, Intel Corporation
      6   All rights reserved.
      7 
      8   Redistribution and use in source and binary forms, with or without
      9   modification, are permitted provided that the following conditions are met:
     10 
     11    1. Redistributions of source code must retain the above copyright notice,
     12       this list of conditions and the following disclaimer.
     13 
     14    2. Redistributions in binary form must reproduce the above copyright
     15       notice, this list of conditions and the following disclaimer in the
     16       documentation and/or other materials provided with the distribution.
     17 
     18    3. Neither the name of the Intel Corporation nor the names of its
     19       contributors may be used to endorse or promote products derived from
     20       this software without specific prior written permission.
     21 
     22   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     23   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     24   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     25   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     26   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     27   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     28   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     29   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     30   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     31   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     32   POSSIBILITY OF SUCH DAMAGE.
     33 
     34 ******************************************************************************/
     35 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 327031 2017-12-20 18:15:06Z erj $*/
     36 
     37 /*
     38  * Copyright (c) 2011 The NetBSD Foundation, Inc.
     39  * All rights reserved.
     40  *
     41  * This code is derived from software contributed to The NetBSD Foundation
     42  * by Coyote Point Systems, Inc.
     43  *
     44  * Redistribution and use in source and binary forms, with or without
     45  * modification, are permitted provided that the following conditions
     46  * are met:
     47  * 1. Redistributions of source code must retain the above copyright
     48  *    notice, this list of conditions and the following disclaimer.
     49  * 2. Redistributions in binary form must reproduce the above copyright
     50  *    notice, this list of conditions and the following disclaimer in the
     51  *    documentation and/or other materials provided with the distribution.
     52  *
     53  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     54  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     55  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     56  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     57  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     58  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     59  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     60  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     61  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     62  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     63  * POSSIBILITY OF SUCH DAMAGE.
     64  */
     65 
     66 #include "opt_inet.h"
     67 #include "opt_inet6.h"
     68 
     69 #include "ixgbe.h"
     70 
      71 /*
      72  * HW RSC control:
      73  *  This feature only works with
      74  *  IPv4, and only on 82599 and later
      75  *  hardware.  It also causes IP
      76  *  forwarding to fail and, unlike LRO,
      77  *  that cannot be controlled by the
      78  *  stack.  For these reasons it is left
      79  *  off by default; there is no tunable
      80  *  interface, so enabling it requires
      81  *  changing this value and recompiling.
      82  */
     83 static bool ixgbe_rsc_enable = FALSE;
     84 
      85 /*
      86  * For Flow Director: this is the
      87  * sampling interval for TX packets;
      88  * with the default, every 20th packet
      89  * is probed for the filter pool.
      90  *
      91  * This feature can be disabled by
      92  * setting this to 0.
      93  */
     94 static int atr_sample_rate = 20;
     95 
     96 /************************************************************************
     97  *  Local Function prototypes
     98  ************************************************************************/
     99 static void          ixgbe_setup_transmit_ring(struct tx_ring *);
    100 static void          ixgbe_free_transmit_buffers(struct tx_ring *);
    101 static int           ixgbe_setup_receive_ring(struct rx_ring *);
    102 static void          ixgbe_free_receive_buffers(struct rx_ring *);
    103 static void          ixgbe_rx_checksum(u32, struct mbuf *, u32,
    104                                        struct ixgbe_hw_stats *);
    105 static void          ixgbe_refresh_mbufs(struct rx_ring *, int);
    106 static void          ixgbe_drain(struct ifnet *, struct tx_ring *);
    107 static int           ixgbe_xmit(struct tx_ring *, struct mbuf *);
    108 static int           ixgbe_tx_ctx_setup(struct tx_ring *,
    109                                         struct mbuf *, u32 *, u32 *);
    110 static int           ixgbe_tso_setup(struct tx_ring *,
    111                                      struct mbuf *, u32 *, u32 *);
    112 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
    113 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
    114                                     struct mbuf *, u32);
    115 static int           ixgbe_dma_malloc(struct adapter *, bus_size_t,
    116                                       struct ixgbe_dma_alloc *, int);
    117 static void          ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
    118 
    119 static void	ixgbe_setup_hw_rsc(struct rx_ring *);
    120 
    121 /************************************************************************
    122  * ixgbe_legacy_start_locked - Transmit entry point
    123  *
    124  *   Called by the stack to initiate a transmit.
    125  *   The driver will remain in this routine as long as there are
    126  *   packets to transmit and transmit resources are available.
    127  *   In case resources are not available, the stack is notified
    128  *   and the packet is requeued.
    129  ************************************************************************/
    130 int
    131 ixgbe_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
    132 {
    133 	struct mbuf    *m_head;
    134 	struct adapter *adapter = txr->adapter;
    135 	int enqueued = 0;
    136 	int rc;
    137 
    138 	IXGBE_TX_LOCK_ASSERT(txr);
    139 
    140 	if (!adapter->link_active) {
     141 		/*
     142 		 * Discard all packets buffered in IFQ to avoid
     143 		 * sending stale packets when the link comes up.
     144 		 */
    145 		ixgbe_drain(ifp, txr);
    146 		return (ENETDOWN);
    147 	}
    148 	if ((ifp->if_flags & IFF_RUNNING) == 0)
    149 		return (ENETDOWN);
    150 
    151 	while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
    152 		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
    153 			break;
    154 
    155 		IFQ_POLL(&ifp->if_snd, m_head);
    156 		if (m_head == NULL)
    157 			break;
    158 
    159 		if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
    160 			break;
    161 		}
    162 		enqueued++;
    163 		IFQ_DEQUEUE(&ifp->if_snd, m_head);
    164 		if (rc != 0) {
    165 			m_freem(m_head);
    166 			continue;
    167 		}
    168 
    169 		/* Send a copy of the frame to the BPF listener */
    170 		bpf_mtap(ifp, m_head);
    171 	}
    172 
    173 	if (enqueued) {
    174 		txr->lastsent = time_uptime;
    175 		txr->sending = true;
    176 	}
    177 
    178 	return IXGBE_SUCCESS;
    179 } /* ixgbe_legacy_start_locked */
    180 
    181 /************************************************************************
    182  * ixgbe_legacy_start
    183  *
    184  *   Called by the stack, this always uses the first tx ring,
    185  *   and should not be used with multiqueue tx enabled.
    186  ************************************************************************/
    187 void
    188 ixgbe_legacy_start(struct ifnet *ifp)
    189 {
    190 	struct adapter *adapter = ifp->if_softc;
    191 	struct tx_ring *txr = adapter->tx_rings;
    192 
    193 	if (ifp->if_flags & IFF_RUNNING) {
    194 		IXGBE_TX_LOCK(txr);
    195 		ixgbe_legacy_start_locked(ifp, txr);
    196 		IXGBE_TX_UNLOCK(txr);
    197 	}
    198 } /* ixgbe_legacy_start */
    199 
    200 /************************************************************************
    201  * ixgbe_mq_start - Multiqueue Transmit Entry Point
    202  *
    203  *   (if_transmit function)
    204  ************************************************************************/
    205 int
    206 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
    207 {
    208 	struct adapter	*adapter = ifp->if_softc;
    209 	struct tx_ring	*txr;
    210 	int 		i, err = 0;
    211 #ifdef RSS
    212 	uint32_t bucket_id;
    213 #endif
    214 
     215 	/*
     216 	 * When doing RSS, map the flow to the same outbound
     217 	 * queue that its incoming traffic would be mapped to.
     218 	 *
     219 	 * If everything is set up correctly, this should be the
     220 	 * same bucket as the one the current CPU belongs to.
     221 	 */
    222 #ifdef RSS
    223 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
    224 		if ((adapter->feat_en & IXGBE_FEATURE_RSS) &&
    225 		    (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
    226 		    &bucket_id) == 0)) {
    227 			i = bucket_id % adapter->num_queues;
    228 #ifdef IXGBE_DEBUG
     229 			if (bucket_id >= adapter->num_queues)
    230 				if_printf(ifp,
    231 				    "bucket_id (%d) > num_queues (%d)\n",
    232 				    bucket_id, adapter->num_queues);
    233 #endif
    234 		} else
    235 			i = m->m_pkthdr.flowid % adapter->num_queues;
    236 	} else
     237 #endif /* RSS */
    238 		i = cpu_index(curcpu()) % adapter->num_queues;
    239 
    240 	/* Check for a hung queue and pick alternative */
    241 	if (((1 << i) & adapter->active_queues) == 0)
    242 		i = ffs64(adapter->active_queues);
    243 
    244 	txr = &adapter->tx_rings[i];
    245 
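         	/*
         	 * Enqueue on the selected ring's software queue (pcq);
         	 * if the queue is full, the packet is dropped and counted.
         	 */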
    246 	err = pcq_put(txr->txr_interq, m);
    247 	if (err == false) {
    248 		m_freem(m);
    249 		txr->pcq_drops.ev_count++;
     250 		return (ENOBUFS);
    251 	}
    252 	if (IXGBE_TX_TRYLOCK(txr)) {
    253 		ixgbe_mq_start_locked(ifp, txr);
    254 		IXGBE_TX_UNLOCK(txr);
    255 	} else {
    256 		if (adapter->txrx_use_workqueue) {
    257 			u_int *enqueued;
    258 
     259 			/*
     260 			 * This function itself is not called in interrupt
     261 			 * context, but it can be called in fast softint
     262 			 * context right after receiving forwarded packets.
     263 			 * The workqueue therefore must be protected against
     264 			 * double enqueueing when the machine handles both
     265 			 * locally generated and forwarded packets.
     266 			 */
    267 			enqueued = percpu_getref(adapter->txr_wq_enqueued);
    268 			if (*enqueued == 0) {
    269 				*enqueued = 1;
    270 				percpu_putref(adapter->txr_wq_enqueued);
    271 				workqueue_enqueue(adapter->txr_wq,
    272 				    &txr->wq_cookie, curcpu());
    273 			} else
    274 				percpu_putref(adapter->txr_wq_enqueued);
    275 		} else
    276 			softint_schedule(txr->txr_si);
    277 	}
    278 
    279 	return (0);
    280 } /* ixgbe_mq_start */
    281 
    282 /************************************************************************
    283  * ixgbe_mq_start_locked
    284  ************************************************************************/
    285 int
    286 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
    287 {
    288 	struct mbuf    *next;
    289 	int            enqueued = 0, err = 0;
    290 
    291 	if (!txr->adapter->link_active) {
     292 		/*
     293 		 * Discard all packets buffered in txr_interq to avoid
     294 		 * sending stale packets when the link comes up.
     295 		 */
    296 		ixgbe_drain(ifp, txr);
    297 		return (ENETDOWN);
    298 	}
    299 	if ((ifp->if_flags & IFF_RUNNING) == 0)
    300 		return (ENETDOWN);
    301 
    302 	/* Process the queue */
    303 	while ((next = pcq_get(txr->txr_interq)) != NULL) {
    304 		if ((err = ixgbe_xmit(txr, next)) != 0) {
    305 			m_freem(next);
    306 			/* All errors are counted in ixgbe_xmit() */
    307 			break;
    308 		}
    309 		enqueued++;
    310 #if __FreeBSD_version >= 1100036
    311 		/*
    312 		 * Since we're looking at the tx ring, we can check
     313 		 * to see if we're a VF by examining our tail register
    314 		 * address.
    315 		 */
    316 		if ((txr->adapter->feat_en & IXGBE_FEATURE_VF) &&
    317 		    (next->m_flags & M_MCAST))
    318 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
    319 #endif
    320 		/* Send a copy of the frame to the BPF listener */
    321 		bpf_mtap(ifp, next);
    322 		if ((ifp->if_flags & IFF_RUNNING) == 0)
    323 			break;
    324 	}
    325 
    326 	if (enqueued) {
    327 		txr->lastsent = time_uptime;
    328 		txr->sending = true;
    329 	}
    330 
    331 	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->adapter))
    332 		ixgbe_txeof(txr);
    333 
    334 	return (err);
    335 } /* ixgbe_mq_start_locked */
    336 
    337 /************************************************************************
    338  * ixgbe_deferred_mq_start
    339  *
    340  *   Called from a softint and workqueue (indirectly) to drain queued
    341  *   transmit packets.
    342  ************************************************************************/
    343 void
    344 ixgbe_deferred_mq_start(void *arg)
    345 {
    346 	struct tx_ring *txr = arg;
    347 	struct adapter *adapter = txr->adapter;
    348 	struct ifnet   *ifp = adapter->ifp;
    349 
    350 	IXGBE_TX_LOCK(txr);
    351 	if (pcq_peek(txr->txr_interq) != NULL)
    352 		ixgbe_mq_start_locked(ifp, txr);
    353 	IXGBE_TX_UNLOCK(txr);
    354 } /* ixgbe_deferred_mq_start */
    355 
    356 /************************************************************************
    357  * ixgbe_deferred_mq_start_work
    358  *
    359  *   Called from a workqueue to drain queued transmit packets.
    360  ************************************************************************/
    361 void
    362 ixgbe_deferred_mq_start_work(struct work *wk, void *arg)
    363 {
    364 	struct tx_ring *txr = container_of(wk, struct tx_ring, wq_cookie);
    365 	struct adapter *adapter = txr->adapter;
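         	/* Clear this CPU's "already enqueued" marker before draining. */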
    366 	u_int *enqueued = percpu_getref(adapter->txr_wq_enqueued);
    367 	*enqueued = 0;
    368 	percpu_putref(adapter->txr_wq_enqueued);
    369 
    370 	ixgbe_deferred_mq_start(txr);
     371 } /* ixgbe_deferred_mq_start_work */
    372 
    373 /************************************************************************
    374  * ixgbe_drain_all
    375  ************************************************************************/
    376 void
    377 ixgbe_drain_all(struct adapter *adapter)
    378 {
    379 	struct ifnet *ifp = adapter->ifp;
    380 	struct ix_queue *que = adapter->queues;
    381 
    382 	for (int i = 0; i < adapter->num_queues; i++, que++) {
    383 		struct tx_ring  *txr = que->txr;
    384 
    385 		IXGBE_TX_LOCK(txr);
    386 		ixgbe_drain(ifp, txr);
    387 		IXGBE_TX_UNLOCK(txr);
    388 	}
    389 }
    390 
    391 /************************************************************************
    392  * ixgbe_xmit
    393  *
    394  *   Maps the mbufs to tx descriptors, allowing the
    395  *   TX engine to transmit the packets.
    396  *
    397  *   Return 0 on success, positive on failure
    398  ************************************************************************/
    399 static int
    400 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
    401 {
    402 	struct adapter          *adapter = txr->adapter;
    403 	struct ixgbe_tx_buf     *txbuf;
    404 	union ixgbe_adv_tx_desc *txd = NULL;
    405 	struct ifnet	        *ifp = adapter->ifp;
    406 	int                     i, j, error;
    407 	int                     first;
    408 	u32                     olinfo_status = 0, cmd_type_len;
    409 	bool                    remap = TRUE;
    410 	bus_dmamap_t            map;
    411 
    412 	/* Basic descriptor defines */
    413 	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
    414 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
    415 
    416 	if (vlan_has_tag(m_head))
    417 		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
    418 
    419 	/*
    420 	 * Important to capture the first descriptor
    421 	 * used because it will contain the index of
    422 	 * the one we tell the hardware to report back
    423 	 */
    424 	first = txr->next_avail_desc;
    425 	txbuf = &txr->tx_buffers[first];
    426 	map = txbuf->map;
    427 
    428 	/*
    429 	 * Map the packet for DMA.
    430 	 */
    431 retry:
    432 	error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map, m_head,
    433 	    BUS_DMA_NOWAIT);
    434 
    435 	if (__predict_false(error)) {
    436 		struct mbuf *m;
    437 
    438 		switch (error) {
    439 		case EAGAIN:
    440 			txr->q_eagain_tx_dma_setup++;
    441 			return EAGAIN;
    442 		case ENOMEM:
    443 			txr->q_enomem_tx_dma_setup++;
    444 			return EAGAIN;
    445 		case EFBIG:
    446 			/* Try it again? - one try */
    447 			if (remap == TRUE) {
    448 				remap = FALSE;
    449 				/*
    450 				 * XXX: m_defrag will choke on
    451 				 * non-MCLBYTES-sized clusters
    452 				 */
    453 				txr->q_efbig_tx_dma_setup++;
    454 				m = m_defrag(m_head, M_NOWAIT);
    455 				if (m == NULL) {
    456 					txr->q_mbuf_defrag_failed++;
    457 					return ENOBUFS;
    458 				}
    459 				m_head = m;
    460 				goto retry;
    461 			} else {
    462 				txr->q_efbig2_tx_dma_setup++;
    463 				return error;
    464 			}
    465 		case EINVAL:
    466 			txr->q_einval_tx_dma_setup++;
    467 			return error;
    468 		default:
    469 			txr->q_other_tx_dma_setup++;
    470 			return error;
    471 		}
    472 	}
    473 
    474 	/* Make certain there are enough descriptors */
    475 	if (txr->tx_avail < (map->dm_nsegs + 2)) {
    476 		txr->no_desc_avail.ev_count++;
    477 		ixgbe_dmamap_unload(txr->txtag, txbuf->map);
    478 		return EAGAIN;
    479 	}
    480 
    481 	/*
    482 	 * Set up the appropriate offload context
    483 	 * this will consume the first descriptor
    484 	 */
    485 	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
    486 	if (__predict_false(error)) {
    487 		return (error);
    488 	}
    489 
    490 	/* Do the flow director magic */
    491 	if ((adapter->feat_en & IXGBE_FEATURE_FDIR) &&
    492 	    (txr->atr_sample) && (!adapter->fdir_reinit)) {
    493 		++txr->atr_count;
    494 		if (txr->atr_count >= atr_sample_rate) {
    495 			ixgbe_atr(txr, m_head);
    496 			txr->atr_count = 0;
    497 		}
    498 	}
    499 
    500 	olinfo_status |= IXGBE_ADVTXD_CC;
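         	/* Write one advanced data descriptor per DMA segment. */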
    501 	i = txr->next_avail_desc;
    502 	for (j = 0; j < map->dm_nsegs; j++) {
    503 		bus_size_t seglen;
    504 		bus_addr_t segaddr;
    505 
    506 		txbuf = &txr->tx_buffers[i];
    507 		txd = &txr->tx_base[i];
    508 		seglen = map->dm_segs[j].ds_len;
    509 		segaddr = htole64(map->dm_segs[j].ds_addr);
    510 
    511 		txd->read.buffer_addr = segaddr;
    512 		txd->read.cmd_type_len = htole32(cmd_type_len | seglen);
    513 		txd->read.olinfo_status = htole32(olinfo_status);
    514 
    515 		if (++i == txr->num_desc)
    516 			i = 0;
    517 	}
    518 
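         	/* Flag the last descriptor as end-of-packet and request a completion (DD) report. */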
    519 	txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
    520 	txr->tx_avail -= map->dm_nsegs;
    521 	txr->next_avail_desc = i;
    522 
    523 	txbuf->m_head = m_head;
    524 	/*
     525 	 * which gets the completion interrupt, has the
    526 	 * which gets the completion interrupt has the
    527 	 * real map, and the first descriptor gets the
    528 	 * unused map from this descriptor.
    529 	 */
    530 	txr->tx_buffers[first].map = txbuf->map;
    531 	txbuf->map = map;
    532 	bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
    533 	    BUS_DMASYNC_PREWRITE);
    534 
    535 	/* Set the EOP descriptor that will be marked done */
    536 	txbuf = &txr->tx_buffers[first];
    537 	txbuf->eop = txd;
    538 
    539 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
    540 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
     541 	/*
     542 	 * Advance the Transmit Descriptor Tail (TDT); this tells the
     543 	 * hardware that this frame is available to transmit.
     544 	 */
    545 	++txr->total_packets.ev_count;
    546 	IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
    547 
    548 	/*
    549 	 * XXXX NOMPSAFE: ifp->if_data should be percpu.
    550 	 */
    551 	ifp->if_obytes += m_head->m_pkthdr.len;
    552 	if (m_head->m_flags & M_MCAST)
    553 		ifp->if_omcasts++;
    554 
    555 	return (0);
    556 } /* ixgbe_xmit */
    557 
    558 /************************************************************************
    559  * ixgbe_drain
    560  ************************************************************************/
    561 static void
    562 ixgbe_drain(struct ifnet *ifp, struct tx_ring *txr)
    563 {
    564 	struct mbuf *m;
    565 
    566 	IXGBE_TX_LOCK_ASSERT(txr);
    567 
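         	/* The legacy if_snd queue is shared, so drain it from ring 0 only. */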
    568 	if (txr->me == 0) {
    569 		while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
    570 			IFQ_DEQUEUE(&ifp->if_snd, m);
    571 			m_freem(m);
    572 			IF_DROP(&ifp->if_snd);
    573 		}
    574 	}
    575 
    576 	while ((m = pcq_get(txr->txr_interq)) != NULL) {
    577 		m_freem(m);
    578 		txr->pcq_drops.ev_count++;
    579 	}
    580 }
    581 
    582 /************************************************************************
    583  * ixgbe_allocate_transmit_buffers
    584  *
    585  *   Allocate memory for tx_buffer structures. The tx_buffer stores all
    586  *   the information needed to transmit a packet on the wire. This is
    587  *   called only once at attach, setup is done every reset.
    588  ************************************************************************/
    589 static int
    590 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
    591 {
    592 	struct adapter      *adapter = txr->adapter;
    593 	device_t            dev = adapter->dev;
    594 	struct ixgbe_tx_buf *txbuf;
    595 	int                 error, i;
    596 
    597 	/*
    598 	 * Setup DMA descriptor areas.
    599 	 */
    600 	error = ixgbe_dma_tag_create(
    601 	         /*      parent */ adapter->osdep.dmat,
    602 	         /*   alignment */ 1,
    603 	         /*      bounds */ 0,
    604 	         /*     maxsize */ IXGBE_TSO_SIZE,
    605 	         /*   nsegments */ adapter->num_segs,
    606 	         /*  maxsegsize */ PAGE_SIZE,
    607 	         /*       flags */ 0,
    608 	                           &txr->txtag);
    609 	if (error != 0) {
     610 		aprint_error_dev(dev, "Unable to allocate TX DMA tag\n");
    611 		goto fail;
    612 	}
    613 
    614 	txr->tx_buffers =
    615 	    (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
    616 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
    617 	if (txr->tx_buffers == NULL) {
    618 		aprint_error_dev(dev, "Unable to allocate tx_buffer memory\n");
    619 		error = ENOMEM;
    620 		goto fail;
    621 	}
    622 
    623 	/* Create the descriptor buffer dma maps */
    624 	txbuf = txr->tx_buffers;
    625 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
    626 		error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
    627 		if (error != 0) {
    628 			aprint_error_dev(dev,
    629 			    "Unable to create TX DMA map (%d)\n", error);
    630 			goto fail;
    631 		}
    632 	}
    633 
    634 	return 0;
    635 fail:
    636 	/* We free all, it handles case where we are in the middle */
    637 #if 0 /* XXX was FreeBSD */
    638 	ixgbe_free_transmit_structures(adapter);
    639 #else
    640 	ixgbe_free_transmit_buffers(txr);
    641 #endif
    642 	return (error);
    643 } /* ixgbe_allocate_transmit_buffers */
    644 
    645 /************************************************************************
    646  * ixgbe_setup_transmit_ring - Initialize a transmit ring.
    647  ************************************************************************/
    648 static void
    649 ixgbe_setup_transmit_ring(struct tx_ring *txr)
    650 {
    651 	struct adapter        *adapter = txr->adapter;
    652 	struct ixgbe_tx_buf   *txbuf;
    653 #ifdef DEV_NETMAP
    654 	struct netmap_adapter *na = NA(adapter->ifp);
    655 	struct netmap_slot    *slot;
    656 #endif /* DEV_NETMAP */
    657 
    658 	/* Clear the old ring contents */
    659 	IXGBE_TX_LOCK(txr);
    660 
    661 #ifdef DEV_NETMAP
    662 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
    663 		/*
    664 		 * (under lock): if in netmap mode, do some consistency
    665 		 * checks and set slot to entry 0 of the netmap ring.
    666 		 */
    667 		slot = netmap_reset(na, NR_TX, txr->me, 0);
    668 	}
    669 #endif /* DEV_NETMAP */
    670 
    671 	bzero((void *)txr->tx_base,
    672 	    (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
    673 	/* Reset indices */
    674 	txr->next_avail_desc = 0;
    675 	txr->next_to_clean = 0;
    676 
    677 	/* Free any existing tx buffers. */
     678 	txbuf = txr->tx_buffers;
     679 	txr->sending = false;
     680 	for (int i = 0; i < txr->num_desc; i++, txbuf++) {
    681 		if (txbuf->m_head != NULL) {
    682 			bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
    683 			    0, txbuf->m_head->m_pkthdr.len,
    684 			    BUS_DMASYNC_POSTWRITE);
    685 			ixgbe_dmamap_unload(txr->txtag, txbuf->map);
    686 			m_freem(txbuf->m_head);
    687 			txbuf->m_head = NULL;
    688 		}
    689 
    690 #ifdef DEV_NETMAP
    691 		/*
    692 		 * In netmap mode, set the map for the packet buffer.
    693 		 * NOTE: Some drivers (not this one) also need to set
    694 		 * the physical buffer address in the NIC ring.
    695 		 * Slots in the netmap ring (indexed by "si") are
    696 		 * kring->nkr_hwofs positions "ahead" wrt the
    697 		 * corresponding slot in the NIC ring. In some drivers
    698 		 * (not here) nkr_hwofs can be negative. Function
    699 		 * netmap_idx_n2k() handles wraparounds properly.
    700 		 */
    701 		if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
    702 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
    703 			netmap_load_map(na, txr->txtag,
    704 			    txbuf->map, NMB(na, slot + si));
    705 		}
    706 #endif /* DEV_NETMAP */
    707 
    708 		/* Clear the EOP descriptor pointer */
    709 		txbuf->eop = NULL;
    710 	}
    711 
    712 	/* Set the rate at which we sample packets */
    713 	if (adapter->feat_en & IXGBE_FEATURE_FDIR)
    714 		txr->atr_sample = atr_sample_rate;
    715 
    716 	/* Set number of descriptors available */
    717 	txr->tx_avail = adapter->num_tx_desc;
    718 
    719 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
    720 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
    721 	IXGBE_TX_UNLOCK(txr);
    722 } /* ixgbe_setup_transmit_ring */
    723 
    724 /************************************************************************
    725  * ixgbe_setup_transmit_structures - Initialize all transmit rings.
    726  ************************************************************************/
    727 int
    728 ixgbe_setup_transmit_structures(struct adapter *adapter)
    729 {
    730 	struct tx_ring *txr = adapter->tx_rings;
    731 
    732 	for (int i = 0; i < adapter->num_queues; i++, txr++)
    733 		ixgbe_setup_transmit_ring(txr);
    734 
    735 	return (0);
    736 } /* ixgbe_setup_transmit_structures */
    737 
    738 /************************************************************************
    739  * ixgbe_free_transmit_structures - Free all transmit rings.
    740  ************************************************************************/
    741 void
    742 ixgbe_free_transmit_structures(struct adapter *adapter)
    743 {
    744 	struct tx_ring *txr = adapter->tx_rings;
    745 
    746 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
    747 		ixgbe_free_transmit_buffers(txr);
    748 		ixgbe_dma_free(adapter, &txr->txdma);
    749 		IXGBE_TX_LOCK_DESTROY(txr);
    750 	}
    751 	free(adapter->tx_rings, M_DEVBUF);
    752 } /* ixgbe_free_transmit_structures */
    753 
    754 /************************************************************************
    755  * ixgbe_free_transmit_buffers
    756  *
    757  *   Free transmit ring related data structures.
    758  ************************************************************************/
    759 static void
    760 ixgbe_free_transmit_buffers(struct tx_ring *txr)
    761 {
    762 	struct adapter      *adapter = txr->adapter;
    763 	struct ixgbe_tx_buf *tx_buffer;
    764 	int                 i;
    765 
    766 	INIT_DEBUGOUT("ixgbe_free_transmit_buffers: begin");
    767 
    768 	if (txr->tx_buffers == NULL)
    769 		return;
    770 
    771 	tx_buffer = txr->tx_buffers;
    772 	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
    773 		if (tx_buffer->m_head != NULL) {
    774 			bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
    775 			    0, tx_buffer->m_head->m_pkthdr.len,
    776 			    BUS_DMASYNC_POSTWRITE);
    777 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
    778 			m_freem(tx_buffer->m_head);
    779 			tx_buffer->m_head = NULL;
    780 			if (tx_buffer->map != NULL) {
    781 				ixgbe_dmamap_destroy(txr->txtag,
    782 				    tx_buffer->map);
    783 				tx_buffer->map = NULL;
    784 			}
    785 		} else if (tx_buffer->map != NULL) {
    786 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
    787 			ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
    788 			tx_buffer->map = NULL;
    789 		}
    790 	}
    791 	if (txr->txr_interq != NULL) {
    792 		struct mbuf *m;
    793 
    794 		while ((m = pcq_get(txr->txr_interq)) != NULL)
    795 			m_freem(m);
    796 		pcq_destroy(txr->txr_interq);
    797 	}
    798 	if (txr->tx_buffers != NULL) {
    799 		free(txr->tx_buffers, M_DEVBUF);
    800 		txr->tx_buffers = NULL;
    801 	}
    802 	if (txr->txtag != NULL) {
    803 		ixgbe_dma_tag_destroy(txr->txtag);
    804 		txr->txtag = NULL;
    805 	}
    806 } /* ixgbe_free_transmit_buffers */
    807 
    808 /************************************************************************
    809  * ixgbe_tx_ctx_setup
    810  *
    811  *   Advanced Context Descriptor setup for VLAN, CSUM or TSO
    812  ************************************************************************/
    813 static int
    814 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
    815     u32 *cmd_type_len, u32 *olinfo_status)
    816 {
    817 	struct adapter                   *adapter = txr->adapter;
    818 	struct ixgbe_adv_tx_context_desc *TXD;
    819 	struct ether_vlan_header         *eh;
    820 #ifdef INET
    821 	struct ip                        *ip;
    822 #endif
    823 #ifdef INET6
    824 	struct ip6_hdr                   *ip6;
    825 #endif
    826 	int                              ehdrlen, ip_hlen = 0;
    827 	int                              offload = TRUE;
    828 	int                              ctxd = txr->next_avail_desc;
    829 	u32                              vlan_macip_lens = 0;
    830 	u32                              type_tucmd_mlhl = 0;
    831 	u16                              vtag = 0;
    832 	u16                              etype;
    833 	u8                               ipproto = 0;
    834 	char                             *l3d;
    835 
    836 
    837 	/* First check if TSO is to be used */
    838 	if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4 | M_CSUM_TSOv6)) {
    839 		int rv = ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status);
    840 
    841 		if (rv != 0)
    842 			++adapter->tso_err.ev_count;
    843 		return rv;
    844 	}
    845 
    846 	if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
    847 		offload = FALSE;
    848 
    849 	/* Indicate the whole packet as payload when not doing TSO */
    850 	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
    851 
    852 	/* Now ready a context descriptor */
    853 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
    854 
    855 	/*
    856 	 * In advanced descriptors the vlan tag must
    857 	 * be placed into the context descriptor. Hence
    858 	 * we need to make one even if not doing offloads.
    859 	 */
    860 	if (vlan_has_tag(mp)) {
    861 		vtag = htole16(vlan_get_tag(mp));
    862 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
    863 	} else if (!(txr->adapter->feat_en & IXGBE_FEATURE_NEEDS_CTXD) &&
    864 	           (offload == FALSE))
    865 		return (0);
    866 
    867 	/*
    868 	 * Determine where frame payload starts.
    869 	 * Jump over vlan headers if already present,
    870 	 * helpful for QinQ too.
    871 	 */
    872 	KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
    873 	eh = mtod(mp, struct ether_vlan_header *);
    874 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
    875 		KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
    876 		etype = ntohs(eh->evl_proto);
    877 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
    878 	} else {
    879 		etype = ntohs(eh->evl_encap_proto);
    880 		ehdrlen = ETHER_HDR_LEN;
    881 	}
    882 
    883 	/* Set the ether header length */
    884 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
    885 
    886 	if (offload == FALSE)
    887 		goto no_offloads;
    888 
    889 	/*
    890 	 * If the first mbuf only includes the ethernet header,
    891 	 * jump to the next one
    892 	 * XXX: This assumes the stack splits mbufs containing headers
    893 	 *      on header boundaries
    894 	 * XXX: And assumes the entire IP header is contained in one mbuf
    895 	 */
    896 	if (mp->m_len == ehdrlen && mp->m_next)
    897 		l3d = mtod(mp->m_next, char *);
    898 	else
    899 		l3d = mtod(mp, char *) + ehdrlen;
    900 
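         	/*
         	 * Parse the L3 header to find its length and the L4 protocol;
         	 * unsupported ethertypes simply leave offload disabled.
         	 */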
    901 	switch (etype) {
    902 #ifdef INET
    903 	case ETHERTYPE_IP:
    904 		ip = (struct ip *)(l3d);
    905 		ip_hlen = ip->ip_hl << 2;
    906 		ipproto = ip->ip_p;
    907 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
    908 		KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
    909 		    ip->ip_sum == 0);
    910 		break;
    911 #endif
    912 #ifdef INET6
    913 	case ETHERTYPE_IPV6:
    914 		ip6 = (struct ip6_hdr *)(l3d);
    915 		ip_hlen = sizeof(struct ip6_hdr);
    916 		ipproto = ip6->ip6_nxt;
    917 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
    918 		break;
    919 #endif
    920 	default:
    921 		offload = false;
    922 		break;
    923 	}
    924 
    925 	if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
    926 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
    927 
    928 	vlan_macip_lens |= ip_hlen;
    929 
    930 	/* No support for offloads for non-L4 next headers */
     931 	switch (ipproto) {
    932 	case IPPROTO_TCP:
    933 		if (mp->m_pkthdr.csum_flags &
    934 		    (M_CSUM_TCPv4 | M_CSUM_TCPv6))
    935 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
    936 		else
    937 			offload = false;
    938 		break;
    939 	case IPPROTO_UDP:
    940 		if (mp->m_pkthdr.csum_flags &
    941 		    (M_CSUM_UDPv4 | M_CSUM_UDPv6))
    942 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
    943 		else
    944 			offload = false;
    945 		break;
    946 	default:
    947 		offload = false;
    948 		break;
    949 	}
    950 
    951 	if (offload) /* Insert L4 checksum into data descriptors */
    952 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
    953 
    954 no_offloads:
    955 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
    956 
    957 	/* Now copy bits into descriptor */
    958 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
    959 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
    960 	TXD->seqnum_seed = htole32(0);
    961 	TXD->mss_l4len_idx = htole32(0);
    962 
    963 	/* We've consumed the first desc, adjust counters */
    964 	if (++ctxd == txr->num_desc)
    965 		ctxd = 0;
    966 	txr->next_avail_desc = ctxd;
    967 	--txr->tx_avail;
    968 
    969 	return (0);
    970 } /* ixgbe_tx_ctx_setup */
    971 
    972 /************************************************************************
    973  * ixgbe_tso_setup
    974  *
    975  *   Setup work for hardware segmentation offload (TSO) on
    976  *   adapters using advanced tx descriptors
    977  ************************************************************************/
    978 static int
    979 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len,
    980     u32 *olinfo_status)
    981 {
    982 	struct ixgbe_adv_tx_context_desc *TXD;
    983 	struct ether_vlan_header         *eh;
    984 #ifdef INET6
    985 	struct ip6_hdr                   *ip6;
    986 #endif
    987 #ifdef INET
    988 	struct ip                        *ip;
    989 #endif
    990 	struct tcphdr                    *th;
    991 	int                              ctxd, ehdrlen, ip_hlen, tcp_hlen;
    992 	u32                              vlan_macip_lens = 0;
    993 	u32                              type_tucmd_mlhl = 0;
    994 	u32                              mss_l4len_idx = 0, paylen;
    995 	u16                              vtag = 0, eh_type;
    996 
    997 	/*
    998 	 * Determine where frame payload starts.
    999 	 * Jump over vlan headers if already present
   1000 	 */
   1001 	eh = mtod(mp, struct ether_vlan_header *);
   1002 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
   1003 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
   1004 		eh_type = eh->evl_proto;
   1005 	} else {
   1006 		ehdrlen = ETHER_HDR_LEN;
   1007 		eh_type = eh->evl_encap_proto;
   1008 	}
   1009 
   1010 	switch (ntohs(eh_type)) {
   1011 #ifdef INET
   1012 	case ETHERTYPE_IP:
   1013 		ip = (struct ip *)(mp->m_data + ehdrlen);
   1014 		if (ip->ip_p != IPPROTO_TCP)
   1015 			return (ENXIO);
   1016 		ip->ip_sum = 0;
   1017 		ip_hlen = ip->ip_hl << 2;
   1018 		th = (struct tcphdr *)((char *)ip + ip_hlen);
   1019 		th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
   1020 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
   1021 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
   1022 		/* Tell transmit desc to also do IPv4 checksum. */
   1023 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
   1024 		break;
   1025 #endif
   1026 #ifdef INET6
   1027 	case ETHERTYPE_IPV6:
   1028 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
   1029 		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
   1030 		if (ip6->ip6_nxt != IPPROTO_TCP)
   1031 			return (ENXIO);
   1032 		ip_hlen = sizeof(struct ip6_hdr);
   1033 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
   1034 		th = (struct tcphdr *)((char *)ip6 + ip_hlen);
   1035 		th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
   1036 		    &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
   1037 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
   1038 		break;
   1039 #endif
   1040 	default:
   1041 		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
   1042 		    __func__, ntohs(eh_type));
   1043 		break;
   1044 	}
   1045 
   1046 	ctxd = txr->next_avail_desc;
   1047 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
   1048 
   1049 	tcp_hlen = th->th_off << 2;
   1050 
   1051 	/* This is used in the transmit desc in encap */
   1052 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
   1053 
   1054 	/* VLAN MACLEN IPLEN */
   1055 	if (vlan_has_tag(mp)) {
   1056 		vtag = htole16(vlan_get_tag(mp));
   1057 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
   1058 	}
   1059 
   1060 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
   1061 	vlan_macip_lens |= ip_hlen;
   1062 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
   1063 
   1064 	/* ADV DTYPE TUCMD */
   1065 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
   1066 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
   1067 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
   1068 
   1069 	/* MSS L4LEN IDX */
   1070 	mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
   1071 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
   1072 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
   1073 
   1074 	TXD->seqnum_seed = htole32(0);
   1075 
   1076 	if (++ctxd == txr->num_desc)
   1077 		ctxd = 0;
   1078 
   1079 	txr->tx_avail--;
   1080 	txr->next_avail_desc = ctxd;
   1081 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
   1082 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
   1083 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
   1084 	++txr->tso_tx.ev_count;
   1085 
   1086 	return (0);
   1087 } /* ixgbe_tso_setup */
   1088 
   1089 
   1090 /************************************************************************
   1091  * ixgbe_txeof
   1092  *
   1093  *   Examine each tx_buffer in the used queue. If the hardware is done
   1094  *   processing the packet then free associated resources. The
   1095  *   tx_buffer is put back on the free queue.
   1096  ************************************************************************/
   1097 bool
   1098 ixgbe_txeof(struct tx_ring *txr)
   1099 {
   1100 	struct adapter		*adapter = txr->adapter;
   1101 	struct ifnet		*ifp = adapter->ifp;
   1102 	struct ixgbe_tx_buf	*buf;
   1103 	union ixgbe_adv_tx_desc *txd;
   1104 	u32			work, processed = 0;
   1105 	u32			limit = adapter->tx_process_limit;
   1106 
   1107 	KASSERT(mutex_owned(&txr->tx_mtx));
   1108 
   1109 #ifdef DEV_NETMAP
   1110 	if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) &&
   1111 	    (adapter->ifp->if_capenable & IFCAP_NETMAP)) {
   1112 		struct netmap_adapter *na = NA(adapter->ifp);
   1113 		struct netmap_kring *kring = &na->tx_rings[txr->me];
   1114 		txd = txr->tx_base;
   1115 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1116 		    BUS_DMASYNC_POSTREAD);
   1117 		/*
   1118 		 * In netmap mode, all the work is done in the context
   1119 		 * of the client thread. Interrupt handlers only wake up
   1120 		 * clients, which may be sleeping on individual rings
   1121 		 * or on a global resource for all rings.
   1122 		 * To implement tx interrupt mitigation, we wake up the client
   1123 		 * thread roughly every half ring, even if the NIC interrupts
   1124 		 * more frequently. This is implemented as follows:
   1125 		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
   1126 		 *   the slot that should wake up the thread (nkr_num_slots
   1127 		 *   means the user thread should not be woken up);
   1128 		 * - the driver ignores tx interrupts unless netmap_mitigate=0
   1129 		 *   or the slot has the DD bit set.
   1130 		 */
   1131 		if (!netmap_mitigate ||
   1132 		    (kring->nr_kflags < kring->nkr_num_slots &&
   1133 		     txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
   1134 			netmap_tx_irq(ifp, txr->me);
   1135 		}
   1136 		return false;
   1137 	}
   1138 #endif /* DEV_NETMAP */
   1139 
   1140 	if (txr->tx_avail == txr->num_desc) {
   1141 		txr->sending = false;
   1142 		return false;
   1143 	}
   1144 
   1145 	/* Get work starting point */
   1146 	work = txr->next_to_clean;
   1147 	buf = &txr->tx_buffers[work];
   1148 	txd = &txr->tx_base[work];
   1149 	work -= txr->num_desc; /* The distance to ring end */
   1150 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1151 	    BUS_DMASYNC_POSTREAD);
   1152 
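         	/*
         	 * Walk the ring from next_to_clean, freeing each completed
         	 * packet (its EOP descriptor has the DD bit set), until the
         	 * processing limit is reached or an unfinished packet is found.
         	 */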
   1153 	do {
   1154 		union ixgbe_adv_tx_desc *eop = buf->eop;
   1155 		if (eop == NULL) /* No work */
   1156 			break;
   1157 
   1158 		if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
   1159 			break;	/* I/O not complete */
   1160 
   1161 		if (buf->m_head) {
   1162 			txr->bytes += buf->m_head->m_pkthdr.len;
   1163 			bus_dmamap_sync(txr->txtag->dt_dmat, buf->map,
   1164 			    0, buf->m_head->m_pkthdr.len,
   1165 			    BUS_DMASYNC_POSTWRITE);
   1166 			ixgbe_dmamap_unload(txr->txtag, buf->map);
   1167 			m_freem(buf->m_head);
   1168 			buf->m_head = NULL;
   1169 		}
   1170 		buf->eop = NULL;
   1171 		++txr->tx_avail;
   1172 
   1173 		/* We clean the range if multi segment */
   1174 		while (txd != eop) {
   1175 			++txd;
   1176 			++buf;
   1177 			++work;
   1178 			/* wrap the ring? */
   1179 			if (__predict_false(!work)) {
   1180 				work -= txr->num_desc;
   1181 				buf = txr->tx_buffers;
   1182 				txd = txr->tx_base;
   1183 			}
   1184 			if (buf->m_head) {
   1185 				txr->bytes +=
   1186 				    buf->m_head->m_pkthdr.len;
   1187 				bus_dmamap_sync(txr->txtag->dt_dmat,
   1188 				    buf->map,
   1189 				    0, buf->m_head->m_pkthdr.len,
   1190 				    BUS_DMASYNC_POSTWRITE);
   1191 				ixgbe_dmamap_unload(txr->txtag,
   1192 				    buf->map);
   1193 				m_freem(buf->m_head);
   1194 				buf->m_head = NULL;
   1195 			}
   1196 			++txr->tx_avail;
   1197 			buf->eop = NULL;
   1198 
   1199 		}
   1200 		++txr->packets;
   1201 		++processed;
   1202 		++ifp->if_opackets;
   1203 
   1204 		/* Try the next packet */
   1205 		++txd;
   1206 		++buf;
   1207 		++work;
   1208 		/* reset with a wrap */
   1209 		if (__predict_false(!work)) {
   1210 			work -= txr->num_desc;
   1211 			buf = txr->tx_buffers;
   1212 			txd = txr->tx_base;
   1213 		}
   1214 		prefetch(txd);
   1215 	} while (__predict_true(--limit));
   1216 
   1217 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1218 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   1219 
   1220 	work += txr->num_desc;
   1221 	txr->next_to_clean = work;
   1222 
   1223 	if (txr->tx_avail == txr->num_desc)
   1224 		txr->sending = false;
   1225 
   1226 	return ((limit > 0) ? false : true);
   1227 } /* ixgbe_txeof */
   1228 
   1229 /************************************************************************
   1230  * ixgbe_rsc_count
   1231  *
   1232  *   Used to detect a descriptor that has been merged by Hardware RSC.
   1233  ************************************************************************/
   1234 static inline u32
   1235 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
   1236 {
   1237 	return (le32toh(rx->wb.lower.lo_dword.data) &
   1238 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
   1239 } /* ixgbe_rsc_count */
   1240 
   1241 /************************************************************************
   1242  * ixgbe_setup_hw_rsc
   1243  *
    1244  *   Initialize Hardware RSC (LRO) feature on 82599
    1245  *   for an RX ring; this is toggled by the LRO capability
    1246  *   even though it is transparent to the stack.
   1247  *
   1248  *   NOTE: Since this HW feature only works with IPv4 and
   1249  *         testing has shown soft LRO to be as effective,
   1250  *         this feature will be disabled by default.
   1251  ************************************************************************/
   1252 static void
   1253 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
   1254 {
   1255 	struct	adapter  *adapter = rxr->adapter;
   1256 	struct	ixgbe_hw *hw = &adapter->hw;
   1257 	u32              rscctrl, rdrxctl;
   1258 
   1259 	/* If turning LRO/RSC off we need to disable it */
   1260 	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
   1261 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
    1262 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
         		/* Write the cleared bit back so RSC is actually disabled. */
         		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
    1263 		return;
   1264 	}
   1265 
   1266 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
   1267 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
   1268 #ifdef DEV_NETMAP
   1269 	/* Always strip CRC unless Netmap disabled it */
   1270 	if (!(adapter->feat_en & IXGBE_FEATURE_NETMAP) ||
   1271 	    !(adapter->ifp->if_capenable & IFCAP_NETMAP) ||
   1272 	    ix_crcstrip)
   1273 #endif /* DEV_NETMAP */
   1274 		rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
   1275 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
   1276 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
   1277 
   1278 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
   1279 	rscctrl |= IXGBE_RSCCTL_RSCEN;
   1280 	/*
   1281 	 * Limit the total number of descriptors that
   1282 	 * can be combined, so it does not exceed 64K
   1283 	 */
   1284 	if (rxr->mbuf_sz == MCLBYTES)
   1285 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
   1286 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
   1287 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
   1288 	else if (rxr->mbuf_sz == MJUM9BYTES)
   1289 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
   1290 	else  /* Using 16K cluster */
   1291 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
   1292 
   1293 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
   1294 
   1295 	/* Enable TCP header recognition */
   1296 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
   1297 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR));
   1298 
   1299 	/* Disable RSC for ACK packets */
   1300 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
   1301 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
   1302 
   1303 	rxr->hw_rsc = TRUE;
   1304 } /* ixgbe_setup_hw_rsc */
   1305 
   1306 /************************************************************************
   1307  * ixgbe_refresh_mbufs
    1308  *   Refresh mbuf buffers for RX descriptor rings.
    1309  *    - Keeps its own state, so discards due to resource
    1310  *      exhaustion are unnecessary: if an mbuf cannot be obtained,
    1311  *      the routine just returns and keeps its placeholder, so it
    1312  *      can simply be called again to retry.
   1313  *      be recalled to try again.
   1314  ************************************************************************/
   1315 static void
   1316 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
   1317 {
   1318 	struct adapter      *adapter = rxr->adapter;
   1319 	struct ixgbe_rx_buf *rxbuf;
   1320 	struct mbuf         *mp;
   1321 	int                 i, j, error;
   1322 	bool                refreshed = false;
   1323 
   1324 	i = j = rxr->next_to_refresh;
   1325 	/* Control the loop with one beyond */
   1326 	if (++j == rxr->num_desc)
   1327 		j = 0;
   1328 
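         	/* Refill descriptors from next_to_refresh up to, but not including, limit. */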
   1329 	while (j != limit) {
   1330 		rxbuf = &rxr->rx_buffers[i];
   1331 		if (rxbuf->buf == NULL) {
   1332 			mp = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
   1333 			    MT_DATA, M_PKTHDR, rxr->mbuf_sz);
   1334 			if (mp == NULL) {
   1335 				rxr->no_jmbuf.ev_count++;
   1336 				goto update;
   1337 			}
   1338 			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
   1339 				m_adj(mp, ETHER_ALIGN);
   1340 		} else
   1341 			mp = rxbuf->buf;
   1342 
   1343 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   1344 
   1345 		/* If we're dealing with an mbuf that was copied rather
   1346 		 * than replaced, there's no need to go through busdma.
   1347 		 */
   1348 		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
   1349 			/* Get the memory mapping */
   1350 			ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   1351 			error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
   1352 			    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
   1353 			if (error != 0) {
   1354 				printf("Refresh mbufs: payload dmamap load failure - %d\n", error);
   1355 				m_free(mp);
   1356 				rxbuf->buf = NULL;
   1357 				goto update;
   1358 			}
   1359 			rxbuf->buf = mp;
   1360 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1361 			    0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
   1362 			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
   1363 			    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1364 		} else {
   1365 			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
   1366 			rxbuf->flags &= ~IXGBE_RX_COPY;
   1367 		}
   1368 
   1369 		refreshed = true;
   1370 		/* Next is precalculated */
   1371 		i = j;
   1372 		rxr->next_to_refresh = i;
   1373 		if (++j == rxr->num_desc)
   1374 			j = 0;
   1375 	}
   1376 
   1377 update:
   1378 	if (refreshed) /* Update hardware tail index */
   1379 		IXGBE_WRITE_REG(&adapter->hw, rxr->tail, rxr->next_to_refresh);
   1380 
   1381 	return;
   1382 } /* ixgbe_refresh_mbufs */
   1383 
   1384 /************************************************************************
   1385  * ixgbe_allocate_receive_buffers
   1386  *
   1387  *   Allocate memory for rx_buffer structures. Since we use one
   1388  *   rx_buffer per received packet, the maximum number of rx_buffer's
    1389  *   rx_buffer per received packet, the maximum number of rx_buffers
   1390  *   that we've allocated.
   1391  ************************************************************************/
   1392 static int
   1393 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
   1394 {
   1395 	struct	adapter     *adapter = rxr->adapter;
   1396 	device_t            dev = adapter->dev;
   1397 	struct ixgbe_rx_buf *rxbuf;
   1398 	int                 bsize, error;
   1399 
   1400 	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
   1401 	rxr->rx_buffers = (struct ixgbe_rx_buf *)malloc(bsize, M_DEVBUF,
   1402 	    M_NOWAIT | M_ZERO);
   1403 	if (rxr->rx_buffers == NULL) {
   1404 		aprint_error_dev(dev, "Unable to allocate rx_buffer memory\n");
   1405 		error = ENOMEM;
   1406 		goto fail;
   1407 	}
   1408 
   1409 	error = ixgbe_dma_tag_create(
   1410 	         /*      parent */ adapter->osdep.dmat,
   1411 	         /*   alignment */ 1,
   1412 	         /*      bounds */ 0,
   1413 	         /*     maxsize */ MJUM16BYTES,
   1414 	         /*   nsegments */ 1,
   1415 	         /*  maxsegsize */ MJUM16BYTES,
   1416 	         /*       flags */ 0,
   1417 	                           &rxr->ptag);
   1418 	if (error != 0) {
   1419 		aprint_error_dev(dev, "Unable to create RX DMA tag\n");
   1420 		goto fail;
   1421 	}
   1422 
   1423 	for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
   1424 		rxbuf = &rxr->rx_buffers[i];
   1425 		error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
   1426 		if (error) {
   1427 			aprint_error_dev(dev, "Unable to create RX dma map\n");
   1428 			goto fail;
   1429 		}
   1430 	}
   1431 
   1432 	return (0);
   1433 
   1434 fail:
   1435 	/* Frees all, but can handle partial completion */
   1436 	ixgbe_free_receive_structures(adapter);
   1437 
   1438 	return (error);
   1439 } /* ixgbe_allocate_receive_buffers */
   1440 
   1441 /************************************************************************
   1442  * ixgbe_free_receive_ring
   1443  ************************************************************************/
   1444 static void
   1445 ixgbe_free_receive_ring(struct rx_ring *rxr)
   1446 {
   1447 	for (int i = 0; i < rxr->num_desc; i++) {
   1448 		ixgbe_rx_discard(rxr, i);
   1449 	}
   1450 } /* ixgbe_free_receive_ring */
   1451 
   1452 /************************************************************************
   1453  * ixgbe_setup_receive_ring
   1454  *
   1455  *   Initialize a receive ring and its buffers.
   1456  ************************************************************************/
   1457 static int
   1458 ixgbe_setup_receive_ring(struct rx_ring *rxr)
   1459 {
   1460 	struct adapter        *adapter;
   1461 	struct ixgbe_rx_buf   *rxbuf;
   1462 #ifdef LRO
   1463 	struct ifnet          *ifp;
   1464 	struct lro_ctrl       *lro = &rxr->lro;
   1465 #endif /* LRO */
   1466 #ifdef DEV_NETMAP
   1467 	struct netmap_adapter *na = NA(rxr->adapter->ifp);
   1468 	struct netmap_slot    *slot;
   1469 #endif /* DEV_NETMAP */
   1470 	int                   rsize, error = 0;
   1471 
   1472 	adapter = rxr->adapter;
   1473 #ifdef LRO
   1474 	ifp = adapter->ifp;
   1475 #endif /* LRO */
   1476 
   1477 	/* Clear the ring contents */
   1478 	IXGBE_RX_LOCK(rxr);
   1479 
   1480 #ifdef DEV_NETMAP
   1481 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP)
   1482 		slot = netmap_reset(na, NR_RX, rxr->me, 0);
   1483 #endif /* DEV_NETMAP */
   1484 
   1485 	rsize = roundup2(adapter->num_rx_desc *
   1486 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
   1487 	bzero((void *)rxr->rx_base, rsize);
   1488 	/* Cache the size */
   1489 	rxr->mbuf_sz = adapter->rx_mbuf_sz;
   1490 
   1491 	/* Free current RX buffer structs and their mbufs */
   1492 	ixgbe_free_receive_ring(rxr);
   1493 
   1494 	/* Now replenish the mbufs */
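         	/*
         	 * Each slot gets a jumbo cluster from the adapter's private
         	 * cluster pool (ixgbe_getjcl), is loaded into its DMA map,
         	 * synced PREREAD, and has its physical address written into
         	 * both the advanced RX descriptor and the cached rxbuf->addr.
         	 */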
   1495 	for (int j = 0; j != rxr->num_desc; ++j) {
   1496 		struct mbuf *mp;
   1497 
   1498 		rxbuf = &rxr->rx_buffers[j];
   1499 
   1500 #ifdef DEV_NETMAP
   1501 		/*
   1502 		 * In netmap mode, fill the map and set the buffer
   1503 		 * address in the NIC ring, considering the offset
   1504 		 * between the netmap and NIC rings (see comment in
   1505 		 * ixgbe_setup_transmit_ring() ). No need to allocate
   1506 		 * an mbuf, so end the block with a continue;
   1507 		 */
   1508 		if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
   1509 			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
   1510 			uint64_t paddr;
   1511 			void *addr;
   1512 
   1513 			addr = PNMB(na, slot + sj, &paddr);
   1514 			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
   1515 			/* Update descriptor and the cached value */
   1516 			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
   1517 			rxbuf->addr = htole64(paddr);
   1518 			continue;
   1519 		}
   1520 #endif /* DEV_NETMAP */
   1521 
   1522 		rxbuf->flags = 0;
   1523 		rxbuf->buf = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
   1524 		    MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
   1525 		if (rxbuf->buf == NULL) {
   1526 			error = ENOBUFS;
   1527 			goto fail;
   1528 		}
   1529 		mp = rxbuf->buf;
   1530 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   1531 		/* Get the memory mapping */
   1532 		error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat, rxbuf->pmap,
   1533 		    mp, BUS_DMA_NOWAIT);
   1534 		if (error != 0)
    1535 			goto fail;
   1536 		bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1537 		    0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
   1538 		/* Update the descriptor and the cached value */
   1539 		rxr->rx_base[j].read.pkt_addr =
   1540 		    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1541 		rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1542 	}
   1543 
   1544 
   1545 	/* Setup our descriptor indices */
   1546 	rxr->next_to_check = 0;
   1547 	rxr->next_to_refresh = 0;
   1548 	rxr->lro_enabled = FALSE;
   1549 	rxr->rx_copies.ev_count = 0;
   1550 #if 0 /* NetBSD */
   1551 	rxr->rx_bytes.ev_count = 0;
   1552 #if 1	/* Fix inconsistency */
   1553 	rxr->rx_packets.ev_count = 0;
   1554 #endif
   1555 #endif
   1556 	rxr->vtag_strip = FALSE;
   1557 
   1558 	ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   1559 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   1560 
   1561 	/*
   1562 	 * Now set up the LRO interface
   1563 	 */
   1564 	if (ixgbe_rsc_enable)
   1565 		ixgbe_setup_hw_rsc(rxr);
   1566 #ifdef LRO
   1567 	else if (ifp->if_capenable & IFCAP_LRO) {
   1568 		device_t dev = adapter->dev;
   1569 		int err = tcp_lro_init(lro);
   1570 		if (err) {
   1571 			device_printf(dev, "LRO Initialization failed!\n");
   1572 			goto fail;
   1573 		}
   1574 		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
   1575 		rxr->lro_enabled = TRUE;
   1576 		lro->ifp = adapter->ifp;
   1577 	}
   1578 #endif /* LRO */
   1579 
   1580 	IXGBE_RX_UNLOCK(rxr);
   1581 
   1582 	return (0);
   1583 
   1584 fail:
   1585 	ixgbe_free_receive_ring(rxr);
   1586 	IXGBE_RX_UNLOCK(rxr);
   1587 
   1588 	return (error);
   1589 } /* ixgbe_setup_receive_ring */
   1590 
   1591 /************************************************************************
   1592  * ixgbe_setup_receive_structures - Initialize all receive rings.
   1593  ************************************************************************/
   1594 int
   1595 ixgbe_setup_receive_structures(struct adapter *adapter)
   1596 {
   1597 	struct rx_ring *rxr = adapter->rx_rings;
   1598 	int            j;
   1599 
   1600 	/*
   1601 	 * Now reinitialize our supply of jumbo mbufs.  The number
   1602 	 * or size of jumbo mbufs may have changed.
   1603 	 * Assume all of rxr->ptag are the same.
   1604 	 */
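         	/*
         	 * The pool is sized at twice the descriptors per ring times
         	 * the number of queues, presumably so spare clusters remain
         	 * available while some are still loaned out to the stack.
         	 */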
   1605 	ixgbe_jcl_reinit(adapter, rxr->ptag->dt_dmat,
   1606 	    (2 * adapter->num_rx_desc) * adapter->num_queues,
   1607 	    adapter->rx_mbuf_sz);
   1608 
   1609 	for (j = 0; j < adapter->num_queues; j++, rxr++)
   1610 		if (ixgbe_setup_receive_ring(rxr))
   1611 			goto fail;
   1612 
   1613 	return (0);
   1614 fail:
    1615 	/*
    1616 	 * Free the RX buffers allocated so far; we only handle the rings
    1617 	 * that completed, since the failing ring has already cleaned
    1618 	 * up after itself. Ring 'j' failed, so it is the terminus.
    1619 	 */
   1620 	for (int i = 0; i < j; ++i) {
   1621 		rxr = &adapter->rx_rings[i];
   1622 		IXGBE_RX_LOCK(rxr);
   1623 		ixgbe_free_receive_ring(rxr);
   1624 		IXGBE_RX_UNLOCK(rxr);
   1625 	}
   1626 
   1627 	return (ENOBUFS);
   1628 } /* ixgbe_setup_receive_structures */
   1629 
   1630 
   1631 /************************************************************************
   1632  * ixgbe_free_receive_structures - Free all receive rings.
   1633  ************************************************************************/
   1634 void
   1635 ixgbe_free_receive_structures(struct adapter *adapter)
   1636 {
   1637 	struct rx_ring *rxr = adapter->rx_rings;
   1638 
   1639 	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
   1640 
   1641 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
   1642 		ixgbe_free_receive_buffers(rxr);
   1643 #ifdef LRO
   1644 		/* Free LRO memory */
   1645 		tcp_lro_free(&rxr->lro);
   1646 #endif /* LRO */
   1647 		/* Free the ring memory as well */
   1648 		ixgbe_dma_free(adapter, &rxr->rxdma);
   1649 		IXGBE_RX_LOCK_DESTROY(rxr);
   1650 	}
   1651 
   1652 	free(adapter->rx_rings, M_DEVBUF);
   1653 } /* ixgbe_free_receive_structures */
   1654 
   1655 
   1656 /************************************************************************
   1657  * ixgbe_free_receive_buffers - Free receive ring data structures
   1658  ************************************************************************/
   1659 static void
   1660 ixgbe_free_receive_buffers(struct rx_ring *rxr)
   1661 {
   1662 	struct adapter      *adapter = rxr->adapter;
   1663 	struct ixgbe_rx_buf *rxbuf;
   1664 
   1665 	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
   1666 
   1667 	/* Cleanup any existing buffers */
   1668 	if (rxr->rx_buffers != NULL) {
   1669 		for (int i = 0; i < adapter->num_rx_desc; i++) {
   1670 			rxbuf = &rxr->rx_buffers[i];
   1671 			ixgbe_rx_discard(rxr, i);
   1672 			if (rxbuf->pmap != NULL) {
   1673 				ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
   1674 				rxbuf->pmap = NULL;
   1675 			}
   1676 		}
   1677 		if (rxr->rx_buffers != NULL) {
   1678 			free(rxr->rx_buffers, M_DEVBUF);
   1679 			rxr->rx_buffers = NULL;
   1680 		}
   1681 	}
   1682 
   1683 	if (rxr->ptag != NULL) {
   1684 		ixgbe_dma_tag_destroy(rxr->ptag);
   1685 		rxr->ptag = NULL;
   1686 	}
   1687 
   1688 	return;
   1689 } /* ixgbe_free_receive_buffers */
   1690 
   1691 /************************************************************************
   1692  * ixgbe_rx_input
   1693  ************************************************************************/
   1694 static __inline void
   1695 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m,
   1696     u32 ptype)
   1697 {
   1698 	struct adapter	*adapter = ifp->if_softc;
   1699 
   1700 #ifdef LRO
   1701 	struct ethercom *ec = &adapter->osdep.ec;
   1702 
    1703 	/*
    1704 	 * At the moment, LRO is only for IP/TCP packets whose TCP checksum
    1705 	 * was verified by hardware and which carry no VLAN tag in the
    1706 	 * Ethernet header. For IPv6 we do not yet support extension headers.
    1707 	 */
    1708 	if (rxr->lro_enabled &&
    1709 	    (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
    1710 	    (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
    1711 	    ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
    1712 	    (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
    1713 	    (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
    1714 	    (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
    1715 	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
    1716 	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
    1717 		/*
    1718 		 * Hand the frame to the stack below if:
    1719 		 *  - LRO is not enabled, or
    1720 		 *  - there are no LRO resources, or
    1721 		 *  - the LRO enqueue fails
    1722 		 */
    1723 		if (rxr->lro.lro_cnt != 0)
    1724 			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
    1725 				return;
    1726 	}
   1727 #endif /* LRO */
   1728 
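         	/*
         	 * On NetBSD the frame is handed to the interface's per-CPU
         	 * input queue rather than to if_input() directly; protocol
         	 * processing then runs from a softint on the same CPU.
         	 */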
   1729 	if_percpuq_enqueue(adapter->ipq, m);
   1730 } /* ixgbe_rx_input */
   1731 
   1732 /************************************************************************
   1733  * ixgbe_rx_discard
   1734  ************************************************************************/
   1735 static __inline void
   1736 ixgbe_rx_discard(struct rx_ring *rxr, int i)
   1737 {
   1738 	struct ixgbe_rx_buf *rbuf;
   1739 
   1740 	rbuf = &rxr->rx_buffers[i];
   1741 
    1742 	/*
    1743 	 * With advanced descriptors the writeback
    1744 	 * clobbers the buffer addresses, so it's easier
    1745 	 * to just free the existing mbufs and take
    1746 	 * the normal refresh path to get new buffers
    1747 	 * and mappings.
    1748 	 */
   1749 
   1750 	if (rbuf->fmp != NULL) {/* Partial chain ? */
   1751 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
   1752 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
   1753 		m_freem(rbuf->fmp);
   1754 		rbuf->fmp = NULL;
   1755 		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
   1756 	} else if (rbuf->buf) {
   1757 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
   1758 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
   1759 		m_free(rbuf->buf);
   1760 		rbuf->buf = NULL;
   1761 	}
   1762 	ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
   1763 
   1764 	rbuf->flags = 0;
   1765 
   1766 	return;
   1767 } /* ixgbe_rx_discard */
   1768 
   1769 
   1770 /************************************************************************
   1771  * ixgbe_rxeof
   1772  *
   1773  *   Executes in interrupt context. It replenishes the
   1774  *   mbufs in the descriptor and sends data which has
   1775  *   been dma'ed into host memory to upper layer.
   1776  *
   1777  *   Return TRUE for more work, FALSE for all clean.
   1778  ************************************************************************/
   1779 bool
   1780 ixgbe_rxeof(struct ix_queue *que)
   1781 {
   1782 	struct adapter		*adapter = que->adapter;
   1783 	struct rx_ring		*rxr = que->rxr;
   1784 	struct ifnet		*ifp = adapter->ifp;
   1785 #ifdef LRO
   1786 	struct lro_ctrl		*lro = &rxr->lro;
   1787 #endif /* LRO */
   1788 	union ixgbe_adv_rx_desc	*cur;
   1789 	struct ixgbe_rx_buf	*rbuf, *nbuf;
   1790 	int			i, nextp, processed = 0;
   1791 	u32			staterr = 0;
   1792 	u32			count = adapter->rx_process_limit;
   1793 #ifdef RSS
   1794 	u16			pkt_info;
   1795 #endif
   1796 
   1797 	IXGBE_RX_LOCK(rxr);
   1798 
   1799 #ifdef DEV_NETMAP
   1800 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
    1801 		/* Same as the txeof routine: wake up clients on interrupt. */
   1802 		if (netmap_rx_irq(ifp, rxr->me, &processed)) {
   1803 			IXGBE_RX_UNLOCK(rxr);
   1804 			return (FALSE);
   1805 		}
   1806 	}
   1807 #endif /* DEV_NETMAP */
   1808 
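         	/*
         	 * Main receive loop: sync the descriptor ring, stop at the
         	 * first descriptor whose DD bit is clear, chain non-EOP
         	 * fragments through rbuf->fmp, hand completed frames to
         	 * ixgbe_rx_input(), and refresh mbufs every 8 descriptors
         	 * processed.  'count' bounds the work done per call.
         	 */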
   1809 	for (i = rxr->next_to_check; count != 0;) {
   1810 		struct mbuf *sendmp, *mp;
   1811 		u32         rsc, ptype;
   1812 		u16         len;
   1813 		u16         vtag = 0;
   1814 		bool        eop;
   1815 
   1816 		/* Sync the ring. */
   1817 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   1818 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   1819 
   1820 		cur = &rxr->rx_base[i];
   1821 		staterr = le32toh(cur->wb.upper.status_error);
   1822 #ifdef RSS
   1823 		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
   1824 #endif
   1825 
   1826 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
   1827 			break;
   1828 		if ((ifp->if_flags & IFF_RUNNING) == 0)
   1829 			break;
   1830 
   1831 		count--;
   1832 		sendmp = NULL;
   1833 		nbuf = NULL;
   1834 		rsc = 0;
   1835 		cur->wb.upper.status_error = 0;
   1836 		rbuf = &rxr->rx_buffers[i];
   1837 		mp = rbuf->buf;
   1838 
   1839 		len = le16toh(cur->wb.upper.length);
   1840 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
   1841 		    IXGBE_RXDADV_PKTTYPE_MASK;
   1842 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
   1843 
   1844 		/* Make sure bad packets are discarded */
   1845 		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
   1846 #if __FreeBSD_version >= 1100036
   1847 			if (adapter->feat_en & IXGBE_FEATURE_VF)
   1848 				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
   1849 #endif
   1850 			rxr->rx_discarded.ev_count++;
   1851 			ixgbe_rx_discard(rxr, i);
   1852 			goto next_desc;
   1853 		}
   1854 
   1855 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
   1856 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
   1857 
    1858 		/*
    1859 		 * On the 82599, which supports a hardware
    1860 		 * LRO (called HW RSC), packets need not be
    1861 		 * fragmented across sequential descriptors;
    1862 		 * instead the next descriptor is indicated
    1863 		 * in bits of the current descriptor. This
    1864 		 * also means that we might process more
    1865 		 * than one packet at a time, something that
    1866 		 * has never been true before; it required
    1867 		 * eliminating global chain pointers in favor
    1868 		 * of what we are doing here.  -jfv
    1869 		 */
   1870 		if (!eop) {
   1871 			/*
   1872 			 * Figure out the next descriptor
   1873 			 * of this frame.
   1874 			 */
   1875 			if (rxr->hw_rsc == TRUE) {
   1876 				rsc = ixgbe_rsc_count(cur);
   1877 				rxr->rsc_num += (rsc - 1);
   1878 			}
   1879 			if (rsc) { /* Get hardware index */
   1880 				nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
   1881 				    IXGBE_RXDADV_NEXTP_SHIFT);
   1882 			} else { /* Just sequential */
   1883 				nextp = i + 1;
   1884 				if (nextp == adapter->num_rx_desc)
   1885 					nextp = 0;
   1886 			}
   1887 			nbuf = &rxr->rx_buffers[nextp];
   1888 			prefetch(nbuf);
   1889 		}
   1890 		/*
   1891 		 * Rather than using the fmp/lmp global pointers
   1892 		 * we now keep the head of a packet chain in the
   1893 		 * buffer struct and pass this along from one
   1894 		 * descriptor to the next, until we get EOP.
   1895 		 */
   1896 		mp->m_len = len;
    1897 		/*
    1898 		 * See if there is a stored head: a partially
    1899 		 * assembled frame that this descriptor continues.
    1900 		 */
   1901 		sendmp = rbuf->fmp;
   1902 		if (sendmp != NULL) {  /* secondary frag */
   1903 			rbuf->buf = rbuf->fmp = NULL;
   1904 			mp->m_flags &= ~M_PKTHDR;
   1905 			sendmp->m_pkthdr.len += mp->m_len;
   1906 		} else {
   1907 			/*
   1908 			 * Optimize.  This might be a small packet,
   1909 			 * maybe just a TCP ACK.  Do a fast copy that
   1910 			 * is cache aligned into a new mbuf, and
   1911 			 * leave the old mbuf+cluster for re-use.
   1912 			 */
   1913 			if (eop && len <= IXGBE_RX_COPY_LEN) {
   1914 				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
   1915 				if (sendmp != NULL) {
   1916 					sendmp->m_data += IXGBE_RX_COPY_ALIGN;
   1917 					ixgbe_bcopy(mp->m_data, sendmp->m_data,
   1918 					    len);
   1919 					sendmp->m_len = len;
   1920 					rxr->rx_copies.ev_count++;
   1921 					rbuf->flags |= IXGBE_RX_COPY;
   1922 				}
   1923 			}
   1924 			if (sendmp == NULL) {
   1925 				rbuf->buf = rbuf->fmp = NULL;
   1926 				sendmp = mp;
   1927 			}
   1928 
    1929 			/* First descriptor of a non-packet-split chain */
   1930 			sendmp->m_flags |= M_PKTHDR;
   1931 			sendmp->m_pkthdr.len = mp->m_len;
   1932 		}
   1933 		++processed;
   1934 
   1935 		/* Pass the head pointer on */
   1936 		if (eop == 0) {
   1937 			nbuf->fmp = sendmp;
   1938 			sendmp = NULL;
   1939 			mp->m_next = nbuf->buf;
   1940 		} else { /* Sending this frame */
   1941 			m_set_rcvif(sendmp, ifp);
   1942 			++rxr->packets;
   1943 			rxr->rx_packets.ev_count++;
   1944 			/* capture data for AIM */
   1945 			rxr->bytes += sendmp->m_pkthdr.len;
   1946 			rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
   1947 			/* Process vlan info */
   1948 			if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
   1949 				vtag = le16toh(cur->wb.upper.vlan);
   1950 			if (vtag) {
   1951 				vlan_set_tag(sendmp, vtag);
   1952 			}
   1953 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
   1954 				ixgbe_rx_checksum(staterr, sendmp, ptype,
   1955 				   &adapter->stats.pf);
   1956 			}
   1957 
   1958 #if 0 /* FreeBSD */
   1959 			/*
   1960 			 * In case of multiqueue, we have RXCSUM.PCSD bit set
   1961 			 * and never cleared. This means we have RSS hash
   1962 			 * available to be used.
   1963 			 */
   1964 			if (adapter->num_queues > 1) {
   1965 				sendmp->m_pkthdr.flowid =
   1966 				    le32toh(cur->wb.lower.hi_dword.rss);
   1967 				switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
   1968 				case IXGBE_RXDADV_RSSTYPE_IPV4:
   1969 					M_HASHTYPE_SET(sendmp,
   1970 					    M_HASHTYPE_RSS_IPV4);
   1971 					break;
   1972 				case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
   1973 					M_HASHTYPE_SET(sendmp,
   1974 					    M_HASHTYPE_RSS_TCP_IPV4);
   1975 					break;
   1976 				case IXGBE_RXDADV_RSSTYPE_IPV6:
   1977 					M_HASHTYPE_SET(sendmp,
   1978 					    M_HASHTYPE_RSS_IPV6);
   1979 					break;
   1980 				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
   1981 					M_HASHTYPE_SET(sendmp,
   1982 					    M_HASHTYPE_RSS_TCP_IPV6);
   1983 					break;
   1984 				case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
   1985 					M_HASHTYPE_SET(sendmp,
   1986 					    M_HASHTYPE_RSS_IPV6_EX);
   1987 					break;
   1988 				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
   1989 					M_HASHTYPE_SET(sendmp,
   1990 					    M_HASHTYPE_RSS_TCP_IPV6_EX);
   1991 					break;
   1992 #if __FreeBSD_version > 1100000
   1993 				case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
   1994 					M_HASHTYPE_SET(sendmp,
   1995 					    M_HASHTYPE_RSS_UDP_IPV4);
   1996 					break;
   1997 				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
   1998 					M_HASHTYPE_SET(sendmp,
   1999 					    M_HASHTYPE_RSS_UDP_IPV6);
   2000 					break;
   2001 				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
   2002 					M_HASHTYPE_SET(sendmp,
   2003 					    M_HASHTYPE_RSS_UDP_IPV6_EX);
   2004 					break;
   2005 #endif
   2006 				default:
   2007 					M_HASHTYPE_SET(sendmp,
   2008 					    M_HASHTYPE_OPAQUE_HASH);
   2009 				}
   2010 			} else {
   2011 				sendmp->m_pkthdr.flowid = que->msix;
   2012 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
   2013 			}
   2014 #endif
   2015 		}
   2016 next_desc:
   2017 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   2018 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   2019 
   2020 		/* Advance our pointers to the next descriptor. */
   2021 		if (++i == rxr->num_desc)
   2022 			i = 0;
   2023 
   2024 		/* Now send to the stack or do LRO */
   2025 		if (sendmp != NULL) {
   2026 			rxr->next_to_check = i;
   2027 			IXGBE_RX_UNLOCK(rxr);
   2028 			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
   2029 			IXGBE_RX_LOCK(rxr);
   2030 			i = rxr->next_to_check;
   2031 		}
   2032 
   2033 		/* Every 8 descriptors we go to refresh mbufs */
   2034 		if (processed == 8) {
   2035 			ixgbe_refresh_mbufs(rxr, i);
   2036 			processed = 0;
   2037 		}
   2038 	}
   2039 
   2040 	/* Refresh any remaining buf structs */
   2041 	if (ixgbe_rx_unrefreshed(rxr))
   2042 		ixgbe_refresh_mbufs(rxr, i);
   2043 
   2044 	rxr->next_to_check = i;
   2045 
   2046 	IXGBE_RX_UNLOCK(rxr);
   2047 
   2048 #ifdef LRO
   2049 	/*
   2050 	 * Flush any outstanding LRO work
   2051 	 */
   2052 	tcp_lro_flush_all(lro);
   2053 #endif /* LRO */
   2054 
   2055 	/*
   2056 	 * Still have cleaning to do?
   2057 	 */
   2058 	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
   2059 		return (TRUE);
   2060 
   2061 	return (FALSE);
   2062 } /* ixgbe_rxeof */
   2063 
   2064 
   2065 /************************************************************************
   2066  * ixgbe_rx_checksum
   2067  *
   2068  *   Verify that the hardware indicated that the checksum is valid.
   2069  *   Inform the stack about the status of checksum so that stack
   2070  *   doesn't spend time verifying the checksum.
   2071  ************************************************************************/
   2072 static void
   2073 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
   2074     struct ixgbe_hw_stats *stats)
   2075 {
   2076 	u16  status = (u16)staterr;
   2077 	u8   errors = (u8)(staterr >> 24);
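         	/*
         	 * The IXGBE_RXD_STAT_* bits tested below are defined against
         	 * the low 16 bits of the write-back status_error word, and the
         	 * IXGBE_RXD_ERR_* bits against its top byte, hence the
         	 * narrowing casts above.
         	 */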
   2078 #if 0
   2079 	bool sctp = false;
   2080 
   2081 	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
   2082 	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
   2083 		sctp = true;
   2084 #endif
   2085 
   2086 	/* IPv4 checksum */
   2087 	if (status & IXGBE_RXD_STAT_IPCS) {
   2088 		stats->ipcs.ev_count++;
   2089 		if (!(errors & IXGBE_RXD_ERR_IPE)) {
   2090 			/* IP Checksum Good */
   2091 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
   2092 		} else {
   2093 			stats->ipcs_bad.ev_count++;
   2094 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
   2095 		}
   2096 	}
   2097 	/* TCP/UDP/SCTP checksum */
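         	/*
         	 * All four TCP/UDP csum_flags are set at once; the stack only
         	 * consults the bits that match the packet's actual protocol,
         	 * so the driver need not decode ptype here.
         	 */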
   2098 	if (status & IXGBE_RXD_STAT_L4CS) {
   2099 		stats->l4cs.ev_count++;
   2100 		int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
   2101 		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
   2102 			mp->m_pkthdr.csum_flags |= type;
   2103 		} else {
   2104 			stats->l4cs_bad.ev_count++;
   2105 			mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
   2106 		}
   2107 	}
   2108 } /* ixgbe_rx_checksum */
   2109 
   2110 /************************************************************************
   2111  * ixgbe_dma_malloc
   2112  ************************************************************************/
   2113 int
   2114 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
   2115 		struct ixgbe_dma_alloc *dma, const int mapflags)
   2116 {
   2117 	device_t dev = adapter->dev;
   2118 	int      r, rsegs;
   2119 
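         	/*
         	 * Standard NetBSD bus_dma sequence: create a tag, allocate
         	 * DMA-safe memory, map it into kernel virtual address space,
         	 * create a DMA map, and finally load the map.  Each failure
         	 * label below unwinds exactly the steps that succeeded.
         	 */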
   2120 	r = ixgbe_dma_tag_create(
   2121 	     /*      parent */ adapter->osdep.dmat,
   2122 	     /*   alignment */ DBA_ALIGN,
   2123 	     /*      bounds */ 0,
   2124 	     /*     maxsize */ size,
   2125 	     /*   nsegments */ 1,
   2126 	     /*  maxsegsize */ size,
   2127 	     /*       flags */ BUS_DMA_ALLOCNOW,
   2128 			       &dma->dma_tag);
   2129 	if (r != 0) {
   2130 		aprint_error_dev(dev,
   2131 		    "%s: ixgbe_dma_tag_create failed; error %d\n", __func__,
   2132 		    r);
   2133 		goto fail_0;
   2134 	}
   2135 
   2136 	r = bus_dmamem_alloc(dma->dma_tag->dt_dmat, size,
   2137 	    dma->dma_tag->dt_alignment, dma->dma_tag->dt_boundary,
   2138 	    &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
   2139 	if (r != 0) {
   2140 		aprint_error_dev(dev,
   2141 		    "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
   2142 		goto fail_1;
   2143 	}
   2144 
   2145 	r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
   2146 	    size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
   2147 	if (r != 0) {
   2148 		aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
   2149 		    __func__, r);
   2150 		goto fail_2;
   2151 	}
   2152 
   2153 	r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
   2154 	if (r != 0) {
    2155 		aprint_error_dev(dev,
    2156 		    "%s: ixgbe_dmamap_create failed; error %d\n", __func__, r);
   2157 		goto fail_3;
   2158 	}
   2159 
   2160 	r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map,
   2161 	    dma->dma_vaddr, size, NULL, mapflags | BUS_DMA_NOWAIT);
   2162 	if (r != 0) {
   2163 		aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
   2164 		    __func__, r);
   2165 		goto fail_4;
   2166 	}
   2167 	dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
   2168 	dma->dma_size = size;
   2169 	return 0;
   2170 fail_4:
   2171 	ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
   2172 fail_3:
   2173 	bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
   2174 fail_2:
   2175 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
   2176 fail_1:
   2177 	ixgbe_dma_tag_destroy(dma->dma_tag);
   2178 fail_0:
   2179 
   2180 	return (r);
   2181 } /* ixgbe_dma_malloc */
   2182 
   2183 /************************************************************************
   2184  * ixgbe_dma_free
   2185  ************************************************************************/
   2186 void
   2187 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
   2188 {
   2189 	bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
   2190 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   2191 	ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
   2192 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
   2193 	ixgbe_dma_tag_destroy(dma->dma_tag);
   2194 } /* ixgbe_dma_free */
   2195 
   2196 
   2197 /************************************************************************
   2198  * ixgbe_allocate_queues
   2199  *
   2200  *   Allocate memory for the transmit and receive rings, and then
   2201  *   the descriptors associated with each, called only once at attach.
   2202  ************************************************************************/
   2203 int
   2204 ixgbe_allocate_queues(struct adapter *adapter)
   2205 {
   2206 	device_t	dev = adapter->dev;
   2207 	struct ix_queue	*que;
   2208 	struct tx_ring	*txr;
   2209 	struct rx_ring	*rxr;
   2210 	int             rsize, tsize, error = IXGBE_SUCCESS;
   2211 	int             txconf = 0, rxconf = 0;
   2212 
   2213 	/* First, allocate the top level queue structs */
   2214 	adapter->queues = (struct ix_queue *)malloc(sizeof(struct ix_queue) *
    2215 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
    2216 	if (adapter->queues == NULL) {
    2217 		aprint_error_dev(dev, "Unable to allocate queue memory\n");
    2218 		error = ENOMEM;
    2219 		goto fail;
    2220 	}
   2221 
   2222 	/* Second, allocate the TX ring struct memory */
   2223 	adapter->tx_rings = (struct tx_ring *)malloc(sizeof(struct tx_ring) *
   2224 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
   2225 	if (adapter->tx_rings == NULL) {
   2226 		aprint_error_dev(dev, "Unable to allocate TX ring memory\n");
   2227 		error = ENOMEM;
   2228 		goto tx_fail;
   2229 	}
   2230 
   2231 	/* Third, allocate the RX ring */
   2232 	adapter->rx_rings = (struct rx_ring *)malloc(sizeof(struct rx_ring) *
   2233 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
   2234 	if (adapter->rx_rings == NULL) {
   2235 		aprint_error_dev(dev, "Unable to allocate RX ring memory\n");
   2236 		error = ENOMEM;
   2237 		goto rx_fail;
   2238 	}
   2239 
   2240 	/* For the ring itself */
   2241 	tsize = roundup2(adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc),
   2242 	    DBA_ALIGN);
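         	/*
         	 * The ring size is rounded up to a multiple of DBA_ALIGN, the
         	 * same value ixgbe_dma_malloc() uses as its allocation
         	 * alignment, matching the descriptor base/length alignment
         	 * the hardware expects.
         	 */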
   2243 
   2244 	/*
   2245 	 * Now set up the TX queues, txconf is needed to handle the
   2246 	 * possibility that things fail midcourse and we need to
   2247 	 * undo memory gracefully
   2248 	 */
   2249 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
   2250 		/* Set up some basics */
   2251 		txr = &adapter->tx_rings[i];
   2252 		txr->adapter = adapter;
   2253 		txr->txr_interq = NULL;
   2254 		/* In case SR-IOV is enabled, align the index properly */
   2255 #ifdef PCI_IOV
   2256 		txr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
   2257 		    i);
   2258 #else
   2259 		txr->me = i;
   2260 #endif
   2261 		txr->num_desc = adapter->num_tx_desc;
   2262 
   2263 		/* Initialize the TX side lock */
   2264 		mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
   2265 
   2266 		if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma,
   2267 		    BUS_DMA_NOWAIT)) {
   2268 			aprint_error_dev(dev,
   2269 			    "Unable to allocate TX Descriptor memory\n");
   2270 			error = ENOMEM;
   2271 			goto err_tx_desc;
   2272 		}
   2273 		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
   2274 		bzero((void *)txr->tx_base, tsize);
   2275 
   2276 		/* Now allocate transmit buffers for the ring */
   2277 		if (ixgbe_allocate_transmit_buffers(txr)) {
   2278 			aprint_error_dev(dev,
   2279 			    "Critical Failure setting up transmit buffers\n");
   2280 			error = ENOMEM;
   2281 			goto err_tx_desc;
    2282 		}
   2283 		if (!(adapter->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
   2284 			/* Allocate a buf ring */
   2285 			txr->txr_interq = pcq_create(IXGBE_BR_SIZE, KM_SLEEP);
   2286 			if (txr->txr_interq == NULL) {
   2287 				aprint_error_dev(dev,
   2288 				    "Critical Failure setting up buf ring\n");
   2289 				error = ENOMEM;
   2290 				goto err_tx_desc;
   2291 			}
   2292 		}
   2293 	}
   2294 
   2295 	/*
   2296 	 * Next the RX queues...
   2297 	 */
   2298 	rsize = roundup2(adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc),
   2299 	    DBA_ALIGN);
   2300 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
   2301 		rxr = &adapter->rx_rings[i];
   2302 		/* Set up some basics */
   2303 		rxr->adapter = adapter;
   2304 #ifdef PCI_IOV
   2305 		/* In case SR-IOV is enabled, align the index properly */
   2306 		rxr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
   2307 		    i);
   2308 #else
   2309 		rxr->me = i;
   2310 #endif
   2311 		rxr->num_desc = adapter->num_rx_desc;
   2312 
   2313 		/* Initialize the RX side lock */
   2314 		mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
   2315 
   2316 		if (ixgbe_dma_malloc(adapter, rsize, &rxr->rxdma,
   2317 		    BUS_DMA_NOWAIT)) {
   2318 			aprint_error_dev(dev,
    2319 			    "Unable to allocate RX Descriptor memory\n");
   2320 			error = ENOMEM;
   2321 			goto err_rx_desc;
   2322 		}
   2323 		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
   2324 		bzero((void *)rxr->rx_base, rsize);
   2325 
   2326 		/* Allocate receive buffers for the ring */
   2327 		if (ixgbe_allocate_receive_buffers(rxr)) {
   2328 			aprint_error_dev(dev,
   2329 			    "Critical Failure setting up receive buffers\n");
   2330 			error = ENOMEM;
   2331 			goto err_rx_desc;
   2332 		}
   2333 	}
   2334 
   2335 	/*
   2336 	 * Finally set up the queue holding structs
   2337 	 */
   2338 	for (int i = 0; i < adapter->num_queues; i++) {
   2339 		que = &adapter->queues[i];
   2340 		que->adapter = adapter;
   2341 		que->me = i;
   2342 		que->txr = &adapter->tx_rings[i];
   2343 		que->rxr = &adapter->rx_rings[i];
   2344 
   2345 		mutex_init(&que->dc_mtx, MUTEX_DEFAULT, IPL_NET);
   2346 		que->disabled_count = 0;
   2347 	}
   2348 
   2349 	return (0);
   2350 
   2351 err_rx_desc:
   2352 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
   2353 		ixgbe_dma_free(adapter, &rxr->rxdma);
   2354 err_tx_desc:
   2355 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
   2356 		ixgbe_dma_free(adapter, &txr->txdma);
   2357 	free(adapter->rx_rings, M_DEVBUF);
   2358 rx_fail:
   2359 	free(adapter->tx_rings, M_DEVBUF);
   2360 tx_fail:
   2361 	free(adapter->queues, M_DEVBUF);
   2362 fail:
   2363 	return (error);
   2364 } /* ixgbe_allocate_queues */
   2365