      1 /* $NetBSD: ix_txrx.c,v 1.49 2018/07/31 09:19:34 msaitoh Exp $ */
      2 
      3 /******************************************************************************
      4 
      5   Copyright (c) 2001-2017, Intel Corporation
      6   All rights reserved.
      7 
      8   Redistribution and use in source and binary forms, with or without
      9   modification, are permitted provided that the following conditions are met:
     10 
     11    1. Redistributions of source code must retain the above copyright notice,
     12       this list of conditions and the following disclaimer.
     13 
     14    2. Redistributions in binary form must reproduce the above copyright
     15       notice, this list of conditions and the following disclaimer in the
     16       documentation and/or other materials provided with the distribution.
     17 
     18    3. Neither the name of the Intel Corporation nor the names of its
     19       contributors may be used to endorse or promote products derived from
     20       this software without specific prior written permission.
     21 
     22   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     23   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     24   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     25   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     26   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     27   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     28   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     29   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     30   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     31   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     32   POSSIBILITY OF SUCH DAMAGE.
     33 
     34 ******************************************************************************/
     35 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 327031 2017-12-20 18:15:06Z erj $*/
     36 
     37 /*
     38  * Copyright (c) 2011 The NetBSD Foundation, Inc.
     39  * All rights reserved.
     40  *
     41  * This code is derived from software contributed to The NetBSD Foundation
     42  * by Coyote Point Systems, Inc.
     43  *
     44  * Redistribution and use in source and binary forms, with or without
     45  * modification, are permitted provided that the following conditions
     46  * are met:
     47  * 1. Redistributions of source code must retain the above copyright
     48  *    notice, this list of conditions and the following disclaimer.
     49  * 2. Redistributions in binary form must reproduce the above copyright
     50  *    notice, this list of conditions and the following disclaimer in the
     51  *    documentation and/or other materials provided with the distribution.
     52  *
     53  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     54  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     55  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     56  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     57  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     58  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     59  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     60  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     61  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     62  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     63  * POSSIBILITY OF SUCH DAMAGE.
     64  */
     65 
     66 #include "opt_inet.h"
     67 #include "opt_inet6.h"
     68 
     69 #include "ixgbe.h"
     70 
      71 /*
      72  * HW RSC control:
      73  *  This feature only works with
      74  *  IPv4, and only on 82599 and later.
      75  *  It also causes IP forwarding to
      76  *  fail, and, unlike LRO, this cannot
      77  *  be controlled by the stack. For
      78  *  these reasons it is left off by
      79  *  default, with no tunable
      80  *  interface; enabling it requires
      81  *  recompiling with this set to TRUE.
      82  */
     83 static bool ixgbe_rsc_enable = FALSE;
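         /*
          * Note: even with this rebuilt as TRUE, ixgbe_setup_hw_rsc() below
          * only programs RSC on a ring when the interface has IFCAP_LRO
          * enabled, so flipping this flag is not sufficient by itself.
          */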
     84 
      85 /*
      86  * For Flow Director: this is the
      87  * number of TX packets between
      88  * samples taken for the filter pool,
      89  * so every 20th packet is probed.
      90  *
      91  * Setting this to 0 disables
      92  * the feature.
      93  */
     94 static int atr_sample_rate = 20;
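         /*
          * Note: ixgbe_setup_transmit_ring() copies this value into
          * txr->atr_sample when the FDIR feature is enabled, and ixgbe_xmit()
          * calls ixgbe_atr() each time a ring's atr_count reaches it.
          */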
     95 
     96 /************************************************************************
     97  *  Local Function prototypes
     98  ************************************************************************/
     99 static void          ixgbe_setup_transmit_ring(struct tx_ring *);
    100 static void          ixgbe_free_transmit_buffers(struct tx_ring *);
    101 static int           ixgbe_setup_receive_ring(struct rx_ring *);
    102 static void          ixgbe_free_receive_buffers(struct rx_ring *);
    103 static void          ixgbe_rx_checksum(u32, struct mbuf *, u32,
    104                                        struct ixgbe_hw_stats *);
    105 static void          ixgbe_refresh_mbufs(struct rx_ring *, int);
    106 static void          ixgbe_drain(struct ifnet *, struct tx_ring *);
    107 static int           ixgbe_xmit(struct tx_ring *, struct mbuf *);
    108 static int           ixgbe_tx_ctx_setup(struct tx_ring *,
    109                                         struct mbuf *, u32 *, u32 *);
    110 static int           ixgbe_tso_setup(struct tx_ring *,
    111                                      struct mbuf *, u32 *, u32 *);
    112 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
    113 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
    114                                     struct mbuf *, u32);
    115 static int           ixgbe_dma_malloc(struct adapter *, bus_size_t,
    116                                       struct ixgbe_dma_alloc *, int);
    117 static void          ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
    118 
    119 static void	ixgbe_setup_hw_rsc(struct rx_ring *);
    120 
    121 /************************************************************************
    122  * ixgbe_legacy_start_locked - Transmit entry point
    123  *
    124  *   Called by the stack to initiate a transmit.
    125  *   The driver will remain in this routine as long as there are
    126  *   packets to transmit and transmit resources are available.
    127  *   In case resources are not available, the stack is notified
    128  *   and the packet is requeued.
    129  ************************************************************************/
    130 int
    131 ixgbe_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
    132 {
    133 	int rc;
    134 	struct mbuf    *m_head;
    135 	struct adapter *adapter = txr->adapter;
    136 
    137 	IXGBE_TX_LOCK_ASSERT(txr);
    138 
    139 	if (!adapter->link_active) {
     140 		/*
     141 		 * Discard all packets buffered in the IFQ to avoid
     142 		 * sending stale packets once the link comes back up.
     143 		 */
    144 		ixgbe_drain(ifp, txr);
    145 		return (ENETDOWN);
    146 	}
    147 	if ((ifp->if_flags & IFF_RUNNING) == 0)
    148 		return (ENETDOWN);
    149 	if (txr->txr_no_space)
    150 		return (ENETDOWN);
    151 
    152 	while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
    153 		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
    154 			break;
    155 
    156 		IFQ_POLL(&ifp->if_snd, m_head);
    157 		if (m_head == NULL)
    158 			break;
    159 
    160 		if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
    161 			break;
    162 		}
    163 		IFQ_DEQUEUE(&ifp->if_snd, m_head);
    164 		if (rc != 0) {
    165 			m_freem(m_head);
    166 			continue;
    167 		}
    168 
    169 		/* Send a copy of the frame to the BPF listener */
    170 		bpf_mtap(ifp, m_head, BPF_D_OUT);
    171 	}
    172 
    173 	return IXGBE_SUCCESS;
    174 } /* ixgbe_legacy_start_locked */
    175 
    176 /************************************************************************
    177  * ixgbe_legacy_start
    178  *
    179  *   Called by the stack, this always uses the first tx ring,
    180  *   and should not be used with multiqueue tx enabled.
    181  ************************************************************************/
    182 void
    183 ixgbe_legacy_start(struct ifnet *ifp)
    184 {
    185 	struct adapter *adapter = ifp->if_softc;
    186 	struct tx_ring *txr = adapter->tx_rings;
    187 
    188 	if (ifp->if_flags & IFF_RUNNING) {
    189 		IXGBE_TX_LOCK(txr);
    190 		ixgbe_legacy_start_locked(ifp, txr);
    191 		IXGBE_TX_UNLOCK(txr);
    192 	}
    193 } /* ixgbe_legacy_start */
    194 
    195 /************************************************************************
    196  * ixgbe_mq_start - Multiqueue Transmit Entry Point
    197  *
    198  *   (if_transmit function)
    199  ************************************************************************/
    200 int
    201 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
    202 {
    203 	struct adapter	*adapter = ifp->if_softc;
    204 	struct tx_ring	*txr;
    205 	int 		i, err = 0;
    206 #ifdef RSS
    207 	uint32_t bucket_id;
    208 #endif
    209 
     210 	/*
     211 	 * When doing RSS, map the packet to the same outbound queue
     212 	 * that the incoming flow would be mapped to.
     213 	 *
     214 	 * If everything is set up correctly, it should be the same
     215 	 * bucket that the current CPU is mapped to.
     216 	 */
    217 #ifdef RSS
    218 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
    219 		if ((adapter->feat_en & IXGBE_FEATURE_RSS) &&
    220 		    (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
    221 		    &bucket_id) == 0)) {
    222 			i = bucket_id % adapter->num_queues;
    223 #ifdef IXGBE_DEBUG
    224 			if (bucket_id > adapter->num_queues)
    225 				if_printf(ifp,
    226 				    "bucket_id (%d) > num_queues (%d)\n",
    227 				    bucket_id, adapter->num_queues);
    228 #endif
    229 		} else
    230 			i = m->m_pkthdr.flowid % adapter->num_queues;
    231 	} else
     232 #endif /* RSS */
    233 		i = cpu_index(curcpu()) % adapter->num_queues;
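         	/*
         	 * E.g. (illustrative only): with num_queues = 8, a packet sent
         	 * from the CPU with index 10 lands on tx ring 10 % 8 = 2.
         	 */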
    234 
    235 	/* Check for a hung queue and pick alternative */
    236 	if (((1 << i) & adapter->active_queues) == 0)
    237 		i = ffs64(adapter->active_queues);
    238 
    239 	txr = &adapter->tx_rings[i];
    240 
    241 	err = pcq_put(txr->txr_interq, m);
    242 	if (err == false) {
    243 		m_freem(m);
    244 		txr->pcq_drops.ev_count++;
     245 		return (ENOBUFS);
    246 	}
    247 	if (IXGBE_TX_TRYLOCK(txr)) {
    248 		ixgbe_mq_start_locked(ifp, txr);
    249 		IXGBE_TX_UNLOCK(txr);
    250 	} else {
    251 		if (adapter->txrx_use_workqueue) {
    252 			u_int *enqueued;
    253 
     254 			/*
     255 			 * This function itself is not called in interrupt
     256 			 * context, but it can run in fast softint context
     257 			 * right after receiving forwarded packets.  The
     258 			 * workqueue must therefore be protected against
     259 			 * being enqueued twice when the machine handles
     260 			 * both locally generated and forwarded packets.
     261 			 */
    262 			enqueued = percpu_getref(adapter->txr_wq_enqueued);
    263 			if (*enqueued == 0) {
    264 				*enqueued = 1;
    265 				percpu_putref(adapter->txr_wq_enqueued);
    266 				workqueue_enqueue(adapter->txr_wq,
    267 				    &txr->wq_cookie, curcpu());
    268 			} else
    269 				percpu_putref(adapter->txr_wq_enqueued);
    270 		} else
    271 			softint_schedule(txr->txr_si);
    272 	}
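         	/*
         	 * Note: the per-CPU txr_wq_enqueued flag set above is cleared
         	 * again in ixgbe_deferred_mq_start_work() when the work item
         	 * actually runs.
         	 */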
    273 
    274 	return (0);
    275 } /* ixgbe_mq_start */
    276 
    277 /************************************************************************
    278  * ixgbe_mq_start_locked
    279  ************************************************************************/
    280 int
    281 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
    282 {
    283 	struct mbuf    *next;
    284 	int            enqueued = 0, err = 0;
    285 
    286 	if (!txr->adapter->link_active) {
     287 		/*
     288 		 * Discard all packets buffered in txr_interq to avoid
     289 		 * sending stale packets once the link comes back up.
     290 		 */
    291 		ixgbe_drain(ifp, txr);
    292 		return (ENETDOWN);
    293 	}
    294 	if ((ifp->if_flags & IFF_RUNNING) == 0)
    295 		return (ENETDOWN);
    296 	if (txr->txr_no_space)
    297 		return (ENETDOWN);
    298 
    299 	/* Process the queue */
    300 	while ((next = pcq_get(txr->txr_interq)) != NULL) {
    301 		if ((err = ixgbe_xmit(txr, next)) != 0) {
    302 			m_freem(next);
    303 			/* All errors are counted in ixgbe_xmit() */
    304 			break;
    305 		}
    306 		enqueued++;
    307 #if __FreeBSD_version >= 1100036
     308 		/*
     309 		 * Since we're looking at the tx ring, we can check
     310 		 * to see if we're a VF by examining our tail register
     311 		 * address.
     312 		 */
    313 		if ((txr->adapter->feat_en & IXGBE_FEATURE_VF) &&
    314 		    (next->m_flags & M_MCAST))
    315 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
    316 #endif
    317 		/* Send a copy of the frame to the BPF listener */
    318 		bpf_mtap(ifp, next, BPF_D_OUT);
    319 		if ((ifp->if_flags & IFF_RUNNING) == 0)
    320 			break;
    321 	}
    322 
    323 	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->adapter))
    324 		ixgbe_txeof(txr);
    325 
    326 	return (err);
    327 } /* ixgbe_mq_start_locked */
    328 
    329 /************************************************************************
    330  * ixgbe_deferred_mq_start
    331  *
     332  *   Called from a softint, or indirectly from a workqueue, to drain
     333  *   queued transmit packets.
    334  ************************************************************************/
    335 void
    336 ixgbe_deferred_mq_start(void *arg)
    337 {
    338 	struct tx_ring *txr = arg;
    339 	struct adapter *adapter = txr->adapter;
    340 	struct ifnet   *ifp = adapter->ifp;
    341 
    342 	IXGBE_TX_LOCK(txr);
    343 	if (pcq_peek(txr->txr_interq) != NULL)
    344 		ixgbe_mq_start_locked(ifp, txr);
    345 	IXGBE_TX_UNLOCK(txr);
    346 } /* ixgbe_deferred_mq_start */
    347 
    348 /************************************************************************
    349  * ixgbe_deferred_mq_start_work
    350  *
    351  *   Called from a workqueue to drain queued transmit packets.
    352  ************************************************************************/
    353 void
    354 ixgbe_deferred_mq_start_work(struct work *wk, void *arg)
    355 {
    356 	struct tx_ring *txr = container_of(wk, struct tx_ring, wq_cookie);
    357 	struct adapter *adapter = txr->adapter;
    358 	u_int *enqueued = percpu_getref(adapter->txr_wq_enqueued);
    359 	*enqueued = 0;
    360 	percpu_putref(adapter->txr_wq_enqueued);
    361 
    362 	ixgbe_deferred_mq_start(txr);
     363 } /* ixgbe_deferred_mq_start_work */
    364 
    365 /************************************************************************
    366  * ixgbe_drain_all
    367  ************************************************************************/
    368 void
    369 ixgbe_drain_all(struct adapter *adapter)
    370 {
    371 	struct ifnet *ifp = adapter->ifp;
    372 	struct ix_queue *que = adapter->queues;
    373 
    374 	for (int i = 0; i < adapter->num_queues; i++, que++) {
    375 		struct tx_ring  *txr = que->txr;
    376 
    377 		IXGBE_TX_LOCK(txr);
    378 		ixgbe_drain(ifp, txr);
    379 		IXGBE_TX_UNLOCK(txr);
    380 	}
    381 }
    382 
    383 /************************************************************************
    384  * ixgbe_xmit
    385  *
    386  *   Maps the mbufs to tx descriptors, allowing the
    387  *   TX engine to transmit the packets.
    388  *
    389  *   Return 0 on success, positive on failure
    390  ************************************************************************/
    391 static int
    392 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
    393 {
    394 	struct adapter          *adapter = txr->adapter;
    395 	struct ixgbe_tx_buf     *txbuf;
    396 	union ixgbe_adv_tx_desc *txd = NULL;
    397 	struct ifnet	        *ifp = adapter->ifp;
    398 	int                     i, j, error;
    399 	int                     first;
    400 	u32                     olinfo_status = 0, cmd_type_len;
    401 	bool                    remap = TRUE;
    402 	bus_dmamap_t            map;
    403 
    404 	/* Basic descriptor defines */
    405 	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
    406 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
    407 
    408 	if (vlan_has_tag(m_head))
    409 		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
    410 
    411 	/*
    412 	 * Important to capture the first descriptor
    413 	 * used because it will contain the index of
    414 	 * the one we tell the hardware to report back
    415 	 */
    416 	first = txr->next_avail_desc;
    417 	txbuf = &txr->tx_buffers[first];
    418 	map = txbuf->map;
    419 
    420 	/*
    421 	 * Map the packet for DMA.
    422 	 */
    423 retry:
    424 	error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map, m_head,
    425 	    BUS_DMA_NOWAIT);
    426 
    427 	if (__predict_false(error)) {
    428 		struct mbuf *m;
    429 
    430 		switch (error) {
    431 		case EAGAIN:
    432 			txr->q_eagain_tx_dma_setup++;
    433 			return EAGAIN;
    434 		case ENOMEM:
    435 			txr->q_enomem_tx_dma_setup++;
    436 			return EAGAIN;
    437 		case EFBIG:
    438 			/* Try it again? - one try */
    439 			if (remap == TRUE) {
    440 				remap = FALSE;
    441 				/*
    442 				 * XXX: m_defrag will choke on
    443 				 * non-MCLBYTES-sized clusters
    444 				 */
    445 				txr->q_efbig_tx_dma_setup++;
    446 				m = m_defrag(m_head, M_NOWAIT);
    447 				if (m == NULL) {
    448 					txr->q_mbuf_defrag_failed++;
    449 					return ENOBUFS;
    450 				}
    451 				m_head = m;
    452 				goto retry;
    453 			} else {
    454 				txr->q_efbig2_tx_dma_setup++;
    455 				return error;
    456 			}
    457 		case EINVAL:
    458 			txr->q_einval_tx_dma_setup++;
    459 			return error;
    460 		default:
    461 			txr->q_other_tx_dma_setup++;
    462 			return error;
    463 		}
    464 	}
    465 
    466 	/* Make certain there are enough descriptors */
    467 	if (txr->tx_avail < (map->dm_nsegs + 2)) {
    468 		txr->txr_no_space = true;
    469 		txr->no_desc_avail.ev_count++;
    470 		ixgbe_dmamap_unload(txr->txtag, txbuf->map);
    471 		return EAGAIN;
    472 	}
    473 
     474 	/*
     475 	 * Set up the appropriate offload context;
     476 	 * this will consume the first descriptor.
     477 	 */
    478 	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
    479 	if (__predict_false(error)) {
    480 		return (error);
    481 	}
    482 
    483 	/* Do the flow director magic */
    484 	if ((adapter->feat_en & IXGBE_FEATURE_FDIR) &&
    485 	    (txr->atr_sample) && (!adapter->fdir_reinit)) {
    486 		++txr->atr_count;
    487 		if (txr->atr_count >= atr_sample_rate) {
    488 			ixgbe_atr(txr, m_head);
    489 			txr->atr_count = 0;
    490 		}
    491 	}
    492 
    493 	olinfo_status |= IXGBE_ADVTXD_CC;
    494 	i = txr->next_avail_desc;
    495 	for (j = 0; j < map->dm_nsegs; j++) {
    496 		bus_size_t seglen;
    497 		bus_addr_t segaddr;
    498 
    499 		txbuf = &txr->tx_buffers[i];
    500 		txd = &txr->tx_base[i];
    501 		seglen = map->dm_segs[j].ds_len;
    502 		segaddr = htole64(map->dm_segs[j].ds_addr);
    503 
    504 		txd->read.buffer_addr = segaddr;
    505 		txd->read.cmd_type_len = htole32(cmd_type_len | seglen);
    506 		txd->read.olinfo_status = htole32(olinfo_status);
    507 
    508 		if (++i == txr->num_desc)
    509 			i = 0;
    510 	}
    511 
    512 	txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
    513 	txr->tx_avail -= map->dm_nsegs;
    514 	txr->next_avail_desc = i;
    515 
    516 	txbuf->m_head = m_head;
     517 	/*
     518 	 * Here we swap the maps so the last descriptor,
     519 	 * which gets the completion interrupt, has the
     520 	 * real map, and the first descriptor gets the
     521 	 * unused map from this last descriptor.
     522 	 */
    523 	txr->tx_buffers[first].map = txbuf->map;
    524 	txbuf->map = map;
    525 	bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
    526 	    BUS_DMASYNC_PREWRITE);
    527 
    528 	/* Set the EOP descriptor that will be marked done */
    529 	txbuf = &txr->tx_buffers[first];
    530 	txbuf->eop = txd;
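         	/*
         	 * Note: after the swap above, the buffer slot of the final
         	 * segment owns both m_head and the loaded DMA map, while
         	 * tx_buffers[first] carries only the EOP pointer that
         	 * ixgbe_txeof() waits on.
         	 */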
    531 
    532 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
    533 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
     534 	/*
     535 	 * Advance the Transmit Descriptor Tail (TDT); this tells the
     536 	 * hardware that this frame is available to transmit.
     537 	 */
    538 	++txr->total_packets.ev_count;
    539 	IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
    540 
    541 	/*
    542 	 * XXXX NOMPSAFE: ifp->if_data should be percpu.
    543 	 */
    544 	ifp->if_obytes += m_head->m_pkthdr.len;
    545 	if (m_head->m_flags & M_MCAST)
    546 		ifp->if_omcasts++;
    547 
    548 	/* Mark queue as having work */
    549 	if (txr->busy == 0)
    550 		txr->busy = 1;
    551 
    552 	return (0);
    553 } /* ixgbe_xmit */
    554 
    555 /************************************************************************
    556  * ixgbe_drain
    557  ************************************************************************/
    558 static void
    559 ixgbe_drain(struct ifnet *ifp, struct tx_ring *txr)
    560 {
    561 	struct mbuf *m;
    562 
    563 	IXGBE_TX_LOCK_ASSERT(txr);
    564 
    565 	if (txr->me == 0) {
    566 		while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
    567 			IFQ_DEQUEUE(&ifp->if_snd, m);
    568 			m_freem(m);
    569 			IF_DROP(&ifp->if_snd);
    570 		}
    571 	}
    572 
    573 	while ((m = pcq_get(txr->txr_interq)) != NULL) {
    574 		m_freem(m);
    575 		txr->pcq_drops.ev_count++;
    576 	}
    577 }
    578 
    579 /************************************************************************
    580  * ixgbe_allocate_transmit_buffers
    581  *
     582  *   Allocate memory for tx_buffer structures. The tx_buffer stores all
     583  *   the information needed to transmit a packet on the wire. This is
     584  *   called only once at attach; setup is done on every reset.
    585  ************************************************************************/
    586 static int
    587 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
    588 {
    589 	struct adapter      *adapter = txr->adapter;
    590 	device_t            dev = adapter->dev;
    591 	struct ixgbe_tx_buf *txbuf;
    592 	int                 error, i;
    593 
    594 	/*
    595 	 * Setup DMA descriptor areas.
    596 	 */
    597 	error = ixgbe_dma_tag_create(
    598 	         /*      parent */ adapter->osdep.dmat,
    599 	         /*   alignment */ 1,
    600 	         /*      bounds */ 0,
    601 	         /*     maxsize */ IXGBE_TSO_SIZE,
    602 	         /*   nsegments */ adapter->num_segs,
    603 	         /*  maxsegsize */ PAGE_SIZE,
    604 	         /*       flags */ 0,
    605 	                           &txr->txtag);
    606 	if (error != 0) {
     607 		aprint_error_dev(dev, "Unable to allocate TX DMA tag\n");
    608 		goto fail;
    609 	}
    610 
    611 	txr->tx_buffers =
    612 	    (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
    613 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
    614 	if (txr->tx_buffers == NULL) {
    615 		aprint_error_dev(dev, "Unable to allocate tx_buffer memory\n");
    616 		error = ENOMEM;
    617 		goto fail;
    618 	}
    619 
    620 	/* Create the descriptor buffer dma maps */
    621 	txbuf = txr->tx_buffers;
    622 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
    623 		error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
    624 		if (error != 0) {
    625 			aprint_error_dev(dev,
    626 			    "Unable to create TX DMA map (%d)\n", error);
    627 			goto fail;
    628 		}
    629 	}
    630 
    631 	return 0;
    632 fail:
     633 	/* Free everything; this handles the case where we failed partway through */
    634 #if 0 /* XXX was FreeBSD */
    635 	ixgbe_free_transmit_structures(adapter);
    636 #else
    637 	ixgbe_free_transmit_buffers(txr);
    638 #endif
    639 	return (error);
    640 } /* ixgbe_allocate_transmit_buffers */
    641 
    642 /************************************************************************
    643  * ixgbe_setup_transmit_ring - Initialize a transmit ring.
    644  ************************************************************************/
    645 static void
    646 ixgbe_setup_transmit_ring(struct tx_ring *txr)
    647 {
    648 	struct adapter        *adapter = txr->adapter;
    649 	struct ixgbe_tx_buf   *txbuf;
    650 #ifdef DEV_NETMAP
    651 	struct netmap_adapter *na = NA(adapter->ifp);
    652 	struct netmap_slot    *slot;
    653 #endif /* DEV_NETMAP */
    654 
    655 	/* Clear the old ring contents */
    656 	IXGBE_TX_LOCK(txr);
    657 
    658 #ifdef DEV_NETMAP
    659 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
    660 		/*
    661 		 * (under lock): if in netmap mode, do some consistency
    662 		 * checks and set slot to entry 0 of the netmap ring.
    663 		 */
    664 		slot = netmap_reset(na, NR_TX, txr->me, 0);
    665 	}
    666 #endif /* DEV_NETMAP */
    667 
    668 	bzero((void *)txr->tx_base,
    669 	    (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
    670 	/* Reset indices */
    671 	txr->next_avail_desc = 0;
    672 	txr->next_to_clean = 0;
    673 
    674 	/* Free any existing tx buffers. */
    675 	txbuf = txr->tx_buffers;
    676 	for (int i = 0; i < txr->num_desc; i++, txbuf++) {
    677 		if (txbuf->m_head != NULL) {
    678 			bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
    679 			    0, txbuf->m_head->m_pkthdr.len,
    680 			    BUS_DMASYNC_POSTWRITE);
    681 			ixgbe_dmamap_unload(txr->txtag, txbuf->map);
    682 			m_freem(txbuf->m_head);
    683 			txbuf->m_head = NULL;
    684 		}
    685 
    686 #ifdef DEV_NETMAP
    687 		/*
    688 		 * In netmap mode, set the map for the packet buffer.
    689 		 * NOTE: Some drivers (not this one) also need to set
    690 		 * the physical buffer address in the NIC ring.
    691 		 * Slots in the netmap ring (indexed by "si") are
    692 		 * kring->nkr_hwofs positions "ahead" wrt the
    693 		 * corresponding slot in the NIC ring. In some drivers
    694 		 * (not here) nkr_hwofs can be negative. Function
    695 		 * netmap_idx_n2k() handles wraparounds properly.
    696 		 */
    697 		if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
    698 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
    699 			netmap_load_map(na, txr->txtag,
    700 			    txbuf->map, NMB(na, slot + si));
    701 		}
    702 #endif /* DEV_NETMAP */
    703 
    704 		/* Clear the EOP descriptor pointer */
    705 		txbuf->eop = NULL;
    706 	}
    707 
    708 	/* Set the rate at which we sample packets */
    709 	if (adapter->feat_en & IXGBE_FEATURE_FDIR)
    710 		txr->atr_sample = atr_sample_rate;
    711 
    712 	/* Set number of descriptors available */
    713 	txr->tx_avail = adapter->num_tx_desc;
    714 
    715 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
    716 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
    717 	IXGBE_TX_UNLOCK(txr);
    718 } /* ixgbe_setup_transmit_ring */
    719 
    720 /************************************************************************
    721  * ixgbe_setup_transmit_structures - Initialize all transmit rings.
    722  ************************************************************************/
    723 int
    724 ixgbe_setup_transmit_structures(struct adapter *adapter)
    725 {
    726 	struct tx_ring *txr = adapter->tx_rings;
    727 
    728 	for (int i = 0; i < adapter->num_queues; i++, txr++)
    729 		ixgbe_setup_transmit_ring(txr);
    730 
    731 	return (0);
    732 } /* ixgbe_setup_transmit_structures */
    733 
    734 /************************************************************************
    735  * ixgbe_free_transmit_structures - Free all transmit rings.
    736  ************************************************************************/
    737 void
    738 ixgbe_free_transmit_structures(struct adapter *adapter)
    739 {
    740 	struct tx_ring *txr = adapter->tx_rings;
    741 
    742 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
    743 		ixgbe_free_transmit_buffers(txr);
    744 		ixgbe_dma_free(adapter, &txr->txdma);
    745 		IXGBE_TX_LOCK_DESTROY(txr);
    746 	}
    747 	free(adapter->tx_rings, M_DEVBUF);
    748 } /* ixgbe_free_transmit_structures */
    749 
    750 /************************************************************************
    751  * ixgbe_free_transmit_buffers
    752  *
    753  *   Free transmit ring related data structures.
    754  ************************************************************************/
    755 static void
    756 ixgbe_free_transmit_buffers(struct tx_ring *txr)
    757 {
    758 	struct adapter      *adapter = txr->adapter;
    759 	struct ixgbe_tx_buf *tx_buffer;
    760 	int                 i;
    761 
    762 	INIT_DEBUGOUT("ixgbe_free_transmit_buffers: begin");
    763 
    764 	if (txr->tx_buffers == NULL)
    765 		return;
    766 
    767 	tx_buffer = txr->tx_buffers;
    768 	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
    769 		if (tx_buffer->m_head != NULL) {
    770 			bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
    771 			    0, tx_buffer->m_head->m_pkthdr.len,
    772 			    BUS_DMASYNC_POSTWRITE);
    773 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
    774 			m_freem(tx_buffer->m_head);
    775 			tx_buffer->m_head = NULL;
    776 			if (tx_buffer->map != NULL) {
    777 				ixgbe_dmamap_destroy(txr->txtag,
    778 				    tx_buffer->map);
    779 				tx_buffer->map = NULL;
    780 			}
    781 		} else if (tx_buffer->map != NULL) {
    782 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
    783 			ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
    784 			tx_buffer->map = NULL;
    785 		}
    786 	}
    787 	if (txr->txr_interq != NULL) {
    788 		struct mbuf *m;
    789 
    790 		while ((m = pcq_get(txr->txr_interq)) != NULL)
    791 			m_freem(m);
    792 		pcq_destroy(txr->txr_interq);
    793 	}
    794 	if (txr->tx_buffers != NULL) {
    795 		free(txr->tx_buffers, M_DEVBUF);
    796 		txr->tx_buffers = NULL;
    797 	}
    798 	if (txr->txtag != NULL) {
    799 		ixgbe_dma_tag_destroy(txr->txtag);
    800 		txr->txtag = NULL;
    801 	}
    802 } /* ixgbe_free_transmit_buffers */
    803 
    804 /************************************************************************
    805  * ixgbe_tx_ctx_setup
    806  *
    807  *   Advanced Context Descriptor setup for VLAN, CSUM or TSO
    808  ************************************************************************/
    809 static int
    810 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
    811     u32 *cmd_type_len, u32 *olinfo_status)
    812 {
    813 	struct adapter                   *adapter = txr->adapter;
    814 	struct ixgbe_adv_tx_context_desc *TXD;
    815 	struct ether_vlan_header         *eh;
    816 #ifdef INET
    817 	struct ip                        *ip;
    818 #endif
    819 #ifdef INET6
    820 	struct ip6_hdr                   *ip6;
    821 #endif
    822 	int                              ehdrlen, ip_hlen = 0;
    823 	int                              offload = TRUE;
    824 	int                              ctxd = txr->next_avail_desc;
    825 	u32                              vlan_macip_lens = 0;
    826 	u32                              type_tucmd_mlhl = 0;
    827 	u16                              vtag = 0;
    828 	u16                              etype;
    829 	u8                               ipproto = 0;
    830 	char                             *l3d;
    831 
    832 
    833 	/* First check if TSO is to be used */
    834 	if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4 | M_CSUM_TSOv6)) {
    835 		int rv = ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status);
    836 
    837 		if (rv != 0)
    838 			++adapter->tso_err.ev_count;
    839 		return rv;
    840 	}
    841 
    842 	if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
    843 		offload = FALSE;
    844 
    845 	/* Indicate the whole packet as payload when not doing TSO */
    846 	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
    847 
    848 	/* Now ready a context descriptor */
    849 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
    850 
    851 	/*
    852 	 * In advanced descriptors the vlan tag must
    853 	 * be placed into the context descriptor. Hence
    854 	 * we need to make one even if not doing offloads.
    855 	 */
    856 	if (vlan_has_tag(mp)) {
    857 		vtag = htole16(vlan_get_tag(mp));
    858 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
    859 	} else if (!(txr->adapter->feat_en & IXGBE_FEATURE_NEEDS_CTXD) &&
    860 	           (offload == FALSE))
    861 		return (0);
    862 
    863 	/*
    864 	 * Determine where frame payload starts.
    865 	 * Jump over vlan headers if already present,
    866 	 * helpful for QinQ too.
    867 	 */
    868 	KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
    869 	eh = mtod(mp, struct ether_vlan_header *);
    870 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
    871 		KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
    872 		etype = ntohs(eh->evl_proto);
    873 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
    874 	} else {
    875 		etype = ntohs(eh->evl_encap_proto);
    876 		ehdrlen = ETHER_HDR_LEN;
    877 	}
    878 
    879 	/* Set the ether header length */
    880 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
    881 
    882 	if (offload == FALSE)
    883 		goto no_offloads;
    884 
    885 	/*
    886 	 * If the first mbuf only includes the ethernet header,
    887 	 * jump to the next one
    888 	 * XXX: This assumes the stack splits mbufs containing headers
    889 	 *      on header boundaries
    890 	 * XXX: And assumes the entire IP header is contained in one mbuf
    891 	 */
    892 	if (mp->m_len == ehdrlen && mp->m_next)
    893 		l3d = mtod(mp->m_next, char *);
    894 	else
    895 		l3d = mtod(mp, char *) + ehdrlen;
    896 
    897 	switch (etype) {
    898 #ifdef INET
    899 	case ETHERTYPE_IP:
    900 		ip = (struct ip *)(l3d);
    901 		ip_hlen = ip->ip_hl << 2;
    902 		ipproto = ip->ip_p;
    903 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
    904 		KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
    905 		    ip->ip_sum == 0);
    906 		break;
    907 #endif
    908 #ifdef INET6
    909 	case ETHERTYPE_IPV6:
    910 		ip6 = (struct ip6_hdr *)(l3d);
    911 		ip_hlen = sizeof(struct ip6_hdr);
    912 		ipproto = ip6->ip6_nxt;
    913 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
    914 		break;
    915 #endif
    916 	default:
    917 		offload = false;
    918 		break;
    919 	}
    920 
    921 	if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
    922 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
    923 
    924 	vlan_macip_lens |= ip_hlen;
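         	/*
         	 * E.g. (illustrative): for an untagged IPv4/TCP frame with a
         	 * 14-byte Ethernet header and a 20-byte IP header, the result
         	 * is (14 << IXGBE_ADVTXD_MACLEN_SHIFT) | 20.
         	 */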
    925 
    926 	/* No support for offloads for non-L4 next headers */
     927 	switch (ipproto) {
    928 	case IPPROTO_TCP:
    929 		if (mp->m_pkthdr.csum_flags &
    930 		    (M_CSUM_TCPv4 | M_CSUM_TCPv6))
    931 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
    932 		else
    933 			offload = false;
    934 		break;
    935 	case IPPROTO_UDP:
    936 		if (mp->m_pkthdr.csum_flags &
    937 		    (M_CSUM_UDPv4 | M_CSUM_UDPv6))
    938 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
    939 		else
    940 			offload = false;
    941 		break;
    942 	default:
    943 		offload = false;
    944 		break;
    945 	}
    946 
    947 	if (offload) /* Insert L4 checksum into data descriptors */
    948 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
    949 
    950 no_offloads:
    951 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
    952 
    953 	/* Now copy bits into descriptor */
    954 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
    955 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
    956 	TXD->seqnum_seed = htole32(0);
    957 	TXD->mss_l4len_idx = htole32(0);
    958 
    959 	/* We've consumed the first desc, adjust counters */
    960 	if (++ctxd == txr->num_desc)
    961 		ctxd = 0;
    962 	txr->next_avail_desc = ctxd;
    963 	--txr->tx_avail;
    964 
    965 	return (0);
    966 } /* ixgbe_tx_ctx_setup */
    967 
    968 /************************************************************************
    969  * ixgbe_tso_setup
    970  *
    971  *   Setup work for hardware segmentation offload (TSO) on
    972  *   adapters using advanced tx descriptors
    973  ************************************************************************/
    974 static int
    975 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len,
    976     u32 *olinfo_status)
    977 {
    978 	struct ixgbe_adv_tx_context_desc *TXD;
    979 	struct ether_vlan_header         *eh;
    980 #ifdef INET6
    981 	struct ip6_hdr                   *ip6;
    982 #endif
    983 #ifdef INET
    984 	struct ip                        *ip;
    985 #endif
    986 	struct tcphdr                    *th;
    987 	int                              ctxd, ehdrlen, ip_hlen, tcp_hlen;
    988 	u32                              vlan_macip_lens = 0;
    989 	u32                              type_tucmd_mlhl = 0;
    990 	u32                              mss_l4len_idx = 0, paylen;
    991 	u16                              vtag = 0, eh_type;
    992 
    993 	/*
    994 	 * Determine where frame payload starts.
    995 	 * Jump over vlan headers if already present
    996 	 */
    997 	eh = mtod(mp, struct ether_vlan_header *);
    998 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
    999 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
   1000 		eh_type = eh->evl_proto;
   1001 	} else {
   1002 		ehdrlen = ETHER_HDR_LEN;
   1003 		eh_type = eh->evl_encap_proto;
   1004 	}
   1005 
   1006 	switch (ntohs(eh_type)) {
   1007 #ifdef INET
   1008 	case ETHERTYPE_IP:
   1009 		ip = (struct ip *)(mp->m_data + ehdrlen);
   1010 		if (ip->ip_p != IPPROTO_TCP)
   1011 			return (ENXIO);
   1012 		ip->ip_sum = 0;
   1013 		ip_hlen = ip->ip_hl << 2;
   1014 		th = (struct tcphdr *)((char *)ip + ip_hlen);
   1015 		th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
   1016 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
   1017 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
   1018 		/* Tell transmit desc to also do IPv4 checksum. */
   1019 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
   1020 		break;
   1021 #endif
   1022 #ifdef INET6
   1023 	case ETHERTYPE_IPV6:
   1024 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
   1025 		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
   1026 		if (ip6->ip6_nxt != IPPROTO_TCP)
   1027 			return (ENXIO);
   1028 		ip_hlen = sizeof(struct ip6_hdr);
   1029 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
   1030 		th = (struct tcphdr *)((char *)ip6 + ip_hlen);
   1031 		th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
   1032 		    &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
   1033 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
   1034 		break;
   1035 #endif
   1036 	default:
   1037 		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
   1038 		    __func__, ntohs(eh_type));
   1039 		break;
   1040 	}
   1041 
   1042 	ctxd = txr->next_avail_desc;
   1043 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
   1044 
   1045 	tcp_hlen = th->th_off << 2;
   1046 
   1047 	/* This is used in the transmit desc in encap */
   1048 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
   1049 
   1050 	/* VLAN MACLEN IPLEN */
   1051 	if (vlan_has_tag(mp)) {
   1052 		vtag = htole16(vlan_get_tag(mp));
   1053 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
   1054 	}
   1055 
   1056 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
   1057 	vlan_macip_lens |= ip_hlen;
   1058 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
   1059 
   1060 	/* ADV DTYPE TUCMD */
   1061 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
   1062 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
   1063 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
   1064 
   1065 	/* MSS L4LEN IDX */
   1066 	mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
   1067 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
   1068 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
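         	/*
         	 * E.g. (illustrative): for a 1460-byte segment size and a
         	 * 20-byte TCP header, mss_l4len_idx is
         	 * (1460 << IXGBE_ADVTXD_MSS_SHIFT) | (20 << IXGBE_ADVTXD_L4LEN_SHIFT).
         	 */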
   1069 
   1070 	TXD->seqnum_seed = htole32(0);
   1071 
   1072 	if (++ctxd == txr->num_desc)
   1073 		ctxd = 0;
   1074 
   1075 	txr->tx_avail--;
   1076 	txr->next_avail_desc = ctxd;
   1077 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
   1078 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
   1079 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
   1080 	++txr->tso_tx.ev_count;
   1081 
   1082 	return (0);
   1083 } /* ixgbe_tso_setup */
   1084 
   1085 
   1086 /************************************************************************
   1087  * ixgbe_txeof
   1088  *
   1089  *   Examine each tx_buffer in the used queue. If the hardware is done
   1090  *   processing the packet then free associated resources. The
   1091  *   tx_buffer is put back on the free queue.
   1092  ************************************************************************/
   1093 bool
   1094 ixgbe_txeof(struct tx_ring *txr)
   1095 {
   1096 	struct adapter		*adapter = txr->adapter;
   1097 	struct ifnet		*ifp = adapter->ifp;
   1098 	struct ixgbe_tx_buf	*buf;
   1099 	union ixgbe_adv_tx_desc *txd;
   1100 	u32			work, processed = 0;
   1101 	u32			limit = adapter->tx_process_limit;
   1102 
   1103 	KASSERT(mutex_owned(&txr->tx_mtx));
   1104 
   1105 #ifdef DEV_NETMAP
   1106 	if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) &&
   1107 	    (adapter->ifp->if_capenable & IFCAP_NETMAP)) {
   1108 		struct netmap_adapter *na = NA(adapter->ifp);
   1109 		struct netmap_kring *kring = &na->tx_rings[txr->me];
   1110 		txd = txr->tx_base;
   1111 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1112 		    BUS_DMASYNC_POSTREAD);
   1113 		/*
   1114 		 * In netmap mode, all the work is done in the context
   1115 		 * of the client thread. Interrupt handlers only wake up
   1116 		 * clients, which may be sleeping on individual rings
   1117 		 * or on a global resource for all rings.
   1118 		 * To implement tx interrupt mitigation, we wake up the client
   1119 		 * thread roughly every half ring, even if the NIC interrupts
   1120 		 * more frequently. This is implemented as follows:
   1121 		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
   1122 		 *   the slot that should wake up the thread (nkr_num_slots
   1123 		 *   means the user thread should not be woken up);
   1124 		 * - the driver ignores tx interrupts unless netmap_mitigate=0
   1125 		 *   or the slot has the DD bit set.
   1126 		 */
   1127 		if (!netmap_mitigate ||
   1128 		    (kring->nr_kflags < kring->nkr_num_slots &&
   1129 		     txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
   1130 			netmap_tx_irq(ifp, txr->me);
   1131 		}
   1132 		return false;
   1133 	}
   1134 #endif /* DEV_NETMAP */
   1135 
   1136 	if (txr->tx_avail == txr->num_desc) {
   1137 		txr->busy = 0;
   1138 		return false;
   1139 	}
   1140 
   1141 	/* Get work starting point */
   1142 	work = txr->next_to_clean;
   1143 	buf = &txr->tx_buffers[work];
   1144 	txd = &txr->tx_base[work];
   1145 	work -= txr->num_desc; /* The distance to ring end */
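         	/*
         	 * Note: "work" is now a negative offset from the end of the
         	 * ring; when it wraps up to zero in the loop below, buf and
         	 * txd are reset to the start of the ring.
         	 */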
   1146 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1147 	    BUS_DMASYNC_POSTREAD);
   1148 
   1149 	do {
   1150 		union ixgbe_adv_tx_desc *eop = buf->eop;
   1151 		if (eop == NULL) /* No work */
   1152 			break;
   1153 
   1154 		if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
   1155 			break;	/* I/O not complete */
   1156 
   1157 		if (buf->m_head) {
   1158 			txr->bytes += buf->m_head->m_pkthdr.len;
   1159 			bus_dmamap_sync(txr->txtag->dt_dmat, buf->map,
   1160 			    0, buf->m_head->m_pkthdr.len,
   1161 			    BUS_DMASYNC_POSTWRITE);
   1162 			ixgbe_dmamap_unload(txr->txtag, buf->map);
   1163 			m_freem(buf->m_head);
   1164 			buf->m_head = NULL;
   1165 		}
   1166 		buf->eop = NULL;
   1167 		txr->txr_no_space = false;
   1168 		++txr->tx_avail;
   1169 
   1170 		/* We clean the range if multi segment */
   1171 		while (txd != eop) {
   1172 			++txd;
   1173 			++buf;
   1174 			++work;
   1175 			/* wrap the ring? */
   1176 			if (__predict_false(!work)) {
   1177 				work -= txr->num_desc;
   1178 				buf = txr->tx_buffers;
   1179 				txd = txr->tx_base;
   1180 			}
   1181 			if (buf->m_head) {
   1182 				txr->bytes +=
   1183 				    buf->m_head->m_pkthdr.len;
   1184 				bus_dmamap_sync(txr->txtag->dt_dmat,
   1185 				    buf->map,
   1186 				    0, buf->m_head->m_pkthdr.len,
   1187 				    BUS_DMASYNC_POSTWRITE);
   1188 				ixgbe_dmamap_unload(txr->txtag,
   1189 				    buf->map);
   1190 				m_freem(buf->m_head);
   1191 				buf->m_head = NULL;
   1192 			}
   1193 			++txr->tx_avail;
   1194 			buf->eop = NULL;
   1195 
   1196 		}
   1197 		++txr->packets;
   1198 		++processed;
   1199 		++ifp->if_opackets;
   1200 
   1201 		/* Try the next packet */
   1202 		++txd;
   1203 		++buf;
   1204 		++work;
   1205 		/* reset with a wrap */
   1206 		if (__predict_false(!work)) {
   1207 			work -= txr->num_desc;
   1208 			buf = txr->tx_buffers;
   1209 			txd = txr->tx_base;
   1210 		}
   1211 		prefetch(txd);
   1212 	} while (__predict_true(--limit));
   1213 
   1214 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1215 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   1216 
   1217 	work += txr->num_desc;
   1218 	txr->next_to_clean = work;
   1219 
    1220 	/*
    1221 	 * Queue hang detection: we know there's
    1222 	 * work outstanding or the first return
    1223 	 * would have been taken, so increment
    1224 	 * busy if nothing managed to get cleaned;
    1225 	 * local_timer then checks this counter and
    1226 	 * marks the queue HUNG once it exceeds the maximum.
    1227 	 */
   1228 	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
   1229 		++txr->busy;
    1230 	/*
    1231 	 * If anything gets cleaned we reset state to 1;
    1232 	 * note this will turn off HUNG if it's set.
    1233 	 */
   1234 	if (processed)
   1235 		txr->busy = 1;
   1236 
   1237 	if (txr->tx_avail == txr->num_desc)
   1238 		txr->busy = 0;
   1239 
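         	/* Returning true means the cleanup limit was hit and more work may remain. */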
   1240 	return ((limit > 0) ? false : true);
   1241 } /* ixgbe_txeof */
   1242 
   1243 /************************************************************************
   1244  * ixgbe_rsc_count
   1245  *
   1246  *   Used to detect a descriptor that has been merged by Hardware RSC.
   1247  ************************************************************************/
   1248 static inline u32
   1249 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
   1250 {
   1251 	return (le32toh(rx->wb.lower.lo_dword.data) &
   1252 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
   1253 } /* ixgbe_rsc_count */
   1254 
   1255 /************************************************************************
   1256  * ixgbe_setup_hw_rsc
   1257  *
    1258  *   Initialize the Hardware RSC (LRO) feature on 82599
    1259  *   for an RX ring; it is toggled by the LRO capability
    1260  *   even though it is transparent to the stack.
   1261  *
   1262  *   NOTE: Since this HW feature only works with IPv4 and
   1263  *         testing has shown soft LRO to be as effective,
   1264  *         this feature will be disabled by default.
   1265  ************************************************************************/
   1266 static void
   1267 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
   1268 {
   1269 	struct	adapter  *adapter = rxr->adapter;
   1270 	struct	ixgbe_hw *hw = &adapter->hw;
   1271 	u32              rscctrl, rdrxctl;
   1272 
    1273 	/* If turning LRO/RSC off we need to disable it */
    1274 	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
    1275 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
    1276 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
         		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
    1277 		return;
    1278 	}
   1279 
   1280 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
   1281 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
   1282 #ifdef DEV_NETMAP
   1283 	/* Always strip CRC unless Netmap disabled it */
   1284 	if (!(adapter->feat_en & IXGBE_FEATURE_NETMAP) ||
   1285 	    !(adapter->ifp->if_capenable & IFCAP_NETMAP) ||
   1286 	    ix_crcstrip)
   1287 #endif /* DEV_NETMAP */
   1288 		rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
   1289 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
   1290 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
   1291 
   1292 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
   1293 	rscctrl |= IXGBE_RSCCTL_RSCEN;
   1294 	/*
   1295 	 * Limit the total number of descriptors that
   1296 	 * can be combined, so it does not exceed 64K
   1297 	 */
   1298 	if (rxr->mbuf_sz == MCLBYTES)
   1299 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
   1300 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
   1301 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
   1302 	else if (rxr->mbuf_sz == MJUM9BYTES)
   1303 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
   1304 	else  /* Using 16K cluster */
   1305 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
   1306 
   1307 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
   1308 
   1309 	/* Enable TCP header recognition */
   1310 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
   1311 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR));
   1312 
   1313 	/* Disable RSC for ACK packets */
   1314 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
   1315 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
   1316 
   1317 	rxr->hw_rsc = TRUE;
   1318 } /* ixgbe_setup_hw_rsc */
   1319 
   1320 /************************************************************************
   1321  * ixgbe_refresh_mbufs
   1322  *
   1323  *   Refresh mbuf buffers for RX descriptor rings
    1324  *    - now keeps its own state, so discards due to resource
    1325  *      exhaustion are unnecessary; if an mbuf cannot be obtained
    1326  *      it just returns, keeping its placeholder, and can simply
    1327  *      be called again to retry.
   1328  ************************************************************************/
   1329 static void
   1330 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
   1331 {
   1332 	struct adapter      *adapter = rxr->adapter;
   1333 	struct ixgbe_rx_buf *rxbuf;
   1334 	struct mbuf         *mp;
   1335 	int                 i, j, error;
   1336 	bool                refreshed = false;
   1337 
   1338 	i = j = rxr->next_to_refresh;
   1339 	/* Control the loop with one beyond */
   1340 	if (++j == rxr->num_desc)
   1341 		j = 0;
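         	/*
         	 * Note: j always runs one slot ahead of i, so the loop below
         	 * stops one descriptor short of "limit", and next_to_refresh
         	 * only moves past a slot once that slot has been refreshed.
         	 */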
   1342 
   1343 	while (j != limit) {
   1344 		rxbuf = &rxr->rx_buffers[i];
   1345 		if (rxbuf->buf == NULL) {
   1346 			mp = ixgbe_getjcl(&rxr->jcl_head, M_NOWAIT,
   1347 			    MT_DATA, M_PKTHDR, rxr->mbuf_sz);
   1348 			if (mp == NULL) {
   1349 				rxr->no_jmbuf.ev_count++;
   1350 				goto update;
   1351 			}
   1352 			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
   1353 				m_adj(mp, ETHER_ALIGN);
   1354 		} else
   1355 			mp = rxbuf->buf;
   1356 
   1357 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   1358 
   1359 		/* If we're dealing with an mbuf that was copied rather
   1360 		 * than replaced, there's no need to go through busdma.
   1361 		 */
   1362 		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
   1363 			/* Get the memory mapping */
   1364 			ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   1365 			error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
   1366 			    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
   1367 			if (error != 0) {
   1368 				printf("Refresh mbufs: payload dmamap load failure - %d\n", error);
   1369 				m_free(mp);
   1370 				rxbuf->buf = NULL;
   1371 				goto update;
   1372 			}
   1373 			rxbuf->buf = mp;
   1374 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1375 			    0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
   1376 			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
   1377 			    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1378 		} else {
   1379 			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
   1380 			rxbuf->flags &= ~IXGBE_RX_COPY;
   1381 		}
   1382 
   1383 		refreshed = true;
   1384 		/* Next is precalculated */
   1385 		i = j;
   1386 		rxr->next_to_refresh = i;
   1387 		if (++j == rxr->num_desc)
   1388 			j = 0;
   1389 	}
   1390 
   1391 update:
   1392 	if (refreshed) /* Update hardware tail index */
   1393 		IXGBE_WRITE_REG(&adapter->hw, rxr->tail, rxr->next_to_refresh);
   1394 
   1395 	return;
   1396 } /* ixgbe_refresh_mbufs */
   1397 
   1398 /************************************************************************
   1399  * ixgbe_allocate_receive_buffers
   1400  *
   1401  *   Allocate memory for rx_buffer structures. Since we use one
    1402  *   rx_buffer per received packet, the maximum number of rx_buffers
   1403  *   that we'll need is equal to the number of receive descriptors
   1404  *   that we've allocated.
   1405  ************************************************************************/
   1406 static int
   1407 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
   1408 {
   1409 	struct	adapter     *adapter = rxr->adapter;
   1410 	device_t            dev = adapter->dev;
   1411 	struct ixgbe_rx_buf *rxbuf;
   1412 	int                 bsize, error;
   1413 
   1414 	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
   1415 	rxr->rx_buffers = (struct ixgbe_rx_buf *)malloc(bsize, M_DEVBUF,
   1416 	    M_NOWAIT | M_ZERO);
   1417 	if (rxr->rx_buffers == NULL) {
   1418 		aprint_error_dev(dev, "Unable to allocate rx_buffer memory\n");
   1419 		error = ENOMEM;
   1420 		goto fail;
   1421 	}
   1422 
   1423 	error = ixgbe_dma_tag_create(
   1424 	         /*      parent */ adapter->osdep.dmat,
   1425 	         /*   alignment */ 1,
   1426 	         /*      bounds */ 0,
   1427 	         /*     maxsize */ MJUM16BYTES,
   1428 	         /*   nsegments */ 1,
   1429 	         /*  maxsegsize */ MJUM16BYTES,
   1430 	         /*       flags */ 0,
   1431 	                           &rxr->ptag);
   1432 	if (error != 0) {
   1433 		aprint_error_dev(dev, "Unable to create RX DMA tag\n");
   1434 		goto fail;
   1435 	}
   1436 
   1437 	for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
   1438 		rxbuf = &rxr->rx_buffers[i];
   1439 		error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
   1440 		if (error) {
   1441 			aprint_error_dev(dev, "Unable to create RX dma map\n");
   1442 			goto fail;
   1443 		}
   1444 	}
   1445 
   1446 	return (0);
   1447 
   1448 fail:
   1449 	/* Frees all, but can handle partial completion */
   1450 	ixgbe_free_receive_structures(adapter);
   1451 
   1452 	return (error);
   1453 } /* ixgbe_allocate_receive_buffers */
   1454 
   1455 /************************************************************************
   1456  * ixgbe_free_receive_ring
   1457  ************************************************************************/
   1458 static void
   1459 ixgbe_free_receive_ring(struct rx_ring *rxr)
   1460 {
   1461 	for (int i = 0; i < rxr->num_desc; i++) {
   1462 		ixgbe_rx_discard(rxr, i);
   1463 	}
   1464 } /* ixgbe_free_receive_ring */
   1465 
   1466 /************************************************************************
   1467  * ixgbe_setup_receive_ring
   1468  *
   1469  *   Initialize a receive ring and its buffers.
   1470  ************************************************************************/
   1471 static int
   1472 ixgbe_setup_receive_ring(struct rx_ring *rxr)
   1473 {
   1474 	struct adapter        *adapter;
   1475 	struct ixgbe_rx_buf   *rxbuf;
   1476 #ifdef LRO
   1477 	struct ifnet          *ifp;
   1478 	struct lro_ctrl       *lro = &rxr->lro;
   1479 #endif /* LRO */
   1480 #ifdef DEV_NETMAP
   1481 	struct netmap_adapter *na = NA(rxr->adapter->ifp);
   1482 	struct netmap_slot    *slot;
   1483 #endif /* DEV_NETMAP */
   1484 	int                   rsize, error = 0;
   1485 
   1486 	adapter = rxr->adapter;
   1487 #ifdef LRO
   1488 	ifp = adapter->ifp;
   1489 #endif /* LRO */
   1490 
   1491 	/* Clear the ring contents */
   1492 	IXGBE_RX_LOCK(rxr);
   1493 
   1494 #ifdef DEV_NETMAP
   1495 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP)
   1496 		slot = netmap_reset(na, NR_RX, rxr->me, 0);
   1497 #endif /* DEV_NETMAP */
   1498 
   1499 	rsize = roundup2(adapter->num_rx_desc *
   1500 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
   1501 	bzero((void *)rxr->rx_base, rsize);
   1502 	/* Cache the size */
   1503 	rxr->mbuf_sz = adapter->rx_mbuf_sz;
   1504 
   1505 	/* Free current RX buffer structs and their mbufs */
   1506 	ixgbe_free_receive_ring(rxr);
   1507 
   1508 	IXGBE_RX_UNLOCK(rxr);
   1509 	/*
   1510 	 * Now reinitialize our supply of jumbo mbufs.  The number
   1511 	 * or size of jumbo mbufs may have changed.
    1512 	 * We assume every ring's rxr->ptag is identical.
   1513 	 */
   1514 	ixgbe_jcl_reinit(adapter, rxr->ptag->dt_dmat, rxr,
   1515 	    (2 * adapter->num_rx_desc), adapter->rx_mbuf_sz);
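         	/*
         	 * The pool holds 2 * num_rx_desc clusters, presumably so that
         	 * clusters still loaned out to the stack do not starve ring
         	 * refills.
         	 */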
   1516 
   1517 	IXGBE_RX_LOCK(rxr);
   1518 
   1519 	/* Now replenish the mbufs */
   1520 	for (int j = 0; j != rxr->num_desc; ++j) {
   1521 		struct mbuf *mp;
   1522 
   1523 		rxbuf = &rxr->rx_buffers[j];
   1524 
   1525 #ifdef DEV_NETMAP
   1526 		/*
   1527 		 * In netmap mode, fill the map and set the buffer
   1528 		 * address in the NIC ring, considering the offset
   1529 		 * between the netmap and NIC rings (see comment in
   1530 		 * ixgbe_setup_transmit_ring() ). No need to allocate
   1531 		 * an mbuf, so end the block with a continue;
   1532 		 */
   1533 		if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
   1534 			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
   1535 			uint64_t paddr;
   1536 			void *addr;
   1537 
   1538 			addr = PNMB(na, slot + sj, &paddr);
   1539 			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
   1540 			/* Update descriptor and the cached value */
   1541 			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
   1542 			rxbuf->addr = htole64(paddr);
   1543 			continue;
   1544 		}
   1545 #endif /* DEV_NETMAP */
   1546 
   1547 		rxbuf->flags = 0;
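         		/*
         		 * Take an mbuf backed by a jumbo cluster from the
         		 * per-ring pool refilled by ixgbe_jcl_reinit() above.
         		 */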
   1548 		rxbuf->buf = ixgbe_getjcl(&rxr->jcl_head, M_NOWAIT,
   1549 		    MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
   1550 		if (rxbuf->buf == NULL) {
   1551 			error = ENOBUFS;
   1552 			goto fail;
   1553 		}
   1554 		mp = rxbuf->buf;
   1555 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   1556 		/* Get the memory mapping */
   1557 		error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat, rxbuf->pmap,
   1558 		    mp, BUS_DMA_NOWAIT);
   1559 		if (error != 0)
    1560 			goto fail;
   1561 		bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1562 		    0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
   1563 		/* Update the descriptor and the cached value */
   1564 		rxr->rx_base[j].read.pkt_addr =
   1565 		    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1566 		rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1567 	}
   1568 
   1569 
   1570 	/* Setup our descriptor indices */
   1571 	rxr->next_to_check = 0;
   1572 	rxr->next_to_refresh = 0;
   1573 	rxr->lro_enabled = FALSE;
   1574 	rxr->rx_copies.ev_count = 0;
   1575 #if 0 /* NetBSD */
   1576 	rxr->rx_bytes.ev_count = 0;
   1577 #if 1	/* Fix inconsistency */
   1578 	rxr->rx_packets.ev_count = 0;
   1579 #endif
   1580 #endif
   1581 	rxr->vtag_strip = FALSE;
   1582 
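         	/* Sync the descriptor ring so the device sees the initialized entries. */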
   1583 	ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   1584 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   1585 
   1586 	/*
   1587 	 * Now set up the LRO interface
   1588 	 */
   1589 	if (ixgbe_rsc_enable)
   1590 		ixgbe_setup_hw_rsc(rxr);
   1591 #ifdef LRO
   1592 	else if (ifp->if_capenable & IFCAP_LRO) {
   1593 		device_t dev = adapter->dev;
   1594 		int err = tcp_lro_init(lro);
   1595 		if (err) {
   1596 			device_printf(dev, "LRO Initialization failed!\n");
   1597 			goto fail;
   1598 		}
   1599 		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
   1600 		rxr->lro_enabled = TRUE;
   1601 		lro->ifp = adapter->ifp;
   1602 	}
   1603 #endif /* LRO */
   1604 
   1605 	IXGBE_RX_UNLOCK(rxr);
   1606 
   1607 	return (0);
   1608 
   1609 fail:
   1610 	ixgbe_free_receive_ring(rxr);
   1611 	IXGBE_RX_UNLOCK(rxr);
   1612 
   1613 	return (error);
   1614 } /* ixgbe_setup_receive_ring */
   1615 
   1616 /************************************************************************
   1617  * ixgbe_setup_receive_structures - Initialize all receive rings.
   1618  ************************************************************************/
   1619 int
   1620 ixgbe_setup_receive_structures(struct adapter *adapter)
   1621 {
   1622 	struct rx_ring *rxr = adapter->rx_rings;
   1623 	int            j;
   1624 
   1625 	for (j = 0; j < adapter->num_queues; j++, rxr++)
   1626 		if (ixgbe_setup_receive_ring(rxr))
   1627 			goto fail;
   1628 
   1629 	return (0);
   1630 fail:
   1631 	/*
    1632 	 * Free the RX buffers allocated so far. We only handle the
    1633 	 * rings that completed; the failing ring has already cleaned
    1634 	 * up after itself. 'j' is the failing index, so it's the terminus.
   1635 	 */
   1636 	for (int i = 0; i < j; ++i) {
   1637 		rxr = &adapter->rx_rings[i];
   1638 		IXGBE_RX_LOCK(rxr);
   1639 		ixgbe_free_receive_ring(rxr);
   1640 		IXGBE_RX_UNLOCK(rxr);
   1641 	}
   1642 
   1643 	return (ENOBUFS);
   1644 } /* ixgbe_setup_receive_structures */
   1645 
   1646 
   1647 /************************************************************************
   1648  * ixgbe_free_receive_structures - Free all receive rings.
   1649  ************************************************************************/
   1650 void
   1651 ixgbe_free_receive_structures(struct adapter *adapter)
   1652 {
   1653 	struct rx_ring *rxr = adapter->rx_rings;
   1654 
   1655 	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
   1656 
   1657 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
   1658 		ixgbe_free_receive_buffers(rxr);
   1659 #ifdef LRO
   1660 		/* Free LRO memory */
   1661 		tcp_lro_free(&rxr->lro);
   1662 #endif /* LRO */
   1663 		/* Free the ring memory as well */
   1664 		ixgbe_dma_free(adapter, &rxr->rxdma);
   1665 		IXGBE_RX_LOCK_DESTROY(rxr);
   1666 	}
   1667 
   1668 	free(adapter->rx_rings, M_DEVBUF);
   1669 } /* ixgbe_free_receive_structures */
   1670 
   1671 
   1672 /************************************************************************
   1673  * ixgbe_free_receive_buffers - Free receive ring data structures
   1674  ************************************************************************/
   1675 static void
   1676 ixgbe_free_receive_buffers(struct rx_ring *rxr)
   1677 {
   1678 	struct adapter      *adapter = rxr->adapter;
   1679 	struct ixgbe_rx_buf *rxbuf;
   1680 
   1681 	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
   1682 
   1683 	/* Cleanup any existing buffers */
   1684 	if (rxr->rx_buffers != NULL) {
   1685 		for (int i = 0; i < adapter->num_rx_desc; i++) {
   1686 			rxbuf = &rxr->rx_buffers[i];
   1687 			ixgbe_rx_discard(rxr, i);
   1688 			if (rxbuf->pmap != NULL) {
   1689 				ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
   1690 				rxbuf->pmap = NULL;
   1691 			}
   1692 		}
    1694 		free(rxr->rx_buffers, M_DEVBUF);
    1695 		rxr->rx_buffers = NULL;
   1697 	}
   1698 
   1699 	if (rxr->ptag != NULL) {
   1700 		ixgbe_dma_tag_destroy(rxr->ptag);
   1701 		rxr->ptag = NULL;
   1702 	}
   1703 
   1704 	return;
   1705 } /* ixgbe_free_receive_buffers */
   1706 
   1707 /************************************************************************
   1708  * ixgbe_rx_input
   1709  ************************************************************************/
   1710 static __inline void
   1711 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m,
   1712     u32 ptype)
   1713 {
   1714 	struct adapter	*adapter = ifp->if_softc;
   1715 
   1716 #ifdef LRO
   1717 	struct ethercom *ec = &adapter->osdep.ec;
   1718 
   1719 	/*
    1720 	 * At the moment LRO is only for IP/TCP packets whose TCP checksum has
    1721 	 * been verified by hardware, and the frame must not carry a VLAN tag
    1722 	 * in its Ethernet header.  For IPv6 we do not yet support ext. headers.
   1723 	 */
    1724 	if (rxr->lro_enabled &&
    1725 	    (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
    1726 	    (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
    1727 	    ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
    1728 	    (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
    1729 	    (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
    1730 	    (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
    1731 	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
    1732 	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
    1733 		/*
    1734 		 * Fall through and send to the stack if:
    1735 		 *  - LRO was not initialized, or
    1736 		 *  - there are no LRO resources, or
    1737 		 *  - the LRO enqueue fails
    1738 		 */
    1739 		if (rxr->lro.lro_cnt != 0)
    1740 			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
    1741 				return;
    1742 	}
   1743 #endif /* LRO */
   1744 
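         	/*
         	 * Hand the packet to the per-CPU input queue; the network
         	 * stack processes it later in softint context.
         	 */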
   1745 	if_percpuq_enqueue(adapter->ipq, m);
   1746 } /* ixgbe_rx_input */
   1747 
   1748 /************************************************************************
   1749  * ixgbe_rx_discard
   1750  ************************************************************************/
   1751 static __inline void
   1752 ixgbe_rx_discard(struct rx_ring *rxr, int i)
   1753 {
   1754 	struct ixgbe_rx_buf *rbuf;
   1755 
   1756 	rbuf = &rxr->rx_buffers[i];
   1757 
   1758 	/*
    1759 	 * With advanced descriptors the writeback
    1760 	 * clobbers the buffer addresses, so it's easier
    1761 	 * to just free the existing mbufs and take
    1762 	 * the normal refresh path to get new buffers
    1763 	 * and mappings.
   1764 	 */
   1765 
   1766 	if (rbuf->fmp != NULL) {/* Partial chain ? */
   1767 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
   1768 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
   1769 		m_freem(rbuf->fmp);
   1770 		rbuf->fmp = NULL;
   1771 		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
   1772 	} else if (rbuf->buf) {
   1773 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
   1774 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
   1775 		m_free(rbuf->buf);
   1776 		rbuf->buf = NULL;
   1777 	}
   1778 	ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
   1779 
   1780 	rbuf->flags = 0;
   1781 
   1782 	return;
   1783 } /* ixgbe_rx_discard */
   1784 
   1785 
   1786 /************************************************************************
   1787  * ixgbe_rxeof
   1788  *
    1789  *   Executes in interrupt context. It replenishes the
    1790  *   mbufs in the descriptor ring and passes data that has
    1791  *   been DMA'd into host memory up to the upper layer.
   1792  *
   1793  *   Return TRUE for more work, FALSE for all clean.
   1794  ************************************************************************/
   1795 bool
   1796 ixgbe_rxeof(struct ix_queue *que)
   1797 {
   1798 	struct adapter		*adapter = que->adapter;
   1799 	struct rx_ring		*rxr = que->rxr;
   1800 	struct ifnet		*ifp = adapter->ifp;
   1801 #ifdef LRO
   1802 	struct lro_ctrl		*lro = &rxr->lro;
   1803 #endif /* LRO */
   1804 	union ixgbe_adv_rx_desc	*cur;
   1805 	struct ixgbe_rx_buf	*rbuf, *nbuf;
   1806 	int			i, nextp, processed = 0;
   1807 	u32			staterr = 0;
   1808 	u32			count = adapter->rx_process_limit;
   1809 #ifdef RSS
   1810 	u16			pkt_info;
   1811 #endif
   1812 
   1813 	IXGBE_RX_LOCK(rxr);
   1814 
   1815 #ifdef DEV_NETMAP
   1816 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
   1817 		/* Same as the txeof routine: wakeup clients on intr. */
   1818 		if (netmap_rx_irq(ifp, rxr->me, &processed)) {
   1819 			IXGBE_RX_UNLOCK(rxr);
   1820 			return (FALSE);
   1821 		}
   1822 	}
   1823 #endif /* DEV_NETMAP */
   1824 
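         	/*
         	 * Process at most rx_process_limit descriptors per call so
         	 * that a single busy ring cannot monopolize this context.
         	 */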
   1825 	for (i = rxr->next_to_check; count != 0;) {
   1826 		struct mbuf *sendmp, *mp;
   1827 		u32         rsc, ptype;
   1828 		u16         len;
   1829 		u16         vtag = 0;
   1830 		bool        eop;
   1831 
   1832 		/* Sync the ring. */
   1833 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   1834 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   1835 
   1836 		cur = &rxr->rx_base[i];
   1837 		staterr = le32toh(cur->wb.upper.status_error);
   1838 #ifdef RSS
   1839 		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
   1840 #endif
   1841 
   1842 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
   1843 			break;
   1844 
   1845 		count--;
   1846 		sendmp = NULL;
   1847 		nbuf = NULL;
   1848 		rsc = 0;
   1849 		cur->wb.upper.status_error = 0;
   1850 		rbuf = &rxr->rx_buffers[i];
   1851 		mp = rbuf->buf;
   1852 
   1853 		len = le16toh(cur->wb.upper.length);
   1854 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
   1855 		    IXGBE_RXDADV_PKTTYPE_MASK;
   1856 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
   1857 
   1858 		/* Make sure bad packets are discarded */
   1859 		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
   1860 #if __FreeBSD_version >= 1100036
   1861 			if (adapter->feat_en & IXGBE_FEATURE_VF)
   1862 				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
   1863 #endif
   1864 			rxr->rx_discarded.ev_count++;
   1865 			ixgbe_rx_discard(rxr, i);
   1866 			goto next_desc;
   1867 		}
   1868 
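         		/* Sync the received data so the CPU sees what the device wrote. */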
   1869 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
   1870 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
   1871 
   1872 		/*
    1873 		 * On the 82599, which supports hardware
    1874 		 * LRO (called HW RSC), the fragments of a
    1875 		 * packet need not occupy sequential
    1876 		 * descriptors; instead the next descriptor
    1877 		 * index is indicated in bits of the descriptor.
    1878 		 * This also means that we might process
    1879 		 * more than one packet at a time, something
    1880 		 * that had never been true before; it
   1881 		 * required eliminating global chain pointers
   1882 		 * in favor of what we are doing here.  -jfv
   1883 		 */
   1884 		if (!eop) {
   1885 			/*
   1886 			 * Figure out the next descriptor
   1887 			 * of this frame.
   1888 			 */
   1889 			if (rxr->hw_rsc == TRUE) {
   1890 				rsc = ixgbe_rsc_count(cur);
   1891 				rxr->rsc_num += (rsc - 1);
   1892 			}
   1893 			if (rsc) { /* Get hardware index */
   1894 				nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
   1895 				    IXGBE_RXDADV_NEXTP_SHIFT);
   1896 			} else { /* Just sequential */
   1897 				nextp = i + 1;
   1898 				if (nextp == adapter->num_rx_desc)
   1899 					nextp = 0;
   1900 			}
   1901 			nbuf = &rxr->rx_buffers[nextp];
   1902 			prefetch(nbuf);
   1903 		}
   1904 		/*
   1905 		 * Rather than using the fmp/lmp global pointers
   1906 		 * we now keep the head of a packet chain in the
   1907 		 * buffer struct and pass this along from one
   1908 		 * descriptor to the next, until we get EOP.
   1909 		 */
   1910 		mp->m_len = len;
   1911 		/*
    1912 		 * See if there is a stored head from a previous
    1913 		 * descriptor; if so, this is a continuation fragment.
   1914 		 */
   1915 		sendmp = rbuf->fmp;
   1916 		if (sendmp != NULL) {  /* secondary frag */
   1917 			rbuf->buf = rbuf->fmp = NULL;
   1918 			mp->m_flags &= ~M_PKTHDR;
   1919 			sendmp->m_pkthdr.len += mp->m_len;
   1920 		} else {
   1921 			/*
   1922 			 * Optimize.  This might be a small packet,
   1923 			 * maybe just a TCP ACK.  Do a fast copy that
   1924 			 * is cache aligned into a new mbuf, and
   1925 			 * leave the old mbuf+cluster for re-use.
   1926 			 */
   1927 			if (eop && len <= IXGBE_RX_COPY_LEN) {
   1928 				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
   1929 				if (sendmp != NULL) {
   1930 					sendmp->m_data += IXGBE_RX_COPY_ALIGN;
   1931 					ixgbe_bcopy(mp->m_data, sendmp->m_data,
   1932 					    len);
   1933 					sendmp->m_len = len;
   1934 					rxr->rx_copies.ev_count++;
   1935 					rbuf->flags |= IXGBE_RX_COPY;
   1936 				}
   1937 			}
   1938 			if (sendmp == NULL) {
   1939 				rbuf->buf = rbuf->fmp = NULL;
   1940 				sendmp = mp;
   1941 			}
   1942 
   1943 			/* first desc of a non-ps chain */
   1944 			sendmp->m_flags |= M_PKTHDR;
   1945 			sendmp->m_pkthdr.len = mp->m_len;
   1946 		}
   1947 		++processed;
   1948 
   1949 		/* Pass the head pointer on */
   1950 		if (eop == 0) {
   1951 			nbuf->fmp = sendmp;
   1952 			sendmp = NULL;
   1953 			mp->m_next = nbuf->buf;
   1954 		} else { /* Sending this frame */
   1955 			m_set_rcvif(sendmp, ifp);
   1956 			++rxr->packets;
   1957 			rxr->rx_packets.ev_count++;
   1958 			/* capture data for AIM */
   1959 			rxr->bytes += sendmp->m_pkthdr.len;
   1960 			rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
   1961 			/* Process vlan info */
   1962 			if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
   1963 				vtag = le16toh(cur->wb.upper.vlan);
   1964 			if (vtag) {
   1965 				vlan_set_tag(sendmp, vtag);
   1966 			}
   1967 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
   1968 				ixgbe_rx_checksum(staterr, sendmp, ptype,
   1969 				   &adapter->stats.pf);
   1970 			}
   1971 
   1972 #if 0 /* FreeBSD */
   1973 			/*
   1974 			 * In case of multiqueue, we have RXCSUM.PCSD bit set
   1975 			 * and never cleared. This means we have RSS hash
   1976 			 * available to be used.
   1977 			 */
   1978 			if (adapter->num_queues > 1) {
   1979 				sendmp->m_pkthdr.flowid =
   1980 				    le32toh(cur->wb.lower.hi_dword.rss);
   1981 				switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
   1982 				case IXGBE_RXDADV_RSSTYPE_IPV4:
   1983 					M_HASHTYPE_SET(sendmp,
   1984 					    M_HASHTYPE_RSS_IPV4);
   1985 					break;
   1986 				case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
   1987 					M_HASHTYPE_SET(sendmp,
   1988 					    M_HASHTYPE_RSS_TCP_IPV4);
   1989 					break;
   1990 				case IXGBE_RXDADV_RSSTYPE_IPV6:
   1991 					M_HASHTYPE_SET(sendmp,
   1992 					    M_HASHTYPE_RSS_IPV6);
   1993 					break;
   1994 				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
   1995 					M_HASHTYPE_SET(sendmp,
   1996 					    M_HASHTYPE_RSS_TCP_IPV6);
   1997 					break;
   1998 				case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
   1999 					M_HASHTYPE_SET(sendmp,
   2000 					    M_HASHTYPE_RSS_IPV6_EX);
   2001 					break;
   2002 				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
   2003 					M_HASHTYPE_SET(sendmp,
   2004 					    M_HASHTYPE_RSS_TCP_IPV6_EX);
   2005 					break;
   2006 #if __FreeBSD_version > 1100000
   2007 				case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
   2008 					M_HASHTYPE_SET(sendmp,
   2009 					    M_HASHTYPE_RSS_UDP_IPV4);
   2010 					break;
   2011 				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
   2012 					M_HASHTYPE_SET(sendmp,
   2013 					    M_HASHTYPE_RSS_UDP_IPV6);
   2014 					break;
   2015 				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
   2016 					M_HASHTYPE_SET(sendmp,
   2017 					    M_HASHTYPE_RSS_UDP_IPV6_EX);
   2018 					break;
   2019 #endif
   2020 				default:
   2021 					M_HASHTYPE_SET(sendmp,
   2022 					    M_HASHTYPE_OPAQUE_HASH);
   2023 				}
   2024 			} else {
   2025 				sendmp->m_pkthdr.flowid = que->msix;
   2026 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
   2027 			}
   2028 #endif
   2029 		}
   2030 next_desc:
   2031 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   2032 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   2033 
   2034 		/* Advance our pointers to the next descriptor. */
   2035 		if (++i == rxr->num_desc)
   2036 			i = 0;
   2037 
   2038 		/* Now send to the stack or do LRO */
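         		/*
         		 * The RX lock is dropped across ixgbe_rx_input();
         		 * next_to_check is updated first and re-read afterwards
         		 * in case the ring state changed while unlocked.
         		 */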
   2039 		if (sendmp != NULL) {
   2040 			rxr->next_to_check = i;
   2041 			IXGBE_RX_UNLOCK(rxr);
   2042 			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
   2043 			IXGBE_RX_LOCK(rxr);
   2044 			i = rxr->next_to_check;
   2045 		}
   2046 
   2047 		/* Every 8 descriptors we go to refresh mbufs */
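         		/* Refreshing in batches amortizes the tail register write done in ixgbe_refresh_mbufs(). */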
   2048 		if (processed == 8) {
   2049 			ixgbe_refresh_mbufs(rxr, i);
   2050 			processed = 0;
   2051 		}
   2052 	}
   2053 
   2054 	/* Refresh any remaining buf structs */
   2055 	if (ixgbe_rx_unrefreshed(rxr))
   2056 		ixgbe_refresh_mbufs(rxr, i);
   2057 
   2058 	rxr->next_to_check = i;
   2059 
   2060 	IXGBE_RX_UNLOCK(rxr);
   2061 
   2062 #ifdef LRO
   2063 	/*
   2064 	 * Flush any outstanding LRO work
   2065 	 */
   2066 	tcp_lro_flush_all(lro);
   2067 #endif /* LRO */
   2068 
   2069 	/*
   2070 	 * Still have cleaning to do?
   2071 	 */
   2072 	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
   2073 		return (TRUE);
   2074 
   2075 	return (FALSE);
   2076 } /* ixgbe_rxeof */
   2077 
   2078 
   2079 /************************************************************************
   2080  * ixgbe_rx_checksum
   2081  *
   2082  *   Verify that the hardware indicated that the checksum is valid.
    2083  *   Inform the stack about the checksum status so that the stack
    2084  *   doesn't spend time re-verifying it.
   2085  ************************************************************************/
   2086 static void
   2087 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
   2088     struct ixgbe_hw_stats *stats)
   2089 {
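         	/*
         	 * The checksum status bits live in the low word of staterr;
         	 * the error bits tested below sit in its top byte.
         	 */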
   2090 	u16  status = (u16)staterr;
   2091 	u8   errors = (u8)(staterr >> 24);
   2092 #if 0
   2093 	bool sctp = false;
   2094 
   2095 	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
   2096 	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
   2097 		sctp = true;
   2098 #endif
   2099 
   2100 	/* IPv4 checksum */
   2101 	if (status & IXGBE_RXD_STAT_IPCS) {
   2102 		stats->ipcs.ev_count++;
   2103 		if (!(errors & IXGBE_RXD_ERR_IPE)) {
   2104 			/* IP Checksum Good */
   2105 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
   2106 		} else {
   2107 			stats->ipcs_bad.ev_count++;
   2108 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
   2109 		}
   2110 	}
   2111 	/* TCP/UDP/SCTP checksum */
   2112 	if (status & IXGBE_RXD_STAT_L4CS) {
   2113 		stats->l4cs.ev_count++;
   2114 		int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
   2115 		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
   2116 			mp->m_pkthdr.csum_flags |= type;
   2117 		} else {
   2118 			stats->l4cs_bad.ev_count++;
   2119 			mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
   2120 		}
   2121 	}
   2122 } /* ixgbe_rx_checksum */
   2123 
   2124 /************************************************************************
   2125  * ixgbe_dma_malloc
   2126  ************************************************************************/
   2127 int
   2128 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
   2129 		struct ixgbe_dma_alloc *dma, const int mapflags)
   2130 {
   2131 	device_t dev = adapter->dev;
   2132 	int      r, rsegs;
   2133 
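         	/*
         	 * Standard bus_dma(9) sequence: create a tag, allocate
         	 * DMA-safe memory, map it into KVA, create a map, and load
         	 * the map to obtain the bus address.
         	 */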
   2134 	r = ixgbe_dma_tag_create(
   2135 	     /*      parent */ adapter->osdep.dmat,
   2136 	     /*   alignment */ DBA_ALIGN,
   2137 	     /*      bounds */ 0,
   2138 	     /*     maxsize */ size,
   2139 	     /*   nsegments */ 1,
   2140 	     /*  maxsegsize */ size,
   2141 	     /*       flags */ BUS_DMA_ALLOCNOW,
   2142 			       &dma->dma_tag);
   2143 	if (r != 0) {
   2144 		aprint_error_dev(dev,
   2145 		    "%s: ixgbe_dma_tag_create failed; error %d\n", __func__,
   2146 		    r);
   2147 		goto fail_0;
   2148 	}
   2149 
   2150 	r = bus_dmamem_alloc(dma->dma_tag->dt_dmat, size,
   2151 	    dma->dma_tag->dt_alignment, dma->dma_tag->dt_boundary,
   2152 	    &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
   2153 	if (r != 0) {
   2154 		aprint_error_dev(dev,
   2155 		    "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
   2156 		goto fail_1;
   2157 	}
   2158 
   2159 	r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
   2160 	    size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
   2161 	if (r != 0) {
   2162 		aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
   2163 		    __func__, r);
   2164 		goto fail_2;
   2165 	}
   2166 
   2167 	r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
   2168 	if (r != 0) {
    2169 		aprint_error_dev(dev, "%s: ixgbe_dmamap_create failed; error %d\n",
   2170 		    __func__, r);
   2171 		goto fail_3;
   2172 	}
   2173 
   2174 	r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map,
   2175 	    dma->dma_vaddr, size, NULL, mapflags | BUS_DMA_NOWAIT);
   2176 	if (r != 0) {
   2177 		aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
   2178 		    __func__, r);
   2179 		goto fail_4;
   2180 	}
   2181 	dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
   2182 	dma->dma_size = size;
   2183 	return 0;
   2184 fail_4:
   2185 	ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
   2186 fail_3:
   2187 	bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
   2188 fail_2:
   2189 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
   2190 fail_1:
   2191 	ixgbe_dma_tag_destroy(dma->dma_tag);
   2192 fail_0:
   2193 
   2194 	return (r);
   2195 } /* ixgbe_dma_malloc */
   2196 
   2197 /************************************************************************
   2198  * ixgbe_dma_free
   2199  ************************************************************************/
   2200 void
   2201 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
   2202 {
   2203 	bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
   2204 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   2205 	ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
   2206 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
   2207 	ixgbe_dma_tag_destroy(dma->dma_tag);
   2208 } /* ixgbe_dma_free */
   2209 
   2210 
   2211 /************************************************************************
   2212  * ixgbe_allocate_queues
   2213  *
   2214  *   Allocate memory for the transmit and receive rings, and then
   2215  *   the descriptors associated with each, called only once at attach.
   2216  ************************************************************************/
   2217 int
   2218 ixgbe_allocate_queues(struct adapter *adapter)
   2219 {
   2220 	device_t	dev = adapter->dev;
   2221 	struct ix_queue	*que;
   2222 	struct tx_ring	*txr;
   2223 	struct rx_ring	*rxr;
   2224 	int             rsize, tsize, error = IXGBE_SUCCESS;
   2225 	int             txconf = 0, rxconf = 0;
   2226 
   2227 	/* First, allocate the top level queue structs */
   2228 	adapter->queues = (struct ix_queue *)malloc(sizeof(struct ix_queue) *
    2229 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
    2230 	if (adapter->queues == NULL) {
    2231 		aprint_error_dev(dev, "Unable to allocate queue memory\n");
    2232 		error = ENOMEM;
    2233 		goto fail;
    2234 	}
   2235 
   2236 	/* Second, allocate the TX ring struct memory */
   2237 	adapter->tx_rings = (struct tx_ring *)malloc(sizeof(struct tx_ring) *
   2238 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
   2239 	if (adapter->tx_rings == NULL) {
   2240 		aprint_error_dev(dev, "Unable to allocate TX ring memory\n");
   2241 		error = ENOMEM;
   2242 		goto tx_fail;
   2243 	}
   2244 
   2245 	/* Third, allocate the RX ring */
   2246 	adapter->rx_rings = (struct rx_ring *)malloc(sizeof(struct rx_ring) *
   2247 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
   2248 	if (adapter->rx_rings == NULL) {
   2249 		aprint_error_dev(dev, "Unable to allocate RX ring memory\n");
   2250 		error = ENOMEM;
   2251 		goto rx_fail;
   2252 	}
   2253 
   2254 	/* For the ring itself */
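         	/*
         	 * The ring length is rounded up to DBA_ALIGN, and
         	 * ixgbe_dma_malloc() aligns the base address to DBA_ALIGN
         	 * as well.
         	 */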
   2255 	tsize = roundup2(adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc),
   2256 	    DBA_ALIGN);
   2257 
   2258 	/*
   2259 	 * Now set up the TX queues, txconf is needed to handle the
   2260 	 * possibility that things fail midcourse and we need to
   2261 	 * undo memory gracefully
   2262 	 */
   2263 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
   2264 		/* Set up some basics */
   2265 		txr = &adapter->tx_rings[i];
   2266 		txr->adapter = adapter;
   2267 		txr->txr_interq = NULL;
   2268 		/* In case SR-IOV is enabled, align the index properly */
   2269 #ifdef PCI_IOV
   2270 		txr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
   2271 		    i);
   2272 #else
   2273 		txr->me = i;
   2274 #endif
   2275 		txr->num_desc = adapter->num_tx_desc;
   2276 
   2277 		/* Initialize the TX side lock */
   2278 		mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
   2279 
   2280 		if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma,
   2281 		    BUS_DMA_NOWAIT)) {
   2282 			aprint_error_dev(dev,
   2283 			    "Unable to allocate TX Descriptor memory\n");
   2284 			error = ENOMEM;
   2285 			goto err_tx_desc;
   2286 		}
   2287 		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
   2288 		bzero((void *)txr->tx_base, tsize);
   2289 
   2290 		/* Now allocate transmit buffers for the ring */
   2291 		if (ixgbe_allocate_transmit_buffers(txr)) {
   2292 			aprint_error_dev(dev,
   2293 			    "Critical Failure setting up transmit buffers\n");
   2294 			error = ENOMEM;
   2295 			goto err_tx_desc;
    2296 		}
   2297 		if (!(adapter->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
    2298 			/* Allocate a pcq to queue packets for the multiqueue transmit path */
   2299 			txr->txr_interq = pcq_create(IXGBE_BR_SIZE, KM_SLEEP);
   2300 			if (txr->txr_interq == NULL) {
   2301 				aprint_error_dev(dev,
   2302 				    "Critical Failure setting up buf ring\n");
   2303 				error = ENOMEM;
   2304 				goto err_tx_desc;
   2305 			}
   2306 		}
   2307 	}
   2308 
   2309 	/*
   2310 	 * Next the RX queues...
   2311 	 */
   2312 	rsize = roundup2(adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc),
   2313 	    DBA_ALIGN);
   2314 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
   2315 		rxr = &adapter->rx_rings[i];
   2316 		/* Set up some basics */
   2317 		rxr->adapter = adapter;
   2318 #ifdef PCI_IOV
   2319 		/* In case SR-IOV is enabled, align the index properly */
   2320 		rxr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
   2321 		    i);
   2322 #else
   2323 		rxr->me = i;
   2324 #endif
   2325 		rxr->num_desc = adapter->num_rx_desc;
   2326 
   2327 		/* Initialize the RX side lock */
   2328 		mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
   2329 
   2330 		if (ixgbe_dma_malloc(adapter, rsize, &rxr->rxdma,
   2331 		    BUS_DMA_NOWAIT)) {
   2332 			aprint_error_dev(dev,
    2333 			    "Unable to allocate RX Descriptor memory\n");
   2334 			error = ENOMEM;
   2335 			goto err_rx_desc;
   2336 		}
   2337 		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
   2338 		bzero((void *)rxr->rx_base, rsize);
   2339 
   2340 		/* Allocate receive buffers for the ring */
   2341 		if (ixgbe_allocate_receive_buffers(rxr)) {
   2342 			aprint_error_dev(dev,
   2343 			    "Critical Failure setting up receive buffers\n");
   2344 			error = ENOMEM;
   2345 			goto err_rx_desc;
   2346 		}
   2347 	}
   2348 
   2349 	/*
   2350 	 * Finally set up the queue holding structs
   2351 	 */
   2352 	for (int i = 0; i < adapter->num_queues; i++) {
   2353 		que = &adapter->queues[i];
   2354 		que->adapter = adapter;
   2355 		que->me = i;
   2356 		que->txr = &adapter->tx_rings[i];
   2357 		que->rxr = &adapter->rx_rings[i];
   2358 
   2359 		mutex_init(&que->dc_mtx, MUTEX_DEFAULT, IPL_NET);
   2360 		que->disabled_count = 0;
   2361 	}
   2362 
   2363 	return (0);
   2364 
   2365 err_rx_desc:
   2366 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
   2367 		ixgbe_dma_free(adapter, &rxr->rxdma);
   2368 err_tx_desc:
   2369 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
   2370 		ixgbe_dma_free(adapter, &txr->txdma);
   2371 	free(adapter->rx_rings, M_DEVBUF);
   2372 rx_fail:
   2373 	free(adapter->tx_rings, M_DEVBUF);
   2374 tx_fail:
   2375 	free(adapter->queues, M_DEVBUF);
   2376 fail:
   2377 	return (error);
   2378 } /* ixgbe_allocate_queues */
   2379