ix_txrx.c revision 1.38
      1 /* $NetBSD: ix_txrx.c,v 1.38 2018/04/02 05:02:55 knakahara Exp $ */
      2 
      3 /******************************************************************************
      4 
      5   Copyright (c) 2001-2017, Intel Corporation
      6   All rights reserved.
      7 
      8   Redistribution and use in source and binary forms, with or without
      9   modification, are permitted provided that the following conditions are met:
     10 
     11    1. Redistributions of source code must retain the above copyright notice,
     12       this list of conditions and the following disclaimer.
     13 
     14    2. Redistributions in binary form must reproduce the above copyright
     15       notice, this list of conditions and the following disclaimer in the
     16       documentation and/or other materials provided with the distribution.
     17 
     18    3. Neither the name of the Intel Corporation nor the names of its
     19       contributors may be used to endorse or promote products derived from
     20       this software without specific prior written permission.
     21 
     22   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     23   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     24   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     25   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     26   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     27   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     28   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     29   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     30   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     31   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     32   POSSIBILITY OF SUCH DAMAGE.
     33 
     34 ******************************************************************************/
     35 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 321476 2017-07-25 14:38:30Z sbruno $*/
     36 
     37 /*
     38  * Copyright (c) 2011 The NetBSD Foundation, Inc.
     39  * All rights reserved.
     40  *
     41  * This code is derived from software contributed to The NetBSD Foundation
     42  * by Coyote Point Systems, Inc.
     43  *
     44  * Redistribution and use in source and binary forms, with or without
     45  * modification, are permitted provided that the following conditions
     46  * are met:
     47  * 1. Redistributions of source code must retain the above copyright
     48  *    notice, this list of conditions and the following disclaimer.
     49  * 2. Redistributions in binary form must reproduce the above copyright
     50  *    notice, this list of conditions and the following disclaimer in the
     51  *    documentation and/or other materials provided with the distribution.
     52  *
     53  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     54  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     55  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     56  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     57  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     58  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     59  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     60  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     61  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     62  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     63  * POSSIBILITY OF SUCH DAMAGE.
     64  */
     65 
     66 #include "opt_inet.h"
     67 #include "opt_inet6.h"
     68 
     69 #include "ixgbe.h"
     70 
     71 /*
     72  * HW RSC control:
     73  *  This feature only works with IPv4,
     74  *  and only on 82599 and later devices.
     75  *  It also breaks IP forwarding, and
     76  *  unlike software LRO it cannot be
     77  *  controlled by the stack.  For these
     78  *  reasons it is left off by default,
     79  *  with no tunable interface; enabling
     80  *  it requires recompiling with
     81  *  ixgbe_rsc_enable set to TRUE.
     82  */
     83 static bool ixgbe_rsc_enable = FALSE;
     84 
     85 /*
     86  * For Flow Director: the number of
     87  * TX packets between samples taken
     88  * for the filter pool; by default
     89  * every 20th packet is probed.
     90  *
     91  * Setting this to 0 disables the
     92  * feature.
     93  */
     94 static int atr_sample_rate = 20;
     95 
     96 /************************************************************************
     97  *  Local Function prototypes
     98  ************************************************************************/
     99 static void          ixgbe_setup_transmit_ring(struct tx_ring *);
    100 static void          ixgbe_free_transmit_buffers(struct tx_ring *);
    101 static int           ixgbe_setup_receive_ring(struct rx_ring *);
    102 static void          ixgbe_free_receive_buffers(struct rx_ring *);
    103 static void          ixgbe_rx_checksum(u32, struct mbuf *, u32,
    104                                        struct ixgbe_hw_stats *);
    105 static void          ixgbe_refresh_mbufs(struct rx_ring *, int);
    106 static void          ixgbe_drain(struct ifnet *, struct tx_ring *);
    107 static int           ixgbe_xmit(struct tx_ring *, struct mbuf *);
    108 static int           ixgbe_tx_ctx_setup(struct tx_ring *,
    109                                         struct mbuf *, u32 *, u32 *);
    110 static int           ixgbe_tso_setup(struct tx_ring *,
    111                                      struct mbuf *, u32 *, u32 *);
    112 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
    113 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
    114                                     struct mbuf *, u32);
    115 static int           ixgbe_dma_malloc(struct adapter *, bus_size_t,
    116                                       struct ixgbe_dma_alloc *, int);
    117 static void          ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
    118 
    119 static void	ixgbe_setup_hw_rsc(struct rx_ring *);
    120 
    121 /************************************************************************
    122  * ixgbe_legacy_start_locked - Transmit entry point
    123  *
    124  *   Called by the stack to initiate a transmit.
    125  *   The driver will remain in this routine as long as there are
    126  *   packets to transmit and transmit resources are available.
    127  *   In case resources are not available, the stack is notified
    128  *   and the packet is requeued.
    129  ************************************************************************/
    130 int
    131 ixgbe_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
    132 {
    133 	int rc;
    134 	struct mbuf    *m_head;
    135 	struct adapter *adapter = txr->adapter;
    136 
    137 	IXGBE_TX_LOCK_ASSERT(txr);
    138 
    139 	if (!adapter->link_active) {
    140 		/*
    141 		 * Discard all packets buffered in the IFQ so that stale
    142 		 * packets are not sent when the link comes back up.
    143 		 */
    144 		ixgbe_drain(ifp, txr);
    145 		return (ENETDOWN);
    146 	}
    147 	if ((ifp->if_flags & IFF_RUNNING) == 0)
    148 		return (ENETDOWN);
    149 
    150 	while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
    151 		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
    152 			break;
    153 
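        		/*
        		 * Poll first and dequeue only after the transmit attempt:
        		 * on EAGAIN the packet is left on if_snd and retried on a
        		 * later call instead of being requeued explicitly.
        		 */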
    154 		IFQ_POLL(&ifp->if_snd, m_head);
    155 		if (m_head == NULL)
    156 			break;
    157 
    158 		if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
    159 			break;
    160 		}
    161 		IFQ_DEQUEUE(&ifp->if_snd, m_head);
    162 		if (rc != 0) {
    163 			m_freem(m_head);
    164 			continue;
    165 		}
    166 
    167 		/* Send a copy of the frame to the BPF listener */
    168 		bpf_mtap(ifp, m_head);
    169 	}
    170 
    171 	return IXGBE_SUCCESS;
    172 } /* ixgbe_legacy_start_locked */
    173 
    174 /************************************************************************
    175  * ixgbe_legacy_start
    176  *
    177  *   Called by the stack, this always uses the first tx ring,
    178  *   and should not be used with multiqueue tx enabled.
    179  ************************************************************************/
    180 void
    181 ixgbe_legacy_start(struct ifnet *ifp)
    182 {
    183 	struct adapter *adapter = ifp->if_softc;
    184 	struct tx_ring *txr = adapter->tx_rings;
    185 
    186 	if (ifp->if_flags & IFF_RUNNING) {
    187 		IXGBE_TX_LOCK(txr);
    188 		ixgbe_legacy_start_locked(ifp, txr);
    189 		IXGBE_TX_UNLOCK(txr);
    190 	}
    191 } /* ixgbe_legacy_start */
    192 
    193 /************************************************************************
    194  * ixgbe_mq_start - Multiqueue Transmit Entry Point
    195  *
    196  *   (if_transmit function)
    197  ************************************************************************/
    198 int
    199 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
    200 {
    201 	struct adapter	*adapter = ifp->if_softc;
    202 	struct tx_ring	*txr;
    203 	int 		i, err = 0;
    204 #ifdef RSS
    205 	uint32_t bucket_id;
    206 #endif
    207 
    208 	/*
    209 	 * When doing RSS, map it to the same outbound queue
    210 	 * as the incoming flow would be mapped to.
    211 	 *
    212 	 * If everything is setup correctly, it should be the
    213 	 * same bucket that the current CPU we're on is.
    214 	 */
    215 #ifdef RSS
    216 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
    217 		if ((adapter->feat_en & IXGBE_FEATURE_RSS) &&
    218 		    (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
    219 		    &bucket_id) == 0)) {
    220 			i = bucket_id % adapter->num_queues;
    221 #ifdef IXGBE_DEBUG
    222 			if (bucket_id > adapter->num_queues)
    223 				if_printf(ifp,
    224 				    "bucket_id (%d) > num_queues (%d)\n",
    225 				    bucket_id, adapter->num_queues);
    226 #endif
    227 		} else
    228 			i = m->m_pkthdr.flowid % adapter->num_queues;
    229 	} else
    230 #endif /* RSS */
    231 		i = cpu_index(curcpu()) % adapter->num_queues;
    232 
    233 	/* Check for a hung queue and pick alternative */
    234 	if (((1 << i) & adapter->active_queues) == 0)
    235 		i = ffs64(adapter->active_queues);
    236 
    237 	txr = &adapter->tx_rings[i];
    238 
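        	/*
        	 * pcq_put() is lockless, so the packet can be queued without
        	 * holding the TX lock; the actual transmit runs below under
        	 * the lock, or is deferred to a softint or workqueue.
        	 */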
    239 	err = pcq_put(txr->txr_interq, m);
    240 	if (err == false) {
    241 		m_freem(m);
    242 		txr->pcq_drops.ev_count++;
    243 		return (ENOBUFS);
    244 	}
    245 	if (IXGBE_TX_TRYLOCK(txr)) {
    246 		ixgbe_mq_start_locked(ifp, txr);
    247 		IXGBE_TX_UNLOCK(txr);
    248 	} else {
    249 		if (adapter->txrx_use_workqueue) {
    250 			/*
    251 			 * This function is not called in interrupt context, but
    252 			 * it can run in fast softint context right after receiving
    253 			 * forwarded packets, so guard the workqueue against being
    254 			 * enqueued twice when the machine handles both locally
    255 			 * generated and forwarded packets.  The per-CPU flag is
    256 			 * cleared again in ixgbe_deferred_mq_start_work().
    257 			 */
    258 			u_int *enqueued = percpu_getref(adapter->txr_wq_enqueued);
    259 			if (*enqueued == 0) {
    260 				*enqueued = 1;
    261 				percpu_putref(adapter->txr_wq_enqueued);
    262 				workqueue_enqueue(adapter->txr_wq, &txr->wq_cookie, curcpu());
    263 			} else
    264 				percpu_putref(adapter->txr_wq_enqueued);
    265 		} else
    266 			softint_schedule(txr->txr_si);
    267 	}
    268 
    269 	return (0);
    270 } /* ixgbe_mq_start */
    271 
    272 /************************************************************************
    273  * ixgbe_mq_start_locked
    274  ************************************************************************/
    275 int
    276 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
    277 {
    278 	struct mbuf    *next;
    279 	int            enqueued = 0, err = 0;
    280 
    281 	if (!txr->adapter->link_active) {
    282 		/*
    283 		 * Discard all packets buffered in txr_interq so that stale
    284 		 * packets are not sent when the link comes back up.
    285 		 */
    286 		ixgbe_drain(ifp, txr);
    287 		return (ENETDOWN);
    288 	}
    289 	if ((ifp->if_flags & IFF_RUNNING) == 0)
    290 		return (ENETDOWN);
    291 
    292 	/* Process the queue */
    293 	while ((next = pcq_get(txr->txr_interq)) != NULL) {
    294 		if ((err = ixgbe_xmit(txr, next)) != 0) {
    295 			m_freem(next);
    296 			/* All errors are counted in ixgbe_xmit() */
    297 			break;
    298 		}
    299 		enqueued++;
    300 #if __FreeBSD_version >= 1100036
    301 		/*
    302 		 * Since we're looking at the tx ring, we can check
    303 		 * to see if we're a VF by examining our tail register
    304 		 * address.
    305 		 */
    306 		if ((txr->adapter->feat_en & IXGBE_FEATURE_VF) &&
    307 		    (next->m_flags & M_MCAST))
    308 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
    309 #endif
    310 		/* Send a copy of the frame to the BPF listener */
    311 		bpf_mtap(ifp, next);
    312 		if ((ifp->if_flags & IFF_RUNNING) == 0)
    313 			break;
    314 	}
    315 
    316 	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->adapter))
    317 		ixgbe_txeof(txr);
    318 
    319 	return (err);
    320 } /* ixgbe_mq_start_locked */
    321 
    322 /************************************************************************
    323  * ixgbe_deferred_mq_start
    324  *
    325  *   Called from a softint and workqueue (indirectly) to drain queued
    326  *   transmit packets.
    327  ************************************************************************/
    328 void
    329 ixgbe_deferred_mq_start(void *arg)
    330 {
    331 	struct tx_ring *txr = arg;
    332 	struct adapter *adapter = txr->adapter;
    333 	struct ifnet   *ifp = adapter->ifp;
    334 
    335 	IXGBE_TX_LOCK(txr);
    336 	if (pcq_peek(txr->txr_interq) != NULL)
    337 		ixgbe_mq_start_locked(ifp, txr);
    338 	IXGBE_TX_UNLOCK(txr);
    339 } /* ixgbe_deferred_mq_start */
    340 
    341 /************************************************************************
    342  * ixgbe_deferred_mq_start_work
    343  *
    344  *   Called from a workqueue to drain queued transmit packets.
    345  ************************************************************************/
    346 void
    347 ixgbe_deferred_mq_start_work(struct work *wk, void *arg)
    348 {
    349 	struct tx_ring *txr = container_of(wk, struct tx_ring, wq_cookie);
    350 	struct adapter *adapter = txr->adapter;
    351 	u_int *enqueued = percpu_getref(adapter->txr_wq_enqueued);
    352 	*enqueued = 0;
    353 	percpu_putref(adapter->txr_wq_enqueued);
    354 
    355 	ixgbe_deferred_mq_start(txr);
    356 } /* ixgbe_deferred_mq_start_work */
    357 
    358 /************************************************************************
    359  * ixgbe_drain_all
    360  ************************************************************************/
    361 void
    362 ixgbe_drain_all(struct adapter *adapter)
    363 {
    364 	struct ifnet *ifp = adapter->ifp;
    365 	struct ix_queue *que = adapter->queues;
    366 
    367 	for (int i = 0; i < adapter->num_queues; i++, que++) {
    368 		struct tx_ring  *txr = que->txr;
    369 
    370 		IXGBE_TX_LOCK(txr);
    371 		ixgbe_drain(ifp, txr);
    372 		IXGBE_TX_UNLOCK(txr);
    373 	}
    374 }
    375 
    376 /************************************************************************
    377  * ixgbe_xmit
    378  *
    379  *   Maps the mbufs to tx descriptors, allowing the
    380  *   TX engine to transmit the packets.
    381  *
    382  *   Return 0 on success, positive on failure
    383  ************************************************************************/
    384 static int
    385 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
    386 {
    387 	struct adapter          *adapter = txr->adapter;
    388 	struct ixgbe_tx_buf     *txbuf;
    389 	union ixgbe_adv_tx_desc *txd = NULL;
    390 	struct ifnet	        *ifp = adapter->ifp;
    391 	int                     i, j, error;
    392 	int                     first;
    393 	u32                     olinfo_status = 0, cmd_type_len;
    394 	bool                    remap = TRUE;
    395 	bus_dmamap_t            map;
    396 
    397 	/* Basic descriptor defines */
    398 	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
    399 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
    400 
    401 	if (vlan_has_tag(m_head))
    402 		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
    403 
    404 	/*
    405 	 * It is important to capture the first descriptor used,
    406 	 * because its tx_buffer entry will hold the pointer to the
    407 	 * descriptor we tell the hardware to report back on (EOP).
    408 	 */
    409 	first = txr->next_avail_desc;
    410 	txbuf = &txr->tx_buffers[first];
    411 	map = txbuf->map;
    412 
    413 	/*
    414 	 * Map the packet for DMA.
    415 	 */
    416 retry:
    417 	error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map, m_head,
    418 	    BUS_DMA_NOWAIT);
    419 
    420 	if (__predict_false(error)) {
    421 		struct mbuf *m;
    422 
    423 		switch (error) {
    424 		case EAGAIN:
    425 			txr->q_eagain_tx_dma_setup++;
    426 			return EAGAIN;
    427 		case ENOMEM:
    428 			txr->q_enomem_tx_dma_setup++;
    429 			return EAGAIN;
    430 		case EFBIG:
    431 			/* Try it again? - one try */
    432 			if (remap == TRUE) {
    433 				remap = FALSE;
    434 				/*
    435 				 * XXX: m_defrag will choke on
    436 				 * non-MCLBYTES-sized clusters
    437 				 */
    438 				txr->q_efbig_tx_dma_setup++;
    439 				m = m_defrag(m_head, M_NOWAIT);
    440 				if (m == NULL) {
    441 					txr->q_mbuf_defrag_failed++;
    442 					return ENOBUFS;
    443 				}
    444 				m_head = m;
    445 				goto retry;
    446 			} else {
    447 				txr->q_efbig2_tx_dma_setup++;
    448 				return error;
    449 			}
    450 		case EINVAL:
    451 			txr->q_einval_tx_dma_setup++;
    452 			return error;
    453 		default:
    454 			txr->q_other_tx_dma_setup++;
    455 			return error;
    456 		}
    457 	}
    458 
    459 	/* Make certain there are enough descriptors (segments + context descriptor + spare) */
    460 	if (txr->tx_avail < (map->dm_nsegs + 2)) {
    461 		txr->no_desc_avail.ev_count++;
    462 		ixgbe_dmamap_unload(txr->txtag, txbuf->map);
    463 		return EAGAIN;
    464 	}
    465 
    466 	/*
    467 	 * Set up the appropriate offload context;
    468 	 * this will consume the first descriptor.
    469 	 */
    470 	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
    471 	if (__predict_false(error)) {
    472 		return (error);
    473 	}
    474 
    475 	/* Do the flow director magic */
    476 	if ((adapter->feat_en & IXGBE_FEATURE_FDIR) &&
    477 	    (txr->atr_sample) && (!adapter->fdir_reinit)) {
    478 		++txr->atr_count;
    479 		if (txr->atr_count >= atr_sample_rate) {
    480 			ixgbe_atr(txr, m_head);
    481 			txr->atr_count = 0;
    482 		}
    483 	}
    484 
    485 	olinfo_status |= IXGBE_ADVTXD_CC;
    486 	i = txr->next_avail_desc;
    487 	for (j = 0; j < map->dm_nsegs; j++) {
    488 		bus_size_t seglen;
    489 		bus_addr_t segaddr;
    490 
    491 		txbuf = &txr->tx_buffers[i];
    492 		txd = &txr->tx_base[i];
    493 		seglen = map->dm_segs[j].ds_len;
    494 		segaddr = htole64(map->dm_segs[j].ds_addr);
    495 
    496 		txd->read.buffer_addr = segaddr;
    497 		txd->read.cmd_type_len = htole32(txr->txd_cmd |
    498 		    cmd_type_len | seglen);
    499 		txd->read.olinfo_status = htole32(olinfo_status);
    500 
    501 		if (++i == txr->num_desc)
    502 			i = 0;
    503 	}
    504 
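        	/* Mark the final data descriptor EOP (end of packet) and RS (report status) */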
    505 	txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
    506 	txr->tx_avail -= map->dm_nsegs;
    507 	txr->next_avail_desc = i;
    508 
    509 	txbuf->m_head = m_head;
    510 	/*
    511 	 * Here we swap the maps so that the last descriptor,
    512 	 * which gets the completion interrupt, has the real
    513 	 * (loaded) map, and the first descriptor gets the
    514 	 * unused map from this buffer.
    515 	 */
    516 	txr->tx_buffers[first].map = txbuf->map;
    517 	txbuf->map = map;
    518 	bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
    519 	    BUS_DMASYNC_PREWRITE);
    520 
    521 	/* Set the EOP descriptor that will be marked done */
    522 	txbuf = &txr->tx_buffers[first];
    523 	txbuf->eop = txd;
    524 
    525 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
    526 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
    527 	/*
    528 	 * Advance the Transmit Descriptor Tail (TDT); this tells
    529 	 * the hardware that this frame is available to transmit.
    530 	 */
    531 	++txr->total_packets.ev_count;
    532 	IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
    533 
    534 	/*
    535 	 * XXXX NOMPSAFE: ifp->if_data should be percpu.
    536 	 */
    537 	ifp->if_obytes += m_head->m_pkthdr.len;
    538 	if (m_head->m_flags & M_MCAST)
    539 		ifp->if_omcasts++;
    540 
    541 	/* Mark queue as having work */
    542 	if (txr->busy == 0)
    543 		txr->busy = 1;
    544 
    545 	return (0);
    546 } /* ixgbe_xmit */
    547 
    548 /************************************************************************
    549  * ixgbe_drain
    550  ************************************************************************/
    551 static void
    552 ixgbe_drain(struct ifnet *ifp, struct tx_ring *txr)
    553 {
    554 	struct mbuf *m;
    555 
    556 	IXGBE_TX_LOCK_ASSERT(txr);
    557 
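        	/* Only queue 0 services if_snd (the legacy transmit path) */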
    558 	if (txr->me == 0) {
    559 		while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
    560 			IFQ_DEQUEUE(&ifp->if_snd, m);
    561 			m_freem(m);
    562 			IF_DROP(&ifp->if_snd);
    563 		}
    564 	}
    565 
    566 	while ((m = pcq_get(txr->txr_interq)) != NULL) {
    567 		m_freem(m);
    568 		txr->pcq_drops.ev_count++;
    569 	}
    570 }
    571 
    572 /************************************************************************
    573  * ixgbe_allocate_transmit_buffers
    574  *
    575  *   Allocate memory for tx_buffer structures. The tx_buffer stores all
    576  *   the information needed to transmit a packet on the wire. This is
    577  *   called only once at attach; setup is done on every reset.
    578  ************************************************************************/
    579 static int
    580 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
    581 {
    582 	struct adapter      *adapter = txr->adapter;
    583 	device_t            dev = adapter->dev;
    584 	struct ixgbe_tx_buf *txbuf;
    585 	int                 error, i;
    586 
    587 	/*
    588 	 * Setup DMA descriptor areas.
    589 	 */
    590 	error = ixgbe_dma_tag_create(
    591 	         /*      parent */ adapter->osdep.dmat,
    592 	         /*   alignment */ 1,
    593 	         /*      bounds */ 0,
    594 	         /*     maxsize */ IXGBE_TSO_SIZE,
    595 	         /*   nsegments */ adapter->num_segs,
    596 	         /*  maxsegsize */ PAGE_SIZE,
    597 	         /*       flags */ 0,
    598 	                           &txr->txtag);
    599 	if (error != 0) {
    600 		aprint_error_dev(dev, "Unable to allocate TX DMA tag\n");
    601 		goto fail;
    602 	}
    603 
    604 	txr->tx_buffers =
    605 	    (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
    606 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
    607 	if (txr->tx_buffers == NULL) {
    608 		aprint_error_dev(dev, "Unable to allocate tx_buffer memory\n");
    609 		error = ENOMEM;
    610 		goto fail;
    611 	}
    612 
    613 	/* Create the descriptor buffer dma maps */
    614 	txbuf = txr->tx_buffers;
    615 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
    616 		error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
    617 		if (error != 0) {
    618 			aprint_error_dev(dev,
    619 			    "Unable to create TX DMA map (%d)\n", error);
    620 			goto fail;
    621 		}
    622 	}
    623 
    624 	return 0;
    625 fail:
    626 	/* Free everything; this handles the case where setup failed partway through */
    627 #if 0 /* XXX was FreeBSD */
    628 	ixgbe_free_transmit_structures(adapter);
    629 #else
    630 	ixgbe_free_transmit_buffers(txr);
    631 #endif
    632 	return (error);
    633 } /* ixgbe_allocate_transmit_buffers */
    634 
    635 /************************************************************************
    636  * ixgbe_setup_transmit_ring - Initialize a transmit ring.
    637  ************************************************************************/
    638 static void
    639 ixgbe_setup_transmit_ring(struct tx_ring *txr)
    640 {
    641 	struct adapter        *adapter = txr->adapter;
    642 	struct ixgbe_tx_buf   *txbuf;
    643 #ifdef DEV_NETMAP
    644 	struct netmap_adapter *na = NA(adapter->ifp);
    645 	struct netmap_slot    *slot;
    646 #endif /* DEV_NETMAP */
    647 
    648 	/* Clear the old ring contents */
    649 	IXGBE_TX_LOCK(txr);
    650 
    651 #ifdef DEV_NETMAP
    652 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
    653 		/*
    654 		 * (under lock): if in netmap mode, do some consistency
    655 		 * checks and set slot to entry 0 of the netmap ring.
    656 		 */
    657 		slot = netmap_reset(na, NR_TX, txr->me, 0);
    658 	}
    659 #endif /* DEV_NETMAP */
    660 
    661 	bzero((void *)txr->tx_base,
    662 	    (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
    663 	/* Reset indices */
    664 	txr->next_avail_desc = 0;
    665 	txr->next_to_clean = 0;
    666 
    667 	/* Free any existing tx buffers. */
    668 	txbuf = txr->tx_buffers;
    669 	for (int i = 0; i < txr->num_desc; i++, txbuf++) {
    670 		if (txbuf->m_head != NULL) {
    671 			bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
    672 			    0, txbuf->m_head->m_pkthdr.len,
    673 			    BUS_DMASYNC_POSTWRITE);
    674 			ixgbe_dmamap_unload(txr->txtag, txbuf->map);
    675 			m_freem(txbuf->m_head);
    676 			txbuf->m_head = NULL;
    677 		}
    678 
    679 #ifdef DEV_NETMAP
    680 		/*
    681 		 * In netmap mode, set the map for the packet buffer.
    682 		 * NOTE: Some drivers (not this one) also need to set
    683 		 * the physical buffer address in the NIC ring.
    684 		 * Slots in the netmap ring (indexed by "si") are
    685 		 * kring->nkr_hwofs positions "ahead" wrt the
    686 		 * corresponding slot in the NIC ring. In some drivers
    687 		 * (not here) nkr_hwofs can be negative. Function
    688 		 * netmap_idx_n2k() handles wraparounds properly.
    689 		 */
    690 		if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
    691 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
    692 			netmap_load_map(na, txr->txtag,
    693 			    txbuf->map, NMB(na, slot + si));
    694 		}
    695 #endif /* DEV_NETMAP */
    696 
    697 		/* Clear the EOP descriptor pointer */
    698 		txbuf->eop = NULL;
    699 	}
    700 
    701 	/* Set the rate at which we sample packets */
    702 	if (adapter->feat_en & IXGBE_FEATURE_FDIR)
    703 		txr->atr_sample = atr_sample_rate;
    704 
    705 	/* Set number of descriptors available */
    706 	txr->tx_avail = adapter->num_tx_desc;
    707 
    708 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
    709 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
    710 	IXGBE_TX_UNLOCK(txr);
    711 } /* ixgbe_setup_transmit_ring */
    712 
    713 /************************************************************************
    714  * ixgbe_setup_transmit_structures - Initialize all transmit rings.
    715  ************************************************************************/
    716 int
    717 ixgbe_setup_transmit_structures(struct adapter *adapter)
    718 {
    719 	struct tx_ring *txr = adapter->tx_rings;
    720 
    721 	for (int i = 0; i < adapter->num_queues; i++, txr++)
    722 		ixgbe_setup_transmit_ring(txr);
    723 
    724 	return (0);
    725 } /* ixgbe_setup_transmit_structures */
    726 
    727 /************************************************************************
    728  * ixgbe_free_transmit_structures - Free all transmit rings.
    729  ************************************************************************/
    730 void
    731 ixgbe_free_transmit_structures(struct adapter *adapter)
    732 {
    733 	struct tx_ring *txr = adapter->tx_rings;
    734 
    735 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
    736 		ixgbe_free_transmit_buffers(txr);
    737 		ixgbe_dma_free(adapter, &txr->txdma);
    738 		IXGBE_TX_LOCK_DESTROY(txr);
    739 	}
    740 	free(adapter->tx_rings, M_DEVBUF);
    741 } /* ixgbe_free_transmit_structures */
    742 
    743 /************************************************************************
    744  * ixgbe_free_transmit_buffers
    745  *
    746  *   Free transmit ring related data structures.
    747  ************************************************************************/
    748 static void
    749 ixgbe_free_transmit_buffers(struct tx_ring *txr)
    750 {
    751 	struct adapter      *adapter = txr->adapter;
    752 	struct ixgbe_tx_buf *tx_buffer;
    753 	int                 i;
    754 
    755 	INIT_DEBUGOUT("ixgbe_free_transmit_buffers: begin");
    756 
    757 	if (txr->tx_buffers == NULL)
    758 		return;
    759 
    760 	tx_buffer = txr->tx_buffers;
    761 	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
    762 		if (tx_buffer->m_head != NULL) {
    763 			bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
    764 			    0, tx_buffer->m_head->m_pkthdr.len,
    765 			    BUS_DMASYNC_POSTWRITE);
    766 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
    767 			m_freem(tx_buffer->m_head);
    768 			tx_buffer->m_head = NULL;
    769 			if (tx_buffer->map != NULL) {
    770 				ixgbe_dmamap_destroy(txr->txtag,
    771 				    tx_buffer->map);
    772 				tx_buffer->map = NULL;
    773 			}
    774 		} else if (tx_buffer->map != NULL) {
    775 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
    776 			ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
    777 			tx_buffer->map = NULL;
    778 		}
    779 	}
    780 	if (txr->txr_interq != NULL) {
    781 		struct mbuf *m;
    782 
    783 		while ((m = pcq_get(txr->txr_interq)) != NULL)
    784 			m_freem(m);
    785 		pcq_destroy(txr->txr_interq);
    786 	}
    787 	if (txr->tx_buffers != NULL) {
    788 		free(txr->tx_buffers, M_DEVBUF);
    789 		txr->tx_buffers = NULL;
    790 	}
    791 	if (txr->txtag != NULL) {
    792 		ixgbe_dma_tag_destroy(txr->txtag);
    793 		txr->txtag = NULL;
    794 	}
    795 } /* ixgbe_free_transmit_buffers */
    796 
    797 /************************************************************************
    798  * ixgbe_tx_ctx_setup
    799  *
    800  *   Advanced Context Descriptor setup for VLAN, CSUM or TSO
    801  ************************************************************************/
    802 static int
    803 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
    804     u32 *cmd_type_len, u32 *olinfo_status)
    805 {
    806 	struct adapter                   *adapter = txr->adapter;
    807 	struct ixgbe_adv_tx_context_desc *TXD;
    808 	struct ether_vlan_header         *eh;
    809 #ifdef INET
    810 	struct ip                        *ip;
    811 #endif
    812 #ifdef INET6
    813 	struct ip6_hdr                   *ip6;
    814 #endif
    815 	int                              ehdrlen, ip_hlen = 0;
    816 	int                              offload = TRUE;
    817 	int                              ctxd = txr->next_avail_desc;
    818 	u32                              vlan_macip_lens = 0;
    819 	u32                              type_tucmd_mlhl = 0;
    820 	u16                              vtag = 0;
    821 	u16                              etype;
    822 	u8                               ipproto = 0;
    823 	char                             *l3d;
    824 
    825 
    826 	/* First check if TSO is to be used */
    827 	if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4 | M_CSUM_TSOv6)) {
    828 		int rv = ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status);
    829 
    830 		if (rv != 0)
    831 			++adapter->tso_err.ev_count;
    832 		return rv;
    833 	}
    834 
    835 	if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
    836 		offload = FALSE;
    837 
    838 	/* Indicate the whole packet as payload when not doing TSO */
    839 	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
    840 
    841 	/* Now ready a context descriptor */
    842 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
    843 
    844 	/*
    845 	 * In advanced descriptors the vlan tag must
    846 	 * be placed into the context descriptor. Hence
    847 	 * we need to make one even if not doing offloads.
    848 	 */
    849 	if (vlan_has_tag(mp)) {
    850 		vtag = htole16(vlan_get_tag(mp));
    851 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
    852 	} else if (!(txr->adapter->feat_en & IXGBE_FEATURE_NEEDS_CTXD) &&
    853 	           (offload == FALSE))
    854 		return (0);
    855 
    856 	/*
    857 	 * Determine where frame payload starts.
    858 	 * Jump over vlan headers if already present,
    859 	 * helpful for QinQ too.
    860 	 */
    861 	KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
    862 	eh = mtod(mp, struct ether_vlan_header *);
    863 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
    864 		KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
    865 		etype = ntohs(eh->evl_proto);
    866 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
    867 	} else {
    868 		etype = ntohs(eh->evl_encap_proto);
    869 		ehdrlen = ETHER_HDR_LEN;
    870 	}
    871 
    872 	/* Set the ether header length */
    873 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
    874 
    875 	if (offload == FALSE)
    876 		goto no_offloads;
    877 
    878 	/*
    879 	 * If the first mbuf only includes the ethernet header,
    880 	 * jump to the next one
    881 	 * XXX: This assumes the stack splits mbufs containing headers
    882 	 *      on header boundaries
    883 	 * XXX: And assumes the entire IP header is contained in one mbuf
    884 	 */
    885 	if (mp->m_len == ehdrlen && mp->m_next)
    886 		l3d = mtod(mp->m_next, char *);
    887 	else
    888 		l3d = mtod(mp, char *) + ehdrlen;
    889 
    890 	switch (etype) {
    891 #ifdef INET
    892 	case ETHERTYPE_IP:
    893 		ip = (struct ip *)(l3d);
    894 		ip_hlen = ip->ip_hl << 2;
    895 		ipproto = ip->ip_p;
    896 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
    897 		KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
    898 		    ip->ip_sum == 0);
    899 		break;
    900 #endif
    901 #ifdef INET6
    902 	case ETHERTYPE_IPV6:
    903 		ip6 = (struct ip6_hdr *)(l3d);
    904 		ip_hlen = sizeof(struct ip6_hdr);
    905 		ipproto = ip6->ip6_nxt;
    906 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
    907 		break;
    908 #endif
    909 	default:
    910 		offload = false;
    911 		break;
    912 	}
    913 
    914 	if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
    915 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
    916 
    917 	vlan_macip_lens |= ip_hlen;
    918 
    919 	/* No support for offloads for non-L4 next headers */
    920 	switch (ipproto) {
    921 	case IPPROTO_TCP:
    922 		if (mp->m_pkthdr.csum_flags &
    923 		    (M_CSUM_TCPv4 | M_CSUM_TCPv6))
    924 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
    925 		else
    926 			offload = false;
    927 		break;
    928 	case IPPROTO_UDP:
    929 		if (mp->m_pkthdr.csum_flags &
    930 		    (M_CSUM_UDPv4 | M_CSUM_UDPv6))
    931 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
    932 		else
    933 			offload = false;
    934 		break;
    935 	default:
    936 		offload = false;
    937 		break;
    938 	}
    939 
    940 	if (offload) /* Insert L4 checksum into data descriptors */
    941 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
    942 
    943 no_offloads:
    944 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
    945 
    946 	/* Now copy bits into descriptor */
    947 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
    948 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
    949 	TXD->seqnum_seed = htole32(0);
    950 	TXD->mss_l4len_idx = htole32(0);
    951 
    952 	/* We've consumed the first desc, adjust counters */
    953 	if (++ctxd == txr->num_desc)
    954 		ctxd = 0;
    955 	txr->next_avail_desc = ctxd;
    956 	--txr->tx_avail;
    957 
    958 	return (0);
    959 } /* ixgbe_tx_ctx_setup */
    960 
    961 /************************************************************************
    962  * ixgbe_tso_setup
    963  *
    964  *   Setup work for hardware segmentation offload (TSO) on
    965  *   adapters using advanced tx descriptors
    966  ************************************************************************/
    967 static int
    968 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len,
    969     u32 *olinfo_status)
    970 {
    971 	struct ixgbe_adv_tx_context_desc *TXD;
    972 	struct ether_vlan_header         *eh;
    973 #ifdef INET6
    974 	struct ip6_hdr                   *ip6;
    975 #endif
    976 #ifdef INET
    977 	struct ip                        *ip;
    978 #endif
    979 	struct tcphdr                    *th;
    980 	int                              ctxd, ehdrlen, ip_hlen, tcp_hlen;
    981 	u32                              vlan_macip_lens = 0;
    982 	u32                              type_tucmd_mlhl = 0;
    983 	u32                              mss_l4len_idx = 0, paylen;
    984 	u16                              vtag = 0, eh_type;
    985 
    986 	/*
    987 	 * Determine where frame payload starts.
    988 	 * Jump over vlan headers if already present
    989 	 */
    990 	eh = mtod(mp, struct ether_vlan_header *);
    991 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
    992 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
    993 		eh_type = eh->evl_proto;
    994 	} else {
    995 		ehdrlen = ETHER_HDR_LEN;
    996 		eh_type = eh->evl_encap_proto;
    997 	}
    998 
    999 	switch (ntohs(eh_type)) {
   1000 #ifdef INET
   1001 	case ETHERTYPE_IP:
   1002 		ip = (struct ip *)(mp->m_data + ehdrlen);
   1003 		if (ip->ip_p != IPPROTO_TCP)
   1004 			return (ENXIO);
   1005 		ip->ip_sum = 0;
   1006 		ip_hlen = ip->ip_hl << 2;
   1007 		th = (struct tcphdr *)((char *)ip + ip_hlen);
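        		/*
        		 * Seed the TCP checksum field with the pseudo-header
        		 * checksum; the hardware computes the remainder for
        		 * each segment during TSO.
        		 */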
   1008 		th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
   1009 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
   1010 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
   1011 		/* Tell transmit desc to also do IPv4 checksum. */
   1012 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
   1013 		break;
   1014 #endif
   1015 #ifdef INET6
   1016 	case ETHERTYPE_IPV6:
   1017 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
   1018 		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
   1019 		if (ip6->ip6_nxt != IPPROTO_TCP)
   1020 			return (ENXIO);
   1021 		ip_hlen = sizeof(struct ip6_hdr);
   1022 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
   1023 		th = (struct tcphdr *)((char *)ip6 + ip_hlen);
   1024 		th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
   1025 		    &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
   1026 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
   1027 		break;
   1028 #endif
   1029 	default:
   1030 		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
   1031 		    __func__, ntohs(eh_type));
   1032 		break;
   1033 	}
   1034 
   1035 	ctxd = txr->next_avail_desc;
   1036 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
   1037 
   1038 	tcp_hlen = th->th_off << 2;
   1039 
   1040 	/* This is used in the transmit desc in encap */
   1041 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
   1042 
   1043 	/* VLAN MACLEN IPLEN */
   1044 	if (vlan_has_tag(mp)) {
   1045 		vtag = htole16(vlan_get_tag(mp));
   1046 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
   1047 	}
   1048 
   1049 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
   1050 	vlan_macip_lens |= ip_hlen;
   1051 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
   1052 
   1053 	/* ADV DTYPE TUCMD */
   1054 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
   1055 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
   1056 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
   1057 
   1058 	/* MSS L4LEN IDX */
   1059 	mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
   1060 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
   1061 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
   1062 
   1063 	TXD->seqnum_seed = htole32(0);
   1064 
   1065 	if (++ctxd == txr->num_desc)
   1066 		ctxd = 0;
   1067 
   1068 	txr->tx_avail--;
   1069 	txr->next_avail_desc = ctxd;
   1070 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
   1071 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
   1072 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
   1073 	++txr->tso_tx.ev_count;
   1074 
   1075 	return (0);
   1076 } /* ixgbe_tso_setup */
   1077 
   1078 
   1079 /************************************************************************
   1080  * ixgbe_txeof
   1081  *
   1082  *   Examine each tx_buffer in the used queue. If the hardware is done
   1083  *   processing the packet then free associated resources. The
   1084  *   tx_buffer is put back on the free queue.
   1085  ************************************************************************/
   1086 bool
   1087 ixgbe_txeof(struct tx_ring *txr)
   1088 {
   1089 	struct adapter		*adapter = txr->adapter;
   1090 	struct ifnet		*ifp = adapter->ifp;
   1091 	struct ixgbe_tx_buf	*buf;
   1092 	union ixgbe_adv_tx_desc *txd;
   1093 	u32			work, processed = 0;
   1094 	u32			limit = adapter->tx_process_limit;
   1095 
   1096 	KASSERT(mutex_owned(&txr->tx_mtx));
   1097 
   1098 #ifdef DEV_NETMAP
   1099 	if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) &&
   1100 	    (adapter->ifp->if_capenable & IFCAP_NETMAP)) {
   1101 		struct netmap_adapter *na = NA(adapter->ifp);
   1102 		struct netmap_kring *kring = &na->tx_rings[txr->me];
   1103 		txd = txr->tx_base;
   1104 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1105 		    BUS_DMASYNC_POSTREAD);
   1106 		/*
   1107 		 * In netmap mode, all the work is done in the context
   1108 		 * of the client thread. Interrupt handlers only wake up
   1109 		 * clients, which may be sleeping on individual rings
   1110 		 * or on a global resource for all rings.
   1111 		 * To implement tx interrupt mitigation, we wake up the client
   1112 		 * thread roughly every half ring, even if the NIC interrupts
   1113 		 * more frequently. This is implemented as follows:
   1114 		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
   1115 		 *   the slot that should wake up the thread (nkr_num_slots
   1116 		 *   means the user thread should not be woken up);
   1117 		 * - the driver ignores tx interrupts unless netmap_mitigate=0
   1118 		 *   or the slot has the DD bit set.
   1119 		 */
   1120 		if (!netmap_mitigate ||
   1121 		    (kring->nr_kflags < kring->nkr_num_slots &&
   1122 		     txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
   1123 			netmap_tx_irq(ifp, txr->me);
   1124 		}
   1125 		return false;
   1126 	}
   1127 #endif /* DEV_NETMAP */
   1128 
   1129 	if (txr->tx_avail == txr->num_desc) {
   1130 		txr->busy = 0;
   1131 		return false;
   1132 	}
   1133 
   1134 	/* Get work starting point */
   1135 	work = txr->next_to_clean;
   1136 	buf = &txr->tx_buffers[work];
   1137 	txd = &txr->tx_base[work];
   1138 	work -= txr->num_desc; /* The distance to ring end */
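        	/*
        	 * "work" is now the negative distance to the end of the ring;
        	 * it reaches zero exactly at the wrap point, which is why the
        	 * wrap checks below test for !work.
        	 */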
   1139 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1140 	    BUS_DMASYNC_POSTREAD);
   1141 
   1142 	do {
   1143 		union ixgbe_adv_tx_desc *eop = buf->eop;
   1144 		if (eop == NULL) /* No work */
   1145 			break;
   1146 
   1147 		if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
   1148 			break;	/* I/O not complete */
   1149 
   1150 		if (buf->m_head) {
   1151 			txr->bytes += buf->m_head->m_pkthdr.len;
   1152 			bus_dmamap_sync(txr->txtag->dt_dmat, buf->map,
   1153 			    0, buf->m_head->m_pkthdr.len,
   1154 			    BUS_DMASYNC_POSTWRITE);
   1155 			ixgbe_dmamap_unload(txr->txtag, buf->map);
   1156 			m_freem(buf->m_head);
   1157 			buf->m_head = NULL;
   1158 		}
   1159 		buf->eop = NULL;
   1160 		++txr->tx_avail;
   1161 
   1162 		/* We clean the range if multi segment */
   1163 		while (txd != eop) {
   1164 			++txd;
   1165 			++buf;
   1166 			++work;
   1167 			/* wrap the ring? */
   1168 			if (__predict_false(!work)) {
   1169 				work -= txr->num_desc;
   1170 				buf = txr->tx_buffers;
   1171 				txd = txr->tx_base;
   1172 			}
   1173 			if (buf->m_head) {
   1174 				txr->bytes +=
   1175 				    buf->m_head->m_pkthdr.len;
   1176 				bus_dmamap_sync(txr->txtag->dt_dmat,
   1177 				    buf->map,
   1178 				    0, buf->m_head->m_pkthdr.len,
   1179 				    BUS_DMASYNC_POSTWRITE);
   1180 				ixgbe_dmamap_unload(txr->txtag,
   1181 				    buf->map);
   1182 				m_freem(buf->m_head);
   1183 				buf->m_head = NULL;
   1184 			}
   1185 			++txr->tx_avail;
   1186 			buf->eop = NULL;
   1187 
   1188 		}
   1189 		++txr->packets;
   1190 		++processed;
   1191 		++ifp->if_opackets;
   1192 
   1193 		/* Try the next packet */
   1194 		++txd;
   1195 		++buf;
   1196 		++work;
   1197 		/* reset with a wrap */
   1198 		if (__predict_false(!work)) {
   1199 			work -= txr->num_desc;
   1200 			buf = txr->tx_buffers;
   1201 			txd = txr->tx_base;
   1202 		}
   1203 		prefetch(txd);
   1204 	} while (__predict_true(--limit));
   1205 
   1206 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1207 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   1208 
   1209 	work += txr->num_desc;
   1210 	txr->next_to_clean = work;
   1211 
   1212 	/*
   1213 	 * Queue hang detection: we know there is work
   1214 	 * outstanding, or the early return above would have
   1215 	 * been taken, so bump the busy count when nothing was
   1216 	 * cleaned.  The local timer checks this count and
   1217 	 * marks the queue HUNG once it exceeds the maximum
   1218 	 * number of attempts.
   1219 	 */
   1220 	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
   1221 		++txr->busy;
   1222 	/*
   1223 	 * If anything was cleaned, reset the state to 1;
   1224 	 * note this also clears HUNG if it was set.
   1225 	 */
   1226 	if (processed)
   1227 		txr->busy = 1;
   1228 
   1229 	if (txr->tx_avail == txr->num_desc)
   1230 		txr->busy = 0;
   1231 
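        	/* Return true when the cleanup budget was exhausted and more work may remain */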
   1232 	return ((limit > 0) ? false : true);
   1233 } /* ixgbe_txeof */
   1234 
   1235 /************************************************************************
   1236  * ixgbe_rsc_count
   1237  *
   1238  *   Used to detect a descriptor that has been merged by Hardware RSC.
   1239  ************************************************************************/
   1240 static inline u32
   1241 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
   1242 {
   1243 	return (le32toh(rx->wb.lower.lo_dword.data) &
   1244 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
   1245 } /* ixgbe_rsc_count */
   1246 
   1247 /************************************************************************
   1248  * ixgbe_setup_hw_rsc
   1249  *
   1250  *   Initialize the Hardware RSC (LRO) feature on 82599
   1251  *   for an RX ring.  It is toggled by the LRO capability
   1252  *   even though it is transparent to the stack.
   1253  *
   1254  *   NOTE: Since this HW feature only works with IPv4 and
   1255  *         testing has shown soft LRO to be as effective,
   1256  *         this feature will be disabled by default.
   1257  ************************************************************************/
   1258 static void
   1259 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
   1260 {
   1261 	struct	adapter  *adapter = rxr->adapter;
   1262 	struct	ixgbe_hw *hw = &adapter->hw;
   1263 	u32              rscctrl, rdrxctl;
   1264 
   1265 	/* If turning LRO/RSC off we need to disable it */
   1266 	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
   1267 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
   1268 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
        		/* Write back so RSC is actually disabled in hardware */
        		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
   1269 		return;
   1270 	}
   1271 
   1272 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
   1273 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
   1274 #ifdef DEV_NETMAP
   1275 	/* Always strip CRC unless Netmap disabled it */
   1276 	if (!(adapter->feat_en & IXGBE_FEATURE_NETMAP) ||
   1277 	    !(adapter->ifp->if_capenable & IFCAP_NETMAP) ||
   1278 	    ix_crcstrip)
   1279 #endif /* DEV_NETMAP */
   1280 		rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
   1281 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
   1282 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
   1283 
   1284 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
   1285 	rscctrl |= IXGBE_RSCCTL_RSCEN;
   1286 	/*
   1287 	 * Limit the total number of descriptors that
   1288 	 * can be combined, so it does not exceed 64K
   1289 	 */
   1290 	if (rxr->mbuf_sz == MCLBYTES)
   1291 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
   1292 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
   1293 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
   1294 	else if (rxr->mbuf_sz == MJUM9BYTES)
   1295 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
   1296 	else  /* Using 16K cluster */
   1297 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
   1298 
   1299 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
   1300 
   1301 	/* Enable TCP header recognition */
   1302 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
   1303 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR));
   1304 
   1305 	/* Disable RSC for ACK packets */
   1306 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
   1307 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
   1308 
   1309 	rxr->hw_rsc = TRUE;
   1310 } /* ixgbe_setup_hw_rsc */
   1311 
   1312 /************************************************************************
   1313  * ixgbe_refresh_mbufs
   1314  *
   1315  *   Refresh mbuf buffers for RX descriptor rings
   1316  *    - keeps its own state, so discards due to resource
   1317  *      exhaustion are unnecessary; if an mbuf cannot be obtained
   1318  *      it simply returns, keeping its placeholder, and can be
   1319  *      called again later to retry.
   1320  ************************************************************************/
   1321 static void
   1322 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
   1323 {
   1324 	struct adapter      *adapter = rxr->adapter;
   1325 	struct ixgbe_rx_buf *rxbuf;
   1326 	struct mbuf         *mp;
   1327 	int                 i, j, error;
   1328 	bool                refreshed = false;
   1329 
   1330 	i = j = rxr->next_to_refresh;
   1331 	/* Control the loop with one beyond */
   1332 	if (++j == rxr->num_desc)
   1333 		j = 0;
   1334 
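        	/*
        	 * j runs one slot ahead of i, so next_to_refresh always
        	 * points at the last descriptor actually refreshed and the
        	 * tail write below never exposes an unpopulated slot.
        	 */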
   1335 	while (j != limit) {
   1336 		rxbuf = &rxr->rx_buffers[i];
   1337 		if (rxbuf->buf == NULL) {
   1338 			mp = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
   1339 			    MT_DATA, M_PKTHDR, rxr->mbuf_sz);
   1340 			if (mp == NULL) {
   1341 				rxr->no_jmbuf.ev_count++;
   1342 				goto update;
   1343 			}
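        			/*
        			 * For frames that fit in a standard cluster, shift
        			 * the payload by ETHER_ALIGN so the IP header ends
        			 * up 32-bit aligned.
        			 */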
   1344 			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
   1345 				m_adj(mp, ETHER_ALIGN);
   1346 		} else
   1347 			mp = rxbuf->buf;
   1348 
   1349 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   1350 
   1351 		/* If we're dealing with an mbuf that was copied rather
   1352 		 * than replaced, there's no need to go through busdma.
   1353 		 */
   1354 		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
   1355 			/* Get the memory mapping */
   1356 			ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   1357 			error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
   1358 			    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
   1359 			if (error != 0) {
   1360 				printf("Refresh mbufs: payload dmamap load failure - %d\n", error);
   1361 				m_free(mp);
   1362 				rxbuf->buf = NULL;
   1363 				goto update;
   1364 			}
   1365 			rxbuf->buf = mp;
   1366 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1367 			    0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
   1368 			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
   1369 			    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1370 		} else {
   1371 			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
   1372 			rxbuf->flags &= ~IXGBE_RX_COPY;
   1373 		}
   1374 
   1375 		refreshed = true;
   1376 		/* Next is precalculated */
   1377 		i = j;
   1378 		rxr->next_to_refresh = i;
   1379 		if (++j == rxr->num_desc)
   1380 			j = 0;
   1381 	}
   1382 
   1383 update:
   1384 	if (refreshed) /* Update hardware tail index */
   1385 		IXGBE_WRITE_REG(&adapter->hw, rxr->tail, rxr->next_to_refresh);
   1386 
   1387 	return;
   1388 } /* ixgbe_refresh_mbufs */
   1389 
   1390 /************************************************************************
   1391  * ixgbe_allocate_receive_buffers
   1392  *
   1393  *   Allocate memory for rx_buffer structures. Since we use one
   1394  *   rx_buffer per received packet, the maximum number of rx_buffer's
   1395  *   that we'll need is equal to the number of receive descriptors
   1396  *   that we've allocated.
   1397  ************************************************************************/
   1398 static int
   1399 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
   1400 {
   1401 	struct	adapter     *adapter = rxr->adapter;
   1402 	device_t            dev = adapter->dev;
   1403 	struct ixgbe_rx_buf *rxbuf;
   1404 	int                 bsize, error;
   1405 
   1406 	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
   1407 	rxr->rx_buffers = (struct ixgbe_rx_buf *)malloc(bsize, M_DEVBUF,
   1408 	    M_NOWAIT | M_ZERO);
   1409 	if (rxr->rx_buffers == NULL) {
   1410 		aprint_error_dev(dev, "Unable to allocate rx_buffer memory\n");
   1411 		error = ENOMEM;
   1412 		goto fail;
   1413 	}
   1414 
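        	/*
        	 * One segment of up to MJUM16BYTES covers the largest
        	 * receive cluster size this driver uses (16k).
        	 */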
   1415 	error = ixgbe_dma_tag_create(
   1416 	         /*      parent */ adapter->osdep.dmat,
   1417 	         /*   alignment */ 1,
   1418 	         /*      bounds */ 0,
   1419 	         /*     maxsize */ MJUM16BYTES,
   1420 	         /*   nsegments */ 1,
   1421 	         /*  maxsegsize */ MJUM16BYTES,
   1422 	         /*       flags */ 0,
   1423 	                           &rxr->ptag);
   1424 	if (error != 0) {
   1425 		aprint_error_dev(dev, "Unable to create RX DMA tag\n");
   1426 		goto fail;
   1427 	}
   1428 
   1429 	for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
   1430 		rxbuf = &rxr->rx_buffers[i];
   1431 		error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
   1432 		if (error) {
   1433 			aprint_error_dev(dev, "Unable to create RX dma map\n");
   1434 			goto fail;
   1435 		}
   1436 	}
   1437 
   1438 	return (0);
   1439 
   1440 fail:
   1441 	/* Frees all, but can handle partial completion */
   1442 	ixgbe_free_receive_structures(adapter);
   1443 
   1444 	return (error);
   1445 } /* ixgbe_allocate_receive_buffers */
   1446 
   1447 /************************************************************************
   1448  * ixgbe_free_receive_ring
   1449  ************************************************************************/
   1450 static void
   1451 ixgbe_free_receive_ring(struct rx_ring *rxr)
   1452 {
   1453 	for (int i = 0; i < rxr->num_desc; i++) {
   1454 		ixgbe_rx_discard(rxr, i);
   1455 	}
   1456 } /* ixgbe_free_receive_ring */
   1457 
   1458 /************************************************************************
   1459  * ixgbe_setup_receive_ring
   1460  *
   1461  *   Initialize a receive ring and its buffers.
   1462  ************************************************************************/
   1463 static int
   1464 ixgbe_setup_receive_ring(struct rx_ring *rxr)
   1465 {
   1466 	struct adapter        *adapter;
   1467 	struct ixgbe_rx_buf   *rxbuf;
   1468 #ifdef LRO
   1469 	struct ifnet          *ifp;
   1470 	struct lro_ctrl       *lro = &rxr->lro;
   1471 #endif /* LRO */
   1472 #ifdef DEV_NETMAP
   1473 	struct netmap_adapter *na = NA(rxr->adapter->ifp);
   1474 	struct netmap_slot    *slot;
   1475 #endif /* DEV_NETMAP */
   1476 	int                   rsize, error = 0;
   1477 
   1478 	adapter = rxr->adapter;
   1479 #ifdef LRO
   1480 	ifp = adapter->ifp;
   1481 #endif /* LRO */
   1482 
   1483 	/* Clear the ring contents */
   1484 	IXGBE_RX_LOCK(rxr);
   1485 
   1486 #ifdef DEV_NETMAP
   1487 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP)
   1488 		slot = netmap_reset(na, NR_RX, rxr->me, 0);
   1489 #endif /* DEV_NETMAP */
   1490 
   1491 	rsize = roundup2(adapter->num_rx_desc *
   1492 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
   1493 	bzero((void *)rxr->rx_base, rsize);
   1494 	/* Cache the size */
   1495 	rxr->mbuf_sz = adapter->rx_mbuf_sz;
   1496 
   1497 	/* Free current RX buffer structs and their mbufs */
   1498 	ixgbe_free_receive_ring(rxr);
   1499 
   1500 	/* Now replenish the mbufs */
   1501 	for (int j = 0; j != rxr->num_desc; ++j) {
   1502 		struct mbuf *mp;
   1503 
   1504 		rxbuf = &rxr->rx_buffers[j];
   1505 
   1506 #ifdef DEV_NETMAP
   1507 		/*
   1508 		 * In netmap mode, fill the map and set the buffer
   1509 		 * address in the NIC ring, considering the offset
   1510 		 * between the netmap and NIC rings (see comment in
   1511 		 * ixgbe_setup_transmit_ring() ). No need to allocate
   1512 		 * an mbuf, so end the block with a continue;
   1513 		 */
   1514 		if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
   1515 			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
   1516 			uint64_t paddr;
   1517 			void *addr;
   1518 
   1519 			addr = PNMB(na, slot + sj, &paddr);
   1520 			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
   1521 			/* Update descriptor and the cached value */
   1522 			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
   1523 			rxbuf->addr = htole64(paddr);
   1524 			continue;
   1525 		}
   1526 #endif /* DEV_NETMAP */
   1527 
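         		/*
         		 * Normal (non-netmap) path: grab a jumbo cluster of
         		 * rx_mbuf_sz bytes, load its DMA map, and prime the
         		 * descriptor with the physical address.
         		 */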
   1528 		rxbuf->flags = 0;
   1529 		rxbuf->buf = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
   1530 		    MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
   1531 		if (rxbuf->buf == NULL) {
   1532 			error = ENOBUFS;
   1533 			goto fail;
   1534 		}
   1535 		mp = rxbuf->buf;
   1536 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   1537 		/* Get the memory mapping */
   1538 		error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat, rxbuf->pmap,
   1539 		    mp, BUS_DMA_NOWAIT);
   1540 		if (error != 0)
    1541 			goto fail;
   1542 		bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1543 		    0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
   1544 		/* Update the descriptor and the cached value */
   1545 		rxr->rx_base[j].read.pkt_addr =
   1546 		    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1547 		rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1548 	}
   1549 
   1550 
   1551 	/* Setup our descriptor indices */
   1552 	rxr->next_to_check = 0;
   1553 	rxr->next_to_refresh = 0;
   1554 	rxr->lro_enabled = FALSE;
   1555 	rxr->rx_copies.ev_count = 0;
   1556 #if 0 /* NetBSD */
   1557 	rxr->rx_bytes.ev_count = 0;
   1558 #if 1	/* Fix inconsistency */
   1559 	rxr->rx_packets.ev_count = 0;
   1560 #endif
   1561 #endif
   1562 	rxr->vtag_strip = FALSE;
   1563 
   1564 	ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   1565 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   1566 
   1567 	/*
   1568 	 * Now set up the LRO interface
   1569 	 */
   1570 	if (ixgbe_rsc_enable)
   1571 		ixgbe_setup_hw_rsc(rxr);
   1572 #ifdef LRO
   1573 	else if (ifp->if_capenable & IFCAP_LRO) {
   1574 		device_t dev = adapter->dev;
   1575 		int err = tcp_lro_init(lro);
   1576 		if (err) {
   1577 			device_printf(dev, "LRO Initialization failed!\n");
   1578 			goto fail;
   1579 		}
   1580 		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
   1581 		rxr->lro_enabled = TRUE;
   1582 		lro->ifp = adapter->ifp;
   1583 	}
   1584 #endif /* LRO */
   1585 
   1586 	IXGBE_RX_UNLOCK(rxr);
   1587 
   1588 	return (0);
   1589 
   1590 fail:
   1591 	ixgbe_free_receive_ring(rxr);
   1592 	IXGBE_RX_UNLOCK(rxr);
   1593 
   1594 	return (error);
   1595 } /* ixgbe_setup_receive_ring */
   1596 
   1597 /************************************************************************
   1598  * ixgbe_setup_receive_structures - Initialize all receive rings.
   1599  ************************************************************************/
   1600 int
   1601 ixgbe_setup_receive_structures(struct adapter *adapter)
   1602 {
   1603 	struct rx_ring *rxr = adapter->rx_rings;
   1604 	int            j;
   1605 
   1606 	/*
   1607 	 * Now reinitialize our supply of jumbo mbufs.  The number
   1608 	 * or size of jumbo mbufs may have changed.
    1609 	 * Assume every ring's rxr->ptag is the same.
   1610 	 */
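         	/*
         	 * The pool is sized at twice the descriptor count per queue,
         	 * presumably to leave headroom for clusters still held by the
         	 * stack while the rings are being refilled.
         	 */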
   1611 	ixgbe_jcl_reinit(&adapter->jcl_head, rxr->ptag->dt_dmat,
   1612 	    (2 * adapter->num_rx_desc) * adapter->num_queues,
   1613 	    adapter->rx_mbuf_sz);
   1614 
   1615 	for (j = 0; j < adapter->num_queues; j++, rxr++)
   1616 		if (ixgbe_setup_receive_ring(rxr))
   1617 			goto fail;
   1618 
   1619 	return (0);
   1620 fail:
   1621 	/*
    1622 	 * Free the RX buffers allocated so far; we only handle
    1623 	 * the rings that completed, since the failing ring has
    1624 	 * cleaned up after itself.  'j' failed, so it's the terminus.
   1625 	 */
   1626 	for (int i = 0; i < j; ++i) {
   1627 		rxr = &adapter->rx_rings[i];
   1628 		IXGBE_RX_LOCK(rxr);
   1629 		ixgbe_free_receive_ring(rxr);
   1630 		IXGBE_RX_UNLOCK(rxr);
   1631 	}
   1632 
   1633 	return (ENOBUFS);
   1634 } /* ixgbe_setup_receive_structures */
   1635 
   1636 
   1637 /************************************************************************
   1638  * ixgbe_free_receive_structures - Free all receive rings.
   1639  ************************************************************************/
   1640 void
   1641 ixgbe_free_receive_structures(struct adapter *adapter)
   1642 {
   1643 	struct rx_ring *rxr = adapter->rx_rings;
   1644 
   1645 	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
   1646 
   1647 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
   1648 		ixgbe_free_receive_buffers(rxr);
   1649 #ifdef LRO
   1650 		/* Free LRO memory */
   1651 		tcp_lro_free(&rxr->lro);
   1652 #endif /* LRO */
   1653 		/* Free the ring memory as well */
   1654 		ixgbe_dma_free(adapter, &rxr->rxdma);
   1655 		IXGBE_RX_LOCK_DESTROY(rxr);
   1656 	}
   1657 
   1658 	free(adapter->rx_rings, M_DEVBUF);
   1659 } /* ixgbe_free_receive_structures */
   1660 
   1661 
   1662 /************************************************************************
   1663  * ixgbe_free_receive_buffers - Free receive ring data structures
   1664  ************************************************************************/
   1665 static void
   1666 ixgbe_free_receive_buffers(struct rx_ring *rxr)
   1667 {
   1668 	struct adapter      *adapter = rxr->adapter;
   1669 	struct ixgbe_rx_buf *rxbuf;
   1670 
   1671 	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
   1672 
   1673 	/* Cleanup any existing buffers */
   1674 	if (rxr->rx_buffers != NULL) {
   1675 		for (int i = 0; i < adapter->num_rx_desc; i++) {
   1676 			rxbuf = &rxr->rx_buffers[i];
   1677 			ixgbe_rx_discard(rxr, i);
   1678 			if (rxbuf->pmap != NULL) {
   1679 				ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
   1680 				rxbuf->pmap = NULL;
   1681 			}
   1682 		}
   1683 		if (rxr->rx_buffers != NULL) {
   1684 			free(rxr->rx_buffers, M_DEVBUF);
   1685 			rxr->rx_buffers = NULL;
   1686 		}
   1687 	}
   1688 
   1689 	if (rxr->ptag != NULL) {
   1690 		ixgbe_dma_tag_destroy(rxr->ptag);
   1691 		rxr->ptag = NULL;
   1692 	}
   1693 
   1694 	return;
   1695 } /* ixgbe_free_receive_buffers */
   1696 
   1697 /************************************************************************
   1698  * ixgbe_rx_input
   1699  ************************************************************************/
   1700 static __inline void
   1701 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m,
   1702     u32 ptype)
   1703 {
   1704 	struct adapter	*adapter = ifp->if_softc;
   1705 
   1706 #ifdef LRO
   1707 	struct ethercom *ec = &adapter->osdep.ec;
   1708 
   1709 	/*
    1710 	 * At the moment LRO is only for IP/TCP packets whose TCP checksum
    1711 	 * has been verified by hardware and which carry no VLAN tag in the
    1712 	 * Ethernet header.  For IPv6 we do not yet support extension headers.
   1713 	 */
    1714 	if (rxr->lro_enabled &&
    1715 	    (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
    1716 	    (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
    1717 	    ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
    1718 	    (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
    1719 	    (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
    1720 	    (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
    1721 	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
    1722 	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
    1723 		/*
    1724 		 * Send to the stack if:
    1725 		 *  - LRO not enabled, or
    1726 		 *  - no LRO resources, or
    1727 		 *  - lro enqueue fails
    1728 		 */
    1729 		if (rxr->lro.lro_cnt != 0)
    1730 			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
    1731 				return;
    1732 	}
   1733 #endif /* LRO */
   1734 
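         	/* Hand the frame to the per-CPU input queue for further processing. */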
   1735 	if_percpuq_enqueue(adapter->ipq, m);
   1736 } /* ixgbe_rx_input */
   1737 
   1738 /************************************************************************
   1739  * ixgbe_rx_discard
   1740  ************************************************************************/
   1741 static __inline void
   1742 ixgbe_rx_discard(struct rx_ring *rxr, int i)
   1743 {
   1744 	struct ixgbe_rx_buf *rbuf;
   1745 
   1746 	rbuf = &rxr->rx_buffers[i];
   1747 
   1748 	/*
   1749 	 * With advanced descriptors the writeback
    1750 	 * clobbers the buffer addrs, so it's easier
   1751 	 * to just free the existing mbufs and take
   1752 	 * the normal refresh path to get new buffers
   1753 	 * and mapping.
   1754 	 */
   1755 
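         	/*
         	 * Sync the map for the CPU before freeing the mbuf(s), then
         	 * unload it below so the refresh path can load a fresh buffer.
         	 */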
   1756 	if (rbuf->fmp != NULL) {/* Partial chain ? */
   1757 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
   1758 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
   1759 		m_freem(rbuf->fmp);
   1760 		rbuf->fmp = NULL;
   1761 		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
   1762 	} else if (rbuf->buf) {
   1763 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
   1764 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
   1765 		m_free(rbuf->buf);
   1766 		rbuf->buf = NULL;
   1767 	}
   1768 	ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
   1769 
   1770 	rbuf->flags = 0;
   1771 
   1772 	return;
   1773 } /* ixgbe_rx_discard */
   1774 
   1775 
   1776 /************************************************************************
   1777  * ixgbe_rxeof
   1778  *
    1779  *   Executes in interrupt context. It replenishes the
    1780  *   mbufs in the descriptor ring and sends data which has
    1781  *   been DMA'd into host memory to the upper layer.
   1782  *
   1783  *   Return TRUE for more work, FALSE for all clean.
   1784  ************************************************************************/
   1785 bool
   1786 ixgbe_rxeof(struct ix_queue *que)
   1787 {
   1788 	struct adapter		*adapter = que->adapter;
   1789 	struct rx_ring		*rxr = que->rxr;
   1790 	struct ifnet		*ifp = adapter->ifp;
   1791 #ifdef LRO
   1792 	struct lro_ctrl		*lro = &rxr->lro;
   1793 #endif /* LRO */
   1794 	union ixgbe_adv_rx_desc	*cur;
   1795 	struct ixgbe_rx_buf	*rbuf, *nbuf;
   1796 	int			i, nextp, processed = 0;
   1797 	u32			staterr = 0;
   1798 	u32			count = adapter->rx_process_limit;
   1799 #ifdef RSS
   1800 	u16			pkt_info;
   1801 #endif
   1802 
   1803 	IXGBE_RX_LOCK(rxr);
   1804 
   1805 #ifdef DEV_NETMAP
   1806 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
   1807 		/* Same as the txeof routine: wakeup clients on intr. */
   1808 		if (netmap_rx_irq(ifp, rxr->me, &processed)) {
   1809 			IXGBE_RX_UNLOCK(rxr);
   1810 			return (FALSE);
   1811 		}
   1812 	}
   1813 #endif /* DEV_NETMAP */
   1814 
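         	/*
         	 * Walk the ring from next_to_check, stopping when the budget
         	 * (rx_process_limit) is exhausted or the hardware has not yet
         	 * written back the next descriptor (DD bit clear).
         	 */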
   1815 	for (i = rxr->next_to_check; count != 0;) {
   1816 		struct mbuf *sendmp, *mp;
   1817 		u32         rsc, ptype;
   1818 		u16         len;
   1819 		u16         vtag = 0;
   1820 		bool        eop;
   1821 
   1822 		/* Sync the ring. */
   1823 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   1824 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   1825 
   1826 		cur = &rxr->rx_base[i];
   1827 		staterr = le32toh(cur->wb.upper.status_error);
   1828 #ifdef RSS
   1829 		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
   1830 #endif
   1831 
   1832 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
   1833 			break;
   1834 		if ((ifp->if_flags & IFF_RUNNING) == 0)
   1835 			break;
   1836 
   1837 		count--;
   1838 		sendmp = NULL;
   1839 		nbuf = NULL;
   1840 		rsc = 0;
   1841 		cur->wb.upper.status_error = 0;
   1842 		rbuf = &rxr->rx_buffers[i];
   1843 		mp = rbuf->buf;
   1844 
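         		/*
         		 * Pull the length, packet type and end-of-packet flag
         		 * from the writeback section of the descriptor.
         		 */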
   1845 		len = le16toh(cur->wb.upper.length);
   1846 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
   1847 		    IXGBE_RXDADV_PKTTYPE_MASK;
   1848 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
   1849 
   1850 		/* Make sure bad packets are discarded */
   1851 		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
   1852 #if __FreeBSD_version >= 1100036
   1853 			if (adapter->feat_en & IXGBE_FEATURE_VF)
   1854 				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
   1855 #endif
   1856 			rxr->rx_discarded.ev_count++;
   1857 			ixgbe_rx_discard(rxr, i);
   1858 			goto next_desc;
   1859 		}
   1860 
   1861 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
   1862 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
   1863 
   1864 		/*
    1865 		 * On the 82599, which supports a hardware
    1866 		 * LRO (called HW RSC), packets need not be
    1867 		 * fragmented across sequential descriptors;
    1868 		 * instead the next descriptor is indicated
    1869 		 * in bits of the current descriptor.
    1870 		 * This also means that we may process more
    1871 		 * than one packet at a time, something that
    1872 		 * has never been true before. It required
    1873 		 * eliminating the global chain pointers in
    1874 		 * favor of what we are doing here.  -jfv
   1875 		 */
   1876 		if (!eop) {
   1877 			/*
   1878 			 * Figure out the next descriptor
   1879 			 * of this frame.
   1880 			 */
   1881 			if (rxr->hw_rsc == TRUE) {
   1882 				rsc = ixgbe_rsc_count(cur);
   1883 				rxr->rsc_num += (rsc - 1);
   1884 			}
   1885 			if (rsc) { /* Get hardware index */
   1886 				nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
   1887 				    IXGBE_RXDADV_NEXTP_SHIFT);
   1888 			} else { /* Just sequential */
   1889 				nextp = i + 1;
   1890 				if (nextp == adapter->num_rx_desc)
   1891 					nextp = 0;
   1892 			}
   1893 			nbuf = &rxr->rx_buffers[nextp];
   1894 			prefetch(nbuf);
   1895 		}
   1896 		/*
   1897 		 * Rather than using the fmp/lmp global pointers
   1898 		 * we now keep the head of a packet chain in the
   1899 		 * buffer struct and pass this along from one
   1900 		 * descriptor to the next, until we get EOP.
   1901 		 */
   1902 		mp->m_len = len;
   1903 		/*
   1904 		 * See if there is a stored head
    1905 		 * See if a head was stored by a previous descriptor;
    1906 		 * it determines whether this is a secondary fragment.
   1907 		sendmp = rbuf->fmp;
   1908 		if (sendmp != NULL) {  /* secondary frag */
   1909 			rbuf->buf = rbuf->fmp = NULL;
   1910 			mp->m_flags &= ~M_PKTHDR;
   1911 			sendmp->m_pkthdr.len += mp->m_len;
   1912 		} else {
   1913 			/*
   1914 			 * Optimize.  This might be a small packet,
   1915 			 * maybe just a TCP ACK.  Do a fast copy that
   1916 			 * is cache aligned into a new mbuf, and
   1917 			 * leave the old mbuf+cluster for re-use.
   1918 			 */
   1919 			if (eop && len <= IXGBE_RX_COPY_LEN) {
   1920 				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
   1921 				if (sendmp != NULL) {
   1922 					sendmp->m_data += IXGBE_RX_COPY_ALIGN;
   1923 					ixgbe_bcopy(mp->m_data, sendmp->m_data,
   1924 					    len);
   1925 					sendmp->m_len = len;
   1926 					rxr->rx_copies.ev_count++;
   1927 					rbuf->flags |= IXGBE_RX_COPY;
   1928 				}
   1929 			}
   1930 			if (sendmp == NULL) {
   1931 				rbuf->buf = rbuf->fmp = NULL;
   1932 				sendmp = mp;
   1933 			}
   1934 
   1935 			/* first desc of a non-ps chain */
   1936 			sendmp->m_flags |= M_PKTHDR;
   1937 			sendmp->m_pkthdr.len = mp->m_len;
   1938 		}
   1939 		++processed;
   1940 
   1941 		/* Pass the head pointer on */
   1942 		if (eop == 0) {
   1943 			nbuf->fmp = sendmp;
   1944 			sendmp = NULL;
   1945 			mp->m_next = nbuf->buf;
   1946 		} else { /* Sending this frame */
   1947 			m_set_rcvif(sendmp, ifp);
   1948 			++rxr->packets;
   1949 			rxr->rx_packets.ev_count++;
   1950 			/* capture data for AIM */
   1951 			rxr->bytes += sendmp->m_pkthdr.len;
   1952 			rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
   1953 			/* Process vlan info */
   1954 			if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
   1955 				vtag = le16toh(cur->wb.upper.vlan);
   1956 			if (vtag) {
   1957 				vlan_set_tag(sendmp, vtag);
   1958 			}
   1959 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
   1960 				ixgbe_rx_checksum(staterr, sendmp, ptype,
   1961 				   &adapter->stats.pf);
   1962 			}
   1963 
   1964 #if 0 /* FreeBSD */
   1965 			/*
   1966 			 * In case of multiqueue, we have RXCSUM.PCSD bit set
   1967 			 * and never cleared. This means we have RSS hash
   1968 			 * available to be used.
   1969 			 */
   1970 			if (adapter->num_queues > 1) {
   1971 				sendmp->m_pkthdr.flowid =
   1972 				    le32toh(cur->wb.lower.hi_dword.rss);
   1973 				switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
   1974 				    case IXGBE_RXDADV_RSSTYPE_IPV4:
   1975 					M_HASHTYPE_SET(sendmp,
   1976 					    M_HASHTYPE_RSS_IPV4);
   1977 					break;
   1978 				    case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
   1979 					M_HASHTYPE_SET(sendmp,
   1980 					    M_HASHTYPE_RSS_TCP_IPV4);
   1981 					break;
   1982 				    case IXGBE_RXDADV_RSSTYPE_IPV6:
   1983 					M_HASHTYPE_SET(sendmp,
   1984 					    M_HASHTYPE_RSS_IPV6);
   1985 					break;
   1986 				    case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
   1987 					M_HASHTYPE_SET(sendmp,
   1988 					    M_HASHTYPE_RSS_TCP_IPV6);
   1989 					break;
   1990 				    case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
   1991 					M_HASHTYPE_SET(sendmp,
   1992 					    M_HASHTYPE_RSS_IPV6_EX);
   1993 					break;
   1994 				    case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
   1995 					M_HASHTYPE_SET(sendmp,
   1996 					    M_HASHTYPE_RSS_TCP_IPV6_EX);
   1997 					break;
   1998 #if __FreeBSD_version > 1100000
   1999 				    case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
   2000 					M_HASHTYPE_SET(sendmp,
   2001 					    M_HASHTYPE_RSS_UDP_IPV4);
   2002 					break;
   2003 				    case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
   2004 					M_HASHTYPE_SET(sendmp,
   2005 					    M_HASHTYPE_RSS_UDP_IPV6);
   2006 					break;
   2007 				    case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
   2008 					M_HASHTYPE_SET(sendmp,
   2009 					    M_HASHTYPE_RSS_UDP_IPV6_EX);
   2010 					break;
   2011 #endif
   2012 				    default:
   2013 					M_HASHTYPE_SET(sendmp,
   2014 					    M_HASHTYPE_OPAQUE_HASH);
   2015 				}
   2016 			} else {
   2017 				sendmp->m_pkthdr.flowid = que->msix;
   2018 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
   2019 			}
   2020 #endif
   2021 		}
   2022 next_desc:
   2023 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   2024 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   2025 
   2026 		/* Advance our pointers to the next descriptor. */
   2027 		if (++i == rxr->num_desc)
   2028 			i = 0;
   2029 
   2030 		/* Now send to the stack or do LRO */
   2031 		if (sendmp != NULL) {
   2032 			rxr->next_to_check = i;
   2033 			IXGBE_RX_UNLOCK(rxr);
   2034 			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
   2035 			IXGBE_RX_LOCK(rxr);
   2036 			i = rxr->next_to_check;
   2037 		}
   2038 
   2039 		/* Every 8 descriptors we go to refresh mbufs */
   2040 		if (processed == 8) {
   2041 			ixgbe_refresh_mbufs(rxr, i);
   2042 			processed = 0;
   2043 		}
   2044 	}
   2045 
   2046 	/* Refresh any remaining buf structs */
   2047 	if (ixgbe_rx_unrefreshed(rxr))
   2048 		ixgbe_refresh_mbufs(rxr, i);
   2049 
   2050 	rxr->next_to_check = i;
   2051 
   2052 	IXGBE_RX_UNLOCK(rxr);
   2053 
   2054 #ifdef LRO
   2055 	/*
   2056 	 * Flush any outstanding LRO work
   2057 	 */
   2058 	tcp_lro_flush_all(lro);
   2059 #endif /* LRO */
   2060 
   2061 	/*
   2062 	 * Still have cleaning to do?
   2063 	 */
   2064 	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
   2065 		return (TRUE);
   2066 
   2067 	return (FALSE);
   2068 } /* ixgbe_rxeof */
   2069 
   2070 
   2071 /************************************************************************
   2072  * ixgbe_rx_checksum
   2073  *
   2074  *   Verify that the hardware indicated that the checksum is valid.
    2075  *   Inform the stack about the status of the checksum so that the
    2076  *   stack doesn't spend time verifying it again.
   2077  ************************************************************************/
   2078 static void
   2079 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
   2080     struct ixgbe_hw_stats *stats)
   2081 {
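         	/*
         	 * The status bits live in the low 16 bits of staterr and the
         	 * error bits in its top byte.
         	 */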
   2082 	u16  status = (u16)staterr;
   2083 	u8   errors = (u8)(staterr >> 24);
   2084 #if 0
   2085 	bool sctp = false;
   2086 
   2087 	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
   2088 	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
   2089 		sctp = true;
   2090 #endif
   2091 
   2092 	/* IPv4 checksum */
   2093 	if (status & IXGBE_RXD_STAT_IPCS) {
   2094 		stats->ipcs.ev_count++;
   2095 		if (!(errors & IXGBE_RXD_ERR_IPE)) {
   2096 			/* IP Checksum Good */
   2097 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
   2098 		} else {
   2099 			stats->ipcs_bad.ev_count++;
   2100 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
   2101 		}
   2102 	}
   2103 	/* TCP/UDP/SCTP checksum */
   2104 	if (status & IXGBE_RXD_STAT_L4CS) {
   2105 		stats->l4cs.ev_count++;
   2106 		int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
   2107 		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
   2108 			mp->m_pkthdr.csum_flags |= type;
   2109 		} else {
   2110 			stats->l4cs_bad.ev_count++;
   2111 			mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
   2112 		}
   2113 	}
   2114 } /* ixgbe_rx_checksum */
   2115 
   2116 /************************************************************************
   2117  * ixgbe_dma_malloc
   2118  ************************************************************************/
   2119 int
   2120 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
   2121 		struct ixgbe_dma_alloc *dma, const int mapflags)
   2122 {
   2123 	device_t dev = adapter->dev;
   2124 	int      r, rsegs;
   2125 
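         	/*
         	 * Standard bus_dma(9) sequence: create a tag, allocate and map
         	 * the memory, create a map, and load it; each failure label
         	 * unwinds the steps already completed.
         	 */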
   2126 	r = ixgbe_dma_tag_create(
   2127 	     /*      parent */ adapter->osdep.dmat,
   2128 	     /*   alignment */ DBA_ALIGN,
   2129 	     /*      bounds */ 0,
   2130 	     /*     maxsize */ size,
   2131 	     /*   nsegments */ 1,
   2132 	     /*  maxsegsize */ size,
   2133 	     /*       flags */ BUS_DMA_ALLOCNOW,
   2134 			       &dma->dma_tag);
   2135 	if (r != 0) {
   2136 		aprint_error_dev(dev,
   2137 		    "%s: ixgbe_dma_tag_create failed; error %d\n", __func__, r);
   2138 		goto fail_0;
   2139 	}
   2140 
   2141 	r = bus_dmamem_alloc(dma->dma_tag->dt_dmat, size,
   2142 	    dma->dma_tag->dt_alignment, dma->dma_tag->dt_boundary,
   2143 	    &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
   2144 	if (r != 0) {
   2145 		aprint_error_dev(dev,
   2146 		    "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
   2147 		goto fail_1;
   2148 	}
   2149 
   2150 	r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
   2151 	    size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
   2152 	if (r != 0) {
   2153 		aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
   2154 		    __func__, r);
   2155 		goto fail_2;
   2156 	}
   2157 
   2158 	r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
   2159 	if (r != 0) {
    2160 		aprint_error_dev(dev, "%s: ixgbe_dmamap_create failed; error %d\n",
   2161 		    __func__, r);
   2162 		goto fail_3;
   2163 	}
   2164 
   2165 	r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map,
   2166 	    dma->dma_vaddr, size, NULL, mapflags | BUS_DMA_NOWAIT);
   2167 	if (r != 0) {
   2168 		aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
   2169 		    __func__, r);
   2170 		goto fail_4;
   2171 	}
   2172 	dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
   2173 	dma->dma_size = size;
   2174 	return 0;
   2175 fail_4:
   2176 	ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
   2177 fail_3:
   2178 	bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
   2179 fail_2:
   2180 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
   2181 fail_1:
   2182 	ixgbe_dma_tag_destroy(dma->dma_tag);
   2183 fail_0:
   2184 
   2185 	return (r);
   2186 } /* ixgbe_dma_malloc */
   2187 
   2188 /************************************************************************
   2189  * ixgbe_dma_free
   2190  ************************************************************************/
   2191 void
   2192 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
   2193 {
   2194 	bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
   2195 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   2196 	ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
   2197 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
   2198 	ixgbe_dma_tag_destroy(dma->dma_tag);
   2199 } /* ixgbe_dma_free */
   2200 
   2201 
   2202 /************************************************************************
   2203  * ixgbe_allocate_queues
   2204  *
   2205  *   Allocate memory for the transmit and receive rings, and then
   2206  *   the descriptors associated with each, called only once at attach.
   2207  ************************************************************************/
   2208 int
   2209 ixgbe_allocate_queues(struct adapter *adapter)
   2210 {
   2211 	device_t	dev = adapter->dev;
   2212 	struct ix_queue	*que;
   2213 	struct tx_ring	*txr;
   2214 	struct rx_ring	*rxr;
   2215 	int             rsize, tsize, error = IXGBE_SUCCESS;
   2216 	int             txconf = 0, rxconf = 0;
   2217 
   2218 	/* First, allocate the top level queue structs */
   2219 	adapter->queues = (struct ix_queue *)malloc(sizeof(struct ix_queue) *
    2220 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
    2221 	if (adapter->queues == NULL) {
    2222 		aprint_error_dev(dev, "Unable to allocate queue memory\n");
    2223 		error = ENOMEM;
    2224 		goto fail;
    2225 	}
   2226 
   2227 	/* Second, allocate the TX ring struct memory */
   2228 	adapter->tx_rings = (struct tx_ring *)malloc(sizeof(struct tx_ring) *
   2229 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
   2230 	if (adapter->tx_rings == NULL) {
   2231 		aprint_error_dev(dev, "Unable to allocate TX ring memory\n");
   2232 		error = ENOMEM;
   2233 		goto tx_fail;
   2234 	}
   2235 
   2236 	/* Third, allocate the RX ring */
   2237 	adapter->rx_rings = (struct rx_ring *)malloc(sizeof(struct rx_ring) *
   2238 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
   2239 	if (adapter->rx_rings == NULL) {
   2240 		aprint_error_dev(dev, "Unable to allocate RX ring memory\n");
   2241 		error = ENOMEM;
   2242 		goto rx_fail;
   2243 	}
   2244 
   2245 	/* For the ring itself */
   2246 	tsize = roundup2(adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc),
   2247 	    DBA_ALIGN);
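         	/*
         	 * roundup2() to DBA_ALIGN keeps the descriptor area aligned to
         	 * the controller's descriptor ring alignment requirement.
         	 */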
   2248 
   2249 	/*
    2250 	 * Now set up the TX queues.  txconf is needed to handle the
    2251 	 * possibility that things fail midcourse and we need to
    2252 	 * undo the memory allocations gracefully.
   2253 	 */
   2254 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
   2255 		/* Set up some basics */
   2256 		txr = &adapter->tx_rings[i];
   2257 		txr->adapter = adapter;
   2258 		txr->txr_interq = NULL;
   2259 		/* In case SR-IOV is enabled, align the index properly */
   2260 #ifdef PCI_IOV
   2261 		txr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
   2262 		    i);
   2263 #else
   2264 		txr->me = i;
   2265 #endif
   2266 		txr->num_desc = adapter->num_tx_desc;
   2267 
   2268 		/* Initialize the TX side lock */
   2269 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
   2270 		    device_xname(dev), txr->me);
   2271 		mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
   2272 
   2273 		if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma,
   2274 		    BUS_DMA_NOWAIT)) {
   2275 			aprint_error_dev(dev,
   2276 			    "Unable to allocate TX Descriptor memory\n");
   2277 			error = ENOMEM;
   2278 			goto err_tx_desc;
   2279 		}
   2280 		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
   2281 		bzero((void *)txr->tx_base, tsize);
   2282 
   2283 		/* Now allocate transmit buffers for the ring */
   2284 		if (ixgbe_allocate_transmit_buffers(txr)) {
   2285 			aprint_error_dev(dev,
   2286 			    "Critical Failure setting up transmit buffers\n");
   2287 			error = ENOMEM;
   2288 			goto err_tx_desc;
    2289 		}
   2290 		if (!(adapter->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
   2291 			/* Allocate a buf ring */
   2292 			txr->txr_interq = pcq_create(IXGBE_BR_SIZE, KM_SLEEP);
   2293 			if (txr->txr_interq == NULL) {
   2294 				aprint_error_dev(dev,
   2295 				    "Critical Failure setting up buf ring\n");
   2296 				error = ENOMEM;
   2297 				goto err_tx_desc;
   2298 			}
   2299 		}
   2300 	}
   2301 
   2302 	/*
   2303 	 * Next the RX queues...
   2304 	 */
   2305 	rsize = roundup2(adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc),
   2306 	    DBA_ALIGN);
   2307 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
   2308 		rxr = &adapter->rx_rings[i];
   2309 		/* Set up some basics */
   2310 		rxr->adapter = adapter;
   2311 #ifdef PCI_IOV
   2312 		/* In case SR-IOV is enabled, align the index properly */
   2313 		rxr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
   2314 		    i);
   2315 #else
   2316 		rxr->me = i;
   2317 #endif
   2318 		rxr->num_desc = adapter->num_rx_desc;
   2319 
   2320 		/* Initialize the RX side lock */
   2321 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
   2322 		    device_xname(dev), rxr->me);
   2323 		mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
   2324 
   2325 		if (ixgbe_dma_malloc(adapter, rsize, &rxr->rxdma,
   2326 		    BUS_DMA_NOWAIT)) {
   2327 			aprint_error_dev(dev,
    2328 			    "Unable to allocate RX Descriptor memory\n");
   2329 			error = ENOMEM;
   2330 			goto err_rx_desc;
   2331 		}
   2332 		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
   2333 		bzero((void *)rxr->rx_base, rsize);
   2334 
   2335 		/* Allocate receive buffers for the ring */
   2336 		if (ixgbe_allocate_receive_buffers(rxr)) {
   2337 			aprint_error_dev(dev,
   2338 			    "Critical Failure setting up receive buffers\n");
   2339 			error = ENOMEM;
   2340 			goto err_rx_desc;
   2341 		}
   2342 	}
   2343 
   2344 	/*
   2345 	 * Finally set up the queue holding structs
   2346 	 */
   2347 	for (int i = 0; i < adapter->num_queues; i++) {
   2348 		que = &adapter->queues[i];
   2349 		que->adapter = adapter;
   2350 		que->me = i;
   2351 		que->txr = &adapter->tx_rings[i];
   2352 		que->rxr = &adapter->rx_rings[i];
   2353 
   2354 		mutex_init(&que->dc_mtx, MUTEX_DEFAULT, IPL_NET);
   2355 		que->disabled_count = 0;
   2356 	}
   2357 
   2358 	return (0);
   2359 
   2360 err_rx_desc:
   2361 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
   2362 		ixgbe_dma_free(adapter, &rxr->rxdma);
   2363 err_tx_desc:
   2364 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
   2365 		ixgbe_dma_free(adapter, &txr->txdma);
   2366 	free(adapter->rx_rings, M_DEVBUF);
   2367 rx_fail:
   2368 	free(adapter->tx_rings, M_DEVBUF);
   2369 tx_fail:
   2370 	free(adapter->queues, M_DEVBUF);
   2371 fail:
   2372 	return (error);
   2373 } /* ixgbe_allocate_queues */
   2374