      1 /* $NetBSD: ix_txrx.c,v 1.66 2021/03/08 07:10:45 msaitoh Exp $ */
      2 
      3 /******************************************************************************
      4 
      5   Copyright (c) 2001-2017, Intel Corporation
      6   All rights reserved.
      7 
      8   Redistribution and use in source and binary forms, with or without
      9   modification, are permitted provided that the following conditions are met:
     10 
     11    1. Redistributions of source code must retain the above copyright notice,
     12       this list of conditions and the following disclaimer.
     13 
     14    2. Redistributions in binary form must reproduce the above copyright
     15       notice, this list of conditions and the following disclaimer in the
     16       documentation and/or other materials provided with the distribution.
     17 
     18    3. Neither the name of the Intel Corporation nor the names of its
     19       contributors may be used to endorse or promote products derived from
     20       this software without specific prior written permission.
     21 
     22   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     23   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     24   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     25   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     26   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     27   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     28   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     29   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     30   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     31   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     32   POSSIBILITY OF SUCH DAMAGE.
     33 
     34 ******************************************************************************/
     35 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 327031 2017-12-20 18:15:06Z erj $*/
     36 
     37 /*
     38  * Copyright (c) 2011 The NetBSD Foundation, Inc.
     39  * All rights reserved.
     40  *
     41  * This code is derived from software contributed to The NetBSD Foundation
     42  * by Coyote Point Systems, Inc.
     43  *
     44  * Redistribution and use in source and binary forms, with or without
     45  * modification, are permitted provided that the following conditions
     46  * are met:
     47  * 1. Redistributions of source code must retain the above copyright
     48  *    notice, this list of conditions and the following disclaimer.
     49  * 2. Redistributions in binary form must reproduce the above copyright
     50  *    notice, this list of conditions and the following disclaimer in the
     51  *    documentation and/or other materials provided with the distribution.
     52  *
     53  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     54  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     55  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     56  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     57  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     58  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     59  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     60  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     61  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     62  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     63  * POSSIBILITY OF SUCH DAMAGE.
     64  */
     65 
     66 #include "opt_inet.h"
     67 #include "opt_inet6.h"
     68 
     69 #include "ixgbe.h"
     70 
      71 /*
      72  * HW RSC control:
      73  *  This feature only works with IPv4, and only on
      74  *  82599 and later devices.  It also causes IP
      75  *  forwarding to fail, and unlike LRO that cannot
      76  *  be controlled by the stack.  For all of these
      77  *  reasons it is best left off by default, so no
      78  *  tunable interface is provided; enabling it
      79  *  requires changing this setting and recompiling
      80  *  the driver.
      81  */
     83 static bool ixgbe_rsc_enable = FALSE;
     84 
     85 /*
     86  * For Flow Director: this is the
     87  * number of TX packets we sample
      88  * for the filter pool; this means
     89  * every 20th packet will be probed.
     90  *
     91  * This feature can be disabled by
     92  * setting this to 0.
     93  */
     94 static int atr_sample_rate = 20;
     95 
     96 /************************************************************************
     97  *  Local Function prototypes
     98  ************************************************************************/
     99 static void          ixgbe_setup_transmit_ring(struct tx_ring *);
    100 static void          ixgbe_free_transmit_buffers(struct tx_ring *);
    101 static int           ixgbe_setup_receive_ring(struct rx_ring *);
    102 static void          ixgbe_free_receive_buffers(struct rx_ring *);
    103 static void          ixgbe_rx_checksum(u32, struct mbuf *, u32,
    104                                        struct ixgbe_hw_stats *);
    105 static void          ixgbe_refresh_mbufs(struct rx_ring *, int);
    106 static void          ixgbe_drain(struct ifnet *, struct tx_ring *);
    107 static int           ixgbe_xmit(struct tx_ring *, struct mbuf *);
    108 static int           ixgbe_tx_ctx_setup(struct tx_ring *,
    109                                         struct mbuf *, u32 *, u32 *);
    110 static int           ixgbe_tso_setup(struct tx_ring *,
    111                                      struct mbuf *, u32 *, u32 *);
    112 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
    113 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
    114                                     struct mbuf *, u32);
    115 static int           ixgbe_dma_malloc(struct adapter *, bus_size_t,
    116                                       struct ixgbe_dma_alloc *, int);
    117 static void          ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
    118 
    119 static void	ixgbe_setup_hw_rsc(struct rx_ring *);
    120 
    121 /************************************************************************
    122  * ixgbe_legacy_start_locked - Transmit entry point
    123  *
    124  *   Called by the stack to initiate a transmit.
    125  *   The driver will remain in this routine as long as there are
    126  *   packets to transmit and transmit resources are available.
    127  *   In case resources are not available, the stack is notified
    128  *   and the packet is requeued.
    129  ************************************************************************/
    130 int
    131 ixgbe_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
    132 {
    133 	int rc;
    134 	struct mbuf    *m_head;
    135 	struct adapter *adapter = txr->adapter;
    136 
    137 	IXGBE_TX_LOCK_ASSERT(txr);
    138 
    139 	if (adapter->link_active != LINK_STATE_UP) {
     140 		/*
     141 		 * Discard all packets buffered in the IFQ to avoid
     142 		 * sending stale packets when the link comes back up.
     143 		 */
    144 		ixgbe_drain(ifp, txr);
    145 		return (ENETDOWN);
    146 	}
    147 	if ((ifp->if_flags & IFF_RUNNING) == 0)
    148 		return (ENETDOWN);
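         	/* The ring ran out of descriptors; wait for ixgbe_txeof() to free some. */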
    149 	if (txr->txr_no_space)
    150 		return (ENETDOWN);
    151 
    152 	while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
    153 		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
    154 			break;
    155 
    156 		IFQ_POLL(&ifp->if_snd, m_head);
    157 		if (m_head == NULL)
    158 			break;
    159 
    160 		if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
    161 			break;
    162 		}
    163 		IFQ_DEQUEUE(&ifp->if_snd, m_head);
    164 		if (rc != 0) {
    165 			m_freem(m_head);
    166 			continue;
    167 		}
    168 
    169 		/* Send a copy of the frame to the BPF listener */
    170 		bpf_mtap(ifp, m_head, BPF_D_OUT);
    171 	}
    172 
    173 	return IXGBE_SUCCESS;
    174 } /* ixgbe_legacy_start_locked */
    175 
    176 /************************************************************************
    177  * ixgbe_legacy_start
    178  *
    179  *   Called by the stack, this always uses the first tx ring,
    180  *   and should not be used with multiqueue tx enabled.
    181  ************************************************************************/
    182 void
    183 ixgbe_legacy_start(struct ifnet *ifp)
    184 {
    185 	struct adapter *adapter = ifp->if_softc;
    186 	struct tx_ring *txr = adapter->tx_rings;
    187 
    188 	if (ifp->if_flags & IFF_RUNNING) {
    189 		IXGBE_TX_LOCK(txr);
    190 		ixgbe_legacy_start_locked(ifp, txr);
    191 		IXGBE_TX_UNLOCK(txr);
    192 	}
    193 } /* ixgbe_legacy_start */
    194 
    195 /************************************************************************
    196  * ixgbe_mq_start - Multiqueue Transmit Entry Point
    197  *
    198  *   (if_transmit function)
    199  ************************************************************************/
    200 int
    201 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
    202 {
    203 	struct adapter	*adapter = ifp->if_softc;
    204 	struct tx_ring	*txr;
    205 	int 		i;
    206 #ifdef RSS
    207 	uint32_t bucket_id;
    208 #endif
    209 
     210 	/*
     211 	 * When doing RSS, map the packet to the same outbound queue
     212 	 * that the incoming flow was mapped to.
     213 	 *
     214 	 * If everything is set up correctly, this should be the
     215 	 * same bucket that the current CPU is assigned to.
     216 	 */
    217 #ifdef RSS
    218 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
    219 		if ((adapter->feat_en & IXGBE_FEATURE_RSS) &&
    220 		    (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
    221 		    &bucket_id) == 0)) {
    222 			i = bucket_id % adapter->num_queues;
    223 #ifdef IXGBE_DEBUG
    224 			if (bucket_id > adapter->num_queues)
    225 				if_printf(ifp,
    226 				    "bucket_id (%d) > num_queues (%d)\n",
    227 				    bucket_id, adapter->num_queues);
    228 #endif
    229 		} else
    230 			i = m->m_pkthdr.flowid % adapter->num_queues;
    231 	} else
     232 #endif /* RSS */
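         		/* Without an RSS hash, pick the TX queue from the current CPU index. */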
    233 		i = (cpu_index(curcpu()) % ncpu) % adapter->num_queues;
    234 
    235 	/* Check for a hung queue and pick alternative */
    236 	if (((1ULL << i) & adapter->active_queues) == 0)
    237 		i = ffs64(adapter->active_queues);
    238 
    239 	txr = &adapter->tx_rings[i];
    240 
    241 	if (__predict_false(!pcq_put(txr->txr_interq, m))) {
    242 		m_freem(m);
    243 		txr->pcq_drops.ev_count++;
    244 		return ENOBUFS;
    245 	}
    246 	if (IXGBE_TX_TRYLOCK(txr)) {
    247 		ixgbe_mq_start_locked(ifp, txr);
    248 		IXGBE_TX_UNLOCK(txr);
    249 	} else {
    250 		if (adapter->txrx_use_workqueue) {
    251 			u_int *enqueued;
    252 
     253 			/*
     254 			 * This function itself is not called in interrupt
     255 			 * context; however, it can be called in fast softint
     256 			 * context right after receiving forwarded packets.
     257 			 * The workqueue must therefore be protected against
     258 			 * being enqueued twice when the machine handles both
     259 			 * locally generated and forwarded packets.
     260 			 */
    261 			enqueued = percpu_getref(adapter->txr_wq_enqueued);
    262 			if (*enqueued == 0) {
    263 				*enqueued = 1;
    264 				percpu_putref(adapter->txr_wq_enqueued);
    265 				workqueue_enqueue(adapter->txr_wq,
    266 				    &txr->wq_cookie, curcpu());
    267 			} else
    268 				percpu_putref(adapter->txr_wq_enqueued);
    269 		} else {
    270 			kpreempt_disable();
    271 			softint_schedule(txr->txr_si);
    272 			kpreempt_enable();
    273 		}
    274 	}
    275 
    276 	return (0);
    277 } /* ixgbe_mq_start */
    278 
    279 /************************************************************************
    280  * ixgbe_mq_start_locked
    281  ************************************************************************/
    282 int
    283 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
    284 {
    285 	struct mbuf    *next;
    286 	int            enqueued = 0, err = 0;
    287 
    288 	if (txr->adapter->link_active != LINK_STATE_UP) {
     289 		/*
     290 		 * Discard all packets buffered in txr_interq to avoid
     291 		 * sending stale packets when the link comes back up.
     292 		 */
    293 		ixgbe_drain(ifp, txr);
    294 		return (ENETDOWN);
    295 	}
    296 	if ((ifp->if_flags & IFF_RUNNING) == 0)
    297 		return (ENETDOWN);
    298 	if (txr->txr_no_space)
    299 		return (ENETDOWN);
    300 
    301 	/* Process the queue */
    302 	while ((next = pcq_get(txr->txr_interq)) != NULL) {
    303 		if ((err = ixgbe_xmit(txr, next)) != 0) {
    304 			m_freem(next);
    305 			/* All errors are counted in ixgbe_xmit() */
    306 			break;
    307 		}
    308 		enqueued++;
    309 #if __FreeBSD_version >= 1100036
    310 		/*
    311 		 * Since we're looking at the tx ring, we can check
     312 		 * to see if we're a VF by examining our tail register
    313 		 * address.
    314 		 */
    315 		if ((txr->adapter->feat_en & IXGBE_FEATURE_VF) &&
    316 		    (next->m_flags & M_MCAST))
    317 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
    318 #endif
    319 		/* Send a copy of the frame to the BPF listener */
    320 		bpf_mtap(ifp, next, BPF_D_OUT);
    321 		if ((ifp->if_flags & IFF_RUNNING) == 0)
    322 			break;
    323 	}
    324 
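         	/* Reclaim completed descriptors when the ring runs low. */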
    325 	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->adapter))
    326 		ixgbe_txeof(txr);
    327 
    328 	return (err);
    329 } /* ixgbe_mq_start_locked */
    330 
    331 /************************************************************************
    332  * ixgbe_deferred_mq_start
    333  *
    334  *   Called from a softint and workqueue (indirectly) to drain queued
    335  *   transmit packets.
    336  ************************************************************************/
    337 void
    338 ixgbe_deferred_mq_start(void *arg)
    339 {
    340 	struct tx_ring *txr = arg;
    341 	struct adapter *adapter = txr->adapter;
    342 	struct ifnet   *ifp = adapter->ifp;
    343 
    344 	IXGBE_TX_LOCK(txr);
    345 	if (pcq_peek(txr->txr_interq) != NULL)
    346 		ixgbe_mq_start_locked(ifp, txr);
    347 	IXGBE_TX_UNLOCK(txr);
    348 } /* ixgbe_deferred_mq_start */
    349 
    350 /************************************************************************
    351  * ixgbe_deferred_mq_start_work
    352  *
    353  *   Called from a workqueue to drain queued transmit packets.
    354  ************************************************************************/
    355 void
    356 ixgbe_deferred_mq_start_work(struct work *wk, void *arg)
    357 {
    358 	struct tx_ring *txr = container_of(wk, struct tx_ring, wq_cookie);
    359 	struct adapter *adapter = txr->adapter;
    360 	u_int *enqueued = percpu_getref(adapter->txr_wq_enqueued);
    361 	*enqueued = 0;
    362 	percpu_putref(adapter->txr_wq_enqueued);
    363 
    364 	ixgbe_deferred_mq_start(txr);
     365 } /* ixgbe_deferred_mq_start_work */
    366 
    367 /************************************************************************
    368  * ixgbe_drain_all
    369  ************************************************************************/
    370 void
    371 ixgbe_drain_all(struct adapter *adapter)
    372 {
    373 	struct ifnet *ifp = adapter->ifp;
    374 	struct ix_queue *que = adapter->queues;
    375 
    376 	for (int i = 0; i < adapter->num_queues; i++, que++) {
    377 		struct tx_ring  *txr = que->txr;
    378 
    379 		IXGBE_TX_LOCK(txr);
    380 		ixgbe_drain(ifp, txr);
    381 		IXGBE_TX_UNLOCK(txr);
    382 	}
    383 }
    384 
    385 /************************************************************************
    386  * ixgbe_xmit
    387  *
    388  *   Maps the mbufs to tx descriptors, allowing the
    389  *   TX engine to transmit the packets.
    390  *
    391  *   Return 0 on success, positive on failure
    392  ************************************************************************/
    393 static int
    394 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
    395 {
    396 	struct adapter          *adapter = txr->adapter;
    397 	struct ixgbe_tx_buf     *txbuf;
    398 	union ixgbe_adv_tx_desc *txd = NULL;
    399 	struct ifnet	        *ifp = adapter->ifp;
    400 	int                     i, j, error;
    401 	int                     first;
    402 	u32                     olinfo_status = 0, cmd_type_len;
    403 	bool                    remap = TRUE;
    404 	bus_dmamap_t            map;
    405 
    406 	/* Basic descriptor defines */
    407 	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
    408 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
    409 
    410 	if (vlan_has_tag(m_head))
    411 		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
    412 
     413 	/*
     414 	 * It is important to capture the first descriptor
     415 	 * used because it will hold the index of the
     416 	 * one we tell the hardware to report back on.
     417 	 */
    418 	first = txr->next_avail_desc;
    419 	txbuf = &txr->tx_buffers[first];
    420 	map = txbuf->map;
    421 
    422 	/*
    423 	 * Map the packet for DMA.
    424 	 */
    425 retry:
    426 	error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map, m_head,
    427 	    BUS_DMA_NOWAIT);
    428 
    429 	if (__predict_false(error)) {
    430 		struct mbuf *m;
    431 
    432 		switch (error) {
    433 		case EAGAIN:
    434 			txr->q_eagain_tx_dma_setup++;
    435 			return EAGAIN;
    436 		case ENOMEM:
    437 			txr->q_enomem_tx_dma_setup++;
    438 			return EAGAIN;
    439 		case EFBIG:
    440 			/* Try it again? - one try */
    441 			if (remap == TRUE) {
    442 				remap = FALSE;
    443 				/*
    444 				 * XXX: m_defrag will choke on
    445 				 * non-MCLBYTES-sized clusters
    446 				 */
    447 				txr->q_efbig_tx_dma_setup++;
    448 				m = m_defrag(m_head, M_NOWAIT);
    449 				if (m == NULL) {
    450 					txr->q_mbuf_defrag_failed++;
    451 					return ENOBUFS;
    452 				}
    453 				m_head = m;
    454 				goto retry;
    455 			} else {
    456 				txr->q_efbig2_tx_dma_setup++;
    457 				return error;
    458 			}
    459 		case EINVAL:
    460 			txr->q_einval_tx_dma_setup++;
    461 			return error;
    462 		default:
    463 			txr->q_other_tx_dma_setup++;
    464 			return error;
    465 		}
    466 	}
    467 
    468 	/* Make certain there are enough descriptors */
    469 	if (txr->tx_avail < (map->dm_nsegs + 2)) {
    470 		txr->txr_no_space = true;
    471 		txr->no_desc_avail.ev_count++;
    472 		ixgbe_dmamap_unload(txr->txtag, txbuf->map);
    473 		return EAGAIN;
    474 	}
    475 
     476 	/*
     477 	 * Set up the appropriate offload context;
     478 	 * this will consume the first descriptor.
     479 	 */
    480 	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
    481 	if (__predict_false(error)) {
    482 		return (error);
    483 	}
    484 
    485 	/* Do the flow director magic */
    486 	if ((adapter->feat_en & IXGBE_FEATURE_FDIR) &&
    487 	    (txr->atr_sample) && (!adapter->fdir_reinit)) {
    488 		++txr->atr_count;
    489 		if (txr->atr_count >= atr_sample_rate) {
    490 			ixgbe_atr(txr, m_head);
    491 			txr->atr_count = 0;
    492 		}
    493 	}
    494 
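         	/* CC: have the data descriptors use the context descriptor set up above. */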
    495 	olinfo_status |= IXGBE_ADVTXD_CC;
    496 	i = txr->next_avail_desc;
    497 	for (j = 0; j < map->dm_nsegs; j++) {
    498 		bus_size_t seglen;
    499 		bus_addr_t segaddr;
    500 
    501 		txbuf = &txr->tx_buffers[i];
    502 		txd = &txr->tx_base[i];
    503 		seglen = map->dm_segs[j].ds_len;
    504 		segaddr = htole64(map->dm_segs[j].ds_addr);
    505 
    506 		txd->read.buffer_addr = segaddr;
    507 		txd->read.cmd_type_len = htole32(cmd_type_len | seglen);
    508 		txd->read.olinfo_status = htole32(olinfo_status);
    509 
    510 		if (++i == txr->num_desc)
    511 			i = 0;
    512 	}
    513 
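         	/* Last segment: mark end of packet and request a status write-back. */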
    514 	txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
    515 	txr->tx_avail -= map->dm_nsegs;
    516 	txr->next_avail_desc = i;
    517 
    518 	txbuf->m_head = m_head;
     519 	/*
     520 	 * Here we swap the map so the last descriptor,
     521 	 * which gets the completion interrupt, has the
     522 	 * real map, and the first descriptor gets the
     523 	 * unused map from this descriptor.
     524 	 */
    525 	txr->tx_buffers[first].map = txbuf->map;
    526 	txbuf->map = map;
    527 	bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
    528 	    BUS_DMASYNC_PREWRITE);
    529 
    530 	/* Set the EOP descriptor that will be marked done */
    531 	txbuf = &txr->tx_buffers[first];
    532 	txbuf->eop = txd;
    533 
    534 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
    535 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
     536 	/*
     537 	 * Advance the Transmit Descriptor Tail (TDT); this tells the
     538 	 * hardware that this frame is available to transmit.
     539 	 */
    540 	++txr->total_packets.ev_count;
    541 	IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
    542 
    543 	net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
    544 	if_statadd_ref(nsr, if_obytes, m_head->m_pkthdr.len);
    545 	if (m_head->m_flags & M_MCAST)
    546 		if_statinc_ref(nsr, if_omcasts);
    547 	IF_STAT_PUTREF(ifp);
    548 
    549 	/* Mark queue as having work */
    550 	if (txr->busy == 0)
    551 		txr->busy = 1;
    552 
    553 	return (0);
    554 } /* ixgbe_xmit */
    555 
    556 /************************************************************************
    557  * ixgbe_drain
    558  ************************************************************************/
    559 static void
    560 ixgbe_drain(struct ifnet *ifp, struct tx_ring *txr)
    561 {
    562 	struct mbuf *m;
    563 
    564 	IXGBE_TX_LOCK_ASSERT(txr);
    565 
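         	/* The if_snd queue is shared, so only the first ring drains it. */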
    566 	if (txr->me == 0) {
    567 		while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
    568 			IFQ_DEQUEUE(&ifp->if_snd, m);
    569 			m_freem(m);
    570 			IF_DROP(&ifp->if_snd);
    571 		}
    572 	}
    573 
    574 	while ((m = pcq_get(txr->txr_interq)) != NULL) {
    575 		m_freem(m);
    576 		txr->pcq_drops.ev_count++;
    577 	}
    578 }
    579 
    580 /************************************************************************
    581  * ixgbe_allocate_transmit_buffers
    582  *
    583  *   Allocate memory for tx_buffer structures. The tx_buffer stores all
    584  *   the information needed to transmit a packet on the wire. This is
     585  *   called only once at attach; setup is done on every reset.
    586  ************************************************************************/
    587 static int
    588 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
    589 {
    590 	struct adapter      *adapter = txr->adapter;
    591 	device_t            dev = adapter->dev;
    592 	struct ixgbe_tx_buf *txbuf;
    593 	int                 error, i;
    594 
    595 	/*
    596 	 * Setup DMA descriptor areas.
    597 	 */
    598 	error = ixgbe_dma_tag_create(
    599 	         /*      parent */ adapter->osdep.dmat,
    600 	         /*   alignment */ 1,
    601 	         /*      bounds */ 0,
    602 	         /*     maxsize */ IXGBE_TSO_SIZE,
    603 	         /*   nsegments */ adapter->num_segs,
    604 	         /*  maxsegsize */ PAGE_SIZE,
    605 	         /*       flags */ 0,
    606 	                           &txr->txtag);
    607 	if (error != 0) {
     608 		aprint_error_dev(dev, "Unable to allocate TX DMA tag\n");
    609 		goto fail;
    610 	}
    611 
    612 	txr->tx_buffers = malloc(sizeof(struct ixgbe_tx_buf) *
    613 	    adapter->num_tx_desc, M_DEVBUF, M_WAITOK | M_ZERO);
    614 
    615 	/* Create the descriptor buffer dma maps */
    616 	txbuf = txr->tx_buffers;
    617 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
    618 		error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
    619 		if (error != 0) {
    620 			aprint_error_dev(dev,
    621 			    "Unable to create TX DMA map (%d)\n", error);
    622 			goto fail;
    623 		}
    624 	}
    625 
    626 	return 0;
    627 fail:
     628 	/* Free everything; this handles the case where we failed partway. */
    629 #if 0 /* XXX was FreeBSD */
    630 	ixgbe_free_transmit_structures(adapter);
    631 #else
    632 	ixgbe_free_transmit_buffers(txr);
    633 #endif
    634 	return (error);
    635 } /* ixgbe_allocate_transmit_buffers */
    636 
    637 /************************************************************************
    638  * ixgbe_setup_transmit_ring - Initialize a transmit ring.
    639  ************************************************************************/
    640 static void
    641 ixgbe_setup_transmit_ring(struct tx_ring *txr)
    642 {
    643 	struct adapter        *adapter = txr->adapter;
    644 	struct ixgbe_tx_buf   *txbuf;
    645 #ifdef DEV_NETMAP
    646 	struct netmap_adapter *na = NA(adapter->ifp);
    647 	struct netmap_slot    *slot;
    648 #endif /* DEV_NETMAP */
    649 
    650 	/* Clear the old ring contents */
    651 	IXGBE_TX_LOCK(txr);
    652 
    653 #ifdef DEV_NETMAP
    654 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
    655 		/*
    656 		 * (under lock): if in netmap mode, do some consistency
    657 		 * checks and set slot to entry 0 of the netmap ring.
    658 		 */
    659 		slot = netmap_reset(na, NR_TX, txr->me, 0);
    660 	}
    661 #endif /* DEV_NETMAP */
    662 
    663 	bzero((void *)txr->tx_base,
    664 	    (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
    665 	/* Reset indices */
    666 	txr->next_avail_desc = 0;
    667 	txr->next_to_clean = 0;
    668 
    669 	/* Free any existing tx buffers. */
    670 	txbuf = txr->tx_buffers;
    671 	for (int i = 0; i < txr->num_desc; i++, txbuf++) {
    672 		if (txbuf->m_head != NULL) {
    673 			bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
    674 			    0, txbuf->m_head->m_pkthdr.len,
    675 			    BUS_DMASYNC_POSTWRITE);
    676 			ixgbe_dmamap_unload(txr->txtag, txbuf->map);
    677 			m_freem(txbuf->m_head);
    678 			txbuf->m_head = NULL;
    679 		}
    680 
    681 #ifdef DEV_NETMAP
    682 		/*
    683 		 * In netmap mode, set the map for the packet buffer.
    684 		 * NOTE: Some drivers (not this one) also need to set
    685 		 * the physical buffer address in the NIC ring.
    686 		 * Slots in the netmap ring (indexed by "si") are
    687 		 * kring->nkr_hwofs positions "ahead" wrt the
    688 		 * corresponding slot in the NIC ring. In some drivers
    689 		 * (not here) nkr_hwofs can be negative. Function
    690 		 * netmap_idx_n2k() handles wraparounds properly.
    691 		 */
    692 		if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
    693 			int si = netmap_idx_n2k(na->tx_rings[txr->me], i);
    694 			netmap_load_map(na, txr->txtag,
    695 			    txbuf->map, NMB(na, slot + si));
    696 		}
    697 #endif /* DEV_NETMAP */
    698 
    699 		/* Clear the EOP descriptor pointer */
    700 		txbuf->eop = NULL;
    701 	}
    702 
    703 	/* Set the rate at which we sample packets */
    704 	if (adapter->feat_en & IXGBE_FEATURE_FDIR)
    705 		txr->atr_sample = atr_sample_rate;
    706 
    707 	/* Set number of descriptors available */
    708 	txr->tx_avail = adapter->num_tx_desc;
    709 
    710 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
    711 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
    712 	IXGBE_TX_UNLOCK(txr);
    713 } /* ixgbe_setup_transmit_ring */
    714 
    715 /************************************************************************
    716  * ixgbe_setup_transmit_structures - Initialize all transmit rings.
    717  ************************************************************************/
    718 int
    719 ixgbe_setup_transmit_structures(struct adapter *adapter)
    720 {
    721 	struct tx_ring *txr = adapter->tx_rings;
    722 
    723 	for (int i = 0; i < adapter->num_queues; i++, txr++)
    724 		ixgbe_setup_transmit_ring(txr);
    725 
    726 	return (0);
    727 } /* ixgbe_setup_transmit_structures */
    728 
    729 /************************************************************************
    730  * ixgbe_free_transmit_structures - Free all transmit rings.
    731  ************************************************************************/
    732 void
    733 ixgbe_free_transmit_structures(struct adapter *adapter)
    734 {
    735 	struct tx_ring *txr = adapter->tx_rings;
    736 
    737 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
    738 		ixgbe_free_transmit_buffers(txr);
    739 		ixgbe_dma_free(adapter, &txr->txdma);
    740 		IXGBE_TX_LOCK_DESTROY(txr);
    741 	}
    742 	free(adapter->tx_rings, M_DEVBUF);
    743 } /* ixgbe_free_transmit_structures */
    744 
    745 /************************************************************************
    746  * ixgbe_free_transmit_buffers
    747  *
    748  *   Free transmit ring related data structures.
    749  ************************************************************************/
    750 static void
    751 ixgbe_free_transmit_buffers(struct tx_ring *txr)
    752 {
    753 	struct adapter      *adapter = txr->adapter;
    754 	struct ixgbe_tx_buf *tx_buffer;
    755 	int                 i;
    756 
    757 	INIT_DEBUGOUT("ixgbe_free_transmit_buffers: begin");
    758 
    759 	if (txr->tx_buffers == NULL)
    760 		return;
    761 
    762 	tx_buffer = txr->tx_buffers;
    763 	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
    764 		if (tx_buffer->m_head != NULL) {
    765 			bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
    766 			    0, tx_buffer->m_head->m_pkthdr.len,
    767 			    BUS_DMASYNC_POSTWRITE);
    768 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
    769 			m_freem(tx_buffer->m_head);
    770 			tx_buffer->m_head = NULL;
    771 			if (tx_buffer->map != NULL) {
    772 				ixgbe_dmamap_destroy(txr->txtag,
    773 				    tx_buffer->map);
    774 				tx_buffer->map = NULL;
    775 			}
    776 		} else if (tx_buffer->map != NULL) {
    777 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
    778 			ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
    779 			tx_buffer->map = NULL;
    780 		}
    781 	}
    782 	if (txr->txr_interq != NULL) {
    783 		struct mbuf *m;
    784 
    785 		while ((m = pcq_get(txr->txr_interq)) != NULL)
    786 			m_freem(m);
    787 		pcq_destroy(txr->txr_interq);
    788 	}
    789 	if (txr->tx_buffers != NULL) {
    790 		free(txr->tx_buffers, M_DEVBUF);
    791 		txr->tx_buffers = NULL;
    792 	}
    793 	if (txr->txtag != NULL) {
    794 		ixgbe_dma_tag_destroy(txr->txtag);
    795 		txr->txtag = NULL;
    796 	}
    797 } /* ixgbe_free_transmit_buffers */
    798 
    799 /************************************************************************
    800  * ixgbe_tx_ctx_setup
    801  *
    802  *   Advanced Context Descriptor setup for VLAN, CSUM or TSO
    803  ************************************************************************/
    804 static int
    805 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
    806     u32 *cmd_type_len, u32 *olinfo_status)
    807 {
    808 	struct adapter                   *adapter = txr->adapter;
    809 	struct ixgbe_adv_tx_context_desc *TXD;
    810 	struct ether_vlan_header         *eh;
    811 #ifdef INET
    812 	struct ip                        *ip;
    813 #endif
    814 #ifdef INET6
    815 	struct ip6_hdr                   *ip6;
    816 #endif
    817 	int                              ehdrlen, ip_hlen = 0;
    818 	int                              offload = TRUE;
    819 	int                              ctxd = txr->next_avail_desc;
    820 	u32                              vlan_macip_lens = 0;
    821 	u32                              type_tucmd_mlhl = 0;
    822 	u16                              vtag = 0;
    823 	u16                              etype;
    824 	u8                               ipproto = 0;
    825 	char                             *l3d;
    826 
    827 
    828 	/* First check if TSO is to be used */
    829 	if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4 | M_CSUM_TSOv6)) {
    830 		int rv = ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status);
    831 
    832 		if (rv != 0)
    833 			++adapter->tso_err.ev_count;
    834 		return rv;
    835 	}
    836 
    837 	if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
    838 		offload = FALSE;
    839 
    840 	/* Indicate the whole packet as payload when not doing TSO */
    841 	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
    842 
    843 	/* Now ready a context descriptor */
    844 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
    845 
    846 	/*
    847 	 * In advanced descriptors the vlan tag must
    848 	 * be placed into the context descriptor. Hence
    849 	 * we need to make one even if not doing offloads.
    850 	 */
    851 	if (vlan_has_tag(mp)) {
    852 		vtag = htole16(vlan_get_tag(mp));
    853 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
    854 	} else if (!(txr->adapter->feat_en & IXGBE_FEATURE_NEEDS_CTXD) &&
    855 	           (offload == FALSE))
    856 		return (0);
    857 
    858 	/*
    859 	 * Determine where frame payload starts.
    860 	 * Jump over vlan headers if already present,
    861 	 * helpful for QinQ too.
    862 	 */
    863 	KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
    864 	eh = mtod(mp, struct ether_vlan_header *);
    865 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
    866 		KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
    867 		etype = ntohs(eh->evl_proto);
    868 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
    869 	} else {
    870 		etype = ntohs(eh->evl_encap_proto);
    871 		ehdrlen = ETHER_HDR_LEN;
    872 	}
    873 
    874 	/* Set the ether header length */
    875 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
    876 
    877 	if (offload == FALSE)
    878 		goto no_offloads;
    879 
    880 	/*
    881 	 * If the first mbuf only includes the ethernet header,
    882 	 * jump to the next one
    883 	 * XXX: This assumes the stack splits mbufs containing headers
    884 	 *      on header boundaries
    885 	 * XXX: And assumes the entire IP header is contained in one mbuf
    886 	 */
    887 	if (mp->m_len == ehdrlen && mp->m_next)
    888 		l3d = mtod(mp->m_next, char *);
    889 	else
    890 		l3d = mtod(mp, char *) + ehdrlen;
    891 
    892 	switch (etype) {
    893 #ifdef INET
    894 	case ETHERTYPE_IP:
    895 		ip = (struct ip *)(l3d);
    896 		ip_hlen = ip->ip_hl << 2;
    897 		ipproto = ip->ip_p;
    898 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
    899 		KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
    900 		    ip->ip_sum == 0);
    901 		break;
    902 #endif
    903 #ifdef INET6
    904 	case ETHERTYPE_IPV6:
    905 		ip6 = (struct ip6_hdr *)(l3d);
    906 		ip_hlen = sizeof(struct ip6_hdr);
    907 		ipproto = ip6->ip6_nxt;
    908 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
    909 		break;
    910 #endif
    911 	default:
    912 		offload = false;
    913 		break;
    914 	}
    915 
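         	/* IXSM: request IPv4 header checksum insertion. */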
    916 	if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
    917 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
    918 
    919 	vlan_macip_lens |= ip_hlen;
    920 
    921 	/* No support for offloads for non-L4 next headers */
    922 	switch (ipproto) {
    923 	case IPPROTO_TCP:
    924 		if (mp->m_pkthdr.csum_flags &
    925 		    (M_CSUM_TCPv4 | M_CSUM_TCPv6))
    926 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
    927 		else
    928 			offload = false;
    929 		break;
    930 	case IPPROTO_UDP:
    931 		if (mp->m_pkthdr.csum_flags &
    932 		    (M_CSUM_UDPv4 | M_CSUM_UDPv6))
    933 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
    934 		else
    935 			offload = false;
    936 		break;
    937 	default:
    938 		offload = false;
    939 		break;
    940 	}
    941 
    942 	if (offload) /* Insert L4 checksum into data descriptors */
    943 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
    944 
    945 no_offloads:
    946 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
    947 
    948 	/* Now copy bits into descriptor */
    949 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
    950 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
    951 	TXD->seqnum_seed = htole32(0);
    952 	TXD->mss_l4len_idx = htole32(0);
    953 
    954 	/* We've consumed the first desc, adjust counters */
    955 	if (++ctxd == txr->num_desc)
    956 		ctxd = 0;
    957 	txr->next_avail_desc = ctxd;
    958 	--txr->tx_avail;
    959 
    960 	return (0);
    961 } /* ixgbe_tx_ctx_setup */
    962 
    963 /************************************************************************
    964  * ixgbe_tso_setup
    965  *
    966  *   Setup work for hardware segmentation offload (TSO) on
    967  *   adapters using advanced tx descriptors
    968  ************************************************************************/
    969 static int
    970 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len,
    971     u32 *olinfo_status)
    972 {
    973 	struct ixgbe_adv_tx_context_desc *TXD;
    974 	struct ether_vlan_header         *eh;
    975 #ifdef INET6
    976 	struct ip6_hdr                   *ip6;
    977 #endif
    978 #ifdef INET
    979 	struct ip                        *ip;
    980 #endif
    981 	struct tcphdr                    *th;
    982 	int                              ctxd, ehdrlen, ip_hlen, tcp_hlen;
    983 	u32                              vlan_macip_lens = 0;
    984 	u32                              type_tucmd_mlhl = 0;
    985 	u32                              mss_l4len_idx = 0, paylen;
    986 	u16                              vtag = 0, eh_type;
    987 
    988 	/*
    989 	 * Determine where frame payload starts.
    990 	 * Jump over vlan headers if already present
    991 	 */
    992 	eh = mtod(mp, struct ether_vlan_header *);
    993 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
    994 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
    995 		eh_type = eh->evl_proto;
    996 	} else {
    997 		ehdrlen = ETHER_HDR_LEN;
    998 		eh_type = eh->evl_encap_proto;
    999 	}
   1000 
   1001 	switch (ntohs(eh_type)) {
   1002 #ifdef INET
   1003 	case ETHERTYPE_IP:
   1004 		ip = (struct ip *)(mp->m_data + ehdrlen);
   1005 		if (ip->ip_p != IPPROTO_TCP)
   1006 			return (ENXIO);
   1007 		ip->ip_sum = 0;
   1008 		ip_hlen = ip->ip_hl << 2;
   1009 		th = (struct tcphdr *)((char *)ip + ip_hlen);
   1010 		th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
   1011 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
   1012 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
   1013 		/* Tell transmit desc to also do IPv4 checksum. */
   1014 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
   1015 		break;
   1016 #endif
   1017 #ifdef INET6
   1018 	case ETHERTYPE_IPV6:
   1019 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
   1020 		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
   1021 		if (ip6->ip6_nxt != IPPROTO_TCP)
   1022 			return (ENXIO);
   1023 		ip_hlen = sizeof(struct ip6_hdr);
   1025 		th = (struct tcphdr *)((char *)ip6 + ip_hlen);
   1026 		th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
   1027 		    &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
   1028 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
   1029 		break;
   1030 #endif
   1031 	default:
   1032 		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
   1033 		    __func__, ntohs(eh_type));
   1034 		break;
   1035 	}
   1036 
   1037 	ctxd = txr->next_avail_desc;
   1038 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
   1039 
   1040 	tcp_hlen = th->th_off << 2;
   1041 
   1042 	/* This is used in the transmit desc in encap */
   1043 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
   1044 
   1045 	/* VLAN MACLEN IPLEN */
   1046 	if (vlan_has_tag(mp)) {
   1047 		vtag = htole16(vlan_get_tag(mp));
   1048 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
   1049 	}
   1050 
   1051 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
   1052 	vlan_macip_lens |= ip_hlen;
   1053 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
   1054 
   1055 	/* ADV DTYPE TUCMD */
   1056 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
   1057 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
   1058 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
   1059 
   1060 	/* MSS L4LEN IDX */
   1061 	mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
   1062 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
   1063 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
   1064 
   1065 	TXD->seqnum_seed = htole32(0);
   1066 
   1067 	if (++ctxd == txr->num_desc)
   1068 		ctxd = 0;
   1069 
   1070 	txr->tx_avail--;
   1071 	txr->next_avail_desc = ctxd;
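         	/* TSE: enable TCP segmentation; TXSM: insert the L4 checksum. */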
   1072 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
   1073 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
   1074 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
   1075 	++txr->tso_tx.ev_count;
   1076 
   1077 	return (0);
   1078 } /* ixgbe_tso_setup */
   1079 
   1080 
   1081 /************************************************************************
   1082  * ixgbe_txeof
   1083  *
   1084  *   Examine each tx_buffer in the used queue. If the hardware is done
   1085  *   processing the packet then free associated resources. The
   1086  *   tx_buffer is put back on the free queue.
   1087  ************************************************************************/
   1088 bool
   1089 ixgbe_txeof(struct tx_ring *txr)
   1090 {
   1091 	struct adapter		*adapter = txr->adapter;
   1092 	struct ifnet		*ifp = adapter->ifp;
   1093 	struct ixgbe_tx_buf	*buf;
   1094 	union ixgbe_adv_tx_desc *txd;
   1095 	u32			work, processed = 0;
   1096 	u32			limit = adapter->tx_process_limit;
   1097 
   1098 	KASSERT(mutex_owned(&txr->tx_mtx));
   1099 
   1100 #ifdef DEV_NETMAP
   1101 	if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) &&
   1102 	    (adapter->ifp->if_capenable & IFCAP_NETMAP)) {
   1103 		struct netmap_adapter *na = NA(adapter->ifp);
   1104 		struct netmap_kring *kring = na->tx_rings[txr->me];
   1105 		txd = txr->tx_base;
   1106 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1107 		    BUS_DMASYNC_POSTREAD);
   1108 		/*
   1109 		 * In netmap mode, all the work is done in the context
   1110 		 * of the client thread. Interrupt handlers only wake up
   1111 		 * clients, which may be sleeping on individual rings
   1112 		 * or on a global resource for all rings.
   1113 		 * To implement tx interrupt mitigation, we wake up the client
   1114 		 * thread roughly every half ring, even if the NIC interrupts
   1115 		 * more frequently. This is implemented as follows:
   1116 		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
   1117 		 *   the slot that should wake up the thread (nkr_num_slots
   1118 		 *   means the user thread should not be woken up);
   1119 		 * - the driver ignores tx interrupts unless netmap_mitigate=0
   1120 		 *   or the slot has the DD bit set.
   1121 		 */
   1122 		if (kring->nr_kflags < kring->nkr_num_slots &&
   1123 		    txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD) {
   1124 			netmap_tx_irq(ifp, txr->me);
   1125 		}
   1126 		return false;
   1127 	}
   1128 #endif /* DEV_NETMAP */
   1129 
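         	/* All descriptors are free, so there is nothing outstanding to clean. */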
   1130 	if (txr->tx_avail == txr->num_desc) {
   1131 		txr->busy = 0;
   1132 		return false;
   1133 	}
   1134 
   1135 	/* Get work starting point */
   1136 	work = txr->next_to_clean;
   1137 	buf = &txr->tx_buffers[work];
   1138 	txd = &txr->tx_base[work];
   1139 	work -= txr->num_desc; /* The distance to ring end */
   1140 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1141 	    BUS_DMASYNC_POSTREAD);
   1142 
   1143 	do {
   1144 		union ixgbe_adv_tx_desc *eop = buf->eop;
   1145 		if (eop == NULL) /* No work */
   1146 			break;
   1147 
   1148 		if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
   1149 			break;	/* I/O not complete */
   1150 
   1151 		if (buf->m_head) {
   1152 			txr->bytes += buf->m_head->m_pkthdr.len;
   1153 			bus_dmamap_sync(txr->txtag->dt_dmat, buf->map,
   1154 			    0, buf->m_head->m_pkthdr.len,
   1155 			    BUS_DMASYNC_POSTWRITE);
   1156 			ixgbe_dmamap_unload(txr->txtag, buf->map);
   1157 			m_freem(buf->m_head);
   1158 			buf->m_head = NULL;
   1159 		}
   1160 		buf->eop = NULL;
   1161 		txr->txr_no_space = false;
   1162 		++txr->tx_avail;
   1163 
   1164 		/* We clean the range if multi segment */
   1165 		while (txd != eop) {
   1166 			++txd;
   1167 			++buf;
   1168 			++work;
   1169 			/* wrap the ring? */
   1170 			if (__predict_false(!work)) {
   1171 				work -= txr->num_desc;
   1172 				buf = txr->tx_buffers;
   1173 				txd = txr->tx_base;
   1174 			}
   1175 			if (buf->m_head) {
   1176 				txr->bytes +=
   1177 				    buf->m_head->m_pkthdr.len;
   1178 				bus_dmamap_sync(txr->txtag->dt_dmat,
   1179 				    buf->map,
   1180 				    0, buf->m_head->m_pkthdr.len,
   1181 				    BUS_DMASYNC_POSTWRITE);
   1182 				ixgbe_dmamap_unload(txr->txtag,
   1183 				    buf->map);
   1184 				m_freem(buf->m_head);
   1185 				buf->m_head = NULL;
   1186 			}
   1187 			++txr->tx_avail;
   1188 			buf->eop = NULL;
   1189 
   1190 		}
   1191 		++txr->packets;
   1192 		++processed;
   1193 		if_statinc(ifp, if_opackets);
   1194 
   1195 		/* Try the next packet */
   1196 		++txd;
   1197 		++buf;
   1198 		++work;
   1199 		/* reset with a wrap */
   1200 		if (__predict_false(!work)) {
   1201 			work -= txr->num_desc;
   1202 			buf = txr->tx_buffers;
   1203 			txd = txr->tx_base;
   1204 		}
   1205 		prefetch(txd);
   1206 	} while (__predict_true(--limit));
   1207 
   1208 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   1209 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   1210 
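         	/* Convert the negative ring offset back into an absolute index. */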
   1211 	work += txr->num_desc;
   1212 	txr->next_to_clean = work;
   1213 
    1214 	/*
    1215 	 * Queue hang detection: we know there is work
    1216 	 * outstanding or the early return above would
    1217 	 * have been taken, so increment busy if nothing
    1218 	 * managed to get cleaned.  The local timer then
    1219 	 * checks this count and marks the queue as HUNG
    1220 	 * if it exceeds the maximum number of attempts.
    1221 	 */
   1222 	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
   1223 		++txr->busy;
    1224 	/*
    1225 	 * If anything was cleaned, reset the state to 1;
    1226 	 * note this clears HUNG if it is set.
    1227 	 */
   1228 	if (processed)
   1229 		txr->busy = 1;
   1230 
   1231 	if (txr->tx_avail == txr->num_desc)
   1232 		txr->busy = 0;
   1233 
   1234 	return ((limit > 0) ? false : true);
   1235 } /* ixgbe_txeof */
   1236 
   1237 /************************************************************************
   1238  * ixgbe_rsc_count
   1239  *
   1240  *   Used to detect a descriptor that has been merged by Hardware RSC.
   1241  ************************************************************************/
   1242 static inline u32
   1243 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
   1244 {
   1245 	return (le32toh(rx->wb.lower.lo_dword.data) &
   1246 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
   1247 } /* ixgbe_rsc_count */
   1248 
   1249 /************************************************************************
   1250  * ixgbe_setup_hw_rsc
   1251  *
    1252  *   for an RX ring; it is toggled by the LRO capability
   1253  *   for an RX ring, this is toggled by the LRO capability
   1254  *   even though it is transparent to the stack.
   1255  *
   1256  *   NOTE: Since this HW feature only works with IPv4 and
   1257  *         testing has shown soft LRO to be as effective,
   1258  *         this feature will be disabled by default.
   1259  ************************************************************************/
   1260 static void
   1261 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
   1262 {
   1263 	struct	adapter  *adapter = rxr->adapter;
   1264 	struct	ixgbe_hw *hw = &adapter->hw;
   1265 	u32              rscctrl, rdrxctl;
   1266 
   1267 	/* If turning LRO/RSC off we need to disable it */
   1268 	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
    1269 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
    1270 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
         		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
    1271 		return;
    1272 	}
   1273 
   1274 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
   1275 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
   1276 #ifdef DEV_NETMAP
   1277 	/* Always strip CRC unless Netmap disabled it */
   1278 	if (!(adapter->feat_en & IXGBE_FEATURE_NETMAP) ||
   1279 	    !(adapter->ifp->if_capenable & IFCAP_NETMAP) ||
   1280 	    ix_crcstrip)
   1281 #endif /* DEV_NETMAP */
   1282 		rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
   1283 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
   1284 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
   1285 
   1286 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
   1287 	rscctrl |= IXGBE_RSCCTL_RSCEN;
   1288 	/*
   1289 	 * Limit the total number of descriptors that
   1290 	 * can be combined, so it does not exceed 64K
   1291 	 */
   1292 	if (rxr->mbuf_sz == MCLBYTES)
   1293 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
   1294 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
   1295 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
   1296 	else if (rxr->mbuf_sz == MJUM9BYTES)
   1297 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
   1298 	else  /* Using 16K cluster */
   1299 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
   1300 
   1301 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
   1302 
   1303 	/* Enable TCP header recognition */
   1304 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
   1305 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR));
   1306 
   1307 	/* Disable RSC for ACK packets */
   1308 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
   1309 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
   1310 
   1311 	rxr->hw_rsc = TRUE;
   1312 } /* ixgbe_setup_hw_rsc */
   1313 
   1314 /************************************************************************
   1315  * ixgbe_refresh_mbufs
   1316  *
    1317  *   Refresh mbuf buffers for RX descriptor rings.
    1318  *    - Keeps its own state, so discards due to resource
    1319  *      exhaustion are unnecessary.  If an mbuf cannot be
    1320  *      obtained it just returns, keeping its placeholder,
    1321  *      so it can simply be called again later to retry.
   1322  *
   1323  *   XXX NetBSD TODO:
   1324  *    - The ixgbe_rxeof() function always preallocates mbuf cluster (jcl),
   1325  *      so the ixgbe_refresh_mbufs() function can be simplified.
   1326  *
   1327  ************************************************************************/
   1328 static void
   1329 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
   1330 {
   1331 	struct adapter      *adapter = rxr->adapter;
   1332 	struct ixgbe_rx_buf *rxbuf;
   1333 	struct mbuf         *mp;
   1334 	int                 i, j, error;
   1335 	bool                refreshed = false;
   1336 
   1337 	i = j = rxr->next_to_refresh;
   1338 	/* Control the loop with one beyond */
   1339 	if (++j == rxr->num_desc)
   1340 		j = 0;
   1341 
   1342 	while (j != limit) {
   1343 		rxbuf = &rxr->rx_buffers[i];
   1344 		if (rxbuf->buf == NULL) {
   1345 			mp = ixgbe_getjcl(&rxr->jcl_head, M_NOWAIT,
   1346 			    MT_DATA, M_PKTHDR, rxr->mbuf_sz);
   1347 			if (mp == NULL) {
   1348 				rxr->no_jmbuf.ev_count++;
   1349 				goto update;
   1350 			}
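         			/* Trim the head so the IP header ends up 4-byte aligned. */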
   1351 			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
   1352 				m_adj(mp, ETHER_ALIGN);
   1353 		} else
   1354 			mp = rxbuf->buf;
   1355 
   1356 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   1357 
   1358 		/* If we're dealing with an mbuf that was copied rather
   1359 		 * than replaced, there's no need to go through busdma.
   1360 		 */
   1361 		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
   1362 			/* Get the memory mapping */
   1363 			ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   1364 			error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
   1365 			    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
   1366 			if (error != 0) {
   1367 				device_printf(adapter->dev, "Refresh mbufs: "
   1368 				    "payload dmamap load failure - %d\n",
   1369 				    error);
   1370 				m_free(mp);
   1371 				rxbuf->buf = NULL;
   1372 				goto update;
   1373 			}
   1374 			rxbuf->buf = mp;
   1375 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1376 			    0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
   1377 			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
   1378 			    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1379 		} else {
   1380 			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
   1381 			rxbuf->flags &= ~IXGBE_RX_COPY;
   1382 		}
   1383 
   1384 		refreshed = true;
   1385 		/* Next is precalculated */
   1386 		i = j;
   1387 		rxr->next_to_refresh = i;
   1388 		if (++j == rxr->num_desc)
   1389 			j = 0;
   1390 	}
   1391 
   1392 update:
   1393 	if (refreshed) /* Update hardware tail index */
   1394 		IXGBE_WRITE_REG(&adapter->hw, rxr->tail, rxr->next_to_refresh);
   1395 
   1396 	return;
   1397 } /* ixgbe_refresh_mbufs */
   1398 
   1399 /************************************************************************
   1400  * ixgbe_allocate_receive_buffers
   1401  *
   1402  *   Allocate memory for rx_buffer structures. Since we use one
    1403  *   rx_buffer per received packet, the maximum number of rx_buffers
   1404  *   that we'll need is equal to the number of receive descriptors
   1405  *   that we've allocated.
   1406  ************************************************************************/
   1407 static int
   1408 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
   1409 {
   1410 	struct adapter      *adapter = rxr->adapter;
   1411 	device_t            dev = adapter->dev;
   1412 	struct ixgbe_rx_buf *rxbuf;
   1413 	int                 bsize, error;
   1414 
   1415 	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
   1416 	rxr->rx_buffers = malloc(bsize, M_DEVBUF, M_WAITOK | M_ZERO);
   1417 
   1418 	error = ixgbe_dma_tag_create(
   1419 	         /*      parent */ adapter->osdep.dmat,
   1420 	         /*   alignment */ 1,
   1421 	         /*      bounds */ 0,
   1422 	         /*     maxsize */ MJUM16BYTES,
   1423 	         /*   nsegments */ 1,
   1424 	         /*  maxsegsize */ MJUM16BYTES,
   1425 	         /*       flags */ 0,
   1426 	                           &rxr->ptag);
   1427 	if (error != 0) {
   1428 		aprint_error_dev(dev, "Unable to create RX DMA tag\n");
   1429 		goto fail;
   1430 	}
   1431 
   1432 	for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
   1433 		rxbuf = &rxr->rx_buffers[i];
   1434 		error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
   1435 		if (error) {
   1436 			aprint_error_dev(dev, "Unable to create RX dma map\n");
   1437 			goto fail;
   1438 		}
   1439 	}
   1440 
   1441 	return (0);
   1442 
   1443 fail:
   1444 	/* Frees all, but can handle partial completion */
   1445 	ixgbe_free_receive_structures(adapter);
   1446 
   1447 	return (error);
   1448 } /* ixgbe_allocate_receive_buffers */
   1449 
   1450 /************************************************************************
   1451  * ixgbe_free_receive_ring
   1452  ************************************************************************/
   1453 static void
   1454 ixgbe_free_receive_ring(struct rx_ring *rxr)
   1455 {
   1456 	for (int i = 0; i < rxr->num_desc; i++) {
   1457 		ixgbe_rx_discard(rxr, i);
   1458 	}
   1459 } /* ixgbe_free_receive_ring */
   1460 
   1461 /************************************************************************
   1462  * ixgbe_setup_receive_ring
   1463  *
   1464  *   Initialize a receive ring and its buffers.
   1465  ************************************************************************/
   1466 static int
   1467 ixgbe_setup_receive_ring(struct rx_ring *rxr)
   1468 {
   1469 	struct adapter        *adapter;
   1470 	struct ixgbe_rx_buf   *rxbuf;
   1471 #ifdef LRO
   1472 	struct ifnet          *ifp;
   1473 	struct lro_ctrl       *lro = &rxr->lro;
   1474 #endif /* LRO */
   1475 #ifdef DEV_NETMAP
   1476 	struct netmap_adapter *na = NA(rxr->adapter->ifp);
   1477 	struct netmap_slot    *slot;
   1478 #endif /* DEV_NETMAP */
   1479 	int                   rsize, error = 0;
   1480 
   1481 	adapter = rxr->adapter;
   1482 #ifdef LRO
   1483 	ifp = adapter->ifp;
   1484 #endif /* LRO */
   1485 
   1486 	/* Clear the ring contents */
   1487 	IXGBE_RX_LOCK(rxr);
   1488 
   1489 #ifdef DEV_NETMAP
   1490 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP)
   1491 		slot = netmap_reset(na, NR_RX, rxr->me, 0);
   1492 #endif /* DEV_NETMAP */
   1493 
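        	/*
        	 * Zero the entire descriptor area; this also clears any stale
        	 * write-back status left over from a previous run.
        	 */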
   1494 	rsize = roundup2(adapter->num_rx_desc *
   1495 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
   1496 	bzero((void *)rxr->rx_base, rsize);
   1497 	/* Cache the size */
   1498 	rxr->mbuf_sz = adapter->rx_mbuf_sz;
   1499 
   1500 	/* Free current RX buffer structs and their mbufs */
   1501 	ixgbe_free_receive_ring(rxr);
   1502 
   1503 	IXGBE_RX_UNLOCK(rxr);
   1504 	/*
   1505 	 * Now reinitialize our supply of jumbo mbufs.  The number
   1506 	 * or size of jumbo mbufs may have changed.
   1507 	 * Assume all of rxr->ptag are the same.
   1508 	 */
   1509 	ixgbe_jcl_reinit(adapter, rxr->ptag->dt_dmat, rxr,
   1510 	    (2 * adapter->num_rx_desc), adapter->rx_mbuf_sz);
   1511 
   1512 	IXGBE_RX_LOCK(rxr);
   1513 
   1514 	/* Now replenish the mbufs */
   1515 	for (int j = 0; j != rxr->num_desc; ++j) {
   1516 		struct mbuf *mp;
   1517 
   1518 		rxbuf = &rxr->rx_buffers[j];
   1519 
   1520 #ifdef DEV_NETMAP
   1521 		/*
   1522 		 * In netmap mode, fill the map and set the buffer
   1523 		 * address in the NIC ring, considering the offset
   1524 		 * between the netmap and NIC rings (see comment in
   1525 		 * ixgbe_setup_transmit_ring() ). No need to allocate
   1526 		 * an mbuf, so end the block with a continue;
   1527 		 */
   1528 		if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
   1529 			int sj = netmap_idx_n2k(na->rx_rings[rxr->me], j);
   1530 			uint64_t paddr;
   1531 			void *addr;
   1532 
   1533 			addr = PNMB(na, slot + sj, &paddr);
   1534 			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
   1535 			/* Update descriptor and the cached value */
   1536 			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
   1537 			rxbuf->addr = htole64(paddr);
   1538 			continue;
   1539 		}
   1540 #endif /* DEV_NETMAP */
   1541 
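        		/*
        		 * Normal (non-netmap) path: take a cluster from the
        		 * ring's jumbo cluster list (reinitialized above) and
        		 * load it into this buffer's DMA map.
        		 */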
   1542 		rxbuf->flags = 0;
   1543 		rxbuf->buf = ixgbe_getjcl(&rxr->jcl_head, M_NOWAIT,
   1544 		    MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
   1545 		if (rxbuf->buf == NULL) {
   1546 			error = ENOBUFS;
   1547 			goto fail;
   1548 		}
   1549 		mp = rxbuf->buf;
   1550 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   1551 		/* Get the memory mapping */
   1552 		error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat, rxbuf->pmap,
   1553 		    mp, BUS_DMA_NOWAIT);
   1554 		if (error != 0)
   1555 			goto fail;
   1556 		bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   1557 		    0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
   1558 		/* Update the descriptor and the cached value */
   1559 		rxr->rx_base[j].read.pkt_addr =
   1560 		    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1561 		rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   1562 	}
   1563 
   1564 	/* Setup our descriptor indices */
   1565 	rxr->next_to_check = 0;
   1566 	rxr->next_to_refresh = 0;
   1567 	rxr->lro_enabled = FALSE;
   1568 	rxr->rx_copies.ev_count = 0;
   1569 #if 0 /* NetBSD */
   1570 	rxr->rx_bytes.ev_count = 0;
   1571 #if 1	/* Fix inconsistency */
   1572 	rxr->rx_packets.ev_count = 0;
   1573 #endif
   1574 #endif
   1575 	rxr->vtag_strip = FALSE;
   1576 
   1577 	ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   1578 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   1579 
   1580 	/*
   1581 	 * Now set up the LRO interface
   1582 	 */
   1583 	if (ixgbe_rsc_enable)
   1584 		ixgbe_setup_hw_rsc(rxr);
   1585 #ifdef LRO
   1586 	else if (ifp->if_capenable & IFCAP_LRO) {
   1587 		device_t dev = adapter->dev;
   1588 		int err = tcp_lro_init(lro);
   1589 		if (err) {
   1590 			device_printf(dev, "LRO Initialization failed!\n");
   1591 			goto fail;
   1592 		}
   1593 		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
   1594 		rxr->lro_enabled = TRUE;
   1595 		lro->ifp = adapter->ifp;
   1596 	}
   1597 #endif /* LRO */
   1598 
   1599 	IXGBE_RX_UNLOCK(rxr);
   1600 
   1601 	return (0);
   1602 
   1603 fail:
   1604 	ixgbe_free_receive_ring(rxr);
   1605 	IXGBE_RX_UNLOCK(rxr);
   1606 
   1607 	return (error);
   1608 } /* ixgbe_setup_receive_ring */
   1609 
   1610 /************************************************************************
   1611  * ixgbe_setup_receive_structures - Initialize all receive rings.
   1612  ************************************************************************/
   1613 int
   1614 ixgbe_setup_receive_structures(struct adapter *adapter)
   1615 {
   1616 	struct rx_ring *rxr = adapter->rx_rings;
   1617 	int            j;
   1618 
   1619 	INIT_DEBUGOUT("ixgbe_setup_receive_structures");
   1620 	for (j = 0; j < adapter->num_queues; j++, rxr++)
   1621 		if (ixgbe_setup_receive_ring(rxr))
   1622 			goto fail;
   1623 
   1624 	return (0);
   1625 fail:
   1626 	/*
   1627 	 * Free the RX buffers allocated so far. We only handle
   1628 	 * the rings that completed; the failing case has already
   1629 	 * cleaned up after itself. Ring 'j' failed, so it is the terminus.
   1630 	 */
   1631 	for (int i = 0; i < j; ++i) {
   1632 		rxr = &adapter->rx_rings[i];
   1633 		IXGBE_RX_LOCK(rxr);
   1634 		ixgbe_free_receive_ring(rxr);
   1635 		IXGBE_RX_UNLOCK(rxr);
   1636 	}
   1637 
   1638 	return (ENOBUFS);
   1639 } /* ixgbe_setup_receive_structures */
   1640 
   1641 
   1642 /************************************************************************
   1643  * ixgbe_free_receive_structures - Free all receive rings.
   1644  ************************************************************************/
   1645 void
   1646 ixgbe_free_receive_structures(struct adapter *adapter)
   1647 {
   1648 	struct rx_ring *rxr = adapter->rx_rings;
   1649 
   1650 	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
   1651 
   1652 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
   1653 		ixgbe_free_receive_buffers(rxr);
   1654 #ifdef LRO
   1655 		/* Free LRO memory */
   1656 		tcp_lro_free(&rxr->lro);
   1657 #endif /* LRO */
   1658 		/* Free the ring memory as well */
   1659 		ixgbe_dma_free(adapter, &rxr->rxdma);
   1660 		IXGBE_RX_LOCK_DESTROY(rxr);
   1661 	}
   1662 
   1663 	free(adapter->rx_rings, M_DEVBUF);
   1664 } /* ixgbe_free_receive_structures */
   1665 
   1666 
   1667 /************************************************************************
   1668  * ixgbe_free_receive_buffers - Free receive ring data structures
   1669  ************************************************************************/
   1670 static void
   1671 ixgbe_free_receive_buffers(struct rx_ring *rxr)
   1672 {
   1673 	struct adapter      *adapter = rxr->adapter;
   1674 	struct ixgbe_rx_buf *rxbuf;
   1675 
   1676 	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
   1677 
   1678 	/* Cleanup any existing buffers */
   1679 	if (rxr->rx_buffers != NULL) {
   1680 		for (int i = 0; i < adapter->num_rx_desc; i++) {
   1681 			rxbuf = &rxr->rx_buffers[i];
   1682 			ixgbe_rx_discard(rxr, i);
   1683 			if (rxbuf->pmap != NULL) {
   1684 				ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
   1685 				rxbuf->pmap = NULL;
   1686 			}
   1687 		}
   1688 
   1689 		/* NetBSD specific. See ixgbe_netbsd.c */
   1690 		ixgbe_jcl_destroy(adapter, rxr);
   1691 
   1692 		if (rxr->rx_buffers != NULL) {
   1693 			free(rxr->rx_buffers, M_DEVBUF);
   1694 			rxr->rx_buffers = NULL;
   1695 		}
   1696 	}
   1697 
   1698 	if (rxr->ptag != NULL) {
   1699 		ixgbe_dma_tag_destroy(rxr->ptag);
   1700 		rxr->ptag = NULL;
   1701 	}
   1702 
   1703 	return;
   1704 } /* ixgbe_free_receive_buffers */
   1705 
   1706 /************************************************************************
   1707  * ixgbe_rx_input
   1708  ************************************************************************/
   1709 static __inline void
   1710 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m,
   1711     u32 ptype)
   1712 {
   1713 	struct adapter	*adapter = ifp->if_softc;
   1714 
   1715 #ifdef LRO
   1716 	struct ethercom *ec = &adapter->osdep.ec;
   1717 
   1718 	/*
   1719 	 * At the moment, LRO is only for IP/TCP packets whose TCP checksum
   1720 	 * was computed by hardware, and the packet must not carry a VLAN tag
   1721 	 * in the ethernet header.  For IPv6 we do not yet support ext. hdrs.
   1722 	 */
   1723 	if (rxr->lro_enabled &&
   1724 	    (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
   1725 	    (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
   1726 	    ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
   1727 	    (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
   1728 	    (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
   1729 	    (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
   1730 	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
   1731 	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
   1732 		/*
   1733 		 * Send to the stack if:
   1734 		 *  - LRO not enabled, or
   1735 		 *  - no LRO resources, or
   1736 		 *  - lro enqueue fails
   1737 		 */
   1738 		if (rxr->lro.lro_cnt != 0)
   1739 			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
   1740 				return;
   1741 	}
   1742 #endif /* LRO */
   1743 
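        	/* Hand the frame to the stack through the per-CPU input queue. */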
   1744 	if_percpuq_enqueue(adapter->ipq, m);
   1745 } /* ixgbe_rx_input */
   1746 
   1747 /************************************************************************
   1748  * ixgbe_rx_discard
   1749  ************************************************************************/
   1750 static __inline void
   1751 ixgbe_rx_discard(struct rx_ring *rxr, int i)
   1752 {
   1753 	struct ixgbe_rx_buf *rbuf;
   1754 
   1755 	rbuf = &rxr->rx_buffers[i];
   1756 
   1757 	/*
   1758 	 * With advanced descriptors the writeback
   1759 	 * clobbers the buffer addresses, so it's easier
   1760 	 * to just free the existing mbufs and take
   1761 	 * the normal refresh path to get new buffers
   1762 	 * and mapping.
   1763 	 */
   1764 
   1765 	if (rbuf->fmp != NULL) {/* Partial chain ? */
   1766 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
   1767 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
   1768 		m_freem(rbuf->fmp);
   1769 		rbuf->fmp = NULL;
   1770 		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
   1771 	} else if (rbuf->buf) {
   1772 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
   1773 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
   1774 		m_free(rbuf->buf);
   1775 		rbuf->buf = NULL;
   1776 	}
   1777 	ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
   1778 
   1779 	rbuf->flags = 0;
   1780 
   1781 	return;
   1782 } /* ixgbe_rx_discard */
   1783 
   1784 
   1785 /************************************************************************
   1786  * ixgbe_rxeof
   1787  *
   1788  *   Executes in interrupt context. It replenishes the
   1789  *   mbufs in the descriptor ring and sends data which has
   1790  *   been DMA'ed into host memory to the upper layer.
   1791  *
   1792  *   Return TRUE for more work, FALSE for all clean.
   1793  ************************************************************************/
   1794 bool
   1795 ixgbe_rxeof(struct ix_queue *que)
   1796 {
   1797 	struct adapter		*adapter = que->adapter;
   1798 	struct rx_ring		*rxr = que->rxr;
   1799 	struct ifnet		*ifp = adapter->ifp;
   1800 #ifdef LRO
   1801 	struct lro_ctrl		*lro = &rxr->lro;
   1802 #endif /* LRO */
   1803 	union ixgbe_adv_rx_desc	*cur;
   1804 	struct ixgbe_rx_buf	*rbuf, *nbuf;
   1805 	int			i, nextp, processed = 0;
   1806 	u32			staterr = 0;
   1807 	u32			count = 0;
   1808 	u32			limit = adapter->rx_process_limit;
   1809 	bool			discard_multidesc = false;
   1810 #ifdef RSS
   1811 	u16			pkt_info;
   1812 #endif
   1813 
   1814 	IXGBE_RX_LOCK(rxr);
   1815 
   1816 #ifdef DEV_NETMAP
   1817 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
   1818 		/* Same as the txeof routine: wakeup clients on intr. */
   1819 		if (netmap_rx_irq(ifp, rxr->me, &processed)) {
   1820 			IXGBE_RX_UNLOCK(rxr);
   1821 			return (FALSE);
   1822 		}
   1823 	}
   1824 #endif /* DEV_NETMAP */
   1825 
   1826 	/*
   1827 	 * The maximum number of loop iterations is rx_process_limit. If
   1828 	 * discard_multidesc is true, keep processing so that a broken packet
   1829 	 * is not handed to the upper layer.
   1830 	 */
   1831 	for (i = rxr->next_to_check;
   1832 	     (count < limit) || (discard_multidesc == true);) {
   1833 
   1834 		struct mbuf *sendmp, *mp;
   1835 		struct mbuf *newmp;
   1836 		u32         rsc, ptype;
   1837 		u16         len;
   1838 		u16         vtag = 0;
   1839 		bool        eop;
   1840 
   1841 		/* Sync the ring. */
   1842 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   1843 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   1844 
   1845 		cur = &rxr->rx_base[i];
   1846 		staterr = le32toh(cur->wb.upper.status_error);
   1847 #ifdef RSS
   1848 		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
   1849 #endif
   1850 
   1851 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
   1852 			break;
   1853 
   1854 		count++;
   1855 		sendmp = NULL;
   1856 		nbuf = NULL;
   1857 		rsc = 0;
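        		/*
        		 * Clear the write-back status so this slot can be handed
        		 * straight back to the hardware by the buffer-reuse path
        		 * below when no replacement mbuf is available.
        		 */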
   1858 		cur->wb.upper.status_error = 0;
   1859 		rbuf = &rxr->rx_buffers[i];
   1860 		mp = rbuf->buf;
   1861 
   1862 		len = le16toh(cur->wb.upper.length);
   1863 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
   1864 		    IXGBE_RXDADV_PKTTYPE_MASK;
   1865 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
   1866 
   1867 		/* Make sure bad packets are discarded */
   1868 		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
   1869 #if __FreeBSD_version >= 1100036
   1870 			if (adapter->feat_en & IXGBE_FEATURE_VF)
   1871 				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
   1872 #endif
   1873 			rxr->rx_discarded.ev_count++;
   1874 			ixgbe_rx_discard(rxr, i);
   1875 			discard_multidesc = false;
   1876 			goto next_desc;
   1877 		}
   1878 
   1879 		/* pre-alloc new mbuf */
   1880 		if (!discard_multidesc)
   1881 			newmp = ixgbe_getjcl(&rxr->jcl_head, M_NOWAIT, MT_DATA,
   1882 			    M_PKTHDR, rxr->mbuf_sz);
   1883 		else
   1884 			newmp = NULL;
   1885 		if (newmp == NULL) {
   1886 			rxr->no_jmbuf.ev_count++;
   1887 			/*
   1888 			 * Descriptor initialization is already done by the
   1889 			 * above code (cur->wb.upper.status_error = 0).
   1890 			 * So, we can reuse current rbuf->buf for new packet.
   1891 			 *
   1892 			 * Rewrite the buffer addr, see comment in
   1893 			 * ixgbe_rx_discard().
   1894 			 */
   1895 			cur->read.pkt_addr = rbuf->addr;
   1896 			m_freem(rbuf->fmp);
   1897 			rbuf->fmp = NULL;
   1898 			if (!eop) {
   1899 				/* Discard the entire packet. */
   1900 				discard_multidesc = true;
   1901 			} else
   1902 				discard_multidesc = false;
   1903 			goto next_desc;
   1904 		}
   1905 		discard_multidesc = false;
   1906 
   1907 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
   1908 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
   1909 
   1910 		/*
   1911 		 * On the 82599, which supports hardware
   1912 		 * LRO (called HW RSC), packets need
   1913 		 * not be fragmented across sequential
   1914 		 * descriptors; rather, the next descriptor
   1915 		 * is indicated in bits of the descriptor.
   1916 		 * This also means that we might process
   1917 		 * more than one packet at a time, something
   1918 		 * that has never been true before. It
   1919 		 * required eliminating global chain pointers
   1920 		 * in favor of what we are doing here.  -jfv
   1921 		 */
   1922 		if (!eop) {
   1923 			/*
   1924 			 * Figure out the next descriptor
   1925 			 * of this frame.
   1926 			 */
   1927 			if (rxr->hw_rsc == TRUE) {
   1928 				rsc = ixgbe_rsc_count(cur);
   1929 				rxr->rsc_num += (rsc - 1);
   1930 			}
   1931 			if (rsc) { /* Get hardware index */
   1932 				nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
   1933 				    IXGBE_RXDADV_NEXTP_SHIFT);
   1934 			} else { /* Just sequential */
   1935 				nextp = i + 1;
   1936 				if (nextp == adapter->num_rx_desc)
   1937 					nextp = 0;
   1938 			}
   1939 			nbuf = &rxr->rx_buffers[nextp];
   1940 			prefetch(nbuf);
   1941 		}
   1942 		/*
   1943 		 * Rather than using the fmp/lmp global pointers
   1944 		 * we now keep the head of a packet chain in the
   1945 		 * buffer struct and pass this along from one
   1946 		 * descriptor to the next, until we get EOP.
   1947 		 */
   1948 		mp->m_len = len;
   1949 		/*
   1950 		 * See if there is a stored head
   1951 		 * that determines what we are
   1952 		 */
   1953 		sendmp = rbuf->fmp;
   1954 		if (sendmp != NULL) {  /* secondary frag */
   1955 			rbuf->buf = newmp;
   1956 			rbuf->fmp = NULL;
   1957 			mp->m_flags &= ~M_PKTHDR;
   1958 			sendmp->m_pkthdr.len += mp->m_len;
   1959 		} else {
   1960 			/*
   1961 			 * Optimize.  This might be a small packet,
   1962 			 * maybe just a TCP ACK.  Do a fast copy that
   1963 			 * is cache aligned into a new mbuf, and
   1964 			 * leave the old mbuf+cluster for re-use.
   1965 			 */
   1966 			if (eop && len <= IXGBE_RX_COPY_LEN) {
   1967 				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
   1968 				if (sendmp != NULL) {
   1969 					sendmp->m_data += IXGBE_RX_COPY_ALIGN;
   1970 					ixgbe_bcopy(mp->m_data, sendmp->m_data,
   1971 					    len);
   1972 					sendmp->m_len = len;
   1973 					rxr->rx_copies.ev_count++;
   1974 					rbuf->flags |= IXGBE_RX_COPY;
   1975 
   1976 					m_freem(newmp);
   1977 				}
   1978 			}
   1979 			if (sendmp == NULL) {
   1980 				rbuf->buf = newmp;
   1981 				rbuf->fmp = NULL;
   1982 				sendmp = mp;
   1983 			}
   1984 
   1985 			/* first desc of a non-ps chain */
   1986 			sendmp->m_flags |= M_PKTHDR;
   1987 			sendmp->m_pkthdr.len = mp->m_len;
   1988 		}
   1989 		++processed;
   1990 
   1991 		/* Pass the head pointer on */
   1992 		if (eop == 0) {
   1993 			nbuf->fmp = sendmp;
   1994 			sendmp = NULL;
   1995 			mp->m_next = nbuf->buf;
   1996 		} else { /* Sending this frame */
   1997 			m_set_rcvif(sendmp, ifp);
   1998 			++rxr->packets;
   1999 			rxr->rx_packets.ev_count++;
   2000 			/* capture data for AIM */
   2001 			rxr->bytes += sendmp->m_pkthdr.len;
   2002 			rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
   2003 			/* Process vlan info */
   2004 			if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
   2005 				vtag = le16toh(cur->wb.upper.vlan);
   2006 			if (vtag) {
   2007 				vlan_set_tag(sendmp, vtag);
   2008 			}
   2009 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
   2010 				ixgbe_rx_checksum(staterr, sendmp, ptype,
   2011 				   &adapter->stats.pf);
   2012 			}
   2013 
   2014 #if 0 /* FreeBSD */
   2015 			/*
   2016 			 * In case of multiqueue, we have RXCSUM.PCSD bit set
   2017 			 * and never cleared. This means we have RSS hash
   2018 			 * available to be used.
   2019 			 */
   2020 			if (adapter->num_queues > 1) {
   2021 				sendmp->m_pkthdr.flowid =
   2022 				    le32toh(cur->wb.lower.hi_dword.rss);
   2023 				switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
   2024 				case IXGBE_RXDADV_RSSTYPE_IPV4:
   2025 					M_HASHTYPE_SET(sendmp,
   2026 					    M_HASHTYPE_RSS_IPV4);
   2027 					break;
   2028 				case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
   2029 					M_HASHTYPE_SET(sendmp,
   2030 					    M_HASHTYPE_RSS_TCP_IPV4);
   2031 					break;
   2032 				case IXGBE_RXDADV_RSSTYPE_IPV6:
   2033 					M_HASHTYPE_SET(sendmp,
   2034 					    M_HASHTYPE_RSS_IPV6);
   2035 					break;
   2036 				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
   2037 					M_HASHTYPE_SET(sendmp,
   2038 					    M_HASHTYPE_RSS_TCP_IPV6);
   2039 					break;
   2040 				case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
   2041 					M_HASHTYPE_SET(sendmp,
   2042 					    M_HASHTYPE_RSS_IPV6_EX);
   2043 					break;
   2044 				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
   2045 					M_HASHTYPE_SET(sendmp,
   2046 					    M_HASHTYPE_RSS_TCP_IPV6_EX);
   2047 					break;
   2048 #if __FreeBSD_version > 1100000
   2049 				case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
   2050 					M_HASHTYPE_SET(sendmp,
   2051 					    M_HASHTYPE_RSS_UDP_IPV4);
   2052 					break;
   2053 				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
   2054 					M_HASHTYPE_SET(sendmp,
   2055 					    M_HASHTYPE_RSS_UDP_IPV6);
   2056 					break;
   2057 				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
   2058 					M_HASHTYPE_SET(sendmp,
   2059 					    M_HASHTYPE_RSS_UDP_IPV6_EX);
   2060 					break;
   2061 #endif
   2062 				default:
   2063 					M_HASHTYPE_SET(sendmp,
   2064 					    M_HASHTYPE_OPAQUE_HASH);
   2065 				}
   2066 			} else {
   2067 				sendmp->m_pkthdr.flowid = que->msix;
   2068 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
   2069 			}
   2070 #endif
   2071 		}
   2072 next_desc:
   2073 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   2074 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   2075 
   2076 		/* Advance our pointers to the next descriptor. */
   2077 		if (++i == rxr->num_desc)
   2078 			i = 0;
   2079 
   2080 		/* Now send to the stack or do LRO */
   2081 		if (sendmp != NULL) {
   2082 			rxr->next_to_check = i;
   2083 			IXGBE_RX_UNLOCK(rxr);
   2084 			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
   2085 			IXGBE_RX_LOCK(rxr);
   2086 			i = rxr->next_to_check;
   2087 		}
   2088 
   2089 		/* Every 8 descriptors we go to refresh mbufs */
   2090 		if (processed == 8) {
   2091 			ixgbe_refresh_mbufs(rxr, i);
   2092 			processed = 0;
   2093 		}
   2094 	}
   2095 
   2096 	/* Refresh any remaining buf structs */
   2097 	if (ixgbe_rx_unrefreshed(rxr))
   2098 		ixgbe_refresh_mbufs(rxr, i);
   2099 
   2100 	rxr->next_to_check = i;
   2101 
   2102 	IXGBE_RX_UNLOCK(rxr);
   2103 
   2104 #ifdef LRO
   2105 	/*
   2106 	 * Flush any outstanding LRO work
   2107 	 */
   2108 	tcp_lro_flush_all(lro);
   2109 #endif /* LRO */
   2110 
   2111 	/*
   2112 	 * Still have cleaning to do?
   2113 	 */
   2114 	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
   2115 		return (TRUE);
   2116 
   2117 	return (FALSE);
   2118 } /* ixgbe_rxeof */
   2119 
   2120 
   2121 /************************************************************************
   2122  * ixgbe_rx_checksum
   2123  *
   2124  *   Verify that the hardware indicated that the checksum is valid.
   2125  *   Inform the stack about the status of the checksum so that the
   2126  *   stack doesn't spend time verifying the checksum.
   2127  ************************************************************************/
   2128 static void
   2129 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
   2130     struct ixgbe_hw_stats *stats)
   2131 {
   2132 	u16  status = (u16)staterr;
   2133 	u8   errors = (u8)(staterr >> 24);
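        	/*
        	 * The low 16 bits of staterr carry the descriptor status bits
        	 * and bits 31:24 carry the error bits, as split out above.
        	 */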
   2134 #if 0
   2135 	bool sctp = false;
   2136 
   2137 	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
   2138 	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
   2139 		sctp = true;
   2140 #endif
   2141 
   2142 	/* IPv4 checksum */
   2143 	if (status & IXGBE_RXD_STAT_IPCS) {
   2144 		stats->ipcs.ev_count++;
   2145 		if (!(errors & IXGBE_RXD_ERR_IPE)) {
   2146 			/* IP Checksum Good */
   2147 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
   2148 		} else {
   2149 			stats->ipcs_bad.ev_count++;
   2150 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
   2151 		}
   2152 	}
   2153 	/* TCP/UDP/SCTP checksum */
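        	/*
        	 * The code does not distinguish TCP from UDP here, so all four
        	 * TCP/UDP flags are set; the stack is expected to consult only
        	 * the flag matching the packet's actual protocol.
        	 */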
   2154 	if (status & IXGBE_RXD_STAT_L4CS) {
   2155 		stats->l4cs.ev_count++;
   2156 		int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
   2157 		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
   2158 			mp->m_pkthdr.csum_flags |= type;
   2159 		} else {
   2160 			stats->l4cs_bad.ev_count++;
   2161 			mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
   2162 		}
   2163 	}
   2164 } /* ixgbe_rx_checksum */
   2165 
   2166 /************************************************************************
   2167  * ixgbe_dma_malloc
   2168  ************************************************************************/
   2169 int
   2170 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
   2171 		struct ixgbe_dma_alloc *dma, const int mapflags)
   2172 {
   2173 	device_t dev = adapter->dev;
   2174 	int      r, rsegs;
   2175 
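        	/*
        	 * Allocation proceeds in steps: DMA tag, raw descriptor memory,
        	 * kernel mapping, DMA map, and finally the load that yields the
        	 * bus address used by the device. Each fail_* label below
        	 * unwinds exactly the steps completed before it.
        	 */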
   2176 	r = ixgbe_dma_tag_create(
   2177 	     /*      parent */ adapter->osdep.dmat,
   2178 	     /*   alignment */ DBA_ALIGN,
   2179 	     /*      bounds */ 0,
   2180 	     /*     maxsize */ size,
   2181 	     /*   nsegments */ 1,
   2182 	     /*  maxsegsize */ size,
   2183 	     /*       flags */ BUS_DMA_ALLOCNOW,
   2184 			       &dma->dma_tag);
   2185 	if (r != 0) {
   2186 		aprint_error_dev(dev,
   2187 		    "%s: ixgbe_dma_tag_create failed; error %d\n", __func__,
   2188 		    r);
   2189 		goto fail_0;
   2190 	}
   2191 
   2192 	r = bus_dmamem_alloc(dma->dma_tag->dt_dmat, size,
   2193 	    dma->dma_tag->dt_alignment, dma->dma_tag->dt_boundary,
   2194 	    &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
   2195 	if (r != 0) {
   2196 		aprint_error_dev(dev,
   2197 		    "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
   2198 		goto fail_1;
   2199 	}
   2200 
   2201 	r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
   2202 	    size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
   2203 	if (r != 0) {
   2204 		aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
   2205 		    __func__, r);
   2206 		goto fail_2;
   2207 	}
   2208 
   2209 	r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
   2210 	if (r != 0) {
   2211 		aprint_error_dev(dev, "%s: ixgbe_dmamap_create failed; error %d\n",
   2212 		    __func__, r);
   2213 		goto fail_3;
   2214 	}
   2215 
   2216 	r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map,
   2217 	    dma->dma_vaddr, size, NULL, mapflags | BUS_DMA_NOWAIT);
   2218 	if (r != 0) {
   2219 		aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
   2220 		    __func__, r);
   2221 		goto fail_4;
   2222 	}
   2223 	dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
   2224 	dma->dma_size = size;
   2225 	return 0;
   2226 fail_4:
   2227 	ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
   2228 fail_3:
   2229 	bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
   2230 fail_2:
   2231 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
   2232 fail_1:
   2233 	ixgbe_dma_tag_destroy(dma->dma_tag);
   2234 fail_0:
   2235 
   2236 	return (r);
   2237 } /* ixgbe_dma_malloc */
   2238 
   2239 /************************************************************************
   2240  * ixgbe_dma_free
   2241  ************************************************************************/
   2242 void
   2243 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
   2244 {
   2245 	bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
   2246 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   2247 	ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
   2248 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
   2249 	ixgbe_dma_tag_destroy(dma->dma_tag);
   2250 } /* ixgbe_dma_free */
   2251 
   2252 
   2253 /************************************************************************
   2254  * ixgbe_allocate_queues
   2255  *
   2256  *   Allocate memory for the transmit and receive rings, and then
   2257  *   the descriptors associated with each, called only once at attach.
   2258  ************************************************************************/
   2259 int
   2260 ixgbe_allocate_queues(struct adapter *adapter)
   2261 {
   2262 	device_t	dev = adapter->dev;
   2263 	struct ix_queue	*que;
   2264 	struct tx_ring	*txr;
   2265 	struct rx_ring	*rxr;
   2266 	int             rsize, tsize, error = IXGBE_SUCCESS;
   2267 	int             txconf = 0, rxconf = 0;
   2268 
   2269 	/* First, allocate the top level queue structs */
   2270 	adapter->queues = (struct ix_queue *)malloc(sizeof(struct ix_queue) *
   2271 	    adapter->num_queues, M_DEVBUF, M_WAITOK | M_ZERO);
   2272 
   2273 	/* Second, allocate the TX ring struct memory */
   2274 	adapter->tx_rings = malloc(sizeof(struct tx_ring) *
   2275 	    adapter->num_queues, M_DEVBUF, M_WAITOK | M_ZERO);
   2276 
   2277 	/* Third, allocate the RX ring */
   2278 	adapter->rx_rings = (struct rx_ring *)malloc(sizeof(struct rx_ring) *
   2279 	    adapter->num_queues, M_DEVBUF, M_WAITOK | M_ZERO);
   2280 
   2281 	/* Size of the TX descriptor ring itself, rounded up to DBA_ALIGN */
   2282 	tsize = roundup2(adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc),
   2283 	    DBA_ALIGN);
   2284 
   2285 	/*
   2286 	 * Now set up the TX queues. txconf is needed to handle the
   2287 	 * possibility that things fail midcourse and we need to
   2288 	 * undo the memory allocations gracefully.
   2289 	 */
   2290 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
   2291 		/* Set up some basics */
   2292 		txr = &adapter->tx_rings[i];
   2293 		txr->adapter = adapter;
   2294 		txr->txr_interq = NULL;
   2295 		/* In case SR-IOV is enabled, align the index properly */
   2296 #ifdef PCI_IOV
   2297 		txr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
   2298 		    i);
   2299 #else
   2300 		txr->me = i;
   2301 #endif
   2302 		txr->num_desc = adapter->num_tx_desc;
   2303 
   2304 		/* Initialize the TX side lock */
   2305 		mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
   2306 
   2307 		if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma,
   2308 		    BUS_DMA_NOWAIT)) {
   2309 			aprint_error_dev(dev,
   2310 			    "Unable to allocate TX Descriptor memory\n");
   2311 			error = ENOMEM;
   2312 			goto err_tx_desc;
   2313 		}
   2314 		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
   2315 		bzero((void *)txr->tx_base, tsize);
   2316 
   2317 		/* Now allocate transmit buffers for the ring */
   2318 		if (ixgbe_allocate_transmit_buffers(txr)) {
   2319 			aprint_error_dev(dev,
   2320 			    "Critical Failure setting up transmit buffers\n");
   2321 			error = ENOMEM;
   2322 			goto err_tx_desc;
   2323 		}
   2324 		if (!(adapter->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
   2325 			/* Allocate a buf ring */
   2326 			txr->txr_interq = pcq_create(IXGBE_BR_SIZE, KM_SLEEP);
   2327 			if (txr->txr_interq == NULL) {
   2328 				aprint_error_dev(dev,
   2329 				    "Critical Failure setting up buf ring\n");
   2330 				error = ENOMEM;
   2331 				goto err_tx_desc;
   2332 			}
   2333 		}
   2334 	}
   2335 
   2336 	/*
   2337 	 * Next the RX queues...
   2338 	 */
   2339 	rsize = roundup2(adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc),
   2340 	    DBA_ALIGN);
   2341 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
   2342 		rxr = &adapter->rx_rings[i];
   2343 		/* Set up some basics */
   2344 		rxr->adapter = adapter;
   2345 #ifdef PCI_IOV
   2346 		/* In case SR-IOV is enabled, align the index properly */
   2347 		rxr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
   2348 		    i);
   2349 #else
   2350 		rxr->me = i;
   2351 #endif
   2352 		rxr->num_desc = adapter->num_rx_desc;
   2353 
   2354 		/* Initialize the RX side lock */
   2355 		mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
   2356 
   2357 		if (ixgbe_dma_malloc(adapter, rsize, &rxr->rxdma,
   2358 		    BUS_DMA_NOWAIT)) {
   2359 			aprint_error_dev(dev,
   2360 			    "Unable to allocate RX Descriptor memory\n");
   2361 			error = ENOMEM;
   2362 			goto err_rx_desc;
   2363 		}
   2364 		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
   2365 		bzero((void *)rxr->rx_base, rsize);
   2366 
   2367 		/* Allocate receive buffers for the ring */
   2368 		if (ixgbe_allocate_receive_buffers(rxr)) {
   2369 			aprint_error_dev(dev,
   2370 			    "Critical Failure setting up receive buffers\n");
   2371 			error = ENOMEM;
   2372 			goto err_rx_desc;
   2373 		}
   2374 	}
   2375 
   2376 	/*
   2377 	 * Finally set up the queue holding structs
   2378 	 */
   2379 	for (int i = 0; i < adapter->num_queues; i++) {
   2380 		que = &adapter->queues[i];
   2381 		que->adapter = adapter;
   2382 		que->me = i;
   2383 		que->txr = &adapter->tx_rings[i];
   2384 		que->rxr = &adapter->rx_rings[i];
   2385 
   2386 		mutex_init(&que->dc_mtx, MUTEX_DEFAULT, IPL_NET);
   2387 		que->disabled_count = 0;
   2388 	}
   2389 
   2390 	return (0);
   2391 
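        	/*
        	 * Unwind in reverse: rxconf and txconf count the rings that
        	 * completed setup, so the loops below free the descriptor
        	 * memory of those rings only.
        	 */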
   2392 err_rx_desc:
   2393 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
   2394 		ixgbe_dma_free(adapter, &rxr->rxdma);
   2395 err_tx_desc:
   2396 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
   2397 		ixgbe_dma_free(adapter, &txr->txdma);
   2398 	free(adapter->rx_rings, M_DEVBUF);
   2399 	free(adapter->tx_rings, M_DEVBUF);
   2400 	free(adapter->queues, M_DEVBUF);
   2401 	return (error);
   2402 } /* ixgbe_allocate_queues */
   2403 
   2404 /************************************************************************
   2405  * ixgbe_free_queues
   2406  *
   2407  *   Free descriptors for the transmit and receive rings, and then
   2408  *   the memory associated with each.
   2409  ************************************************************************/
   2410 void
   2411 ixgbe_free_queues(struct adapter *adapter)
   2412 {
   2413 	struct ix_queue *que;
   2414 	int i;
   2415 
   2416 	ixgbe_free_transmit_structures(adapter);
   2417 	ixgbe_free_receive_structures(adapter);
   2418 	for (i = 0; i < adapter->num_queues; i++) {
   2419 		que = &adapter->queues[i];
   2420 		mutex_destroy(&que->dc_mtx);
   2421 	}
   2422 	free(adapter->queues, M_DEVBUF);
   2423 } /* ixgbe_free_queues */
   2424