1 /* $NetBSD: ix_txrx.c,v 1.111 2023/12/13 08:25:54 msaitoh Exp $ */
2
3 /******************************************************************************
4
5 Copyright (c) 2001-2017, Intel Corporation
6 All rights reserved.
7
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are met:
10
11 1. Redistributions of source code must retain the above copyright notice,
12 this list of conditions and the following disclaimer.
13
14 2. Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
17
18 3. Neither the name of the Intel Corporation nor the names of its
19 contributors may be used to endorse or promote products derived from
20 this software without specific prior written permission.
21
22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
23 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
26 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 POSSIBILITY OF SUCH DAMAGE.
33
34 ******************************************************************************/
35 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 327031 2017-12-20 18:15:06Z erj $*/
36
37 /*
38 * Copyright (c) 2011 The NetBSD Foundation, Inc.
39 * All rights reserved.
40 *
41 * This code is derived from software contributed to The NetBSD Foundation
42 * by Coyote Point Systems, Inc.
43 *
44 * Redistribution and use in source and binary forms, with or without
45 * modification, are permitted provided that the following conditions
46 * are met:
47 * 1. Redistributions of source code must retain the above copyright
48 * notice, this list of conditions and the following disclaimer.
49 * 2. Redistributions in binary form must reproduce the above copyright
50 * notice, this list of conditions and the following disclaimer in the
51 * documentation and/or other materials provided with the distribution.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
54 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
56 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
57 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
58 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
59 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
60 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
61 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
62 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
63 * POSSIBILITY OF SUCH DAMAGE.
64 */
65
66 #include <sys/cdefs.h>
67 __KERNEL_RCSID(0, "$NetBSD: ix_txrx.c,v 1.111 2023/12/13 08:25:54 msaitoh Exp $");
68
69 #include "opt_inet.h"
70 #include "opt_inet6.h"
71
72 #include "ixgbe.h"
73
74 /*
75 * HW RSC control:
76 * this feature only works with
77 * IPv4, and only on 82599 and later.
78 * It also causes IP forwarding to fail and,
79 * unlike LRO, it cannot be controlled by
80 * the stack. For all these reasons it is
81 * best left off by default; there is no
82 * tunable interface, so enabling it requires
83 * changing this flag and recompiling the
84 * driver.
85 */
86 static bool ixgbe_rsc_enable = FALSE;
87
88 /*
89 * For Flow Director: this is the
90 * number of TX packets we sample
91 * for the filter pool; with the default,
92 * every 20th packet will be probed.
93 *
94 * This feature can be disabled by
95 * setting this to 0.
96 */
97 static int atr_sample_rate = 20;
98
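/*
 * IXGBE_M_ADJ: when the configured maximum frame still fits in
 * (cluster size - ETHER_ALIGN) bytes, shift the packet start by
 * ETHER_ALIGN (2 bytes) so that the IP header following the 14-byte
 * Ethernet header ends up on a 4-byte boundary.
 */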
99 #define IXGBE_M_ADJ(sc, rxr, mp) \
100 if (sc->max_frame_size <= (rxr->mbuf_sz - ETHER_ALIGN)) \
101 m_adj(mp, ETHER_ALIGN)
102
103 /************************************************************************
104 * Local Function prototypes
105 ************************************************************************/
106 static void ixgbe_setup_transmit_ring(struct tx_ring *);
107 static void ixgbe_free_transmit_buffers(struct tx_ring *);
108 static int ixgbe_setup_receive_ring(struct rx_ring *);
109 static void ixgbe_free_receive_buffers(struct rx_ring *);
110 static void ixgbe_rx_checksum(u32, struct mbuf *, u32,
111 struct ixgbe_hw_stats *);
112 static void ixgbe_refresh_mbufs(struct rx_ring *, int);
113 static void ixgbe_drain(struct ifnet *, struct tx_ring *);
114 static int ixgbe_xmit(struct tx_ring *, struct mbuf *);
115 static int ixgbe_tx_ctx_setup(struct tx_ring *,
116 struct mbuf *, u32 *, u32 *);
117 static int ixgbe_tso_setup(struct tx_ring *,
118 struct mbuf *, u32 *, u32 *);
119 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
120 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
121 struct mbuf *, u32);
122 static int ixgbe_dma_malloc(struct ixgbe_softc *, bus_size_t,
123 struct ixgbe_dma_alloc *, int);
124 static void ixgbe_dma_free(struct ixgbe_softc *, struct ixgbe_dma_alloc *);
125
126 static void ixgbe_setup_hw_rsc(struct rx_ring *);
127
128 /************************************************************************
129 * ixgbe_legacy_start_locked - Transmit entry point
130 *
131 * Called by the stack to initiate a transmit.
132 * The driver will remain in this routine as long as there are
133 * packets to transmit and transmit resources are available.
134 * In case resources are not available, the stack is notified
135 * and the packet is requeued.
136 ************************************************************************/
137 int
138 ixgbe_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
139 {
140 int rc;
141 struct mbuf *m_head;
142 struct ixgbe_softc *sc = txr->sc;
143
144 IXGBE_TX_LOCK_ASSERT(txr);
145
146 if (sc->link_active != LINK_STATE_UP) {
147 /*
148 * Discard all packets buffered in the IFQ to avoid
149 * sending stale packets once the link comes back up.
150 */
151 ixgbe_drain(ifp, txr);
152 return (ENETDOWN);
153 }
154 if ((ifp->if_flags & IFF_RUNNING) == 0)
155 return (ENETDOWN);
156 if (txr->txr_no_space)
157 return (ENETDOWN);
158
159 while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
160 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
161 break;
162
163 IFQ_POLL(&ifp->if_snd, m_head);
164 if (m_head == NULL)
165 break;
166
167 if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
168 break;
169 }
170 IFQ_DEQUEUE(&ifp->if_snd, m_head);
171 if (rc != 0) {
172 m_freem(m_head);
173 continue;
174 }
175
176 /* Send a copy of the frame to the BPF listener */
177 bpf_mtap(ifp, m_head, BPF_D_OUT);
178 }
179
180 return IXGBE_SUCCESS;
181 } /* ixgbe_legacy_start_locked */
182
183 /************************************************************************
184 * ixgbe_legacy_start
185 *
186 * Called by the stack, this always uses the first tx ring,
187 * and should not be used with multiqueue tx enabled.
188 ************************************************************************/
189 void
190 ixgbe_legacy_start(struct ifnet *ifp)
191 {
192 struct ixgbe_softc *sc = ifp->if_softc;
193 struct tx_ring *txr = sc->tx_rings;
194
195 if (ifp->if_flags & IFF_RUNNING) {
196 IXGBE_TX_LOCK(txr);
197 ixgbe_legacy_start_locked(ifp, txr);
198 IXGBE_TX_UNLOCK(txr);
199 }
200 } /* ixgbe_legacy_start */
201
202 /************************************************************************
203 * ixgbe_mq_start - Multiqueue Transmit Entry Point
204 *
205 * (if_transmit function)
206 ************************************************************************/
207 int
208 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
209 {
210 struct ixgbe_softc *sc = ifp->if_softc;
211 struct tx_ring *txr;
212 int i;
213 #ifdef RSS
214 uint32_t bucket_id;
215 #endif
216
217 /*
218 * When doing RSS, map it to the same outbound queue
219 * as the incoming flow would be mapped to.
220 *
221 * If everything is set up correctly, it should be the
222 * same bucket as the one the current CPU maps to.
223 */
224 #ifdef RSS
225 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
226 if ((sc->feat_en & IXGBE_FEATURE_RSS) &&
227 (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
228 &bucket_id) == 0)) {
229 i = bucket_id % sc->num_queues;
230 #ifdef IXGBE_DEBUG
231 if (bucket_id > sc->num_queues)
232 if_printf(ifp,
233 "bucket_id (%d) > num_queues (%d)\n",
234 bucket_id, sc->num_queues);
235 #endif
236 } else
237 i = m->m_pkthdr.flowid % sc->num_queues;
238 } else
239 #endif /* RSS */
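/*
 * When RSS is not compiled in (or no usable hash is available),
 * derive the TX queue from the current CPU index so traffic from
 * the same CPU tends to stay on the same ring.
 */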
240 i = (cpu_index(curcpu()) % ncpu) % sc->num_queues;
241
242 /* Check for a hung queue and pick alternative */
243 if (((1ULL << i) & sc->active_queues) == 0)
244 i = ffs64(sc->active_queues);
245
246 txr = &sc->tx_rings[i];
247
248 if (__predict_false(!pcq_put(txr->txr_interq, m))) {
249 m_freem(m);
250 IXGBE_EVC_ADD(&txr->pcq_drops, 1);
251 return ENOBUFS;
252 }
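/*
 * With IXGBE_ALWAYS_TXDEFER the packet is always handed to the
 * per-ring softint.  Otherwise we try to take the TX lock and
 * transmit inline, and only fall back to the softint (or to the
 * workqueue when txrx_use_workqueue is set) if the lock is busy.
 */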
253 #ifdef IXGBE_ALWAYS_TXDEFER
254 kpreempt_disable();
255 softint_schedule(txr->txr_si);
256 kpreempt_enable();
257 #else
258 if (IXGBE_TX_TRYLOCK(txr)) {
259 ixgbe_mq_start_locked(ifp, txr);
260 IXGBE_TX_UNLOCK(txr);
261 } else {
262 if (sc->txrx_use_workqueue) {
263 u_int *enqueued;
264
265 /*
266 * This function itself is not called in interrupt
267 * context, but it can be called in fast softint
268 * context right after receiving forwarded packets.
269 * The workqueue must therefore be protected against
270 * double enqueueing when the machine handles both
271 * locally generated and forwarded packets.
272 */
273 enqueued = percpu_getref(sc->txr_wq_enqueued);
274 if (*enqueued == 0) {
275 *enqueued = 1;
276 percpu_putref(sc->txr_wq_enqueued);
277 workqueue_enqueue(sc->txr_wq,
278 &txr->wq_cookie, curcpu());
279 } else
280 percpu_putref(sc->txr_wq_enqueued);
281 } else {
282 kpreempt_disable();
283 softint_schedule(txr->txr_si);
284 kpreempt_enable();
285 }
286 }
287 #endif
288
289 return (0);
290 } /* ixgbe_mq_start */
291
292 /************************************************************************
293 * ixgbe_mq_start_locked
294 ************************************************************************/
295 int
296 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
297 {
298 struct mbuf *next;
299 int enqueued = 0, err = 0;
300
301 if (txr->sc->link_active != LINK_STATE_UP) {
302 /*
303 * Discard all packets buffered in txr_interq to avoid
304 * sending stale packets once the link comes back up.
305 */
306 ixgbe_drain(ifp, txr);
307 return (ENETDOWN);
308 }
309 if ((ifp->if_flags & IFF_RUNNING) == 0)
310 return (ENETDOWN);
311 if (txr->txr_no_space)
312 return (ENETDOWN);
313
314 /* Process the queue */
315 while ((next = pcq_get(txr->txr_interq)) != NULL) {
316 if ((err = ixgbe_xmit(txr, next)) != 0) {
317 m_freem(next);
318 /* All errors are counted in ixgbe_xmit() */
319 break;
320 }
321 enqueued++;
322 #if __FreeBSD_version >= 1100036
323 /*
324 * Since we're looking at the tx ring, we can check
325 * to see if we're a VF by examining our tail register
326 * address.
327 */
328 if ((txr->sc->feat_en & IXGBE_FEATURE_VF) &&
329 (next->m_flags & M_MCAST))
330 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
331 #endif
332 /* Send a copy of the frame to the BPF listener */
333 bpf_mtap(ifp, next, BPF_D_OUT);
334 if ((ifp->if_flags & IFF_RUNNING) == 0)
335 break;
336 }
337
338 if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->sc))
339 ixgbe_txeof(txr);
340
341 return (err);
342 } /* ixgbe_mq_start_locked */
343
344 /************************************************************************
345 * ixgbe_deferred_mq_start
346 *
347 * Called from a softint and workqueue (indirectly) to drain queued
348 * transmit packets.
349 ************************************************************************/
350 void
351 ixgbe_deferred_mq_start(void *arg)
352 {
353 struct tx_ring *txr = arg;
354 struct ixgbe_softc *sc = txr->sc;
355 struct ifnet *ifp = sc->ifp;
356
357 IXGBE_TX_LOCK(txr);
358 if (pcq_peek(txr->txr_interq) != NULL)
359 ixgbe_mq_start_locked(ifp, txr);
360 IXGBE_TX_UNLOCK(txr);
361 } /* ixgbe_deferred_mq_start */
362
363 /************************************************************************
364 * ixgbe_deferred_mq_start_work
365 *
366 * Called from a workqueue to drain queued transmit packets.
367 ************************************************************************/
368 void
369 ixgbe_deferred_mq_start_work(struct work *wk, void *arg)
370 {
371 struct tx_ring *txr = container_of(wk, struct tx_ring, wq_cookie);
372 struct ixgbe_softc *sc = txr->sc;
373 u_int *enqueued = percpu_getref(sc->txr_wq_enqueued);
374 *enqueued = 0;
375 percpu_putref(sc->txr_wq_enqueued);
376
377 ixgbe_deferred_mq_start(txr);
378 } /* ixgbe_deferred_mq_start_work */
379
380 /************************************************************************
381 * ixgbe_drain_all
382 ************************************************************************/
383 void
384 ixgbe_drain_all(struct ixgbe_softc *sc)
385 {
386 struct ifnet *ifp = sc->ifp;
387 struct ix_queue *que = sc->queues;
388
389 for (int i = 0; i < sc->num_queues; i++, que++) {
390 struct tx_ring *txr = que->txr;
391
392 IXGBE_TX_LOCK(txr);
393 ixgbe_drain(ifp, txr);
394 IXGBE_TX_UNLOCK(txr);
395 }
396 }
397
398 /************************************************************************
399 * ixgbe_xmit
400 *
401 * Maps the mbufs to tx descriptors, allowing the
402 * TX engine to transmit the packets.
403 *
404 * Return 0 on success, positive on failure
405 ************************************************************************/
406 static int
407 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
408 {
409 struct ixgbe_softc *sc = txr->sc;
410 struct ixgbe_tx_buf *txbuf;
411 union ixgbe_adv_tx_desc *txd = NULL;
412 struct ifnet *ifp = sc->ifp;
413 int i, j, error;
414 int first;
415 u32 olinfo_status = 0, cmd_type_len;
416 bool remap = TRUE;
417 bus_dmamap_t map;
418
419 /* Basic descriptor defines */
420 cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
421 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
422
423 if (vlan_has_tag(m_head))
424 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
425
426 /*
427 * It is important to capture the first descriptor
428 * used, because its buffer entry will later record
429 * the descriptor the hardware reports back on.
430 */
431 first = txr->next_avail_desc;
432 txbuf = &txr->tx_buffers[first];
433 map = txbuf->map;
434
435 /*
436 * Map the packet for DMA.
437 */
438 retry:
439 error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map, m_head,
440 BUS_DMA_NOWAIT);
441
442 if (__predict_false(error)) {
443 struct mbuf *m;
444
445 switch (error) {
446 case EAGAIN:
447 txr->q_eagain_tx_dma_setup++;
448 return EAGAIN;
449 case ENOMEM:
450 txr->q_enomem_tx_dma_setup++;
451 return EAGAIN;
452 case EFBIG:
453 /* Try it again? - one try */
454 if (remap == TRUE) {
455 remap = FALSE;
456 /*
457 * XXX: m_defrag will choke on
458 * non-MCLBYTES-sized clusters
459 */
460 txr->q_efbig_tx_dma_setup++;
461 m = m_defrag(m_head, M_NOWAIT);
462 if (m == NULL) {
463 txr->q_mbuf_defrag_failed++;
464 return ENOBUFS;
465 }
466 m_head = m;
467 goto retry;
468 } else {
469 txr->q_efbig2_tx_dma_setup++;
470 return error;
471 }
472 case EINVAL:
473 txr->q_einval_tx_dma_setup++;
474 return error;
475 default:
476 txr->q_other_tx_dma_setup++;
477 return error;
478 }
479 }
480
481 /* Make certain there are enough descriptors */
482 if (txr->tx_avail < (map->dm_nsegs + 2)) {
483 txr->txr_no_space = true;
484 IXGBE_EVC_ADD(&txr->no_desc_avail, 1);
485 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
486 return EAGAIN;
487 }
488
489 /*
490 * Set up the appropriate offload context if requested,
491 * this may consume one TX descriptor.
492 */
493 error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
494 if (__predict_false(error)) {
495 return (error);
496 }
497
498 #ifdef IXGBE_FDIR
499 /* Do the flow director magic */
500 if ((sc->feat_en & IXGBE_FEATURE_FDIR) &&
501 (txr->atr_sample) && (!sc->fdir_reinit)) {
502 ++txr->atr_count;
503 if (txr->atr_count >= atr_sample_rate) {
504 ixgbe_atr(txr, m_head);
505 txr->atr_count = 0;
506 }
507 }
508 #endif
509
510 olinfo_status |= IXGBE_ADVTXD_CC;
511 i = txr->next_avail_desc;
512 for (j = 0; j < map->dm_nsegs; j++) {
513 bus_size_t seglen;
514 uint64_t segaddr;
515
516 txbuf = &txr->tx_buffers[i];
517 txd = &txr->tx_base[i];
518 seglen = map->dm_segs[j].ds_len;
519 segaddr = htole64(map->dm_segs[j].ds_addr);
520
521 txd->read.buffer_addr = segaddr;
522 txd->read.cmd_type_len = htole32(cmd_type_len | seglen);
523 txd->read.olinfo_status = htole32(olinfo_status);
524
525 if (++i == txr->num_desc)
526 i = 0;
527 }
528
529 txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
530 txr->tx_avail -= map->dm_nsegs;
531 txr->next_avail_desc = i;
532
533 txbuf->m_head = m_head;
534 /*
535 * Here we swap the map so the last descriptor,
536 * which gets the completion interrupt, has the
537 * real map, and the first descriptor gets the
538 * unused map from this descriptor.
539 */
540 txr->tx_buffers[first].map = txbuf->map;
541 txbuf->map = map;
542 bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
543 BUS_DMASYNC_PREWRITE);
544
545 /* Set the EOP descriptor that will be marked done */
546 txbuf = &txr->tx_buffers[first];
547 txbuf->eop = txd;
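/*
 * At this point tx_buffers[first] carries the EOP descriptor
 * pointer that ixgbe_txeof() polls for the DD bit, while the
 * buffer at the last segment's slot carries the mbuf and the
 * DMA map that was actually loaded.
 */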
548
549 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
550 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
551 /*
552 * Advance the Transmit Descriptor Tail (Tdt), this tells the
553 * hardware that this frame is available to transmit.
554 */
555 IXGBE_EVC_ADD(&txr->total_packets, 1);
556 IXGBE_WRITE_REG(&sc->hw, txr->tail, i);
557
558 net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
559 if_statadd_ref(nsr, if_obytes, m_head->m_pkthdr.len);
560 if (m_head->m_flags & M_MCAST)
561 if_statinc_ref(nsr, if_omcasts);
562 IF_STAT_PUTREF(ifp);
563
564 /* Mark queue as having work */
565 if (txr->busy == 0)
566 txr->busy = 1;
567
568 return (0);
569 } /* ixgbe_xmit */
570
571 /************************************************************************
572 * ixgbe_drain
573 ************************************************************************/
574 static void
575 ixgbe_drain(struct ifnet *ifp, struct tx_ring *txr)
576 {
577 struct mbuf *m;
578
579 IXGBE_TX_LOCK_ASSERT(txr);
580
581 if (txr->me == 0) {
582 while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
583 IFQ_DEQUEUE(&ifp->if_snd, m);
584 m_freem(m);
585 IF_DROP(&ifp->if_snd);
586 }
587 }
588
589 while ((m = pcq_get(txr->txr_interq)) != NULL) {
590 m_freem(m);
591 IXGBE_EVC_ADD(&txr->pcq_drops, 1);
592 }
593 }
594
595 /************************************************************************
596 * ixgbe_allocate_transmit_buffers
597 *
598 * Allocate memory for tx_buffer structures. The tx_buffer stores all
599 * the information needed to transmit a packet on the wire. This is
600 * called only once at attach; setup is done on every reset.
601 ************************************************************************/
602 static int
603 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
604 {
605 struct ixgbe_softc *sc = txr->sc;
606 device_t dev = sc->dev;
607 struct ixgbe_tx_buf *txbuf;
608 int error, i;
609
610 /*
611 * Setup DMA descriptor areas.
612 */
613 error = ixgbe_dma_tag_create(
614 /* parent */ sc->osdep.dmat,
615 /* alignment */ 1,
616 /* bounds */ 0,
617 /* maxsize */ IXGBE_TSO_SIZE,
618 /* nsegments */ sc->num_segs,
619 /* maxsegsize */ PAGE_SIZE,
620 /* flags */ 0,
621 &txr->txtag);
622 if (error != 0) {
623 aprint_error_dev(dev, "Unable to allocate TX DMA tag\n");
624 goto fail;
625 }
626
627 txr->tx_buffers = malloc(sizeof(struct ixgbe_tx_buf) *
628 sc->num_tx_desc, M_DEVBUF, M_WAITOK | M_ZERO);
629
630 /* Create the descriptor buffer dma maps */
631 txbuf = txr->tx_buffers;
632 for (i = 0; i < sc->num_tx_desc; i++, txbuf++) {
633 error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
634 if (error != 0) {
635 aprint_error_dev(dev,
636 "Unable to create TX DMA map (%d)\n", error);
637 goto fail;
638 }
639 }
640
641 return 0;
642 fail:
643 /* Free everything; this handles the case where we failed part way through */
644 #if 0 /* XXX was FreeBSD */
645 ixgbe_free_transmit_structures(sc);
646 #else
647 ixgbe_free_transmit_buffers(txr);
648 #endif
649 return (error);
650 } /* ixgbe_allocate_transmit_buffers */
651
652 /************************************************************************
653 * ixgbe_setup_transmit_ring - Initialize a transmit ring.
654 ************************************************************************/
655 static void
656 ixgbe_setup_transmit_ring(struct tx_ring *txr)
657 {
658 struct ixgbe_softc *sc = txr->sc;
659 struct ixgbe_tx_buf *txbuf;
660 #ifdef DEV_NETMAP
661 struct netmap_adapter *na = NA(sc->ifp);
662 struct netmap_slot *slot;
663 #endif /* DEV_NETMAP */
664
665 /* Clear the old ring contents */
666 IXGBE_TX_LOCK(txr);
667
668 #ifdef DEV_NETMAP
669 if (sc->feat_en & IXGBE_FEATURE_NETMAP) {
670 /*
671 * (under lock): if in netmap mode, do some consistency
672 * checks and set slot to entry 0 of the netmap ring.
673 */
674 slot = netmap_reset(na, NR_TX, txr->me, 0);
675 }
676 #endif /* DEV_NETMAP */
677
678 bzero((void *)txr->tx_base,
679 (sizeof(union ixgbe_adv_tx_desc)) * sc->num_tx_desc);
680 /* Reset indices */
681 txr->next_avail_desc = 0;
682 txr->next_to_clean = 0;
683
684 /* Free any existing tx buffers. */
685 txbuf = txr->tx_buffers;
686 for (int i = 0; i < txr->num_desc; i++, txbuf++) {
687 if (txbuf->m_head != NULL) {
688 bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
689 0, txbuf->m_head->m_pkthdr.len,
690 BUS_DMASYNC_POSTWRITE);
691 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
692 m_freem(txbuf->m_head);
693 txbuf->m_head = NULL;
694 }
695
696 #ifdef DEV_NETMAP
697 /*
698 * In netmap mode, set the map for the packet buffer.
699 * NOTE: Some drivers (not this one) also need to set
700 * the physical buffer address in the NIC ring.
701 * Slots in the netmap ring (indexed by "si") are
702 * kring->nkr_hwofs positions "ahead" wrt the
703 * corresponding slot in the NIC ring. In some drivers
704 * (not here) nkr_hwofs can be negative. Function
705 * netmap_idx_n2k() handles wraparounds properly.
706 */
707 if ((sc->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
708 int si = netmap_idx_n2k(na->tx_rings[txr->me], i);
709 netmap_load_map(na, txr->txtag,
710 txbuf->map, NMB(na, slot + si));
711 }
712 #endif /* DEV_NETMAP */
713
714 /* Clear the EOP descriptor pointer */
715 txbuf->eop = NULL;
716 }
717
718 /* Set the rate at which we sample packets */
719 if (sc->feat_en & IXGBE_FEATURE_FDIR)
720 txr->atr_sample = atr_sample_rate;
721
722 /* Set number of descriptors available */
723 txr->tx_avail = sc->num_tx_desc;
724
725 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
726 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
727 IXGBE_TX_UNLOCK(txr);
728 } /* ixgbe_setup_transmit_ring */
729
730 /************************************************************************
731 * ixgbe_setup_transmit_structures - Initialize all transmit rings.
732 ************************************************************************/
733 int
734 ixgbe_setup_transmit_structures(struct ixgbe_softc *sc)
735 {
736 struct tx_ring *txr = sc->tx_rings;
737
738 for (int i = 0; i < sc->num_queues; i++, txr++)
739 ixgbe_setup_transmit_ring(txr);
740
741 return (0);
742 } /* ixgbe_setup_transmit_structures */
743
744 /************************************************************************
745 * ixgbe_free_transmit_structures - Free all transmit rings.
746 ************************************************************************/
747 void
748 ixgbe_free_transmit_structures(struct ixgbe_softc *sc)
749 {
750 struct tx_ring *txr = sc->tx_rings;
751
752 for (int i = 0; i < sc->num_queues; i++, txr++) {
753 ixgbe_free_transmit_buffers(txr);
754 ixgbe_dma_free(sc, &txr->txdma);
755 IXGBE_TX_LOCK_DESTROY(txr);
756 }
757 free(sc->tx_rings, M_DEVBUF);
758 } /* ixgbe_free_transmit_structures */
759
760 /************************************************************************
761 * ixgbe_free_transmit_buffers
762 *
763 * Free transmit ring related data structures.
764 ************************************************************************/
765 static void
766 ixgbe_free_transmit_buffers(struct tx_ring *txr)
767 {
768 struct ixgbe_softc *sc = txr->sc;
769 struct ixgbe_tx_buf *tx_buffer;
770 int i;
771
772 INIT_DEBUGOUT("ixgbe_free_transmit_buffers: begin");
773
774 if (txr->tx_buffers == NULL)
775 return;
776
777 tx_buffer = txr->tx_buffers;
778 for (i = 0; i < sc->num_tx_desc; i++, tx_buffer++) {
779 if (tx_buffer->m_head != NULL) {
780 bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
781 0, tx_buffer->m_head->m_pkthdr.len,
782 BUS_DMASYNC_POSTWRITE);
783 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
784 m_freem(tx_buffer->m_head);
785 tx_buffer->m_head = NULL;
786 if (tx_buffer->map != NULL) {
787 ixgbe_dmamap_destroy(txr->txtag,
788 tx_buffer->map);
789 tx_buffer->map = NULL;
790 }
791 } else if (tx_buffer->map != NULL) {
792 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
793 ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
794 tx_buffer->map = NULL;
795 }
796 }
797 if (txr->txr_interq != NULL) {
798 struct mbuf *m;
799
800 while ((m = pcq_get(txr->txr_interq)) != NULL)
801 m_freem(m);
802 pcq_destroy(txr->txr_interq);
803 }
804 if (txr->tx_buffers != NULL) {
805 free(txr->tx_buffers, M_DEVBUF);
806 txr->tx_buffers = NULL;
807 }
808 if (txr->txtag != NULL) {
809 ixgbe_dma_tag_destroy(txr->txtag);
810 txr->txtag = NULL;
811 }
812 } /* ixgbe_free_transmit_buffers */
813
814 /************************************************************************
815 * ixgbe_tx_ctx_setup
816 *
817 * Advanced Context Descriptor setup for VLAN, CSUM or TSO
818 ************************************************************************/
819 static int
820 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
821 u32 *cmd_type_len, u32 *olinfo_status)
822 {
823 struct ixgbe_softc *sc = txr->sc;
824 struct ixgbe_adv_tx_context_desc *TXD;
825 struct ether_vlan_header *eh;
826 #ifdef INET
827 struct ip *ip;
828 #endif
829 #ifdef INET6
830 struct ip6_hdr *ip6;
831 #endif
832 int ehdrlen, ip_hlen = 0;
833 int offload = TRUE;
834 int ctxd = txr->next_avail_desc;
835 u32 vlan_macip_lens = 0;
836 u32 type_tucmd_mlhl = 0;
837 u16 vtag = 0;
838 u16 etype;
839 u8 ipproto = 0;
840 char *l3d;
841
842 /* First check if TSO is to be used */
843 if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4 | M_CSUM_TSOv6)) {
844 int rv = ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status);
845
846 if (rv != 0)
847 IXGBE_EVC_ADD(&sc->tso_err, 1);
848 return rv;
849 }
850
851 if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
852 offload = FALSE;
853
854 /* Indicate the whole packet as payload when not doing TSO */
855 *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
856
857 /*
858 * In advanced descriptors the vlan tag must
859 * be placed into the context descriptor. Hence
860 * we need to make one even if not doing offloads.
861 */
862 if (vlan_has_tag(mp)) {
863 vtag = htole16(vlan_get_tag(mp));
864 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
865 } else if (!(txr->sc->feat_en & IXGBE_FEATURE_NEEDS_CTXD) &&
866 (offload == FALSE))
867 return (0);
868
869 /*
870 * Determine where frame payload starts.
871 * Jump over vlan headers if already present,
872 * helpful for QinQ too.
873 */
874 KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
875 eh = mtod(mp, struct ether_vlan_header *);
876 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
877 KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
878 etype = ntohs(eh->evl_proto);
879 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
880 } else {
881 etype = ntohs(eh->evl_encap_proto);
882 ehdrlen = ETHER_HDR_LEN;
883 }
884
885 /* Set the ether header length */
886 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
887
888 if (offload == FALSE)
889 goto no_offloads;
890
891 /*
892 * If the first mbuf only includes the ethernet header,
893 * jump to the next one
894 * XXX: This assumes the stack splits mbufs containing headers
895 * on header boundaries
896 * XXX: And assumes the entire IP header is contained in one mbuf
897 */
898 if (mp->m_len == ehdrlen && mp->m_next)
899 l3d = mtod(mp->m_next, char *);
900 else
901 l3d = mtod(mp, char *) + ehdrlen;
902
903 switch (etype) {
904 #ifdef INET
905 case ETHERTYPE_IP:
906 ip = (struct ip *)(l3d);
907 ip_hlen = ip->ip_hl << 2;
908 ipproto = ip->ip_p;
909 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
910 KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
911 ip->ip_sum == 0);
912 break;
913 #endif
914 #ifdef INET6
915 case ETHERTYPE_IPV6:
916 ip6 = (struct ip6_hdr *)(l3d);
917 ip_hlen = sizeof(struct ip6_hdr);
918 ipproto = ip6->ip6_nxt;
919 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
920 break;
921 #endif
922 default:
923 offload = false;
924 break;
925 }
926
927 if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
928 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
929
930 vlan_macip_lens |= ip_hlen;
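/*
 * Example layout at this point (shift values as defined in
 * ixgbe_type.h): for an untagged IPv4/TCP frame with a 20-byte
 * IP header, vlan_macip_lens = (0 << 16) | (14 << 9) | 20.
 */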
931
932 /* No support for offloads for non-L4 next headers */
933 switch (ipproto) {
934 case IPPROTO_TCP:
935 if (mp->m_pkthdr.csum_flags &
936 (M_CSUM_TCPv4 | M_CSUM_TCPv6))
937 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
938 else
939 offload = false;
940 break;
941 case IPPROTO_UDP:
942 if (mp->m_pkthdr.csum_flags &
943 (M_CSUM_UDPv4 | M_CSUM_UDPv6))
944 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
945 else
946 offload = false;
947 break;
948 default:
949 offload = false;
950 break;
951 }
952
953 if (offload) /* Insert L4 checksum into data descriptors */
954 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
955
956 no_offloads:
957 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
958
959 /* Now ready a context descriptor */
960 TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
961
962 /* Now copy bits into descriptor */
963 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
964 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
965 TXD->seqnum_seed = htole32(0);
966 TXD->mss_l4len_idx = htole32(0);
967
968 /* We've consumed the first desc, adjust counters */
969 if (++ctxd == txr->num_desc)
970 ctxd = 0;
971 txr->next_avail_desc = ctxd;
972 --txr->tx_avail;
973
974 return (0);
975 } /* ixgbe_tx_ctx_setup */
976
977 /************************************************************************
978 * ixgbe_tso_setup
979 *
980 * Setup work for hardware segmentation offload (TSO) on
981 * adapters using advanced tx descriptors
982 ************************************************************************/
983 static int
984 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len,
985 u32 *olinfo_status)
986 {
987 struct ixgbe_adv_tx_context_desc *TXD;
988 struct ether_vlan_header *eh;
989 #ifdef INET6
990 struct ip6_hdr *ip6;
991 #endif
992 #ifdef INET
993 struct ip *ip;
994 #endif
995 struct tcphdr *th;
996 int ctxd, ehdrlen, ip_hlen, tcp_hlen;
997 u32 vlan_macip_lens = 0;
998 u32 type_tucmd_mlhl = 0;
999 u32 mss_l4len_idx = 0, paylen;
1000 u16 vtag = 0, eh_type;
1001
1002 /*
1003 * Determine where frame payload starts.
1004 * Jump over vlan headers if already present
1005 */
1006 eh = mtod(mp, struct ether_vlan_header *);
1007 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1008 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1009 eh_type = eh->evl_proto;
1010 } else {
1011 ehdrlen = ETHER_HDR_LEN;
1012 eh_type = eh->evl_encap_proto;
1013 }
1014
1015 switch (ntohs(eh_type)) {
1016 #ifdef INET
1017 case ETHERTYPE_IP:
1018 ip = (struct ip *)(mp->m_data + ehdrlen);
1019 if (ip->ip_p != IPPROTO_TCP)
1020 return (ENXIO);
1021 ip->ip_sum = 0;
1022 ip_hlen = ip->ip_hl << 2;
1023 th = (struct tcphdr *)((char *)ip + ip_hlen);
1024 th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
1025 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1026 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
1027 /* Tell transmit desc to also do IPv4 checksum. */
1028 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
1029 break;
1030 #endif
1031 #ifdef INET6
1032 case ETHERTYPE_IPV6:
1033 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
1034 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
1035 if (ip6->ip6_nxt != IPPROTO_TCP)
1036 return (ENXIO);
1037 ip_hlen = sizeof(struct ip6_hdr);
1038 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
1039 th = (struct tcphdr *)((char *)ip6 + ip_hlen);
1040 th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
1041 &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
1042 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
1043 break;
1044 #endif
1045 default:
1046 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
1047 __func__, ntohs(eh_type));
1048 break;
1049 }
1050
1051 ctxd = txr->next_avail_desc;
1052 TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
1053
1054 tcp_hlen = th->th_off << 2;
1055
1056 /* This is used in the transmit desc in encap */
1057 paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
1058
1059 /* VLAN MACLEN IPLEN */
1060 if (vlan_has_tag(mp)) {
1061 vtag = htole16(vlan_get_tag(mp));
1062 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
1063 }
1064
1065 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
1066 vlan_macip_lens |= ip_hlen;
1067 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
1068
1069 /* ADV DTYPE TUCMD */
1070 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
1071 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
1072 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
1073
1074 /* MSS L4LEN IDX */
1075 mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
1076 mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
1077 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
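/*
 * Example: for an MSS of 1448 and a 20-byte TCP header this yields
 * mss_l4len_idx = (1448 << 16) | (20 << 8), i.e. the MSS in bits
 * 31:16 and L4LEN in bits 15:8 (shifts from ixgbe_type.h).
 */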
1078
1079 TXD->seqnum_seed = htole32(0);
1080
1081 if (++ctxd == txr->num_desc)
1082 ctxd = 0;
1083
1084 txr->tx_avail--;
1085 txr->next_avail_desc = ctxd;
1086 *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
1087 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
1088 *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
1089 IXGBE_EVC_ADD(&txr->tso_tx, 1);
1090
1091 return (0);
1092 } /* ixgbe_tso_setup */
1093
1094
1095 /************************************************************************
1096 * ixgbe_txeof
1097 *
1098 * Examine each tx_buffer in the used queue. If the hardware is done
1099 * processing the packet then free associated resources. The
1100 * tx_buffer is put back on the free queue.
1101 ************************************************************************/
1102 bool
1103 ixgbe_txeof(struct tx_ring *txr)
1104 {
1105 struct ixgbe_softc *sc = txr->sc;
1106 struct ifnet *ifp = sc->ifp;
1107 struct ixgbe_tx_buf *buf;
1108 union ixgbe_adv_tx_desc *txd;
1109 u32 work, processed = 0;
1110 u32 limit = sc->tx_process_limit;
1111 u16 avail;
1112
1113 KASSERT(mutex_owned(&txr->tx_mtx));
1114
1115 #ifdef DEV_NETMAP
1116 if ((sc->feat_en & IXGBE_FEATURE_NETMAP) &&
1117 (sc->ifp->if_capenable & IFCAP_NETMAP)) {
1118 struct netmap_adapter *na = NA(sc->ifp);
1119 struct netmap_kring *kring = na->tx_rings[txr->me];
1120 txd = txr->tx_base;
1121 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1122 BUS_DMASYNC_POSTREAD);
1123 /*
1124 * In netmap mode, all the work is done in the context
1125 * of the client thread. Interrupt handlers only wake up
1126 * clients, which may be sleeping on individual rings
1127 * or on a global resource for all rings.
1128 * To implement tx interrupt mitigation, we wake up the client
1129 * thread roughly every half ring, even if the NIC interrupts
1130 * more frequently. This is implemented as follows:
1131 * - ixgbe_txsync() sets kring->nr_kflags with the index of
1132 * the slot that should wake up the thread (nkr_num_slots
1133 * means the user thread should not be woken up);
1134 * - the driver ignores tx interrupts unless netmap_mitigate=0
1135 * or the slot has the DD bit set.
1136 */
1137 if (kring->nr_kflags < kring->nkr_num_slots &&
1138 le32toh(txd[kring->nr_kflags].wb.status) & IXGBE_TXD_STAT_DD) {
1139 netmap_tx_irq(ifp, txr->me);
1140 }
1141 return false;
1142 }
1143 #endif /* DEV_NETMAP */
1144
1145 if (txr->tx_avail == txr->num_desc) {
1146 txr->busy = 0;
1147 return false;
1148 }
1149
1150 /* Get work starting point */
1151 work = txr->next_to_clean;
1152 buf = &txr->tx_buffers[work];
1153 txd = &txr->tx_base[work];
1154 work -= txr->num_desc; /* The distance to ring end */
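/*
 * 'work' is kept as (index - num_desc) in unsigned arithmetic, so
 * simply incrementing it reaches zero exactly when the index wraps
 * past the end of the ring; the real index is restored by adding
 * num_desc back after the loop.
 */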
1155 avail = txr->tx_avail;
1156 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1157 BUS_DMASYNC_POSTREAD);
1158
1159 do {
1160 union ixgbe_adv_tx_desc *eop = buf->eop;
1161 if (eop == NULL) /* No work */
1162 break;
1163
1164 if ((le32toh(eop->wb.status) & IXGBE_TXD_STAT_DD) == 0)
1165 break; /* I/O not complete */
1166
1167 if (buf->m_head) {
1168 txr->bytes += buf->m_head->m_pkthdr.len;
1169 bus_dmamap_sync(txr->txtag->dt_dmat, buf->map,
1170 0, buf->m_head->m_pkthdr.len,
1171 BUS_DMASYNC_POSTWRITE);
1172 ixgbe_dmamap_unload(txr->txtag, buf->map);
1173 m_freem(buf->m_head);
1174 buf->m_head = NULL;
1175 }
1176 buf->eop = NULL;
1177 ++avail;
1178
1179 /* We clean the range if multi segment */
1180 while (txd != eop) {
1181 ++txd;
1182 ++buf;
1183 ++work;
1184 /* wrap the ring? */
1185 if (__predict_false(!work)) {
1186 work -= txr->num_desc;
1187 buf = txr->tx_buffers;
1188 txd = txr->tx_base;
1189 }
1190 if (buf->m_head) {
1191 txr->bytes +=
1192 buf->m_head->m_pkthdr.len;
1193 bus_dmamap_sync(txr->txtag->dt_dmat,
1194 buf->map,
1195 0, buf->m_head->m_pkthdr.len,
1196 BUS_DMASYNC_POSTWRITE);
1197 ixgbe_dmamap_unload(txr->txtag,
1198 buf->map);
1199 m_freem(buf->m_head);
1200 buf->m_head = NULL;
1201 }
1202 ++avail;
1203 buf->eop = NULL;
1204
1205 }
1206 ++processed;
1207
1208 /* Try the next packet */
1209 ++txd;
1210 ++buf;
1211 ++work;
1212 /* reset with a wrap */
1213 if (__predict_false(!work)) {
1214 work -= txr->num_desc;
1215 buf = txr->tx_buffers;
1216 txd = txr->tx_base;
1217 }
1218 prefetch(txd);
1219 } while (__predict_true(--limit));
1220
1221 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1222 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1223
1224 work += txr->num_desc;
1225 txr->next_to_clean = work;
1226 if (processed) {
1227 txr->tx_avail = avail;
1228 txr->txr_no_space = false;
1229 txr->packets += processed;
1230 if_statadd(ifp, if_opackets, processed);
1231 }
1232
1233 /*
1234 * Queue hang detection: we know there is work
1235 * outstanding, or the early return above would
1236 * have been taken, so increment busy if nothing
1237 * managed to get cleaned. The local timer then
1238 * checks this counter and marks the queue HUNG
1239 * once it exceeds the maximum number of attempts.
1240 */
1241 if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1242 ++txr->busy;
1243 /*
1244 * If anything gets cleaned we reset the state to 1;
1245 * note this will clear HUNG if it is set.
1246 */
1247 if (processed)
1248 txr->busy = 1;
1249
1250 if (txr->tx_avail == txr->num_desc)
1251 txr->busy = 0;
1252
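/*
 * Return true when the cleanup limit was exhausted (more work may
 * remain), false when the ring was drained within the limit.
 */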
1253 return ((limit > 0) ? false : true);
1254 } /* ixgbe_txeof */
1255
1256 /************************************************************************
1257 * ixgbe_rsc_count
1258 *
1259 * Used to detect a descriptor that has been merged by Hardware RSC.
1260 ************************************************************************/
1261 static inline u32
1262 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1263 {
1264 return (le32toh(rx->wb.lower.lo_dword.data) &
1265 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1266 } /* ixgbe_rsc_count */
1267
1268 /************************************************************************
1269 * ixgbe_setup_hw_rsc
1270 *
1271 * Initialize the Hardware RSC (LRO) feature on 82599
1272 * for an RX ring; this is toggled by the LRO capability
1273 * even though it is transparent to the stack.
1274 *
1275 * NOTE: Since this HW feature only works with IPv4 and
1276 * testing has shown soft LRO to be as effective,
1277 * this feature will be disabled by default.
1278 ************************************************************************/
1279 static void
1280 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1281 {
1282 struct ixgbe_softc *sc = rxr->sc;
1283 struct ixgbe_hw *hw = &sc->hw;
1284 u32 rscctrl, rdrxctl;
1285
1286 /* If turning LRO/RSC off we need to disable it */
1287 if ((sc->ifp->if_capenable & IFCAP_LRO) == 0) {
1288 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1289 rscctrl &= ~IXGBE_RSCCTL_RSCEN;
1290 return;
1291 }
1292
1293 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1294 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1295 #ifdef DEV_NETMAP
1296 /* Always strip CRC unless Netmap disabled it */
1297 if (!(sc->feat_en & IXGBE_FEATURE_NETMAP) ||
1298 !(sc->ifp->if_capenable & IFCAP_NETMAP) ||
1299 ix_crcstrip)
1300 #endif /* DEV_NETMAP */
1301 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1302 rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1303 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1304
1305 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1306 rscctrl |= IXGBE_RSCCTL_RSCEN;
1307 /*
1308 * Limit the total number of descriptors that
1309 * can be combined, so it does not exceed 64K
1310 */
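/*
 * Rough arithmetic behind the MAXDESC choice below, assuming the
 * usual NetBSD cluster sizes (4 KB pages): 16 * 2 KB, 8 * 4 KB,
 * 4 * 9 KB and 1 * 16 KB all keep a coalesced packet well under
 * the 64 KB RSC limit.
 */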
1311 if (rxr->mbuf_sz == MCLBYTES)
1312 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1313 else if (rxr->mbuf_sz == MJUMPAGESIZE)
1314 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1315 else if (rxr->mbuf_sz == MJUM9BYTES)
1316 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1317 else /* Using 16K cluster */
1318 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1319
1320 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1321
1322 /* Enable TCP header recognition */
1323 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1324 (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR));
1325
1326 /* Disable RSC for ACK packets */
1327 IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1328 (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1329
1330 rxr->hw_rsc = TRUE;
1331 } /* ixgbe_setup_hw_rsc */
1332
1333 /************************************************************************
1334 * ixgbe_refresh_mbufs
1335 *
1336 * Refresh mbuf buffers for RX descriptor rings.
1337 * The routine keeps its own state, so discards due to resource
1338 * exhaustion are unnecessary: if an mbuf cannot be obtained
1339 * it simply returns, keeping its placeholder, and can be
1340 * called again later to retry.
1341 ************************************************************************/
1342 static void
1343 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1344 {
1345 struct ixgbe_softc *sc = rxr->sc;
1346 struct ixgbe_rx_buf *rxbuf;
1347 struct mbuf *mp;
1348 int i, error;
1349 bool refreshed = false;
1350
1351 i = rxr->next_to_refresh;
1352 /* next_to_refresh points to the previous one */
1353 if (++i == rxr->num_desc)
1354 i = 0;
1355
1356 while (i != limit) {
1357 rxbuf = &rxr->rx_buffers[i];
1358 if (__predict_false(rxbuf->buf == NULL)) {
1359 mp = ixgbe_getcl();
1360 if (mp == NULL) {
1361 IXGBE_EVC_ADD(&rxr->no_mbuf, 1);
1362 goto update;
1363 }
1364 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1365 IXGBE_M_ADJ(sc, rxr, mp);
1366 } else
1367 mp = rxbuf->buf;
1368
1369 /* If we're dealing with an mbuf that was copied rather
1370 * than replaced, there's no need to go through busdma.
1371 */
1372 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1373 /* Get the memory mapping */
1374 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
1375 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
1376 rxbuf->pmap, mp, BUS_DMA_NOWAIT);
1377 if (__predict_false(error != 0)) {
1378 device_printf(sc->dev, "Refresh mbufs: "
1379 "payload dmamap load failure - %d\n",
1380 error);
1381 m_free(mp);
1382 rxbuf->buf = NULL;
1383 goto update;
1384 }
1385 rxbuf->buf = mp;
1386 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1387 0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
1388 rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1389 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1390 } else {
1391 rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1392 rxbuf->flags &= ~IXGBE_RX_COPY;
1393 }
1394
1395 refreshed = true;
1396 /* next_to_refresh points to the previous one */
1397 rxr->next_to_refresh = i;
1398 if (++i == rxr->num_desc)
1399 i = 0;
1400 }
1401
1402 update:
1403 if (refreshed) /* Update hardware tail index */
1404 IXGBE_WRITE_REG(&sc->hw, rxr->tail, rxr->next_to_refresh);
1405
1406 return;
1407 } /* ixgbe_refresh_mbufs */
1408
1409 /************************************************************************
1410 * ixgbe_allocate_receive_buffers
1411 *
1412 * Allocate memory for rx_buffer structures. Since we use one
1413 * rx_buffer per received packet, the maximum number of rx_buffer's
1414 * that we'll need is equal to the number of receive descriptors
1415 * that we've allocated.
1416 ************************************************************************/
1417 static int
1418 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1419 {
1420 struct ixgbe_softc *sc = rxr->sc;
1421 device_t dev = sc->dev;
1422 struct ixgbe_rx_buf *rxbuf;
1423 int bsize, error;
1424
1425 bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1426 rxr->rx_buffers = malloc(bsize, M_DEVBUF, M_WAITOK | M_ZERO);
1427
1428 error = ixgbe_dma_tag_create(
1429 /* parent */ sc->osdep.dmat,
1430 /* alignment */ 1,
1431 /* bounds */ 0,
1432 /* maxsize */ MJUM16BYTES,
1433 /* nsegments */ 1,
1434 /* maxsegsize */ MJUM16BYTES,
1435 /* flags */ 0,
1436 &rxr->ptag);
1437 if (error != 0) {
1438 aprint_error_dev(dev, "Unable to create RX DMA tag\n");
1439 goto fail;
1440 }
1441
1442 for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
1443 rxbuf = &rxr->rx_buffers[i];
1444 error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1445 if (error) {
1446 aprint_error_dev(dev, "Unable to create RX dma map\n");
1447 goto fail;
1448 }
1449 }
1450
1451 return (0);
1452
1453 fail:
1454 /* Frees all, but can handle partial completion */
1455 ixgbe_free_receive_structures(sc);
1456
1457 return (error);
1458 } /* ixgbe_allocate_receive_buffers */
1459
1460 /************************************************************************
1461 * ixgbe_free_receive_ring
1462 ************************************************************************/
1463 static void
1464 ixgbe_free_receive_ring(struct rx_ring *rxr)
1465 {
1466 for (int i = 0; i < rxr->num_desc; i++) {
1467 ixgbe_rx_discard(rxr, i);
1468 }
1469 } /* ixgbe_free_receive_ring */
1470
1471 /************************************************************************
1472 * ixgbe_setup_receive_ring
1473 *
1474 * Initialize a receive ring and its buffers.
1475 ************************************************************************/
1476 static int
1477 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1478 {
1479 struct ixgbe_softc *sc;
1480 struct ixgbe_rx_buf *rxbuf;
1481 #ifdef LRO
1482 struct ifnet *ifp;
1483 struct lro_ctrl *lro = &rxr->lro;
1484 #endif /* LRO */
1485 #ifdef DEV_NETMAP
1486 struct netmap_adapter *na = NA(rxr->sc->ifp);
1487 struct netmap_slot *slot;
1488 #endif /* DEV_NETMAP */
1489 int rsize, error = 0;
1490
1491 sc = rxr->sc;
1492 #ifdef LRO
1493 ifp = sc->ifp;
1494 #endif /* LRO */
1495
1496 /* Clear the ring contents */
1497 IXGBE_RX_LOCK(rxr);
1498
1499 #ifdef DEV_NETMAP
1500 if (sc->feat_en & IXGBE_FEATURE_NETMAP)
1501 slot = netmap_reset(na, NR_RX, rxr->me, 0);
1502 #endif /* DEV_NETMAP */
1503
1504 rsize = roundup2(sc->num_rx_desc *
1505 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1506 bzero((void *)rxr->rx_base, rsize);
1507 /* Cache the size */
1508 rxr->mbuf_sz = sc->rx_mbuf_sz;
1509
1510 /* Free current RX buffer structs and their mbufs */
1511 ixgbe_free_receive_ring(rxr);
1512
1513 /* Now replenish the mbufs */
1514 for (int i = 0; i < rxr->num_desc; i++) {
1515 struct mbuf *mp;
1516
1517 rxbuf = &rxr->rx_buffers[i];
1518
1519 #ifdef DEV_NETMAP
1520 /*
1521 * In netmap mode, fill the map and set the buffer
1522 * address in the NIC ring, considering the offset
1523 * between the netmap and NIC rings (see comment in
1524 * ixgbe_setup_transmit_ring() ). No need to allocate
1525 * an mbuf, so end the block with a continue;
1526 */
1527 if ((sc->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
1528 int sj = netmap_idx_n2k(na->rx_rings[rxr->me], i);
1529 uint64_t paddr;
1530 void *addr;
1531
1532 addr = PNMB(na, slot + sj, &paddr);
1533 netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1534 /* Update descriptor and the cached value */
1535 rxr->rx_base[i].read.pkt_addr = htole64(paddr);
1536 rxbuf->addr = htole64(paddr);
1537 continue;
1538 }
1539 #endif /* DEV_NETMAP */
1540
1541 rxbuf->flags = 0;
1542 rxbuf->buf = ixgbe_getcl();
1543 if (rxbuf->buf == NULL) {
1544 IXGBE_EVC_ADD(&rxr->no_mbuf, 1);
1545 error = ENOBUFS;
1546 goto fail;
1547 }
1548 mp = rxbuf->buf;
1549 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1550 IXGBE_M_ADJ(sc, rxr, mp);
1551 /* Get the memory mapping */
1552 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat, rxbuf->pmap,
1553 mp, BUS_DMA_NOWAIT);
1554 if (error != 0) {
1555 /*
1556 * Clear this entry for later cleanup in
1557 * ixgbe_discard() which is called via
1558 * ixgbe_free_receive_ring().
1559 */
1560 m_freem(mp);
1561 rxbuf->buf = NULL;
1562 goto fail;
1563 }
1564 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1565 0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
1566 /* Update the descriptor and the cached value */
1567 rxr->rx_base[i].read.pkt_addr =
1568 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1569 rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1570 }
1571
1572 /* Setup our descriptor indices */
1573 rxr->next_to_check = 0;
1574 rxr->next_to_refresh = sc->num_rx_desc - 1; /* Fully allocated */
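/*
 * next_to_refresh records the last descriptor that already has a
 * buffer; ixgbe_refresh_mbufs() starts at the entry after it, so
 * pointing it at the final slot marks the whole ring as populated.
 */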
1575 rxr->lro_enabled = FALSE;
1576 rxr->discard_multidesc = false;
1577 IXGBE_EVC_STORE(&rxr->rx_copies, 0);
1578 #if 0 /* NetBSD */
1579 IXGBE_EVC_STORE(&rxr->rx_bytes, 0);
1580 #if 1 /* Fix inconsistency */
1581 IXGBE_EVC_STORE(&rxr->rx_packets, 0);
1582 #endif
1583 #endif
1584 rxr->vtag_strip = FALSE;
1585
1586 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1587 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1588
1589 /*
1590 * Now set up the LRO interface
1591 */
1592 if (ixgbe_rsc_enable)
1593 ixgbe_setup_hw_rsc(rxr);
1594 #ifdef LRO
1595 else if (ifp->if_capenable & IFCAP_LRO) {
1596 device_t dev = sc->dev;
1597 int err = tcp_lro_init(lro);
1598 if (err) {
1599 device_printf(dev, "LRO Initialization failed!\n");
1600 goto fail;
1601 }
1602 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1603 rxr->lro_enabled = TRUE;
1604 lro->ifp = sc->ifp;
1605 }
1606 #endif /* LRO */
1607
1608 IXGBE_RX_UNLOCK(rxr);
1609
1610 return (0);
1611
1612 fail:
1613 ixgbe_free_receive_ring(rxr);
1614 IXGBE_RX_UNLOCK(rxr);
1615
1616 return (error);
1617 } /* ixgbe_setup_receive_ring */
1618
1619 /************************************************************************
1620 * ixgbe_setup_receive_structures - Initialize all receive rings.
1621 ************************************************************************/
1622 int
1623 ixgbe_setup_receive_structures(struct ixgbe_softc *sc)
1624 {
1625 struct rx_ring *rxr = sc->rx_rings;
1626 int j;
1627
1628 INIT_DEBUGOUT("ixgbe_setup_receive_structures");
1629 for (j = 0; j < sc->num_queues; j++, rxr++)
1630 if (ixgbe_setup_receive_ring(rxr))
1631 goto fail;
1632
1633 return (0);
1634 fail:
1635 /*
1636 * Free the RX buffers allocated so far. We only handle
1637 * the rings that completed; the failing ring will have
1638 * cleaned up after itself. 'j' failed, so it is the terminus.
1639 */
1640 for (int i = 0; i < j; ++i) {
1641 rxr = &sc->rx_rings[i];
1642 IXGBE_RX_LOCK(rxr);
1643 ixgbe_free_receive_ring(rxr);
1644 IXGBE_RX_UNLOCK(rxr);
1645 }
1646
1647 return (ENOBUFS);
1648 } /* ixgbe_setup_receive_structures */
1649
1650
1651 /************************************************************************
1652 * ixgbe_free_receive_structures - Free all receive rings.
1653 ************************************************************************/
1654 void
1655 ixgbe_free_receive_structures(struct ixgbe_softc *sc)
1656 {
1657 struct rx_ring *rxr = sc->rx_rings;
1658
1659 INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1660
1661 for (int i = 0; i < sc->num_queues; i++, rxr++) {
1662 ixgbe_free_receive_buffers(rxr);
1663 #ifdef LRO
1664 /* Free LRO memory */
1665 tcp_lro_free(&rxr->lro);
1666 #endif /* LRO */
1667 /* Free the ring memory as well */
1668 ixgbe_dma_free(sc, &rxr->rxdma);
1669 IXGBE_RX_LOCK_DESTROY(rxr);
1670 }
1671
1672 free(sc->rx_rings, M_DEVBUF);
1673 } /* ixgbe_free_receive_structures */
1674
1675
1676 /************************************************************************
1677 * ixgbe_free_receive_buffers - Free receive ring data structures
1678 ************************************************************************/
1679 static void
1680 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1681 {
1682 struct ixgbe_softc *sc = rxr->sc;
1683 struct ixgbe_rx_buf *rxbuf;
1684
1685 INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1686
1687 /* Cleanup any existing buffers */
1688 if (rxr->rx_buffers != NULL) {
1689 for (int i = 0; i < sc->num_rx_desc; i++) {
1690 rxbuf = &rxr->rx_buffers[i];
1691 ixgbe_rx_discard(rxr, i);
1692 if (rxbuf->pmap != NULL) {
1693 ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1694 rxbuf->pmap = NULL;
1695 }
1696 }
1697
1698 if (rxr->rx_buffers != NULL) {
1699 free(rxr->rx_buffers, M_DEVBUF);
1700 rxr->rx_buffers = NULL;
1701 }
1702 }
1703
1704 if (rxr->ptag != NULL) {
1705 ixgbe_dma_tag_destroy(rxr->ptag);
1706 rxr->ptag = NULL;
1707 }
1708
1709 return;
1710 } /* ixgbe_free_receive_buffers */
1711
1712 /************************************************************************
1713 * ixgbe_rx_input
1714 ************************************************************************/
1715 static __inline void
1716 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m,
1717 u32 ptype)
1718 {
1719 struct ixgbe_softc *sc = ifp->if_softc;
1720
1721 #ifdef LRO
1722 struct ethercom *ec = &sc->osdep.ec;
1723
1724 /*
1725 * At the moment LRO is only for IP/TCP packets whose TCP checksum
1726 * has been computed by hardware, and the Ethernet header must not
1727 * carry a VLAN tag. For IPv6 we do not yet support extension headers.
1728 */
1729 if (rxr->lro_enabled &&
1730 (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
1731 (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1732 ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1733 (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1734 (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1735 (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1736 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1737 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1738 /*
1739 * Send to the stack if:
1740 * - LRO not enabled, or
1741 * - no LRO resources, or
1742 * - lro enqueue fails
1743 */
1744 if (rxr->lro.lro_cnt != 0)
1745 if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1746 return;
1747 }
1748 #endif /* LRO */
1749
1750 if_percpuq_enqueue(sc->ipq, m);
1751 } /* ixgbe_rx_input */
1752
1753 /************************************************************************
1754 * ixgbe_rx_discard
1755 ************************************************************************/
1756 static __inline void
1757 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1758 {
1759 struct ixgbe_rx_buf *rbuf;
1760
1761 rbuf = &rxr->rx_buffers[i];
1762
1763 /*
1764 * With advanced descriptors the writeback clobbers the buffer addrs,
1765 * so it's easier to just free the existing mbufs and take the normal
1766 * refresh path to get new buffers and mapping.
1767 */
1768
1769 if (rbuf->fmp != NULL) {/* Partial chain ? */
1770 bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1771 rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1772 ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
1773 m_freem(rbuf->fmp);
1774 rbuf->fmp = NULL;
1775 rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1776 } else if (rbuf->buf) {
1777 bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1778 rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1779 ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
1780 m_free(rbuf->buf);
1781 rbuf->buf = NULL;
1782 }
1783
1784 rbuf->flags = 0;
1785
1786 return;
1787 } /* ixgbe_rx_discard */
1788
1789
1790 /************************************************************************
1791 * ixgbe_rxeof
1792 *
1793 * Executes in interrupt context. It replenishes the
1794 * mbufs in the descriptor ring and passes data which has
1795 * been DMA'ed into host memory up to the upper layer.
1796 *
1797 * Return TRUE for more work, FALSE for all clean.
1798 ************************************************************************/
1799 bool
1800 ixgbe_rxeof(struct ix_queue *que)
1801 {
1802 struct ixgbe_softc *sc = que->sc;
1803 struct rx_ring *rxr = que->rxr;
1804 struct ifnet *ifp = sc->ifp;
1805 #ifdef LRO
1806 struct lro_ctrl *lro = &rxr->lro;
1807 #endif /* LRO */
1808 union ixgbe_adv_rx_desc *cur;
1809 struct ixgbe_rx_buf *rbuf, *nbuf;
1810 int i, nextp, processed = 0;
1811 u32 staterr = 0;
1812 u32 loopcount = 0, numdesc;
1813 u32 limit = sc->rx_process_limit;
1814 u32 rx_copy_len = sc->rx_copy_len;
1815 bool discard_multidesc = rxr->discard_multidesc;
1816 bool wraparound = false;
1817 unsigned int syncremain;
1818 #ifdef RSS
1819 u16 pkt_info;
1820 #endif
1821
1822 IXGBE_RX_LOCK(rxr);
1823
1824 #ifdef DEV_NETMAP
1825 if (sc->feat_en & IXGBE_FEATURE_NETMAP) {
1826 /* Same as the txeof routine: wakeup clients on intr. */
1827 if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1828 IXGBE_RX_UNLOCK(rxr);
1829 return (FALSE);
1830 }
1831 }
1832 #endif /* DEV_NETMAP */
1833
1834 	/* Sync rx_process_limit descriptors, or up to the ring end on wrap */
1835 	if ((rxr->next_to_check + limit) <= rxr->num_desc) {
1836 		/* Non-wraparound */
1837 		numdesc = limit;
1838 		syncremain = 0;
1839 	} else {
1840 		/* Wraparound. Sync only up to the end of the ring for now. */
1841 		numdesc = rxr->num_desc - rxr->next_to_check;
1842 
1843 		/* Remember the size of the remaining chunk at the ring start */
1844 		syncremain = limit - numdesc;
1845 	}
1846 bus_dmamap_sync(rxr->rxdma.dma_tag->dt_dmat,
1847 rxr->rxdma.dma_map,
1848 sizeof(union ixgbe_adv_rx_desc) * rxr->next_to_check,
1849 sizeof(union ixgbe_adv_rx_desc) * numdesc,
1850 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1851
1852 	/*
1853 	 * The maximum number of loop iterations is rx_process_limit. If
1854 	 * discard_multidesc is true, keep processing so that we do not send
1855 	 * a broken packet to the upper layer.
1856 	 */
1857 for (i = rxr->next_to_check;
1858 (loopcount < limit) || (discard_multidesc == true);) {
1859
1860 struct mbuf *sendmp, *mp;
1861 struct mbuf *newmp;
1862 u32 rsc, ptype;
1863 u16 len;
1864 u16 vtag = 0;
1865 bool eop;
1866 bool discard = false;
1867
1868 if (wraparound) {
1869 			/* Sync the remaining chunk at the ring start. */
1870 KASSERT(syncremain != 0);
1871 numdesc = syncremain;
1872 wraparound = false;
1873 } else if (__predict_false(loopcount >= limit)) {
1874 KASSERT(discard_multidesc == true);
1875 numdesc = 1;
1876 } else
1877 numdesc = 0;
1878
1879 if (numdesc != 0)
1880 bus_dmamap_sync(rxr->rxdma.dma_tag->dt_dmat,
1881 rxr->rxdma.dma_map, 0,
1882 sizeof(union ixgbe_adv_rx_desc) * numdesc,
1883 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1884
1885 cur = &rxr->rx_base[i];
1886 staterr = le32toh(cur->wb.upper.status_error);
1887 #ifdef RSS
1888 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1889 #endif
1890
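		/* DD is set once HW has written back this descriptor */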
1891 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1892 break;
1893
1894 loopcount++;
1895 sendmp = newmp = NULL;
1896 nbuf = NULL;
1897 rsc = 0;
1898 cur->wb.upper.status_error = 0;
1899 rbuf = &rxr->rx_buffers[i];
1900 mp = rbuf->buf;
1901
1902 len = le16toh(cur->wb.upper.length);
1903 ptype = le32toh(cur->wb.lower.lo_dword.data) &
1904 IXGBE_RXDADV_PKTTYPE_MASK;
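		/* EOP marks the last descriptor of a frame */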
1905 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1906
1907 /* Make sure bad packets are discarded */
1908 if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1909 #if __FreeBSD_version >= 1100036
1910 if (sc->feat_en & IXGBE_FEATURE_VF)
1911 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1912 #endif
1913 IXGBE_EVC_ADD(&rxr->rx_discarded, 1);
1914 ixgbe_rx_discard(rxr, i);
1915 discard_multidesc = false;
1916 goto next_desc;
1917 }
1918
1919 if (__predict_false(discard_multidesc))
1920 discard = true;
1921 else {
1922 			/* Pre-alloc new mbuf; on failure reuse rbuf->buf */
1923
1924 if ((rbuf->fmp == NULL) &&
1925 eop && (len <= rx_copy_len)) {
1926 /* For short packet. See below. */
1927 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1928 if (__predict_false(sendmp == NULL)) {
1929 IXGBE_EVC_ADD(&rxr->no_mbuf, 1);
1930 discard = true;
1931 }
1932 } else {
1933 /* For long packet. */
1934 newmp = ixgbe_getcl();
1935 if (__predict_false(newmp == NULL)) {
1936 IXGBE_EVC_ADD(&rxr->no_mbuf, 1);
1937 discard = true;
1938 }
1939 }
1940 }
1941
1942 if (__predict_false(discard)) {
1943 			/*
1944 			 * Descriptor initialization is already done by the
1945 			 * above code (cur->wb.upper.status_error = 0), so we
1946 			 * can reuse the current rbuf->buf for a new packet.
1947 			 *
1948 			 * Rewrite the buffer addr; see the comment in
1949 			 * ixgbe_rx_discard().
1950 			 */
1951 cur->read.pkt_addr = rbuf->addr;
1952 m_freem(rbuf->fmp);
1953 rbuf->fmp = NULL;
1954 if (!eop) {
1955 /* Discard the entire packet. */
1956 discard_multidesc = true;
1957 } else
1958 discard_multidesc = false;
1959 goto next_desc;
1960 }
1961 discard_multidesc = false;
1962
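		/* Make the received payload visible to the CPU */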
1963 bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1964 rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1965
1966 		/*
1967 		 * On the 82599, which supports hardware LRO (called HW RSC),
1968 		 * the segments of a frame need not sit in sequential
1969 		 * descriptors; instead, the index of the next descriptor is
1970 		 * encoded in the current one. This also means that we might
1971 		 * process more than one packet at a time, something that had
1972 		 * never been true before; it required eliminating global
1973 		 * chain pointers in favor of what we are doing here. -jfv
1974 		 */
1978 if (!eop) {
1979 /*
1980 * Figure out the next descriptor
1981 * of this frame.
1982 */
1983 if (rxr->hw_rsc == TRUE) {
1984 rsc = ixgbe_rsc_count(cur);
1985 rxr->rsc_num += (rsc - 1);
1986 }
1987 if (rsc) { /* Get hardware index */
1988 nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
1989 IXGBE_RXDADV_NEXTP_SHIFT);
1990 } else { /* Just sequential */
1991 nextp = i + 1;
1992 if (nextp == sc->num_rx_desc)
1993 nextp = 0;
1994 }
1995 nbuf = &rxr->rx_buffers[nextp];
1996 prefetch(nbuf);
1997 }
1998 /*
1999 * Rather than using the fmp/lmp global pointers
2000 * we now keep the head of a packet chain in the
2001 * buffer struct and pass this along from one
2002 * descriptor to the next, until we get EOP.
2003 */
2004 		/*
2005 		 * See if there is a stored head from a previous descriptor
2006 		 * that tells us how to treat this one.
2007 		 */
2008 if (rbuf->fmp != NULL) {
2009 /* Secondary frag */
2010 sendmp = rbuf->fmp;
2011
2012 /* Update new (used in future) mbuf */
2013 newmp->m_pkthdr.len = newmp->m_len = rxr->mbuf_sz;
2014 IXGBE_M_ADJ(sc, rxr, newmp);
2015 rbuf->buf = newmp;
2016 rbuf->fmp = NULL;
2017
2018 /* For secondary frag */
2019 mp->m_len = len;
2020 mp->m_flags &= ~M_PKTHDR;
2021
2022 /* For sendmp */
2023 sendmp->m_pkthdr.len += mp->m_len;
2024 } else {
2025 /*
2026 * It's the first segment of a multi descriptor
2027 * packet or a single segment which contains a full
2028 * packet.
2029 */
2030
2031 if (eop && (len <= rx_copy_len)) {
2032 				/*
2033 				 * Optimize: for a small packet, maybe just a
2034 				 * TCP ACK, copy it into a new mbuf and leave
2035 				 * the old mbuf+cluster for re-use.
2036 				 */
2037 sendmp->m_data += ETHER_ALIGN;
2038 memcpy(mtod(sendmp, void *),
2039 mtod(mp, void *), len);
2040 IXGBE_EVC_ADD(&rxr->rx_copies, 1);
2041 rbuf->flags |= IXGBE_RX_COPY;
2042 } else {
2043 /* For long packet */
2044
2045 /* Update new (used in future) mbuf */
2046 newmp->m_pkthdr.len = newmp->m_len
2047 = rxr->mbuf_sz;
2048 IXGBE_M_ADJ(sc, rxr, newmp);
2049 rbuf->buf = newmp;
2050 rbuf->fmp = NULL;
2051
2052 /* For sendmp */
2053 sendmp = mp;
2054 }
2055
2056 /* first desc of a non-ps chain */
2057 sendmp->m_pkthdr.len = sendmp->m_len = len;
2058 }
2059 ++processed;
2060
2061 /* Pass the head pointer on */
2062 if (eop == 0) {
2063 nbuf->fmp = sendmp;
2064 sendmp = NULL;
2065 mp->m_next = nbuf->buf;
2066 } else { /* Sending this frame */
2067 m_set_rcvif(sendmp, ifp);
2068 ++rxr->packets;
2069 IXGBE_EVC_ADD(&rxr->rx_packets, 1);
2070 /* capture data for AIM */
2071 rxr->bytes += sendmp->m_pkthdr.len;
2072 IXGBE_EVC_ADD(&rxr->rx_bytes, sendmp->m_pkthdr.len);
2073 /* Process vlan info */
2074 if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
2075 vtag = le16toh(cur->wb.upper.vlan);
2076 if (vtag) {
2077 vlan_set_tag(sendmp, vtag);
2078 }
2079 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
2080 ixgbe_rx_checksum(staterr, sendmp, ptype,
2081 &sc->stats.pf);
2082 }
2083
2084 #if 0 /* FreeBSD */
2085 /*
2086 * In case of multiqueue, we have RXCSUM.PCSD bit set
2087 * and never cleared. This means we have RSS hash
2088 * available to be used.
2089 */
2090 if (sc->num_queues > 1) {
2091 sendmp->m_pkthdr.flowid =
2092 le32toh(cur->wb.lower.hi_dword.rss);
2093 switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
2094 case IXGBE_RXDADV_RSSTYPE_IPV4:
2095 M_HASHTYPE_SET(sendmp,
2096 M_HASHTYPE_RSS_IPV4);
2097 break;
2098 case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
2099 M_HASHTYPE_SET(sendmp,
2100 M_HASHTYPE_RSS_TCP_IPV4);
2101 break;
2102 case IXGBE_RXDADV_RSSTYPE_IPV6:
2103 M_HASHTYPE_SET(sendmp,
2104 M_HASHTYPE_RSS_IPV6);
2105 break;
2106 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
2107 M_HASHTYPE_SET(sendmp,
2108 M_HASHTYPE_RSS_TCP_IPV6);
2109 break;
2110 case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
2111 M_HASHTYPE_SET(sendmp,
2112 M_HASHTYPE_RSS_IPV6_EX);
2113 break;
2114 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
2115 M_HASHTYPE_SET(sendmp,
2116 M_HASHTYPE_RSS_TCP_IPV6_EX);
2117 break;
2118 #if __FreeBSD_version > 1100000
2119 case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
2120 M_HASHTYPE_SET(sendmp,
2121 M_HASHTYPE_RSS_UDP_IPV4);
2122 break;
2123 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
2124 M_HASHTYPE_SET(sendmp,
2125 M_HASHTYPE_RSS_UDP_IPV6);
2126 break;
2127 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
2128 M_HASHTYPE_SET(sendmp,
2129 M_HASHTYPE_RSS_UDP_IPV6_EX);
2130 break;
2131 #endif
2132 default:
2133 M_HASHTYPE_SET(sendmp,
2134 M_HASHTYPE_OPAQUE_HASH);
2135 }
2136 } else {
2137 sendmp->m_pkthdr.flowid = que->msix;
2138 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
2139 }
2140 #endif
2141 }
2142 next_desc:
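		/* Make our descriptor writes visible to the device again */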
2143 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
2144 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2145
2146 /* Advance our pointers to the next descriptor. */
2147 if (++i == rxr->num_desc) {
2148 wraparound = true;
2149 i = 0;
2150 }
2151 rxr->next_to_check = i;
2152
2153 /* Now send to the stack or do LRO */
2154 if (sendmp != NULL)
2155 ixgbe_rx_input(rxr, ifp, sendmp, ptype);
2156
2157 /* Every 8 descriptors we go to refresh mbufs */
2158 if (processed == 8) {
2159 ixgbe_refresh_mbufs(rxr, i);
2160 processed = 0;
2161 }
2162 }
2163
2164 /* Save the current status */
2165 rxr->discard_multidesc = discard_multidesc;
2166
2167 /* Refresh any remaining buf structs */
2168 if (ixgbe_rx_unrefreshed(rxr))
2169 ixgbe_refresh_mbufs(rxr, i);
2170
2171 IXGBE_RX_UNLOCK(rxr);
2172
2173 #ifdef LRO
2174 /*
2175 * Flush any outstanding LRO work
2176 */
2177 tcp_lro_flush_all(lro);
2178 #endif /* LRO */
2179
2180 /*
2181 * Still have cleaning to do?
2182 */
2183 if ((staterr & IXGBE_RXD_STAT_DD) != 0)
2184 return (TRUE);
2185
2186 return (FALSE);
2187 } /* ixgbe_rxeof */
2188
2189
2190 /************************************************************************
2191 * ixgbe_rx_checksum
2192 *
2193 * Verify that the hardware indicated that the checksum is valid.
2194  *   Inform the stack about the status of the checksum so that the
2195  *   stack doesn't spend time verifying it again.
2196 ************************************************************************/
2197 static void
2198 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
2199 struct ixgbe_hw_stats *stats)
2200 {
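	/* staterr: status bits in the low word, error bits in the top byte */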
2201 u16 status = (u16)staterr;
2202 u8 errors = (u8)(staterr >> 24);
2203 #if 0
2204 bool sctp = false;
2205
2206 if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
2207 (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
2208 sctp = true;
2209 #endif
2210
2211 /* IPv4 checksum */
2212 if (status & IXGBE_RXD_STAT_IPCS) {
2213 IXGBE_EVC_ADD(&stats->ipcs, 1);
2214 if (!(errors & IXGBE_RXD_ERR_IPE)) {
2215 /* IP Checksum Good */
2216 mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
2217 } else {
2218 IXGBE_EVC_ADD(&stats->ipcs_bad, 1);
2219 mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
2220 }
2221 }
2222 /* TCP/UDP/SCTP checksum */
2223 if (status & IXGBE_RXD_STAT_L4CS) {
2224 IXGBE_EVC_ADD(&stats->l4cs, 1);
2225 int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
2226 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
2227 mp->m_pkthdr.csum_flags |= type;
2228 } else {
2229 IXGBE_EVC_ADD(&stats->l4cs_bad, 1);
2230 mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
2231 }
2232 }
2233 } /* ixgbe_rx_checksum */
2234
2235 /************************************************************************
2236 * ixgbe_dma_malloc
2237 ************************************************************************/
2238 int
2239 ixgbe_dma_malloc(struct ixgbe_softc *sc, const bus_size_t size,
2240 struct ixgbe_dma_alloc *dma, const int mapflags)
2241 {
2242 device_t dev = sc->dev;
2243 int r, rsegs;
2244
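	/*
	 * Create a DMA tag, allocate and map the memory, then create and
	 * load a DMA map to obtain the bus address of the region.
	 */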
2245 r = ixgbe_dma_tag_create(
2246 /* parent */ sc->osdep.dmat,
2247 /* alignment */ DBA_ALIGN,
2248 /* bounds */ 0,
2249 /* maxsize */ size,
2250 /* nsegments */ 1,
2251 /* maxsegsize */ size,
2252 /* flags */ BUS_DMA_ALLOCNOW,
2253 &dma->dma_tag);
2254 if (r != 0) {
2255 aprint_error_dev(dev,
2256 "%s: ixgbe_dma_tag_create failed; error %d\n", __func__,
2257 r);
2258 goto fail_0;
2259 }
2260
2261 r = bus_dmamem_alloc(dma->dma_tag->dt_dmat, size,
2262 dma->dma_tag->dt_alignment, dma->dma_tag->dt_boundary,
2263 &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
2264 if (r != 0) {
2265 aprint_error_dev(dev,
2266 "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
2267 goto fail_1;
2268 }
2269
2270 r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
2271 size, &dma->dma_vaddr, BUS_DMA_NOWAIT | BUS_DMA_COHERENT);
2272 if (r != 0) {
2273 aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
2274 __func__, r);
2275 goto fail_2;
2276 }
2277
2278 r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
2279 if (r != 0) {
2280 		aprint_error_dev(dev,
2281 		    "%s: ixgbe_dmamap_create failed; error %d\n", __func__, r);
2282 goto fail_3;
2283 }
2284
2285 r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map,
2286 dma->dma_vaddr, size, NULL, mapflags | BUS_DMA_NOWAIT);
2287 if (r != 0) {
2288 aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
2289 __func__, r);
2290 goto fail_4;
2291 }
2292 dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
2293 dma->dma_size = size;
2294 return 0;
2295 fail_4:
2296 ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
2297 fail_3:
2298 bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
2299 fail_2:
2300 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
2301 fail_1:
2302 ixgbe_dma_tag_destroy(dma->dma_tag);
2303 fail_0:
2304
2305 return (r);
2306 } /* ixgbe_dma_malloc */
2307
2308 /************************************************************************
2309 * ixgbe_dma_free
2310 ************************************************************************/
2311 void
2312 ixgbe_dma_free(struct ixgbe_softc *sc, struct ixgbe_dma_alloc *dma)
2313 {
2314 bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
2315 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2316 ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
2317 bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, dma->dma_size);
2318 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
2319 ixgbe_dma_tag_destroy(dma->dma_tag);
2320 } /* ixgbe_dma_free */
2321
2322
2323 /************************************************************************
2324 * ixgbe_allocate_queues
2325 *
2326  * Allocate memory for the transmit and receive rings, and then
2327  * the descriptors associated with each. Called only once at attach.
2328 ************************************************************************/
2329 int
2330 ixgbe_allocate_queues(struct ixgbe_softc *sc)
2331 {
2332 device_t dev = sc->dev;
2333 struct ix_queue *que;
2334 struct tx_ring *txr;
2335 struct rx_ring *rxr;
2336 int rsize, tsize, error = IXGBE_SUCCESS;
2337 int txconf = 0, rxconf = 0;
2338
2339 /* First, allocate the top level queue structs */
2340 sc->queues = (struct ix_queue *)malloc(sizeof(struct ix_queue) *
2341 sc->num_queues, M_DEVBUF, M_WAITOK | M_ZERO);
2342
2343 /* Second, allocate the TX ring struct memory */
2344 	sc->tx_rings = (struct tx_ring *)malloc(sizeof(struct tx_ring) *
2345 sc->num_queues, M_DEVBUF, M_WAITOK | M_ZERO);
2346
2347 /* Third, allocate the RX ring */
2348 sc->rx_rings = (struct rx_ring *)malloc(sizeof(struct rx_ring) *
2349 sc->num_queues, M_DEVBUF, M_WAITOK | M_ZERO);
2350
2351 	/* Size of the TX descriptor ring, rounded up for DMA alignment */
2352 tsize = roundup2(sc->num_tx_desc * sizeof(union ixgbe_adv_tx_desc),
2353 DBA_ALIGN);
2354
2355 	/*
2356 	 * Now set up the TX queues. txconf is needed to handle the
2357 	 * possibility that things fail mid-course, in which case we
2358 	 * need to unwind the allocations gracefully.
2359 	 */
2360 for (int i = 0; i < sc->num_queues; i++, txconf++) {
2361 /* Set up some basics */
2362 txr = &sc->tx_rings[i];
2363 txr->sc = sc;
2364 txr->txr_interq = NULL;
2365 /* In case SR-IOV is enabled, align the index properly */
2366 #ifdef PCI_IOV
2367 txr->me = ixgbe_vf_que_index(sc->iov_mode, sc->pool,
2368 i);
2369 #else
2370 txr->me = i;
2371 #endif
2372 txr->num_desc = sc->num_tx_desc;
2373
2374 /* Initialize the TX side lock */
2375 mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
2376
2377 if (ixgbe_dma_malloc(sc, tsize, &txr->txdma,
2378 BUS_DMA_NOWAIT)) {
2379 aprint_error_dev(dev,
2380 "Unable to allocate TX Descriptor memory\n");
2381 error = ENOMEM;
2382 goto err_tx_desc;
2383 }
2384 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2385 bzero((void *)txr->tx_base, tsize);
2386
2387 /* Now allocate transmit buffers for the ring */
2388 if (ixgbe_allocate_transmit_buffers(txr)) {
2389 aprint_error_dev(dev,
2390 "Critical Failure setting up transmit buffers\n");
2391 error = ENOMEM;
2392 goto err_tx_desc;
2393 }
2394 if (!(sc->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
2395 /* Allocate a buf ring */
2396 txr->txr_interq = pcq_create(IXGBE_BR_SIZE, KM_SLEEP);
2397 if (txr->txr_interq == NULL) {
2398 aprint_error_dev(dev,
2399 "Critical Failure setting up buf ring\n");
2400 error = ENOMEM;
2401 goto err_tx_desc;
2402 }
2403 }
2404 }
2405
2406 /*
2407 * Next the RX queues...
2408 */
2409 rsize = roundup2(sc->num_rx_desc * sizeof(union ixgbe_adv_rx_desc),
2410 DBA_ALIGN);
2411 for (int i = 0; i < sc->num_queues; i++, rxconf++) {
2412 rxr = &sc->rx_rings[i];
2413 /* Set up some basics */
2414 rxr->sc = sc;
2415 #ifdef PCI_IOV
2416 /* In case SR-IOV is enabled, align the index properly */
2417 rxr->me = ixgbe_vf_que_index(sc->iov_mode, sc->pool,
2418 i);
2419 #else
2420 rxr->me = i;
2421 #endif
2422 rxr->num_desc = sc->num_rx_desc;
2423
2424 /* Initialize the RX side lock */
2425 mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
2426
2427 if (ixgbe_dma_malloc(sc, rsize, &rxr->rxdma,
2428 BUS_DMA_NOWAIT)) {
2429 aprint_error_dev(dev,
2430 			    "Unable to allocate RX Descriptor memory\n");
2431 error = ENOMEM;
2432 goto err_rx_desc;
2433 }
2434 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2435 bzero((void *)rxr->rx_base, rsize);
2436
2437 /* Allocate receive buffers for the ring */
2438 if (ixgbe_allocate_receive_buffers(rxr)) {
2439 aprint_error_dev(dev,
2440 "Critical Failure setting up receive buffers\n");
2441 error = ENOMEM;
2442 goto err_rx_desc;
2443 }
2444 }
2445
2446 /*
2447 * Finally set up the queue holding structs
2448 */
2449 for (int i = 0; i < sc->num_queues; i++) {
2450 que = &sc->queues[i];
2451 que->sc = sc;
2452 que->me = i;
2453 que->txr = &sc->tx_rings[i];
2454 que->rxr = &sc->rx_rings[i];
2455
2456 mutex_init(&que->dc_mtx, MUTEX_DEFAULT, IPL_NET);
2457 que->disabled_count = 0;
2458 }
2459
2460 return (0);
2461
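	/*
	 * Unwind on failure: free the descriptor DMA memory of every ring
	 * fully set up so far (counted by rxconf/txconf), then release
	 * the top level arrays.
	 */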
2462 err_rx_desc:
2463 for (rxr = sc->rx_rings; rxconf > 0; rxr++, rxconf--)
2464 ixgbe_dma_free(sc, &rxr->rxdma);
2465 err_tx_desc:
2466 for (txr = sc->tx_rings; txconf > 0; txr++, txconf--)
2467 ixgbe_dma_free(sc, &txr->txdma);
2468 free(sc->rx_rings, M_DEVBUF);
2469 free(sc->tx_rings, M_DEVBUF);
2470 free(sc->queues, M_DEVBUF);
2471 return (error);
2472 } /* ixgbe_allocate_queues */
2473
2474 /************************************************************************
2475 * ixgbe_free_queues
2476 *
2477 * Free descriptors for the transmit and receive rings, and then
2478 * the memory associated with each.
2479 ************************************************************************/
2480 void
2481 ixgbe_free_queues(struct ixgbe_softc *sc)
2482 {
2483 struct ix_queue *que;
2484 int i;
2485
2486 ixgbe_free_transmit_structures(sc);
2487 ixgbe_free_receive_structures(sc);
2488 for (i = 0; i < sc->num_queues; i++) {
2489 que = &sc->queues[i];
2490 mutex_destroy(&que->dc_mtx);
2491 }
2492 free(sc->queues, M_DEVBUF);
2493 } /* ixgbe_free_queues */
2494