1 /* $NetBSD: ix_txrx.c,v 1.114 2023/12/28 10:13:51 msaitoh Exp $ */
2
3 /******************************************************************************
4
5 Copyright (c) 2001-2017, Intel Corporation
6 All rights reserved.
7
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are met:
10
11 1. Redistributions of source code must retain the above copyright notice,
12 this list of conditions and the following disclaimer.
13
14 2. Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
17
18 3. Neither the name of the Intel Corporation nor the names of its
19 contributors may be used to endorse or promote products derived from
20 this software without specific prior written permission.
21
22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
23 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
26 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 POSSIBILITY OF SUCH DAMAGE.
33
34 ******************************************************************************/
35 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 327031 2017-12-20 18:15:06Z erj $*/
36
37 /*
38 * Copyright (c) 2011 The NetBSD Foundation, Inc.
39 * All rights reserved.
40 *
41 * This code is derived from software contributed to The NetBSD Foundation
42 * by Coyote Point Systems, Inc.
43 *
44 * Redistribution and use in source and binary forms, with or without
45 * modification, are permitted provided that the following conditions
46 * are met:
47 * 1. Redistributions of source code must retain the above copyright
48 * notice, this list of conditions and the following disclaimer.
49 * 2. Redistributions in binary form must reproduce the above copyright
50 * notice, this list of conditions and the following disclaimer in the
51 * documentation and/or other materials provided with the distribution.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
54 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
56 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
57 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
58 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
59 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
60 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
61 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
62 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
63 * POSSIBILITY OF SUCH DAMAGE.
64 */
65
66 #include <sys/cdefs.h>
67 __KERNEL_RCSID(0, "$NetBSD: ix_txrx.c,v 1.114 2023/12/28 10:13:51 msaitoh Exp $");
68
69 #include "opt_inet.h"
70 #include "opt_inet6.h"
71
72 #include "ixgbe.h"
73
74 /*
75 * HW RSC control:
76 * this feature only works with
77 * IPv4, and only on 82599 and later.
78 * Also this will cause IP forwarding to
79 * fail and that can't be controlled by
80 * the stack as LRO can. For all these
81 * reasons I've deemed it best to leave
82  *  this off and not bother with a tunable
83  *  interface; enabling it would require
84  *  recompiling the driver.
85 */
86 static bool ixgbe_rsc_enable = FALSE;
87
88 #ifdef IXGBE_FDIR
89 /*
90 * For Flow Director: this is the
91 * number of TX packets we sample
92  * for the filter pool; this means
93 * every 20th packet will be probed.
94 *
95 * This feature can be disabled by
96 * setting this to 0.
97 */
98 static int atr_sample_rate = 20;
99 #endif
100
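/*
 * IXGBE_M_ADJ: if the adapter's maximum frame still fits in the receive
 * cluster after an ETHER_ALIGN (2 byte) shift, trim two bytes from the
 * head of a freshly allocated mbuf so that the IP header following the
 * 14-byte ethernet header lands on a 4-byte boundary.
 */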
101 #define IXGBE_M_ADJ(sc, rxr, mp) \
102 if (sc->max_frame_size <= (rxr->mbuf_sz - ETHER_ALIGN)) \
103 m_adj(mp, ETHER_ALIGN)
104
105 /************************************************************************
106 * Local Function prototypes
107 ************************************************************************/
108 static void ixgbe_setup_transmit_ring(struct tx_ring *);
109 static void ixgbe_free_transmit_buffers(struct tx_ring *);
110 static int ixgbe_setup_receive_ring(struct rx_ring *);
111 static void ixgbe_free_receive_buffers(struct rx_ring *);
112 static void ixgbe_rx_checksum(u32, struct mbuf *, u32,
113 struct ixgbe_hw_stats *);
114 static void ixgbe_refresh_mbufs(struct rx_ring *, int);
115 static void ixgbe_drain(struct ifnet *, struct tx_ring *);
116 static int ixgbe_xmit(struct tx_ring *, struct mbuf *);
117 static int ixgbe_tx_ctx_setup(struct tx_ring *,
118 struct mbuf *, u32 *, u32 *);
119 static int ixgbe_tso_setup(struct tx_ring *,
120 struct mbuf *, u32 *, u32 *);
121 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
122 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
123 struct mbuf *, u32);
124 static int ixgbe_dma_malloc(struct ixgbe_softc *, bus_size_t,
125 struct ixgbe_dma_alloc *, int);
126 static void ixgbe_dma_free(struct ixgbe_softc *, struct ixgbe_dma_alloc *);
127
128 static void ixgbe_setup_hw_rsc(struct rx_ring *);
129
130 /************************************************************************
131 * ixgbe_legacy_start_locked - Transmit entry point
132 *
133 * Called by the stack to initiate a transmit.
134 * The driver will remain in this routine as long as there are
135 * packets to transmit and transmit resources are available.
136 * In case resources are not available, the stack is notified
137 * and the packet is requeued.
138 ************************************************************************/
139 int
140 ixgbe_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
141 {
142 int rc;
143 struct mbuf *m_head;
144 struct ixgbe_softc *sc = txr->sc;
145
146 IXGBE_TX_LOCK_ASSERT(txr);
147
148 if (sc->link_active != LINK_STATE_UP) {
149 /*
150 * discard all packets buffered in IFQ to avoid
151 		 * sending old packets at the next link up.
152 */
153 ixgbe_drain(ifp, txr);
154 return (ENETDOWN);
155 }
156 if ((ifp->if_flags & IFF_RUNNING) == 0)
157 return (ENETDOWN);
158 if (txr->txr_no_space)
159 return (ENETDOWN);
160
161 while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
162 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
163 break;
164
165 IFQ_POLL(&ifp->if_snd, m_head);
166 if (m_head == NULL)
167 break;
168
169 if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
170 break;
171 }
172 IFQ_DEQUEUE(&ifp->if_snd, m_head);
173 if (rc != 0) {
174 m_freem(m_head);
175 continue;
176 }
177
178 /* Send a copy of the frame to the BPF listener */
179 bpf_mtap(ifp, m_head, BPF_D_OUT);
180 }
181
182 return IXGBE_SUCCESS;
183 } /* ixgbe_legacy_start_locked */
184
185 /************************************************************************
186 * ixgbe_legacy_start
187 *
188 * Called by the stack, this always uses the first tx ring,
189 * and should not be used with multiqueue tx enabled.
190 ************************************************************************/
191 void
192 ixgbe_legacy_start(struct ifnet *ifp)
193 {
194 struct ixgbe_softc *sc = ifp->if_softc;
195 struct tx_ring *txr = sc->tx_rings;
196
197 if (ifp->if_flags & IFF_RUNNING) {
198 IXGBE_TX_LOCK(txr);
199 ixgbe_legacy_start_locked(ifp, txr);
200 IXGBE_TX_UNLOCK(txr);
201 }
202 } /* ixgbe_legacy_start */
203
204 /************************************************************************
205 * ixgbe_mq_start - Multiqueue Transmit Entry Point
206 *
207 * (if_transmit function)
208 ************************************************************************/
209 int
210 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
211 {
212 struct ixgbe_softc *sc = ifp->if_softc;
213 struct tx_ring *txr;
214 int i;
215 #ifdef RSS
216 uint32_t bucket_id;
217 #endif
218
219 /*
220 * When doing RSS, map it to the same outbound queue
221 * as the incoming flow would be mapped to.
222 *
223 	 * If everything is set up correctly, it should be the
224 	 * same bucket as the one the current CPU is in.
225 */
226 #ifdef RSS
227 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
228 if ((sc->feat_en & IXGBE_FEATURE_RSS) &&
229 (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
230 &bucket_id) == 0)) {
231 i = bucket_id % sc->num_queues;
232 #ifdef IXGBE_DEBUG
233 if (bucket_id > sc->num_queues)
234 if_printf(ifp,
235 "bucket_id (%d) > num_queues (%d)\n",
236 bucket_id, sc->num_queues);
237 #endif
238 } else
239 i = m->m_pkthdr.flowid % sc->num_queues;
240 } else
241 #endif /* RSS */
242 i = (cpu_index(curcpu()) % ncpu) % sc->num_queues;
243
244 /* Check for a hung queue and pick alternative */
245 if (((1ULL << i) & sc->active_queues) == 0)
246 i = ffs64(sc->active_queues);
247
248 txr = &sc->tx_rings[i];
249
250 if (__predict_false(!pcq_put(txr->txr_interq, m))) {
251 m_freem(m);
252 IXGBE_EVC_ADD(&txr->pcq_drops, 1);
253 return ENOBUFS;
254 }
255 #ifdef IXGBE_ALWAYS_TXDEFER
256 kpreempt_disable();
257 softint_schedule(txr->txr_si);
258 kpreempt_enable();
259 #else
260 if (IXGBE_TX_TRYLOCK(txr)) {
261 ixgbe_mq_start_locked(ifp, txr);
262 IXGBE_TX_UNLOCK(txr);
263 } else {
264 if (sc->txrx_use_workqueue) {
265 u_int *enqueued;
266
267 /*
268 			 * This function itself is not called in interrupt
269 			 * context, but it can be called in fast softint
270 			 * context right after receiving forwarded packets.
271 			 * So the workqueue must be protected from being
272 			 * enqueued twice when the machine handles both
273 			 * locally generated and forwarded packets.
274 */
275 enqueued = percpu_getref(sc->txr_wq_enqueued);
276 if (*enqueued == 0) {
277 *enqueued = 1;
278 percpu_putref(sc->txr_wq_enqueued);
279 workqueue_enqueue(sc->txr_wq,
280 &txr->wq_cookie, curcpu());
281 } else
282 percpu_putref(sc->txr_wq_enqueued);
283 } else {
284 kpreempt_disable();
285 softint_schedule(txr->txr_si);
286 kpreempt_enable();
287 }
288 }
289 #endif
290
291 return (0);
292 } /* ixgbe_mq_start */
293
294 /************************************************************************
295 * ixgbe_mq_start_locked
296 ************************************************************************/
297 int
298 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
299 {
300 struct mbuf *next;
301 int enqueued = 0, err = 0;
302
303 if (txr->sc->link_active != LINK_STATE_UP) {
304 /*
305 * discard all packets buffered in txr_interq to avoid
306 		 * sending old packets at the next link up.
307 */
308 ixgbe_drain(ifp, txr);
309 return (ENETDOWN);
310 }
311 if ((ifp->if_flags & IFF_RUNNING) == 0)
312 return (ENETDOWN);
313 if (txr->txr_no_space)
314 return (ENETDOWN);
315
316 /* Process the queue */
317 while ((next = pcq_get(txr->txr_interq)) != NULL) {
318 if ((err = ixgbe_xmit(txr, next)) != 0) {
319 m_freem(next);
320 /* All errors are counted in ixgbe_xmit() */
321 break;
322 }
323 enqueued++;
324 #if __FreeBSD_version >= 1100036
325 /*
326 * Since we're looking at the tx ring, we can check
327 * to see if we're a VF by examining our tail register
328 * address.
329 */
330 if ((txr->sc->feat_en & IXGBE_FEATURE_VF) &&
331 (next->m_flags & M_MCAST))
332 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
333 #endif
334 /* Send a copy of the frame to the BPF listener */
335 bpf_mtap(ifp, next, BPF_D_OUT);
336 if ((ifp->if_flags & IFF_RUNNING) == 0)
337 break;
338 }
339
340 if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->sc))
341 ixgbe_txeof(txr);
342
343 return (err);
344 } /* ixgbe_mq_start_locked */
345
346 /************************************************************************
347 * ixgbe_deferred_mq_start
348 *
349 * Called from a softint and workqueue (indirectly) to drain queued
350 * transmit packets.
351 ************************************************************************/
352 void
353 ixgbe_deferred_mq_start(void *arg)
354 {
355 struct tx_ring *txr = arg;
356 struct ixgbe_softc *sc = txr->sc;
357 struct ifnet *ifp = sc->ifp;
358
359 IXGBE_TX_LOCK(txr);
360 if (pcq_peek(txr->txr_interq) != NULL)
361 ixgbe_mq_start_locked(ifp, txr);
362 IXGBE_TX_UNLOCK(txr);
363 } /* ixgbe_deferred_mq_start */
364
365 /************************************************************************
366 * ixgbe_deferred_mq_start_work
367 *
368 * Called from a workqueue to drain queued transmit packets.
369 ************************************************************************/
370 void
371 ixgbe_deferred_mq_start_work(struct work *wk, void *arg)
372 {
373 struct tx_ring *txr = container_of(wk, struct tx_ring, wq_cookie);
374 struct ixgbe_softc *sc = txr->sc;
375 u_int *enqueued = percpu_getref(sc->txr_wq_enqueued);
376 *enqueued = 0;
377 percpu_putref(sc->txr_wq_enqueued);
378
379 ixgbe_deferred_mq_start(txr);
380 } /* ixgbe_deferred_mq_start_work */
381
382 /************************************************************************
383 * ixgbe_drain_all
384 ************************************************************************/
385 void
386 ixgbe_drain_all(struct ixgbe_softc *sc)
387 {
388 struct ifnet *ifp = sc->ifp;
389 struct ix_queue *que = sc->queues;
390
391 for (int i = 0; i < sc->num_queues; i++, que++) {
392 struct tx_ring *txr = que->txr;
393
394 IXGBE_TX_LOCK(txr);
395 ixgbe_drain(ifp, txr);
396 IXGBE_TX_UNLOCK(txr);
397 }
398 }
399
400 /************************************************************************
401 * ixgbe_xmit
402 *
403 * Maps the mbufs to tx descriptors, allowing the
404 * TX engine to transmit the packets.
405 *
406 * Return 0 on success, positive on failure
407 ************************************************************************/
408 static int
409 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
410 {
411 struct ixgbe_softc *sc = txr->sc;
412 struct ixgbe_tx_buf *txbuf;
413 union ixgbe_adv_tx_desc *txd = NULL;
414 struct ifnet *ifp = sc->ifp;
415 int i, j, error;
416 int first;
417 u32 olinfo_status = 0, cmd_type_len;
418 bool remap = TRUE;
419 bus_dmamap_t map;
420
421 /* Basic descriptor defines */
422 cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
423 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
424
425 if (vlan_has_tag(m_head))
426 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
427
428 /*
429 * Important to capture the first descriptor
430 * used because it will contain the index of
431 * the one we tell the hardware to report back
432 */
433 first = txr->next_avail_desc;
434 txbuf = &txr->tx_buffers[first];
435 map = txbuf->map;
436
437 /*
438 * Map the packet for DMA.
439 */
440 retry:
441 error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map, m_head,
442 BUS_DMA_NOWAIT);
443
444 if (__predict_false(error)) {
445 struct mbuf *m;
446
447 switch (error) {
448 case EAGAIN:
449 txr->q_eagain_tx_dma_setup++;
450 return EAGAIN;
451 case ENOMEM:
452 txr->q_enomem_tx_dma_setup++;
453 return EAGAIN;
454 case EFBIG:
455 /* Try it again? - one try */
456 if (remap == TRUE) {
457 remap = FALSE;
458 /*
459 * XXX: m_defrag will choke on
460 * non-MCLBYTES-sized clusters
461 */
462 txr->q_efbig_tx_dma_setup++;
463 m = m_defrag(m_head, M_NOWAIT);
464 if (m == NULL) {
465 txr->q_mbuf_defrag_failed++;
466 return ENOBUFS;
467 }
468 m_head = m;
469 goto retry;
470 } else {
471 txr->q_efbig2_tx_dma_setup++;
472 return error;
473 }
474 case EINVAL:
475 txr->q_einval_tx_dma_setup++;
476 return error;
477 default:
478 txr->q_other_tx_dma_setup++;
479 return error;
480 }
481 }
482
483 /* Make certain there are enough descriptors */
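	/* ('+ 2' presumably reserves room for the optional offload context descriptor plus one spare slot.) */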
484 if (txr->tx_avail < (map->dm_nsegs + 2)) {
485 txr->txr_no_space = true;
486 IXGBE_EVC_ADD(&txr->no_desc_avail, 1);
487 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
488 return EAGAIN;
489 }
490
491 /*
492 * Set up the appropriate offload context if requested,
493 * this may consume one TX descriptor.
494 */
495 error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
496 if (__predict_false(error)) {
497 return (error);
498 }
499
500 #ifdef IXGBE_FDIR
501 /* Do the flow director magic */
502 if ((sc->feat_en & IXGBE_FEATURE_FDIR) &&
503 (txr->atr_sample) && (!sc->fdir_reinit)) {
504 ++txr->atr_count;
505 if (txr->atr_count >= atr_sample_rate) {
506 ixgbe_atr(txr, m_head);
507 txr->atr_count = 0;
508 }
509 }
510 #endif
511
512 olinfo_status |= IXGBE_ADVTXD_CC;
513 i = txr->next_avail_desc;
514 for (j = 0; j < map->dm_nsegs; j++) {
515 bus_size_t seglen;
516 uint64_t segaddr;
517
518 txbuf = &txr->tx_buffers[i];
519 txd = &txr->tx_base[i];
520 seglen = map->dm_segs[j].ds_len;
521 segaddr = htole64(map->dm_segs[j].ds_addr);
522
523 txd->read.buffer_addr = segaddr;
524 txd->read.cmd_type_len = htole32(cmd_type_len | seglen);
525 txd->read.olinfo_status = htole32(olinfo_status);
526
527 if (++i == txr->num_desc)
528 i = 0;
529 }
530
531 txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
532 txr->tx_avail -= map->dm_nsegs;
533 txr->next_avail_desc = i;
534
535 txbuf->m_head = m_head;
536 /*
537 * Here we swap the map so the last descriptor,
538 * which gets the completion interrupt has the
539 * real map, and the first descriptor gets the
540 * unused map from this descriptor.
541 */
542 txr->tx_buffers[first].map = txbuf->map;
543 txbuf->map = map;
544 bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
545 BUS_DMASYNC_PREWRITE);
546
547 /* Set the EOP descriptor that will be marked done */
548 txbuf = &txr->tx_buffers[first];
549 txbuf->eop = txd;
550
551 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
552 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
553 /*
554 * Advance the Transmit Descriptor Tail (Tdt), this tells the
555 * hardware that this frame is available to transmit.
556 */
557 IXGBE_EVC_ADD(&txr->total_packets, 1);
558 IXGBE_WRITE_REG(&sc->hw, txr->tail, i);
559
560 net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
561 if_statadd_ref(nsr, if_obytes, m_head->m_pkthdr.len);
562 if (m_head->m_flags & M_MCAST)
563 if_statinc_ref(nsr, if_omcasts);
564 IF_STAT_PUTREF(ifp);
565
566 /* Mark queue as having work */
567 if (txr->busy == 0)
568 txr->busy = 1;
569
570 return (0);
571 } /* ixgbe_xmit */
572
573 /************************************************************************
574 * ixgbe_drain
575 ************************************************************************/
576 static void
577 ixgbe_drain(struct ifnet *ifp, struct tx_ring *txr)
578 {
579 struct mbuf *m;
580
581 IXGBE_TX_LOCK_ASSERT(txr);
582
583 if (txr->me == 0) {
584 while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
585 IFQ_DEQUEUE(&ifp->if_snd, m);
586 m_freem(m);
587 IF_DROP(&ifp->if_snd);
588 }
589 }
590
591 while ((m = pcq_get(txr->txr_interq)) != NULL) {
592 m_freem(m);
593 IXGBE_EVC_ADD(&txr->pcq_drops, 1);
594 }
595 }
596
597 /************************************************************************
598 * ixgbe_allocate_transmit_buffers
599 *
600 * Allocate memory for tx_buffer structures. The tx_buffer stores all
601 * the information needed to transmit a packet on the wire. This is
602  * called only once at attach; setup is done on every reset.
603 ************************************************************************/
604 static int
605 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
606 {
607 struct ixgbe_softc *sc = txr->sc;
608 device_t dev = sc->dev;
609 struct ixgbe_tx_buf *txbuf;
610 int error, i;
611
612 /*
613 * Setup DMA descriptor areas.
614 */
615 error = ixgbe_dma_tag_create(
616 /* parent */ sc->osdep.dmat,
617 /* alignment */ 1,
618 /* bounds */ 0,
619 /* maxsize */ IXGBE_TSO_SIZE,
620 /* nsegments */ sc->num_segs,
621 /* maxsegsize */ PAGE_SIZE,
622 /* flags */ 0,
623 &txr->txtag);
624 if (error != 0) {
625 		aprint_error_dev(dev, "Unable to allocate TX DMA tag\n");
626 goto fail;
627 }
628
629 txr->tx_buffers = kmem_zalloc(sizeof(struct ixgbe_tx_buf) *
630 sc->num_tx_desc, KM_SLEEP);
631
632 /* Create the descriptor buffer dma maps */
633 txbuf = txr->tx_buffers;
634 for (i = 0; i < sc->num_tx_desc; i++, txbuf++) {
635 error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
636 if (error != 0) {
637 aprint_error_dev(dev,
638 "Unable to create TX DMA map (%d)\n", error);
639 goto fail;
640 }
641 }
642
643 return 0;
644 fail:
645 /* We free all, it handles case where we are in the middle */
646 #if 0 /* XXX was FreeBSD */
647 ixgbe_free_transmit_structures(sc);
648 #else
649 ixgbe_free_transmit_buffers(txr);
650 #endif
651 return (error);
652 } /* ixgbe_allocate_transmit_buffers */
653
654 /************************************************************************
655 * ixgbe_setup_transmit_ring - Initialize a transmit ring.
656 ************************************************************************/
657 static void
658 ixgbe_setup_transmit_ring(struct tx_ring *txr)
659 {
660 struct ixgbe_softc *sc = txr->sc;
661 struct ixgbe_tx_buf *txbuf;
662 #ifdef DEV_NETMAP
663 	struct netmap_adapter *na = NA(sc->ifp);
664 struct netmap_slot *slot;
665 #endif /* DEV_NETMAP */
666
667 /* Clear the old ring contents */
668 IXGBE_TX_LOCK(txr);
669
670 #ifdef DEV_NETMAP
671 if (sc->feat_en & IXGBE_FEATURE_NETMAP) {
672 /*
673 * (under lock): if in netmap mode, do some consistency
674 * checks and set slot to entry 0 of the netmap ring.
675 */
676 slot = netmap_reset(na, NR_TX, txr->me, 0);
677 }
678 #endif /* DEV_NETMAP */
679
680 bzero((void *)txr->tx_base,
681 (sizeof(union ixgbe_adv_tx_desc)) * sc->num_tx_desc);
682 /* Reset indices */
683 txr->next_avail_desc = 0;
684 txr->next_to_clean = 0;
685
686 /* Free any existing tx buffers. */
687 txbuf = txr->tx_buffers;
688 for (int i = 0; i < txr->num_desc; i++, txbuf++) {
689 if (txbuf->m_head != NULL) {
690 bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
691 0, txbuf->m_head->m_pkthdr.len,
692 BUS_DMASYNC_POSTWRITE);
693 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
694 m_freem(txbuf->m_head);
695 txbuf->m_head = NULL;
696 }
697
698 #ifdef DEV_NETMAP
699 /*
700 * In netmap mode, set the map for the packet buffer.
701 * NOTE: Some drivers (not this one) also need to set
702 * the physical buffer address in the NIC ring.
703 * Slots in the netmap ring (indexed by "si") are
704 * kring->nkr_hwofs positions "ahead" wrt the
705 * corresponding slot in the NIC ring. In some drivers
706 * (not here) nkr_hwofs can be negative. Function
707 * netmap_idx_n2k() handles wraparounds properly.
708 */
709 if ((sc->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
710 int si = netmap_idx_n2k(na->tx_rings[txr->me], i);
711 netmap_load_map(na, txr->txtag,
712 txbuf->map, NMB(na, slot + si));
713 }
714 #endif /* DEV_NETMAP */
715
716 /* Clear the EOP descriptor pointer */
717 txbuf->eop = NULL;
718 }
719
720 #ifdef IXGBE_FDIR
721 /* Set the rate at which we sample packets */
722 if (sc->feat_en & IXGBE_FEATURE_FDIR)
723 txr->atr_sample = atr_sample_rate;
724 #endif
725
726 /* Set number of descriptors available */
727 txr->tx_avail = sc->num_tx_desc;
728
729 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
730 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
731 IXGBE_TX_UNLOCK(txr);
732 } /* ixgbe_setup_transmit_ring */
733
734 /************************************************************************
735 * ixgbe_setup_transmit_structures - Initialize all transmit rings.
736 ************************************************************************/
737 int
738 ixgbe_setup_transmit_structures(struct ixgbe_softc *sc)
739 {
740 struct tx_ring *txr = sc->tx_rings;
741
742 for (int i = 0; i < sc->num_queues; i++, txr++)
743 ixgbe_setup_transmit_ring(txr);
744
745 return (0);
746 } /* ixgbe_setup_transmit_structures */
747
748 /************************************************************************
749 * ixgbe_free_transmit_structures - Free all transmit rings.
750 ************************************************************************/
751 void
752 ixgbe_free_transmit_structures(struct ixgbe_softc *sc)
753 {
754 struct tx_ring *txr = sc->tx_rings;
755
756 for (int i = 0; i < sc->num_queues; i++, txr++) {
757 ixgbe_free_transmit_buffers(txr);
758 ixgbe_dma_free(sc, &txr->txdma);
759 IXGBE_TX_LOCK_DESTROY(txr);
760 }
761 kmem_free(sc->tx_rings, sizeof(struct tx_ring) * sc->num_queues);
762 } /* ixgbe_free_transmit_structures */
763
764 /************************************************************************
765 * ixgbe_free_transmit_buffers
766 *
767 * Free transmit ring related data structures.
768 ************************************************************************/
769 static void
770 ixgbe_free_transmit_buffers(struct tx_ring *txr)
771 {
772 struct ixgbe_softc *sc = txr->sc;
773 struct ixgbe_tx_buf *tx_buffer;
774 int i;
775
776 INIT_DEBUGOUT("ixgbe_free_transmit_buffers: begin");
777
778 if (txr->tx_buffers == NULL)
779 return;
780
781 tx_buffer = txr->tx_buffers;
782 for (i = 0; i < sc->num_tx_desc; i++, tx_buffer++) {
783 if (tx_buffer->m_head != NULL) {
784 bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
785 0, tx_buffer->m_head->m_pkthdr.len,
786 BUS_DMASYNC_POSTWRITE);
787 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
788 m_freem(tx_buffer->m_head);
789 tx_buffer->m_head = NULL;
790 if (tx_buffer->map != NULL) {
791 ixgbe_dmamap_destroy(txr->txtag,
792 tx_buffer->map);
793 tx_buffer->map = NULL;
794 }
795 } else if (tx_buffer->map != NULL) {
796 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
797 ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
798 tx_buffer->map = NULL;
799 }
800 }
801 if (txr->txr_interq != NULL) {
802 struct mbuf *m;
803
804 while ((m = pcq_get(txr->txr_interq)) != NULL)
805 m_freem(m);
806 pcq_destroy(txr->txr_interq);
807 }
808 if (txr->tx_buffers != NULL) {
809 kmem_free(txr->tx_buffers,
810 sizeof(struct ixgbe_tx_buf) * sc->num_tx_desc);
811 txr->tx_buffers = NULL;
812 }
813 if (txr->txtag != NULL) {
814 ixgbe_dma_tag_destroy(txr->txtag);
815 txr->txtag = NULL;
816 }
817 } /* ixgbe_free_transmit_buffers */
818
819 /************************************************************************
820 * ixgbe_tx_ctx_setup
821 *
822 * Advanced Context Descriptor setup for VLAN, CSUM or TSO
823 ************************************************************************/
824 static int
825 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
826 u32 *cmd_type_len, u32 *olinfo_status)
827 {
828 struct ixgbe_softc *sc = txr->sc;
829 struct ixgbe_adv_tx_context_desc *TXD;
830 struct ether_vlan_header *eh;
831 #ifdef INET
832 struct ip *ip;
833 #endif
834 #ifdef INET6
835 struct ip6_hdr *ip6;
836 #endif
837 int ehdrlen, ip_hlen = 0;
838 int offload = TRUE;
839 int ctxd = txr->next_avail_desc;
840 u32 vlan_macip_lens = 0;
841 u32 type_tucmd_mlhl = 0;
842 u16 vtag = 0;
843 u16 etype;
844 u8 ipproto = 0;
845 char *l3d;
846
847 /* First check if TSO is to be used */
848 if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4 | M_CSUM_TSOv6)) {
849 int rv = ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status);
850
851 if (rv != 0)
852 IXGBE_EVC_ADD(&sc->tso_err, 1);
853 return rv;
854 }
855
856 if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
857 offload = FALSE;
858
859 /* Indicate the whole packet as payload when not doing TSO */
860 *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
861
862 /*
863 * In advanced descriptors the vlan tag must
864 * be placed into the context descriptor. Hence
865 * we need to make one even if not doing offloads.
866 */
867 if (vlan_has_tag(mp)) {
868 vtag = htole16(vlan_get_tag(mp));
869 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
870 } else if (!(txr->sc->feat_en & IXGBE_FEATURE_NEEDS_CTXD) &&
871 (offload == FALSE))
872 return (0);
873
874 /*
875 * Determine where frame payload starts.
876 * Jump over vlan headers if already present,
877 * helpful for QinQ too.
878 */
879 KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
880 eh = mtod(mp, struct ether_vlan_header *);
881 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
882 KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
883 etype = ntohs(eh->evl_proto);
884 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
885 } else {
886 etype = ntohs(eh->evl_encap_proto);
887 ehdrlen = ETHER_HDR_LEN;
888 }
889
890 /* Set the ether header length */
891 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
892
893 if (offload == FALSE)
894 goto no_offloads;
895
896 /*
897 * If the first mbuf only includes the ethernet header,
898 * jump to the next one
899 * XXX: This assumes the stack splits mbufs containing headers
900 * on header boundaries
901 * XXX: And assumes the entire IP header is contained in one mbuf
902 */
903 if (mp->m_len == ehdrlen && mp->m_next)
904 l3d = mtod(mp->m_next, char *);
905 else
906 l3d = mtod(mp, char *) + ehdrlen;
907
908 switch (etype) {
909 #ifdef INET
910 case ETHERTYPE_IP:
911 ip = (struct ip *)(l3d);
912 ip_hlen = ip->ip_hl << 2;
913 ipproto = ip->ip_p;
914 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
915 KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
916 ip->ip_sum == 0);
917 break;
918 #endif
919 #ifdef INET6
920 case ETHERTYPE_IPV6:
921 ip6 = (struct ip6_hdr *)(l3d);
922 ip_hlen = sizeof(struct ip6_hdr);
923 ipproto = ip6->ip6_nxt;
924 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
925 break;
926 #endif
927 default:
928 offload = false;
929 break;
930 }
931
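	/* Request IPv4 header checksum insertion (IXSM) when the stack asked for it. */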
932 if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
933 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
934
935 vlan_macip_lens |= ip_hlen;
936
937 /* No support for offloads for non-L4 next headers */
938 switch (ipproto) {
939 case IPPROTO_TCP:
940 if (mp->m_pkthdr.csum_flags &
941 (M_CSUM_TCPv4 | M_CSUM_TCPv6))
942 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
943 else
944 offload = false;
945 break;
946 case IPPROTO_UDP:
947 if (mp->m_pkthdr.csum_flags &
948 (M_CSUM_UDPv4 | M_CSUM_UDPv6))
949 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
950 else
951 offload = false;
952 break;
953 default:
954 offload = false;
955 break;
956 }
957
958 if (offload) /* Insert L4 checksum into data descriptors */
959 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
960
961 no_offloads:
962 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
963
964 /* Now ready a context descriptor */
965 TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
966
967 /* Now copy bits into descriptor */
968 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
969 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
970 TXD->seqnum_seed = htole32(0);
971 TXD->mss_l4len_idx = htole32(0);
972
973 /* We've consumed the first desc, adjust counters */
974 if (++ctxd == txr->num_desc)
975 ctxd = 0;
976 txr->next_avail_desc = ctxd;
977 --txr->tx_avail;
978
979 return (0);
980 } /* ixgbe_tx_ctx_setup */
981
982 /************************************************************************
983 * ixgbe_tso_setup
984 *
985 * Setup work for hardware segmentation offload (TSO) on
986 * adapters using advanced tx descriptors
987 ************************************************************************/
988 static int
989 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len,
990 u32 *olinfo_status)
991 {
992 struct ixgbe_adv_tx_context_desc *TXD;
993 struct ether_vlan_header *eh;
994 #ifdef INET6
995 struct ip6_hdr *ip6;
996 #endif
997 #ifdef INET
998 struct ip *ip;
999 #endif
1000 struct tcphdr *th;
1001 int ctxd, ehdrlen, ip_hlen, tcp_hlen;
1002 u32 vlan_macip_lens = 0;
1003 u32 type_tucmd_mlhl = 0;
1004 u32 mss_l4len_idx = 0, paylen;
1005 u16 vtag = 0, eh_type;
1006
1007 /*
1008 * Determine where frame payload starts.
1009 * Jump over vlan headers if already present
1010 */
1011 eh = mtod(mp, struct ether_vlan_header *);
1012 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1013 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1014 eh_type = eh->evl_proto;
1015 } else {
1016 ehdrlen = ETHER_HDR_LEN;
1017 eh_type = eh->evl_encap_proto;
1018 }
1019
1020 switch (ntohs(eh_type)) {
1021 #ifdef INET
1022 case ETHERTYPE_IP:
1023 ip = (struct ip *)(mp->m_data + ehdrlen);
1024 if (ip->ip_p != IPPROTO_TCP)
1025 return (ENXIO);
1026 ip->ip_sum = 0;
1027 ip_hlen = ip->ip_hl << 2;
1028 th = (struct tcphdr *)((char *)ip + ip_hlen);
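		/*
		 * Seed the TCP checksum with the pseudo-header sum; the
		 * hardware fills in the remainder for each TSO segment.
		 */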
1029 th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
1030 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1031 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
1032 /* Tell transmit desc to also do IPv4 checksum. */
1033 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
1034 break;
1035 #endif
1036 #ifdef INET6
1037 case ETHERTYPE_IPV6:
1038 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
1039 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
1040 if (ip6->ip6_nxt != IPPROTO_TCP)
1041 return (ENXIO);
1042 ip_hlen = sizeof(struct ip6_hdr);
1043 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
1044 th = (struct tcphdr *)((char *)ip6 + ip_hlen);
1045 th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
1046 &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
1047 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
1048 break;
1049 #endif
1050 default:
1051 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
1052 __func__, ntohs(eh_type));
1053 break;
1054 }
1055
1056 ctxd = txr->next_avail_desc;
1057 TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
1058
1059 tcp_hlen = th->th_off << 2;
1060
1061 /* This is used in the transmit desc in encap */
1062 paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
1063
1064 /* VLAN MACLEN IPLEN */
1065 if (vlan_has_tag(mp)) {
1066 vtag = htole16(vlan_get_tag(mp));
1067 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
1068 }
1069
1070 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
1071 vlan_macip_lens |= ip_hlen;
1072 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
1073
1074 /* ADV DTYPE TUCMD */
1075 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
1076 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
1077 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
1078
1079 /* MSS L4LEN IDX */
1080 mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
1081 mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
1082 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
1083
1084 TXD->seqnum_seed = htole32(0);
1085
1086 if (++ctxd == txr->num_desc)
1087 ctxd = 0;
1088
1089 txr->tx_avail--;
1090 txr->next_avail_desc = ctxd;
1091 *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
1092 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
1093 *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
1094 IXGBE_EVC_ADD(&txr->tso_tx, 1);
1095
1096 return (0);
1097 } /* ixgbe_tso_setup */
1098
1099
1100 /************************************************************************
1101 * ixgbe_txeof
1102 *
1103 * Examine each tx_buffer in the used queue. If the hardware is done
1104 * processing the packet then free associated resources. The
1105 * tx_buffer is put back on the free queue.
1106 ************************************************************************/
1107 bool
1108 ixgbe_txeof(struct tx_ring *txr)
1109 {
1110 struct ixgbe_softc *sc = txr->sc;
1111 struct ifnet *ifp = sc->ifp;
1112 struct ixgbe_tx_buf *buf;
1113 union ixgbe_adv_tx_desc *txd;
1114 u32 work, processed = 0;
1115 u32 limit = sc->tx_process_limit;
1116 u16 avail;
1117
1118 KASSERT(mutex_owned(&txr->tx_mtx));
1119
1120 #ifdef DEV_NETMAP
1121 if ((sc->feat_en & IXGBE_FEATURE_NETMAP) &&
1122 (sc->ifp->if_capenable & IFCAP_NETMAP)) {
1123 		struct netmap_adapter *na = NA(sc->ifp);
1124 struct netmap_kring *kring = na->tx_rings[txr->me];
1125 txd = txr->tx_base;
1126 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1127 BUS_DMASYNC_POSTREAD);
1128 /*
1129 * In netmap mode, all the work is done in the context
1130 * of the client thread. Interrupt handlers only wake up
1131 * clients, which may be sleeping on individual rings
1132 * or on a global resource for all rings.
1133 * To implement tx interrupt mitigation, we wake up the client
1134 * thread roughly every half ring, even if the NIC interrupts
1135 * more frequently. This is implemented as follows:
1136 * - ixgbe_txsync() sets kring->nr_kflags with the index of
1137 * the slot that should wake up the thread (nkr_num_slots
1138 * means the user thread should not be woken up);
1139 * - the driver ignores tx interrupts unless netmap_mitigate=0
1140 * or the slot has the DD bit set.
1141 */
1142 if (kring->nr_kflags < kring->nkr_num_slots &&
1143 le32toh(txd[kring->nr_kflags].wb.status) & IXGBE_TXD_STAT_DD) {
1144 netmap_tx_irq(ifp, txr->me);
1145 }
1146 return false;
1147 }
1148 #endif /* DEV_NETMAP */
1149
1150 if (txr->tx_avail == txr->num_desc) {
1151 txr->busy = 0;
1152 return false;
1153 }
1154
1155 /* Get work starting point */
1156 work = txr->next_to_clean;
1157 buf = &txr->tx_buffers[work];
1158 txd = &txr->tx_base[work];
1159 work -= txr->num_desc; /* The distance to ring end */
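	/*
	 * Note: 'work' is unsigned, so the subtraction gives it a bias of
	 * -num_desc (modulo 2^32).  It reaches zero exactly when the scan
	 * wraps past the end of the ring, and it is converted back into a
	 * real ring index below by adding num_desc.
	 */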
1160 avail = txr->tx_avail;
1161 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1162 BUS_DMASYNC_POSTREAD);
1163
1164 do {
1165 union ixgbe_adv_tx_desc *eop = buf->eop;
1166 if (eop == NULL) /* No work */
1167 break;
1168
1169 if ((le32toh(eop->wb.status) & IXGBE_TXD_STAT_DD) == 0)
1170 break; /* I/O not complete */
1171
1172 if (buf->m_head) {
1173 txr->bytes += buf->m_head->m_pkthdr.len;
1174 bus_dmamap_sync(txr->txtag->dt_dmat, buf->map,
1175 0, buf->m_head->m_pkthdr.len,
1176 BUS_DMASYNC_POSTWRITE);
1177 ixgbe_dmamap_unload(txr->txtag, buf->map);
1178 m_freem(buf->m_head);
1179 buf->m_head = NULL;
1180 }
1181 buf->eop = NULL;
1182 ++avail;
1183
1184 /* We clean the range if multi segment */
1185 while (txd != eop) {
1186 ++txd;
1187 ++buf;
1188 ++work;
1189 /* wrap the ring? */
1190 if (__predict_false(!work)) {
1191 work -= txr->num_desc;
1192 buf = txr->tx_buffers;
1193 txd = txr->tx_base;
1194 }
1195 if (buf->m_head) {
1196 txr->bytes +=
1197 buf->m_head->m_pkthdr.len;
1198 bus_dmamap_sync(txr->txtag->dt_dmat,
1199 buf->map,
1200 0, buf->m_head->m_pkthdr.len,
1201 BUS_DMASYNC_POSTWRITE);
1202 ixgbe_dmamap_unload(txr->txtag,
1203 buf->map);
1204 m_freem(buf->m_head);
1205 buf->m_head = NULL;
1206 }
1207 ++avail;
1208 buf->eop = NULL;
1209
1210 }
1211 ++processed;
1212
1213 /* Try the next packet */
1214 ++txd;
1215 ++buf;
1216 ++work;
1217 /* reset with a wrap */
1218 if (__predict_false(!work)) {
1219 work -= txr->num_desc;
1220 buf = txr->tx_buffers;
1221 txd = txr->tx_base;
1222 }
1223 prefetch(txd);
1224 } while (__predict_true(--limit));
1225
1226 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1227 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1228
1229 work += txr->num_desc;
1230 txr->next_to_clean = work;
1231 if (processed) {
1232 txr->tx_avail = avail;
1233 txr->txr_no_space = false;
1234 txr->packets += processed;
1235 if_statadd(ifp, if_opackets, processed);
1236 }
1237
1238 /*
1239 	 * Queue hang detection: we know there's
1240 	 * work outstanding or the first return
1241 	 * would have been taken, so increment busy
1242 	 * if nothing managed to get cleaned; the
1243 	 * local timer will then check it and mark
1244 	 * the queue as HUNG if it exceeds MAX attempts.
1245 */
1246 if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1247 ++txr->busy;
1248 /*
1249 	 * If anything gets cleaned we reset the state to 1;
1250 	 * note this will clear HUNG if it's set.
1251 */
1252 if (processed)
1253 txr->busy = 1;
1254
1255 if (txr->tx_avail == txr->num_desc)
1256 txr->busy = 0;
1257
1258 return ((limit > 0) ? false : true);
1259 } /* ixgbe_txeof */
1260
1261 /************************************************************************
1262 * ixgbe_rsc_count
1263 *
1264 * Used to detect a descriptor that has been merged by Hardware RSC.
1265 ************************************************************************/
1266 static inline u32
1267 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1268 {
1269 return (le32toh(rx->wb.lower.lo_dword.data) &
1270 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1271 } /* ixgbe_rsc_count */
1272
1273 /************************************************************************
1274 * ixgbe_setup_hw_rsc
1275 *
1276 * Initialize Hardware RSC (LRO) feature on 82599
1277  *   for an RX ring; this is toggled by the LRO capability
1278 * even though it is transparent to the stack.
1279 *
1280 * NOTE: Since this HW feature only works with IPv4 and
1281 * testing has shown soft LRO to be as effective,
1282 * this feature will be disabled by default.
1283 ************************************************************************/
1284 static void
1285 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1286 {
1287 struct ixgbe_softc *sc = rxr->sc;
1288 struct ixgbe_hw *hw = &sc->hw;
1289 u32 rscctrl, rdrxctl;
1290
1291 /* If turning LRO/RSC off we need to disable it */
1292 if ((sc->ifp->if_capenable & IFCAP_LRO) == 0) {
1293 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1294 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1295 		return;
1296 }
1297
1298 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1299 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1300 #ifdef DEV_NETMAP
1301 /* Always strip CRC unless Netmap disabled it */
1302 if (!(sc->feat_en & IXGBE_FEATURE_NETMAP) ||
1303 !(sc->ifp->if_capenable & IFCAP_NETMAP) ||
1304 ix_crcstrip)
1305 #endif /* DEV_NETMAP */
1306 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1307 rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1308 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1309
1310 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1311 rscctrl |= IXGBE_RSCCTL_RSCEN;
1312 /*
1313 * Limit the total number of descriptors that
1314 * can be combined, so it does not exceed 64K
1315 */
1316 if (rxr->mbuf_sz == MCLBYTES)
1317 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1318 else if (rxr->mbuf_sz == MJUMPAGESIZE)
1319 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1320 else if (rxr->mbuf_sz == MJUM9BYTES)
1321 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1322 else /* Using 16K cluster */
1323 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1324
1325 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1326
1327 /* Enable TCP header recognition */
1328 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1329 (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR));
1330
1331 /* Disable RSC for ACK packets */
1332 IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1333 (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1334
1335 rxr->hw_rsc = TRUE;
1336 } /* ixgbe_setup_hw_rsc */
1337
1338 /************************************************************************
1339 * ixgbe_refresh_mbufs
1340 *
1341 * Refresh mbuf buffers for RX descriptor rings
1342 * - now keeps its own state so discards due to resource
1343  *     exhaustion are unnecessary; if an mbuf cannot be obtained
1344  *     it just returns, keeping its placeholder, so it can simply
1345  *     be called again to retry.
1346 ************************************************************************/
1347 static void
1348 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1349 {
1350 struct ixgbe_softc *sc = rxr->sc;
1351 struct ixgbe_rx_buf *rxbuf;
1352 struct mbuf *mp;
1353 int i, error;
1354 bool refreshed = false;
1355
1356 i = rxr->next_to_refresh;
1357 /* next_to_refresh points to the previous one */
1358 if (++i == rxr->num_desc)
1359 i = 0;
1360
1361 while (i != limit) {
1362 rxbuf = &rxr->rx_buffers[i];
1363 if (__predict_false(rxbuf->buf == NULL)) {
1364 mp = ixgbe_getcl();
1365 if (mp == NULL) {
1366 IXGBE_EVC_ADD(&rxr->no_mbuf, 1);
1367 goto update;
1368 }
1369 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1370 IXGBE_M_ADJ(sc, rxr, mp);
1371 } else
1372 mp = rxbuf->buf;
1373
1374 /* If we're dealing with an mbuf that was copied rather
1375 * than replaced, there's no need to go through busdma.
1376 */
1377 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1378 /* Get the memory mapping */
1379 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
1380 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
1381 rxbuf->pmap, mp, BUS_DMA_NOWAIT);
1382 if (__predict_false(error != 0)) {
1383 device_printf(sc->dev, "Refresh mbufs: "
1384 "payload dmamap load failure - %d\n",
1385 error);
1386 m_free(mp);
1387 rxbuf->buf = NULL;
1388 goto update;
1389 }
1390 rxbuf->buf = mp;
1391 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1392 0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
1393 rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1394 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1395 } else {
1396 rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1397 rxbuf->flags &= ~IXGBE_RX_COPY;
1398 }
1399
1400 refreshed = true;
1401 /* next_to_refresh points to the previous one */
1402 rxr->next_to_refresh = i;
1403 if (++i == rxr->num_desc)
1404 i = 0;
1405 }
1406
1407 update:
1408 if (refreshed) /* Update hardware tail index */
1409 IXGBE_WRITE_REG(&sc->hw, rxr->tail, rxr->next_to_refresh);
1410
1411 return;
1412 } /* ixgbe_refresh_mbufs */
1413
1414 /************************************************************************
1415 * ixgbe_allocate_receive_buffers
1416 *
1417 * Allocate memory for rx_buffer structures. Since we use one
1418  * rx_buffer per received packet, the maximum number of rx_buffers
1419 * that we'll need is equal to the number of receive descriptors
1420 * that we've allocated.
1421 ************************************************************************/
1422 static int
1423 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1424 {
1425 struct ixgbe_softc *sc = rxr->sc;
1426 device_t dev = sc->dev;
1427 struct ixgbe_rx_buf *rxbuf;
1428 int bsize, error;
1429
1430 bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1431 rxr->rx_buffers = kmem_zalloc(bsize, KM_SLEEP);
1432
1433 error = ixgbe_dma_tag_create(
1434 /* parent */ sc->osdep.dmat,
1435 /* alignment */ 1,
1436 /* bounds */ 0,
1437 /* maxsize */ MJUM16BYTES,
1438 /* nsegments */ 1,
1439 /* maxsegsize */ MJUM16BYTES,
1440 /* flags */ 0,
1441 &rxr->ptag);
1442 if (error != 0) {
1443 aprint_error_dev(dev, "Unable to create RX DMA tag\n");
1444 goto fail;
1445 }
1446
1447 for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
1448 rxbuf = &rxr->rx_buffers[i];
1449 error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1450 if (error) {
1451 aprint_error_dev(dev, "Unable to create RX dma map\n");
1452 goto fail;
1453 }
1454 }
1455
1456 return (0);
1457
1458 fail:
1459 /* Frees all, but can handle partial completion */
1460 ixgbe_free_receive_structures(sc);
1461
1462 return (error);
1463 } /* ixgbe_allocate_receive_buffers */
1464
1465 /************************************************************************
1466 * ixgbe_free_receive_ring
1467 ************************************************************************/
1468 static void
1469 ixgbe_free_receive_ring(struct rx_ring *rxr)
1470 {
1471 for (int i = 0; i < rxr->num_desc; i++) {
1472 ixgbe_rx_discard(rxr, i);
1473 }
1474 } /* ixgbe_free_receive_ring */
1475
1476 /************************************************************************
1477 * ixgbe_setup_receive_ring
1478 *
1479 * Initialize a receive ring and its buffers.
1480 ************************************************************************/
1481 static int
1482 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1483 {
1484 struct ixgbe_softc *sc;
1485 struct ixgbe_rx_buf *rxbuf;
1486 #ifdef LRO
1487 struct ifnet *ifp;
1488 struct lro_ctrl *lro = &rxr->lro;
1489 #endif /* LRO */
1490 #ifdef DEV_NETMAP
1491 	struct netmap_adapter *na = NA(rxr->sc->ifp);
1492 struct netmap_slot *slot;
1493 #endif /* DEV_NETMAP */
1494 int rsize, error = 0;
1495
1496 sc = rxr->sc;
1497 #ifdef LRO
1498 ifp = sc->ifp;
1499 #endif /* LRO */
1500
1501 /* Clear the ring contents */
1502 IXGBE_RX_LOCK(rxr);
1503
1504 #ifdef DEV_NETMAP
1505 if (sc->feat_en & IXGBE_FEATURE_NETMAP)
1506 slot = netmap_reset(na, NR_RX, rxr->me, 0);
1507 #endif /* DEV_NETMAP */
1508
1509 rsize = sc->num_rx_desc * sizeof(union ixgbe_adv_rx_desc);
1510 KASSERT((rsize % DBA_ALIGN) == 0);
1511 bzero((void *)rxr->rx_base, rsize);
1512 /* Cache the size */
1513 rxr->mbuf_sz = sc->rx_mbuf_sz;
1514
1515 /* Free current RX buffer structs and their mbufs */
1516 ixgbe_free_receive_ring(rxr);
1517
1518 /* Now replenish the mbufs */
1519 for (int i = 0; i < rxr->num_desc; i++) {
1520 struct mbuf *mp;
1521
1522 rxbuf = &rxr->rx_buffers[i];
1523
1524 #ifdef DEV_NETMAP
1525 /*
1526 * In netmap mode, fill the map and set the buffer
1527 * address in the NIC ring, considering the offset
1528 * between the netmap and NIC rings (see comment in
1529 * ixgbe_setup_transmit_ring() ). No need to allocate
1530 * an mbuf, so end the block with a continue;
1531 */
1532 if ((sc->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
1533 int sj = netmap_idx_n2k(na->rx_rings[rxr->me], i);
1534 uint64_t paddr;
1535 void *addr;
1536
1537 addr = PNMB(na, slot + sj, &paddr);
1538 netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1539 /* Update descriptor and the cached value */
1540 rxr->rx_base[i].read.pkt_addr = htole64(paddr);
1541 rxbuf->addr = htole64(paddr);
1542 continue;
1543 }
1544 #endif /* DEV_NETMAP */
1545
1546 rxbuf->flags = 0;
1547 rxbuf->buf = ixgbe_getcl();
1548 if (rxbuf->buf == NULL) {
1549 IXGBE_EVC_ADD(&rxr->no_mbuf, 1);
1550 error = ENOBUFS;
1551 goto fail;
1552 }
1553 mp = rxbuf->buf;
1554 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1555 IXGBE_M_ADJ(sc, rxr, mp);
1556 /* Get the memory mapping */
1557 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat, rxbuf->pmap,
1558 mp, BUS_DMA_NOWAIT);
1559 if (error != 0) {
1560 /*
1561 * Clear this entry for later cleanup in
1562 * ixgbe_discard() which is called via
1563 * ixgbe_free_receive_ring().
1564 */
1565 m_freem(mp);
1566 rxbuf->buf = NULL;
1567 goto fail;
1568 }
1569 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1570 0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
1571 /* Update the descriptor and the cached value */
1572 rxr->rx_base[i].read.pkt_addr =
1573 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1574 rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1575 }
1576
1577 /* Setup our descriptor indices */
1578 rxr->next_to_check = 0;
1579 rxr->next_to_refresh = sc->num_rx_desc - 1; /* Fully allocated */
1580 rxr->lro_enabled = FALSE;
1581 rxr->discard_multidesc = false;
1582 IXGBE_EVC_STORE(&rxr->rx_copies, 0);
1583 #if 0 /* NetBSD */
1584 IXGBE_EVC_STORE(&rxr->rx_bytes, 0);
1585 #if 1 /* Fix inconsistency */
1586 IXGBE_EVC_STORE(&rxr->rx_packets, 0);
1587 #endif
1588 #endif
1589 rxr->vtag_strip = FALSE;
1590
1591 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1592 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1593
1594 /*
1595 * Now set up the LRO interface
1596 */
1597 if (ixgbe_rsc_enable)
1598 ixgbe_setup_hw_rsc(rxr);
1599 #ifdef LRO
1600 else if (ifp->if_capenable & IFCAP_LRO) {
1601 device_t dev = sc->dev;
1602 int err = tcp_lro_init(lro);
1603 if (err) {
1604 device_printf(dev, "LRO Initialization failed!\n");
1605 goto fail;
1606 }
1607 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1608 rxr->lro_enabled = TRUE;
1609 lro->ifp = sc->ifp;
1610 }
1611 #endif /* LRO */
1612
1613 IXGBE_RX_UNLOCK(rxr);
1614
1615 return (0);
1616
1617 fail:
1618 ixgbe_free_receive_ring(rxr);
1619 IXGBE_RX_UNLOCK(rxr);
1620
1621 return (error);
1622 } /* ixgbe_setup_receive_ring */
1623
1624 /************************************************************************
1625 * ixgbe_setup_receive_structures - Initialize all receive rings.
1626 ************************************************************************/
1627 int
1628 ixgbe_setup_receive_structures(struct ixgbe_softc *sc)
1629 {
1630 struct rx_ring *rxr = sc->rx_rings;
1631 int j;
1632
1633 INIT_DEBUGOUT("ixgbe_setup_receive_structures");
1634 for (j = 0; j < sc->num_queues; j++, rxr++)
1635 if (ixgbe_setup_receive_ring(rxr))
1636 goto fail;
1637
1638 return (0);
1639 fail:
1640 /*
1641 	 * Free RX buffers allocated so far; we will only handle
1642 	 * the rings that completed, since the failing case will have
1643 	 * cleaned up after itself. 'j' failed, so it's the terminus.
1644 */
1645 for (int i = 0; i < j; ++i) {
1646 rxr = &sc->rx_rings[i];
1647 IXGBE_RX_LOCK(rxr);
1648 ixgbe_free_receive_ring(rxr);
1649 IXGBE_RX_UNLOCK(rxr);
1650 }
1651
1652 return (ENOBUFS);
1653 } /* ixgbe_setup_receive_structures */
1654
1655
1656 /************************************************************************
1657 * ixgbe_free_receive_structures - Free all receive rings.
1658 ************************************************************************/
1659 void
1660 ixgbe_free_receive_structures(struct ixgbe_softc *sc)
1661 {
1662 struct rx_ring *rxr = sc->rx_rings;
1663
1664 INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1665
1666 for (int i = 0; i < sc->num_queues; i++, rxr++) {
1667 ixgbe_free_receive_buffers(rxr);
1668 #ifdef LRO
1669 /* Free LRO memory */
1670 tcp_lro_free(&rxr->lro);
1671 #endif /* LRO */
1672 /* Free the ring memory as well */
1673 ixgbe_dma_free(sc, &rxr->rxdma);
1674 IXGBE_RX_LOCK_DESTROY(rxr);
1675 }
1676
1677 kmem_free(sc->rx_rings, sizeof(struct rx_ring) * sc->num_queues);
1678 } /* ixgbe_free_receive_structures */
1679
1680
1681 /************************************************************************
1682 * ixgbe_free_receive_buffers - Free receive ring data structures
1683 ************************************************************************/
1684 static void
1685 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1686 {
1687 struct ixgbe_softc *sc = rxr->sc;
1688 struct ixgbe_rx_buf *rxbuf;
1689
1690 INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1691
1692 /* Cleanup any existing buffers */
1693 if (rxr->rx_buffers != NULL) {
1694 for (int i = 0; i < sc->num_rx_desc; i++) {
1695 rxbuf = &rxr->rx_buffers[i];
1696 ixgbe_rx_discard(rxr, i);
1697 if (rxbuf->pmap != NULL) {
1698 ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1699 rxbuf->pmap = NULL;
1700 }
1701 }
1702
1703 if (rxr->rx_buffers != NULL) {
1704 kmem_free(rxr->rx_buffers,
1705 sizeof(struct ixgbe_rx_buf) * rxr->num_desc);
1706 rxr->rx_buffers = NULL;
1707 }
1708 }
1709
1710 if (rxr->ptag != NULL) {
1711 ixgbe_dma_tag_destroy(rxr->ptag);
1712 rxr->ptag = NULL;
1713 }
1714
1715 return;
1716 } /* ixgbe_free_receive_buffers */
1717
1718 /************************************************************************
1719 * ixgbe_rx_input
1720 ************************************************************************/
1721 static __inline void
1722 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m,
1723 u32 ptype)
1724 {
1725 struct ixgbe_softc *sc = ifp->if_softc;
1726
1727 #ifdef LRO
1728 struct ethercom *ec = &sc->osdep.ec;
1729
1730 /*
1731 	 * At the moment LRO is only for IP/TCP packets whose TCP checksum has
1732 	 * been computed by hardware, and the packet must not have a VLAN tag in
1733 	 * the ethernet header. In case of IPv6 we do not yet support ext. hdrs.
1734 */
1735 if (rxr->lro_enabled &&
1736 (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
1737 (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1738 ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1739 (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1740 (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1741 (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1742 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1743 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1744 /*
1745 * Send to the stack if:
1746 * - LRO not enabled, or
1747 * - no LRO resources, or
1748 * - lro enqueue fails
1749 */
1750 if (rxr->lro.lro_cnt != 0)
1751 if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1752 return;
1753 }
1754 #endif /* LRO */
1755
1756 if_percpuq_enqueue(sc->ipq, m);
1757 } /* ixgbe_rx_input */
1758
1759 /************************************************************************
1760 * ixgbe_rx_discard
1761 ************************************************************************/
1762 static __inline void
1763 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1764 {
1765 struct ixgbe_rx_buf *rbuf;
1766
1767 rbuf = &rxr->rx_buffers[i];
1768
1769 /*
1770 * With advanced descriptors the writeback clobbers the buffer addrs,
1771 	 * so it's easier to just free the existing mbufs and take the normal
1772 * refresh path to get new buffers and mapping.
1773 */
1774
1775 if (rbuf->fmp != NULL) {/* Partial chain ? */
1776 bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1777 rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1778 ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
1779 m_freem(rbuf->fmp);
1780 rbuf->fmp = NULL;
1781 rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1782 } else if (rbuf->buf) {
1783 bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1784 rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1785 ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
1786 m_free(rbuf->buf);
1787 rbuf->buf = NULL;
1788 }
1789
1790 rbuf->flags = 0;
1791
1792 return;
1793 } /* ixgbe_rx_discard */
1794
1795
1796 /************************************************************************
1797 * ixgbe_rxeof
1798 *
1799  *   Executes in interrupt context. It replenishes the
1800  *   mbufs in the descriptor ring and sends data which has
1801  *   been DMA'ed into host memory to the upper layer.
1802 *
1803 * Return TRUE for more work, FALSE for all clean.
1804 ************************************************************************/
1805 bool
1806 ixgbe_rxeof(struct ix_queue *que)
1807 {
1808 struct ixgbe_softc *sc = que->sc;
1809 struct rx_ring *rxr = que->rxr;
1810 struct ifnet *ifp = sc->ifp;
1811 #ifdef LRO
1812 struct lro_ctrl *lro = &rxr->lro;
1813 #endif /* LRO */
1814 union ixgbe_adv_rx_desc *cur;
1815 struct ixgbe_rx_buf *rbuf, *nbuf;
1816 int i, nextp, processed = 0;
1817 u32 staterr = 0;
1818 u32 loopcount = 0, numdesc;
1819 u32 limit = sc->rx_process_limit;
1820 u32 rx_copy_len = sc->rx_copy_len;
1821 bool discard_multidesc = rxr->discard_multidesc;
1822 bool wraparound = false;
1823 unsigned int syncremain;
1824 #ifdef RSS
1825 u16 pkt_info;
1826 #endif
1827
1828 IXGBE_RX_LOCK(rxr);
1829
1830 #ifdef DEV_NETMAP
1831 if (sc->feat_en & IXGBE_FEATURE_NETMAP) {
1832 /* Same as the txeof routine: wakeup clients on intr. */
1833 if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1834 IXGBE_RX_UNLOCK(rxr);
1835 return (FALSE);
1836 }
1837 }
1838 #endif /* DEV_NETMAP */
1839
1840 	/* Sync the ring. The length is rx_process_limit or, on wrap, the first part. */
1841 if ((rxr->next_to_check + limit) <= rxr->num_desc) {
1842 /* Non-wraparound */
1843 numdesc = limit;
1844 syncremain = 0;
1845 } else {
1846 		/* Wraparound. Sync up to the end of the ring first. */
1847 numdesc = rxr->num_desc - rxr->next_to_check;
1848
1849 		/* Remember how much remains to sync after the wrap */
1850 syncremain = limit - numdesc;
1851 }
1852 bus_dmamap_sync(rxr->rxdma.dma_tag->dt_dmat,
1853 rxr->rxdma.dma_map,
1854 sizeof(union ixgbe_adv_rx_desc) * rxr->next_to_check,
1855 sizeof(union ixgbe_adv_rx_desc) * numdesc,
1856 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1857
1858 /*
1859 	 * The maximum number of loop iterations is rx_process_limit. If
1860 	 * discard_multidesc is true, keep processing so that a broken
1861 	 * packet is not handed to the upper layer.
1862 */
1863 for (i = rxr->next_to_check;
1864 (loopcount < limit) || (discard_multidesc == true);) {
1865
1866 struct mbuf *sendmp, *mp;
1867 struct mbuf *newmp;
1868 u32 rsc, ptype;
1869 u16 len;
1870 u16 vtag = 0;
1871 bool eop;
1872 bool discard = false;
1873
1874 if (wraparound) {
1875 			/* Sync the descriptors remaining after the wrap. */
1876 KASSERT(syncremain != 0);
1877 numdesc = syncremain;
1878 wraparound = false;
1879 } else if (__predict_false(loopcount >= limit)) {
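			/*
			 * Past rx_process_limit, but still draining a
			 * discarded multi-descriptor packet: sync one
			 * descriptor per extra iteration.
			 */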
1880 KASSERT(discard_multidesc == true);
1881 numdesc = 1;
1882 } else
1883 numdesc = 0;
1884
1885 if (numdesc != 0)
1886 bus_dmamap_sync(rxr->rxdma.dma_tag->dt_dmat,
1887 rxr->rxdma.dma_map, 0,
1888 sizeof(union ixgbe_adv_rx_desc) * numdesc,
1889 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1890
1891 cur = &rxr->rx_base[i];
1892 staterr = le32toh(cur->wb.upper.status_error);
1893 #ifdef RSS
1894 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1895 #endif
1896
1897 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1898 break;
1899
1900 loopcount++;
1901 sendmp = newmp = NULL;
1902 nbuf = NULL;
1903 rsc = 0;
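		/*
		 * Clear the status so a stale DD bit is not mistaken for a
		 * new completion when this slot is examined again.
		 */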
1904 cur->wb.upper.status_error = 0;
1905 rbuf = &rxr->rx_buffers[i];
1906 mp = rbuf->buf;
1907
1908 len = le16toh(cur->wb.upper.length);
1909 ptype = le32toh(cur->wb.lower.lo_dword.data) &
1910 IXGBE_RXDADV_PKTTYPE_MASK;
1911 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1912
1913 /* Make sure bad packets are discarded */
1914 if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1915 #if __FreeBSD_version >= 1100036
1916 if (sc->feat_en & IXGBE_FEATURE_VF)
1917 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1918 #endif
1919 IXGBE_EVC_ADD(&rxr->rx_discarded, 1);
1920 ixgbe_rx_discard(rxr, i);
1921 discard_multidesc = false;
1922 goto next_desc;
1923 }
1924
1925 if (__predict_false(discard_multidesc))
1926 discard = true;
1927 else {
1928 /* Pre-alloc new mbuf. */
1929
1930 if ((rbuf->fmp == NULL) &&
1931 eop && (len <= rx_copy_len)) {
1932 /* For short packet. See below. */
1933 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1934 if (__predict_false(sendmp == NULL)) {
1935 IXGBE_EVC_ADD(&rxr->no_mbuf, 1);
1936 discard = true;
1937 }
1938 } else {
1939 /* For long packet. */
1940 newmp = ixgbe_getcl();
1941 if (__predict_false(newmp == NULL)) {
1942 IXGBE_EVC_ADD(&rxr->no_mbuf, 1);
1943 discard = true;
1944 }
1945 }
1946 }
1947
1948 if (__predict_false(discard)) {
1949 /*
1950 * Descriptor initialization is already done by the
1951 * above code (cur->wb.upper.status_error = 0).
1952 			 * So, we can reuse the current rbuf->buf for a new packet.
1953 *
1954 * Rewrite the buffer addr, see comment in
1955 * ixgbe_rx_discard().
1956 */
1957 cur->read.pkt_addr = rbuf->addr;
1958 m_freem(rbuf->fmp);
1959 rbuf->fmp = NULL;
1960 if (!eop) {
1961 /* Discard the entire packet. */
1962 discard_multidesc = true;
1963 } else
1964 discard_multidesc = false;
1965 goto next_desc;
1966 }
1967 discard_multidesc = false;
1968
1969 bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1970 rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1971
1972 		/*
1973 		 * On the 82599, which supports a hardware
1974 		 * LRO (called HW RSC), packets need not
1975 		 * be fragmented across sequential
1976 		 * descriptors; rather, the next descriptor
1977 		 * is indicated in bits of the descriptor.
1978 		 * This also means that we might process
1979 		 * more than one packet at a time, something
1980 		 * that has never been true before. It
1981 		 * required eliminating global chain pointers
1982 		 * in favor of what we are doing here. -jfv
1983 		 */
1984 if (!eop) {
1985 /*
1986 * Figure out the next descriptor
1987 * of this frame.
1988 */
1989 if (rxr->hw_rsc == TRUE) {
1990 rsc = ixgbe_rsc_count(cur);
1991 rxr->rsc_num += (rsc - 1);
1992 }
1993 if (rsc) { /* Get hardware index */
1994 nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
1995 IXGBE_RXDADV_NEXTP_SHIFT);
1996 } else { /* Just sequential */
1997 nextp = i + 1;
1998 if (nextp == sc->num_rx_desc)
1999 nextp = 0;
2000 }
2001 nbuf = &rxr->rx_buffers[nextp];
2002 prefetch(nbuf);
2003 }
2004 /*
2005 * Rather than using the fmp/lmp global pointers
2006 * we now keep the head of a packet chain in the
2007 * buffer struct and pass this along from one
2008 * descriptor to the next, until we get EOP.
2009 */
2010 /*
2011 		 * See if a previous descriptor stored a packet head here;
2012 		 * if so, this buffer is a continuation of that chain.
2013 */
2014 if (rbuf->fmp != NULL) {
2015 /* Secondary frag */
2016 sendmp = rbuf->fmp;
2017
2018 /* Update new (used in future) mbuf */
2019 newmp->m_pkthdr.len = newmp->m_len = rxr->mbuf_sz;
2020 IXGBE_M_ADJ(sc, rxr, newmp);
2021 rbuf->buf = newmp;
2022 rbuf->fmp = NULL;
2023
2024 /* For secondary frag */
2025 mp->m_len = len;
2026 mp->m_flags &= ~M_PKTHDR;
2027
2028 /* For sendmp */
2029 sendmp->m_pkthdr.len += mp->m_len;
2030 } else {
2031 /*
2032 * It's the first segment of a multi descriptor
2033 * packet or a single segment which contains a full
2034 * packet.
2035 */
2036
2037 if (eop && (len <= rx_copy_len)) {
2038 /*
2039 				 * Optimization: this might be a small packet,
2040 				 * perhaps just a TCP ACK. Copy it into a new
2041 				 * mbuf and leave the old mbuf+cluster for re-use.
2042 */
2043 sendmp->m_data += ETHER_ALIGN;
2044 memcpy(mtod(sendmp, void *),
2045 mtod(mp, void *), len);
2046 IXGBE_EVC_ADD(&rxr->rx_copies, 1);
2047 rbuf->flags |= IXGBE_RX_COPY;
2048 } else {
2049 /* For long packet */
2050
2051 /* Update new (used in future) mbuf */
2052 newmp->m_pkthdr.len = newmp->m_len
2053 = rxr->mbuf_sz;
2054 IXGBE_M_ADJ(sc, rxr, newmp);
2055 rbuf->buf = newmp;
2056 rbuf->fmp = NULL;
2057
2058 /* For sendmp */
2059 sendmp = mp;
2060 }
2061
2062 			/* First descriptor of a non-packet-split chain */
2063 sendmp->m_pkthdr.len = sendmp->m_len = len;
2064 }
2065 ++processed;
2066
2067 /* Pass the head pointer on */
2068 if (eop == 0) {
2069 nbuf->fmp = sendmp;
2070 sendmp = NULL;
2071 mp->m_next = nbuf->buf;
2072 } else { /* Sending this frame */
2073 m_set_rcvif(sendmp, ifp);
2074 ++rxr->packets;
2075 IXGBE_EVC_ADD(&rxr->rx_packets, 1);
2076 			/* Capture data for AIM (adaptive interrupt moderation) */
2077 rxr->bytes += sendmp->m_pkthdr.len;
2078 IXGBE_EVC_ADD(&rxr->rx_bytes, sendmp->m_pkthdr.len);
2079 /* Process vlan info */
2080 if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
2081 vtag = le16toh(cur->wb.upper.vlan);
2082 if (vtag) {
2083 vlan_set_tag(sendmp, vtag);
2084 }
2085 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
2086 ixgbe_rx_checksum(staterr, sendmp, ptype,
2087 &sc->stats.pf);
2088 }
2089
2090 #if 0 /* FreeBSD */
2091 /*
2092 * In case of multiqueue, we have RXCSUM.PCSD bit set
2093 * and never cleared. This means we have RSS hash
2094 * available to be used.
2095 */
2096 if (sc->num_queues > 1) {
2097 sendmp->m_pkthdr.flowid =
2098 le32toh(cur->wb.lower.hi_dword.rss);
2099 switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
2100 case IXGBE_RXDADV_RSSTYPE_IPV4:
2101 M_HASHTYPE_SET(sendmp,
2102 M_HASHTYPE_RSS_IPV4);
2103 break;
2104 case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
2105 M_HASHTYPE_SET(sendmp,
2106 M_HASHTYPE_RSS_TCP_IPV4);
2107 break;
2108 case IXGBE_RXDADV_RSSTYPE_IPV6:
2109 M_HASHTYPE_SET(sendmp,
2110 M_HASHTYPE_RSS_IPV6);
2111 break;
2112 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
2113 M_HASHTYPE_SET(sendmp,
2114 M_HASHTYPE_RSS_TCP_IPV6);
2115 break;
2116 case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
2117 M_HASHTYPE_SET(sendmp,
2118 M_HASHTYPE_RSS_IPV6_EX);
2119 break;
2120 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
2121 M_HASHTYPE_SET(sendmp,
2122 M_HASHTYPE_RSS_TCP_IPV6_EX);
2123 break;
2124 #if __FreeBSD_version > 1100000
2125 case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
2126 M_HASHTYPE_SET(sendmp,
2127 M_HASHTYPE_RSS_UDP_IPV4);
2128 break;
2129 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
2130 M_HASHTYPE_SET(sendmp,
2131 M_HASHTYPE_RSS_UDP_IPV6);
2132 break;
2133 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
2134 M_HASHTYPE_SET(sendmp,
2135 M_HASHTYPE_RSS_UDP_IPV6_EX);
2136 break;
2137 #endif
2138 default:
2139 M_HASHTYPE_SET(sendmp,
2140 M_HASHTYPE_OPAQUE_HASH);
2141 }
2142 } else {
2143 sendmp->m_pkthdr.flowid = que->msix;
2144 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
2145 }
2146 #endif
2147 }
2148 next_desc:
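		/* Sync our updates to the descriptor ring back for the device. */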
2149 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
2150 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2151
2152 /* Advance our pointers to the next descriptor. */
2153 if (++i == rxr->num_desc) {
2154 wraparound = true;
2155 i = 0;
2156 }
2157 rxr->next_to_check = i;
2158
2159 /* Now send to the stack or do LRO */
2160 if (sendmp != NULL)
2161 ixgbe_rx_input(rxr, ifp, sendmp, ptype);
2162
2163 /* Every 8 descriptors we go to refresh mbufs */
2164 if (processed == 8) {
2165 ixgbe_refresh_mbufs(rxr, i);
2166 processed = 0;
2167 }
2168 }
2169
2170 /* Save the current status */
2171 rxr->discard_multidesc = discard_multidesc;
2172
2173 /* Refresh any remaining buf structs */
2174 if (ixgbe_rx_unrefreshed(rxr))
2175 ixgbe_refresh_mbufs(rxr, i);
2176
2177 IXGBE_RX_UNLOCK(rxr);
2178
2179 #ifdef LRO
2180 /*
2181 * Flush any outstanding LRO work
2182 */
2183 tcp_lro_flush_all(lro);
2184 #endif /* LRO */
2185
2186 /*
2187 * Still have cleaning to do?
2188 */
2189 if ((staterr & IXGBE_RXD_STAT_DD) != 0)
2190 return (TRUE);
2191
2192 return (FALSE);
2193 } /* ixgbe_rxeof */
2194
2195
2196 /************************************************************************
2197 * ixgbe_rx_checksum
2198 *
2199 * Verify that the hardware indicated that the checksum is valid.
2200  *   Inform the stack about the status of the checksum so that the
2201  *   stack doesn't spend time verifying it.
2202 ************************************************************************/
2203 static void
2204 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
2205 struct ixgbe_hw_stats *stats)
2206 {
2207 u16 status = (u16)staterr;
2208 u8 errors = (u8)(staterr >> 24);
2209 #if 0
2210 bool sctp = false;
2211
2212 if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
2213 (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
2214 sctp = true;
2215 #endif
2216
2217 /* IPv4 checksum */
2218 if (status & IXGBE_RXD_STAT_IPCS) {
2219 IXGBE_EVC_ADD(&stats->ipcs, 1);
2220 if (!(errors & IXGBE_RXD_ERR_IPE)) {
2221 /* IP Checksum Good */
2222 mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
2223 } else {
2224 IXGBE_EVC_ADD(&stats->ipcs_bad, 1);
2225 mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
2226 }
2227 }
2228 /* TCP/UDP/SCTP checksum */
2229 if (status & IXGBE_RXD_STAT_L4CS) {
2230 IXGBE_EVC_ADD(&stats->l4cs, 1);
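		/*
		 * The L4CS status bit does not say which L4 protocol was
		 * checksummed, so set all of the TCP/UDP flags here.
		 */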
2231 int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
2232 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
2233 mp->m_pkthdr.csum_flags |= type;
2234 } else {
2235 IXGBE_EVC_ADD(&stats->l4cs_bad, 1);
2236 mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
2237 }
2238 }
2239 } /* ixgbe_rx_checksum */
2240
2241 /************************************************************************
2242 * ixgbe_dma_malloc
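 *
 *   Allocate, map and load a DMA-safe memory area for a descriptor
 *   ring and return its virtual and physical addresses in 'dma'.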
2243 ************************************************************************/
2244 int
2245 ixgbe_dma_malloc(struct ixgbe_softc *sc, const bus_size_t size,
2246 struct ixgbe_dma_alloc *dma, const int mapflags)
2247 {
2248 device_t dev = sc->dev;
2249 int r, rsegs;
2250
2251 r = ixgbe_dma_tag_create(
2252 /* parent */ sc->osdep.dmat,
2253 /* alignment */ DBA_ALIGN,
2254 /* bounds */ 0,
2255 /* maxsize */ size,
2256 /* nsegments */ 1,
2257 /* maxsegsize */ size,
2258 /* flags */ BUS_DMA_ALLOCNOW,
2259 &dma->dma_tag);
2260 if (r != 0) {
2261 aprint_error_dev(dev,
2262 "%s: ixgbe_dma_tag_create failed; error %d\n", __func__,
2263 r);
2264 goto fail_0;
2265 }
2266
2267 r = bus_dmamem_alloc(dma->dma_tag->dt_dmat, size,
2268 dma->dma_tag->dt_alignment, dma->dma_tag->dt_boundary,
2269 &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
2270 if (r != 0) {
2271 aprint_error_dev(dev,
2272 "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
2273 goto fail_1;
2274 }
2275
2276 r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
2277 size, &dma->dma_vaddr, BUS_DMA_NOWAIT | BUS_DMA_COHERENT);
2278 if (r != 0) {
2279 aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
2280 __func__, r);
2281 goto fail_2;
2282 }
2283
2284 r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
2285 if (r != 0) {
2286 		aprint_error_dev(dev, "%s: ixgbe_dmamap_create failed; error %d\n",
2287 __func__, r);
2288 goto fail_3;
2289 }
2290
2291 r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map,
2292 dma->dma_vaddr, size, NULL, mapflags | BUS_DMA_NOWAIT);
2293 if (r != 0) {
2294 aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
2295 __func__, r);
2296 goto fail_4;
2297 }
2298 dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
2299 dma->dma_size = size;
2300 return 0;
2301 fail_4:
2302 ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
2303 fail_3:
2304 bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
2305 fail_2:
2306 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
2307 fail_1:
2308 ixgbe_dma_tag_destroy(dma->dma_tag);
2309 fail_0:
2310
2311 return (r);
2312 } /* ixgbe_dma_malloc */
2313
2314 /************************************************************************
2315 * ixgbe_dma_free
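 *
 *   Tear down an area set up by ixgbe_dma_malloc.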
2316 ************************************************************************/
2317 void
2318 ixgbe_dma_free(struct ixgbe_softc *sc, struct ixgbe_dma_alloc *dma)
2319 {
2320 bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
2321 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2322 ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
2323 bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, dma->dma_size);
2324 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
2325 ixgbe_dma_tag_destroy(dma->dma_tag);
2326 } /* ixgbe_dma_free */
2327
2328
2329 /************************************************************************
2330 * ixgbe_allocate_queues
2331 *
2332  *   Allocate memory for the transmit and receive rings, and then
2333  *   the descriptors associated with each. Called only once at attach.
2334 ************************************************************************/
2335 int
2336 ixgbe_allocate_queues(struct ixgbe_softc *sc)
2337 {
2338 device_t dev = sc->dev;
2339 struct ix_queue *que;
2340 struct tx_ring *txr;
2341 struct rx_ring *rxr;
2342 int rsize, tsize, error = IXGBE_SUCCESS;
2343 int txconf = 0, rxconf = 0;
2344
2345 /* First, allocate the top level queue structs */
2346 sc->queues = kmem_zalloc(sizeof(struct ix_queue) * sc->num_queues,
2347 KM_SLEEP);
2348
2349 /* Second, allocate the TX ring struct memory */
2350 sc->tx_rings = kmem_zalloc(sizeof(struct tx_ring) * sc->num_queues,
2351 KM_SLEEP);
2352
2353 /* Third, allocate the RX ring */
2354 sc->rx_rings = kmem_zalloc(sizeof(struct rx_ring) * sc->num_queues,
2355 KM_SLEEP);
2356
2357 /* For the ring itself */
2358 tsize = sc->num_tx_desc * sizeof(union ixgbe_adv_tx_desc);
2359 KASSERT((tsize % DBA_ALIGN) == 0);
2360
2361 /*
2362 	 * Now set up the TX queues. txconf is needed to handle the
2363 	 * possibility that things fail midcourse and we need to
2364 	 * unwind the allocated memory gracefully.
2365 */
2366 for (int i = 0; i < sc->num_queues; i++, txconf++) {
2367 /* Set up some basics */
2368 txr = &sc->tx_rings[i];
2369 txr->sc = sc;
2370 txr->txr_interq = NULL;
2371 /* In case SR-IOV is enabled, align the index properly */
2372 #ifdef PCI_IOV
2373 txr->me = ixgbe_vf_que_index(sc->iov_mode, sc->pool,
2374 i);
2375 #else
2376 txr->me = i;
2377 #endif
2378 txr->num_desc = sc->num_tx_desc;
2379
2380 /* Initialize the TX side lock */
2381 mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
2382
2383 if (ixgbe_dma_malloc(sc, tsize, &txr->txdma,
2384 BUS_DMA_NOWAIT)) {
2385 aprint_error_dev(dev,
2386 "Unable to allocate TX Descriptor memory\n");
2387 error = ENOMEM;
2388 goto err_tx_desc;
2389 }
2390 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2391 bzero((void *)txr->tx_base, tsize);
2392
2393 /* Now allocate transmit buffers for the ring */
2394 if (ixgbe_allocate_transmit_buffers(txr)) {
2395 aprint_error_dev(dev,
2396 "Critical Failure setting up transmit buffers\n");
2397 error = ENOMEM;
2398 goto err_tx_desc;
2399 }
2400 if (!(sc->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
2401 /* Allocate a buf ring */
2402 txr->txr_interq = pcq_create(IXGBE_BR_SIZE, KM_SLEEP);
2403 if (txr->txr_interq == NULL) {
2404 aprint_error_dev(dev,
2405 "Critical Failure setting up buf ring\n");
2406 error = ENOMEM;
2407 goto err_tx_desc;
2408 }
2409 }
2410 }
2411
2412 /*
2413 * Next the RX queues...
2414 */
2415 rsize = sc->num_rx_desc * sizeof(union ixgbe_adv_rx_desc);
2416 KASSERT((rsize % DBA_ALIGN) == 0);
2417 for (int i = 0; i < sc->num_queues; i++, rxconf++) {
2418 rxr = &sc->rx_rings[i];
2419 /* Set up some basics */
2420 rxr->sc = sc;
2421 #ifdef PCI_IOV
2422 /* In case SR-IOV is enabled, align the index properly */
2423 rxr->me = ixgbe_vf_que_index(sc->iov_mode, sc->pool,
2424 i);
2425 #else
2426 rxr->me = i;
2427 #endif
2428 rxr->num_desc = sc->num_rx_desc;
2429
2430 /* Initialize the RX side lock */
2431 mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
2432
2433 if (ixgbe_dma_malloc(sc, rsize, &rxr->rxdma,
2434 BUS_DMA_NOWAIT)) {
2435 aprint_error_dev(dev,
2436 			    "Unable to allocate RX Descriptor memory\n");
2437 error = ENOMEM;
2438 goto err_rx_desc;
2439 }
2440 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2441 bzero((void *)rxr->rx_base, rsize);
2442
2443 /* Allocate receive buffers for the ring */
2444 if (ixgbe_allocate_receive_buffers(rxr)) {
2445 aprint_error_dev(dev,
2446 "Critical Failure setting up receive buffers\n");
2447 error = ENOMEM;
2448 goto err_rx_desc;
2449 }
2450 }
2451
2452 /*
2453 * Finally set up the queue holding structs
2454 */
2455 for (int i = 0; i < sc->num_queues; i++) {
2456 que = &sc->queues[i];
2457 que->sc = sc;
2458 que->me = i;
2459 que->txr = &sc->tx_rings[i];
2460 que->rxr = &sc->rx_rings[i];
2461
2462 mutex_init(&que->dc_mtx, MUTEX_DEFAULT, IPL_NET);
2463 que->disabled_count = 0;
2464 }
2465
2466 return (0);
2467
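	/*
	 * Error unwind: txconf/rxconf count how many rings had descriptor
	 * memory allocated, so free exactly those and the ring arrays.
	 */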
2468 err_rx_desc:
2469 for (rxr = sc->rx_rings; rxconf > 0; rxr++, rxconf--)
2470 ixgbe_dma_free(sc, &rxr->rxdma);
2471 err_tx_desc:
2472 for (txr = sc->tx_rings; txconf > 0; txr++, txconf--)
2473 ixgbe_dma_free(sc, &txr->txdma);
2474 kmem_free(sc->rx_rings, sizeof(struct rx_ring) * sc->num_queues);
2475 kmem_free(sc->tx_rings, sizeof(struct tx_ring) * sc->num_queues);
2476 kmem_free(sc->queues, sizeof(struct ix_queue) * sc->num_queues);
2477 return (error);
2478 } /* ixgbe_allocate_queues */
2479
2480 /************************************************************************
2481 * ixgbe_free_queues
2482 *
2483 * Free descriptors for the transmit and receive rings, and then
2484 * the memory associated with each.
2485 ************************************************************************/
2486 void
2487 ixgbe_free_queues(struct ixgbe_softc *sc)
2488 {
2489 struct ix_queue *que;
2490 int i;
2491
2492 ixgbe_free_transmit_structures(sc);
2493 ixgbe_free_receive_structures(sc);
2494 for (i = 0; i < sc->num_queues; i++) {
2495 que = &sc->queues[i];
2496 mutex_destroy(&que->dc_mtx);
2497 }
2498 kmem_free(sc->queues, sizeof(struct ix_queue) * sc->num_queues);
2499 } /* ixgbe_free_queues */
2500