/* $NetBSD: ix_txrx.c,v 1.44 2018/05/16 08:08:24 msaitoh Exp $ */
2
3 /******************************************************************************
4
5 Copyright (c) 2001-2017, Intel Corporation
6 All rights reserved.
7
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are met:
10
11 1. Redistributions of source code must retain the above copyright notice,
12 this list of conditions and the following disclaimer.
13
14 2. Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
17
18 3. Neither the name of the Intel Corporation nor the names of its
19 contributors may be used to endorse or promote products derived from
20 this software without specific prior written permission.
21
22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
23 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
26 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 POSSIBILITY OF SUCH DAMAGE.
33
34 ******************************************************************************/
35 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 327031 2017-12-20 18:15:06Z erj $*/
36
37 /*
38 * Copyright (c) 2011 The NetBSD Foundation, Inc.
39 * All rights reserved.
40 *
41 * This code is derived from software contributed to The NetBSD Foundation
42 * by Coyote Point Systems, Inc.
43 *
44 * Redistribution and use in source and binary forms, with or without
45 * modification, are permitted provided that the following conditions
46 * are met:
47 * 1. Redistributions of source code must retain the above copyright
48 * notice, this list of conditions and the following disclaimer.
49 * 2. Redistributions in binary form must reproduce the above copyright
50 * notice, this list of conditions and the following disclaimer in the
51 * documentation and/or other materials provided with the distribution.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
54 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
56 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
57 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
58 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
59 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
60 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
61 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
62 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
63 * POSSIBILITY OF SUCH DAMAGE.
64 */
65
66 #include "opt_inet.h"
67 #include "opt_inet6.h"
68
69 #include "ixgbe.h"
70
/*
 * HW RSC control:
 *  This feature only works with IPv4, and only on 82599 and later
 *  adapters.  It also breaks IP forwarding, and unlike LRO it cannot
 *  be controlled by the stack.  For these reasons it is left off by
 *  default with no tunable interface; enabling it requires building
 *  with ixgbe_rsc_enable set to TRUE.
 */
static bool ixgbe_rsc_enable = FALSE;
84
/*
 * For Flow Director: this is the number of TX packets we sample
 * for the filter pool; with the default of 20, every 20th packet
 * is probed.
 *
 * This feature can be disabled by setting this to 0.
 */
static int atr_sample_rate = 20;
95
96 /************************************************************************
97 * Local Function prototypes
98 ************************************************************************/
99 static void ixgbe_setup_transmit_ring(struct tx_ring *);
100 static void ixgbe_free_transmit_buffers(struct tx_ring *);
101 static int ixgbe_setup_receive_ring(struct rx_ring *);
102 static void ixgbe_free_receive_buffers(struct rx_ring *);
103 static void ixgbe_rx_checksum(u32, struct mbuf *, u32,
104 struct ixgbe_hw_stats *);
105 static void ixgbe_refresh_mbufs(struct rx_ring *, int);
106 static void ixgbe_drain(struct ifnet *, struct tx_ring *);
107 static int ixgbe_xmit(struct tx_ring *, struct mbuf *);
108 static int ixgbe_tx_ctx_setup(struct tx_ring *,
109 struct mbuf *, u32 *, u32 *);
110 static int ixgbe_tso_setup(struct tx_ring *,
111 struct mbuf *, u32 *, u32 *);
112 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
113 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
114 struct mbuf *, u32);
115 static int ixgbe_dma_malloc(struct adapter *, bus_size_t,
116 struct ixgbe_dma_alloc *, int);
117 static void ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
118
119 static void ixgbe_setup_hw_rsc(struct rx_ring *);
120
121 /************************************************************************
122 * ixgbe_legacy_start_locked - Transmit entry point
123 *
124 * Called by the stack to initiate a transmit.
125 * The driver will remain in this routine as long as there are
126 * packets to transmit and transmit resources are available.
127 * In case resources are not available, the stack is notified
128 * and the packet is requeued.
129 ************************************************************************/
130 int
131 ixgbe_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
132 {
133 struct mbuf *m_head;
134 struct adapter *adapter = txr->adapter;
135 int enqueued = 0;
136 int rc;
137
138 IXGBE_TX_LOCK_ASSERT(txr);
139
140 if (!adapter->link_active) {
141 /*
142 * discard all packets buffered in IFQ to avoid
143 * sending old packets at next link up timing.
144 */
145 ixgbe_drain(ifp, txr);
146 return (ENETDOWN);
147 }
148 if ((ifp->if_flags & IFF_RUNNING) == 0)
149 return (ENETDOWN);
150
151 while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
152 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
153 break;
154
155 IFQ_POLL(&ifp->if_snd, m_head);
156 if (m_head == NULL)
157 break;
158
159 if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
160 break;
161 }
162 enqueued++;
163 IFQ_DEQUEUE(&ifp->if_snd, m_head);
164 if (rc != 0) {
165 m_freem(m_head);
166 continue;
167 }
168
169 /* Send a copy of the frame to the BPF listener */
170 bpf_mtap(ifp, m_head);
171 }
172
173 if (enqueued) {
174 txr->lastsent = time_uptime;
175 txr->sending = true;
176 }
177
178 return IXGBE_SUCCESS;
179 } /* ixgbe_legacy_start_locked */
180
181 /************************************************************************
182 * ixgbe_legacy_start
183 *
184 * Called by the stack, this always uses the first tx ring,
185 * and should not be used with multiqueue tx enabled.
186 ************************************************************************/
187 void
188 ixgbe_legacy_start(struct ifnet *ifp)
189 {
190 struct adapter *adapter = ifp->if_softc;
191 struct tx_ring *txr = adapter->tx_rings;
192
193 if (ifp->if_flags & IFF_RUNNING) {
194 IXGBE_TX_LOCK(txr);
195 ixgbe_legacy_start_locked(ifp, txr);
196 IXGBE_TX_UNLOCK(txr);
197 }
198 } /* ixgbe_legacy_start */
199
200 /************************************************************************
201 * ixgbe_mq_start - Multiqueue Transmit Entry Point
202 *
203 * (if_transmit function)
204 ************************************************************************/
205 int
206 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
207 {
208 struct adapter *adapter = ifp->if_softc;
209 struct tx_ring *txr;
210 int i, err = 0;
211 #ifdef RSS
212 uint32_t bucket_id;
213 #endif
214
215 /*
216 * When doing RSS, map it to the same outbound queue
217 * as the incoming flow would be mapped to.
218 *
219 * If everything is setup correctly, it should be the
220 * same bucket that the current CPU we're on is.
221 */
222 #ifdef RSS
223 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
224 if ((adapter->feat_en & IXGBE_FEATURE_RSS) &&
225 (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
226 &bucket_id) == 0)) {
227 i = bucket_id % adapter->num_queues;
228 #ifdef IXGBE_DEBUG
229 if (bucket_id > adapter->num_queues)
230 if_printf(ifp,
231 "bucket_id (%d) > num_queues (%d)\n",
232 bucket_id, adapter->num_queues);
233 #endif
234 } else
235 i = m->m_pkthdr.flowid % adapter->num_queues;
236 } else
#endif /* RSS */
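	/* Default: map the flow to a queue based on the current CPU */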
238 i = cpu_index(curcpu()) % adapter->num_queues;
239
240 /* Check for a hung queue and pick alternative */
241 if (((1 << i) & adapter->active_queues) == 0)
242 i = ffs64(adapter->active_queues);
243
244 txr = &adapter->tx_rings[i];
245
246 err = pcq_put(txr->txr_interq, m);
247 if (err == false) {
248 m_freem(m);
249 txr->pcq_drops.ev_count++;
250 return (err);
251 }
252 if (IXGBE_TX_TRYLOCK(txr)) {
253 ixgbe_mq_start_locked(ifp, txr);
254 IXGBE_TX_UNLOCK(txr);
255 } else {
256 if (adapter->txrx_use_workqueue) {
257 u_int *enqueued;
258
			/*
			 * This function itself is not called in interrupt
			 * context, but it can be called in fast softint
			 * context right after receiving forwarded packets,
			 * so the workqueue must be protected against being
			 * enqueued twice when the machine handles both
			 * locally generated and forwarded packets.
			 */
267 enqueued = percpu_getref(adapter->txr_wq_enqueued);
268 if (*enqueued == 0) {
269 *enqueued = 1;
270 percpu_putref(adapter->txr_wq_enqueued);
271 workqueue_enqueue(adapter->txr_wq,
272 &txr->wq_cookie, curcpu());
273 } else
274 percpu_putref(adapter->txr_wq_enqueued);
275 } else
276 softint_schedule(txr->txr_si);
277 }
278
279 return (0);
280 } /* ixgbe_mq_start */
281
282 /************************************************************************
283 * ixgbe_mq_start_locked
284 ************************************************************************/
285 int
286 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
287 {
288 struct mbuf *next;
289 int enqueued = 0, err = 0;
290
291 if (!txr->adapter->link_active) {
292 /*
293 * discard all packets buffered in txr_interq to avoid
294 * sending old packets at next link up timing.
295 */
296 ixgbe_drain(ifp, txr);
297 return (ENETDOWN);
298 }
299 if ((ifp->if_flags & IFF_RUNNING) == 0)
300 return (ENETDOWN);
301
302 /* Process the queue */
303 while ((next = pcq_get(txr->txr_interq)) != NULL) {
304 if ((err = ixgbe_xmit(txr, next)) != 0) {
305 m_freem(next);
306 /* All errors are counted in ixgbe_xmit() */
307 break;
308 }
309 enqueued++;
310 #if __FreeBSD_version >= 1100036
		/*
		 * Since we're looking at the tx ring, we can check
		 * to see if we're a VF by examining our tail register
		 * address.
		 */
316 if ((txr->adapter->feat_en & IXGBE_FEATURE_VF) &&
317 (next->m_flags & M_MCAST))
318 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
319 #endif
320 /* Send a copy of the frame to the BPF listener */
321 bpf_mtap(ifp, next);
322 if ((ifp->if_flags & IFF_RUNNING) == 0)
323 break;
324 }
325
326 if (enqueued) {
327 txr->lastsent = time_uptime;
328 txr->sending = true;
329 }
330
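	/* Reclaim completed descriptors when the free count runs low */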
331 if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->adapter))
332 ixgbe_txeof(txr);
333
334 return (err);
335 } /* ixgbe_mq_start_locked */
336
337 /************************************************************************
338 * ixgbe_deferred_mq_start
339 *
340 * Called from a softint and workqueue (indirectly) to drain queued
341 * transmit packets.
342 ************************************************************************/
343 void
344 ixgbe_deferred_mq_start(void *arg)
345 {
346 struct tx_ring *txr = arg;
347 struct adapter *adapter = txr->adapter;
348 struct ifnet *ifp = adapter->ifp;
349
350 IXGBE_TX_LOCK(txr);
351 if (pcq_peek(txr->txr_interq) != NULL)
352 ixgbe_mq_start_locked(ifp, txr);
353 IXGBE_TX_UNLOCK(txr);
354 } /* ixgbe_deferred_mq_start */
355
356 /************************************************************************
357 * ixgbe_deferred_mq_start_work
358 *
359 * Called from a workqueue to drain queued transmit packets.
360 ************************************************************************/
361 void
362 ixgbe_deferred_mq_start_work(struct work *wk, void *arg)
363 {
364 struct tx_ring *txr = container_of(wk, struct tx_ring, wq_cookie);
365 struct adapter *adapter = txr->adapter;
366 u_int *enqueued = percpu_getref(adapter->txr_wq_enqueued);
367 *enqueued = 0;
368 percpu_putref(adapter->txr_wq_enqueued);
369
370 ixgbe_deferred_mq_start(txr);
} /* ixgbe_deferred_mq_start_work */
372
373 /************************************************************************
374 * ixgbe_drain_all
375 ************************************************************************/
376 void
377 ixgbe_drain_all(struct adapter *adapter)
378 {
379 struct ifnet *ifp = adapter->ifp;
380 struct ix_queue *que = adapter->queues;
381
382 for (int i = 0; i < adapter->num_queues; i++, que++) {
383 struct tx_ring *txr = que->txr;
384
385 IXGBE_TX_LOCK(txr);
386 ixgbe_drain(ifp, txr);
387 IXGBE_TX_UNLOCK(txr);
388 }
389 }
390
391 /************************************************************************
392 * ixgbe_xmit
393 *
394 * Maps the mbufs to tx descriptors, allowing the
395 * TX engine to transmit the packets.
396 *
397 * Return 0 on success, positive on failure
398 ************************************************************************/
399 static int
400 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
401 {
402 struct adapter *adapter = txr->adapter;
403 struct ixgbe_tx_buf *txbuf;
404 union ixgbe_adv_tx_desc *txd = NULL;
405 struct ifnet *ifp = adapter->ifp;
406 int i, j, error;
407 int first;
408 u32 olinfo_status = 0, cmd_type_len;
409 bool remap = TRUE;
410 bus_dmamap_t map;
411
412 /* Basic descriptor defines */
413 cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
414 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
415
416 if (vlan_has_tag(m_head))
417 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
418
419 /*
420 * Important to capture the first descriptor
421 * used because it will contain the index of
422 * the one we tell the hardware to report back
423 */
424 first = txr->next_avail_desc;
425 txbuf = &txr->tx_buffers[first];
426 map = txbuf->map;
427
428 /*
429 * Map the packet for DMA.
430 */
431 retry:
432 error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map, m_head,
433 BUS_DMA_NOWAIT);
434
435 if (__predict_false(error)) {
436 struct mbuf *m;
437
438 switch (error) {
439 case EAGAIN:
440 txr->q_eagain_tx_dma_setup++;
441 return EAGAIN;
442 case ENOMEM:
443 txr->q_enomem_tx_dma_setup++;
444 return EAGAIN;
445 case EFBIG:
446 /* Try it again? - one try */
447 if (remap == TRUE) {
448 remap = FALSE;
449 /*
450 * XXX: m_defrag will choke on
451 * non-MCLBYTES-sized clusters
452 */
453 txr->q_efbig_tx_dma_setup++;
454 m = m_defrag(m_head, M_NOWAIT);
455 if (m == NULL) {
456 txr->q_mbuf_defrag_failed++;
457 return ENOBUFS;
458 }
459 m_head = m;
460 goto retry;
461 } else {
462 txr->q_efbig2_tx_dma_setup++;
463 return error;
464 }
465 case EINVAL:
466 txr->q_einval_tx_dma_setup++;
467 return error;
468 default:
469 txr->q_other_tx_dma_setup++;
470 return error;
471 }
472 }
473
474 /* Make certain there are enough descriptors */
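	/* (the + 2 accounts for the offload context descriptor plus one spare) */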
475 if (txr->tx_avail < (map->dm_nsegs + 2)) {
476 txr->no_desc_avail.ev_count++;
477 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
478 return EAGAIN;
479 }
480
481 /*
482 * Set up the appropriate offload context
483 * this will consume the first descriptor
484 */
485 error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
486 if (__predict_false(error)) {
487 return (error);
488 }
489
490 /* Do the flow director magic */
491 if ((adapter->feat_en & IXGBE_FEATURE_FDIR) &&
492 (txr->atr_sample) && (!adapter->fdir_reinit)) {
493 ++txr->atr_count;
494 if (txr->atr_count >= atr_sample_rate) {
495 ixgbe_atr(txr, m_head);
496 txr->atr_count = 0;
497 }
498 }
499
500 olinfo_status |= IXGBE_ADVTXD_CC;
501 i = txr->next_avail_desc;
502 for (j = 0; j < map->dm_nsegs; j++) {
503 bus_size_t seglen;
504 bus_addr_t segaddr;
505
506 txbuf = &txr->tx_buffers[i];
507 txd = &txr->tx_base[i];
508 seglen = map->dm_segs[j].ds_len;
509 segaddr = htole64(map->dm_segs[j].ds_addr);
510
511 txd->read.buffer_addr = segaddr;
512 txd->read.cmd_type_len = htole32(cmd_type_len | seglen);
513 txd->read.olinfo_status = htole32(olinfo_status);
514
515 if (++i == txr->num_desc)
516 i = 0;
517 }
518
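	/*
	 * Mark the last data descriptor: EOP closes the packet and RS
	 * asks the hardware to write back completion status (DD).
	 */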
519 txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
520 txr->tx_avail -= map->dm_nsegs;
521 txr->next_avail_desc = i;
522
523 txbuf->m_head = m_head;
524 /*
525 * Here we swap the map so the last descriptor,
526 * which gets the completion interrupt has the
527 * real map, and the first descriptor gets the
528 * unused map from this descriptor.
529 */
530 txr->tx_buffers[first].map = txbuf->map;
531 txbuf->map = map;
532 bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
533 BUS_DMASYNC_PREWRITE);
534
535 /* Set the EOP descriptor that will be marked done */
536 txbuf = &txr->tx_buffers[first];
537 txbuf->eop = txd;
538
539 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
540 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
541 /*
542 * Advance the Transmit Descriptor Tail (Tdt), this tells the
543 * hardware that this frame is available to transmit.
544 */
545 ++txr->total_packets.ev_count;
546 IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
547
548 /*
549 * XXXX NOMPSAFE: ifp->if_data should be percpu.
550 */
551 ifp->if_obytes += m_head->m_pkthdr.len;
552 if (m_head->m_flags & M_MCAST)
553 ifp->if_omcasts++;
554
555 return (0);
556 } /* ixgbe_xmit */
557
558 /************************************************************************
559 * ixgbe_drain
560 ************************************************************************/
561 static void
562 ixgbe_drain(struct ifnet *ifp, struct tx_ring *txr)
563 {
564 struct mbuf *m;
565
566 IXGBE_TX_LOCK_ASSERT(txr);
567
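	/* Only the first queue services the legacy if_snd queue, so drain it here */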
568 if (txr->me == 0) {
569 while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
570 IFQ_DEQUEUE(&ifp->if_snd, m);
571 m_freem(m);
572 IF_DROP(&ifp->if_snd);
573 }
574 }
575
576 while ((m = pcq_get(txr->txr_interq)) != NULL) {
577 m_freem(m);
578 txr->pcq_drops.ev_count++;
579 }
580 }
581
582 /************************************************************************
583 * ixgbe_allocate_transmit_buffers
584 *
 * Allocate memory for tx_buffer structures. The tx_buffer stores all
 * the information needed to transmit a packet on the wire. This is
 * called only once at attach; setup is done on every reset.
588 ************************************************************************/
589 static int
590 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
591 {
592 struct adapter *adapter = txr->adapter;
593 device_t dev = adapter->dev;
594 struct ixgbe_tx_buf *txbuf;
595 int error, i;
596
597 /*
598 * Setup DMA descriptor areas.
599 */
600 error = ixgbe_dma_tag_create(
601 /* parent */ adapter->osdep.dmat,
602 /* alignment */ 1,
603 /* bounds */ 0,
604 /* maxsize */ IXGBE_TSO_SIZE,
605 /* nsegments */ adapter->num_segs,
606 /* maxsegsize */ PAGE_SIZE,
607 /* flags */ 0,
608 &txr->txtag);
609 if (error != 0) {
610 aprint_error_dev(dev,"Unable to allocate TX DMA tag\n");
611 goto fail;
612 }
613
614 txr->tx_buffers =
615 (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
616 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
617 if (txr->tx_buffers == NULL) {
618 aprint_error_dev(dev, "Unable to allocate tx_buffer memory\n");
619 error = ENOMEM;
620 goto fail;
621 }
622
623 /* Create the descriptor buffer dma maps */
624 txbuf = txr->tx_buffers;
625 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
626 error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
627 if (error != 0) {
628 aprint_error_dev(dev,
629 "Unable to create TX DMA map (%d)\n", error);
630 goto fail;
631 }
632 }
633
634 return 0;
635 fail:
	/* We free everything; this handles the case where we failed partway through */
637 #if 0 /* XXX was FreeBSD */
638 ixgbe_free_transmit_structures(adapter);
639 #else
640 ixgbe_free_transmit_buffers(txr);
641 #endif
642 return (error);
643 } /* ixgbe_allocate_transmit_buffers */
644
645 /************************************************************************
646 * ixgbe_setup_transmit_ring - Initialize a transmit ring.
647 ************************************************************************/
648 static void
649 ixgbe_setup_transmit_ring(struct tx_ring *txr)
650 {
651 struct adapter *adapter = txr->adapter;
652 struct ixgbe_tx_buf *txbuf;
653 #ifdef DEV_NETMAP
654 struct netmap_adapter *na = NA(adapter->ifp);
655 struct netmap_slot *slot;
656 #endif /* DEV_NETMAP */
657
658 /* Clear the old ring contents */
659 IXGBE_TX_LOCK(txr);
660
661 #ifdef DEV_NETMAP
662 if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
663 /*
664 * (under lock): if in netmap mode, do some consistency
665 * checks and set slot to entry 0 of the netmap ring.
666 */
667 slot = netmap_reset(na, NR_TX, txr->me, 0);
668 }
669 #endif /* DEV_NETMAP */
670
671 bzero((void *)txr->tx_base,
672 (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
673 /* Reset indices */
674 txr->next_avail_desc = 0;
675 txr->next_to_clean = 0;
676
677 /* Free any existing tx buffers. */
678 txbuf = txr->tx_buffers;
679 for (int i = 0; i < txr->num_desc; i++, txbuf++) {
680 txr->sending = false;
681 if (txbuf->m_head != NULL) {
682 bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
683 0, txbuf->m_head->m_pkthdr.len,
684 BUS_DMASYNC_POSTWRITE);
685 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
686 m_freem(txbuf->m_head);
687 txbuf->m_head = NULL;
688 }
689
690 #ifdef DEV_NETMAP
691 /*
692 * In netmap mode, set the map for the packet buffer.
693 * NOTE: Some drivers (not this one) also need to set
694 * the physical buffer address in the NIC ring.
695 * Slots in the netmap ring (indexed by "si") are
696 * kring->nkr_hwofs positions "ahead" wrt the
697 * corresponding slot in the NIC ring. In some drivers
698 * (not here) nkr_hwofs can be negative. Function
699 * netmap_idx_n2k() handles wraparounds properly.
700 */
701 if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
702 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
703 netmap_load_map(na, txr->txtag,
704 txbuf->map, NMB(na, slot + si));
705 }
706 #endif /* DEV_NETMAP */
707
708 /* Clear the EOP descriptor pointer */
709 txbuf->eop = NULL;
710 }
711
712 /* Set the rate at which we sample packets */
713 if (adapter->feat_en & IXGBE_FEATURE_FDIR)
714 txr->atr_sample = atr_sample_rate;
715
716 /* Set number of descriptors available */
717 txr->tx_avail = adapter->num_tx_desc;
718
719 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
720 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
721 IXGBE_TX_UNLOCK(txr);
722 } /* ixgbe_setup_transmit_ring */
723
724 /************************************************************************
725 * ixgbe_setup_transmit_structures - Initialize all transmit rings.
726 ************************************************************************/
727 int
728 ixgbe_setup_transmit_structures(struct adapter *adapter)
729 {
730 struct tx_ring *txr = adapter->tx_rings;
731
732 for (int i = 0; i < adapter->num_queues; i++, txr++)
733 ixgbe_setup_transmit_ring(txr);
734
735 return (0);
736 } /* ixgbe_setup_transmit_structures */
737
738 /************************************************************************
739 * ixgbe_free_transmit_structures - Free all transmit rings.
740 ************************************************************************/
741 void
742 ixgbe_free_transmit_structures(struct adapter *adapter)
743 {
744 struct tx_ring *txr = adapter->tx_rings;
745
746 for (int i = 0; i < adapter->num_queues; i++, txr++) {
747 ixgbe_free_transmit_buffers(txr);
748 ixgbe_dma_free(adapter, &txr->txdma);
749 IXGBE_TX_LOCK_DESTROY(txr);
750 }
751 free(adapter->tx_rings, M_DEVBUF);
752 } /* ixgbe_free_transmit_structures */
753
754 /************************************************************************
755 * ixgbe_free_transmit_buffers
756 *
757 * Free transmit ring related data structures.
758 ************************************************************************/
759 static void
760 ixgbe_free_transmit_buffers(struct tx_ring *txr)
761 {
762 struct adapter *adapter = txr->adapter;
763 struct ixgbe_tx_buf *tx_buffer;
764 int i;
765
766 INIT_DEBUGOUT("ixgbe_free_transmit_buffers: begin");
767
768 if (txr->tx_buffers == NULL)
769 return;
770
771 tx_buffer = txr->tx_buffers;
772 for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
773 if (tx_buffer->m_head != NULL) {
774 bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
775 0, tx_buffer->m_head->m_pkthdr.len,
776 BUS_DMASYNC_POSTWRITE);
777 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
778 m_freem(tx_buffer->m_head);
779 tx_buffer->m_head = NULL;
780 if (tx_buffer->map != NULL) {
781 ixgbe_dmamap_destroy(txr->txtag,
782 tx_buffer->map);
783 tx_buffer->map = NULL;
784 }
785 } else if (tx_buffer->map != NULL) {
786 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
787 ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
788 tx_buffer->map = NULL;
789 }
790 }
791 if (txr->txr_interq != NULL) {
792 struct mbuf *m;
793
794 while ((m = pcq_get(txr->txr_interq)) != NULL)
795 m_freem(m);
796 pcq_destroy(txr->txr_interq);
797 }
798 if (txr->tx_buffers != NULL) {
799 free(txr->tx_buffers, M_DEVBUF);
800 txr->tx_buffers = NULL;
801 }
802 if (txr->txtag != NULL) {
803 ixgbe_dma_tag_destroy(txr->txtag);
804 txr->txtag = NULL;
805 }
806 } /* ixgbe_free_transmit_buffers */
807
808 /************************************************************************
809 * ixgbe_tx_ctx_setup
810 *
811 * Advanced Context Descriptor setup for VLAN, CSUM or TSO
812 ************************************************************************/
813 static int
814 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
815 u32 *cmd_type_len, u32 *olinfo_status)
816 {
817 struct adapter *adapter = txr->adapter;
818 struct ixgbe_adv_tx_context_desc *TXD;
819 struct ether_vlan_header *eh;
820 #ifdef INET
821 struct ip *ip;
822 #endif
823 #ifdef INET6
824 struct ip6_hdr *ip6;
825 #endif
826 int ehdrlen, ip_hlen = 0;
827 int offload = TRUE;
828 int ctxd = txr->next_avail_desc;
829 u32 vlan_macip_lens = 0;
830 u32 type_tucmd_mlhl = 0;
831 u16 vtag = 0;
832 u16 etype;
833 u8 ipproto = 0;
834 char *l3d;
835
836
837 /* First check if TSO is to be used */
838 if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4 | M_CSUM_TSOv6)) {
839 int rv = ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status);
840
841 if (rv != 0)
842 ++adapter->tso_err.ev_count;
843 return rv;
844 }
845
846 if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
847 offload = FALSE;
848
849 /* Indicate the whole packet as payload when not doing TSO */
850 *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
851
852 /* Now ready a context descriptor */
853 TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
854
855 /*
856 * In advanced descriptors the vlan tag must
857 * be placed into the context descriptor. Hence
858 * we need to make one even if not doing offloads.
859 */
860 if (vlan_has_tag(mp)) {
861 vtag = htole16(vlan_get_tag(mp));
862 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
863 } else if (!(txr->adapter->feat_en & IXGBE_FEATURE_NEEDS_CTXD) &&
864 (offload == FALSE))
865 return (0);
866
867 /*
868 * Determine where frame payload starts.
869 * Jump over vlan headers if already present,
870 * helpful for QinQ too.
871 */
872 KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
873 eh = mtod(mp, struct ether_vlan_header *);
874 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
875 KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
876 etype = ntohs(eh->evl_proto);
877 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
878 } else {
879 etype = ntohs(eh->evl_encap_proto);
880 ehdrlen = ETHER_HDR_LEN;
881 }
882
883 /* Set the ether header length */
884 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
885
886 if (offload == FALSE)
887 goto no_offloads;
888
889 /*
890 * If the first mbuf only includes the ethernet header,
891 * jump to the next one
892 * XXX: This assumes the stack splits mbufs containing headers
893 * on header boundaries
894 * XXX: And assumes the entire IP header is contained in one mbuf
895 */
896 if (mp->m_len == ehdrlen && mp->m_next)
897 l3d = mtod(mp->m_next, char *);
898 else
899 l3d = mtod(mp, char *) + ehdrlen;
900
901 switch (etype) {
902 #ifdef INET
903 case ETHERTYPE_IP:
904 ip = (struct ip *)(l3d);
905 ip_hlen = ip->ip_hl << 2;
906 ipproto = ip->ip_p;
907 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
908 KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
909 ip->ip_sum == 0);
910 break;
911 #endif
912 #ifdef INET6
913 case ETHERTYPE_IPV6:
914 ip6 = (struct ip6_hdr *)(l3d);
915 ip_hlen = sizeof(struct ip6_hdr);
916 ipproto = ip6->ip6_nxt;
917 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
918 break;
919 #endif
920 default:
921 offload = false;
922 break;
923 }
924
925 if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
926 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
927
928 vlan_macip_lens |= ip_hlen;
929
930 /* No support for offloads for non-L4 next headers */
931 switch (ipproto) {
932 case IPPROTO_TCP:
933 if (mp->m_pkthdr.csum_flags &
934 (M_CSUM_TCPv4 | M_CSUM_TCPv6))
935 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
936 else
937 offload = false;
938 break;
939 case IPPROTO_UDP:
940 if (mp->m_pkthdr.csum_flags &
941 (M_CSUM_UDPv4 | M_CSUM_UDPv6))
942 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
943 else
944 offload = false;
945 break;
946 default:
947 offload = false;
948 break;
949 }
950
951 if (offload) /* Insert L4 checksum into data descriptors */
952 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
953
954 no_offloads:
955 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
956
957 /* Now copy bits into descriptor */
958 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
959 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
960 TXD->seqnum_seed = htole32(0);
961 TXD->mss_l4len_idx = htole32(0);
962
963 /* We've consumed the first desc, adjust counters */
964 if (++ctxd == txr->num_desc)
965 ctxd = 0;
966 txr->next_avail_desc = ctxd;
967 --txr->tx_avail;
968
969 return (0);
970 } /* ixgbe_tx_ctx_setup */
971
972 /************************************************************************
973 * ixgbe_tso_setup
974 *
975 * Setup work for hardware segmentation offload (TSO) on
976 * adapters using advanced tx descriptors
977 ************************************************************************/
978 static int
979 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len,
980 u32 *olinfo_status)
981 {
982 struct ixgbe_adv_tx_context_desc *TXD;
983 struct ether_vlan_header *eh;
984 #ifdef INET6
985 struct ip6_hdr *ip6;
986 #endif
987 #ifdef INET
988 struct ip *ip;
989 #endif
990 struct tcphdr *th;
991 int ctxd, ehdrlen, ip_hlen, tcp_hlen;
992 u32 vlan_macip_lens = 0;
993 u32 type_tucmd_mlhl = 0;
994 u32 mss_l4len_idx = 0, paylen;
995 u16 vtag = 0, eh_type;
996
997 /*
998 * Determine where frame payload starts.
999 * Jump over vlan headers if already present
1000 */
1001 eh = mtod(mp, struct ether_vlan_header *);
1002 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1003 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1004 eh_type = eh->evl_proto;
1005 } else {
1006 ehdrlen = ETHER_HDR_LEN;
1007 eh_type = eh->evl_encap_proto;
1008 }
1009
1010 switch (ntohs(eh_type)) {
1011 #ifdef INET
1012 case ETHERTYPE_IP:
1013 ip = (struct ip *)(mp->m_data + ehdrlen);
1014 if (ip->ip_p != IPPROTO_TCP)
1015 return (ENXIO);
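		/*
		 * Clear the IP checksum and seed the TCP checksum with the
		 * pseudo-header sum; the hardware fills in the rest for
		 * each segment it generates.
		 */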
1016 ip->ip_sum = 0;
1017 ip_hlen = ip->ip_hl << 2;
1018 th = (struct tcphdr *)((char *)ip + ip_hlen);
1019 th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
1020 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1021 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
1022 /* Tell transmit desc to also do IPv4 checksum. */
1023 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
1024 break;
1025 #endif
1026 #ifdef INET6
1027 case ETHERTYPE_IPV6:
1028 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
1029 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
1030 if (ip6->ip6_nxt != IPPROTO_TCP)
1031 return (ENXIO);
1032 ip_hlen = sizeof(struct ip6_hdr);
1033 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
1034 th = (struct tcphdr *)((char *)ip6 + ip_hlen);
1035 th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
1036 &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
1037 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
1038 break;
1039 #endif
1040 default:
1041 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
1042 __func__, ntohs(eh_type));
1043 break;
1044 }
1045
1046 ctxd = txr->next_avail_desc;
1047 TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
1048
1049 tcp_hlen = th->th_off << 2;
1050
1051 /* This is used in the transmit desc in encap */
1052 paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
1053
1054 /* VLAN MACLEN IPLEN */
1055 if (vlan_has_tag(mp)) {
1056 vtag = htole16(vlan_get_tag(mp));
1057 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
1058 }
1059
1060 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
1061 vlan_macip_lens |= ip_hlen;
1062 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
1063
1064 /* ADV DTYPE TUCMD */
1065 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
1066 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
1067 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
1068
1069 /* MSS L4LEN IDX */
1070 mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
1071 mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
1072 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
1073
1074 TXD->seqnum_seed = htole32(0);
1075
1076 if (++ctxd == txr->num_desc)
1077 ctxd = 0;
1078
1079 txr->tx_avail--;
1080 txr->next_avail_desc = ctxd;
1081 *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
1082 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
1083 *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
1084 ++txr->tso_tx.ev_count;
1085
1086 return (0);
1087 } /* ixgbe_tso_setup */
1088
1089
1090 /************************************************************************
1091 * ixgbe_txeof
1092 *
1093 * Examine each tx_buffer in the used queue. If the hardware is done
1094 * processing the packet then free associated resources. The
1095 * tx_buffer is put back on the free queue.
1096 ************************************************************************/
1097 bool
1098 ixgbe_txeof(struct tx_ring *txr)
1099 {
1100 struct adapter *adapter = txr->adapter;
1101 struct ifnet *ifp = adapter->ifp;
1102 struct ixgbe_tx_buf *buf;
1103 union ixgbe_adv_tx_desc *txd;
1104 u32 work, processed = 0;
1105 u32 limit = adapter->tx_process_limit;
1106
1107 KASSERT(mutex_owned(&txr->tx_mtx));
1108
1109 #ifdef DEV_NETMAP
1110 if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) &&
1111 (adapter->ifp->if_capenable & IFCAP_NETMAP)) {
1112 struct netmap_adapter *na = NA(adapter->ifp);
1113 struct netmap_kring *kring = &na->tx_rings[txr->me];
1114 txd = txr->tx_base;
1115 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1116 BUS_DMASYNC_POSTREAD);
1117 /*
1118 * In netmap mode, all the work is done in the context
1119 * of the client thread. Interrupt handlers only wake up
1120 * clients, which may be sleeping on individual rings
1121 * or on a global resource for all rings.
1122 * To implement tx interrupt mitigation, we wake up the client
1123 * thread roughly every half ring, even if the NIC interrupts
1124 * more frequently. This is implemented as follows:
1125 * - ixgbe_txsync() sets kring->nr_kflags with the index of
1126 * the slot that should wake up the thread (nkr_num_slots
1127 * means the user thread should not be woken up);
1128 * - the driver ignores tx interrupts unless netmap_mitigate=0
1129 * or the slot has the DD bit set.
1130 */
1131 if (!netmap_mitigate ||
1132 (kring->nr_kflags < kring->nkr_num_slots &&
1133 txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
1134 netmap_tx_irq(ifp, txr->me);
1135 }
1136 return false;
1137 }
1138 #endif /* DEV_NETMAP */
1139
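	/* All descriptors free: the ring is already clean */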
1140 if (txr->tx_avail == txr->num_desc) {
1141 txr->sending = false;
1142 return false;
1143 }
1144
1145 /* Get work starting point */
1146 work = txr->next_to_clean;
1147 buf = &txr->tx_buffers[work];
1148 txd = &txr->tx_base[work];
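	/*
	 * Bias "work" by the ring size so it counts up from a negative
	 * value and reaches zero exactly when the index wraps; the
	 * "!work" tests below rely on this to detect the wrap point.
	 */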
1149 work -= txr->num_desc; /* The distance to ring end */
1150 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1151 BUS_DMASYNC_POSTREAD);
1152
1153 do {
1154 union ixgbe_adv_tx_desc *eop = buf->eop;
1155 if (eop == NULL) /* No work */
1156 break;
1157
1158 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1159 break; /* I/O not complete */
1160
1161 if (buf->m_head) {
1162 txr->bytes += buf->m_head->m_pkthdr.len;
1163 bus_dmamap_sync(txr->txtag->dt_dmat, buf->map,
1164 0, buf->m_head->m_pkthdr.len,
1165 BUS_DMASYNC_POSTWRITE);
1166 ixgbe_dmamap_unload(txr->txtag, buf->map);
1167 m_freem(buf->m_head);
1168 buf->m_head = NULL;
1169 }
1170 buf->eop = NULL;
1171 ++txr->tx_avail;
1172
1173 /* We clean the range if multi segment */
1174 while (txd != eop) {
1175 ++txd;
1176 ++buf;
1177 ++work;
1178 /* wrap the ring? */
1179 if (__predict_false(!work)) {
1180 work -= txr->num_desc;
1181 buf = txr->tx_buffers;
1182 txd = txr->tx_base;
1183 }
1184 if (buf->m_head) {
1185 txr->bytes +=
1186 buf->m_head->m_pkthdr.len;
1187 bus_dmamap_sync(txr->txtag->dt_dmat,
1188 buf->map,
1189 0, buf->m_head->m_pkthdr.len,
1190 BUS_DMASYNC_POSTWRITE);
1191 ixgbe_dmamap_unload(txr->txtag,
1192 buf->map);
1193 m_freem(buf->m_head);
1194 buf->m_head = NULL;
1195 }
1196 ++txr->tx_avail;
1197 buf->eop = NULL;
1198
1199 }
1200 ++txr->packets;
1201 ++processed;
1202 ++ifp->if_opackets;
1203
1204 /* Try the next packet */
1205 ++txd;
1206 ++buf;
1207 ++work;
1208 /* reset with a wrap */
1209 if (__predict_false(!work)) {
1210 work -= txr->num_desc;
1211 buf = txr->tx_buffers;
1212 txd = txr->tx_base;
1213 }
1214 prefetch(txd);
1215 } while (__predict_true(--limit));
1216
1217 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1218 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1219
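	/* Convert the biased count back into a ring index */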
1220 work += txr->num_desc;
1221 txr->next_to_clean = work;
1222
1223 if (txr->tx_avail == txr->num_desc)
1224 txr->sending = false;
1225
1226 return ((limit > 0) ? false : true);
1227 } /* ixgbe_txeof */
1228
1229 /************************************************************************
1230 * ixgbe_rsc_count
1231 *
1232 * Used to detect a descriptor that has been merged by Hardware RSC.
1233 ************************************************************************/
1234 static inline u32
1235 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1236 {
1237 return (le32toh(rx->wb.lower.lo_dword.data) &
1238 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1239 } /* ixgbe_rsc_count */
1240
1241 /************************************************************************
1242 * ixgbe_setup_hw_rsc
1243 *
1244 * Initialize Hardware RSC (LRO) feature on 82599
1245 * for an RX ring, this is toggled by the LRO capability
1246 * even though it is transparent to the stack.
1247 *
1248 * NOTE: Since this HW feature only works with IPv4 and
1249 * testing has shown soft LRO to be as effective,
1250 * this feature will be disabled by default.
1251 ************************************************************************/
1252 static void
1253 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1254 {
1255 struct adapter *adapter = rxr->adapter;
1256 struct ixgbe_hw *hw = &adapter->hw;
1257 u32 rscctrl, rdrxctl;
1258
1259 /* If turning LRO/RSC off we need to disable it */
1260 if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
1261 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1262 rscctrl &= ~IXGBE_RSCCTL_RSCEN;
1263 return;
1264 }
1265
1266 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1267 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1268 #ifdef DEV_NETMAP
1269 /* Always strip CRC unless Netmap disabled it */
1270 if (!(adapter->feat_en & IXGBE_FEATURE_NETMAP) ||
1271 !(adapter->ifp->if_capenable & IFCAP_NETMAP) ||
1272 ix_crcstrip)
1273 #endif /* DEV_NETMAP */
1274 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1275 rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1276 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1277
1278 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1279 rscctrl |= IXGBE_RSCCTL_RSCEN;
1280 /*
1281 * Limit the total number of descriptors that
1282 * can be combined, so it does not exceed 64K
1283 */
1284 if (rxr->mbuf_sz == MCLBYTES)
1285 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1286 else if (rxr->mbuf_sz == MJUMPAGESIZE)
1287 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1288 else if (rxr->mbuf_sz == MJUM9BYTES)
1289 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1290 else /* Using 16K cluster */
1291 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1292
1293 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1294
1295 /* Enable TCP header recognition */
1296 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1297 (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR));
1298
1299 /* Disable RSC for ACK packets */
1300 IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1301 (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1302
1303 rxr->hw_rsc = TRUE;
1304 } /* ixgbe_setup_hw_rsc */
1305
1306 /************************************************************************
1307 * ixgbe_refresh_mbufs
1308 *
1309 * Refresh mbuf buffers for RX descriptor rings
 * - now keeps its own state so discards due to resource
 *   exhaustion are unnecessary; if an mbuf cannot be obtained
 *   it just returns, keeping its placeholder, so it can simply
 *   be called again to retry.
1314 ************************************************************************/
1315 static void
1316 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1317 {
1318 struct adapter *adapter = rxr->adapter;
1319 struct ixgbe_rx_buf *rxbuf;
1320 struct mbuf *mp;
1321 int i, j, error;
1322 bool refreshed = false;
1323
1324 i = j = rxr->next_to_refresh;
1325 /* Control the loop with one beyond */
1326 if (++j == rxr->num_desc)
1327 j = 0;
1328
1329 while (j != limit) {
1330 rxbuf = &rxr->rx_buffers[i];
1331 if (rxbuf->buf == NULL) {
1332 mp = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
1333 MT_DATA, M_PKTHDR, rxr->mbuf_sz);
1334 if (mp == NULL) {
1335 rxr->no_jmbuf.ev_count++;
1336 goto update;
1337 }
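			/* Shift the payload by ETHER_ALIGN so the IP header is aligned */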
1338 if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1339 m_adj(mp, ETHER_ALIGN);
1340 } else
1341 mp = rxbuf->buf;
1342
1343 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1344
1345 /* If we're dealing with an mbuf that was copied rather
1346 * than replaced, there's no need to go through busdma.
1347 */
1348 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1349 /* Get the memory mapping */
1350 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
1351 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
1352 rxbuf->pmap, mp, BUS_DMA_NOWAIT);
1353 if (error != 0) {
1354 printf("Refresh mbufs: payload dmamap load failure - %d\n", error);
1355 m_free(mp);
1356 rxbuf->buf = NULL;
1357 goto update;
1358 }
1359 rxbuf->buf = mp;
1360 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1361 0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
1362 rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1363 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1364 } else {
1365 rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1366 rxbuf->flags &= ~IXGBE_RX_COPY;
1367 }
1368
1369 refreshed = true;
1370 /* Next is precalculated */
1371 i = j;
1372 rxr->next_to_refresh = i;
1373 if (++j == rxr->num_desc)
1374 j = 0;
1375 }
1376
1377 update:
1378 if (refreshed) /* Update hardware tail index */
1379 IXGBE_WRITE_REG(&adapter->hw, rxr->tail, rxr->next_to_refresh);
1380
1381 return;
1382 } /* ixgbe_refresh_mbufs */
1383
1384 /************************************************************************
1385 * ixgbe_allocate_receive_buffers
1386 *
 * Allocate memory for rx_buffer structures. Since we use one
 * rx_buffer per received packet, the maximum number of rx_buffers
 * that we'll need is equal to the number of receive descriptors
 * that we've allocated.
1391 ************************************************************************/
1392 static int
1393 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1394 {
1395 struct adapter *adapter = rxr->adapter;
1396 device_t dev = adapter->dev;
1397 struct ixgbe_rx_buf *rxbuf;
1398 int bsize, error;
1399
1400 bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1401 rxr->rx_buffers = (struct ixgbe_rx_buf *)malloc(bsize, M_DEVBUF,
1402 M_NOWAIT | M_ZERO);
1403 if (rxr->rx_buffers == NULL) {
1404 aprint_error_dev(dev, "Unable to allocate rx_buffer memory\n");
1405 error = ENOMEM;
1406 goto fail;
1407 }
1408
1409 error = ixgbe_dma_tag_create(
1410 /* parent */ adapter->osdep.dmat,
1411 /* alignment */ 1,
1412 /* bounds */ 0,
1413 /* maxsize */ MJUM16BYTES,
1414 /* nsegments */ 1,
1415 /* maxsegsize */ MJUM16BYTES,
1416 /* flags */ 0,
1417 &rxr->ptag);
1418 if (error != 0) {
1419 aprint_error_dev(dev, "Unable to create RX DMA tag\n");
1420 goto fail;
1421 }
1422
1423 for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
1424 rxbuf = &rxr->rx_buffers[i];
1425 error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1426 if (error) {
1427 aprint_error_dev(dev, "Unable to create RX dma map\n");
1428 goto fail;
1429 }
1430 }
1431
1432 return (0);
1433
1434 fail:
1435 /* Frees all, but can handle partial completion */
1436 ixgbe_free_receive_structures(adapter);
1437
1438 return (error);
1439 } /* ixgbe_allocate_receive_buffers */
1440
1441 /************************************************************************
1442 * ixgbe_free_receive_ring
1443 ************************************************************************/
1444 static void
1445 ixgbe_free_receive_ring(struct rx_ring *rxr)
1446 {
1447 for (int i = 0; i < rxr->num_desc; i++) {
1448 ixgbe_rx_discard(rxr, i);
1449 }
1450 } /* ixgbe_free_receive_ring */
1451
1452 /************************************************************************
1453 * ixgbe_setup_receive_ring
1454 *
1455 * Initialize a receive ring and its buffers.
1456 ************************************************************************/
1457 static int
1458 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1459 {
1460 struct adapter *adapter;
1461 struct ixgbe_rx_buf *rxbuf;
1462 #ifdef LRO
1463 struct ifnet *ifp;
1464 struct lro_ctrl *lro = &rxr->lro;
1465 #endif /* LRO */
1466 #ifdef DEV_NETMAP
1467 struct netmap_adapter *na = NA(rxr->adapter->ifp);
1468 struct netmap_slot *slot;
1469 #endif /* DEV_NETMAP */
1470 int rsize, error = 0;
1471
1472 adapter = rxr->adapter;
1473 #ifdef LRO
1474 ifp = adapter->ifp;
1475 #endif /* LRO */
1476
1477 /* Clear the ring contents */
1478 IXGBE_RX_LOCK(rxr);
1479
1480 #ifdef DEV_NETMAP
1481 if (adapter->feat_en & IXGBE_FEATURE_NETMAP)
1482 slot = netmap_reset(na, NR_RX, rxr->me, 0);
1483 #endif /* DEV_NETMAP */
1484
1485 rsize = roundup2(adapter->num_rx_desc *
1486 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1487 bzero((void *)rxr->rx_base, rsize);
1488 /* Cache the size */
1489 rxr->mbuf_sz = adapter->rx_mbuf_sz;
1490
1491 /* Free current RX buffer structs and their mbufs */
1492 ixgbe_free_receive_ring(rxr);
1493
1494 /* Now replenish the mbufs */
1495 for (int j = 0; j != rxr->num_desc; ++j) {
1496 struct mbuf *mp;
1497
1498 rxbuf = &rxr->rx_buffers[j];
1499
1500 #ifdef DEV_NETMAP
1501 /*
1502 * In netmap mode, fill the map and set the buffer
1503 * address in the NIC ring, considering the offset
1504 * between the netmap and NIC rings (see comment in
1505 * ixgbe_setup_transmit_ring() ). No need to allocate
1506 * an mbuf, so end the block with a continue;
1507 */
1508 if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
1509 int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
1510 uint64_t paddr;
1511 void *addr;
1512
1513 addr = PNMB(na, slot + sj, &paddr);
1514 netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1515 /* Update descriptor and the cached value */
1516 rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1517 rxbuf->addr = htole64(paddr);
1518 continue;
1519 }
1520 #endif /* DEV_NETMAP */
1521
1522 rxbuf->flags = 0;
1523 rxbuf->buf = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
1524 MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
1525 if (rxbuf->buf == NULL) {
1526 error = ENOBUFS;
1527 goto fail;
1528 }
1529 mp = rxbuf->buf;
1530 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1531 /* Get the memory mapping */
1532 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat, rxbuf->pmap,
1533 mp, BUS_DMA_NOWAIT);
1534 if (error != 0)
1535 goto fail;
1536 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1537 0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
1538 /* Update the descriptor and the cached value */
1539 rxr->rx_base[j].read.pkt_addr =
1540 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1541 rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1542 }
1543
1544
1545 /* Setup our descriptor indices */
1546 rxr->next_to_check = 0;
1547 rxr->next_to_refresh = 0;
1548 rxr->lro_enabled = FALSE;
1549 rxr->rx_copies.ev_count = 0;
1550 #if 0 /* NetBSD */
1551 rxr->rx_bytes.ev_count = 0;
1552 #if 1 /* Fix inconsistency */
1553 rxr->rx_packets.ev_count = 0;
1554 #endif
1555 #endif
1556 rxr->vtag_strip = FALSE;
1557
1558 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1559 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1560
1561 /*
1562 * Now set up the LRO interface
1563 */
1564 if (ixgbe_rsc_enable)
1565 ixgbe_setup_hw_rsc(rxr);
1566 #ifdef LRO
1567 else if (ifp->if_capenable & IFCAP_LRO) {
1568 device_t dev = adapter->dev;
1569 int err = tcp_lro_init(lro);
1570 if (err) {
1571 device_printf(dev, "LRO Initialization failed!\n");
1572 goto fail;
1573 }
1574 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1575 rxr->lro_enabled = TRUE;
1576 lro->ifp = adapter->ifp;
1577 }
1578 #endif /* LRO */
1579
1580 IXGBE_RX_UNLOCK(rxr);
1581
1582 return (0);
1583
1584 fail:
1585 ixgbe_free_receive_ring(rxr);
1586 IXGBE_RX_UNLOCK(rxr);
1587
1588 return (error);
1589 } /* ixgbe_setup_receive_ring */
1590
1591 /************************************************************************
1592 * ixgbe_setup_receive_structures - Initialize all receive rings.
1593 ************************************************************************/
1594 int
1595 ixgbe_setup_receive_structures(struct adapter *adapter)
1596 {
1597 struct rx_ring *rxr = adapter->rx_rings;
1598 int j;
1599
1600 /*
1601 * Now reinitialize our supply of jumbo mbufs. The number
1602 * or size of jumbo mbufs may have changed.
1603 * Assume all of rxr->ptag are the same.
1604 */
1605 ixgbe_jcl_reinit(adapter, rxr->ptag->dt_dmat,
1606 (2 * adapter->num_rx_desc) * adapter->num_queues,
1607 adapter->rx_mbuf_sz);
1608
1609 for (j = 0; j < adapter->num_queues; j++, rxr++)
1610 if (ixgbe_setup_receive_ring(rxr))
1611 goto fail;
1612
1613 return (0);
1614 fail:
	/*
	 * Free RX buffers allocated so far; we will only handle
	 * the rings that completed, since the failing case will have
	 * cleaned up for itself.  'j' failed, so it's the terminus.
	 */
1620 for (int i = 0; i < j; ++i) {
1621 rxr = &adapter->rx_rings[i];
1622 IXGBE_RX_LOCK(rxr);
1623 ixgbe_free_receive_ring(rxr);
1624 IXGBE_RX_UNLOCK(rxr);
1625 }
1626
1627 return (ENOBUFS);
1628 } /* ixgbe_setup_receive_structures */
1629
1630
1631 /************************************************************************
1632 * ixgbe_free_receive_structures - Free all receive rings.
1633 ************************************************************************/
1634 void
1635 ixgbe_free_receive_structures(struct adapter *adapter)
1636 {
1637 struct rx_ring *rxr = adapter->rx_rings;
1638
1639 INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1640
1641 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1642 ixgbe_free_receive_buffers(rxr);
1643 #ifdef LRO
1644 /* Free LRO memory */
1645 tcp_lro_free(&rxr->lro);
1646 #endif /* LRO */
1647 /* Free the ring memory as well */
1648 ixgbe_dma_free(adapter, &rxr->rxdma);
1649 IXGBE_RX_LOCK_DESTROY(rxr);
1650 }
1651
1652 free(adapter->rx_rings, M_DEVBUF);
1653 } /* ixgbe_free_receive_structures */
1654
1655
1656 /************************************************************************
1657 * ixgbe_free_receive_buffers - Free receive ring data structures
1658 ************************************************************************/
1659 static void
1660 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1661 {
1662 struct adapter *adapter = rxr->adapter;
1663 struct ixgbe_rx_buf *rxbuf;
1664
1665 INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1666
1667 /* Cleanup any existing buffers */
1668 if (rxr->rx_buffers != NULL) {
1669 for (int i = 0; i < adapter->num_rx_desc; i++) {
1670 rxbuf = &rxr->rx_buffers[i];
1671 ixgbe_rx_discard(rxr, i);
1672 if (rxbuf->pmap != NULL) {
1673 ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1674 rxbuf->pmap = NULL;
1675 }
1676 }
1677 if (rxr->rx_buffers != NULL) {
1678 free(rxr->rx_buffers, M_DEVBUF);
1679 rxr->rx_buffers = NULL;
1680 }
1681 }
1682
1683 if (rxr->ptag != NULL) {
1684 ixgbe_dma_tag_destroy(rxr->ptag);
1685 rxr->ptag = NULL;
1686 }
1687
1688 return;
1689 } /* ixgbe_free_receive_buffers */
1690
1691 /************************************************************************
1692 * ixgbe_rx_input
1693 ************************************************************************/
1694 static __inline void
1695 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m,
1696 u32 ptype)
1697 {
1698 struct adapter *adapter = ifp->if_softc;
1699
1700 #ifdef LRO
1701 struct ethercom *ec = &adapter->osdep.ec;
1702
	/*
	 * At the moment LRO is only for IP/TCP packets whose TCP checksum
	 * has been computed by the hardware, and the packet must not carry
	 * a VLAN tag in its ethernet header.  For IPv6 we do not yet
	 * support extension headers.
	 */
1708 if (rxr->lro_enabled &&
1709 (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
1710 (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1711 ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1712 (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1713 (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1714 (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1715 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1716 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
		/*
		 * Send to the stack if:
		 *  - LRO not enabled, or
		 *  - no LRO resources, or
		 *  - lro enqueue fails
		 */
1723 if (rxr->lro.lro_cnt != 0)
1724 if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1725 return;
1726 }
1727 #endif /* LRO */
1728
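	/* Hand the frame to the stack through the per-CPU input queue */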
1729 if_percpuq_enqueue(adapter->ipq, m);
1730 } /* ixgbe_rx_input */
1731
1732 /************************************************************************
1733 * ixgbe_rx_discard
1734 ************************************************************************/
1735 static __inline void
1736 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1737 {
1738 struct ixgbe_rx_buf *rbuf;
1739
1740 rbuf = &rxr->rx_buffers[i];
1741
	/*
	 * With advanced descriptors the writeback clobbers the buffer
	 * addresses, so it's easier to just free the existing mbufs and
	 * take the normal refresh path to get new buffers and mappings.
	 */
1749
1750 if (rbuf->fmp != NULL) {/* Partial chain ? */
1751 bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1752 rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1753 m_freem(rbuf->fmp);
1754 rbuf->fmp = NULL;
1755 rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1756 } else if (rbuf->buf) {
1757 bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1758 rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1759 m_free(rbuf->buf);
1760 rbuf->buf = NULL;
1761 }
1762 ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
1763
1764 rbuf->flags = 0;
1765
1766 return;
1767 } /* ixgbe_rx_discard */
1768
1769
1770 /************************************************************************
1771 * ixgbe_rxeof
1772 *
 * Executes in interrupt context. It replenishes the mbufs in the
 * descriptor ring and sends data which has been DMA'ed into host
 * memory to the upper layer.
1776 *
1777 * Return TRUE for more work, FALSE for all clean.
1778 ************************************************************************/
1779 bool
1780 ixgbe_rxeof(struct ix_queue *que)
1781 {
1782 struct adapter *adapter = que->adapter;
1783 struct rx_ring *rxr = que->rxr;
1784 struct ifnet *ifp = adapter->ifp;
1785 #ifdef LRO
1786 struct lro_ctrl *lro = &rxr->lro;
1787 #endif /* LRO */
1788 union ixgbe_adv_rx_desc *cur;
1789 struct ixgbe_rx_buf *rbuf, *nbuf;
1790 int i, nextp, processed = 0;
1791 u32 staterr = 0;
1792 u32 count = adapter->rx_process_limit;
1793 #ifdef RSS
1794 u16 pkt_info;
1795 #endif
1796
1797 IXGBE_RX_LOCK(rxr);
1798
1799 #ifdef DEV_NETMAP
1800 if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
1801 /* Same as the txeof routine: wakeup clients on intr. */
1802 if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1803 IXGBE_RX_UNLOCK(rxr);
1804 return (FALSE);
1805 }
1806 }
1807 #endif /* DEV_NETMAP */
1808
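	/*
	 * Main cleanup loop: start at next_to_check and handle at most
	 * rx_process_limit descriptors per call.
	 */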
1809 for (i = rxr->next_to_check; count != 0;) {
1810 struct mbuf *sendmp, *mp;
1811 u32 rsc, ptype;
1812 u16 len;
1813 u16 vtag = 0;
1814 bool eop;
1815
1816 /* Sync the ring. */
1817 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1818 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1819
1820 cur = &rxr->rx_base[i];
1821 staterr = le32toh(cur->wb.upper.status_error);
1822 #ifdef RSS
1823 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1824 #endif
1825
1826 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1827 break;
1828 if ((ifp->if_flags & IFF_RUNNING) == 0)
1829 break;
1830
1831 count--;
1832 sendmp = NULL;
1833 nbuf = NULL;
1834 rsc = 0;
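		/*
		 * Clear the latched status word so a stale DD bit in this
		 * slot is not mistaken for a newly completed descriptor
		 * when the ring wraps back around to it.
		 */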
1835 cur->wb.upper.status_error = 0;
1836 rbuf = &rxr->rx_buffers[i];
1837 mp = rbuf->buf;
1838
1839 len = le16toh(cur->wb.upper.length);
1840 ptype = le32toh(cur->wb.lower.lo_dword.data) &
1841 IXGBE_RXDADV_PKTTYPE_MASK;
1842 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1843
1844 /* Make sure bad packets are discarded */
1845 if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1846 #if __FreeBSD_version >= 1100036
1847 if (adapter->feat_en & IXGBE_FEATURE_VF)
1848 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1849 #endif
1850 rxr->rx_discarded.ev_count++;
1851 ixgbe_rx_discard(rxr, i);
1852 goto next_desc;
1853 }
1854
1855 bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1856 rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1857
1858 /*
1859  * On the 82599, which supports a hardware
1860  * LRO (called HW RSC), packets need not be
1861  * fragmented across sequential descriptors;
1862  * rather, the next descriptor of a frame is
1863  * indicated in bits of the current descriptor.
1864  * This also means that we may process more
1865  * than one packet at a time, something that
1866  * has never been true before; it required
1867  * eliminating global chain pointers in favor
1868  * of what we are doing here. -jfv
1869  */
1870 if (!eop) {
1871 /*
1872 * Figure out the next descriptor
1873 * of this frame.
1874 */
1875 if (rxr->hw_rsc == TRUE) {
1876 rsc = ixgbe_rsc_count(cur);
1877 rxr->rsc_num += (rsc - 1);
1878 }
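			/*
			 * With RSC the hardware records the index of the
			 * frame's next descriptor in the NEXTP field;
			 * otherwise the frame simply continues in the next
			 * sequential descriptor.
			 */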
1879 if (rsc) { /* Get hardware index */
1880 nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
1881 IXGBE_RXDADV_NEXTP_SHIFT);
1882 } else { /* Just sequential */
1883 nextp = i + 1;
1884 if (nextp == adapter->num_rx_desc)
1885 nextp = 0;
1886 }
1887 nbuf = &rxr->rx_buffers[nextp];
1888 prefetch(nbuf);
1889 }
1890 /*
1891 * Rather than using the fmp/lmp global pointers
1892 * we now keep the head of a packet chain in the
1893 * buffer struct and pass this along from one
1894 * descriptor to the next, until we get EOP.
1895 */
1896 mp->m_len = len;
1897 /*
1898  * See if there is a stored head; if so, this
1899  * descriptor continues an already-started frame.
1900  */
1901 sendmp = rbuf->fmp;
1902 if (sendmp != NULL) { /* secondary frag */
1903 rbuf->buf = rbuf->fmp = NULL;
1904 mp->m_flags &= ~M_PKTHDR;
1905 sendmp->m_pkthdr.len += mp->m_len;
1906 } else {
1907 /*
1908 * Optimize. This might be a small packet,
1909 * maybe just a TCP ACK. Do a fast copy that
1910 * is cache aligned into a new mbuf, and
1911 * leave the old mbuf+cluster for re-use.
1912 */
1913 if (eop && len <= IXGBE_RX_COPY_LEN) {
1914 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1915 if (sendmp != NULL) {
1916 sendmp->m_data += IXGBE_RX_COPY_ALIGN;
1917 ixgbe_bcopy(mp->m_data, sendmp->m_data,
1918 len);
1919 sendmp->m_len = len;
1920 rxr->rx_copies.ev_count++;
1921 rbuf->flags |= IXGBE_RX_COPY;
1922 }
1923 }
1924 if (sendmp == NULL) {
1925 rbuf->buf = rbuf->fmp = NULL;
1926 sendmp = mp;
1927 }
1928
1929 /* First descriptor of a non-packet-split chain */
1930 sendmp->m_flags |= M_PKTHDR;
1931 sendmp->m_pkthdr.len = mp->m_len;
1932 }
1933 ++processed;
1934
1935 /* Pass the head pointer on */
1936 if (eop == 0) {
1937 nbuf->fmp = sendmp;
1938 sendmp = NULL;
1939 mp->m_next = nbuf->buf;
1940 } else { /* Sending this frame */
1941 m_set_rcvif(sendmp, ifp);
1942 ++rxr->packets;
1943 rxr->rx_packets.ev_count++;
1944 /* capture data for AIM */
1945 rxr->bytes += sendmp->m_pkthdr.len;
1946 rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
1947 /* Process vlan info */
1948 if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
1949 vtag = le16toh(cur->wb.upper.vlan);
1950 if (vtag) {
1951 vlan_set_tag(sendmp, vtag);
1952 }
1953 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
1954 ixgbe_rx_checksum(staterr, sendmp, ptype,
1955 &adapter->stats.pf);
1956 }
1957
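			/*
			 * The RSS flowid/hashtype assignment below comes
			 * from the FreeBSD driver and is kept for reference;
			 * it is compiled out here.
			 */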
1958 #if 0 /* FreeBSD */
1959 /*
1960 * In case of multiqueue, we have RXCSUM.PCSD bit set
1961 * and never cleared. This means we have RSS hash
1962 * available to be used.
1963 */
1964 if (adapter->num_queues > 1) {
1965 sendmp->m_pkthdr.flowid =
1966 le32toh(cur->wb.lower.hi_dword.rss);
1967 switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
1968 case IXGBE_RXDADV_RSSTYPE_IPV4:
1969 M_HASHTYPE_SET(sendmp,
1970 M_HASHTYPE_RSS_IPV4);
1971 break;
1972 case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
1973 M_HASHTYPE_SET(sendmp,
1974 M_HASHTYPE_RSS_TCP_IPV4);
1975 break;
1976 case IXGBE_RXDADV_RSSTYPE_IPV6:
1977 M_HASHTYPE_SET(sendmp,
1978 M_HASHTYPE_RSS_IPV6);
1979 break;
1980 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
1981 M_HASHTYPE_SET(sendmp,
1982 M_HASHTYPE_RSS_TCP_IPV6);
1983 break;
1984 case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
1985 M_HASHTYPE_SET(sendmp,
1986 M_HASHTYPE_RSS_IPV6_EX);
1987 break;
1988 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
1989 M_HASHTYPE_SET(sendmp,
1990 M_HASHTYPE_RSS_TCP_IPV6_EX);
1991 break;
1992 #if __FreeBSD_version > 1100000
1993 case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
1994 M_HASHTYPE_SET(sendmp,
1995 M_HASHTYPE_RSS_UDP_IPV4);
1996 break;
1997 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
1998 M_HASHTYPE_SET(sendmp,
1999 M_HASHTYPE_RSS_UDP_IPV6);
2000 break;
2001 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
2002 M_HASHTYPE_SET(sendmp,
2003 M_HASHTYPE_RSS_UDP_IPV6_EX);
2004 break;
2005 #endif
2006 default:
2007 M_HASHTYPE_SET(sendmp,
2008 M_HASHTYPE_OPAQUE_HASH);
2009 }
2010 } else {
2011 sendmp->m_pkthdr.flowid = que->msix;
2012 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
2013 }
2014 #endif
2015 }
2016 next_desc:
2017 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
2018 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2019
2020 /* Advance our pointers to the next descriptor. */
2021 if (++i == rxr->num_desc)
2022 i = 0;
2023
2024 /* Now send to the stack or do LRO */
2025 if (sendmp != NULL) {
2026 rxr->next_to_check = i;
2027 IXGBE_RX_UNLOCK(rxr);
2028 ixgbe_rx_input(rxr, ifp, sendmp, ptype);
2029 IXGBE_RX_LOCK(rxr);
2030 i = rxr->next_to_check;
2031 }
2032
2033 /* Every 8 descriptors we go to refresh mbufs */
2034 if (processed == 8) {
2035 ixgbe_refresh_mbufs(rxr, i);
2036 processed = 0;
2037 }
2038 }
2039
2040 /* Refresh any remaining buf structs */
2041 if (ixgbe_rx_unrefreshed(rxr))
2042 ixgbe_refresh_mbufs(rxr, i);
2043
2044 rxr->next_to_check = i;
2045
2046 IXGBE_RX_UNLOCK(rxr);
2047
2048 #ifdef LRO
2049 /*
2050 * Flush any outstanding LRO work
2051 */
2052 tcp_lro_flush_all(lro);
2053 #endif /* LRO */
2054
2055 /*
2056 * Still have cleaning to do?
2057 */
2058 if ((staterr & IXGBE_RXD_STAT_DD) != 0)
2059 return (TRUE);
2060
2061 return (FALSE);
2062 } /* ixgbe_rxeof */
2063
2064
2065 /************************************************************************
2066 * ixgbe_rx_checksum
2067 *
2068 * Verify that the hardware indicated that the checksum is valid.
2069  * Inform the stack about the status of the checksum so that the
2070  * stack doesn't spend time verifying it again in software.
2071 ************************************************************************/
2072 static void
2073 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
2074 struct ixgbe_hw_stats *stats)
2075 {
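	/*
	 * This routine only needs the legacy-compatible status bits in the
	 * low word of staterr and the error bits in its top byte; split
	 * them apart here before testing IPCS/L4CS and IPE/TCPE below.
	 */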
2076 u16 status = (u16)staterr;
2077 u8 errors = (u8)(staterr >> 24);
2078 #if 0
2079 bool sctp = false;
2080
2081 if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
2082 (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
2083 sctp = true;
2084 #endif
2085
2086 /* IPv4 checksum */
2087 if (status & IXGBE_RXD_STAT_IPCS) {
2088 stats->ipcs.ev_count++;
2089 if (!(errors & IXGBE_RXD_ERR_IPE)) {
2090 /* IP Checksum Good */
2091 mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
2092 } else {
2093 stats->ipcs_bad.ev_count++;
2094 mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
2095 }
2096 }
2097 /* TCP/UDP/SCTP checksum */
2098 if (status & IXGBE_RXD_STAT_L4CS) {
2099 stats->l4cs.ev_count++;
2100 int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
2101 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
2102 mp->m_pkthdr.csum_flags |= type;
2103 } else {
2104 stats->l4cs_bad.ev_count++;
2105 mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
2106 }
2107 }
2108 } /* ixgbe_rx_checksum */
2109
2110 /************************************************************************
2111 * ixgbe_dma_malloc
2112 ************************************************************************/
2113 int
2114 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
2115 struct ixgbe_dma_alloc *dma, const int mapflags)
2116 {
2117 device_t dev = adapter->dev;
2118 int r, rsegs;
2119
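	/*
	 * Allocation proceeds in five steps: create a DMA tag, allocate
	 * DMA-safe memory, map it into KVA, create a DMA map, and load
	 * the map.  The fail_* labels below unwind these steps in
	 * reverse order.
	 */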
2120 r = ixgbe_dma_tag_create(
2121 /* parent */ adapter->osdep.dmat,
2122 /* alignment */ DBA_ALIGN,
2123 /* bounds */ 0,
2124 /* maxsize */ size,
2125 /* nsegments */ 1,
2126 /* maxsegsize */ size,
2127 /* flags */ BUS_DMA_ALLOCNOW,
2128 &dma->dma_tag);
2129 if (r != 0) {
2130 aprint_error_dev(dev,
2131 "%s: ixgbe_dma_tag_create failed; error %d\n", __func__,
2132 r);
2133 goto fail_0;
2134 }
2135
2136 r = bus_dmamem_alloc(dma->dma_tag->dt_dmat, size,
2137 dma->dma_tag->dt_alignment, dma->dma_tag->dt_boundary,
2138 &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
2139 if (r != 0) {
2140 aprint_error_dev(dev,
2141 "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
2142 goto fail_1;
2143 }
2144
2145 r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
2146 size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
2147 if (r != 0) {
2148 aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
2149 __func__, r);
2150 goto fail_2;
2151 }
2152
2153 r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
2154 if (r != 0) {
2155 aprint_error_dev(dev, "%s: ixgbe_dmamap_create failed; error %d\n",
2156 __func__, r);
2157 goto fail_3;
2158 }
2159
2160 r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map,
2161 dma->dma_vaddr, size, NULL, mapflags | BUS_DMA_NOWAIT);
2162 if (r != 0) {
2163 aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
2164 __func__, r);
2165 goto fail_4;
2166 }
2167 dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
2168 dma->dma_size = size;
2169 return 0;
2170 fail_4:
2171 ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
2172 fail_3:
2173 bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
2174 fail_2:
2175 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
2176 fail_1:
2177 ixgbe_dma_tag_destroy(dma->dma_tag);
2178 fail_0:
2179
2180 return (r);
2181 } /* ixgbe_dma_malloc */
2182
2183 /************************************************************************
2184 * ixgbe_dma_free
2185 ************************************************************************/
2186 void
2187 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2188 {
2189 bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
2190 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2191 ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
2192 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
2193 ixgbe_dma_tag_destroy(dma->dma_tag);
2194 } /* ixgbe_dma_free */
2195
2196
2197 /************************************************************************
2198 * ixgbe_allocate_queues
2199 *
2200 * Allocate memory for the transmit and receive rings, and then
2201 * the descriptors associated with each, called only once at attach.
2202 ************************************************************************/
2203 int
2204 ixgbe_allocate_queues(struct adapter *adapter)
2205 {
2206 device_t dev = adapter->dev;
2207 struct ix_queue *que;
2208 struct tx_ring *txr;
2209 struct rx_ring *rxr;
2210 int rsize, tsize, error = IXGBE_SUCCESS;
2211 int txconf = 0, rxconf = 0;
2212
2213 /* First, allocate the top level queue structs */
2214 adapter->queues = (struct ix_queue *)malloc(sizeof(struct ix_queue) *
2215 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
2216 if (adapter->queues == NULL) {
2217 aprint_error_dev(dev, "Unable to allocate queue memory\n");
2218 error = ENOMEM;
2219 goto fail;
2220 }
2221
2222 /* Second, allocate the TX ring struct memory */
2223 adapter->tx_rings = (struct tx_ring *)malloc(sizeof(struct tx_ring) *
2224 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
2225 if (adapter->tx_rings == NULL) {
2226 aprint_error_dev(dev, "Unable to allocate TX ring memory\n");
2227 error = ENOMEM;
2228 goto tx_fail;
2229 }
2230
2231 /* Third, allocate the RX ring */
2232 adapter->rx_rings = (struct rx_ring *)malloc(sizeof(struct rx_ring) *
2233 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
2234 if (adapter->rx_rings == NULL) {
2235 aprint_error_dev(dev, "Unable to allocate RX ring memory\n");
2236 error = ENOMEM;
2237 goto rx_fail;
2238 }
2239
2240 /* For the ring itself */
2241 tsize = roundup2(adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc),
2242 DBA_ALIGN);
2243
2244 /*
2245  * Now set up the TX queues; txconf is needed to handle the
2246  * possibility that things fail midcourse and we need to
2247  * unwind the allocations gracefully.
2248 */
2249 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2250 /* Set up some basics */
2251 txr = &adapter->tx_rings[i];
2252 txr->adapter = adapter;
2253 txr->txr_interq = NULL;
2254 /* In case SR-IOV is enabled, align the index properly */
2255 #ifdef PCI_IOV
2256 txr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
2257 i);
2258 #else
2259 txr->me = i;
2260 #endif
2261 txr->num_desc = adapter->num_tx_desc;
2262
2263 /* Initialize the TX side lock */
2264 mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
2265
2266 if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma,
2267 BUS_DMA_NOWAIT)) {
2268 aprint_error_dev(dev,
2269 "Unable to allocate TX Descriptor memory\n");
2270 error = ENOMEM;
2271 goto err_tx_desc;
2272 }
2273 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2274 bzero((void *)txr->tx_base, tsize);
2275
2276 /* Now allocate transmit buffers for the ring */
2277 if (ixgbe_allocate_transmit_buffers(txr)) {
2278 aprint_error_dev(dev,
2279 "Critical Failure setting up transmit buffers\n");
2280 error = ENOMEM;
2281 goto err_tx_desc;
2282 }
2283 if (!(adapter->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
2284 /* Allocate a buf ring */
2285 txr->txr_interq = pcq_create(IXGBE_BR_SIZE, KM_SLEEP);
2286 if (txr->txr_interq == NULL) {
2287 aprint_error_dev(dev,
2288 "Critical Failure setting up buf ring\n");
2289 error = ENOMEM;
2290 goto err_tx_desc;
2291 }
2292 }
2293 }
2294
2295 /*
2296 * Next the RX queues...
2297 */
2298 rsize = roundup2(adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc),
2299 DBA_ALIGN);
2300 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2301 rxr = &adapter->rx_rings[i];
2302 /* Set up some basics */
2303 rxr->adapter = adapter;
2304 #ifdef PCI_IOV
2305 /* In case SR-IOV is enabled, align the index properly */
2306 rxr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
2307 i);
2308 #else
2309 rxr->me = i;
2310 #endif
2311 rxr->num_desc = adapter->num_rx_desc;
2312
2313 /* Initialize the RX side lock */
2314 mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
2315
2316 if (ixgbe_dma_malloc(adapter, rsize, &rxr->rxdma,
2317 BUS_DMA_NOWAIT)) {
2318 aprint_error_dev(dev,
2319 "Unable to allocate RxDescriptor memory\n");
2320 error = ENOMEM;
2321 goto err_rx_desc;
2322 }
2323 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2324 bzero((void *)rxr->rx_base, rsize);
2325
2326 /* Allocate receive buffers for the ring */
2327 if (ixgbe_allocate_receive_buffers(rxr)) {
2328 aprint_error_dev(dev,
2329 "Critical Failure setting up receive buffers\n");
2330 error = ENOMEM;
2331 goto err_rx_desc;
2332 }
2333 }
2334
2335 /*
2336 * Finally set up the queue holding structs
2337 */
2338 for (int i = 0; i < adapter->num_queues; i++) {
2339 que = &adapter->queues[i];
2340 que->adapter = adapter;
2341 que->me = i;
2342 que->txr = &adapter->tx_rings[i];
2343 que->rxr = &adapter->rx_rings[i];
2344
2345 mutex_init(&que->dc_mtx, MUTEX_DEFAULT, IPL_NET);
2346 que->disabled_count = 0;
2347 }
2348
2349 return (0);
2350
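/*
 * Error unwind: rxconf and txconf count how many rings had their
 * descriptor areas allocated, so only those are freed here before
 * the ring and queue arrays themselves are released.
 */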
2351 err_rx_desc:
2352 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2353 ixgbe_dma_free(adapter, &rxr->rxdma);
2354 err_tx_desc:
2355 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2356 ixgbe_dma_free(adapter, &txr->txdma);
2357 free(adapter->rx_rings, M_DEVBUF);
2358 rx_fail:
2359 free(adapter->tx_rings, M_DEVBUF);
2360 tx_fail:
2361 free(adapter->queues, M_DEVBUF);
2362 fail:
2363 return (error);
2364 } /* ixgbe_allocate_queues */
2365