1 /* $NetBSD: ix_txrx.c,v 1.34 2018/03/02 10:19:20 knakahara Exp $ */
2
3 /******************************************************************************
4
5 Copyright (c) 2001-2017, Intel Corporation
6 All rights reserved.
7
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are met:
10
11 1. Redistributions of source code must retain the above copyright notice,
12 this list of conditions and the following disclaimer.
13
14 2. Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
17
18 3. Neither the name of the Intel Corporation nor the names of its
19 contributors may be used to endorse or promote products derived from
20 this software without specific prior written permission.
21
22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
23 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
26 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 POSSIBILITY OF SUCH DAMAGE.
33
34 ******************************************************************************/
35 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 321476 2017-07-25 14:38:30Z sbruno $*/
36
37 /*
38 * Copyright (c) 2011 The NetBSD Foundation, Inc.
39 * All rights reserved.
40 *
41 * This code is derived from software contributed to The NetBSD Foundation
42 * by Coyote Point Systems, Inc.
43 *
44 * Redistribution and use in source and binary forms, with or without
45 * modification, are permitted provided that the following conditions
46 * are met:
47 * 1. Redistributions of source code must retain the above copyright
48 * notice, this list of conditions and the following disclaimer.
49 * 2. Redistributions in binary form must reproduce the above copyright
50 * notice, this list of conditions and the following disclaimer in the
51 * documentation and/or other materials provided with the distribution.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
54 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
56 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
57 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
58 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
59 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
60 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
61 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
62 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
63 * POSSIBILITY OF SUCH DAMAGE.
64 */
65
66 #include "opt_inet.h"
67 #include "opt_inet6.h"
68
69 #include "ixgbe.h"
70
71 /*
72 * HW RSC control:
73 * this feature only works with
74 * IPv4, and only on 82599 and later.
75 * Also, this will cause IP forwarding to
76 * fail, and unlike LRO it can't be
77 * controlled by the stack. For all these
78 * reasons I've deemed it best to leave
79 * it off, with no tunable interface;
80 * enabling it requires setting this to
81 * TRUE and recompiling.
82 */
83 static bool ixgbe_rsc_enable = FALSE;
84
85 /*
86 * For Flow Director: this is the
87 * number of TX packets we sample
88 * for the filter pool; this means
89 * every 20th packet will be probed.
90 *
91 * This feature can be disabled by
92 * setting this to 0.
93 */
94 static int atr_sample_rate = 20;
95
96 /************************************************************************
97 * Local Function prototypes
98 ************************************************************************/
99 static void ixgbe_setup_transmit_ring(struct tx_ring *);
100 static void ixgbe_free_transmit_buffers(struct tx_ring *);
101 static int ixgbe_setup_receive_ring(struct rx_ring *);
102 static void ixgbe_free_receive_buffers(struct rx_ring *);
103 static void ixgbe_rx_checksum(u32, struct mbuf *, u32,
104 struct ixgbe_hw_stats *);
105 static void ixgbe_refresh_mbufs(struct rx_ring *, int);
106 static int ixgbe_xmit(struct tx_ring *, struct mbuf *);
107 static int ixgbe_tx_ctx_setup(struct tx_ring *,
108 struct mbuf *, u32 *, u32 *);
109 static int ixgbe_tso_setup(struct tx_ring *,
110 struct mbuf *, u32 *, u32 *);
111 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
112 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
113 struct mbuf *, u32);
114 static int ixgbe_dma_malloc(struct adapter *, bus_size_t,
115 struct ixgbe_dma_alloc *, int);
116 static void ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
117
118 static void ixgbe_setup_hw_rsc(struct rx_ring *);
119
120 /************************************************************************
121 * ixgbe_legacy_start_locked - Transmit entry point
122 *
123 * Called by the stack to initiate a transmit.
124 * The driver will remain in this routine as long as there are
125 * packets to transmit and transmit resources are available.
126 * In case resources are not available, the packet is left
127 * on the interface send queue and tried again later.
128 ************************************************************************/
129 int
130 ixgbe_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
131 {
132 int rc;
133 struct mbuf *m_head;
134 struct adapter *adapter = txr->adapter;
135
136 IXGBE_TX_LOCK_ASSERT(txr);
137
138 if ((ifp->if_flags & IFF_RUNNING) == 0)
139 return (ENETDOWN);
140 if (!adapter->link_active)
141 return (ENETDOWN);
142
143 while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
144 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
145 break;
146
147 IFQ_POLL(&ifp->if_snd, m_head);
148 if (m_head == NULL)
149 break;
150
151 if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
152 break;
153 }
154 IFQ_DEQUEUE(&ifp->if_snd, m_head);
155 if (rc != 0) {
156 m_freem(m_head);
157 continue;
158 }
159
160 /* Send a copy of the frame to the BPF listener */
161 bpf_mtap(ifp, m_head);
162 }
163
164 return IXGBE_SUCCESS;
165 } /* ixgbe_legacy_start_locked */
166
167 /************************************************************************
168 * ixgbe_legacy_start
169 *
170 * Called by the stack, this always uses the first tx ring,
171 * and should not be used with multiqueue tx enabled.
172 ************************************************************************/
173 void
174 ixgbe_legacy_start(struct ifnet *ifp)
175 {
176 struct adapter *adapter = ifp->if_softc;
177 struct tx_ring *txr = adapter->tx_rings;
178
179 if (ifp->if_flags & IFF_RUNNING) {
180 IXGBE_TX_LOCK(txr);
181 ixgbe_legacy_start_locked(ifp, txr);
182 IXGBE_TX_UNLOCK(txr);
183 }
184 } /* ixgbe_legacy_start */
185
186 /************************************************************************
187 * ixgbe_mq_start - Multiqueue Transmit Entry Point
188 *
189 * (if_transmit function)
190 ************************************************************************/
191 int
192 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
193 {
194 struct adapter *adapter = ifp->if_softc;
195 struct tx_ring *txr;
196 int i, err = 0;
197 #ifdef RSS
198 uint32_t bucket_id;
199 #endif
200
201 /*
202 * When doing RSS, map it to the same outbound queue
203 * as the incoming flow would be mapped to.
204 *
205 * If everything is setup correctly, it should be the
206 * same bucket that the current CPU we're on is.
207 */
208 #ifdef RSS
209 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
210 if ((adapter->feat_en & IXGBE_FEATURE_RSS) &&
211 (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
212 &bucket_id) == 0)) {
213 i = bucket_id % adapter->num_queues;
214 #ifdef IXGBE_DEBUG
215 if (bucket_id > adapter->num_queues)
216 if_printf(ifp,
217 "bucket_id (%d) > num_queues (%d)\n",
218 bucket_id, adapter->num_queues);
219 #endif
220 } else
221 i = m->m_pkthdr.flowid % adapter->num_queues;
222 } else
223 #endif /* RSS */
224 i = cpu_index(curcpu()) % adapter->num_queues;
225
226 /* Check for a hung queue and pick alternative */
227 if (((1 << i) & adapter->active_queues) == 0)
228 i = ffs64(adapter->active_queues);
229
230 txr = &adapter->tx_rings[i];
231
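/*
 * Stage the packet on this ring's lockless pcq; it is drained
 * below if the TX lock can be taken immediately, otherwise by
 * the deferred softint or workqueue path.
 */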
232 err = pcq_put(txr->txr_interq, m);
233 if (err == false) {
234 m_freem(m);
235 txr->pcq_drops.ev_count++;
236 return (ENOBUFS);
237 }
238 if (IXGBE_TX_TRYLOCK(txr)) {
239 ixgbe_mq_start_locked(ifp, txr);
240 IXGBE_TX_UNLOCK(txr);
241 } else {
242 if (adapter->txrx_use_workqueue) {
243 /*
244 * This function itself is not called in interrupt
245 * context, however it can be called in fast softint
246 * context right after receiving forwarding packets.
247 * So, it is required to protect workqueue from twice
248 * enqueuing when the machine uses both spontaneous
249 * packets and forwarding packets.
250 */
251 u_int *enqueued = percpu_getref(adapter->txr_wq_enqueued);
252 if (*enqueued == 0) {
253 *enqueued = 1;
254 percpu_putref(adapter->txr_wq_enqueued);
255 workqueue_enqueue(adapter->txr_wq, &txr->wq_cookie, curcpu());
256 } else
257 percpu_putref(adapter->txr_wq_enqueued);
258 } else
259 softint_schedule(txr->txr_si);
260 }
261
262 return (0);
263 } /* ixgbe_mq_start */
264
265 /************************************************************************
266 * ixgbe_mq_start_locked
267 ************************************************************************/
268 int
269 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
270 {
271 struct mbuf *next;
272 int enqueued = 0, err = 0;
273
274 if ((ifp->if_flags & IFF_RUNNING) == 0)
275 return (ENETDOWN);
276 if (txr->adapter->link_active == 0)
277 return (ENETDOWN);
278
279 /* Process the queue */
280 while ((next = pcq_get(txr->txr_interq)) != NULL) {
281 if ((err = ixgbe_xmit(txr, next)) != 0) {
282 m_freem(next);
283 /* All errors are counted in ixgbe_xmit() */
284 break;
285 }
286 enqueued++;
287 #if __FreeBSD_version >= 1100036
288 /*
289 * Since we're looking at the tx ring, we can check
290 * to see if we're a VF by examining our tail register
291 * address.
292 */
293 if ((txr->adapter->feat_en & IXGBE_FEATURE_VF) &&
294 (next->m_flags & M_MCAST))
295 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
296 #endif
297 /* Send a copy of the frame to the BPF listener */
298 bpf_mtap(ifp, next);
299 if ((ifp->if_flags & IFF_RUNNING) == 0)
300 break;
301 }
302
303 if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->adapter))
304 ixgbe_txeof(txr);
305
306 return (err);
307 } /* ixgbe_mq_start_locked */
308
309 /************************************************************************
310 * ixgbe_deferred_mq_start
311 *
312 * Called from a softint and workqueue (indirectly) to drain queued
313 * transmit packets.
314 ************************************************************************/
315 void
316 ixgbe_deferred_mq_start(void *arg)
317 {
318 struct tx_ring *txr = arg;
319 struct adapter *adapter = txr->adapter;
320 struct ifnet *ifp = adapter->ifp;
321
322 IXGBE_TX_LOCK(txr);
323 if (pcq_peek(txr->txr_interq) != NULL)
324 ixgbe_mq_start_locked(ifp, txr);
325 IXGBE_TX_UNLOCK(txr);
326 } /* ixgbe_deferred_mq_start */
327
328 /************************************************************************
329 * ixgbe_deferred_mq_start_work
330 *
331 * Called from a workqueue to drain queued transmit packets.
332 ************************************************************************/
333 void
334 ixgbe_deferred_mq_start_work(struct work *wk, void *arg)
335 {
336 struct tx_ring *txr = container_of(wk, struct tx_ring, wq_cookie);
337 struct adapter *adapter = txr->adapter;
338 u_int *enqueued = percpu_getref(adapter->txr_wq_enqueued);
339 *enqueued = 0;
340 percpu_putref(adapter->txr_wq_enqueued);
341
342 ixgbe_deferred_mq_start(txr);
343 } /* ixgbe_deferred_mq_start_work */
344
345
346 /************************************************************************
347 * ixgbe_xmit
348 *
349 * Maps the mbufs to tx descriptors, allowing the
350 * TX engine to transmit the packets.
351 *
352 * Return 0 on success, positive on failure
353 ************************************************************************/
354 static int
355 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
356 {
357 struct adapter *adapter = txr->adapter;
358 struct ixgbe_tx_buf *txbuf;
359 union ixgbe_adv_tx_desc *txd = NULL;
360 struct ifnet *ifp = adapter->ifp;
361 int i, j, error;
362 int first;
363 u32 olinfo_status = 0, cmd_type_len;
364 bool remap = TRUE;
365 bus_dmamap_t map;
366
367 /* Basic descriptor defines */
368 cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
369 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
370
371 if (vlan_has_tag(m_head))
372 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
373
374 /*
375 * Important to capture the first descriptor
376 * used because it will contain the index of
377 * the one we tell the hardware to report back
378 */
379 first = txr->next_avail_desc;
380 txbuf = &txr->tx_buffers[first];
381 map = txbuf->map;
382
383 /*
384 * Map the packet for DMA.
385 */
386 retry:
387 error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map, m_head,
388 BUS_DMA_NOWAIT);
389
390 if (__predict_false(error)) {
391 struct mbuf *m;
392
393 switch (error) {
394 case EAGAIN:
395 adapter->eagain_tx_dma_setup.ev_count++;
396 return EAGAIN;
397 case ENOMEM:
398 adapter->enomem_tx_dma_setup.ev_count++;
399 return EAGAIN;
400 case EFBIG:
401 /* Try it again? - one try */
402 if (remap == TRUE) {
403 remap = FALSE;
404 /*
405 * XXX: m_defrag will choke on
406 * non-MCLBYTES-sized clusters
407 */
408 adapter->efbig_tx_dma_setup.ev_count++;
409 m = m_defrag(m_head, M_NOWAIT);
410 if (m == NULL) {
411 adapter->mbuf_defrag_failed.ev_count++;
412 return ENOBUFS;
413 }
414 m_head = m;
415 goto retry;
416 } else {
417 adapter->efbig2_tx_dma_setup.ev_count++;
418 return error;
419 }
420 case EINVAL:
421 adapter->einval_tx_dma_setup.ev_count++;
422 return error;
423 default:
424 adapter->other_tx_dma_setup.ev_count++;
425 return error;
426 }
427 }
428
429 /* Make certain there are enough descriptors */
430 if (txr->tx_avail < (map->dm_nsegs + 2)) {
431 txr->no_desc_avail.ev_count++;
432 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
433 return EAGAIN;
434 }
435
436 /*
437 * Set up the appropriate offload context;
438 * this will consume the first descriptor.
439 */
440 error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
441 if (__predict_false(error)) {
442 return (error);
443 }
444
445 /* Do the flow director magic */
446 if ((adapter->feat_en & IXGBE_FEATURE_FDIR) &&
447 (txr->atr_sample) && (!adapter->fdir_reinit)) {
448 ++txr->atr_count;
449 if (txr->atr_count >= atr_sample_rate) {
450 ixgbe_atr(txr, m_head);
451 txr->atr_count = 0;
452 }
453 }
454
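/*
 * CC ("check context") makes the data descriptors below use the
 * most recently loaded TX context descriptor.
 */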
455 olinfo_status |= IXGBE_ADVTXD_CC;
456 i = txr->next_avail_desc;
457 for (j = 0; j < map->dm_nsegs; j++) {
458 bus_size_t seglen;
459 bus_addr_t segaddr;
460
461 txbuf = &txr->tx_buffers[i];
462 txd = &txr->tx_base[i];
463 seglen = map->dm_segs[j].ds_len;
464 segaddr = htole64(map->dm_segs[j].ds_addr);
465
466 txd->read.buffer_addr = segaddr;
467 txd->read.cmd_type_len = htole32(txr->txd_cmd |
468 cmd_type_len | seglen);
469 txd->read.olinfo_status = htole32(olinfo_status);
470
471 if (++i == txr->num_desc)
472 i = 0;
473 }
474
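/*
 * The last descriptor gets EOP (end of packet) and RS (report
 * status), so the hardware sets DD on it when the frame has been
 * sent; ixgbe_txeof() polls that bit.
 */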
475 txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
476 txr->tx_avail -= map->dm_nsegs;
477 txr->next_avail_desc = i;
478
479 txbuf->m_head = m_head;
480 /*
481 * Here we swap the map so the last descriptor,
482 * which gets the completion interrupt, has the
483 * real map, and the first descriptor gets the
484 * unused map from this descriptor.
485 */
486 txr->tx_buffers[first].map = txbuf->map;
487 txbuf->map = map;
488 bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
489 BUS_DMASYNC_PREWRITE);
490
491 /* Set the EOP descriptor that will be marked done */
492 txbuf = &txr->tx_buffers[first];
493 txbuf->eop = txd;
494
495 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
496 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
497 /*
498 * Advance the Transmit Descriptor Tail (TDT); this tells the
499 * hardware that this frame is available to transmit.
500 */
501 ++txr->total_packets.ev_count;
502 IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
503
504 /*
505 * XXXX NOMPSAFE: ifp->if_data should be percpu.
506 */
507 ifp->if_obytes += m_head->m_pkthdr.len;
508 if (m_head->m_flags & M_MCAST)
509 ifp->if_omcasts++;
510
511 /* Mark queue as having work */
512 if (txr->busy == 0)
513 txr->busy = 1;
514
515 return (0);
516 } /* ixgbe_xmit */
517
518
519 /************************************************************************
520 * ixgbe_allocate_transmit_buffers
521 *
522 * Allocate memory for tx_buffer structures. The tx_buffer stores all
523 * the information needed to transmit a packet on the wire. This is
524 * called only once at attach; setup is done on every reset.
525 ************************************************************************/
526 static int
527 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
528 {
529 struct adapter *adapter = txr->adapter;
530 device_t dev = adapter->dev;
531 struct ixgbe_tx_buf *txbuf;
532 int error, i;
533
534 /*
535 * Setup DMA descriptor areas.
536 */
537 error = ixgbe_dma_tag_create(
538 /* parent */ adapter->osdep.dmat,
539 /* alignment */ 1,
540 /* bounds */ 0,
541 /* maxsize */ IXGBE_TSO_SIZE,
542 /* nsegments */ adapter->num_segs,
543 /* maxsegsize */ PAGE_SIZE,
544 /* flags */ 0,
545 &txr->txtag);
546 if (error != 0) {
547 aprint_error_dev(dev,"Unable to allocate TX DMA tag\n");
548 goto fail;
549 }
550
551 txr->tx_buffers =
552 (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
553 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
554 if (txr->tx_buffers == NULL) {
555 aprint_error_dev(dev, "Unable to allocate tx_buffer memory\n");
556 error = ENOMEM;
557 goto fail;
558 }
559
560 /* Create the descriptor buffer dma maps */
561 txbuf = txr->tx_buffers;
562 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
563 error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
564 if (error != 0) {
565 aprint_error_dev(dev,
566 "Unable to create TX DMA map (%d)\n", error);
567 goto fail;
568 }
569 }
570
571 return 0;
572 fail:
573 /* We free everything; this handles the case where we failed partway through */
574 #if 0 /* XXX was FreeBSD */
575 ixgbe_free_transmit_structures(adapter);
576 #else
577 ixgbe_free_transmit_buffers(txr);
578 #endif
579 return (error);
580 } /* ixgbe_allocate_transmit_buffers */
581
582 /************************************************************************
583 * ixgbe_setup_transmit_ring - Initialize a transmit ring.
584 ************************************************************************/
585 static void
586 ixgbe_setup_transmit_ring(struct tx_ring *txr)
587 {
588 struct adapter *adapter = txr->adapter;
589 struct ixgbe_tx_buf *txbuf;
590 #ifdef DEV_NETMAP
591 struct netmap_adapter *na = NA(adapter->ifp);
592 struct netmap_slot *slot;
593 #endif /* DEV_NETMAP */
594
595 /* Clear the old ring contents */
596 IXGBE_TX_LOCK(txr);
597
598 #ifdef DEV_NETMAP
599 if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
600 /*
601 * (under lock): if in netmap mode, do some consistency
602 * checks and set slot to entry 0 of the netmap ring.
603 */
604 slot = netmap_reset(na, NR_TX, txr->me, 0);
605 }
606 #endif /* DEV_NETMAP */
607
608 bzero((void *)txr->tx_base,
609 (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
610 /* Reset indices */
611 txr->next_avail_desc = 0;
612 txr->next_to_clean = 0;
613
614 /* Free any existing tx buffers. */
615 txbuf = txr->tx_buffers;
616 for (int i = 0; i < txr->num_desc; i++, txbuf++) {
617 if (txbuf->m_head != NULL) {
618 bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
619 0, txbuf->m_head->m_pkthdr.len,
620 BUS_DMASYNC_POSTWRITE);
621 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
622 m_freem(txbuf->m_head);
623 txbuf->m_head = NULL;
624 }
625
626 #ifdef DEV_NETMAP
627 /*
628 * In netmap mode, set the map for the packet buffer.
629 * NOTE: Some drivers (not this one) also need to set
630 * the physical buffer address in the NIC ring.
631 * Slots in the netmap ring (indexed by "si") are
632 * kring->nkr_hwofs positions "ahead" wrt the
633 * corresponding slot in the NIC ring. In some drivers
634 * (not here) nkr_hwofs can be negative. Function
635 * netmap_idx_n2k() handles wraparounds properly.
636 */
637 if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
638 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
639 netmap_load_map(na, txr->txtag,
640 txbuf->map, NMB(na, slot + si));
641 }
642 #endif /* DEV_NETMAP */
643
644 /* Clear the EOP descriptor pointer */
645 txbuf->eop = NULL;
646 }
647
648 /* Set the rate at which we sample packets */
649 if (adapter->feat_en & IXGBE_FEATURE_FDIR)
650 txr->atr_sample = atr_sample_rate;
651
652 /* Set number of descriptors available */
653 txr->tx_avail = adapter->num_tx_desc;
654
655 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
656 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
657 IXGBE_TX_UNLOCK(txr);
658 } /* ixgbe_setup_transmit_ring */
659
660 /************************************************************************
661 * ixgbe_setup_transmit_structures - Initialize all transmit rings.
662 ************************************************************************/
663 int
664 ixgbe_setup_transmit_structures(struct adapter *adapter)
665 {
666 struct tx_ring *txr = adapter->tx_rings;
667
668 for (int i = 0; i < adapter->num_queues; i++, txr++)
669 ixgbe_setup_transmit_ring(txr);
670
671 return (0);
672 } /* ixgbe_setup_transmit_structures */
673
674 /************************************************************************
675 * ixgbe_free_transmit_structures - Free all transmit rings.
676 ************************************************************************/
677 void
678 ixgbe_free_transmit_structures(struct adapter *adapter)
679 {
680 struct tx_ring *txr = adapter->tx_rings;
681
682 for (int i = 0; i < adapter->num_queues; i++, txr++) {
683 ixgbe_free_transmit_buffers(txr);
684 ixgbe_dma_free(adapter, &txr->txdma);
685 IXGBE_TX_LOCK_DESTROY(txr);
686 }
687 free(adapter->tx_rings, M_DEVBUF);
688 } /* ixgbe_free_transmit_structures */
689
690 /************************************************************************
691 * ixgbe_free_transmit_buffers
692 *
693 * Free transmit ring related data structures.
694 ************************************************************************/
695 static void
696 ixgbe_free_transmit_buffers(struct tx_ring *txr)
697 {
698 struct adapter *adapter = txr->adapter;
699 struct ixgbe_tx_buf *tx_buffer;
700 int i;
701
702 INIT_DEBUGOUT("ixgbe_free_transmit_buffers: begin");
703
704 if (txr->tx_buffers == NULL)
705 return;
706
707 tx_buffer = txr->tx_buffers;
708 for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
709 if (tx_buffer->m_head != NULL) {
710 bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
711 0, tx_buffer->m_head->m_pkthdr.len,
712 BUS_DMASYNC_POSTWRITE);
713 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
714 m_freem(tx_buffer->m_head);
715 tx_buffer->m_head = NULL;
716 if (tx_buffer->map != NULL) {
717 ixgbe_dmamap_destroy(txr->txtag,
718 tx_buffer->map);
719 tx_buffer->map = NULL;
720 }
721 } else if (tx_buffer->map != NULL) {
722 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
723 ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
724 tx_buffer->map = NULL;
725 }
726 }
727 if (txr->txr_interq != NULL) {
728 struct mbuf *m;
729
730 while ((m = pcq_get(txr->txr_interq)) != NULL)
731 m_freem(m);
732 pcq_destroy(txr->txr_interq);
733 }
734 if (txr->tx_buffers != NULL) {
735 free(txr->tx_buffers, M_DEVBUF);
736 txr->tx_buffers = NULL;
737 }
738 if (txr->txtag != NULL) {
739 ixgbe_dma_tag_destroy(txr->txtag);
740 txr->txtag = NULL;
741 }
742 } /* ixgbe_free_transmit_buffers */
743
744 /************************************************************************
745 * ixgbe_tx_ctx_setup
746 *
747 * Advanced Context Descriptor setup for VLAN, CSUM or TSO
748 ************************************************************************/
749 static int
750 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
751 u32 *cmd_type_len, u32 *olinfo_status)
752 {
753 struct adapter *adapter = txr->adapter;
754 struct ixgbe_adv_tx_context_desc *TXD;
755 struct ether_vlan_header *eh;
756 #ifdef INET
757 struct ip *ip;
758 #endif
759 #ifdef INET6
760 struct ip6_hdr *ip6;
761 #endif
762 int ehdrlen, ip_hlen = 0;
763 int offload = TRUE;
764 int ctxd = txr->next_avail_desc;
765 u32 vlan_macip_lens = 0;
766 u32 type_tucmd_mlhl = 0;
767 u16 vtag = 0;
768 u16 etype;
769 u8 ipproto = 0;
770 char *l3d;
771
772
773 /* First check if TSO is to be used */
774 if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4 | M_CSUM_TSOv6)) {
775 int rv = ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status);
776
777 if (rv != 0)
778 ++adapter->tso_err.ev_count;
779 return rv;
780 }
781
782 if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
783 offload = FALSE;
784
785 /* Indicate the whole packet as payload when not doing TSO */
786 *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
787
788 /* Now ready a context descriptor */
789 TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
790
791 /*
792 * In advanced descriptors the vlan tag must
793 * be placed into the context descriptor. Hence
794 * we need to make one even if not doing offloads.
795 */
796 if (vlan_has_tag(mp)) {
797 vtag = htole16(vlan_get_tag(mp));
798 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
799 } else if (!(txr->adapter->feat_en & IXGBE_FEATURE_NEEDS_CTXD) &&
800 (offload == FALSE))
801 return (0);
802
803 /*
804 * Determine where frame payload starts.
805 * Jump over vlan headers if already present,
806 * helpful for QinQ too.
807 */
808 KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
809 eh = mtod(mp, struct ether_vlan_header *);
810 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
811 KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
812 etype = ntohs(eh->evl_proto);
813 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
814 } else {
815 etype = ntohs(eh->evl_encap_proto);
816 ehdrlen = ETHER_HDR_LEN;
817 }
818
819 /* Set the ether header length */
820 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
821
822 if (offload == FALSE)
823 goto no_offloads;
824
825 /*
826 * If the first mbuf only includes the ethernet header,
827 * jump to the next one
828 * XXX: This assumes the stack splits mbufs containing headers
829 * on header boundaries
830 * XXX: And assumes the entire IP header is contained in one mbuf
831 */
832 if (mp->m_len == ehdrlen && mp->m_next)
833 l3d = mtod(mp->m_next, char *);
834 else
835 l3d = mtod(mp, char *) + ehdrlen;
836
837 switch (etype) {
838 #ifdef INET
839 case ETHERTYPE_IP:
840 ip = (struct ip *)(l3d);
841 ip_hlen = ip->ip_hl << 2;
842 ipproto = ip->ip_p;
843 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
844 KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
845 ip->ip_sum == 0);
846 break;
847 #endif
848 #ifdef INET6
849 case ETHERTYPE_IPV6:
850 ip6 = (struct ip6_hdr *)(l3d);
851 ip_hlen = sizeof(struct ip6_hdr);
852 ipproto = ip6->ip6_nxt;
853 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
854 break;
855 #endif
856 default:
857 offload = false;
858 break;
859 }
860
861 if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
862 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
863
864 vlan_macip_lens |= ip_hlen;
865
866 /* No support for offloads for non-L4 next headers */
867 switch (ipproto) {
868 case IPPROTO_TCP:
869 if (mp->m_pkthdr.csum_flags &
870 (M_CSUM_TCPv4 | M_CSUM_TCPv6))
871 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
872 else
873 offload = false;
874 break;
875 case IPPROTO_UDP:
876 if (mp->m_pkthdr.csum_flags &
877 (M_CSUM_UDPv4 | M_CSUM_UDPv6))
878 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
879 else
880 offload = false;
881 break;
882 default:
883 offload = false;
884 break;
885 }
886
887 if (offload) /* Insert L4 checksum into data descriptors */
888 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
889
890 no_offloads:
891 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
892
893 /* Now copy bits into descriptor */
894 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
895 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
896 TXD->seqnum_seed = htole32(0);
897 TXD->mss_l4len_idx = htole32(0);
898
899 /* We've consumed the first desc, adjust counters */
900 if (++ctxd == txr->num_desc)
901 ctxd = 0;
902 txr->next_avail_desc = ctxd;
903 --txr->tx_avail;
904
905 return (0);
906 } /* ixgbe_tx_ctx_setup */
907
908 /************************************************************************
909 * ixgbe_tso_setup
910 *
911 * Setup work for hardware segmentation offload (TSO) on
912 * adapters using advanced tx descriptors
913 ************************************************************************/
914 static int
915 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len,
916 u32 *olinfo_status)
917 {
918 struct ixgbe_adv_tx_context_desc *TXD;
919 struct ether_vlan_header *eh;
920 #ifdef INET6
921 struct ip6_hdr *ip6;
922 #endif
923 #ifdef INET
924 struct ip *ip;
925 #endif
926 struct tcphdr *th;
927 int ctxd, ehdrlen, ip_hlen, tcp_hlen;
928 u32 vlan_macip_lens = 0;
929 u32 type_tucmd_mlhl = 0;
930 u32 mss_l4len_idx = 0, paylen;
931 u16 vtag = 0, eh_type;
932
933 /*
934 * Determine where frame payload starts.
935 * Jump over vlan headers if already present
936 */
937 eh = mtod(mp, struct ether_vlan_header *);
938 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
939 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
940 eh_type = eh->evl_proto;
941 } else {
942 ehdrlen = ETHER_HDR_LEN;
943 eh_type = eh->evl_encap_proto;
944 }
945
946 switch (ntohs(eh_type)) {
947 #ifdef INET
948 case ETHERTYPE_IP:
949 ip = (struct ip *)(mp->m_data + ehdrlen);
950 if (ip->ip_p != IPPROTO_TCP)
951 return (ENXIO);
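/*
 * For TSO the IP checksum is cleared and the TCP checksum is
 * seeded with the pseudo-header sum (excluding the length),
 * which the hardware then completes for each segment.
 */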
952 ip->ip_sum = 0;
953 ip_hlen = ip->ip_hl << 2;
954 th = (struct tcphdr *)((char *)ip + ip_hlen);
955 th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
956 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
957 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
958 /* Tell transmit desc to also do IPv4 checksum. */
959 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
960 break;
961 #endif
962 #ifdef INET6
963 case ETHERTYPE_IPV6:
964 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
965 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
966 if (ip6->ip6_nxt != IPPROTO_TCP)
967 return (ENXIO);
968 ip_hlen = sizeof(struct ip6_hdr);
969 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
970 th = (struct tcphdr *)((char *)ip6 + ip_hlen);
971 th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
972 &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
973 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
974 break;
975 #endif
976 default:
977 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
978 __func__, ntohs(eh_type));
979 break;
980 }
981
982 ctxd = txr->next_avail_desc;
983 TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
984
985 tcp_hlen = th->th_off << 2;
986
987 /* TCP payload length (total minus L2/L3/L4 headers); used in the data descriptor */
988 paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
989
990 /* VLAN MACLEN IPLEN */
991 if (vlan_has_tag(mp)) {
992 vtag = htole16(vlan_get_tag(mp));
993 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
994 }
995
996 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
997 vlan_macip_lens |= ip_hlen;
998 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
999
1000 /* ADV DTYPE TUCMD */
1001 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
1002 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
1003 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
1004
1005 /* MSS L4LEN IDX */
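/*
 * m_pkthdr.segsz is the MSS requested by the stack; the hardware
 * splits the TCP payload into segments of at most this size.
 */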
1006 mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
1007 mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
1008 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
1009
1010 TXD->seqnum_seed = htole32(0);
1011
1012 if (++ctxd == txr->num_desc)
1013 ctxd = 0;
1014
1015 txr->tx_avail--;
1016 txr->next_avail_desc = ctxd;
1017 *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
1018 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
1019 *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
1020 ++txr->tso_tx.ev_count;
1021
1022 return (0);
1023 } /* ixgbe_tso_setup */
1024
1025
1026 /************************************************************************
1027 * ixgbe_txeof
1028 *
1029 * Examine each tx_buffer in the used queue. If the hardware is done
1030 * processing the packet then free associated resources. The
1031 * tx_buffer is put back on the free queue.
1032 ************************************************************************/
1033 bool
1034 ixgbe_txeof(struct tx_ring *txr)
1035 {
1036 struct adapter *adapter = txr->adapter;
1037 struct ifnet *ifp = adapter->ifp;
1038 struct ixgbe_tx_buf *buf;
1039 union ixgbe_adv_tx_desc *txd;
1040 u32 work, processed = 0;
1041 u32 limit = adapter->tx_process_limit;
1042
1043 KASSERT(mutex_owned(&txr->tx_mtx));
1044
1045 #ifdef DEV_NETMAP
1046 if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) &&
1047 (adapter->ifp->if_capenable & IFCAP_NETMAP)) {
1048 struct netmap_adapter *na = NA(adapter->ifp);
1049 struct netmap_kring *kring = &na->tx_rings[txr->me];
1050 txd = txr->tx_base;
1051 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1052 BUS_DMASYNC_POSTREAD);
1053 /*
1054 * In netmap mode, all the work is done in the context
1055 * of the client thread. Interrupt handlers only wake up
1056 * clients, which may be sleeping on individual rings
1057 * or on a global resource for all rings.
1058 * To implement tx interrupt mitigation, we wake up the client
1059 * thread roughly every half ring, even if the NIC interrupts
1060 * more frequently. This is implemented as follows:
1061 * - ixgbe_txsync() sets kring->nr_kflags with the index of
1062 * the slot that should wake up the thread (nkr_num_slots
1063 * means the user thread should not be woken up);
1064 * - the driver ignores tx interrupts unless netmap_mitigate=0
1065 * or the slot has the DD bit set.
1066 */
1067 if (!netmap_mitigate ||
1068 (kring->nr_kflags < kring->nkr_num_slots &&
1069 txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
1070 netmap_tx_irq(ifp, txr->me);
1071 }
1072 return false;
1073 }
1074 #endif /* DEV_NETMAP */
1075
1076 if (txr->tx_avail == txr->num_desc) {
1077 txr->busy = 0;
1078 return false;
1079 }
1080
1081 /* Get work starting point */
1082 work = txr->next_to_clean;
1083 buf = &txr->tx_buffers[work];
1084 txd = &txr->tx_base[work];
1085 work -= txr->num_desc; /* The distance to ring end */
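/*
 * 'work' is kept as a negative offset from the end of the ring;
 * it reaches zero exactly when the index wraps, which is what the
 * '!work' tests below detect, and it is converted back to a real
 * index by adding num_desc at the end.
 */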
1086 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1087 BUS_DMASYNC_POSTREAD);
1088
1089 do {
1090 union ixgbe_adv_tx_desc *eop = buf->eop;
1091 if (eop == NULL) /* No work */
1092 break;
1093
1094 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1095 break; /* I/O not complete */
1096
1097 if (buf->m_head) {
1098 txr->bytes += buf->m_head->m_pkthdr.len;
1099 bus_dmamap_sync(txr->txtag->dt_dmat, buf->map,
1100 0, buf->m_head->m_pkthdr.len,
1101 BUS_DMASYNC_POSTWRITE);
1102 ixgbe_dmamap_unload(txr->txtag, buf->map);
1103 m_freem(buf->m_head);
1104 buf->m_head = NULL;
1105 }
1106 buf->eop = NULL;
1107 ++txr->tx_avail;
1108
1109 /* Clean the rest of the descriptors for a multi-segment packet */
1110 while (txd != eop) {
1111 ++txd;
1112 ++buf;
1113 ++work;
1114 /* wrap the ring? */
1115 if (__predict_false(!work)) {
1116 work -= txr->num_desc;
1117 buf = txr->tx_buffers;
1118 txd = txr->tx_base;
1119 }
1120 if (buf->m_head) {
1121 txr->bytes +=
1122 buf->m_head->m_pkthdr.len;
1123 bus_dmamap_sync(txr->txtag->dt_dmat,
1124 buf->map,
1125 0, buf->m_head->m_pkthdr.len,
1126 BUS_DMASYNC_POSTWRITE);
1127 ixgbe_dmamap_unload(txr->txtag,
1128 buf->map);
1129 m_freem(buf->m_head);
1130 buf->m_head = NULL;
1131 }
1132 ++txr->tx_avail;
1133 buf->eop = NULL;
1134
1135 }
1136 ++txr->packets;
1137 ++processed;
1138 ++ifp->if_opackets;
1139
1140 /* Try the next packet */
1141 ++txd;
1142 ++buf;
1143 ++work;
1144 /* reset with a wrap */
1145 if (__predict_false(!work)) {
1146 work -= txr->num_desc;
1147 buf = txr->tx_buffers;
1148 txd = txr->tx_base;
1149 }
1150 prefetch(txd);
1151 } while (__predict_true(--limit));
1152
1153 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1154 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1155
1156 work += txr->num_desc;
1157 txr->next_to_clean = work;
1158
1159 /*
1160 * Queue hang detection: we know there is
1161 * work outstanding or the first return
1162 * would have been taken, so increment busy
1163 * if nothing managed to get cleaned; the
1164 * local timer will then check it and mark
1165 * the queue HUNG if it exceeds a MAX attempt.
1166 */
1167 if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1168 ++txr->busy;
1169 /*
1170 * If anything gets cleaned we reset state to 1,
1171 * note this will turn off HUNG if it's set.
1172 */
1173 if (processed)
1174 txr->busy = 1;
1175
1176 if (txr->tx_avail == txr->num_desc)
1177 txr->busy = 0;
1178
1179 return ((limit > 0) ? false : true);
1180 } /* ixgbe_txeof */
1181
1182 /************************************************************************
1183 * ixgbe_rsc_count
1184 *
1185 * Used to detect a descriptor that has been merged by Hardware RSC.
1186 ************************************************************************/
1187 static inline u32
1188 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1189 {
1190 return (le32toh(rx->wb.lower.lo_dword.data) &
1191 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1192 } /* ixgbe_rsc_count */
1193
1194 /************************************************************************
1195 * ixgbe_setup_hw_rsc
1196 *
1197 * Initialize Hardware RSC (LRO) feature on 82599
1198 * for an RX ring; this is toggled by the LRO capability
1199 * even though it is transparent to the stack.
1200 *
1201 * NOTE: Since this HW feature only works with IPv4 and
1202 * testing has shown soft LRO to be as effective,
1203 * this feature will be disabled by default.
1204 ************************************************************************/
1205 static void
1206 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1207 {
1208 struct adapter *adapter = rxr->adapter;
1209 struct ixgbe_hw *hw = &adapter->hw;
1210 u32 rscctrl, rdrxctl;
1211
1212 /* If turning LRO/RSC off we need to disable it */
1213 if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
1214 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1215 rscctrl &= ~IXGBE_RSCCTL_RSCEN;
/* Write the cleared bit back so RSC is really disabled */
IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1216 return;
1217 }
1218
1219 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1220 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1221 #ifdef DEV_NETMAP
1222 /* Always strip CRC unless Netmap disabled it */
1223 if (!(adapter->feat_en & IXGBE_FEATURE_NETMAP) ||
1224 !(adapter->ifp->if_capenable & IFCAP_NETMAP) ||
1225 ix_crcstrip)
1226 #endif /* DEV_NETMAP */
1227 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1228 rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1229 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1230
1231 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1232 rscctrl |= IXGBE_RSCCTL_RSCEN;
1233 /*
1234 * Limit the total number of descriptors that
1235 * can be combined, so it does not exceed 64K
1236 */
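/*
 * For example, assuming 2 KB MCLBYTES clusters and 4 KB
 * MJUMPAGESIZE pages: 16 x 2 KB, 8 x 4 KB and 4 x 9 KB all stay
 * well below the 64 KB limit.
 */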
1237 if (rxr->mbuf_sz == MCLBYTES)
1238 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1239 else if (rxr->mbuf_sz == MJUMPAGESIZE)
1240 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1241 else if (rxr->mbuf_sz == MJUM9BYTES)
1242 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1243 else /* Using 16K cluster */
1244 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1245
1246 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1247
1248 /* Enable TCP header recognition */
1249 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1250 (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR));
1251
1252 /* Disable RSC for ACK packets */
1253 IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1254 (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1255
1256 rxr->hw_rsc = TRUE;
1257 } /* ixgbe_setup_hw_rsc */
1258
1259 /************************************************************************
1260 * ixgbe_refresh_mbufs
1261 *
1262 * Refresh mbuf buffers for RX descriptor rings
1263 * - now keeps its own state so discards due to resource
1264 * exhaustion are unnecessary; if an mbuf cannot be obtained
1265 * it just returns, keeping its placeholder, so it can simply
1266 * be called again later to retry.
1267 ************************************************************************/
1268 static void
1269 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1270 {
1271 struct adapter *adapter = rxr->adapter;
1272 struct ixgbe_rx_buf *rxbuf;
1273 struct mbuf *mp;
1274 int i, j, error;
1275 bool refreshed = false;
1276
1277 i = j = rxr->next_to_refresh;
1278 /* Control the loop with one beyond */
1279 if (++j == rxr->num_desc)
1280 j = 0;
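/*
 * 'j' looks one slot ahead of 'i', the slot being refreshed, so
 * the refresh index never catches up with 'limit'.
 */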
1281
1282 while (j != limit) {
1283 rxbuf = &rxr->rx_buffers[i];
1284 if (rxbuf->buf == NULL) {
1285 mp = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
1286 MT_DATA, M_PKTHDR, rxr->mbuf_sz);
1287 if (mp == NULL) {
1288 rxr->no_jmbuf.ev_count++;
1289 goto update;
1290 }
1291 if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1292 m_adj(mp, ETHER_ALIGN);
1293 } else
1294 mp = rxbuf->buf;
1295
1296 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1297
1298 /* If we're dealing with an mbuf that was copied rather
1299 * than replaced, there's no need to go through busdma.
1300 */
1301 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1302 /* Get the memory mapping */
1303 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
1304 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
1305 rxbuf->pmap, mp, BUS_DMA_NOWAIT);
1306 if (error != 0) {
1307 printf("Refresh mbufs: payload dmamap load failure - %d\n", error);
1308 m_free(mp);
1309 rxbuf->buf = NULL;
1310 goto update;
1311 }
1312 rxbuf->buf = mp;
1313 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1314 0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
1315 rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1316 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1317 } else {
1318 rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1319 rxbuf->flags &= ~IXGBE_RX_COPY;
1320 }
1321
1322 refreshed = true;
1323 /* Next is precalculated */
1324 i = j;
1325 rxr->next_to_refresh = i;
1326 if (++j == rxr->num_desc)
1327 j = 0;
1328 }
1329
1330 update:
1331 if (refreshed) /* Update hardware tail index */
1332 IXGBE_WRITE_REG(&adapter->hw, rxr->tail, rxr->next_to_refresh);
1333
1334 return;
1335 } /* ixgbe_refresh_mbufs */
1336
1337 /************************************************************************
1338 * ixgbe_allocate_receive_buffers
1339 *
1340 * Allocate memory for rx_buffer structures. Since we use one
1341 * rx_buffer per received packet, the maximum number of rx_buffer's
1342 * that we'll need is equal to the number of receive descriptors
1343 * that we've allocated.
1344 ************************************************************************/
1345 static int
1346 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1347 {
1348 struct adapter *adapter = rxr->adapter;
1349 device_t dev = adapter->dev;
1350 struct ixgbe_rx_buf *rxbuf;
1351 int bsize, error;
1352
1353 bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1354 rxr->rx_buffers = (struct ixgbe_rx_buf *)malloc(bsize, M_DEVBUF,
1355 M_NOWAIT | M_ZERO);
1356 if (rxr->rx_buffers == NULL) {
1357 aprint_error_dev(dev, "Unable to allocate rx_buffer memory\n");
1358 error = ENOMEM;
1359 goto fail;
1360 }
1361
1362 error = ixgbe_dma_tag_create(
1363 /* parent */ adapter->osdep.dmat,
1364 /* alignment */ 1,
1365 /* bounds */ 0,
1366 /* maxsize */ MJUM16BYTES,
1367 /* nsegments */ 1,
1368 /* maxsegsize */ MJUM16BYTES,
1369 /* flags */ 0,
1370 &rxr->ptag);
1371 if (error != 0) {
1372 aprint_error_dev(dev, "Unable to create RX DMA tag\n");
1373 goto fail;
1374 }
1375
1376 for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
1377 rxbuf = &rxr->rx_buffers[i];
1378 error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1379 if (error) {
1380 aprint_error_dev(dev, "Unable to create RX dma map\n");
1381 goto fail;
1382 }
1383 }
1384
1385 return (0);
1386
1387 fail:
1388 /* Frees all, but can handle partial completion */
1389 ixgbe_free_receive_structures(adapter);
1390
1391 return (error);
1392 } /* ixgbe_allocate_receive_buffers */
1393
1394 /************************************************************************
1395 * ixgbe_free_receive_ring
1396 ************************************************************************/
1397 static void
1398 ixgbe_free_receive_ring(struct rx_ring *rxr)
1399 {
1400 for (int i = 0; i < rxr->num_desc; i++) {
1401 ixgbe_rx_discard(rxr, i);
1402 }
1403 } /* ixgbe_free_receive_ring */
1404
1405 /************************************************************************
1406 * ixgbe_setup_receive_ring
1407 *
1408 * Initialize a receive ring and its buffers.
1409 ************************************************************************/
1410 static int
1411 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1412 {
1413 struct adapter *adapter;
1414 struct ixgbe_rx_buf *rxbuf;
1415 #ifdef LRO
1416 struct ifnet *ifp;
1417 struct lro_ctrl *lro = &rxr->lro;
1418 #endif /* LRO */
1419 #ifdef DEV_NETMAP
1420 struct netmap_adapter *na = NA(rxr->adapter->ifp);
1421 struct netmap_slot *slot;
1422 #endif /* DEV_NETMAP */
1423 int rsize, error = 0;
1424
1425 adapter = rxr->adapter;
1426 #ifdef LRO
1427 ifp = adapter->ifp;
1428 #endif /* LRO */
1429
1430 /* Clear the ring contents */
1431 IXGBE_RX_LOCK(rxr);
1432
1433 #ifdef DEV_NETMAP
1434 if (adapter->feat_en & IXGBE_FEATURE_NETMAP)
1435 slot = netmap_reset(na, NR_RX, rxr->me, 0);
1436 #endif /* DEV_NETMAP */
1437
1438 rsize = roundup2(adapter->num_rx_desc *
1439 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1440 bzero((void *)rxr->rx_base, rsize);
1441 /* Cache the size */
1442 rxr->mbuf_sz = adapter->rx_mbuf_sz;
1443
1444 /* Free current RX buffer structs and their mbufs */
1445 ixgbe_free_receive_ring(rxr);
1446
1447 /* Now replenish the mbufs */
1448 for (int j = 0; j != rxr->num_desc; ++j) {
1449 struct mbuf *mp;
1450
1451 rxbuf = &rxr->rx_buffers[j];
1452
1453 #ifdef DEV_NETMAP
1454 /*
1455 * In netmap mode, fill the map and set the buffer
1456 * address in the NIC ring, considering the offset
1457 * between the netmap and NIC rings (see comment in
1458 * ixgbe_setup_transmit_ring() ). No need to allocate
1459 * an mbuf, so end the block with a continue;
1460 */
1461 if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
1462 int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
1463 uint64_t paddr;
1464 void *addr;
1465
1466 addr = PNMB(na, slot + sj, &paddr);
1467 netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1468 /* Update descriptor and the cached value */
1469 rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1470 rxbuf->addr = htole64(paddr);
1471 continue;
1472 }
1473 #endif /* DEV_NETMAP */
1474
1475 rxbuf->flags = 0;
1476 rxbuf->buf = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
1477 MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
1478 if (rxbuf->buf == NULL) {
1479 error = ENOBUFS;
1480 goto fail;
1481 }
1482 mp = rxbuf->buf;
1483 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1484 /* Get the memory mapping */
1485 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat, rxbuf->pmap,
1486 mp, BUS_DMA_NOWAIT);
1487 if (error != 0)
1488 goto fail;
1489 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1490 0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
1491 /* Update the descriptor and the cached value */
1492 rxr->rx_base[j].read.pkt_addr =
1493 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1494 rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1495 }
1496
1497
1498 /* Setup our descriptor indices */
1499 rxr->next_to_check = 0;
1500 rxr->next_to_refresh = 0;
1501 rxr->lro_enabled = FALSE;
1502 rxr->rx_copies.ev_count = 0;
1503 #if 0 /* NetBSD */
1504 rxr->rx_bytes.ev_count = 0;
1505 #if 1 /* Fix inconsistency */
1506 rxr->rx_packets.ev_count = 0;
1507 #endif
1508 #endif
1509 rxr->vtag_strip = FALSE;
1510
1511 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1512 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1513
1514 /*
1515 * Now set up the LRO interface
1516 */
1517 if (ixgbe_rsc_enable)
1518 ixgbe_setup_hw_rsc(rxr);
1519 #ifdef LRO
1520 else if (ifp->if_capenable & IFCAP_LRO) {
1521 device_t dev = adapter->dev;
1522 int err = tcp_lro_init(lro);
1523 if (err) {
1524 device_printf(dev, "LRO Initialization failed!\n");
1525 goto fail;
1526 }
1527 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1528 rxr->lro_enabled = TRUE;
1529 lro->ifp = adapter->ifp;
1530 }
1531 #endif /* LRO */
1532
1533 IXGBE_RX_UNLOCK(rxr);
1534
1535 return (0);
1536
1537 fail:
1538 ixgbe_free_receive_ring(rxr);
1539 IXGBE_RX_UNLOCK(rxr);
1540
1541 return (error);
1542 } /* ixgbe_setup_receive_ring */
1543
1544 /************************************************************************
1545 * ixgbe_setup_receive_structures - Initialize all receive rings.
1546 ************************************************************************/
1547 int
1548 ixgbe_setup_receive_structures(struct adapter *adapter)
1549 {
1550 struct rx_ring *rxr = adapter->rx_rings;
1551 int j;
1552
1553 /*
1554 * Now reinitialize our supply of jumbo mbufs. The number
1555 * or size of jumbo mbufs may have changed.
1556 * Assume every ring's rxr->ptag is the same; ring 0's is used here.
1557 */
1558 ixgbe_jcl_reinit(&adapter->jcl_head, rxr->ptag->dt_dmat,
1559 (2 * adapter->num_rx_desc) * adapter->num_queues,
1560 adapter->rx_mbuf_sz);
1561
1562 for (j = 0; j < adapter->num_queues; j++, rxr++)
1563 if (ixgbe_setup_receive_ring(rxr))
1564 goto fail;
1565
1566 return (0);
1567 fail:
1568 /*
1569 * Free RX buffers allocated so far; we will only handle
1570 * the rings that completed, since the failing case will have
1571 * cleaned up after itself. 'j' failed, so it is the terminus.
1572 */
1573 for (int i = 0; i < j; ++i) {
1574 rxr = &adapter->rx_rings[i];
1575 IXGBE_RX_LOCK(rxr);
1576 ixgbe_free_receive_ring(rxr);
1577 IXGBE_RX_UNLOCK(rxr);
1578 }
1579
1580 return (ENOBUFS);
1581 } /* ixgbe_setup_receive_structures */
1582
1583
1584 /************************************************************************
1585 * ixgbe_free_receive_structures - Free all receive rings.
1586 ************************************************************************/
1587 void
1588 ixgbe_free_receive_structures(struct adapter *adapter)
1589 {
1590 struct rx_ring *rxr = adapter->rx_rings;
1591
1592 INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1593
1594 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1595 ixgbe_free_receive_buffers(rxr);
1596 #ifdef LRO
1597 /* Free LRO memory */
1598 tcp_lro_free(&rxr->lro);
1599 #endif /* LRO */
1600 /* Free the ring memory as well */
1601 ixgbe_dma_free(adapter, &rxr->rxdma);
1602 IXGBE_RX_LOCK_DESTROY(rxr);
1603 }
1604
1605 free(adapter->rx_rings, M_DEVBUF);
1606 } /* ixgbe_free_receive_structures */
1607
1608
1609 /************************************************************************
1610 * ixgbe_free_receive_buffers - Free receive ring data structures
1611 ************************************************************************/
1612 static void
1613 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1614 {
1615 struct adapter *adapter = rxr->adapter;
1616 struct ixgbe_rx_buf *rxbuf;
1617
1618 INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1619
1620 /* Cleanup any existing buffers */
1621 if (rxr->rx_buffers != NULL) {
1622 for (int i = 0; i < adapter->num_rx_desc; i++) {
1623 rxbuf = &rxr->rx_buffers[i];
1624 ixgbe_rx_discard(rxr, i);
1625 if (rxbuf->pmap != NULL) {
1626 ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1627 rxbuf->pmap = NULL;
1628 }
1629 }
1630 if (rxr->rx_buffers != NULL) {
1631 free(rxr->rx_buffers, M_DEVBUF);
1632 rxr->rx_buffers = NULL;
1633 }
1634 }
1635
1636 if (rxr->ptag != NULL) {
1637 ixgbe_dma_tag_destroy(rxr->ptag);
1638 rxr->ptag = NULL;
1639 }
1640
1641 return;
1642 } /* ixgbe_free_receive_buffers */
1643
1644 /************************************************************************
1645 * ixgbe_rx_input
1646 ************************************************************************/
1647 static __inline void
1648 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m,
1649 u32 ptype)
1650 {
1651 struct adapter *adapter = ifp->if_softc;
1652
1653 #ifdef LRO
1654 struct ethercom *ec = &adapter->osdep.ec;
1655
1656 /*
1657 * At the moment LRO is only for IP/TCP packets whose TCP checksum
1658 * has been computed by hardware, and which have no VLAN tag in the
1659 * ethernet header. For IPv6 we do not yet support extension headers.
1660 */
1661 if (rxr->lro_enabled &&
1662 (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
1663 (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1664 ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1665 (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1666 (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1667 (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1668 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1669 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1670 /*
1671 * Send to the stack if:
1672 * - LRO not enabled, or
1673 * - no LRO resources, or
1674 * - lro enqueue fails
1675 */
1676 if (rxr->lro.lro_cnt != 0)
1677 if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1678 return;
1679 }
1680 #endif /* LRO */
1681
1682 if_percpuq_enqueue(adapter->ipq, m);
1683 } /* ixgbe_rx_input */
1684
1685 /************************************************************************
1686 * ixgbe_rx_discard
1687 ************************************************************************/
1688 static __inline void
1689 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1690 {
1691 struct ixgbe_rx_buf *rbuf;
1692
1693 rbuf = &rxr->rx_buffers[i];
1694
1695 /*
1696 * With advanced descriptors the writeback
1697 * clobbers the buffer addrs, so it's easier
1698 * to just free the existing mbufs and take
1699 * the normal refresh path to get new buffers
1700 * and mapping.
1701 */
1702
1703 if (rbuf->fmp != NULL) {/* Partial chain ? */
1704 bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1705 rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1706 m_freem(rbuf->fmp);
1707 rbuf->fmp = NULL;
1708 rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1709 } else if (rbuf->buf) {
1710 bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1711 rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1712 m_free(rbuf->buf);
1713 rbuf->buf = NULL;
1714 }
1715 ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
1716
1717 rbuf->flags = 0;
1718
1719 return;
1720 } /* ixgbe_rx_discard */
1721
1722
1723 /************************************************************************
1724 * ixgbe_rxeof
1725 *
1726 * Executes in interrupt context. It replenishes the
1727 * mbufs in the descriptor ring and sends data which has
1728 * been DMA'ed into host memory to the upper layer.
1729 *
1730 * Return TRUE for more work, FALSE for all clean.
1731 ************************************************************************/
1732 bool
1733 ixgbe_rxeof(struct ix_queue *que)
1734 {
1735 struct adapter *adapter = que->adapter;
1736 struct rx_ring *rxr = que->rxr;
1737 struct ifnet *ifp = adapter->ifp;
1738 #ifdef LRO
1739 struct lro_ctrl *lro = &rxr->lro;
1740 #endif /* LRO */
1741 union ixgbe_adv_rx_desc *cur;
1742 struct ixgbe_rx_buf *rbuf, *nbuf;
1743 int i, nextp, processed = 0;
1744 u32 staterr = 0;
1745 u32 count = adapter->rx_process_limit;
1746 #ifdef RSS
1747 u16 pkt_info;
1748 #endif
1749
1750 IXGBE_RX_LOCK(rxr);
1751
1752 #ifdef DEV_NETMAP
1753 if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
1754 /* Same as the txeof routine: wakeup clients on intr. */
1755 if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1756 IXGBE_RX_UNLOCK(rxr);
1757 return (FALSE);
1758 }
1759 }
1760 #endif /* DEV_NETMAP */
1761
1762 for (i = rxr->next_to_check; count != 0;) {
1763 struct mbuf *sendmp, *mp;
1764 u32 rsc, ptype;
1765 u16 len;
1766 u16 vtag = 0;
1767 bool eop;
1768
1769 /* Sync the ring. */
1770 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1771 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1772
1773 cur = &rxr->rx_base[i];
1774 staterr = le32toh(cur->wb.upper.status_error);
1775 #ifdef RSS
1776 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1777 #endif
1778
1779 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1780 break;
1781 if ((ifp->if_flags & IFF_RUNNING) == 0)
1782 break;
1783
1784 count--;
1785 sendmp = NULL;
1786 nbuf = NULL;
1787 rsc = 0;
1788 cur->wb.upper.status_error = 0;
1789 rbuf = &rxr->rx_buffers[i];
1790 mp = rbuf->buf;
1791
1792 len = le16toh(cur->wb.upper.length);
1793 ptype = le32toh(cur->wb.lower.lo_dword.data) &
1794 IXGBE_RXDADV_PKTTYPE_MASK;
1795 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1796
1797 /* Make sure bad packets are discarded */
1798 if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1799 #if __FreeBSD_version >= 1100036
1800 if (adapter->feat_en & IXGBE_FEATURE_VF)
1801 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1802 #endif
1803 rxr->rx_discarded.ev_count++;
1804 ixgbe_rx_discard(rxr, i);
1805 goto next_desc;
1806 }
1807
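/*
 * Sync the packet buffer so the CPU sees the payload the
 * hardware just DMA'd in before we inspect or copy it.
 */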
1808 bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1809 rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1810
1811 /*
1812 * On the 82599, which supports a hardware
1813 * LRO (called HW RSC), packets need
1814 * not be fragmented across sequential
1815 * descriptors; rather, the next descriptor
1816 * is indicated in bits of the descriptor.
1817 * This also means that we might process
1818 * more than one packet at a time, something
1819 * that had never been true before; it
1820 * required eliminating global chain pointers
1821 * in favor of what we are doing here. -jfv
1822 */
1823 if (!eop) {
1824 /*
1825 * Figure out the next descriptor
1826 * of this frame.
1827 */
1828 if (rxr->hw_rsc == TRUE) {
1829 rsc = ixgbe_rsc_count(cur);
1830 rxr->rsc_num += (rsc - 1);
1831 }
1832 if (rsc) { /* Get hardware index */
1833 nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
1834 IXGBE_RXDADV_NEXTP_SHIFT);
1835 } else { /* Just sequential */
1836 nextp = i + 1;
1837 if (nextp == adapter->num_rx_desc)
1838 nextp = 0;
1839 }
1840 nbuf = &rxr->rx_buffers[nextp];
1841 prefetch(nbuf);
1842 }
1843 /*
1844 * Rather than using the fmp/lmp global pointers
1845 * we now keep the head of a packet chain in the
1846 * buffer struct and pass this along from one
1847 * descriptor to the next, until we get EOP.
1848 */
1849 mp->m_len = len;
1850 /*
1851 * See if there is a stored head
1852 * from an earlier descriptor of this frame.
1853 */
1854 sendmp = rbuf->fmp;
1855 if (sendmp != NULL) { /* secondary frag */
1856 rbuf->buf = rbuf->fmp = NULL;
1857 mp->m_flags &= ~M_PKTHDR;
1858 sendmp->m_pkthdr.len += mp->m_len;
1859 } else {
1860 /*
1861 * Optimize. This might be a small packet,
1862 * maybe just a TCP ACK. Do a fast copy that
1863 * is cache aligned into a new mbuf, and
1864 * leave the old mbuf+cluster for re-use.
1865 */
1866 if (eop && len <= IXGBE_RX_COPY_LEN) {
1867 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1868 if (sendmp != NULL) {
1869 sendmp->m_data += IXGBE_RX_COPY_ALIGN;
1870 ixgbe_bcopy(mp->m_data, sendmp->m_data,
1871 len);
1872 sendmp->m_len = len;
1873 rxr->rx_copies.ev_count++;
1874 rbuf->flags |= IXGBE_RX_COPY;
1875 }
1876 }
1877 if (sendmp == NULL) {
1878 rbuf->buf = rbuf->fmp = NULL;
1879 sendmp = mp;
1880 }
1881
1882 /* first desc of a non-packet-split chain */
1883 sendmp->m_flags |= M_PKTHDR;
1884 sendmp->m_pkthdr.len = mp->m_len;
1885 }
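/* Count descriptors handled since the last mbuf refresh. */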
1886 ++processed;
1887
1888 /* Pass the head pointer on */
1889 if (eop == 0) {
1890 nbuf->fmp = sendmp;
1891 sendmp = NULL;
1892 mp->m_next = nbuf->buf;
1893 } else { /* Sending this frame */
1894 m_set_rcvif(sendmp, ifp);
1895 ++rxr->packets;
1896 rxr->rx_packets.ev_count++;
1897 /* capture data for AIM */
1898 rxr->bytes += sendmp->m_pkthdr.len;
1899 rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
1900 /* Process vlan info */
1901 if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
1902 vtag = le16toh(cur->wb.upper.vlan);
1903 if (vtag) {
1904 vlan_set_tag(sendmp, vtag);
1905 }
1906 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
1907 ixgbe_rx_checksum(staterr, sendmp, ptype,
1908 &adapter->stats.pf);
1909 }
1910
1911 #if 0 /* FreeBSD */
1912 /*
1913 * In case of multiqueue, we have RXCSUM.PCSD bit set
1914 * and never cleared. This means we have RSS hash
1915 * available to be used.
1916 */
1917 if (adapter->num_queues > 1) {
1918 sendmp->m_pkthdr.flowid =
1919 le32toh(cur->wb.lower.hi_dword.rss);
1920 switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
1921 case IXGBE_RXDADV_RSSTYPE_IPV4:
1922 M_HASHTYPE_SET(sendmp,
1923 M_HASHTYPE_RSS_IPV4);
1924 break;
1925 case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
1926 M_HASHTYPE_SET(sendmp,
1927 M_HASHTYPE_RSS_TCP_IPV4);
1928 break;
1929 case IXGBE_RXDADV_RSSTYPE_IPV6:
1930 M_HASHTYPE_SET(sendmp,
1931 M_HASHTYPE_RSS_IPV6);
1932 break;
1933 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
1934 M_HASHTYPE_SET(sendmp,
1935 M_HASHTYPE_RSS_TCP_IPV6);
1936 break;
1937 case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
1938 M_HASHTYPE_SET(sendmp,
1939 M_HASHTYPE_RSS_IPV6_EX);
1940 break;
1941 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
1942 M_HASHTYPE_SET(sendmp,
1943 M_HASHTYPE_RSS_TCP_IPV6_EX);
1944 break;
1945 #if __FreeBSD_version > 1100000
1946 case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
1947 M_HASHTYPE_SET(sendmp,
1948 M_HASHTYPE_RSS_UDP_IPV4);
1949 break;
1950 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
1951 M_HASHTYPE_SET(sendmp,
1952 M_HASHTYPE_RSS_UDP_IPV6);
1953 break;
1954 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
1955 M_HASHTYPE_SET(sendmp,
1956 M_HASHTYPE_RSS_UDP_IPV6_EX);
1957 break;
1958 #endif
1959 default:
1960 M_HASHTYPE_SET(sendmp,
1961 M_HASHTYPE_OPAQUE_HASH);
1962 }
1963 } else {
1964 sendmp->m_pkthdr.flowid = que->msix;
1965 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1966 }
1967 #endif
1968 }
1969 next_desc:
1970 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1971 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1972
1973 /* Advance our pointers to the next descriptor. */
1974 if (++i == rxr->num_desc)
1975 i = 0;
1976
1977 /* Now send to the stack or do LRO */
1978 if (sendmp != NULL) {
1979 rxr->next_to_check = i;
1980 IXGBE_RX_UNLOCK(rxr);
1981 ixgbe_rx_input(rxr, ifp, sendmp, ptype);
1982 IXGBE_RX_LOCK(rxr);
1983 i = rxr->next_to_check;
1984 }
1985
1986 /* Every 8 descriptors we refresh the mbufs */
1987 if (processed == 8) {
1988 ixgbe_refresh_mbufs(rxr, i);
1989 processed = 0;
1990 }
1991 }
1992
1993 /* Refresh any remaining buf structs */
1994 if (ixgbe_rx_unrefreshed(rxr))
1995 ixgbe_refresh_mbufs(rxr, i);
1996
1997 rxr->next_to_check = i;
1998
1999 IXGBE_RX_UNLOCK(rxr);
2000
2001 #ifdef LRO
2002 /*
2003 * Flush any outstanding LRO work
2004 */
2005 tcp_lro_flush_all(lro);
2006 #endif /* LRO */
2007
2008 /*
2009 * Still have cleaning to do?
2010 */
2011 if ((staterr & IXGBE_RXD_STAT_DD) != 0)
2012 return (TRUE);
2013
2014 return (FALSE);
2015 } /* ixgbe_rxeof */
2016
2017
2018 /************************************************************************
2019 * ixgbe_rx_checksum
2020 *
2021 * Verify that the hardware indicated that the checksum is valid.
2022 * Inform the stack about the status of the checksum so that the
2023 * stack doesn't spend time verifying it.
2024 ************************************************************************/
2025 static void
2026 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
2027 struct ixgbe_hw_stats *stats)
2028 {
2029 u16 status = (u16)staterr;
2030 u8 errors = (u8)(staterr >> 24);
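/*
 * The descriptor's status bits live in the low word of staterr;
 * the checksum error bits (IPE, TCPE) live in the top byte,
 * hence the shift by 24 above.
 */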
2031 #if 0
2032 bool sctp = false;
2033
2034 if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
2035 (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
2036 sctp = true;
2037 #endif
2038
2039 /* IPv4 checksum */
2040 if (status & IXGBE_RXD_STAT_IPCS) {
2041 stats->ipcs.ev_count++;
2042 if (!(errors & IXGBE_RXD_ERR_IPE)) {
2043 /* IP Checksum Good */
2044 mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
2045 } else {
2046 stats->ipcs_bad.ev_count++;
2047 mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
2048 }
2049 }
2050 /* TCP/UDP/SCTP checksum */
2051 if (status & IXGBE_RXD_STAT_L4CS) {
2052 stats->l4cs.ev_count++;
2053 int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
2054 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
2055 mp->m_pkthdr.csum_flags |= type;
2056 } else {
2057 stats->l4cs_bad.ev_count++;
2058 mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
2059 }
2060 }
2061 } /* ixgbe_rx_checksum */
2062
2063 /************************************************************************
2064 * ixgbe_dma_malloc
2065 ************************************************************************/
2066 int
2067 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
2068 struct ixgbe_dma_alloc *dma, const int mapflags)
2069 {
2070 device_t dev = adapter->dev;
2071 int r, rsegs;
2072
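/*
 * Standard bus_dma(9) setup: create a tag, allocate DMA-safe
 * memory, map it into kernel virtual address space, create a
 * map, then load the map to obtain the bus address.
 */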
2073 r = ixgbe_dma_tag_create(
2074 /* parent */ adapter->osdep.dmat,
2075 /* alignment */ DBA_ALIGN,
2076 /* bounds */ 0,
2077 /* maxsize */ size,
2078 /* nsegments */ 1,
2079 /* maxsegsize */ size,
2080 /* flags */ BUS_DMA_ALLOCNOW,
2081 &dma->dma_tag);
2082 if (r != 0) {
2083 aprint_error_dev(dev,
2084 "%s: ixgbe_dma_tag_create failed; error %d\n", __func__, r);
2085 goto fail_0;
2086 }
2087
2088 r = bus_dmamem_alloc(dma->dma_tag->dt_dmat, size,
2089 dma->dma_tag->dt_alignment, dma->dma_tag->dt_boundary,
2090 &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
2091 if (r != 0) {
2092 aprint_error_dev(dev,
2093 "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
2094 goto fail_1;
2095 }
2096
2097 r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
2098 size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
2099 if (r != 0) {
2100 aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
2101 __func__, r);
2102 goto fail_2;
2103 }
2104
2105 r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
2106 if (r != 0) {
2107 aprint_error_dev(dev, "%s: ixgbe_dmamap_create failed; error %d\n",
2108 __func__, r);
2109 goto fail_3;
2110 }
2111
2112 r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map,
2113 dma->dma_vaddr, size, NULL, mapflags | BUS_DMA_NOWAIT);
2114 if (r != 0) {
2115 aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
2116 __func__, r);
2117 goto fail_4;
2118 }
2119 dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
2120 dma->dma_size = size;
2121 return 0;
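/*
 * Error unwind: tear down whatever was set up, in reverse
 * order of the allocation sequence above.
 */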
2122 fail_4:
2123 ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
2124 fail_3:
2125 bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
2126 fail_2:
2127 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
2128 fail_1:
2129 ixgbe_dma_tag_destroy(dma->dma_tag);
2130 fail_0:
2131
2132 return (r);
2133 } /* ixgbe_dma_malloc */
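/*
 * Typical usage (a sketch; error handling omitted): the descriptor
 * rings below obtain their DMA memory with
 *
 *	ixgbe_dma_malloc(adapter, tsize, &txr->txdma, BUS_DMA_NOWAIT);
 *	txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
 *
 * and release it again with ixgbe_dma_free().
 */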
2134
2135 /************************************************************************
2136 * ixgbe_dma_free
2137 ************************************************************************/
2138 void
2139 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2140 {
2141 bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
2142 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2143 ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
2144 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
2145 ixgbe_dma_tag_destroy(dma->dma_tag);
2146 } /* ixgbe_dma_free */
2147
2148
2149 /************************************************************************
2150 * ixgbe_allocate_queues
2151 *
2152 * Allocate memory for the transmit and receive rings, and then
2153 * the descriptors associated with each, called only once at attach.
2154 ************************************************************************/
2155 int
2156 ixgbe_allocate_queues(struct adapter *adapter)
2157 {
2158 device_t dev = adapter->dev;
2159 struct ix_queue *que;
2160 struct tx_ring *txr;
2161 struct rx_ring *rxr;
2162 int rsize, tsize, error = IXGBE_SUCCESS;
2163 int txconf = 0, rxconf = 0;
2164
2165 /* First, allocate the top level queue structs */
2166 adapter->queues = (struct ix_queue *)malloc(sizeof(struct ix_queue) *
2167 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
2168 if (adapter->queues == NULL) {
2169 aprint_error_dev(dev, "Unable to allocate queue memory\n");
2170 error = ENOMEM;
2171 goto fail;
2172 }
2173
2174 /* Second, allocate the TX ring struct memory */
2175 adapter->tx_rings = (struct tx_ring *)malloc(sizeof(struct tx_ring) *
2176 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
2177 if (adapter->tx_rings == NULL) {
2178 aprint_error_dev(dev, "Unable to allocate TX ring memory\n");
2179 error = ENOMEM;
2180 goto tx_fail;
2181 }
2182
2183 /* Third, allocate the RX ring */
2184 adapter->rx_rings = (struct rx_ring *)malloc(sizeof(struct rx_ring) *
2185 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
2186 if (adapter->rx_rings == NULL) {
2187 aprint_error_dev(dev, "Unable to allocate RX ring memory\n");
2188 error = ENOMEM;
2189 goto rx_fail;
2190 }
2191
2192 /* For the ring itself */
2193 tsize = roundup2(adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc),
2194 DBA_ALIGN);
2195
2196 /*
2197 * Now set up the TX queues. txconf is needed to handle the
2198 * possibility that things fail midcourse and we need to
2199 * undo the memory allocations gracefully.
2200 */
2201 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2202 /* Set up some basics */
2203 txr = &adapter->tx_rings[i];
2204 txr->adapter = adapter;
2205 txr->txr_interq = NULL;
2206 /* In case SR-IOV is enabled, align the index properly */
2207 #ifdef PCI_IOV
2208 txr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
2209 i);
2210 #else
2211 txr->me = i;
2212 #endif
2213 txr->num_desc = adapter->num_tx_desc;
2214
2215 /* Initialize the TX side lock */
2216 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2217 device_xname(dev), txr->me);
2218 mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
2219
2220 if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma,
2221 BUS_DMA_NOWAIT)) {
2222 aprint_error_dev(dev,
2223 "Unable to allocate TX Descriptor memory\n");
2224 error = ENOMEM;
2225 goto err_tx_desc;
2226 }
2227 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2228 bzero((void *)txr->tx_base, tsize);
2229
2230 /* Now allocate transmit buffers for the ring */
2231 if (ixgbe_allocate_transmit_buffers(txr)) {
2232 aprint_error_dev(dev,
2233 "Critical Failure setting up transmit buffers\n");
2234 error = ENOMEM;
2235 goto err_tx_desc;
2236 }
2237 if (!(adapter->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
2238 /* Allocate a buf ring */
2239 txr->txr_interq = pcq_create(IXGBE_BR_SIZE, KM_SLEEP);
2240 if (txr->txr_interq == NULL) {
2241 aprint_error_dev(dev,
2242 "Critical Failure setting up buf ring\n");
2243 error = ENOMEM;
2244 goto err_tx_desc;
2245 }
2246 }
2247 }
2248
2249 /*
2250 * Next the RX queues...
2251 */
2252 rsize = roundup2(adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc),
2253 DBA_ALIGN);
2254 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2255 rxr = &adapter->rx_rings[i];
2256 /* Set up some basics */
2257 rxr->adapter = adapter;
2258 #ifdef PCI_IOV
2259 /* In case SR-IOV is enabled, align the index properly */
2260 rxr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
2261 i);
2262 #else
2263 rxr->me = i;
2264 #endif
2265 rxr->num_desc = adapter->num_rx_desc;
2266
2267 /* Initialize the RX side lock */
2268 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2269 device_xname(dev), rxr->me);
2270 mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
2271
2272 if (ixgbe_dma_malloc(adapter, rsize, &rxr->rxdma,
2273 BUS_DMA_NOWAIT)) {
2274 aprint_error_dev(dev,
2275 "Unable to allocate RX Descriptor memory\n");
2276 error = ENOMEM;
2277 goto err_rx_desc;
2278 }
2279 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2280 bzero((void *)rxr->rx_base, rsize);
2281
2282 /* Allocate receive buffers for the ring */
2283 if (ixgbe_allocate_receive_buffers(rxr)) {
2284 aprint_error_dev(dev,
2285 "Critical Failure setting up receive buffers\n");
2286 error = ENOMEM;
2287 goto err_rx_desc;
2288 }
2289 }
2290
2291 /*
2292 * Finally set up the queue holding structs
2293 */
2294 for (int i = 0; i < adapter->num_queues; i++) {
2295 que = &adapter->queues[i];
2296 que->adapter = adapter;
2297 que->me = i;
2298 que->txr = &adapter->tx_rings[i];
2299 que->rxr = &adapter->rx_rings[i];
2300
2301 mutex_init(&que->im_mtx, MUTEX_DEFAULT, IPL_NET);
2302 que->im_nest = 0;
2303 }
2304
2305 return (0);
2306
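/*
 * Error unwind: free the RX and TX descriptor DMA areas allocated
 * so far, then release the ring and queue arrays.
 */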
2307 err_rx_desc:
2308 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2309 ixgbe_dma_free(adapter, &rxr->rxdma);
2310 err_tx_desc:
2311 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2312 ixgbe_dma_free(adapter, &txr->txdma);
2313 free(adapter->rx_rings, M_DEVBUF);
2314 rx_fail:
2315 free(adapter->tx_rings, M_DEVBUF);
2316 tx_fail:
2317 free(adapter->queues, M_DEVBUF);
2318 fail:
2319 return (error);
2320 } /* ixgbe_allocate_queues */
2321