1 /*	$NetBSD: ix_txrx.c,v 1.33 2018/02/26 04:19:00 knakahara Exp $ */
2
3 /******************************************************************************
4
5 Copyright (c) 2001-2017, Intel Corporation
6 All rights reserved.
7
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are met:
10
11 1. Redistributions of source code must retain the above copyright notice,
12 this list of conditions and the following disclaimer.
13
14 2. Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
17
18 3. Neither the name of the Intel Corporation nor the names of its
19 contributors may be used to endorse or promote products derived from
20 this software without specific prior written permission.
21
22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
23 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
26 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 POSSIBILITY OF SUCH DAMAGE.
33
34 ******************************************************************************/
35 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 321476 2017-07-25 14:38:30Z sbruno $*/
36
37 /*
38 * Copyright (c) 2011 The NetBSD Foundation, Inc.
39 * All rights reserved.
40 *
41 * This code is derived from software contributed to The NetBSD Foundation
42 * by Coyote Point Systems, Inc.
43 *
44 * Redistribution and use in source and binary forms, with or without
45 * modification, are permitted provided that the following conditions
46 * are met:
47 * 1. Redistributions of source code must retain the above copyright
48 * notice, this list of conditions and the following disclaimer.
49 * 2. Redistributions in binary form must reproduce the above copyright
50 * notice, this list of conditions and the following disclaimer in the
51 * documentation and/or other materials provided with the distribution.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
54 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
56 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
57 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
58 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
59 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
60 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
61 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
62 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
63 * POSSIBILITY OF SUCH DAMAGE.
64 */
65
66 #include "opt_inet.h"
67 #include "opt_inet6.h"
68
69 #include "ixgbe.h"
70
71 /*
72 * HW RSC control:
73  * This feature only works with
74  * IPv4, and only on 82599 and later.
75  * It also causes IP forwarding to
76  * fail, and unlike LRO this cannot be
77  * controlled by the stack. For all these
78  * reasons I've deemed it best to leave
79  * it off, with no tunable interface;
80  * enabling it requires setting this flag
81  * and recompiling.
82 */
83 static bool ixgbe_rsc_enable = FALSE;
84
85 /*
86 * For Flow Director: this is the
87 * number of TX packets we sample
88  * for the filter pool; with the default
89  * of 20, every 20th packet is probed.
90 *
91 * This feature can be disabled by
92 * setting this to 0.
93 */
94 static int atr_sample_rate = 20;
95
96 /************************************************************************
97 * Local Function prototypes
98 ************************************************************************/
99 static void ixgbe_setup_transmit_ring(struct tx_ring *);
100 static void ixgbe_free_transmit_buffers(struct tx_ring *);
101 static int ixgbe_setup_receive_ring(struct rx_ring *);
102 static void ixgbe_free_receive_buffers(struct rx_ring *);
103 static void ixgbe_rx_checksum(u32, struct mbuf *, u32,
104 struct ixgbe_hw_stats *);
105 static void ixgbe_refresh_mbufs(struct rx_ring *, int);
106 static int ixgbe_xmit(struct tx_ring *, struct mbuf *);
107 static int ixgbe_tx_ctx_setup(struct tx_ring *,
108 struct mbuf *, u32 *, u32 *);
109 static int ixgbe_tso_setup(struct tx_ring *,
110 struct mbuf *, u32 *, u32 *);
111 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
112 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
113 struct mbuf *, u32);
114 static int ixgbe_dma_malloc(struct adapter *, bus_size_t,
115 struct ixgbe_dma_alloc *, int);
116 static void ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
117
118 static void ixgbe_setup_hw_rsc(struct rx_ring *);
119
120 /************************************************************************
121 * ixgbe_legacy_start_locked - Transmit entry point
122 *
123 * Called by the stack to initiate a transmit.
124 * The driver will remain in this routine as long as there are
125 * packets to transmit and transmit resources are available.
126 * In case resources are not available, the stack is notified
127 * and the packet is requeued.
128 ************************************************************************/
129 int
130 ixgbe_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
131 {
132 int rc;
133 struct mbuf *m_head;
134 struct adapter *adapter = txr->adapter;
135
136 IXGBE_TX_LOCK_ASSERT(txr);
137
138 if ((ifp->if_flags & IFF_RUNNING) == 0)
139 return (ENETDOWN);
140 if (!adapter->link_active)
141 return (ENETDOWN);
142
143 while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
144 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
145 break;
146
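		/*
		 * Peek at the head of if_snd first; the frame is only
		 * dequeued once ixgbe_xmit() accepts it, so an EAGAIN
		 * leaves it queued to be retried later.
		 */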
147 IFQ_POLL(&ifp->if_snd, m_head);
148 if (m_head == NULL)
149 break;
150
151 if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
152 break;
153 }
154 IFQ_DEQUEUE(&ifp->if_snd, m_head);
155 if (rc != 0) {
156 m_freem(m_head);
157 continue;
158 }
159
160 /* Send a copy of the frame to the BPF listener */
161 bpf_mtap(ifp, m_head);
162 }
163
164 return IXGBE_SUCCESS;
165 } /* ixgbe_legacy_start_locked */
166
167 /************************************************************************
168 * ixgbe_legacy_start
169 *
170 * Called by the stack, this always uses the first tx ring,
171 * and should not be used with multiqueue tx enabled.
172 ************************************************************************/
173 void
174 ixgbe_legacy_start(struct ifnet *ifp)
175 {
176 struct adapter *adapter = ifp->if_softc;
177 struct tx_ring *txr = adapter->tx_rings;
178
179 if (ifp->if_flags & IFF_RUNNING) {
180 IXGBE_TX_LOCK(txr);
181 ixgbe_legacy_start_locked(ifp, txr);
182 IXGBE_TX_UNLOCK(txr);
183 }
184 } /* ixgbe_legacy_start */
185
186 /************************************************************************
187 * ixgbe_mq_start - Multiqueue Transmit Entry Point
188 *
189 * (if_transmit function)
190 ************************************************************************/
191 int
192 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
193 {
194 struct adapter *adapter = ifp->if_softc;
195 struct tx_ring *txr;
196 int i, err = 0;
197 #ifdef RSS
198 uint32_t bucket_id;
199 #endif
200
201 /*
202 * When doing RSS, map it to the same outbound queue
203 * as the incoming flow would be mapped to.
204 *
205  * If everything is set up correctly, it should be the
206  * same bucket as the CPU we are currently running on.
207 */
208 #ifdef RSS
209 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
210 if ((adapter->feat_en & IXGBE_FEATURE_RSS) &&
211 (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
212 &bucket_id) == 0)) {
213 i = bucket_id % adapter->num_queues;
214 #ifdef IXGBE_DEBUG
215 if (bucket_id > adapter->num_queues)
216 if_printf(ifp,
217 "bucket_id (%d) > num_queues (%d)\n",
218 bucket_id, adapter->num_queues);
219 #endif
220 } else
221 i = m->m_pkthdr.flowid % adapter->num_queues;
222 } else
223 #endif /* RSS */
224 i = cpu_index(curcpu()) % adapter->num_queues;
225
226 /* Check for a hung queue and pick alternative */
227 if (((1 << i) & adapter->active_queues) == 0)
228 i = ffs64(adapter->active_queues);
229
230 txr = &adapter->tx_rings[i];
231
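	/*
	 * Enqueue on the per-ring pcq; if the TX lock can be taken,
	 * drain the queue now, otherwise schedule the per-ring softint
	 * (ixgbe_deferred_mq_start) to drain it later.
	 */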
232 err = pcq_put(txr->txr_interq, m);
233 if (err == false) {
234 m_freem(m);
235 txr->pcq_drops.ev_count++;
236 return (err);
237 }
238 if (IXGBE_TX_TRYLOCK(txr)) {
239 ixgbe_mq_start_locked(ifp, txr);
240 IXGBE_TX_UNLOCK(txr);
241 } else
242 softint_schedule(txr->txr_si);
243
244 return (0);
245 } /* ixgbe_mq_start */
246
247 /************************************************************************
248 * ixgbe_mq_start_locked
249 ************************************************************************/
250 int
251 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
252 {
253 struct mbuf *next;
254 int enqueued = 0, err = 0;
255
256 if ((ifp->if_flags & IFF_RUNNING) == 0)
257 return (ENETDOWN);
258 if (txr->adapter->link_active == 0)
259 return (ENETDOWN);
260
261 /* Process the queue */
262 while ((next = pcq_get(txr->txr_interq)) != NULL) {
263 if ((err = ixgbe_xmit(txr, next)) != 0) {
264 m_freem(next);
265 /* All errors are counted in ixgbe_xmit() */
266 break;
267 }
268 enqueued++;
269 #if __FreeBSD_version >= 1100036
270 /*
271 * Since we're looking at the tx ring, we can check
272 	 * to see if we're a VF by examining our tail register
273 * address.
274 */
275 if ((txr->adapter->feat_en & IXGBE_FEATURE_VF) &&
276 (next->m_flags & M_MCAST))
277 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
278 #endif
279 /* Send a copy of the frame to the BPF listener */
280 bpf_mtap(ifp, next);
281 if ((ifp->if_flags & IFF_RUNNING) == 0)
282 break;
283 }
284
285 if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->adapter))
286 ixgbe_txeof(txr);
287
288 return (err);
289 } /* ixgbe_mq_start_locked */
290
291 /************************************************************************
292 * ixgbe_deferred_mq_start
293 *
294 * Called from a taskqueue to drain queued transmit packets.
295 ************************************************************************/
296 void
297 ixgbe_deferred_mq_start(void *arg)
298 {
299 struct tx_ring *txr = arg;
300 struct adapter *adapter = txr->adapter;
301 struct ifnet *ifp = adapter->ifp;
302
303 IXGBE_TX_LOCK(txr);
304 if (pcq_peek(txr->txr_interq) != NULL)
305 ixgbe_mq_start_locked(ifp, txr);
306 IXGBE_TX_UNLOCK(txr);
307 } /* ixgbe_deferred_mq_start */
308
309 /************************************************************************
310 * ixgbe_xmit
311 *
312 * Maps the mbufs to tx descriptors, allowing the
313 * TX engine to transmit the packets.
314 *
315 * Return 0 on success, positive on failure
316 ************************************************************************/
317 static int
318 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
319 {
320 struct adapter *adapter = txr->adapter;
321 struct ixgbe_tx_buf *txbuf;
322 union ixgbe_adv_tx_desc *txd = NULL;
323 struct ifnet *ifp = adapter->ifp;
324 int i, j, error;
325 int first;
326 u32 olinfo_status = 0, cmd_type_len;
327 bool remap = TRUE;
328 bus_dmamap_t map;
329
330 /* Basic descriptor defines */
331 cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
332 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
333
334 if (vlan_has_tag(m_head))
335 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
336
337 /*
338 	 * It is important to capture the first descriptor
339 	 * used, because its tx_buffer will later record
340 	 * the EOP descriptor that the hardware reports on
341 */
342 first = txr->next_avail_desc;
343 txbuf = &txr->tx_buffers[first];
344 map = txbuf->map;
345
346 /*
347 * Map the packet for DMA.
348 */
349 retry:
350 error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map, m_head,
351 BUS_DMA_NOWAIT);
352
353 if (__predict_false(error)) {
354 struct mbuf *m;
355
356 switch (error) {
357 case EAGAIN:
358 adapter->eagain_tx_dma_setup.ev_count++;
359 return EAGAIN;
360 case ENOMEM:
361 adapter->enomem_tx_dma_setup.ev_count++;
362 return EAGAIN;
363 case EFBIG:
364 /* Try it again? - one try */
365 if (remap == TRUE) {
366 remap = FALSE;
367 /*
368 * XXX: m_defrag will choke on
369 * non-MCLBYTES-sized clusters
370 */
371 adapter->efbig_tx_dma_setup.ev_count++;
372 m = m_defrag(m_head, M_NOWAIT);
373 if (m == NULL) {
374 adapter->mbuf_defrag_failed.ev_count++;
375 return ENOBUFS;
376 }
377 m_head = m;
378 goto retry;
379 } else {
380 adapter->efbig2_tx_dma_setup.ev_count++;
381 return error;
382 }
383 case EINVAL:
384 adapter->einval_tx_dma_setup.ev_count++;
385 return error;
386 default:
387 adapter->other_tx_dma_setup.ev_count++;
388 return error;
389 }
390 }
391
392 	/* Make certain there are enough descriptors: the segments plus a context descriptor and slack */
393 if (txr->tx_avail < (map->dm_nsegs + 2)) {
394 txr->no_desc_avail.ev_count++;
395 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
396 return EAGAIN;
397 }
398
399 /*
400 	 * Set up the appropriate offload context;
401 	 * this will consume the first descriptor.
402 */
403 error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
404 if (__predict_false(error)) {
405 return (error);
406 }
407
408 /* Do the flow director magic */
409 if ((adapter->feat_en & IXGBE_FEATURE_FDIR) &&
410 (txr->atr_sample) && (!adapter->fdir_reinit)) {
411 ++txr->atr_count;
412 if (txr->atr_count >= atr_sample_rate) {
413 ixgbe_atr(txr, m_head);
414 txr->atr_count = 0;
415 }
416 }
417
418 olinfo_status |= IXGBE_ADVTXD_CC;
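	/*
	 * Fill one advanced data descriptor per DMA segment; the last
	 * descriptor is tagged with EOP/RS below so the hardware
	 * reports completion for the whole frame at once.
	 */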
419 i = txr->next_avail_desc;
420 for (j = 0; j < map->dm_nsegs; j++) {
421 bus_size_t seglen;
422 bus_addr_t segaddr;
423
424 txbuf = &txr->tx_buffers[i];
425 txd = &txr->tx_base[i];
426 seglen = map->dm_segs[j].ds_len;
427 segaddr = htole64(map->dm_segs[j].ds_addr);
428
429 txd->read.buffer_addr = segaddr;
430 txd->read.cmd_type_len = htole32(txr->txd_cmd |
431 cmd_type_len | seglen);
432 txd->read.olinfo_status = htole32(olinfo_status);
433
434 if (++i == txr->num_desc)
435 i = 0;
436 }
437
438 txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
439 txr->tx_avail -= map->dm_nsegs;
440 txr->next_avail_desc = i;
441
442 txbuf->m_head = m_head;
443 /*
444 	 * Here we swap the map so the last descriptor,
445 	 * which gets the completion interrupt, has the
446 * real map, and the first descriptor gets the
447 * unused map from this descriptor.
448 */
449 txr->tx_buffers[first].map = txbuf->map;
450 txbuf->map = map;
451 bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
452 BUS_DMASYNC_PREWRITE);
453
454 /* Set the EOP descriptor that will be marked done */
455 txbuf = &txr->tx_buffers[first];
456 txbuf->eop = txd;
457
458 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
459 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
460 /*
461 	 * Advance the Transmit Descriptor Tail (TDT); this tells the
462 * hardware that this frame is available to transmit.
463 */
464 ++txr->total_packets.ev_count;
465 IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
466
467 /*
468 * XXXX NOMPSAFE: ifp->if_data should be percpu.
469 */
470 ifp->if_obytes += m_head->m_pkthdr.len;
471 if (m_head->m_flags & M_MCAST)
472 ifp->if_omcasts++;
473
474 /* Mark queue as having work */
475 if (txr->busy == 0)
476 txr->busy = 1;
477
478 return (0);
479 } /* ixgbe_xmit */
480
481
482 /************************************************************************
483 * ixgbe_allocate_transmit_buffers
484 *
485 * Allocate memory for tx_buffer structures. The tx_buffer stores all
486 * the information needed to transmit a packet on the wire. This is
487  *   called only once at attach; setup is done on every reset.
488 ************************************************************************/
489 static int
490 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
491 {
492 struct adapter *adapter = txr->adapter;
493 device_t dev = adapter->dev;
494 struct ixgbe_tx_buf *txbuf;
495 int error, i;
496
497 /*
498 * Setup DMA descriptor areas.
499 */
500 error = ixgbe_dma_tag_create(
501 /* parent */ adapter->osdep.dmat,
502 /* alignment */ 1,
503 /* bounds */ 0,
504 /* maxsize */ IXGBE_TSO_SIZE,
505 /* nsegments */ adapter->num_segs,
506 /* maxsegsize */ PAGE_SIZE,
507 /* flags */ 0,
508 &txr->txtag);
509 if (error != 0) {
510 aprint_error_dev(dev,"Unable to allocate TX DMA tag\n");
511 goto fail;
512 }
513
514 txr->tx_buffers =
515 (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
516 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
517 if (txr->tx_buffers == NULL) {
518 aprint_error_dev(dev, "Unable to allocate tx_buffer memory\n");
519 error = ENOMEM;
520 goto fail;
521 }
522
523 /* Create the descriptor buffer dma maps */
524 txbuf = txr->tx_buffers;
525 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
526 error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
527 if (error != 0) {
528 aprint_error_dev(dev,
529 "Unable to create TX DMA map (%d)\n", error);
530 goto fail;
531 }
532 }
533
534 return 0;
535 fail:
536 	/* Free everything; this handles the case where we failed partway through */
537 #if 0 /* XXX was FreeBSD */
538 ixgbe_free_transmit_structures(adapter);
539 #else
540 ixgbe_free_transmit_buffers(txr);
541 #endif
542 return (error);
543 } /* ixgbe_allocate_transmit_buffers */
544
545 /************************************************************************
546 * ixgbe_setup_transmit_ring - Initialize a transmit ring.
547 ************************************************************************/
548 static void
549 ixgbe_setup_transmit_ring(struct tx_ring *txr)
550 {
551 struct adapter *adapter = txr->adapter;
552 struct ixgbe_tx_buf *txbuf;
553 #ifdef DEV_NETMAP
554 struct netmap_adapter *na = NA(adapter->ifp);
555 struct netmap_slot *slot;
556 #endif /* DEV_NETMAP */
557
558 /* Clear the old ring contents */
559 IXGBE_TX_LOCK(txr);
560
561 #ifdef DEV_NETMAP
562 if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
563 /*
564 * (under lock): if in netmap mode, do some consistency
565 * checks and set slot to entry 0 of the netmap ring.
566 */
567 slot = netmap_reset(na, NR_TX, txr->me, 0);
568 }
569 #endif /* DEV_NETMAP */
570
571 bzero((void *)txr->tx_base,
572 (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
573 /* Reset indices */
574 txr->next_avail_desc = 0;
575 txr->next_to_clean = 0;
576
577 /* Free any existing tx buffers. */
578 txbuf = txr->tx_buffers;
579 for (int i = 0; i < txr->num_desc; i++, txbuf++) {
580 if (txbuf->m_head != NULL) {
581 bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
582 0, txbuf->m_head->m_pkthdr.len,
583 BUS_DMASYNC_POSTWRITE);
584 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
585 m_freem(txbuf->m_head);
586 txbuf->m_head = NULL;
587 }
588
589 #ifdef DEV_NETMAP
590 /*
591 * In netmap mode, set the map for the packet buffer.
592 * NOTE: Some drivers (not this one) also need to set
593 * the physical buffer address in the NIC ring.
594 * Slots in the netmap ring (indexed by "si") are
595 * kring->nkr_hwofs positions "ahead" wrt the
596 * corresponding slot in the NIC ring. In some drivers
597 * (not here) nkr_hwofs can be negative. Function
598 * netmap_idx_n2k() handles wraparounds properly.
599 */
600 if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
601 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
602 netmap_load_map(na, txr->txtag,
603 txbuf->map, NMB(na, slot + si));
604 }
605 #endif /* DEV_NETMAP */
606
607 /* Clear the EOP descriptor pointer */
608 txbuf->eop = NULL;
609 }
610
611 /* Set the rate at which we sample packets */
612 if (adapter->feat_en & IXGBE_FEATURE_FDIR)
613 txr->atr_sample = atr_sample_rate;
614
615 /* Set number of descriptors available */
616 txr->tx_avail = adapter->num_tx_desc;
617
618 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
619 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
620 IXGBE_TX_UNLOCK(txr);
621 } /* ixgbe_setup_transmit_ring */
622
623 /************************************************************************
624 * ixgbe_setup_transmit_structures - Initialize all transmit rings.
625 ************************************************************************/
626 int
627 ixgbe_setup_transmit_structures(struct adapter *adapter)
628 {
629 struct tx_ring *txr = adapter->tx_rings;
630
631 for (int i = 0; i < adapter->num_queues; i++, txr++)
632 ixgbe_setup_transmit_ring(txr);
633
634 return (0);
635 } /* ixgbe_setup_transmit_structures */
636
637 /************************************************************************
638 * ixgbe_free_transmit_structures - Free all transmit rings.
639 ************************************************************************/
640 void
641 ixgbe_free_transmit_structures(struct adapter *adapter)
642 {
643 struct tx_ring *txr = adapter->tx_rings;
644
645 for (int i = 0; i < adapter->num_queues; i++, txr++) {
646 ixgbe_free_transmit_buffers(txr);
647 ixgbe_dma_free(adapter, &txr->txdma);
648 IXGBE_TX_LOCK_DESTROY(txr);
649 }
650 free(adapter->tx_rings, M_DEVBUF);
651 } /* ixgbe_free_transmit_structures */
652
653 /************************************************************************
654 * ixgbe_free_transmit_buffers
655 *
656 * Free transmit ring related data structures.
657 ************************************************************************/
658 static void
659 ixgbe_free_transmit_buffers(struct tx_ring *txr)
660 {
661 struct adapter *adapter = txr->adapter;
662 struct ixgbe_tx_buf *tx_buffer;
663 int i;
664
665 INIT_DEBUGOUT("ixgbe_free_transmit_buffers: begin");
666
667 if (txr->tx_buffers == NULL)
668 return;
669
670 tx_buffer = txr->tx_buffers;
671 for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
672 if (tx_buffer->m_head != NULL) {
673 bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
674 0, tx_buffer->m_head->m_pkthdr.len,
675 BUS_DMASYNC_POSTWRITE);
676 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
677 m_freem(tx_buffer->m_head);
678 tx_buffer->m_head = NULL;
679 if (tx_buffer->map != NULL) {
680 ixgbe_dmamap_destroy(txr->txtag,
681 tx_buffer->map);
682 tx_buffer->map = NULL;
683 }
684 } else if (tx_buffer->map != NULL) {
685 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
686 ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
687 tx_buffer->map = NULL;
688 }
689 }
690 if (txr->txr_interq != NULL) {
691 struct mbuf *m;
692
693 while ((m = pcq_get(txr->txr_interq)) != NULL)
694 m_freem(m);
695 pcq_destroy(txr->txr_interq);
696 }
697 if (txr->tx_buffers != NULL) {
698 free(txr->tx_buffers, M_DEVBUF);
699 txr->tx_buffers = NULL;
700 }
701 if (txr->txtag != NULL) {
702 ixgbe_dma_tag_destroy(txr->txtag);
703 txr->txtag = NULL;
704 }
705 } /* ixgbe_free_transmit_buffers */
706
707 /************************************************************************
708 * ixgbe_tx_ctx_setup
709 *
710 * Advanced Context Descriptor setup for VLAN, CSUM or TSO
711 ************************************************************************/
712 static int
713 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
714 u32 *cmd_type_len, u32 *olinfo_status)
715 {
716 struct adapter *adapter = txr->adapter;
717 struct ixgbe_adv_tx_context_desc *TXD;
718 struct ether_vlan_header *eh;
719 #ifdef INET
720 struct ip *ip;
721 #endif
722 #ifdef INET6
723 struct ip6_hdr *ip6;
724 #endif
725 int ehdrlen, ip_hlen = 0;
726 int offload = TRUE;
727 int ctxd = txr->next_avail_desc;
728 u32 vlan_macip_lens = 0;
729 u32 type_tucmd_mlhl = 0;
730 u16 vtag = 0;
731 u16 etype;
732 u8 ipproto = 0;
733 char *l3d;
734
735
736 /* First check if TSO is to be used */
737 if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4 | M_CSUM_TSOv6)) {
738 int rv = ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status);
739
740 if (rv != 0)
741 ++adapter->tso_err.ev_count;
742 return rv;
743 }
744
745 if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
746 offload = FALSE;
747
748 /* Indicate the whole packet as payload when not doing TSO */
749 *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
750
751 /* Now ready a context descriptor */
752 TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
753
754 /*
755 * In advanced descriptors the vlan tag must
756 * be placed into the context descriptor. Hence
757 * we need to make one even if not doing offloads.
758 */
759 if (vlan_has_tag(mp)) {
760 vtag = htole16(vlan_get_tag(mp));
761 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
762 } else if (!(txr->adapter->feat_en & IXGBE_FEATURE_NEEDS_CTXD) &&
763 (offload == FALSE))
764 return (0);
765
766 /*
767 * Determine where frame payload starts.
768 * Jump over vlan headers if already present,
769 * helpful for QinQ too.
770 */
771 KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
772 eh = mtod(mp, struct ether_vlan_header *);
773 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
774 KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
775 etype = ntohs(eh->evl_proto);
776 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
777 } else {
778 etype = ntohs(eh->evl_encap_proto);
779 ehdrlen = ETHER_HDR_LEN;
780 }
781
782 /* Set the ether header length */
783 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
784
785 if (offload == FALSE)
786 goto no_offloads;
787
788 /*
789 * If the first mbuf only includes the ethernet header,
790 * jump to the next one
791 * XXX: This assumes the stack splits mbufs containing headers
792 * on header boundaries
793 * XXX: And assumes the entire IP header is contained in one mbuf
794 */
795 if (mp->m_len == ehdrlen && mp->m_next)
796 l3d = mtod(mp->m_next, char *);
797 else
798 l3d = mtod(mp, char *) + ehdrlen;
799
800 switch (etype) {
801 #ifdef INET
802 case ETHERTYPE_IP:
803 ip = (struct ip *)(l3d);
804 ip_hlen = ip->ip_hl << 2;
805 ipproto = ip->ip_p;
806 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
807 KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
808 ip->ip_sum == 0);
809 break;
810 #endif
811 #ifdef INET6
812 case ETHERTYPE_IPV6:
813 ip6 = (struct ip6_hdr *)(l3d);
814 ip_hlen = sizeof(struct ip6_hdr);
815 ipproto = ip6->ip6_nxt;
816 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
817 break;
818 #endif
819 default:
820 offload = false;
821 break;
822 }
823
824 if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
825 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
826
827 vlan_macip_lens |= ip_hlen;
828
829 /* No support for offloads for non-L4 next headers */
830 switch (ipproto) {
831 case IPPROTO_TCP:
832 if (mp->m_pkthdr.csum_flags &
833 (M_CSUM_TCPv4 | M_CSUM_TCPv6))
834 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
835 else
836 offload = false;
837 break;
838 case IPPROTO_UDP:
839 if (mp->m_pkthdr.csum_flags &
840 (M_CSUM_UDPv4 | M_CSUM_UDPv6))
841 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
842 else
843 offload = false;
844 break;
845 default:
846 offload = false;
847 break;
848 }
849
850 if (offload) /* Insert L4 checksum into data descriptors */
851 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
852
853 no_offloads:
854 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
855
856 /* Now copy bits into descriptor */
857 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
858 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
859 TXD->seqnum_seed = htole32(0);
860 TXD->mss_l4len_idx = htole32(0);
861
862 /* We've consumed the first desc, adjust counters */
863 if (++ctxd == txr->num_desc)
864 ctxd = 0;
865 txr->next_avail_desc = ctxd;
866 --txr->tx_avail;
867
868 return (0);
869 } /* ixgbe_tx_ctx_setup */
870
871 /************************************************************************
872 * ixgbe_tso_setup
873 *
874 * Setup work for hardware segmentation offload (TSO) on
875 * adapters using advanced tx descriptors
876 ************************************************************************/
877 static int
878 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len,
879 u32 *olinfo_status)
880 {
881 struct ixgbe_adv_tx_context_desc *TXD;
882 struct ether_vlan_header *eh;
883 #ifdef INET6
884 struct ip6_hdr *ip6;
885 #endif
886 #ifdef INET
887 struct ip *ip;
888 #endif
889 struct tcphdr *th;
890 int ctxd, ehdrlen, ip_hlen, tcp_hlen;
891 u32 vlan_macip_lens = 0;
892 u32 type_tucmd_mlhl = 0;
893 u32 mss_l4len_idx = 0, paylen;
894 u16 vtag = 0, eh_type;
895
896 /*
897 * Determine where frame payload starts.
898 * Jump over vlan headers if already present
899 */
900 eh = mtod(mp, struct ether_vlan_header *);
901 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
902 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
903 eh_type = eh->evl_proto;
904 } else {
905 ehdrlen = ETHER_HDR_LEN;
906 eh_type = eh->evl_encap_proto;
907 }
908
909 switch (ntohs(eh_type)) {
910 #ifdef INET
911 case ETHERTYPE_IP:
912 ip = (struct ip *)(mp->m_data + ehdrlen);
913 if (ip->ip_p != IPPROTO_TCP)
914 return (ENXIO);
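		/*
		 * Zero the IP checksum and seed the TCP checksum with the
		 * pseudo-header sum; the hardware fills in the final
		 * checksums for each segment it generates during TSO.
		 */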
915 ip->ip_sum = 0;
916 ip_hlen = ip->ip_hl << 2;
917 th = (struct tcphdr *)((char *)ip + ip_hlen);
918 th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
919 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
920 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
921 /* Tell transmit desc to also do IPv4 checksum. */
922 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
923 break;
924 #endif
925 #ifdef INET6
926 case ETHERTYPE_IPV6:
927 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
928 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
929 if (ip6->ip6_nxt != IPPROTO_TCP)
930 return (ENXIO);
931 ip_hlen = sizeof(struct ip6_hdr);
932 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
933 th = (struct tcphdr *)((char *)ip6 + ip_hlen);
934 th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
935 &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
936 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
937 break;
938 #endif
939 default:
940 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
941 __func__, ntohs(eh_type));
942 break;
943 }
944
945 ctxd = txr->next_avail_desc;
946 TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
947
948 tcp_hlen = th->th_off << 2;
949
950 /* This is used in the transmit desc in encap */
951 paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
952
953 /* VLAN MACLEN IPLEN */
954 if (vlan_has_tag(mp)) {
955 vtag = htole16(vlan_get_tag(mp));
956 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
957 }
958
959 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
960 vlan_macip_lens |= ip_hlen;
961 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
962
963 /* ADV DTYPE TUCMD */
964 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
965 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
966 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
967
968 /* MSS L4LEN IDX */
969 mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
970 mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
971 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
972
973 TXD->seqnum_seed = htole32(0);
974
975 if (++ctxd == txr->num_desc)
976 ctxd = 0;
977
978 txr->tx_avail--;
979 txr->next_avail_desc = ctxd;
980 *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
981 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
982 *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
983 ++txr->tso_tx.ev_count;
984
985 return (0);
986 } /* ixgbe_tso_setup */
987
988
989 /************************************************************************
990 * ixgbe_txeof
991 *
992 * Examine each tx_buffer in the used queue. If the hardware is done
993 * processing the packet then free associated resources. The
994 * tx_buffer is put back on the free queue.
995 ************************************************************************/
996 bool
997 ixgbe_txeof(struct tx_ring *txr)
998 {
999 struct adapter *adapter = txr->adapter;
1000 struct ifnet *ifp = adapter->ifp;
1001 struct ixgbe_tx_buf *buf;
1002 union ixgbe_adv_tx_desc *txd;
1003 u32 work, processed = 0;
1004 u32 limit = adapter->tx_process_limit;
1005
1006 KASSERT(mutex_owned(&txr->tx_mtx));
1007
1008 #ifdef DEV_NETMAP
1009 if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) &&
1010 (adapter->ifp->if_capenable & IFCAP_NETMAP)) {
1011 struct netmap_adapter *na = NA(adapter->ifp);
1012 struct netmap_kring *kring = &na->tx_rings[txr->me];
1013 txd = txr->tx_base;
1014 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1015 BUS_DMASYNC_POSTREAD);
1016 /*
1017 * In netmap mode, all the work is done in the context
1018 * of the client thread. Interrupt handlers only wake up
1019 * clients, which may be sleeping on individual rings
1020 * or on a global resource for all rings.
1021 * To implement tx interrupt mitigation, we wake up the client
1022 * thread roughly every half ring, even if the NIC interrupts
1023 * more frequently. This is implemented as follows:
1024 * - ixgbe_txsync() sets kring->nr_kflags with the index of
1025 * the slot that should wake up the thread (nkr_num_slots
1026 * means the user thread should not be woken up);
1027 * - the driver ignores tx interrupts unless netmap_mitigate=0
1028 * or the slot has the DD bit set.
1029 */
1030 if (!netmap_mitigate ||
1031 (kring->nr_kflags < kring->nkr_num_slots &&
1032 txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
1033 netmap_tx_irq(ifp, txr->me);
1034 }
1035 return false;
1036 }
1037 #endif /* DEV_NETMAP */
1038
1039 if (txr->tx_avail == txr->num_desc) {
1040 txr->busy = 0;
1041 return false;
1042 }
1043
1044 /* Get work starting point */
1045 work = txr->next_to_clean;
1046 buf = &txr->tx_buffers[work];
1047 txd = &txr->tx_base[work];
1048 work -= txr->num_desc; /* The distance to ring end */
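	/*
	 * From here on "work" is kept as a negative offset from the end
	 * of the ring, so the wrap tests below only need to check for it
	 * reaching zero; it is rebased to a real index after the loop.
	 */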
1049 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1050 BUS_DMASYNC_POSTREAD);
1051
1052 do {
1053 union ixgbe_adv_tx_desc *eop = buf->eop;
1054 if (eop == NULL) /* No work */
1055 break;
1056
1057 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1058 break; /* I/O not complete */
1059
1060 if (buf->m_head) {
1061 txr->bytes += buf->m_head->m_pkthdr.len;
1062 bus_dmamap_sync(txr->txtag->dt_dmat, buf->map,
1063 0, buf->m_head->m_pkthdr.len,
1064 BUS_DMASYNC_POSTWRITE);
1065 ixgbe_dmamap_unload(txr->txtag, buf->map);
1066 m_freem(buf->m_head);
1067 buf->m_head = NULL;
1068 }
1069 buf->eop = NULL;
1070 ++txr->tx_avail;
1071
1072 /* We clean the range if multi segment */
1073 while (txd != eop) {
1074 ++txd;
1075 ++buf;
1076 ++work;
1077 /* wrap the ring? */
1078 if (__predict_false(!work)) {
1079 work -= txr->num_desc;
1080 buf = txr->tx_buffers;
1081 txd = txr->tx_base;
1082 }
1083 if (buf->m_head) {
1084 txr->bytes +=
1085 buf->m_head->m_pkthdr.len;
1086 bus_dmamap_sync(txr->txtag->dt_dmat,
1087 buf->map,
1088 0, buf->m_head->m_pkthdr.len,
1089 BUS_DMASYNC_POSTWRITE);
1090 ixgbe_dmamap_unload(txr->txtag,
1091 buf->map);
1092 m_freem(buf->m_head);
1093 buf->m_head = NULL;
1094 }
1095 ++txr->tx_avail;
1096 buf->eop = NULL;
1097
1098 }
1099 ++txr->packets;
1100 ++processed;
1101 ++ifp->if_opackets;
1102
1103 /* Try the next packet */
1104 ++txd;
1105 ++buf;
1106 ++work;
1107 /* reset with a wrap */
1108 if (__predict_false(!work)) {
1109 work -= txr->num_desc;
1110 buf = txr->tx_buffers;
1111 txd = txr->tx_base;
1112 }
1113 prefetch(txd);
1114 } while (__predict_true(--limit));
1115
1116 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1117 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1118
1119 work += txr->num_desc;
1120 txr->next_to_clean = work;
1121
1122 /*
1123 	 * Queue hang detection: we know there is
1124 	 * work outstanding or the early return above
1125 	 * would have been taken, so increment busy
1126 	 * if nothing managed to get cleaned; then
1127 	 * local_timer will check it and mark the
1128 	 * queue as HUNG if it exceeds a MAX attempt.
1129 */
1130 if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1131 ++txr->busy;
1132 /*
1133 	 * If anything gets cleaned we reset the state to 1;
1134 	 * note this will turn off HUNG if it is set.
1135 */
1136 if (processed)
1137 txr->busy = 1;
1138
1139 if (txr->tx_avail == txr->num_desc)
1140 txr->busy = 0;
1141
1142 return ((limit > 0) ? false : true);
1143 } /* ixgbe_txeof */
1144
1145 /************************************************************************
1146 * ixgbe_rsc_count
1147 *
1148 * Used to detect a descriptor that has been merged by Hardware RSC.
1149 ************************************************************************/
1150 static inline u32
1151 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1152 {
1153 return (le32toh(rx->wb.lower.lo_dword.data) &
1154 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1155 } /* ixgbe_rsc_count */
1156
1157 /************************************************************************
1158 * ixgbe_setup_hw_rsc
1159 *
1160 * Initialize Hardware RSC (LRO) feature on 82599
1161  *   for an RX ring; this is toggled by the LRO capability
1162 * even though it is transparent to the stack.
1163 *
1164 * NOTE: Since this HW feature only works with IPv4 and
1165 * testing has shown soft LRO to be as effective,
1166 * this feature will be disabled by default.
1167 ************************************************************************/
1168 static void
1169 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1170 {
1171 struct adapter *adapter = rxr->adapter;
1172 struct ixgbe_hw *hw = &adapter->hw;
1173 u32 rscctrl, rdrxctl;
1174
1175 /* If turning LRO/RSC off we need to disable it */
1176 if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
1177 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1178 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
 		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1179 		return;
1180 }
1181
1182 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1183 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1184 #ifdef DEV_NETMAP
1185 /* Always strip CRC unless Netmap disabled it */
1186 if (!(adapter->feat_en & IXGBE_FEATURE_NETMAP) ||
1187 !(adapter->ifp->if_capenable & IFCAP_NETMAP) ||
1188 ix_crcstrip)
1189 #endif /* DEV_NETMAP */
1190 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1191 rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1192 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1193
1194 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1195 rscctrl |= IXGBE_RSCCTL_RSCEN;
1196 /*
1197 * Limit the total number of descriptors that
1198 * can be combined, so it does not exceed 64K
1199 */
1200 if (rxr->mbuf_sz == MCLBYTES)
1201 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1202 else if (rxr->mbuf_sz == MJUMPAGESIZE)
1203 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1204 else if (rxr->mbuf_sz == MJUM9BYTES)
1205 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1206 else /* Using 16K cluster */
1207 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1208
1209 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1210
1211 /* Enable TCP header recognition */
1212 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1213 (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR));
1214
1215 /* Disable RSC for ACK packets */
1216 IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1217 (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1218
1219 rxr->hw_rsc = TRUE;
1220 } /* ixgbe_setup_hw_rsc */
1221
1222 /************************************************************************
1223 * ixgbe_refresh_mbufs
1224 *
1225 * Refresh mbuf buffers for RX descriptor rings
1226 * - now keeps its own state so discards due to resource
1227  *   exhaustion are unnecessary; if an mbuf cannot be obtained
1228  *   it just returns, keeping its placeholder, so it can simply
1229  *   be called again later to retry.
1230 ************************************************************************/
1231 static void
1232 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1233 {
1234 struct adapter *adapter = rxr->adapter;
1235 struct ixgbe_rx_buf *rxbuf;
1236 struct mbuf *mp;
1237 int i, j, error;
1238 bool refreshed = false;
1239
1240 i = j = rxr->next_to_refresh;
1241 /* Control the loop with one beyond */
1242 if (++j == rxr->num_desc)
1243 j = 0;
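	/*
	 * "i" is the slot being refreshed and "j" runs one ahead of it;
	 * next_to_refresh only advances once a slot has successfully been
	 * given a buffer, so a failed allocation can simply be retried.
	 */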
1244
1245 while (j != limit) {
1246 rxbuf = &rxr->rx_buffers[i];
1247 if (rxbuf->buf == NULL) {
1248 mp = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
1249 MT_DATA, M_PKTHDR, rxr->mbuf_sz);
1250 if (mp == NULL) {
1251 rxr->no_jmbuf.ev_count++;
1252 goto update;
1253 }
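			/*
			 * Shift the payload by ETHER_ALIGN so the IP header
			 * is 4-byte aligned; only done while the maximum
			 * frame still fits in the shortened cluster.
			 */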
1254 if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1255 m_adj(mp, ETHER_ALIGN);
1256 } else
1257 mp = rxbuf->buf;
1258
1259 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1260
1261 /* If we're dealing with an mbuf that was copied rather
1262 * than replaced, there's no need to go through busdma.
1263 */
1264 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1265 /* Get the memory mapping */
1266 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
1267 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
1268 rxbuf->pmap, mp, BUS_DMA_NOWAIT);
1269 if (error != 0) {
1270 printf("Refresh mbufs: payload dmamap load failure - %d\n", error);
1271 m_free(mp);
1272 rxbuf->buf = NULL;
1273 goto update;
1274 }
1275 rxbuf->buf = mp;
1276 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1277 0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
1278 rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1279 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1280 } else {
1281 rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1282 rxbuf->flags &= ~IXGBE_RX_COPY;
1283 }
1284
1285 refreshed = true;
1286 /* Next is precalculated */
1287 i = j;
1288 rxr->next_to_refresh = i;
1289 if (++j == rxr->num_desc)
1290 j = 0;
1291 }
1292
1293 update:
1294 if (refreshed) /* Update hardware tail index */
1295 IXGBE_WRITE_REG(&adapter->hw, rxr->tail, rxr->next_to_refresh);
1296
1297 return;
1298 } /* ixgbe_refresh_mbufs */
1299
1300 /************************************************************************
1301 * ixgbe_allocate_receive_buffers
1302 *
1303 * Allocate memory for rx_buffer structures. Since we use one
1304  *   rx_buffer per received packet, the maximum number of rx_buffers
1305 * that we'll need is equal to the number of receive descriptors
1306 * that we've allocated.
1307 ************************************************************************/
1308 static int
1309 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1310 {
1311 struct adapter *adapter = rxr->adapter;
1312 device_t dev = adapter->dev;
1313 struct ixgbe_rx_buf *rxbuf;
1314 int bsize, error;
1315
1316 bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1317 rxr->rx_buffers = (struct ixgbe_rx_buf *)malloc(bsize, M_DEVBUF,
1318 M_NOWAIT | M_ZERO);
1319 if (rxr->rx_buffers == NULL) {
1320 aprint_error_dev(dev, "Unable to allocate rx_buffer memory\n");
1321 error = ENOMEM;
1322 goto fail;
1323 }
1324
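	/*
	 * One DMA segment per RX buffer; size the tag for the largest
	 * (16KB) cluster the driver may use.
	 */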
1325 error = ixgbe_dma_tag_create(
1326 /* parent */ adapter->osdep.dmat,
1327 /* alignment */ 1,
1328 /* bounds */ 0,
1329 /* maxsize */ MJUM16BYTES,
1330 /* nsegments */ 1,
1331 /* maxsegsize */ MJUM16BYTES,
1332 /* flags */ 0,
1333 &rxr->ptag);
1334 if (error != 0) {
1335 aprint_error_dev(dev, "Unable to create RX DMA tag\n");
1336 goto fail;
1337 }
1338
1339 for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
1340 rxbuf = &rxr->rx_buffers[i];
1341 error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1342 if (error) {
1343 aprint_error_dev(dev, "Unable to create RX dma map\n");
1344 goto fail;
1345 }
1346 }
1347
1348 return (0);
1349
1350 fail:
1351 /* Frees all, but can handle partial completion */
1352 ixgbe_free_receive_structures(adapter);
1353
1354 return (error);
1355 } /* ixgbe_allocate_receive_buffers */
1356
1357 /************************************************************************
1358 * ixgbe_free_receive_ring
1359 ************************************************************************/
1360 static void
1361 ixgbe_free_receive_ring(struct rx_ring *rxr)
1362 {
1363 for (int i = 0; i < rxr->num_desc; i++) {
1364 ixgbe_rx_discard(rxr, i);
1365 }
1366 } /* ixgbe_free_receive_ring */
1367
1368 /************************************************************************
1369 * ixgbe_setup_receive_ring
1370 *
1371 * Initialize a receive ring and its buffers.
1372 ************************************************************************/
1373 static int
1374 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1375 {
1376 struct adapter *adapter;
1377 struct ixgbe_rx_buf *rxbuf;
1378 #ifdef LRO
1379 struct ifnet *ifp;
1380 struct lro_ctrl *lro = &rxr->lro;
1381 #endif /* LRO */
1382 #ifdef DEV_NETMAP
1383 struct netmap_adapter *na = NA(rxr->adapter->ifp);
1384 struct netmap_slot *slot;
1385 #endif /* DEV_NETMAP */
1386 int rsize, error = 0;
1387
1388 adapter = rxr->adapter;
1389 #ifdef LRO
1390 ifp = adapter->ifp;
1391 #endif /* LRO */
1392
1393 /* Clear the ring contents */
1394 IXGBE_RX_LOCK(rxr);
1395
1396 #ifdef DEV_NETMAP
1397 if (adapter->feat_en & IXGBE_FEATURE_NETMAP)
1398 slot = netmap_reset(na, NR_RX, rxr->me, 0);
1399 #endif /* DEV_NETMAP */
1400
1401 rsize = roundup2(adapter->num_rx_desc *
1402 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1403 bzero((void *)rxr->rx_base, rsize);
1404 /* Cache the size */
1405 rxr->mbuf_sz = adapter->rx_mbuf_sz;
1406
1407 /* Free current RX buffer structs and their mbufs */
1408 ixgbe_free_receive_ring(rxr);
1409
1410 /* Now replenish the mbufs */
1411 for (int j = 0; j != rxr->num_desc; ++j) {
1412 struct mbuf *mp;
1413
1414 rxbuf = &rxr->rx_buffers[j];
1415
1416 #ifdef DEV_NETMAP
1417 /*
1418 * In netmap mode, fill the map and set the buffer
1419 * address in the NIC ring, considering the offset
1420 * between the netmap and NIC rings (see comment in
1421 * ixgbe_setup_transmit_ring() ). No need to allocate
1422 * an mbuf, so end the block with a continue;
1423 */
1424 if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
1425 int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
1426 uint64_t paddr;
1427 void *addr;
1428
1429 addr = PNMB(na, slot + sj, &paddr);
1430 netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1431 /* Update descriptor and the cached value */
1432 rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1433 rxbuf->addr = htole64(paddr);
1434 continue;
1435 }
1436 #endif /* DEV_NETMAP */
1437
1438 rxbuf->flags = 0;
1439 rxbuf->buf = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
1440 MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
1441 if (rxbuf->buf == NULL) {
1442 error = ENOBUFS;
1443 goto fail;
1444 }
1445 mp = rxbuf->buf;
1446 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1447 /* Get the memory mapping */
1448 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat, rxbuf->pmap,
1449 mp, BUS_DMA_NOWAIT);
1450 if (error != 0)
1451 goto fail;
1452 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1453 0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
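		/*
		 * The cached rxbuf->addr lets the IXGBE_RX_COPY path in
		 * ixgbe_refresh_mbufs() restore pkt_addr without reloading
		 * the DMA map.
		 */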
1454 /* Update the descriptor and the cached value */
1455 rxr->rx_base[j].read.pkt_addr =
1456 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1457 rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1458 }
1459
1460
1461 /* Setup our descriptor indices */
1462 rxr->next_to_check = 0;
1463 rxr->next_to_refresh = 0;
1464 rxr->lro_enabled = FALSE;
1465 rxr->rx_copies.ev_count = 0;
1466 #if 0 /* NetBSD */
1467 rxr->rx_bytes.ev_count = 0;
1468 #if 1 /* Fix inconsistency */
1469 rxr->rx_packets.ev_count = 0;
1470 #endif
1471 #endif
1472 rxr->vtag_strip = FALSE;
1473
1474 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1475 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1476
1477 /*
1478 * Now set up the LRO interface
1479 */
1480 if (ixgbe_rsc_enable)
1481 ixgbe_setup_hw_rsc(rxr);
1482 #ifdef LRO
1483 else if (ifp->if_capenable & IFCAP_LRO) {
1484 device_t dev = adapter->dev;
1485 int err = tcp_lro_init(lro);
1486 if (err) {
1487 device_printf(dev, "LRO Initialization failed!\n");
1488 goto fail;
1489 }
1490 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1491 rxr->lro_enabled = TRUE;
1492 lro->ifp = adapter->ifp;
1493 }
1494 #endif /* LRO */
1495
1496 IXGBE_RX_UNLOCK(rxr);
1497
1498 return (0);
1499
1500 fail:
1501 ixgbe_free_receive_ring(rxr);
1502 IXGBE_RX_UNLOCK(rxr);
1503
1504 return (error);
1505 } /* ixgbe_setup_receive_ring */
1506
1507 /************************************************************************
1508 * ixgbe_setup_receive_structures - Initialize all receive rings.
1509 ************************************************************************/
1510 int
1511 ixgbe_setup_receive_structures(struct adapter *adapter)
1512 {
1513 struct rx_ring *rxr = adapter->rx_rings;
1514 int j;
1515
1516 /*
1517 * Now reinitialize our supply of jumbo mbufs. The number
1518 * or size of jumbo mbufs may have changed.
1519 * Assume all of rxr->ptag are the same.
1520 */
1521 ixgbe_jcl_reinit(&adapter->jcl_head, rxr->ptag->dt_dmat,
1522 (2 * adapter->num_rx_desc) * adapter->num_queues,
1523 adapter->rx_mbuf_sz);
1524
1525 for (j = 0; j < adapter->num_queues; j++, rxr++)
1526 if (ixgbe_setup_receive_ring(rxr))
1527 goto fail;
1528
1529 return (0);
1530 fail:
1531 /*
1532 	 * Free RX buffers allocated so far; we will only handle
1533 	 * the rings that completed, since the failing case will have
1534 	 * cleaned up after itself. 'j' failed, so it is the terminus.
1535 */
1536 for (int i = 0; i < j; ++i) {
1537 rxr = &adapter->rx_rings[i];
1538 IXGBE_RX_LOCK(rxr);
1539 ixgbe_free_receive_ring(rxr);
1540 IXGBE_RX_UNLOCK(rxr);
1541 }
1542
1543 return (ENOBUFS);
1544 } /* ixgbe_setup_receive_structures */
1545
1546
1547 /************************************************************************
1548 * ixgbe_free_receive_structures - Free all receive rings.
1549 ************************************************************************/
1550 void
1551 ixgbe_free_receive_structures(struct adapter *adapter)
1552 {
1553 struct rx_ring *rxr = adapter->rx_rings;
1554
1555 INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1556
1557 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1558 ixgbe_free_receive_buffers(rxr);
1559 #ifdef LRO
1560 /* Free LRO memory */
1561 tcp_lro_free(&rxr->lro);
1562 #endif /* LRO */
1563 /* Free the ring memory as well */
1564 ixgbe_dma_free(adapter, &rxr->rxdma);
1565 IXGBE_RX_LOCK_DESTROY(rxr);
1566 }
1567
1568 free(adapter->rx_rings, M_DEVBUF);
1569 } /* ixgbe_free_receive_structures */
1570
1571
1572 /************************************************************************
1573 * ixgbe_free_receive_buffers - Free receive ring data structures
1574 ************************************************************************/
1575 static void
1576 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1577 {
1578 struct adapter *adapter = rxr->adapter;
1579 struct ixgbe_rx_buf *rxbuf;
1580
1581 INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1582
1583 /* Cleanup any existing buffers */
1584 if (rxr->rx_buffers != NULL) {
1585 for (int i = 0; i < adapter->num_rx_desc; i++) {
1586 rxbuf = &rxr->rx_buffers[i];
1587 ixgbe_rx_discard(rxr, i);
1588 if (rxbuf->pmap != NULL) {
1589 ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1590 rxbuf->pmap = NULL;
1591 }
1592 }
1593 if (rxr->rx_buffers != NULL) {
1594 free(rxr->rx_buffers, M_DEVBUF);
1595 rxr->rx_buffers = NULL;
1596 }
1597 }
1598
1599 if (rxr->ptag != NULL) {
1600 ixgbe_dma_tag_destroy(rxr->ptag);
1601 rxr->ptag = NULL;
1602 }
1603
1604 return;
1605 } /* ixgbe_free_receive_buffers */
1606
1607 /************************************************************************
1608 * ixgbe_rx_input
1609 ************************************************************************/
1610 static __inline void
1611 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m,
1612 u32 ptype)
1613 {
1614 struct adapter *adapter = ifp->if_softc;
1615
1616 #ifdef LRO
1617 struct ethercom *ec = &adapter->osdep.ec;
1618
1619 /*
1620 	 * At the moment LRO is only for IP/TCP packets, and the TCP checksum
1621 	 * must have been computed by hardware. The packet also must not have a
1622 	 * VLAN tag in its Ethernet header. For IPv6 we do not yet support ext. hdrs.
1623 */
1624 if (rxr->lro_enabled &&
1625 (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
1626 (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1627 ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1628 (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1629 (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1630 (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1631 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1632 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1633 /*
1634 * Send to the stack if:
1635 ** - LRO not enabled, or
1636 ** - no LRO resources, or
1637 ** - lro enqueue fails
1638 */
1639 if (rxr->lro.lro_cnt != 0)
1640 if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1641 return;
1642 }
1643 #endif /* LRO */
1644
1645 if_percpuq_enqueue(adapter->ipq, m);
1646 } /* ixgbe_rx_input */
1647
1648 /************************************************************************
1649 * ixgbe_rx_discard
1650 ************************************************************************/
1651 static __inline void
1652 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1653 {
1654 struct ixgbe_rx_buf *rbuf;
1655
1656 rbuf = &rxr->rx_buffers[i];
1657
1658 /*
1659 * With advanced descriptors the writeback
1660 	 * clobbers the buffer addresses, so it is easier
1661 * to just free the existing mbufs and take
1662 * the normal refresh path to get new buffers
1663 * and mapping.
1664 */
1665
1666 if (rbuf->fmp != NULL) {/* Partial chain ? */
1667 bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1668 rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1669 m_freem(rbuf->fmp);
1670 rbuf->fmp = NULL;
1671 rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1672 } else if (rbuf->buf) {
1673 bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1674 rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1675 m_free(rbuf->buf);
1676 rbuf->buf = NULL;
1677 }
1678 ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
1679
1680 rbuf->flags = 0;
1681
1682 return;
1683 } /* ixgbe_rx_discard */
1684
1685
1686 /************************************************************************
1687 * ixgbe_rxeof
1688 *
1689 * Executes in interrupt context. It replenishes the
1690 * mbufs in the descriptor and sends data which has
1691  *   mbufs in the descriptor ring and sends data which has
1692  *   been DMA'ed into host memory to the upper layer.
1693 * Return TRUE for more work, FALSE for all clean.
1694 ************************************************************************/
1695 bool
1696 ixgbe_rxeof(struct ix_queue *que)
1697 {
1698 struct adapter *adapter = que->adapter;
1699 struct rx_ring *rxr = que->rxr;
1700 struct ifnet *ifp = adapter->ifp;
1701 #ifdef LRO
1702 struct lro_ctrl *lro = &rxr->lro;
1703 #endif /* LRO */
1704 union ixgbe_adv_rx_desc *cur;
1705 struct ixgbe_rx_buf *rbuf, *nbuf;
1706 int i, nextp, processed = 0;
1707 u32 staterr = 0;
1708 u32 count = adapter->rx_process_limit;
1709 #ifdef RSS
1710 u16 pkt_info;
1711 #endif
1712
1713 IXGBE_RX_LOCK(rxr);
1714
1715 #ifdef DEV_NETMAP
1716 if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
1717 /* Same as the txeof routine: wakeup clients on intr. */
1718 if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1719 IXGBE_RX_UNLOCK(rxr);
1720 return (FALSE);
1721 }
1722 }
1723 #endif /* DEV_NETMAP */
1724
1725 for (i = rxr->next_to_check; count != 0;) {
1726 struct mbuf *sendmp, *mp;
1727 u32 rsc, ptype;
1728 u16 len;
1729 u16 vtag = 0;
1730 bool eop;
1731
1732 /* Sync the ring. */
1733 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1734 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1735
1736 cur = &rxr->rx_base[i];
1737 staterr = le32toh(cur->wb.upper.status_error);
1738 #ifdef RSS
1739 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1740 #endif
1741
1742 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1743 break;
1744 if ((ifp->if_flags & IFF_RUNNING) == 0)
1745 break;
1746
1747 count--;
1748 sendmp = NULL;
1749 nbuf = NULL;
1750 rsc = 0;
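		/*
		 * Clear the writeback status so a stale DD bit is not
		 * mistaken for a newly completed descriptor on a later pass.
		 */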
1751 cur->wb.upper.status_error = 0;
1752 rbuf = &rxr->rx_buffers[i];
1753 mp = rbuf->buf;
1754
1755 len = le16toh(cur->wb.upper.length);
1756 ptype = le32toh(cur->wb.lower.lo_dword.data) &
1757 IXGBE_RXDADV_PKTTYPE_MASK;
1758 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1759
1760 /* Make sure bad packets are discarded */
1761 if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1762 #if __FreeBSD_version >= 1100036
1763 if (adapter->feat_en & IXGBE_FEATURE_VF)
1764 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1765 #endif
1766 rxr->rx_discarded.ev_count++;
1767 ixgbe_rx_discard(rxr, i);
1768 goto next_desc;
1769 }
1770
1771 bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1772 rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1773
1774 /*
1775 		 * On the 82599, which supports a hardware
1776 		 * LRO (called HW RSC), packets need not
1777 		 * be fragmented across sequential
1778 		 * descriptors; rather, the next descriptor
1779 		 * is indicated in bits of the descriptor.
1780 		 * This also means that we might process
1781 		 * more than one packet at a time, something
1782 		 * that has never been true before, and it
1783 		 * required eliminating global chain pointers
1784 		 * in favor of what we are doing here. -jfv
1785 */
1786 if (!eop) {
1787 /*
1788 * Figure out the next descriptor
1789 * of this frame.
1790 */
1791 if (rxr->hw_rsc == TRUE) {
1792 rsc = ixgbe_rsc_count(cur);
1793 rxr->rsc_num += (rsc - 1);
1794 }
1795 if (rsc) { /* Get hardware index */
1796 nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
1797 IXGBE_RXDADV_NEXTP_SHIFT);
1798 } else { /* Just sequential */
1799 nextp = i + 1;
1800 if (nextp == adapter->num_rx_desc)
1801 nextp = 0;
1802 }
1803 nbuf = &rxr->rx_buffers[nextp];
1804 prefetch(nbuf);
1805 }
1806 		/*
1807 		 * Rather than using the fmp/lmp global pointers,
1808 		 * we now keep the head of a packet chain in the
1809 		 * buffer struct and pass this along from one
1810 		 * descriptor to the next, until we get EOP.
1811 		 */
1812 mp->m_len = len;
1813 		/*
1814 		 * See if there is a stored head; if so, this
1815 		 * descriptor continues an earlier frame.
1816 		 */
1817 sendmp = rbuf->fmp;
1818 if (sendmp != NULL) { /* secondary frag */
1819 rbuf->buf = rbuf->fmp = NULL;
1820 mp->m_flags &= ~M_PKTHDR;
1821 sendmp->m_pkthdr.len += mp->m_len;
1822 } else {
1823 /*
1824 * Optimize. This might be a small packet,
1825 * maybe just a TCP ACK. Do a fast copy that
1826 * is cache aligned into a new mbuf, and
1827 * leave the old mbuf+cluster for re-use.
1828 */
1829 if (eop && len <= IXGBE_RX_COPY_LEN) {
1830 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1831 if (sendmp != NULL) {
1832 sendmp->m_data += IXGBE_RX_COPY_ALIGN;
1833 ixgbe_bcopy(mp->m_data, sendmp->m_data,
1834 len);
1835 sendmp->m_len = len;
1836 rxr->rx_copies.ev_count++;
1837 rbuf->flags |= IXGBE_RX_COPY;
1838 }
1839 }
1840 if (sendmp == NULL) {
1841 rbuf->buf = rbuf->fmp = NULL;
1842 sendmp = mp;
1843 }
1844
1845 			/* First descriptor of a non-packet-split chain */
1846 sendmp->m_flags |= M_PKTHDR;
1847 sendmp->m_pkthdr.len = mp->m_len;
1848 }
1849 ++processed;
1850
1851 /* Pass the head pointer on */
1852 if (eop == 0) {
1853 nbuf->fmp = sendmp;
1854 sendmp = NULL;
1855 mp->m_next = nbuf->buf;
1856 } else { /* Sending this frame */
1857 m_set_rcvif(sendmp, ifp);
1858 ++rxr->packets;
1859 rxr->rx_packets.ev_count++;
1860 /* capture data for AIM */
1861 rxr->bytes += sendmp->m_pkthdr.len;
1862 rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
1863 /* Process vlan info */
1864 if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
1865 vtag = le16toh(cur->wb.upper.vlan);
1866 if (vtag) {
1867 vlan_set_tag(sendmp, vtag);
1868 }
1869 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
1870 ixgbe_rx_checksum(staterr, sendmp, ptype,
1871 &adapter->stats.pf);
1872 }
1873
1874 #if 0 /* FreeBSD */
1875 /*
1876 * In case of multiqueue, we have RXCSUM.PCSD bit set
1877 * and never cleared. This means we have RSS hash
1878 * available to be used.
1879 */
1880 if (adapter->num_queues > 1) {
1881 sendmp->m_pkthdr.flowid =
1882 le32toh(cur->wb.lower.hi_dword.rss);
1883 switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
1884 case IXGBE_RXDADV_RSSTYPE_IPV4:
1885 M_HASHTYPE_SET(sendmp,
1886 M_HASHTYPE_RSS_IPV4);
1887 break;
1888 case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
1889 M_HASHTYPE_SET(sendmp,
1890 M_HASHTYPE_RSS_TCP_IPV4);
1891 break;
1892 case IXGBE_RXDADV_RSSTYPE_IPV6:
1893 M_HASHTYPE_SET(sendmp,
1894 M_HASHTYPE_RSS_IPV6);
1895 break;
1896 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
1897 M_HASHTYPE_SET(sendmp,
1898 M_HASHTYPE_RSS_TCP_IPV6);
1899 break;
1900 case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
1901 M_HASHTYPE_SET(sendmp,
1902 M_HASHTYPE_RSS_IPV6_EX);
1903 break;
1904 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
1905 M_HASHTYPE_SET(sendmp,
1906 M_HASHTYPE_RSS_TCP_IPV6_EX);
1907 break;
1908 #if __FreeBSD_version > 1100000
1909 case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
1910 M_HASHTYPE_SET(sendmp,
1911 M_HASHTYPE_RSS_UDP_IPV4);
1912 break;
1913 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
1914 M_HASHTYPE_SET(sendmp,
1915 M_HASHTYPE_RSS_UDP_IPV6);
1916 break;
1917 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
1918 M_HASHTYPE_SET(sendmp,
1919 M_HASHTYPE_RSS_UDP_IPV6_EX);
1920 break;
1921 #endif
1922 default:
1923 M_HASHTYPE_SET(sendmp,
1924 M_HASHTYPE_OPAQUE_HASH);
1925 }
1926 } else {
1927 sendmp->m_pkthdr.flowid = que->msix;
1928 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1929 }
1930 #endif
1931 }
1932 next_desc:
1933 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1934 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1935
1936 /* Advance our pointers to the next descriptor. */
1937 if (++i == rxr->num_desc)
1938 i = 0;
1939
1940 /* Now send to the stack or do LRO */
1941 if (sendmp != NULL) {
1942 rxr->next_to_check = i;
1943 IXGBE_RX_UNLOCK(rxr);
1944 ixgbe_rx_input(rxr, ifp, sendmp, ptype);
1945 IXGBE_RX_LOCK(rxr);
1946 i = rxr->next_to_check;
1947 }
1948
1949 		/* Refresh mbufs every 8 descriptors processed */
1950 if (processed == 8) {
1951 ixgbe_refresh_mbufs(rxr, i);
1952 processed = 0;
1953 }
1954 }
1955
1956 /* Refresh any remaining buf structs */
1957 if (ixgbe_rx_unrefreshed(rxr))
1958 ixgbe_refresh_mbufs(rxr, i);
1959
1960 rxr->next_to_check = i;
1961
1962 IXGBE_RX_UNLOCK(rxr);
1963
1964 #ifdef LRO
1965 /*
1966 * Flush any outstanding LRO work
1967 */
1968 tcp_lro_flush_all(lro);
1969 #endif /* LRO */
1970
1971 /*
1972 * Still have cleaning to do?
1973 */
1974 if ((staterr & IXGBE_RXD_STAT_DD) != 0)
1975 return (TRUE);
1976
1977 return (FALSE);
1978 } /* ixgbe_rxeof */
1979
1980
1981 /************************************************************************
1982 * ixgbe_rx_checksum
1983 *
1984  * Verify that the hardware indicated that the checksum is valid.
1985  * Inform the stack of the checksum status so that the stack
1986  * does not spend time verifying the checksum itself.
1987 ************************************************************************/
1988 static void
1989 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
1990 struct ixgbe_hw_stats *stats)
1991 {
1992 u16 status = (u16)staterr;
1993 u8 errors = (u8)(staterr >> 24);
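	/*
	 * In the advanced RX descriptor write-back format the error bits
	 * occupy the most significant byte of staterr, so the shift by 24
	 * above lets them be tested against the legacy IXGBE_RXD_ERR_*
	 * definitions below.
	 */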
1994 #if 0
1995 bool sctp = false;
1996
1997 if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1998 (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
1999 sctp = true;
2000 #endif
2001
2002 /* IPv4 checksum */
2003 if (status & IXGBE_RXD_STAT_IPCS) {
2004 stats->ipcs.ev_count++;
2005 if (!(errors & IXGBE_RXD_ERR_IPE)) {
2006 /* IP Checksum Good */
2007 mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
2008 } else {
2009 stats->ipcs_bad.ev_count++;
2010 mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
2011 }
2012 }
2013 /* TCP/UDP/SCTP checksum */
2014 if (status & IXGBE_RXD_STAT_L4CS) {
2015 stats->l4cs.ev_count++;
2016 int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
2017 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
2018 mp->m_pkthdr.csum_flags |= type;
2019 } else {
2020 stats->l4cs_bad.ev_count++;
2021 mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
2022 }
2023 }
2024 } /* ixgbe_rx_checksum */
2025
2026 /************************************************************************
2027 * ixgbe_dma_malloc
2028 ************************************************************************/
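/*
 * Usage sketch (illustrative only; error handling omitted), mirroring
 * the calls made from ixgbe_allocate_queues() below:
 *
 *	if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma,
 *	    BUS_DMA_NOWAIT) == 0) {
 *		txr->tx_base =
 *		    (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
 *		...
 *		ixgbe_dma_free(adapter, &txr->txdma);
 *	}
 */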
2029 int
2030 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
2031 struct ixgbe_dma_alloc *dma, const int mapflags)
2032 {
2033 device_t dev = adapter->dev;
2034 int r, rsegs;
2035
2036 r = ixgbe_dma_tag_create(
2037 /* parent */ adapter->osdep.dmat,
2038 /* alignment */ DBA_ALIGN,
2039 /* bounds */ 0,
2040 /* maxsize */ size,
2041 /* nsegments */ 1,
2042 /* maxsegsize */ size,
2043 /* flags */ BUS_DMA_ALLOCNOW,
2044 &dma->dma_tag);
2045 if (r != 0) {
2046 aprint_error_dev(dev,
2047 "%s: ixgbe_dma_tag_create failed; error %d\n", __func__, r);
2048 goto fail_0;
2049 }
2050
2051 r = bus_dmamem_alloc(dma->dma_tag->dt_dmat, size,
2052 dma->dma_tag->dt_alignment, dma->dma_tag->dt_boundary,
2053 &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
2054 if (r != 0) {
2055 aprint_error_dev(dev,
2056 "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
2057 goto fail_1;
2058 }
2059
2060 r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
2061 size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
2062 if (r != 0) {
2063 aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
2064 __func__, r);
2065 goto fail_2;
2066 }
2067
2068 r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
2069 if (r != 0) {
2070 		aprint_error_dev(dev, "%s: ixgbe_dmamap_create failed; error %d\n",
2071 __func__, r);
2072 goto fail_3;
2073 }
2074
2075 r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map,
2076 dma->dma_vaddr, size, NULL, mapflags | BUS_DMA_NOWAIT);
2077 if (r != 0) {
2078 aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
2079 __func__, r);
2080 goto fail_4;
2081 }
2082 dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
2083 dma->dma_size = size;
2084 return 0;
2085 fail_4:
2086 ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
2087 fail_3:
2088 bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
2089 fail_2:
2090 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
2091 fail_1:
2092 ixgbe_dma_tag_destroy(dma->dma_tag);
2093 fail_0:
2094
2095 return (r);
2096 } /* ixgbe_dma_malloc */
2097
2098 /************************************************************************
2099 * ixgbe_dma_free
2100 ************************************************************************/
2101 void
2102 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2103 {
2104 bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
2105 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2106 ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
2107 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
2108 ixgbe_dma_tag_destroy(dma->dma_tag);
2109 } /* ixgbe_dma_free */
2110
2111
2112 /************************************************************************
2113 * ixgbe_allocate_queues
2114 *
2115  * Allocate memory for the transmit and receive rings, and then
2116  * the descriptors associated with each.  Called only once at attach.
2117 ************************************************************************/
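/*
 * The allocation order below is: queue structs, then the TX ring
 * structs, then the RX ring structs, then the per-ring descriptor DMA
 * memory and buffers.  txconf and rxconf count how many rings had
 * their descriptor memory allocated so that the error path can unwind
 * exactly those allocations.
 */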
2118 int
2119 ixgbe_allocate_queues(struct adapter *adapter)
2120 {
2121 device_t dev = adapter->dev;
2122 struct ix_queue *que;
2123 struct tx_ring *txr;
2124 struct rx_ring *rxr;
2125 int rsize, tsize, error = IXGBE_SUCCESS;
2126 int txconf = 0, rxconf = 0;
2127
2128 /* First, allocate the top level queue structs */
2129 adapter->queues = (struct ix_queue *)malloc(sizeof(struct ix_queue) *
2130 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
2131 if (adapter->queues == NULL) {
2132 aprint_error_dev(dev, "Unable to allocate queue memory\n");
2133 error = ENOMEM;
2134 goto fail;
2135 }
2136
2137 /* Second, allocate the TX ring struct memory */
2138 adapter->tx_rings = (struct tx_ring *)malloc(sizeof(struct tx_ring) *
2139 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
2140 if (adapter->tx_rings == NULL) {
2141 aprint_error_dev(dev, "Unable to allocate TX ring memory\n");
2142 error = ENOMEM;
2143 goto tx_fail;
2144 }
2145
2146 /* Third, allocate the RX ring */
2147 adapter->rx_rings = (struct rx_ring *)malloc(sizeof(struct rx_ring) *
2148 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
2149 if (adapter->rx_rings == NULL) {
2150 aprint_error_dev(dev, "Unable to allocate RX ring memory\n");
2151 error = ENOMEM;
2152 goto rx_fail;
2153 }
2154
2155 	/* Byte size of the TX descriptor ring itself */
2156 tsize = roundup2(adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc),
2157 DBA_ALIGN);
2158
2159 	/*
2160 	 * Now set up the TX queues.  txconf tracks how many have had
2161 	 * descriptor memory allocated so that, if anything fails
2162 	 * midcourse, we can undo the allocations gracefully.
2163 	 */
2164 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2165 /* Set up some basics */
2166 txr = &adapter->tx_rings[i];
2167 txr->adapter = adapter;
2168 txr->txr_interq = NULL;
2169 /* In case SR-IOV is enabled, align the index properly */
2170 #ifdef PCI_IOV
2171 txr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
2172 i);
2173 #else
2174 txr->me = i;
2175 #endif
2176 txr->num_desc = adapter->num_tx_desc;
2177
2178 /* Initialize the TX side lock */
2179 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2180 device_xname(dev), txr->me);
2181 mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
2182
2183 if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma,
2184 BUS_DMA_NOWAIT)) {
2185 aprint_error_dev(dev,
2186 "Unable to allocate TX Descriptor memory\n");
2187 error = ENOMEM;
2188 goto err_tx_desc;
2189 }
2190 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2191 bzero((void *)txr->tx_base, tsize);
2192
2193 /* Now allocate transmit buffers for the ring */
2194 if (ixgbe_allocate_transmit_buffers(txr)) {
2195 aprint_error_dev(dev,
2196 "Critical Failure setting up transmit buffers\n");
2197 error = ENOMEM;
2198 goto err_tx_desc;
2199 }
2200 if (!(adapter->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
2201 			/* Allocate a software transmit packet queue (pcq) */
2202 txr->txr_interq = pcq_create(IXGBE_BR_SIZE, KM_SLEEP);
2203 if (txr->txr_interq == NULL) {
2204 aprint_error_dev(dev,
2205 "Critical Failure setting up buf ring\n");
2206 error = ENOMEM;
2207 goto err_tx_desc;
2208 }
2209 }
2210 }
2211
2212 /*
2213 * Next the RX queues...
2214 */
2215 rsize = roundup2(adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc),
2216 DBA_ALIGN);
2217 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2218 rxr = &adapter->rx_rings[i];
2219 /* Set up some basics */
2220 rxr->adapter = adapter;
2221 #ifdef PCI_IOV
2222 /* In case SR-IOV is enabled, align the index properly */
2223 rxr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
2224 i);
2225 #else
2226 rxr->me = i;
2227 #endif
2228 rxr->num_desc = adapter->num_rx_desc;
2229
2230 /* Initialize the RX side lock */
2231 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2232 device_xname(dev), rxr->me);
2233 mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
2234
2235 if (ixgbe_dma_malloc(adapter, rsize, &rxr->rxdma,
2236 BUS_DMA_NOWAIT)) {
2237 aprint_error_dev(dev,
2238 			    "Unable to allocate RX Descriptor memory\n");
2239 error = ENOMEM;
2240 goto err_rx_desc;
2241 }
2242 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2243 bzero((void *)rxr->rx_base, rsize);
2244
2245 /* Allocate receive buffers for the ring */
2246 if (ixgbe_allocate_receive_buffers(rxr)) {
2247 aprint_error_dev(dev,
2248 "Critical Failure setting up receive buffers\n");
2249 error = ENOMEM;
2250 goto err_rx_desc;
2251 }
2252 }
2253
2254 /*
2255 * Finally set up the queue holding structs
2256 */
2257 for (int i = 0; i < adapter->num_queues; i++) {
2258 que = &adapter->queues[i];
2259 que->adapter = adapter;
2260 que->me = i;
2261 que->txr = &adapter->tx_rings[i];
2262 que->rxr = &adapter->rx_rings[i];
2263
2264 mutex_init(&que->im_mtx, MUTEX_DEFAULT, IPL_NET);
2265 que->im_nest = 0;
2266 }
2267
2268 return (0);
2269
2270 err_rx_desc:
2271 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2272 ixgbe_dma_free(adapter, &rxr->rxdma);
2273 err_tx_desc:
2274 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2275 ixgbe_dma_free(adapter, &txr->txdma);
2276 free(adapter->rx_rings, M_DEVBUF);
2277 rx_fail:
2278 free(adapter->tx_rings, M_DEVBUF);
2279 tx_fail:
2280 free(adapter->queues, M_DEVBUF);
2281 fail:
2282 return (error);
2283 } /* ixgbe_allocate_queues */
2284