1 /* $NetBSD: ix_txrx.c,v 1.28 2017/08/30 08:49:18 msaitoh Exp $ */
2
3 /******************************************************************************
4
5 Copyright (c) 2001-2017, Intel Corporation
6 All rights reserved.
7
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are met:
10
11 1. Redistributions of source code must retain the above copyright notice,
12 this list of conditions and the following disclaimer.
13
14 2. Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
17
18 3. Neither the name of the Intel Corporation nor the names of its
19 contributors may be used to endorse or promote products derived from
20 this software without specific prior written permission.
21
22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
23 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
26 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 POSSIBILITY OF SUCH DAMAGE.
33
34 ******************************************************************************/
35 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 321476 2017-07-25 14:38:30Z sbruno $*/
36
37 /*
38 * Copyright (c) 2011 The NetBSD Foundation, Inc.
39 * All rights reserved.
40 *
41 * This code is derived from software contributed to The NetBSD Foundation
42 * by Coyote Point Systems, Inc.
43 *
44 * Redistribution and use in source and binary forms, with or without
45 * modification, are permitted provided that the following conditions
46 * are met:
47 * 1. Redistributions of source code must retain the above copyright
48 * notice, this list of conditions and the following disclaimer.
49 * 2. Redistributions in binary form must reproduce the above copyright
50 * notice, this list of conditions and the following disclaimer in the
51 * documentation and/or other materials provided with the distribution.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
54 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
56 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
57 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
58 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
59 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
60 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
61 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
62 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
63 * POSSIBILITY OF SUCH DAMAGE.
64 */
65
66 #include "opt_inet.h"
67 #include "opt_inet6.h"
68
69 #include "ixgbe.h"
70
71 /*
72 * HW RSC control:
73 * this feature only works with
74 * IPv4, and only on 82599 and later.
75 * Also this will cause IP forwarding to
76 * fail, and that cannot be controlled by
77 * the stack as LRO can. For all these
78 * reasons I've deemed it best to leave
79 * this off and not bother with a tuneable
80 * interface; enabling it requires
81 * recompiling the driver.
82 */
83 static bool ixgbe_rsc_enable = FALSE;
84
85 /*
86 * For Flow Director: this is the
87 * number of TX packets we sample
88 * for the filter pool; with the default
89 * of 20, every 20th packet is probed.
90 *
91 * This feature can be disabled by
92 * setting this to 0.
93 */
94 static int atr_sample_rate = 20;
95
96 /************************************************************************
97 * Local Function prototypes
98 ************************************************************************/
99 static void ixgbe_setup_transmit_ring(struct tx_ring *);
100 static void ixgbe_free_transmit_buffers(struct tx_ring *);
101 static int ixgbe_setup_receive_ring(struct rx_ring *);
102 static void ixgbe_free_receive_buffers(struct rx_ring *);
103 static void ixgbe_rx_checksum(u32, struct mbuf *, u32,
104 struct ixgbe_hw_stats *);
105 static void ixgbe_refresh_mbufs(struct rx_ring *, int);
106 static int ixgbe_xmit(struct tx_ring *, struct mbuf *);
107 static int ixgbe_tx_ctx_setup(struct tx_ring *,
108 struct mbuf *, u32 *, u32 *);
109 static int ixgbe_tso_setup(struct tx_ring *,
110 struct mbuf *, u32 *, u32 *);
111 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
112 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
113 struct mbuf *, u32);
114 static int ixgbe_dma_malloc(struct adapter *, bus_size_t,
115 struct ixgbe_dma_alloc *, int);
116 static void ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
117
118 static void ixgbe_setup_hw_rsc(struct rx_ring *);
119
120 /************************************************************************
121 * ixgbe_legacy_start_locked - Transmit entry point
122 *
123 * Called by the stack to initiate a transmit.
124 * The driver will remain in this routine as long as there are
125 * packets to transmit and transmit resources are available.
126 * In case resources are not available, the stack is notified
127 * and the packet is requeued.
128 ************************************************************************/
129 int
130 ixgbe_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
131 {
132 int rc;
133 struct mbuf *m_head;
134 struct adapter *adapter = txr->adapter;
135
136 IXGBE_TX_LOCK_ASSERT(txr);
137
138 if ((ifp->if_flags & IFF_RUNNING) == 0)
139 return (ENETDOWN);
140 if (!adapter->link_active)
141 return (ENETDOWN);
142
143 while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
144 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
145 break;
146
147 IFQ_POLL(&ifp->if_snd, m_head);
148 if (m_head == NULL)
149 break;
150
151 if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
152 break;
153 }
154 IFQ_DEQUEUE(&ifp->if_snd, m_head);
155 if (rc != 0) {
156 m_freem(m_head);
157 continue;
158 }
159
160 /* Send a copy of the frame to the BPF listener */
161 bpf_mtap(ifp, m_head);
162 }
163
164 return IXGBE_SUCCESS;
165 } /* ixgbe_legacy_start_locked */
166
167 /************************************************************************
168 * ixgbe_legacy_start
169 *
170 * Called by the stack, this always uses the first tx ring,
171 * and should not be used with multiqueue tx enabled.
172 ************************************************************************/
173 void
174 ixgbe_legacy_start(struct ifnet *ifp)
175 {
176 struct adapter *adapter = ifp->if_softc;
177 struct tx_ring *txr = adapter->tx_rings;
178
179 if (ifp->if_flags & IFF_RUNNING) {
180 IXGBE_TX_LOCK(txr);
181 ixgbe_legacy_start_locked(ifp, txr);
182 IXGBE_TX_UNLOCK(txr);
183 }
184 } /* ixgbe_legacy_start */
185
186 /************************************************************************
187 * ixgbe_mq_start - Multiqueue Transmit Entry Point
188 *
189 * (if_transmit function)
190 ************************************************************************/
191 int
192 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
193 {
194 struct adapter *adapter = ifp->if_softc;
195 struct tx_ring *txr;
196 int i, err = 0;
197 #ifdef RSS
198 uint32_t bucket_id;
199 #endif
200
201 /*
202 * When doing RSS, map it to the same outbound queue
203 * as the incoming flow would be mapped to.
204 *
205 * If everything is set up correctly, it should be the
206 * same bucket as the one the current CPU maps to.
207 */
208 #ifdef RSS
209 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
210 if ((adapter->feat_en & IXGBE_FEATURE_RSS) &&
211 (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
212 &bucket_id) == 0)) {
213 i = bucket_id % adapter->num_queues;
214 #ifdef IXGBE_DEBUG
215 if (bucket_id > adapter->num_queues)
216 if_printf(ifp,
217 "bucket_id (%d) > num_queues (%d)\n",
218 bucket_id, adapter->num_queues);
219 #endif
220 } else
221 i = m->m_pkthdr.flowid % adapter->num_queues;
222 } else
223 #endif /* RSS */
224 i = cpu_index(curcpu()) % adapter->num_queues;
225
226 /* Check for a hung queue and pick alternative */
227 if ((((u64)1 << i) & adapter->active_queues) == 0)
228 i = ffs64(adapter->active_queues);
229
230 txr = &adapter->tx_rings[i];
231
232 err = pcq_put(txr->txr_interq, m);
233 if (err == false) {
234 m_freem(m);
235 txr->pcq_drops.ev_count++;
236 return (ENOBUFS);
237 }
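/*
 * If the TX lock is free, drain the internal queue now; otherwise
 * hand the work to the per-ring softint instead of spinning for
 * the lock here.
 */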
238 if (IXGBE_TX_TRYLOCK(txr)) {
239 ixgbe_mq_start_locked(ifp, txr);
240 IXGBE_TX_UNLOCK(txr);
241 } else
242 softint_schedule(txr->txr_si);
243
244 return (0);
245 } /* ixgbe_mq_start */
246
247 /************************************************************************
248 * ixgbe_mq_start_locked
249 ************************************************************************/
250 int
251 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
252 {
253 struct mbuf *next;
254 int enqueued = 0, err = 0;
255
256 if ((ifp->if_flags & IFF_RUNNING) == 0)
257 return (ENETDOWN);
258 if (txr->adapter->link_active == 0)
259 return (ENETDOWN);
260
261 /* Process the queue */
262 while ((next = pcq_get(txr->txr_interq)) != NULL) {
263 if ((err = ixgbe_xmit(txr, next)) != 0) {
264 m_freem(next);
265 /* All errors are counted in ixgbe_xmit() */
266 break;
267 }
268 enqueued++;
269 #if __FreeBSD_version >= 1100036
270 /*
271 * Since we're looking at the tx ring, we can check
272 * to see if we're a VF by examining our tail register
273 * address.
274 */
275 if ((txr->adapter->feat_en & IXGBE_FEATURE_VF) &&
276 (next->m_flags & M_MCAST))
277 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
278 #endif
279 /* Send a copy of the frame to the BPF listener */
280 bpf_mtap(ifp, next);
281 if ((ifp->if_flags & IFF_RUNNING) == 0)
282 break;
283 }
284
285 if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->adapter))
286 ixgbe_txeof(txr);
287
288 return (err);
289 } /* ixgbe_mq_start_locked */
290
291 /************************************************************************
292 * ixgbe_deferred_mq_start
293 *
294 * Called from a taskqueue to drain queued transmit packets.
295 ************************************************************************/
296 void
297 ixgbe_deferred_mq_start(void *arg)
298 {
299 struct tx_ring *txr = arg;
300 struct adapter *adapter = txr->adapter;
301 struct ifnet *ifp = adapter->ifp;
302
303 IXGBE_TX_LOCK(txr);
304 if (pcq_peek(txr->txr_interq) != NULL)
305 ixgbe_mq_start_locked(ifp, txr);
306 IXGBE_TX_UNLOCK(txr);
307 } /* ixgbe_deferred_mq_start */
308
309 /************************************************************************
310 * ixgbe_xmit
311 *
312 * Maps the mbufs to tx descriptors, allowing the
313 * TX engine to transmit the packets.
314 *
315 * Return 0 on success, positive on failure
316 ************************************************************************/
317 static int
318 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
319 {
320 struct adapter *adapter = txr->adapter;
321 struct ixgbe_tx_buf *txbuf;
322 union ixgbe_adv_tx_desc *txd = NULL;
323 struct m_tag *mtag;
324 struct ifnet *ifp = adapter->ifp;
325 struct ethercom *ec = &adapter->osdep.ec;
326 int i, j, error;
327 int first;
328 u32 olinfo_status = 0, cmd_type_len;
329 bool remap = TRUE;
330 bus_dmamap_t map;
331
332 /* Basic descriptor defines */
333 cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
334 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
335
336 if ((mtag = VLAN_OUTPUT_TAG(ec, m_head)) != NULL)
337 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
338
339 /*
340 * Important to capture the first descriptor
341 * used because it will contain the index of
342 * the one we tell the hardware to report back
343 */
344 first = txr->next_avail_desc;
345 txbuf = &txr->tx_buffers[first];
346 map = txbuf->map;
347
348 /*
349 * Map the packet for DMA.
350 */
351 retry:
352 error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map, m_head,
353 BUS_DMA_NOWAIT);
354
355 if (__predict_false(error)) {
356 struct mbuf *m;
357
358 switch (error) {
359 case EAGAIN:
360 adapter->eagain_tx_dma_setup.ev_count++;
361 return EAGAIN;
362 case ENOMEM:
363 adapter->enomem_tx_dma_setup.ev_count++;
364 return EAGAIN;
365 case EFBIG:
366 /* Try it again? - one try */
367 if (remap == TRUE) {
368 remap = FALSE;
369 /*
370 * XXX: m_defrag will choke on
371 * non-MCLBYTES-sized clusters
372 */
373 adapter->efbig_tx_dma_setup.ev_count++;
374 m = m_defrag(m_head, M_NOWAIT);
375 if (m == NULL) {
376 adapter->mbuf_defrag_failed.ev_count++;
377 return ENOBUFS;
378 }
379 m_head = m;
380 goto retry;
381 } else {
382 adapter->efbig2_tx_dma_setup.ev_count++;
383 return error;
384 }
385 case EINVAL:
386 adapter->einval_tx_dma_setup.ev_count++;
387 return error;
388 default:
389 adapter->other_tx_dma_setup.ev_count++;
390 return error;
391 }
392 }
393
394 /* Make certain there are enough descriptors */
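/*
 * We need dm_nsegs data descriptors; the +2 leaves room for the
 * offload context descriptor set up below plus one spare slot.
 */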
395 if (txr->tx_avail < (map->dm_nsegs + 2)) {
396 txr->no_desc_avail.ev_count++;
397 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
398 return EAGAIN;
399 }
400
401 /*
402 * Set up the appropriate offload context
403 * this will consume the first descriptor
404 */
405 error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
406 if (__predict_false(error)) {
407 return (error);
408 }
409
410 /* Do the flow director magic */
411 if ((adapter->feat_en & IXGBE_FEATURE_FDIR) &&
412 (txr->atr_sample) && (!adapter->fdir_reinit)) {
413 ++txr->atr_count;
414 if (txr->atr_count >= atr_sample_rate) {
415 ixgbe_atr(txr, m_head);
416 txr->atr_count = 0;
417 }
418 }
419
420 olinfo_status |= IXGBE_ADVTXD_CC;
421 i = txr->next_avail_desc;
422 for (j = 0; j < map->dm_nsegs; j++) {
423 bus_size_t seglen;
424 bus_addr_t segaddr;
425
426 txbuf = &txr->tx_buffers[i];
427 txd = &txr->tx_base[i];
428 seglen = map->dm_segs[j].ds_len;
429 segaddr = htole64(map->dm_segs[j].ds_addr);
430
431 txd->read.buffer_addr = segaddr;
432 txd->read.cmd_type_len = htole32(txr->txd_cmd |
433 cmd_type_len | seglen);
434 txd->read.olinfo_status = htole32(olinfo_status);
435
436 if (++i == txr->num_desc)
437 i = 0;
438 }
439
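/*
 * The final data descriptor gets EOP (end of packet) and RS
 * (report status) so the hardware writes back DD on completion,
 * which ixgbe_txeof() later polls for.
 */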
440 txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
441 txr->tx_avail -= map->dm_nsegs;
442 txr->next_avail_desc = i;
443
444 txbuf->m_head = m_head;
445 /*
446 * Here we swap the map so the last descriptor,
447 * which gets the completion interrupt, has the
448 * real map, and the first descriptor gets the
449 * unused map from this descriptor.
450 */
451 txr->tx_buffers[first].map = txbuf->map;
452 txbuf->map = map;
453 bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
454 BUS_DMASYNC_PREWRITE);
455
456 /* Set the EOP descriptor that will be marked done */
457 txbuf = &txr->tx_buffers[first];
458 txbuf->eop = txd;
459
460 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
461 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
462 /*
463 * Advance the Transmit Descriptor Tail (TDT); this tells the
464 * hardware that this frame is available to transmit.
465 */
466 ++txr->total_packets.ev_count;
467 IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
468
469 /*
470 * XXXX NOMPSAFE: ifp->if_data should be percpu.
471 */
472 ifp->if_obytes += m_head->m_pkthdr.len;
473 if (m_head->m_flags & M_MCAST)
474 ifp->if_omcasts++;
475
476 /* Mark queue as having work */
477 if (txr->busy == 0)
478 txr->busy = 1;
479
480 return (0);
481 } /* ixgbe_xmit */
482
483
484 /************************************************************************
485 * ixgbe_allocate_transmit_buffers
486 *
487 * Allocate memory for tx_buffer structures. The tx_buffer stores all
488 * the information needed to transmit a packet on the wire. This is
489 * called only once at attach, setup is done every reset.
490 ************************************************************************/
491 static int
492 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
493 {
494 struct adapter *adapter = txr->adapter;
495 device_t dev = adapter->dev;
496 struct ixgbe_tx_buf *txbuf;
497 int error, i;
498
499 /*
500 * Setup DMA descriptor areas.
501 */
502 error = ixgbe_dma_tag_create(
503 /* parent */ adapter->osdep.dmat,
504 /* alignment */ 1,
505 /* bounds */ 0,
506 /* maxsize */ IXGBE_TSO_SIZE,
507 /* nsegments */ adapter->num_segs,
508 /* maxsegsize */ PAGE_SIZE,
509 /* flags */ 0,
510 &txr->txtag);
511 if (error != 0) {
512 aprint_error_dev(dev, "Unable to allocate TX DMA tag\n");
513 goto fail;
514 }
515
516 txr->tx_buffers =
517 (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
518 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
519 if (txr->tx_buffers == NULL) {
520 aprint_error_dev(dev, "Unable to allocate tx_buffer memory\n");
521 error = ENOMEM;
522 goto fail;
523 }
524
525 /* Create the descriptor buffer dma maps */
526 txbuf = txr->tx_buffers;
527 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
528 error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
529 if (error != 0) {
530 aprint_error_dev(dev,
531 "Unable to create TX DMA map (%d)\n", error);
532 goto fail;
533 }
534 }
535
536 return 0;
537 fail:
538 /* We free all; it handles the case where we failed in the middle */
539 #if 0 /* XXX was FreeBSD */
540 ixgbe_free_transmit_structures(adapter);
541 #else
542 ixgbe_free_transmit_buffers(txr);
543 #endif
544 return (error);
545 } /* ixgbe_allocate_transmit_buffers */
546
547 /************************************************************************
548 * ixgbe_setup_transmit_ring - Initialize a transmit ring.
549 ************************************************************************/
550 static void
551 ixgbe_setup_transmit_ring(struct tx_ring *txr)
552 {
553 struct adapter *adapter = txr->adapter;
554 struct ixgbe_tx_buf *txbuf;
555 #ifdef DEV_NETMAP
556 struct netmap_adapter *na = NA(adapter->ifp);
557 struct netmap_slot *slot;
558 #endif /* DEV_NETMAP */
559
560 /* Clear the old ring contents */
561 IXGBE_TX_LOCK(txr);
562
563 #ifdef DEV_NETMAP
564 if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
565 /*
566 * (under lock): if in netmap mode, do some consistency
567 * checks and set slot to entry 0 of the netmap ring.
568 */
569 slot = netmap_reset(na, NR_TX, txr->me, 0);
570 }
571 #endif /* DEV_NETMAP */
572
573 bzero((void *)txr->tx_base,
574 (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
575 /* Reset indices */
576 txr->next_avail_desc = 0;
577 txr->next_to_clean = 0;
578
579 /* Free any existing tx buffers. */
580 txbuf = txr->tx_buffers;
581 for (int i = 0; i < txr->num_desc; i++, txbuf++) {
582 if (txbuf->m_head != NULL) {
583 bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
584 0, txbuf->m_head->m_pkthdr.len,
585 BUS_DMASYNC_POSTWRITE);
586 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
587 m_freem(txbuf->m_head);
588 txbuf->m_head = NULL;
589 }
590
591 #ifdef DEV_NETMAP
592 /*
593 * In netmap mode, set the map for the packet buffer.
594 * NOTE: Some drivers (not this one) also need to set
595 * the physical buffer address in the NIC ring.
596 * Slots in the netmap ring (indexed by "si") are
597 * kring->nkr_hwofs positions "ahead" wrt the
598 * corresponding slot in the NIC ring. In some drivers
599 * (not here) nkr_hwofs can be negative. Function
600 * netmap_idx_n2k() handles wraparounds properly.
601 */
602 if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
603 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
604 netmap_load_map(na, txr->txtag,
605 txbuf->map, NMB(na, slot + si));
606 }
607 #endif /* DEV_NETMAP */
608
609 /* Clear the EOP descriptor pointer */
610 txbuf->eop = NULL;
611 }
612
613 /* Set the rate at which we sample packets */
614 if (adapter->feat_en & IXGBE_FEATURE_FDIR)
615 txr->atr_sample = atr_sample_rate;
616
617 /* Set number of descriptors available */
618 txr->tx_avail = adapter->num_tx_desc;
619
620 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
621 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
622 IXGBE_TX_UNLOCK(txr);
623 } /* ixgbe_setup_transmit_ring */
624
625 /************************************************************************
626 * ixgbe_setup_transmit_structures - Initialize all transmit rings.
627 ************************************************************************/
628 int
629 ixgbe_setup_transmit_structures(struct adapter *adapter)
630 {
631 struct tx_ring *txr = adapter->tx_rings;
632
633 for (int i = 0; i < adapter->num_queues; i++, txr++)
634 ixgbe_setup_transmit_ring(txr);
635
636 return (0);
637 } /* ixgbe_setup_transmit_structures */
638
639 /************************************************************************
640 * ixgbe_free_transmit_structures - Free all transmit rings.
641 ************************************************************************/
642 void
643 ixgbe_free_transmit_structures(struct adapter *adapter)
644 {
645 struct tx_ring *txr = adapter->tx_rings;
646
647 for (int i = 0; i < adapter->num_queues; i++, txr++) {
648 ixgbe_free_transmit_buffers(txr);
649 ixgbe_dma_free(adapter, &txr->txdma);
650 IXGBE_TX_LOCK_DESTROY(txr);
651 }
652 free(adapter->tx_rings, M_DEVBUF);
653 } /* ixgbe_free_transmit_structures */
654
655 /************************************************************************
656 * ixgbe_free_transmit_buffers
657 *
658 * Free transmit ring related data structures.
659 ************************************************************************/
660 static void
661 ixgbe_free_transmit_buffers(struct tx_ring *txr)
662 {
663 struct adapter *adapter = txr->adapter;
664 struct ixgbe_tx_buf *tx_buffer;
665 int i;
666
667 INIT_DEBUGOUT("ixgbe_free_transmit_buffers: begin");
668
669 if (txr->tx_buffers == NULL)
670 return;
671
672 tx_buffer = txr->tx_buffers;
673 for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
674 if (tx_buffer->m_head != NULL) {
675 bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
676 0, tx_buffer->m_head->m_pkthdr.len,
677 BUS_DMASYNC_POSTWRITE);
678 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
679 m_freem(tx_buffer->m_head);
680 tx_buffer->m_head = NULL;
681 if (tx_buffer->map != NULL) {
682 ixgbe_dmamap_destroy(txr->txtag,
683 tx_buffer->map);
684 tx_buffer->map = NULL;
685 }
686 } else if (tx_buffer->map != NULL) {
687 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
688 ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
689 tx_buffer->map = NULL;
690 }
691 }
692 if (txr->txr_interq != NULL) {
693 struct mbuf *m;
694
695 while ((m = pcq_get(txr->txr_interq)) != NULL)
696 m_freem(m);
697 pcq_destroy(txr->txr_interq);
698 }
699 if (txr->tx_buffers != NULL) {
700 free(txr->tx_buffers, M_DEVBUF);
701 txr->tx_buffers = NULL;
702 }
703 if (txr->txtag != NULL) {
704 ixgbe_dma_tag_destroy(txr->txtag);
705 txr->txtag = NULL;
706 }
707 } /* ixgbe_free_transmit_buffers */
708
709 /************************************************************************
710 * ixgbe_tx_ctx_setup
711 *
712 * Advanced Context Descriptor setup for VLAN, CSUM or TSO
713 ************************************************************************/
714 static int
715 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
716 u32 *cmd_type_len, u32 *olinfo_status)
717 {
718 struct adapter *adapter = txr->adapter;
719 struct ethercom *ec = &adapter->osdep.ec;
720 struct m_tag *mtag;
721 struct ixgbe_adv_tx_context_desc *TXD;
722 struct ether_vlan_header *eh;
723 #ifdef INET
724 struct ip *ip;
725 #endif
726 #ifdef INET6
727 struct ip6_hdr *ip6;
728 #endif
729 int ehdrlen, ip_hlen = 0;
730 int offload = TRUE;
731 int ctxd = txr->next_avail_desc;
732 u32 vlan_macip_lens = 0;
733 u32 type_tucmd_mlhl = 0;
734 u16 vtag = 0;
735 u16 etype;
736 u8 ipproto = 0;
737 char *l3d;
738
739
740 /* First check if TSO is to be used */
741 if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4 | M_CSUM_TSOv6)) {
742 int rv = ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status);
743
744 if (rv != 0)
745 ++adapter->tso_err.ev_count;
746 return rv;
747 }
748
749 if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
750 offload = FALSE;
751
752 /* Indicate the whole packet as payload when not doing TSO */
753 *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
754
755 /* Now ready a context descriptor */
756 TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
757
758 /*
759 * In advanced descriptors the vlan tag must
760 * be placed into the context descriptor. Hence
761 * we need to make one even if not doing offloads.
762 */
763 if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
764 vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
765 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
766 } else if (!(txr->adapter->feat_en & IXGBE_FEATURE_NEEDS_CTXD) &&
767 (offload == FALSE))
768 return (0);
769
770 /*
771 * Determine where frame payload starts.
772 * Jump over vlan headers if already present,
773 * helpful for QinQ too.
774 */
775 KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
776 eh = mtod(mp, struct ether_vlan_header *);
777 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
778 KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
779 etype = ntohs(eh->evl_proto);
780 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
781 } else {
782 etype = ntohs(eh->evl_encap_proto);
783 ehdrlen = ETHER_HDR_LEN;
784 }
785
786 /* Set the ether header length */
787 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
788
789 if (offload == FALSE)
790 goto no_offloads;
791
792 /*
793 * If the first mbuf only includes the ethernet header,
794 * jump to the next one
795 * XXX: This assumes the stack splits mbufs containing headers
796 * on header boundaries
797 * XXX: And assumes the entire IP header is contained in one mbuf
798 */
799 if (mp->m_len == ehdrlen && mp->m_next)
800 l3d = mtod(mp->m_next, char *);
801 else
802 l3d = mtod(mp, char *) + ehdrlen;
803
804 switch (etype) {
805 #ifdef INET
806 case ETHERTYPE_IP:
807 ip = (struct ip *)(l3d);
808 ip_hlen = ip->ip_hl << 2;
809 ipproto = ip->ip_p;
810 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
811 KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
812 ip->ip_sum == 0);
813 break;
814 #endif
815 #ifdef INET6
816 case ETHERTYPE_IPV6:
817 ip6 = (struct ip6_hdr *)(l3d);
818 ip_hlen = sizeof(struct ip6_hdr);
819 ipproto = ip6->ip6_nxt;
820 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
821 break;
822 #endif
823 default:
824 offload = false;
825 break;
826 }
827
828 if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
829 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
830
831 vlan_macip_lens |= ip_hlen;
832
833 /* No support for offloads for non-L4 next headers */
834 switch (ipproto) {
835 case IPPROTO_TCP:
836 if (mp->m_pkthdr.csum_flags &
837 (M_CSUM_TCPv4 | M_CSUM_TCPv6))
838 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
839 else
840 offload = false;
841 break;
842 case IPPROTO_UDP:
843 if (mp->m_pkthdr.csum_flags &
844 (M_CSUM_UDPv4 | M_CSUM_UDPv6))
845 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
846 else
847 offload = false;
848 break;
849 default:
850 offload = false;
851 break;
852 }
853
854 if (offload) /* Insert L4 checksum into data descriptors */
855 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
856
857 no_offloads:
858 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
859
860 /* Now copy bits into descriptor */
861 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
862 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
863 TXD->seqnum_seed = htole32(0);
864 TXD->mss_l4len_idx = htole32(0);
865
866 /* We've consumed the first desc, adjust counters */
867 if (++ctxd == txr->num_desc)
868 ctxd = 0;
869 txr->next_avail_desc = ctxd;
870 --txr->tx_avail;
871
872 return (0);
873 } /* ixgbe_tx_ctx_setup */
874
875 /************************************************************************
876 * ixgbe_tso_setup
877 *
878 * Setup work for hardware segmentation offload (TSO) on
879 * adapters using advanced tx descriptors
880 ************************************************************************/
881 static int
882 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len,
883 u32 *olinfo_status)
884 {
885 struct m_tag *mtag;
886 struct adapter *adapter = txr->adapter;
887 struct ethercom *ec = &adapter->osdep.ec;
888 struct ixgbe_adv_tx_context_desc *TXD;
889 struct ether_vlan_header *eh;
890 #ifdef INET6
891 struct ip6_hdr *ip6;
892 #endif
893 #ifdef INET
894 struct ip *ip;
895 #endif
896 struct tcphdr *th;
897 int ctxd, ehdrlen, ip_hlen, tcp_hlen;
898 u32 vlan_macip_lens = 0;
899 u32 type_tucmd_mlhl = 0;
900 u32 mss_l4len_idx = 0, paylen;
901 u16 vtag = 0, eh_type;
902
903 /*
904 * Determine where frame payload starts.
905 * Jump over vlan headers if already present
906 */
907 eh = mtod(mp, struct ether_vlan_header *);
908 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
909 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
910 eh_type = eh->evl_proto;
911 } else {
912 ehdrlen = ETHER_HDR_LEN;
913 eh_type = eh->evl_encap_proto;
914 }
915
916 switch (ntohs(eh_type)) {
917 #ifdef INET
918 case ETHERTYPE_IP:
919 ip = (struct ip *)(mp->m_data + ehdrlen);
920 if (ip->ip_p != IPPROTO_TCP)
921 return (ENXIO);
922 ip->ip_sum = 0;
923 ip_hlen = ip->ip_hl << 2;
924 th = (struct tcphdr *)((char *)ip + ip_hlen);
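/*
 * Seed the TCP checksum with the pseudo-header sum (addresses and
 * protocol, no length); the hardware fills in the length and the
 * payload checksum for each segment it generates.
 */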
925 th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
926 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
927 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
928 /* Tell transmit desc to also do IPv4 checksum. */
929 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
930 break;
931 #endif
932 #ifdef INET6
933 case ETHERTYPE_IPV6:
934 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
935 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
936 if (ip6->ip6_nxt != IPPROTO_TCP)
937 return (ENXIO);
938 ip_hlen = sizeof(struct ip6_hdr);
939 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
940 th = (struct tcphdr *)((char *)ip6 + ip_hlen);
941 th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
942 &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
943 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
944 break;
945 #endif
946 default:
947 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
948 __func__, ntohs(eh_type));
949 break;
950 }
951
952 ctxd = txr->next_avail_desc;
953 TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
954
955 tcp_hlen = th->th_off << 2;
956
957 /* This is used in the transmit desc in encap */
958 paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
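/*
 * paylen is the TCP payload only; the header lengths are conveyed
 * separately in the context descriptor fields below.
 */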
959
960 /* VLAN MACLEN IPLEN */
961 if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
962 vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
963 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
964 }
965
966 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
967 vlan_macip_lens |= ip_hlen;
968 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
969
970 /* ADV DTYPE TUCMD */
971 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
972 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
973 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
974
975 /* MSS L4LEN IDX */
976 mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
977 mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
978 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
979
980 TXD->seqnum_seed = htole32(0);
981
982 if (++ctxd == txr->num_desc)
983 ctxd = 0;
984
985 txr->tx_avail--;
986 txr->next_avail_desc = ctxd;
987 *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
988 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
989 *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
990 ++txr->tso_tx.ev_count;
991
992 return (0);
993 } /* ixgbe_tso_setup */
994
995
996 /************************************************************************
997 * ixgbe_txeof
998 *
999 * Examine each tx_buffer in the used queue. If the hardware is done
1000 * processing the packet then free associated resources. The
1001 * tx_buffer is put back on the free queue.
1002 ************************************************************************/
1003 void
1004 ixgbe_txeof(struct tx_ring *txr)
1005 {
1006 struct adapter *adapter = txr->adapter;
1007 struct ifnet *ifp = adapter->ifp;
1008 struct ixgbe_tx_buf *buf;
1009 union ixgbe_adv_tx_desc *txd;
1010 u32 work, processed = 0;
1011 u32 limit = adapter->tx_process_limit;
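/*
 * tx_process_limit bounds how many packets are cleaned in a single
 * call, so one busy ring cannot monopolize the CPU.
 */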
1012
1013 KASSERT(mutex_owned(&txr->tx_mtx));
1014
1015 #ifdef DEV_NETMAP
1016 if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) &&
1017 (adapter->ifp->if_capenable & IFCAP_NETMAP)) {
1018 struct netmap_adapter *na = NA(adapter->ifp);
1019 struct netmap_kring *kring = &na->tx_rings[txr->me];
1020 txd = txr->tx_base;
1021 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1022 BUS_DMASYNC_POSTREAD);
1023 /*
1024 * In netmap mode, all the work is done in the context
1025 * of the client thread. Interrupt handlers only wake up
1026 * clients, which may be sleeping on individual rings
1027 * or on a global resource for all rings.
1028 * To implement tx interrupt mitigation, we wake up the client
1029 * thread roughly every half ring, even if the NIC interrupts
1030 * more frequently. This is implemented as follows:
1031 * - ixgbe_txsync() sets kring->nr_kflags with the index of
1032 * the slot that should wake up the thread (nkr_num_slots
1033 * means the user thread should not be woken up);
1034 * - the driver ignores tx interrupts unless netmap_mitigate=0
1035 * or the slot has the DD bit set.
1036 */
1037 if (!netmap_mitigate ||
1038 (kring->nr_kflags < kring->nkr_num_slots &&
1039 txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
1040 netmap_tx_irq(ifp, txr->me);
1041 }
1042 return;
1043 }
1044 #endif /* DEV_NETMAP */
1045
1046 if (txr->tx_avail == txr->num_desc) {
1047 txr->busy = 0;
1048 return;
1049 }
1050
1051 /* Get work starting point */
1052 work = txr->next_to_clean;
1053 buf = &txr->tx_buffers[work];
1054 txd = &txr->tx_base[work];
1055 work -= txr->num_desc; /* The distance to ring end */
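/*
 * 'work' is now a negative offset from the end of the ring; it is
 * incremented as descriptors are cleaned, and hitting zero means
 * the scan has wrapped back to the start of the ring.
 */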
1056 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1057 BUS_DMASYNC_POSTREAD);
1058
1059 do {
1060 union ixgbe_adv_tx_desc *eop = buf->eop;
1061 if (eop == NULL) /* No work */
1062 break;
1063
1064 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1065 break; /* I/O not complete */
1066
1067 if (buf->m_head) {
1068 txr->bytes += buf->m_head->m_pkthdr.len;
1069 bus_dmamap_sync(txr->txtag->dt_dmat, buf->map,
1070 0, buf->m_head->m_pkthdr.len,
1071 BUS_DMASYNC_POSTWRITE);
1072 ixgbe_dmamap_unload(txr->txtag, buf->map);
1073 m_freem(buf->m_head);
1074 buf->m_head = NULL;
1075 }
1076 buf->eop = NULL;
1077 ++txr->tx_avail;
1078
1079 /* We clean the range if multi segment */
1080 while (txd != eop) {
1081 ++txd;
1082 ++buf;
1083 ++work;
1084 /* wrap the ring? */
1085 if (__predict_false(!work)) {
1086 work -= txr->num_desc;
1087 buf = txr->tx_buffers;
1088 txd = txr->tx_base;
1089 }
1090 if (buf->m_head) {
1091 txr->bytes +=
1092 buf->m_head->m_pkthdr.len;
1093 bus_dmamap_sync(txr->txtag->dt_dmat,
1094 buf->map,
1095 0, buf->m_head->m_pkthdr.len,
1096 BUS_DMASYNC_POSTWRITE);
1097 ixgbe_dmamap_unload(txr->txtag,
1098 buf->map);
1099 m_freem(buf->m_head);
1100 buf->m_head = NULL;
1101 }
1102 ++txr->tx_avail;
1103 buf->eop = NULL;
1104
1105 }
1106 ++txr->packets;
1107 ++processed;
1108 ++ifp->if_opackets;
1109
1110 /* Try the next packet */
1111 ++txd;
1112 ++buf;
1113 ++work;
1114 /* reset with a wrap */
1115 if (__predict_false(!work)) {
1116 work -= txr->num_desc;
1117 buf = txr->tx_buffers;
1118 txd = txr->tx_base;
1119 }
1120 prefetch(txd);
1121 } while (__predict_true(--limit));
1122
1123 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1124 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1125
1126 work += txr->num_desc;
1127 txr->next_to_clean = work;
1128
1129 /*
1130 * Queue hang detection: we know there is
1131 * work outstanding or the first return
1132 * above would have been taken, so increment
1133 * busy if nothing managed to get cleaned;
1134 * local_timer will then check it and mark
1135 * the queue HUNG if it exceeds MAX attempts.
1136 */
1137 if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1138 ++txr->busy;
1139 /*
1140 * If anything gets cleaned we reset state to 1;
1141 * note this will clear HUNG if it is set.
1142 */
1143 if (processed)
1144 txr->busy = 1;
1145
1146 if (txr->tx_avail == txr->num_desc)
1147 txr->busy = 0;
1148
1149 return;
1150 } /* ixgbe_txeof */
1151
1152 /************************************************************************
1153 * ixgbe_rsc_count
1154 *
1155 * Used to detect a descriptor that has been merged by Hardware RSC.
1156 ************************************************************************/
1157 static inline u32
1158 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1159 {
1160 return (le32toh(rx->wb.lower.lo_dword.data) &
1161 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1162 } /* ixgbe_rsc_count */
1163
1164 /************************************************************************
1165 * ixgbe_setup_hw_rsc
1166 *
1167 * Initialize Hardware RSC (LRO) feature on 82599
1168 * for an RX ring; it is toggled by the LRO capability
1169 * even though it is transparent to the stack.
1170 *
1171 * NOTE: Since this HW feature only works with IPv4 and
1172 * testing has shown soft LRO to be as effective,
1173 * this feature will be disabled by default.
1174 ************************************************************************/
1175 static void
1176 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1177 {
1178 struct adapter *adapter = rxr->adapter;
1179 struct ixgbe_hw *hw = &adapter->hw;
1180 u32 rscctrl, rdrxctl;
1181
1182 /* If turning LRO/RSC off we need to disable it */
1183 if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
1184 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1185 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl & ~IXGBE_RSCCTL_RSCEN);
1186 return;
1187 }
1188
1189 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1190 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1191 #ifdef DEV_NETMAP
1192 /* Always strip CRC unless Netmap disabled it */
1193 if (!(adapter->feat_en & IXGBE_FEATURE_NETMAP) ||
1194 !(adapter->ifp->if_capenable & IFCAP_NETMAP) ||
1195 ix_crcstrip)
1196 #endif /* DEV_NETMAP */
1197 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1198 rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1199 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1200
1201 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1202 rscctrl |= IXGBE_RSCCTL_RSCEN;
1203 /*
1204 * Limit the total number of descriptors that
1205 * can be combined, so it does not exceed 64K
1206 */
1207 if (rxr->mbuf_sz == MCLBYTES)
1208 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1209 else if (rxr->mbuf_sz == MJUMPAGESIZE)
1210 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1211 else if (rxr->mbuf_sz == MJUM9BYTES)
1212 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1213 else /* Using 16K cluster */
1214 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
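/*
 * For example, 16 descriptors of 2 KB (MCLBYTES) clusters or 4
 * descriptors of 9 KB clusters both stay well under the 64 KB cap.
 */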
1215
1216 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1217
1218 /* Enable TCP header recognition */
1219 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1220 (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR));
1221
1222 /* Disable RSC for ACK packets */
1223 IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1224 (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1225
1226 rxr->hw_rsc = TRUE;
1227 } /* ixgbe_setup_hw_rsc */
1228
1229 /************************************************************************
1230 * ixgbe_refresh_mbufs
1231 *
1232 * Refresh mbuf buffers for RX descriptor rings
1233 * - now keeps its own state so discards due to resource
1234 * exhaustion are unnecessary; if an mbuf cannot be obtained
1235 * it just returns, keeping its placeholder, so it can simply
1236 * be called again to retry.
1237 ************************************************************************/
1238 static void
1239 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1240 {
1241 struct adapter *adapter = rxr->adapter;
1242 struct ixgbe_rx_buf *rxbuf;
1243 struct mbuf *mp;
1244 int i, j, error;
1245 bool refreshed = false;
1246
1247 i = j = rxr->next_to_refresh;
1248 /* Control the loop with one beyond */
1249 if (++j == rxr->num_desc)
1250 j = 0;
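/*
 * 'i' is the slot being refreshed; 'j' is precalculated one ahead
 * so the loop stops as soon as it would reach 'limit'.
 */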
1251
1252 while (j != limit) {
1253 rxbuf = &rxr->rx_buffers[i];
1254 if (rxbuf->buf == NULL) {
1255 mp = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
1256 MT_DATA, M_PKTHDR, rxr->mbuf_sz);
1257 if (mp == NULL) {
1258 rxr->no_jmbuf.ev_count++;
1259 goto update;
1260 }
1261 if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1262 m_adj(mp, ETHER_ALIGN);
1263 } else
1264 mp = rxbuf->buf;
1265
1266 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1267
1268 /* If we're dealing with an mbuf that was copied rather
1269 * than replaced, there's no need to go through busdma.
1270 */
1271 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1272 /* Get the memory mapping */
1273 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
1274 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
1275 rxbuf->pmap, mp, BUS_DMA_NOWAIT);
1276 if (error != 0) {
1277 printf("Refresh mbufs: payload dmamap load failure - %d\n", error);
1278 m_free(mp);
1279 rxbuf->buf = NULL;
1280 goto update;
1281 }
1282 rxbuf->buf = mp;
1283 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1284 0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
1285 rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1286 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1287 } else {
1288 rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1289 rxbuf->flags &= ~IXGBE_RX_COPY;
1290 }
1291
1292 refreshed = true;
1293 /* Next is precalculated */
1294 i = j;
1295 rxr->next_to_refresh = i;
1296 if (++j == rxr->num_desc)
1297 j = 0;
1298 }
1299
1300 update:
1301 if (refreshed) /* Update hardware tail index */
1302 IXGBE_WRITE_REG(&adapter->hw, rxr->tail, rxr->next_to_refresh);
1303
1304 return;
1305 } /* ixgbe_refresh_mbufs */
1306
1307 /************************************************************************
1308 * ixgbe_allocate_receive_buffers
1309 *
1310 * Allocate memory for rx_buffer structures. Since we use one
1311 * rx_buffer per received packet, the maximum number of rx_buffer's
1312 * that we'll need is equal to the number of receive descriptors
1313 * that we've allocated.
1314 ************************************************************************/
1315 static int
1316 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1317 {
1318 struct adapter *adapter = rxr->adapter;
1319 device_t dev = adapter->dev;
1320 struct ixgbe_rx_buf *rxbuf;
1321 int bsize, error;
1322
1323 bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1324 rxr->rx_buffers = (struct ixgbe_rx_buf *)malloc(bsize, M_DEVBUF,
1325 M_NOWAIT | M_ZERO);
1326 if (rxr->rx_buffers == NULL) {
1327 aprint_error_dev(dev, "Unable to allocate rx_buffer memory\n");
1328 error = ENOMEM;
1329 goto fail;
1330 }
1331
1332 error = ixgbe_dma_tag_create(
1333 /* parent */ adapter->osdep.dmat,
1334 /* alignment */ 1,
1335 /* bounds */ 0,
1336 /* maxsize */ MJUM16BYTES,
1337 /* nsegments */ 1,
1338 /* maxsegsize */ MJUM16BYTES,
1339 /* flags */ 0,
1340 &rxr->ptag);
1341 if (error != 0) {
1342 aprint_error_dev(dev, "Unable to create RX DMA tag\n");
1343 goto fail;
1344 }
1345
1346 for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
1347 rxbuf = &rxr->rx_buffers[i];
1348 error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1349 if (error) {
1350 aprint_error_dev(dev, "Unable to create RX dma map\n");
1351 goto fail;
1352 }
1353 }
1354
1355 return (0);
1356
1357 fail:
1358 /* Frees all, but can handle partial completion */
1359 ixgbe_free_receive_structures(adapter);
1360
1361 return (error);
1362 } /* ixgbe_allocate_receive_buffers */
1363
1364 /************************************************************************
1365 * ixgbe_free_receive_ring
1366 *
1367 * Free the buffers of a receive ring.
1368 ************************************************************************/
1369 static void
1370 ixgbe_free_receive_ring(struct rx_ring *rxr)
1371 {
1372 for (int i = 0; i < rxr->num_desc; i++) {
1373 ixgbe_rx_discard(rxr, i);
1374 }
1375 } /* ixgbe_free_receive_ring */
1376
1377 /************************************************************************
1378 * ixgbe_setup_receive_ring
1379 *
1380 * Initialize a receive ring and its buffers.
1381 ************************************************************************/
1382 static int
1383 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1384 {
1385 struct adapter *adapter;
1386 struct ixgbe_rx_buf *rxbuf;
1387 #ifdef LRO
1388 struct ifnet *ifp;
1389 struct lro_ctrl *lro = &rxr->lro;
1390 #endif /* LRO */
1391 #ifdef DEV_NETMAP
1392 struct netmap_adapter *na = NA(rxr->adapter->ifp);
1393 struct netmap_slot *slot;
1394 #endif /* DEV_NETMAP */
1395 int rsize, error = 0;
1396
1397 adapter = rxr->adapter;
1398 #ifdef LRO
1399 ifp = adapter->ifp;
1400 #endif /* LRO */
1401
1402 /* Clear the ring contents */
1403 IXGBE_RX_LOCK(rxr);
1404
1405 #ifdef DEV_NETMAP
1406 if (adapter->feat_en & IXGBE_FEATURE_NETMAP)
1407 slot = netmap_reset(na, NR_RX, rxr->me, 0);
1408 #endif /* DEV_NETMAP */
1409
1410 rsize = roundup2(adapter->num_rx_desc *
1411 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1412 bzero((void *)rxr->rx_base, rsize);
1413 /* Cache the size */
1414 rxr->mbuf_sz = adapter->rx_mbuf_sz;
1415
1416 /* Free current RX buffer structs and their mbufs */
1417 ixgbe_free_receive_ring(rxr);
1418
1419 IXGBE_RX_UNLOCK(rxr);
1420
1421 /* Now reinitialize our supply of jumbo mbufs. The number
1422 * or size of jumbo mbufs may have changed.
1423 */
1424 ixgbe_jcl_reinit(&adapter->jcl_head, rxr->ptag->dt_dmat,
1425 (2 * adapter->num_rx_desc) * adapter->num_queues,
1426 adapter->rx_mbuf_sz);
1427
1428 IXGBE_RX_LOCK(rxr);
1429
1430 /* Now replenish the mbufs */
1431 for (int j = 0; j != rxr->num_desc; ++j) {
1432 struct mbuf *mp;
1433
1434 rxbuf = &rxr->rx_buffers[j];
1435
1436 #ifdef DEV_NETMAP
1437 /*
1438 * In netmap mode, fill the map and set the buffer
1439 * address in the NIC ring, considering the offset
1440 * between the netmap and NIC rings (see comment in
1441 * ixgbe_setup_transmit_ring() ). No need to allocate
1442 * an mbuf, so end the block with a continue;
1443 */
1444 if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
1445 int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
1446 uint64_t paddr;
1447 void *addr;
1448
1449 addr = PNMB(na, slot + sj, &paddr);
1450 netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1451 /* Update descriptor and the cached value */
1452 rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1453 rxbuf->addr = htole64(paddr);
1454 continue;
1455 }
1456 #endif /* DEV_NETMAP */
1457
1458 rxbuf->flags = 0;
1459 rxbuf->buf = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
1460 MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
1461 if (rxbuf->buf == NULL) {
1462 error = ENOBUFS;
1463 goto fail;
1464 }
1465 mp = rxbuf->buf;
1466 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1467 /* Get the memory mapping */
1468 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat, rxbuf->pmap,
1469 mp, BUS_DMA_NOWAIT);
1470 if (error != 0)
1471 goto fail;
1472 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1473 0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
1474 /* Update the descriptor and the cached value */
1475 rxr->rx_base[j].read.pkt_addr =
1476 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1477 rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1478 }
1479
1480
1481 /* Setup our descriptor indices */
1482 rxr->next_to_check = 0;
1483 rxr->next_to_refresh = 0;
1484 rxr->lro_enabled = FALSE;
1485 rxr->rx_copies.ev_count = 0;
1486 #if 0 /* NetBSD */
1487 rxr->rx_bytes.ev_count = 0;
1488 #if 1 /* Fix inconsistency */
1489 rxr->rx_packets.ev_count = 0;
1490 #endif
1491 #endif
1492 rxr->vtag_strip = FALSE;
1493
1494 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1495 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1496
1497 /*
1498 * Now set up the LRO interface
1499 */
1500 if (ixgbe_rsc_enable)
1501 ixgbe_setup_hw_rsc(rxr);
1502 #ifdef LRO
1503 else if (ifp->if_capenable & IFCAP_LRO) {
1504 device_t dev = adapter->dev;
1505 int err = tcp_lro_init(lro);
1506 if (err) {
1507 device_printf(dev, "LRO Initialization failed!\n");
1508 goto fail;
1509 }
1510 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1511 rxr->lro_enabled = TRUE;
1512 lro->ifp = adapter->ifp;
1513 }
1514 #endif /* LRO */
1515
1516 IXGBE_RX_UNLOCK(rxr);
1517
1518 return (0);
1519
1520 fail:
1521 ixgbe_free_receive_ring(rxr);
1522 IXGBE_RX_UNLOCK(rxr);
1523
1524 return (error);
1525 } /* ixgbe_setup_receive_ring */
1526
1527 /************************************************************************
1528 * ixgbe_setup_receive_structures - Initialize all receive rings.
1529 ************************************************************************/
1530 int
1531 ixgbe_setup_receive_structures(struct adapter *adapter)
1532 {
1533 struct rx_ring *rxr = adapter->rx_rings;
1534 int j;
1535
1536 for (j = 0; j < adapter->num_queues; j++, rxr++)
1537 if (ixgbe_setup_receive_ring(rxr))
1538 goto fail;
1539
1540 return (0);
1541 fail:
1542 /*
1543 * Free RX buffers allocated so far; we only handle
1544 * the rings that completed, since the failing ring
1545 * has cleaned up after itself. 'j' failed, so it is the terminus.
1546 */
1547 for (int i = 0; i < j; ++i) {
1548 rxr = &adapter->rx_rings[i];
1549 IXGBE_RX_LOCK(rxr);
1550 ixgbe_free_receive_ring(rxr);
1551 IXGBE_RX_UNLOCK(rxr);
1552 }
1553
1554 return (ENOBUFS);
1555 } /* ixgbe_setup_receive_structures */
1556
1557
1558 /************************************************************************
1559 * ixgbe_free_receive_structures - Free all receive rings.
1560 ************************************************************************/
1561 void
1562 ixgbe_free_receive_structures(struct adapter *adapter)
1563 {
1564 struct rx_ring *rxr = adapter->rx_rings;
1565
1566 INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1567
1568 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1569 ixgbe_free_receive_buffers(rxr);
1570 #ifdef LRO
1571 /* Free LRO memory */
1572 tcp_lro_free(&rxr->lro);
1573 #endif /* LRO */
1574 /* Free the ring memory as well */
1575 ixgbe_dma_free(adapter, &rxr->rxdma);
1576 IXGBE_RX_LOCK_DESTROY(rxr);
1577 }
1578
1579 free(adapter->rx_rings, M_DEVBUF);
1580 } /* ixgbe_free_receive_structures */
1581
1582
1583 /************************************************************************
1584 * ixgbe_free_receive_buffers - Free receive ring data structures
1585 ************************************************************************/
1586 static void
1587 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1588 {
1589 struct adapter *adapter = rxr->adapter;
1590 struct ixgbe_rx_buf *rxbuf;
1591
1592 INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1593
1594 /* Cleanup any existing buffers */
1595 if (rxr->rx_buffers != NULL) {
1596 for (int i = 0; i < adapter->num_rx_desc; i++) {
1597 rxbuf = &rxr->rx_buffers[i];
1598 ixgbe_rx_discard(rxr, i);
1599 if (rxbuf->pmap != NULL) {
1600 ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1601 rxbuf->pmap = NULL;
1602 }
1603 }
1604 if (rxr->rx_buffers != NULL) {
1605 free(rxr->rx_buffers, M_DEVBUF);
1606 rxr->rx_buffers = NULL;
1607 }
1608 }
1609
1610 if (rxr->ptag != NULL) {
1611 ixgbe_dma_tag_destroy(rxr->ptag);
1612 rxr->ptag = NULL;
1613 }
1614
1615 return;
1616 } /* ixgbe_free_receive_buffers */
1617
1618 /************************************************************************
1619 * ixgbe_rx_input
1620 ************************************************************************/
1621 static __inline void
1622 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m,
1623 u32 ptype)
1624 {
1625 struct adapter *adapter = ifp->if_softc;
1626
1627 #ifdef LRO
1628 struct ethercom *ec = &adapter->osdep.ec;
1629
1630 /*
1631 * At the moment LRO is only for IP/TCP packets whose TCP checksum
1632 * has been verified by hardware, and the ethernet header must not
1633 * carry a VLAN tag. For IPv6 we do not yet support ext. hdrs.
1634 */
1635 if (rxr->lro_enabled &&
1636 (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
1637 (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1638 ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1639 (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1640 (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1641 (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1642 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1643 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1644 /*
1645 * Send to the stack if:
1646 * - LRO not enabled, or
1647 * - no LRO resources, or
1648 * - lro enqueue fails
1649 */
1650 if (rxr->lro.lro_cnt != 0)
1651 if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1652 return;
1653 }
1654 #endif /* LRO */
1655
1656 if_percpuq_enqueue(adapter->ipq, m);
1657 } /* ixgbe_rx_input */
1658
1659 /************************************************************************
1660 * ixgbe_rx_discard
1661 ************************************************************************/
1662 static __inline void
1663 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1664 {
1665 struct ixgbe_rx_buf *rbuf;
1666
1667 rbuf = &rxr->rx_buffers[i];
1668
1669 /*
1670 * With advanced descriptors the writeback
1671 * clobbers the buffer addrs, so it's easier
1672 * to just free the existing mbufs and take
1673 * the normal refresh path to get new buffers
1674 * and mapping.
1675 */
1676
1677 if (rbuf->fmp != NULL) {/* Partial chain ? */
1678 bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1679 rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1680 m_freem(rbuf->fmp);
1681 rbuf->fmp = NULL;
1682 rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1683 } else if (rbuf->buf) {
1684 bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1685 rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1686 m_free(rbuf->buf);
1687 rbuf->buf = NULL;
1688 }
1689 ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
1690
1691 rbuf->flags = 0;
1692
1693 return;
1694 } /* ixgbe_rx_discard */
1695
1696
1697 /************************************************************************
1698 * ixgbe_rxeof
1699 *
1700 * Executes in interrupt context. It replenishes the
1701 * mbufs in the descriptor ring and sends data which has
1702 * been DMA'ed into host memory to the upper layer.
1703 *
1704 * Return TRUE for more work, FALSE for all clean.
1705 ************************************************************************/
1706 bool
1707 ixgbe_rxeof(struct ix_queue *que)
1708 {
1709 struct adapter *adapter = que->adapter;
1710 struct rx_ring *rxr = que->rxr;
1711 struct ifnet *ifp = adapter->ifp;
1712 #ifdef LRO
1713 struct lro_ctrl *lro = &rxr->lro;
1714 #endif /* LRO */
1715 union ixgbe_adv_rx_desc *cur;
1716 struct ixgbe_rx_buf *rbuf, *nbuf;
1717 int i, nextp, processed = 0;
1718 u32 staterr = 0;
1719 u32 count = adapter->rx_process_limit;
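/*
 * rx_process_limit bounds how many descriptors are handled in a
 * single call so the receive path yields regularly.
 */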
1720 #ifdef RSS
1721 u16 pkt_info;
1722 #endif
1723
1724 IXGBE_RX_LOCK(rxr);
1725
1726 #ifdef DEV_NETMAP
1727 if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
1728 /* Same as the txeof routine: wakeup clients on intr. */
1729 if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1730 IXGBE_RX_UNLOCK(rxr);
1731 return (FALSE);
1732 }
1733 }
1734 #endif /* DEV_NETMAP */
1735
1736 for (i = rxr->next_to_check; count != 0;) {
1737 struct mbuf *sendmp, *mp;
1738 u32 rsc, ptype;
1739 u16 len;
1740 u16 vtag = 0;
1741 bool eop;
1742
1743 /* Sync the ring. */
1744 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1745 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1746
1747 cur = &rxr->rx_base[i];
1748 staterr = le32toh(cur->wb.upper.status_error);
1749 #ifdef RSS
1750 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1751 #endif
1752
1753 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1754 break;
1755 if ((ifp->if_flags & IFF_RUNNING) == 0)
1756 break;
1757
1758 count--;
1759 sendmp = NULL;
1760 nbuf = NULL;
1761 rsc = 0;
1762 cur->wb.upper.status_error = 0;
1763 rbuf = &rxr->rx_buffers[i];
1764 mp = rbuf->buf;
1765
1766 len = le16toh(cur->wb.upper.length);
1767 ptype = le32toh(cur->wb.lower.lo_dword.data) &
1768 IXGBE_RXDADV_PKTTYPE_MASK;
1769 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1770
1771 /* Make sure bad packets are discarded */
1772 if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1773 #if __FreeBSD_version >= 1100036
1774 if (adapter->feat_en & IXGBE_FEATURE_VF)
1775 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1776 #endif
1777 rxr->rx_discarded.ev_count++;
1778 ixgbe_rx_discard(rxr, i);
1779 goto next_desc;
1780 }
1781
1782 bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1783 rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1784
1785 /*
1786 		 * On the 82599, which supports a hardware
1787 		 * LRO (called HW RSC), the fragments of a
1788 		 * packet need not occupy sequential
1789 		 * descriptors; instead, the next descriptor
1790 		 * is indicated in bits of the descriptor.
1791 		 * This also means that we might process
1792 		 * more than one packet at a time, something
1793 		 * that was never true before; it required
1794 		 * eliminating the global chain pointers in
1795 		 * favor of what we are doing here. -jfv
1796 */
1797 if (!eop) {
1798 /*
1799 * Figure out the next descriptor
1800 * of this frame.
1801 */
1802 if (rxr->hw_rsc == TRUE) {
1803 rsc = ixgbe_rsc_count(cur);
1804 rxr->rsc_num += (rsc - 1);
1805 }
1806 if (rsc) { /* Get hardware index */
1807 nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
1808 IXGBE_RXDADV_NEXTP_SHIFT);
1809 } else { /* Just sequential */
1810 nextp = i + 1;
1811 if (nextp == adapter->num_rx_desc)
1812 nextp = 0;
1813 }
1814 nbuf = &rxr->rx_buffers[nextp];
1815 prefetch(nbuf);
1816 }
1817 /*
1818 * Rather than using the fmp/lmp global pointers
1819 * we now keep the head of a packet chain in the
1820 * buffer struct and pass this along from one
1821 * descriptor to the next, until we get EOP.
1822 */
1823 mp->m_len = len;
1824 /*
1825 		 * See if there is a stored chain head; it
1826 		 * determines whether this is a secondary fragment
1827 */
1828 sendmp = rbuf->fmp;
1829 if (sendmp != NULL) { /* secondary frag */
1830 rbuf->buf = rbuf->fmp = NULL;
1831 mp->m_flags &= ~M_PKTHDR;
1832 sendmp->m_pkthdr.len += mp->m_len;
1833 } else {
1834 /*
1835 * Optimize. This might be a small packet,
1836 * maybe just a TCP ACK. Do a fast copy that
1837 * is cache aligned into a new mbuf, and
1838 * leave the old mbuf+cluster for re-use.
1839 */
1840 if (eop && len <= IXGBE_RX_COPY_LEN) {
1841 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1842 if (sendmp != NULL) {
1843 sendmp->m_data += IXGBE_RX_COPY_ALIGN;
1844 ixgbe_bcopy(mp->m_data, sendmp->m_data,
1845 len);
1846 sendmp->m_len = len;
1847 rxr->rx_copies.ev_count++;
1848 rbuf->flags |= IXGBE_RX_COPY;
1849 }
1850 }
1851 if (sendmp == NULL) {
1852 rbuf->buf = rbuf->fmp = NULL;
1853 sendmp = mp;
1854 }
1855
1856 /* first desc of a non-ps chain */
1857 sendmp->m_flags |= M_PKTHDR;
1858 sendmp->m_pkthdr.len = mp->m_len;
1859 }
1860 ++processed;
1861
1862 /* Pass the head pointer on */
1863 if (eop == 0) {
1864 nbuf->fmp = sendmp;
1865 sendmp = NULL;
1866 mp->m_next = nbuf->buf;
1867 } else { /* Sending this frame */
1868 m_set_rcvif(sendmp, ifp);
1869 rxr->rx_packets.ev_count++;
1870 /* capture data for AIM */
1871 rxr->bytes += sendmp->m_pkthdr.len;
1872 rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
1873 /* Process vlan info */
1874 if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
1875 vtag = le16toh(cur->wb.upper.vlan);
1876 if (vtag) {
1877 VLAN_INPUT_TAG(ifp, sendmp, vtag,
1878 printf("%s: could not apply VLAN "
1879 "tag", __func__));
1880 }
1881 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
1882 ixgbe_rx_checksum(staterr, sendmp, ptype,
1883 &adapter->stats.pf);
1884 }
1885
1886 #if 0 /* FreeBSD */
1887 /*
1888 * In case of multiqueue, we have RXCSUM.PCSD bit set
1889 * and never cleared. This means we have RSS hash
1890 * available to be used.
1891 */
1892 if (adapter->num_queues > 1) {
1893 sendmp->m_pkthdr.flowid =
1894 le32toh(cur->wb.lower.hi_dword.rss);
1895 switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
1896 case IXGBE_RXDADV_RSSTYPE_IPV4:
1897 M_HASHTYPE_SET(sendmp,
1898 M_HASHTYPE_RSS_IPV4);
1899 break;
1900 case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
1901 M_HASHTYPE_SET(sendmp,
1902 M_HASHTYPE_RSS_TCP_IPV4);
1903 break;
1904 case IXGBE_RXDADV_RSSTYPE_IPV6:
1905 M_HASHTYPE_SET(sendmp,
1906 M_HASHTYPE_RSS_IPV6);
1907 break;
1908 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
1909 M_HASHTYPE_SET(sendmp,
1910 M_HASHTYPE_RSS_TCP_IPV6);
1911 break;
1912 case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
1913 M_HASHTYPE_SET(sendmp,
1914 M_HASHTYPE_RSS_IPV6_EX);
1915 break;
1916 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
1917 M_HASHTYPE_SET(sendmp,
1918 M_HASHTYPE_RSS_TCP_IPV6_EX);
1919 break;
1920 #if __FreeBSD_version > 1100000
1921 case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
1922 M_HASHTYPE_SET(sendmp,
1923 M_HASHTYPE_RSS_UDP_IPV4);
1924 break;
1925 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
1926 M_HASHTYPE_SET(sendmp,
1927 M_HASHTYPE_RSS_UDP_IPV6);
1928 break;
1929 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
1930 M_HASHTYPE_SET(sendmp,
1931 M_HASHTYPE_RSS_UDP_IPV6_EX);
1932 break;
1933 #endif
1934 default:
1935 M_HASHTYPE_SET(sendmp,
1936 M_HASHTYPE_OPAQUE_HASH);
1937 }
1938 } else {
1939 sendmp->m_pkthdr.flowid = que->msix;
1940 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1941 }
1942 #endif
1943 }
1944 next_desc:
1945 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1946 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1947
1948 /* Advance our pointers to the next descriptor. */
1949 if (++i == rxr->num_desc)
1950 i = 0;
1951
1952 /* Now send to the stack or do LRO */
1953 if (sendmp != NULL) {
1954 rxr->next_to_check = i;
1955 IXGBE_RX_UNLOCK(rxr);
1956 ixgbe_rx_input(rxr, ifp, sendmp, ptype);
1957 IXGBE_RX_LOCK(rxr);
1958 i = rxr->next_to_check;
1959 }
1960
1961 /* Every 8 descriptors we go to refresh mbufs */
1962 if (processed == 8) {
1963 ixgbe_refresh_mbufs(rxr, i);
1964 processed = 0;
1965 }
1966 }
1967
1968 /* Refresh any remaining buf structs */
1969 if (ixgbe_rx_unrefreshed(rxr))
1970 ixgbe_refresh_mbufs(rxr, i);
1971
1972 rxr->next_to_check = i;
1973
1974 IXGBE_RX_UNLOCK(rxr);
1975
1976 #ifdef LRO
1977 /*
1978 * Flush any outstanding LRO work
1979 */
1980 tcp_lro_flush_all(lro);
1981 #endif /* LRO */
1982
1983 /*
1984 * Still have cleaning to do?
1985 */
1986 if ((staterr & IXGBE_RXD_STAT_DD) != 0)
1987 return (TRUE);
1988
1989 return (FALSE);
1990 } /* ixgbe_rxeof */
1991
1992
1993 /************************************************************************
1994 * ixgbe_rx_checksum
1995 *
1996 * Verify that the hardware indicated that the checksum is valid.
1997  *   Inform the stack of the checksum status so that the stack
1998  *   doesn't spend time verifying the checksum itself.
1999 ************************************************************************/
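/*
 * Bit layout as consumed below (derived from the casts in the body): the
 * low 16 bits of staterr carry the status bits (IXGBE_RXD_STAT_IPCS,
 * IXGBE_RXD_STAT_L4CS) and bits 24-31 carry the error bits
 * (IXGBE_RXD_ERR_IPE, IXGBE_RXD_ERR_TCPE).  For example, a descriptor with
 * IPCS and L4CS set and no error bits ends up with
 *
 *	mp->m_pkthdr.csum_flags = M_CSUM_IPv4 | M_CSUM_TCPv4 | M_CSUM_TCPv6
 *	    | M_CSUM_UDPv4 | M_CSUM_UDPv6;
 */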
2000 static void
2001 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
2002 struct ixgbe_hw_stats *stats)
2003 {
2004 u16 status = (u16)staterr;
2005 u8 errors = (u8)(staterr >> 24);
2006 #if 0
2007 bool sctp = false;
2008
2009 if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
2010 (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
2011 sctp = true;
2012 #endif
2013
2014 /* IPv4 checksum */
2015 if (status & IXGBE_RXD_STAT_IPCS) {
2016 stats->ipcs.ev_count++;
2017 if (!(errors & IXGBE_RXD_ERR_IPE)) {
2018 /* IP Checksum Good */
2019 mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
2020 } else {
2021 stats->ipcs_bad.ev_count++;
2022 mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
2023 }
2024 }
2025 /* TCP/UDP/SCTP checksum */
2026 if (status & IXGBE_RXD_STAT_L4CS) {
2027 stats->l4cs.ev_count++;
2028 int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
2029 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
2030 mp->m_pkthdr.csum_flags |= type;
2031 } else {
2032 stats->l4cs_bad.ev_count++;
2033 mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
2034 }
2035 }
2036 } /* ixgbe_rx_checksum */
2037
2038 /************************************************************************
2039 * ixgbe_dma_malloc
2040 ************************************************************************/
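/*
 * This follows the usual bus_dma(9) setup sequence and unwinds it in
 * reverse through the fail_* labels below:
 *
 *	ixgbe_dma_tag_create()  -- undone by ixgbe_dma_tag_destroy()
 *	bus_dmamem_alloc()      -- undone by bus_dmamem_free()
 *	bus_dmamem_map()        -- undone by bus_dmamem_unmap()
 *	ixgbe_dmamap_create()   -- undone by ixgbe_dmamap_destroy()
 *	bus_dmamap_load()
 */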
2041 int
2042 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
2043 struct ixgbe_dma_alloc *dma, const int mapflags)
2044 {
2045 device_t dev = adapter->dev;
2046 int r, rsegs;
2047
2048 r = ixgbe_dma_tag_create(
2049 /* parent */ adapter->osdep.dmat,
2050 /* alignment */ DBA_ALIGN,
2051 /* bounds */ 0,
2052 /* maxsize */ size,
2053 /* nsegments */ 1,
2054 /* maxsegsize */ size,
2055 /* flags */ BUS_DMA_ALLOCNOW,
2056 &dma->dma_tag);
2057 if (r != 0) {
2058 aprint_error_dev(dev,
2059 "%s: ixgbe_dma_tag_create failed; error %d\n", __func__, r);
2060 goto fail_0;
2061 }
2062
2063 r = bus_dmamem_alloc(dma->dma_tag->dt_dmat, size,
2064 dma->dma_tag->dt_alignment, dma->dma_tag->dt_boundary,
2065 &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
2066 if (r != 0) {
2067 aprint_error_dev(dev,
2068 "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
2069 goto fail_1;
2070 }
2071
2072 r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
2073 size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
2074 if (r != 0) {
2075 aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
2076 __func__, r);
2077 goto fail_2;
2078 }
2079
2080 r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
2081 if (r != 0) {
2082 		aprint_error_dev(dev, "%s: ixgbe_dmamap_create failed; error %d\n",
2083 __func__, r);
2084 goto fail_3;
2085 }
2086
2087 r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map,
2088 dma->dma_vaddr, size, NULL, mapflags | BUS_DMA_NOWAIT);
2089 if (r != 0) {
2090 aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
2091 __func__, r);
2092 goto fail_4;
2093 }
2094 dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
2095 dma->dma_size = size;
2096 return 0;
2097 fail_4:
2098 ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
2099 fail_3:
2100 bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
2101 fail_2:
2102 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
2103 fail_1:
2104 ixgbe_dma_tag_destroy(dma->dma_tag);
2105 fail_0:
2106
2107 return (r);
2108 } /* ixgbe_dma_malloc */
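/*
 * Typical use, mirroring ixgbe_allocate_queues() below (sketch only;
 * error handling elided):
 *
 *	tsize = roundup2(adapter->num_tx_desc *
 *	    sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
 *	if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma, BUS_DMA_NOWAIT))
 *		return ENOMEM;
 *	txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
 *	...
 *	ixgbe_dma_free(adapter, &txr->txdma);
 */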
2109
2110 /************************************************************************
2111 * ixgbe_dma_free
2112 ************************************************************************/
2113 void
2114 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2115 {
2116 bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
2117 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2118 ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
2119 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
2120 ixgbe_dma_tag_destroy(dma->dma_tag);
2121 } /* ixgbe_dma_free */
2122
2123
2124 /************************************************************************
2125 * ixgbe_allocate_queues
2126 *
2127  *   Allocate memory for the transmit and receive rings, and then
2128  *   the descriptors associated with each. Called only once at attach.
2129 ************************************************************************/
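/*
 * Allocation order, undone in reverse by the error labels at the bottom
 * of the function:
 *
 *	1. adapter->queues   - the per-queue ix_queue structs
 *	2. adapter->tx_rings - TX ring structs, descriptor DMA memory,
 *	                       transmit buffers and (optionally) a pcq
 *	3. adapter->rx_rings - RX ring structs, descriptor DMA memory
 *	                       and receive buffers
 *
 * txconf/rxconf count the rings whose descriptor memory was allocated,
 * so the error path frees exactly those.
 */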
2130 int
2131 ixgbe_allocate_queues(struct adapter *adapter)
2132 {
2133 device_t dev = adapter->dev;
2134 struct ix_queue *que;
2135 struct tx_ring *txr;
2136 struct rx_ring *rxr;
2137 int rsize, tsize, error = IXGBE_SUCCESS;
2138 int txconf = 0, rxconf = 0;
2139
2140 /* First, allocate the top level queue structs */
2141 adapter->queues = (struct ix_queue *)malloc(sizeof(struct ix_queue) *
2142 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
2143 if (adapter->queues == NULL) {
2144 aprint_error_dev(dev, "Unable to allocate queue memory\n");
2145 error = ENOMEM;
2146 goto fail;
2147 }
2148
2149 /* Second, allocate the TX ring struct memory */
2150 adapter->tx_rings = (struct tx_ring *)malloc(sizeof(struct tx_ring) *
2151 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
2152 if (adapter->tx_rings == NULL) {
2153 aprint_error_dev(dev, "Unable to allocate TX ring memory\n");
2154 error = ENOMEM;
2155 goto tx_fail;
2156 }
2157
2158 /* Third, allocate the RX ring */
2159 adapter->rx_rings = (struct rx_ring *)malloc(sizeof(struct rx_ring) *
2160 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
2161 if (adapter->rx_rings == NULL) {
2162 aprint_error_dev(dev, "Unable to allocate RX ring memory\n");
2163 error = ENOMEM;
2164 goto rx_fail;
2165 }
2166
2167 /* For the ring itself */
2168 tsize = roundup2(adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc),
2169 DBA_ALIGN);
2170
2171 /*
2172 	 * Now set up the TX queues; txconf is needed to handle the
2173 	 * possibility that things fail midcourse and we need to
2174 	 * undo the allocations gracefully.
2175 */
2176 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2177 /* Set up some basics */
2178 txr = &adapter->tx_rings[i];
2179 txr->adapter = adapter;
2180 txr->txr_interq = NULL;
2181 /* In case SR-IOV is enabled, align the index properly */
2182 #ifdef PCI_IOV
2183 txr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
2184 i);
2185 #else
2186 txr->me = i;
2187 #endif
2188 txr->num_desc = adapter->num_tx_desc;
2189
2190 /* Initialize the TX side lock */
2191 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2192 device_xname(dev), txr->me);
2193 mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
2194
2195 if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma,
2196 BUS_DMA_NOWAIT)) {
2197 aprint_error_dev(dev,
2198 "Unable to allocate TX Descriptor memory\n");
2199 error = ENOMEM;
2200 goto err_tx_desc;
2201 }
2202 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2203 bzero((void *)txr->tx_base, tsize);
2204
2205 /* Now allocate transmit buffers for the ring */
2206 if (ixgbe_allocate_transmit_buffers(txr)) {
2207 aprint_error_dev(dev,
2208 "Critical Failure setting up transmit buffers\n");
2209 error = ENOMEM;
2210 goto err_tx_desc;
2211 }
2212 if (!(adapter->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
2213 /* Allocate a buf ring */
2214 txr->txr_interq = pcq_create(IXGBE_BR_SIZE, KM_SLEEP);
2215 if (txr->txr_interq == NULL) {
2216 aprint_error_dev(dev,
2217 "Critical Failure setting up buf ring\n");
2218 error = ENOMEM;
2219 goto err_tx_desc;
2220 }
2221 }
2222 }
2223
2224 /*
2225 * Next the RX queues...
2226 */
2227 rsize = roundup2(adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc),
2228 DBA_ALIGN);
2229 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2230 rxr = &adapter->rx_rings[i];
2231 /* Set up some basics */
2232 rxr->adapter = adapter;
2233 #ifdef PCI_IOV
2234 /* In case SR-IOV is enabled, align the index properly */
2235 rxr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
2236 i);
2237 #else
2238 rxr->me = i;
2239 #endif
2240 rxr->num_desc = adapter->num_rx_desc;
2241
2242 /* Initialize the RX side lock */
2243 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2244 device_xname(dev), rxr->me);
2245 mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
2246
2247 if (ixgbe_dma_malloc(adapter, rsize, &rxr->rxdma,
2248 BUS_DMA_NOWAIT)) {
2249 aprint_error_dev(dev,
2250 "Unable to allocate RxDescriptor memory\n");
2251 error = ENOMEM;
2252 goto err_rx_desc;
2253 }
2254 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2255 bzero((void *)rxr->rx_base, rsize);
2256
2257 /* Allocate receive buffers for the ring */
2258 if (ixgbe_allocate_receive_buffers(rxr)) {
2259 aprint_error_dev(dev,
2260 "Critical Failure setting up receive buffers\n");
2261 error = ENOMEM;
2262 goto err_rx_desc;
2263 }
2264 }
2265
2266 /*
2267 * Finally set up the queue holding structs
2268 */
2269 for (int i = 0; i < adapter->num_queues; i++) {
2270 que = &adapter->queues[i];
2271 que->adapter = adapter;
2272 que->me = i;
2273 que->txr = &adapter->tx_rings[i];
2274 que->rxr = &adapter->rx_rings[i];
2275 }
2276
2277 return (0);
2278
2279 err_rx_desc:
2280 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2281 ixgbe_dma_free(adapter, &rxr->rxdma);
2282 err_tx_desc:
2283 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2284 ixgbe_dma_free(adapter, &txr->txdma);
2285 free(adapter->rx_rings, M_DEVBUF);
2286 rx_fail:
2287 free(adapter->tx_rings, M_DEVBUF);
2288 tx_fail:
2289 free(adapter->queues, M_DEVBUF);
2290 fail:
2291 return (error);
2292 } /* ixgbe_allocate_queues */
2293