ixgbe_netmap.c revision 1.4 1 /* $NetBSD: ixgbe_netmap.c,v 1.4 2021/04/30 06:55:32 msaitoh Exp $ */
2 /******************************************************************************
3
4 Copyright (c) 2001-2017, Intel Corporation
5 All rights reserved.
6
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are met:
9
10 1. Redistributions of source code must retain the above copyright notice,
11 this list of conditions and the following disclaimer.
12
13 2. Redistributions in binary form must reproduce the above copyright
14 notice, this list of conditions and the following disclaimer in the
15 documentation and/or other materials provided with the distribution.
16
17 3. Neither the name of the Intel Corporation nor the names of its
18 contributors may be used to endorse or promote products derived from
19 this software without specific prior written permission.
20
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
25 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 POSSIBILITY OF SUCH DAMAGE.
32
33 ******************************************************************************/
34 /*$FreeBSD: head/sys/dev/ixgbe/ixgbe_netmap.c 320688 2017-07-05 17:27:03Z erj $*/
35
36 /*
37 * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved.
38 *
39 * Redistribution and use in source and binary forms, with or without
40 * modification, are permitted provided that the following conditions
41 * are met:
42 * 1. Redistributions of source code must retain the above copyright
43 * notice, this list of conditions and the following disclaimer.
44 * 2. Redistributions in binary form must reproduce the above copyright
45 * notice, this list of conditions and the following disclaimer in the
46 * documentation and/or other materials provided with the distribution.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 */
60
61 /*
62 * $FreeBSD: head/sys/dev/ixgbe/ixgbe_netmap.c 320688 2017-07-05 17:27:03Z erj $
63 *
64 * netmap support for: ixgbe
65 *
66 * This file is meant to be a reference on how to implement
67 * netmap support for a network driver.
68 * This file contains code but only static or inline functions used
69 * by a single driver. To avoid replication of code we just #include
70 * it near the beginning of the standard driver.
71 */
72
73 #include <sys/cdefs.h>
74 __KERNEL_RCSID(0, "$NetBSD: ixgbe_netmap.c,v 1.4 2021/04/30 06:55:32 msaitoh Exp $");
75
76 #ifdef DEV_NETMAP
77 /*
78 * Some drivers may need the following headers. Others
79 * already include them by default
80
81 #include <vm/vm.h>
82 #include <vm/pmap.h>
83
84 */
85 #include "ixgbe.h"
86
87 /*
88 * device-specific sysctl variables:
89 *
90 * ix_crcstrip: 0: keep CRC in rx frames (default), 1: strip it.
91 * During regular operations the CRC is stripped, but on some
92 * hardware reception of frames not multiple of 64 is slower,
93 * so using crcstrip=0 helps in benchmarks.
94 *
95 * ix_rx_miss, ix_rx_miss_bufs:
96 * count packets that might be missed due to lost interrupts.
97 */
98 SYSCTL_DECL(_dev_netmap);
99 static int ix_rx_miss, ix_rx_miss_bufs;
100 int ix_crcstrip;
101 SYSCTL_INT(_dev_netmap, OID_AUTO, ix_crcstrip,
102 CTLFLAG_RW, &ix_crcstrip, 0, "strip CRC on rx frames");
103 SYSCTL_INT(_dev_netmap, OID_AUTO, ix_rx_miss,
104 CTLFLAG_RW, &ix_rx_miss, 0, "potentially missed rx intr");
105 SYSCTL_INT(_dev_netmap, OID_AUTO, ix_rx_miss_bufs,
106 CTLFLAG_RW, &ix_rx_miss_bufs, 0, "potentially missed rx intr bufs");
107
108
109 static void
110 set_crcstrip(struct ixgbe_hw *hw, int onoff)
111 {
112 /* crc stripping is set in two places:
113 * IXGBE_HLREG0 (modified on init_locked and hw reset)
114 * IXGBE_RDRXCTL (set by the original driver in
115 * ixgbe_setup_hw_rsc() called in init_locked.
116 * We disable the setting when netmap is compiled in).
117 * We update the values here, but also in ixgbe.c because
118 * init_locked sometimes is called outside our control.
119 */
120 uint32_t hl, rxc;
121
122 hl = IXGBE_READ_REG(hw, IXGBE_HLREG0);
123 rxc = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
124 if (netmap_verbose)
125 D("%s read HLREG 0x%x rxc 0x%x",
126 onoff ? "enter" : "exit", hl, rxc);
127 /* hw requirements ... */
128 rxc &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
129 rxc |= IXGBE_RDRXCTL_RSCACKC;
130 if (onoff && !ix_crcstrip) {
131 /* keep the crc. Fast rx */
132 hl &= ~IXGBE_HLREG0_RXCRCSTRP;
133 rxc &= ~IXGBE_RDRXCTL_CRCSTRIP;
134 } else {
135 /* reset default mode */
136 hl |= IXGBE_HLREG0_RXCRCSTRP;
137 rxc |= IXGBE_RDRXCTL_CRCSTRIP;
138 }
139 if (netmap_verbose)
140 D("%s write HLREG 0x%x rxc 0x%x",
141 onoff ? "enter" : "exit", hl, rxc);
142 IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hl);
143 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rxc);
144 }
145
146
147 /*
148 * Register/unregister. We are already under netmap lock.
149 * Only called on the first register or the last unregister.
150 */
151 static int
152 ixgbe_netmap_reg(struct netmap_adapter *na, int onoff)
153 {
154 struct ifnet *ifp = na->ifp;
155 struct adapter *adapter = ifp->if_softc;
156
157 IXGBE_CORE_LOCK(adapter);
158 adapter->stop_locked(adapter);
159
160 set_crcstrip(&adapter->hw, onoff);
161 /* enable or disable flags and callbacks in na and ifp */
162 if (onoff) {
163 nm_set_native_flags(na);
164 } else {
165 nm_clear_native_flags(na);
166 }
167 adapter->init_locked(adapter); /* also enables intr */
168 set_crcstrip(&adapter->hw, onoff); // XXX why twice ?
169 IXGBE_CORE_UNLOCK(adapter);
170 return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1);
171 }
172
173
174 /*
175 * Reconcile kernel and user view of the transmit ring.
176 *
177 * All information is in the kring.
178 * Userspace wants to send packets up to the one before kring->rhead,
179 * kernel knows kring->nr_hwcur is the first unsent packet.
180 *
181 * Here we push packets out (as many as possible), and possibly
182 * reclaim buffers from previously completed transmission.
183 *
184 * The caller (netmap) guarantees that there is only one instance
185 * running at any time. Any interference with other driver
186 * methods should be handled by the individual drivers.
187 */
188 static int
189 ixgbe_netmap_txsync(struct netmap_kring *kring, int flags)
190 {
191 struct netmap_adapter *na = kring->na;
192 struct ifnet *ifp = na->ifp;
193 struct netmap_ring *ring = kring->ring;
194 u_int nm_i; /* index into the netmap ring */
195 u_int nic_i; /* index into the NIC ring */
196 u_int n;
197 u_int const lim = kring->nkr_num_slots - 1;
198 u_int const head = kring->rhead;
199 /*
200 * interrupts on every tx packet are expensive so request
201 * them every half ring, or where NS_REPORT is set
202 */
203 u_int report_frequency = kring->nkr_num_slots >> 1;
204
205 /* device-specific */
206 struct adapter *adapter = ifp->if_softc;
207 struct tx_ring *txr = &adapter->tx_rings[kring->ring_id];
208 int reclaim_tx;
209
210 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
211 BUS_DMASYNC_POSTREAD);
212
213 /*
214 * First part: process new packets to send.
215 * nm_i is the current index in the netmap ring,
216 * nic_i is the corresponding index in the NIC ring.
217 * The two numbers differ because upon a *_init() we reset
218 * the NIC ring but leave the netmap ring unchanged.
219 * For the transmit ring, we have
220 *
221 * nm_i = kring->nr_hwcur
222 * nic_i = IXGBE_TDT (not tracked in the driver)
223 * and
224 * nm_i == (nic_i + kring->nkr_hwofs) % ring_size
225 *
226 * In this driver kring->nkr_hwofs >= 0, but for other
227 * drivers it might be negative as well.
228 */
229
230 /*
231 * If we have packets to send (kring->nr_hwcur != kring->rhead)
232 * iterate over the netmap ring, fetch length and update
233 * the corresponding slot in the NIC ring. Some drivers also
234 * need to update the buffer's physical address in the NIC slot
235 * even NS_BUF_CHANGED is not set (PNMB computes the addresses).
236 *
237 * The netmap_reload_map() calls is especially expensive,
238 * even when (as in this case) the tag is 0, so do only
239 * when the buffer has actually changed.
240 *
241 * If possible do not set the report/intr bit on all slots,
242 * but only a few times per ring or when NS_REPORT is set.
243 *
244 * Finally, on 10G and faster drivers, it might be useful
245 * to prefetch the next slot and txr entry.
246 */
247
248 nm_i = kring->nr_hwcur;
249 if (nm_i != head) { /* we have new packets to send */
250 nic_i = netmap_idx_k2n(kring, nm_i);
251
252 __builtin_prefetch(&ring->slot[nm_i]);
253 __builtin_prefetch(&txr->tx_buffers[nic_i]);
254
255 for (n = 0; nm_i != head; n++) {
256 struct netmap_slot *slot = &ring->slot[nm_i];
257 u_int len = slot->len;
258 uint64_t paddr;
259 void *addr = PNMB(na, slot, &paddr);
260
261 /* device-specific */
262 union ixgbe_adv_tx_desc *curr = &txr->tx_base[nic_i];
263 struct ixgbe_tx_buf *txbuf = &txr->tx_buffers[nic_i];
264 int flags = (slot->flags & NS_REPORT ||
265 nic_i == 0 || nic_i == report_frequency) ?
266 IXGBE_TXD_CMD_RS : 0;
267
268 /* prefetch for next round */
269 __builtin_prefetch(&ring->slot[nm_i + 1]);
270 __builtin_prefetch(&txr->tx_buffers[nic_i + 1]);
271
272 NM_CHECK_ADDR_LEN(na, addr, len);
273
274 if (slot->flags & NS_BUF_CHANGED) {
275 /* buffer has changed, reload map */
276 netmap_reload_map(na, txr->txtag, txbuf->map, addr);
277 }
278 slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
279
280 /* Fill the slot in the NIC ring. */
281 /* Use legacy descriptor, they are faster? */
282 curr->read.buffer_addr = htole64(paddr);
283 curr->read.olinfo_status = 0;
284 curr->read.cmd_type_len = htole32(len | flags |
285 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_TXD_CMD_EOP);
286
287 /* make sure changes to the buffer are synced */
288 bus_dmamap_sync(txr->txtag, txbuf->map,
289 BUS_DMASYNC_PREWRITE);
290
291 nm_i = nm_next(nm_i, lim);
292 nic_i = nm_next(nic_i, lim);
293 }
294 kring->nr_hwcur = head;
295
296 /* synchronize the NIC ring */
297 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
298 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
299
300 /* (re)start the tx unit up to slot nic_i (excluded) */
301 IXGBE_WRITE_REG(&adapter->hw, txr->tail, nic_i);
302 }
303
304 /*
305 * Second part: reclaim buffers for completed transmissions.
306 * Because this is expensive (we read a NIC register etc.)
307 * we only do it in specific cases (see below).
308 */
309 if (flags & NAF_FORCE_RECLAIM) {
310 reclaim_tx = 1; /* forced reclaim */
311 } else if (!nm_kr_txempty(kring)) {
312 reclaim_tx = 0; /* have buffers, no reclaim */
313 } else {
314 /*
315 * No buffers available. Locate previous slot with
316 * REPORT_STATUS set.
317 * If the slot has DD set, we can reclaim space,
318 * otherwise wait for the next interrupt.
319 * This enables interrupt moderation on the tx
320 * side though it might reduce throughput.
321 */
322 struct ixgbe_legacy_tx_desc *txd =
323 (struct ixgbe_legacy_tx_desc *)txr->tx_base;
324
325 nic_i = txr->next_to_clean + report_frequency;
326 if (nic_i > lim)
327 nic_i -= lim + 1;
328 // round to the closest with dd set
329 nic_i = (nic_i < kring->nkr_num_slots / 4 ||
330 nic_i >= kring->nkr_num_slots*3/4) ?
331 0 : report_frequency;
332 reclaim_tx = txd[nic_i].upper.fields.status & IXGBE_TXD_STAT_DD; // XXX cpu_to_le32 ?
333 }
334 if (reclaim_tx) {
335 /*
336 * Record completed transmissions.
337 * We (re)use the driver's txr->next_to_clean to keep
338 * track of the most recently completed transmission.
339 *
340 * The datasheet discourages the use of TDH to find
341 * out the number of sent packets, but we only set
342 * REPORT_STATUS in a few slots so TDH is the only
343 * good way.
344 */
345 nic_i = IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(kring->ring_id));
346 if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */
347 D("TDH wrap %d", nic_i);
348 nic_i -= kring->nkr_num_slots;
349 }
350 if (nic_i != txr->next_to_clean) {
351 /* some tx completed, increment avail */
352 txr->next_to_clean = nic_i;
353 kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
354 }
355 }
356
357 return 0;
358 }
359
360
361 /*
362 * Reconcile kernel and user view of the receive ring.
363 * Same as for the txsync, this routine must be efficient.
364 * The caller guarantees a single invocations, but races against
365 * the rest of the driver should be handled here.
366 *
367 * On call, kring->rhead is the first packet that userspace wants
368 * to keep, and kring->rcur is the wakeup point.
369 * The kernel has previously reported packets up to kring->rtail.
370 *
371 * If (flags & NAF_FORCE_READ) also check for incoming packets irrespective
372 * of whether or not we received an interrupt.
373 */
374 static int
375 ixgbe_netmap_rxsync(struct netmap_kring *kring, int flags)
376 {
377 struct netmap_adapter *na = kring->na;
378 struct ifnet *ifp = na->ifp;
379 struct netmap_ring *ring = kring->ring;
380 u_int nm_i; /* index into the netmap ring */
381 u_int nic_i; /* index into the NIC ring */
382 u_int n;
383 u_int const lim = kring->nkr_num_slots - 1;
384 u_int const head = kring->rhead;
385 int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
386
387 /* device-specific */
388 struct adapter *adapter = ifp->if_softc;
389 struct rx_ring *rxr = &adapter->rx_rings[kring->ring_id];
390
391 if (head > lim)
392 return netmap_ring_reinit(kring);
393
394 /* XXX check sync modes */
395 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
396 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
397
398 /*
399 * First part: import newly received packets.
400 *
401 * nm_i is the index of the next free slot in the netmap ring,
402 * nic_i is the index of the next received packet in the NIC ring,
403 * and they may differ in case if_init() has been called while
404 * in netmap mode. For the receive ring we have
405 *
406 * nic_i = rxr->next_to_check;
407 * nm_i = kring->nr_hwtail (previous)
408 * and
409 * nm_i == (nic_i + kring->nkr_hwofs) % ring_size
410 *
411 * rxr->next_to_check is set to 0 on a ring reinit
412 */
413 if (netmap_no_pendintr || force_update) {
414 int crclen = (ix_crcstrip) ? 0 : 4;
415
416 nic_i = rxr->next_to_check; // or also k2n(kring->nr_hwtail)
417 nm_i = netmap_idx_n2k(kring, nic_i);
418
419 for (n = 0; ; n++) {
420 union ixgbe_adv_rx_desc *curr = &rxr->rx_base[nic_i];
421 uint32_t staterr = le32toh(curr->wb.upper.status_error);
422
423 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
424 break;
425 ring->slot[nm_i].len = le16toh(curr->wb.upper.length) - crclen;
426 ring->slot[nm_i].flags = 0;
427 bus_dmamap_sync(rxr->ptag,
428 rxr->rx_buffers[nic_i].pmap, BUS_DMASYNC_POSTREAD);
429 nm_i = nm_next(nm_i, lim);
430 nic_i = nm_next(nic_i, lim);
431 }
432 if (n) { /* update the state variables */
433 if (netmap_no_pendintr && !force_update) {
434 /* diagnostics */
435 ix_rx_miss ++;
436 ix_rx_miss_bufs += n;
437 }
438 rxr->next_to_check = nic_i;
439 kring->nr_hwtail = nm_i;
440 }
441 kring->nr_kflags &= ~NKR_PENDINTR;
442 }
443
444 /*
445 * Second part: skip past packets that userspace has released.
446 * (kring->nr_hwcur to kring->rhead excluded),
447 * and make the buffers available for reception.
448 * As usual nm_i is the index in the netmap ring,
449 * nic_i is the index in the NIC ring, and
450 * nm_i == (nic_i + kring->nkr_hwofs) % ring_size
451 */
452 nm_i = kring->nr_hwcur;
453 if (nm_i != head) {
454 nic_i = netmap_idx_k2n(kring, nm_i);
455 for (n = 0; nm_i != head; n++) {
456 struct netmap_slot *slot = &ring->slot[nm_i];
457 uint64_t paddr;
458 void *addr = PNMB(na, slot, &paddr);
459
460 union ixgbe_adv_rx_desc *curr = &rxr->rx_base[nic_i];
461 struct ixgbe_rx_buf *rxbuf = &rxr->rx_buffers[nic_i];
462
463 if (addr == NETMAP_BUF_BASE(na)) /* bad buf */
464 goto ring_reset;
465
466 if (slot->flags & NS_BUF_CHANGED) {
467 /* buffer has changed, reload map */
468 netmap_reload_map(na, rxr->ptag, rxbuf->pmap, addr);
469 slot->flags &= ~NS_BUF_CHANGED;
470 }
471 curr->wb.upper.status_error = 0;
472 curr->read.pkt_addr = htole64(paddr);
473 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
474 BUS_DMASYNC_PREREAD);
475 nm_i = nm_next(nm_i, lim);
476 nic_i = nm_next(nic_i, lim);
477 }
478 kring->nr_hwcur = head;
479
480 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
481 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
482 /*
483 * IMPORTANT: we must leave one free slot in the ring,
484 * so move nic_i back by one unit
485 */
486 nic_i = nm_prev(nic_i, lim);
487 IXGBE_WRITE_REG(&adapter->hw, rxr->tail, nic_i);
488 }
489
490 return 0;
491
492 ring_reset:
493 return netmap_ring_reinit(kring);
494 }
495
496
497 /*
498 * The attach routine, called near the end of ixgbe_attach(),
499 * fills the parameters for netmap_attach() and calls it.
500 * It cannot fail, in the worst case (such as no memory)
501 * netmap mode will be disabled and the driver will only
502 * operate in standard mode.
503 */
504 void
505 ixgbe_netmap_attach(struct adapter *adapter)
506 {
507 struct netmap_adapter na;
508
509 bzero(&na, sizeof(na));
510
511 na.ifp = adapter->ifp;
512 na.na_flags = NAF_BDG_MAYSLEEP;
513 na.num_tx_desc = adapter->num_tx_desc;
514 na.num_rx_desc = adapter->num_rx_desc;
515 na.nm_txsync = ixgbe_netmap_txsync;
516 na.nm_rxsync = ixgbe_netmap_rxsync;
517 na.nm_register = ixgbe_netmap_reg;
518 na.num_tx_rings = na.num_rx_rings = adapter->num_queues;
519 netmap_attach(&na);
520 }
521
522 #endif /* DEV_NETMAP */
523
524 /* end of file */
525