/* $NetBSD: ixgbe_netmap.c,v 1.3 2021/04/30 06:41:36 msaitoh Exp $ */
/******************************************************************************

  Copyright (c) 2001-2017, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD: head/sys/dev/ixgbe/ixgbe_netmap.c 320688 2017-07-05 17:27:03Z erj $*/

/*
 * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * $FreeBSD: head/sys/dev/ixgbe/ixgbe_netmap.c 320688 2017-07-05 17:27:03Z erj $
 *
 * netmap support for: ixgbe
 *
 * This file is meant to be a reference on how to implement
 * netmap support for a network driver.
 * It contains only static or inline functions used by a single
 * driver.  To avoid replicating the code, the standard driver
 * simply #includes this file near its beginning.
 */
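
/*
 * A minimal sketch of the include convention described above (the
 * exact location and the including file name are illustrative and
 * vary with the OS version):
 *
 *	// near the top of the standard driver source, e.g. ixgbe.c
 *	#ifdef DEV_NETMAP
 *	#include "ixgbe_netmap.c"
 *	#endif
 */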

#ifdef DEV_NETMAP
/*
 * Some drivers may need the following headers. Others
 * already include them by default

#include <vm/vm.h>
#include <vm/pmap.h>

*/
#include "ixgbe.h"

/*
 * device-specific sysctl variables:
 *
 * ix_crcstrip: 0: keep CRC in rx frames (default), 1: strip it.
 *	During regular operations the CRC is stripped, but on some
 *	hardware the reception of frames whose length is not a multiple
 *	of 64 is slower, so using crcstrip=0 helps in benchmarks.
 *
 * ix_rx_miss, ix_rx_miss_bufs:
 *	count packets that might be missed due to lost interrupts.
 */
SYSCTL_DECL(_dev_netmap);
static int ix_rx_miss, ix_rx_miss_bufs;
int ix_crcstrip;
SYSCTL_INT(_dev_netmap, OID_AUTO, ix_crcstrip,
    CTLFLAG_RW, &ix_crcstrip, 0, "strip CRC on rx frames");
SYSCTL_INT(_dev_netmap, OID_AUTO, ix_rx_miss,
    CTLFLAG_RW, &ix_rx_miss, 0, "potentially missed rx intr");
SYSCTL_INT(_dev_netmap, OID_AUTO, ix_rx_miss_bufs,
    CTLFLAG_RW, &ix_rx_miss_bufs, 0, "potentially missed rx intr bufs");
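
/*
 * These knobs are attached under the dev.netmap sysctl tree declared
 * above; for example, to keep the CRC for a benchmark run and then
 * inspect the miss counters, one would do (from the shell):
 *
 *	sysctl dev.netmap.ix_crcstrip=0
 *	sysctl dev.netmap.ix_rx_miss dev.netmap.ix_rx_miss_bufs
 */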


static void
set_crcstrip(struct ixgbe_hw *hw, int onoff)
{
	/* crc stripping is set in two places:
	 * IXGBE_HLREG0 (modified on init_locked and hw reset)
	 * IXGBE_RDRXCTL (set by the original driver in
	 *	ixgbe_setup_hw_rsc() called in init_locked.
	 *	We disable the setting when netmap is compiled in).
	 * We update the values here, but also in ixgbe.c because
	 * init_locked sometimes is called outside our control.
	 */
	uint32_t hl, rxc;

	hl = IXGBE_READ_REG(hw, IXGBE_HLREG0);
	rxc = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
	if (netmap_verbose)
		D("%s read HLREG 0x%x rxc 0x%x",
		    onoff ? "enter" : "exit", hl, rxc);
	/* hw requirements ... */
	rxc &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
	rxc |= IXGBE_RDRXCTL_RSCACKC;
	if (onoff && !ix_crcstrip) {
		/* keep the crc. Fast rx */
		hl &= ~IXGBE_HLREG0_RXCRCSTRP;
		rxc &= ~IXGBE_RDRXCTL_CRCSTRIP;
	} else {
		/* reset default mode */
		hl |= IXGBE_HLREG0_RXCRCSTRP;
		rxc |= IXGBE_RDRXCTL_CRCSTRIP;
	}
	if (netmap_verbose)
		D("%s write HLREG 0x%x rxc 0x%x",
		    onoff ? "enter" : "exit", hl, rxc);
	IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hl);
	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rxc);
}


/*
 * Register/unregister. We are already under netmap lock.
 * Only called on the first register or the last unregister.
 */
static int
ixgbe_netmap_reg(struct netmap_adapter *na, int onoff)
{
	struct ifnet *ifp = na->ifp;
	struct adapter *adapter = ifp->if_softc;

	IXGBE_CORE_LOCK(adapter);
	adapter->stop_locked(adapter);

	set_crcstrip(&adapter->hw, onoff);
	/* enable or disable flags and callbacks in na and ifp */
	if (onoff) {
		nm_set_native_flags(na);
	} else {
		nm_clear_native_flags(na);
	}
	adapter->init_locked(adapter);	/* also enables intr */
	set_crcstrip(&adapter->hw, onoff); // XXX why twice ?
	IXGBE_CORE_UNLOCK(adapter);
	return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1);
}
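
/*
 * For context, an illustrative sketch (not part of this driver): the
 * netmap core calls the nm_register callback above when a userspace
 * application binds the interface with the netmap_user.h helpers, and
 * again with onoff=0 on the last release, e.g.
 *
 *	struct nm_desc *d = nm_open("netmap:ix0", NULL, 0, NULL);
 *	...
 *	nm_close(d);
 *
 * (the interface name depends on the platform).
 */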


/*
 * Reconcile kernel and user view of the transmit ring.
 *
 * All information is in the kring.
 * Userspace wants to send packets up to the one before kring->rhead,
 * kernel knows kring->nr_hwcur is the first unsent packet.
 *
 * Here we push packets out (as many as possible), and possibly
 * reclaim buffers from previously completed transmissions.
 *
 * The caller (netmap) guarantees that there is only one instance
 * running at any time. Any interference with other driver
 * methods should be handled by the individual drivers.
 */
static int
ixgbe_netmap_txsync(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	struct ifnet *ifp = na->ifp;
	struct netmap_ring *ring = kring->ring;
	u_int nm_i;	/* index into the netmap ring */
	u_int nic_i;	/* index into the NIC ring */
	u_int n;
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = kring->rhead;
	/*
	 * interrupts on every tx packet are expensive so request
	 * them every half ring, or where NS_REPORT is set
	 */
	u_int report_frequency = kring->nkr_num_slots >> 1;

	/* device-specific */
	struct adapter *adapter = ifp->if_softc;
	struct tx_ring *txr = &adapter->tx_rings[kring->ring_id];
	int reclaim_tx;

	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_POSTREAD);

	/*
	 * First part: process new packets to send.
	 * nm_i is the current index in the netmap ring,
	 * nic_i is the corresponding index in the NIC ring.
	 * The two numbers differ because upon a *_init() we reset
	 * the NIC ring but leave the netmap ring unchanged.
	 * For the transmit ring, we have
	 *
	 *	nm_i = kring->nr_hwcur
	 *	nic_i = IXGBE_TDT (not tracked in the driver)
	 * and
	 *	nm_i == (nic_i + kring->nkr_hwofs) % ring_size
	 *
	 * In this driver kring->nkr_hwofs >= 0, but for other
	 * drivers it might be negative as well.
	 */
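
	/*
	 * A concrete instance of the mapping above (numbers made up for
	 * illustration): with ring_size = 1024 and nkr_hwofs = 3, NIC
	 * slot nic_i = 1022 corresponds to netmap slot
	 * nm_i = (1022 + 3) % 1024 = 1, which is exactly what the
	 * netmap_idx_n2k()/netmap_idx_k2n() helpers compute.
	 */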

	/*
	 * If we have packets to send (kring->nr_hwcur != kring->rhead)
	 * iterate over the netmap ring, fetch length and update
	 * the corresponding slot in the NIC ring. Some drivers also
	 * need to update the buffer's physical address in the NIC slot
	 * even if NS_BUF_CHANGED is not set (PNMB computes the addresses).
	 *
	 * The netmap_reload_map() call is especially expensive,
	 * even when (as in this case) the tag is 0, so do it only
	 * when the buffer has actually changed.
	 *
	 * If possible do not set the report/intr bit on all slots,
	 * but only a few times per ring or when NS_REPORT is set.
	 *
	 * Finally, on 10G and faster drivers, it might be useful
	 * to prefetch the next slot and txr entry.
	 */
244
245 nm_i = kring->nr_hwcur;
246 if (nm_i != head) { /* we have new packets to send */
247 nic_i = netmap_idx_k2n(kring, nm_i);
248
249 __builtin_prefetch(&ring->slot[nm_i]);
250 __builtin_prefetch(&txr->tx_buffers[nic_i]);
251
252 for (n = 0; nm_i != head; n++) {
253 struct netmap_slot *slot = &ring->slot[nm_i];
254 u_int len = slot->len;
255 uint64_t paddr;
256 void *addr = PNMB(na, slot, &paddr);
257
258 /* device-specific */
259 union ixgbe_adv_tx_desc *curr = &txr->tx_base[nic_i];
260 struct ixgbe_tx_buf *txbuf = &txr->tx_buffers[nic_i];
261 int flags = (slot->flags & NS_REPORT ||
262 nic_i == 0 || nic_i == report_frequency) ?
263 IXGBE_TXD_CMD_RS : 0;
264
265 /* prefetch for next round */
266 __builtin_prefetch(&ring->slot[nm_i + 1]);
267 __builtin_prefetch(&txr->tx_buffers[nic_i + 1]);
268
269 NM_CHECK_ADDR_LEN(na, addr, len);
270
271 if (slot->flags & NS_BUF_CHANGED) {
272 /* buffer has changed, reload map */
273 netmap_reload_map(na, txr->txtag, txbuf->map, addr);
274 }
275 slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
276
277 /* Fill the slot in the NIC ring. */
			/* Use legacy descriptors; are they faster? */
			curr->read.buffer_addr = htole64(paddr);
			curr->read.olinfo_status = 0;
			curr->read.cmd_type_len = htole32(len | flags |
			    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_TXD_CMD_EOP);

			/* make sure changes to the buffer are synced */
			bus_dmamap_sync(txr->txtag, txbuf->map,
			    BUS_DMASYNC_PREWRITE);

			nm_i = nm_next(nm_i, lim);
			nic_i = nm_next(nic_i, lim);
		}
		kring->nr_hwcur = head;

		/* synchronize the NIC ring */
		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

		/* (re)start the tx unit up to slot nic_i (excluded) */
		IXGBE_WRITE_REG(&adapter->hw, txr->tail, nic_i);
	}

	/*
	 * Second part: reclaim buffers for completed transmissions.
	 * Because this is expensive (we read a NIC register etc.)
	 * we only do it in specific cases (see below).
	 */
	if (flags & NAF_FORCE_RECLAIM) {
		reclaim_tx = 1; /* forced reclaim */
	} else if (!nm_kr_txempty(kring)) {
		reclaim_tx = 0; /* have buffers, no reclaim */
	} else {
		/*
		 * No buffers available. Locate previous slot with
		 * REPORT_STATUS set.
		 * If the slot has DD set, we can reclaim space,
		 * otherwise wait for the next interrupt.
		 * This enables interrupt moderation on the tx
		 * side though it might reduce throughput.
		 */
		struct ixgbe_legacy_tx_desc *txd =
		    (struct ixgbe_legacy_tx_desc *)txr->tx_base;

		nic_i = txr->next_to_clean + report_frequency;
		if (nic_i > lim)
			nic_i -= lim + 1;
		/* round to the closest slot that carries REPORT_STATUS
		 * (slot 0 or slot report_frequency, see the tx loop) */
		nic_i = (nic_i < kring->nkr_num_slots / 4 ||
			 nic_i >= kring->nkr_num_slots*3/4) ?
			0 : report_frequency;
		reclaim_tx = txd[nic_i].upper.fields.status & IXGBE_TXD_STAT_DD;	// XXX cpu_to_le32 ?
	}
	if (reclaim_tx) {
		/*
		 * Record completed transmissions.
		 * We (re)use the driver's txr->next_to_clean to keep
		 * track of the most recently completed transmission.
		 *
		 * The datasheet discourages the use of TDH to find
		 * out the number of sent packets, but we only set
		 * REPORT_STATUS in a few slots so TDH is the only
		 * good way.
		 */
		nic_i = IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(kring->ring_id));
		if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */
			D("TDH wrap %d", nic_i);
			nic_i -= kring->nkr_num_slots;
		}
		if (nic_i != txr->next_to_clean) {
			/* some tx completed, increment avail */
			txr->next_to_clean = nic_i;
			kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
		}
	}

	return 0;
}


/*
 * Reconcile kernel and user view of the receive ring.
 * Same as for the txsync, this routine must be efficient.
 * The caller guarantees a single invocation, but races against
 * the rest of the driver should be handled here.
 *
 * On call, kring->rhead is the first packet that userspace wants
 * to keep, and kring->rcur is the wakeup point.
 * The kernel has previously reported packets up to kring->rtail.
 *
 * If (flags & NAF_FORCE_READ) also check for incoming packets irrespective
 * of whether or not we received an interrupt.
 */
static int
ixgbe_netmap_rxsync(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	struct ifnet *ifp = na->ifp;
	struct netmap_ring *ring = kring->ring;
	u_int nm_i;	/* index into the netmap ring */
	u_int nic_i;	/* index into the NIC ring */
	u_int n;
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = kring->rhead;
	int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;

	/* device-specific */
	struct adapter *adapter = ifp->if_softc;
	struct rx_ring *rxr = &adapter->rx_rings[kring->ring_id];

	if (head > lim)
		return netmap_ring_reinit(kring);

	/* XXX check sync modes */
	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

	/*
	 * First part: import newly received packets.
	 *
	 * nm_i is the index of the next free slot in the netmap ring,
	 * nic_i is the index of the next received packet in the NIC ring,
	 * and they may differ in case if_init() has been called while
	 * in netmap mode. For the receive ring we have
	 *
	 *	nic_i = rxr->next_to_check;
	 *	nm_i = kring->nr_hwtail (previous)
	 * and
	 *	nm_i == (nic_i + kring->nkr_hwofs) % ring_size
	 *
	 * rxr->next_to_check is set to 0 on a ring reinit
	 */
	if (netmap_no_pendintr || force_update) {
		int crclen = (ix_crcstrip) ? 0 : 4;

		nic_i = rxr->next_to_check; // or also k2n(kring->nr_hwtail)
		nm_i = netmap_idx_n2k(kring, nic_i);

		for (n = 0; ; n++) {
			union ixgbe_adv_rx_desc *curr = &rxr->rx_base[nic_i];
			uint32_t staterr = le32toh(curr->wb.upper.status_error);

			if ((staterr & IXGBE_RXD_STAT_DD) == 0)
				break;
			ring->slot[nm_i].len = le16toh(curr->wb.upper.length) - crclen;
			ring->slot[nm_i].flags = 0;
			bus_dmamap_sync(rxr->ptag,
			    rxr->rx_buffers[nic_i].pmap, BUS_DMASYNC_POSTREAD);
			nm_i = nm_next(nm_i, lim);
			nic_i = nm_next(nic_i, lim);
		}
		if (n) { /* update the state variables */
			if (netmap_no_pendintr && !force_update) {
				/* diagnostics */
				ix_rx_miss ++;
				ix_rx_miss_bufs += n;
			}
			rxr->next_to_check = nic_i;
			kring->nr_hwtail = nm_i;
		}
		kring->nr_kflags &= ~NKR_PENDINTR;
	}

	/*
	 * Second part: skip past packets that userspace has released
	 * (kring->nr_hwcur to kring->rhead excluded),
	 * and make the buffers available for reception.
	 * As usual nm_i is the index in the netmap ring,
	 * nic_i is the index in the NIC ring, and
	 * nm_i == (nic_i + kring->nkr_hwofs) % ring_size
	 */
	nm_i = kring->nr_hwcur;
	if (nm_i != head) {
		nic_i = netmap_idx_k2n(kring, nm_i);
		for (n = 0; nm_i != head; n++) {
			struct netmap_slot *slot = &ring->slot[nm_i];
			uint64_t paddr;
			void *addr = PNMB(na, slot, &paddr);

			union ixgbe_adv_rx_desc *curr = &rxr->rx_base[nic_i];
			struct ixgbe_rx_buf *rxbuf = &rxr->rx_buffers[nic_i];

			if (addr == NETMAP_BUF_BASE(na)) /* bad buf */
				goto ring_reset;

			if (slot->flags & NS_BUF_CHANGED) {
				/* buffer has changed, reload map */
				netmap_reload_map(na, rxr->ptag, rxbuf->pmap, addr);
				slot->flags &= ~NS_BUF_CHANGED;
			}
			curr->wb.upper.status_error = 0;
			curr->read.pkt_addr = htole64(paddr);
			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
			    BUS_DMASYNC_PREREAD);
			nm_i = nm_next(nm_i, lim);
			nic_i = nm_next(nic_i, lim);
		}
		kring->nr_hwcur = head;

		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
		/*
		 * IMPORTANT: we must leave one free slot in the ring,
		 * so move nic_i back by one unit: otherwise the tail
		 * would catch up with the head and a completely full
		 * ring would look empty to the hardware.
		 */
		nic_i = nm_prev(nic_i, lim);
		IXGBE_WRITE_REG(&adapter->hw, rxr->tail, nic_i);
	}

	return 0;

ring_reset:
	return netmap_ring_reinit(kring);
}


/*
 * The attach routine, called near the end of ixgbe_attach(),
 * fills the parameters for netmap_attach() and calls it.
 * It cannot fail: in the worst case (such as no memory)
 * netmap mode will be disabled and the driver will only
 * operate in standard mode.
 */
void
ixgbe_netmap_attach(struct adapter *adapter)
{
	struct netmap_adapter na;

	bzero(&na, sizeof(na));

	na.ifp = adapter->ifp;
	na.na_flags = NAF_BDG_MAYSLEEP;
	na.num_tx_desc = adapter->num_tx_desc;
	na.num_rx_desc = adapter->num_rx_desc;
	na.nm_txsync = ixgbe_netmap_txsync;
	na.nm_rxsync = ixgbe_netmap_rxsync;
	na.nm_register = ixgbe_netmap_reg;
	na.num_tx_rings = na.num_rx_rings = adapter->num_queues;
	netmap_attach(&na);
}
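
/*
 * Call-site sketch (illustrative, surrounding code elided): as noted
 * above, the driver invokes this near the end of its attach routine,
 * guarded by the same DEV_NETMAP option used in this file, so kernels
 * built without netmap are unaffected:
 *
 *	#ifdef DEV_NETMAP
 *		ixgbe_netmap_attach(adapter);
 *	#endif
 */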

#endif /* DEV_NETMAP */

/* end of file */