/*	$NetBSD: if_hvn.c,v 1.23 2022/05/29 10:43:45 rin Exp $	*/
/*	$OpenBSD: if_hvn.c,v 1.39 2018/03/11 14:31:34 mikeb Exp $	*/

/*-
 * Copyright (c) 2009-2012,2016 Microsoft Corp.
 * Copyright (c) 2010-2012 Citrix Inc.
 * Copyright (c) 2012 NetApp Inc.
 * Copyright (c) 2016 Mike Belopuhov <mike (at) esdenera.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * The OpenBSD port was done under funding by Esdenera Networks GmbH.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: if_hvn.c,v 1.23 2022/05/29 10:43:45 rin Exp $");

#ifdef _KERNEL_OPT
#include "opt_if_hvn.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_net_mpsafe.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/device.h>
#include <sys/bitops.h>
#include <sys/bus.h>
#include <sys/condvar.h>
#include <sys/cpu.h>
#include <sys/evcnt.h>
#include <sys/intr.h>
#include <sys/kmem.h>
#include <sys/kthread.h>
#include <sys/mutex.h>
#include <sys/pcq.h>
#include <sys/sysctl.h>
#include <sys/workqueue.h>

#include <net/if.h>
#include <net/if_ether.h>
#include <net/if_media.h>
#include <net/if_vlanvar.h>
#include <net/rss_config.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/udp.h>

#include <net/bpf.h>

#include <dev/ic/ndisreg.h>
#include <dev/ic/rndisreg.h>

#include <dev/hyperv/vmbusvar.h>
#include <dev/hyperv/if_hvnreg.h>

#ifndef EVL_PRIO_BITS
#define EVL_PRIO_BITS	13
#endif
#ifndef EVL_CFI_BITS
#define EVL_CFI_BITS	12
#endif

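/*
 * Size of the chimney (pre-pinned Tx) buffer that is shared by all
 * channels; see hvn_tx_ring_create() for where it is allocated.
 * hvn_chim_alloc() hands out per-packet slots from it.
 */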
#define HVN_CHIM_SIZE			(15 * 1024 * 1024)

#define HVN_NVS_MSGSIZE			32
#define HVN_NVS_BUFSIZE			PAGE_SIZE

#define HVN_RING_BUFSIZE		(128 * PAGE_SIZE)
#define HVN_RING_IDX2CPU(sc, idx)	((idx) % ncpu)

#ifndef HVN_CHANNEL_MAX_COUNT_DEFAULT
#define HVN_CHANNEL_MAX_COUNT_DEFAULT	8
#endif

#ifndef HVN_LINK_STATE_CHANGE_DELAY
#define HVN_LINK_STATE_CHANGE_DELAY	5000
#endif

#define HVN_WORKQUEUE_PRI		PRI_SOFTNET

/*
 * RNDIS control interface
 */
#define HVN_RNDIS_CTLREQS		4
#define HVN_RNDIS_BUFSIZE		512

struct rndis_cmd {
	uint32_t		rc_id;
	struct hvn_nvs_rndis	rc_msg;
	void			*rc_req;
	bus_dmamap_t		rc_dmap;
	bus_dma_segment_t	rc_segs;
	int			rc_nsegs;
	uint64_t		rc_gpa;
	struct rndis_packet_msg	rc_cmp;
	uint32_t		rc_cmplen;
	uint8_t			rc_cmpbuf[HVN_RNDIS_BUFSIZE];
	int			rc_done;
	TAILQ_ENTRY(rndis_cmd)	rc_entry;
	kmutex_t		rc_lock;
	kcondvar_t		rc_cv;
};
TAILQ_HEAD(rndis_queue, rndis_cmd);

#define HVN_MTU_MIN		68
#define HVN_MTU_MAX		(65535 - ETHER_ADDR_LEN)

#define HVN_RNDIS_XFER_SIZE	2048

#define HVN_NDIS_TXCSUM_CAP_IP4 \
	(NDIS_TXCSUM_CAP_IP4 | NDIS_TXCSUM_CAP_IP4OPT)
#define HVN_NDIS_TXCSUM_CAP_TCP4 \
	(NDIS_TXCSUM_CAP_TCP4 | NDIS_TXCSUM_CAP_TCP4OPT)
#define HVN_NDIS_TXCSUM_CAP_TCP6 \
	(NDIS_TXCSUM_CAP_TCP6 | NDIS_TXCSUM_CAP_TCP6OPT | \
	 NDIS_TXCSUM_CAP_IP6EXT)
#define HVN_NDIS_TXCSUM_CAP_UDP6 \
	(NDIS_TXCSUM_CAP_UDP6 | NDIS_TXCSUM_CAP_IP6EXT)
#define HVN_NDIS_LSOV2_CAP_IP6 \
	(NDIS_LSOV2_CAP_IP6EXT | NDIS_LSOV2_CAP_TCP6OPT)

#define HVN_RNDIS_CMD_NORESP	__BIT(0)

#define HVN_NVS_CMD_NORESP	__BIT(0)

/*
 * Tx ring
 */
#define HVN_TX_DESC		512
#define HVN_TX_FRAGS		15		/* 31 is the max */
#define HVN_TX_FRAG_SIZE	PAGE_SIZE
#define HVN_TX_PKT_SIZE		16384

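/*
 * Worst-case size of the RNDIS header on a data packet: the packet
 * message itself plus one per-packet-info record for the VLAN tag and
 * one for Tx checksum offload.  HVN_PKTSIZE_MIN()/HVN_PKTSIZE() give
 * the resulting size of a minimum-length/actual frame including that
 * header, rounded up to the chimney aggregation alignment.
 */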
#define HVN_RNDIS_PKT_LEN \
	(sizeof(struct rndis_packet_msg) + \
	 sizeof(struct rndis_pktinfo) + NDIS_VLAN_INFO_SIZE + \
	 sizeof(struct rndis_pktinfo) + NDIS_TXCSUM_INFO_SIZE)

#define HVN_PKTSIZE_MIN(align) \
	roundup2(ETHER_MIN_LEN + ETHER_VLAN_ENCAP_LEN - ETHER_CRC_LEN + \
	    HVN_RNDIS_PKT_LEN, (align))
#define HVN_PKTSIZE(m, align) \
	roundup2((m)->m_pkthdr.len + HVN_RNDIS_PKT_LEN, (align))

struct hvn_tx_desc {
	uint32_t			txd_id;
	struct vmbus_gpa		txd_sgl[HVN_TX_FRAGS + 1];
	int				txd_nsge;
	struct mbuf			*txd_buf;
	bus_dmamap_t			txd_dmap;
	struct vmbus_gpa		txd_gpa;
	struct rndis_packet_msg		*txd_req;
	TAILQ_ENTRY(hvn_tx_desc)	txd_entry;
	u_int				txd_refs;
	uint32_t			txd_flags;
#define HVN_TXD_FLAG_ONAGG	__BIT(0)
#define HVN_TXD_FLAG_DMAMAP	__BIT(1)
	uint32_t			txd_chim_index;
	int				txd_chim_size;
	STAILQ_ENTRY(hvn_tx_desc)	txd_agg_entry;
	STAILQ_HEAD(, hvn_tx_desc)	txd_agg_list;
};

struct hvn_softc;
struct hvn_rx_ring;

struct hvn_tx_ring {
	struct hvn_softc	*txr_softc;
	struct vmbus_channel	*txr_chan;
	struct hvn_rx_ring	*txr_rxr;
	void			*txr_si;
	char			txr_name[16];

	int			txr_id;
	int			txr_oactive;
	int			txr_suspended;
	int			txr_csum_assist;
	uint64_t		txr_caps_assist;
	uint32_t		txr_flags;
#define HVN_TXR_FLAG_UDP_HASH	__BIT(0)

	struct evcnt		txr_evpkts;
	struct evcnt		txr_evsends;
	struct evcnt		txr_evnodesc;
	struct evcnt		txr_evdmafailed;
	struct evcnt		txr_evdefrag;
	struct evcnt		txr_evpcqdrop;
	struct evcnt		txr_evtransmitdefer;
	struct evcnt		txr_evflushfailed;
	struct evcnt		txr_evchimneytried;
	struct evcnt		txr_evchimney;
	struct evcnt		txr_evvlanfixup;
	struct evcnt		txr_evvlanhwtagging;
	struct evcnt		txr_evvlantap;

	kmutex_t		txr_lock;
	pcq_t			*txr_interq;

	uint32_t		txr_avail;
	TAILQ_HEAD(, hvn_tx_desc) txr_list;
	struct hvn_tx_desc	txr_desc[HVN_TX_DESC];
	uint8_t			*txr_msgs;
	struct hyperv_dma	txr_dma;

	int			txr_chim_size;

	/* Applied packet transmission aggregation limits. */
	int			txr_agg_szmax;
	short			txr_agg_pktmax;
	short			txr_agg_align;

	/* Packet transmission aggregation states. */
	struct hvn_tx_desc	*txr_agg_txd;
	int			txr_agg_szleft;
	short			txr_agg_pktleft;
	struct rndis_packet_msg	*txr_agg_prevpkt;

	/* Temporary stats for each send. */
	int			txr_stat_pkts;
	int			txr_stat_size;
	int			txr_stat_mcasts;

	int			(*txr_sendpkt)(struct hvn_tx_ring *,
				    struct hvn_tx_desc *);
} __aligned(CACHE_LINE_SIZE);

struct hvn_rx_ring {
	struct hvn_softc	*rxr_softc;
	struct vmbus_channel	*rxr_chan;
	struct hvn_tx_ring	*rxr_txr;
	void			*rxr_si;
	bool			rxr_workqueue;
	char			rxr_name[16];

	struct work		rxr_wk;
	volatile bool		rxr_onlist;
	volatile bool		rxr_onproc;
	kmutex_t		rxr_onwork_lock;
	kcondvar_t		rxr_onwork_cv;

	uint32_t		rxr_flags;
#define HVN_RXR_FLAG_UDP_HASH	__BIT(0)

	kmutex_t		rxr_lock;

	struct evcnt		rxr_evpkts;
	struct evcnt		rxr_evcsum_ip;
	struct evcnt		rxr_evcsum_tcp;
	struct evcnt		rxr_evcsum_udp;
	struct evcnt		rxr_evvlanhwtagging;
	struct evcnt		rxr_evintr;
	struct evcnt		rxr_evdefer;
	struct evcnt		rxr_evdeferreq;
	struct evcnt		rxr_evredeferreq;

	/* NVS */
	uint8_t			*rxr_nvsbuf;
} __aligned(CACHE_LINE_SIZE);

struct hvn_softc {
	device_t		sc_dev;

	struct vmbus_softc	*sc_vmbus;
	struct vmbus_channel	*sc_prichan;
	bus_dma_tag_t		sc_dmat;

	struct ethercom		sc_ec;
	struct ifmedia		sc_media;
	struct if_percpuq	*sc_ipq;
	struct workqueue	*sc_wq;
	bool			sc_txrx_workqueue;
	kmutex_t		sc_core_lock;

	kmutex_t		sc_link_lock;
	kcondvar_t		sc_link_cv;
	callout_t		sc_link_tmout;
	lwp_t			*sc_link_lwp;
	uint32_t		sc_link_ev;
#define HVN_LINK_EV_STATE_CHANGE		__BIT(0)
#define HVN_LINK_EV_NETWORK_CHANGE_TMOUT	__BIT(1)
#define HVN_LINK_EV_NETWORK_CHANGE		__BIT(2)
#define HVN_LINK_EV_RESUME_NETWORK		__BIT(3)
#define HVN_LINK_EV_EXIT_THREAD			__BIT(4)
	int			sc_link_state;
	bool			sc_link_onproc;
	bool			sc_link_pending;
	bool			sc_link_suspend;

	int			sc_tx_process_limit;
	int			sc_rx_process_limit;
	int			sc_tx_intr_process_limit;
	int			sc_rx_intr_process_limit;

	struct sysctllog	*sc_sysctllog;

	uint32_t		sc_caps;
#define HVN_CAPS_VLAN		__BIT(0)
#define HVN_CAPS_MTU		__BIT(1)
#define HVN_CAPS_IPCS		__BIT(2)
#define HVN_CAPS_TCP4CS		__BIT(3)
#define HVN_CAPS_TCP6CS		__BIT(4)
#define HVN_CAPS_UDP4CS		__BIT(5)
#define HVN_CAPS_UDP6CS		__BIT(6)
#define HVN_CAPS_TSO4		__BIT(7)
#define HVN_CAPS_TSO6		__BIT(8)
#define HVN_CAPS_HASHVAL	__BIT(9)
#define HVN_CAPS_UDPHASH	__BIT(10)

	uint32_t		sc_flags;
#define HVN_SCF_ATTACHED	__BIT(0)
#define HVN_SCF_RXBUF_CONNECTED	__BIT(1)
#define HVN_SCF_CHIM_CONNECTED	__BIT(2)
#define HVN_SCF_REVOKED		__BIT(3)
#define HVN_SCF_HAS_RSSKEY	__BIT(4)
#define HVN_SCF_HAS_RSSIND	__BIT(5)

	/* NVS protocol */
	int			sc_proto;
	uint32_t		sc_nvstid;
	uint8_t			sc_nvsrsp[HVN_NVS_MSGSIZE];
	int			sc_nvsdone;
	kmutex_t		sc_nvsrsp_lock;
	kcondvar_t		sc_nvsrsp_cv;

	/* RNDIS protocol */
	int			sc_ndisver;
	uint32_t		sc_rndisrid;
	int			sc_tso_szmax;
	int			sc_tso_sgmin;
	uint32_t		sc_rndis_agg_size;
	uint32_t		sc_rndis_agg_pkts;
	uint32_t		sc_rndis_agg_align;
	struct rndis_queue	sc_cntl_sq;	/* submission queue */
	kmutex_t		sc_cntl_sqlck;
	struct rndis_queue	sc_cntl_cq;	/* completion queue */
	kmutex_t		sc_cntl_cqlck;
	struct rndis_queue	sc_cntl_fq;	/* free queue */
	kmutex_t		sc_cntl_fqlck;
	kcondvar_t		sc_cntl_fqcv;
	struct rndis_cmd	sc_cntl_msgs[HVN_RNDIS_CTLREQS];
	struct hvn_nvs_rndis	sc_data_msg;

	int			sc_rss_ind_size;
	uint32_t		sc_rss_hash;	/* setting, NDIS_HASH_ */
	uint32_t		sc_rss_hcap;	/* caps, NDIS_HASH_ */
	struct ndis_rssprm_toeplitz sc_rss;

	/* Rx ring */
	uint8_t			*sc_rx_ring;
	int			sc_rx_size;
	uint32_t		sc_rx_hndl;
	struct hyperv_dma	sc_rx_dma;
	struct hvn_rx_ring	*sc_rxr;
	int			sc_nrxr;
	int			sc_nrxr_inuse;

	/* Tx ring */
	struct hvn_tx_ring	*sc_txr;
	int			sc_ntxr;
	int			sc_ntxr_inuse;

	/* chimney sending buffers */
	uint8_t			*sc_chim;
	uint32_t		sc_chim_hndl;
	struct hyperv_dma	sc_chim_dma;
	kmutex_t		sc_chim_bmap_lock;
	u_long			*sc_chim_bmap;
	int			sc_chim_bmap_cnt;
	int			sc_chim_cnt;
	int			sc_chim_szmax;

	/* Packet transmission aggregation user settings. */
	int			sc_agg_size;
	int			sc_agg_pkts;
};

#define SC2IFP(_sc_)	(&(_sc_)->sc_ec.ec_if)
#define IFP2SC(_ifp_)	((_ifp_)->if_softc)

#ifndef HVN_TX_PROCESS_LIMIT_DEFAULT
#define HVN_TX_PROCESS_LIMIT_DEFAULT		128
#endif
#ifndef HVN_RX_PROCESS_LIMIT_DEFAULT
#define HVN_RX_PROCESS_LIMIT_DEFAULT		128
#endif
#ifndef HVN_TX_INTR_PROCESS_LIMIT_DEFAULT
#define HVN_TX_INTR_PROCESS_LIMIT_DEFAULT	256
#endif
#ifndef HVN_RX_INTR_PROCESS_LIMIT_DEFAULT
#define HVN_RX_INTR_PROCESS_LIMIT_DEFAULT	256
#endif

/*
 * See hvn_set_hlen().
 *
 * This value is for Azure.  For Hyper-V, set this above
 * 65536 to disable UDP datagram checksum fixup.
 */
#ifndef HVN_UDP_CKSUM_FIXUP_MTU_DEFAULT
#define HVN_UDP_CKSUM_FIXUP_MTU_DEFAULT	1420
#endif
static int hvn_udpcs_fixup_mtu = HVN_UDP_CKSUM_FIXUP_MTU_DEFAULT;

/* Limit chimney send size */
static int hvn_tx_chimney_size = 0;

/* # of channels to use; each channel has one RX ring and one TX ring */
#ifndef HVN_CHANNEL_COUNT_DEFAULT
#define HVN_CHANNEL_COUNT_DEFAULT	0
#endif
static int hvn_channel_cnt = HVN_CHANNEL_COUNT_DEFAULT;

/* # of transmit rings to use */
#ifndef HVN_TX_RING_COUNT_DEFAULT
#define HVN_TX_RING_COUNT_DEFAULT	0
#endif
static int hvn_tx_ring_cnt = HVN_TX_RING_COUNT_DEFAULT;

/* Packet transmission aggregation size limit */
static int hvn_tx_agg_size = -1;

/* Packet transmission aggregation count limit */
static int hvn_tx_agg_pkts = -1;

static int	hvn_match(device_t, cfdata_t, void *);
static void	hvn_attach(device_t, device_t, void *);
static int	hvn_detach(device_t, int);

CFATTACH_DECL_NEW(hvn, sizeof(struct hvn_softc),
    hvn_match, hvn_attach, hvn_detach, NULL);

static int	hvn_ioctl(struct ifnet *, u_long, void *);
static int	hvn_media_change(struct ifnet *);
static void	hvn_media_status(struct ifnet *, struct ifmediareq *);
static void	hvn_link_task(void *);
static void	hvn_link_event(struct hvn_softc *, uint32_t);
static void	hvn_link_netchg_tmout_cb(void *);
static int	hvn_init(struct ifnet *);
static int	hvn_init_locked(struct ifnet *);
static void	hvn_stop(struct ifnet *, int);
static void	hvn_stop_locked(struct ifnet *);
static void	hvn_start(struct ifnet *);
static int	hvn_transmit(struct ifnet *, struct mbuf *);
static void	hvn_deferred_transmit(void *);
static int	hvn_flush_txagg(struct hvn_tx_ring *);
static int	hvn_encap(struct hvn_tx_ring *, struct hvn_tx_desc *,
		    struct mbuf *, int);
static int	hvn_txpkt(struct hvn_tx_ring *, struct hvn_tx_desc *);
static void	hvn_txeof(struct hvn_tx_ring *, uint64_t);
static int	hvn_rx_ring_create(struct hvn_softc *, int);
static int	hvn_rx_ring_destroy(struct hvn_softc *);
static void	hvn_fixup_rx_data(struct hvn_softc *);
static int	hvn_tx_ring_create(struct hvn_softc *, int);
static void	hvn_tx_ring_destroy(struct hvn_softc *);
static void	hvn_set_chim_size(struct hvn_softc *, int);
static uint32_t	hvn_chim_alloc(struct hvn_softc *);
static void	hvn_chim_free(struct hvn_softc *, uint32_t);
static void	hvn_fixup_tx_data(struct hvn_softc *);
static struct mbuf *
		hvn_set_hlen(struct mbuf *, int *);
static int	hvn_txd_peek(struct hvn_tx_ring *);
static struct hvn_tx_desc *
		hvn_txd_get(struct hvn_tx_ring *);
static void	hvn_txd_put(struct hvn_tx_ring *, struct hvn_tx_desc *);
static void	hvn_txd_gc(struct hvn_tx_ring *, struct hvn_tx_desc *);
static void	hvn_txd_hold(struct hvn_tx_desc *);
static void	hvn_txd_agg(struct hvn_tx_desc *, struct hvn_tx_desc *);
static int	hvn_tx_ring_pending(struct hvn_tx_ring *);
static void	hvn_tx_ring_qflush(struct hvn_softc *, struct hvn_tx_ring *);
static int	hvn_get_rsscaps(struct hvn_softc *, int *);
static int	hvn_set_rss(struct hvn_softc *, uint16_t);
static void	hvn_fixup_rss_ind(struct hvn_softc *);
static int	hvn_get_hwcaps(struct hvn_softc *, struct ndis_offload *);
static int	hvn_set_capabilities(struct hvn_softc *, int);
static int	hvn_get_lladdr(struct hvn_softc *, uint8_t *);
static void	hvn_update_link_status(struct hvn_softc *);
static int	hvn_get_mtu(struct hvn_softc *, uint32_t *);
static int	hvn_channel_attach(struct hvn_softc *, struct vmbus_channel *);
static void	hvn_channel_detach(struct hvn_softc *, struct vmbus_channel *);
static void	hvn_channel_detach_all(struct hvn_softc *);
static int	hvn_subchannel_attach(struct hvn_softc *);
static int	hvn_synth_alloc_subchannels(struct hvn_softc *, int *);
static int	hvn_synth_attachable(const struct hvn_softc *);
static int	hvn_synth_attach(struct hvn_softc *, int);
static void	hvn_synth_detach(struct hvn_softc *);
static void	hvn_set_ring_inuse(struct hvn_softc *, int);
static void	hvn_disable_rx(struct hvn_softc *);
static void	hvn_drain_rxtx(struct hvn_softc *, int);
static void	hvn_suspend_data(struct hvn_softc *);
static void	hvn_suspend_mgmt(struct hvn_softc *);
static void	hvn_suspend(struct hvn_softc *) __unused;
static void	hvn_resume_tx(struct hvn_softc *, int);
static void	hvn_resume_data(struct hvn_softc *);
static void	hvn_resume_mgmt(struct hvn_softc *);
static void	hvn_resume(struct hvn_softc *) __unused;
static void	hvn_init_sysctls(struct hvn_softc *);

/* NVS */
static int	hvn_nvs_init(struct hvn_softc *);
static void	hvn_nvs_destroy(struct hvn_softc *);
static int	hvn_nvs_attach(struct hvn_softc *, int);
static int	hvn_nvs_connect_rxbuf(struct hvn_softc *);
static int	hvn_nvs_disconnect_rxbuf(struct hvn_softc *);
static int	hvn_nvs_connect_chim(struct hvn_softc *);
static int	hvn_nvs_disconnect_chim(struct hvn_softc *);
static void	hvn_handle_ring_work(struct work *, void *);
static void	hvn_nvs_softintr(void *);
static void	hvn_nvs_intr(void *);
static void	hvn_nvs_intr1(struct hvn_rx_ring *, int, int);
static int	hvn_nvs_cmd(struct hvn_softc *, void *, size_t, uint64_t,
		    u_int);
static int	hvn_nvs_ack(struct hvn_rx_ring *, uint64_t);
static void	hvn_nvs_detach(struct hvn_softc *);
static int	hvn_nvs_alloc_subchannels(struct hvn_softc *, int *);

/* RNDIS */
static int	hvn_rndis_init(struct hvn_softc *);
static void	hvn_rndis_destroy(struct hvn_softc *);
static int	hvn_rndis_attach(struct hvn_softc *, int);
static int	hvn_rndis_cmd(struct hvn_softc *, struct rndis_cmd *, u_int);
static int	hvn_rndis_input(struct hvn_rx_ring *, uint64_t, void *);
static int	hvn_rxeof(struct hvn_rx_ring *, uint8_t *, uint32_t);
static void	hvn_rndis_complete(struct hvn_softc *, uint8_t *, uint32_t);
static int	hvn_rndis_output_sgl(struct hvn_tx_ring *,
		    struct hvn_tx_desc *);
static int	hvn_rndis_output_chim(struct hvn_tx_ring *,
		    struct hvn_tx_desc *);
static void	hvn_rndis_status(struct hvn_softc *, uint8_t *, uint32_t);
static int	hvn_rndis_query(struct hvn_softc *, uint32_t, void *, size_t *);
static int	hvn_rndis_query2(struct hvn_softc *, uint32_t, const void *,
		    size_t, void *, size_t *, size_t);
static int	hvn_rndis_set(struct hvn_softc *, uint32_t, void *, size_t);
static int	hvn_rndis_open(struct hvn_softc *);
static int	hvn_rndis_close(struct hvn_softc *);
static void	hvn_rndis_detach(struct hvn_softc *);

static int
hvn_match(device_t parent, cfdata_t match, void *aux)
{
	struct vmbus_attach_args *aa = aux;

	if (memcmp(aa->aa_type, &hyperv_guid_network, sizeof(*aa->aa_type)))
		return 0;
	return 1;
}

static void
hvn_attach(device_t parent, device_t self, void *aux)
{
	struct hvn_softc *sc = device_private(self);
	struct vmbus_attach_args *aa = aux;
	struct ifnet *ifp = SC2IFP(sc);
	char xnamebuf[32];
	uint8_t enaddr[ETHER_ADDR_LEN];
	uint32_t mtu;
	int tx_ring_cnt, ring_cnt;
	int error;

	sc->sc_dev = self;
	sc->sc_vmbus = (struct vmbus_softc *)device_private(parent);
	sc->sc_prichan = aa->aa_chan;
	sc->sc_dmat = sc->sc_vmbus->sc_dmat;

	aprint_naive("\n");
	aprint_normal(": Hyper-V NetVSC\n");

	sc->sc_txrx_workqueue = true;
	sc->sc_tx_process_limit = HVN_TX_PROCESS_LIMIT_DEFAULT;
	sc->sc_rx_process_limit = HVN_RX_PROCESS_LIMIT_DEFAULT;
	sc->sc_tx_intr_process_limit = HVN_TX_INTR_PROCESS_LIMIT_DEFAULT;
	sc->sc_rx_intr_process_limit = HVN_RX_INTR_PROCESS_LIMIT_DEFAULT;
	sc->sc_agg_size = hvn_tx_agg_size;
	sc->sc_agg_pkts = hvn_tx_agg_pkts;

	mutex_init(&sc->sc_core_lock, MUTEX_DEFAULT, IPL_SOFTNET);
	mutex_init(&sc->sc_link_lock, MUTEX_DEFAULT, IPL_NET);
	cv_init(&sc->sc_link_cv, "hvnknkcv");
	callout_init(&sc->sc_link_tmout, CALLOUT_MPSAFE);
	callout_setfunc(&sc->sc_link_tmout, hvn_link_netchg_tmout_cb, sc);
	if (kthread_create(PRI_NONE, KTHREAD_MUSTJOIN | KTHREAD_MPSAFE, NULL,
	    hvn_link_task, sc, &sc->sc_link_lwp, "%slink",
	    device_xname(self))) {
		aprint_error_dev(self, "failed to create link thread\n");
		return;
	}

	snprintf(xnamebuf, sizeof(xnamebuf), "%srxtx", device_xname(self));
	if (workqueue_create(&sc->sc_wq, xnamebuf, hvn_handle_ring_work,
	    sc, HVN_WORKQUEUE_PRI, IPL_NET, WQ_PERCPU | WQ_MPSAFE)) {
		aprint_error_dev(self, "failed to create workqueue\n");
		sc->sc_wq = NULL;
		goto destroy_link_thread;
	}

	ring_cnt = hvn_channel_cnt;
	if (ring_cnt <= 0) {
		ring_cnt = ncpu;
		if (ring_cnt > HVN_CHANNEL_MAX_COUNT_DEFAULT)
			ring_cnt = HVN_CHANNEL_MAX_COUNT_DEFAULT;
	} else if (ring_cnt > ncpu)
		ring_cnt = ncpu;

	tx_ring_cnt = hvn_tx_ring_cnt;
	if (tx_ring_cnt <= 0 || tx_ring_cnt > ring_cnt)
		tx_ring_cnt = ring_cnt;

	if (hvn_tx_ring_create(sc, tx_ring_cnt)) {
		aprint_error_dev(self, "failed to create Tx ring\n");
		goto destroy_wq;
	}

	if (hvn_rx_ring_create(sc, ring_cnt)) {
		aprint_error_dev(self, "failed to create Rx ring\n");
		goto destroy_tx_ring;
	}

	strlcpy(ifp->if_xname, device_xname(sc->sc_dev), IFNAMSIZ);
	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_extflags = IFEF_MPSAFE;
	ifp->if_ioctl = hvn_ioctl;
	ifp->if_start = hvn_start;
	ifp->if_transmit = hvn_transmit;
	ifp->if_init = hvn_init;
	ifp->if_stop = hvn_stop;
	ifp->if_baudrate = IF_Gbps(10);

	IFQ_SET_MAXLEN(&ifp->if_snd, uimax(HVN_TX_DESC - 1, IFQ_MAXLEN));
	IFQ_SET_READY(&ifp->if_snd);

	/* Initialize ifmedia structures. */
	sc->sc_ec.ec_ifmedia = &sc->sc_media;
	ifmedia_init_with_lock(&sc->sc_media, IFM_IMASK,
	    hvn_media_change, hvn_media_status, &sc->sc_core_lock);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_10G_T | IFM_FDX, 0, NULL);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_10G_T, 0, NULL);
	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);

	if_initialize(ifp);
	sc->sc_ipq = if_percpuq_create(ifp);
	if_deferred_start_init(ifp, NULL);

	hvn_nvs_init(sc);
	hvn_rndis_init(sc);
	if (hvn_synth_attach(sc, ETHERMTU)) {
		aprint_error_dev(self, "failed to attach synth\n");
		goto destroy_if_percpuq;
	}

	aprint_normal_dev(self, "NVS %d.%d NDIS %d.%d\n",
	    sc->sc_proto >> 16, sc->sc_proto & 0xffff,
	    sc->sc_ndisver >> 16, sc->sc_ndisver & 0xffff);

	if (hvn_get_lladdr(sc, enaddr)) {
		aprint_error_dev(self,
		    "failed to obtain an ethernet address\n");
		goto detach_synth;
	}
	aprint_normal_dev(self, "Ethernet address %s\n", ether_sprintf(enaddr));

	/*
	 * Fix up Tx/Rx data after the synthetic parts are attached.
	 */
	hvn_fixup_tx_data(sc);
	hvn_fixup_rx_data(sc);

	ifp->if_capabilities |= sc->sc_txr[0].txr_caps_assist &
	    (IFCAP_CSUM_IPv4_Tx | IFCAP_CSUM_IPv4_Rx |
	     IFCAP_CSUM_TCPv4_Tx | IFCAP_CSUM_TCPv4_Rx |
	     IFCAP_CSUM_TCPv6_Tx | IFCAP_CSUM_TCPv6_Rx |
	     IFCAP_CSUM_UDPv4_Tx | IFCAP_CSUM_UDPv4_Rx |
	     IFCAP_CSUM_UDPv6_Tx | IFCAP_CSUM_UDPv6_Rx);
	/* XXX TSOv4, TSOv6 */
	if (sc->sc_caps & HVN_CAPS_VLAN) {
		/* XXX not sure about VLAN_MTU. */
		sc->sc_ec.ec_capabilities |= ETHERCAP_VLAN_HWTAGGING;
		sc->sc_ec.ec_capabilities |= ETHERCAP_VLAN_MTU;
	}
	sc->sc_ec.ec_capabilities |= ETHERCAP_JUMBO_MTU;

	ether_ifattach(ifp, enaddr);

	error = hvn_get_mtu(sc, &mtu);
	if (error)
		mtu = ETHERMTU;
	if (mtu < ETHERMTU) {
		DPRINTF("%s: fixup mtu %u -> %u\n", device_xname(sc->sc_dev),
		    ETHERMTU, mtu);
		ifp->if_mtu = mtu;
	}

	if_register(ifp);

	/*
	 * Kick off link status check.
	 */
	hvn_link_event(sc, HVN_LINK_EV_STATE_CHANGE);

	hvn_init_sysctls(sc);

	if (pmf_device_register(self, NULL, NULL))
		pmf_class_network_register(self, ifp);
	else
		aprint_error_dev(self, "couldn't establish power handler\n");

	SET(sc->sc_flags, HVN_SCF_ATTACHED);
	return;

detach_synth:
	hvn_synth_detach(sc);
	hvn_rndis_destroy(sc);
	hvn_nvs_destroy(sc);
destroy_if_percpuq:
	if_percpuq_destroy(sc->sc_ipq);
	hvn_rx_ring_destroy(sc);
destroy_tx_ring:
	hvn_tx_ring_destroy(sc);
destroy_wq:
	workqueue_destroy(sc->sc_wq);
	sc->sc_wq = NULL;
destroy_link_thread:
	hvn_link_event(sc, HVN_LINK_EV_EXIT_THREAD);
	kthread_join(sc->sc_link_lwp);
	callout_destroy(&sc->sc_link_tmout);
	cv_destroy(&sc->sc_link_cv);
	mutex_destroy(&sc->sc_link_lock);
	mutex_destroy(&sc->sc_core_lock);
}

static int
hvn_detach(device_t self, int flags)
{
	struct hvn_softc *sc = device_private(self);
	struct ifnet *ifp = SC2IFP(sc);

	if (!ISSET(sc->sc_flags, HVN_SCF_ATTACHED))
		return 0;

	if (vmbus_channel_is_revoked(sc->sc_prichan))
		SET(sc->sc_flags, HVN_SCF_REVOKED);

	pmf_device_deregister(self);

	mutex_enter(&sc->sc_core_lock);

	if (ifp->if_flags & IFF_RUNNING)
		hvn_stop_locked(ifp);
	/*
	 * NOTE:
	 * hvn_stop() only suspends data, so the management parts have
	 * to be suspended manually here.
	 */
	hvn_suspend_mgmt(sc);

	ether_ifdetach(ifp);
	if_detach(ifp);
	if_percpuq_destroy(sc->sc_ipq);

	hvn_link_event(sc, HVN_LINK_EV_EXIT_THREAD);
	kthread_join(sc->sc_link_lwp);
	callout_halt(&sc->sc_link_tmout, NULL);

	hvn_synth_detach(sc);
	hvn_rndis_destroy(sc);
	hvn_nvs_destroy(sc);

	mutex_exit(&sc->sc_core_lock);

	hvn_rx_ring_destroy(sc);
	hvn_tx_ring_destroy(sc);

	workqueue_destroy(sc->sc_wq);
	callout_destroy(&sc->sc_link_tmout);
	cv_destroy(&sc->sc_link_cv);
	mutex_destroy(&sc->sc_link_lock);
	mutex_destroy(&sc->sc_core_lock);

	sysctl_teardown(&sc->sc_sysctllog);

	return 0;
}

static int
hvn_ioctl(struct ifnet *ifp, u_long command, void *data)
{
	struct hvn_softc *sc = IFP2SC(ifp);
	struct ifreq *ifr = (struct ifreq *)data;
	uint32_t mtu;
	int s, error = 0;

	switch (command) {
	case SIOCSIFMTU:
		if (ifr->ifr_mtu < HVN_MTU_MIN || ifr->ifr_mtu > HVN_MTU_MAX) {
			error = EINVAL;
			break;
		}

		mutex_enter(&sc->sc_core_lock);

		if (!(sc->sc_caps & HVN_CAPS_MTU)) {
			/* Can't change MTU */
			mutex_exit(&sc->sc_core_lock);
			error = EOPNOTSUPP;
			break;
		}

		if (ifp->if_mtu == ifr->ifr_mtu) {
			mutex_exit(&sc->sc_core_lock);
			break;
		}

		/*
		 * Suspend this interface before the synthetic parts
		 * are ripped.
		 */
		hvn_suspend(sc);

		/*
		 * Detach the synthetic parts, i.e. NVS and RNDIS.
		 */
		hvn_synth_detach(sc);

		/*
		 * Reattach the synthetic parts, i.e. NVS and RNDIS,
		 * with the new MTU setting.
		 */
		error = hvn_synth_attach(sc, ifr->ifr_mtu);
		if (error) {
			mutex_exit(&sc->sc_core_lock);
			break;
		}

		error = hvn_get_mtu(sc, &mtu);
		if (error)
			mtu = ifr->ifr_mtu;
		DPRINTF("%s: RNDIS mtu=%d\n", device_xname(sc->sc_dev), mtu);

		/*
		 * Commit the requested MTU, after the synthetic parts
		 * have been successfully attached.
		 */
		if (mtu >= ifr->ifr_mtu) {
			mtu = ifr->ifr_mtu;
		} else {
			DPRINTF("%s: fixup mtu %d -> %u\n",
			    device_xname(sc->sc_dev), ifr->ifr_mtu, mtu);
		}
		ifp->if_mtu = mtu;

		/*
		 * Synthetic parts' reattach may change the chimney
		 * sending size; update it.
		 */
		if (sc->sc_txr[0].txr_chim_size > sc->sc_chim_szmax)
			hvn_set_chim_size(sc, sc->sc_chim_szmax);

		/*
		 * All done!  Resume the interface now.
		 */
		hvn_resume(sc);

		mutex_exit(&sc->sc_core_lock);
		break;
	default:
		s = splnet();
		if (command == SIOCGIFMEDIA || command == SIOCSIFMEDIA)
			error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, command);
		else
			error = ether_ioctl(ifp, command, data);
		splx(s);
		if (error == ENETRESET) {
			mutex_enter(&sc->sc_core_lock);
			if (ifp->if_flags & IFF_RUNNING)
				hvn_init_locked(ifp);
			mutex_exit(&sc->sc_core_lock);
			error = 0;
		}
		break;
	}

	return error;
}

static int
hvn_media_change(struct ifnet *ifp)
{
	struct hvn_softc *sc = IFP2SC(ifp);
	struct ifmedia *ifm = &sc->sc_media;

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return EINVAL;

	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		break;
	default:
		device_printf(sc->sc_dev, "Only auto media type\n");
		return EINVAL;
	}
	return 0;
}

static void
hvn_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct hvn_softc *sc = IFP2SC(ifp);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (sc->sc_link_state != LINK_STATE_UP) {
		ifmr->ifm_active |= IFM_NONE;
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;
	ifmr->ifm_active |= IFM_10G_T | IFM_FDX;
}

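/*
 * Link status kthread: waits for HVN_LINK_EV_* events posted via
 * hvn_link_event() and translates them into if_link_state_change()
 * calls.  A network change is reported as a fake link-down followed
 * by a delayed link-up re-check (see below).
 */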
static void
hvn_link_task(void *arg)
{
	struct hvn_softc *sc = arg;
	struct ifnet *ifp = SC2IFP(sc);
	uint32_t event;
	int old_link_state;

	mutex_enter(&sc->sc_link_lock);
	sc->sc_link_onproc = false;
	for (;;) {
		if (sc->sc_link_ev == 0) {
			cv_wait(&sc->sc_link_cv, &sc->sc_link_lock);
			continue;
		}

		sc->sc_link_onproc = true;
		event = sc->sc_link_ev;
		sc->sc_link_ev = 0;
		mutex_exit(&sc->sc_link_lock);

		if (event & HVN_LINK_EV_EXIT_THREAD)
			break;

		if (sc->sc_link_suspend)
			goto next;

		if (event & HVN_LINK_EV_RESUME_NETWORK) {
			if (sc->sc_link_pending)
				event |= HVN_LINK_EV_NETWORK_CHANGE;
			else
				event |= HVN_LINK_EV_STATE_CHANGE;
		}

		if (event & HVN_LINK_EV_NETWORK_CHANGE) {
			/* Prevent any link status checks from running. */
			sc->sc_link_pending = true;

			/*
			 * Fake up a [link down --> link up] state change;
			 * a 5 second delay is used, which closely simulates
			 * the miibus reaction to a link down event.
			 */
			old_link_state = sc->sc_link_state;
			sc->sc_link_state = LINK_STATE_DOWN;
			if (old_link_state != sc->sc_link_state) {
				if_link_state_change(ifp, LINK_STATE_DOWN);
			}
#if defined(HVN_LINK_STATE_CHANGE_DELAY) && HVN_LINK_STATE_CHANGE_DELAY > 0
			callout_schedule(&sc->sc_link_tmout,
			    mstohz(HVN_LINK_STATE_CHANGE_DELAY));
#else
			hvn_link_event(sc, HVN_LINK_EV_NETWORK_CHANGE_TMOUT);
#endif
		} else if (event & HVN_LINK_EV_NETWORK_CHANGE_TMOUT) {
			/* Re-allow link status checks. */
			sc->sc_link_pending = false;
			hvn_update_link_status(sc);
		} else if (event & HVN_LINK_EV_STATE_CHANGE) {
			if (!sc->sc_link_pending)
				hvn_update_link_status(sc);
		}
next:
		mutex_enter(&sc->sc_link_lock);
		sc->sc_link_onproc = false;
	}

	mutex_enter(&sc->sc_link_lock);
	sc->sc_link_onproc = false;
	mutex_exit(&sc->sc_link_lock);

	kthread_exit(0);
}

static void
hvn_link_event(struct hvn_softc *sc, uint32_t ev)
{

	mutex_enter(&sc->sc_link_lock);
	SET(sc->sc_link_ev, ev);
	cv_signal(&sc->sc_link_cv);
	mutex_exit(&sc->sc_link_lock);
}

static void
hvn_link_netchg_tmout_cb(void *arg)
{
	struct hvn_softc *sc = arg;

	hvn_link_event(sc, HVN_LINK_EV_NETWORK_CHANGE_TMOUT);
}

static int
hvn_init(struct ifnet *ifp)
{
	struct hvn_softc *sc = IFP2SC(ifp);
	int error;

	mutex_enter(&sc->sc_core_lock);
	error = hvn_init_locked(ifp);
	mutex_exit(&sc->sc_core_lock);

	return error;
}

static int
hvn_init_locked(struct ifnet *ifp)
{
	struct hvn_softc *sc = IFP2SC(ifp);
	int error;

	KASSERT(mutex_owned(&sc->sc_core_lock));

	hvn_stop_locked(ifp);

	error = hvn_rndis_open(sc);
	if (error)
		return error;

	/* Clear OACTIVE bit. */
	ifp->if_flags &= ~IFF_OACTIVE;

	/* Clear TX 'suspended' bit. */
	hvn_resume_tx(sc, sc->sc_ntxr_inuse);

	/* Everything is ready; unleash! */
	ifp->if_flags |= IFF_RUNNING;

	return 0;
}

static void
hvn_stop(struct ifnet *ifp, int disable)
{
	struct hvn_softc *sc = IFP2SC(ifp);

	mutex_enter(&sc->sc_core_lock);
	hvn_stop_locked(ifp);
	mutex_exit(&sc->sc_core_lock);
}

static void
hvn_stop_locked(struct ifnet *ifp)
{
	struct hvn_softc *sc = IFP2SC(ifp);
	int i;

	KASSERT(mutex_owned(&sc->sc_core_lock));

	/* Clear RUNNING bit ASAP. */
	ifp->if_flags &= ~IFF_RUNNING;

	/* Suspend data transfers. */
	hvn_suspend_data(sc);

	/* Clear OACTIVE bit. */
	ifp->if_flags &= ~IFF_OACTIVE;
	for (i = 0; i < sc->sc_ntxr_inuse; i++)
		sc->sc_txr[i].txr_oactive = 0;
}

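/*
 * Common transmit path for if_start (is_transmit == false, shared
 * if_snd queue) and if_transmit (is_transmit == true, per-ring pcq):
 * dequeue packets, fix up their headers, encapsulate them into tx
 * descriptors and either aggregate them into the chimney buffer or
 * send them directly.
 */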
static void
hvn_transmit_common(struct ifnet *ifp, struct hvn_tx_ring *txr,
    bool is_transmit)
{
	struct hvn_tx_desc *txd;
	struct mbuf *m;
	int l2hlen = ETHER_HDR_LEN;

	KASSERT(mutex_owned(&txr->txr_lock));

	if (!(ifp->if_flags & IFF_RUNNING))
		return;
	if (!is_transmit && (ifp->if_flags & IFF_OACTIVE))
		return;
	if (txr->txr_oactive)
		return;
	if (txr->txr_suspended)
		return;

	for (;;) {
		if (!hvn_txd_peek(txr)) {
			/* transient */
			if (!is_transmit)
				ifp->if_flags |= IFF_OACTIVE;
			txr->txr_oactive = 1;
			txr->txr_evnodesc.ev_count++;
			break;
		}

		if (is_transmit)
			m = pcq_get(txr->txr_interq);
		else
			IFQ_DEQUEUE(&ifp->if_snd, m);
		if (m == NULL)
			break;

#if defined(INET) || defined(INET6)
		if (m->m_pkthdr.csum_flags &
		    (M_CSUM_TCPv4|M_CSUM_UDPv4|M_CSUM_TCPv6|M_CSUM_UDPv6)) {
			m = hvn_set_hlen(m, &l2hlen);
			if (__predict_false(m == NULL)) {
				if_statinc(ifp, if_oerrors);
				continue;
			}
		}
#endif

		txd = hvn_txd_get(txr);
		if (hvn_encap(txr, txd, m, l2hlen)) {
			/* the chain is too large */
			if_statinc(ifp, if_oerrors);
			hvn_txd_put(txr, txd);
			m_freem(m);
			continue;
		}

		if (txr->txr_agg_pktleft == 0) {
			if (txr->txr_agg_txd != NULL) {
				hvn_flush_txagg(txr);
			} else {
				if (hvn_txpkt(txr, txd)) {
					/* txd is freed, but m is not. */
					m_freem(m);
					if_statinc(ifp, if_oerrors);
				}
			}
		}
	}

	/* Flush pending aggregated transmission. */
	if (txr->txr_agg_txd != NULL)
		hvn_flush_txagg(txr);
}

static void
hvn_start(struct ifnet *ifp)
{
	struct hvn_softc *sc = IFP2SC(ifp);
	struct hvn_tx_ring *txr = &sc->sc_txr[0];

	mutex_enter(&txr->txr_lock);
	hvn_transmit_common(ifp, txr, false);
	mutex_exit(&txr->txr_lock);
}

static int
hvn_select_txqueue(struct ifnet *ifp, struct mbuf *m __unused)
{
	struct hvn_softc *sc = IFP2SC(ifp);
	u_int cpu;

	cpu = cpu_index(curcpu());

	return cpu % sc->sc_ntxr_inuse;
}

static int
hvn_transmit(struct ifnet *ifp, struct mbuf *m)
{
	struct hvn_softc *sc = IFP2SC(ifp);
	struct hvn_tx_ring *txr;
	int qid;

	qid = hvn_select_txqueue(ifp, m);
	txr = &sc->sc_txr[qid];

	if (__predict_false(!pcq_put(txr->txr_interq, m))) {
		mutex_enter(&txr->txr_lock);
		txr->txr_evpcqdrop.ev_count++;
		mutex_exit(&txr->txr_lock);
		m_freem(m);
		return ENOBUFS;
	}

	kpreempt_disable();
	softint_schedule(txr->txr_si);
	kpreempt_enable();
	return 0;
}

static void
hvn_deferred_transmit(void *arg)
{
	struct hvn_tx_ring *txr = arg;
	struct hvn_softc *sc = txr->txr_softc;
	struct ifnet *ifp = SC2IFP(sc);

	mutex_enter(&txr->txr_lock);
	txr->txr_evtransmitdefer.ev_count++;
	hvn_transmit_common(ifp, txr, true);
	mutex_exit(&txr->txr_lock);
}

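/*
 * Append a per-packet-info record of the given type to the RNDIS
 * packet message and return a pointer to its data area.  The caller
 * must have reserved enough room (pktsize) for the record, which the
 * KASSERT below enforces.
 */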
static inline char *
hvn_rndis_pktinfo_append(struct rndis_packet_msg *pkt, size_t pktsize,
    size_t datalen, uint32_t type)
{
	struct rndis_pktinfo *pi;
	size_t pi_size = sizeof(*pi) + datalen;
	char *cp;

	KASSERT(pkt->rm_pktinfooffset + pkt->rm_pktinfolen + pi_size <=
	    pktsize);

	cp = (char *)pkt + pkt->rm_pktinfooffset + pkt->rm_pktinfolen;
	pi = (struct rndis_pktinfo *)cp;
	pi->rm_size = pi_size;
	pi->rm_type = type;
	pi->rm_pktinfooffset = sizeof(*pi);
	pkt->rm_pktinfolen += pi_size;
	pkt->rm_dataoffset += pi_size;
	pkt->rm_len += pi_size;

	return (char *)pi->rm_data;
}

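/*
 * Make sure the first len bytes of the mbuf chain are contiguous.
 * Returns NULL on failure; m_pullup() has then already freed the
 * chain.
 */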
static struct mbuf *
hvn_pullup_hdr(struct mbuf *m, int len)
{
	struct mbuf *mn;

	if (__predict_false(m->m_len < len)) {
		mn = m_pullup(m, len);
		if (mn == NULL)
			return NULL;
		m = mn;
	}
	return m;
}

/*
 * NOTE: If this function fails, the mbuf is freed.
 */
static struct mbuf *
hvn_set_hlen(struct mbuf *m, int *l2hlenp)
{
	const struct ether_header *eh;
	int l2hlen, off;

	m = hvn_pullup_hdr(m, sizeof(*eh));
	if (m == NULL)
		return NULL;

	eh = mtod(m, const struct ether_header *);
	if (eh->ether_type == ntohs(ETHERTYPE_VLAN))
		l2hlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	else
		l2hlen = ETHER_HDR_LEN;

#if defined(INET)
	if (m->m_pkthdr.csum_flags & (M_CSUM_TCPv4 | M_CSUM_UDPv4)) {
		const struct ip *ip;

		off = l2hlen + sizeof(*ip);
		m = hvn_pullup_hdr(m, off);
		if (m == NULL)
			return NULL;

		ip = (struct ip *)((mtod(m, uint8_t *)) + l2hlen);

		/*
		 * UDP checksum offload does not work in Azure if the
		 * following conditions are met:
		 * - sizeof(IP hdr + UDP hdr + payload) > 1420.
		 * - IP_DF is not set in the IP hdr.
		 *
		 * Fall back to software checksum for these UDP datagrams.
		 */
		if ((m->m_pkthdr.csum_flags & M_CSUM_UDPv4) &&
		    m->m_pkthdr.len > hvn_udpcs_fixup_mtu + l2hlen &&
		    !(ntohs(ip->ip_off) & IP_DF)) {
			uint16_t *csump;

			off = l2hlen +
			    M_CSUM_DATA_IPv4_IPHL(m->m_pkthdr.csum_data);
			m = hvn_pullup_hdr(m, off + sizeof(struct udphdr));
			if (m == NULL)
				return NULL;

			csump = (uint16_t *)(mtod(m, uint8_t *) + off +
			    M_CSUM_DATA_IPv4_OFFSET(m->m_pkthdr.csum_data));
			*csump = cpu_in_cksum(m, m->m_pkthdr.len - off, off, 0);
			m->m_pkthdr.csum_flags &= ~M_CSUM_UDPv4;
		}
	}
#endif /* INET */
#if defined(INET) && defined(INET6)
	else
#endif /* INET && INET6 */
#if defined(INET6)
	{
		const struct ip6_hdr *ip6;

		off = l2hlen + sizeof(*ip6);
		m = hvn_pullup_hdr(m, off);
		if (m == NULL)
			return NULL;

		ip6 = (struct ip6_hdr *)((mtod(m, uint8_t *)) + l2hlen);
		if (ip6->ip6_nxt != IPPROTO_TCP &&
		    ip6->ip6_nxt != IPPROTO_UDP) {
			m_freem(m);
			return NULL;
		}
	}
#endif /* INET6 */

	*l2hlenp = l2hlen;

	return m;
}

static int
hvn_flush_txagg(struct hvn_tx_ring *txr)
{
	struct hvn_softc *sc = txr->txr_softc;
	struct ifnet *ifp = SC2IFP(sc);
	struct hvn_tx_desc *txd;
	struct mbuf *m;
	int error, pkts;

	txd = txr->txr_agg_txd;
	KASSERTMSG(txd != NULL, "no aggregate txdesc");

	/*
	 * Since hvn_txpkt() will reset this temporary stat, save
	 * it now, so that oerrors can be updated properly, if
	 * hvn_txpkt() ever fails.
	 */
	pkts = txr->txr_stat_pkts;

	/*
	 * Since txd's mbuf will _not_ be freed upon hvn_txpkt()
	 * failure, save it for later freeing, if hvn_txpkt() ever
	 * fails.
	 */
	m = txd->txd_buf;
	error = hvn_txpkt(txr, txd);
	if (__predict_false(error)) {
		/* txd is freed, but m is not. */
		m_freem(m);
		txr->txr_evflushfailed.ev_count++;
		if_statadd(ifp, if_oerrors, pkts);
	}

	/* Reset all aggregation states. */
	txr->txr_agg_txd = NULL;
	txr->txr_agg_szleft = 0;
	txr->txr_agg_pktleft = 0;
	txr->txr_agg_prevpkt = NULL;

	return error;
}

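/*
 * Try to place the RNDIS packet for txd into the chimney (pre-pinned
 * send) buffer, aggregating it with packets already staged there when
 * the aggregation limits allow.  Returns a pointer into the chimney
 * buffer on success, or NULL when the packet has to go out via the
 * scatter/gather path.
 */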
static void *
hvn_try_txagg(struct hvn_tx_ring *txr, struct hvn_tx_desc *txd, int pktsz)
{
	struct hvn_softc *sc = txr->txr_softc;
	struct hvn_tx_desc *agg_txd;
	struct rndis_packet_msg *pkt;
	void *chim;
	int olen;

	if (txr->txr_agg_txd != NULL) {
		if (txr->txr_agg_pktleft > 0 && txr->txr_agg_szleft > pktsz) {
			agg_txd = txr->txr_agg_txd;
			pkt = txr->txr_agg_prevpkt;

			/*
			 * Update the previous RNDIS packet's total length;
			 * it can grow due to the mandatory alignment
			 * padding for this RNDIS packet.  Also update the
			 * aggregating txdesc's chimney sending buffer size
			 * accordingly.
			 *
			 * XXX
			 * Zero-out the padding, as required by the RNDIS spec.
			 */
			olen = pkt->rm_len;
			pkt->rm_len = roundup2(olen, txr->txr_agg_align);
			agg_txd->txd_chim_size += pkt->rm_len - olen;

			/* Link this txdesc to the parent. */
			hvn_txd_agg(agg_txd, txd);

			chim = (uint8_t *)pkt + pkt->rm_len;
			/* Save the current packet for later fixup. */
			txr->txr_agg_prevpkt = chim;

			txr->txr_agg_pktleft--;
			txr->txr_agg_szleft -= pktsz;
			if (txr->txr_agg_szleft <=
			    HVN_PKTSIZE_MIN(txr->txr_agg_align)) {
				/*
				 * Probably can't aggregate more packets,
				 * flush this aggregating txdesc proactively.
				 */
				txr->txr_agg_pktleft = 0;
			}

			/* Done! */
			return chim;
		}
		hvn_flush_txagg(txr);
	}

	txr->txr_evchimneytried.ev_count++;
	txd->txd_chim_index = hvn_chim_alloc(sc);
	if (txd->txd_chim_index == HVN_NVS_CHIM_IDX_INVALID)
		return NULL;
	txr->txr_evchimney.ev_count++;

	chim = sc->sc_chim + (txd->txd_chim_index * sc->sc_chim_szmax);

	if (txr->txr_agg_pktmax > 1 &&
	    txr->txr_agg_szmax > pktsz + HVN_PKTSIZE_MIN(txr->txr_agg_align)) {
		txr->txr_agg_txd = txd;
		txr->txr_agg_pktleft = txr->txr_agg_pktmax - 1;
		txr->txr_agg_szleft = txr->txr_agg_szmax - pktsz;
		txr->txr_agg_prevpkt = chim;
	}

	return chim;
}

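/*
 * Encapsulate m into txd's RNDIS packet message: move an in-line VLAN
 * tag into the mbuf packet header when the host handles VLAN tagging,
 * append per-packet-info records (hash value, VLAN tag, Tx checksum
 * offload) and stage the payload either in the chimney buffer or on
 * the scatter/gather list.
 */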
static int
hvn_encap(struct hvn_tx_ring *txr, struct hvn_tx_desc *txd, struct mbuf *m,
    int l2hlen)
{
	/* Used to pad ethernet frames with < ETHER_MIN_LEN bytes */
	static const char zero_pad[ETHER_MIN_LEN];
	struct hvn_softc *sc = txr->txr_softc;
	struct rndis_packet_msg *pkt;
	bus_dma_segment_t *seg;
	void *chim = NULL;
	size_t pktlen, pktsize;
	int l3hlen;
	int i, rv;

	if (ISSET(sc->sc_caps, HVN_CAPS_VLAN) && !vlan_has_tag(m)) {
		struct ether_vlan_header *evl;

		m = hvn_pullup_hdr(m, sizeof(*evl));
		if (m == NULL) {
			DPRINTF("%s: failed to pullup mbuf\n",
			    device_xname(sc->sc_dev));
			return -1;
		}

		evl = mtod(m, struct ether_vlan_header *);
		if (evl->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
			struct ether_header *eh;
			uint16_t proto = evl->evl_proto;

			vlan_set_tag(m, ntohs(evl->evl_tag));

			/*
			 * Trim VLAN tag from header.
			 */
			memmove((uint8_t *)evl + ETHER_VLAN_ENCAP_LEN,
			    evl, ETHER_HDR_LEN);
			m_adj(m, ETHER_VLAN_ENCAP_LEN);

			eh = mtod(m, struct ether_header *);
			eh->ether_type = proto;

			/*
			 * Re-padding.  See sys/net/if_vlan.c:vlan_start().
			 */
			if (m->m_pkthdr.len < (ETHER_MIN_LEN - ETHER_CRC_LEN +
			    ETHER_VLAN_ENCAP_LEN)) {
				m_copyback(m, m->m_pkthdr.len,
				    (ETHER_MIN_LEN - ETHER_CRC_LEN +
				     ETHER_VLAN_ENCAP_LEN) -
				    m->m_pkthdr.len, zero_pad);
			}

			txr->txr_evvlanfixup.ev_count++;
		}
	}

	pkt = txd->txd_req;
	pktsize = HVN_PKTSIZE(m, txr->txr_agg_align);
	if (pktsize < txr->txr_chim_size) {
		chim = hvn_try_txagg(txr, txd, pktsize);
		if (chim != NULL)
			pkt = chim;
	} else {
		if (txr->txr_agg_txd != NULL)
			hvn_flush_txagg(txr);
	}

	memset(pkt, 0, HVN_RNDIS_PKT_LEN);
	pkt->rm_type = REMOTE_NDIS_PACKET_MSG;
	pkt->rm_len = sizeof(*pkt) + m->m_pkthdr.len;
	pkt->rm_dataoffset = RNDIS_DATA_OFFSET;
	pkt->rm_datalen = m->m_pkthdr.len;
	pkt->rm_pktinfooffset = sizeof(*pkt);	/* adjusted below */
	pkt->rm_pktinfolen = 0;

	if (txr->txr_flags & HVN_TXR_FLAG_UDP_HASH) {
		char *cp;

		/*
		 * Set the hash value for this packet so that the host can
		 * dispatch the TX done event for this packet back to this
		 * TX ring's channel.
		 */
		cp = hvn_rndis_pktinfo_append(pkt, HVN_RNDIS_PKT_LEN,
		    HVN_NDIS_HASH_VALUE_SIZE, HVN_NDIS_PKTINFO_TYPE_HASHVAL);
		memcpy(cp, &txr->txr_id, HVN_NDIS_HASH_VALUE_SIZE);
	}

	if (vlan_has_tag(m)) {
		uint32_t vlan;
		char *cp;
		uint16_t tag;

		tag = vlan_get_tag(m);
		vlan = NDIS_VLAN_INFO_MAKE(EVL_VLANOFTAG(tag),
		    EVL_PRIOFTAG(tag), EVL_CFIOFTAG(tag));
		cp = hvn_rndis_pktinfo_append(pkt, HVN_RNDIS_PKT_LEN,
		    NDIS_VLAN_INFO_SIZE, NDIS_PKTINFO_TYPE_VLAN);
		memcpy(cp, &vlan, NDIS_VLAN_INFO_SIZE);
		txr->txr_evvlanhwtagging.ev_count++;
	}

	if (m->m_pkthdr.csum_flags & txr->txr_csum_assist) {
		uint32_t csum;
		char *cp;

		if (m->m_pkthdr.csum_flags & (M_CSUM_TCPv6 | M_CSUM_UDPv6)) {
			csum = NDIS_TXCSUM_INFO_IPV6;
			l3hlen = M_CSUM_DATA_IPv6_IPHL(m->m_pkthdr.csum_data);
			if (m->m_pkthdr.csum_flags & M_CSUM_TCPv6)
				csum |= NDIS_TXCSUM_INFO_MKTCPCS(l2hlen +
				    l3hlen);
			if (m->m_pkthdr.csum_flags & M_CSUM_UDPv6)
				csum |= NDIS_TXCSUM_INFO_MKUDPCS(l2hlen +
				    l3hlen);
		} else {
			csum = NDIS_TXCSUM_INFO_IPV4;
			l3hlen = M_CSUM_DATA_IPv4_IPHL(m->m_pkthdr.csum_data);
			if (m->m_pkthdr.csum_flags & M_CSUM_IPv4)
				csum |= NDIS_TXCSUM_INFO_IPCS;
			if (m->m_pkthdr.csum_flags & M_CSUM_TCPv4)
				csum |= NDIS_TXCSUM_INFO_MKTCPCS(l2hlen +
				    l3hlen);
			if (m->m_pkthdr.csum_flags & M_CSUM_UDPv4)
				csum |= NDIS_TXCSUM_INFO_MKUDPCS(l2hlen +
				    l3hlen);
		}
		cp = hvn_rndis_pktinfo_append(pkt, HVN_RNDIS_PKT_LEN,
		    NDIS_TXCSUM_INFO_SIZE, NDIS_PKTINFO_TYPE_CSUM);
		memcpy(cp, &csum, NDIS_TXCSUM_INFO_SIZE);
	}

	pktlen = pkt->rm_pktinfooffset + pkt->rm_pktinfolen;
	pkt->rm_pktinfooffset -= RNDIS_HEADER_OFFSET;

	/*
	 * Fast path: Chimney sending.
	 */
	if (chim != NULL) {
		struct hvn_tx_desc *tgt_txd;

		tgt_txd = (txr->txr_agg_txd != NULL) ? txr->txr_agg_txd : txd;

		KASSERTMSG(pkt == chim,
		    "RNDIS pkt not in chimney sending buffer");
		KASSERTMSG(tgt_txd->txd_chim_index != HVN_NVS_CHIM_IDX_INVALID,
		    "chimney sending buffer is not used");

		tgt_txd->txd_chim_size += pkt->rm_len;
		m_copydata(m, 0, m->m_pkthdr.len, (uint8_t *)chim + pktlen);

		txr->txr_sendpkt = hvn_rndis_output_chim;
		goto done;
	}

	KASSERTMSG(txr->txr_agg_txd == NULL, "aggregating sglist txdesc");
	KASSERTMSG(txd->txd_chim_index == HVN_NVS_CHIM_IDX_INVALID,
	    "chimney buffer is used");
	KASSERTMSG(pkt == txd->txd_req, "RNDIS pkt not in txdesc");

	rv = bus_dmamap_load_mbuf(sc->sc_dmat, txd->txd_dmap, m, BUS_DMA_READ |
	    BUS_DMA_NOWAIT);
	switch (rv) {
	case 0:
		break;
	case EFBIG:
		if (m_defrag(m, M_NOWAIT) != NULL) {
			txr->txr_evdefrag.ev_count++;
			if (bus_dmamap_load_mbuf(sc->sc_dmat, txd->txd_dmap, m,
			    BUS_DMA_READ | BUS_DMA_NOWAIT) == 0)
				break;
		}
		/* FALLTHROUGH */
	default:
		DPRINTF("%s: failed to load mbuf\n", device_xname(sc->sc_dev));
		txr->txr_evdmafailed.ev_count++;
		return -1;
	}
	bus_dmamap_sync(sc->sc_dmat, txd->txd_dmap,
	    0, txd->txd_dmap->dm_mapsize, BUS_DMASYNC_PREWRITE);
	SET(txd->txd_flags, HVN_TXD_FLAG_DMAMAP);

	/* Attach an RNDIS message to the first slot */
	txd->txd_sgl[0].gpa_page = txd->txd_gpa.gpa_page;
	txd->txd_sgl[0].gpa_ofs = txd->txd_gpa.gpa_ofs;
	txd->txd_sgl[0].gpa_len = pktlen;
	txd->txd_nsge = txd->txd_dmap->dm_nsegs + 1;

	for (i = 0; i < txd->txd_dmap->dm_nsegs; i++) {
		seg = &txd->txd_dmap->dm_segs[i];
		txd->txd_sgl[1 + i].gpa_page = atop(seg->ds_addr);
		txd->txd_sgl[1 + i].gpa_ofs = seg->ds_addr & PAGE_MASK;
		txd->txd_sgl[1 + i].gpa_len = seg->ds_len;
	}

	txd->txd_chim_index = HVN_NVS_CHIM_IDX_INVALID;
	txd->txd_chim_size = 0;
	txr->txr_sendpkt = hvn_rndis_output_sgl;
done:
	txd->txd_buf = m;

	/* Update temporary stats for later use. */
	txr->txr_stat_pkts++;
	txr->txr_stat_size += m->m_pkthdr.len;
	if (m->m_flags & M_MCAST)
		txr->txr_stat_mcasts++;

	return 0;
}

static void
hvn_bpf_mtap(struct hvn_tx_ring *txr, struct mbuf *m, u_int direction)
{
	struct hvn_softc *sc = txr->txr_softc;
	struct ifnet *ifp = SC2IFP(sc);
	struct ether_header *eh;
	struct ether_vlan_header evl;

	if (!vlan_has_tag(m)) {
		bpf_mtap(ifp, m, direction);
		return;
	}

	if (ifp->if_bpf == NULL)
		return;

	txr->txr_evvlantap.ev_count++;

	/*
	 * Restore a VLAN tag for bpf.
	 *
	 * Do not modify contents of the original mbuf,
	 * because Tx processing on the mbuf is still in progress.
	 */

	eh = mtod(m, struct ether_header *);
	memcpy(evl.evl_dhost, eh->ether_dhost, ETHER_ADDR_LEN * 2);
	evl.evl_encap_proto = htons(ETHERTYPE_VLAN);
	evl.evl_tag = htons(vlan_get_tag(m));
	evl.evl_proto = eh->ether_type;

	/* Do not tap ether header of the original mbuf. */
	m_adj(m, sizeof(*eh));

	bpf_mtap2(ifp->if_bpf, &evl, sizeof(evl), m, direction);

	/*
	 * The Ethernet header of the original mbuf cannot be restored,
	 * but that is harmless because the mbuf is freed right after.
	 */
}

static int
hvn_txpkt(struct hvn_tx_ring *txr, struct hvn_tx_desc *txd)
{
	struct hvn_softc *sc = txr->txr_softc;
	struct ifnet *ifp = SC2IFP(sc);
	const struct hvn_tx_desc *tmp_txd;
	int error;

	/*
	 * Make sure that this txd and any aggregated txds are not
	 * freed before bpf_mtap.
	 */
	hvn_txd_hold(txd);

	error = (*txr->txr_sendpkt)(txr, txd);
	if (error == 0) {
		hvn_bpf_mtap(txr, txd->txd_buf, BPF_D_OUT);
		STAILQ_FOREACH(tmp_txd, &txd->txd_agg_list, txd_agg_entry)
			hvn_bpf_mtap(txr, tmp_txd->txd_buf, BPF_D_OUT);

		if_statadd(ifp, if_opackets, txr->txr_stat_pkts);
		if_statadd(ifp, if_obytes, txr->txr_stat_size);
		if (txr->txr_stat_mcasts != 0)
			if_statadd(ifp, if_omcasts, txr->txr_stat_mcasts);
		txr->txr_evpkts.ev_count += txr->txr_stat_pkts;
		txr->txr_evsends.ev_count++;
	}

	hvn_txd_put(txr, txd);

	if (__predict_false(error)) {
		/*
		 * Caller will perform further processing on the
		 * associated mbuf, so don't free it in hvn_txd_put();
		 * only unload it from the DMA map in hvn_txd_put(),
		 * if it was loaded.
		 */
		txd->txd_buf = NULL;
		hvn_txd_put(txr, txd);
	}

	/* Reset temporary stats, after this sending is done. */
	txr->txr_stat_pkts = 0;
	txr->txr_stat_size = 0;
	txr->txr_stat_mcasts = 0;

	return error;
}

static void
hvn_txeof(struct hvn_tx_ring *txr, uint64_t tid)
{
	struct hvn_softc *sc = txr->txr_softc;
	struct hvn_tx_desc *txd;
	uint32_t id = tid >> 32;

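	/*
	 * Packet sends place the txdesc id, offset by HVN_NVS_CHIM_SIG,
	 * in the upper 32 bits of the transaction id and leave the
	 * lower 32 bits zero; anything else is not a packet completion
	 * and is ignored here.
	 */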
	if ((tid & 0xffffffffU) != 0)
		return;

	id -= HVN_NVS_CHIM_SIG;
	if (id >= HVN_TX_DESC) {
		device_printf(sc->sc_dev, "tx packet index too large: %u\n",
		    id);
		return;
	}

	txd = &txr->txr_desc[id];

	if (txd->txd_buf == NULL)
		device_printf(sc->sc_dev, "no mbuf @%u\n", id);

	hvn_txd_put(txr, txd);
}

static int
hvn_rx_ring_create(struct hvn_softc *sc, int ring_cnt)
{
	struct hvn_rx_ring *rxr;
	int i;

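	/*
	 * The Rx buffer is a single large DMA area shared with the
	 * host; NVS protocol version 2 and earlier limit it to 15MB.
	 */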
	if (sc->sc_proto <= HVN_NVS_PROTO_VERSION_2)
		sc->sc_rx_size = 15 * 1024 * 1024;	/* 15MB */
	else
		sc->sc_rx_size = 16 * 1024 * 1024;	/* 16MB */
	sc->sc_rx_ring = hyperv_dma_alloc(sc->sc_dmat, &sc->sc_rx_dma,
	    sc->sc_rx_size, PAGE_SIZE, PAGE_SIZE, sc->sc_rx_size / PAGE_SIZE);
	if (sc->sc_rx_ring == NULL) {
		DPRINTF("%s: failed to allocate Rx ring buffer\n",
		    device_xname(sc->sc_dev));
		return -1;
	}

	sc->sc_rxr = kmem_zalloc(sizeof(*rxr) * ring_cnt, KM_SLEEP);
	sc->sc_nrxr_inuse = sc->sc_nrxr = ring_cnt;

	for (i = 0; i < sc->sc_nrxr; i++) {
		rxr = &sc->sc_rxr[i];
		rxr->rxr_softc = sc;
		if (i < sc->sc_ntxr) {
			rxr->rxr_txr = &sc->sc_txr[i];
			rxr->rxr_txr->txr_rxr = rxr;
		}

		mutex_init(&rxr->rxr_lock, MUTEX_DEFAULT, IPL_NET);
		mutex_init(&rxr->rxr_onwork_lock, MUTEX_DEFAULT, IPL_NET);
		cv_init(&rxr->rxr_onwork_cv, "waitonwk");

		snprintf(rxr->rxr_name, sizeof(rxr->rxr_name),
		    "%s-rx%d", device_xname(sc->sc_dev), i);
		evcnt_attach_dynamic(&rxr->rxr_evpkts, EVCNT_TYPE_MISC,
		    NULL, rxr->rxr_name, "packets received");
		evcnt_attach_dynamic(&rxr->rxr_evcsum_ip, EVCNT_TYPE_MISC,
		    NULL, rxr->rxr_name, "IP checksum");
		evcnt_attach_dynamic(&rxr->rxr_evcsum_tcp, EVCNT_TYPE_MISC,
		    NULL, rxr->rxr_name, "TCP checksum");
		evcnt_attach_dynamic(&rxr->rxr_evcsum_udp, EVCNT_TYPE_MISC,
		    NULL, rxr->rxr_name, "UDP checksum");
		evcnt_attach_dynamic(&rxr->rxr_evvlanhwtagging, EVCNT_TYPE_MISC,
		    NULL, rxr->rxr_name, "VLAN H/W tagging");
		evcnt_attach_dynamic(&rxr->rxr_evintr, EVCNT_TYPE_INTR,
		    NULL, rxr->rxr_name, "interrupt on ring");
		evcnt_attach_dynamic(&rxr->rxr_evdefer, EVCNT_TYPE_MISC,
		    NULL, rxr->rxr_name, "handled queue in workqueue");
		evcnt_attach_dynamic(&rxr->rxr_evdeferreq, EVCNT_TYPE_MISC,
		    NULL, rxr->rxr_name, "requested defer on ring");
		evcnt_attach_dynamic(&rxr->rxr_evredeferreq, EVCNT_TYPE_MISC,
		    NULL, rxr->rxr_name, "requested defer in workqueue");

		rxr->rxr_nvsbuf = kmem_zalloc(HVN_NVS_BUFSIZE, KM_SLEEP);
		if (rxr->rxr_nvsbuf == NULL) {
			DPRINTF("%s: failed to allocate channel data buffer\n",
			    device_xname(sc->sc_dev));
			goto errout;
		}

		rxr->rxr_si = softint_establish(SOFTINT_NET | SOFTINT_MPSAFE,
		    hvn_nvs_softintr, rxr);
		if (rxr->rxr_si == NULL) {
			DPRINTF("%s: failed to establish rx softint\n",
			    device_xname(sc->sc_dev));
			goto errout;
		}
	}

	return 0;

errout:
	hvn_rx_ring_destroy(sc);
	return -1;
}

static int
hvn_rx_ring_destroy(struct hvn_softc *sc)
{
	struct hvn_rx_ring *rxr;
	int i;

	if (sc->sc_rxr != NULL) {
		for (i = 0; i < sc->sc_nrxr; i++) {
			rxr = &sc->sc_rxr[i];

			if (rxr->rxr_si != NULL) {
				softint_disestablish(rxr->rxr_si);
				rxr->rxr_si = NULL;
			}

			if (rxr->rxr_nvsbuf != NULL) {
				kmem_free(rxr->rxr_nvsbuf, HVN_NVS_BUFSIZE);
				rxr->rxr_nvsbuf = NULL;
			}

			evcnt_detach(&rxr->rxr_evpkts);
			evcnt_detach(&rxr->rxr_evcsum_ip);
			evcnt_detach(&rxr->rxr_evcsum_tcp);
			evcnt_detach(&rxr->rxr_evcsum_udp);
			evcnt_detach(&rxr->rxr_evvlanhwtagging);
			evcnt_detach(&rxr->rxr_evintr);
			evcnt_detach(&rxr->rxr_evdefer);
			evcnt_detach(&rxr->rxr_evdeferreq);
			evcnt_detach(&rxr->rxr_evredeferreq);

			cv_destroy(&rxr->rxr_onwork_cv);
			mutex_destroy(&rxr->rxr_onwork_lock);
			mutex_destroy(&rxr->rxr_lock);
		}
		kmem_free(sc->sc_rxr, sizeof(*rxr) * sc->sc_nrxr);
		sc->sc_rxr = NULL;
		sc->sc_nrxr = 0;
	}
	if (sc->sc_rx_ring != NULL) {
		hyperv_dma_free(sc->sc_dmat, &sc->sc_rx_dma);
		sc->sc_rx_ring = NULL;
	}

	return 0;
}

static void
hvn_fixup_rx_data(struct hvn_softc *sc)
{
	struct hvn_rx_ring *rxr;
	int i;

	if (sc->sc_caps & HVN_CAPS_UDPHASH) {
		for (i = 0; i < sc->sc_nrxr; i++) {
			rxr = &sc->sc_rxr[i];
			rxr->rxr_flags |= HVN_RXR_FLAG_UDP_HASH;
		}
	}
}
1932
1933 static int
1934 hvn_tx_ring_create(struct hvn_softc *sc, int ring_cnt)
1935 {
1936 struct hvn_tx_ring *txr;
1937 struct hvn_tx_desc *txd;
1938 bus_dma_segment_t *seg;
1939 size_t msgsize;
1940 int i, j;
1941 paddr_t pa;
1942
1943 /*
1944 * Create TXBUF for chimney sending.
1945 *
1946 * NOTE: It is shared by all channels.
1947 */
1948 sc->sc_chim = hyperv_dma_alloc(sc->sc_dmat, &sc->sc_chim_dma,
1949 HVN_CHIM_SIZE, PAGE_SIZE, 0, 1);
1950 if (sc->sc_chim == NULL) {
1951 		DPRINTF("%s: failed to allocate chimney sending memory\n",
1952 device_xname(sc->sc_dev));
1953 goto errout;
1954 }
1955
1956 sc->sc_txr = kmem_zalloc(sizeof(*txr) * ring_cnt, KM_SLEEP);
1957 sc->sc_ntxr_inuse = sc->sc_ntxr = ring_cnt;
1958
1959 msgsize = roundup(HVN_RNDIS_PKT_LEN, 128);
1960
1961 for (j = 0; j < ring_cnt; j++) {
1962 txr = &sc->sc_txr[j];
1963 txr->txr_softc = sc;
1964 txr->txr_id = j;
1965
1966 mutex_init(&txr->txr_lock, MUTEX_DEFAULT, IPL_NET);
1967 txr->txr_interq = pcq_create(HVN_TX_DESC, KM_SLEEP);
1968
1969 snprintf(txr->txr_name, sizeof(txr->txr_name),
1970 "%s-tx%d", device_xname(sc->sc_dev), j);
1971 evcnt_attach_dynamic(&txr->txr_evpkts, EVCNT_TYPE_MISC,
1972 		    NULL, txr->txr_name, "packets transmitted");
1973 evcnt_attach_dynamic(&txr->txr_evsends, EVCNT_TYPE_MISC,
1974 NULL, txr->txr_name, "sends");
1975 evcnt_attach_dynamic(&txr->txr_evnodesc, EVCNT_TYPE_MISC,
1976 NULL, txr->txr_name, "descriptor shortage");
1977 evcnt_attach_dynamic(&txr->txr_evdmafailed, EVCNT_TYPE_MISC,
1978 NULL, txr->txr_name, "DMA failure");
1979 evcnt_attach_dynamic(&txr->txr_evdefrag, EVCNT_TYPE_MISC,
1980 		    NULL, txr->txr_name, "mbuf defragged");
1981 evcnt_attach_dynamic(&txr->txr_evpcqdrop, EVCNT_TYPE_MISC,
1982 NULL, txr->txr_name, "dropped in pcq");
1983 evcnt_attach_dynamic(&txr->txr_evtransmitdefer, EVCNT_TYPE_MISC,
1984 NULL, txr->txr_name, "deferred transmit");
1985 evcnt_attach_dynamic(&txr->txr_evflushfailed, EVCNT_TYPE_MISC,
1986 NULL, txr->txr_name, "aggregation flush failure");
1987 evcnt_attach_dynamic(&txr->txr_evchimneytried, EVCNT_TYPE_MISC,
1988 NULL, txr->txr_name, "chimney send tried");
1989 evcnt_attach_dynamic(&txr->txr_evchimney, EVCNT_TYPE_MISC,
1990 NULL, txr->txr_name, "chimney send");
1991 evcnt_attach_dynamic(&txr->txr_evvlanfixup, EVCNT_TYPE_MISC,
1992 NULL, txr->txr_name, "VLAN fixup");
1993 evcnt_attach_dynamic(&txr->txr_evvlanhwtagging, EVCNT_TYPE_MISC,
1994 NULL, txr->txr_name, "VLAN H/W tagging");
1995 evcnt_attach_dynamic(&txr->txr_evvlantap, EVCNT_TYPE_MISC,
1996 NULL, txr->txr_name, "VLAN bpf_mtap fixup");
1997
1998 txr->txr_si = softint_establish(SOFTINT_NET | SOFTINT_MPSAFE,
1999 hvn_deferred_transmit, txr);
2000 if (txr->txr_si == NULL) {
2001 aprint_error_dev(sc->sc_dev,
2002 "failed to establish softint for tx ring\n");
2003 goto errout;
2004 }
2005
2006 /* Allocate memory to store RNDIS messages */
2007 txr->txr_msgs = hyperv_dma_alloc(sc->sc_dmat, &txr->txr_dma,
2008 msgsize * HVN_TX_DESC, PAGE_SIZE, 0, 1);
2009 if (txr->txr_msgs == NULL) {
2010 			DPRINTF("%s: failed to allocate memory for RNDIS "
2011 "messages\n", device_xname(sc->sc_dev));
2012 goto errout;
2013 }
2014
2015 TAILQ_INIT(&txr->txr_list);
2016 for (i = 0; i < HVN_TX_DESC; i++) {
2017 txd = &txr->txr_desc[i];
2018 txd->txd_chim_index = HVN_NVS_CHIM_IDX_INVALID;
2019 txd->txd_chim_size = 0;
2020 STAILQ_INIT(&txd->txd_agg_list);
2021 if (bus_dmamap_create(sc->sc_dmat, HVN_TX_PKT_SIZE,
2022 HVN_TX_FRAGS, HVN_TX_FRAG_SIZE, PAGE_SIZE,
2023 BUS_DMA_WAITOK, &txd->txd_dmap)) {
2024 DPRINTF("%s: failed to create map for TX "
2025 "descriptors\n", device_xname(sc->sc_dev));
2026 goto errout;
2027 }
2028 seg = &txr->txr_dma.map->dm_segs[0];
2029 pa = seg->ds_addr + (msgsize * i);
2030 txd->txd_gpa.gpa_page = atop(pa);
2031 txd->txd_gpa.gpa_ofs = pa & PAGE_MASK;
2032 txd->txd_gpa.gpa_len = msgsize;
2033 txd->txd_req = (void *)(txr->txr_msgs + (msgsize * i));
2034 txd->txd_id = i + HVN_NVS_CHIM_SIG;
2035 TAILQ_INSERT_TAIL(&txr->txr_list, txd, txd_entry);
2036 }
2037 txr->txr_avail = HVN_TX_DESC;
2038 }
2039
2040 return 0;
2041
2042 errout:
2043 hvn_tx_ring_destroy(sc);
2044 return -1;
2045 }
2046
2047 static void
2048 hvn_tx_ring_destroy(struct hvn_softc *sc)
2049 {
2050 struct hvn_tx_ring *txr;
2051 struct hvn_tx_desc *txd;
2052 int i, j;
2053
2054 if (sc->sc_txr != NULL) {
2055 for (j = 0; j < sc->sc_ntxr; j++) {
2056 txr = &sc->sc_txr[j];
2057
2058 mutex_enter(&txr->txr_lock);
2059 for (i = 0; i < HVN_TX_DESC; i++) {
2060 txd = &txr->txr_desc[i];
2061 hvn_txd_gc(txr, txd);
2062 }
2063 mutex_exit(&txr->txr_lock);
2064 for (i = 0; i < HVN_TX_DESC; i++) {
2065 txd = &txr->txr_desc[i];
2066 if (txd->txd_dmap != NULL) {
2067 bus_dmamap_destroy(sc->sc_dmat,
2068 txd->txd_dmap);
2069 txd->txd_dmap = NULL;
2070 }
2071 }
2072 if (txr->txr_msgs != NULL) {
2073 hyperv_dma_free(sc->sc_dmat, &txr->txr_dma);
2074 txr->txr_msgs = NULL;
2075 }
2076 if (txr->txr_si != NULL) {
2077 softint_disestablish(txr->txr_si);
2078 txr->txr_si = NULL;
2079 }
2080 if (txr->txr_interq != NULL) {
2081 hvn_tx_ring_qflush(sc, txr);
2082 pcq_destroy(txr->txr_interq);
2083 txr->txr_interq = NULL;
2084 }
2085
2086 evcnt_detach(&txr->txr_evpkts);
2087 evcnt_detach(&txr->txr_evsends);
2088 evcnt_detach(&txr->txr_evnodesc);
2089 evcnt_detach(&txr->txr_evdmafailed);
2090 evcnt_detach(&txr->txr_evdefrag);
2091 evcnt_detach(&txr->txr_evpcqdrop);
2092 evcnt_detach(&txr->txr_evtransmitdefer);
2093 evcnt_detach(&txr->txr_evflushfailed);
2094 evcnt_detach(&txr->txr_evchimneytried);
2095 evcnt_detach(&txr->txr_evchimney);
2096 evcnt_detach(&txr->txr_evvlanfixup);
2097 evcnt_detach(&txr->txr_evvlanhwtagging);
2098 evcnt_detach(&txr->txr_evvlantap);
2099
2100 mutex_destroy(&txr->txr_lock);
2101 }
2102
2103 kmem_free(sc->sc_txr, sizeof(*txr) * sc->sc_ntxr);
2104 sc->sc_txr = NULL;
2105 }
2106
2107 if (sc->sc_chim != NULL) {
2108 hyperv_dma_free(sc->sc_dmat, &sc->sc_chim_dma);
2109 sc->sc_chim = NULL;
2110 }
2111 }
2112
2113 static void
2114 hvn_set_chim_size(struct hvn_softc *sc, int chim_size)
2115 {
2116 struct hvn_tx_ring *txr;
2117 int i;
2118
2119 for (i = 0; i < sc->sc_ntxr_inuse; i++) {
2120 txr = &sc->sc_txr[i];
2121 txr->txr_chim_size = chim_size;
2122 }
2123 }
2124
2125 #if LONG_BIT == 64
2126 #define ffsl(v) ffs64(v)
2127 #elif LONG_BIT == 32
2128 #define ffsl(v) ffs32(v)
2129 #else
2130 #error unsupported LONG_BIT
2131 #endif /* LONG_BIT */
2132
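/*
 * Allocate one chimney sending buffer section.
 *
 * sc_chim_bmap is an array of u_longs in which each bit tracks one
 * section of the shared TXBUF; a set bit means "in use".  ffsl(~word)
 * yields the 1-based position of the lowest clear bit.  For example,
 * with sc_chim_bmap[0] == 0x7, ~0x7UL has bit 3 as its lowest set bit,
 * ffsl() returns 4, bit 3 is marked busy and chim_idx 3 is handed out.
 * HVN_NVS_CHIM_IDX_INVALID is returned when every section is busy.
 */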
2133 static uint32_t
2134 hvn_chim_alloc(struct hvn_softc *sc)
2135 {
2136 uint32_t chim_idx = HVN_NVS_CHIM_IDX_INVALID;
2137 int i, idx;
2138
2139 mutex_spin_enter(&sc->sc_chim_bmap_lock);
2140 for (i = 0; i < sc->sc_chim_bmap_cnt; i++) {
2141 idx = ffsl(~sc->sc_chim_bmap[i]);
2142 if (idx == 0)
2143 continue;
2144
2145 --idx; /* ffsl is 1-based */
2146 SET(sc->sc_chim_bmap[i], __BIT(idx));
2147
2148 chim_idx = i * LONG_BIT + idx;
2149 break;
2150 }
2151 mutex_spin_exit(&sc->sc_chim_bmap_lock);
2152
2153 return chim_idx;
2154 }
2155
2156 static void
2157 hvn_chim_free(struct hvn_softc *sc, uint32_t chim_idx)
2158 {
2159 u_long mask;
2160 uint32_t idx;
2161
2162 idx = chim_idx / LONG_BIT;
2163 mask = __BIT(chim_idx % LONG_BIT);
2164
2165 mutex_spin_enter(&sc->sc_chim_bmap_lock);
2166 CLR(sc->sc_chim_bmap[idx], mask);
2167 mutex_spin_exit(&sc->sc_chim_bmap_lock);
2168 }
2169
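/*
 * Propagate the offload capabilities negotiated with the host to all
 * TX rings: each HVN_CAPS_* checksum bit is translated into the
 * matching IFCAP_CSUM_* interface capability and M_CSUM_* mbuf flag.
 * Note that only the IPv4 offloads advertise an Rx side here; the v6
 * cases are Tx-only.  The chimney send threshold is applied first,
 * optionally clamped by the hvn_tx_chimney_size tunable.
 */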
2170 static void
2171 hvn_fixup_tx_data(struct hvn_softc *sc)
2172 {
2173 struct hvn_tx_ring *txr;
2174 uint64_t caps_assist;
2175 int csum_assist;
2176 int i;
2177
2178 hvn_set_chim_size(sc, sc->sc_chim_szmax);
2179 if (hvn_tx_chimney_size > 0 && hvn_tx_chimney_size < sc->sc_chim_szmax)
2180 hvn_set_chim_size(sc, hvn_tx_chimney_size);
2181
2182 caps_assist = 0;
2183 csum_assist = 0;
2184 if (sc->sc_caps & HVN_CAPS_IPCS) {
2185 caps_assist |= IFCAP_CSUM_IPv4_Tx;
2186 caps_assist |= IFCAP_CSUM_IPv4_Rx;
2187 csum_assist |= M_CSUM_IPv4;
2188 }
2189 if (sc->sc_caps & HVN_CAPS_TCP4CS) {
2190 caps_assist |= IFCAP_CSUM_TCPv4_Tx;
2191 caps_assist |= IFCAP_CSUM_TCPv4_Rx;
2192 csum_assist |= M_CSUM_TCPv4;
2193 }
2194 if (sc->sc_caps & HVN_CAPS_TCP6CS) {
2195 caps_assist |= IFCAP_CSUM_TCPv6_Tx;
2196 csum_assist |= M_CSUM_TCPv6;
2197 }
2198 if (sc->sc_caps & HVN_CAPS_UDP4CS) {
2199 caps_assist |= IFCAP_CSUM_UDPv4_Tx;
2200 caps_assist |= IFCAP_CSUM_UDPv4_Rx;
2201 csum_assist |= M_CSUM_UDPv4;
2202 }
2203 if (sc->sc_caps & HVN_CAPS_UDP6CS) {
2204 caps_assist |= IFCAP_CSUM_UDPv6_Tx;
2205 csum_assist |= M_CSUM_UDPv6;
2206 }
2207 for (i = 0; i < sc->sc_ntxr; i++) {
2208 txr = &sc->sc_txr[i];
2209 txr->txr_caps_assist = caps_assist;
2210 txr->txr_csum_assist = csum_assist;
2211 }
2212
2213 if (sc->sc_caps & HVN_CAPS_UDPHASH) {
2214 for (i = 0; i < sc->sc_ntxr; i++) {
2215 txr = &sc->sc_txr[i];
2216 txr->txr_flags |= HVN_TXR_FLAG_UDP_HASH;
2217 }
2218 }
2219 }
2220
2221 static int
2222 hvn_txd_peek(struct hvn_tx_ring *txr)
2223 {
2224
2225 KASSERT(mutex_owned(&txr->txr_lock));
2226
2227 return txr->txr_avail;
2228 }
2229
2230 static struct hvn_tx_desc *
2231 hvn_txd_get(struct hvn_tx_ring *txr)
2232 {
2233 struct hvn_tx_desc *txd;
2234
2235 KASSERT(mutex_owned(&txr->txr_lock));
2236
2237 txd = TAILQ_FIRST(&txr->txr_list);
2238 KASSERT(txd != NULL);
2239 TAILQ_REMOVE(&txr->txr_list, txd, txd_entry);
2240 txr->txr_avail--;
2241
2242 txd->txd_refs = 1;
2243
2244 return txd;
2245 }
2246
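/*
 * Drop one reference on a TX descriptor.  On the 1->0 transition the
 * descriptor is torn down in order: descriptors aggregated onto it are
 * released first, then its chimney section or loaded DMA map, then the
 * mbuf chain, and finally the descriptor is put back on the ring's
 * free list and the ring is marked no longer "oactive".
 */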
2247 static void
2248 hvn_txd_put(struct hvn_tx_ring *txr, struct hvn_tx_desc *txd)
2249 {
2250 struct hvn_softc *sc = txr->txr_softc;
2251 struct hvn_tx_desc *tmp_txd;
2252
2253 KASSERT(mutex_owned(&txr->txr_lock));
2254 KASSERTMSG(!ISSET(txd->txd_flags, HVN_TXD_FLAG_ONAGG),
2255 "put an onagg txd %#x", txd->txd_flags);
2256
2257 KASSERTMSG(txd->txd_refs > 0, "invalid txd refs %d", txd->txd_refs);
2258 if (atomic_dec_uint_nv(&txd->txd_refs) != 0)
2259 return;
2260
2261 if (!STAILQ_EMPTY(&txd->txd_agg_list)) {
2262 while ((tmp_txd = STAILQ_FIRST(&txd->txd_agg_list)) != NULL) {
2263 KASSERTMSG(STAILQ_EMPTY(&tmp_txd->txd_agg_list),
2264 			    "recursive aggregation on aggregated txdesc");
2265 KASSERTMSG(
2266 ISSET(tmp_txd->txd_flags, HVN_TXD_FLAG_ONAGG),
2267 "not aggregated txdesc");
2268 KASSERTMSG(
2269 tmp_txd->txd_chim_index == HVN_NVS_CHIM_IDX_INVALID,
2270 "aggregated txdesc consumes chimney sending "
2271 "buffer: idx %u", tmp_txd->txd_chim_index);
2272 KASSERTMSG(tmp_txd->txd_chim_size == 0,
2273 "aggregated txdesc has non-zero chimney sending "
2274 "size: sz %u", tmp_txd->txd_chim_size);
2275
2276 STAILQ_REMOVE_HEAD(&txd->txd_agg_list, txd_agg_entry);
2277 CLR(tmp_txd->txd_flags, HVN_TXD_FLAG_ONAGG);
2278 hvn_txd_put(txr, tmp_txd);
2279 }
2280 }
2281
2282 if (txd->txd_chim_index != HVN_NVS_CHIM_IDX_INVALID) {
2283 KASSERTMSG(!ISSET(txd->txd_flags, HVN_TXD_FLAG_DMAMAP),
2284 "chim txd uses dmamap");
2285 hvn_chim_free(sc, txd->txd_chim_index);
2286 txd->txd_chim_index = HVN_NVS_CHIM_IDX_INVALID;
2287 txd->txd_chim_size = 0;
2288 } else if (ISSET(txd->txd_flags, HVN_TXD_FLAG_DMAMAP)) {
2289 bus_dmamap_sync(sc->sc_dmat, txd->txd_dmap,
2290 0, txd->txd_dmap->dm_mapsize,
2291 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2292 bus_dmamap_unload(sc->sc_dmat, txd->txd_dmap);
2293 CLR(txd->txd_flags, HVN_TXD_FLAG_DMAMAP);
2294 }
2295
2296 if (txd->txd_buf != NULL) {
2297 m_freem(txd->txd_buf);
2298 txd->txd_buf = NULL;
2299 }
2300
2301 TAILQ_INSERT_TAIL(&txr->txr_list, txd, txd_entry);
2302 txr->txr_avail++;
2303 txr->txr_oactive = 0;
2304 }
2305
2306 static void
2307 hvn_txd_gc(struct hvn_tx_ring *txr, struct hvn_tx_desc *txd)
2308 {
2309
2310 KASSERTMSG(txd->txd_refs == 0 || txd->txd_refs == 1,
2311 "invalid txd refs %d", txd->txd_refs);
2312
2313 /* Aggregated txds will be freed by their aggregating txd. */
2314 if (txd->txd_refs > 0 && !ISSET(txd->txd_flags, HVN_TXD_FLAG_ONAGG))
2315 hvn_txd_put(txr, txd);
2316 }
2317
2318 static void
2319 hvn_txd_hold(struct hvn_tx_desc *txd)
2320 {
2321
2322 /* 0->1 transition will never work */
2323 KASSERTMSG(txd->txd_refs > 0, "invalid txd refs %d", txd->txd_refs);
2324
2325 atomic_inc_uint(&txd->txd_refs);
2326 }
2327
2328 static void
2329 hvn_txd_agg(struct hvn_tx_desc *agg_txd, struct hvn_tx_desc *txd)
2330 {
2331
2332 KASSERTMSG(!ISSET(agg_txd->txd_flags, HVN_TXD_FLAG_ONAGG),
2333 "recursive aggregation on aggregating txdesc");
2334 KASSERTMSG(!ISSET(txd->txd_flags, HVN_TXD_FLAG_ONAGG),
2335 "already aggregated");
2336 KASSERTMSG(STAILQ_EMPTY(&txd->txd_agg_list),
2337 "recursive aggregation on to-be-aggregated txdesc");
2338
2339 SET(txd->txd_flags, HVN_TXD_FLAG_ONAGG);
2340 STAILQ_INSERT_TAIL(&agg_txd->txd_agg_list, txd, txd_agg_entry);
2341 }
2342
2343 static int
2344 hvn_tx_ring_pending(struct hvn_tx_ring *txr)
2345 {
2346 int pending = 0;
2347
2348 mutex_enter(&txr->txr_lock);
2349 if (hvn_txd_peek(txr) != HVN_TX_DESC)
2350 pending = 1;
2351 mutex_exit(&txr->txr_lock);
2352
2353 return pending;
2354 }
2355
2356 static void
2357 hvn_tx_ring_qflush(struct hvn_softc *sc, struct hvn_tx_ring *txr)
2358 {
2359 struct mbuf *m;
2360
2361 while ((m = pcq_get(txr->txr_interq)) != NULL)
2362 m_freem(m);
2363 }
2364
2365 static int
2366 hvn_get_lladdr(struct hvn_softc *sc, uint8_t *enaddr)
2367 {
2368 size_t addrlen = ETHER_ADDR_LEN;
2369 int rv;
2370
2371 rv = hvn_rndis_query(sc, OID_802_3_PERMANENT_ADDRESS, enaddr, &addrlen);
2372 if (rv == 0 && addrlen != ETHER_ADDR_LEN)
2373 rv = -1;
2374 return rv;
2375 }
2376
2377 static void
2378 hvn_update_link_status(struct hvn_softc *sc)
2379 {
2380 struct ifnet *ifp = SC2IFP(sc);
2381 uint32_t state, old_link_state;
2382 size_t len = sizeof(state);
2383 int rv;
2384
2385 rv = hvn_rndis_query(sc, OID_GEN_MEDIA_CONNECT_STATUS, &state, &len);
2386 if (rv != 0 || len != sizeof(state))
2387 return;
2388
2389 old_link_state = sc->sc_link_state;
2390 sc->sc_link_state = (state == NDIS_MEDIA_STATE_CONNECTED) ?
2391 LINK_STATE_UP : LINK_STATE_DOWN;
2392 if (old_link_state != sc->sc_link_state) {
2393 if_link_state_change(ifp, sc->sc_link_state);
2394 }
2395 }
2396
2397 static int
2398 hvn_get_mtu(struct hvn_softc *sc, uint32_t *mtu)
2399 {
2400 size_t mtusz = sizeof(*mtu);
2401 int rv;
2402
2403 rv = hvn_rndis_query(sc, OID_GEN_MAXIMUM_FRAME_SIZE, mtu, &mtusz);
2404 if (rv == 0 && mtusz != sizeof(*mtu))
2405 rv = -1;
2406 return rv;
2407 }
2408
2409 static int
2410 hvn_channel_attach(struct hvn_softc *sc, struct vmbus_channel *chan)
2411 {
2412 struct hvn_rx_ring *rxr;
2413 struct hvn_tx_ring *txr;
2414 int idx;
2415
2416 idx = chan->ch_subidx;
2417 if (idx < 0 || idx >= sc->sc_nrxr_inuse) {
2418 		DPRINTF("%s: invalid sub-channel %d\n",
2419 device_xname(sc->sc_dev), idx);
2420 return -1;
2421 }
2422
2423 rxr = &sc->sc_rxr[idx];
2424 rxr->rxr_chan = chan;
2425
2426 if (idx < sc->sc_ntxr_inuse) {
2427 txr = &sc->sc_txr[idx];
2428 txr->txr_chan = chan;
2429 }
2430
2431 /* Bind this channel to a proper CPU. */
2432 vmbus_channel_cpu_set(chan, HVN_RING_IDX2CPU(sc, idx));
2433
2434 chan->ch_flags &= ~CHF_BATCHED;
2435
2436 /* Associate our interrupt handler with the channel */
2437 if (vmbus_channel_open(chan,
2438 HVN_RING_BUFSIZE - sizeof(struct vmbus_bufring), NULL, 0,
2439 hvn_nvs_intr, rxr)) {
2440 DPRINTF("%s: failed to open channel\n",
2441 device_xname(sc->sc_dev));
2442 return -1;
2443 }
2444
2445 return 0;
2446 }
2447
2448 static void
2449 hvn_channel_detach(struct hvn_softc *sc, struct vmbus_channel *chan)
2450 {
2451
2452 vmbus_channel_close_direct(chan);
2453 }
2454
2455 static void
2456 hvn_channel_detach_all(struct hvn_softc *sc)
2457 {
2458 struct vmbus_channel **subchans;
2459 int i, subchan_cnt = sc->sc_nrxr_inuse - 1;
2460
2461 if (subchan_cnt > 0) {
2462 /* Detach the sub-channels. */
2463 subchans = vmbus_subchannel_get(sc->sc_prichan, subchan_cnt);
2464 for (i = 0; i < subchan_cnt; i++)
2465 hvn_channel_detach(sc, subchans[i]);
2466 vmbus_subchannel_rel(subchans, subchan_cnt);
2467 }
2468
2469 /*
2470 * Detach the primary channel, _after_ all sub-channels
2471 * are detached.
2472 */
2473 hvn_channel_detach(sc, sc->sc_prichan);
2474
2475 /* Wait for sub-channels to be destroyed, if any. */
2476 vmbus_subchannel_drain(sc->sc_prichan);
2477 }
2478
2479 static int
2480 hvn_subchannel_attach(struct hvn_softc *sc)
2481 {
2482 struct vmbus_channel **subchans;
2483 int subchan_cnt = sc->sc_nrxr_inuse - 1;
2484 int i, error = 0;
2485
2486 KASSERTMSG(subchan_cnt > 0, "no sub-channels");
2487
2488 /* Attach the sub-channels. */
2489 subchans = vmbus_subchannel_get(sc->sc_prichan, subchan_cnt);
2490 for (i = 0; i < subchan_cnt; ++i) {
2491 int error1;
2492
2493 error1 = hvn_channel_attach(sc, subchans[i]);
2494 if (error1) {
2495 error = error1;
2496 /* Move on; all channels will be detached later. */
2497 }
2498 }
2499 vmbus_subchannel_rel(subchans, subchan_cnt);
2500
2501 if (error) {
2502 aprint_error_dev(sc->sc_dev,
2503 "sub-channels attach failed: %d\n", error);
2504 return error;
2505 }
2506
2507 aprint_debug_dev(sc->sc_dev, "%d sub-channels attached\n",
2508 subchan_cnt);
2509 return 0;
2510 }
2511
2512 static int
2513 hvn_synth_alloc_subchannels(struct hvn_softc *sc, int *nsubch)
2514 {
2515 struct vmbus_channel **subchans;
2516 int error, nchan, rxr_cnt;
2517
2518 nchan = *nsubch + 1;
2519 if (nchan < 2) {
2520 /* Multiple RX/TX rings are not requested. */
2521 *nsubch = 0;
2522 return 0;
2523 }
2524
2525 /*
2526 * Query RSS capabilities, e.g. # of RX rings, and # of indirect
2527 * table entries.
2528 */
2529 if (hvn_get_rsscaps(sc, &rxr_cnt)) {
2530 /* No RSS. */
2531 *nsubch = 0;
2532 return 0;
2533 }
2534
2535 	aprint_debug_dev(sc->sc_dev, "RX rings offered %d, requested %d\n",
2536 rxr_cnt, nchan);
2537
2538 if (nchan > rxr_cnt)
2539 nchan = rxr_cnt;
2540 if (nchan == 1) {
2541 aprint_debug_dev(sc->sc_dev,
2542 "only 1 channel is supported, no vRSS\n");
2543 *nsubch = 0;
2544 return 0;
2545 }
2546
2547 *nsubch = nchan - 1;
2548 error = hvn_nvs_alloc_subchannels(sc, nsubch);
2549 if (error || *nsubch == 0) {
2550 /* Failed to allocate sub-channels. */
2551 *nsubch = 0;
2552 return 0;
2553 }
2554
2555 /*
2556 * Wait for all sub-channels to become ready before moving on.
2557 */
2558 subchans = vmbus_subchannel_get(sc->sc_prichan, *nsubch);
2559 vmbus_subchannel_rel(subchans, *nsubch);
2560 return 0;
2561 }
2562
2563 static int
2564 hvn_synth_attachable(const struct hvn_softc *sc)
2565 {
2566 #if 0
2567 const struct hvn_rx_ring *rxr;
2568 int i;
2569
2570 for (i = 0; i < sc->sc_nrxr; i++) {
2571 rxr = &sc->sc_rxr[i];
2572 if (rxr->rxr_flags)
2573 return 0;
2574 }
2575 #endif
2576 return 1;
2577 }
2578
2579 /*
2580 * Make sure that the RX filter is zero after the successful
2581 * RNDIS initialization.
2582 *
2583 * NOTE:
2584 * Under certain conditions on certain versions of Hyper-V,
2585 * the RNDIS rxfilter is _not_ zero on the hypervisor side
2586 * after the successful RNDIS initialization, which breaks
2587 * the assumption of any following code (well, it breaks the
2588 * RNDIS API contract actually). Clear the RNDIS rxfilter
2589 * explicitly, drain packets sneaking through, and drain the
2590 * interrupt taskqueues scheduled due to the stealth packets.
2591 */
2592 static void
2593 hvn_init_fixat(struct hvn_softc *sc, int nchan)
2594 {
2595
2596 hvn_disable_rx(sc);
2597 hvn_drain_rxtx(sc, nchan);
2598 }
2599
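/*
 * Compute the effective TX aggregation limits.  The configured size
 * and packet count are clamped to what RNDIS offered and to the
 * chimney section size, since only chimney sends are aggregated.
 * Aggregation is disabled outright (size == pkts == 0) when the size
 * cannot hold at least two minimal packets, when at most one packet
 * may be aggregated, or when the required alignment does not fit the
 * per-ring 'short' field.
 */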
2600 static void
2601 hvn_set_txagg(struct hvn_softc *sc)
2602 {
2603 struct hvn_tx_ring *txr;
2604 uint32_t size, pkts;
2605 int i;
2606
2607 /*
2608 * Setup aggregation size.
2609 */
2610 if (sc->sc_agg_size < 0)
2611 size = UINT32_MAX;
2612 else
2613 size = sc->sc_agg_size;
2614
2615 if (size > sc->sc_rndis_agg_size)
2616 size = sc->sc_rndis_agg_size;
2617
2618 /* NOTE: We only aggregate packets using chimney sending buffers. */
2619 if (size > (uint32_t)sc->sc_chim_szmax)
2620 size = sc->sc_chim_szmax;
2621
2622 if (size <= 2 * HVN_PKTSIZE_MIN(sc->sc_rndis_agg_align)) {
2623 /* Disable */
2624 size = 0;
2625 pkts = 0;
2626 goto done;
2627 }
2628
2629 /* NOTE: Type of the per TX ring setting is 'int'. */
2630 if (size > INT_MAX)
2631 size = INT_MAX;
2632
2633 /*
2634 * Setup aggregation packet count.
2635 */
2636 if (sc->sc_agg_pkts < 0)
2637 pkts = UINT32_MAX;
2638 else
2639 pkts = sc->sc_agg_pkts;
2640
2641 if (pkts > sc->sc_rndis_agg_pkts)
2642 pkts = sc->sc_rndis_agg_pkts;
2643
2644 if (pkts <= 1) {
2645 /* Disable */
2646 size = 0;
2647 pkts = 0;
2648 goto done;
2649 }
2650
2651 /* NOTE: Type of the per TX ring setting is 'short'. */
2652 if (pkts > SHRT_MAX)
2653 pkts = SHRT_MAX;
2654
2655 done:
2656 /* NOTE: Type of the per TX ring setting is 'short'. */
2657 if (sc->sc_rndis_agg_align > SHRT_MAX) {
2658 /* Disable */
2659 size = 0;
2660 pkts = 0;
2661 }
2662
2663 aprint_verbose_dev(sc->sc_dev,
2664 "TX aggregate size %u, pkts %u, align %u\n",
2665 size, pkts, sc->sc_rndis_agg_align);
2666
2667 for (i = 0; i < sc->sc_ntxr_inuse; ++i) {
2668 txr = &sc->sc_txr[i];
2669
2670 mutex_enter(&txr->txr_lock);
2671 txr->txr_agg_szmax = size;
2672 txr->txr_agg_pktmax = pkts;
2673 txr->txr_agg_align = sc->sc_rndis_agg_align;
2674 mutex_exit(&txr->txr_lock);
2675 }
2676 }
2677
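/*
 * Bring up the synthetic parts in strict order: primary channel, NVS,
 * RNDIS, offload capabilities, then the optional sub-channels followed
 * by RSS key/indirect table setup and TX aggregation fixup.  Each
 * error label below unwinds exactly the steps completed so far, and
 * the saved capabilities are restored on failure.
 */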
2678 static int
2679 hvn_synth_attach(struct hvn_softc *sc, int mtu)
2680 {
2681 uint8_t rss_key[RSS_KEYSIZE];
2682 uint32_t old_caps;
2683 int nchan = 1, nsubch;
2684 int i, error;
2685
2686 if (!hvn_synth_attachable(sc))
2687 return ENXIO;
2688
2689 /* Save capabilities for later verification. */
2690 old_caps = sc->sc_caps;
2691 sc->sc_caps = 0;
2692
2693 /* Clear RSS stuffs. */
2694 sc->sc_rss_ind_size = 0;
2695 sc->sc_rss_hash = 0;
2696 sc->sc_rss_hcap = 0;
2697
2698 /*
2699 * Attach the primary channel _before_ attaching NVS and RNDIS.
2700 */
2701 error = hvn_channel_attach(sc, sc->sc_prichan);
2702 if (error) {
2703 aprint_error_dev(sc->sc_dev,
2704 "failed to attach primary channel\n");
2705 goto failed;
2706 }
2707
2708 /*
2709 * Attach NVS.
2710 */
2711 error = hvn_nvs_attach(sc, mtu);
2712 if (error) {
2713 aprint_error_dev(sc->sc_dev, "failed to init NVSP\n");
2714 goto detach_channel;
2715 }
2716
2717 /*
2718 * Attach RNDIS _after_ NVS is attached.
2719 */
2720 error = hvn_rndis_attach(sc, mtu);
2721 if (error) {
2722 aprint_error_dev(sc->sc_dev, "failed to init RNDIS\n");
2723 goto detach_nvs;
2724 }
2725
2726 error = hvn_set_capabilities(sc, mtu);
2727 if (error) {
2728 aprint_error_dev(sc->sc_dev, "failed to setup offloading\n");
2729 goto detach_rndis;
2730 }
2731
2732 if ((sc->sc_flags & HVN_SCF_ATTACHED) && old_caps != sc->sc_caps) {
2733 device_printf(sc->sc_dev, "caps mismatch "
2734 "old 0x%08x, new 0x%08x\n", old_caps, sc->sc_caps);
2735 error = ENXIO;
2736 goto detach_rndis;
2737 }
2738
2739 /*
2740 * Allocate sub-channels for multi-TX/RX rings.
2741 *
2742 * NOTE:
2743 * The # of RX rings that can be used is equivalent to the # of
2744 * channels to be requested.
2745 */
2746 nsubch = sc->sc_nrxr - 1;
2747 error = hvn_synth_alloc_subchannels(sc, &nsubch);
2748 if (error) {
2749 aprint_error_dev(sc->sc_dev,
2750 "failed to allocate sub channels\n");
2751 goto detach_synth;
2752 }
2753
2754 /*
2755 * Set the # of TX/RX rings that could be used according to
2756 * the # of channels that NVS offered.
2757 */
2758 nchan = nsubch + 1;
2759 hvn_set_ring_inuse(sc, nchan);
2760
2761 if (nchan > 1) {
2762 /*
2763 * Attach the sub-channels.
2764 *
2765 * NOTE: hvn_set_ring_inuse() _must_ have been called.
2766 */
2767 error = hvn_subchannel_attach(sc);
2768 if (error) {
2769 aprint_error_dev(sc->sc_dev,
2770 "failed to attach sub channels\n");
2771 goto detach_synth;
2772 }
2773
2774 /*
2775 * Configure RSS key and indirect table _after_ all sub-channels
2776 * are attached.
2777 */
2778 if (!(sc->sc_flags & HVN_SCF_HAS_RSSKEY)) {
2779 /* Set the default RSS key. */
2780 CTASSERT(sizeof(sc->sc_rss.rss_key) == sizeof(rss_key));
2781 rss_getkey(rss_key);
2782 memcpy(&sc->sc_rss.rss_key, rss_key,
2783 sizeof(sc->sc_rss.rss_key));
2784 sc->sc_flags |= HVN_SCF_HAS_RSSKEY;
2785 }
2786
2787 if (!(sc->sc_flags & HVN_SCF_HAS_RSSIND)) {
2788 /* Setup RSS indirect table in round-robin fashion. */
2789 for (i = 0; i < NDIS_HASH_INDCNT; i++) {
2790 sc->sc_rss.rss_ind[i] = i % nchan;
2791 }
2792 sc->sc_flags |= HVN_SCF_HAS_RSSIND;
2793 } else {
2794 /*
2795 * # of usable channels may be changed, so we have to
2796 			 * make sure that all entries in the RSS indirect table
2797 * are valid.
2798 *
2799 * NOTE: hvn_set_ring_inuse() _must_ have been called.
2800 */
2801 hvn_fixup_rss_ind(sc);
2802 }
2803
2804 sc->sc_rss_hash = sc->sc_rss_hcap;
2805 error = hvn_set_rss(sc, NDIS_RSS_FLAG_NONE);
2806 if (error) {
2807 aprint_error_dev(sc->sc_dev, "failed to setup RSS\n");
2808 goto detach_synth;
2809 }
2810 }
2811
2812 /*
2813 * Fixup transmission aggregation setup.
2814 */
2815 hvn_set_txagg(sc);
2816 hvn_init_fixat(sc, nchan);
2817 return 0;
2818
2819 detach_synth:
2820 hvn_init_fixat(sc, nchan);
2821 hvn_synth_detach(sc);
2822 return error;
2823
2824 detach_rndis:
2825 hvn_init_fixat(sc, nchan);
2826 hvn_rndis_detach(sc);
2827 detach_nvs:
2828 hvn_nvs_detach(sc);
2829 detach_channel:
2830 hvn_channel_detach(sc, sc->sc_prichan);
2831 failed:
2832 /* Restore old capabilities. */
2833 sc->sc_caps = old_caps;
2834 return error;
2835 }
2836
2837 static void
2838 hvn_synth_detach(struct hvn_softc *sc)
2839 {
2840
2841 /* Detach the RNDIS first. */
2842 hvn_rndis_detach(sc);
2843
2844 /* Detach NVS. */
2845 hvn_nvs_detach(sc);
2846
2847 /* Detach all of the channels. */
2848 hvn_channel_detach_all(sc);
2849
2850 if (sc->sc_prichan->ch_sc->sc_proto >= VMBUS_VERSION_WIN10 &&
2851 sc->sc_rx_hndl) {
2852 /*
2853 * Host is post-Win2016, disconnect RXBUF from primary channel
2854 * here.
2855 */
2856 vmbus_handle_free(sc->sc_prichan, sc->sc_rx_hndl);
2857 sc->sc_rx_hndl = 0;
2858 }
2859
2860 if (sc->sc_prichan->ch_sc->sc_proto >= VMBUS_VERSION_WIN10 &&
2861 sc->sc_chim_hndl) {
2862 /*
2863 * Host is post-Win2016, disconnect chimney sending buffer
2864 * from primary channel here.
2865 */
2866 vmbus_handle_free(sc->sc_prichan, sc->sc_chim_hndl);
2867 sc->sc_chim_hndl = 0;
2868 }
2869 }
2870
2871 static void
2872 hvn_set_ring_inuse(struct hvn_softc *sc, int ring_cnt)
2873 {
2874
2875 if (sc->sc_ntxr > ring_cnt)
2876 sc->sc_ntxr_inuse = ring_cnt;
2877 else
2878 sc->sc_ntxr_inuse = sc->sc_ntxr;
2879 sc->sc_nrxr_inuse = ring_cnt;
2880 }
2881
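/*
 * Drain one channel: poll it with hvn_nvs_intr1() until both bufring
 * directions are empty (the TX side is skipped once the primary
 * channel is revoked, as the hypervisor will no longer drain it), then
 * wait for any deferred softint/workqueue pass on the matching RX ring
 * to finish.
 */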
2882 static void
2883 hvn_channel_drain(struct hvn_softc *sc, struct vmbus_channel *chan)
2884 {
2885 struct hvn_rx_ring *rxr;
2886 int i, s;
2887
2888 for (rxr = NULL, i = 0; i < sc->sc_nrxr_inuse; i++) {
2889 rxr = &sc->sc_rxr[i];
2890 if (rxr->rxr_chan == chan)
2891 break;
2892 }
2893 KASSERT(i < sc->sc_nrxr_inuse);
2894
2895 /*
2896 * NOTE:
2897 * The TX bufring will not be drained by the hypervisor,
2898 * if the primary channel is revoked.
2899 */
2900 while (!vmbus_channel_rx_empty(chan) ||
2901 (!vmbus_channel_is_revoked(sc->sc_prichan) &&
2902 !vmbus_channel_tx_empty(chan))) {
2903 DELAY(20);
2904 s = splnet();
2905 hvn_nvs_intr1(rxr, sc->sc_tx_process_limit,
2906 sc->sc_rx_process_limit);
2907 splx(s);
2908 }
2909
2910 mutex_enter(&rxr->rxr_onwork_lock);
2911 while (rxr->rxr_onlist || rxr->rxr_onproc)
2912 cv_wait(&rxr->rxr_onwork_cv, &rxr->rxr_onwork_lock);
2913 mutex_exit(&rxr->rxr_onwork_lock);
2914 }
2915
2916 static void
2917 hvn_disable_rx(struct hvn_softc *sc)
2918 {
2919
2920 /*
2921 * Disable RX by clearing RX filter forcefully.
2922 */
2923 (void)hvn_rndis_close(sc); /* ignore error */
2924
2925 /*
2926 * Give RNDIS enough time to flush all pending data packets.
2927 */
2928 DELAY(200);
2929 }
2930
2931 static void
2932 hvn_drain_rxtx(struct hvn_softc *sc, int nchan)
2933 {
2934 struct vmbus_channel **subchans = NULL;
2935 int i, nsubch;
2936
2937 /*
2938 * Drain RX/TX bufrings and interrupts.
2939 */
2940 nsubch = nchan - 1;
2941 if (nsubch > 0)
2942 subchans = vmbus_subchannel_get(sc->sc_prichan, nsubch);
2943
2944 if (subchans != NULL) {
2945 for (i = 0; i < nsubch; ++i)
2946 hvn_channel_drain(sc, subchans[i]);
2947 }
2948 hvn_channel_drain(sc, sc->sc_prichan);
2949
2950 if (subchans != NULL)
2951 vmbus_subchannel_rel(subchans, nsubch);
2952 }
2953
2954 static void
2955 hvn_suspend_data(struct hvn_softc *sc)
2956 {
2957 struct hvn_tx_ring *txr;
2958 int i, s;
2959
2960 /*
2961 * Suspend TX.
2962 */
2963 for (i = 0; i < sc->sc_ntxr_inuse; i++) {
2964 txr = &sc->sc_txr[i];
2965
2966 mutex_enter(&txr->txr_lock);
2967 txr->txr_suspended = 1;
2968 mutex_exit(&txr->txr_lock);
2969 		/* No one is able to send more packets now. */
2970
2971 /*
2972 * Wait for all pending sends to finish.
2973 *
2974 * NOTE:
2975 * We will _not_ receive all pending send-done, if the
2976 * primary channel is revoked.
2977 */
2978 while (hvn_tx_ring_pending(txr) &&
2979 !vmbus_channel_is_revoked(sc->sc_prichan)) {
2980 DELAY(20);
2981 s = splnet();
2982 hvn_nvs_intr1(txr->txr_rxr, sc->sc_tx_process_limit,
2983 sc->sc_rx_process_limit);
2984 splx(s);
2985 }
2986 }
2987
2988 /*
2989 * Disable RX.
2990 */
2991 hvn_disable_rx(sc);
2992
2993 /*
2994 * Drain RX/TX.
2995 */
2996 hvn_drain_rxtx(sc, sc->sc_nrxr_inuse);
2997 }
2998
2999 static void
3000 hvn_suspend_mgmt(struct hvn_softc *sc)
3001 {
3002
3003 sc->sc_link_suspend = true;
3004 callout_halt(&sc->sc_link_tmout, NULL);
3005
3006 /* Drain link state task */
3007 mutex_enter(&sc->sc_link_lock);
3008 for (;;) {
3009 if (!sc->sc_link_onproc)
3010 break;
3011 mutex_exit(&sc->sc_link_lock);
3012 DELAY(20);
3013 mutex_enter(&sc->sc_link_lock);
3014 }
3015 mutex_exit(&sc->sc_link_lock);
3016 }
3017
3018 static void
3019 hvn_suspend(struct hvn_softc *sc)
3020 {
3021 struct ifnet *ifp = SC2IFP(sc);
3022
3023 if (ifp->if_flags & IFF_RUNNING)
3024 hvn_suspend_data(sc);
3025 hvn_suspend_mgmt(sc);
3026 }
3027
3028 static void
3029 hvn_resume_tx(struct hvn_softc *sc, int ring_cnt)
3030 {
3031 struct hvn_tx_ring *txr;
3032 int i;
3033
3034 for (i = 0; i < ring_cnt; i++) {
3035 txr = &sc->sc_txr[i];
3036 mutex_enter(&txr->txr_lock);
3037 txr->txr_suspended = 0;
3038 mutex_exit(&txr->txr_lock);
3039 }
3040 }
3041
3042 static void
3043 hvn_resume_data(struct hvn_softc *sc)
3044 {
3045 struct ifnet *ifp = SC2IFP(sc);
3046 struct hvn_tx_ring *txr;
3047 int i;
3048
3049 /*
3050 * Re-enable RX.
3051 */
3052 hvn_rndis_open(sc);
3053
3054 /*
3055 * Make sure to clear suspend status on "all" TX rings,
3056 * since sc_ntxr_inuse can be changed after hvn_suspend_data().
3057 */
3058 hvn_resume_tx(sc, sc->sc_ntxr);
3059
3060 /*
3061 	 * Flush unused mbufs, since sc_ntxr_inuse may be reduced.
3062 */
3063 for (i = sc->sc_ntxr_inuse; i < sc->sc_ntxr; i++)
3064 hvn_tx_ring_qflush(sc, &sc->sc_txr[i]);
3065
3066 /*
3067 * Kick start TX.
3068 */
3069 for (i = 0; i < sc->sc_ntxr_inuse; i++) {
3070 txr = &sc->sc_txr[i];
3071 mutex_enter(&txr->txr_lock);
3072 txr->txr_oactive = 0;
3073
3074 /* ALTQ */
3075 if (txr->txr_id == 0)
3076 if_schedule_deferred_start(ifp);
3077 softint_schedule(txr->txr_si);
3078 mutex_exit(&txr->txr_lock);
3079 }
3080 }
3081
3082 static void
3083 hvn_resume_mgmt(struct hvn_softc *sc)
3084 {
3085
3086 sc->sc_link_suspend = false;
3087 hvn_link_event(sc, HVN_LINK_EV_RESUME_NETWORK);
3088 }
3089
3090 static void
3091 hvn_resume(struct hvn_softc *sc)
3092 {
3093 struct ifnet *ifp = SC2IFP(sc);
3094
3095 if (ifp->if_flags & IFF_RUNNING)
3096 hvn_resume_data(sc);
3097 hvn_resume_mgmt(sc);
3098 }
3099
3100 static int
3101 hvn_nvs_init(struct hvn_softc *sc)
3102 {
3103
3104 mutex_init(&sc->sc_nvsrsp_lock, MUTEX_DEFAULT, IPL_NET);
3105 cv_init(&sc->sc_nvsrsp_cv, "nvsrspcv");
3106
3107 return 0;
3108 }
3109
3110 static void
3111 hvn_nvs_destroy(struct hvn_softc *sc)
3112 {
3113
3114 mutex_destroy(&sc->sc_nvsrsp_lock);
3115 cv_destroy(&sc->sc_nvsrsp_cv);
3116 }
3117
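/*
 * Run a single NVS INIT exchange proposing exactly one protocol
 * version (nvs_ver_min == nvs_ver_max), so that hvn_nvs_attach() can
 * probe the versions it supports one at a time, newest first.
 */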
3118 static int
3119 hvn_nvs_doinit(struct hvn_softc *sc, uint32_t proto)
3120 {
3121 struct hvn_nvs_init cmd;
3122 struct hvn_nvs_init_resp *rsp;
3123 uint64_t tid;
3124 int error;
3125
3126 memset(&cmd, 0, sizeof(cmd));
3127 cmd.nvs_type = HVN_NVS_TYPE_INIT;
3128 cmd.nvs_ver_min = cmd.nvs_ver_max = proto;
3129
3130 tid = atomic_inc_uint_nv(&sc->sc_nvstid);
3131 mutex_enter(&sc->sc_nvsrsp_lock);
3132 error = hvn_nvs_cmd(sc, &cmd, sizeof(cmd), tid, 0);
3133 if (error == 0) {
3134 rsp = (struct hvn_nvs_init_resp *)&sc->sc_nvsrsp;
3135 if (rsp->nvs_status != HVN_NVS_STATUS_OK)
3136 error = EINVAL;
3137 }
3138 mutex_exit(&sc->sc_nvsrsp_lock);
3139
3140 return error;
3141 }
3142
3143 static int
3144 hvn_nvs_conf_ndis(struct hvn_softc *sc, int mtu)
3145 {
3146 struct hvn_nvs_ndis_conf cmd;
3147 uint64_t tid;
3148 int error;
3149
3150 memset(&cmd, 0, sizeof(cmd));
3151 cmd.nvs_type = HVN_NVS_TYPE_NDIS_CONF;
3152 cmd.nvs_mtu = mtu + ETHER_HDR_LEN;
3153 cmd.nvs_caps = HVN_NVS_NDIS_CONF_VLAN;
3154
3155 tid = atomic_inc_uint_nv(&sc->sc_nvstid);
3156 mutex_enter(&sc->sc_nvsrsp_lock);
3157 /* NOTE: No response. */
3158 error = hvn_nvs_cmd(sc, &cmd, sizeof(cmd), tid, 0);
3159 mutex_exit(&sc->sc_nvsrsp_lock);
3160
3161 if (error == 0)
3162 sc->sc_caps |= HVN_CAPS_MTU | HVN_CAPS_VLAN;
3163 return error;
3164 }
3165
3166 static int
3167 hvn_nvs_init_ndis(struct hvn_softc *sc)
3168 {
3169 struct hvn_nvs_ndis_init cmd;
3170 uint64_t tid;
3171 int error;
3172
3173 memset(&cmd, 0, sizeof(cmd));
3174 cmd.nvs_type = HVN_NVS_TYPE_NDIS_INIT;
3175 cmd.nvs_ndis_major = (sc->sc_ndisver & 0xffff0000) >> 16;
3176 cmd.nvs_ndis_minor = sc->sc_ndisver & 0x0000ffff;
3177
3178 tid = atomic_inc_uint_nv(&sc->sc_nvstid);
3179 mutex_enter(&sc->sc_nvsrsp_lock);
3180 /* NOTE: No response. */
3181 error = hvn_nvs_cmd(sc, &cmd, sizeof(cmd), tid, 0);
3182 mutex_exit(&sc->sc_nvsrsp_lock);
3183
3184 return error;
3185 }
3186
3187 static int
3188 hvn_nvs_attach(struct hvn_softc *sc, int mtu)
3189 {
3190 static const uint32_t protos[] = {
3191 HVN_NVS_PROTO_VERSION_5,
3192 HVN_NVS_PROTO_VERSION_4,
3193 HVN_NVS_PROTO_VERSION_2,
3194 HVN_NVS_PROTO_VERSION_1
3195 };
3196 int i;
3197
3198 if (hyperv_ver_major >= 10)
3199 sc->sc_caps |= HVN_CAPS_UDPHASH;
3200
3201 /*
3202 * Initialize NVS.
3203 */
3204 if (sc->sc_flags & HVN_SCF_ATTACHED) {
3205 /*
3206 * NVS version and NDIS version MUST NOT be changed.
3207 */
3208 DPRINTF("%s: reinit NVS version %#x, NDIS version %u.%u\n",
3209 device_xname(sc->sc_dev), sc->sc_proto,
3210 (sc->sc_ndisver >> 16), sc->sc_ndisver & 0xffff);
3211
3212 if (hvn_nvs_doinit(sc, sc->sc_proto)) {
3213 DPRINTF("%s: failed to reinit NVSP version %#x\n",
3214 device_xname(sc->sc_dev), sc->sc_proto);
3215 return -1;
3216 }
3217 } else {
3218 /*
3219 * Find the supported NVS version and set NDIS version
3220 * accordingly.
3221 */
3222 for (i = 0; i < __arraycount(protos); i++) {
3223 if (hvn_nvs_doinit(sc, protos[i]) == 0)
3224 break;
3225 }
3226 if (i == __arraycount(protos)) {
3227 DPRINTF("%s: failed to negotiate NVSP version\n",
3228 device_xname(sc->sc_dev));
3229 return -1;
3230 }
3231
3232 sc->sc_proto = protos[i];
3233 if (sc->sc_proto <= HVN_NVS_PROTO_VERSION_4)
3234 sc->sc_ndisver = NDIS_VERSION_6_1;
3235 else
3236 sc->sc_ndisver = NDIS_VERSION_6_30;
3237
3238 DPRINTF("%s: NVS version %#x, NDIS version %u.%u\n",
3239 device_xname(sc->sc_dev), sc->sc_proto,
3240 (sc->sc_ndisver >> 16), sc->sc_ndisver & 0xffff);
3241 }
3242
3243 if (sc->sc_proto >= HVN_NVS_PROTO_VERSION_5)
3244 sc->sc_caps |= HVN_CAPS_HASHVAL;
3245
3246 if (sc->sc_proto >= HVN_NVS_PROTO_VERSION_2) {
3247 /*
3248 * Configure NDIS before initializing it.
3249 */
3250 if (hvn_nvs_conf_ndis(sc, mtu))
3251 return -1;
3252 }
3253
3254 /*
3255 * Initialize NDIS.
3256 */
3257 if (hvn_nvs_init_ndis(sc))
3258 return -1;
3259
3260 /*
3261 * Connect RXBUF.
3262 */
3263 if (hvn_nvs_connect_rxbuf(sc))
3264 return -1;
3265
3266 /*
3267 * Connect chimney sending buffer.
3268 */
3269 if (hvn_nvs_connect_chim(sc))
3270 return -1;
3271
3272 return 0;
3273 }
3274
3275 static int
3276 hvn_nvs_connect_rxbuf(struct hvn_softc *sc)
3277 {
3278 struct hvn_nvs_rxbuf_conn cmd;
3279 struct hvn_nvs_rxbuf_conn_resp *rsp;
3280 uint64_t tid;
3281
3282 if (vmbus_handle_alloc(sc->sc_prichan, &sc->sc_rx_dma, sc->sc_rx_size,
3283 &sc->sc_rx_hndl)) {
3284 DPRINTF("%s: failed to obtain a PA handle\n",
3285 device_xname(sc->sc_dev));
3286 return -1;
3287 }
3288
3289 memset(&cmd, 0, sizeof(cmd));
3290 cmd.nvs_type = HVN_NVS_TYPE_RXBUF_CONN;
3291 cmd.nvs_gpadl = sc->sc_rx_hndl;
3292 cmd.nvs_sig = HVN_NVS_RXBUF_SIG;
3293
3294 tid = atomic_inc_uint_nv(&sc->sc_nvstid);
3295 mutex_enter(&sc->sc_nvsrsp_lock);
3296 if (hvn_nvs_cmd(sc, &cmd, sizeof(cmd), tid, 0))
3297 goto errout;
3298
3299 rsp = (struct hvn_nvs_rxbuf_conn_resp *)&sc->sc_nvsrsp;
3300 if (rsp->nvs_status != HVN_NVS_STATUS_OK) {
3301 DPRINTF("%s: failed to set up the Rx ring\n",
3302 device_xname(sc->sc_dev));
3303 goto errout;
3304 }
3305
3306 SET(sc->sc_flags, HVN_SCF_RXBUF_CONNECTED);
3307
3308 if (rsp->nvs_nsect > 1) {
3309 DPRINTF("%s: invalid number of Rx ring sections: %u\n",
3310 device_xname(sc->sc_dev), rsp->nvs_nsect);
3311 goto errout;
3312 }
3313 mutex_exit(&sc->sc_nvsrsp_lock);
3314
3315 return 0;
3316
3317 errout:
3318 mutex_exit(&sc->sc_nvsrsp_lock);
3319 hvn_nvs_disconnect_rxbuf(sc);
3320 return -1;
3321 }
3322
3323 static int
3324 hvn_nvs_disconnect_rxbuf(struct hvn_softc *sc)
3325 {
3326 struct hvn_nvs_rxbuf_disconn cmd;
3327 uint64_t tid;
3328 int s, error;
3329
3330 if (ISSET(sc->sc_flags, HVN_SCF_RXBUF_CONNECTED)) {
3331 memset(&cmd, 0, sizeof(cmd));
3332 cmd.nvs_type = HVN_NVS_TYPE_RXBUF_DISCONN;
3333 cmd.nvs_sig = HVN_NVS_RXBUF_SIG;
3334
3335 tid = atomic_inc_uint_nv(&sc->sc_nvstid);
3336 mutex_enter(&sc->sc_nvsrsp_lock);
3337 error = hvn_nvs_cmd(sc, &cmd, sizeof(cmd), tid,
3338 HVN_NVS_CMD_NORESP);
3339 if (error) {
3340 device_printf(sc->sc_dev,
3341 			    "failed to send rxbuf disconn: %d\n", error);
3342 }
3343 CLR(sc->sc_flags, HVN_SCF_RXBUF_CONNECTED);
3344 mutex_exit(&sc->sc_nvsrsp_lock);
3345
3346 /*
3347 * Wait for the hypervisor to receive this NVS request.
3348 *
3349 * NOTE:
3350 * The TX bufring will not be drained by the hypervisor,
3351 * if the primary channel is revoked.
3352 */
3353 while (!vmbus_channel_tx_empty(sc->sc_prichan) &&
3354 !vmbus_channel_is_revoked(sc->sc_prichan)) {
3355 DELAY(20);
3356 s = splnet();
3357 hvn_nvs_intr1(&sc->sc_rxr[0], sc->sc_tx_process_limit,
3358 sc->sc_rx_process_limit);
3359 splx(s);
3360 }
3361 /*
3362 * Linger long enough for NVS to disconnect RXBUF.
3363 */
3364 DELAY(200);
3365 }
3366
3367 if (sc->sc_prichan->ch_sc->sc_proto < VMBUS_VERSION_WIN10 &&
3368 sc->sc_rx_hndl) {
3369 /*
3370 * Disconnect RXBUF from primary channel.
3371 */
3372 vmbus_handle_free(sc->sc_prichan, sc->sc_rx_hndl);
3373 sc->sc_rx_hndl = 0;
3374 }
3375
3376 return 0;
3377 }
3378
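/*
 * Connect the chimney sending buffer GPADL and size its allocator from
 * the host's reply: the buffer is carved into nvs_sectsz-byte sections
 * and the free bitmap is sized in whole u_longs, so any sections beyond
 * a multiple of LONG_BIT are discarded.
 */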
3379 static int
3380 hvn_nvs_connect_chim(struct hvn_softc *sc)
3381 {
3382 struct hvn_nvs_chim_conn cmd;
3383 const struct hvn_nvs_chim_conn_resp *rsp;
3384 uint64_t tid;
3385
3386 mutex_init(&sc->sc_chim_bmap_lock, MUTEX_DEFAULT, IPL_NET);
3387
3388 /*
3389 * Connect chimney sending buffer GPADL to the primary channel.
3390 *
3391 * NOTE:
3392 * Only primary channel has chimney sending buffer connected to it.
3393 * Sub-channels just share this chimney sending buffer.
3394 */
3395 if (vmbus_handle_alloc(sc->sc_prichan, &sc->sc_chim_dma, HVN_CHIM_SIZE,
3396 &sc->sc_chim_hndl)) {
3397 DPRINTF("%s: failed to obtain a PA handle for chimney\n",
3398 device_xname(sc->sc_dev));
3399 return -1;
3400 }
3401
3402 memset(&cmd, 0, sizeof(cmd));
3403 cmd.nvs_type = HVN_NVS_TYPE_CHIM_CONN;
3404 cmd.nvs_gpadl = sc->sc_chim_hndl;
3405 cmd.nvs_sig = HVN_NVS_CHIM_SIG;
3406
3407 tid = atomic_inc_uint_nv(&sc->sc_nvstid);
3408 mutex_enter(&sc->sc_nvsrsp_lock);
3409 if (hvn_nvs_cmd(sc, &cmd, sizeof(cmd), tid, 0))
3410 goto errout;
3411
3412 rsp = (struct hvn_nvs_chim_conn_resp *)&sc->sc_nvsrsp;
3413 if (rsp->nvs_status != HVN_NVS_STATUS_OK) {
3414 DPRINTF("%s: failed to set up chimney sending buffer\n",
3415 device_xname(sc->sc_dev));
3416 goto errout;
3417 }
3418
3419 if (rsp->nvs_sectsz == 0 ||
3420 (rsp->nvs_sectsz % sizeof(uint32_t)) != 0) {
3421 /*
3422 * Can't use chimney sending buffer; done!
3423 */
3424 if (rsp->nvs_sectsz == 0) {
3425 device_printf(sc->sc_dev,
3426 "zero chimney sending buffer section size\n");
3427 } else {
3428 device_printf(sc->sc_dev,
3429 "misaligned chimney sending buffers,"
3430 			    " section size: %d\n", rsp->nvs_sectsz);
3431 }
3432 sc->sc_chim_szmax = 0;
3433 sc->sc_chim_cnt = 0;
3434 } else {
3435 sc->sc_chim_szmax = rsp->nvs_sectsz;
3436 sc->sc_chim_cnt = HVN_CHIM_SIZE / sc->sc_chim_szmax;
3437 }
3438
3439 if (sc->sc_chim_szmax > 0) {
3440 if ((HVN_CHIM_SIZE % sc->sc_chim_szmax) != 0) {
3441 device_printf(sc->sc_dev,
3442 "chimney sending sections are not properly "
3443 "aligned\n");
3444 }
3445 if ((sc->sc_chim_cnt % LONG_BIT) != 0) {
3446 device_printf(sc->sc_dev,
3447 "discard %d chimney sending sections\n",
3448 sc->sc_chim_cnt % LONG_BIT);
3449 }
3450
3451 sc->sc_chim_bmap_cnt = sc->sc_chim_cnt / LONG_BIT;
3452 sc->sc_chim_bmap = kmem_zalloc(sc->sc_chim_bmap_cnt *
3453 sizeof(u_long), KM_SLEEP);
3454 }
3455
3456 /* Done! */
3457 SET(sc->sc_flags, HVN_SCF_CHIM_CONNECTED);
3458
3459 aprint_verbose_dev(sc->sc_dev, "chimney sending buffer %d/%d\n",
3460 sc->sc_chim_szmax, sc->sc_chim_cnt);
3461
3462 mutex_exit(&sc->sc_nvsrsp_lock);
3463
3464 return 0;
3465
3466 errout:
3467 mutex_exit(&sc->sc_nvsrsp_lock);
3468 hvn_nvs_disconnect_chim(sc);
3469 return -1;
3470 }
3471
3472 static int
3473 hvn_nvs_disconnect_chim(struct hvn_softc *sc)
3474 {
3475 struct hvn_nvs_chim_disconn cmd;
3476 uint64_t tid;
3477 int s, error;
3478
3479 if (ISSET(sc->sc_flags, HVN_SCF_CHIM_CONNECTED)) {
3480 memset(&cmd, 0, sizeof(cmd));
3481 cmd.nvs_type = HVN_NVS_TYPE_CHIM_DISCONN;
3482 cmd.nvs_sig = HVN_NVS_CHIM_SIG;
3483
3484 tid = atomic_inc_uint_nv(&sc->sc_nvstid);
3485 mutex_enter(&sc->sc_nvsrsp_lock);
3486 error = hvn_nvs_cmd(sc, &cmd, sizeof(cmd), tid,
3487 HVN_NVS_CMD_NORESP);
3488 if (error) {
3489 device_printf(sc->sc_dev,
3490 			    "failed to send chim disconn: %d\n", error);
3491 }
3492 CLR(sc->sc_flags, HVN_SCF_CHIM_CONNECTED);
3493 mutex_exit(&sc->sc_nvsrsp_lock);
3494
3495 /*
3496 * Wait for the hypervisor to receive this NVS request.
3497 *
3498 * NOTE:
3499 * The TX bufring will not be drained by the hypervisor,
3500 * if the primary channel is revoked.
3501 */
3502 while (!vmbus_channel_tx_empty(sc->sc_prichan) &&
3503 !vmbus_channel_is_revoked(sc->sc_prichan)) {
3504 DELAY(20);
3505 s = splnet();
3506 hvn_nvs_intr1(&sc->sc_rxr[0], sc->sc_tx_process_limit,
3507 sc->sc_rx_process_limit);
3508 splx(s);
3509 }
3510 /*
3511 * Linger long enough for NVS to disconnect chimney
3512 * sending buffer.
3513 */
3514 DELAY(200);
3515 }
3516
3517 if (sc->sc_prichan->ch_sc->sc_proto < VMBUS_VERSION_WIN10 &&
3518 sc->sc_chim_hndl) {
3519 /*
3520 * Disconnect chimney sending buffer from primary channel.
3521 */
3522 vmbus_handle_free(sc->sc_prichan, sc->sc_chim_hndl);
3523 sc->sc_chim_hndl = 0;
3524 }
3525
3526 if (sc->sc_chim_bmap != NULL) {
3527 		kmem_free(sc->sc_chim_bmap, sc->sc_chim_bmap_cnt * sizeof(u_long));
3528 sc->sc_chim_bmap = NULL;
3529 sc->sc_chim_bmap_cnt = 0;
3530 }
3531
3532 mutex_destroy(&sc->sc_chim_bmap_lock);
3533
3534 return 0;
3535 }
3536
3537 #define HVN_HANDLE_RING_DOTX __BIT(0)
3538
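/*
 * Pull packets off a channel's bufring and dispatch them by type:
 * completion packets carry NVS command responses (copied to sc_nvsrsp)
 * and RNDIS transmit acknowledgements, RXBUF packets carry inbound
 * RNDIS data, and inband packets are host notifications.  txlimit and
 * rxlimit bound the work done per call; a limit of 0 means unbounded.
 * The result has HVN_HANDLE_RING_DOTX set when at least one
 * transmission completed.
 */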
3539 static int
3540 hvn_handle_ring(struct hvn_rx_ring *rxr, int txlimit, int rxlimit)
3541 {
3542 struct hvn_softc *sc = rxr->rxr_softc;
3543 struct vmbus_chanpkt_hdr *cph;
3544 const struct hvn_nvs_hdr *nvs;
3545 uint64_t rid;
3546 uint32_t rlen;
3547 int n, tx = 0, rx = 0;
3548 int result = 0;
3549 int rv;
3550
3551 mutex_enter(&rxr->rxr_lock);
3552 for (;;) {
3553 rv = vmbus_channel_recv(rxr->rxr_chan, rxr->rxr_nvsbuf,
3554 HVN_NVS_BUFSIZE, &rlen, &rid, 1);
3555 if (rv != 0 || rlen == 0) {
3556 if (rv != EAGAIN)
3557 device_printf(sc->sc_dev,
3558 "failed to receive an NVSP packet\n");
3559 break;
3560 }
3561 cph = (struct vmbus_chanpkt_hdr *)rxr->rxr_nvsbuf;
3562 nvs = (const struct hvn_nvs_hdr *)VMBUS_CHANPKT_CONST_DATA(cph);
3563
3564 if (cph->cph_type == VMBUS_CHANPKT_TYPE_COMP) {
3565 switch (nvs->nvs_type) {
3566 case HVN_NVS_TYPE_INIT_RESP:
3567 case HVN_NVS_TYPE_RXBUF_CONNRESP:
3568 case HVN_NVS_TYPE_CHIM_CONNRESP:
3569 case HVN_NVS_TYPE_SUBCH_RESP:
3570 mutex_enter(&sc->sc_nvsrsp_lock);
3571 /* copy the response back */
3572 memcpy(&sc->sc_nvsrsp, nvs, HVN_NVS_MSGSIZE);
3573 sc->sc_nvsdone = 1;
3574 cv_signal(&sc->sc_nvsrsp_cv);
3575 mutex_exit(&sc->sc_nvsrsp_lock);
3576 break;
3577 case HVN_NVS_TYPE_RNDIS_ACK:
3578 if (rxr->rxr_txr == NULL)
3579 break;
3580
3581 result |= HVN_HANDLE_RING_DOTX;
3582 mutex_enter(&rxr->rxr_txr->txr_lock);
3583 hvn_txeof(rxr->rxr_txr, cph->cph_tid);
3584 mutex_exit(&rxr->rxr_txr->txr_lock);
3585 if (txlimit > 0 && ++tx >= txlimit)
3586 goto out;
3587 break;
3588 default:
3589 device_printf(sc->sc_dev,
3590 "unhandled NVSP packet type %u "
3591 "on completion\n", nvs->nvs_type);
3592 break;
3593 }
3594 } else if (cph->cph_type == VMBUS_CHANPKT_TYPE_RXBUF) {
3595 switch (nvs->nvs_type) {
3596 case HVN_NVS_TYPE_RNDIS:
3597 n = hvn_rndis_input(rxr, cph->cph_tid, cph);
3598 if (rxlimit > 0) {
3599 if (n < 0)
3600 goto out;
3601 rx += n;
3602 if (rx >= rxlimit)
3603 goto out;
3604 }
3605 break;
3606 default:
3607 device_printf(sc->sc_dev,
3608 "unhandled NVSP packet type %u "
3609 "on receive\n", nvs->nvs_type);
3610 break;
3611 }
3612 } else if (cph->cph_type == VMBUS_CHANPKT_TYPE_INBAND) {
3613 switch (nvs->nvs_type) {
3614 case HVN_NVS_TYPE_TXTBL_NOTE:
3615 /* Useless; ignore */
3616 break;
3617 default:
3618 device_printf(sc->sc_dev,
3619 "got notify, nvs type %u\n", nvs->nvs_type);
3620 break;
3621 }
3622 } else
3623 device_printf(sc->sc_dev,
3624 "unknown NVSP packet type %u\n", cph->cph_type);
3625 }
3626 out:
3627 mutex_exit(&rxr->rxr_lock);
3628
3629 return result;
3630 }
3631
3632 static void
3633 hvn_nvs_intr1(struct hvn_rx_ring *rxr, int txlimit, int rxlimit)
3634 {
3635 struct hvn_softc *sc = rxr->rxr_softc;
3636 struct ifnet *ifp = SC2IFP(sc);
3637 struct hvn_tx_ring *txr = rxr->rxr_txr;
3638 int result;
3639
3640 rxr->rxr_workqueue = sc->sc_txrx_workqueue;
3641
3642 result = hvn_handle_ring(rxr, txlimit, rxlimit);
3643
3644 if ((result & HVN_HANDLE_RING_DOTX) && txr != NULL) {
3645 mutex_enter(&txr->txr_lock);
3646 /* ALTQ */
3647 if (txr->txr_id == 0) {
3648 ifp->if_flags &= ~IFF_OACTIVE;
3649 if_schedule_deferred_start(ifp);
3650 }
3651 softint_schedule(txr->txr_si);
3652 mutex_exit(&txr->txr_lock);
3653 }
3654 }
3655
3656 static void
3657 hvn_schedule_handle_ring(struct hvn_softc *sc, struct hvn_rx_ring *rxr,
3658 bool intr)
3659 {
3660
3661 KASSERT(mutex_owned(&rxr->rxr_onwork_lock));
3662
3663 if (rxr->rxr_workqueue) {
3664 if (!rxr->rxr_onlist) {
3665 rxr->rxr_onlist = true;
3666 if (intr)
3667 rxr->rxr_evdeferreq.ev_count++;
3668 else
3669 rxr->rxr_evredeferreq.ev_count++;
3670 workqueue_enqueue(sc->sc_wq, &rxr->rxr_wk, NULL);
3671 }
3672 } else {
3673 rxr->rxr_onlist = true;
3674 if (intr)
3675 rxr->rxr_evdeferreq.ev_count++;
3676 else
3677 rxr->rxr_evredeferreq.ev_count++;
3678 softint_schedule(rxr->rxr_si);
3679 }
3680 }
3681
3682 static void
3683 hvn_handle_ring_common(struct hvn_rx_ring *rxr)
3684 {
3685 struct hvn_softc *sc = rxr->rxr_softc;
3686 int txlimit = sc->sc_tx_process_limit;
3687 int rxlimit = sc->sc_rx_process_limit;
3688
3689 rxr->rxr_evdefer.ev_count++;
3690
3691 mutex_enter(&rxr->rxr_onwork_lock);
3692 rxr->rxr_onproc = true;
3693 rxr->rxr_onlist = false;
3694 mutex_exit(&rxr->rxr_onwork_lock);
3695
3696 hvn_nvs_intr1(rxr, txlimit, rxlimit);
3697
3698 mutex_enter(&rxr->rxr_onwork_lock);
3699 if (vmbus_channel_unpause(rxr->rxr_chan)) {
3700 vmbus_channel_pause(rxr->rxr_chan);
3701 hvn_schedule_handle_ring(sc, rxr, false);
3702 }
3703 rxr->rxr_onproc = false;
3704 cv_broadcast(&rxr->rxr_onwork_cv);
3705 mutex_exit(&rxr->rxr_onwork_lock);
3706 }
3707
3708 static void
3709 hvn_handle_ring_work(struct work *wk, void *arg)
3710 {
3711 struct hvn_rx_ring *rxr = container_of(wk, struct hvn_rx_ring, rxr_wk);
3712
3713 hvn_handle_ring_common(rxr);
3714 }
3715
3716 static void
3717 hvn_nvs_softintr(void *arg)
3718 {
3719 struct hvn_rx_ring *rxr = arg;
3720
3721 hvn_handle_ring_common(rxr);
3722 }
3723
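/*
 * Channel interrupt handler.  The channel stays paused for the
 * duration of the pass; if vmbus_channel_unpause() reports that more
 * data arrived meanwhile, the channel is re-paused and the remainder
 * is deferred to the softint or workqueue, where the same
 * pause/process/unpause cycle repeats until the bufring is empty.
 */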
3724 static void
3725 hvn_nvs_intr(void *arg)
3726 {
3727 struct hvn_rx_ring *rxr = arg;
3728 struct hvn_softc *sc = rxr->rxr_softc;
3729 int txlimit = cold ? 0 : sc->sc_tx_intr_process_limit;
3730 int rxlimit = cold ? 0 : sc->sc_rx_intr_process_limit;
3731
3732 rxr->rxr_evintr.ev_count++;
3733
3734 KASSERT(!rxr->rxr_onproc);
3735 KASSERT(!rxr->rxr_onlist);
3736
3737 vmbus_channel_pause(rxr->rxr_chan);
3738
3739 hvn_nvs_intr1(rxr, txlimit, rxlimit);
3740
3741 if (vmbus_channel_unpause(rxr->rxr_chan) && !cold) {
3742 vmbus_channel_pause(rxr->rxr_chan);
3743 mutex_enter(&rxr->rxr_onwork_lock);
3744 hvn_schedule_handle_ring(sc, rxr, true);
3745 mutex_exit(&rxr->rxr_onwork_lock);
3746 }
3747 }
3748
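/*
 * Send an NVS command over the primary channel, retrying while the
 * bufring is full (EAGAIN).  Unless HVN_NVS_CMD_NORESP is set, poll
 * for the completion with hvn_nvs_intr1(), dropping sc_nvsrsp_lock
 * around each pass; the completion handler copies the response into
 * sc_nvsrsp and sets sc_nvsdone.  The wait is abandoned if the channel
 * is revoked.
 */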
3749 static int
3750 hvn_nvs_cmd(struct hvn_softc *sc, void *cmd, size_t cmdsize, uint64_t tid,
3751 u_int flags)
3752 {
3753 struct hvn_rx_ring *rxr = &sc->sc_rxr[0]; /* primary channel */
3754 struct hvn_nvs_hdr *hdr = cmd;
3755 int tries = 10;
3756 int rv, s;
3757
3758 KASSERT(mutex_owned(&sc->sc_nvsrsp_lock));
3759
3760 sc->sc_nvsdone = 0;
3761
3762 do {
3763 rv = vmbus_channel_send(rxr->rxr_chan, cmd, cmdsize,
3764 tid, VMBUS_CHANPKT_TYPE_INBAND,
3765 ISSET(flags, HVN_NVS_CMD_NORESP) ? 0 :
3766 VMBUS_CHANPKT_FLAG_RC);
3767 if (rv == EAGAIN) {
3768 DELAY(1000);
3769 } else if (rv) {
3770 DPRINTF("%s: NVSP operation %u send error %d\n",
3771 device_xname(sc->sc_dev), hdr->nvs_type, rv);
3772 return rv;
3773 }
3774 } while (rv != 0 && --tries > 0);
3775
3776 if (tries == 0 && rv != 0) {
3777 device_printf(sc->sc_dev,
3778 "NVSP operation %u send error %d\n", hdr->nvs_type, rv);
3779 return rv;
3780 }
3781
3782 if (ISSET(flags, HVN_NVS_CMD_NORESP))
3783 return 0;
3784
3785 while (!sc->sc_nvsdone && !ISSET(sc->sc_flags, HVN_SCF_REVOKED)) {
3786 mutex_exit(&sc->sc_nvsrsp_lock);
3787 DELAY(1000);
3788 s = splnet();
3789 hvn_nvs_intr1(rxr, 0, 0);
3790 splx(s);
3791 mutex_enter(&sc->sc_nvsrsp_lock);
3792 }
3793
3794 return 0;
3795 }
3796
3797 static int
3798 hvn_nvs_ack(struct hvn_rx_ring *rxr, uint64_t tid)
3799 {
3800 struct hvn_softc *sc __unused = rxr->rxr_softc;
3801 struct hvn_nvs_rndis_ack cmd;
3802 int tries = 5;
3803 int rv;
3804
3805 cmd.nvs_type = HVN_NVS_TYPE_RNDIS_ACK;
3806 cmd.nvs_status = HVN_NVS_STATUS_OK;
3807 do {
3808 rv = vmbus_channel_send(rxr->rxr_chan, &cmd, sizeof(cmd),
3809 tid, VMBUS_CHANPKT_TYPE_COMP, 0);
3810 if (rv == EAGAIN)
3811 DELAY(10);
3812 else if (rv) {
3813 DPRINTF("%s: NVSP acknowledgement error %d\n",
3814 device_xname(sc->sc_dev), rv);
3815 return rv;
3816 }
3817 } while (rv != 0 && --tries > 0);
3818 return rv;
3819 }
3820
3821 static void
3822 hvn_nvs_detach(struct hvn_softc *sc)
3823 {
3824
3825 hvn_nvs_disconnect_rxbuf(sc);
3826 hvn_nvs_disconnect_chim(sc);
3827 }
3828
3829 static int
3830 hvn_nvs_alloc_subchannels(struct hvn_softc *sc, int *nsubchp)
3831 {
3832 struct hvn_nvs_subch_req cmd;
3833 struct hvn_nvs_subch_resp *rsp;
3834 uint64_t tid;
3835 int nsubch, nsubch_req;
3836
3837 nsubch_req = *nsubchp;
3838 KASSERTMSG(nsubch_req > 0, "invalid # of sub-channels %d", nsubch_req);
3839
3840 memset(&cmd, 0, sizeof(cmd));
3841 cmd.nvs_type = HVN_NVS_TYPE_SUBCH_REQ;
3842 cmd.nvs_op = HVN_NVS_SUBCH_OP_ALLOC;
3843 cmd.nvs_nsubch = nsubch_req;
3844
3845 tid = atomic_inc_uint_nv(&sc->sc_nvstid);
3846 mutex_enter(&sc->sc_nvsrsp_lock);
3847 if (hvn_nvs_cmd(sc, &cmd, sizeof(cmd), tid, 0)) {
3848 mutex_exit(&sc->sc_nvsrsp_lock);
3849 return EIO;
3850 }
3851
3852 rsp = (struct hvn_nvs_subch_resp *)&sc->sc_nvsrsp;
3853 if (rsp->nvs_status != HVN_NVS_STATUS_OK) {
3854 mutex_exit(&sc->sc_nvsrsp_lock);
3855 DPRINTF("%s: failed to alloc sub-channels\n",
3856 device_xname(sc->sc_dev));
3857 return EIO;
3858 }
3859
3860 nsubch = rsp->nvs_nsubch;
3861 if (nsubch > nsubch_req) {
3862 aprint_debug_dev(sc->sc_dev,
3863 "%u subchans are allocated, requested %d\n",
3864 nsubch, nsubch_req);
3865 nsubch = nsubch_req;
3866 }
3867 mutex_exit(&sc->sc_nvsrsp_lock);
3868
3869 *nsubchp = nsubch;
3870
3871 return 0;
3872 }
3873
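/*
 * RNDIS control requests cycle through three queues: sc_cntl_fq (free)
 * -> sc_cntl_sq (submitted, awaiting completion) -> sc_cntl_cq
 * (completed, being consumed) -> back to sc_cntl_fq.
 * hvn_rollback_cmd() lets a caller pull a request back off the
 * submitted queue, e.g. when the send itself failed.
 */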
3874 static inline struct rndis_cmd *
3875 hvn_alloc_cmd(struct hvn_softc *sc)
3876 {
3877 struct rndis_cmd *rc;
3878
3879 mutex_enter(&sc->sc_cntl_fqlck);
3880 while ((rc = TAILQ_FIRST(&sc->sc_cntl_fq)) == NULL)
3881 cv_wait(&sc->sc_cntl_fqcv, &sc->sc_cntl_fqlck);
3882 TAILQ_REMOVE(&sc->sc_cntl_fq, rc, rc_entry);
3883 mutex_exit(&sc->sc_cntl_fqlck);
3884 return rc;
3885 }
3886
3887 static inline void
3888 hvn_submit_cmd(struct hvn_softc *sc, struct rndis_cmd *rc)
3889 {
3890
3891 mutex_enter(&sc->sc_cntl_sqlck);
3892 TAILQ_INSERT_TAIL(&sc->sc_cntl_sq, rc, rc_entry);
3893 mutex_exit(&sc->sc_cntl_sqlck);
3894 }
3895
3896 static inline struct rndis_cmd *
3897 hvn_complete_cmd(struct hvn_softc *sc, uint32_t id)
3898 {
3899 struct rndis_cmd *rc;
3900
3901 mutex_enter(&sc->sc_cntl_sqlck);
3902 TAILQ_FOREACH(rc, &sc->sc_cntl_sq, rc_entry) {
3903 if (rc->rc_id == id) {
3904 TAILQ_REMOVE(&sc->sc_cntl_sq, rc, rc_entry);
3905 break;
3906 }
3907 }
3908 mutex_exit(&sc->sc_cntl_sqlck);
3909 if (rc != NULL) {
3910 mutex_enter(&sc->sc_cntl_cqlck);
3911 TAILQ_INSERT_TAIL(&sc->sc_cntl_cq, rc, rc_entry);
3912 mutex_exit(&sc->sc_cntl_cqlck);
3913 }
3914 return rc;
3915 }
3916
3917 static inline void
3918 hvn_release_cmd(struct hvn_softc *sc, struct rndis_cmd *rc)
3919 {
3920
3921 mutex_enter(&sc->sc_cntl_cqlck);
3922 TAILQ_REMOVE(&sc->sc_cntl_cq, rc, rc_entry);
3923 mutex_exit(&sc->sc_cntl_cqlck);
3924 }
3925
3926 static inline int
3927 hvn_rollback_cmd(struct hvn_softc *sc, struct rndis_cmd *rc)
3928 {
3929 struct rndis_cmd *rn;
3930
3931 mutex_enter(&sc->sc_cntl_sqlck);
3932 TAILQ_FOREACH(rn, &sc->sc_cntl_sq, rc_entry) {
3933 if (rn == rc) {
3934 TAILQ_REMOVE(&sc->sc_cntl_sq, rc, rc_entry);
3935 mutex_exit(&sc->sc_cntl_sqlck);
3936 return 0;
3937 }
3938 }
3939 mutex_exit(&sc->sc_cntl_sqlck);
3940 return -1;
3941 }
3942
3943 static inline void
3944 hvn_free_cmd(struct hvn_softc *sc, struct rndis_cmd *rc)
3945 {
3946
3947 memset(rc->rc_req, 0, sizeof(struct rndis_packet_msg));
3948 memset(&rc->rc_cmp, 0, sizeof(rc->rc_cmp));
3949 memset(&rc->rc_msg, 0, sizeof(rc->rc_msg));
3950 mutex_enter(&sc->sc_cntl_fqlck);
3951 TAILQ_INSERT_TAIL(&sc->sc_cntl_fq, rc, rc_entry);
3952 cv_signal(&sc->sc_cntl_fqcv);
3953 mutex_exit(&sc->sc_cntl_fqlck);
3954 }
3955
3956 static int
3957 hvn_rndis_init(struct hvn_softc *sc)
3958 {
3959 struct rndis_cmd *rc;
3960 int i;
3961
3962 /* RNDIS control message queues */
3963 TAILQ_INIT(&sc->sc_cntl_sq);
3964 TAILQ_INIT(&sc->sc_cntl_cq);
3965 TAILQ_INIT(&sc->sc_cntl_fq);
3966 mutex_init(&sc->sc_cntl_sqlck, MUTEX_DEFAULT, IPL_NET);
3967 mutex_init(&sc->sc_cntl_cqlck, MUTEX_DEFAULT, IPL_NET);
3968 mutex_init(&sc->sc_cntl_fqlck, MUTEX_DEFAULT, IPL_NET);
3969 cv_init(&sc->sc_cntl_fqcv, "nvsalloc");
3970
3971 for (i = 0; i < HVN_RNDIS_CTLREQS; i++) {
3972 rc = &sc->sc_cntl_msgs[i];
3973 if (bus_dmamap_create(sc->sc_dmat, PAGE_SIZE, 1, PAGE_SIZE, 0,
3974 BUS_DMA_WAITOK, &rc->rc_dmap)) {
3975 DPRINTF("%s: failed to create RNDIS command map\n",
3976 device_xname(sc->sc_dev));
3977 goto errout;
3978 }
3979 if (bus_dmamem_alloc(sc->sc_dmat, PAGE_SIZE, PAGE_SIZE,
3980 0, &rc->rc_segs, 1, &rc->rc_nsegs, BUS_DMA_WAITOK)) {
3981 DPRINTF("%s: failed to allocate RNDIS command\n",
3982 device_xname(sc->sc_dev));
3983 bus_dmamap_destroy(sc->sc_dmat, rc->rc_dmap);
3984 goto errout;
3985 }
3986 if (bus_dmamem_map(sc->sc_dmat, &rc->rc_segs, rc->rc_nsegs,
3987 PAGE_SIZE, (void **)&rc->rc_req, BUS_DMA_WAITOK)) {
3988 DPRINTF("%s: failed to allocate RNDIS command\n",
3989 device_xname(sc->sc_dev));
3990 bus_dmamem_free(sc->sc_dmat, &rc->rc_segs,
3991 rc->rc_nsegs);
3992 bus_dmamap_destroy(sc->sc_dmat, rc->rc_dmap);
3993 goto errout;
3994 }
3995 memset(rc->rc_req, 0, PAGE_SIZE);
3996 if (bus_dmamap_load(sc->sc_dmat, rc->rc_dmap, rc->rc_req,
3997 PAGE_SIZE, NULL, BUS_DMA_WAITOK)) {
3998 DPRINTF("%s: failed to load RNDIS command map\n",
3999 device_xname(sc->sc_dev));
4000 bus_dmamem_unmap(sc->sc_dmat, rc->rc_req, PAGE_SIZE);
4001 rc->rc_req = NULL;
4002 bus_dmamem_free(sc->sc_dmat, &rc->rc_segs,
4003 rc->rc_nsegs);
4004 bus_dmamap_destroy(sc->sc_dmat, rc->rc_dmap);
4005 goto errout;
4006 }
4007 rc->rc_gpa = atop(rc->rc_dmap->dm_segs[0].ds_addr);
4008 mutex_init(&rc->rc_lock, MUTEX_DEFAULT, IPL_NET);
4009 cv_init(&rc->rc_cv, "rndiscmd");
4010 TAILQ_INSERT_TAIL(&sc->sc_cntl_fq, rc, rc_entry);
4011 }
4012
4013 /* Initialize RNDIS Data command */
4014 memset(&sc->sc_data_msg, 0, sizeof(sc->sc_data_msg));
4015 sc->sc_data_msg.nvs_type = HVN_NVS_TYPE_RNDIS;
4016 sc->sc_data_msg.nvs_rndis_mtype = HVN_NVS_RNDIS_MTYPE_DATA;
4017 sc->sc_data_msg.nvs_chim_idx = HVN_NVS_CHIM_IDX_INVALID;
4018
4019 return 0;
4020
4021 errout:
4022 hvn_rndis_destroy(sc);
4023 return -1;
4024 }
4025
4026 static void
4027 hvn_rndis_destroy(struct hvn_softc *sc)
4028 {
4029 struct rndis_cmd *rc;
4030 int i;
4031
4032 for (i = 0; i < HVN_RNDIS_CTLREQS; i++) {
4033 rc = &sc->sc_cntl_msgs[i];
4034 if (rc->rc_req == NULL)
4035 continue;
4036
4037 TAILQ_REMOVE(&sc->sc_cntl_fq, rc, rc_entry);
4038 bus_dmamap_unload(sc->sc_dmat, rc->rc_dmap);
4039 bus_dmamem_unmap(sc->sc_dmat, rc->rc_req, PAGE_SIZE);
4040 rc->rc_req = NULL;
4041 bus_dmamem_free(sc->sc_dmat, &rc->rc_segs, rc->rc_nsegs);
4042 bus_dmamap_destroy(sc->sc_dmat, rc->rc_dmap);
4043 mutex_destroy(&rc->rc_lock);
4044 cv_destroy(&rc->rc_cv);
4045 }
4046
4047 mutex_destroy(&sc->sc_cntl_sqlck);
4048 mutex_destroy(&sc->sc_cntl_cqlck);
4049 mutex_destroy(&sc->sc_cntl_fqlck);
4050 cv_destroy(&sc->sc_cntl_fqcv);
4051 }
4052
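/*
 * Perform the RNDIS INITIALIZE handshake and record the packet
 * aggregation limits (size, count, alignment) advertised by the host.
 */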
4053 static int
4054 hvn_rndis_attach(struct hvn_softc *sc, int mtu)
4055 {
4056 struct rndis_init_req *req;
4057 struct rndis_init_comp *cmp;
4058 struct rndis_cmd *rc;
4059 int rv;
4060
4061 rc = hvn_alloc_cmd(sc);
4062
4063 bus_dmamap_sync(sc->sc_dmat, rc->rc_dmap, 0, PAGE_SIZE,
4064 BUS_DMASYNC_PREREAD);
4065
4066 rc->rc_id = atomic_inc_uint_nv(&sc->sc_rndisrid);
4067
4068 req = rc->rc_req;
4069 req->rm_type = REMOTE_NDIS_INITIALIZE_MSG;
4070 req->rm_len = sizeof(*req);
4071 req->rm_rid = rc->rc_id;
4072 req->rm_ver_major = RNDIS_VERSION_MAJOR;
4073 req->rm_ver_minor = RNDIS_VERSION_MINOR;
4074 req->rm_max_xfersz = HVN_RNDIS_XFER_SIZE;
4075
4076 rc->rc_cmplen = sizeof(*cmp);
4077
4078 bus_dmamap_sync(sc->sc_dmat, rc->rc_dmap, 0, PAGE_SIZE,
4079 BUS_DMASYNC_PREWRITE);
4080
4081 if ((rv = hvn_rndis_cmd(sc, rc, 0)) != 0) {
4082 DPRINTF("%s: INITIALIZE_MSG failed, error %d\n",
4083 device_xname(sc->sc_dev), rv);
4084 hvn_free_cmd(sc, rc);
4085 return -1;
4086 }
4087 cmp = (struct rndis_init_comp *)&rc->rc_cmp;
4088 if (cmp->rm_status != RNDIS_STATUS_SUCCESS) {
4089 DPRINTF("%s: failed to init RNDIS, error %#x\n",
4090 device_xname(sc->sc_dev), cmp->rm_status);
4091 hvn_free_cmd(sc, rc);
4092 return -1;
4093 }
4094
4095 sc->sc_rndis_agg_size = cmp->rm_pktmaxsz;
4096 sc->sc_rndis_agg_pkts = cmp->rm_pktmaxcnt;
4097 sc->sc_rndis_agg_align = __BIT(cmp->rm_align);
4098
4099 if (sc->sc_rndis_agg_align < sizeof(uint32_t)) {
4100 /*
4101 		 * The RNDIS packet message encap assumes that the RNDIS
4102 * packet message is at least 4 bytes aligned. Fix up the
4103 * alignment here, if the remote side sets the alignment
4104 * too low.
4105 */
4106 aprint_verbose_dev(sc->sc_dev,
4107 "fixup RNDIS aggpkt align: %u -> %zu\n",
4108 sc->sc_rndis_agg_align, sizeof(uint32_t));
4109 sc->sc_rndis_agg_align = sizeof(uint32_t);
4110 }
4111
4112 aprint_verbose_dev(sc->sc_dev,
4113 "RNDIS ver %u.%u, aggpkt size %u, aggpkt cnt %u, aggpkt align %u\n",
4114 cmp->rm_ver_major, cmp->rm_ver_minor, sc->sc_rndis_agg_size,
4115 sc->sc_rndis_agg_pkts, sc->sc_rndis_agg_align);
4116
4117 hvn_free_cmd(sc, rc);
4118
4119 return 0;
4120 }
4121
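/*
 * Query OID_GEN_RECEIVE_SCALE_CAPABILITIES for the number of RX rings,
 * the RSS indirect table size and the supported hash functions and
 * types.  The ring count is clamped to the indirect table size, and
 * the preferred hash function is picked from the lowest capability
 * bit.
 */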
4122 static int
4123 hvn_get_rsscaps(struct hvn_softc *sc, int *nrxr)
4124 {
4125 struct ndis_rss_caps in, caps;
4126 size_t caps_len;
4127 int error, rxr_cnt, indsz, hash_fnidx;
4128 uint32_t hash_func = 0, hash_types = 0;
4129
4130 *nrxr = 0;
4131
4132 if (sc->sc_ndisver < NDIS_VERSION_6_20)
4133 return EOPNOTSUPP;
4134
4135 memset(&in, 0, sizeof(in));
4136 in.ndis_hdr.ndis_type = NDIS_OBJTYPE_RSS_CAPS;
4137 in.ndis_hdr.ndis_rev = NDIS_RSS_CAPS_REV_2;
4138 in.ndis_hdr.ndis_size = NDIS_RSS_CAPS_SIZE;
4139
4140 caps_len = NDIS_RSS_CAPS_SIZE;
4141 error = hvn_rndis_query2(sc, OID_GEN_RECEIVE_SCALE_CAPABILITIES,
4142 &in, NDIS_RSS_CAPS_SIZE, &caps, &caps_len, NDIS_RSS_CAPS_SIZE_6_0);
4143 if (error)
4144 return error;
4145
4146 /*
4147 * Preliminary verification.
4148 */
4149 if (caps.ndis_hdr.ndis_type != NDIS_OBJTYPE_RSS_CAPS) {
4150 DPRINTF("%s: invalid NDIS objtype 0x%02x\n",
4151 device_xname(sc->sc_dev), caps.ndis_hdr.ndis_type);
4152 return EINVAL;
4153 }
4154 if (caps.ndis_hdr.ndis_rev < NDIS_RSS_CAPS_REV_1) {
4155 DPRINTF("%s: invalid NDIS objrev 0x%02x\n",
4156 device_xname(sc->sc_dev), caps.ndis_hdr.ndis_rev);
4157 return EINVAL;
4158 }
4159 if (caps.ndis_hdr.ndis_size > caps_len) {
4160 DPRINTF("%s: invalid NDIS objsize %u, data size %zu\n",
4161 device_xname(sc->sc_dev), caps.ndis_hdr.ndis_size,
4162 caps_len);
4163 return EINVAL;
4164 } else if (caps.ndis_hdr.ndis_size < NDIS_RSS_CAPS_SIZE_6_0) {
4165 DPRINTF("%s: invalid NDIS objsize %u\n",
4166 device_xname(sc->sc_dev), caps.ndis_hdr.ndis_size);
4167 return EINVAL;
4168 }
4169
4170 /*
4171 * Save information for later RSS configuration.
4172 */
4173 if (caps.ndis_nrxr == 0) {
4174 DPRINTF("%s: 0 RX rings!?\n", device_xname(sc->sc_dev));
4175 return EINVAL;
4176 }
4177 rxr_cnt = caps.ndis_nrxr;
4178 aprint_debug_dev(sc->sc_dev, "%u Rx rings\n", rxr_cnt);
4179
4180 if (caps.ndis_hdr.ndis_size == NDIS_RSS_CAPS_SIZE &&
4181 caps.ndis_hdr.ndis_rev >= NDIS_RSS_CAPS_REV_2) {
4182 if (caps.ndis_nind > NDIS_HASH_INDCNT) {
4183 DPRINTF("%s: too many RSS indirect table entries %u\n",
4184 device_xname(sc->sc_dev), caps.ndis_nind);
4185 return EOPNOTSUPP;
4186 }
4187 if (!powerof2(caps.ndis_nind)) {
4188 DPRINTF("%s: RSS indirect table size is not power-of-2:"
4189 " %u\n", device_xname(sc->sc_dev), caps.ndis_nind);
4190 return EOPNOTSUPP;
4191 }
4192
4193 indsz = caps.ndis_nind;
4194 } else {
4195 indsz = NDIS_HASH_INDCNT;
4196 }
4197 if (rxr_cnt > indsz) {
4198 aprint_debug_dev(sc->sc_dev,
4199 "# of RX rings (%u) > RSS indirect table size %u\n",
4200 rxr_cnt, indsz);
4201 rxr_cnt = indsz;
4202 }
4203
4204 /*
4205 * NOTE:
4206 	 * Toeplitz is at the lowest bit, and it is preferred; so ffs(),
4207 * instead of fls(), is used here.
4208 */
4209 hash_fnidx = ffs(caps.ndis_caps & NDIS_RSS_CAP_HASHFUNC_MASK);
4210 if (hash_fnidx == 0) {
4211 DPRINTF("%s: no hash functions, caps 0x%08x\n",
4212 device_xname(sc->sc_dev), caps.ndis_caps);
4213 return EOPNOTSUPP;
4214 }
4215 hash_func = 1 << (hash_fnidx - 1); /* ffs is 1-based */
4216
4217 if (caps.ndis_caps & NDIS_RSS_CAP_IPV4)
4218 hash_types |= NDIS_HASH_IPV4 | NDIS_HASH_TCP_IPV4;
4219 if (caps.ndis_caps & NDIS_RSS_CAP_IPV6)
4220 hash_types |= NDIS_HASH_IPV6 | NDIS_HASH_TCP_IPV6;
4221 if (caps.ndis_caps & NDIS_RSS_CAP_IPV6_EX)
4222 hash_types |= NDIS_HASH_IPV6_EX | NDIS_HASH_TCP_IPV6_EX;
4223 if (hash_types == 0) {
4224 DPRINTF("%s: no hash types, caps 0x%08x\n",
4225 device_xname(sc->sc_dev), caps.ndis_caps);
4226 return EOPNOTSUPP;
4227 }
4228 aprint_debug_dev(sc->sc_dev, "RSS caps %#x\n", caps.ndis_caps);
4229
4230 sc->sc_rss_ind_size = indsz;
4231 sc->sc_rss_hcap = hash_func | hash_types;
4232 if (sc->sc_caps & HVN_CAPS_UDPHASH) {
4233 /* UDP 4-tuple hash is unconditionally enabled. */
4234 sc->sc_rss_hcap |= NDIS_HASH_UDP_IPV4_X;
4235 }
4236 *nrxr = rxr_cnt;
4237
4238 return 0;
4239 }
4240
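/*
 * Push the current RSS parameters to the host.  The Toeplitz key and
 * the indirect table travel in the same buffer as the parameter
 * header; ndis_keyoffset and ndis_indoffset locate them relative to
 * the start of struct ndis_rssprm_toeplitz.
 */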
4241 static int
4242 hvn_set_rss(struct hvn_softc *sc, uint16_t flags)
4243 {
4244 struct ndis_rssprm_toeplitz *rss = &sc->sc_rss;
4245 struct ndis_rss_params *params = &rss->rss_params;
4246 int len;
4247
4248 /*
4249 * Only NDIS 6.20+ is supported:
4250 	 * We only support 4-byte elements in the indirect table, which
4251 	 * have been adopted since NDIS 6.20.
4252 */
4253 if (sc->sc_ndisver < NDIS_VERSION_6_20)
4254 return 0;
4255
4256 	/* XXX only one hash function may be specified; check with popcnt? */
4257 KASSERTMSG((sc->sc_rss_hash & NDIS_HASH_FUNCTION_MASK),
4258 "no hash func %08x", sc->sc_rss_hash);
4259 KASSERTMSG((sc->sc_rss_hash & NDIS_HASH_STD),
4260 "no standard hash types %08x", sc->sc_rss_hash);
4261 KASSERTMSG(sc->sc_rss_ind_size > 0, "no indirect table size");
4262
4263 aprint_debug_dev(sc->sc_dev, "RSS indirect table size %d, hash %#x\n",
4264 sc->sc_rss_ind_size, sc->sc_rss_hash);
4265
4266 len = NDIS_RSSPRM_TOEPLITZ_SIZE(sc->sc_rss_ind_size);
4267
4268 memset(params, 0, sizeof(*params));
4269 params->ndis_hdr.ndis_type = NDIS_OBJTYPE_RSS_PARAMS;
4270 params->ndis_hdr.ndis_rev = NDIS_RSS_PARAMS_REV_2;
4271 params->ndis_hdr.ndis_size = len;
4272 params->ndis_flags = flags;
4273 params->ndis_hash =
4274 sc->sc_rss_hash & (NDIS_HASH_FUNCTION_MASK | NDIS_HASH_STD);
4275 params->ndis_indsize = sizeof(rss->rss_ind[0]) * sc->sc_rss_ind_size;
4276 params->ndis_indoffset =
4277 offsetof(struct ndis_rssprm_toeplitz, rss_ind[0]);
4278 params->ndis_keysize = sizeof(rss->rss_key);
4279 params->ndis_keyoffset =
4280 offsetof(struct ndis_rssprm_toeplitz, rss_key[0]);
4281
4282 return hvn_rndis_set(sc, OID_GEN_RECEIVE_SCALE_PARAMETERS, rss, len);
4283 }
4284
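/*
 * Clamp indirect table entries that point at channels beyond the
 * number of RX rings actually in use.
 */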
4285 static void
4286 hvn_fixup_rss_ind(struct hvn_softc *sc)
4287 {
4288 struct ndis_rssprm_toeplitz *rss = &sc->sc_rss;
4289 int i, nchan;
4290
4291 nchan = sc->sc_nrxr_inuse;
4292 KASSERTMSG(nchan > 1, "invalid # of channels %d", nchan);
4293
4294 /*
4295 * Check indirect table to make sure that all channels in it
4296 * can be used.
4297 */
4298 for (i = 0; i < NDIS_HASH_INDCNT; i++) {
4299 if (rss->rss_ind[i] >= nchan) {
4300 DPRINTF("%s: RSS indirect table %d fixup: %u -> %d\n",
4301 device_xname(sc->sc_dev), i, rss->rss_ind[i],
4302 nchan - 1);
4303 rss->rss_ind[i] = nchan - 1;
4304 }
4305 }
4306 }
4307
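/*
 * Query OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES.  The request and reply
 * sizes depend on the negotiated NDIS version (6.0, 6.1 or 6.30+
 * layouts).
 */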
4308 static int
4309 hvn_get_hwcaps(struct hvn_softc *sc, struct ndis_offload *caps)
4310 {
4311 struct ndis_offload in;
4312 size_t caps_len, len;
4313 int error;
4314
4315 memset(&in, 0, sizeof(in));
4316 in.ndis_hdr.ndis_type = NDIS_OBJTYPE_OFFLOAD;
4317 if (sc->sc_ndisver >= NDIS_VERSION_6_30) {
4318 in.ndis_hdr.ndis_rev = NDIS_OFFLOAD_REV_3;
4319 len = in.ndis_hdr.ndis_size = NDIS_OFFLOAD_SIZE;
4320 } else if (sc->sc_ndisver >= NDIS_VERSION_6_1) {
4321 in.ndis_hdr.ndis_rev = NDIS_OFFLOAD_REV_2;
4322 len = in.ndis_hdr.ndis_size = NDIS_OFFLOAD_SIZE_6_1;
4323 } else {
4324 in.ndis_hdr.ndis_rev = NDIS_OFFLOAD_REV_1;
4325 len = in.ndis_hdr.ndis_size = NDIS_OFFLOAD_SIZE_6_0;
4326 }
4327
4328 caps_len = NDIS_OFFLOAD_SIZE;
4329 error = hvn_rndis_query2(sc, OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES,
4330 &in, len, caps, &caps_len, NDIS_OFFLOAD_SIZE_6_0);
4331 if (error)
4332 return error;
4333
4334 /*
4335 * Preliminary verification.
4336 */
4337 if (caps->ndis_hdr.ndis_type != NDIS_OBJTYPE_OFFLOAD) {
4338 DPRINTF("%s: invalid NDIS objtype 0x%02x\n",
4339 device_xname(sc->sc_dev), caps->ndis_hdr.ndis_type);
4340 return EINVAL;
4341 }
4342 if (caps->ndis_hdr.ndis_rev < NDIS_OFFLOAD_REV_1) {
4343 DPRINTF("%s: invalid NDIS objrev 0x%02x\n",
4344 device_xname(sc->sc_dev), caps->ndis_hdr.ndis_rev);
4345 return EINVAL;
4346 }
4347 if (caps->ndis_hdr.ndis_size > caps_len) {
4348 DPRINTF("%s: invalid NDIS objsize %u, data size %zu\n",
4349 device_xname(sc->sc_dev), caps->ndis_hdr.ndis_size,
4350 caps_len);
4351 return EINVAL;
4352 } else if (caps->ndis_hdr.ndis_size < NDIS_OFFLOAD_SIZE_6_0) {
4353 DPRINTF("%s: invalid NDIS objsize %u\n",
4354 device_xname(sc->sc_dev), caps->ndis_hdr.ndis_size);
4355 return EINVAL;
4356 }
4357
4358 /*
4359 * NOTE:
4360 * caps->ndis_hdr.ndis_size MUST be checked before accessing
4361 * NDIS 6.1+ specific fields.
4362 */
4363 aprint_debug_dev(sc->sc_dev, "hwcaps rev %u\n",
4364 caps->ndis_hdr.ndis_rev);
4365
4366 aprint_debug_dev(sc->sc_dev, "hwcaps csum: "
4367 "ip4 tx 0x%x/0x%x rx 0x%x/0x%x, "
4368 "ip6 tx 0x%x/0x%x rx 0x%x/0x%x\n",
4369 caps->ndis_csum.ndis_ip4_txcsum, caps->ndis_csum.ndis_ip4_txenc,
4370 caps->ndis_csum.ndis_ip4_rxcsum, caps->ndis_csum.ndis_ip4_rxenc,
4371 caps->ndis_csum.ndis_ip6_txcsum, caps->ndis_csum.ndis_ip6_txenc,
4372 caps->ndis_csum.ndis_ip6_rxcsum, caps->ndis_csum.ndis_ip6_rxenc);
4373 aprint_debug_dev(sc->sc_dev, "hwcaps lsov2: "
4374 "ip4 maxsz %u minsg %u encap 0x%x, "
4375 "ip6 maxsz %u minsg %u encap 0x%x opts 0x%x\n",
4376 caps->ndis_lsov2.ndis_ip4_maxsz, caps->ndis_lsov2.ndis_ip4_minsg,
4377 caps->ndis_lsov2.ndis_ip4_encap, caps->ndis_lsov2.ndis_ip6_maxsz,
4378 caps->ndis_lsov2.ndis_ip6_minsg, caps->ndis_lsov2.ndis_ip6_encap,
4379 caps->ndis_lsov2.ndis_ip6_opts);
4380
4381 return 0;
4382 }
4383
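/*
 * Translate the hardware offload capabilities into an
 * OID_TCP_OFFLOAD_PARAMETERS request: TSOv4/v6 are enabled only when
 * the advertised limits are usable with the current MTU, and TX/RX
 * checksum offload is enabled per protocol.
 */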
4384 static int
4385 hvn_set_capabilities(struct hvn_softc *sc, int mtu)
4386 {
4387 struct ndis_offload hwcaps;
4388 struct ndis_offload_params params;
4389 size_t len;
4390 uint32_t caps = 0;
4391 int error, tso_maxsz, tso_minsg;
4392
4393 error = hvn_get_hwcaps(sc, &hwcaps);
4394 if (error) {
4395 DPRINTF("%s: failed to query hwcaps\n",
4396 device_xname(sc->sc_dev));
4397 return error;
4398 }
4399
4400 /* NOTE: 0 means "no change" */
4401 	memset(&params, 0, sizeof(params));
4402
4403 params.ndis_hdr.ndis_type = NDIS_OBJTYPE_DEFAULT;
4404 if (sc->sc_ndisver < NDIS_VERSION_6_30) {
4405 params.ndis_hdr.ndis_rev = NDIS_OFFLOAD_PARAMS_REV_2;
4406 len = params.ndis_hdr.ndis_size = NDIS_OFFLOAD_PARAMS_SIZE_6_1;
4407 } else {
4408 params.ndis_hdr.ndis_rev = NDIS_OFFLOAD_PARAMS_REV_3;
4409 len = params.ndis_hdr.ndis_size = NDIS_OFFLOAD_PARAMS_SIZE;
4410 }
4411
4412 /*
4413 * TSO4/TSO6 setup.
4414 */
4415 tso_maxsz = IP_MAXPACKET;
4416 tso_minsg = 2;
4417 if (hwcaps.ndis_lsov2.ndis_ip4_encap & NDIS_OFFLOAD_ENCAP_8023) {
4418 caps |= HVN_CAPS_TSO4;
4419 params.ndis_lsov2_ip4 = NDIS_OFFLOAD_LSOV2_ON;
4420
4421 if (hwcaps.ndis_lsov2.ndis_ip4_maxsz < tso_maxsz)
4422 tso_maxsz = hwcaps.ndis_lsov2.ndis_ip4_maxsz;
4423 if (hwcaps.ndis_lsov2.ndis_ip4_minsg > tso_minsg)
4424 tso_minsg = hwcaps.ndis_lsov2.ndis_ip4_minsg;
4425 }
4426 if ((hwcaps.ndis_lsov2.ndis_ip6_encap & NDIS_OFFLOAD_ENCAP_8023) &&
4427 (hwcaps.ndis_lsov2.ndis_ip6_opts & HVN_NDIS_LSOV2_CAP_IP6) ==
4428 HVN_NDIS_LSOV2_CAP_IP6) {
4429 caps |= HVN_CAPS_TSO6;
4430 params.ndis_lsov2_ip6 = NDIS_OFFLOAD_LSOV2_ON;
4431
4432 if (hwcaps.ndis_lsov2.ndis_ip6_maxsz < tso_maxsz)
4433 tso_maxsz = hwcaps.ndis_lsov2.ndis_ip6_maxsz;
4434 if (hwcaps.ndis_lsov2.ndis_ip6_minsg > tso_minsg)
4435 tso_minsg = hwcaps.ndis_lsov2.ndis_ip6_minsg;
4436 }
4437 sc->sc_tso_szmax = 0;
4438 sc->sc_tso_sgmin = 0;
4439 if (caps & (HVN_CAPS_TSO4 | HVN_CAPS_TSO6)) {
4440 KASSERTMSG(tso_maxsz <= IP_MAXPACKET,
4441 "invalid NDIS TSO maxsz %d", tso_maxsz);
4442 KASSERTMSG(tso_minsg >= 2,
4443 "invalid NDIS TSO minsg %d", tso_minsg);
4444 if (tso_maxsz < tso_minsg * mtu) {
4445 DPRINTF("%s: invalid NDIS TSO config: "
4446 "maxsz %d, minsg %d, mtu %d; "
4447 "disable TSO4 and TSO6\n", device_xname(sc->sc_dev),
4448 tso_maxsz, tso_minsg, mtu);
4449 caps &= ~(HVN_CAPS_TSO4 | HVN_CAPS_TSO6);
4450 params.ndis_lsov2_ip4 = NDIS_OFFLOAD_LSOV2_OFF;
4451 params.ndis_lsov2_ip6 = NDIS_OFFLOAD_LSOV2_OFF;
4452 } else {
4453 sc->sc_tso_szmax = tso_maxsz;
4454 sc->sc_tso_sgmin = tso_minsg;
4455 aprint_debug_dev(sc->sc_dev,
4456 "NDIS TSO szmax %d sgmin %d\n",
4457 sc->sc_tso_szmax, sc->sc_tso_sgmin);
4458 }
4459 }
4460
4461 /* IPv4 checksum */
4462 if ((hwcaps.ndis_csum.ndis_ip4_txcsum & HVN_NDIS_TXCSUM_CAP_IP4) ==
4463 HVN_NDIS_TXCSUM_CAP_IP4) {
4464 caps |= HVN_CAPS_IPCS;
4465 params.ndis_ip4csum = NDIS_OFFLOAD_PARAM_TX;
4466 }
4467 if (hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_IP4) {
4468 if (params.ndis_ip4csum == NDIS_OFFLOAD_PARAM_TX)
4469 params.ndis_ip4csum = NDIS_OFFLOAD_PARAM_TXRX;
4470 else
4471 params.ndis_ip4csum = NDIS_OFFLOAD_PARAM_RX;
4472 }
4473
4474 /* TCP4 checksum */
4475 if ((hwcaps.ndis_csum.ndis_ip4_txcsum & HVN_NDIS_TXCSUM_CAP_TCP4) ==
4476 HVN_NDIS_TXCSUM_CAP_TCP4) {
4477 caps |= HVN_CAPS_TCP4CS;
4478 params.ndis_tcp4csum = NDIS_OFFLOAD_PARAM_TX;
4479 }
4480 if (hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_TCP4) {
4481 if (params.ndis_tcp4csum == NDIS_OFFLOAD_PARAM_TX)
4482 params.ndis_tcp4csum = NDIS_OFFLOAD_PARAM_TXRX;
4483 else
4484 params.ndis_tcp4csum = NDIS_OFFLOAD_PARAM_RX;
4485 }
4486
4487 /* UDP4 checksum */
4488 if (hwcaps.ndis_csum.ndis_ip4_txcsum & NDIS_TXCSUM_CAP_UDP4) {
4489 caps |= HVN_CAPS_UDP4CS;
4490 params.ndis_udp4csum = NDIS_OFFLOAD_PARAM_TX;
4491 }
4492 if (hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_UDP4) {
4493 if (params.ndis_udp4csum == NDIS_OFFLOAD_PARAM_TX)
4494 params.ndis_udp4csum = NDIS_OFFLOAD_PARAM_TXRX;
4495 else
4496 params.ndis_udp4csum = NDIS_OFFLOAD_PARAM_RX;
4497 }
4498
4499 /* TCP6 checksum */
4500 if ((hwcaps.ndis_csum.ndis_ip6_txcsum & HVN_NDIS_TXCSUM_CAP_TCP6) ==
4501 HVN_NDIS_TXCSUM_CAP_TCP6) {
4502 caps |= HVN_CAPS_TCP6CS;
4503 params.ndis_tcp6csum = NDIS_OFFLOAD_PARAM_TX;
4504 }
4505 if (hwcaps.ndis_csum.ndis_ip6_rxcsum & NDIS_RXCSUM_CAP_TCP6) {
4506 if (params.ndis_tcp6csum == NDIS_OFFLOAD_PARAM_TX)
4507 params.ndis_tcp6csum = NDIS_OFFLOAD_PARAM_TXRX;
4508 else
4509 params.ndis_tcp6csum = NDIS_OFFLOAD_PARAM_RX;
4510 }
4511
4512 /* UDP6 checksum */
4513 if ((hwcaps.ndis_csum.ndis_ip6_txcsum & HVN_NDIS_TXCSUM_CAP_UDP6) ==
4514 HVN_NDIS_TXCSUM_CAP_UDP6) {
4515 caps |= HVN_CAPS_UDP6CS;
4516 params.ndis_udp6csum = NDIS_OFFLOAD_PARAM_TX;
4517 }
4518 if (hwcaps.ndis_csum.ndis_ip6_rxcsum & NDIS_RXCSUM_CAP_UDP6) {
4519 if (params.ndis_udp6csum == NDIS_OFFLOAD_PARAM_TX)
4520 params.ndis_udp6csum = NDIS_OFFLOAD_PARAM_TXRX;
4521 else
4522 params.ndis_udp6csum = NDIS_OFFLOAD_PARAM_RX;
4523 }
4524
4525 aprint_debug_dev(sc->sc_dev, "offload csum: "
4526 "ip4 %u, tcp4 %u, udp4 %u, tcp6 %u, udp6 %u\n",
4527 params.ndis_ip4csum, params.ndis_tcp4csum, params.ndis_udp4csum,
4528 params.ndis_tcp6csum, params.ndis_udp6csum);
4529 aprint_debug_dev(sc->sc_dev, "offload lsov2: ip4 %u, ip6 %u\n",
4530 params.ndis_lsov2_ip4, params.ndis_lsov2_ip6);
4531
4532 	error = hvn_rndis_set(sc, OID_TCP_OFFLOAD_PARAMETERS, &params, len);
4533 if (error) {
4534 DPRINTF("%s: offload config failed: %d\n",
4535 device_xname(sc->sc_dev), error);
4536 return error;
4537 }
4538
4539 aprint_debug_dev(sc->sc_dev, "offload config done\n");
4540 sc->sc_caps |= caps;
4541
4542 return 0;
4543 }
4544
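/*
 * Submit a control command on the primary channel and wait for its
 * completion.  The send is retried up to ten times on EAGAIN; instead
 * of sleeping, the channel is polled through hvn_nvs_intr1() until the
 * completion arrives or the device is revoked.  With
 * HVN_RNDIS_CMD_NORESP no response is expected and the command is
 * withdrawn right after the send.
 */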
4545 static int
4546 hvn_rndis_cmd(struct hvn_softc *sc, struct rndis_cmd *rc, u_int flags)
4547 {
4548 struct hvn_rx_ring *rxr = &sc->sc_rxr[0]; /* primary channel */
4549 struct hvn_nvs_rndis *msg = &rc->rc_msg;
4550 struct rndis_msghdr *hdr = rc->rc_req;
4551 struct vmbus_gpa sgl[1];
4552 int tries = 10;
4553 int rv, s;
4554
4555 msg->nvs_type = HVN_NVS_TYPE_RNDIS;
4556 msg->nvs_rndis_mtype = HVN_NVS_RNDIS_MTYPE_CTRL;
4557 msg->nvs_chim_idx = HVN_NVS_CHIM_IDX_INVALID;
4558
4559 sgl[0].gpa_page = rc->rc_gpa;
4560 sgl[0].gpa_len = hdr->rm_len;
4561 sgl[0].gpa_ofs = 0;
4562
4563 rc->rc_done = 0;
4564
4565 mutex_enter(&rc->rc_lock);
4566
4567 hvn_submit_cmd(sc, rc);
4568
4569 do {
4570 rv = vmbus_channel_send_sgl(rxr->rxr_chan, sgl, 1, &rc->rc_msg,
4571 sizeof(*msg), rc->rc_id);
4572 if (rv == EAGAIN) {
4573 DELAY(1000);
4574 } else if (rv) {
4575 mutex_exit(&rc->rc_lock);
4576 DPRINTF("%s: RNDIS operation %u send error %d\n",
4577 device_xname(sc->sc_dev), hdr->rm_type, rv);
4578 hvn_rollback_cmd(sc, rc);
4579 return rv;
4580 }
4581 } while (rv != 0 && --tries > 0);
4582
4583 if (tries == 0 && rv != 0) {
4584 mutex_exit(&rc->rc_lock);
4585 device_printf(sc->sc_dev,
4586 "RNDIS operation %u send error %d\n", hdr->rm_type, rv);
4587 hvn_rollback_cmd(sc, rc);
4588 return rv;
4589 }
4590 if (vmbus_channel_is_revoked(rxr->rxr_chan) ||
4591 ISSET(flags, HVN_RNDIS_CMD_NORESP)) {
4592 /* No response */
4593 mutex_exit(&rc->rc_lock);
4594 if (hvn_rollback_cmd(sc, rc))
4595 hvn_release_cmd(sc, rc);
4596 return 0;
4597 }
4598
4599 bus_dmamap_sync(sc->sc_dmat, rc->rc_dmap, 0, PAGE_SIZE,
4600 BUS_DMASYNC_POSTWRITE);
4601
4602 while (!rc->rc_done && !ISSET(sc->sc_flags, HVN_SCF_REVOKED)) {
4603 mutex_exit(&rc->rc_lock);
4604 DELAY(1000);
4605 s = splnet();
4606 hvn_nvs_intr1(rxr, 0, 0);
4607 splx(s);
4608 mutex_enter(&rc->rc_lock);
4609 }
4610 mutex_exit(&rc->rc_lock);
4611
4612 bus_dmamap_sync(sc->sc_dmat, rc->rc_dmap, 0, PAGE_SIZE,
4613 BUS_DMASYNC_POSTREAD);
4614
4615 if (!rc->rc_done) {
4616 rv = EINTR;
4617 if (hvn_rollback_cmd(sc, rc)) {
4618 hvn_release_cmd(sc, rc);
4619 rv = 0;
4620 }
4621 return rv;
4622 }
4623
4624 hvn_release_cmd(sc, rc);
4625 return 0;
4626 }
4627
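/*
 * Demultiplex RNDIS messages delivered through the host receive
 * buffer: data packets go to hvn_rxeof(), completions to
 * hvn_rndis_complete() and status indications to hvn_rndis_status().
 * Returns the number of packets received, or -1 if the receive queue
 * filled up.
 */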
4628 static int
4629 hvn_rndis_input(struct hvn_rx_ring *rxr, uint64_t tid, void *arg)
4630 {
4631 struct hvn_softc *sc = rxr->rxr_softc;
4632 struct vmbus_chanpkt_prplist *cp = arg;
4633 uint32_t off, len, type;
4634 int i, rv, rx = 0;
4635 bool qfull = false;
4636
4637 if (sc->sc_rx_ring == NULL) {
4638 DPRINTF("%s: invalid rx ring\n", device_xname(sc->sc_dev));
4639 return 0;
4640 }
4641
4642 for (i = 0; i < cp->cp_range_cnt; i++) {
4643 off = cp->cp_range[i].gpa_ofs;
4644 len = cp->cp_range[i].gpa_len;
4645
4646 KASSERT(off + len <= sc->sc_rx_size);
4647 KASSERT(len >= RNDIS_HEADER_OFFSET + 4);
4648
4649 memcpy(&type, sc->sc_rx_ring + off, sizeof(type));
4650 switch (type) {
4651 /* data message */
4652 case REMOTE_NDIS_PACKET_MSG:
4653 rv = hvn_rxeof(rxr, sc->sc_rx_ring + off, len);
4654 if (rv == 1)
4655 rx++;
4656 else if (rv == -1) /* The receive queue is full. */
4657 qfull = true;
4658 break;
4659 /* completion messages */
4660 case REMOTE_NDIS_INITIALIZE_CMPLT:
4661 case REMOTE_NDIS_QUERY_CMPLT:
4662 case REMOTE_NDIS_SET_CMPLT:
4663 case REMOTE_NDIS_RESET_CMPLT:
4664 case REMOTE_NDIS_KEEPALIVE_CMPLT:
4665 hvn_rndis_complete(sc, sc->sc_rx_ring + off, len);
4666 break;
4667 /* notification message */
4668 case REMOTE_NDIS_INDICATE_STATUS_MSG:
4669 hvn_rndis_status(sc, sc->sc_rx_ring + off, len);
4670 break;
4671 default:
4672 device_printf(sc->sc_dev,
4673 "unhandled RNDIS message type %u\n", type);
4674 break;
4675 }
4676 }
4677
4678 hvn_nvs_ack(rxr, tid);
4679
4680 if (qfull)
4681 return -1;
4682 return rx;
4683 }
4684
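/*
 * Copy a received frame into a freshly allocated mbuf.  Extra headroom
 * (ETHER_ALIGN + ETHER_VLAN_ENCAP_LEN) is reserved up front so that
 * the payload stays aligned and a VLAN header can later be prepended
 * without another allocation.
 */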
4685 static inline struct mbuf *
4686 hvn_devget(struct hvn_softc *sc, void *buf, uint32_t len)
4687 {
4688 struct ifnet *ifp = SC2IFP(sc);
4689 struct mbuf *m;
4690 size_t size = len + ETHER_ALIGN + ETHER_VLAN_ENCAP_LEN;
4691
4692 MGETHDR(m, M_NOWAIT, MT_DATA);
4693 if (m == NULL)
4694 return NULL;
4695
4696 if (size > MHLEN) {
4697 if (size <= MCLBYTES)
4698 MCLGET(m, M_NOWAIT);
4699 else
4700 MEXTMALLOC(m, size, M_NOWAIT);
4701 if ((m->m_flags & M_EXT) == 0) {
4702 m_freem(m);
4703 return NULL;
4704 }
4705 }
4706
4707 m->m_len = m->m_pkthdr.len = size;
4708 m_adj(m, ETHER_ALIGN + ETHER_VLAN_ENCAP_LEN);
4709 m_copyback(m, 0, len, buf);
4710 m_set_rcvif(m, ifp);
4711 return m;
4712 }
4713
4714 #define HVN_RXINFO_CSUM __BIT(NDIS_PKTINFO_TYPE_CSUM)
4715 #define HVN_RXINFO_VLAN __BIT(NDIS_PKTINFO_TYPE_VLAN)
4716 #define HVN_RXINFO_HASHVAL __BIT(HVN_NDIS_PKTINFO_TYPE_HASHVAL)
4717 #define HVN_RXINFO_HASHINFO __BIT(HVN_NDIS_PKTINFO_TYPE_HASHINF)
4718 #define HVN_RXINFO_ALL (HVN_RXINFO_CSUM | \
4719 HVN_RXINFO_VLAN | \
4720 HVN_RXINFO_HASHVAL | \
4721 HVN_RXINFO_HASHINFO)
4722
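/*
 * Parse one RNDIS data packet: validate the data and per-packet info
 * regions against the reported length, pick up checksum, VLAN and hash
 * metadata, and enqueue the resulting mbuf.
 */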
4723 static int
4724 hvn_rxeof(struct hvn_rx_ring *rxr, uint8_t *buf, uint32_t len)
4725 {
4726 struct hvn_softc *sc = rxr->rxr_softc;
4727 struct ifnet *ifp = SC2IFP(sc);
4728 struct rndis_packet_msg *pkt;
4729 struct rndis_pktinfo *pi;
4730 struct mbuf *m;
4731 uint32_t mask, csum, vlan, hashval, hashinfo;
4732
4733 if (!(ifp->if_flags & IFF_RUNNING))
4734 return 0;
4735
4736 if (len < sizeof(*pkt)) {
4737 device_printf(sc->sc_dev, "data packet too short: %u\n",
4738 len);
4739 return 0;
4740 }
4741
4742 pkt = (struct rndis_packet_msg *)buf;
4743 if (pkt->rm_dataoffset + pkt->rm_datalen > len) {
4744 device_printf(sc->sc_dev,
4745 "data packet out of bounds: %u@%u\n", pkt->rm_dataoffset,
4746 pkt->rm_datalen);
4747 return 0;
4748 }
4749
4750 if ((m = hvn_devget(sc, buf + RNDIS_HEADER_OFFSET + pkt->rm_dataoffset,
4751 pkt->rm_datalen)) == NULL) {
4752 if_statinc(ifp, if_ierrors);
4753 return 0;
4754 }
4755
4756 if (pkt->rm_pktinfooffset + pkt->rm_pktinfolen > len) {
4757 device_printf(sc->sc_dev,
4758 "pktinfo is out of bounds: %u@%u vs %u\n",
4759 pkt->rm_pktinfolen, pkt->rm_pktinfooffset, len);
4760 goto done;
4761 }
4762
4763 mask = csum = hashval = hashinfo = 0;
4764 vlan = 0xffffffff;
4765 pi = (struct rndis_pktinfo *)(buf + RNDIS_HEADER_OFFSET +
4766 pkt->rm_pktinfooffset);
4767 while (pkt->rm_pktinfolen > 0) {
4768 if (pi->rm_size > pkt->rm_pktinfolen) {
4769 device_printf(sc->sc_dev,
4770 "invalid pktinfo size: %u/%u\n", pi->rm_size,
4771 pkt->rm_pktinfolen);
4772 break;
4773 }
4774
4775 switch (pi->rm_type) {
4776 case NDIS_PKTINFO_TYPE_CSUM:
4777 memcpy(&csum, pi->rm_data, sizeof(csum));
4778 SET(mask, HVN_RXINFO_CSUM);
4779 break;
4780 case NDIS_PKTINFO_TYPE_VLAN:
4781 memcpy(&vlan, pi->rm_data, sizeof(vlan));
4782 SET(mask, HVN_RXINFO_VLAN);
4783 break;
4784 case HVN_NDIS_PKTINFO_TYPE_HASHVAL:
4785 memcpy(&hashval, pi->rm_data, sizeof(hashval));
4786 SET(mask, HVN_RXINFO_HASHVAL);
4787 break;
4788 case HVN_NDIS_PKTINFO_TYPE_HASHINF:
4789 memcpy(&hashinfo, pi->rm_data, sizeof(hashinfo));
4790 SET(mask, HVN_RXINFO_HASHINFO);
4791 break;
4792 default:
4793 DPRINTF("%s: unhandled pktinfo type %u\n",
4794 device_xname(sc->sc_dev), pi->rm_type);
4795 goto next;
4796 }
4797
4798 if (mask == HVN_RXINFO_ALL) {
4799 /* All found; done */
4800 break;
4801 }
4802 next:
4803 pkt->rm_pktinfolen -= pi->rm_size;
4804 pi = (struct rndis_pktinfo *)((char *)pi + pi->rm_size);
4805 }
4806
4807 /*
4808 * Final fixup.
4809 * - If there is no hash value, invalidate the hash info.
4810 */
4811 if (!ISSET(mask, HVN_RXINFO_HASHVAL))
4812 hashinfo = 0;
4813
4814 if (csum != 0) {
4815 if (ISSET(csum, NDIS_RXCSUM_INFO_IPCS_OK) &&
4816 ISSET(ifp->if_csum_flags_rx, M_CSUM_IPv4)) {
4817 SET(m->m_pkthdr.csum_flags, M_CSUM_IPv4);
4818 rxr->rxr_evcsum_ip.ev_count++;
4819 }
4820 if (ISSET(csum, NDIS_RXCSUM_INFO_TCPCS_OK) &&
4821 ISSET(ifp->if_csum_flags_rx, M_CSUM_TCPv4)) {
4822 SET(m->m_pkthdr.csum_flags, M_CSUM_TCPv4);
4823 rxr->rxr_evcsum_tcp.ev_count++;
4824 }
4825 if (ISSET(csum, NDIS_RXCSUM_INFO_UDPCS_OK) &&
4826 ISSET(ifp->if_csum_flags_rx, M_CSUM_UDPv4)) {
4827 SET(m->m_pkthdr.csum_flags, M_CSUM_UDPv4);
4828 rxr->rxr_evcsum_udp.ev_count++;
4829 }
4830 }
4831
4832 if (vlan != 0xffffffff) {
4833 uint16_t t = NDIS_VLAN_INFO_ID(vlan);
4834 t |= NDIS_VLAN_INFO_PRI(vlan) << EVL_PRIO_BITS;
4835 t |= NDIS_VLAN_INFO_CFI(vlan) << EVL_CFI_BITS;
4836
4837 if (ISSET(sc->sc_ec.ec_capenable, ETHERCAP_VLAN_HWTAGGING)) {
4838 vlan_set_tag(m, t);
4839 rxr->rxr_evvlanhwtagging.ev_count++;
4840 } else {
4841 struct ether_header eh;
4842 struct ether_vlan_header *evl;
4843
4844 KDASSERT(m->m_pkthdr.len >= sizeof(eh));
4845 m_copydata(m, 0, sizeof(eh), &eh);
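			/*
			 * hvn_devget() reserved ETHER_VLAN_ENCAP_LEN of
			 * headroom, so this prepend is not expected to
			 * allocate or fail.
			 */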
4846 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT);
4847 KDASSERT(m != NULL);
4848
4849 evl = mtod(m, struct ether_vlan_header *);
4850 memcpy(evl->evl_dhost, eh.ether_dhost,
4851 ETHER_ADDR_LEN * 2);
4852 evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
4853 evl->evl_tag = htons(t);
4854 evl->evl_proto = eh.ether_type;
4855 }
4856 }
4857
4858 /* XXX RSS hash is not supported. */
4859
4860 done:
4861 rxr->rxr_evpkts.ev_count++;
4862 if_percpuq_enqueue(sc->sc_ipq, m);
4863 /* XXX Unable to detect that the receive queue is full. */
4864 return 1;
4865 }
4866
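/*
 * Match a completion to its pending command by request id, copy the
 * fixed-size completion into rc_cmp and any trailing data into
 * rc_cmpbuf, then mark the command done.
 */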
4867 static void
4868 hvn_rndis_complete(struct hvn_softc *sc, uint8_t *buf, uint32_t len)
4869 {
4870 struct rndis_cmd *rc;
4871 uint32_t id;
4872
4873 memcpy(&id, buf + RNDIS_HEADER_OFFSET, sizeof(id));
4874 if ((rc = hvn_complete_cmd(sc, id)) != NULL) {
4875 mutex_enter(&rc->rc_lock);
4876 if (len < rc->rc_cmplen)
4877 device_printf(sc->sc_dev,
4878 "RNDIS response %u too short: %u\n", id, len);
4879 else
4880 memcpy(&rc->rc_cmp, buf, rc->rc_cmplen);
4881 if (len > rc->rc_cmplen &&
4882 len - rc->rc_cmplen > HVN_RNDIS_BUFSIZE)
4883 device_printf(sc->sc_dev,
4884 "RNDIS response %u too large: %u\n", id, len);
4885 else if (len > rc->rc_cmplen)
4886 memcpy(&rc->rc_cmpbuf, buf + rc->rc_cmplen,
4887 len - rc->rc_cmplen);
4888 rc->rc_done = 1;
4889 cv_signal(&rc->rc_cv);
4890 mutex_exit(&rc->rc_lock);
4891 } else {
4892 DPRINTF("%s: failed to complete RNDIS request id %u\n",
4893 device_xname(sc->sc_dev), id);
4894 }
4895 }
4896
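/*
 * Transmit paths: hvn_rndis_output_sgl() hands a packet to the host as
 * a scatter/gather list of guest pages, while hvn_rndis_output_chim()
 * below announces a packet that has already been copied into a chimney
 * send buffer slot.
 */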
4897 static int
4898 hvn_rndis_output_sgl(struct hvn_tx_ring *txr, struct hvn_tx_desc *txd)
4899 {
4900 struct hvn_softc *sc = txr->txr_softc;
4901 uint64_t rid = (uint64_t)txd->txd_id << 32;
4902 int rv;
4903
4904 rv = vmbus_channel_send_sgl(txr->txr_chan, txd->txd_sgl, txd->txd_nsge,
4905 &sc->sc_data_msg, sizeof(sc->sc_data_msg), rid);
4906 if (rv) {
4907 DPRINTF("%s: RNDIS data send error %d\n",
4908 device_xname(sc->sc_dev), rv);
4909 return rv;
4910 }
4911 return 0;
4912 }
4913
4914 static int
4915 hvn_rndis_output_chim(struct hvn_tx_ring *txr, struct hvn_tx_desc *txd)
4916 {
	struct hvn_softc *sc __unused = txr->txr_softc;
4917 	struct hvn_nvs_rndis rndis;
4918 uint64_t rid = (uint64_t)txd->txd_id << 32;
4919 int rv;
4920
4921 memset(&rndis, 0, sizeof(rndis));
4922 rndis.nvs_type = HVN_NVS_TYPE_RNDIS;
4923 rndis.nvs_rndis_mtype = HVN_NVS_RNDIS_MTYPE_DATA;
4924 rndis.nvs_chim_idx = txd->txd_chim_index;
4925 rndis.nvs_chim_sz = txd->txd_chim_size;
4926
4927 rv = vmbus_channel_send(txr->txr_chan, &rndis, sizeof(rndis),
4928 rid, VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC);
4929 if (rv) {
4930 DPRINTF("%s: RNDIS chimney data send error %d: idx %u, sz %u\n",
4931 device_xname(sc->sc_dev), rv, rndis.nvs_chim_idx,
4932 rndis.nvs_chim_sz);
4933 return rv;
4934 }
4935 return 0;
4936 }
4937
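/*
 * Handle unsolicited RNDIS status indications; media and network
 * change events feed the link state machinery.
 */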
4938 static void
4939 hvn_rndis_status(struct hvn_softc *sc, uint8_t *buf, uint32_t len)
4940 {
4941 uint32_t status;
4942
4943 memcpy(&status, buf + RNDIS_HEADER_OFFSET, sizeof(status));
4944 switch (status) {
4945 case RNDIS_STATUS_MEDIA_CONNECT:
4946 case RNDIS_STATUS_MEDIA_DISCONNECT:
4947 hvn_link_event(sc, HVN_LINK_EV_STATE_CHANGE);
4948 break;
4949 case RNDIS_STATUS_NETWORK_CHANGE:
4950 hvn_link_event(sc, HVN_LINK_EV_NETWORK_CHANGE);
4951 break;
4952 /* Ignore these */
4953 case RNDIS_STATUS_OFFLOAD_CURRENT_CONFIG:
4954 case RNDIS_STATUS_LINK_SPEED_CHANGE:
4955 return;
4956 default:
4957 DPRINTF("%s: unhandled status %#x\n", device_xname(sc->sc_dev),
4958 status);
4959 return;
4960 }
4961 }
4962
4963 static int
4964 hvn_rndis_query(struct hvn_softc *sc, uint32_t oid, void *res, size_t *length)
4965 {
4966
4967 return hvn_rndis_query2(sc, oid, NULL, 0, res, length, 0);
4968 }
4969
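/*
 * Issue an RNDIS QUERY for the given OID.  An optional input buffer
 * (idata/idlen) is appended to the request; the reply's info buffer is
 * copied into odata with its length in *odlen, failing with EINVAL if
 * it does not fit or is shorter than min_odlen.
 */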
4970 static int
4971 hvn_rndis_query2(struct hvn_softc *sc, uint32_t oid, const void *idata,
4972 size_t idlen, void *odata, size_t *odlen, size_t min_odlen)
4973 {
4974 struct rndis_cmd *rc;
4975 struct rndis_query_req *req;
4976 struct rndis_query_comp *cmp;
4977 size_t olength = *odlen;
4978 int rv;
4979
4980 rc = hvn_alloc_cmd(sc);
4981
4982 bus_dmamap_sync(sc->sc_dmat, rc->rc_dmap, 0, PAGE_SIZE,
4983 BUS_DMASYNC_PREREAD);
4984
4985 rc->rc_id = atomic_inc_uint_nv(&sc->sc_rndisrid);
4986
4987 req = rc->rc_req;
4988 req->rm_type = REMOTE_NDIS_QUERY_MSG;
4989 req->rm_len = sizeof(*req) + idlen;
4990 req->rm_rid = rc->rc_id;
4991 req->rm_oid = oid;
4992 req->rm_infobufoffset = sizeof(*req) - RNDIS_HEADER_OFFSET;
4993 if (idlen > 0) {
4994 KASSERT(sizeof(*req) + idlen <= PAGE_SIZE);
4995 req->rm_infobuflen = idlen;
4996 memcpy(req + 1, idata, idlen);
4997 }
4998
4999 rc->rc_cmplen = sizeof(*cmp);
5000
5001 bus_dmamap_sync(sc->sc_dmat, rc->rc_dmap, 0, PAGE_SIZE,
5002 BUS_DMASYNC_PREWRITE);
5003
5004 if ((rv = hvn_rndis_cmd(sc, rc, 0)) != 0) {
5005 DPRINTF("%s: QUERY_MSG failed, error %d\n",
5006 device_xname(sc->sc_dev), rv);
5007 hvn_free_cmd(sc, rc);
5008 return rv;
5009 }
5010
5011 cmp = (struct rndis_query_comp *)&rc->rc_cmp;
5012 switch (cmp->rm_status) {
5013 case RNDIS_STATUS_SUCCESS:
5014 if (cmp->rm_infobuflen > olength ||
5015 (min_odlen > 0 && cmp->rm_infobuflen < min_odlen)) {
5016 rv = EINVAL;
5017 break;
5018 }
5019 memcpy(odata, rc->rc_cmpbuf, cmp->rm_infobuflen);
5020 *odlen = cmp->rm_infobuflen;
5021 break;
5022 default:
5023 *odlen = 0;
5024 rv = EIO;
5025 break;
5026 }
5027
5028 hvn_free_cmd(sc, rc);
5029 return rv;
5030 }
5031
5032 static int
5033 hvn_rndis_set(struct hvn_softc *sc, uint32_t oid, void *data, size_t length)
5034 {
5035 struct rndis_cmd *rc;
5036 struct rndis_set_req *req;
5037 struct rndis_set_comp *cmp;
5038 int rv;
5039
5040 rc = hvn_alloc_cmd(sc);
5041
5042 bus_dmamap_sync(sc->sc_dmat, rc->rc_dmap, 0, PAGE_SIZE,
5043 BUS_DMASYNC_PREREAD);
5044
5045 rc->rc_id = atomic_inc_uint_nv(&sc->sc_rndisrid);
5046
5047 req = rc->rc_req;
5048 req->rm_type = REMOTE_NDIS_SET_MSG;
5049 req->rm_len = sizeof(*req) + length;
5050 req->rm_rid = rc->rc_id;
5051 req->rm_oid = oid;
5052 req->rm_infobufoffset = sizeof(*req) - RNDIS_HEADER_OFFSET;
5053
5054 rc->rc_cmplen = sizeof(*cmp);
5055
5056 if (length > 0) {
5057 KASSERT(sizeof(*req) + length < PAGE_SIZE);
5058 req->rm_infobuflen = length;
5059 memcpy(req + 1, data, length);
5060 }
5061
5062 bus_dmamap_sync(sc->sc_dmat, rc->rc_dmap, 0, PAGE_SIZE,
5063 BUS_DMASYNC_PREWRITE);
5064
5065 if ((rv = hvn_rndis_cmd(sc, rc, 0)) != 0) {
5066 DPRINTF("%s: SET_MSG failed, error %d\n",
5067 device_xname(sc->sc_dev), rv);
5068 hvn_free_cmd(sc, rc);
5069 return rv;
5070 }
5071
5072 cmp = (struct rndis_set_comp *)&rc->rc_cmp;
5073 if (cmp->rm_status != RNDIS_STATUS_SUCCESS)
5074 rv = EIO;
5075
5076 hvn_free_cmd(sc, rc);
5077 return rv;
5078 }
5079
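/*
 * Program the RNDIS packet filter from the interface state.  A proper
 * multicast list is not implemented, so the presence of any multicast
 * address degrades to all-multicast.
 */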
5080 static int
5081 hvn_rndis_open(struct hvn_softc *sc)
5082 {
5083 struct ifnet *ifp = SC2IFP(sc);
5084 uint32_t filter;
5085 int rv;
5086
5087 if (ifp->if_flags & IFF_PROMISC) {
5088 filter = RNDIS_PACKET_TYPE_PROMISCUOUS;
5089 } else {
5090 filter = RNDIS_PACKET_TYPE_DIRECTED;
5091 if (ifp->if_flags & IFF_BROADCAST)
5092 filter |= RNDIS_PACKET_TYPE_BROADCAST;
5093 if (ifp->if_flags & IFF_ALLMULTI)
5094 filter |= RNDIS_PACKET_TYPE_ALL_MULTICAST;
5095 else {
5096 struct ethercom *ec = &sc->sc_ec;
5097 struct ether_multi *enm;
5098 struct ether_multistep step;
5099
5100 ETHER_LOCK(ec);
5101 ETHER_FIRST_MULTI(step, ec, enm);
5102 /* TODO: support multicast list */
5103 if (enm != NULL)
5104 filter |= RNDIS_PACKET_TYPE_ALL_MULTICAST;
5105 ETHER_UNLOCK(ec);
5106 }
5107 }
5108
5109 rv = hvn_rndis_set(sc, OID_GEN_CURRENT_PACKET_FILTER,
5110 &filter, sizeof(filter));
5111 if (rv) {
5112 DPRINTF("%s: failed to set RNDIS filter to %#x\n",
5113 device_xname(sc->sc_dev), filter);
5114 }
5115 return rv;
5116 }
5117
5118 static int
5119 hvn_rndis_close(struct hvn_softc *sc)
5120 {
5121 uint32_t filter = 0;
5122 int rv;
5123
5124 rv = hvn_rndis_set(sc, OID_GEN_CURRENT_PACKET_FILTER,
5125 &filter, sizeof(filter));
5126 if (rv) {
5127 DPRINTF("%s: failed to clear RNDIS filter\n",
5128 device_xname(sc->sc_dev));
5129 }
5130 return rv;
5131 }
5132
5133 static void
5134 hvn_rndis_detach(struct hvn_softc *sc)
5135 {
5136 struct rndis_cmd *rc;
5137 struct rndis_halt_req *req;
5138 int rv;
5139
5140 rc = hvn_alloc_cmd(sc);
5141
5142 bus_dmamap_sync(sc->sc_dmat, rc->rc_dmap, 0, PAGE_SIZE,
5143 BUS_DMASYNC_PREREAD);
5144
5145 rc->rc_id = atomic_inc_uint_nv(&sc->sc_rndisrid);
5146
5147 req = rc->rc_req;
5148 req->rm_type = REMOTE_NDIS_HALT_MSG;
5149 req->rm_len = sizeof(*req);
5150 req->rm_rid = rc->rc_id;
5151
5152 bus_dmamap_sync(sc->sc_dmat, rc->rc_dmap, 0, PAGE_SIZE,
5153 BUS_DMASYNC_PREWRITE);
5154
5155 /* No RNDIS completion; rely on NVS message send completion */
5156 if ((rv = hvn_rndis_cmd(sc, rc, HVN_RNDIS_CMD_NORESP)) != 0) {
5157 DPRINTF("%s: HALT_MSG failed, error %d\n",
5158 device_xname(sc->sc_dev), rv);
5159 }
5160 hvn_free_cmd(sc, rc);
5161 }
5162
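/*
 * Per-device sysctl tree, e.g. hw.hvn0.{txrx_workqueue,rx,tx} with
 * per-direction interrupt and deferred processing limits.
 */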
5163 static void
5164 hvn_init_sysctls(struct hvn_softc *sc)
5165 {
5166 struct sysctllog **log;
5167 const struct sysctlnode *rnode, *cnode, *rxnode, *txnode;
5168 const char *dvname;
5169 int error;
5170
5171 log = &sc->sc_sysctllog;
5172 dvname = device_xname(sc->sc_dev);
5173
5174 error = sysctl_createv(log, 0, NULL, &rnode,
5175 0, CTLTYPE_NODE, dvname,
5176 SYSCTL_DESCR("hvn information and settings"),
5177 NULL, 0, NULL, 0, CTL_HW, CTL_CREATE, CTL_EOL);
5178 if (error)
5179 goto err;
5180
5181 error = sysctl_createv(log, 0, &rnode, &cnode,
5182 CTLFLAG_READWRITE, CTLTYPE_BOOL, "txrx_workqueue",
5183 SYSCTL_DESCR("Use workqueue for packet processing"),
5184 NULL, 0, &sc->sc_txrx_workqueue, 0, CTL_CREATE, CTL_EOL);
5185 if (error)
5186 goto out;
5187
5188 error = sysctl_createv(log, 0, &rnode, &rxnode,
5189 0, CTLTYPE_NODE, "rx",
5190 SYSCTL_DESCR("hvn information and settings for Rx"),
5191 NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
5192 if (error)
5193 goto out;
5194
5195 error = sysctl_createv(log, 0, &rxnode, NULL,
5196 CTLFLAG_READWRITE, CTLTYPE_INT, "intr_process_limit",
5197 SYSCTL_DESCR("max number of Rx packets"
5198 " to process for interrupt processing"),
5199 NULL, 0, &sc->sc_rx_intr_process_limit, 0, CTL_CREATE, CTL_EOL);
5200 if (error)
5201 goto out;
5202
5203 error = sysctl_createv(log, 0, &rxnode, NULL,
5204 CTLFLAG_READWRITE, CTLTYPE_INT, "process_limit",
5205 SYSCTL_DESCR("max number of Rx packets"
5206 " to process for deferred processing"),
5207 NULL, 0, &sc->sc_rx_process_limit, 0, CTL_CREATE, CTL_EOL);
5208 if (error)
5209 goto out;
5210
5211 error = sysctl_createv(log, 0, &rnode, &txnode,
5212 0, CTLTYPE_NODE, "tx",
5213 SYSCTL_DESCR("hvn information and settings for Tx"),
5214 NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
5215 if (error)
5216 goto out;
5217
5218 error = sysctl_createv(log, 0, &txnode, NULL,
5219 CTLFLAG_READWRITE, CTLTYPE_INT, "intr_process_limit",
5220 SYSCTL_DESCR("max number of Tx packets"
5221 " to process for interrupt processing"),
5222 NULL, 0, &sc->sc_tx_intr_process_limit, 0, CTL_CREATE, CTL_EOL);
5223 if (error)
5224 goto out;
5225
5226 error = sysctl_createv(log, 0, &txnode, NULL,
5227 CTLFLAG_READWRITE, CTLTYPE_INT, "process_limit",
5228 SYSCTL_DESCR("max number of Tx packets"
5229 " to process for deferred processing"),
5230 NULL, 0, &sc->sc_tx_process_limit, 0, CTL_CREATE, CTL_EOL);
5231 if (error)
5232 goto out;
5233
5234 return;
5235
5236 out:
5237 sysctl_teardown(log);
5238 sc->sc_sysctllog = NULL;
5239 err:
5240 aprint_error_dev(sc->sc_dev, "sysctl_createv failed (err = %d)\n",
5241 error);
5242 }
5243
5244 SYSCTL_SETUP(sysctl_hw_hvn_setup, "sysctl hw.hvn setup")
5245 {
5246 const struct sysctlnode *rnode;
5247 const struct sysctlnode *cnode;
5248 int error;
5249
5250 error = sysctl_createv(clog, 0, NULL, &rnode,
5251 CTLFLAG_PERMANENT, CTLTYPE_NODE, "hvn",
5252 SYSCTL_DESCR("hvn global controls"),
5253 NULL, 0, NULL, 0, CTL_HW, CTL_CREATE, CTL_EOL);
5254 if (error)
5255 goto fail;
5256
5257 error = sysctl_createv(clog, 0, &rnode, &cnode,
5258 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT,
5259 "udp_csum_fixup_mtu",
5260 	    SYSCTL_DESCR("UDP checksum offloading fixup MTU"),
5261 NULL, 0, &hvn_udpcs_fixup_mtu, sizeof(hvn_udpcs_fixup_mtu),
5262 CTL_CREATE, CTL_EOL);
5263 if (error)
5264 goto fail;
5265
5266 error = sysctl_createv(clog, 0, &rnode, &cnode,
5267 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT,
5268 "chimney_size",
5269 SYSCTL_DESCR("Chimney send packet size limit"),
5270 NULL, 0, &hvn_tx_chimney_size, sizeof(hvn_tx_chimney_size),
5271 CTL_CREATE, CTL_EOL);
5272 if (error)
5273 goto fail;
5274
5275 error = sysctl_createv(clog, 0, &rnode, &cnode,
5276 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT,
5277 "channel_count",
5278 SYSCTL_DESCR("# of channels to use"),
5279 NULL, 0, &hvn_channel_cnt, sizeof(hvn_channel_cnt),
5280 CTL_CREATE, CTL_EOL);
5281 if (error)
5282 goto fail;
5283
5284 error = sysctl_createv(clog, 0, &rnode, &cnode,
5285 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT,
5286 "tx_ring_count",
5287 SYSCTL_DESCR("# of transmit rings to use"),
5288 NULL, 0, &hvn_tx_ring_cnt, sizeof(hvn_tx_ring_cnt),
5289 CTL_CREATE, CTL_EOL);
5290 if (error)
5291 goto fail;
5292
5293 return;
5294
5295 fail:
5296 aprint_error("%s: sysctl_createv failed (err = %d)\n", __func__, error);
5297 }
5298