/*	$NetBSD: if_hvn.c,v 1.27 2024/02/09 22:08:34 andvar Exp $	*/
/*	$OpenBSD: if_hvn.c,v 1.39 2018/03/11 14:31:34 mikeb Exp $	*/
3
4 /*-
5 * Copyright (c) 2009-2012,2016 Microsoft Corp.
6 * Copyright (c) 2010-2012 Citrix Inc.
7 * Copyright (c) 2012 NetApp Inc.
8 * Copyright (c) 2016 Mike Belopuhov <mike (at) esdenera.com>
9 * All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice unmodified, this list of conditions, and the following
16 * disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /*
34 * The OpenBSD port was done under funding by Esdenera Networks GmbH.
35 */
36
37 #include <sys/cdefs.h>
38 __KERNEL_RCSID(0, "$NetBSD: if_hvn.c,v 1.27 2024/02/09 22:08:34 andvar Exp $");
39
40 #ifdef _KERNEL_OPT
41 #include "opt_if_hvn.h"
42 #include "opt_inet.h"
43 #include "opt_inet6.h"
44 #endif
45
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/kernel.h>
49 #include <sys/device.h>
50 #include <sys/bitops.h>
51 #include <sys/bus.h>
52 #include <sys/condvar.h>
53 #include <sys/cpu.h>
54 #include <sys/evcnt.h>
55 #include <sys/intr.h>
56 #include <sys/kmem.h>
57 #include <sys/kthread.h>
58 #include <sys/mutex.h>
59 #include <sys/pcq.h>
60 #include <sys/sysctl.h>
61 #include <sys/workqueue.h>
62
63 #include <net/if.h>
64 #include <net/if_ether.h>
65 #include <net/if_media.h>
66 #include <net/if_vlanvar.h>
67 #include <net/rss_config.h>
68 #include <netinet/in.h>
69 #include <netinet/ip.h>
70 #include <netinet/ip6.h>
71 #include <netinet/udp.h>
72
73 #include <net/bpf.h>
74
75 #include <dev/ic/ndisreg.h>
76 #include <dev/ic/rndisreg.h>
77
78 #include <dev/hyperv/vmbusvar.h>
79 #include <dev/hyperv/if_hvnreg.h>
80
81 #ifndef EVL_PRIO_BITS
82 #define EVL_PRIO_BITS 13
83 #endif
84 #ifndef EVL_CFI_BITS
85 #define EVL_CFI_BITS 12
86 #endif
87
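/*
 * Total size of the "chimney" send buffer shared with the host.
 * hvn_chim_alloc() hands out sc_chim_szmax-sized slots from it once
 * the NVS handshake has reported the slot size.
 */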
88 #define HVN_CHIM_SIZE (15 * 1024 * 1024)
89
90 #define HVN_NVS_MSGSIZE 32
91 #define HVN_NVS_BUFSIZE PAGE_SIZE
92
93 #define HVN_RING_BUFSIZE (128 * PAGE_SIZE)
94 #define HVN_RING_IDX2CPU(sc, idx) ((idx) % ncpu)
95
96 #ifndef HVN_CHANNEL_MAX_COUNT_DEFAULT
97 #define HVN_CHANNEL_MAX_COUNT_DEFAULT 8
98 #endif
99
100 #ifndef HVN_LINK_STATE_CHANGE_DELAY
101 #define HVN_LINK_STATE_CHANGE_DELAY 5000
102 #endif
103
104 #define HVN_WORKQUEUE_PRI PRI_SOFTNET
105
106 /*
107 * RNDIS control interface
108 */
109 #define HVN_RNDIS_CTLREQS 4
110 #define HVN_RNDIS_BUFSIZE 512
111
112 struct rndis_cmd {
113 uint32_t rc_id;
114 struct hvn_nvs_rndis rc_msg;
115 void *rc_req;
116 bus_dmamap_t rc_dmap;
117 bus_dma_segment_t rc_segs;
118 int rc_nsegs;
119 uint64_t rc_gpa;
120 struct rndis_packet_msg rc_cmp;
121 uint32_t rc_cmplen;
122 uint8_t rc_cmpbuf[HVN_RNDIS_BUFSIZE];
123 int rc_done;
124 TAILQ_ENTRY(rndis_cmd) rc_entry;
125 kmutex_t rc_lock;
126 kcondvar_t rc_cv;
127 };
128 TAILQ_HEAD(rndis_queue, rndis_cmd);
129
130 #define HVN_MTU_MIN 68
131 #define HVN_MTU_MAX (65535 - ETHER_ADDR_LEN)
132
133 #define HVN_RNDIS_XFER_SIZE 2048
134
135 #define HVN_NDIS_TXCSUM_CAP_IP4 \
136 (NDIS_TXCSUM_CAP_IP4 | NDIS_TXCSUM_CAP_IP4OPT)
137 #define HVN_NDIS_TXCSUM_CAP_TCP4 \
138 (NDIS_TXCSUM_CAP_TCP4 | NDIS_TXCSUM_CAP_TCP4OPT)
139 #define HVN_NDIS_TXCSUM_CAP_TCP6 \
140 (NDIS_TXCSUM_CAP_TCP6 | NDIS_TXCSUM_CAP_TCP6OPT | \
141 NDIS_TXCSUM_CAP_IP6EXT)
142 #define HVN_NDIS_TXCSUM_CAP_UDP6 \
143 (NDIS_TXCSUM_CAP_UDP6 | NDIS_TXCSUM_CAP_IP6EXT)
144 #define HVN_NDIS_LSOV2_CAP_IP6 \
145 (NDIS_LSOV2_CAP_IP6EXT | NDIS_LSOV2_CAP_TCP6OPT)
146
147 #define HVN_RNDIS_CMD_NORESP __BIT(0)
148
149 #define HVN_NVS_CMD_NORESP __BIT(0)
150
151 /*
152 * Tx ring
153 */
154 #define HVN_TX_DESC 512
155 #define HVN_TX_FRAGS 15 /* 31 is the max */
156 #define HVN_TX_FRAG_SIZE PAGE_SIZE
157 #define HVN_TX_PKT_SIZE 16384
158
159 #define HVN_RNDIS_PKT_LEN \
160 (sizeof(struct rndis_packet_msg) + \
161 sizeof(struct rndis_pktinfo) + NDIS_VLAN_INFO_SIZE + \
162 sizeof(struct rndis_pktinfo) + NDIS_TXCSUM_INFO_SIZE)
163
164 #define HVN_PKTSIZE_MIN(align) \
165 roundup2(ETHER_MIN_LEN + ETHER_VLAN_ENCAP_LEN - ETHER_CRC_LEN + \
166 HVN_RNDIS_PKT_LEN, (align))
167 #define HVN_PKTSIZE(m, align) \
168 roundup2((m)->m_pkthdr.len + HVN_RNDIS_PKT_LEN, (align))
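/*
 * Example (illustrative only): a full-sized 1514-byte Ethernet frame
 * with a hypothetical 32-byte aggregation alignment consumes
 * HVN_PKTSIZE() = roundup2(1514 + HVN_RNDIS_PKT_LEN, 32) bytes of
 * chimney buffer space.
 */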
169
170 struct hvn_tx_desc {
171 uint32_t txd_id;
172 struct vmbus_gpa txd_sgl[HVN_TX_FRAGS + 1];
173 int txd_nsge;
174 struct mbuf *txd_buf;
175 bus_dmamap_t txd_dmap;
176 struct vmbus_gpa txd_gpa;
177 struct rndis_packet_msg *txd_req;
178 TAILQ_ENTRY(hvn_tx_desc) txd_entry;
179 u_int txd_refs;
180 uint32_t txd_flags;
181 #define HVN_TXD_FLAG_ONAGG __BIT(0)
182 #define HVN_TXD_FLAG_DMAMAP __BIT(1)
183 uint32_t txd_chim_index;
184 int txd_chim_size;
185 STAILQ_ENTRY(hvn_tx_desc) txd_agg_entry;
186 STAILQ_HEAD(, hvn_tx_desc) txd_agg_list;
187 };
188
189 struct hvn_softc;
190 struct hvn_rx_ring;
191
192 struct hvn_tx_ring {
193 struct hvn_softc *txr_softc;
194 struct vmbus_channel *txr_chan;
195 struct hvn_rx_ring *txr_rxr;
196 void *txr_si;
197 char txr_name[16];
198
199 int txr_id;
200 int txr_oactive;
201 int txr_suspended;
202 int txr_csum_assist;
203 uint64_t txr_caps_assist;
204 uint32_t txr_flags;
205 #define HVN_TXR_FLAG_UDP_HASH __BIT(0)
206
207 struct evcnt txr_evpkts;
208 struct evcnt txr_evsends;
209 struct evcnt txr_evnodesc;
210 struct evcnt txr_evdmafailed;
211 struct evcnt txr_evdefrag;
212 struct evcnt txr_evpcqdrop;
213 struct evcnt txr_evtransmitdefer;
214 struct evcnt txr_evflushfailed;
215 struct evcnt txr_evchimneytried;
216 struct evcnt txr_evchimney;
217 struct evcnt txr_evvlanfixup;
218 struct evcnt txr_evvlanhwtagging;
219 struct evcnt txr_evvlantap;
220
221 kmutex_t txr_lock;
222 pcq_t *txr_interq;
223
224 uint32_t txr_avail;
225 TAILQ_HEAD(, hvn_tx_desc) txr_list;
226 struct hvn_tx_desc txr_desc[HVN_TX_DESC];
227 uint8_t *txr_msgs;
228 struct hyperv_dma txr_dma;
229
230 int txr_chim_size;
231
232 /* Applied packet transmission aggregation limits. */
233 int txr_agg_szmax;
234 short txr_agg_pktmax;
235 short txr_agg_align;
236
237 /* Packet transmission aggregation states. */
238 struct hvn_tx_desc *txr_agg_txd;
239 int txr_agg_szleft;
240 short txr_agg_pktleft;
241 struct rndis_packet_msg *txr_agg_prevpkt;
242
	/* Temporary stats for each send. */
244 int txr_stat_pkts;
245 int txr_stat_size;
246 int txr_stat_mcasts;
247
248 int (*txr_sendpkt)(struct hvn_tx_ring *,
249 struct hvn_tx_desc *);
250 } __aligned(CACHE_LINE_SIZE);
251
252 struct hvn_rx_ring {
253 struct hvn_softc *rxr_softc;
254 struct vmbus_channel *rxr_chan;
255 struct hvn_tx_ring *rxr_txr;
256 void *rxr_si;
257 bool rxr_workqueue;
258 char rxr_name[16];
259
260 struct work rxr_wk;
261 volatile bool rxr_onlist;
262 volatile bool rxr_onproc;
263 kmutex_t rxr_onwork_lock;
264 kcondvar_t rxr_onwork_cv;
265
266 uint32_t rxr_flags;
267 #define HVN_RXR_FLAG_UDP_HASH __BIT(0)
268
269 kmutex_t rxr_lock;
270
271 struct evcnt rxr_evpkts;
272 struct evcnt rxr_evcsum_ip;
273 struct evcnt rxr_evcsum_tcp;
274 struct evcnt rxr_evcsum_udp;
275 struct evcnt rxr_evvlanhwtagging;
276 struct evcnt rxr_evintr;
277 struct evcnt rxr_evdefer;
278 struct evcnt rxr_evdeferreq;
279 struct evcnt rxr_evredeferreq;
280
281 /* NVS */
282 uint8_t *rxr_nvsbuf;
283 } __aligned(CACHE_LINE_SIZE);
284
285 struct hvn_softc {
286 device_t sc_dev;
287
288 struct vmbus_softc *sc_vmbus;
289 struct vmbus_channel *sc_prichan;
290 bus_dma_tag_t sc_dmat;
291
292 struct ethercom sc_ec;
293 struct ifmedia sc_media;
294 struct if_percpuq *sc_ipq;
295 struct workqueue *sc_wq;
296 bool sc_txrx_workqueue;
297 kmutex_t sc_core_lock;
298
299 kmutex_t sc_link_lock;
300 kcondvar_t sc_link_cv;
301 callout_t sc_link_tmout;
302 lwp_t *sc_link_lwp;
303 uint32_t sc_link_ev;
304 #define HVN_LINK_EV_STATE_CHANGE __BIT(0)
305 #define HVN_LINK_EV_NETWORK_CHANGE_TMOUT __BIT(1)
306 #define HVN_LINK_EV_NETWORK_CHANGE __BIT(2)
307 #define HVN_LINK_EV_RESUME_NETWORK __BIT(3)
308 #define HVN_LINK_EV_EXIT_THREAD __BIT(4)
309 int sc_link_state;
310 bool sc_link_onproc;
311 bool sc_link_pending;
312 bool sc_link_suspend;
313
314 int sc_tx_process_limit;
315 int sc_rx_process_limit;
316 int sc_tx_intr_process_limit;
317 int sc_rx_intr_process_limit;
318
319 struct sysctllog *sc_sysctllog;
320
321 uint32_t sc_caps;
322 #define HVN_CAPS_VLAN __BIT(0)
323 #define HVN_CAPS_MTU __BIT(1)
324 #define HVN_CAPS_IPCS __BIT(2)
325 #define HVN_CAPS_TCP4CS __BIT(3)
326 #define HVN_CAPS_TCP6CS __BIT(4)
327 #define HVN_CAPS_UDP4CS __BIT(5)
328 #define HVN_CAPS_UDP6CS __BIT(6)
329 #define HVN_CAPS_TSO4 __BIT(7)
330 #define HVN_CAPS_TSO6 __BIT(8)
331 #define HVN_CAPS_HASHVAL __BIT(9)
332 #define HVN_CAPS_UDPHASH __BIT(10)
333
334 uint32_t sc_flags;
335 #define HVN_SCF_ATTACHED __BIT(0)
336 #define HVN_SCF_RXBUF_CONNECTED __BIT(1)
337 #define HVN_SCF_CHIM_CONNECTED __BIT(2)
338 #define HVN_SCF_REVOKED __BIT(3)
339 #define HVN_SCF_HAS_RSSKEY __BIT(4)
340 #define HVN_SCF_HAS_RSSIND __BIT(5)
341
342 /* NVS protocol */
343 int sc_proto;
344 uint32_t sc_nvstid;
345 uint8_t sc_nvsrsp[HVN_NVS_MSGSIZE];
346 int sc_nvsdone;
347 kmutex_t sc_nvsrsp_lock;
348 kcondvar_t sc_nvsrsp_cv;
349
350 /* RNDIS protocol */
351 int sc_ndisver;
352 uint32_t sc_rndisrid;
353 int sc_tso_szmax;
354 int sc_tso_sgmin;
355 uint32_t sc_rndis_agg_size;
356 uint32_t sc_rndis_agg_pkts;
357 uint32_t sc_rndis_agg_align;
358 struct rndis_queue sc_cntl_sq; /* submission queue */
359 kmutex_t sc_cntl_sqlck;
360 struct rndis_queue sc_cntl_cq; /* completion queue */
361 kmutex_t sc_cntl_cqlck;
362 struct rndis_queue sc_cntl_fq; /* free queue */
363 kmutex_t sc_cntl_fqlck;
364 kcondvar_t sc_cntl_fqcv;
365 struct rndis_cmd sc_cntl_msgs[HVN_RNDIS_CTLREQS];
366 struct hvn_nvs_rndis sc_data_msg;
367
368 int sc_rss_ind_size;
369 uint32_t sc_rss_hash; /* setting, NDIS_HASH_ */
370 uint32_t sc_rss_hcap; /* caps, NDIS_HASH_ */
371 struct ndis_rssprm_toeplitz sc_rss;
372
373 /* Rx ring */
374 uint8_t *sc_rx_ring;
375 int sc_rx_size;
376 uint32_t sc_rx_hndl;
377 struct hyperv_dma sc_rx_dma;
378 struct hvn_rx_ring *sc_rxr;
379 int sc_nrxr;
380 int sc_nrxr_inuse;
381
382 /* Tx ring */
383 struct hvn_tx_ring *sc_txr;
384 int sc_ntxr;
385 int sc_ntxr_inuse;
386
387 /* chimney sending buffers */
388 uint8_t *sc_chim;
389 uint32_t sc_chim_hndl;
390 struct hyperv_dma sc_chim_dma;
391 kmutex_t sc_chim_bmap_lock;
392 u_long *sc_chim_bmap;
393 int sc_chim_bmap_cnt;
394 int sc_chim_cnt;
395 int sc_chim_szmax;
396
397 /* Packet transmission aggregation user settings. */
398 int sc_agg_size;
399 int sc_agg_pkts;
400 };
401
402 #define SC2IFP(_sc_) (&(_sc_)->sc_ec.ec_if)
403 #define IFP2SC(_ifp_) ((_ifp_)->if_softc)
404
405 #ifndef HVN_TX_PROCESS_LIMIT_DEFAULT
406 #define HVN_TX_PROCESS_LIMIT_DEFAULT 128
407 #endif
408 #ifndef HVN_RX_PROCESS_LIMIT_DEFAULT
409 #define HVN_RX_PROCESS_LIMIT_DEFAULT 128
410 #endif
411 #ifndef HVN_TX_INTR_PROCESS_LIMIT_DEFAULT
412 #define HVN_TX_INTR_PROCESS_LIMIT_DEFAULT 256
413 #endif
414 #ifndef HVN_RX_INTR_PROCESS_LIMIT_DEFAULT
415 #define HVN_RX_INTR_PROCESS_LIMIT_DEFAULT 256
416 #endif
417
418 /*
419 * See hvn_set_hlen().
420 *
421 * This value is for Azure. For Hyper-V, set this above
422 * 65536 to disable UDP datagram checksum fixup.
423 */
424 #ifndef HVN_UDP_CKSUM_FIXUP_MTU_DEFAULT
425 #define HVN_UDP_CKSUM_FIXUP_MTU_DEFAULT 1420
426 #endif
427 static int hvn_udpcs_fixup_mtu = HVN_UDP_CKSUM_FIXUP_MTU_DEFAULT;
428
429 /* Limit chimney send size */
430 static int hvn_tx_chimney_size = 0;
431
432 /* # of channels to use; each channel has one RX ring and one TX ring */
433 #ifndef HVN_CHANNEL_COUNT_DEFAULT
434 #define HVN_CHANNEL_COUNT_DEFAULT 0
435 #endif
436 static int hvn_channel_cnt = HVN_CHANNEL_COUNT_DEFAULT;
437
438 /* # of transmit rings to use */
439 #ifndef HVN_TX_RING_COUNT_DEFAULT
440 #define HVN_TX_RING_COUNT_DEFAULT 0
441 #endif
442 static int hvn_tx_ring_cnt = HVN_TX_RING_COUNT_DEFAULT;
443
444 /* Packet transmission aggregation size limit */
445 static int hvn_tx_agg_size = -1;
446
447 /* Packet transmission aggregation count limit */
448 static int hvn_tx_agg_pkts = -1;
449
450 static int hvn_match(device_t, cfdata_t, void *);
451 static void hvn_attach(device_t, device_t, void *);
452 static int hvn_detach(device_t, int);
453
454 CFATTACH_DECL_NEW(hvn, sizeof(struct hvn_softc),
455 hvn_match, hvn_attach, hvn_detach, NULL);
456
457 static int hvn_ioctl(struct ifnet *, u_long, void *);
458 static int hvn_media_change(struct ifnet *);
459 static void hvn_media_status(struct ifnet *, struct ifmediareq *);
460 static void hvn_link_task(void *);
461 static void hvn_link_event(struct hvn_softc *, uint32_t);
462 static void hvn_link_netchg_tmout_cb(void *);
463 static int hvn_init(struct ifnet *);
464 static int hvn_init_locked(struct ifnet *);
465 static void hvn_stop(struct ifnet *, int);
466 static void hvn_stop_locked(struct ifnet *);
467 static void hvn_start(struct ifnet *);
468 static int hvn_transmit(struct ifnet *, struct mbuf *);
469 static void hvn_deferred_transmit(void *);
470 static int hvn_flush_txagg(struct hvn_tx_ring *);
471 static int hvn_encap(struct hvn_tx_ring *, struct hvn_tx_desc *,
472 struct mbuf *, int);
473 static int hvn_txpkt(struct hvn_tx_ring *, struct hvn_tx_desc *);
474 static void hvn_txeof(struct hvn_tx_ring *, uint64_t);
475 static int hvn_rx_ring_create(struct hvn_softc *, int);
476 static int hvn_rx_ring_destroy(struct hvn_softc *);
477 static void hvn_fixup_rx_data(struct hvn_softc *);
478 static int hvn_tx_ring_create(struct hvn_softc *, int);
479 static void hvn_tx_ring_destroy(struct hvn_softc *);
480 static void hvn_set_chim_size(struct hvn_softc *, int);
481 static uint32_t hvn_chim_alloc(struct hvn_softc *);
482 static void hvn_chim_free(struct hvn_softc *, uint32_t);
483 static void hvn_fixup_tx_data(struct hvn_softc *);
484 static struct mbuf *
485 hvn_set_hlen(struct mbuf *, int *);
486 static int hvn_txd_peek(struct hvn_tx_ring *);
487 static struct hvn_tx_desc *
488 hvn_txd_get(struct hvn_tx_ring *);
489 static void hvn_txd_put(struct hvn_tx_ring *, struct hvn_tx_desc *);
490 static void hvn_txd_gc(struct hvn_tx_ring *, struct hvn_tx_desc *);
491 static void hvn_txd_hold(struct hvn_tx_desc *);
492 static void hvn_txd_agg(struct hvn_tx_desc *, struct hvn_tx_desc *);
493 static int hvn_tx_ring_pending(struct hvn_tx_ring *);
494 static void hvn_tx_ring_qflush(struct hvn_softc *, struct hvn_tx_ring *);
495 static int hvn_get_rsscaps(struct hvn_softc *, int *);
496 static int hvn_set_rss(struct hvn_softc *, uint16_t);
497 static void hvn_fixup_rss_ind(struct hvn_softc *);
498 static int hvn_get_hwcaps(struct hvn_softc *, struct ndis_offload *);
499 static int hvn_set_capabilities(struct hvn_softc *, int);
500 static int hvn_get_lladdr(struct hvn_softc *, uint8_t *);
501 static void hvn_update_link_status(struct hvn_softc *);
502 static int hvn_get_mtu(struct hvn_softc *, uint32_t *);
503 static int hvn_channel_attach(struct hvn_softc *, struct vmbus_channel *);
504 static void hvn_channel_detach(struct hvn_softc *, struct vmbus_channel *);
505 static void hvn_channel_detach_all(struct hvn_softc *);
506 static int hvn_subchannel_attach(struct hvn_softc *);
507 static int hvn_synth_alloc_subchannels(struct hvn_softc *, int *);
508 static int hvn_synth_attachable(const struct hvn_softc *);
509 static int hvn_synth_attach(struct hvn_softc *, int);
510 static void hvn_synth_detach(struct hvn_softc *);
511 static void hvn_set_ring_inuse(struct hvn_softc *, int);
512 static void hvn_disable_rx(struct hvn_softc *);
static void	hvn_drain_rxtx(struct hvn_softc *, int);
514 static void hvn_suspend_data(struct hvn_softc *);
515 static void hvn_suspend_mgmt(struct hvn_softc *);
516 static void hvn_suspend(struct hvn_softc *) __unused;
517 static void hvn_resume_tx(struct hvn_softc *, int);
518 static void hvn_resume_data(struct hvn_softc *);
519 static void hvn_resume_mgmt(struct hvn_softc *);
520 static void hvn_resume(struct hvn_softc *) __unused;
521 static void hvn_init_sysctls(struct hvn_softc *);
522
/* NVS */
524 static int hvn_nvs_init(struct hvn_softc *);
525 static void hvn_nvs_destroy(struct hvn_softc *);
526 static int hvn_nvs_attach(struct hvn_softc *, int);
527 static int hvn_nvs_connect_rxbuf(struct hvn_softc *);
528 static int hvn_nvs_disconnect_rxbuf(struct hvn_softc *);
529 static int hvn_nvs_connect_chim(struct hvn_softc *);
530 static int hvn_nvs_disconnect_chim(struct hvn_softc *);
531 static void hvn_handle_ring_work(struct work *, void *);
532 static void hvn_nvs_softintr(void *);
533 static void hvn_nvs_intr(void *);
534 static void hvn_nvs_intr1(struct hvn_rx_ring *, int, int);
535 static int hvn_nvs_cmd(struct hvn_softc *, void *, size_t, uint64_t,
536 u_int);
537 static int hvn_nvs_ack(struct hvn_rx_ring *, uint64_t);
538 static void hvn_nvs_detach(struct hvn_softc *);
539 static int hvn_nvs_alloc_subchannels(struct hvn_softc *, int *);
540
541 /* RNDIS */
542 static int hvn_rndis_init(struct hvn_softc *);
543 static void hvn_rndis_destroy(struct hvn_softc *);
544 static int hvn_rndis_attach(struct hvn_softc *, int);
545 static int hvn_rndis_cmd(struct hvn_softc *, struct rndis_cmd *, u_int);
546 static int hvn_rndis_input(struct hvn_rx_ring *, uint64_t, void *);
547 static int hvn_rxeof(struct hvn_rx_ring *, uint8_t *, uint32_t);
548 static void hvn_rndis_complete(struct hvn_softc *, uint8_t *, uint32_t);
549 static int hvn_rndis_output_sgl(struct hvn_tx_ring *,
550 struct hvn_tx_desc *);
551 static int hvn_rndis_output_chim(struct hvn_tx_ring *,
552 struct hvn_tx_desc *);
553 static void hvn_rndis_status(struct hvn_softc *, uint8_t *, uint32_t);
554 static int hvn_rndis_query(struct hvn_softc *, uint32_t, void *, size_t *);
555 static int hvn_rndis_query2(struct hvn_softc *, uint32_t, const void *,
556 size_t, void *, size_t *, size_t);
557 static int hvn_rndis_set(struct hvn_softc *, uint32_t, void *, size_t);
558 static int hvn_rndis_open(struct hvn_softc *);
559 static int hvn_rndis_close(struct hvn_softc *);
560 static void hvn_rndis_detach(struct hvn_softc *);
561
562 static int
563 hvn_match(device_t parent, cfdata_t match, void *aux)
564 {
565 struct vmbus_attach_args *aa = aux;
566
567 if (memcmp(aa->aa_type, &hyperv_guid_network, sizeof(*aa->aa_type)))
568 return 0;
569 return 1;
570 }
571
572 static void
573 hvn_attach(device_t parent, device_t self, void *aux)
574 {
575 struct hvn_softc *sc = device_private(self);
576 struct vmbus_attach_args *aa = aux;
577 struct ifnet *ifp = SC2IFP(sc);
578 char xnamebuf[32];
579 uint8_t enaddr[ETHER_ADDR_LEN];
580 uint32_t mtu;
581 int tx_ring_cnt, ring_cnt;
582 int error;
583
584 sc->sc_dev = self;
585 sc->sc_vmbus = (struct vmbus_softc *)device_private(parent);
586 sc->sc_prichan = aa->aa_chan;
587 sc->sc_dmat = sc->sc_vmbus->sc_dmat;
588
589 aprint_naive("\n");
590 aprint_normal(": Hyper-V NetVSC\n");
591
592 sc->sc_txrx_workqueue = true;
593 sc->sc_tx_process_limit = HVN_TX_PROCESS_LIMIT_DEFAULT;
594 sc->sc_rx_process_limit = HVN_RX_PROCESS_LIMIT_DEFAULT;
595 sc->sc_tx_intr_process_limit = HVN_TX_INTR_PROCESS_LIMIT_DEFAULT;
596 sc->sc_rx_intr_process_limit = HVN_RX_INTR_PROCESS_LIMIT_DEFAULT;
597 sc->sc_agg_size = hvn_tx_agg_size;
598 sc->sc_agg_pkts = hvn_tx_agg_pkts;
599
600 mutex_init(&sc->sc_core_lock, MUTEX_DEFAULT, IPL_SOFTNET);
601 mutex_init(&sc->sc_link_lock, MUTEX_DEFAULT, IPL_NET);
602 cv_init(&sc->sc_link_cv, "hvnknkcv");
603 callout_init(&sc->sc_link_tmout, CALLOUT_MPSAFE);
604 callout_setfunc(&sc->sc_link_tmout, hvn_link_netchg_tmout_cb, sc);
605 if (kthread_create(PRI_NONE, KTHREAD_MUSTJOIN | KTHREAD_MPSAFE, NULL,
606 hvn_link_task, sc, &sc->sc_link_lwp, "%slink",
607 device_xname(self))) {
608 aprint_error_dev(self, "failed to create link thread\n");
609 return;
610 }
611
612 snprintf(xnamebuf, sizeof(xnamebuf), "%srxtx", device_xname(self));
613 if (workqueue_create(&sc->sc_wq, xnamebuf, hvn_handle_ring_work,
614 sc, HVN_WORKQUEUE_PRI, IPL_NET, WQ_PERCPU | WQ_MPSAFE)) {
615 aprint_error_dev(self, "failed to create workqueue\n");
616 sc->sc_wq = NULL;
617 goto destroy_link_thread;
618 }
619
620 ring_cnt = hvn_channel_cnt;
621 if (ring_cnt <= 0) {
622 ring_cnt = ncpu;
623 if (ring_cnt > HVN_CHANNEL_MAX_COUNT_DEFAULT)
624 ring_cnt = HVN_CHANNEL_MAX_COUNT_DEFAULT;
625 } else if (ring_cnt > ncpu)
626 ring_cnt = ncpu;
627
628 tx_ring_cnt = hvn_tx_ring_cnt;
629 if (tx_ring_cnt <= 0 || tx_ring_cnt > ring_cnt)
630 tx_ring_cnt = ring_cnt;
631
632 if (hvn_tx_ring_create(sc, tx_ring_cnt)) {
633 aprint_error_dev(self, "failed to create Tx ring\n");
634 goto destroy_wq;
635 }
636
637 if (hvn_rx_ring_create(sc, ring_cnt)) {
638 aprint_error_dev(self, "failed to create Rx ring\n");
639 goto destroy_tx_ring;
640 }
641
642 strlcpy(ifp->if_xname, device_xname(sc->sc_dev), IFNAMSIZ);
643 ifp->if_softc = sc;
644 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
645 ifp->if_extflags = IFEF_MPSAFE;
646 ifp->if_ioctl = hvn_ioctl;
647 ifp->if_start = hvn_start;
648 ifp->if_transmit = hvn_transmit;
649 ifp->if_init = hvn_init;
650 ifp->if_stop = hvn_stop;
651 ifp->if_baudrate = IF_Gbps(10);
652
653 IFQ_SET_MAXLEN(&ifp->if_snd, uimax(HVN_TX_DESC - 1, IFQ_MAXLEN));
654 IFQ_SET_READY(&ifp->if_snd);
655
656 /* Initialize ifmedia structures. */
657 sc->sc_ec.ec_ifmedia = &sc->sc_media;
658 ifmedia_init_with_lock(&sc->sc_media, IFM_IMASK,
659 hvn_media_change, hvn_media_status, &sc->sc_core_lock);
660 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
661 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_10G_T | IFM_FDX, 0, NULL);
662 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_10G_T, 0, NULL);
663 ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
664
665 if_initialize(ifp);
666 sc->sc_ipq = if_percpuq_create(ifp);
667 if_deferred_start_init(ifp, NULL);
668
669 hvn_nvs_init(sc);
670 hvn_rndis_init(sc);
671 if (hvn_synth_attach(sc, ETHERMTU)) {
672 aprint_error_dev(self, "failed to attach synth\n");
673 goto destroy_if_percpuq;
674 }
675
676 aprint_normal_dev(self, "NVS %d.%d NDIS %d.%d\n",
677 sc->sc_proto >> 16, sc->sc_proto & 0xffff,
	    sc->sc_ndisver >> 16, sc->sc_ndisver & 0xffff);
679
680 if (hvn_get_lladdr(sc, enaddr)) {
681 aprint_error_dev(self,
682 "failed to obtain an ethernet address\n");
683 goto detach_synth;
684 }
685 aprint_normal_dev(self, "Ethernet address %s\n", ether_sprintf(enaddr));
686
	/*
	 * Fix up TX/RX state after the synthetic parts are attached.
	 */
690 hvn_fixup_tx_data(sc);
691 hvn_fixup_rx_data(sc);
692
693 ifp->if_capabilities |= sc->sc_txr[0].txr_caps_assist &
694 (IFCAP_CSUM_IPv4_Tx | IFCAP_CSUM_IPv4_Rx |
695 IFCAP_CSUM_TCPv4_Tx | IFCAP_CSUM_TCPv4_Rx |
696 IFCAP_CSUM_TCPv6_Tx | IFCAP_CSUM_TCPv6_Rx |
697 IFCAP_CSUM_UDPv4_Tx | IFCAP_CSUM_UDPv4_Rx |
698 IFCAP_CSUM_UDPv6_Tx | IFCAP_CSUM_UDPv6_Rx);
699 /* XXX TSOv4, TSOv6 */
700 if (sc->sc_caps & HVN_CAPS_VLAN) {
701 /* XXX not sure about VLAN_MTU. */
702 sc->sc_ec.ec_capabilities |= ETHERCAP_VLAN_HWTAGGING;
703 sc->sc_ec.ec_capabilities |= ETHERCAP_VLAN_MTU;
704 }
705 sc->sc_ec.ec_capabilities |= ETHERCAP_JUMBO_MTU;
706
707 ether_ifattach(ifp, enaddr);
708
709 error = hvn_get_mtu(sc, &mtu);
710 if (error)
711 mtu = ETHERMTU;
712 if (mtu < ETHERMTU) {
713 DPRINTF("%s: fixup mtu %u -> %u\n", device_xname(sc->sc_dev),
714 ETHERMTU, mtu);
715 ifp->if_mtu = mtu;
716 }
717
718 if_register(ifp);
719
720 /*
721 * Kick off link status check.
722 */
723 hvn_link_event(sc, HVN_LINK_EV_STATE_CHANGE);
724
725 hvn_init_sysctls(sc);
726
727 if (pmf_device_register(self, NULL, NULL))
728 pmf_class_network_register(self, ifp);
729 else
730 aprint_error_dev(self, "couldn't establish power handler\n");
731
732 SET(sc->sc_flags, HVN_SCF_ATTACHED);
733 return;
734
735 detach_synth:
736 hvn_synth_detach(sc);
737 hvn_rndis_destroy(sc);
738 hvn_nvs_destroy(sc);
739 destroy_if_percpuq:
740 if_percpuq_destroy(sc->sc_ipq);
741 hvn_rx_ring_destroy(sc);
742 destroy_tx_ring:
743 hvn_tx_ring_destroy(sc);
744 destroy_wq:
745 workqueue_destroy(sc->sc_wq);
746 sc->sc_wq = NULL;
747 destroy_link_thread:
748 hvn_link_event(sc, HVN_LINK_EV_EXIT_THREAD);
749 kthread_join(sc->sc_link_lwp);
750 callout_destroy(&sc->sc_link_tmout);
751 cv_destroy(&sc->sc_link_cv);
752 mutex_destroy(&sc->sc_link_lock);
753 mutex_destroy(&sc->sc_core_lock);
754 }
755
756 static int
757 hvn_detach(device_t self, int flags)
758 {
759 struct hvn_softc *sc = device_private(self);
760 struct ifnet *ifp = SC2IFP(sc);
761
762 if (!ISSET(sc->sc_flags, HVN_SCF_ATTACHED))
763 return 0;
764
765 if (vmbus_channel_is_revoked(sc->sc_prichan))
766 SET(sc->sc_flags, HVN_SCF_REVOKED);
767
768 pmf_device_deregister(self);
769
770 mutex_enter(&sc->sc_core_lock);
771
772 if (ifp->if_flags & IFF_RUNNING)
773 hvn_stop_locked(ifp);
	/*
	 * NOTE:
	 * hvn_stop() only suspends data transfers, so the management
	 * state has to be suspended manually here.
	 */
779 hvn_suspend_mgmt(sc);
780
781 ether_ifdetach(ifp);
782 if_detach(ifp);
783 if_percpuq_destroy(sc->sc_ipq);
784
785 hvn_link_event(sc, HVN_LINK_EV_EXIT_THREAD);
786 kthread_join(sc->sc_link_lwp);
787 callout_halt(&sc->sc_link_tmout, NULL);
788
789 hvn_synth_detach(sc);
790 hvn_rndis_destroy(sc);
791 hvn_nvs_destroy(sc);
792
793 mutex_exit(&sc->sc_core_lock);
794
795 hvn_rx_ring_destroy(sc);
796 hvn_tx_ring_destroy(sc);
797
798 workqueue_destroy(sc->sc_wq);
799 callout_destroy(&sc->sc_link_tmout);
800 cv_destroy(&sc->sc_link_cv);
801 mutex_destroy(&sc->sc_link_lock);
802 mutex_destroy(&sc->sc_core_lock);
803
804 sysctl_teardown(&sc->sc_sysctllog);
805
806 return 0;
807 }
808
809 static int
hvn_ioctl(struct ifnet *ifp, u_long command, void *data)
811 {
812 struct hvn_softc *sc = IFP2SC(ifp);
813 struct ifreq *ifr = (struct ifreq *)data;
814 uint32_t mtu;
815 int s, error = 0;
816
817 switch (command) {
818 case SIOCSIFMTU:
819 if (ifr->ifr_mtu < HVN_MTU_MIN || ifr->ifr_mtu > HVN_MTU_MAX) {
820 error = EINVAL;
821 break;
822 }
823
824 mutex_enter(&sc->sc_core_lock);
825
826 if (!(sc->sc_caps & HVN_CAPS_MTU)) {
827 /* Can't change MTU */
828 mutex_exit(&sc->sc_core_lock);
829 error = EOPNOTSUPP;
830 break;
831 }
832
833 if (ifp->if_mtu == ifr->ifr_mtu) {
834 mutex_exit(&sc->sc_core_lock);
835 break;
836 }
837
838 /*
839 * Suspend this interface before the synthetic parts
840 * are ripped.
841 */
842 hvn_suspend(sc);
843
		/*
		 * Detach the synthetic parts, i.e. NVS and RNDIS.
		 */
847 hvn_synth_detach(sc);
848
849 /*
850 * Reattach the synthetic parts, i.e. NVS and RNDIS,
851 * with the new MTU setting.
852 */
853 error = hvn_synth_attach(sc, ifr->ifr_mtu);
854 if (error) {
855 mutex_exit(&sc->sc_core_lock);
856 break;
857 }
858
859 error = hvn_get_mtu(sc, &mtu);
860 if (error)
861 mtu = ifr->ifr_mtu;
862 DPRINTF("%s: RNDIS mtu=%d\n", device_xname(sc->sc_dev), mtu);
863
864 /*
865 * Commit the requested MTU, after the synthetic parts
866 * have been successfully attached.
867 */
868 if (mtu >= ifr->ifr_mtu) {
869 mtu = ifr->ifr_mtu;
870 } else {
871 DPRINTF("%s: fixup mtu %d -> %u\n",
872 device_xname(sc->sc_dev), ifr->ifr_mtu, mtu);
873 }
874 ifp->if_mtu = mtu;
875
876 /*
877 * Synthetic parts' reattach may change the chimney
878 * sending size; update it.
879 */
880 if (sc->sc_txr[0].txr_chim_size > sc->sc_chim_szmax)
881 hvn_set_chim_size(sc, sc->sc_chim_szmax);
882
883 /*
884 * All done! Resume the interface now.
885 */
886 hvn_resume(sc);
887
888 mutex_exit(&sc->sc_core_lock);
889 break;
890 default:
891 s = splnet();
892 if (command == SIOCGIFMEDIA || command == SIOCSIFMEDIA)
893 error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, command);
894 else
895 error = ether_ioctl(ifp, command, data);
896 splx(s);
897 if (error == ENETRESET) {
898 mutex_enter(&sc->sc_core_lock);
899 if (ifp->if_flags & IFF_RUNNING)
900 hvn_init_locked(ifp);
901 mutex_exit(&sc->sc_core_lock);
902 error = 0;
903 }
904 break;
905 }
906
907 return error;
908 }
909
910 static int
911 hvn_media_change(struct ifnet *ifp)
912 {
913 struct hvn_softc *sc = IFP2SC(ifp);
914 struct ifmedia *ifm = &sc->sc_media;
915
916 if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
917 return EINVAL;
918
919 switch (IFM_SUBTYPE(ifm->ifm_media)) {
920 case IFM_AUTO:
921 break;
922 default:
923 device_printf(sc->sc_dev, "Only auto media type\n");
924 return EINVAL;
925 }
926 return 0;
927 }
928
929 static void
930 hvn_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
931 {
932 struct hvn_softc *sc = IFP2SC(ifp);
933
934 ifmr->ifm_status = IFM_AVALID;
935 ifmr->ifm_active = IFM_ETHER;
936
937 if (sc->sc_link_state != LINK_STATE_UP) {
938 ifmr->ifm_active |= IFM_NONE;
939 return;
940 }
941
942 ifmr->ifm_status |= IFM_ACTIVE;
943 ifmr->ifm_active |= IFM_10G_T | IFM_FDX;
944 }
945
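/*
 * Link-state kthread: serializes all link events.  Producers set bits
 * in sc_link_ev under sc_link_lock and signal sc_link_cv; this thread
 * consumes the bits and performs the (possibly sleeping) link status
 * updates.
 */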
946 static void
947 hvn_link_task(void *arg)
948 {
949 struct hvn_softc *sc = arg;
950 struct ifnet *ifp = SC2IFP(sc);
951 uint32_t event;
952 int old_link_state;
953
954 mutex_enter(&sc->sc_link_lock);
955 sc->sc_link_onproc = false;
956 for (;;) {
957 if (sc->sc_link_ev == 0) {
958 cv_wait(&sc->sc_link_cv, &sc->sc_link_lock);
959 continue;
960 }
961
962 sc->sc_link_onproc = true;
963 event = sc->sc_link_ev;
964 sc->sc_link_ev = 0;
965 mutex_exit(&sc->sc_link_lock);
966
967 if (event & HVN_LINK_EV_EXIT_THREAD)
968 break;
969
970 if (sc->sc_link_suspend)
971 goto next;
972
973 if (event & HVN_LINK_EV_RESUME_NETWORK) {
974 if (sc->sc_link_pending)
975 event |= HVN_LINK_EV_NETWORK_CHANGE;
976 else
977 event |= HVN_LINK_EV_STATE_CHANGE;
978 }
979
980 if (event & HVN_LINK_EV_NETWORK_CHANGE) {
981 /* Prevent any link status checks from running. */
982 sc->sc_link_pending = true;
983
984 /*
985 * Fake up a [link down --> link up] state change;
986 * 5 seconds delay is used, which closely simulates
987 * miibus reaction upon link down event.
988 */
989 old_link_state = sc->sc_link_state;
990 sc->sc_link_state = LINK_STATE_DOWN;
991 if (old_link_state != sc->sc_link_state) {
992 if_link_state_change(ifp, LINK_STATE_DOWN);
993 }
994 #if defined(HVN_LINK_STATE_CHANGE_DELAY) && HVN_LINK_STATE_CHANGE_DELAY > 0
995 callout_schedule(&sc->sc_link_tmout,
996 mstohz(HVN_LINK_STATE_CHANGE_DELAY));
997 #else
998 hvn_link_event(sc, HVN_LINK_EV_NETWORK_CHANGE_TMOUT);
999 #endif
1000 } else if (event & HVN_LINK_EV_NETWORK_CHANGE_TMOUT) {
1001 /* Re-allow link status checks. */
1002 sc->sc_link_pending = false;
1003 hvn_update_link_status(sc);
1004 } else if (event & HVN_LINK_EV_STATE_CHANGE) {
1005 if (!sc->sc_link_pending)
1006 hvn_update_link_status(sc);
1007 }
1008 next:
1009 mutex_enter(&sc->sc_link_lock);
1010 sc->sc_link_onproc = false;
1011 }
1012
1013 mutex_enter(&sc->sc_link_lock);
1014 sc->sc_link_onproc = false;
1015 mutex_exit(&sc->sc_link_lock);
1016
1017 kthread_exit(0);
1018 }
1019
1020 static void
1021 hvn_link_event(struct hvn_softc *sc, uint32_t ev)
1022 {
1023
1024 mutex_enter(&sc->sc_link_lock);
1025 SET(sc->sc_link_ev, ev);
1026 cv_signal(&sc->sc_link_cv);
1027 mutex_exit(&sc->sc_link_lock);
1028 }
1029
1030 static void
1031 hvn_link_netchg_tmout_cb(void *arg)
1032 {
1033 struct hvn_softc *sc = arg;
1034
1035 hvn_link_event(sc, HVN_LINK_EV_NETWORK_CHANGE_TMOUT);
1036 }
1037
1038 static int
1039 hvn_init(struct ifnet *ifp)
1040 {
1041 struct hvn_softc *sc = IFP2SC(ifp);
1042 int error;
1043
1044 mutex_enter(&sc->sc_core_lock);
1045 error = hvn_init_locked(ifp);
1046 mutex_exit(&sc->sc_core_lock);
1047
1048 return error;
1049 }
1050
1051 static int
1052 hvn_init_locked(struct ifnet *ifp)
1053 {
1054 struct hvn_softc *sc = IFP2SC(ifp);
1055 int error;
1056
1057 KASSERT(mutex_owned(&sc->sc_core_lock));
1058
1059 hvn_stop_locked(ifp);
1060
1061 error = hvn_rndis_open(sc);
1062 if (error)
1063 return error;
1064
1065 /* Clear TX 'suspended' bit. */
1066 hvn_resume_tx(sc, sc->sc_ntxr_inuse);
1067
1068 /* Everything is ready; unleash! */
1069 ifp->if_flags |= IFF_RUNNING;
1070
1071 return 0;
1072 }
1073
1074 static void
1075 hvn_stop(struct ifnet *ifp, int disable)
1076 {
1077 struct hvn_softc *sc = IFP2SC(ifp);
1078
1079 mutex_enter(&sc->sc_core_lock);
1080 hvn_stop_locked(ifp);
1081 mutex_exit(&sc->sc_core_lock);
1082 }
1083
1084 static void
1085 hvn_stop_locked(struct ifnet *ifp)
1086 {
1087 struct hvn_softc *sc = IFP2SC(ifp);
1088 int i;
1089
1090 KASSERT(mutex_owned(&sc->sc_core_lock));
1091
1092 /* Clear RUNNING bit ASAP. */
1093 ifp->if_flags &= ~IFF_RUNNING;
1094
1095 /* Suspend data transfers. */
1096 hvn_suspend_data(sc);
1097
1098 /* Clear OACTIVE state. */
1099 for (i = 0; i < sc->sc_ntxr_inuse; i++)
1100 sc->sc_txr[i].txr_oactive = 0;
1101 }
1102
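/*
 * Common dequeue/encapsulate/send loop shared by the if_start path
 * (is_transmit == false, drains ifp->if_snd) and the if_transmit path
 * (is_transmit == true, drains the per-ring pcq).  Called with
 * txr_lock held.
 */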
1103 static void
1104 hvn_transmit_common(struct ifnet *ifp, struct hvn_tx_ring *txr,
1105 bool is_transmit)
1106 {
1107 struct hvn_tx_desc *txd;
1108 struct mbuf *m;
1109 int l2hlen = ETHER_HDR_LEN;
1110
1111 KASSERT(mutex_owned(&txr->txr_lock));
1112
1113 if (!(ifp->if_flags & IFF_RUNNING))
1114 return;
1115 if (txr->txr_oactive)
1116 return;
1117 if (txr->txr_suspended)
1118 return;
1119
1120 for (;;) {
1121 if (!hvn_txd_peek(txr)) {
1122 /* transient */
1123 txr->txr_oactive = 1;
1124 txr->txr_evnodesc.ev_count++;
1125 break;
1126 }
1127
1128 if (is_transmit)
1129 m = pcq_get(txr->txr_interq);
1130 else
1131 IFQ_DEQUEUE(&ifp->if_snd, m);
1132 if (m == NULL)
1133 break;
1134
1135 #if defined(INET) || defined(INET6)
1136 if (m->m_pkthdr.csum_flags &
1137 (M_CSUM_TCPv4|M_CSUM_UDPv4|M_CSUM_TCPv6|M_CSUM_UDPv6)) {
1138 m = hvn_set_hlen(m, &l2hlen);
1139 if (__predict_false(m == NULL)) {
1140 if_statinc(ifp, if_oerrors);
1141 continue;
1142 }
1143 }
1144 #endif
1145
1146 txd = hvn_txd_get(txr);
1147 if (hvn_encap(txr, txd, m, l2hlen)) {
1148 /* the chain is too large */
1149 if_statinc(ifp, if_oerrors);
1150 hvn_txd_put(txr, txd);
1151 m_freem(m);
1152 continue;
1153 }
1154
1155 if (txr->txr_agg_pktleft == 0) {
1156 if (txr->txr_agg_txd != NULL) {
1157 hvn_flush_txagg(txr);
1158 } else {
1159 if (hvn_txpkt(txr, txd)) {
1160 /* txd is freed, but m is not. */
1161 m_freem(m);
1162 if_statinc(ifp, if_oerrors);
1163 }
1164 }
1165 }
1166 }
1167
	/* Flush pending aggregated transmission. */
1169 if (txr->txr_agg_txd != NULL)
1170 hvn_flush_txagg(txr);
1171 }
1172
1173 static void
1174 hvn_start(struct ifnet *ifp)
1175 {
1176 struct hvn_softc *sc = IFP2SC(ifp);
1177 struct hvn_tx_ring *txr = &sc->sc_txr[0];
1178
1179 mutex_enter(&txr->txr_lock);
1180 hvn_transmit_common(ifp, txr, false);
1181 mutex_exit(&txr->txr_lock);
1182 }
1183
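/*
 * Pick a TX ring for the outgoing packet by mapping the current CPU
 * onto the active rings; the mbuf itself is not hashed.
 */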
1184 static int
1185 hvn_select_txqueue(struct ifnet *ifp, struct mbuf *m __unused)
1186 {
1187 struct hvn_softc *sc = IFP2SC(ifp);
1188 u_int cpu;
1189
1190 cpu = cpu_index(curcpu());
1191
1192 return cpu % sc->sc_ntxr_inuse;
1193 }
1194
1195 static int
1196 hvn_transmit(struct ifnet *ifp, struct mbuf *m)
1197 {
1198 struct hvn_softc *sc = IFP2SC(ifp);
1199 struct hvn_tx_ring *txr;
1200 int qid;
1201
1202 qid = hvn_select_txqueue(ifp, m);
1203 txr = &sc->sc_txr[qid];
1204
1205 if (__predict_false(!pcq_put(txr->txr_interq, m))) {
1206 mutex_enter(&txr->txr_lock);
1207 txr->txr_evpcqdrop.ev_count++;
1208 mutex_exit(&txr->txr_lock);
1209 m_freem(m);
1210 return ENOBUFS;
1211 }
1212
1213 kpreempt_disable();
1214 softint_schedule(txr->txr_si);
1215 kpreempt_enable();
1216 return 0;
1217 }
1218
1219 static void
1220 hvn_deferred_transmit(void *arg)
1221 {
1222 struct hvn_tx_ring *txr = arg;
1223 struct hvn_softc *sc = txr->txr_softc;
1224 struct ifnet *ifp = SC2IFP(sc);
1225
1226 mutex_enter(&txr->txr_lock);
1227 txr->txr_evtransmitdefer.ev_count++;
1228 hvn_transmit_common(ifp, txr, true);
1229 mutex_exit(&txr->txr_lock);
1230 }
1231
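/*
 * Reserve a per-packet-info (PPI) record at the tail of the RNDIS
 * packet message, bump the message/data lengths accordingly, and
 * return a pointer to the record's data area for the caller to fill
 * in, as in the VLAN case below:
 *
 *	cp = hvn_rndis_pktinfo_append(pkt, HVN_RNDIS_PKT_LEN,
 *	    NDIS_VLAN_INFO_SIZE, NDIS_PKTINFO_TYPE_VLAN);
 *	memcpy(cp, &vlan, NDIS_VLAN_INFO_SIZE);
 */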
1232 static inline char *
1233 hvn_rndis_pktinfo_append(struct rndis_packet_msg *pkt, size_t pktsize,
1234 size_t datalen, uint32_t type)
1235 {
1236 struct rndis_pktinfo *pi;
1237 size_t pi_size = sizeof(*pi) + datalen;
1238 char *cp;
1239
1240 KASSERT(pkt->rm_pktinfooffset + pkt->rm_pktinfolen + pi_size <=
1241 pktsize);
1242
1243 cp = (char *)pkt + pkt->rm_pktinfooffset + pkt->rm_pktinfolen;
1244 pi = (struct rndis_pktinfo *)cp;
1245 pi->rm_size = pi_size;
1246 pi->rm_type = type;
1247 pi->rm_pktinfooffset = sizeof(*pi);
1248 pkt->rm_pktinfolen += pi_size;
1249 pkt->rm_dataoffset += pi_size;
1250 pkt->rm_len += pi_size;
1251
1252 return (char *)pi->rm_data;
1253 }
1254
1255 static struct mbuf *
1256 hvn_pullup_hdr(struct mbuf *m, int len)
1257 {
1258 struct mbuf *mn;
1259
1260 if (__predict_false(m->m_len < len)) {
1261 mn = m_pullup(m, len);
1262 if (mn == NULL)
1263 return NULL;
1264 m = mn;
1265 }
1266 return m;
1267 }
1268
/*
 * Determine the L2 header length (ETHER_HDR_LEN, plus
 * ETHER_VLAN_ENCAP_LEN for tagged frames) and apply the Azure UDPv4
 * checksum workaround where needed.
 *
 * NOTE: If this function fails, the mbuf is freed.
 */
1272 static struct mbuf *
1273 hvn_set_hlen(struct mbuf *m, int *l2hlenp)
1274 {
1275 const struct ether_header *eh;
1276 int l2hlen, off;
1277
1278 m = hvn_pullup_hdr(m, sizeof(*eh));
1279 if (m == NULL)
1280 return NULL;
1281
1282 eh = mtod(m, const struct ether_header *);
1283 if (eh->ether_type == ntohs(ETHERTYPE_VLAN))
1284 l2hlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1285 else
1286 l2hlen = ETHER_HDR_LEN;
1287
1288 #if defined(INET)
1289 if (m->m_pkthdr.csum_flags & (M_CSUM_TCPv4 | M_CSUM_UDPv4)) {
1290 const struct ip *ip;
1291
1292 off = l2hlen + sizeof(*ip);
1293 m = hvn_pullup_hdr(m, off);
1294 if (m == NULL)
1295 return NULL;
1296
		ip = (struct ip *)(mtod(m, uint8_t *) + l2hlen);
1298
		/*
		 * UDP checksum offload does not work in Azure if the
		 * following conditions are met:
		 * - sizeof(IP hdr + UDP hdr + payload) > 1420.
		 * - IP_DF is not set in the IP hdr.
		 *
		 * Fall back to software checksum for these UDP datagrams.
		 */
1307 if ((m->m_pkthdr.csum_flags & M_CSUM_UDPv4) &&
1308 m->m_pkthdr.len > hvn_udpcs_fixup_mtu + l2hlen &&
1309 !(ntohs(ip->ip_off) & IP_DF)) {
1310 uint16_t *csump;
1311
1312 off = l2hlen +
1313 M_CSUM_DATA_IPv4_IPHL(m->m_pkthdr.csum_data);
1314 m = hvn_pullup_hdr(m, off + sizeof(struct udphdr));
1315 if (m == NULL)
1316 return NULL;
1317
1318 csump = (uint16_t *)(mtod(m, uint8_t *) + off +
1319 M_CSUM_DATA_IPv4_OFFSET(m->m_pkthdr.csum_data));
1320 *csump = cpu_in_cksum(m, m->m_pkthdr.len - off, off, 0);
1321 m->m_pkthdr.csum_flags &= ~M_CSUM_UDPv4;
1322 }
1323 }
1324 #endif /* INET */
1325 #if defined(INET) && defined(INET6)
1326 else
1327 #endif /* INET && INET6 */
1328 #if defined(INET6)
1329 {
1330 const struct ip6_hdr *ip6;
1331
1332 off = l2hlen + sizeof(*ip6);
1333 m = hvn_pullup_hdr(m, off);
1334 if (m == NULL)
1335 return NULL;
1336
1337 ip6 = (struct ip6_hdr *)((mtod(m, uint8_t *)) + l2hlen);
1338 if (ip6->ip6_nxt != IPPROTO_TCP &&
1339 ip6->ip6_nxt != IPPROTO_UDP) {
1340 m_freem(m);
1341 return NULL;
1342 }
1343 }
1344 #endif /* INET6 */
1345
1346 *l2hlenp = l2hlen;
1347
1348 return m;
1349 }
1350
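/*
 * Send the currently aggregating chimney txdesc and reset the
 * aggregation state.  On failure the parent mbuf is freed here and
 * if_oerrors is charged with the number of aggregated packets.
 */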
1351 static int
1352 hvn_flush_txagg(struct hvn_tx_ring *txr)
1353 {
1354 struct hvn_softc *sc = txr->txr_softc;
1355 struct ifnet *ifp = SC2IFP(sc);
1356 struct hvn_tx_desc *txd;
1357 struct mbuf *m;
1358 int error, pkts;
1359
1360 txd = txr->txr_agg_txd;
1361 KASSERTMSG(txd != NULL, "no aggregate txdesc");
1362
1363 /*
1364 * Since hvn_txpkt() will reset this temporary stat, save
1365 * it now, so that oerrors can be updated properly, if
1366 * hvn_txpkt() ever fails.
1367 */
1368 pkts = txr->txr_stat_pkts;
1369
1370 /*
1371 * Since txd's mbuf will _not_ be freed upon hvn_txpkt()
1372 * failure, save it for later freeing, if hvn_txpkt() ever
1373 * fails.
1374 */
1375 m = txd->txd_buf;
1376 error = hvn_txpkt(txr, txd);
1377 if (__predict_false(error)) {
1378 /* txd is freed, but m is not. */
1379 m_freem(m);
1380 txr->txr_evflushfailed.ev_count++;
1381 if_statadd(ifp, if_oerrors, pkts);
1382 }
1383
1384 /* Reset all aggregation states. */
1385 txr->txr_agg_txd = NULL;
1386 txr->txr_agg_szleft = 0;
1387 txr->txr_agg_pktleft = 0;
1388 txr->txr_agg_prevpkt = NULL;
1389
1390 return error;
1391 }
1392
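/*
 * Chimney/aggregation helper: if an aggregating txdesc is open and has
 * both packet and size budget left, link txd to it and return a write
 * pointer just past the previous RNDIS packet.  Otherwise flush, try
 * to allocate a fresh chimney slot, and possibly start a new
 * aggregation.  Returns NULL if no chimney slot is available, in which
 * case the caller falls back to the SG list path.
 */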
1393 static void *
1394 hvn_try_txagg(struct hvn_tx_ring *txr, struct hvn_tx_desc *txd, int pktsz)
1395 {
1396 struct hvn_softc *sc = txr->txr_softc;
1397 struct hvn_tx_desc *agg_txd;
1398 struct rndis_packet_msg *pkt;
1399 void *chim;
1400 int olen;
1401
1402 if (txr->txr_agg_txd != NULL) {
1403 if (txr->txr_agg_pktleft > 0 && txr->txr_agg_szleft > pktsz) {
1404 agg_txd = txr->txr_agg_txd;
1405 pkt = txr->txr_agg_prevpkt;
1406
1407 /*
1408 * Update the previous RNDIS packet's total length,
1409 * it can be increased due to the mandatory alignment
1410 * padding for this RNDIS packet. And update the
1411 * aggregating txdesc's chimney sending buffer size
1412 * accordingly.
1413 *
1414 * XXX
1415 * Zero-out the padding, as required by the RNDIS spec.
1416 */
1417 olen = pkt->rm_len;
1418 pkt->rm_len = roundup2(olen, txr->txr_agg_align);
1419 agg_txd->txd_chim_size += pkt->rm_len - olen;
1420
1421 /* Link this txdesc to the parent. */
1422 hvn_txd_agg(agg_txd, txd);
1423
1424 chim = (uint8_t *)pkt + pkt->rm_len;
1425 /* Save the current packet for later fixup. */
1426 txr->txr_agg_prevpkt = chim;
1427
1428 txr->txr_agg_pktleft--;
1429 txr->txr_agg_szleft -= pktsz;
1430 if (txr->txr_agg_szleft <=
1431 HVN_PKTSIZE_MIN(txr->txr_agg_align)) {
1432 /*
1433 * Probably can't aggregate more packets,
1434 * flush this aggregating txdesc proactively.
1435 */
1436 txr->txr_agg_pktleft = 0;
1437 }
1438
1439 /* Done! */
1440 return chim;
1441 }
1442 hvn_flush_txagg(txr);
1443 }
1444
1445 txr->txr_evchimneytried.ev_count++;
1446 txd->txd_chim_index = hvn_chim_alloc(sc);
1447 if (txd->txd_chim_index == HVN_NVS_CHIM_IDX_INVALID)
1448 return NULL;
1449 txr->txr_evchimney.ev_count++;
1450
1451 chim = sc->sc_chim + (txd->txd_chim_index * sc->sc_chim_szmax);
1452
1453 if (txr->txr_agg_pktmax > 1 &&
1454 txr->txr_agg_szmax > pktsz + HVN_PKTSIZE_MIN(txr->txr_agg_align)) {
1455 txr->txr_agg_txd = txd;
1456 txr->txr_agg_pktleft = txr->txr_agg_pktmax - 1;
1457 txr->txr_agg_szleft = txr->txr_agg_szmax - pktsz;
1458 txr->txr_agg_prevpkt = chim;
1459 }
1460
1461 return chim;
1462 }
1463
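/*
 * Build the RNDIS packet message for m: either copy it into a chimney
 * slot (fast path for small packets) or DMA-load it and build an SG
 * list, appending hash/VLAN/checksum packet-info records as required.
 * Returns 0 on success; on failure m is left for the caller to free.
 */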
1464 static int
1465 hvn_encap(struct hvn_tx_ring *txr, struct hvn_tx_desc *txd, struct mbuf *m,
1466 int l2hlen)
1467 {
	/* Used to pad Ethernet frames shorter than ETHER_MIN_LEN bytes */
1469 static const char zero_pad[ETHER_MIN_LEN];
1470 struct hvn_softc *sc = txr->txr_softc;
1471 struct rndis_packet_msg *pkt;
1472 bus_dma_segment_t *seg;
1473 void *chim = NULL;
1474 size_t pktlen, pktsize;
1475 int l3hlen;
1476 int i, rv;
1477
1478 if (ISSET(sc->sc_caps, HVN_CAPS_VLAN) && !vlan_has_tag(m)) {
1479 struct ether_vlan_header *evl;
1480
1481 m = hvn_pullup_hdr(m, sizeof(*evl));
1482 if (m == NULL) {
1483 DPRINTF("%s: failed to pullup mbuf\n",
1484 device_xname(sc->sc_dev));
1485 return -1;
1486 }
1487
1488 evl = mtod(m, struct ether_vlan_header *);
1489 if (evl->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1490 struct ether_header *eh;
1491 uint16_t proto = evl->evl_proto;
1492
1493 vlan_set_tag(m, ntohs(evl->evl_tag));
1494
1495 /*
1496 * Trim VLAN tag from header.
1497 */
1498 memmove((uint8_t *)evl + ETHER_VLAN_ENCAP_LEN,
1499 evl, ETHER_HDR_LEN);
1500 m_adj(m, ETHER_VLAN_ENCAP_LEN);
1501
1502 eh = mtod(m, struct ether_header *);
1503 eh->ether_type = proto;
1504
1505 /*
1506 * Re-padding. See sys/net/if_vlan.c:vlan_start().
1507 */
1508 if (m->m_pkthdr.len < (ETHER_MIN_LEN - ETHER_CRC_LEN +
1509 ETHER_VLAN_ENCAP_LEN)) {
1510 m_copyback(m, m->m_pkthdr.len,
1511 (ETHER_MIN_LEN - ETHER_CRC_LEN +
1512 ETHER_VLAN_ENCAP_LEN) -
1513 m->m_pkthdr.len, zero_pad);
1514 }
1515
1516 txr->txr_evvlanfixup.ev_count++;
1517 }
1518 }
1519
1520 pkt = txd->txd_req;
1521 pktsize = HVN_PKTSIZE(m, txr->txr_agg_align);
1522 if (pktsize < txr->txr_chim_size) {
1523 chim = hvn_try_txagg(txr, txd, pktsize);
1524 if (chim != NULL)
1525 pkt = chim;
1526 } else {
1527 if (txr->txr_agg_txd != NULL)
1528 hvn_flush_txagg(txr);
1529 }
1530
1531 memset(pkt, 0, HVN_RNDIS_PKT_LEN);
1532 pkt->rm_type = REMOTE_NDIS_PACKET_MSG;
1533 pkt->rm_len = sizeof(*pkt) + m->m_pkthdr.len;
1534 pkt->rm_dataoffset = RNDIS_DATA_OFFSET;
1535 pkt->rm_datalen = m->m_pkthdr.len;
1536 pkt->rm_pktinfooffset = sizeof(*pkt); /* adjusted below */
1537 pkt->rm_pktinfolen = 0;
1538
1539 if (txr->txr_flags & HVN_TXR_FLAG_UDP_HASH) {
1540 char *cp;
1541
		/*
		 * Set the hash value for this packet, so that the host can
		 * dispatch the TX done event for this packet back to this
		 * TX ring's channel.
		 */
1547 cp = hvn_rndis_pktinfo_append(pkt, HVN_RNDIS_PKT_LEN,
1548 HVN_NDIS_HASH_VALUE_SIZE, HVN_NDIS_PKTINFO_TYPE_HASHVAL);
1549 memcpy(cp, &txr->txr_id, HVN_NDIS_HASH_VALUE_SIZE);
1550 }
1551
1552 if (vlan_has_tag(m)) {
1553 uint32_t vlan;
1554 char *cp;
1555 uint16_t tag;
1556
1557 tag = vlan_get_tag(m);
1558 vlan = NDIS_VLAN_INFO_MAKE(EVL_VLANOFTAG(tag),
1559 EVL_PRIOFTAG(tag), EVL_CFIOFTAG(tag));
1560 cp = hvn_rndis_pktinfo_append(pkt, HVN_RNDIS_PKT_LEN,
1561 NDIS_VLAN_INFO_SIZE, NDIS_PKTINFO_TYPE_VLAN);
1562 memcpy(cp, &vlan, NDIS_VLAN_INFO_SIZE);
1563 txr->txr_evvlanhwtagging.ev_count++;
1564 }
1565
1566 if (m->m_pkthdr.csum_flags & txr->txr_csum_assist) {
1567 uint32_t csum;
1568 char *cp;
1569
1570 if (m->m_pkthdr.csum_flags & (M_CSUM_TCPv6 | M_CSUM_UDPv6)) {
1571 csum = NDIS_TXCSUM_INFO_IPV6;
1572 l3hlen = M_CSUM_DATA_IPv6_IPHL(m->m_pkthdr.csum_data);
1573 if (m->m_pkthdr.csum_flags & M_CSUM_TCPv6)
1574 csum |= NDIS_TXCSUM_INFO_MKTCPCS(l2hlen +
1575 l3hlen);
1576 if (m->m_pkthdr.csum_flags & M_CSUM_UDPv6)
1577 csum |= NDIS_TXCSUM_INFO_MKUDPCS(l2hlen +
1578 l3hlen);
1579 } else {
1580 csum = NDIS_TXCSUM_INFO_IPV4;
1581 l3hlen = M_CSUM_DATA_IPv4_IPHL(m->m_pkthdr.csum_data);
1582 if (m->m_pkthdr.csum_flags & M_CSUM_IPv4)
1583 csum |= NDIS_TXCSUM_INFO_IPCS;
1584 if (m->m_pkthdr.csum_flags & M_CSUM_TCPv4)
1585 csum |= NDIS_TXCSUM_INFO_MKTCPCS(l2hlen +
1586 l3hlen);
1587 if (m->m_pkthdr.csum_flags & M_CSUM_UDPv4)
1588 csum |= NDIS_TXCSUM_INFO_MKUDPCS(l2hlen +
1589 l3hlen);
1590 }
1591 cp = hvn_rndis_pktinfo_append(pkt, HVN_RNDIS_PKT_LEN,
1592 NDIS_TXCSUM_INFO_SIZE, NDIS_PKTINFO_TYPE_CSUM);
1593 memcpy(cp, &csum, NDIS_TXCSUM_INFO_SIZE);
1594 }
1595
1596 pktlen = pkt->rm_pktinfooffset + pkt->rm_pktinfolen;
1597 pkt->rm_pktinfooffset -= RNDIS_HEADER_OFFSET;
1598
1599 /*
1600 * Fast path: Chimney sending.
1601 */
1602 if (chim != NULL) {
1603 struct hvn_tx_desc *tgt_txd;
1604
1605 tgt_txd = (txr->txr_agg_txd != NULL) ? txr->txr_agg_txd : txd;
1606
1607 KASSERTMSG(pkt == chim,
1608 "RNDIS pkt not in chimney sending buffer");
1609 KASSERTMSG(tgt_txd->txd_chim_index != HVN_NVS_CHIM_IDX_INVALID,
1610 "chimney sending buffer is not used");
1611
1612 tgt_txd->txd_chim_size += pkt->rm_len;
1613 m_copydata(m, 0, m->m_pkthdr.len, (uint8_t *)chim + pktlen);
1614
1615 txr->txr_sendpkt = hvn_rndis_output_chim;
1616 goto done;
1617 }
1618
1619 KASSERTMSG(txr->txr_agg_txd == NULL, "aggregating sglist txdesc");
1620 KASSERTMSG(txd->txd_chim_index == HVN_NVS_CHIM_IDX_INVALID,
1621 "chimney buffer is used");
1622 KASSERTMSG(pkt == txd->txd_req, "RNDIS pkt not in txdesc");
1623
1624 rv = bus_dmamap_load_mbuf(sc->sc_dmat, txd->txd_dmap, m, BUS_DMA_READ |
1625 BUS_DMA_NOWAIT);
1626 switch (rv) {
1627 case 0:
1628 break;
1629 case EFBIG:
1630 if (m_defrag(m, M_NOWAIT) != NULL) {
1631 txr->txr_evdefrag.ev_count++;
1632 if (bus_dmamap_load_mbuf(sc->sc_dmat, txd->txd_dmap, m,
1633 BUS_DMA_READ | BUS_DMA_NOWAIT) == 0)
1634 break;
1635 }
1636 /* FALLTHROUGH */
1637 default:
1638 DPRINTF("%s: failed to load mbuf\n", device_xname(sc->sc_dev));
1639 txr->txr_evdmafailed.ev_count++;
1640 return -1;
1641 }
1642 bus_dmamap_sync(sc->sc_dmat, txd->txd_dmap,
1643 0, txd->txd_dmap->dm_mapsize, BUS_DMASYNC_PREWRITE);
1644 SET(txd->txd_flags, HVN_TXD_FLAG_DMAMAP);
1645
1646 /* Attach an RNDIS message to the first slot */
1647 txd->txd_sgl[0].gpa_page = txd->txd_gpa.gpa_page;
1648 txd->txd_sgl[0].gpa_ofs = txd->txd_gpa.gpa_ofs;
1649 txd->txd_sgl[0].gpa_len = pktlen;
1650 txd->txd_nsge = txd->txd_dmap->dm_nsegs + 1;
1651
1652 for (i = 0; i < txd->txd_dmap->dm_nsegs; i++) {
1653 seg = &txd->txd_dmap->dm_segs[i];
1654 txd->txd_sgl[1 + i].gpa_page = atop(seg->ds_addr);
1655 txd->txd_sgl[1 + i].gpa_ofs = seg->ds_addr & PAGE_MASK;
1656 txd->txd_sgl[1 + i].gpa_len = seg->ds_len;
1657 }
1658
1659 txd->txd_chim_index = HVN_NVS_CHIM_IDX_INVALID;
1660 txd->txd_chim_size = 0;
1661 txr->txr_sendpkt = hvn_rndis_output_sgl;
1662 done:
1663 txd->txd_buf = m;
1664
1665 /* Update temporary stats for later use. */
1666 txr->txr_stat_pkts++;
1667 txr->txr_stat_size += m->m_pkthdr.len;
1668 if (m->m_flags & M_MCAST)
1669 txr->txr_stat_mcasts++;
1670
1671 return 0;
1672 }
1673
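/*
 * Tap the outgoing packet to BPF.  Since the VLAN tag travels
 * out-of-band (as RNDIS packet info), rebuild an Ethernet+VLAN header
 * in a stack copy so listeners see the tagged frame.
 */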
1674 static void
1675 hvn_bpf_mtap(struct hvn_tx_ring *txr, struct mbuf *m, u_int direction)
1676 {
1677 struct hvn_softc *sc = txr->txr_softc;
1678 struct ifnet *ifp = SC2IFP(sc);
1679 struct ether_header *eh;
1680 struct ether_vlan_header evl;
1681
1682 if (!vlan_has_tag(m)) {
1683 bpf_mtap(ifp, m, direction);
1684 return;
1685 }
1686
1687 if (ifp->if_bpf == NULL)
1688 return;
1689
1690 txr->txr_evvlantap.ev_count++;
1691
1692 /*
1693 * Restore a VLAN tag for bpf.
1694 *
1695 * Do not modify contents of the original mbuf,
1696 * because Tx processing on the mbuf is still in progress.
1697 */
1698
1699 eh = mtod(m, struct ether_header *);
1700 memcpy(evl.evl_dhost, eh->ether_dhost, ETHER_ADDR_LEN * 2);
1701 evl.evl_encap_proto = htons(ETHERTYPE_VLAN);
1702 evl.evl_tag = htons(vlan_get_tag(m));
1703 evl.evl_proto = eh->ether_type;
1704
1705 /* Do not tap ether header of the original mbuf. */
1706 m_adj(m, sizeof(*eh));
1707
1708 bpf_mtap2(ifp->if_bpf, &evl, sizeof(evl), m, direction);
1709
	/*
	 * Cannot restore the Ethernet header of the original mbuf, but
	 * do not worry about it; the mbuf is about to be freed anyway.
	 */
1712 }
1713
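/*
 * Hand the packet (and any aggregated siblings) to the host and update
 * the interface statistics on success.  On failure the txdesc is
 * released, but the mbuf is left alone for the caller to dispose of.
 */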
1714 static int
1715 hvn_txpkt(struct hvn_tx_ring *txr, struct hvn_tx_desc *txd)
1716 {
1717 struct hvn_softc *sc = txr->txr_softc;
1718 struct ifnet *ifp = SC2IFP(sc);
1719 const struct hvn_tx_desc *tmp_txd;
1720 int error;
1721
1722 /*
1723 * Make sure that this txd and any aggregated txds are not
1724 * freed before bpf_mtap.
1725 */
1726 hvn_txd_hold(txd);
1727
1728 error = (*txr->txr_sendpkt)(txr, txd);
1729 if (error == 0) {
1730 hvn_bpf_mtap(txr, txd->txd_buf, BPF_D_OUT);
1731 STAILQ_FOREACH(tmp_txd, &txd->txd_agg_list, txd_agg_entry)
1732 hvn_bpf_mtap(txr, tmp_txd->txd_buf, BPF_D_OUT);
1733
1734 if_statadd(ifp, if_opackets, txr->txr_stat_pkts);
1735 if_statadd(ifp, if_obytes, txr->txr_stat_size);
1736 if (txr->txr_stat_mcasts != 0)
1737 if_statadd(ifp, if_omcasts, txr->txr_stat_mcasts);
1738 txr->txr_evpkts.ev_count += txr->txr_stat_pkts;
1739 txr->txr_evsends.ev_count++;
1740 }
1741
1742 hvn_txd_put(txr, txd);
1743
1744 if (__predict_false(error)) {
1745 /*
1746 * Caller will perform further processing on the
1747 * associated mbuf, so don't free it in hvn_txd_put();
1748 * only unload it from the DMA map in hvn_txd_put(),
1749 * if it was loaded.
1750 */
1751 txd->txd_buf = NULL;
1752 hvn_txd_put(txr, txd);
1753 }
1754
1755 /* Reset temporary stats, after this sending is done. */
1756 txr->txr_stat_pkts = 0;
1757 txr->txr_stat_size = 0;
1758 txr->txr_stat_mcasts = 0;
1759
1760 return error;
1761 }
1762
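/*
 * TX completion: the upper 32 bits of the NVS transaction id carry
 * HVN_NVS_CHIM_SIG plus the txdesc index; completions whose lower
 * 32 bits are non-zero are ignored.
 */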
1763 static void
1764 hvn_txeof(struct hvn_tx_ring *txr, uint64_t tid)
1765 {
1766 struct hvn_softc *sc = txr->txr_softc;
1767 struct hvn_tx_desc *txd;
1768 uint32_t id = tid >> 32;
1769
1770 if ((tid & 0xffffffffU) != 0)
1771 return;
1772
1773 id -= HVN_NVS_CHIM_SIG;
1774 if (id >= HVN_TX_DESC) {
		device_printf(sc->sc_dev,
		    "tx packet index too large: %u\n", id);
1776 return;
1777 }
1778
1779 txd = &txr->txr_desc[id];
1780
1781 if (txd->txd_buf == NULL)
1782 device_printf(sc->sc_dev, "no mbuf @%u\n", id);
1783
1784 hvn_txd_put(txr, txd);
1785 }
1786
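/*
 * Allocate the RX buffer shared with the host (15MB for NVS protocol
 * version <= 2, 16MB otherwise) and set up the per-ring locks, event
 * counters and softints.
 */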
1787 static int
1788 hvn_rx_ring_create(struct hvn_softc *sc, int ring_cnt)
1789 {
1790 struct hvn_rx_ring *rxr;
1791 int i;
1792
1793 if (sc->sc_proto <= HVN_NVS_PROTO_VERSION_2)
1794 sc->sc_rx_size = 15 * 1024 * 1024; /* 15MB */
1795 else
1796 sc->sc_rx_size = 16 * 1024 * 1024; /* 16MB */
1797 sc->sc_rx_ring = hyperv_dma_alloc(sc->sc_dmat, &sc->sc_rx_dma,
1798 sc->sc_rx_size, PAGE_SIZE, PAGE_SIZE, sc->sc_rx_size / PAGE_SIZE);
1799 if (sc->sc_rx_ring == NULL) {
1800 DPRINTF("%s: failed to allocate Rx ring buffer\n",
1801 device_xname(sc->sc_dev));
1802 return -1;
1803 }
1804
1805 sc->sc_rxr = kmem_zalloc(sizeof(*rxr) * ring_cnt, KM_SLEEP);
1806 sc->sc_nrxr_inuse = sc->sc_nrxr = ring_cnt;
1807
1808 for (i = 0; i < sc->sc_nrxr; i++) {
1809 rxr = &sc->sc_rxr[i];
1810 rxr->rxr_softc = sc;
1811 if (i < sc->sc_ntxr) {
1812 rxr->rxr_txr = &sc->sc_txr[i];
1813 rxr->rxr_txr->txr_rxr = rxr;
1814 }
1815
1816 mutex_init(&rxr->rxr_lock, MUTEX_DEFAULT, IPL_NET);
1817 mutex_init(&rxr->rxr_onwork_lock, MUTEX_DEFAULT, IPL_NET);
1818 cv_init(&rxr->rxr_onwork_cv, "waitonwk");
1819
1820 snprintf(rxr->rxr_name, sizeof(rxr->rxr_name),
1821 "%s-rx%d", device_xname(sc->sc_dev), i);
1822 evcnt_attach_dynamic(&rxr->rxr_evpkts, EVCNT_TYPE_MISC,
1823 NULL, rxr->rxr_name, "packets received");
1824 evcnt_attach_dynamic(&rxr->rxr_evcsum_ip, EVCNT_TYPE_MISC,
1825 NULL, rxr->rxr_name, "IP checksum");
1826 evcnt_attach_dynamic(&rxr->rxr_evcsum_tcp, EVCNT_TYPE_MISC,
1827 NULL, rxr->rxr_name, "TCP checksum");
1828 evcnt_attach_dynamic(&rxr->rxr_evcsum_udp, EVCNT_TYPE_MISC,
1829 NULL, rxr->rxr_name, "UDP checksum");
1830 evcnt_attach_dynamic(&rxr->rxr_evvlanhwtagging, EVCNT_TYPE_MISC,
1831 NULL, rxr->rxr_name, "VLAN H/W tagging");
1832 evcnt_attach_dynamic(&rxr->rxr_evintr, EVCNT_TYPE_INTR,
1833 NULL, rxr->rxr_name, "interrupt on ring");
1834 evcnt_attach_dynamic(&rxr->rxr_evdefer, EVCNT_TYPE_MISC,
1835 NULL, rxr->rxr_name, "handled queue in workqueue");
1836 evcnt_attach_dynamic(&rxr->rxr_evdeferreq, EVCNT_TYPE_MISC,
1837 NULL, rxr->rxr_name, "requested defer on ring");
1838 evcnt_attach_dynamic(&rxr->rxr_evredeferreq, EVCNT_TYPE_MISC,
1839 NULL, rxr->rxr_name, "requested defer in workqueue");
1840
1841 rxr->rxr_nvsbuf = kmem_zalloc(HVN_NVS_BUFSIZE, KM_SLEEP);
1842 if (rxr->rxr_nvsbuf == NULL) {
1843 DPRINTF("%s: failed to allocate channel data buffer\n",
1844 device_xname(sc->sc_dev));
1845 goto errout;
1846 }
1847
1848 rxr->rxr_si = softint_establish(SOFTINT_NET | SOFTINT_MPSAFE,
1849 hvn_nvs_softintr, rxr);
1850 if (rxr->rxr_si == NULL) {
1851 DPRINTF("%s: failed to establish rx softint\n",
1852 device_xname(sc->sc_dev));
1853 goto errout;
1854 }
1855 }
1856
1857 return 0;
1858
1859 errout:
1860 hvn_rx_ring_destroy(sc);
1861 return -1;
1862 }
1863
1864 static int
1865 hvn_rx_ring_destroy(struct hvn_softc *sc)
1866 {
1867 struct hvn_rx_ring *rxr;
1868 int i;
1869
1870 if (sc->sc_rxr != NULL) {
1871 for (i = 0; i < sc->sc_nrxr; i++) {
1872 rxr = &sc->sc_rxr[i];
1873
1874 if (rxr->rxr_si != NULL) {
1875 softint_disestablish(rxr->rxr_si);
1876 rxr->rxr_si = NULL;
1877 }
1878
1879 if (rxr->rxr_nvsbuf != NULL) {
1880 kmem_free(rxr->rxr_nvsbuf, HVN_NVS_BUFSIZE);
1881 rxr->rxr_nvsbuf = NULL;
1882 }
1883
1884 evcnt_detach(&rxr->rxr_evpkts);
1885 evcnt_detach(&rxr->rxr_evcsum_ip);
1886 evcnt_detach(&rxr->rxr_evcsum_tcp);
1887 evcnt_detach(&rxr->rxr_evcsum_udp);
1888 evcnt_detach(&rxr->rxr_evvlanhwtagging);
1889 evcnt_detach(&rxr->rxr_evintr);
1890 evcnt_detach(&rxr->rxr_evdefer);
1891 evcnt_detach(&rxr->rxr_evdeferreq);
1892 evcnt_detach(&rxr->rxr_evredeferreq);
1893
1894 cv_destroy(&rxr->rxr_onwork_cv);
1895 mutex_destroy(&rxr->rxr_onwork_lock);
1896 mutex_destroy(&rxr->rxr_lock);
1897 }
1898 kmem_free(sc->sc_rxr, sizeof(*rxr) * sc->sc_nrxr);
1899 sc->sc_rxr = NULL;
1900 sc->sc_nrxr = 0;
1901 }
1902 if (sc->sc_rx_ring != NULL) {
1903 hyperv_dma_free(sc->sc_dmat, &sc->sc_rx_dma);
1904 sc->sc_rx_ring = NULL;
1905 }
1906
1907 return 0;
1908 }
1909
1910 static void
1911 hvn_fixup_rx_data(struct hvn_softc *sc)
1912 {
1913 struct hvn_rx_ring *rxr;
1914 int i;
1915
1916 if (sc->sc_caps & HVN_CAPS_UDPHASH) {
1917 for (i = 0; i < sc->sc_nrxr; i++) {
1918 rxr = &sc->sc_rxr[i];
1919 rxr->rxr_flags |= HVN_RXR_FLAG_UDP_HASH;
1920 }
1921 }
1922 }
1923
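/*
 * Allocate the shared chimney TXBUF plus, per ring, the software
 * state: an intermediate pcq, a softint for deferred transmission,
 * DMA-able RNDIS message slots and one DMA map per TX descriptor.
 */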
1924 static int
1925 hvn_tx_ring_create(struct hvn_softc *sc, int ring_cnt)
1926 {
1927 struct hvn_tx_ring *txr;
1928 struct hvn_tx_desc *txd;
1929 bus_dma_segment_t *seg;
1930 size_t msgsize;
1931 int i, j;
1932 paddr_t pa;
1933
1934 /*
1935 * Create TXBUF for chimney sending.
1936 *
1937 * NOTE: It is shared by all channels.
1938 */
1939 sc->sc_chim = hyperv_dma_alloc(sc->sc_dmat, &sc->sc_chim_dma,
1940 HVN_CHIM_SIZE, PAGE_SIZE, 0, 1);
1941 if (sc->sc_chim == NULL) {
1942 		DPRINTF("%s: failed to allocate chimney sending memory\n",
1943 device_xname(sc->sc_dev));
1944 goto errout;
1945 }
1946
1947 sc->sc_txr = kmem_zalloc(sizeof(*txr) * ring_cnt, KM_SLEEP);
1948 sc->sc_ntxr_inuse = sc->sc_ntxr = ring_cnt;
1949
1950 msgsize = roundup(HVN_RNDIS_PKT_LEN, 128);
1951
1952 for (j = 0; j < ring_cnt; j++) {
1953 txr = &sc->sc_txr[j];
1954 txr->txr_softc = sc;
1955 txr->txr_id = j;
1956
1957 mutex_init(&txr->txr_lock, MUTEX_DEFAULT, IPL_NET);
1958 txr->txr_interq = pcq_create(HVN_TX_DESC, KM_SLEEP);
1959
1960 snprintf(txr->txr_name, sizeof(txr->txr_name),
1961 "%s-tx%d", device_xname(sc->sc_dev), j);
1962 evcnt_attach_dynamic(&txr->txr_evpkts, EVCNT_TYPE_MISC,
1963 	    NULL, txr->txr_name, "packets transmitted");
1964 evcnt_attach_dynamic(&txr->txr_evsends, EVCNT_TYPE_MISC,
1965 NULL, txr->txr_name, "sends");
1966 evcnt_attach_dynamic(&txr->txr_evnodesc, EVCNT_TYPE_MISC,
1967 NULL, txr->txr_name, "descriptor shortage");
1968 evcnt_attach_dynamic(&txr->txr_evdmafailed, EVCNT_TYPE_MISC,
1969 NULL, txr->txr_name, "DMA failure");
1970 evcnt_attach_dynamic(&txr->txr_evdefrag, EVCNT_TYPE_MISC,
1971 	    NULL, txr->txr_name, "mbuf defragged");
1972 evcnt_attach_dynamic(&txr->txr_evpcqdrop, EVCNT_TYPE_MISC,
1973 NULL, txr->txr_name, "dropped in pcq");
1974 evcnt_attach_dynamic(&txr->txr_evtransmitdefer, EVCNT_TYPE_MISC,
1975 NULL, txr->txr_name, "deferred transmit");
1976 evcnt_attach_dynamic(&txr->txr_evflushfailed, EVCNT_TYPE_MISC,
1977 NULL, txr->txr_name, "aggregation flush failure");
1978 evcnt_attach_dynamic(&txr->txr_evchimneytried, EVCNT_TYPE_MISC,
1979 NULL, txr->txr_name, "chimney send tried");
1980 evcnt_attach_dynamic(&txr->txr_evchimney, EVCNT_TYPE_MISC,
1981 NULL, txr->txr_name, "chimney send");
1982 evcnt_attach_dynamic(&txr->txr_evvlanfixup, EVCNT_TYPE_MISC,
1983 NULL, txr->txr_name, "VLAN fixup");
1984 evcnt_attach_dynamic(&txr->txr_evvlanhwtagging, EVCNT_TYPE_MISC,
1985 NULL, txr->txr_name, "VLAN H/W tagging");
1986 evcnt_attach_dynamic(&txr->txr_evvlantap, EVCNT_TYPE_MISC,
1987 NULL, txr->txr_name, "VLAN bpf_mtap fixup");
1988
1989 txr->txr_si = softint_establish(SOFTINT_NET | SOFTINT_MPSAFE,
1990 hvn_deferred_transmit, txr);
1991 if (txr->txr_si == NULL) {
1992 aprint_error_dev(sc->sc_dev,
1993 "failed to establish softint for tx ring\n");
1994 goto errout;
1995 }
1996
1997 /* Allocate memory to store RNDIS messages */
1998 txr->txr_msgs = hyperv_dma_alloc(sc->sc_dmat, &txr->txr_dma,
1999 msgsize * HVN_TX_DESC, PAGE_SIZE, 0, 1);
2000 if (txr->txr_msgs == NULL) {
2001 			DPRINTF("%s: failed to allocate memory for RNDIS "
2002 "messages\n", device_xname(sc->sc_dev));
2003 goto errout;
2004 }
2005
2006 TAILQ_INIT(&txr->txr_list);
2007 for (i = 0; i < HVN_TX_DESC; i++) {
2008 txd = &txr->txr_desc[i];
2009 txd->txd_chim_index = HVN_NVS_CHIM_IDX_INVALID;
2010 txd->txd_chim_size = 0;
2011 STAILQ_INIT(&txd->txd_agg_list);
2012 if (bus_dmamap_create(sc->sc_dmat, HVN_TX_PKT_SIZE,
2013 HVN_TX_FRAGS, HVN_TX_FRAG_SIZE, PAGE_SIZE,
2014 BUS_DMA_WAITOK, &txd->txd_dmap)) {
2015 DPRINTF("%s: failed to create map for TX "
2016 "descriptors\n", device_xname(sc->sc_dev));
2017 goto errout;
2018 }
2019 seg = &txr->txr_dma.map->dm_segs[0];
2020 pa = seg->ds_addr + (msgsize * i);
2021 txd->txd_gpa.gpa_page = atop(pa);
2022 txd->txd_gpa.gpa_ofs = pa & PAGE_MASK;
2023 txd->txd_gpa.gpa_len = msgsize;
2024 txd->txd_req = (void *)(txr->txr_msgs + (msgsize * i));
2025 txd->txd_id = i + HVN_NVS_CHIM_SIG;
2026 TAILQ_INSERT_TAIL(&txr->txr_list, txd, txd_entry);
2027 }
2028 txr->txr_avail = HVN_TX_DESC;
2029 }
2030
2031 return 0;
2032
2033 errout:
2034 hvn_tx_ring_destroy(sc);
2035 return -1;
2036 }
2037
2038 static void
2039 hvn_tx_ring_destroy(struct hvn_softc *sc)
2040 {
2041 struct hvn_tx_ring *txr;
2042 struct hvn_tx_desc *txd;
2043 int i, j;
2044
2045 if (sc->sc_txr != NULL) {
2046 for (j = 0; j < sc->sc_ntxr; j++) {
2047 txr = &sc->sc_txr[j];
2048
2049 mutex_enter(&txr->txr_lock);
2050 for (i = 0; i < HVN_TX_DESC; i++) {
2051 txd = &txr->txr_desc[i];
2052 hvn_txd_gc(txr, txd);
2053 }
2054 mutex_exit(&txr->txr_lock);
2055 for (i = 0; i < HVN_TX_DESC; i++) {
2056 txd = &txr->txr_desc[i];
2057 if (txd->txd_dmap != NULL) {
2058 bus_dmamap_destroy(sc->sc_dmat,
2059 txd->txd_dmap);
2060 txd->txd_dmap = NULL;
2061 }
2062 }
2063 if (txr->txr_msgs != NULL) {
2064 hyperv_dma_free(sc->sc_dmat, &txr->txr_dma);
2065 txr->txr_msgs = NULL;
2066 }
2067 if (txr->txr_si != NULL) {
2068 softint_disestablish(txr->txr_si);
2069 txr->txr_si = NULL;
2070 }
2071 if (txr->txr_interq != NULL) {
2072 hvn_tx_ring_qflush(sc, txr);
2073 pcq_destroy(txr->txr_interq);
2074 txr->txr_interq = NULL;
2075 }
2076
2077 evcnt_detach(&txr->txr_evpkts);
2078 evcnt_detach(&txr->txr_evsends);
2079 evcnt_detach(&txr->txr_evnodesc);
2080 evcnt_detach(&txr->txr_evdmafailed);
2081 evcnt_detach(&txr->txr_evdefrag);
2082 evcnt_detach(&txr->txr_evpcqdrop);
2083 evcnt_detach(&txr->txr_evtransmitdefer);
2084 evcnt_detach(&txr->txr_evflushfailed);
2085 evcnt_detach(&txr->txr_evchimneytried);
2086 evcnt_detach(&txr->txr_evchimney);
2087 evcnt_detach(&txr->txr_evvlanfixup);
2088 evcnt_detach(&txr->txr_evvlanhwtagging);
2089 evcnt_detach(&txr->txr_evvlantap);
2090
2091 mutex_destroy(&txr->txr_lock);
2092 }
2093
2094 kmem_free(sc->sc_txr, sizeof(*txr) * sc->sc_ntxr);
2095 sc->sc_txr = NULL;
2096 }
2097
2098 if (sc->sc_chim != NULL) {
2099 hyperv_dma_free(sc->sc_dmat, &sc->sc_chim_dma);
2100 sc->sc_chim = NULL;
2101 }
2102 }
2103
2104 static void
2105 hvn_set_chim_size(struct hvn_softc *sc, int chim_size)
2106 {
2107 struct hvn_tx_ring *txr;
2108 int i;
2109
2110 for (i = 0; i < sc->sc_ntxr_inuse; i++) {
2111 txr = &sc->sc_txr[i];
2112 txr->txr_chim_size = chim_size;
2113 }
2114 }
2115
2116 #if LONG_BIT == 64
2117 #define ffsl(v) ffs64(v)
2118 #elif LONG_BIT == 32
2119 #define ffsl(v) ffs32(v)
2120 #else
2121 #error unsupported LONG_BIT
2122 #endif /* LONG_BIT */
2123
2124 static uint32_t
2125 hvn_chim_alloc(struct hvn_softc *sc)
2126 {
2127 uint32_t chim_idx = HVN_NVS_CHIM_IDX_INVALID;
2128 int i, idx;
2129
2130 mutex_spin_enter(&sc->sc_chim_bmap_lock);
2131 for (i = 0; i < sc->sc_chim_bmap_cnt; i++) {
2132 idx = ffsl(~sc->sc_chim_bmap[i]);
2133 if (idx == 0)
2134 continue;
2135
2136 --idx; /* ffsl is 1-based */
2137 SET(sc->sc_chim_bmap[i], __BIT(idx));
2138
2139 chim_idx = i * LONG_BIT + idx;
2140 break;
2141 }
2142 mutex_spin_exit(&sc->sc_chim_bmap_lock);
2143
2144 return chim_idx;
2145 }
2146
2147 static void
2148 hvn_chim_free(struct hvn_softc *sc, uint32_t chim_idx)
2149 {
2150 u_long mask;
2151 uint32_t idx;
2152
2153 idx = chim_idx / LONG_BIT;
2154 mask = __BIT(chim_idx % LONG_BIT);
2155
2156 mutex_spin_enter(&sc->sc_chim_bmap_lock);
2157 CLR(sc->sc_chim_bmap[idx], mask);
2158 mutex_spin_exit(&sc->sc_chim_bmap_lock);
2159 }
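
/*
 * Illustrative sketch (not part of the driver): hvn_chim_alloc()
 * above is a first-fit bitmap search.  Each u_long in sc_chim_bmap
 * covers LONG_BIT chimney slots; ffsl(~word) yields the first clear
 * bit (1-based), and the global slot index is
 * word_index * LONG_BIT + bit_index.  A self-contained user-space
 * model of the same idea (hypothetical names, no locking, and an
 * open-coded bit scan instead of ffsl):
 */
#if 0
#include <limits.h>			/* LONG_BIT */

#define NWORDS	4

static unsigned long slot_bmap[NWORDS];	/* 0 = free, 1 = in use */

static int
slot_alloc(void)
{
	int i, bit;

	for (i = 0; i < NWORDS; i++) {
		if (~slot_bmap[i] == 0)
			continue;		/* word is full */
		for (bit = 0; bit < LONG_BIT; bit++) {
			if ((slot_bmap[i] & (1UL << bit)) == 0) {
				slot_bmap[i] |= 1UL << bit;
				return i * LONG_BIT + bit;
			}
		}
	}
	return -1;				/* no free slot */
}

static void
slot_free(int idx)
{
	slot_bmap[idx / LONG_BIT] &= ~(1UL << (idx % LONG_BIT));
}
#endif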
2160
2161 static void
2162 hvn_fixup_tx_data(struct hvn_softc *sc)
2163 {
2164 struct hvn_tx_ring *txr;
2165 uint64_t caps_assist;
2166 int csum_assist;
2167 int i;
2168
2169 hvn_set_chim_size(sc, sc->sc_chim_szmax);
2170 if (hvn_tx_chimney_size > 0 && hvn_tx_chimney_size < sc->sc_chim_szmax)
2171 hvn_set_chim_size(sc, hvn_tx_chimney_size);
2172
2173 caps_assist = 0;
2174 csum_assist = 0;
2175 if (sc->sc_caps & HVN_CAPS_IPCS) {
2176 caps_assist |= IFCAP_CSUM_IPv4_Tx;
2177 caps_assist |= IFCAP_CSUM_IPv4_Rx;
2178 csum_assist |= M_CSUM_IPv4;
2179 }
2180 if (sc->sc_caps & HVN_CAPS_TCP4CS) {
2181 caps_assist |= IFCAP_CSUM_TCPv4_Tx;
2182 caps_assist |= IFCAP_CSUM_TCPv4_Rx;
2183 csum_assist |= M_CSUM_TCPv4;
2184 }
2185 if (sc->sc_caps & HVN_CAPS_TCP6CS) {
2186 caps_assist |= IFCAP_CSUM_TCPv6_Tx;
2187 csum_assist |= M_CSUM_TCPv6;
2188 }
2189 if (sc->sc_caps & HVN_CAPS_UDP4CS) {
2190 caps_assist |= IFCAP_CSUM_UDPv4_Tx;
2191 caps_assist |= IFCAP_CSUM_UDPv4_Rx;
2192 csum_assist |= M_CSUM_UDPv4;
2193 }
2194 if (sc->sc_caps & HVN_CAPS_UDP6CS) {
2195 caps_assist |= IFCAP_CSUM_UDPv6_Tx;
2196 csum_assist |= M_CSUM_UDPv6;
2197 }
2198 for (i = 0; i < sc->sc_ntxr; i++) {
2199 txr = &sc->sc_txr[i];
2200 txr->txr_caps_assist = caps_assist;
2201 txr->txr_csum_assist = csum_assist;
2202 }
2203
2204 if (sc->sc_caps & HVN_CAPS_UDPHASH) {
2205 for (i = 0; i < sc->sc_ntxr; i++) {
2206 txr = &sc->sc_txr[i];
2207 txr->txr_flags |= HVN_TXR_FLAG_UDP_HASH;
2208 }
2209 }
2210 }
2211
2212 static int
2213 hvn_txd_peek(struct hvn_tx_ring *txr)
2214 {
2215
2216 KASSERT(mutex_owned(&txr->txr_lock));
2217
2218 return txr->txr_avail;
2219 }
2220
2221 static struct hvn_tx_desc *
2222 hvn_txd_get(struct hvn_tx_ring *txr)
2223 {
2224 struct hvn_tx_desc *txd;
2225
2226 KASSERT(mutex_owned(&txr->txr_lock));
2227
2228 txd = TAILQ_FIRST(&txr->txr_list);
2229 KASSERT(txd != NULL);
2230 TAILQ_REMOVE(&txr->txr_list, txd, txd_entry);
2231 txr->txr_avail--;
2232
2233 txd->txd_refs = 1;
2234
2235 return txd;
2236 }
2237
2238 static void
2239 hvn_txd_put(struct hvn_tx_ring *txr, struct hvn_tx_desc *txd)
2240 {
2241 struct hvn_softc *sc = txr->txr_softc;
2242 struct hvn_tx_desc *tmp_txd;
2243
2244 KASSERT(mutex_owned(&txr->txr_lock));
2245 KASSERTMSG(!ISSET(txd->txd_flags, HVN_TXD_FLAG_ONAGG),
2246 "put an onagg txd %#x", txd->txd_flags);
2247
2248 KASSERTMSG(txd->txd_refs > 0, "invalid txd refs %d", txd->txd_refs);
2249 if (atomic_dec_uint_nv(&txd->txd_refs) != 0)
2250 return;
2251
2252 if (!STAILQ_EMPTY(&txd->txd_agg_list)) {
2253 while ((tmp_txd = STAILQ_FIRST(&txd->txd_agg_list)) != NULL) {
2254 KASSERTMSG(STAILQ_EMPTY(&tmp_txd->txd_agg_list),
2255 "resursive aggregation on aggregated txdesc");
2256 KASSERTMSG(
2257 ISSET(tmp_txd->txd_flags, HVN_TXD_FLAG_ONAGG),
2258 "not aggregated txdesc");
2259 KASSERTMSG(
2260 tmp_txd->txd_chim_index == HVN_NVS_CHIM_IDX_INVALID,
2261 "aggregated txdesc consumes chimney sending "
2262 "buffer: idx %u", tmp_txd->txd_chim_index);
2263 KASSERTMSG(tmp_txd->txd_chim_size == 0,
2264 "aggregated txdesc has non-zero chimney sending "
2265 "size: sz %u", tmp_txd->txd_chim_size);
2266
2267 STAILQ_REMOVE_HEAD(&txd->txd_agg_list, txd_agg_entry);
2268 CLR(tmp_txd->txd_flags, HVN_TXD_FLAG_ONAGG);
2269 hvn_txd_put(txr, tmp_txd);
2270 }
2271 }
2272
2273 if (txd->txd_chim_index != HVN_NVS_CHIM_IDX_INVALID) {
2274 KASSERTMSG(!ISSET(txd->txd_flags, HVN_TXD_FLAG_DMAMAP),
2275 "chim txd uses dmamap");
2276 hvn_chim_free(sc, txd->txd_chim_index);
2277 txd->txd_chim_index = HVN_NVS_CHIM_IDX_INVALID;
2278 txd->txd_chim_size = 0;
2279 } else if (ISSET(txd->txd_flags, HVN_TXD_FLAG_DMAMAP)) {
2280 bus_dmamap_sync(sc->sc_dmat, txd->txd_dmap,
2281 0, txd->txd_dmap->dm_mapsize,
2282 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2283 bus_dmamap_unload(sc->sc_dmat, txd->txd_dmap);
2284 CLR(txd->txd_flags, HVN_TXD_FLAG_DMAMAP);
2285 }
2286
2287 if (txd->txd_buf != NULL) {
2288 m_freem(txd->txd_buf);
2289 txd->txd_buf = NULL;
2290 }
2291
2292 TAILQ_INSERT_TAIL(&txr->txr_list, txd, txd_entry);
2293 txr->txr_avail++;
2294 txr->txr_oactive = 0;
2295 }
2296
2297 static void
2298 hvn_txd_gc(struct hvn_tx_ring *txr, struct hvn_tx_desc *txd)
2299 {
2300
2301 KASSERTMSG(txd->txd_refs == 0 || txd->txd_refs == 1,
2302 "invalid txd refs %d", txd->txd_refs);
2303
2304 /* Aggregated txds will be freed by their aggregating txd. */
2305 if (txd->txd_refs > 0 && !ISSET(txd->txd_flags, HVN_TXD_FLAG_ONAGG))
2306 hvn_txd_put(txr, txd);
2307 }
2308
2309 static void
2310 hvn_txd_hold(struct hvn_tx_desc *txd)
2311 {
2312
2313 /* 0->1 transition will never work */
2314 KASSERTMSG(txd->txd_refs > 0, "invalid txd refs %d", txd->txd_refs);
2315
2316 atomic_inc_uint(&txd->txd_refs);
2317 }
2318
2319 static void
2320 hvn_txd_agg(struct hvn_tx_desc *agg_txd, struct hvn_tx_desc *txd)
2321 {
2322
2323 KASSERTMSG(!ISSET(agg_txd->txd_flags, HVN_TXD_FLAG_ONAGG),
2324 "recursive aggregation on aggregating txdesc");
2325 KASSERTMSG(!ISSET(txd->txd_flags, HVN_TXD_FLAG_ONAGG),
2326 "already aggregated");
2327 KASSERTMSG(STAILQ_EMPTY(&txd->txd_agg_list),
2328 "recursive aggregation on to-be-aggregated txdesc");
2329
2330 SET(txd->txd_flags, HVN_TXD_FLAG_ONAGG);
2331 STAILQ_INSERT_TAIL(&agg_txd->txd_agg_list, txd, txd_agg_entry);
2332 }
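
/*
 * Illustrative sketch (not part of the driver): the intended life
 * cycle of a TX descriptor under txr_lock.  hvn_txd_get() hands out
 * a descriptor with one reference, hvn_txd_hold() adds a reference
 * for each additional in-flight user, hvn_txd_agg() parks a
 * descriptor on another descriptor's aggregation list, and the last
 * hvn_txd_put() releases the descriptor together with everything it
 * aggregates.  A hypothetical transmit-path fragment:
 */
#if 0
	struct hvn_tx_desc *txd;

	mutex_enter(&txr->txr_lock);
	if (hvn_txd_peek(txr) > 0) {
		txd = hvn_txd_get(txr);	/* refs = 1 */
		hvn_txd_hold(txd);	/* extra ref for the completion */
		/* ... build and submit the RNDIS packet ... */
		hvn_txd_put(txr, txd);	/* drop the caller's ref */
		/* the send-done path performs the final hvn_txd_put() */
	}
	mutex_exit(&txr->txr_lock);
#endif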
2333
2334 static int
2335 hvn_tx_ring_pending(struct hvn_tx_ring *txr)
2336 {
2337 int pending = 0;
2338
2339 mutex_enter(&txr->txr_lock);
2340 if (hvn_txd_peek(txr) != HVN_TX_DESC)
2341 pending = 1;
2342 mutex_exit(&txr->txr_lock);
2343
2344 return pending;
2345 }
2346
2347 static void
2348 hvn_tx_ring_qflush(struct hvn_softc *sc, struct hvn_tx_ring *txr)
2349 {
2350 struct mbuf *m;
2351
2352 while ((m = pcq_get(txr->txr_interq)) != NULL)
2353 m_freem(m);
2354 }
2355
2356 static int
2357 hvn_get_lladdr(struct hvn_softc *sc, uint8_t *enaddr)
2358 {
2359 size_t addrlen = ETHER_ADDR_LEN;
2360 int rv;
2361
2362 rv = hvn_rndis_query(sc, OID_802_3_PERMANENT_ADDRESS, enaddr, &addrlen);
2363 if (rv == 0 && addrlen != ETHER_ADDR_LEN)
2364 rv = -1;
2365 return rv;
2366 }
2367
2368 static void
2369 hvn_update_link_status(struct hvn_softc *sc)
2370 {
2371 struct ifnet *ifp = SC2IFP(sc);
2372 uint32_t state, old_link_state;
2373 size_t len = sizeof(state);
2374 int rv;
2375
2376 rv = hvn_rndis_query(sc, OID_GEN_MEDIA_CONNECT_STATUS, &state, &len);
2377 if (rv != 0 || len != sizeof(state))
2378 return;
2379
2380 old_link_state = sc->sc_link_state;
2381 sc->sc_link_state = (state == NDIS_MEDIA_STATE_CONNECTED) ?
2382 LINK_STATE_UP : LINK_STATE_DOWN;
2383 if (old_link_state != sc->sc_link_state) {
2384 if_link_state_change(ifp, sc->sc_link_state);
2385 }
2386 }
2387
2388 static int
2389 hvn_get_mtu(struct hvn_softc *sc, uint32_t *mtu)
2390 {
2391 size_t mtusz = sizeof(*mtu);
2392 int rv;
2393
2394 rv = hvn_rndis_query(sc, OID_GEN_MAXIMUM_FRAME_SIZE, mtu, &mtusz);
2395 if (rv == 0 && mtusz != sizeof(*mtu))
2396 rv = -1;
2397 return rv;
2398 }
2399
2400 static int
2401 hvn_channel_attach(struct hvn_softc *sc, struct vmbus_channel *chan)
2402 {
2403 struct hvn_rx_ring *rxr;
2404 struct hvn_tx_ring *txr;
2405 int idx;
2406
2407 idx = chan->ch_subidx;
2408 if (idx < 0 || idx >= sc->sc_nrxr_inuse) {
2409 		DPRINTF("%s: invalid sub-channel %d\n",
2410 device_xname(sc->sc_dev), idx);
2411 return -1;
2412 }
2413
2414 rxr = &sc->sc_rxr[idx];
2415 rxr->rxr_chan = chan;
2416
2417 if (idx < sc->sc_ntxr_inuse) {
2418 txr = &sc->sc_txr[idx];
2419 txr->txr_chan = chan;
2420 }
2421
2422 /* Bind this channel to a proper CPU. */
2423 vmbus_channel_cpu_set(chan, HVN_RING_IDX2CPU(sc, idx));
2424
2425 chan->ch_flags &= ~CHF_BATCHED;
2426
2427 /* Associate our interrupt handler with the channel */
2428 if (vmbus_channel_open(chan,
2429 HVN_RING_BUFSIZE - sizeof(struct vmbus_bufring), NULL, 0,
2430 hvn_nvs_intr, rxr)) {
2431 DPRINTF("%s: failed to open channel\n",
2432 device_xname(sc->sc_dev));
2433 return -1;
2434 }
2435
2436 return 0;
2437 }
2438
2439 static void
2440 hvn_channel_detach(struct hvn_softc *sc, struct vmbus_channel *chan)
2441 {
2442
2443 vmbus_channel_close_direct(chan);
2444 }
2445
2446 static void
2447 hvn_channel_detach_all(struct hvn_softc *sc)
2448 {
2449 struct vmbus_channel **subchans;
2450 int i, subchan_cnt = sc->sc_nrxr_inuse - 1;
2451
2452 if (subchan_cnt > 0) {
2453 /* Detach the sub-channels. */
2454 subchans = vmbus_subchannel_get(sc->sc_prichan, subchan_cnt);
2455 for (i = 0; i < subchan_cnt; i++)
2456 hvn_channel_detach(sc, subchans[i]);
2457 vmbus_subchannel_rel(subchans, subchan_cnt);
2458 }
2459
2460 /*
2461 * Detach the primary channel, _after_ all sub-channels
2462 * are detached.
2463 */
2464 hvn_channel_detach(sc, sc->sc_prichan);
2465
2466 /* Wait for sub-channels to be destroyed, if any. */
2467 vmbus_subchannel_drain(sc->sc_prichan);
2468 }
2469
2470 static int
2471 hvn_subchannel_attach(struct hvn_softc *sc)
2472 {
2473 struct vmbus_channel **subchans;
2474 int subchan_cnt = sc->sc_nrxr_inuse - 1;
2475 int i, error = 0;
2476
2477 KASSERTMSG(subchan_cnt > 0, "no sub-channels");
2478
2479 /* Attach the sub-channels. */
2480 subchans = vmbus_subchannel_get(sc->sc_prichan, subchan_cnt);
2481 for (i = 0; i < subchan_cnt; ++i) {
2482 int error1;
2483
2484 error1 = hvn_channel_attach(sc, subchans[i]);
2485 if (error1) {
2486 error = error1;
2487 /* Move on; all channels will be detached later. */
2488 }
2489 }
2490 vmbus_subchannel_rel(subchans, subchan_cnt);
2491
2492 if (error) {
2493 aprint_error_dev(sc->sc_dev,
2494 "sub-channels attach failed: %d\n", error);
2495 return error;
2496 }
2497
2498 aprint_debug_dev(sc->sc_dev, "%d sub-channels attached\n",
2499 subchan_cnt);
2500 return 0;
2501 }
2502
2503 static int
2504 hvn_synth_alloc_subchannels(struct hvn_softc *sc, int *nsubch)
2505 {
2506 struct vmbus_channel **subchans;
2507 int error, nchan, rxr_cnt;
2508
2509 nchan = *nsubch + 1;
2510 if (nchan < 2) {
2511 /* Multiple RX/TX rings are not requested. */
2512 *nsubch = 0;
2513 return 0;
2514 }
2515
2516 /*
2517 	 * Query RSS capabilities, e.g. the # of RX rings and the # of
2518 	 * indirect table entries.
2519 */
2520 if (hvn_get_rsscaps(sc, &rxr_cnt)) {
2521 /* No RSS. */
2522 *nsubch = 0;
2523 return 0;
2524 }
2525
2526 aprint_debug_dev(sc->sc_dev, "RX rings offered %u, requested %d\n",
2527 rxr_cnt, nchan);
2528
2529 if (nchan > rxr_cnt)
2530 nchan = rxr_cnt;
2531 if (nchan == 1) {
2532 aprint_debug_dev(sc->sc_dev,
2533 "only 1 channel is supported, no vRSS\n");
2534 *nsubch = 0;
2535 return 0;
2536 }
2537
2538 *nsubch = nchan - 1;
2539 error = hvn_nvs_alloc_subchannels(sc, nsubch);
2540 if (error || *nsubch == 0) {
2541 /* Failed to allocate sub-channels. */
2542 *nsubch = 0;
2543 return 0;
2544 }
2545
2546 /*
2547 * Wait for all sub-channels to become ready before moving on.
2548 */
2549 subchans = vmbus_subchannel_get(sc->sc_prichan, *nsubch);
2550 vmbus_subchannel_rel(subchans, *nsubch);
2551 return 0;
2552 }
2553
2554 static int
2555 hvn_synth_attachable(const struct hvn_softc *sc)
2556 {
2557 #if 0
2558 const struct hvn_rx_ring *rxr;
2559 int i;
2560
2561 for (i = 0; i < sc->sc_nrxr; i++) {
2562 rxr = &sc->sc_rxr[i];
2563 if (rxr->rxr_flags)
2564 return 0;
2565 }
2566 #endif
2567 return 1;
2568 }
2569
2570 /*
2571 * Make sure that the RX filter is zero after the successful
2572 * RNDIS initialization.
2573 *
2574 * NOTE:
2575 * Under certain conditions on certain versions of Hyper-V,
2576 * the RNDIS rxfilter is _not_ zero on the hypervisor side
2577 * after the successful RNDIS initialization, which breaks
2578 * the assumption of any following code (well, it breaks the
2579 * RNDIS API contract actually). Clear the RNDIS rxfilter
2580 * explicitly, drain packets sneaking through, and drain the
2581 * interrupt taskqueues scheduled due to the stealth packets.
2582 */
2583 static void
2584 hvn_init_fixat(struct hvn_softc *sc, int nchan)
2585 {
2586
2587 hvn_disable_rx(sc);
2588 hvn_drain_rxtx(sc, nchan);
2589 }
2590
2591 static void
2592 hvn_set_txagg(struct hvn_softc *sc)
2593 {
2594 struct hvn_tx_ring *txr;
2595 uint32_t size, pkts;
2596 int i;
2597
2598 /*
2599 * Setup aggregation size.
2600 */
2601 if (sc->sc_agg_size < 0)
2602 size = UINT32_MAX;
2603 else
2604 size = sc->sc_agg_size;
2605
2606 if (size > sc->sc_rndis_agg_size)
2607 size = sc->sc_rndis_agg_size;
2608
2609 /* NOTE: We only aggregate packets using chimney sending buffers. */
2610 if (size > (uint32_t)sc->sc_chim_szmax)
2611 size = sc->sc_chim_szmax;
2612
2613 if (size <= 2 * HVN_PKTSIZE_MIN(sc->sc_rndis_agg_align)) {
2614 /* Disable */
2615 size = 0;
2616 pkts = 0;
2617 goto done;
2618 }
2619
2620 /* NOTE: Type of the per TX ring setting is 'int'. */
2621 if (size > INT_MAX)
2622 size = INT_MAX;
2623
2624 /*
2625 * Setup aggregation packet count.
2626 */
2627 if (sc->sc_agg_pkts < 0)
2628 pkts = UINT32_MAX;
2629 else
2630 pkts = sc->sc_agg_pkts;
2631
2632 if (pkts > sc->sc_rndis_agg_pkts)
2633 pkts = sc->sc_rndis_agg_pkts;
2634
2635 if (pkts <= 1) {
2636 /* Disable */
2637 size = 0;
2638 pkts = 0;
2639 goto done;
2640 }
2641
2642 /* NOTE: Type of the per TX ring setting is 'short'. */
2643 if (pkts > SHRT_MAX)
2644 pkts = SHRT_MAX;
2645
2646 done:
2647 /* NOTE: Type of the per TX ring setting is 'short'. */
2648 if (sc->sc_rndis_agg_align > SHRT_MAX) {
2649 /* Disable */
2650 size = 0;
2651 pkts = 0;
2652 }
2653
2654 aprint_verbose_dev(sc->sc_dev,
2655 "TX aggregate size %u, pkts %u, align %u\n",
2656 size, pkts, sc->sc_rndis_agg_align);
2657
2658 for (i = 0; i < sc->sc_ntxr_inuse; ++i) {
2659 txr = &sc->sc_txr[i];
2660
2661 mutex_enter(&txr->txr_lock);
2662 txr->txr_agg_szmax = size;
2663 txr->txr_agg_pktmax = pkts;
2664 txr->txr_agg_align = sc->sc_rndis_agg_align;
2665 mutex_exit(&txr->txr_lock);
2666 }
2667 }
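
/*
 * Worked example (hypothetical numbers): with sc_agg_size = -1 the
 * requested size starts at UINT32_MAX, is clamped first to the RNDIS
 * limit (say 16384) and then to the chimney section size (say 6144);
 * as long as 6144 > 2 * HVN_PKTSIZE_MIN(align), aggregation stays
 * enabled and txr_agg_szmax becomes 6144 on every ring.  If any of
 * the checks above fails (size too small, pkts <= 1, or align >
 * SHRT_MAX), both size and pkts are zeroed and aggregation is
 * disabled entirely.
 */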
2668
2669 static int
2670 hvn_synth_attach(struct hvn_softc *sc, int mtu)
2671 {
2672 uint8_t rss_key[RSS_KEYSIZE];
2673 uint32_t old_caps;
2674 int nchan = 1, nsubch;
2675 int i, error;
2676
2677 if (!hvn_synth_attachable(sc))
2678 return ENXIO;
2679
2680 /* Save capabilities for later verification. */
2681 old_caps = sc->sc_caps;
2682 sc->sc_caps = 0;
2683
2684 /* Clear RSS stuffs. */
2685 sc->sc_rss_ind_size = 0;
2686 sc->sc_rss_hash = 0;
2687 sc->sc_rss_hcap = 0;
2688
2689 /*
2690 * Attach the primary channel _before_ attaching NVS and RNDIS.
2691 */
2692 error = hvn_channel_attach(sc, sc->sc_prichan);
2693 if (error) {
2694 aprint_error_dev(sc->sc_dev,
2695 "failed to attach primary channel\n");
2696 goto failed;
2697 }
2698
2699 /*
2700 * Attach NVS.
2701 */
2702 error = hvn_nvs_attach(sc, mtu);
2703 if (error) {
2704 aprint_error_dev(sc->sc_dev, "failed to init NVSP\n");
2705 goto detach_channel;
2706 }
2707
2708 /*
2709 * Attach RNDIS _after_ NVS is attached.
2710 */
2711 error = hvn_rndis_attach(sc, mtu);
2712 if (error) {
2713 aprint_error_dev(sc->sc_dev, "failed to init RNDIS\n");
2714 goto detach_nvs;
2715 }
2716
2717 error = hvn_set_capabilities(sc, mtu);
2718 if (error) {
2719 aprint_error_dev(sc->sc_dev, "failed to setup offloading\n");
2720 goto detach_rndis;
2721 }
2722
2723 if ((sc->sc_flags & HVN_SCF_ATTACHED) && old_caps != sc->sc_caps) {
2724 device_printf(sc->sc_dev, "caps mismatch "
2725 "old 0x%08x, new 0x%08x\n", old_caps, sc->sc_caps);
2726 error = ENXIO;
2727 goto detach_rndis;
2728 }
2729
2730 /*
2731 * Allocate sub-channels for multi-TX/RX rings.
2732 *
2733 * NOTE:
2734 * The # of RX rings that can be used is equivalent to the # of
2735 * channels to be requested.
2736 */
2737 nsubch = sc->sc_nrxr - 1;
2738 error = hvn_synth_alloc_subchannels(sc, &nsubch);
2739 if (error) {
2740 aprint_error_dev(sc->sc_dev,
2741 "failed to allocate sub channels\n");
2742 goto detach_synth;
2743 }
2744
2745 /*
2746 * Set the # of TX/RX rings that could be used according to
2747 * the # of channels that NVS offered.
2748 */
2749 nchan = nsubch + 1;
2750 hvn_set_ring_inuse(sc, nchan);
2751
2752 if (nchan > 1) {
2753 /*
2754 * Attach the sub-channels.
2755 *
2756 * NOTE: hvn_set_ring_inuse() _must_ have been called.
2757 */
2758 error = hvn_subchannel_attach(sc);
2759 if (error) {
2760 aprint_error_dev(sc->sc_dev,
2761 "failed to attach sub channels\n");
2762 goto detach_synth;
2763 }
2764
2765 /*
2766 * Configure RSS key and indirect table _after_ all sub-channels
2767 * are attached.
2768 */
2769 if (!(sc->sc_flags & HVN_SCF_HAS_RSSKEY)) {
2770 /* Set the default RSS key. */
2771 CTASSERT(sizeof(sc->sc_rss.rss_key) == sizeof(rss_key));
2772 rss_getkey(rss_key);
2773 memcpy(&sc->sc_rss.rss_key, rss_key,
2774 sizeof(sc->sc_rss.rss_key));
2775 sc->sc_flags |= HVN_SCF_HAS_RSSKEY;
2776 }
2777
2778 if (!(sc->sc_flags & HVN_SCF_HAS_RSSIND)) {
2779 /* Setup RSS indirect table in round-robin fashion. */
2780 for (i = 0; i < NDIS_HASH_INDCNT; i++) {
2781 sc->sc_rss.rss_ind[i] = i % nchan;
2782 }
2783 sc->sc_flags |= HVN_SCF_HAS_RSSIND;
2784 } else {
2785 /*
2786 * # of usable channels may be changed, so we have to
2787 			 * make sure that all entries in the RSS indirect table
2788 * are valid.
2789 *
2790 * NOTE: hvn_set_ring_inuse() _must_ have been called.
2791 */
2792 hvn_fixup_rss_ind(sc);
2793 }
2794
2795 sc->sc_rss_hash = sc->sc_rss_hcap;
2796 error = hvn_set_rss(sc, NDIS_RSS_FLAG_NONE);
2797 if (error) {
2798 aprint_error_dev(sc->sc_dev, "failed to setup RSS\n");
2799 goto detach_synth;
2800 }
2801 }
2802
2803 /*
2804 * Fixup transmission aggregation setup.
2805 */
2806 hvn_set_txagg(sc);
2807 hvn_init_fixat(sc, nchan);
2808 return 0;
2809
2810 detach_synth:
2811 hvn_init_fixat(sc, nchan);
2812 hvn_synth_detach(sc);
2813 return error;
2814
2815 detach_rndis:
2816 hvn_init_fixat(sc, nchan);
2817 hvn_rndis_detach(sc);
2818 detach_nvs:
2819 hvn_nvs_detach(sc);
2820 detach_channel:
2821 hvn_channel_detach(sc, sc->sc_prichan);
2822 failed:
2823 /* Restore old capabilities. */
2824 sc->sc_caps = old_caps;
2825 return error;
2826 }
2827
2828 static void
2829 hvn_synth_detach(struct hvn_softc *sc)
2830 {
2831
2832 /* Detach the RNDIS first. */
2833 hvn_rndis_detach(sc);
2834
2835 /* Detach NVS. */
2836 hvn_nvs_detach(sc);
2837
2838 /* Detach all of the channels. */
2839 hvn_channel_detach_all(sc);
2840
2841 if (sc->sc_prichan->ch_sc->sc_proto >= VMBUS_VERSION_WIN10 &&
2842 sc->sc_rx_hndl) {
2843 /*
2844 * Host is post-Win2016, disconnect RXBUF from primary channel
2845 * here.
2846 */
2847 vmbus_handle_free(sc->sc_prichan, sc->sc_rx_hndl);
2848 sc->sc_rx_hndl = 0;
2849 }
2850
2851 if (sc->sc_prichan->ch_sc->sc_proto >= VMBUS_VERSION_WIN10 &&
2852 sc->sc_chim_hndl) {
2853 /*
2854 * Host is post-Win2016, disconnect chimney sending buffer
2855 * from primary channel here.
2856 */
2857 vmbus_handle_free(sc->sc_prichan, sc->sc_chim_hndl);
2858 sc->sc_chim_hndl = 0;
2859 }
2860 }
2861
2862 static void
2863 hvn_set_ring_inuse(struct hvn_softc *sc, int ring_cnt)
2864 {
2865
2866 if (sc->sc_ntxr > ring_cnt)
2867 sc->sc_ntxr_inuse = ring_cnt;
2868 else
2869 sc->sc_ntxr_inuse = sc->sc_ntxr;
2870 sc->sc_nrxr_inuse = ring_cnt;
2871 }
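
/*
 * Worked example (hypothetical numbers): with sc_ntxr = 8 and NVS
 * granting 3 sub-channels, ring_cnt = 4, so both sc_ntxr_inuse and
 * sc_nrxr_inuse become 4; had the driver been created with only 2 TX
 * rings (sc_ntxr = 2), sc_ntxr_inuse would stay clamped at 2 while
 * sc_nrxr_inuse still became 4.
 */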
2872
2873 static void
2874 hvn_channel_drain(struct hvn_softc *sc, struct vmbus_channel *chan)
2875 {
2876 struct hvn_rx_ring *rxr;
2877 int i, s;
2878
2879 for (rxr = NULL, i = 0; i < sc->sc_nrxr_inuse; i++) {
2880 rxr = &sc->sc_rxr[i];
2881 if (rxr->rxr_chan == chan)
2882 break;
2883 }
2884 KASSERT(i < sc->sc_nrxr_inuse);
2885
2886 /*
2887 * NOTE:
2888 * The TX bufring will not be drained by the hypervisor,
2889 * if the primary channel is revoked.
2890 */
2891 while (!vmbus_channel_rx_empty(chan) ||
2892 (!vmbus_channel_is_revoked(sc->sc_prichan) &&
2893 !vmbus_channel_tx_empty(chan))) {
2894 DELAY(20);
2895 s = splnet();
2896 hvn_nvs_intr1(rxr, sc->sc_tx_process_limit,
2897 sc->sc_rx_process_limit);
2898 splx(s);
2899 }
2900
2901 mutex_enter(&rxr->rxr_onwork_lock);
2902 while (rxr->rxr_onlist || rxr->rxr_onproc)
2903 cv_wait(&rxr->rxr_onwork_cv, &rxr->rxr_onwork_lock);
2904 mutex_exit(&rxr->rxr_onwork_lock);
2905 }
2906
2907 static void
2908 hvn_disable_rx(struct hvn_softc *sc)
2909 {
2910
2911 /*
2912 * Disable RX by clearing RX filter forcefully.
2913 */
2914 (void)hvn_rndis_close(sc); /* ignore error */
2915
2916 /*
2917 * Give RNDIS enough time to flush all pending data packets.
2918 */
2919 DELAY(200);
2920 }
2921
2922 static void
2923 hvn_drain_rxtx(struct hvn_softc *sc, int nchan)
2924 {
2925 struct vmbus_channel **subchans = NULL;
2926 int i, nsubch;
2927
2928 /*
2929 * Drain RX/TX bufrings and interrupts.
2930 */
2931 nsubch = nchan - 1;
2932 if (nsubch > 0)
2933 subchans = vmbus_subchannel_get(sc->sc_prichan, nsubch);
2934
2935 if (subchans != NULL) {
2936 for (i = 0; i < nsubch; ++i)
2937 hvn_channel_drain(sc, subchans[i]);
2938 }
2939 hvn_channel_drain(sc, sc->sc_prichan);
2940
2941 if (subchans != NULL)
2942 vmbus_subchannel_rel(subchans, nsubch);
2943 }
2944
2945 static void
2946 hvn_suspend_data(struct hvn_softc *sc)
2947 {
2948 struct hvn_tx_ring *txr;
2949 int i, s;
2950
2951 /*
2952 * Suspend TX.
2953 */
2954 for (i = 0; i < sc->sc_ntxr_inuse; i++) {
2955 txr = &sc->sc_txr[i];
2956
2957 mutex_enter(&txr->txr_lock);
2958 txr->txr_suspended = 1;
2959 mutex_exit(&txr->txr_lock);
2960 		/* No one is able to send more packets now. */
2961
2962 /*
2963 * Wait for all pending sends to finish.
2964 *
2965 * NOTE:
2966 * We will _not_ receive all pending send-done, if the
2967 * primary channel is revoked.
2968 */
2969 while (hvn_tx_ring_pending(txr) &&
2970 !vmbus_channel_is_revoked(sc->sc_prichan)) {
2971 DELAY(20);
2972 s = splnet();
2973 hvn_nvs_intr1(txr->txr_rxr, sc->sc_tx_process_limit,
2974 sc->sc_rx_process_limit);
2975 splx(s);
2976 }
2977 }
2978
2979 /*
2980 * Disable RX.
2981 */
2982 hvn_disable_rx(sc);
2983
2984 /*
2985 * Drain RX/TX.
2986 */
2987 hvn_drain_rxtx(sc, sc->sc_nrxr_inuse);
2988 }
2989
2990 static void
2991 hvn_suspend_mgmt(struct hvn_softc *sc)
2992 {
2993
2994 sc->sc_link_suspend = true;
2995 callout_halt(&sc->sc_link_tmout, NULL);
2996
2997 /* Drain link state task */
2998 mutex_enter(&sc->sc_link_lock);
2999 for (;;) {
3000 if (!sc->sc_link_onproc)
3001 break;
3002 mutex_exit(&sc->sc_link_lock);
3003 DELAY(20);
3004 mutex_enter(&sc->sc_link_lock);
3005 }
3006 mutex_exit(&sc->sc_link_lock);
3007 }
3008
3009 static void
3010 hvn_suspend(struct hvn_softc *sc)
3011 {
3012 struct ifnet *ifp = SC2IFP(sc);
3013
3014 if (ifp->if_flags & IFF_RUNNING)
3015 hvn_suspend_data(sc);
3016 hvn_suspend_mgmt(sc);
3017 }
3018
3019 static void
3020 hvn_resume_tx(struct hvn_softc *sc, int ring_cnt)
3021 {
3022 struct hvn_tx_ring *txr;
3023 int i;
3024
3025 for (i = 0; i < ring_cnt; i++) {
3026 txr = &sc->sc_txr[i];
3027 mutex_enter(&txr->txr_lock);
3028 txr->txr_suspended = 0;
3029 mutex_exit(&txr->txr_lock);
3030 }
3031 }
3032
3033 static void
3034 hvn_resume_data(struct hvn_softc *sc)
3035 {
3036 struct ifnet *ifp = SC2IFP(sc);
3037 struct hvn_tx_ring *txr;
3038 int i;
3039
3040 /*
3041 * Re-enable RX.
3042 */
3043 hvn_rndis_open(sc);
3044
3045 /*
3046 * Make sure to clear suspend status on "all" TX rings,
3047 * since sc_ntxr_inuse can be changed after hvn_suspend_data().
3048 */
3049 hvn_resume_tx(sc, sc->sc_ntxr);
3050
3051 /*
3052 	 * Flush unused mbufs, since sc_ntxr_inuse may be reduced.
3053 */
3054 for (i = sc->sc_ntxr_inuse; i < sc->sc_ntxr; i++)
3055 hvn_tx_ring_qflush(sc, &sc->sc_txr[i]);
3056
3057 /*
3058 * Kick start TX.
3059 */
3060 for (i = 0; i < sc->sc_ntxr_inuse; i++) {
3061 txr = &sc->sc_txr[i];
3062 mutex_enter(&txr->txr_lock);
3063 txr->txr_oactive = 0;
3064
3065 /* ALTQ */
3066 if (txr->txr_id == 0)
3067 if_schedule_deferred_start(ifp);
3068 softint_schedule(txr->txr_si);
3069 mutex_exit(&txr->txr_lock);
3070 }
3071 }
3072
3073 static void
3074 hvn_resume_mgmt(struct hvn_softc *sc)
3075 {
3076
3077 sc->sc_link_suspend = false;
3078 hvn_link_event(sc, HVN_LINK_EV_RESUME_NETWORK);
3079 }
3080
3081 static void
3082 hvn_resume(struct hvn_softc *sc)
3083 {
3084 struct ifnet *ifp = SC2IFP(sc);
3085
3086 if (ifp->if_flags & IFF_RUNNING)
3087 hvn_resume_data(sc);
3088 hvn_resume_mgmt(sc);
3089 }
3090
3091 static int
3092 hvn_nvs_init(struct hvn_softc *sc)
3093 {
3094
3095 mutex_init(&sc->sc_nvsrsp_lock, MUTEX_DEFAULT, IPL_NET);
3096 cv_init(&sc->sc_nvsrsp_cv, "nvsrspcv");
3097
3098 return 0;
3099 }
3100
3101 static void
3102 hvn_nvs_destroy(struct hvn_softc *sc)
3103 {
3104
3105 mutex_destroy(&sc->sc_nvsrsp_lock);
3106 cv_destroy(&sc->sc_nvsrsp_cv);
3107 }
3108
3109 static int
3110 hvn_nvs_doinit(struct hvn_softc *sc, uint32_t proto)
3111 {
3112 struct hvn_nvs_init cmd;
3113 struct hvn_nvs_init_resp *rsp;
3114 uint64_t tid;
3115 int error;
3116
3117 memset(&cmd, 0, sizeof(cmd));
3118 cmd.nvs_type = HVN_NVS_TYPE_INIT;
3119 cmd.nvs_ver_min = cmd.nvs_ver_max = proto;
3120
3121 tid = atomic_inc_uint_nv(&sc->sc_nvstid);
3122 mutex_enter(&sc->sc_nvsrsp_lock);
3123 error = hvn_nvs_cmd(sc, &cmd, sizeof(cmd), tid, 0);
3124 if (error == 0) {
3125 rsp = (struct hvn_nvs_init_resp *)&sc->sc_nvsrsp;
3126 if (rsp->nvs_status != HVN_NVS_STATUS_OK)
3127 error = EINVAL;
3128 }
3129 mutex_exit(&sc->sc_nvsrsp_lock);
3130
3131 return error;
3132 }
3133
3134 static int
3135 hvn_nvs_conf_ndis(struct hvn_softc *sc, int mtu)
3136 {
3137 struct hvn_nvs_ndis_conf cmd;
3138 uint64_t tid;
3139 int error;
3140
3141 memset(&cmd, 0, sizeof(cmd));
3142 cmd.nvs_type = HVN_NVS_TYPE_NDIS_CONF;
3143 cmd.nvs_mtu = mtu + ETHER_HDR_LEN;
3144 cmd.nvs_caps = HVN_NVS_NDIS_CONF_VLAN;
3145
3146 tid = atomic_inc_uint_nv(&sc->sc_nvstid);
3147 mutex_enter(&sc->sc_nvsrsp_lock);
3148 /* NOTE: No response. */
3149 error = hvn_nvs_cmd(sc, &cmd, sizeof(cmd), tid, 0);
3150 mutex_exit(&sc->sc_nvsrsp_lock);
3151
3152 if (error == 0)
3153 sc->sc_caps |= HVN_CAPS_MTU | HVN_CAPS_VLAN;
3154 return error;
3155 }
3156
3157 static int
3158 hvn_nvs_init_ndis(struct hvn_softc *sc)
3159 {
3160 struct hvn_nvs_ndis_init cmd;
3161 uint64_t tid;
3162 int error;
3163
3164 memset(&cmd, 0, sizeof(cmd));
3165 cmd.nvs_type = HVN_NVS_TYPE_NDIS_INIT;
3166 cmd.nvs_ndis_major = (sc->sc_ndisver & 0xffff0000) >> 16;
3167 cmd.nvs_ndis_minor = sc->sc_ndisver & 0x0000ffff;
3168
3169 tid = atomic_inc_uint_nv(&sc->sc_nvstid);
3170 mutex_enter(&sc->sc_nvsrsp_lock);
3171 /* NOTE: No response. */
3172 error = hvn_nvs_cmd(sc, &cmd, sizeof(cmd), tid, 0);
3173 mutex_exit(&sc->sc_nvsrsp_lock);
3174
3175 return error;
3176 }
3177
3178 static int
3179 hvn_nvs_attach(struct hvn_softc *sc, int mtu)
3180 {
3181 static const uint32_t protos[] = {
3182 HVN_NVS_PROTO_VERSION_5,
3183 HVN_NVS_PROTO_VERSION_4,
3184 HVN_NVS_PROTO_VERSION_2,
3185 HVN_NVS_PROTO_VERSION_1
3186 };
3187 int i;
3188
3189 if (hyperv_ver_major >= 10)
3190 sc->sc_caps |= HVN_CAPS_UDPHASH;
3191
3192 /*
3193 * Initialize NVS.
3194 */
3195 if (sc->sc_flags & HVN_SCF_ATTACHED) {
3196 /*
3197 * NVS version and NDIS version MUST NOT be changed.
3198 */
3199 DPRINTF("%s: reinit NVS version %#x, NDIS version %u.%u\n",
3200 device_xname(sc->sc_dev), sc->sc_proto,
3201 (sc->sc_ndisver >> 16), sc->sc_ndisver & 0xffff);
3202
3203 if (hvn_nvs_doinit(sc, sc->sc_proto)) {
3204 DPRINTF("%s: failed to reinit NVSP version %#x\n",
3205 device_xname(sc->sc_dev), sc->sc_proto);
3206 return -1;
3207 }
3208 } else {
3209 /*
3210 * Find the supported NVS version and set NDIS version
3211 * accordingly.
3212 */
3213 for (i = 0; i < __arraycount(protos); i++) {
3214 if (hvn_nvs_doinit(sc, protos[i]) == 0)
3215 break;
3216 }
3217 if (i == __arraycount(protos)) {
3218 DPRINTF("%s: failed to negotiate NVSP version\n",
3219 device_xname(sc->sc_dev));
3220 return -1;
3221 }
3222
3223 sc->sc_proto = protos[i];
3224 if (sc->sc_proto <= HVN_NVS_PROTO_VERSION_4)
3225 sc->sc_ndisver = NDIS_VERSION_6_1;
3226 else
3227 sc->sc_ndisver = NDIS_VERSION_6_30;
3228
3229 DPRINTF("%s: NVS version %#x, NDIS version %u.%u\n",
3230 device_xname(sc->sc_dev), sc->sc_proto,
3231 (sc->sc_ndisver >> 16), sc->sc_ndisver & 0xffff);
3232 }
3233
3234 if (sc->sc_proto >= HVN_NVS_PROTO_VERSION_5)
3235 sc->sc_caps |= HVN_CAPS_HASHVAL;
3236
3237 if (sc->sc_proto >= HVN_NVS_PROTO_VERSION_2) {
3238 /*
3239 * Configure NDIS before initializing it.
3240 */
3241 if (hvn_nvs_conf_ndis(sc, mtu))
3242 return -1;
3243 }
3244
3245 /*
3246 * Initialize NDIS.
3247 */
3248 if (hvn_nvs_init_ndis(sc))
3249 return -1;
3250
3251 /*
3252 * Connect RXBUF.
3253 */
3254 if (hvn_nvs_connect_rxbuf(sc))
3255 return -1;
3256
3257 /*
3258 * Connect chimney sending buffer.
3259 */
3260 if (hvn_nvs_connect_chim(sc))
3261 return -1;
3262
3263 return 0;
3264 }
3265
3266 static int
3267 hvn_nvs_connect_rxbuf(struct hvn_softc *sc)
3268 {
3269 struct hvn_nvs_rxbuf_conn cmd;
3270 struct hvn_nvs_rxbuf_conn_resp *rsp;
3271 uint64_t tid;
3272
3273 if (vmbus_handle_alloc(sc->sc_prichan, &sc->sc_rx_dma, sc->sc_rx_size,
3274 &sc->sc_rx_hndl)) {
3275 DPRINTF("%s: failed to obtain a PA handle\n",
3276 device_xname(sc->sc_dev));
3277 return -1;
3278 }
3279
3280 memset(&cmd, 0, sizeof(cmd));
3281 cmd.nvs_type = HVN_NVS_TYPE_RXBUF_CONN;
3282 cmd.nvs_gpadl = sc->sc_rx_hndl;
3283 cmd.nvs_sig = HVN_NVS_RXBUF_SIG;
3284
3285 tid = atomic_inc_uint_nv(&sc->sc_nvstid);
3286 mutex_enter(&sc->sc_nvsrsp_lock);
3287 if (hvn_nvs_cmd(sc, &cmd, sizeof(cmd), tid, 0))
3288 goto errout;
3289
3290 rsp = (struct hvn_nvs_rxbuf_conn_resp *)&sc->sc_nvsrsp;
3291 if (rsp->nvs_status != HVN_NVS_STATUS_OK) {
3292 DPRINTF("%s: failed to set up the Rx ring\n",
3293 device_xname(sc->sc_dev));
3294 goto errout;
3295 }
3296
3297 SET(sc->sc_flags, HVN_SCF_RXBUF_CONNECTED);
3298
3299 if (rsp->nvs_nsect > 1) {
3300 DPRINTF("%s: invalid number of Rx ring sections: %u\n",
3301 device_xname(sc->sc_dev), rsp->nvs_nsect);
3302 goto errout;
3303 }
3304 mutex_exit(&sc->sc_nvsrsp_lock);
3305
3306 return 0;
3307
3308 errout:
3309 mutex_exit(&sc->sc_nvsrsp_lock);
3310 hvn_nvs_disconnect_rxbuf(sc);
3311 return -1;
3312 }
3313
3314 static int
3315 hvn_nvs_disconnect_rxbuf(struct hvn_softc *sc)
3316 {
3317 struct hvn_nvs_rxbuf_disconn cmd;
3318 uint64_t tid;
3319 int s, error;
3320
3321 if (ISSET(sc->sc_flags, HVN_SCF_RXBUF_CONNECTED)) {
3322 memset(&cmd, 0, sizeof(cmd));
3323 cmd.nvs_type = HVN_NVS_TYPE_RXBUF_DISCONN;
3324 cmd.nvs_sig = HVN_NVS_RXBUF_SIG;
3325
3326 tid = atomic_inc_uint_nv(&sc->sc_nvstid);
3327 mutex_enter(&sc->sc_nvsrsp_lock);
3328 error = hvn_nvs_cmd(sc, &cmd, sizeof(cmd), tid,
3329 HVN_NVS_CMD_NORESP);
3330 if (error) {
3331 device_printf(sc->sc_dev,
3332 "failed to send rxbuf disconn: %d", error);
3333 }
3334 CLR(sc->sc_flags, HVN_SCF_RXBUF_CONNECTED);
3335 mutex_exit(&sc->sc_nvsrsp_lock);
3336
3337 /*
3338 * Wait for the hypervisor to receive this NVS request.
3339 *
3340 * NOTE:
3341 * The TX bufring will not be drained by the hypervisor,
3342 * if the primary channel is revoked.
3343 */
3344 while (!vmbus_channel_tx_empty(sc->sc_prichan) &&
3345 !vmbus_channel_is_revoked(sc->sc_prichan)) {
3346 DELAY(20);
3347 s = splnet();
3348 hvn_nvs_intr1(&sc->sc_rxr[0], sc->sc_tx_process_limit,
3349 sc->sc_rx_process_limit);
3350 splx(s);
3351 }
3352 /*
3353 * Linger long enough for NVS to disconnect RXBUF.
3354 */
3355 DELAY(200);
3356 }
3357
3358 if (sc->sc_prichan->ch_sc->sc_proto < VMBUS_VERSION_WIN10 &&
3359 sc->sc_rx_hndl) {
3360 /*
3361 * Disconnect RXBUF from primary channel.
3362 */
3363 vmbus_handle_free(sc->sc_prichan, sc->sc_rx_hndl);
3364 sc->sc_rx_hndl = 0;
3365 }
3366
3367 return 0;
3368 }
3369
3370 static int
3371 hvn_nvs_connect_chim(struct hvn_softc *sc)
3372 {
3373 struct hvn_nvs_chim_conn cmd;
3374 const struct hvn_nvs_chim_conn_resp *rsp;
3375 uint64_t tid;
3376
3377 mutex_init(&sc->sc_chim_bmap_lock, MUTEX_DEFAULT, IPL_NET);
3378
3379 /*
3380 * Connect chimney sending buffer GPADL to the primary channel.
3381 *
3382 * NOTE:
3383 * Only primary channel has chimney sending buffer connected to it.
3384 * Sub-channels just share this chimney sending buffer.
3385 */
3386 if (vmbus_handle_alloc(sc->sc_prichan, &sc->sc_chim_dma, HVN_CHIM_SIZE,
3387 &sc->sc_chim_hndl)) {
3388 DPRINTF("%s: failed to obtain a PA handle for chimney\n",
3389 device_xname(sc->sc_dev));
3390 return -1;
3391 }
3392
3393 memset(&cmd, 0, sizeof(cmd));
3394 cmd.nvs_type = HVN_NVS_TYPE_CHIM_CONN;
3395 cmd.nvs_gpadl = sc->sc_chim_hndl;
3396 cmd.nvs_sig = HVN_NVS_CHIM_SIG;
3397
3398 tid = atomic_inc_uint_nv(&sc->sc_nvstid);
3399 mutex_enter(&sc->sc_nvsrsp_lock);
3400 if (hvn_nvs_cmd(sc, &cmd, sizeof(cmd), tid, 0))
3401 goto errout;
3402
3403 rsp = (struct hvn_nvs_chim_conn_resp *)&sc->sc_nvsrsp;
3404 if (rsp->nvs_status != HVN_NVS_STATUS_OK) {
3405 DPRINTF("%s: failed to set up chimney sending buffer\n",
3406 device_xname(sc->sc_dev));
3407 goto errout;
3408 }
3409
3410 if (rsp->nvs_sectsz == 0 ||
3411 (rsp->nvs_sectsz % sizeof(uint32_t)) != 0) {
3412 /*
3413 * Can't use chimney sending buffer; done!
3414 */
3415 if (rsp->nvs_sectsz == 0) {
3416 device_printf(sc->sc_dev,
3417 "zero chimney sending buffer section size\n");
3418 } else {
3419 device_printf(sc->sc_dev,
3420 "misaligned chimney sending buffers,"
3421 " section size: %d", rsp->nvs_sectsz);
3422 }
3423 sc->sc_chim_szmax = 0;
3424 sc->sc_chim_cnt = 0;
3425 } else {
3426 sc->sc_chim_szmax = rsp->nvs_sectsz;
3427 sc->sc_chim_cnt = HVN_CHIM_SIZE / sc->sc_chim_szmax;
3428 }
3429
3430 if (sc->sc_chim_szmax > 0) {
3431 if ((HVN_CHIM_SIZE % sc->sc_chim_szmax) != 0) {
3432 device_printf(sc->sc_dev,
3433 "chimney sending sections are not properly "
3434 "aligned\n");
3435 }
3436 if ((sc->sc_chim_cnt % LONG_BIT) != 0) {
3437 device_printf(sc->sc_dev,
3438 "discard %d chimney sending sections\n",
3439 sc->sc_chim_cnt % LONG_BIT);
3440 }
3441
3442 sc->sc_chim_bmap_cnt = sc->sc_chim_cnt / LONG_BIT;
3443 sc->sc_chim_bmap = kmem_zalloc(sc->sc_chim_bmap_cnt *
3444 sizeof(u_long), KM_SLEEP);
3445 }
3446
3447 /* Done! */
3448 SET(sc->sc_flags, HVN_SCF_CHIM_CONNECTED);
3449
3450 aprint_verbose_dev(sc->sc_dev, "chimney sending buffer %d/%d\n",
3451 sc->sc_chim_szmax, sc->sc_chim_cnt);
3452
3453 mutex_exit(&sc->sc_nvsrsp_lock);
3454
3455 return 0;
3456
3457 errout:
3458 mutex_exit(&sc->sc_nvsrsp_lock);
3459 hvn_nvs_disconnect_chim(sc);
3460 return -1;
3461 }
3462
3463 static int
3464 hvn_nvs_disconnect_chim(struct hvn_softc *sc)
3465 {
3466 struct hvn_nvs_chim_disconn cmd;
3467 uint64_t tid;
3468 int s, error;
3469
3470 if (ISSET(sc->sc_flags, HVN_SCF_CHIM_CONNECTED)) {
3471 memset(&cmd, 0, sizeof(cmd));
3472 cmd.nvs_type = HVN_NVS_TYPE_CHIM_DISCONN;
3473 cmd.nvs_sig = HVN_NVS_CHIM_SIG;
3474
3475 tid = atomic_inc_uint_nv(&sc->sc_nvstid);
3476 mutex_enter(&sc->sc_nvsrsp_lock);
3477 error = hvn_nvs_cmd(sc, &cmd, sizeof(cmd), tid,
3478 HVN_NVS_CMD_NORESP);
3479 if (error) {
3480 device_printf(sc->sc_dev,
3481 "failed to send chim disconn: %d", error);
3482 }
3483 CLR(sc->sc_flags, HVN_SCF_CHIM_CONNECTED);
3484 mutex_exit(&sc->sc_nvsrsp_lock);
3485
3486 /*
3487 * Wait for the hypervisor to receive this NVS request.
3488 *
3489 * NOTE:
3490 * The TX bufring will not be drained by the hypervisor,
3491 * if the primary channel is revoked.
3492 */
3493 while (!vmbus_channel_tx_empty(sc->sc_prichan) &&
3494 !vmbus_channel_is_revoked(sc->sc_prichan)) {
3495 DELAY(20);
3496 s = splnet();
3497 hvn_nvs_intr1(&sc->sc_rxr[0], sc->sc_tx_process_limit,
3498 sc->sc_rx_process_limit);
3499 splx(s);
3500 }
3501 /*
3502 * Linger long enough for NVS to disconnect chimney
3503 * sending buffer.
3504 */
3505 DELAY(200);
3506 }
3507
3508 if (sc->sc_prichan->ch_sc->sc_proto < VMBUS_VERSION_WIN10 &&
3509 sc->sc_chim_hndl) {
3510 /*
3511 * Disconnect chimney sending buffer from primary channel.
3512 */
3513 vmbus_handle_free(sc->sc_prichan, sc->sc_chim_hndl);
3514 sc->sc_chim_hndl = 0;
3515 }
3516
3517 if (sc->sc_chim_bmap != NULL) {
3518 		kmem_free(sc->sc_chim_bmap, sc->sc_chim_bmap_cnt * sizeof(u_long));
3519 sc->sc_chim_bmap = NULL;
3520 sc->sc_chim_bmap_cnt = 0;
3521 }
3522
3523 mutex_destroy(&sc->sc_chim_bmap_lock);
3524
3525 return 0;
3526 }
3527
3528 #define HVN_HANDLE_RING_DOTX __BIT(0)
3529
3530 static int
3531 hvn_handle_ring(struct hvn_rx_ring *rxr, int txlimit, int rxlimit)
3532 {
3533 struct hvn_softc *sc = rxr->rxr_softc;
3534 struct vmbus_chanpkt_hdr *cph;
3535 const struct hvn_nvs_hdr *nvs;
3536 uint64_t rid;
3537 uint32_t rlen;
3538 int n, tx = 0, rx = 0;
3539 int result = 0;
3540 int rv;
3541
3542 mutex_enter(&rxr->rxr_lock);
3543 for (;;) {
3544 rv = vmbus_channel_recv(rxr->rxr_chan, rxr->rxr_nvsbuf,
3545 HVN_NVS_BUFSIZE, &rlen, &rid, 1);
3546 if (rv != 0 || rlen == 0) {
3547 if (rv != EAGAIN)
3548 device_printf(sc->sc_dev,
3549 "failed to receive an NVSP packet\n");
3550 break;
3551 }
3552 cph = (struct vmbus_chanpkt_hdr *)rxr->rxr_nvsbuf;
3553 nvs = (const struct hvn_nvs_hdr *)VMBUS_CHANPKT_CONST_DATA(cph);
3554
3555 if (cph->cph_type == VMBUS_CHANPKT_TYPE_COMP) {
3556 switch (nvs->nvs_type) {
3557 case HVN_NVS_TYPE_INIT_RESP:
3558 case HVN_NVS_TYPE_RXBUF_CONNRESP:
3559 case HVN_NVS_TYPE_CHIM_CONNRESP:
3560 case HVN_NVS_TYPE_SUBCH_RESP:
3561 mutex_enter(&sc->sc_nvsrsp_lock);
3562 /* copy the response back */
3563 memcpy(&sc->sc_nvsrsp, nvs, HVN_NVS_MSGSIZE);
3564 sc->sc_nvsdone = 1;
3565 cv_signal(&sc->sc_nvsrsp_cv);
3566 mutex_exit(&sc->sc_nvsrsp_lock);
3567 break;
3568 case HVN_NVS_TYPE_RNDIS_ACK:
3569 if (rxr->rxr_txr == NULL)
3570 break;
3571
3572 result |= HVN_HANDLE_RING_DOTX;
3573 mutex_enter(&rxr->rxr_txr->txr_lock);
3574 hvn_txeof(rxr->rxr_txr, cph->cph_tid);
3575 mutex_exit(&rxr->rxr_txr->txr_lock);
3576 if (txlimit > 0 && ++tx >= txlimit)
3577 goto out;
3578 break;
3579 default:
3580 device_printf(sc->sc_dev,
3581 "unhandled NVSP packet type %u "
3582 "on completion\n", nvs->nvs_type);
3583 break;
3584 }
3585 } else if (cph->cph_type == VMBUS_CHANPKT_TYPE_RXBUF) {
3586 switch (nvs->nvs_type) {
3587 case HVN_NVS_TYPE_RNDIS:
3588 n = hvn_rndis_input(rxr, cph->cph_tid, cph);
3589 if (rxlimit > 0) {
3590 if (n < 0)
3591 goto out;
3592 rx += n;
3593 if (rx >= rxlimit)
3594 goto out;
3595 }
3596 break;
3597 default:
3598 device_printf(sc->sc_dev,
3599 "unhandled NVSP packet type %u "
3600 "on receive\n", nvs->nvs_type);
3601 break;
3602 }
3603 } else if (cph->cph_type == VMBUS_CHANPKT_TYPE_INBAND) {
3604 switch (nvs->nvs_type) {
3605 case HVN_NVS_TYPE_TXTBL_NOTE:
3606 /* Useless; ignore */
3607 break;
3608 default:
3609 device_printf(sc->sc_dev,
3610 "got notify, nvs type %u\n", nvs->nvs_type);
3611 break;
3612 }
3613 } else
3614 device_printf(sc->sc_dev,
3615 "unknown NVSP packet type %u\n", cph->cph_type);
3616 }
3617 out:
3618 mutex_exit(&rxr->rxr_lock);
3619
3620 return result;
3621 }
3622
3623 static void
3624 hvn_nvs_intr1(struct hvn_rx_ring *rxr, int txlimit, int rxlimit)
3625 {
3626 struct hvn_softc *sc = rxr->rxr_softc;
3627 struct ifnet *ifp = SC2IFP(sc);
3628 struct hvn_tx_ring *txr = rxr->rxr_txr;
3629 int result;
3630
3631 rxr->rxr_workqueue = sc->sc_txrx_workqueue;
3632
3633 result = hvn_handle_ring(rxr, txlimit, rxlimit);
3634
3635 if ((result & HVN_HANDLE_RING_DOTX) && txr != NULL) {
3636 mutex_enter(&txr->txr_lock);
3637 /* ALTQ */
3638 if (txr->txr_id == 0) {
3639 if_schedule_deferred_start(ifp);
3640 }
3641 softint_schedule(txr->txr_si);
3642 mutex_exit(&txr->txr_lock);
3643 }
3644 }
3645
3646 static void
3647 hvn_schedule_handle_ring(struct hvn_softc *sc, struct hvn_rx_ring *rxr,
3648 bool intr)
3649 {
3650
3651 KASSERT(mutex_owned(&rxr->rxr_onwork_lock));
3652
3653 if (rxr->rxr_workqueue) {
3654 if (!rxr->rxr_onlist) {
3655 rxr->rxr_onlist = true;
3656 if (intr)
3657 rxr->rxr_evdeferreq.ev_count++;
3658 else
3659 rxr->rxr_evredeferreq.ev_count++;
3660 workqueue_enqueue(sc->sc_wq, &rxr->rxr_wk, NULL);
3661 }
3662 } else {
3663 rxr->rxr_onlist = true;
3664 if (intr)
3665 rxr->rxr_evdeferreq.ev_count++;
3666 else
3667 rxr->rxr_evredeferreq.ev_count++;
3668 softint_schedule(rxr->rxr_si);
3669 }
3670 }
3671
3672 static void
3673 hvn_handle_ring_common(struct hvn_rx_ring *rxr)
3674 {
3675 struct hvn_softc *sc = rxr->rxr_softc;
3676 int txlimit = sc->sc_tx_process_limit;
3677 int rxlimit = sc->sc_rx_process_limit;
3678
3679 rxr->rxr_evdefer.ev_count++;
3680
3681 mutex_enter(&rxr->rxr_onwork_lock);
3682 rxr->rxr_onproc = true;
3683 rxr->rxr_onlist = false;
3684 mutex_exit(&rxr->rxr_onwork_lock);
3685
3686 hvn_nvs_intr1(rxr, txlimit, rxlimit);
3687
3688 mutex_enter(&rxr->rxr_onwork_lock);
3689 if (vmbus_channel_unpause(rxr->rxr_chan)) {
3690 vmbus_channel_pause(rxr->rxr_chan);
3691 hvn_schedule_handle_ring(sc, rxr, false);
3692 }
3693 rxr->rxr_onproc = false;
3694 cv_broadcast(&rxr->rxr_onwork_cv);
3695 mutex_exit(&rxr->rxr_onwork_lock);
3696 }
3697
3698 static void
3699 hvn_handle_ring_work(struct work *wk, void *arg)
3700 {
3701 struct hvn_rx_ring *rxr = container_of(wk, struct hvn_rx_ring, rxr_wk);
3702
3703 hvn_handle_ring_common(rxr);
3704 }
3705
3706 static void
3707 hvn_nvs_softintr(void *arg)
3708 {
3709 struct hvn_rx_ring *rxr = arg;
3710
3711 hvn_handle_ring_common(rxr);
3712 }
3713
3714 static void
3715 hvn_nvs_intr(void *arg)
3716 {
3717 struct hvn_rx_ring *rxr = arg;
3718 struct hvn_softc *sc = rxr->rxr_softc;
3719 int txlimit = cold ? 0 : sc->sc_tx_intr_process_limit;
3720 int rxlimit = cold ? 0 : sc->sc_rx_intr_process_limit;
3721
3722 rxr->rxr_evintr.ev_count++;
3723
3724 KASSERT(!rxr->rxr_onproc);
3725 KASSERT(!rxr->rxr_onlist);
3726
3727 vmbus_channel_pause(rxr->rxr_chan);
3728
3729 hvn_nvs_intr1(rxr, txlimit, rxlimit);
3730
3731 if (vmbus_channel_unpause(rxr->rxr_chan) && !cold) {
3732 vmbus_channel_pause(rxr->rxr_chan);
3733 mutex_enter(&rxr->rxr_onwork_lock);
3734 hvn_schedule_handle_ring(sc, rxr, true);
3735 mutex_exit(&rxr->rxr_onwork_lock);
3736 }
3737 }
3738
3739 static int
3740 hvn_nvs_cmd(struct hvn_softc *sc, void *cmd, size_t cmdsize, uint64_t tid,
3741 u_int flags)
3742 {
3743 struct hvn_rx_ring *rxr = &sc->sc_rxr[0]; /* primary channel */
3744 struct hvn_nvs_hdr *hdr = cmd;
3745 int tries = 10;
3746 int rv, s;
3747
3748 KASSERT(mutex_owned(&sc->sc_nvsrsp_lock));
3749
3750 sc->sc_nvsdone = 0;
3751
3752 do {
3753 rv = vmbus_channel_send(rxr->rxr_chan, cmd, cmdsize,
3754 tid, VMBUS_CHANPKT_TYPE_INBAND,
3755 ISSET(flags, HVN_NVS_CMD_NORESP) ? 0 :
3756 VMBUS_CHANPKT_FLAG_RC);
3757 if (rv == EAGAIN) {
3758 DELAY(1000);
3759 } else if (rv) {
3760 DPRINTF("%s: NVSP operation %u send error %d\n",
3761 device_xname(sc->sc_dev), hdr->nvs_type, rv);
3762 return rv;
3763 }
3764 } while (rv != 0 && --tries > 0);
3765
3766 if (tries == 0 && rv != 0) {
3767 device_printf(sc->sc_dev,
3768 "NVSP operation %u send error %d\n", hdr->nvs_type, rv);
3769 return rv;
3770 }
3771
3772 if (ISSET(flags, HVN_NVS_CMD_NORESP))
3773 return 0;
3774
3775 while (!sc->sc_nvsdone && !ISSET(sc->sc_flags, HVN_SCF_REVOKED)) {
3776 mutex_exit(&sc->sc_nvsrsp_lock);
3777 DELAY(1000);
3778 s = splnet();
3779 hvn_nvs_intr1(rxr, 0, 0);
3780 splx(s);
3781 mutex_enter(&sc->sc_nvsrsp_lock);
3782 }
3783
3784 return 0;
3785 }
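
/*
 * Minimal usage sketch (it mirrors hvn_nvs_doinit() above; not new
 * driver code): callers of hvn_nvs_cmd() hold sc_nvsrsp_lock across
 * the call and, unless HVN_NVS_CMD_NORESP was passed, read the reply
 * that hvn_handle_ring() copied into sc_nvsrsp before dropping the
 * lock.
 */
#if 0
	struct hvn_nvs_init cmd = { .nvs_type = HVN_NVS_TYPE_INIT };
	const struct hvn_nvs_init_resp *rsp;
	uint64_t tid;
	int error;

	tid = atomic_inc_uint_nv(&sc->sc_nvstid);
	mutex_enter(&sc->sc_nvsrsp_lock);
	error = hvn_nvs_cmd(sc, &cmd, sizeof(cmd), tid, 0);
	if (error == 0) {
		rsp = (const struct hvn_nvs_init_resp *)&sc->sc_nvsrsp;
		if (rsp->nvs_status != HVN_NVS_STATUS_OK)
			error = EINVAL;
	}
	mutex_exit(&sc->sc_nvsrsp_lock);
#endif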
3786
3787 static int
3788 hvn_nvs_ack(struct hvn_rx_ring *rxr, uint64_t tid)
3789 {
3790 struct hvn_softc *sc __unused = rxr->rxr_softc;
3791 struct hvn_nvs_rndis_ack cmd;
3792 int tries = 5;
3793 int rv;
3794
3795 cmd.nvs_type = HVN_NVS_TYPE_RNDIS_ACK;
3796 cmd.nvs_status = HVN_NVS_STATUS_OK;
3797 do {
3798 rv = vmbus_channel_send(rxr->rxr_chan, &cmd, sizeof(cmd),
3799 tid, VMBUS_CHANPKT_TYPE_COMP, 0);
3800 if (rv == EAGAIN)
3801 DELAY(10);
3802 else if (rv) {
3803 DPRINTF("%s: NVSP acknowledgement error %d\n",
3804 device_xname(sc->sc_dev), rv);
3805 return rv;
3806 }
3807 } while (rv != 0 && --tries > 0);
3808 return rv;
3809 }
3810
3811 static void
3812 hvn_nvs_detach(struct hvn_softc *sc)
3813 {
3814
3815 hvn_nvs_disconnect_rxbuf(sc);
3816 hvn_nvs_disconnect_chim(sc);
3817 }
3818
3819 static int
3820 hvn_nvs_alloc_subchannels(struct hvn_softc *sc, int *nsubchp)
3821 {
3822 struct hvn_nvs_subch_req cmd;
3823 struct hvn_nvs_subch_resp *rsp;
3824 uint64_t tid;
3825 int nsubch, nsubch_req;
3826
3827 nsubch_req = *nsubchp;
3828 KASSERTMSG(nsubch_req > 0, "invalid # of sub-channels %d", nsubch_req);
3829
3830 memset(&cmd, 0, sizeof(cmd));
3831 cmd.nvs_type = HVN_NVS_TYPE_SUBCH_REQ;
3832 cmd.nvs_op = HVN_NVS_SUBCH_OP_ALLOC;
3833 cmd.nvs_nsubch = nsubch_req;
3834
3835 tid = atomic_inc_uint_nv(&sc->sc_nvstid);
3836 mutex_enter(&sc->sc_nvsrsp_lock);
3837 if (hvn_nvs_cmd(sc, &cmd, sizeof(cmd), tid, 0)) {
3838 mutex_exit(&sc->sc_nvsrsp_lock);
3839 return EIO;
3840 }
3841
3842 rsp = (struct hvn_nvs_subch_resp *)&sc->sc_nvsrsp;
3843 if (rsp->nvs_status != HVN_NVS_STATUS_OK) {
3844 mutex_exit(&sc->sc_nvsrsp_lock);
3845 DPRINTF("%s: failed to alloc sub-channels\n",
3846 device_xname(sc->sc_dev));
3847 return EIO;
3848 }
3849
3850 nsubch = rsp->nvs_nsubch;
3851 if (nsubch > nsubch_req) {
3852 aprint_debug_dev(sc->sc_dev,
3853 "%u subchans are allocated, requested %d\n",
3854 nsubch, nsubch_req);
3855 nsubch = nsubch_req;
3856 }
3857 mutex_exit(&sc->sc_nvsrsp_lock);
3858
3859 *nsubchp = nsubch;
3860
3861 return 0;
3862 }
3863
3864 static inline struct rndis_cmd *
3865 hvn_alloc_cmd(struct hvn_softc *sc)
3866 {
3867 struct rndis_cmd *rc;
3868
3869 mutex_enter(&sc->sc_cntl_fqlck);
3870 while ((rc = TAILQ_FIRST(&sc->sc_cntl_fq)) == NULL)
3871 cv_wait(&sc->sc_cntl_fqcv, &sc->sc_cntl_fqlck);
3872 TAILQ_REMOVE(&sc->sc_cntl_fq, rc, rc_entry);
3873 mutex_exit(&sc->sc_cntl_fqlck);
3874 return rc;
3875 }
3876
3877 static inline void
3878 hvn_submit_cmd(struct hvn_softc *sc, struct rndis_cmd *rc)
3879 {
3880
3881 mutex_enter(&sc->sc_cntl_sqlck);
3882 TAILQ_INSERT_TAIL(&sc->sc_cntl_sq, rc, rc_entry);
3883 mutex_exit(&sc->sc_cntl_sqlck);
3884 }
3885
3886 static inline struct rndis_cmd *
3887 hvn_complete_cmd(struct hvn_softc *sc, uint32_t id)
3888 {
3889 struct rndis_cmd *rc;
3890
3891 mutex_enter(&sc->sc_cntl_sqlck);
3892 TAILQ_FOREACH(rc, &sc->sc_cntl_sq, rc_entry) {
3893 if (rc->rc_id == id) {
3894 TAILQ_REMOVE(&sc->sc_cntl_sq, rc, rc_entry);
3895 break;
3896 }
3897 }
3898 mutex_exit(&sc->sc_cntl_sqlck);
3899 if (rc != NULL) {
3900 mutex_enter(&sc->sc_cntl_cqlck);
3901 TAILQ_INSERT_TAIL(&sc->sc_cntl_cq, rc, rc_entry);
3902 mutex_exit(&sc->sc_cntl_cqlck);
3903 }
3904 return rc;
3905 }
3906
3907 static inline void
3908 hvn_release_cmd(struct hvn_softc *sc, struct rndis_cmd *rc)
3909 {
3910
3911 mutex_enter(&sc->sc_cntl_cqlck);
3912 TAILQ_REMOVE(&sc->sc_cntl_cq, rc, rc_entry);
3913 mutex_exit(&sc->sc_cntl_cqlck);
3914 }
3915
3916 static inline int
3917 hvn_rollback_cmd(struct hvn_softc *sc, struct rndis_cmd *rc)
3918 {
3919 struct rndis_cmd *rn;
3920
3921 mutex_enter(&sc->sc_cntl_sqlck);
3922 TAILQ_FOREACH(rn, &sc->sc_cntl_sq, rc_entry) {
3923 if (rn == rc) {
3924 TAILQ_REMOVE(&sc->sc_cntl_sq, rc, rc_entry);
3925 mutex_exit(&sc->sc_cntl_sqlck);
3926 return 0;
3927 }
3928 }
3929 mutex_exit(&sc->sc_cntl_sqlck);
3930 return -1;
3931 }
3932
3933 static inline void
3934 hvn_free_cmd(struct hvn_softc *sc, struct rndis_cmd *rc)
3935 {
3936
3937 memset(rc->rc_req, 0, sizeof(struct rndis_packet_msg));
3938 memset(&rc->rc_cmp, 0, sizeof(rc->rc_cmp));
3939 memset(&rc->rc_msg, 0, sizeof(rc->rc_msg));
3940 mutex_enter(&sc->sc_cntl_fqlck);
3941 TAILQ_INSERT_TAIL(&sc->sc_cntl_fq, rc, rc_entry);
3942 cv_signal(&sc->sc_cntl_fqcv);
3943 mutex_exit(&sc->sc_cntl_fqlck);
3944 }
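
/*
 * Overview (descriptive comment, not new code): an RNDIS control
 * request cycles through three queues.  hvn_alloc_cmd() takes it off
 * the free queue (sleeping on sc_cntl_fqcv when the queue is empty),
 * hvn_submit_cmd() parks it on the submitted queue while the request
 * is in flight, hvn_complete_cmd() moves it to the completed queue
 * when the matching response arrives, and hvn_release_cmd() plus
 * hvn_free_cmd() return it to the free queue.  hvn_rollback_cmd() is
 * the error path that pulls an unanswered request back off the
 * submitted queue.
 *
 *	fq --alloc--> sq --complete--> cq --release/free--> fq
 *	              sq --rollback (no response)--free---> fq
 */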
3945
3946 static int
3947 hvn_rndis_init(struct hvn_softc *sc)
3948 {
3949 struct rndis_cmd *rc;
3950 int i;
3951
3952 /* RNDIS control message queues */
3953 TAILQ_INIT(&sc->sc_cntl_sq);
3954 TAILQ_INIT(&sc->sc_cntl_cq);
3955 TAILQ_INIT(&sc->sc_cntl_fq);
3956 mutex_init(&sc->sc_cntl_sqlck, MUTEX_DEFAULT, IPL_NET);
3957 mutex_init(&sc->sc_cntl_cqlck, MUTEX_DEFAULT, IPL_NET);
3958 mutex_init(&sc->sc_cntl_fqlck, MUTEX_DEFAULT, IPL_NET);
3959 cv_init(&sc->sc_cntl_fqcv, "nvsalloc");
3960
3961 for (i = 0; i < HVN_RNDIS_CTLREQS; i++) {
3962 rc = &sc->sc_cntl_msgs[i];
3963 if (bus_dmamap_create(sc->sc_dmat, PAGE_SIZE, 1, PAGE_SIZE, 0,
3964 BUS_DMA_WAITOK, &rc->rc_dmap)) {
3965 DPRINTF("%s: failed to create RNDIS command map\n",
3966 device_xname(sc->sc_dev));
3967 goto errout;
3968 }
3969 if (bus_dmamem_alloc(sc->sc_dmat, PAGE_SIZE, PAGE_SIZE,
3970 0, &rc->rc_segs, 1, &rc->rc_nsegs, BUS_DMA_WAITOK)) {
3971 DPRINTF("%s: failed to allocate RNDIS command\n",
3972 device_xname(sc->sc_dev));
3973 bus_dmamap_destroy(sc->sc_dmat, rc->rc_dmap);
3974 goto errout;
3975 }
3976 if (bus_dmamem_map(sc->sc_dmat, &rc->rc_segs, rc->rc_nsegs,
3977 PAGE_SIZE, (void **)&rc->rc_req, BUS_DMA_WAITOK)) {
3978 DPRINTF("%s: failed to allocate RNDIS command\n",
3979 device_xname(sc->sc_dev));
3980 bus_dmamem_free(sc->sc_dmat, &rc->rc_segs,
3981 rc->rc_nsegs);
3982 bus_dmamap_destroy(sc->sc_dmat, rc->rc_dmap);
3983 goto errout;
3984 }
3985 memset(rc->rc_req, 0, PAGE_SIZE);
3986 if (bus_dmamap_load(sc->sc_dmat, rc->rc_dmap, rc->rc_req,
3987 PAGE_SIZE, NULL, BUS_DMA_WAITOK)) {
3988 DPRINTF("%s: failed to load RNDIS command map\n",
3989 device_xname(sc->sc_dev));
3990 bus_dmamem_unmap(sc->sc_dmat, rc->rc_req, PAGE_SIZE);
3991 rc->rc_req = NULL;
3992 bus_dmamem_free(sc->sc_dmat, &rc->rc_segs,
3993 rc->rc_nsegs);
3994 bus_dmamap_destroy(sc->sc_dmat, rc->rc_dmap);
3995 goto errout;
3996 }
3997 rc->rc_gpa = atop(rc->rc_dmap->dm_segs[0].ds_addr);
3998 mutex_init(&rc->rc_lock, MUTEX_DEFAULT, IPL_NET);
3999 cv_init(&rc->rc_cv, "rndiscmd");
4000 TAILQ_INSERT_TAIL(&sc->sc_cntl_fq, rc, rc_entry);
4001 }
4002
4003 /* Initialize RNDIS Data command */
4004 memset(&sc->sc_data_msg, 0, sizeof(sc->sc_data_msg));
4005 sc->sc_data_msg.nvs_type = HVN_NVS_TYPE_RNDIS;
4006 sc->sc_data_msg.nvs_rndis_mtype = HVN_NVS_RNDIS_MTYPE_DATA;
4007 sc->sc_data_msg.nvs_chim_idx = HVN_NVS_CHIM_IDX_INVALID;
4008
4009 return 0;
4010
4011 errout:
4012 hvn_rndis_destroy(sc);
4013 return -1;
4014 }
4015
4016 static void
4017 hvn_rndis_destroy(struct hvn_softc *sc)
4018 {
4019 struct rndis_cmd *rc;
4020 int i;
4021
4022 for (i = 0; i < HVN_RNDIS_CTLREQS; i++) {
4023 rc = &sc->sc_cntl_msgs[i];
4024 if (rc->rc_req == NULL)
4025 continue;
4026
4027 TAILQ_REMOVE(&sc->sc_cntl_fq, rc, rc_entry);
4028 bus_dmamap_unload(sc->sc_dmat, rc->rc_dmap);
4029 bus_dmamem_unmap(sc->sc_dmat, rc->rc_req, PAGE_SIZE);
4030 rc->rc_req = NULL;
4031 bus_dmamem_free(sc->sc_dmat, &rc->rc_segs, rc->rc_nsegs);
4032 bus_dmamap_destroy(sc->sc_dmat, rc->rc_dmap);
4033 mutex_destroy(&rc->rc_lock);
4034 cv_destroy(&rc->rc_cv);
4035 }
4036
4037 mutex_destroy(&sc->sc_cntl_sqlck);
4038 mutex_destroy(&sc->sc_cntl_cqlck);
4039 mutex_destroy(&sc->sc_cntl_fqlck);
4040 cv_destroy(&sc->sc_cntl_fqcv);
4041 }
4042
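/*
 * Send REMOTE_NDIS_INITIALIZE_MSG to the host and record the
 * negotiated packet aggregation parameters (size, count and
 * alignment) from the completion, enforcing a minimum alignment
 * of 4 bytes.
 */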
4043 static int
4044 hvn_rndis_attach(struct hvn_softc *sc, int mtu)
4045 {
4046 struct rndis_init_req *req;
4047 struct rndis_init_comp *cmp;
4048 struct rndis_cmd *rc;
4049 int rv;
4050
4051 rc = hvn_alloc_cmd(sc);
4052
4053 bus_dmamap_sync(sc->sc_dmat, rc->rc_dmap, 0, PAGE_SIZE,
4054 BUS_DMASYNC_PREREAD);
4055
4056 rc->rc_id = atomic_inc_uint_nv(&sc->sc_rndisrid);
4057
4058 req = rc->rc_req;
4059 req->rm_type = REMOTE_NDIS_INITIALIZE_MSG;
4060 req->rm_len = sizeof(*req);
4061 req->rm_rid = rc->rc_id;
4062 req->rm_ver_major = RNDIS_VERSION_MAJOR;
4063 req->rm_ver_minor = RNDIS_VERSION_MINOR;
4064 req->rm_max_xfersz = HVN_RNDIS_XFER_SIZE;
4065
4066 rc->rc_cmplen = sizeof(*cmp);
4067
4068 bus_dmamap_sync(sc->sc_dmat, rc->rc_dmap, 0, PAGE_SIZE,
4069 BUS_DMASYNC_PREWRITE);
4070
4071 if ((rv = hvn_rndis_cmd(sc, rc, 0)) != 0) {
4072 DPRINTF("%s: INITIALIZE_MSG failed, error %d\n",
4073 device_xname(sc->sc_dev), rv);
4074 hvn_free_cmd(sc, rc);
4075 return -1;
4076 }
4077 cmp = (struct rndis_init_comp *)&rc->rc_cmp;
4078 if (cmp->rm_status != RNDIS_STATUS_SUCCESS) {
4079 DPRINTF("%s: failed to init RNDIS, error %#x\n",
4080 device_xname(sc->sc_dev), cmp->rm_status);
4081 hvn_free_cmd(sc, rc);
4082 return -1;
4083 }
4084
4085 sc->sc_rndis_agg_size = cmp->rm_pktmaxsz;
4086 sc->sc_rndis_agg_pkts = cmp->rm_pktmaxcnt;
4087 sc->sc_rndis_agg_align = __BIT(cmp->rm_align);
4088
4089 if (sc->sc_rndis_agg_align < sizeof(uint32_t)) {
4090 /*
4091 * The RNDIS packet message encap assumes that the RNDIS
4092 * packet message is at least 4 bytes aligned. Fix up the
4093 * alignment here, if the remote side sets the alignment
4094 * too low.
4095 */
4096 aprint_verbose_dev(sc->sc_dev,
4097 "fixup RNDIS aggpkt align: %u -> %zu\n",
4098 sc->sc_rndis_agg_align, sizeof(uint32_t));
4099 sc->sc_rndis_agg_align = sizeof(uint32_t);
4100 }
4101
4102 aprint_verbose_dev(sc->sc_dev,
4103 "RNDIS ver %u.%u, aggpkt size %u, aggpkt cnt %u, aggpkt align %u\n",
4104 cmp->rm_ver_major, cmp->rm_ver_minor, sc->sc_rndis_agg_size,
4105 sc->sc_rndis_agg_pkts, sc->sc_rndis_agg_align);
4106
4107 hvn_free_cmd(sc, rc);
4108
4109 return 0;
4110 }
4111
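/*
 * Query OID_GEN_RECEIVE_SCALE_CAPABILITIES (NDIS 6.20+) and derive
 * the usable number of RX rings (capped by the indirect table
 * size), the RSS indirect table size and the supported hash
 * function/type bits.
 */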
4112 static int
4113 hvn_get_rsscaps(struct hvn_softc *sc, int *nrxr)
4114 {
4115 struct ndis_rss_caps in, caps;
4116 size_t caps_len;
4117 int error, rxr_cnt, indsz, hash_fnidx;
4118 uint32_t hash_func = 0, hash_types = 0;
4119
4120 *nrxr = 0;
4121
4122 if (sc->sc_ndisver < NDIS_VERSION_6_20)
4123 return EOPNOTSUPP;
4124
4125 memset(&in, 0, sizeof(in));
4126 in.ndis_hdr.ndis_type = NDIS_OBJTYPE_RSS_CAPS;
4127 in.ndis_hdr.ndis_rev = NDIS_RSS_CAPS_REV_2;
4128 in.ndis_hdr.ndis_size = NDIS_RSS_CAPS_SIZE;
4129
4130 caps_len = NDIS_RSS_CAPS_SIZE;
4131 error = hvn_rndis_query2(sc, OID_GEN_RECEIVE_SCALE_CAPABILITIES,
4132 &in, NDIS_RSS_CAPS_SIZE, &caps, &caps_len, NDIS_RSS_CAPS_SIZE_6_0);
4133 if (error)
4134 return error;
4135
4136 /*
4137 * Preliminary verification.
4138 */
4139 if (caps.ndis_hdr.ndis_type != NDIS_OBJTYPE_RSS_CAPS) {
4140 DPRINTF("%s: invalid NDIS objtype 0x%02x\n",
4141 device_xname(sc->sc_dev), caps.ndis_hdr.ndis_type);
4142 return EINVAL;
4143 }
4144 if (caps.ndis_hdr.ndis_rev < NDIS_RSS_CAPS_REV_1) {
4145 DPRINTF("%s: invalid NDIS objrev 0x%02x\n",
4146 device_xname(sc->sc_dev), caps.ndis_hdr.ndis_rev);
4147 return EINVAL;
4148 }
4149 if (caps.ndis_hdr.ndis_size > caps_len) {
4150 DPRINTF("%s: invalid NDIS objsize %u, data size %zu\n",
4151 device_xname(sc->sc_dev), caps.ndis_hdr.ndis_size,
4152 caps_len);
4153 return EINVAL;
4154 } else if (caps.ndis_hdr.ndis_size < NDIS_RSS_CAPS_SIZE_6_0) {
4155 DPRINTF("%s: invalid NDIS objsize %u\n",
4156 device_xname(sc->sc_dev), caps.ndis_hdr.ndis_size);
4157 return EINVAL;
4158 }
4159
4160 /*
4161 * Save information for later RSS configuration.
4162 */
4163 if (caps.ndis_nrxr == 0) {
4164 DPRINTF("%s: 0 RX rings!?\n", device_xname(sc->sc_dev));
4165 return EINVAL;
4166 }
4167 rxr_cnt = caps.ndis_nrxr;
4168 aprint_debug_dev(sc->sc_dev, "%u Rx rings\n", rxr_cnt);
4169
4170 if (caps.ndis_hdr.ndis_size == NDIS_RSS_CAPS_SIZE &&
4171 caps.ndis_hdr.ndis_rev >= NDIS_RSS_CAPS_REV_2) {
4172 if (caps.ndis_nind > NDIS_HASH_INDCNT) {
4173 DPRINTF("%s: too many RSS indirect table entries %u\n",
4174 device_xname(sc->sc_dev), caps.ndis_nind);
4175 return EOPNOTSUPP;
4176 }
4177 if (!powerof2(caps.ndis_nind)) {
4178 DPRINTF("%s: RSS indirect table size is not power-of-2:"
4179 " %u\n", device_xname(sc->sc_dev), caps.ndis_nind);
4180 return EOPNOTSUPP;
4181 }
4182
4183 indsz = caps.ndis_nind;
4184 } else {
4185 indsz = NDIS_HASH_INDCNT;
4186 }
4187 if (rxr_cnt > indsz) {
4188 aprint_debug_dev(sc->sc_dev,
4189 "# of RX rings (%u) > RSS indirect table size %u\n",
4190 rxr_cnt, indsz);
4191 rxr_cnt = indsz;
4192 }
4193
4194 /*
4195 * NOTE:
4196 	 * Toeplitz is at the lowest bit, and it is preferred; so ffs(),
4197 * instead of fls(), is used here.
4198 */
4199 hash_fnidx = ffs(caps.ndis_caps & NDIS_RSS_CAP_HASHFUNC_MASK);
4200 if (hash_fnidx == 0) {
4201 DPRINTF("%s: no hash functions, caps 0x%08x\n",
4202 device_xname(sc->sc_dev), caps.ndis_caps);
4203 return EOPNOTSUPP;
4204 }
4205 hash_func = 1 << (hash_fnidx - 1); /* ffs is 1-based */
4206
4207 if (caps.ndis_caps & NDIS_RSS_CAP_IPV4)
4208 hash_types |= NDIS_HASH_IPV4 | NDIS_HASH_TCP_IPV4;
4209 if (caps.ndis_caps & NDIS_RSS_CAP_IPV6)
4210 hash_types |= NDIS_HASH_IPV6 | NDIS_HASH_TCP_IPV6;
4211 if (caps.ndis_caps & NDIS_RSS_CAP_IPV6_EX)
4212 hash_types |= NDIS_HASH_IPV6_EX | NDIS_HASH_TCP_IPV6_EX;
4213 if (hash_types == 0) {
4214 DPRINTF("%s: no hash types, caps 0x%08x\n",
4215 device_xname(sc->sc_dev), caps.ndis_caps);
4216 return EOPNOTSUPP;
4217 }
4218 aprint_debug_dev(sc->sc_dev, "RSS caps %#x\n", caps.ndis_caps);
4219
4220 sc->sc_rss_ind_size = indsz;
4221 sc->sc_rss_hcap = hash_func | hash_types;
4222 if (sc->sc_caps & HVN_CAPS_UDPHASH) {
4223 /* UDP 4-tuple hash is unconditionally enabled. */
4224 sc->sc_rss_hcap |= NDIS_HASH_UDP_IPV4_X;
4225 }
4226 *nrxr = rxr_cnt;
4227
4228 return 0;
4229 }
4230
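/*
 * Push the current RSS key, hash types and indirect table to the
 * host via OID_GEN_RECEIVE_SCALE_PARAMETERS.  This is a no-op for
 * hosts older than NDIS 6.20.
 */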
4231 static int
4232 hvn_set_rss(struct hvn_softc *sc, uint16_t flags)
4233 {
4234 struct ndis_rssprm_toeplitz *rss = &sc->sc_rss;
4235 struct ndis_rss_params *params = &rss->rss_params;
4236 int len;
4237
4238 /*
4239 * Only NDIS 6.20+ is supported:
4240 	 * We only support the 4-byte indirect table elements that were
4241 	 * adopted with NDIS 6.20.
4242 */
4243 if (sc->sc_ndisver < NDIS_VERSION_6_20)
4244 return 0;
4245
4246 	/* XXX only one can be specified though, popcnt? */
4247 KASSERTMSG((sc->sc_rss_hash & NDIS_HASH_FUNCTION_MASK),
4248 "no hash func %08x", sc->sc_rss_hash);
4249 KASSERTMSG((sc->sc_rss_hash & NDIS_HASH_STD),
4250 "no standard hash types %08x", sc->sc_rss_hash);
4251 KASSERTMSG(sc->sc_rss_ind_size > 0, "no indirect table size");
4252
4253 aprint_debug_dev(sc->sc_dev, "RSS indirect table size %d, hash %#x\n",
4254 sc->sc_rss_ind_size, sc->sc_rss_hash);
4255
4256 len = NDIS_RSSPRM_TOEPLITZ_SIZE(sc->sc_rss_ind_size);
4257
4258 memset(params, 0, sizeof(*params));
4259 params->ndis_hdr.ndis_type = NDIS_OBJTYPE_RSS_PARAMS;
4260 params->ndis_hdr.ndis_rev = NDIS_RSS_PARAMS_REV_2;
4261 params->ndis_hdr.ndis_size = len;
4262 params->ndis_flags = flags;
4263 params->ndis_hash =
4264 sc->sc_rss_hash & (NDIS_HASH_FUNCTION_MASK | NDIS_HASH_STD);
4265 params->ndis_indsize = sizeof(rss->rss_ind[0]) * sc->sc_rss_ind_size;
4266 params->ndis_indoffset =
4267 offsetof(struct ndis_rssprm_toeplitz, rss_ind[0]);
4268 params->ndis_keysize = sizeof(rss->rss_key);
4269 params->ndis_keyoffset =
4270 offsetof(struct ndis_rssprm_toeplitz, rss_key[0]);
4271
4272 return hvn_rndis_set(sc, OID_GEN_RECEIVE_SCALE_PARAMETERS, rss, len);
4273 }
4274
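/*
 * Clamp RSS indirect table entries that point beyond the number
 * of RX rings actually in use.
 */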
4275 static void
4276 hvn_fixup_rss_ind(struct hvn_softc *sc)
4277 {
4278 struct ndis_rssprm_toeplitz *rss = &sc->sc_rss;
4279 int i, nchan;
4280
4281 nchan = sc->sc_nrxr_inuse;
4282 KASSERTMSG(nchan > 1, "invalid # of channels %d", nchan);
4283
4284 /*
4285 * Check indirect table to make sure that all channels in it
4286 * can be used.
4287 */
4288 for (i = 0; i < NDIS_HASH_INDCNT; i++) {
4289 if (rss->rss_ind[i] >= nchan) {
4290 DPRINTF("%s: RSS indirect table %d fixup: %u -> %d\n",
4291 device_xname(sc->sc_dev), i, rss->rss_ind[i],
4292 nchan - 1);
4293 rss->rss_ind[i] = nchan - 1;
4294 }
4295 }
4296 }
4297
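/*
 * Query OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES with a request sized
 * for the negotiated NDIS version, and sanity check the reply
 * header before the caller looks at any of the offload fields.
 */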
4298 static int
4299 hvn_get_hwcaps(struct hvn_softc *sc, struct ndis_offload *caps)
4300 {
4301 struct ndis_offload in;
4302 size_t caps_len, len;
4303 int error;
4304
4305 memset(&in, 0, sizeof(in));
4306 in.ndis_hdr.ndis_type = NDIS_OBJTYPE_OFFLOAD;
4307 if (sc->sc_ndisver >= NDIS_VERSION_6_30) {
4308 in.ndis_hdr.ndis_rev = NDIS_OFFLOAD_REV_3;
4309 len = in.ndis_hdr.ndis_size = NDIS_OFFLOAD_SIZE;
4310 } else if (sc->sc_ndisver >= NDIS_VERSION_6_1) {
4311 in.ndis_hdr.ndis_rev = NDIS_OFFLOAD_REV_2;
4312 len = in.ndis_hdr.ndis_size = NDIS_OFFLOAD_SIZE_6_1;
4313 } else {
4314 in.ndis_hdr.ndis_rev = NDIS_OFFLOAD_REV_1;
4315 len = in.ndis_hdr.ndis_size = NDIS_OFFLOAD_SIZE_6_0;
4316 }
4317
4318 caps_len = NDIS_OFFLOAD_SIZE;
4319 error = hvn_rndis_query2(sc, OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES,
4320 &in, len, caps, &caps_len, NDIS_OFFLOAD_SIZE_6_0);
4321 if (error)
4322 return error;
4323
4324 /*
4325 * Preliminary verification.
4326 */
4327 if (caps->ndis_hdr.ndis_type != NDIS_OBJTYPE_OFFLOAD) {
4328 DPRINTF("%s: invalid NDIS objtype 0x%02x\n",
4329 device_xname(sc->sc_dev), caps->ndis_hdr.ndis_type);
4330 return EINVAL;
4331 }
4332 if (caps->ndis_hdr.ndis_rev < NDIS_OFFLOAD_REV_1) {
4333 DPRINTF("%s: invalid NDIS objrev 0x%02x\n",
4334 device_xname(sc->sc_dev), caps->ndis_hdr.ndis_rev);
4335 return EINVAL;
4336 }
4337 if (caps->ndis_hdr.ndis_size > caps_len) {
4338 DPRINTF("%s: invalid NDIS objsize %u, data size %zu\n",
4339 device_xname(sc->sc_dev), caps->ndis_hdr.ndis_size,
4340 caps_len);
4341 return EINVAL;
4342 } else if (caps->ndis_hdr.ndis_size < NDIS_OFFLOAD_SIZE_6_0) {
4343 DPRINTF("%s: invalid NDIS objsize %u\n",
4344 device_xname(sc->sc_dev), caps->ndis_hdr.ndis_size);
4345 return EINVAL;
4346 }
4347
4348 /*
4349 * NOTE:
4350 * caps->ndis_hdr.ndis_size MUST be checked before accessing
4351 * NDIS 6.1+ specific fields.
4352 */
4353 aprint_debug_dev(sc->sc_dev, "hwcaps rev %u\n",
4354 caps->ndis_hdr.ndis_rev);
4355
4356 aprint_debug_dev(sc->sc_dev, "hwcaps csum: "
4357 "ip4 tx 0x%x/0x%x rx 0x%x/0x%x, "
4358 "ip6 tx 0x%x/0x%x rx 0x%x/0x%x\n",
4359 caps->ndis_csum.ndis_ip4_txcsum, caps->ndis_csum.ndis_ip4_txenc,
4360 caps->ndis_csum.ndis_ip4_rxcsum, caps->ndis_csum.ndis_ip4_rxenc,
4361 caps->ndis_csum.ndis_ip6_txcsum, caps->ndis_csum.ndis_ip6_txenc,
4362 caps->ndis_csum.ndis_ip6_rxcsum, caps->ndis_csum.ndis_ip6_rxenc);
4363 aprint_debug_dev(sc->sc_dev, "hwcaps lsov2: "
4364 "ip4 maxsz %u minsg %u encap 0x%x, "
4365 "ip6 maxsz %u minsg %u encap 0x%x opts 0x%x\n",
4366 caps->ndis_lsov2.ndis_ip4_maxsz, caps->ndis_lsov2.ndis_ip4_minsg,
4367 caps->ndis_lsov2.ndis_ip4_encap, caps->ndis_lsov2.ndis_ip6_maxsz,
4368 caps->ndis_lsov2.ndis_ip6_minsg, caps->ndis_lsov2.ndis_ip6_encap,
4369 caps->ndis_lsov2.ndis_ip6_opts);
4370
4371 return 0;
4372 }
4373
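/*
 * Translate the hardware offload capabilities into HVN_CAPS_*
 * flags and enable the matching checksum and TSO offloads through
 * OID_TCP_OFFLOAD_PARAMETERS.  TSO is disabled again if the
 * negotiated limits cannot cover tso_minsg MTU-sized segments.
 */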
4374 static int
4375 hvn_set_capabilities(struct hvn_softc *sc, int mtu)
4376 {
4377 struct ndis_offload hwcaps;
4378 struct ndis_offload_params params;
4379 size_t len;
4380 uint32_t caps = 0;
4381 int error, tso_maxsz, tso_minsg;
4382
4383 error = hvn_get_hwcaps(sc, &hwcaps);
4384 if (error) {
4385 DPRINTF("%s: failed to query hwcaps\n",
4386 device_xname(sc->sc_dev));
4387 return error;
4388 }
4389
4390 /* NOTE: 0 means "no change" */
4391 	memset(&params, 0, sizeof(params));
4392
4393 params.ndis_hdr.ndis_type = NDIS_OBJTYPE_DEFAULT;
4394 if (sc->sc_ndisver < NDIS_VERSION_6_30) {
4395 params.ndis_hdr.ndis_rev = NDIS_OFFLOAD_PARAMS_REV_2;
4396 len = params.ndis_hdr.ndis_size = NDIS_OFFLOAD_PARAMS_SIZE_6_1;
4397 } else {
4398 params.ndis_hdr.ndis_rev = NDIS_OFFLOAD_PARAMS_REV_3;
4399 len = params.ndis_hdr.ndis_size = NDIS_OFFLOAD_PARAMS_SIZE;
4400 }
4401
4402 /*
4403 * TSO4/TSO6 setup.
4404 */
4405 tso_maxsz = IP_MAXPACKET;
4406 tso_minsg = 2;
4407 if (hwcaps.ndis_lsov2.ndis_ip4_encap & NDIS_OFFLOAD_ENCAP_8023) {
4408 caps |= HVN_CAPS_TSO4;
4409 params.ndis_lsov2_ip4 = NDIS_OFFLOAD_LSOV2_ON;
4410
4411 if (hwcaps.ndis_lsov2.ndis_ip4_maxsz < tso_maxsz)
4412 tso_maxsz = hwcaps.ndis_lsov2.ndis_ip4_maxsz;
4413 if (hwcaps.ndis_lsov2.ndis_ip4_minsg > tso_minsg)
4414 tso_minsg = hwcaps.ndis_lsov2.ndis_ip4_minsg;
4415 }
4416 if ((hwcaps.ndis_lsov2.ndis_ip6_encap & NDIS_OFFLOAD_ENCAP_8023) &&
4417 (hwcaps.ndis_lsov2.ndis_ip6_opts & HVN_NDIS_LSOV2_CAP_IP6) ==
4418 HVN_NDIS_LSOV2_CAP_IP6) {
4419 caps |= HVN_CAPS_TSO6;
4420 params.ndis_lsov2_ip6 = NDIS_OFFLOAD_LSOV2_ON;
4421
4422 if (hwcaps.ndis_lsov2.ndis_ip6_maxsz < tso_maxsz)
4423 tso_maxsz = hwcaps.ndis_lsov2.ndis_ip6_maxsz;
4424 if (hwcaps.ndis_lsov2.ndis_ip6_minsg > tso_minsg)
4425 tso_minsg = hwcaps.ndis_lsov2.ndis_ip6_minsg;
4426 }
4427 sc->sc_tso_szmax = 0;
4428 sc->sc_tso_sgmin = 0;
4429 if (caps & (HVN_CAPS_TSO4 | HVN_CAPS_TSO6)) {
4430 KASSERTMSG(tso_maxsz <= IP_MAXPACKET,
4431 "invalid NDIS TSO maxsz %d", tso_maxsz);
4432 KASSERTMSG(tso_minsg >= 2,
4433 "invalid NDIS TSO minsg %d", tso_minsg);
4434 if (tso_maxsz < tso_minsg * mtu) {
4435 DPRINTF("%s: invalid NDIS TSO config: "
4436 "maxsz %d, minsg %d, mtu %d; "
4437 "disable TSO4 and TSO6\n", device_xname(sc->sc_dev),
4438 tso_maxsz, tso_minsg, mtu);
4439 caps &= ~(HVN_CAPS_TSO4 | HVN_CAPS_TSO6);
4440 params.ndis_lsov2_ip4 = NDIS_OFFLOAD_LSOV2_OFF;
4441 params.ndis_lsov2_ip6 = NDIS_OFFLOAD_LSOV2_OFF;
4442 } else {
4443 sc->sc_tso_szmax = tso_maxsz;
4444 sc->sc_tso_sgmin = tso_minsg;
4445 aprint_debug_dev(sc->sc_dev,
4446 "NDIS TSO szmax %d sgmin %d\n",
4447 sc->sc_tso_szmax, sc->sc_tso_sgmin);
4448 }
4449 }
4450
4451 /* IPv4 checksum */
4452 if ((hwcaps.ndis_csum.ndis_ip4_txcsum & HVN_NDIS_TXCSUM_CAP_IP4) ==
4453 HVN_NDIS_TXCSUM_CAP_IP4) {
4454 caps |= HVN_CAPS_IPCS;
4455 params.ndis_ip4csum = NDIS_OFFLOAD_PARAM_TX;
4456 }
4457 if (hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_IP4) {
4458 if (params.ndis_ip4csum == NDIS_OFFLOAD_PARAM_TX)
4459 params.ndis_ip4csum = NDIS_OFFLOAD_PARAM_TXRX;
4460 else
4461 params.ndis_ip4csum = NDIS_OFFLOAD_PARAM_RX;
4462 }
4463
4464 /* TCP4 checksum */
4465 if ((hwcaps.ndis_csum.ndis_ip4_txcsum & HVN_NDIS_TXCSUM_CAP_TCP4) ==
4466 HVN_NDIS_TXCSUM_CAP_TCP4) {
4467 caps |= HVN_CAPS_TCP4CS;
4468 params.ndis_tcp4csum = NDIS_OFFLOAD_PARAM_TX;
4469 }
4470 if (hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_TCP4) {
4471 if (params.ndis_tcp4csum == NDIS_OFFLOAD_PARAM_TX)
4472 params.ndis_tcp4csum = NDIS_OFFLOAD_PARAM_TXRX;
4473 else
4474 params.ndis_tcp4csum = NDIS_OFFLOAD_PARAM_RX;
4475 }
4476
4477 /* UDP4 checksum */
4478 if (hwcaps.ndis_csum.ndis_ip4_txcsum & NDIS_TXCSUM_CAP_UDP4) {
4479 caps |= HVN_CAPS_UDP4CS;
4480 params.ndis_udp4csum = NDIS_OFFLOAD_PARAM_TX;
4481 }
4482 if (hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_UDP4) {
4483 if (params.ndis_udp4csum == NDIS_OFFLOAD_PARAM_TX)
4484 params.ndis_udp4csum = NDIS_OFFLOAD_PARAM_TXRX;
4485 else
4486 params.ndis_udp4csum = NDIS_OFFLOAD_PARAM_RX;
4487 }
4488
4489 /* TCP6 checksum */
4490 if ((hwcaps.ndis_csum.ndis_ip6_txcsum & HVN_NDIS_TXCSUM_CAP_TCP6) ==
4491 HVN_NDIS_TXCSUM_CAP_TCP6) {
4492 caps |= HVN_CAPS_TCP6CS;
4493 params.ndis_tcp6csum = NDIS_OFFLOAD_PARAM_TX;
4494 }
4495 if (hwcaps.ndis_csum.ndis_ip6_rxcsum & NDIS_RXCSUM_CAP_TCP6) {
4496 if (params.ndis_tcp6csum == NDIS_OFFLOAD_PARAM_TX)
4497 params.ndis_tcp6csum = NDIS_OFFLOAD_PARAM_TXRX;
4498 else
4499 params.ndis_tcp6csum = NDIS_OFFLOAD_PARAM_RX;
4500 }
4501
4502 /* UDP6 checksum */
4503 if ((hwcaps.ndis_csum.ndis_ip6_txcsum & HVN_NDIS_TXCSUM_CAP_UDP6) ==
4504 HVN_NDIS_TXCSUM_CAP_UDP6) {
4505 caps |= HVN_CAPS_UDP6CS;
4506 params.ndis_udp6csum = NDIS_OFFLOAD_PARAM_TX;
4507 }
4508 if (hwcaps.ndis_csum.ndis_ip6_rxcsum & NDIS_RXCSUM_CAP_UDP6) {
4509 if (params.ndis_udp6csum == NDIS_OFFLOAD_PARAM_TX)
4510 params.ndis_udp6csum = NDIS_OFFLOAD_PARAM_TXRX;
4511 else
4512 params.ndis_udp6csum = NDIS_OFFLOAD_PARAM_RX;
4513 }
4514
4515 aprint_debug_dev(sc->sc_dev, "offload csum: "
4516 "ip4 %u, tcp4 %u, udp4 %u, tcp6 %u, udp6 %u\n",
4517 params.ndis_ip4csum, params.ndis_tcp4csum, params.ndis_udp4csum,
4518 params.ndis_tcp6csum, params.ndis_udp6csum);
4519 aprint_debug_dev(sc->sc_dev, "offload lsov2: ip4 %u, ip6 %u\n",
4520 params.ndis_lsov2_ip4, params.ndis_lsov2_ip6);
4521
4522 	error = hvn_rndis_set(sc, OID_TCP_OFFLOAD_PARAMETERS, &params, len);
4523 if (error) {
4524 DPRINTF("%s: offload config failed: %d\n",
4525 device_xname(sc->sc_dev), error);
4526 return error;
4527 }
4528
4529 aprint_debug_dev(sc->sc_dev, "offload config done\n");
4530 sc->sc_caps |= caps;
4531
4532 return 0;
4533 }
4534
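/*
 * Post an RNDIS control request to the host over the primary
 * channel, retrying EAGAIN a few times.  Unless the channel has
 * been revoked or HVN_RNDIS_CMD_NORESP is set, poll the channel
 * until hvn_rndis_complete() marks the request done.
 */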
4535 static int
4536 hvn_rndis_cmd(struct hvn_softc *sc, struct rndis_cmd *rc, u_int flags)
4537 {
4538 struct hvn_rx_ring *rxr = &sc->sc_rxr[0]; /* primary channel */
4539 struct hvn_nvs_rndis *msg = &rc->rc_msg;
4540 struct rndis_msghdr *hdr = rc->rc_req;
4541 struct vmbus_gpa sgl[1];
4542 int tries = 10;
4543 int rv, s;
4544
4545 msg->nvs_type = HVN_NVS_TYPE_RNDIS;
4546 msg->nvs_rndis_mtype = HVN_NVS_RNDIS_MTYPE_CTRL;
4547 msg->nvs_chim_idx = HVN_NVS_CHIM_IDX_INVALID;
4548
4549 sgl[0].gpa_page = rc->rc_gpa;
4550 sgl[0].gpa_len = hdr->rm_len;
4551 sgl[0].gpa_ofs = 0;
4552
4553 rc->rc_done = 0;
4554
4555 mutex_enter(&rc->rc_lock);
4556
4557 hvn_submit_cmd(sc, rc);
4558
4559 do {
4560 rv = vmbus_channel_send_sgl(rxr->rxr_chan, sgl, 1, &rc->rc_msg,
4561 sizeof(*msg), rc->rc_id);
4562 if (rv == EAGAIN) {
4563 DELAY(1000);
4564 } else if (rv) {
4565 mutex_exit(&rc->rc_lock);
4566 DPRINTF("%s: RNDIS operation %u send error %d\n",
4567 device_xname(sc->sc_dev), hdr->rm_type, rv);
4568 hvn_rollback_cmd(sc, rc);
4569 return rv;
4570 }
4571 } while (rv != 0 && --tries > 0);
4572
4573 if (tries == 0 && rv != 0) {
4574 mutex_exit(&rc->rc_lock);
4575 device_printf(sc->sc_dev,
4576 "RNDIS operation %u send error %d\n", hdr->rm_type, rv);
4577 hvn_rollback_cmd(sc, rc);
4578 return rv;
4579 }
4580 if (vmbus_channel_is_revoked(rxr->rxr_chan) ||
4581 ISSET(flags, HVN_RNDIS_CMD_NORESP)) {
4582 /* No response */
4583 mutex_exit(&rc->rc_lock);
4584 if (hvn_rollback_cmd(sc, rc))
4585 hvn_release_cmd(sc, rc);
4586 return 0;
4587 }
4588
4589 bus_dmamap_sync(sc->sc_dmat, rc->rc_dmap, 0, PAGE_SIZE,
4590 BUS_DMASYNC_POSTWRITE);
4591
4592 while (!rc->rc_done && !ISSET(sc->sc_flags, HVN_SCF_REVOKED)) {
4593 mutex_exit(&rc->rc_lock);
4594 DELAY(1000);
4595 s = splnet();
4596 hvn_nvs_intr1(rxr, 0, 0);
4597 splx(s);
4598 mutex_enter(&rc->rc_lock);
4599 }
4600 mutex_exit(&rc->rc_lock);
4601
4602 bus_dmamap_sync(sc->sc_dmat, rc->rc_dmap, 0, PAGE_SIZE,
4603 BUS_DMASYNC_POSTREAD);
4604
4605 if (!rc->rc_done) {
4606 rv = EINTR;
4607 if (hvn_rollback_cmd(sc, rc)) {
4608 hvn_release_cmd(sc, rc);
4609 rv = 0;
4610 }
4611 return rv;
4612 }
4613
4614 hvn_release_cmd(sc, rc);
4615 return 0;
4616 }
4617
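/*
 * Demultiplex the RNDIS messages in an inbound buffer: data
 * packets go to hvn_rxeof(), completions to hvn_rndis_complete()
 * and status indications to hvn_rndis_status(); then acknowledge
 * the transaction.  Returns the number of packets passed up, or
 * -1 if the receive queue filled up.
 */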
4618 static int
4619 hvn_rndis_input(struct hvn_rx_ring *rxr, uint64_t tid, void *arg)
4620 {
4621 struct hvn_softc *sc = rxr->rxr_softc;
4622 struct vmbus_chanpkt_prplist *cp = arg;
4623 uint32_t off, len, type;
4624 int i, rv, rx = 0;
4625 bool qfull = false;
4626
4627 if (sc->sc_rx_ring == NULL) {
4628 DPRINTF("%s: invalid rx ring\n", device_xname(sc->sc_dev));
4629 return 0;
4630 }
4631
4632 for (i = 0; i < cp->cp_range_cnt; i++) {
4633 off = cp->cp_range[i].gpa_ofs;
4634 len = cp->cp_range[i].gpa_len;
4635
4636 KASSERT(off + len <= sc->sc_rx_size);
4637 KASSERT(len >= RNDIS_HEADER_OFFSET + 4);
4638
4639 memcpy(&type, sc->sc_rx_ring + off, sizeof(type));
4640 switch (type) {
4641 /* data message */
4642 case REMOTE_NDIS_PACKET_MSG:
4643 rv = hvn_rxeof(rxr, sc->sc_rx_ring + off, len);
4644 if (rv == 1)
4645 rx++;
4646 else if (rv == -1) /* The receive queue is full. */
4647 qfull = true;
4648 break;
4649 /* completion messages */
4650 case REMOTE_NDIS_INITIALIZE_CMPLT:
4651 case REMOTE_NDIS_QUERY_CMPLT:
4652 case REMOTE_NDIS_SET_CMPLT:
4653 case REMOTE_NDIS_RESET_CMPLT:
4654 case REMOTE_NDIS_KEEPALIVE_CMPLT:
4655 hvn_rndis_complete(sc, sc->sc_rx_ring + off, len);
4656 break;
4657 /* notification message */
4658 case REMOTE_NDIS_INDICATE_STATUS_MSG:
4659 hvn_rndis_status(sc, sc->sc_rx_ring + off, len);
4660 break;
4661 default:
4662 device_printf(sc->sc_dev,
4663 "unhandled RNDIS message type %u\n", type);
4664 break;
4665 }
4666 }
4667
4668 hvn_nvs_ack(rxr, tid);
4669
4670 if (qfull)
4671 return -1;
4672 return rx;
4673 }
4674
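/*
 * Copy a received frame into a freshly allocated mbuf (chain),
 * reserving headroom so that the payload stays aligned and a VLAN
 * header can later be prepended without another allocation.
 */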
4675 static inline struct mbuf *
4676 hvn_devget(struct hvn_softc *sc, void *buf, uint32_t len)
4677 {
4678 struct ifnet *ifp = SC2IFP(sc);
4679 struct mbuf *m;
4680 size_t size = len + ETHER_ALIGN + ETHER_VLAN_ENCAP_LEN;
4681
4682 MGETHDR(m, M_NOWAIT, MT_DATA);
4683 if (m == NULL)
4684 return NULL;
4685
4686 if (size > MHLEN) {
4687 if (size <= MCLBYTES)
4688 MCLGET(m, M_NOWAIT);
4689 else
4690 MEXTMALLOC(m, size, M_NOWAIT);
4691 if ((m->m_flags & M_EXT) == 0) {
4692 m_freem(m);
4693 return NULL;
4694 }
4695 }
4696
4697 m->m_len = m->m_pkthdr.len = size;
4698 m_adj(m, ETHER_ALIGN + ETHER_VLAN_ENCAP_LEN);
4699 m_copyback(m, 0, len, buf);
4700 m_set_rcvif(m, ifp);
4701 return m;
4702 }
4703
4704 #define HVN_RXINFO_CSUM __BIT(NDIS_PKTINFO_TYPE_CSUM)
4705 #define HVN_RXINFO_VLAN __BIT(NDIS_PKTINFO_TYPE_VLAN)
4706 #define HVN_RXINFO_HASHVAL __BIT(HVN_NDIS_PKTINFO_TYPE_HASHVAL)
4707 #define HVN_RXINFO_HASHINFO __BIT(HVN_NDIS_PKTINFO_TYPE_HASHINF)
4708 #define HVN_RXINFO_ALL (HVN_RXINFO_CSUM | \
4709 HVN_RXINFO_VLAN | \
4710 HVN_RXINFO_HASHVAL | \
4711 HVN_RXINFO_HASHINFO)
4712
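/*
 * Convert one RNDIS data packet into an mbuf, pick up the
 * per-packet metadata (checksum and VLAN info; the RSS hash is
 * parsed but not yet used) and hand the result to the network
 * stack.
 */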
4713 static int
4714 hvn_rxeof(struct hvn_rx_ring *rxr, uint8_t *buf, uint32_t len)
4715 {
4716 struct hvn_softc *sc = rxr->rxr_softc;
4717 struct ifnet *ifp = SC2IFP(sc);
4718 struct rndis_packet_msg *pkt;
4719 struct rndis_pktinfo *pi;
4720 struct mbuf *m;
4721 uint32_t mask, csum, vlan, hashval, hashinfo;
4722
4723 if (!(ifp->if_flags & IFF_RUNNING))
4724 return 0;
4725
4726 if (len < sizeof(*pkt)) {
4727 device_printf(sc->sc_dev, "data packet too short: %u\n",
4728 len);
4729 return 0;
4730 }
4731
4732 pkt = (struct rndis_packet_msg *)buf;
4733 if (pkt->rm_dataoffset + pkt->rm_datalen > len) {
4734 device_printf(sc->sc_dev,
4735 "data packet out of bounds: %u@%u\n", pkt->rm_dataoffset,
4736 pkt->rm_datalen);
4737 return 0;
4738 }
4739
4740 if ((m = hvn_devget(sc, buf + RNDIS_HEADER_OFFSET + pkt->rm_dataoffset,
4741 pkt->rm_datalen)) == NULL) {
4742 if_statinc(ifp, if_ierrors);
4743 return 0;
4744 }
4745
4746 if (pkt->rm_pktinfooffset + pkt->rm_pktinfolen > len) {
4747 device_printf(sc->sc_dev,
4748 "pktinfo is out of bounds: %u@%u vs %u\n",
4749 pkt->rm_pktinfolen, pkt->rm_pktinfooffset, len);
4750 goto done;
4751 }
4752
4753 mask = csum = hashval = hashinfo = 0;
4754 vlan = 0xffffffff;
4755 pi = (struct rndis_pktinfo *)(buf + RNDIS_HEADER_OFFSET +
4756 pkt->rm_pktinfooffset);
4757 while (pkt->rm_pktinfolen > 0) {
4758 if (pi->rm_size > pkt->rm_pktinfolen) {
4759 device_printf(sc->sc_dev,
4760 "invalid pktinfo size: %u/%u\n", pi->rm_size,
4761 pkt->rm_pktinfolen);
4762 break;
4763 }
4764
4765 switch (pi->rm_type) {
4766 case NDIS_PKTINFO_TYPE_CSUM:
4767 memcpy(&csum, pi->rm_data, sizeof(csum));
4768 SET(mask, HVN_RXINFO_CSUM);
4769 break;
4770 case NDIS_PKTINFO_TYPE_VLAN:
4771 memcpy(&vlan, pi->rm_data, sizeof(vlan));
4772 SET(mask, HVN_RXINFO_VLAN);
4773 break;
4774 case HVN_NDIS_PKTINFO_TYPE_HASHVAL:
4775 memcpy(&hashval, pi->rm_data, sizeof(hashval));
4776 SET(mask, HVN_RXINFO_HASHVAL);
4777 break;
4778 case HVN_NDIS_PKTINFO_TYPE_HASHINF:
4779 memcpy(&hashinfo, pi->rm_data, sizeof(hashinfo));
4780 SET(mask, HVN_RXINFO_HASHINFO);
4781 break;
4782 default:
4783 DPRINTF("%s: unhandled pktinfo type %u\n",
4784 device_xname(sc->sc_dev), pi->rm_type);
4785 goto next;
4786 }
4787
4788 if (mask == HVN_RXINFO_ALL) {
4789 /* All found; done */
4790 break;
4791 }
4792 next:
4793 pkt->rm_pktinfolen -= pi->rm_size;
4794 pi = (struct rndis_pktinfo *)((char *)pi + pi->rm_size);
4795 }
4796
4797 /*
4798 * Final fixup.
4799 * - If there is no hash value, invalidate the hash info.
4800 */
4801 if (!ISSET(mask, HVN_RXINFO_HASHVAL))
4802 hashinfo = 0;
4803
4804 if (csum != 0) {
4805 if (ISSET(csum, NDIS_RXCSUM_INFO_IPCS_OK) &&
4806 ISSET(ifp->if_csum_flags_rx, M_CSUM_IPv4)) {
4807 SET(m->m_pkthdr.csum_flags, M_CSUM_IPv4);
4808 rxr->rxr_evcsum_ip.ev_count++;
4809 }
4810 if (ISSET(csum, NDIS_RXCSUM_INFO_TCPCS_OK) &&
4811 ISSET(ifp->if_csum_flags_rx, M_CSUM_TCPv4)) {
4812 SET(m->m_pkthdr.csum_flags, M_CSUM_TCPv4);
4813 rxr->rxr_evcsum_tcp.ev_count++;
4814 }
4815 if (ISSET(csum, NDIS_RXCSUM_INFO_UDPCS_OK) &&
4816 ISSET(ifp->if_csum_flags_rx, M_CSUM_UDPv4)) {
4817 SET(m->m_pkthdr.csum_flags, M_CSUM_UDPv4);
4818 rxr->rxr_evcsum_udp.ev_count++;
4819 }
4820 }
4821
4822 if (vlan != 0xffffffff) {
4823 uint16_t t = NDIS_VLAN_INFO_ID(vlan);
4824 t |= NDIS_VLAN_INFO_PRI(vlan) << EVL_PRIO_BITS;
4825 t |= NDIS_VLAN_INFO_CFI(vlan) << EVL_CFI_BITS;
4826
4827 if (ISSET(sc->sc_ec.ec_capenable, ETHERCAP_VLAN_HWTAGGING)) {
4828 vlan_set_tag(m, t);
4829 rxr->rxr_evvlanhwtagging.ev_count++;
4830 } else {
4831 struct ether_header eh;
4832 struct ether_vlan_header *evl;
4833
4834 KDASSERT(m->m_pkthdr.len >= sizeof(eh));
4835 m_copydata(m, 0, sizeof(eh), &eh);
4836 			M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT);
4837 			if (m == NULL) {
				if_statinc(ifp, if_ierrors);
				return 0;
			}
4838
4839 evl = mtod(m, struct ether_vlan_header *);
4840 memcpy(evl->evl_dhost, eh.ether_dhost,
4841 ETHER_ADDR_LEN * 2);
4842 evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
4843 evl->evl_tag = htons(t);
4844 evl->evl_proto = eh.ether_type;
4845 }
4846 }
4847
4848 /* XXX RSS hash is not supported. */
4849
4850 done:
4851 rxr->rxr_evpkts.ev_count++;
4852 if_percpuq_enqueue(sc->sc_ipq, m);
4853 /* XXX Unable to detect that the receive queue is full. */
4854 return 1;
4855 }
4856
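/*
 * Match a completion message to its pending control request, copy
 * the completion (plus up to HVN_RNDIS_BUFSIZE of extra data) out
 * and wake up the sender.
 */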
4857 static void
4858 hvn_rndis_complete(struct hvn_softc *sc, uint8_t *buf, uint32_t len)
4859 {
4860 struct rndis_cmd *rc;
4861 uint32_t id;
4862
4863 memcpy(&id, buf + RNDIS_HEADER_OFFSET, sizeof(id));
4864 if ((rc = hvn_complete_cmd(sc, id)) != NULL) {
4865 mutex_enter(&rc->rc_lock);
4866 if (len < rc->rc_cmplen)
4867 device_printf(sc->sc_dev,
4868 "RNDIS response %u too short: %u\n", id, len);
4869 else
4870 memcpy(&rc->rc_cmp, buf, rc->rc_cmplen);
4871 if (len > rc->rc_cmplen &&
4872 len - rc->rc_cmplen > HVN_RNDIS_BUFSIZE)
4873 device_printf(sc->sc_dev,
4874 "RNDIS response %u too large: %u\n", id, len);
4875 else if (len > rc->rc_cmplen)
4876 memcpy(&rc->rc_cmpbuf, buf + rc->rc_cmplen,
4877 len - rc->rc_cmplen);
4878 rc->rc_done = 1;
4879 cv_signal(&rc->rc_cv);
4880 mutex_exit(&rc->rc_lock);
4881 } else {
4882 DPRINTF("%s: failed to complete RNDIS request id %u\n",
4883 device_xname(sc->sc_dev), id);
4884 }
4885 }
4886
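/*
 * Transmit a packet described by a scatter-gather list directly
 * through the channel, using the pre-built sc_data_msg header.
 */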
4887 static int
4888 hvn_rndis_output_sgl(struct hvn_tx_ring *txr, struct hvn_tx_desc *txd)
4889 {
4890 struct hvn_softc *sc = txr->txr_softc;
4891 uint64_t rid = (uint64_t)txd->txd_id << 32;
4892 int rv;
4893
4894 rv = vmbus_channel_send_sgl(txr->txr_chan, txd->txd_sgl, txd->txd_nsge,
4895 &sc->sc_data_msg, sizeof(sc->sc_data_msg), rid);
4896 if (rv) {
4897 DPRINTF("%s: RNDIS data send error %d\n",
4898 device_xname(sc->sc_dev), rv);
4899 return rv;
4900 }
4901 return 0;
4902 }
4903
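/*
 * Transmit a packet that has already been copied into a chimney
 * (send buffer) slot; only the slot index and size go over the
 * channel.
 */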
4904 static int
4905 hvn_rndis_output_chim(struct hvn_tx_ring *txr, struct hvn_tx_desc *txd)
4906 {
	struct hvn_softc *sc = txr->txr_softc;
4907 	struct hvn_nvs_rndis rndis;
4908 uint64_t rid = (uint64_t)txd->txd_id << 32;
4909 int rv;
4910
4911 memset(&rndis, 0, sizeof(rndis));
4912 rndis.nvs_type = HVN_NVS_TYPE_RNDIS;
4913 rndis.nvs_rndis_mtype = HVN_NVS_RNDIS_MTYPE_DATA;
4914 rndis.nvs_chim_idx = txd->txd_chim_index;
4915 rndis.nvs_chim_sz = txd->txd_chim_size;
4916
4917 rv = vmbus_channel_send(txr->txr_chan, &rndis, sizeof(rndis),
4918 rid, VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC);
4919 if (rv) {
4920 DPRINTF("%s: RNDIS chimney data send error %d: idx %u, sz %u\n",
4921 device_xname(sc->sc_dev), rv, rndis.nvs_chim_idx,
4922 rndis.nvs_chim_sz);
4923 return rv;
4924 }
4925 return 0;
4926 }
4927
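/*
 * Handle unsolicited status indications from the host, mainly
 * link state and network change events; link speed and offload
 * configuration changes are deliberately ignored.
 */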
4928 static void
4929 hvn_rndis_status(struct hvn_softc *sc, uint8_t *buf, uint32_t len)
4930 {
4931 uint32_t status;
4932
4933 memcpy(&status, buf + RNDIS_HEADER_OFFSET, sizeof(status));
4934 switch (status) {
4935 case RNDIS_STATUS_MEDIA_CONNECT:
4936 case RNDIS_STATUS_MEDIA_DISCONNECT:
4937 hvn_link_event(sc, HVN_LINK_EV_STATE_CHANGE);
4938 break;
4939 case RNDIS_STATUS_NETWORK_CHANGE:
4940 hvn_link_event(sc, HVN_LINK_EV_NETWORK_CHANGE);
4941 break;
4942 /* Ignore these */
4943 case RNDIS_STATUS_OFFLOAD_CURRENT_CONFIG:
4944 case RNDIS_STATUS_LINK_SPEED_CHANGE:
4945 return;
4946 default:
4947 DPRINTF("%s: unhandled status %#x\n", device_xname(sc->sc_dev),
4948 status);
4949 return;
4950 }
4951 }
4952
4953 static int
4954 hvn_rndis_query(struct hvn_softc *sc, uint32_t oid, void *res, size_t *length)
4955 {
4956
4957 return hvn_rndis_query2(sc, oid, NULL, 0, res, length, 0);
4958 }
4959
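/*
 * Issue REMOTE_NDIS_QUERY_MSG for the given OID, optionally with
 * an input buffer, and copy the completion data to "odata".  The
 * reply must fit in *odlen and be at least min_odlen bytes long.
 * hvn_rndis_query() is the no-input-buffer wrapper around this.
 */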
4960 static int
4961 hvn_rndis_query2(struct hvn_softc *sc, uint32_t oid, const void *idata,
4962 size_t idlen, void *odata, size_t *odlen, size_t min_odlen)
4963 {
4964 struct rndis_cmd *rc;
4965 struct rndis_query_req *req;
4966 struct rndis_query_comp *cmp;
4967 size_t olength = *odlen;
4968 int rv;
4969
4970 rc = hvn_alloc_cmd(sc);
4971
4972 bus_dmamap_sync(sc->sc_dmat, rc->rc_dmap, 0, PAGE_SIZE,
4973 BUS_DMASYNC_PREREAD);
4974
4975 rc->rc_id = atomic_inc_uint_nv(&sc->sc_rndisrid);
4976
4977 req = rc->rc_req;
4978 req->rm_type = REMOTE_NDIS_QUERY_MSG;
4979 req->rm_len = sizeof(*req) + idlen;
4980 req->rm_rid = rc->rc_id;
4981 req->rm_oid = oid;
4982 req->rm_infobufoffset = sizeof(*req) - RNDIS_HEADER_OFFSET;
4983 if (idlen > 0) {
4984 KASSERT(sizeof(*req) + idlen <= PAGE_SIZE);
4985 req->rm_infobuflen = idlen;
4986 memcpy(req + 1, idata, idlen);
4987 }
4988
4989 rc->rc_cmplen = sizeof(*cmp);
4990
4991 bus_dmamap_sync(sc->sc_dmat, rc->rc_dmap, 0, PAGE_SIZE,
4992 BUS_DMASYNC_PREWRITE);
4993
4994 if ((rv = hvn_rndis_cmd(sc, rc, 0)) != 0) {
4995 DPRINTF("%s: QUERY_MSG failed, error %d\n",
4996 device_xname(sc->sc_dev), rv);
4997 hvn_free_cmd(sc, rc);
4998 return rv;
4999 }
5000
5001 cmp = (struct rndis_query_comp *)&rc->rc_cmp;
5002 switch (cmp->rm_status) {
5003 case RNDIS_STATUS_SUCCESS:
5004 if (cmp->rm_infobuflen > olength ||
5005 (min_odlen > 0 && cmp->rm_infobuflen < min_odlen)) {
5006 rv = EINVAL;
5007 break;
5008 }
5009 memcpy(odata, rc->rc_cmpbuf, cmp->rm_infobuflen);
5010 *odlen = cmp->rm_infobuflen;
5011 break;
5012 default:
5013 *odlen = 0;
5014 rv = EIO;
5015 break;
5016 }
5017
5018 hvn_free_cmd(sc, rc);
5019 return rv;
5020 }
5021
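/*
 * Issue REMOTE_NDIS_SET_MSG to write "length" bytes of data to
 * the given OID.
 */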
5022 static int
5023 hvn_rndis_set(struct hvn_softc *sc, uint32_t oid, void *data, size_t length)
5024 {
5025 struct rndis_cmd *rc;
5026 struct rndis_set_req *req;
5027 struct rndis_set_comp *cmp;
5028 int rv;
5029
5030 rc = hvn_alloc_cmd(sc);
5031
5032 bus_dmamap_sync(sc->sc_dmat, rc->rc_dmap, 0, PAGE_SIZE,
5033 BUS_DMASYNC_PREREAD);
5034
5035 rc->rc_id = atomic_inc_uint_nv(&sc->sc_rndisrid);
5036
5037 req = rc->rc_req;
5038 req->rm_type = REMOTE_NDIS_SET_MSG;
5039 req->rm_len = sizeof(*req) + length;
5040 req->rm_rid = rc->rc_id;
5041 req->rm_oid = oid;
5042 req->rm_infobufoffset = sizeof(*req) - RNDIS_HEADER_OFFSET;
5043
5044 rc->rc_cmplen = sizeof(*cmp);
5045
5046 if (length > 0) {
5047 KASSERT(sizeof(*req) + length < PAGE_SIZE);
5048 req->rm_infobuflen = length;
5049 memcpy(req + 1, data, length);
5050 }
5051
5052 bus_dmamap_sync(sc->sc_dmat, rc->rc_dmap, 0, PAGE_SIZE,
5053 BUS_DMASYNC_PREWRITE);
5054
5055 if ((rv = hvn_rndis_cmd(sc, rc, 0)) != 0) {
5056 DPRINTF("%s: SET_MSG failed, error %d\n",
5057 device_xname(sc->sc_dev), rv);
5058 hvn_free_cmd(sc, rc);
5059 return rv;
5060 }
5061
5062 cmp = (struct rndis_set_comp *)&rc->rc_cmp;
5063 if (cmp->rm_status != RNDIS_STATUS_SUCCESS)
5064 rv = EIO;
5065
5066 hvn_free_cmd(sc, rc);
5067 return rv;
5068 }
5069
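/*
 * Program the RNDIS packet filter from the interface flags to
 * start reception.  Per-address multicast filtering is not
 * implemented, so any joined group turns on all-multicast.
 */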
5070 static int
5071 hvn_rndis_open(struct hvn_softc *sc)
5072 {
5073 struct ifnet *ifp = SC2IFP(sc);
5074 uint32_t filter;
5075 int rv;
5076
5077 if (ifp->if_flags & IFF_PROMISC) {
5078 filter = RNDIS_PACKET_TYPE_PROMISCUOUS;
5079 } else {
5080 filter = RNDIS_PACKET_TYPE_DIRECTED;
5081 if (ifp->if_flags & IFF_BROADCAST)
5082 filter |= RNDIS_PACKET_TYPE_BROADCAST;
5083 if (ifp->if_flags & IFF_ALLMULTI)
5084 filter |= RNDIS_PACKET_TYPE_ALL_MULTICAST;
5085 else {
5086 struct ethercom *ec = &sc->sc_ec;
5087 struct ether_multi *enm;
5088 struct ether_multistep step;
5089
5090 ETHER_LOCK(ec);
5091 ETHER_FIRST_MULTI(step, ec, enm);
5092 /* TODO: support multicast list */
5093 if (enm != NULL)
5094 filter |= RNDIS_PACKET_TYPE_ALL_MULTICAST;
5095 ETHER_UNLOCK(ec);
5096 }
5097 }
5098
5099 rv = hvn_rndis_set(sc, OID_GEN_CURRENT_PACKET_FILTER,
5100 &filter, sizeof(filter));
5101 if (rv) {
5102 DPRINTF("%s: failed to set RNDIS filter to %#x\n",
5103 device_xname(sc->sc_dev), filter);
5104 }
5105 return rv;
5106 }
5107
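/*
 * Clear the RNDIS packet filter so that the host stops delivering
 * packets.
 */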
5108 static int
5109 hvn_rndis_close(struct hvn_softc *sc)
5110 {
5111 uint32_t filter = 0;
5112 int rv;
5113
5114 rv = hvn_rndis_set(sc, OID_GEN_CURRENT_PACKET_FILTER,
5115 &filter, sizeof(filter));
5116 if (rv) {
5117 DPRINTF("%s: failed to clear RNDIS filter\n",
5118 device_xname(sc->sc_dev));
5119 }
5120 return rv;
5121 }
5122
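/*
 * Send REMOTE_NDIS_HALT_MSG to shut the RNDIS interface down.
 */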
5123 static void
5124 hvn_rndis_detach(struct hvn_softc *sc)
5125 {
5126 struct rndis_cmd *rc;
5127 struct rndis_halt_req *req;
5128 int rv;
5129
5130 rc = hvn_alloc_cmd(sc);
5131
5132 bus_dmamap_sync(sc->sc_dmat, rc->rc_dmap, 0, PAGE_SIZE,
5133 BUS_DMASYNC_PREREAD);
5134
5135 rc->rc_id = atomic_inc_uint_nv(&sc->sc_rndisrid);
5136
5137 req = rc->rc_req;
5138 req->rm_type = REMOTE_NDIS_HALT_MSG;
5139 req->rm_len = sizeof(*req);
5140 req->rm_rid = rc->rc_id;
5141
5142 bus_dmamap_sync(sc->sc_dmat, rc->rc_dmap, 0, PAGE_SIZE,
5143 BUS_DMASYNC_PREWRITE);
5144
5145 /* No RNDIS completion; rely on NVS message send completion */
5146 if ((rv = hvn_rndis_cmd(sc, rc, HVN_RNDIS_CMD_NORESP)) != 0) {
5147 DPRINTF("%s: HALT_MSG failed, error %d\n",
5148 device_xname(sc->sc_dev), rv);
5149 }
5150 hvn_free_cmd(sc, rc);
5151 }
5152
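/*
 * Create the per-device hw.<devname> sysctl subtree with knobs
 * for workqueue usage and the Rx/Tx packet processing limits.
 */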
5153 static void
5154 hvn_init_sysctls(struct hvn_softc *sc)
5155 {
5156 struct sysctllog **log;
5157 const struct sysctlnode *rnode, *cnode, *rxnode, *txnode;
5158 const char *dvname;
5159 int error;
5160
5161 log = &sc->sc_sysctllog;
5162 dvname = device_xname(sc->sc_dev);
5163
5164 error = sysctl_createv(log, 0, NULL, &rnode,
5165 0, CTLTYPE_NODE, dvname,
5166 SYSCTL_DESCR("hvn information and settings"),
5167 NULL, 0, NULL, 0, CTL_HW, CTL_CREATE, CTL_EOL);
5168 if (error)
5169 goto err;
5170
5171 error = sysctl_createv(log, 0, &rnode, &cnode,
5172 CTLFLAG_READWRITE, CTLTYPE_BOOL, "txrx_workqueue",
5173 SYSCTL_DESCR("Use workqueue for packet processing"),
5174 NULL, 0, &sc->sc_txrx_workqueue, 0, CTL_CREATE, CTL_EOL);
5175 if (error)
5176 goto out;
5177
5178 error = sysctl_createv(log, 0, &rnode, &rxnode,
5179 0, CTLTYPE_NODE, "rx",
5180 SYSCTL_DESCR("hvn information and settings for Rx"),
5181 NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
5182 if (error)
5183 goto out;
5184
5185 error = sysctl_createv(log, 0, &rxnode, NULL,
5186 CTLFLAG_READWRITE, CTLTYPE_INT, "intr_process_limit",
5187 SYSCTL_DESCR("max number of Rx packets"
5188 " to process for interrupt processing"),
5189 NULL, 0, &sc->sc_rx_intr_process_limit, 0, CTL_CREATE, CTL_EOL);
5190 if (error)
5191 goto out;
5192
5193 error = sysctl_createv(log, 0, &rxnode, NULL,
5194 CTLFLAG_READWRITE, CTLTYPE_INT, "process_limit",
5195 SYSCTL_DESCR("max number of Rx packets"
5196 " to process for deferred processing"),
5197 NULL, 0, &sc->sc_rx_process_limit, 0, CTL_CREATE, CTL_EOL);
5198 if (error)
5199 goto out;
5200
5201 error = sysctl_createv(log, 0, &rnode, &txnode,
5202 0, CTLTYPE_NODE, "tx",
5203 SYSCTL_DESCR("hvn information and settings for Tx"),
5204 NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
5205 if (error)
5206 goto out;
5207
5208 error = sysctl_createv(log, 0, &txnode, NULL,
5209 CTLFLAG_READWRITE, CTLTYPE_INT, "intr_process_limit",
5210 SYSCTL_DESCR("max number of Tx packets"
5211 " to process for interrupt processing"),
5212 NULL, 0, &sc->sc_tx_intr_process_limit, 0, CTL_CREATE, CTL_EOL);
5213 if (error)
5214 goto out;
5215
5216 error = sysctl_createv(log, 0, &txnode, NULL,
5217 CTLFLAG_READWRITE, CTLTYPE_INT, "process_limit",
5218 SYSCTL_DESCR("max number of Tx packets"
5219 " to process for deferred processing"),
5220 NULL, 0, &sc->sc_tx_process_limit, 0, CTL_CREATE, CTL_EOL);
5221 if (error)
5222 goto out;
5223
5224 return;
5225
5226 out:
5227 sysctl_teardown(log);
5228 sc->sc_sysctllog = NULL;
5229 err:
5230 aprint_error_dev(sc->sc_dev, "sysctl_createv failed (err = %d)\n",
5231 error);
5232 }
5233
5234 SYSCTL_SETUP(sysctl_hw_hvn_setup, "sysctl hw.hvn setup")
5235 {
5236 const struct sysctlnode *rnode;
5237 const struct sysctlnode *cnode;
5238 int error;
5239
5240 error = sysctl_createv(clog, 0, NULL, &rnode,
5241 CTLFLAG_PERMANENT, CTLTYPE_NODE, "hvn",
5242 SYSCTL_DESCR("hvn global controls"),
5243 NULL, 0, NULL, 0, CTL_HW, CTL_CREATE, CTL_EOL);
5244 if (error)
5245 goto fail;
5246
5247 error = sysctl_createv(clog, 0, &rnode, &cnode,
5248 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT,
5249 "udp_csum_fixup_mtu",
5250 SYSCTL_DESCR("UDP checksum offloding fixup MTU"),
5251 NULL, 0, &hvn_udpcs_fixup_mtu, sizeof(hvn_udpcs_fixup_mtu),
5252 CTL_CREATE, CTL_EOL);
5253 if (error)
5254 goto fail;
5255
5256 error = sysctl_createv(clog, 0, &rnode, &cnode,
5257 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT,
5258 "chimney_size",
5259 SYSCTL_DESCR("Chimney send packet size limit"),
5260 NULL, 0, &hvn_tx_chimney_size, sizeof(hvn_tx_chimney_size),
5261 CTL_CREATE, CTL_EOL);
5262 if (error)
5263 goto fail;
5264
5265 error = sysctl_createv(clog, 0, &rnode, &cnode,
5266 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT,
5267 "channel_count",
5268 SYSCTL_DESCR("# of channels to use"),
5269 NULL, 0, &hvn_channel_cnt, sizeof(hvn_channel_cnt),
5270 CTL_CREATE, CTL_EOL);
5271 if (error)
5272 goto fail;
5273
5274 error = sysctl_createv(clog, 0, &rnode, &cnode,
5275 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT,
5276 "tx_ring_count",
5277 SYSCTL_DESCR("# of transmit rings to use"),
5278 NULL, 0, &hvn_tx_ring_cnt, sizeof(hvn_tx_ring_cnt),
5279 CTL_CREATE, CTL_EOL);
5280 if (error)
5281 goto fail;
5282
5283 return;
5284
5285 fail:
5286 aprint_error("%s: sysctl_createv failed (err = %d)\n", __func__, error);
5287 }
5288