Home | History | Annotate | Line # | Download | only in x86
      1 /*	$NetBSD: hyperv.c,v 1.17 2025/04/12 19:31:44 nonaka Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2009-2012,2016-2017 Microsoft Corp.
      5  * Copyright (c) 2012 NetApp Inc.
      6  * Copyright (c) 2012 Citrix Inc.
      7  * All rights reserved.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice unmodified, this list of conditions, and the following
     14  *    disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     20  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     21  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     22  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     24  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     28  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29  */
     30 
     31 /**
     32  * Implements low-level interactions with Hyper-V/Azure
     33  */
     34 #include <sys/cdefs.h>
     35 #ifdef __KERNEL_RCSID
     36 __KERNEL_RCSID(0, "$NetBSD: hyperv.c,v 1.17 2025/04/12 19:31:44 nonaka Exp $");
     37 #endif
     38 #ifdef __FBSDID
     39 __FBSDID("$FreeBSD: head/sys/dev/hyperv/vmbus/hyperv.c 331757 2018-03-30 02:25:12Z emaste $");
     40 #endif
     41 
     42 #ifdef _KERNEL_OPT
     43 #include "lapic.h"
     44 #include "genfb.h"
     45 #include "opt_ddb.h"
     46 #include "vmbus.h"
     47 #include "wsdisplay.h"
     48 #endif
     49 
     50 #include <sys/param.h>
     51 #include <sys/systm.h>
     52 #include <sys/kernel.h>
     53 #include <sys/device.h>
     54 #include <sys/bus.h>
     55 #include <sys/cpu.h>
     56 #include <sys/kmem.h>
     57 #include <sys/module.h>
     58 #include <sys/pmf.h>
     59 #include <sys/sysctl.h>
     60 #include <sys/timetc.h>
     61 
     62 #include <uvm/uvm_extern.h>
     63 
     64 #include <machine/autoconf.h>
     65 #include <machine/bootinfo.h>
     66 #include <machine/cpufunc.h>
     67 #include <machine/cputypes.h>
     68 #include <machine/cpuvar.h>
     69 #include <machine/cpu_counter.h>
     70 #include <x86/apicvar.h>
     71 #include <x86/efi.h>
     72 
     73 #include <dev/wsfb/genfbvar.h>
     74 #include <x86/genfb_machdep.h>
     75 
     76 #include <x86/x86/hypervreg.h>
     77 #include <x86/x86/hypervvar.h>
     78 #include <dev/hyperv/vmbusvar.h>
     79 #include <dev/hyperv/genfb_vmbusvar.h>
     80 
     81 #ifdef DDB
     82 #include <machine/db_machdep.h>
     83 #include <ddb/db_sym.h>
     84 #include <ddb/db_extern.h>
     85 #endif
     86 
/*
 * Software state of the hyperv device instance.
 */
struct hyperv_softc {
	device_t		sc_dev;		/* our own device_t, set at attach */

	struct sysctllog	*sc_log;	/* sysctl nodes created at attach */
};
     92 
/*
 * Location of the hypercall page: the kernel virtual address that is
 * called into, and the physical address programmed into MSR_HV_HYPERCALL.
 */
struct hyperv_hypercall_ctx {
	void		*hc_addr;	/* NULL while hypercalls are unavailable */
	paddr_t		hc_paddr;
};
     97 
/*
 * Machine-dependent per-CPU data hung off vmbus_percpu_data::md_cookie.
 */
struct hyperv_percpu_data {
	int	pd_idtvec;	/* IDT vector used for the Hyper-V interrupt */
};
    101 
/* The one global hypercall page context used by hyperv_hypercall(). */
static struct hyperv_hypercall_ctx hyperv_hypercall_ctx;
    103 
/*
 * Page-sized, page-aligned placeholder whose address is handed to the
 * hypervisor as the hypercall page by hyperv_init_hypercall().  The
 * body is nothing but PAGE_SIZE bytes of 0xcc (the x86 int3 opcode).
 * Hypercalls are issued through hyperv_hypercall(), never by calling
 * this function by name.
 */
static void __attribute__((naked)) __aligned(PAGE_SIZE)
hyperv_hypercall_page(void)
{
	__asm__ __volatile__ (".fill %c0, 1, 0xcc" :: "i" (PAGE_SIZE));
}
    109 
static u_int	hyperv_get_timecount(struct timecounter *);

/* Values cached from the hypervisor CPUID leaves by hyperv_identify(). */
static u_int hyperv_features;		/* CPUID_HV_MSR_ */
static u_int hyperv_recommends;		/* eax of CPUID_LEAF_HV_RECOMMENDS */

static u_int hyperv_pm_features;	/* ecx of CPUID_LEAF_HV_FEATURES */
static u_int hyperv_features3;		/* edx of CPUID_LEAF_HV_FEATURES */

/* Human-readable renderings of the above, exported through sysctl. */
static char hyperv_version_str[64];
static char hyperv_features_str[256];
static char hyperv_pm_features_str[256];
static char hyperv_features3_str[256];

/* Virtual processor index of each CPU, indexed by CPU index. */
uint32_t hyperv_vcpuid[MAXCPUS];
    124 
/*
 * Timecounter backed by MSR_HV_TIME_REF_COUNT.  hyperv_init() registers
 * it when the hypervisor advertises CPUID_HV_MSR_TIME_REFCNT.
 */
static struct timecounter hyperv_timecounter = {
	.tc_get_timecount = hyperv_get_timecount,
	.tc_counter_mask = 0xffffffff,	/* the read method returns 32 bits */
	.tc_frequency = HYPERV_TIMER_FREQ,
	.tc_name = "Hyper-V",
	.tc_quality = 2000,
};
    132 
static void	hyperv_proc_dummy(void *, struct cpu_info *);

/* A callback and its opaque argument, invoked from hyperv_intr(). */
struct hyperv_proc {
	hyperv_proc_t	func;
	void		*arg;
};

/*
 * Event and message callbacks.  Both start out pointing at the no-op
 * hyperv_proc_dummy() so hyperv_intr() can call them unconditionally;
 * they are replaced via hyperv_set_event_proc() and
 * hyperv_set_message_proc().
 */
static struct hyperv_proc hyperv_event_proc = {
	.func = hyperv_proc_dummy,
};

static struct hyperv_proc hyperv_message_proc = {
	.func = hyperv_proc_dummy,
};
    147 
/* autoconf(9) glue: match/attach/detach entry points for hyperv. */
static int	hyperv_match(device_t, cfdata_t, void *);
static void	hyperv_attach(device_t, device_t, void *);
static int	hyperv_detach(device_t, int);

CFATTACH_DECL_NEW(hyperv, sizeof(struct hyperv_softc),
    hyperv_match, hyperv_attach, hyperv_detach, NULL);

/* Forward declarations of helpers defined further down in this file. */
static void	hyperv_hypercall_memfree(void);
static bool	hyperv_init_hypercall(void);
static int	hyperv_sysctl_setup_root(struct hyperv_softc *);
    158 
    159 static u_int
    160 hyperv_get_timecount(struct timecounter *tc)
    161 {
    162 
    163 	return (u_int)rdmsr(MSR_HV_TIME_REF_COUNT);
    164 }
    165 
    166 static uint64_t
    167 hyperv_tc64_rdmsr(void)
    168 {
    169 
    170 	return rdmsr(MSR_HV_TIME_REF_COUNT);
    171 }
    172 
    173 #ifdef __amd64__
    174 /*
    175  * Reference TSC
    176  */
/* Where the reference TSC page lives; filled in by hyperv_tsc_tcinit(). */
struct hyperv_ref_tsc {
	struct hyperv_reftsc	*tsc_ref;	/* kernel VA of the page */
	paddr_t			tsc_paddr;	/* physical address of the page */
};

static struct hyperv_ref_tsc hyperv_ref_tsc;

static u_int	hyperv_tsc_timecount(struct timecounter *);

/*
 * Timecounter that derives time from the reference TSC page.  Its
 * quality is higher than that of the MSR-based "Hyper-V" timecounter.
 */
static struct timecounter hyperv_tsc_timecounter = {
	.tc_get_timecount = hyperv_tsc_timecount,
	.tc_counter_mask = 0xffffffff,
	.tc_frequency = HYPERV_TIMER_FREQ,
	.tc_name = "Hyper-V-TSC",
	.tc_quality = 3000,
};
    193 
    194 static __inline u_int
    195 atomic_load_acq_int(volatile u_int *p)
    196 {
    197 	u_int r = *p;
    198 	__insn_barrier();
    199 	return r;
    200 }
    201 
/*
 * Return the 64-bit reference time computed from the reference TSC page.
 *
 * The page is read under a sequence check: tsc_seq is sampled before the
 * scale and offset are read, and compared again once the result has been
 * computed; the computation is retried if the sequence changed.  A
 * sequence value of zero ends the loop, and the time is then read from
 * MSR_HV_TIME_REF_COUNT instead.
 */
static uint64_t
hyperv_tc64_tsc(void)
{
	struct hyperv_reftsc *tsc_ref = hyperv_ref_tsc.tsc_ref;
	uint32_t seq;

	while ((seq = atomic_load_acq_int(&tsc_ref->tsc_seq)) != 0) {
		uint64_t disc, ret, tsc;
		uint64_t scale = tsc_ref->tsc_scale;
		int64_t ofs = tsc_ref->tsc_ofs;

		tsc = cpu_counter();

		/*
		 * ret = ((tsc * scale) >> 64) + ofs
		 *
		 * mulq leaves the high half of the 128-bit product in
		 * %rdx (ret) and the low half in %rax (disc, discarded).
		 */
		__asm__ __volatile__ ("mulq %3" :
		    "=d" (ret), "=a" (disc) :
		    "a" (tsc), "r" (scale));
		ret += ofs;

		/* Finish the reads above before re-checking the sequence. */
		__insn_barrier();
		if (tsc_ref->tsc_seq == seq)
			return ret;

		/* Sequence changed; re-sync. */
	}
	/* Fallback to the generic timecounter, i.e. rdmsr. */
	return rdmsr(MSR_HV_TIME_REF_COUNT);
}
    230 
    231 static u_int
    232 hyperv_tsc_timecount(struct timecounter *tc __unused)
    233 {
    234 
    235 	return hyperv_tc64_tsc();
    236 }
    237 
    238 static bool
    239 hyperv_tsc_tcinit(void)
    240 {
    241 	uint64_t orig_msr, msr;
    242 
    243 	if ((hyperv_features &
    244 	     (CPUID_HV_MSR_TIME_REFCNT | CPUID_HV_MSR_REFERENCE_TSC)) !=
    245 	    (CPUID_HV_MSR_TIME_REFCNT | CPUID_HV_MSR_REFERENCE_TSC) ||
    246 	    (cpu_feature[0] & CPUID_SSE2) == 0)	/* SSE2 for mfence/lfence */
    247 		return false;
    248 
    249 	hyperv_ref_tsc.tsc_ref = (void *)uvm_km_alloc(kernel_map,
    250 	    PAGE_SIZE, PAGE_SIZE, UVM_KMF_WIRED | UVM_KMF_ZERO);
    251 	if (hyperv_ref_tsc.tsc_ref == NULL) {
    252 		aprint_error("Hyper-V: reference TSC page allocation failed\n");
    253 		return false;
    254 	}
    255 
    256 	if (!pmap_extract(pmap_kernel(), (vaddr_t)hyperv_ref_tsc.tsc_ref,
    257 	    &hyperv_ref_tsc.tsc_paddr)) {
    258 		aprint_error("Hyper-V: reference TSC page setup failed\n");
    259 		uvm_km_free(kernel_map, (vaddr_t)hyperv_ref_tsc.tsc_ref,
    260 		    PAGE_SIZE, UVM_KMF_WIRED);
    261 		hyperv_ref_tsc.tsc_ref = NULL;
    262 		return false;
    263 	}
    264 
    265 	orig_msr = rdmsr(MSR_HV_REFERENCE_TSC);
    266 	msr = MSR_HV_REFTSC_ENABLE | (orig_msr & MSR_HV_REFTSC_RSVD_MASK) |
    267 	    (atop(hyperv_ref_tsc.tsc_paddr) << MSR_HV_REFTSC_PGSHIFT);
    268 	wrmsr(MSR_HV_REFERENCE_TSC, msr);
    269 
    270 	/* Install 64 bits timecounter method for other modules to use. */
    271 	hyperv_tc64 = hyperv_tc64_tsc;
    272 
    273 	/* Register "enlightened" timecounter. */
    274 	tc_init(&hyperv_tsc_timecounter);
    275 
    276 	return true;
    277 }
    278 #endif /* __amd64__ */
    279 
    280 static void
    281 delay_tc(unsigned int n)
    282 {
    283 	struct timecounter *tc;
    284 	uint64_t end, now;
    285 	u_int last, u;
    286 
    287 	tc = timecounter;
    288 	if (tc->tc_quality <= 0) {
    289 		x86_delay(n);
    290 		return;
    291 	}
    292 
    293 	now = 0;
    294 	end = tc->tc_frequency * n / 1000000;
    295 	last = tc->tc_get_timecount(tc) & tc->tc_counter_mask;
    296 	do {
    297 		x86_pause();
    298 		u = tc->tc_get_timecount(tc) & tc->tc_counter_mask;
    299 		if (u < last)
    300 			now += tc->tc_counter_mask - last + u + 1;
    301 		else
    302 			now += u - last;
    303 		last = u;
    304 	} while (now < end);
    305 }
    306 
    307 static void
    308 delay_msr(unsigned int n)
    309 {
    310 	uint64_t end, now;
    311 	u_int last, u;
    312 
    313 	now = 0;
    314 	end = HYPERV_TIMER_FREQ * n / 1000000ULL;
    315 	last = (u_int)rdmsr(MSR_HV_TIME_REF_COUNT);
    316 	do {
    317 		x86_pause();
    318 		u = (u_int)rdmsr(MSR_HV_TIME_REF_COUNT);
    319 		if (u < last)
    320 			now += 0xffffffff - last + u + 1;
    321 		else
    322 			now += u - last;
    323 		last = u;
    324 	} while (now < end);
    325 }
    326 
/*
 * Issue a hypercall by calling into the hypercall page.
 *
 *	hc_addr		address of the hypercall page
 *	in_val		hypercall input value
 *	in_paddr	physical address of the input parameter block
 *	out_paddr	physical address of the output parameter block
 *
 * Returns the 64-bit status produced by the call.
 *
 * Register usage:
 *	amd64:	in_val in %rcx, in_paddr in %rdx, out_paddr in %r8,
 *		status in %rax.
 *	i386:	in_val in %edx:%eax, in_paddr in %ebx:%ecx,
 *		out_paddr in %edi:%esi, status in %edx:%eax.
 */
static __inline uint64_t
hyperv_hypercall_md(volatile void *hc_addr, uint64_t in_val, uint64_t in_paddr,
    uint64_t out_paddr)
{
	uint64_t status;

#ifdef __amd64__
	/*
	 * %r8 is loaded inside the same asm statement as the call, so
	 * the compiler cannot reuse the register between the load and
	 * the call.  %rcx and %rdx are in/out and %r8-%r11, the flags
	 * and memory are listed as clobbered, since the hypervisor may
	 * change them and reads/writes the parameter blocks.
	 */
	__asm__ __volatile__ (
	    "mov %3, %%r8\n\t"
	    "call *%4"
	    : "=a" (status), "+c" (in_val), "+d" (in_paddr)
	    : "r" (out_paddr), "m" (hc_addr)
	    : "cc", "memory", "r8", "r9", "r10", "r11");
#else
	uint32_t in_val_hi = in_val >> 32;
	uint32_t in_val_lo = in_val & 0xFFFFFFFF;
	uint32_t status_hi, status_lo;
	uint32_t in_paddr_hi = in_paddr >> 32;
	uint32_t in_paddr_lo = in_paddr & 0xFFFFFFFF;
	uint32_t out_paddr_hi = out_paddr >> 32;
	uint32_t out_paddr_lo = out_paddr & 0xFFFFFFFF;

	/* The call touches the parameter blocks and may change flags. */
	__asm__ __volatile__ ("call *%8" : "=d" (status_hi), "=a" (status_lo) :
	    "d" (in_val_hi), "a" (in_val_lo),
	    "b" (in_paddr_hi), "c" (in_paddr_lo),
	    "D" (out_paddr_hi), "S" (out_paddr_lo),
	    "m" (hc_addr) :
	    "cc", "memory");
	status = status_lo | ((uint64_t)status_hi << 32);
#endif

	return status;
}
    356 
    357 uint64_t
    358 hyperv_hypercall(uint64_t control, paddr_t in_paddr, paddr_t out_paddr)
    359 {
    360 
    361 	if (hyperv_hypercall_ctx.hc_addr == NULL)
    362 		return ~HYPERCALL_STATUS_SUCCESS;
    363 
    364 	return hyperv_hypercall_md(hyperv_hypercall_ctx.hc_addr, control,
    365 	    in_paddr, out_paddr);
    366 }
    367 
    368 static bool
    369 hyperv_probe(u_int *maxleaf, u_int *features, u_int *pm_features,
    370     u_int *features3)
    371 {
    372 	u_int regs[4];
    373 
    374 	if (vm_guest != VM_GUEST_HV)
    375 		return false;
    376 
    377 	x86_cpuid(CPUID_LEAF_HV_MAXLEAF, regs);
    378 	*maxleaf = regs[0];
    379 	if (*maxleaf < CPUID_LEAF_HV_LIMITS)
    380 		return false;
    381 
    382 	x86_cpuid(CPUID_LEAF_HV_INTERFACE, regs);
    383 	if (regs[0] != CPUID_HV_IFACE_HYPERV)
    384 		return false;
    385 
    386 	x86_cpuid(CPUID_LEAF_HV_FEATURES, regs);
    387 	if (!(regs[0] & CPUID_HV_MSR_HYPERCALL)) {
    388 		/*
    389 		 * Hyper-V w/o Hypercall is impossible; someone
    390 		 * is faking Hyper-V.
    391 		 */
    392 		return false;
    393 	}
    394 
    395 	*features = regs[0];
    396 	*pm_features = regs[2];
    397 	*features3 = regs[3];
    398 
    399 	return true;
    400 }
    401 
    402 static bool
    403 hyperv_identify(void)
    404 {
    405 	char buf[256];
    406 	u_int regs[4];
    407 	u_int maxleaf;
    408 
    409 	if (!hyperv_probe(&maxleaf, &hyperv_features, &hyperv_pm_features,
    410 	    &hyperv_features3))
    411 		return false;
    412 
    413 	x86_cpuid(CPUID_LEAF_HV_IDENTITY, regs);
    414 	hyperv_ver_major = regs[1] >> 16;
    415 	snprintf(hyperv_version_str, sizeof(hyperv_version_str),
    416 	    "%d.%d.%d [SP%d]",
    417 	    hyperv_ver_major, regs[1] & 0xffff, regs[0], regs[2]);
    418 	aprint_verbose("Hyper-V Version: %s\n", hyperv_version_str);
    419 
    420 	snprintb(hyperv_features_str, sizeof(hyperv_features_str),
    421 	    "\020"
    422 	    "\001VPRUNTIME"	/* MSR_HV_VP_RUNTIME */
    423 	    "\002TMREFCNT"	/* MSR_HV_TIME_REF_COUNT */
    424 	    "\003SYNIC"		/* MSRs for SynIC */
    425 	    "\004SYNTM"		/* MSRs for SynTimer */
    426 	    "\005APIC"		/* MSR_HV_{EOI,ICR,TPR} */
    427 	    "\006HYPERCALL"	/* MSR_HV_{GUEST_OS_ID,HYPERCALL} */
    428 	    "\007VPINDEX"	/* MSR_HV_VP_INDEX */
    429 	    "\010RESET"		/* MSR_HV_RESET */
    430 	    "\011STATS"		/* MSR_HV_STATS_ */
    431 	    "\012REFTSC"	/* MSR_HV_REFERENCE_TSC */
    432 	    "\013IDLE"		/* MSR_HV_GUEST_IDLE */
    433 	    "\014TMFREQ"	/* MSR_HV_{TSC,APIC}_FREQUENCY */
    434 	    "\015DEBUG",	/* MSR_HV_SYNTH_DEBUG_ */
    435 	    hyperv_features);
    436 	aprint_verbose("  Features=%s\n", hyperv_features_str);
    437 	snprintb(buf, sizeof(buf),
    438 	    "\020"
    439 	    "\005C3HPET",	/* HPET is required for C3 state */
    440 	    (hyperv_pm_features & ~CPUPM_HV_CSTATE_MASK));
    441 	snprintf(hyperv_pm_features_str, sizeof(hyperv_pm_features_str),
    442 	    "%s [C%u]", buf, CPUPM_HV_CSTATE(hyperv_pm_features));
    443 	aprint_verbose("  PM Features=%s\n", hyperv_pm_features_str);
    444 	snprintb(hyperv_features3_str, sizeof(hyperv_features3_str),
    445 	    "\020"
    446 	    "\001MWAIT"		/* MWAIT */
    447 	    "\002DEBUG"		/* guest debug support */
    448 	    "\003PERFMON"	/* performance monitor */
    449 	    "\004PCPUDPE"	/* physical CPU dynamic partition event */
    450 	    "\005XMMHC"		/* hypercall input through XMM regs */
    451 	    "\006IDLE"		/* guest idle support */
    452 	    "\007SLEEP"		/* hypervisor sleep support */
    453 	    "\010NUMA"		/* NUMA distance query support */
    454 	    "\011TMFREQ"	/* timer frequency query (TSC, LAPIC) */
    455 	    "\012SYNCMC"	/* inject synthetic machine checks */
    456 	    "\013CRASH"		/* MSRs for guest crash */
    457 	    "\014DEBUGMSR"	/* MSRs for guest debug */
    458 	    "\015NPIEP"		/* NPIEP */
    459 	    "\016HVDIS",	/* disabling hypervisor */
    460 	    hyperv_features3);
    461 	aprint_verbose("  Features3=%s\n", hyperv_features3_str);
    462 
    463 	x86_cpuid(CPUID_LEAF_HV_RECOMMENDS, regs);
    464 	hyperv_recommends = regs[0];
    465 	aprint_verbose("  Recommends: %08x %08x\n", regs[0], regs[1]);
    466 
    467 	x86_cpuid(CPUID_LEAF_HV_LIMITS, regs);
    468 	aprint_verbose("  Limits: Vcpu:%d Lcpu:%d Int:%d\n",
    469 	    regs[0], regs[1], regs[2]);
    470 
    471 	if (maxleaf >= CPUID_LEAF_HV_HWFEATURES) {
    472 		x86_cpuid(CPUID_LEAF_HV_HWFEATURES, regs);
    473 		aprint_verbose("  HW Features: %08x, AMD: %08x\n",
    474 		    regs[0], regs[3]);
    475 	}
    476 
    477 	return true;
    478 }
    479 
    480 void
    481 hyperv_early_init(void)
    482 {
    483 	u_int features, pm_features, features3;
    484 	u_int maxleaf;
    485 	int i;
    486 
    487 	if (!hyperv_probe(&maxleaf, &features, &pm_features, &features3))
    488 		return;
    489 
    490 	if (features & CPUID_HV_MSR_TIME_REFCNT)
    491 		x86_delay = delay_func = delay_msr;
    492 
    493 	if (features & CPUID_HV_MSR_VP_INDEX) {
    494 		/* Save virtual processor id. */
    495 		hyperv_vcpuid[0] = rdmsr(MSR_HV_VP_INDEX);
    496 	} else {
    497 		/* Set virtual processor id to 0 for compatibility. */
    498 		hyperv_vcpuid[0] = 0;
    499 	}
    500 	for (i = 1; i < MAXCPUS; i++)
    501 		hyperv_vcpuid[i] = hyperv_vcpuid[0];
    502 }
    503 
    504 void
    505 hyperv_init_cpu(struct cpu_info *ci)
    506 {
    507 	u_int features, pm_features, features3;
    508 	u_int maxleaf;
    509 
    510 	if (!hyperv_probe(&maxleaf, &features, &pm_features, &features3))
    511 		return;
    512 
    513 	if (features & CPUID_HV_MSR_VP_INDEX)
    514 		hyperv_vcpuid[ci->ci_index] = rdmsr(MSR_HV_VP_INDEX);
    515 }
    516 
    517 uint32_t
    518 hyperv_get_vcpuid(cpuid_t cpu)
    519 {
    520 
    521 	if (cpu < MAXCPUS)
    522 		return hyperv_vcpuid[cpu];
    523 	return 0;
    524 }
    525 
    526 static bool
    527 hyperv_init(void)
    528 {
    529 
    530 	if (!hyperv_identify()) {
    531 		/* Not Hyper-V; reset guest id to the generic one. */
    532 		if (vm_guest == VM_GUEST_HV)
    533 			vm_guest = VM_GUEST_VM;
    534 		return false;
    535 	}
    536 
    537 	/* Set guest id */
    538 	wrmsr(MSR_HV_GUEST_OS_ID, MSR_HV_GUESTID_OSTYPE_NETBSD |
    539 	    (uint64_t)__NetBSD_Version__ << MSR_HV_GUESTID_VERSION_SHIFT);
    540 
    541 	if (hyperv_features & CPUID_HV_MSR_TIME_REFCNT) {
    542 		/* Register Hyper-V timecounter */
    543 		tc_init(&hyperv_timecounter);
    544 
    545 		/*
    546 		 * Install 64 bits timecounter method for other modules to use.
    547 		 */
    548 		hyperv_tc64 = hyperv_tc64_rdmsr;
    549 #ifdef __amd64__
    550 		hyperv_tsc_tcinit();
    551 #endif
    552 
    553 		/* delay with timecounter */
    554 		x86_delay = delay_func = delay_tc;
    555 	}
    556 
    557 #if NLAPIC > 0
    558 	if ((hyperv_features & CPUID_HV_MSR_TIME_FREQ) &&
    559 	    (hyperv_features3 & CPUID3_HV_TIME_FREQ))
    560 		lapic_per_second = rdmsr(MSR_HV_APIC_FREQUENCY);
    561 #endif
    562 
    563 	return hyperv_init_hypercall();
    564 }
    565 
    566 static bool
    567 hyperv_is_initialized(void)
    568 {
    569 	uint64_t msr;
    570 
    571 	if (vm_guest != VM_GUEST_HV)
    572 		return false;
    573 	if (rdmsr_safe(MSR_HV_HYPERCALL, &msr) == EFAULT)
    574 		return false;
    575 	return (msr & MSR_HV_HYPERCALL_ENABLE) ? true : false;
    576 }
    577 
    578 static int
    579 hyperv_match(device_t parent, cfdata_t cf, void *aux)
    580 {
    581 	struct cpufeature_attach_args *cfaa = aux;
    582 	struct cpu_info *ci = cfaa->ci;
    583 
    584 	if (strcmp(cfaa->name, "vm") != 0)
    585 		return 0;
    586 	if ((ci->ci_flags & (CPUF_BSP|CPUF_SP|CPUF_PRIMARY)) == 0)
    587 		return 0;
    588 	if (vm_guest != VM_GUEST_HV)
    589 		return 0;
    590 
    591 	return 1;
    592 }
    593 
    594 static void
    595 hyperv_attach(device_t parent, device_t self, void *aux)
    596 {
    597 	struct hyperv_softc *sc = device_private(self);
    598 
    599 	sc->sc_dev = self;
    600 
    601 	aprint_naive("\n");
    602 	aprint_normal(": Hyper-V\n");
    603 
    604 	if (!hyperv_is_initialized()) {
    605 		if (rdmsr(MSR_HV_GUEST_OS_ID) == 0) {
    606 			if (!hyperv_init()) {
    607 				aprint_error_dev(self, "initialize failed\n");
    608 				return;
    609 			}
    610 		}
    611 		hyperv_init_hypercall();
    612 	}
    613 
    614 	(void) pmf_device_register(self, NULL, NULL);
    615 
    616 	(void) hyperv_sysctl_setup_root(sc);
    617 }
    618 
    619 static int
    620 hyperv_detach(device_t self, int flags)
    621 {
    622 	struct hyperv_softc *sc = device_private(self);
    623 	uint64_t hc;
    624 
    625 	/* Disable Hypercall */
    626 	hc = rdmsr(MSR_HV_HYPERCALL);
    627 	wrmsr(MSR_HV_HYPERCALL, hc & MSR_HV_HYPERCALL_RSVD_MASK);
    628 	hyperv_hypercall_memfree();
    629 
    630 	if (hyperv_features & CPUID_HV_MSR_TIME_REFCNT)
    631 		tc_detach(&hyperv_timecounter);
    632 
    633 	wrmsr(MSR_HV_GUEST_OS_ID, 0);
    634 
    635 	pmf_device_deregister(self);
    636 
    637 	if (sc->sc_log != NULL) {
    638 		sysctl_teardown(&sc->sc_log);
    639 		sc->sc_log = NULL;
    640 	}
    641 
    642 	return 0;
    643 }
    644 
    645 void
    646 hyperv_intr(void)
    647 {
    648 	struct cpu_info *ci = curcpu();
    649 
    650 	(*hyperv_event_proc.func)(hyperv_event_proc.arg, ci);
    651 	(*hyperv_message_proc.func)(hyperv_message_proc.arg, ci);
    652 }
    653 
    654 void hyperv_hypercall_intr(struct trapframe *);
    655 void
    656 hyperv_hypercall_intr(struct trapframe *frame __unused)
    657 {
    658 	struct cpu_info *ci = curcpu();
    659 
    660 	ci->ci_isources[LIR_HV]->is_evcnt.ev_count++;
    661 
    662 	hyperv_intr();
    663 }
    664 
/*
 * No-op callback; the initial value of the event and message callbacks
 * so that hyperv_intr() never has to check for a missing handler.
 */
static void
hyperv_proc_dummy(void *arg __unused, struct cpu_info *ci __unused)
{
}
    669 
    670 void
    671 hyperv_set_event_proc(void (*func)(void *, struct cpu_info *), void *arg)
    672 {
    673 
    674 	hyperv_event_proc.func = func;
    675 	hyperv_event_proc.arg = arg;
    676 }
    677 
    678 void
    679 hyperv_set_message_proc(void (*func)(void *, struct cpu_info *), void *arg)
    680 {
    681 
    682 	hyperv_message_proc.func = func;
    683 	hyperv_message_proc.arg = arg;
    684 }
    685 
/*
 * Forget the hypercall page address.  Nothing is actually freed; after
 * this hyperv_hypercall() refuses to issue hypercalls because hc_addr
 * is NULL.
 */
static void
hyperv_hypercall_memfree(void)
{

	hyperv_hypercall_ctx.hc_addr = NULL;
}
    692 
    693 static bool
    694 hyperv_init_hypercall(void)
    695 {
    696 	uint64_t hc, hc_orig;
    697 
    698 	hyperv_hypercall_ctx.hc_addr = hyperv_hypercall_page;
    699 	hyperv_hypercall_ctx.hc_paddr = vtophys((vaddr_t)hyperv_hypercall_page);
    700 	KASSERT(hyperv_hypercall_ctx.hc_paddr != 0);
    701 
    702 	/* Get the 'reserved' bits, which requires preservation. */
    703 	hc_orig = rdmsr(MSR_HV_HYPERCALL);
    704 
    705 	/*
    706 	 * Setup the Hypercall page.
    707 	 *
    708 	 * NOTE: 'reserved' bits MUST be preserved.
    709 	 */
    710 	hc = (atop(hyperv_hypercall_ctx.hc_paddr) << MSR_HV_HYPERCALL_PGSHIFT) |
    711 	    (hc_orig & MSR_HV_HYPERCALL_RSVD_MASK) |
    712 	    MSR_HV_HYPERCALL_ENABLE;
    713 	wrmsr(MSR_HV_HYPERCALL, hc);
    714 
    715 	/*
    716 	 * Confirm that Hypercall page did get setup.
    717 	 */
    718 	hc = rdmsr(MSR_HV_HYPERCALL);
    719 	if (!(hc & MSR_HV_HYPERCALL_ENABLE)) {
    720 		aprint_error("Hyper-V: Hypercall setup failed\n");
    721 		hyperv_hypercall_memfree();
    722 		/* Can't perform any Hyper-V specific actions */
    723 		vm_guest = VM_GUEST_VM;
    724 		return false;
    725 	}
    726 
    727 	return true;
    728 }
    729 
/*
 * Public query: non-zero when the hypercall page is currently enabled.
 */
int
hyperv_hypercall_enabled(void)
{

	return hyperv_is_initialized() ? 1 : 0;
}
    736 
    737 int
    738 hyperv_synic_supported(void)
    739 {
    740 
    741 	return (hyperv_features & CPUID_HV_MSR_SYNIC) ? 1 : 0;
    742 }
    743 
/*
 * Public query: 1 when efi_probe() reports no EFI (taken to mean a
 * generation 1 virtual machine), 0 otherwise.
 */
int
hyperv_is_gen1(void)
{

	return efi_probe() ? 0 : 1;
}
    750 
/*
 * Signal end-of-message to the hypervisor by writing 0 to MSR_HV_EOM.
 */
void
hyperv_send_eom(void)
{

	wrmsr(MSR_HV_EOM, 0);
}
    757 
    758 void
    759 vmbus_init_interrupts_md(struct vmbus_softc *sc, cpuid_t cpu)
    760 {
    761 	extern void Xintr_hyperv_hypercall(void);
    762 	struct vmbus_percpu_data *pd;
    763 	struct hyperv_percpu_data *hv_pd;
    764 	struct cpu_info *ci;
    765 	struct idt_vec *iv;
    766 	int hyperv_idtvec;
    767 	cpuid_t cpu0;
    768 
    769 	cpu0 = cpu_index(&cpu_info_primary);
    770 
    771 	if (cpu == cpu0 || idt_vec_is_pcpu()) {
    772 		/*
    773 		 * All Hyper-V ISR required resources are setup, now let's find a
    774 		 * free IDT vector for Hyper-V ISR and set it up.
    775 		 */
    776 		ci = cpu_lookup(cpu);
    777 		iv = &ci->ci_idtvec;
    778 		mutex_enter(&cpu_lock);
    779 		hyperv_idtvec = idt_vec_alloc(iv,
    780 		    APIC_LEVEL(NIPL), IDT_INTR_HIGH);
    781 		mutex_exit(&cpu_lock);
    782 		KASSERT(hyperv_idtvec > 0);
    783 		idt_vec_set(iv, hyperv_idtvec, Xintr_hyperv_hypercall);
    784 	} else {
    785 		pd = &sc->sc_percpu[cpu0];
    786 		hv_pd = pd->md_cookie;
    787 		KASSERT(hv_pd != NULL && hv_pd->pd_idtvec > 0);
    788 		hyperv_idtvec = hv_pd->pd_idtvec;
    789 	}
    790 
    791 	hv_pd = kmem_zalloc(sizeof(*hv_pd), KM_SLEEP);
    792 	hv_pd->pd_idtvec = hyperv_idtvec;
    793 	pd = &sc->sc_percpu[cpu];
    794 	pd->md_cookie = (void *)hv_pd;
    795 }
    796 
    797 void
    798 vmbus_deinit_interrupts_md(struct vmbus_softc *sc, cpuid_t cpu)
    799 {
    800 	struct vmbus_percpu_data *pd;
    801 	struct hyperv_percpu_data *hv_pd;
    802 	struct cpu_info *ci;
    803 	struct idt_vec *iv;
    804 
    805 	pd = &sc->sc_percpu[cpu];
    806 	hv_pd = pd->md_cookie;
    807 	KASSERT(hv_pd != NULL);
    808 
    809 	if (cpu == cpu_index(&cpu_info_primary) ||
    810 	    idt_vec_is_pcpu()) {
    811 		ci = cpu_lookup(cpu);
    812 		iv = &ci->ci_idtvec;
    813 
    814 		if (hv_pd->pd_idtvec > 0) {
    815 			idt_vec_free(iv, hv_pd->pd_idtvec);
    816 		}
    817 	}
    818 
    819 	pd->md_cookie = NULL;
    820 	kmem_free(hv_pd, sizeof(*hv_pd));
    821 }
    822 
    823 void
    824 vmbus_init_synic_md(struct vmbus_softc *sc, cpuid_t cpu)
    825 {
    826 	extern void Xintr_hyperv_hypercall(void);
    827 	struct vmbus_percpu_data *pd;
    828 	struct hyperv_percpu_data *hv_pd;
    829 	uint64_t val, orig;
    830 	uint32_t sint;
    831 	int hyperv_idtvec;
    832 
    833 	pd = &sc->sc_percpu[cpu];
    834 	hv_pd = pd->md_cookie;
    835 	hyperv_idtvec = hv_pd->pd_idtvec;
    836 
    837 	/*
    838 	 * Setup the SynIC message.
    839 	 */
    840 	orig = rdmsr(MSR_HV_SIMP);
    841 	val = MSR_HV_SIMP_ENABLE | (orig & MSR_HV_SIMP_RSVD_MASK) |
    842 	    (atop(hyperv_dma_get_paddr(&pd->simp_dma)) << MSR_HV_SIMP_PGSHIFT);
    843 	wrmsr(MSR_HV_SIMP, val);
    844 
    845 	/*
    846 	 * Setup the SynIC event flags.
    847 	 */
    848 	orig = rdmsr(MSR_HV_SIEFP);
    849 	val = MSR_HV_SIEFP_ENABLE | (orig & MSR_HV_SIEFP_RSVD_MASK) |
    850 	    (atop(hyperv_dma_get_paddr(&pd->siep_dma)) << MSR_HV_SIEFP_PGSHIFT);
    851 	wrmsr(MSR_HV_SIEFP, val);
    852 
    853 	/*
    854 	 * Configure and unmask SINT for message and event flags.
    855 	 */
    856 	sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE;
    857 	orig = rdmsr(sint);
    858 	val = hyperv_idtvec | MSR_HV_SINT_AUTOEOI |
    859 	    (orig & MSR_HV_SINT_RSVD_MASK);
    860 	wrmsr(sint, val);
    861 
    862 	/*
    863 	 * Configure and unmask SINT for timer.
    864 	 */
    865 	sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER;
    866 	orig = rdmsr(sint);
    867 	val = hyperv_idtvec | MSR_HV_SINT_AUTOEOI |
    868 	    (orig & MSR_HV_SINT_RSVD_MASK);
    869 	wrmsr(sint, val);
    870 
    871 	/*
    872 	 * All done; enable SynIC.
    873 	 */
    874 	orig = rdmsr(MSR_HV_SCONTROL);
    875 	val = MSR_HV_SCTRL_ENABLE | (orig & MSR_HV_SCTRL_RSVD_MASK);
    876 	wrmsr(MSR_HV_SCONTROL, val);
    877 }
    878 
    879 void
    880 vmbus_deinit_synic_md(struct vmbus_softc *sc, cpuid_t cpu)
    881 {
    882 	uint64_t orig;
    883 	uint32_t sint;
    884 
    885 	/*
    886 	 * Disable SynIC.
    887 	 */
    888 	orig = rdmsr(MSR_HV_SCONTROL);
    889 	wrmsr(MSR_HV_SCONTROL, (orig & MSR_HV_SCTRL_RSVD_MASK));
    890 
    891 	/*
    892 	 * Mask message and event flags SINT.
    893 	 */
    894 	sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE;
    895 	orig = rdmsr(sint);
    896 	wrmsr(sint, orig | MSR_HV_SINT_MASKED);
    897 
    898 	/*
    899 	 * Mask timer SINT.
    900 	 */
    901 	sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER;
    902 	orig = rdmsr(sint);
    903 	wrmsr(sint, orig | MSR_HV_SINT_MASKED);
    904 
    905 	/*
    906 	 * Teardown SynIC message.
    907 	 */
    908 	orig = rdmsr(MSR_HV_SIMP);
    909 	wrmsr(MSR_HV_SIMP, (orig & MSR_HV_SIMP_RSVD_MASK));
    910 
    911 	/*
    912 	 * Teardown SynIC event flags.
    913 	 */
    914 	orig = rdmsr(MSR_HV_SIEFP);
    915 	wrmsr(MSR_HV_SIEFP, (orig & MSR_HV_SIEFP_RSVD_MASK));
    916 }
    917 
    918 static int
    919 hyperv_sysctl_setup(struct hyperv_softc *sc,
    920     const struct sysctlnode *hyperv_node)
    921 {
    922 	int error;
    923 
    924 	error = sysctl_createv(&sc->sc_log, 0, &hyperv_node, NULL,
    925 	    CTLFLAG_READONLY, CTLTYPE_STRING, "version", NULL,
    926 	    NULL, 0, hyperv_version_str,
    927 	    0, CTL_CREATE, CTL_EOL);
    928 	if (error)
    929 		return error;
    930 
    931 	error = sysctl_createv(&sc->sc_log, 0, &hyperv_node, NULL,
    932 	    CTLFLAG_READONLY, CTLTYPE_STRING, "features", NULL,
    933 	    NULL, 0, hyperv_features_str,
    934 	    0, CTL_CREATE, CTL_EOL);
    935 	if (error)
    936 		return error;
    937 
    938 	error = sysctl_createv(&sc->sc_log, 0, &hyperv_node, NULL,
    939 	    CTLFLAG_READONLY, CTLTYPE_STRING, "pm_features", NULL,
    940 	    NULL, 0, hyperv_pm_features_str,
    941 	    0, CTL_CREATE, CTL_EOL);
    942 	if (error)
    943 		return error;
    944 
    945 	error = sysctl_createv(&sc->sc_log, 0, &hyperv_node, NULL,
    946 	    CTLFLAG_READONLY, CTLTYPE_STRING, "features3", NULL,
    947 	    NULL, 0, hyperv_features3_str,
    948 	    0, CTL_CREATE, CTL_EOL);
    949 	if (error)
    950 		return error;
    951 
    952 	return 0;
    953 }
    954 
    955 static int
    956 hyperv_sysctl_setup_root(struct hyperv_softc *sc)
    957 {
    958 	const struct sysctlnode *machdep_node, *hyperv_node;
    959 	int error;
    960 
    961 	error = sysctl_createv(&sc->sc_log, 0, NULL, &machdep_node,
    962 	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "machdep", NULL,
    963 	    NULL, 0, NULL, 0, CTL_MACHDEP, CTL_EOL);
    964 	if (error)
    965 		goto fail;
    966 
    967 	error = sysctl_createv(&sc->sc_log, 0, &machdep_node, &hyperv_node,
    968 	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "hyperv", NULL,
    969 	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
    970 	if (error)
    971 		goto fail;
    972 
    973 	error = hyperv_sysctl_setup(sc, hyperv_node);
    974 	if (error)
    975 		goto fail;
    976 
    977 	return 0;
    978 
    979 fail:
    980 	sysctl_teardown(&sc->sc_log);
    981 	sc->sc_log = NULL;
    982 	return error;
    983 }
    984 
/* Declare the "hyperv" driver-class module; its handler is hyperv_modcmd(). */
MODULE(MODULE_CLASS_DRIVER, hyperv, NULL);
    986 
    987 #ifdef _MODULE
    988 #include "ioconf.c"
    989 #endif
    990 
    991 static int
    992 hyperv_modcmd(modcmd_t cmd, void *aux)
    993 {
    994 	int rv = 0;
    995 
    996 	switch (cmd) {
    997 	case MODULE_CMD_INIT:
    998 #ifdef _MODULE
    999 		rv = config_init_component(cfdriver_ioconf_hyperv,
   1000 		    cfattach_ioconf_hyperv, cfdata_ioconf_hyperv);
   1001 #endif
   1002 		hyperv_init();
   1003 		break;
   1004 
   1005 	case MODULE_CMD_FINI:
   1006 #ifdef _MODULE
   1007 		rv = config_fini_component(cfdriver_ioconf_hyperv,
   1008 		    cfattach_ioconf_hyperv, cfdata_ioconf_hyperv);
   1009 #endif
   1010 		break;
   1011 
   1012 	default:
   1013 		rv = ENOTTY;
   1014 		break;
   1015 	}
   1016 
   1017 	return rv;
   1018 }
   1019 
   1020 #if NVMBUS > 0
   1021 /*
   1022  * genfb at vmbus
   1023  */
   1024 static struct genfb_pmf_callback pmf_cb;
   1025 static struct genfb_mode_callback mode_cb;
   1026 
/*
 * Mode-switch hook installed in mode_cb.gmc_setmode.  Performs no work
 * and always reports success; both arguments are ignored.
 */
static bool
x86_genfb_setmode(struct genfb_softc *sc, int newmode)
{
	(void)sc;
	(void)newmode;

	return true;
}
   1032 
   1033 static bool
   1034 x86_genfb_suspend(device_t dev, const pmf_qual_t *qual)
   1035 {
   1036 	return true;
   1037 }
   1038 
   1039 static bool
   1040 x86_genfb_resume(device_t dev, const pmf_qual_t *qual)
   1041 {
   1042 #if NGENFB > 0
   1043 	struct genfb_vmbus_softc *sc = device_private(dev);
   1044 
   1045 	genfb_restore_palette(&sc->sc_gen);
   1046 #endif
   1047 	return true;
   1048 }
   1049 
   1050 static void
   1051 populate_fbinfo(device_t dev, prop_dictionary_t dict)
   1052 {
   1053 #if NWSDISPLAY > 0 && NGENFB > 0
   1054 	struct rasops_info *ri = &x86_genfb_console_screen.scr_ri;
   1055 #endif
   1056 	const void *fbptr = lookup_bootinfo(BTINFO_FRAMEBUFFER);
   1057 	struct btinfo_framebuffer fbinfo;
   1058 
   1059 	if (fbptr == NULL)
   1060 		return;
   1061 
   1062 	memcpy(&fbinfo, fbptr, sizeof(fbinfo));
   1063 
   1064 	if (fbinfo.physaddr != 0) {
   1065 		prop_dictionary_set_uint32(dict, "width", fbinfo.width);
   1066 		prop_dictionary_set_uint32(dict, "height", fbinfo.height);
   1067 		prop_dictionary_set_uint8(dict, "depth", fbinfo.depth);
   1068 		prop_dictionary_set_uint16(dict, "linebytes", fbinfo.stride);
   1069 
   1070 		prop_dictionary_set_uint64(dict, "address", fbinfo.physaddr);
   1071 #if NWSDISPLAY > 0 && NGENFB > 0
   1072 		if (ri->ri_bits != NULL) {
   1073 			prop_dictionary_set_uint64(dict, "virtual_address",
   1074 			    ri->ri_hwbits != NULL ?
   1075 			    (vaddr_t)ri->ri_hworigbits :
   1076 			    (vaddr_t)ri->ri_origbits);
   1077 		}
   1078 #endif
   1079 	}
   1080 #if notyet
   1081 	prop_dictionary_set_bool(dict, "splash",
   1082 	    (fbinfo.flags & BI_FB_SPLASH) != 0);
   1083 #endif
   1084 #if 0
   1085 	if (fbinfo.depth == 8) {
   1086 		gfb_cb.gcc_cookie = NULL;
   1087 		gfb_cb.gcc_set_mapreg = x86_genfb_set_mapreg;
   1088 		prop_dictionary_set_uint64(dict, "cmap_callback",
   1089 		    (uint64_t)(uintptr_t)&gfb_cb);
   1090 	}
   1091 #endif
   1092 	if (fbinfo.physaddr != 0) {
   1093 		mode_cb.gmc_setmode = x86_genfb_setmode;
   1094 		prop_dictionary_set_uint64(dict, "mode_callback",
   1095 		    (uint64_t)(uintptr_t)&mode_cb);
   1096 	}
   1097 
   1098 #if NWSDISPLAY > 0 && NGENFB > 0
   1099 	if (device_is_a(dev, "genfb")) {
   1100 		prop_dictionary_set_bool(dict, "enable_shadowfb",
   1101 		    ri->ri_hwbits != NULL);
   1102 
   1103 		x86_genfb_set_console_dev(dev);
   1104 #ifdef DDB
   1105 		db_trap_callback = x86_genfb_ddb_trap_callback;
   1106 #endif
   1107 	}
   1108 #endif
   1109 }
   1110 #endif
   1111 
   1112 device_t
   1113 device_hyperv_register(device_t dev, void *aux)
   1114 {
   1115 #if NVMBUS > 0
   1116 	device_t parent = device_parent(dev);
   1117 
   1118 	if (parent && device_is_a(parent, "vmbus") && !x86_found_console) {
   1119 		struct vmbus_attach_args *aa = aux;
   1120 
   1121 		if (memcmp(aa->aa_type, &hyperv_guid_video,
   1122 		    sizeof(*aa->aa_type)) == 0) {
   1123 			prop_dictionary_t dict = device_properties(dev);
   1124 
   1125 			/* Initialize genfb for serial console */
   1126 			x86_genfb_init();
   1127 
   1128 			/*
   1129 			 * framebuffer drivers other than genfb can work
   1130 			 * without the address property
   1131 			 */
   1132 			populate_fbinfo(dev, dict);
   1133 
   1134 #if 1 && NWSDISPLAY > 0 && NGENFB > 0
   1135 			/* XXX */
   1136 			if (device_is_a(dev, "genfb")) {
   1137 				prop_dictionary_set_bool(dict, "is_console",
   1138 				    genfb_is_console());
   1139 			} else
   1140 #endif
   1141 			prop_dictionary_set_bool(dict, "is_console", true);
   1142 
   1143 			prop_dictionary_set_bool(dict, "clear-screen", false);
   1144 #if NWSDISPLAY > 0 && NGENFB > 0
   1145 			prop_dictionary_set_uint16(dict, "cursor-row",
   1146 			    x86_genfb_console_screen.scr_ri.ri_crow);
   1147 #endif
   1148 			pmf_cb.gpc_suspend = x86_genfb_suspend;
   1149 			pmf_cb.gpc_resume = x86_genfb_resume;
   1150 			prop_dictionary_set_uint64(dict, "pmf_callback",
   1151 			    (uint64_t)(uintptr_t)&pmf_cb);
   1152 			x86_found_console = true;
   1153 			return NULL;
   1154 		}
   1155 	}
   1156 #endif
   1157 	return NULL;
   1158 }
   1159