1 /* $NetBSD: nvmm_x86_svm.c,v 1.90 2025/08/15 11:36:44 skrll Exp $ */ 2 3 /* 4 * Copyright (c) 2018-2020 Maxime Villard, m00nbsd.net 5 * All rights reserved. 6 * 7 * This code is part of the NVMM hypervisor. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 23 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 25 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31 #include <sys/cdefs.h> 32 __KERNEL_RCSID(0, "$NetBSD: nvmm_x86_svm.c,v 1.90 2025/08/15 11:36:44 skrll Exp $"); 33 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/kernel.h> 37 #include <sys/kmem.h> 38 #include <sys/cpu.h> 39 #include <sys/xcall.h> 40 #include <sys/mman.h> 41 42 #include <uvm/uvm_extern.h> 43 #include <uvm/uvm_page.h> 44 45 #include <x86/apicvar.h> 46 #include <x86/cputypes.h> 47 #include <x86/specialreg.h> 48 #include <x86/dbregs.h> 49 #include <x86/cpu_counter.h> 50 51 #include <machine/cpuvar.h> 52 #include <machine/pmap_private.h> 53 54 #include <dev/nvmm/nvmm.h> 55 #include <dev/nvmm/nvmm_internal.h> 56 #include <dev/nvmm/x86/nvmm_x86.h> 57 58 int svm_vmrun(paddr_t, uint64_t *); 59 60 static inline void 61 svm_clgi(void) 62 { 63 asm volatile ("clgi" ::: "memory"); 64 } 65 66 static inline void 67 svm_stgi(void) 68 { 69 asm volatile ("stgi" ::: "memory"); 70 } 71 72 #define MSR_VM_HSAVE_PA 0xC0010117 73 74 /* -------------------------------------------------------------------------- */ 75 76 #define VMCB_EXITCODE_CR0_READ 0x0000 77 #define VMCB_EXITCODE_CR1_READ 0x0001 78 #define VMCB_EXITCODE_CR2_READ 0x0002 79 #define VMCB_EXITCODE_CR3_READ 0x0003 80 #define VMCB_EXITCODE_CR4_READ 0x0004 81 #define VMCB_EXITCODE_CR5_READ 0x0005 82 #define VMCB_EXITCODE_CR6_READ 0x0006 83 #define VMCB_EXITCODE_CR7_READ 0x0007 84 #define VMCB_EXITCODE_CR8_READ 0x0008 85 #define VMCB_EXITCODE_CR9_READ 0x0009 86 #define VMCB_EXITCODE_CR10_READ 0x000A 87 #define VMCB_EXITCODE_CR11_READ 0x000B 88 #define VMCB_EXITCODE_CR12_READ 0x000C 89 #define VMCB_EXITCODE_CR13_READ 0x000D 90 #define VMCB_EXITCODE_CR14_READ 0x000E 91 #define VMCB_EXITCODE_CR15_READ 0x000F 92 #define VMCB_EXITCODE_CR0_WRITE 0x0010 93 #define VMCB_EXITCODE_CR1_WRITE 0x0011 94 #define VMCB_EXITCODE_CR2_WRITE 0x0012 95 #define VMCB_EXITCODE_CR3_WRITE 0x0013 96 #define VMCB_EXITCODE_CR4_WRITE 0x0014 97 #define VMCB_EXITCODE_CR5_WRITE 0x0015 98 #define VMCB_EXITCODE_CR6_WRITE 0x0016 99 #define VMCB_EXITCODE_CR7_WRITE 0x0017 100 
#define VMCB_EXITCODE_CR8_WRITE 0x0018 101 #define VMCB_EXITCODE_CR9_WRITE 0x0019 102 #define VMCB_EXITCODE_CR10_WRITE 0x001A 103 #define VMCB_EXITCODE_CR11_WRITE 0x001B 104 #define VMCB_EXITCODE_CR12_WRITE 0x001C 105 #define VMCB_EXITCODE_CR13_WRITE 0x001D 106 #define VMCB_EXITCODE_CR14_WRITE 0x001E 107 #define VMCB_EXITCODE_CR15_WRITE 0x001F 108 #define VMCB_EXITCODE_DR0_READ 0x0020 109 #define VMCB_EXITCODE_DR1_READ 0x0021 110 #define VMCB_EXITCODE_DR2_READ 0x0022 111 #define VMCB_EXITCODE_DR3_READ 0x0023 112 #define VMCB_EXITCODE_DR4_READ 0x0024 113 #define VMCB_EXITCODE_DR5_READ 0x0025 114 #define VMCB_EXITCODE_DR6_READ 0x0026 115 #define VMCB_EXITCODE_DR7_READ 0x0027 116 #define VMCB_EXITCODE_DR8_READ 0x0028 117 #define VMCB_EXITCODE_DR9_READ 0x0029 118 #define VMCB_EXITCODE_DR10_READ 0x002A 119 #define VMCB_EXITCODE_DR11_READ 0x002B 120 #define VMCB_EXITCODE_DR12_READ 0x002C 121 #define VMCB_EXITCODE_DR13_READ 0x002D 122 #define VMCB_EXITCODE_DR14_READ 0x002E 123 #define VMCB_EXITCODE_DR15_READ 0x002F 124 #define VMCB_EXITCODE_DR0_WRITE 0x0030 125 #define VMCB_EXITCODE_DR1_WRITE 0x0031 126 #define VMCB_EXITCODE_DR2_WRITE 0x0032 127 #define VMCB_EXITCODE_DR3_WRITE 0x0033 128 #define VMCB_EXITCODE_DR4_WRITE 0x0034 129 #define VMCB_EXITCODE_DR5_WRITE 0x0035 130 #define VMCB_EXITCODE_DR6_WRITE 0x0036 131 #define VMCB_EXITCODE_DR7_WRITE 0x0037 132 #define VMCB_EXITCODE_DR8_WRITE 0x0038 133 #define VMCB_EXITCODE_DR9_WRITE 0x0039 134 #define VMCB_EXITCODE_DR10_WRITE 0x003A 135 #define VMCB_EXITCODE_DR11_WRITE 0x003B 136 #define VMCB_EXITCODE_DR12_WRITE 0x003C 137 #define VMCB_EXITCODE_DR13_WRITE 0x003D 138 #define VMCB_EXITCODE_DR14_WRITE 0x003E 139 #define VMCB_EXITCODE_DR15_WRITE 0x003F 140 #define VMCB_EXITCODE_EXCP0 0x0040 141 #define VMCB_EXITCODE_EXCP1 0x0041 142 #define VMCB_EXITCODE_EXCP2 0x0042 143 #define VMCB_EXITCODE_EXCP3 0x0043 144 #define VMCB_EXITCODE_EXCP4 0x0044 145 #define VMCB_EXITCODE_EXCP5 0x0045 146 #define VMCB_EXITCODE_EXCP6 0x0046 147 #define VMCB_EXITCODE_EXCP7 0x0047 148 #define VMCB_EXITCODE_EXCP8 0x0048 149 #define VMCB_EXITCODE_EXCP9 0x0049 150 #define VMCB_EXITCODE_EXCP10 0x004A 151 #define VMCB_EXITCODE_EXCP11 0x004B 152 #define VMCB_EXITCODE_EXCP12 0x004C 153 #define VMCB_EXITCODE_EXCP13 0x004D 154 #define VMCB_EXITCODE_EXCP14 0x004E 155 #define VMCB_EXITCODE_EXCP15 0x004F 156 #define VMCB_EXITCODE_EXCP16 0x0050 157 #define VMCB_EXITCODE_EXCP17 0x0051 158 #define VMCB_EXITCODE_EXCP18 0x0052 159 #define VMCB_EXITCODE_EXCP19 0x0053 160 #define VMCB_EXITCODE_EXCP20 0x0054 161 #define VMCB_EXITCODE_EXCP21 0x0055 162 #define VMCB_EXITCODE_EXCP22 0x0056 163 #define VMCB_EXITCODE_EXCP23 0x0057 164 #define VMCB_EXITCODE_EXCP24 0x0058 165 #define VMCB_EXITCODE_EXCP25 0x0059 166 #define VMCB_EXITCODE_EXCP26 0x005A 167 #define VMCB_EXITCODE_EXCP27 0x005B 168 #define VMCB_EXITCODE_EXCP28 0x005C 169 #define VMCB_EXITCODE_EXCP29 0x005D 170 #define VMCB_EXITCODE_EXCP30 0x005E 171 #define VMCB_EXITCODE_EXCP31 0x005F 172 #define VMCB_EXITCODE_INTR 0x0060 173 #define VMCB_EXITCODE_NMI 0x0061 174 #define VMCB_EXITCODE_SMI 0x0062 175 #define VMCB_EXITCODE_INIT 0x0063 176 #define VMCB_EXITCODE_VINTR 0x0064 177 #define VMCB_EXITCODE_CR0_SEL_WRITE 0x0065 178 #define VMCB_EXITCODE_IDTR_READ 0x0066 179 #define VMCB_EXITCODE_GDTR_READ 0x0067 180 #define VMCB_EXITCODE_LDTR_READ 0x0068 181 #define VMCB_EXITCODE_TR_READ 0x0069 182 #define VMCB_EXITCODE_IDTR_WRITE 0x006A 183 #define VMCB_EXITCODE_GDTR_WRITE 0x006B 184 #define VMCB_EXITCODE_LDTR_WRITE 0x006C 185 #define 
VMCB_EXITCODE_TR_WRITE 0x006D 186 #define VMCB_EXITCODE_RDTSC 0x006E 187 #define VMCB_EXITCODE_RDPMC 0x006F 188 #define VMCB_EXITCODE_PUSHF 0x0070 189 #define VMCB_EXITCODE_POPF 0x0071 190 #define VMCB_EXITCODE_CPUID 0x0072 191 #define VMCB_EXITCODE_RSM 0x0073 192 #define VMCB_EXITCODE_IRET 0x0074 193 #define VMCB_EXITCODE_SWINT 0x0075 194 #define VMCB_EXITCODE_INVD 0x0076 195 #define VMCB_EXITCODE_PAUSE 0x0077 196 #define VMCB_EXITCODE_HLT 0x0078 197 #define VMCB_EXITCODE_INVLPG 0x0079 198 #define VMCB_EXITCODE_INVLPGA 0x007A 199 #define VMCB_EXITCODE_IOIO 0x007B 200 #define VMCB_EXITCODE_MSR 0x007C 201 #define VMCB_EXITCODE_TASK_SWITCH 0x007D 202 #define VMCB_EXITCODE_FERR_FREEZE 0x007E 203 #define VMCB_EXITCODE_SHUTDOWN 0x007F 204 #define VMCB_EXITCODE_VMRUN 0x0080 205 #define VMCB_EXITCODE_VMMCALL 0x0081 206 #define VMCB_EXITCODE_VMLOAD 0x0082 207 #define VMCB_EXITCODE_VMSAVE 0x0083 208 #define VMCB_EXITCODE_STGI 0x0084 209 #define VMCB_EXITCODE_CLGI 0x0085 210 #define VMCB_EXITCODE_SKINIT 0x0086 211 #define VMCB_EXITCODE_RDTSCP 0x0087 212 #define VMCB_EXITCODE_ICEBP 0x0088 213 #define VMCB_EXITCODE_WBINVD 0x0089 214 #define VMCB_EXITCODE_MONITOR 0x008A 215 #define VMCB_EXITCODE_MWAIT 0x008B 216 #define VMCB_EXITCODE_MWAIT_CONDITIONAL 0x008C 217 #define VMCB_EXITCODE_XSETBV 0x008D 218 #define VMCB_EXITCODE_RDPRU 0x008E 219 #define VMCB_EXITCODE_EFER_WRITE_TRAP 0x008F 220 #define VMCB_EXITCODE_CR0_WRITE_TRAP 0x0090 221 #define VMCB_EXITCODE_CR1_WRITE_TRAP 0x0091 222 #define VMCB_EXITCODE_CR2_WRITE_TRAP 0x0092 223 #define VMCB_EXITCODE_CR3_WRITE_TRAP 0x0093 224 #define VMCB_EXITCODE_CR4_WRITE_TRAP 0x0094 225 #define VMCB_EXITCODE_CR5_WRITE_TRAP 0x0095 226 #define VMCB_EXITCODE_CR6_WRITE_TRAP 0x0096 227 #define VMCB_EXITCODE_CR7_WRITE_TRAP 0x0097 228 #define VMCB_EXITCODE_CR8_WRITE_TRAP 0x0098 229 #define VMCB_EXITCODE_CR9_WRITE_TRAP 0x0099 230 #define VMCB_EXITCODE_CR10_WRITE_TRAP 0x009A 231 #define VMCB_EXITCODE_CR11_WRITE_TRAP 0x009B 232 #define VMCB_EXITCODE_CR12_WRITE_TRAP 0x009C 233 #define VMCB_EXITCODE_CR13_WRITE_TRAP 0x009D 234 #define VMCB_EXITCODE_CR14_WRITE_TRAP 0x009E 235 #define VMCB_EXITCODE_CR15_WRITE_TRAP 0x009F 236 #define VMCB_EXITCODE_INVLPGB 0x00A0 237 #define VMCB_EXITCODE_INVLPGB_ILLEGAL 0x00A1 238 #define VMCB_EXITCODE_INVPCID 0x00A2 239 #define VMCB_EXITCODE_MCOMMIT 0x00A3 240 #define VMCB_EXITCODE_TLBSYNC 0x00A4 241 #define VMCB_EXITCODE_NPF 0x0400 242 #define VMCB_EXITCODE_AVIC_INCOMP_IPI 0x0401 243 #define VMCB_EXITCODE_AVIC_NOACCEL 0x0402 244 #define VMCB_EXITCODE_VMGEXIT 0x0403 245 #define VMCB_EXITCODE_BUSY -2ULL 246 #define VMCB_EXITCODE_INVALID -1ULL 247 248 /* -------------------------------------------------------------------------- */ 249 250 struct vmcb_ctrl { 251 uint32_t intercept_cr; 252 #define VMCB_CTRL_INTERCEPT_RCR(x) __BIT( 0 + x) 253 #define VMCB_CTRL_INTERCEPT_WCR(x) __BIT(16 + x) 254 255 uint32_t intercept_dr; 256 #define VMCB_CTRL_INTERCEPT_RDR(x) __BIT( 0 + x) 257 #define VMCB_CTRL_INTERCEPT_WDR(x) __BIT(16 + x) 258 259 uint32_t intercept_vec; 260 #define VMCB_CTRL_INTERCEPT_VEC(x) __BIT(x) 261 262 uint32_t intercept_misc1; 263 #define VMCB_CTRL_INTERCEPT_INTR __BIT(0) 264 #define VMCB_CTRL_INTERCEPT_NMI __BIT(1) 265 #define VMCB_CTRL_INTERCEPT_SMI __BIT(2) 266 #define VMCB_CTRL_INTERCEPT_INIT __BIT(3) 267 #define VMCB_CTRL_INTERCEPT_VINTR __BIT(4) 268 #define VMCB_CTRL_INTERCEPT_CR0_SPEC __BIT(5) 269 #define VMCB_CTRL_INTERCEPT_RIDTR __BIT(6) 270 #define VMCB_CTRL_INTERCEPT_RGDTR __BIT(7) 271 #define VMCB_CTRL_INTERCEPT_RLDTR __BIT(8) 272 
#define VMCB_CTRL_INTERCEPT_RTR __BIT(9) 273 #define VMCB_CTRL_INTERCEPT_WIDTR __BIT(10) 274 #define VMCB_CTRL_INTERCEPT_WGDTR __BIT(11) 275 #define VMCB_CTRL_INTERCEPT_WLDTR __BIT(12) 276 #define VMCB_CTRL_INTERCEPT_WTR __BIT(13) 277 #define VMCB_CTRL_INTERCEPT_RDTSC __BIT(14) 278 #define VMCB_CTRL_INTERCEPT_RDPMC __BIT(15) 279 #define VMCB_CTRL_INTERCEPT_PUSHF __BIT(16) 280 #define VMCB_CTRL_INTERCEPT_POPF __BIT(17) 281 #define VMCB_CTRL_INTERCEPT_CPUID __BIT(18) 282 #define VMCB_CTRL_INTERCEPT_RSM __BIT(19) 283 #define VMCB_CTRL_INTERCEPT_IRET __BIT(20) 284 #define VMCB_CTRL_INTERCEPT_INTN __BIT(21) 285 #define VMCB_CTRL_INTERCEPT_INVD __BIT(22) 286 #define VMCB_CTRL_INTERCEPT_PAUSE __BIT(23) 287 #define VMCB_CTRL_INTERCEPT_HLT __BIT(24) 288 #define VMCB_CTRL_INTERCEPT_INVLPG __BIT(25) 289 #define VMCB_CTRL_INTERCEPT_INVLPGA __BIT(26) 290 #define VMCB_CTRL_INTERCEPT_IOIO_PROT __BIT(27) 291 #define VMCB_CTRL_INTERCEPT_MSR_PROT __BIT(28) 292 #define VMCB_CTRL_INTERCEPT_TASKSW __BIT(29) 293 #define VMCB_CTRL_INTERCEPT_FERR_FREEZE __BIT(30) 294 #define VMCB_CTRL_INTERCEPT_SHUTDOWN __BIT(31) 295 296 uint32_t intercept_misc2; 297 #define VMCB_CTRL_INTERCEPT_VMRUN __BIT(0) 298 #define VMCB_CTRL_INTERCEPT_VMMCALL __BIT(1) 299 #define VMCB_CTRL_INTERCEPT_VMLOAD __BIT(2) 300 #define VMCB_CTRL_INTERCEPT_VMSAVE __BIT(3) 301 #define VMCB_CTRL_INTERCEPT_STGI __BIT(4) 302 #define VMCB_CTRL_INTERCEPT_CLGI __BIT(5) 303 #define VMCB_CTRL_INTERCEPT_SKINIT __BIT(6) 304 #define VMCB_CTRL_INTERCEPT_RDTSCP __BIT(7) 305 #define VMCB_CTRL_INTERCEPT_ICEBP __BIT(8) 306 #define VMCB_CTRL_INTERCEPT_WBINVD __BIT(9) 307 #define VMCB_CTRL_INTERCEPT_MONITOR __BIT(10) 308 #define VMCB_CTRL_INTERCEPT_MWAIT __BIT(11) 309 #define VMCB_CTRL_INTERCEPT_MWAIT_ARMED __BIT(12) 310 #define VMCB_CTRL_INTERCEPT_XSETBV __BIT(13) 311 #define VMCB_CTRL_INTERCEPT_RDPRU __BIT(14) 312 #define VMCB_CTRL_INTERCEPT_EFER_SPEC __BIT(15) 313 #define VMCB_CTRL_INTERCEPT_WCR_SPEC(x) __BIT(16 + x) 314 315 uint32_t intercept_misc3; 316 #define VMCB_CTRL_INTERCEPT_INVLPGB_ALL __BIT(0) 317 #define VMCB_CTRL_INTERCEPT_INVLPGB_ILL __BIT(1) 318 #define VMCB_CTRL_INTERCEPT_PCID __BIT(2) 319 #define VMCB_CTRL_INTERCEPT_MCOMMIT __BIT(3) 320 #define VMCB_CTRL_INTERCEPT_TLBSYNC __BIT(4) 321 322 uint8_t rsvd1[36]; 323 uint16_t pause_filt_thresh; 324 uint16_t pause_filt_cnt; 325 uint64_t iopm_base_pa; 326 uint64_t msrpm_base_pa; 327 uint64_t tsc_offset; 328 uint32_t guest_asid; 329 330 uint32_t tlb_ctrl; 331 #define VMCB_CTRL_TLB_CTRL_FLUSH_ALL 0x01 332 #define VMCB_CTRL_TLB_CTRL_FLUSH_GUEST 0x03 333 #define VMCB_CTRL_TLB_CTRL_FLUSH_GUEST_NONGLOBAL 0x07 334 335 uint64_t v; 336 #define VMCB_CTRL_V_TPR __BITS(3,0) 337 #define VMCB_CTRL_V_IRQ __BIT(8) 338 #define VMCB_CTRL_V_VGIF __BIT(9) 339 #define VMCB_CTRL_V_INTR_PRIO __BITS(19,16) 340 #define VMCB_CTRL_V_IGN_TPR __BIT(20) 341 #define VMCB_CTRL_V_INTR_MASKING __BIT(24) 342 #define VMCB_CTRL_V_GUEST_VGIF __BIT(25) 343 #define VMCB_CTRL_V_AVIC_EN __BIT(31) 344 #define VMCB_CTRL_V_INTR_VECTOR __BITS(39,32) 345 346 uint64_t intr; 347 #define VMCB_CTRL_INTR_SHADOW __BIT(0) 348 #define VMCB_CTRL_INTR_MASK __BIT(1) 349 350 uint64_t exitcode; 351 uint64_t exitinfo1; 352 uint64_t exitinfo2; 353 354 uint64_t exitintinfo; 355 #define VMCB_CTRL_EXITINTINFO_VECTOR __BITS(7,0) 356 #define VMCB_CTRL_EXITINTINFO_TYPE __BITS(10,8) 357 #define VMCB_CTRL_EXITINTINFO_EV __BIT(11) 358 #define VMCB_CTRL_EXITINTINFO_V __BIT(31) 359 #define VMCB_CTRL_EXITINTINFO_ERRORCODE __BITS(63,32) 360 361 uint64_t enable1; 362 #define 
VMCB_CTRL_ENABLE_NP __BIT(0) 363 #define VMCB_CTRL_ENABLE_SEV __BIT(1) 364 #define VMCB_CTRL_ENABLE_ES_SEV __BIT(2) 365 #define VMCB_CTRL_ENABLE_GMET __BIT(3) 366 #define VMCB_CTRL_ENABLE_VTE __BIT(5) 367 368 uint64_t avic; 369 #define VMCB_CTRL_AVIC_APIC_BAR __BITS(51,0) 370 371 uint64_t ghcb; 372 373 uint64_t eventinj; 374 #define VMCB_CTRL_EVENTINJ_VECTOR __BITS(7,0) 375 #define VMCB_CTRL_EVENTINJ_TYPE __BITS(10,8) 376 #define VMCB_CTRL_EVENTINJ_EV __BIT(11) 377 #define VMCB_CTRL_EVENTINJ_V __BIT(31) 378 #define VMCB_CTRL_EVENTINJ_ERRORCODE __BITS(63,32) 379 380 uint64_t n_cr3; 381 382 uint64_t enable2; 383 #define VMCB_CTRL_ENABLE_LBR __BIT(0) 384 #define VMCB_CTRL_ENABLE_VVMSAVE __BIT(1) 385 386 uint32_t vmcb_clean; 387 #define VMCB_CTRL_VMCB_CLEAN_I __BIT(0) 388 #define VMCB_CTRL_VMCB_CLEAN_IOPM __BIT(1) 389 #define VMCB_CTRL_VMCB_CLEAN_ASID __BIT(2) 390 #define VMCB_CTRL_VMCB_CLEAN_TPR __BIT(3) 391 #define VMCB_CTRL_VMCB_CLEAN_NP __BIT(4) 392 #define VMCB_CTRL_VMCB_CLEAN_CR __BIT(5) 393 #define VMCB_CTRL_VMCB_CLEAN_DR __BIT(6) 394 #define VMCB_CTRL_VMCB_CLEAN_DT __BIT(7) 395 #define VMCB_CTRL_VMCB_CLEAN_SEG __BIT(8) 396 #define VMCB_CTRL_VMCB_CLEAN_CR2 __BIT(9) 397 #define VMCB_CTRL_VMCB_CLEAN_LBR __BIT(10) 398 #define VMCB_CTRL_VMCB_CLEAN_AVIC __BIT(11) 399 400 uint32_t rsvd2; 401 uint64_t nrip; 402 uint8_t inst_len; 403 uint8_t inst_bytes[15]; 404 uint64_t avic_abpp; 405 uint64_t rsvd3; 406 uint64_t avic_ltp; 407 408 uint64_t avic_phys; 409 #define VMCB_CTRL_AVIC_PHYS_TABLE_PTR __BITS(51,12) 410 #define VMCB_CTRL_AVIC_PHYS_MAX_INDEX __BITS(7,0) 411 412 uint64_t rsvd4; 413 uint64_t vmsa_ptr; 414 415 uint8_t pad[752]; 416 } __packed; 417 418 CTASSERT(sizeof(struct vmcb_ctrl) == 1024); 419 420 struct vmcb_segment { 421 uint16_t selector; 422 uint16_t attrib; /* hidden */ 423 uint32_t limit; /* hidden */ 424 uint64_t base; /* hidden */ 425 } __packed; 426 427 CTASSERT(sizeof(struct vmcb_segment) == 16); 428 429 struct vmcb_state { 430 struct vmcb_segment es; 431 struct vmcb_segment cs; 432 struct vmcb_segment ss; 433 struct vmcb_segment ds; 434 struct vmcb_segment fs; 435 struct vmcb_segment gs; 436 struct vmcb_segment gdt; 437 struct vmcb_segment ldt; 438 struct vmcb_segment idt; 439 struct vmcb_segment tr; 440 uint8_t rsvd1[43]; 441 uint8_t cpl; 442 uint8_t rsvd2[4]; 443 uint64_t efer; 444 uint8_t rsvd3[112]; 445 uint64_t cr4; 446 uint64_t cr3; 447 uint64_t cr0; 448 uint64_t dr7; 449 uint64_t dr6; 450 uint64_t rflags; 451 uint64_t rip; 452 uint8_t rsvd4[88]; 453 uint64_t rsp; 454 uint8_t rsvd5[24]; 455 uint64_t rax; 456 uint64_t star; 457 uint64_t lstar; 458 uint64_t cstar; 459 uint64_t sfmask; 460 uint64_t kernelgsbase; 461 uint64_t sysenter_cs; 462 uint64_t sysenter_esp; 463 uint64_t sysenter_eip; 464 uint64_t cr2; 465 uint8_t rsvd6[32]; 466 uint64_t g_pat; 467 uint64_t dbgctl; 468 uint64_t br_from; 469 uint64_t br_to; 470 uint64_t int_from; 471 uint64_t int_to; 472 uint8_t pad[2408]; 473 } __packed; 474 475 CTASSERT(sizeof(struct vmcb_state) == 0xC00); 476 477 struct vmcb { 478 struct vmcb_ctrl ctrl; 479 struct vmcb_state state; 480 } __packed; 481 482 CTASSERT(sizeof(struct vmcb) == PAGE_SIZE); 483 CTASSERT(offsetof(struct vmcb, state) == 0x400); 484 485 /* -------------------------------------------------------------------------- */ 486 487 static void svm_vcpu_state_provide(struct nvmm_cpu *, uint64_t); 488 static void svm_vcpu_state_commit(struct nvmm_cpu *); 489 490 struct svm_hsave { 491 paddr_t pa; 492 }; 493 494 static struct svm_hsave hsave[MAXCPUS]; 495 496 static 
uint8_t *svm_asidmap __read_mostly; 497 static uint32_t svm_maxasid __read_mostly; 498 static kmutex_t svm_asidlock __cacheline_aligned; 499 500 static bool svm_decode_assist __read_mostly; 501 static uint32_t svm_ctrl_tlb_flush __read_mostly; 502 503 #define SVM_XCR0_MASK_DEFAULT (XCR0_X87|XCR0_SSE) 504 static uint64_t svm_xcr0_mask __read_mostly; 505 506 #define SVM_NCPUIDS 32 507 508 #define VMCB_NPAGES 1 509 510 #define MSRBM_NPAGES 2 511 #define MSRBM_SIZE (MSRBM_NPAGES * PAGE_SIZE) 512 513 #define IOBM_NPAGES 3 514 #define IOBM_SIZE (IOBM_NPAGES * PAGE_SIZE) 515 516 /* Does not include EFER_LMSLE. */ 517 #define EFER_VALID \ 518 (EFER_SCE|EFER_LME|EFER_LMA|EFER_NXE|EFER_SVME|EFER_FFXSR|EFER_TCE) 519 520 #define EFER_TLB_FLUSH \ 521 (EFER_NXE|EFER_LMA|EFER_LME) 522 #define CR0_TLB_FLUSH \ 523 (CR0_PG|CR0_WP|CR0_CD|CR0_NW) 524 #define CR4_TLB_FLUSH \ 525 (CR4_PSE|CR4_PAE|CR4_PGE|CR4_PCIDE|CR4_SMEP) 526 527 #define CR4_VALID \ 528 (CR4_VME | \ 529 CR4_PVI | \ 530 CR4_TSD | \ 531 CR4_DE | \ 532 CR4_PSE | \ 533 CR4_PAE | \ 534 CR4_MCE | \ 535 CR4_PGE | \ 536 CR4_PCE | \ 537 CR4_OSFXSR | \ 538 CR4_OSXMMEXCPT | \ 539 CR4_UMIP | \ 540 /* CR4_LA57 excluded */ \ 541 /* bit 13 reserved on AMD */ \ 542 /* bit 14 reserved on AMD */ \ 543 /* bit 15 reserved on AMD */ \ 544 CR4_FSGSBASE | \ 545 CR4_PCIDE | \ 546 CR4_OSXSAVE | \ 547 /* bit 19 reserved on AMD */ \ 548 CR4_SMEP | \ 549 CR4_SMAP \ 550 /* CR4_PKE excluded */ \ 551 /* CR4_CET excluded */ \ 552 /* bits 24:63 reserved on AMD */) 553 554 /* -------------------------------------------------------------------------- */ 555 556 struct svm_machdata { 557 volatile uint64_t mach_htlb_gen; 558 }; 559 560 static const size_t svm_vcpu_conf_sizes[NVMM_X86_VCPU_NCONF] = { 561 [NVMM_VCPU_CONF_MD(NVMM_VCPU_CONF_CPUID)] = 562 sizeof(struct nvmm_vcpu_conf_cpuid), 563 [NVMM_VCPU_CONF_MD(NVMM_VCPU_CONF_TPR)] = 564 sizeof(struct nvmm_vcpu_conf_tpr) 565 }; 566 567 struct svm_cpudata { 568 /* General */ 569 bool shared_asid; 570 bool gtlb_want_flush; 571 bool gtsc_want_update; 572 uint64_t vcpu_htlb_gen; 573 574 /* VMCB */ 575 struct vmcb *vmcb; 576 paddr_t vmcb_pa; 577 578 /* I/O bitmap */ 579 uint8_t *iobm; 580 paddr_t iobm_pa; 581 582 /* MSR bitmap */ 583 uint8_t *msrbm; 584 paddr_t msrbm_pa; 585 586 /* Host state */ 587 uint64_t hxcr0; 588 uint64_t star; 589 uint64_t lstar; 590 uint64_t cstar; 591 uint64_t sfmask; 592 uint64_t fsbase; 593 uint64_t kernelgsbase; 594 595 /* Intr state */ 596 bool int_window_exit; 597 bool nmi_window_exit; 598 bool evt_pending; 599 600 /* Guest state */ 601 uint64_t gxcr0; 602 uint64_t gprs[NVMM_X64_NGPR]; 603 uint64_t drs[NVMM_X64_NDR]; 604 uint64_t gtsc; 605 struct xsave_header gfpu __aligned(64); 606 607 /* VCPU configuration. 
*/ 608 bool cpuidpresent[SVM_NCPUIDS]; 609 struct nvmm_vcpu_conf_cpuid cpuid[SVM_NCPUIDS]; 610 }; 611 612 static void 613 svm_vmcb_cache_default(struct vmcb *vmcb) 614 { 615 vmcb->ctrl.vmcb_clean = 616 VMCB_CTRL_VMCB_CLEAN_I | 617 VMCB_CTRL_VMCB_CLEAN_IOPM | 618 VMCB_CTRL_VMCB_CLEAN_ASID | 619 VMCB_CTRL_VMCB_CLEAN_TPR | 620 VMCB_CTRL_VMCB_CLEAN_NP | 621 VMCB_CTRL_VMCB_CLEAN_CR | 622 VMCB_CTRL_VMCB_CLEAN_DR | 623 VMCB_CTRL_VMCB_CLEAN_DT | 624 VMCB_CTRL_VMCB_CLEAN_SEG | 625 VMCB_CTRL_VMCB_CLEAN_CR2 | 626 VMCB_CTRL_VMCB_CLEAN_LBR | 627 VMCB_CTRL_VMCB_CLEAN_AVIC; 628 } 629 630 static void 631 svm_vmcb_cache_update(struct vmcb *vmcb, uint64_t flags) 632 { 633 if (flags & NVMM_X64_STATE_SEGS) { 634 vmcb->ctrl.vmcb_clean &= 635 ~(VMCB_CTRL_VMCB_CLEAN_SEG | VMCB_CTRL_VMCB_CLEAN_DT); 636 } 637 if (flags & NVMM_X64_STATE_CRS) { 638 vmcb->ctrl.vmcb_clean &= 639 ~(VMCB_CTRL_VMCB_CLEAN_CR | VMCB_CTRL_VMCB_CLEAN_CR2 | 640 VMCB_CTRL_VMCB_CLEAN_TPR); 641 } 642 if (flags & NVMM_X64_STATE_DRS) { 643 vmcb->ctrl.vmcb_clean &= ~VMCB_CTRL_VMCB_CLEAN_DR; 644 } 645 if (flags & NVMM_X64_STATE_MSRS) { 646 /* CR for EFER, NP for PAT. */ 647 vmcb->ctrl.vmcb_clean &= 648 ~(VMCB_CTRL_VMCB_CLEAN_CR | VMCB_CTRL_VMCB_CLEAN_NP); 649 } 650 } 651 652 static inline void 653 svm_vmcb_cache_flush(struct vmcb *vmcb, uint64_t flags) 654 { 655 vmcb->ctrl.vmcb_clean &= ~flags; 656 } 657 658 static inline void 659 svm_vmcb_cache_flush_all(struct vmcb *vmcb) 660 { 661 vmcb->ctrl.vmcb_clean = 0; 662 } 663 664 #define SVM_EVENT_TYPE_HW_INT 0 665 #define SVM_EVENT_TYPE_NMI 2 666 #define SVM_EVENT_TYPE_EXC 3 667 #define SVM_EVENT_TYPE_SW_INT 4 668 669 static void 670 svm_event_waitexit_enable(struct nvmm_cpu *vcpu, bool nmi) 671 { 672 struct svm_cpudata *cpudata = vcpu->cpudata; 673 struct vmcb *vmcb = cpudata->vmcb; 674 675 if (nmi) { 676 vmcb->ctrl.intercept_misc1 |= VMCB_CTRL_INTERCEPT_IRET; 677 cpudata->nmi_window_exit = true; 678 } else { 679 vmcb->ctrl.intercept_misc1 |= VMCB_CTRL_INTERCEPT_VINTR; 680 vmcb->ctrl.v |= (VMCB_CTRL_V_IRQ | VMCB_CTRL_V_IGN_TPR); 681 svm_vmcb_cache_flush(vmcb, VMCB_CTRL_VMCB_CLEAN_TPR); 682 cpudata->int_window_exit = true; 683 } 684 685 svm_vmcb_cache_flush(vmcb, VMCB_CTRL_VMCB_CLEAN_I); 686 } 687 688 static void 689 svm_event_waitexit_disable(struct nvmm_cpu *vcpu, bool nmi) 690 { 691 struct svm_cpudata *cpudata = vcpu->cpudata; 692 struct vmcb *vmcb = cpudata->vmcb; 693 694 if (nmi) { 695 vmcb->ctrl.intercept_misc1 &= ~VMCB_CTRL_INTERCEPT_IRET; 696 cpudata->nmi_window_exit = false; 697 } else { 698 vmcb->ctrl.intercept_misc1 &= ~VMCB_CTRL_INTERCEPT_VINTR; 699 vmcb->ctrl.v &= ~(VMCB_CTRL_V_IRQ | VMCB_CTRL_V_IGN_TPR); 700 svm_vmcb_cache_flush(vmcb, VMCB_CTRL_VMCB_CLEAN_TPR); 701 cpudata->int_window_exit = false; 702 } 703 704 svm_vmcb_cache_flush(vmcb, VMCB_CTRL_VMCB_CLEAN_I); 705 } 706 707 static inline bool 708 svm_excp_has_rf(uint8_t vector) 709 { 710 switch (vector) { 711 case 1: /* #DB */ 712 case 4: /* #OF */ 713 case 8: /* #DF */ 714 case 18: /* #MC */ 715 return false; 716 default: 717 return true; 718 } 719 } 720 721 static inline int 722 svm_excp_has_error(uint8_t vector) 723 { 724 switch (vector) { 725 case 8: /* #DF */ 726 case 10: /* #TS */ 727 case 11: /* #NP */ 728 case 12: /* #SS */ 729 case 13: /* #GP */ 730 case 14: /* #PF */ 731 case 17: /* #AC */ 732 case 30: /* #SX */ 733 return 1; 734 default: 735 return 0; 736 } 737 } 738 739 static int 740 svm_vcpu_inject(struct nvmm_cpu *vcpu) 741 { 742 struct nvmm_comm_page *comm = vcpu->comm; 743 struct svm_cpudata *cpudata = vcpu->cpudata; 744 
struct vmcb *vmcb = cpudata->vmcb; 745 u_int evtype; 746 uint8_t vector; 747 uint64_t error; 748 int type = 0, err = 0; 749 750 evtype = comm->event.type; 751 vector = comm->event.vector; 752 error = comm->event.u.excp.error; 753 __insn_barrier(); 754 755 switch (evtype) { 756 case NVMM_VCPU_EVENT_EXCP: 757 type = SVM_EVENT_TYPE_EXC; 758 if (vector == 2 || vector >= 32) 759 return EINVAL; 760 if (vector == 3 || vector == 0) 761 return EINVAL; 762 if (svm_excp_has_rf(vector)) { 763 vmcb->state.rflags |= PSL_RF; 764 } 765 err = svm_excp_has_error(vector); 766 break; 767 case NVMM_VCPU_EVENT_INTR: 768 type = SVM_EVENT_TYPE_HW_INT; 769 if (vector == 2) { 770 type = SVM_EVENT_TYPE_NMI; 771 svm_event_waitexit_enable(vcpu, true); 772 } 773 err = 0; 774 break; 775 default: 776 return EINVAL; 777 } 778 779 vmcb->ctrl.eventinj = 780 __SHIFTIN(vector, VMCB_CTRL_EVENTINJ_VECTOR) | 781 __SHIFTIN(type, VMCB_CTRL_EVENTINJ_TYPE) | 782 __SHIFTIN(err, VMCB_CTRL_EVENTINJ_EV) | 783 __SHIFTIN(1, VMCB_CTRL_EVENTINJ_V) | 784 __SHIFTIN(error, VMCB_CTRL_EVENTINJ_ERRORCODE); 785 786 cpudata->evt_pending = true; 787 788 return 0; 789 } 790 791 static void 792 svm_inject_ud(struct nvmm_cpu *vcpu) 793 { 794 struct nvmm_comm_page *comm = vcpu->comm; 795 int ret __diagused; 796 797 comm->event.type = NVMM_VCPU_EVENT_EXCP; 798 comm->event.vector = 6; 799 comm->event.u.excp.error = 0; 800 801 ret = svm_vcpu_inject(vcpu); 802 KASSERT(ret == 0); 803 } 804 805 static void 806 svm_inject_gp(struct nvmm_cpu *vcpu) 807 { 808 struct nvmm_comm_page *comm = vcpu->comm; 809 int ret __diagused; 810 811 comm->event.type = NVMM_VCPU_EVENT_EXCP; 812 comm->event.vector = 13; 813 comm->event.u.excp.error = 0; 814 815 ret = svm_vcpu_inject(vcpu); 816 KASSERT(ret == 0); 817 } 818 819 static inline int 820 svm_vcpu_event_commit(struct nvmm_cpu *vcpu) 821 { 822 if (__predict_true(!vcpu->comm->event_commit)) { 823 return 0; 824 } 825 vcpu->comm->event_commit = false; 826 return svm_vcpu_inject(vcpu); 827 } 828 829 static inline void 830 svm_inkernel_advance(struct vmcb *vmcb) 831 { 832 /* 833 * Maybe we should also apply single-stepping and debug exceptions. 834 * Matters for guest-ring3, because it can execute 'cpuid' under a 835 * debugger. 836 */ 837 vmcb->state.rip = vmcb->ctrl.nrip; 838 vmcb->state.rflags &= ~PSL_RF; 839 vmcb->ctrl.intr &= ~VMCB_CTRL_INTR_SHADOW; 840 } 841 842 #define SVM_CPUID_MAX_BASIC 0xD 843 #define SVM_CPUID_MAX_HYPERVISOR 0x40000010 844 #define SVM_CPUID_MAX_EXTENDED 0x8000001F 845 static uint32_t svm_cpuid_max_basic __read_mostly; 846 static uint32_t svm_cpuid_max_extended __read_mostly; 847 848 static void 849 svm_inkernel_exec_cpuid(struct svm_cpudata *cpudata, uint64_t eax, uint64_t ecx) 850 { 851 u_int descs[4]; 852 853 x86_cpuid2(eax, ecx, descs); 854 cpudata->vmcb->state.rax = descs[0]; 855 cpudata->gprs[NVMM_X64_GPR_RBX] = descs[1]; 856 cpudata->gprs[NVMM_X64_GPR_RCX] = descs[2]; 857 cpudata->gprs[NVMM_X64_GPR_RDX] = descs[3]; 858 } 859 860 static void 861 svm_inkernel_handle_cpuid(struct nvmm_cpu *vcpu, uint64_t eax, uint64_t ecx) 862 { 863 struct svm_cpudata *cpudata = vcpu->cpudata; 864 uint64_t cr4; 865 866 867 /* 868 * `If a value entered for CPUID.EAX is higher than the maximum 869 * input value for basic or extended function for that 870 * processor then the data for the highest basic information 871 * leaf is returned.' 872 * 873 * --Intel 64 and IA-32 Architectures Software Developer's 874 * Manual, Vol. 2A, Order Number: 325383-077US, April 2022, 875 * Sec. 
3.2 `Instructions (A-L)', CPUID--CPU Identification, 876 * p. 3-214. 877 * 878 * We take the same to hold for the hypervisor range, 879 * 0x40000000-0x4fffffff. 880 * 881 * (Sync with nvmm_x86_vmx.c.) 882 */ 883 if (eax < 0x40000000) { /* basic CPUID range */ 884 if (__predict_false(eax > svm_cpuid_max_basic)) { 885 eax = svm_cpuid_max_basic; 886 svm_inkernel_exec_cpuid(cpudata, eax, ecx); 887 } 888 } else if (eax < 0x80000000) { /* hypervisor CPUID range */ 889 if (__predict_false(eax > SVM_CPUID_MAX_HYPERVISOR)) { 890 eax = svm_cpuid_max_basic; 891 svm_inkernel_exec_cpuid(cpudata, eax, ecx); 892 } 893 } else { /* extended CPUID range */ 894 if (__predict_false(eax > svm_cpuid_max_extended)) { 895 eax = svm_cpuid_max_basic; 896 svm_inkernel_exec_cpuid(cpudata, eax, ecx); 897 } 898 } 899 900 switch (eax) { 901 902 /* 903 * basic CPUID range 904 */ 905 case 0x00000000: 906 cpudata->vmcb->state.rax = svm_cpuid_max_basic; 907 break; 908 case 0x00000001: 909 cpudata->vmcb->state.rax &= nvmm_cpuid_00000001.eax; 910 911 cpudata->gprs[NVMM_X64_GPR_RBX] &= ~CPUID_LOCAL_APIC_ID; 912 cpudata->gprs[NVMM_X64_GPR_RBX] |= __SHIFTIN(vcpu->cpuid, 913 CPUID_LOCAL_APIC_ID); 914 915 cpudata->gprs[NVMM_X64_GPR_RCX] &= nvmm_cpuid_00000001.ecx; 916 cpudata->gprs[NVMM_X64_GPR_RCX] |= CPUID2_RAZ; 917 918 cpudata->gprs[NVMM_X64_GPR_RDX] &= nvmm_cpuid_00000001.edx; 919 920 /* CPUID2_OSXSAVE depends on CR4. */ 921 cr4 = cpudata->vmcb->state.cr4; 922 if (!(cr4 & CR4_OSXSAVE)) { 923 cpudata->gprs[NVMM_X64_GPR_RCX] &= ~CPUID2_OSXSAVE; 924 } 925 break; 926 case 0x00000002: /* Empty */ 927 case 0x00000003: /* Empty */ 928 case 0x00000004: /* Empty */ 929 case 0x00000005: /* Monitor/MWait */ 930 case 0x00000006: /* Power Management Related Features */ 931 cpudata->vmcb->state.rax = 0; 932 cpudata->gprs[NVMM_X64_GPR_RBX] = 0; 933 cpudata->gprs[NVMM_X64_GPR_RCX] = 0; 934 cpudata->gprs[NVMM_X64_GPR_RDX] = 0; 935 break; 936 case 0x00000007: /* Structured Extended Features */ 937 switch (ecx) { 938 case 0: 939 cpudata->vmcb->state.rax = 0; 940 cpudata->gprs[NVMM_X64_GPR_RBX] &= nvmm_cpuid_00000007.ebx; 941 cpudata->gprs[NVMM_X64_GPR_RCX] &= nvmm_cpuid_00000007.ecx; 942 cpudata->gprs[NVMM_X64_GPR_RDX] &= nvmm_cpuid_00000007.edx; 943 break; 944 default: 945 cpudata->vmcb->state.rax = 0; 946 cpudata->gprs[NVMM_X64_GPR_RBX] = 0; 947 cpudata->gprs[NVMM_X64_GPR_RCX] = 0; 948 cpudata->gprs[NVMM_X64_GPR_RDX] = 0; 949 break; 950 } 951 break; 952 case 0x00000008: /* Empty */ 953 case 0x00000009: /* Empty */ 954 case 0x0000000A: /* Empty */ 955 case 0x0000000B: /* Empty */ 956 case 0x0000000C: /* Empty */ 957 cpudata->vmcb->state.rax = 0; 958 cpudata->gprs[NVMM_X64_GPR_RBX] = 0; 959 cpudata->gprs[NVMM_X64_GPR_RCX] = 0; 960 cpudata->gprs[NVMM_X64_GPR_RDX] = 0; 961 break; 962 case 0x0000000D: /* Processor Extended State Enumeration */ 963 if (svm_xcr0_mask == 0) { 964 break; 965 } 966 switch (ecx) { 967 case 0: 968 cpudata->vmcb->state.rax = svm_xcr0_mask & 0xFFFFFFFF; 969 if (cpudata->gxcr0 & XCR0_SSE) { 970 cpudata->gprs[NVMM_X64_GPR_RBX] = sizeof(struct fxsave); 971 } else { 972 cpudata->gprs[NVMM_X64_GPR_RBX] = sizeof(struct save87); 973 } 974 cpudata->gprs[NVMM_X64_GPR_RBX] += 64; /* XSAVE header */ 975 cpudata->gprs[NVMM_X64_GPR_RCX] = sizeof(struct fxsave) + 64; 976 cpudata->gprs[NVMM_X64_GPR_RDX] = svm_xcr0_mask >> 32; 977 break; 978 case 1: 979 cpudata->vmcb->state.rax &= 980 (CPUID_PES1_XSAVEOPT | CPUID_PES1_XSAVEC | 981 CPUID_PES1_XGETBV); 982 cpudata->gprs[NVMM_X64_GPR_RBX] = 0; 983 cpudata->gprs[NVMM_X64_GPR_RCX] = 0; 984 
cpudata->gprs[NVMM_X64_GPR_RDX] = 0; 985 break; 986 default: 987 cpudata->vmcb->state.rax = 0; 988 cpudata->gprs[NVMM_X64_GPR_RBX] = 0; 989 cpudata->gprs[NVMM_X64_GPR_RCX] = 0; 990 cpudata->gprs[NVMM_X64_GPR_RDX] = 0; 991 break; 992 } 993 break; 994 995 /* 996 * hypervisor CPUID range 997 */ 998 case 0x40000000: /* Hypervisor Information */ 999 cpudata->vmcb->state.rax = SVM_CPUID_MAX_HYPERVISOR; 1000 cpudata->gprs[NVMM_X64_GPR_RBX] = 0; 1001 cpudata->gprs[NVMM_X64_GPR_RCX] = 0; 1002 cpudata->gprs[NVMM_X64_GPR_RDX] = 0; 1003 memcpy(&cpudata->gprs[NVMM_X64_GPR_RBX], "___ ", 4); 1004 memcpy(&cpudata->gprs[NVMM_X64_GPR_RCX], "NVMM", 4); 1005 memcpy(&cpudata->gprs[NVMM_X64_GPR_RDX], " ___", 4); 1006 break; 1007 case 0x40000010: /* VMware-style TSC and LAPIC freq */ 1008 cpudata->gprs[NVMM_X64_GPR_RAX] = curcpu()->ci_data.cpu_cc_freq / 1000; 1009 if (has_lapic()) 1010 cpudata->gprs[NVMM_X64_GPR_RBX] = lapic_per_second / 1000; 1011 else 1012 cpudata->gprs[NVMM_X64_GPR_RBX] = 0; 1013 cpudata->gprs[NVMM_X64_GPR_RCX] = 0; 1014 cpudata->gprs[NVMM_X64_GPR_RDX] = 0; 1015 break; 1016 1017 /* 1018 * extended CPUID range 1019 */ 1020 case 0x80000000: 1021 cpudata->vmcb->state.rax = svm_cpuid_max_extended; 1022 break; 1023 case 0x80000001: 1024 cpudata->vmcb->state.rax &= nvmm_cpuid_80000001.eax; 1025 cpudata->gprs[NVMM_X64_GPR_RBX] &= nvmm_cpuid_80000001.ebx; 1026 cpudata->gprs[NVMM_X64_GPR_RCX] &= nvmm_cpuid_80000001.ecx; 1027 cpudata->gprs[NVMM_X64_GPR_RDX] &= nvmm_cpuid_80000001.edx; 1028 break; 1029 case 0x80000002: /* Extended Processor Name String */ 1030 case 0x80000003: /* Extended Processor Name String */ 1031 case 0x80000004: /* Extended Processor Name String */ 1032 case 0x80000005: /* L1 Cache and TLB Information */ 1033 case 0x80000006: /* L2 Cache and TLB and L3 Cache Information */ 1034 break; 1035 case 0x80000007: /* Processor Power Management and RAS Capabilities */ 1036 cpudata->vmcb->state.rax &= nvmm_cpuid_80000007.eax; 1037 cpudata->gprs[NVMM_X64_GPR_RBX] &= nvmm_cpuid_80000007.ebx; 1038 cpudata->gprs[NVMM_X64_GPR_RCX] &= nvmm_cpuid_80000007.ecx; 1039 cpudata->gprs[NVMM_X64_GPR_RDX] &= nvmm_cpuid_80000007.edx; 1040 break; 1041 case 0x80000008: /* Processor Capacity Parameters and Ext Feat Ident */ 1042 cpudata->vmcb->state.rax &= nvmm_cpuid_80000008.eax; 1043 cpudata->gprs[NVMM_X64_GPR_RBX] &= nvmm_cpuid_80000008.ebx; 1044 cpudata->gprs[NVMM_X64_GPR_RCX] &= nvmm_cpuid_80000008.ecx; 1045 cpudata->gprs[NVMM_X64_GPR_RDX] &= nvmm_cpuid_80000008.edx; 1046 break; 1047 case 0x80000009: /* Empty */ 1048 case 0x8000000A: /* SVM Features */ 1049 case 0x8000000B: /* Empty */ 1050 case 0x8000000C: /* Empty */ 1051 case 0x8000000D: /* Empty */ 1052 case 0x8000000E: /* Empty */ 1053 case 0x8000000F: /* Empty */ 1054 case 0x80000010: /* Empty */ 1055 case 0x80000011: /* Empty */ 1056 case 0x80000012: /* Empty */ 1057 case 0x80000013: /* Empty */ 1058 case 0x80000014: /* Empty */ 1059 case 0x80000015: /* Empty */ 1060 case 0x80000016: /* Empty */ 1061 case 0x80000017: /* Empty */ 1062 case 0x80000018: /* Empty */ 1063 cpudata->vmcb->state.rax = 0; 1064 cpudata->gprs[NVMM_X64_GPR_RBX] = 0; 1065 cpudata->gprs[NVMM_X64_GPR_RCX] = 0; 1066 cpudata->gprs[NVMM_X64_GPR_RDX] = 0; 1067 break; 1068 case 0x80000019: /* TLB Characteristics for 1GB pages */ 1069 case 0x8000001A: /* Instruction Optimizations */ 1070 break; 1071 case 0x8000001B: /* Instruction-Based Sampling Capabilities */ 1072 case 0x8000001C: /* Lightweight Profiling Capabilities */ 1073 cpudata->vmcb->state.rax = 0; 1074 
cpudata->gprs[NVMM_X64_GPR_RBX] = 0; 1075 cpudata->gprs[NVMM_X64_GPR_RCX] = 0; 1076 cpudata->gprs[NVMM_X64_GPR_RDX] = 0; 1077 break; 1078 case 0x8000001D: /* Cache Topology Information */ 1079 case 0x8000001E: /* Processor Topology Information */ 1080 break; /* TODO? */ 1081 case 0x8000001F: /* Encrypted Memory Capabilities */ 1082 cpudata->vmcb->state.rax = 0; 1083 cpudata->gprs[NVMM_X64_GPR_RBX] = 0; 1084 cpudata->gprs[NVMM_X64_GPR_RCX] = 0; 1085 cpudata->gprs[NVMM_X64_GPR_RDX] = 0; 1086 break; 1087 1088 default: 1089 break; 1090 } 1091 } 1092 1093 static void 1094 svm_exit_insn(struct vmcb *vmcb, struct nvmm_vcpu_exit *exit, uint64_t reason) 1095 { 1096 exit->u.insn.npc = vmcb->ctrl.nrip; 1097 exit->reason = reason; 1098 } 1099 1100 static void 1101 svm_exit_cpuid(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 1102 struct nvmm_vcpu_exit *exit) 1103 { 1104 struct svm_cpudata *cpudata = vcpu->cpudata; 1105 struct nvmm_vcpu_conf_cpuid *cpuid; 1106 uint64_t eax, ecx; 1107 size_t i; 1108 1109 eax = cpudata->vmcb->state.rax; 1110 ecx = cpudata->gprs[NVMM_X64_GPR_RCX]; 1111 svm_inkernel_exec_cpuid(cpudata, eax, ecx); 1112 svm_inkernel_handle_cpuid(vcpu, eax, ecx); 1113 1114 for (i = 0; i < SVM_NCPUIDS; i++) { 1115 if (!cpudata->cpuidpresent[i]) { 1116 continue; 1117 } 1118 cpuid = &cpudata->cpuid[i]; 1119 if (cpuid->leaf != eax) { 1120 continue; 1121 } 1122 1123 if (cpuid->exit) { 1124 svm_exit_insn(cpudata->vmcb, exit, NVMM_VCPU_EXIT_CPUID); 1125 return; 1126 } 1127 KASSERT(cpuid->mask); 1128 1129 /* del */ 1130 cpudata->vmcb->state.rax &= ~cpuid->u.mask.del.eax; 1131 cpudata->gprs[NVMM_X64_GPR_RBX] &= ~cpuid->u.mask.del.ebx; 1132 cpudata->gprs[NVMM_X64_GPR_RCX] &= ~cpuid->u.mask.del.ecx; 1133 cpudata->gprs[NVMM_X64_GPR_RDX] &= ~cpuid->u.mask.del.edx; 1134 1135 /* set */ 1136 cpudata->vmcb->state.rax |= cpuid->u.mask.set.eax; 1137 cpudata->gprs[NVMM_X64_GPR_RBX] |= cpuid->u.mask.set.ebx; 1138 cpudata->gprs[NVMM_X64_GPR_RCX] |= cpuid->u.mask.set.ecx; 1139 cpudata->gprs[NVMM_X64_GPR_RDX] |= cpuid->u.mask.set.edx; 1140 1141 break; 1142 } 1143 1144 svm_inkernel_advance(cpudata->vmcb); 1145 exit->reason = NVMM_VCPU_EXIT_NONE; 1146 } 1147 1148 static void 1149 svm_exit_hlt(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 1150 struct nvmm_vcpu_exit *exit) 1151 { 1152 struct svm_cpudata *cpudata = vcpu->cpudata; 1153 struct vmcb *vmcb = cpudata->vmcb; 1154 1155 if (cpudata->int_window_exit && (vmcb->state.rflags & PSL_I)) { 1156 svm_event_waitexit_disable(vcpu, false); 1157 } 1158 1159 svm_inkernel_advance(cpudata->vmcb); 1160 exit->reason = NVMM_VCPU_EXIT_HALTED; 1161 } 1162 1163 #define SVM_EXIT_IO_PORT __BITS(31,16) 1164 #define SVM_EXIT_IO_SEG __BITS(12,10) 1165 #define SVM_EXIT_IO_A64 __BIT(9) 1166 #define SVM_EXIT_IO_A32 __BIT(8) 1167 #define SVM_EXIT_IO_A16 __BIT(7) 1168 #define SVM_EXIT_IO_SZ32 __BIT(6) 1169 #define SVM_EXIT_IO_SZ16 __BIT(5) 1170 #define SVM_EXIT_IO_SZ8 __BIT(4) 1171 #define SVM_EXIT_IO_REP __BIT(3) 1172 #define SVM_EXIT_IO_STR __BIT(2) 1173 #define SVM_EXIT_IO_IN __BIT(0) 1174 1175 static void 1176 svm_exit_io(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 1177 struct nvmm_vcpu_exit *exit) 1178 { 1179 struct svm_cpudata *cpudata = vcpu->cpudata; 1180 uint64_t info = cpudata->vmcb->ctrl.exitinfo1; 1181 uint64_t nextpc = cpudata->vmcb->ctrl.exitinfo2; 1182 1183 exit->reason = NVMM_VCPU_EXIT_IO; 1184 1185 exit->u.io.in = (info & SVM_EXIT_IO_IN) != 0; 1186 exit->u.io.port = __SHIFTOUT(info, SVM_EXIT_IO_PORT); 1187 1188 if (svm_decode_assist) { 1189 
KASSERT(__SHIFTOUT(info, SVM_EXIT_IO_SEG) < 6); 1190 exit->u.io.seg = __SHIFTOUT(info, SVM_EXIT_IO_SEG); 1191 } else { 1192 exit->u.io.seg = -1; 1193 } 1194 1195 if (info & SVM_EXIT_IO_A64) { 1196 exit->u.io.address_size = 8; 1197 } else if (info & SVM_EXIT_IO_A32) { 1198 exit->u.io.address_size = 4; 1199 } else if (info & SVM_EXIT_IO_A16) { 1200 exit->u.io.address_size = 2; 1201 } 1202 1203 if (info & SVM_EXIT_IO_SZ32) { 1204 exit->u.io.operand_size = 4; 1205 } else if (info & SVM_EXIT_IO_SZ16) { 1206 exit->u.io.operand_size = 2; 1207 } else if (info & SVM_EXIT_IO_SZ8) { 1208 exit->u.io.operand_size = 1; 1209 } 1210 1211 exit->u.io.rep = (info & SVM_EXIT_IO_REP) != 0; 1212 exit->u.io.str = (info & SVM_EXIT_IO_STR) != 0; 1213 exit->u.io.npc = nextpc; 1214 1215 svm_vcpu_state_provide(vcpu, 1216 NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS | 1217 NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS); 1218 } 1219 1220 static const uint64_t msr_ignore_list[] = { 1221 0xc0010055, /* MSR_CMPHALT */ 1222 MSR_DE_CFG, 1223 MSR_IC_CFG, 1224 MSR_UCODE_AMD_PATCHLEVEL 1225 }; 1226 1227 static bool 1228 svm_inkernel_handle_msr(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 1229 struct nvmm_vcpu_exit *exit) 1230 { 1231 struct svm_cpudata *cpudata = vcpu->cpudata; 1232 struct vmcb *vmcb = cpudata->vmcb; 1233 uint64_t val; 1234 size_t i; 1235 1236 if (exit->reason == NVMM_VCPU_EXIT_RDMSR) { 1237 if (exit->u.rdmsr.msr == MSR_EFER) { 1238 val = vmcb->state.efer & ~EFER_SVME; 1239 vmcb->state.rax = (val & 0xFFFFFFFF); 1240 cpudata->gprs[NVMM_X64_GPR_RDX] = (val >> 32); 1241 goto handled; 1242 } 1243 if (exit->u.rdmsr.msr == MSR_NB_CFG) { 1244 val = NB_CFG_INITAPICCPUIDLO; 1245 vmcb->state.rax = (val & 0xFFFFFFFF); 1246 cpudata->gprs[NVMM_X64_GPR_RDX] = (val >> 32); 1247 goto handled; 1248 } 1249 for (i = 0; i < __arraycount(msr_ignore_list); i++) { 1250 if (msr_ignore_list[i] != exit->u.rdmsr.msr) 1251 continue; 1252 val = 0; 1253 vmcb->state.rax = (val & 0xFFFFFFFF); 1254 cpudata->gprs[NVMM_X64_GPR_RDX] = (val >> 32); 1255 goto handled; 1256 } 1257 } else { 1258 if (exit->u.wrmsr.msr == MSR_EFER) { 1259 if (__predict_false(exit->u.wrmsr.val & ~EFER_VALID)) { 1260 goto error; 1261 } 1262 if ((vmcb->state.efer ^ exit->u.wrmsr.val) & 1263 EFER_TLB_FLUSH) { 1264 cpudata->gtlb_want_flush = true; 1265 } 1266 vmcb->state.efer = exit->u.wrmsr.val | EFER_SVME; 1267 svm_vmcb_cache_flush(vmcb, VMCB_CTRL_VMCB_CLEAN_CR); 1268 goto handled; 1269 } 1270 if (exit->u.wrmsr.msr == MSR_TSC) { 1271 cpudata->gtsc = exit->u.wrmsr.val; 1272 cpudata->gtsc_want_update = true; 1273 goto handled; 1274 } 1275 for (i = 0; i < __arraycount(msr_ignore_list); i++) { 1276 if (msr_ignore_list[i] != exit->u.wrmsr.msr) 1277 continue; 1278 goto handled; 1279 } 1280 } 1281 1282 return false; 1283 1284 handled: 1285 svm_inkernel_advance(cpudata->vmcb); 1286 return true; 1287 1288 error: 1289 svm_inject_gp(vcpu); 1290 return true; 1291 } 1292 1293 static inline void 1294 svm_exit_rdmsr(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 1295 struct nvmm_vcpu_exit *exit) 1296 { 1297 struct svm_cpudata *cpudata = vcpu->cpudata; 1298 1299 exit->reason = NVMM_VCPU_EXIT_RDMSR; 1300 exit->u.rdmsr.msr = (cpudata->gprs[NVMM_X64_GPR_RCX] & 0xFFFFFFFF); 1301 exit->u.rdmsr.npc = cpudata->vmcb->ctrl.nrip; 1302 1303 if (svm_inkernel_handle_msr(mach, vcpu, exit)) { 1304 exit->reason = NVMM_VCPU_EXIT_NONE; 1305 return; 1306 } 1307 1308 svm_vcpu_state_provide(vcpu, NVMM_X64_STATE_GPRS); 1309 } 1310 1311 static inline void 1312 svm_exit_wrmsr(struct nvmm_machine *mach, struct 
nvmm_cpu *vcpu, 1313 struct nvmm_vcpu_exit *exit) 1314 { 1315 struct svm_cpudata *cpudata = vcpu->cpudata; 1316 uint64_t rdx, rax; 1317 1318 rdx = cpudata->gprs[NVMM_X64_GPR_RDX]; 1319 rax = cpudata->vmcb->state.rax; 1320 1321 exit->reason = NVMM_VCPU_EXIT_WRMSR; 1322 exit->u.wrmsr.msr = (cpudata->gprs[NVMM_X64_GPR_RCX] & 0xFFFFFFFF); 1323 exit->u.wrmsr.val = (rdx << 32) | (rax & 0xFFFFFFFF); 1324 exit->u.wrmsr.npc = cpudata->vmcb->ctrl.nrip; 1325 1326 if (svm_inkernel_handle_msr(mach, vcpu, exit)) { 1327 exit->reason = NVMM_VCPU_EXIT_NONE; 1328 return; 1329 } 1330 1331 svm_vcpu_state_provide(vcpu, NVMM_X64_STATE_GPRS); 1332 } 1333 1334 static void 1335 svm_exit_msr(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 1336 struct nvmm_vcpu_exit *exit) 1337 { 1338 struct svm_cpudata *cpudata = vcpu->cpudata; 1339 uint64_t info = cpudata->vmcb->ctrl.exitinfo1; 1340 1341 if (info == 0) { 1342 svm_exit_rdmsr(mach, vcpu, exit); 1343 } else { 1344 svm_exit_wrmsr(mach, vcpu, exit); 1345 } 1346 } 1347 1348 static void 1349 svm_exit_npf(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 1350 struct nvmm_vcpu_exit *exit) 1351 { 1352 struct svm_cpudata *cpudata = vcpu->cpudata; 1353 gpaddr_t gpa = cpudata->vmcb->ctrl.exitinfo2; 1354 1355 exit->reason = NVMM_VCPU_EXIT_MEMORY; 1356 if (cpudata->vmcb->ctrl.exitinfo1 & PGEX_W) 1357 exit->u.mem.prot = PROT_WRITE; 1358 else if (cpudata->vmcb->ctrl.exitinfo1 & PGEX_I) 1359 exit->u.mem.prot = PROT_EXEC; 1360 else 1361 exit->u.mem.prot = PROT_READ; 1362 exit->u.mem.gpa = gpa; 1363 exit->u.mem.inst_len = cpudata->vmcb->ctrl.inst_len; 1364 memcpy(exit->u.mem.inst_bytes, cpudata->vmcb->ctrl.inst_bytes, 1365 sizeof(exit->u.mem.inst_bytes)); 1366 1367 svm_vcpu_state_provide(vcpu, 1368 NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS | 1369 NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS); 1370 } 1371 1372 static void 1373 svm_exit_xsetbv(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 1374 struct nvmm_vcpu_exit *exit) 1375 { 1376 struct svm_cpudata *cpudata = vcpu->cpudata; 1377 struct vmcb *vmcb = cpudata->vmcb; 1378 uint64_t val; 1379 1380 exit->reason = NVMM_VCPU_EXIT_NONE; 1381 1382 val = (cpudata->gprs[NVMM_X64_GPR_RDX] << 32) | 1383 (vmcb->state.rax & 0xFFFFFFFF); 1384 1385 if (__predict_false(cpudata->gprs[NVMM_X64_GPR_RCX] != 0)) { 1386 goto error; 1387 } else if (__predict_false(vmcb->state.cpl != 0)) { 1388 goto error; 1389 } else if (__predict_false((val & ~svm_xcr0_mask) != 0)) { 1390 goto error; 1391 } else if (__predict_false((val & XCR0_X87) == 0)) { 1392 goto error; 1393 } 1394 1395 cpudata->gxcr0 = val; 1396 1397 svm_inkernel_advance(cpudata->vmcb); 1398 return; 1399 1400 error: 1401 svm_inject_gp(vcpu); 1402 } 1403 1404 static void 1405 svm_exit_invalid(struct nvmm_vcpu_exit *exit, uint64_t code) 1406 { 1407 exit->u.inv.hwcode = code; 1408 exit->reason = NVMM_VCPU_EXIT_INVALID; 1409 } 1410 1411 /* -------------------------------------------------------------------------- */ 1412 1413 static void 1414 svm_vcpu_guest_fpu_enter(struct nvmm_cpu *vcpu) 1415 { 1416 struct svm_cpudata *cpudata = vcpu->cpudata; 1417 1418 fpu_kern_enter(); 1419 /* TODO: should we use *XSAVE64 here? 
*/ 1420 fpu_area_restore(&cpudata->gfpu, svm_xcr0_mask, false); 1421 1422 if (svm_xcr0_mask != 0) { 1423 cpudata->hxcr0 = rdxcr(0); 1424 wrxcr(0, cpudata->gxcr0); 1425 } 1426 } 1427 1428 static void 1429 svm_vcpu_guest_fpu_leave(struct nvmm_cpu *vcpu) 1430 { 1431 struct svm_cpudata *cpudata = vcpu->cpudata; 1432 1433 if (svm_xcr0_mask != 0) { 1434 cpudata->gxcr0 = rdxcr(0); 1435 wrxcr(0, cpudata->hxcr0); 1436 } 1437 1438 /* TODO: should we use *XSAVE64 here? */ 1439 fpu_area_save(&cpudata->gfpu, svm_xcr0_mask, false); 1440 fpu_kern_leave(); 1441 } 1442 1443 static void 1444 svm_vcpu_guest_dbregs_enter(struct nvmm_cpu *vcpu) 1445 { 1446 struct svm_cpudata *cpudata = vcpu->cpudata; 1447 1448 x86_dbregs_save(curlwp); 1449 1450 ldr7(0); 1451 1452 ldr0(cpudata->drs[NVMM_X64_DR_DR0]); 1453 ldr1(cpudata->drs[NVMM_X64_DR_DR1]); 1454 ldr2(cpudata->drs[NVMM_X64_DR_DR2]); 1455 ldr3(cpudata->drs[NVMM_X64_DR_DR3]); 1456 } 1457 1458 static void 1459 svm_vcpu_guest_dbregs_leave(struct nvmm_cpu *vcpu) 1460 { 1461 struct svm_cpudata *cpudata = vcpu->cpudata; 1462 1463 cpudata->drs[NVMM_X64_DR_DR0] = rdr0(); 1464 cpudata->drs[NVMM_X64_DR_DR1] = rdr1(); 1465 cpudata->drs[NVMM_X64_DR_DR2] = rdr2(); 1466 cpudata->drs[NVMM_X64_DR_DR3] = rdr3(); 1467 1468 x86_dbregs_restore(curlwp); 1469 } 1470 1471 static void 1472 svm_vcpu_guest_misc_enter(struct nvmm_cpu *vcpu) 1473 { 1474 struct svm_cpudata *cpudata = vcpu->cpudata; 1475 1476 cpudata->fsbase = rdmsr(MSR_FSBASE); 1477 cpudata->kernelgsbase = rdmsr(MSR_KERNELGSBASE); 1478 } 1479 1480 static void 1481 svm_vcpu_guest_misc_leave(struct nvmm_cpu *vcpu) 1482 { 1483 struct svm_cpudata *cpudata = vcpu->cpudata; 1484 1485 wrmsr(MSR_STAR, cpudata->star); 1486 wrmsr(MSR_LSTAR, cpudata->lstar); 1487 wrmsr(MSR_CSTAR, cpudata->cstar); 1488 wrmsr(MSR_SFMASK, cpudata->sfmask); 1489 wrmsr(MSR_FSBASE, cpudata->fsbase); 1490 wrmsr(MSR_KERNELGSBASE, cpudata->kernelgsbase); 1491 } 1492 1493 /* -------------------------------------------------------------------------- */ 1494 1495 static inline void 1496 svm_gtlb_catchup(struct nvmm_cpu *vcpu, int hcpu) 1497 { 1498 struct svm_cpudata *cpudata = vcpu->cpudata; 1499 1500 if (vcpu->hcpu_last != hcpu || cpudata->shared_asid) { 1501 cpudata->gtlb_want_flush = true; 1502 } 1503 } 1504 1505 static inline void 1506 svm_htlb_catchup(struct nvmm_cpu *vcpu, int hcpu) 1507 { 1508 /* 1509 * Nothing to do. If an hTLB flush was needed, either the VCPU was 1510 * executing on this hCPU and the hTLB already got flushed, or it 1511 * was executing on another hCPU in which case the catchup is done 1512 * in svm_gtlb_catchup(). 
1513 */ 1514 } 1515 1516 static inline uint64_t 1517 svm_htlb_flush(struct svm_machdata *machdata, struct svm_cpudata *cpudata) 1518 { 1519 struct vmcb *vmcb = cpudata->vmcb; 1520 uint64_t machgen; 1521 1522 machgen = machdata->mach_htlb_gen; 1523 if (__predict_true(machgen == cpudata->vcpu_htlb_gen)) { 1524 return machgen; 1525 } 1526 1527 vmcb->ctrl.tlb_ctrl = svm_ctrl_tlb_flush; 1528 return machgen; 1529 } 1530 1531 static inline void 1532 svm_htlb_flush_ack(struct svm_cpudata *cpudata, uint64_t machgen) 1533 { 1534 struct vmcb *vmcb = cpudata->vmcb; 1535 1536 if (__predict_true(vmcb->ctrl.exitcode != VMCB_EXITCODE_INVALID)) { 1537 cpudata->vcpu_htlb_gen = machgen; 1538 } 1539 } 1540 1541 static inline void 1542 svm_exit_evt(struct svm_cpudata *cpudata, struct vmcb *vmcb) 1543 { 1544 cpudata->evt_pending = false; 1545 1546 if (__predict_false(vmcb->ctrl.exitintinfo & VMCB_CTRL_EXITINTINFO_V)) { 1547 vmcb->ctrl.eventinj = vmcb->ctrl.exitintinfo; 1548 cpudata->evt_pending = true; 1549 } 1550 } 1551 1552 static int 1553 svm_vcpu_run(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 1554 struct nvmm_vcpu_exit *exit) 1555 { 1556 struct nvmm_comm_page *comm = vcpu->comm; 1557 struct svm_machdata *machdata = mach->machdata; 1558 struct svm_cpudata *cpudata = vcpu->cpudata; 1559 struct vmcb *vmcb = cpudata->vmcb; 1560 uint64_t machgen; 1561 int hcpu; 1562 1563 svm_vcpu_state_commit(vcpu); 1564 comm->state_cached = 0; 1565 1566 if (__predict_false(svm_vcpu_event_commit(vcpu) != 0)) { 1567 return EINVAL; 1568 } 1569 1570 kpreempt_disable(); 1571 hcpu = cpu_number(); 1572 1573 svm_gtlb_catchup(vcpu, hcpu); 1574 svm_htlb_catchup(vcpu, hcpu); 1575 1576 if (vcpu->hcpu_last != hcpu) { 1577 svm_vmcb_cache_flush_all(vmcb); 1578 cpudata->gtsc_want_update = true; 1579 } 1580 1581 svm_vcpu_guest_dbregs_enter(vcpu); 1582 svm_vcpu_guest_misc_enter(vcpu); 1583 1584 while (1) { 1585 if (cpudata->gtlb_want_flush) { 1586 vmcb->ctrl.tlb_ctrl = svm_ctrl_tlb_flush; 1587 } else { 1588 vmcb->ctrl.tlb_ctrl = 0; 1589 } 1590 1591 if (__predict_false(cpudata->gtsc_want_update)) { 1592 vmcb->ctrl.tsc_offset = cpudata->gtsc - rdtsc(); 1593 svm_vmcb_cache_flush(vmcb, VMCB_CTRL_VMCB_CLEAN_I); 1594 } 1595 1596 svm_vcpu_guest_fpu_enter(vcpu); 1597 svm_clgi(); 1598 machgen = svm_htlb_flush(machdata, cpudata); 1599 svm_vmrun(cpudata->vmcb_pa, cpudata->gprs); 1600 svm_htlb_flush_ack(cpudata, machgen); 1601 svm_stgi(); 1602 svm_vcpu_guest_fpu_leave(vcpu); 1603 1604 svm_vmcb_cache_default(vmcb); 1605 1606 if (vmcb->ctrl.exitcode != VMCB_EXITCODE_INVALID) { 1607 cpudata->gtlb_want_flush = false; 1608 cpudata->gtsc_want_update = false; 1609 vcpu->hcpu_last = hcpu; 1610 } 1611 svm_exit_evt(cpudata, vmcb); 1612 1613 switch (vmcb->ctrl.exitcode) { 1614 case VMCB_EXITCODE_INTR: 1615 case VMCB_EXITCODE_NMI: 1616 exit->reason = NVMM_VCPU_EXIT_NONE; 1617 break; 1618 case VMCB_EXITCODE_VINTR: 1619 svm_event_waitexit_disable(vcpu, false); 1620 exit->reason = NVMM_VCPU_EXIT_INT_READY; 1621 break; 1622 case VMCB_EXITCODE_IRET: 1623 svm_event_waitexit_disable(vcpu, true); 1624 exit->reason = NVMM_VCPU_EXIT_NMI_READY; 1625 break; 1626 case VMCB_EXITCODE_CPUID: 1627 svm_exit_cpuid(mach, vcpu, exit); 1628 break; 1629 case VMCB_EXITCODE_HLT: 1630 svm_exit_hlt(mach, vcpu, exit); 1631 break; 1632 case VMCB_EXITCODE_IOIO: 1633 svm_exit_io(mach, vcpu, exit); 1634 break; 1635 case VMCB_EXITCODE_MSR: 1636 svm_exit_msr(mach, vcpu, exit); 1637 break; 1638 case VMCB_EXITCODE_SHUTDOWN: 1639 exit->reason = NVMM_VCPU_EXIT_SHUTDOWN; 1640 break; 1641 case 
VMCB_EXITCODE_RDPMC: 1642 case VMCB_EXITCODE_RSM: 1643 case VMCB_EXITCODE_INVLPGA: 1644 case VMCB_EXITCODE_VMRUN: 1645 case VMCB_EXITCODE_VMMCALL: 1646 case VMCB_EXITCODE_VMLOAD: 1647 case VMCB_EXITCODE_VMSAVE: 1648 case VMCB_EXITCODE_STGI: 1649 case VMCB_EXITCODE_CLGI: 1650 case VMCB_EXITCODE_SKINIT: 1651 case VMCB_EXITCODE_RDTSCP: 1652 case VMCB_EXITCODE_RDPRU: 1653 case VMCB_EXITCODE_INVLPGB: 1654 case VMCB_EXITCODE_INVPCID: 1655 case VMCB_EXITCODE_MCOMMIT: 1656 case VMCB_EXITCODE_TLBSYNC: 1657 svm_inject_ud(vcpu); 1658 exit->reason = NVMM_VCPU_EXIT_NONE; 1659 break; 1660 case VMCB_EXITCODE_MONITOR: 1661 svm_exit_insn(vmcb, exit, NVMM_VCPU_EXIT_MONITOR); 1662 break; 1663 case VMCB_EXITCODE_MWAIT: 1664 case VMCB_EXITCODE_MWAIT_CONDITIONAL: 1665 svm_exit_insn(vmcb, exit, NVMM_VCPU_EXIT_MWAIT); 1666 break; 1667 case VMCB_EXITCODE_XSETBV: 1668 svm_exit_xsetbv(mach, vcpu, exit); 1669 break; 1670 case VMCB_EXITCODE_NPF: 1671 svm_exit_npf(mach, vcpu, exit); 1672 break; 1673 case VMCB_EXITCODE_FERR_FREEZE: /* ? */ 1674 default: 1675 svm_exit_invalid(exit, vmcb->ctrl.exitcode); 1676 break; 1677 } 1678 1679 /* If no reason to return to userland, keep rolling. */ 1680 if (nvmm_return_needed(vcpu, exit)) { 1681 break; 1682 } 1683 if (exit->reason != NVMM_VCPU_EXIT_NONE) { 1684 break; 1685 } 1686 } 1687 1688 cpudata->gtsc = rdtsc() + vmcb->ctrl.tsc_offset; 1689 1690 svm_vcpu_guest_misc_leave(vcpu); 1691 svm_vcpu_guest_dbregs_leave(vcpu); 1692 1693 kpreempt_enable(); 1694 1695 exit->exitstate.rflags = vmcb->state.rflags; 1696 exit->exitstate.cr8 = __SHIFTOUT(vmcb->ctrl.v, VMCB_CTRL_V_TPR); 1697 exit->exitstate.int_shadow = 1698 ((vmcb->ctrl.intr & VMCB_CTRL_INTR_SHADOW) != 0); 1699 exit->exitstate.int_window_exiting = cpudata->int_window_exit; 1700 exit->exitstate.nmi_window_exiting = cpudata->nmi_window_exit; 1701 exit->exitstate.evt_pending = cpudata->evt_pending; 1702 1703 return 0; 1704 } 1705 1706 /* -------------------------------------------------------------------------- */ 1707 1708 static int 1709 svm_memalloc(paddr_t *pa, vaddr_t *va, size_t npages) 1710 { 1711 struct pglist pglist; 1712 paddr_t _pa; 1713 vaddr_t _va; 1714 size_t i; 1715 int ret; 1716 1717 ret = uvm_pglistalloc(npages * PAGE_SIZE, 0, ~0UL, PAGE_SIZE, 0, 1718 &pglist, 1, 0); 1719 if (ret != 0) 1720 return ENOMEM; 1721 _pa = VM_PAGE_TO_PHYS(TAILQ_FIRST(&pglist)); 1722 _va = uvm_km_alloc(kernel_map, npages * PAGE_SIZE, 0, 1723 UVM_KMF_VAONLY | UVM_KMF_NOWAIT); 1724 if (_va == 0) 1725 goto error; 1726 1727 for (i = 0; i < npages; i++) { 1728 pmap_kenter_pa(_va + i * PAGE_SIZE, _pa + i * PAGE_SIZE, 1729 VM_PROT_READ | VM_PROT_WRITE, PMAP_WRITE_BACK); 1730 } 1731 pmap_update(pmap_kernel()); 1732 1733 memset((void *)_va, 0, npages * PAGE_SIZE); 1734 1735 *pa = _pa; 1736 *va = _va; 1737 return 0; 1738 1739 error: 1740 for (i = 0; i < npages; i++) { 1741 uvm_pagefree(PHYS_TO_VM_PAGE(_pa + i * PAGE_SIZE)); 1742 } 1743 return ENOMEM; 1744 } 1745 1746 static void 1747 svm_memfree(paddr_t pa, vaddr_t va, size_t npages) 1748 { 1749 size_t i; 1750 1751 pmap_kremove(va, npages * PAGE_SIZE); 1752 pmap_update(pmap_kernel()); 1753 uvm_km_free(kernel_map, va, npages * PAGE_SIZE, UVM_KMF_VAONLY); 1754 for (i = 0; i < npages; i++) { 1755 uvm_pagefree(PHYS_TO_VM_PAGE(pa + i * PAGE_SIZE)); 1756 } 1757 } 1758 1759 /* -------------------------------------------------------------------------- */ 1760 1761 #define SVM_MSRBM_READ __BIT(0) 1762 #define SVM_MSRBM_WRITE __BIT(1) 1763 1764 static void 1765 svm_vcpu_msr_allow(uint8_t *bitmap, uint64_t msr, 
bool read, bool write) 1766 { 1767 uint64_t byte; 1768 uint8_t bitoff; 1769 1770 if (msr < 0x00002000) { 1771 /* Range 1 */ 1772 byte = ((msr - 0x00000000) >> 2UL) + 0x0000; 1773 } else if (msr >= 0xC0000000 && msr < 0xC0002000) { 1774 /* Range 2 */ 1775 byte = ((msr - 0xC0000000) >> 2UL) + 0x0800; 1776 } else if (msr >= 0xC0010000 && msr < 0xC0012000) { 1777 /* Range 3 */ 1778 byte = ((msr - 0xC0010000) >> 2UL) + 0x1000; 1779 } else { 1780 panic("%s: wrong range", __func__); 1781 } 1782 1783 bitoff = (msr & 0x3) << 1; 1784 1785 if (read) { 1786 bitmap[byte] &= ~(SVM_MSRBM_READ << bitoff); 1787 } 1788 if (write) { 1789 bitmap[byte] &= ~(SVM_MSRBM_WRITE << bitoff); 1790 } 1791 } 1792 1793 #define SVM_SEG_ATTRIB_TYPE __BITS(3,0) 1794 #define SVM_SEG_ATTRIB_S __BIT(4) 1795 #define SVM_SEG_ATTRIB_DPL __BITS(6,5) 1796 #define SVM_SEG_ATTRIB_P __BIT(7) 1797 #define SVM_SEG_ATTRIB_AVL __BIT(8) 1798 #define SVM_SEG_ATTRIB_L __BIT(9) 1799 #define SVM_SEG_ATTRIB_DEF __BIT(10) 1800 #define SVM_SEG_ATTRIB_G __BIT(11) 1801 1802 static void 1803 svm_vcpu_setstate_seg(const struct nvmm_x64_state_seg *seg, 1804 struct vmcb_segment *vseg) 1805 { 1806 vseg->selector = seg->selector; 1807 vseg->attrib = 1808 __SHIFTIN(seg->attrib.type, SVM_SEG_ATTRIB_TYPE) | 1809 __SHIFTIN(seg->attrib.s, SVM_SEG_ATTRIB_S) | 1810 __SHIFTIN(seg->attrib.dpl, SVM_SEG_ATTRIB_DPL) | 1811 __SHIFTIN(seg->attrib.p, SVM_SEG_ATTRIB_P) | 1812 __SHIFTIN(seg->attrib.avl, SVM_SEG_ATTRIB_AVL) | 1813 __SHIFTIN(seg->attrib.l, SVM_SEG_ATTRIB_L) | 1814 __SHIFTIN(seg->attrib.def, SVM_SEG_ATTRIB_DEF) | 1815 __SHIFTIN(seg->attrib.g, SVM_SEG_ATTRIB_G); 1816 vseg->limit = seg->limit; 1817 vseg->base = seg->base; 1818 } 1819 1820 static void 1821 svm_vcpu_getstate_seg(struct nvmm_x64_state_seg *seg, struct vmcb_segment *vseg) 1822 { 1823 seg->selector = vseg->selector; 1824 seg->attrib.type = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_TYPE); 1825 seg->attrib.s = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_S); 1826 seg->attrib.dpl = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_DPL); 1827 seg->attrib.p = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_P); 1828 seg->attrib.avl = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_AVL); 1829 seg->attrib.l = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_L); 1830 seg->attrib.def = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_DEF); 1831 seg->attrib.g = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_G); 1832 seg->limit = vseg->limit; 1833 seg->base = vseg->base; 1834 } 1835 1836 static inline bool 1837 svm_state_tlb_flush(const struct vmcb *vmcb, const struct nvmm_x64_state *state, 1838 uint64_t flags) 1839 { 1840 if (flags & NVMM_X64_STATE_CRS) { 1841 if ((vmcb->state.cr0 ^ 1842 state->crs[NVMM_X64_CR_CR0]) & CR0_TLB_FLUSH) { 1843 return true; 1844 } 1845 if (vmcb->state.cr3 != state->crs[NVMM_X64_CR_CR3]) { 1846 return true; 1847 } 1848 if ((vmcb->state.cr4 ^ 1849 state->crs[NVMM_X64_CR_CR4]) & CR4_TLB_FLUSH) { 1850 return true; 1851 } 1852 } 1853 1854 if (flags & NVMM_X64_STATE_MSRS) { 1855 if ((vmcb->state.efer ^ 1856 state->msrs[NVMM_X64_MSR_EFER]) & EFER_TLB_FLUSH) { 1857 return true; 1858 } 1859 } 1860 1861 return false; 1862 } 1863 1864 static void 1865 svm_vcpu_setstate(struct nvmm_cpu *vcpu) 1866 { 1867 struct nvmm_comm_page *comm = vcpu->comm; 1868 const struct nvmm_x64_state *state = &comm->state; 1869 struct svm_cpudata *cpudata = vcpu->cpudata; 1870 struct vmcb *vmcb = cpudata->vmcb; 1871 struct fxsave *fpustate; 1872 uint64_t flags; 1873 1874 flags = comm->state_wanted; 1875 1876 if (svm_state_tlb_flush(vmcb, state, flags)) { 1877 

static void
svm_vcpu_setstate(struct nvmm_cpu *vcpu)
{
	struct nvmm_comm_page *comm = vcpu->comm;
	const struct nvmm_x64_state *state = &comm->state;
	struct svm_cpudata *cpudata = vcpu->cpudata;
	struct vmcb *vmcb = cpudata->vmcb;
	struct fxsave *fpustate;
	uint64_t flags;

	flags = comm->state_wanted;

	if (svm_state_tlb_flush(vmcb, state, flags)) {
		cpudata->gtlb_want_flush = true;
	}

	if (flags & NVMM_X64_STATE_SEGS) {
		svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_CS],
		    &vmcb->state.cs);
		svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_DS],
		    &vmcb->state.ds);
		svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_ES],
		    &vmcb->state.es);
		svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_FS],
		    &vmcb->state.fs);
		svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_GS],
		    &vmcb->state.gs);
		svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_SS],
		    &vmcb->state.ss);
		svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_GDT],
		    &vmcb->state.gdt);
		svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_IDT],
		    &vmcb->state.idt);
		svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_LDT],
		    &vmcb->state.ldt);
		svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_TR],
		    &vmcb->state.tr);

		vmcb->state.cpl = state->segs[NVMM_X64_SEG_SS].attrib.dpl;
	}

	CTASSERT(sizeof(cpudata->gprs) == sizeof(state->gprs));
	if (flags & NVMM_X64_STATE_GPRS) {
		memcpy(cpudata->gprs, state->gprs, sizeof(state->gprs));

		vmcb->state.rip = state->gprs[NVMM_X64_GPR_RIP];
		vmcb->state.rsp = state->gprs[NVMM_X64_GPR_RSP];
		vmcb->state.rax = state->gprs[NVMM_X64_GPR_RAX];
		vmcb->state.rflags = state->gprs[NVMM_X64_GPR_RFLAGS];
	}

	if (flags & NVMM_X64_STATE_CRS) {
		vmcb->state.cr0 = state->crs[NVMM_X64_CR_CR0];
		vmcb->state.cr2 = state->crs[NVMM_X64_CR_CR2];
		vmcb->state.cr3 = state->crs[NVMM_X64_CR_CR3];
		vmcb->state.cr4 = state->crs[NVMM_X64_CR_CR4];
		vmcb->state.cr4 &= CR4_VALID;

		vmcb->ctrl.v &= ~VMCB_CTRL_V_TPR;
		vmcb->ctrl.v |= __SHIFTIN(state->crs[NVMM_X64_CR_CR8],
		    VMCB_CTRL_V_TPR);

		if (svm_xcr0_mask != 0) {
			/* Clear illegal XCR0 bits, set mandatory X87 bit. */
			cpudata->gxcr0 = state->crs[NVMM_X64_CR_XCR0];
			cpudata->gxcr0 &= svm_xcr0_mask;
			cpudata->gxcr0 |= XCR0_X87;
		}
	}

	CTASSERT(sizeof(cpudata->drs) == sizeof(state->drs));
	if (flags & NVMM_X64_STATE_DRS) {
		memcpy(cpudata->drs, state->drs, sizeof(state->drs));

		vmcb->state.dr6 = state->drs[NVMM_X64_DR_DR6];
		vmcb->state.dr7 = state->drs[NVMM_X64_DR_DR7];
	}

	if (flags & NVMM_X64_STATE_MSRS) {
		/*
		 * EFER_SVME is mandatory.
		 */
		vmcb->state.efer = state->msrs[NVMM_X64_MSR_EFER] | EFER_SVME;
		vmcb->state.star = state->msrs[NVMM_X64_MSR_STAR];
		vmcb->state.lstar = state->msrs[NVMM_X64_MSR_LSTAR];
		vmcb->state.cstar = state->msrs[NVMM_X64_MSR_CSTAR];
		vmcb->state.sfmask = state->msrs[NVMM_X64_MSR_SFMASK];
		vmcb->state.kernelgsbase =
		    state->msrs[NVMM_X64_MSR_KERNELGSBASE];
		vmcb->state.sysenter_cs =
		    state->msrs[NVMM_X64_MSR_SYSENTER_CS];
		vmcb->state.sysenter_esp =
		    state->msrs[NVMM_X64_MSR_SYSENTER_ESP];
		vmcb->state.sysenter_eip =
		    state->msrs[NVMM_X64_MSR_SYSENTER_EIP];
		vmcb->state.g_pat = state->msrs[NVMM_X64_MSR_PAT];

		cpudata->gtsc = state->msrs[NVMM_X64_MSR_TSC];
		cpudata->gtsc_want_update = true;
	}

	if (flags & NVMM_X64_STATE_INTR) {
		if (state->intr.int_shadow) {
			vmcb->ctrl.intr |= VMCB_CTRL_INTR_SHADOW;
		} else {
			vmcb->ctrl.intr &= ~VMCB_CTRL_INTR_SHADOW;
		}

		if (state->intr.int_window_exiting) {
			svm_event_waitexit_enable(vcpu, false);
		} else {
			svm_event_waitexit_disable(vcpu, false);
		}

		if (state->intr.nmi_window_exiting) {
			svm_event_waitexit_enable(vcpu, true);
		} else {
			svm_event_waitexit_disable(vcpu, true);
		}
	}

	CTASSERT(sizeof(cpudata->gfpu.xsh_fxsave) == sizeof(state->fpu));
	if (flags & NVMM_X64_STATE_FPU) {
		memcpy(cpudata->gfpu.xsh_fxsave, &state->fpu,
		    sizeof(state->fpu));

		fpustate = (struct fxsave *)cpudata->gfpu.xsh_fxsave;
		fpustate->fx_mxcsr_mask &= x86_fpu_mxcsr_mask;
		fpustate->fx_mxcsr &= fpustate->fx_mxcsr_mask;

		if (svm_xcr0_mask != 0) {
			/* Reset XSTATE_BV, to force a reload. */
			cpudata->gfpu.xsh_xstate_bv = svm_xcr0_mask;
		}
	}

	svm_vmcb_cache_update(vmcb, flags);

	comm->state_wanted = 0;
	comm->state_cached |= flags;
}

static void
svm_vcpu_getstate(struct nvmm_cpu *vcpu)
{
	struct nvmm_comm_page *comm = vcpu->comm;
	struct nvmm_x64_state *state = &comm->state;
	struct svm_cpudata *cpudata = vcpu->cpudata;
	struct vmcb *vmcb = cpudata->vmcb;
	uint64_t flags;

	flags = comm->state_wanted;

	if (flags & NVMM_X64_STATE_SEGS) {
		svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_CS],
		    &vmcb->state.cs);
		svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_DS],
		    &vmcb->state.ds);
		svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_ES],
		    &vmcb->state.es);
		svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_FS],
		    &vmcb->state.fs);
		svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_GS],
		    &vmcb->state.gs);
		svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_SS],
		    &vmcb->state.ss);
		svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_GDT],
		    &vmcb->state.gdt);
		svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_IDT],
		    &vmcb->state.idt);
		svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_LDT],
		    &vmcb->state.ldt);
		svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_TR],
		    &vmcb->state.tr);

		state->segs[NVMM_X64_SEG_SS].attrib.dpl = vmcb->state.cpl;
	}

	CTASSERT(sizeof(cpudata->gprs) == sizeof(state->gprs));
	if (flags & NVMM_X64_STATE_GPRS) {
		memcpy(state->gprs, cpudata->gprs, sizeof(state->gprs));

		state->gprs[NVMM_X64_GPR_RIP] = vmcb->state.rip;
		state->gprs[NVMM_X64_GPR_RSP] = vmcb->state.rsp;
		state->gprs[NVMM_X64_GPR_RAX] = vmcb->state.rax;
		state->gprs[NVMM_X64_GPR_RFLAGS] = vmcb->state.rflags;
	}

	if (flags & NVMM_X64_STATE_CRS) {
		state->crs[NVMM_X64_CR_CR0] = vmcb->state.cr0;
		state->crs[NVMM_X64_CR_CR2] = vmcb->state.cr2;
		state->crs[NVMM_X64_CR_CR3] = vmcb->state.cr3;
		state->crs[NVMM_X64_CR_CR4] = vmcb->state.cr4;
		state->crs[NVMM_X64_CR_CR8] = __SHIFTOUT(vmcb->ctrl.v,
		    VMCB_CTRL_V_TPR);
		state->crs[NVMM_X64_CR_XCR0] = cpudata->gxcr0;
	}

	CTASSERT(sizeof(cpudata->drs) == sizeof(state->drs));
	if (flags & NVMM_X64_STATE_DRS) {
		memcpy(state->drs, cpudata->drs, sizeof(state->drs));

		state->drs[NVMM_X64_DR_DR6] = vmcb->state.dr6;
		state->drs[NVMM_X64_DR_DR7] = vmcb->state.dr7;
	}

	if (flags & NVMM_X64_STATE_MSRS) {
		state->msrs[NVMM_X64_MSR_EFER] = vmcb->state.efer;
		state->msrs[NVMM_X64_MSR_STAR] = vmcb->state.star;
		state->msrs[NVMM_X64_MSR_LSTAR] = vmcb->state.lstar;
		state->msrs[NVMM_X64_MSR_CSTAR] = vmcb->state.cstar;
		state->msrs[NVMM_X64_MSR_SFMASK] = vmcb->state.sfmask;
		state->msrs[NVMM_X64_MSR_KERNELGSBASE] =
		    vmcb->state.kernelgsbase;
		state->msrs[NVMM_X64_MSR_SYSENTER_CS] =
		    vmcb->state.sysenter_cs;
		state->msrs[NVMM_X64_MSR_SYSENTER_ESP] =
		    vmcb->state.sysenter_esp;
		state->msrs[NVMM_X64_MSR_SYSENTER_EIP] =
		    vmcb->state.sysenter_eip;
		state->msrs[NVMM_X64_MSR_PAT] = vmcb->state.g_pat;
		state->msrs[NVMM_X64_MSR_TSC] = cpudata->gtsc;

		/* Hide SVME. */
		state->msrs[NVMM_X64_MSR_EFER] &= ~EFER_SVME;
	}

	if (flags & NVMM_X64_STATE_INTR) {
		state->intr.int_shadow =
		    (vmcb->ctrl.intr & VMCB_CTRL_INTR_SHADOW) != 0;
		state->intr.int_window_exiting = cpudata->int_window_exit;
		state->intr.nmi_window_exiting = cpudata->nmi_window_exit;
		state->intr.evt_pending = cpudata->evt_pending;
	}

	CTASSERT(sizeof(cpudata->gfpu.xsh_fxsave) == sizeof(state->fpu));
	if (flags & NVMM_X64_STATE_FPU) {
		memcpy(&state->fpu, cpudata->gfpu.xsh_fxsave,
		    sizeof(state->fpu));
	}

	comm->state_wanted = 0;
	comm->state_cached |= flags;
}

static void
svm_vcpu_state_provide(struct nvmm_cpu *vcpu, uint64_t flags)
{
	vcpu->comm->state_wanted = flags;
	svm_vcpu_getstate(vcpu);
}

static void
svm_vcpu_state_commit(struct nvmm_cpu *vcpu)
{
	vcpu->comm->state_wanted = vcpu->comm->state_commit;
	vcpu->comm->state_commit = 0;
	svm_vcpu_setstate(vcpu);
}

/* -------------------------------------------------------------------------- */

static void
svm_asid_alloc(struct nvmm_cpu *vcpu)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;
	struct vmcb *vmcb = cpudata->vmcb;
	size_t i, oct, bit;

	mutex_enter(&svm_asidlock);

	for (i = 0; i < svm_maxasid; i++) {
		oct = i / 8;
		bit = i % 8;

		if (svm_asidmap[oct] & __BIT(bit)) {
			continue;
		}

		svm_asidmap[oct] |= __BIT(bit);
		vmcb->ctrl.guest_asid = i;
		mutex_exit(&svm_asidlock);
		return;
	}

	/*
	 * No free ASID. Use the last one, which is shared and requires
	 * special TLB handling.
2152 */ 2153 cpudata->shared_asid = true; 2154 vmcb->ctrl.guest_asid = svm_maxasid - 1; 2155 mutex_exit(&svm_asidlock); 2156 } 2157 2158 static void 2159 svm_asid_free(struct nvmm_cpu *vcpu) 2160 { 2161 struct svm_cpudata *cpudata = vcpu->cpudata; 2162 struct vmcb *vmcb = cpudata->vmcb; 2163 size_t oct, bit; 2164 2165 if (cpudata->shared_asid) { 2166 return; 2167 } 2168 2169 oct = vmcb->ctrl.guest_asid / 8; 2170 bit = vmcb->ctrl.guest_asid % 8; 2171 2172 mutex_enter(&svm_asidlock); 2173 svm_asidmap[oct] &= ~__BIT(bit); 2174 mutex_exit(&svm_asidlock); 2175 } 2176 2177 static void 2178 svm_vcpu_init(struct nvmm_machine *mach, struct nvmm_cpu *vcpu) 2179 { 2180 struct svm_cpudata *cpudata = vcpu->cpudata; 2181 struct vmcb *vmcb = cpudata->vmcb; 2182 2183 /* Allow reads/writes of Control Registers. */ 2184 vmcb->ctrl.intercept_cr = 0; 2185 2186 /* Allow reads/writes of Debug Registers. */ 2187 vmcb->ctrl.intercept_dr = 0; 2188 2189 /* Allow exceptions 0 to 31. */ 2190 vmcb->ctrl.intercept_vec = 0; 2191 2192 /* 2193 * Allow: 2194 * - SMI [smm interrupts] 2195 * - VINTR [virtual interrupts] 2196 * - CR0_SPEC [CR0 writes changing other fields than CR0.TS or CR0.MP] 2197 * - RIDTR [reads of IDTR] 2198 * - RGDTR [reads of GDTR] 2199 * - RLDTR [reads of LDTR] 2200 * - RTR [reads of TR] 2201 * - WIDTR [writes of IDTR] 2202 * - WGDTR [writes of GDTR] 2203 * - WLDTR [writes of LDTR] 2204 * - WTR [writes of TR] 2205 * - RDTSC [rdtsc instruction] 2206 * - PUSHF [pushf instruction] 2207 * - POPF [popf instruction] 2208 * - IRET [iret instruction] 2209 * - INTN [int $n instructions] 2210 * - PAUSE [pause instruction] 2211 * - INVLPG [invplg instruction] 2212 * - TASKSW [task switches] 2213 * 2214 * Intercept the rest below. 2215 */ 2216 vmcb->ctrl.intercept_misc1 = 2217 VMCB_CTRL_INTERCEPT_INTR | 2218 VMCB_CTRL_INTERCEPT_NMI | 2219 VMCB_CTRL_INTERCEPT_INIT | 2220 VMCB_CTRL_INTERCEPT_RDPMC | 2221 VMCB_CTRL_INTERCEPT_CPUID | 2222 VMCB_CTRL_INTERCEPT_RSM | 2223 VMCB_CTRL_INTERCEPT_INVD | 2224 VMCB_CTRL_INTERCEPT_HLT | 2225 VMCB_CTRL_INTERCEPT_INVLPGA | 2226 VMCB_CTRL_INTERCEPT_IOIO_PROT | 2227 VMCB_CTRL_INTERCEPT_MSR_PROT | 2228 VMCB_CTRL_INTERCEPT_FERR_FREEZE | 2229 VMCB_CTRL_INTERCEPT_SHUTDOWN; 2230 2231 /* 2232 * Allow: 2233 * - ICEBP [icebp instruction] 2234 * - WBINVD [wbinvd instruction] 2235 * - WCR_SPEC(0..15) [writes of CR0-15, received after instruction] 2236 * 2237 * Intercept the rest below. 2238 */ 2239 vmcb->ctrl.intercept_misc2 = 2240 VMCB_CTRL_INTERCEPT_VMRUN | 2241 VMCB_CTRL_INTERCEPT_VMMCALL | 2242 VMCB_CTRL_INTERCEPT_VMLOAD | 2243 VMCB_CTRL_INTERCEPT_VMSAVE | 2244 VMCB_CTRL_INTERCEPT_STGI | 2245 VMCB_CTRL_INTERCEPT_CLGI | 2246 VMCB_CTRL_INTERCEPT_SKINIT | 2247 VMCB_CTRL_INTERCEPT_RDTSCP | 2248 VMCB_CTRL_INTERCEPT_MONITOR | 2249 VMCB_CTRL_INTERCEPT_MWAIT | 2250 VMCB_CTRL_INTERCEPT_XSETBV | 2251 VMCB_CTRL_INTERCEPT_RDPRU; 2252 2253 /* 2254 * Intercept everything. 2255 */ 2256 vmcb->ctrl.intercept_misc3 = 2257 VMCB_CTRL_INTERCEPT_INVLPGB_ALL | 2258 VMCB_CTRL_INTERCEPT_PCID | 2259 VMCB_CTRL_INTERCEPT_MCOMMIT | 2260 VMCB_CTRL_INTERCEPT_TLBSYNC; 2261 2262 /* Intercept all I/O accesses. */ 2263 memset(cpudata->iobm, 0xFF, IOBM_SIZE); 2264 vmcb->ctrl.iopm_base_pa = cpudata->iobm_pa; 2265 2266 /* Allow direct access to certain MSRs. 

	/* Allow direct access to certain MSRs. */
	memset(cpudata->msrbm, 0xFF, MSRBM_SIZE);
	svm_vcpu_msr_allow(cpudata->msrbm, MSR_STAR, true, true);
	svm_vcpu_msr_allow(cpudata->msrbm, MSR_LSTAR, true, true);
	svm_vcpu_msr_allow(cpudata->msrbm, MSR_CSTAR, true, true);
	svm_vcpu_msr_allow(cpudata->msrbm, MSR_SFMASK, true, true);
	svm_vcpu_msr_allow(cpudata->msrbm, MSR_KERNELGSBASE, true, true);
	svm_vcpu_msr_allow(cpudata->msrbm, MSR_SYSENTER_CS, true, true);
	svm_vcpu_msr_allow(cpudata->msrbm, MSR_SYSENTER_ESP, true, true);
	svm_vcpu_msr_allow(cpudata->msrbm, MSR_SYSENTER_EIP, true, true);
	svm_vcpu_msr_allow(cpudata->msrbm, MSR_FSBASE, true, true);
	svm_vcpu_msr_allow(cpudata->msrbm, MSR_GSBASE, true, true);
	svm_vcpu_msr_allow(cpudata->msrbm, MSR_CR_PAT, true, true);
	svm_vcpu_msr_allow(cpudata->msrbm, MSR_TSC, true, false);
	vmcb->ctrl.msrpm_base_pa = cpudata->msrbm_pa;

	/* Generate ASID. */
	svm_asid_alloc(vcpu);

	/* Virtual TPR. */
	vmcb->ctrl.v = VMCB_CTRL_V_INTR_MASKING;

	/* Enable Nested Paging. */
	vmcb->ctrl.enable1 = VMCB_CTRL_ENABLE_NP;
	vmcb->ctrl.n_cr3 = mach->vm->vm_map.pmap->pm_pdirpa[0];

	/* Init XSAVE header. */
	cpudata->gfpu.xsh_xstate_bv = svm_xcr0_mask;
	cpudata->gfpu.xsh_xcomp_bv = 0;

	/* These MSRs are static. */
	cpudata->star = rdmsr(MSR_STAR);
	cpudata->lstar = rdmsr(MSR_LSTAR);
	cpudata->cstar = rdmsr(MSR_CSTAR);
	cpudata->sfmask = rdmsr(MSR_SFMASK);

	/* Install the RESET state. */
	memcpy(&vcpu->comm->state, &nvmm_x86_reset_state,
	    sizeof(nvmm_x86_reset_state));
	vcpu->comm->state_wanted = NVMM_X64_STATE_ALL;
	vcpu->comm->state_cached = 0;
	svm_vcpu_setstate(vcpu);
}
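
/*
 * Each VCPU is backed by three physically contiguous buffers whose
 * physical addresses are programmed into the VMCB control area in
 * svm_vcpu_init(): the VMCB itself, the I/O permission bitmap and the MSR
 * permission bitmap. The page counts come from the VMCB_NPAGES,
 * IOBM_NPAGES and MSRBM_NPAGES constants defined earlier in this file
 * (per the AMD APM, the I/O bitmap spans 12KB and the MSR bitmap 8KB).
 */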

static int
svm_vcpu_create(struct nvmm_machine *mach, struct nvmm_cpu *vcpu)
{
	struct svm_cpudata *cpudata;
	int error;

	/* Allocate the SVM cpudata. */
	cpudata = (struct svm_cpudata *)uvm_km_alloc(kernel_map,
	    roundup(sizeof(*cpudata), PAGE_SIZE), 0,
	    UVM_KMF_WIRED|UVM_KMF_ZERO);
	vcpu->cpudata = cpudata;

	/* VMCB */
	error = svm_memalloc(&cpudata->vmcb_pa, (vaddr_t *)&cpudata->vmcb,
	    VMCB_NPAGES);
	if (error)
		goto error;

	/* I/O Bitmap */
	error = svm_memalloc(&cpudata->iobm_pa, (vaddr_t *)&cpudata->iobm,
	    IOBM_NPAGES);
	if (error)
		goto error;

	/* MSR Bitmap */
	error = svm_memalloc(&cpudata->msrbm_pa, (vaddr_t *)&cpudata->msrbm,
	    MSRBM_NPAGES);
	if (error)
		goto error;

	/* Init the VCPU info. */
	svm_vcpu_init(mach, vcpu);

	return 0;

error:
	if (cpudata->vmcb_pa) {
		svm_memfree(cpudata->vmcb_pa, (vaddr_t)cpudata->vmcb,
		    VMCB_NPAGES);
	}
	if (cpudata->iobm_pa) {
		svm_memfree(cpudata->iobm_pa, (vaddr_t)cpudata->iobm,
		    IOBM_NPAGES);
	}
	if (cpudata->msrbm_pa) {
		svm_memfree(cpudata->msrbm_pa, (vaddr_t)cpudata->msrbm,
		    MSRBM_NPAGES);
	}
	uvm_km_free(kernel_map, (vaddr_t)cpudata,
	    roundup(sizeof(*cpudata), PAGE_SIZE), UVM_KMF_WIRED);
	return error;
}

static void
svm_vcpu_destroy(struct nvmm_machine *mach, struct nvmm_cpu *vcpu)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;

	svm_asid_free(vcpu);

	svm_memfree(cpudata->vmcb_pa, (vaddr_t)cpudata->vmcb, VMCB_NPAGES);
	svm_memfree(cpudata->iobm_pa, (vaddr_t)cpudata->iobm, IOBM_NPAGES);
	svm_memfree(cpudata->msrbm_pa, (vaddr_t)cpudata->msrbm, MSRBM_NPAGES);

	uvm_km_free(kernel_map, (vaddr_t)cpudata,
	    roundup(sizeof(*cpudata), PAGE_SIZE), UVM_KMF_WIRED);
}

/* -------------------------------------------------------------------------- */

static int
svm_vcpu_configure_cpuid(struct svm_cpudata *cpudata, void *data)
{
	struct nvmm_vcpu_conf_cpuid *cpuid = data;
	size_t i;

	if (__predict_false(cpuid->mask && cpuid->exit)) {
		return EINVAL;
	}
	if (__predict_false(cpuid->mask &&
	    ((cpuid->u.mask.set.eax & cpuid->u.mask.del.eax) ||
	     (cpuid->u.mask.set.ebx & cpuid->u.mask.del.ebx) ||
	     (cpuid->u.mask.set.ecx & cpuid->u.mask.del.ecx) ||
	     (cpuid->u.mask.set.edx & cpuid->u.mask.del.edx)))) {
		return EINVAL;
	}

	/* If unset, delete, to restore the default behavior. */
	if (!cpuid->mask && !cpuid->exit) {
		for (i = 0; i < SVM_NCPUIDS; i++) {
			if (!cpudata->cpuidpresent[i]) {
				continue;
			}
			if (cpudata->cpuid[i].leaf == cpuid->leaf) {
				cpudata->cpuidpresent[i] = false;
			}
		}
		return 0;
	}

	/* If already here, replace. */
	for (i = 0; i < SVM_NCPUIDS; i++) {
		if (!cpudata->cpuidpresent[i]) {
			continue;
		}
		if (cpudata->cpuid[i].leaf == cpuid->leaf) {
			memcpy(&cpudata->cpuid[i], cpuid,
			    sizeof(struct nvmm_vcpu_conf_cpuid));
			return 0;
		}
	}

	/* Not here, insert. */
	for (i = 0; i < SVM_NCPUIDS; i++) {
		if (!cpudata->cpuidpresent[i]) {
			cpudata->cpuidpresent[i] = true;
			memcpy(&cpudata->cpuid[i], cpuid,
			    sizeof(struct nvmm_vcpu_conf_cpuid));
			return 0;
		}
	}

	return ENOBUFS;
}

static int
svm_vcpu_configure(struct nvmm_cpu *vcpu, uint64_t op, void *data)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;

	switch (op) {
	case NVMM_VCPU_CONF_MD(NVMM_VCPU_CONF_CPUID):
		return svm_vcpu_configure_cpuid(cpudata, data);
	default:
		return EINVAL;
	}
}

/* -------------------------------------------------------------------------- */

static void
svm_tlb_flush(struct pmap *pm)
{
	struct nvmm_machine *mach = pm->pm_data;
	struct svm_machdata *machdata = mach->machdata;

	atomic_inc_64(&machdata->mach_htlb_gen);

	/* Generates IPIs, which cause #VMEXITs. */
	pmap_tlb_shootdown(pmap_kernel(), -1, PTE_G, TLBSHOOT_NVMM);
}
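
/*
 * Host-side TLB synchronization: the machine's pmap gets a pm_tlb_flush
 * hook, so that whenever the host changes the nested page tables,
 * svm_tlb_flush() above bumps the machine-wide mach_htlb_gen counter and
 * triggers shootdown IPIs. The IPIs force running VCPUs to #VMEXIT, and
 * the run loop (earlier in this file) is expected to compare its cached
 * generation against mach_htlb_gen and flush the guest TLB before
 * re-entering the guest.
 */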

static void
svm_machine_create(struct nvmm_machine *mach)
{
	struct svm_machdata *machdata;

	/* Fill in pmap info. */
	mach->vm->vm_map.pmap->pm_data = (void *)mach;
	mach->vm->vm_map.pmap->pm_tlb_flush = svm_tlb_flush;

	machdata = kmem_zalloc(sizeof(struct svm_machdata), KM_SLEEP);
	mach->machdata = machdata;

	/* Start with an hTLB flush everywhere. */
	machdata->mach_htlb_gen = 1;
}

static void
svm_machine_destroy(struct nvmm_machine *mach)
{
	kmem_free(mach->machdata, sizeof(struct svm_machdata));
}

static int
svm_machine_configure(struct nvmm_machine *mach, uint64_t op, void *data)
{
	panic("%s: impossible", __func__);
}

/* -------------------------------------------------------------------------- */

static bool
svm_ident(void)
{
	u_int descs[4];
	uint64_t msr;

	if (cpu_vendor != CPUVENDOR_AMD) {
		return false;
	}
	if (!(cpu_feature[3] & CPUID_SVM)) {
		printf("NVMM: SVM not supported\n");
		return false;
	}

	if (curcpu()->ci_max_ext_cpuid < 0x8000000a) {
		printf("NVMM: CPUID leaf not available\n");
		return false;
	}
	x86_cpuid(0x8000000a, descs);

	/* Expect revision 1. */
	if (__SHIFTOUT(descs[0], CPUID_AMD_SVM_REV) != 1) {
		printf("NVMM: SVM revision not supported\n");
		return false;
	}

	/* Want Nested Paging. */
	if (!(descs[3] & CPUID_AMD_SVM_NP)) {
		printf("NVMM: SVM-NP not supported\n");
		return false;
	}

	/* Want nRIP. */
	if (!(descs[3] & CPUID_AMD_SVM_NRIPS)) {
		printf("NVMM: SVM-NRIPS not supported\n");
		return false;
	}

	svm_decode_assist = (descs[3] & CPUID_AMD_SVM_DecodeAssist) != 0;

	msr = rdmsr(MSR_VMCR);
	if ((msr & VMCR_SVMED) && (msr & VMCR_LOCK)) {
		printf("NVMM: SVM disabled in BIOS\n");
		return false;
	}

	return true;
}

static void
svm_init_asid(uint32_t maxasid)
{
	size_t i, j, allocsz;

	mutex_init(&svm_asidlock, MUTEX_DEFAULT, IPL_NONE);

	/* Arbitrarily limit. */
	maxasid = uimin(maxasid, 8192);

	svm_maxasid = maxasid;
	allocsz = roundup(maxasid, 8) / 8;
	svm_asidmap = kmem_zalloc(allocsz, KM_SLEEP);

	/* ASID 0 is reserved for the host. */
	svm_asidmap[0] |= __BIT(0);

	/* ASID n-1 is special, we share it. */
	i = (maxasid - 1) / 8;
	j = (maxasid - 1) % 8;
	svm_asidmap[i] |= __BIT(j);
}

static void
svm_change_cpu(void *arg1, void *arg2)
{
	bool enable = arg1 != NULL;
	uint64_t msr;

	msr = rdmsr(MSR_VMCR);
	if (msr & VMCR_SVMED) {
		wrmsr(MSR_VMCR, msr & ~VMCR_SVMED);
	}

	if (!enable) {
		wrmsr(MSR_VM_HSAVE_PA, 0);
	}

	msr = rdmsr(MSR_EFER);
	if (enable) {
		msr |= EFER_SVME;
	} else {
		msr &= ~EFER_SVME;
	}
	wrmsr(MSR_EFER, msr);

	if (enable) {
		wrmsr(MSR_VM_HSAVE_PA, hsave[cpu_index(curcpu())].pa);
	}
}
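
/*
 * Global initialization: svm_init() below picks the TLB flush command
 * supported by the CPU, sets up the ASID bitmap (ASID 0 is kept for the
 * host, the last ASID is the shared fallback used by svm_asid_alloc()),
 * and enables SVM everywhere through an xcall broadcast of
 * svm_change_cpu(), which sets EFER.SVME and points MSR_VM_HSAVE_PA at a
 * per-CPU host save area page allocated here.
 */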

static void
svm_init(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct vm_page *pg;
	u_int descs[4];
	uint64_t xc;

	x86_cpuid(0x8000000a, descs);

	/* The guest TLB flush command. */
	if (descs[3] & CPUID_AMD_SVM_FlushByASID) {
		svm_ctrl_tlb_flush = VMCB_CTRL_TLB_CTRL_FLUSH_GUEST;
	} else {
		svm_ctrl_tlb_flush = VMCB_CTRL_TLB_CTRL_FLUSH_ALL;
	}

	/* Init the ASID. */
	svm_init_asid(descs[1]);

	/* Init the XCR0 mask. */
	svm_xcr0_mask = SVM_XCR0_MASK_DEFAULT & x86_xsave_features;

	/* Init the max basic CPUID leaf. */
	svm_cpuid_max_basic = uimin(cpuid_level, SVM_CPUID_MAX_BASIC);

	/* Init the max extended CPUID leaf. */
	x86_cpuid(0x80000000, descs);
	svm_cpuid_max_extended = uimin(descs[0], SVM_CPUID_MAX_EXTENDED);

	memset(hsave, 0, sizeof(hsave));
	for (CPU_INFO_FOREACH(cii, ci)) {
		pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO);
		hsave[cpu_index(ci)].pa = VM_PAGE_TO_PHYS(pg);
	}

	xc = xc_broadcast(0, svm_change_cpu, (void *)true, NULL);
	xc_wait(xc);
}

static void
svm_fini_asid(void)
{
	size_t allocsz;

	allocsz = roundup(svm_maxasid, 8) / 8;
	kmem_free(svm_asidmap, allocsz);

	mutex_destroy(&svm_asidlock);
}

static void
svm_fini(void)
{
	uint64_t xc;
	size_t i;

	xc = xc_broadcast(0, svm_change_cpu, (void *)false, NULL);
	xc_wait(xc);

	for (i = 0; i < MAXCPUS; i++) {
		if (hsave[i].pa != 0)
			uvm_pagefree(PHYS_TO_VM_PAGE(hsave[i].pa));
	}

	svm_fini_asid();
}

static void
svm_capability(struct nvmm_capability *cap)
{
	cap->arch.mach_conf_support = 0;
	cap->arch.vcpu_conf_support =
	    NVMM_CAP_ARCH_VCPU_CONF_CPUID;
	cap->arch.xcr0_mask = svm_xcr0_mask;
	cap->arch.mxcsr_mask = x86_fpu_mxcsr_mask;
	cap->arch.conf_cpuid_maxops = SVM_NCPUIDS;
}

const struct nvmm_impl nvmm_x86_svm = {
	.name = "x86-svm",
	.ident = svm_ident,
	.init = svm_init,
	.fini = svm_fini,
	.capability = svm_capability,
	.mach_conf_max = NVMM_X86_MACH_NCONF,
	.mach_conf_sizes = NULL,
	.vcpu_conf_max = NVMM_X86_VCPU_NCONF,
	.vcpu_conf_sizes = svm_vcpu_conf_sizes,
	.state_size = sizeof(struct nvmm_x64_state),
	.machine_create = svm_machine_create,
	.machine_destroy = svm_machine_destroy,
	.machine_configure = svm_machine_configure,
	.vcpu_create = svm_vcpu_create,
	.vcpu_destroy = svm_vcpu_destroy,
	.vcpu_configure = svm_vcpu_configure,
	.vcpu_setstate = svm_vcpu_setstate,
	.vcpu_getstate = svm_vcpu_getstate,
	.vcpu_inject = svm_vcpu_inject,
	.vcpu_run = svm_vcpu_run
};