1 /* $NetBSD: libnvmm.c,v 1.21 2026/02/07 23:50:20 nia Exp $ */ 2 3 /* 4 * Copyright (c) 2018-2020 Maxime Villard, m00nbsd.net 5 * All rights reserved. 6 * 7 * This code is part of the NVMM hypervisor. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 23 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 25 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31 #include <sys/cdefs.h> 32 33 #include <stdio.h> 34 #include <stdlib.h> 35 #include <string.h> 36 #include <unistd.h> 37 #include <fcntl.h> 38 #include <errno.h> 39 #include <sys/ioctl.h> 40 #include <sys/mman.h> 41 #include <sys/queue.h> 42 #include <machine/vmparam.h> 43 44 #include "nvmm.h" 45 46 static struct nvmm_capability __capability; 47 48 #ifdef __x86_64__ 49 #include "libnvmm_x86.c" 50 #endif 51 52 typedef struct __area { 53 LIST_ENTRY(__area) list; 54 gpaddr_t gpa; 55 uintptr_t hva; 56 size_t size; 57 nvmm_prot_t prot; 58 } area_t; 59 60 typedef LIST_HEAD(, __area) area_list_t; 61 62 static int nvmm_fd = -1; 63 64 /* -------------------------------------------------------------------------- */ 65 66 static bool 67 __area_isvalid(struct nvmm_machine *mach, uintptr_t hva, gpaddr_t gpa, 68 size_t size) 69 { 70 area_list_t *areas = mach->areas; 71 area_t *ent; 72 73 LIST_FOREACH(ent, areas, list) { 74 /* Collision on GPA */ 75 if (gpa >= ent->gpa && gpa < ent->gpa + ent->size) { 76 return false; 77 } 78 if (gpa + size > ent->gpa && 79 gpa + size <= ent->gpa + ent->size) { 80 return false; 81 } 82 if (gpa <= ent->gpa && gpa + size >= ent->gpa + ent->size) { 83 return false; 84 } 85 } 86 87 return true; 88 } 89 90 static int 91 __area_add(struct nvmm_machine *mach, uintptr_t hva, gpaddr_t gpa, size_t size, 92 int prot) 93 { 94 area_list_t *areas = mach->areas; 95 nvmm_prot_t nprot; 96 area_t *area; 97 98 nprot = 0; 99 if (prot & PROT_READ) 100 nprot |= NVMM_PROT_READ; 101 if (prot & PROT_WRITE) 102 nprot |= NVMM_PROT_WRITE; 103 if (prot & PROT_EXEC) 104 nprot |= NVMM_PROT_EXEC; 105 106 if (!__area_isvalid(mach, hva, gpa, size)) { 107 errno = EINVAL; 108 return -1; 109 } 110 111 area = malloc(sizeof(*area)); 112 if (area == NULL) 113 return -1; 114 area->gpa = gpa; 115 area->hva = hva; 116 area->size = size; 117 area->prot = nprot; 118 119 LIST_INSERT_HEAD(areas, area, list); 120 121 return 0; 122 } 123 124 static int 125 __area_delete(struct nvmm_machine *mach, uintptr_t hva, gpaddr_t gpa, 126 size_t size) 127 { 128 area_list_t *areas = mach->areas; 129 area_t *ent, *nxt; 130 131 LIST_FOREACH_SAFE(ent, areas, list, nxt) { 132 if (hva == ent->hva && gpa == ent->gpa && size == ent->size) { 133 LIST_REMOVE(ent, list); 134 free(ent); 135 return 0; 136 } 137 } 138 139 return -1; 140 } 141 142 static void 143 __area_remove_all(struct nvmm_machine *mach) 144 { 145 area_list_t *areas = mach->areas; 146 area_t *ent; 147 148 while ((ent = LIST_FIRST(areas)) != NULL) { 149 LIST_REMOVE(ent, list); 150 free(ent); 151 } 152 153 free(areas); 154 } 155 156 /* -------------------------------------------------------------------------- */ 157 158 int 159 nvmm_init(void) 160 { 161 if (nvmm_fd != -1) 162 return 0; 163 nvmm_fd = open("/dev/nvmm", O_RDONLY | O_CLOEXEC); 164 if (nvmm_fd == -1) 165 return -1; 166 if (nvmm_capability(&__capability) == -1) { 167 close(nvmm_fd); 168 nvmm_fd = -1; 169 return -1; 170 } 171 if (__capability.version != NVMM_KERN_VERSION) { 172 close(nvmm_fd); 173 nvmm_fd = -1; 174 errno = EPROGMISMATCH; 175 return -1; 176 } 177 178 return 0; 179 } 180 181 int 182 nvmm_root_init(void) 183 { 184 if (nvmm_fd != -1) 185 return 0; 186 nvmm_fd = open("/dev/nvmm", O_WRONLY | O_CLOEXEC); 187 if (nvmm_fd == -1) 188 return -1; 189 if (nvmm_capability(&__capability) == -1) { 190 close(nvmm_fd); 191 nvmm_fd = -1; 192 return -1; 193 } 194 if (__capability.version != NVMM_KERN_VERSION) { 195 close(nvmm_fd); 196 nvmm_fd = -1; 197 errno = EPROGMISMATCH; 198 return -1; 199 } 200 201 return 0; 202 } 203 204 int 205 nvmm_capability(struct nvmm_capability *cap) 206 { 207 struct nvmm_ioc_capability args; 208 int ret; 209 210 ret = ioctl(nvmm_fd, NVMM_IOC_CAPABILITY, &args); 211 if (ret == -1) 212 return -1; 213 214 memcpy(cap, &args.cap, sizeof(args.cap)); 215 216 return 0; 217 } 218 219 int 220 nvmm_machine_create(struct nvmm_machine *mach) 221 { 222 struct nvmm_ioc_machine_create args; 223 struct nvmm_comm_page **pages; 224 area_list_t *areas; 225 int ret; 226 227 areas = calloc(1, sizeof(*areas)); 228 if (areas == NULL) 229 return -1; 230 231 pages = calloc(__capability.max_vcpus, sizeof(*pages)); 232 if (pages == NULL) { 233 free(areas); 234 return -1; 235 } 236 237 ret = ioctl(nvmm_fd, NVMM_IOC_MACHINE_CREATE, &args); 238 if (ret == -1) { 239 free(areas); 240 return -1; 241 } 242 243 LIST_INIT(areas); 244 245 memset(mach, 0, sizeof(*mach)); 246 mach->machid = args.machid; 247 mach->pages = pages; 248 mach->areas = areas; 249 250 return 0; 251 } 252 253 int 254 nvmm_machine_destroy(struct nvmm_machine *mach) 255 { 256 struct nvmm_ioc_machine_destroy args; 257 int ret; 258 259 args.machid = mach->machid; 260 261 ret = ioctl(nvmm_fd, NVMM_IOC_MACHINE_DESTROY, &args); 262 if (ret == -1) 263 return -1; 264 265 __area_remove_all(mach); 266 free(mach->pages); 267 268 return 0; 269 } 270 271 int 272 nvmm_machine_configure(struct nvmm_machine *mach, uint64_t op, void *conf) 273 { 274 struct nvmm_ioc_machine_configure args; 275 int ret; 276 277 args.machid = mach->machid; 278 args.op = op; 279 args.conf = conf; 280 281 ret = ioctl(nvmm_fd, NVMM_IOC_MACHINE_CONFIGURE, &args); 282 if (ret == -1) 283 return -1; 284 285 return 0; 286 } 287 288 int 289 nvmm_vcpu_create(struct nvmm_machine *mach, nvmm_cpuid_t cpuid, 290 struct nvmm_vcpu *vcpu) 291 { 292 struct nvmm_ioc_vcpu_create args; 293 struct nvmm_comm_page *comm; 294 int ret; 295 296 vcpu->exit = malloc(sizeof(*vcpu->exit)); 297 if (vcpu->exit == NULL) 298 return -1; 299 300 args.machid = mach->machid; 301 args.cpuid = cpuid; 302 303 ret = ioctl(nvmm_fd, NVMM_IOC_VCPU_CREATE, &args); 304 if (ret == -1) { 305 free(vcpu->exit); 306 return -1; 307 } 308 309 comm = mmap(NULL, PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_FILE, 310 nvmm_fd, NVMM_COMM_OFF(mach->machid, cpuid)); 311 if (comm == MAP_FAILED) { 312 free(vcpu->exit); 313 return -1; 314 } 315 316 mach->pages[cpuid] = comm; 317 318 vcpu->cpuid = cpuid; 319 vcpu->state = &comm->state; 320 vcpu->event = &comm->event; 321 vcpu->stop = &comm->stop; 322 323 return 0; 324 } 325 326 int 327 nvmm_vcpu_destroy(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu) 328 { 329 struct nvmm_ioc_vcpu_destroy args; 330 struct nvmm_comm_page *comm; 331 int ret; 332 333 args.machid = mach->machid; 334 args.cpuid = vcpu->cpuid; 335 336 ret = ioctl(nvmm_fd, NVMM_IOC_VCPU_DESTROY, &args); 337 if (ret == -1) 338 return -1; 339 340 comm = mach->pages[vcpu->cpuid]; 341 munmap(comm, PAGE_SIZE); 342 free(vcpu->exit); 343 344 return 0; 345 } 346 347 int 348 nvmm_vcpu_configure(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu, 349 uint64_t op, void *conf) 350 { 351 struct nvmm_ioc_vcpu_configure args; 352 int ret; 353 354 switch (op) { 355 case NVMM_VCPU_CONF_CALLBACKS: 356 memcpy(&vcpu->cbs, conf, sizeof(vcpu->cbs)); 357 return 0; 358 } 359 360 args.machid = mach->machid; 361 args.cpuid = vcpu->cpuid; 362 args.op = op; 363 args.conf = conf; 364 365 ret = ioctl(nvmm_fd, NVMM_IOC_VCPU_CONFIGURE, &args); 366 if (ret == -1) 367 return -1; 368 369 return 0; 370 } 371 372 int 373 nvmm_vcpu_setstate(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu, 374 uint64_t flags) 375 { 376 struct nvmm_comm_page *comm; 377 378 comm = mach->pages[vcpu->cpuid]; 379 comm->state_commit |= flags; 380 comm->state_cached |= flags; 381 382 return 0; 383 } 384 385 int 386 nvmm_vcpu_getstate(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu, 387 uint64_t flags) 388 { 389 struct nvmm_ioc_vcpu_getstate args; 390 struct nvmm_comm_page *comm; 391 int ret; 392 393 comm = mach->pages[vcpu->cpuid]; 394 395 if (__predict_true((flags & ~comm->state_cached) == 0)) { 396 return 0; 397 } 398 comm->state_wanted = flags & ~comm->state_cached; 399 400 args.machid = mach->machid; 401 args.cpuid = vcpu->cpuid; 402 403 ret = ioctl(nvmm_fd, NVMM_IOC_VCPU_GETSTATE, &args); 404 if (ret == -1) 405 return -1; 406 407 return 0; 408 } 409 410 int 411 nvmm_vcpu_inject(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu) 412 { 413 struct nvmm_comm_page *comm; 414 415 comm = mach->pages[vcpu->cpuid]; 416 comm->event_commit = true; 417 418 return 0; 419 } 420 421 int 422 nvmm_vcpu_run(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu) 423 { 424 struct nvmm_ioc_vcpu_run args; 425 int ret; 426 427 args.machid = mach->machid; 428 args.cpuid = vcpu->cpuid; 429 memset(&args.exit, 0, sizeof(args.exit)); 430 431 ret = ioctl(nvmm_fd, NVMM_IOC_VCPU_RUN, &args); 432 if (ret == -1) 433 return -1; 434 435 /* No comm support yet, just copy. */ 436 memcpy(vcpu->exit, &args.exit, sizeof(args.exit)); 437 438 return 0; 439 } 440 441 int 442 nvmm_gpa_map(struct nvmm_machine *mach, uintptr_t hva, gpaddr_t gpa, 443 size_t size, int prot) 444 { 445 struct nvmm_ioc_gpa_map args; 446 int ret; 447 448 ret = __area_add(mach, hva, gpa, size, prot); 449 if (ret == -1) 450 return -1; 451 452 args.machid = mach->machid; 453 args.hva = hva; 454 args.gpa = gpa; 455 args.size = size; 456 args.prot = prot; 457 458 ret = ioctl(nvmm_fd, NVMM_IOC_GPA_MAP, &args); 459 if (ret == -1) { 460 /* Can't recover. */ 461 abort(); 462 } 463 464 return 0; 465 } 466 467 int 468 nvmm_gpa_unmap(struct nvmm_machine *mach, uintptr_t hva, gpaddr_t gpa, 469 size_t size) 470 { 471 struct nvmm_ioc_gpa_unmap args; 472 int ret; 473 474 ret = __area_delete(mach, hva, gpa, size); 475 if (ret == -1) 476 return -1; 477 478 args.machid = mach->machid; 479 args.gpa = gpa; 480 args.size = size; 481 482 ret = ioctl(nvmm_fd, NVMM_IOC_GPA_UNMAP, &args); 483 if (ret == -1) { 484 /* Can't recover. */ 485 abort(); 486 } 487 488 return 0; 489 } 490 491 int 492 nvmm_hva_map(struct nvmm_machine *mach, uintptr_t hva, size_t size) 493 { 494 struct nvmm_ioc_hva_map args; 495 int ret; 496 497 args.machid = mach->machid; 498 args.hva = hva; 499 args.size = size; 500 501 ret = ioctl(nvmm_fd, NVMM_IOC_HVA_MAP, &args); 502 if (ret == -1) 503 return -1; 504 505 return 0; 506 } 507 508 int 509 nvmm_hva_unmap(struct nvmm_machine *mach, uintptr_t hva, size_t size) 510 { 511 struct nvmm_ioc_hva_unmap args; 512 int ret; 513 514 args.machid = mach->machid; 515 args.hva = hva; 516 args.size = size; 517 518 ret = ioctl(nvmm_fd, NVMM_IOC_HVA_UNMAP, &args); 519 if (ret == -1) 520 return -1; 521 522 return 0; 523 } 524 525 /* 526 * nvmm_gva_to_gpa(): architecture-specific. 527 */ 528 529 int 530 nvmm_gpa_to_hva(struct nvmm_machine *mach, gpaddr_t gpa, uintptr_t *hva, 531 nvmm_prot_t *prot) 532 { 533 area_list_t *areas = mach->areas; 534 area_t *ent; 535 536 LIST_FOREACH(ent, areas, list) { 537 if (gpa >= ent->gpa && gpa < ent->gpa + ent->size) { 538 *hva = ent->hva + (gpa - ent->gpa); 539 *prot = ent->prot; 540 return 0; 541 } 542 } 543 544 errno = ENOENT; 545 return -1; 546 } 547 548 /* 549 * nvmm_assist_io(): architecture-specific. 550 */ 551 552 /* 553 * nvmm_assist_mem(): architecture-specific. 554 */ 555 556 int 557 nvmm_ctl(int op, void *data, size_t size) 558 { 559 struct nvmm_ioc_ctl args; 560 int ret; 561 562 args.op = op; 563 args.data = data; 564 args.size = size; 565 566 ret = ioctl(nvmm_fd, NVMM_IOC_CTL, &args); 567 if (ret == -1) 568 return -1; 569 570 return 0; 571 } 572 573 int 574 nvmm_vcpu_stop(struct nvmm_vcpu *vcpu) 575 { 576 577 *vcpu->stop = 1; 578 579 return 0; 580 } 581