/*	$NetBSD: nvmm.c,v 1.13 2019/04/07 14:05:15 maxv Exp $	*/

/*
 * Copyright (c) 2018 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Maxime Villard.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: nvmm.c,v 1.13 2019/04/07 14:05:15 maxv Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>

#include <sys/cpu.h>
#include <sys/conf.h>
#include <sys/kmem.h>
#include <sys/module.h>
#include <sys/proc.h>
#include <sys/mman.h>

#include <uvm/uvm.h>
#include <uvm/uvm_page.h>

#include "ioconf.h"

#include <dev/nvmm/nvmm.h>
#include <dev/nvmm/nvmm_internal.h>
#include <dev/nvmm/nvmm_ioctl.h>

static struct nvmm_machine machines[NVMM_MAX_MACHINES];
static volatile unsigned int nmachines __cacheline_aligned;

static const struct nvmm_impl *nvmm_impl_list[] = {
	&nvmm_x86_svm,	/* x86 AMD SVM */
	&nvmm_x86_vmx	/* x86 Intel VMX */
};

static const struct nvmm_impl *nvmm_impl = NULL;

/* -------------------------------------------------------------------------- */

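/*
 * The machines[] slots are allocated lazily: a slot is free when its
 * 'present' flag is clear. nvmm_machine_alloc() claims the first free slot
 * and returns it with its rwlock write-held; the caller releases it with
 * nvmm_machine_put(). nmachines counts the live machines, and is read by
 * the module FINI path to refuse unloading while VMs exist.
 */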
static int
nvmm_machine_alloc(struct nvmm_machine **ret)
{
	struct nvmm_machine *mach;
	size_t i;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		mach = &machines[i];

		rw_enter(&mach->lock, RW_WRITER);
		if (mach->present) {
			rw_exit(&mach->lock);
			continue;
		}

		mach->present = true;
		*ret = mach;
		atomic_inc_uint(&nmachines);
		return 0;
	}

	return ENOBUFS;
}

static void
nvmm_machine_free(struct nvmm_machine *mach)
{
	KASSERT(rw_write_held(&mach->lock));
	KASSERT(mach->present);
	mach->present = false;
	atomic_dec_uint(&nmachines);
}

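/*
 * Look up a machine by ID and lock it, as reader or writer depending on the
 * caller's needs. Fails with ENOENT if the slot is empty, and with EPERM if
 * the machine is not owned by the calling process: a process can only ever
 * touch the VMs it created itself.
 */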
static int
nvmm_machine_get(nvmm_machid_t machid, struct nvmm_machine **ret, bool writer)
{
	struct nvmm_machine *mach;
	krw_t op = writer ? RW_WRITER : RW_READER;

	if (machid >= NVMM_MAX_MACHINES) {
		return EINVAL;
	}
	mach = &machines[machid];

	rw_enter(&mach->lock, op);
	if (!mach->present) {
		rw_exit(&mach->lock);
		return ENOENT;
	}
	if (mach->procid != curproc->p_pid) {
		rw_exit(&mach->lock);
		return EPERM;
	}
	*ret = mach;

	return 0;
}

static void
nvmm_machine_put(struct nvmm_machine *mach)
{
	rw_exit(&mach->lock);
}

/* -------------------------------------------------------------------------- */

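/*
 * VCPU slots follow the same present/lock pattern as machines, but with a
 * mutex rather than an rwlock. nvmm_vcpu_alloc() claims the first free slot
 * (so the first VCPU created on a machine gets cpuid 0) and allocates the
 * backend-specific state area, whose size is advertised by the
 * implementation in nvmm_impl->state_size.
 */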
static int
nvmm_vcpu_alloc(struct nvmm_machine *mach, struct nvmm_cpu **ret)
{
	struct nvmm_cpu *vcpu;
	size_t i;

	for (i = 0; i < NVMM_MAX_VCPUS; i++) {
		vcpu = &mach->cpus[i];

		mutex_enter(&vcpu->lock);
		if (vcpu->present) {
			mutex_exit(&vcpu->lock);
			continue;
		}

		vcpu->present = true;
		vcpu->cpuid = i;
		vcpu->state = kmem_zalloc(nvmm_impl->state_size, KM_SLEEP);
		*ret = vcpu;
		return 0;
	}

	return ENOBUFS;
}

static void
nvmm_vcpu_free(struct nvmm_machine *mach, struct nvmm_cpu *vcpu)
{
	KASSERT(mutex_owned(&vcpu->lock));
	vcpu->present = false;
	kmem_free(vcpu->state, nvmm_impl->state_size);
	vcpu->hcpu_last = -1;
}

int
nvmm_vcpu_get(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    struct nvmm_cpu **ret)
{
	struct nvmm_cpu *vcpu;

	if (cpuid >= NVMM_MAX_VCPUS) {
		return EINVAL;
	}
	vcpu = &mach->cpus[cpuid];

	mutex_enter(&vcpu->lock);
	if (!vcpu->present) {
		mutex_exit(&vcpu->lock);
		return ENOENT;
	}
	*ret = vcpu;

	return 0;
}

void
nvmm_vcpu_put(struct nvmm_cpu *vcpu)
{
	mutex_exit(&vcpu->lock);
}

/* -------------------------------------------------------------------------- */

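/*
 * Tear down every machine owned by the given process. Called from
 * nvmm_close(), so that all resources of a VM monitor are reclaimed when it
 * exits or closes /dev/nvmm, even if it never issued the destroy ioctls
 * itself.
 */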
static void
nvmm_kill_machines(pid_t pid)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	size_t i, j;
	int error;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		mach = &machines[i];

		rw_enter(&mach->lock, RW_WRITER);
		if (!mach->present || mach->procid != pid) {
			rw_exit(&mach->lock);
			continue;
		}

		/* Kill it. */
		for (j = 0; j < NVMM_MAX_VCPUS; j++) {
			error = nvmm_vcpu_get(mach, j, &vcpu);
			if (error)
				continue;
			(*nvmm_impl->vcpu_destroy)(mach, vcpu);
			nvmm_vcpu_free(mach, vcpu);
			nvmm_vcpu_put(vcpu);
		}
		uvmspace_free(mach->vm);

		/* Drop the kernel UOBJ refs. */
		for (j = 0; j < NVMM_MAX_HMAPPINGS; j++) {
			if (!mach->hmap[j].present)
				continue;
			uao_detach(mach->hmap[j].uobj);
		}

		nvmm_machine_free(mach);

		rw_exit(&mach->lock);
	}
}

/* -------------------------------------------------------------------------- */

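/*
 * Ioctl handlers, one per NVMM_IOC_* command. The cdev ioctl path hands
 * each handler a kernel copy of the argument structure; larger payloads
 * (machine configuration, vcpu state) are transferred with explicit
 * copyin/copyout.
 */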
static int
nvmm_capability(struct nvmm_ioc_capability *args)
{
	args->cap.version = NVMM_CAPABILITY_VERSION;
	args->cap.state_size = nvmm_impl->state_size;
	args->cap.max_machines = NVMM_MAX_MACHINES;
	args->cap.max_vcpus = NVMM_MAX_VCPUS;
	args->cap.max_ram = NVMM_MAX_RAM;

	(*nvmm_impl->capability)(&args->cap);

	return 0;
}

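/*
 * Create a machine: claim a slot, record the caller as owner, and give the
 * guest an empty vmspace covering [gpa_begin, gpa_end). Guest RAM is not
 * allocated here; it appears later through the HVA/GPA mapping ioctls.
 */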
static int
nvmm_machine_create(struct nvmm_ioc_machine_create *args)
{
	struct nvmm_machine *mach;
	int error;

	error = nvmm_machine_alloc(&mach);
	if (error)
		return error;

	/* Curproc owns the machine. */
	mach->procid = curproc->p_pid;

	/* Zero out the host mappings. */
	memset(&mach->hmap, 0, sizeof(mach->hmap));

	/* Create the machine vmspace. */
	mach->gpa_begin = 0;
	mach->gpa_end = NVMM_MAX_RAM;
	mach->vm = uvmspace_alloc(0, mach->gpa_end - mach->gpa_begin, false);

	(*nvmm_impl->machine_create)(mach);

	args->machid = mach->machid;
	nvmm_machine_put(mach);

	return 0;
}

static int
nvmm_machine_destroy(struct nvmm_ioc_machine_destroy *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;
	size_t i;

	error = nvmm_machine_get(args->machid, &mach, true);
	if (error)
		return error;

	for (i = 0; i < NVMM_MAX_VCPUS; i++) {
		error = nvmm_vcpu_get(mach, i, &vcpu);
		if (error)
			continue;

		(*nvmm_impl->vcpu_destroy)(mach, vcpu);
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
	}

	(*nvmm_impl->machine_destroy)(mach);

	/* Free the machine vmspace. */
	uvmspace_free(mach->vm);

	/* Drop the kernel UOBJ refs. */
	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		if (!mach->hmap[i].present)
			continue;
		uao_detach(mach->hmap[i].uobj);
	}

	nvmm_machine_free(mach);
	nvmm_machine_put(mach);

	return 0;
}

static int
nvmm_machine_configure(struct nvmm_ioc_machine_configure *args)
{
	struct nvmm_machine *mach;
	size_t allocsz;
	void *data;
	int error;

	if (__predict_false(args->op >= nvmm_impl->conf_max)) {
		return EINVAL;
	}

	allocsz = nvmm_impl->conf_sizes[args->op];
	data = kmem_alloc(allocsz, KM_SLEEP);

	error = nvmm_machine_get(args->machid, &mach, true);
	if (error) {
		kmem_free(data, allocsz);
		return error;
	}

	error = copyin(args->conf, data, allocsz);
	if (error) {
		goto out;
	}

	error = (*nvmm_impl->machine_configure)(mach, args->op, data);

out:
	nvmm_machine_put(mach);
	kmem_free(data, allocsz);
	return error;
}

static int
nvmm_vcpu_create(struct nvmm_ioc_vcpu_create *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_alloc(mach, &vcpu);
	if (error)
		goto out;

	error = (*nvmm_impl->vcpu_create)(mach, vcpu);
	if (error) {
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		goto out;
	}

	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_destroy(struct nvmm_ioc_vcpu_destroy *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	(*nvmm_impl->vcpu_destroy)(mach, vcpu);
	nvmm_vcpu_free(mach, vcpu);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

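/*
 * VCPU state transfer. The vcpu's kernel state area doubles as a bounce
 * buffer: setstate copies the user buffer into it and pushes it down to the
 * implementation, getstate pulls from the implementation and copies the
 * result out. args->flags selects which portions of the state are
 * transferred.
 */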
static int
nvmm_vcpu_setstate(struct nvmm_ioc_vcpu_setstate *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	error = copyin(args->state, vcpu->state, nvmm_impl->state_size);
	if (error) {
		nvmm_vcpu_put(vcpu);
		goto out;
	}

	(*nvmm_impl->vcpu_setstate)(vcpu, vcpu->state, args->flags);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_getstate(struct nvmm_ioc_vcpu_getstate *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	(*nvmm_impl->vcpu_getstate)(vcpu, vcpu->state, args->flags);

	/*
	 * Copy out while the vcpu is still held, so that the state buffer
	 * cannot be freed under us by a concurrent vcpu destroy. This
	 * mirrors the copyin in nvmm_vcpu_setstate().
	 */
	error = copyout(vcpu->state, args->state, nvmm_impl->state_size);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_inject(struct nvmm_ioc_vcpu_inject *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	error = (*nvmm_impl->vcpu_inject)(mach, vcpu, &args->event);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

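/*
 * Run loop. The backend runs the guest until an exit condition is reached.
 * NVMM_EXIT_MEMORY exits that fall inside the guest physical range are
 * handled transparently: the fault is resolved through uvm_fault() on the
 * machine's vmspace (demand-paging guest RAM) and the guest is resumed.
 * Every other exit is bounced to userland for emulation.
 */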
static void
nvmm_do_vcpu_run(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
    struct nvmm_exit *exit)
{
	struct vmspace *vm = mach->vm;

	while (1) {
		(*nvmm_impl->vcpu_run)(mach, vcpu, exit);

		if (__predict_true(exit->reason != NVMM_EXIT_MEMORY)) {
			break;
		}
		if (exit->u.mem.gpa >= mach->gpa_end) {
			break;
		}
		if (uvm_fault(&vm->vm_map, exit->u.mem.gpa, exit->u.mem.prot)) {
			break;
		}
	}
}

static int
nvmm_vcpu_run(struct nvmm_ioc_vcpu_run *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	nvmm_do_vcpu_run(mach, vcpu, &args->exit);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

/* -------------------------------------------------------------------------- */

static struct uvm_object *
nvmm_hmapping_getuobj(struct nvmm_machine *mach, uintptr_t hva, size_t size,
    size_t *off)
{
	struct nvmm_hmapping *hmapping;
	size_t i;

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present) {
			continue;
		}
		if (hva >= hmapping->hva &&
		    hva + size <= hmapping->hva + hmapping->size) {
			*off = hva - hmapping->hva;
			return hmapping->uobj;
		}
	}

	return NULL;
}

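/*
 * Sanity-check a host mapping request: the range must be page-aligned and
 * non-NULL. A range that lies entirely inside an existing mapping passes
 * the overlap check; a range that merely straddles an existing mapping
 * fails with EEXIST.
 */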
static int
nvmm_hmapping_validate(struct nvmm_machine *mach, uintptr_t hva, size_t size)
{
	struct nvmm_hmapping *hmapping;
	size_t i;

	if ((hva % PAGE_SIZE) != 0 || (size % PAGE_SIZE) != 0) {
		return EINVAL;
	}
	if (hva == 0) {
		return EINVAL;
	}

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present) {
			continue;
		}

		if (hva >= hmapping->hva &&
		    hva + size <= hmapping->hva + hmapping->size) {
			break;
		}

		if (hva >= hmapping->hva &&
		    hva < hmapping->hva + hmapping->size) {
			return EEXIST;
		}
		if (hva + size > hmapping->hva &&
		    hva + size <= hmapping->hva + hmapping->size) {
			return EEXIST;
		}
		if (hva <= hmapping->hva &&
		    hva + size >= hmapping->hva + hmapping->size) {
			return EEXIST;
		}
	}

	return 0;
}

static struct nvmm_hmapping *
nvmm_hmapping_alloc(struct nvmm_machine *mach)
{
	struct nvmm_hmapping *hmapping;
	size_t i;

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present) {
			hmapping->present = true;
			return hmapping;
		}
	}

	return NULL;
}

static int
nvmm_hmapping_free(struct nvmm_machine *mach, uintptr_t hva, size_t size)
{
	struct vmspace *vmspace = curproc->p_vmspace;
	struct nvmm_hmapping *hmapping;
	size_t i;

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present || hmapping->hva != hva ||
		    hmapping->size != size) {
			continue;
		}

		uvm_unmap(&vmspace->vm_map, hmapping->hva,
		    hmapping->hva + hmapping->size);
		uao_detach(hmapping->uobj);

		hmapping->uobj = NULL;
		hmapping->present = false;

		return 0;
	}

	return ENOENT;
}

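/*
 * Register a host mapping: the given range of the caller's address space is
 * backed by a fresh anonymous UVM object, mapped shared and pageable. The
 * same uobj can later be mapped into the guest physical space by
 * nvmm_gpa_map(), which is how userland and the guest end up sharing pages.
 * The uobj keeps two references: one for the user mapping, one held by the
 * hmapping slot until teardown.
 */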
static int
nvmm_hva_map(struct nvmm_ioc_hva_map *args)
{
	struct vmspace *vmspace = curproc->p_vmspace;
	struct nvmm_machine *mach;
	struct nvmm_hmapping *hmapping;
	vaddr_t uva;
	int error;

	error = nvmm_machine_get(args->machid, &mach, true);
	if (error)
		return error;

	error = nvmm_hmapping_validate(mach, args->hva, args->size);
	if (error)
		goto out;

	hmapping = nvmm_hmapping_alloc(mach);
	if (hmapping == NULL) {
		error = ENOBUFS;
		goto out;
	}

	hmapping->hva = args->hva;
	hmapping->size = args->size;
	hmapping->uobj = uao_create(hmapping->size, 0);
	uva = hmapping->hva;

	/* Take a reference for the user. */
	uao_reference(hmapping->uobj);

	/* Map the uobj into the user address space, as pageable. */
	error = uvm_map(&vmspace->vm_map, &uva, hmapping->size, hmapping->uobj,
	    0, 0, UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, UVM_INH_SHARE,
	    UVM_ADV_RANDOM, UVM_FLAG_FIXED|UVM_FLAG_UNMAP));
	if (error) {
		uao_detach(hmapping->uobj);
	}

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_hva_unmap(struct nvmm_ioc_hva_unmap *args)
{
	struct nvmm_machine *mach;
	int error;

	error = nvmm_machine_get(args->machid, &mach, true);
	if (error)
		return error;

	error = nvmm_hmapping_free(mach, args->hva, args->size);

	nvmm_machine_put(mach);
	return error;
}

/* -------------------------------------------------------------------------- */

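/*
 * Map a previously registered host range into the guest physical space.
 * The HVA must fall entirely inside one hmapping; the backing uobj is
 * mapped a second time, at the requested GPA, into the machine's vmspace.
 * Actual pages are then faulted in on demand by nvmm_do_vcpu_run().
 */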
static int
nvmm_gpa_map(struct nvmm_ioc_gpa_map *args)
{
	struct nvmm_machine *mach;
	struct uvm_object *uobj;
	gpaddr_t gpa;
	size_t off;
	int error;

	error = nvmm_machine_get(args->machid, &mach, false);
	if (error)
		return error;

	if ((args->prot & ~(PROT_READ|PROT_WRITE|PROT_EXEC)) != 0) {
		error = EINVAL;
		goto out;
	}

	if ((args->gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0 ||
	    (args->hva % PAGE_SIZE) != 0) {
		error = EINVAL;
		goto out;
	}
	if (args->hva == 0) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa < mach->gpa_begin || args->gpa >= mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size <= args->gpa) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size > mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	gpa = args->gpa;

	uobj = nvmm_hmapping_getuobj(mach, args->hva, args->size, &off);
	if (uobj == NULL) {
		error = EINVAL;
		goto out;
	}

	/* Take a reference for the machine. */
	uao_reference(uobj);

	/* Map the uobj into the machine address space, as pageable. */
	error = uvm_map(&mach->vm->vm_map, &gpa, args->size, uobj, off, 0,
	    UVM_MAPFLAG(args->prot, UVM_PROT_RWX, UVM_INH_NONE,
	    UVM_ADV_RANDOM, UVM_FLAG_FIXED|UVM_FLAG_UNMAP));
	if (error) {
		uao_detach(uobj);
		goto out;
	}
	if (gpa != args->gpa) {
		uao_detach(uobj);
		printf("[!] uvm_map problem\n");
		error = EINVAL;
		goto out;
	}

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_gpa_unmap(struct nvmm_ioc_gpa_unmap *args)
{
	struct nvmm_machine *mach;
	gpaddr_t gpa;
	int error;

	error = nvmm_machine_get(args->machid, &mach, false);
	if (error)
		return error;

	if ((args->gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa < mach->gpa_begin || args->gpa >= mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size <= args->gpa) {
		error = EINVAL;
		goto out;
	}
	/* Same bound as nvmm_gpa_map(): a range ending at gpa_end is valid. */
	if (args->gpa + args->size > mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	gpa = args->gpa;

	/* Unmap the memory from the machine. */
	uvm_unmap(&mach->vm->vm_map, gpa, gpa + args->size);

out:
	nvmm_machine_put(mach);
	return error;
}

/* -------------------------------------------------------------------------- */

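/*
 * Backend selection: the first implementation whose ident() hook reports
 * support for the host CPU is chosen. SVM (AMD) is probed before VMX
 * (Intel); at most one of the two can match a given machine.
 */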
static int
nvmm_init(void)
{
	size_t i, n;

	for (i = 0; i < __arraycount(nvmm_impl_list); i++) {
		if (!(*nvmm_impl_list[i]->ident)()) {
			continue;
		}
		nvmm_impl = nvmm_impl_list[i];
		break;
	}
	if (nvmm_impl == NULL) {
		printf("[!] No implementation found\n");
		return ENOTSUP;
	}

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		machines[i].machid = i;
		rw_init(&machines[i].lock);
		for (n = 0; n < NVMM_MAX_VCPUS; n++) {
			mutex_init(&machines[i].cpus[n].lock, MUTEX_DEFAULT,
			    IPL_NONE);
			machines[i].cpus[n].hcpu_last = -1;
		}
	}

	(*nvmm_impl->init)();

	return 0;
}

static void
nvmm_fini(void)
{
	size_t i, n;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		rw_destroy(&machines[i].lock);
		for (n = 0; n < NVMM_MAX_VCPUS; n++) {
			mutex_destroy(&machines[i].cpus[n].lock);
		}
	}

	(*nvmm_impl->fini)();
}

/* -------------------------------------------------------------------------- */

static int
nvmm_open(dev_t dev, int flags, int type, struct lwp *l)
{
	if (minor(dev) != 0) {
		return EXDEV;
	}

	return 0;
}

static int
nvmm_close(dev_t dev, int flags, int type, struct lwp *l)
{
	KASSERT(minor(dev) == 0);

	nvmm_kill_machines(l->l_proc->p_pid);

	return 0;
}

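/*
 * Ioctl dispatcher. For illustration, a hypothetical userland sketch of
 * the expected call sequence (error handling omitted; real consumers go
 * through the libnvmm wrappers rather than raw ioctls):
 *
 *	int fd = open("/dev/nvmm", O_RDWR);
 *
 *	struct nvmm_ioc_machine_create mc;
 *	ioctl(fd, NVMM_IOC_MACHINE_CREATE, &mc);   // sets mc.machid
 *
 *	struct nvmm_ioc_vcpu_create vc = { .machid = mc.machid };
 *	ioctl(fd, NVMM_IOC_VCPU_CREATE, &vc);      // first VCPU gets cpuid 0
 *
 *	// Register host memory, expose it as guest RAM, and run:
 *	//   NVMM_IOC_HVA_MAP (hva, size), NVMM_IOC_GPA_MAP (hva -> gpa),
 *	//   NVMM_IOC_VCPU_SETSTATE, NVMM_IOC_VCPU_RUN.
 */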
static int
nvmm_ioctl(dev_t dev, u_long cmd, void *data, int flags, struct lwp *l)
{
	KASSERT(minor(dev) == 0);

	switch (cmd) {
	case NVMM_IOC_CAPABILITY:
		return nvmm_capability(data);
	case NVMM_IOC_MACHINE_CREATE:
		return nvmm_machine_create(data);
	case NVMM_IOC_MACHINE_DESTROY:
		return nvmm_machine_destroy(data);
	case NVMM_IOC_MACHINE_CONFIGURE:
		return nvmm_machine_configure(data);
	case NVMM_IOC_VCPU_CREATE:
		return nvmm_vcpu_create(data);
	case NVMM_IOC_VCPU_DESTROY:
		return nvmm_vcpu_destroy(data);
	case NVMM_IOC_VCPU_SETSTATE:
		return nvmm_vcpu_setstate(data);
	case NVMM_IOC_VCPU_GETSTATE:
		return nvmm_vcpu_getstate(data);
	case NVMM_IOC_VCPU_INJECT:
		return nvmm_vcpu_inject(data);
	case NVMM_IOC_VCPU_RUN:
		return nvmm_vcpu_run(data);
	case NVMM_IOC_GPA_MAP:
		return nvmm_gpa_map(data);
	case NVMM_IOC_GPA_UNMAP:
		return nvmm_gpa_unmap(data);
	case NVMM_IOC_HVA_MAP:
		return nvmm_hva_map(data);
	case NVMM_IOC_HVA_UNMAP:
		return nvmm_hva_unmap(data);
	default:
		return EINVAL;
	}
}

const struct cdevsw nvmm_cdevsw = {
	.d_open = nvmm_open,
	.d_close = nvmm_close,
	.d_read = noread,
	.d_write = nowrite,
	.d_ioctl = nvmm_ioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_OTHER | D_MPSAFE
};

void
nvmmattach(int nunits)
{
	/* nothing */
}

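/*
 * Module glue. When built as a module, the device node is attached with a
 * fixed character major (345). Unloading is refused while machines exist,
 * and auto-unload is always denied.
 */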
MODULE(MODULE_CLASS_ANY, nvmm, NULL);

static int
nvmm_modcmd(modcmd_t cmd, void *arg)
{
	int error;

	switch (cmd) {
	case MODULE_CMD_INIT:
		error = nvmm_init();
		if (error)
			return error;

#if defined(_MODULE)
		{
			devmajor_t bmajor = NODEVMAJOR;
			devmajor_t cmajor = 345;

			/* mknod /dev/nvmm c 345 0 */
			error = devsw_attach("nvmm", NULL, &bmajor,
			    &nvmm_cdevsw, &cmajor);
			if (error) {
				nvmm_fini();
				return error;
			}
		}
#endif
		return 0;

	case MODULE_CMD_FINI:
		if (nmachines > 0) {
			return EBUSY;
		}
#if defined(_MODULE)
		{
			error = devsw_detach(NULL, &nvmm_cdevsw);
			if (error) {
				return error;
			}
		}
#endif
		nvmm_fini();
		return 0;

	case MODULE_CMD_AUTOUNLOAD:
		return EBUSY;

	default:
		return ENOTTY;
	}
}