/*	$NetBSD: nvmm.c,v 1.34 2020/08/18 17:03:58 maxv Exp $	*/

/*
 * Copyright (c) 2018-2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Maxime Villard.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: nvmm.c,v 1.34 2020/08/18 17:03:58 maxv Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>

#include <sys/cpu.h>
#include <sys/conf.h>
#include <sys/kmem.h>
#include <sys/module.h>
#include <sys/proc.h>
#include <sys/mman.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/device.h>

#include <uvm/uvm.h>
#include <uvm/uvm_page.h>

#include "ioconf.h"

#include <dev/nvmm/nvmm.h>
#include <dev/nvmm/nvmm_internal.h>
#include <dev/nvmm/nvmm_ioctl.h>

static struct nvmm_machine machines[NVMM_MAX_MACHINES];
static volatile unsigned int nmachines __cacheline_aligned;

static const struct nvmm_impl *nvmm_impl_list[] = {
#if defined(__x86_64__)
	&nvmm_x86_svm,	/* x86 AMD SVM */
	&nvmm_x86_vmx	/* x86 Intel VMX */
#endif
};

static const struct nvmm_impl *nvmm_impl = NULL;

static struct nvmm_owner root_owner;

/* -------------------------------------------------------------------------- */

static int
nvmm_machine_alloc(struct nvmm_machine **ret)
{
	struct nvmm_machine *mach;
	size_t i;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		mach = &machines[i];

		rw_enter(&mach->lock, RW_WRITER);
		if (mach->present) {
			rw_exit(&mach->lock);
			continue;
		}

		mach->present = true;
		mach->time = time_second;
		*ret = mach;
		atomic_inc_uint(&nmachines);
		return 0;
	}

	return ENOBUFS;
}

static void
nvmm_machine_free(struct nvmm_machine *mach)
{
	KASSERT(rw_write_held(&mach->lock));
	KASSERT(mach->present);
	mach->present = false;
	atomic_dec_uint(&nmachines);
}

static int
nvmm_machine_get(struct nvmm_owner *owner, nvmm_machid_t machid,
    struct nvmm_machine **ret, bool writer)
{
	struct nvmm_machine *mach;
	krw_t op = writer ? RW_WRITER : RW_READER;

	if (machid >= NVMM_MAX_MACHINES) {
		return EINVAL;
	}
	mach = &machines[machid];

	rw_enter(&mach->lock, op);
	if (!mach->present) {
		rw_exit(&mach->lock);
		return ENOENT;
	}
	if (owner != &root_owner && mach->owner != owner) {
		rw_exit(&mach->lock);
		return EPERM;
	}
	*ret = mach;

	return 0;
}

static void
nvmm_machine_put(struct nvmm_machine *mach)
{
	rw_exit(&mach->lock);
}
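
/*
 * Locking model: machines[] is a static table of NVMM_MAX_MACHINES slots,
 * each protected by its own rwlock.  nvmm_machine_alloc() and
 * nvmm_machine_get() return with the slot lock held (write for alloc, read
 * or write for get, as requested); nvmm_machine_put() releases it.
 * nmachines counts the live slots, and is what nvmm_detach() consults to
 * refuse detaching while machines still exist.
 */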

/* -------------------------------------------------------------------------- */

static int
nvmm_vcpu_alloc(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    struct nvmm_cpu **ret)
{
	struct nvmm_cpu *vcpu;

	if (cpuid >= NVMM_MAX_VCPUS) {
		return EINVAL;
	}
	vcpu = &mach->cpus[cpuid];

	mutex_enter(&vcpu->lock);
	if (vcpu->present) {
		mutex_exit(&vcpu->lock);
		return EBUSY;
	}

	vcpu->present = true;
	vcpu->comm = NULL;
	vcpu->hcpu_last = -1;
	*ret = vcpu;
	return 0;
}

static void
nvmm_vcpu_free(struct nvmm_machine *mach, struct nvmm_cpu *vcpu)
{
	KASSERT(mutex_owned(&vcpu->lock));
	vcpu->present = false;
	if (vcpu->comm != NULL) {
		uvm_deallocate(kernel_map, (vaddr_t)vcpu->comm, PAGE_SIZE);
	}
}

static int
nvmm_vcpu_get(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    struct nvmm_cpu **ret)
{
	struct nvmm_cpu *vcpu;

	if (cpuid >= NVMM_MAX_VCPUS) {
		return EINVAL;
	}
	vcpu = &mach->cpus[cpuid];

	mutex_enter(&vcpu->lock);
	if (!vcpu->present) {
		mutex_exit(&vcpu->lock);
		return ENOENT;
	}
	*ret = vcpu;

	return 0;
}

static void
nvmm_vcpu_put(struct nvmm_cpu *vcpu)
{
	mutex_exit(&vcpu->lock);
}

/* -------------------------------------------------------------------------- */

static void
nvmm_kill_machines(struct nvmm_owner *owner)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	size_t i, j;
	int error;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		mach = &machines[i];

		rw_enter(&mach->lock, RW_WRITER);
		if (!mach->present || mach->owner != owner) {
			rw_exit(&mach->lock);
			continue;
		}

		/* Kill it. */
		for (j = 0; j < NVMM_MAX_VCPUS; j++) {
			error = nvmm_vcpu_get(mach, j, &vcpu);
			if (error)
				continue;
			(*nvmm_impl->vcpu_destroy)(mach, vcpu);
			nvmm_vcpu_free(mach, vcpu);
			nvmm_vcpu_put(vcpu);
		}
		(*nvmm_impl->machine_destroy)(mach);
		uvmspace_free(mach->vm);

		/* Drop the kernel UOBJ refs. */
		for (j = 0; j < NVMM_MAX_HMAPPINGS; j++) {
			if (!mach->hmap[j].present)
				continue;
			uao_detach(mach->hmap[j].uobj);
		}

		nvmm_machine_free(mach);

		rw_exit(&mach->lock);
	}
}
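
/*
 * nvmm_kill_machines() is the teardown path taken from nvmm_close(): when an
 * owner's file descriptor goes away, every machine still belonging to that
 * owner is destroyed, mirroring what an explicit NVMM_IOC_MACHINE_DESTROY
 * would have done for each of them.
 */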

/* -------------------------------------------------------------------------- */

static int
nvmm_capability(struct nvmm_owner *owner, struct nvmm_ioc_capability *args)
{
	args->cap.version = NVMM_KERN_VERSION;
	args->cap.state_size = nvmm_impl->state_size;
	args->cap.max_machines = NVMM_MAX_MACHINES;
	args->cap.max_vcpus = NVMM_MAX_VCPUS;
	args->cap.max_ram = NVMM_MAX_RAM;

	(*nvmm_impl->capability)(&args->cap);

	return 0;
}

static int
nvmm_machine_create(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_create *args)
{
	struct nvmm_machine *mach;
	int error;

	error = nvmm_machine_alloc(&mach);
	if (error)
		return error;

	/* Curproc owns the machine. */
	mach->owner = owner;

	/* Zero out the host mappings. */
	memset(&mach->hmap, 0, sizeof(mach->hmap));

	/* Create the machine vmspace. */
	mach->gpa_begin = 0;
	mach->gpa_end = NVMM_MAX_RAM;
	mach->vm = uvmspace_alloc(0, mach->gpa_end - mach->gpa_begin, false);

	/* Create the comm uobj. */
	mach->commuobj = uao_create(NVMM_MAX_VCPUS * PAGE_SIZE, 0);

	(*nvmm_impl->machine_create)(mach);

	args->machid = mach->machid;
	nvmm_machine_put(mach);

	return 0;
}

static int
nvmm_machine_destroy(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_destroy *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;
	size_t i;

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error)
		return error;

	for (i = 0; i < NVMM_MAX_VCPUS; i++) {
		error = nvmm_vcpu_get(mach, i, &vcpu);
		if (error)
			continue;

		(*nvmm_impl->vcpu_destroy)(mach, vcpu);
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
	}

	(*nvmm_impl->machine_destroy)(mach);

	/* Free the machine vmspace. */
	uvmspace_free(mach->vm);

	/* Drop the kernel UOBJ refs. */
	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		if (!mach->hmap[i].present)
			continue;
		uao_detach(mach->hmap[i].uobj);
	}

	nvmm_machine_free(mach);
	nvmm_machine_put(mach);

	return 0;
}

static int
nvmm_machine_configure(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_configure *args)
{
	struct nvmm_machine *mach;
	size_t allocsz;
	uint64_t op;
	void *data;
	int error;

	op = NVMM_MACH_CONF_MD(args->op);
	if (__predict_false(op >= nvmm_impl->mach_conf_max)) {
		return EINVAL;
	}

	allocsz = nvmm_impl->mach_conf_sizes[op];
	data = kmem_alloc(allocsz, KM_SLEEP);

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error) {
		kmem_free(data, allocsz);
		return error;
	}

	error = copyin(args->conf, data, allocsz);
	if (error) {
		goto out;
	}

	error = (*nvmm_impl->machine_configure)(mach, op, data);

out:
	nvmm_machine_put(mach);
	kmem_free(data, allocsz);
	return error;
}

static int
nvmm_vcpu_create(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_create *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_alloc(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	/* Allocate the comm page. */
	uao_reference(mach->commuobj);
	error = uvm_map(kernel_map, (vaddr_t *)&vcpu->comm, PAGE_SIZE,
	    mach->commuobj, args->cpuid * PAGE_SIZE, 0, UVM_MAPFLAG(UVM_PROT_RW,
	    UVM_PROT_RW, UVM_INH_SHARE, UVM_ADV_RANDOM, 0));
	if (error) {
		uao_detach(mach->commuobj);
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		goto out;
	}
	error = uvm_map_pageable(kernel_map, (vaddr_t)vcpu->comm,
	    (vaddr_t)vcpu->comm + PAGE_SIZE, false, 0);
	if (error) {
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		goto out;
	}
	memset(vcpu->comm, 0, PAGE_SIZE);

	error = (*nvmm_impl->vcpu_create)(mach, vcpu);
	if (error) {
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		goto out;
	}

	nvmm_vcpu_put(vcpu);

	atomic_inc_uint(&mach->ncpus);

out:
	nvmm_machine_put(mach);
	return error;
}
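
/*
 * The comm page: each VCPU gets one page of the machine's commuobj, at
 * offset cpuid * PAGE_SIZE.  Above it is mapped (and wired, via
 * uvm_map_pageable) into the kernel map; nvmm_mmap() below hands the same
 * uobj page to userland, so both sides share VCPU state without a copyin
 * or copyout on each run.
 */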

static int
nvmm_vcpu_destroy(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_destroy *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	(*nvmm_impl->vcpu_destroy)(mach, vcpu);
	nvmm_vcpu_free(mach, vcpu);
	nvmm_vcpu_put(vcpu);

	atomic_dec_uint(&mach->ncpus);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_configure(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_configure *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	size_t allocsz;
	uint64_t op;
	void *data;
	int error;

	op = NVMM_VCPU_CONF_MD(args->op);
	if (__predict_false(op >= nvmm_impl->vcpu_conf_max))
		return EINVAL;

	allocsz = nvmm_impl->vcpu_conf_sizes[op];
	data = kmem_alloc(allocsz, KM_SLEEP);

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error) {
		kmem_free(data, allocsz);
		return error;
	}

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error) {
		nvmm_machine_put(mach);
		kmem_free(data, allocsz);
		return error;
	}

	error = copyin(args->conf, data, allocsz);
	if (error) {
		goto out;
	}

	error = (*nvmm_impl->vcpu_configure)(vcpu, op, data);

out:
	nvmm_vcpu_put(vcpu);
	nvmm_machine_put(mach);
	kmem_free(data, allocsz);
	return error;
}

static int
nvmm_vcpu_setstate(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_setstate *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	(*nvmm_impl->vcpu_setstate)(vcpu);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_getstate(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_getstate *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	(*nvmm_impl->vcpu_getstate)(vcpu);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_inject(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_inject *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	error = (*nvmm_impl->vcpu_inject)(vcpu);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_do_vcpu_run(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
    struct nvmm_vcpu_exit *exit)
{
	struct vmspace *vm = mach->vm;
	int ret;

	while (1) {
		/* Got a signal? Or pending resched? Leave. */
		if (__predict_false(nvmm_return_needed())) {
			exit->reason = NVMM_VCPU_EXIT_NONE;
			return 0;
		}

		/* Run the VCPU. */
		ret = (*nvmm_impl->vcpu_run)(mach, vcpu, exit);
		if (__predict_false(ret != 0)) {
			return ret;
		}

		/* Process nested page faults. */
		if (__predict_true(exit->reason != NVMM_VCPU_EXIT_MEMORY)) {
			break;
		}
		if (exit->u.mem.gpa >= mach->gpa_end) {
			break;
		}
		if (uvm_fault(&vm->vm_map, exit->u.mem.gpa, exit->u.mem.prot)) {
			break;
		}
	}

	return 0;
}
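
/*
 * The run loop above only returns to userland when it has to: exits caused
 * by nested page faults (the guest touched a GPA whose backing page is
 * valid but not resident) are handled in-kernel, by calling uvm_fault() on
 * the machine's vmspace and re-entering the guest.  Everything else, plus
 * pending signals/reschedules (NVMM_VCPU_EXIT_NONE), bubbles up.
 *
 * A rough sketch of a caller driving this through the raw ioctl
 * (illustrative only; real userland goes through libnvmm):
 *
 *	struct nvmm_ioc_vcpu_run args = { .machid = machid, .cpuid = cpuid };
 *	for (;;) {
 *		if (ioctl(nvmm_fd, NVMM_IOC_VCPU_RUN, &args) == -1)
 *			err(EXIT_FAILURE, "NVMM_IOC_VCPU_RUN");
 *		if (args.exit.reason == NVMM_VCPU_EXIT_NONE)
 *			continue;	// signal/resched, just re-enter
 *		handle_exit(&args.exit);
 *	}
 */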

static int
nvmm_vcpu_run(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_run *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	error = nvmm_do_vcpu_run(mach, vcpu, &args->exit);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

/* -------------------------------------------------------------------------- */

static struct uvm_object *
nvmm_hmapping_getuobj(struct nvmm_machine *mach, uintptr_t hva, size_t size,
   size_t *off)
{
	struct nvmm_hmapping *hmapping;
	size_t i;

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present) {
			continue;
		}
		if (hva >= hmapping->hva &&
		    hva + size <= hmapping->hva + hmapping->size) {
			*off = hva - hmapping->hva;
			return hmapping->uobj;
		}
	}

	return NULL;
}

static int
nvmm_hmapping_validate(struct nvmm_machine *mach, uintptr_t hva, size_t size)
{
	struct nvmm_hmapping *hmapping;
	size_t i;

	if ((hva % PAGE_SIZE) != 0 || (size % PAGE_SIZE) != 0) {
		return EINVAL;
	}
	if (hva == 0) {
		return EINVAL;
	}

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present) {
			continue;
		}

		if (hva >= hmapping->hva &&
		    hva + size <= hmapping->hva + hmapping->size) {
			break;
		}

		if (hva >= hmapping->hva &&
		    hva < hmapping->hva + hmapping->size) {
			return EEXIST;
		}
		if (hva + size > hmapping->hva &&
		    hva + size <= hmapping->hva + hmapping->size) {
			return EEXIST;
		}
		if (hva <= hmapping->hva &&
		    hva + size >= hmapping->hva + hmapping->size) {
			return EEXIST;
		}
	}

	return 0;
}
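
/*
 * The overlap checks in nvmm_hmapping_validate() reject any partial overlap
 * with an existing mapping: a new range that starts inside one, ends inside
 * one, or fully engulfs one fails with EEXIST.  A range that fits entirely
 * within an existing mapping breaks out early and passes validation.
 * Example: with an existing mapping [0x10000, 0x20000), a request for
 * [0x1c000, 0x24000) overlaps its tail and fails, while [0x30000, 0x34000)
 * is disjoint and validates.
 */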

static struct nvmm_hmapping *
nvmm_hmapping_alloc(struct nvmm_machine *mach)
{
	struct nvmm_hmapping *hmapping;
	size_t i;

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present) {
			hmapping->present = true;
			return hmapping;
		}
	}

	return NULL;
}

static int
nvmm_hmapping_free(struct nvmm_machine *mach, uintptr_t hva, size_t size)
{
	struct vmspace *vmspace = curproc->p_vmspace;
	struct nvmm_hmapping *hmapping;
	size_t i;

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present || hmapping->hva != hva ||
		    hmapping->size != size) {
			continue;
		}

		uvm_unmap(&vmspace->vm_map, hmapping->hva,
		    hmapping->hva + hmapping->size);
		uao_detach(hmapping->uobj);

		hmapping->uobj = NULL;
		hmapping->present = false;

		return 0;
	}

	return ENOENT;
}

static int
nvmm_hva_map(struct nvmm_owner *owner, struct nvmm_ioc_hva_map *args)
{
	struct vmspace *vmspace = curproc->p_vmspace;
	struct nvmm_machine *mach;
	struct nvmm_hmapping *hmapping;
	vaddr_t uva;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error)
		return error;

	error = nvmm_hmapping_validate(mach, args->hva, args->size);
	if (error)
		goto out;

	hmapping = nvmm_hmapping_alloc(mach);
	if (hmapping == NULL) {
		error = ENOBUFS;
		goto out;
	}

	hmapping->hva = args->hva;
	hmapping->size = args->size;
	hmapping->uobj = uao_create(hmapping->size, 0);
	uva = hmapping->hva;

	/* Take a reference for the user. */
	uao_reference(hmapping->uobj);

	/* Map the uobj into the user address space, as pageable. */
	error = uvm_map(&vmspace->vm_map, &uva, hmapping->size, hmapping->uobj,
	    0, 0, UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, UVM_INH_SHARE,
	    UVM_ADV_RANDOM, UVM_FLAG_FIXED|UVM_FLAG_UNMAP));
	if (error) {
		uao_detach(hmapping->uobj);
	}

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_hva_unmap(struct nvmm_owner *owner, struct nvmm_ioc_hva_unmap *args)
{
	struct nvmm_machine *mach;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error)
		return error;

	error = nvmm_hmapping_free(mach, args->hva, args->size);

	nvmm_machine_put(mach);
	return error;
}

/* -------------------------------------------------------------------------- */

static int
nvmm_gpa_map(struct nvmm_owner *owner, struct nvmm_ioc_gpa_map *args)
{
	struct nvmm_machine *mach;
	struct uvm_object *uobj;
	gpaddr_t gpa;
	size_t off;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	if ((args->prot & ~(PROT_READ|PROT_WRITE|PROT_EXEC)) != 0) {
		error = EINVAL;
		goto out;
	}

	if ((args->gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0 ||
	    (args->hva % PAGE_SIZE) != 0) {
		error = EINVAL;
		goto out;
	}
	if (args->hva == 0) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa < mach->gpa_begin || args->gpa >= mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size <= args->gpa) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size > mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	gpa = args->gpa;

	uobj = nvmm_hmapping_getuobj(mach, args->hva, args->size, &off);
	if (uobj == NULL) {
		error = EINVAL;
		goto out;
	}

	/* Take a reference for the machine. */
	uao_reference(uobj);

	/* Map the uobj into the machine address space, as pageable. */
	error = uvm_map(&mach->vm->vm_map, &gpa, args->size, uobj, off, 0,
	    UVM_MAPFLAG(args->prot, UVM_PROT_RWX, UVM_INH_NONE,
	    UVM_ADV_RANDOM, UVM_FLAG_FIXED|UVM_FLAG_UNMAP));
	if (error) {
		uao_detach(uobj);
		goto out;
	}
	if (gpa != args->gpa) {
		uao_detach(uobj);
		printf("[!] uvm_map problem\n");
		error = EINVAL;
		goto out;
	}

out:
	nvmm_machine_put(mach);
	return error;
}
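
/*
 * Guest RAM is thus double-mapped anonymous memory: nvmm_hva_map() creates
 * an anonymous uobj and maps it into the emulator's address space, and
 * nvmm_gpa_map() maps a window of that same uobj into the machine's vmspace
 * at the requested guest-physical address.  The emulator writes through the
 * HVA; the guest sees the result at the GPA, with no copying involved.
 */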

static int
nvmm_gpa_unmap(struct nvmm_owner *owner, struct nvmm_ioc_gpa_unmap *args)
{
	struct nvmm_machine *mach;
	gpaddr_t gpa;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	if ((args->gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa < mach->gpa_begin || args->gpa >= mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size <= args->gpa) {
		error = EINVAL;
		goto out;
	}
	/* Same bound as nvmm_gpa_map(): a range may end exactly at gpa_end. */
	if (args->gpa + args->size > mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	gpa = args->gpa;

	/* Unmap the memory from the machine. */
	uvm_unmap(&mach->vm->vm_map, gpa, gpa + args->size);

out:
	nvmm_machine_put(mach);
	return error;
}

/* -------------------------------------------------------------------------- */

static int
nvmm_ctl_mach_info(struct nvmm_owner *owner, struct nvmm_ioc_ctl *args)
{
	struct nvmm_ctl_mach_info ctl;
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;
	size_t i;

	if (args->size != sizeof(ctl))
		return EINVAL;
	error = copyin(args->data, &ctl, sizeof(ctl));
	if (error)
		return error;

	error = nvmm_machine_get(owner, ctl.machid, &mach, true);
	if (error)
		return error;

	ctl.nvcpus = 0;
	for (i = 0; i < NVMM_MAX_VCPUS; i++) {
		error = nvmm_vcpu_get(mach, i, &vcpu);
		if (error)
			continue;
		ctl.nvcpus++;
		nvmm_vcpu_put(vcpu);
	}

	ctl.nram = 0;
	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		if (!mach->hmap[i].present)
			continue;
		ctl.nram += mach->hmap[i].size;
	}

	ctl.pid = mach->owner->pid;
	ctl.time = mach->time;

	nvmm_machine_put(mach);

	error = copyout(&ctl, args->data, sizeof(ctl));
	if (error)
		return error;

	return 0;
}

static int
nvmm_ctl(struct nvmm_owner *owner, struct nvmm_ioc_ctl *args)
{
	switch (args->op) {
	case NVMM_CTL_MACH_INFO:
		return nvmm_ctl_mach_info(owner, args);
	default:
		return EINVAL;
	}
}

/* -------------------------------------------------------------------------- */

static const struct nvmm_impl *
nvmm_ident(void)
{
	size_t i;

	for (i = 0; i < __arraycount(nvmm_impl_list); i++) {
		if ((*nvmm_impl_list[i]->ident)())
			return nvmm_impl_list[i];
	}

	return NULL;
}

static int
nvmm_init(void)
{
	size_t i, n;

	nvmm_impl = nvmm_ident();
	if (nvmm_impl == NULL)
		return ENOTSUP;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		machines[i].machid = i;
		rw_init(&machines[i].lock);
		for (n = 0; n < NVMM_MAX_VCPUS; n++) {
			machines[i].cpus[n].present = false;
			machines[i].cpus[n].cpuid = n;
			mutex_init(&machines[i].cpus[n].lock, MUTEX_DEFAULT,
			    IPL_NONE);
		}
	}

	(*nvmm_impl->init)();

	return 0;
}
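
/*
 * Backend selection happens once, here: nvmm_ident() walks nvmm_impl_list[]
 * in order and picks the first implementation whose ident() callback reports
 * the host CPU as capable, so on x86_64 AMD SVM is probed before Intel VMX.
 * Everything above dispatches through the chosen nvmm_impl.
 */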

static void
nvmm_fini(void)
{
	size_t i, n;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		rw_destroy(&machines[i].lock);
		for (n = 0; n < NVMM_MAX_VCPUS; n++) {
			mutex_destroy(&machines[i].cpus[n].lock);
		}
	}

	(*nvmm_impl->fini)();
	nvmm_impl = NULL;
}

/* -------------------------------------------------------------------------- */

static dev_type_open(nvmm_open);

const struct cdevsw nvmm_cdevsw = {
	.d_open = nvmm_open,
	.d_close = noclose,
	.d_read = noread,
	.d_write = nowrite,
	.d_ioctl = noioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_OTHER | D_MPSAFE
};

static int nvmm_ioctl(file_t *, u_long, void *);
static int nvmm_close(file_t *);
static int nvmm_mmap(file_t *, off_t *, size_t, int, int *, int *,
    struct uvm_object **, int *);

static const struct fileops nvmm_fileops = {
	.fo_read = fbadop_read,
	.fo_write = fbadop_write,
	.fo_ioctl = nvmm_ioctl,
	.fo_fcntl = fnullop_fcntl,
	.fo_poll = fnullop_poll,
	.fo_stat = fbadop_stat,
	.fo_close = nvmm_close,
	.fo_kqfilter = fnullop_kqfilter,
	.fo_restart = fnullop_restart,
	.fo_mmap = nvmm_mmap,
};

static int
nvmm_open(dev_t dev, int flags, int type, struct lwp *l)
{
	struct nvmm_owner *owner;
	struct file *fp;
	int error, fd;

	if (__predict_false(nvmm_impl == NULL))
		return ENXIO;
	if (minor(dev) != 0)
		return EXDEV;
	if (!(flags & O_CLOEXEC))
		return EINVAL;
	error = fd_allocfile(&fp, &fd);
	if (error)
		return error;

	if (OFLAGS(flags) & O_WRONLY) {
		owner = &root_owner;
	} else {
		owner = kmem_alloc(sizeof(*owner), KM_SLEEP);
		owner->pid = l->l_proc->p_pid;
	}

	return fd_clone(fp, fd, flags, &nvmm_fileops, owner);
}
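
/*
 * Two quirks of nvmm_open() worth noting.  The device must be opened with
 * O_CLOEXEC, so an nvmm descriptor never leaks across exec.  And a
 * write-only open binds the descriptor to root_owner, which
 * nvmm_machine_get() treats as allowed to reach any machine regardless of
 * owner; this is what lets the NVMM_CTL_MACH_INFO path above inspect
 * machines belonging to other processes.
 */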

static int
nvmm_close(file_t *fp)
{
	struct nvmm_owner *owner = fp->f_data;

	KASSERT(owner != NULL);
	nvmm_kill_machines(owner);
	if (owner != &root_owner) {
		kmem_free(owner, sizeof(*owner));
	}
	fp->f_data = NULL;

	return 0;
}

static int
nvmm_mmap(file_t *fp, off_t *offp, size_t size, int prot, int *flagsp,
    int *advicep, struct uvm_object **uobjp, int *maxprotp)
{
	struct nvmm_owner *owner = fp->f_data;
	struct nvmm_machine *mach;
	nvmm_machid_t machid;
	nvmm_cpuid_t cpuid;
	int error;

	if (prot & PROT_EXEC)
		return EACCES;
	if (size != PAGE_SIZE)
		return EINVAL;

	cpuid = NVMM_COMM_CPUID(*offp);
	if (__predict_false(cpuid >= NVMM_MAX_VCPUS))
		return EINVAL;

	machid = NVMM_COMM_MACHID(*offp);
	error = nvmm_machine_get(owner, machid, &mach, false);
	if (error)
		return error;

	uao_reference(mach->commuobj);
	*uobjp = mach->commuobj;
	*offp = cpuid * PAGE_SIZE;
	*maxprotp = prot;
	*advicep = UVM_ADV_RANDOM;

	nvmm_machine_put(mach);
	return 0;
}
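
/*
 * The mmap offset doubles as an address: the machine id and VCPU id are
 * unpacked from *offp by the NVMM_COMM_MACHID()/NVMM_COMM_CPUID() macros,
 * and the request is then redirected onto the right page of the machine's
 * commuobj.  A caller maps a VCPU's comm page along these lines (sketch;
 * NVMM_COMM_OFF() here stands for whatever encoding the nvmm headers pair
 * with NVMM_COMM_MACHID()/NVMM_COMM_CPUID()):
 *
 *	void *comm = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE,
 *	    MAP_SHARED, nvmm_fd, NVMM_COMM_OFF(machid, cpuid));
 */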

static int
nvmm_ioctl(file_t *fp, u_long cmd, void *data)
{
	struct nvmm_owner *owner = fp->f_data;

	KASSERT(owner != NULL);

	switch (cmd) {
	case NVMM_IOC_CAPABILITY:
		return nvmm_capability(owner, data);
	case NVMM_IOC_MACHINE_CREATE:
		return nvmm_machine_create(owner, data);
	case NVMM_IOC_MACHINE_DESTROY:
		return nvmm_machine_destroy(owner, data);
	case NVMM_IOC_MACHINE_CONFIGURE:
		return nvmm_machine_configure(owner, data);
	case NVMM_IOC_VCPU_CREATE:
		return nvmm_vcpu_create(owner, data);
	case NVMM_IOC_VCPU_DESTROY:
		return nvmm_vcpu_destroy(owner, data);
	case NVMM_IOC_VCPU_CONFIGURE:
		return nvmm_vcpu_configure(owner, data);
	case NVMM_IOC_VCPU_SETSTATE:
		return nvmm_vcpu_setstate(owner, data);
	case NVMM_IOC_VCPU_GETSTATE:
		return nvmm_vcpu_getstate(owner, data);
	case NVMM_IOC_VCPU_INJECT:
		return nvmm_vcpu_inject(owner, data);
	case NVMM_IOC_VCPU_RUN:
		return nvmm_vcpu_run(owner, data);
	case NVMM_IOC_GPA_MAP:
		return nvmm_gpa_map(owner, data);
	case NVMM_IOC_GPA_UNMAP:
		return nvmm_gpa_unmap(owner, data);
	case NVMM_IOC_HVA_MAP:
		return nvmm_hva_map(owner, data);
	case NVMM_IOC_HVA_UNMAP:
		return nvmm_hva_unmap(owner, data);
	case NVMM_IOC_CTL:
		return nvmm_ctl(owner, data);
	default:
		return EINVAL;
	}
}

/* -------------------------------------------------------------------------- */

static int nvmm_match(device_t, cfdata_t, void *);
static void nvmm_attach(device_t, device_t, void *);
static int nvmm_detach(device_t, int);

extern struct cfdriver nvmm_cd;

CFATTACH_DECL_NEW(nvmm, 0, nvmm_match, nvmm_attach, nvmm_detach, NULL);

static struct cfdata nvmm_cfdata[] = {
	{
		.cf_name = "nvmm",
		.cf_atname = "nvmm",
		.cf_unit = 0,
		.cf_fstate = FSTATE_STAR,
		.cf_loc = NULL,
		.cf_flags = 0,
		.cf_pspec = NULL,
	},
	{ NULL, NULL, 0, FSTATE_NOTFOUND, NULL, 0, NULL }
};

static int
nvmm_match(device_t self, cfdata_t cfdata, void *arg)
{
	return 1;
}

static void
nvmm_attach(device_t parent, device_t self, void *aux)
{
	int error;

	error = nvmm_init();
	if (error)
		panic("%s: impossible", __func__);
	aprint_normal_dev(self, "attached, using backend %s\n",
	    nvmm_impl->name);
}

static int
nvmm_detach(device_t self, int flags)
{
	if (nmachines > 0)
		return EBUSY;
	nvmm_fini();
	return 0;
}

void
nvmmattach(int nunits)
{
	/* nothing */
}

MODULE(MODULE_CLASS_MISC, nvmm, NULL);

#if defined(_MODULE)
CFDRIVER_DECL(nvmm, DV_VIRTUAL, NULL);
#endif

static int
nvmm_modcmd(modcmd_t cmd, void *arg)
{
#if defined(_MODULE)
	devmajor_t bmajor = NODEVMAJOR;
	devmajor_t cmajor = 345;
#endif
	int error;

	switch (cmd) {
	case MODULE_CMD_INIT:
		if (nvmm_ident() == NULL) {
			aprint_error("%s: cpu not supported\n",
			    nvmm_cd.cd_name);
			return ENOTSUP;
		}
#if defined(_MODULE)
		error = config_cfdriver_attach(&nvmm_cd);
		if (error)
			return error;
#endif
		error = config_cfattach_attach(nvmm_cd.cd_name, &nvmm_ca);
		if (error) {
			config_cfdriver_detach(&nvmm_cd);
			aprint_error("%s: config_cfattach_attach failed\n",
			    nvmm_cd.cd_name);
			return error;
		}

		error = config_cfdata_attach(nvmm_cfdata, 1);
		if (error) {
			config_cfattach_detach(nvmm_cd.cd_name, &nvmm_ca);
			config_cfdriver_detach(&nvmm_cd);
			aprint_error("%s: unable to register cfdata\n",
			    nvmm_cd.cd_name);
			return error;
		}

		if (config_attach_pseudo(nvmm_cfdata) == NULL) {
			aprint_error("%s: config_attach_pseudo failed\n",
			    nvmm_cd.cd_name);
			config_cfattach_detach(nvmm_cd.cd_name, &nvmm_ca);
			config_cfdriver_detach(&nvmm_cd);
			return ENXIO;
		}

#if defined(_MODULE)
		/* mknod /dev/nvmm c 345 0 */
		error = devsw_attach(nvmm_cd.cd_name, NULL, &bmajor,
			&nvmm_cdevsw, &cmajor);
		if (error) {
			aprint_error("%s: unable to register devsw\n",
			    nvmm_cd.cd_name);
			config_cfattach_detach(nvmm_cd.cd_name, &nvmm_ca);
			config_cfdriver_detach(&nvmm_cd);
			return error;
		}
#endif
		return 0;
	case MODULE_CMD_FINI:
		error = config_cfdata_detach(nvmm_cfdata);
		if (error)
			return error;
		error = config_cfattach_detach(nvmm_cd.cd_name, &nvmm_ca);
		if (error)
			return error;
#if defined(_MODULE)
		config_cfdriver_detach(&nvmm_cd);
		devsw_detach(NULL, &nvmm_cdevsw);
#endif
		return 0;
	case MODULE_CMD_AUTOUNLOAD:
		return EBUSY;
	default:
		return ENOTTY;
	}
}
   1316