/*	$NetBSD: nvmm.c,v 1.14 2019/04/08 18:21:42 maxv Exp $	*/

/*
 * Copyright (c) 2018-2019 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Maxime Villard.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: nvmm.c,v 1.14 2019/04/08 18:21:42 maxv Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>

#include <sys/cpu.h>
#include <sys/conf.h>
#include <sys/kmem.h>
#include <sys/module.h>
#include <sys/proc.h>
#include <sys/mman.h>
#include <sys/file.h>
#include <sys/filedesc.h>

#include <uvm/uvm.h>
#include <uvm/uvm_page.h>

#include "ioconf.h"

#include <dev/nvmm/nvmm.h>
#include <dev/nvmm/nvmm_internal.h>
#include <dev/nvmm/nvmm_ioctl.h>

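/*
 * Global machine table. Each slot has its own rwlock, and a slot is live
 * when mach->present is set. nmachines counts the live machines and is
 * consulted at module unload time to refuse detaching while VMs exist.
 * The backend (nvmm_impl) is chosen once, at init time, by probing each
 * entry of nvmm_impl_list with its ident() hook.
 */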
static struct nvmm_machine machines[NVMM_MAX_MACHINES];
static volatile unsigned int nmachines __cacheline_aligned;

static const struct nvmm_impl *nvmm_impl_list[] = {
	&nvmm_x86_svm,	/* x86 AMD SVM */
	&nvmm_x86_vmx	/* x86 Intel VMX */
};

static const struct nvmm_impl *nvmm_impl = NULL;

/* -------------------------------------------------------------------------- */

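/*
 * Grab a free slot from the machine table. On success the slot is
 * returned write-locked; the caller releases it with nvmm_machine_put().
 */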
static int
nvmm_machine_alloc(struct nvmm_machine **ret)
{
	struct nvmm_machine *mach;
	size_t i;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		mach = &machines[i];

		rw_enter(&mach->lock, RW_WRITER);
		if (mach->present) {
			rw_exit(&mach->lock);
			continue;
		}

		mach->present = true;
		*ret = mach;
		atomic_inc_uint(&nmachines);
		return 0;
	}

	return ENOBUFS;
}

static void
nvmm_machine_free(struct nvmm_machine *mach)
{
	KASSERT(rw_write_held(&mach->lock));
	KASSERT(mach->present);
	mach->present = false;
	atomic_dec_uint(&nmachines);
}

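/*
 * Look up a machine by id and validate that the caller owns it. On
 * success the machine is returned locked, read or write as requested;
 * nvmm_machine_put() releases it.
 */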
static int
nvmm_machine_get(struct nvmm_owner *owner, nvmm_machid_t machid,
    struct nvmm_machine **ret, bool writer)
{
	struct nvmm_machine *mach;
	krw_t op = writer ? RW_WRITER : RW_READER;

	if (machid >= NVMM_MAX_MACHINES) {
		return EINVAL;
	}
	mach = &machines[machid];

	rw_enter(&mach->lock, op);
	if (!mach->present) {
		rw_exit(&mach->lock);
		return ENOENT;
	}
	if (mach->owner != owner) {
		rw_exit(&mach->lock);
		return EPERM;
	}
	*ret = mach;

	return 0;
}

static void
nvmm_machine_put(struct nvmm_machine *mach)
{
	rw_exit(&mach->lock);
}

/* -------------------------------------------------------------------------- */

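/*
 * Grab a free VCPU slot in the machine. On success the VCPU is returned
 * with its mutex held and a backend-sized state area allocated; the
 * caller releases it with nvmm_vcpu_put().
 */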
static int
nvmm_vcpu_alloc(struct nvmm_machine *mach, struct nvmm_cpu **ret)
{
	struct nvmm_cpu *vcpu;
	size_t i;

	for (i = 0; i < NVMM_MAX_VCPUS; i++) {
		vcpu = &mach->cpus[i];

		mutex_enter(&vcpu->lock);
		if (vcpu->present) {
			mutex_exit(&vcpu->lock);
			continue;
		}

		vcpu->present = true;
		vcpu->cpuid = i;
		vcpu->state = kmem_zalloc(nvmm_impl->state_size, KM_SLEEP);
		*ret = vcpu;
		return 0;
	}

	return ENOBUFS;
}

static void
nvmm_vcpu_free(struct nvmm_machine *mach, struct nvmm_cpu *vcpu)
{
	KASSERT(mutex_owned(&vcpu->lock));
	vcpu->present = false;
	kmem_free(vcpu->state, nvmm_impl->state_size);
	vcpu->hcpu_last = -1;
}

int
nvmm_vcpu_get(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    struct nvmm_cpu **ret)
{
	struct nvmm_cpu *vcpu;

	if (cpuid >= NVMM_MAX_VCPUS) {
		return EINVAL;
	}
	vcpu = &mach->cpus[cpuid];

	mutex_enter(&vcpu->lock);
	if (!vcpu->present) {
		mutex_exit(&vcpu->lock);
		return ENOENT;
	}
	*ret = vcpu;

	return 0;
}

void
nvmm_vcpu_put(struct nvmm_cpu *vcpu)
{
	mutex_exit(&vcpu->lock);
}

/* -------------------------------------------------------------------------- */

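/*
 * Tear down every machine belonging to this owner: destroy and free the
 * VCPUs, free the machine vmspace, and drop the kernel references on the
 * host-mapping UVM objects. Called when the owner's /dev/nvmm descriptor
 * is closed.
 */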
static void
nvmm_kill_machines(struct nvmm_owner *owner)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	size_t i, j;
	int error;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		mach = &machines[i];

		rw_enter(&mach->lock, RW_WRITER);
		if (!mach->present || mach->owner != owner) {
			rw_exit(&mach->lock);
			continue;
		}

		/* Kill it. */
		for (j = 0; j < NVMM_MAX_VCPUS; j++) {
			error = nvmm_vcpu_get(mach, j, &vcpu);
			if (error)
				continue;
			(*nvmm_impl->vcpu_destroy)(mach, vcpu);
			nvmm_vcpu_free(mach, vcpu);
			nvmm_vcpu_put(vcpu);
		}
		uvmspace_free(mach->vm);

		/* Drop the kernel UOBJ refs. */
		for (j = 0; j < NVMM_MAX_HMAPPINGS; j++) {
			if (!mach->hmap[j].present)
				continue;
			uao_detach(mach->hmap[j].uobj);
		}

		nvmm_machine_free(mach);

		rw_exit(&mach->lock);
	}
}

/* -------------------------------------------------------------------------- */

static int
nvmm_capability(struct nvmm_owner *owner, struct nvmm_ioc_capability *args)
{
	args->cap.version = NVMM_CAPABILITY_VERSION;
	args->cap.state_size = nvmm_impl->state_size;
	args->cap.max_machines = NVMM_MAX_MACHINES;
	args->cap.max_vcpus = NVMM_MAX_VCPUS;
	args->cap.max_ram = NVMM_MAX_RAM;

	(*nvmm_impl->capability)(&args->cap);

	return 0;
}

static int
nvmm_machine_create(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_create *args)
{
	struct nvmm_machine *mach;
	int error;

	error = nvmm_machine_alloc(&mach);
	if (error)
		return error;

	/* Curproc owns the machine. */
	mach->owner = owner;

	/* Zero out the host mappings. */
	memset(&mach->hmap, 0, sizeof(mach->hmap));

	/* Create the machine vmspace. */
	mach->gpa_begin = 0;
	mach->gpa_end = NVMM_MAX_RAM;
	mach->vm = uvmspace_alloc(0, mach->gpa_end - mach->gpa_begin, false);

	(*nvmm_impl->machine_create)(mach);

	args->machid = mach->machid;
	nvmm_machine_put(mach);

	return 0;
}

static int
nvmm_machine_destroy(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_destroy *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;
	size_t i;

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error)
		return error;

	for (i = 0; i < NVMM_MAX_VCPUS; i++) {
		error = nvmm_vcpu_get(mach, i, &vcpu);
		if (error)
			continue;

		(*nvmm_impl->vcpu_destroy)(mach, vcpu);
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
	}

	(*nvmm_impl->machine_destroy)(mach);

	/* Free the machine vmspace. */
	uvmspace_free(mach->vm);

	/* Drop the kernel UOBJ refs. */
	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		if (!mach->hmap[i].present)
			continue;
		uao_detach(mach->hmap[i].uobj);
	}

	nvmm_machine_free(mach);
	nvmm_machine_put(mach);

	return 0;
}

static int
nvmm_machine_configure(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_configure *args)
{
	struct nvmm_machine *mach;
	size_t allocsz;
	void *data;
	int error;

	if (__predict_false(args->op >= nvmm_impl->conf_max)) {
		return EINVAL;
	}

	allocsz = nvmm_impl->conf_sizes[args->op];
	data = kmem_alloc(allocsz, KM_SLEEP);

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error) {
		kmem_free(data, allocsz);
		return error;
	}

	error = copyin(args->conf, data, allocsz);
	if (error) {
		goto out;
	}

	error = (*nvmm_impl->machine_configure)(mach, args->op, data);

out:
	nvmm_machine_put(mach);
	kmem_free(data, allocsz);
	return error;
}

static int
nvmm_vcpu_create(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_create *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_alloc(mach, &vcpu);
	if (error)
		goto out;

	error = (*nvmm_impl->vcpu_create)(mach, vcpu);
	if (error) {
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		goto out;
	}

	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_destroy(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_destroy *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	(*nvmm_impl->vcpu_destroy)(mach, vcpu);
	nvmm_vcpu_free(mach, vcpu);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_setstate(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_setstate *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	error = copyin(args->state, vcpu->state, nvmm_impl->state_size);
	if (error) {
		nvmm_vcpu_put(vcpu);
		goto out;
	}

	(*nvmm_impl->vcpu_setstate)(vcpu, vcpu->state, args->flags);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_getstate(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_getstate *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	(*nvmm_impl->vcpu_getstate)(vcpu, vcpu->state, args->flags);
	nvmm_vcpu_put(vcpu);
	error = copyout(vcpu->state, args->state, nvmm_impl->state_size);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_inject(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_inject *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	error = (*nvmm_impl->vcpu_inject)(mach, vcpu, &args->event);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

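/*
 * Run loop: keep re-entering the guest as long as the only reason for
 * exiting is a memory fault on guest RAM. Such faults are serviced by
 * uvm_fault() against the machine's vmspace, which pages the backing
 * anonymous memory in on demand; every other exit reason is forwarded
 * to userland.
 */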
static void
nvmm_do_vcpu_run(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
    struct nvmm_exit *exit)
{
	struct vmspace *vm = mach->vm;

	while (1) {
		(*nvmm_impl->vcpu_run)(mach, vcpu, exit);

		if (__predict_true(exit->reason != NVMM_EXIT_MEMORY)) {
			break;
		}
		if (exit->u.mem.gpa >= mach->gpa_end) {
			break;
		}
		if (uvm_fault(&vm->vm_map, exit->u.mem.gpa, exit->u.mem.prot)) {
			break;
		}
	}
}

static int
nvmm_vcpu_run(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_run *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	nvmm_do_vcpu_run(mach, vcpu, &args->exit);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

/* -------------------------------------------------------------------------- */

static struct uvm_object *
nvmm_hmapping_getuobj(struct nvmm_machine *mach, uintptr_t hva, size_t size,
    size_t *off)
{
	struct nvmm_hmapping *hmapping;
	size_t i;

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present) {
			continue;
		}
		if (hva >= hmapping->hva &&
		    hva + size <= hmapping->hva + hmapping->size) {
			*off = hva - hmapping->hva;
			return hmapping->uobj;
		}
	}

	return NULL;
}

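/*
 * Validate a host mapping request: the range must be page-aligned and
 * non-NULL. A range wholly contained in an existing mapping is accepted;
 * any partial overlap with an existing mapping is rejected with EEXIST.
 */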
static int
nvmm_hmapping_validate(struct nvmm_machine *mach, uintptr_t hva, size_t size)
{
	struct nvmm_hmapping *hmapping;
	size_t i;

	if ((hva % PAGE_SIZE) != 0 || (size % PAGE_SIZE) != 0) {
		return EINVAL;
	}
	if (hva == 0) {
		return EINVAL;
	}

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present) {
			continue;
		}

		if (hva >= hmapping->hva &&
		    hva + size <= hmapping->hva + hmapping->size) {
			break;
		}

		if (hva >= hmapping->hva &&
		    hva < hmapping->hva + hmapping->size) {
			return EEXIST;
		}
		if (hva + size > hmapping->hva &&
		    hva + size <= hmapping->hva + hmapping->size) {
			return EEXIST;
		}
		if (hva <= hmapping->hva &&
		    hva + size >= hmapping->hva + hmapping->size) {
			return EEXIST;
		}
	}

	return 0;
}

static struct nvmm_hmapping *
nvmm_hmapping_alloc(struct nvmm_machine *mach)
{
	struct nvmm_hmapping *hmapping;
	size_t i;

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present) {
			hmapping->present = true;
			return hmapping;
		}
	}

	return NULL;
}

static int
nvmm_hmapping_free(struct nvmm_machine *mach, uintptr_t hva, size_t size)
{
	struct vmspace *vmspace = curproc->p_vmspace;
	struct nvmm_hmapping *hmapping;
	size_t i;

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present || hmapping->hva != hva ||
		    hmapping->size != size) {
			continue;
		}

		uvm_unmap(&vmspace->vm_map, hmapping->hva,
		    hmapping->hva + hmapping->size);
		uao_detach(hmapping->uobj);

		hmapping->uobj = NULL;
		hmapping->present = false;

		return 0;
	}

	return ENOENT;
}

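/*
 * Create a host mapping: back the range with an anonymous UVM object and
 * map it, pageable, at the requested address in the calling process. The
 * uobj holds two references, one for the kernel (dropped at unmap or
 * machine teardown) and one for the user mapping; nvmm_gpa_map() later
 * maps the same uobj into the guest, so host and guest share the pages.
 */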
static int
nvmm_hva_map(struct nvmm_owner *owner, struct nvmm_ioc_hva_map *args)
{
	struct vmspace *vmspace = curproc->p_vmspace;
	struct nvmm_machine *mach;
	struct nvmm_hmapping *hmapping;
	vaddr_t uva;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error)
		return error;

	error = nvmm_hmapping_validate(mach, args->hva, args->size);
	if (error)
		goto out;

	hmapping = nvmm_hmapping_alloc(mach);
	if (hmapping == NULL) {
		error = ENOBUFS;
		goto out;
	}

	hmapping->hva = args->hva;
	hmapping->size = args->size;
	hmapping->uobj = uao_create(hmapping->size, 0);
	uva = hmapping->hva;

	/* Take a reference for the user. */
	uao_reference(hmapping->uobj);

	/* Map the uobj into the user address space, as pageable. */
	error = uvm_map(&vmspace->vm_map, &uva, hmapping->size, hmapping->uobj,
	    0, 0, UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, UVM_INH_SHARE,
	    UVM_ADV_RANDOM, UVM_FLAG_FIXED|UVM_FLAG_UNMAP));
	if (error) {
		uao_detach(hmapping->uobj);
	}

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_hva_unmap(struct nvmm_owner *owner, struct nvmm_ioc_hva_unmap *args)
{
	struct nvmm_machine *mach;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error)
		return error;

	error = nvmm_hmapping_free(mach, args->hva, args->size);

	nvmm_machine_put(mach);
	return error;
}

/* -------------------------------------------------------------------------- */

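/*
 * Map a previously established host range into the guest physical space:
 * find the uobj backing the HVA, take a reference for the machine, and
 * map it at the requested GPA in the machine's vmspace with the requested
 * protection.
 */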
static int
nvmm_gpa_map(struct nvmm_owner *owner, struct nvmm_ioc_gpa_map *args)
{
	struct nvmm_machine *mach;
	struct uvm_object *uobj;
	gpaddr_t gpa;
	size_t off;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	if ((args->prot & ~(PROT_READ|PROT_WRITE|PROT_EXEC)) != 0) {
		error = EINVAL;
		goto out;
	}

	if ((args->gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0 ||
	    (args->hva % PAGE_SIZE) != 0) {
		error = EINVAL;
		goto out;
	}
	if (args->hva == 0) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa < mach->gpa_begin || args->gpa >= mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size <= args->gpa) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size > mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	gpa = args->gpa;

	uobj = nvmm_hmapping_getuobj(mach, args->hva, args->size, &off);
	if (uobj == NULL) {
		error = EINVAL;
		goto out;
	}

	/* Take a reference for the machine. */
	uao_reference(uobj);

	/* Map the uobj into the machine address space, as pageable. */
	error = uvm_map(&mach->vm->vm_map, &gpa, args->size, uobj, off, 0,
	    UVM_MAPFLAG(args->prot, UVM_PROT_RWX, UVM_INH_NONE,
	    UVM_ADV_RANDOM, UVM_FLAG_FIXED|UVM_FLAG_UNMAP));
	if (error) {
		uao_detach(uobj);
		goto out;
	}
	if (gpa != args->gpa) {
		uao_detach(uobj);
		printf("[!] uvm_map problem\n");
		error = EINVAL;
		goto out;
	}

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_gpa_unmap(struct nvmm_owner *owner, struct nvmm_ioc_gpa_unmap *args)
{
	struct nvmm_machine *mach;
	gpaddr_t gpa;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	if ((args->gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa < mach->gpa_begin || args->gpa >= mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size <= args->gpa) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size > mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	gpa = args->gpa;

	/* Unmap the memory from the machine. */
	uvm_unmap(&mach->vm->vm_map, gpa, gpa + args->size);

out:
	nvmm_machine_put(mach);
	return error;
}

/* -------------------------------------------------------------------------- */

static int
nvmm_init(void)
{
	size_t i, n;

	for (i = 0; i < __arraycount(nvmm_impl_list); i++) {
		if (!(*nvmm_impl_list[i]->ident)()) {
			continue;
		}
		nvmm_impl = nvmm_impl_list[i];
		break;
	}
	if (nvmm_impl == NULL) {
		printf("[!] No implementation found\n");
		return ENOTSUP;
	}

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		machines[i].machid = i;
		rw_init(&machines[i].lock);
		for (n = 0; n < NVMM_MAX_VCPUS; n++) {
			mutex_init(&machines[i].cpus[n].lock, MUTEX_DEFAULT,
			    IPL_NONE);
			machines[i].cpus[n].hcpu_last = -1;
		}
	}

	(*nvmm_impl->init)();

	return 0;
}

static void
nvmm_fini(void)
{
	size_t i, n;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		rw_destroy(&machines[i].lock);
		for (n = 0; n < NVMM_MAX_VCPUS; n++) {
			mutex_destroy(&machines[i].cpus[n].lock);
		}
	}

	(*nvmm_impl->fini)();
}

/* -------------------------------------------------------------------------- */

static dev_type_open(nvmm_open);

const struct cdevsw nvmm_cdevsw = {
	.d_open = nvmm_open,
	.d_close = noclose,
	.d_read = noread,
	.d_write = nowrite,
	.d_ioctl = noioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_OTHER | D_MPSAFE
};

static int nvmm_ioctl(file_t *, u_long, void *);
static int nvmm_close(file_t *);

const struct fileops nvmm_fileops = {
	.fo_read = fbadop_read,
	.fo_write = fbadop_write,
	.fo_ioctl = nvmm_ioctl,
	.fo_fcntl = fnullop_fcntl,
	.fo_poll = fnullop_poll,
	.fo_stat = fbadop_stat,
	.fo_close = nvmm_close,
	.fo_kqfilter = fnullop_kqfilter,
	.fo_restart = fnullop_restart,
	.fo_mmap = NULL,
};

static int
nvmm_open(dev_t dev, int flags, int type, struct lwp *l)
{
	struct nvmm_owner *owner;
	struct file *fp;
	int error, fd;

	if (minor(dev) != 0)
		return EXDEV;
	error = fd_allocfile(&fp, &fd);
	if (error)
		return error;

	owner = kmem_alloc(sizeof(*owner), KM_SLEEP);
	owner->pid = l->l_proc->p_pid;

	return fd_clone(fp, fd, flags, &nvmm_fileops, owner);
}

static int
nvmm_close(file_t *fp)
{
	struct nvmm_owner *owner = fp->f_data;

	KASSERT(owner != NULL);
	nvmm_kill_machines(owner);
	kmem_free(owner, sizeof(*owner));
	fp->f_data = NULL;

	return 0;
}

static int
nvmm_ioctl(file_t *fp, u_long cmd, void *data)
{
	struct nvmm_owner *owner = fp->f_data;

	KASSERT(owner != NULL);

	switch (cmd) {
	case NVMM_IOC_CAPABILITY:
		return nvmm_capability(owner, data);
	case NVMM_IOC_MACHINE_CREATE:
		return nvmm_machine_create(owner, data);
	case NVMM_IOC_MACHINE_DESTROY:
		return nvmm_machine_destroy(owner, data);
	case NVMM_IOC_MACHINE_CONFIGURE:
		return nvmm_machine_configure(owner, data);
	case NVMM_IOC_VCPU_CREATE:
		return nvmm_vcpu_create(owner, data);
	case NVMM_IOC_VCPU_DESTROY:
		return nvmm_vcpu_destroy(owner, data);
	case NVMM_IOC_VCPU_SETSTATE:
		return nvmm_vcpu_setstate(owner, data);
	case NVMM_IOC_VCPU_GETSTATE:
		return nvmm_vcpu_getstate(owner, data);
	case NVMM_IOC_VCPU_INJECT:
		return nvmm_vcpu_inject(owner, data);
	case NVMM_IOC_VCPU_RUN:
		return nvmm_vcpu_run(owner, data);
	case NVMM_IOC_GPA_MAP:
		return nvmm_gpa_map(owner, data);
	case NVMM_IOC_GPA_UNMAP:
		return nvmm_gpa_unmap(owner, data);
	case NVMM_IOC_HVA_MAP:
		return nvmm_hva_map(owner, data);
	case NVMM_IOC_HVA_UNMAP:
		return nvmm_hva_unmap(owner, data);
	default:
		return EINVAL;
	}
}
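
/*
 * Raw device usage, as a minimal sketch (the supported userland interface
 * is libnvmm, which wraps these ioctls; error handling omitted):
 *
 *	int fd = open("/dev/nvmm", O_RDWR);
 *	struct nvmm_ioc_capability cap;
 *	ioctl(fd, NVMM_IOC_CAPABILITY, &cap);
 *	struct nvmm_ioc_machine_create mc;
 *	ioctl(fd, NVMM_IOC_MACHINE_CREATE, &mc);
 *	// mc.machid now identifies the new, empty machine
 */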

/* -------------------------------------------------------------------------- */

void
nvmmattach(int nunits)
{
	/* nothing */
}

MODULE(MODULE_CLASS_ANY, nvmm, NULL);

static int
nvmm_modcmd(modcmd_t cmd, void *arg)
{
	int error;

	switch (cmd) {
	case MODULE_CMD_INIT:
		error = nvmm_init();
		if (error)
			return error;

#if defined(_MODULE)
		{
			devmajor_t bmajor = NODEVMAJOR;
			devmajor_t cmajor = 345;

			/* mknod /dev/nvmm c 345 0 */
			error = devsw_attach("nvmm", NULL, &bmajor,
			    &nvmm_cdevsw, &cmajor);
			if (error) {
				nvmm_fini();
				return error;
			}
		}
#endif
		return 0;

	case MODULE_CMD_FINI:
		if (nmachines > 0) {
			return EBUSY;
		}
#if defined(_MODULE)
		{
			error = devsw_detach(NULL, &nvmm_cdevsw);
			if (error) {
				return error;
			}
		}
#endif
		nvmm_fini();
		return 0;

	case MODULE_CMD_AUTOUNLOAD:
		return EBUSY;

	default:
		return ENOTTY;
	}
}