Home | History | Annotate | Line # | Download | only in nvmm
nvmm.c revision 1.17
      1 /*	$NetBSD: nvmm.c,v 1.17 2019/04/10 18:49:04 maxv Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2018-2019 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Maxime Villard.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 #include <sys/cdefs.h>
     33 __KERNEL_RCSID(0, "$NetBSD: nvmm.c,v 1.17 2019/04/10 18:49:04 maxv Exp $");
     34 
     35 #include <sys/param.h>
     36 #include <sys/systm.h>
     37 #include <sys/kernel.h>
     38 
     39 #include <sys/cpu.h>
     40 #include <sys/conf.h>
     41 #include <sys/kmem.h>
     42 #include <sys/module.h>
     43 #include <sys/proc.h>
     44 #include <sys/mman.h>
     45 #include <sys/file.h>
     46 #include <sys/filedesc.h>
     47 #include <sys/kauth.h>
     48 
     49 #include <uvm/uvm.h>
     50 #include <uvm/uvm_page.h>
     51 
     52 #include "ioconf.h"
     53 
     54 #include <dev/nvmm/nvmm.h>
     55 #include <dev/nvmm/nvmm_internal.h>
     56 #include <dev/nvmm/nvmm_ioctl.h>
     57 
     58 static struct nvmm_machine machines[NVMM_MAX_MACHINES];
     59 static volatile unsigned int nmachines __cacheline_aligned;
     60 
     61 static const struct nvmm_impl *nvmm_impl_list[] = {
     62 	&nvmm_x86_svm,	/* x86 AMD SVM */
     63 	&nvmm_x86_vmx	/* x86 Intel VMX */
     64 };
     65 
     66 static const struct nvmm_impl *nvmm_impl = NULL;
     67 
     68 static struct nvmm_owner root_owner;
     69 
     70 /* -------------------------------------------------------------------------- */
     71 
/*
 * Reserve a free machine slot. On success, *ret points to the machine,
 * which is returned LOCKED (write lock held); the caller must eventually
 * release it with nvmm_machine_put(). Returns ENOBUFS when every slot is
 * in use.
 */
static int
nvmm_machine_alloc(struct nvmm_machine **ret)
{
	struct nvmm_machine *mach;
	size_t i;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		mach = &machines[i];

		rw_enter(&mach->lock, RW_WRITER);
		if (mach->present) {
			rw_exit(&mach->lock);
			continue;
		}

		/* Claim the slot; keep the write lock for the caller. */
		mach->present = true;
		mach->time = time_second;
		*ret = mach;
		atomic_inc_uint(&nmachines);
		return 0;
	}

	return ENOBUFS;
}
     96 
/*
 * Release a machine slot. The caller must hold the machine's write lock
 * and the slot must be in use; the lock itself is not dropped here.
 */
static void
nvmm_machine_free(struct nvmm_machine *mach)
{
	KASSERT(rw_write_held(&mach->lock));
	KASSERT(mach->present);
	mach->present = false;
	atomic_dec_uint(&nmachines);
}
    105 
/*
 * Look up a machine by ID and return it LOCKED (read or write, depending
 * on 'writer') in *ret. Fails with EINVAL on a bad ID, ENOENT if the
 * slot is unused, and EPERM when 'owner' neither owns the machine nor is
 * the root owner. Release with nvmm_machine_put().
 */
static int
nvmm_machine_get(struct nvmm_owner *owner, nvmm_machid_t machid,
    struct nvmm_machine **ret, bool writer)
{
	struct nvmm_machine *mach;
	krw_t op = writer ? RW_WRITER : RW_READER;

	if (machid >= NVMM_MAX_MACHINES) {
		return EINVAL;
	}
	mach = &machines[machid];

	rw_enter(&mach->lock, op);
	if (!mach->present) {
		rw_exit(&mach->lock);
		return ENOENT;
	}
	/* root_owner (kernel-internal callers) bypasses the ownership check. */
	if (owner != &root_owner && mach->owner != owner) {
		rw_exit(&mach->lock);
		return EPERM;
	}
	*ret = mach;

	return 0;
}
    131 
/* Drop the machine lock taken by nvmm_machine_get()/nvmm_machine_alloc(). */
static void
nvmm_machine_put(struct nvmm_machine *mach)
{
	rw_exit(&mach->lock);
}
    137 
    138 /* -------------------------------------------------------------------------- */
    139 
/*
 * Reserve a free VCPU slot in 'mach'. On success *ret points to the
 * VCPU, which is returned LOCKED; release with nvmm_vcpu_put(). The
 * backend state buffer is allocated here (zeroed, KM_SLEEP). Returns
 * ENOBUFS when all slots are in use.
 */
static int
nvmm_vcpu_alloc(struct nvmm_machine *mach, struct nvmm_cpu **ret)
{
	struct nvmm_cpu *vcpu;
	size_t i;

	for (i = 0; i < NVMM_MAX_VCPUS; i++) {
		vcpu = &mach->cpus[i];

		mutex_enter(&vcpu->lock);
		if (vcpu->present) {
			mutex_exit(&vcpu->lock);
			continue;
		}

		/* Claim the slot; keep the mutex held for the caller. */
		vcpu->present = true;
		vcpu->cpuid = i;
		vcpu->state = kmem_zalloc(nvmm_impl->state_size, KM_SLEEP);
		*ret = vcpu;
		return 0;
	}

	return ENOBUFS;
}
    164 
/*
 * Release a VCPU slot and free its backend state buffer. The caller must
 * hold the VCPU mutex; it is not dropped here.
 */
static void
nvmm_vcpu_free(struct nvmm_machine *mach, struct nvmm_cpu *vcpu)
{
	KASSERT(mutex_owned(&vcpu->lock));
	vcpu->present = false;
	kmem_free(vcpu->state, nvmm_impl->state_size);
	vcpu->hcpu_last = -1;
}
    173 
/*
 * Look up a VCPU by ID in 'mach' and return it LOCKED in *ret. EINVAL on
 * a bad ID, ENOENT if the slot is unused. Release with nvmm_vcpu_put().
 */
int
nvmm_vcpu_get(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    struct nvmm_cpu **ret)
{
	struct nvmm_cpu *vcpu;

	if (cpuid >= NVMM_MAX_VCPUS) {
		return EINVAL;
	}
	vcpu = &mach->cpus[cpuid];

	mutex_enter(&vcpu->lock);
	if (!vcpu->present) {
		mutex_exit(&vcpu->lock);
		return ENOENT;
	}
	*ret = vcpu;

	return 0;
}
    194 
/* Drop the VCPU mutex taken by nvmm_vcpu_get()/nvmm_vcpu_alloc(). */
void
nvmm_vcpu_put(struct nvmm_cpu *vcpu)
{
	mutex_exit(&vcpu->lock);
}
    200 
    201 /* -------------------------------------------------------------------------- */
    202 
/*
 * Destroy every machine belonging to 'owner'. Called when the owner's
 * file descriptor is closed; performs the same teardown sequence as
 * nvmm_machine_destroy() for each machine left behind.
 */
static void
nvmm_kill_machines(struct nvmm_owner *owner)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	size_t i, j;
	int error;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		mach = &machines[i];

		rw_enter(&mach->lock, RW_WRITER);
		if (!mach->present || mach->owner != owner) {
			rw_exit(&mach->lock);
			continue;
		}

		/* Kill it: VCPUs first, then the backend and the vmspace. */
		for (j = 0; j < NVMM_MAX_VCPUS; j++) {
			error = nvmm_vcpu_get(mach, j, &vcpu);
			if (error)
				continue;
			(*nvmm_impl->vcpu_destroy)(mach, vcpu);
			nvmm_vcpu_free(mach, vcpu);
			nvmm_vcpu_put(vcpu);
		}
		(*nvmm_impl->machine_destroy)(mach);
		uvmspace_free(mach->vm);

		/* Drop the kernel UOBJ refs. */
		for (j = 0; j < NVMM_MAX_HMAPPINGS; j++) {
			if (!mach->hmap[j].present)
				continue;
			uao_detach(mach->hmap[j].uobj);
		}

		nvmm_machine_free(mach);

		rw_exit(&mach->lock);
	}
}
    244 
    245 /* -------------------------------------------------------------------------- */
    246 
    247 static int
    248 nvmm_capability(struct nvmm_owner *owner, struct nvmm_ioc_capability *args)
    249 {
    250 	args->cap.version = NVMM_CAPABILITY_VERSION;
    251 	args->cap.state_size = nvmm_impl->state_size;
    252 	args->cap.max_machines = NVMM_MAX_MACHINES;
    253 	args->cap.max_vcpus = NVMM_MAX_VCPUS;
    254 	args->cap.max_ram = NVMM_MAX_RAM;
    255 
    256 	(*nvmm_impl->capability)(&args->cap);
    257 
    258 	return 0;
    259 }
    260 
/*
 * NVMM_IOC_MACHINE_CREATE: allocate a machine slot, create its guest
 * physical address space ([0, NVMM_MAX_RAM)) and let the backend
 * initialize it. The new machine ID is returned in args->machid.
 */
static int
nvmm_machine_create(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_create *args)
{
	struct nvmm_machine *mach;
	int error;

	error = nvmm_machine_alloc(&mach);
	if (error)
		return error;

	/* Curproc owns the machine. */
	mach->owner = owner;

	/* Zero out the host mappings. */
	memset(&mach->hmap, 0, sizeof(mach->hmap));

	/* Create the machine vmspace. */
	mach->gpa_begin = 0;
	mach->gpa_end = NVMM_MAX_RAM;
	mach->vm = uvmspace_alloc(0, mach->gpa_end - mach->gpa_begin, false);

	(*nvmm_impl->machine_create)(mach);

	args->machid = mach->machid;
	nvmm_machine_put(mach);

	return 0;
}
    290 
/*
 * NVMM_IOC_MACHINE_DESTROY: tear down a machine — its remaining VCPUs,
 * the backend state, the guest vmspace and the host-mapping UOBJ
 * references — then release the slot.
 */
static int
nvmm_machine_destroy(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_destroy *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;
	size_t i;

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error)
		return error;

	for (i = 0; i < NVMM_MAX_VCPUS; i++) {
		error = nvmm_vcpu_get(mach, i, &vcpu);
		if (error)
			continue;

		(*nvmm_impl->vcpu_destroy)(mach, vcpu);
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
	}

	(*nvmm_impl->machine_destroy)(mach);

	/* Free the machine vmspace. */
	uvmspace_free(mach->vm);

	/* Drop the kernel UOBJ refs. */
	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		if (!mach->hmap[i].present)
			continue;
		uao_detach(mach->hmap[i].uobj);
	}

	nvmm_machine_free(mach);
	nvmm_machine_put(mach);

	return 0;
}
    331 
/*
 * NVMM_IOC_MACHINE_CONFIGURE: apply a backend-specific configuration
 * operation. The payload size comes from the backend's conf_sizes[]
 * table; the payload is copied in from userland and handed to the
 * backend. The buffer is allocated before taking the machine lock.
 */
static int
nvmm_machine_configure(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_configure *args)
{
	struct nvmm_machine *mach;
	size_t allocsz;
	void *data;
	int error;

	if (__predict_false(args->op >= nvmm_impl->conf_max)) {
		return EINVAL;
	}

	allocsz = nvmm_impl->conf_sizes[args->op];
	data = kmem_alloc(allocsz, KM_SLEEP);

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error) {
		kmem_free(data, allocsz);
		return error;
	}

	error = copyin(args->conf, data, allocsz);
	if (error) {
		goto out;
	}

	error = (*nvmm_impl->machine_configure)(mach, args->op, data);

out:
	nvmm_machine_put(mach);
	kmem_free(data, allocsz);
	return error;
}
    366 
/*
 * NVMM_IOC_VCPU_CREATE: allocate a VCPU slot in the machine and let the
 * backend initialize it; the slot is released again if the backend
 * fails.
 */
static int
nvmm_vcpu_create(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_create *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_alloc(mach, &vcpu);
	if (error)
		goto out;

	error = (*nvmm_impl->vcpu_create)(mach, vcpu);
	if (error) {
		/* Backend refused the VCPU: roll the slot back. */
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		goto out;
	}

	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}
    395 
/* NVMM_IOC_VCPU_DESTROY: backend teardown, then release the VCPU slot. */
static int
nvmm_vcpu_destroy(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_destroy *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	(*nvmm_impl->vcpu_destroy)(mach, vcpu);
	nvmm_vcpu_free(mach, vcpu);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}
    419 
/*
 * NVMM_IOC_VCPU_SETSTATE: copy a state blob in from userland and hand it
 * to the backend. The copyin is done while the VCPU is held, so the
 * state buffer cannot be freed underneath us.
 */
static int
nvmm_vcpu_setstate(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_setstate *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	error = copyin(args->state, vcpu->state, nvmm_impl->state_size);
	if (error) {
		nvmm_vcpu_put(vcpu);
		goto out;
	}

	(*nvmm_impl->vcpu_setstate)(vcpu, vcpu->state, args->flags);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}
    449 
    450 static int
    451 nvmm_vcpu_getstate(struct nvmm_owner *owner,
    452     struct nvmm_ioc_vcpu_getstate *args)
    453 {
    454 	struct nvmm_machine *mach;
    455 	struct nvmm_cpu *vcpu;
    456 	int error;
    457 
    458 	error = nvmm_machine_get(owner, args->machid, &mach, false);
    459 	if (error)
    460 		return error;
    461 
    462 	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
    463 	if (error)
    464 		goto out;
    465 
    466 	(*nvmm_impl->vcpu_getstate)(vcpu, vcpu->state, args->flags);
    467 	nvmm_vcpu_put(vcpu);
    468 	error = copyout(vcpu->state, args->state, nvmm_impl->state_size);
    469 
    470 out:
    471 	nvmm_machine_put(mach);
    472 	return error;
    473 }
    474 
/* NVMM_IOC_VCPU_INJECT: hand an event from userland to the backend. */
static int
nvmm_vcpu_inject(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_inject *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	error = (*nvmm_impl->vcpu_inject)(mach, vcpu, &args->event);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}
    497 
    498 static void
    499 nvmm_do_vcpu_run(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
    500     struct nvmm_exit *exit)
    501 {
    502 	struct vmspace *vm = mach->vm;
    503 
    504 	while (1) {
    505 		(*nvmm_impl->vcpu_run)(mach, vcpu, exit);
    506 
    507 		if (__predict_true(exit->reason != NVMM_EXIT_MEMORY)) {
    508 			break;
    509 		}
    510 		if (exit->u.mem.gpa >= mach->gpa_end) {
    511 			break;
    512 		}
    513 		if (uvm_fault(&vm->vm_map, exit->u.mem.gpa, exit->u.mem.prot)) {
    514 			break;
    515 		}
    516 	}
    517 }
    518 
/*
 * NVMM_IOC_VCPU_RUN: run the VCPU until an exit that userland must
 * handle; the exit information is returned in args->exit.
 */
static int
nvmm_vcpu_run(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_run *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	nvmm_do_vcpu_run(mach, vcpu, &args->exit);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}
    541 
    542 /* -------------------------------------------------------------------------- */
    543 
    544 static struct uvm_object *
    545 nvmm_hmapping_getuobj(struct nvmm_machine *mach, uintptr_t hva, size_t size,
    546    size_t *off)
    547 {
    548 	struct nvmm_hmapping *hmapping;
    549 	size_t i;
    550 
    551 	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
    552 		hmapping = &mach->hmap[i];
    553 		if (!hmapping->present) {
    554 			continue;
    555 		}
    556 		if (hva >= hmapping->hva &&
    557 		    hva + size <= hmapping->hva + hmapping->size) {
    558 			*off = hva - hmapping->hva;
    559 			return hmapping->uobj;
    560 		}
    561 	}
    562 
    563 	return NULL;
    564 }
    565 
    566 static int
    567 nvmm_hmapping_validate(struct nvmm_machine *mach, uintptr_t hva, size_t size)
    568 {
    569 	struct nvmm_hmapping *hmapping;
    570 	size_t i;
    571 
    572 	if ((hva % PAGE_SIZE) != 0 || (size % PAGE_SIZE) != 0) {
    573 		return EINVAL;
    574 	}
    575 	if (hva == 0) {
    576 		return EINVAL;
    577 	}
    578 
    579 	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
    580 		hmapping = &mach->hmap[i];
    581 		if (!hmapping->present) {
    582 			continue;
    583 		}
    584 
    585 		if (hva >= hmapping->hva &&
    586 		    hva + size <= hmapping->hva + hmapping->size) {
    587 			break;
    588 		}
    589 
    590 		if (hva >= hmapping->hva &&
    591 		    hva < hmapping->hva + hmapping->size) {
    592 			return EEXIST;
    593 		}
    594 		if (hva + size > hmapping->hva &&
    595 		    hva + size <= hmapping->hva + hmapping->size) {
    596 			return EEXIST;
    597 		}
    598 		if (hva <= hmapping->hva &&
    599 		    hva + size >= hmapping->hva + hmapping->size) {
    600 			return EEXIST;
    601 		}
    602 	}
    603 
    604 	return 0;
    605 }
    606 
    607 static struct nvmm_hmapping *
    608 nvmm_hmapping_alloc(struct nvmm_machine *mach)
    609 {
    610 	struct nvmm_hmapping *hmapping;
    611 	size_t i;
    612 
    613 	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
    614 		hmapping = &mach->hmap[i];
    615 		if (!hmapping->present) {
    616 			hmapping->present = true;
    617 			return hmapping;
    618 		}
    619 	}
    620 
    621 	return NULL;
    622 }
    623 
/*
 * Drop the host mapping that exactly matches (hva, size): unmap it from
 * the calling process and release the kernel's UOBJ reference. Returns
 * ENOENT when no exact match exists.
 */
static int
nvmm_hmapping_free(struct nvmm_machine *mach, uintptr_t hva, size_t size)
{
	struct vmspace *vmspace = curproc->p_vmspace;
	struct nvmm_hmapping *hmapping;
	size_t i;

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present || hmapping->hva != hva ||
		    hmapping->size != size) {
			continue;
		}

		uvm_unmap(&vmspace->vm_map, hmapping->hva,
		    hmapping->hva + hmapping->size);
		uao_detach(hmapping->uobj);

		hmapping->uobj = NULL;
		hmapping->present = false;

		return 0;
	}

	return ENOENT;
}
    650 
/*
 * NVMM_IOC_HVA_MAP: create a host mapping. An anonymous UOBJ backs the
 * range; the machine keeps one reference (dropped at destroy time) and a
 * second reference is taken for the mapping installed in the calling
 * process. NOTE(review): if uvm_map() fails, only the user reference is
 * dropped — the hmapping slot stays registered and its remaining UOBJ
 * reference is released when the machine is destroyed.
 */
static int
nvmm_hva_map(struct nvmm_owner *owner, struct nvmm_ioc_hva_map *args)
{
	struct vmspace *vmspace = curproc->p_vmspace;
	struct nvmm_machine *mach;
	struct nvmm_hmapping *hmapping;
	vaddr_t uva;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error)
		return error;

	error = nvmm_hmapping_validate(mach, args->hva, args->size);
	if (error)
		goto out;

	hmapping = nvmm_hmapping_alloc(mach);
	if (hmapping == NULL) {
		error = ENOBUFS;
		goto out;
	}

	hmapping->hva = args->hva;
	hmapping->size = args->size;
	hmapping->uobj = uao_create(hmapping->size, 0);
	uva = hmapping->hva;

	/* Take a reference for the user. */
	uao_reference(hmapping->uobj);

	/* Map the uobj into the user address space, as pageable. */
	error = uvm_map(&vmspace->vm_map, &uva, hmapping->size, hmapping->uobj,
	    0, 0, UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, UVM_INH_SHARE,
	    UVM_ADV_RANDOM, UVM_FLAG_FIXED|UVM_FLAG_UNMAP));
	if (error) {
		uao_detach(hmapping->uobj);
	}

out:
	nvmm_machine_put(mach);
	return error;
}
    694 
/*
 * NVMM_IOC_HVA_UNMAP: remove a host mapping previously created by
 * nvmm_hva_map(); (hva, size) must match the original request exactly.
 */
static int
nvmm_hva_unmap(struct nvmm_owner *owner, struct nvmm_ioc_hva_unmap *args)
{
	struct nvmm_machine *mach;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error)
		return error;

	error = nvmm_hmapping_free(mach, args->hva, args->size);

	nvmm_machine_put(mach);
	return error;
}
    710 
    711 /* -------------------------------------------------------------------------- */
    712 
/*
 * NVMM_IOC_GPA_MAP: map a previously created host range (see
 * nvmm_hva_map) into the guest physical address space at args->gpa. The
 * HVA range must fall entirely inside one existing host mapping, and the
 * GPA range must be page-aligned, non-wrapping and within
 * [gpa_begin, gpa_end].
 */
static int
nvmm_gpa_map(struct nvmm_owner *owner, struct nvmm_ioc_gpa_map *args)
{
	struct nvmm_machine *mach;
	struct uvm_object *uobj;
	gpaddr_t gpa;
	size_t off;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	/* Only R/W/X protection bits are meaningful. */
	if ((args->prot & ~(PROT_READ|PROT_WRITE|PROT_EXEC)) != 0) {
		error = EINVAL;
		goto out;
	}

	if ((args->gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0 ||
	    (args->hva % PAGE_SIZE) != 0) {
		error = EINVAL;
		goto out;
	}
	if (args->hva == 0) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa < mach->gpa_begin || args->gpa >= mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	/* Rejects both empty and wrapping GPA ranges. */
	if (args->gpa + args->size <= args->gpa) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size > mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	gpa = args->gpa;

	uobj = nvmm_hmapping_getuobj(mach, args->hva, args->size, &off);
	if (uobj == NULL) {
		error = EINVAL;
		goto out;
	}

	/* Take a reference for the machine. */
	uao_reference(uobj);

	/* Map the uobj into the machine address space, as pageable. */
	error = uvm_map(&mach->vm->vm_map, &gpa, args->size, uobj, off, 0,
	    UVM_MAPFLAG(args->prot, UVM_PROT_RWX, UVM_INH_NONE,
	    UVM_ADV_RANDOM, UVM_FLAG_FIXED|UVM_FLAG_UNMAP));
	if (error) {
		uao_detach(uobj);
		goto out;
	}
	/* UVM_FLAG_FIXED should have kept the requested address. */
	if (gpa != args->gpa) {
		uao_detach(uobj);
		printf("[!] uvm_map problem\n");
		error = EINVAL;
		goto out;
	}

out:
	nvmm_machine_put(mach);
	return error;
}
    782 
    783 static int
    784 nvmm_gpa_unmap(struct nvmm_owner *owner, struct nvmm_ioc_gpa_unmap *args)
    785 {
    786 	struct nvmm_machine *mach;
    787 	gpaddr_t gpa;
    788 	int error;
    789 
    790 	error = nvmm_machine_get(owner, args->machid, &mach, false);
    791 	if (error)
    792 		return error;
    793 
    794 	if ((args->gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0) {
    795 		error = EINVAL;
    796 		goto out;
    797 	}
    798 	if (args->gpa < mach->gpa_begin || args->gpa >= mach->gpa_end) {
    799 		error = EINVAL;
    800 		goto out;
    801 	}
    802 	if (args->gpa + args->size <= args->gpa) {
    803 		error = EINVAL;
    804 		goto out;
    805 	}
    806 	if (args->gpa + args->size >= mach->gpa_end) {
    807 		error = EINVAL;
    808 		goto out;
    809 	}
    810 	gpa = args->gpa;
    811 
    812 	/* Unmap the memory from the machine. */
    813 	uvm_unmap(&mach->vm->vm_map, gpa, gpa + args->size);
    814 
    815 out:
    816 	nvmm_machine_put(mach);
    817 	return error;
    818 }
    819 
    820 /* -------------------------------------------------------------------------- */
    821 
/*
 * NVMM_CTL_MACH_INFO: return information (VCPU count, owner PID,
 * creation time) about an arbitrary machine. Runs as root_owner, so it
 * can inspect machines owned by any process; the caller has already been
 * kauth-authorized in nvmm_ctl().
 */
static int
nvmm_ctl_mach_info(struct nvmm_ioc_ctl *args)
{
	struct nvmm_ctl_mach_info ctl;
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;
	size_t i;

	if (args->size != sizeof(ctl))
		return EINVAL;
	error = copyin(args->data, &ctl, sizeof(ctl));
	if (error)
		return error;

	error = nvmm_machine_get(&root_owner, ctl.machid, &mach, true);
	if (error)
		return error;

	/* Count the VCPUs currently present. */
	ctl.nvcpus = 0;
	for (i = 0; i < NVMM_MAX_VCPUS; i++) {
		error = nvmm_vcpu_get(mach, i, &vcpu);
		if (error)
			continue;
		ctl.nvcpus++;
		nvmm_vcpu_put(vcpu);
	}
	ctl.pid = mach->owner->pid;
	ctl.time = mach->time;

	nvmm_machine_put(mach);

	error = copyout(&ctl, args->data, sizeof(ctl));
	if (error)
		return error;

	return 0;
}
    860 
    861 static int
    862 nvmm_ctl(struct nvmm_owner *owner, struct nvmm_ioc_ctl *args)
    863 {
    864 	int error;
    865 
    866 	error = kauth_authorize_device(curlwp->l_cred, KAUTH_DEVICE_NVMM_CTL,
    867 	    NULL, NULL, NULL, NULL);
    868 	if (error)
    869 		return error;
    870 
    871 	switch (args->op) {
    872 	case NVMM_CTL_MACH_INFO:
    873 		return nvmm_ctl_mach_info(args);
    874 	default:
    875 		return EINVAL;
    876 	}
    877 }
    878 
    879 /* -------------------------------------------------------------------------- */
    880 
/*
 * Module initialization: pick the first backend whose ident() accepts
 * the host CPU, set up the per-machine/per-VCPU locks, and let the
 * backend initialize itself. Returns ENOTSUP when no backend matches.
 */
static int
nvmm_init(void)
{
	size_t i, n;

	for (i = 0; i < __arraycount(nvmm_impl_list); i++) {
		if (!(*nvmm_impl_list[i]->ident)()) {
			continue;
		}
		nvmm_impl = nvmm_impl_list[i];
		break;
	}
	if (nvmm_impl == NULL) {
		printf("[!] No implementation found\n");
		return ENOTSUP;
	}

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		machines[i].machid = i;
		rw_init(&machines[i].lock);
		for (n = 0; n < NVMM_MAX_VCPUS; n++) {
			mutex_init(&machines[i].cpus[n].lock, MUTEX_DEFAULT,
			    IPL_NONE);
			machines[i].cpus[n].hcpu_last = -1;
		}
	}

	(*nvmm_impl->init)();

	return 0;
}
    912 
    913 static void
    914 nvmm_fini(void)
    915 {
    916 	size_t i, n;
    917 
    918 	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
    919 		rw_destroy(&machines[i].lock);
    920 		for (n = 0; n < NVMM_MAX_VCPUS; n++) {
    921 			mutex_destroy(&machines[i].cpus[n].lock);
    922 		}
    923 	}
    924 
    925 	(*nvmm_impl->fini)();
    926 }
    927 
    928 /* -------------------------------------------------------------------------- */
    929 
static dev_type_open(nvmm_open);

/*
 * Character device switch for /dev/nvmm: only open() is implemented.
 * nvmm_open() hands back a cloned file descriptor, so every other
 * operation goes through nvmm_fileops instead of this switch.
 */
const struct cdevsw nvmm_cdevsw = {
	.d_open = nvmm_open,
	.d_close = noclose,
	.d_read = noread,
	.d_write = nowrite,
	.d_ioctl = noioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_OTHER | D_MPSAFE
};
    946 
static int nvmm_ioctl(file_t *, u_long, void *);
static int nvmm_close(file_t *);

/*
 * File operations on the cloned descriptor returned by nvmm_open().
 * All functionality is driven through ioctl(); close() tears down the
 * owner's machines.
 */
const struct fileops nvmm_fileops = {
	.fo_read = fbadop_read,
	.fo_write = fbadop_write,
	.fo_ioctl = nvmm_ioctl,
	.fo_fcntl = fnullop_fcntl,
	.fo_poll = fnullop_poll,
	.fo_stat = fbadop_stat,
	.fo_close = nvmm_close,
	.fo_kqfilter = fnullop_kqfilter,
	.fo_restart = fnullop_restart,
	.fo_mmap = NULL,
};
    962 
/*
 * Open /dev/nvmm: allocate a per-open owner record identifying the
 * calling process, and return a cloned file descriptor whose operations
 * are routed through nvmm_fileops with the owner as f_data.
 */
static int
nvmm_open(dev_t dev, int flags, int type, struct lwp *l)
{
	struct nvmm_owner *owner;
	struct file *fp;
	int error, fd;

	if (minor(dev) != 0)
		return EXDEV;
	error = fd_allocfile(&fp, &fd);
	if (error)
		return error;

	owner = kmem_alloc(sizeof(*owner), KM_SLEEP);
	owner->pid = l->l_proc->p_pid;

	return fd_clone(fp, fd, flags, &nvmm_fileops, owner);
}
    981 
    982 static int
    983 nvmm_close(file_t *fp)
    984 {
    985 	struct nvmm_owner *owner = fp->f_data;
    986 
    987 	KASSERT(owner != NULL);
    988 	nvmm_kill_machines(owner);
    989 	kmem_free(owner, sizeof(*owner));
    990 	fp->f_data = NULL;
    991 
    992    	return 0;
    993 }
    994 
/*
 * Ioctl entry point for the cloned descriptor: dispatch each NVMM_IOC_*
 * command to its handler, passing the per-open owner record along for
 * the ownership checks.
 */
static int
nvmm_ioctl(file_t *fp, u_long cmd, void *data)
{
	struct nvmm_owner *owner = fp->f_data;

	KASSERT(owner != NULL);

	switch (cmd) {
	case NVMM_IOC_CAPABILITY:
		return nvmm_capability(owner, data);
	case NVMM_IOC_MACHINE_CREATE:
		return nvmm_machine_create(owner, data);
	case NVMM_IOC_MACHINE_DESTROY:
		return nvmm_machine_destroy(owner, data);
	case NVMM_IOC_MACHINE_CONFIGURE:
		return nvmm_machine_configure(owner, data);
	case NVMM_IOC_VCPU_CREATE:
		return nvmm_vcpu_create(owner, data);
	case NVMM_IOC_VCPU_DESTROY:
		return nvmm_vcpu_destroy(owner, data);
	case NVMM_IOC_VCPU_SETSTATE:
		return nvmm_vcpu_setstate(owner, data);
	case NVMM_IOC_VCPU_GETSTATE:
		return nvmm_vcpu_getstate(owner, data);
	case NVMM_IOC_VCPU_INJECT:
		return nvmm_vcpu_inject(owner, data);
	case NVMM_IOC_VCPU_RUN:
		return nvmm_vcpu_run(owner, data);
	case NVMM_IOC_GPA_MAP:
		return nvmm_gpa_map(owner, data);
	case NVMM_IOC_GPA_UNMAP:
		return nvmm_gpa_unmap(owner, data);
	case NVMM_IOC_HVA_MAP:
		return nvmm_hva_map(owner, data);
	case NVMM_IOC_HVA_UNMAP:
		return nvmm_hva_unmap(owner, data);
	case NVMM_IOC_CTL:
		return nvmm_ctl(owner, data);
	default:
		return EINVAL;
	}
}
   1037 
   1038 /* -------------------------------------------------------------------------- */
   1039 
/*
 * Pseudo-device attach hook required by config(5); all real setup is
 * performed in nvmm_modcmd()/nvmm_init().
 */
void
nvmmattach(int nunits)
{
	/* nothing */
}
   1045 
   1046 MODULE(MODULE_CLASS_MISC, nvmm, NULL);
   1047 
/*
 * Module control: initialize the driver (and, when built as a module,
 * attach the character device with the reserved major 345), refuse to
 * unload while any machine exists, and never auto-unload.
 */
static int
nvmm_modcmd(modcmd_t cmd, void *arg)
{
	int error;

	switch (cmd) {
	case MODULE_CMD_INIT:
		error = nvmm_init();
		if (error)
			return error;

#if defined(_MODULE)
		{
			devmajor_t bmajor = NODEVMAJOR;
			devmajor_t cmajor = 345;

			/* mknod /dev/nvmm c 345 0 */
			error = devsw_attach("nvmm", NULL, &bmajor,
			    &nvmm_cdevsw, &cmajor);
			if (error) {
				nvmm_fini();
				return error;
			}
		}
#endif
		return 0;

	case MODULE_CMD_FINI:
		/* Refuse to unload while machines are alive. */
		if (nmachines > 0) {
			return EBUSY;
		}
#if defined(_MODULE)
		{
			error = devsw_detach(NULL, &nvmm_cdevsw);
			if (error) {
				return error;
			}
		}
#endif
		nvmm_fini();
		return 0;

	case MODULE_CMD_AUTOUNLOAD:
		return EBUSY;

	default:
		return ENOTTY;
	}
}
   1097