Home | History | Annotate | Line # | Download | only in nvmm
nvmm.c revision 1.18
      1 /*	$NetBSD: nvmm.c,v 1.18 2019/04/27 17:30:38 maxv Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2018-2019 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Maxime Villard.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 #include <sys/cdefs.h>
     33 __KERNEL_RCSID(0, "$NetBSD: nvmm.c,v 1.18 2019/04/27 17:30:38 maxv Exp $");
     34 
     35 #include <sys/param.h>
     36 #include <sys/systm.h>
     37 #include <sys/kernel.h>
     38 
     39 #include <sys/cpu.h>
     40 #include <sys/conf.h>
     41 #include <sys/kmem.h>
     42 #include <sys/module.h>
     43 #include <sys/proc.h>
     44 #include <sys/mman.h>
     45 #include <sys/file.h>
     46 #include <sys/filedesc.h>
     47 #include <sys/kauth.h>
     48 
     49 #include <uvm/uvm.h>
     50 #include <uvm/uvm_page.h>
     51 
     52 #include "ioconf.h"
     53 
     54 #include <dev/nvmm/nvmm.h>
     55 #include <dev/nvmm/nvmm_internal.h>
     56 #include <dev/nvmm/nvmm_ioctl.h>
     57 
/*
 * Global machine table, and the count of currently allocated machines.
 * 'nmachines' is updated with atomic ops and read without any machine
 * lock held (see nvmm_modcmd()), hence the dedicated cache line.
 */
static struct nvmm_machine machines[NVMM_MAX_MACHINES];
static volatile unsigned int nmachines __cacheline_aligned;

/* Candidate backends, probed in this order by nvmm_init(). */
static const struct nvmm_impl *nvmm_impl_list[] = {
	&nvmm_x86_svm,	/* x86 AMD SVM */
	&nvmm_x86_vmx	/* x86 Intel VMX */
};

/* The backend selected by nvmm_init(); NULL until then. */
static const struct nvmm_impl *nvmm_impl = NULL;

/* Special owner that bypasses the ownership check in nvmm_machine_get(). */
static struct nvmm_owner root_owner;
     69 
     70 /* -------------------------------------------------------------------------- */
     71 
/*
 * Find a free slot in the global machine table and claim it.  On
 * success the machine is returned *with its lock write-held*; the
 * caller releases it with nvmm_machine_put().  Returns ENOBUFS when
 * all NVMM_MAX_MACHINES slots are in use.
 */
static int
nvmm_machine_alloc(struct nvmm_machine **ret)
{
	struct nvmm_machine *mach;
	size_t i;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		mach = &machines[i];

		rw_enter(&mach->lock, RW_WRITER);
		if (mach->present) {
			rw_exit(&mach->lock);
			continue;
		}

		/* Claim the slot; the write lock is kept for the caller. */
		mach->present = true;
		mach->time = time_second;
		*ret = mach;
		atomic_inc_uint(&nmachines);
		return 0;
	}

	return ENOBUFS;
}
     96 
/*
 * Release a machine slot.  The caller must hold the machine lock as
 * writer, and still owns it on return.
 */
static void
nvmm_machine_free(struct nvmm_machine *mach)
{
	KASSERT(rw_write_held(&mach->lock));
	KASSERT(mach->present);
	mach->present = false;
	atomic_dec_uint(&nmachines);
}
    105 
    106 static int
    107 nvmm_machine_get(struct nvmm_owner *owner, nvmm_machid_t machid,
    108     struct nvmm_machine **ret, bool writer)
    109 {
    110 	struct nvmm_machine *mach;
    111 	krw_t op = writer ? RW_WRITER : RW_READER;
    112 
    113 	if (machid >= NVMM_MAX_MACHINES) {
    114 		return EINVAL;
    115 	}
    116 	mach = &machines[machid];
    117 
    118 	rw_enter(&mach->lock, op);
    119 	if (!mach->present) {
    120 		rw_exit(&mach->lock);
    121 		return ENOENT;
    122 	}
    123 	if (owner != &root_owner && mach->owner != owner) {
    124 		rw_exit(&mach->lock);
    125 		return EPERM;
    126 	}
    127 	*ret = mach;
    128 
    129 	return 0;
    130 }
    131 
/*
 * Unlock a machine obtained from nvmm_machine_alloc() or
 * nvmm_machine_get().
 */
static void
nvmm_machine_put(struct nvmm_machine *mach)
{
	rw_exit(&mach->lock);
}
    137 
    138 /* -------------------------------------------------------------------------- */
    139 
/*
 * Allocate VCPU 'cpuid' of the given machine.  On success the VCPU is
 * returned with its mutex held; release with nvmm_vcpu_put().  Returns
 * EBUSY if the slot is already in use.
 *
 * NOTE(review): kmem_zalloc(KM_SLEEP) is called while vcpu->lock is
 * held, and may sleep under memory pressure — confirm this is
 * acceptable for this adaptive mutex.
 */
static int
nvmm_vcpu_alloc(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    struct nvmm_cpu **ret)
{
	struct nvmm_cpu *vcpu;

	if (cpuid >= NVMM_MAX_VCPUS) {
		return EINVAL;
	}
	vcpu = &mach->cpus[cpuid];

	mutex_enter(&vcpu->lock);
	if (vcpu->present) {
		mutex_exit(&vcpu->lock);
		return EBUSY;
	}

	vcpu->present = true;
	/* Backend-sized state blob, freed in nvmm_vcpu_free(). */
	vcpu->state = kmem_zalloc(nvmm_impl->state_size, KM_SLEEP);
	vcpu->hcpu_last = -1;
	*ret = vcpu;
	return 0;
}
    163 
/*
 * Free the state blob of a VCPU and mark the slot free.  The caller
 * must hold the VCPU mutex, and still owns it on return.
 */
static void
nvmm_vcpu_free(struct nvmm_machine *mach, struct nvmm_cpu *vcpu)
{
	KASSERT(mutex_owned(&vcpu->lock));
	vcpu->present = false;
	kmem_free(vcpu->state, nvmm_impl->state_size);
}
    171 
    172 int
    173 nvmm_vcpu_get(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    174     struct nvmm_cpu **ret)
    175 {
    176 	struct nvmm_cpu *vcpu;
    177 
    178 	if (cpuid >= NVMM_MAX_VCPUS) {
    179 		return EINVAL;
    180 	}
    181 	vcpu = &mach->cpus[cpuid];
    182 
    183 	mutex_enter(&vcpu->lock);
    184 	if (!vcpu->present) {
    185 		mutex_exit(&vcpu->lock);
    186 		return ENOENT;
    187 	}
    188 	*ret = vcpu;
    189 
    190 	return 0;
    191 }
    192 
/*
 * Unlock a VCPU obtained from nvmm_vcpu_alloc() or nvmm_vcpu_get().
 */
void
nvmm_vcpu_put(struct nvmm_cpu *vcpu)
{
	mutex_exit(&vcpu->lock);
}
    198 
    199 /* -------------------------------------------------------------------------- */
    200 
/*
 * Destroy every machine belonging to 'owner': VCPUs first, then the
 * backend state, the guest vmspace, and the kernel references on the
 * host mappings.  Called from nvmm_close() when the owner's file goes
 * away.
 */
static void
nvmm_kill_machines(struct nvmm_owner *owner)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	size_t i, j;
	int error;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		mach = &machines[i];

		rw_enter(&mach->lock, RW_WRITER);
		if (!mach->present || mach->owner != owner) {
			rw_exit(&mach->lock);
			continue;
		}

		/* Kill it. */
		for (j = 0; j < NVMM_MAX_VCPUS; j++) {
			/* nvmm_vcpu_get() returns the VCPU locked. */
			error = nvmm_vcpu_get(mach, j, &vcpu);
			if (error)
				continue;
			(*nvmm_impl->vcpu_destroy)(mach, vcpu);
			nvmm_vcpu_free(mach, vcpu);
			nvmm_vcpu_put(vcpu);
		}
		(*nvmm_impl->machine_destroy)(mach);
		uvmspace_free(mach->vm);

		/* Drop the kernel UOBJ refs. */
		for (j = 0; j < NVMM_MAX_HMAPPINGS; j++) {
			if (!mach->hmap[j].present)
				continue;
			uao_detach(mach->hmap[j].uobj);
		}

		nvmm_machine_free(mach);

		rw_exit(&mach->lock);
	}
}
    242 
    243 /* -------------------------------------------------------------------------- */
    244 
    245 static int
    246 nvmm_capability(struct nvmm_owner *owner, struct nvmm_ioc_capability *args)
    247 {
    248 	args->cap.version = NVMM_CAPABILITY_VERSION;
    249 	args->cap.state_size = nvmm_impl->state_size;
    250 	args->cap.max_machines = NVMM_MAX_MACHINES;
    251 	args->cap.max_vcpus = NVMM_MAX_VCPUS;
    252 	args->cap.max_ram = NVMM_MAX_RAM;
    253 
    254 	(*nvmm_impl->capability)(&args->cap);
    255 
    256 	return 0;
    257 }
    258 
/*
 * NVMM_IOC_MACHINE_CREATE: allocate a machine slot, create its guest
 * physical address space, and let the backend initialize it.  The new
 * machine id is returned to userland in args->machid.
 */
static int
nvmm_machine_create(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_create *args)
{
	struct nvmm_machine *mach;
	int error;

	/* On success, 'mach' is returned write-locked. */
	error = nvmm_machine_alloc(&mach);
	if (error)
		return error;

	/* Curproc owns the machine. */
	mach->owner = owner;

	/* Zero out the host mappings. */
	memset(&mach->hmap, 0, sizeof(mach->hmap));

	/* Create the machine vmspace. */
	mach->gpa_begin = 0;
	mach->gpa_end = NVMM_MAX_RAM;
	mach->vm = uvmspace_alloc(0, mach->gpa_end - mach->gpa_begin, false);

	(*nvmm_impl->machine_create)(mach);

	args->machid = mach->machid;
	nvmm_machine_put(mach);

	return 0;
}
    288 
/*
 * NVMM_IOC_MACHINE_DESTROY: destroy a machine owned by 'owner'.  The
 * remaining VCPUs are destroyed first, then the backend state, the
 * guest vmspace, and the kernel references on the host mappings.
 */
static int
nvmm_machine_destroy(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_destroy *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;
	size_t i;

	/* Write-lock the machine for the whole teardown. */
	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error)
		return error;

	for (i = 0; i < NVMM_MAX_VCPUS; i++) {
		/* nvmm_vcpu_get() returns the VCPU locked. */
		error = nvmm_vcpu_get(mach, i, &vcpu);
		if (error)
			continue;

		(*nvmm_impl->vcpu_destroy)(mach, vcpu);
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
	}

	(*nvmm_impl->machine_destroy)(mach);

	/* Free the machine vmspace. */
	uvmspace_free(mach->vm);

	/* Drop the kernel UOBJ refs. */
	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		if (!mach->hmap[i].present)
			continue;
		uao_detach(mach->hmap[i].uobj);
	}

	nvmm_machine_free(mach);
	nvmm_machine_put(mach);

	return 0;
}
    329 
    330 static int
    331 nvmm_machine_configure(struct nvmm_owner *owner,
    332     struct nvmm_ioc_machine_configure *args)
    333 {
    334 	struct nvmm_machine *mach;
    335 	size_t allocsz;
    336 	void *data;
    337 	int error;
    338 
    339 	if (__predict_false(args->op >= nvmm_impl->conf_max)) {
    340 		return EINVAL;
    341 	}
    342 
    343 	allocsz = nvmm_impl->conf_sizes[args->op];
    344 	data = kmem_alloc(allocsz, KM_SLEEP);
    345 
    346 	error = nvmm_machine_get(owner, args->machid, &mach, true);
    347 	if (error) {
    348 		kmem_free(data, allocsz);
    349 		return error;
    350 	}
    351 
    352 	error = copyin(args->conf, data, allocsz);
    353 	if (error) {
    354 		goto out;
    355 	}
    356 
    357 	error = (*nvmm_impl->machine_configure)(mach, args->op, data);
    358 
    359 out:
    360 	nvmm_machine_put(mach);
    361 	kmem_free(data, allocsz);
    362 	return error;
    363 }
    364 
/*
 * NVMM_IOC_VCPU_CREATE: allocate VCPU args->cpuid in the machine and
 * let the backend initialize it.  The slot is released again if the
 * backend fails.
 */
static int
nvmm_vcpu_create(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_create *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	/* On success, 'vcpu' is returned with its mutex held. */
	error = nvmm_vcpu_alloc(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	error = (*nvmm_impl->vcpu_create)(mach, vcpu);
	if (error) {
		/* Backend failure: roll the allocation back. */
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		goto out;
	}

	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}
    393 
/*
 * NVMM_IOC_VCPU_DESTROY: destroy VCPU args->cpuid of the machine,
 * releasing its backend state and its slot.
 */
static int
nvmm_vcpu_destroy(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_destroy *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	/* nvmm_vcpu_get() returns the VCPU locked. */
	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	(*nvmm_impl->vcpu_destroy)(mach, vcpu);
	nvmm_vcpu_free(mach, vcpu);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}
    417 
/*
 * NVMM_IOC_VCPU_SETSTATE: copy a state blob in from userland and push
 * the fields selected by args->flags to the backend.  The copyin runs
 * with the VCPU mutex held, since vcpu->state is freed under that
 * lock by nvmm_vcpu_free().
 */
static int
nvmm_vcpu_setstate(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_setstate *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	error = copyin(args->state, vcpu->state, nvmm_impl->state_size);
	if (error) {
		nvmm_vcpu_put(vcpu);
		goto out;
	}

	(*nvmm_impl->vcpu_setstate)(vcpu, vcpu->state, args->flags);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}
    447 
    448 static int
    449 nvmm_vcpu_getstate(struct nvmm_owner *owner,
    450     struct nvmm_ioc_vcpu_getstate *args)
    451 {
    452 	struct nvmm_machine *mach;
    453 	struct nvmm_cpu *vcpu;
    454 	int error;
    455 
    456 	error = nvmm_machine_get(owner, args->machid, &mach, false);
    457 	if (error)
    458 		return error;
    459 
    460 	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
    461 	if (error)
    462 		goto out;
    463 
    464 	(*nvmm_impl->vcpu_getstate)(vcpu, vcpu->state, args->flags);
    465 	nvmm_vcpu_put(vcpu);
    466 	error = copyout(vcpu->state, args->state, nvmm_impl->state_size);
    467 
    468 out:
    469 	nvmm_machine_put(mach);
    470 	return error;
    471 }
    472 
    473 static int
    474 nvmm_vcpu_inject(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_inject *args)
    475 {
    476 	struct nvmm_machine *mach;
    477 	struct nvmm_cpu *vcpu;
    478 	int error;
    479 
    480 	error = nvmm_machine_get(owner, args->machid, &mach, false);
    481 	if (error)
    482 		return error;
    483 
    484 	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
    485 	if (error)
    486 		goto out;
    487 
    488 	error = (*nvmm_impl->vcpu_inject)(mach, vcpu, &args->event);
    489 	nvmm_vcpu_put(vcpu);
    490 
    491 out:
    492 	nvmm_machine_put(mach);
    493 	return error;
    494 }
    495 
    496 static void
    497 nvmm_do_vcpu_run(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
    498     struct nvmm_exit *exit)
    499 {
    500 	struct vmspace *vm = mach->vm;
    501 
    502 	while (1) {
    503 		(*nvmm_impl->vcpu_run)(mach, vcpu, exit);
    504 
    505 		if (__predict_true(exit->reason != NVMM_EXIT_MEMORY)) {
    506 			break;
    507 		}
    508 		if (exit->u.mem.gpa >= mach->gpa_end) {
    509 			break;
    510 		}
    511 		if (uvm_fault(&vm->vm_map, exit->u.mem.gpa, exit->u.mem.prot)) {
    512 			break;
    513 		}
    514 	}
    515 }
    516 
/*
 * NVMM_IOC_VCPU_RUN: run the VCPU until an exit that userland must
 * handle; the exit description is returned in args->exit.
 */
static int
nvmm_vcpu_run(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_run *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	nvmm_do_vcpu_run(mach, vcpu, &args->exit);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}
    539 
    540 /* -------------------------------------------------------------------------- */
    541 
    542 static struct uvm_object *
    543 nvmm_hmapping_getuobj(struct nvmm_machine *mach, uintptr_t hva, size_t size,
    544    size_t *off)
    545 {
    546 	struct nvmm_hmapping *hmapping;
    547 	size_t i;
    548 
    549 	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
    550 		hmapping = &mach->hmap[i];
    551 		if (!hmapping->present) {
    552 			continue;
    553 		}
    554 		if (hva >= hmapping->hva &&
    555 		    hva + size <= hmapping->hva + hmapping->size) {
    556 			*off = hva - hmapping->hva;
    557 			return hmapping->uobj;
    558 		}
    559 	}
    560 
    561 	return NULL;
    562 }
    563 
/*
 * Check that [hva, hva+size) is a valid, page-aligned range for a new
 * host mapping.  Returns EEXIST if it partially overlaps, or fully
 * contains, an existing mapping.
 *
 * NOTE(review): a range falling entirely *inside* an existing mapping
 * hits the 'break' below and is accepted, which lets a second hmapping
 * slot cover the same HVAs — confirm this is intended.
 */
static int
nvmm_hmapping_validate(struct nvmm_machine *mach, uintptr_t hva, size_t size)
{
	struct nvmm_hmapping *hmapping;
	size_t i;

	/* Must be page-aligned, and must not start at address zero. */
	if ((hva % PAGE_SIZE) != 0 || (size % PAGE_SIZE) != 0) {
		return EINVAL;
	}
	if (hva == 0) {
		return EINVAL;
	}

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present) {
			continue;
		}

		/* Fully contained in this mapping: accepted. */
		if (hva >= hmapping->hva &&
		    hva + size <= hmapping->hva + hmapping->size) {
			break;
		}

		/* Starts inside this mapping. */
		if (hva >= hmapping->hva &&
		    hva < hmapping->hva + hmapping->size) {
			return EEXIST;
		}
		/* Ends inside this mapping. */
		if (hva + size > hmapping->hva &&
		    hva + size <= hmapping->hva + hmapping->size) {
			return EEXIST;
		}
		/* Fully contains this mapping. */
		if (hva <= hmapping->hva &&
		    hva + size >= hmapping->hva + hmapping->size) {
			return EEXIST;
		}
	}

	return 0;
}
    604 
    605 static struct nvmm_hmapping *
    606 nvmm_hmapping_alloc(struct nvmm_machine *mach)
    607 {
    608 	struct nvmm_hmapping *hmapping;
    609 	size_t i;
    610 
    611 	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
    612 		hmapping = &mach->hmap[i];
    613 		if (!hmapping->present) {
    614 			hmapping->present = true;
    615 			return hmapping;
    616 		}
    617 	}
    618 
    619 	return NULL;
    620 }
    621 
/*
 * Unregister the host mapping that matches (hva, size) exactly: unmap
 * the range from the calling process and drop the kernel reference on
 * the backing uobj.  Returns ENOENT if no such mapping is registered.
 */
static int
nvmm_hmapping_free(struct nvmm_machine *mach, uintptr_t hva, size_t size)
{
	struct vmspace *vmspace = curproc->p_vmspace;
	struct nvmm_hmapping *hmapping;
	size_t i;

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		/* Only an exact (hva, size) match qualifies. */
		if (!hmapping->present || hmapping->hva != hva ||
		    hmapping->size != size) {
			continue;
		}

		uvm_unmap(&vmspace->vm_map, hmapping->hva,
		    hmapping->hva + hmapping->size);
		uao_detach(hmapping->uobj);

		hmapping->uobj = NULL;
		hmapping->present = false;

		return 0;
	}

	return ENOENT;
}
    648 
/*
 * NVMM_IOC_HVA_MAP: create an anonymous uobj of args->size bytes,
 * register it as a host mapping of the machine, and map it at the
 * fixed address args->hva in the calling process.
 */
static int
nvmm_hva_map(struct nvmm_owner *owner, struct nvmm_ioc_hva_map *args)
{
	struct vmspace *vmspace = curproc->p_vmspace;
	struct nvmm_machine *mach;
	struct nvmm_hmapping *hmapping;
	vaddr_t uva;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error)
		return error;

	error = nvmm_hmapping_validate(mach, args->hva, args->size);
	if (error)
		goto out;

	hmapping = nvmm_hmapping_alloc(mach);
	if (hmapping == NULL) {
		error = ENOBUFS;
		goto out;
	}

	hmapping->hva = args->hva;
	hmapping->size = args->size;
	/* uao_create() gives us the kernel (machine) reference. */
	hmapping->uobj = uao_create(hmapping->size, 0);
	uva = hmapping->hva;

	/* Take a reference for the user. */
	uao_reference(hmapping->uobj);

	/* Map the uobj into the user address space, as pageable. */
	error = uvm_map(&vmspace->vm_map, &uva, hmapping->size, hmapping->uobj,
	    0, 0, UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, UVM_INH_SHARE,
	    UVM_ADV_RANDOM, UVM_FLAG_FIXED|UVM_FLAG_UNMAP));
	if (error) {
		/*
		 * Drop the user reference taken above.
		 * NOTE(review): the hmapping slot stays registered (with
		 * the kernel reference) on failure — confirm this is
		 * intended and not a slot leak.
		 */
		uao_detach(hmapping->uobj);
	}

out:
	nvmm_machine_put(mach);
	return error;
}
    692 
    693 static int
    694 nvmm_hva_unmap(struct nvmm_owner *owner, struct nvmm_ioc_hva_unmap *args)
    695 {
    696 	struct nvmm_machine *mach;
    697 	int error;
    698 
    699 	error = nvmm_machine_get(owner, args->machid, &mach, true);
    700 	if (error)
    701 		return error;
    702 
    703 	error = nvmm_hmapping_free(mach, args->hva, args->size);
    704 
    705 	nvmm_machine_put(mach);
    706 	return error;
    707 }
    708 
    709 /* -------------------------------------------------------------------------- */
    710 
/*
 * NVMM_IOC_GPA_MAP: map part of a registered host mapping (starting at
 * args->hva) into the guest physical address space of the machine, at
 * args->gpa, with protection args->prot.
 */
static int
nvmm_gpa_map(struct nvmm_owner *owner, struct nvmm_ioc_gpa_map *args)
{
	struct nvmm_machine *mach;
	struct uvm_object *uobj;
	gpaddr_t gpa;
	size_t off;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	/* Only R/W/X protection bits are accepted. */
	if ((args->prot & ~(PROT_READ|PROT_WRITE|PROT_EXEC)) != 0) {
		error = EINVAL;
		goto out;
	}

	/* GPA, size and HVA must all be page-aligned. */
	if ((args->gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0 ||
	    (args->hva % PAGE_SIZE) != 0) {
		error = EINVAL;
		goto out;
	}
	if (args->hva == 0) {
		error = EINVAL;
		goto out;
	}
	/* The GPA range must lie in [gpa_begin, gpa_end) without wrap. */
	if (args->gpa < mach->gpa_begin || args->gpa >= mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size <= args->gpa) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size > mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	gpa = args->gpa;

	/* The HVA range must be covered by one registered host mapping. */
	uobj = nvmm_hmapping_getuobj(mach, args->hva, args->size, &off);
	if (uobj == NULL) {
		error = EINVAL;
		goto out;
	}

	/* Take a reference for the machine. */
	uao_reference(uobj);

	/* Map the uobj into the machine address space, as pageable. */
	error = uvm_map(&mach->vm->vm_map, &gpa, args->size, uobj, off, 0,
	    UVM_MAPFLAG(args->prot, UVM_PROT_RWX, UVM_INH_NONE,
	    UVM_ADV_RANDOM, UVM_FLAG_FIXED|UVM_FLAG_UNMAP));
	if (error) {
		uao_detach(uobj);
		goto out;
	}
	/* UVM_FLAG_FIXED should have placed us exactly at args->gpa. */
	if (gpa != args->gpa) {
		uao_detach(uobj);
		printf("[!] uvm_map problem\n");
		error = EINVAL;
		goto out;
	}

out:
	nvmm_machine_put(mach);
	return error;
}
    780 
    781 static int
    782 nvmm_gpa_unmap(struct nvmm_owner *owner, struct nvmm_ioc_gpa_unmap *args)
    783 {
    784 	struct nvmm_machine *mach;
    785 	gpaddr_t gpa;
    786 	int error;
    787 
    788 	error = nvmm_machine_get(owner, args->machid, &mach, false);
    789 	if (error)
    790 		return error;
    791 
    792 	if ((args->gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0) {
    793 		error = EINVAL;
    794 		goto out;
    795 	}
    796 	if (args->gpa < mach->gpa_begin || args->gpa >= mach->gpa_end) {
    797 		error = EINVAL;
    798 		goto out;
    799 	}
    800 	if (args->gpa + args->size <= args->gpa) {
    801 		error = EINVAL;
    802 		goto out;
    803 	}
    804 	if (args->gpa + args->size >= mach->gpa_end) {
    805 		error = EINVAL;
    806 		goto out;
    807 	}
    808 	gpa = args->gpa;
    809 
    810 	/* Unmap the memory from the machine. */
    811 	uvm_unmap(&mach->vm->vm_map, gpa, gpa + args->size);
    812 
    813 out:
    814 	nvmm_machine_put(mach);
    815 	return error;
    816 }
    817 
    818 /* -------------------------------------------------------------------------- */
    819 
/*
 * NVMM_CTL_MACH_INFO: report the number of present VCPUs, the owner
 * pid and the creation time of a machine.  Uses root_owner, so any
 * machine can be inspected regardless of ownership (the caller was
 * authorized in nvmm_ctl()).
 */
static int
nvmm_ctl_mach_info(struct nvmm_ioc_ctl *args)
{
	struct nvmm_ctl_mach_info ctl;
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;
	size_t i;

	if (args->size != sizeof(ctl))
		return EINVAL;
	error = copyin(args->data, &ctl, sizeof(ctl));
	if (error)
		return error;

	error = nvmm_machine_get(&root_owner, ctl.machid, &mach, true);
	if (error)
		return error;

	/* Count the VCPUs currently present. */
	ctl.nvcpus = 0;
	for (i = 0; i < NVMM_MAX_VCPUS; i++) {
		error = nvmm_vcpu_get(mach, i, &vcpu);
		if (error)
			continue;
		ctl.nvcpus++;
		nvmm_vcpu_put(vcpu);
	}
	ctl.pid = mach->owner->pid;
	ctl.time = mach->time;

	nvmm_machine_put(mach);

	error = copyout(&ctl, args->data, sizeof(ctl));
	if (error)
		return error;

	return 0;
}
    858 
    859 static int
    860 nvmm_ctl(struct nvmm_owner *owner, struct nvmm_ioc_ctl *args)
    861 {
    862 	int error;
    863 
    864 	error = kauth_authorize_device(curlwp->l_cred, KAUTH_DEVICE_NVMM_CTL,
    865 	    NULL, NULL, NULL, NULL);
    866 	if (error)
    867 		return error;
    868 
    869 	switch (args->op) {
    870 	case NVMM_CTL_MACH_INFO:
    871 		return nvmm_ctl_mach_info(args);
    872 	default:
    873 		return EINVAL;
    874 	}
    875 }
    876 
    877 /* -------------------------------------------------------------------------- */
    878 
/*
 * Select a backend from nvmm_impl_list — the first one whose ident()
 * callback accepts the host — then initialize the machine table, the
 * per-VCPU mutexes, and the backend itself.  Returns ENOTSUP when no
 * backend matches.
 */
static int
nvmm_init(void)
{
	size_t i, n;

	for (i = 0; i < __arraycount(nvmm_impl_list); i++) {
		if (!(*nvmm_impl_list[i]->ident)()) {
			continue;
		}
		nvmm_impl = nvmm_impl_list[i];
		break;
	}
	if (nvmm_impl == NULL) {
		printf("[!] No implementation found\n");
		return ENOTSUP;
	}

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		machines[i].machid = i;
		rw_init(&machines[i].lock);
		for (n = 0; n < NVMM_MAX_VCPUS; n++) {
			machines[i].cpus[n].present = false;
			machines[i].cpus[n].cpuid = n;
			mutex_init(&machines[i].cpus[n].lock, MUTEX_DEFAULT,
			    IPL_NONE);
		}
	}

	(*nvmm_impl->init)();

	return 0;
}
    911 
    912 static void
    913 nvmm_fini(void)
    914 {
    915 	size_t i, n;
    916 
    917 	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
    918 		rw_destroy(&machines[i].lock);
    919 		for (n = 0; n < NVMM_MAX_VCPUS; n++) {
    920 			mutex_destroy(&machines[i].cpus[n].lock);
    921 		}
    922 	}
    923 
    924 	(*nvmm_impl->fini)();
    925 }
    926 
    927 /* -------------------------------------------------------------------------- */
    928 
static dev_type_open(nvmm_open);

/*
 * Character device switch: only open is implemented.  Everything else
 * goes through the per-open file operations below, installed by
 * fd_clone() in nvmm_open().
 */
const struct cdevsw nvmm_cdevsw = {
	.d_open = nvmm_open,
	.d_close = noclose,
	.d_read = noread,
	.d_write = nowrite,
	.d_ioctl = noioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_OTHER | D_MPSAFE
};

static int nvmm_ioctl(file_t *, u_long, void *);
static int nvmm_close(file_t *);

/* Per-open file operations; f_data holds the struct nvmm_owner. */
const struct fileops nvmm_fileops = {
	.fo_read = fbadop_read,
	.fo_write = fbadop_write,
	.fo_ioctl = nvmm_ioctl,
	.fo_fcntl = fnullop_fcntl,
	.fo_poll = fnullop_poll,
	.fo_stat = fbadop_stat,
	.fo_close = nvmm_close,
	.fo_kqfilter = fnullop_kqfilter,
	.fo_restart = fnullop_restart,
	.fo_mmap = NULL,
};
    961 
/*
 * Open /dev/nvmm: allocate a per-open owner, keyed by the opening
 * process' pid, and install it as f_data of a cloned file that uses
 * nvmm_fileops for all further operations.
 */
static int
nvmm_open(dev_t dev, int flags, int type, struct lwp *l)
{
	struct nvmm_owner *owner;
	struct file *fp;
	int error, fd;

	/* There is only one nvmm device node. */
	if (minor(dev) != 0)
		return EXDEV;
	error = fd_allocfile(&fp, &fd);
	if (error)
		return error;

	owner = kmem_alloc(sizeof(*owner), KM_SLEEP);
	owner->pid = l->l_proc->p_pid;

	return fd_clone(fp, fd, flags, &nvmm_fileops, owner);
}
    980 
    981 static int
    982 nvmm_close(file_t *fp)
    983 {
    984 	struct nvmm_owner *owner = fp->f_data;
    985 
    986 	KASSERT(owner != NULL);
    987 	nvmm_kill_machines(owner);
    988 	kmem_free(owner, sizeof(*owner));
    989 	fp->f_data = NULL;
    990 
    991    	return 0;
    992 }
    993 
/*
 * Dispatch an NVMM ioctl to its handler.  'data' points to the
 * ioctl-specific argument structure, already copied in by the generic
 * ioctl layer; the owner comes from the per-open file data set up in
 * nvmm_open().
 */
static int
nvmm_ioctl(file_t *fp, u_long cmd, void *data)
{
	struct nvmm_owner *owner = fp->f_data;

	KASSERT(owner != NULL);

	switch (cmd) {
	case NVMM_IOC_CAPABILITY:
		return nvmm_capability(owner, data);
	case NVMM_IOC_MACHINE_CREATE:
		return nvmm_machine_create(owner, data);
	case NVMM_IOC_MACHINE_DESTROY:
		return nvmm_machine_destroy(owner, data);
	case NVMM_IOC_MACHINE_CONFIGURE:
		return nvmm_machine_configure(owner, data);
	case NVMM_IOC_VCPU_CREATE:
		return nvmm_vcpu_create(owner, data);
	case NVMM_IOC_VCPU_DESTROY:
		return nvmm_vcpu_destroy(owner, data);
	case NVMM_IOC_VCPU_SETSTATE:
		return nvmm_vcpu_setstate(owner, data);
	case NVMM_IOC_VCPU_GETSTATE:
		return nvmm_vcpu_getstate(owner, data);
	case NVMM_IOC_VCPU_INJECT:
		return nvmm_vcpu_inject(owner, data);
	case NVMM_IOC_VCPU_RUN:
		return nvmm_vcpu_run(owner, data);
	case NVMM_IOC_GPA_MAP:
		return nvmm_gpa_map(owner, data);
	case NVMM_IOC_GPA_UNMAP:
		return nvmm_gpa_unmap(owner, data);
	case NVMM_IOC_HVA_MAP:
		return nvmm_hva_map(owner, data);
	case NVMM_IOC_HVA_UNMAP:
		return nvmm_hva_unmap(owner, data);
	case NVMM_IOC_CTL:
		return nvmm_ctl(owner, data);
	default:
		return EINVAL;
	}
}
   1036 
   1037 /* -------------------------------------------------------------------------- */
   1038 
/*
 * Pseudo-device attach hook (see ioconf.h); all the real
 * initialization is done in nvmm_modcmd().
 */
void
nvmmattach(int nunits)
{
	/* nothing */
}
   1044 
MODULE(MODULE_CLASS_MISC, nvmm, NULL);

/*
 * Module control: probe/initialize the backend and, when built as a
 * module, attach/detach the character device.  Unloading is refused
 * while machines exist, and autounload is refused always.
 */
static int
nvmm_modcmd(modcmd_t cmd, void *arg)
{
	int error;

	switch (cmd) {
	case MODULE_CMD_INIT:
		error = nvmm_init();
		if (error)
			return error;

#if defined(_MODULE)
		{
			devmajor_t bmajor = NODEVMAJOR;
			devmajor_t cmajor = 345;

			/* mknod /dev/nvmm c 345 0 */
			error = devsw_attach("nvmm", NULL, &bmajor,
			    &nvmm_cdevsw, &cmajor);
			if (error) {
				nvmm_fini();
				return error;
			}
		}
#endif
		return 0;

	case MODULE_CMD_FINI:
		/*
		 * NOTE(review): this check is not atomic with the detach
		 * below — an already-open descriptor could still create a
		 * machine in between; confirm this window is acceptable.
		 */
		if (nmachines > 0) {
			return EBUSY;
		}
#if defined(_MODULE)
		{
			error = devsw_detach(NULL, &nvmm_cdevsw);
			if (error) {
				return error;
			}
		}
#endif
		nvmm_fini();
		return 0;

	case MODULE_CMD_AUTOUNLOAD:
		return EBUSY;

	default:
		return ENOTTY;
	}
}
   1096