/*	$NetBSD: nvmm.c,v 1.19 2019/04/28 14:22:13 maxv Exp $	*/

/*
 * Copyright (c) 2018-2019 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Maxime Villard.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: nvmm.c,v 1.19 2019/04/28 14:22:13 maxv Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>

#include <sys/cpu.h>
#include <sys/conf.h>
#include <sys/kmem.h>
#include <sys/module.h>
#include <sys/proc.h>
#include <sys/mman.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/kauth.h>

#include <uvm/uvm.h>
#include <uvm/uvm_page.h>

#include "ioconf.h"

#include <dev/nvmm/nvmm.h>
#include <dev/nvmm/nvmm_internal.h>
#include <dev/nvmm/nvmm_ioctl.h>

static struct nvmm_machine machines[NVMM_MAX_MACHINES];
static volatile unsigned int nmachines __cacheline_aligned;

static const struct nvmm_impl *nvmm_impl_list[] = {
	&nvmm_x86_svm,	/* x86 AMD SVM */
	&nvmm_x86_vmx	/* x86 Intel VMX */
};

static const struct nvmm_impl *nvmm_impl = NULL;

static struct nvmm_owner root_owner;

/* -------------------------------------------------------------------------- */

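/*
 * Grab the first free slot in the global machines[] array.  On success the
 * slot is returned with its rwlock write-held and marked present, and the
 * global machine count is bumped; ENOBUFS means every slot is in use.
 */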
static int
nvmm_machine_alloc(struct nvmm_machine **ret)
{
	struct nvmm_machine *mach;
	size_t i;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		mach = &machines[i];

		rw_enter(&mach->lock, RW_WRITER);
		if (mach->present) {
			rw_exit(&mach->lock);
			continue;
		}

		mach->present = true;
		mach->time = time_second;
		*ret = mach;
		atomic_inc_uint(&nmachines);
		return 0;
	}

	return ENOBUFS;
}

static void
nvmm_machine_free(struct nvmm_machine *mach)
{
	KASSERT(rw_write_held(&mach->lock));
	KASSERT(mach->present);
	mach->present = false;
	atomic_dec_uint(&nmachines);
}

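/*
 * Look up a machine by ID and return it locked, read or write depending on
 * the 'writer' argument.  The caller must be the machine's owner, except
 * for root_owner, which may reach any machine.  Release with
 * nvmm_machine_put().
 */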
static int
nvmm_machine_get(struct nvmm_owner *owner, nvmm_machid_t machid,
    struct nvmm_machine **ret, bool writer)
{
	struct nvmm_machine *mach;
	krw_t op = writer ? RW_WRITER : RW_READER;

	if (machid >= NVMM_MAX_MACHINES) {
		return EINVAL;
	}
	mach = &machines[machid];

	rw_enter(&mach->lock, op);
	if (!mach->present) {
		rw_exit(&mach->lock);
		return ENOENT;
	}
	if (owner != &root_owner && mach->owner != owner) {
		rw_exit(&mach->lock);
		return EPERM;
	}
	*ret = mach;

	return 0;
}

static void
nvmm_machine_put(struct nvmm_machine *mach)
{
	rw_exit(&mach->lock);
}

/* -------------------------------------------------------------------------- */

static int
nvmm_vcpu_alloc(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    struct nvmm_cpu **ret)
{
	struct nvmm_cpu *vcpu;

	if (cpuid >= NVMM_MAX_VCPUS) {
		return EINVAL;
	}
	vcpu = &mach->cpus[cpuid];

	mutex_enter(&vcpu->lock);
	if (vcpu->present) {
		mutex_exit(&vcpu->lock);
		return EBUSY;
	}

	vcpu->present = true;
	vcpu->comm = NULL;
	vcpu->hcpu_last = -1;
	*ret = vcpu;
	return 0;
}

static void
nvmm_vcpu_free(struct nvmm_machine *mach, struct nvmm_cpu *vcpu)
{
	KASSERT(mutex_owned(&vcpu->lock));
	vcpu->present = false;
	if (vcpu->comm != NULL) {
		uvm_deallocate(kernel_map, (vaddr_t)vcpu->comm, PAGE_SIZE);
	}
}

int
nvmm_vcpu_get(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    struct nvmm_cpu **ret)
{
	struct nvmm_cpu *vcpu;

	if (cpuid >= NVMM_MAX_VCPUS) {
		return EINVAL;
	}
	vcpu = &mach->cpus[cpuid];

	mutex_enter(&vcpu->lock);
	if (!vcpu->present) {
		mutex_exit(&vcpu->lock);
		return ENOENT;
	}
	*ret = vcpu;

	return 0;
}

void
nvmm_vcpu_put(struct nvmm_cpu *vcpu)
{
	mutex_exit(&vcpu->lock);
}

/* -------------------------------------------------------------------------- */

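/*
 * Tear down every machine belonging to 'owner'.  Called when the owner's
 * file descriptor is closed, so that an exiting or killed process cannot
 * leak machines, VCPUs, or the backing UVM objects.
 */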
static void
nvmm_kill_machines(struct nvmm_owner *owner)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	size_t i, j;
	int error;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		mach = &machines[i];

		rw_enter(&mach->lock, RW_WRITER);
		if (!mach->present || mach->owner != owner) {
			rw_exit(&mach->lock);
			continue;
		}

		/* Kill it. */
		for (j = 0; j < NVMM_MAX_VCPUS; j++) {
			error = nvmm_vcpu_get(mach, j, &vcpu);
			if (error)
				continue;
			(*nvmm_impl->vcpu_destroy)(mach, vcpu);
			nvmm_vcpu_free(mach, vcpu);
			nvmm_vcpu_put(vcpu);
		}
		(*nvmm_impl->machine_destroy)(mach);
		uvmspace_free(mach->vm);

		/* Drop the kernel UOBJ refs. */
		for (j = 0; j < NVMM_MAX_HMAPPINGS; j++) {
			if (!mach->hmap[j].present)
				continue;
			uao_detach(mach->hmap[j].uobj);
		}

		nvmm_machine_free(mach);

		rw_exit(&mach->lock);
	}
}

/* -------------------------------------------------------------------------- */

static int
nvmm_capability(struct nvmm_owner *owner, struct nvmm_ioc_capability *args)
{
	args->cap.version = NVMM_CAPABILITY_VERSION;
	args->cap.state_size = nvmm_impl->state_size;
	args->cap.max_machines = NVMM_MAX_MACHINES;
	args->cap.max_vcpus = NVMM_MAX_VCPUS;
	args->cap.max_ram = NVMM_MAX_RAM;

	(*nvmm_impl->capability)(&args->cap);

	return 0;
}

static int
nvmm_machine_create(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_create *args)
{
	struct nvmm_machine *mach;
	int error;

	error = nvmm_machine_alloc(&mach);
	if (error)
		return error;

	/* Curproc owns the machine. */
	mach->owner = owner;

	/* Zero out the host mappings. */
	memset(&mach->hmap, 0, sizeof(mach->hmap));

	/* Create the machine vmspace. */
	mach->gpa_begin = 0;
	mach->gpa_end = NVMM_MAX_RAM;
	mach->vm = uvmspace_alloc(0, mach->gpa_end - mach->gpa_begin, false);

	/* Create the comm uobj. */
	mach->commuobj = uao_create(NVMM_MAX_VCPUS * PAGE_SIZE, 0);

	(*nvmm_impl->machine_create)(mach);

	args->machid = mach->machid;
	nvmm_machine_put(mach);

	return 0;
}

static int
nvmm_machine_destroy(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_destroy *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;
	size_t i;

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error)
		return error;

	for (i = 0; i < NVMM_MAX_VCPUS; i++) {
		error = nvmm_vcpu_get(mach, i, &vcpu);
		if (error)
			continue;

		(*nvmm_impl->vcpu_destroy)(mach, vcpu);
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
	}

	(*nvmm_impl->machine_destroy)(mach);

	/* Free the machine vmspace. */
	uvmspace_free(mach->vm);

	/* Drop the kernel UOBJ refs. */
	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		if (!mach->hmap[i].present)
			continue;
		uao_detach(mach->hmap[i].uobj);
	}

	nvmm_machine_free(mach);
	nvmm_machine_put(mach);

	return 0;
}

static int
nvmm_machine_configure(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_configure *args)
{
	struct nvmm_machine *mach;
	size_t allocsz;
	void *data;
	int error;

	if (__predict_false(args->op >= nvmm_impl->conf_max)) {
		return EINVAL;
	}

	allocsz = nvmm_impl->conf_sizes[args->op];
	data = kmem_alloc(allocsz, KM_SLEEP);

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error) {
		kmem_free(data, allocsz);
		return error;
	}

	error = copyin(args->conf, data, allocsz);
	if (error) {
		goto out;
	}

	error = (*nvmm_impl->machine_configure)(mach, args->op, data);

out:
	nvmm_machine_put(mach);
	kmem_free(data, allocsz);
	return error;
}

static int
nvmm_vcpu_create(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_create *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_alloc(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	/* Allocate the comm page. */
	uao_reference(mach->commuobj);
	error = uvm_map(kernel_map, (vaddr_t *)&vcpu->comm, PAGE_SIZE,
	    mach->commuobj, args->cpuid * PAGE_SIZE, 0, UVM_MAPFLAG(UVM_PROT_RW,
	    UVM_PROT_RW, UVM_INH_SHARE, UVM_ADV_RANDOM, 0));
	if (error) {
		uao_detach(mach->commuobj);
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		goto out;
	}
	error = uvm_map_pageable(kernel_map, (vaddr_t)vcpu->comm,
	    (vaddr_t)vcpu->comm + PAGE_SIZE, false, 0);
	if (error) {
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		goto out;
	}
	memset(vcpu->comm, 0, PAGE_SIZE);

	error = (*nvmm_impl->vcpu_create)(mach, vcpu);
	if (error) {
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		goto out;
	}

	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_destroy(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_destroy *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	(*nvmm_impl->vcpu_destroy)(mach, vcpu);
	nvmm_vcpu_free(mach, vcpu);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_setstate(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_setstate *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	(*nvmm_impl->vcpu_setstate)(vcpu);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_getstate(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_getstate *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	(*nvmm_impl->vcpu_getstate)(vcpu);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_inject(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_inject *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	error = (*nvmm_impl->vcpu_inject)(mach, vcpu, &args->event);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

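/*
 * Run loop: enter the guest through the backend, and keep re-entering as
 * long as the only reason for exiting is a guest memory fault that the
 * kernel can resolve on its own.  Such faults are handled transparently
 * with uvm_fault() on the machine's vmspace; any other exit reason, or a
 * GPA beyond the machine's RAM, is bounced back to userland.
 */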
static void
nvmm_do_vcpu_run(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
    struct nvmm_exit *exit)
{
	struct vmspace *vm = mach->vm;

	while (1) {
		(*nvmm_impl->vcpu_run)(mach, vcpu, exit);

		if (__predict_true(exit->reason != NVMM_EXIT_MEMORY)) {
			break;
		}
		if (exit->u.mem.gpa >= mach->gpa_end) {
			break;
		}
		if (uvm_fault(&vm->vm_map, exit->u.mem.gpa, exit->u.mem.prot)) {
			break;
		}
	}
}

static int
nvmm_vcpu_run(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_run *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	nvmm_do_vcpu_run(mach, vcpu, &args->exit);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

/* -------------------------------------------------------------------------- */

static struct uvm_object *
nvmm_hmapping_getuobj(struct nvmm_machine *mach, uintptr_t hva, size_t size,
    size_t *off)
{
	struct nvmm_hmapping *hmapping;
	size_t i;

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present) {
			continue;
		}
		if (hva >= hmapping->hva &&
		    hva + size <= hmapping->hva + hmapping->size) {
			*off = hva - hmapping->hva;
			return hmapping->uobj;
		}
	}

	return NULL;
}

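/*
 * Validate a candidate HVA range: it must be page-aligned and non-NULL.  A
 * range fully contained in an existing mapping passes validation; any
 * partial overlap with, or enclosure of, an existing mapping yields EEXIST.
 */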
static int
nvmm_hmapping_validate(struct nvmm_machine *mach, uintptr_t hva, size_t size)
{
	struct nvmm_hmapping *hmapping;
	size_t i;

	if ((hva % PAGE_SIZE) != 0 || (size % PAGE_SIZE) != 0) {
		return EINVAL;
	}
	if (hva == 0) {
		return EINVAL;
	}

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present) {
			continue;
		}

		if (hva >= hmapping->hva &&
		    hva + size <= hmapping->hva + hmapping->size) {
			break;
		}

		if (hva >= hmapping->hva &&
		    hva < hmapping->hva + hmapping->size) {
			return EEXIST;
		}
		if (hva + size > hmapping->hva &&
		    hva + size <= hmapping->hva + hmapping->size) {
			return EEXIST;
		}
		if (hva <= hmapping->hva &&
		    hva + size >= hmapping->hva + hmapping->size) {
			return EEXIST;
		}
	}

	return 0;
}

static struct nvmm_hmapping *
nvmm_hmapping_alloc(struct nvmm_machine *mach)
{
	struct nvmm_hmapping *hmapping;
	size_t i;

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present) {
			hmapping->present = true;
			return hmapping;
		}
	}

	return NULL;
}

static int
nvmm_hmapping_free(struct nvmm_machine *mach, uintptr_t hva, size_t size)
{
	struct vmspace *vmspace = curproc->p_vmspace;
	struct nvmm_hmapping *hmapping;
	size_t i;

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present || hmapping->hva != hva ||
		    hmapping->size != size) {
			continue;
		}

		uvm_unmap(&vmspace->vm_map, hmapping->hva,
		    hmapping->hva + hmapping->size);
		uao_detach(hmapping->uobj);

		hmapping->uobj = NULL;
		hmapping->present = false;

		return 0;
	}

	return ENOENT;
}

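/*
 * Back an HVA range with a fresh anonymous UVM object, and map that object
 * into the calling process at the given address.  The same uobj can later
 * be mapped into the guest with nvmm_gpa_map(), which is how host and
 * guest end up sharing the pages.
 */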
static int
nvmm_hva_map(struct nvmm_owner *owner, struct nvmm_ioc_hva_map *args)
{
	struct vmspace *vmspace = curproc->p_vmspace;
	struct nvmm_machine *mach;
	struct nvmm_hmapping *hmapping;
	vaddr_t uva;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error)
		return error;

	error = nvmm_hmapping_validate(mach, args->hva, args->size);
	if (error)
		goto out;

	hmapping = nvmm_hmapping_alloc(mach);
	if (hmapping == NULL) {
		error = ENOBUFS;
		goto out;
	}

	hmapping->hva = args->hva;
	hmapping->size = args->size;
	hmapping->uobj = uao_create(hmapping->size, 0);
	uva = hmapping->hva;

	/* Take a reference for the user. */
	uao_reference(hmapping->uobj);

	/* Map the uobj into the user address space, as pageable. */
	error = uvm_map(&vmspace->vm_map, &uva, hmapping->size, hmapping->uobj,
	    0, 0, UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, UVM_INH_SHARE,
	    UVM_ADV_RANDOM, UVM_FLAG_FIXED|UVM_FLAG_UNMAP));
	if (error) {
		uao_detach(hmapping->uobj);
	}

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_hva_unmap(struct nvmm_owner *owner, struct nvmm_ioc_hva_unmap *args)
{
	struct nvmm_machine *mach;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error)
		return error;

	error = nvmm_hmapping_free(mach, args->hva, args->size);

	nvmm_machine_put(mach);
	return error;
}

/* -------------------------------------------------------------------------- */

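/*
 * Map a previously registered HVA range into the guest physical address
 * space.  The GPA range must be page-aligned and fit entirely inside
 * [gpa_begin, gpa_end); the HVA range must be covered by a single host
 * mapping, whose uobj is then entered into the machine's vmspace at the
 * requested GPA.
 */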
static int
nvmm_gpa_map(struct nvmm_owner *owner, struct nvmm_ioc_gpa_map *args)
{
	struct nvmm_machine *mach;
	struct uvm_object *uobj;
	gpaddr_t gpa;
	size_t off;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	if ((args->prot & ~(PROT_READ|PROT_WRITE|PROT_EXEC)) != 0) {
		error = EINVAL;
		goto out;
	}

	if ((args->gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0 ||
	    (args->hva % PAGE_SIZE) != 0) {
		error = EINVAL;
		goto out;
	}
	if (args->hva == 0) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa < mach->gpa_begin || args->gpa >= mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size <= args->gpa) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size > mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	gpa = args->gpa;

	uobj = nvmm_hmapping_getuobj(mach, args->hva, args->size, &off);
	if (uobj == NULL) {
		error = EINVAL;
		goto out;
	}

	/* Take a reference for the machine. */
	uao_reference(uobj);

	/* Map the uobj into the machine address space, as pageable. */
	error = uvm_map(&mach->vm->vm_map, &gpa, args->size, uobj, off, 0,
	    UVM_MAPFLAG(args->prot, UVM_PROT_RWX, UVM_INH_NONE,
	    UVM_ADV_RANDOM, UVM_FLAG_FIXED|UVM_FLAG_UNMAP));
	if (error) {
		uao_detach(uobj);
		goto out;
	}
	if (gpa != args->gpa) {
		uao_detach(uobj);
		printf("[!] uvm_map problem\n");
		error = EINVAL;
		goto out;
	}

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_gpa_unmap(struct nvmm_owner *owner, struct nvmm_ioc_gpa_unmap *args)
{
	struct nvmm_machine *mach;
	gpaddr_t gpa;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	if ((args->gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa < mach->gpa_begin || args->gpa >= mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size <= args->gpa) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size > mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	gpa = args->gpa;

	/* Unmap the memory from the machine. */
	uvm_unmap(&mach->vm->vm_map, gpa, gpa + args->size);

out:
	nvmm_machine_put(mach);
	return error;
}

/* -------------------------------------------------------------------------- */

static int
nvmm_ctl_mach_info(struct nvmm_ioc_ctl *args)
{
	struct nvmm_ctl_mach_info ctl;
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;
	size_t i;

	if (args->size != sizeof(ctl))
		return EINVAL;
	error = copyin(args->data, &ctl, sizeof(ctl));
	if (error)
		return error;

	error = nvmm_machine_get(&root_owner, ctl.machid, &mach, true);
	if (error)
		return error;

	ctl.nvcpus = 0;
	for (i = 0; i < NVMM_MAX_VCPUS; i++) {
		error = nvmm_vcpu_get(mach, i, &vcpu);
		if (error)
			continue;
		ctl.nvcpus++;
		nvmm_vcpu_put(vcpu);
	}
	ctl.pid = mach->owner->pid;
	ctl.time = mach->time;

	nvmm_machine_put(mach);

	error = copyout(&ctl, args->data, sizeof(ctl));
	if (error)
		return error;

	return 0;
}

static int
nvmm_ctl(struct nvmm_owner *owner, struct nvmm_ioc_ctl *args)
{
	int error;

	error = kauth_authorize_device(curlwp->l_cred, KAUTH_DEVICE_NVMM_CTL,
	    NULL, NULL, NULL, NULL);
	if (error)
		return error;

	switch (args->op) {
	case NVMM_CTL_MACH_INFO:
		return nvmm_ctl_mach_info(args);
	default:
		return EINVAL;
	}
}

/* -------------------------------------------------------------------------- */

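/*
 * Probe the backend list (AMD SVM, then Intel VMX) and latch the first
 * implementation whose ident() callback accepts the host CPU; then
 * initialize the static machine and VCPU locks.
 */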
static int
nvmm_init(void)
{
	size_t i, n;

	for (i = 0; i < __arraycount(nvmm_impl_list); i++) {
		if (!(*nvmm_impl_list[i]->ident)()) {
			continue;
		}
		nvmm_impl = nvmm_impl_list[i];
		break;
	}
	if (nvmm_impl == NULL) {
		printf("[!] No implementation found\n");
		return ENOTSUP;
	}

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		machines[i].machid = i;
		rw_init(&machines[i].lock);
		for (n = 0; n < NVMM_MAX_VCPUS; n++) {
			machines[i].cpus[n].present = false;
			machines[i].cpus[n].cpuid = n;
			mutex_init(&machines[i].cpus[n].lock, MUTEX_DEFAULT,
			    IPL_NONE);
		}
	}

	(*nvmm_impl->init)();

	return 0;
}

static void
nvmm_fini(void)
{
	size_t i, n;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		rw_destroy(&machines[i].lock);
		for (n = 0; n < NVMM_MAX_VCPUS; n++) {
			mutex_destroy(&machines[i].cpus[n].lock);
		}
	}

	(*nvmm_impl->fini)();
}

/* -------------------------------------------------------------------------- */

static dev_type_open(nvmm_open);

const struct cdevsw nvmm_cdevsw = {
	.d_open = nvmm_open,
	.d_close = noclose,
	.d_read = noread,
	.d_write = nowrite,
	.d_ioctl = noioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_OTHER | D_MPSAFE
};

static int nvmm_ioctl(file_t *, u_long, void *);
static int nvmm_close(file_t *);
static int nvmm_mmap(file_t *, off_t *, size_t, int, int *, int *,
    struct uvm_object **, int *);

const struct fileops nvmm_fileops = {
	.fo_read = fbadop_read,
	.fo_write = fbadop_write,
	.fo_ioctl = nvmm_ioctl,
	.fo_fcntl = fnullop_fcntl,
	.fo_poll = fnullop_poll,
	.fo_stat = fbadop_stat,
	.fo_close = nvmm_close,
	.fo_kqfilter = fnullop_kqfilter,
	.fo_restart = fnullop_restart,
	.fo_mmap = nvmm_mmap,
};

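/*
 * Each open of /dev/nvmm clones a new file with its own nvmm_owner, so all
 * machines created through a given descriptor die with it (see nvmm_close).
 * A minimal sketch of the expected userland sequence, normally hidden
 * behind libnvmm, would look like:
 *
 *	int fd = open("/dev/nvmm", O_RDWR);		// reaches nvmm_open
 *	struct nvmm_ioc_machine_create args;
 *	ioctl(fd, NVMM_IOC_MACHINE_CREATE, &args);	// reaches nvmm_ioctl
 */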
static int
nvmm_open(dev_t dev, int flags, int type, struct lwp *l)
{
	struct nvmm_owner *owner;
	struct file *fp;
	int error, fd;

	if (minor(dev) != 0)
		return EXDEV;
	error = fd_allocfile(&fp, &fd);
	if (error)
		return error;

	owner = kmem_alloc(sizeof(*owner), KM_SLEEP);
	owner->pid = l->l_proc->p_pid;

	return fd_clone(fp, fd, flags, &nvmm_fileops, owner);
}

static int
nvmm_close(file_t *fp)
{
	struct nvmm_owner *owner = fp->f_data;

	KASSERT(owner != NULL);
	nvmm_kill_machines(owner);
	kmem_free(owner, sizeof(*owner));
	fp->f_data = NULL;

	return 0;
}

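/*
 * mmap() entry point: hand out the per-VCPU comm page.  The file offset
 * encodes the target machine and VCPU (NVMM_COMM_MACHID/NVMM_COMM_CPUID),
 * and is rewritten into an offset inside the machine's comm uobj, which
 * the caller then maps read/write but never executable.
 */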
static int
nvmm_mmap(file_t *fp, off_t *offp, size_t size, int prot, int *flagsp,
    int *advicep, struct uvm_object **uobjp, int *maxprotp)
{
	struct nvmm_owner *owner = fp->f_data;
	struct nvmm_machine *mach;
	nvmm_machid_t machid;
	nvmm_cpuid_t cpuid;
	int error;

	if (prot & PROT_EXEC)
		return EACCES;
	if (size != PAGE_SIZE)
		return EINVAL;

	cpuid = NVMM_COMM_CPUID(*offp);
	if (__predict_false(cpuid >= NVMM_MAX_VCPUS))
		return EINVAL;

	machid = NVMM_COMM_MACHID(*offp);
	error = nvmm_machine_get(owner, machid, &mach, false);
	if (error)
		return error;

	uao_reference(mach->commuobj);
	*uobjp = mach->commuobj;
	*offp = cpuid * PAGE_SIZE;
	*maxprotp = prot;
	*advicep = UVM_ADV_RANDOM;

	nvmm_machine_put(mach);
	return 0;
}

static int
nvmm_ioctl(file_t *fp, u_long cmd, void *data)
{
	struct nvmm_owner *owner = fp->f_data;

	KASSERT(owner != NULL);

	switch (cmd) {
	case NVMM_IOC_CAPABILITY:
		return nvmm_capability(owner, data);
	case NVMM_IOC_MACHINE_CREATE:
		return nvmm_machine_create(owner, data);
	case NVMM_IOC_MACHINE_DESTROY:
		return nvmm_machine_destroy(owner, data);
	case NVMM_IOC_MACHINE_CONFIGURE:
		return nvmm_machine_configure(owner, data);
	case NVMM_IOC_VCPU_CREATE:
		return nvmm_vcpu_create(owner, data);
	case NVMM_IOC_VCPU_DESTROY:
		return nvmm_vcpu_destroy(owner, data);
	case NVMM_IOC_VCPU_SETSTATE:
		return nvmm_vcpu_setstate(owner, data);
	case NVMM_IOC_VCPU_GETSTATE:
		return nvmm_vcpu_getstate(owner, data);
	case NVMM_IOC_VCPU_INJECT:
		return nvmm_vcpu_inject(owner, data);
	case NVMM_IOC_VCPU_RUN:
		return nvmm_vcpu_run(owner, data);
	case NVMM_IOC_GPA_MAP:
		return nvmm_gpa_map(owner, data);
	case NVMM_IOC_GPA_UNMAP:
		return nvmm_gpa_unmap(owner, data);
	case NVMM_IOC_HVA_MAP:
		return nvmm_hva_map(owner, data);
	case NVMM_IOC_HVA_UNMAP:
		return nvmm_hva_unmap(owner, data);
	case NVMM_IOC_CTL:
		return nvmm_ctl(owner, data);
	default:
		return EINVAL;
	}
}

/* -------------------------------------------------------------------------- */

void
nvmmattach(int nunits)
{
	/* nothing */
}

MODULE(MODULE_CLASS_MISC, nvmm, NULL);

static int
nvmm_modcmd(modcmd_t cmd, void *arg)
{
	int error;

	switch (cmd) {
	case MODULE_CMD_INIT:
		error = nvmm_init();
		if (error)
			return error;

#if defined(_MODULE)
		{
			devmajor_t bmajor = NODEVMAJOR;
			devmajor_t cmajor = 345;

			/* mknod /dev/nvmm c 345 0 */
			error = devsw_attach("nvmm", NULL, &bmajor,
			    &nvmm_cdevsw, &cmajor);
			if (error) {
				nvmm_fini();
				return error;
			}
		}
#endif
		return 0;

	case MODULE_CMD_FINI:
		if (nmachines > 0) {
			return EBUSY;
		}
#if defined(_MODULE)
		{
			error = devsw_detach(NULL, &nvmm_cdevsw);
			if (error) {
				return error;
			}
		}
#endif
		nvmm_fini();
		return 0;

	case MODULE_CMD_AUTOUNLOAD:
		return EBUSY;

	default:
		return ENOTTY;
	}
}