Home | History | Annotate | Line # | Download | only in nvmm
nvmm.c revision 1.32
      1 /*	$NetBSD: nvmm.c,v 1.32 2020/07/03 16:09:54 maxv Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2018-2020 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Maxime Villard.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 #include <sys/cdefs.h>
     33 __KERNEL_RCSID(0, "$NetBSD: nvmm.c,v 1.32 2020/07/03 16:09:54 maxv Exp $");
     34 
     35 #include <sys/param.h>
     36 #include <sys/systm.h>
     37 #include <sys/kernel.h>
     38 
     39 #include <sys/cpu.h>
     40 #include <sys/conf.h>
     41 #include <sys/kmem.h>
     42 #include <sys/module.h>
     43 #include <sys/proc.h>
     44 #include <sys/mman.h>
     45 #include <sys/file.h>
     46 #include <sys/filedesc.h>
     47 #include <sys/device.h>
     48 
     49 #include <uvm/uvm.h>
     50 #include <uvm/uvm_page.h>
     51 
     52 #include "ioconf.h"
     53 
     54 #include <dev/nvmm/nvmm.h>
     55 #include <dev/nvmm/nvmm_internal.h>
     56 #include <dev/nvmm/nvmm_ioctl.h>
     57 
/* Global machine table; a slot is in use when its 'present' flag is set. */
static struct nvmm_machine machines[NVMM_MAX_MACHINES];
/* Number of allocated machines, updated atomically. */
static volatile unsigned int nmachines __cacheline_aligned;

/* Backend candidates, probed in order by nvmm_ident(). */
static const struct nvmm_impl *nvmm_impl_list[] = {
	&nvmm_x86_svm,	/* x86 AMD SVM */
	&nvmm_x86_vmx	/* x86 Intel VMX */
};

/* Backend selected at init time; NULL until nvmm_init() succeeds. */
static const struct nvmm_impl *nvmm_impl = NULL;

/* Special owner allowed to access every machine (see nvmm_open()). */
static struct nvmm_owner root_owner;
     69 
     70 /* -------------------------------------------------------------------------- */
     71 
/*
 * Allocate a free machine slot.  On success, returns 0 with *ret pointing
 * to the slot and its rwlock held as writer; the caller releases it with
 * nvmm_machine_put().  Returns ENOBUFS when all slots are in use.
 */
static int
nvmm_machine_alloc(struct nvmm_machine **ret)
{
	struct nvmm_machine *mach;
	size_t i;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		mach = &machines[i];

		rw_enter(&mach->lock, RW_WRITER);
		if (mach->present) {
			rw_exit(&mach->lock);
			continue;
		}

		/* Claim the slot; keep the lock held for the caller. */
		mach->present = true;
		mach->time = time_second;
		*ret = mach;
		atomic_inc_uint(&nmachines);
		return 0;
	}

	return ENOBUFS;
}
     96 
/*
 * Release a machine slot.  The caller must hold the machine lock as
 * writer, and is still responsible for dropping it afterwards.
 */
static void
nvmm_machine_free(struct nvmm_machine *mach)
{
	KASSERT(rw_write_held(&mach->lock));
	KASSERT(mach->present);
	mach->present = false;
	atomic_dec_uint(&nmachines);
}
    105 
/*
 * Look up a machine by id and lock it, as reader or writer depending on
 * 'writer'.  On success, returns 0 with the lock held; release with
 * nvmm_machine_put().  Errors: EINVAL (bad machid), ENOENT (slot not
 * present), EPERM (machine belongs to another owner — root_owner may
 * access any machine).
 */
static int
nvmm_machine_get(struct nvmm_owner *owner, nvmm_machid_t machid,
    struct nvmm_machine **ret, bool writer)
{
	struct nvmm_machine *mach;
	krw_t op = writer ? RW_WRITER : RW_READER;

	if (machid >= NVMM_MAX_MACHINES) {
		return EINVAL;
	}
	mach = &machines[machid];

	rw_enter(&mach->lock, op);
	if (!mach->present) {
		rw_exit(&mach->lock);
		return ENOENT;
	}
	if (owner != &root_owner && mach->owner != owner) {
		rw_exit(&mach->lock);
		return EPERM;
	}
	*ret = mach;

	return 0;
}
    131 
/* Release the machine lock taken by nvmm_machine_get()/_alloc(). */
static void
nvmm_machine_put(struct nvmm_machine *mach)
{
	rw_exit(&mach->lock);
}
    137 
    138 /* -------------------------------------------------------------------------- */
    139 
/*
 * Allocate VCPU slot 'cpuid' of the given machine.  On success, returns
 * 0 with the VCPU mutex held; release with nvmm_vcpu_put().  Errors:
 * EINVAL (cpuid out of range), EBUSY (slot already in use).
 */
static int
nvmm_vcpu_alloc(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    struct nvmm_cpu **ret)
{
	struct nvmm_cpu *vcpu;

	if (cpuid >= NVMM_MAX_VCPUS) {
		return EINVAL;
	}
	vcpu = &mach->cpus[cpuid];

	mutex_enter(&vcpu->lock);
	if (vcpu->present) {
		mutex_exit(&vcpu->lock);
		return EBUSY;
	}

	/* Claim the slot; the comm page is mapped later by the caller. */
	vcpu->present = true;
	vcpu->comm = NULL;
	vcpu->hcpu_last = -1;
	*ret = vcpu;
	return 0;
}
    163 
/*
 * Release a VCPU slot.  The caller must hold the VCPU mutex, and is
 * still responsible for dropping it afterwards.  If a comm page was
 * mapped in the kernel, its mapping is torn down here.
 */
static void
nvmm_vcpu_free(struct nvmm_machine *mach, struct nvmm_cpu *vcpu)
{
	KASSERT(mutex_owned(&vcpu->lock));
	vcpu->present = false;
	if (vcpu->comm != NULL) {
		uvm_deallocate(kernel_map, (vaddr_t)vcpu->comm, PAGE_SIZE);
	}
}
    173 
/*
 * Look up a VCPU by id.  On success, returns 0 with the VCPU mutex
 * held; release with nvmm_vcpu_put().  Errors: EINVAL (cpuid out of
 * range), ENOENT (slot not present).
 */
static int
nvmm_vcpu_get(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    struct nvmm_cpu **ret)
{
	struct nvmm_cpu *vcpu;

	if (cpuid >= NVMM_MAX_VCPUS) {
		return EINVAL;
	}
	vcpu = &mach->cpus[cpuid];

	mutex_enter(&vcpu->lock);
	if (!vcpu->present) {
		mutex_exit(&vcpu->lock);
		return ENOENT;
	}
	*ret = vcpu;

	return 0;
}
    194 
/* Release the VCPU mutex taken by nvmm_vcpu_get()/_alloc(). */
static void
nvmm_vcpu_put(struct nvmm_cpu *vcpu)
{
	mutex_exit(&vcpu->lock);
}
    200 
    201 /* -------------------------------------------------------------------------- */
    202 
/*
 * Destroy every machine that belongs to 'owner'.  Called when the
 * owner's file is closed.  The teardown sequence (VCPUs, backend state,
 * guest vmspace, host-mapping uobj references, slot) mirrors
 * nvmm_machine_destroy().
 */
static void
nvmm_kill_machines(struct nvmm_owner *owner)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	size_t i, j;
	int error;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		mach = &machines[i];

		rw_enter(&mach->lock, RW_WRITER);
		if (!mach->present || mach->owner != owner) {
			rw_exit(&mach->lock);
			continue;
		}

		/* Kill it. */
		for (j = 0; j < NVMM_MAX_VCPUS; j++) {
			error = nvmm_vcpu_get(mach, j, &vcpu);
			if (error)
				continue;
			(*nvmm_impl->vcpu_destroy)(mach, vcpu);
			nvmm_vcpu_free(mach, vcpu);
			nvmm_vcpu_put(vcpu);
		}
		(*nvmm_impl->machine_destroy)(mach);
		uvmspace_free(mach->vm);

		/* Drop the kernel UOBJ refs. */
		for (j = 0; j < NVMM_MAX_HMAPPINGS; j++) {
			if (!mach->hmap[j].present)
				continue;
			uao_detach(mach->hmap[j].uobj);
		}

		nvmm_machine_free(mach);

		rw_exit(&mach->lock);
	}
}
    244 
    245 /* -------------------------------------------------------------------------- */
    246 
    247 static int
    248 nvmm_capability(struct nvmm_owner *owner, struct nvmm_ioc_capability *args)
    249 {
    250 	args->cap.version = NVMM_KERN_VERSION;
    251 	args->cap.state_size = nvmm_impl->state_size;
    252 	args->cap.max_machines = NVMM_MAX_MACHINES;
    253 	args->cap.max_vcpus = NVMM_MAX_VCPUS;
    254 	args->cap.max_ram = NVMM_MAX_RAM;
    255 
    256 	(*nvmm_impl->capability)(&args->cap);
    257 
    258 	return 0;
    259 }
    260 
/*
 * NVMM_IOC_MACHINE_CREATE: allocate a machine slot, create its guest
 * address space ([0, NVMM_MAX_RAM)) and its comm-page uobj, and let the
 * backend initialize its state.  Returns the new machid to userland.
 */
static int
nvmm_machine_create(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_create *args)
{
	struct nvmm_machine *mach;
	int error;

	/* On success, the machine lock is held as writer. */
	error = nvmm_machine_alloc(&mach);
	if (error)
		return error;

	/* Curproc owns the machine. */
	mach->owner = owner;

	/* Zero out the host mappings. */
	memset(&mach->hmap, 0, sizeof(mach->hmap));

	/* Create the machine vmspace. */
	mach->gpa_begin = 0;
	mach->gpa_end = NVMM_MAX_RAM;
	mach->vm = uvmspace_alloc(0, mach->gpa_end - mach->gpa_begin, false);

	/* Create the comm uobj. */
	mach->commuobj = uao_create(NVMM_MAX_VCPUS * PAGE_SIZE, 0);

	(*nvmm_impl->machine_create)(mach);

	args->machid = mach->machid;
	nvmm_machine_put(mach);

	return 0;
}
    293 
/*
 * NVMM_IOC_MACHINE_DESTROY: tear down a machine: destroy each present
 * VCPU and the backend state, free the guest vmspace, drop the kernel
 * references on the host-mapping uobjs, and release the slot.
 */
static int
nvmm_machine_destroy(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_destroy *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;
	size_t i;

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error)
		return error;

	for (i = 0; i < NVMM_MAX_VCPUS; i++) {
		error = nvmm_vcpu_get(mach, i, &vcpu);
		if (error)
			continue;

		(*nvmm_impl->vcpu_destroy)(mach, vcpu);
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
	}

	(*nvmm_impl->machine_destroy)(mach);

	/* Free the machine vmspace. */
	uvmspace_free(mach->vm);

	/* Drop the kernel UOBJ refs. */
	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		if (!mach->hmap[i].present)
			continue;
		uao_detach(mach->hmap[i].uobj);
	}

	nvmm_machine_free(mach);
	nvmm_machine_put(mach);

	return 0;
}
    334 
/*
 * NVMM_IOC_MACHINE_CONFIGURE: hand a machine-dependent configuration
 * blob to the backend.  The MD operation number selects the blob size
 * from the backend's mach_conf_sizes[] table; the blob is copied in
 * from userland while the machine is held as writer.
 */
static int
nvmm_machine_configure(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_configure *args)
{
	struct nvmm_machine *mach;
	size_t allocsz;
	uint64_t op;
	void *data;
	int error;

	op = NVMM_MACH_CONF_MD(args->op);
	if (__predict_false(op >= nvmm_impl->mach_conf_max)) {
		return EINVAL;
	}

	allocsz = nvmm_impl->mach_conf_sizes[op];
	data = kmem_alloc(allocsz, KM_SLEEP);

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error) {
		kmem_free(data, allocsz);
		return error;
	}

	error = copyin(args->conf, data, allocsz);
	if (error) {
		goto out;
	}

	error = (*nvmm_impl->machine_configure)(mach, op, data);

out:
	nvmm_machine_put(mach);
	kmem_free(data, allocsz);
	return error;
}
    371 
/*
 * NVMM_IOC_VCPU_CREATE: allocate a VCPU slot, map and wire the VCPU's
 * comm page into the kernel (backed by the machine's comm uobj), and
 * let the backend create its state.
 */
static int
nvmm_vcpu_create(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_create *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	/* On success, the VCPU mutex is held. */
	error = nvmm_vcpu_alloc(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	/* Allocate the comm page. */
	uao_reference(mach->commuobj);
	error = uvm_map(kernel_map, (vaddr_t *)&vcpu->comm, PAGE_SIZE,
	    mach->commuobj, args->cpuid * PAGE_SIZE, 0, UVM_MAPFLAG(UVM_PROT_RW,
	    UVM_PROT_RW, UVM_INH_SHARE, UVM_ADV_RANDOM, 0));
	if (error) {
		/* Drop the reference taken above; the map did not take it. */
		uao_detach(mach->commuobj);
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		goto out;
	}
	/* Wire the comm page; nvmm_vcpu_free() undoes the mapping on error. */
	error = uvm_map_pageable(kernel_map, (vaddr_t)vcpu->comm,
	    (vaddr_t)vcpu->comm + PAGE_SIZE, false, 0);
	if (error) {
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		goto out;
	}
	memset(vcpu->comm, 0, PAGE_SIZE);

	error = (*nvmm_impl->vcpu_create)(mach, vcpu);
	if (error) {
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		goto out;
	}

	nvmm_vcpu_put(vcpu);

	atomic_inc_uint(&mach->ncpus);

out:
	nvmm_machine_put(mach);
	return error;
}
    422 
    423 static int
    424 nvmm_vcpu_destroy(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_destroy *args)
    425 {
    426 	struct nvmm_machine *mach;
    427 	struct nvmm_cpu *vcpu;
    428 	int error;
    429 
    430 	error = nvmm_machine_get(owner, args->machid, &mach, false);
    431 	if (error)
    432 		return error;
    433 
    434 	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
    435 	if (error)
    436 		goto out;
    437 
    438 	(*nvmm_impl->vcpu_destroy)(mach, vcpu);
    439 	nvmm_vcpu_free(mach, vcpu);
    440 	nvmm_vcpu_put(vcpu);
    441 
    442 	atomic_dec_uint(&mach->ncpus);
    443 
    444 out:
    445 	nvmm_machine_put(mach);
    446 	return error;
    447 }
    448 
    449 static int
    450 nvmm_vcpu_configure(struct nvmm_owner *owner,
    451     struct nvmm_ioc_vcpu_configure *args)
    452 {
    453 	struct nvmm_machine *mach;
    454 	struct nvmm_cpu *vcpu;
    455 	size_t allocsz;
    456 	uint64_t op;
    457 	void *data;
    458 	int error;
    459 
    460 	op = NVMM_VCPU_CONF_MD(args->op);
    461 	if (__predict_false(op >= nvmm_impl->vcpu_conf_max))
    462 		return EINVAL;
    463 
    464 	allocsz = nvmm_impl->vcpu_conf_sizes[op];
    465 	data = kmem_alloc(allocsz, KM_SLEEP);
    466 
    467 	error = nvmm_machine_get(owner, args->machid, &mach, false);
    468 	if (error) {
    469 		kmem_free(data, allocsz);
    470 		return error;
    471 	}
    472 
    473 	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
    474 	if (error) {
    475 		nvmm_machine_put(mach);
    476 		kmem_free(data, allocsz);
    477 		return error;
    478 	}
    479 
    480 	error = copyin(args->conf, data, allocsz);
    481 	if (error) {
    482 		goto out;
    483 	}
    484 
    485 	error = (*nvmm_impl->vcpu_configure)(vcpu, op, data);
    486 
    487 out:
    488 	nvmm_vcpu_put(vcpu);
    489 	nvmm_machine_put(mach);
    490 	kmem_free(data, allocsz);
    491 	return error;
    492 }
    493 
    494 static int
    495 nvmm_vcpu_setstate(struct nvmm_owner *owner,
    496     struct nvmm_ioc_vcpu_setstate *args)
    497 {
    498 	struct nvmm_machine *mach;
    499 	struct nvmm_cpu *vcpu;
    500 	int error;
    501 
    502 	error = nvmm_machine_get(owner, args->machid, &mach, false);
    503 	if (error)
    504 		return error;
    505 
    506 	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
    507 	if (error)
    508 		goto out;
    509 
    510 	(*nvmm_impl->vcpu_setstate)(vcpu);
    511 	nvmm_vcpu_put(vcpu);
    512 
    513 out:
    514 	nvmm_machine_put(mach);
    515 	return error;
    516 }
    517 
    518 static int
    519 nvmm_vcpu_getstate(struct nvmm_owner *owner,
    520     struct nvmm_ioc_vcpu_getstate *args)
    521 {
    522 	struct nvmm_machine *mach;
    523 	struct nvmm_cpu *vcpu;
    524 	int error;
    525 
    526 	error = nvmm_machine_get(owner, args->machid, &mach, false);
    527 	if (error)
    528 		return error;
    529 
    530 	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
    531 	if (error)
    532 		goto out;
    533 
    534 	(*nvmm_impl->vcpu_getstate)(vcpu);
    535 	nvmm_vcpu_put(vcpu);
    536 
    537 out:
    538 	nvmm_machine_put(mach);
    539 	return error;
    540 }
    541 
    542 static int
    543 nvmm_vcpu_inject(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_inject *args)
    544 {
    545 	struct nvmm_machine *mach;
    546 	struct nvmm_cpu *vcpu;
    547 	int error;
    548 
    549 	error = nvmm_machine_get(owner, args->machid, &mach, false);
    550 	if (error)
    551 		return error;
    552 
    553 	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
    554 	if (error)
    555 		goto out;
    556 
    557 	error = (*nvmm_impl->vcpu_inject)(vcpu);
    558 	nvmm_vcpu_put(vcpu);
    559 
    560 out:
    561 	nvmm_machine_put(mach);
    562 	return error;
    563 }
    564 
/*
 * Run the VCPU until an exit that userland must handle.  Nested page
 * faults (NVMM_VCPU_EXIT_MEMORY) on GPAs below gpa_end are resolved
 * in-kernel through uvm_fault() and the VCPU is resumed transparently.
 * The loop also bails out early (EXIT_NONE) when a signal or a
 * reschedule is pending, so userland can service it.
 */
static int
nvmm_do_vcpu_run(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
    struct nvmm_vcpu_exit *exit)
{
	struct vmspace *vm = mach->vm;
	int ret;

	while (1) {
		/* Got a signal? Or pending resched? Leave. */
		if (__predict_false(nvmm_return_needed())) {
			exit->reason = NVMM_VCPU_EXIT_NONE;
			return 0;
		}

		/* Run the VCPU. */
		ret = (*nvmm_impl->vcpu_run)(mach, vcpu, exit);
		if (__predict_false(ret != 0)) {
			return ret;
		}

		/* Process nested page faults. */
		if (__predict_true(exit->reason != NVMM_VCPU_EXIT_MEMORY)) {
			break;
		}
		if (exit->u.mem.gpa >= mach->gpa_end) {
			break;
		}
		if (uvm_fault(&vm->vm_map, exit->u.mem.gpa, exit->u.mem.prot)) {
			break;
		}
	}

	return 0;
}
    599 
    600 static int
    601 nvmm_vcpu_run(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_run *args)
    602 {
    603 	struct nvmm_machine *mach;
    604 	struct nvmm_cpu *vcpu;
    605 	int error;
    606 
    607 	error = nvmm_machine_get(owner, args->machid, &mach, false);
    608 	if (error)
    609 		return error;
    610 
    611 	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
    612 	if (error)
    613 		goto out;
    614 
    615 	error = nvmm_do_vcpu_run(mach, vcpu, &args->exit);
    616 	nvmm_vcpu_put(vcpu);
    617 
    618 out:
    619 	nvmm_machine_put(mach);
    620 	return error;
    621 }
    622 
    623 /* -------------------------------------------------------------------------- */
    624 
    625 static struct uvm_object *
    626 nvmm_hmapping_getuobj(struct nvmm_machine *mach, uintptr_t hva, size_t size,
    627    size_t *off)
    628 {
    629 	struct nvmm_hmapping *hmapping;
    630 	size_t i;
    631 
    632 	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
    633 		hmapping = &mach->hmap[i];
    634 		if (!hmapping->present) {
    635 			continue;
    636 		}
    637 		if (hva >= hmapping->hva &&
    638 		    hva + size <= hmapping->hva + hmapping->size) {
    639 			*off = hva - hmapping->hva;
    640 			return hmapping->uobj;
    641 		}
    642 	}
    643 
    644 	return NULL;
    645 }
    646 
/*
 * Check that [hva, hva+size) is acceptable for a new host mapping: it
 * must be page-aligned and non-NULL, and must either be fully contained
 * in one existing mapping (re-mapping the same region is allowed) or
 * not overlap any existing mapping at all (EEXIST otherwise).
 */
static int
nvmm_hmapping_validate(struct nvmm_machine *mach, uintptr_t hva, size_t size)
{
	struct nvmm_hmapping *hmapping;
	size_t i;

	if ((hva % PAGE_SIZE) != 0 || (size % PAGE_SIZE) != 0) {
		return EINVAL;
	}
	if (hva == 0) {
		return EINVAL;
	}

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present) {
			continue;
		}

		/* Fully contained in this mapping: allowed. */
		if (hva >= hmapping->hva &&
		    hva + size <= hmapping->hva + hmapping->size) {
			break;
		}

		/* Start falls inside this mapping. */
		if (hva >= hmapping->hva &&
		    hva < hmapping->hva + hmapping->size) {
			return EEXIST;
		}
		/* End falls inside this mapping. */
		if (hva + size > hmapping->hva &&
		    hva + size <= hmapping->hva + hmapping->size) {
			return EEXIST;
		}
		/* The new range engulfs this mapping. */
		if (hva <= hmapping->hva &&
		    hva + size >= hmapping->hva + hmapping->size) {
			return EEXIST;
		}
	}

	return 0;
}
    687 
    688 static struct nvmm_hmapping *
    689 nvmm_hmapping_alloc(struct nvmm_machine *mach)
    690 {
    691 	struct nvmm_hmapping *hmapping;
    692 	size_t i;
    693 
    694 	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
    695 		hmapping = &mach->hmap[i];
    696 		if (!hmapping->present) {
    697 			hmapping->present = true;
    698 			return hmapping;
    699 		}
    700 	}
    701 
    702 	return NULL;
    703 }
    704 
/*
 * Find the host mapping that matches (hva, size) exactly, unmap it from
 * the current process, drop the machine's uobj reference, and free the
 * slot.  Returns ENOENT when no mapping matches exactly.
 */
static int
nvmm_hmapping_free(struct nvmm_machine *mach, uintptr_t hva, size_t size)
{
	struct vmspace *vmspace = curproc->p_vmspace;
	struct nvmm_hmapping *hmapping;
	size_t i;

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present || hmapping->hva != hva ||
		    hmapping->size != size) {
			continue;
		}

		uvm_unmap(&vmspace->vm_map, hmapping->hva,
		    hmapping->hva + hmapping->size);
		uao_detach(hmapping->uobj);

		hmapping->uobj = NULL;
		hmapping->present = false;

		return 0;
	}

	return ENOENT;
}
    731 
/*
 * NVMM_IOC_HVA_MAP: create a host mapping: allocate an anonymous uobj
 * of the given size and map it at the fixed address args->hva in the
 * calling process, replacing whatever was mapped there.  The uobj keeps
 * one reference for the machine, and the user mapping holds a second.
 *
 * NOTE(review): if uvm_map() fails, the hmapping slot stays allocated
 * with its uobj and machine reference; it is only reclaimed by a later
 * hva_unmap or at machine teardown — confirm this is intended.
 */
static int
nvmm_hva_map(struct nvmm_owner *owner, struct nvmm_ioc_hva_map *args)
{
	struct vmspace *vmspace = curproc->p_vmspace;
	struct nvmm_machine *mach;
	struct nvmm_hmapping *hmapping;
	vaddr_t uva;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error)
		return error;

	error = nvmm_hmapping_validate(mach, args->hva, args->size);
	if (error)
		goto out;

	hmapping = nvmm_hmapping_alloc(mach);
	if (hmapping == NULL) {
		error = ENOBUFS;
		goto out;
	}

	hmapping->hva = args->hva;
	hmapping->size = args->size;
	hmapping->uobj = uao_create(hmapping->size, 0);
	uva = hmapping->hva;

	/* Take a reference for the user. */
	uao_reference(hmapping->uobj);

	/* Map the uobj into the user address space, as pageable. */
	error = uvm_map(&vmspace->vm_map, &uva, hmapping->size, hmapping->uobj,
	    0, 0, UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, UVM_INH_SHARE,
	    UVM_ADV_RANDOM, UVM_FLAG_FIXED|UVM_FLAG_UNMAP));
	if (error) {
		/* Drop the user reference taken above. */
		uao_detach(hmapping->uobj);
	}

out:
	nvmm_machine_put(mach);
	return error;
}
    775 
    776 static int
    777 nvmm_hva_unmap(struct nvmm_owner *owner, struct nvmm_ioc_hva_unmap *args)
    778 {
    779 	struct nvmm_machine *mach;
    780 	int error;
    781 
    782 	error = nvmm_machine_get(owner, args->machid, &mach, true);
    783 	if (error)
    784 		return error;
    785 
    786 	error = nvmm_hmapping_free(mach, args->hva, args->size);
    787 
    788 	nvmm_machine_put(mach);
    789 	return error;
    790 }
    791 
    792 /* -------------------------------------------------------------------------- */
    793 
/*
 * NVMM_IOC_GPA_MAP: map a range of an existing host mapping into the
 * guest address space at args->gpa.  The HVA range must lie entirely
 * within one host mapping, the GPA range within [gpa_begin, gpa_end],
 * and everything must be page-aligned.  The uobj gains one reference
 * owned by the guest mapping.
 */
static int
nvmm_gpa_map(struct nvmm_owner *owner, struct nvmm_ioc_gpa_map *args)
{
	struct nvmm_machine *mach;
	struct uvm_object *uobj;
	gpaddr_t gpa;
	size_t off;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	/* Only R/W/X protection bits are meaningful. */
	if ((args->prot & ~(PROT_READ|PROT_WRITE|PROT_EXEC)) != 0) {
		error = EINVAL;
		goto out;
	}

	if ((args->gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0 ||
	    (args->hva % PAGE_SIZE) != 0) {
		error = EINVAL;
		goto out;
	}
	if (args->hva == 0) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa < mach->gpa_begin || args->gpa >= mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	/* Rejects empty ranges and arithmetic overflow of gpa+size. */
	if (args->gpa + args->size <= args->gpa) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size > mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	gpa = args->gpa;

	uobj = nvmm_hmapping_getuobj(mach, args->hva, args->size, &off);
	if (uobj == NULL) {
		error = EINVAL;
		goto out;
	}

	/* Take a reference for the machine. */
	uao_reference(uobj);

	/* Map the uobj into the machine address space, as pageable. */
	error = uvm_map(&mach->vm->vm_map, &gpa, args->size, uobj, off, 0,
	    UVM_MAPFLAG(args->prot, UVM_PROT_RWX, UVM_INH_NONE,
	    UVM_ADV_RANDOM, UVM_FLAG_FIXED|UVM_FLAG_UNMAP));
	if (error) {
		uao_detach(uobj);
		goto out;
	}
	/* UVM_FLAG_FIXED should have pinned the address; sanity-check. */
	if (gpa != args->gpa) {
		uao_detach(uobj);
		printf("[!] uvm_map problem\n");
		error = EINVAL;
		goto out;
	}

out:
	nvmm_machine_put(mach);
	return error;
}
    863 
    864 static int
    865 nvmm_gpa_unmap(struct nvmm_owner *owner, struct nvmm_ioc_gpa_unmap *args)
    866 {
    867 	struct nvmm_machine *mach;
    868 	gpaddr_t gpa;
    869 	int error;
    870 
    871 	error = nvmm_machine_get(owner, args->machid, &mach, false);
    872 	if (error)
    873 		return error;
    874 
    875 	if ((args->gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0) {
    876 		error = EINVAL;
    877 		goto out;
    878 	}
    879 	if (args->gpa < mach->gpa_begin || args->gpa >= mach->gpa_end) {
    880 		error = EINVAL;
    881 		goto out;
    882 	}
    883 	if (args->gpa + args->size <= args->gpa) {
    884 		error = EINVAL;
    885 		goto out;
    886 	}
    887 	if (args->gpa + args->size >= mach->gpa_end) {
    888 		error = EINVAL;
    889 		goto out;
    890 	}
    891 	gpa = args->gpa;
    892 
    893 	/* Unmap the memory from the machine. */
    894 	uvm_unmap(&mach->vm->vm_map, gpa, gpa + args->size);
    895 
    896 out:
    897 	nvmm_machine_put(mach);
    898 	return error;
    899 }
    900 
    901 /* -------------------------------------------------------------------------- */
    902 
/*
 * NVMM_CTL_MACH_INFO: copy out information about one machine: number
 * of present VCPUs, total host-mapped RAM, owner pid, and creation
 * time.  The usual ownership check applies (root_owner may query any
 * machine).
 */
static int
nvmm_ctl_mach_info(struct nvmm_owner *owner, struct nvmm_ioc_ctl *args)
{
	struct nvmm_ctl_mach_info ctl;
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;
	size_t i;

	if (args->size != sizeof(ctl))
		return EINVAL;
	error = copyin(args->data, &ctl, sizeof(ctl));
	if (error)
		return error;

	error = nvmm_machine_get(owner, ctl.machid, &mach, true);
	if (error)
		return error;

	/* Count the present VCPUs. */
	ctl.nvcpus = 0;
	for (i = 0; i < NVMM_MAX_VCPUS; i++) {
		error = nvmm_vcpu_get(mach, i, &vcpu);
		if (error)
			continue;
		ctl.nvcpus++;
		nvmm_vcpu_put(vcpu);
	}

	/* Sum the sizes of the host mappings. */
	ctl.nram = 0;
	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		if (!mach->hmap[i].present)
			continue;
		ctl.nram += mach->hmap[i].size;
	}

	ctl.pid = mach->owner->pid;
	ctl.time = mach->time;

	nvmm_machine_put(mach);

	error = copyout(&ctl, args->data, sizeof(ctl));
	if (error)
		return error;

	return 0;
}
    949 
    950 static int
    951 nvmm_ctl(struct nvmm_owner *owner, struct nvmm_ioc_ctl *args)
    952 {
    953 	switch (args->op) {
    954 	case NVMM_CTL_MACH_INFO:
    955 		return nvmm_ctl_mach_info(owner, args);
    956 	default:
    957 		return EINVAL;
    958 	}
    959 }
    960 
    961 /* -------------------------------------------------------------------------- */
    962 
    963 static const struct nvmm_impl *
    964 nvmm_ident(void)
    965 {
    966 	size_t i;
    967 
    968 	for (i = 0; i < __arraycount(nvmm_impl_list); i++) {
    969 		if ((*nvmm_impl_list[i]->ident)())
    970 			return nvmm_impl_list[i];
    971 	}
    972 
    973 	return NULL;
    974 }
    975 
/*
 * Module initialization: select a backend and initialize the global
 * machine/VCPU tables (ids and locks).  Returns ENOTSUP when no
 * backend supports the host CPU.
 */
static int
nvmm_init(void)
{
	size_t i, n;

	nvmm_impl = nvmm_ident();
	if (nvmm_impl == NULL)
		return ENOTSUP;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		machines[i].machid = i;
		rw_init(&machines[i].lock);
		for (n = 0; n < NVMM_MAX_VCPUS; n++) {
			machines[i].cpus[n].present = false;
			machines[i].cpus[n].cpuid = n;
			mutex_init(&machines[i].cpus[n].lock, MUTEX_DEFAULT,
			    IPL_NONE);
		}
	}

	(*nvmm_impl->init)();

	return 0;
}
   1000 
   1001 static void
   1002 nvmm_fini(void)
   1003 {
   1004 	size_t i, n;
   1005 
   1006 	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
   1007 		rw_destroy(&machines[i].lock);
   1008 		for (n = 0; n < NVMM_MAX_VCPUS; n++) {
   1009 			mutex_destroy(&machines[i].cpus[n].lock);
   1010 		}
   1011 	}
   1012 
   1013 	(*nvmm_impl->fini)();
   1014 	nvmm_impl = NULL;
   1015 }
   1016 
   1017 /* -------------------------------------------------------------------------- */
   1018 
static dev_type_open(nvmm_open);

/*
 * The /dev/nvmm character device.  Only open() is implemented here: it
 * clones a per-owner anonymous file (see nvmm_open()); all further
 * operations go through nvmm_fileops.
 */
const struct cdevsw nvmm_cdevsw = {
	.d_open = nvmm_open,
	.d_close = noclose,
	.d_read = noread,
	.d_write = nowrite,
	.d_ioctl = noioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_OTHER | D_MPSAFE
};
   1035 
static int nvmm_ioctl(file_t *, u_long, void *);
static int nvmm_close(file_t *);
static int nvmm_mmap(file_t *, off_t *, size_t, int, int *, int *,
    struct uvm_object **, int *);

/* Per-owner file operations, installed by nvmm_open() via fd_clone(). */
const struct fileops nvmm_fileops = {
	.fo_read = fbadop_read,
	.fo_write = fbadop_write,
	.fo_ioctl = nvmm_ioctl,
	.fo_fcntl = fnullop_fcntl,
	.fo_poll = fnullop_poll,
	.fo_stat = fbadop_stat,
	.fo_close = nvmm_close,
	.fo_kqfilter = fnullop_kqfilter,
	.fo_restart = fnullop_restart,
	.fo_mmap = nvmm_mmap,
};
   1053 
/*
 * Open /dev/nvmm.  O_CLOEXEC is mandatory.  A write-only open yields
 * the special root_owner, which can access any machine; otherwise a
 * fresh owner tied to the calling process's pid is allocated.  The
 * device is cloned into an anonymous file using nvmm_fileops, with the
 * owner stored as the file's data.
 */
static int
nvmm_open(dev_t dev, int flags, int type, struct lwp *l)
{
	struct nvmm_owner *owner;
	struct file *fp;
	int error, fd;

	if (__predict_false(nvmm_impl == NULL))
		return ENXIO;
	if (minor(dev) != 0)
		return EXDEV;
	if (!(flags & O_CLOEXEC))
		return EINVAL;
	error = fd_allocfile(&fp, &fd);
	if (error)
		return error;

	if (OFLAGS(flags) & O_WRONLY) {
		owner = &root_owner;
	} else {
		owner = kmem_alloc(sizeof(*owner), KM_SLEEP);
		owner->pid = l->l_proc->p_pid;
	}

	return fd_clone(fp, fd, flags, &nvmm_fileops, owner);
}
   1080 
   1081 static int
   1082 nvmm_close(file_t *fp)
   1083 {
   1084 	struct nvmm_owner *owner = fp->f_data;
   1085 
   1086 	KASSERT(owner != NULL);
   1087 	nvmm_kill_machines(owner);
   1088 	if (owner != &root_owner) {
   1089 		kmem_free(owner, sizeof(*owner));
   1090 	}
   1091 	fp->f_data = NULL;
   1092 
   1093    	return 0;
   1094 }
   1095 
/*
 * mmap() on an NVMM file: hand out a VCPU comm page.  The file offset
 * encodes the machid and cpuid; only an exact PAGE_SIZE mapping
 * without PROT_EXEC is accepted.  The returned uobj is the machine's
 * comm uobj with one extra reference, at the VCPU's page offset.
 */
static int
nvmm_mmap(file_t *fp, off_t *offp, size_t size, int prot, int *flagsp,
    int *advicep, struct uvm_object **uobjp, int *maxprotp)
{
	struct nvmm_owner *owner = fp->f_data;
	struct nvmm_machine *mach;
	nvmm_machid_t machid;
	nvmm_cpuid_t cpuid;
	int error;

	if (prot & PROT_EXEC)
		return EACCES;
	if (size != PAGE_SIZE)
		return EINVAL;

	cpuid = NVMM_COMM_CPUID(*offp);
	if (__predict_false(cpuid >= NVMM_MAX_VCPUS))
		return EINVAL;

	machid = NVMM_COMM_MACHID(*offp);
	error = nvmm_machine_get(owner, machid, &mach, false);
	if (error)
		return error;

	uao_reference(mach->commuobj);
	*uobjp = mach->commuobj;
	*offp = cpuid * PAGE_SIZE;
	*maxprotp = prot;
	*advicep = UVM_ADV_RANDOM;

	nvmm_machine_put(mach);
	return 0;
}
   1129 
/*
 * Ioctl handler for /dev/nvmm: dispatch each NVMM_IOC_* command to its
 * implementation, passing the owner attached at open time.  Per-command
 * permission/argument checking is done inside the handlers themselves.
 * Unknown commands yield EINVAL.
 */
static int
nvmm_ioctl(file_t *fp, u_long cmd, void *data)
{
	struct nvmm_owner *owner = fp->f_data;

	KASSERT(owner != NULL);

	switch (cmd) {
	case NVMM_IOC_CAPABILITY:
		return nvmm_capability(owner, data);
	case NVMM_IOC_MACHINE_CREATE:
		return nvmm_machine_create(owner, data);
	case NVMM_IOC_MACHINE_DESTROY:
		return nvmm_machine_destroy(owner, data);
	case NVMM_IOC_MACHINE_CONFIGURE:
		return nvmm_machine_configure(owner, data);
	case NVMM_IOC_VCPU_CREATE:
		return nvmm_vcpu_create(owner, data);
	case NVMM_IOC_VCPU_DESTROY:
		return nvmm_vcpu_destroy(owner, data);
	case NVMM_IOC_VCPU_CONFIGURE:
		return nvmm_vcpu_configure(owner, data);
	case NVMM_IOC_VCPU_SETSTATE:
		return nvmm_vcpu_setstate(owner, data);
	case NVMM_IOC_VCPU_GETSTATE:
		return nvmm_vcpu_getstate(owner, data);
	case NVMM_IOC_VCPU_INJECT:
		return nvmm_vcpu_inject(owner, data);
	case NVMM_IOC_VCPU_RUN:
		return nvmm_vcpu_run(owner, data);
	case NVMM_IOC_GPA_MAP:
		return nvmm_gpa_map(owner, data);
	case NVMM_IOC_GPA_UNMAP:
		return nvmm_gpa_unmap(owner, data);
	case NVMM_IOC_HVA_MAP:
		return nvmm_hva_map(owner, data);
	case NVMM_IOC_HVA_UNMAP:
		return nvmm_hva_unmap(owner, data);
	case NVMM_IOC_CTL:
		return nvmm_ctl(owner, data);
	default:
		return EINVAL;
	}
}
   1174 
   1175 /* -------------------------------------------------------------------------- */
   1176 
/* Autoconf glue for the nvmm pseudo-device. */
static int nvmm_match(device_t, cfdata_t, void *);
static void nvmm_attach(device_t, device_t, void *);
static int nvmm_detach(device_t, int);

extern struct cfdriver nvmm_cd;

/* Declares nvmm_ca, used by nvmm_modcmd() below. */
CFATTACH_DECL_NEW(nvmm, 0, nvmm_match, nvmm_attach, nvmm_detach, NULL);
   1184 
/*
 * Config data used by config_cfdata_attach()/config_attach_pseudo()
 * in nvmm_modcmd().  One wildcard (FSTATE_STAR) entry, plus the
 * mandatory all-NULL terminator.
 */
static struct cfdata nvmm_cfdata[] = {
	{
		.cf_name = "nvmm",
		.cf_atname = "nvmm",
		.cf_unit = 0,
		.cf_fstate = FSTATE_STAR,
		.cf_loc = NULL,
		.cf_flags = 0,
		.cf_pspec = NULL,
	},
	{ NULL, NULL, 0, FSTATE_NOTFOUND, NULL, 0, NULL }
};
   1197 
/*
 * Autoconf match: always succeeds — the pseudo-device is attached
 * explicitly via config_attach_pseudo() in nvmm_modcmd().
 */
static int
nvmm_match(device_t self, cfdata_t cfdata, void *arg)
{
	return 1;
}
   1203 
   1204 static void
   1205 nvmm_attach(device_t parent, device_t self, void *aux)
   1206 {
   1207 	int error;
   1208 
   1209 	error = nvmm_init();
   1210 	if (error)
   1211 		panic("%s: impossible", __func__);
   1212 	aprint_normal_dev(self, "attached, using backend %s\n",
   1213 	    nvmm_impl->name);
   1214 }
   1215 
   1216 static int
   1217 nvmm_detach(device_t self, int flags)
   1218 {
   1219 	if (nmachines > 0)
   1220 		return EBUSY;
   1221 	nvmm_fini();
   1222 	return 0;
   1223 }
   1224 
/*
 * Legacy pseudo-device attach hook (referenced by kernel config glue).
 * All real setup is done in nvmm_attach(); nothing to do here.
 */
void
nvmmattach(int nunits)
{
	/* nothing */
}
   1230 
/* Module glue: nvmm is a misc-class module with no dependencies. */
MODULE(MODULE_CLASS_MISC, nvmm, NULL);

#if defined(_MODULE)
/* When loaded as a module, we must declare the cfdriver ourselves. */
CFDRIVER_DECL(nvmm, DV_VIRTUAL, NULL);
#endif
   1236 
   1237 static int
   1238 nvmm_modcmd(modcmd_t cmd, void *arg)
   1239 {
   1240 #if defined(_MODULE)
   1241 	devmajor_t bmajor = NODEVMAJOR;
   1242 	devmajor_t cmajor = 345;
   1243 #endif
   1244 	int error;
   1245 
   1246 	switch (cmd) {
   1247 	case MODULE_CMD_INIT:
   1248 		if (nvmm_ident() == NULL) {
   1249 			aprint_error("%s: cpu not supported\n",
   1250 			    nvmm_cd.cd_name);
   1251 			return ENOTSUP;
   1252 		}
   1253 #if defined(_MODULE)
   1254 		error = config_cfdriver_attach(&nvmm_cd);
   1255 		if (error)
   1256 			return error;
   1257 #endif
   1258 		error = config_cfattach_attach(nvmm_cd.cd_name, &nvmm_ca);
   1259 		if (error) {
   1260 			config_cfdriver_detach(&nvmm_cd);
   1261 			aprint_error("%s: config_cfattach_attach failed\n",
   1262 			    nvmm_cd.cd_name);
   1263 			return error;
   1264 		}
   1265 
   1266 		error = config_cfdata_attach(nvmm_cfdata, 1);
   1267 		if (error) {
   1268 			config_cfattach_detach(nvmm_cd.cd_name, &nvmm_ca);
   1269 			config_cfdriver_detach(&nvmm_cd);
   1270 			aprint_error("%s: unable to register cfdata\n",
   1271 			    nvmm_cd.cd_name);
   1272 			return error;
   1273 		}
   1274 
   1275 		if (config_attach_pseudo(nvmm_cfdata) == NULL) {
   1276 			aprint_error("%s: config_attach_pseudo failed\n",
   1277 			    nvmm_cd.cd_name);
   1278 			config_cfattach_detach(nvmm_cd.cd_name, &nvmm_ca);
   1279 			config_cfdriver_detach(&nvmm_cd);
   1280 			return ENXIO;
   1281 		}
   1282 
   1283 #if defined(_MODULE)
   1284 		/* mknod /dev/nvmm c 345 0 */
   1285 		error = devsw_attach(nvmm_cd.cd_name, NULL, &bmajor,
   1286 			&nvmm_cdevsw, &cmajor);
   1287 		if (error) {
   1288 			aprint_error("%s: unable to register devsw\n",
   1289 			    nvmm_cd.cd_name);
   1290 			config_cfattach_detach(nvmm_cd.cd_name, &nvmm_ca);
   1291 			config_cfdriver_detach(&nvmm_cd);
   1292 			return error;
   1293 		}
   1294 #endif
   1295 		return 0;
   1296 	case MODULE_CMD_FINI:
   1297 		error = config_cfdata_detach(nvmm_cfdata);
   1298 		if (error)
   1299 			return error;
   1300 		error = config_cfattach_detach(nvmm_cd.cd_name, &nvmm_ca);
   1301 		if (error)
   1302 			return error;
   1303 #if defined(_MODULE)
   1304 		config_cfdriver_detach(&nvmm_cd);
   1305 		devsw_detach(NULL, &nvmm_cdevsw);
   1306 #endif
   1307 		return 0;
   1308 	case MODULE_CMD_AUTOUNLOAD:
   1309 		return EBUSY;
   1310 	default:
   1311 		return ENOTTY;
   1312 	}
   1313 }
   1314