/*	$NetBSD: nvmm.c,v 1.37 2020/08/29 07:14:17 maxv Exp $	*/

/*
 * Copyright (c) 2018-2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Maxime Villard.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: nvmm.c,v 1.37 2020/08/29 07:14:17 maxv Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>

#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/conf.h>
#include <sys/kmem.h>
#include <sys/module.h>
#include <sys/proc.h>
#include <sys/mman.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/device.h>

#include <uvm/uvm.h>
#include <uvm/uvm_page.h>

#include "ioconf.h"

#include <dev/nvmm/nvmm.h>
#include <dev/nvmm/nvmm_internal.h>
#include <dev/nvmm/nvmm_ioctl.h>

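/*
 * The global machine table. nmachines counts the machines currently
 * present; nvmm_detach() refuses to unload the driver while it is
 * nonzero.
 */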
static struct nvmm_machine machines[NVMM_MAX_MACHINES];
static volatile unsigned int nmachines __cacheline_aligned;

static const struct nvmm_impl *nvmm_impl_list[] = {
#if defined(__x86_64__)
	&nvmm_x86_svm,	/* x86 AMD SVM */
	&nvmm_x86_vmx	/* x86 Intel VMX */
#endif
};

static const struct nvmm_impl *nvmm_impl = NULL;

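/*
 * Special owner that is allowed to access every machine, regardless of
 * who created it. Handed out by nvmm_open() for write-only opens.
 */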
static struct nvmm_owner root_owner;

/* -------------------------------------------------------------------------- */

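/*
 * Allocate a free slot in the machine table. On success the machine is
 * returned locked as writer; the caller releases it with
 * nvmm_machine_put().
 */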
static int
nvmm_machine_alloc(struct nvmm_machine **ret)
{
	struct nvmm_machine *mach;
	size_t i;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		mach = &machines[i];

		rw_enter(&mach->lock, RW_WRITER);
		if (mach->present) {
			rw_exit(&mach->lock);
			continue;
		}

		mach->present = true;
		mach->time = time_second;
		*ret = mach;
		atomic_inc_uint(&nmachines);
		return 0;
	}

	return ENOBUFS;
}

static void
nvmm_machine_free(struct nvmm_machine *mach)
{
	KASSERT(rw_write_held(&mach->lock));
	KASSERT(mach->present);
	mach->present = false;
	atomic_dec_uint(&nmachines);
}

static int
nvmm_machine_get(struct nvmm_owner *owner, nvmm_machid_t machid,
    struct nvmm_machine **ret, bool writer)
{
	struct nvmm_machine *mach;
	krw_t op = writer ? RW_WRITER : RW_READER;

	if (__predict_false(machid >= NVMM_MAX_MACHINES)) {
		return EINVAL;
	}
	mach = &machines[machid];

	rw_enter(&mach->lock, op);
	if (__predict_false(!mach->present)) {
		rw_exit(&mach->lock);
		return ENOENT;
	}
	if (__predict_false(mach->owner != owner && owner != &root_owner)) {
		rw_exit(&mach->lock);
		return EPERM;
	}
	*ret = mach;

	return 0;
}

static void
nvmm_machine_put(struct nvmm_machine *mach)
{
	rw_exit(&mach->lock);
}

/* -------------------------------------------------------------------------- */

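/*
 * Allocate the given VCPU slot in the machine. On success the VCPU is
 * returned locked; the caller releases it with nvmm_vcpu_put().
 */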
static int
nvmm_vcpu_alloc(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    struct nvmm_cpu **ret)
{
	struct nvmm_cpu *vcpu;

	if (cpuid >= NVMM_MAX_VCPUS) {
		return EINVAL;
	}
	vcpu = &mach->cpus[cpuid];

	mutex_enter(&vcpu->lock);
	if (vcpu->present) {
		mutex_exit(&vcpu->lock);
		return EBUSY;
	}

	vcpu->present = true;
	vcpu->comm = NULL;
	vcpu->hcpu_last = -1;
	*ret = vcpu;
	return 0;
}

static void
nvmm_vcpu_free(struct nvmm_machine *mach, struct nvmm_cpu *vcpu)
{
	KASSERT(mutex_owned(&vcpu->lock));
	vcpu->present = false;
	if (vcpu->comm != NULL) {
		uvm_deallocate(kernel_map, (vaddr_t)vcpu->comm, PAGE_SIZE);
	}
}

static int
nvmm_vcpu_get(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    struct nvmm_cpu **ret)
{
	struct nvmm_cpu *vcpu;

	if (__predict_false(cpuid >= NVMM_MAX_VCPUS)) {
		return EINVAL;
	}
	vcpu = &mach->cpus[cpuid];

	mutex_enter(&vcpu->lock);
	if (__predict_false(!vcpu->present)) {
		mutex_exit(&vcpu->lock);
		return ENOENT;
	}
	*ret = vcpu;

	return 0;
}

static void
nvmm_vcpu_put(struct nvmm_cpu *vcpu)
{
	mutex_exit(&vcpu->lock);
}

/* -------------------------------------------------------------------------- */

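/*
 * Destroy every machine that belongs to the given owner. Called when
 * the owner's fd is closed, so that no VM outlives its creator.
 */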
static void
nvmm_kill_machines(struct nvmm_owner *owner)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	size_t i, j;
	int error;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		mach = &machines[i];

		rw_enter(&mach->lock, RW_WRITER);
		if (!mach->present || mach->owner != owner) {
			rw_exit(&mach->lock);
			continue;
		}

		/* Kill it. */
		for (j = 0; j < NVMM_MAX_VCPUS; j++) {
			error = nvmm_vcpu_get(mach, j, &vcpu);
			if (error)
				continue;
			(*nvmm_impl->vcpu_destroy)(mach, vcpu);
			nvmm_vcpu_free(mach, vcpu);
			nvmm_vcpu_put(vcpu);
			atomic_dec_uint(&mach->ncpus);
		}
		(*nvmm_impl->machine_destroy)(mach);
		uvmspace_free(mach->vm);

		/* Drop the kernel UOBJ refs. */
		for (j = 0; j < NVMM_MAX_HMAPPINGS; j++) {
			if (!mach->hmap[j].present)
				continue;
			uao_detach(mach->hmap[j].uobj);
		}

		nvmm_machine_free(mach);

		rw_exit(&mach->lock);
	}
}

/* -------------------------------------------------------------------------- */

static int
nvmm_capability(struct nvmm_owner *owner, struct nvmm_ioc_capability *args)
{
	args->cap.version = NVMM_KERN_VERSION;
	args->cap.state_size = nvmm_impl->state_size;
	args->cap.max_machines = NVMM_MAX_MACHINES;
	args->cap.max_vcpus = NVMM_MAX_VCPUS;
	args->cap.max_ram = NVMM_MAX_RAM;

	(*nvmm_impl->capability)(&args->cap);

	return 0;
}

static int
nvmm_machine_create(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_create *args)
{
	struct nvmm_machine *mach;
	int error;

	error = nvmm_machine_alloc(&mach);
	if (error)
		return error;

	/* Curproc owns the machine. */
	mach->owner = owner;

	/* Zero out the host mappings. */
	memset(&mach->hmap, 0, sizeof(mach->hmap));

	/* Create the machine vmspace. */
	mach->gpa_begin = 0;
	mach->gpa_end = NVMM_MAX_RAM;
	mach->vm = uvmspace_alloc(0, mach->gpa_end - mach->gpa_begin, false);

	/* Create the comm uobj. */
	mach->commuobj = uao_create(NVMM_MAX_VCPUS * PAGE_SIZE, 0);

	(*nvmm_impl->machine_create)(mach);

	args->machid = mach->machid;
	nvmm_machine_put(mach);

	return 0;
}

static int
nvmm_machine_destroy(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_destroy *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;
	size_t i;

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error)
		return error;

	for (i = 0; i < NVMM_MAX_VCPUS; i++) {
		error = nvmm_vcpu_get(mach, i, &vcpu);
		if (error)
			continue;

		(*nvmm_impl->vcpu_destroy)(mach, vcpu);
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		atomic_dec_uint(&mach->ncpus);
	}

	(*nvmm_impl->machine_destroy)(mach);

	/* Free the machine vmspace. */
	uvmspace_free(mach->vm);

	/* Drop the kernel UOBJ refs. */
	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		if (!mach->hmap[i].present)
			continue;
		uao_detach(mach->hmap[i].uobj);
	}

	nvmm_machine_free(mach);
	nvmm_machine_put(mach);

	return 0;
}

static int
nvmm_machine_configure(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_configure *args)
{
	struct nvmm_machine *mach;
	size_t allocsz;
	uint64_t op;
	void *data;
	int error;

	op = NVMM_MACH_CONF_MD(args->op);
	if (__predict_false(op >= nvmm_impl->mach_conf_max)) {
		return EINVAL;
	}

	allocsz = nvmm_impl->mach_conf_sizes[op];
	data = kmem_alloc(allocsz, KM_SLEEP);

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error) {
		kmem_free(data, allocsz);
		return error;
	}

	error = copyin(args->conf, data, allocsz);
	if (error) {
		goto out;
	}

	error = (*nvmm_impl->machine_configure)(mach, op, data);

out:
	nvmm_machine_put(mach);
	kmem_free(data, allocsz);
	return error;
}

static int
nvmm_vcpu_create(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_create *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_alloc(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	/* Allocate the comm page. */
	uao_reference(mach->commuobj);
	error = uvm_map(kernel_map, (vaddr_t *)&vcpu->comm, PAGE_SIZE,
	    mach->commuobj, args->cpuid * PAGE_SIZE, 0, UVM_MAPFLAG(UVM_PROT_RW,
	    UVM_PROT_RW, UVM_INH_SHARE, UVM_ADV_RANDOM, 0));
	if (error) {
		uao_detach(mach->commuobj);
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		goto out;
	}
	error = uvm_map_pageable(kernel_map, (vaddr_t)vcpu->comm,
	    (vaddr_t)vcpu->comm + PAGE_SIZE, false, 0);
	if (error) {
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		goto out;
	}
	memset(vcpu->comm, 0, PAGE_SIZE);

	error = (*nvmm_impl->vcpu_create)(mach, vcpu);
	if (error) {
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		goto out;
	}

	nvmm_vcpu_put(vcpu);
	atomic_inc_uint(&mach->ncpus);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_destroy(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_destroy *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	(*nvmm_impl->vcpu_destroy)(mach, vcpu);
	nvmm_vcpu_free(mach, vcpu);
	nvmm_vcpu_put(vcpu);
	atomic_dec_uint(&mach->ncpus);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_configure(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_configure *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	size_t allocsz;
	uint64_t op;
	void *data;
	int error;

	op = NVMM_VCPU_CONF_MD(args->op);
	if (__predict_false(op >= nvmm_impl->vcpu_conf_max))
		return EINVAL;

	allocsz = nvmm_impl->vcpu_conf_sizes[op];
	data = kmem_alloc(allocsz, KM_SLEEP);

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error) {
		kmem_free(data, allocsz);
		return error;
	}

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error) {
		nvmm_machine_put(mach);
		kmem_free(data, allocsz);
		return error;
	}

	error = copyin(args->conf, data, allocsz);
	if (error) {
		goto out;
	}

	error = (*nvmm_impl->vcpu_configure)(vcpu, op, data);

out:
	nvmm_vcpu_put(vcpu);
	nvmm_machine_put(mach);
	kmem_free(data, allocsz);
	return error;
}

static int
nvmm_vcpu_setstate(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_setstate *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	(*nvmm_impl->vcpu_setstate)(vcpu);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_getstate(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_getstate *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	(*nvmm_impl->vcpu_getstate)(vcpu);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_inject(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_inject *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	error = (*nvmm_impl->vcpu_inject)(vcpu);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

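/*
 * Run the VCPU in a loop. Exits caused by nested page faults on valid
 * guest memory are handled in-kernel, by faulting the page into the
 * machine vmspace; every other exit reason is returned to userland.
 * A pending signal or reschedule request also returns to userland,
 * with NVMM_VCPU_EXIT_NONE.
 */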
static int
nvmm_do_vcpu_run(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
    struct nvmm_vcpu_exit *exit)
{
	struct vmspace *vm = mach->vm;
	int ret;

	while (1) {
		/* Got a signal? Or pending resched? Leave. */
		if (__predict_false(nvmm_return_needed())) {
			exit->reason = NVMM_VCPU_EXIT_NONE;
			return 0;
		}

		/* Run the VCPU. */
		ret = (*nvmm_impl->vcpu_run)(mach, vcpu, exit);
		if (__predict_false(ret != 0)) {
			return ret;
		}

		/* Process nested page faults. */
		if (__predict_true(exit->reason != NVMM_VCPU_EXIT_MEMORY)) {
			break;
		}
		if (exit->u.mem.gpa >= mach->gpa_end) {
			break;
		}
		if (uvm_fault(&vm->vm_map, exit->u.mem.gpa, exit->u.mem.prot)) {
			break;
		}
	}

	return 0;
}

static int
nvmm_vcpu_run(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_run *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	error = nvmm_do_vcpu_run(mach, vcpu, &args->exit);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

/* -------------------------------------------------------------------------- */

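/*
 * Host mappings. Each machine can have up to NVMM_MAX_HMAPPINGS ranges
 * of the owner's address space registered, each backed by an anonymous
 * UVM object. Guest physical memory (nvmm_gpa_map) maps slices of these
 * same objects, so host and guest see the same pages.
 */
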
static struct uvm_object *
nvmm_hmapping_getuobj(struct nvmm_machine *mach, uintptr_t hva, size_t size,
    size_t *off)
{
	struct nvmm_hmapping *hmapping;
	size_t i;

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present) {
			continue;
		}
		if (hva >= hmapping->hva &&
		    hva + size <= hmapping->hva + hmapping->size) {
			*off = hva - hmapping->hva;
			return hmapping->uobj;
		}
	}

	return NULL;
}

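/*
 * A candidate range must be page-aligned and non-NULL. It is accepted
 * if it lies entirely inside an existing mapping or does not intersect
 * any of them; partial overlaps are rejected with EEXIST.
 */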
static int
nvmm_hmapping_validate(struct nvmm_machine *mach, uintptr_t hva, size_t size)
{
	struct nvmm_hmapping *hmapping;
	size_t i;

	if ((hva % PAGE_SIZE) != 0 || (size % PAGE_SIZE) != 0) {
		return EINVAL;
	}
	if (hva == 0) {
		return EINVAL;
	}

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present) {
			continue;
		}

		if (hva >= hmapping->hva &&
		    hva + size <= hmapping->hva + hmapping->size) {
			break;
		}

		if (hva >= hmapping->hva &&
		    hva < hmapping->hva + hmapping->size) {
			return EEXIST;
		}
		if (hva + size > hmapping->hva &&
		    hva + size <= hmapping->hva + hmapping->size) {
			return EEXIST;
		}
		if (hva <= hmapping->hva &&
		    hva + size >= hmapping->hva + hmapping->size) {
			return EEXIST;
		}
	}

	return 0;
}

static struct nvmm_hmapping *
nvmm_hmapping_alloc(struct nvmm_machine *mach)
{
	struct nvmm_hmapping *hmapping;
	size_t i;

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present) {
			hmapping->present = true;
			return hmapping;
		}
	}

	return NULL;
}

static int
nvmm_hmapping_free(struct nvmm_machine *mach, uintptr_t hva, size_t size)
{
	struct vmspace *vmspace = curproc->p_vmspace;
	struct nvmm_hmapping *hmapping;
	size_t i;

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present || hmapping->hva != hva ||
		    hmapping->size != size) {
			continue;
		}

		uvm_unmap(&vmspace->vm_map, hmapping->hva,
		    hmapping->hva + hmapping->size);
		uao_detach(hmapping->uobj);

		hmapping->uobj = NULL;
		hmapping->present = false;

		return 0;
	}

	return ENOENT;
}

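/*
 * Register [hva, hva+size) as a host mapping: create an anonymous uobj
 * of that size and map it, pageable, at the fixed address in the
 * caller's address space, replacing whatever was mapped there.
 */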
static int
nvmm_hva_map(struct nvmm_owner *owner, struct nvmm_ioc_hva_map *args)
{
	struct vmspace *vmspace = curproc->p_vmspace;
	struct nvmm_machine *mach;
	struct nvmm_hmapping *hmapping;
	vaddr_t uva;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error)
		return error;

	error = nvmm_hmapping_validate(mach, args->hva, args->size);
	if (error)
		goto out;

	hmapping = nvmm_hmapping_alloc(mach);
	if (hmapping == NULL) {
		error = ENOBUFS;
		goto out;
	}

	hmapping->hva = args->hva;
	hmapping->size = args->size;
	hmapping->uobj = uao_create(hmapping->size, 0);
	uva = hmapping->hva;

	/* Take a reference for the user. */
	uao_reference(hmapping->uobj);

	/* Map the uobj into the user address space, as pageable. */
	error = uvm_map(&vmspace->vm_map, &uva, hmapping->size, hmapping->uobj,
	    0, 0, UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, UVM_INH_SHARE,
	    UVM_ADV_RANDOM, UVM_FLAG_FIXED|UVM_FLAG_UNMAP));
	if (error) {
		uao_detach(hmapping->uobj);
	}

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_hva_unmap(struct nvmm_owner *owner, struct nvmm_ioc_hva_unmap *args)
{
	struct nvmm_machine *mach;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error)
		return error;

	error = nvmm_hmapping_free(mach, args->hva, args->size);

	nvmm_machine_put(mach);
	return error;
}

/* -------------------------------------------------------------------------- */

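/*
 * Map a slice of a previously registered host mapping into the guest
 * physical address space, with the requested protection.
 */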
static int
nvmm_gpa_map(struct nvmm_owner *owner, struct nvmm_ioc_gpa_map *args)
{
	struct nvmm_machine *mach;
	struct uvm_object *uobj;
	gpaddr_t gpa;
	size_t off;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	if ((args->prot & ~(PROT_READ|PROT_WRITE|PROT_EXEC)) != 0) {
		error = EINVAL;
		goto out;
	}

	if ((args->gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0 ||
	    (args->hva % PAGE_SIZE) != 0) {
		error = EINVAL;
		goto out;
	}
	if (args->hva == 0) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa < mach->gpa_begin || args->gpa >= mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size <= args->gpa) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size > mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	gpa = args->gpa;

	uobj = nvmm_hmapping_getuobj(mach, args->hva, args->size, &off);
	if (uobj == NULL) {
		error = EINVAL;
		goto out;
	}

	/* Take a reference for the machine. */
	uao_reference(uobj);

	/* Map the uobj into the machine address space, as pageable. */
	error = uvm_map(&mach->vm->vm_map, &gpa, args->size, uobj, off, 0,
	    UVM_MAPFLAG(args->prot, UVM_PROT_RWX, UVM_INH_NONE,
	    UVM_ADV_RANDOM, UVM_FLAG_FIXED|UVM_FLAG_UNMAP));
	if (error) {
		uao_detach(uobj);
		goto out;
	}
	if (gpa != args->gpa) {
		uao_detach(uobj);
		printf("[!] uvm_map problem\n");
		error = EINVAL;
		goto out;
	}

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_gpa_unmap(struct nvmm_owner *owner, struct nvmm_ioc_gpa_unmap *args)
{
	struct nvmm_machine *mach;
	gpaddr_t gpa;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	if ((args->gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa < mach->gpa_begin || args->gpa >= mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size <= args->gpa) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size >= mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	gpa = args->gpa;

	/* Unmap the memory from the machine. */
	uvm_unmap(&mach->vm->vm_map, gpa, gpa + args->size);

out:
	nvmm_machine_put(mach);
	return error;
}

/* -------------------------------------------------------------------------- */

static int
nvmm_ctl_mach_info(struct nvmm_owner *owner, struct nvmm_ioc_ctl *args)
{
	struct nvmm_ctl_mach_info ctl;
	struct nvmm_machine *mach;
	int error;
	size_t i;

	if (args->size != sizeof(ctl))
		return EINVAL;
	error = copyin(args->data, &ctl, sizeof(ctl));
	if (error)
		return error;

	error = nvmm_machine_get(owner, ctl.machid, &mach, true);
	if (error)
		return error;

	ctl.nvcpus = mach->ncpus;

	ctl.nram = 0;
	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		if (!mach->hmap[i].present)
			continue;
		ctl.nram += mach->hmap[i].size;
	}

	ctl.pid = mach->owner->pid;
	ctl.time = mach->time;

	nvmm_machine_put(mach);

	error = copyout(&ctl, args->data, sizeof(ctl));
	if (error)
		return error;

	return 0;
}

static int
nvmm_ctl(struct nvmm_owner *owner, struct nvmm_ioc_ctl *args)
{
	switch (args->op) {
	case NVMM_CTL_MACH_INFO:
		return nvmm_ctl_mach_info(owner, args);
	default:
		return EINVAL;
	}
}

/* -------------------------------------------------------------------------- */

static const struct nvmm_impl *
nvmm_ident(void)
{
	size_t i;

	for (i = 0; i < __arraycount(nvmm_impl_list); i++) {
		if ((*nvmm_impl_list[i]->ident)())
			return nvmm_impl_list[i];
	}

	return NULL;
}

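/*
 * Pick a supported backend (SVM or VMX on x86) and initialize the
 * static machine and VCPU tables.
 */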
static int
nvmm_init(void)
{
	size_t i, n;

	nvmm_impl = nvmm_ident();
	if (nvmm_impl == NULL)
		return ENOTSUP;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		machines[i].machid = i;
		rw_init(&machines[i].lock);
		for (n = 0; n < NVMM_MAX_VCPUS; n++) {
			machines[i].cpus[n].present = false;
			machines[i].cpus[n].cpuid = n;
			mutex_init(&machines[i].cpus[n].lock, MUTEX_DEFAULT,
			    IPL_NONE);
		}
	}

	(*nvmm_impl->init)();

	return 0;
}

static void
nvmm_fini(void)
{
	size_t i, n;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		rw_destroy(&machines[i].lock);
		for (n = 0; n < NVMM_MAX_VCPUS; n++) {
			mutex_destroy(&machines[i].cpus[n].lock);
		}
	}

	(*nvmm_impl->fini)();
	nvmm_impl = NULL;
}

/* -------------------------------------------------------------------------- */

static dev_type_open(nvmm_open);

const struct cdevsw nvmm_cdevsw = {
	.d_open = nvmm_open,
	.d_close = noclose,
	.d_read = noread,
	.d_write = nowrite,
	.d_ioctl = noioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_OTHER | D_MPSAFE
};

static int nvmm_ioctl(file_t *, u_long, void *);
static int nvmm_close(file_t *);
static int nvmm_mmap(file_t *, off_t *, size_t, int, int *, int *,
    struct uvm_object **, int *);

static const struct fileops nvmm_fileops = {
	.fo_read = fbadop_read,
	.fo_write = fbadop_write,
	.fo_ioctl = nvmm_ioctl,
	.fo_fcntl = fnullop_fcntl,
	.fo_poll = fnullop_poll,
	.fo_stat = fbadop_stat,
	.fo_close = nvmm_close,
	.fo_kqfilter = fnullop_kqfilter,
	.fo_restart = fnullop_restart,
	.fo_mmap = nvmm_mmap,
};

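/*
 * Opening /dev/nvmm clones an anonymous file whose per-open data is
 * the owner. The fd must be opened with O_CLOEXEC. An open for writing
 * is given the special root_owner identity, which may access every
 * machine (see nvmm_machine_get()).
 */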
static int
nvmm_open(dev_t dev, int flags, int type, struct lwp *l)
{
	struct nvmm_owner *owner;
	struct file *fp;
	int error, fd;

	if (__predict_false(nvmm_impl == NULL))
		return ENXIO;
	if (minor(dev) != 0)
		return EXDEV;
	if (!(flags & O_CLOEXEC))
		return EINVAL;
	error = fd_allocfile(&fp, &fd);
	if (error)
		return error;

	if (OFLAGS(flags) & O_WRONLY) {
		owner = &root_owner;
	} else {
		owner = kmem_alloc(sizeof(*owner), KM_SLEEP);
		owner->pid = l->l_proc->p_pid;
	}

	return fd_clone(fp, fd, flags, &nvmm_fileops, owner);
}

static int
nvmm_close(file_t *fp)
{
	struct nvmm_owner *owner = fp->f_data;

	KASSERT(owner != NULL);
	nvmm_kill_machines(owner);
	if (owner != &root_owner) {
		kmem_free(owner, sizeof(*owner));
	}
	fp->f_data = NULL;

	return 0;
}

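/*
 * The only thing that can be mmap'ed on the fd is a VCPU comm page. The
 * file offset encodes the machine and VCPU IDs; the resulting mapping
 * shares the page already mapped in the kernel by nvmm_vcpu_create().
 */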
static int
nvmm_mmap(file_t *fp, off_t *offp, size_t size, int prot, int *flagsp,
    int *advicep, struct uvm_object **uobjp, int *maxprotp)
{
	struct nvmm_owner *owner = fp->f_data;
	struct nvmm_machine *mach;
	nvmm_machid_t machid;
	nvmm_cpuid_t cpuid;
	int error;

	if (prot & PROT_EXEC)
		return EACCES;
	if (size != PAGE_SIZE)
		return EINVAL;

	cpuid = NVMM_COMM_CPUID(*offp);
	if (__predict_false(cpuid >= NVMM_MAX_VCPUS))
		return EINVAL;

	machid = NVMM_COMM_MACHID(*offp);
	error = nvmm_machine_get(owner, machid, &mach, false);
	if (error)
		return error;

	uao_reference(mach->commuobj);
	*uobjp = mach->commuobj;
	*offp = cpuid * PAGE_SIZE;
	*maxprotp = prot;
	*advicep = UVM_ADV_RANDOM;

	nvmm_machine_put(mach);
	return 0;
}

static int
nvmm_ioctl(file_t *fp, u_long cmd, void *data)
{
	struct nvmm_owner *owner = fp->f_data;

	KASSERT(owner != NULL);

	switch (cmd) {
	case NVMM_IOC_CAPABILITY:
		return nvmm_capability(owner, data);
	case NVMM_IOC_MACHINE_CREATE:
		return nvmm_machine_create(owner, data);
	case NVMM_IOC_MACHINE_DESTROY:
		return nvmm_machine_destroy(owner, data);
	case NVMM_IOC_MACHINE_CONFIGURE:
		return nvmm_machine_configure(owner, data);
	case NVMM_IOC_VCPU_CREATE:
		return nvmm_vcpu_create(owner, data);
	case NVMM_IOC_VCPU_DESTROY:
		return nvmm_vcpu_destroy(owner, data);
	case NVMM_IOC_VCPU_CONFIGURE:
		return nvmm_vcpu_configure(owner, data);
	case NVMM_IOC_VCPU_SETSTATE:
		return nvmm_vcpu_setstate(owner, data);
	case NVMM_IOC_VCPU_GETSTATE:
		return nvmm_vcpu_getstate(owner, data);
	case NVMM_IOC_VCPU_INJECT:
		return nvmm_vcpu_inject(owner, data);
	case NVMM_IOC_VCPU_RUN:
		return nvmm_vcpu_run(owner, data);
	case NVMM_IOC_GPA_MAP:
		return nvmm_gpa_map(owner, data);
	case NVMM_IOC_GPA_UNMAP:
		return nvmm_gpa_unmap(owner, data);
	case NVMM_IOC_HVA_MAP:
		return nvmm_hva_map(owner, data);
	case NVMM_IOC_HVA_UNMAP:
		return nvmm_hva_unmap(owner, data);
	case NVMM_IOC_CTL:
		return nvmm_ctl(owner, data);
	default:
		return EINVAL;
	}
}

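/*
 * A minimal sketch of the ioctl sequence a userland VMM would issue
 * against /dev/nvmm (normally done through libnvmm, not by hand; error
 * handling, memory setup and VCPU state setup are omitted, and the
 * field values below are illustrative assumptions):
 *
 *	int fd = open("/dev/nvmm", O_RDWR | O_CLOEXEC);
 *
 *	struct nvmm_ioc_machine_create mc;
 *	ioctl(fd, NVMM_IOC_MACHINE_CREATE, &mc);
 *
 *	struct nvmm_ioc_vcpu_create vc = { .machid = mc.machid, .cpuid = 0 };
 *	ioctl(fd, NVMM_IOC_VCPU_CREATE, &vc);
 *
 *	struct nvmm_ioc_vcpu_run vr = { .machid = mc.machid, .cpuid = 0 };
 *	ioctl(fd, NVMM_IOC_VCPU_RUN, &vr);
 *	// vr.exit now describes why the VCPU stopped
 */
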
/* -------------------------------------------------------------------------- */

static int nvmm_match(device_t, cfdata_t, void *);
static void nvmm_attach(device_t, device_t, void *);
static int nvmm_detach(device_t, int);

extern struct cfdriver nvmm_cd;

CFATTACH_DECL_NEW(nvmm, 0, nvmm_match, nvmm_attach, nvmm_detach, NULL);

static struct cfdata nvmm_cfdata[] = {
	{
		.cf_name = "nvmm",
		.cf_atname = "nvmm",
		.cf_unit = 0,
		.cf_fstate = FSTATE_STAR,
		.cf_loc = NULL,
		.cf_flags = 0,
		.cf_pspec = NULL,
	},
	{ NULL, NULL, 0, FSTATE_NOTFOUND, NULL, 0, NULL }
};

static int
nvmm_match(device_t self, cfdata_t cfdata, void *arg)
{
	return 1;
}

static void
nvmm_attach(device_t parent, device_t self, void *aux)
{
	int error;

	error = nvmm_init();
	if (error)
		panic("%s: impossible", __func__);
	aprint_normal_dev(self, "attached, using backend %s\n",
	    nvmm_impl->name);
}

static int
nvmm_detach(device_t self, int flags)
{
	if (atomic_load_relaxed(&nmachines) > 0)
		return EBUSY;
	nvmm_fini();
	return 0;
}

void
nvmmattach(int nunits)
{
	/* nothing */
}

MODULE(MODULE_CLASS_MISC, nvmm, NULL);

#if defined(_MODULE)
CFDRIVER_DECL(nvmm, DV_VIRTUAL, NULL);
#endif

static int
nvmm_modcmd(modcmd_t cmd, void *arg)
{
#if defined(_MODULE)
	devmajor_t bmajor = NODEVMAJOR;
	devmajor_t cmajor = 345;
#endif
	int error;

	switch (cmd) {
	case MODULE_CMD_INIT:
		if (nvmm_ident() == NULL) {
			aprint_error("%s: cpu not supported\n",
			    nvmm_cd.cd_name);
			return ENOTSUP;
		}
#if defined(_MODULE)
		error = config_cfdriver_attach(&nvmm_cd);
		if (error)
			return error;
#endif
		error = config_cfattach_attach(nvmm_cd.cd_name, &nvmm_ca);
		if (error) {
			config_cfdriver_detach(&nvmm_cd);
			aprint_error("%s: config_cfattach_attach failed\n",
			    nvmm_cd.cd_name);
			return error;
		}

		error = config_cfdata_attach(nvmm_cfdata, 1);
		if (error) {
			config_cfattach_detach(nvmm_cd.cd_name, &nvmm_ca);
			config_cfdriver_detach(&nvmm_cd);
			aprint_error("%s: unable to register cfdata\n",
			    nvmm_cd.cd_name);
			return error;
		}

		if (config_attach_pseudo(nvmm_cfdata) == NULL) {
			aprint_error("%s: config_attach_pseudo failed\n",
			    nvmm_cd.cd_name);
			config_cfattach_detach(nvmm_cd.cd_name, &nvmm_ca);
			config_cfdriver_detach(&nvmm_cd);
			return ENXIO;
		}

#if defined(_MODULE)
		/* mknod /dev/nvmm c 345 0 */
		error = devsw_attach(nvmm_cd.cd_name, NULL, &bmajor,
		    &nvmm_cdevsw, &cmajor);
		if (error) {
			aprint_error("%s: unable to register devsw\n",
			    nvmm_cd.cd_name);
			config_cfattach_detach(nvmm_cd.cd_name, &nvmm_ca);
			config_cfdriver_detach(&nvmm_cd);
			return error;
		}
#endif
		return 0;
	case MODULE_CMD_FINI:
		error = config_cfdata_detach(nvmm_cfdata);
		if (error)
			return error;
		error = config_cfattach_detach(nvmm_cd.cd_name, &nvmm_ca);
		if (error)
			return error;
#if defined(_MODULE)
		config_cfdriver_detach(&nvmm_cd);
		devsw_detach(NULL, &nvmm_cdevsw);
#endif
		return 0;
	case MODULE_CMD_AUTOUNLOAD:
		return EBUSY;
	default:
		return ENOTTY;
	}
}