/*	$NetBSD: nvmm.c,v 1.46 2022/07/07 23:50:33 pgoyette Exp $	*/

/*
 * Copyright (c) 2018-2020 Maxime Villard, m00nbsd.net
 * All rights reserved.
 *
 * This code is part of the NVMM hypervisor.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: nvmm.c,v 1.46 2022/07/07 23:50:33 pgoyette Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>

#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/conf.h>
#include <sys/kmem.h>
#include <sys/module.h>
#include <sys/proc.h>
#include <sys/mman.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/device.h>

#include <uvm/uvm_aobj.h>
#include <uvm/uvm_extern.h>
#include <uvm/uvm_page.h>

#include "ioconf.h"

#include <dev/nvmm/nvmm.h>
#include <dev/nvmm/nvmm_internal.h>
#include <dev/nvmm/nvmm_ioctl.h>

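/*
 * All machines live in a static table.  The "present" flag marks the
 * slots in use, and nmachines counts them so that detach can refuse to
 * unload while VMs exist.
 */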
static struct nvmm_machine machines[NVMM_MAX_MACHINES];
static volatile unsigned int nmachines __cacheline_aligned;

static const struct nvmm_impl *nvmm_impl_list[] = {
#if defined(__x86_64__)
	&nvmm_x86_svm,	/* x86 AMD SVM */
	&nvmm_x86_vmx	/* x86 Intel VMX */
#endif
};

static const struct nvmm_impl *nvmm_impl __read_mostly = NULL;

static struct nvmm_owner root_owner;

/* -------------------------------------------------------------------------- */

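/*
 * Allocate a machine: find a free slot in the table, mark it present,
 * and return it with its lock write-held.  The caller drops the lock
 * with nvmm_machine_put().
 */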
static int
nvmm_machine_alloc(struct nvmm_machine **ret)
{
	struct nvmm_machine *mach;
	size_t i;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		mach = &machines[i];

		rw_enter(&mach->lock, RW_WRITER);
		if (mach->present) {
			rw_exit(&mach->lock);
			continue;
		}

		mach->present = true;
		mach->time = time_second;
		*ret = mach;
		atomic_inc_uint(&nmachines);
		return 0;
	}

	return ENOBUFS;
}

static void
nvmm_machine_free(struct nvmm_machine *mach)
{
	KASSERT(rw_write_held(&mach->lock));
	KASSERT(mach->present);
	mach->present = false;
	atomic_dec_uint(&nmachines);
}

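/*
 * Look up a machine by ID and return it locked, as reader or writer.
 * Fails with EPERM unless the caller owns the machine or is the root
 * owner.
 */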
static int
nvmm_machine_get(struct nvmm_owner *owner, nvmm_machid_t machid,
    struct nvmm_machine **ret, bool writer)
{
	struct nvmm_machine *mach;
	krw_t op = writer ? RW_WRITER : RW_READER;

	if (__predict_false(machid >= NVMM_MAX_MACHINES)) {
		return EINVAL;
	}
	mach = &machines[machid];

	rw_enter(&mach->lock, op);
	if (__predict_false(!mach->present)) {
		rw_exit(&mach->lock);
		return ENOENT;
	}
	if (__predict_false(mach->owner != owner && owner != &root_owner)) {
		rw_exit(&mach->lock);
		return EPERM;
	}
	*ret = mach;

	return 0;
}

static void
nvmm_machine_put(struct nvmm_machine *mach)
{
	rw_exit(&mach->lock);
}

/* -------------------------------------------------------------------------- */

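/*
 * The VCPU slots of a machine follow the same present-flag scheme as
 * the machine table itself, with a mutex per VCPU.  nvmm_vcpu_alloc()
 * and nvmm_vcpu_get() both return the VCPU locked.
 */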
static int
nvmm_vcpu_alloc(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    struct nvmm_cpu **ret)
{
	struct nvmm_cpu *vcpu;

	if (cpuid >= NVMM_MAX_VCPUS) {
		return EINVAL;
	}
	vcpu = &mach->cpus[cpuid];

	mutex_enter(&vcpu->lock);
	if (vcpu->present) {
		mutex_exit(&vcpu->lock);
		return EBUSY;
	}

	vcpu->present = true;
	vcpu->comm = NULL;
	vcpu->hcpu_last = -1;
	*ret = vcpu;
	return 0;
}

static void
nvmm_vcpu_free(struct nvmm_machine *mach, struct nvmm_cpu *vcpu)
{
	KASSERT(mutex_owned(&vcpu->lock));
	vcpu->present = false;
	if (vcpu->comm != NULL) {
		uvm_deallocate(kernel_map, (vaddr_t)vcpu->comm, PAGE_SIZE);
	}
}

static int
nvmm_vcpu_get(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    struct nvmm_cpu **ret)
{
	struct nvmm_cpu *vcpu;

	if (__predict_false(cpuid >= NVMM_MAX_VCPUS)) {
		return EINVAL;
	}
	vcpu = &mach->cpus[cpuid];

	mutex_enter(&vcpu->lock);
	if (__predict_false(!vcpu->present)) {
		mutex_exit(&vcpu->lock);
		return ENOENT;
	}
	*ret = vcpu;

	return 0;
}

static void
nvmm_vcpu_put(struct nvmm_cpu *vcpu)
{
	mutex_exit(&vcpu->lock);
}

/* -------------------------------------------------------------------------- */

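/*
 * Destroy every machine that belongs to the given owner.  Called when
 * the owner's file descriptor is closed, so no VM outlives its creator
 * process.
 */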
static void
nvmm_kill_machines(struct nvmm_owner *owner)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	size_t i, j;
	int error;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		mach = &machines[i];

		rw_enter(&mach->lock, RW_WRITER);
		if (!mach->present || mach->owner != owner) {
			rw_exit(&mach->lock);
			continue;
		}

		/* Kill it. */
		for (j = 0; j < NVMM_MAX_VCPUS; j++) {
			error = nvmm_vcpu_get(mach, j, &vcpu);
			if (error)
				continue;
			(*nvmm_impl->vcpu_destroy)(mach, vcpu);
			nvmm_vcpu_free(mach, vcpu);
			nvmm_vcpu_put(vcpu);
			atomic_dec_uint(&mach->ncpus);
		}
		(*nvmm_impl->machine_destroy)(mach);
		uvmspace_free(mach->vm);

		/* Drop the kernel UOBJ refs. */
		for (j = 0; j < NVMM_MAX_HMAPPINGS; j++) {
			if (!mach->hmap[j].present)
				continue;
			uao_detach(mach->hmap[j].uobj);
		}

		nvmm_machine_free(mach);

		rw_exit(&mach->lock);
	}
}

/* -------------------------------------------------------------------------- */

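/*
 * Ioctl handlers.  They resolve the target machine and VCPU from the
 * arguments where needed, call into the active backend, and drop the
 * references they took.
 */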
static int
nvmm_capability(struct nvmm_owner *owner, struct nvmm_ioc_capability *args)
{
	args->cap.version = NVMM_KERN_VERSION;
	args->cap.state_size = nvmm_impl->state_size;
	args->cap.max_machines = NVMM_MAX_MACHINES;
	args->cap.max_vcpus = NVMM_MAX_VCPUS;
	args->cap.max_ram = NVMM_MAX_RAM;

	(*nvmm_impl->capability)(&args->cap);

	return 0;
}

static int
nvmm_machine_create(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_create *args)
{
	struct nvmm_machine *mach;
	int error;

	error = nvmm_machine_alloc(&mach);
	if (error)
		return error;

	/* Curproc owns the machine. */
	mach->owner = owner;

	/* Zero out the host mappings. */
	memset(&mach->hmap, 0, sizeof(mach->hmap));

	/* Create the machine vmspace. */
	mach->gpa_begin = 0;
	mach->gpa_end = NVMM_MAX_RAM;
	mach->vm = uvmspace_alloc(0, mach->gpa_end - mach->gpa_begin, false);

	/* Create the comm uobj. */
	mach->commuobj = uao_create(NVMM_MAX_VCPUS * PAGE_SIZE, 0);

	(*nvmm_impl->machine_create)(mach);

	args->machid = mach->machid;
	nvmm_machine_put(mach);

	return 0;
}

static int
nvmm_machine_destroy(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_destroy *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;
	size_t i;

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error)
		return error;

	for (i = 0; i < NVMM_MAX_VCPUS; i++) {
		error = nvmm_vcpu_get(mach, i, &vcpu);
		if (error)
			continue;

		(*nvmm_impl->vcpu_destroy)(mach, vcpu);
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		atomic_dec_uint(&mach->ncpus);
	}

	(*nvmm_impl->machine_destroy)(mach);

	/* Free the machine vmspace. */
	uvmspace_free(mach->vm);

	/* Drop the kernel UOBJ refs. */
	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		if (!mach->hmap[i].present)
			continue;
		uao_detach(mach->hmap[i].uobj);
	}

	nvmm_machine_free(mach);
	nvmm_machine_put(mach);

	return 0;
}

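/*
 * Machine and VCPU configuration: the operation index selects a
 * backend-specific parameter structure, which is copied in from
 * userland and handed to the backend.
 */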
static int
nvmm_machine_configure(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_configure *args)
{
	struct nvmm_machine *mach;
	size_t allocsz;
	uint64_t op;
	void *data;
	int error;

	op = NVMM_MACH_CONF_MD(args->op);
	if (__predict_false(op >= nvmm_impl->mach_conf_max)) {
		return EINVAL;
	}

	allocsz = nvmm_impl->mach_conf_sizes[op];
	data = kmem_alloc(allocsz, KM_SLEEP);

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error) {
		kmem_free(data, allocsz);
		return error;
	}

	error = copyin(args->conf, data, allocsz);
	if (error) {
		goto out;
	}

	error = (*nvmm_impl->machine_configure)(mach, op, data);

out:
	nvmm_machine_put(mach);
	kmem_free(data, allocsz);
	return error;
}

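/*
 * Create a VCPU.  Its communication page is allocated from the
 * machine's comm uobj, mapped and wired in the kernel map here, and
 * later mapped into the owner's address space via nvmm_mmap().
 */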
static int
nvmm_vcpu_create(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_create *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_alloc(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	/* Allocate the comm page. */
	uao_reference(mach->commuobj);
	error = uvm_map(kernel_map, (vaddr_t *)&vcpu->comm, PAGE_SIZE,
	    mach->commuobj, args->cpuid * PAGE_SIZE, 0, UVM_MAPFLAG(UVM_PROT_RW,
	    UVM_PROT_RW, UVM_INH_SHARE, UVM_ADV_RANDOM, 0));
	if (error) {
		uao_detach(mach->commuobj);
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		goto out;
	}
	error = uvm_map_pageable(kernel_map, (vaddr_t)vcpu->comm,
	    (vaddr_t)vcpu->comm + PAGE_SIZE, false, 0);
	if (error) {
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		goto out;
	}
	memset(vcpu->comm, 0, PAGE_SIZE);

	error = (*nvmm_impl->vcpu_create)(mach, vcpu);
	if (error) {
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		goto out;
	}

	nvmm_vcpu_put(vcpu);
	atomic_inc_uint(&mach->ncpus);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_destroy(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_destroy *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	(*nvmm_impl->vcpu_destroy)(mach, vcpu);
	nvmm_vcpu_free(mach, vcpu);
	nvmm_vcpu_put(vcpu);
	atomic_dec_uint(&mach->ncpus);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_configure(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_configure *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	size_t allocsz;
	uint64_t op;
	void *data;
	int error;

	op = NVMM_VCPU_CONF_MD(args->op);
	if (__predict_false(op >= nvmm_impl->vcpu_conf_max))
		return EINVAL;

	allocsz = nvmm_impl->vcpu_conf_sizes[op];
	data = kmem_alloc(allocsz, KM_SLEEP);

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error) {
		kmem_free(data, allocsz);
		return error;
	}

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error) {
		nvmm_machine_put(mach);
		kmem_free(data, allocsz);
		return error;
	}

	error = copyin(args->conf, data, allocsz);
	if (error) {
		goto out;
	}

	error = (*nvmm_impl->vcpu_configure)(vcpu, op, data);

out:
	nvmm_vcpu_put(vcpu);
	nvmm_machine_put(mach);
	kmem_free(data, allocsz);
	return error;
}

static int
nvmm_vcpu_setstate(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_setstate *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	(*nvmm_impl->vcpu_setstate)(vcpu);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_getstate(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_getstate *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	(*nvmm_impl->vcpu_getstate)(vcpu);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_inject(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_inject *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	error = (*nvmm_impl->vcpu_inject)(vcpu);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

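/*
 * Run loop: reenter the guest until the backend reports an exit that
 * userland must handle.  Exits caused by nested page faults on mapped
 * guest memory are resolved in-kernel with uvm_fault() and do not
 * bounce to userland.  The loop also returns early when a signal is
 * pending or a reschedule is needed.
 */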
static int
nvmm_do_vcpu_run(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
    struct nvmm_vcpu_exit *exit)
{
	struct vmspace *vm = mach->vm;
	int ret;

	while (1) {
		/* Got a signal? Or pending resched? Leave. */
		if (__predict_false(nvmm_return_needed(vcpu, exit))) {
			return 0;
		}

		/* Run the VCPU. */
		ret = (*nvmm_impl->vcpu_run)(mach, vcpu, exit);
		if (__predict_false(ret != 0)) {
			return ret;
		}

		/* Process nested page faults. */
		if (__predict_true(exit->reason != NVMM_VCPU_EXIT_MEMORY)) {
			break;
		}
		if (exit->u.mem.gpa >= mach->gpa_end) {
			break;
		}
		if (uvm_fault(&vm->vm_map, exit->u.mem.gpa, exit->u.mem.prot)) {
			break;
		}
	}

	return 0;
}

static int
nvmm_vcpu_run(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_run *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu = NULL;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	error = nvmm_do_vcpu_run(mach, vcpu, &args->exit);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	if (vcpu)
		vcpu->comm->stop = 0;
	return error;
}

/* -------------------------------------------------------------------------- */

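/*
 * Host mappings ("hmappings") pair a range of the owner's address
 * space with an anonymous UVM object.  nvmm_gpa_map() can then map the
 * same uobj into the guest physical space, so host and guest share the
 * underlying pages.
 */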
static struct uvm_object *
nvmm_hmapping_getuobj(struct nvmm_machine *mach, uintptr_t hva, size_t size,
    size_t *off)
{
	struct nvmm_hmapping *hmapping;
	size_t i;

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present) {
			continue;
		}
		if (hva >= hmapping->hva &&
		    hva + size <= hmapping->hva + hmapping->size) {
			*off = hva - hmapping->hva;
			return hmapping->uobj;
		}
	}

	return NULL;
}

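/*
 * Validate a new hmapping: it must be page-aligned and non-NULL, and
 * either fully contained in an existing mapping or disjoint from all
 * of them; partial overlaps are rejected.
 */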
static int
nvmm_hmapping_validate(struct nvmm_machine *mach, uintptr_t hva, size_t size)
{
	struct nvmm_hmapping *hmapping;
	size_t i;

	if ((hva % PAGE_SIZE) != 0 || (size % PAGE_SIZE) != 0) {
		return EINVAL;
	}
	if (hva == 0) {
		return EINVAL;
	}

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present) {
			continue;
		}

		if (hva >= hmapping->hva &&
		    hva + size <= hmapping->hva + hmapping->size) {
			break;
		}

		if (hva >= hmapping->hva &&
		    hva < hmapping->hva + hmapping->size) {
			return EEXIST;
		}
		if (hva + size > hmapping->hva &&
		    hva + size <= hmapping->hva + hmapping->size) {
			return EEXIST;
		}
		if (hva <= hmapping->hva &&
		    hva + size >= hmapping->hva + hmapping->size) {
			return EEXIST;
		}
	}

	return 0;
}

static struct nvmm_hmapping *
nvmm_hmapping_alloc(struct nvmm_machine *mach)
{
	struct nvmm_hmapping *hmapping;
	size_t i;

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present) {
			hmapping->present = true;
			return hmapping;
		}
	}

	return NULL;
}

static int
nvmm_hmapping_free(struct nvmm_machine *mach, uintptr_t hva, size_t size)
{
	struct vmspace *vmspace = curproc->p_vmspace;
	struct nvmm_hmapping *hmapping;
	size_t i;

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present || hmapping->hva != hva ||
		    hmapping->size != size) {
			continue;
		}

		uvm_unmap(&vmspace->vm_map, hmapping->hva,
		    hmapping->hva + hmapping->size);
		uao_detach(hmapping->uobj);

		hmapping->uobj = NULL;
		hmapping->present = false;

		return 0;
	}

	return ENOENT;
}

static int
nvmm_hva_map(struct nvmm_owner *owner, struct nvmm_ioc_hva_map *args)
{
	struct vmspace *vmspace = curproc->p_vmspace;
	struct nvmm_machine *mach;
	struct nvmm_hmapping *hmapping;
	vaddr_t uva;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error)
		return error;

	error = nvmm_hmapping_validate(mach, args->hva, args->size);
	if (error)
		goto out;

	hmapping = nvmm_hmapping_alloc(mach);
	if (hmapping == NULL) {
		error = ENOBUFS;
		goto out;
	}

	hmapping->hva = args->hva;
	hmapping->size = args->size;
	hmapping->uobj = uao_create(hmapping->size, 0);
	uva = hmapping->hva;

	/* Take a reference for the user. */
	uao_reference(hmapping->uobj);

	/* Map the uobj into the user address space, as pageable. */
	error = uvm_map(&vmspace->vm_map, &uva, hmapping->size, hmapping->uobj,
	    0, 0, UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, UVM_INH_SHARE,
	    UVM_ADV_RANDOM, UVM_FLAG_FIXED|UVM_FLAG_UNMAP));
	if (error) {
		uao_detach(hmapping->uobj);
	}

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_hva_unmap(struct nvmm_owner *owner, struct nvmm_ioc_hva_unmap *args)
{
	struct nvmm_machine *mach;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error)
		return error;

	error = nvmm_hmapping_free(mach, args->hva, args->size);

	nvmm_machine_put(mach);
	return error;
}

/* -------------------------------------------------------------------------- */

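/*
 * Map guest physical memory: find the hmapping uobj backing the given
 * HVA range and map it at the requested GPA in the machine's vmspace.
 * Faults on this range are then serviced by uvm_fault() in
 * nvmm_do_vcpu_run().
 */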
static int
nvmm_gpa_map(struct nvmm_owner *owner, struct nvmm_ioc_gpa_map *args)
{
	struct nvmm_machine *mach;
	struct uvm_object *uobj;
	gpaddr_t gpa;
	size_t off;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	if ((args->prot & ~(PROT_READ|PROT_WRITE|PROT_EXEC)) != 0) {
		error = EINVAL;
		goto out;
	}

	if ((args->gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0 ||
	    (args->hva % PAGE_SIZE) != 0) {
		error = EINVAL;
		goto out;
	}
	if (args->hva == 0) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa < mach->gpa_begin || args->gpa >= mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size <= args->gpa) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size > mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	gpa = args->gpa;

	uobj = nvmm_hmapping_getuobj(mach, args->hva, args->size, &off);
	if (uobj == NULL) {
		error = EINVAL;
		goto out;
	}

	/* Take a reference for the machine. */
	uao_reference(uobj);

	/* Map the uobj into the machine address space, as pageable. */
	error = uvm_map(&mach->vm->vm_map, &gpa, args->size, uobj, off, 0,
	    UVM_MAPFLAG(args->prot, UVM_PROT_RWX, UVM_INH_NONE,
	    UVM_ADV_RANDOM, UVM_FLAG_FIXED|UVM_FLAG_UNMAP));
	if (error) {
		uao_detach(uobj);
		goto out;
	}
	if (gpa != args->gpa) {
		uao_detach(uobj);
		printf("[!] uvm_map problem\n");
		error = EINVAL;
		goto out;
	}

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_gpa_unmap(struct nvmm_owner *owner, struct nvmm_ioc_gpa_unmap *args)
{
	struct nvmm_machine *mach;
	gpaddr_t gpa;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	if ((args->gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa < mach->gpa_begin || args->gpa >= mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size <= args->gpa) {
		error = EINVAL;
		goto out;
	}
	/* Match the upper-bound check in nvmm_gpa_map(). */
	if (args->gpa + args->size > mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	gpa = args->gpa;

	/* Unmap the memory from the machine. */
	uvm_unmap(&mach->vm->vm_map, gpa, gpa + args->size);

out:
	nvmm_machine_put(mach);
	return error;
}

/* -------------------------------------------------------------------------- */

static int
nvmm_ctl_mach_info(struct nvmm_owner *owner, struct nvmm_ioc_ctl *args)
{
	struct nvmm_ctl_mach_info ctl;
	struct nvmm_machine *mach;
	int error;
	size_t i;

	if (args->size != sizeof(ctl))
		return EINVAL;
	error = copyin(args->data, &ctl, sizeof(ctl));
	if (error)
		return error;

	error = nvmm_machine_get(owner, ctl.machid, &mach, true);
	if (error)
		return error;

	ctl.nvcpus = mach->ncpus;

	ctl.nram = 0;
	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		if (!mach->hmap[i].present)
			continue;
		ctl.nram += mach->hmap[i].size;
	}

	ctl.pid = mach->owner->pid;
	ctl.time = mach->time;

	nvmm_machine_put(mach);

	error = copyout(&ctl, args->data, sizeof(ctl));
	if (error)
		return error;

	return 0;
}

static int
nvmm_ctl(struct nvmm_owner *owner, struct nvmm_ioc_ctl *args)
{
	switch (args->op) {
	case NVMM_CTL_MACH_INFO:
		return nvmm_ctl_mach_info(owner, args);
	default:
		return EINVAL;
	}
}

/* -------------------------------------------------------------------------- */

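/*
 * Backend selection: pick the first backend whose ident() callback
 * accepts the host CPU.  At most one backend is active at a time.
 */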
static const struct nvmm_impl *
nvmm_ident(void)
{
	size_t i;

	for (i = 0; i < __arraycount(nvmm_impl_list); i++) {
		if ((*nvmm_impl_list[i]->ident)())
			return nvmm_impl_list[i];
	}

	return NULL;
}

static int
nvmm_init(void)
{
	size_t i, n;

	nvmm_impl = nvmm_ident();
	if (nvmm_impl == NULL)
		return ENOTSUP;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		machines[i].machid = i;
		rw_init(&machines[i].lock);
		for (n = 0; n < NVMM_MAX_VCPUS; n++) {
			machines[i].cpus[n].present = false;
			machines[i].cpus[n].cpuid = n;
			mutex_init(&machines[i].cpus[n].lock, MUTEX_DEFAULT,
			    IPL_NONE);
		}
	}

	(*nvmm_impl->init)();

	return 0;
}

static void
nvmm_fini(void)
{
	size_t i, n;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		rw_destroy(&machines[i].lock);
		for (n = 0; n < NVMM_MAX_VCPUS; n++) {
			mutex_destroy(&machines[i].cpus[n].lock);
		}
	}

	(*nvmm_impl->fini)();
	nvmm_impl = NULL;
}

/* -------------------------------------------------------------------------- */

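/*
 * /dev/nvmm implements only open().  Each open() clones an anonymous
 * file with its own nvmm_owner, so every later operation (ioctl, mmap,
 * close) goes through nvmm_fileops rather than the cdevsw.
 */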
static dev_type_open(nvmm_open);

const struct cdevsw nvmm_cdevsw = {
	.d_open = nvmm_open,
	.d_close = noclose,
	.d_read = noread,
	.d_write = nowrite,
	.d_ioctl = noioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_OTHER | D_MPSAFE
};

static int nvmm_ioctl(file_t *, u_long, void *);
static int nvmm_close(file_t *);
static int nvmm_mmap(file_t *, off_t *, size_t, int, int *, int *,
    struct uvm_object **, int *);

static const struct fileops nvmm_fileops = {
	.fo_read = fbadop_read,
	.fo_write = fbadop_write,
	.fo_ioctl = nvmm_ioctl,
	.fo_fcntl = fnullop_fcntl,
	.fo_poll = fnullop_poll,
	.fo_stat = fbadop_stat,
	.fo_close = nvmm_close,
	.fo_kqfilter = fnullop_kqfilter,
	.fo_restart = fnullop_restart,
	.fo_mmap = nvmm_mmap,
};

static int
nvmm_open(dev_t dev, int flags, int type, struct lwp *l)
{
	struct nvmm_owner *owner;
	struct file *fp;
	int error, fd;

	if (__predict_false(nvmm_impl == NULL))
		return ENXIO;
	if (minor(dev) != 0)
		return EXDEV;
	if (!(flags & O_CLOEXEC))
		return EINVAL;
	error = fd_allocfile(&fp, &fd);
	if (error)
		return error;

	if (OFLAGS(flags) & O_WRONLY) {
		owner = &root_owner;
	} else {
		owner = kmem_alloc(sizeof(*owner), KM_SLEEP);
		owner->pid = l->l_proc->p_pid;
	}

	return fd_clone(fp, fd, flags, &nvmm_fileops, owner);
}

static int
nvmm_close(file_t *fp)
{
	struct nvmm_owner *owner = fp->f_data;

	KASSERT(owner != NULL);
	nvmm_kill_machines(owner);
	if (owner != &root_owner) {
		kmem_free(owner, sizeof(*owner));
	}
	fp->f_data = NULL;

	return 0;
}

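/*
 * mmap() on an nvmm fd maps a VCPU communication page: the offset
 * encodes the machine and VCPU IDs, and the page comes from the
 * machine's comm uobj, shared with the wired kernel mapping set up in
 * nvmm_vcpu_create().
 */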
static int
nvmm_mmap(file_t *fp, off_t *offp, size_t size, int prot, int *flagsp,
    int *advicep, struct uvm_object **uobjp, int *maxprotp)
{
	struct nvmm_owner *owner = fp->f_data;
	struct nvmm_machine *mach;
	nvmm_machid_t machid;
	nvmm_cpuid_t cpuid;
	int error;

	KASSERT(size > 0);

	if (prot & PROT_EXEC)
		return EACCES;
	if (size != PAGE_SIZE)
		return EINVAL;

	cpuid = NVMM_COMM_CPUID(*offp);
	if (__predict_false(cpuid >= NVMM_MAX_VCPUS))
		return EINVAL;

	machid = NVMM_COMM_MACHID(*offp);
	error = nvmm_machine_get(owner, machid, &mach, false);
	if (error)
		return error;

	uao_reference(mach->commuobj);
	*uobjp = mach->commuobj;
	*offp = cpuid * PAGE_SIZE;
	*maxprotp = prot;
	*advicep = UVM_ADV_RANDOM;

	nvmm_machine_put(mach);
	return 0;
}

static int
nvmm_ioctl(file_t *fp, u_long cmd, void *data)
{
	struct nvmm_owner *owner = fp->f_data;

	KASSERT(owner != NULL);

	switch (cmd) {
	case NVMM_IOC_CAPABILITY:
		return nvmm_capability(owner, data);
	case NVMM_IOC_MACHINE_CREATE:
		return nvmm_machine_create(owner, data);
	case NVMM_IOC_MACHINE_DESTROY:
		return nvmm_machine_destroy(owner, data);
	case NVMM_IOC_MACHINE_CONFIGURE:
		return nvmm_machine_configure(owner, data);
	case NVMM_IOC_VCPU_CREATE:
		return nvmm_vcpu_create(owner, data);
	case NVMM_IOC_VCPU_DESTROY:
		return nvmm_vcpu_destroy(owner, data);
	case NVMM_IOC_VCPU_CONFIGURE:
		return nvmm_vcpu_configure(owner, data);
	case NVMM_IOC_VCPU_SETSTATE:
		return nvmm_vcpu_setstate(owner, data);
	case NVMM_IOC_VCPU_GETSTATE:
		return nvmm_vcpu_getstate(owner, data);
	case NVMM_IOC_VCPU_INJECT:
		return nvmm_vcpu_inject(owner, data);
	case NVMM_IOC_VCPU_RUN:
		return nvmm_vcpu_run(owner, data);
	case NVMM_IOC_GPA_MAP:
		return nvmm_gpa_map(owner, data);
	case NVMM_IOC_GPA_UNMAP:
		return nvmm_gpa_unmap(owner, data);
	case NVMM_IOC_HVA_MAP:
		return nvmm_hva_map(owner, data);
	case NVMM_IOC_HVA_UNMAP:
		return nvmm_hva_unmap(owner, data);
	case NVMM_IOC_CTL:
		return nvmm_ctl(owner, data);
	default:
		return EINVAL;
	}
}

/* -------------------------------------------------------------------------- */

static int nvmm_match(device_t, cfdata_t, void *);
static void nvmm_attach(device_t, device_t, void *);
static int nvmm_detach(device_t, int);

extern struct cfdriver nvmm_cd;

CFATTACH_DECL_NEW(nvmm, 0, nvmm_match, nvmm_attach, nvmm_detach, NULL);

static struct cfdata nvmm_cfdata[] = {
	{
		.cf_name = "nvmm",
		.cf_atname = "nvmm",
		.cf_unit = 0,
		.cf_fstate = FSTATE_STAR,
		.cf_loc = NULL,
		.cf_flags = 0,
		.cf_pspec = NULL,
	},
	{ NULL, NULL, 0, FSTATE_NOTFOUND, NULL, 0, NULL }
};

static int
nvmm_match(device_t self, cfdata_t cfdata, void *arg)
{
	return 1;
}

static void
nvmm_attach(device_t parent, device_t self, void *aux)
{
	int error;

	error = nvmm_init();
	if (error)
		panic("%s: impossible", __func__);
	aprint_normal_dev(self, "attached, using backend %s\n",
	    nvmm_impl->name);
}

static int
nvmm_detach(device_t self, int flags)
{
	if (atomic_load_relaxed(&nmachines) > 0)
		return EBUSY;
	nvmm_fini();
	return 0;
}

void
nvmmattach(int nunits)
{
	/* nothing */
}

MODULE(MODULE_CLASS_MISC, nvmm, NULL);

#if defined(_MODULE)
CFDRIVER_DECL(nvmm, DV_VIRTUAL, NULL);
#endif

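/*
 * Module glue: hand-register the autoconf structures and attach a
 * pseudo-device instance.  When loaded as a module, the character
 * device is also attached, with a fixed major of 345.
 */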
static int
nvmm_modcmd(modcmd_t cmd, void *arg)
{
#if defined(_MODULE)
	devmajor_t bmajor = NODEVMAJOR;
	devmajor_t cmajor = 345;
#endif
	int error;

	switch (cmd) {
	case MODULE_CMD_INIT:
		if (nvmm_ident() == NULL) {
			aprint_error("%s: cpu not supported\n",
			    nvmm_cd.cd_name);
			return ENOTSUP;
		}
#if defined(_MODULE)
		error = config_cfdriver_attach(&nvmm_cd);
		if (error)
			return error;
#endif
		error = config_cfattach_attach(nvmm_cd.cd_name, &nvmm_ca);
		if (error) {
#if defined(_MODULE)
			config_cfdriver_detach(&nvmm_cd);
#endif
			aprint_error("%s: config_cfattach_attach failed\n",
			    nvmm_cd.cd_name);
			return error;
		}

		error = config_cfdata_attach(nvmm_cfdata, 1);
		if (error) {
			config_cfattach_detach(nvmm_cd.cd_name, &nvmm_ca);
#if defined(_MODULE)
			config_cfdriver_detach(&nvmm_cd);
#endif
			aprint_error("%s: unable to register cfdata\n",
			    nvmm_cd.cd_name);
			return error;
		}

		if (config_attach_pseudo(nvmm_cfdata) == NULL) {
			aprint_error("%s: config_attach_pseudo failed\n",
			    nvmm_cd.cd_name);
			config_cfattach_detach(nvmm_cd.cd_name, &nvmm_ca);
#if defined(_MODULE)
			config_cfdriver_detach(&nvmm_cd);
#endif
			return ENXIO;
		}

#if defined(_MODULE)
		/* mknod /dev/nvmm c 345 0 */
		error = devsw_attach(nvmm_cd.cd_name, NULL, &bmajor,
		    &nvmm_cdevsw, &cmajor);
		if (error) {
			aprint_error("%s: unable to register devsw, err %d\n",
			    nvmm_cd.cd_name, error);
			config_cfattach_detach(nvmm_cd.cd_name, &nvmm_ca);
			config_cfdriver_detach(&nvmm_cd);
			return error;
		}
#endif
		return 0;
	case MODULE_CMD_FINI:
		error = config_cfdata_detach(nvmm_cfdata);
		if (error)
			return error;
		error = config_cfattach_detach(nvmm_cd.cd_name, &nvmm_ca);
		if (error)
			return error;
#if defined(_MODULE)
		config_cfdriver_detach(&nvmm_cd);
		devsw_detach(NULL, &nvmm_cdevsw);
#endif
		return 0;
	case MODULE_CMD_AUTOUNLOAD:
		return EBUSY;
	default:
		return ENOTTY;
	}
}