/*	$NetBSD: nvmm.c,v 1.33 2020/08/01 08:18:36 maxv Exp $	*/

/*
 * Copyright (c) 2018-2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Maxime Villard.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: nvmm.c,v 1.33 2020/08/01 08:18:36 maxv Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>

#include <sys/cpu.h>
#include <sys/conf.h>
#include <sys/kmem.h>
#include <sys/module.h>
#include <sys/proc.h>
#include <sys/mman.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/device.h>

#include <uvm/uvm.h>
#include <uvm/uvm_page.h>

#include "ioconf.h"

#include <dev/nvmm/nvmm.h>
#include <dev/nvmm/nvmm_internal.h>
#include <dev/nvmm/nvmm_ioctl.h>

static struct nvmm_machine machines[NVMM_MAX_MACHINES];
static volatile unsigned int nmachines __cacheline_aligned;

static const struct nvmm_impl *nvmm_impl_list[] = {
#if defined(__x86_64__)
	&nvmm_x86_svm,	/* x86 AMD SVM */
	&nvmm_x86_vmx	/* x86 Intel VMX */
#endif
};

static const struct nvmm_impl *nvmm_impl = NULL;

static struct nvmm_owner root_owner;

/* -------------------------------------------------------------------------- */

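/*
 * Machine slots. Each machine is guarded by its own rwlock.
 * nvmm_machine_alloc() scans for a free slot and hands it back with the
 * lock write-held; nmachines counts the live machines, so that
 * nvmm_detach() can refuse to unload the module while VMs still exist.
 */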
static int
nvmm_machine_alloc(struct nvmm_machine **ret)
{
	struct nvmm_machine *mach;
	size_t i;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		mach = &machines[i];

		rw_enter(&mach->lock, RW_WRITER);
		if (mach->present) {
			rw_exit(&mach->lock);
			continue;
		}

		mach->present = true;
		mach->time = time_second;
		*ret = mach;
		atomic_inc_uint(&nmachines);
		return 0;
	}

	return ENOBUFS;
}

static void
nvmm_machine_free(struct nvmm_machine *mach)
{
	KASSERT(rw_write_held(&mach->lock));
	KASSERT(mach->present);
	mach->present = false;
	atomic_dec_uint(&nmachines);
}

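/*
 * Look up a machine by id and return it locked, as reader or writer
 * depending on 'writer'. The caller must release it with
 * nvmm_machine_put(). Only the owner of the machine may get it, except
 * for the root owner (a write-only open of the device, see nvmm_open()),
 * which can reach any machine.
 */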
static int
nvmm_machine_get(struct nvmm_owner *owner, nvmm_machid_t machid,
    struct nvmm_machine **ret, bool writer)
{
	struct nvmm_machine *mach;
	krw_t op = writer ? RW_WRITER : RW_READER;

	if (machid >= NVMM_MAX_MACHINES) {
		return EINVAL;
	}
	mach = &machines[machid];

	rw_enter(&mach->lock, op);
	if (!mach->present) {
		rw_exit(&mach->lock);
		return ENOENT;
	}
	if (owner != &root_owner && mach->owner != owner) {
		rw_exit(&mach->lock);
		return EPERM;
	}
	*ret = mach;

	return 0;
}

static void
nvmm_machine_put(struct nvmm_machine *mach)
{
	rw_exit(&mach->lock);
}

/* -------------------------------------------------------------------------- */

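/*
 * VCPU slots, with the same get/put discipline as machines, but guarded
 * by per-VCPU mutexes. A VCPU's comm page is set up in nvmm_vcpu_create()
 * and unmapped from kernel_map in nvmm_vcpu_free().
 */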
static int
nvmm_vcpu_alloc(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    struct nvmm_cpu **ret)
{
	struct nvmm_cpu *vcpu;

	if (cpuid >= NVMM_MAX_VCPUS) {
		return EINVAL;
	}
	vcpu = &mach->cpus[cpuid];

	mutex_enter(&vcpu->lock);
	if (vcpu->present) {
		mutex_exit(&vcpu->lock);
		return EBUSY;
	}

	vcpu->present = true;
	vcpu->comm = NULL;
	vcpu->hcpu_last = -1;
	*ret = vcpu;
	return 0;
}

static void
nvmm_vcpu_free(struct nvmm_machine *mach, struct nvmm_cpu *vcpu)
{
	KASSERT(mutex_owned(&vcpu->lock));
	vcpu->present = false;
	if (vcpu->comm != NULL) {
		uvm_deallocate(kernel_map, (vaddr_t)vcpu->comm, PAGE_SIZE);
	}
}

static int
nvmm_vcpu_get(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    struct nvmm_cpu **ret)
{
	struct nvmm_cpu *vcpu;

	if (cpuid >= NVMM_MAX_VCPUS) {
		return EINVAL;
	}
	vcpu = &mach->cpus[cpuid];

	mutex_enter(&vcpu->lock);
	if (!vcpu->present) {
		mutex_exit(&vcpu->lock);
		return ENOENT;
	}
	*ret = vcpu;

	return 0;
}

static void
nvmm_vcpu_put(struct nvmm_cpu *vcpu)
{
	mutex_exit(&vcpu->lock);
}

/* -------------------------------------------------------------------------- */

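/*
 * Close-time teardown: destroy every machine belonging to 'owner',
 * following the same steps as nvmm_machine_destroy(). Called from
 * nvmm_close(), so a dying process cannot leak its VMs.
 */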
static void
nvmm_kill_machines(struct nvmm_owner *owner)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	size_t i, j;
	int error;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		mach = &machines[i];

		rw_enter(&mach->lock, RW_WRITER);
		if (!mach->present || mach->owner != owner) {
			rw_exit(&mach->lock);
			continue;
		}

		/* Kill it. */
		for (j = 0; j < NVMM_MAX_VCPUS; j++) {
			error = nvmm_vcpu_get(mach, j, &vcpu);
			if (error)
				continue;
			(*nvmm_impl->vcpu_destroy)(mach, vcpu);
			nvmm_vcpu_free(mach, vcpu);
			nvmm_vcpu_put(vcpu);
		}
		(*nvmm_impl->machine_destroy)(mach);
		uvmspace_free(mach->vm);

		/* Drop the kernel UOBJ refs. */
		for (j = 0; j < NVMM_MAX_HMAPPINGS; j++) {
			if (!mach->hmap[j].present)
				continue;
			uao_detach(mach->hmap[j].uobj);
		}

		nvmm_machine_free(mach);

		rw_exit(&mach->lock);
	}
}

/* -------------------------------------------------------------------------- */

static int
nvmm_capability(struct nvmm_owner *owner, struct nvmm_ioc_capability *args)
{
	args->cap.version = NVMM_KERN_VERSION;
	args->cap.state_size = nvmm_impl->state_size;
	args->cap.max_machines = NVMM_MAX_MACHINES;
	args->cap.max_vcpus = NVMM_MAX_VCPUS;
	args->cap.max_ram = NVMM_MAX_RAM;

	(*nvmm_impl->capability)(&args->cap);

	return 0;
}

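/*
 * Illustrative userland sequence for the ioctls below (a sketch only;
 * real clients normally go through libnvmm rather than issuing the
 * ioctls by hand):
 *
 *	int fd = open("/dev/nvmm", O_RDWR | O_CLOEXEC);
 *	struct nvmm_ioc_machine_create args;
 *	ioctl(fd, NVMM_IOC_MACHINE_CREATE, &args);
 *	// args.machid identifies the machine in all subsequent ioctls
 */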
static int
nvmm_machine_create(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_create *args)
{
	struct nvmm_machine *mach;
	int error;

	error = nvmm_machine_alloc(&mach);
	if (error)
		return error;

	/* Curproc owns the machine. */
	mach->owner = owner;

	/* Zero out the host mappings. */
	memset(&mach->hmap, 0, sizeof(mach->hmap));

	/* Create the machine vmspace. */
	mach->gpa_begin = 0;
	mach->gpa_end = NVMM_MAX_RAM;
	mach->vm = uvmspace_alloc(0, mach->gpa_end - mach->gpa_begin, false);

	/* Create the comm uobj. */
	mach->commuobj = uao_create(NVMM_MAX_VCPUS * PAGE_SIZE, 0);

	(*nvmm_impl->machine_create)(mach);

	args->machid = mach->machid;
	nvmm_machine_put(mach);

	return 0;
}

static int
nvmm_machine_destroy(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_destroy *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;
	size_t i;

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error)
		return error;

	for (i = 0; i < NVMM_MAX_VCPUS; i++) {
		error = nvmm_vcpu_get(mach, i, &vcpu);
		if (error)
			continue;

		(*nvmm_impl->vcpu_destroy)(mach, vcpu);
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
	}

	(*nvmm_impl->machine_destroy)(mach);

	/* Free the machine vmspace. */
	uvmspace_free(mach->vm);

	/* Drop the kernel UOBJ refs. */
	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		if (!mach->hmap[i].present)
			continue;
		uao_detach(mach->hmap[i].uobj);
	}

	nvmm_machine_free(mach);
	nvmm_machine_put(mach);

	return 0;
}

static int
nvmm_machine_configure(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_configure *args)
{
	struct nvmm_machine *mach;
	size_t allocsz;
	uint64_t op;
	void *data;
	int error;

	op = NVMM_MACH_CONF_MD(args->op);
	if (__predict_false(op >= nvmm_impl->mach_conf_max)) {
		return EINVAL;
	}

	allocsz = nvmm_impl->mach_conf_sizes[op];
	data = kmem_alloc(allocsz, KM_SLEEP);

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error) {
		kmem_free(data, allocsz);
		return error;
	}

	error = copyin(args->conf, data, allocsz);
	if (error) {
		goto out;
	}

	error = (*nvmm_impl->machine_configure)(mach, op, data);

out:
	nvmm_machine_put(mach);
	kmem_free(data, allocsz);
	return error;
}

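/*
 * Create a VCPU and its comm page: one page of mach->commuobj, at offset
 * cpuid * PAGE_SIZE. The page is entered wired in kernel_map here, and
 * can additionally be mapped in the owner's address space via mmap() on
 * the device (see nvmm_mmap()), giving a shared kernel<->user channel.
 */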
static int
nvmm_vcpu_create(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_create *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_alloc(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	/* Allocate the comm page. */
	uao_reference(mach->commuobj);
	error = uvm_map(kernel_map, (vaddr_t *)&vcpu->comm, PAGE_SIZE,
	    mach->commuobj, args->cpuid * PAGE_SIZE, 0, UVM_MAPFLAG(UVM_PROT_RW,
	    UVM_PROT_RW, UVM_INH_SHARE, UVM_ADV_RANDOM, 0));
	if (error) {
		uao_detach(mach->commuobj);
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		goto out;
	}
	error = uvm_map_pageable(kernel_map, (vaddr_t)vcpu->comm,
	    (vaddr_t)vcpu->comm + PAGE_SIZE, false, 0);
	if (error) {
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		goto out;
	}
	memset(vcpu->comm, 0, PAGE_SIZE);

	error = (*nvmm_impl->vcpu_create)(mach, vcpu);
	if (error) {
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		goto out;
	}

	nvmm_vcpu_put(vcpu);

	atomic_inc_uint(&mach->ncpus);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_destroy(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_destroy *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	(*nvmm_impl->vcpu_destroy)(mach, vcpu);
	nvmm_vcpu_free(mach, vcpu);
	nvmm_vcpu_put(vcpu);

	atomic_dec_uint(&mach->ncpus);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_configure(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_configure *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	size_t allocsz;
	uint64_t op;
	void *data;
	int error;

	op = NVMM_VCPU_CONF_MD(args->op);
	if (__predict_false(op >= nvmm_impl->vcpu_conf_max))
		return EINVAL;

	allocsz = nvmm_impl->vcpu_conf_sizes[op];
	data = kmem_alloc(allocsz, KM_SLEEP);

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error) {
		kmem_free(data, allocsz);
		return error;
	}

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error) {
		nvmm_machine_put(mach);
		kmem_free(data, allocsz);
		return error;
	}

	error = copyin(args->conf, data, allocsz);
	if (error) {
		goto out;
	}

	error = (*nvmm_impl->vcpu_configure)(vcpu, op, data);

out:
	nvmm_vcpu_put(vcpu);
	nvmm_machine_put(mach);
	kmem_free(data, allocsz);
	return error;
}

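/*
 * The set/get state ioctls carry no state payload themselves: the state
 * is exchanged through the VCPU's comm page, which the MD
 * vcpu_setstate/vcpu_getstate callbacks read from and write to.
 */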
static int
nvmm_vcpu_setstate(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_setstate *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	(*nvmm_impl->vcpu_setstate)(vcpu);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_getstate(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_getstate *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	(*nvmm_impl->vcpu_getstate)(vcpu);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_inject(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_inject *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	error = (*nvmm_impl->vcpu_inject)(vcpu);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

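/*
 * VCPU execution loop. Keep re-entering the guest until either the exit
 * must be handled by userland, or a signal/reschedule is pending, in
 * which case we return with NVMM_VCPU_EXIT_NONE and let userland retry.
 * Memory exits within the guest address range are resolved here by
 * faulting the page in with uvm_fault() on the machine vmspace.
 */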
static int
nvmm_do_vcpu_run(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
    struct nvmm_vcpu_exit *exit)
{
	struct vmspace *vm = mach->vm;
	int ret;

	while (1) {
		/* Got a signal? Or pending resched? Leave. */
		if (__predict_false(nvmm_return_needed())) {
			exit->reason = NVMM_VCPU_EXIT_NONE;
			return 0;
		}

		/* Run the VCPU. */
		ret = (*nvmm_impl->vcpu_run)(mach, vcpu, exit);
		if (__predict_false(ret != 0)) {
			return ret;
		}

		/* Process nested page faults. */
		if (__predict_true(exit->reason != NVMM_VCPU_EXIT_MEMORY)) {
			break;
		}
		if (exit->u.mem.gpa >= mach->gpa_end) {
			break;
		}
		if (uvm_fault(&vm->vm_map, exit->u.mem.gpa, exit->u.mem.prot)) {
			break;
		}
	}

	return 0;
}

static int
nvmm_vcpu_run(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_run *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	error = nvmm_do_vcpu_run(mach, vcpu, &args->exit);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

/* -------------------------------------------------------------------------- */

static struct uvm_object *
nvmm_hmapping_getuobj(struct nvmm_machine *mach, uintptr_t hva, size_t size,
    size_t *off)
{
	struct nvmm_hmapping *hmapping;
	size_t i;

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present) {
			continue;
		}
		if (hva >= hmapping->hva &&
		    hva + size <= hmapping->hva + hmapping->size) {
			*off = hva - hmapping->hva;
			return hmapping->uobj;
		}
	}

	return NULL;
}

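/*
 * Validate an HVA range for registration: it must be page-aligned and
 * non-NULL. A range that falls entirely within an existing mapping is
 * accepted; any partial overlap - starting inside, ending inside, or
 * fully covering an existing mapping - is rejected with EEXIST.
 */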
static int
nvmm_hmapping_validate(struct nvmm_machine *mach, uintptr_t hva, size_t size)
{
	struct nvmm_hmapping *hmapping;
	size_t i;

	if ((hva % PAGE_SIZE) != 0 || (size % PAGE_SIZE) != 0) {
		return EINVAL;
	}
	if (hva == 0) {
		return EINVAL;
	}

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present) {
			continue;
		}

		if (hva >= hmapping->hva &&
		    hva + size <= hmapping->hva + hmapping->size) {
			break;
		}

		if (hva >= hmapping->hva &&
		    hva < hmapping->hva + hmapping->size) {
			return EEXIST;
		}
		if (hva + size > hmapping->hva &&
		    hva + size <= hmapping->hva + hmapping->size) {
			return EEXIST;
		}
		if (hva <= hmapping->hva &&
		    hva + size >= hmapping->hva + hmapping->size) {
			return EEXIST;
		}
	}

	return 0;
}

static struct nvmm_hmapping *
nvmm_hmapping_alloc(struct nvmm_machine *mach)
{
	struct nvmm_hmapping *hmapping;
	size_t i;

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present) {
			hmapping->present = true;
			return hmapping;
		}
	}

	return NULL;
}

static int
nvmm_hmapping_free(struct nvmm_machine *mach, uintptr_t hva, size_t size)
{
	struct vmspace *vmspace = curproc->p_vmspace;
	struct nvmm_hmapping *hmapping;
	size_t i;

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present || hmapping->hva != hva ||
		    hmapping->size != size) {
			continue;
		}

		uvm_unmap(&vmspace->vm_map, hmapping->hva,
		    hmapping->hva + hmapping->size);
		uao_detach(hmapping->uobj);

		hmapping->uobj = NULL;
		hmapping->present = false;

		return 0;
	}

	return ENOENT;
}

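/*
 * Register an HVA range. The range is backed by a fresh anonymous uobj,
 * which replaces the previous pages at that address in the calling
 * process (UVM_FLAG_FIXED|UVM_FLAG_UNMAP). The same uobj can later be
 * mapped into the guest physical space by nvmm_gpa_map(), so guest and
 * host see the same pages.
 */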
static int
nvmm_hva_map(struct nvmm_owner *owner, struct nvmm_ioc_hva_map *args)
{
	struct vmspace *vmspace = curproc->p_vmspace;
	struct nvmm_machine *mach;
	struct nvmm_hmapping *hmapping;
	vaddr_t uva;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error)
		return error;

	error = nvmm_hmapping_validate(mach, args->hva, args->size);
	if (error)
		goto out;

	hmapping = nvmm_hmapping_alloc(mach);
	if (hmapping == NULL) {
		error = ENOBUFS;
		goto out;
	}

	hmapping->hva = args->hva;
	hmapping->size = args->size;
	hmapping->uobj = uao_create(hmapping->size, 0);
	uva = hmapping->hva;

	/* Take a reference for the user. */
	uao_reference(hmapping->uobj);

	/* Map the uobj into the user address space, as pageable. */
	error = uvm_map(&vmspace->vm_map, &uva, hmapping->size, hmapping->uobj,
	    0, 0, UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, UVM_INH_SHARE,
	    UVM_ADV_RANDOM, UVM_FLAG_FIXED|UVM_FLAG_UNMAP));
	if (error) {
		uao_detach(hmapping->uobj);
	}

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_hva_unmap(struct nvmm_owner *owner, struct nvmm_ioc_hva_unmap *args)
{
	struct nvmm_machine *mach;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error)
		return error;

	error = nvmm_hmapping_free(mach, args->hva, args->size);

	nvmm_machine_put(mach);
	return error;
}

/* -------------------------------------------------------------------------- */

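/*
 * Map a registered HVA range at a fixed GPA in the machine's vmspace.
 * The HVA must be fully contained in one of the hmappings established by
 * nvmm_hva_map(); its backing uobj gets an extra reference, owned by the
 * machine, and is entered pageable in the guest map.
 */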
static int
nvmm_gpa_map(struct nvmm_owner *owner, struct nvmm_ioc_gpa_map *args)
{
	struct nvmm_machine *mach;
	struct uvm_object *uobj;
	gpaddr_t gpa;
	size_t off;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	if ((args->prot & ~(PROT_READ|PROT_WRITE|PROT_EXEC)) != 0) {
		error = EINVAL;
		goto out;
	}

	if ((args->gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0 ||
	    (args->hva % PAGE_SIZE) != 0) {
		error = EINVAL;
		goto out;
	}
	if (args->hva == 0) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa < mach->gpa_begin || args->gpa >= mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size <= args->gpa) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size > mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	gpa = args->gpa;

	uobj = nvmm_hmapping_getuobj(mach, args->hva, args->size, &off);
	if (uobj == NULL) {
		error = EINVAL;
		goto out;
	}

	/* Take a reference for the machine. */
	uao_reference(uobj);

	/* Map the uobj into the machine address space, as pageable. */
	error = uvm_map(&mach->vm->vm_map, &gpa, args->size, uobj, off, 0,
	    UVM_MAPFLAG(args->prot, UVM_PROT_RWX, UVM_INH_NONE,
	    UVM_ADV_RANDOM, UVM_FLAG_FIXED|UVM_FLAG_UNMAP));
	if (error) {
		uao_detach(uobj);
		goto out;
	}
	if (gpa != args->gpa) {
		uao_detach(uobj);
		printf("[!] uvm_map problem\n");
		error = EINVAL;
		goto out;
	}

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_gpa_unmap(struct nvmm_owner *owner, struct nvmm_ioc_gpa_unmap *args)
{
	struct nvmm_machine *mach;
	gpaddr_t gpa;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	if ((args->gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa < mach->gpa_begin || args->gpa >= mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size <= args->gpa) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size >= mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	gpa = args->gpa;

	/* Unmap the memory from the machine. */
	uvm_unmap(&mach->vm->vm_map, gpa, gpa + args->size);

out:
	nvmm_machine_put(mach);
	return error;
}

/* -------------------------------------------------------------------------- */

static int
nvmm_ctl_mach_info(struct nvmm_owner *owner, struct nvmm_ioc_ctl *args)
{
	struct nvmm_ctl_mach_info ctl;
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;
	size_t i;

	if (args->size != sizeof(ctl))
		return EINVAL;
	error = copyin(args->data, &ctl, sizeof(ctl));
	if (error)
		return error;

	error = nvmm_machine_get(owner, ctl.machid, &mach, true);
	if (error)
		return error;

	ctl.nvcpus = 0;
	for (i = 0; i < NVMM_MAX_VCPUS; i++) {
		error = nvmm_vcpu_get(mach, i, &vcpu);
		if (error)
			continue;
		ctl.nvcpus++;
		nvmm_vcpu_put(vcpu);
	}

	ctl.nram = 0;
	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		if (!mach->hmap[i].present)
			continue;
		ctl.nram += mach->hmap[i].size;
	}

	ctl.pid = mach->owner->pid;
	ctl.time = mach->time;

	nvmm_machine_put(mach);

	error = copyout(&ctl, args->data, sizeof(ctl));
	if (error)
		return error;

	return 0;
}

static int
nvmm_ctl(struct nvmm_owner *owner, struct nvmm_ioc_ctl *args)
{
	switch (args->op) {
	case NVMM_CTL_MACH_INFO:
		return nvmm_ctl_mach_info(owner, args);
	default:
		return EINVAL;
	}
}

/* -------------------------------------------------------------------------- */

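/*
 * Backend selection: return the first implementation whose ident()
 * callback accepts the host CPU. On x86_64 this probes AMD SVM first,
 * then Intel VMX, per the order of nvmm_impl_list.
 */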
static const struct nvmm_impl *
nvmm_ident(void)
{
	size_t i;

	for (i = 0; i < __arraycount(nvmm_impl_list); i++) {
		if ((*nvmm_impl_list[i]->ident)())
			return nvmm_impl_list[i];
	}

	return NULL;
}

static int
nvmm_init(void)
{
	size_t i, n;

	nvmm_impl = nvmm_ident();
	if (nvmm_impl == NULL)
		return ENOTSUP;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		machines[i].machid = i;
		rw_init(&machines[i].lock);
		for (n = 0; n < NVMM_MAX_VCPUS; n++) {
			machines[i].cpus[n].present = false;
			machines[i].cpus[n].cpuid = n;
			mutex_init(&machines[i].cpus[n].lock, MUTEX_DEFAULT,
			    IPL_NONE);
		}
	}

	(*nvmm_impl->init)();

	return 0;
}

static void
nvmm_fini(void)
{
	size_t i, n;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		rw_destroy(&machines[i].lock);
		for (n = 0; n < NVMM_MAX_VCPUS; n++) {
			mutex_destroy(&machines[i].cpus[n].lock);
		}
	}

	(*nvmm_impl->fini)();
	nvmm_impl = NULL;
}

/* -------------------------------------------------------------------------- */

static dev_type_open(nvmm_open);

const struct cdevsw nvmm_cdevsw = {
	.d_open = nvmm_open,
	.d_close = noclose,
	.d_read = noread,
	.d_write = nowrite,
	.d_ioctl = noioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_OTHER | D_MPSAFE
};

static int nvmm_ioctl(file_t *, u_long, void *);
static int nvmm_close(file_t *);
static int nvmm_mmap(file_t *, off_t *, size_t, int, int *, int *,
    struct uvm_object **, int *);

const struct fileops nvmm_fileops = {
	.fo_read = fbadop_read,
	.fo_write = fbadop_write,
	.fo_ioctl = nvmm_ioctl,
	.fo_fcntl = fnullop_fcntl,
	.fo_poll = fnullop_poll,
	.fo_stat = fbadop_stat,
	.fo_close = nvmm_close,
	.fo_kqfilter = fnullop_kqfilter,
	.fo_restart = fnullop_restart,
	.fo_mmap = nvmm_mmap,
};

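/*
 * Open protocol: only minor 0 exists, and O_CLOEXEC is mandatory, so
 * that nvmm descriptors cannot leak across exec. A write-only open
 * yields the root owner, which bypasses the per-machine ownership check
 * in nvmm_machine_get(); any other open gets a per-process owner. The
 * descriptor is then driven through nvmm_fileops via fd_clone().
 */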
static int
nvmm_open(dev_t dev, int flags, int type, struct lwp *l)
{
	struct nvmm_owner *owner;
	struct file *fp;
	int error, fd;

	if (__predict_false(nvmm_impl == NULL))
		return ENXIO;
	if (minor(dev) != 0)
		return EXDEV;
	if (!(flags & O_CLOEXEC))
		return EINVAL;
	error = fd_allocfile(&fp, &fd);
	if (error)
		return error;

	if (OFLAGS(flags) & O_WRONLY) {
		owner = &root_owner;
	} else {
		owner = kmem_alloc(sizeof(*owner), KM_SLEEP);
		owner->pid = l->l_proc->p_pid;
	}

	return fd_clone(fp, fd, flags, &nvmm_fileops, owner);
}

static int
nvmm_close(file_t *fp)
{
	struct nvmm_owner *owner = fp->f_data;

	KASSERT(owner != NULL);
	nvmm_kill_machines(owner);
	if (owner != &root_owner) {
		kmem_free(owner, sizeof(*owner));
	}
	fp->f_data = NULL;

	return 0;
}

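/*
 * mmap() on the device maps a VCPU comm page into userland. The file
 * offset encodes both the machine and the VCPU ids, decoded below with
 * NVMM_COMM_MACHID()/NVMM_COMM_CPUID(). Sketch of the userland side
 * (with 'off' built as the inverse of that encoding):
 *
 *	comm = mmap(NULL, PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED,
 *	    fd, off);
 */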
static int
nvmm_mmap(file_t *fp, off_t *offp, size_t size, int prot, int *flagsp,
    int *advicep, struct uvm_object **uobjp, int *maxprotp)
{
	struct nvmm_owner *owner = fp->f_data;
	struct nvmm_machine *mach;
	nvmm_machid_t machid;
	nvmm_cpuid_t cpuid;
	int error;

	if (prot & PROT_EXEC)
		return EACCES;
	if (size != PAGE_SIZE)
		return EINVAL;

	cpuid = NVMM_COMM_CPUID(*offp);
	if (__predict_false(cpuid >= NVMM_MAX_VCPUS))
		return EINVAL;

	machid = NVMM_COMM_MACHID(*offp);
	error = nvmm_machine_get(owner, machid, &mach, false);
	if (error)
		return error;

	uao_reference(mach->commuobj);
	*uobjp = mach->commuobj;
	*offp = cpuid * PAGE_SIZE;
	*maxprotp = prot;
	*advicep = UVM_ADV_RANDOM;

	nvmm_machine_put(mach);
	return 0;
}

static int
nvmm_ioctl(file_t *fp, u_long cmd, void *data)
{
	struct nvmm_owner *owner = fp->f_data;

	KASSERT(owner != NULL);

	switch (cmd) {
	case NVMM_IOC_CAPABILITY:
		return nvmm_capability(owner, data);
	case NVMM_IOC_MACHINE_CREATE:
		return nvmm_machine_create(owner, data);
	case NVMM_IOC_MACHINE_DESTROY:
		return nvmm_machine_destroy(owner, data);
	case NVMM_IOC_MACHINE_CONFIGURE:
		return nvmm_machine_configure(owner, data);
	case NVMM_IOC_VCPU_CREATE:
		return nvmm_vcpu_create(owner, data);
	case NVMM_IOC_VCPU_DESTROY:
		return nvmm_vcpu_destroy(owner, data);
	case NVMM_IOC_VCPU_CONFIGURE:
		return nvmm_vcpu_configure(owner, data);
	case NVMM_IOC_VCPU_SETSTATE:
		return nvmm_vcpu_setstate(owner, data);
	case NVMM_IOC_VCPU_GETSTATE:
		return nvmm_vcpu_getstate(owner, data);
	case NVMM_IOC_VCPU_INJECT:
		return nvmm_vcpu_inject(owner, data);
	case NVMM_IOC_VCPU_RUN:
		return nvmm_vcpu_run(owner, data);
	case NVMM_IOC_GPA_MAP:
		return nvmm_gpa_map(owner, data);
	case NVMM_IOC_GPA_UNMAP:
		return nvmm_gpa_unmap(owner, data);
	case NVMM_IOC_HVA_MAP:
		return nvmm_hva_map(owner, data);
	case NVMM_IOC_HVA_UNMAP:
		return nvmm_hva_unmap(owner, data);
	case NVMM_IOC_CTL:
		return nvmm_ctl(owner, data);
	default:
		return EINVAL;
	}
}

/* -------------------------------------------------------------------------- */

static int nvmm_match(device_t, cfdata_t, void *);
static void nvmm_attach(device_t, device_t, void *);
static int nvmm_detach(device_t, int);

extern struct cfdriver nvmm_cd;

CFATTACH_DECL_NEW(nvmm, 0, nvmm_match, nvmm_attach, nvmm_detach, NULL);

static struct cfdata nvmm_cfdata[] = {
	{
		.cf_name = "nvmm",
		.cf_atname = "nvmm",
		.cf_unit = 0,
		.cf_fstate = FSTATE_STAR,
		.cf_loc = NULL,
		.cf_flags = 0,
		.cf_pspec = NULL,
	},
	{ NULL, NULL, 0, FSTATE_NOTFOUND, NULL, 0, NULL }
};

static int
nvmm_match(device_t self, cfdata_t cfdata, void *arg)
{
	return 1;
}

static void
nvmm_attach(device_t parent, device_t self, void *aux)
{
	int error;

	error = nvmm_init();
	if (error)
		panic("%s: impossible", __func__);
	aprint_normal_dev(self, "attached, using backend %s\n",
	    nvmm_impl->name);
}

static int
nvmm_detach(device_t self, int flags)
{
	if (nmachines > 0)
		return EBUSY;
	nvmm_fini();
	return 0;
}

void
nvmmattach(int nunits)
{
	/* nothing */
}

MODULE(MODULE_CLASS_MISC, nvmm, NULL);

#if defined(_MODULE)
CFDRIVER_DECL(nvmm, DV_VIRTUAL, NULL);
#endif

static int
nvmm_modcmd(modcmd_t cmd, void *arg)
{
#if defined(_MODULE)
	devmajor_t bmajor = NODEVMAJOR;
	devmajor_t cmajor = 345;
#endif
	int error;

	switch (cmd) {
	case MODULE_CMD_INIT:
		if (nvmm_ident() == NULL) {
			aprint_error("%s: cpu not supported\n",
			    nvmm_cd.cd_name);
			return ENOTSUP;
		}
#if defined(_MODULE)
		error = config_cfdriver_attach(&nvmm_cd);
		if (error)
			return error;
#endif
		error = config_cfattach_attach(nvmm_cd.cd_name, &nvmm_ca);
		if (error) {
			config_cfdriver_detach(&nvmm_cd);
			aprint_error("%s: config_cfattach_attach failed\n",
			    nvmm_cd.cd_name);
			return error;
		}

		error = config_cfdata_attach(nvmm_cfdata, 1);
		if (error) {
			config_cfattach_detach(nvmm_cd.cd_name, &nvmm_ca);
			config_cfdriver_detach(&nvmm_cd);
			aprint_error("%s: unable to register cfdata\n",
			    nvmm_cd.cd_name);
			return error;
		}

		if (config_attach_pseudo(nvmm_cfdata) == NULL) {
			aprint_error("%s: config_attach_pseudo failed\n",
			    nvmm_cd.cd_name);
			config_cfattach_detach(nvmm_cd.cd_name, &nvmm_ca);
			config_cfdriver_detach(&nvmm_cd);
			return ENXIO;
		}

#if defined(_MODULE)
		/* mknod /dev/nvmm c 345 0 */
		error = devsw_attach(nvmm_cd.cd_name, NULL, &bmajor,
		    &nvmm_cdevsw, &cmajor);
		if (error) {
			aprint_error("%s: unable to register devsw\n",
			    nvmm_cd.cd_name);
			config_cfattach_detach(nvmm_cd.cd_name, &nvmm_ca);
			config_cfdriver_detach(&nvmm_cd);
			return error;
		}
#endif
		return 0;
	case MODULE_CMD_FINI:
		error = config_cfdata_detach(nvmm_cfdata);
		if (error)
			return error;
		error = config_cfattach_detach(nvmm_cd.cd_name, &nvmm_ca);
		if (error)
			return error;
#if defined(_MODULE)
		config_cfdriver_detach(&nvmm_cd);
		devsw_detach(NULL, &nvmm_cdevsw);
#endif
		return 0;
	case MODULE_CMD_AUTOUNLOAD:
		return EBUSY;
	default:
		return ENOTTY;
	}
}