/*	$NetBSD: nvmm.c,v 1.32 2020/07/03 16:09:54 maxv Exp $	*/

/*
 * Copyright (c) 2018-2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Maxime Villard.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: nvmm.c,v 1.32 2020/07/03 16:09:54 maxv Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>

#include <sys/cpu.h>
#include <sys/conf.h>
#include <sys/kmem.h>
#include <sys/module.h>
#include <sys/proc.h>
#include <sys/mman.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/device.h>

#include <uvm/uvm.h>
#include <uvm/uvm_page.h>

#include "ioconf.h"

#include <dev/nvmm/nvmm.h>
#include <dev/nvmm/nvmm_internal.h>
#include <dev/nvmm/nvmm_ioctl.h>

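/*
 * Global VMM state: a static pool of NVMM_MAX_MACHINES machine slots, plus
 * a counter of the slots currently in use (consulted at detach time).
 */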
static struct nvmm_machine machines[NVMM_MAX_MACHINES];
static volatile unsigned int nmachines __cacheline_aligned;

static const struct nvmm_impl *nvmm_impl_list[] = {
	&nvmm_x86_svm,	/* x86 AMD SVM */
	&nvmm_x86_vmx	/* x86 Intel VMX */
};

static const struct nvmm_impl *nvmm_impl = NULL;

static struct nvmm_owner root_owner;

/* -------------------------------------------------------------------------- */

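/*
 * Find a free slot in the machine pool, mark it present, and return it with
 * its rwlock held as writer. Returns ENOBUFS if all the slots are in use.
 */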
static int
nvmm_machine_alloc(struct nvmm_machine **ret)
{
	struct nvmm_machine *mach;
	size_t i;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		mach = &machines[i];

		rw_enter(&mach->lock, RW_WRITER);
		if (mach->present) {
			rw_exit(&mach->lock);
			continue;
		}

		mach->present = true;
		mach->time = time_second;
		*ret = mach;
		atomic_inc_uint(&nmachines);
		return 0;
	}

	return ENOBUFS;
}

static void
nvmm_machine_free(struct nvmm_machine *mach)
{
	KASSERT(rw_write_held(&mach->lock));
	KASSERT(mach->present);
	mach->present = false;
	atomic_dec_uint(&nmachines);
}

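/*
 * Look up a machine by id and return it locked (reader or writer, as
 * requested). Fails with ENOENT if the slot is empty, and with EPERM if the
 * caller is not the machine's owner; root_owner bypasses the ownership check.
 */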
static int
nvmm_machine_get(struct nvmm_owner *owner, nvmm_machid_t machid,
    struct nvmm_machine **ret, bool writer)
{
	struct nvmm_machine *mach;
	krw_t op = writer ? RW_WRITER : RW_READER;

	if (machid >= NVMM_MAX_MACHINES) {
		return EINVAL;
	}
	mach = &machines[machid];

	rw_enter(&mach->lock, op);
	if (!mach->present) {
		rw_exit(&mach->lock);
		return ENOENT;
	}
	if (owner != &root_owner && mach->owner != owner) {
		rw_exit(&mach->lock);
		return EPERM;
	}
	*ret = mach;

	return 0;
}

static void
nvmm_machine_put(struct nvmm_machine *mach)
{
	rw_exit(&mach->lock);
}

/* -------------------------------------------------------------------------- */

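/*
 * Claim the VCPU slot 'cpuid' of the given machine and return it with its
 * mutex held. Fails with EBUSY if the slot is already in use.
 */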
static int
nvmm_vcpu_alloc(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    struct nvmm_cpu **ret)
{
	struct nvmm_cpu *vcpu;

	if (cpuid >= NVMM_MAX_VCPUS) {
		return EINVAL;
	}
	vcpu = &mach->cpus[cpuid];

	mutex_enter(&vcpu->lock);
	if (vcpu->present) {
		mutex_exit(&vcpu->lock);
		return EBUSY;
	}

	vcpu->present = true;
	vcpu->comm = NULL;
	vcpu->hcpu_last = -1;
	*ret = vcpu;
	return 0;
}

static void
nvmm_vcpu_free(struct nvmm_machine *mach, struct nvmm_cpu *vcpu)
{
	KASSERT(mutex_owned(&vcpu->lock));
	vcpu->present = false;
	if (vcpu->comm != NULL) {
		uvm_deallocate(kernel_map, (vaddr_t)vcpu->comm, PAGE_SIZE);
	}
}

static int
nvmm_vcpu_get(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    struct nvmm_cpu **ret)
{
	struct nvmm_cpu *vcpu;

	if (cpuid >= NVMM_MAX_VCPUS) {
		return EINVAL;
	}
	vcpu = &mach->cpus[cpuid];

	mutex_enter(&vcpu->lock);
	if (!vcpu->present) {
		mutex_exit(&vcpu->lock);
		return ENOENT;
	}
	*ret = vcpu;

	return 0;
}

static void
nvmm_vcpu_put(struct nvmm_cpu *vcpu)
{
	mutex_exit(&vcpu->lock);
}

/* -------------------------------------------------------------------------- */

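/*
 * Tear down every machine belonging to 'owner': destroy its VCPUs, its
 * backend state and its vmspace, and drop the references on the kernel
 * UOBJs backing the host mappings. Called when the owner's fd is closed.
 */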
static void
nvmm_kill_machines(struct nvmm_owner *owner)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	size_t i, j;
	int error;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		mach = &machines[i];

		rw_enter(&mach->lock, RW_WRITER);
		if (!mach->present || mach->owner != owner) {
			rw_exit(&mach->lock);
			continue;
		}

		/* Kill it. */
		for (j = 0; j < NVMM_MAX_VCPUS; j++) {
			error = nvmm_vcpu_get(mach, j, &vcpu);
			if (error)
				continue;
			(*nvmm_impl->vcpu_destroy)(mach, vcpu);
			nvmm_vcpu_free(mach, vcpu);
			nvmm_vcpu_put(vcpu);
		}
		(*nvmm_impl->machine_destroy)(mach);
		uvmspace_free(mach->vm);

		/* Drop the kernel UOBJ refs. */
		for (j = 0; j < NVMM_MAX_HMAPPINGS; j++) {
			if (!mach->hmap[j].present)
				continue;
			uao_detach(mach->hmap[j].uobj);
		}

		nvmm_machine_free(mach);

		rw_exit(&mach->lock);
	}
}

/* -------------------------------------------------------------------------- */

static int
nvmm_capability(struct nvmm_owner *owner, struct nvmm_ioc_capability *args)
{
	args->cap.version = NVMM_KERN_VERSION;
	args->cap.state_size = nvmm_impl->state_size;
	args->cap.max_machines = NVMM_MAX_MACHINES;
	args->cap.max_vcpus = NVMM_MAX_VCPUS;
	args->cap.max_ram = NVMM_MAX_RAM;

	(*nvmm_impl->capability)(&args->cap);

	return 0;
}

static int
nvmm_machine_create(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_create *args)
{
	struct nvmm_machine *mach;
	int error;

	error = nvmm_machine_alloc(&mach);
	if (error)
		return error;

	/* Curproc owns the machine. */
	mach->owner = owner;

	/* Zero out the host mappings. */
	memset(&mach->hmap, 0, sizeof(mach->hmap));

	/* Create the machine vmspace. */
	mach->gpa_begin = 0;
	mach->gpa_end = NVMM_MAX_RAM;
	mach->vm = uvmspace_alloc(0, mach->gpa_end - mach->gpa_begin, false);

	/* Create the comm uobj. */
	mach->commuobj = uao_create(NVMM_MAX_VCPUS * PAGE_SIZE, 0);

	(*nvmm_impl->machine_create)(mach);

	args->machid = mach->machid;
	nvmm_machine_put(mach);

	return 0;
}

static int
nvmm_machine_destroy(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_destroy *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;
	size_t i;

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error)
		return error;

	for (i = 0; i < NVMM_MAX_VCPUS; i++) {
		error = nvmm_vcpu_get(mach, i, &vcpu);
		if (error)
			continue;

		(*nvmm_impl->vcpu_destroy)(mach, vcpu);
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
	}

	(*nvmm_impl->machine_destroy)(mach);

	/* Free the machine vmspace. */
	uvmspace_free(mach->vm);

	/* Drop the kernel UOBJ refs. */
	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		if (!mach->hmap[i].present)
			continue;
		uao_detach(mach->hmap[i].uobj);
	}

	nvmm_machine_free(mach);
	nvmm_machine_put(mach);

	return 0;
}

static int
nvmm_machine_configure(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_configure *args)
{
	struct nvmm_machine *mach;
	size_t allocsz;
	uint64_t op;
	void *data;
	int error;

	op = NVMM_MACH_CONF_MD(args->op);
	if (__predict_false(op >= nvmm_impl->mach_conf_max)) {
		return EINVAL;
	}

	allocsz = nvmm_impl->mach_conf_sizes[op];
	data = kmem_alloc(allocsz, KM_SLEEP);

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error) {
		kmem_free(data, allocsz);
		return error;
	}

	error = copyin(args->conf, data, allocsz);
	if (error) {
		goto out;
	}

	error = (*nvmm_impl->machine_configure)(mach, op, data);

out:
	nvmm_machine_put(mach);
	kmem_free(data, allocsz);
	return error;
}

static int
nvmm_vcpu_create(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_create *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_alloc(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	/* Allocate the comm page. */
	uao_reference(mach->commuobj);
	error = uvm_map(kernel_map, (vaddr_t *)&vcpu->comm, PAGE_SIZE,
	    mach->commuobj, args->cpuid * PAGE_SIZE, 0, UVM_MAPFLAG(UVM_PROT_RW,
	    UVM_PROT_RW, UVM_INH_SHARE, UVM_ADV_RANDOM, 0));
	if (error) {
		uao_detach(mach->commuobj);
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		goto out;
	}
	error = uvm_map_pageable(kernel_map, (vaddr_t)vcpu->comm,
	    (vaddr_t)vcpu->comm + PAGE_SIZE, false, 0);
	if (error) {
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		goto out;
	}
	memset(vcpu->comm, 0, PAGE_SIZE);

	error = (*nvmm_impl->vcpu_create)(mach, vcpu);
	if (error) {
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		goto out;
	}

	nvmm_vcpu_put(vcpu);

	atomic_inc_uint(&mach->ncpus);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_destroy(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_destroy *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	(*nvmm_impl->vcpu_destroy)(mach, vcpu);
	nvmm_vcpu_free(mach, vcpu);
	nvmm_vcpu_put(vcpu);

	atomic_dec_uint(&mach->ncpus);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_configure(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_configure *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	size_t allocsz;
	uint64_t op;
	void *data;
	int error;

	op = NVMM_VCPU_CONF_MD(args->op);
	if (__predict_false(op >= nvmm_impl->vcpu_conf_max))
		return EINVAL;

	allocsz = nvmm_impl->vcpu_conf_sizes[op];
	data = kmem_alloc(allocsz, KM_SLEEP);

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error) {
		kmem_free(data, allocsz);
		return error;
	}

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error) {
		nvmm_machine_put(mach);
		kmem_free(data, allocsz);
		return error;
	}

	error = copyin(args->conf, data, allocsz);
	if (error) {
		goto out;
	}

	error = (*nvmm_impl->vcpu_configure)(vcpu, op, data);

out:
	nvmm_vcpu_put(vcpu);
	nvmm_machine_put(mach);
	kmem_free(data, allocsz);
	return error;
}

static int
nvmm_vcpu_setstate(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_setstate *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	(*nvmm_impl->vcpu_setstate)(vcpu);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_getstate(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_getstate *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	(*nvmm_impl->vcpu_getstate)(vcpu);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_vcpu_inject(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_inject *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	error = (*nvmm_impl->vcpu_inject)(vcpu);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

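/*
 * Run the VCPU in a loop. We return to userland when a signal or a
 * reschedule is pending, and on any exit reason the emulator must handle.
 * Nested page faults on guest RAM are resolved directly in the kernel, by
 * faulting the page into the machine's vmspace and resuming the VCPU.
 */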
static int
nvmm_do_vcpu_run(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
    struct nvmm_vcpu_exit *exit)
{
	struct vmspace *vm = mach->vm;
	int ret;

	while (1) {
		/* Got a signal? Or pending resched? Leave. */
		if (__predict_false(nvmm_return_needed())) {
			exit->reason = NVMM_VCPU_EXIT_NONE;
			return 0;
		}

		/* Run the VCPU. */
		ret = (*nvmm_impl->vcpu_run)(mach, vcpu, exit);
		if (__predict_false(ret != 0)) {
			return ret;
		}

		/* Process nested page faults. */
		if (__predict_true(exit->reason != NVMM_VCPU_EXIT_MEMORY)) {
			break;
		}
		if (exit->u.mem.gpa >= mach->gpa_end) {
			break;
		}
		if (uvm_fault(&vm->vm_map, exit->u.mem.gpa, exit->u.mem.prot)) {
			break;
		}
	}

	return 0;
}

static int
nvmm_vcpu_run(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_run *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	error = nvmm_do_vcpu_run(mach, vcpu, &args->exit);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}

/* -------------------------------------------------------------------------- */

static struct uvm_object *
nvmm_hmapping_getuobj(struct nvmm_machine *mach, uintptr_t hva, size_t size,
    size_t *off)
{
	struct nvmm_hmapping *hmapping;
	size_t i;

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present) {
			continue;
		}
		if (hva >= hmapping->hva &&
		    hva + size <= hmapping->hva + hmapping->size) {
			*off = hva - hmapping->hva;
			return hmapping->uobj;
		}
	}

	return NULL;
}

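/*
 * Check that [hva, hva+size) is page-aligned, non-NULL, and either fully
 * contained in an existing host mapping or fully disjoint from all of
 * them; a partial overlap in any direction is rejected with EEXIST.
 */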
static int
nvmm_hmapping_validate(struct nvmm_machine *mach, uintptr_t hva, size_t size)
{
	struct nvmm_hmapping *hmapping;
	size_t i;

	if ((hva % PAGE_SIZE) != 0 || (size % PAGE_SIZE) != 0) {
		return EINVAL;
	}
	if (hva == 0) {
		return EINVAL;
	}

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present) {
			continue;
		}

		if (hva >= hmapping->hva &&
		    hva + size <= hmapping->hva + hmapping->size) {
			break;
		}

		if (hva >= hmapping->hva &&
		    hva < hmapping->hva + hmapping->size) {
			return EEXIST;
		}
		if (hva + size > hmapping->hva &&
		    hva + size <= hmapping->hva + hmapping->size) {
			return EEXIST;
		}
		if (hva <= hmapping->hva &&
		    hva + size >= hmapping->hva + hmapping->size) {
			return EEXIST;
		}
	}

	return 0;
}

static struct nvmm_hmapping *
nvmm_hmapping_alloc(struct nvmm_machine *mach)
{
	struct nvmm_hmapping *hmapping;
	size_t i;

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present) {
			hmapping->present = true;
			return hmapping;
		}
	}

	return NULL;
}

static int
nvmm_hmapping_free(struct nvmm_machine *mach, uintptr_t hva, size_t size)
{
	struct vmspace *vmspace = curproc->p_vmspace;
	struct nvmm_hmapping *hmapping;
	size_t i;

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present || hmapping->hva != hva ||
		    hmapping->size != size) {
			continue;
		}

		uvm_unmap(&vmspace->vm_map, hmapping->hva,
		    hmapping->hva + hmapping->size);
		uao_detach(hmapping->uobj);

		hmapping->uobj = NULL;
		hmapping->present = false;

		return 0;
	}

	return ENOENT;
}

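/*
 * Back [hva, hva+size) in the calling process with a new anonymous UOBJ,
 * and record the mapping so that nvmm_gpa_map() can later enter parts of
 * it into the guest physical address space.
 */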
static int
nvmm_hva_map(struct nvmm_owner *owner, struct nvmm_ioc_hva_map *args)
{
	struct vmspace *vmspace = curproc->p_vmspace;
	struct nvmm_machine *mach;
	struct nvmm_hmapping *hmapping;
	vaddr_t uva;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error)
		return error;

	error = nvmm_hmapping_validate(mach, args->hva, args->size);
	if (error)
		goto out;

	hmapping = nvmm_hmapping_alloc(mach);
	if (hmapping == NULL) {
		error = ENOBUFS;
		goto out;
	}

	hmapping->hva = args->hva;
	hmapping->size = args->size;
	hmapping->uobj = uao_create(hmapping->size, 0);
	uva = hmapping->hva;

	/* Take a reference for the user. */
	uao_reference(hmapping->uobj);

	/* Map the uobj into the user address space, as pageable. */
	error = uvm_map(&vmspace->vm_map, &uva, hmapping->size, hmapping->uobj,
	    0, 0, UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, UVM_INH_SHARE,
	    UVM_ADV_RANDOM, UVM_FLAG_FIXED|UVM_FLAG_UNMAP));
	if (error) {
		uao_detach(hmapping->uobj);
	}

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_hva_unmap(struct nvmm_owner *owner, struct nvmm_ioc_hva_unmap *args)
{
	struct nvmm_machine *mach;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error)
		return error;

	error = nvmm_hmapping_free(mach, args->hva, args->size);

	nvmm_machine_put(mach);
	return error;
}

/* -------------------------------------------------------------------------- */

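/*
 * Map a previously registered host range into the guest physical address
 * space, by entering the backing UOBJ (at the right offset) into the
 * machine's vmspace. The range must be page-aligned and must fit in
 * [gpa_begin, gpa_end).
 */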
static int
nvmm_gpa_map(struct nvmm_owner *owner, struct nvmm_ioc_gpa_map *args)
{
	struct nvmm_machine *mach;
	struct uvm_object *uobj;
	gpaddr_t gpa;
	size_t off;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	if ((args->prot & ~(PROT_READ|PROT_WRITE|PROT_EXEC)) != 0) {
		error = EINVAL;
		goto out;
	}

	if ((args->gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0 ||
	    (args->hva % PAGE_SIZE) != 0) {
		error = EINVAL;
		goto out;
	}
	if (args->hva == 0) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa < mach->gpa_begin || args->gpa >= mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size <= args->gpa) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size > mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	gpa = args->gpa;

	uobj = nvmm_hmapping_getuobj(mach, args->hva, args->size, &off);
	if (uobj == NULL) {
		error = EINVAL;
		goto out;
	}

	/* Take a reference for the machine. */
	uao_reference(uobj);

	/* Map the uobj into the machine address space, as pageable. */
	error = uvm_map(&mach->vm->vm_map, &gpa, args->size, uobj, off, 0,
	    UVM_MAPFLAG(args->prot, UVM_PROT_RWX, UVM_INH_NONE,
	    UVM_ADV_RANDOM, UVM_FLAG_FIXED|UVM_FLAG_UNMAP));
	if (error) {
		uao_detach(uobj);
		goto out;
	}
	if (gpa != args->gpa) {
		uao_detach(uobj);
		printf("[!] uvm_map problem\n");
		error = EINVAL;
		goto out;
	}

out:
	nvmm_machine_put(mach);
	return error;
}

static int
nvmm_gpa_unmap(struct nvmm_owner *owner, struct nvmm_ioc_gpa_unmap *args)
{
	struct nvmm_machine *mach;
	gpaddr_t gpa;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	if ((args->gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa < mach->gpa_begin || args->gpa >= mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size <= args->gpa) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size > mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	gpa = args->gpa;

	/* Unmap the memory from the machine. */
	uvm_unmap(&mach->vm->vm_map, gpa, gpa + args->size);

out:
	nvmm_machine_put(mach);
	return error;
}

/* -------------------------------------------------------------------------- */

static int
nvmm_ctl_mach_info(struct nvmm_owner *owner, struct nvmm_ioc_ctl *args)
{
	struct nvmm_ctl_mach_info ctl;
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;
	size_t i;

	if (args->size != sizeof(ctl))
		return EINVAL;
	error = copyin(args->data, &ctl, sizeof(ctl));
	if (error)
		return error;

	error = nvmm_machine_get(owner, ctl.machid, &mach, true);
	if (error)
		return error;

	ctl.nvcpus = 0;
	for (i = 0; i < NVMM_MAX_VCPUS; i++) {
		error = nvmm_vcpu_get(mach, i, &vcpu);
		if (error)
			continue;
		ctl.nvcpus++;
		nvmm_vcpu_put(vcpu);
	}

	ctl.nram = 0;
	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		if (!mach->hmap[i].present)
			continue;
		ctl.nram += mach->hmap[i].size;
	}

	ctl.pid = mach->owner->pid;
	ctl.time = mach->time;

	nvmm_machine_put(mach);

	error = copyout(&ctl, args->data, sizeof(ctl));
	if (error)
		return error;

	return 0;
}

static int
nvmm_ctl(struct nvmm_owner *owner, struct nvmm_ioc_ctl *args)
{
	switch (args->op) {
	case NVMM_CTL_MACH_INFO:
		return nvmm_ctl_mach_info(owner, args);
	default:
		return EINVAL;
	}
}

/* -------------------------------------------------------------------------- */

static const struct nvmm_impl *
nvmm_ident(void)
{
	size_t i;

	for (i = 0; i < __arraycount(nvmm_impl_list); i++) {
		if ((*nvmm_impl_list[i]->ident)())
			return nvmm_impl_list[i];
	}

	return NULL;
}

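/*
 * One-time initialization: pick a backend, initialize the machine and
 * VCPU locks, and let the backend set itself up.
 */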
static int
nvmm_init(void)
{
	size_t i, n;

	nvmm_impl = nvmm_ident();
	if (nvmm_impl == NULL)
		return ENOTSUP;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		machines[i].machid = i;
		rw_init(&machines[i].lock);
		for (n = 0; n < NVMM_MAX_VCPUS; n++) {
			machines[i].cpus[n].present = false;
			machines[i].cpus[n].cpuid = n;
			mutex_init(&machines[i].cpus[n].lock, MUTEX_DEFAULT,
			    IPL_NONE);
		}
	}

	(*nvmm_impl->init)();

	return 0;
}

static void
nvmm_fini(void)
{
	size_t i, n;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		rw_destroy(&machines[i].lock);
		for (n = 0; n < NVMM_MAX_VCPUS; n++) {
			mutex_destroy(&machines[i].cpus[n].lock);
		}
	}

	(*nvmm_impl->fini)();
	nvmm_impl = NULL;
}

/* -------------------------------------------------------------------------- */

static dev_type_open(nvmm_open);

const struct cdevsw nvmm_cdevsw = {
	.d_open = nvmm_open,
	.d_close = noclose,
	.d_read = noread,
	.d_write = nowrite,
	.d_ioctl = noioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_OTHER | D_MPSAFE
};

static int nvmm_ioctl(file_t *, u_long, void *);
static int nvmm_close(file_t *);
static int nvmm_mmap(file_t *, off_t *, size_t, int, int *, int *,
    struct uvm_object **, int *);

const struct fileops nvmm_fileops = {
	.fo_read = fbadop_read,
	.fo_write = fbadop_write,
	.fo_ioctl = nvmm_ioctl,
	.fo_fcntl = fnullop_fcntl,
	.fo_poll = fnullop_poll,
	.fo_stat = fbadop_stat,
	.fo_close = nvmm_close,
	.fo_kqfilter = fnullop_kqfilter,
	.fo_restart = fnullop_restart,
	.fo_mmap = nvmm_mmap,
};

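/*
 * Open /dev/nvmm and clone a per-open file. The open must request
 * close-on-exec. A write-only open gets the global root_owner, which
 * bypasses the per-machine ownership check in nvmm_machine_get(); any
 * other open allocates a fresh owner bound to the calling process.
 */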
static int
nvmm_open(dev_t dev, int flags, int type, struct lwp *l)
{
	struct nvmm_owner *owner;
	struct file *fp;
	int error, fd;

	if (__predict_false(nvmm_impl == NULL))
		return ENXIO;
	if (minor(dev) != 0)
		return EXDEV;
	if (!(flags & O_CLOEXEC))
		return EINVAL;
	error = fd_allocfile(&fp, &fd);
	if (error)
		return error;

	if (OFLAGS(flags) & O_WRONLY) {
		owner = &root_owner;
	} else {
		owner = kmem_alloc(sizeof(*owner), KM_SLEEP);
		owner->pid = l->l_proc->p_pid;
	}

	return fd_clone(fp, fd, flags, &nvmm_fileops, owner);
}

static int
nvmm_close(file_t *fp)
{
	struct nvmm_owner *owner = fp->f_data;

	KASSERT(owner != NULL);
	nvmm_kill_machines(owner);
	if (owner != &root_owner) {
		kmem_free(owner, sizeof(*owner));
	}
	fp->f_data = NULL;

	return 0;
}

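/*
 * mmap() handler: hand out one VCPU comm page. The file offset encodes the
 * machine and VCPU ids, decoded with NVMM_COMM_MACHID()/NVMM_COMM_CPUID(),
 * and is rewritten to the page's offset inside the machine's comm uobj.
 * Userland is expected to do something along these lines (sketch, with
 * NVMM_COMM_OFF() assumed to be the companion encoding macro):
 *
 *	comm = mmap(NULL, PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED,
 *	    nvmm_fd, NVMM_COMM_OFF(machid, cpuid));
 */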
static int
nvmm_mmap(file_t *fp, off_t *offp, size_t size, int prot, int *flagsp,
    int *advicep, struct uvm_object **uobjp, int *maxprotp)
{
	struct nvmm_owner *owner = fp->f_data;
	struct nvmm_machine *mach;
	nvmm_machid_t machid;
	nvmm_cpuid_t cpuid;
	int error;

	if (prot & PROT_EXEC)
		return EACCES;
	if (size != PAGE_SIZE)
		return EINVAL;

	cpuid = NVMM_COMM_CPUID(*offp);
	if (__predict_false(cpuid >= NVMM_MAX_VCPUS))
		return EINVAL;

	machid = NVMM_COMM_MACHID(*offp);
	error = nvmm_machine_get(owner, machid, &mach, false);
	if (error)
		return error;

	uao_reference(mach->commuobj);
	*uobjp = mach->commuobj;
	*offp = cpuid * PAGE_SIZE;
	*maxprotp = prot;
	*advicep = UVM_ADV_RANDOM;

	nvmm_machine_put(mach);
	return 0;
}

static int
nvmm_ioctl(file_t *fp, u_long cmd, void *data)
{
	struct nvmm_owner *owner = fp->f_data;

	KASSERT(owner != NULL);

	switch (cmd) {
	case NVMM_IOC_CAPABILITY:
		return nvmm_capability(owner, data);
	case NVMM_IOC_MACHINE_CREATE:
		return nvmm_machine_create(owner, data);
	case NVMM_IOC_MACHINE_DESTROY:
		return nvmm_machine_destroy(owner, data);
	case NVMM_IOC_MACHINE_CONFIGURE:
		return nvmm_machine_configure(owner, data);
	case NVMM_IOC_VCPU_CREATE:
		return nvmm_vcpu_create(owner, data);
	case NVMM_IOC_VCPU_DESTROY:
		return nvmm_vcpu_destroy(owner, data);
	case NVMM_IOC_VCPU_CONFIGURE:
		return nvmm_vcpu_configure(owner, data);
	case NVMM_IOC_VCPU_SETSTATE:
		return nvmm_vcpu_setstate(owner, data);
	case NVMM_IOC_VCPU_GETSTATE:
		return nvmm_vcpu_getstate(owner, data);
	case NVMM_IOC_VCPU_INJECT:
		return nvmm_vcpu_inject(owner, data);
	case NVMM_IOC_VCPU_RUN:
		return nvmm_vcpu_run(owner, data);
	case NVMM_IOC_GPA_MAP:
		return nvmm_gpa_map(owner, data);
	case NVMM_IOC_GPA_UNMAP:
		return nvmm_gpa_unmap(owner, data);
	case NVMM_IOC_HVA_MAP:
		return nvmm_hva_map(owner, data);
	case NVMM_IOC_HVA_UNMAP:
		return nvmm_hva_unmap(owner, data);
	case NVMM_IOC_CTL:
		return nvmm_ctl(owner, data);
	default:
		return EINVAL;
	}
}

/* -------------------------------------------------------------------------- */

static int nvmm_match(device_t, cfdata_t, void *);
static void nvmm_attach(device_t, device_t, void *);
static int nvmm_detach(device_t, int);

extern struct cfdriver nvmm_cd;

CFATTACH_DECL_NEW(nvmm, 0, nvmm_match, nvmm_attach, nvmm_detach, NULL);

static struct cfdata nvmm_cfdata[] = {
	{
		.cf_name = "nvmm",
		.cf_atname = "nvmm",
		.cf_unit = 0,
		.cf_fstate = FSTATE_STAR,
		.cf_loc = NULL,
		.cf_flags = 0,
		.cf_pspec = NULL,
	},
	{ NULL, NULL, 0, FSTATE_NOTFOUND, NULL, 0, NULL }
};

static int
nvmm_match(device_t self, cfdata_t cfdata, void *arg)
{
	return 1;
}

static void
nvmm_attach(device_t parent, device_t self, void *aux)
{
	int error;

	error = nvmm_init();
	if (error)
		panic("%s: impossible", __func__);
	aprint_normal_dev(self, "attached, using backend %s\n",
	    nvmm_impl->name);
}

static int
nvmm_detach(device_t self, int flags)
{
	if (nmachines > 0)
		return EBUSY;
	nvmm_fini();
	return 0;
}

void
nvmmattach(int nunits)
{
	/* nothing */
}

MODULE(MODULE_CLASS_MISC, nvmm, NULL);

#if defined(_MODULE)
CFDRIVER_DECL(nvmm, DV_VIRTUAL, NULL);
#endif

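/*
 * Module glue: on load, register the cfdriver/cfattach, attach the pseudo
 * device, and (when built as a module) attach the character devsw; on
 * unload, undo all of it in reverse order.
 */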
static int
nvmm_modcmd(modcmd_t cmd, void *arg)
{
#if defined(_MODULE)
	devmajor_t bmajor = NODEVMAJOR;
	devmajor_t cmajor = 345;
#endif
	int error;

	switch (cmd) {
	case MODULE_CMD_INIT:
		if (nvmm_ident() == NULL) {
			aprint_error("%s: cpu not supported\n",
			    nvmm_cd.cd_name);
			return ENOTSUP;
		}
#if defined(_MODULE)
		error = config_cfdriver_attach(&nvmm_cd);
		if (error)
			return error;
#endif
		error = config_cfattach_attach(nvmm_cd.cd_name, &nvmm_ca);
		if (error) {
			config_cfdriver_detach(&nvmm_cd);
			aprint_error("%s: config_cfattach_attach failed\n",
			    nvmm_cd.cd_name);
			return error;
		}

		error = config_cfdata_attach(nvmm_cfdata, 1);
		if (error) {
			config_cfattach_detach(nvmm_cd.cd_name, &nvmm_ca);
			config_cfdriver_detach(&nvmm_cd);
			aprint_error("%s: unable to register cfdata\n",
			    nvmm_cd.cd_name);
			return error;
		}

		if (config_attach_pseudo(nvmm_cfdata) == NULL) {
			aprint_error("%s: config_attach_pseudo failed\n",
			    nvmm_cd.cd_name);
			config_cfattach_detach(nvmm_cd.cd_name, &nvmm_ca);
			config_cfdriver_detach(&nvmm_cd);
			return ENXIO;
		}

#if defined(_MODULE)
		/* mknod /dev/nvmm c 345 0 */
		error = devsw_attach(nvmm_cd.cd_name, NULL, &bmajor,
		    &nvmm_cdevsw, &cmajor);
		if (error) {
			aprint_error("%s: unable to register devsw\n",
			    nvmm_cd.cd_name);
			config_cfattach_detach(nvmm_cd.cd_name, &nvmm_ca);
			config_cfdriver_detach(&nvmm_cd);
			return error;
		}
#endif
		return 0;
	case MODULE_CMD_FINI:
		error = config_cfdata_detach(nvmm_cfdata);
		if (error)
			return error;
		error = config_cfattach_detach(nvmm_cd.cd_name, &nvmm_ca);
		if (error)
			return error;
#if defined(_MODULE)
		config_cfdriver_detach(&nvmm_cd);
		devsw_detach(NULL, &nvmm_cdevsw);
#endif
		return 0;
	case MODULE_CMD_AUTOUNLOAD:
		return EBUSY;
	default:
		return ENOTTY;
	}
}