/*	$NetBSD: nvmm.c,v 1.23 2019/10/23 07:01:11 maxv Exp $	*/
2
3 /*
4 * Copyright (c) 2018-2019 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Maxime Villard.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: nvmm.c,v 1.23 2019/10/23 07:01:11 maxv Exp $");
34
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38
39 #include <sys/cpu.h>
40 #include <sys/conf.h>
41 #include <sys/kmem.h>
42 #include <sys/module.h>
43 #include <sys/proc.h>
44 #include <sys/mman.h>
45 #include <sys/file.h>
46 #include <sys/filedesc.h>
47 #include <sys/kauth.h>
48
49 #include <uvm/uvm.h>
50 #include <uvm/uvm_page.h>
51
52 #include "ioconf.h"
53
54 #include <dev/nvmm/nvmm.h>
55 #include <dev/nvmm/nvmm_internal.h>
56 #include <dev/nvmm/nvmm_ioctl.h>
57
58 static struct nvmm_machine machines[NVMM_MAX_MACHINES];
59 static volatile unsigned int nmachines __cacheline_aligned;
60
61 static const struct nvmm_impl *nvmm_impl_list[] = {
62 &nvmm_x86_svm, /* x86 AMD SVM */
63 &nvmm_x86_vmx /* x86 Intel VMX */
64 };
65
66 static const struct nvmm_impl *nvmm_impl = NULL;
67
68 static struct nvmm_owner root_owner;
69
70 /* -------------------------------------------------------------------------- */
71
/*
 * Find a free slot in the global machines[] array, mark it present, and
 * return it in *ret.  On success the machine's rwlock is held as WRITER;
 * the caller must eventually release it with nvmm_machine_put().
 * Returns ENOBUFS when all NVMM_MAX_MACHINES slots are in use.
 */
static int
nvmm_machine_alloc(struct nvmm_machine **ret)
{
	struct nvmm_machine *mach;
	size_t i;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		mach = &machines[i];

		rw_enter(&mach->lock, RW_WRITER);
		if (mach->present) {
			rw_exit(&mach->lock);
			continue;
		}

		mach->present = true;
		mach->time = time_second;	/* creation time, reported by NVMM_CTL_MACH_INFO */
		*ret = mach;
		atomic_inc_uint(&nmachines);
		/* Note: mach->lock is intentionally still held (writer). */
		return 0;
	}

	return ENOBUFS;
}
96
/*
 * Release a machine slot.  The caller must hold mach->lock as writer;
 * the slot becomes reusable only once that lock is dropped.
 */
static void
nvmm_machine_free(struct nvmm_machine *mach)
{
	KASSERT(rw_write_held(&mach->lock));
	KASSERT(mach->present);
	mach->present = false;
	atomic_dec_uint(&nmachines);
}
105
/*
 * Look up machine 'machid' on behalf of 'owner' and return it in *ret
 * with its lock held (writer if 'writer' is true, reader otherwise).
 * The special root_owner bypasses the ownership check.  On error no
 * lock is held.  Release with nvmm_machine_put().
 */
static int
nvmm_machine_get(struct nvmm_owner *owner, nvmm_machid_t machid,
    struct nvmm_machine **ret, bool writer)
{
	struct nvmm_machine *mach;
	krw_t op = writer ? RW_WRITER : RW_READER;

	if (machid >= NVMM_MAX_MACHINES) {
		return EINVAL;
	}
	mach = &machines[machid];

	rw_enter(&mach->lock, op);
	if (!mach->present) {
		rw_exit(&mach->lock);
		return ENOENT;
	}
	if (owner != &root_owner && mach->owner != owner) {
		rw_exit(&mach->lock);
		return EPERM;
	}
	*ret = mach;

	return 0;
}
131
/*
 * Drop the machine lock taken by nvmm_machine_get() or
 * nvmm_machine_alloc().
 */
static void
nvmm_machine_put(struct nvmm_machine *mach)
{
	rw_exit(&mach->lock);
}
137
138 /* -------------------------------------------------------------------------- */
139
/*
 * Allocate VCPU slot 'cpuid' of 'mach' and return it in *ret.  On
 * success the VCPU mutex is held; release with nvmm_vcpu_put().
 * Returns EBUSY if the VCPU already exists.  The comm page is not
 * mapped here; nvmm_vcpu_create() does that afterwards.
 */
static int
nvmm_vcpu_alloc(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    struct nvmm_cpu **ret)
{
	struct nvmm_cpu *vcpu;

	if (cpuid >= NVMM_MAX_VCPUS) {
		return EINVAL;
	}
	vcpu = &mach->cpus[cpuid];

	mutex_enter(&vcpu->lock);
	if (vcpu->present) {
		mutex_exit(&vcpu->lock);
		return EBUSY;
	}

	vcpu->present = true;
	vcpu->comm = NULL;	/* no kernel comm-page mapping yet */
	vcpu->hcpu_last = -1;	/* never ran on any host CPU */
	*ret = vcpu;
	return 0;
}
163
/*
 * Release a VCPU slot.  The caller must hold vcpu->lock.  If a kernel
 * mapping of the comm page was established, it is torn down here.
 */
static void
nvmm_vcpu_free(struct nvmm_machine *mach, struct nvmm_cpu *vcpu)
{
	KASSERT(mutex_owned(&vcpu->lock));
	vcpu->present = false;
	if (vcpu->comm != NULL) {
		uvm_deallocate(kernel_map, (vaddr_t)vcpu->comm, PAGE_SIZE);
	}
}
173
174 static int
175 nvmm_vcpu_get(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
176 struct nvmm_cpu **ret)
177 {
178 struct nvmm_cpu *vcpu;
179
180 if (cpuid >= NVMM_MAX_VCPUS) {
181 return EINVAL;
182 }
183 vcpu = &mach->cpus[cpuid];
184
185 mutex_enter(&vcpu->lock);
186 if (!vcpu->present) {
187 mutex_exit(&vcpu->lock);
188 return ENOENT;
189 }
190 *ret = vcpu;
191
192 return 0;
193 }
194
/*
 * Drop the VCPU mutex taken by nvmm_vcpu_get() or nvmm_vcpu_alloc().
 */
static void
nvmm_vcpu_put(struct nvmm_cpu *vcpu)
{
	mutex_exit(&vcpu->lock);
}
200
201 /* -------------------------------------------------------------------------- */
202
/*
 * Destroy every machine belonging to 'owner'.  Called when the owner's
 * /dev/nvmm file is closed.  For each owned machine: destroy and free
 * all present VCPUs, tear down the backend state, free the guest
 * vmspace, drop the kernel references on the host-mapping UOBJs, and
 * release the machine slot.  Mirrors nvmm_machine_destroy().
 */
static void
nvmm_kill_machines(struct nvmm_owner *owner)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	size_t i, j;
	int error;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		mach = &machines[i];

		rw_enter(&mach->lock, RW_WRITER);
		if (!mach->present || mach->owner != owner) {
			rw_exit(&mach->lock);
			continue;
		}

		/* Kill it. */
		for (j = 0; j < NVMM_MAX_VCPUS; j++) {
			error = nvmm_vcpu_get(mach, j, &vcpu);
			if (error)
				continue;	/* VCPU slot not in use */
			(*nvmm_impl->vcpu_destroy)(mach, vcpu);
			nvmm_vcpu_free(mach, vcpu);
			nvmm_vcpu_put(vcpu);
		}
		(*nvmm_impl->machine_destroy)(mach);
		uvmspace_free(mach->vm);

		/* Drop the kernel UOBJ refs. */
		for (j = 0; j < NVMM_MAX_HMAPPINGS; j++) {
			if (!mach->hmap[j].present)
				continue;
			uao_detach(mach->hmap[j].uobj);
		}

		nvmm_machine_free(mach);

		rw_exit(&mach->lock);
	}
}
244
245 /* -------------------------------------------------------------------------- */
246
247 static int
248 nvmm_capability(struct nvmm_owner *owner, struct nvmm_ioc_capability *args)
249 {
250 args->cap.version = NVMM_KERN_VERSION;
251 args->cap.state_size = nvmm_impl->state_size;
252 args->cap.max_machines = NVMM_MAX_MACHINES;
253 args->cap.max_vcpus = NVMM_MAX_VCPUS;
254 args->cap.max_ram = NVMM_MAX_RAM;
255
256 (*nvmm_impl->capability)(&args->cap);
257
258 return 0;
259 }
260
/*
 * NVMM_IOC_MACHINE_CREATE: allocate a machine slot, create its guest
 * vmspace spanning [0, NVMM_MAX_RAM), create the comm-page UOBJ (one
 * page per possible VCPU), and hand the machine to the backend.
 * Returns the new machine id in args->machid.
 */
static int
nvmm_machine_create(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_create *args)
{
	struct nvmm_machine *mach;
	int error;

	error = nvmm_machine_alloc(&mach);
	if (error)
		return error;

	/* Curproc owns the machine. */
	mach->owner = owner;

	/* Zero out the host mappings. */
	memset(&mach->hmap, 0, sizeof(mach->hmap));

	/* Create the machine vmspace. */
	mach->gpa_begin = 0;
	mach->gpa_end = NVMM_MAX_RAM;
	mach->vm = uvmspace_alloc(0, mach->gpa_end - mach->gpa_begin, false);

	/* Create the comm uobj. */
	mach->commuobj = uao_create(NVMM_MAX_VCPUS * PAGE_SIZE, 0);

	(*nvmm_impl->machine_create)(mach);

	args->machid = mach->machid;
	/* nvmm_machine_alloc() returned with the lock held: drop it. */
	nvmm_machine_put(mach);

	return 0;
}
293
/*
 * NVMM_IOC_MACHINE_DESTROY: tear down a machine.  Destroys all present
 * VCPUs, the backend state, and the guest vmspace; drops the kernel
 * UOBJ references; then releases the slot.  Taken as writer since the
 * machine is being removed.
 */
static int
nvmm_machine_destroy(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_destroy *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;
	size_t i;

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error)
		return error;

	for (i = 0; i < NVMM_MAX_VCPUS; i++) {
		error = nvmm_vcpu_get(mach, i, &vcpu);
		if (error)
			continue;	/* VCPU slot not in use */

		(*nvmm_impl->vcpu_destroy)(mach, vcpu);
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
	}

	(*nvmm_impl->machine_destroy)(mach);

	/* Free the machine vmspace. */
	uvmspace_free(mach->vm);

	/* Drop the kernel UOBJ refs. */
	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		if (!mach->hmap[i].present)
			continue;
		uao_detach(mach->hmap[i].uobj);
	}

	nvmm_machine_free(mach);
	nvmm_machine_put(mach);

	return 0;
}
334
/*
 * NVMM_IOC_MACHINE_CONFIGURE: apply a machine-dependent configuration
 * operation.  The op index is validated against the backend's table
 * before sizing the copyin buffer from mach_conf_sizes[].
 */
static int
nvmm_machine_configure(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_configure *args)
{
	struct nvmm_machine *mach;
	size_t allocsz;
	uint64_t op;
	void *data;
	int error;

	op = NVMM_MACH_CONF_MD(args->op);
	if (__predict_false(op >= nvmm_impl->mach_conf_max)) {
		return EINVAL;
	}

	/* Size is taken from the backend table, not from userland. */
	allocsz = nvmm_impl->mach_conf_sizes[op];
	data = kmem_alloc(allocsz, KM_SLEEP);

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error) {
		kmem_free(data, allocsz);
		return error;
	}

	error = copyin(args->conf, data, allocsz);
	if (error) {
		goto out;
	}

	error = (*nvmm_impl->machine_configure)(mach, op, data);

out:
	nvmm_machine_put(mach);
	kmem_free(data, allocsz);
	return error;
}
371
/*
 * NVMM_IOC_VCPU_CREATE: create VCPU 'cpuid' on machine 'machid'.
 * Allocates the VCPU slot, maps its page of the machine's comm UOBJ
 * into the kernel, wires it, zeroes it, and hands the VCPU to the
 * backend.  On any failure the slot is released again.
 */
static int
nvmm_vcpu_create(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_create *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_alloc(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	/* Allocate the comm page. */
	uao_reference(mach->commuobj);
	error = uvm_map(kernel_map, (vaddr_t *)&vcpu->comm, PAGE_SIZE,
	    mach->commuobj, args->cpuid * PAGE_SIZE, 0, UVM_MAPFLAG(UVM_PROT_RW,
	    UVM_PROT_RW, UVM_INH_SHARE, UVM_ADV_RANDOM, 0));
	if (error) {
		/* Drop the reference we took for the failed mapping. */
		uao_detach(mach->commuobj);
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		goto out;
	}
	/*
	 * Wire the page — presumably so the backend can access it from
	 * contexts where faulting is not allowed.  TODO confirm.
	 */
	error = uvm_map_pageable(kernel_map, (vaddr_t)vcpu->comm,
	    (vaddr_t)vcpu->comm + PAGE_SIZE, false, 0);
	if (error) {
		/* nvmm_vcpu_free() deallocates the mapping made above. */
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		goto out;
	}
	memset(vcpu->comm, 0, PAGE_SIZE);

	error = (*nvmm_impl->vcpu_create)(mach, vcpu);
	if (error) {
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		goto out;
	}

	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}
420
/*
 * NVMM_IOC_VCPU_DESTROY: destroy a single VCPU — backend state first,
 * then the slot (which also unmaps the comm page).
 */
static int
nvmm_vcpu_destroy(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_destroy *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	(*nvmm_impl->vcpu_destroy)(mach, vcpu);
	nvmm_vcpu_free(mach, vcpu);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}
444
/*
 * NVMM_IOC_VCPU_CONFIGURE: apply a machine-dependent per-VCPU
 * configuration operation, sized from the backend's vcpu_conf_sizes[]
 * table after validating the op index.
 */
static int
nvmm_vcpu_configure(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_configure *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	size_t allocsz;
	uint64_t op;
	void *data;
	int error;

	op = NVMM_VCPU_CONF_MD(args->op);
	if (__predict_false(op >= nvmm_impl->vcpu_conf_max))
		return EINVAL;

	/* Size is taken from the backend table, not from userland. */
	allocsz = nvmm_impl->vcpu_conf_sizes[op];
	data = kmem_alloc(allocsz, KM_SLEEP);

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error) {
		kmem_free(data, allocsz);
		return error;
	}

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error) {
		nvmm_machine_put(mach);
		kmem_free(data, allocsz);
		return error;
	}

	error = copyin(args->conf, data, allocsz);
	if (error) {
		goto out;
	}

	error = (*nvmm_impl->vcpu_configure)(vcpu, op, data);

out:
	nvmm_vcpu_put(vcpu);
	nvmm_machine_put(mach);
	kmem_free(data, allocsz);
	return error;
}
489
490 static int
491 nvmm_vcpu_setstate(struct nvmm_owner *owner,
492 struct nvmm_ioc_vcpu_setstate *args)
493 {
494 struct nvmm_machine *mach;
495 struct nvmm_cpu *vcpu;
496 int error;
497
498 error = nvmm_machine_get(owner, args->machid, &mach, false);
499 if (error)
500 return error;
501
502 error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
503 if (error)
504 goto out;
505
506 (*nvmm_impl->vcpu_setstate)(vcpu);
507 nvmm_vcpu_put(vcpu);
508
509 out:
510 nvmm_machine_put(mach);
511 return error;
512 }
513
514 static int
515 nvmm_vcpu_getstate(struct nvmm_owner *owner,
516 struct nvmm_ioc_vcpu_getstate *args)
517 {
518 struct nvmm_machine *mach;
519 struct nvmm_cpu *vcpu;
520 int error;
521
522 error = nvmm_machine_get(owner, args->machid, &mach, false);
523 if (error)
524 return error;
525
526 error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
527 if (error)
528 goto out;
529
530 (*nvmm_impl->vcpu_getstate)(vcpu);
531 nvmm_vcpu_put(vcpu);
532
533 out:
534 nvmm_machine_put(mach);
535 return error;
536 }
537
/*
 * NVMM_IOC_VCPU_INJECT: ask the backend to inject an event into the
 * VCPU; the backend's return value is propagated.
 */
static int
nvmm_vcpu_inject(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_inject *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	error = (*nvmm_impl->vcpu_inject)(vcpu);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}
560
/*
 * Run the VCPU until an exit that userland must handle.  Memory exits
 * whose GPA falls inside the guest address space are resolved in the
 * kernel by faulting the page in and resuming, without returning to
 * userland.  Any other exit (or an out-of-range GPA, or a fault
 * failure) breaks the loop and is reported through 'exit'.
 */
static int
nvmm_do_vcpu_run(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
    struct nvmm_vcpu_exit *exit)
{
	struct vmspace *vm = mach->vm;
	int ret;

	while (1) {
		ret = (*nvmm_impl->vcpu_run)(mach, vcpu, exit);
		if (__predict_false(ret != 0)) {
			return ret;
		}

		if (__predict_true(exit->reason != NVMM_VCPU_EXIT_MEMORY)) {
			break;
		}
		if (exit->u.mem.gpa >= mach->gpa_end) {
			break;	/* outside guest RAM: let userland decide */
		}
		if (uvm_fault(&vm->vm_map, exit->u.mem.gpa, exit->u.mem.prot)) {
			break;	/* unresolvable fault: report to userland */
		}
	}

	return 0;
}
587
/*
 * NVMM_IOC_VCPU_RUN: enter the guest on the given VCPU.  The exit
 * information is written into args->exit, which the ioctl layer
 * copies back to userland.
 */
static int
nvmm_vcpu_run(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_run *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	error = nvmm_do_vcpu_run(mach, vcpu, &args->exit);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}
610
611 /* -------------------------------------------------------------------------- */
612
613 static struct uvm_object *
614 nvmm_hmapping_getuobj(struct nvmm_machine *mach, uintptr_t hva, size_t size,
615 size_t *off)
616 {
617 struct nvmm_hmapping *hmapping;
618 size_t i;
619
620 for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
621 hmapping = &mach->hmap[i];
622 if (!hmapping->present) {
623 continue;
624 }
625 if (hva >= hmapping->hva &&
626 hva + size <= hmapping->hva + hmapping->size) {
627 *off = hva - hmapping->hva;
628 return hmapping->uobj;
629 }
630 }
631
632 return NULL;
633 }
634
/*
 * Validate a candidate host-mapping range: it must be page-aligned,
 * non-NULL, and must not partially overlap any existing mapping.
 * A range fully contained in an existing mapping is accepted (the
 * 'break' below skips the EEXIST checks) — presumably to allow
 * re-mapping a sub-range over the same region; TODO confirm intent.
 */
static int
nvmm_hmapping_validate(struct nvmm_machine *mach, uintptr_t hva, size_t size)
{
	struct nvmm_hmapping *hmapping;
	size_t i;

	if ((hva % PAGE_SIZE) != 0 || (size % PAGE_SIZE) != 0) {
		return EINVAL;
	}
	if (hva == 0) {
		return EINVAL;
	}

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present) {
			continue;
		}

		/* Fully contained in an existing mapping: accept. */
		if (hva >= hmapping->hva &&
		    hva + size <= hmapping->hva + hmapping->size) {
			break;
		}

		/* Start falls inside an existing mapping. */
		if (hva >= hmapping->hva &&
		    hva < hmapping->hva + hmapping->size) {
			return EEXIST;
		}
		/* End falls inside an existing mapping. */
		if (hva + size > hmapping->hva &&
		    hva + size <= hmapping->hva + hmapping->size) {
			return EEXIST;
		}
		/* The new range engulfs an existing mapping. */
		if (hva <= hmapping->hva &&
		    hva + size >= hmapping->hva + hmapping->size) {
			return EEXIST;
		}
	}

	return 0;
}
675
676 static struct nvmm_hmapping *
677 nvmm_hmapping_alloc(struct nvmm_machine *mach)
678 {
679 struct nvmm_hmapping *hmapping;
680 size_t i;
681
682 for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
683 hmapping = &mach->hmap[i];
684 if (!hmapping->present) {
685 hmapping->present = true;
686 return hmapping;
687 }
688 }
689
690 return NULL;
691 }
692
/*
 * Remove the host mapping that matches (hva, size) exactly: unmap it
 * from the calling process's address space, drop the machine's UOBJ
 * reference, and release the slot.  Returns ENOENT if no mapping
 * matches.
 */
static int
nvmm_hmapping_free(struct nvmm_machine *mach, uintptr_t hva, size_t size)
{
	struct vmspace *vmspace = curproc->p_vmspace;
	struct nvmm_hmapping *hmapping;
	size_t i;

	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		hmapping = &mach->hmap[i];
		if (!hmapping->present || hmapping->hva != hva ||
		    hmapping->size != size) {
			continue;
		}

		uvm_unmap(&vmspace->vm_map, hmapping->hva,
		    hmapping->hva + hmapping->size);
		uao_detach(hmapping->uobj);

		hmapping->uobj = NULL;
		hmapping->present = false;

		return 0;
	}

	return ENOENT;
}
719
/*
 * NVMM_IOC_HVA_MAP: establish a host mapping.  Creates an anonymous
 * UOBJ of the requested size and maps it at the fixed address
 * args->hva in the calling process, replacing whatever was there
 * (UVM_FLAG_FIXED|UVM_FLAG_UNMAP).  The UOBJ ends up with two
 * references: one held by the machine, one by the user mapping.
 */
static int
nvmm_hva_map(struct nvmm_owner *owner, struct nvmm_ioc_hva_map *args)
{
	struct vmspace *vmspace = curproc->p_vmspace;
	struct nvmm_machine *mach;
	struct nvmm_hmapping *hmapping;
	vaddr_t uva;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error)
		return error;

	error = nvmm_hmapping_validate(mach, args->hva, args->size);
	if (error)
		goto out;

	hmapping = nvmm_hmapping_alloc(mach);
	if (hmapping == NULL) {
		error = ENOBUFS;
		goto out;
	}

	hmapping->hva = args->hva;
	hmapping->size = args->size;
	hmapping->uobj = uao_create(hmapping->size, 0);	/* machine's ref */
	uva = hmapping->hva;

	/* Take a reference for the user. */
	uao_reference(hmapping->uobj);

	/* Map the uobj into the user address space, as pageable. */
	error = uvm_map(&vmspace->vm_map, &uva, hmapping->size, hmapping->uobj,
	    0, 0, UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, UVM_INH_SHARE,
	    UVM_ADV_RANDOM, UVM_FLAG_FIXED|UVM_FLAG_UNMAP));
	if (error) {
		/* Drop the user reference; the machine's ref remains. */
		uao_detach(hmapping->uobj);
	}

out:
	nvmm_machine_put(mach);
	return error;
}
763
764 static int
765 nvmm_hva_unmap(struct nvmm_owner *owner, struct nvmm_ioc_hva_unmap *args)
766 {
767 struct nvmm_machine *mach;
768 int error;
769
770 error = nvmm_machine_get(owner, args->machid, &mach, true);
771 if (error)
772 return error;
773
774 error = nvmm_hmapping_free(mach, args->hva, args->size);
775
776 nvmm_machine_put(mach);
777 return error;
778 }
779
780 /* -------------------------------------------------------------------------- */
781
/*
 * NVMM_IOC_GPA_MAP: map a previously-established host mapping (by HVA)
 * into the guest physical address space at args->gpa.  The range must
 * be page-aligned, fit entirely in [gpa_begin, gpa_end], not wrap, and
 * be fully backed by a single host mapping's UOBJ.
 */
static int
nvmm_gpa_map(struct nvmm_owner *owner, struct nvmm_ioc_gpa_map *args)
{
	struct nvmm_machine *mach;
	struct uvm_object *uobj;
	gpaddr_t gpa;
	size_t off;
	int error;

	error = nvmm_machine_get(owner, args->machid, &mach, false);
	if (error)
		return error;

	if ((args->prot & ~(PROT_READ|PROT_WRITE|PROT_EXEC)) != 0) {
		error = EINVAL;
		goto out;
	}

	if ((args->gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0 ||
	    (args->hva % PAGE_SIZE) != 0) {
		error = EINVAL;
		goto out;
	}
	if (args->hva == 0) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa < mach->gpa_begin || args->gpa >= mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	/* Reject zero-size and wrapping ranges. */
	if (args->gpa + args->size <= args->gpa) {
		error = EINVAL;
		goto out;
	}
	if (args->gpa + args->size > mach->gpa_end) {
		error = EINVAL;
		goto out;
	}
	gpa = args->gpa;

	uobj = nvmm_hmapping_getuobj(mach, args->hva, args->size, &off);
	if (uobj == NULL) {
		error = EINVAL;
		goto out;
	}

	/* Take a reference for the machine. */
	uao_reference(uobj);

	/* Map the uobj into the machine address space, as pageable. */
	error = uvm_map(&mach->vm->vm_map, &gpa, args->size, uobj, off, 0,
	    UVM_MAPFLAG(args->prot, UVM_PROT_RWX, UVM_INH_NONE,
	    UVM_ADV_RANDOM, UVM_FLAG_FIXED|UVM_FLAG_UNMAP));
	if (error) {
		uao_detach(uobj);
		goto out;
	}
	/* UVM_FLAG_FIXED should pin the address; sanity-check it. */
	if (gpa != args->gpa) {
		uao_detach(uobj);
		printf("[!] uvm_map problem\n");
		error = EINVAL;
		goto out;
	}

out:
	nvmm_machine_put(mach);
	return error;
}
851
852 static int
853 nvmm_gpa_unmap(struct nvmm_owner *owner, struct nvmm_ioc_gpa_unmap *args)
854 {
855 struct nvmm_machine *mach;
856 gpaddr_t gpa;
857 int error;
858
859 error = nvmm_machine_get(owner, args->machid, &mach, false);
860 if (error)
861 return error;
862
863 if ((args->gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0) {
864 error = EINVAL;
865 goto out;
866 }
867 if (args->gpa < mach->gpa_begin || args->gpa >= mach->gpa_end) {
868 error = EINVAL;
869 goto out;
870 }
871 if (args->gpa + args->size <= args->gpa) {
872 error = EINVAL;
873 goto out;
874 }
875 if (args->gpa + args->size >= mach->gpa_end) {
876 error = EINVAL;
877 goto out;
878 }
879 gpa = args->gpa;
880
881 /* Unmap the memory from the machine. */
882 uvm_unmap(&mach->vm->vm_map, gpa, gpa + args->size);
883
884 out:
885 nvmm_machine_put(mach);
886 return error;
887 }
888
889 /* -------------------------------------------------------------------------- */
890
/*
 * NVMM_CTL_MACH_INFO: privileged query of any machine's metadata —
 * number of present VCPUs, owner PID and creation time.  Uses
 * root_owner, so the usual ownership check is bypassed.
 */
static int
nvmm_ctl_mach_info(struct nvmm_ioc_ctl *args)
{
	struct nvmm_ctl_mach_info ctl;
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;
	size_t i;

	if (args->size != sizeof(ctl))
		return EINVAL;
	error = copyin(args->data, &ctl, sizeof(ctl));
	if (error)
		return error;

	error = nvmm_machine_get(&root_owner, ctl.machid, &mach, true);
	if (error)
		return error;

	/* Count the VCPUs currently present on the machine. */
	ctl.nvcpus = 0;
	for (i = 0; i < NVMM_MAX_VCPUS; i++) {
		error = nvmm_vcpu_get(mach, i, &vcpu);
		if (error)
			continue;
		ctl.nvcpus++;
		nvmm_vcpu_put(vcpu);
	}
	ctl.pid = mach->owner->pid;
	ctl.time = mach->time;

	nvmm_machine_put(mach);

	error = copyout(&ctl, args->data, sizeof(ctl));
	if (error)
		return error;

	return 0;
}
929
930 static int
931 nvmm_ctl(struct nvmm_owner *owner, struct nvmm_ioc_ctl *args)
932 {
933 int error;
934
935 error = kauth_authorize_device(curlwp->l_cred, KAUTH_DEVICE_NVMM_CTL,
936 NULL, NULL, NULL, NULL);
937 if (error)
938 return error;
939
940 switch (args->op) {
941 case NVMM_CTL_MACH_INFO:
942 return nvmm_ctl_mach_info(args);
943 default:
944 return EINVAL;
945 }
946 }
947
948 /* -------------------------------------------------------------------------- */
949
/*
 * Module initialization: probe the implementation list (SVM, then VMX)
 * and select the first whose ident() matches the host CPU; initialize
 * the per-machine rwlocks and per-VCPU mutexes; then initialize the
 * chosen backend.  Returns ENOTSUP if no backend matches.
 */
static int
nvmm_init(void)
{
	size_t i, n;

	for (i = 0; i < __arraycount(nvmm_impl_list); i++) {
		if (!(*nvmm_impl_list[i]->ident)()) {
			continue;
		}
		nvmm_impl = nvmm_impl_list[i];
		break;
	}
	if (nvmm_impl == NULL) {
		printf("[!] No implementation found\n");
		return ENOTSUP;
	}

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		machines[i].machid = i;
		rw_init(&machines[i].lock);
		for (n = 0; n < NVMM_MAX_VCPUS; n++) {
			machines[i].cpus[n].present = false;
			machines[i].cpus[n].cpuid = n;
			mutex_init(&machines[i].cpus[n].lock, MUTEX_DEFAULT,
			    IPL_NONE);
		}
	}

	(*nvmm_impl->init)();

	return 0;
}
982
/*
 * Module teardown: destroy the per-machine locks and per-VCPU mutexes,
 * then shut down the backend.  The caller (nvmm_modcmd) has already
 * checked that no machines exist.
 */
static void
nvmm_fini(void)
{
	size_t i, n;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		rw_destroy(&machines[i].lock);
		for (n = 0; n < NVMM_MAX_VCPUS; n++) {
			mutex_destroy(&machines[i].cpus[n].lock);
		}
	}

	(*nvmm_impl->fini)();
}
997
998 /* -------------------------------------------------------------------------- */
999
static dev_type_open(nvmm_open);

/*
 * Character device switch for /dev/nvmm.  Only open() is implemented:
 * nvmm_open() immediately clones into a per-owner file (nvmm_fileops),
 * so every other device entry point is a no-op stub.
 */
const struct cdevsw nvmm_cdevsw = {
	.d_open = nvmm_open,
	.d_close = noclose,
	.d_read = noread,
	.d_write = nowrite,
	.d_ioctl = noioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_OTHER | D_MPSAFE
};
1016
static int nvmm_ioctl(file_t *, u_long, void *);
static int nvmm_close(file_t *);
static int nvmm_mmap(file_t *, off_t *, size_t, int, int *, int *,
    struct uvm_object **, int *);

/*
 * File operations for the cloned per-owner file returned by
 * nvmm_open().  ioctl carries all the NVMM commands, close tears down
 * the owner's machines, mmap exposes the per-VCPU comm pages.
 */
const struct fileops nvmm_fileops = {
	.fo_read = fbadop_read,
	.fo_write = fbadop_write,
	.fo_ioctl = nvmm_ioctl,
	.fo_fcntl = fnullop_fcntl,
	.fo_poll = fnullop_poll,
	.fo_stat = fbadop_stat,
	.fo_close = nvmm_close,
	.fo_kqfilter = fnullop_kqfilter,
	.fo_restart = fnullop_restart,
	.fo_mmap = nvmm_mmap,
};
1034
/*
 * open() on /dev/nvmm.  Requires minor 0 and O_CLOEXEC (so the owner
 * file cannot leak across exec).  Allocates a struct nvmm_owner keyed
 * to the opening process and clones a new file descriptor bound to
 * nvmm_fileops; fd_clone() takes ownership of 'owner'.
 */
static int
nvmm_open(dev_t dev, int flags, int type, struct lwp *l)
{
	struct nvmm_owner *owner;
	struct file *fp;
	int error, fd;

	if (minor(dev) != 0)
		return EXDEV;
	if (!(flags & O_CLOEXEC))
		return EINVAL;
	error = fd_allocfile(&fp, &fd);
	if (error)
		return error;

	owner = kmem_alloc(sizeof(*owner), KM_SLEEP);
	owner->pid = l->l_proc->p_pid;

	return fd_clone(fp, fd, flags, &nvmm_fileops, owner);
}
1055
/*
 * close() of the cloned owner file: destroy every machine owned by
 * this descriptor, then free the owner structure.
 */
static int
nvmm_close(file_t *fp)
{
	struct nvmm_owner *owner = fp->f_data;

	KASSERT(owner != NULL);
	nvmm_kill_machines(owner);
	kmem_free(owner, sizeof(*owner));
	fp->f_data = NULL;

	return 0;
}
1068
/*
 * mmap() of the owner file: map one VCPU comm page into userland.
 * The machine id and VCPU id are encoded in the file offset
 * (NVMM_COMM_MACHID/NVMM_COMM_CPUID).  Exactly one page, never
 * executable.  Returns the machine's comm UOBJ with an extra
 * reference; UVM consumes that reference when it installs the map.
 */
static int
nvmm_mmap(file_t *fp, off_t *offp, size_t size, int prot, int *flagsp,
    int *advicep, struct uvm_object **uobjp, int *maxprotp)
{
	struct nvmm_owner *owner = fp->f_data;
	struct nvmm_machine *mach;
	nvmm_machid_t machid;
	nvmm_cpuid_t cpuid;
	int error;

	if (prot & PROT_EXEC)
		return EACCES;
	if (size != PAGE_SIZE)
		return EINVAL;

	cpuid = NVMM_COMM_CPUID(*offp);
	if (__predict_false(cpuid >= NVMM_MAX_VCPUS))
		return EINVAL;

	machid = NVMM_COMM_MACHID(*offp);
	error = nvmm_machine_get(owner, machid, &mach, false);
	if (error)
		return error;

	uao_reference(mach->commuobj);
	*uobjp = mach->commuobj;
	*offp = cpuid * PAGE_SIZE;	/* offset of this VCPU's page in the uobj */
	*maxprotp = prot;
	*advicep = UVM_ADV_RANDOM;

	nvmm_machine_put(mach);
	return 0;
}
1102
/*
 * ioctl dispatch for the owner file.  'data' has already been copied
 * in/out by the generic ioctl layer according to each command's size.
 */
static int
nvmm_ioctl(file_t *fp, u_long cmd, void *data)
{
	struct nvmm_owner *owner = fp->f_data;

	KASSERT(owner != NULL);

	switch (cmd) {
	case NVMM_IOC_CAPABILITY:
		return nvmm_capability(owner, data);
	case NVMM_IOC_MACHINE_CREATE:
		return nvmm_machine_create(owner, data);
	case NVMM_IOC_MACHINE_DESTROY:
		return nvmm_machine_destroy(owner, data);
	case NVMM_IOC_MACHINE_CONFIGURE:
		return nvmm_machine_configure(owner, data);
	case NVMM_IOC_VCPU_CREATE:
		return nvmm_vcpu_create(owner, data);
	case NVMM_IOC_VCPU_DESTROY:
		return nvmm_vcpu_destroy(owner, data);
	case NVMM_IOC_VCPU_CONFIGURE:
		return nvmm_vcpu_configure(owner, data);
	case NVMM_IOC_VCPU_SETSTATE:
		return nvmm_vcpu_setstate(owner, data);
	case NVMM_IOC_VCPU_GETSTATE:
		return nvmm_vcpu_getstate(owner, data);
	case NVMM_IOC_VCPU_INJECT:
		return nvmm_vcpu_inject(owner, data);
	case NVMM_IOC_VCPU_RUN:
		return nvmm_vcpu_run(owner, data);
	case NVMM_IOC_GPA_MAP:
		return nvmm_gpa_map(owner, data);
	case NVMM_IOC_GPA_UNMAP:
		return nvmm_gpa_unmap(owner, data);
	case NVMM_IOC_HVA_MAP:
		return nvmm_hva_map(owner, data);
	case NVMM_IOC_HVA_UNMAP:
		return nvmm_hva_unmap(owner, data);
	case NVMM_IOC_CTL:
		return nvmm_ctl(owner, data);
	default:
		return EINVAL;
	}
}
1147
1148 /* -------------------------------------------------------------------------- */
1149
/*
 * Pseudo-device attach hook required by config(1) via ioconf.h.
 * All real initialization happens in nvmm_modcmd()/nvmm_init().
 */
void
nvmmattach(int nunits)
{
	/* nothing */
}
1155
MODULE(MODULE_CLASS_MISC, nvmm, NULL);

/*
 * Module control: INIT probes the backend and (when built as a module)
 * attaches the character device at the fixed major 345; FINI refuses
 * to unload while machines exist, then detaches and shuts down.
 * NOTE(review): the nmachines check is not synchronized against a
 * concurrent machine creation — presumably acceptable here; confirm.
 */
static int
nvmm_modcmd(modcmd_t cmd, void *arg)
{
	int error;

	switch (cmd) {
	case MODULE_CMD_INIT:
		error = nvmm_init();
		if (error)
			return error;

#if defined(_MODULE)
		{
			devmajor_t bmajor = NODEVMAJOR;
			devmajor_t cmajor = 345;

			/* mknod /dev/nvmm c 345 0 */
			error = devsw_attach("nvmm", NULL, &bmajor,
			    &nvmm_cdevsw, &cmajor);
			if (error) {
				nvmm_fini();
				return error;
			}
		}
#endif
		return 0;

	case MODULE_CMD_FINI:
		if (nmachines > 0) {
			return EBUSY;
		}
#if defined(_MODULE)
		{
			error = devsw_detach(NULL, &nvmm_cdevsw);
			if (error) {
				return error;
			}
		}
#endif
		nvmm_fini();
		return 0;

	case MODULE_CMD_AUTOUNLOAD:
		/* Never auto-unload a hypervisor driver. */
		return EBUSY;

	default:
		return ENOTTY;
	}
}
1207