nvmm.c revision 1.1 1 /* $NetBSD: nvmm.c,v 1.1 2018/11/07 07:43:08 maxv Exp $ */
2
3 /*
4 * Copyright (c) 2018 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Maxime Villard.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: nvmm.c,v 1.1 2018/11/07 07:43:08 maxv Exp $");
34
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38
39 #include <sys/cpu.h>
40 #include <sys/conf.h>
41 #include <sys/kmem.h>
42 #include <sys/module.h>
43 #include <sys/proc.h>
44
45 #include <uvm/uvm.h>
46 #include <uvm/uvm_page.h>
47
48 #include "ioconf.h"
49
50 #include <dev/nvmm/nvmm.h>
51 #include <dev/nvmm/nvmm_internal.h>
52 #include <dev/nvmm/nvmm_ioctl.h>
53
/* Fixed pool of machine slots; a slot is in use when 'present' is set. */
static struct nvmm_machine machines[NVMM_MAX_MACHINES];

/* Backends compiled in, probed in order by nvmm_init(). */
static const struct nvmm_impl *nvmm_impl_list[] = {
	&nvmm_x86_svm	/* x86 AMD SVM */
};

/* Backend selected by nvmm_init(); NULL until initialization succeeds. */
static const struct nvmm_impl *nvmm_impl = NULL;
61
62 /* -------------------------------------------------------------------------- */
63
/*
 * Allocate a free machine slot. On success the machine is marked present
 * and returned with its write lock held; the caller releases it with
 * nvmm_machine_put(). Returns ENOBUFS if every slot is in use.
 */
static int
nvmm_machine_alloc(struct nvmm_machine **ret)
{
	struct nvmm_machine *mach;
	size_t i;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		mach = &machines[i];

		rw_enter(&mach->lock, RW_WRITER);
		if (mach->present) {
			rw_exit(&mach->lock);
			continue;
		}

		/* Free slot: claim it, and keep the write lock held. */
		mach->present = true;
		*ret = mach;
		return 0;
	}

	return ENOBUFS;
}
86
/*
 * Release a machine slot, making it available to nvmm_machine_alloc()
 * again. The machine write lock must be held by the caller.
 */
static void
nvmm_machine_free(struct nvmm_machine *mach)
{
	KASSERT(rw_write_held(&mach->lock));
	KASSERT(mach->present);
	mach->present = false;
}
94
/*
 * Look up a machine by id and lock it, as reader or writer depending on
 * 'writer'. The machine must be present and owned by the calling process.
 * On success the lock is held; release it with nvmm_machine_put().
 */
static int
nvmm_machine_get(nvmm_machid_t machid, struct nvmm_machine **ret, bool writer)
{
	struct nvmm_machine *mach;
	krw_t op = writer ? RW_WRITER : RW_READER;

	if (machid >= NVMM_MAX_MACHINES) {
		return EINVAL;
	}
	mach = &machines[machid];

	rw_enter(&mach->lock, op);
	if (!mach->present) {
		rw_exit(&mach->lock);
		return ENOENT;
	}
	/* Only the owner process may access its machines. */
	if (mach->procid != curproc->p_pid) {
		rw_exit(&mach->lock);
		return EPERM;
	}
	*ret = mach;

	return 0;
}
119
/*
 * Release the machine lock taken by nvmm_machine_get() or
 * nvmm_machine_alloc().
 */
static void
nvmm_machine_put(struct nvmm_machine *mach)
{
	rw_exit(&mach->lock);
}
125
126 /* -------------------------------------------------------------------------- */
127
/*
 * Allocate a free VCPU slot in the given machine. On success the VCPU is
 * marked present, its cpuid set to its slot index, and it is returned with
 * its mutex held; release it with nvmm_vcpu_put(). Returns ENOBUFS if all
 * NVMM_MAX_VCPUS slots are in use.
 */
static int
nvmm_vcpu_alloc(struct nvmm_machine *mach, struct nvmm_cpu **ret)
{
	struct nvmm_cpu *vcpu;
	size_t i;

	for (i = 0; i < NVMM_MAX_VCPUS; i++) {
		vcpu = &mach->cpus[i];

		mutex_enter(&vcpu->lock);
		if (vcpu->present) {
			mutex_exit(&vcpu->lock);
			continue;
		}

		/* Free slot: claim it, and keep the mutex held. */
		vcpu->present = true;
		vcpu->cpuid = i;
		*ret = vcpu;
		return 0;
	}

	return ENOBUFS;
}
151
/*
 * Release a VCPU slot. The VCPU mutex must be held; the caller is expected
 * to drop it with nvmm_vcpu_put() afterwards.
 */
static void
nvmm_vcpu_free(struct nvmm_machine *mach, struct nvmm_cpu *vcpu)
{
	KASSERT(mutex_owned(&vcpu->lock));
	vcpu->present = false;
	vcpu->hcpu_last = -1;
}
159
/*
 * Look up a VCPU by id in the given machine and lock it. Returns ENOENT if
 * the slot is not in use. On success the VCPU mutex is held; release it
 * with nvmm_vcpu_put().
 */
int
nvmm_vcpu_get(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    struct nvmm_cpu **ret)
{
	struct nvmm_cpu *vcpu;

	if (cpuid >= NVMM_MAX_VCPUS) {
		return EINVAL;
	}
	vcpu = &mach->cpus[cpuid];

	mutex_enter(&vcpu->lock);
	if (!vcpu->present) {
		mutex_exit(&vcpu->lock);
		return ENOENT;
	}
	*ret = vcpu;

	return 0;
}
180
/* Release the VCPU mutex taken by nvmm_vcpu_get() or nvmm_vcpu_alloc(). */
void
nvmm_vcpu_put(struct nvmm_cpu *vcpu)
{
	mutex_exit(&vcpu->lock);
}
186
187 /* -------------------------------------------------------------------------- */
188
/*
 * Destroy every machine owned by process 'pid'. Called when the process
 * closes /dev/nvmm, so that nothing leaks if it did not destroy its
 * machines explicitly.
 */
static void
nvmm_kill_machines(pid_t pid)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	size_t i, j;
	int error;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		mach = &machines[i];

		rw_enter(&mach->lock, RW_WRITER);
		if (!mach->present || mach->procid != pid) {
			rw_exit(&mach->lock);
			continue;
		}

		/* Kill it: each present VCPU first, then the machine. */
		for (j = 0; j < NVMM_MAX_VCPUS; j++) {
			error = nvmm_vcpu_get(mach, j, &vcpu);
			if (error)
				continue;
			(*nvmm_impl->vcpu_destroy)(mach, vcpu);
			nvmm_vcpu_free(mach, vcpu);
			nvmm_vcpu_put(vcpu);
		}
		/* Drop the guest vmspace and its backing anonymous object. */
		uvmspace_free(mach->vm);
		uao_detach(mach->uobj);
		nvmm_machine_free(mach);

		rw_exit(&mach->lock);
	}
}
222
223 /* -------------------------------------------------------------------------- */
224
225 static int
226 nvmm_capability(struct nvmm_ioc_capability *args)
227 {
228 args->cap.version = NVMM_CAPABILITY_VERSION;
229 args->cap.state_size = nvmm_impl->state_size;
230 args->cap.max_machines = NVMM_MAX_MACHINES;
231 args->cap.max_vcpus = NVMM_MAX_VCPUS;
232 args->cap.max_ram = NVMM_MAX_RAM;
233
234 (*nvmm_impl->capability)(&args->cap);
235
236 return 0;
237 }
238
/*
 * Create a machine: allocate a slot, make the calling process its owner,
 * create the guest physical address space (a vmspace backed by an
 * anonymous UVM object), and let the backend initialize its state. The
 * new machine id is returned in args->machid.
 */
static int
nvmm_machine_create(struct nvmm_ioc_machine_create *args)
{
	struct nvmm_machine *mach;
	int error;

	/* Returns with the machine write-locked on success. */
	error = nvmm_machine_alloc(&mach);
	if (error)
		return error;

	/* Curproc owns the machine. */
	mach->procid = curproc->p_pid;

	/* Create the machine vmspace. */
	mach->gpa_begin = 0;
	mach->gpa_end = NVMM_MAX_RAM;
	mach->vm = uvmspace_alloc(0, mach->gpa_end - mach->gpa_begin, false);
	mach->uobj = uao_create(mach->gpa_end - mach->gpa_begin, 0);

	/* Grab a reference for the machine. */
	uao_reference(mach->uobj);

	(*nvmm_impl->machine_create)(mach);

	args->machid = mach->machid;
	nvmm_machine_put(mach);

	return 0;
}
268
/*
 * Destroy a machine: tear down each present VCPU, call the backend
 * destructor, free the guest vmspace and its backing object, and release
 * the machine slot.
 */
static int
nvmm_machine_destroy(struct nvmm_ioc_machine_destroy *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;
	size_t i;

	/* Grab the machine as writer: we are tearing it down. */
	error = nvmm_machine_get(args->machid, &mach, true);
	if (error)
		return error;

	/* Destroy every VCPU still present; absent slots are skipped. */
	for (i = 0; i < NVMM_MAX_VCPUS; i++) {
		error = nvmm_vcpu_get(mach, i, &vcpu);
		if (error)
			continue;

		(*nvmm_impl->vcpu_destroy)(mach, vcpu);
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
	}

	(*nvmm_impl->machine_destroy)(mach);

	/* Free the machine vmspace. */
	uvmspace_free(mach->vm);
	uao_detach(mach->uobj);

	nvmm_machine_free(mach);
	nvmm_machine_put(mach);

	return 0;
}
302
/*
 * Apply a backend-specific configuration operation to a machine. The
 * configuration structure is copied in from userland; its size comes from
 * the backend's conf_sizes[] table, indexed by args->op.
 */
static int
nvmm_machine_configure(struct nvmm_ioc_machine_configure *args)
{
	struct nvmm_machine *mach;
	size_t allocsz;
	void *data;
	int error;

	/* args->op comes from userland: bound-check it first. */
	if (__predict_false(args->op >= nvmm_impl->conf_max)) {
		return EINVAL;
	}

	/* Allocate before taking the machine lock; KM_SLEEP may sleep. */
	allocsz = nvmm_impl->conf_sizes[args->op];
	data = kmem_alloc(allocsz, KM_SLEEP);

	error = nvmm_machine_get(args->machid, &mach, true);
	if (error) {
		kmem_free(data, allocsz);
		return error;
	}

	error = copyin(args->conf, data, allocsz);
	if (error) {
		goto out;
	}

	error = (*nvmm_impl->machine_configure)(mach, args->op, data);

out:
	nvmm_machine_put(mach);
	kmem_free(data, allocsz);
	return error;
}
336
/*
 * Create a VCPU in a machine: allocate a slot and let the backend
 * initialize its state. The machine is held as reader; the VCPU slot is
 * protected by its own mutex.
 */
static int
nvmm_vcpu_create(struct nvmm_ioc_vcpu_create *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(args->machid, &mach, false);
	if (error)
		return error;

	/* Returns with the VCPU mutex held on success. */
	error = nvmm_vcpu_alloc(mach, &vcpu);
	if (error)
		goto out;

	error = (*nvmm_impl->vcpu_create)(mach, vcpu);
	if (error) {
		/* Backend refused: give the slot back. */
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		goto out;
	}

	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}
365
/*
 * Destroy a VCPU: call the backend destructor and release the slot. The
 * machine is held as reader; the VCPU mutex serializes the teardown.
 */
static int
nvmm_vcpu_destroy(struct nvmm_ioc_vcpu_destroy *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;

	error = nvmm_machine_get(args->machid, &mach, false);
	if (error)
		return error;

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	(*nvmm_impl->vcpu_destroy)(mach, vcpu);
	nvmm_vcpu_free(mach, vcpu);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	return error;
}
389
/*
 * Load state into a VCPU: copy a state area of nvmm_impl->state_size bytes
 * in from userland and hand it to the backend. args->flags is passed
 * through to the backend unchanged.
 */
static int
nvmm_vcpu_setstate(struct nvmm_ioc_vcpu_setstate *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	void *data;
	int error;

	/* Allocate before taking any lock; KM_SLEEP may sleep. */
	data = kmem_alloc(nvmm_impl->state_size, KM_SLEEP);

	error = nvmm_machine_get(args->machid, &mach, false);
	if (error) {
		kmem_free(data, nvmm_impl->state_size);
		return error;
	}

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	error = copyin(args->state, data, nvmm_impl->state_size);
	if (error) {
		nvmm_vcpu_put(vcpu);
		goto out;
	}

	(*nvmm_impl->vcpu_setstate)(vcpu, data, args->flags);
	nvmm_vcpu_put(vcpu);

out:
	nvmm_machine_put(mach);
	kmem_free(data, nvmm_impl->state_size);
	return error;
}
424
/*
 * Fetch state from a VCPU: have the backend fill a state area of
 * nvmm_impl->state_size bytes, then copy it out to userland. args->flags
 * is passed through to the backend unchanged.
 */
static int
nvmm_vcpu_getstate(struct nvmm_ioc_vcpu_getstate *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	void *data;
	int error;

	/* Allocate before taking any lock; KM_SLEEP may sleep. */
	data = kmem_alloc(nvmm_impl->state_size, KM_SLEEP);

	error = nvmm_machine_get(args->machid, &mach, false);
	if (error) {
		kmem_free(data, nvmm_impl->state_size);
		return error;
	}

	error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
	if (error)
		goto out;

	(*nvmm_impl->vcpu_getstate)(vcpu, data, args->flags);
	/* Drop the VCPU lock before the (possibly faulting) copyout. */
	nvmm_vcpu_put(vcpu);
	error = copyout(data, args->state, nvmm_impl->state_size);

out:
	nvmm_machine_put(mach);
	kmem_free(data, nvmm_impl->state_size);
	return error;
}
454
455 static int
456 nvmm_vcpu_inject(struct nvmm_ioc_vcpu_inject *args)
457 {
458 struct nvmm_machine *mach;
459 struct nvmm_cpu *vcpu;
460 int error;
461
462 error = nvmm_machine_get(args->machid, &mach, false);
463 if (error)
464 return error;
465
466 error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
467 if (error)
468 goto out;
469
470 error = (*nvmm_impl->vcpu_inject)(mach, vcpu, &args->event);
471 nvmm_vcpu_put(vcpu);
472
473 out:
474 nvmm_machine_put(mach);
475 return error;
476 }
477
478 static int
479 nvmm_vcpu_run(struct nvmm_ioc_vcpu_run *args)
480 {
481 struct nvmm_machine *mach;
482 struct nvmm_cpu *vcpu;
483 int error;
484
485 error = nvmm_machine_get(args->machid, &mach, false);
486 if (error)
487 return error;
488
489 error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
490 if (error)
491 goto out;
492
493 (*nvmm_impl->vcpu_run)(mach, vcpu, &args->exit);
494 nvmm_vcpu_put(vcpu);
495
496 out:
497 nvmm_machine_put(mach);
498 return error;
499 }
500
501 /* -------------------------------------------------------------------------- */
502
503 static int
504 nvmm_gpa_map(struct nvmm_ioc_gpa_map *args)
505 {
506 struct proc *p = curproc;
507 struct nvmm_machine *mach;
508 struct vmspace *vmspace;
509 gpaddr_t gpa;
510 vaddr_t uva;
511 int error;
512
513 error = nvmm_machine_get(args->machid, &mach, false);
514 if (error)
515 return error;
516
517 vmspace = p->p_vmspace;
518
519 if ((args->gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0 ||
520 (args->hva % PAGE_SIZE) != 0) {
521 error = EINVAL;
522 goto out;
523 }
524 if (args->hva == 0) {
525 error = EINVAL;
526 goto out;
527 }
528 if (args->gpa < mach->gpa_begin || args->gpa >= mach->gpa_end) {
529 error = EINVAL;
530 goto out;
531 }
532 if (args->gpa + args->size <= args->gpa) {
533 error = EINVAL;
534 goto out;
535 }
536 if (args->gpa + args->size >= mach->gpa_end) {
537 error = EINVAL;
538 goto out;
539 }
540 gpa = args->gpa;
541
542 /* Take a reference for the kernel. */
543 uao_reference(mach->uobj);
544
545 /* Map the uobj into the machine address space, as pageable. */
546 error = uvm_map(&mach->vm->vm_map, &gpa, args->size, mach->uobj,
547 args->gpa, 0, UVM_MAPFLAG(UVM_PROT_RWX, UVM_PROT_RWX,
548 UVM_INH_NONE, UVM_ADV_NORMAL, UVM_FLAG_FIXED));
549 if (error) {
550 uao_detach(mach->uobj);
551 goto out;
552 }
553 if (gpa != args->gpa) {
554 uao_detach(mach->uobj);
555 printf("[!] uvm_map problem\n");
556 error = EINVAL;
557 goto out;
558 }
559
560 uva = (vaddr_t)args->hva;
561
562 /* Take a reference for the user. */
563 uao_reference(mach->uobj);
564
565 /* Map the uobj into the user address space, as pageable. */
566 error = uvm_map(&vmspace->vm_map, &uva, args->size, mach->uobj,
567 args->gpa, 0, UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW,
568 UVM_INH_SHARE, UVM_ADV_NORMAL, UVM_FLAG_FIXED|UVM_FLAG_UNMAP));
569 if (error) {
570 uao_detach(mach->uobj);
571 goto out;
572 }
573
574 out:
575 nvmm_machine_put(mach);
576 return error;
577 }
578
579 static int
580 nvmm_gpa_unmap(struct nvmm_ioc_gpa_unmap *args)
581 {
582 struct nvmm_machine *mach;
583 gpaddr_t gpa;
584 int error;
585
586 error = nvmm_machine_get(args->machid, &mach, false);
587 if (error)
588 return error;
589
590 if ((args->gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0) {
591 error = EINVAL;
592 goto out;
593 }
594 if (args->gpa < mach->gpa_begin || args->gpa >= mach->gpa_end) {
595 error = EINVAL;
596 goto out;
597 }
598 if (args->gpa + args->size <= args->gpa) {
599 error = EINVAL;
600 goto out;
601 }
602 if (args->gpa + args->size >= mach->gpa_end) {
603 error = EINVAL;
604 goto out;
605 }
606 gpa = args->gpa;
607
608 /* Unmap the memory from the machine. */
609 uvm_unmap(&mach->vm->vm_map, gpa, gpa + args->size);
610
611 out:
612 nvmm_machine_put(mach);
613 return error;
614 }
615
616 /* -------------------------------------------------------------------------- */
617
/*
 * Initialize the NVMM subsystem: pick the first backend whose ident()
 * callback matches the host, initialize every machine slot and its VCPU
 * locks, then initialize the backend itself. Returns ENOTSUP if no
 * backend matches.
 */
static int
nvmm_init(void)
{
	size_t i, n;

	/* Probe the compiled-in backends, in order. */
	for (i = 0; i < __arraycount(nvmm_impl_list); i++) {
		if (!(*nvmm_impl_list[i]->ident)()) {
			continue;
		}
		nvmm_impl = nvmm_impl_list[i];
		break;
	}
	if (nvmm_impl == NULL) {
		printf("[!] No implementation found\n");
		return ENOTSUP;
	}

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		machines[i].machid = i;
		rw_init(&machines[i].lock);
		for (n = 0; n < NVMM_MAX_VCPUS; n++) {
			mutex_init(&machines[i].cpus[n].lock, MUTEX_DEFAULT,
			    IPL_NONE);
			machines[i].cpus[n].hcpu_last = -1;
		}
	}

	(*nvmm_impl->init)();

	return 0;
}
649
/*
 * Tear down the NVMM subsystem: destroy the locks of every machine slot
 * and let the backend clean up.
 */
static void
nvmm_fini(void)
{
	size_t i, n;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		rw_destroy(&machines[i].lock);
		for (n = 0; n < NVMM_MAX_VCPUS; n++) {
			mutex_destroy(&machines[i].cpus[n].lock);
		}
		/* TODO need to free stuff, etc */
	}

	(*nvmm_impl->fini)();
}
665
666 /* -------------------------------------------------------------------------- */
667
668 static int
669 nvmm_open(dev_t dev, int flags, int type, struct lwp *l)
670 {
671 if (minor(dev) != 0) {
672 return EXDEV;
673 }
674
675 return 0;
676 }
677
/*
 * Close /dev/nvmm: destroy every machine still owned by the closing
 * process, so that nothing leaks if it did not clean up explicitly.
 */
static int
nvmm_close(dev_t dev, int flags, int type, struct lwp *l)
{
	KASSERT(minor(dev) == 0);

	nvmm_kill_machines(l->l_proc->p_pid);

	return 0;
}
687
/*
 * Dispatch an NVMM ioctl to its handler. 'data' points to the kernel copy
 * of the ioctl argument structure.
 */
static int
nvmm_ioctl(dev_t dev, u_long cmd, void *data, int flags, struct lwp *l)
{
	KASSERT(minor(dev) == 0);

	switch (cmd) {
	case NVMM_IOC_CAPABILITY:
		return nvmm_capability(data);
	case NVMM_IOC_MACHINE_CREATE:
		return nvmm_machine_create(data);
	case NVMM_IOC_MACHINE_DESTROY:
		return nvmm_machine_destroy(data);
	case NVMM_IOC_MACHINE_CONFIGURE:
		return nvmm_machine_configure(data);
	case NVMM_IOC_VCPU_CREATE:
		return nvmm_vcpu_create(data);
	case NVMM_IOC_VCPU_DESTROY:
		return nvmm_vcpu_destroy(data);
	case NVMM_IOC_VCPU_SETSTATE:
		return nvmm_vcpu_setstate(data);
	case NVMM_IOC_VCPU_GETSTATE:
		return nvmm_vcpu_getstate(data);
	case NVMM_IOC_VCPU_INJECT:
		return nvmm_vcpu_inject(data);
	case NVMM_IOC_VCPU_RUN:
		return nvmm_vcpu_run(data);
	case NVMM_IOC_GPA_MAP:
		return nvmm_gpa_map(data);
	case NVMM_IOC_GPA_UNMAP:
		return nvmm_gpa_unmap(data);
	default:
		return EINVAL;
	}
}
722
/*
 * Character device switch for /dev/nvmm. All operations go through
 * ioctl; the driver is marked MP-safe.
 */
const struct cdevsw nvmm_cdevsw = {
	.d_open = nvmm_open,
	.d_close = nvmm_close,
	.d_read = noread,
	.d_write = nowrite,
	.d_ioctl = nvmm_ioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_OTHER | D_MPSAFE
};
737
/*
 * Pseudo-device attach hook. Nothing to do here: all initialization is
 * performed from the module commands.
 */
void
nvmmattach(int nunits)
{
	/* nothing */
}
743
MODULE(MODULE_CLASS_DRIVER, nvmm, NULL);

/*
 * Module control. On load, initialize the subsystem and, when built as a
 * module, attach the character device at major 345. On unload, detach the
 * device and tear the subsystem down.
 */
static int
nvmm_modcmd(modcmd_t cmd, void *arg)
{
	int error;

	switch (cmd) {
	case MODULE_CMD_INIT:
		error = nvmm_init();
		if (error)
			return error;

#if defined(_MODULE)
		{
			devmajor_t bmajor = NODEVMAJOR;
			devmajor_t cmajor = 345;

			/* mknod /dev/nvmm c 345 0 */
			error = devsw_attach("nvmm", NULL, &bmajor,
			    &nvmm_cdevsw, &cmajor);
			if (error) {
				/* Undo the init if we cannot attach. */
				nvmm_fini();
				return error;
			}
		}
#endif
		return 0;

	case MODULE_CMD_FINI:
#if defined(_MODULE)
		{
			error = devsw_detach(NULL, &nvmm_cdevsw);
			if (error) {
				return error;
			}
		}
#endif
		nvmm_fini();
		return 0;

	default:
		return ENOTTY;
	}
}
789